diff --git a/pom.xml b/pom.xml index e09837a5..f563f8d8 100644 --- a/pom.xml +++ b/pom.xml @@ -145,6 +145,12 @@ commons-compress 1.22 + + + commons-io + commons-io + 2.11.0 + org.springframework.boot @@ -168,10 +174,11 @@ 1.17.6 test + org.mockito - mockito-all - 1.10.19 + mockito-inline + 4.7.0 test diff --git a/src/main/java/org/commonwl/view/Scheduler.java b/src/main/java/org/commonwl/view/Scheduler.java index b8a73c7c..6f86a26b 100644 --- a/src/main/java/org/commonwl/view/Scheduler.java +++ b/src/main/java/org/commonwl/view/Scheduler.java @@ -1,6 +1,9 @@ package org.commonwl.view; +import org.apache.commons.io.FileUtils; +import org.apache.commons.io.file.AccumulatorPathVisitor; +import org.apache.commons.io.filefilter.AgeFileFilter; import org.commonwl.view.workflow.QueuedWorkflowRepository; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -9,8 +12,18 @@ import org.springframework.scheduling.annotation.Scheduled; import org.springframework.stereotype.Component; +import java.io.File; +import java.io.IOException; +import java.nio.file.Files; +import java.nio.file.Path; +import java.nio.file.Paths; +import java.time.Duration; +import java.time.Instant; import java.util.Calendar; +import java.util.Collections; import java.util.Date; +import java.util.List; +import java.util.stream.Stream; /** * Scheduler class for recurrent processes. @@ -24,6 +37,17 @@ public class Scheduler { @Value("${queuedWorkflowAgeLimitHours}") private Integer QUEUED_WORKFLOW_AGE_LIMIT_HOURS; + @Value("${tmpDirAgeLimitDays}") + private Integer TMP_DIR_AGE_LIMIT_DAYS; + + // We do not want to remove the bundles, as we use the disk as a sort of + // cache. Whenever a workflow page is displayed in the browser the UI + // fires a request to re-generate it. We skip that by keeping files on disk. + @Value("${graphvizStorage}") + private String graphvizStorage; + @Value("${gitStorage}") + private String gitStorage; + @Autowired public Scheduler(QueuedWorkflowRepository queuedWorkflowRepository) { this.queuedWorkflowRepository = queuedWorkflowRepository; @@ -55,4 +79,59 @@ public void removeOldQueuedWorkflowEntries() { logger.info(queuedWorkflowRepository.deleteByTempRepresentation_RetrievedOnLessThanEqual(removeTime) + " Old queued workflows removed"); } + + /** + * Scheduled function to delete old temporary directories. + * + *

Will scan each temporary directory (graphviz, RO, git), searching + * for files exceeding a specified threshold.

+ * + *

It scans the first level directories, i.e. it does not recursively + * scans directories. So it will delete any RO or Git repository directories + * that exceed the threshold. Similarly, it will delete any graph (svg, png, + * etc) that also exceed it.

+ * + *

Errors logged through Logger. Settings in Spring application properties + * file.

+ * + * @since 1.4.5 + */ + @Scheduled(cron = "${cron.clearTmpDir}") + public void clearTmpDir() { + // Temporary files used for graphviz, RO, and git may be stored in different + // locations, so we will collect all of them here. + List temporaryDirectories = Stream.of(graphvizStorage, gitStorage) + .distinct() + .toList(); + temporaryDirectories.forEach(this::clearDirectory); + } + + /** + * For a given temporary directory, scans it (not recursively) for files and + * directories exceeding the age limit threshold. + * + * @since 1.4.5 + * @see https://commons.apache.org/proper/commons-io/apidocs/org/apache/commons/io/filefilter/AgeFileFilter.html + * @param temporaryDirectory temporary directory + */ + private void clearDirectory(String temporaryDirectory) { + final Instant cutoff = Instant.now().minus(Duration.ofDays(TMP_DIR_AGE_LIMIT_DAYS)); + + File temporaryDirectoryFile = new File(temporaryDirectory); + String[] files = temporaryDirectoryFile.list(new AgeFileFilter(Date.from(cutoff))); + + if (files != null && files.length > 0) { + for (String fileName : files) { + File fileToDelete = new File(temporaryDirectoryFile, fileName); + try { + FileUtils.forceDelete(fileToDelete); + } catch (IOException e) { + // Here we probably have a more serious case. Since the Git repository, RO directory, or graphs are + // not expected to be in use, and the application must have access, I/O errors are not expected and + // must be treated as errors. + logger.error(String.format("Failed to delete old temporary file or directory [%s]: %s", fileToDelete.getAbsolutePath(), e.getMessage()), e); + } + } + } + } } diff --git a/src/main/java/org/commonwl/view/cwl/CWLToolRunner.java b/src/main/java/org/commonwl/view/cwl/CWLToolRunner.java index 87884cc2..121d8f11 100644 --- a/src/main/java/org/commonwl/view/cwl/CWLToolRunner.java +++ b/src/main/java/org/commonwl/view/cwl/CWLToolRunner.java @@ -24,6 +24,7 @@ import org.commonwl.view.git.GitSemaphore; import org.commonwl.view.git.GitService; import org.commonwl.view.researchobject.ROBundleFactory; +import org.commonwl.view.util.FileUtils; import org.commonwl.view.workflow.QueuedWorkflow; import org.commonwl.view.workflow.QueuedWorkflowRepository; import org.commonwl.view.workflow.Workflow; @@ -78,9 +79,10 @@ public void createWorkflowFromQueued(QueuedWorkflow queuedWorkflow) GitDetails gitInfo = tempWorkflow.getRetrievedFrom(); final String repoUrl = gitInfo.getRepoUrl(); // Parse using cwltool and replace in database + Git repo = null; try { boolean safeToAccess = gitSemaphore.acquire(repoUrl); - Git repo = gitService.getRepository(gitInfo, safeToAccess); + repo = gitService.getRepository(gitInfo, safeToAccess); Path localPath = repo.getRepository().getWorkTree().toPath(); Path workflowFile = localPath.resolve(gitInfo.getPath()).normalize().toAbsolutePath(); Workflow newWorkflow = cwlService.parseWorkflowWithCwltool( @@ -106,16 +108,19 @@ public void createWorkflowFromQueued(QueuedWorkflow queuedWorkflow) logger.error("Jena query exception ", ex); queuedWorkflow.setCwltoolStatus(CWLToolStatus.ERROR); queuedWorkflow.setMessage("An error occurred when executing a query on the SPARQL store"); + FileUtils.deleteGitRepository(repo); } catch (CWLValidationException ex) { logger.error(ex.getMessage(), ex); queuedWorkflow.setCwltoolStatus(CWLToolStatus.ERROR); queuedWorkflow.setMessage(ex.getMessage()); + FileUtils.deleteGitRepository(repo); } catch (Exception ex) { logger.error("Unexpected error", ex); queuedWorkflow.setCwltoolStatus(CWLToolStatus.ERROR); queuedWorkflow.setMessage("Whoops! Cwltool ran successfully, but an unexpected " + "error occurred in CWLViewer!\n" + "Help us by reporting it on Gitter or a Github issue\n"); + FileUtils.deleteGitRepository(repo); } finally { gitSemaphore.release(repoUrl); queuedWorkflowRepository.save(queuedWorkflow); diff --git a/src/main/java/org/commonwl/view/researchobject/ROBundleFactory.java b/src/main/java/org/commonwl/view/researchobject/ROBundleFactory.java index fa310f5f..afc0c0a0 100644 --- a/src/main/java/org/commonwl/view/researchobject/ROBundleFactory.java +++ b/src/main/java/org/commonwl/view/researchobject/ROBundleFactory.java @@ -21,7 +21,9 @@ import org.apache.commons.io.FilenameUtils; import org.apache.taverna.robundle.Bundle; +import org.apache.taverna.robundle.fs.BundleFileSystem; import org.commonwl.view.git.GitDetails; +import org.commonwl.view.util.FileUtils; import org.commonwl.view.workflow.Workflow; import org.commonwl.view.workflow.WorkflowRepository; import org.slf4j.Logger; @@ -31,7 +33,9 @@ import org.springframework.scheduling.annotation.EnableAsync; import org.springframework.stereotype.Component; +import java.io.File; import java.io.IOException; +import java.nio.file.FileSystem; import java.nio.file.Path; /** @@ -77,6 +81,11 @@ public void createWorkflowRO(Workflow workflow) // Save the bundle to the storage location in properties Path bundleLocation = roBundleService.saveToFile(bundle); + try { + FileUtils.deleteBundleTemporaryDirectory(bundle); + } catch (IOException e) { + logger.warn(String.format("Failed to delete temporary directory for bundle [%s]: %s", bundle.getSource(), e.getMessage()), e); + } // Add RO Bundle to associated workflow model workflow.setRoBundlePath(bundleLocation.toString()); diff --git a/src/main/java/org/commonwl/view/researchobject/ROBundleService.java b/src/main/java/org/commonwl/view/researchobject/ROBundleService.java index 427b230a..2b375614 100644 --- a/src/main/java/org/commonwl/view/researchobject/ROBundleService.java +++ b/src/main/java/org/commonwl/view/researchobject/ROBundleService.java @@ -19,23 +19,6 @@ package org.commonwl.view.researchobject; -import static org.apache.commons.io.FileUtils.readFileToString; - -import java.io.File; -import java.io.IOException; -import java.io.InputStream; -import java.net.URI; -import java.net.URISyntaxException; -import java.nio.file.Files; -import java.nio.file.Path; -import java.nio.file.Paths; -import java.util.ArrayList; -import java.util.HashSet; -import java.util.List; -import java.util.Set; -import java.util.regex.Matcher; -import java.util.regex.Pattern; - import org.apache.commons.io.FileUtils; import org.apache.commons.io.FilenameUtils; import org.apache.jena.query.QuerySolution; @@ -64,6 +47,25 @@ import org.springframework.beans.factory.annotation.Value; import org.springframework.stereotype.Service; +import java.io.File; +import java.io.IOException; +import java.io.InputStream; +import java.net.URI; +import java.net.URISyntaxException; +import java.nio.charset.Charset; +import java.nio.file.Files; +import java.nio.file.Path; +import java.nio.file.Paths; +import java.nio.file.attribute.BasicFileAttributes; +import java.util.ArrayList; +import java.util.HashSet; +import java.util.List; +import java.util.Set; +import java.util.regex.Matcher; +import java.util.regex.Pattern; + +import static org.apache.commons.io.FileUtils.readFileToString; + /** * Service handling Research Object Bundles */ @@ -73,16 +75,16 @@ public class ROBundleService { private final Logger logger = LoggerFactory.getLogger(this.getClass()); // Services - private GraphVizService graphVizService; - private GitService gitService; - private RDFService rdfService; - private CWLTool cwlTool; - private GitSemaphore gitSemaphore; + private final GraphVizService graphVizService; + private final GitService gitService; + private final RDFService rdfService; + private final CWLTool cwlTool; + private final GitSemaphore gitSemaphore; // Configuration variables - private Agent appAgent; - private int singleFileSizeLimit; - private Path bundleStorage; + private final Agent appAgent; + private final int singleFileSizeLimit; + private final Path bundleStorage; // Pattern for extracting version from a cwl file private final String CWL_VERSION_REGEX = "cwlVersion:\\s*\"?(?:cwl:)?([^\\s\"]+)\"?"; @@ -150,13 +152,19 @@ public Bundle createBundle(Workflow workflow, GitDetails gitInfo) throws IOExcep Set authors = new HashSet<>(); boolean safeToAccess = gitSemaphore.acquire(gitInfo.getRepoUrl()); + Git gitRepo = null; try { - Git gitRepo = gitService.getRepository(workflow.getRetrievedFrom(), safeToAccess); + gitRepo = gitService.getRepository(workflow.getRetrievedFrom(), safeToAccess); Path relativePath = Paths.get(FilenameUtils.getPath(gitInfo.getPath())); Path gitPath = gitRepo.getRepository().getWorkTree().toPath().resolve(relativePath); addFilesToBundle(gitInfo, bundle, bundlePath, gitRepo, gitPath, authors, workflow); + } catch (GitAPIException | IOException e) { + org.commonwl.view.util.FileUtils.deleteBundleTemporaryDirectory(bundle); + org.commonwl.view.util.FileUtils.deleteBundleParentDirectory(bundle); + throw e; } finally { gitSemaphore.release(gitInfo.getRepoUrl()); + org.commonwl.view.util.FileUtils.deleteGitRepository(gitRepo); } // Add combined authors @@ -195,7 +203,7 @@ public Bundle createBundle(Workflow workflow, GitDetails gitInfo) throws IOExcep addAggregation(bundle, manifestAnnotations, "merged.cwl", cwlTool.getPackedVersion(rawUrl)); } catch (CWLValidationException ex) { - logger.error("Could not pack workflow when creating Research Object", ex.getMessage()); + logger.error(String.format("Could not pack workflow when creating Research Object: %s", ex.getMessage()), ex); } String rdfUrl = workflow.getIdentifier(); if (rdfService.graphExists(rdfUrl)) { @@ -215,9 +223,15 @@ public Bundle createBundle(Workflow workflow, GitDetails gitInfo) throws IOExcep } catch (URISyntaxException ex) { logger.error("Error creating URI for RO Bundle", ex); + org.commonwl.view.util.FileUtils.deleteBundleTemporaryDirectory(bundle); + org.commonwl.view.util.FileUtils.deleteBundleParentDirectory(bundle); } catch (GitAPIException ex) { logger.error("Error getting repository to create RO Bundle", ex); + org.commonwl.view.util.FileUtils.deleteBundleTemporaryDirectory(bundle); + org.commonwl.view.util.FileUtils.deleteBundleParentDirectory(bundle); } + // TODO: Why are we not re-throwing the exceptions above? It looks like we should + // so users/admins are aware of the problem. // Return the completed bundle return bundle; @@ -238,118 +252,121 @@ private void addFilesToBundle(GitDetails gitDetails, Bundle bundle, Path bundleP Workflow workflow) throws IOException { File[] files = repoPath.toFile().listFiles(); - for (File file : files) { - if (!file.getName().equals(".git")) { - if (file.isDirectory()) { - - // Create a new folder in the RO for this directory - Path newBundlePath = bundlePath.resolve(file.getName()); - Files.createDirectory(newBundlePath); - - // Create git details object for subfolder - GitDetails subfolderGitDetails = new GitDetails(gitDetails.getRepoUrl(), gitDetails.getBranch(), - Paths.get(gitDetails.getPath()).resolve(file.getName()).toString()); - - // Add all files in the subdirectory to this new folder - addFilesToBundle(subfolderGitDetails, bundle, newBundlePath, gitRepo, - repoPath.resolve(file.getName()), authors, workflow); - - } else { - try { - // Where to store the new file - Path bundleFilePath = bundlePath.resolve(file.getName()); - Path gitFolder = Paths.get(gitDetails.getPath()); - String relativePath = gitFolder.resolve(file.getName()).toString(); - Path gitPath = bundlePath.getRoot().resolve(relativePath); // would start with / - - // Get direct URL permalink - URI rawURI = new URI("https://w3id.org/cwl/view/git/" + workflow.getLastCommit() + - gitPath + "?format=raw"); - - // Variable to store file contents and aggregation - String fileContent = null; - PathMetadata aggregation; - - // Download or externally link if oversized - if (file.length() <= singleFileSizeLimit) { - // Save file to research object bundle - fileContent = readFileToString(file); - Bundles.setStringValue(bundleFilePath, fileContent); + if (files != null) { + for (File file : files) { + if (!file.getName().equals(".git")) { + BasicFileAttributes basicFileAttributes = Files.readAttributes(file.toPath(), BasicFileAttributes.class); + if (basicFileAttributes.isDirectory()) { - // Set retrieved information for this file in the manifest - aggregation = bundle.getManifest().getAggregation(bundleFilePath); - aggregation.setRetrievedFrom(rawURI); - aggregation.setRetrievedBy(appAgent); - aggregation.setRetrievedOn(aggregation.getCreatedOn()); - } else { - logger.info("File " + file.getName() + " is too large to download - " + - FileUtils.byteCountToDisplaySize(file.length()) + "/" + - FileUtils.byteCountToDisplaySize(singleFileSizeLimit) + - ", linking externally to RO bundle"); - - // Set information for this file in the manifest - aggregation = bundle.getManifest().getAggregation(rawURI); - Proxy bundledAs = new Proxy(); - bundledAs.setURI(); - bundledAs.setFolder(repoPath); - aggregation.setBundledAs(bundledAs); - } + // Create a new folder in the RO for this directory + Path newBundlePath = bundlePath.resolve(file.getName()); + Files.createDirectory(newBundlePath); - // Special handling for cwl files - boolean cwl = FilenameUtils.getExtension(file.getName()).equals("cwl"); - if (cwl) { - // Correct mime type (no official standard for yaml) - aggregation.setMediatype("text/x-yaml"); - - // Add conformsTo for version extracted from regex - if (fileContent != null) { - Matcher m = cwlVersionPattern.matcher(fileContent); - if (m.find()) { - aggregation.setConformsTo(new URI("https://w3id.org/cwl/" + m.group(1))); - } - } - } + // Create git details object for subfolder + GitDetails subfolderGitDetails = new GitDetails(gitDetails.getRepoUrl(), gitDetails.getBranch(), + Paths.get(gitDetails.getPath()).resolve(file.getName()).toString()); + // Add all files in the subdirectory to this new folder + addFilesToBundle(subfolderGitDetails, bundle, newBundlePath, gitRepo, + repoPath.resolve(file.getName()), authors, workflow); + + } else { try { - // Add authors from git commits to the file - Set fileAuthors = gitService.getAuthors(gitRepo, - gitPath.toString()); + // Where to store the new file + Path bundleFilePath = bundlePath.resolve(file.getName()); + Path gitFolder = Paths.get(gitDetails.getPath()); + String relativePath = gitFolder.resolve(file.getName()).toString(); + Path gitPath = bundlePath.getRoot().resolve(relativePath); // would start with / + + // Get direct URL permalink + URI rawURI = new URI("https://w3id.org/cwl/view/git/" + workflow.getLastCommit() + + gitPath + "?format=raw"); + + // Variable to store file contents and aggregation + String fileContent = null; + PathMetadata aggregation; + + // Download or externally link if oversized + if (basicFileAttributes.size() <= singleFileSizeLimit) { + // Save file to research object bundle + fileContent = readFileToString(file, Charset.defaultCharset()); + Bundles.setStringValue(bundleFilePath, fileContent); + + // Set retrieved information for this file in the manifest + aggregation = bundle.getManifest().getAggregation(bundleFilePath); + aggregation.setRetrievedFrom(rawURI); + aggregation.setRetrievedBy(appAgent); + aggregation.setRetrievedOn(aggregation.getCreatedOn()); + } else { + logger.info("File " + file.getName() + " is too large to download - " + + FileUtils.byteCountToDisplaySize(basicFileAttributes.size()) + "/" + + FileUtils.byteCountToDisplaySize(singleFileSizeLimit) + + ", linking externally to RO bundle"); + + // Set information for this file in the manifest + aggregation = bundle.getManifest().getAggregation(rawURI); + Proxy bundledAs = new Proxy(); + bundledAs.setURI(); + bundledAs.setFolder(repoPath); + aggregation.setBundledAs(bundledAs); + } + // Special handling for cwl files + boolean cwl = FilenameUtils.getExtension(file.getName()).equals("cwl"); if (cwl) { - // Attempt to get authors from cwl description - takes priority - ResultSet descAuthors = rdfService.getAuthors(bundlePath - .resolve(file.getName()).toString().substring(10), - workflow.getIdentifier()); - if (descAuthors.hasNext()) { - QuerySolution authorSoln = descAuthors.nextSolution(); - HashableAgent newAuthor = new HashableAgent(); - if (authorSoln.contains("name")) { - newAuthor.setName(authorSoln.get("name").toString()); - } - if (authorSoln.contains("email")) { - newAuthor.setUri(new URI(authorSoln.get("email").toString())); - } - if (authorSoln.contains("orcid")) { - newAuthor.setOrcid(new URI(authorSoln.get("orcid").toString())); + // Correct mime type (no official standard for yaml) + aggregation.setMediatype("text/x-yaml"); + + // Add conformsTo for version extracted from regex + if (fileContent != null) { + Matcher m = cwlVersionPattern.matcher(fileContent); + if (m.find()) { + aggregation.setConformsTo(new URI("https://w3id.org/cwl/" + m.group(1))); } - fileAuthors.remove(newAuthor); - fileAuthors.add(newAuthor); } } - authors.addAll(fileAuthors); - aggregation.setAuthoredBy(new ArrayList<>(fileAuthors)); - } catch (GitAPIException ex) { - logger.error("Could not get commits for file " + repoPath, ex); - } + try { + // Add authors from git commits to the file + Set fileAuthors = gitService.getAuthors(gitRepo, + gitPath.toString()); + + if (cwl) { + // Attempt to get authors from cwl description - takes priority + ResultSet descAuthors = rdfService.getAuthors(bundlePath + .resolve(file.getName()).toString().substring(10), + workflow.getIdentifier()); + if (descAuthors.hasNext()) { + QuerySolution authorSolution = descAuthors.nextSolution(); + HashableAgent newAuthor = new HashableAgent(); + if (authorSolution.contains("name")) { + newAuthor.setName(authorSolution.get("name").toString()); + } + if (authorSolution.contains("email")) { + newAuthor.setUri(new URI(authorSolution.get("email").toString())); + } + if (authorSolution.contains("orcid")) { + newAuthor.setOrcid(new URI(authorSolution.get("orcid").toString())); + } + fileAuthors.remove(newAuthor); + fileAuthors.add(newAuthor); + } + } - // Set retrieved information for this file in the manifest - aggregation.setRetrievedFrom(rawURI); - aggregation.setRetrievedBy(appAgent); - aggregation.setRetrievedOn(aggregation.getCreatedOn()); + authors.addAll(fileAuthors); + aggregation.setAuthoredBy(new ArrayList<>(fileAuthors)); + } catch (GitAPIException ex) { + logger.error("Could not get commits for file " + repoPath, ex); + } - } catch (URISyntaxException ex) { - logger.error("Error creating URI for RO Bundle", ex); + // Set retrieved information for this file in the manifest + aggregation.setRetrievedFrom(rawURI); + aggregation.setRetrievedBy(appAgent); + aggregation.setRetrievedOn(aggregation.getCreatedOn()); + + } catch (URISyntaxException ex) { + logger.error("Error creating URI for RO Bundle", ex); + } } } } diff --git a/src/main/java/org/commonwl/view/util/FileUtils.java b/src/main/java/org/commonwl/view/util/FileUtils.java new file mode 100644 index 00000000..816860a5 --- /dev/null +++ b/src/main/java/org/commonwl/view/util/FileUtils.java @@ -0,0 +1,128 @@ +package org.commonwl.view.util; + +import org.apache.taverna.robundle.Bundle; +import org.apache.taverna.robundle.fs.BundleFileSystem; +import org.eclipse.jgit.api.Git; + +import java.io.File; +import java.io.IOException; +import java.nio.file.Path; +import java.util.Objects; + +/** + * File utilities for CWL Viewer. + * + *

Uses other utilities, such as Apache Commons IO's {@code FileUtils}, but + * with refinements specific for CWL Viewer (e.g. handling Git repositories).

+ * + * @since 1.4.5 + */ +public class FileUtils { + + private FileUtils() {} + + /** + * Deletes the directory of a git repository. Note that the Git object + * contains a repository with a directory, but this directory points to the
.git
+ * directory. This method will delete the parent of the
.git
directory, + * which corresponds to the cloned folder with the source code from git. + * + * @param repo Git repository object + * @throws IOException if it fails to delete the Git repository directory + * @since 1.4.5 + */ + public static void deleteGitRepository(Git repo) throws IOException { + if ( + repo != null && + repo.getRepository() != null && + repo.getRepository().getDirectory() != null && + repo.getRepository().getDirectory().exists() + ) { + // This is literally the git directory, i.e. /some/hierarchy/repository/.git, + // but we want to delete its parent directory. + File gitDirectory = repo.getRepository().getDirectory(); + org.apache.commons.io.FileUtils.forceDelete(gitDirectory.getParentFile()); + } + } + + /** + * Deletes the bundle temporary directory. + * + *

The Bundle object is an Apache Taverna object. Be careful + * as it acts as a virtual file system, with a root directory (normally "/"), + * but that's an abstract representation of the bundle files.

+ * + *

This method looks inside the bundle for the bundle source directory. + * This directory contains a hash in its name structure. Apache Taverna creates + * temporary directories in

java.io.tmpdir
using this same + * hash. This method will delete any directory matching that hash in the + * temporary directory.

+ * + *

NOTE: the directory deleted is expected to be empty. It is a left over + * by Apache Taverna, and failing to delete it shouldn't be a major issue + * (other than using an inode to represent the file in the file system). And + * also note that we are not deleting the parent directory, since the + * left-over directory is always stored in the system temporary directory, + * while the ROBundle with the hash is stored in the Spring configured + * directory for bundles.

+ * + * @param bundle A bundle object + * @throws IOException if it fails to delete the bundle temporary directory + * @since 1.4.5 + */ + public static void deleteBundleTemporaryDirectory(Bundle bundle) throws IOException { + // The RoBundleService#saveToFile call will delegate to Apache Taverna's + // Bundles.closeAndSaveBundle, which empties the bundle temporary + // directory without deleting the directory itself. The following call is + // just for cleaning it up. + if (bundle != null) { + BundleFileSystem fs = (BundleFileSystem) bundle.getFileSystem(); + File tmpDir = new File(System.getProperty("java.io.tmpdir")); + // The file system source will be something like /tmp/bundles_dir/hash/bundle.zip, + // and we want /tmp/hash to be deleted. N.B. Apache Taverna uses the temporary directory + // for the temporary bundles' directory, not the bundles file specified by Spring. + Path parent = fs.getSource().toAbsolutePath().getParent().getFileName(); + if (parent != null) { + String bundleTmpDirName = parent.toString(); + File bundleTmpDir = new File(tmpDir, bundleTmpDirName); + if (bundleTmpDir.exists() && bundleTmpDir.isDirectory()) { + File[] files = bundleTmpDir.listFiles(); + // We expect the file to be empty, else we better avoid deleting it. + if (files != null && files.length == 0) { + org.apache.commons.io.FileUtils.forceDelete(bundleTmpDir); + } + } + } + } + } + + /** + * Used to delete the directory with a robundle ZIP file. It must be used + * when the system fails to generate a bundle file. Apache Taverna's API + * may have already created the Bundle ZIP. + * + *

Normally this file is stored in a newly created directory (e.g. + *

/tmp/bundles/bundle-hash-1234/robundle.zip
), and when we fail + * to successfully create the bundle, it is best to remove this file to + * prevent disk space issues (especially if we end up with duplicated + * bundles).

+ * + * @param bundle A bundle object + * @throws IOException if it fails to delete the bundle temporary directory + * @since 1.4.5 + */ + public static void deleteBundleParentDirectory(Bundle bundle) throws IOException { + if (bundle != null) { + Path bundleSource = bundle.getSource(); + File parent = bundleSource.getParent().toFile(); + if (parent.exists() && parent.isDirectory()) { + File[] files = parent.listFiles(); + // We expect the directory to be empty or contain just one file (the ZIP bundle, + // since the bundle files are stored temporarily in another temporary directory). + if (files != null && files.length <= 1) { + org.apache.commons.io.FileUtils.forceDelete(parent); + } + } + } + } +} diff --git a/src/main/java/org/commonwl/view/workflow/WorkflowService.java b/src/main/java/org/commonwl/view/workflow/WorkflowService.java index fc61b549..fed87827 100644 --- a/src/main/java/org/commonwl/view/workflow/WorkflowService.java +++ b/src/main/java/org/commonwl/view/workflow/WorkflowService.java @@ -19,18 +19,6 @@ package org.commonwl.view.workflow; -import java.io.File; -import java.io.IOException; -import java.nio.file.Files; -import java.nio.file.Path; -import java.nio.file.Paths; -import java.util.ArrayList; -import java.util.Calendar; -import java.util.Date; -import java.util.List; -import java.util.Objects; -import java.util.Optional; - import org.commonwl.view.cwl.CWLService; import org.commonwl.view.cwl.CWLToolRunner; import org.commonwl.view.cwl.CWLToolStatus; @@ -40,6 +28,7 @@ import org.commonwl.view.graphviz.GraphVizService; import org.commonwl.view.researchobject.ROBundleFactory; import org.commonwl.view.researchobject.ROBundleNotFoundException; +import org.commonwl.view.util.FileUtils; import org.eclipse.jgit.api.Git; import org.eclipse.jgit.api.errors.GitAPIException; import org.eclipse.jgit.api.errors.RefNotFoundException; @@ -52,6 +41,18 @@ import org.springframework.data.domain.Pageable; import org.springframework.stereotype.Service; +import java.io.File; +import java.io.IOException; +import java.nio.file.Files; +import java.nio.file.Path; +import java.nio.file.Paths; +import java.util.ArrayList; +import java.util.Calendar; +import java.util.Date; +import java.util.List; +import java.util.Objects; +import java.util.Optional; + @Service public class WorkflowService { @@ -295,9 +296,9 @@ public QueuedWorkflow createQueuedWorkflow(GitDetails gitInfo) throws GitAPIException, WorkflowNotFoundException, IOException { QueuedWorkflow queuedWorkflow; + Git repo = null; try { boolean safeToAccess = gitSemaphore.acquire(gitInfo.getRepoUrl()); - Git repo = null; while (repo == null) { try { repo = gitService.getRepository(gitInfo, safeToAccess); @@ -366,6 +367,10 @@ public QueuedWorkflow createQueuedWorkflow(GitDetails gitInfo) logger.error("Could not update workflow with cwltool", e); } + } catch (GitAPIException | RuntimeException | IOException e) { + logger.warn(String.format("Failed to create Queued Workflow: %s - Temporary files will be deleted", e.getMessage()), e); + FileUtils.deleteGitRepository(repo); + throw e; } finally { gitSemaphore.release(gitInfo.getRepoUrl()); } diff --git a/src/main/resources/application.properties b/src/main/resources/application.properties index f47d9986..17f92699 100644 --- a/src/main/resources/application.properties +++ b/src/main/resources/application.properties @@ -75,6 +75,12 @@ cron.deleteOldQueuedWorkflows = 0 0 * * * ? # Age limit for queued workflows in hours. queuedWorkflowAgeLimitHours = 24 +# The expression below implies every day at the 0th second, 0th minute and 24th(0th) hour i.e (time 00:00:00, every day) +cron.clearTmpDir = 0 0 0 * * ? + +# Age limit for tmp directories in days. +tmpDirAgeLimitDays = 1 + #======================= # DB migrations #======================= diff --git a/src/test/java/org/commonwl/view/util/FileUtilsTest.java b/src/test/java/org/commonwl/view/util/FileUtilsTest.java new file mode 100644 index 00000000..5a7939f8 --- /dev/null +++ b/src/test/java/org/commonwl/view/util/FileUtilsTest.java @@ -0,0 +1,244 @@ +package org.commonwl.view.util; + +import org.apache.taverna.robundle.Bundle; +import org.apache.taverna.robundle.fs.BundleFileSystem; +import org.eclipse.jgit.api.Git; +import org.eclipse.jgit.lib.Repository; +import org.junit.Rule; +import org.junit.Test; +import org.junit.jupiter.params.ParameterizedTest; +import org.junit.jupiter.params.provider.NullSource; +import org.junit.rules.TemporaryFolder; +import org.mockito.MockedStatic; +import org.mockito.Mockito; +import org.mockito.junit.MockitoJUnit; +import org.mockito.junit.MockitoRule; +import org.mockito.stubbing.Answer; + +import java.io.File; +import java.io.IOException; +import java.nio.file.Path; + +import static org.junit.jupiter.api.Assertions.assertFalse; +import static org.junit.jupiter.api.Assertions.assertTrue; +import static org.mockito.Mockito.mock; +import static org.mockito.Mockito.mockStatic; +import static org.mockito.Mockito.when; + +/** + * Tests for FileUtils. + * + * @since 1.4.5 + */ +public class FileUtilsTest { + + @Rule + public MockitoRule initRule = MockitoJUnit.rule(); + + @Rule + public TemporaryFolder temporaryFolder = new TemporaryFolder(); + + // --- git files + + @ParameterizedTest + @NullSource + public void testNullRepository(Git repository) throws IOException { + try (MockedStatic fileUtilsMocked = mockStatic(org.apache.commons.io.FileUtils.class)) { + fileUtilsMocked.when( + () -> org.apache.commons.io.FileUtils.deleteDirectory(Mockito.any()) + ).thenAnswer((Answer) invocation -> null); + FileUtils.deleteGitRepository(repository); + fileUtilsMocked.verifyNoInteractions(); + } + } + + @Test + public void testRepositoryNullRepository() throws IOException { + Git repository = mock(Git.class); + when(repository.getRepository()).thenReturn(null); + try (MockedStatic fileUtilsMocked = mockStatic(org.apache.commons.io.FileUtils.class)) { + fileUtilsMocked.when( + () -> org.apache.commons.io.FileUtils.deleteDirectory(Mockito.any()) + ).thenAnswer((Answer) invocation -> null); + FileUtils.deleteGitRepository(repository); + fileUtilsMocked.verifyNoInteractions(); + } + } + + @Test + public void testRepositoryRepositoryNullDirectory() throws IOException { + Git repository = mock(Git.class); + Repository repository1 = mock(Repository.class); + when(repository.getRepository()).thenReturn(repository1); + when(repository1.getDirectory()).thenReturn(null); + try (MockedStatic fileUtilsMocked = mockStatic(org.apache.commons.io.FileUtils.class)) { + fileUtilsMocked.when( + () -> org.apache.commons.io.FileUtils.deleteDirectory(Mockito.any()) + ).thenAnswer((Answer) invocation -> null); + FileUtils.deleteGitRepository(repository); + fileUtilsMocked.verifyNoInteractions(); + } + } + + @Test + public void testRepositoryRepositoryDirectoryDoesNotExist() throws IOException { + Git repository = mock(Git.class); + Repository repository1 = mock(Repository.class); + when(repository.getRepository()).thenReturn(repository1); + File doesNotExist = new File("/tmp/tmp/tmp/tmp/1/2/3/4/5"); + when(repository1.getDirectory()).thenReturn(doesNotExist); + try (MockedStatic fileUtilsMocked = mockStatic(org.apache.commons.io.FileUtils.class)) { + fileUtilsMocked.when( + () -> org.apache.commons.io.FileUtils.deleteDirectory(Mockito.any()) + ).thenAnswer((Answer) invocation -> null); + FileUtils.deleteGitRepository(repository); + fileUtilsMocked.verifyNoInteractions(); + } + } + + @Test + public void testRepositoryRepositoryDirectory() throws IOException { + Git repository = mock(Git.class); + Repository repository1 = mock(Repository.class); + when(repository.getRepository()).thenReturn(repository1); + File gitRepository = temporaryFolder.newFile(".git"); + when(repository1.getDirectory()).thenReturn(gitRepository); + assertTrue(gitRepository.exists()); + FileUtils.deleteGitRepository(repository); + assertFalse(gitRepository.exists()); + } + + // --- bundle temporary files + + @ParameterizedTest + @NullSource + public void testBundleTemporaryDirectoryNullBundle(Bundle bundle) throws IOException { + try (MockedStatic fileUtilsMocked = mockStatic(org.apache.commons.io.FileUtils.class)) { + fileUtilsMocked.when( + () -> org.apache.commons.io.FileUtils.deleteDirectory(Mockito.any()) + ).thenAnswer((Answer) invocation -> null); + FileUtils.deleteBundleTemporaryDirectory(bundle); + fileUtilsMocked.verifyNoInteractions(); + } + } + + @Test + public void testBundleTemporaryDirectoryNoParentFileName() throws IOException { + try (MockedStatic fileUtilsMocked = mockStatic(org.apache.commons.io.FileUtils.class)) { + fileUtilsMocked.when( + () -> org.apache.commons.io.FileUtils.deleteDirectory(Mockito.any()) + ).thenAnswer((Answer) invocation -> null); + Bundle bundle = mock(Bundle.class); + BundleFileSystem fileSystem = mock(BundleFileSystem.class); + when(bundle.getFileSystem()).thenReturn(fileSystem); + when(fileSystem.getSource()).thenReturn(Path.of("/does-not-exist-12345")); + FileUtils.deleteBundleTemporaryDirectory(bundle); + fileUtilsMocked.verifyNoInteractions(); + } + } + + @Test + public void testBundleTemporaryDirectoryDoesNotExist() throws IOException { + try (MockedStatic fileUtilsMocked = mockStatic(org.apache.commons.io.FileUtils.class)) { + fileUtilsMocked.when( + () -> org.apache.commons.io.FileUtils.deleteDirectory(Mockito.any()) + ).thenAnswer((Answer) invocation -> null); + Bundle bundle = mock(Bundle.class); + BundleFileSystem fileSystem = mock(BundleFileSystem.class); + when(bundle.getFileSystem()).thenReturn(fileSystem); + when(fileSystem.getSource()).thenReturn(Path.of("/does/not/exist/1/2/3/does-not-exist-12345")); + FileUtils.deleteBundleTemporaryDirectory(bundle); + fileUtilsMocked.verifyNoInteractions(); + } + } + + @Test + public void testBundleTemporaryDirectoryNotEmpty() throws IOException { + try (MockedStatic fileUtilsMocked = mockStatic(org.apache.commons.io.FileUtils.class)) { + fileUtilsMocked.when( + () -> org.apache.commons.io.FileUtils.deleteDirectory(Mockito.any()) + ).thenAnswer((Answer) invocation -> null); + Bundle bundle = mock(Bundle.class); + BundleFileSystem fileSystem = mock(BundleFileSystem.class); + when(bundle.getFileSystem()).thenReturn(fileSystem); + final String hash = "bundle.zip"; + File bundleTemporaryDirectory = temporaryFolder.newFile(hash); + when(fileSystem.getSource()).thenReturn(bundleTemporaryDirectory.toPath()); + FileUtils.deleteBundleTemporaryDirectory(bundle); + fileUtilsMocked.verifyNoInteractions(); + } + } + + @Test + public void testBundleTemporaryDirectory() throws IOException { + Bundle bundle = mock(Bundle.class); + BundleFileSystem fileSystem = mock(BundleFileSystem.class); + when(bundle.getFileSystem()).thenReturn(fileSystem); + final String hash = "bundle.zip"; + File bundleTemporaryDirectory = temporaryFolder.newFile(hash); + // We want an empty temporary directory, so we can delete the bundle.zip file + // (in real-life it will have been moved from the temporary location to the + // bundles' directory). + org.apache.commons.io.FileUtils.forceDelete(bundleTemporaryDirectory); + when(fileSystem.getSource()).thenReturn(bundleTemporaryDirectory.toPath()); + assertTrue(bundleTemporaryDirectory.getParentFile().exists()); + FileUtils.deleteBundleTemporaryDirectory(bundle); + assertFalse(bundleTemporaryDirectory.exists()); + } + + // --- bundle parent directory + + @ParameterizedTest + @NullSource + public void testBundleParentDirectoryNullBundle(Bundle bundle) throws IOException { + try (MockedStatic fileUtilsMocked = mockStatic(org.apache.commons.io.FileUtils.class)) { + fileUtilsMocked.when( + () -> org.apache.commons.io.FileUtils.deleteDirectory(Mockito.any()) + ).thenAnswer((Answer) invocation -> null); + FileUtils.deleteBundleParentDirectory(bundle); + fileUtilsMocked.verifyNoInteractions(); + } + } + + @Test + public void testBundleParentDirectoryDoesNotExist() throws IOException { + try (MockedStatic fileUtilsMocked = mockStatic(org.apache.commons.io.FileUtils.class)) { + fileUtilsMocked.when( + () -> org.apache.commons.io.FileUtils.deleteDirectory(Mockito.any()) + ).thenAnswer((Answer) invocation -> null); + Bundle bundle = mock(Bundle.class); + when(bundle.getSource()).thenReturn(Path.of("/does/not/exist/1/2/3/does-not-exist-12345")); + FileUtils.deleteBundleParentDirectory(bundle); + fileUtilsMocked.verifyNoInteractions(); + } + } + + @Test + public void testBundleParentDirectoryWithTooManyFiles() throws IOException { + try (MockedStatic fileUtilsMocked = mockStatic(org.apache.commons.io.FileUtils.class)) { + fileUtilsMocked.when( + () -> org.apache.commons.io.FileUtils.deleteDirectory(Mockito.any()) + ).thenAnswer((Answer) invocation -> null); + Bundle bundle = mock(Bundle.class); + final String hash = "bundle.zip"; + File bundleTemporaryDirectory = temporaryFolder.newFile(hash); + for (int i = 0; i < 3; i++) { + temporaryFolder.newFile("file-" + i); + } + when(bundle.getSource()).thenReturn(bundleTemporaryDirectory.toPath()); + FileUtils.deleteBundleParentDirectory(bundle); + fileUtilsMocked.verifyNoInteractions(); + } + } + + @Test + public void testBundleParentDirectory() throws IOException { + Bundle bundle = mock(Bundle.class); + final String hash = "bundle.zip"; + File bundleTemporaryDirectory = temporaryFolder.newFile(hash); + when(bundle.getSource()).thenReturn(bundleTemporaryDirectory.toPath()); + assertTrue(bundleTemporaryDirectory.getParentFile().exists()); + FileUtils.deleteBundleParentDirectory(bundle); + assertFalse(bundleTemporaryDirectory.exists()); + } +} diff --git a/src/test/java/org/commonwl/view/workflow/WorkflowControllerTest.java b/src/test/java/org/commonwl/view/workflow/WorkflowControllerTest.java index 7969b87f..4812315c 100644 --- a/src/test/java/org/commonwl/view/workflow/WorkflowControllerTest.java +++ b/src/test/java/org/commonwl/view/workflow/WorkflowControllerTest.java @@ -21,10 +21,15 @@ import org.commonwl.view.cwl.CWLService; import org.commonwl.view.git.GitDetails; +import org.commonwl.view.git.GitSemaphore; +import org.commonwl.view.git.GitService; import org.commonwl.view.graphviz.GraphVizService; import org.commonwl.view.researchobject.ROBundleNotFoundException; +import org.eclipse.jgit.api.Git; +import org.eclipse.jgit.api.errors.GitAPIException; import org.eclipse.jgit.api.errors.TransportException; import org.eclipse.jgit.api.errors.WrongRepositoryStateException; +import org.eclipse.jgit.lib.Repository; import org.junit.jupiter.api.Test; import org.junit.jupiter.api.io.TempDir; import org.mockito.Mockito; @@ -47,8 +52,12 @@ import java.util.List; import static org.hamcrest.core.Is.is; +import static org.junit.jupiter.api.Assertions.assertFalse; +import static org.junit.jupiter.api.Assertions.assertThrows; +import static org.junit.jupiter.api.Assertions.assertTrue; import static org.mockito.ArgumentMatchers.any; import static org.mockito.ArgumentMatchers.eq; +import static org.mockito.Mockito.mock; import static org.mockito.Mockito.when; import static org.springframework.test.web.servlet.request.MockMvcRequestBuilders.get; import static org.springframework.test.web.servlet.request.MockMvcRequestBuilders.post; @@ -191,6 +200,39 @@ public void newWorkflowFromGithubURL() throws Exception { } + @Test + public void errorCreatingQueuedWorkflowDeletesTemporaryFiles() throws GitAPIException, IOException { + GitService gitService = Mockito.mock(GitService.class); + WorkflowService service = new WorkflowService( + gitService, + null, + null, + null, + null, + null, + null, + new GitSemaphore(), + 1 + ); + + final GitDetails gitDetails = new GitDetails("https://github.com/common-workflow-language/", "main", "/"); + Git git = mock(Git.class); + when(gitService.getRepository(Mockito.any(GitDetails.class), Mockito.anyBoolean())).thenReturn(git); + + Repository repository = Mockito.mock(Repository.class); + File temporaryFile = roBundleFolder.resolve("repository/.git").toFile(); + assertTrue(temporaryFile.mkdirs()); + when(repository.getDirectory()).thenReturn(temporaryFile); + when(git.getRepository()) + .thenThrow(RuntimeException.class) + .thenReturn(repository); + assertTrue(temporaryFile.exists()); + assertThrows(RuntimeException.class, () -> { + service.createQueuedWorkflow(gitDetails); + }); + assertFalse(temporaryFile.exists()); + } + /** * Displaying workflows */