diff --git a/pom.xml b/pom.xml
index e09837a5..f563f8d8 100644
--- a/pom.xml
+++ b/pom.xml
@@ -145,6 +145,12 @@
commons-compress
1.22
+
+
+ commons-io
+ commons-io
+ 2.11.0
+
org.springframework.boot
@@ -168,10 +174,11 @@
1.17.6
test
+
org.mockito
- mockito-all
- 1.10.19
+ mockito-inline
+ 4.7.0
test
diff --git a/src/main/java/org/commonwl/view/Scheduler.java b/src/main/java/org/commonwl/view/Scheduler.java
index b8a73c7c..6f86a26b 100644
--- a/src/main/java/org/commonwl/view/Scheduler.java
+++ b/src/main/java/org/commonwl/view/Scheduler.java
@@ -1,6 +1,9 @@
package org.commonwl.view;
+import org.apache.commons.io.FileUtils;
+import org.apache.commons.io.file.AccumulatorPathVisitor;
+import org.apache.commons.io.filefilter.AgeFileFilter;
import org.commonwl.view.workflow.QueuedWorkflowRepository;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
@@ -9,8 +12,18 @@
import org.springframework.scheduling.annotation.Scheduled;
import org.springframework.stereotype.Component;
+import java.io.File;
+import java.io.IOException;
+import java.nio.file.Files;
+import java.nio.file.Path;
+import java.nio.file.Paths;
+import java.time.Duration;
+import java.time.Instant;
import java.util.Calendar;
+import java.util.Collections;
import java.util.Date;
+import java.util.List;
+import java.util.stream.Stream;
/**
* Scheduler class for recurrent processes.
@@ -24,6 +37,17 @@ public class Scheduler {
@Value("${queuedWorkflowAgeLimitHours}")
private Integer QUEUED_WORKFLOW_AGE_LIMIT_HOURS;
+ @Value("${tmpDirAgeLimitDays}")
+ private Integer TMP_DIR_AGE_LIMIT_DAYS;
+
+ // We do not want to remove the bundles, as we use the disk as a sort of
+ // cache. Whenever a workflow page is displayed in the browser the UI
+ // fires a request to re-generate it. We skip that by keeping files on disk.
+ @Value("${graphvizStorage}")
+ private String graphvizStorage;
+ @Value("${gitStorage}")
+ private String gitStorage;
+
@Autowired
public Scheduler(QueuedWorkflowRepository queuedWorkflowRepository) {
this.queuedWorkflowRepository = queuedWorkflowRepository;
@@ -55,4 +79,59 @@ public void removeOldQueuedWorkflowEntries() {
logger.info(queuedWorkflowRepository.deleteByTempRepresentation_RetrievedOnLessThanEqual(removeTime)
+ " Old queued workflows removed");
}
+
+ /**
+ * Scheduled function to delete old temporary directories.
+ *
+ *
Will scan each temporary directory (graphviz, RO, git), searching
+ * for files exceeding a specified threshold.
+ *
+ * It scans the first level directories, i.e. it does not recursively
+ * scans directories. So it will delete any RO or Git repository directories
+ * that exceed the threshold. Similarly, it will delete any graph (svg, png,
+ * etc) that also exceed it.
+ *
+ * Errors logged through Logger. Settings in Spring application properties
+ * file.
+ *
+ * @since 1.4.5
+ */
+ @Scheduled(cron = "${cron.clearTmpDir}")
+ public void clearTmpDir() {
+ // Temporary files used for graphviz, RO, and git may be stored in different
+ // locations, so we will collect all of them here.
+ List temporaryDirectories = Stream.of(graphvizStorage, gitStorage)
+ .distinct()
+ .toList();
+ temporaryDirectories.forEach(this::clearDirectory);
+ }
+
+ /**
+ * For a given temporary directory, scans it (not recursively) for files and
+ * directories exceeding the age limit threshold.
+ *
+ * @since 1.4.5
+ * @see https://commons.apache.org/proper/commons-io/apidocs/org/apache/commons/io/filefilter/AgeFileFilter.html
+ * @param temporaryDirectory temporary directory
+ */
+ private void clearDirectory(String temporaryDirectory) {
+ final Instant cutoff = Instant.now().minus(Duration.ofDays(TMP_DIR_AGE_LIMIT_DAYS));
+
+ File temporaryDirectoryFile = new File(temporaryDirectory);
+ String[] files = temporaryDirectoryFile.list(new AgeFileFilter(Date.from(cutoff)));
+
+ if (files != null && files.length > 0) {
+ for (String fileName : files) {
+ File fileToDelete = new File(temporaryDirectoryFile, fileName);
+ try {
+ FileUtils.forceDelete(fileToDelete);
+ } catch (IOException e) {
+ // Here we probably have a more serious case. Since the Git repository, RO directory, or graphs are
+ // not expected to be in use, and the application must have access, I/O errors are not expected and
+ // must be treated as errors.
+ logger.error(String.format("Failed to delete old temporary file or directory [%s]: %s", fileToDelete.getAbsolutePath(), e.getMessage()), e);
+ }
+ }
+ }
+ }
}
diff --git a/src/main/java/org/commonwl/view/cwl/CWLToolRunner.java b/src/main/java/org/commonwl/view/cwl/CWLToolRunner.java
index 87884cc2..121d8f11 100644
--- a/src/main/java/org/commonwl/view/cwl/CWLToolRunner.java
+++ b/src/main/java/org/commonwl/view/cwl/CWLToolRunner.java
@@ -24,6 +24,7 @@
import org.commonwl.view.git.GitSemaphore;
import org.commonwl.view.git.GitService;
import org.commonwl.view.researchobject.ROBundleFactory;
+import org.commonwl.view.util.FileUtils;
import org.commonwl.view.workflow.QueuedWorkflow;
import org.commonwl.view.workflow.QueuedWorkflowRepository;
import org.commonwl.view.workflow.Workflow;
@@ -78,9 +79,10 @@ public void createWorkflowFromQueued(QueuedWorkflow queuedWorkflow)
GitDetails gitInfo = tempWorkflow.getRetrievedFrom();
final String repoUrl = gitInfo.getRepoUrl();
// Parse using cwltool and replace in database
+ Git repo = null;
try {
boolean safeToAccess = gitSemaphore.acquire(repoUrl);
- Git repo = gitService.getRepository(gitInfo, safeToAccess);
+ repo = gitService.getRepository(gitInfo, safeToAccess);
Path localPath = repo.getRepository().getWorkTree().toPath();
Path workflowFile = localPath.resolve(gitInfo.getPath()).normalize().toAbsolutePath();
Workflow newWorkflow = cwlService.parseWorkflowWithCwltool(
@@ -106,16 +108,19 @@ public void createWorkflowFromQueued(QueuedWorkflow queuedWorkflow)
logger.error("Jena query exception ", ex);
queuedWorkflow.setCwltoolStatus(CWLToolStatus.ERROR);
queuedWorkflow.setMessage("An error occurred when executing a query on the SPARQL store");
+ FileUtils.deleteGitRepository(repo);
} catch (CWLValidationException ex) {
logger.error(ex.getMessage(), ex);
queuedWorkflow.setCwltoolStatus(CWLToolStatus.ERROR);
queuedWorkflow.setMessage(ex.getMessage());
+ FileUtils.deleteGitRepository(repo);
} catch (Exception ex) {
logger.error("Unexpected error", ex);
queuedWorkflow.setCwltoolStatus(CWLToolStatus.ERROR);
queuedWorkflow.setMessage("Whoops! Cwltool ran successfully, but an unexpected " +
"error occurred in CWLViewer!\n" +
"Help us by reporting it on Gitter or a Github issue\n");
+ FileUtils.deleteGitRepository(repo);
} finally {
gitSemaphore.release(repoUrl);
queuedWorkflowRepository.save(queuedWorkflow);
diff --git a/src/main/java/org/commonwl/view/researchobject/ROBundleFactory.java b/src/main/java/org/commonwl/view/researchobject/ROBundleFactory.java
index fa310f5f..afc0c0a0 100644
--- a/src/main/java/org/commonwl/view/researchobject/ROBundleFactory.java
+++ b/src/main/java/org/commonwl/view/researchobject/ROBundleFactory.java
@@ -21,7 +21,9 @@
import org.apache.commons.io.FilenameUtils;
import org.apache.taverna.robundle.Bundle;
+import org.apache.taverna.robundle.fs.BundleFileSystem;
import org.commonwl.view.git.GitDetails;
+import org.commonwl.view.util.FileUtils;
import org.commonwl.view.workflow.Workflow;
import org.commonwl.view.workflow.WorkflowRepository;
import org.slf4j.Logger;
@@ -31,7 +33,9 @@
import org.springframework.scheduling.annotation.EnableAsync;
import org.springframework.stereotype.Component;
+import java.io.File;
import java.io.IOException;
+import java.nio.file.FileSystem;
import java.nio.file.Path;
/**
@@ -77,6 +81,11 @@ public void createWorkflowRO(Workflow workflow)
// Save the bundle to the storage location in properties
Path bundleLocation = roBundleService.saveToFile(bundle);
+ try {
+ FileUtils.deleteBundleTemporaryDirectory(bundle);
+ } catch (IOException e) {
+ logger.warn(String.format("Failed to delete temporary directory for bundle [%s]: %s", bundle.getSource(), e.getMessage()), e);
+ }
// Add RO Bundle to associated workflow model
workflow.setRoBundlePath(bundleLocation.toString());
diff --git a/src/main/java/org/commonwl/view/researchobject/ROBundleService.java b/src/main/java/org/commonwl/view/researchobject/ROBundleService.java
index 427b230a..2b375614 100644
--- a/src/main/java/org/commonwl/view/researchobject/ROBundleService.java
+++ b/src/main/java/org/commonwl/view/researchobject/ROBundleService.java
@@ -19,23 +19,6 @@
package org.commonwl.view.researchobject;
-import static org.apache.commons.io.FileUtils.readFileToString;
-
-import java.io.File;
-import java.io.IOException;
-import java.io.InputStream;
-import java.net.URI;
-import java.net.URISyntaxException;
-import java.nio.file.Files;
-import java.nio.file.Path;
-import java.nio.file.Paths;
-import java.util.ArrayList;
-import java.util.HashSet;
-import java.util.List;
-import java.util.Set;
-import java.util.regex.Matcher;
-import java.util.regex.Pattern;
-
import org.apache.commons.io.FileUtils;
import org.apache.commons.io.FilenameUtils;
import org.apache.jena.query.QuerySolution;
@@ -64,6 +47,25 @@
import org.springframework.beans.factory.annotation.Value;
import org.springframework.stereotype.Service;
+import java.io.File;
+import java.io.IOException;
+import java.io.InputStream;
+import java.net.URI;
+import java.net.URISyntaxException;
+import java.nio.charset.Charset;
+import java.nio.file.Files;
+import java.nio.file.Path;
+import java.nio.file.Paths;
+import java.nio.file.attribute.BasicFileAttributes;
+import java.util.ArrayList;
+import java.util.HashSet;
+import java.util.List;
+import java.util.Set;
+import java.util.regex.Matcher;
+import java.util.regex.Pattern;
+
+import static org.apache.commons.io.FileUtils.readFileToString;
+
/**
* Service handling Research Object Bundles
*/
@@ -73,16 +75,16 @@ public class ROBundleService {
private final Logger logger = LoggerFactory.getLogger(this.getClass());
// Services
- private GraphVizService graphVizService;
- private GitService gitService;
- private RDFService rdfService;
- private CWLTool cwlTool;
- private GitSemaphore gitSemaphore;
+ private final GraphVizService graphVizService;
+ private final GitService gitService;
+ private final RDFService rdfService;
+ private final CWLTool cwlTool;
+ private final GitSemaphore gitSemaphore;
// Configuration variables
- private Agent appAgent;
- private int singleFileSizeLimit;
- private Path bundleStorage;
+ private final Agent appAgent;
+ private final int singleFileSizeLimit;
+ private final Path bundleStorage;
// Pattern for extracting version from a cwl file
private final String CWL_VERSION_REGEX = "cwlVersion:\\s*\"?(?:cwl:)?([^\\s\"]+)\"?";
@@ -150,13 +152,19 @@ public Bundle createBundle(Workflow workflow, GitDetails gitInfo) throws IOExcep
Set authors = new HashSet<>();
boolean safeToAccess = gitSemaphore.acquire(gitInfo.getRepoUrl());
+ Git gitRepo = null;
try {
- Git gitRepo = gitService.getRepository(workflow.getRetrievedFrom(), safeToAccess);
+ gitRepo = gitService.getRepository(workflow.getRetrievedFrom(), safeToAccess);
Path relativePath = Paths.get(FilenameUtils.getPath(gitInfo.getPath()));
Path gitPath = gitRepo.getRepository().getWorkTree().toPath().resolve(relativePath);
addFilesToBundle(gitInfo, bundle, bundlePath, gitRepo, gitPath, authors, workflow);
+ } catch (GitAPIException | IOException e) {
+ org.commonwl.view.util.FileUtils.deleteBundleTemporaryDirectory(bundle);
+ org.commonwl.view.util.FileUtils.deleteBundleParentDirectory(bundle);
+ throw e;
} finally {
gitSemaphore.release(gitInfo.getRepoUrl());
+ org.commonwl.view.util.FileUtils.deleteGitRepository(gitRepo);
}
// Add combined authors
@@ -195,7 +203,7 @@ public Bundle createBundle(Workflow workflow, GitDetails gitInfo) throws IOExcep
addAggregation(bundle, manifestAnnotations,
"merged.cwl", cwlTool.getPackedVersion(rawUrl));
} catch (CWLValidationException ex) {
- logger.error("Could not pack workflow when creating Research Object", ex.getMessage());
+ logger.error(String.format("Could not pack workflow when creating Research Object: %s", ex.getMessage()), ex);
}
String rdfUrl = workflow.getIdentifier();
if (rdfService.graphExists(rdfUrl)) {
@@ -215,9 +223,15 @@ public Bundle createBundle(Workflow workflow, GitDetails gitInfo) throws IOExcep
} catch (URISyntaxException ex) {
logger.error("Error creating URI for RO Bundle", ex);
+ org.commonwl.view.util.FileUtils.deleteBundleTemporaryDirectory(bundle);
+ org.commonwl.view.util.FileUtils.deleteBundleParentDirectory(bundle);
} catch (GitAPIException ex) {
logger.error("Error getting repository to create RO Bundle", ex);
+ org.commonwl.view.util.FileUtils.deleteBundleTemporaryDirectory(bundle);
+ org.commonwl.view.util.FileUtils.deleteBundleParentDirectory(bundle);
}
+ // TODO: Why are we not re-throwing the exceptions above? It looks like we should
+ // so users/admins are aware of the problem.
// Return the completed bundle
return bundle;
@@ -238,118 +252,121 @@ private void addFilesToBundle(GitDetails gitDetails, Bundle bundle, Path bundleP
Workflow workflow)
throws IOException {
File[] files = repoPath.toFile().listFiles();
- for (File file : files) {
- if (!file.getName().equals(".git")) {
- if (file.isDirectory()) {
-
- // Create a new folder in the RO for this directory
- Path newBundlePath = bundlePath.resolve(file.getName());
- Files.createDirectory(newBundlePath);
-
- // Create git details object for subfolder
- GitDetails subfolderGitDetails = new GitDetails(gitDetails.getRepoUrl(), gitDetails.getBranch(),
- Paths.get(gitDetails.getPath()).resolve(file.getName()).toString());
-
- // Add all files in the subdirectory to this new folder
- addFilesToBundle(subfolderGitDetails, bundle, newBundlePath, gitRepo,
- repoPath.resolve(file.getName()), authors, workflow);
-
- } else {
- try {
- // Where to store the new file
- Path bundleFilePath = bundlePath.resolve(file.getName());
- Path gitFolder = Paths.get(gitDetails.getPath());
- String relativePath = gitFolder.resolve(file.getName()).toString();
- Path gitPath = bundlePath.getRoot().resolve(relativePath); // would start with /
-
- // Get direct URL permalink
- URI rawURI = new URI("https://w3id.org/cwl/view/git/" + workflow.getLastCommit() +
- gitPath + "?format=raw");
-
- // Variable to store file contents and aggregation
- String fileContent = null;
- PathMetadata aggregation;
-
- // Download or externally link if oversized
- if (file.length() <= singleFileSizeLimit) {
- // Save file to research object bundle
- fileContent = readFileToString(file);
- Bundles.setStringValue(bundleFilePath, fileContent);
+ if (files != null) {
+ for (File file : files) {
+ if (!file.getName().equals(".git")) {
+ BasicFileAttributes basicFileAttributes = Files.readAttributes(file.toPath(), BasicFileAttributes.class);
+ if (basicFileAttributes.isDirectory()) {
- // Set retrieved information for this file in the manifest
- aggregation = bundle.getManifest().getAggregation(bundleFilePath);
- aggregation.setRetrievedFrom(rawURI);
- aggregation.setRetrievedBy(appAgent);
- aggregation.setRetrievedOn(aggregation.getCreatedOn());
- } else {
- logger.info("File " + file.getName() + " is too large to download - " +
- FileUtils.byteCountToDisplaySize(file.length()) + "/" +
- FileUtils.byteCountToDisplaySize(singleFileSizeLimit) +
- ", linking externally to RO bundle");
-
- // Set information for this file in the manifest
- aggregation = bundle.getManifest().getAggregation(rawURI);
- Proxy bundledAs = new Proxy();
- bundledAs.setURI();
- bundledAs.setFolder(repoPath);
- aggregation.setBundledAs(bundledAs);
- }
+ // Create a new folder in the RO for this directory
+ Path newBundlePath = bundlePath.resolve(file.getName());
+ Files.createDirectory(newBundlePath);
- // Special handling for cwl files
- boolean cwl = FilenameUtils.getExtension(file.getName()).equals("cwl");
- if (cwl) {
- // Correct mime type (no official standard for yaml)
- aggregation.setMediatype("text/x-yaml");
-
- // Add conformsTo for version extracted from regex
- if (fileContent != null) {
- Matcher m = cwlVersionPattern.matcher(fileContent);
- if (m.find()) {
- aggregation.setConformsTo(new URI("https://w3id.org/cwl/" + m.group(1)));
- }
- }
- }
+ // Create git details object for subfolder
+ GitDetails subfolderGitDetails = new GitDetails(gitDetails.getRepoUrl(), gitDetails.getBranch(),
+ Paths.get(gitDetails.getPath()).resolve(file.getName()).toString());
+ // Add all files in the subdirectory to this new folder
+ addFilesToBundle(subfolderGitDetails, bundle, newBundlePath, gitRepo,
+ repoPath.resolve(file.getName()), authors, workflow);
+
+ } else {
try {
- // Add authors from git commits to the file
- Set fileAuthors = gitService.getAuthors(gitRepo,
- gitPath.toString());
+ // Where to store the new file
+ Path bundleFilePath = bundlePath.resolve(file.getName());
+ Path gitFolder = Paths.get(gitDetails.getPath());
+ String relativePath = gitFolder.resolve(file.getName()).toString();
+ Path gitPath = bundlePath.getRoot().resolve(relativePath); // would start with /
+
+ // Get direct URL permalink
+ URI rawURI = new URI("https://w3id.org/cwl/view/git/" + workflow.getLastCommit() +
+ gitPath + "?format=raw");
+
+ // Variable to store file contents and aggregation
+ String fileContent = null;
+ PathMetadata aggregation;
+
+ // Download or externally link if oversized
+ if (basicFileAttributes.size() <= singleFileSizeLimit) {
+ // Save file to research object bundle
+ fileContent = readFileToString(file, Charset.defaultCharset());
+ Bundles.setStringValue(bundleFilePath, fileContent);
+
+ // Set retrieved information for this file in the manifest
+ aggregation = bundle.getManifest().getAggregation(bundleFilePath);
+ aggregation.setRetrievedFrom(rawURI);
+ aggregation.setRetrievedBy(appAgent);
+ aggregation.setRetrievedOn(aggregation.getCreatedOn());
+ } else {
+ logger.info("File " + file.getName() + " is too large to download - " +
+ FileUtils.byteCountToDisplaySize(basicFileAttributes.size()) + "/" +
+ FileUtils.byteCountToDisplaySize(singleFileSizeLimit) +
+ ", linking externally to RO bundle");
+
+ // Set information for this file in the manifest
+ aggregation = bundle.getManifest().getAggregation(rawURI);
+ Proxy bundledAs = new Proxy();
+ bundledAs.setURI();
+ bundledAs.setFolder(repoPath);
+ aggregation.setBundledAs(bundledAs);
+ }
+ // Special handling for cwl files
+ boolean cwl = FilenameUtils.getExtension(file.getName()).equals("cwl");
if (cwl) {
- // Attempt to get authors from cwl description - takes priority
- ResultSet descAuthors = rdfService.getAuthors(bundlePath
- .resolve(file.getName()).toString().substring(10),
- workflow.getIdentifier());
- if (descAuthors.hasNext()) {
- QuerySolution authorSoln = descAuthors.nextSolution();
- HashableAgent newAuthor = new HashableAgent();
- if (authorSoln.contains("name")) {
- newAuthor.setName(authorSoln.get("name").toString());
- }
- if (authorSoln.contains("email")) {
- newAuthor.setUri(new URI(authorSoln.get("email").toString()));
- }
- if (authorSoln.contains("orcid")) {
- newAuthor.setOrcid(new URI(authorSoln.get("orcid").toString()));
+ // Correct mime type (no official standard for yaml)
+ aggregation.setMediatype("text/x-yaml");
+
+ // Add conformsTo for version extracted from regex
+ if (fileContent != null) {
+ Matcher m = cwlVersionPattern.matcher(fileContent);
+ if (m.find()) {
+ aggregation.setConformsTo(new URI("https://w3id.org/cwl/" + m.group(1)));
}
- fileAuthors.remove(newAuthor);
- fileAuthors.add(newAuthor);
}
}
- authors.addAll(fileAuthors);
- aggregation.setAuthoredBy(new ArrayList<>(fileAuthors));
- } catch (GitAPIException ex) {
- logger.error("Could not get commits for file " + repoPath, ex);
- }
+ try {
+ // Add authors from git commits to the file
+ Set fileAuthors = gitService.getAuthors(gitRepo,
+ gitPath.toString());
+
+ if (cwl) {
+ // Attempt to get authors from cwl description - takes priority
+ ResultSet descAuthors = rdfService.getAuthors(bundlePath
+ .resolve(file.getName()).toString().substring(10),
+ workflow.getIdentifier());
+ if (descAuthors.hasNext()) {
+ QuerySolution authorSolution = descAuthors.nextSolution();
+ HashableAgent newAuthor = new HashableAgent();
+ if (authorSolution.contains("name")) {
+ newAuthor.setName(authorSolution.get("name").toString());
+ }
+ if (authorSolution.contains("email")) {
+ newAuthor.setUri(new URI(authorSolution.get("email").toString()));
+ }
+ if (authorSolution.contains("orcid")) {
+ newAuthor.setOrcid(new URI(authorSolution.get("orcid").toString()));
+ }
+ fileAuthors.remove(newAuthor);
+ fileAuthors.add(newAuthor);
+ }
+ }
- // Set retrieved information for this file in the manifest
- aggregation.setRetrievedFrom(rawURI);
- aggregation.setRetrievedBy(appAgent);
- aggregation.setRetrievedOn(aggregation.getCreatedOn());
+ authors.addAll(fileAuthors);
+ aggregation.setAuthoredBy(new ArrayList<>(fileAuthors));
+ } catch (GitAPIException ex) {
+ logger.error("Could not get commits for file " + repoPath, ex);
+ }
- } catch (URISyntaxException ex) {
- logger.error("Error creating URI for RO Bundle", ex);
+ // Set retrieved information for this file in the manifest
+ aggregation.setRetrievedFrom(rawURI);
+ aggregation.setRetrievedBy(appAgent);
+ aggregation.setRetrievedOn(aggregation.getCreatedOn());
+
+ } catch (URISyntaxException ex) {
+ logger.error("Error creating URI for RO Bundle", ex);
+ }
}
}
}
diff --git a/src/main/java/org/commonwl/view/util/FileUtils.java b/src/main/java/org/commonwl/view/util/FileUtils.java
new file mode 100644
index 00000000..816860a5
--- /dev/null
+++ b/src/main/java/org/commonwl/view/util/FileUtils.java
@@ -0,0 +1,128 @@
+package org.commonwl.view.util;
+
+import org.apache.taverna.robundle.Bundle;
+import org.apache.taverna.robundle.fs.BundleFileSystem;
+import org.eclipse.jgit.api.Git;
+
+import java.io.File;
+import java.io.IOException;
+import java.nio.file.Path;
+import java.util.Objects;
+
+/**
+ * File utilities for CWL Viewer.
+ *
+ * Uses other utilities, such as Apache Commons IO's {@code FileUtils}, but
+ * with refinements specific for CWL Viewer (e.g. handling Git repositories).
+ *
+ * @since 1.4.5
+ */
+public class FileUtils {
+
+ private FileUtils() {}
+
+ /**
+ * Deletes the directory of a git repository. Note that the Git
object
+ * contains a repository with a directory, but this directory points to the .git
+ * directory. This method will delete the parent of the .git
directory,
+ * which corresponds to the cloned folder with the source code from git.
+ *
+ * @param repo Git repository object
+ * @throws IOException if it fails to delete the Git repository directory
+ * @since 1.4.5
+ */
+ public static void deleteGitRepository(Git repo) throws IOException {
+ if (
+ repo != null &&
+ repo.getRepository() != null &&
+ repo.getRepository().getDirectory() != null &&
+ repo.getRepository().getDirectory().exists()
+ ) {
+ // This is literally the git directory, i.e. /some/hierarchy/repository/.git,
+ // but we want to delete its parent directory.
+ File gitDirectory = repo.getRepository().getDirectory();
+ org.apache.commons.io.FileUtils.forceDelete(gitDirectory.getParentFile());
+ }
+ }
+
+ /**
+ * Deletes the bundle temporary directory.
+ *
+ * The Bundle
object is an Apache Taverna object. Be careful
+ * as it acts as a virtual file system, with a root directory (normally "/"),
+ * but that's an abstract representation of the bundle files.
+ *
+ * This method looks inside the bundle for the bundle source directory.
+ * This directory contains a hash in its name structure. Apache Taverna creates
+ * temporary directories in
java.io.tmpdir
using this same
+ * hash. This method will delete any directory matching that hash in the
+ * temporary directory.
+ *
+ * NOTE: the directory deleted is expected to be empty. It is a left over
+ * by Apache Taverna, and failing to delete it shouldn't be a major issue
+ * (other than using an inode to represent the file in the file system). And
+ * also note that we are not deleting the parent directory, since the
+ * left-over directory is always stored in the system temporary directory,
+ * while the ROBundle with the hash is stored in the Spring configured
+ * directory for bundles.
+ *
+ * @param bundle A bundle object
+ * @throws IOException if it fails to delete the bundle temporary directory
+ * @since 1.4.5
+ */
+ public static void deleteBundleTemporaryDirectory(Bundle bundle) throws IOException {
+ // The RoBundleService#saveToFile call will delegate to Apache Taverna's
+ // Bundles.closeAndSaveBundle, which empties the bundle temporary
+ // directory without deleting the directory itself. The following call is
+ // just for cleaning it up.
+ if (bundle != null) {
+ BundleFileSystem fs = (BundleFileSystem) bundle.getFileSystem();
+ File tmpDir = new File(System.getProperty("java.io.tmpdir"));
+ // The file system source will be something like /tmp/bundles_dir/hash/bundle.zip,
+ // and we want /tmp/hash to be deleted. N.B. Apache Taverna uses the temporary directory
+ // for the temporary bundles' directory, not the bundles file specified by Spring.
+ Path parent = fs.getSource().toAbsolutePath().getParent().getFileName();
+ if (parent != null) {
+ String bundleTmpDirName = parent.toString();
+ File bundleTmpDir = new File(tmpDir, bundleTmpDirName);
+ if (bundleTmpDir.exists() && bundleTmpDir.isDirectory()) {
+ File[] files = bundleTmpDir.listFiles();
+ // We expect the file to be empty, else we better avoid deleting it.
+ if (files != null && files.length == 0) {
+ org.apache.commons.io.FileUtils.forceDelete(bundleTmpDir);
+ }
+ }
+ }
+ }
+ }
+
+ /**
+ * Used to delete the directory with a robundle ZIP file. It must be used
+ * when the system fails to generate a bundle file. Apache Taverna's API
+ * may have already created the Bundle ZIP.
+ *
+ * Normally this file is stored in a newly created directory (e.g.
+ *
/tmp/bundles/bundle-hash-1234/robundle.zip
), and when we fail
+ * to successfully create the bundle, it is best to remove this file to
+ * prevent disk space issues (especially if we end up with duplicated
+ * bundles).
+ *
+ * @param bundle A bundle object
+ * @throws IOException if it fails to delete the bundle temporary directory
+ * @since 1.4.5
+ */
+ public static void deleteBundleParentDirectory(Bundle bundle) throws IOException {
+ if (bundle != null) {
+ Path bundleSource = bundle.getSource();
+ File parent = bundleSource.getParent().toFile();
+ if (parent.exists() && parent.isDirectory()) {
+ File[] files = parent.listFiles();
+ // We expect the directory to be empty or contain just one file (the ZIP bundle,
+ // since the bundle files are stored temporarily in another temporary directory).
+ if (files != null && files.length <= 1) {
+ org.apache.commons.io.FileUtils.forceDelete(parent);
+ }
+ }
+ }
+ }
+}
diff --git a/src/main/java/org/commonwl/view/workflow/WorkflowService.java b/src/main/java/org/commonwl/view/workflow/WorkflowService.java
index fc61b549..fed87827 100644
--- a/src/main/java/org/commonwl/view/workflow/WorkflowService.java
+++ b/src/main/java/org/commonwl/view/workflow/WorkflowService.java
@@ -19,18 +19,6 @@
package org.commonwl.view.workflow;
-import java.io.File;
-import java.io.IOException;
-import java.nio.file.Files;
-import java.nio.file.Path;
-import java.nio.file.Paths;
-import java.util.ArrayList;
-import java.util.Calendar;
-import java.util.Date;
-import java.util.List;
-import java.util.Objects;
-import java.util.Optional;
-
import org.commonwl.view.cwl.CWLService;
import org.commonwl.view.cwl.CWLToolRunner;
import org.commonwl.view.cwl.CWLToolStatus;
@@ -40,6 +28,7 @@
import org.commonwl.view.graphviz.GraphVizService;
import org.commonwl.view.researchobject.ROBundleFactory;
import org.commonwl.view.researchobject.ROBundleNotFoundException;
+import org.commonwl.view.util.FileUtils;
import org.eclipse.jgit.api.Git;
import org.eclipse.jgit.api.errors.GitAPIException;
import org.eclipse.jgit.api.errors.RefNotFoundException;
@@ -52,6 +41,18 @@
import org.springframework.data.domain.Pageable;
import org.springframework.stereotype.Service;
+import java.io.File;
+import java.io.IOException;
+import java.nio.file.Files;
+import java.nio.file.Path;
+import java.nio.file.Paths;
+import java.util.ArrayList;
+import java.util.Calendar;
+import java.util.Date;
+import java.util.List;
+import java.util.Objects;
+import java.util.Optional;
+
@Service
public class WorkflowService {
@@ -295,9 +296,9 @@ public QueuedWorkflow createQueuedWorkflow(GitDetails gitInfo)
throws GitAPIException, WorkflowNotFoundException, IOException {
QueuedWorkflow queuedWorkflow;
+ Git repo = null;
try {
boolean safeToAccess = gitSemaphore.acquire(gitInfo.getRepoUrl());
- Git repo = null;
while (repo == null) {
try {
repo = gitService.getRepository(gitInfo, safeToAccess);
@@ -366,6 +367,10 @@ public QueuedWorkflow createQueuedWorkflow(GitDetails gitInfo)
logger.error("Could not update workflow with cwltool", e);
}
+ } catch (GitAPIException | RuntimeException | IOException e) {
+ logger.warn(String.format("Failed to create Queued Workflow: %s - Temporary files will be deleted", e.getMessage()), e);
+ FileUtils.deleteGitRepository(repo);
+ throw e;
} finally {
gitSemaphore.release(gitInfo.getRepoUrl());
}
diff --git a/src/main/resources/application.properties b/src/main/resources/application.properties
index f47d9986..17f92699 100644
--- a/src/main/resources/application.properties
+++ b/src/main/resources/application.properties
@@ -75,6 +75,12 @@ cron.deleteOldQueuedWorkflows = 0 0 * * * ?
# Age limit for queued workflows in hours.
queuedWorkflowAgeLimitHours = 24
+# The expression below implies every day at the 0th second, 0th minute and 24th(0th) hour i.e (time 00:00:00, every day)
+cron.clearTmpDir = 0 0 0 * * ?
+
+# Age limit for tmp directories in days.
+tmpDirAgeLimitDays = 1
+
#=======================
# DB migrations
#=======================
diff --git a/src/test/java/org/commonwl/view/util/FileUtilsTest.java b/src/test/java/org/commonwl/view/util/FileUtilsTest.java
new file mode 100644
index 00000000..5a7939f8
--- /dev/null
+++ b/src/test/java/org/commonwl/view/util/FileUtilsTest.java
@@ -0,0 +1,244 @@
+package org.commonwl.view.util;
+
+import org.apache.taverna.robundle.Bundle;
+import org.apache.taverna.robundle.fs.BundleFileSystem;
+import org.eclipse.jgit.api.Git;
+import org.eclipse.jgit.lib.Repository;
+import org.junit.Rule;
+import org.junit.Test;
+import org.junit.jupiter.params.ParameterizedTest;
+import org.junit.jupiter.params.provider.NullSource;
+import org.junit.rules.TemporaryFolder;
+import org.mockito.MockedStatic;
+import org.mockito.Mockito;
+import org.mockito.junit.MockitoJUnit;
+import org.mockito.junit.MockitoRule;
+import org.mockito.stubbing.Answer;
+
+import java.io.File;
+import java.io.IOException;
+import java.nio.file.Path;
+
+import static org.junit.jupiter.api.Assertions.assertFalse;
+import static org.junit.jupiter.api.Assertions.assertTrue;
+import static org.mockito.Mockito.mock;
+import static org.mockito.Mockito.mockStatic;
+import static org.mockito.Mockito.when;
+
+/**
+ * Tests for FileUtils
.
+ *
+ * @since 1.4.5
+ */
+public class FileUtilsTest {
+
+ @Rule
+ public MockitoRule initRule = MockitoJUnit.rule();
+
+ @Rule
+ public TemporaryFolder temporaryFolder = new TemporaryFolder();
+
+ // --- git files
+
+ @ParameterizedTest
+ @NullSource
+ public void testNullRepository(Git repository) throws IOException {
+ try (MockedStatic fileUtilsMocked = mockStatic(org.apache.commons.io.FileUtils.class)) {
+ fileUtilsMocked.when(
+ () -> org.apache.commons.io.FileUtils.deleteDirectory(Mockito.any())
+ ).thenAnswer((Answer) invocation -> null);
+ FileUtils.deleteGitRepository(repository);
+ fileUtilsMocked.verifyNoInteractions();
+ }
+ }
+
+ @Test
+ public void testRepositoryNullRepository() throws IOException {
+ Git repository = mock(Git.class);
+ when(repository.getRepository()).thenReturn(null);
+ try (MockedStatic fileUtilsMocked = mockStatic(org.apache.commons.io.FileUtils.class)) {
+ fileUtilsMocked.when(
+ () -> org.apache.commons.io.FileUtils.deleteDirectory(Mockito.any())
+ ).thenAnswer((Answer) invocation -> null);
+ FileUtils.deleteGitRepository(repository);
+ fileUtilsMocked.verifyNoInteractions();
+ }
+ }
+
+ @Test
+ public void testRepositoryRepositoryNullDirectory() throws IOException {
+ Git repository = mock(Git.class);
+ Repository repository1 = mock(Repository.class);
+ when(repository.getRepository()).thenReturn(repository1);
+ when(repository1.getDirectory()).thenReturn(null);
+ try (MockedStatic fileUtilsMocked = mockStatic(org.apache.commons.io.FileUtils.class)) {
+ fileUtilsMocked.when(
+ () -> org.apache.commons.io.FileUtils.deleteDirectory(Mockito.any())
+ ).thenAnswer((Answer) invocation -> null);
+ FileUtils.deleteGitRepository(repository);
+ fileUtilsMocked.verifyNoInteractions();
+ }
+ }
+
+ @Test
+ public void testRepositoryRepositoryDirectoryDoesNotExist() throws IOException {
+ Git repository = mock(Git.class);
+ Repository repository1 = mock(Repository.class);
+ when(repository.getRepository()).thenReturn(repository1);
+ File doesNotExist = new File("/tmp/tmp/tmp/tmp/1/2/3/4/5");
+ when(repository1.getDirectory()).thenReturn(doesNotExist);
+ try (MockedStatic fileUtilsMocked = mockStatic(org.apache.commons.io.FileUtils.class)) {
+ fileUtilsMocked.when(
+ () -> org.apache.commons.io.FileUtils.deleteDirectory(Mockito.any())
+ ).thenAnswer((Answer) invocation -> null);
+ FileUtils.deleteGitRepository(repository);
+ fileUtilsMocked.verifyNoInteractions();
+ }
+ }
+
+ @Test
+ public void testRepositoryRepositoryDirectory() throws IOException {
+ Git repository = mock(Git.class);
+ Repository repository1 = mock(Repository.class);
+ when(repository.getRepository()).thenReturn(repository1);
+ File gitRepository = temporaryFolder.newFile(".git");
+ when(repository1.getDirectory()).thenReturn(gitRepository);
+ assertTrue(gitRepository.exists());
+ FileUtils.deleteGitRepository(repository);
+ assertFalse(gitRepository.exists());
+ }
+
+ // --- bundle temporary files
+
+ @ParameterizedTest
+ @NullSource
+ public void testBundleTemporaryDirectoryNullBundle(Bundle bundle) throws IOException {
+ try (MockedStatic fileUtilsMocked = mockStatic(org.apache.commons.io.FileUtils.class)) {
+ fileUtilsMocked.when(
+ () -> org.apache.commons.io.FileUtils.deleteDirectory(Mockito.any())
+ ).thenAnswer((Answer) invocation -> null);
+ FileUtils.deleteBundleTemporaryDirectory(bundle);
+ fileUtilsMocked.verifyNoInteractions();
+ }
+ }
+
+ @Test
+ public void testBundleTemporaryDirectoryNoParentFileName() throws IOException {
+ try (MockedStatic fileUtilsMocked = mockStatic(org.apache.commons.io.FileUtils.class)) {
+ fileUtilsMocked.when(
+ () -> org.apache.commons.io.FileUtils.deleteDirectory(Mockito.any())
+ ).thenAnswer((Answer) invocation -> null);
+ Bundle bundle = mock(Bundle.class);
+ BundleFileSystem fileSystem = mock(BundleFileSystem.class);
+ when(bundle.getFileSystem()).thenReturn(fileSystem);
+ when(fileSystem.getSource()).thenReturn(Path.of("/does-not-exist-12345"));
+ FileUtils.deleteBundleTemporaryDirectory(bundle);
+ fileUtilsMocked.verifyNoInteractions();
+ }
+ }
+
+ @Test
+ public void testBundleTemporaryDirectoryDoesNotExist() throws IOException {
+ try (MockedStatic fileUtilsMocked = mockStatic(org.apache.commons.io.FileUtils.class)) {
+ fileUtilsMocked.when(
+ () -> org.apache.commons.io.FileUtils.deleteDirectory(Mockito.any())
+ ).thenAnswer((Answer) invocation -> null);
+ Bundle bundle = mock(Bundle.class);
+ BundleFileSystem fileSystem = mock(BundleFileSystem.class);
+ when(bundle.getFileSystem()).thenReturn(fileSystem);
+ when(fileSystem.getSource()).thenReturn(Path.of("/does/not/exist/1/2/3/does-not-exist-12345"));
+ FileUtils.deleteBundleTemporaryDirectory(bundle);
+ fileUtilsMocked.verifyNoInteractions();
+ }
+ }
+
+ @Test
+ public void testBundleTemporaryDirectoryNotEmpty() throws IOException {
+ try (MockedStatic fileUtilsMocked = mockStatic(org.apache.commons.io.FileUtils.class)) {
+ fileUtilsMocked.when(
+ () -> org.apache.commons.io.FileUtils.deleteDirectory(Mockito.any())
+ ).thenAnswer((Answer) invocation -> null);
+ Bundle bundle = mock(Bundle.class);
+ BundleFileSystem fileSystem = mock(BundleFileSystem.class);
+ when(bundle.getFileSystem()).thenReturn(fileSystem);
+ final String hash = "bundle.zip";
+ File bundleTemporaryDirectory = temporaryFolder.newFile(hash);
+ when(fileSystem.getSource()).thenReturn(bundleTemporaryDirectory.toPath());
+ FileUtils.deleteBundleTemporaryDirectory(bundle);
+ fileUtilsMocked.verifyNoInteractions();
+ }
+ }
+
+ @Test
+ public void testBundleTemporaryDirectory() throws IOException {
+ Bundle bundle = mock(Bundle.class);
+ BundleFileSystem fileSystem = mock(BundleFileSystem.class);
+ when(bundle.getFileSystem()).thenReturn(fileSystem);
+ final String hash = "bundle.zip";
+ File bundleTemporaryDirectory = temporaryFolder.newFile(hash);
+ // We want an empty temporary directory, so we can delete the bundle.zip file
+ // (in real-life it will have been moved from the temporary location to the
+ // bundles' directory).
+ org.apache.commons.io.FileUtils.forceDelete(bundleTemporaryDirectory);
+ when(fileSystem.getSource()).thenReturn(bundleTemporaryDirectory.toPath());
+ assertTrue(bundleTemporaryDirectory.getParentFile().exists());
+ FileUtils.deleteBundleTemporaryDirectory(bundle);
+ assertFalse(bundleTemporaryDirectory.exists());
+ }
+
+ // --- bundle parent directory
+
+ @ParameterizedTest
+ @NullSource
+ public void testBundleParentDirectoryNullBundle(Bundle bundle) throws IOException {
+ try (MockedStatic fileUtilsMocked = mockStatic(org.apache.commons.io.FileUtils.class)) {
+ fileUtilsMocked.when(
+ () -> org.apache.commons.io.FileUtils.deleteDirectory(Mockito.any())
+ ).thenAnswer((Answer) invocation -> null);
+ FileUtils.deleteBundleParentDirectory(bundle);
+ fileUtilsMocked.verifyNoInteractions();
+ }
+ }
+
+ @Test
+ public void testBundleParentDirectoryDoesNotExist() throws IOException {
+ try (MockedStatic fileUtilsMocked = mockStatic(org.apache.commons.io.FileUtils.class)) {
+ fileUtilsMocked.when(
+ () -> org.apache.commons.io.FileUtils.deleteDirectory(Mockito.any())
+ ).thenAnswer((Answer) invocation -> null);
+ Bundle bundle = mock(Bundle.class);
+ when(bundle.getSource()).thenReturn(Path.of("/does/not/exist/1/2/3/does-not-exist-12345"));
+ FileUtils.deleteBundleParentDirectory(bundle);
+ fileUtilsMocked.verifyNoInteractions();
+ }
+ }
+
+ @Test
+ public void testBundleParentDirectoryWithTooManyFiles() throws IOException {
+ try (MockedStatic fileUtilsMocked = mockStatic(org.apache.commons.io.FileUtils.class)) {
+ fileUtilsMocked.when(
+ () -> org.apache.commons.io.FileUtils.deleteDirectory(Mockito.any())
+ ).thenAnswer((Answer) invocation -> null);
+ Bundle bundle = mock(Bundle.class);
+ final String hash = "bundle.zip";
+ File bundleTemporaryDirectory = temporaryFolder.newFile(hash);
+ for (int i = 0; i < 3; i++) {
+ temporaryFolder.newFile("file-" + i);
+ }
+ when(bundle.getSource()).thenReturn(bundleTemporaryDirectory.toPath());
+ FileUtils.deleteBundleParentDirectory(bundle);
+ fileUtilsMocked.verifyNoInteractions();
+ }
+ }
+
+ @Test
+ public void testBundleParentDirectory() throws IOException {
+ Bundle bundle = mock(Bundle.class);
+ final String hash = "bundle.zip";
+ File bundleTemporaryDirectory = temporaryFolder.newFile(hash);
+ when(bundle.getSource()).thenReturn(bundleTemporaryDirectory.toPath());
+ assertTrue(bundleTemporaryDirectory.getParentFile().exists());
+ FileUtils.deleteBundleParentDirectory(bundle);
+ assertFalse(bundleTemporaryDirectory.exists());
+ }
+}
diff --git a/src/test/java/org/commonwl/view/workflow/WorkflowControllerTest.java b/src/test/java/org/commonwl/view/workflow/WorkflowControllerTest.java
index 7969b87f..4812315c 100644
--- a/src/test/java/org/commonwl/view/workflow/WorkflowControllerTest.java
+++ b/src/test/java/org/commonwl/view/workflow/WorkflowControllerTest.java
@@ -21,10 +21,15 @@
import org.commonwl.view.cwl.CWLService;
import org.commonwl.view.git.GitDetails;
+import org.commonwl.view.git.GitSemaphore;
+import org.commonwl.view.git.GitService;
import org.commonwl.view.graphviz.GraphVizService;
import org.commonwl.view.researchobject.ROBundleNotFoundException;
+import org.eclipse.jgit.api.Git;
+import org.eclipse.jgit.api.errors.GitAPIException;
import org.eclipse.jgit.api.errors.TransportException;
import org.eclipse.jgit.api.errors.WrongRepositoryStateException;
+import org.eclipse.jgit.lib.Repository;
import org.junit.jupiter.api.Test;
import org.junit.jupiter.api.io.TempDir;
import org.mockito.Mockito;
@@ -47,8 +52,12 @@
import java.util.List;
import static org.hamcrest.core.Is.is;
+import static org.junit.jupiter.api.Assertions.assertFalse;
+import static org.junit.jupiter.api.Assertions.assertThrows;
+import static org.junit.jupiter.api.Assertions.assertTrue;
import static org.mockito.ArgumentMatchers.any;
import static org.mockito.ArgumentMatchers.eq;
+import static org.mockito.Mockito.mock;
import static org.mockito.Mockito.when;
import static org.springframework.test.web.servlet.request.MockMvcRequestBuilders.get;
import static org.springframework.test.web.servlet.request.MockMvcRequestBuilders.post;
@@ -191,6 +200,39 @@ public void newWorkflowFromGithubURL() throws Exception {
}
+ @Test
+ public void errorCreatingQueuedWorkflowDeletesTemporaryFiles() throws GitAPIException, IOException {
+ GitService gitService = Mockito.mock(GitService.class);
+ WorkflowService service = new WorkflowService(
+ gitService,
+ null,
+ null,
+ null,
+ null,
+ null,
+ null,
+ new GitSemaphore(),
+ 1
+ );
+
+ final GitDetails gitDetails = new GitDetails("https://github.com/common-workflow-language/", "main", "/");
+ Git git = mock(Git.class);
+ when(gitService.getRepository(Mockito.any(GitDetails.class), Mockito.anyBoolean())).thenReturn(git);
+
+ Repository repository = Mockito.mock(Repository.class);
+ File temporaryFile = roBundleFolder.resolve("repository/.git").toFile();
+ assertTrue(temporaryFile.mkdirs());
+ when(repository.getDirectory()).thenReturn(temporaryFile);
+ when(git.getRepository())
+ .thenThrow(RuntimeException.class)
+ .thenReturn(repository);
+ assertTrue(temporaryFile.exists());
+ assertThrows(RuntimeException.class, () -> {
+ service.createQueuedWorkflow(gitDetails);
+ });
+ assertFalse(temporaryFile.exists());
+ }
+
/**
* Displaying workflows
*/