Skip to content

Commit 77808cd

Browse files
committed
Using parts of #344 by @obasekiosa (thanks!), and Commons IO to traverse and delete old files and directories.
1 parent 9516d2a commit 77808cd

File tree

3 files changed

+104
-0
lines changed

3 files changed

+104
-0
lines changed

pom.xml

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -145,6 +145,12 @@
145145
<artifactId>commons-compress</artifactId>
146146
<version>1.21</version>
147147
</dependency>
148+
<!-- IO -->
149+
<dependency>
150+
<groupId>commons-io</groupId>
151+
<artifactId>commons-io</artifactId>
152+
<version>2.11.0</version>
153+
</dependency>
148154
<!-- For JSR-303, javax.validation -->
149155
<dependency>
150156
<groupId>org.springframework.boot</groupId>

src/main/java/org/commonwl/view/Scheduler.java

Lines changed: 92 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,9 @@
11
package org.commonwl.view;
22

33

4+
import org.apache.commons.io.FileUtils;
5+
import org.apache.commons.io.file.AccumulatorPathVisitor;
6+
import org.apache.commons.io.filefilter.AgeFileFilter;
47
import org.commonwl.view.workflow.QueuedWorkflowRepository;
58
import org.slf4j.Logger;
69
import org.slf4j.LoggerFactory;
@@ -9,8 +12,21 @@
912
import org.springframework.scheduling.annotation.Scheduled;
1013
import org.springframework.stereotype.Component;
1114

15+
import java.io.File;
16+
import java.io.IOException;
17+
import java.nio.file.Files;
18+
import java.nio.file.Path;
19+
import java.nio.file.Paths;
20+
import java.time.Duration;
21+
import java.time.Instant;
22+
import java.util.Arrays;
1223
import java.util.Calendar;
24+
import java.util.Collections;
1325
import java.util.Date;
26+
import java.util.HashSet;
27+
import java.util.List;
28+
import java.util.Set;
29+
import java.util.stream.Stream;
1430

1531
/**
1632
* Scheduler class for recurrent processes.
@@ -24,6 +40,16 @@ public class Scheduler {
2440
@Value("${queuedWorkflowAgeLimitHours}")
2541
private Integer QUEUED_WORKFLOW_AGE_LIMIT_HOURS;
2642

43+
@Value("${tmpDirAgeLimitDays}")
44+
private Integer TMP_DIR_AGE_LIMIT_DAYS;
45+
46+
@Value("${bundleStorage}")
47+
private String bundleStorage;
48+
@Value("${graphvizStorage}")
49+
private String graphvizStorage;
50+
@Value("${gitStorage}")
51+
private String gitStorage;
52+
2753
@Autowired
2854
public Scheduler(QueuedWorkflowRepository queuedWorkflowRepository) {
2955
this.queuedWorkflowRepository = queuedWorkflowRepository;
@@ -55,4 +81,70 @@ public void removeOldQueuedWorkflowEntries() {
5581
logger.info(queuedWorkflowRepository.deleteByTempRepresentation_RetrievedOnLessThanEqual(removeTime)
5682
+ " Old queued workflows removed");
5783
}
84+
85+
/**
86+
* Scheduled function to delete old temporary directories.
87+
*
88+
* <p>Will scan each temporary directory (graphviz, RO, git), searching
89+
* for files exceeding a specified threshold.</p>
90+
*
91+
* <p>It scans the first level directories, i.e. it does not recursively
92+
* scans directories. So it will delete any RO or Git repository directories
93+
* that exceed the threshold. Similarly, it will delete any graph (svg, png,
94+
* etc) that also exceed it.</p>
95+
*
96+
* <p>Errors logged through Logger. Settings in Spring application properties
97+
* file.</p>
98+
*
99+
* @since 1.4.5
100+
*/
101+
@Scheduled(cron = "${cron.clearTmpDir}")
102+
public void clearTmpDir() {
103+
// Temporary files used for graphviz, RO, and git may be stored in different
104+
// locations, so we will collect all of them here.
105+
List<String> temporaryDirectories = Stream.of(bundleStorage, graphvizStorage, gitStorage)
106+
.distinct()
107+
.toList();
108+
temporaryDirectories.forEach(this::clearDirectory);
109+
}
110+
111+
/**
112+
* For a given temporary directory, scans it (not recursively) for files and
113+
* directories exceeding the age limit threshold.
114+
*
115+
* @since 1.4.5
116+
* @see <a href="https://commons.apache.org/proper/commons-io/apidocs/org/apache/commons/io/filefilter/AgeFileFilter.html">https://commons.apache.org/proper/commons-io/apidocs/org/apache/commons/io/filefilter/AgeFileFilter.html</a>
117+
* @param temporaryDirectory temporary directory
118+
*/
119+
private void clearDirectory(String temporaryDirectory) {
120+
final Path dir = Paths.get(temporaryDirectory);
121+
final Instant cutoff = Instant.now().minus(Duration.ofDays(TMP_DIR_AGE_LIMIT_DAYS));
122+
// TODO: Commons IO 2.12 has a constructor that takes an Instant; drop the Date#from call here when we upgrade.
123+
final AgeFileFilter fileAndDirFilter = new AgeFileFilter(Date.from(cutoff));
124+
final AccumulatorPathVisitor visitor = AccumulatorPathVisitor.withLongCounters(fileAndDirFilter, fileAndDirFilter);
125+
126+
// Walk the files.
127+
try {
128+
Files.walkFileTree(dir, Collections.emptySet(), /* maxDepth */ 1, visitor);
129+
} catch (IOException e) {
130+
// Really unexpected. walkFileTree should throw an IllegalArgumentException for negative maxDepth (clearly
131+
// not happening here), a SecurityException if the security manager denies access, or this IOException in
132+
// the cases where an I/O error happened (disk error, OS error, file not found, etc.). So just a warning.
133+
logger.warn(String.format("Unexpected I/O error was thrown walking directory [%s]: %s", dir.toString(), e.getMessage()), e);
134+
}
135+
136+
// Delete the directories accumulated by the visitor.
137+
final List<Path> dirList = visitor.getDirList();
138+
dirList.forEach(tooOldDeleteMe -> {
139+
File fileToDelete = tooOldDeleteMe.toFile();
140+
try {
141+
FileUtils.forceDelete(fileToDelete);
142+
} catch (IOException e) {
143+
// Here we probably have a more serious case. Since the Git repository, RO directory, or graphs are
144+
// not expected to be in use, and the application must have access, I/O errors are not expected and
145+
// must be treated as errors.
146+
logger.error(String.format("Failed to delete old temporary file or directory [%s]: %s", fileToDelete.getAbsolutePath(), e.getMessage()), e);
147+
}
148+
});
149+
}
58150
}

src/main/resources/application.properties

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -75,6 +75,12 @@ cron.deleteOldQueuedWorkflows = 0 0 * * * ?
7575
# Age limit for queued workflows in hours.
7676
queuedWorkflowAgeLimitHours = 24
7777

78+
# The expression below runs once every day at midnight (second 0, minute 0, hour 0, i.e. 00:00:00).
79+
cron.clearTmpDir = 0 0 0 * * ?
80+
81+
# Age limit for tmp directories in days.
82+
tmpDirAgeLimitDays = 1
83+
7884
#=======================
7985
# DB migrations
8086
#=======================

0 commit comments

Comments
 (0)