-
-
Notifications
You must be signed in to change notification settings - Fork 29
Delete git/RO directories when not needed, delete old files/dirs in cron #426
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from all commits
07a5001
b98bd15
3e03f59
00c987d
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,6 +1,9 @@ | ||
package org.commonwl.view; | ||
|
||
|
||
import org.apache.commons.io.FileUtils; | ||
import org.apache.commons.io.file.AccumulatorPathVisitor; | ||
import org.apache.commons.io.filefilter.AgeFileFilter; | ||
import org.commonwl.view.workflow.QueuedWorkflowRepository; | ||
import org.slf4j.Logger; | ||
import org.slf4j.LoggerFactory; | ||
|
@@ -9,8 +12,18 @@ | |
import org.springframework.scheduling.annotation.Scheduled; | ||
import org.springframework.stereotype.Component; | ||
|
||
import java.io.File; | ||
import java.io.IOException; | ||
import java.nio.file.Files; | ||
import java.nio.file.Path; | ||
import java.nio.file.Paths; | ||
import java.time.Duration; | ||
import java.time.Instant; | ||
import java.util.Calendar; | ||
import java.util.Collections; | ||
import java.util.Date; | ||
import java.util.List; | ||
import java.util.stream.Stream; | ||
|
||
/** | ||
* Scheduler class for recurrent processes. | ||
|
@@ -24,6 +37,17 @@ public class Scheduler { | |
@Value("${queuedWorkflowAgeLimitHours}") | ||
private Integer QUEUED_WORKFLOW_AGE_LIMIT_HOURS; | ||
|
||
@Value("${tmpDirAgeLimitDays}") | ||
private Integer TMP_DIR_AGE_LIMIT_DAYS; | ||
|
||
// We do not want to remove the bundles, as we use the disk as a sort of | ||
// cache. Whenever a workflow page is displayed in the browser the UI | ||
// fires a request to re-generate it. We skip that by keeping files on disk. | ||
@Value("${graphvizStorage}") | ||
private String graphvizStorage; | ||
@Value("${gitStorage}") | ||
private String gitStorage; | ||
|
||
@Autowired | ||
public Scheduler(QueuedWorkflowRepository queuedWorkflowRepository) { | ||
this.queuedWorkflowRepository = queuedWorkflowRepository; | ||
|
@@ -55,4 +79,59 @@ public void removeOldQueuedWorkflowEntries() { | |
logger.info(queuedWorkflowRepository.deleteByTempRepresentation_RetrievedOnLessThanEqual(removeTime) | ||
+ " Old queued workflows removed"); | ||
} | ||
|
||
/** | ||
* Scheduled function to delete old temporary directories. | ||
* | ||
* <p>Will scan each temporary directory (graphviz, RO, git), searching | ||
* for files exceeding a specified threshold.</p> | ||
* | ||
* <p>It scans the first level directories, i.e. it does not recursively | ||
* scans directories. So it will delete any RO or Git repository directories | ||
* that exceed the threshold. Similarly, it will delete any graph (svg, png, | ||
* etc) that also exceed it.</p> | ||
* | ||
* <p>Errors logged through Logger. Settings in Spring application properties | ||
* file.</p> | ||
* | ||
* @since 1.4.5 | ||
*/ | ||
@Scheduled(cron = "${cron.clearTmpDir}") | ||
public void clearTmpDir() { | ||
// Temporary files used for graphviz, RO, and git may be stored in different | ||
// locations, so we will collect all of them here. | ||
List<String> temporaryDirectories = Stream.of(graphvizStorage, gitStorage) | ||
.distinct() | ||
.toList(); | ||
temporaryDirectories.forEach(this::clearDirectory); | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. @mr-c , @tetron , this change should delete all files older than a certain limit from the server. But I was thinking, maybe we want to keep the files from the workflows listed in the first page of the CWL Viewer app (or two first, or three first pages, or first 15 workflows, etc.)? I can probably use the same code used when you visit "Explore", but requesting just one page or a certain number of workflows. Then I would get the git and RO directories locations from the PostgreSQL table and skip these. Or is that a bit too much? WDYT? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Can we keep the ROs but delete the old Git checkouts? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Sure. Do you mean keep all the ROs, or just the ones in the first page of the workflows listing? I was going to send a screenshot of the first page, but the server appears to be unwell again. I'll leave a message on the Matrix CWLViewer and in some room at Curii to see if someone can kick that server or container, etc. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. How much disk space do the ROs take up? An estimate based upon some "typical" ROs is fine as well. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Good idea, cc'ing @tetron There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Updated the code above to skip bundles 👍 |
||
} | ||
|
||
/** | ||
* For a given temporary directory, scans it (not recursively) for files and | ||
* directories exceeding the age limit threshold. | ||
* | ||
* @since 1.4.5 | ||
* @see <a href="https://commons.apache.org/proper/commons-io/apidocs/org/apache/commons/io/filefilter/AgeFileFilter.html">https://commons.apache.org/proper/commons-io/apidocs/org/apache/commons/io/filefilter/AgeFileFilter.html</a> | ||
* @param temporaryDirectory temporary directory | ||
*/ | ||
private void clearDirectory(String temporaryDirectory) { | ||
final Instant cutoff = Instant.now().minus(Duration.ofDays(TMP_DIR_AGE_LIMIT_DAYS)); | ||
|
||
File temporaryDirectoryFile = new File(temporaryDirectory); | ||
String[] files = temporaryDirectoryFile.list(new AgeFileFilter(Date.from(cutoff))); | ||
|
||
if (files != null && files.length > 0) { | ||
for (String fileName : files) { | ||
File fileToDelete = new File(temporaryDirectoryFile, fileName); | ||
try { | ||
FileUtils.forceDelete(fileToDelete); | ||
} catch (IOException e) { | ||
// Here we probably have a more serious case. Since the Git repository, RO directory, or graphs are | ||
// not expected to be in use, and the application must have access, I/O errors are not expected and | ||
// must be treated as errors. | ||
logger.error(String.format("Failed to delete old temporary file or directory [%s]: %s", fileToDelete.getAbsolutePath(), e.getMessage()), e); | ||
} | ||
} | ||
} | ||
} | ||
} |
Uh oh!
There was an error while loading. Please reload this page.