diff --git a/qiita_db/test/test_util.py b/qiita_db/test/test_util.py index ef532ce2b..112cb3e6b 100644 --- a/qiita_db/test/test_util.py +++ b/qiita_db/test/test_util.py @@ -1291,6 +1291,12 @@ def test_purge_filepaths_test(self): fps_viewed = self._get_current_filepaths() self.assertCountEqual(fps_expected, fps_viewed) + def test_quick_mounts_purge(self): + # one of the tests creates a conflicting artifact_type so this test + # will always raise this ValueError + with self.assertRaises(ValueError): + qdb.util.quick_mounts_purge() + STUDY_INFO = { 'study_id': 1, diff --git a/qiita_db/util.py b/qiita_db/util.py index 06ed3d417..df7153bb4 100644 --- a/qiita_db/util.py +++ b/qiita_db/util.py @@ -49,13 +49,14 @@ from bcrypt import hashpw, gensalt from functools import partial from os.path import join, basename, isdir, exists, getsize -from os import walk, remove, listdir, rename +from os import walk, remove, listdir, rename, stat from glob import glob from shutil import move, rmtree, copy as shutil_copy from openpyxl import load_workbook from tempfile import mkstemp from csv import writer as csv_writer from datetime import datetime +from time import time as now from itertools import chain from contextlib import contextmanager import h5py @@ -896,6 +897,73 @@ def purge_filepaths(delete_files=True): qdb.sql_connection.TRN.execute() +def quick_mounts_purge(): + r"""This is a quick mount purge as it only slightly relies on the database + + Notes + ----- + Currently we delete anything older than 30 days that is not linked + to the database. This number is intentionally hardcoded in the code. + At the time of this writing this number seem high but keeping it + this way to be safe. In the future, if needed, it can be changed. + """ + with qdb.sql_connection.TRN: + main_sql = """SELECT data_directory_id FROM qiita.artifact_type at + LEFT JOIN qiita.data_directory dd ON ( + dd.data_type = at.artifact_type) + WHERE subdirectory = true""" + qdb.sql_connection.TRN.add(main_sql) + mp_ids = qdb.sql_connection.TRN.execute_fetchflatten() + mounts = [qdb.util.get_mountpoint_path_by_id(x) for x in mp_ids] + folders = [join(x, f) for x in mounts for f in listdir(x) + if f.isnumeric()] + + # getting all unlinked folders + to_delete = [] + for i, f in enumerate(folders): + vals = f.split('/') + aid = int(vals[-1]) + artifact_type = vals[-2] + if artifact_type == 'FeatureData[Taxonomy]': + continue + + try: + a = qdb.artifact.Artifact(aid) + except qdb.exceptions.QiitaDBUnknownIDError: + to_delete.append(f) + continue + if not a.artifact_type.startswith(artifact_type): + raise ValueError('Review artifact type: ' + f'{a.id} {artifact_type} {a.artifact_type}') + + # now, let's just keep those older than 30 days (in seconds) + ignore = now() - (30*86400) + to_keep = [x for x in to_delete if stat(x).st_mtime >= ignore] + to_delete = set(to_delete) - set(to_keep) + + # get stats to report + stats = dict() + for td in to_delete: + f = td.split('/')[-2] + if f not in stats: + stats[f] = 0 + stats[f] += sum([getsize(join(p, fp)) for p, ds, fs in walk(td) + for fp in fs]) + + report = ['----------------------'] + for f, s in stats.items(): + report.append(f'{f}\t{naturalsize(s)}') + report.append( + f'Total files {len(to_delete)} {naturalsize(sum(stats.values()))}') + report.append('----------------------') + + for td in list(to_delete): + if exists(td): + rmtree(td) + + return '\n'.join(report) + + def _rm_exists(fp, obj, _id, delete_files): try: _id = int(_id) diff --git a/scripts/qiita-cron-job b/scripts/qiita-cron-job index 825791afc..80d75d479 100755 --- a/scripts/qiita-cron-job +++ b/scripts/qiita-cron-job @@ -12,7 +12,8 @@ import click from qiita_db.util import ( purge_filepaths as qiita_purge_filepaths, - empty_trash_upload_folder as qiita_empty_trash_upload_folder) + empty_trash_upload_folder as qiita_empty_trash_upload_folder, + quick_mounts_purge as qiita_quick_mounts_purge) from qiita_db.meta_util import ( update_redis_stats as qiita_update_redis_stats, generate_biom_and_metadata_release as @@ -62,5 +63,10 @@ def generate_plugin_releases(): qiita_generate_plugin_releases() +@commands.command() +def quick_mounts_purge(): + print(qiita_quick_mounts_purge()) + + if __name__ == "__main__": commands()