From c539b5b3f528ee3e7d02f3e2801e047bd094f538 Mon Sep 17 00:00:00 2001
From: Jose Navas
Date: Mon, 18 Sep 2017 08:41:25 -0700
Subject: [PATCH 1/7] Fix #2276

---
 qiita_db/test/test_util.py      | 10 +++++++++
 qiita_db/util.py                | 38 +++++++++++++++++++++++++++++++
 qiita_pet/handlers/download.py  | 40 ++++++++++++++++++++++++++-------
 qiita_pet/test/test_download.py | 39 +++++++++++++++++++++++++++++---
 4 files changed, 116 insertions(+), 11 deletions(-)

diff --git a/qiita_db/test/test_util.py b/qiita_db/test/test_util.py
index 466eaed7b..11d5ed310 100644
--- a/qiita_db/test/test_util.py
+++ b/qiita_db/test/test_util.py
@@ -622,6 +622,16 @@ def test_get_timeseries_types(self):
                [10, 'mixed', 'combo intervention']]
         self.assertEqual(obs, exp)
 
+    def test_get_filepath_information(self):
+        obs = qdb.util.get_filepath_information(1)
+        # This path is machine specific. Just checking it is not empty
+        self.assertIsNotNone(obs.pop('fullpath'))
+        exp = {'filepath_id': 1L, 'filepath': '1_s_G1_L001_sequences.fastq.gz',
+               'filepath_type': 'raw_forward_seqs', 'checksum': '852952723',
+               'data_type': 'raw_data', 'mountpoint': 'raw_data',
+               'subdirectory': False, 'active': True}
+        self.assertEqual(obs, exp)
+
     def test_filepath_id_to_rel_path(self):
         obs = qdb.util.filepath_id_to_rel_path(1)
         exp = 'raw_data/1_s_G1_L001_sequences.fastq.gz'

diff --git a/qiita_db/util.py b/qiita_db/util.py
index 25898fa54..fc197798e 100644
--- a/qiita_db/util.py
+++ b/qiita_db/util.py
@@ -845,6 +845,44 @@ def move_filepaths_to_upload_folder(study_id, filepaths):
         qdb.sql_connection.TRN.execute()
 
 
+def get_filepath_information(filepath_id):
+    """Gets the filepath information of filepath_id
+
+    Parameters
+    ----------
+    filepath_id : int
+        The filepath id
+
+    Returns
+    -------
+    dict
+        The filepath information
+    """
+    with qdb.sql_connection.TRN:
+        sql = """SELECT filepath_id, filepath, filepath_type, checksum,
+                        data_type, mountpoint, subdirectory, active,
+                        artifact_id
+                 FROM qiita.filepath
+                    JOIN qiita.filepath_type USING (filepath_type_id)
+                    JOIN qiita.data_directory USING (data_directory_id)
+                    LEFT JOIN qiita.artifact_filepath USING (filepath_id)
+                 WHERE filepath_id = %s"""
+        qdb.sql_connection.TRN.add(sql, [filepath_id])
+        res = dict(qdb.sql_connection.TRN.execute_fetchindex()[0])
+
+        def path_builder(db_dir, filepath, mountpoint, subdirectory, obj_id):
+            if subdirectory:
+                return join(db_dir, mountpoint, str(obj_id), filepath)
+            else:
+                return join(db_dir, mountpoint, filepath)
+
+        obj_id = res.pop('artifact_id')
+        res['fullpath'] = path_builder(get_db_files_base_dir(),
+                                       res['filepath'], res['mountpoint'],
+                                       res['subdirectory'], obj_id)
+        return res
+
+
 def filepath_id_to_rel_path(filepath_id):
     """Gets the relative to the base directory of filepath_id
 

diff --git a/qiita_pet/handlers/download.py b/qiita_pet/handlers/download.py
index bb906e532..cea0d488e 100644
--- a/qiita_pet/handlers/download.py
+++ b/qiita_pet/handlers/download.py
@@ -17,7 +17,8 @@
 from .base_handlers import BaseHandler
 from qiita_pet.handlers.api_proxy import study_get_req
 from qiita_db.study import Study
-from qiita_db.util import filepath_id_to_rel_path, get_db_files_base_dir
+from qiita_db.util import (filepath_id_to_rel_path, get_db_files_base_dir,
+                           get_filepath_information)
 from qiita_db.meta_util import validate_filepath_access_by_user
 from qiita_db.metadata_template.sample_template import SampleTemplate
 from qiita_db.metadata_template.prep_template import PrepTemplate
@@ -37,19 +38,42 @@ def get(self, filepath_id):
                 "filepath_id: %s" % (self.current_user.email, str(fid)))
 
         relpath = filepath_id_to_rel_path(fid)
+        fp_info = get_filepath_information(fid)
         fname = basename(relpath)
 
-        # If we don't have nginx, write a file that indicates this
-        self.write("This installation of Qiita was not equipped with nginx, "
-                   "so it is incapable of serving files. The file you "
-                   "attempted to download is located at %s" % relpath)
+        if fp_info['filepath_type'] in ('directory', 'html_summary_dir'):
+            # This is a directory, we need to list all the files so NGINX
+            # can download all of them
+            basedir = get_db_files_base_dir()
+            basedir_len = len(basedir) + 1
+            to_download = []
+            for dp, _, fps in walk(fp_info['fullpath']):
+                for fn in fps:
+                    fullpath = join(dp, fn)
+                    spath = fullpath
+                    if fullpath.startswith(basedir):
+                        spath = fullpath[basedir_len:]
+                    to_download.append((fullpath, spath, spath))
+
+            all_files = '\n'.join(
+                ["- %s /protected/%s %s" % (getsize(fp), sfp, n)
+                 for fp, sfp, n in to_download])
+
+            self.set_header('X-Archive-Files', 'zip')
+            self.write("%s\n" % all_files)
+            fname = '%s.zip' % fname
+        else:
+            # If we don't have nginx, write a file that indicates this
+            self.write("This installation of Qiita was not equipped with "
+                       "nginx, so it is incapable of serving files. The file "
+                       "you attempted to download is located at %s" % relpath)
+            self.set_header('Content-Type', 'application/octet-stream')
+            self.set_header('Content-Transfer-Encoding', 'binary')
+            self.set_header('X-Accel-Redirect', '/protected/' + relpath)
 
         self.set_header('Content-Description', 'File Transfer')
-        self.set_header('Content-Type', 'application/octet-stream')
-        self.set_header('Content-Transfer-Encoding', 'binary')
         self.set_header('Expires', '0')
         self.set_header('Cache-Control', 'no-cache')
-        self.set_header('X-Accel-Redirect', '/protected/' + relpath)
         self.set_header('Content-Disposition',
                         'attachment; filename=%s' % fname)
 

diff --git a/qiita_pet/test/test_download.py b/qiita_pet/test/test_download.py
index d9d0415c8..10ed52bfe 100644
--- a/qiita_pet/test/test_download.py
+++ b/qiita_pet/test/test_download.py
@@ -8,10 +8,10 @@
 
 from unittest import main
 from mock import Mock
-from os.path import exists, isdir, join
-from os import remove, makedirs
+from os.path import exists, isdir, join, basename
+from os import remove, makedirs, close
 from shutil import rmtree
-from tempfile import mkdtemp
+from tempfile import mkdtemp, mkstemp
 
 from biom.util import biom_open
 from biom import example_table as et
@@ -28,9 +28,16 @@ class TestDownloadHandler(TestHandlerBase):
 
     def setUp(self):
         super(TestDownloadHandler, self).setUp()
+        self._clean_up_files = []
 
     def tearDown(self):
         super(TestDownloadHandler, self).tearDown()
+        for fp in self._clean_up_files:
+            if exists(fp):
+                if isdir(fp):
+                    rmtree(fp)
+                else:
+                    remove(fp)
 
     def test_download(self):
         # check success
@@ -45,6 +52,32 @@ def test_download(self):
         response = self.get('/download/1000')
         self.assertEqual(response.code, 403)
 
+        # directory
+        a = Artifact(1)
+        fd, fp = mkstemp(suffix='.html')
+        close(fd)
+        with open(fp, 'w') as f:
+            f.write('\n')
+        self._clean_up_files.append(fp)
+        dirpath = mkdtemp()
+        fd, fp2 = mkstemp(suffix='.txt', dir=dirpath)
+        close(fd)
+        with open(fp2, 'w') as f:
+            f.write('\n')
+        self._clean_up_files.append(dirpath)
+        a.set_html_summary(fp, support_dir=dirpath)
+        for fp_id, _, fp_type in a.filepaths:
+            if fp_type == 'html_summary_dir':
+                break
+        response = self.get('/download/%d' % fp_id)
+        self.assertEqual(response.code, 200)
+
+        fp_name = basename(fp2)
+        dirname = basename(dirpath)
+        self.assertEqual(
+            response.body, "- 1 /protected/FASTQ/1/%s/%s FASTQ/1/%s/%s\n"
+            % (dirname, fp_name, dirname, fp_name))
+
 
 class TestDownloadStudyBIOMSHandler(TestHandlerBase):
 

From 6ad5e799f132825d890b04c80be8584c49a5dd1e Mon Sep 17 00:00:00 2001
From: Jose Navas
Date: Mon, 18 Sep 2017 10:59:55 -0700
Subject: [PATCH 2/7] Factoring out generate nginx directory file list

---
 qiita_pet/handlers/download.py | 91 +++++++++++++++------------------
 1 file changed, 40 insertions(+), 51 deletions(-)

diff --git a/qiita_pet/handlers/download.py b/qiita_pet/handlers/download.py
index cea0d488e..310134f48 100644
--- a/qiita_pet/handlers/download.py
+++ b/qiita_pet/handlers/download.py
@@ -25,7 +25,43 @@
 from qiita_core.util import execute_as_transaction, get_release_info
 
 
-class DownloadHandler(BaseHandler):
+class BaseHandlerDownload(BaseHandler):
+    def _check_permissions(self, sid):
+        # Check general access to study
+        study_info = study_get_req(sid, self.current_user.id)
+        if study_info['status'] != 'success':
+            raise HTTPError(405, "%s: %s, %s" % (study_info['message'],
+                                                 self.current_user.email, sid))
+        return Study(sid)
+
+    def _generate_files(self, header_name, accessions, filename):
+        text = "sample_name\t%s\n%s" % (header_name, '\n'.join(
+            ["%s\t%s" % (k, v) for k, v in viewitems(accessions)]))
+
+        self.set_header('Content-Description', 'text/csv')
+        self.set_header('Expires', '0')
+        self.set_header('Cache-Control', 'no-cache')
+        self.set_header('Content-Disposition', 'attachment; '
+                        'filename=%s' % filename)
+        self.write(text)
+        self.finish()
+
+    def _list_dir_files_nginx(self, dirpath):
+        """"""
+        basedir = get_db_files_base_dir()
+        basedir_len = len(basedir) + 1
+        to_download = []
+        for dp, _, fps in walk(dirpath):
+            for fn in fps:
+                fullpath = join(dp, fn)
+                spath = fullpath
+                if fullpath.startswith(basedir):
+                    spath = fullpath[basedir_len:]
+                to_download.append((fullpath, spath, spath))
+        return to_download
+
+
+class DownloadHandler(BaseHandlerDownload):
     @authenticated
     @coroutine
     @execute_as_transaction
@@ -44,17 +80,7 @@ def get(self, filepath_id):
         if fp_info['filepath_type'] in ('directory', 'html_summary_dir'):
             # This is a directory, we need to list all the files so NGINX
             # can download all of them
-            basedir = get_db_files_base_dir()
-            basedir_len = len(basedir) + 1
-            to_download = []
-            for dp, _, fps in walk(fp_info['fullpath']):
-                for fn in fps:
-                    fullpath = join(dp, fn)
-                    spath = fullpath
-                    if fullpath.startswith(basedir):
-                        spath = fullpath[basedir_len:]
-                    to_download.append((fullpath, spath, spath))
-
+            to_download = self._list_dir_files_nginx(fp_info['fullpath'])
             all_files = '\n'.join(
                 ["- %s /protected/%s %s" % (getsize(fp), sfp, n)
                  for fp, sfp, n in to_download])
@@ -80,28 +106,6 @@ def get(self, filepath_id):
         self.finish()
 
 
-class BaseHandlerDownload(BaseHandler):
-    def _check_permissions(self, sid):
-        # Check general access to study
-        study_info = study_get_req(sid, self.current_user.id)
-        if study_info['status'] != 'success':
-            raise HTTPError(405, "%s: %s, %s" % (study_info['message'],
-                                                 self.current_user.email, sid))
-        return Study(sid)
-
-    def _generate_files(self, header_name, accessions, filename):
-        text = "sample_name\t%s\n%s" % (header_name, '\n'.join(
-            ["%s\t%s" % (k, v) for k, v in viewitems(accessions)]))
-
-        self.set_header('Content-Description', 'text/csv')
-        self.set_header('Expires', '0')
-        self.set_header('Cache-Control', 'no-cache')
-        self.set_header('Content-Disposition', 'attachment; '
-                        'filename=%s' % filename)
-        self.write(text)
-        self.finish()
-
-
 class DownloadStudyBIOMSHandler(BaseHandlerDownload):
     @authenticated
     @coroutine
@@ -125,13 +129,7 @@ def get(self, study_id):
                         # If we have a directory, we actually need to list
                         # all the files from the directory so NGINX can
                        # actually download all of them
-                        for dp, _, fps in walk(path):
-                            for fname in fps:
-                                fullpath = join(dp, fname)
-                                spath = fullpath
-                                if fullpath.startswith(basedir):
-                                    spath = fullpath[basedir_len:]
-                                to_download.append((fullpath, spath, spath))
+                        to_download.extend(self._list_dir_files_nginx(path))
                     elif path.startswith(basedir):
                         spath = path[basedir_len:]
                         to_download.append((path, spath, spath))
@@ -215,16 +213,7 @@ def get(self, study_id):
             if not a.parents:
                 for i, (fid, path, data_type) in enumerate(a.filepaths):
                     if data_type == 'directory':
-                        # If we have a directory, we actually need to list
-                        # all the files from the directory so NGINX can
-                        # actually download all of them
-                        for dp, _, fps in walk(path):
-                            for fname in fps:
-                                fullpath = join(dp, fname)
-                                spath = fullpath
-                                if fullpath.startswith(basedir):
-                                    spath = fullpath[basedir_len:]
-                                to_download.append((fullpath, spath, spath))
+                        to_download.append(path)
                     elif path.startswith(basedir):
                         spath = path[basedir_len:]
                         to_download.append((path, spath, spath))

From 27acf31efd6186deb66234a53f7115d528b6bd2a Mon Sep 17 00:00:00 2001
From: Jose Navas
Date: Mon, 18 Sep 2017 11:05:38 -0700
Subject: [PATCH 3/7] Factoring out the nginx file list writing

---
 qiita_pet/handlers/download.py | 30 ++++++++++++------------------
 1 file changed, 12 insertions(+), 18 deletions(-)

diff --git a/qiita_pet/handlers/download.py b/qiita_pet/handlers/download.py
index 310134f48..8dca9e464 100644
--- a/qiita_pet/handlers/download.py
+++ b/qiita_pet/handlers/download.py
@@ -60,6 +60,15 @@ def _list_dir_files_nginx(self, dirpath):
                 to_download.append((fullpath, spath, spath))
         return to_download
 
+    def _write_nginx_file_list(self, to_download):
+        """"""
+        all_files = '\n'.join(
+            ["- %s /protected/%s %s" % (getsize(fp), sfp, n)
+             for fp, sfp, n in to_download])
+
+        self.set_header('X-Archive-Files', 'zip')
+        self.write("%s\n" % all_files)
+
 
 class DownloadHandler(BaseHandlerDownload):
     @authenticated
@@ -81,12 +90,7 @@ def get(self, filepath_id):
             # This is a directory, we need to list all the files so NGINX
             # can download all of them
             to_download = self._list_dir_files_nginx(fp_info['fullpath'])
-            all_files = '\n'.join(
-                ["- %s /protected/%s %s" % (getsize(fp), sfp, n)
-                 for fp, sfp, n in to_download])
-
-            self.set_header('X-Archive-Files', 'zip')
-            self.write("%s\n" % all_files)
+            self._write_nginx_file_list(to_download)
             fname = '%s.zip' % fname
         else:
             # If we don't have nginx, write a file that indicates this
@@ -150,10 +154,7 @@ def get(self, study_id):
                         (qmf, sqmf, 'mapping_files/%s_mapping_file.txt'
                          % a.id))
 
-        # If we don't have nginx, write a file that indicates this
-        all_files = '\n'.join(["- %s /protected/%s %s" % (getsize(fp), sfp, n)
-                               for fp, sfp, n in to_download])
-        self.write("%s\n" % all_files)
+        self._write_nginx_file_list(to_download)
 
         zip_fn = 'study_%d_%s.zip' % (
             study_id, datetime.now().strftime('%m%d%y-%H%M%S'))
@@ -161,7 +162,6 @@ def get(self, study_id):
         self.set_header('Content-Description', 'File Transfer')
         self.set_header('Expires', '0')
         self.set_header('Cache-Control', 'no-cache')
-        self.set_header('X-Archive-Files', 'zip')
         self.set_header('Content-Disposition',
                         'attachment; filename=%s' % zip_fn)
         self.finish()
@@ -234,12 +234,7 @@ def get(self, study_id):
                         (qmf, sqmf, 'mapping_files/%s_mapping_file.txt'
                          % a.id))
 
-        # If we don't have nginx, write a file that indicates this
-        # Note that this configuration will automatically create and download
-        # ("on the fly") the zip file via the contents in all_files
-        all_files = '\n'.join(["- %s /protected/%s %s" % (getsize(fp), sfp, n)
-                               for fp, sfp, n in to_download])
-        self.write("%s\n" % all_files)
+        self._write_nginx_file_list(to_download)
 
         zip_fn = 'study_raw_data_%d_%s.zip' % (
             study_id, datetime.now().strftime('%m%d%y-%H%M%S'))
@@ -247,7 +242,6 @@ def get(self, study_id):
         self.set_header('Content-Description', 'File Transfer')
         self.set_header('Expires', '0')
         self.set_header('Cache-Control', 'no-cache')
-        self.set_header('X-Archive-Files', 'zip')
         self.set_header('Content-Disposition',
                         'attachment; filename=%s' % zip_fn)
         self.finish()

From 460998f5bf39ef7964cf39014cccc941b592ce54 Mon Sep 17 00:00:00 2001
From: Jose Navas
Date: Mon, 18 Sep 2017 11:09:53 -0700
Subject: [PATCH 4/7] Factoring out generating the file list of an artifact

---
 qiita_pet/handlers/download.py | 90 +++++++++++++---------------------
 1 file changed, 34 insertions(+), 56 deletions(-)

diff --git a/qiita_pet/handlers/download.py b/qiita_pet/handlers/download.py
index 8dca9e464..00218c318 100644
--- a/qiita_pet/handlers/download.py
+++ b/qiita_pet/handlers/download.py
@@ -60,6 +60,38 @@ def _list_dir_files_nginx(self, dirpath):
                 to_download.append((fullpath, spath, spath))
         return to_download
 
+    def _list_artifact_files_nginx(self, artifact):
+        """"""
+        basedir = get_db_files_base_dir()
+        basedir_len = len(basedir) + 1
+        to_download = []
+        for i, (fid, path, data_type) in enumerate(artifact.filepaths):
+            # ignore if tgz as they could create problems and the
+            # raw data is in the folder
+            if data_type == 'tgz':
+                continue
+            if data_type == 'directory':
+                # If we have a directory, we actually need to list all the
+                # files from the directory so NGINX can actually download all
+                # of them
+                to_download.extend(self._list_dir_files_nginx(path))
+            elif path.startswith(basedir):
+                spath = path[basedir_len:]
+                to_download.append((path, spath, spath))
+            else:
+                to_download.append((path, path, path))
+
+        for pt in artifact.prep_templates:
+            qmf = pt.qiime_map_fp
+            if qmf is not None:
+                sqmf = qmf
+                if qmf.startswith(basedir):
+                    sqmf = qmf[basedir_len:]
+                to_download.append(
+                    (qmf, sqmf, 'mapping_files/%s_mapping_file.txt'
+                     % artifact.id))
+        return to_download
+
     def _write_nginx_file_list(self, to_download):
         """"""
         all_files = '\n'.join(
@@ -119,42 +151,11 @@ class DownloadStudyBIOMSHandler(BaseHandlerDownload):
     def get(self, study_id):
         study_id = int(study_id)
         study = self._check_permissions(study_id)
-
-        basedir = get_db_files_base_dir()
-        basedir_len = len(basedir) + 1
         # loop over artifacts and retrieve those that we have access to
         to_download = []
         for a in study.artifacts():
             if a.artifact_type == 'BIOM':
-                for i, (fid, path, data_type) in enumerate(a.filepaths):
-                    # ignore if tgz as they could create problems and the
-                    # raw data is in the folder
-                    if data_type == 'tgz':
-                        continue
-                    if data_type == 'directory':
-                        # If we have a directory, we actually need to list
-                        # all the files from the directory so NGINX can
-                        # actually download all of them
-                        to_download.extend(self._list_dir_files_nginx(path))
-                    elif path.startswith(basedir):
-                        spath = path[basedir_len:]
-                        to_download.append((path, spath, spath))
-                    else:
-                        # We are not aware of any case that can trigger this
-                        # situation, but we wanted to be overly cautious
-                        # There is no test for this line cause we don't know
-                        # how to trigger it
-                        to_download.append((path, path, path))
-
-                for pt in a.prep_templates:
-                    qmf = pt.qiime_map_fp
-                    if qmf is not None:
-                        sqmf = qmf
-                        if qmf.startswith(basedir):
-                            sqmf = qmf[basedir_len:]
-                        to_download.append(
-                            (qmf, sqmf, 'mapping_files/%s_mapping_file.txt'
-                             % a.id))
+                to_download.extend(self._list_artifact_files_nginx(a))
 
         self._write_nginx_file_list(to_download)
 
@@ -206,33 +207,10 @@ def get(self, study_id):
                             self.current_user.email, str(study_id)))
 
-        basedir = get_db_files_base_dir()
-        basedir_len = len(basedir) + 1
         # loop over artifacts and retrieve raw data (no parents)
         to_download = []
         for a in study.artifacts():
             if not a.parents:
-                for i, (fid, path, data_type) in enumerate(a.filepaths):
-                    if data_type == 'directory':
-                        to_download.append(path)
-                    elif path.startswith(basedir):
-                        spath = path[basedir_len:]
-                        to_download.append((path, spath, spath))
-                    else:
-                        # We are not aware of any case that can trigger this
-                        # situation, but we wanted to be overly cautious
-                        # There is no test for this line cause we don't know
-                        # how to trigger it
-                        to_download.append((path, path, path))
-
-                for pt in a.prep_templates:
-                    qmf = pt.qiime_map_fp
-                    if qmf is not None:
-                        sqmf = qmf
-                        if qmf.startswith(basedir):
-                            sqmf = qmf[basedir_len:]
-                        to_download.append(
-                            (qmf, sqmf, 'mapping_files/%s_mapping_file.txt'
-                             % a.id))
+                to_download.extend(self._list_artifact_files_nginx(a))
 
         self._write_nginx_file_list(to_download)
 

From c3a27de02c144563a731dfd618307ed6ea9a93bd Mon Sep 17 00:00:00 2001
From: Jose Navas
Date: Mon, 18 Sep 2017 11:19:15 -0700
Subject: [PATCH 5/7] Factoring out the header setting

---
 qiita_pet/handlers/download.py | 77 +++++++++++++++++++++++-----------
 1 file changed, 52 insertions(+), 25 deletions(-)

diff --git a/qiita_pet/handlers/download.py b/qiita_pet/handlers/download.py
index 00218c318..c5d11140a 100644
--- a/qiita_pet/handlers/download.py
+++ b/qiita_pet/handlers/download.py
@@ -47,7 +47,19 @@ def _generate_files(self, header_name, accessions, filename):
         self.finish()
 
     def _list_dir_files_nginx(self, dirpath):
-        """"""
+        """Generates the nginx list of files in the given dirpath
+
+        Parameters
+        ----------
+        dirpath : str
+            Path to the directory
+
+        Returns
+        -------
+        list of (str, str, str)
+            The path information needed by nginx for each file in the
+            directory
+        """
         basedir = get_db_files_base_dir()
         basedir_len = len(basedir) + 1
         to_download = []
@@ -61,7 +73,18 @@ def _list_dir_files_nginx(self, dirpath):
         return to_download
 
     def _list_artifact_files_nginx(self, artifact):
-        """"""
+        """Generates a nginx list of files for the given artifact
+
+        Parameters
+        ----------
+        artifact : qiita_db.artifact.Artifact
+            The artifact to retrieve the files from
+
+        Returns
+        -------
+        list of (str, str, str)
+            The path information needed by nginx for each file in the artifact
+        """
         basedir = get_db_files_base_dir()
         basedir_len = len(basedir) + 1
         to_download = []
@@ -93,7 +116,13 @@ def _list_artifact_files_nginx(self, artifact):
         return to_download
 
     def _write_nginx_file_list(self, to_download):
-        """"""
+        """Writes out the nginx file list
+
+        Parameters
+        ----------
+        to_download : list of (str, str, str)
+            The file list information
+        """
         all_files = '\n'.join(
             ["- %s /protected/%s %s" % (getsize(fp), sfp, n)
              for fp, sfp, n in to_download])
@@ -101,6 +130,20 @@ def _write_nginx_file_list(self, to_download):
         self.set_header('X-Archive-Files', 'zip')
         self.write("%s\n" % all_files)
 
+    def _set_nginx_headers(self, fname):
+        """Sets common nginx headers
+
+        Parameters
+        ----------
+        fname : str
+            Nginx's output filename
+        """
+        self.set_header('Content-Description', 'File Transfer')
+        self.set_header('Expires', '0')
+        self.set_header('Cache-Control', 'no-cache')
+        self.set_header('Content-Disposition',
+                        'attachment; filename=%s' % fname)
+
 
 class DownloadHandler(BaseHandlerDownload):
     @authenticated
@@ -133,12 +176,7 @@ def get(self, filepath_id):
             self.set_header('Content-Transfer-Encoding', 'binary')
             self.set_header('X-Accel-Redirect', '/protected/' + relpath)
 
-        self.set_header('Content-Description', 'File Transfer')
-        self.set_header('Expires', '0')
-        self.set_header('Cache-Control', 'no-cache')
-        self.set_header('Content-Disposition',
-                        'attachment; filename=%s' % fname)
-
+        self._set_nginx_headers(fname)
         self.finish()
 
@@ -160,15 +198,11 @@ def get(self, study_id):
         zip_fn = 'study_%d_%s.zip' % (
             study_id, datetime.now().strftime('%m%d%y-%H%M%S'))
 
-        self.set_header('Content-Description', 'File Transfer')
-        self.set_header('Expires', '0')
-        self.set_header('Cache-Control', 'no-cache')
-        self.set_header('Content-Disposition',
-                        'attachment; filename=%s' % zip_fn)
+        self._set_nginx_headers(zip_fn)
         self.finish()
 
 
-class DownloadRelease(BaseHandler):
+class DownloadRelease(BaseHandlerDownload):
     @coroutine
     def get(self, extras):
         _, relpath, _ = get_release_info()
@@ -180,15 +214,12 @@ def get(self, extras):
                    "so it is incapable of serving files. The file you "
                    "attempted to download is located at %s" % relpath)
 
-        self.set_header('Content-Description', 'File Transfer')
+        self._set_nginx_headers(basename(relpath))
+
         self.set_header('Content-Type', 'application/octet-stream')
         self.set_header('Content-Transfer-Encoding', 'binary')
-        self.set_header('Expires', '0')
-        self.set_header('Cache-Control', 'no-cache')
         self.set_header('X-Accel-Redirect', '/protected-working_dir/' +
                         relpath)
-        self.set_header('Content-Disposition',
-                        'attachment; filename=%s' % basename(relpath))
 
         self.finish()
@@ -215,10 +246,6 @@ def get(self, study_id):
         zip_fn = 'study_raw_data_%d_%s.zip' % (
             study_id, datetime.now().strftime('%m%d%y-%H%M%S'))
 
-        self.set_header('Content-Description', 'File Transfer')
-        self.set_header('Expires', '0')
-        self.set_header('Cache-Control', 'no-cache')
-        self.set_header('Content-Disposition',
-                        'attachment; filename=%s' % zip_fn)
+        self._set_nginx_headers(zip_fn)
         self.finish()
 

From 92c5b05ea01587e691054d9d2ef652ef2b8008e8 Mon Sep 17 00:00:00 2001
From: Jose Navas
Date: Mon, 18 Sep 2017 14:03:35 -0700
Subject: [PATCH 6/7] Addressing @antgonza's comment

---
 qiita_pet/handlers/download.py | 22 +++++++++++++++-------
 1 file changed, 15 insertions(+), 7 deletions(-)

diff --git a/qiita_pet/handlers/download.py b/qiita_pet/handlers/download.py
index c5d11140a..fd0de463f 100644
--- a/qiita_pet/handlers/download.py
+++ b/qiita_pet/handlers/download.py
@@ -144,6 +144,19 @@ def _set_nginx_headers(self, fname):
         self.set_header('Content-Disposition',
                         'attachment; filename=%s' % fname)
 
+    def _write_nginx_placeholder_file(self, fp):
+        """Writes a nginx placeholder file in case nginx is not set up
+
+        Parameters
+        ----------
+        fp : str
+            The path to be downloaded through nginx
+        """
+        # If we don't have nginx, write a file that indicates this
+        self.write("This installation of Qiita was not equipped with "
+                   "nginx, so it is incapable of serving files. The file "
+                   "you attempted to download is located at %s" % fp)
+
 
 class DownloadHandler(BaseHandlerDownload):
     @authenticated
@@ -168,10 +181,7 @@ def get(self, filepath_id):
             self._write_nginx_file_list(to_download)
             fname = '%s.zip' % fname
         else:
-            # If we don't have nginx, write a file that indicates this
-            self.write("This installation of Qiita was not equipped with "
-                       "nginx, so it is incapable of serving files. The file "
-                       "you attempted to download is located at %s" % relpath)
+            self._write_nginx_placeholder_file(relpath)
             self.set_header('Content-Type', 'application/octet-stream')
             self.set_header('Content-Transfer-Encoding', 'binary')
             self.set_header('X-Accel-Redirect', '/protected/' + relpath)
@@ -210,9 +220,7 @@ def get(self, extras):
         # If we don't have nginx, write a file that indicates this
         # Note that this configuration will automatically create and download
         # ("on the fly") the zip file via the contents in all_files
-        self.write("This installation of Qiita was not equipped with nginx, "
-                   "so it is incapable of serving files. The file you "
-                   "attempted to download is located at %s" % relpath)
+        self._write_nginx_placeholder_file(relpath)
 
         self._set_nginx_headers(basename(relpath))
 

From 32c038235bd0d3207da1f3a5c8a3f6e6369307e3 Mon Sep 17 00:00:00 2001
From: Jose Navas
Date: Tue, 19 Sep 2017 07:28:08 -0700
Subject: [PATCH 7/7] Addressing @wasade's comments

---
 qiita_db/util.py               | 47 ++++++++++++++++++++++------------
 qiita_pet/handlers/download.py |  4 +--
 2 files changed, 33 insertions(+), 18 deletions(-)

diff --git a/qiita_db/util.py b/qiita_db/util.py
index fc197798e..b664bfa8b 100644
--- a/qiita_db/util.py
+++ b/qiita_db/util.py
@@ -649,6 +649,33 @@ def str_to_id(x):
             chain.from_iterable(qdb.sql_connection.TRN.execute()[idx:])))
 
 
+def _path_builder(db_dir, filepath, mountpoint, subdirectory, obj_id):
+    """Builds the path of a DB stored file
+
+    Parameters
+    ----------
+    db_dir : str
+        The DB base dir
+    filepath : str
+        The path stored in the DB
+    mountpoint : str
+        The mountpoint of the given file
+    subdirectory : bool
+        Whether the file is stored in a subdirectory in the mountpoint or not
+    obj_id : int
+        The id of the object to which the file is attached
+
+    Returns
+    -------
+    str
+        The full path of the given file
+    """
+    if subdirectory:
+        return join(db_dir, mountpoint, str(obj_id), filepath)
+    else:
+        return join(db_dir, mountpoint, filepath)
+
+
 def retrieve_filepaths(obj_fp_table, obj_id_column, obj_id, sort=None,
                        fp_type=None):
     """Retrieves the filepaths for the given object id
@@ -674,12 +701,6 @@ def retrieve_filepaths(obj_fp_table, obj_id_column, obj_id, sort=None,
         object id
 
     """
-    def path_builder(db_dir, filepath, mountpoint, subdirectory, obj_id):
-        if subdirectory:
-            return join(db_dir, mountpoint, str(obj_id), filepath)
-        else:
-            return join(db_dir, mountpoint, filepath)
-
     sql_sort = ""
     if sort == 'ascending':
         sql_sort = " ORDER BY filepath_id"
@@ -710,7 +731,7 @@ def retrieve_filepaths(obj_fp_table, obj_id_column, obj_id, sort=None,
         results = qdb.sql_connection.TRN.execute_fetchindex()
         db_dir = get_db_files_base_dir()
 
-        return [(fpid, path_builder(db_dir, fp, m, s, obj_id), fp_type_)
+        return [(fpid, _path_builder(db_dir, fp, m, s, obj_id), fp_type_)
                 for fpid, fp, fp_type_, m, s in results]
 
 
@@ -870,16 +891,10 @@ def get_filepath_information(filepath_id):
         qdb.sql_connection.TRN.add(sql, [filepath_id])
         res = dict(qdb.sql_connection.TRN.execute_fetchindex()[0])
 
-        def path_builder(db_dir, filepath, mountpoint, subdirectory, obj_id):
-            if subdirectory:
-                return join(db_dir, mountpoint, str(obj_id), filepath)
-            else:
-                return join(db_dir, mountpoint, filepath)
-
         obj_id = res.pop('artifact_id')
-        res['fullpath'] = path_builder(get_db_files_base_dir(),
-                                       res['filepath'], res['mountpoint'],
-                                       res['subdirectory'], obj_id)
+        res['fullpath'] = _path_builder(get_db_files_base_dir(),
+                                        res['filepath'], res['mountpoint'],
+                                        res['subdirectory'], obj_id)
         return res
 
 
diff --git a/qiita_pet/handlers/download.py b/qiita_pet/handlers/download.py
index fd0de463f..8b06f7230 100644
--- a/qiita_pet/handlers/download.py
+++ b/qiita_pet/handlers/download.py
@@ -10,7 +10,7 @@
 from tornado.gen import coroutine
 from future.utils import viewitems
 
-from os.path import basename, getsize, join
+from os.path import basename, getsize, join, isdir
 from os import walk
 from datetime import datetime
 
@@ -93,7 +93,7 @@ def _list_artifact_files_nginx(self, artifact):
             # ignore if tgz as they could create problems and the
             # raw data is in the folder
             if data_type == 'tgz':
                 continue
-            if data_type == 'directory':
+            if isdir(path):
                 # If we have a directory, we actually need to list all the
                 # files from the directory so NGINX can actually download all
                 # of them