From c539b5b3f528ee3e7d02f3e2801e047bd094f538 Mon Sep 17 00:00:00 2001
From: Jose Navas
Date: Mon, 18 Sep 2017 08:41:25 -0700
Subject: [PATCH 1/7] Fix #2276

---
 qiita_db/test/test_util.py      | 10 +++++++++
 qiita_db/util.py                | 38 +++++++++++++++++++++++++++++++
 qiita_pet/handlers/download.py  | 40 ++++++++++++++++++++++++++-------
 qiita_pet/test/test_download.py | 39 +++++++++++++++++++++++++++++---
 4 files changed, 116 insertions(+), 11 deletions(-)

diff --git a/qiita_db/test/test_util.py b/qiita_db/test/test_util.py
index 466eaed7b..11d5ed310 100644
--- a/qiita_db/test/test_util.py
+++ b/qiita_db/test/test_util.py
@@ -622,6 +622,16 @@ def test_get_timeseries_types(self):
                [10, 'mixed', 'combo intervention']]
         self.assertEqual(obs, exp)
 
+    def test_get_filepath_information(self):
+        obs = qdb.util.get_filepath_information(1)
+        # This path is machine specific. Just checking it is not empty
+        self.assertIsNotNone(obs.pop('fullpath'))
+        exp = {'filepath_id': 1L, 'filepath': '1_s_G1_L001_sequences.fastq.gz',
+               'filepath_type': 'raw_forward_seqs', 'checksum': '852952723',
+               'data_type': 'raw_data', 'mountpoint': 'raw_data',
+               'subdirectory': False, 'active': True}
+        self.assertEqual(obs, exp)
+
     def test_filepath_id_to_rel_path(self):
         obs = qdb.util.filepath_id_to_rel_path(1)
         exp = 'raw_data/1_s_G1_L001_sequences.fastq.gz'

diff --git a/qiita_db/util.py b/qiita_db/util.py
index 25898fa54..fc197798e 100644
--- a/qiita_db/util.py
+++ b/qiita_db/util.py
@@ -845,6 +845,44 @@ def move_filepaths_to_upload_folder(study_id, filepaths):
         qdb.sql_connection.TRN.execute()
 
 
+def get_filepath_information(filepath_id):
+    """Gets the filepath information of filepath_id
+
+    Parameters
+    ----------
+    filepath_id : int
+        The filepath id
+
+    Returns
+    -------
+    dict
+        The filepath information
+    """
+    with qdb.sql_connection.TRN:
+        sql = """SELECT filepath_id, filepath, filepath_type, checksum,
+                        data_type, mountpoint, subdirectory, active,
+                        artifact_id
+                 FROM qiita.filepath
+                    JOIN qiita.filepath_type USING (filepath_type_id)
+                    JOIN qiita.data_directory USING (data_directory_id)
+                    LEFT JOIN qiita.artifact_filepath USING (filepath_id)
+                 WHERE filepath_id = %s"""
+        qdb.sql_connection.TRN.add(sql, [filepath_id])
+        res = dict(qdb.sql_connection.TRN.execute_fetchindex()[0])
+
+        def path_builder(db_dir, filepath, mountpoint, subdirectory, obj_id):
+            if subdirectory:
+                return join(db_dir, mountpoint, str(obj_id), filepath)
+            else:
+                return join(db_dir, mountpoint, filepath)
+
+        obj_id = res.pop('artifact_id')
+        res['fullpath'] = path_builder(get_db_files_base_dir(),
+                                       res['filepath'], res['mountpoint'],
+                                       res['subdirectory'], obj_id)
+        return res
+
+
 def filepath_id_to_rel_path(filepath_id):
     """Gets the relative to the base directory of filepath_id
 

diff --git a/qiita_pet/handlers/download.py b/qiita_pet/handlers/download.py
index bb906e532..cea0d488e 100644
--- a/qiita_pet/handlers/download.py
+++ b/qiita_pet/handlers/download.py
@@ -17,7 +17,8 @@
 from .base_handlers import BaseHandler
 from qiita_pet.handlers.api_proxy import study_get_req
 from qiita_db.study import Study
-from qiita_db.util import filepath_id_to_rel_path, get_db_files_base_dir
+from qiita_db.util import (filepath_id_to_rel_path, get_db_files_base_dir,
+                           get_filepath_information)
 from qiita_db.meta_util import validate_filepath_access_by_user
 from qiita_db.metadata_template.sample_template import SampleTemplate
 from qiita_db.metadata_template.prep_template import PrepTemplate
@@ -37,19 +38,42 @@ def get(self, filepath_id):
                 "filepath_id: %s" % (self.current_user.email, str(fid)))
 
         relpath = filepath_id_to_rel_path(fid)
+        fp_info = get_filepath_information(fid)
         fname = basename(relpath)
 
-        # If we don't have nginx, write a file that indicates this
-        self.write("This installation of Qiita was not equipped with nginx, "
-                   "so it is incapable of serving files. The file you "
-                   "attempted to download is located at %s" % relpath)
+        if fp_info['filepath_type'] in ('directory', 'html_summary_dir'):
+            # This is a directory, we need to list all the files so NGINX
+            # can download all of them
+            basedir = get_db_files_base_dir()
+            basedir_len = len(basedir) + 1
+            to_download = []
+            for dp, _, fps in walk(fp_info['fullpath']):
+                for fn in fps:
+                    fullpath = join(dp, fn)
+                    spath = fullpath
+                    if fullpath.startswith(basedir):
+                        spath = fullpath[basedir_len:]
+                    to_download.append((fullpath, spath, spath))
+
+            all_files = '\n'.join(
+                ["- %s /protected/%s %s" % (getsize(fp), sfp, n)
+                 for fp, sfp, n in to_download])
+
+            self.set_header('X-Archive-Files', 'zip')
+            self.write("%s\n" % all_files)
+            fname = '%s.zip' % fname
+        else:
+            # If we don't have nginx, write a file that indicates this
+            self.write("This installation of Qiita was not equipped with "
+                       "nginx, so it is incapable of serving files. The file "
+                       "you attempted to download is located at %s" % relpath)
+            self.set_header('Content-Type', 'application/octet-stream')
+            self.set_header('Content-Transfer-Encoding', 'binary')
+            self.set_header('X-Accel-Redirect', '/protected/' + relpath)
 
         self.set_header('Content-Description', 'File Transfer')
-        self.set_header('Content-Type', 'application/octet-stream')
-        self.set_header('Content-Transfer-Encoding', 'binary')
         self.set_header('Expires', '0')
         self.set_header('Cache-Control', 'no-cache')
-        self.set_header('X-Accel-Redirect', '/protected/' + relpath)
         self.set_header('Content-Disposition',
                         'attachment; filename=%s' % fname)
 

diff --git a/qiita_pet/test/test_download.py b/qiita_pet/test/test_download.py
index d9d0415c8..10ed52bfe 100644
--- a/qiita_pet/test/test_download.py
+++ b/qiita_pet/test/test_download.py
@@ -8,10 +8,10 @@
 
 from unittest import main
 from mock import Mock
-from os.path import exists, isdir, join
-from os import remove, makedirs
+from os.path import exists, isdir, join, basename
+from os import remove, makedirs, close
 from shutil import rmtree
-from tempfile import mkdtemp
+from tempfile import mkdtemp, mkstemp
 
 from biom.util import biom_open
 from biom import example_table as et
@@ -28,9 +28,16 @@ class TestDownloadHandler(TestHandlerBase):
 
     def setUp(self):
         super(TestDownloadHandler, self).setUp()
+        self._clean_up_files = []
 
     def tearDown(self):
         super(TestDownloadHandler, self).tearDown()
+        for fp in self._clean_up_files:
+            if exists(fp):
+                if isdir(fp):
+                    rmtree(fp)
+                else:
+                    remove(fp)
 
     def test_download(self):
         # check success
@@ -45,6 +52,32 @@ def test_download(self):
         response = self.get('/download/1000')
         self.assertEqual(response.code, 403)
 
+        # directory
+        a = Artifact(1)
+        fd, fp = mkstemp(suffix='.html')
+        close(fd)
+        with open(fp, 'w') as f:
+            f.write('\n')
+        self._clean_up_files.append(fp)
+        dirpath = mkdtemp()
+        fd, fp2 = mkstemp(suffix='.txt', dir=dirpath)
+        close(fd)
+        with open(fp2, 'w') as f:
+            f.write('\n')
+        self._clean_up_files.append(dirpath)
+        a.set_html_summary(fp, support_dir=dirpath)
+        for fp_id, _, fp_type in a.filepaths:
+            if fp_type == 'html_summary_dir':
+                break
+        response = self.get('/download/%d' % fp_id)
+        self.assertEqual(response.code, 200)
+
+        fp_name = basename(fp2)
+        dirname = basename(dirpath)
+        self.assertEqual(
+            response.body, "- 1 /protected/FASTQ/1/%s/%s FASTQ/1/%s/%s\n"
+            % (dirname, fp_name, dirname, fp_name))
+
 
 class TestDownloadStudyBIOMSHandler(TestHandlerBase):
 

From 6ad5e799f132825d890b04c80be8584c49a5dd1e Mon Sep 17 00:00:00 2001
From: Jose Navas
Date: Mon, 18 Sep 2017 10:59:55 -0700
Subject: [PATCH 2/7] Factoring out generate nginx directory file list

---
 qiita_pet/handlers/download.py | 91 +++++++++++++++------------------
 1 file changed, 40 insertions(+), 51 deletions(-)

diff --git a/qiita_pet/handlers/download.py b/qiita_pet/handlers/download.py
index cea0d488e..310134f48 100644
--- a/qiita_pet/handlers/download.py
+++ b/qiita_pet/handlers/download.py
@@ -25,7 +25,43 @@
 from qiita_core.util import execute_as_transaction, get_release_info
 
 
-class DownloadHandler(BaseHandler):
+class BaseHandlerDownload(BaseHandler):
+    def _check_permissions(self, sid):
+        # Check general access to study
+        study_info = study_get_req(sid, self.current_user.id)
+        if study_info['status'] != 'success':
+            raise HTTPError(405, "%s: %s, %s" % (study_info['message'],
+                                                 self.current_user.email, sid))
+        return Study(sid)
+
+    def _generate_files(self, header_name, accessions, filename):
+        text = "sample_name\t%s\n%s" % (header_name, '\n'.join(
+            ["%s\t%s" % (k, v) for k, v in viewitems(accessions)]))
+
+        self.set_header('Content-Description', 'text/csv')
+        self.set_header('Expires', '0')
+        self.set_header('Cache-Control', 'no-cache')
+        self.set_header('Content-Disposition', 'attachment; '
+                        'filename=%s' % filename)
+        self.write(text)
+        self.finish()
+
+    def _list_dir_files_nginx(self, dirpath):
+        """"""
+        basedir = get_db_files_base_dir()
+        basedir_len = len(basedir) + 1
+        to_download = []
+        for dp, _, fps in walk(dirpath):
+            for fn in fps:
+                fullpath = join(dp, fn)
+                spath = fullpath
+                if fullpath.startswith(basedir):
+                    spath = fullpath[basedir_len:]
+                to_download.append((fullpath, spath, spath))
+        return to_download
+
+
+class DownloadHandler(BaseHandlerDownload):
     @authenticated
     @coroutine
     @execute_as_transaction
@@ -44,17 +80,7 @@ def get(self, filepath_id):
         if fp_info['filepath_type'] in ('directory', 'html_summary_dir'):
             # This is a directory, we need to list all the files so NGINX
             # can download all of them
-            basedir = get_db_files_base_dir()
-            basedir_len = len(basedir) + 1
-            to_download = []
-            for dp, _, fps in walk(fp_info['fullpath']):
-                for fn in fps:
-                    fullpath = join(dp, fn)
-                    spath = fullpath
-                    if fullpath.startswith(basedir):
-                        spath = fullpath[basedir_len:]
-                    to_download.append((fullpath, spath, spath))
-
+            to_download = self._list_dir_files_nginx(fp_info['fullpath'])
             all_files = '\n'.join(
                 ["- %s /protected/%s %s" % (getsize(fp), sfp, n)
                  for fp, sfp, n in to_download])
@@ -80,28 +106,6 @@ def get(self, filepath_id):
         self.finish()
 
 
-class BaseHandlerDownload(BaseHandler):
-    def _check_permissions(self, sid):
-        # Check general access to study
-        study_info = study_get_req(sid, self.current_user.id)
-        if study_info['status'] != 'success':
-            raise HTTPError(405, "%s: %s, %s" % (study_info['message'],
-                                                 self.current_user.email, sid))
-        return Study(sid)
-
-    def _generate_files(self, header_name, accessions, filename):
-        text = "sample_name\t%s\n%s" % (header_name, '\n'.join(
-            ["%s\t%s" % (k, v) for k, v in viewitems(accessions)]))
-
-        self.set_header('Content-Description', 'text/csv')
-        self.set_header('Expires', '0')
-        self.set_header('Cache-Control', 'no-cache')
-        self.set_header('Content-Disposition', 'attachment; '
-                        'filename=%s' % filename)
-        self.write(text)
-        self.finish()
-
-
 class DownloadStudyBIOMSHandler(BaseHandlerDownload):
     @authenticated
     @coroutine
@@ -125,13 +129,7 @@ def get(self, study_id):
                         # If we have a directory, we actually need to list
                         # all the files from the directory so NGINX can
                        # actually download all of them
-                        for dp, _, fps in walk(path):
-                            for fname in fps:
-                                fullpath = join(dp, fname)
-                                spath = fullpath
-                                if fullpath.startswith(basedir):
-                                    spath = fullpath[basedir_len:]
-                                to_download.append((fullpath, spath, spath))
+                        to_download.extend(self._list_dir_files_nginx(path))
                     elif path.startswith(basedir):
                         spath = path[basedir_len:]
                         to_download.append((path, spath, spath))
@@ -215,16 +213,7 @@ def get(self, study_id):
             if not a.parents:
                 for i, (fid, path, data_type) in enumerate(a.filepaths):
                     if data_type == 'directory':
-                        # If we have a directory, we actually need to list
-                        # all the files from the directory so NGINX can
-                        # actually download all of them
-                        for dp, _, fps in walk(path):
-                            for fname in fps:
-                                fullpath = join(dp, fname)
-                                spath = fullpath
-                                if fullpath.startswith(basedir):
-                                    spath = fullpath[basedir_len:]
-                                to_download.append((fullpath, spath, spath))
+                        to_download.append(path)
                     elif path.startswith(basedir):
                         spath = path[basedir_len:]
                         to_download.append((path, spath, spath))

From 27acf31efd6186deb66234a53f7115d528b6bd2a Mon Sep 17 00:00:00 2001
From: Jose Navas
Date: Mon, 18 Sep 2017 11:05:38 -0700
Subject: [PATCH 3/7] Factoring out the nginx file list writing

---
 qiita_pet/handlers/download.py | 30 ++++++++++++------------------
 1 file changed, 12 insertions(+), 18 deletions(-)

diff --git a/qiita_pet/handlers/download.py b/qiita_pet/handlers/download.py
index 310134f48..8dca9e464 100644
--- a/qiita_pet/handlers/download.py
+++ b/qiita_pet/handlers/download.py
@@ -60,6 +60,15 @@ def _list_dir_files_nginx(self, dirpath):
                 to_download.append((fullpath, spath, spath))
         return to_download
 
+    def _write_nginx_file_list(self, to_download):
+        """"""
+        all_files = '\n'.join(
+            ["- %s /protected/%s %s" % (getsize(fp), sfp, n)
+             for fp, sfp, n in to_download])
+
+        self.set_header('X-Archive-Files', 'zip')
+        self.write("%s\n" % all_files)
+
 
 class DownloadHandler(BaseHandlerDownload):
     @authenticated
@@ -81,12 +90,7 @@ def get(self, filepath_id):
             # This is a directory, we need to list all the files so NGINX
             # can download all of them
             to_download = self._list_dir_files_nginx(fp_info['fullpath'])
-            all_files = '\n'.join(
-                ["- %s /protected/%s %s" % (getsize(fp), sfp, n)
-                 for fp, sfp, n in to_download])
-
-            self.set_header('X-Archive-Files', 'zip')
-            self.write("%s\n" % all_files)
+            self._write_nginx_file_list(to_download)
             fname = '%s.zip' % fname
         else:
             # If we don't have nginx, write a file that indicates this
@@ -150,10 +154,7 @@ def get(self, study_id):
                         (qmf, sqmf, 'mapping_files/%s_mapping_file.txt'
                          % a.id))
 
-        # If we don't have nginx, write a file that indicates this
-        all_files = '\n'.join(["- %s /protected/%s %s" % (getsize(fp), sfp, n)
-                               for fp, sfp, n in to_download])
-        self.write("%s\n" % all_files)
+        self._write_nginx_file_list(to_download)
 
         zip_fn = 'study_%d_%s.zip' % (
             study_id, datetime.now().strftime('%m%d%y-%H%M%S'))
@@ -161,7 +162,6 @@ def get(self, study_id):
         self.set_header('Content-Description', 'File Transfer')
         self.set_header('Expires', '0')
         self.set_header('Cache-Control', 'no-cache')
-        self.set_header('X-Archive-Files', 'zip')
         self.set_header('Content-Disposition',
                         'attachment; filename=%s' % zip_fn)
         self.finish()
@@ -234,12 +234,7 @@ def get(self, study_id):
                         (qmf, sqmf, 'mapping_files/%s_mapping_file.txt'
                          % a.id))
 
-        # If we don't have nginx, write a file that indicates this
-        # Note that this configuration will automatically create and download
-        # ("on the fly") the zip file via the contents in all_files
-        all_files = '\n'.join(["- %s /protected/%s %s" % (getsize(fp), sfp, n)
-                               for fp, sfp, n in to_download])
-        self.write("%s\n" % all_files)
+        self._write_nginx_file_list(to_download)
 
         zip_fn = 'study_raw_data_%d_%s.zip' % (
             study_id, datetime.now().strftime('%m%d%y-%H%M%S'))
@@ -247,7 +242,6 @@ def get(self, study_id):
         self.set_header('Content-Description', 'File Transfer')
         self.set_header('Expires', '0')
         self.set_header('Cache-Control', 'no-cache')
-        self.set_header('X-Archive-Files', 'zip')
         self.set_header('Content-Disposition',
                         'attachment; filename=%s' % zip_fn)
         self.finish()

From 460998f5bf39ef7964cf39014cccc941b592ce54 Mon Sep 17 00:00:00 2001
From: Jose Navas
Date: Mon, 18 Sep 2017 11:09:53 -0700
Subject: [PATCH 4/7] Factoring out generating the file list of an artifact

---
 qiita_pet/handlers/download.py | 90 +++++++++++++---------------------
 1 file changed, 34 insertions(+), 56 deletions(-)

diff --git a/qiita_pet/handlers/download.py b/qiita_pet/handlers/download.py
index 8dca9e464..00218c318 100644
--- a/qiita_pet/handlers/download.py
+++ b/qiita_pet/handlers/download.py
@@ -60,6 +60,38 @@ def _list_dir_files_nginx(self, dirpath):
                 to_download.append((fullpath, spath, spath))
         return to_download
 
+    def _list_artifact_files_nginx(self, artifact):
+        """"""
+        basedir = get_db_files_base_dir()
+        basedir_len = len(basedir) + 1
+        to_download = []
+        for i, (fid, path, data_type) in enumerate(artifact.filepaths):
+            # ignore if tgz as they could create problems and the
+            # raw data is in the folder
+            if data_type == 'tgz':
+                continue
+            if data_type == 'directory':
+                # If we have a directory, we actually need to list all the
+                # files from the directory so NGINX can actually download all
+                # of them
+                to_download.extend(self._list_dir_files_nginx(path))
+            elif path.startswith(basedir):
+                spath = path[basedir_len:]
+                to_download.append((path, spath, spath))
+            else:
+                to_download.append((path, path, path))
+
+        for pt in artifact.prep_templates:
+            qmf = pt.qiime_map_fp
+            if qmf is not None:
+                sqmf = qmf
+                if qmf.startswith(basedir):
+                    sqmf = qmf[basedir_len:]
+                to_download.append(
+                    (qmf, sqmf, 'mapping_files/%s_mapping_file.txt'
+                     % artifact.id))
+        return to_download
+
     def _write_nginx_file_list(self, to_download):
         """"""
         all_files = '\n'.join(
@@ -119,42 +151,11 @@ class DownloadStudyBIOMSHandler(BaseHandlerDownload):
     def get(self, study_id):
         study_id = int(study_id)
         study = self._check_permissions(study_id)
-
-        basedir = get_db_files_base_dir()
-        basedir_len = len(basedir) + 1
         # loop over artifacts and retrieve those that we have access to
         to_download = []
         for a in study.artifacts():
             if a.artifact_type == 'BIOM':
-                for i, (fid, path, data_type) in enumerate(a.filepaths):
-                    # ignore if tgz as they could create problems and the
-                    # raw data is in the folder
-                    if data_type == 'tgz':
-                        continue
-                    if data_type == 'directory':
-                        # If we have a directory, we actually need to list
-                        # all the files from the directory so NGINX can
-                        # actually download all of them
-                        to_download.extend(self._list_dir_files_nginx(path))
-                    elif path.startswith(basedir):
-                        spath = path[basedir_len:]
-                        to_download.append((path, spath, spath))
-                    else:
-                        # We are not aware of any case that can trigger this
-                        # situation, but we wanted to be overly cautious
-                        # There is no test for this line cause we don't know
-                        # how to trigger it
-                        to_download.append((path, path, path))
-
-                for pt in a.prep_templates:
-                    qmf = pt.qiime_map_fp
-                    if qmf is not None:
-                        sqmf = qmf
-                        if qmf.startswith(basedir):
-                            sqmf = qmf[basedir_len:]
-                        to_download.append(
-                            (qmf, sqmf, 'mapping_files/%s_mapping_file.txt'
-                             % a.id))
+                to_download.extend(self._list_artifact_files_nginx(a))
 
         self._write_nginx_file_list(to_download)
 
@@ -206,33 +207,10 @@ def get(self, study_id):
                             self.current_user.email, str(study_id)))
 
-        basedir = get_db_files_base_dir()
-        basedir_len = len(basedir) + 1
         # loop over artifacts and retrieve raw data (no parents)
         to_download = []
         for a in study.artifacts():
             if not a.parents:
-                for i, (fid, path, data_type) in enumerate(a.filepaths):
-                    if data_type == 'directory':
-                        to_download.append(path)
-                    elif path.startswith(basedir):
-                        spath = path[basedir_len:]
-                        to_download.append((path, spath, spath))
-                    else:
-                        # We are not aware of any case that can trigger this
-                        # situation, but we wanted to be overly cautious
-                        # There is no test for this line cause we don't know
-                        # how to trigger it
-                        to_download.append((path, path, path))
-
-                for pt in a.prep_templates:
-                    qmf = pt.qiime_map_fp
-                    if qmf is not None:
-                        sqmf = qmf
-                        if qmf.startswith(basedir):
-                            sqmf = qmf[basedir_len:]
-                        to_download.append(
-                            (qmf, sqmf, 'mapping_files/%s_mapping_file.txt'
-                             % a.id))
+                to_download.extend(self._list_artifact_files_nginx(a))
 
         self._write_nginx_file_list(to_download)
 

From c3a27de02c144563a731dfd618307ed6ea9a93bd Mon Sep 17 00:00:00 2001
From: Jose Navas
Date: Mon, 18 Sep 2017 11:19:15 -0700
Subject: [PATCH 5/7] Factoring out the header setting

---
 qiita_pet/handlers/download.py | 77 +++++++++++++++++++++++-----------
 1 file changed, 52 insertions(+), 25 deletions(-)

diff --git a/qiita_pet/handlers/download.py b/qiita_pet/handlers/download.py
index 00218c318..c5d11140a 100644
--- a/qiita_pet/handlers/download.py
+++ b/qiita_pet/handlers/download.py
@@ -47,7 +47,19 @@ def _generate_files(self, header_name, accessions, filename):
         self.finish()
 
     def _list_dir_files_nginx(self, dirpath):
-        """"""
+        """Generates the nginx list of files in the given dirpath
+
+        Parameters
+        ----------
+        dirpath : str
+            Path to the directory
+
+        Returns
+        -------
+        list of (str, str, str)
+            The path information needed by nginx for each file in the
+            directory
+        """
         basedir = get_db_files_base_dir()
         basedir_len = len(basedir) + 1
         to_download = []
@@ -61,7 +73,18 @@ def _list_dir_files_nginx(self, dirpath):
         return to_download
 
     def _list_artifact_files_nginx(self, artifact):
-        """"""
+        """Generates a nginx list of files for the given artifact
+
+        Parameters
+        ----------
+        artifact : qiita_db.artifact.Artifact
+            The artifact to retrieve the files from
+
+        Returns
+        -------
+        list of (str, str, str)
+            The path information needed by nginx for each file in the artifact
+        """
         basedir = get_db_files_base_dir()
         basedir_len = len(basedir) + 1
         to_download = []
@@ -93,7 +116,13 @@ def _list_artifact_files_nginx(self, artifact):
         return to_download
 
     def _write_nginx_file_list(self, to_download):
-        """"""
+        """Writes out the nginx file list
+
+        Parameters
+        ----------
+        to_download : list of (str, str, str)
+            The file list information
+        """
         all_files = '\n'.join(
             ["- %s /protected/%s %s" % (getsize(fp), sfp, n)
              for fp, sfp, n in to_download])
@@ -101,6 +130,20 @@ def _write_nginx_file_list(self, to_download):
         self.set_header('X-Archive-Files', 'zip')
         self.write("%s\n" % all_files)
 
+    def _set_nginx_headers(self, fname):
+        """Sets common nginx headers
+
+        Parameters
+        ----------
+        fname : str
+            Nginx's output filename
+        """
+        self.set_header('Content-Description', 'File Transfer')
+        self.set_header('Expires', '0')
+        self.set_header('Cache-Control', 'no-cache')
+        self.set_header('Content-Disposition',
+                        'attachment; filename=%s' % fname)
+
 
 class DownloadHandler(BaseHandlerDownload):
     @authenticated
@@ -133,12 +176,7 @@ def get(self, filepath_id):
             self.set_header('Content-Transfer-Encoding', 'binary')
             self.set_header('X-Accel-Redirect', '/protected/' + relpath)
 
-        self.set_header('Content-Description', 'File Transfer')
-        self.set_header('Expires', '0')
-        self.set_header('Cache-Control', 'no-cache')
-        self.set_header('Content-Disposition',
-                        'attachment; filename=%s' % fname)
-
+        self._set_nginx_headers(fname)
         self.finish()
 
@@ -160,15 +198,11 @@ def get(self, study_id):
         zip_fn = 'study_%d_%s.zip' % (
             study_id, datetime.now().strftime('%m%d%y-%H%M%S'))
 
-        self.set_header('Content-Description', 'File Transfer')
-        self.set_header('Expires', '0')
-        self.set_header('Cache-Control', 'no-cache')
-        self.set_header('Content-Disposition',
-                        'attachment; filename=%s' % zip_fn)
+        self._set_nginx_headers(zip_fn)
         self.finish()
 
 
-class DownloadRelease(BaseHandler):
+class DownloadRelease(BaseHandlerDownload):
     @coroutine
     def get(self, extras):
         _, relpath, _ = get_release_info()
@@ -180,15 +214,12 @@ def get(self, extras):
                    "so it is incapable of serving files. The file you "
                    "attempted to download is located at %s" % relpath)
 
-        self.set_header('Content-Description', 'File Transfer')
+        self._set_nginx_headers(basename(relpath))
+
         self.set_header('Content-Type', 'application/octet-stream')
         self.set_header('Content-Transfer-Encoding', 'binary')
-        self.set_header('Expires', '0')
-        self.set_header('Cache-Control', 'no-cache')
         self.set_header('X-Accel-Redirect', '/protected-working_dir/' +
                         relpath)
-        self.set_header('Content-Disposition',
-                        'attachment; filename=%s' % basename(relpath))
 
         self.finish()
@@ -215,10 +246,6 @@ def get(self, study_id):
         zip_fn = 'study_raw_data_%d_%s.zip' % (
             study_id, datetime.now().strftime('%m%d%y-%H%M%S'))
 
-        self.set_header('Content-Description', 'File Transfer')
-        self.set_header('Expires', '0')
-        self.set_header('Cache-Control', 'no-cache')
-        self.set_header('Content-Disposition',
-                        'attachment; filename=%s' % zip_fn)
+        self._set_nginx_headers(zip_fn)
         self.finish()
 

From 92c5b05ea01587e691054d9d2ef652ef2b8008e8 Mon Sep 17 00:00:00 2001
From: Jose Navas
Date: Mon, 18 Sep 2017 14:03:35 -0700
Subject: [PATCH 6/7] Addressing @antgonza's comment

---
 qiita_pet/handlers/download.py | 22 +++++++++++++++-------
 1 file changed, 15 insertions(+), 7 deletions(-)

diff --git a/qiita_pet/handlers/download.py b/qiita_pet/handlers/download.py
index c5d11140a..fd0de463f 100644
--- a/qiita_pet/handlers/download.py
+++ b/qiita_pet/handlers/download.py
@@ -144,6 +144,19 @@ def _set_nginx_headers(self, fname):
         self.set_header('Content-Disposition',
                         'attachment; filename=%s' % fname)
 
+    def _write_nginx_placeholder_file(self, fp):
+        """Writes a nginx placeholder file in case nginx is not set up
+
+        Parameters
+        ----------
+        fp : str
+            The path to be downloaded through nginx
+        """
+        # If we don't have nginx, write a file that indicates this
+        self.write("This installation of Qiita was not equipped with "
+                   "nginx, so it is incapable of serving files. The file "
+                   "you attempted to download is located at %s" % fp)
+
 
 class DownloadHandler(BaseHandlerDownload):
     @authenticated
@@ -168,10 +181,7 @@ def get(self, filepath_id):
             self._write_nginx_file_list(to_download)
             fname = '%s.zip' % fname
         else:
-            # If we don't have nginx, write a file that indicates this
-            self.write("This installation of Qiita was not equipped with "
-                       "nginx, so it is incapable of serving files. The file "
-                       "you attempted to download is located at %s" % relpath)
+            self._write_nginx_placeholder_file(relpath)
             self.set_header('Content-Type', 'application/octet-stream')
             self.set_header('Content-Transfer-Encoding', 'binary')
             self.set_header('X-Accel-Redirect', '/protected/' + relpath)
@@ -210,9 +220,7 @@ def get(self, extras):
         # If we don't have nginx, write a file that indicates this
         # Note that this configuration will automatically create and download
         # ("on the fly") the zip file via the contents in all_files
-        self.write("This installation of Qiita was not equipped with nginx, "
-                   "so it is incapable of serving files. The file you "
-                   "attempted to download is located at %s" % relpath)
+        self._write_nginx_placeholder_file(relpath)
 
         self._set_nginx_headers(basename(relpath))
 

From 32c038235bd0d3207da1f3a5c8a3f6e6369307e3 Mon Sep 17 00:00:00 2001
From: Jose Navas
Date: Tue, 19 Sep 2017 07:28:08 -0700
Subject: [PATCH 7/7] Addressing @wasade's comments

---
 qiita_db/util.py               | 47 ++++++++++++++++++++++------------
 qiita_pet/handlers/download.py |  4 +--
 2 files changed, 33 insertions(+), 18 deletions(-)

diff --git a/qiita_db/util.py b/qiita_db/util.py
index fc197798e..b664bfa8b 100644
--- a/qiita_db/util.py
+++ b/qiita_db/util.py
@@ -649,6 +649,33 @@ def str_to_id(x):
             chain.from_iterable(qdb.sql_connection.TRN.execute()[idx:])))
 
 
+def _path_builder(db_dir, filepath, mountpoint, subdirectory, obj_id):
+    """Builds the path of a DB stored file
+
+    Parameters
+    ----------
+    db_dir : str
+        The DB base dir
+    filepath : str
+        The path stored in the DB
+    mountpoint : str
+        The mountpoint of the given file
+    subdirectory : bool
+        Whether the file is stored in a subdirectory in the mountpoint or not
+    obj_id : int
+        The id of the object to which the file is attached
+
+    Returns
+    -------
+    str
+        The full path of the given file
+    """
+    if subdirectory:
+        return join(db_dir, mountpoint, str(obj_id), filepath)
+    else:
+        return join(db_dir, mountpoint, filepath)
+
+
 def retrieve_filepaths(obj_fp_table, obj_id_column, obj_id, sort=None,
                        fp_type=None):
     """Retrieves the filepaths for the given object id
@@ -674,12 +701,6 @@ def retrieve_filepaths(obj_fp_table, obj_id_column, obj_id, sort=None,
         object id
 
     """
-    def path_builder(db_dir, filepath, mountpoint, subdirectory, obj_id):
-        if subdirectory:
-            return join(db_dir, mountpoint, str(obj_id), filepath)
-        else:
-            return join(db_dir, mountpoint, filepath)
-
     sql_sort = ""
     if sort == 'ascending':
         sql_sort = " ORDER BY filepath_id"
@@ -710,7 +731,7 @@ def retrieve_filepaths(obj_fp_table, obj_id_column, obj_id, sort=None,
         results = qdb.sql_connection.TRN.execute_fetchindex()
         db_dir = get_db_files_base_dir()
 
-        return [(fpid, path_builder(db_dir, fp, m, s, obj_id), fp_type_)
+        return [(fpid, _path_builder(db_dir, fp, m, s, obj_id), fp_type_)
                 for fpid, fp, fp_type_, m, s in results]
 
 
@@ -870,16 +891,10 @@ def get_filepath_information(filepath_id):
         qdb.sql_connection.TRN.add(sql, [filepath_id])
         res = dict(qdb.sql_connection.TRN.execute_fetchindex()[0])
 
-        def path_builder(db_dir, filepath, mountpoint, subdirectory, obj_id):
-            if subdirectory:
-                return join(db_dir, mountpoint, str(obj_id), filepath)
-            else:
-                return join(db_dir, mountpoint, filepath)
-
         obj_id = res.pop('artifact_id')
-        res['fullpath'] = path_builder(get_db_files_base_dir(),
-                                       res['filepath'], res['mountpoint'],
-                                       res['subdirectory'], obj_id)
+        res['fullpath'] = _path_builder(get_db_files_base_dir(),
+                                        res['filepath'], res['mountpoint'],
+                                        res['subdirectory'], obj_id)
         return res
 
 
diff --git a/qiita_pet/handlers/download.py b/qiita_pet/handlers/download.py
index fd0de463f..8b06f7230 100644
--- a/qiita_pet/handlers/download.py
+++ b/qiita_pet/handlers/download.py
@@ -10,7 +10,7 @@
 from tornado.gen import coroutine
 from future.utils import viewitems
 
-from os.path import basename, getsize, join
+from os.path import basename, getsize, join, isdir
 from os import walk
 from datetime import datetime
 
@@ -93,7 +93,7 @@ def _list_artifact_files_nginx(self, artifact):
             # ignore if tgz as they could create problems and the
             # raw data is in the folder
             if data_type == 'tgz':
                 continue
-            if data_type == 'directory':
+            if isdir(path):
                 # If we have a directory, we actually need to list all the
                 # files from the directory so NGINX can actually download all
                 # of them