diff --git a/CITATIONS.md b/CITATIONS.md
index d7f75e84..30829486 100644
--- a/CITATIONS.md
+++ b/CITATIONS.md
@@ -12,6 +12,8 @@
 
 - [Aspera CLI](https://github.com/IBM/aspera-cli)
 
+- [fastq-dl](https://github.com/rpetit3/fastq-dl)
+
 - [Python](http://www.python.org)
 
 - [Requests](https://docs.python-requests.org/)
diff --git a/modules.json b/modules.json
index ea85317b..8593e71f 100644
--- a/modules.json
+++ b/modules.json
@@ -10,6 +10,11 @@
                         "git_sha": "666652151335353eef2fcd58880bcef5bc2928e1",
                         "installed_by": ["fastq_download_prefetch_fasterqdump_sratools"]
                     },
+                    "fastqdl": {
+                        "branch": "master",
+                        "git_sha": "fc355d4bbc11017d768c0fa585ad8d265d0c6ebb",
+                        "installed_by": ["modules"]
+                    },
                     "sratools/fasterqdump": {
                         "branch": "master",
                         "git_sha": "5caf7640a9ef1d18d765d55339be751bb0969dfa",
diff --git a/modules/nf-core/fastqdl/environment.yml b/modules/nf-core/fastqdl/environment.yml
new file mode 100644
index 00000000..d850c740
--- /dev/null
+++ b/modules/nf-core/fastqdl/environment.yml
@@ -0,0 +1,8 @@
+---
+# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/environment-schema.json
+channels:
+  - conda-forge
+  - bioconda
+dependencies:
+  - bioconda::fastq-dl=3.0.1
+  - conda-forge::legacy-cgi=2.6.2
diff --git a/modules/nf-core/fastqdl/main.nf b/modules/nf-core/fastqdl/main.nf
new file mode 100644
index 00000000..99ababfb
--- /dev/null
+++ b/modules/nf-core/fastqdl/main.nf
@@ -0,0 +1,53 @@
+process FASTQDL {
+    tag "$meta.id"
+    label 'process_medium'
+
+    conda "${moduleDir}/environment.yml"
+    container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
+        'https://depot.galaxyproject.org/singularity/fastq-dl:3.0.1--pyhdfd78af_0':
+        'biocontainers/fastq-dl:3.0.1--pyhdfd78af_0' }"
+
+    input:
+    tuple val(meta), val(accession)
+
+    output:
+    tuple val(meta), path("*.fastq.gz")       , emit: fastq
+    tuple val(meta), path("*-run-info.tsv")   , emit: runinfo
+    tuple val(meta), path("*-run-mergers.tsv"), emit: runmergers, optional: true
+    path "versions.yml"                       , emit: versions
+
+    when:
+    task.ext.when == null || task.ext.when
+
+    script:
+    def args = task.ext.args ?: ''
+    def prefix = task.ext.prefix ?: "${meta.id}"
+    """
+    fastq-dl \\
+        ${args} \\
+        --prefix ${prefix} \\
+        --accession ${accession} \\
+        --cpus ${task.cpus} \\
+        --outdir .
+
+    cat <<-END_VERSIONS > versions.yml
+    "${task.process}":
+        fastq-dl: \$(fastq-dl --version |& sed 's/.* //')
+    END_VERSIONS
+    """
+
+    stub:
+    def args = task.ext.args ?: ''
+    def prefix = task.ext.prefix ?: "${meta.id}"
+    """
+    echo "" | gzip > ${accession}.fastq.gz
+    echo "" | gzip > ${accession}_1.fastq.gz
+    echo "" | gzip > ${accession}_2.fastq.gz
+    touch ${prefix}-run-info.tsv
+
+    cat <<-END_VERSIONS > versions.yml
+    "${task.process}":
+        fastq-dl: \$(fastq-dl --version |& sed 's/.* //')
+    END_VERSIONS
+    """
+}
diff --git a/modules/nf-core/fastqdl/meta.yml b/modules/nf-core/fastqdl/meta.yml
new file mode 100644
index 00000000..49d34156
--- /dev/null
+++ b/modules/nf-core/fastqdl/meta.yml
@@ -0,0 +1,79 @@
+# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/meta-schema.json
+name: "fastqdl"
+description: Download FASTQ files from SRA or ENA repositories.
+keywords:
+  - download
+  - ena
+  - fastq
+  - sra
+tools:
+  - "fastqdl":
+      description: "A tool to download FASTQs associated with Study, Experiment, or
+        Run accessions."
+      homepage: "https://github.com/rpetit3/fastq-dl"
+      documentation: "https://github.com/rpetit3/fastq-dl"
+      tool_dev_url: "https://github.com/rpetit3/fastq-dl"
+      doi: "10.5281/zenodo.13957735"
+      licence: ["MIT"]
+      identifier: biotools:fastq-dl
+
+input:
+  - - meta:
+        type: map
+        description: |
+          Groovy Map containing sample information
+          e.g. `[ id:'sample1' ]`
+    - accession:
+        type: string
+        description: ENA/SRA accession to query
+
+output:
+  fastq:
+    - - meta:
+          type: map
+          description: |
+            Groovy Map containing sample information
+            e.g. `[ id:'sample1' ]`
+      - "*.fastq.gz":
+          type: file
+          description: FASTQ files downloaded from ENA or SRA
+          pattern: "*.fastq.gz"
+          ontologies:
+            - edam: http://edamontology.org/format_3989 # GZIP format
+  runinfo:
+    - - meta:
+          type: map
+          description: |
+            Groovy Map containing sample information
+            e.g. `[ id:'sample1' ]`
+      - "*-run-info.tsv":
+          type: file
+          description: Tab-delimited file containing metadata for each Run
+            downloaded
+          pattern: "*-run-info.tsv"
+          ontologies:
+            - edam: http://edamontology.org/format_3475 # TSV
+  runmergers:
+    - - meta:
+          type: map
+          description: |
+            Groovy Map containing sample information
+            e.g. `[ id:'sample1' ]`
+      - "*-run-mergers.tsv":
+          type: file
+          description: Tab-delimited file merge information from
+            --group-by-experiment or --group-by-sample
+          pattern: "*-run-mergers.tsv"
+          ontologies:
+            - edam: http://edamontology.org/format_3475 # TSV
+  versions:
+    - versions.yml:
+        type: file
+        description: File containing software versions
+        pattern: "versions.yml"
+        ontologies:
+          - edam: http://edamontology.org/format_3750 # YAML
+authors:
+  - "@camlloyd"
+maintainers:
+  - "@camlloyd"
diff --git a/modules/nf-core/fastqdl/nextflow.config b/modules/nf-core/fastqdl/nextflow.config
new file mode 100644
index 00000000..fec8cf82
--- /dev/null
+++ b/modules/nf-core/fastqdl/nextflow.config
@@ -0,0 +1,18 @@
+process {
+    withName: 'FASTQDL' {
+        publishDir = [
+            [
+                path: { "${params.outdir}/fastq" },
+                pattern: "*.fastq.gz"
+            ],
+            [
+                path: { "${params.outdir}/metadata" },
+                pattern: "*-run-info.tsv"
+            ],
+            [
+                path: { "${params.outdir}/metadata" },
+                pattern: "*-run-mergers.tsv"
+            ]
+        ]
+    }
+}
diff --git a/modules/nf-core/fastqdl/tests/main.nf.test b/modules/nf-core/fastqdl/tests/main.nf.test
new file mode 100644
index 00000000..59b12a30
--- /dev/null
+++ b/modules/nf-core/fastqdl/tests/main.nf.test
@@ -0,0 +1,103 @@
+nextflow_process {
+
+    name "Test Process FASTQDL"
+    script "../main.nf"
+    process "FASTQDL"
+
+    tag "modules"
+    tag "modules_nfcore"
+    tag "fastqdl"
+
+    test("fastqdl - run") {
+
+        when {
+            process {
+                """
+                input[0] = Channel.of([
+                    [ id:'test' ],
+                    'ERR5069949'
+                ])
+                """
+            }
+        }
+
+        then {
+            assertAll(
+                { assert process.success },
+                { assert snapshot(process.out).match() }
+            )
+        }
+
+    }
+
+    test("fastqdl - experiment") {
+
+        when {
+            process {
+                """
+                input[0] = Channel.of([
+                    [ id:'test' ],
+                    'ERX4876079'
+                ])
+                """
+            }
+        }
+
+        then {
+            assertAll(
+                { assert process.success },
+                { assert snapshot(process.out).match() }
+            )
+        }
+
+    }
+
+    test("fastqdl - run - stub") {
+
+        options "-stub"
+
+        when {
+            process {
+                """
+                
input[0] = Channel.of ([
+                    [ id:'test' ],
+                    'ERR5069949'
+                ])
+                """
+            }
+        }
+
+        then {
+            assertAll(
+                { assert process.success },
+                { assert snapshot(process.out).match() }
+            )
+        }
+
+    }
+
+test("fastqdl - experiment - stub") {
+
+        options "-stub"
+
+        when {
+            process {
+                """
+                input[0] = Channel.of ([
+                    [ id:'test' ],
+                    'ERX4876079'
+                ])
+                """
+            }
+        }
+
+        then {
+            assertAll(
+                { assert process.success },
+                { assert snapshot(process.out).match() }
+            )
+        }
+
+    }
+
+}
diff --git a/modules/nf-core/fastqdl/tests/main.nf.test.snap b/modules/nf-core/fastqdl/tests/main.nf.test.snap
new file mode 100644
index 00000000..0c50f45c
--- /dev/null
+++ b/modules/nf-core/fastqdl/tests/main.nf.test.snap
@@ -0,0 +1,254 @@
+{
+    "fastqdl - run - stub": {
+        "content": [
+            {
+                "0": [
+                    [
+                        {
+                            "id": "test"
+                        },
+                        [
+                            "ERR5069949.fastq.gz:md5,68b329da9893e34099c7d8ad5cb9c940",
+                            "ERR5069949_1.fastq.gz:md5,68b329da9893e34099c7d8ad5cb9c940",
+                            "ERR5069949_2.fastq.gz:md5,68b329da9893e34099c7d8ad5cb9c940"
+                        ]
+                    ]
+                ],
+                "1": [
+                    [
+                        {
+                            "id": "test"
+                        },
+                        "test-run-info.tsv:md5,d41d8cd98f00b204e9800998ecf8427e"
+                    ]
+                ],
+                "2": [
+                    
+                ],
+                "3": [
+                    "versions.yml:md5,1c78c47d16f00ad96065c7a4830e89e0"
+                ],
+                "fastq": [
+                    [
+                        {
+                            "id": "test"
+                        },
+                        [
+                            "ERR5069949.fastq.gz:md5,68b329da9893e34099c7d8ad5cb9c940",
+                            "ERR5069949_1.fastq.gz:md5,68b329da9893e34099c7d8ad5cb9c940",
+                            "ERR5069949_2.fastq.gz:md5,68b329da9893e34099c7d8ad5cb9c940"
+                        ]
+                    ]
+                ],
+                "runinfo": [
+                    [
+                        {
+                            "id": "test"
+                        },
+                        "test-run-info.tsv:md5,d41d8cd98f00b204e9800998ecf8427e"
+                    ]
+                ],
+                "runmergers": [
+                    
+                ],
+                "versions": [
+                    "versions.yml:md5,1c78c47d16f00ad96065c7a4830e89e0"
+                ]
+            }
+        ],
+        "meta": {
+            "nf-test": "0.9.2",
+            "nextflow": "25.04.6"
+        },
+        "timestamp": "2025-07-14T14:33:23.831919254"
+    },
+    "fastqdl - experiment": {
+        "content": [
+            {
+                "0": [
+                    [
+                        {
+                            "id": "test"
+                        },
+                        [
+                            "ERR5069949.fastq.gz:md5,c931a862c2126788432684cca99db416",
+                            "ERR5069949_1.fastq.gz:md5,b643cdf5f6a8fae5063fb2b7c172071d",
+                            "ERR5069949_2.fastq.gz:md5,685c1b66d8bf0c20a37402a0578acca2"
+                        ]
+                    ]
+                ],
+                "1": [
+                    [
+                        {
+                            "id": "test"
+                        },
+                        "test-run-info.tsv:md5,707edf67dd64548633abe03aa1fc8ff4"
+                    ]
+                ],
+                "2": [
+                    
+                ],
+                "3": [
+                    "versions.yml:md5,1c78c47d16f00ad96065c7a4830e89e0"
+                ],
+                "fastq": [
+                    [
+                        {
+                            "id": "test"
+                        },
+                        [
+                            "ERR5069949.fastq.gz:md5,c931a862c2126788432684cca99db416",
+                            "ERR5069949_1.fastq.gz:md5,b643cdf5f6a8fae5063fb2b7c172071d",
+                            "ERR5069949_2.fastq.gz:md5,685c1b66d8bf0c20a37402a0578acca2"
+                        ]
+                    ]
+                ],
+                "runinfo": [
+                    [
+                        {
+                            "id": "test"
+                        },
+                        "test-run-info.tsv:md5,707edf67dd64548633abe03aa1fc8ff4"
+                    ]
+                ],
+                "runmergers": [
+                    
+                ],
+                "versions": [
+                    "versions.yml:md5,1c78c47d16f00ad96065c7a4830e89e0"
+                ]
+            }
+        ],
+        "meta": {
+            "nf-test": "0.9.2",
+            "nextflow": "25.04.6"
+        },
+        "timestamp": "2025-07-14T14:31:27.540008436"
+    },
+    "fastqdl - experiment - stub": {
+        "content": [
+            {
+                "0": [
+                    [
+                        {
+                            "id": "test"
+                        },
+                        [
+                            "ERX4876079.fastq.gz:md5,68b329da9893e34099c7d8ad5cb9c940",
+                            "ERX4876079_1.fastq.gz:md5,68b329da9893e34099c7d8ad5cb9c940",
+                            "ERX4876079_2.fastq.gz:md5,68b329da9893e34099c7d8ad5cb9c940"
+                        ]
+                    ]
+                ],
+                "1": [
+                    [
+                        {
+                            "id": "test"
+                        },
+                        "test-run-info.tsv:md5,d41d8cd98f00b204e9800998ecf8427e"
+                    ]
+                ],
+                "2": [
+                    
+                ],
+                "3": [
+                    "versions.yml:md5,1c78c47d16f00ad96065c7a4830e89e0"
+                ],
+                "fastq": [
+                    [
+                        {
+                            "id": "test"
+                        },
+                        [
+                            "ERX4876079.fastq.gz:md5,68b329da9893e34099c7d8ad5cb9c940",
+                            "ERX4876079_1.fastq.gz:md5,68b329da9893e34099c7d8ad5cb9c940",
+                            "ERX4876079_2.fastq.gz:md5,68b329da9893e34099c7d8ad5cb9c940"
+                        ]
+                    ]
+                ],
+                "runinfo": [
+                    [
+                        {
+                            "id": "test"
+                        },
+                        "test-run-info.tsv:md5,d41d8cd98f00b204e9800998ecf8427e"
+                    ]
+                ],
+                "runmergers": [
+                    
+                ],
+                "versions": [
+                    "versions.yml:md5,1c78c47d16f00ad96065c7a4830e89e0"
+                ]
+            }
+        ],
+        "meta": {
+            "nf-test": "0.9.2",
+            "nextflow": "25.04.6"
+        },
+        "timestamp": "2025-07-14T14:35:22.438530573"
+    },
+    "fastqdl - run": {
+        "content": [
+            {
+                "0": [
+                    [
+                        {
+                            "id": "test"
+                        },
+                        [
+                            "ERR5069949.fastq.gz:md5,c931a862c2126788432684cca99db416",
+                            "ERR5069949_1.fastq.gz:md5,b643cdf5f6a8fae5063fb2b7c172071d",
+                            "ERR5069949_2.fastq.gz:md5,685c1b66d8bf0c20a37402a0578acca2"
+                        ]
+                    ]
+                ],
+                "1": [
+                    [
+                        {
+                            "id": "test"
+                        },
+                        "test-run-info.tsv:md5,707edf67dd64548633abe03aa1fc8ff4"
+                    ]
+                ],
+                "2": [
+                    
+                ],
+                "3": [
+                    "versions.yml:md5,1c78c47d16f00ad96065c7a4830e89e0"
+                ],
+                "fastq": [
+                    [
+                        {
+                            "id": "test"
+                        },
+                        [
+                            "ERR5069949.fastq.gz:md5,c931a862c2126788432684cca99db416",
+                            "ERR5069949_1.fastq.gz:md5,b643cdf5f6a8fae5063fb2b7c172071d",
+                            "ERR5069949_2.fastq.gz:md5,685c1b66d8bf0c20a37402a0578acca2"
+                        ]
+                    ]
+                ],
+                "runinfo": [
+                    [
+                        {
+                            "id": "test"
+                        },
+                        "test-run-info.tsv:md5,707edf67dd64548633abe03aa1fc8ff4"
+                    ]
+                ],
+                "runmergers": [
+                    
+                ],
+                "versions": [
+                    "versions.yml:md5,1c78c47d16f00ad96065c7a4830e89e0"
+                ]
+            }
+        ],
+        "meta": {
+            "nf-test": "0.9.2",
+            "nextflow": "25.04.6"
+        },
+        "timestamp": "2025-07-14T14:30:14.026645864"
+    }
+}
\ No newline at end of file
diff --git a/nextflow_schema.json b/nextflow_schema.json
index d4d87227..2401b6aa 100644
--- a/nextflow_schema.json
+++ b/nextflow_schema.json
@@ -51,8 +51,8 @@
                     "type": "string",
                     "default": "ftp",
                     "fa_icon": "fas fa-download",
-                    "enum": ["aspera", "ftp", "sratools"],
-                    "description": "Method to download FastQ files. Available options are 'aspera', 'ftp' or 'sratools'. Default is 'ftp'.",
-                    "help_text": "FTP and Aspera CLI download FastQ files directly from the ENA FTP whereas sratools uses sra-tools to download *.sra files and convert to FastQ."
+                    "enum": ["aspera", "fastq-dl", "ftp", "sratools"],
+                    "description": "Method to download FastQ files. Available options are 'aspera', 'fastq-dl', 'ftp' or 'sratools'. Default is 'ftp'.",
+                    "help_text": "FTP and Aspera CLI download FastQ files directly from the ENA FTP, sratools uses sra-tools to download *.sra files and convert to FastQ, and fastq-dl uses the fastq-dl tool to download FastQ files from ENA or SRA by run accession."
                 },
                 "skip_fastq_download": {
diff --git a/workflows/sra/main.nf b/workflows/sra/main.nf
index 0c8cac0c..02cc7312 100644
--- a/workflows/sra/main.nf
+++ b/workflows/sra/main.nf
@@ -10,6 +10,7 @@ include { SRA_IDS_TO_RUNINFO } from '../../modules/local/sra_ids_to_runinfo
 include { SRA_RUNINFO_TO_FTP } from '../../modules/local/sra_runinfo_to_ftp'
 include { ASPERA_CLI } from '../../modules/local/aspera_cli'
 include { SRA_TO_SAMPLESHEET } from '../../modules/local/sra_to_samplesheet'
+include { FASTQDL } from '../../modules/nf-core/fastqdl/main'
 include { softwareVersionsToYAML } from '../../subworkflows/nf-core/utils_nfcore_pipeline'
 
 /*
@@ -76,12 +77,19 @@ workflow SRA {
                     if (meta.fastq_aspera && params.download_method == 'aspera') {
                         download_method = 'aspera'
                     }
                     if ((!meta.fastq_aspera && !meta.fastq_1) || params.download_method == 'sratools') {
                         download_method = 'sratools'
                     }
+                    // Evaluated last so an explicit 'fastq-dl' request is not overridden by the
+                    // sratools fallback above; FASTQDL only needs meta.run_accession
+                    if (params.download_method == 'fastq-dl') {
+                        download_method = 'fastq-dl'
+                    }
 
                     aspera: download_method == 'aspera'
                         return [ meta, meta.fastq_aspera.tokenize(';').take(2) ]
+                    fastqdl: download_method == 'fastq-dl'
+                        return [ meta, meta.run_accession ]
                     ftp: download_method == 'ftp'
                         return [ meta, [ meta.fastq_1, meta.fastq_2 ] ]
                     sratools: download_method == 'sratools'
@@ -115,12 +123,18 @@ workflow SRA {
         )
         ch_versions = ch_versions.mix(ASPERA_CLI.out.versions.first())
 
+        FASTQDL (
+            ch_sra_reads.fastqdl
+        )
+        ch_versions = ch_versions.mix(FASTQDL.out.versions.first())
+
         // Isolate FASTQ channel which will be added to emit block
         SRA_FASTQ_FTP
             .out
             .fastq
             .mix(FASTQ_DOWNLOAD_PREFETCH_FASTERQDUMP_SRATOOLS.out.reads)
             .mix(ASPERA_CLI.out.fastq)
+            .mix(FASTQDL.out.fastq)
             .map {
                 meta, fastq ->
                     def reads = fastq instanceof List ? fastq.flatten() : [ fastq ]
diff --git a/workflows/sra/nextflow.config b/workflows/sra/nextflow.config
index d242c238..14df1ee3 100644
--- a/workflows/sra/nextflow.config
+++ b/workflows/sra/nextflow.config
@@ -5,4 +5,5 @@ includeConfig "../../modules/local/sra_ids_to_runinfo/nextflow.config"
 includeConfig "../../modules/local/sra_runinfo_to_ftp/nextflow.config"
 includeConfig "../../modules/local/sra_to_samplesheet/nextflow.config"
 includeConfig "../../modules/nf-core/sratools/prefetch/nextflow.config"
+includeConfig "../../modules/nf-core/fastqdl/nextflow.config"
 includeConfig "../../subworkflows/nf-core/fastq_download_prefetch_fasterqdump_sratools/nextflow.config"