From bff5441036659838712620b8de0786c951a5c5e5 Mon Sep 17 00:00:00 2001 From: Danielle Callan Date: Mon, 16 Sep 2024 14:44:50 -0400 Subject: [PATCH 01/23] add skeleton for humann module- wip --- modules/local/humann/humann/environment.yml | 6 ++ modules/local/humann/humann/main.nf | 58 ++++++++++++ modules/local/humann/humann/nextflow.config | 27 ++++++ .../local/humann/humann/tests/main.nf.test | 49 ++++++++++ .../local/humann/humann/tests/nextflow.config | 27 ++++++ modules/local/humann/join/environment.yml | 6 ++ modules/local/humann/join/join.nf | 89 ++++++++++++++++++ modules/local/humann/join/main.nf | 42 +++++++++ modules/local/humann/join/nextflow.config | 5 + modules/local/humann/join/tests/main.nf.test | 49 ++++++++++ .../local/humann/join/tests/nextflow.config | 5 + modules/local/humann/regroup/environment.yml | 6 ++ modules/local/humann/regroup/main.nf | 42 +++++++++ modules/local/humann/regroup/nextflow.config | 5 + modules/local/humann/regroup/regroup.nf | 91 +++++++++++++++++++ .../local/humann/regroup/tests/main.nf.test | 49 ++++++++++ .../humann/regroup/tests/nextflow.config | 5 + modules/local/humann/rename/environment.yml | 6 ++ modules/local/humann/rename/main.nf | 42 +++++++++ modules/local/humann/rename/nextflow.config | 5 + modules/local/humann/rename/rename.nf | 91 +++++++++++++++++++ .../local/humann/rename/tests/main.nf.test | 49 ++++++++++ .../local/humann/rename/tests/nextflow.config | 5 + modules/local/humann/renorm/environment.yml | 6 ++ modules/local/humann/renorm/main.nf | 42 +++++++++ modules/local/humann/renorm/nextflow.config | 5 + modules/local/humann/renorm/renorm.nf | 91 +++++++++++++++++++ .../local/humann/renorm/tests/main.nf.test | 49 ++++++++++ .../local/humann/renorm/tests/nextflow.config | 5 + 29 files changed, 957 insertions(+) create mode 100644 modules/local/humann/humann/environment.yml create mode 100644 modules/local/humann/humann/main.nf create mode 100644 modules/local/humann/humann/nextflow.config create mode 
100644 modules/local/humann/humann/tests/main.nf.test create mode 100644 modules/local/humann/humann/tests/nextflow.config create mode 100644 modules/local/humann/join/environment.yml create mode 100644 modules/local/humann/join/join.nf create mode 100644 modules/local/humann/join/main.nf create mode 100644 modules/local/humann/join/nextflow.config create mode 100644 modules/local/humann/join/tests/main.nf.test create mode 100644 modules/local/humann/join/tests/nextflow.config create mode 100644 modules/local/humann/regroup/environment.yml create mode 100644 modules/local/humann/regroup/main.nf create mode 100644 modules/local/humann/regroup/nextflow.config create mode 100644 modules/local/humann/regroup/regroup.nf create mode 100644 modules/local/humann/regroup/tests/main.nf.test create mode 100644 modules/local/humann/regroup/tests/nextflow.config create mode 100644 modules/local/humann/rename/environment.yml create mode 100644 modules/local/humann/rename/main.nf create mode 100644 modules/local/humann/rename/nextflow.config create mode 100644 modules/local/humann/rename/rename.nf create mode 100644 modules/local/humann/rename/tests/main.nf.test create mode 100644 modules/local/humann/rename/tests/nextflow.config create mode 100644 modules/local/humann/renorm/environment.yml create mode 100644 modules/local/humann/renorm/main.nf create mode 100644 modules/local/humann/renorm/nextflow.config create mode 100644 modules/local/humann/renorm/renorm.nf create mode 100644 modules/local/humann/renorm/tests/main.nf.test create mode 100644 modules/local/humann/renorm/tests/nextflow.config diff --git a/modules/local/humann/humann/environment.yml b/modules/local/humann/humann/environment.yml new file mode 100644 index 0000000..92f963f --- /dev/null +++ b/modules/local/humann/humann/environment.yml @@ -0,0 +1,6 @@ +channels: + - conda-forge + - bioconda + - defaults +dependencies: + - bioconda::humann=3.8 diff --git a/modules/local/humann/humann/main.nf 
b/modules/local/humann/humann/main.nf new file mode 100644 index 0000000..66abe4a --- /dev/null +++ b/modules/local/humann/humann/main.nf @@ -0,0 +1,58 @@ +process HUMANN_HUMANN { + tag "$meta.id" + label 'process_medium' + + conda "${moduleDir}/environment.yml" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? + 'https://depot.galaxyproject.org/singularity/humann:3.8--pyh7cba7a3_0': + 'biocontainers/humann:3.8--pyh7cba7a3_0' }" + + input: + tuple val(meta) , path(reads) + tuple val(meta2), path(metaphlan_profile) + path chocophlan_db + path uniref_db + + output: + tuple val(meta), path("*_genefamilies.tsv") , emit: genefamilies + tuple val(meta), path("*_pathabundance.tsv"), emit: pathabundance + tuple val(meta), path("*_pathcoverage.tsv") , emit: pathcoverage + tuple val(meta), path("*.log") , emit: log + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + def prefix = task.ext.prefix ?: "${meta.id}" + """ + humann \\ + --input ${reads} \\ + --output ./ \\ + --threads ${task.cpus} \\ + --taxonomic-profile ${metaphlan_profile} \\ + --nucleotide-database ${chocophlan_db} \\ + --protein-database ${uniref_db} \\ + --o-log ${prefix}.log \\ + ${args} + cat <<-END_VERSIONS > versions.yml + "${task.process}": + humann: \$(echo \$(humann --version 2>&1 | sed 's/^.*humann //; s/Using.*\$//' )) + END_VERSIONS + """ + + stub: + def args = task.ext.args ?: '' + def prefix = task.ext.prefix ?: "${meta.id}" + """ + touch ${prefix}_genefamilies.tsv + touch ${prefix}_pathabundance.tsv + touch ${prefix}_pathcoverage.tsv + touch ${prefix}.log + cat <<-END_VERSIONS > versions.yml + "${task.process}": + humann: \$(echo \$(humann --version 2>&1 | sed 's/^.*humann //; s/Using.*\$//' )) + END_VERSIONS + """ +} diff --git a/modules/local/humann/humann/nextflow.config b/modules/local/humann/humann/nextflow.config new file mode 100644 index 
0000000..52ad9a3 --- /dev/null +++ b/modules/local/humann/humann/nextflow.config @@ -0,0 +1,27 @@ +process { + withName: HUMANN_HUMANN { + publishDir = [ + [ + path: { "${params.outdir}/humann/humann" }, + mode: params.publish_dir_mode, + pattern: '*_genefamilies.tsv' + ], + [ + path: { "${params.outdir}/humann/humann" }, + mode: params.publish_dir_mode, + pattern: '*_pathabundance.tsv' + ], + [ + path: { "${params.outdir}/humann/humann" }, + mode: params.publish_dir_mode, + pattern: '*_pathcoverage.tsv' + ], + [ + path: { "${params.outdir}/humann/humann" }, + mode: params.publish_dir_mode, + pattern: '*.log' + ], + ] + ext.args = params.humann_options ? params.humann_options : "" + } +} diff --git a/modules/local/humann/humann/tests/main.nf.test b/modules/local/humann/humann/tests/main.nf.test new file mode 100644 index 0000000..f4c6fa5 --- /dev/null +++ b/modules/local/humann/humann/tests/main.nf.test @@ -0,0 +1,49 @@ +nextflow_process { + + name "Test Process HUMANN_HUMANN" + script "../main.nf" + process "HUMANN_HUMANN" + config "./nextflow.config" + + tag "modules" + tag "modules_nfcore" + tag "humann" + tag "humann/humann" + + test("DEMO_diamond") { + when { + process { + """ + input[0] = "DEMO_diamond" + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + } + + test("DEMO_diamond - stub") { + + options "-stub" + + when { + process { + """ + input[0] = "DEMO_diamond" + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + } +} diff --git a/modules/local/humann/humann/tests/nextflow.config b/modules/local/humann/humann/tests/nextflow.config new file mode 100644 index 0000000..52ad9a3 --- /dev/null +++ b/modules/local/humann/humann/tests/nextflow.config @@ -0,0 +1,27 @@ +process { + withName: HUMANN_HUMANN { + publishDir = [ + [ + path: { "${params.outdir}/humann/humann" }, + mode: params.publish_dir_mode, + pattern: 
'*_genefamilies.tsv' + ], + [ + path: { "${params.outdir}/humann/humann" }, + mode: params.publish_dir_mode, + pattern: '*_pathabundance.tsv' + ], + [ + path: { "${params.outdir}/humann/humann" }, + mode: params.publish_dir_mode, + pattern: '*_pathcoverage.tsv' + ], + [ + path: { "${params.outdir}/humann/humann" }, + mode: params.publish_dir_mode, + pattern: '*.log' + ], + ] + ext.args = params.humann_options ? params.humann_options : "" + } +} diff --git a/modules/local/humann/join/environment.yml b/modules/local/humann/join/environment.yml new file mode 100644 index 0000000..92f963f --- /dev/null +++ b/modules/local/humann/join/environment.yml @@ -0,0 +1,6 @@ +channels: + - conda-forge + - bioconda + - defaults +dependencies: + - bioconda::humann=3.8 diff --git a/modules/local/humann/join/join.nf b/modules/local/humann/join/join.nf new file mode 100644 index 0000000..ba0a341 --- /dev/null +++ b/modules/local/humann/join/join.nf @@ -0,0 +1,89 @@ +// TODO nf-core: If in doubt look at other nf-core/modules to see how we are doing things! :) +// https://github.com/nf-core/modules/tree/master/modules/nf-core/ +// You can also ask for help via your pull request or on the #modules channel on the nf-core Slack workspace: +// https://nf-co.re/join +// TODO nf-core: A module file SHOULD only define input and output files as command-line parameters. +// All other parameters MUST be provided using the "task.ext" directive, see here: +// https://www.nextflow.io/docs/latest/process.html#ext +// where "task.ext" is a string. +// Any parameters that need to be evaluated in the context of a particular sample +// e.g. single-end/paired-end data MUST also be defined and evaluated appropriately. +// TODO nf-core: Software that can be piped together SHOULD be added to separate module files +// unless there is a run-time, storage advantage in implementing in this way +// e.g. 
it's ok to have a single module for bwa to output BAM instead of SAM: +// bwa mem | samtools view -B -T ref.fasta +// TODO nf-core: Optional inputs are not currently supported by Nextflow. However, using an empty +// list (`[]`) instead of a file can be used to work around this issue. + +process HUMANN_JOIN { + tag '$bam' + label 'process_low' + + // TODO nf-core: List required Conda package(s). + // Software MUST be pinned to channel (i.e. "bioconda"), version (i.e. "1.10"). + // For Conda, the build (i.e. "h9402c20_2") must be EXCLUDED to support installation on different operating systems. + // TODO nf-core: See section in main README for further information regarding finding and adding container addresses to the section below. + conda "${moduleDir}/environment.yml" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? + 'https://depot.galaxyproject.org/singularity/humann:3.9--py312hdfd78af_0': + 'biocontainers/humann:3.9--py312hdfd78af_0' }" + + input: + // TODO nf-core: Where applicable all sample-specific information e.g. "id", "single_end", "read_group" + // MUST be provided as an input via a Groovy Map called "meta". + // This information may not be required in some instances e.g. indexing reference genome files: + // https://github.com/nf-core/modules/blob/master/modules/nf-core/bwa/index/main.nf + // TODO nf-core: Where applicable please provide/convert compressed files as input/output + // e.g. "*.fastq.gz" and NOT "*.fastq", "*.bam" and NOT "*.sam" etc. + path bam + + output: + // TODO nf-core: Named file extensions MUST be emitted for ALL output channels + path "*.bam", emit: bam + // TODO nf-core: List additional required output channels/values here + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + + // TODO nf-core: Where possible, a command MUST be provided to obtain the version number of the software e.g. 
1.10 + // If the software is unable to output a version number on the command-line then it can be manually specified + // e.g. https://github.com/nf-core/modules/blob/master/modules/nf-core/homer/annotatepeaks/main.nf + // Each software used MUST provide the software name and version number in the YAML version file (versions.yml) + // TODO nf-core: It MUST be possible to pass additional parameters to the tool as a command-line string via the "task.ext.args" directive + // TODO nf-core: If the tool supports multi-threading then you MUST provide the appropriate parameter + // using the Nextflow "task" variable e.g. "--threads $task.cpus" + // TODO nf-core: Please replace the example samtools command below with your module's command + // TODO nf-core: Please indent the command appropriately (4 spaces!!) to help with readability ;) + """ + samtools \\ + sort \\ + $args \\ + -@ $task.cpus \\ + $bam + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + humann: \$(samtools --version |& sed '1!d ; s/samtools //') + END_VERSIONS + """ + + stub: + def args = task.ext.args ?: '' + + // TODO nf-core: A stub section should mimic the execution of the original module as best as possible + // Have a look at the following examples: + // Simple example: https://github.com/nf-core/modules/blob/818474a292b4860ae8ff88e149fbcda68814114d/modules/nf-core/bcftools/annotate/main.nf#L47-L63 + // Complex example: https://github.com/nf-core/modules/blob/818474a292b4860ae8ff88e149fbcda68814114d/modules/nf-core/bedtools/split/main.nf#L38-L54 + """ + touch ${prefix}.bam + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + humann: \$(samtools --version |& sed '1!d ; s/samtools //') + END_VERSIONS + """ +} diff --git a/modules/local/humann/join/main.nf b/modules/local/humann/join/main.nf new file mode 100644 index 0000000..58bc710 --- /dev/null +++ b/modules/local/humann/join/main.nf @@ -0,0 +1,42 @@ +process HUMANN_DOWNLOADUNIREFDB { + label 'process_single' + + conda 
"${moduleDir}/environment.yml" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? + 'https://depot.galaxyproject.org/singularity/humann:3.8--pyh7cba7a3_0': + 'biocontainers/humann:3.8--pyh7cba7a3_0' }" + + input: + val uniref_db_version + + output: + path("uniref") , emit: uniref_db + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + """ + humann_databases \\ + --download uniref \\ + ${uniref_db_version} \\ + . \\ + ${args} + cat <<-END_VERSIONS > versions.yml + "${task.process}": + humann: \$(echo \$(humann --version 2>&1 | sed 's/^.*humann //; s/Using.*\$//' )) + END_VERSIONS + """ + + stub: + def args = task.ext.args ?: '' + """ + mkdir uniref + cat <<-END_VERSIONS > versions.yml + "${task.process}": + humann: \$(echo \$(humann --version 2>&1 | sed 's/^.*humann //; s/Using.*\$//' )) + END_VERSIONS + """ +} diff --git a/modules/local/humann/join/nextflow.config b/modules/local/humann/join/nextflow.config new file mode 100644 index 0000000..6be75c4 --- /dev/null +++ b/modules/local/humann/join/nextflow.config @@ -0,0 +1,5 @@ +process { + withName: HUMANN_DOWNLOADUNIREFDB { + ext.args = "--update-config no" + } +} diff --git a/modules/local/humann/join/tests/main.nf.test b/modules/local/humann/join/tests/main.nf.test new file mode 100644 index 0000000..4a0ba43 --- /dev/null +++ b/modules/local/humann/join/tests/main.nf.test @@ -0,0 +1,49 @@ +nextflow_process { + + name "Test Process HUMANN_DOWNLOADUNIREFDB" + script "../main.nf" + process "HUMANN_DOWNLOADUNIREFDB" + config "./nextflow.config" + + tag "modules" + tag "modules_nfcore" + tag "humann" + tag "humann/downloadunireflandb" + + test("DEMO_diamond") { + when { + process { + """ + input[0] = "DEMO_diamond" + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + } + + test("DEMO_diamond - stub") { + + 
options "-stub" + + when { + process { + """ + input[0] = "DEMO_diamond" + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + } +} diff --git a/modules/local/humann/join/tests/nextflow.config b/modules/local/humann/join/tests/nextflow.config new file mode 100644 index 0000000..6be75c4 --- /dev/null +++ b/modules/local/humann/join/tests/nextflow.config @@ -0,0 +1,5 @@ +process { + withName: HUMANN_DOWNLOADUNIREFDB { + ext.args = "--update-config no" + } +} diff --git a/modules/local/humann/regroup/environment.yml b/modules/local/humann/regroup/environment.yml new file mode 100644 index 0000000..92f963f --- /dev/null +++ b/modules/local/humann/regroup/environment.yml @@ -0,0 +1,6 @@ +channels: + - conda-forge + - bioconda + - defaults +dependencies: + - bioconda::humann=3.8 diff --git a/modules/local/humann/regroup/main.nf b/modules/local/humann/regroup/main.nf new file mode 100644 index 0000000..58bc710 --- /dev/null +++ b/modules/local/humann/regroup/main.nf @@ -0,0 +1,42 @@ +process HUMANN_DOWNLOADUNIREFDB { + label 'process_single' + + conda "${moduleDir}/environment.yml" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? + 'https://depot.galaxyproject.org/singularity/humann:3.8--pyh7cba7a3_0': + 'biocontainers/humann:3.8--pyh7cba7a3_0' }" + + input: + val uniref_db_version + + output: + path("uniref") , emit: uniref_db + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + """ + humann_databases \\ + --download uniref \\ + ${uniref_db_version} \\ + . 
\\ + ${args} + cat <<-END_VERSIONS > versions.yml + "${task.process}": + humann: \$(echo \$(humann --version 2>&1 | sed 's/^.*humann //; s/Using.*\$//' )) + END_VERSIONS + """ + + stub: + def args = task.ext.args ?: '' + """ + mkdir uniref + cat <<-END_VERSIONS > versions.yml + "${task.process}": + humann: \$(echo \$(humann --version 2>&1 | sed 's/^.*humann //; s/Using.*\$//' )) + END_VERSIONS + """ +} diff --git a/modules/local/humann/regroup/nextflow.config b/modules/local/humann/regroup/nextflow.config new file mode 100644 index 0000000..6be75c4 --- /dev/null +++ b/modules/local/humann/regroup/nextflow.config @@ -0,0 +1,5 @@ +process { + withName: HUMANN_DOWNLOADUNIREFDB { + ext.args = "--update-config no" + } +} diff --git a/modules/local/humann/regroup/regroup.nf b/modules/local/humann/regroup/regroup.nf new file mode 100644 index 0000000..8b70eab --- /dev/null +++ b/modules/local/humann/regroup/regroup.nf @@ -0,0 +1,91 @@ +// TODO nf-core: If in doubt look at other nf-core/modules to see how we are doing things! :) +// https://github.com/nf-core/modules/tree/master/modules/nf-core/ +// You can also ask for help via your pull request or on the #modules channel on the nf-core Slack workspace: +// https://nf-co.re/join +// TODO nf-core: A module file SHOULD only define input and output files as command-line parameters. +// All other parameters MUST be provided using the "task.ext" directive, see here: +// https://www.nextflow.io/docs/latest/process.html#ext +// where "task.ext" is a string. +// Any parameters that need to be evaluated in the context of a particular sample +// e.g. single-end/paired-end data MUST also be defined and evaluated appropriately. +// TODO nf-core: Software that can be piped together SHOULD be added to separate module files +// unless there is a run-time, storage advantage in implementing in this way +// e.g. 
it's ok to have a single module for bwa to output BAM instead of SAM: +// bwa mem | samtools view -B -T ref.fasta +// TODO nf-core: Optional inputs are not currently supported by Nextflow. However, using an empty +// list (`[]`) instead of a file can be used to work around this issue. + +process HUMANN_REGROUP { + tag "$meta.id" + label 'process_low' + + // TODO nf-core: List required Conda package(s). + // Software MUST be pinned to channel (i.e. "bioconda"), version (i.e. "1.10"). + // For Conda, the build (i.e. "h9402c20_2") must be EXCLUDED to support installation on different operating systems. + // TODO nf-core: See section in main README for further information regarding finding and adding container addresses to the section below. + conda "${moduleDir}/environment.yml" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? + 'https://depot.galaxyproject.org/singularity/humann:3.9--py312hdfd78af_0': + 'biocontainers/humann:3.9--py312hdfd78af_0' }" + + input: + // TODO nf-core: Where applicable all sample-specific information e.g. "id", "single_end", "read_group" + // MUST be provided as an input via a Groovy Map called "meta". + // This information may not be required in some instances e.g. indexing reference genome files: + // https://github.com/nf-core/modules/blob/master/modules/nf-core/bwa/index/main.nf + // TODO nf-core: Where applicable please provide/convert compressed files as input/output + // e.g. "*.fastq.gz" and NOT "*.fastq", "*.bam" and NOT "*.sam" etc. 
+ tuple val(meta), path(bam) + + output: + // TODO nf-core: Named file extensions MUST be emitted for ALL output channels + tuple val(meta), path("*.bam"), emit: bam + // TODO nf-core: List additional required output channels/values here + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + def prefix = task.ext.prefix ?: "${meta.id}" + // TODO nf-core: Where possible, a command MUST be provided to obtain the version number of the software e.g. 1.10 + // If the software is unable to output a version number on the command-line then it can be manually specified + // e.g. https://github.com/nf-core/modules/blob/master/modules/nf-core/homer/annotatepeaks/main.nf + // Each software used MUST provide the software name and version number in the YAML version file (versions.yml) + // TODO nf-core: It MUST be possible to pass additional parameters to the tool as a command-line string via the "task.ext.args" directive + // TODO nf-core: If the tool supports multi-threading then you MUST provide the appropriate parameter + // using the Nextflow "task" variable e.g. "--threads $task.cpus" + // TODO nf-core: Please replace the example samtools command below with your module's command + // TODO nf-core: Please indent the command appropriately (4 spaces!!) 
to help with readability ;) + """ + samtools \\ + sort \\ + $args \\ + -@ $task.cpus \\ + -o ${prefix}.bam \\ + -T $prefix \\ + $bam + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + humann: \$(samtools --version |& sed '1!d ; s/samtools //') + END_VERSIONS + """ + + stub: + def args = task.ext.args ?: '' + def prefix = task.ext.prefix ?: "${meta.id}" + // TODO nf-core: A stub section should mimic the execution of the original module as best as possible + // Have a look at the following examples: + // Simple example: https://github.com/nf-core/modules/blob/818474a292b4860ae8ff88e149fbcda68814114d/modules/nf-core/bcftools/annotate/main.nf#L47-L63 + // Complex example: https://github.com/nf-core/modules/blob/818474a292b4860ae8ff88e149fbcda68814114d/modules/nf-core/bedtools/split/main.nf#L38-L54 + """ + touch ${prefix}.bam + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + humann: \$(samtools --version |& sed '1!d ; s/samtools //') + END_VERSIONS + """ +} diff --git a/modules/local/humann/regroup/tests/main.nf.test b/modules/local/humann/regroup/tests/main.nf.test new file mode 100644 index 0000000..4a0ba43 --- /dev/null +++ b/modules/local/humann/regroup/tests/main.nf.test @@ -0,0 +1,49 @@ +nextflow_process { + + name "Test Process HUMANN_DOWNLOADUNIREFDB" + script "../main.nf" + process "HUMANN_DOWNLOADUNIREFDB" + config "./nextflow.config" + + tag "modules" + tag "modules_nfcore" + tag "humann" + tag "humann/downloadunireflandb" + + test("DEMO_diamond") { + when { + process { + """ + input[0] = "DEMO_diamond" + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + } + + test("DEMO_diamond - stub") { + + options "-stub" + + when { + process { + """ + input[0] = "DEMO_diamond" + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + } +} diff --git a/modules/local/humann/regroup/tests/nextflow.config 
b/modules/local/humann/regroup/tests/nextflow.config new file mode 100644 index 0000000..6be75c4 --- /dev/null +++ b/modules/local/humann/regroup/tests/nextflow.config @@ -0,0 +1,5 @@ +process { + withName: HUMANN_DOWNLOADUNIREFDB { + ext.args = "--update-config no" + } +} diff --git a/modules/local/humann/rename/environment.yml b/modules/local/humann/rename/environment.yml new file mode 100644 index 0000000..92f963f --- /dev/null +++ b/modules/local/humann/rename/environment.yml @@ -0,0 +1,6 @@ +channels: + - conda-forge + - bioconda + - defaults +dependencies: + - bioconda::humann=3.8 diff --git a/modules/local/humann/rename/main.nf b/modules/local/humann/rename/main.nf new file mode 100644 index 0000000..58bc710 --- /dev/null +++ b/modules/local/humann/rename/main.nf @@ -0,0 +1,42 @@ +process HUMANN_DOWNLOADUNIREFDB { + label 'process_single' + + conda "${moduleDir}/environment.yml" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? + 'https://depot.galaxyproject.org/singularity/humann:3.8--pyh7cba7a3_0': + 'biocontainers/humann:3.8--pyh7cba7a3_0' }" + + input: + val uniref_db_version + + output: + path("uniref") , emit: uniref_db + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + """ + humann_databases \\ + --download uniref \\ + ${uniref_db_version} \\ + . 
\\ + ${args} + cat <<-END_VERSIONS > versions.yml + "${task.process}": + humann: \$(echo \$(humann --version 2>&1 | sed 's/^.*humann //; s/Using.*\$//' )) + END_VERSIONS + """ + + stub: + def args = task.ext.args ?: '' + """ + mkdir uniref + cat <<-END_VERSIONS > versions.yml + "${task.process}": + humann: \$(echo \$(humann --version 2>&1 | sed 's/^.*humann //; s/Using.*\$//' )) + END_VERSIONS + """ +} diff --git a/modules/local/humann/rename/nextflow.config b/modules/local/humann/rename/nextflow.config new file mode 100644 index 0000000..6be75c4 --- /dev/null +++ b/modules/local/humann/rename/nextflow.config @@ -0,0 +1,5 @@ +process { + withName: HUMANN_DOWNLOADUNIREFDB { + ext.args = "--update-config no" + } +} diff --git a/modules/local/humann/rename/rename.nf b/modules/local/humann/rename/rename.nf new file mode 100644 index 0000000..d33eea0 --- /dev/null +++ b/modules/local/humann/rename/rename.nf @@ -0,0 +1,91 @@ +// TODO nf-core: If in doubt look at other nf-core/modules to see how we are doing things! :) +// https://github.com/nf-core/modules/tree/master/modules/nf-core/ +// You can also ask for help via your pull request or on the #modules channel on the nf-core Slack workspace: +// https://nf-co.re/join +// TODO nf-core: A module file SHOULD only define input and output files as command-line parameters. +// All other parameters MUST be provided using the "task.ext" directive, see here: +// https://www.nextflow.io/docs/latest/process.html#ext +// where "task.ext" is a string. +// Any parameters that need to be evaluated in the context of a particular sample +// e.g. single-end/paired-end data MUST also be defined and evaluated appropriately. +// TODO nf-core: Software that can be piped together SHOULD be added to separate module files +// unless there is a run-time, storage advantage in implementing in this way +// e.g. 
it's ok to have a single module for bwa to output BAM instead of SAM: +// bwa mem | samtools view -B -T ref.fasta +// TODO nf-core: Optional inputs are not currently supported by Nextflow. However, using an empty +// list (`[]`) instead of a file can be used to work around this issue. + +process HUMANN_RENAME { + tag "$meta.id" + label 'process_low' + + // TODO nf-core: List required Conda package(s). + // Software MUST be pinned to channel (i.e. "bioconda"), version (i.e. "1.10"). + // For Conda, the build (i.e. "h9402c20_2") must be EXCLUDED to support installation on different operating systems. + // TODO nf-core: See section in main README for further information regarding finding and adding container addresses to the section below. + conda "${moduleDir}/environment.yml" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? + 'https://depot.galaxyproject.org/singularity/humann:3.9--py312hdfd78af_0': + 'biocontainers/humann:3.9--py312hdfd78af_0' }" + + input: + // TODO nf-core: Where applicable all sample-specific information e.g. "id", "single_end", "read_group" + // MUST be provided as an input via a Groovy Map called "meta". + // This information may not be required in some instances e.g. indexing reference genome files: + // https://github.com/nf-core/modules/blob/master/modules/nf-core/bwa/index/main.nf + // TODO nf-core: Where applicable please provide/convert compressed files as input/output + // e.g. "*.fastq.gz" and NOT "*.fastq", "*.bam" and NOT "*.sam" etc. 
+ tuple val(meta), path(bam) + + output: + // TODO nf-core: Named file extensions MUST be emitted for ALL output channels + tuple val(meta), path("*.bam"), emit: bam + // TODO nf-core: List additional required output channels/values here + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + def prefix = task.ext.prefix ?: "${meta.id}" + // TODO nf-core: Where possible, a command MUST be provided to obtain the version number of the software e.g. 1.10 + // If the software is unable to output a version number on the command-line then it can be manually specified + // e.g. https://github.com/nf-core/modules/blob/master/modules/nf-core/homer/annotatepeaks/main.nf + // Each software used MUST provide the software name and version number in the YAML version file (versions.yml) + // TODO nf-core: It MUST be possible to pass additional parameters to the tool as a command-line string via the "task.ext.args" directive + // TODO nf-core: If the tool supports multi-threading then you MUST provide the appropriate parameter + // using the Nextflow "task" variable e.g. "--threads $task.cpus" + // TODO nf-core: Please replace the example samtools command below with your module's command + // TODO nf-core: Please indent the command appropriately (4 spaces!!) 
to help with readability ;) + """ + samtools \\ + sort \\ + $args \\ + -@ $task.cpus \\ + -o ${prefix}.bam \\ + -T $prefix \\ + $bam + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + humann: \$(samtools --version |& sed '1!d ; s/samtools //') + END_VERSIONS + """ + + stub: + def args = task.ext.args ?: '' + def prefix = task.ext.prefix ?: "${meta.id}" + // TODO nf-core: A stub section should mimic the execution of the original module as best as possible + // Have a look at the following examples: + // Simple example: https://github.com/nf-core/modules/blob/818474a292b4860ae8ff88e149fbcda68814114d/modules/nf-core/bcftools/annotate/main.nf#L47-L63 + // Complex example: https://github.com/nf-core/modules/blob/818474a292b4860ae8ff88e149fbcda68814114d/modules/nf-core/bedtools/split/main.nf#L38-L54 + """ + touch ${prefix}.bam + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + humann: \$(samtools --version |& sed '1!d ; s/samtools //') + END_VERSIONS + """ +} diff --git a/modules/local/humann/rename/tests/main.nf.test b/modules/local/humann/rename/tests/main.nf.test new file mode 100644 index 0000000..4a0ba43 --- /dev/null +++ b/modules/local/humann/rename/tests/main.nf.test @@ -0,0 +1,49 @@ +nextflow_process { + + name "Test Process HUMANN_DOWNLOADUNIREFDB" + script "../main.nf" + process "HUMANN_DOWNLOADUNIREFDB" + config "./nextflow.config" + + tag "modules" + tag "modules_nfcore" + tag "humann" + tag "humann/downloadunireflandb" + + test("DEMO_diamond") { + when { + process { + """ + input[0] = "DEMO_diamond" + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + } + + test("DEMO_diamond - stub") { + + options "-stub" + + when { + process { + """ + input[0] = "DEMO_diamond" + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + } +} diff --git a/modules/local/humann/rename/tests/nextflow.config 
b/modules/local/humann/rename/tests/nextflow.config new file mode 100644 index 0000000..6be75c4 --- /dev/null +++ b/modules/local/humann/rename/tests/nextflow.config @@ -0,0 +1,5 @@ +process { + withName: HUMANN_DOWNLOADUNIREFDB { + ext.args = "--update-config no" + } +} diff --git a/modules/local/humann/renorm/environment.yml b/modules/local/humann/renorm/environment.yml new file mode 100644 index 0000000..92f963f --- /dev/null +++ b/modules/local/humann/renorm/environment.yml @@ -0,0 +1,6 @@ +channels: + - conda-forge + - bioconda + - defaults +dependencies: + - bioconda::humann=3.8 diff --git a/modules/local/humann/renorm/main.nf b/modules/local/humann/renorm/main.nf new file mode 100644 index 0000000..58bc710 --- /dev/null +++ b/modules/local/humann/renorm/main.nf @@ -0,0 +1,42 @@ +process HUMANN_DOWNLOADUNIREFDB { + label 'process_single' + + conda "${moduleDir}/environment.yml" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? + 'https://depot.galaxyproject.org/singularity/humann:3.8--pyh7cba7a3_0': + 'biocontainers/humann:3.8--pyh7cba7a3_0' }" + + input: + val uniref_db_version + + output: + path("uniref") , emit: uniref_db + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + """ + humann_databases \\ + --download uniref \\ + ${uniref_db_version} \\ + . 
\\ + ${args} + cat <<-END_VERSIONS > versions.yml + "${task.process}": + humann: \$(echo \$(humann --version 2>&1 | sed 's/^.*humann //; s/Using.*\$//' )) + END_VERSIONS + """ + + stub: + def args = task.ext.args ?: '' + """ + mkdir uniref + cat <<-END_VERSIONS > versions.yml + "${task.process}": + humann: \$(echo \$(humann --version 2>&1 | sed 's/^.*humann //; s/Using.*\$//' )) + END_VERSIONS + """ +} diff --git a/modules/local/humann/renorm/nextflow.config b/modules/local/humann/renorm/nextflow.config new file mode 100644 index 0000000..6be75c4 --- /dev/null +++ b/modules/local/humann/renorm/nextflow.config @@ -0,0 +1,5 @@ +process { + withName: HUMANN_DOWNLOADUNIREFDB { + ext.args = "--update-config no" + } +} diff --git a/modules/local/humann/renorm/renorm.nf b/modules/local/humann/renorm/renorm.nf new file mode 100644 index 0000000..393a62f --- /dev/null +++ b/modules/local/humann/renorm/renorm.nf @@ -0,0 +1,91 @@ +// TODO nf-core: If in doubt look at other nf-core/modules to see how we are doing things! :) +// https://github.com/nf-core/modules/tree/master/modules/nf-core/ +// You can also ask for help via your pull request or on the #modules channel on the nf-core Slack workspace: +// https://nf-co.re/join +// TODO nf-core: A module file SHOULD only define input and output files as command-line parameters. +// All other parameters MUST be provided using the "task.ext" directive, see here: +// https://www.nextflow.io/docs/latest/process.html#ext +// where "task.ext" is a string. +// Any parameters that need to be evaluated in the context of a particular sample +// e.g. single-end/paired-end data MUST also be defined and evaluated appropriately. +// TODO nf-core: Software that can be piped together SHOULD be added to separate module files +// unless there is a run-time, storage advantage in implementing in this way +// e.g. 
it's ok to have a single module for bwa to output BAM instead of SAM: +// bwa mem | samtools view -B -T ref.fasta +// TODO nf-core: Optional inputs are not currently supported by Nextflow. However, using an empty +// list (`[]`) instead of a file can be used to work around this issue. + +process HUMANN_RENORM { + tag "$meta.id" + label 'process_low' + + // TODO nf-core: List required Conda package(s). + // Software MUST be pinned to channel (i.e. "bioconda"), version (i.e. "1.10"). + // For Conda, the build (i.e. "h9402c20_2") must be EXCLUDED to support installation on different operating systems. + // TODO nf-core: See section in main README for further information regarding finding and adding container addresses to the section below. + conda "${moduleDir}/environment.yml" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? + 'https://depot.galaxyproject.org/singularity/humann:3.9--py312hdfd78af_0': + 'biocontainers/humann:3.9--py312hdfd78af_0' }" + + input: + // TODO nf-core: Where applicable all sample-specific information e.g. "id", "single_end", "read_group" + // MUST be provided as an input via a Groovy Map called "meta". + // This information may not be required in some instances e.g. indexing reference genome files: + // https://github.com/nf-core/modules/blob/master/modules/nf-core/bwa/index/main.nf + // TODO nf-core: Where applicable please provide/convert compressed files as input/output + // e.g. "*.fastq.gz" and NOT "*.fastq", "*.bam" and NOT "*.sam" etc. 
+ tuple val(meta), path(bam) + + output: + // TODO nf-core: Named file extensions MUST be emitted for ALL output channels + tuple val(meta), path("*.bam"), emit: bam + // TODO nf-core: List additional required output channels/values here + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + def prefix = task.ext.prefix ?: "${meta.id}" + // TODO nf-core: Where possible, a command MUST be provided to obtain the version number of the software e.g. 1.10 + // If the software is unable to output a version number on the command-line then it can be manually specified + // e.g. https://github.com/nf-core/modules/blob/master/modules/nf-core/homer/annotatepeaks/main.nf + // Each software used MUST provide the software name and version number in the YAML version file (versions.yml) + // TODO nf-core: It MUST be possible to pass additional parameters to the tool as a command-line string via the "task.ext.args" directive + // TODO nf-core: If the tool supports multi-threading then you MUST provide the appropriate parameter + // using the Nextflow "task" variable e.g. "--threads $task.cpus" + // TODO nf-core: Please replace the example samtools command below with your module's command + // TODO nf-core: Please indent the command appropriately (4 spaces!!) 
to help with readability ;) + """ + samtools \\ + sort \\ + $args \\ + -@ $task.cpus \\ + -o ${prefix}.bam \\ + -T $prefix \\ + $bam + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + humann: \$(samtools --version |& sed '1!d ; s/samtools //') + END_VERSIONS + """ + + stub: + def args = task.ext.args ?: '' + def prefix = task.ext.prefix ?: "${meta.id}" + // TODO nf-core: A stub section should mimic the execution of the original module as best as possible + // Have a look at the following examples: + // Simple example: https://github.com/nf-core/modules/blob/818474a292b4860ae8ff88e149fbcda68814114d/modules/nf-core/bcftools/annotate/main.nf#L47-L63 + // Complex example: https://github.com/nf-core/modules/blob/818474a292b4860ae8ff88e149fbcda68814114d/modules/nf-core/bedtools/split/main.nf#L38-L54 + """ + touch ${prefix}.bam + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + humann: \$(samtools --version |& sed '1!d ; s/samtools //') + END_VERSIONS + """ +} diff --git a/modules/local/humann/renorm/tests/main.nf.test b/modules/local/humann/renorm/tests/main.nf.test new file mode 100644 index 0000000..4a0ba43 --- /dev/null +++ b/modules/local/humann/renorm/tests/main.nf.test @@ -0,0 +1,49 @@ +nextflow_process { + + name "Test Process HUMANN_DOWNLOADUNIREFDB" + script "../main.nf" + process "HUMANN_DOWNLOADUNIREFDB" + config "./nextflow.config" + + tag "modules" + tag "modules_nfcore" + tag "humann" + tag "humann/downloadunireflandb" + + test("DEMO_diamond") { + when { + process { + """ + input[0] = "DEMO_diamond" + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + } + + test("DEMO_diamond - stub") { + + options "-stub" + + when { + process { + """ + input[0] = "DEMO_diamond" + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + } +} diff --git a/modules/local/humann/renorm/tests/nextflow.config 
b/modules/local/humann/renorm/tests/nextflow.config new file mode 100644 index 0000000..6be75c4 --- /dev/null +++ b/modules/local/humann/renorm/tests/nextflow.config @@ -0,0 +1,5 @@ +process { + withName: HUMANN_DOWNLOADUNIREFDB { + ext.args = "--update-config no" + } +} From 1882f624da5f04341972baf9029e0f9447fe68dc Mon Sep 17 00:00:00 2001 From: Danielle Callan Date: Tue, 17 Sep 2024 13:00:57 -0400 Subject: [PATCH 02/23] fleshed out humann modules, tests still wip --- modules/local/humann/humann/main.nf | 1 - .../local/humann/humann/tests/main.nf.test | 80 ++++++++++++++-- modules/local/humann/join/join.nf | 89 ------------------ modules/local/humann/join/main.nf | 29 +++--- modules/local/humann/join/nextflow.config | 11 ++- modules/local/humann/join/tests/main.nf.test | 25 +++-- .../local/humann/join/tests/nextflow.config | 11 ++- modules/local/humann/regroup/main.nf | 32 ++++--- modules/local/humann/regroup/nextflow.config | 11 ++- modules/local/humann/regroup/regroup.nf | 91 ------------------- .../local/humann/regroup/tests/main.nf.test | 39 ++++++-- .../humann/regroup/tests/nextflow.config | 11 ++- modules/local/humann/rename/main.nf | 34 ++++--- modules/local/humann/rename/nextflow.config | 11 ++- modules/local/humann/rename/rename.nf | 91 ------------------- .../local/humann/rename/tests/main.nf.test | 39 ++++++-- .../local/humann/rename/tests/nextflow.config | 11 ++- modules/local/humann/renorm/main.nf | 42 +++++---- modules/local/humann/renorm/nextflow.config | 11 ++- modules/local/humann/renorm/renorm.nf | 91 ------------------- .../local/humann/renorm/tests/main.nf.test | 39 ++++++-- .../local/humann/renorm/tests/nextflow.config | 11 ++- 22 files changed, 329 insertions(+), 481 deletions(-) delete mode 100644 modules/local/humann/join/join.nf delete mode 100644 modules/local/humann/regroup/regroup.nf delete mode 100644 modules/local/humann/rename/rename.nf delete mode 100644 modules/local/humann/renorm/renorm.nf diff --git 
a/modules/local/humann/humann/main.nf b/modules/local/humann/humann/main.nf index 66abe4a..c7a0c78 100644 --- a/modules/local/humann/humann/main.nf +++ b/modules/local/humann/humann/main.nf @@ -43,7 +43,6 @@ process HUMANN_HUMANN { """ stub: - def args = task.ext.args ?: '' def prefix = task.ext.prefix ?: "${meta.id}" """ touch ${prefix}_genefamilies.tsv diff --git a/modules/local/humann/humann/tests/main.nf.test b/modules/local/humann/humann/tests/main.nf.test index f4c6fa5..aa5b71a 100644 --- a/modules/local/humann/humann/tests/main.nf.test +++ b/modules/local/humann/humann/tests/main.nf.test @@ -10,37 +10,103 @@ nextflow_process { tag "humann" tag "humann/humann" - test("DEMO_diamond") { + setup { + run("HUMANN_DOWNLOADCHOCOPHLANDB") { + script "../../downloadchocophlandb/main.nf" + process { + """ + input[0] = 'TODO - some chochophlan db version - or maybe skip this setup and build in a mini test db' + """ + } + } + + run("HUMANN_DOWNLOADUNIREFDB") { + script "../../downloadunirefdb/main.nf" + process { + """ + input[0] = 'TODO - some uniref db version - or maybe skip this setup and build in a mini test db' + """ + } + } + } + + test("kneaded fastq.gz") { + when { process { """ - input[0] = "DEMO_diamond" + input[0] = Channel.of( + [ + [ id: 'test' ], + [ + file(params.modules_testdata_base_path + "TODO_some_kneaded_1.fastq.gz", checkIfExists: true), + file(params.modules_testdata_base_path + "TODO_some_kneaded_2.fastq.gz", checkIfExists: true) + ] + ] + ) + input[1] = Channel.of( + [ + [id: 'test'], + [ + file(params.modules_testdata_base_path + "TODO_some_metaphlan_profile.tsv", checkIfExists: true) + ] + ] + ) + input[2] = HUMANN_DOWNLOADCHOCOPHLANDB.out.chocophlan_db + input[3] = HUMANN_DOWNLOADUNIREFDB.out.uniref_db """ } } + then { - assertAll( + assertAll ( { assert process.success }, - { assert snapshot(process.out).match() } + { assert snapshot( + process.out.genefamilies + process.out.pathabundance + process.out.pathcoverage, + process.out.versions + 
).match() + }, + { assert path(process.out.log[0][1]).text.contains("TODO a line indicates its running and gives a version, hopefully?") } ) } } - test("DEMO_diamond - stub") { + test("kneaded fastq.gz - stub") { options "-stub" when { process { """ - input[0] = "DEMO_diamond" + input[0] = Channel.of( + [ + [ id: 'test' ], + [ + file(params.modules_testdata_base_path + "TODO_some_kneaded_1.fastq.gz", checkIfExists: true), + file(params.modules_testdata_base_path + "TODO_some_kneaded_2.fastq.gz", checkIfExists: true) + ] + ] + ) + input[1] = Channel.of( + [ + [id: 'test'], + [ + file(params.modules_testdata_base_path + "TODO_some_metaphlan_profile.tsv", checkIfExists: true) + ] + ] + ) + input[2] = HUMANN_DOWNLOADCHOCOPHLANDB.out.chocophlan_db + input[3] = HUMANN_DOWNLOADUNIREFDB.out.uniref_db """ } } + then { - assertAll( + assertAll ( { assert process.success }, { assert snapshot(process.out).match() } ) diff --git a/modules/local/humann/join/join.nf b/modules/local/humann/join/join.nf deleted file mode 100644 index ba0a341..0000000 --- a/modules/local/humann/join/join.nf +++ /dev/null @@ -1,89 +0,0 @@ -// TODO nf-core: If in doubt look at other nf-core/modules to see how we are doing things! :) -// https://github.com/nf-core/modules/tree/master/modules/nf-core/ -// You can also ask for help via your pull request or on the #modules channel on the nf-core Slack workspace: -// https://nf-co.re/join -// TODO nf-core: A module file SHOULD only define input and output files as command-line parameters. -// All other parameters MUST be provided using the "task.ext" directive, see here: -// https://www.nextflow.io/docs/latest/process.html#ext -// where "task.ext" is a string. -// Any parameters that need to be evaluated in the context of a particular sample -// e.g. single-end/paired-end data MUST also be defined and evaluated appropriately. 
-// TODO nf-core: Software that can be piped together SHOULD be added to separate module files -// unless there is a run-time, storage advantage in implementing in this way -// e.g. it's ok to have a single module for bwa to output BAM instead of SAM: -// bwa mem | samtools view -B -T ref.fasta -// TODO nf-core: Optional inputs are not currently supported by Nextflow. However, using an empty -// list (`[]`) instead of a file can be used to work around this issue. - -process HUMANN_JOIN { - tag '$bam' - label 'process_low' - - // TODO nf-core: List required Conda package(s). - // Software MUST be pinned to channel (i.e. "bioconda"), version (i.e. "1.10"). - // For Conda, the build (i.e. "h9402c20_2") must be EXCLUDED to support installation on different operating systems. - // TODO nf-core: See section in main README for further information regarding finding and adding container addresses to the section below. - conda "${moduleDir}/environment.yml" - container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? - 'https://depot.galaxyproject.org/singularity/humann:3.9--py312hdfd78af_0': - 'biocontainers/humann:3.9--py312hdfd78af_0' }" - - input: - // TODO nf-core: Where applicable all sample-specific information e.g. "id", "single_end", "read_group" - // MUST be provided as an input via a Groovy Map called "meta". - // This information may not be required in some instances e.g. indexing reference genome files: - // https://github.com/nf-core/modules/blob/master/modules/nf-core/bwa/index/main.nf - // TODO nf-core: Where applicable please provide/convert compressed files as input/output - // e.g. "*.fastq.gz" and NOT "*.fastq", "*.bam" and NOT "*.sam" etc. 
- path bam - - output: - // TODO nf-core: Named file extensions MUST be emitted for ALL output channels - path "*.bam", emit: bam - // TODO nf-core: List additional required output channels/values here - path "versions.yml" , emit: versions - - when: - task.ext.when == null || task.ext.when - - script: - def args = task.ext.args ?: '' - - // TODO nf-core: Where possible, a command MUST be provided to obtain the version number of the software e.g. 1.10 - // If the software is unable to output a version number on the command-line then it can be manually specified - // e.g. https://github.com/nf-core/modules/blob/master/modules/nf-core/homer/annotatepeaks/main.nf - // Each software used MUST provide the software name and version number in the YAML version file (versions.yml) - // TODO nf-core: It MUST be possible to pass additional parameters to the tool as a command-line string via the "task.ext.args" directive - // TODO nf-core: If the tool supports multi-threading then you MUST provide the appropriate parameter - // using the Nextflow "task" variable e.g. "--threads $task.cpus" - // TODO nf-core: Please replace the example samtools command below with your module's command - // TODO nf-core: Please indent the command appropriately (4 spaces!!) 
to help with readability ;) - """ - samtools \\ - sort \\ - $args \\ - -@ $task.cpus \\ - $bam - - cat <<-END_VERSIONS > versions.yml - "${task.process}": - humann: \$(samtools --version |& sed '1!d ; s/samtools //') - END_VERSIONS - """ - - stub: - def args = task.ext.args ?: '' - - // TODO nf-core: A stub section should mimic the execution of the original module as best as possible - // Have a look at the following examples: - // Simple example: https://github.com/nf-core/modules/blob/818474a292b4860ae8ff88e149fbcda68814114d/modules/nf-core/bcftools/annotate/main.nf#L47-L63 - // Complex example: https://github.com/nf-core/modules/blob/818474a292b4860ae8ff88e149fbcda68814114d/modules/nf-core/bedtools/split/main.nf#L38-L54 - """ - touch ${prefix}.bam - - cat <<-END_VERSIONS > versions.yml - "${task.process}": - humann: \$(samtools --version |& sed '1!d ; s/samtools //') - END_VERSIONS - """ -} diff --git a/modules/local/humann/join/main.nf b/modules/local/humann/join/main.nf index 58bc710..47c2f66 100644 --- a/modules/local/humann/join/main.nf +++ b/modules/local/humann/join/main.nf @@ -1,5 +1,5 @@ -process HUMANN_DOWNLOADUNIREFDB { - label 'process_single' +process HUMANN_JOIN { + label 'process_low' conda "${moduleDir}/environment.yml" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? @@ -7,23 +7,25 @@ process HUMANN_DOWNLOADUNIREFDB { 'biocontainers/humann:3.8--pyh7cba7a3_0' }" input: - val uniref_db_version + path(input_dir) + val file_name_pattern output: - path("uniref") , emit: uniref_db - path "versions.yml" , emit: versions - - when: - task.ext.when == null || task.ext.when + path("*_joined.tsv.gz"), emit: joined + path "versions.yml" , emit: versions script: def args = task.ext.args ?: '' """ - humann_databases \\ - --download uniref \\ - ${uniref_db_version} \\ - . 
\\ + if compgen -G "$input_dir/*$file_name_pattern*.gz" > /dev/null; then + find $input_dir \( -name '*$file_name_pattern*' \) -exec gunzip --verbose {} \; + fi + humann_join_tables \\ + --input $input_dir \\ + --output ${file_name_pattern}_joined.tsv \\ + --file_name $file_name_pattern \\ ${args} + gzip -n ${file_name_pattern}_joined.tsv cat <<-END_VERSIONS > versions.yml "${task.process}": humann: \$(echo \$(humann --version 2>&1 | sed 's/^.*humann //; s/Using.*\$//' )) @@ -31,9 +33,8 @@ process HUMANN_DOWNLOADUNIREFDB { """ stub: - def args = task.ext.args ?: '' """ - mkdir uniref + touch ${file_name_pattern}_joined.tsv.gz cat <<-END_VERSIONS > versions.yml "${task.process}": humann: \$(echo \$(humann --version 2>&1 | sed 's/^.*humann //; s/Using.*\$//' )) diff --git a/modules/local/humann/join/nextflow.config b/modules/local/humann/join/nextflow.config index 6be75c4..2c90039 100644 --- a/modules/local/humann/join/nextflow.config +++ b/modules/local/humann/join/nextflow.config @@ -1,5 +1,12 @@ process { - withName: HUMANN_DOWNLOADUNIREFDB { - ext.args = "--update-config no" + withName: HUMANN_JOIN { + publishDir = [ + [ + path: { "${params.outdir}/humann/results" }, + mode: params.publish_dir_mode, + pattern: '*_joined.tsv.gz' + ] + ] + ext.args = params.humann_options ? 
params.humann_options : "" } } diff --git a/modules/local/humann/join/tests/main.nf.test b/modules/local/humann/join/tests/main.nf.test index 4a0ba43..3e748ea 100644 --- a/modules/local/humann/join/tests/main.nf.test +++ b/modules/local/humann/join/tests/main.nf.test @@ -1,40 +1,47 @@ nextflow_process { - name "Test Process HUMANN_DOWNLOADUNIREFDB" + name "Test Process HUMANN_JOIN" script "../main.nf" - process "HUMANN_DOWNLOADUNIREFDB" + process "HUMANN_JOIN" config "./nextflow.config" tag "modules" tag "modules_nfcore" tag "humann" - tag "humann/downloadunireflandb" + tag "humann/join" - test("DEMO_diamond") { + test("humann/join") { when { process { """ - input[0] = "DEMO_diamond" + input[0] = TODO - first is a path to a dir w files matching the below pattern + input[1] = "genefamilies" """ } } then { - assertAll( + assertAll ( { assert process.success }, - { assert snapshot(process.out).match() } + { assert snapshot( + process.out.joined + process.out.versions + ).match() + }, + { assert path(process.out.log[0][1]).text.contains("TODO a line indicates its running and gives a version, hopefully?") } ) } } - test("DEMO_diamond - stub") { + test("humann/join - stub") { options "-stub" when { process { """ - input[0] = "DEMO_diamond" + input[0] = TODO - first is a path to a dir w files matching the below pattern + input[1] = "genefamilies" """ } } diff --git a/modules/local/humann/join/tests/nextflow.config b/modules/local/humann/join/tests/nextflow.config index 6be75c4..2c90039 100644 --- a/modules/local/humann/join/tests/nextflow.config +++ b/modules/local/humann/join/tests/nextflow.config @@ -1,5 +1,12 @@ process { - withName: HUMANN_DOWNLOADUNIREFDB { - ext.args = "--update-config no" + withName: HUMANN_JOIN { + publishDir = [ + [ + path: { "${params.outdir}/humann/results" }, + mode: params.publish_dir_mode, + pattern: '*_joined.tsv.gz' + ] + ] + ext.args = params.humann_options ? 
params.humann_options : "" } } diff --git a/modules/local/humann/regroup/main.nf b/modules/local/humann/regroup/main.nf index 58bc710..37c2cbc 100644 --- a/modules/local/humann/regroup/main.nf +++ b/modules/local/humann/regroup/main.nf @@ -1,4 +1,5 @@ -process HUMANN_DOWNLOADUNIREFDB { +process HUMANN_REGROUP { + tag "$meta.id" label 'process_single' conda "${moduleDir}/environment.yml" @@ -7,23 +8,28 @@ process HUMANN_DOWNLOADUNIREFDB { 'biocontainers/humann:3.8--pyh7cba7a3_0' }" input: - val uniref_db_version + tuple val(meta), path(input) + val groups output: - path("uniref") , emit: uniref_db - path "versions.yml" , emit: versions - - when: - task.ext.when == null || task.ext.when + tuple val(meta), path("*_regroup.tsv.gz"), emit: regroup + path "versions.yml" , emit: versions script: def args = task.ext.args ?: '' + def prefix = task.ext.prefix ?: "${meta.id}" """ - humann_databases \\ - --download uniref \\ - ${uniref_db_version} \\ - . \\ + if [[ $input == *.gz ]]; then + gunzip -c $input > input.tsv + else + mv $input input.tsv + fi + humann_regroup_table \\ + --input input.tsv \\ + --output ${prefix}_${groups}_regroup.tsv \\ + --groups $groups \\ ${args} + gzip -n ${prefix}_${groups}_regroup.tsv cat <<-END_VERSIONS > versions.yml "${task.process}": humann: \$(echo \$(humann --version 2>&1 | sed 's/^.*humann //; s/Using.*\$//' )) @@ -31,9 +37,9 @@ process HUMANN_DOWNLOADUNIREFDB { """ stub: - def args = task.ext.args ?: '' + def prefix = task.ext.prefix ?: "${meta.id}" """ - mkdir uniref + touch ${prefix}_${groups}_regroup.tsv.gz cat <<-END_VERSIONS > versions.yml "${task.process}": humann: \$(echo \$(humann --version 2>&1 | sed 's/^.*humann //; s/Using.*\$//' )) diff --git a/modules/local/humann/regroup/nextflow.config b/modules/local/humann/regroup/nextflow.config index 6be75c4..ff58126 100644 --- a/modules/local/humann/regroup/nextflow.config +++ b/modules/local/humann/regroup/nextflow.config @@ -1,5 +1,12 @@ process { - withName: HUMANN_DOWNLOADUNIREFDB 
{ - ext.args = "--update-config no" + withName: HUMANN_REGROUP { + publishDir = [ + [ + path: { "${params.outdir}/humann/regroup" }, + mode: params.publish_dir_mode, + pattern: '*_regroup.tsv.gz' + ] + ] + ext.args = params.humann_options ? params.humann_options : "" } } diff --git a/modules/local/humann/regroup/regroup.nf b/modules/local/humann/regroup/regroup.nf deleted file mode 100644 index 8b70eab..0000000 --- a/modules/local/humann/regroup/regroup.nf +++ /dev/null @@ -1,91 +0,0 @@ -// TODO nf-core: If in doubt look at other nf-core/modules to see how we are doing things! :) -// https://github.com/nf-core/modules/tree/master/modules/nf-core/ -// You can also ask for help via your pull request or on the #modules channel on the nf-core Slack workspace: -// https://nf-co.re/join -// TODO nf-core: A module file SHOULD only define input and output files as command-line parameters. -// All other parameters MUST be provided using the "task.ext" directive, see here: -// https://www.nextflow.io/docs/latest/process.html#ext -// where "task.ext" is a string. -// Any parameters that need to be evaluated in the context of a particular sample -// e.g. single-end/paired-end data MUST also be defined and evaluated appropriately. -// TODO nf-core: Software that can be piped together SHOULD be added to separate module files -// unless there is a run-time, storage advantage in implementing in this way -// e.g. it's ok to have a single module for bwa to output BAM instead of SAM: -// bwa mem | samtools view -B -T ref.fasta -// TODO nf-core: Optional inputs are not currently supported by Nextflow. However, using an empty -// list (`[]`) instead of a file can be used to work around this issue. - -process HUMANN_REGROUP { - tag "$meta.id" - label 'process_low' - - // TODO nf-core: List required Conda package(s). - // Software MUST be pinned to channel (i.e. "bioconda"), version (i.e. "1.10"). - // For Conda, the build (i.e. 
"h9402c20_2") must be EXCLUDED to support installation on different operating systems. - // TODO nf-core: See section in main README for further information regarding finding and adding container addresses to the section below. - conda "${moduleDir}/environment.yml" - container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? - 'https://depot.galaxyproject.org/singularity/humann:3.9--py312hdfd78af_0': - 'biocontainers/humann:3.9--py312hdfd78af_0' }" - - input: - // TODO nf-core: Where applicable all sample-specific information e.g. "id", "single_end", "read_group" - // MUST be provided as an input via a Groovy Map called "meta". - // This information may not be required in some instances e.g. indexing reference genome files: - // https://github.com/nf-core/modules/blob/master/modules/nf-core/bwa/index/main.nf - // TODO nf-core: Where applicable please provide/convert compressed files as input/output - // e.g. "*.fastq.gz" and NOT "*.fastq", "*.bam" and NOT "*.sam" etc. - tuple val(meta), path(bam) - - output: - // TODO nf-core: Named file extensions MUST be emitted for ALL output channels - tuple val(meta), path("*.bam"), emit: bam - // TODO nf-core: List additional required output channels/values here - path "versions.yml" , emit: versions - - when: - task.ext.when == null || task.ext.when - - script: - def args = task.ext.args ?: '' - def prefix = task.ext.prefix ?: "${meta.id}" - // TODO nf-core: Where possible, a command MUST be provided to obtain the version number of the software e.g. 1.10 - // If the software is unable to output a version number on the command-line then it can be manually specified - // e.g. 
https://github.com/nf-core/modules/blob/master/modules/nf-core/homer/annotatepeaks/main.nf - // Each software used MUST provide the software name and version number in the YAML version file (versions.yml) - // TODO nf-core: It MUST be possible to pass additional parameters to the tool as a command-line string via the "task.ext.args" directive - // TODO nf-core: If the tool supports multi-threading then you MUST provide the appropriate parameter - // using the Nextflow "task" variable e.g. "--threads $task.cpus" - // TODO nf-core: Please replace the example samtools command below with your module's command - // TODO nf-core: Please indent the command appropriately (4 spaces!!) to help with readability ;) - """ - samtools \\ - sort \\ - $args \\ - -@ $task.cpus \\ - -o ${prefix}.bam \\ - -T $prefix \\ - $bam - - cat <<-END_VERSIONS > versions.yml - "${task.process}": - humann: \$(samtools --version |& sed '1!d ; s/samtools //') - END_VERSIONS - """ - - stub: - def args = task.ext.args ?: '' - def prefix = task.ext.prefix ?: "${meta.id}" - // TODO nf-core: A stub section should mimic the execution of the original module as best as possible - // Have a look at the following examples: - // Simple example: https://github.com/nf-core/modules/blob/818474a292b4860ae8ff88e149fbcda68814114d/modules/nf-core/bcftools/annotate/main.nf#L47-L63 - // Complex example: https://github.com/nf-core/modules/blob/818474a292b4860ae8ff88e149fbcda68814114d/modules/nf-core/bedtools/split/main.nf#L38-L54 - """ - touch ${prefix}.bam - - cat <<-END_VERSIONS > versions.yml - "${task.process}": - humann: \$(samtools --version |& sed '1!d ; s/samtools //') - END_VERSIONS - """ -} diff --git a/modules/local/humann/regroup/tests/main.nf.test b/modules/local/humann/regroup/tests/main.nf.test index 4a0ba43..9dc218c 100644 --- a/modules/local/humann/regroup/tests/main.nf.test +++ b/modules/local/humann/regroup/tests/main.nf.test @@ -1,40 +1,61 @@ nextflow_process { - name "Test Process 
HUMANN_DOWNLOADUNIREFDB" + name "Test Process HUMANN_REGROUP" script "../main.nf" - process "HUMANN_DOWNLOADUNIREFDB" + process "HUMANN_REGROUP" config "./nextflow.config" tag "modules" tag "modules_nfcore" tag "humann" - tag "humann/downloadunireflandb" + tag "humann/regroup" - test("DEMO_diamond") { + test("humann/regroup") { when { process { """ - input[0] = "DEMO_diamond" + input[0] = Channel.of( + [ + [id: 'test'], + [ + file(params.modules_testdata_base_path + "TODO_some_genefamilies.tsv", checkIfExists: true) + ] + ] + ) + input[1] = "uniref90_rxn" """ } } then { - assertAll( + assertAll ( { assert process.success }, - { assert snapshot(process.out).match() } + { assert snapshot( + process.out.regroup + process.out.versions + ).match() + }, + { assert path(process.out.log[0][1]).text.contains("TODO a line indicates its running and gives a version, hopefully?") } ) } } - test("DEMO_diamond - stub") { + test("humann/regroup - stub") { options "-stub" when { process { """ - input[0] = "DEMO_diamond" + input[0] = Channel.of( + [ + [id: 'test'], + [ + file(params.modules_testdata_base_path + "TODO_some_genefamilies.tsv", checkIfExists: true) + ] + ] + ) + input[1] = "uniref90_rxn" """ } } diff --git a/modules/local/humann/regroup/tests/nextflow.config b/modules/local/humann/regroup/tests/nextflow.config index 6be75c4..ff58126 100644 --- a/modules/local/humann/regroup/tests/nextflow.config +++ b/modules/local/humann/regroup/tests/nextflow.config @@ -1,5 +1,12 @@ process { - withName: HUMANN_DOWNLOADUNIREFDB { - ext.args = "--update-config no" + withName: HUMANN_REGROUP { + publishDir = [ + [ + path: { "${params.outdir}/humann/regroup" }, + mode: params.publish_dir_mode, + pattern: '*_regroup.tsv.gz' + ] + ] + ext.args = params.humann_options ? 
params.humann_options : "" } } diff --git a/modules/local/humann/rename/main.nf b/modules/local/humann/rename/main.nf index 58bc710..16d8dc2 100644 --- a/modules/local/humann/rename/main.nf +++ b/modules/local/humann/rename/main.nf @@ -1,5 +1,6 @@ -process HUMANN_DOWNLOADUNIREFDB { - label 'process_single' +process HUMANN_RENAME { + tag "$meta.id" + label 'process_low' conda "${moduleDir}/environment.yml" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? @@ -7,23 +8,28 @@ process HUMANN_DOWNLOADUNIREFDB { 'biocontainers/humann:3.8--pyh7cba7a3_0' }" input: - val uniref_db_version + tuple val(meta), path(input) + val names output: - path("uniref") , emit: uniref_db - path "versions.yml" , emit: versions - - when: - task.ext.when == null || task.ext.when + tuple val(meta), path("*_renamed.tsv.gz"), emit: renamed + path "versions.yml" , emit: versions script: def args = task.ext.args ?: '' + def prefix = task.ext.prefix ?: "${meta.id}" """ - humann_databases \\ - --download uniref \\ - ${uniref_db_version} \\ - . 
\\ + if [[ $input == *.gz ]]; then + gunzip -c $input > input.tsv + else + mv $input input.tsv + fi + humann_rename_table \\ + --input input.tsv \\ + --output ${prefix}_${names}_rename.tsv \\ + --names $names \\ ${args} + gzip -n ${prefix}_${names}_rename.tsv cat <<-END_VERSIONS > versions.yml "${task.process}": humann: \$(echo \$(humann --version 2>&1 | sed 's/^.*humann //; s/Using.*\$//' )) @@ -31,9 +37,9 @@ process HUMANN_DOWNLOADUNIREFDB { """ stub: - def args = task.ext.args ?: '' + def prefix = task.ext.prefix ?: "${meta.id}" """ - mkdir uniref + touch ${prefix}_${names}_rename.tsv cat <<-END_VERSIONS > versions.yml "${task.process}": humann: \$(echo \$(humann --version 2>&1 | sed 's/^.*humann //; s/Using.*\$//' )) diff --git a/modules/local/humann/rename/nextflow.config b/modules/local/humann/rename/nextflow.config index 6be75c4..a990469 100644 --- a/modules/local/humann/rename/nextflow.config +++ b/modules/local/humann/rename/nextflow.config @@ -1,5 +1,12 @@ process { - withName: HUMANN_DOWNLOADUNIREFDB { - ext.args = "--update-config no" + withName: HUMANN_RENAME { + publishDir = [ + [ + path: { "${params.outdir}/humann/rename" }, + mode: params.publish_dir_mode, + pattern: '*_renamed.tsv.gz' + ] + ] + ext.args = params.humann_options ? params.humann_options : "" } } diff --git a/modules/local/humann/rename/rename.nf b/modules/local/humann/rename/rename.nf deleted file mode 100644 index d33eea0..0000000 --- a/modules/local/humann/rename/rename.nf +++ /dev/null @@ -1,91 +0,0 @@ -// TODO nf-core: If in doubt look at other nf-core/modules to see how we are doing things! :) -// https://github.com/nf-core/modules/tree/master/modules/nf-core/ -// You can also ask for help via your pull request or on the #modules channel on the nf-core Slack workspace: -// https://nf-co.re/join -// TODO nf-core: A module file SHOULD only define input and output files as command-line parameters. 
-// All other parameters MUST be provided using the "task.ext" directive, see here: -// https://www.nextflow.io/docs/latest/process.html#ext -// where "task.ext" is a string. -// Any parameters that need to be evaluated in the context of a particular sample -// e.g. single-end/paired-end data MUST also be defined and evaluated appropriately. -// TODO nf-core: Software that can be piped together SHOULD be added to separate module files -// unless there is a run-time, storage advantage in implementing in this way -// e.g. it's ok to have a single module for bwa to output BAM instead of SAM: -// bwa mem | samtools view -B -T ref.fasta -// TODO nf-core: Optional inputs are not currently supported by Nextflow. However, using an empty -// list (`[]`) instead of a file can be used to work around this issue. - -process HUMANN_RENAME { - tag "$meta.id" - label 'process_low' - - // TODO nf-core: List required Conda package(s). - // Software MUST be pinned to channel (i.e. "bioconda"), version (i.e. "1.10"). - // For Conda, the build (i.e. "h9402c20_2") must be EXCLUDED to support installation on different operating systems. - // TODO nf-core: See section in main README for further information regarding finding and adding container addresses to the section below. - conda "${moduleDir}/environment.yml" - container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? - 'https://depot.galaxyproject.org/singularity/humann:3.9--py312hdfd78af_0': - 'biocontainers/humann:3.9--py312hdfd78af_0' }" - - input: - // TODO nf-core: Where applicable all sample-specific information e.g. "id", "single_end", "read_group" - // MUST be provided as an input via a Groovy Map called "meta". - // This information may not be required in some instances e.g. 
indexing reference genome files: - // https://github.com/nf-core/modules/blob/master/modules/nf-core/bwa/index/main.nf - // TODO nf-core: Where applicable please provide/convert compressed files as input/output - // e.g. "*.fastq.gz" and NOT "*.fastq", "*.bam" and NOT "*.sam" etc. - tuple val(meta), path(bam) - - output: - // TODO nf-core: Named file extensions MUST be emitted for ALL output channels - tuple val(meta), path("*.bam"), emit: bam - // TODO nf-core: List additional required output channels/values here - path "versions.yml" , emit: versions - - when: - task.ext.when == null || task.ext.when - - script: - def args = task.ext.args ?: '' - def prefix = task.ext.prefix ?: "${meta.id}" - // TODO nf-core: Where possible, a command MUST be provided to obtain the version number of the software e.g. 1.10 - // If the software is unable to output a version number on the command-line then it can be manually specified - // e.g. https://github.com/nf-core/modules/blob/master/modules/nf-core/homer/annotatepeaks/main.nf - // Each software used MUST provide the software name and version number in the YAML version file (versions.yml) - // TODO nf-core: It MUST be possible to pass additional parameters to the tool as a command-line string via the "task.ext.args" directive - // TODO nf-core: If the tool supports multi-threading then you MUST provide the appropriate parameter - // using the Nextflow "task" variable e.g. "--threads $task.cpus" - // TODO nf-core: Please replace the example samtools command below with your module's command - // TODO nf-core: Please indent the command appropriately (4 spaces!!) 
to help with readability ;) - """ - samtools \\ - sort \\ - $args \\ - -@ $task.cpus \\ - -o ${prefix}.bam \\ - -T $prefix \\ - $bam - - cat <<-END_VERSIONS > versions.yml - "${task.process}": - humann: \$(samtools --version |& sed '1!d ; s/samtools //') - END_VERSIONS - """ - - stub: - def args = task.ext.args ?: '' - def prefix = task.ext.prefix ?: "${meta.id}" - // TODO nf-core: A stub section should mimic the execution of the original module as best as possible - // Have a look at the following examples: - // Simple example: https://github.com/nf-core/modules/blob/818474a292b4860ae8ff88e149fbcda68814114d/modules/nf-core/bcftools/annotate/main.nf#L47-L63 - // Complex example: https://github.com/nf-core/modules/blob/818474a292b4860ae8ff88e149fbcda68814114d/modules/nf-core/bedtools/split/main.nf#L38-L54 - """ - touch ${prefix}.bam - - cat <<-END_VERSIONS > versions.yml - "${task.process}": - humann: \$(samtools --version |& sed '1!d ; s/samtools //') - END_VERSIONS - """ -} diff --git a/modules/local/humann/rename/tests/main.nf.test b/modules/local/humann/rename/tests/main.nf.test index 4a0ba43..edee5b6 100644 --- a/modules/local/humann/rename/tests/main.nf.test +++ b/modules/local/humann/rename/tests/main.nf.test @@ -1,40 +1,61 @@ nextflow_process { - name "Test Process HUMANN_DOWNLOADUNIREFDB" + name "Test Process HUMANN_RENAME" script "../main.nf" - process "HUMANN_DOWNLOADUNIREFDB" + process "HUMANN_RENAME" config "./nextflow.config" tag "modules" tag "modules_nfcore" tag "humann" - tag "humann/downloadunireflandb" + tag "humann/rename" - test("DEMO_diamond") { + test("humann/rename") { when { process { """ - input[0] = "DEMO_diamond" + input[0] = Channel.of( + [ + [id: 'test'], + [ + file(params.modules_testdata_base_path + "TODO_some_rxn.tsv", checkIfExists: true) + ] + ] + ) + input[1] = "metacyc-rxn" """ } } then { - assertAll( + assertAll ( { assert process.success }, - { assert snapshot(process.out).match() } + { assert snapshot( + process.out.renamed + 
process.out.versions + ).match() + }, + { assert path(process.out.log[0][1]).text.contains("TODO a line indicates its running and gives a version, hopefully?") } ) } } - test("DEMO_diamond - stub") { + test("humann/rename - stub") { options "-stub" when { process { """ - input[0] = "DEMO_diamond" + input[0] = Channel.of( + [ + [id: 'test'], + [ + file(params.modules_testdata_base_path + "TODO_some_rxn.tsv", checkIfExists: true) + ] + ] + ) + input[1] = "uniref90_rxn" """ } } diff --git a/modules/local/humann/rename/tests/nextflow.config b/modules/local/humann/rename/tests/nextflow.config index 6be75c4..a990469 100644 --- a/modules/local/humann/rename/tests/nextflow.config +++ b/modules/local/humann/rename/tests/nextflow.config @@ -1,5 +1,12 @@ process { - withName: HUMANN_DOWNLOADUNIREFDB { - ext.args = "--update-config no" + withName: HUMANN_RENAME { + publishDir = [ + [ + path: { "${params.outdir}/humann/rename" }, + mode: params.publish_dir_mode, + pattern: '*_renamed.tsv.gz' + ] + ] + ext.args = params.humann_options ? params.humann_options : "" } } diff --git a/modules/local/humann/renorm/main.nf b/modules/local/humann/renorm/main.nf index 58bc710..315bad5 100644 --- a/modules/local/humann/renorm/main.nf +++ b/modules/local/humann/renorm/main.nf @@ -1,5 +1,6 @@ -process HUMANN_DOWNLOADUNIREFDB { - label 'process_single' +process HUMANN_RENORM { + tag "$meta.id" + label 'process_low' conda "${moduleDir}/environment.yml" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
@@ -7,33 +8,38 @@ process HUMANN_DOWNLOADUNIREFDB { 'biocontainers/humann:3.8--pyh7cba7a3_0' }" input: - val uniref_db_version + tuple val(meta), path(input) + val units output: - path("uniref") , emit: uniref_db - path "versions.yml" , emit: versions - - when: - task.ext.when == null || task.ext.when + tuple val(meta), path("*_renorm.tsv.gz"), emit: renorm + path "versions.yml" , emit: versions script: def args = task.ext.args ?: '' + def prefix = task.ext.prefix ?: "${meta.id}" """ - humann_databases \\ - --download uniref \\ - ${uniref_db_version} \\ - . \\ + if [[ $input == *.gz ]]; then + gunzip -c $input > input.tsv + else + mv $input input.tsv + fi + humann_renorm_table \\ + --input input.tsv \\ + --output ${prefix}_${units}_renorm.tsv \\ + --units $units \\ + --update-snames \\ ${args} - cat <<-END_VERSIONS > versions.yml - "${task.process}": - humann: \$(echo \$(humann --version 2>&1 | sed 's/^.*humann //; s/Using.*\$//' )) - END_VERSIONS - """ + gzip -n ${prefix}_${units}_renorm.tsv stub: def args = task.ext.args ?: '' """ - mkdir uniref + + stub: + def prefix = task.ext.prefix ?: "${meta.id}" + """ + touch ${prefix}_${units}_renorm.tsv.gz cat <<-END_VERSIONS > versions.yml "${task.process}": humann: \$(echo \$(humann --version 2>&1 | sed 's/^.*humann //; s/Using.*\$//' )) diff --git a/modules/local/humann/renorm/nextflow.config b/modules/local/humann/renorm/nextflow.config index 6be75c4..9298e72 100644 --- a/modules/local/humann/renorm/nextflow.config +++ b/modules/local/humann/renorm/nextflow.config @@ -1,5 +1,12 @@ process { - withName: HUMANN_DOWNLOADUNIREFDB { - ext.args = "--update-config no" + withName: HUMANN_RENORM { + publishDir = [ + [ + path: { "${params.outdir}/humann/renorm" }, + mode: params.publish_dir_mode, + pattern: '*_renorm.tsv.gz' + ] + ] + ext.args = params.humann_options ? 
params.humann_options : "" } } diff --git a/modules/local/humann/renorm/renorm.nf b/modules/local/humann/renorm/renorm.nf deleted file mode 100644 index 393a62f..0000000 --- a/modules/local/humann/renorm/renorm.nf +++ /dev/null @@ -1,91 +0,0 @@ -// TODO nf-core: If in doubt look at other nf-core/modules to see how we are doing things! :) -// https://github.com/nf-core/modules/tree/master/modules/nf-core/ -// You can also ask for help via your pull request or on the #modules channel on the nf-core Slack workspace: -// https://nf-co.re/join -// TODO nf-core: A module file SHOULD only define input and output files as command-line parameters. -// All other parameters MUST be provided using the "task.ext" directive, see here: -// https://www.nextflow.io/docs/latest/process.html#ext -// where "task.ext" is a string. -// Any parameters that need to be evaluated in the context of a particular sample -// e.g. single-end/paired-end data MUST also be defined and evaluated appropriately. -// TODO nf-core: Software that can be piped together SHOULD be added to separate module files -// unless there is a run-time, storage advantage in implementing in this way -// e.g. it's ok to have a single module for bwa to output BAM instead of SAM: -// bwa mem | samtools view -B -T ref.fasta -// TODO nf-core: Optional inputs are not currently supported by Nextflow. However, using an empty -// list (`[]`) instead of a file can be used to work around this issue. - -process HUMANN_RENORM { - tag "$meta.id" - label 'process_low' - - // TODO nf-core: List required Conda package(s). - // Software MUST be pinned to channel (i.e. "bioconda"), version (i.e. "1.10"). - // For Conda, the build (i.e. "h9402c20_2") must be EXCLUDED to support installation on different operating systems. - // TODO nf-core: See section in main README for further information regarding finding and adding container addresses to the section below. 
- conda "${moduleDir}/environment.yml" - container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? - 'https://depot.galaxyproject.org/singularity/humann:3.9--py312hdfd78af_0': - 'biocontainers/humann:3.9--py312hdfd78af_0' }" - - input: - // TODO nf-core: Where applicable all sample-specific information e.g. "id", "single_end", "read_group" - // MUST be provided as an input via a Groovy Map called "meta". - // This information may not be required in some instances e.g. indexing reference genome files: - // https://github.com/nf-core/modules/blob/master/modules/nf-core/bwa/index/main.nf - // TODO nf-core: Where applicable please provide/convert compressed files as input/output - // e.g. "*.fastq.gz" and NOT "*.fastq", "*.bam" and NOT "*.sam" etc. - tuple val(meta), path(bam) - - output: - // TODO nf-core: Named file extensions MUST be emitted for ALL output channels - tuple val(meta), path("*.bam"), emit: bam - // TODO nf-core: List additional required output channels/values here - path "versions.yml" , emit: versions - - when: - task.ext.when == null || task.ext.when - - script: - def args = task.ext.args ?: '' - def prefix = task.ext.prefix ?: "${meta.id}" - // TODO nf-core: Where possible, a command MUST be provided to obtain the version number of the software e.g. 1.10 - // If the software is unable to output a version number on the command-line then it can be manually specified - // e.g. https://github.com/nf-core/modules/blob/master/modules/nf-core/homer/annotatepeaks/main.nf - // Each software used MUST provide the software name and version number in the YAML version file (versions.yml) - // TODO nf-core: It MUST be possible to pass additional parameters to the tool as a command-line string via the "task.ext.args" directive - // TODO nf-core: If the tool supports multi-threading then you MUST provide the appropriate parameter - // using the Nextflow "task" variable e.g. 
"--threads $task.cpus" - // TODO nf-core: Please replace the example samtools command below with your module's command - // TODO nf-core: Please indent the command appropriately (4 spaces!!) to help with readability ;) - """ - samtools \\ - sort \\ - $args \\ - -@ $task.cpus \\ - -o ${prefix}.bam \\ - -T $prefix \\ - $bam - - cat <<-END_VERSIONS > versions.yml - "${task.process}": - humann: \$(samtools --version |& sed '1!d ; s/samtools //') - END_VERSIONS - """ - - stub: - def args = task.ext.args ?: '' - def prefix = task.ext.prefix ?: "${meta.id}" - // TODO nf-core: A stub section should mimic the execution of the original module as best as possible - // Have a look at the following examples: - // Simple example: https://github.com/nf-core/modules/blob/818474a292b4860ae8ff88e149fbcda68814114d/modules/nf-core/bcftools/annotate/main.nf#L47-L63 - // Complex example: https://github.com/nf-core/modules/blob/818474a292b4860ae8ff88e149fbcda68814114d/modules/nf-core/bedtools/split/main.nf#L38-L54 - """ - touch ${prefix}.bam - - cat <<-END_VERSIONS > versions.yml - "${task.process}": - humann: \$(samtools --version |& sed '1!d ; s/samtools //') - END_VERSIONS - """ -} diff --git a/modules/local/humann/renorm/tests/main.nf.test b/modules/local/humann/renorm/tests/main.nf.test index 4a0ba43..2888606 100644 --- a/modules/local/humann/renorm/tests/main.nf.test +++ b/modules/local/humann/renorm/tests/main.nf.test @@ -1,40 +1,61 @@ nextflow_process { - name "Test Process HUMANN_DOWNLOADUNIREFDB" + name "Test Process HUMANN_RENORM" script "../main.nf" - process "HUMANN_DOWNLOADUNIREFDB" + process "HUMANN_RENORM" config "./nextflow.config" tag "modules" tag "modules_nfcore" tag "humann" - tag "humann/downloadunireflandb" + tag "humann/renorm" - test("DEMO_diamond") { + test("humann/renorm") { when { process { """ - input[0] = "DEMO_diamond" + input[0] = Channel.of( + [ + [id: 'test'], + [ + file(params.modules_testdata_base_path + "TODO_some_genefamilies.tsv", checkIfExists: 
true) + ] + ] + ) + input[1] = "cpm" """ } } then { - assertAll( + assertAll ( { assert process.success }, - { assert snapshot(process.out).match() } + { assert snapshot( + process.out.renorm + process.out.versions + ).match() + }, + { assert path(process.out.log[0][1]).text.contains("TODO a line indicates its running and gives a version, hopefully?") } ) } } - test("DEMO_diamond - stub") { + test("humann/renorm - stub") { options "-stub" when { process { """ - input[0] = "DEMO_diamond" + input[0] = Channel.of( + [ + [id: 'test'], + [ + file(params.modules_testdata_base_path + "TODO_some_genefamilies.tsv", checkIfExists: true) + ] + ] + ) + input[1] = "cpm" """ } } diff --git a/modules/local/humann/renorm/tests/nextflow.config b/modules/local/humann/renorm/tests/nextflow.config index 6be75c4..9298e72 100644 --- a/modules/local/humann/renorm/tests/nextflow.config +++ b/modules/local/humann/renorm/tests/nextflow.config @@ -1,5 +1,12 @@ process { - withName: HUMANN_DOWNLOADUNIREFDB { - ext.args = "--update-config no" + withName: HUMANN_RENORM { + publishDir = [ + [ + path: { "${params.outdir}/humann/renorm" }, + mode: params.publish_dir_mode, + pattern: '*_renorm.tsv.gz' + ] + ] + ext.args = params.humann_options ? 
params.humann_options : "" } } From b520116a169ef781894c52bc287040af6365af8a Mon Sep 17 00:00:00 2001 From: Danielle Callan Date: Tue, 17 Sep 2024 14:51:50 -0400 Subject: [PATCH 03/23] add draft humann subworkflow --- .../fastq_microbial_pathway_humann/main.nf | 90 +++++++++++++++++++ .../nextflow.config | 5 ++ 2 files changed, 95 insertions(+) create mode 100644 subworkflows/local/fastq_microbial_pathway_humann/main.nf diff --git a/subworkflows/local/fastq_microbial_pathway_humann/main.nf b/subworkflows/local/fastq_microbial_pathway_humann/main.nf new file mode 100644 index 0000000..554d37b --- /dev/null +++ b/subworkflows/local/fastq_microbial_pathway_humann/main.nf @@ -0,0 +1,90 @@ +// +// SUBWORKFLOW: Identify gene families and pathways associated with reads using HUMAnN 3 +// + +include { HUMANN_DOWNLOADCHOCOPHLANDB } from '../../../modules/local/humann/downloadchocophlandb/main' +include { HUMANN_DOWNLOADUNIREFDB } from '../../../modules/local/humann/downloadunirefdb/main' +include { HUMANN_HUMANN } from '../../../modules/local/humann/humann/main' +include { HUMANN_JOIN } from '../../../modules/local/humann/join/main' +include { HUMANN_REGROUP } from '../../../modules/local/humann/regroup/main' +include { HUMANN_RENAME } from '../../../modules/local/humann/rename/main' +include { HUMANN_RENORM } from '../../../modules/local/humann/renorm/main' + +workflow FASTQ_MICROBIAL_PATHWAY_HUMANN { + + take: + processed_reads_fastq_gz // channel: [ val(meta), [ processed_reads_1.fastq.gz, processed_reads_2.fastq.gz ] ] (MANDATORY) + metaphlan_profile // channel: [ val(meta2), metaphlan_profile.tsv ] (MANDATORY) + chocophlan_db // channel: [ chocophlan_db ] (OPTIONAL) + chochophlan_db_version // value: '' (OPTIONAL) + uniref_db // channel: [ uniref_db ] (OPTIONAL) + uniref_db_version // value: '' (OPTIONAL) + + main: + + ch_versions = Channel.empty() + + // if chocophlan_db exists, skip HUMANN_DOWNLOADCHOCOPHLANDB + if ( chocophlan_db ){ + ch_chocophlan_db = 
chocophlan_db + } else { + // + // MODULE: Download ChocoPhlAn database + // + ch_chocophlan_db = HUMANN_DOWNLOADCHOCOPHLANDB ( chochophlan_db_version ).chochophlan_db + ch_versions = ch_versions.mix(HUMANN_DOWNLOADCHOCOPHLANDB.out.versions) + } + + // if uniref_db exists, skip HUMANN_DOWNLOADUNIREFDB + if ( uniref_db ){ + ch_uniref_db = uniref_db + } else { + // + // MODULE: Download UniRef database + // + ch_uniref_db = HUMANN_DOWNLOADUNIREFDB ( uniref_db_version ).uniref_db + ch_versions = ch_versions.mix(HUMANN_DOWNLOADUNIREFDB.out.versions) + } + + // + // MODULE: Run HUMAnN 3 for raw outputs + // + ch_humann_genefamilies_raw = HUMANN_HUMANN ( processed_reads_fastq_gz, metaphlan_profile, ch_chocophlan_db, ch_uniref_db ).genefamilies + ch_humann_pathabundance_raw = HUMANN_HUMANN.out.pathabundance + ch_humann_pathcoverage_raw = HUMANN_HUMANN.out.pathcoverage // TODO is this still right? looking at humann docs, might not get this file any longer? + ch_humann_logs = HUMANN_HUMANN.out.log + ch_versions = ch_versions.mix(HUMANN_HUMANN.out.versions) + + // collect log files and store in a directory + ch_combined_humann_logs = ch_humann_logs + .map { [ [ id:'all_samples' ], it[1] ] } + .groupTuple( sort: 'deep' ) + + // + // MODULE: renormalize raw gene families from HUMAnN outputs to cpm + // + ch_humann_genefamilies_cpm = HUMANN_RENORM ( ch_humann_genefamilies_raw, 'cpm' ).renorm + ch_versions = ch_versions.mix(HUMANN_RENORM.out.versions) + + // + // MODULE: regroup cpm gene families to EC numbers + // + ch_humann_ec = HUMANN_REGROUP( ch_humann_genefamilies_cpm, 'ec').regroup // TODO make sure 'ec' is still valid arg + ch_versions = ch_versions.mix(HUMANN_REGROUP.out.versions) + + // + // MODULE: rename ec number outputs to include descriptors + // + ch_humann_ec_renamed = HUMANN_RENAME (ch_humann_ec, 'ec').rename // TODO make sure 'ec' is valid arg + ch_versions = ch_versions.mix(HUMANN_RENAME.out.versions) + + // TODO join all outputs as necessary, then update 
emit below + // TODO need to modify modules to return output dirs i suppose first, so they can be passed to join module + + emit: + humann_genefamilies_cpm = ch_humann_genefamilies_cpm // channel: [ val(meta), [ reads_1.fastq.gz, reads_2.fastq.gz ] ] + humann_ec = ch_humann_ec_renamed // channel: [ val(meta), read_counts.tsv ] + humann_pathabundance = ch_humann_pathabundance_raw // channel: [ val(meta), pathabundance.tsv ] + humann_pathcoverage = ch_humann_pathcoverage_raw // channel: [ val(meta), pathcoverage.tsv ] + versions = ch_versions // channel: [ versions.yml ] +} diff --git a/subworkflows/local/fastq_microbial_pathway_humann/nextflow.config b/subworkflows/local/fastq_microbial_pathway_humann/nextflow.config index 1a8763f..dccc682 100644 --- a/subworkflows/local/fastq_microbial_pathway_humann/nextflow.config +++ b/subworkflows/local/fastq_microbial_pathway_humann/nextflow.config @@ -1,2 +1,7 @@ includeConfig '../../../modules/local/humann/downloadchocophlandb/nextflow.config' includeConfig '../../../modules/local/humann/downloadunirefdb/nextflow.config' +includeConfig '../../../modules/local/humann/humann/nextflow.config' +includeConfig '../../../modules/local/humann/join/nextflow.config' +includeConfig '../../../modules/local/humann/regroup/nextflow.config' +includeConfig '../../../modules/local/humann/rename/nextflow.config' +includeConfig '../../../modules/local/humann/renorm/nextflow.config' From ce04afe31df8141c82293b98462ff08a569b0af1 Mon Sep 17 00:00:00 2001 From: Danielle Callan Date: Wed, 18 Sep 2024 13:07:36 -0400 Subject: [PATCH 04/23] fleshing out humann subworkflow some --- .../fastq_microbial_pathway_humann/main.nf | 44 +++++++++++---- .../tests/main.nf.test | 54 +++++++++++++++++++ .../tests/tags.yml | 2 + 3 files changed, 91 insertions(+), 9 deletions(-) create mode 100644 subworkflows/local/fastq_microbial_pathway_humann/tests/main.nf.test create mode 100644 subworkflows/local/fastq_microbial_pathway_humann/tests/tags.yml diff --git 
a/subworkflows/local/fastq_microbial_pathway_humann/main.nf b/subworkflows/local/fastq_microbial_pathway_humann/main.nf index 554d37b..6ad055d 100644 --- a/subworkflows/local/fastq_microbial_pathway_humann/main.nf +++ b/subworkflows/local/fastq_microbial_pathway_humann/main.nf @@ -5,7 +5,10 @@ include { HUMANN_DOWNLOADCHOCOPHLANDB } from '../../../modules/local/humann/downloadchocophlandb/main' include { HUMANN_DOWNLOADUNIREFDB } from '../../../modules/local/humann/downloadunirefdb/main' include { HUMANN_HUMANN } from '../../../modules/local/humann/humann/main' -include { HUMANN_JOIN } from '../../../modules/local/humann/join/main' +include { HUMANN_JOIN as JOIN_GENES } from '../../../modules/local/humann/join/main' +include { HUMANN_JOIN as JOIN_PATHABUND } from '../../../modules/local/humann/join/main' +include { HUMANN_JOIN as JOIN_PATHCOV } from '../../../modules/local/humann/join/main' +include { HUMANN_JOIN as JOIN_EC } from '../../../modules/local/humann/join/main' include { HUMANN_REGROUP } from '../../../modules/local/humann/regroup/main' include { HUMANN_RENAME } from '../../../modules/local/humann/rename/main' include { HUMANN_RENORM } from '../../../modules/local/humann/renorm/main' @@ -69,22 +72,45 @@ workflow FASTQ_MICROBIAL_PATHWAY_HUMANN { // // MODULE: regroup cpm gene families to EC numbers // - ch_humann_ec = HUMANN_REGROUP( ch_humann_genefamilies_cpm, 'ec').regroup // TODO make sure 'ec' is still valid arg + ch_humann_ec = HUMANN_REGROUP(ch_humann_genefamilies_cpm, 'ec').regroup // TODO make sure 'ec' is still valid arg ch_versions = ch_versions.mix(HUMANN_REGROUP.out.versions) // // MODULE: rename ec number outputs to include descriptors // - ch_humann_ec_renamed = HUMANN_RENAME (ch_humann_ec, 'ec').rename // TODO make sure 'ec' is valid arg + ch_humann_ec_renamed = HUMANN_RENAME(ch_humann_ec, 'ec').renamed // TODO make sure 'ec' is valid arg ch_versions = ch_versions.mix(HUMANN_RENAME.out.versions) - // TODO join all outputs as necessary, then 
update emit below - // TODO need to modify modules to return output dirs i suppose first, so they can be passed to join module + // + // MODULE: join gene abundances across all samples into one file + // + // the paths should all be the same, so im taking the first. + // should probably be validated though, im just short of time.. + ch_humann_genefamilies_cpm_path = ch_humann_genefamilies_cpm.map{ toCanonicalPath(it[1]) }.unique().take(1) + ch_humann_genefamilies_joined = JOIN_GENES(ch_humann_genefamilies_cpm_path, 'genefamilies') + + // + // MODULE: join ec abundances across all samples into one file + // + ch_humann_ec_renamed_path = ch_humann_ec_renamed.map{ toCanonicalPath(it[1]) }.unique().take(1) + ch_humann_ec_joined = JOIN_EC(ch_humann_ec_renamed_path, 'ec') // TODO check the file name pattern + + // + // MODULE: join pathway abundances across all samples into one file + // + ch_humann_pathabundance_path = ch_humann_pathabundance_raw.map{ toCanonicalPath(it[1]) }.unique().take(1) + ch_humann_pathabundance_joined = JOIN_PATHABUND(ch_humann_pathabundance_path, 'pathabundance') + + // + // MODULE: join pathway coverage across all samples into one file + // + ch_humann_pathcoverage_path = ch_humann_pathcoverage_raw.map{ toCanonicalPath(it[1]) }.unique().take(1) + ch_humann_pathcoverage_joined = JOIN_PATHCOV(ch_humann_pathcoverage_path, 'pathcoverage') emit: - humann_genefamilies_cpm = ch_humann_genefamilies_cpm // channel: [ val(meta), [ reads_1.fastq.gz, reads_2.fastq.gz ] ] - humann_ec = ch_humann_ec_renamed // channel: [ val(meta), read_counts.tsv ] - humann_pathabundance = ch_humann_pathabundance_raw // channel: [ val(meta), pathabundance.tsv ] - humann_pathcoverage = ch_humann_pathcoverage_raw // channel: [ val(meta), pathcoverage.tsv ] + humann_genefamilies = ch_humann_genefamilies_joined // channel: [ val(meta), [ reads_1.fastq.gz, reads_2.fastq.gz ] ] + humann_ec = ch_humann_ec_joined // channel: [ val(meta), read_counts.tsv ] + humann_pathabundance = 
ch_humann_pathabundance_joined // channel: [ val(meta), pathabundance.tsv ] + humann_pathcoverage = ch_humann_pathcoverage_joined // channel: [ val(meta), pathcoverage.tsv ] versions = ch_versions // channel: [ versions.yml ] } diff --git a/subworkflows/local/fastq_microbial_pathway_humann/tests/main.nf.test b/subworkflows/local/fastq_microbial_pathway_humann/tests/main.nf.test new file mode 100644 index 0000000..fb8d06d --- /dev/null +++ b/subworkflows/local/fastq_microbial_pathway_humann/tests/main.nf.test @@ -0,0 +1,54 @@ +nextflow_workflow { + + name "Test Subworkflow: FASTQ_MICROBIAL_PATHWAY_HUMANN" + script "../main.nf" + workflow "FASTQ_MICROBIAL_PATHWAY_HUMANN" + + tag "subworkflows" + tag "subworkflows_local" + tag "fastq_microbial_pathway_humann" + tag "fastq_microbial_pathway_humann_default" + + + // TODO update inputs here, these are copied from the metaphlan subworkflow which obviously isnt what we actually need here + // the first should be processed fastq + // the second should be metaphlan profile output from that subworkflow + // the third is chocophlan db, either that or a chocophlan db version as fourth input should be provided (multiple tests) + // the fifth is uniref db, either that or a uniref db version as sixth input should be provided (multiple tests) + + test("fastq.gz") { + + when { + workflow { + """ + input[0] = Channel.of( + [ + [ id: 'test' ], + [ + file(params.modules_testdata_base_path + "genomics/sarscov2/illumina/fastq/test_1.fastq.gz", checkIfExists: true), + file(params.modules_testdata_base_path + "genomics/sarscov2/illumina/fastq/test_2.fastq.gz", checkIfExists: true) + ] + ], + [ + [ id: 'test2' ], + [ + file(params.modules_testdata_base_path + "genomics/sarscov2/illumina/fastq/test2_1.fastq.gz", checkIfExists: true), + file(params.modules_testdata_base_path + "genomics/sarscov2/illumina/fastq/test2_2.fastq.gz", checkIfExists: true) + ] + ] + ) + input[1] = 
"https://github.com/biobakery/MetaPhlAn/raw/master/metaphlan/utils/mpa_vOct22_CHOCOPhlAnSGB_202212_SGB2GTDB.tsv" + input[2] = null + input[3] = 'mpa_vJan21_TOY_CHOCOPhlAnSGB_202103' + """ + } + } + + then { + assertAll( + { assert workflow.success }, + { assert snapshot(workflow.out).match() } + ) + } + } +} diff --git a/subworkflows/local/fastq_microbial_pathway_humann/tests/tags.yml b/subworkflows/local/fastq_microbial_pathway_humann/tests/tags.yml new file mode 100644 index 0000000..1467ae5 --- /dev/null +++ b/subworkflows/local/fastq_microbial_pathway_humann/tests/tags.yml @@ -0,0 +1,2 @@ +fastq_microbial_pathway_humann_default: + - subworkflows/local/fastq_microbial_pathway_humann/** From 06fa7df9a352f62e804b4ba51f8de731417ac80d Mon Sep 17 00:00:00 2001 From: Danielle Callan Date: Wed, 18 Sep 2024 13:36:04 -0400 Subject: [PATCH 05/23] draft adding humann subworkflow to workflow --- workflows/biobakerymgx/main.nf | 59 ++++++++++++++++++++++++++++++++-- 1 file changed, 57 insertions(+), 2 deletions(-) diff --git a/workflows/biobakerymgx/main.nf b/workflows/biobakerymgx/main.nf index d045911..0bf91c4 100644 --- a/workflows/biobakerymgx/main.nf +++ b/workflows/biobakerymgx/main.nf @@ -9,6 +9,7 @@ // include { FASTQ_READ_PREPROCESSING_KNEADDATA } from '../../subworkflows/local/fastq_read_preprocessing_kneaddata/main' include { FASTQ_READ_TAXONOMY_METAPHLAN } from '../../subworkflows/local/fastq_read_taxonomy_metaphlan/main' +include { FASTQ_MICROBIAL_PATHWAY_HUMANN } from '../../subworkflows/local/fastq_microbial_pathway_humann/main' /* @@ -130,7 +131,7 @@ workflow BIOBAKERYMGX { Taxonomic classification: MetaPhlAn -----------------------------------------------------------------------------------*/ if ( params.run_metaphlan ) { - // create channel from params.kneaddata_db + // create channel from params.metaphlan_db if ( !params.metaphlan_db ){ ch_metaphlan_db = null } else { @@ -146,13 +147,63 @@ workflow BIOBAKERYMGX { // // SUBWORKFLOW: MetaPhlAn // - 
ch_read_taxonomy_tsv = FASTQ_READ_TAXONOMY_METAPHLAN ( ch_preprocessed_fastq_gz, ch_metaphlan_sgb2gtbd_file, ch_metaphlan_db, params.metaphlan_db_version ).metaphlan_profiles_merged_tsv + ch_read_taxonomy_tsv = FASTQ_READ_TAXONOMY_METAPHLAN ( + ch_preprocessed_fastq_gz, + ch_metaphlan_sgb2gtbd_file, + ch_metaphlan_db, + params.metaphlan_db_version + ).metaphlan_profiles_merged_tsv ch_versions = ch_versions.mix(FASTQ_READ_TAXONOMY_METAPHLAN.out.versions) } else { ch_read_taxonomy_tsv = Channel.empty() } + /*----------------------------------------------------------------------------------- + Functional classification: HUMAnN + -----------------------------------------------------------------------------------*/ + if ( params.run_humann ) { + // create channel from params.chocophlan_db + if ( !params.chocophlan_db ) { + ch_chochophlan_db = null + } else { + ch_chochophlan_db = Channel.value( file( params.chocophlan_db, checkIfExists: true ) ) + } + + // create channel from params.uniref_db + if ( !params.uniref_db ) { + ch_uniref_db = null + } else { + ch_uniref_db = Channel.value( file( params.uniref_db, checkIfExists: true ) ) + } + + // theres probably a better way to handle this. but good enough for me for now.. + if ( !params.run_metaphlan ) { + error "Error: run_humann is true but run_metaphlan is false. Cannot run HUMAnN without MetaPhlAn." + } + + // + // SUBWORKFLOW: HUMAnN + // + // TODO double check the metaphlan output channel. 
not sure its the format i was expecting in the module + ch_genefamilies_tsv = FASTQ_MICROBIAL_PATHWAY_HUMANN( + ch_preprocessed_fastq_gz, + ch_read_taxonomy_tsv, + ch_chochophlan_db, + params.chochophlan_db_version, + ch_uniref_db, + params.uniref_db_version).humann_genefamilies + ch_ec_tsv = FASTQ_MICROBIAL_PATHWAY_HUMANN.out.humann_ec + ch_pathabundance_tsv = FASTQ_MICROBIAL_PATHWAY_HUMANN.out.humann_pathabundance + ch_pathcoverage_tsv = FASTQ_MICROBIAL_PATHWAY_HUMANN.out.humann_pathcoverage + ch_versions = ch_versions.mix(FASTQ_MICROBIAL_PATHWAY_HUMANN.out.versions) + } else { + ch_genefamilies_tsv = Channel.empty() + ch_ec_tsv = Channel.empty() + ch_pathabundance_tsv = Channel.empty() + ch_pathcoverage_tsv = Channel.empty() + } + /*----------------------------------------------------------------------------------- Pipeline report utilities @@ -192,6 +243,10 @@ workflow BIOBAKERYMGX { preprocessed_fastq_gz = ch_preprocessed_fastq_gz preprocessed_read_counts_tsv = ch_preprocessed_read_counts_tsv read_taxonomy_tsv = ch_read_taxonomy_tsv + genefamilies_tsv = ch_genefamilies_tsv + ec_tsv = ch_ec_tsv + pathabundance_tsv = ch_pathabundance_tsv + pathcoverage_tsv = ch_pathcoverage_tsv multiqc_report = MULTIQC.out.report.toList() // channel: /path/to/multiqc_report.html versions = ch_versions } From 1de7d40ef77c83db20d59d7a70c4706ac885ef42 Mon Sep 17 00:00:00 2001 From: Danielle Callan Date: Wed, 18 Sep 2024 13:38:24 -0400 Subject: [PATCH 06/23] update expected outputs in workflow test --- workflows/biobakerymgx/tests/main.nf.test | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/workflows/biobakerymgx/tests/main.nf.test b/workflows/biobakerymgx/tests/main.nf.test index 163ca0b..bcf6e60 100644 --- a/workflows/biobakerymgx/tests/main.nf.test +++ b/workflows/biobakerymgx/tests/main.nf.test @@ -44,6 +44,10 @@ nextflow_workflow { workflow.out.preprocessed_fastq_gz, workflow.out.preprocessed_read_counts_tsv, workflow.out.read_taxonomy_tsv, + 
workflow.out.genefamilies_tsv, + workflow.out.ec_tsv, + workflow.out.pathabundance_tsv, + workflow.out.pathcoverage_tsv, workflow.out.versions ).match() }, From 69007fac1787cc4369b14dd246ec94daba80a9c8 Mon Sep 17 00:00:00 2001 From: Danielle Callan Date: Wed, 25 Sep 2024 14:55:01 -0400 Subject: [PATCH 07/23] dont keep compressing and uncompressing intermediate files --- modules/local/humann/join/main.nf | 8 ++------ modules/local/humann/regroup/main.nf | 10 ++-------- modules/local/humann/rename/main.nf | 8 +------- modules/local/humann/renorm/main.nf | 11 ++--------- 4 files changed, 7 insertions(+), 30 deletions(-) diff --git a/modules/local/humann/join/main.nf b/modules/local/humann/join/main.nf index 47c2f66..df06e16 100644 --- a/modules/local/humann/join/main.nf +++ b/modules/local/humann/join/main.nf @@ -11,21 +11,17 @@ process HUMANN_JOIN { val file_name_pattern output: - path("*_joined.tsv.gz"), emit: joined + path("*_joined.tsv") , emit: joined path "versions.yml" , emit: versions script: def args = task.ext.args ?: '' """ - if compgen -G "$input_dir/*$file_name_pattern*.gz" > /dev/null; then - find $input_dir \( -name '*$file_name_pattern*' \) -exec gunzip --verbose {} \; - fi humann_join_table \\ --input $input_dir \\ --output ${file_name_pattern}_joined.tsv \\ --file_name $file_name_pattern \\ ${args} - gzip -n ${file_name_pattern}_joined.tsv cat <<-END_VERSIONS > versions.yml "${task.process}": humann: \$(echo \$(humann --version 2>&1 | sed 's/^.*humann //; s/Using.*\$//' )) @@ -34,7 +30,7 @@ process HUMANN_JOIN { stub: """ - touch ${file_name_pattern}_joined.tsv.gz + touch ${file_name_pattern}_joined.tsv cat <<-END_VERSIONS > versions.yml "${task.process}": humann: \$(echo \$(humann --version 2>&1 | sed 's/^.*humann //; s/Using.*\$//' )) diff --git a/modules/local/humann/regroup/main.nf b/modules/local/humann/regroup/main.nf index 37c2cbc..9e87bfa 100644 --- a/modules/local/humann/regroup/main.nf +++ b/modules/local/humann/regroup/main.nf @@ -12,24 
+12,18 @@ process HUMANN_REGROUP { val groups output: - tuple val(meta), path("*_regroup.tsv.gz"), emit: regroup + tuple val(meta), path("*_regroup.tsv") , emit: regroup path "versions.yml" , emit: versions script: def args = task.ext.args ?: '' def prefix = task.ext.prefix ?: "${meta.id}" """ - if [[ $input == *.gz ]]; then - gunzip -c $input > input.tsv - else - mv $input input.tsv - fi humann_regroup_table \\ --input input.tsv \\ --output ${prefix}_${groups}_regroup.tsv \\ --groups $groups \\ ${args} - gzip -n ${prefix}_${groups}_regroup.tsv cat <<-END_VERSIONS > versions.yml "${task.process}": humann: \$(echo \$(humann --version 2>&1 | sed 's/^.*humann //; s/Using.*\$//' )) @@ -39,7 +33,7 @@ process HUMANN_REGROUP { stub: def prefix = task.ext.prefix ?: "${meta.id}" """ - touch ${prefix}_${groups}_regroup.tsv.gz + touch ${prefix}_${groups}_regroup.tsv cat <<-END_VERSIONS > versions.yml "${task.process}": humann: \$(echo \$(humann --version 2>&1 | sed 's/^.*humann //; s/Using.*\$//' )) diff --git a/modules/local/humann/rename/main.nf b/modules/local/humann/rename/main.nf index 16d8dc2..b6922fa 100644 --- a/modules/local/humann/rename/main.nf +++ b/modules/local/humann/rename/main.nf @@ -12,24 +12,18 @@ process HUMANN_RENAME { val names output: - tuple val(meta), path("*_renamed.tsv.gz"), emit: renamed + tuple val(meta), path("*_renamed.tsv") , emit: renamed path "versions.yml" , emit: versions script: def args = task.ext.args ?: '' def prefix = task.ext.prefix ?: "${meta.id}" """ - if [[ $input == *.gz ]]; then - gunzip -c $input > input.tsv - else - mv $input input.tsv - fi humann_rename_table \\ --input input.tsv \\ --output ${prefix}_${names}_rename.tsv \\ --names $names \\ ${args} - gzip -n ${prefix}_${names}_rename.tsv cat <<-END_VERSIONS > versions.yml "${task.process}": humann: \$(echo \$(humann --version 2>&1 | sed 's/^.*humann //; s/Using.*\$//' )) diff --git a/modules/local/humann/renorm/main.nf b/modules/local/humann/renorm/main.nf index 
315bad5..69e390a 100644 --- a/modules/local/humann/renorm/main.nf +++ b/modules/local/humann/renorm/main.nf @@ -12,26 +12,19 @@ process HUMANN_RENORM { val units output: - tuple val(meta), path("*_renorm.tsv.gz"), emit: renorm + tuple val(meta), path("*_renorm.tsv") , emit: renorm path "versions.yml" , emit: versions script: def args = task.ext.args ?: '' def prefix = task.ext.prefix ?: "${meta.id}" """ - if [[ $input == *.gz ]]; then - gunzip -c $input > input.tsv - else - mv $input input.tsv - fi humann_renorm_table \\ --input input.tsv \\ --output ${prefix}_${units}_renorm.tsv \\ --units $units \\ --update-snames \\ ${args} - gzip -n ${prefix}_${units}_renorm.tsv - stub: def args = task.ext.args ?: '' """ @@ -39,7 +32,7 @@ process HUMANN_RENORM { stub: def prefix = task.ext.prefix ?: "${meta.id}" """ - touch ${prefix}_${units}_renorm.tsv.gz + touch ${prefix}_${units}_renorm.tsv cat <<-END_VERSIONS > versions.yml "${task.process}": humann: \$(echo \$(humann --version 2>&1 | sed 's/^.*humann //; s/Using.*\$//' )) From 4237a24ddef6fd80f55b4a35eac81eac0f8ee222 Mon Sep 17 00:00:00 2001 From: Danielle Callan Date: Wed, 25 Sep 2024 15:00:08 -0400 Subject: [PATCH 08/23] remove unneeded first operator on value channels --- subworkflows/local/fastq_read_preprocessing_kneaddata/main.nf | 2 +- subworkflows/local/fastq_read_taxonomy_metaphlan/main.nf | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/subworkflows/local/fastq_read_preprocessing_kneaddata/main.nf b/subworkflows/local/fastq_read_preprocessing_kneaddata/main.nf index d7b18b0..4b1aac7 100644 --- a/subworkflows/local/fastq_read_preprocessing_kneaddata/main.nf +++ b/subworkflows/local/fastq_read_preprocessing_kneaddata/main.nf @@ -31,7 +31,7 @@ workflow FASTQ_READ_PREPROCESSING_KNEADDATA { // // MODULE: Trim and remove human reads // - ch_preprocessed_reads_fastq_gz = KNEADDATA_KNEADDATA ( raw_reads_fastq_gz, ch_kneaddata_db.first() ).preprocessed_reads + ch_preprocessed_reads_fastq_gz = 
KNEADDATA_KNEADDATA ( raw_reads_fastq_gz, ch_kneaddata_db ).preprocessed_reads ch_kneaddata_logs = KNEADDATA_KNEADDATA.out.kneaddata_log ch_versions = ch_versions.mix(KNEADDATA_KNEADDATA.out.versions) diff --git a/subworkflows/local/fastq_read_taxonomy_metaphlan/main.nf b/subworkflows/local/fastq_read_taxonomy_metaphlan/main.nf index a1205f0..1ca8ab5 100644 --- a/subworkflows/local/fastq_read_taxonomy_metaphlan/main.nf +++ b/subworkflows/local/fastq_read_taxonomy_metaphlan/main.nf @@ -33,7 +33,7 @@ workflow FASTQ_READ_TAXONOMY_METAPHLAN { // // MODULE: Trim and remove human reads // - ch_metaphlan_profile_txt = METAPHLAN_METAPHLAN ( preprocessed_reads_fastq_gz, ch_metaphlan_db.first() ).profile + ch_metaphlan_profile_txt = METAPHLAN_METAPHLAN ( preprocessed_reads_fastq_gz, ch_metaphlan_db ).profile ch_versions = ch_versions.mix(METAPHLAN_METAPHLAN.out.versions) // From b160d8fa8bc0f95788ddb941676fb9618781a6d0 Mon Sep 17 00:00:00 2001 From: Danielle Callan Date: Wed, 25 Sep 2024 15:04:30 -0400 Subject: [PATCH 09/23] run_humann true for test --- tests/main.nf.test | 1 + 1 file changed, 1 insertion(+) diff --git a/tests/main.nf.test b/tests/main.nf.test index 4e471eb..3d00bbc 100644 --- a/tests/main.nf.test +++ b/tests/main.nf.test @@ -14,6 +14,7 @@ nextflow_pipeline { kneaddata_db_version = 'human_genome' run_metaphlan = true metaphlan_db_version = 'mpa_vJan21_TOY_CHOCOPhlAnSGB_202103' + run_humann = true } } From 3fc30ece52e1ba910b9dce92607ee19011ef459f Mon Sep 17 00:00:00 2001 From: Danielle Callan Date: Thu, 26 Sep 2024 12:57:55 -0400 Subject: [PATCH 10/23] updating some config --- conf/test.config | 3 +++ nextflow.config | 4 +++- nextflow_schema.json | 8 ++++++++ 3 files changed, 14 insertions(+), 1 deletion(-) diff --git a/conf/test.config b/conf/test.config index a4e5f05..4da4045 100644 --- a/conf/test.config +++ b/conf/test.config @@ -26,5 +26,8 @@ params { kneaddata_db_version = 'human_genome' run_metaphlan = true metaphlan_db_version = 
'mpa_vJan21_TOY_CHOCOPhlAnSGB_202103' + run_humann = true + chocophlan_db_version = 'DEMO' + uniref_db_version = 'DEMO_diamond' } diff --git a/nextflow.config b/nextflow.config index 0c0b78d..da0de55 100644 --- a/nextflow.config +++ b/nextflow.config @@ -31,8 +31,10 @@ params { metaphlan_sgb2gtbd_file = "https://github.com/biobakery/MetaPhlAn/raw/master/metaphlan/utils/mpa_vOct22_CHOCOPhlAnSGB_202212_SGB2GTDB.tsv" // HUMAnN options - run_humann = false + run_humann = false + chocophlan_db = null chocophlan_db_version ='DEMO' + uniref_db = null uniref_db_version ='DEMO_diamond' utility_mapping_version ='DEMO' diff --git a/nextflow_schema.json b/nextflow_schema.json index 3c1e17c..c6e2503 100644 --- a/nextflow_schema.json +++ b/nextflow_schema.json @@ -124,11 +124,19 @@ "type": "boolean", "description": "Run HUMAnN to assess functional capacity of a metagenome?" }, + "chocophlan_db": { + "type": "string", + "description": "Path to pre-downloaded ChocoPhlAn database" + }, "chocophlan_db_version": { "type": "string", "default": "DEMO", "description": "ChocoPhlAn database version to download" }, + "uniref_db": { + "type": "string", + "description": "Path to pre-downloaded UNIREF database" + }, "uniref_db_version": { "type": "string", "default": "DEMO_diamond", From 91d7df59784688686408764599634212e81a3013 Mon Sep 17 00:00:00 2001 From: Danielle Callan Date: Thu, 26 Sep 2024 14:23:07 -0400 Subject: [PATCH 11/23] some chocophlan misspelling --- nextflow.config | 6 +++--- .../local/fastq_microbial_pathway_humann/main.nf | 6 +++--- tests/main.nf.test | 2 ++ workflows/biobakerymgx/main.nf | 10 +++++----- workflows/biobakerymgx/tests/main.nf.test | 2 ++ 5 files changed, 15 insertions(+), 11 deletions(-) diff --git a/nextflow.config b/nextflow.config index da0de55..1128606 100644 --- a/nextflow.config +++ b/nextflow.config @@ -33,10 +33,10 @@ params { // HUMAnN options run_humann = false chocophlan_db = null - chocophlan_db_version ='DEMO' + chocophlan_db_version = 'DEMO' 
uniref_db = null - uniref_db_version ='DEMO_diamond' - utility_mapping_version ='DEMO' + uniref_db_version = 'DEMO_diamond' + utility_mapping_version = 'DEMO' // MultiQC options multiqc_config = null diff --git a/subworkflows/local/fastq_microbial_pathway_humann/main.nf b/subworkflows/local/fastq_microbial_pathway_humann/main.nf index 6ad055d..8174151 100644 --- a/subworkflows/local/fastq_microbial_pathway_humann/main.nf +++ b/subworkflows/local/fastq_microbial_pathway_humann/main.nf @@ -19,7 +19,7 @@ workflow FASTQ_MICROBIAL_PATHWAY_HUMANN { processed_reads_fastq_gz // channel: [ val(meta), [ processed_reads_1.fastq.gz, processed_reads_2.fastq.gz ] ] (MANDATORY) metaphlan_profile // channel: [ val(meta2), metaphlan_profile.tsv ] (MANDATORY) chocophlan_db // channel: [ chocophlan_db ] (OPTIONAL) - chochophlan_db_version // value: '' (OPTIONAL) + chocophlan_db_version // value: '' (OPTIONAL) uniref_db // channel: [ uniref_db ] (OPTIONAL) uniref_db_version // value: '' (OPTIONAL) @@ -29,12 +29,12 @@ workflow FASTQ_MICROBIAL_PATHWAY_HUMANN { // if chocophlan_db exists, skip HUMANN_DOWNLOADCHOCOPHLANDB if ( chocophlan_db ){ - ch_chocophlan_db = chochophlan_db + ch_chocophlan_db = chocophlan_db } else { // // MODULE: Download ChocoPhlAn database // - ch_chocophlan_db = HUMANN_DOWNLOADCHOCOPHLANDB ( chochophlan_db_version ).chochophlan_db + ch_chocophlan_db = HUMANN_DOWNLOADCHOCOPHLANDB ( chocophlan_db_version ).chochophlan_db ch_versions = ch_versions.mix(HUMANN_DOWNLOADCHOCOPHLANDB.out.versions) } diff --git a/tests/main.nf.test b/tests/main.nf.test index 3d00bbc..6583270 100644 --- a/tests/main.nf.test +++ b/tests/main.nf.test @@ -15,6 +15,8 @@ nextflow_pipeline { run_metaphlan = true metaphlan_db_version = 'mpa_vJan21_TOY_CHOCOPhlAnSGB_202103' run_humann = true + chocophlan_db_version = 'DEMO' + uniref_db_version = 'DEMO_diamond' } } diff --git a/workflows/biobakerymgx/main.nf b/workflows/biobakerymgx/main.nf index 0bf91c4..6368515 100644 --- 
a/workflows/biobakerymgx/main.nf +++ b/workflows/biobakerymgx/main.nf @@ -163,11 +163,11 @@ workflow BIOBAKERYMGX { Functional classification: HUMAnN -----------------------------------------------------------------------------------*/ if ( params.run_humann ) { - // create channel from params.chochophlan_db + // create channel from params.chocophlan_db if ( !params.chocophlan_db ) { - ch_chochophlan_db = null + ch_chocophlan_db = null } else { - ch_chochophlan_db = Channel.value( file( params.chochophlan_db, checkIfExists: true ) ) + ch_chocophlan_db = Channel.value( file( params.chocophlan_db, checkIfExists: true ) ) } // create channel from params.uniref_db @@ -189,8 +189,8 @@ workflow BIOBAKERYMGX { ch_genefamilies_tsv = FASTQ_MICROBIAL_PATHWAY_HUMANN( ch_preprocessed_fastq_gz, ch_read_taxonomy_tsv, - ch_chochophlan_db, - params.chochophlan_db_version, + ch_chocophlan_db, + params.chocophlan_db_version, ch_uniref_db, params.uniref_db_version).humann_genefamilies ch_ec_tsv = FASTQ_MICROBIAL_PATHWAY_HUMANN.out.humann_ec diff --git a/workflows/biobakerymgx/tests/main.nf.test b/workflows/biobakerymgx/tests/main.nf.test index bcf6e60..8faae8e 100644 --- a/workflows/biobakerymgx/tests/main.nf.test +++ b/workflows/biobakerymgx/tests/main.nf.test @@ -33,6 +33,8 @@ nextflow_workflow { outdir = "$outputDir" kneaddata_db_version = 'human_genome' metaphlan_db_version = 'mpa_vJan21_TOY_CHOCOPhlAnSGB_202103' + chocophlan_db_version = 'DEMO' + uniref_db_version = 'DEMO_diamond' max_cpus = 1 } } From 88bb41bf39a030bea9a3bad1a5a7629912f9b392 Mon Sep 17 00:00:00 2001 From: Danielle Callan Date: Thu, 26 Sep 2024 14:30:38 -0400 Subject: [PATCH 12/23] omg more chocophlan typos --- modules/local/humann/humann/tests/main.nf.test | 2 +- subworkflows/local/fastq_microbial_pathway_humann/main.nf | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/modules/local/humann/humann/tests/main.nf.test b/modules/local/humann/humann/tests/main.nf.test index aa5b71a..cae1a75 
100644 --- a/modules/local/humann/humann/tests/main.nf.test +++ b/modules/local/humann/humann/tests/main.nf.test @@ -15,7 +15,7 @@ nextflow_process { script "../../downloadchocophlandb/main.nf" process { """ - input[0] = 'TODO - some chochophlan db version - or maybe skip this setup and build in a mini test db' + input[0] = 'TODO - some chocophlan db version - or maybe skip this setup and build in a mini test db' """ } } diff --git a/subworkflows/local/fastq_microbial_pathway_humann/main.nf b/subworkflows/local/fastq_microbial_pathway_humann/main.nf index 8174151..2bbbb99 100644 --- a/subworkflows/local/fastq_microbial_pathway_humann/main.nf +++ b/subworkflows/local/fastq_microbial_pathway_humann/main.nf @@ -34,7 +34,7 @@ workflow FASTQ_MICROBIAL_PATHWAY_HUMANN { // // MODULE: Download ChocoPhlAn database // - ch_chocophlan_db = HUMANN_DOWNLOADCHOCOPHLANDB ( chocophlan_db_version ).chochophlan_db + ch_chocophlan_db = HUMANN_DOWNLOADCHOCOPHLANDB ( chocophlan_db_version ).chocophlan_db ch_versions = ch_versions.mix(HUMANN_DOWNLOADCHOCOPHLANDB.out.versions) } @@ -52,7 +52,7 @@ workflow FASTQ_MICROBIAL_PATHWAY_HUMANN { // // MODULE: Run HUMAnN 3 for raw outputs // - ch_humann_genefamilies_raw = HUMANN_HUMANN ( processed_reads_fastq_gz, metaphlan_profile, ch_chochophlan_db, ch_uniref_db ).genefamilies + ch_humann_genefamilies_raw = HUMANN_HUMANN ( processed_reads_fastq_gz, metaphlan_profile, ch_chocophlan_db, ch_uniref_db ).genefamilies ch_humann_pathabundance_raw = HUMANN_HUMANN.out.pathabundance ch_humann_pathcoverage_raw = HUMANN_HUMANN.out.pathcoverage // TODO is this still right? looking at humann docs, might not get this file any longer? 
ch_humann_logs = HUMANN_HUMANN.out.log From 80df04d8ede51e218d0a962c2211ce8a88859931 Mon Sep 17 00:00:00 2001 From: Danielle Callan Date: Thu, 26 Sep 2024 14:43:20 -0400 Subject: [PATCH 13/23] fix emit for humann subworkflow --- .../local/fastq_microbial_pathway_humann/main.nf | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/subworkflows/local/fastq_microbial_pathway_humann/main.nf b/subworkflows/local/fastq_microbial_pathway_humann/main.nf index 2bbbb99..ea1b840 100644 --- a/subworkflows/local/fastq_microbial_pathway_humann/main.nf +++ b/subworkflows/local/fastq_microbial_pathway_humann/main.nf @@ -78,7 +78,7 @@ workflow FASTQ_MICROBIAL_PATHWAY_HUMANN { // // MODULE: rename ec number outputs to include descriptors // - ch_humann_ec_renamed = HUMANN_RENAME(ch_humann_ec, 'ec').rename // TODO make sure 'ec' is valid arg + ch_humann_ec_renamed = HUMANN_RENAME(ch_humann_ec, 'ec').renamed // TODO make sure 'ec' is valid arg ch_versions = ch_versions.mix(HUMANN_RENAME.out.versions) // @@ -87,28 +87,28 @@ workflow FASTQ_MICROBIAL_PATHWAY_HUMANN { // the paths should all be the same, so im taking the first. // should probably be validated though, im just short of time.. 
ch_humann_genefamilies_cpm_path = ch_humann_genefamilies_cpm.map{ toCanonicalPath(it[1]) }.unique().take(1) - ch_humann_genefamilies_joined = JOIN_GENES(ch_humann_genefamilies_cpm_path, 'genefamilies') + ch_humann_genefamilies_joined = JOIN_GENES(ch_humann_genefamilies_cpm_path, 'genefamilies').joined // // MODULE: join ec abundances across all samples into one file // ch_humann_ec_renamed_path = ch_humann_ec_renamed.map{ toCanonicalPath(it[1]) }.unique().take(1) - ch_humann_ec_joined = JOIN_EC(ch_humann_ec_renamed_path, 'ec') // TODO check the file name pattern + ch_humann_ec_joined = JOIN_EC(ch_humann_ec_renamed_path, 'ec').joined // TODO check the file name pattern // // MODULE: join pathway abundances across all samples into one file // ch_humann_pathabundance_path = ch_humann_pathabundance_raw.map{ toCanonicalPath(it[1]) }.unique().take(1) - ch_humann_pathabundance_joined = JOIN_PATHABUND(ch_humann_pathabundance_path, 'pathabundance') + ch_humann_pathabundance_joined = JOIN_PATHABUND(ch_humann_pathabundance_path, 'pathabundance').joined // // MODULE: join pathway coverage across all samples into one file // ch_humann_pathcoverage_path = ch_humann_pathcoverage_raw.map{ toCanonicalPath(it[1]) }.unique().take(1) - ch_humann_pathcoverage_joined = JOIN_PATHCOV(ch_humann_pathcoverage_path, 'pathcoverage') + ch_humann_pathcoverage_joined = JOIN_PATHCOV(ch_humann_pathcoverage_path, 'pathcoverage').joined emit: - humann_genefamilies = ch_humann_genefamilies_joined // channel: [ val(meta), [ reads_1.fastq.gz, reads_2.fastq.gz ] ] + humann_genefamilies = ch_humann_genefamilies_joined // channel: [ val(meta), genefamilies.tsv ] humann_ec = ch_humann_ec_joined // channel: [ val(meta), read_counts.tsv ] humann_pathabundance = ch_humann_pathabundance_joined // channel: [ val(meta), pathabundance.tsv ] humann_pathcoverage = ch_humann_pathcoverage_joined // channel: [ val(meta), pathcoverage.tsv ] From cb338e8298878750c129d29da30eb2252764efcd Mon Sep 17 00:00:00 2001 From: 
Danielle Callan Date: Mon, 30 Sep 2024 14:02:10 -0400 Subject: [PATCH 14/23] handle better paired fastq for humann --- modules/local/humann/humann/main.nf | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/modules/local/humann/humann/main.nf b/modules/local/humann/humann/main.nf index c7a0c78..5ba7c21 100644 --- a/modules/local/humann/humann/main.nf +++ b/modules/local/humann/humann/main.nf @@ -26,9 +26,10 @@ process HUMANN_HUMANN { script: def args = task.ext.args ?: '' def prefix = task.ext.prefix ?: "${meta.id}" + def input_data = !meta.single_end ? "--input ${reads[0]} --input ${reads[1]}" : "--input $reads" """ humann \\ - --input ${reads} \\ + ${input_data} \\ --output ./ \\ --threads ${task.cpus} \\ --taxonomic-profile ${metaphlan_profile} \\ From f58934ea3b9365c2d651abb1013c7a804839efec Mon Sep 17 00:00:00 2001 From: Danielle Callan Date: Tue, 1 Oct 2024 11:02:17 -0400 Subject: [PATCH 15/23] change test version of metaphlan db --- conf/test.config | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/conf/test.config b/conf/test.config index 4da4045..9bbc08a 100644 --- a/conf/test.config +++ b/conf/test.config @@ -25,7 +25,7 @@ params { run_kneaddata = true kneaddata_db_version = 'human_genome' run_metaphlan = true - metaphlan_db_version = 'mpa_vJan21_TOY_CHOCOPhlAnSGB_202103' + metaphlan_db_version = 'mpa_vOct22_CHOCOPhlAnSGB_202212' run_humann = true chocophlan_db_version = 'DEMO' uniref_db_version = 'DEMO_diamond' From 4e3e6159691b578ed85edb1c60fbbe15ba7dce12 Mon Sep 17 00:00:00 2001 From: Danielle Callan Date: Tue, 1 Oct 2024 12:47:43 -0400 Subject: [PATCH 16/23] update humann/join module to use current work dir as input --- modules/local/humann/join/main.nf | 3 +-- modules/local/humann/join/tests/main.nf.test | 6 ++---- .../local/fastq_microbial_pathway_humann/main.nf | 14 ++++---------- 3 files changed, 7 insertions(+), 16 deletions(-) diff --git a/modules/local/humann/join/main.nf b/modules/local/humann/join/main.nf index 
df06e16..3213e30 100644 --- a/modules/local/humann/join/main.nf +++ b/modules/local/humann/join/main.nf @@ -7,7 +7,6 @@ process HUMANN_JOIN { 'biocontainers/humann:3.8--pyh7cba7a3_0' }" input: - path(input_dir) val file_name_pattern output: @@ -18,7 +17,7 @@ process HUMANN_JOIN { def args = task.ext.args ?: '' """ humann_join_table \\ - --input $input_dir \\ + --input . \\ --output ${file_name_pattern}_joined.tsv \\ --file_name $file_name_pattern \\ ${args} diff --git a/modules/local/humann/join/tests/main.nf.test b/modules/local/humann/join/tests/main.nf.test index 3e748ea..474626f 100644 --- a/modules/local/humann/join/tests/main.nf.test +++ b/modules/local/humann/join/tests/main.nf.test @@ -14,8 +14,7 @@ nextflow_process { when { process { """ - input[0] = TODO - first is a path to a dir w files matching the below pattern - input[1] = "genefamilies" + input[0] = "genefamilies" """ } } @@ -40,8 +39,7 @@ nextflow_process { when { process { """ - input[0] = TODO - first is a path to a dir w files matching the below pattern - input[1] = "genefamilies" + input[0] = "genefamilies" """ } } diff --git a/subworkflows/local/fastq_microbial_pathway_humann/main.nf b/subworkflows/local/fastq_microbial_pathway_humann/main.nf index ea1b840..d8a6c96 100644 --- a/subworkflows/local/fastq_microbial_pathway_humann/main.nf +++ b/subworkflows/local/fastq_microbial_pathway_humann/main.nf @@ -84,28 +84,22 @@ workflow FASTQ_MICROBIAL_PATHWAY_HUMANN { // // MODULE: join gene abundances across all samples into one file // - // the paths should all be the same, so im taking the first. - // should probably be validated though, im just short of time.. 
- ch_humann_genefamilies_cpm_path = ch_humann_genefamilies_cpm.map{ toCanonicalPath(it[1]) }.unique().take(1) - ch_humann_genefamilies_joined = JOIN_GENES(ch_humann_genefamilies_cpm_path, 'genefamilies').joined + ch_humann_genefamilies_joined = JOIN_GENES('genefamilies').joined // // MODULE: join ec abundances across all samples into one file // - ch_humann_ec_renamed_path = ch_humann_ec_renamed.map{ toCanonicalPath(it[1]) }.unique().take(1) - ch_humann_ec_joined = JOIN_EC(ch_humann_ec_renamed_path, 'ec').joined // TODO check the file name pattern + ch_humann_ec_joined = JOIN_EC('ec').joined // TODO check the file name pattern // // MODULE: join pathway abundances across all samples into one file // - ch_humann_pathabundance_path = ch_humann_pathabundance_raw.map{ toCanonicalPath(it[1]) }.unique().take(1) - ch_humann_pathabundance_joined = JOIN_PATHABUND(ch_humann_pathabundance_path, 'pathabundance').joined + ch_humann_pathabundance_joined = JOIN_PATHABUND('pathabundance').joined // // MODULE: join pathway coverage across all samples into one file // - ch_humann_pathcoverage_path = ch_humann_pathcoverage_raw.map{ toCanonicalPath(it[1]) }.unique().take(1) - ch_humann_pathcoverage_joined = JOIN_PATHCOV(ch_humann_pathcoverage_path, 'pathcoverage').joined + ch_humann_pathcoverage_joined = JOIN_PATHCOV('pathcoverage').joined emit: humann_genefamilies = ch_humann_genefamilies_joined // channel: [ val(meta), genefamilies.tsv ] From 9eb971edf0da5cfa474806d728ba2ba75e4693d1 Mon Sep 17 00:00:00 2001 From: Danielle Callan Date: Tue, 1 Oct 2024 12:48:25 -0400 Subject: [PATCH 17/23] increase test resources --- conf/test.config | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/conf/test.config b/conf/test.config index 9bbc08a..5bcf4c0 100644 --- a/conf/test.config +++ b/conf/test.config @@ -15,8 +15,8 @@ params { config_profile_description = 'Minimal test dataset to check pipeline function' // Limit resources so that this can run on GitHub Actions - max_cpus 
= 2 - max_memory = '6.GB' + max_cpus = 4 + max_memory = '16.GB' max_time = '6.h' // Input data From 79fb1910f4c479c9669ae6dc9e9781ad5e21890d Mon Sep 17 00:00:00 2001 From: Danielle Callan Date: Tue, 1 Oct 2024 12:55:23 -0400 Subject: [PATCH 18/23] typo in humann module inputs --- modules/local/humann/regroup/main.nf | 2 +- modules/local/humann/rename/main.nf | 2 +- modules/local/humann/renorm/main.nf | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/modules/local/humann/regroup/main.nf b/modules/local/humann/regroup/main.nf index 9e87bfa..f2fe5dc 100644 --- a/modules/local/humann/regroup/main.nf +++ b/modules/local/humann/regroup/main.nf @@ -20,7 +20,7 @@ process HUMANN_REGROUP { def prefix = task.ext.prefix ?: "${meta.id}" """ humann_regroup_table \\ - --input input.tsv \\ + --input $input \\ --output ${prefix}_${groups}_regroup.tsv \\ --groups $groups \\ ${args} diff --git a/modules/local/humann/rename/main.nf b/modules/local/humann/rename/main.nf index b6922fa..f3c320f 100644 --- a/modules/local/humann/rename/main.nf +++ b/modules/local/humann/rename/main.nf @@ -20,7 +20,7 @@ process HUMANN_RENAME { def prefix = task.ext.prefix ?: "${meta.id}" """ humann_rename_table \\ - --input input.tsv \\ + --input $input \\ --output ${prefix}_${names}_rename.tsv \\ --names $names \\ ${args} diff --git a/modules/local/humann/renorm/main.nf b/modules/local/humann/renorm/main.nf index 69e390a..ec3f4f2 100644 --- a/modules/local/humann/renorm/main.nf +++ b/modules/local/humann/renorm/main.nf @@ -20,7 +20,7 @@ process HUMANN_RENORM { def prefix = task.ext.prefix ?: "${meta.id}" """ humann_renorm_table \\ - --input input.tsv \\ + --input $input \\ --output ${prefix}_${units}_renorm.tsv \\ --units $units \\ --update-snames \\ From 2b1cd7b95455cd58b34c784d3bbf78295a701cd1 Mon Sep 17 00:00:00 2001 From: Danielle Callan Date: Tue, 1 Oct 2024 13:02:41 -0400 Subject: [PATCH 19/23] silly --- modules/local/humann/join/main.nf | 1 + 
subworkflows/local/fastq_microbial_pathway_humann/main.nf | 8 ++++---- 2 files changed, 5 insertions(+), 4 deletions(-) diff --git a/modules/local/humann/join/main.nf b/modules/local/humann/join/main.nf index 3213e30..a3e4a2b 100644 --- a/modules/local/humann/join/main.nf +++ b/modules/local/humann/join/main.nf @@ -7,6 +7,7 @@ process HUMANN_JOIN { 'biocontainers/humann:3.8--pyh7cba7a3_0' }" input: + path(input) val file_name_pattern output: diff --git a/subworkflows/local/fastq_microbial_pathway_humann/main.nf b/subworkflows/local/fastq_microbial_pathway_humann/main.nf index d8a6c96..aec4a17 100644 --- a/subworkflows/local/fastq_microbial_pathway_humann/main.nf +++ b/subworkflows/local/fastq_microbial_pathway_humann/main.nf @@ -84,22 +84,22 @@ workflow FASTQ_MICROBIAL_PATHWAY_HUMANN { // // MODULE: join gene abundances across all samples into one file // - ch_humann_genefamilies_joined = JOIN_GENES('genefamilies').joined + ch_humann_genefamilies_joined = JOIN_GENES(ch_humann_genefamilies_cpm, 'genefamilies').joined // // MODULE: join ec abundances across all samples into one file // - ch_humann_ec_joined = JOIN_EC('ec').joined // TODO check the file name pattern + ch_humann_ec_joined = JOIN_EC(ch_humann_ec_renamed, 'ec').joined // TODO check the file name pattern // // MODULE: join pathway abundances across all samples into one file // - ch_humann_pathabundance_joined = JOIN_PATHABUND('pathabundance').joined + ch_humann_pathabundance_joined = JOIN_PATHABUND(ch_humann_pathabundance_raw, 'pathabundance').joined // // MODULE: join pathway coverage across all samples into one file // - ch_humann_pathcoverage_joined = JOIN_PATHCOV('pathcoverage').joined + ch_humann_pathcoverage_joined = JOIN_PATHCOV(ch_humann_pathcoverage_raw, 'pathcoverage').joined emit: humann_genefamilies = ch_humann_genefamilies_joined // channel: [ val(meta), genefamilies.tsv ] From ca95d106616fc3c1e129ab397cf9fd300ed765dd Mon Sep 17 00:00:00 2001 From: Danielle Callan Date: Tue, 1 Oct 2024 
13:07:26 -0400 Subject: [PATCH 20/23] forgot meta in humann/join module --- modules/local/humann/join/main.nf | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/modules/local/humann/join/main.nf b/modules/local/humann/join/main.nf index a3e4a2b..17f6027 100644 --- a/modules/local/humann/join/main.nf +++ b/modules/local/humann/join/main.nf @@ -7,7 +7,7 @@ process HUMANN_JOIN { 'biocontainers/humann:3.8--pyh7cba7a3_0' }" input: - path(input) + tuple val(meta), path(input) val file_name_pattern output: @@ -17,7 +17,7 @@ process HUMANN_JOIN { script: def args = task.ext.args ?: '' """ - humann_join_table \\ + humann_join_tables \\ --input . \\ --output ${file_name_pattern}_joined.tsv \\ --file_name $file_name_pattern \\ From c5823ad3dc2eba6c0ca03f11989864a4d62ed437 Mon Sep 17 00:00:00 2001 From: Danielle Callan Date: Tue, 1 Oct 2024 13:11:55 -0400 Subject: [PATCH 21/23] typo in humann/renorm --- modules/local/humann/renorm/main.nf | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/modules/local/humann/renorm/main.nf b/modules/local/humann/renorm/main.nf index ec3f4f2..b5a4c5f 100644 --- a/modules/local/humann/renorm/main.nf +++ b/modules/local/humann/renorm/main.nf @@ -25,8 +25,10 @@ process HUMANN_RENORM { --units $units \\ --update-snames \\ ${args} - stub: - def args = task.ext.args ?: '' + cat <<-END_VERSIONS > versions.yml + "${task.process}": + humann: \$(echo \$(humann --version 2>&1 | sed 's/^.*humann //; s/Using.*\$//' )) + END_VERSIONS """ stub: From c3e9f6d981779bdf67b1c53673395e1e5e23584b Mon Sep 17 00:00:00 2001 From: Danielle Callan Date: Tue, 1 Oct 2024 13:17:02 -0400 Subject: [PATCH 22/23] fix humann_regroup to rxn --- subworkflows/local/fastq_microbial_pathway_humann/main.nf | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/subworkflows/local/fastq_microbial_pathway_humann/main.nf b/subworkflows/local/fastq_microbial_pathway_humann/main.nf index aec4a17..255da3c 100644 --- 
a/subworkflows/local/fastq_microbial_pathway_humann/main.nf +++ b/subworkflows/local/fastq_microbial_pathway_humann/main.nf @@ -72,7 +72,7 @@ workflow FASTQ_MICROBIAL_PATHWAY_HUMANN { // // MODULE: regroup cpm gene families to EC numbers // - ch_humann_ec = HUMANN_REGROUP(ch_humann_genefamilies_cpm, 'ec').regroup // TODO make sure 'ec' is still valid arg + ch_humann_ec = HUMANN_REGROUP(ch_humann_genefamilies_cpm, 'uniref90_rxn').regroup ch_versions = ch_versions.mix(HUMANN_REGROUP.out.versions) // From c15761bc56d06d67bcb2c982d785b43a3effd2d9 Mon Sep 17 00:00:00 2001 From: Danielle Callan Date: Tue, 1 Oct 2024 13:19:07 -0400 Subject: [PATCH 23/23] fix join_genes file name pattern --- subworkflows/local/fastq_microbial_pathway_humann/main.nf | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/subworkflows/local/fastq_microbial_pathway_humann/main.nf b/subworkflows/local/fastq_microbial_pathway_humann/main.nf index 255da3c..8b82438 100644 --- a/subworkflows/local/fastq_microbial_pathway_humann/main.nf +++ b/subworkflows/local/fastq_microbial_pathway_humann/main.nf @@ -84,7 +84,7 @@ workflow FASTQ_MICROBIAL_PATHWAY_HUMANN { // // MODULE: join gene abundances across all samples into one file // - ch_humann_genefamilies_joined = JOIN_GENES(ch_humann_genefamilies_cpm, 'genefamilies').joined + ch_humann_genefamilies_joined = JOIN_GENES(ch_humann_genefamilies_cpm, 'cpm').joined // // MODULE: join ec abundances across all samples into one file