From bff5441036659838712620b8de0786c951a5c5e5 Mon Sep 17 00:00:00 2001 From: Danielle Callan Date: Mon, 16 Sep 2024 14:44:50 -0400 Subject: [PATCH 01/23] add skeleton for humann module- wip --- modules/local/humann/humann/environment.yml | 6 ++ modules/local/humann/humann/main.nf | 58 ++++++++++++ modules/local/humann/humann/nextflow.config | 27 ++++++ .../local/humann/humann/tests/main.nf.test | 49 ++++++++++ .../local/humann/humann/tests/nextflow.config | 27 ++++++ modules/local/humann/join/environment.yml | 6 ++ modules/local/humann/join/join.nf | 89 ++++++++++++++++++ modules/local/humann/join/main.nf | 42 +++++++++ modules/local/humann/join/nextflow.config | 5 + modules/local/humann/join/tests/main.nf.test | 49 ++++++++++ .../local/humann/join/tests/nextflow.config | 5 + modules/local/humann/regroup/environment.yml | 6 ++ modules/local/humann/regroup/main.nf | 42 +++++++++ modules/local/humann/regroup/nextflow.config | 5 + modules/local/humann/regroup/regroup.nf | 91 +++++++++++++++++++ .../local/humann/regroup/tests/main.nf.test | 49 ++++++++++ .../humann/regroup/tests/nextflow.config | 5 + modules/local/humann/rename/environment.yml | 6 ++ modules/local/humann/rename/main.nf | 42 +++++++++ modules/local/humann/rename/nextflow.config | 5 + modules/local/humann/rename/rename.nf | 91 +++++++++++++++++++ .../local/humann/rename/tests/main.nf.test | 49 ++++++++++ .../local/humann/rename/tests/nextflow.config | 5 + modules/local/humann/renorm/environment.yml | 6 ++ modules/local/humann/renorm/main.nf | 42 +++++++++ modules/local/humann/renorm/nextflow.config | 5 + modules/local/humann/renorm/renorm.nf | 91 +++++++++++++++++++ .../local/humann/renorm/tests/main.nf.test | 49 ++++++++++ .../local/humann/renorm/tests/nextflow.config | 5 + 29 files changed, 957 insertions(+) create mode 100644 modules/local/humann/humann/environment.yml create mode 100644 modules/local/humann/humann/main.nf create mode 100644 modules/local/humann/humann/nextflow.config create mode 
100644 modules/local/humann/humann/tests/main.nf.test create mode 100644 modules/local/humann/humann/tests/nextflow.config create mode 100644 modules/local/humann/join/environment.yml create mode 100644 modules/local/humann/join/join.nf create mode 100644 modules/local/humann/join/main.nf create mode 100644 modules/local/humann/join/nextflow.config create mode 100644 modules/local/humann/join/tests/main.nf.test create mode 100644 modules/local/humann/join/tests/nextflow.config create mode 100644 modules/local/humann/regroup/environment.yml create mode 100644 modules/local/humann/regroup/main.nf create mode 100644 modules/local/humann/regroup/nextflow.config create mode 100644 modules/local/humann/regroup/regroup.nf create mode 100644 modules/local/humann/regroup/tests/main.nf.test create mode 100644 modules/local/humann/regroup/tests/nextflow.config create mode 100644 modules/local/humann/rename/environment.yml create mode 100644 modules/local/humann/rename/main.nf create mode 100644 modules/local/humann/rename/nextflow.config create mode 100644 modules/local/humann/rename/rename.nf create mode 100644 modules/local/humann/rename/tests/main.nf.test create mode 100644 modules/local/humann/rename/tests/nextflow.config create mode 100644 modules/local/humann/renorm/environment.yml create mode 100644 modules/local/humann/renorm/main.nf create mode 100644 modules/local/humann/renorm/nextflow.config create mode 100644 modules/local/humann/renorm/renorm.nf create mode 100644 modules/local/humann/renorm/tests/main.nf.test create mode 100644 modules/local/humann/renorm/tests/nextflow.config diff --git a/modules/local/humann/humann/environment.yml b/modules/local/humann/humann/environment.yml new file mode 100644 index 0000000..92f963f --- /dev/null +++ b/modules/local/humann/humann/environment.yml @@ -0,0 +1,6 @@ +channels: + - conda-forge + - bioconda + - defaults +dependencies: + - bioconda::humann=3.8 diff --git a/modules/local/humann/humann/main.nf 
b/modules/local/humann/humann/main.nf new file mode 100644 index 0000000..66abe4a --- /dev/null +++ b/modules/local/humann/humann/main.nf @@ -0,0 +1,58 @@ +process HUMANN_HUMANN { + tag "$meta.id" + label 'process_medium' + + conda "${moduleDir}/environment.yml" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? + 'https://depot.galaxyproject.org/singularity/humann:3.8--pyh7cba7a3_0': + 'biocontainers/humann:3.8--pyh7cba7a3_0' }" + + input: + tuple val(meta) , path(reads) + tuple val(meta2), path(metaphlan_profile) + path chocophlan_db + path uniref_db + + output: + tuple val(meta), path("*_genefamilies.tsv") , emit: genefamilies + tuple val(meta), path("*_pathabundance.tsv"), emit: pathabundance + tuple val(meta), path("*_pathcoverage.tsv") , emit: pathcoverage + tuple val(meta), path("*.log") , emit: log + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + def prefix = task.ext.prefix ?: "${meta.id}" + """ + humann \\ + --input ${reads} \\ + --output ./ \\ + --threads ${task.cpus} \\ + --taxonomic-profile ${metaphlan_profile} \\ + --nucleotide-database ${chocophlan_db} \\ + --protein-database ${uniref_db} \\ + --o-log ${prefix}.log \\ + ${args} + cat <<-END_VERSIONS > versions.yml + "${task.process}": + humann: \$(echo \$(humann --version 2>&1 | sed 's/^.*humann //; s/Using.*\$//' )) + END_VERSIONS + """ + + stub: + def args = task.ext.args ?: '' + def prefix = task.ext.prefix ?: "${meta.id}" + """ + touch ${prefix}_genefamilies.tsv + touch ${prefix}_pathabundance.tsv + touch ${prefix}_pathcoverage.tsv + touch ${prefix}.log + cat <<-END_VERSIONS > versions.yml + "${task.process}": + humann: \$(echo \$(humann --version 2>&1 | sed 's/^.*humann //; s/Using.*\$//' )) + END_VERSIONS + """ +} diff --git a/modules/local/humann/humann/nextflow.config b/modules/local/humann/humann/nextflow.config new file mode 100644 index 
0000000..52ad9a3 --- /dev/null +++ b/modules/local/humann/humann/nextflow.config @@ -0,0 +1,27 @@ +process { + withName: HUMANN_HUMANN { + publishDir = [ + [ + path: { "${params.outdir}/humann/humann" }, + mode: params.publish_dir_mode, + pattern: '*_genefamilies.tsv' + ], + [ + path: { "${params.outdir}/humann/humann" }, + mode: params.publish_dir_mode, + pattern: '*_pathabundance.tsv' + ], + [ + path: { "${params.outdir}/humann/humann" }, + mode: params.publish_dir_mode, + pattern: '*_pathcoverage.tsv' + ], + [ + path: { "${params.outdir}/humann/humann" }, + mode: params.publish_dir_mode, + pattern: '*.log' + ], + ] + ext.args = params.humann_options ? params.humann_options : "" + } +} diff --git a/modules/local/humann/humann/tests/main.nf.test b/modules/local/humann/humann/tests/main.nf.test new file mode 100644 index 0000000..f4c6fa5 --- /dev/null +++ b/modules/local/humann/humann/tests/main.nf.test @@ -0,0 +1,49 @@ +nextflow_process { + + name "Test Process HUMANN_HUMANN" + script "../main.nf" + process "HUMANN_HUMANN" + config "./nextflow.config" + + tag "modules" + tag "modules_nfcore" + tag "humann" + tag "humann/humann" + + test("DEMO_diamond") { + when { + process { + """ + input[0] = "DEMO_diamond" + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + } + + test("DEMO_diamond - stub") { + + options "-stub" + + when { + process { + """ + input[0] = "DEMO_diamond" + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + } +} diff --git a/modules/local/humann/humann/tests/nextflow.config b/modules/local/humann/humann/tests/nextflow.config new file mode 100644 index 0000000..52ad9a3 --- /dev/null +++ b/modules/local/humann/humann/tests/nextflow.config @@ -0,0 +1,27 @@ +process { + withName: HUMANN_HUMANN { + publishDir = [ + [ + path: { "${params.outdir}/humann/humann" }, + mode: params.publish_dir_mode, + pattern: 
'*_genefamilies.tsv' + ], + [ + path: { "${params.outdir}/humann/humann" }, + mode: params.publish_dir_mode, + pattern: '*_pathabundance.tsv' + ], + [ + path: { "${params.outdir}/humann/humann" }, + mode: params.publish_dir_mode, + pattern: '*_pathcoverage.tsv' + ], + [ + path: { "${params.outdir}/humann/humann" }, + mode: params.publish_dir_mode, + pattern: '*.log' + ], + ] + ext.args = params.humann_options ? params.humann_options : "" + } +} diff --git a/modules/local/humann/join/environment.yml b/modules/local/humann/join/environment.yml new file mode 100644 index 0000000..92f963f --- /dev/null +++ b/modules/local/humann/join/environment.yml @@ -0,0 +1,6 @@ +channels: + - conda-forge + - bioconda + - defaults +dependencies: + - bioconda::humann=3.8 diff --git a/modules/local/humann/join/join.nf b/modules/local/humann/join/join.nf new file mode 100644 index 0000000..ba0a341 --- /dev/null +++ b/modules/local/humann/join/join.nf @@ -0,0 +1,89 @@ +// TODO nf-core: If in doubt look at other nf-core/modules to see how we are doing things! :) +// https://github.com/nf-core/modules/tree/master/modules/nf-core/ +// You can also ask for help via your pull request or on the #modules channel on the nf-core Slack workspace: +// https://nf-co.re/join +// TODO nf-core: A module file SHOULD only define input and output files as command-line parameters. +// All other parameters MUST be provided using the "task.ext" directive, see here: +// https://www.nextflow.io/docs/latest/process.html#ext +// where "task.ext" is a string. +// Any parameters that need to be evaluated in the context of a particular sample +// e.g. single-end/paired-end data MUST also be defined and evaluated appropriately. +// TODO nf-core: Software that can be piped together SHOULD be added to separate module files +// unless there is a run-time, storage advantage in implementing in this way +// e.g. 
it's ok to have a single module for bwa to output BAM instead of SAM: +// bwa mem | samtools view -B -T ref.fasta +// TODO nf-core: Optional inputs are not currently supported by Nextflow. However, using an empty +// list (`[]`) instead of a file can be used to work around this issue. + +process HUMANN_JOIN { + tag '$bam' + label 'process_low' + + // TODO nf-core: List required Conda package(s). + // Software MUST be pinned to channel (i.e. "bioconda"), version (i.e. "1.10"). + // For Conda, the build (i.e. "h9402c20_2") must be EXCLUDED to support installation on different operating systems. + // TODO nf-core: See section in main README for further information regarding finding and adding container addresses to the section below. + conda "${moduleDir}/environment.yml" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? + 'https://depot.galaxyproject.org/singularity/humann:3.9--py312hdfd78af_0': + 'biocontainers/humann:3.9--py312hdfd78af_0' }" + + input: + // TODO nf-core: Where applicable all sample-specific information e.g. "id", "single_end", "read_group" + // MUST be provided as an input via a Groovy Map called "meta". + // This information may not be required in some instances e.g. indexing reference genome files: + // https://github.com/nf-core/modules/blob/master/modules/nf-core/bwa/index/main.nf + // TODO nf-core: Where applicable please provide/convert compressed files as input/output + // e.g. "*.fastq.gz" and NOT "*.fastq", "*.bam" and NOT "*.sam" etc. + path bam + + output: + // TODO nf-core: Named file extensions MUST be emitted for ALL output channels + path "*.bam", emit: bam + // TODO nf-core: List additional required output channels/values here + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + + // TODO nf-core: Where possible, a command MUST be provided to obtain the version number of the software e.g. 
1.10 + // If the software is unable to output a version number on the command-line then it can be manually specified + // e.g. https://github.com/nf-core/modules/blob/master/modules/nf-core/homer/annotatepeaks/main.nf + // Each software used MUST provide the software name and version number in the YAML version file (versions.yml) + // TODO nf-core: It MUST be possible to pass additional parameters to the tool as a command-line string via the "task.ext.args" directive + // TODO nf-core: If the tool supports multi-threading then you MUST provide the appropriate parameter + // using the Nextflow "task" variable e.g. "--threads $task.cpus" + // TODO nf-core: Please replace the example samtools command below with your module's command + // TODO nf-core: Please indent the command appropriately (4 spaces!!) to help with readability ;) + """ + samtools \\ + sort \\ + $args \\ + -@ $task.cpus \\ + $bam + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + humann: \$(samtools --version |& sed '1!d ; s/samtools //') + END_VERSIONS + """ + + stub: + def args = task.ext.args ?: '' + + // TODO nf-core: A stub section should mimic the execution of the original module as best as possible + // Have a look at the following examples: + // Simple example: https://github.com/nf-core/modules/blob/818474a292b4860ae8ff88e149fbcda68814114d/modules/nf-core/bcftools/annotate/main.nf#L47-L63 + // Complex example: https://github.com/nf-core/modules/blob/818474a292b4860ae8ff88e149fbcda68814114d/modules/nf-core/bedtools/split/main.nf#L38-L54 + """ + touch ${prefix}.bam + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + humann: \$(samtools --version |& sed '1!d ; s/samtools //') + END_VERSIONS + """ +} diff --git a/modules/local/humann/join/main.nf b/modules/local/humann/join/main.nf new file mode 100644 index 0000000..58bc710 --- /dev/null +++ b/modules/local/humann/join/main.nf @@ -0,0 +1,42 @@ +process HUMANN_DOWNLOADUNIREFDB { + label 'process_single' + + conda 
"${moduleDir}/environment.yml" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? + 'https://depot.galaxyproject.org/singularity/humann:3.8--pyh7cba7a3_0': + 'biocontainers/humann:3.8--pyh7cba7a3_0' }" + + input: + val uniref_db_version + + output: + path("uniref") , emit: uniref_db + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + """ + humann_databases \\ + --download uniref \\ + ${uniref_db_version} \\ + . \\ + ${args} + cat <<-END_VERSIONS > versions.yml + "${task.process}": + humann: \$(echo \$(humann --version 2>&1 | sed 's/^.*humann //; s/Using.*\$//' )) + END_VERSIONS + """ + + stub: + def args = task.ext.args ?: '' + """ + mkdir uniref + cat <<-END_VERSIONS > versions.yml + "${task.process}": + humann: \$(echo \$(humann --version 2>&1 | sed 's/^.*humann //; s/Using.*\$//' )) + END_VERSIONS + """ +} diff --git a/modules/local/humann/join/nextflow.config b/modules/local/humann/join/nextflow.config new file mode 100644 index 0000000..6be75c4 --- /dev/null +++ b/modules/local/humann/join/nextflow.config @@ -0,0 +1,5 @@ +process { + withName: HUMANN_DOWNLOADUNIREFDB { + ext.args = "--update-config no" + } +} diff --git a/modules/local/humann/join/tests/main.nf.test b/modules/local/humann/join/tests/main.nf.test new file mode 100644 index 0000000..4a0ba43 --- /dev/null +++ b/modules/local/humann/join/tests/main.nf.test @@ -0,0 +1,49 @@ +nextflow_process { + + name "Test Process HUMANN_DOWNLOADUNIREFDB" + script "../main.nf" + process "HUMANN_DOWNLOADUNIREFDB" + config "./nextflow.config" + + tag "modules" + tag "modules_nfcore" + tag "humann" + tag "humann/downloadunireflandb" + + test("DEMO_diamond") { + when { + process { + """ + input[0] = "DEMO_diamond" + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + } + + test("DEMO_diamond - stub") { + + 
options "-stub" + + when { + process { + """ + input[0] = "DEMO_diamond" + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + } +} diff --git a/modules/local/humann/join/tests/nextflow.config b/modules/local/humann/join/tests/nextflow.config new file mode 100644 index 0000000..6be75c4 --- /dev/null +++ b/modules/local/humann/join/tests/nextflow.config @@ -0,0 +1,5 @@ +process { + withName: HUMANN_DOWNLOADUNIREFDB { + ext.args = "--update-config no" + } +} diff --git a/modules/local/humann/regroup/environment.yml b/modules/local/humann/regroup/environment.yml new file mode 100644 index 0000000..92f963f --- /dev/null +++ b/modules/local/humann/regroup/environment.yml @@ -0,0 +1,6 @@ +channels: + - conda-forge + - bioconda + - defaults +dependencies: + - bioconda::humann=3.8 diff --git a/modules/local/humann/regroup/main.nf b/modules/local/humann/regroup/main.nf new file mode 100644 index 0000000..58bc710 --- /dev/null +++ b/modules/local/humann/regroup/main.nf @@ -0,0 +1,42 @@ +process HUMANN_DOWNLOADUNIREFDB { + label 'process_single' + + conda "${moduleDir}/environment.yml" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? + 'https://depot.galaxyproject.org/singularity/humann:3.8--pyh7cba7a3_0': + 'biocontainers/humann:3.8--pyh7cba7a3_0' }" + + input: + val uniref_db_version + + output: + path("uniref") , emit: uniref_db + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + """ + humann_databases \\ + --download uniref \\ + ${uniref_db_version} \\ + . 
\\ + ${args} + cat <<-END_VERSIONS > versions.yml + "${task.process}": + humann: \$(echo \$(humann --version 2>&1 | sed 's/^.*humann //; s/Using.*\$//' )) + END_VERSIONS + """ + + stub: + def args = task.ext.args ?: '' + """ + mkdir uniref + cat <<-END_VERSIONS > versions.yml + "${task.process}": + humann: \$(echo \$(humann --version 2>&1 | sed 's/^.*humann //; s/Using.*\$//' )) + END_VERSIONS + """ +} diff --git a/modules/local/humann/regroup/nextflow.config b/modules/local/humann/regroup/nextflow.config new file mode 100644 index 0000000..6be75c4 --- /dev/null +++ b/modules/local/humann/regroup/nextflow.config @@ -0,0 +1,5 @@ +process { + withName: HUMANN_DOWNLOADUNIREFDB { + ext.args = "--update-config no" + } +} diff --git a/modules/local/humann/regroup/regroup.nf b/modules/local/humann/regroup/regroup.nf new file mode 100644 index 0000000..8b70eab --- /dev/null +++ b/modules/local/humann/regroup/regroup.nf @@ -0,0 +1,91 @@ +// TODO nf-core: If in doubt look at other nf-core/modules to see how we are doing things! :) +// https://github.com/nf-core/modules/tree/master/modules/nf-core/ +// You can also ask for help via your pull request or on the #modules channel on the nf-core Slack workspace: +// https://nf-co.re/join +// TODO nf-core: A module file SHOULD only define input and output files as command-line parameters. +// All other parameters MUST be provided using the "task.ext" directive, see here: +// https://www.nextflow.io/docs/latest/process.html#ext +// where "task.ext" is a string. +// Any parameters that need to be evaluated in the context of a particular sample +// e.g. single-end/paired-end data MUST also be defined and evaluated appropriately. +// TODO nf-core: Software that can be piped together SHOULD be added to separate module files +// unless there is a run-time, storage advantage in implementing in this way +// e.g. 
it's ok to have a single module for bwa to output BAM instead of SAM: +// bwa mem | samtools view -B -T ref.fasta +// TODO nf-core: Optional inputs are not currently supported by Nextflow. However, using an empty +// list (`[]`) instead of a file can be used to work around this issue. + +process HUMANN_REGROUP { + tag "$meta.id" + label 'process_low' + + // TODO nf-core: List required Conda package(s). + // Software MUST be pinned to channel (i.e. "bioconda"), version (i.e. "1.10"). + // For Conda, the build (i.e. "h9402c20_2") must be EXCLUDED to support installation on different operating systems. + // TODO nf-core: See section in main README for further information regarding finding and adding container addresses to the section below. + conda "${moduleDir}/environment.yml" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? + 'https://depot.galaxyproject.org/singularity/humann:3.9--py312hdfd78af_0': + 'biocontainers/humann:3.9--py312hdfd78af_0' }" + + input: + // TODO nf-core: Where applicable all sample-specific information e.g. "id", "single_end", "read_group" + // MUST be provided as an input via a Groovy Map called "meta". + // This information may not be required in some instances e.g. indexing reference genome files: + // https://github.com/nf-core/modules/blob/master/modules/nf-core/bwa/index/main.nf + // TODO nf-core: Where applicable please provide/convert compressed files as input/output + // e.g. "*.fastq.gz" and NOT "*.fastq", "*.bam" and NOT "*.sam" etc. 
+ tuple val(meta), path(bam) + + output: + // TODO nf-core: Named file extensions MUST be emitted for ALL output channels + tuple val(meta), path("*.bam"), emit: bam + // TODO nf-core: List additional required output channels/values here + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + def prefix = task.ext.prefix ?: "${meta.id}" + // TODO nf-core: Where possible, a command MUST be provided to obtain the version number of the software e.g. 1.10 + // If the software is unable to output a version number on the command-line then it can be manually specified + // e.g. https://github.com/nf-core/modules/blob/master/modules/nf-core/homer/annotatepeaks/main.nf + // Each software used MUST provide the software name and version number in the YAML version file (versions.yml) + // TODO nf-core: It MUST be possible to pass additional parameters to the tool as a command-line string via the "task.ext.args" directive + // TODO nf-core: If the tool supports multi-threading then you MUST provide the appropriate parameter + // using the Nextflow "task" variable e.g. "--threads $task.cpus" + // TODO nf-core: Please replace the example samtools command below with your module's command + // TODO nf-core: Please indent the command appropriately (4 spaces!!) 
to help with readability ;) + """ + samtools \\ + sort \\ + $args \\ + -@ $task.cpus \\ + -o ${prefix}.bam \\ + -T $prefix \\ + $bam + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + humann: \$(samtools --version |& sed '1!d ; s/samtools //') + END_VERSIONS + """ + + stub: + def args = task.ext.args ?: '' + def prefix = task.ext.prefix ?: "${meta.id}" + // TODO nf-core: A stub section should mimic the execution of the original module as best as possible + // Have a look at the following examples: + // Simple example: https://github.com/nf-core/modules/blob/818474a292b4860ae8ff88e149fbcda68814114d/modules/nf-core/bcftools/annotate/main.nf#L47-L63 + // Complex example: https://github.com/nf-core/modules/blob/818474a292b4860ae8ff88e149fbcda68814114d/modules/nf-core/bedtools/split/main.nf#L38-L54 + """ + touch ${prefix}.bam + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + humann: \$(samtools --version |& sed '1!d ; s/samtools //') + END_VERSIONS + """ +} diff --git a/modules/local/humann/regroup/tests/main.nf.test b/modules/local/humann/regroup/tests/main.nf.test new file mode 100644 index 0000000..4a0ba43 --- /dev/null +++ b/modules/local/humann/regroup/tests/main.nf.test @@ -0,0 +1,49 @@ +nextflow_process { + + name "Test Process HUMANN_DOWNLOADUNIREFDB" + script "../main.nf" + process "HUMANN_DOWNLOADUNIREFDB" + config "./nextflow.config" + + tag "modules" + tag "modules_nfcore" + tag "humann" + tag "humann/downloadunireflandb" + + test("DEMO_diamond") { + when { + process { + """ + input[0] = "DEMO_diamond" + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + } + + test("DEMO_diamond - stub") { + + options "-stub" + + when { + process { + """ + input[0] = "DEMO_diamond" + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + } +} diff --git a/modules/local/humann/regroup/tests/nextflow.config 
b/modules/local/humann/regroup/tests/nextflow.config new file mode 100644 index 0000000..6be75c4 --- /dev/null +++ b/modules/local/humann/regroup/tests/nextflow.config @@ -0,0 +1,5 @@ +process { + withName: HUMANN_DOWNLOADUNIREFDB { + ext.args = "--update-config no" + } +} diff --git a/modules/local/humann/rename/environment.yml b/modules/local/humann/rename/environment.yml new file mode 100644 index 0000000..92f963f --- /dev/null +++ b/modules/local/humann/rename/environment.yml @@ -0,0 +1,6 @@ +channels: + - conda-forge + - bioconda + - defaults +dependencies: + - bioconda::humann=3.8 diff --git a/modules/local/humann/rename/main.nf b/modules/local/humann/rename/main.nf new file mode 100644 index 0000000..58bc710 --- /dev/null +++ b/modules/local/humann/rename/main.nf @@ -0,0 +1,42 @@ +process HUMANN_DOWNLOADUNIREFDB { + label 'process_single' + + conda "${moduleDir}/environment.yml" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? + 'https://depot.galaxyproject.org/singularity/humann:3.8--pyh7cba7a3_0': + 'biocontainers/humann:3.8--pyh7cba7a3_0' }" + + input: + val uniref_db_version + + output: + path("uniref") , emit: uniref_db + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + """ + humann_databases \\ + --download uniref \\ + ${uniref_db_version} \\ + . 
\\ + ${args} + cat <<-END_VERSIONS > versions.yml + "${task.process}": + humann: \$(echo \$(humann --version 2>&1 | sed 's/^.*humann //; s/Using.*\$//' )) + END_VERSIONS + """ + + stub: + def args = task.ext.args ?: '' + """ + mkdir uniref + cat <<-END_VERSIONS > versions.yml + "${task.process}": + humann: \$(echo \$(humann --version 2>&1 | sed 's/^.*humann //; s/Using.*\$//' )) + END_VERSIONS + """ +} diff --git a/modules/local/humann/rename/nextflow.config b/modules/local/humann/rename/nextflow.config new file mode 100644 index 0000000..6be75c4 --- /dev/null +++ b/modules/local/humann/rename/nextflow.config @@ -0,0 +1,5 @@ +process { + withName: HUMANN_DOWNLOADUNIREFDB { + ext.args = "--update-config no" + } +} diff --git a/modules/local/humann/rename/rename.nf b/modules/local/humann/rename/rename.nf new file mode 100644 index 0000000..d33eea0 --- /dev/null +++ b/modules/local/humann/rename/rename.nf @@ -0,0 +1,91 @@ +// TODO nf-core: If in doubt look at other nf-core/modules to see how we are doing things! :) +// https://github.com/nf-core/modules/tree/master/modules/nf-core/ +// You can also ask for help via your pull request or on the #modules channel on the nf-core Slack workspace: +// https://nf-co.re/join +// TODO nf-core: A module file SHOULD only define input and output files as command-line parameters. +// All other parameters MUST be provided using the "task.ext" directive, see here: +// https://www.nextflow.io/docs/latest/process.html#ext +// where "task.ext" is a string. +// Any parameters that need to be evaluated in the context of a particular sample +// e.g. single-end/paired-end data MUST also be defined and evaluated appropriately. +// TODO nf-core: Software that can be piped together SHOULD be added to separate module files +// unless there is a run-time, storage advantage in implementing in this way +// e.g. 
it's ok to have a single module for bwa to output BAM instead of SAM: +// bwa mem | samtools view -B -T ref.fasta +// TODO nf-core: Optional inputs are not currently supported by Nextflow. However, using an empty +// list (`[]`) instead of a file can be used to work around this issue. + +process HUMANN_RENAME { + tag "$meta.id" + label 'process_low' + + // TODO nf-core: List required Conda package(s). + // Software MUST be pinned to channel (i.e. "bioconda"), version (i.e. "1.10"). + // For Conda, the build (i.e. "h9402c20_2") must be EXCLUDED to support installation on different operating systems. + // TODO nf-core: See section in main README for further information regarding finding and adding container addresses to the section below. + conda "${moduleDir}/environment.yml" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? + 'https://depot.galaxyproject.org/singularity/humann:3.9--py312hdfd78af_0': + 'biocontainers/humann:3.9--py312hdfd78af_0' }" + + input: + // TODO nf-core: Where applicable all sample-specific information e.g. "id", "single_end", "read_group" + // MUST be provided as an input via a Groovy Map called "meta". + // This information may not be required in some instances e.g. indexing reference genome files: + // https://github.com/nf-core/modules/blob/master/modules/nf-core/bwa/index/main.nf + // TODO nf-core: Where applicable please provide/convert compressed files as input/output + // e.g. "*.fastq.gz" and NOT "*.fastq", "*.bam" and NOT "*.sam" etc. 
+ tuple val(meta), path(bam) + + output: + // TODO nf-core: Named file extensions MUST be emitted for ALL output channels + tuple val(meta), path("*.bam"), emit: bam + // TODO nf-core: List additional required output channels/values here + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + def prefix = task.ext.prefix ?: "${meta.id}" + // TODO nf-core: Where possible, a command MUST be provided to obtain the version number of the software e.g. 1.10 + // If the software is unable to output a version number on the command-line then it can be manually specified + // e.g. https://github.com/nf-core/modules/blob/master/modules/nf-core/homer/annotatepeaks/main.nf + // Each software used MUST provide the software name and version number in the YAML version file (versions.yml) + // TODO nf-core: It MUST be possible to pass additional parameters to the tool as a command-line string via the "task.ext.args" directive + // TODO nf-core: If the tool supports multi-threading then you MUST provide the appropriate parameter + // using the Nextflow "task" variable e.g. "--threads $task.cpus" + // TODO nf-core: Please replace the example samtools command below with your module's command + // TODO nf-core: Please indent the command appropriately (4 spaces!!) 
to help with readability ;) + """ + samtools \\ + sort \\ + $args \\ + -@ $task.cpus \\ + -o ${prefix}.bam \\ + -T $prefix \\ + $bam + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + humann: \$(samtools --version |& sed '1!d ; s/samtools //') + END_VERSIONS + """ + + stub: + def args = task.ext.args ?: '' + def prefix = task.ext.prefix ?: "${meta.id}" + // TODO nf-core: A stub section should mimic the execution of the original module as best as possible + // Have a look at the following examples: + // Simple example: https://github.com/nf-core/modules/blob/818474a292b4860ae8ff88e149fbcda68814114d/modules/nf-core/bcftools/annotate/main.nf#L47-L63 + // Complex example: https://github.com/nf-core/modules/blob/818474a292b4860ae8ff88e149fbcda68814114d/modules/nf-core/bedtools/split/main.nf#L38-L54 + """ + touch ${prefix}.bam + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + humann: \$(samtools --version |& sed '1!d ; s/samtools //') + END_VERSIONS + """ +} diff --git a/modules/local/humann/rename/tests/main.nf.test b/modules/local/humann/rename/tests/main.nf.test new file mode 100644 index 0000000..4a0ba43 --- /dev/null +++ b/modules/local/humann/rename/tests/main.nf.test @@ -0,0 +1,49 @@ +nextflow_process { + + name "Test Process HUMANN_DOWNLOADUNIREFDB" + script "../main.nf" + process "HUMANN_DOWNLOADUNIREFDB" + config "./nextflow.config" + + tag "modules" + tag "modules_nfcore" + tag "humann" + tag "humann/downloadunireflandb" + + test("DEMO_diamond") { + when { + process { + """ + input[0] = "DEMO_diamond" + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + } + + test("DEMO_diamond - stub") { + + options "-stub" + + when { + process { + """ + input[0] = "DEMO_diamond" + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + } +} diff --git a/modules/local/humann/rename/tests/nextflow.config 
b/modules/local/humann/rename/tests/nextflow.config new file mode 100644 index 0000000..6be75c4 --- /dev/null +++ b/modules/local/humann/rename/tests/nextflow.config @@ -0,0 +1,5 @@ +process { + withName: HUMANN_DOWNLOADUNIREFDB { + ext.args = "--update-config no" + } +} diff --git a/modules/local/humann/renorm/environment.yml b/modules/local/humann/renorm/environment.yml new file mode 100644 index 0000000..92f963f --- /dev/null +++ b/modules/local/humann/renorm/environment.yml @@ -0,0 +1,6 @@ +channels: + - conda-forge + - bioconda + - defaults +dependencies: + - bioconda::humann=3.8 diff --git a/modules/local/humann/renorm/main.nf b/modules/local/humann/renorm/main.nf new file mode 100644 index 0000000..58bc710 --- /dev/null +++ b/modules/local/humann/renorm/main.nf @@ -0,0 +1,42 @@ +process HUMANN_DOWNLOADUNIREFDB { + label 'process_single' + + conda "${moduleDir}/environment.yml" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? + 'https://depot.galaxyproject.org/singularity/humann:3.8--pyh7cba7a3_0': + 'biocontainers/humann:3.8--pyh7cba7a3_0' }" + + input: + val uniref_db_version + + output: + path("uniref") , emit: uniref_db + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + """ + humann_databases \\ + --download uniref \\ + ${uniref_db_version} \\ + . 
\\ + ${args} + cat <<-END_VERSIONS > versions.yml + "${task.process}": + humann: \$(echo \$(humann --version 2>&1 | sed 's/^.*humann //; s/Using.*\$//' )) + END_VERSIONS + """ + + stub: + def args = task.ext.args ?: '' + """ + mkdir uniref + cat <<-END_VERSIONS > versions.yml + "${task.process}": + humann: \$(echo \$(humann --version 2>&1 | sed 's/^.*humann //; s/Using.*\$//' )) + END_VERSIONS + """ +} diff --git a/modules/local/humann/renorm/nextflow.config b/modules/local/humann/renorm/nextflow.config new file mode 100644 index 0000000..6be75c4 --- /dev/null +++ b/modules/local/humann/renorm/nextflow.config @@ -0,0 +1,5 @@ +process { + withName: HUMANN_DOWNLOADUNIREFDB { + ext.args = "--update-config no" + } +} diff --git a/modules/local/humann/renorm/renorm.nf b/modules/local/humann/renorm/renorm.nf new file mode 100644 index 0000000..393a62f --- /dev/null +++ b/modules/local/humann/renorm/renorm.nf @@ -0,0 +1,91 @@ +// TODO nf-core: If in doubt look at other nf-core/modules to see how we are doing things! :) +// https://github.com/nf-core/modules/tree/master/modules/nf-core/ +// You can also ask for help via your pull request or on the #modules channel on the nf-core Slack workspace: +// https://nf-co.re/join +// TODO nf-core: A module file SHOULD only define input and output files as command-line parameters. +// All other parameters MUST be provided using the "task.ext" directive, see here: +// https://www.nextflow.io/docs/latest/process.html#ext +// where "task.ext" is a string. +// Any parameters that need to be evaluated in the context of a particular sample +// e.g. single-end/paired-end data MUST also be defined and evaluated appropriately. +// TODO nf-core: Software that can be piped together SHOULD be added to separate module files +// unless there is a run-time, storage advantage in implementing in this way +// e.g. 
it's ok to have a single module for bwa to output BAM instead of SAM: +// bwa mem | samtools view -B -T ref.fasta +// TODO nf-core: Optional inputs are not currently supported by Nextflow. However, using an empty +// list (`[]`) instead of a file can be used to work around this issue. + +process HUMANN_RENORM { + tag "$meta.id" + label 'process_low' + + // TODO nf-core: List required Conda package(s). + // Software MUST be pinned to channel (i.e. "bioconda"), version (i.e. "1.10"). + // For Conda, the build (i.e. "h9402c20_2") must be EXCLUDED to support installation on different operating systems. + // TODO nf-core: See section in main README for further information regarding finding and adding container addresses to the section below. + conda "${moduleDir}/environment.yml" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? + 'https://depot.galaxyproject.org/singularity/humann:3.9--py312hdfd78af_0': + 'biocontainers/humann:3.9--py312hdfd78af_0' }" + + input: + // TODO nf-core: Where applicable all sample-specific information e.g. "id", "single_end", "read_group" + // MUST be provided as an input via a Groovy Map called "meta". + // This information may not be required in some instances e.g. indexing reference genome files: + // https://github.com/nf-core/modules/blob/master/modules/nf-core/bwa/index/main.nf + // TODO nf-core: Where applicable please provide/convert compressed files as input/output + // e.g. "*.fastq.gz" and NOT "*.fastq", "*.bam" and NOT "*.sam" etc. 
+ tuple val(meta), path(bam) + + output: + // TODO nf-core: Named file extensions MUST be emitted for ALL output channels + tuple val(meta), path("*.bam"), emit: bam + // TODO nf-core: List additional required output channels/values here + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + def prefix = task.ext.prefix ?: "${meta.id}" + // TODO nf-core: Where possible, a command MUST be provided to obtain the version number of the software e.g. 1.10 + // If the software is unable to output a version number on the command-line then it can be manually specified + // e.g. https://github.com/nf-core/modules/blob/master/modules/nf-core/homer/annotatepeaks/main.nf + // Each software used MUST provide the software name and version number in the YAML version file (versions.yml) + // TODO nf-core: It MUST be possible to pass additional parameters to the tool as a command-line string via the "task.ext.args" directive + // TODO nf-core: If the tool supports multi-threading then you MUST provide the appropriate parameter + // using the Nextflow "task" variable e.g. "--threads $task.cpus" + // TODO nf-core: Please replace the example samtools command below with your module's command + // TODO nf-core: Please indent the command appropriately (4 spaces!!) 
to help with readability ;) + """ + samtools \\ + sort \\ + $args \\ + -@ $task.cpus \\ + -o ${prefix}.bam \\ + -T $prefix \\ + $bam + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + humann: \$(samtools --version |& sed '1!d ; s/samtools //') + END_VERSIONS + """ + + stub: + def args = task.ext.args ?: '' + def prefix = task.ext.prefix ?: "${meta.id}" + // TODO nf-core: A stub section should mimic the execution of the original module as best as possible + // Have a look at the following examples: + // Simple example: https://github.com/nf-core/modules/blob/818474a292b4860ae8ff88e149fbcda68814114d/modules/nf-core/bcftools/annotate/main.nf#L47-L63 + // Complex example: https://github.com/nf-core/modules/blob/818474a292b4860ae8ff88e149fbcda68814114d/modules/nf-core/bedtools/split/main.nf#L38-L54 + """ + touch ${prefix}.bam + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + humann: \$(samtools --version |& sed '1!d ; s/samtools //') + END_VERSIONS + """ +} diff --git a/modules/local/humann/renorm/tests/main.nf.test b/modules/local/humann/renorm/tests/main.nf.test new file mode 100644 index 0000000..4a0ba43 --- /dev/null +++ b/modules/local/humann/renorm/tests/main.nf.test @@ -0,0 +1,49 @@ +nextflow_process { + + name "Test Process HUMANN_DOWNLOADUNIREFDB" + script "../main.nf" + process "HUMANN_DOWNLOADUNIREFDB" + config "./nextflow.config" + + tag "modules" + tag "modules_nfcore" + tag "humann" + tag "humann/downloadunireflandb" + + test("DEMO_diamond") { + when { + process { + """ + input[0] = "DEMO_diamond" + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + } + + test("DEMO_diamond - stub") { + + options "-stub" + + when { + process { + """ + input[0] = "DEMO_diamond" + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + } +} diff --git a/modules/local/humann/renorm/tests/nextflow.config 
b/modules/local/humann/renorm/tests/nextflow.config new file mode 100644 index 0000000..6be75c4 --- /dev/null +++ b/modules/local/humann/renorm/tests/nextflow.config @@ -0,0 +1,5 @@ +process { + withName: HUMANN_DOWNLOADUNIREFDB { + ext.args = "--update-config no" + } +} From 1882f624da5f04341972baf9029e0f9447fe68dc Mon Sep 17 00:00:00 2001 From: Danielle Callan Date: Tue, 17 Sep 2024 13:00:57 -0400 Subject: [PATCH 02/23] fleshed out humann modules, tests still wip --- modules/local/humann/humann/main.nf | 1 - .../local/humann/humann/tests/main.nf.test | 80 ++++++++++++++-- modules/local/humann/join/join.nf | 89 ------------------ modules/local/humann/join/main.nf | 29 +++--- modules/local/humann/join/nextflow.config | 11 ++- modules/local/humann/join/tests/main.nf.test | 25 +++-- .../local/humann/join/tests/nextflow.config | 11 ++- modules/local/humann/regroup/main.nf | 32 ++++--- modules/local/humann/regroup/nextflow.config | 11 ++- modules/local/humann/regroup/regroup.nf | 91 ------------------- .../local/humann/regroup/tests/main.nf.test | 39 ++++++-- .../humann/regroup/tests/nextflow.config | 11 ++- modules/local/humann/rename/main.nf | 34 ++++--- modules/local/humann/rename/nextflow.config | 11 ++- modules/local/humann/rename/rename.nf | 91 ------------------- .../local/humann/rename/tests/main.nf.test | 39 ++++++-- .../local/humann/rename/tests/nextflow.config | 11 ++- modules/local/humann/renorm/main.nf | 42 +++++---- modules/local/humann/renorm/nextflow.config | 11 ++- modules/local/humann/renorm/renorm.nf | 91 ------------------- .../local/humann/renorm/tests/main.nf.test | 39 ++++++-- .../local/humann/renorm/tests/nextflow.config | 11 ++- 22 files changed, 329 insertions(+), 481 deletions(-) delete mode 100644 modules/local/humann/join/join.nf delete mode 100644 modules/local/humann/regroup/regroup.nf delete mode 100644 modules/local/humann/rename/rename.nf delete mode 100644 modules/local/humann/renorm/renorm.nf diff --git 
a/modules/local/humann/humann/main.nf b/modules/local/humann/humann/main.nf index 66abe4a..c7a0c78 100644 --- a/modules/local/humann/humann/main.nf +++ b/modules/local/humann/humann/main.nf @@ -43,7 +43,6 @@ process HUMANN_HUMANN { """ stub: - def args = task.ext.args ?: '' def prefix = task.ext.prefix ?: "${meta.id}" """ touch ${prefix}_genefamilies.tsv diff --git a/modules/local/humann/humann/tests/main.nf.test b/modules/local/humann/humann/tests/main.nf.test index f4c6fa5..aa5b71a 100644 --- a/modules/local/humann/humann/tests/main.nf.test +++ b/modules/local/humann/humann/tests/main.nf.test @@ -10,37 +10,103 @@ nextflow_process { tag "humann" tag "humann/humann" - test("DEMO_diamond") { + setup { + run("HUMANN_DOWNLOADCHOCOPHLANDB") { + script "../../downloadchocophlandb/main.nf" + process { + """ + input[0] = 'TODO - some chochophlan db version - or maybe skip this setup and build in a mini test db' + """ + } + } + + run("HUMANN_DOWNLOADUNIREFDB") { + script "../../downloadunirefdb/main.nf" + process { + """ + input[0] = 'TODO - some uniref db version - or maybe skip this setup and build in a mini test db' + """ + } + } + } + + test("kneaded fastq.gz") { + when { process { """ - input[0] = "DEMO_diamond" + input[0] = Channel.of( + [ + [ id: 'test' ], + [ + file(params.modules_testdata_base_path + "TODO_some_kneaded_1.fastq.gz", checkIfExists: true), + file(params.modules_testdata_base_path + "TODO_some_kneaded_2.fastq.gz", checkIfExists: true) + ] + ] + ) + input[1] = Channel.of( + [ + [id: 'test'], + [ + file(params.modules_testdata_base_path + "TODO_some_metaphlan_profile.tsv", checkIfExists: true) + ] + ] + ) + input[2] = HUMANN_DOWNLOADCHOCOPHLANDB.out.chocophlan_db + input[3] = HUMANN_DOWNLOADUNIREFDB.out.uniref_db """ } } + then { - assertAll( + assertAll ( { assert process.success }, - { assert snapshot(process.out).match() } + { assert snapshot( + process.out.genefamilies + process.out.pathabundance + process.out.pathcoverage, + process.out.versions + 
).match() + }, + { assert path(process.out.log[0][1]).text.contains("TODO a line indicates its running and gives a version, hopefully?") } ) } } - test("DEMO_diamond - stub") { + test("kneaded fastq.gz - stub") { options "-stub" when { process { """ - input[0] = "DEMO_diamond" + input[0] = Channel.of( + [ + [ id: 'test' ], + [ + file(params.modules_testdata_base_path + "TODO_some_kneaded_1.fastq.gz", checkIfExists: true), + file(params.modules_testdata_base_path + "TODO_some_kneaded_2.fastq.gz", checkIfExists: true) + ] + ] + ) + input[1] = Channel.of( + [ + [id: 'test'], + [ + file(params.modules_testdata_base_path + "TODO_some_metaphlan_profile.tsv", checkIfExists: true) + ] + ] + ) + input[2] = HUMANN_DOWNLOADCHOCOPHLANDB.out.chocophlan_db + input[3] = HUMANN_DOWNLOADUNIREFDB.out.uniref_db """ } } + then { - assertAll( + assertAll ( { assert process.success }, { assert snapshot(process.out).match() } ) diff --git a/modules/local/humann/join/join.nf b/modules/local/humann/join/join.nf deleted file mode 100644 index ba0a341..0000000 --- a/modules/local/humann/join/join.nf +++ /dev/null @@ -1,89 +0,0 @@ -// TODO nf-core: If in doubt look at other nf-core/modules to see how we are doing things! :) -// https://github.com/nf-core/modules/tree/master/modules/nf-core/ -// You can also ask for help via your pull request or on the #modules channel on the nf-core Slack workspace: -// https://nf-co.re/join -// TODO nf-core: A module file SHOULD only define input and output files as command-line parameters. -// All other parameters MUST be provided using the "task.ext" directive, see here: -// https://www.nextflow.io/docs/latest/process.html#ext -// where "task.ext" is a string. -// Any parameters that need to be evaluated in the context of a particular sample -// e.g. single-end/paired-end data MUST also be defined and evaluated appropriately. 
-// TODO nf-core: Software that can be piped together SHOULD be added to separate module files -// unless there is a run-time, storage advantage in implementing in this way -// e.g. it's ok to have a single module for bwa to output BAM instead of SAM: -// bwa mem | samtools view -B -T ref.fasta -// TODO nf-core: Optional inputs are not currently supported by Nextflow. However, using an empty -// list (`[]`) instead of a file can be used to work around this issue. - -process HUMANN_JOIN { - tag '$bam' - label 'process_low' - - // TODO nf-core: List required Conda package(s). - // Software MUST be pinned to channel (i.e. "bioconda"), version (i.e. "1.10"). - // For Conda, the build (i.e. "h9402c20_2") must be EXCLUDED to support installation on different operating systems. - // TODO nf-core: See section in main README for further information regarding finding and adding container addresses to the section below. - conda "${moduleDir}/environment.yml" - container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? - 'https://depot.galaxyproject.org/singularity/humann:3.9--py312hdfd78af_0': - 'biocontainers/humann:3.9--py312hdfd78af_0' }" - - input: - // TODO nf-core: Where applicable all sample-specific information e.g. "id", "single_end", "read_group" - // MUST be provided as an input via a Groovy Map called "meta". - // This information may not be required in some instances e.g. indexing reference genome files: - // https://github.com/nf-core/modules/blob/master/modules/nf-core/bwa/index/main.nf - // TODO nf-core: Where applicable please provide/convert compressed files as input/output - // e.g. "*.fastq.gz" and NOT "*.fastq", "*.bam" and NOT "*.sam" etc. 
- path bam - - output: - // TODO nf-core: Named file extensions MUST be emitted for ALL output channels - path "*.bam", emit: bam - // TODO nf-core: List additional required output channels/values here - path "versions.yml" , emit: versions - - when: - task.ext.when == null || task.ext.when - - script: - def args = task.ext.args ?: '' - - // TODO nf-core: Where possible, a command MUST be provided to obtain the version number of the software e.g. 1.10 - // If the software is unable to output a version number on the command-line then it can be manually specified - // e.g. https://github.com/nf-core/modules/blob/master/modules/nf-core/homer/annotatepeaks/main.nf - // Each software used MUST provide the software name and version number in the YAML version file (versions.yml) - // TODO nf-core: It MUST be possible to pass additional parameters to the tool as a command-line string via the "task.ext.args" directive - // TODO nf-core: If the tool supports multi-threading then you MUST provide the appropriate parameter - // using the Nextflow "task" variable e.g. "--threads $task.cpus" - // TODO nf-core: Please replace the example samtools command below with your module's command - // TODO nf-core: Please indent the command appropriately (4 spaces!!) 
to help with readability ;) - """ - samtools \\ - sort \\ - $args \\ - -@ $task.cpus \\ - $bam - - cat <<-END_VERSIONS > versions.yml - "${task.process}": - humann: \$(samtools --version |& sed '1!d ; s/samtools //') - END_VERSIONS - """ - - stub: - def args = task.ext.args ?: '' - - // TODO nf-core: A stub section should mimic the execution of the original module as best as possible - // Have a look at the following examples: - // Simple example: https://github.com/nf-core/modules/blob/818474a292b4860ae8ff88e149fbcda68814114d/modules/nf-core/bcftools/annotate/main.nf#L47-L63 - // Complex example: https://github.com/nf-core/modules/blob/818474a292b4860ae8ff88e149fbcda68814114d/modules/nf-core/bedtools/split/main.nf#L38-L54 - """ - touch ${prefix}.bam - - cat <<-END_VERSIONS > versions.yml - "${task.process}": - humann: \$(samtools --version |& sed '1!d ; s/samtools //') - END_VERSIONS - """ -} diff --git a/modules/local/humann/join/main.nf b/modules/local/humann/join/main.nf index 58bc710..47c2f66 100644 --- a/modules/local/humann/join/main.nf +++ b/modules/local/humann/join/main.nf @@ -1,5 +1,5 @@ -process HUMANN_DOWNLOADUNIREFDB { - label 'process_single' +process HUMANN_JOIN { + label 'process_low' conda "${moduleDir}/environment.yml" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? @@ -7,23 +7,25 @@ process HUMANN_DOWNLOADUNIREFDB { 'biocontainers/humann:3.8--pyh7cba7a3_0' }" input: - val uniref_db_version + path(input_dir) + val file_name_pattern output: - path("uniref") , emit: uniref_db - path "versions.yml" , emit: versions - - when: - task.ext.when == null || task.ext.when + path("*_joined.tsv.gz"), emit: joined + path "versions.yml" , emit: versions script: def args = task.ext.args ?: '' """ - humann_databases \\ - --download uniref \\ - ${uniref_db_version} \\ - . 
\\ + if compgen -G "$input_dir/*$file_name_pattern*.gz" > /dev/null; then + find $input_dir \( -name '*$file_name_pattern*' \) -exec gunzip --verbose {} \; + fi + humann_join_tables \\ + --input $input_dir \\ + --output ${file_name_pattern}_joined.tsv \\ + --file_name $file_name_pattern \\ ${args} + gzip -n ${file_name_pattern}_joined.tsv cat <<-END_VERSIONS > versions.yml "${task.process}": humann: \$(echo \$(humann --version 2>&1 | sed 's/^.*humann //; s/Using.*\$//' )) @@ -31,9 +33,8 @@ process HUMANN_DOWNLOADUNIREFDB { """ stub: - def args = task.ext.args ?: '' """ - mkdir uniref + touch ${file_name_pattern}_joined.tsv.gz cat <<-END_VERSIONS > versions.yml "${task.process}": humann: \$(echo \$(humann --version 2>&1 | sed 's/^.*humann //; s/Using.*\$//' )) diff --git a/modules/local/humann/join/nextflow.config b/modules/local/humann/join/nextflow.config index 6be75c4..2c90039 100644 --- a/modules/local/humann/join/nextflow.config +++ b/modules/local/humann/join/nextflow.config @@ -1,5 +1,12 @@ process { - withName: HUMANN_DOWNLOADUNIREFDB { - ext.args = "--update-config no" + withName: HUMANN_JOIN { + publishDir = [ + [ + path: { "${params.outdir}/humann/results" }, + mode: params.publish_dir_mode, + pattern: '*_joined.tsv.gz' + ] + ] + ext.args = params.humann_options ? 
params.humann_options : "" } } diff --git a/modules/local/humann/join/tests/main.nf.test b/modules/local/humann/join/tests/main.nf.test index 4a0ba43..3e748ea 100644 --- a/modules/local/humann/join/tests/main.nf.test +++ b/modules/local/humann/join/tests/main.nf.test @@ -1,40 +1,47 @@ nextflow_process { - name "Test Process HUMANN_DOWNLOADUNIREFDB" + name "Test Process HUMANN_JOIN" script "../main.nf" - process "HUMANN_DOWNLOADUNIREFDB" + process "HUMANN_JOIN" config "./nextflow.config" tag "modules" tag "modules_nfcore" tag "humann" - tag "humann/downloadunireflandb" + tag "humann/join" - test("DEMO_diamond") { + test("humann/join") { when { process { """ - input[0] = "DEMO_diamond" + input[0] = TODO - first is a path to a dir w files matching the below pattern + input[1] = "genefamilies" """ } } then { - assertAll( + assertAll ( { assert process.success }, - { assert snapshot(process.out).match() } + { assert snapshot( + process.out.joined + process.out.versions + ).match() + }, + { assert path(process.out.log[0][1]).text.contains("TODO a line indicates its running and gives a version, hopefully?") } ) } } - test("DEMO_diamond - stub") { + test("humann/join - stub") { options "-stub" when { process { """ - input[0] = "DEMO_diamond" + input[0] = TODO - first is a path to a dir w files matching the below pattern + input[1] = "genefamilies" """ } } diff --git a/modules/local/humann/join/tests/nextflow.config b/modules/local/humann/join/tests/nextflow.config index 6be75c4..2c90039 100644 --- a/modules/local/humann/join/tests/nextflow.config +++ b/modules/local/humann/join/tests/nextflow.config @@ -1,5 +1,12 @@ process { - withName: HUMANN_DOWNLOADUNIREFDB { - ext.args = "--update-config no" + withName: HUMANN_JOIN { + publishDir = [ + [ + path: { "${params.outdir}/humann/results" }, + mode: params.publish_dir_mode, + pattern: '*_joined.tsv.gz' + ] + ] + ext.args = params.humann_options ? 
params.humann_options : "" } } diff --git a/modules/local/humann/regroup/main.nf b/modules/local/humann/regroup/main.nf index 58bc710..37c2cbc 100644 --- a/modules/local/humann/regroup/main.nf +++ b/modules/local/humann/regroup/main.nf @@ -1,4 +1,5 @@ -process HUMANN_DOWNLOADUNIREFDB { +process HUMANN_REGROUP { + tag "$meta.id" label 'process_single' conda "${moduleDir}/environment.yml" @@ -7,23 +8,28 @@ process HUMANN_DOWNLOADUNIREFDB { 'biocontainers/humann:3.8--pyh7cba7a3_0' }" input: - val uniref_db_version + tuple val(meta), path(input) + val groups output: - path("uniref") , emit: uniref_db - path "versions.yml" , emit: versions - - when: - task.ext.when == null || task.ext.when + tuple val(meta), path("*_regroup.tsv.gz"), emit: regroup + path "versions.yml" , emit: versions script: def args = task.ext.args ?: '' + def prefix = task.ext.prefix ?: "${meta.id}" """ - humann_databases \\ - --download uniref \\ - ${uniref_db_version} \\ - . \\ + if [[ $input == *.gz ]]; then + gunzip -c $input > input.tsv + else + mv $input input.tsv + fi + humann_regroup_table \\ + --input input.tsv \\ + --output ${prefix}_${groups}_regroup.tsv \\ + --groups $groups \\ ${args} + gzip -n ${prefix}_${groups}_regroup.tsv cat <<-END_VERSIONS > versions.yml "${task.process}": humann: \$(echo \$(humann --version 2>&1 | sed 's/^.*humann //; s/Using.*\$//' )) @@ -31,9 +37,9 @@ process HUMANN_DOWNLOADUNIREFDB { """ stub: - def args = task.ext.args ?: '' + def prefix = task.ext.prefix ?: "${meta.id}" """ - mkdir uniref + touch ${prefix}_${groups}_regroup.tsv.gz cat <<-END_VERSIONS > versions.yml "${task.process}": humann: \$(echo \$(humann --version 2>&1 | sed 's/^.*humann //; s/Using.*\$//' )) diff --git a/modules/local/humann/regroup/nextflow.config b/modules/local/humann/regroup/nextflow.config index 6be75c4..ff58126 100644 --- a/modules/local/humann/regroup/nextflow.config +++ b/modules/local/humann/regroup/nextflow.config @@ -1,5 +1,12 @@ process { - withName: HUMANN_DOWNLOADUNIREFDB 
{ - ext.args = "--update-config no" + withName: HUMANN_REGROUP { + publishDir = [ + [ + path: { "${params.outdir}/humann/regroup" }, + mode: params.publish_dir_mode, + pattern: '*_regroup.tsv.gz' + ] + ] + ext.args = params.humann_options ? params.humann_options : "" } } diff --git a/modules/local/humann/regroup/regroup.nf b/modules/local/humann/regroup/regroup.nf deleted file mode 100644 index 8b70eab..0000000 --- a/modules/local/humann/regroup/regroup.nf +++ /dev/null @@ -1,91 +0,0 @@ -// TODO nf-core: If in doubt look at other nf-core/modules to see how we are doing things! :) -// https://github.com/nf-core/modules/tree/master/modules/nf-core/ -// You can also ask for help via your pull request or on the #modules channel on the nf-core Slack workspace: -// https://nf-co.re/join -// TODO nf-core: A module file SHOULD only define input and output files as command-line parameters. -// All other parameters MUST be provided using the "task.ext" directive, see here: -// https://www.nextflow.io/docs/latest/process.html#ext -// where "task.ext" is a string. -// Any parameters that need to be evaluated in the context of a particular sample -// e.g. single-end/paired-end data MUST also be defined and evaluated appropriately. -// TODO nf-core: Software that can be piped together SHOULD be added to separate module files -// unless there is a run-time, storage advantage in implementing in this way -// e.g. it's ok to have a single module for bwa to output BAM instead of SAM: -// bwa mem | samtools view -B -T ref.fasta -// TODO nf-core: Optional inputs are not currently supported by Nextflow. However, using an empty -// list (`[]`) instead of a file can be used to work around this issue. - -process HUMANN_REGROUP { - tag "$meta.id" - label 'process_low' - - // TODO nf-core: List required Conda package(s). - // Software MUST be pinned to channel (i.e. "bioconda"), version (i.e. "1.10"). - // For Conda, the build (i.e. 
"h9402c20_2") must be EXCLUDED to support installation on different operating systems. - // TODO nf-core: See section in main README for further information regarding finding and adding container addresses to the section below. - conda "${moduleDir}/environment.yml" - container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? - 'https://depot.galaxyproject.org/singularity/humann:3.9--py312hdfd78af_0': - 'biocontainers/humann:3.9--py312hdfd78af_0' }" - - input: - // TODO nf-core: Where applicable all sample-specific information e.g. "id", "single_end", "read_group" - // MUST be provided as an input via a Groovy Map called "meta". - // This information may not be required in some instances e.g. indexing reference genome files: - // https://github.com/nf-core/modules/blob/master/modules/nf-core/bwa/index/main.nf - // TODO nf-core: Where applicable please provide/convert compressed files as input/output - // e.g. "*.fastq.gz" and NOT "*.fastq", "*.bam" and NOT "*.sam" etc. - tuple val(meta), path(bam) - - output: - // TODO nf-core: Named file extensions MUST be emitted for ALL output channels - tuple val(meta), path("*.bam"), emit: bam - // TODO nf-core: List additional required output channels/values here - path "versions.yml" , emit: versions - - when: - task.ext.when == null || task.ext.when - - script: - def args = task.ext.args ?: '' - def prefix = task.ext.prefix ?: "${meta.id}" - // TODO nf-core: Where possible, a command MUST be provided to obtain the version number of the software e.g. 1.10 - // If the software is unable to output a version number on the command-line then it can be manually specified - // e.g. 
https://github.com/nf-core/modules/blob/master/modules/nf-core/homer/annotatepeaks/main.nf - // Each software used MUST provide the software name and version number in the YAML version file (versions.yml) - // TODO nf-core: It MUST be possible to pass additional parameters to the tool as a command-line string via the "task.ext.args" directive - // TODO nf-core: If the tool supports multi-threading then you MUST provide the appropriate parameter - // using the Nextflow "task" variable e.g. "--threads $task.cpus" - // TODO nf-core: Please replace the example samtools command below with your module's command - // TODO nf-core: Please indent the command appropriately (4 spaces!!) to help with readability ;) - """ - samtools \\ - sort \\ - $args \\ - -@ $task.cpus \\ - -o ${prefix}.bam \\ - -T $prefix \\ - $bam - - cat <<-END_VERSIONS > versions.yml - "${task.process}": - humann: \$(samtools --version |& sed '1!d ; s/samtools //') - END_VERSIONS - """ - - stub: - def args = task.ext.args ?: '' - def prefix = task.ext.prefix ?: "${meta.id}" - // TODO nf-core: A stub section should mimic the execution of the original module as best as possible - // Have a look at the following examples: - // Simple example: https://github.com/nf-core/modules/blob/818474a292b4860ae8ff88e149fbcda68814114d/modules/nf-core/bcftools/annotate/main.nf#L47-L63 - // Complex example: https://github.com/nf-core/modules/blob/818474a292b4860ae8ff88e149fbcda68814114d/modules/nf-core/bedtools/split/main.nf#L38-L54 - """ - touch ${prefix}.bam - - cat <<-END_VERSIONS > versions.yml - "${task.process}": - humann: \$(samtools --version |& sed '1!d ; s/samtools //') - END_VERSIONS - """ -} diff --git a/modules/local/humann/regroup/tests/main.nf.test b/modules/local/humann/regroup/tests/main.nf.test index 4a0ba43..9dc218c 100644 --- a/modules/local/humann/regroup/tests/main.nf.test +++ b/modules/local/humann/regroup/tests/main.nf.test @@ -1,40 +1,61 @@ nextflow_process { - name "Test Process 
HUMANN_DOWNLOADUNIREFDB" + name "Test Process HUMANN_REGROUP" script "../main.nf" - process "HUMANN_DOWNLOADUNIREFDB" + process "HUMANN_REGROUP" config "./nextflow.config" tag "modules" tag "modules_nfcore" tag "humann" - tag "humann/downloadunireflandb" + tag "humann/regroup" - test("DEMO_diamond") { + test("humann/regroup") { when { process { """ - input[0] = "DEMO_diamond" + input[0] = Channel.of( + [ + [id: 'test'], + [ + file(params.modules_testdata_base_path + "TODO_some_genefamilies.tsv", checkIfExists: true) + ] + ] + ) + input[1] = "uniref90_rxn" """ } } then { - assertAll( + assertAll ( { assert process.success }, - { assert snapshot(process.out).match() } + { assert snapshot( + process.out.regroup + process.out.versions + ).match() + }, + { assert path(process.out.log[0][1]).text.contains("TODO a line indicates its running and gives a version, hopefully?") } ) } } - test("DEMO_diamond - stub") { + test("humann/regroup - stub") { options "-stub" when { process { """ - input[0] = "DEMO_diamond" + input[0] = Channel.of( + [ + [id: 'test'], + [ + file(params.modules_testdata_base_path + "TODO_some_genefamilies.tsv", checkIfExists: true) + ] + ] + ) + input[1] = "uniref90_rxn" """ } } diff --git a/modules/local/humann/regroup/tests/nextflow.config b/modules/local/humann/regroup/tests/nextflow.config index 6be75c4..ff58126 100644 --- a/modules/local/humann/regroup/tests/nextflow.config +++ b/modules/local/humann/regroup/tests/nextflow.config @@ -1,5 +1,12 @@ process { - withName: HUMANN_DOWNLOADUNIREFDB { - ext.args = "--update-config no" + withName: HUMANN_REGROUP { + publishDir = [ + [ + path: { "${params.outdir}/humann/regroup" }, + mode: params.publish_dir_mode, + pattern: '*_regroup.tsv.gz' + ] + ] + ext.args = params.humann_options ? 
params.humann_options : "" } } diff --git a/modules/local/humann/rename/main.nf b/modules/local/humann/rename/main.nf index 58bc710..16d8dc2 100644 --- a/modules/local/humann/rename/main.nf +++ b/modules/local/humann/rename/main.nf @@ -1,5 +1,6 @@ -process HUMANN_DOWNLOADUNIREFDB { - label 'process_single' +process HUMANN_RENAME { + tag "$meta.id" + label 'process_low' conda "${moduleDir}/environment.yml" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? @@ -7,23 +8,28 @@ process HUMANN_DOWNLOADUNIREFDB { 'biocontainers/humann:3.8--pyh7cba7a3_0' }" input: - val uniref_db_version + tuple val(meta), path(input) + val names output: - path("uniref") , emit: uniref_db - path "versions.yml" , emit: versions - - when: - task.ext.when == null || task.ext.when + tuple val(meta), path("*_renamed.tsv.gz"), emit: renamed + path "versions.yml" , emit: versions script: def args = task.ext.args ?: '' + def prefix = task.ext.prefix ?: "${meta.id}" """ - humann_databases \\ - --download uniref \\ - ${uniref_db_version} \\ - . 
\\ + if [[ $input == *.gz ]]; then + gunzip -c $input > input.tsv + else + mv $input input.tsv + fi + humann_rename_table \\ + --input input.tsv \\ + --output ${prefix}_${names}_rename.tsv \\ + --names $names \\ ${args} + gzip -n ${prefix}_${names}_rename.tsv cat <<-END_VERSIONS > versions.yml "${task.process}": humann: \$(echo \$(humann --version 2>&1 | sed 's/^.*humann //; s/Using.*\$//' )) @@ -31,9 +37,9 @@ process HUMANN_DOWNLOADUNIREFDB { """ stub: - def args = task.ext.args ?: '' + def prefix = task.ext.prefix ?: "${meta.id}" """ - mkdir uniref + touch ${prefix}_${names}_rename.tsv cat <<-END_VERSIONS > versions.yml "${task.process}": humann: \$(echo \$(humann --version 2>&1 | sed 's/^.*humann //; s/Using.*\$//' )) diff --git a/modules/local/humann/rename/nextflow.config b/modules/local/humann/rename/nextflow.config index 6be75c4..a990469 100644 --- a/modules/local/humann/rename/nextflow.config +++ b/modules/local/humann/rename/nextflow.config @@ -1,5 +1,12 @@ process { - withName: HUMANN_DOWNLOADUNIREFDB { - ext.args = "--update-config no" + withName: HUMANN_RENAME { + publishDir = [ + [ + path: { "${params.outdir}/humann/rename" }, + mode: params.publish_dir_mode, + pattern: '*_renamed.tsv.gz' + ] + ] + ext.args = params.humann_options ? params.humann_options : "" } } diff --git a/modules/local/humann/rename/rename.nf b/modules/local/humann/rename/rename.nf deleted file mode 100644 index d33eea0..0000000 --- a/modules/local/humann/rename/rename.nf +++ /dev/null @@ -1,91 +0,0 @@ -// TODO nf-core: If in doubt look at other nf-core/modules to see how we are doing things! :) -// https://github.com/nf-core/modules/tree/master/modules/nf-core/ -// You can also ask for help via your pull request or on the #modules channel on the nf-core Slack workspace: -// https://nf-co.re/join -// TODO nf-core: A module file SHOULD only define input and output files as command-line parameters. 
-// All other parameters MUST be provided using the "task.ext" directive, see here: -// https://www.nextflow.io/docs/latest/process.html#ext -// where "task.ext" is a string. -// Any parameters that need to be evaluated in the context of a particular sample -// e.g. single-end/paired-end data MUST also be defined and evaluated appropriately. -// TODO nf-core: Software that can be piped together SHOULD be added to separate module files -// unless there is a run-time, storage advantage in implementing in this way -// e.g. it's ok to have a single module for bwa to output BAM instead of SAM: -// bwa mem | samtools view -B -T ref.fasta -// TODO nf-core: Optional inputs are not currently supported by Nextflow. However, using an empty -// list (`[]`) instead of a file can be used to work around this issue. - -process HUMANN_RENAME { - tag "$meta.id" - label 'process_low' - - // TODO nf-core: List required Conda package(s). - // Software MUST be pinned to channel (i.e. "bioconda"), version (i.e. "1.10"). - // For Conda, the build (i.e. "h9402c20_2") must be EXCLUDED to support installation on different operating systems. - // TODO nf-core: See section in main README for further information regarding finding and adding container addresses to the section below. - conda "${moduleDir}/environment.yml" - container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? - 'https://depot.galaxyproject.org/singularity/humann:3.9--py312hdfd78af_0': - 'biocontainers/humann:3.9--py312hdfd78af_0' }" - - input: - // TODO nf-core: Where applicable all sample-specific information e.g. "id", "single_end", "read_group" - // MUST be provided as an input via a Groovy Map called "meta". - // This information may not be required in some instances e.g. 
indexing reference genome files: - // https://github.com/nf-core/modules/blob/master/modules/nf-core/bwa/index/main.nf - // TODO nf-core: Where applicable please provide/convert compressed files as input/output - // e.g. "*.fastq.gz" and NOT "*.fastq", "*.bam" and NOT "*.sam" etc. - tuple val(meta), path(bam) - - output: - // TODO nf-core: Named file extensions MUST be emitted for ALL output channels - tuple val(meta), path("*.bam"), emit: bam - // TODO nf-core: List additional required output channels/values here - path "versions.yml" , emit: versions - - when: - task.ext.when == null || task.ext.when - - script: - def args = task.ext.args ?: '' - def prefix = task.ext.prefix ?: "${meta.id}" - // TODO nf-core: Where possible, a command MUST be provided to obtain the version number of the software e.g. 1.10 - // If the software is unable to output a version number on the command-line then it can be manually specified - // e.g. https://github.com/nf-core/modules/blob/master/modules/nf-core/homer/annotatepeaks/main.nf - // Each software used MUST provide the software name and version number in the YAML version file (versions.yml) - // TODO nf-core: It MUST be possible to pass additional parameters to the tool as a command-line string via the "task.ext.args" directive - // TODO nf-core: If the tool supports multi-threading then you MUST provide the appropriate parameter - // using the Nextflow "task" variable e.g. "--threads $task.cpus" - // TODO nf-core: Please replace the example samtools command below with your module's command - // TODO nf-core: Please indent the command appropriately (4 spaces!!) 
to help with readability ;) - """ - samtools \\ - sort \\ - $args \\ - -@ $task.cpus \\ - -o ${prefix}.bam \\ - -T $prefix \\ - $bam - - cat <<-END_VERSIONS > versions.yml - "${task.process}": - humann: \$(samtools --version |& sed '1!d ; s/samtools //') - END_VERSIONS - """ - - stub: - def args = task.ext.args ?: '' - def prefix = task.ext.prefix ?: "${meta.id}" - // TODO nf-core: A stub section should mimic the execution of the original module as best as possible - // Have a look at the following examples: - // Simple example: https://github.com/nf-core/modules/blob/818474a292b4860ae8ff88e149fbcda68814114d/modules/nf-core/bcftools/annotate/main.nf#L47-L63 - // Complex example: https://github.com/nf-core/modules/blob/818474a292b4860ae8ff88e149fbcda68814114d/modules/nf-core/bedtools/split/main.nf#L38-L54 - """ - touch ${prefix}.bam - - cat <<-END_VERSIONS > versions.yml - "${task.process}": - humann: \$(samtools --version |& sed '1!d ; s/samtools //') - END_VERSIONS - """ -} diff --git a/modules/local/humann/rename/tests/main.nf.test b/modules/local/humann/rename/tests/main.nf.test index 4a0ba43..edee5b6 100644 --- a/modules/local/humann/rename/tests/main.nf.test +++ b/modules/local/humann/rename/tests/main.nf.test @@ -1,40 +1,61 @@ nextflow_process { - name "Test Process HUMANN_DOWNLOADUNIREFDB" + name "Test Process HUMANN_RENAME" script "../main.nf" - process "HUMANN_DOWNLOADUNIREFDB" + process "HUMANN_RENAME" config "./nextflow.config" tag "modules" tag "modules_nfcore" tag "humann" - tag "humann/downloadunireflandb" + tag "humann/rename" - test("DEMO_diamond") { + test("humann/rename") { when { process { """ - input[0] = "DEMO_diamond" + input[0] = Channel.of( + [ + [id: 'test'], + [ + file(params.modules_testdata_base_path + "TODO_some_rxn.tsv", checkIfExists: true) + ] + ] + ) + input[1] = "metacyc-rxn" """ } } then { - assertAll( + assertAll ( { assert process.success }, - { assert snapshot(process.out).match() } + { assert snapshot( + process.out.renamed + 
process.out.versions + ).match() + }, + { assert path(process.out.log[0][1]).text.contains("TODO a line indicates its running and gives a version, hopefully?") } ) } } - test("DEMO_diamond - stub") { + test("humann/rename - stub") { options "-stub" when { process { """ - input[0] = "DEMO_diamond" + input[0] = Channel.of( + [ + [id: 'test'], + [ + file(params.modules_testdata_base_path + "TODO_some_rxn.tsv", checkIfExists: true) + ] + ] + ) + input[1] = "uniref90_rxn" """ } } diff --git a/modules/local/humann/rename/tests/nextflow.config b/modules/local/humann/rename/tests/nextflow.config index 6be75c4..a990469 100644 --- a/modules/local/humann/rename/tests/nextflow.config +++ b/modules/local/humann/rename/tests/nextflow.config @@ -1,5 +1,12 @@ process { - withName: HUMANN_DOWNLOADUNIREFDB { - ext.args = "--update-config no" + withName: HUMANN_RENAME { + publishDir = [ + [ + path: { "${params.outdir}/humann/rename" }, + mode: params.publish_dir_mode, + pattern: '*_renamed.tsv.gz' + ] + ] + ext.args = params.humann_options ? params.humann_options : "" } } diff --git a/modules/local/humann/renorm/main.nf b/modules/local/humann/renorm/main.nf index 58bc710..315bad5 100644 --- a/modules/local/humann/renorm/main.nf +++ b/modules/local/humann/renorm/main.nf @@ -1,5 +1,6 @@ -process HUMANN_DOWNLOADUNIREFDB { - label 'process_single' +process HUMANN_RENORM { + tag "$meta.id" + label 'process_low' conda "${moduleDir}/environment.yml" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
@@ -7,33 +8,38 @@ process HUMANN_DOWNLOADUNIREFDB { 'biocontainers/humann:3.8--pyh7cba7a3_0' }" input: - val uniref_db_version + tuple val(meta), path(input) + val units output: - path("uniref") , emit: uniref_db - path "versions.yml" , emit: versions - - when: - task.ext.when == null || task.ext.when + tuple val(meta), path("*_renorm.tsv.gz"), emit: renorm + path "versions.yml" , emit: versions script: def args = task.ext.args ?: '' + def prefix = task.ext.prefix ?: "${meta.id}" """ - humann_databases \\ - --download uniref \\ - ${uniref_db_version} \\ - . \\ + if [[ $input == *.gz ]]; then + gunzip -c $input > input.tsv + else + mv $input input.tsv + fi + humann_renorm_table \\ + --input input.tsv \\ + --output ${prefix}_${units}_renorm.tsv \\ + --units $units \\ + --update-snames \\ ${args} - cat <<-END_VERSIONS > versions.yml - "${task.process}": - humann: \$(echo \$(humann --version 2>&1 | sed 's/^.*humann //; s/Using.*\$//' )) - END_VERSIONS - """ + gzip -n ${prefix}_${units}_renorm.tsv stub: def args = task.ext.args ?: '' """ - mkdir uniref + + stub: + def prefix = task.ext.prefix ?: "${meta.id}" + """ + touch ${prefix}_${units}_renorm.tsv.gz cat <<-END_VERSIONS > versions.yml "${task.process}": humann: \$(echo \$(humann --version 2>&1 | sed 's/^.*humann //; s/Using.*\$//' )) diff --git a/modules/local/humann/renorm/nextflow.config b/modules/local/humann/renorm/nextflow.config index 6be75c4..9298e72 100644 --- a/modules/local/humann/renorm/nextflow.config +++ b/modules/local/humann/renorm/nextflow.config @@ -1,5 +1,12 @@ process { - withName: HUMANN_DOWNLOADUNIREFDB { - ext.args = "--update-config no" + withName: HUMANN_RENORM { + publishDir = [ + [ + path: { "${params.outdir}/humann/renorm" }, + mode: params.publish_dir_mode, + pattern: '*_renorm.tsv.gz' + ] + ] + ext.args = params.humann_options ? 
params.humann_options : "" } } diff --git a/modules/local/humann/renorm/renorm.nf b/modules/local/humann/renorm/renorm.nf deleted file mode 100644 index 393a62f..0000000 --- a/modules/local/humann/renorm/renorm.nf +++ /dev/null @@ -1,91 +0,0 @@ -// TODO nf-core: If in doubt look at other nf-core/modules to see how we are doing things! :) -// https://github.com/nf-core/modules/tree/master/modules/nf-core/ -// You can also ask for help via your pull request or on the #modules channel on the nf-core Slack workspace: -// https://nf-co.re/join -// TODO nf-core: A module file SHOULD only define input and output files as command-line parameters. -// All other parameters MUST be provided using the "task.ext" directive, see here: -// https://www.nextflow.io/docs/latest/process.html#ext -// where "task.ext" is a string. -// Any parameters that need to be evaluated in the context of a particular sample -// e.g. single-end/paired-end data MUST also be defined and evaluated appropriately. -// TODO nf-core: Software that can be piped together SHOULD be added to separate module files -// unless there is a run-time, storage advantage in implementing in this way -// e.g. it's ok to have a single module for bwa to output BAM instead of SAM: -// bwa mem | samtools view -B -T ref.fasta -// TODO nf-core: Optional inputs are not currently supported by Nextflow. However, using an empty -// list (`[]`) instead of a file can be used to work around this issue. - -process HUMANN_RENORM { - tag "$meta.id" - label 'process_low' - - // TODO nf-core: List required Conda package(s). - // Software MUST be pinned to channel (i.e. "bioconda"), version (i.e. "1.10"). - // For Conda, the build (i.e. "h9402c20_2") must be EXCLUDED to support installation on different operating systems. - // TODO nf-core: See section in main README for further information regarding finding and adding container addresses to the section below. 
- conda "${moduleDir}/environment.yml" - container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? - 'https://depot.galaxyproject.org/singularity/humann:3.9--py312hdfd78af_0': - 'biocontainers/humann:3.9--py312hdfd78af_0' }" - - input: - // TODO nf-core: Where applicable all sample-specific information e.g. "id", "single_end", "read_group" - // MUST be provided as an input via a Groovy Map called "meta". - // This information may not be required in some instances e.g. indexing reference genome files: - // https://github.com/nf-core/modules/blob/master/modules/nf-core/bwa/index/main.nf - // TODO nf-core: Where applicable please provide/convert compressed files as input/output - // e.g. "*.fastq.gz" and NOT "*.fastq", "*.bam" and NOT "*.sam" etc. - tuple val(meta), path(bam) - - output: - // TODO nf-core: Named file extensions MUST be emitted for ALL output channels - tuple val(meta), path("*.bam"), emit: bam - // TODO nf-core: List additional required output channels/values here - path "versions.yml" , emit: versions - - when: - task.ext.when == null || task.ext.when - - script: - def args = task.ext.args ?: '' - def prefix = task.ext.prefix ?: "${meta.id}" - // TODO nf-core: Where possible, a command MUST be provided to obtain the version number of the software e.g. 1.10 - // If the software is unable to output a version number on the command-line then it can be manually specified - // e.g. https://github.com/nf-core/modules/blob/master/modules/nf-core/homer/annotatepeaks/main.nf - // Each software used MUST provide the software name and version number in the YAML version file (versions.yml) - // TODO nf-core: It MUST be possible to pass additional parameters to the tool as a command-line string via the "task.ext.args" directive - // TODO nf-core: If the tool supports multi-threading then you MUST provide the appropriate parameter - // using the Nextflow "task" variable e.g. 
"--threads $task.cpus" - // TODO nf-core: Please replace the example samtools command below with your module's command - // TODO nf-core: Please indent the command appropriately (4 spaces!!) to help with readability ;) - """ - samtools \\ - sort \\ - $args \\ - -@ $task.cpus \\ - -o ${prefix}.bam \\ - -T $prefix \\ - $bam - - cat <<-END_VERSIONS > versions.yml - "${task.process}": - humann: \$(samtools --version |& sed '1!d ; s/samtools //') - END_VERSIONS - """ - - stub: - def args = task.ext.args ?: '' - def prefix = task.ext.prefix ?: "${meta.id}" - // TODO nf-core: A stub section should mimic the execution of the original module as best as possible - // Have a look at the following examples: - // Simple example: https://github.com/nf-core/modules/blob/818474a292b4860ae8ff88e149fbcda68814114d/modules/nf-core/bcftools/annotate/main.nf#L47-L63 - // Complex example: https://github.com/nf-core/modules/blob/818474a292b4860ae8ff88e149fbcda68814114d/modules/nf-core/bedtools/split/main.nf#L38-L54 - """ - touch ${prefix}.bam - - cat <<-END_VERSIONS > versions.yml - "${task.process}": - humann: \$(samtools --version |& sed '1!d ; s/samtools //') - END_VERSIONS - """ -} diff --git a/modules/local/humann/renorm/tests/main.nf.test b/modules/local/humann/renorm/tests/main.nf.test index 4a0ba43..2888606 100644 --- a/modules/local/humann/renorm/tests/main.nf.test +++ b/modules/local/humann/renorm/tests/main.nf.test @@ -1,40 +1,61 @@ nextflow_process { - name "Test Process HUMANN_DOWNLOADUNIREFDB" + name "Test Process HUMANN_RENORM" script "../main.nf" - process "HUMANN_DOWNLOADUNIREFDB" + process "HUMANN_RENORM" config "./nextflow.config" tag "modules" tag "modules_nfcore" tag "humann" - tag "humann/downloadunireflandb" + tag "humann/renorm" - test("DEMO_diamond") { + test("humann/renorm") { when { process { """ - input[0] = "DEMO_diamond" + input[0] = Channel.of( + [ + [id: 'test'], + [ + file(params.modules_testdata_base_path + "TODO_some_genefamilies.tsv", checkIfExists: 
true) + ] + ] + ) + input[1] = "cpm" """ } } then { - assertAll( + assertAll ( { assert process.success }, - { assert snapshot(process.out).match() } + { assert snapshot( + process.out.renorm + process.out.versions + ).match() + }, + { assert path(process.out.log[0][1]).text.contains("TODO a line indicates its running and gives a version, hopefully?") } ) } } - test("DEMO_diamond - stub") { + test("humann/renorm - stub") { options "-stub" when { process { """ - input[0] = "DEMO_diamond" + input[0] = Channel.of( + [ + [id: 'test'], + [ + file(params.modules_testdata_base_path + "TODO_some_genefamilies.tsv", checkIfExists: true) + ] + ] + ) + input[1] = "cpm" """ } } diff --git a/modules/local/humann/renorm/tests/nextflow.config b/modules/local/humann/renorm/tests/nextflow.config index 6be75c4..9298e72 100644 --- a/modules/local/humann/renorm/tests/nextflow.config +++ b/modules/local/humann/renorm/tests/nextflow.config @@ -1,5 +1,12 @@ process { - withName: HUMANN_DOWNLOADUNIREFDB { - ext.args = "--update-config no" + withName: HUMANN_RENORM { + publishDir = [ + [ + path: { "${params.outdir}/humann/renorm" }, + mode: params.publish_dir_mode, + pattern: '*_renorm.tsv.gz' + ] + ] + ext.args = params.humann_options ? 
params.humann_options : "" } } From b520116a169ef781894c52bc287040af6365af8a Mon Sep 17 00:00:00 2001 From: Danielle Callan Date: Tue, 17 Sep 2024 14:51:50 -0400 Subject: [PATCH 03/23] add draft humann subworkflow --- .../fastq_microbial_pathway_humann/main.nf | 90 +++++++++++++++++++ .../nextflow.config | 5 ++ 2 files changed, 95 insertions(+) create mode 100644 subworkflows/local/fastq_microbial_pathway_humann/main.nf diff --git a/subworkflows/local/fastq_microbial_pathway_humann/main.nf b/subworkflows/local/fastq_microbial_pathway_humann/main.nf new file mode 100644 index 0000000..554d37b --- /dev/null +++ b/subworkflows/local/fastq_microbial_pathway_humann/main.nf @@ -0,0 +1,90 @@ +// +// SUBWORKFLOW: Identify gene families and pathways associated with reads using HUMAnN 3 +// + +include { HUMANN_DOWNLOADCHOCOPHLANDB } from '../../../modules/local/humann/downloadchocophlandb/main' +include { HUMANN_DOWNLOADUNIREFDB } from '../../../modules/local/humann/downloadunirefdb/main' +include { HUMANN_HUMANN } from '../../../modules/local/humann/humann/main' +include { HUMANN_JOIN } from '../../../modules/local/humann/join/main' +include { HUMANN_REGROUP } from '../../../modules/local/humann/regroup/main' +include { HUMANN_RENAME } from '../../../modules/local/humann/rename/main' +include { HUMANN_RENORM } from '../../../modules/local/humann/renorm/main' + +workflow FASTQ_MICROBIAL_PATHWAY_HUMANN { + + take: + processed_reads_fastq_gz // channel: [ val(meta), [ processed_reads_1.fastq.gz, processed_reads_2.fastq.gz ] ] (MANDATORY) + metaphlan_profile // channel: [ val(meta2), metaphlan_profile.tsv ] (MANDATORY) + chocophlan_db // channel: [ chocophlan_db ] (OPTIONAL) + chochophlan_db_version // value: '' (OPTIONAL) + uniref_db // channel: [ uniref_db ] (OPTIONAL) + uniref_db_version // value: '' (OPTIONAL) + + main: + + ch_versions = Channel.empty() + + // if chocophlan_db exists, skip HUMANN_DOWNLOADCHOCOPHLANDB + if ( chocophlan_db ){ + ch_chocophlan_db = 
chocophlan_db + } else { + // + // MODULE: Download ChocoPhlAn database + // + ch_chocophlan_db = HUMANN_DOWNLOADCHOCOPHLANDB ( chochophlan_db_version ).chochophlan_db + ch_versions = ch_versions.mix(HUMANN_DOWNLOADCHOCOPHLANDB.out.versions) + } + + // if uniref_db exists, skip HUMANN_DOWNLOADUNIREFDB + if ( uniref_db ){ + ch_uniref_db = uniref_db + } else { + // + // MODULE: Download UniRef database + // + ch_uniref_db = HUMANN_DOWNLOADUNIREFDB ( uniref_db_version ).uniref_db + ch_versions = ch_versions.mix(HUMANN_DOWNLOADUNIREFDB.out.versions) + } + + // + // MODULE: Run HUMAnN 3 for raw outputs + // + ch_humann_genefamilies_raw = HUMANN_HUMANN ( processed_reads_fastq_gz, metaphlan_profile, ch_chocophlan_db, ch_uniref_db ).genefamilies + ch_humann_pathabundance_raw = HUMANN_HUMANN.out.pathabundance + ch_humann_pathcoverage_raw = HUMANN_HUMANN.out.pathcoverage // TODO is this still right? looking at humann docs, might not get this file any longer? + ch_humann_logs = HUMANN_HUMANN.out.log + ch_versions = ch_versions.mix(HUMANN_HUMANN.out.versions) + + // collect log files and store in a directory + ch_combined_humann_logs = ch_humann_logs + .map { [ [ id:'all_samples' ], it[1] ] } + .groupTuple( sort: 'deep' ) + + // + // MODULE: renormalize raw gene families from HUMAnN outputs to cpm + // + ch_humann_genefamilies_cpm = HUMANN_RENORM ( ch_humann_genefamilies_raw, 'cpm' ).renorm + ch_versions = ch_versions.mix(HUMANN_RENORM.out.versions) + + // + // MODULE: regroup cpm gene families to EC numbers + // + ch_humann_ec = HUMANN_REGROUP( ch_humann_genefamilies_cpm, 'ec').regroup // TODO make sure 'ec' is still valid arg + ch_versions = ch_versions.mix(HUMANN_REGROUP.out.versions) + + // + // MODULE: rename ec number outputs to include descriptors + // + ch_humann_ec_renamed = HUMANN_RENAME (ch_humann_ec, 'ec').rename // TODO make sure 'ec' is valid arg + ch_versions = ch_versions.mix(HUMANN_RENAME.out.versions) + + // TODO join all outputs as necessary, then update 
emit below + // TODO need to modify modules to return output dirs i suppose first, so they can be passed to join module + + emit: + humann_genefamilies_cpm = ch_humann_genefamilies_cpm // channel: [ val(meta), [ reads_1.fastq.gz, reads_2.fastq.gz ] ] + humann_ec = ch_humann_ec_renamed // channel: [ val(meta), read_counts.tsv ] + humann_pathabundance = ch_humann_pathabundance_raw // channel: [ val(meta), pathabundance.tsv ] + humann_pathcoverage = ch_humann_pathcoverage_raw // channel: [ val(meta), pathcoverage.tsv ] + versions = ch_versions // channel: [ versions.yml ] +} diff --git a/subworkflows/local/fastq_microbial_pathway_humann/nextflow.config b/subworkflows/local/fastq_microbial_pathway_humann/nextflow.config index 1a8763f..dccc682 100644 --- a/subworkflows/local/fastq_microbial_pathway_humann/nextflow.config +++ b/subworkflows/local/fastq_microbial_pathway_humann/nextflow.config @@ -1,2 +1,7 @@ includeConfig '../../../modules/local/humann/downloadchocophlandb/nextflow.config' includeConfig '../../../modules/local/humann/downloadunirefdb/nextflow.config' +includeConfig '../../../modules/local/humann/humann/nextflow.config' +includeConfig '../../../modules/local/humann/join/nextflow.config' +includeConfig '../../../modules/local/humann/regroup/nextflow.config' +includeConfig '../../../modules/local/humann/rename/nextflow.config' +includeConfig '../../../modules/local/humann/renorm/nextflow.config' From ce04afe31df8141c82293b98462ff08a569b0af1 Mon Sep 17 00:00:00 2001 From: Danielle Callan Date: Wed, 18 Sep 2024 13:07:36 -0400 Subject: [PATCH 04/23] fleshing out humann subworkflow some --- .../fastq_microbial_pathway_humann/main.nf | 44 +++++++++++---- .../tests/main.nf.test | 54 +++++++++++++++++++ .../tests/tags.yml | 2 + 3 files changed, 91 insertions(+), 9 deletions(-) create mode 100644 subworkflows/local/fastq_microbial_pathway_humann/tests/main.nf.test create mode 100644 subworkflows/local/fastq_microbial_pathway_humann/tests/tags.yml diff --git 
a/subworkflows/local/fastq_microbial_pathway_humann/main.nf b/subworkflows/local/fastq_microbial_pathway_humann/main.nf index 554d37b..6ad055d 100644 --- a/subworkflows/local/fastq_microbial_pathway_humann/main.nf +++ b/subworkflows/local/fastq_microbial_pathway_humann/main.nf @@ -5,7 +5,10 @@ include { HUMANN_DOWNLOADCHOCOPHLANDB } from '../../../modules/local/humann/downloadchocophlandb/main' include { HUMANN_DOWNLOADUNIREFDB } from '../../../modules/local/humann/downloadunirefdb/main' include { HUMANN_HUMANN } from '../../../modules/local/humann/humann/main' -include { HUMANN_JOIN } from '../../../modules/local/humann/join/main' +include { HUMANN_JOIN as JOIN_GENES } from '../../../modules/local/humann/join/main' +include { HUMANN_JOIN as JOIN_PATHABUND } from '../../../modules/local/humann/join/main' +include { HUMANN_JOIN as JOIN_PATHCOV } from '../../../modules/local/humann/join/main' +include { HUMANN_JOIN as JOIN_EC } from '../../../modules/local/humann/join/main' include { HUMANN_REGROUP } from '../../../modules/local/humann/regroup/main' include { HUMANN_RENAME } from '../../../modules/local/humann/rename/main' include { HUMANN_RENORM } from '../../../modules/local/humann/renorm/main' @@ -69,22 +72,45 @@ workflow FASTQ_MICROBIAL_PATHWAY_HUMANN { // // MODULE: regroup cpm gene families to EC numbers // - ch_humann_ec = HUMANN_REGROUP( ch_humann_genefamilies_cpm, 'ec').regroup // TODO make sure 'ec' is still valid arg + ch_humann_ec = HUMANN_REGROUP(ch_humann_genefamilies_cpm, 'ec').regroup // TODO make sure 'ec' is still valid arg ch_versions = ch_versions.mix(HUMANN_REGROUP.out.versions) // // MODULE: rename ec number outputs to include descriptors // - ch_humann_ec_renamed = HUMANN_RENAME (ch_humann_ec, 'ec').rename // TODO make sure 'ec' is valid arg + ch_humann_ec_renamed = HUMANN_RENAME(ch_humann_ec, 'ec').renamed // TODO make sure 'ec' is valid arg ch_versions = ch_versions.mix(HUMANN_RENAME.out.versions) - // TODO join all outputs as necessary, then 
update emit below - // TODO need to modify modules to return output dirs i suppose first, so they can be passed to join module + // + // MODULE: join gene abundances across all samples into one file + // + // the paths should all be the same, so im taking the first. + // should probably be validated though, im just short of time.. + ch_humann_genefamilies_cpm_path = ch_humann_genefamilies_cpm.map{ toCanonicalPath(it[1]) }.unique().take(1) + ch_humann_genefamilies_joined = JOIN_GENES(ch_humann_genefamilies_cpm_path, 'genefamilies') + + // + // MODULE: join ec abundances across all samples into one file + // + ch_humann_ec_renamed_path = ch_humann_ec_renamed.map{ toCanonicalPath(it[1]) }.unique().take(1) + ch_humann_ec_joined = JOIN_EC(ch_humann_ec_renamed_path, 'ec') // TODO check the file name pattern + + // + // MODULE: join pathway abundances across all samples into one file + // + ch_humann_pathabundance_path = ch_humann_pathabundance_raw.map{ toCanonicalPath(it[1]) }.unique().take(1) + ch_humann_pathabundance_joined = JOIN_PATHABUND(ch_humann_pathabundance_path, 'pathabundance') + + // + // MODULE: join pathway coverage across all samples into one file + // + ch_humann_pathcoverage_path = ch_humann_pathcoverage_raw.map{ toCanonicalPath(it[1]) }.unique().take(1) + ch_humann_pathcoverage_joined = JOIN_PATHCOV(ch_humann_pathcoverage_path, 'pathcoverage') emit: - humann_genefamilies_cpm = ch_humann_genefamilies_cpm // channel: [ val(meta), [ reads_1.fastq.gz, reads_2.fastq.gz ] ] - humann_ec = ch_humann_ec_renamed // channel: [ val(meta), read_counts.tsv ] - humann_pathabundance = ch_humann_pathabundance_raw // channel: [ val(meta), pathabundance.tsv ] - humann_pathcoverage = ch_humann_pathcoverage_raw // channel: [ val(meta), pathcoverage.tsv ] + humann_genefamilies = ch_humann_genefamilies_joined // channel: [ val(meta), [ reads_1.fastq.gz, reads_2.fastq.gz ] ] + humann_ec = ch_humann_ec_joined // channel: [ val(meta), read_counts.tsv ] + humann_pathabundance = 
ch_humann_pathabundance_joined // channel: [ val(meta), pathabundance.tsv ] + humann_pathcoverage = ch_humann_pathcoverage_joined // channel: [ val(meta), pathcoverage.tsv ] versions = ch_versions // channel: [ versions.yml ] } diff --git a/subworkflows/local/fastq_microbial_pathway_humann/tests/main.nf.test b/subworkflows/local/fastq_microbial_pathway_humann/tests/main.nf.test new file mode 100644 index 0000000..fb8d06d --- /dev/null +++ b/subworkflows/local/fastq_microbial_pathway_humann/tests/main.nf.test @@ -0,0 +1,54 @@ +nextflow_workflow { + + name "Test Subworkflow: FASTQ_MICROBIAL_PATHWAY_HUMANN" + script "../main.nf" + workflow "FASTQ_MICROBIAL_PATHWAY_HUMANN" + + tag "subworkflows" + tag "subworkflows_local" + tag "fastq_microbial_pathway_humann" + tag "fastq_microbial_pathway_humann_default" + + + // TODO update inputs here, these are copied from the metaphlan subworkflow which obviously isnt what we actually need here + // the first should be processed fastq + // the second should be metaphlan profile output from that subworkflow + // the third is chocophlan db, either that or a chocophlan db version as fourth input should be provided (multiple tests) + // the fifth is uniref db, either that or a uniref db version as sixth input should be provided (multiple tests) + + test("fastq.gz") { + + when { + workflow { + """ + input[0] = Channel.of( + [ + [ id: 'test' ], + [ + file(params.modules_testdata_base_path + "genomics/sarscov2/illumina/fastq/test_1.fastq.gz", checkIfExists: true), + file(params.modules_testdata_base_path + "genomics/sarscov2/illumina/fastq/test_2.fastq.gz", checkIfExists: true) + ] + ], + [ + [ id: 'test2' ], + [ + file(params.modules_testdata_base_path + "genomics/sarscov2/illumina/fastq/test2_1.fastq.gz", checkIfExists: true), + file(params.modules_testdata_base_path + "genomics/sarscov2/illumina/fastq/test2_2.fastq.gz", checkIfExists: true) + ] + ] + ) + input[1] = 
"https://github.com/biobakery/MetaPhlAn/raw/master/metaphlan/utils/mpa_vOct22_CHOCOPhlAnSGB_202212_SGB2GTDB.tsv" + input[2] = null + input[3] = 'mpa_vJan21_TOY_CHOCOPhlAnSGB_202103' + """ + } + } + + then { + assertAll( + { assert workflow.success }, + { assert snapshot(workflow.out).match() } + ) + } + } +} diff --git a/subworkflows/local/fastq_microbial_pathway_humann/tests/tags.yml b/subworkflows/local/fastq_microbial_pathway_humann/tests/tags.yml new file mode 100644 index 0000000..1467ae5 --- /dev/null +++ b/subworkflows/local/fastq_microbial_pathway_humann/tests/tags.yml @@ -0,0 +1,2 @@ +fastq_microbial_pathway_humann_default: + - subworkflows/local/fastq_microbial_pathway_humann/** From 06fa7df9a352f62e804b4ba51f8de731417ac80d Mon Sep 17 00:00:00 2001 From: Danielle Callan Date: Wed, 18 Sep 2024 13:36:04 -0400 Subject: [PATCH 05/23] draft adding humann subworkflow to workflow --- workflows/biobakerymgx/main.nf | 59 ++++++++++++++++++++++++++++++++-- 1 file changed, 57 insertions(+), 2 deletions(-) diff --git a/workflows/biobakerymgx/main.nf b/workflows/biobakerymgx/main.nf index d045911..0bf91c4 100644 --- a/workflows/biobakerymgx/main.nf +++ b/workflows/biobakerymgx/main.nf @@ -9,6 +9,7 @@ // include { FASTQ_READ_PREPROCESSING_KNEADDATA } from '../../subworkflows/local/fastq_read_preprocessing_kneaddata/main' include { FASTQ_READ_TAXONOMY_METAPHLAN } from '../../subworkflows/local/fastq_read_taxonomy_metaphlan/main' +include { FASTQ_MICROBIAL_PATHWAY_HUMANN } from '../../subworkflows/local/fastq_microbial_pathway_humann/main' /* @@ -130,7 +131,7 @@ workflow BIOBAKERYMGX { Taxonomic classification: MetaPhlAn -----------------------------------------------------------------------------------*/ if ( params.run_metaphlan ) { - // create channel from params.kneaddata_db + // create channel from params.metaphlan_db if ( !params.metaphlan_db ){ ch_metaphlan_db = null } else { @@ -146,13 +147,63 @@ workflow BIOBAKERYMGX { // // SUBWORKFLOW: MetaPhlAn // - 
ch_read_taxonomy_tsv = FASTQ_READ_TAXONOMY_METAPHLAN ( ch_preprocessed_fastq_gz, ch_metaphlan_sgb2gtbd_file, ch_metaphlan_db, params.metaphlan_db_version ).metaphlan_profiles_merged_tsv + ch_read_taxonomy_tsv = FASTQ_READ_TAXONOMY_METAPHLAN ( + ch_preprocessed_fastq_gz, + ch_metaphlan_sgb2gtbd_file, + ch_metaphlan_db, + params.metaphlan_db_version + ).metaphlan_profiles_merged_tsv ch_versions = ch_versions.mix(FASTQ_READ_TAXONOMY_METAPHLAN.out.versions) } else { ch_read_taxonomy_tsv = Channel.empty() } + /*----------------------------------------------------------------------------------- + Functional classification: HUMAnN + -----------------------------------------------------------------------------------*/ + if ( params.run_humann ) { + // create channel from params.chocophlan_db + if ( !params.chocophlan_db ) { + ch_chochophlan_db = null + } else { + ch_chochophlan_db = Channel.value( file( params.chocophlan_db, checkIfExists: true ) ) + } + + // create channel from params.uniref_db + if ( !params.uniref_db ) { + ch_uniref_db = null + } else { + ch_uniref_db = Channel.value( file( params.uniref_db, checkIfExists: true ) ) + } + + // theres probably a better way to handle this. but good enough for me for now.. + if ( !params.run_metaphlan ) { + error "Error: run_humann is true but run_metaphlan is false. Cannot run HUMAnN without MetaPhlAn." + } + + // + // SUBWORKFLOW: HUMAnN + // + // TODO double check the metaphlan output channel. 
not sure its the format i was expecting in the module + ch_genefamilies_tsv = FASTQ_MICROBIAL_PATHWAY_HUMANN( + ch_preprocessed_fastq_gz, + ch_read_taxonomy_tsv, + ch_chochophlan_db, + params.chochophlan_db_version, + ch_uniref_db, + params.uniref_db_version).humann_genefamilies + ch_ec_tsv = FASTQ_MICROBIAL_PATHWAY_HUMANN.out.humann_ec + ch_pathabundance_tsv = FASTQ_MICROBIAL_PATHWAY_HUMANN.out.humann_pathabundance + ch_pathcoverage_tsv = FASTQ_MICROBIAL_PATHWAY_HUMANN.out.humann_pathcoverage + ch_versions = ch_versions.mix(FASTQ_MICROBIAL_PATHWAY_HUMANN.out.versions) + } else { + ch_genefamilies_tsv = Channel.empty() + ch_ec_tsv = Channel.empty() + ch_pathabundance_tsv = Channel.empty() + ch_pathcoverage_tsv = Channel.empty() + } + /*----------------------------------------------------------------------------------- Pipeline report utilities @@ -192,6 +243,10 @@ workflow BIOBAKERYMGX { preprocessed_fastq_gz = ch_preprocessed_fastq_gz preprocessed_read_counts_tsv = ch_preprocessed_read_counts_tsv read_taxonomy_tsv = ch_read_taxonomy_tsv + genefamilies_tsv = ch_genefamilies_tsv + ec_tsv = ch_ec_tsv + pathabundance_tsv = ch_pathabundance_tsv + pathcoverage_tsv = ch_pathcoverage_tsv multiqc_report = MULTIQC.out.report.toList() // channel: /path/to/multiqc_report.html versions = ch_versions } From 1de7d40ef77c83db20d59d7a70c4706ac885ef42 Mon Sep 17 00:00:00 2001 From: Danielle Callan Date: Wed, 18 Sep 2024 13:38:24 -0400 Subject: [PATCH 06/23] update expected outputs in workflow test --- workflows/biobakerymgx/tests/main.nf.test | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/workflows/biobakerymgx/tests/main.nf.test b/workflows/biobakerymgx/tests/main.nf.test index 163ca0b..bcf6e60 100644 --- a/workflows/biobakerymgx/tests/main.nf.test +++ b/workflows/biobakerymgx/tests/main.nf.test @@ -44,6 +44,10 @@ nextflow_workflow { workflow.out.preprocessed_fastq_gz, workflow.out.preprocessed_read_counts_tsv, workflow.out.read_taxonomy_tsv, + 
workflow.out.genefamilies_tsv, + workflow.out.ec_tsv, + workflow.out.pathabundance_tsv, + workflow.out.pathcoverage_tsv, workflow.out.versions ).match() }, From 69007fac1787cc4369b14dd246ec94daba80a9c8 Mon Sep 17 00:00:00 2001 From: Danielle Callan Date: Wed, 25 Sep 2024 14:55:01 -0400 Subject: [PATCH 07/23] dont keep compressing and uncompressing intermediate files --- modules/local/humann/join/main.nf | 8 ++------ modules/local/humann/regroup/main.nf | 10 ++-------- modules/local/humann/rename/main.nf | 8 +------- modules/local/humann/renorm/main.nf | 11 ++--------- 4 files changed, 7 insertions(+), 30 deletions(-) diff --git a/modules/local/humann/join/main.nf b/modules/local/humann/join/main.nf index 47c2f66..df06e16 100644 --- a/modules/local/humann/join/main.nf +++ b/modules/local/humann/join/main.nf @@ -11,21 +11,17 @@ process HUMANN_JOIN { val file_name_pattern output: - path("*_joined.tsv.gz"), emit: joined + path("*_joined.tsv") , emit: joined path "versions.yml" , emit: versions script: def args = task.ext.args ?: '' """ - if compgen -G "$input_dir/*$file_name_pattern*.gz" > /dev/null; then - find $input_dir \( -name '*$file_name_pattern*' \) -exec gunzip --verbose {} \; - fi humann_join_table \\ --input $input_dir \\ --output ${file_name_pattern}_joined.tsv \\ --file_name $file_name_pattern \\ ${args} - gzip -n ${file_name_pattern}_joined.tsv cat <<-END_VERSIONS > versions.yml "${task.process}": humann: \$(echo \$(humann --version 2>&1 | sed 's/^.*humann //; s/Using.*\$//' )) @@ -34,7 +30,7 @@ process HUMANN_JOIN { stub: """ - touch ${file_name_pattern}_joined.tsv.gz + touch ${file_name_pattern}_joined.tsv cat <<-END_VERSIONS > versions.yml "${task.process}": humann: \$(echo \$(humann --version 2>&1 | sed 's/^.*humann //; s/Using.*\$//' )) diff --git a/modules/local/humann/regroup/main.nf b/modules/local/humann/regroup/main.nf index 37c2cbc..9e87bfa 100644 --- a/modules/local/humann/regroup/main.nf +++ b/modules/local/humann/regroup/main.nf @@ -12,24 
+12,18 @@ process HUMANN_REGROUP { val groups output: - tuple val(meta), path("*_regroup.tsv.gz"), emit: regroup + tuple val(meta), path("*_regroup.tsv") , emit: regroup path "versions.yml" , emit: versions script: def args = task.ext.args ?: '' def prefix = task.ext.prefix ?: "${meta.id}" """ - if [[ $input == *.gz ]]; then - gunzip -c $input > input.tsv - else - mv $input input.tsv - fi humann_regroup_table \\ --input input.tsv \\ --output ${prefix}_${groups}_regroup.tsv \\ --groups $groups \\ ${args} - gzip -n ${prefix}_${groups}_regroup.tsv cat <<-END_VERSIONS > versions.yml "${task.process}": humann: \$(echo \$(humann --version 2>&1 | sed 's/^.*humann //; s/Using.*\$//' )) @@ -39,7 +33,7 @@ process HUMANN_REGROUP { stub: def prefix = task.ext.prefix ?: "${meta.id}" """ - touch ${prefix}_${groups}_regroup.tsv.gz + touch ${prefix}_${groups}_regroup.tsv cat <<-END_VERSIONS > versions.yml "${task.process}": humann: \$(echo \$(humann --version 2>&1 | sed 's/^.*humann //; s/Using.*\$//' )) diff --git a/modules/local/humann/rename/main.nf b/modules/local/humann/rename/main.nf index 16d8dc2..b6922fa 100644 --- a/modules/local/humann/rename/main.nf +++ b/modules/local/humann/rename/main.nf @@ -12,24 +12,18 @@ process HUMANN_RENAME { val names output: - tuple val(meta), path("*_renamed.tsv.gz"), emit: renamed + tuple val(meta), path("*_renamed.tsv") , emit: renamed path "versions.yml" , emit: versions script: def args = task.ext.args ?: '' def prefix = task.ext.prefix ?: "${meta.id}" """ - if [[ $input == *.gz ]]; then - gunzip -c $input > input.tsv - else - mv $input input.tsv - fi humann_rename_table \\ --input input.tsv \\ --output ${prefix}_${names}_rename.tsv \\ --names $names \\ ${args} - gzip -n ${prefix}_${names}_rename.tsv cat <<-END_VERSIONS > versions.yml "${task.process}": humann: \$(echo \$(humann --version 2>&1 | sed 's/^.*humann //; s/Using.*\$//' )) diff --git a/modules/local/humann/renorm/main.nf b/modules/local/humann/renorm/main.nf index 
315bad5..69e390a 100644 --- a/modules/local/humann/renorm/main.nf +++ b/modules/local/humann/renorm/main.nf @@ -12,26 +12,19 @@ process HUMANN_RENORM { val units output: - tuple val(meta), path("*_renorm.tsv.gz"), emit: renorm + tuple val(meta), path("*_renorm.tsv") , emit: renorm path "versions.yml" , emit: versions script: def args = task.ext.args ?: '' def prefix = task.ext.prefix ?: "${meta.id}" """ - if [[ $input == *.gz ]]; then - gunzip -c $input > input.tsv - else - mv $input input.tsv - fi humann_renorm_table \\ --input input.tsv \\ --output ${prefix}_${units}_renorm.tsv \\ --units $units \\ --update-snames \\ ${args} - gzip -n ${prefix}_${units}_renorm.tsv - stub: def args = task.ext.args ?: '' """ @@ -39,7 +32,7 @@ process HUMANN_RENORM { stub: def prefix = task.ext.prefix ?: "${meta.id}" """ - touch ${prefix}_${units}_renorm.tsv.gz + touch ${prefix}_${units}_renorm.tsv cat <<-END_VERSIONS > versions.yml "${task.process}": humann: \$(echo \$(humann --version 2>&1 | sed 's/^.*humann //; s/Using.*\$//' )) From 4237a24ddef6fd80f55b4a35eac81eac0f8ee222 Mon Sep 17 00:00:00 2001 From: Danielle Callan Date: Wed, 25 Sep 2024 15:00:08 -0400 Subject: [PATCH 08/23] remove unneeded first operator on value channels --- subworkflows/local/fastq_read_preprocessing_kneaddata/main.nf | 2 +- subworkflows/local/fastq_read_taxonomy_metaphlan/main.nf | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/subworkflows/local/fastq_read_preprocessing_kneaddata/main.nf b/subworkflows/local/fastq_read_preprocessing_kneaddata/main.nf index d7b18b0..4b1aac7 100644 --- a/subworkflows/local/fastq_read_preprocessing_kneaddata/main.nf +++ b/subworkflows/local/fastq_read_preprocessing_kneaddata/main.nf @@ -31,7 +31,7 @@ workflow FASTQ_READ_PREPROCESSING_KNEADDATA { // // MODULE: Trim and remove human reads // - ch_preprocessed_reads_fastq_gz = KNEADDATA_KNEADDATA ( raw_reads_fastq_gz, ch_kneaddata_db.first() ).preprocessed_reads + ch_preprocessed_reads_fastq_gz = 
KNEADDATA_KNEADDATA ( raw_reads_fastq_gz, ch_kneaddata_db ).preprocessed_reads ch_kneaddata_logs = KNEADDATA_KNEADDATA.out.kneaddata_log ch_versions = ch_versions.mix(KNEADDATA_KNEADDATA.out.versions) diff --git a/subworkflows/local/fastq_read_taxonomy_metaphlan/main.nf b/subworkflows/local/fastq_read_taxonomy_metaphlan/main.nf index a1205f0..1ca8ab5 100644 --- a/subworkflows/local/fastq_read_taxonomy_metaphlan/main.nf +++ b/subworkflows/local/fastq_read_taxonomy_metaphlan/main.nf @@ -33,7 +33,7 @@ workflow FASTQ_READ_TAXONOMY_METAPHLAN { // // MODULE: Trim and remove human reads // - ch_metaphlan_profile_txt = METAPHLAN_METAPHLAN ( preprocessed_reads_fastq_gz, ch_metaphlan_db.first() ).profile + ch_metaphlan_profile_txt = METAPHLAN_METAPHLAN ( preprocessed_reads_fastq_gz, ch_metaphlan_db ).profile ch_versions = ch_versions.mix(METAPHLAN_METAPHLAN.out.versions) // From b160d8fa8bc0f95788ddb941676fb9618781a6d0 Mon Sep 17 00:00:00 2001 From: Danielle Callan Date: Wed, 25 Sep 2024 15:04:30 -0400 Subject: [PATCH 09/23] run_humann true for test --- tests/main.nf.test | 1 + 1 file changed, 1 insertion(+) diff --git a/tests/main.nf.test b/tests/main.nf.test index 4e471eb..3d00bbc 100644 --- a/tests/main.nf.test +++ b/tests/main.nf.test @@ -14,6 +14,7 @@ nextflow_pipeline { kneaddata_db_version = 'human_genome' run_metaphlan = true metaphlan_db_version = 'mpa_vJan21_TOY_CHOCOPhlAnSGB_202103' + run_humann = true } } From 3fc30ece52e1ba910b9dce92607ee19011ef459f Mon Sep 17 00:00:00 2001 From: Danielle Callan Date: Thu, 26 Sep 2024 12:57:55 -0400 Subject: [PATCH 10/23] updating some config --- conf/test.config | 3 +++ nextflow.config | 4 +++- nextflow_schema.json | 8 ++++++++ 3 files changed, 14 insertions(+), 1 deletion(-) diff --git a/conf/test.config b/conf/test.config index a4e5f05..4da4045 100644 --- a/conf/test.config +++ b/conf/test.config @@ -26,5 +26,8 @@ params { kneaddata_db_version = 'human_genome' run_metaphlan = true metaphlan_db_version = 
'mpa_vJan21_TOY_CHOCOPhlAnSGB_202103' + run_humann = true + chocophlan_db_version = 'DEMO' + uniref_db_version = 'DEMO_diamond' } diff --git a/nextflow.config b/nextflow.config index 0c0b78d..da0de55 100644 --- a/nextflow.config +++ b/nextflow.config @@ -31,8 +31,10 @@ params { metaphlan_sgb2gtbd_file = "https://github.com/biobakery/MetaPhlAn/raw/master/metaphlan/utils/mpa_vOct22_CHOCOPhlAnSGB_202212_SGB2GTDB.tsv" // HUMAnN options - run_humann = false + run_humann = false + chocophlan_db = null chocophlan_db_version ='DEMO' + uniref_db = null uniref_db_version ='DEMO_diamond' utility_mapping_version ='DEMO' diff --git a/nextflow_schema.json b/nextflow_schema.json index 3c1e17c..c6e2503 100644 --- a/nextflow_schema.json +++ b/nextflow_schema.json @@ -124,11 +124,19 @@ "type": "boolean", "description": "Run HUMAnN to assess functional capacity of a metagenome?" }, + "chocophlan_db": { + "type": "string", + "description": "Path to pre-downloaded ChocoPhlAn database" + }, "chocophlan_db_version": { "type": "string", "default": "DEMO", "description": "ChocoPhlAn database version to download" }, + "uniref_db": { + "type": "string", + "description": "Path to pre-downloaded UNIREF database" + }, "uniref_db_version": { "type": "string", "default": "DEMO_diamond", From 91d7df59784688686408764599634212e81a3013 Mon Sep 17 00:00:00 2001 From: Danielle Callan Date: Thu, 26 Sep 2024 14:23:07 -0400 Subject: [PATCH 11/23] some chocophlan misspelling --- nextflow.config | 6 +++--- .../local/fastq_microbial_pathway_humann/main.nf | 6 +++--- tests/main.nf.test | 2 ++ workflows/biobakerymgx/main.nf | 10 +++++----- workflows/biobakerymgx/tests/main.nf.test | 2 ++ 5 files changed, 15 insertions(+), 11 deletions(-) diff --git a/nextflow.config b/nextflow.config index da0de55..1128606 100644 --- a/nextflow.config +++ b/nextflow.config @@ -33,10 +33,10 @@ params { // HUMAnN options run_humann = false chocophlan_db = null - chocophlan_db_version ='DEMO' + chocophlan_db_version = 'DEMO' 
uniref_db = null - uniref_db_version ='DEMO_diamond' - utility_mapping_version ='DEMO' + uniref_db_version = 'DEMO_diamond' + utility_mapping_version = 'DEMO' // MultiQC options multiqc_config = null diff --git a/subworkflows/local/fastq_microbial_pathway_humann/main.nf b/subworkflows/local/fastq_microbial_pathway_humann/main.nf index 6ad055d..8174151 100644 --- a/subworkflows/local/fastq_microbial_pathway_humann/main.nf +++ b/subworkflows/local/fastq_microbial_pathway_humann/main.nf @@ -19,7 +19,7 @@ workflow FASTQ_MICROBIAL_PATHWAY_HUMANN { processed_reads_fastq_gz // channel: [ val(meta), [ processed_reads_1.fastq.gz, processed_reads_2.fastq.gz ] ] (MANDATORY) metaphlan_profile // channel: [ val(meta2), metaphlan_profile.tsv ] (MANDATORY) chocophlan_db // channel: [ chocophlan_db ] (OPTIONAL) - chochophlan_db_version // value: '' (OPTIONAL) + chocophlan_db_version // value: '' (OPTIONAL) uniref_db // channel: [ uniref_db ] (OPTIONAL) uniref_db_version // value: '' (OPTIONAL) @@ -29,12 +29,12 @@ workflow FASTQ_MICROBIAL_PATHWAY_HUMANN { // if chocophlan_db exists, skip HUMANN_DOWNLOADCHOCOPHLANDB if ( chocophlan_db ){ - ch_chocophlan_db = chochophlan_db + ch_chocophlan_db = chocophlan_db } else { // // MODULE: Download ChocoPhlAn database // - ch_chocophlan_db = HUMANN_DOWNLOADCHOCOPHLANDB ( chochophlan_db_version ).chochophlan_db + ch_chocophlan_db = HUMANN_DOWNLOADCHOCOPHLANDB ( chocophlan_db_version ).chochophlan_db ch_versions = ch_versions.mix(HUMANN_DOWNLOADCHOCOPHLANDB.out.versions) } diff --git a/tests/main.nf.test b/tests/main.nf.test index 3d00bbc..6583270 100644 --- a/tests/main.nf.test +++ b/tests/main.nf.test @@ -15,6 +15,8 @@ nextflow_pipeline { run_metaphlan = true metaphlan_db_version = 'mpa_vJan21_TOY_CHOCOPhlAnSGB_202103' run_humann = true + chocophlan_db_version = 'DEMO' + uniref_db_version = 'DEMO_diamond' } } diff --git a/workflows/biobakerymgx/main.nf b/workflows/biobakerymgx/main.nf index 0bf91c4..6368515 100644 --- 
a/workflows/biobakerymgx/main.nf +++ b/workflows/biobakerymgx/main.nf @@ -163,11 +163,11 @@ workflow BIOBAKERYMGX { Functional classification: HUMAnN -----------------------------------------------------------------------------------*/ if ( params.run_humann ) { - // create channel from params.chochophlan_db + // create channel from params.chocophlan_db if ( !params.chocophlan_db ) { - ch_chochophlan_db = null + ch_chocophlan_db = null } else { - ch_chochophlan_db = Channel.value( file( params.chochophlan_db, checkIfExists: true ) ) + ch_chocophlan_db = Channel.value( file( params.chocophlan_db, checkIfExists: true ) ) } // create channel from params.uniref_db @@ -189,8 +189,8 @@ workflow BIOBAKERYMGX { ch_genefamilies_tsv = FASTQ_MICROBIAL_PATHWAY_HUMANN( ch_preprocessed_fastq_gz, ch_read_taxonomy_tsv, - ch_chochophlan_db, - params.chochophlan_db_version, + ch_chocophlan_db, + params.chocophlan_db_version, ch_uniref_db, params.uniref_db_version).humann_genefamilies ch_ec_tsv = FASTQ_MICROBIAL_PATHWAY_HUMANN.out.humann_ec diff --git a/workflows/biobakerymgx/tests/main.nf.test b/workflows/biobakerymgx/tests/main.nf.test index bcf6e60..8faae8e 100644 --- a/workflows/biobakerymgx/tests/main.nf.test +++ b/workflows/biobakerymgx/tests/main.nf.test @@ -33,6 +33,8 @@ nextflow_workflow { outdir = "$outputDir" kneaddata_db_version = 'human_genome' metaphlan_db_version = 'mpa_vJan21_TOY_CHOCOPhlAnSGB_202103' + chocophlan_db_version = 'DEMO' + uniref_db_version = 'DEMO_diamond' max_cpus = 1 } } From 88bb41bf39a030bea9a3bad1a5a7629912f9b392 Mon Sep 17 00:00:00 2001 From: Danielle Callan Date: Thu, 26 Sep 2024 14:30:38 -0400 Subject: [PATCH 12/23] omg more chocophlan typos --- modules/local/humann/humann/tests/main.nf.test | 2 +- subworkflows/local/fastq_microbial_pathway_humann/main.nf | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/modules/local/humann/humann/tests/main.nf.test b/modules/local/humann/humann/tests/main.nf.test index aa5b71a..cae1a75 
100644 --- a/modules/local/humann/humann/tests/main.nf.test +++ b/modules/local/humann/humann/tests/main.nf.test @@ -15,7 +15,7 @@ nextflow_process { script "../../downloadchocophlandb/main.nf" process { """ - input[0] = 'TODO - some chochophlan db version - or maybe skip this setup and build in a mini test db' + input[0] = 'TODO - some chocophlan db version - or maybe skip this setup and build in a mini test db' """ } } diff --git a/subworkflows/local/fastq_microbial_pathway_humann/main.nf b/subworkflows/local/fastq_microbial_pathway_humann/main.nf index 8174151..2bbbb99 100644 --- a/subworkflows/local/fastq_microbial_pathway_humann/main.nf +++ b/subworkflows/local/fastq_microbial_pathway_humann/main.nf @@ -34,7 +34,7 @@ workflow FASTQ_MICROBIAL_PATHWAY_HUMANN { // // MODULE: Download ChocoPhlAn database // - ch_chocophlan_db = HUMANN_DOWNLOADCHOCOPHLANDB ( chocophlan_db_version ).chochophlan_db + ch_chocophlan_db = HUMANN_DOWNLOADCHOCOPHLANDB ( chocophlan_db_version ).chocophlan_db ch_versions = ch_versions.mix(HUMANN_DOWNLOADCHOCOPHLANDB.out.versions) } @@ -52,7 +52,7 @@ workflow FASTQ_MICROBIAL_PATHWAY_HUMANN { // // MODULE: Run HUMAnN 3 for raw outputs // - ch_humann_genefamilies_raw = HUMANN_HUMANN ( processed_reads_fastq_gz, metaphlan_profile, ch_chochophlan_db, ch_uniref_db ).genefamilies + ch_humann_genefamilies_raw = HUMANN_HUMANN ( processed_reads_fastq_gz, metaphlan_profile, ch_chocophlan_db, ch_uniref_db ).genefamilies ch_humann_pathabundance_raw = HUMANN_HUMANN.out.pathabundance ch_humann_pathcoverage_raw = HUMANN_HUMANN.out.pathcoverage // TODO is this still right? looking at humann docs, might not get this file any longer? 
ch_humann_logs = HUMANN_HUMANN.out.log From 80df04d8ede51e218d0a962c2211ce8a88859931 Mon Sep 17 00:00:00 2001 From: Danielle Callan Date: Thu, 26 Sep 2024 14:43:20 -0400 Subject: [PATCH 13/23] fix emit for humann subworkflow --- .../local/fastq_microbial_pathway_humann/main.nf | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/subworkflows/local/fastq_microbial_pathway_humann/main.nf b/subworkflows/local/fastq_microbial_pathway_humann/main.nf index 2bbbb99..ea1b840 100644 --- a/subworkflows/local/fastq_microbial_pathway_humann/main.nf +++ b/subworkflows/local/fastq_microbial_pathway_humann/main.nf @@ -78,7 +78,7 @@ workflow FASTQ_MICROBIAL_PATHWAY_HUMANN { // // MODULE: rename ec number outputs to include descriptors // - ch_humann_ec_renamed = HUMANN_RENAME(ch_humann_ec, 'ec').rename // TODO make sure 'ec' is valid arg + ch_humann_ec_renamed = HUMANN_RENAME(ch_humann_ec, 'ec').renamed // TODO make sure 'ec' is valid arg ch_versions = ch_versions.mix(HUMANN_RENAME.out.versions) // @@ -87,28 +87,28 @@ workflow FASTQ_MICROBIAL_PATHWAY_HUMANN { // the paths should all be the same, so im taking the first. // should probably be validated though, im just short of time.. 
ch_humann_genefamilies_cpm_path = ch_humann_genefamilies_cpm.map{ toCanonicalPath(it[1]) }.unique().take(1) - ch_humann_genefamilies_joined = JOIN_GENES(ch_humann_genefamilies_cpm_path, 'genefamilies') + ch_humann_genefamilies_joined = JOIN_GENES(ch_humann_genefamilies_cpm_path, 'genefamilies').joined // // MODULE: join ec abundances across all samples into one file // ch_humann_ec_renamed_path = ch_humann_ec_renamed.map{ toCanonicalPath(it[1]) }.unique().take(1) - ch_humann_ec_joined = JOIN_EC(ch_humann_ec_renamed_path, 'ec') // TODO check the file name pattern + ch_humann_ec_joined = JOIN_EC(ch_humann_ec_renamed_path, 'ec').joined // TODO check the file name pattern // // MODULE: join pathway abundances across all samples into one file // ch_humann_pathabundance_path = ch_humann_pathabundance_raw.map{ toCanonicalPath(it[1]) }.unique().take(1) - ch_humann_pathabundance_joined = JOIN_PATHABUND(ch_humann_pathabundance_path, 'pathabundance') + ch_humann_pathabundance_joined = JOIN_PATHABUND(ch_humann_pathabundance_path, 'pathabundance').joined // // MODULE: join pathway coverage across all samples into one file // ch_humann_pathcoverage_path = ch_humann_pathcoverage_raw.map{ toCanonicalPath(it[1]) }.unique().take(1) - ch_humann_pathcoverage_joined = JOIN_PATHCOV(ch_humann_pathcoverage_path, 'pathcoverage') + ch_humann_pathcoverage_joined = JOIN_PATHCOV(ch_humann_pathcoverage_path, 'pathcoverage').joined emit: - humann_genefamilies = ch_humann_genefamilies_joined // channel: [ val(meta), [ reads_1.fastq.gz, reads_2.fastq.gz ] ] + humann_genefamilies = ch_humann_genefamilies_joined // channel: [ val(meta), genefamilies.tsv ] humann_ec = ch_humann_ec_joined // channel: [ val(meta), read_counts.tsv ] humann_pathabundance = ch_humann_pathabundance_joined // channel: [ val(meta), pathabundance.tsv ] humann_pathcoverage = ch_humann_pathcoverage_joined // channel: [ val(meta), pathcoverage.tsv ] From cb338e8298878750c129d29da30eb2252764efcd Mon Sep 17 00:00:00 2001 From: 
Danielle Callan Date: Mon, 30 Sep 2024 14:02:10 -0400 Subject: [PATCH 14/23] handle better paired fastq for humann --- modules/local/humann/humann/main.nf | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/modules/local/humann/humann/main.nf b/modules/local/humann/humann/main.nf index c7a0c78..5ba7c21 100644 --- a/modules/local/humann/humann/main.nf +++ b/modules/local/humann/humann/main.nf @@ -26,9 +26,10 @@ process HUMANN_HUMANN { script: def args = task.ext.args ?: '' def prefix = task.ext.prefix ?: "${meta.id}" + def input_data = !meta.single_end ? "--input ${reads[0]} --input ${reads[1]}" : "--input $reads" """ humann \\ - --input ${reads} \\ + ${input_data} \\ --output ./ \\ --threads ${task.cpus} \\ --taxonomic-profile ${metaphlan_profile} \\ From f58934ea3b9365c2d651abb1013c7a804839efec Mon Sep 17 00:00:00 2001 From: Danielle Callan Date: Tue, 1 Oct 2024 11:02:17 -0400 Subject: [PATCH 15/23] change test version of metaphlan db --- conf/test.config | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/conf/test.config b/conf/test.config index 4da4045..9bbc08a 100644 --- a/conf/test.config +++ b/conf/test.config @@ -25,7 +25,7 @@ params { run_kneaddata = true kneaddata_db_version = 'human_genome' run_metaphlan = true - metaphlan_db_version = 'mpa_vJan21_TOY_CHOCOPhlAnSGB_202103' + metaphlan_db_version = 'mpa_vOct22_CHOCOPhlAnSGB_202212' run_humann = true chocophlan_db_version = 'DEMO' uniref_db_version = 'DEMO_diamond' From 4e3e6159691b578ed85edb1c60fbbe15ba7dce12 Mon Sep 17 00:00:00 2001 From: Danielle Callan Date: Tue, 1 Oct 2024 12:47:43 -0400 Subject: [PATCH 16/23] update humann/join module to use current work dir as input --- modules/local/humann/join/main.nf | 3 +-- modules/local/humann/join/tests/main.nf.test | 6 ++---- .../local/fastq_microbial_pathway_humann/main.nf | 14 ++++---------- 3 files changed, 7 insertions(+), 16 deletions(-) diff --git a/modules/local/humann/join/main.nf b/modules/local/humann/join/main.nf index 
df06e16..3213e30 100644 --- a/modules/local/humann/join/main.nf +++ b/modules/local/humann/join/main.nf @@ -7,7 +7,6 @@ process HUMANN_JOIN { 'biocontainers/humann:3.8--pyh7cba7a3_0' }" input: - path(input_dir) val file_name_pattern output: @@ -18,7 +17,7 @@ process HUMANN_JOIN { def args = task.ext.args ?: '' """ humann_join_table \\ - --input $input_dir \\ + --input . \\ --output ${file_name_pattern}_joined.tsv \\ --file_name $file_name_pattern \\ ${args} diff --git a/modules/local/humann/join/tests/main.nf.test b/modules/local/humann/join/tests/main.nf.test index 3e748ea..474626f 100644 --- a/modules/local/humann/join/tests/main.nf.test +++ b/modules/local/humann/join/tests/main.nf.test @@ -14,8 +14,7 @@ nextflow_process { when { process { """ - input[0] = TODO - first is a path to a dir w files matching the below pattern - input[1] = "genefamilies" + input[0] = "genefamilies" """ } } @@ -40,8 +39,7 @@ nextflow_process { when { process { """ - input[0] = TODO - first is a path to a dir w files matching the below pattern - input[1] = "genefamilies" + input[0] = "genefamilies" """ } } diff --git a/subworkflows/local/fastq_microbial_pathway_humann/main.nf b/subworkflows/local/fastq_microbial_pathway_humann/main.nf index ea1b840..d8a6c96 100644 --- a/subworkflows/local/fastq_microbial_pathway_humann/main.nf +++ b/subworkflows/local/fastq_microbial_pathway_humann/main.nf @@ -84,28 +84,22 @@ workflow FASTQ_MICROBIAL_PATHWAY_HUMANN { // // MODULE: join gene abundances across all samples into one file // - // the paths should all be the same, so im taking the first. - // should probably be validated though, im just short of time.. 
- ch_humann_genefamilies_cpm_path = ch_humann_genefamilies_cpm.map{ toCanonicalPath(it[1]) }.unique().take(1) - ch_humann_genefamilies_joined = JOIN_GENES(ch_humann_genefamilies_cpm_path, 'genefamilies').joined + ch_humann_genefamilies_joined = JOIN_GENES('genefamilies').joined // // MODULE: join ec abundances across all samples into one file // - ch_humann_ec_renamed_path = ch_humann_ec_renamed.map{ toCanonicalPath(it[1]) }.unique().take(1) - ch_humann_ec_joined = JOIN_EC(ch_humann_ec_renamed_path, 'ec').joined // TODO check the file name pattern + ch_humann_ec_joined = JOIN_EC('ec').joined // TODO check the file name pattern // // MODULE: join pathway abundances across all samples into one file // - ch_humann_pathabundance_path = ch_humann_pathabundance_raw.map{ toCanonicalPath(it[1]) }.unique().take(1) - ch_humann_pathabundance_joined = JOIN_PATHABUND(ch_humann_pathabundance_path, 'pathabundance').joined + ch_humann_pathabundance_joined = JOIN_PATHABUND('pathabundance').joined // // MODULE: join pathway coverage across all samples into one file // - ch_humann_pathcoverage_path = ch_humann_pathcoverage_raw.map{ toCanonicalPath(it[1]) }.unique().take(1) - ch_humann_pathcoverage_joined = JOIN_PATHCOV(ch_humann_pathcoverage_path, 'pathcoverage').joined + ch_humann_pathcoverage_joined = JOIN_PATHCOV('pathcoverage').joined emit: humann_genefamilies = ch_humann_genefamilies_joined // channel: [ val(meta), genefamilies.tsv ] From 9eb971edf0da5cfa474806d728ba2ba75e4693d1 Mon Sep 17 00:00:00 2001 From: Danielle Callan Date: Tue, 1 Oct 2024 12:48:25 -0400 Subject: [PATCH 17/23] increase test resources --- conf/test.config | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/conf/test.config b/conf/test.config index 9bbc08a..5bcf4c0 100644 --- a/conf/test.config +++ b/conf/test.config @@ -15,8 +15,8 @@ params { config_profile_description = 'Minimal test dataset to check pipeline function' // Limit resources so that this can run on GitHub Actions - max_cpus 
= 2 - max_memory = '6.GB' + max_cpus = 4 + max_memory = '16.GB' max_time = '6.h' // Input data From 79fb1910f4c479c9669ae6dc9e9781ad5e21890d Mon Sep 17 00:00:00 2001 From: Danielle Callan Date: Tue, 1 Oct 2024 12:55:23 -0400 Subject: [PATCH 18/23] typo in humann module inputs --- modules/local/humann/regroup/main.nf | 2 +- modules/local/humann/rename/main.nf | 2 +- modules/local/humann/renorm/main.nf | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/modules/local/humann/regroup/main.nf b/modules/local/humann/regroup/main.nf index 9e87bfa..f2fe5dc 100644 --- a/modules/local/humann/regroup/main.nf +++ b/modules/local/humann/regroup/main.nf @@ -20,7 +20,7 @@ process HUMANN_REGROUP { def prefix = task.ext.prefix ?: "${meta.id}" """ humann_regroup_table \\ - --input input.tsv \\ + --input $input \\ --output ${prefix}_${groups}_regroup.tsv \\ --groups $groups \\ ${args} diff --git a/modules/local/humann/rename/main.nf b/modules/local/humann/rename/main.nf index b6922fa..f3c320f 100644 --- a/modules/local/humann/rename/main.nf +++ b/modules/local/humann/rename/main.nf @@ -20,7 +20,7 @@ process HUMANN_RENAME { def prefix = task.ext.prefix ?: "${meta.id}" """ humann_rename_table \\ - --input input.tsv \\ + --input $input \\ --output ${prefix}_${names}_rename.tsv \\ --names $names \\ ${args} diff --git a/modules/local/humann/renorm/main.nf b/modules/local/humann/renorm/main.nf index 69e390a..ec3f4f2 100644 --- a/modules/local/humann/renorm/main.nf +++ b/modules/local/humann/renorm/main.nf @@ -20,7 +20,7 @@ process HUMANN_RENORM { def prefix = task.ext.prefix ?: "${meta.id}" """ humann_renorm_table \\ - --input input.tsv \\ + --input $input \\ --output ${prefix}_${units}_renorm.tsv \\ --units $units \\ --update-snames \\ From 2b1cd7b95455cd58b34c784d3bbf78295a701cd1 Mon Sep 17 00:00:00 2001 From: Danielle Callan Date: Tue, 1 Oct 2024 13:02:41 -0400 Subject: [PATCH 19/23] silly --- modules/local/humann/join/main.nf | 1 + 
subworkflows/local/fastq_microbial_pathway_humann/main.nf | 8 ++++---- 2 files changed, 5 insertions(+), 4 deletions(-) diff --git a/modules/local/humann/join/main.nf b/modules/local/humann/join/main.nf index 3213e30..a3e4a2b 100644 --- a/modules/local/humann/join/main.nf +++ b/modules/local/humann/join/main.nf @@ -7,6 +7,7 @@ process HUMANN_JOIN { 'biocontainers/humann:3.8--pyh7cba7a3_0' }" input: + path(input) val file_name_pattern output: diff --git a/subworkflows/local/fastq_microbial_pathway_humann/main.nf b/subworkflows/local/fastq_microbial_pathway_humann/main.nf index d8a6c96..aec4a17 100644 --- a/subworkflows/local/fastq_microbial_pathway_humann/main.nf +++ b/subworkflows/local/fastq_microbial_pathway_humann/main.nf @@ -84,22 +84,22 @@ workflow FASTQ_MICROBIAL_PATHWAY_HUMANN { // // MODULE: join gene abundances across all samples into one file // - ch_humann_genefamilies_joined = JOIN_GENES('genefamilies').joined + ch_humann_genefamilies_joined = JOIN_GENES(ch_humann_genefamilies_cpm, 'genefamilies').joined // // MODULE: join ec abundances across all samples into one file // - ch_humann_ec_joined = JOIN_EC('ec').joined // TODO check the file name pattern + ch_humann_ec_joined = JOIN_EC(ch_humann_ec_renamed, 'ec').joined // TODO check the file name pattern // // MODULE: join pathway abundances across all samples into one file // - ch_humann_pathabundance_joined = JOIN_PATHABUND('pathabundance').joined + ch_humann_pathabundance_joined = JOIN_PATHABUND(ch_humann_pathabundance_raw, 'pathabundance').joined // // MODULE: join pathway coverage across all samples into one file // - ch_humann_pathcoverage_joined = JOIN_PATHCOV('pathcoverage').joined + ch_humann_pathcoverage_joined = JOIN_PATHCOV(ch_humann_pathcoverage_raw, 'pathcoverage').joined emit: humann_genefamilies = ch_humann_genefamilies_joined // channel: [ val(meta), genefamilies.tsv ] From ca95d106616fc3c1e129ab397cf9fd300ed765dd Mon Sep 17 00:00:00 2001 From: Danielle Callan Date: Tue, 1 Oct 2024 
13:07:26 -0400 Subject: [PATCH 20/23] forgot meta in humann/join module --- modules/local/humann/join/main.nf | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/modules/local/humann/join/main.nf b/modules/local/humann/join/main.nf index a3e4a2b..17f6027 100644 --- a/modules/local/humann/join/main.nf +++ b/modules/local/humann/join/main.nf @@ -7,7 +7,7 @@ process HUMANN_JOIN { 'biocontainers/humann:3.8--pyh7cba7a3_0' }" input: - path(input) + tuple val(meta), path(input) val file_name_pattern output: @@ -17,7 +17,7 @@ process HUMANN_JOIN { script: def args = task.ext.args ?: '' """ - humann_join_table \\ + humann_join_tables \\ --input . \\ --output ${file_name_pattern}_joined.tsv \\ --file_name $file_name_pattern \\ From c5823ad3dc2eba6c0ca03f11989864a4d62ed437 Mon Sep 17 00:00:00 2001 From: Danielle Callan Date: Tue, 1 Oct 2024 13:11:55 -0400 Subject: [PATCH 21/23] typo in humann/renorm --- modules/local/humann/renorm/main.nf | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/modules/local/humann/renorm/main.nf b/modules/local/humann/renorm/main.nf index ec3f4f2..b5a4c5f 100644 --- a/modules/local/humann/renorm/main.nf +++ b/modules/local/humann/renorm/main.nf @@ -25,8 +25,10 @@ process HUMANN_RENORM { --units $units \\ --update-snames \\ ${args} - stub: - def args = task.ext.args ?: '' + cat <<-END_VERSIONS > versions.yml + "${task.process}": + humann: \$(echo \$(humann --version 2>&1 | sed 's/^.*humann //; s/Using.*\$//' )) + END_VERSIONS """ stub: From c3e9f6d981779bdf67b1c53673395e1e5e23584b Mon Sep 17 00:00:00 2001 From: Danielle Callan Date: Tue, 1 Oct 2024 13:17:02 -0400 Subject: [PATCH 22/23] fix humann_regroup to rxn --- subworkflows/local/fastq_microbial_pathway_humann/main.nf | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/subworkflows/local/fastq_microbial_pathway_humann/main.nf b/subworkflows/local/fastq_microbial_pathway_humann/main.nf index aec4a17..255da3c 100644 --- 
a/subworkflows/local/fastq_microbial_pathway_humann/main.nf +++ b/subworkflows/local/fastq_microbial_pathway_humann/main.nf @@ -72,7 +72,7 @@ workflow FASTQ_MICROBIAL_PATHWAY_HUMANN { // // MODULE: regroup cpm gene families to EC numbers // - ch_humann_ec = HUMANN_REGROUP(ch_humann_genefamilies_cpm, 'ec').regroup // TODO make sure 'ec' is still valid arg + ch_humann_ec = HUMANN_REGROUP(ch_humann_genefamilies_cpm, 'uniref90_rxn').regroup ch_versions = ch_versions.mix(HUMANN_REGROUP.out.versions) // From c15761bc56d06d67bcb2c982d785b43a3effd2d9 Mon Sep 17 00:00:00 2001 From: Danielle Callan Date: Tue, 1 Oct 2024 13:19:07 -0400 Subject: [PATCH 23/23] fix join_genes file name pattern --- subworkflows/local/fastq_microbial_pathway_humann/main.nf | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/subworkflows/local/fastq_microbial_pathway_humann/main.nf b/subworkflows/local/fastq_microbial_pathway_humann/main.nf index 255da3c..8b82438 100644 --- a/subworkflows/local/fastq_microbial_pathway_humann/main.nf +++ b/subworkflows/local/fastq_microbial_pathway_humann/main.nf @@ -84,7 +84,7 @@ workflow FASTQ_MICROBIAL_PATHWAY_HUMANN { // // MODULE: join gene abundances across all samples into one file // - ch_humann_genefamilies_joined = JOIN_GENES(ch_humann_genefamilies_cpm, 'genefamilies').joined + ch_humann_genefamilies_joined = JOIN_GENES(ch_humann_genefamilies_cpm, 'cpm').joined // // MODULE: join ec abundances across all samples into one file