From f9d01cf1f179705d45ec0c0df670a95d295c00f9 Mon Sep 17 00:00:00 2001 From: nictru Date: Sat, 11 Jan 2025 18:29:16 +0100 Subject: [PATCH 01/10] Remove h5ad_conversion indent --- subworkflows/local/h5ad_conversion.nf | 64 +++++++++++++-------------- 1 file changed, 32 insertions(+), 32 deletions(-) diff --git a/subworkflows/local/h5ad_conversion.nf b/subworkflows/local/h5ad_conversion.nf index f832a7cf..d7b18337 100644 --- a/subworkflows/local/h5ad_conversion.nf +++ b/subworkflows/local/h5ad_conversion.nf @@ -9,40 +9,40 @@ workflow H5AD_CONVERSION { samplesheet main: - ch_versions = Channel.empty() - - // - // Concat sample-specific h5ad in one - // - ch_concat_h5ad_input = ch_h5ads.groupTuple() // gather all sample-specific files / per type - if (params.aligner == 'kallisto' && params.kb_workflow != 'standard') { - // when having spliced / unspliced matrices, the collected tuple has two levels ( [[mtx_1, mtx_2]] ) - // which nextflow break because it is not a valid 'path' thus, we have to remove one level - // making it as [ mtx_1, mtx_2 ] - ch_concat_h5ad_input = ch_concat_h5ad_input.map{ type, matrices -> [ type, matrices.flatten().toList() ] } - } - - CONCAT_H5AD ( - ch_concat_h5ad_input, - samplesheet - ) - ch_h5ad_concat = CONCAT_H5AD.out.h5ad.map{ meta, file -> - def meta_clone = meta.clone() - meta_clone.id = 'combined' // maintain output prefix - [ meta_clone, file ] - } - ch_versions = ch_versions.mix(CONCAT_H5AD.out.versions.first()) - - // - // MODULE: Convert to Rds with AnndataR package - // - ANNDATAR_CONVERT ( - ch_h5ads.mix( ch_h5ad_concat ) - ) - ch_versions = ch_versions.mix(ANNDATAR_CONVERT.out.versions.first()) + + ch_versions = Channel.empty() + + // + // Concat sample-specific h5ad in one + // + ch_concat_h5ad_input = ch_h5ads.groupTuple() // gather all sample-specific files / per type + if (params.aligner == 'kallisto' && params.kb_workflow != 'standard') { + // when having spliced / unspliced matrices, the collected tuple has two levels ( [[mtx_1, mtx_2]] ) + // which nextflow break because it is not a valid 'path' thus, we have to remove one level + // making it as [ mtx_1, mtx_2 ] + ch_concat_h5ad_input = ch_concat_h5ad_input.map{ type, matrices -> [ type, matrices.flatten().toList() ] } + } + + CONCAT_H5AD ( + ch_concat_h5ad_input, + samplesheet + ) + ch_h5ad_concat = CONCAT_H5AD.out.h5ad.map{ meta, file -> + def meta_clone = meta.clone() + meta_clone.id = 'combined' // maintain output prefix + [ meta_clone, file ] + } + ch_versions = ch_versions.mix(CONCAT_H5AD.out.versions.first()) + + // + // MODULE: Convert to Rds with AnndataR package + // + ANNDATAR_CONVERT ( + ch_h5ads.mix( ch_h5ad_concat ) + ) + ch_versions = ch_versions.mix(ANNDATAR_CONVERT.out.versions.first()) emit: ch_versions h5ads = ch_h5ads - } From 82dc4620ae89b97d483fae541b3b0faa3e7145f7 Mon Sep 17 00:00:00 2001 From: nictru Date: Sat, 11 Jan 2025 18:38:15 +0100 Subject: [PATCH 02/10] Fix problems in h5ad concatenation --- modules/local/templates/concat_h5ad.py | 4 ++-- subworkflows/local/h5ad_conversion.nf | 23 ++++++++--------------- 2 files changed, 10 insertions(+), 17 deletions(-) diff --git a/modules/local/templates/concat_h5ad.py b/modules/local/templates/concat_h5ad.py index 9eddfa46..2876461c 100755 --- a/modules/local/templates/concat_h5ad.py +++ b/modules/local/templates/concat_h5ad.py @@ -62,9 +62,9 @@ def dump_versions(): # merge with data.frame, on sample information adata.obs = adata.obs.join(df_samplesheet, on="sample", how="left").astype(str) - adata.write_h5ad("combined_${meta.input_type}_matrix.h5ad") + adata.write_h5ad("combined_${meta.id}_matrix.h5ad") - print("Wrote h5ad file to {}".format("combined_${meta.input_type}_matrix.h5ad")) + print("Wrote h5ad file to {}".format("combined_${meta.id}_matrix.h5ad")) # dump versions dump_versions() diff --git a/subworkflows/local/h5ad_conversion.nf b/subworkflows/local/h5ad_conversion.nf index d7b18337..208648ce 100644 --- a/subworkflows/local/h5ad_conversion.nf +++ b/subworkflows/local/h5ad_conversion.nf @@ -13,32 +13,25 @@ workflow H5AD_CONVERSION { ch_versions = Channel.empty() // - // Concat sample-specific h5ad in one + // Concat all raw and unfiltered h5ad files // - ch_concat_h5ad_input = ch_h5ads.groupTuple() // gather all sample-specific files / per type - if (params.aligner == 'kallisto' && params.kb_workflow != 'standard') { - // when having spliced / unspliced matrices, the collected tuple has two levels ( [[mtx_1, mtx_2]] ) - // which nextflow break because it is not a valid 'path' thus, we have to remove one level - // making it as [ mtx_1, mtx_2 ] - ch_concat_h5ad_input = ch_concat_h5ad_input.map{ type, matrices -> [ type, matrices.flatten().toList() ] } - } + ch_concat_h5ad_input = ch_h5ads + .map{ meta, file -> [ [id: meta.input_type], file ]} + .groupTuple() CONCAT_H5AD ( ch_concat_h5ad_input, samplesheet ) - ch_h5ad_concat = CONCAT_H5AD.out.h5ad.map{ meta, file -> - def meta_clone = meta.clone() - meta_clone.id = 'combined' // maintain output prefix - [ meta_clone, file ] - } + + ch_h5ad_concat = CONCAT_H5AD.out.h5ad ch_versions = ch_versions.mix(CONCAT_H5AD.out.versions.first()) // - // MODULE: Convert to Rds with AnndataR package + // MODULE: Convert to RDS with AnndataR package // ANNDATAR_CONVERT ( - ch_h5ads.mix( ch_h5ad_concat ) + ch_h5ad_concat ) ch_versions = ch_versions.mix(ANNDATAR_CONVERT.out.versions.first()) From 2e2da0353c835cc239f478250fabf3188ba233e8 Mon Sep 17 00:00:00 2001 From: nictru Date: Sat, 11 Jan 2025 19:19:59 +0100 Subject: [PATCH 03/10] Restructure empty droplet calling and h5ad conversion --- modules.json | 12 +++- .../nf-core/anndata/barcodes/environment.yml | 5 ++ modules/nf-core/anndata/barcodes/main.nf | 36 ++++++++++ modules/nf-core/anndata/barcodes/meta.yml | 58 +++++++++++++++ .../anndata/barcodes/templates/barcodes.py | 43 +++++++++++ .../anndata/barcodes/tests/main.nf.test | 62 ++++++++++++++++ .../anndata/barcodes/tests/main.nf.test.snap | 72 +++++++++++++++++++ .../nf-core/anndata/barcodes/tests/tags.yml | 2 + subworkflows/local/emptydrops_removal.nf | 37 ---------- .../main.nf | 28 ++++++++ .../meta.yml | 35 +++++++++ .../tests/epochs.config | 6 ++ .../tests/main.nf.test | 32 +++++++++ .../tests/main.nf.test.snap | 37 ++++++++++ workflows/scrnaseq.nf | 72 +++++++++---------- 15 files changed, 460 insertions(+), 77 deletions(-) create mode 100644 modules/nf-core/anndata/barcodes/environment.yml create mode 100644 modules/nf-core/anndata/barcodes/main.nf create mode 100644 modules/nf-core/anndata/barcodes/meta.yml create mode 100644 modules/nf-core/anndata/barcodes/templates/barcodes.py create mode 100644 modules/nf-core/anndata/barcodes/tests/main.nf.test create mode 100644 modules/nf-core/anndata/barcodes/tests/main.nf.test.snap create mode 100644 modules/nf-core/anndata/barcodes/tests/tags.yml delete mode 100644 subworkflows/local/emptydrops_removal.nf create mode 100644 subworkflows/nf-core/h5ad_removebackground_barcodes_cellbender_anndata/main.nf create mode 100644 subworkflows/nf-core/h5ad_removebackground_barcodes_cellbender_anndata/meta.yml create mode 100644 subworkflows/nf-core/h5ad_removebackground_barcodes_cellbender_anndata/tests/epochs.config create mode 100644 subworkflows/nf-core/h5ad_removebackground_barcodes_cellbender_anndata/tests/main.nf.test create mode 100644 subworkflows/nf-core/h5ad_removebackground_barcodes_cellbender_anndata/tests/main.nf.test.snap diff --git a/modules.json b/modules.json index d4d95cab..aa6c65f8 100644 --- a/modules.json +++ b/modules.json @@ -5,10 +5,15 @@ "https://github.com/nf-core/modules.git": { "modules": { "nf-core": { + "anndata/barcodes": { + "branch": "master", + "git_sha": "05e4d643e8a9cd600fdce51dccdfc68cf9a72489", + "installed_by": ["h5ad_removebackground_barcodes_cellbender_anndata"] + }, "cellbender/removebackground": { "branch": "master", "git_sha": "06c8865e36741e05ad32ef70ab3fac127486af48", - "installed_by": ["modules"] + "installed_by": ["h5ad_removebackground_barcodes_cellbender_anndata", "modules"] }, "cellranger/count": { "branch": "master", @@ -94,6 +99,11 @@ }, "subworkflows": { "nf-core": { + "h5ad_removebackground_barcodes_cellbender_anndata": { + "branch": "master", + "git_sha": "23600f6cd1c7fc4da79709bb531e5ba3cd5e840e", + "installed_by": ["subworkflows"] + }, "utils_nextflow_pipeline": { "branch": "master", "git_sha": "3aa0aec1d52d492fe241919f0c6100ebf0074082", diff --git a/modules/nf-core/anndata/barcodes/environment.yml b/modules/nf-core/anndata/barcodes/environment.yml new file mode 100644 index 00000000..b43edd6c --- /dev/null +++ b/modules/nf-core/anndata/barcodes/environment.yml @@ -0,0 +1,5 @@ +channels: + - conda-forge + - bioconda +dependencies: + - conda-forge::anndata=0.10.9 diff --git a/modules/nf-core/anndata/barcodes/main.nf b/modules/nf-core/anndata/barcodes/main.nf new file mode 100644 index 00000000..c6697109 --- /dev/null +++ b/modules/nf-core/anndata/barcodes/main.nf @@ -0,0 +1,36 @@ +process ANNDATA_BARCODES { + tag "$meta.id" + label 'process_single' + + conda "${moduleDir}/environment.yml" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? + 'oras://community.wave.seqera.io/library/anndata:0.10.9--d13580e4b297da7c': + 'community.wave.seqera.io/library/anndata:0.10.9--1eab54e300e1e584' }" + + input: + tuple val(meta), path(h5ad), path(barcodes) + + output: + tuple val(meta), path("*.h5ad"), emit: h5ad + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + prefix = task.ext.prefix ?: "${meta.id}" + template 'barcodes.py' + + stub: + prefix = task.ext.prefix ?: "${meta.id}" + """ + touch ${prefix}.h5ad + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + python: \$(python3 -c 'import platform; print(platform.python_version())') + anndata: \$(python3 -c 'import anndata as ad; print(ad.__version__)') + pandas: \$(python3 -c 'import pandas as pd; print(pd.__version__)') + END_VERSIONS + """ +} diff --git a/modules/nf-core/anndata/barcodes/meta.yml b/modules/nf-core/anndata/barcodes/meta.yml new file mode 100644 index 00000000..96be0d15 --- /dev/null +++ b/modules/nf-core/anndata/barcodes/meta.yml @@ -0,0 +1,58 @@ +name: anndata_barcodes +description: Module to subset AnnData object to cells with matching barcodes from the csv file +keywords: + - single-cell + - barcodes + - anndata + - subsetting + - transcriptomics +tools: + - "anndata": + description: "An annotated data matrix." + homepage: "https://anndata.readthedocs.io" + documentation: "https://anndata.readthedocs.io" + tool_dev_url: "https://github.com/theislab/anndata" + doi: "10.21105/joss.04371" + licence: ["BSD-3-clause"] + identifier: biotools:anndata +input: + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test' ] + - h5ad: + type: file + description: Anndata object as H5AD file + pattern: "*.h5ad" + - barcodes: + type: file + description: | + CSV file containing barcodes. Barcodes are written in plain text. One column, no header + pattern: "*.csv" + +output: + - h5ad: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test' ] + pattern: "*.h5ad" + - "*.h5ad": + type: file + description: | + AnnData file containing all cells with barcodes that match those in the input barcodes csv file + pattern: "*.h5ad" + - versions: + - versions.yml: + type: file + description: File containing software versions + pattern: "versions.yml" +authors: + - "@nictru" + - "@chaochaowong" + - "@LeonHafner" +maintainers: + - "@nictru" + - "@LeonHafner" diff --git a/modules/nf-core/anndata/barcodes/templates/barcodes.py b/modules/nf-core/anndata/barcodes/templates/barcodes.py new file mode 100644 index 00000000..b7dcd63a --- /dev/null +++ b/modules/nf-core/anndata/barcodes/templates/barcodes.py @@ -0,0 +1,43 @@ +#!/usr/bin/env python3 + +import platform + +import anndata as ad +import pandas as pd + + +def format_yaml_like(data: dict, indent: int = 0) -> str: + """Formats a dictionary to a YAML-like string. + + Args: + data (dict): The dictionary to format. + indent (int): The current indentation level. + + Returns: + str: A string formatted as YAML. + """ + yaml_str = "" + for key, value in data.items(): + spaces = " " * indent + if isinstance(value, dict): + yaml_str += f"{spaces}{key}:\\n{format_yaml_like(value, indent + 1)}" + else: + yaml_str += f"{spaces}{key}: {value}\\n" + return yaml_str + + +df = pd.read_csv("${barcodes}", header=None) +adata = ad.read_h5ad("${h5ad}") + +adata = adata[df[0].values] + +adata.write_h5ad("${prefix}.h5ad") + +# Versions + +versions = { + "${task.process}": {"python": platform.python_version(), "anndata": ad.__version__, "pandas": pd.__version__} +} + +with open("versions.yml", "w") as f: + f.write(format_yaml_like(versions)) diff --git a/modules/nf-core/anndata/barcodes/tests/main.nf.test b/modules/nf-core/anndata/barcodes/tests/main.nf.test new file mode 100644 index 00000000..8910016b --- /dev/null +++ b/modules/nf-core/anndata/barcodes/tests/main.nf.test @@ -0,0 +1,62 @@ +nextflow_process { + + name 'Test Process ANNDATA_BARCODES' + script '../main.nf' + process 'ANNDATA_BARCODES' + + tag "modules" + tag "modules_nfcore" + tag "anndata/barcodes" + tag "anndata" + + test("scdownstream - h5ad - csv") { + when { + process { + """ + input[0] = Channel.from([ + [ + [ id:'test', single_end:false ], // meta map + file("https://raw.githubusercontent.com/nf-core/test-datasets/scdownstream/samples/SAMN14430799_raw_matrix_5k.h5ad", checkIfExists: true) + + ] + ]).combine(Channel.from('TGGTTAGCAGAGCCAA', 'GGGAGATAGACTGTAA', 'GAGGTGACAGGTCTCG').collectFile(newLine: true)) + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + + } + + test("scdownstream - h5ad - csv - stub") { + + options "-stub" + + when { + process { + """ + input[0] = Channel.from([ + [ + [ id:'test', single_end:false ], // meta map + file("https://raw.githubusercontent.com/nf-core/test-datasets/scdownstream/samples/SAMN14430799_raw_matrix_5k.h5ad", checkIfExists: true) + + ] + ]).combine(Channel.from('TGGTTAGCAGAGCCAA', 'GGGAGATAGACTGTAA', 'GAGGTGACAGGTCTCG').collectFile(newLine: true)) + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + + } +} \ No newline at end of file diff --git a/modules/nf-core/anndata/barcodes/tests/main.nf.test.snap b/modules/nf-core/anndata/barcodes/tests/main.nf.test.snap new file mode 100644 index 00000000..29092a26 --- /dev/null +++ b/modules/nf-core/anndata/barcodes/tests/main.nf.test.snap @@ -0,0 +1,72 @@ +{ + "scdownstream - h5ad - csv - stub": { + "content": [ + { + "0": [ + [ + { + "id": "test", + "single_end": false + }, + "test.h5ad:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "1": [ + "versions.yml:md5,eacb49282e0ca8e8073368e7cd1e6225" + ], + "h5ad": [ + [ + { + "id": "test", + "single_end": false + }, + "test.h5ad:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "versions": [ + "versions.yml:md5,eacb49282e0ca8e8073368e7cd1e6225" + ] + } + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.10.0" + }, + "timestamp": "2024-11-05T22:16:28.555901034" + }, + "scdownstream - h5ad - csv": { + "content": [ + { + "0": [ + [ + { + "id": "test", + "single_end": false + }, + "test.h5ad:md5,cb9527dba4ca011ae2eec8442df7178e" + ] + ], + "1": [ + "versions.yml:md5,3a2663ed06ac752477d403b6c389d78a" + ], + "h5ad": [ + [ + { + "id": "test", + "single_end": false + }, + "test.h5ad:md5,cb9527dba4ca011ae2eec8442df7178e" + ] + ], + "versions": [ + "versions.yml:md5,3a2663ed06ac752477d403b6c389d78a" + ] + } + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.10.0" + }, + "timestamp": "2024-11-05T22:16:16.587686784" + } +} \ No newline at end of file diff --git a/modules/nf-core/anndata/barcodes/tests/tags.yml b/modules/nf-core/anndata/barcodes/tests/tags.yml new file mode 100644 index 00000000..c14cf688 --- /dev/null +++ b/modules/nf-core/anndata/barcodes/tests/tags.yml @@ -0,0 +1,2 @@ +anndata/barcodes: + - modules/nf-core/anndata/barcodes/** diff --git a/subworkflows/local/emptydrops_removal.nf b/subworkflows/local/emptydrops_removal.nf deleted file mode 100644 index 7d63e86f..00000000 --- a/subworkflows/local/emptydrops_removal.nf +++ /dev/null @@ -1,37 +0,0 @@ -include { CELLBENDER_REMOVEBACKGROUND } from '../../modules/nf-core/cellbender/removebackground' -include { ADATA_BARCODES } from '../../modules/local/adata_barcodes' - -// -// TODO: Make it a nf-core subworkflow to be shared by scrnaseq and scdownstream pipelines. -// - -workflow EMPTY_DROPLET_REMOVAL { - take: - ch_unfiltered - - main: - ch_versions = Channel.empty() - - CELLBENDER_REMOVEBACKGROUND(ch_unfiltered) - ch_versions = ch_versions.mix(CELLBENDER_REMOVEBACKGROUND.out.versions) - - ch_combined = - ch_unfiltered - .join(CELLBENDER_REMOVEBACKGROUND.out.barcodes) - .map { meta, h5ad, csv -> - def meta_clone = meta.clone() - meta_clone.input_type = meta['input_type'].toString().replaceAll('raw', 'emptydrops_filter') - - [ meta_clone, h5ad, csv ] - } - - ADATA_BARCODES(ch_combined) - ch_versions = ch_versions.mix(ADATA_BARCODES.out.versions) - - ch_h5ad = ADATA_BARCODES.out.h5ad - - emit: - h5ad = ch_h5ad - - versions = ch_versions -} diff --git a/subworkflows/nf-core/h5ad_removebackground_barcodes_cellbender_anndata/main.nf b/subworkflows/nf-core/h5ad_removebackground_barcodes_cellbender_anndata/main.nf new file mode 100644 index 00000000..9539f1c1 --- /dev/null +++ b/subworkflows/nf-core/h5ad_removebackground_barcodes_cellbender_anndata/main.nf @@ -0,0 +1,28 @@ +// +// Apply cellbender and anndata to h5ad for background and empty droplet removal +// +include { CELLBENDER_REMOVEBACKGROUND } from '../../../modules/nf-core/cellbender/removebackground' +include { ANNDATA_BARCODES } from '../../../modules/nf-core/anndata/barcodes' + +workflow H5AD_REMOVEBACKGROUND_BARCODES_CELLBENDER_ANNDATA { + + take: + ch_unfiltered // channel: [mandatory] meta, h5ad + + main: + ch_versions = Channel.empty() + + CELLBENDER_REMOVEBACKGROUND(ch_unfiltered) + ch_versions = ch_versions.mix(CELLBENDER_REMOVEBACKGROUND.out.versions) + + ch_combined = ch_unfiltered.join(CELLBENDER_REMOVEBACKGROUND.out.barcodes) + + ANNDATA_BARCODES(ch_combined) + ch_versions = ch_versions.mix(ANNDATA_BARCODES.out.versions) + + emit: + h5ad = ANNDATA_BARCODES.out.h5ad // channel: [ val(meta), path(h5ad) ] + + versions = ch_versions // channel: [ path(versions.yml) ] +} + diff --git a/subworkflows/nf-core/h5ad_removebackground_barcodes_cellbender_anndata/meta.yml b/subworkflows/nf-core/h5ad_removebackground_barcodes_cellbender_anndata/meta.yml new file mode 100644 index 00000000..c4c4b4cf --- /dev/null +++ b/subworkflows/nf-core/h5ad_removebackground_barcodes_cellbender_anndata/meta.yml @@ -0,0 +1,35 @@ +name: "h5ad_removebackground_barcodes_cellbender_anndata" +description: Use features of Cellbender to remove background noise and empty droplet +keywords: + - scdownstream + - cellbender + - anndata +components: + - cellbender/removebackground + - anndata/barcodes +input: + - ch_unfiltered: + type: file + description: | + The input channel containing the unfiltered AnnData file to process + and remove background noise and empty droplets + Structure: [ val(meta), path(h5ad) ] + pattern: "*.h5ad" +output: + - h5ad: + description: | + Background and empty droplet removed AnnData file containing cells with + barcodes exceeding 0.5 posterior cell probability determined by the + cellbender's remove-background + Structure: [ val(meta), path(h5ad) ] + pattern: "*.h5ad" + - versions: + type: file + description: | + File containing software versions + pattern: "versions.yml" +authors: + - "@nictru" + - "@chaochaowong" +maintainers: + - "@nictru" diff --git a/subworkflows/nf-core/h5ad_removebackground_barcodes_cellbender_anndata/tests/epochs.config b/subworkflows/nf-core/h5ad_removebackground_barcodes_cellbender_anndata/tests/epochs.config new file mode 100644 index 00000000..96282b07 --- /dev/null +++ b/subworkflows/nf-core/h5ad_removebackground_barcodes_cellbender_anndata/tests/epochs.config @@ -0,0 +1,6 @@ + +process { + withName: CELLBENDER_REMOVEBACKGROUND { + ext.args = '--epochs 20' + } +} diff --git a/subworkflows/nf-core/h5ad_removebackground_barcodes_cellbender_anndata/tests/main.nf.test b/subworkflows/nf-core/h5ad_removebackground_barcodes_cellbender_anndata/tests/main.nf.test new file mode 100644 index 00000000..92ea058a --- /dev/null +++ b/subworkflows/nf-core/h5ad_removebackground_barcodes_cellbender_anndata/tests/main.nf.test @@ -0,0 +1,32 @@ +nextflow_workflow { + + name "Test Subworkflow H5AD_REMOVEBACKGROUND_BARCODES_CELLBENDER_ANNDATA" + script "../main.nf" + workflow "H5AD_REMOVEBACKGROUND_BARCODES_CELLBENDER_ANNDATA" + + tag "subworkflows" + tag "subworkflows_nfcore" + tag "subworkflows/h5ad_removebackground_barcodes_cellbender_anndata" + tag "cellbender/removebackground" + tag "anndata/barcodes" + + test("h5ad - h5ad_removebackground_barcodes_cellbender_anndata") { + config './epochs.config' + when { + workflow { + """ + input[0] = Channel.of([ + [ id:'test' ], // meta map + file("https://raw.githubusercontent.com/nf-core/test-datasets/scdownstream/samples/SAMN14430799_raw_matrix_5k.h5ad", checkIfExists: true) + ]) + """ + } + } + then { + assertAll( + { assert workflow.success}, + { assert snapshot(workflow.out).match()} + ) + } + } +} \ No newline at end of file diff --git a/subworkflows/nf-core/h5ad_removebackground_barcodes_cellbender_anndata/tests/main.nf.test.snap b/subworkflows/nf-core/h5ad_removebackground_barcodes_cellbender_anndata/tests/main.nf.test.snap new file mode 100644 index 00000000..50f6d7c6 --- /dev/null +++ b/subworkflows/nf-core/h5ad_removebackground_barcodes_cellbender_anndata/tests/main.nf.test.snap @@ -0,0 +1,37 @@ +{ + "h5ad - h5ad_removebackground_barcodes_cellbender_anndata": { + "content": [ + { + "0": [ + [ + { + "id": "test" + }, + "test.h5ad:md5,6c94663b900a495d3492cd0b216c9b67" + ] + ], + "1": [ + "versions.yml:md5,3b268371ecd09ac624398c004f5e279d", + "versions.yml:md5,fc7f5fec60cfd2b10b91177571ee63e8" + ], + "h5ad": [ + [ + { + "id": "test" + }, + "test.h5ad:md5,6c94663b900a495d3492cd0b216c9b67" + ] + ], + "versions": [ + "versions.yml:md5,3b268371ecd09ac624398c004f5e279d", + "versions.yml:md5,fc7f5fec60cfd2b10b91177571ee63e8" + ] + } + ], + "meta": { + "nf-test": "0.9.0", + "nextflow": "24.10.1" + }, + "timestamp": "2024-11-26T15:17:04.577770824" + } +} \ No newline at end of file diff --git a/workflows/scrnaseq.nf b/workflows/scrnaseq.nf index 8fa3c317..c8986c65 100644 --- a/workflows/scrnaseq.nf +++ b/workflows/scrnaseq.nf @@ -3,28 +3,25 @@ IMPORT MODULES / SUBWORKFLOWS / FUNCTIONS ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ */ -include { MULTIQC } from '../modules/nf-core/multiqc/main' -include { paramsSummaryMap } from 'plugin/nf-schema' -include { paramsSummaryMultiqc } from '../subworkflows/nf-core/utils_nfcore_pipeline' -include { softwareVersionsToYAML } from '../subworkflows/nf-core/utils_nfcore_pipeline' -include { methodsDescriptionText } from '../subworkflows/local/utils_nfcore_scrnaseq_pipeline' -include { getGenomeAttribute } from '../subworkflows/local/utils_nfcore_scrnaseq_pipeline' -include { FASTQC_CHECK } from '../subworkflows/local/fastqc' -include { KALLISTO_BUSTOOLS } from '../subworkflows/local/kallisto_bustools' -include { SCRNASEQ_ALEVIN } from '../subworkflows/local/alevin' -include { STARSOLO } from '../subworkflows/local/starsolo' -include { CELLRANGER_ALIGN } from "../subworkflows/local/align_cellranger" -include { CELLRANGER_MULTI_ALIGN } from "../subworkflows/local/align_cellrangermulti" -include { CELLRANGERARC_ALIGN } from "../subworkflows/local/align_cellrangerarc" -include { MTX_TO_H5AD } from '../modules/local/mtx_to_h5ad' -include { H5AD_CONVERSION } from '../subworkflows/local/h5ad_conversion' -include { H5AD_CONVERSION as EMPTYDROPS_H5AD_CONVERSION } from '../subworkflows/local/h5ad_conversion' -include { EMPTY_DROPLET_REMOVAL } from '../subworkflows/local/emptydrops_removal.nf' -include { GTF_GENE_FILTER } from '../modules/local/gtf_gene_filter' -include { GUNZIP as GUNZIP_FASTA } from '../modules/nf-core/gunzip/main' -include { GUNZIP as GUNZIP_GTF } from '../modules/nf-core/gunzip/main' - - +include { MULTIQC } from '../modules/nf-core/multiqc/main' +include { paramsSummaryMap } from 'plugin/nf-schema' +include { paramsSummaryMultiqc } from '../subworkflows/nf-core/utils_nfcore_pipeline' +include { softwareVersionsToYAML } from '../subworkflows/nf-core/utils_nfcore_pipeline' +include { methodsDescriptionText } from '../subworkflows/local/utils_nfcore_scrnaseq_pipeline' +include { getGenomeAttribute } from '../subworkflows/local/utils_nfcore_scrnaseq_pipeline' +include { FASTQC_CHECK } from '../subworkflows/local/fastqc' +include { KALLISTO_BUSTOOLS } from '../subworkflows/local/kallisto_bustools' +include { SCRNASEQ_ALEVIN } from '../subworkflows/local/alevin' +include { STARSOLO } from '../subworkflows/local/starsolo' +include { CELLRANGER_ALIGN } from "../subworkflows/local/align_cellranger" +include { CELLRANGER_MULTI_ALIGN } from "../subworkflows/local/align_cellrangermulti" +include { CELLRANGERARC_ALIGN } from "../subworkflows/local/align_cellrangerarc" +include { MTX_TO_H5AD } from '../modules/local/mtx_to_h5ad' +include { H5AD_REMOVEBACKGROUND_BARCODES_CELLBENDER_ANNDATA } from '../subworkflows/nf-core/h5ad_removebackground_barcodes_cellbender_anndata' +include { GTF_GENE_FILTER } from '../modules/local/gtf_gene_filter' +include { GUNZIP as GUNZIP_FASTA } from '../modules/nf-core/gunzip/main' +include { GUNZIP as GUNZIP_GTF } from '../modules/nf-core/gunzip/main' +include { H5AD_CONVERSION } from '../subworkflows/local/h5ad_conversion' workflow SCRNASEQ { @@ -296,33 +293,30 @@ workflow SCRNASEQ { params.aligner ) ch_versions = ch_versions.mix(MTX_TO_H5AD.out.versions.first()) - - // - // SUBWORKFLOW: Run h5ad conversion and concatenation - // - ch_emptydrops = Channel.empty() - H5AD_CONVERSION ( - MTX_TO_H5AD.out.h5ad, - ch_input - ) - ch_versions = ch_versions.mix(H5AD_CONVERSION.out.ch_versions) + ch_h5ads = MTX_TO_H5AD.out.h5ad // // SUBWORKFLOW: Run cellbender emptydrops filter // if ( !params.skip_emptydrops && !(params.aligner in ['cellrangerarc']) ) { - // emptydrops should only run on the raw matrices thus, filter-out the filtered result of the aligners that can produce it - EMPTY_DROPLET_REMOVAL ( - H5AD_CONVERSION.out.h5ads.filter { meta, mtx_files -> meta.input_type.contains('raw') } + H5AD_REMOVEBACKGROUND_BARCODES_CELLBENDER_ANNDATA ( + ch_h5ads.filter { meta, mtx_files -> meta.input_type == 'raw' } ) - EMPTYDROPS_H5AD_CONVERSION ( - EMPTY_DROPLET_REMOVAL.out.h5ad, - ch_input + ch_h5ads = ch_h5ads.mix( + H5AD_REMOVEBACKGROUND_BARCODES_CELLBENDER_ANNDATA.out.h5ad + .map{ meta, file -> [ meta + [input_type: 'emptydrops'], file ]} ) - } + // + // SUBWORKFLOW: Concat samples and convert h5ad to other formats + // + H5AD_CONVERSION ( + ch_h5ads, + ch_input + ) + // // Collate and save software versions // From 51b64fa604ceb3dbea558032102fd892e87332eb Mon Sep 17 00:00:00 2001 From: nictru Date: Sat, 11 Jan 2025 19:24:15 +0100 Subject: [PATCH 04/10] Remove outdated local module --- modules/local/adata_barcodes.nf | 29 ------------------- modules/local/templates/barcodes.py | 45 ----------------------------- 2 files changed, 74 deletions(-) delete mode 100644 modules/local/adata_barcodes.nf delete mode 100644 modules/local/templates/barcodes.py diff --git a/modules/local/adata_barcodes.nf b/modules/local/adata_barcodes.nf deleted file mode 100644 index 630d90ae..00000000 --- a/modules/local/adata_barcodes.nf +++ /dev/null @@ -1,29 +0,0 @@ -process ADATA_BARCODES { - - // - // Module from nf-core/scdownstream. - // This module performs the subset of the h5ad file to only contain barcodes that passed emptydrops filter with cellbender - // - - tag "$meta.id" - label 'process_single' - - conda "${moduleDir}/environment.yml" - container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? - 'oras://community.wave.seqera.io/library/anndata:0.10.7--e9840a94592528c8': - 'community.wave.seqera.io/library/anndata:0.10.7--336c6c1921a0632b' }" - - input: - tuple val(meta), path(h5ad), path(barcodes_csv) - - output: - tuple val(meta), path("*.h5ad"), emit: h5ad - path "versions.yml" , emit: versions - - when: - task.ext.when == null || task.ext.when - - script: - prefix = task.ext.prefix ?: "${meta.id}" - template 'barcodes.py' -} diff --git a/modules/local/templates/barcodes.py b/modules/local/templates/barcodes.py deleted file mode 100644 index 73b9a32a..00000000 --- a/modules/local/templates/barcodes.py +++ /dev/null @@ -1,45 +0,0 @@ -#!/usr/bin/env python3 -"""Subset h5ad to a predefined set of barcodes""" - -import platform -import anndata as ad -import pandas as pd - -def format_yaml_like(data: dict, indent: int = 0) -> str: - """Formats a dictionary to a YAML-like string. - - Args: - data (dict): The dictionary to format. - indent (int): The current indentation level. - - Returns: - str: A string formatted as YAML. - """ - yaml_str = "" - for key, value in data.items(): - spaces = " " * indent - if isinstance(value, dict): - yaml_str += f"{spaces}{key}:\\n{format_yaml_like(value, indent + 1)}" - else: - yaml_str += f"{spaces}{key}: {value}\\n" - return yaml_str - -df = pd.read_csv("${barcodes_csv}", header=None) -adata = ad.read_h5ad("${h5ad}") - -adata = adata[df[0].values] - -adata.write_h5ad("${prefix}.h5ad") - -# Versions - -versions = { - "${task.process}": { - "python": platform.python_version(), - "anndata": ad.__version__, - "pandas": pd.__version__ - } -} - -with open("versions.yml", "w") as f: - f.write(format_yaml_like(versions)) From 549d23717bec8fb753dac9dd90c79f1989380964 Mon Sep 17 00:00:00 2001 From: nictru Date: Sat, 11 Jan 2025 19:27:19 +0100 Subject: [PATCH 05/10] Fix ANNDATA_BARCODES module config specifier --- conf/modules.config | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/conf/modules.config b/conf/modules.config index a1bfb4af..771d94f7 100644 --- a/conf/modules.config +++ b/conf/modules.config @@ -38,7 +38,7 @@ process { saveAs: { filename -> filename.equals('versions.yml') ? null : filename } ] } - withName: 'ADATA_BARCODES' { + withName: 'ANNDATA_BARCODES' { ext.prefix = { "${meta.id}_${meta.input_type}_matrix" } publishDir = [ path: { "${params.outdir}/${params.aligner}/mtx_conversions/${meta.id}" }, From c9c9167b0ab82842471b5dbfed034ac54df8493a Mon Sep 17 00:00:00 2001 From: nictru Date: Sat, 11 Jan 2025 19:36:09 +0100 Subject: [PATCH 06/10] Make sure per-sample h5ads are also converted to RDS --- subworkflows/local/h5ad_conversion.nf | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/subworkflows/local/h5ad_conversion.nf b/subworkflows/local/h5ad_conversion.nf index 208648ce..3289b8ae 100644 --- a/subworkflows/local/h5ad_conversion.nf +++ b/subworkflows/local/h5ad_conversion.nf @@ -31,7 +31,7 @@ workflow H5AD_CONVERSION { // MODULE: Convert to RDS with AnndataR package // ANNDATAR_CONVERT ( - ch_h5ad_concat + ch_h5ads.mix(ch_h5ad_concat) ) ch_versions = ch_versions.mix(ANNDATAR_CONVERT.out.versions.first()) From 5c05db3bee70fab5002a14f1cde323dc7365879a Mon Sep 17 00:00:00 2001 From: nictru Date: Sat, 11 Jan 2025 20:58:50 +0100 Subject: [PATCH 07/10] Fix some meta map details --- modules/local/templates/concat_h5ad.py | 4 ++-- subworkflows/local/h5ad_conversion.nf | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/modules/local/templates/concat_h5ad.py b/modules/local/templates/concat_h5ad.py index 2876461c..f6e1ccc4 100755 --- a/modules/local/templates/concat_h5ad.py +++ b/modules/local/templates/concat_h5ad.py @@ -62,9 +62,9 @@ def dump_versions(): # merge with data.frame, on sample information adata.obs = adata.obs.join(df_samplesheet, on="sample", how="left").astype(str) - adata.write_h5ad("combined_${meta.id}_matrix.h5ad") + adata.write_h5ad("${meta.id}_${meta.input_type}_matrix.h5ad") - print("Wrote h5ad file to {}".format("combined_${meta.id}_matrix.h5ad")) + print("Wrote h5ad file to {}".format("${meta.id}_${meta.input_type}_matrix.h5ad")) # dump versions dump_versions() diff --git a/subworkflows/local/h5ad_conversion.nf b/subworkflows/local/h5ad_conversion.nf index 3289b8ae..70e1dd7a 100644 --- a/subworkflows/local/h5ad_conversion.nf +++ b/subworkflows/local/h5ad_conversion.nf @@ -16,7 +16,7 @@ workflow H5AD_CONVERSION { // Concat all raw and unfiltered h5ad files // ch_concat_h5ad_input = ch_h5ads - .map{ meta, file -> [ [id: meta.input_type], file ]} + .map{ meta, file -> [ [id: 'combined', input_type: meta.input_type], file ]} .groupTuple() CONCAT_H5AD ( From e25b179f629cff489b7ea47d4c40a3ca7fe1bd18 Mon Sep 17 00:00:00 2001 From: nictru Date: Sat, 11 Jan 2025 21:10:29 +0100 Subject: [PATCH 08/10] Decrease number of expected executed tasks for all aligners --- tests/main_pipeline_alevin.nf.test | 2 +- tests/main_pipeline_cellranger.nf.test | 2 +- tests/main_pipeline_cellrangermulti.nf.test | 2 +- tests/main_pipeline_kallisto.nf.test | 2 +- tests/main_pipeline_star.nf.test | 2 +- 5 files changed, 5 insertions(+), 5 deletions(-) diff --git a/tests/main_pipeline_alevin.nf.test b/tests/main_pipeline_alevin.nf.test index 398e98d2..7ed23a61 100644 --- a/tests/main_pipeline_alevin.nf.test +++ b/tests/main_pipeline_alevin.nf.test @@ -25,7 +25,7 @@ nextflow_pipeline { {assert workflow.success}, // How many tasks were executed? - {assert workflow.trace.tasks().size() == 17}, + {assert workflow.trace.tasks().size() == 15}, // How many results were produced? {assert path("${outputDir}/results_alevin").list().size() == 5}, diff --git a/tests/main_pipeline_cellranger.nf.test b/tests/main_pipeline_cellranger.nf.test index f16dbed6..302264b3 100644 --- a/tests/main_pipeline_cellranger.nf.test +++ b/tests/main_pipeline_cellranger.nf.test @@ -25,7 +25,7 @@ nextflow_pipeline { {assert workflow.success}, // How many tasks were executed? - {assert workflow.trace.tasks().size() == 24}, + {assert workflow.trace.tasks().size() == 22}, // How many results were produced? {assert path("${outputDir}/results_cellranger").list().size() == 4}, diff --git a/tests/main_pipeline_cellrangermulti.nf.test b/tests/main_pipeline_cellrangermulti.nf.test index 263e486a..3a270b6a 100644 --- a/tests/main_pipeline_cellrangermulti.nf.test +++ b/tests/main_pipeline_cellrangermulti.nf.test @@ -34,7 +34,7 @@ nextflow_pipeline { {assert workflow.success}, // How many tasks were executed? - {assert workflow.trace.tasks().size() == 85}, + {assert workflow.trace.tasks().size() == 83}, // How many results were produced? {assert path("${outputDir}/results_cellrangermulti").list().size() == 4}, diff --git a/tests/main_pipeline_kallisto.nf.test b/tests/main_pipeline_kallisto.nf.test index 50ea4b8f..ddb0460c 100644 --- a/tests/main_pipeline_kallisto.nf.test +++ b/tests/main_pipeline_kallisto.nf.test @@ -25,7 +25,7 @@ nextflow_pipeline { {assert workflow.success}, // How many tasks were executed? - {assert workflow.trace.tasks().size() == 15}, + {assert workflow.trace.tasks().size() == 13}, // How many results were produced? {assert path("${outputDir}/results_kallisto").list().size() == 4}, diff --git a/tests/main_pipeline_star.nf.test b/tests/main_pipeline_star.nf.test index ef314358..33094b27 100644 --- a/tests/main_pipeline_star.nf.test +++ b/tests/main_pipeline_star.nf.test @@ -25,7 +25,7 @@ nextflow_pipeline { {assert workflow.success}, // How many tasks were executed? - {assert workflow.trace.tasks().size() == 23}, + {assert workflow.trace.tasks().size() == 21}, // How many results were produced? {assert path("${outputDir}/results_star").list().size() == 4}, From a8e46f2b2d65c08be5624f1cdd31eff40b70a884 Mon Sep 17 00:00:00 2001 From: nictru Date: Sat, 11 Jan 2025 21:33:23 +0100 Subject: [PATCH 09/10] Adjust remaining task number tests --- tests/main_pipeline_cellranger.nf.test | 2 +- tests/main_pipeline_cellrangermulti.nf.test | 2 +- tests/main_pipeline_star.nf.test | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/tests/main_pipeline_cellranger.nf.test b/tests/main_pipeline_cellranger.nf.test index 302264b3..2e0186ed 100644 --- a/tests/main_pipeline_cellranger.nf.test +++ b/tests/main_pipeline_cellranger.nf.test @@ -25,7 +25,7 @@ nextflow_pipeline { {assert workflow.success}, // How many tasks were executed? - {assert workflow.trace.tasks().size() == 22}, + {assert workflow.trace.tasks().size() == 20}, // How many results were produced? {assert path("${outputDir}/results_cellranger").list().size() == 4}, diff --git a/tests/main_pipeline_cellrangermulti.nf.test b/tests/main_pipeline_cellrangermulti.nf.test index 3a270b6a..9a725f13 100644 --- a/tests/main_pipeline_cellrangermulti.nf.test +++ b/tests/main_pipeline_cellrangermulti.nf.test @@ -34,7 +34,7 @@ nextflow_pipeline { {assert workflow.success}, // How many tasks were executed? - {assert workflow.trace.tasks().size() == 83}, + {assert workflow.trace.tasks().size() == 57}, // How many results were produced? {assert path("${outputDir}/results_cellrangermulti").list().size() == 4}, diff --git a/tests/main_pipeline_star.nf.test b/tests/main_pipeline_star.nf.test index 33094b27..f3660c27 100644 --- a/tests/main_pipeline_star.nf.test +++ b/tests/main_pipeline_star.nf.test @@ -25,7 +25,7 @@ nextflow_pipeline { {assert workflow.success}, // How many tasks were executed? - {assert workflow.trace.tasks().size() == 21}, + {assert workflow.trace.tasks().size() == 19}, // How many results were produced? {assert path("${outputDir}/results_star").list().size() == 4}, From d99d05cb3f9849e0866a2fea2f662bffabcb0475 Mon Sep 17 00:00:00 2001 From: nictru Date: Wed, 15 Jan 2025 09:17:41 +0100 Subject: [PATCH 10/10] Update cellbender/removebackground sha --- modules.json | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/modules.json b/modules.json index aa6c65f8..313937b0 100644 --- a/modules.json +++ b/modules.json @@ -12,7 +12,7 @@ }, "cellbender/removebackground": { "branch": "master", - "git_sha": "06c8865e36741e05ad32ef70ab3fac127486af48", + "git_sha": "e01435f46b46adff59546d6a4b4002e31f6c4457", "installed_by": ["h5ad_removebackground_barcodes_cellbender_anndata", "modules"] }, "cellranger/count": {