From 067cd1f0f84e75ba7f9a33f04ce89e5582cd04e4 Mon Sep 17 00:00:00 2001 From: yumisims Date: Mon, 16 Sep 2024 15:58:57 +0100 Subject: [PATCH 01/14] trio mode --- assets/test.yaml | 5 +++++ conf/base.config | 8 ++++++++ modules.json | 5 +++++ subworkflows/local/genomescope_model.nf | 18 ++++++++++++++++++ subworkflows/local/prepare_input.nf | 9 +++++++++ subworkflows/local/raw_assembly.nf | 7 +++++-- workflows/genomeassembly.nf | 6 ++++-- 7 files changed, 54 insertions(+), 4 deletions(-) diff --git a/assets/test.yaml b/assets/test.yaml index 423fdc22..f85b648f 100644 --- a/assets/test.yaml +++ b/assets/test.yaml @@ -8,6 +8,11 @@ dataset: pacbio: reads: - reads: https://tolit.cog.sanger.ac.uk/test-data/Undibacterium_unclassified/genomic_data/baUndUnlc1/pacbio/fasta/HiFi.reads.fasta + trio: + matreads: + - https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/genomics/sarscov2/illumina/fastq/test_1.fastq.gz + patreads: + - https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/genomics/sarscov2/illumina/fastq/test_2.fastq.gz HiC: reads: - reads: https://tolit.cog.sanger.ac.uk/test-data/Undibacterium_unclassified/genomic_data/baUndUnlc1/hic-arima2/41741_2%237.sub.cram diff --git a/conf/base.config b/conf/base.config index 3355e30e..1efe8ef7 100644 --- a/conf/base.config +++ b/conf/base.config @@ -75,6 +75,14 @@ process { memory = { check_max( 72.GB * task.attempt, 'memory' ) } } + withName: YAK_COUNT_MAT { + ext.prefix = { "${meta.id}_yak_mat" } + } + + withName: YAK_COUNT_PAT { + ext.prefix = { "${meta.id}_yak_pat" } + } + withName:CUSTOM_DUMPSOFTWAREVERSIONS { cache = false } diff --git a/modules.json b/modules.json index 1fd59692..f9f7be15 100644 --- a/modules.json +++ b/modules.json @@ -249,6 +249,11 @@ "branch": "master", "git_sha": "0f8a77ff00e65eaeebc509b8156eaa983192474b", "installed_by": ["modules"] + }, + "yak/count": { + "branch": "master", + "git_sha": "06c8865e36741e05ad32ef70ab3fac127486af48", + "installed_by": ["modules"] } } 
} diff --git a/subworkflows/local/genomescope_model.nf b/subworkflows/local/genomescope_model.nf index f2be25d8..ee177a1d 100644 --- a/subworkflows/local/genomescope_model.nf +++ b/subworkflows/local/genomescope_model.nf @@ -2,11 +2,15 @@ include { CAT_CAT as CAT_CAT_READS } from "../../modules/nf-core/cat/cat/main" include { FASTK_FASTK } from "../../modules/nf-core/fastk/fastk/main" include { FASTK_HISTEX } from '../../modules/nf-core/fastk/histex/main' include { GENESCOPEFK } from "../../modules/nf-core/genescopefk/main" +include { YAK_COUNT as YAK_COUNT_MAT } from "../../modules/nf-core/yak/count/main" +include { YAK_COUNT as YAK_COUNT_PAT } from "../../modules/nf-core/yak/count/main" workflow GENOMESCOPE_MODEL { take: reads // [meta, [reads]] + matreads // [meta, [matreads]] + patreads // [meta, [patreads]] main: ch_versions = Channel.empty() @@ -42,6 +46,18 @@ workflow GENOMESCOPE_MODEL { FASTK_HISTEX( FASTK_FASTK.out.hist ) ch_versions = ch_versions.mix(FASTK_HISTEX.out.versions) + // + // MODULE: YAK TO PRODUCE MAT DATABASE + // + YAK_COUNT_MAT( matreads ) + ch_versions = ch_versions.mix(YAK_COUNT_MAT.out.versions) + + // + // MODULE: YAK TO PRODUCE PAT DATABASE + // + YAK_COUNT_PAT( patreads ) + ch_versions = ch_versions.mix(YAK_COUNT_PAT.out.versions) + // // MODULE: GENERATE GENOMESCOPE KMER COVERAGE MODEL // @@ -52,6 +68,8 @@ workflow GENOMESCOPE_MODEL { model = GENESCOPEFK.out.model hist = FASTK_FASTK.out.hist ktab = FASTK_FASTK.out.ktab + matdb = YAK_COUNT_MAT.out.yak.map{ meta, matyak -> matyak} + patdb = YAK_COUNT_PAT.out.yak.map{ meta, patyak -> patyak} versions = ch_versions } diff --git a/subworkflows/local/prepare_input.nf b/subworkflows/local/prepare_input.nf index 63d36902..af798fea 100644 --- a/subworkflows/local/prepare_input.nf +++ b/subworkflows/local/prepare_input.nf @@ -49,6 +49,13 @@ workflow PREPARE_INPUT { hic_ch: ( data.HiC ? 
[ [id: data.id ], data.HiC.reads.collect { file( it.reads, checkIfExists: true )}] : []) + matreads_ch: ( data.trio ? [ [id: data.id ], + data.trio.matreads.collect { file( it, checkIfExists: true ) } ] + : []) + patreads_ch: ( data.trio ? [ [id: data.id ], + data.trio.patreads.collect { file( it, checkIfExists: true ) } ] + : []) + } .set{ dataset_ch } @@ -74,6 +81,8 @@ workflow PREPARE_INPUT { hic = hic_ch hifi = dataset_ch.pacbio_ch + matreads = dataset_ch.matreads_ch + patreads = dataset_ch.patreads_ch illumina_10X = dataset_ch.illumina_10X_ch busco = busco_ch mito = ch_yml_data.mito diff --git a/subworkflows/local/raw_assembly.nf b/subworkflows/local/raw_assembly.nf index 703efbf4..c53d7611 100644 --- a/subworkflows/local/raw_assembly.nf +++ b/subworkflows/local/raw_assembly.nf @@ -11,6 +11,8 @@ workflow RAW_ASSEMBLY { hifi_reads // channel: [ val(meta), [ datafile ] ] hic_reads // channel: [ datafile ] hifiasm_hic_on // val: True/False + matdb + patdb main: ch_versions = Channel.empty() @@ -18,7 +20,8 @@ workflow RAW_ASSEMBLY { // // MODULE: RUN HIFIASM IN STANDARD WAY // - HIFIASM_PRI(hifi_reads, [], [], [], [], []) + patdb.view() + HIFIASM_PRI(hifi_reads, patdb, matdb, [], [], []) ch_versions = ch_versions.mix(HIFIASM_PRI.out.versions) // @@ -39,7 +42,7 @@ workflow RAW_ASSEMBLY { // // MODULE: RUN HIFIASM IN HIC MODE // - HIFIASM_HIC(hifi_reads, [], [], [], [], hic_reads) + HIFIASM_HIC(hifi_reads, patdb, matdb, [], [], hic_reads) // // MODULE: CONVERT HIFIASM-HIC PRIMARY CONTIGS TO FASTA diff --git a/workflows/genomeassembly.nf b/workflows/genomeassembly.nf index 2e5e3899..1822025d 100644 --- a/workflows/genomeassembly.nf +++ b/workflows/genomeassembly.nf @@ -97,6 +97,8 @@ workflow GENOMEASSEMBLY { // LOGIC: CREATE A VARIABLE SERVING AS AN ALIAS FOR HIFI READS CHANNEL // PREPARE_INPUT.out.hifi.set{ hifi_reads_ch } + PREPARE_INPUT.out.matreads.set{ mat_reads_ch} + PREPARE_INPUT.out.patreads.set{ pat_reads_ch} // // LOGIC: SEPARATE READS PATHS INTO A 
DIFFERENT CHANNEL @@ -106,14 +108,14 @@ workflow GENOMEASSEMBLY { // // SUBWORKFLOW: GENERATE KMER DATABASE AND PROFILE MODEL // - GENOMESCOPE_MODEL( hifi_reads_ch ) + GENOMESCOPE_MODEL( hifi_reads_ch, mat_reads_ch, pat_reads_ch ) ch_versions = ch_versions.mix(GENOMESCOPE_MODEL.out.versions) // // SUBWORKFLOW: RUN A HIFIASM ASSEMBLY ON THE HIFI READS; ALSO CREATE // A HIFIASM RUN IN HIC MODE IF THE FLAG IS SWITCHED ON // - RAW_ASSEMBLY( hifi_reads_ch, hic_reads_ch, hifiasm_hic_on ) + RAW_ASSEMBLY( hifi_reads_ch, hic_reads_ch, hifiasm_hic_on, GENOMESCOPE_MODEL.out.patdb, GENOMESCOPE_MODEL.out.matdb ) ch_versions = ch_versions.mix(RAW_ASSEMBLY.out.versions) // From f07e8bd5a19217ae852f54b164ac2ff4f428977f Mon Sep 17 00:00:00 2001 From: yumisims Date: Mon, 23 Sep 2024 18:05:29 +0100 Subject: [PATCH 02/14] updates --- assets/test.yaml | 11 +- modules.json | 2 +- modules/nf-core/merquryfk/merquryfk/main.nf | 67 ++- modules/nf-core/merquryfk/merquryfk/meta.yml | 107 +++-- .../merquryfk/merquryfk/tests/main.nf.test | 170 +++++++ .../merquryfk/tests/main.nf.test.snap | 454 ++++++++++++++++++ .../merquryfk/merquryfk/tests/nextflow.config | 8 + .../merquryfk/tests/nextflow.pdf.config | 9 + .../merquryfk/tests/nextflow.png.config | 9 + .../merquryfk/tests/nextflow.trio.config | 9 + .../merquryfk/merquryfk/tests/tags.yml | 2 + modules/nf-core/yak/count/environment.yml | 7 + modules/nf-core/yak/count/main.nf | 49 ++ modules/nf-core/yak/count/meta.yml | 47 ++ modules/nf-core/yak/count/tests/main.nf.test | 81 ++++ .../nf-core/yak/count/tests/main.nf.test.snap | 107 +++++ modules/nf-core/yak/count/tests/tags.yml | 2 + subworkflows/local/genome_statistics.nf | 4 +- subworkflows/local/genomescope_model.nf | 46 +- subworkflows/local/prepare_input.nf | 7 +- subworkflows/local/raw_assembly.nf | 1 - subworkflows/local/trio_mode.nf | 40 ++ workflows/genomeassembly.nf | 15 +- 23 files changed, 1158 insertions(+), 96 deletions(-) create mode 100644 
modules/nf-core/merquryfk/merquryfk/tests/main.nf.test create mode 100644 modules/nf-core/merquryfk/merquryfk/tests/main.nf.test.snap create mode 100644 modules/nf-core/merquryfk/merquryfk/tests/nextflow.config create mode 100644 modules/nf-core/merquryfk/merquryfk/tests/nextflow.pdf.config create mode 100644 modules/nf-core/merquryfk/merquryfk/tests/nextflow.png.config create mode 100644 modules/nf-core/merquryfk/merquryfk/tests/nextflow.trio.config create mode 100644 modules/nf-core/merquryfk/merquryfk/tests/tags.yml create mode 100644 modules/nf-core/yak/count/environment.yml create mode 100644 modules/nf-core/yak/count/main.nf create mode 100644 modules/nf-core/yak/count/meta.yml create mode 100644 modules/nf-core/yak/count/tests/main.nf.test create mode 100644 modules/nf-core/yak/count/tests/main.nf.test.snap create mode 100644 modules/nf-core/yak/count/tests/tags.yml create mode 100644 subworkflows/local/trio_mode.nf diff --git a/assets/test.yaml b/assets/test.yaml index f85b648f..9749f958 100644 --- a/assets/test.yaml +++ b/assets/test.yaml @@ -8,16 +8,17 @@ dataset: pacbio: reads: - reads: https://tolit.cog.sanger.ac.uk/test-data/Undibacterium_unclassified/genomic_data/baUndUnlc1/pacbio/fasta/HiFi.reads.fasta - trio: - matreads: - - https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/genomics/sarscov2/illumina/fastq/test_1.fastq.gz - patreads: - - https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/genomics/sarscov2/illumina/fastq/test_2.fastq.gz + #trio: + #matreads: + #- https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/genomics/sarscov2/illumina/fastq/test_1.fastq.gz + #patreads: + #- https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/genomics/sarscov2/illumina/fastq/test_2.fastq.gz HiC: reads: - reads: https://tolit.cog.sanger.ac.uk/test-data/Undibacterium_unclassified/genomic_data/baUndUnlc1/hic-arima2/41741_2%237.sub.cram hic_motif: GATC,GANTC,CTNAG,TTAA hic_aligner: bwamem2 
+trio_mode: non_trio busco: lineage: bacteria_odb10 mito: diff --git a/modules.json b/modules.json index f9f7be15..209ae70c 100644 --- a/modules.json +++ b/modules.json @@ -116,7 +116,7 @@ }, "merquryfk/merquryfk": { "branch": "master", - "git_sha": "0f8a77ff00e65eaeebc509b8156eaa983192474b", + "git_sha": "95f7516731f960e86bff900b9f1d80a8904bffd8", "installed_by": ["modules"] }, "minimap2/align": { diff --git a/modules/nf-core/merquryfk/merquryfk/main.nf b/modules/nf-core/merquryfk/merquryfk/main.nf index 080575e0..1ee8b35f 100644 --- a/modules/nf-core/merquryfk/merquryfk/main.nf +++ b/modules/nf-core/merquryfk/merquryfk/main.nf @@ -5,39 +5,42 @@ process MERQURYFK_MERQURYFK { // WARN: Version information not provided by tool on CLI. Please update version string below when bumping container versions. container 'ghcr.io/nbisweden/fastk_genescopefk_merquryfk:1.2' - // Exit if running this module with -profile conda / -profile mamba - if (workflow.profile.tokenize(',').intersect(['conda', 'mamba']).size() >= 1) { - exit 1, "MERQURYFK_MERQURYFK module does not support Conda. Please use Docker / Singularity / Podman instead." 
- } - input: - tuple val(meta), path(fastk_hist), path(fastk_ktab), path(assembly), path(haplotigs) + tuple val(meta), path(fastk_hist),path(fastk_ktab),path(assembly),path(haplotigs) + path matktab //optional + path patktab //optional output: - tuple val(meta), path("${prefix}.completeness.stats") , emit: stats - tuple val(meta), path("${prefix}.*_only.bed") , emit: bed - tuple val(meta), path("${prefix}.*.qv") , emit: assembly_qv - tuple val(meta), path("${prefix}.*.spectra-cn.fl.png"), emit: spectra_cn_fl_png, optional: true - tuple val(meta), path("${prefix}.*.spectra-cn.fl.pdf"), emit: spectra_cn_fl_pdf, optional: true - tuple val(meta), path("${prefix}.*.spectra-cn.ln.png"), emit: spectra_cn_ln_png, optional: true - tuple val(meta), path("${prefix}.*.spectra-cn.ln.pdf"), emit: spectra_cn_ln_pdf, optional: true - tuple val(meta), path("${prefix}.*.spectra-cn.st.png"), emit: spectra_cn_st_png, optional: true - tuple val(meta), path("${prefix}.*.spectra-cn.st.pdf"), emit: spectra_cn_st_pdf, optional: true - tuple val(meta), path("${prefix}.qv") , emit: qv - tuple val(meta), path("${prefix}.spectra-asm.fl.png") , emit: spectra_asm_fl_png, optional: true - tuple val(meta), path("${prefix}.spectra-asm.fl.pdf") , emit: spectra_asm_fl_pdf, optional: true - tuple val(meta), path("${prefix}.spectra-asm.ln.png") , emit: spectra_asm_ln_png, optional: true - tuple val(meta), path("${prefix}.spectra-asm.ln.pdf") , emit: spectra_asm_ln_pdf, optional: true - tuple val(meta), path("${prefix}.spectra-asm.st.png") , emit: spectra_asm_st_png, optional: true - tuple val(meta), path("${prefix}.spectra-asm.st.pdf") , emit: spectra_asm_st_pdf, optional: true - path "versions.yml" , emit: versions + tuple val(meta), path("${prefix}.completeness.stats") , emit: stats + tuple val(meta), path("${prefix}.*_only.bed") , emit: bed + tuple val(meta), path("${prefix}.*.qv") , emit: assembly_qv + tuple val(meta), path("${prefix}.*.spectra-cn.fl.{png,pdf}") , emit: spectra_cn_fl, optional: 
true + tuple val(meta), path("${prefix}.*.spectra-cn.ln.{png,pdf}") , emit: spectra_cn_ln, optional: true + tuple val(meta), path("${prefix}.*.spectra-cn.st.{png,pdf}") , emit: spectra_cn_st, optional: true + tuple val(meta), path("${prefix}.qv") , emit: qv + tuple val(meta), path("${prefix}.spectra-asm.fl.{png,pdf}") , emit: spectra_asm_fl, optional: true + tuple val(meta), path("${prefix}.spectra-asm.ln.{png,pdf}") , emit: spectra_asm_ln, optional: true + tuple val(meta), path("${prefix}.spectra-asm.st.{png,pdf}") , emit: spectra_asm_st, optional: true + tuple val(meta), path("${prefix}.phased_block.bed") , emit: phased_block_bed, optional: true + tuple val(meta), path("${prefix}.phased_block.stats") , emit: phased_block_stats, optional: true + tuple val(meta), path("${prefix}.continuity.N.{pdf,png}") , emit: continuity_N, optional: true + tuple val(meta), path("${prefix}.block.N.{pdf,png}") , emit: block_N, optional: true + tuple val(meta), path("${prefix}.block.blob.{pdf,png}") , emit: block_blob, optional: true + tuple val(meta), path("${prefix}.hapmers.blob.{pdf,png}") , emit: hapmers_blob, optional: true + path "versions.yml" , emit: versions when: task.ext.when == null || task.ext.when script: + // Exit if running this module with -profile conda / -profile mamba + if (workflow.profile.tokenize(',').intersect(['conda', 'mamba']).size() >= 1) { + error "MERQURYFK_MERQURYFK module does not support Conda. Please use Docker / Singularity / Podman instead." + } def args = task.ext.args ?: '' prefix = task.ext.prefix ?: "${meta.id}" + def mat_ktab = matktab ? "${matktab.find{ it.toString().endsWith(".ktab") }}" : '' + def pat_ktab = patktab ? "${patktab.find{ it.toString().endsWith(".ktab") }}" : '' def FASTK_VERSION = 'f18a4e6d2207539f7b84461daebc54530a9559b0' // WARN: Version information not provided by tool on CLI. Please update this string when bumping container versions. 
def MERQURY_VERSION = '8ae344092df5dcaf83cfb7f90f662597a9b1fc61' // WARN: Version information not provided by tool on CLI. Please update this string when bumping container versions. """ @@ -45,6 +48,8 @@ process MERQURYFK_MERQURYFK { $args \\ -T$task.cpus \\ ${fastk_ktab.find{ it.toString().endsWith(".ktab") }} \\ + ${mat_ktab} \\ + ${pat_ktab} \\ $assembly \\ $haplotigs \\ $prefix @@ -56,4 +61,20 @@ process MERQURYFK_MERQURYFK { r: \$( R --version | sed '1!d; s/.*version //; s/ .*//' ) END_VERSIONS """ + stub: + prefix = task.ext.prefix ?: "${meta.id}" + def FASTK_VERSION = 'f18a4e6d2207539f7b84461daebc54530a9559b0' // WARN: Version information not provided by tool on CLI. Please update this string when bumping container versions. + def MERQURY_VERSION = '8ae344092df5dcaf83cfb7f90f662597a9b1fc61' // WARN: Version information not provided by tool on CLI. Please update this string when bumping container versions. + """ + touch ${prefix}.completeness.stats + touch ${prefix}.qv + touch ${prefix}._.qv + touch ${prefix}._only.bed + cat <<-END_VERSIONS > versions.yml + "${task.process}": + fastk: $FASTK_VERSION + merquryfk: $MERQURY_VERSION + r: \$( R --version | sed '1!d; s/.*version //; s/ .*//' ) + END_VERSIONS + """ } diff --git a/modules/nf-core/merquryfk/merquryfk/meta.yml b/modules/nf-core/merquryfk/merquryfk/meta.yml index ad89f8ba..bba26d12 100644 --- a/modules/nf-core/merquryfk/merquryfk/meta.yml +++ b/modules/nf-core/merquryfk/merquryfk/meta.yml @@ -1,16 +1,15 @@ name: "merquryfk_merquryfk" description: FastK based version of Merqury keywords: - - sort + - Merqury + - reference-free + - assembly evaluation tools: - "merquryfk": description: "FastK based version of Merqury" homepage: "https://github.com/thegenemyers/MERQURY.FK" - tool_dev_url: "https://github.com/thegenemyers/MERQURY.FK" - - licence: "https://github.com/thegenemyers/MERQURY.FK/blob/main/LICENSE" - + licence: ["https://github.com/thegenemyers/MERQURY.FK/blob/main/LICENSE"] input: - meta: type: 
map @@ -25,6 +24,14 @@ input: type: file description: Histogram ktab files from the program FastK (option -t) pattern: "*.ktab*" + - matktab: + type: file + description: trio maternal histogram ktab files from the program FastK (option -t) + pattern: "*.ktab*" + - patktab: + type: file + description: trio paternal histogram ktab files from the program FastK (option -t) + pattern: "*.ktab*" - assembly: type: file description: Genome (primary) assembly files (fasta format) @@ -33,7 +40,6 @@ input: type: file description: Assembly haplotigs (fasta format) pattern: ".fasta" - output: - meta: type: map @@ -52,54 +58,30 @@ output: type: file description: Assembly only kmer positions not supported by reads in bed format pattern: "*_only.bed" - - spectra_cn_fl_png: - type: file - description: "Unstacked copy number spectra filled plot in PNG format" - pattern: "*.spectra-cn.fl.png" - - spectra_cn_ln_png: - type: file - description: "Unstacked copy number spectra line plot in PNG format" - pattern: "*.spectra-cn.ln.png" - - spectra_cn_st_png: - type: file - description: "Stacked copy number spectra line plot in PNG format" - pattern: "*.spectra-cn.st.png" - - spectra_asm_fl_png: - type: file - description: "Unstacked assembly spectra filled plot in PNG format" - pattern: "*.spectra-asm.fl.png" - - spectra_asm_ln_png: - type: file - description: "Unstacked assembly spectra line plot in PNG format" - pattern: "*.spectra-asm.ln.png" - - spectra_asm_st_png: + - spectra_cn_fl: type: file - description: "Stacked assembly spectra line plot in PNG format" - pattern: "*.spectra-asm.st.png" - - spectra_cn_fl_pdf: + description: "Unstacked copy number spectra filled plot in PNG or PDF format" + pattern: "*.spectra-cn.fl.{png,pdf}" + - spectra_cn_ln: type: file - description: "Unstacked copy number spectra filled plot in PDF format" - pattern: "*.spectra-cn.fl.pdf" - - spectra_cn_ln_pdf: + description: "Unstacked copy number spectra line plot in PNG or PDF format" + pattern: 
"*.spectra-cn.ln.{png,pdf}" + - spectra_cn_st: type: file - description: "Unstacked copy number spectra line plot in PDF format" - pattern: "*.spectra-cn.ln.pdf" - - spectra_cn_st_pdf: + description: "Stacked copy number spectra line plot in PNG or PDF format" + pattern: "*.spectra-cn.st.{png,pdf}" + - spectra_asm_fl: type: file - description: "Stacked copy number spectra line plot in PDF format" - pattern: "*.spectra-cn.st.pdf" - - spectra_asm_fl_pdf: + description: "Unstacked assembly spectra filled plot in PNG or PDF format" + pattern: "*.spectra-asm.fl.{png,pdf}" + - spectra_asm_ln: type: file - description: "Unstacked assembly spectra filled plot in PDF format" - pattern: "*.spectra-asm.fl.pdf" - - spectra_asm_ln_pdf: + description: "Unstacked assembly spectra line plot in PNG or PDF format" + pattern: "*.spectra-asm.ln.{png,pdf}" + - spectra_asm_st: type: file - description: "Unstacked assembly spectra line plot in PDF format" - pattern: "*.spectra-asm.ln.pdf" - - spectra_asm_st_pdf: - type: file - description: "Stacked assembly spectra line plot in PDF format" - pattern: "*.spectra-asm.st.pdf" + description: "Stacked assembly spectra line plot in PNG or PDF format" + pattern: "*.spectra-asm.st.{png,pdf}" - assembly_qv: type: file description: "error and qv table for each scaffold of the assembly" @@ -108,6 +90,33 @@ output: type: file description: "error and qv of each assembly as a whole" pattern: "*.qv" - + - phased_block_bed: + type: file + description: Assembly kmer positions seperated by block in bed format + pattern: "*.phased.block.bed" + - phased_block_stats: + type: file + description: phased assembly statistics file + pattern: "*.phased.block.stats" + - continuity_N: + type: file + description: "Stacked assembly N continuity plot in PNG or PDF format" + pattern: "*.continuity.N.{png,pdf}" + - block_N: + type: file + description: "Stacked assembly N continuity by block plot in PNG or PDF format" + pattern: "*.block.N.{png,pdf}" + - block_blob: + 
type: file + description: "Stacked assembly block plot in PNG or PDF format" + pattern: "*.block.blob.{png,pdf}" + - hapmers_blob: + type: file + description: "Stacked assembly hapmers block plot in PNG or PDF format" + pattern: "*.hapmers.blob.{png,pdf}" authors: - "@mahesh-panchal" + - "@yumisims" +maintainers: + - "@mahesh-panchal" + - "@yumisims" diff --git a/modules/nf-core/merquryfk/merquryfk/tests/main.nf.test b/modules/nf-core/merquryfk/merquryfk/tests/main.nf.test new file mode 100644 index 00000000..c46843c6 --- /dev/null +++ b/modules/nf-core/merquryfk/merquryfk/tests/main.nf.test @@ -0,0 +1,170 @@ +nextflow_process { + + name "Test Process MERQURYFK" + script "../main.nf" + process "MERQURYFK_MERQURYFK" + + tag "modules" + tag "modules_nfcore" + tag "merquryfk" + tag "merquryfk/merquryfk" + tag "fastk" + tag "fastk/fastk" + + setup { + run("FASTK_FASTK") { + script "../../../fastk/fastk" + process { + """ + input[0] = [ + [ id:'test', single_end:false ], + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/fastq/test_1.fastq.gz', checkIfExists: true) + ] + """ + } + } + run("FASTK_FASTK", alias: "FASTK_MAT") { + script "../../../fastk/fastk" + process { + """ + input[0] = [ + [ id:'test', single_end:false ], + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/fastq/test_1.fastq.gz', checkIfExists: true), + ] + """ + } + } + + run("FASTK_FASTK", alias: "FASTK_PAT") { + script "../../../fastk/fastk" + process { + """ + input[0] = [ + [ id:'test', single_end:false ], + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/fastq/test_2.fastq.gz', checkIfExists: true), + ] + """ + } + } + } + + test("homo_sapiens - Illumina - png") { + config "./nextflow.png.config" + when { + process { + """ + assembly = [ + [ id:'test', single_end:true ], + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/genome.fasta', checkIfExists: true) + ] + + haplotigs = [ + [ id:'test', 
single_end:true ], [] + ] + input[0] = FASTK_FASTK.out.hist.join(FASTK_FASTK.out.ktab).join(Channel.from(assembly)).join(Channel.from(haplotigs)) + input[1] = [] + input[2] = [] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + + } + + test("homo_sapiens - Illumina - pdf") { + config "./nextflow.pdf.config" + when { + process { + """ + assembly = [ + [ id:'test', single_end:true ], + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/genome.fasta', checkIfExists: true) + ] + + haplotigs = [ + [ id:'test', single_end:true ], [] + ] + input[0] = FASTK_FASTK.out.hist.join(FASTK_FASTK.out.ktab).join(Channel.from(assembly)).join(Channel.from(haplotigs)) + input[1] = [] + input[2] = [] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + + } + + test("homo_sapiens - Illumina - trio") { + config "./nextflow.trio.config" + when { + process { + """ + assembly = [ + [ id:'test', single_end:true ], + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/genome.fasta', checkIfExists: true) + ] + + haplotigs = [ + [ id:'test', single_end:true ], [] + ] + input[0] = FASTK_FASTK.out.hist.join(FASTK_FASTK.out.ktab).join(Channel.from(assembly)).join(Channel.from(haplotigs)) + input[1] = FASTK_MAT.out.ktab + input[2] = FASTK_PAT.out.ktab + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + + } + + test("homo_sapiens - Illumina - stub") { + options "-stub" + config "./nextflow.pdf.config" + when { + process { + """ + assembly = [ + [ id:'test', single_end:true ], + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/genome.fasta', checkIfExists: true) + ] + + haplotigs = [ + [ id:'test', single_end:true ], [] + ] + input[0] = 
FASTK_FASTK.out.hist.join(FASTK_FASTK.out.ktab).join(Channel.from(assembly)).join(Channel.from(haplotigs)) + input[1] = [] + input[2] = [] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + + } + + + +} \ No newline at end of file diff --git a/modules/nf-core/merquryfk/merquryfk/tests/main.nf.test.snap b/modules/nf-core/merquryfk/merquryfk/tests/main.nf.test.snap new file mode 100644 index 00000000..f7ce47f0 --- /dev/null +++ b/modules/nf-core/merquryfk/merquryfk/tests/main.nf.test.snap @@ -0,0 +1,454 @@ +{ + "homo_sapiens - Illumina - stub": { + "content": [ + { + "0": [ + + ], + "1": [ + + ], + "10": [ + + ], + "11": [ + + ], + "12": [ + + ], + "13": [ + + ], + "14": [ + + ], + "15": [ + + ], + "16": [ + + ], + "2": [ + + ], + "3": [ + + ], + "4": [ + + ], + "5": [ + + ], + "6": [ + + ], + "7": [ + + ], + "8": [ + + ], + "9": [ + + ], + "assembly_qv": [ + + ], + "bed": [ + + ], + "block_N": [ + + ], + "block_blob": [ + + ], + "continuity_N": [ + + ], + "hapmers_blob": [ + + ], + "phased_block_bed": [ + + ], + "phased_block_stats": [ + + ], + "qv": [ + + ], + "spectra_asm_fl": [ + + ], + "spectra_asm_ln": [ + + ], + "spectra_asm_st": [ + + ], + "spectra_cn_fl": [ + + ], + "spectra_cn_ln": [ + + ], + "spectra_cn_st": [ + + ], + "stats": [ + + ], + "versions": [ + + ] + } + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.04.2" + }, + "timestamp": "2024-08-15T15:22:52.240373868" + }, + "homo_sapiens - Illumina - pdf": { + "content": [ + { + "0": [ + + ], + "1": [ + + ], + "10": [ + + ], + "11": [ + + ], + "12": [ + + ], + "13": [ + + ], + "14": [ + + ], + "15": [ + + ], + "16": [ + + ], + "2": [ + + ], + "3": [ + + ], + "4": [ + + ], + "5": [ + + ], + "6": [ + + ], + "7": [ + + ], + "8": [ + + ], + "9": [ + + ], + "assembly_qv": [ + + ], + "bed": [ + + ], + "block_N": [ + + ], + "block_blob": [ + + ], + "continuity_N": [ + + ], + "hapmers_blob": [ + + ], + "phased_block_bed": [ + + ], + 
"phased_block_stats": [ + + ], + "qv": [ + + ], + "spectra_asm_fl": [ + + ], + "spectra_asm_ln": [ + + ], + "spectra_asm_st": [ + + ], + "spectra_cn_fl": [ + + ], + "spectra_cn_ln": [ + + ], + "spectra_cn_st": [ + + ], + "stats": [ + + ], + "versions": [ + + ] + } + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.04.2" + }, + "timestamp": "2024-08-15T15:22:19.530675341" + }, + "homo_sapiens - Illumina - png": { + "content": [ + { + "0": [ + + ], + "1": [ + + ], + "10": [ + + ], + "11": [ + + ], + "12": [ + + ], + "13": [ + + ], + "14": [ + + ], + "15": [ + + ], + "16": [ + + ], + "2": [ + + ], + "3": [ + + ], + "4": [ + + ], + "5": [ + + ], + "6": [ + + ], + "7": [ + + ], + "8": [ + + ], + "9": [ + + ], + "assembly_qv": [ + + ], + "bed": [ + + ], + "block_N": [ + + ], + "block_blob": [ + + ], + "continuity_N": [ + + ], + "hapmers_blob": [ + + ], + "phased_block_bed": [ + + ], + "phased_block_stats": [ + + ], + "qv": [ + + ], + "spectra_asm_fl": [ + + ], + "spectra_asm_ln": [ + + ], + "spectra_asm_st": [ + + ], + "spectra_cn_fl": [ + + ], + "spectra_cn_ln": [ + + ], + "spectra_cn_st": [ + + ], + "stats": [ + + ], + "versions": [ + + ] + } + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.04.2" + }, + "timestamp": "2024-08-15T15:21:57.682723412" + }, + "homo_sapiens - Illumina - trio": { + "content": [ + { + "0": [ + + ], + "1": [ + + ], + "10": [ + + ], + "11": [ + + ], + "12": [ + + ], + "13": [ + + ], + "14": [ + + ], + "15": [ + + ], + "16": [ + + ], + "2": [ + + ], + "3": [ + + ], + "4": [ + + ], + "5": [ + + ], + "6": [ + + ], + "7": [ + + ], + "8": [ + + ], + "9": [ + + ], + "assembly_qv": [ + + ], + "bed": [ + + ], + "block_N": [ + + ], + "block_blob": [ + + ], + "continuity_N": [ + + ], + "hapmers_blob": [ + + ], + "phased_block_bed": [ + + ], + "phased_block_stats": [ + + ], + "qv": [ + + ], + "spectra_asm_fl": [ + + ], + "spectra_asm_ln": [ + + ], + "spectra_asm_st": [ + + ], + "spectra_cn_fl": [ + + ], + "spectra_cn_ln": [ + + ], + 
"spectra_cn_st": [ + + ], + "stats": [ + + ], + "versions": [ + + ] + } + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.04.2" + }, + "timestamp": "2024-08-15T15:22:40.060937299" + } +} \ No newline at end of file diff --git a/modules/nf-core/merquryfk/merquryfk/tests/nextflow.config b/modules/nf-core/merquryfk/merquryfk/tests/nextflow.config new file mode 100644 index 00000000..ba1eebc9 --- /dev/null +++ b/modules/nf-core/merquryfk/merquryfk/tests/nextflow.config @@ -0,0 +1,8 @@ +process { + publishDir = { "${params.outdir}/${task.process.tokenize(':')[-1].tokenize('_')[0].toLowerCase()}" } + + withName: 'FASTK_.*' { + ext.args = '-t' + publishDir = [ enabled: false ] + } +} diff --git a/modules/nf-core/merquryfk/merquryfk/tests/nextflow.pdf.config b/modules/nf-core/merquryfk/merquryfk/tests/nextflow.pdf.config new file mode 100644 index 00000000..52beeaa3 --- /dev/null +++ b/modules/nf-core/merquryfk/merquryfk/tests/nextflow.pdf.config @@ -0,0 +1,9 @@ +process { + withName: 'FASTK_.*' { + ext.args = '-t' + publishDir = [ enabled: false ] + } + withName: 'MERQURYFK_MERQURYFK' { + ext.args = '-lfs -pdf' + } +} diff --git a/modules/nf-core/merquryfk/merquryfk/tests/nextflow.png.config b/modules/nf-core/merquryfk/merquryfk/tests/nextflow.png.config new file mode 100644 index 00000000..47c3d63e --- /dev/null +++ b/modules/nf-core/merquryfk/merquryfk/tests/nextflow.png.config @@ -0,0 +1,9 @@ +process { + withName: 'FASTK_.*' { + ext.args = '-t' + publishDir = [ enabled: false ] + } + withName: 'MERQURYFK_MERQURYFK' { + ext.args = '-lfs' + } +} diff --git a/modules/nf-core/merquryfk/merquryfk/tests/nextflow.trio.config b/modules/nf-core/merquryfk/merquryfk/tests/nextflow.trio.config new file mode 100644 index 00000000..47c3d63e --- /dev/null +++ b/modules/nf-core/merquryfk/merquryfk/tests/nextflow.trio.config @@ -0,0 +1,9 @@ +process { + withName: 'FASTK_.*' { + ext.args = '-t' + publishDir = [ enabled: false ] + } + withName: 'MERQURYFK_MERQURYFK' { + ext.args 
= '-lfs' + } +} diff --git a/modules/nf-core/merquryfk/merquryfk/tests/tags.yml b/modules/nf-core/merquryfk/merquryfk/tests/tags.yml new file mode 100644 index 00000000..7dcac99b --- /dev/null +++ b/modules/nf-core/merquryfk/merquryfk/tests/tags.yml @@ -0,0 +1,2 @@ +merquryfk/merquryfk: + - "modules/nf-core/merquryfk/merquryfk/**" diff --git a/modules/nf-core/yak/count/environment.yml b/modules/nf-core/yak/count/environment.yml new file mode 100644 index 00000000..907edbb6 --- /dev/null +++ b/modules/nf-core/yak/count/environment.yml @@ -0,0 +1,7 @@ +--- +# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/environment-schema.json +channels: + - conda-forge + - bioconda +dependencies: + - "bioconda::yak=0.1" diff --git a/modules/nf-core/yak/count/main.nf b/modules/nf-core/yak/count/main.nf new file mode 100644 index 00000000..c1e38885 --- /dev/null +++ b/modules/nf-core/yak/count/main.nf @@ -0,0 +1,49 @@ +process YAK_COUNT { + tag "$meta.id" + label 'process_low' + + conda "${moduleDir}/environment.yml" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? + 'https://depot.galaxyproject.org/singularity/yak:0.1--he4a0461_4': + 'biocontainers/yak:0.1--he4a0461_4' }" + + input: + tuple val(meta), path(fastq) + + output: + tuple val(meta), path("*.yak"), emit: yak + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + def prefix = task.ext.prefix ?: "${meta.id}" + input_command = meta.single_end ? 
"${fastq}" : "<(zcat ${fastq}) <(zcat ${fastq})" + """ + yak \\ + count \\ + $args \\ + -t${task.cpus} \\ + -o ${prefix}.yak \\ + $input_command + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + yak: \$(yak version) + END_VERSIONS + """ + + stub: + def args = task.ext.args ?: '' + def prefix = task.ext.prefix ?: "${meta.id}" + """ + touch ${prefix}.yak + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + yak: \$(yak version) + END_VERSIONS + """ +} diff --git a/modules/nf-core/yak/count/meta.yml b/modules/nf-core/yak/count/meta.yml new file mode 100644 index 00000000..c5e35302 --- /dev/null +++ b/modules/nf-core/yak/count/meta.yml @@ -0,0 +1,47 @@ +name: "yak_count" +description: a tool to build k-mer hash table for fasta and fastq files +keywords: + - kmer + - fastq + - sequence + - count + - assembly + +tools: + - "yak": + description: "Yet another k-mer analyzer" + homepage: "https://github.com/lh3/yak" + documentation: "https://github.com/lh3/yak/blob/master/README.md" + tool_dev_url: "https://github.com/lh3/yak" + licence: ["MIT"] + +input: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. `[ id:'sample1', single_end:false ]` + - fastq: + type: file + description: reads fastq/fasta file + pattern: "*.{fastq.gz,fq.gz,fasta.gz,fa.gz}" + +output: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. 
`[ id:'sample1', single_end:false ]` + - versions: + type: file + description: File containing software versions + pattern: "versions.yml" + - yak: + type: file + description: k-mer hash table of input + pattern: "*.{yak}" + +authors: + - "@yumisims" +maintainers: + - "@yumisims" diff --git a/modules/nf-core/yak/count/tests/main.nf.test b/modules/nf-core/yak/count/tests/main.nf.test new file mode 100644 index 00000000..8a003d71 --- /dev/null +++ b/modules/nf-core/yak/count/tests/main.nf.test @@ -0,0 +1,81 @@ +nextflow_process { + + name "Test Process YAK_COUNT" + script "../main.nf" + process "YAK_COUNT" + + tag "modules" + tag "modules_nfcore" + tag "yak" + tag "yak/count" + + test("sarscov2 - Illumina - se") { + + when { + process { + """ + input[0] = [ + [ id:'test', single_end:true ], // meta map + file(params.modules_testdata_base_path + "genomics/sarscov2/illumina/fastq/test_1.fastq.gz", checkIfExists: true) + ] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + + } + + test("sarscov2 - Illumina - pe") { + + when { + process { + """ + input[0] = [ + [ id:'test', single_end:false ], // meta map + [ file(params.modules_testdata_base_path + "genomics/sarscov2/illumina/fastq/test_1.fastq.gz", checkIfExists: true), + file(params.modules_testdata_base_path + "genomics/sarscov2/illumina/fastq/test_2.fastq.gz", checkIfExists: true)] + ] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + + } + + test("sarscov2 - Illumina - se - stub") { + + options "-stub" + + when { + process { + """ + input[0] = [ + [ id:'test', single_end:true ], // meta map + file(params.modules_testdata_base_path + "genomics/sarscov2/illumina/fastq/test_1.fastq.gz", checkIfExists: true) + ] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + + } + +} diff --git 
a/modules/nf-core/yak/count/tests/main.nf.test.snap b/modules/nf-core/yak/count/tests/main.nf.test.snap new file mode 100644 index 00000000..f8f9eaf8 --- /dev/null +++ b/modules/nf-core/yak/count/tests/main.nf.test.snap @@ -0,0 +1,107 @@ +{ + "sarscov2 - Illumina - se": { + "content": [ + { + "0": [ + [ + { + "id": "test", + "single_end": true + }, + "test.yak:md5,0f7375059550949b7ba619a984ece2da" + ] + ], + "1": [ + "versions.yml:md5,bf1f473f0df39c185adfaf52c9701874" + ], + "versions": [ + "versions.yml:md5,bf1f473f0df39c185adfaf52c9701874" + ], + "yak": [ + [ + { + "id": "test", + "single_end": true + }, + "test.yak:md5,0f7375059550949b7ba619a984ece2da" + ] + ] + } + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.04.2" + }, + "timestamp": "2024-07-30T13:14:15.064795726" + }, + "sarscov2 - Illumina - se - stub": { + "content": [ + { + "0": [ + [ + { + "id": "test", + "single_end": true + }, + "test.yak:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "1": [ + "versions.yml:md5,bf1f473f0df39c185adfaf52c9701874" + ], + "versions": [ + "versions.yml:md5,bf1f473f0df39c185adfaf52c9701874" + ], + "yak": [ + [ + { + "id": "test", + "single_end": true + }, + "test.yak:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ] + } + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.04.2" + }, + "timestamp": "2024-07-30T13:14:36.315145428" + }, + "sarscov2 - Illumina - pe": { + "content": [ + { + "0": [ + [ + { + "id": "test", + "single_end": false + }, + "test.yak:md5,f4d1b22744d1ba9619bf7b66c89ecf10" + ] + ], + "1": [ + "versions.yml:md5,bf1f473f0df39c185adfaf52c9701874" + ], + "versions": [ + "versions.yml:md5,bf1f473f0df39c185adfaf52c9701874" + ], + "yak": [ + [ + { + "id": "test", + "single_end": false + }, + "test.yak:md5,f4d1b22744d1ba9619bf7b66c89ecf10" + ] + ] + } + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.04.2" + }, + "timestamp": "2024-07-30T13:14:25.971171004" + } +} \ No newline at end of file diff --git a/modules/nf-core/yak/count/tests/tags.yml 
b/modules/nf-core/yak/count/tests/tags.yml new file mode 100644 index 00000000..f6dc1619 --- /dev/null +++ b/modules/nf-core/yak/count/tests/tags.yml @@ -0,0 +1,2 @@ +yak/count: + - "modules/nf-core/yak/count/**" diff --git a/subworkflows/local/genome_statistics.nf b/subworkflows/local/genome_statistics.nf index d4cfcb6b..db5bfa10 100644 --- a/subworkflows/local/genome_statistics.nf +++ b/subworkflows/local/genome_statistics.nf @@ -19,6 +19,8 @@ workflow GENOME_STATISTICS { lineage // channel: [ meta, /path/to/buscoDB, lineage ] hist // channel: [meta, fastk_hist files] ktab // channel: [meta, fastk_ktab files] + pktab // channel: [meta, fastk_ktab files] + mktab // channel: [meta, fastk_ktab files] busco_alt // channel: true/false main: @@ -83,7 +85,7 @@ workflow GENOME_STATISTICS { // // MODULE: RUN KMER ANALYSIS WITH MERQURYFK // - MERQURYFK_MERQURYFK ( ch_merq ) + MERQURYFK_MERQURYFK ( ch_merq, mktab, pktab) ch_versions = ch_versions.mix(MERQURYFK_MERQURYFK.out.versions.first()) emit: diff --git a/subworkflows/local/genomescope_model.nf b/subworkflows/local/genomescope_model.nf index ee177a1d..78443d8c 100644 --- a/subworkflows/local/genomescope_model.nf +++ b/subworkflows/local/genomescope_model.nf @@ -2,8 +2,7 @@ include { CAT_CAT as CAT_CAT_READS } from "../../modules/nf-core/cat/cat/main" include { FASTK_FASTK } from "../../modules/nf-core/fastk/fastk/main" include { FASTK_HISTEX } from '../../modules/nf-core/fastk/histex/main' include { GENESCOPEFK } from "../../modules/nf-core/genescopefk/main" -include { YAK_COUNT as YAK_COUNT_MAT } from "../../modules/nf-core/yak/count/main" -include { YAK_COUNT as YAK_COUNT_PAT } from "../../modules/nf-core/yak/count/main" +include { TRIO_MODE as TRIO_PROCESS } from '../../subworkflows/local/trio_mode' workflow GENOMESCOPE_MODEL { @@ -11,10 +10,17 @@ workflow GENOMESCOPE_MODEL { reads // [meta, [reads]] matreads // [meta, [matreads]] patreads // [meta, [patreads]] + trio_flag main: ch_versions = Channel.empty() + 
matdb_ch = Channel.empty() + matktab_ch = Channel.empty() + patdb_ch = Channel.empty() + patktab_ch = Channel.empty() + + trio_flag.view() // // MODULE: MERGE ALL READS IN ONE FILE // @@ -46,18 +52,33 @@ workflow GENOMESCOPE_MODEL { FASTK_HISTEX( FASTK_FASTK.out.hist ) ch_versions = ch_versions.mix(FASTK_HISTEX.out.versions) - // - // MODULE: YAK TO PRODUCE MAT DATABASE - // - YAK_COUNT_MAT( matreads ) - ch_versions = ch_versions.mix(YAK_COUNT_MAT.out.versions) // // MODULE: YAK TO PRODUCE PAT DATABASE // - YAK_COUNT_PAT( patreads ) - ch_versions = ch_versions.mix(YAK_COUNT_PAT.out.versions) + trio_flag + .combine( patreads ) + .combine( matreads ) + .branch { + trio: it[0] == "trio" + non_trio: it[0] == "nontrio" + } + .set{ trio_data } + + trio_data + .trio + .multiMap { trio_mode, pat_meta, pat_data, mat_meta, mat_data -> + pat: tuple( pat_meta, pat_data ) + mat: tuple( mat_meta, mat_data ) + } + .set{ ch_trio_data } + + TRIO_PROCESS ( + ch_trio_data.pat, + ch_trio_data.mat + ) + // // MODULE: GENERATE GENOMESCOPE KMER COVERAGE MODEL // @@ -68,9 +89,10 @@ workflow GENOMESCOPE_MODEL { model = GENESCOPEFK.out.model hist = FASTK_FASTK.out.hist ktab = FASTK_FASTK.out.ktab - matdb = YAK_COUNT_MAT.out.yak.map{ meta, matyak -> matyak} - patdb = YAK_COUNT_PAT.out.yak.map{ meta, patyak -> patyak} - + pktab = TRIO_PROCESS.out.pktab + mktab = TRIO_PROCESS.out.mktab + matdb = TRIO_PROCESS.out.matdb + patdb = TRIO_PROCESS.out.patdb versions = ch_versions } diff --git a/subworkflows/local/prepare_input.nf b/subworkflows/local/prepare_input.nf index af798fea..df40857b 100644 --- a/subworkflows/local/prepare_input.nf +++ b/subworkflows/local/prepare_input.nf @@ -30,6 +30,7 @@ workflow PREPARE_INPUT { plastid : ( data.plastid ? ( data.plastid.fam ? file(data.plastid.fam, checkIfExists: true) : [] ) : []) hic_motif : (data.hic_motif ? data.hic_motif : []) hic_aligner : (data.hic_aligner ? data.hic_aligner :[]) + trio_mode : (data.trio_mode ? 
data.trio_mode :[]) } .set{ ch_yml_data } @@ -51,11 +52,10 @@ workflow PREPARE_INPUT { : []) matreads_ch: ( data.trio ? [ [id: data.id ], data.trio.matreads.collect { file( it, checkIfExists: true ) } ] - : []) + : [ [id: data.id], [] ]) patreads_ch: ( data.trio ? [ [id: data.id ], data.trio.patreads.collect { file( it, checkIfExists: true ) } ] - : []) - + : [ [id: data.id], [] ]) } .set{ dataset_ch } @@ -85,6 +85,7 @@ workflow PREPARE_INPUT { patreads = dataset_ch.patreads_ch illumina_10X = dataset_ch.illumina_10X_ch busco = busco_ch + trio_flag_ch = ch_yml_data.trio_mode mito = ch_yml_data.mito plastid = ch_yml_data.plastid versions = ch_versions.ifEmpty(null) // channel: [ versions.yml ] diff --git a/subworkflows/local/raw_assembly.nf b/subworkflows/local/raw_assembly.nf index c53d7611..d766dad4 100644 --- a/subworkflows/local/raw_assembly.nf +++ b/subworkflows/local/raw_assembly.nf @@ -20,7 +20,6 @@ workflow RAW_ASSEMBLY { // // MODULE: RUN HIFIASM IN STANDARD WAY // - patdb.view() HIFIASM_PRI(hifi_reads, patdb, matdb, [], [], []) ch_versions = ch_versions.mix(HIFIASM_PRI.out.versions) diff --git a/subworkflows/local/trio_mode.nf b/subworkflows/local/trio_mode.nf new file mode 100644 index 00000000..738366f3 --- /dev/null +++ b/subworkflows/local/trio_mode.nf @@ -0,0 +1,40 @@ +include { FASTK_FASTK as FASTK_PAT } from "../../modules/nf-core/fastk/fastk/main" +include { FASTK_FASTK as FASTK_MAT } from "../../modules/nf-core/fastk/fastk/main" +include { YAK_COUNT as YAK_COUNT_MAT } from "../../modules/nf-core/yak/count/main" +include { YAK_COUNT as YAK_COUNT_PAT } from "../../modules/nf-core/yak/count/main" + +workflow TRIO_MODE { + + take: + matreads // [meta, [matreads]] + patreads // [meta, [patreads]] + + main: + ch_versions = Channel.empty() + matdb_ch = Channel.empty() + matktab_ch = Channel.empty() + patdb_ch = Channel.empty() + patktab_ch = Channel.empty() + + + YAK_COUNT_PAT(patreads) + FASTK_PAT(patreads) + patdb_ch = YAK_COUNT_PAT.out.yak + patktab_ch 
= FASTK_PAT.out.ktab + ch_versions = ch_versions.mix(YAK_COUNT_PAT.out.versions) + ch_versions = ch_versions.mix(FASTK_PAT.out.versions) + YAK_COUNT_MAT(matreads) + FASTK_MAT(matreads) + matdb_ch = YAK_COUNT_MAT.out.yak + matktab_ch = FASTK_MAT.out.ktab + ch_versions = ch_versions.mix(YAK_COUNT_MAT.out.versions) + ch_versions = ch_versions.mix(FASTK_MAT.out.versions) + + emit: + pktab = patktab_ch.ifEmpty( [ [], [] ] ) + mktab = matktab_ch.ifEmpty( [ [], [] ] ) + matdb = matdb_ch.ifEmpty( [] ) + patdb = patdb_ch.ifEmpty( [] ) + versions = ch_versions +} + diff --git a/workflows/genomeassembly.nf b/workflows/genomeassembly.nf index 1822025d..6d7e89dd 100644 --- a/workflows/genomeassembly.nf +++ b/workflows/genomeassembly.nf @@ -99,6 +99,9 @@ workflow GENOMEASSEMBLY { PREPARE_INPUT.out.hifi.set{ hifi_reads_ch } PREPARE_INPUT.out.matreads.set{ mat_reads_ch} PREPARE_INPUT.out.patreads.set{ pat_reads_ch} + PREPARE_INPUT.out.trio_flag_ch.set{ trio_flag_ch} + + // // LOGIC: SEPARATE READS PATHS INTO A DIFFERENT CHANNEL @@ -108,7 +111,7 @@ workflow GENOMEASSEMBLY { // // SUBWORKFLOW: GENERATE KMER DATABASE AND PROFILE MODEL // - GENOMESCOPE_MODEL( hifi_reads_ch, mat_reads_ch, pat_reads_ch ) + GENOMESCOPE_MODEL( hifi_reads_ch, mat_reads_ch, pat_reads_ch, trio_flag_ch) ch_versions = ch_versions.mix(GENOMESCOPE_MODEL.out.versions) // @@ -135,6 +138,8 @@ workflow GENOMEASSEMBLY { PREPARE_INPUT.out.busco, GENOMESCOPE_MODEL.out.hist, GENOMESCOPE_MODEL.out.ktab, + GENOMESCOPE_MODEL.out.pktab, + GENOMESCOPE_MODEL.out.mktab, unset_busco_alts ) ch_versions = ch_versions.mix(GENOME_STATISTICS_RAW.out.versions) @@ -182,6 +187,8 @@ workflow GENOMEASSEMBLY { PREPARE_INPUT.out.busco, GENOMESCOPE_MODEL.out.hist, GENOMESCOPE_MODEL.out.ktab, + GENOMESCOPE_MODEL.out.pktab, + GENOMESCOPE_MODEL.out.mktab, set_busco_alts ) } @@ -225,6 +232,8 @@ workflow GENOMEASSEMBLY { PREPARE_INPUT.out.busco, GENOMESCOPE_MODEL.out.hist, GENOMESCOPE_MODEL.out.ktab, + GENOMESCOPE_MODEL.out.pktab, + 
GENOMESCOPE_MODEL.out.mktab, unset_busco_alts ) @@ -366,6 +375,8 @@ workflow GENOMEASSEMBLY { PREPARE_INPUT.out.busco, GENOMESCOPE_MODEL.out.hist, GENOMESCOPE_MODEL.out.ktab, + GENOMESCOPE_MODEL.out.pktab, + GENOMESCOPE_MODEL.out.mktab, unset_busco_alts ) @@ -408,6 +419,8 @@ workflow GENOMEASSEMBLY { PREPARE_INPUT.out.busco, GENOMESCOPE_MODEL.out.hist, GENOMESCOPE_MODEL.out.ktab, + GENOMESCOPE_MODEL.out.pktab, + GENOMESCOPE_MODEL.out.mktab, set_busco_alts ) From 1f37e382c3eb35b0890bf65c7a02cd426bf76c06 Mon Sep 17 00:00:00 2001 From: yumisims Date: Mon, 23 Sep 2024 21:09:11 +0100 Subject: [PATCH 03/14] add trio mode --- assets/test.yaml | 11 +++++------ assets/test_gfLaeSulp1.yaml | 12 +++++++++--- assets/test_gsMetZobe1.yaml | 5 +++++ assets/test_iyVesGerm1.yaml | 5 +++++ subworkflows/local/genomescope_model.nf | 17 ++++++++--------- subworkflows/local/prepare_input.nf | 4 ++-- subworkflows/local/trio_mode.nf | 13 ++++--------- workflows/genomeassembly.nf | 9 +++++---- 8 files changed, 43 insertions(+), 33 deletions(-) diff --git a/assets/test.yaml b/assets/test.yaml index 9749f958..2e4a2340 100644 --- a/assets/test.yaml +++ b/assets/test.yaml @@ -8,17 +8,16 @@ dataset: pacbio: reads: - reads: https://tolit.cog.sanger.ac.uk/test-data/Undibacterium_unclassified/genomic_data/baUndUnlc1/pacbio/fasta/HiFi.reads.fasta - #trio: - #matreads: - #- https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/genomics/sarscov2/illumina/fastq/test_1.fastq.gz - #patreads: - #- https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/genomics/sarscov2/illumina/fastq/test_2.fastq.gz HiC: reads: - reads: https://tolit.cog.sanger.ac.uk/test-data/Undibacterium_unclassified/genomic_data/baUndUnlc1/hic-arima2/41741_2%237.sub.cram + trio: + matreads: + - https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/genomics/sarscov2/illumina/fastq/test_1.fastq.gz + patreads: + - 
https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/genomics/sarscov2/illumina/fastq/test_2.fastq.gz hic_motif: GATC,GANTC,CTNAG,TTAA hic_aligner: bwamem2 -trio_mode: non_trio busco: lineage: bacteria_odb10 mito: diff --git a/assets/test_gfLaeSulp1.yaml b/assets/test_gfLaeSulp1.yaml index 12779d54..210b6ae2 100644 --- a/assets/test_gfLaeSulp1.yaml +++ b/assets/test_gfLaeSulp1.yaml @@ -3,9 +3,15 @@ dataset: pacbio: reads: - reads: /lustre/scratch124/tol/projects/darwin/data/fungi/Laetiporus_sulphureus/genomic_data/gfLaeSulp1/pacbio/fasta/m64229e_210602_121910.ccs.bc1020_BAK8B_OA--bc1020_BAK8B_OA.filtered.fasta.gz - HiC: - reads: - - reads: /lustre/scratch124/tol/projects/darwin/data/fungi/Laetiporus_sulphureus/genomic_data/gfLaeSulp1/hic-arima2/40063_3#5.cram + HiC: + reads: + - reads: /lustre/scratch124/tol/projects/darwin/data/fungi/Laetiporus_sulphureus/genomic_data/gfLaeSulp1/hic-arima2/40063_3#5.cram + trio: + matreads: + - https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/genomics/sarscov2/illumina/fastq/test_1.fastq.gz + patreads: + - https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/genomics/sarscov2/illumina/fastq/test_2.fastq.gz + hic_motif: GATC,GANTC,CTNAG,TTAA hic_aligner: minimap2 busco: diff --git a/assets/test_gsMetZobe1.yaml b/assets/test_gsMetZobe1.yaml index 56f7a484..cd20f0c6 100644 --- a/assets/test_gsMetZobe1.yaml +++ b/assets/test_gsMetZobe1.yaml @@ -11,6 +11,11 @@ dataset: HiC: reads: - reads: /lustre/scratch123/tol/resources/genomeassembly/testdata/gsMetZobe1/hic-arima2/35528_4#7.cram + trio: + matreads: + - https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/genomics/sarscov2/illumina/fastq/test_1.fastq.gz + patreads: + - https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/genomics/sarscov2/illumina/fastq/test_2.fastq.gz hic_motif: GATC,GANTC,CTNAG,TTAA hic_aligner: minimap2 busco: diff --git a/assets/test_iyVesGerm1.yaml b/assets/test_iyVesGerm1.yaml 
index a03d8a3d..4d8bc212 100644 --- a/assets/test_iyVesGerm1.yaml +++ b/assets/test_iyVesGerm1.yaml @@ -8,6 +8,11 @@ dataset: HiC: reads: - reads: /lustre/scratch123/tol/resources/genomeassembly/testdata/iyVesGerm1/hic-arima2/34957_3#2.cram + trio: + matreads: + - https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/genomics/sarscov2/illumina/fastq/test_1.fastq.gz + patreads: + - https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/genomics/sarscov2/illumina/fastq/test_2.fastq.gz hic_motif: GATC,GANTC,CTNAG,TTAA hic_aligner: minimap2 busco: diff --git a/subworkflows/local/genomescope_model.nf b/subworkflows/local/genomescope_model.nf index 78443d8c..51f4a5d1 100644 --- a/subworkflows/local/genomescope_model.nf +++ b/subworkflows/local/genomescope_model.nf @@ -19,8 +19,6 @@ workflow GENOMESCOPE_MODEL { patdb_ch = Channel.empty() patktab_ch = Channel.empty() - - trio_flag.view() // // MODULE: MERGE ALL READS IN ONE FILE // @@ -54,9 +52,8 @@ workflow GENOMESCOPE_MODEL { // - // MODULE: YAK TO PRODUCE PAT DATABASE + // LOGIC: RUN TRIO WHEN TRIO DATA IS AVAILABLE // - trio_flag .combine( patreads ) .combine( matreads ) @@ -73,7 +70,9 @@ workflow GENOMESCOPE_MODEL { mat: tuple( mat_meta, mat_data ) } .set{ ch_trio_data } - + // + // SUBWORKFLOW: RUN TRIO PROCESS WITH TRIO DATA + // TRIO_PROCESS ( ch_trio_data.pat, ch_trio_data.mat @@ -89,10 +88,10 @@ workflow GENOMESCOPE_MODEL { model = GENESCOPEFK.out.model hist = FASTK_FASTK.out.hist ktab = FASTK_FASTK.out.ktab - pktab = TRIO_PROCESS.out.pktab - mktab = TRIO_PROCESS.out.mktab - matdb = TRIO_PROCESS.out.matdb - patdb = TRIO_PROCESS.out.patdb + pktab = TRIO_PROCESS.out.pktab.ifEmpty( [] ) + mktab = TRIO_PROCESS.out.mktab.ifEmpty( [] ) + matdb = TRIO_PROCESS.out.matdb.ifEmpty( [] ) + patdb = TRIO_PROCESS.out.patdb.ifEmpty( [] ) versions = ch_versions } diff --git a/subworkflows/local/prepare_input.nf b/subworkflows/local/prepare_input.nf index df40857b..a2ea5384 100644 --- 
a/subworkflows/local/prepare_input.nf +++ b/subworkflows/local/prepare_input.nf @@ -30,7 +30,6 @@ workflow PREPARE_INPUT { plastid : ( data.plastid ? ( data.plastid.fam ? file(data.plastid.fam, checkIfExists: true) : [] ) : []) hic_motif : (data.hic_motif ? data.hic_motif : []) hic_aligner : (data.hic_aligner ? data.hic_aligner :[]) - trio_mode : (data.trio_mode ? data.trio_mode :[]) } .set{ ch_yml_data } @@ -56,6 +55,7 @@ workflow PREPARE_INPUT { patreads_ch: ( data.trio ? [ [id: data.id ], data.trio.patreads.collect { file( it, checkIfExists: true ) } ] : [ [id: data.id], [] ]) + trio_mode: ( data.trio ? "trio" : "non_trio" ) } .set{ dataset_ch } @@ -85,7 +85,7 @@ workflow PREPARE_INPUT { patreads = dataset_ch.patreads_ch illumina_10X = dataset_ch.illumina_10X_ch busco = busco_ch - trio_flag_ch = ch_yml_data.trio_mode + trio_flag_ch = dataset_ch.trio_mode mito = ch_yml_data.mito plastid = ch_yml_data.plastid versions = ch_versions.ifEmpty(null) // channel: [ versions.yml ] diff --git a/subworkflows/local/trio_mode.nf b/subworkflows/local/trio_mode.nf index 738366f3..28e2eb23 100644 --- a/subworkflows/local/trio_mode.nf +++ b/subworkflows/local/trio_mode.nf @@ -11,11 +11,6 @@ workflow TRIO_MODE { main: ch_versions = Channel.empty() - matdb_ch = Channel.empty() - matktab_ch = Channel.empty() - patdb_ch = Channel.empty() - patktab_ch = Channel.empty() - YAK_COUNT_PAT(patreads) FASTK_PAT(patreads) @@ -31,10 +26,10 @@ workflow TRIO_MODE { ch_versions = ch_versions.mix(FASTK_MAT.out.versions) emit: - pktab = patktab_ch.ifEmpty( [ [], [] ] ) - mktab = matktab_ch.ifEmpty( [ [], [] ] ) - matdb = matdb_ch.ifEmpty( [] ) - patdb = patdb_ch.ifEmpty( [] ) + pktab = patktab_ch + mktab = matktab_ch + matdb = matdb_ch + patdb = patdb_ch versions = ch_versions } diff --git a/workflows/genomeassembly.nf b/workflows/genomeassembly.nf index 6d7e89dd..2a1cb60c 100644 --- a/workflows/genomeassembly.nf +++ b/workflows/genomeassembly.nf @@ -101,8 +101,6 @@ workflow GENOMEASSEMBLY { 
PREPARE_INPUT.out.patreads.set{ pat_reads_ch} PREPARE_INPUT.out.trio_flag_ch.set{ trio_flag_ch} - - // // LOGIC: SEPARATE READS PATHS INTO A DIFFERENT CHANNEL // @@ -111,14 +109,17 @@ workflow GENOMEASSEMBLY { // // SUBWORKFLOW: GENERATE KMER DATABASE AND PROFILE MODEL // - GENOMESCOPE_MODEL( hifi_reads_ch, mat_reads_ch, pat_reads_ch, trio_flag_ch) + GENOMESCOPE_MODEL( hifi_reads_ch, mat_reads_ch, pat_reads_ch, trio_flag_ch) ch_versions = ch_versions.mix(GENOMESCOPE_MODEL.out.versions) + GENOMESCOPE_MODEL.out.patdb.map{ meta, patdb -> patdb }.set{ patdb_ch } + GENOMESCOPE_MODEL.out.matdb.map{ meta, matdb -> matdb }.set{ matdb_ch } + // // SUBWORKFLOW: RUN A HIFIASM ASSEMBLY ON THE HIFI READS; ALSO CREATE // A HIFIASM RUN IN HIC MODE IF THE FLAG IS SWITCHED ON // - RAW_ASSEMBLY( hifi_reads_ch, hic_reads_ch, hifiasm_hic_on, GENOMESCOPE_MODEL.out.patdb, GENOMESCOPE_MODEL.out.matdb ) + RAW_ASSEMBLY( hifi_reads_ch, hic_reads_ch, hifiasm_hic_on, patdb_ch, matdb_ch ) ch_versions = ch_versions.mix(RAW_ASSEMBLY.out.versions) // From 4ff0e7c3c4dcf49943a8255dff9b936bbbbc10f9 Mon Sep 17 00:00:00 2001 From: yumisims Date: Mon, 23 Sep 2024 21:18:37 +0100 Subject: [PATCH 04/14] revert hifiasm_hic --- subworkflows/local/raw_assembly.nf | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/subworkflows/local/raw_assembly.nf b/subworkflows/local/raw_assembly.nf index d766dad4..8c16804e 100644 --- a/subworkflows/local/raw_assembly.nf +++ b/subworkflows/local/raw_assembly.nf @@ -41,7 +41,7 @@ workflow RAW_ASSEMBLY { // // MODULE: RUN HIFIASM IN HIC MODE // - HIFIASM_HIC(hifi_reads, patdb, matdb, [], [], hic_reads) + HIFIASM_HIC(hifi_reads, [], [], [], [], hic_reads) // // MODULE: CONVERT HIFIASM-HIC PRIMARY CONTIGS TO FASTA From ea1832226c839edaaec4076100c2ad7d5195392c Mon Sep 17 00:00:00 2001 From: yumisims Date: Mon, 23 Sep 2024 21:31:51 +0100 Subject: [PATCH 05/14] map ktabs --- workflows/genomeassembly.nf | 24 ++++++++++++++---------- 1 file changed, 14 insertions(+), 
10 deletions(-) diff --git a/workflows/genomeassembly.nf b/workflows/genomeassembly.nf index 2a1cb60c..f081d78e 100644 --- a/workflows/genomeassembly.nf +++ b/workflows/genomeassembly.nf @@ -114,6 +114,8 @@ workflow GENOMEASSEMBLY { GENOMESCOPE_MODEL.out.patdb.map{ meta, patdb -> patdb }.set{ patdb_ch } GENOMESCOPE_MODEL.out.matdb.map{ meta, matdb -> matdb }.set{ matdb_ch } + GENOMESCOPE_MODEL.out.pktab.map{ meta, pktab -> pktab }.set{ pktab_ch } + GENOMESCOPE_MODEL.out.pktab.map{ meta, mktab -> mktab }.set{ mktab_ch } // // SUBWORKFLOW: RUN A HIFIASM ASSEMBLY ON THE HIFI READS; ALSO CREATE @@ -139,8 +141,8 @@ workflow GENOMEASSEMBLY { PREPARE_INPUT.out.busco, GENOMESCOPE_MODEL.out.hist, GENOMESCOPE_MODEL.out.ktab, - GENOMESCOPE_MODEL.out.pktab, - GENOMESCOPE_MODEL.out.mktab, + pktab_ch, + mktab_ch, unset_busco_alts ) ch_versions = ch_versions.mix(GENOME_STATISTICS_RAW.out.versions) @@ -188,8 +190,8 @@ workflow GENOMEASSEMBLY { PREPARE_INPUT.out.busco, GENOMESCOPE_MODEL.out.hist, GENOMESCOPE_MODEL.out.ktab, - GENOMESCOPE_MODEL.out.pktab, - GENOMESCOPE_MODEL.out.mktab, + pktab_ch, + mktab_ch, set_busco_alts ) } @@ -233,8 +235,8 @@ workflow GENOMEASSEMBLY { PREPARE_INPUT.out.busco, GENOMESCOPE_MODEL.out.hist, GENOMESCOPE_MODEL.out.ktab, - GENOMESCOPE_MODEL.out.pktab, - GENOMESCOPE_MODEL.out.mktab, + pktab_ch, + mktab_ch, unset_busco_alts ) @@ -336,6 +338,8 @@ workflow GENOMEASSEMBLY { PREPARE_INPUT.out.busco, GENOMESCOPE_MODEL.out.hist, GENOMESCOPE_MODEL.out.ktab, + pktab_ch, + mktab_ch, unset_busco_alts ) } @@ -376,8 +380,8 @@ workflow GENOMEASSEMBLY { PREPARE_INPUT.out.busco, GENOMESCOPE_MODEL.out.hist, GENOMESCOPE_MODEL.out.ktab, - GENOMESCOPE_MODEL.out.pktab, - GENOMESCOPE_MODEL.out.mktab, + pktab_ch, + mktab_ch, unset_busco_alts ) @@ -420,8 +424,8 @@ workflow GENOMEASSEMBLY { PREPARE_INPUT.out.busco, GENOMESCOPE_MODEL.out.hist, GENOMESCOPE_MODEL.out.ktab, - GENOMESCOPE_MODEL.out.pktab, - GENOMESCOPE_MODEL.out.mktab, + pktab_ch, + mktab_ch, set_busco_alts ) From 
60d5fc176f2bbe844cd946ca08e630e267fa4dc6 Mon Sep 17 00:00:00 2001 From: yumisims Date: Mon, 23 Sep 2024 21:38:53 +0100 Subject: [PATCH 06/14] map ktabs --- workflows/genomeassembly.nf | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/workflows/genomeassembly.nf b/workflows/genomeassembly.nf index f081d78e..a4c61fd6 100644 --- a/workflows/genomeassembly.nf +++ b/workflows/genomeassembly.nf @@ -190,8 +190,8 @@ workflow GENOMEASSEMBLY { PREPARE_INPUT.out.busco, GENOMESCOPE_MODEL.out.hist, GENOMESCOPE_MODEL.out.ktab, - pktab_ch, - mktab_ch, + [], + [], set_busco_alts ) } From ee465275edec19088aee4a1cbf7288758328c064 Mon Sep 17 00:00:00 2001 From: yumisims Date: Mon, 23 Sep 2024 21:48:23 +0100 Subject: [PATCH 07/14] map ktabs --- workflows/genomeassembly.nf | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/workflows/genomeassembly.nf b/workflows/genomeassembly.nf index a4c61fd6..5f864cda 100644 --- a/workflows/genomeassembly.nf +++ b/workflows/genomeassembly.nf @@ -424,8 +424,8 @@ workflow GENOMEASSEMBLY { PREPARE_INPUT.out.busco, GENOMESCOPE_MODEL.out.hist, GENOMESCOPE_MODEL.out.ktab, - pktab_ch, - mktab_ch, + [], + []], set_busco_alts ) From 47ccc760c35c630cb30db213f5d030be39606acd Mon Sep 17 00:00:00 2001 From: yumisims Date: Mon, 23 Sep 2024 21:51:58 +0100 Subject: [PATCH 08/14] map ktabs --- workflows/genomeassembly.nf | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/workflows/genomeassembly.nf b/workflows/genomeassembly.nf index 5f864cda..81c2b4d7 100644 --- a/workflows/genomeassembly.nf +++ b/workflows/genomeassembly.nf @@ -425,7 +425,7 @@ workflow GENOMEASSEMBLY { GENOMESCOPE_MODEL.out.hist, GENOMESCOPE_MODEL.out.ktab, [], - []], + [], set_busco_alts ) From 36730fc0d205b2e3d1c10cad9d35b98f87ed4e39 Mon Sep 17 00:00:00 2001 From: yumisims Date: Mon, 23 Sep 2024 22:49:34 +0100 Subject: [PATCH 09/14] add version for trio process --- subworkflows/local/genomescope_model.nf | 3 ++- 
subworkflows/local/trio_mode.nf | 7 +++++-- 2 files changed, 7 insertions(+), 3 deletions(-) diff --git a/subworkflows/local/genomescope_model.nf b/subworkflows/local/genomescope_model.nf index 51f4a5d1..925bd206 100644 --- a/subworkflows/local/genomescope_model.nf +++ b/subworkflows/local/genomescope_model.nf @@ -77,7 +77,8 @@ workflow GENOMESCOPE_MODEL { ch_trio_data.pat, ch_trio_data.mat ) - + ch_versions = ch_versions.mix(TRIO_PROCESS.out.versions) + // // MODULE: GENERATE GENOMESCOPE KMER COVERAGE MODEL // diff --git a/subworkflows/local/trio_mode.nf b/subworkflows/local/trio_mode.nf index 28e2eb23..5c17bcda 100644 --- a/subworkflows/local/trio_mode.nf +++ b/subworkflows/local/trio_mode.nf @@ -11,11 +11,14 @@ workflow TRIO_MODE { main: ch_versions = Channel.empty() - + + // + // MODULE: GENERATE TRIO DATABASES AND KTABS FOR BOTH PAT AND MAT + // YAK_COUNT_PAT(patreads) FASTK_PAT(patreads) patdb_ch = YAK_COUNT_PAT.out.yak - patktab_ch = FASTK_PAT.out.ktab + patktab_ch = FASTK_PAT.out.ktab ch_versions = ch_versions.mix(YAK_COUNT_PAT.out.versions) ch_versions = ch_versions.mix(FASTK_PAT.out.versions) YAK_COUNT_MAT(matreads) From 47e93c25f1a3b6f2f44e87e692d4683fbd08c303 Mon Sep 17 00:00:00 2001 From: yumisims Date: Tue, 5 Nov 2024 14:10:23 +0000 Subject: [PATCH 10/14] scaffolding trio assembly --- subworkflows/local/raw_assembly.nf | 25 +++++++++++++-- workflows/genomeassembly.nf | 51 ++++++++++++++++++++++++++++++ 2 files changed, 74 insertions(+), 2 deletions(-) diff --git a/subworkflows/local/raw_assembly.nf b/subworkflows/local/raw_assembly.nf index 8c16804e..295c49ae 100644 --- a/subworkflows/local/raw_assembly.nf +++ b/subworkflows/local/raw_assembly.nf @@ -3,8 +3,10 @@ include { HIFIASM as HIFIASM_HIC } from '../../modules/nf-core/hi include { GFA_TO_FASTA as GFA_TO_FASTA_PRI } from '../../modules/local/gfa_to_fasta' include { GFA_TO_FASTA as GFA_TO_FASTA_ALT } from '../../modules/local/gfa_to_fasta' -include { GFA_TO_FASTA as GFA_TO_FASTA_HAP1_HIC } from 
'../../modules/local/gfa_to_fasta' -include { GFA_TO_FASTA as GFA_TO_FASTA_HAP2_HIC } from '../../modules/local/gfa_to_fasta' +include { GFA_TO_FASTA as GFA_TO_FASTA_HAP1_HIC } from '../../modules/local/gfa_to_fasta' +include { GFA_TO_FASTA as GFA_TO_FASTA_HAP2_HIC } from '../../modules/local/gfa_to_fasta' +include { GFA_TO_FASTA as GFA_TO_FASTA_PAT_HIC } from '../../modules/local/gfa_to_fasta' +include { GFA_TO_FASTA as GFA_TO_FASTA_MAT_HIC } from '../../modules/local/gfa_to_fasta' workflow RAW_ASSEMBLY { take: @@ -54,11 +56,30 @@ workflow RAW_ASSEMBLY { GFA_TO_FASTA_HAP2_HIC( HIFIASM_HIC.out.hap2_contigs ) } + if ( hifiasm_trio_on ) { + // + // MODULE: RUN HIFIASM IN HIC MODE + // + HIFIASM_HIC(hifi_reads, [], [], [], [], hic_reads) + + // + // MODULE: CONVERT HIFIASM-HIC PATERNAL CONTIGS TO FASTA + // + GFA_TO_FASTA_PAT_HIC( HIFIASM_HIC.out.paternal_contigs) + + // + // MODULE: CONVERT HIFIASM-HIC MATERNAL CONTIGS TO FASTA + // + GFA_TO_FASTA_MAT_HIC( HIFIASM_HIC.out.maternal_contigs ) + } + emit: primary_contigs = GFA_TO_FASTA_PRI.out.fasta alternate_contigs = GFA_TO_FASTA_ALT.out.fasta hap1_hic_contigs = hifiasm_hic_on ? GFA_TO_FASTA_HAP1_HIC.out.fasta : null hap2_hic_contigs = hifiasm_hic_on ? GFA_TO_FASTA_HAP2_HIC.out.fasta : null + pat_hic_contigs = hifiasm_trio_on ? GFA_TO_FASTA_PAT_HIC.out.fasta : null + mat_hic_contigs = hifiasm_trio_on ? 
GFA_TO_FASTA_MAT_HIC.out.fasta : null versions = ch_versions } diff --git a/workflows/genomeassembly.nf b/workflows/genomeassembly.nf index 81c2b4d7..369cc27d 100644 --- a/workflows/genomeassembly.nf +++ b/workflows/genomeassembly.nf @@ -19,6 +19,7 @@ if (params.cool_bin) { cool_bin = params.cool_bin } else { cool_bin = 1000; } if (params.polishing_on) { polishing_on = params.polishing_on } else { polishing_on = false; } if (params.hifiasm_hic_on) { hifiasm_hic_on = params.hifiasm_hic_on } else { hifiasm_hic_on = false; } +if (params.hifiasm_trio_on) { hifiasm_trio_on = params.hifiasm_trio_on } else { hifiasm_trio_on = false; } if (params.organelles_on) { organelles_on = params.organelles_on } else { organelles_on = false; } // Declare constants to toggle BUSCO for alts @@ -49,17 +50,22 @@ include { POLISHING } from '../subwo include { SCAFFOLDING } from '../subworkflows/local/scaffolding' include { SCAFFOLDING as SCAFFOLDING_HAP1 } from '../subworkflows/local/scaffolding' include { SCAFFOLDING as SCAFFOLDING_HAP2 } from '../subworkflows/local/scaffolding' +include { SCAFFOLDING as SCAFFOLDING_PAT } from '../subworkflows/local/scaffolding' +include { SCAFFOLDING as SCAFFOLDING_MAT } from '../subworkflows/local/scaffolding' include { KEEP_SEQNAMES as KEEP_SEQNAMES_PRIMARY } from '../modules/local/keep_seqnames' include { KEEP_SEQNAMES as KEEP_SEQNAMES_HAPLOTIGS } from '../modules/local/keep_seqnames' include { HIC_MAPPING } from '../subworkflows/local/hic_mapping' include { HIC_MAPPING as HIC_MAPPING_HAP1 } from '../subworkflows/local/hic_mapping' include { HIC_MAPPING as HIC_MAPPING_HAP2 } from '../subworkflows/local/hic_mapping' +include { HIC_MAPPING as HIC_MAPPING_PAT } from '../subworkflows/local/hic_mapping' +include { HIC_MAPPING as HIC_MAPPING_MAT } from '../subworkflows/local/hic_mapping' include { GENOME_STATISTICS as GENOME_STATISTICS_RAW } from '../subworkflows/local/genome_statistics' include { GENOME_STATISTICS as GENOME_STATISTICS_RAW_HIC } from 
'../subworkflows/local/genome_statistics' include { GENOME_STATISTICS as GENOME_STATISTICS_PURGED } from '../subworkflows/local/genome_statistics' include { GENOME_STATISTICS as GENOME_STATISTICS_POLISHED } from '../subworkflows/local/genome_statistics' include { GENOME_STATISTICS as GENOME_STATISTICS_SCAFFOLDS } from '../subworkflows/local/genome_statistics' include { GENOME_STATISTICS as GENOME_STATISTICS_SCAFFOLDS_HAPS } from '../subworkflows/local/genome_statistics' +include { GENOME_STATISTICS as GENOME_STATISTICS_SCAFFOLDS_TRIO } from '../subworkflows/local/genome_statistics' /* ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ @@ -428,6 +434,51 @@ workflow GENOMEASSEMBLY { [], set_busco_alts ) + } + + if ( hifiasm_trio_on ) { + // + // SUBWORKFLOW: MAP HIC DATA TO THE HAP1 CONTIGS + // + HIC_MAPPING_PAT ( RAW_ASSEMBLY.out.pat_hic_contigs, crams_ch, hic_aligner_ch, 'pat' ) + ch_versions = ch_versions.mix(HIC_MAPPING_HAP1.out.versions) + + // + // SUBWORKFLOW: SCAFFOLD PAT + // + SCAFFOLDING_PAT( HIC_MAPPING_PAT.out.bed, RAW_ASSEMBLY.out.pat_hic_contigs, cool_bin, 'pat' ) + ch_versions = ch_versions.mix(SCAFFOLDING_PAT.out.versions) + + // + // SUBWORKFLOW: MAP HIC DATA TO THE HAP2 CONTIGS + // + HIC_MAPPING_MAT ( RAW_ASSEMBLY.out.mat_hic_contigs, crams_ch, hic_aligner_ch, 'mat' ) + ch_versions = ch_versions.mix(HIC_MAPPING_MAT.out.versions) + + // + // SUBWORKFLOW: SCAFFOLD MAT + // + SCAFFOLDING_MAT( HIC_MAPPING_MAT.out.bed, RAW_ASSEMBLY.out.mat_hic_contigs, cool_bin, 'mat' ) + ch_versions = ch_versions.mix(SCAFFOLDING_MAT.out.versions) + + // + // LOGIC: CREATE A CHANNEL FOR THE FULL PAT/MAT ASSEMBLY + // + SCAFFOLDING_PAT.out.fasta.combine(SCAFFOLDING_PAT.out.fasta) + .map{meta_s, fasta_s, meta_h, fasta_h -> [ [id:meta_h.id], fasta_s, fasta_h ]} + .set{ stats_trio_input_ch } + + // + // SUBWORKFLOW: CALCULATE ASSEMBLY STATISTICS FOR PAT/MAT ASSEMBLY + // + GENOME_STATISTICS_SCAFFOLDS_TRIO( stats_trio_input_ch, + 
PREPARE_INPUT.out.busco, + GENOMESCOPE_MODEL.out.hist, + GENOMESCOPE_MODEL.out.ktab, + [], + [], + set_busco_alts + ) } From 392a45459ac0a6c07afec661d6a75a36d01f0753 Mon Sep 17 00:00:00 2001 From: yumisims Date: Tue, 5 Nov 2024 14:18:30 +0000 Subject: [PATCH 11/14] scaffolding trio assembly --- subworkflows/local/raw_assembly.nf | 1 + workflows/genomeassembly.nf | 2 +- 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/subworkflows/local/raw_assembly.nf b/subworkflows/local/raw_assembly.nf index 295c49ae..58cdefde 100644 --- a/subworkflows/local/raw_assembly.nf +++ b/subworkflows/local/raw_assembly.nf @@ -13,6 +13,7 @@ workflow RAW_ASSEMBLY { hifi_reads // channel: [ val(meta), [ datafile ] ] hic_reads // channel: [ datafile ] hifiasm_hic_on // val: True/False + hifiasm_trio_on // val: True/False matdb patdb diff --git a/workflows/genomeassembly.nf b/workflows/genomeassembly.nf index 369cc27d..fa65951c 100644 --- a/workflows/genomeassembly.nf +++ b/workflows/genomeassembly.nf @@ -127,7 +127,7 @@ workflow GENOMEASSEMBLY { // SUBWORKFLOW: RUN A HIFIASM ASSEMBLY ON THE HIFI READS; ALSO CREATE // A HIFIASM RUN IN HIC MODE IF THE FLAG IS SWITCHED ON // - RAW_ASSEMBLY( hifi_reads_ch, hic_reads_ch, hifiasm_hic_on, patdb_ch, matdb_ch ) + RAW_ASSEMBLY( hifi_reads_ch, hic_reads_ch, hifiasm_hic_on, hifiasm_trio_on, patdb_ch, matdb_ch ) ch_versions = ch_versions.mix(RAW_ASSEMBLY.out.versions) // From 7383927c1555a52e77bda8ce442033feae88889c Mon Sep 17 00:00:00 2001 From: yumisims Date: Tue, 5 Nov 2024 17:16:04 +0000 Subject: [PATCH 12/14] upgrade editconfig --- .github/workflows/linting.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/linting.yml b/.github/workflows/linting.yml index 16bdf6d2..4bc6746d 100644 --- a/.github/workflows/linting.yml +++ b/.github/workflows/linting.yml @@ -19,7 +19,7 @@ jobs: - uses: actions/setup-node@v3 - name: Install editorconfig-checker - run: npm install -g editorconfig-checker + run: npm 
install -g editorconfig-checker@3.0.2 - name: Run ECLint check run: editorconfig-checker -exclude README.md $(find .* -type f | grep -v '.git\|.py\|.md\|cff\|json\|yml\|yaml\|html\|css\|work\|.nextflow\|build\|nf_core.egg-info\|log.txt\|Makefile\|drawio') From 35299c8b11c10d51ed417a6448f0cbc511e22595 Mon Sep 17 00:00:00 2001 From: yumisims Date: Wed, 13 Nov 2024 14:53:01 +0000 Subject: [PATCH 13/14] re-adjusted configuration for trio mode --- conf/base.config | 8 --- conf/modules.config | 109 +++++++++++++++++++++++++++++ nextflow.config | 1 + subworkflows/local/raw_assembly.nf | 1 + workflows/genomeassembly.nf | 4 +- 5 files changed, 113 insertions(+), 10 deletions(-) diff --git a/conf/base.config b/conf/base.config index 1efe8ef7..3355e30e 100644 --- a/conf/base.config +++ b/conf/base.config @@ -75,14 +75,6 @@ process { memory = { check_max( 72.GB * task.attempt, 'memory' ) } } - withName: YAK_COUNT_MAT { - ext.prefix = { "${meta.id}_yak_mat" } - } - - withName: YAK_COUNT_PAT { - ext.prefix = { "${meta.id}_yak_pat" } - } - withName:CUSTOM_DUMPSOFTWAREVERSIONS { cache = false } diff --git a/conf/modules.config b/conf/modules.config index 21cb4af8..1a8ff54a 100644 --- a/conf/modules.config +++ b/conf/modules.config @@ -756,6 +756,107 @@ process { // End of hap1/hap2 scaffolding + // start of PAT/MAT scaffolding + if (params.hifiasm_trio_on) { + + withName: '.*HIC_MAPPING_.*AT.*:SAMTOOLS_MARKDUP_HIC_MAPPING' { + ext.prefix = { "${meta.id}_mkdup" } + publishDir = [ + path: { "${params.outdir}/${meta.id}.${params.hifiasmhic}/scaffolding_${meta.hap_id}" }, + mode: params.publish_dir_mode, + saveAs: { filename -> filename.equals('versions.yml') ? null : filename } + ] + } + + withName: '.*HIC_MAPPING_.*AT.*:BAMTOBED_SORT' { + publishDir = [ + path: { "${params.outdir}/${meta.id}.${params.hifiasmhic}/scaffolding_${meta.hap_id}" }, + mode: params.publish_dir_mode, + saveAs: { filename -> filename.equals('versions.yml') ? 
null : filename } + ] + } + + + withName: '.*HIC_MAPPING_.*AT.*:CONVERT_STATS:SAMTOOLS_STATS' { + publishDir = [ + path: { "${params.outdir}/${meta.id}.${params.hifiasmhic}/scaffolding_${meta.hap_id}" }, + mode: params.publish_dir_mode, + saveAs: { filename -> filename.equals('versions.yml') ? null : filename } + ] + } + + withName: '.*HIC_MAPPING_.*AT.*:CONVERT_STATS:SAMTOOLS_FLAGSTAT' { + publishDir = [ + path: { "${params.outdir}/${meta.id}.${params.hifiasmhic}/scaffolding_${meta.hap_id}" }, + mode: params.publish_dir_mode, + saveAs: { filename -> filename.equals('versions.yml') ? null : filename } + ] + } + + withName: '.*HIC_MAPPING_.*AT.*:CONVERT_STATS:SAMTOOLS_IDXSTATS' { + publishDir = [ + path: { "${params.outdir}/${meta.id}.${params.hifiasmhic}/scaffolding_${meta.hap_id}" }, + mode: params.publish_dir_mode, + saveAs: { filename -> filename.equals('versions.yml') ? null : filename } + ] + } + + withName: '.*SCAFFOLDING_.*AT.*:YAHS' { + ext.prefix = { "${meta.hap_id}" } + publishDir = [ + path: { "${params.outdir}/${meta.id}.${params.hifiasmhic}/scaffolding_${meta.hap_id}/yahs/out.break.yahs" }, + mode: params.publish_dir_mode, + saveAs: { filename -> filename.equals('versions.yml') ? null : filename } + ] + + } + + withName: '.*SCAFFOLDING_.*AT.*:COOLER_CLOAD' { + // Positions in the input file are zero-based; + // chrom1 field number (one-based) is 2; + // pos1 field number (one-based) is 3; + // chrom2 field number (one-based) is 6; + // pos2 field number (one-based) is 7 + ext.args = 'pairs -0 -c1 2 -p1 3 -c2 6 -p2 7' + publishDir = [ + path: { "${params.outdir}/${meta.id}.${params.hifiasmhic}/scaffolding_${meta.hap_id}/yahs/out.break.yahs" }, + mode: params.publish_dir_mode, + saveAs: { filename -> filename.equals('versions.yml') ? 
null : filename } + ] + } + + withName: '.*SCAFFOLDING_.*AT.*:PRETEXTSNAPSHOT' { + // Make one plot containing all sequences + ext.args = '--sequences \"=full\"' + publishDir = [ + path: { "${params.outdir}/${meta.id}.${params.hifiasmhic}/scaffolding_${meta.hap_id}/yahs/out.break.yahs" }, + mode: params.publish_dir_mode, + saveAs: { filename -> filename.equals('versions.yml') ? null : filename } + ] + } + + withName: '.*SCAFFOLDING_.*AT.*:JUICER_TOOLS_PRE' { + ext.juicer_tools_jar = 'juicer_tools.1.9.9_jcuda.0.8.jar' + ext.juicer_jvm_params = '-Xms1g -Xmx6g' + publishDir = [ + path: { "${params.outdir}/${meta.id}.${params.hifiasmhic}/scaffolding_${meta.hap_id}/yahs/out.break.yahs" }, + mode: params.publish_dir_mode, + saveAs: { filename -> filename.equals('versions.yml') ? null : filename } + ] + } + + withName: '.*SCAFFOLDING_.*AT.*:JUICER_PRE' { + ext.args2 = "LC_ALL=C sort -k2,2d -k6,6d -S50G | awk '\$3>=0 && \$7>=0'" + publishDir = [ + path: { "${params.outdir}/${meta.id}.${params.hifiasmhic}/scaffolding_${meta.hap_id}/yahs/out.break.yahs" }, + mode: params.publish_dir_mode, + saveAs: { filename -> filename.equals('versions.yml') ? 
null : filename } + ] + } + } + + //end of trio scaffolding + withName: '.*GENOME_STATISTICS_SCAFFOLDS_HAPS:GFASTATS_PRI' { ext.prefix = { "${meta.id}_scaffolds_final" } publishDir = [ @@ -808,6 +909,14 @@ process { ] } + withName: YAK_COUNT_MAT { + ext.prefix = { "${meta.id}_yak_mat" } + } + + withName: YAK_COUNT_PAT { + ext.prefix = { "${meta.id}_yak_pat" } + } + // End of Scaffolding hap1/hap2 // End of Set up of the scaffolding pipeline diff --git a/nextflow.config b/nextflow.config index 46455dc4..bf1374a6 100644 --- a/nextflow.config +++ b/nextflow.config @@ -19,6 +19,7 @@ params { // Assembly options hifiasm_hic_on = false + hifiasm_trio_on = false organelles_on = false // Polishing diff --git a/subworkflows/local/raw_assembly.nf b/subworkflows/local/raw_assembly.nf index 58cdefde..244b14d1 100644 --- a/subworkflows/local/raw_assembly.nf +++ b/subworkflows/local/raw_assembly.nf @@ -1,5 +1,6 @@ include { HIFIASM as HIFIASM_PRI } from '../../modules/nf-core/hifiasm/main' include { HIFIASM as HIFIASM_HIC } from '../../modules/nf-core/hifiasm/main' +include { HIFIASM as HIFIASM_HIC_TRIO } from '../../modules/nf-core/hifiasm/main' include { GFA_TO_FASTA as GFA_TO_FASTA_PRI } from '../../modules/local/gfa_to_fasta' include { GFA_TO_FASTA as GFA_TO_FASTA_ALT } from '../../modules/local/gfa_to_fasta' diff --git a/workflows/genomeassembly.nf b/workflows/genomeassembly.nf index fa65951c..c30f9af2 100644 --- a/workflows/genomeassembly.nf +++ b/workflows/genomeassembly.nf @@ -441,7 +441,7 @@ workflow GENOMEASSEMBLY { // SUBWORKFLOW: MAP HIC DATA TO THE HAP1 CONTIGS // HIC_MAPPING_PAT ( RAW_ASSEMBLY.out.pat_hic_contigs, crams_ch, hic_aligner_ch, 'pat' ) - ch_versions = ch_versions.mix(HIC_MAPPING_HAP1.out.versions) + ch_versions = ch_versions.mix(HIC_MAPPING_PAT.out.versions) // // SUBWORKFLOW: SCAFFOLD PAT @@ -464,7 +464,7 @@ workflow GENOMEASSEMBLY { // // LOGIC: CREATE A CHANNEL FOR THE FULL PAT/MAT ASSEMBLY // - 
SCAFFOLDING_PAT.out.fasta.combine(SCAFFOLDING_PAT.out.fasta) + SCAFFOLDING_PAT.out.fasta.combine(SCAFFOLDING_MAT.out.fasta) .map{meta_s, fasta_s, meta_h, fasta_h -> [ [id:meta_h.id], fasta_s, fasta_h ]} .set{ stats_trio_input_ch } From fac5b0a4e48d1da4c58e4e8e4d5c511720c18298 Mon Sep 17 00:00:00 2001 From: yumisims Date: Wed, 13 Nov 2024 15:03:26 +0000 Subject: [PATCH 14/14] add hifiasm_trio_on boolean to schema --- nextflow_schema.json | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/nextflow_schema.json b/nextflow_schema.json index 0e0f0ded..71278b29 100644 --- a/nextflow_schema.json +++ b/nextflow_schema.json @@ -39,6 +39,10 @@ "type": "boolean", "description": "Switch on HiC mode for hifiasm assembly" }, + "hifiasm_trio_on": { + "type": "boolean", + "description": "Switch on TRIO mode for hifiasm assembly" + }, "polishing_on": { "type": "boolean", "description": "Switch on to do polish purged assembly before scaffolding"