From 5a2afa6fac3f004d3c3e5d0ff830111500819feb Mon Sep 17 00:00:00 2001 From: Dediu Codrin Date: Sun, 2 Nov 2025 14:21:51 +0200 Subject: [PATCH 01/26] Integrating gecco_convert into funcscan --- modules/nf-core/gecco/convert/environment.yml | 7 ++ modules/nf-core/gecco/convert/main.nf | 56 +++++++++ modules/nf-core/gecco/convert/meta.yml | 117 ++++++++++++++++++ nextflow.config | 3 + nextflow_schema.json | 28 +++++ subworkflows/local/bgc.nf | 17 +++ 6 files changed, 228 insertions(+) create mode 100644 modules/nf-core/gecco/convert/environment.yml create mode 100644 modules/nf-core/gecco/convert/main.nf create mode 100644 modules/nf-core/gecco/convert/meta.yml diff --git a/modules/nf-core/gecco/convert/environment.yml b/modules/nf-core/gecco/convert/environment.yml new file mode 100644 index 00000000..bb47bc85 --- /dev/null +++ b/modules/nf-core/gecco/convert/environment.yml @@ -0,0 +1,7 @@ +--- +# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/environment-schema.json +channels: + - conda-forge + - bioconda +dependencies: + - bioconda::gecco=0.9.10 diff --git a/modules/nf-core/gecco/convert/main.nf b/modules/nf-core/gecco/convert/main.nf new file mode 100644 index 00000000..2eb033af --- /dev/null +++ b/modules/nf-core/gecco/convert/main.nf @@ -0,0 +1,56 @@ +process GECCO_CONVERT { + tag "$meta.id" + label 'process_single' + + conda "${moduleDir}/environment.yml" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? + 'https://depot.galaxyproject.org/singularity/gecco:0.9.10--pyhdfd78af_0': + 'biocontainers/gecco:0.9.10--pyhdfd78af_0' }" + + input: + tuple val(meta), path(clusters), path(gbk) + val(mode) + val(format) + + output: + tuple val(meta), path("*.gff") , emit: gff , optional: true + tuple val(meta), path("*.region*.gbk"), emit: bigslice, optional: true + tuple val(meta), path("*.faa") , emit: faa , optional: true + tuple val(meta), path("*.fna") , emit: fna , optional: true + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + def prefix = task.ext.prefix ?: "${meta.id}" + """ + gecco \\ + convert \\ + $args \\ + $mode \\ + --input-dir ./ \\ + --format ${format} \\ + --output ${prefix} + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + gecco: \$(echo \$(gecco --version) | cut -f 2 -d ' ' ) + END_VERSIONS + """ + + stub: + def args = task.ext.args ?: '' + def prefix = task.ext.prefix ?: "${meta.id}" + """ + echo $args + + touch ${prefix}.gff + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + gecco: \$(echo \$(gecco --version) | cut -f 2 -d ' ' ) + END_VERSIONS + """ +} diff --git a/modules/nf-core/gecco/convert/meta.yml b/modules/nf-core/gecco/convert/meta.yml new file mode 100644 index 00000000..137d826c --- /dev/null +++ b/modules/nf-core/gecco/convert/meta.yml @@ -0,0 +1,117 @@ +name: "gecco_convert" +description: | + This command helps transforming the output files created by + GECCO into helpful format, should you want to use the results in + combination with other tools. +keywords: + - bgc + - reformatting + - clusters + - gbk + - gff + - bigslice + - faa + - fna +tools: + - "gecco": + description: "Biosynthetic Gene Cluster prediction with Conditional Random Fields." 
+ homepage: "https://gecco.embl.de" + documentation: "https://gecco.embl.de" + tool_dev_url: "https://github.com/zellerlab/GECCO" + doi: "10.1101/2021.05.03.442509" + licence: ["GPL v3"] + identifier: "" + +input: + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. `[ id:'sample1' ]` + - clusters: + type: file + description: | + TSV file containing coordinates of gecco predicted clusters and BGC types. + pattern: "*.clusters.tsv" + ontologies: + - edam: http://edamontology.org/format_3475 # TSV + - gbk: + type: file + description: | + Per cluster GenBank file containing sequence with annotations + pattern: "*.gbk" + ontologies: + - edam: "http://edamontology.org/format_1936" # GenBank + - mode: + type: string + description: Either clusters or gbk folder output, depending on what is reformatted + enum: ["clusters", "gbk"] + - format: + type: string + description: Format for the output file + enum: ["gff", "bigslice", "faa", "fna"] +output: + gff: + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. `[ id:'sample1' ]` + - "*.gff": + type: file + description: | + GFF3 converted cluster tables containing the position + and metadata for all the predicted clusters + pattern: "*.gff" + ontologies: + - edam: "http://edamontology.org/format_1975" # GFF3 + bigslice: + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. `[ id:'sample1' ]` + - "*.region*.gbk": + type: file + description: | + Converted and aliased GenBank files so that they can be loaded by BiG-SLiCE + pattern: "*.region*.gbk" + ontologies: [] + faa: + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. `[ id:'sample1' ]` + - "*.faa": + type: file + description: | + Amino-acid FASTA converted GenBank files of all the proteins in a cluster + pattern: "*.faa" + ontologies: + - edam: "http://edamontology.org/format_1929" # FASTA + fna: + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. 
`[ id:'sample1' ]` + - "*.fna": + type: file + description: | + Nucleotide sequence FASTA converted GenBank files from the cluster + pattern: "*.fna" + ontologies: + - edam: "http://edamontology.org/format_1929" # FASTA + versions: + - versions.yml: + type: file + description: File containing software versions + pattern: "versions.yml" + ontologies: + - edam: "http://edamontology.org/format_3750" # YAML + +authors: + - "@vagkaratzas" +maintainers: + - "@vagkaratzas" diff --git a/nextflow.config b/nextflow.config index 9c2b60fb..ca113b3a 100644 --- a/nextflow.config +++ b/nextflow.config @@ -259,6 +259,9 @@ params { // Schema validation default options validate_params = true + gecco_convert_enable = true + gecco_convert_mode = null + gecco_convert_format = null } // Load base.config by default for all pipelines diff --git a/nextflow_schema.json b/nextflow_schema.json index 4c70e28d..12070f00 100644 --- a/nextflow_schema.json +++ b/nextflow_schema.json @@ -5,6 +5,34 @@ "description": "Pipeline for screening for functional components of assembled contigs", "type": "object", "$defs": { + + "gecco_options": { + "title": "GECCO biosynthetic gene cluster prediction options", + "type": "object", + "description": "Parameters for running GECCO BGC prediction", + "default": "", + "properties": { + "gecco_convert_enable": { + "type": "boolean", + "default": false, + "description": "Enable conversion of contig FASTA files to GECCO-compatible format.", + "help_text": "GECCO requires contig FASTA files to be in a specific format. Enabling this option will convert the input FASTA files to the required format before running GECCO.\n\n> Modifies tool parameter(s):\n> - gecco convert: `--enable`" + }, + "gecco_convert_mode": { + "type": "string", + "default": "default", + "description": "Mode for converting contig FASTA files for GECCO.", + "help_text": "Select the mode for converting contig FASTA files. 'default' uses standard conversion settings, while 'custom' allows for user-defined parameters.\n\n> Modifies tool parameter(s):\n> - gecco convert: `--mode`" + }, + "gecco_convert_format": { + "type": "string", + "default": "gff", + "description": "Output format for converted contig files.", + "help_text": "Specify the output format for the converted contig files. Options include 'fasta' and 'genbank'.\n\n> Modifies tool parameter(s):\n> - gecco convert: `--format`" + } + }, + "fa_icon": "fas fa-angle-double-right" + }, "input_output_options": { "title": "Input/output options", "type": "object", diff --git a/subworkflows/local/bgc.nf b/subworkflows/local/bgc.nf index 6bdc2881..2d852dac 100644 --- a/subworkflows/local/bgc.nf +++ b/subworkflows/local/bgc.nf @@ -12,6 +12,7 @@ include { DEEPBGC_PIPELINE } from '../../modules/nf-core/d include { COMBGC } from '../../modules/local/combgc' include { TABIX_BGZIP as BGC_TABIX_BGZIP } from '../../modules/nf-core/tabix/bgzip/main' include { MERGE_TAXONOMY_COMBGC } from '../../modules/local/merge_taxonomy_combgc' +incude { GECCO_CONVERT } from '../../modules/nf-core/gecco/convert/main' workflow BGC { take: @@ -104,6 +105,22 @@ workflow BGC { ch_bgcresults_for_combgc = ch_bgcresults_for_combgc.mix(ch_geccoresults_for_combgc) } + // GECCO CONVERT + if (params.gecco_convert_enable) { + ch_gecco_clusters_and_gbk = GECCO_RUN.out.clusters + .join(GECCO_RUN.out.gbk) + .map { meta, clusters_file, gbk_file -> + def mode = params.gecco_convert_mode ?: 'clusters' + def format = params.gecco_convert_format ?: 'gff' + [ meta, + (mode == 'clusters' ? 
clusters_file : null), + (mode == 'gbk' ? gbk_file : null), + mode, + format ] + } + + GECCO_CONVERT(ch_gecco_clusters_and_gbk) + // HMMSEARCH if (params.bgc_run_hmmsearch) { if (params.bgc_hmmsearch_models) { From 0012b8a7c3b6d9585d6948afece62bee75a31909 Mon Sep 17 00:00:00 2001 From: Dediu Codrin Date: Sun, 2 Nov 2025 14:47:29 +0200 Subject: [PATCH 02/26] Rearranging the code --- nextflow.config | 8 ++++---- subworkflows/local/bgc.nf | 2 +- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/nextflow.config b/nextflow.config index ca113b3a..efd908c0 100644 --- a/nextflow.config +++ b/nextflow.config @@ -250,6 +250,9 @@ params { bgc_gecco_pfilter = 1E-9 bgc_gecco_edgedistance = 0 bgc_gecco_mask = false + run_gecco_convert = true + gecco_convert_mode = null + gecco_convert_format = null bgc_run_hmmsearch = false bgc_hmmsearch_models = null @@ -258,10 +261,7 @@ params { bgc_hmmsearch_savedomains = false // Schema validation default options - validate_params = true - gecco_convert_enable = true - gecco_convert_mode = null - gecco_convert_format = null + validate_params = true } // Load base.config by default for all pipelines diff --git a/subworkflows/local/bgc.nf b/subworkflows/local/bgc.nf index 2d852dac..0d88f7d6 100644 --- a/subworkflows/local/bgc.nf +++ b/subworkflows/local/bgc.nf @@ -106,7 +106,7 @@ workflow BGC { } // GECCO CONVERT - if (params.gecco_convert_enable) { + if (params.run_gecco_convert) { ch_gecco_clusters_and_gbk = GECCO_RUN.out.clusters .join(GECCO_RUN.out.gbk) .map { meta, clusters_file, gbk_file -> From 518a7a168051370ffa7a5a7f728dce124cd12917 Mon Sep 17 00:00:00 2001 From: Dediu Codrin Date: Sun, 2 Nov 2025 15:00:33 +0200 Subject: [PATCH 03/26] fixing --- subworkflows/local/bgc.nf | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/subworkflows/local/bgc.nf b/subworkflows/local/bgc.nf index 0d88f7d6..aab29ffb 100644 --- a/subworkflows/local/bgc.nf +++ b/subworkflows/local/bgc.nf @@ -120,7 +120,7 @@ workflow BGC { } GECCO_CONVERT(ch_gecco_clusters_and_gbk) - + } // HMMSEARCH if (params.bgc_run_hmmsearch) { if (params.bgc_hmmsearch_models) { From f63667069659e52c31dbc8e7ab65d3ae5bdc811b Mon Sep 17 00:00:00 2001 From: Dediu Codrin Date: Sun, 2 Nov 2025 15:10:50 +0200 Subject: [PATCH 04/26] fixing typos --- subworkflows/local/bgc.nf | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/subworkflows/local/bgc.nf b/subworkflows/local/bgc.nf index aab29ffb..b374163a 100644 --- a/subworkflows/local/bgc.nf +++ b/subworkflows/local/bgc.nf @@ -12,7 +12,7 @@ include { DEEPBGC_PIPELINE } from '../../modules/nf-core/d include { COMBGC } from '../../modules/local/combgc' include { TABIX_BGZIP as BGC_TABIX_BGZIP } from '../../modules/nf-core/tabix/bgzip/main' include { MERGE_TAXONOMY_COMBGC } from '../../modules/local/merge_taxonomy_combgc' -incude { GECCO_CONVERT } from '../../modules/nf-core/gecco/convert/main' +include { GECCO_CONVERT } from '../../modules/nf-core/gecco/convert/main' workflow BGC { take: From 0fd918f90675bc5c18e75de87a293575f1e5e9c1 Mon Sep 17 00:00:00 2001 From: Dediu Codrin Date: Mon, 3 Nov 2025 09:18:11 +0200 Subject: [PATCH 05/26] fixing some error --- subworkflows/local/bgc.nf | 14 +++++--------- 1 file changed, 5 insertions(+), 9 deletions(-) diff --git a/subworkflows/local/bgc.nf b/subworkflows/local/bgc.nf index b374163a..7f272145 100644 --- a/subworkflows/local/bgc.nf +++ b/subworkflows/local/bgc.nf @@ -108,18 +108,14 @@ workflow BGC { // GECCO CONVERT if (params.run_gecco_convert) { ch_gecco_clusters_and_gbk = 
GECCO_RUN.out.clusters - .join(GECCO_RUN.out.gbk) + .join(GECCO_RUN.out.gbk) .map { meta, clusters_file, gbk_file -> - def mode = params.gecco_convert_mode ?: 'clusters' - def format = params.gecco_convert_format ?: 'gff' - [ meta, - (mode == 'clusters' ? clusters_file : null), - (mode == 'gbk' ? gbk_file : null), - mode, - format ] + [ meta, clusters_file, gbk_file ] } + ch_gecco_mode = Channel.value( params.gecco_convert_mode ?: 'clusters' ) + ch_gecco_format = Channel.value( params.gecco_convert_format ?: 'gff' ) - GECCO_CONVERT(ch_gecco_clusters_and_gbk) + GECCO_CONVERT(ch_gecco_clusters_and_gbk, ch_gecco_mode, ch_gecco_format) } // HMMSEARCH if (params.bgc_run_hmmsearch) { From a1bcca75bb14031a3a0d9af3a379398f4a882e5e Mon Sep 17 00:00:00 2001 From: Dediu Codrin Date: Tue, 4 Nov 2025 11:22:10 +0200 Subject: [PATCH 06/26] polishing the integration --- conf/modules.config | 8 ++++++++ nextflow.config | 6 +++--- nextflow_schema.json | 4 ++-- 3 files changed, 13 insertions(+), 5 deletions(-) diff --git a/conf/modules.config b/conf/modules.config index 2812093b..a787f31e 100644 --- a/conf/modules.config +++ b/conf/modules.config @@ -529,6 +529,14 @@ process { ].join(' ').trim() } + withName: GECCO_CONVERT { + publishDir = [ + path: { "${params.outdir}/bgc/gecco/${meta.id}" }, + mode: params.publish_dir_mode, + saveAs: { filename -> filename.equals('versions.yml') ? null : filename }, + ] + } + withName: HAMRONIZATION_ABRICATE { publishDir = [ path: { "${params.outdir}/arg/hamronization/abricate" }, diff --git a/nextflow.config b/nextflow.config index efd908c0..a65978ec 100644 --- a/nextflow.config +++ b/nextflow.config @@ -250,9 +250,9 @@ params { bgc_gecco_pfilter = 1E-9 bgc_gecco_edgedistance = 0 bgc_gecco_mask = false - run_gecco_convert = true - gecco_convert_mode = null - gecco_convert_format = null + run_gecco_convert = false + gecco_convert_mode = 'clusters' + gecco_convert_format = 'gff' bgc_run_hmmsearch = false bgc_hmmsearch_models = null diff --git a/nextflow_schema.json b/nextflow_schema.json index 12070f00..b09ae7a0 100644 --- a/nextflow_schema.json +++ b/nextflow_schema.json @@ -12,7 +12,7 @@ "description": "Parameters for running GECCO BGC prediction", "default": "", "properties": { - "gecco_convert_enable": { + "run_gecco_convert": { "type": "boolean", "default": false, "description": "Enable conversion of contig FASTA files to GECCO-compatible format.", @@ -20,7 +20,7 @@ }, "gecco_convert_mode": { "type": "string", - "default": "default", + "default": "clusters", "description": "Mode for converting contig FASTA files for GECCO.", "help_text": "Select the mode for converting contig FASTA files. 
'default' uses standard conversion settings, while 'custom' allows for user-defined parameters.\n\n> Modifies tool parameter(s):\n> - gecco convert: `--mode`" }, From 8e4f163873d1ab4447779066e00ce3e43c578258 Mon Sep 17 00:00:00 2001 From: Dediu Codrin Date: Tue, 4 Nov 2025 12:50:07 +0200 Subject: [PATCH 07/26] fixing warnings --- nextflow_schema.json | 59 ++++++++++++++++++++------------------- subworkflows/local/bgc.nf | 4 +-- 2 files changed, 33 insertions(+), 30 deletions(-) diff --git a/nextflow_schema.json b/nextflow_schema.json index b09ae7a0..6ba0b037 100644 --- a/nextflow_schema.json +++ b/nextflow_schema.json @@ -5,34 +5,6 @@ "description": "Pipeline for screening for functional components of assembled contigs", "type": "object", "$defs": { - - "gecco_options": { - "title": "GECCO biosynthetic gene cluster prediction options", - "type": "object", - "description": "Parameters for running GECCO BGC prediction", - "default": "", - "properties": { - "run_gecco_convert": { - "type": "boolean", - "default": false, - "description": "Enable conversion of contig FASTA files to GECCO-compatible format.", - "help_text": "GECCO requires contig FASTA files to be in a specific format. Enabling this option will convert the input FASTA files to the required format before running GECCO.\n\n> Modifies tool parameter(s):\n> - gecco convert: `--enable`" - }, - "gecco_convert_mode": { - "type": "string", - "default": "clusters", - "description": "Mode for converting contig FASTA files for GECCO.", - "help_text": "Select the mode for converting contig FASTA files. 'default' uses standard conversion settings, while 'custom' allows for user-defined parameters.\n\n> Modifies tool parameter(s):\n> - gecco convert: `--mode`" - }, - "gecco_convert_format": { - "type": "string", - "default": "gff", - "description": "Output format for converted contig files.", - "help_text": "Specify the output format for the converted contig files. Options include 'fasta' and 'genbank'.\n\n> Modifies tool parameter(s):\n> - gecco convert: `--format`" - } - }, - "fa_icon": "fas fa-angle-double-right" - }, "input_output_options": { "title": "Input/output options", "type": "object", @@ -1446,6 +1418,34 @@ }, "fa_icon": "fas fa-angle-double-right" }, + + "gecco_options": { + "title": "GECCO biosynthetic gene cluster prediction options", + "type": "object", + "description": "Parameters for running GECCO BGC prediction", + "default": "", + "properties": { + "run_gecco_convert": { + "type": "boolean", + "default": false, + "description": "Enable conversion of contig FASTA files to GECCO-compatible format.", + "help_text": "GECCO requires contig FASTA files to be in a specific format. Enabling this option will convert the input FASTA files to the required format before running GECCO.\n\n> Modifies tool parameter(s):\n> - gecco convert: `--enable`" + }, + "gecco_convert_mode": { + "type": "string", + "default": "clusters", + "description": "Mode for converting contig FASTA files for GECCO.", + "help_text": "Select the mode for converting contig FASTA files. 'default' uses standard conversion settings, while 'custom' allows for user-defined parameters.\n\n> Modifies tool parameter(s):\n> - gecco convert: `--mode`" + }, + "gecco_convert_format": { + "type": "string", + "default": "gff", + "description": "Output format for converted contig files.", + "help_text": "Specify the output format for the converted contig files. 
Options include 'fasta' and 'genbank'.\n\n> Modifies tool parameter(s):\n> - gecco convert: `--format`" + } + }, + "fa_icon": "fas fa-angle-double-right" + }, "bgc_hmmsearch": { "title": "BGC: hmmsearch", "type": "object", @@ -1731,6 +1731,9 @@ { "$ref": "#/$defs/bgc_gecco" }, + { + "$ref": "#/$defs/gecco_options" + }, { "$ref": "#/$defs/bgc_hmmsearch" }, diff --git a/subworkflows/local/bgc.nf b/subworkflows/local/bgc.nf index 7f272145..856fb046 100644 --- a/subworkflows/local/bgc.nf +++ b/subworkflows/local/bgc.nf @@ -112,8 +112,8 @@ workflow BGC { .map { meta, clusters_file, gbk_file -> [ meta, clusters_file, gbk_file ] } - ch_gecco_mode = Channel.value( params.gecco_convert_mode ?: 'clusters' ) - ch_gecco_format = Channel.value( params.gecco_convert_format ?: 'gff' ) + ch_gecco_mode = Channel.value( params.gecco_convert_mode) + ch_gecco_format = Channel.value( params.gecco_convert_format) GECCO_CONVERT(ch_gecco_clusters_and_gbk, ch_gecco_mode, ch_gecco_format) } From 686b2ffdc5b24970ba394228168d6bf60fa4b237 Mon Sep 17 00:00:00 2001 From: Dediu Codrin Date: Tue, 11 Nov 2025 16:46:43 +0200 Subject: [PATCH 08/26] Update for the schema and the function calling --- nextflow_schema.json | 18 +++++++++--------- subworkflows/local/bgc.nf | 4 +--- 2 files changed, 10 insertions(+), 12 deletions(-) diff --git a/nextflow_schema.json b/nextflow_schema.json index 6ba0b037..cab3efe4 100644 --- a/nextflow_schema.json +++ b/nextflow_schema.json @@ -1419,29 +1419,29 @@ "fa_icon": "fas fa-angle-double-right" }, - "gecco_options": { - "title": "GECCO biosynthetic gene cluster prediction options", + "bgc_gecco_convert": { + "title": "GECCO BGC conversion option", "type": "object", - "description": "Parameters for running GECCO BGC prediction", + "description": "Settings for converting GECCO output files.", "default": "", "properties": { "run_gecco_convert": { "type": "boolean", "default": false, - "description": "Enable conversion of contig FASTA files to GECCO-compatible format.", - "help_text": "GECCO requires contig FASTA files to be in a specific format. Enabling this option will convert the input FASTA files to the required format before running GECCO.\n\n> Modifies tool parameter(s):\n> - gecco convert: `--enable`" + "description": "Enable GECCO file conversion.", + "help_text": "Converts GECCO output into formats like GFF3, GenBank, or FASTA for further analysis. Modifies tool parameter - gecco convert: `--run_gecco_convert true`" }, "gecco_convert_mode": { "type": "string", "default": "clusters", - "description": "Mode for converting contig FASTA files for GECCO.", - "help_text": "Select the mode for converting contig FASTA files. 'default' uses standard conversion settings, while 'custom' allows for user-defined parameters.\n\n> Modifies tool parameter(s):\n> - gecco convert: `--mode`" + "description": "Select conversion mode.", + "help_text": "Either clusters or gbk folder output, depending on what is reformatted. Modifies tool parameter: - gecco convert: `--gecco_convert_mode `" }, "gecco_convert_format": { "type": "string", "default": "gff", - "description": "Output format for converted contig files.", - "help_text": "Specify the output format for the converted contig files. Options include 'fasta' and 'genbank'.\n\n> Modifies tool parameter(s):\n> - gecco convert: `--format`" + "description": "Set output format.", + "help_text": "Choose output format: 'gff', 'fasta', or 'genbank'. 
Modifies tool parameter: - gecco convert: `--gecco_convert_format`" } }, "fa_icon": "fas fa-angle-double-right" diff --git a/subworkflows/local/bgc.nf b/subworkflows/local/bgc.nf index 856fb046..435ee608 100644 --- a/subworkflows/local/bgc.nf +++ b/subworkflows/local/bgc.nf @@ -112,10 +112,8 @@ workflow BGC { .map { meta, clusters_file, gbk_file -> [ meta, clusters_file, gbk_file ] } - ch_gecco_mode = Channel.value( params.gecco_convert_mode) - ch_gecco_format = Channel.value( params.gecco_convert_format) - GECCO_CONVERT(ch_gecco_clusters_and_gbk, ch_gecco_mode, ch_gecco_format) + GECCO_CONVERT(ch_gecco_clusters_and_gbk, params.gecco_convert_mode, params.gecco_convert_format) } // HMMSEARCH if (params.bgc_run_hmmsearch) { From ed55447f5e384b500036ae81f9bb4fe84d07653f Mon Sep 17 00:00:00 2001 From: Dediu Codrin Date: Mon, 17 Nov 2025 16:26:07 +0200 Subject: [PATCH 09/26] updated output.md --- docs/output.md | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/docs/output.md b/docs/output.md index 289d9086..86586603 100644 --- a/docs/output.md +++ b/docs/output.md @@ -466,6 +466,20 @@ Note that filtered FASTA is only used for BGC workflow for run-time optimisation [GECCO](https://gecco.embl.de) is a fast and scalable method for identifying putative novel Biosynthetic Gene Clusters (BGCs) in genomic and metagenomic data using Conditional Random Fields (CRFs). +#### GECCO CONVERT +
+Output files + +- `gecco/` + - `*.gff`: GFF3 converted cluster tables containing the position and metadata for all the predicted clusters + - `*.region*.gbk`: Converted and aliased GenBank files so that they can be loaded by BiG-SLiCE + - `*.faa`: Amino-acid FASTA converted GenBank files of all the proteins in a cluster + - `*.fna`: Nucleotide sequence FASTA converted GenBank files from the cluster + +
+ +[GECCO CONVERT] (https://gecco.embl.de) is an option in gecco which does file conversion into formats like GFF3, GenBank, or FASTA for further analysis. + ### Summary tools [AMPcombi](#ampcombi), [hAMRonization](#hamronization), [comBGC](#combgc), [MultiQC](#multiqc), [pipeline information](#pipeline-information), [argNorm](#argnorm). From 242592dfbae542e82701ca476edcdbf446b220b3 Mon Sep 17 00:00:00 2001 From: Dediu Codrin Date: Tue, 18 Nov 2025 10:12:51 +0200 Subject: [PATCH 10/26] added tests --- modules/nf-core/gecco/convert/environment.yml | 2 +- modules/nf-core/gecco/convert/main.nf | 12 +- modules/nf-core/gecco/convert/meta.yml | 3 +- .../nf-core/gecco/convert/tests/main.nf.test | 118 ++++++++++++++++++ .../gecco/convert/tests/main.nf.test.snap | 112 +++++++++++++++++ 5 files changed, 239 insertions(+), 8 deletions(-) create mode 100644 modules/nf-core/gecco/convert/tests/main.nf.test create mode 100644 modules/nf-core/gecco/convert/tests/main.nf.test.snap diff --git a/modules/nf-core/gecco/convert/environment.yml b/modules/nf-core/gecco/convert/environment.yml index bb47bc85..7eefcd9e 100644 --- a/modules/nf-core/gecco/convert/environment.yml +++ b/modules/nf-core/gecco/convert/environment.yml @@ -4,4 +4,4 @@ channels: - conda-forge - bioconda dependencies: - - bioconda::gecco=0.9.10 + - bioconda::gecco=0.10.0 diff --git a/modules/nf-core/gecco/convert/main.nf b/modules/nf-core/gecco/convert/main.nf index 2eb033af..f1022d5f 100644 --- a/modules/nf-core/gecco/convert/main.nf +++ b/modules/nf-core/gecco/convert/main.nf @@ -1,11 +1,11 @@ process GECCO_CONVERT { tag "$meta.id" - label 'process_single' + label 'process_low' conda "${moduleDir}/environment.yml" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
- 'https://depot.galaxyproject.org/singularity/gecco:0.9.10--pyhdfd78af_0': - 'biocontainers/gecco:0.9.10--pyhdfd78af_0' }" + 'https://depot.galaxyproject.org/singularity/gecco:0.10.0--pyhdfd78af_0': + 'biocontainers/gecco:0.10.0--pyhdfd78af_0' }" input: tuple val(meta), path(clusters), path(gbk) @@ -24,15 +24,15 @@ process GECCO_CONVERT { script: def args = task.ext.args ?: '' - def prefix = task.ext.prefix ?: "${meta.id}" + def prefix = task.ext.prefix ?: "${meta.id}" // IMPORTANT: -o ${prefix} does not work in 0.10.0 """ gecco \\ convert \\ $args \\ + --jobs $task.cpus \\ $mode \\ --input-dir ./ \\ - --format ${format} \\ - --output ${prefix} + --format ${format} cat <<-END_VERSIONS > versions.yml "${task.process}": diff --git a/modules/nf-core/gecco/convert/meta.yml b/modules/nf-core/gecco/convert/meta.yml index 137d826c..bf58cb52 100644 --- a/modules/nf-core/gecco/convert/meta.yml +++ b/modules/nf-core/gecco/convert/meta.yml @@ -76,7 +76,8 @@ output: description: | Converted and aliased GenBank files so that they can be loaded by BiG-SLiCE pattern: "*.region*.gbk" - ontologies: [] + ontologies: + - edam: "http://edamontology.org/format_1936" # GenBank faa: - - meta: type: map diff --git a/modules/nf-core/gecco/convert/tests/main.nf.test b/modules/nf-core/gecco/convert/tests/main.nf.test new file mode 100644 index 00000000..769a1a87 --- /dev/null +++ b/modules/nf-core/gecco/convert/tests/main.nf.test @@ -0,0 +1,118 @@ +nextflow_process { + + name "Test Process GECCO_CONVERT" + script "../main.nf" + process "GECCO_CONVERT" + + tag "modules" + tag "modules_nfcore" + tag "gecco" + tag "gecco/convert" + tag "gecco/run" + + setup { + run("GECCO_RUN") { + script "../../run/main.nf" + process { + """ + input[0] = [ + [ id:'test_gecco', single_end:true ], // meta map + file(params.modules_testdata_base_path + 'genomics/prokaryotes/candidatus_portiera_aleyrodidarum/genome/genome.fasta', checkIfExists: true), + [] + ] + input[1] = [] + """ + } + } + } + + test("candidatus_portiera_aleyrodidarum - clusters - gff") { + + when { + process { + """ + input[0] = GECCO_RUN.out.clusters + .mix(GECCO_RUN.out.gbk) + .groupTuple(by:0) + .map { meta, paths -> + [meta, paths[0], paths[1]] + } + input[1] = "clusters" + input[2] = "gff" + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot( + process.out.gff, + process.out.versions.collect{ path(it).yaml } + ).match() } + ) + } + + } + + test("candidatus_portiera_aleyrodidarum - gbk - faa") { + + when { + process { + """ + input[0] = GECCO_RUN.out.clusters + .mix(GECCO_RUN.out.gbk) + .groupTuple(by:0) + .map { meta, paths -> + [meta, paths[0], paths[1]] + } + input[1] = "gbk" + input[2] = "faa" + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot( + process.out.faa, + process.out.versions.collect{ path(it).yaml } + ).match() } + ) + } + + } + + test("candidatus_portiera_aleyrodidarum - clusters - gff - stub") { + + options "-stub" + + when { + process { + """ + input[0] = GECCO_RUN.out.clusters + .mix(GECCO_RUN.out.gbk) + .groupTuple(by:0) + .map { meta, paths -> + [meta, paths[0], paths[1]] + } + input[1] = "clusters" + input[2] = "gff" + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot( + process.out, + process.out.versions.collect{ path(it).yaml } + ).match() } + ) + } + + } + +} diff --git a/modules/nf-core/gecco/convert/tests/main.nf.test.snap b/modules/nf-core/gecco/convert/tests/main.nf.test.snap new file mode 100644 index 00000000..96ce6a8c --- 
/dev/null +++ b/modules/nf-core/gecco/convert/tests/main.nf.test.snap @@ -0,0 +1,112 @@ +{ + "candidatus_portiera_aleyrodidarum - gbk - faa": { + "content": [ + [ + [ + { + "id": "test_gecco", + "single_end": true + }, + "NC_018507.1_cluster_1.faa:md5,82c70d6273c21eadf2d16f5fcdcd5e7f" + ] + ], + [ + { + "GECCO_CONVERT": { + "gecco": "0.10.0" + } + } + ] + ], + "meta": { + "nf-test": "0.9.3", + "nextflow": "25.10.0" + }, + "timestamp": "2025-11-09T09:00:23.11547703" + }, + "candidatus_portiera_aleyrodidarum - clusters - gff": { + "content": [ + [ + [ + { + "id": "test_gecco", + "single_end": true + }, + "test_gecco.clusters.gff:md5,21437cce86b3880f2c4d41798563d0df" + ] + ], + [ + { + "GECCO_CONVERT": { + "gecco": "0.10.0" + } + } + ] + ], + "meta": { + "nf-test": "0.9.3", + "nextflow": "25.10.0" + }, + "timestamp": "2025-11-09T09:00:08.211056173" + }, + "candidatus_portiera_aleyrodidarum - clusters - gff - stub": { + "content": [ + { + "0": [ + [ + { + "id": "test_gecco", + "single_end": true + }, + "test_gecco.gff:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "1": [ + + ], + "2": [ + + ], + "3": [ + + ], + "4": [ + "versions.yml:md5,86c663d3d9423b28361d384e9146c0a7" + ], + "bigslice": [ + + ], + "faa": [ + + ], + "fna": [ + + ], + "gff": [ + [ + { + "id": "test_gecco", + "single_end": true + }, + "test_gecco.gff:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "versions": [ + "versions.yml:md5,86c663d3d9423b28361d384e9146c0a7" + ] + }, + [ + { + "GECCO_CONVERT": { + "gecco": "0.10.0" + } + } + ] + ], + "meta": { + "nf-test": "0.9.3", + "nextflow": "25.04.8" + }, + "timestamp": "2025-11-09T08:28:53.159566209" + } +} \ No newline at end of file From a5d011998ae83243793eb2819e94af6e1f11482d Mon Sep 17 00:00:00 2001 From: Dediu Codrin Date: Thu, 20 Nov 2025 00:32:16 +0200 Subject: [PATCH 11/26] fixed allOf section for gecco convert --- nextflow_schema.json | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/nextflow_schema.json b/nextflow_schema.json index cab3efe4..a87d66f3 100644 --- a/nextflow_schema.json +++ b/nextflow_schema.json @@ -1732,7 +1732,7 @@ "$ref": "#/$defs/bgc_gecco" }, { - "$ref": "#/$defs/gecco_options" + "$ref": "#/$defs/bgc_gecco_convert" }, { "$ref": "#/$defs/bgc_hmmsearch" From ecfdfd39ca8f20fdc7b2d64069b6bc2ce7a5bbe7 Mon Sep 17 00:00:00 2001 From: Dediu Codrin Date: Sun, 23 Nov 2025 14:35:32 +0200 Subject: [PATCH 12/26] redid the output to include gecco convert --- conf/modules.config | 2 +- docs/output.md | 17 ++++++----------- nextflow.config | 2 +- 3 files changed, 8 insertions(+), 13 deletions(-) diff --git a/conf/modules.config b/conf/modules.config index a787f31e..9749213f 100644 --- a/conf/modules.config +++ b/conf/modules.config @@ -536,7 +536,7 @@ process { saveAs: { filename -> filename.equals('versions.yml') ? null : filename }, ] } - + withName: HAMRONIZATION_ABRICATE { publishDir = [ path: { "${params.outdir}/arg/hamronization/abricate" }, diff --git a/docs/output.md b/docs/output.md index 86586603..c912078b 100644 --- a/docs/output.md +++ b/docs/output.md @@ -451,32 +451,27 @@ Note that filtered FASTA is only used for BGC workflow for run-time optimisation [deepBGC](https://github.com/Merck/deepbgc) detects BGCs in bacterial and fungal genomes using deep learning. DeepBGC employs a Bidirectional Long Short-Term Memory Recurrent Neural Network and a word2vec-like vector embedding of Pfam protein domains. Product class and activity of detected BGCs is predicted using a Random Forest classifier. -#### GECCO +#### GECCO & GECCO CONVERT
Output files - `gecco/` + - **GECCO** - `*.genes.tsv/`: TSV file containing detected/predicted genes with BGC probability scores - `*.features.tsv`: TSV file containing identified domains - `*.clusters.tsv`: TSV file containing coordinates of predicted clusters and BGC types - `*_cluster_*.gbk`: GenBank file (if clusters were found) containing sequence with annotations; one file per GECCO hit -
- -[GECCO](https://gecco.embl.de) is a fast and scalable method for identifying putative novel Biosynthetic Gene Clusters (BGCs) in genomic and metagenomic data using Conditional Random Fields (CRFs). - -#### GECCO CONVERT -
Output files - -- `gecco/` + - **GECCO CONVERT** - `*.gff`: GFF3 converted cluster tables containing the position and metadata for all the predicted clusters - `*.region*.gbk`: Converted and aliased GenBank files so that they can be loaded by BiG-SLiCE - `*.faa`: Amino-acid FASTA converted GenBank files of all the proteins in a cluster - `*.fna`: Nucleotide sequence FASTA converted GenBank files from the cluster + **ONLY IF --run_gecco_convert** +
- +[GECCO](https://gecco.embl.de) is a fast and scalable method for identifying putative novel Biosynthetic Gene Clusters (BGCs) in genomic and metagenomic data using Conditional Random Fields (CRFs). [GECCO CONVERT] (https://gecco.embl.de) is an option in gecco which does file conversion into formats like GFF3, GenBank, or FASTA for further analysis. diff --git a/nextflow.config b/nextflow.config index a65978ec..28d5e7ec 100644 --- a/nextflow.config +++ b/nextflow.config @@ -261,7 +261,7 @@ params { bgc_hmmsearch_savedomains = false // Schema validation default options - validate_params = true + validate_params = true } // Load base.config by default for all pipelines From 81b1160da1700501086de12b4674216b59167e04 Mon Sep 17 00:00:00 2001 From: Dediu Codrin Date: Tue, 25 Nov 2025 10:40:57 +0200 Subject: [PATCH 13/26] updated changelog.md and readme.md --- CHANGELOG.md | 1 + README.md | 2 +- 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 7fea0044..c7f86ea8 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -8,6 +8,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ### `Added` - [#500](https://github.com/nf-core/funcscan/pull/500) Updated pipeline template to nf-core/tools version 3.4.1 (by @jfy133) +- [#506](https://github.com/nf-core/funcscan/pull/506) Gecco convert (by @SkyLexS) ### `Fixed` diff --git a/README.md b/README.md index 38667c2c..32fcd56e 100644 --- a/README.md +++ b/README.md @@ -92,7 +92,7 @@ nf-core/funcscan was originally written by Jasmin Frangenberg, Anan Ibrahim, Lou We thank the following people for their extensive assistance in the development of this pipeline: -Adam Talbot, Alexandru Mizeranschi, Hugo Tavares, Júlia Mir Pedrol, Martin Klapper, Mehrdad Jaberi, Robert Syme, Rosa Herbst, Vedanth Ramji, @Microbion. +Adam Talbot, Alexandru Mizeranschi, Hugo Tavares, Júlia Mir Pedrol, Martin Klapper, Mehrdad Jaberi, Robert Syme, Rosa Herbst, Vedanth Ramji, @Microbion, Dediu Octavian-Codrin. 
## Contributions and Support From 43a2964538580bb755e8fde956a38a6030f88cb0 Mon Sep 17 00:00:00 2001 From: Dediu Codrin Date: Tue, 25 Nov 2025 20:40:22 +0200 Subject: [PATCH 14/26] implemented the changes --- CHANGELOG.md | 2 +- conf/modules.config | 18 ++++++++++++++++++ docs/output.md | 12 ++++++------ nextflow.config | 6 +++--- nextflow_schema.json | 18 ++++++++++-------- subworkflows/local/bgc.nf | 5 +++-- 6 files changed, 41 insertions(+), 20 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index c7f86ea8..68d5148f 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -8,7 +8,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ### `Added` - [#500](https://github.com/nf-core/funcscan/pull/500) Updated pipeline template to nf-core/tools version 3.4.1 (by @jfy133) -- [#506](https://github.com/nf-core/funcscan/pull/506) Gecco convert (by @SkyLexS) +- [#506](https://github.com/nf-core/funcscan/pull/506) Added support for GECCO convert to generate additional files useful for downstream analysis (by @SkyLexS) ### `Fixed` diff --git a/conf/modules.config b/conf/modules.config index 9749213f..88b85b91 100644 --- a/conf/modules.config +++ b/conf/modules.config @@ -486,6 +486,24 @@ process { ] } + withName 'BIGSLICE_RUN'{ + publishDir = [ + path : { "${params.outdir}/bigslice"}, + mode : copy, + ] + } + + withName 'BIGSLICE_PREP_INPUT'{ + publishDir = [ + path : { "${params.outdir}/bigslice"}, + mode : copy, + ] + } + + withLabel: bigslice{ + container:'quay.io/biocontainers/bigslice:2.0.2--pyh8ed023e_0' + } + withName: DEEPBGC_DOWNLOAD { publishDir = [ path: { "${params.outdir}/databases/deepbgc" }, diff --git a/docs/output.md b/docs/output.md index c912078b..eba857fc 100644 --- a/docs/output.md +++ b/docs/output.md @@ -451,7 +451,7 @@ Note that filtered FASTA is only used for BGC workflow for run-time optimisation [deepBGC](https://github.com/Merck/deepbgc) detects BGCs in bacterial and fungal genomes using deep learning. DeepBGC employs a Bidirectional Long Short-Term Memory Recurrent Neural Network and a word2vec-like vector embedding of Pfam protein domains. Product class and activity of detected BGCs is predicted using a Random Forest classifier. -#### GECCO & GECCO CONVERT +#### GECCO
Output files @@ -464,16 +464,16 @@ Note that filtered FASTA is only used for BGC workflow for run-time optimisation - **GECCO CONVERT** - - `*.gff`: GFF3 converted cluster tables containing the position and metadata for all the predicted clusters - - `*.region*.gbk`: Converted and aliased GenBank files so that they can be loaded by BiG-SLiCE - - `*.faa`: Amino-acid FASTA converted GenBank files of all the proteins in a cluster - - `*.fna`: Nucleotide sequence FASTA converted GenBank files from the cluster - **ONLY IF --run_gecco_convert** + - `*.gff`: GFF3 converted cluster tables containing the position and metadata for all the predicted clusters (only if `--bgc_gecco_runconvert`) + - `*.region*.gbk`: Converted and aliased GenBank files so that they can be loaded by BiG-SLiCE (only if `--bgc_gecco_runconvert`) + - `*.faa`: Amino-acid FASTA converted GenBank files of all the proteins in a cluster (only if `--bgc_gecco_runconvert`) + - `*.fna`: Nucleotide sequence FASTA converted GenBank files from the cluster (only if `--bgc_gecco_runconvert`)
[GECCO](https://gecco.embl.de) is a fast and scalable method for identifying putative novel Biosynthetic Gene Clusters (BGCs) in genomic and metagenomic data using Conditional Random Fields (CRFs). [GECCO CONVERT] (https://gecco.embl.de) is an option in gecco which does file conversion into formats like GFF3, GenBank, or FASTA for further analysis. +The additional GFF3, GenBank, or FASTA files from `--bgc_gecco_runconvert`, can be useful for additional further analysis of the BGC hits. ### Summary tools diff --git a/nextflow.config b/nextflow.config index 28d5e7ec..06953321 100644 --- a/nextflow.config +++ b/nextflow.config @@ -250,9 +250,9 @@ params { bgc_gecco_pfilter = 1E-9 bgc_gecco_edgedistance = 0 bgc_gecco_mask = false - run_gecco_convert = false - gecco_convert_mode = 'clusters' - gecco_convert_format = 'gff' + bgc_gecco_runconvert = false + bgc_gecco_convertmode = 'clusters' + bgc_gecco_convertformat = 'gff' bgc_run_hmmsearch = false bgc_hmmsearch_models = null diff --git a/nextflow_schema.json b/nextflow_schema.json index a87d66f3..6b005ca0 100644 --- a/nextflow_schema.json +++ b/nextflow_schema.json @@ -1425,23 +1425,25 @@ "description": "Settings for converting GECCO output files.", "default": "", "properties": { - "run_gecco_convert": { + "bgc_gecco_runconvert": { "type": "boolean", "default": false, "description": "Enable GECCO file conversion.", - "help_text": "Converts GECCO output into formats like GFF3, GenBank, or FASTA for further analysis. Modifies tool parameter - gecco convert: `--run_gecco_convert true`" + "help_text": "Converts GECCO output into formats like GFF3, GenBank, or FASTA for further analysis. Modifies tool parameter - gecco convert: `--bgc_gecco_runconvert true`" }, - "gecco_convert_mode": { + "bgc_gecco_convertmode": { "type": "string", "default": "clusters", - "description": "Select conversion mode.", - "help_text": "Either clusters or gbk folder output, depending on what is reformatted. Modifies tool parameter: - gecco convert: `--gecco_convert_mode `" + "enum": ["clusters", "gbk"], + "description": "Specify conversion mode for GECCO convert.", + "help_text": "Either clusters or gbk folder output, depending on what is reformatted. Modifies tool parameter: - gecco convert: `--bgc_gecco_convertmode `" }, - "gecco_convert_format": { + "bgc_gecco_convertformat": { "type": "string", "default": "gff", - "description": "Set output format.", - "help_text": "Choose output format: 'gff', 'fasta', or 'genbank'. Modifies tool parameter: - gecco convert: `--gecco_convert_format`" + "enum": ["gff", "gbk", "fna", "faa"], + "description": "Specify output format for GECCO convert.", + "help_text": "Choose output format: 'gff', 'fasta', or 'genbank'. 
Modifies tool parameter: - gecco convert: `--bgc_gecco_convertformat`" } }, "fa_icon": "fas fa-angle-double-right" diff --git a/subworkflows/local/bgc.nf b/subworkflows/local/bgc.nf index 435ee608..7e2d41a5 100644 --- a/subworkflows/local/bgc.nf +++ b/subworkflows/local/bgc.nf @@ -66,6 +66,7 @@ workflow BGC { } ch_bgcresults_for_combgc = ch_bgcresults_for_combgc.mix(ch_antismashresults_for_combgc) + } // DEEPBGC @@ -106,14 +107,14 @@ workflow BGC { } // GECCO CONVERT - if (params.run_gecco_convert) { + if (params.bgc_gecco_runconvert) { ch_gecco_clusters_and_gbk = GECCO_RUN.out.clusters .join(GECCO_RUN.out.gbk) .map { meta, clusters_file, gbk_file -> [ meta, clusters_file, gbk_file ] } - GECCO_CONVERT(ch_gecco_clusters_and_gbk, params.gecco_convert_mode, params.gecco_convert_format) + GECCO_CONVERT(ch_gecco_clusters_and_gbk, params.bgc_gecco_convertmode, params.bgc_gecco_convertformat) } // HMMSEARCH if (params.bgc_run_hmmsearch) { From 2e6288833ec420c62b10b811e466db44a2d80425 Mon Sep 17 00:00:00 2001 From: Dediu Codrin Date: Tue, 25 Nov 2025 20:52:33 +0200 Subject: [PATCH 15/26] removing the unwanted code in the modules config --- conf/modules.config | 17 ----------------- 1 file changed, 17 deletions(-) diff --git a/conf/modules.config b/conf/modules.config index 88b85b91..d1236167 100644 --- a/conf/modules.config +++ b/conf/modules.config @@ -486,23 +486,6 @@ process { ] } - withName 'BIGSLICE_RUN'{ - publishDir = [ - path : { "${params.outdir}/bigslice"}, - mode : copy, - ] - } - - withName 'BIGSLICE_PREP_INPUT'{ - publishDir = [ - path : { "${params.outdir}/bigslice"}, - mode : copy, - ] - } - - withLabel: bigslice{ - container:'quay.io/biocontainers/bigslice:2.0.2--pyh8ed023e_0' - } withName: DEEPBGC_DOWNLOAD { publishDir = [ From c9244b78aafec0d9e02659beea85fc8a6d417c59 Mon Sep 17 00:00:00 2001 From: Dediu Codrin Date: Tue, 25 Nov 2025 20:55:41 +0200 Subject: [PATCH 16/26] fixing lint --- docs/output.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/output.md b/docs/output.md index eba857fc..e665cc19 100644 --- a/docs/output.md +++ b/docs/output.md @@ -473,7 +473,7 @@ Note that filtered FASTA is only used for BGC workflow for run-time optimisation [GECCO](https://gecco.embl.de) is a fast and scalable method for identifying putative novel Biosynthetic Gene Clusters (BGCs) in genomic and metagenomic data using Conditional Random Fields (CRFs). [GECCO CONVERT] (https://gecco.embl.de) is an option in gecco which does file conversion into formats like GFF3, GenBank, or FASTA for further analysis. -The additional GFF3, GenBank, or FASTA files from `--bgc_gecco_runconvert`, can be useful for additional further analysis of the BGC hits. +The additional GFF3, GenBank, or FASTA files from `--bgc_gecco_runconvert`, can be useful for additional further analysis of the BGC hits. ### Summary tools From 000326b3484196537330bbf748ec366682ccead4 Mon Sep 17 00:00:00 2001 From: Dediu Codrin Date: Mon, 1 Dec 2025 11:51:55 +0200 Subject: [PATCH 17/26] removed unwanted comment --- docs/output.md | 1 - 1 file changed, 1 deletion(-) diff --git a/docs/output.md b/docs/output.md index e665cc19..5fb8a9e7 100644 --- a/docs/output.md +++ b/docs/output.md @@ -472,7 +472,6 @@ Note that filtered FASTA is only used for BGC workflow for run-time optimisation [GECCO](https://gecco.embl.de) is a fast and scalable method for identifying putative novel Biosynthetic Gene Clusters (BGCs) in genomic and metagenomic data using Conditional Random Fields (CRFs). 
-[GECCO CONVERT] (https://gecco.embl.de) is an option in gecco which does file conversion into formats like GFF3, GenBank, or FASTA for further analysis. The additional GFF3, GenBank, or FASTA files from `--bgc_gecco_runconvert`, can be useful for additional further analysis of the BGC hits. ### Summary tools From d6a9dcb8aae7a568729d74de48ca045c5c320b59 Mon Sep 17 00:00:00 2001 From: Dediu Codrin Date: Thu, 4 Dec 2025 08:44:05 +0200 Subject: [PATCH 18/26] Implemented schema modification --- nextflow_schema.json | 153 +++++++++++++++++++++++++++++-------------- 1 file changed, 105 insertions(+), 48 deletions(-) diff --git a/nextflow_schema.json b/nextflow_schema.json index 6b005ca0..3250bf09 100644 --- a/nextflow_schema.json +++ b/nextflow_schema.json @@ -10,7 +10,10 @@ "type": "object", "fa_icon": "fas fa-terminal", "description": "Define where the pipeline should find input data and save output data.", - "required": ["input", "outdir"], + "required": [ + "input", + "outdir" + ], "properties": { "input": { "type": "string", @@ -85,7 +88,9 @@ "help_text": "This flag specifies which tool for taxonomic classification should be activated. At the moment only 'MMseqs2' is incorporated in the pipeline.", "description": "Specifies the tool used for taxonomic classification.", "fa_icon": "fas fa-tools", - "enum": ["mmseqs2"] + "enum": [ + "mmseqs2" + ] }, "taxa_classification_mmseqs_compressed": { "type": "boolean", @@ -159,14 +164,14 @@ }, "taxa_classification_mmseqs_taxonomy_sensitivity": { "type": "number", - "default": 5.0, + "default": 5, "help_text": "This flag specifies the speed and sensitivity of the taxonomic search. It stands for how many kmers should be produced during the preliminary seeding stage. A very fast search requires a low value e.g. '1.0' and a a very sensitive search requires e.g. '7.0'. More details can be found in the [documentation](https://mmseqs.com/latest/userguide.pdf).\n\n> Modifies tool parameter(s):\n> - mmseqs taxonomy: `-s`", "description": "Specify the speed and sensitivity for taxonomy assignment.", "fa_icon": "fas fa-history" }, "taxa_classification_mmseqs_taxonomy_orffilters": { "type": "number", - "default": 2.0, + "default": 2, "help_text": "This flag specifies the sensitivity used for prefiltering the query ORF. Before the taxonomy-assigning step, MMseqs2 searches the predicted ORFs against the provided database. This value influences the speed with which the search is carried out. More details can be found in the [documentation](https://mmseqs.com/latest/userguide.pdf).\n\n> Modifies tool parameter(s):\n> - mmseqs taxonomy: `--orf-filter-s`", "description": "Specify the ORF search sensitivity in the prefilter step.", "fa_icon": "fas fa-history" @@ -198,7 +203,12 @@ "type": "string", "default": "pyrodigal", "description": "Specify which annotation tool to use for some downstream tools.", - "enum": ["prodigal", "pyrodigal", "prokka", "bakta"], + "enum": [ + "prodigal", + "pyrodigal", + "prokka", + "bakta" + ], "fa_icon": "fas fa-edit" }, "save_annotations": { @@ -227,7 +237,10 @@ "description": "Download full or light version of the Bakta database if not supplying own database.", "help_text": "If you want the pipeline to download the Bakta database for you, you can choose between the full (33.1 GB) and light (1.3 GB) version. 
The full version is generally recommended for best annotation results, because it contains all of these:\n\n- UPS: unique protein sequences identified via length and MD5 hash digests (100% coverage & 100% sequence identity)\n- IPS: identical protein sequences comprising seeds of UniProt's UniRef100 protein sequence clusters\n- PSC: protein sequences clusters comprising seeds of UniProt's UniRef90 protein sequence clusters\n- PSCC: protein sequences clusters of clusters comprising annotations of UniProt's UniRef50 protein sequence clusters\n\nIf download bandwidth, storage, memory, or run duration requirements become an issue, go for the light version (which only contains PSCCs) by modifying the `annotation_bakta_db_downloadtype` flag.\n\nMore details can be found in the [documentation](https://github.com/oschwengers/bakta#database)\n\n> Modifies tool parameter(s):\n> - BAKTA_DBDOWNLOAD: `--type`", "fa_icon": "fas fa-database", - "enum": ["full", "light"], + "enum": [ + "full", + "light" + ], "default": "full" }, "annotation_bakta_singlemode": { @@ -256,7 +269,11 @@ "annotation_bakta_gram": { "type": "string", "default": "?", - "enum": ["+", "-", "?"], + "enum": [ + "+", + "-", + "?" + ], "description": "Specify the type of bacteria to be annotated to detect signaling peptides.", "help_text": "Specify the type of bacteria expected in the input dataset for correct annotation of the signal peptide predictions. More details can be found in the [documentation](https://github.com/oschwengers/bakta/blob/main/README.md#usage).\n\n> Modifies tool parameter(s):\n> - BAKTA: `--gram`", "fa_icon": "far fa-plus-square" @@ -383,8 +400,13 @@ "default": "Bacteria", "fa_icon": "fas fa-crown", "description": "Specify the kingdom that the input represents.", - "help_text": "Specifies the kingdom that the input sample is derived from and/or you wish to screen for\n\n> \u26a0\ufe0f Prokka cannot annotate Eukaryotes.\n\nFor more information please check the Prokka [documentation](https://github.com/tseemann/prokka).\n\n> Modifies tool parameter(s):\n> - Prokka: `--kingdom`", - "enum": ["Archaea", "Bacteria", "Mitochondria", "Viruses"] + "help_text": "Specifies the kingdom that the input sample is derived from and/or you wish to screen for\n\n> ⚠️ Prokka cannot annotate Eukaryotes.\n\nFor more information please check the Prokka [documentation](https://github.com/tseemann/prokka).\n\n> Modifies tool parameter(s):\n> - Prokka: `--kingdom`", + "enum": [ + "Archaea", + "Bacteria", + "Mitochondria", + "Viruses" + ] }, "annotation_prokka_gcode": { "type": "integer", @@ -399,12 +421,12 @@ "type": "integer", "default": 1, "description": "Minimum contig size required for annotation (bp).", - "help_text": "Specify the minimum contig lengths to carry out annotations on. The Prokka developers recommend that this should be \u2265 200 bp, if you plan to submit such annotations to NCBI.\n\nFor more information please check the Prokka [documentation](https://github.com/tseemann/prokka).\n\n> Modifies tool parameter(s):\n> - Prokka: `--mincontiglen`", + "help_text": "Specify the minimum contig lengths to carry out annotations on. 
The Prokka developers recommend that this should be ≥ 200 bp, if you plan to submit such annotations to NCBI.\n\nFor more information please check the Prokka [documentation](https://github.com/tseemann/prokka).\n\n> Modifies tool parameter(s):\n> - Prokka: `--mincontiglen`", "fa_icon": "fas fa-ruler-horizontal" }, "annotation_prokka_evalue": { "type": "number", - "default": 1e-6, + "default": 0.000001, "description": "E-value cut-off.", "help_text": "Specifiy the maximum E-value used for filtering the alignment hits.\n\nFor more information please check the Prokka [documentation](https://github.com/tseemann/prokka).\n\n> Modifies tool parameter(s):\n> - Prokka: `--evalue`", "fa_icon": "fas fa-sort-amount-down" @@ -543,7 +565,9 @@ "help_text": "This flag specifies which tool for protein annotation should be activated.\nAt the moment only [InterProScan](https://github.com/ebi-pf-team/interproscan) is incorporated in the pipeline. This annotates the locus tags to protein and domain levels according to the InterPro databases.\n\nMore details can be found in the tool [documentation](https://interproscan-docs.readthedocs.io/en/latest/index.html).", "description": "Specifies the tool used for further protein annotation.", "fa_icon": "fas fa-tools", - "enum": ["InterProScan"] + "enum": [ + "InterProScan" + ] }, "protein_annotation_interproscan_db_url": { "type": "string", @@ -621,7 +645,10 @@ "default": "precursor", "description": "Specify which machine learning classification model to use.", "help_text": "Ampir uses a supervised statistical machine learning approach to predict AMPs. It incorporates two support vector machine classification models, \"precursor\" and \"mature\". \n\nThe precursor module is better for predicted proteins from a translated transcriptome or translated gene models. The alternative model (mature) is best suited for AMP sequences after post-translational processing, typically from direct proteomic sequencing.\n\nMore information can be found in the ampir [documentation](https://ampir.marine-omics.net/).\n\n> Modifies tool parameter(s):\n> - AMPir: `model =`", - "enum": ["precursor", "mature"], + "enum": [ + "precursor", + "mature" + ], "fa_icon": "fas fa-layer-group" }, "amp_ampir_minlength": { @@ -700,12 +727,16 @@ "help_text": "AMPcombi can use three different AMP databases to classify the recovered AMPS. These can either be: \n\n- [DRAMP database](http://dramp.cpu-bioinfor.org/downloads/): Only general AMPs are downloaded and filtered to remove any entry that has an instance of non amino acid residues in their sequence.\n\n- [APD](https://aps.unmc.edu/): Only experimentally validated AMPs are present.\n\n- [UniRef100](https://academic.oup.com/bioinformatics/article/23/10/1282/197795): Combines a more general protein dataset including curated and non curated AMPs. Helpful for identifying the clusters to remove any potential false positives. Beware: If the thresholds are for ampcombi are not strict enough, alignment with this database can take a long time. \n\nBy default this is set to 'DRAMP'. 
Other valid options include 'APD' or 'UniRef100'.\n\nFor more information check the AMPcombi [documentation](https://ampcombi.readthedocs.io/en/main/usage.html#parse-tables).", "fa_icon": "fas fa-address-book", "default": "DRAMP", - "enum": ["DRAMP", "APD", "UniRef100"] + "enum": [ + "DRAMP", + "APD", + "UniRef100" + ] }, "amp_ampcombi_db": { "type": "string", "description": "The path to the folder containing the reference database files.", - "help_text": "The path to the folder containing the reference database files (`*.fasta` and `*.tsv`); a fasta file and the corresponding table with structural, functional and if reported taxonomic classifications. AMPcombi will then generate the corresponding `mmseqs2` directory, in which all binary files are prepared for the downstream alignment of the recovered AMPs with [MMseqs2](https://github.com/soedinglab/MMseqs2). These can also be provided by the user by setting up an mmseqs2 compatible database using `mmseqs createdb *.fasta` in a directory called `mmseqs2`.\n\nExample file structure for the reference database supplied by the user:\n\n```bash\namp_DRAMP_database/\n\u251c\u2500\u2500 general_amps_2024_11_13.fasta\n\u251c\u2500\u2500 general_amps_2024_11_13.txt\n\u2514\u2500\u2500 mmseqs2\n \u251c\u2500\u2500 ref_DB\n \u251c\u2500\u2500 ref_DB.dbtype\n \u251c\u2500\u2500 ref_DB_h\n \u251c\u2500\u2500 ref_DB_h.dbtype\n \u251c\u2500\u2500 ref_DB_h.index\n \u251c\u2500\u2500 ref_DB.index\n \u251c\u2500\u2500 ref_DB.lookup\n \u2514\u2500\u2500 ref_DB.source```\n\nFor more information check the AMPcombi [documentation](https://ampcombi.readthedocs.io/en/main/usage.html#parse-tables)." + "help_text": "The path to the folder containing the reference database files (`*.fasta` and `*.tsv`); a fasta file and the corresponding table with structural, functional and if reported taxonomic classifications. AMPcombi will then generate the corresponding `mmseqs2` directory, in which all binary files are prepared for the downstream alignment of the recovered AMPs with [MMseqs2](https://github.com/soedinglab/MMseqs2). These can also be provided by the user by setting up an mmseqs2 compatible database using `mmseqs createdb *.fasta` in a directory called `mmseqs2`.\n\nExample file structure for the reference database supplied by the user:\n\n```bash\namp_DRAMP_database/\n├── general_amps_2024_11_13.fasta\n├── general_amps_2024_11_13.txt\n└── mmseqs2\n ├── ref_DB\n ├── ref_DB.dbtype\n ├── ref_DB_h\n ├── ref_DB_h.dbtype\n ├── ref_DB_h.index\n ├── ref_DB.index\n ├── ref_DB.lookup\n └── ref_DB.source```\n\nFor more information check the AMPcombi [documentation](https://ampcombi.readthedocs.io/en/main/usage.html#parse-tables)." }, "amp_ampcombi_parsetables_cutoff": { "type": "number", @@ -723,7 +754,7 @@ }, "amp_ampcombi_parsetables_dbevalue": { "type": "number", - "default": 5.0, + "default": 5, "description": "Remove all DRAMP annotations that have an e-value greater than this value.", "help_text": "This e-value is used as a cut-off for the annotations from the internal Diamond alignment step (against the DRAMP database by default). Any e-value below this value will only remove the DRAMP classification and not the entire hit.\n\n> Modifies tool parameter(s):\n> - AMPCOMBI: `--db_evalue`", "fa_icon": "fas fa-sort-numeric-down" @@ -794,14 +825,14 @@ "properties": { "amp_ampcombi_cluster_covmode": { "type": "number", - "default": 0.0, + "default": 0, "description": "MMseqs2 coverage mode.", "help_text": "This assigns the coverage mode to the MMseqs2 cluster module. 
This determines how AMPs are grouped into clusters. More details can be found in the [MMseqs2 documentation](https://mmseqs.com/latest/userguide.pdf).\n\n> Modifies tool parameter(s):\n> - AMPCOMBI: `--cluster_cov_mode`", "fa_icon": "far fa-circle" }, "amp_ampcombi_cluster_sensitivity": { "type": "number", - "default": 4.0, + "default": 4, "description": "Remove hits that have no stop codon upstream and downstream of the AMP.", "help_text": "This assigns the sensitivity of alignment to the MMseqs2 cluster module. This determines how AMPs are grouped into clusters. More information can be obtained in the [MMseqs2 documentation](https://mmseqs.com/latest/userguide.pdf).\n\n> Modifies tool parameter(s):\n> - AMPCOMBI: `--cluster_sensitivity`", "fa_icon": "fas fa-arrows-alt-h" @@ -815,7 +846,7 @@ }, "amp_ampcombi_cluster_mode": { "type": "number", - "default": 1.0, + "default": 1, "description": "MMseqs2 clustering mode.", "help_text": "This assigns the cluster mode to the MMseqs2 cluster module. This determines how AMPs are grouped into clusters. More information can be obtained in the [MMseqs2 documentation](https://mmseqs.com/latest/userguide.pdf).\n\n> Modifies tool parameter(s):\n> - AMPCOMBI: `--cluster_mode`", "fa_icon": "fas fa-circle" @@ -863,7 +894,7 @@ }, "arg_amrfinderplus_identmin": { "type": "number", - "default": -1.0, + "default": -1, "help_text": "Specify the minimum percentage amino-acid identity to reference protein or nucleotide identity for nucleotide reference must have if a BLAST alignment (based on methods: BLAST or PARTIAL) was detected, otherwise NA.\n\n If you specify `-1`, this means use a curated threshold if it exists and `0.9` otherwise.\n\nSetting this value to something other than `-1` will override any curated similarity cutoffs. For BLAST: alignment is > 90% of length and > 90% identity to a protein in the AMRFinderPlus database. For PARTIAL: alignment is > 50% of length, but < 90% of length and > 90% identity to the reference, and does not end at a contig boundary.\n\nFor more information check the AMRFinderPlus [documentation](https://github.com/ncbi/amr/wiki/Running-AMRFinderPlus#--organism-option).\n\n> Modifies tool parameter(s):\n> - AMRFinderPlus: `--ident_min`", "description": "Minimum percent identity to reference sequence.", "fa_icon": "fas fa-angle-left" @@ -927,7 +958,10 @@ "arg_deeparg_model": { "type": "string", "default": "LS", - "enum": ["LS", "SS"], + "enum": [ + "LS", + "SS" + ], "description": "Specify which model to use (short or long sequences).", "help_text": "Specify which model to use: short sequences for reads (`SS`), or long sequences for genes (`LS`). In the vast majority of cases we recommend using the `LS` model when using funcscan\n\nFor more information check the DeepARG [documentation](https://bitbucket.org/gusphdproj/deeparg-ss/src/master/deeparg/).\n\n> Modifies tool parameter(s):\n> - DeepARG: `--model`", "fa_icon": "fas fa-layer-group" @@ -1060,19 +1094,22 @@ "default": "BLAST", "description": "Specify the alignment tool to be used.", "help_text": "Specifies the alignment tool to be used. By default RGI runs BLAST and this is also set as default in the nf-core/funcscan pipeline. 
With this flag the user can choose between BLAST and DIAMOND for the alignment step.\n\nFor more information check the RGI [documentation](https://github.com/arpcard/rgi).\n\n> Modifies tool parameter(s):\n> - RGI_MAIN: `--alignment_tool`", - "enum": ["BLAST", "DIAMOND"], + "enum": [ + "BLAST", + "DIAMOND" + ], "fa_icon": "fas fa-align-justify" }, "arg_rgi_includeloose": { "type": "boolean", - "description": "Include all of loose, strict and perfect hits (i.e. \u2265 95% identity) found by RGI.", + "description": "Include all of loose, strict and perfect hits (i.e. ≥ 95% identity) found by RGI.", "help_text": "When activated RGI output will include 'Loose' hits in addition to 'Strict' and 'Perfect' hits. The 'Loose' algorithm works outside of the detection model cut-offs to provide detection of new, emergent threats and more distant homologs of AMR genes, but will also catalog homologous sequences and spurious partial matches that may not have a role in AMR.\n\nFor more information check the RGI [documentation](https://github.com/arpcard/rgi).\n\n> Modifies tool parameter(s):\n> - RGI_MAIN: `--include_loose`", "fa_icon": "far fa-hand-scissors" }, "arg_rgi_includenudge": { "type": "boolean", "description": "Suppresses the default behaviour of RGI with `--arg_rgi_includeloose`.", - "help_text": "This flag suppresses the default behaviour of RGI, by listing all 'Loose' matches of \u2265 95% identity as 'Strict' or 'Perfect', regardless of alignment length.\n\nFor more information check the RGI [documentation](https://github.com/arpcard/rgi).\n\n> Modifies tool parameter(s):\n> - RGI_MAIN: `--include_nudge`", + "help_text": "This flag suppresses the default behaviour of RGI, by listing all 'Loose' matches of ≥ 95% identity as 'Strict' or 'Perfect', regardless of alignment length.\n\nFor more information check the RGI [documentation](https://github.com/arpcard/rgi).\n\n> Modifies tool parameter(s):\n> - RGI_MAIN: `--include_nudge`", "fa_icon": "fas fa-hand-scissors" }, "arg_rgi_lowquality": { @@ -1086,7 +1123,12 @@ "default": "NA", "description": "Specify a more specific data-type of input (e.g. plasmid, chromosome).", "help_text": "This flag is used to specify the data type used as input to RGI. By default this is set as 'NA', which makes no assumptions on input data.\n\nFor more information check the RGI [documentation](https://github.com/arpcard/rgi).\n\n> Modifies tool parameter(s):\n> - RGI_MAIN: `--data`", - "enum": ["NA", "wgs", "plasmid", "chromosome"], + "enum": [ + "NA", + "wgs", + "plasmid", + "chromosome" + ], "fa_icon": "fas fa-dna" }, "arg_rgi_split_prodigal_jobs": { @@ -1153,7 +1195,11 @@ "arg_hamronization_summarizeformat": { "type": "string", "default": "tsv", - "enum": ["interactive", "tsv", "json"], + "enum": [ + "interactive", + "tsv", + "json" + ], "help_text": "Specifies which summary report format to apply with `hamronize summarize`: tsv, json or interactive (html)\n\n> Modifies tool parameter(s)\n> - hamronize summarize: `-t`, `--summary_type`", "description": "Specifies summary output format.", "fa_icon": "far fa-file-code" @@ -1257,7 +1303,11 @@ "description": "Defines which level of strictness to use for HMM-based cluster detection.", "help_text": "Levels of strictness correspond to screening different groups of 'how well-defined' clusters are. For example, `loose` will include screening for 'poorly defined' clusters (e.g. saccharides), `relaxed` for partially present clusters (e.g. 
certain types of NRPS), whereas `strict` will screen for well-defined clusters such as Ketosynthases.\n\nYou can see the rules for the levels of strictness [here](https://github.com/antismash/antismash/tree/master/antismash/detection/hmm_detection/cluster_rules).\n\nFor more information see the antiSMASH [documentation](https://docs.antismash.secondarymetabolites.org/).\n\n> Modifies tool parameter(s):\n> - antiSMASH: `--hmmdetection-strictness`", "fa_icon": "fas fa-search", - "enum": ["relaxed", "strict", "loose"] + "enum": [ + "relaxed", + "strict", + "loose" + ] }, "bgc_antismash_pfam2go": { "type": "boolean", @@ -1277,7 +1327,10 @@ "description": "Specify which taxonomic classification of input sequence to use.", "help_text": "This specifies which set of secondary metabolites to screen for, based on the taxon type the secondary metabolites are from.\n\nThis will run different pipelines depending on whether the input sequences are from bacteria or fungi.\n\nFor more information see the antiSMASH [documentation](https://docs.antismash.secondarymetabolites.org/).\n\n> Modifies tool parameter(s):\n> - antiSMASH: `--taxon`", "fa_icon": "fas fa-bacteria", - "enum": ["bacteria", "fungi"] + "enum": [ + "bacteria", + "fungi" + ] }, "bgc_antismash_tfbs": { "type": "boolean", @@ -1414,34 +1467,31 @@ "description": "The minimum number of annotated genes that must separate a cluster from the edge.", "help_text": "The minimum number of annotated genes that must separate a possible BGC cluster from the edge. Edge clusters will still be included if they are longer. A lower number will increase the number of false positives on small contigs. Used during BGC extraction.\n\nFor more information see the GECCO [documentation](https://github.com/zellerlab/GECCO).\n\n> Modifies tool parameter(s):\n> - GECCO: `--edge-distance`", "fa_icon": "fas fa-ruler-horizontal" - } - }, - "fa_icon": "fas fa-angle-double-right" - }, - - "bgc_gecco_convert": { - "title": "GECCO BGC conversion option", - "type": "object", - "description": "Settings for converting GECCO output files.", - "default": "", - "properties": { + }, "bgc_gecco_runconvert": { "type": "boolean", - "default": false, "description": "Enable GECCO file conversion.", "help_text": "Converts GECCO output into formats like GFF3, GenBank, or FASTA for further analysis. Modifies tool parameter - gecco convert: `--bgc_gecco_runconvert true`" }, "bgc_gecco_convertmode": { "type": "string", "default": "clusters", - "enum": ["clusters", "gbk"], + "enum": [ + "clusters", + "gbk" + ], "description": "Specify conversion mode for GECCO convert.", "help_text": "Either clusters or gbk folder output, depending on what is reformatted. Modifies tool parameter: - gecco convert: `--bgc_gecco_convertmode `" }, "bgc_gecco_convertformat": { "type": "string", "default": "gff", - "enum": ["gff", "gbk", "fna", "faa"], + "enum": [ + "gff", + "gbk", + "fna", + "faa" + ], "description": "Specify output format for GECCO convert.", "help_text": "Choose output format: 'gff', 'fasta', or 'genbank'. Modifies tool parameter: - gecco convert: `--bgc_gecco_convertformat`" } @@ -1554,7 +1604,14 @@ "description": "Method used to save pipeline results to output directory.", "help_text": "The Nextflow `publishDir` option specifies which intermediate files should be saved to the output directory. This option tells the pipeline what method should be used to move these files. 
See [Nextflow docs](https://www.nextflow.io/docs/latest/process.html#publishdir) for details.", "fa_icon": "fas fa-copy", - "enum": ["symlink", "rellink", "link", "copy", "copyNoFollow", "move"], + "enum": [ + "symlink", + "rellink", + "link", + "copy", + "copyNoFollow", + "move" + ], "hidden": true }, "email_on_fail": { @@ -1631,7 +1688,10 @@ "hidden": true }, "help": { - "type": ["boolean", "string"], + "type": [ + "boolean", + "string" + ], "description": "Display the help message." }, "help_full": { @@ -1733,9 +1793,6 @@ { "$ref": "#/$defs/bgc_gecco" }, - { - "$ref": "#/$defs/bgc_gecco_convert" - }, { "$ref": "#/$defs/bgc_hmmsearch" }, @@ -1746,4 +1803,4 @@ "$ref": "#/$defs/generic_options" } ] -} +} \ No newline at end of file From 8104d87b88eba2a79bcba10ed6f4a87d8bca834b Mon Sep 17 00:00:00 2001 From: Dediu Codrin Date: Thu, 4 Dec 2025 08:50:02 +0200 Subject: [PATCH 19/26] Linting --- nextflow_schema.json | 111 ++++++++----------------------------------- 1 file changed, 20 insertions(+), 91 deletions(-) diff --git a/nextflow_schema.json b/nextflow_schema.json index 3250bf09..36ad075a 100644 --- a/nextflow_schema.json +++ b/nextflow_schema.json @@ -10,10 +10,7 @@ "type": "object", "fa_icon": "fas fa-terminal", "description": "Define where the pipeline should find input data and save output data.", - "required": [ - "input", - "outdir" - ], + "required": ["input", "outdir"], "properties": { "input": { "type": "string", @@ -88,9 +85,7 @@ "help_text": "This flag specifies which tool for taxonomic classification should be activated. At the moment only 'MMseqs2' is incorporated in the pipeline.", "description": "Specifies the tool used for taxonomic classification.", "fa_icon": "fas fa-tools", - "enum": [ - "mmseqs2" - ] + "enum": ["mmseqs2"] }, "taxa_classification_mmseqs_compressed": { "type": "boolean", @@ -203,12 +198,7 @@ "type": "string", "default": "pyrodigal", "description": "Specify which annotation tool to use for some downstream tools.", - "enum": [ - "prodigal", - "pyrodigal", - "prokka", - "bakta" - ], + "enum": ["prodigal", "pyrodigal", "prokka", "bakta"], "fa_icon": "fas fa-edit" }, "save_annotations": { @@ -237,10 +227,7 @@ "description": "Download full or light version of the Bakta database if not supplying own database.", "help_text": "If you want the pipeline to download the Bakta database for you, you can choose between the full (33.1 GB) and light (1.3 GB) version. 
The full version is generally recommended for best annotation results, because it contains all of these:\n\n- UPS: unique protein sequences identified via length and MD5 hash digests (100% coverage & 100% sequence identity)\n- IPS: identical protein sequences comprising seeds of UniProt's UniRef100 protein sequence clusters\n- PSC: protein sequences clusters comprising seeds of UniProt's UniRef90 protein sequence clusters\n- PSCC: protein sequences clusters of clusters comprising annotations of UniProt's UniRef50 protein sequence clusters\n\nIf download bandwidth, storage, memory, or run duration requirements become an issue, go for the light version (which only contains PSCCs) by modifying the `annotation_bakta_db_downloadtype` flag.\n\nMore details can be found in the [documentation](https://github.com/oschwengers/bakta#database)\n\n> Modifies tool parameter(s):\n> - BAKTA_DBDOWNLOAD: `--type`", "fa_icon": "fas fa-database", - "enum": [ - "full", - "light" - ], + "enum": ["full", "light"], "default": "full" }, "annotation_bakta_singlemode": { @@ -269,11 +256,7 @@ "annotation_bakta_gram": { "type": "string", "default": "?", - "enum": [ - "+", - "-", - "?" - ], + "enum": ["+", "-", "?"], "description": "Specify the type of bacteria to be annotated to detect signaling peptides.", "help_text": "Specify the type of bacteria expected in the input dataset for correct annotation of the signal peptide predictions. More details can be found in the [documentation](https://github.com/oschwengers/bakta/blob/main/README.md#usage).\n\n> Modifies tool parameter(s):\n> - BAKTA: `--gram`", "fa_icon": "far fa-plus-square" @@ -401,12 +384,7 @@ "fa_icon": "fas fa-crown", "description": "Specify the kingdom that the input represents.", "help_text": "Specifies the kingdom that the input sample is derived from and/or you wish to screen for\n\n> ⚠️ Prokka cannot annotate Eukaryotes.\n\nFor more information please check the Prokka [documentation](https://github.com/tseemann/prokka).\n\n> Modifies tool parameter(s):\n> - Prokka: `--kingdom`", - "enum": [ - "Archaea", - "Bacteria", - "Mitochondria", - "Viruses" - ] + "enum": ["Archaea", "Bacteria", "Mitochondria", "Viruses"] }, "annotation_prokka_gcode": { "type": "integer", @@ -565,9 +543,7 @@ "help_text": "This flag specifies which tool for protein annotation should be activated.\nAt the moment only [InterProScan](https://github.com/ebi-pf-team/interproscan) is incorporated in the pipeline. This annotates the locus tags to protein and domain levels according to the InterPro databases.\n\nMore details can be found in the tool [documentation](https://interproscan-docs.readthedocs.io/en/latest/index.html).", "description": "Specifies the tool used for further protein annotation.", "fa_icon": "fas fa-tools", - "enum": [ - "InterProScan" - ] + "enum": ["InterProScan"] }, "protein_annotation_interproscan_db_url": { "type": "string", @@ -645,10 +621,7 @@ "default": "precursor", "description": "Specify which machine learning classification model to use.", "help_text": "Ampir uses a supervised statistical machine learning approach to predict AMPs. It incorporates two support vector machine classification models, \"precursor\" and \"mature\". \n\nThe precursor module is better for predicted proteins from a translated transcriptome or translated gene models. 
The alternative model (mature) is best suited for AMP sequences after post-translational processing, typically from direct proteomic sequencing.\n\nMore information can be found in the ampir [documentation](https://ampir.marine-omics.net/).\n\n> Modifies tool parameter(s):\n> - AMPir: `model =`", - "enum": [ - "precursor", - "mature" - ], + "enum": ["precursor", "mature"], "fa_icon": "fas fa-layer-group" }, "amp_ampir_minlength": { @@ -727,11 +700,7 @@ "help_text": "AMPcombi can use three different AMP databases to classify the recovered AMPS. These can either be: \n\n- [DRAMP database](http://dramp.cpu-bioinfor.org/downloads/): Only general AMPs are downloaded and filtered to remove any entry that has an instance of non amino acid residues in their sequence.\n\n- [APD](https://aps.unmc.edu/): Only experimentally validated AMPs are present.\n\n- [UniRef100](https://academic.oup.com/bioinformatics/article/23/10/1282/197795): Combines a more general protein dataset including curated and non curated AMPs. Helpful for identifying the clusters to remove any potential false positives. Beware: If the thresholds are for ampcombi are not strict enough, alignment with this database can take a long time. \n\nBy default this is set to 'DRAMP'. Other valid options include 'APD' or 'UniRef100'.\n\nFor more information check the AMPcombi [documentation](https://ampcombi.readthedocs.io/en/main/usage.html#parse-tables).", "fa_icon": "fas fa-address-book", "default": "DRAMP", - "enum": [ - "DRAMP", - "APD", - "UniRef100" - ] + "enum": ["DRAMP", "APD", "UniRef100"] }, "amp_ampcombi_db": { "type": "string", @@ -958,10 +927,7 @@ "arg_deeparg_model": { "type": "string", "default": "LS", - "enum": [ - "LS", - "SS" - ], + "enum": ["LS", "SS"], "description": "Specify which model to use (short or long sequences).", "help_text": "Specify which model to use: short sequences for reads (`SS`), or long sequences for genes (`LS`). In the vast majority of cases we recommend using the `LS` model when using funcscan\n\nFor more information check the DeepARG [documentation](https://bitbucket.org/gusphdproj/deeparg-ss/src/master/deeparg/).\n\n> Modifies tool parameter(s):\n> - DeepARG: `--model`", "fa_icon": "fas fa-layer-group" @@ -1094,10 +1060,7 @@ "default": "BLAST", "description": "Specify the alignment tool to be used.", "help_text": "Specifies the alignment tool to be used. By default RGI runs BLAST and this is also set as default in the nf-core/funcscan pipeline. With this flag the user can choose between BLAST and DIAMOND for the alignment step.\n\nFor more information check the RGI [documentation](https://github.com/arpcard/rgi).\n\n> Modifies tool parameter(s):\n> - RGI_MAIN: `--alignment_tool`", - "enum": [ - "BLAST", - "DIAMOND" - ], + "enum": ["BLAST", "DIAMOND"], "fa_icon": "fas fa-align-justify" }, "arg_rgi_includeloose": { @@ -1123,12 +1086,7 @@ "default": "NA", "description": "Specify a more specific data-type of input (e.g. plasmid, chromosome).", "help_text": "This flag is used to specify the data type used as input to RGI. 
By default this is set as 'NA', which makes no assumptions on input data.\n\nFor more information check the RGI [documentation](https://github.com/arpcard/rgi).\n\n> Modifies tool parameter(s):\n> - RGI_MAIN: `--data`", - "enum": [ - "NA", - "wgs", - "plasmid", - "chromosome" - ], + "enum": ["NA", "wgs", "plasmid", "chromosome"], "fa_icon": "fas fa-dna" }, "arg_rgi_split_prodigal_jobs": { @@ -1195,11 +1153,7 @@ "arg_hamronization_summarizeformat": { "type": "string", "default": "tsv", - "enum": [ - "interactive", - "tsv", - "json" - ], + "enum": ["interactive", "tsv", "json"], "help_text": "Specifies which summary report format to apply with `hamronize summarize`: tsv, json or interactive (html)\n\n> Modifies tool parameter(s)\n> - hamronize summarize: `-t`, `--summary_type`", "description": "Specifies summary output format.", "fa_icon": "far fa-file-code" @@ -1303,11 +1257,7 @@ "description": "Defines which level of strictness to use for HMM-based cluster detection.", "help_text": "Levels of strictness correspond to screening different groups of 'how well-defined' clusters are. For example, `loose` will include screening for 'poorly defined' clusters (e.g. saccharides), `relaxed` for partially present clusters (e.g. certain types of NRPS), whereas `strict` will screen for well-defined clusters such as Ketosynthases.\n\nYou can see the rules for the levels of strictness [here](https://github.com/antismash/antismash/tree/master/antismash/detection/hmm_detection/cluster_rules).\n\nFor more information see the antiSMASH [documentation](https://docs.antismash.secondarymetabolites.org/).\n\n> Modifies tool parameter(s):\n> - antiSMASH: `--hmmdetection-strictness`", "fa_icon": "fas fa-search", - "enum": [ - "relaxed", - "strict", - "loose" - ] + "enum": ["relaxed", "strict", "loose"] }, "bgc_antismash_pfam2go": { "type": "boolean", @@ -1327,10 +1277,7 @@ "description": "Specify which taxonomic classification of input sequence to use.", "help_text": "This specifies which set of secondary metabolites to screen for, based on the taxon type the secondary metabolites are from.\n\nThis will run different pipelines depending on whether the input sequences are from bacteria or fungi.\n\nFor more information see the antiSMASH [documentation](https://docs.antismash.secondarymetabolites.org/).\n\n> Modifies tool parameter(s):\n> - antiSMASH: `--taxon`", "fa_icon": "fas fa-bacteria", - "enum": [ - "bacteria", - "fungi" - ] + "enum": ["bacteria", "fungi"] }, "bgc_antismash_tfbs": { "type": "boolean", @@ -1476,22 +1423,14 @@ "bgc_gecco_convertmode": { "type": "string", "default": "clusters", - "enum": [ - "clusters", - "gbk" - ], + "enum": ["clusters", "gbk"], "description": "Specify conversion mode for GECCO convert.", "help_text": "Either clusters or gbk folder output, depending on what is reformatted. Modifies tool parameter: - gecco convert: `--bgc_gecco_convertmode `" }, "bgc_gecco_convertformat": { "type": "string", "default": "gff", - "enum": [ - "gff", - "gbk", - "fna", - "faa" - ], + "enum": ["gff", "gbk", "fna", "faa"], "description": "Specify output format for GECCO convert.", "help_text": "Choose output format: 'gff', 'fasta', or 'genbank'. Modifies tool parameter: - gecco convert: `--bgc_gecco_convertformat`" } @@ -1604,14 +1543,7 @@ "description": "Method used to save pipeline results to output directory.", "help_text": "The Nextflow `publishDir` option specifies which intermediate files should be saved to the output directory. 
This option tells the pipeline what method should be used to move these files. See [Nextflow docs](https://www.nextflow.io/docs/latest/process.html#publishdir) for details.", "fa_icon": "fas fa-copy", - "enum": [ - "symlink", - "rellink", - "link", - "copy", - "copyNoFollow", - "move" - ], + "enum": ["symlink", "rellink", "link", "copy", "copyNoFollow", "move"], "hidden": true }, "email_on_fail": { @@ -1688,10 +1620,7 @@ "hidden": true }, "help": { - "type": [ - "boolean", - "string" - ], + "type": ["boolean", "string"], "description": "Display the help message." }, "help_full": { @@ -1803,4 +1732,4 @@ "$ref": "#/$defs/generic_options" } ] -} \ No newline at end of file +} From ecd72c7b75b55458dc353fe44c05194f5f11eafa Mon Sep 17 00:00:00 2001 From: Dediu Codrin Date: Mon, 8 Dec 2025 13:11:10 +0200 Subject: [PATCH 20/26] updated test configs --- conf/test_bgc_bakta.config | 2 ++ conf/test_bgc_prokka.config | 2 ++ conf/test_bgc_pyrodigal.config | 2 ++ conf/test_preannotated_bgc.config | 2 ++ 4 files changed, 8 insertions(+) diff --git a/conf/test_bgc_bakta.config b/conf/test_bgc_bakta.config index a91483c9..374109e3 100644 --- a/conf/test_bgc_bakta.config +++ b/conf/test_bgc_bakta.config @@ -33,6 +33,8 @@ params { run_amp_screening = false run_bgc_screening = true + bgc_gecco_runconvert = true + bgc_run_hmmsearch = true bgc_hmmsearch_models = 'https://raw.githubusercontent.com/antismash/antismash/fd61de057e082fbf071732ac64b8b2e8883de32f/antismash/detection/hmm_detection/data/ToyB.hmm' } diff --git a/conf/test_bgc_prokka.config b/conf/test_bgc_prokka.config index 54d5d0db..80880234 100644 --- a/conf/test_bgc_prokka.config +++ b/conf/test_bgc_prokka.config @@ -32,6 +32,8 @@ params { run_amp_screening = false run_bgc_screening = true + bgc_gecco_runconvert = true + bgc_run_hmmsearch = true bgc_hmmsearch_models = 'https://raw.githubusercontent.com/antismash/antismash/fd61de057e082fbf071732ac64b8b2e8883de32f/antismash/detection/hmm_detection/data/ToyB.hmm' } diff --git a/conf/test_bgc_pyrodigal.config b/conf/test_bgc_pyrodigal.config index 4b986dd6..77c4569e 100644 --- a/conf/test_bgc_pyrodigal.config +++ b/conf/test_bgc_pyrodigal.config @@ -32,6 +32,8 @@ params { run_amp_screening = false run_bgc_screening = true + bgc_gecco_runconvert = true + bgc_run_hmmsearch = true bgc_hmmsearch_models = 'https://raw.githubusercontent.com/antismash/antismash/fd61de057e082fbf071732ac64b8b2e8883de32f/antismash/detection/hmm_detection/data/ToyB.hmm' } diff --git a/conf/test_preannotated_bgc.config b/conf/test_preannotated_bgc.config index 8bc11a51..15ca6d71 100644 --- a/conf/test_preannotated_bgc.config +++ b/conf/test_preannotated_bgc.config @@ -32,6 +32,8 @@ params { run_amp_screening = false run_bgc_screening = true + bgc_gecco_runconvert = true + bgc_run_hmmsearch = true bgc_hmmsearch_models = 'https://raw.githubusercontent.com/antismash/antismash/fd61de057e082fbf071732ac64b8b2e8883de32f/antismash/detection/hmm_detection/data/ToyB.hmm' } From fb9a4ff87d62713673406e8986875afdb476b5b3 Mon Sep 17 00:00:00 2001 From: Dediu Codrin Date: Mon, 8 Dec 2025 13:36:56 +0200 Subject: [PATCH 21/26] Linting --- conf/test_bgc_bakta.config | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/conf/test_bgc_bakta.config b/conf/test_bgc_bakta.config index 374109e3..a914b620 100644 --- a/conf/test_bgc_bakta.config +++ b/conf/test_bgc_bakta.config @@ -33,7 +33,7 @@ params { run_amp_screening = false run_bgc_screening = true - bgc_gecco_runconvert = true + bgc_gecco_runconvert = true bgc_run_hmmsearch = true 
bgc_hmmsearch_models = 'https://raw.githubusercontent.com/antismash/antismash/fd61de057e082fbf071732ac64b8b2e8883de32f/antismash/detection/hmm_detection/data/ToyB.hmm' From df1e3acb0a0aaa051b747d5fb313347f174cc288 Mon Sep 17 00:00:00 2001 From: "James A. Fellows Yates" Date: Wed, 17 Dec 2025 11:35:54 +0100 Subject: [PATCH 22/26] Update docs/output.md --- docs/output.md | 1 - 1 file changed, 1 deletion(-) diff --git a/docs/output.md b/docs/output.md index 5fb8a9e7..60455619 100644 --- a/docs/output.md +++ b/docs/output.md @@ -463,7 +463,6 @@ Note that filtered FASTA is only used for BGC workflow for run-time optimisation - `*.clusters.tsv`: TSV file containing coordinates of predicted clusters and BGC types - `*_cluster_*.gbk`: GenBank file (if clusters were found) containing sequence with annotations; one file per GECCO hit - - **GECCO CONVERT** - `*.gff`: GFF3 converted cluster tables containing the position and metadata for all the predicted clusters (only if `--bgc_gecco_runconvert`) - `*.region*.gbk`: Converted and aliased GenBank files so that they can be loaded by BiG-SLiCE (only if `--bgc_gecco_runconvert`) - `*.faa`: Amino-acid FASTA converted GenBank files of all the proteins in a cluster (only if `--bgc_gecco_runconvert`) From a9a561a0559f17716218997316d86b8a1124010f Mon Sep 17 00:00:00 2001 From: "James A. Fellows Yates" Date: Wed, 17 Dec 2025 12:15:13 +0100 Subject: [PATCH 23/26] Fix convert format options to match gecco itself, add validation check of invalid combinations --- nextflow_schema.json | 10 +-- subworkflows/local/bgc.nf | 16 ++--- .../utils_nfcore_funcscan_pipeline/main.nf | 61 +++++++++++-------- 3 files changed, 49 insertions(+), 38 deletions(-) diff --git a/nextflow_schema.json b/nextflow_schema.json index 36ad075a..23ad6809 100644 --- a/nextflow_schema.json +++ b/nextflow_schema.json @@ -1417,22 +1417,22 @@ }, "bgc_gecco_runconvert": { "type": "boolean", - "description": "Enable GECCO file conversion.", - "help_text": "Converts GECCO output into formats like GFF3, GenBank, or FASTA for further analysis. Modifies tool parameter - gecco convert: `--bgc_gecco_runconvert true`" + "description": "Enable GECCO file conversion to formats for downstream analysis.", + "help_text": "Converts GECCO output into formats like GFF3, GenBank, or FASTA for further analysis." }, "bgc_gecco_convertmode": { "type": "string", "default": "clusters", "enum": ["clusters", "gbk"], "description": "Specify conversion mode for GECCO convert.", - "help_text": "Either clusters or gbk folder output, depending on what is reformatted. Modifies tool parameter: - gecco convert: `--bgc_gecco_convertmode `" + "help_text": "Either clusters or gbk folder output, depending on what is reformatted.\n\n Modifies tool parameter:\n- gecco convert: `gecco convert `" }, "bgc_gecco_convertformat": { "type": "string", "default": "gff", - "enum": ["gff", "gbk", "fna", "faa"], + "enum": ["gff", "bigslice", "fna", "faa"], "description": "Specify output format for GECCO convert.", - "help_text": "Choose output format: 'gff', 'fasta', or 'genbank'. 
Modifies tool parameter: - gecco convert: `--bgc_gecco_convertformat`" + "help_text": "Choose output format for clusters mode: 'gff', or gbk mode: 'bigslice', 'fna', or 'faa'.\n\nModifies tool parameter:\n- gecco convert: `--format`" } }, "fa_icon": "fas fa-angle-double-right" diff --git a/subworkflows/local/bgc.nf b/subworkflows/local/bgc.nf index 7e2d41a5..75c34854 100644 --- a/subworkflows/local/bgc.nf +++ b/subworkflows/local/bgc.nf @@ -17,9 +17,9 @@ include { GECCO_CONVERT } from '../../modules/nf-core/g workflow BGC { take: fastas // tuple val(meta), path(PREPPED_INPUT.out.fna) - faas // tuple val(meta), path(.out.faa) - gbks // tuple val(meta), path(.out.gbk) - tsvs // tuple val(meta), path(MMSEQS_CREATETSV.out.tsv) + faas // tuple val(meta), path(.out.faa) + gbks // tuple val(meta), path(.out.gbk) + tsvs // tuple val(meta), path(MMSEQS_CREATETSV.out.tsv) main: ch_versions = Channel.empty() @@ -66,14 +66,14 @@ workflow BGC { } ch_bgcresults_for_combgc = ch_bgcresults_for_combgc.mix(ch_antismashresults_for_combgc) - } // DEEPBGC if (!params.bgc_skip_deepbgc) { if (params.bgc_deepbgc_db) { - ch_deepbgc_database = Channel.fromPath(params.bgc_deepbgc_db, checkIfExists: true) + ch_deepbgc_database = Channel + .fromPath(params.bgc_deepbgc_db, checkIfExists: true) .first() } else { @@ -111,10 +111,11 @@ workflow BGC { ch_gecco_clusters_and_gbk = GECCO_RUN.out.clusters .join(GECCO_RUN.out.gbk) .map { meta, clusters_file, gbk_file -> - [ meta, clusters_file, gbk_file ] + [meta, clusters_file, gbk_file] } GECCO_CONVERT(ch_gecco_clusters_and_gbk, params.bgc_gecco_convertmode, params.bgc_gecco_convertformat) + ch_versions = ch_versions.mix(GECCO_CONVERT.out.versions) } // HMMSEARCH if (params.bgc_run_hmmsearch) { @@ -174,7 +175,8 @@ workflow BGC { MERGE_TAXONOMY_COMBGC(ch_combgc_summaries, ch_mmseqs_taxonomy_list) ch_versions = ch_versions.mix(MERGE_TAXONOMY_COMBGC.out.versions) - ch_tabix_input = Channel.of(['id': 'combgc_complete_summary_taxonomy']) + ch_tabix_input = Channel + .of(['id': 'combgc_complete_summary_taxonomy']) .combine(MERGE_TAXONOMY_COMBGC.out.tsv) BGC_TABIX_BGZIP(ch_tabix_input) diff --git a/subworkflows/local/utils_nfcore_funcscan_pipeline/main.nf b/subworkflows/local/utils_nfcore_funcscan_pipeline/main.nf index 929b5cca..8ef458eb 100644 --- a/subworkflows/local/utils_nfcore_funcscan_pipeline/main.nf +++ b/subworkflows/local/utils_nfcore_funcscan_pipeline/main.nf @@ -8,15 +8,15 @@ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ */ -include { UTILS_NFSCHEMA_PLUGIN } from '../../nf-core/utils_nfschema_plugin' -include { paramsSummaryMap } from 'plugin/nf-schema' -include { samplesheetToList } from 'plugin/nf-schema' -include { paramsHelp } from 'plugin/nf-schema' -include { completionEmail } from '../../nf-core/utils_nfcore_pipeline' -include { completionSummary } from '../../nf-core/utils_nfcore_pipeline' -include { imNotification } from '../../nf-core/utils_nfcore_pipeline' -include { UTILS_NFCORE_PIPELINE } from '../../nf-core/utils_nfcore_pipeline' -include { UTILS_NEXTFLOW_PIPELINE } from '../../nf-core/utils_nextflow_pipeline' +include { UTILS_NFSCHEMA_PLUGIN } from '../../nf-core/utils_nfschema_plugin' +include { paramsSummaryMap } from 'plugin/nf-schema' +include { samplesheetToList } from 'plugin/nf-schema' +include { paramsHelp } from 'plugin/nf-schema' +include { completionEmail } from '../../nf-core/utils_nfcore_pipeline' +include { completionSummary } from '../../nf-core/utils_nfcore_pipeline' +include { imNotification } 
from '../../nf-core/utils_nfcore_pipeline' +include { UTILS_NFCORE_PIPELINE } from '../../nf-core/utils_nfcore_pipeline' +include { UTILS_NEXTFLOW_PIPELINE } from '../../nf-core/utils_nextflow_pipeline' /* ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ @@ -26,15 +26,15 @@ include { UTILS_NEXTFLOW_PIPELINE } from '../../nf-core/utils_nextflow_pipelin workflow PIPELINE_INITIALISATION { take: - version // boolean: Display version and exit - validate_params // boolean: Boolean whether to validate parameters against the schema at runtime - monochrome_logs // boolean: Do not use coloured log outputs + version // boolean: Display version and exit + validate_params // boolean: Boolean whether to validate parameters against the schema at runtime + monochrome_logs // boolean: Do not use coloured log outputs nextflow_cli_args // array: List of positional nextflow CLI args - outdir // string: The output directory where the results will be saved - input // string: Path to input samplesheet - help // boolean: Display help message and exit - help_full // boolean: Show the full help message - show_hidden // boolean: Show hidden parameters in the help message + outdir // string: The output directory where the results will be saved + input // string: Path to input samplesheet + help // boolean: Display help message and exit + help_full // boolean: Show the full help message + show_hidden // boolean: Show hidden parameters in the help message main: @@ -63,7 +63,7 @@ workflow PIPELINE_INITIALISATION { \033[0;35m nf-core/funcscan ${workflow.manifest.version}\033[0m -\033[2m----------------------------------------------------\033[0m- """ - after_text = """${workflow.manifest.doi ? "\n* The pipeline\n" : ""}${workflow.manifest.doi.tokenize(",").collect { " https://doi.org/${it.trim().replace('https://doi.org/','')}"}.join("\n")}${workflow.manifest.doi ? "\n" : ""} + after_text = """${workflow.manifest.doi ? "\n* The pipeline\n" : ""}${workflow.manifest.doi.tokenize(",").collect { " https://doi.org/${it.trim().replace('https://doi.org/', '')}" }.join("\n")}${workflow.manifest.doi ? 
"\n" : ""} * The nf-core framework https://doi.org/10.1038/s41587-020-0439-x @@ -72,7 +72,7 @@ workflow PIPELINE_INITIALISATION { """ command = "nextflow run ${workflow.manifest.name} -profile --input samplesheet.csv --outdir " - UTILS_NFSCHEMA_PLUGIN ( + UTILS_NFSCHEMA_PLUGIN( workflow, validate_params, null, @@ -81,7 +81,7 @@ workflow PIPELINE_INITIALISATION { show_hidden, before_text, after_text, - command + command, ) // @@ -99,7 +99,8 @@ workflow PIPELINE_INITIALISATION { // Create channel from input file provided through params.input // - Channel.fromList(samplesheetToList(input, "${projectDir}/assets/schema_input.json")) + Channel + .fromList(samplesheetToList(input, "${projectDir}/assets/schema_input.json")) .set { ch_samplesheet } emit: @@ -115,13 +116,13 @@ workflow PIPELINE_INITIALISATION { workflow PIPELINE_COMPLETION { take: - email // string: email address - email_on_fail // string: email address sent on pipeline failure + email // string: email address + email_on_fail // string: email address sent on pipeline failure plaintext_email // boolean: Send plain-text email instead of HTML - outdir // path: Path to output directory where results will be published + outdir // path: Path to output directory where results will be published monochrome_logs // boolean: Disable ANSI colour codes in log output - hook_url // string: hook URL for notifications - multiqc_report // string: Path to MultiQC report + hook_url // string: hook URL for notifications + multiqc_report // string: Path to MultiQC report main: summary_params = paramsSummaryMap(workflow, parameters_schema: "nextflow_schema.json") @@ -164,6 +165,14 @@ workflow PIPELINE_COMPLETION { // Check and validate pipeline parameters // def validateInputParameters() { + if (params.run_bgc_screening && !params.bgc_skip_gecco && params.bgc_gecco_runconvert) { + if (params.bgc_gecco_convertmode == 'gbk' && params.bgc_gecco_convertformat == 'gff') { + error("[nf-core/funcscan] ERROR: when specifying --bgc_gecco_convertmode 'gbk', --bgc_gecco_convertformat can only be set to 'bigslice', 'fna' or 'faa'. You specified --bgc_gecco_convertformat '${params.bgc_gecco_convertformat}'. Check input!") + } + if (params.bgc_gecco_convertmode == 'clusters' && params.bgc_gecco_convertformat != 'gff') { + error("[nf-core/funcscan] ERROR: when specifying --bgc_gecco_convertmode 'clusters', --bgc_gecco_convertformat can only be set to 'gff'. You specified --bgc_gecco_convertformat '${params.bgc_gecco_convertformat}'. Check input!") + } + } } // From 47a92545ea9b8f3b3b307bd196ef9bc6a71d8827 Mon Sep 17 00:00:00 2001 From: "James A. 
Fellows Yates" Date: Wed, 17 Dec 2025 13:52:39 +0100 Subject: [PATCH 24/26] Update tests --- conf/modules.config | 4 ++-- conf/test_bgc_bakta.config | 4 +--- conf/test_bgc_prokka.config | 4 +++- conf/test_bgc_pyrodigal.config | 4 +++- docs/output.md | 10 ++++------ tests/test_bgc_bakta.nf.test | 4 +++- tests/test_bgc_bakta.nf.test.snap | 8 ++++---- tests/test_bgc_prokka.nf.test | 4 +++- tests/test_bgc_prokka.nf.test.snap | 20 +++++++++++--------- tests/test_bgc_pyrodigal.nf.test | 4 +++- tests/test_bgc_pyrodigal.nf.test.snap | 20 +++++++++++--------- 11 files changed, 48 insertions(+), 38 deletions(-) diff --git a/conf/modules.config b/conf/modules.config index d1236167..0e9900f0 100644 --- a/conf/modules.config +++ b/conf/modules.config @@ -140,7 +140,7 @@ process { '--disable-residue-annot', '--enable-tsv-residue-annot', "--formats tsv", - ].join(' ').trim() // Warning: Do not disable the flags "--enable-tsv-residue-annot" and "--formats tsv"! This would cause a run failure because the format of the resulting files would no longer be adequate for parsing by AMPcombi2. + ].join(' ').trim() } withName: PROKKA { @@ -530,7 +530,7 @@ process { ].join(' ').trim() } - withName: GECCO_CONVERT { + withName: GECCO_CONVERT { publishDir = [ path: { "${params.outdir}/bgc/gecco/${meta.id}" }, mode: params.publish_dir_mode, diff --git a/conf/test_bgc_bakta.config b/conf/test_bgc_bakta.config index a914b620..3b97de01 100644 --- a/conf/test_bgc_bakta.config +++ b/conf/test_bgc_bakta.config @@ -23,7 +23,7 @@ params { config_profile_description = 'Minimal test dataset to check BGC workflow function' // Input data - input = params.pipelines_testdata_base_path + 'funcscan/samplesheet_reduced.csv' + input = params.pipelines_testdata_base_path + 'funcscan/samplesheet_hits.csv' bgc_antismash_db = params.pipelines_testdata_base_path + 'funcscan/databases/antismash_trimmed_8_0_1.tar.gz' annotation_tool = 'bakta' @@ -33,8 +33,6 @@ params { run_amp_screening = false run_bgc_screening = true - bgc_gecco_runconvert = true - bgc_run_hmmsearch = true bgc_hmmsearch_models = 'https://raw.githubusercontent.com/antismash/antismash/fd61de057e082fbf071732ac64b8b2e8883de32f/antismash/detection/hmm_detection/data/ToyB.hmm' } diff --git a/conf/test_bgc_prokka.config b/conf/test_bgc_prokka.config index 80880234..865e3163 100644 --- a/conf/test_bgc_prokka.config +++ b/conf/test_bgc_prokka.config @@ -23,7 +23,7 @@ params { config_profile_description = 'Minimal test dataset to check BGC workflow function' // Input data - input = params.pipelines_testdata_base_path + 'funcscan/samplesheet_reduced.csv' + input = params.pipelines_testdata_base_path + 'funcscan/samplesheet_hits.csv' bgc_antismash_db = params.pipelines_testdata_base_path + 'funcscan/databases/antismash_trimmed_8_0_1.tar.gz' annotation_tool = 'prokka' @@ -33,6 +33,8 @@ params { run_bgc_screening = true bgc_gecco_runconvert = true + bgc_gecco_convertmode = 'gbk' + bgc_gecco_convertformat = 'fna' bgc_run_hmmsearch = true bgc_hmmsearch_models = 'https://raw.githubusercontent.com/antismash/antismash/fd61de057e082fbf071732ac64b8b2e8883de32f/antismash/detection/hmm_detection/data/ToyB.hmm' diff --git a/conf/test_bgc_pyrodigal.config b/conf/test_bgc_pyrodigal.config index 77c4569e..cbd19fd6 100644 --- a/conf/test_bgc_pyrodigal.config +++ b/conf/test_bgc_pyrodigal.config @@ -23,7 +23,7 @@ params { config_profile_description = 'Minimal test dataset to check BGC workflow function' // Input data - input = params.pipelines_testdata_base_path + 'funcscan/samplesheet_reduced.csv' + 
input = params.pipelines_testdata_base_path + 'funcscan/samplesheet_hits.csv'
     bgc_antismash_db              = params.pipelines_testdata_base_path + 'funcscan/databases/antismash_trimmed_8_0_1.tar.gz'
 
     annotation_tool               = 'pyrodigal'
@@ -33,6 +33,8 @@ params {
     run_bgc_screening             = true
 
     bgc_gecco_runconvert          = true
+    bgc_gecco_convertmode         = 'clusters'
+    bgc_gecco_convertformat       = 'gff'
 
     bgc_run_hmmsearch             = true
     bgc_hmmsearch_models          = 'https://raw.githubusercontent.com/antismash/antismash/fd61de057e082fbf071732ac64b8b2e8883de32f/antismash/detection/hmm_detection/data/ToyB.hmm'
diff --git a/docs/output.md b/docs/output.md
index 5fb8a9e7..bc36e94e 100644
--- a/docs/output.md
+++ b/docs/output.md
@@ -462,12 +462,10 @@ Note that filtered FASTA is only used for BGC workflow for run-time optimisation
   - `*.features.tsv`: TSV file containing identified domains
   - `*.clusters.tsv`: TSV file containing coordinates of predicted clusters and BGC types
   - `*_cluster_*.gbk`: GenBank file (if clusters were found) containing sequence with annotations; one file per GECCO hit
-
-  - **GECCO CONVERT**
-    - `*.gff`: GFF3 converted cluster tables containing the position and metadata for all the predicted clusters (only if `--bgc_gecco_runconvert`)
-    - `*.region*.gbk`: Converted and aliased GenBank files so that they can be loaded by BiG-SLiCE (only if `--bgc_gecco_runconvert`)
-    - `*.faa`: Amino-acid FASTA converted GenBank files of all the proteins in a cluster (only if `--bgc_gecco_runconvert`)
-    - `*.fna`:Nucleotide sequence FASTA converted GenBank files from the cluster (only if `--bgc_gecco_runconvert`)
+  - `*.gff`: GFF3 converted cluster tables containing the position and metadata for all the predicted clusters (only if `--bgc_gecco_runconvert --bgc_gecco_convertmode clusters --bgc_gecco_convertformat gff`)
+  - `*.region*.gbk`: Converted and aliased GenBank files so that they can be loaded by BiG-SLiCE (only if `--bgc_gecco_runconvert --bgc_gecco_convertmode gbk --bgc_gecco_convertformat bigslice`)
+  - `*.faa`: Amino-acid FASTA converted GenBank files of all the proteins in a cluster (only if `--bgc_gecco_runconvert --bgc_gecco_convertmode gbk --bgc_gecco_convertformat faa`)
+  - `*.fna`: Nucleotide sequence FASTA converted GenBank files from the cluster (only if `--bgc_gecco_runconvert --bgc_gecco_convertmode gbk --bgc_gecco_convertformat fna`)
 
 [GECCO](https://gecco.embl.de) is a fast and scalable method for identifying putative novel Biosynthetic Gene Clusters (BGCs) in genomic and metagenomic data using Conditional Random Fields (CRFs).
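Putting the two knobs together: below is a minimal params sketch of the valid mode/format pairings documented above, as they would appear in a user config supplied with `-c`. Only the GECCO-related parameters are shown, the chosen pairing is just an example, and the block mirrors the test profiles in this series rather than prescribing defaults.

```nextflow
// Hypothetical user config (e.g. nextflow run ... -c custom.config).
// Valid pairings enforced by validateInputParameters():
//   bgc_gecco_convertmode 'clusters' -> format must be 'gff'
//   bgc_gecco_convertmode 'gbk'      -> format is 'bigslice', 'fna', or 'faa'
params {
    run_bgc_screening       = true
    bgc_gecco_runconvert    = true
    bgc_gecco_convertmode   = 'gbk'
    bgc_gecco_convertformat = 'faa'
}
```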
diff --git a/tests/test_bgc_bakta.nf.test b/tests/test_bgc_bakta.nf.test index 10d8d2ba..a1a270ec 100644 --- a/tests/test_bgc_bakta.nf.test +++ b/tests/test_bgc_bakta.nf.test @@ -38,7 +38,9 @@ nextflow_pipeline { // GECCO { assert snapshot( path("$outputDir/bgc/gecco/sample_2/sample_2.genes.tsv"), // channel: genes - path("$outputDir/bgc/gecco/sample_2/sample_2.features.tsv") // channel: features + path("$outputDir/bgc/gecco/sample_2/sample_2.features.tsv"), // channel: features + path("$outputDir/bgc/gecco/sample_2/sample_2.clusters.tsv"), // channel: clusters + path("$outputDir/bgc/gecco/sample_2/*.gbk"), // from gecco convert ).match("gecco") } ) } diff --git a/tests/test_bgc_bakta.nf.test.snap b/tests/test_bgc_bakta.nf.test.snap index 5a0932ad..4f66e145 100644 --- a/tests/test_bgc_bakta.nf.test.snap +++ b/tests/test_bgc_bakta.nf.test.snap @@ -13,13 +13,13 @@ }, "deepbgc_bgc_gbk": { "content": [ - "sample_2.bgc.gbk:md5,d41d8cd98f00b204e9800998ecf8427e" + "sample_2.bgc.gbk:md5,d7e7a8421ee13457487108f9d41aff54" ], "meta": { - "nf-test": "0.9.0", - "nextflow": "24.04.3" + "nf-test": "0.9.2", + "nextflow": "25.10.0" }, - "timestamp": "2024-07-24T10:32:18.378687548" + "timestamp": "2025-12-17T11:42:38.648657935" }, "gecco": { "content": [ diff --git a/tests/test_bgc_prokka.nf.test b/tests/test_bgc_prokka.nf.test index afb9c8f6..cf6392aa 100644 --- a/tests/test_bgc_prokka.nf.test +++ b/tests/test_bgc_prokka.nf.test @@ -38,7 +38,9 @@ nextflow_pipeline { // GECCO { assert snapshot( path("$outputDir/bgc/gecco/sample_2/sample_2.genes.tsv"), // channel: genes - path("$outputDir/bgc/gecco/sample_2/sample_2.features.tsv") // channel: features + path("$outputDir/bgc/gecco/sample_2/sample_2.features.tsv"), // channel: features + path("$outputDir/bgc/gecco/sample_2/sample_2.clusters.tsv"), // channel: clusters + path("$outputDir/bgc/gecco/sample_2/PROKKA_2_cluster_1.fna"), // from gecco convert ).match("gecco") } ) } diff --git a/tests/test_bgc_prokka.nf.test.snap b/tests/test_bgc_prokka.nf.test.snap index 69670287..3a5e36f5 100644 --- a/tests/test_bgc_prokka.nf.test.snap +++ b/tests/test_bgc_prokka.nf.test.snap @@ -13,23 +13,25 @@ }, "deepbgc_bgc_gbk": { "content": [ - "sample_2.bgc.gbk:md5,d41d8cd98f00b204e9800998ecf8427e" + "sample_2.bgc.gbk:md5,03712704561ca22c5e29f45a50f4a18d" ], "meta": { - "nf-test": "0.9.0", - "nextflow": "24.04.3" + "nf-test": "0.9.2", + "nextflow": "25.10.0" }, - "timestamp": "2024-07-24T10:39:33.920624113" + "timestamp": "2025-12-17T11:25:30.345420425" }, "gecco": { "content": [ - "sample_2.genes.tsv:md5,050b82ca462430ecc0635acb2e297531", - "sample_2.features.tsv:md5,79354868ee3de6fdc419195b8fa8edb6" + "sample_2.genes.tsv:md5,b9a64f054cea791ebbe0738e33431b2c", + "sample_2.features.tsv:md5,9900311acf9e6396fe4106c03ab628ba", + "sample_2.clusters.tsv:md5,78c908f8db4194ce989d4dddf16eea18", + "PROKKA_2_cluster_1.fna:md5,7647cfa207914f33f2abd32f2f7639d1" ], "meta": { - "nf-test": "0.9.0", - "nextflow": "24.04.3" + "nf-test": "0.9.2", + "nextflow": "25.10.0" }, - "timestamp": "2024-07-24T10:39:33.944935473" + "timestamp": "2025-12-17T11:33:10.649330826" } } \ No newline at end of file diff --git a/tests/test_bgc_pyrodigal.nf.test b/tests/test_bgc_pyrodigal.nf.test index 9ccf8024..7ad939d9 100644 --- a/tests/test_bgc_pyrodigal.nf.test +++ b/tests/test_bgc_pyrodigal.nf.test @@ -38,7 +38,9 @@ nextflow_pipeline { // GECCO { assert snapshot( path("$outputDir/bgc/gecco/sample_2/sample_2.genes.tsv"), // channel: genes - path("$outputDir/bgc/gecco/sample_2/sample_2.features.tsv") // 
channel: features + path("$outputDir/bgc/gecco/sample_2/sample_2.features.tsv"), // channel: features + path("$outputDir/bgc/gecco/sample_2/sample_2.clusters.tsv"), // channel: clusters + path("$outputDir/bgc/gecco/sample_2/sample_2.clusters.gff"), // from gecco convert ).match("gecco") } ) } diff --git a/tests/test_bgc_pyrodigal.nf.test.snap b/tests/test_bgc_pyrodigal.nf.test.snap index 80348839..60d02b2d 100644 --- a/tests/test_bgc_pyrodigal.nf.test.snap +++ b/tests/test_bgc_pyrodigal.nf.test.snap @@ -13,23 +13,25 @@ }, "deepbgc_bgc_gbk": { "content": [ - "sample_2.bgc.gbk:md5,d41d8cd98f00b204e9800998ecf8427e" + "sample_2.bgc.gbk:md5,a22271277ced910adede93fe202a7008" ], "meta": { - "nf-test": "0.9.0", - "nextflow": "24.04.3" + "nf-test": "0.9.2", + "nextflow": "25.10.0" }, - "timestamp": "2024-07-24T10:45:44.435766452" + "timestamp": "2025-12-17T11:16:13.022611618" }, "gecco": { "content": [ - "sample_2.genes.tsv:md5,66e3724c7e7da102bf58acd564211e8b", - "sample_2.features.tsv:md5,2ef146213836ca80d3079776f17c7cb2" + "sample_2.genes.tsv:md5,4e45a9882d7b9d5510fefe9f34329a96", + "sample_2.features.tsv:md5,b64ad7a5cb4af9971b1bd4b379ff2486", + "sample_2.clusters.tsv:md5,61a89e5684004b6c4e7b943c373e8d1e", + "sample_2.clusters.gff:md5,3caa3574e1be1ac5a0e1d80f01bacddd" ], "meta": { - "nf-test": "0.9.0", - "nextflow": "24.04.3" + "nf-test": "0.9.2", + "nextflow": "25.10.0" }, - "timestamp": "2024-07-24T10:45:25.732866237" + "timestamp": "2025-12-17T11:20:47.603380605" } } \ No newline at end of file From 1ec8ceef623d7c768637f3d19d9e181e0e2eed57 Mon Sep 17 00:00:00 2001 From: "James A. Fellows Yates" Date: Wed, 17 Dec 2025 14:34:16 +0100 Subject: [PATCH 25/26] Update BAKTA test with final test profile --- conf/test_bgc_bakta.config | 4 ++++ tests/test_bgc_bakta.nf.test | 2 +- tests/test_bgc_bakta.nf.test.snap | 12 +++++++----- 3 files changed, 12 insertions(+), 6 deletions(-) diff --git a/conf/test_bgc_bakta.config b/conf/test_bgc_bakta.config index 3b97de01..27717806 100644 --- a/conf/test_bgc_bakta.config +++ b/conf/test_bgc_bakta.config @@ -33,6 +33,10 @@ params { run_amp_screening = false run_bgc_screening = true + bgc_gecco_runconvert = true + bgc_gecco_convertmode = 'gbk' + bgc_gecco_convertformat = 'bigslice' + bgc_run_hmmsearch = true bgc_hmmsearch_models = 'https://raw.githubusercontent.com/antismash/antismash/fd61de057e082fbf071732ac64b8b2e8883de32f/antismash/detection/hmm_detection/data/ToyB.hmm' } diff --git a/tests/test_bgc_bakta.nf.test b/tests/test_bgc_bakta.nf.test index a1a270ec..a688070d 100644 --- a/tests/test_bgc_bakta.nf.test +++ b/tests/test_bgc_bakta.nf.test @@ -40,7 +40,7 @@ nextflow_pipeline { path("$outputDir/bgc/gecco/sample_2/sample_2.genes.tsv"), // channel: genes path("$outputDir/bgc/gecco/sample_2/sample_2.features.tsv"), // channel: features path("$outputDir/bgc/gecco/sample_2/sample_2.clusters.tsv"), // channel: clusters - path("$outputDir/bgc/gecco/sample_2/*.gbk"), // from gecco convert + file("$outputDir/bgc/gecco/sample_2/NODE_18_length_18230_cov_4.622228.region001.gbk").name, // from gecco convert ).match("gecco") } ) } diff --git a/tests/test_bgc_bakta.nf.test.snap b/tests/test_bgc_bakta.nf.test.snap index 4f66e145..b847a229 100644 --- a/tests/test_bgc_bakta.nf.test.snap +++ b/tests/test_bgc_bakta.nf.test.snap @@ -23,13 +23,15 @@ }, "gecco": { "content": [ - "sample_2.genes.tsv:md5,66e3724c7e7da102bf58acd564211e8b", - "sample_2.features.tsv:md5,2ef146213836ca80d3079776f17c7cb2" + "sample_2.genes.tsv:md5,4e45a9882d7b9d5510fefe9f34329a96", + 
"sample_2.features.tsv:md5,b64ad7a5cb4af9971b1bd4b379ff2486", + "sample_2.clusters.tsv:md5,61a89e5684004b6c4e7b943c373e8d1e", + "NODE_18_length_18230_cov_4.622228.region001.gbk" ], "meta": { - "nf-test": "0.9.0", - "nextflow": "24.04.3" + "nf-test": "0.9.2", + "nextflow": "25.10.0" }, - "timestamp": "2024-07-24T10:32:18.404694725" + "timestamp": "2025-12-17T13:30:31.319332788" } } \ No newline at end of file From de6c87a4ec5b12f4c9934f13eb45242674505514 Mon Sep 17 00:00:00 2001 From: "James A. Fellows Yates" Date: Thu, 18 Dec 2025 12:24:58 +0100 Subject: [PATCH 26/26] Update conf/modules.config --- conf/modules.config | 1 - 1 file changed, 1 deletion(-) diff --git a/conf/modules.config b/conf/modules.config index b12550ff..26186279 100644 --- a/conf/modules.config +++ b/conf/modules.config @@ -489,7 +489,6 @@ process { ] } - withName: DEEPBGC_DOWNLOAD { publishDir = [ path: { "${params.outdir}/databases/deepbgc" },