diff --git a/CHANGELOG.md b/CHANGELOG.md index abafaa47..e9721f26 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -9,6 +9,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 - [#421](https://github.com/nf-core/funcscan/pull/421) Updated to nf-core template 3.0.2. (by @jfy133) - [#427](https://github.com/nf-core/funcscan/pull/427) AMPcombi now can use multiple other databases for classifications. (by @darcy220606) +- [#428](https://github.com/nf-core/funcscan/pull/428) Added InterProScan annotation workflow to the pipeline. The results are coupled to AMPcombi final table. (by @darcy220606) - [#429](https://github.com/nf-core/funcscan/pull/429) Updated to nf-core template 3.1.0. (by @jfy133 and @jasmezz) - [#433](https://github.com/nf-core/funcscan/pull/433) Updated to nf-core template 3.1.1. (by @jfy133) - [#431](https://github.com/nf-core/funcscan/pull/431) Updated AMPcombi, Macrel, all MMseqs2 modules, MultiQC, Pyrodigal, and seqkit, added `--taxa_classification_mmseqs_compressed` parameter. (by @jasmezz) @@ -28,15 +29,16 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ### `Dependencies` -| Tool | Previous version | New version | -| --------- | ---------------- | ----------- | -| AMPcombi | 0.2.2 | 2.0.1 | -| Bakta | 1.9.3 | 1.10.4 | -| Macrel | 1.2.0 | 1.4.0 | -| MMseqs2 | 15.6f452 | 17.b804f | -| MultiQC | 1.24.0 | 1.27 | -| Pyrodigal | 3.3.0 | 3.6.3 | -| seqkit | 2.8.1 | 2.9.0 | +| Tool | Previous Version | New Version | +| ------------ | ---------------- | ----------- | +| AMPcombi | 0.2.2 | 2.0.1 | +| Bakta | 1.9.3 | 1.10.4 | +| InterProScan | - | 5.59_91.0 | +| Macrel | 1.2.0 | 1.4.0 | +| MMseqs2 | 15.6f452 | 17.b804f | +| MultiQC | 1.24.0 | 1.27 | +| Pyrodigal | 3.3.0 | 3.6.3 | +| seqkit | 2.8.1 | 2.9.0 | ### `Deprecated` diff --git a/CITATIONS.md b/CITATIONS.md index 50655554..37e595d2 100644 --- a/CITATIONS.md +++ b/CITATIONS.md @@ -70,6 +70,14 @@ > Eddy S. R. (2011). 
Accelerated Profile HMM Searches. PLoS computational biology, 7(10), e1002195. [DOI: 10.1371/journal.pcbi.1002195](https://doi.org/10.1371/journal.pcbi.1002195) +- [InterPro](https://doi.org/10.1093/nar/gkaa977) + + > Blum, M., Chang, H-Y., Chuguransky, S., Grego, T., Kandasaamy, S., Mitchell, A., Nuka, G., Paysan-Lafosse, T., Qureshi, M., Raj, S., Richardson, L., Salazar, G. A., Williams, L., Bork, P., Bridge, A., Gough, J., Haft, D. H., Letunic, I., Marchler-Bauer, A., Mi, H., Natale, D. A., Necci, M., Orengo, C. A., Pandurangan, A. P., Rivoire, C., Sigrist, C. A., Sillitoe, I., Thanki, N., Thomas, P. D., Tosatto, S. C. E, Wu, C. H., Bateman, A., Finn, R. D. (2021) The InterPro protein families and domains database: 20 years on. Nucleic Acids Research, 49(D1), D344–D354. [DOI: 10.1093/nar/gkaa977](https://doi.org/10.1093/nar/gkaa977) + +- [InterProScan](https://doi.org/10.1093/bioinformatics/btu031) + + > Jones, P., Binns, D., Chang, H-Y., Fraser, M., Li, W., McAnulla, C., McWilliam, H., Maslen, J., Mitchell, A., Nuka, G., Pesseat, S., Quinn, A. F., Sangrador-Vegas, A., Scheremetjew, M., Yong, S-Y., Lopez, R., Hunter, S. (2014) InterProScan 5: genome-scale protein function classification. Bioinformatics, 30(9), 1236–1240. [DOI: 10.1093/bioinformatics/btu031](https://doi.org/10.1093/bioinformatics/btu031) + - [Macrel](https://doi.org/10.7717/peerj.10555) > Santos-Júnior, C. D., Pan, S., Zhao, X. M., & Coelho, L. P. (2020). Macrel: antimicrobial peptide screening in genomes and metagenomes. PeerJ, 8, e10555. 
[DOI: 10.7717/peerj.10555](https://doi.org/10.7717/peerj.10555) diff --git a/conf/base.config b/conf/base.config index 68f87c98..814ebd88 100644 --- a/conf/base.config +++ b/conf/base.config @@ -231,4 +231,11 @@ process { memory = { 6.GB * task.attempt } time = { 2.h * task.attempt } } + + withName: INTERPROSCAN_DATABASE { + memory = { 6.GB * task.attempt } + time = { 4.h * task.attempt } + cpus = { 6 * task.attempt } + } + } diff --git a/conf/modules.config b/conf/modules.config index a348da3c..34528cd9 100644 --- a/conf/modules.config +++ b/conf/modules.config @@ -91,7 +91,7 @@ process { ].join(' ').trim() } - withName: SEQKIT_SEQ { + withName: SEQKIT_SEQ_LENGTH { ext.prefix = { "${meta.id}_long" } publishDir = [ path: { "${params.outdir}/bgc/seqkit/" }, @@ -104,6 +104,45 @@ process { ].join(' ').trim() } + withName: SEQKIT_SEQ_FILTER { + ext.prefix = { "${meta.id}_cleaned.faa" } + publishDir = [ + path: { "${params.outdir}/protein_annotation/interproscan/" }, + mode: params.publish_dir_mode, + enabled: { params.run_protein_annotation }, + saveAs: { filename -> filename.equals('versions.yml') ? null : filename } + ] + ext.args = [ + "--gap-letters '* \t.' --remove-gaps" + ].join(' ').trim() + } + + withName: INTERPROSCAN_DATABASE { + publishDir = [ + path: { "${params.outdir}/databases/interproscan/" }, + mode: params.publish_dir_mode, + enabled: params.save_db, + saveAs: { filename -> filename.equals('versions.yml') ? null : filename } + ] + } + + withName: INTERPROSCAN { + ext.prefix = { "${meta.id}_interproscan.faa" } + publishDir = [ + path: { "${params.outdir}/protein_annotation/interproscan/" }, + mode: params.publish_dir_mode, + enabled: params.run_protein_annotation, + saveAs: { filename -> filename.equals('versions.yml') ? null : filename } + ] + ext.args = [ + "--applications ${params.protein_annotation_interproscan_applications}", + params.protein_annotation_interproscan_enableprecalc ? 
'' : '--disable-precalc', + '--disable-residue-annot', + '--enable-tsv-residue-annot', + "--formats tsv" + ].join(' ').trim() // Warning: Do not disable the flags "--enable-tsv-residue-annot" and "--formats tsv"! This would cause a run failure because the format of the resulting files would no longer be adequate for parsing by AMPcombi2. + } + withName: PROKKA { ext.prefix = { "${meta.id}_prokka" } publishDir = [ @@ -687,7 +726,7 @@ process { withName: AMP_DATABASE_DOWNLOAD { publishDir = [ - path: { "${params.outdir}/databases/${params.amp_ampcombi_db}" }, + path: { "${params.outdir}/databases/ampcombi/" }, mode: params.publish_dir_mode, enabled: params.save_db, saveAs: { filename -> filename.equals('versions.yml') ? null : filename }, diff --git a/docs/output.md b/docs/output.md index aefa77e0..c4cb9f05 100644 --- a/docs/output.md +++ b/docs/output.md @@ -25,6 +25,8 @@ results/ | ├── prodigal/ | ├── prokka/ | └── pyrodigal/ +├── protein_annotation/ +| └── interproscan/ ├── amp/ | ├── ampir/ | ├── amplify/ @@ -74,6 +76,10 @@ ORF prediction and annotation with any of: - [Prokka](#prokka) – open reading frame prediction and functional protein annotation. - [Bakta](#bakta) – open reading frame prediction and functional protein annotation. +CDS domain annotation: + +- [InterProScan](#interproscan) (default) – for open reading frame protein and domain predictions. + Antimicrobial Resistance Genes (ARGs): - [ABRicate](#abricate) – antimicrobial resistance gene detection, based on alignment to one of several databases. @@ -216,6 +222,23 @@ Output Summaries: [Bakta](https://github.com/oschwengers/bakta) is a tool for the rapid & standardised annotation of bacterial genomes and plasmids from both isolates and MAGs. It provides dbxref-rich, sORF-including and taxon-independent annotations in machine-readable JSON & bioinformatics standard file formats for automated downstream analysis. The output is used by some of the functional screening tools. 
+### Protein annotation + +[InterProScan](#interproscan) + +#### InterProScan + +
+Output files + +- `interproscan/` + - `_cleaned.faa`: clean version of the fasta files (in amino acid format) generated by one of the annotation tools (i.e. Pyrodigal, Prokka, Bakta). These contain sequences with no special characters (e.g. `*` or `.`). + - `_interproscan_faa.tsv`: predicted proteins and domains using the InterPro database in TSV format + +
+ +[InterProScan](https://github.com/ebi-pf-team/interproscan) is designed to predict protein functions and provide possible domain and motif information of the coding regions. It utilizes the InterPro database that consists of multiple sister databases such as PANTHER, ProSite, Pfam, etc. More details can be found in the [documentation](https://interproscan-docs.readthedocs.io/en/latest/index.html). + ### AMP detection tools [ampir](#ampir), [AMPlify](#amplify), [hmmsearch](#hmmsearch), [Macrel](#macrel) @@ -465,6 +488,11 @@ Note that filtered FASTA is only used for BGC workflow for run-time optimisation - `/*_ampcombi.tsv`: summarised output in tsv format for each sample - `/*_amp.faa*`: fasta file containing the amino acid sequences for all AMP hits for each sample - `/*_mmseqs_matches.txt*`: alignment file generated by MMseqs2 for each sample + +:::info +In some cases when the AMP and the taxonomic classification subworkflows are turned on, it can happen that only summary files per sample are created in the output folder with **no** `Ampcombi_summary.tsv` and `Ampcombi_summary_cluster.tsv` files with no taxonomic classifications merged. This can occur if some AMP prediction parameters are 'too strict' or only one AMP tool is run, which can lead to no AMP hits found in any of the samples or in only one sample. Look out for the warning `[nf-core/funcscan] AMPCOMBI2: 0/1 file passed. Skipping AMPCOMBI2_COMPLETE, AMPCOMBI2_CLUSTER, and TAXONOMY MERGING steps.` in the stdout or `.nextflow.log` file. In that case we recommend lowering the AMP prediction thresholds and running more than one AMP prediction tool. 
+::: + AMP summary table header descriptions using DRAMP as reference database | Table column | Description | diff --git a/docs/usage.md b/docs/usage.md index c100f81c..a5e0b655 100644 --- a/docs/usage.md +++ b/docs/usage.md @@ -109,7 +109,7 @@ We highly recommend performing quality control on input contigs before running t For example, ideally BGC screening requires contigs of at least 3,000 bp else downstream tools may crash. ::: -## Notes on screening tools and taxonomic classification +## Notes on screening tools, taxonomic and functional classifications The implementation of some tools in the pipeline may have some particular behaviours that you should be aware of before you run the pipeline. @@ -131,6 +131,18 @@ MMseqs2 is currently the only taxonomic classification tool used in the pipeline --taxa_classification_mmseqs_db_id 'Kalamari' ``` +### InterProScan + +[InterProScan](https://github.com/ebi-pf-team/interproscan) is currently the only protein annotation tool that gives a snapshot of the protein families and domains for each coding region. + +The protein annotation workflow is activated with the flag `--run_protein_annotation`. InterProScan is used as the only protein annotation tool at the moment and the [InterPro database](http://ftp.ebi.ac.uk/pub/software/unix/iprscan/5/5.72-103.0) version 5.72-103.0 is downloaded and prepared to screen the input sequences against it. + +Since the database download is huge (5.5GB) and might take quite some time, you can skip the automatic database download on each run by manually downloading and extracting the files of any [InterPro version](http://ftp.ebi.ac.uk/pub/software/unix/iprscan/5/) beforehand and providing the resulting directory path to `--protein_annotation_interproscan_db `. + +:::info +By default, the databases used by InterProScan are set to `PANTHER,ProSiteProfiles,ProSitePatterns,Pfam`. 
Adding other applications to the list does not guarantee that the results will be integrated correctly within `AMPcombi`. +::: + ### antiSMASH antiSMASH has a minimum contig parameter, in which only contigs of a certain length (or longer) will be screened. In cases where no hits are found in these, the tool ends successfully without hits. However if no contigs in an input file reach that minimum threshold, the tool will end with a 'failure' code, and cause the pipeline to crash. @@ -256,7 +268,13 @@ The pipeline will automatically run Pyrodigal instead of Prodigal if the paramet This is due to an incompatibility issue of Prodigal's output `.gbk` file with multiple downstream tools. ::: -### Abricate +:::tip + +- If `--run_protein_annotation` is activated, protein and domain classifications of the coding regions are generated and then used by the `ampcombi2/parsetables` module to create a table for every sample and the complete summary files e.g., `Ampcombi_summary.tsv`. + +In some cases when the AMP and the taxonomic classification subworkflows are turned on, it can happen that only summary files per sample are created in the output folder with **no** `Ampcombi_summary.tsv` and `Ampcombi_summary_cluster.tsv` files with no taxonomic classifications merged. This can occur if some AMP prediction parameters are 'too strict' or only one AMP tool is run, which can lead to no AMP hits found in any of the samples or in only one sample. Look out for the warning `[nf-core/funcscan] AMPCOMBI2: 0/1 file passed. Skipping AMPCOMBI2_COMPLETE, AMPCOMBI2_CLUSTER, and TAXONOMY MERGING steps.` in the stdout or `.nextflow.log` file. In that case we recommend lowering the AMP prediction thresholds and running more than one AMP prediction tool. 
+ +### ABRicate The default ABRicate installation comes with a series of 'default' databases: @@ -499,7 +517,6 @@ The contents of the database directory should include directories such as `commo ```console deepbgc_db/ ├── common - └── Pfam-hmm-models*.hmm.* └── [0.1.0] ├── classifier | └── myClassifiers*.pkl @@ -507,6 +524,45 @@ deepbgc_db/ └── myDetectors*.pkl ``` +### InterProScan + +[InterProScan](https://github.com/ebi-pf-team/interproscan) is used to provide more information about the proteins annotated on the contigs. By default, turning on this subworkflow with `--run_protein_annotation` will download and unzip the [InterPro database](http://ftp.ebi.ac.uk/pub/software/unix/iprscan/5/5.72-103.0/) version 5.72-103.0. The database can be saved in the output directory `/databases/interproscan/` if the `--save_db` is turned on. Note: the huge database download (5.5GB) can take up to 4 hours depending on the bandwidth. + +A local version of the database can be supplied to the pipeline by passing the InterProScan database directory to `--protein_annotation_interproscan_db `. The directory can be created by running (e.g. 
for database version 5.72-103.0): + +``` +curl -L https://ftp.ebi.ac.uk/pub/software/unix/iprscan/5/5.72-103.0/interproscan-5.72-103.0-64-bit.tar.gz -o interproscan_db/interproscan-5.72-103.0-64-bit.tar.gz +tar -xzf interproscan_db/interproscan-5.72-103.0-64-bit.tar.gz -C interproscan_db/ + +``` + +The contents of the database directory should include the directory `data` in the top level with a couple of subdirectories: + +``` +interproscan_db/ + └── data/ + ├── antifam + ├── cdd + ├── funfam + ├── gene3d + ├── hamap + ├── ncbifam + ├── panther + | └── [18.0] + ├── pfam + | └── [36.0] + ├── phobius + ├── pirsf + ├── pirsr + ├── prints + ├── prosite + | └── [2023_05] + ├── sfld + ├── smart + ├── superfamily + └── tmhmm +``` + ## Updating the pipeline When you run the above command, Nextflow automatically pulls the pipeline code from GitHub and stores it as a cached version. When running the pipeline after this, it will always use the cached version if available - even if the pipeline has been updated since. 
To make sure that you're running the latest version of the pipeline, make sure that you regularly update the cached version of the pipeline: diff --git a/modules.json b/modules.json index da39cf46..b4ef3688 100644 --- a/modules.json +++ b/modules.json @@ -7,157 +7,162 @@ "nf-core": { "abricate/run": { "branch": "master", - "git_sha": "666652151335353eef2fcd58880bcef5bc2928e1", + "git_sha": "81880787133db07d9b4c1febd152c090eb8325dc", "installed_by": ["modules"] }, "ampcombi2/cluster": { "branch": "master", - "git_sha": "993865fe60cb1569155fbbbe0cee113e1127abaf", + "git_sha": "81880787133db07d9b4c1febd152c090eb8325dc", "installed_by": ["modules"] }, "ampcombi2/complete": { "branch": "master", - "git_sha": "993865fe60cb1569155fbbbe0cee113e1127abaf", + "git_sha": "81880787133db07d9b4c1febd152c090eb8325dc", "installed_by": ["modules"] }, "ampcombi2/parsetables": { "branch": "master", - "git_sha": "0e9cb409c32d3ec4f0d3804588e4778971c09b7e", + "git_sha": "637c3e1796ab13d4c91f3030932598aed94a4f87", "installed_by": ["modules"] }, "ampir": { "branch": "master", - "git_sha": "666652151335353eef2fcd58880bcef5bc2928e1", + "git_sha": "81880787133db07d9b4c1febd152c090eb8325dc", "installed_by": ["modules"] }, "amplify/predict": { "branch": "master", - "git_sha": "ce35ce92566b3328b405253543b9b2b4d4e5f4f7", + "git_sha": "81880787133db07d9b4c1febd152c090eb8325dc", "installed_by": ["modules"] }, "amrfinderplus/run": { "branch": "master", - "git_sha": "666652151335353eef2fcd58880bcef5bc2928e1", + "git_sha": "81880787133db07d9b4c1febd152c090eb8325dc", "installed_by": ["modules"] }, "amrfinderplus/update": { "branch": "master", - "git_sha": "666652151335353eef2fcd58880bcef5bc2928e1", + "git_sha": "81880787133db07d9b4c1febd152c090eb8325dc", "installed_by": ["modules"] }, "antismash/antismashlite": { "branch": "master", - "git_sha": "3e3be50f2096e4b8fcfadc0318c92f782d6ae969", + "git_sha": "81880787133db07d9b4c1febd152c090eb8325dc", "installed_by": ["modules"] }, 
"antismash/antismashlitedownloaddatabases": { "branch": "master", - "git_sha": "1eb613d008eae2f5dc8368fc940f6a36758deca9", + "git_sha": "81880787133db07d9b4c1febd152c090eb8325dc", "installed_by": ["modules"] }, "argnorm": { "branch": "master", - "git_sha": "666652151335353eef2fcd58880bcef5bc2928e1", + "git_sha": "81880787133db07d9b4c1febd152c090eb8325dc", "installed_by": ["modules"] }, "bakta/bakta": { "branch": "master", - "git_sha": "4f0998ebd8f65961cfef0b810f008be3bc477132", + "git_sha": "81880787133db07d9b4c1febd152c090eb8325dc", "installed_by": ["modules"] }, "bakta/baktadbdownload": { "branch": "master", - "git_sha": "4f0998ebd8f65961cfef0b810f008be3bc477132", + "git_sha": "81880787133db07d9b4c1febd152c090eb8325dc", "installed_by": ["modules"] }, "deeparg/downloaddata": { "branch": "master", - "git_sha": "666652151335353eef2fcd58880bcef5bc2928e1", + "git_sha": "81880787133db07d9b4c1febd152c090eb8325dc", "installed_by": ["modules"] }, "deeparg/predict": { "branch": "master", - "git_sha": "666652151335353eef2fcd58880bcef5bc2928e1", + "git_sha": "81880787133db07d9b4c1febd152c090eb8325dc", "installed_by": ["modules"] }, "deepbgc/download": { "branch": "master", - "git_sha": "ce35ce92566b3328b405253543b9b2b4d4e5f4f7", + "git_sha": "81880787133db07d9b4c1febd152c090eb8325dc", "installed_by": ["modules"] }, "deepbgc/pipeline": { "branch": "master", - "git_sha": "ce35ce92566b3328b405253543b9b2b4d4e5f4f7", + "git_sha": "81880787133db07d9b4c1febd152c090eb8325dc", "installed_by": ["modules"] }, "fargene": { "branch": "master", - "git_sha": "666652151335353eef2fcd58880bcef5bc2928e1", + "git_sha": "81880787133db07d9b4c1febd152c090eb8325dc", "installed_by": ["modules"] }, "gecco/run": { "branch": "master", - "git_sha": "666652151335353eef2fcd58880bcef5bc2928e1", + "git_sha": "81880787133db07d9b4c1febd152c090eb8325dc", "installed_by": ["modules"] }, "gunzip": { "branch": "master", - "git_sha": "ce35ce92566b3328b405253543b9b2b4d4e5f4f7", + "git_sha": 
"81880787133db07d9b4c1febd152c090eb8325dc", "installed_by": ["modules"] }, "hamronization/abricate": { "branch": "master", - "git_sha": "666652151335353eef2fcd58880bcef5bc2928e1", + "git_sha": "81880787133db07d9b4c1febd152c090eb8325dc", "installed_by": ["modules"] }, "hamronization/amrfinderplus": { "branch": "master", - "git_sha": "666652151335353eef2fcd58880bcef5bc2928e1", + "git_sha": "81880787133db07d9b4c1febd152c090eb8325dc", "installed_by": ["modules"] }, "hamronization/deeparg": { "branch": "master", - "git_sha": "666652151335353eef2fcd58880bcef5bc2928e1", + "git_sha": "81880787133db07d9b4c1febd152c090eb8325dc", "installed_by": ["modules"] }, "hamronization/fargene": { "branch": "master", - "git_sha": "666652151335353eef2fcd58880bcef5bc2928e1", + "git_sha": "81880787133db07d9b4c1febd152c090eb8325dc", "installed_by": ["modules"] }, "hamronization/rgi": { "branch": "master", - "git_sha": "666652151335353eef2fcd58880bcef5bc2928e1", + "git_sha": "81880787133db07d9b4c1febd152c090eb8325dc", "installed_by": ["modules"] }, "hamronization/summarize": { "branch": "master", - "git_sha": "666652151335353eef2fcd58880bcef5bc2928e1", + "git_sha": "81880787133db07d9b4c1febd152c090eb8325dc", "installed_by": ["modules"] }, "hmmer/hmmsearch": { "branch": "master", - "git_sha": "666652151335353eef2fcd58880bcef5bc2928e1", + "git_sha": "81880787133db07d9b4c1febd152c090eb8325dc", + "installed_by": ["modules"] + }, + "interproscan": { + "branch": "master", + "git_sha": "81880787133db07d9b4c1febd152c090eb8325dc", "installed_by": ["modules"] }, "macrel/contigs": { "branch": "master", - "git_sha": "666652151335353eef2fcd58880bcef5bc2928e1", + "git_sha": "81880787133db07d9b4c1febd152c090eb8325dc", "installed_by": ["modules"] }, "mmseqs/createdb": { "branch": "master", - "git_sha": "2dc4c0474a77f5f8709eb970d890ad102e92af6f", + "git_sha": "81880787133db07d9b4c1febd152c090eb8325dc", "installed_by": ["modules"] }, "mmseqs/createtsv": { "branch": "master", - "git_sha": 
"2dc4c0474a77f5f8709eb970d890ad102e92af6f", + "git_sha": "81880787133db07d9b4c1febd152c090eb8325dc", "installed_by": ["modules"] }, "mmseqs/databases": { "branch": "master", - "git_sha": "2dc4c0474a77f5f8709eb970d890ad102e92af6f", + "git_sha": "81880787133db07d9b4c1febd152c090eb8325dc", "installed_by": ["modules"] }, "mmseqs/taxonomy": { @@ -167,17 +172,17 @@ }, "multiqc": { "branch": "master", - "git_sha": "f0719ae309075ae4a291533883847c3f7c441dad", + "git_sha": "81880787133db07d9b4c1febd152c090eb8325dc", "installed_by": ["modules"] }, "prodigal": { "branch": "master", - "git_sha": "666652151335353eef2fcd58880bcef5bc2928e1", + "git_sha": "81880787133db07d9b4c1febd152c090eb8325dc", "installed_by": ["modules"] }, "prokka": { "branch": "master", - "git_sha": "bcfb2f7efd4444ab442ffc8a248afbd62bca5ae7", + "git_sha": "81880787133db07d9b4c1febd152c090eb8325dc", "installed_by": ["modules"] }, "pyrodigal": { @@ -187,29 +192,28 @@ }, "rgi/cardannotation": { "branch": "master", - "git_sha": "3e548877f25a5980a177cc4f81d2d2e8c24164ef", + "git_sha": "81880787133db07d9b4c1febd152c090eb8325dc", "installed_by": ["modules"] }, "rgi/main": { "branch": "master", - "git_sha": "41623bcb962bd02d1a45ad81c1853547ebd26f5f", + "git_sha": "81880787133db07d9b4c1febd152c090eb8325dc", "installed_by": ["modules"] }, "seqkit/seq": { "branch": "master", - "git_sha": "60645c2b45e56579de0a0c89416805cae44c1f46", + "git_sha": "81880787133db07d9b4c1febd152c090eb8325dc", "installed_by": ["modules"] }, "tabix/bgzip": { "branch": "master", - "git_sha": "666652151335353eef2fcd58880bcef5bc2928e1", + "git_sha": "81880787133db07d9b4c1febd152c090eb8325dc", "installed_by": ["modules"] }, "untar": { "branch": "master", - "git_sha": "3e548877f25a5980a177cc4f81d2d2e8c24164ef", - "installed_by": ["modules"], - "patch": "modules/nf-core/untar/untar.diff" + "git_sha": "81880787133db07d9b4c1febd152c090eb8325dc", + "installed_by": ["modules"] } } }, diff --git a/modules/local/interproscan_download.nf 
b/modules/local/interproscan_download.nf new file mode 100644 index 00000000..4df3edf3 --- /dev/null +++ b/modules/local/interproscan_download.nf @@ -0,0 +1,37 @@ +// Downloads the InterProScan release tarball from `database_url` and unpacks it into interproscan_db/. +// Emits everything under interproscan_db/ as `db`, plus versions.yml with the tar and curl versions. +process INTERPROSCAN_DATABASE { + tag "interproscan_database_download" + label 'process_medium' + + conda "conda-forge::curl=7.80.0" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? + 'https://depot.galaxyproject.org/singularity/curl:7.80.0' : + 'biocontainers/curl:7.80.0' }" + + input: + val database_url + + output: + path("interproscan_db/*"), emit: db + path "versions.yml", emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + """ + mkdir -p interproscan_db/ + + filename=\$(basename ${database_url}) + + curl -L ${database_url} -o interproscan_db/\$filename + tar -xzf interproscan_db/\$filename -C interproscan_db/ + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + tar: \$(tar --version 2>&1 | sed -n '1s/^tar ([^)]*) //p') + curl: "\$(curl --version 2>&1 | sed -n '1s/^curl \\([0-9.]*\\).*/\\1/p')" + END_VERSIONS + """ +} diff --git a/modules/nf-core/abricate/run/environment.yml b/modules/nf-core/abricate/run/environment.yml index c7a7d199..53fe9857 100644 --- a/modules/nf-core/abricate/run/environment.yml +++ b/modules/nf-core/abricate/run/environment.yml @@ -1,3 +1,5 @@ +--- +# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/environment-schema.json channels: - conda-forge - bioconda diff --git a/modules/nf-core/ampcombi2/cluster/environment.yml b/modules/nf-core/ampcombi2/cluster/environment.yml index f9c25b04..e88b26ba 100644 --- a/modules/nf-core/ampcombi2/cluster/environment.yml +++ b/modules/nf-core/ampcombi2/cluster/environment.yml @@ -4,4 +4,4 @@ channels: - conda-forge - bioconda dependencies: - - "bioconda::ampcombi=2.0.1" + - bioconda::ampcombi=2.0.1 diff --git a/modules/nf-core/ampcombi2/cluster/tests/main.nf.test 
b/modules/nf-core/ampcombi2/cluster/tests/main.nf.test new file mode 100644 index 00000000..49bee6cf --- /dev/null +++ b/modules/nf-core/ampcombi2/cluster/tests/main.nf.test @@ -0,0 +1,65 @@ +nextflow_process { + + name "Test Process AMPCOMBI2_CLUSTER" + script "../main.nf" + process "AMPCOMBI2_CLUSTER" + + tag "modules" + tag "modules_nfcore" + tag "ampcombi2" + tag "ampcombi2/cluster" + tag "ampcombi2/complete" + + setup { + run("AMPCOMBI2_COMPLETE") { + script "../../../ampcombi2/complete/main.nf" + process { + """ + input[0] = + [ + file('https://github.com/nf-core/test-datasets/raw/modules/data/delete_me/ampcombi/ampcombi2/sample_1_ampcombi.tsv', checkIfExists: true), + file('https://github.com/nf-core/test-datasets/raw/modules/data/delete_me/ampcombi/ampcombi2/sample_2_ampcombi.tsv', checkIfExists: true) + ] + """ + } + } + } + + test("ampcombi2_cluster - metagenome") { + when { + process { + """ + input[0] = AMPCOMBI2_COMPLETE.out.tsv + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot( + file(process.out.cluster_tsv[0]).readLines()[0].contains("Linear/Cyclic/Branched"), + file(process.out.rep_cluster_tsv[0]).readLines()[0].contains("total_cluster_members"), + process.out.versions).match() } + ) + } + } + + test("ampcombi2_cluster - metagenome - stub") { + options "-stub" + when { + process { + """ + input[0] = AMPCOMBI2_COMPLETE.out.tsv + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + } +} diff --git a/modules/nf-core/ampcombi2/cluster/tests/tags.yml b/modules/nf-core/ampcombi2/cluster/tests/tags.yml new file mode 100644 index 00000000..783f4d52 --- /dev/null +++ b/modules/nf-core/ampcombi2/cluster/tests/tags.yml @@ -0,0 +1,2 @@ +ampcombi2/cluster: + - "modules/nf-core/ampcombi2/cluster/**" diff --git a/modules/nf-core/ampcombi2/complete/environment.yml b/modules/nf-core/ampcombi2/complete/environment.yml index f9c25b04..e88b26ba 100644 --- 
a/modules/nf-core/ampcombi2/complete/environment.yml +++ b/modules/nf-core/ampcombi2/complete/environment.yml @@ -4,4 +4,4 @@ channels: - conda-forge - bioconda dependencies: - - "bioconda::ampcombi=2.0.1" + - bioconda::ampcombi=2.0.1 diff --git a/modules/nf-core/ampcombi2/complete/tests/main.nf.test b/modules/nf-core/ampcombi2/complete/tests/main.nf.test new file mode 100644 index 00000000..176d975f --- /dev/null +++ b/modules/nf-core/ampcombi2/complete/tests/main.nf.test @@ -0,0 +1,56 @@ +nextflow_process { + + name "Test Process AMPCOMBI2_COMPLETE" + script "../main.nf" + process "AMPCOMBI2_COMPLETE" + + tag "modules" + tag "modules_nfcore" + tag "ampcombi2" + tag "ampcombi2/complete" + + test("ampcombi2_complete - contigs") { + when { + process { + """ + input[0] = + [ + file('https://github.com/nf-core/test-datasets/raw/modules/data/delete_me/ampcombi/ampcombi2/sample_1_ampcombi.tsv', checkIfExists: true), + file('https://github.com/nf-core/test-datasets/raw/modules/data/delete_me/ampcombi/ampcombi2/sample_2_ampcombi.tsv', checkIfExists: true) + ] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot( + file(process.out.tsv[0]).readLines()[0].contains("ampir"), + process.out.versions).match() } + ) + } + } + + test("ampcombi2_complete - contigs - stub") { + options "-stub" + when { + process { + """ + input[0] = + [ + file('https://github.com/nf-core/test-datasets/raw/modules/data/delete_me/ampcombi/ampcombi2/sample_1_ampcombi.tsv', checkIfExists: true), + file('https://github.com/nf-core/test-datasets/raw/modules/data/delete_me/ampcombi/ampcombi2/sample_2_ampcombi.tsv', checkIfExists: true) + ] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + } +} diff --git a/modules/nf-core/ampcombi2/complete/tests/tags.yml b/modules/nf-core/ampcombi2/complete/tests/tags.yml new file mode 100644 index 00000000..f8ac5fee --- /dev/null +++ 
b/modules/nf-core/ampcombi2/complete/tests/tags.yml @@ -0,0 +1,2 @@ +ampcombi2/complete: + - "modules/nf-core/ampcombi2/complete/**" diff --git a/modules/nf-core/ampcombi2/parsetables/environment.yml b/modules/nf-core/ampcombi2/parsetables/environment.yml index f9c25b04..e88b26ba 100644 --- a/modules/nf-core/ampcombi2/parsetables/environment.yml +++ b/modules/nf-core/ampcombi2/parsetables/environment.yml @@ -4,4 +4,4 @@ channels: - conda-forge - bioconda dependencies: - - "bioconda::ampcombi=2.0.1" + - bioconda::ampcombi=2.0.1 diff --git a/modules/nf-core/ampcombi2/parsetables/main.nf b/modules/nf-core/ampcombi2/parsetables/main.nf index b9d855df..088497f4 100644 --- a/modules/nf-core/ampcombi2/parsetables/main.nf +++ b/modules/nf-core/ampcombi2/parsetables/main.nf @@ -17,10 +17,10 @@ process AMPCOMBI2_PARSETABLES { output: tuple val(meta), path("${meta.id}/") , emit: sample_dir - tuple val(meta), path("${meta.id}/contig_gbks/") , emit: contig_gbks - tuple val(meta), path("${meta.id}/${meta.id}_mmseqs_matches.tsv") , emit: db_tsv - tuple val(meta), path("${meta.id}/${meta.id}_ampcombi.tsv") , emit: tsv - tuple val(meta), path("${meta.id}/${meta.id}_amp.faa") , emit: faa + tuple val(meta), path("${meta.id}/contig_gbks/") , emit: contig_gbks , optional:true + tuple val(meta), path("${meta.id}/${meta.id}_mmseqs_matches.tsv") , emit: db_tsv , optional:true + tuple val(meta), path("${meta.id}/${meta.id}_ampcombi.tsv") , emit: tsv , optional:true + tuple val(meta), path("${meta.id}/${meta.id}_amp.faa") , emit: faa , optional:true tuple val(meta), path("${meta.id}/${meta.id}_ampcombi.log") , emit: sample_log , optional:true tuple val(meta), path("Ampcombi_parse_tables.log") , emit: full_log , optional:true tuple val(meta), path("amp_${opt_amp_db}_database/") , emit: db , optional:true diff --git a/modules/nf-core/ampir/environment.yml b/modules/nf-core/ampir/environment.yml index 359e426c..3c6f4793 100644 --- a/modules/nf-core/ampir/environment.yml +++ 
b/modules/nf-core/ampir/environment.yml @@ -1,3 +1,5 @@ +--- +# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/environment-schema.json channels: - conda-forge - bioconda diff --git a/modules/nf-core/amplify/predict/environment.yml b/modules/nf-core/amplify/predict/environment.yml index e1cb5703..872115b4 100644 --- a/modules/nf-core/amplify/predict/environment.yml +++ b/modules/nf-core/amplify/predict/environment.yml @@ -1,3 +1,5 @@ +--- +# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/environment-schema.json channels: - conda-forge - bioconda diff --git a/modules/nf-core/amrfinderplus/run/environment.yml b/modules/nf-core/amrfinderplus/run/environment.yml index 2744ce54..0487b72d 100644 --- a/modules/nf-core/amrfinderplus/run/environment.yml +++ b/modules/nf-core/amrfinderplus/run/environment.yml @@ -1,3 +1,5 @@ +--- +# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/environment-schema.json channels: - conda-forge - bioconda diff --git a/modules/nf-core/amrfinderplus/update/environment.yml b/modules/nf-core/amrfinderplus/update/environment.yml index 2744ce54..0487b72d 100644 --- a/modules/nf-core/amrfinderplus/update/environment.yml +++ b/modules/nf-core/amrfinderplus/update/environment.yml @@ -1,3 +1,5 @@ +--- +# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/environment-schema.json channels: - conda-forge - bioconda diff --git a/modules/nf-core/antismash/antismashlite/environment.yml b/modules/nf-core/antismash/antismashlite/environment.yml index ce4491dc..dc2807d5 100644 --- a/modules/nf-core/antismash/antismashlite/environment.yml +++ b/modules/nf-core/antismash/antismashlite/environment.yml @@ -1,3 +1,5 @@ +--- +# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/environment-schema.json channels: - conda-forge - bioconda diff 
--git a/modules/nf-core/antismash/antismashlitedownloaddatabases/environment.yml b/modules/nf-core/antismash/antismashlitedownloaddatabases/environment.yml index ce4491dc..dc2807d5 100644 --- a/modules/nf-core/antismash/antismashlitedownloaddatabases/environment.yml +++ b/modules/nf-core/antismash/antismashlitedownloaddatabases/environment.yml @@ -1,3 +1,5 @@ +--- +# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/environment-schema.json channels: - conda-forge - bioconda diff --git a/modules/nf-core/argnorm/environment.yml b/modules/nf-core/argnorm/environment.yml index 783995f2..91971001 100644 --- a/modules/nf-core/argnorm/environment.yml +++ b/modules/nf-core/argnorm/environment.yml @@ -1,5 +1,7 @@ +--- +# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/environment-schema.json channels: - conda-forge - bioconda dependencies: - - "bioconda::argnorm=0.5.0" + - bioconda::argnorm=0.5.0 diff --git a/modules/nf-core/bakta/bakta/environment.yml b/modules/nf-core/bakta/bakta/environment.yml index a2d0ff72..c1b616a4 100644 --- a/modules/nf-core/bakta/bakta/environment.yml +++ b/modules/nf-core/bakta/bakta/environment.yml @@ -1,3 +1,5 @@ +--- +# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/environment-schema.json channels: - conda-forge - bioconda diff --git a/modules/nf-core/bakta/baktadbdownload/environment.yml b/modules/nf-core/bakta/baktadbdownload/environment.yml index a2d0ff72..c1b616a4 100644 --- a/modules/nf-core/bakta/baktadbdownload/environment.yml +++ b/modules/nf-core/bakta/baktadbdownload/environment.yml @@ -1,3 +1,5 @@ +--- +# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/environment-schema.json channels: - conda-forge - bioconda diff --git a/modules/nf-core/deeparg/downloaddata/environment.yml b/modules/nf-core/deeparg/downloaddata/environment.yml index 
074c6501..91c8f5cf 100644 --- a/modules/nf-core/deeparg/downloaddata/environment.yml +++ b/modules/nf-core/deeparg/downloaddata/environment.yml @@ -1,3 +1,5 @@ +--- +# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/environment-schema.json channels: - conda-forge - bioconda diff --git a/modules/nf-core/deeparg/downloaddata/main.nf b/modules/nf-core/deeparg/downloaddata/main.nf index 787c0027..7f17ebab 100644 --- a/modules/nf-core/deeparg/downloaddata/main.nf +++ b/modules/nf-core/deeparg/downloaddata/main.nf @@ -2,32 +2,33 @@ process DEEPARG_DOWNLOADDATA { label 'process_single' conda "${moduleDir}/environment.yml" - container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? - 'https://depot.galaxyproject.org/singularity/deeparg:1.0.4--pyhdfd78af_0' : - 'biocontainers/deeparg:1.0.4--pyhdfd78af_0' }" + container "${workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container + ? 'https://depot.galaxyproject.org/singularity/deeparg:1.0.4--pyhdfd78af_0' + : 'biocontainers/deeparg:1.0.4--pyhdfd78af_0'}" /* We have to force docker/singularity to mount a fake file to allow reading of a problematic file with borked read-write permissions in an upstream dependency (theanos). Original report: https://github.com/nf-core/funcscan/issues/23 */ containerOptions { - "${workflow.containerEngine}" == 'singularity' ? '-B $(which bash):/usr/local/lib/python2.7/site-packages/Theano-0.8.2-py2.7.egg-info/PKG-INFO' : - "${workflow.containerEngine}" == 'docker' ? '-v $(which bash):/usr/local/lib/python2.7/site-packages/Theano-0.8.2-py2.7.egg-info/PKG-INFO' : - '' + ['singularity', 'apptainer'].contains(workflow.containerEngine) + ? '-B $(which bash):/usr/local/lib/python2.7/site-packages/Theano-0.8.2-py2.7.egg-info/PKG-INFO' + : "${workflow.containerEngine}" == 'docker' + ? 
'-v $(which bash):/usr/local/lib/python2.7/site-packages/Theano-0.8.2-py2.7.egg-info/PKG-INFO' + : '' } - input: - output: - path "db/" , emit: db - path "versions.yml" , emit: versions + path "db/", emit: db + path "versions.yml", emit: versions when: task.ext.when == null || task.ext.when script: def args = task.ext.args ?: '' - def VERSION='1.0.4' // WARN: Version information not provided by tool on CLI. Please update this string when bumping container versions. + def VERSION = '1.0.4' + // WARN: Version information not provided by tool on CLI. Please update this string when bumping container versions. """ # Theano needs a writable space and uses the home directory by default, @@ -38,24 +39,30 @@ process DEEPARG_DOWNLOADDATA { deeparg \\ download_data \\ - $args \\ + ${args} \\ -o db/ cat <<-END_VERSIONS > versions.yml "${task.process}": - deeparg: $VERSION + deeparg: ${VERSION} END_VERSIONS """ stub: def args = task.ext.args ?: '' - def VERSION='1.0.4' // WARN: Version information not provided by tool on CLI. Please update this string when bumping container versions. + def VERSION = '1.0.4' + // WARN: Version information not provided by tool on CLI. Please update this string when bumping container versions. 
""" + echo "deeparg \\ + download_data \\ + ${args} \\ + -o db/" + mkdir db/ cat <<-END_VERSIONS > versions.yml "${task.process}": - deeparg: $VERSION + deeparg: ${VERSION} END_VERSIONS """ } diff --git a/modules/nf-core/deeparg/predict/environment.yml b/modules/nf-core/deeparg/predict/environment.yml index 074c6501..91c8f5cf 100644 --- a/modules/nf-core/deeparg/predict/environment.yml +++ b/modules/nf-core/deeparg/predict/environment.yml @@ -1,3 +1,5 @@ +--- +# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/environment-schema.json channels: - conda-forge - bioconda diff --git a/modules/nf-core/deeparg/predict/main.nf b/modules/nf-core/deeparg/predict/main.nf index 20fd0a93..2ac258a8 100644 --- a/modules/nf-core/deeparg/predict/main.nf +++ b/modules/nf-core/deeparg/predict/main.nf @@ -1,32 +1,34 @@ process DEEPARG_PREDICT { - tag "$meta.id" + tag "${meta.id}" label 'process_single' conda "${moduleDir}/environment.yml" - container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? - 'https://depot.galaxyproject.org/singularity/deeparg:1.0.4--pyhdfd78af_0' : - 'biocontainers/deeparg:1.0.4--pyhdfd78af_0' }" + container "${workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container + ? 'https://depot.galaxyproject.org/singularity/deeparg:1.0.4--pyhdfd78af_0' + : 'biocontainers/deeparg:1.0.4--pyhdfd78af_0'}" /* We have to force docker/singularity to mount a fake file to allow reading of a problematic file with borked read-write permissions in an upstream dependency (theanos). Original report: https://github.com/nf-core/funcscan/issues/23 */ containerOptions { - "${workflow.containerEngine}" == 'singularity' ? '-B $(which bash):/usr/local/lib/python2.7/site-packages/Theano-0.8.2-py2.7.egg-info/PKG-INFO' : - "${workflow.containerEngine}" == 'docker' ? 
'-v $(which bash):/usr/local/lib/python2.7/site-packages/Theano-0.8.2-py2.7.egg-info/PKG-INFO' : - '' + ['singularity', 'apptainer'].contains(workflow.containerEngine) + ? '-B $(which bash):/usr/local/lib/python2.7/site-packages/Theano-0.8.2-py2.7.egg-info/PKG-INFO' + : "${workflow.containerEngine}" == 'docker' + ? '-v $(which bash):/usr/local/lib/python2.7/site-packages/Theano-0.8.2-py2.7.egg-info/PKG-INFO' + : '' } input: tuple val(meta), path(fasta), val(model) - path(db) + path db output: - tuple val(meta), path("*.align.daa") , emit: daa - tuple val(meta), path("*.align.daa.tsv") , emit: daa_tsv - tuple val(meta), path("*.mapping.ARG") , emit: arg + tuple val(meta), path("*.align.daa"), emit: daa + tuple val(meta), path("*.align.daa.tsv"), emit: daa_tsv + tuple val(meta), path("*.mapping.ARG"), emit: arg tuple val(meta), path("*.mapping.potential.ARG"), emit: potential_arg - path "versions.yml" , emit: versions + path "versions.yml", emit: versions when: task.ext.when == null || task.ext.when @@ -34,9 +36,10 @@ process DEEPARG_PREDICT { script: def args = task.ext.args ?: '' def prefix = task.ext.prefix ?: "${meta.id}" - def VERSION='1.0.4' // WARN: Version information not provided by tool on CLI. Please update this string when bumping container versions. + def VERSION = '1.0.4' + // WARN: Version information not provided by tool on CLI. Please update this string when bumping container versions. 
""" - DATABASE=`find -L $db -type d -name "database" | sed 's/database//'` + DATABASE=`find -L ${db} -type d -name "database" | sed 's/database//'` # Theano needs a writable space and uses the home directory by default, # but the latter is not always writable, for instance when Singularity @@ -46,22 +49,23 @@ process DEEPARG_PREDICT { deeparg \\ predict \\ - $args \\ - -i $fasta \\ + ${args} \\ + -i ${fasta} \\ -o ${prefix} \\ -d \$DATABASE \\ - --model $model + --model ${model} cat <<-END_VERSIONS > versions.yml "${task.process}": - deeparg: $VERSION + deeparg: ${VERSION} END_VERSIONS """ stub: def args = task.ext.args ?: '' def prefix = task.ext.prefix ?: "${meta.id}" - def VERSION='1.0.4' // WARN: Version information not provided by tool on CLI. Please update this string when bumping container versions. + def VERSION = '1.0.4' + // WARN: Version information not provided by tool on CLI. Please update this string when bumping container versions. """ touch ${prefix}.align.daa touch ${prefix}.align.daa.tsv @@ -70,7 +74,7 @@ process DEEPARG_PREDICT { cat <<-END_VERSIONS > versions.yml "${task.process}": - deeparg: $VERSION + deeparg: ${VERSION} END_VERSIONS """ } diff --git a/modules/nf-core/deepbgc/download/environment.yml b/modules/nf-core/deepbgc/download/environment.yml index 36cb903f..999c6864 100644 --- a/modules/nf-core/deepbgc/download/environment.yml +++ b/modules/nf-core/deepbgc/download/environment.yml @@ -1,3 +1,5 @@ +--- +# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/environment-schema.json channels: - conda-forge - bioconda diff --git a/modules/nf-core/deepbgc/pipeline/environment.yml b/modules/nf-core/deepbgc/pipeline/environment.yml index 36cb903f..999c6864 100644 --- a/modules/nf-core/deepbgc/pipeline/environment.yml +++ b/modules/nf-core/deepbgc/pipeline/environment.yml @@ -1,3 +1,5 @@ +--- +# yaml-language-server: 
$schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/environment-schema.json channels: - conda-forge - bioconda diff --git a/modules/nf-core/fargene/environment.yml b/modules/nf-core/fargene/environment.yml index ade4d770..197b2b32 100644 --- a/modules/nf-core/fargene/environment.yml +++ b/modules/nf-core/fargene/environment.yml @@ -1,3 +1,5 @@ +--- +# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/environment-schema.json channels: - conda-forge - bioconda diff --git a/modules/nf-core/gecco/run/environment.yml b/modules/nf-core/gecco/run/environment.yml index 7db7dc87..bb47bc85 100644 --- a/modules/nf-core/gecco/run/environment.yml +++ b/modules/nf-core/gecco/run/environment.yml @@ -1,3 +1,5 @@ +--- +# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/environment-schema.json channels: - conda-forge - bioconda diff --git a/modules/nf-core/gunzip/environment.yml b/modules/nf-core/gunzip/environment.yml index ae4fa457..9b926b1f 100644 --- a/modules/nf-core/gunzip/environment.yml +++ b/modules/nf-core/gunzip/environment.yml @@ -1,3 +1,5 @@ +--- +# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/environment-schema.json channels: - conda-forge - bioconda diff --git a/modules/nf-core/hamronization/abricate/environment.yml b/modules/nf-core/hamronization/abricate/environment.yml index 791b9c96..5826a865 100644 --- a/modules/nf-core/hamronization/abricate/environment.yml +++ b/modules/nf-core/hamronization/abricate/environment.yml @@ -1,3 +1,5 @@ +--- +# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/environment-schema.json channels: - conda-forge - bioconda diff --git a/modules/nf-core/hamronization/amrfinderplus/environment.yml b/modules/nf-core/hamronization/amrfinderplus/environment.yml index 791b9c96..5826a865 100644 --- 
a/modules/nf-core/hamronization/amrfinderplus/environment.yml +++ b/modules/nf-core/hamronization/amrfinderplus/environment.yml @@ -1,3 +1,5 @@ +--- +# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/environment-schema.json channels: - conda-forge - bioconda diff --git a/modules/nf-core/hamronization/deeparg/environment.yml b/modules/nf-core/hamronization/deeparg/environment.yml index 791b9c96..5826a865 100644 --- a/modules/nf-core/hamronization/deeparg/environment.yml +++ b/modules/nf-core/hamronization/deeparg/environment.yml @@ -1,3 +1,5 @@ +--- +# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/environment-schema.json channels: - conda-forge - bioconda diff --git a/modules/nf-core/hamronization/fargene/environment.yml b/modules/nf-core/hamronization/fargene/environment.yml index 791b9c96..5826a865 100644 --- a/modules/nf-core/hamronization/fargene/environment.yml +++ b/modules/nf-core/hamronization/fargene/environment.yml @@ -1,3 +1,5 @@ +--- +# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/environment-schema.json channels: - conda-forge - bioconda diff --git a/modules/nf-core/hamronization/rgi/environment.yml b/modules/nf-core/hamronization/rgi/environment.yml index 791b9c96..5826a865 100644 --- a/modules/nf-core/hamronization/rgi/environment.yml +++ b/modules/nf-core/hamronization/rgi/environment.yml @@ -1,3 +1,5 @@ +--- +# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/environment-schema.json channels: - conda-forge - bioconda diff --git a/modules/nf-core/hamronization/summarize/environment.yml b/modules/nf-core/hamronization/summarize/environment.yml index 791b9c96..5826a865 100644 --- a/modules/nf-core/hamronization/summarize/environment.yml +++ b/modules/nf-core/hamronization/summarize/environment.yml @@ -1,3 +1,5 @@ +--- +# yaml-language-server: 
$schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/environment-schema.json channels: - conda-forge - bioconda diff --git a/modules/nf-core/hmmer/hmmsearch/environment.yml b/modules/nf-core/hmmer/hmmsearch/environment.yml index c5ddec5d..1967d405 100644 --- a/modules/nf-core/hmmer/hmmsearch/environment.yml +++ b/modules/nf-core/hmmer/hmmsearch/environment.yml @@ -1,3 +1,5 @@ +--- +# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/environment-schema.json channels: - conda-forge - bioconda diff --git a/modules/nf-core/interproscan/environment.yml b/modules/nf-core/interproscan/environment.yml new file mode 100644 index 00000000..8e82f003 --- /dev/null +++ b/modules/nf-core/interproscan/environment.yml @@ -0,0 +1,7 @@ +--- +# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/environment-schema.json +channels: + - conda-forge + - bioconda +dependencies: + - bioconda::interproscan=5.59_91.0 diff --git a/modules/nf-core/interproscan/main.nf b/modules/nf-core/interproscan/main.nf new file mode 100644 index 00000000..add9b031 --- /dev/null +++ b/modules/nf-core/interproscan/main.nf @@ -0,0 +1,66 @@ +process INTERPROSCAN { + tag "$meta.id" + label 'process_medium' + label 'process_long' + + conda "${moduleDir}/environment.yml" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
+ 'https://depot.galaxyproject.org/singularity/interproscan:5.59_91.0--hec16e2b_1' : + 'biocontainers/interproscan:5.59_91.0--hec16e2b_1' }" + + input: + tuple val(meta), path(fasta) + path(interproscan_database, stageAs: 'data') + + output: + tuple val(meta), path('*.tsv') , optional: true, emit: tsv + tuple val(meta), path('*.xml') , optional: true, emit: xml + tuple val(meta), path('*.gff3'), optional: true, emit: gff3 + tuple val(meta), path('*.json'), optional: true, emit: json + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + def prefix = task.ext.prefix ?: "${meta.id}" + def is_compressed = fasta.name.endsWith(".gz") + def fasta_name = fasta.name.replace(".gz", "") + """ + if [ -d 'data' ]; then + # Find interproscan.properties to link data/ from work directory + INTERPROSCAN_DIR="\$( dirname "\$( dirname "\$( which interproscan.sh )" )" )" + INTERPROSCAN_PROPERTIES="\$( find "\$INTERPROSCAN_DIR/share" -name "interproscan.properties" )" + cp "\$INTERPROSCAN_PROPERTIES" . + sed -i "/^bin\\.directory=/ s|.*|bin.directory=\$INTERPROSCAN_DIR/bin|" interproscan.properties + export INTERPROSCAN_CONF=interproscan.properties + fi # else use sample DB included with conda ( testing only! 
) + + if ${is_compressed} ; then + gzip -c -d ${fasta} > ${fasta_name} + fi + + interproscan.sh \\ + --cpu ${task.cpus} \\ + --input ${fasta_name} \\ + ${args} \\ + --output-file-base ${prefix} + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + interproscan: \$( interproscan.sh --version | sed '1!d; s/.*version //' ) + END_VERSIONS + """ + + stub: + def prefix = task.ext.prefix ?: "${meta.id}" + """ + touch ${prefix}.{tsv,xml,json,gff3} + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + interproscan: \$( interproscan.sh --version | sed '1!d; s/.*version //' ) + END_VERSIONS + """ +} diff --git a/modules/nf-core/interproscan/meta.yml b/modules/nf-core/interproscan/meta.yml new file mode 100644 index 00000000..0bb10f7d --- /dev/null +++ b/modules/nf-core/interproscan/meta.yml @@ -0,0 +1,82 @@ +name: "interproscan" +description: Produces protein annotations and predictions from an amino acids FASTA + file +keywords: + - annotation + - fasta + - protein + - dna + - interproscan +tools: + - "interproscan": + description: "InterPro integrates together predictive information about proteins + function from a number of partner resources" + homepage: "https://www.ebi.ac.uk/interpro/search/sequence/" + documentation: "https://interproscan-docs.readthedocs.io" + tool_dev_url: "https://github.com/ebi-pf-team/interproscan" + doi: "10.1093/bioinformatics/btu031" + licence: ["GPL v3"] + identifier: "" +input: + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. 
[ id:'test', single_end:false ] + - fasta: + type: file + description: Input fasta file containing the amino acid or DNA query sequences + pattern: "*.{fa,fasta,fa.gz,fasta.gz}" + - - interproscan_database: + type: directory + description: Path to the interproscan database (untarred + http://ftp.ebi.ac.uk/pub/software/unix/iprscan/5/${version_major}-${version_minor}/interproscan-${version_major}-${version_minor}-64-bit.tar.gz) +output: + - tsv: + - meta: + type: file + description: Tab separated file containing detailed hits + pattern: "*.{tsv}" + - "*.tsv": + type: file + description: Tab separated file containing detailed hits + pattern: "*.{tsv}" + - xml: + - meta: + type: file + description: XML file containing detailed hits + pattern: "*.{xml}" + - "*.xml": + type: file + description: XML file containing detailed hits + pattern: "*.{xml}" + - gff3: + - meta: + type: file + description: GFF3 file containing detailed hits + pattern: "*.{gff3}" + - "*.gff3": + type: file + description: GFF3 file containing detailed hits + pattern: "*.{gff3}" + - json: + - meta: + type: file + description: JSON file containing detailed hits + pattern: "*.{json}" + - "*.json": + type: file + description: JSON file containing detailed hits + pattern: "*.{json}" + - versions: + - versions.yml: + type: file + description: File containing software versions + pattern: "versions.yml" +authors: + - "@toniher" + - "@mahesh-panchal" +maintainers: + - "@toniher" + - "@vagkaratzas" + - "@mahesh-panchal" diff --git a/modules/nf-core/interproscan/tests/main.nf.test b/modules/nf-core/interproscan/tests/main.nf.test new file mode 100644 index 00000000..1fe4625d --- /dev/null +++ b/modules/nf-core/interproscan/tests/main.nf.test @@ -0,0 +1,100 @@ +nextflow_process { + + name "Test Process INTERPROSCAN" + script "../main.nf" + process "INTERPROSCAN" + config "./nextflow.config" + tag "modules" + tag "modules_nfcore" + tag "interproscan" + + // Note: Regular
tests have been commented out because Interproscan has a hard-coded requirement of 10G memory, + // and will therefore not run on the nf-core test runners without being killed. + + // test("sarscov2 - proteome_fasta") { + + // when { + // process { + // """ + // input[0] = [ + // [ id:'test' ], + // file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/proteome.fasta', checkIfExists: true) + // ] + // input[1] = [] + // """ + // } + // } + + // then { + // assertAll( + // { assert process.success }, + // { assert snapshot( + // path(process.out.tsv[0][1]).readLines()[0] + // .contains("ENSSASP00005000004.1 4c35f09aac2f7be4f3cffd30c6aecac8 1273 Coils Coil Coil 1176 1203 - T"), + // process.out.xml, + // process.out.json, + // path(process.out.gff3[0][1]).readLines()[0..4,6..-1], + // process.out.versions, + // ).match() + // } + // ) + // } + + // } + + // test("sarscov2 - proteome_fasta_gz") { + + // when { + // process { + // """ + // input[0] = [ + // [ id:'test' ], + // file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/proteome.fasta.gz', checkIfExists: true) + // ] + // input[1] = [] + // """ + // } + // } + + // then { + // assertAll( + // { assert process.success }, + // { assert snapshot( + // path(process.out.tsv[0][1]).readLines()[0] + // .contains("ENSSASP00005000004.1 4c35f09aac2f7be4f3cffd30c6aecac8 1273 Coils Coil Coil 1176 1203 - T"), + // process.out.xml, + // process.out.json, + // path(process.out.gff3[0][1]).readLines()[0..4,6..-1], + // process.out.versions, + // ).match() + // } + // ) + // } + + // } + + test("sarscov2 - proteome_fasta_gz - stub") { + + options '-stub' + + when { + process { + """ + input[0] = [ + [ id:'test' ], + file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/proteome.fasta.gz', checkIfExists: true) + ] + input[1] = [] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + + } +} diff --git
a/modules/nf-core/interproscan/tests/main.nf.test.snap b/modules/nf-core/interproscan/tests/main.nf.test.snap new file mode 100644 index 00000000..0529dfe4 --- /dev/null +++ b/modules/nf-core/interproscan/tests/main.nf.test.snap @@ -0,0 +1,207 @@ +{ + "sarscov2 - proteome_fasta_gz - stub": { + "content": [ + { + "0": [ + [ + { + "id": "test" + }, + "test.tsv:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "1": [ + [ + { + "id": "test" + }, + "test.xml:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "2": [ + [ + { + "id": "test" + }, + "test.gff3:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "3": [ + [ + { + "id": "test" + }, + "test.json:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "4": [ + "versions.yml:md5,8bd8c66c2f1a7854faa29781761642c2" + ], + "gff3": [ + [ + { + "id": "test" + }, + "test.gff3:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "json": [ + [ + { + "id": "test" + }, + "test.json:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "tsv": [ + [ + { + "id": "test" + }, + "test.tsv:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "versions": [ + "versions.yml:md5,8bd8c66c2f1a7854faa29781761642c2" + ], + "xml": [ + [ + { + "id": "test" + }, + "test.xml:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ] + } + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.04.1" + }, + "timestamp": "2024-05-27T12:51:27.943051636" + }, + "sarscov2 - proteome_fasta_gz": { + "content": [ + true, + [ + [ + { + "id": "test" + }, + "test.xml:md5,7a211c1a4761e2b9b8700e6e9abbb15f" + ] + ], + [ + [ + { + "id": "test" + }, + "test.json:md5,b05cffc28b7bfeb3dabe43c2927b2024" + ] + ], + [ + "##gff-version 3", + "##feature-ontology http://song.cvs.sourceforge.net/viewvc/song/ontology/sofa.obo?revision=1.269", + "##interproscan-version 5.59-91.0", + "##sequence-region ENSSASP00005000004.1 1 1273", + "ENSSASP00005000004.1\t.\tpolypeptide\t1\t1273\t.\t+\t.\tID=ENSSASP00005000004.1;md5=4c35f09aac2f7be4f3cffd30c6aecac8", + "##FASTA", + ">ENSSASP00005000004.1", + 
"MFVFLVLLPLVSSQCVNLTTRTQLPPAYTNSFTRGVYYPDKVFRSSVLHSTQDLFLPFFS", + "NVTWFHAIHVSGTNGTKRFDNPVLPFNDGVYFASTEKSNIIRGWIFGTTLDSKTQSLLIV", + "NNATNVVIKVCEFQFCNDPFLGVYYHKNNKSWMESEFRVYSSANNCTFEYVSQPFLMDLE", + "GKQGNFKNLREFVFKNIDGYFKIYSKHTPINLVRDLPQGFSALEPLVDLPIGINITRFQT", + "LLALHRSYLTPGDSSSGWTAGAAAYYVGYLQPRTFLLKYNENGTITDAVDCALDPLSETK", + "CTLKSFTVEKGIYQTSNFRVQPTESIVRFPNITNLCPFGEVFNATRFASVYAWNRKRISN", + "CVADYSVLYNSASFSTFKCYGVSPTKLNDLCFTNVYADSFVIRGDEVRQIAPGQTGKIAD", + "YNYKLPDDFTGCVIAWNSNNLDSKVGGNYNYLYRLFRKSNLKPFERDISTEIYQAGSTPC", + "NGVEGFNCYFPLQSYGFQPTNGVGYQPYRVVVLSFELLHAPATVCGPKKSTNLVKNKCVN", + "FNFNGLTGTGVLTESNKKFLPFQQFGRDIADTTDAVRDPQTLEILDITPCSFGGVSVITP", + "GTNTSNQVAVLYQDVNCTEVPVAIHADQLTPTWRVYSTGSNVFQTRAGCLIGAEHVNNSY", + "ECDIPIGAGICASYQTQTNSPRRARSVASQSIIAYTMSLGAENSVAYSNNSIAIPTNFTI", + "SVTTEILPVSMTKTSVDCTMYICGDSTECSNLLLQYGSFCTQLNRALTGIAVEQDKNTQE", + "VFAQVKQIYKTPPIKDFGGFNFSQILPDPSKPSKRSFIEDLLFNKVTLADAGFIKQYGDC", + "LGDIAARDLICAQKFNGLTVLPPLLTDEMIAQYTSALLAGTITSGWTFGAGAALQIPFAM", + "QMAYRFNGIGVTQNVLYENQKLIANQFNSAIGKIQDSLSSTASALGKLQDVVNQNAQALN", + "TLVKQLSSNFGAISSVLNDILSRLDKVEAEVQIDRLITGRLQSLQTYVTQQLIRAAEIRA", + "SANLAATKMSECVLGQSKRVDFCGKGYHLMSFPQSAPHGVVFLHVTYVPAQEKNFTTAPA", + "ICHDGKAHFPREGVFVSNGTHWFVTQRNFYEPQIITTDNTFVSGNCDVVIGIVNNTVYDP", + "LQPELDSFKEELDKYFKNHTSPDVDLGDISGINASVVNIQKEIDRLNEVAKNLNESLIDL", + "QELGKYEQYIKWPWYIWLGFIAGLIAIVMVTIMLCCMTSCCSCLKGCCSCGSCCKFDEDD", + "SEPVLKGVKLHYT", + ">match$1_1176_1203", + "VVNIQKEIDRLNEVAKNLNESLIDLQEL" + ], + [ + "versions.yml:md5,8bd8c66c2f1a7854faa29781761642c2" + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.04.1" + }, + "timestamp": "2024-05-27T12:51:14.476645388" + }, + "sarscov2 - proteome_fasta": { + "content": [ + true, + [ + [ + { + "id": "test" + }, + "test.xml:md5,7a211c1a4761e2b9b8700e6e9abbb15f" + ] + ], + [ + [ + { + "id": "test" + }, + "test.json:md5,b05cffc28b7bfeb3dabe43c2927b2024" + ] + ], + [ + "##gff-version 3", + "##feature-ontology 
http://song.cvs.sourceforge.net/viewvc/song/ontology/sofa.obo?revision=1.269", + "##interproscan-version 5.59-91.0", + "##sequence-region ENSSASP00005000004.1 1 1273", + "ENSSASP00005000004.1\t.\tpolypeptide\t1\t1273\t.\t+\t.\tID=ENSSASP00005000004.1;md5=4c35f09aac2f7be4f3cffd30c6aecac8", + "##FASTA", + ">ENSSASP00005000004.1", + "MFVFLVLLPLVSSQCVNLTTRTQLPPAYTNSFTRGVYYPDKVFRSSVLHSTQDLFLPFFS", + "NVTWFHAIHVSGTNGTKRFDNPVLPFNDGVYFASTEKSNIIRGWIFGTTLDSKTQSLLIV", + "NNATNVVIKVCEFQFCNDPFLGVYYHKNNKSWMESEFRVYSSANNCTFEYVSQPFLMDLE", + "GKQGNFKNLREFVFKNIDGYFKIYSKHTPINLVRDLPQGFSALEPLVDLPIGINITRFQT", + "LLALHRSYLTPGDSSSGWTAGAAAYYVGYLQPRTFLLKYNENGTITDAVDCALDPLSETK", + "CTLKSFTVEKGIYQTSNFRVQPTESIVRFPNITNLCPFGEVFNATRFASVYAWNRKRISN", + "CVADYSVLYNSASFSTFKCYGVSPTKLNDLCFTNVYADSFVIRGDEVRQIAPGQTGKIAD", + "YNYKLPDDFTGCVIAWNSNNLDSKVGGNYNYLYRLFRKSNLKPFERDISTEIYQAGSTPC", + "NGVEGFNCYFPLQSYGFQPTNGVGYQPYRVVVLSFELLHAPATVCGPKKSTNLVKNKCVN", + "FNFNGLTGTGVLTESNKKFLPFQQFGRDIADTTDAVRDPQTLEILDITPCSFGGVSVITP", + "GTNTSNQVAVLYQDVNCTEVPVAIHADQLTPTWRVYSTGSNVFQTRAGCLIGAEHVNNSY", + "ECDIPIGAGICASYQTQTNSPRRARSVASQSIIAYTMSLGAENSVAYSNNSIAIPTNFTI", + "SVTTEILPVSMTKTSVDCTMYICGDSTECSNLLLQYGSFCTQLNRALTGIAVEQDKNTQE", + "VFAQVKQIYKTPPIKDFGGFNFSQILPDPSKPSKRSFIEDLLFNKVTLADAGFIKQYGDC", + "LGDIAARDLICAQKFNGLTVLPPLLTDEMIAQYTSALLAGTITSGWTFGAGAALQIPFAM", + "QMAYRFNGIGVTQNVLYENQKLIANQFNSAIGKIQDSLSSTASALGKLQDVVNQNAQALN", + "TLVKQLSSNFGAISSVLNDILSRLDKVEAEVQIDRLITGRLQSLQTYVTQQLIRAAEIRA", + "SANLAATKMSECVLGQSKRVDFCGKGYHLMSFPQSAPHGVVFLHVTYVPAQEKNFTTAPA", + "ICHDGKAHFPREGVFVSNGTHWFVTQRNFYEPQIITTDNTFVSGNCDVVIGIVNNTVYDP", + "LQPELDSFKEELDKYFKNHTSPDVDLGDISGINASVVNIQKEIDRLNEVAKNLNESLIDL", + "QELGKYEQYIKWPWYIWLGFIAGLIAIVMVTIMLCCMTSCCSCLKGCCSCGSCCKFDEDD", + "SEPVLKGVKLHYT", + ">match$1_1176_1203", + "VVNIQKEIDRLNEVAKNLNESLIDLQEL" + ], + [ + "versions.yml:md5,8bd8c66c2f1a7854faa29781761642c2" + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.04.1" + }, + "timestamp": "2024-05-27T12:50:27.562653728" + } +} \ No newline at 
end of file diff --git a/modules/nf-core/interproscan/tests/nextflow.config b/modules/nf-core/interproscan/tests/nextflow.config new file mode 100644 index 00000000..2043e2c7 --- /dev/null +++ b/modules/nf-core/interproscan/tests/nextflow.config @@ -0,0 +1,5 @@ +process { + withName: INTERPROSCAN { + ext.args = '-appl Coils' + } +} diff --git a/modules/nf-core/interproscan/tests/tags.yml b/modules/nf-core/interproscan/tests/tags.yml new file mode 100644 index 00000000..ddb90f86 --- /dev/null +++ b/modules/nf-core/interproscan/tests/tags.yml @@ -0,0 +1,2 @@ +interproscan: + - modules/nf-core/interproscan/** diff --git a/modules/nf-core/macrel/contigs/environment.yml b/modules/nf-core/macrel/contigs/environment.yml index bb5ce1a6..ea2b6ac6 100644 --- a/modules/nf-core/macrel/contigs/environment.yml +++ b/modules/nf-core/macrel/contigs/environment.yml @@ -1,3 +1,5 @@ +--- +# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/environment-schema.json channels: - conda-forge - bioconda diff --git a/modules/nf-core/mmseqs/createdb/environment.yml b/modules/nf-core/mmseqs/createdb/environment.yml index d3561349..69afa609 100644 --- a/modules/nf-core/mmseqs/createdb/environment.yml +++ b/modules/nf-core/mmseqs/createdb/environment.yml @@ -1,3 +1,5 @@ +--- +# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/environment-schema.json channels: - conda-forge - bioconda diff --git a/modules/nf-core/mmseqs/createtsv/environment.yml b/modules/nf-core/mmseqs/createtsv/environment.yml index d3561349..69afa609 100644 --- a/modules/nf-core/mmseqs/createtsv/environment.yml +++ b/modules/nf-core/mmseqs/createtsv/environment.yml @@ -1,3 +1,5 @@ +--- +# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/environment-schema.json channels: - conda-forge - bioconda diff --git a/modules/nf-core/mmseqs/databases/environment.yml 
b/modules/nf-core/mmseqs/databases/environment.yml index d3561349..69afa609 100644 --- a/modules/nf-core/mmseqs/databases/environment.yml +++ b/modules/nf-core/mmseqs/databases/environment.yml @@ -1,3 +1,5 @@ +--- +# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/environment-schema.json channels: - conda-forge - bioconda diff --git a/modules/nf-core/multiqc/environment.yml b/modules/nf-core/multiqc/environment.yml index a27122ce..c3b3413f 100644 --- a/modules/nf-core/multiqc/environment.yml +++ b/modules/nf-core/multiqc/environment.yml @@ -1,3 +1,5 @@ +--- +# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/environment-schema.json channels: - conda-forge - bioconda diff --git a/modules/nf-core/prodigal/environment.yml b/modules/nf-core/prodigal/environment.yml index 7609bf3b..b2c7efcf 100644 --- a/modules/nf-core/prodigal/environment.yml +++ b/modules/nf-core/prodigal/environment.yml @@ -1,3 +1,5 @@ +--- +# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/environment-schema.json channels: - conda-forge - bioconda diff --git a/modules/nf-core/prokka/environment.yml b/modules/nf-core/prokka/environment.yml index 1d1a019f..b4687037 100644 --- a/modules/nf-core/prokka/environment.yml +++ b/modules/nf-core/prokka/environment.yml @@ -1,3 +1,5 @@ +--- +# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/environment-schema.json channels: - conda-forge - bioconda diff --git a/modules/nf-core/rgi/cardannotation/environment.yml b/modules/nf-core/rgi/cardannotation/environment.yml index 609693fe..a3169324 100644 --- a/modules/nf-core/rgi/cardannotation/environment.yml +++ b/modules/nf-core/rgi/cardannotation/environment.yml @@ -1,3 +1,5 @@ +--- +# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/environment-schema.json channels: - conda-forge - 
bioconda diff --git a/modules/nf-core/rgi/main/environment.yml b/modules/nf-core/rgi/main/environment.yml index 609693fe..a3169324 100644 --- a/modules/nf-core/rgi/main/environment.yml +++ b/modules/nf-core/rgi/main/environment.yml @@ -1,3 +1,5 @@ +--- +# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/environment-schema.json channels: - conda-forge - bioconda diff --git a/modules/nf-core/seqkit/seq/environment.yml b/modules/nf-core/seqkit/seq/environment.yml index 160a67c0..b26fb1eb 100644 --- a/modules/nf-core/seqkit/seq/environment.yml +++ b/modules/nf-core/seqkit/seq/environment.yml @@ -4,4 +4,4 @@ channels: - conda-forge - bioconda dependencies: - - "bioconda::seqkit=2.9.0" + - bioconda::seqkit=2.9.0 diff --git a/modules/nf-core/tabix/bgzip/environment.yml b/modules/nf-core/tabix/bgzip/environment.yml index 017c259d..fe48f542 100644 --- a/modules/nf-core/tabix/bgzip/environment.yml +++ b/modules/nf-core/tabix/bgzip/environment.yml @@ -1,3 +1,5 @@ +--- +# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/environment-schema.json channels: - conda-forge - bioconda diff --git a/modules/nf-core/untar/environment.yml b/modules/nf-core/untar/environment.yml index ae4fa457..9b926b1f 100644 --- a/modules/nf-core/untar/environment.yml +++ b/modules/nf-core/untar/environment.yml @@ -1,3 +1,5 @@ +--- +# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/environment-schema.json channels: - conda-forge - bioconda diff --git a/nextflow.config b/nextflow.config index ae6b0038..9d62c15f 100644 --- a/nextflow.config +++ b/nextflow.config @@ -10,248 +10,256 @@ params { // Input options - input = null + input = null // MultiQC options - multiqc_config = null - multiqc_title = null - multiqc_logo = null - max_multiqc_email_size = '25.MB' - multiqc_methods_description = null + multiqc_config = null + multiqc_title = null + multiqc_logo = null + 
max_multiqc_email_size = '25.MB' + multiqc_methods_description = null // Boilerplate options - outdir = null - publish_dir_mode = 'copy' - email = null - email_on_fail = null - plaintext_email = false - monochrome_logs = false - hook_url = null - help = false - help_full = false - show_hidden = false - version = false - pipelines_testdata_base_path = 'https://raw.githubusercontent.com/nf-core/test-datasets/' - trace_report_suffix = new java.util.Date().format('yyyy-MM-dd_HH-mm-ss') + outdir = null + publish_dir_mode = 'copy' + email = null + email_on_fail = null + plaintext_email = false + monochrome_logs = false + hook_url = null + help = false + help_full = false + show_hidden = false + version = false + pipelines_testdata_base_path = 'https://raw.githubusercontent.com/nf-core/test-datasets/' + trace_report_suffix = new java.util.Date().format('yyyy-MM-dd_HH-mm-ss') // Config options // Taxonomy classification options - run_taxa_classification = false - taxa_classification_tool = 'mmseqs2' - taxa_classification_mmseqs_compressed = 0 - - taxa_classification_mmseqs_db = null - taxa_classification_mmseqs_db_id = 'Kalamari' - taxa_classification_mmseqs_db_savetmp = false - - taxa_classification_mmseqs_taxonomy_savetmp = false - taxa_classification_mmseqs_taxonomy_searchtype = 2 - taxa_classification_mmseqs_taxonomy_lcaranks = 'kingdom,phylum,class,order,family,genus,species' - taxa_classification_mmseqs_taxonomy_taxlineage = 1 - taxa_classification_mmseqs_taxonomy_sensitivity = 5.0 - taxa_classification_mmseqs_taxonomy_orffilters = 2.0 - taxa_classification_mmseqs_taxonomy_lcamode = 3 - taxa_classification_mmseqs_taxonomy_votemode = 1 + run_taxa_classification = false + taxa_classification_tool = 'mmseqs2' + taxa_classification_mmseqs_compressed = 0 + + taxa_classification_mmseqs_db = null + taxa_classification_mmseqs_db_id = 'Kalamari' + taxa_classification_mmseqs_db_savetmp = false + + taxa_classification_mmseqs_taxonomy_savetmp = false + 
taxa_classification_mmseqs_taxonomy_searchtype = 2 + taxa_classification_mmseqs_taxonomy_lcaranks = 'kingdom,phylum,class,order,family,genus,species' + taxa_classification_mmseqs_taxonomy_taxlineage = 1 + taxa_classification_mmseqs_taxonomy_sensitivity = 5.0 + taxa_classification_mmseqs_taxonomy_orffilters = 2.0 + taxa_classification_mmseqs_taxonomy_lcamode = 3 + taxa_classification_mmseqs_taxonomy_votemode = 1 // Annotation options - annotation_tool = 'pyrodigal' - save_annotations = false - - annotation_prodigal_singlemode = false - annotation_prodigal_closed = false - annotation_prodigal_transtable = 11 - annotation_prodigal_forcenonsd = false - - annotation_pyrodigal_singlemode = false - annotation_pyrodigal_closed = false - annotation_pyrodigal_transtable = 11 - annotation_pyrodigal_forcenonsd = false - annotation_pyrodigal_usespecialstopcharacter = false - - annotation_bakta_db = null - annotation_bakta_db_downloadtype = 'full' - annotation_bakta_singlemode = false - annotation_bakta_mincontiglen = 1 - annotation_bakta_translationtable = 11 - annotation_bakta_gram = '?' 
- annotation_bakta_complete = false - annotation_bakta_renamecontigheaders = false - annotation_bakta_compliant = false - annotation_bakta_trna = false - annotation_bakta_tmrna = false - annotation_bakta_rrna = false - annotation_bakta_ncrna = false - annotation_bakta_ncrnaregion = false - annotation_bakta_crispr = false - annotation_bakta_skipcds = false - annotation_bakta_pseudo = false - annotation_bakta_skipsorf = false - annotation_bakta_gap = false - annotation_bakta_ori = false - annotation_bakta_activate_plot = false - annotation_bakta_hmms = null - - annotation_prokka_singlemode = false - annotation_prokka_rawproduct = false - annotation_prokka_kingdom = 'Bacteria' - annotation_prokka_gcode = 11 - annotation_prokka_cdsrnaolap = false - annotation_prokka_rnammer = false - annotation_prokka_mincontiglen = 1 - annotation_prokka_evalue = 0.000001 - annotation_prokka_coverage = 80 - annotation_prokka_compliant = true - annotation_prokka_addgenes = false - annotation_prokka_retaincontigheaders = false + annotation_tool = 'pyrodigal' + save_annotations = false + + annotation_prodigal_singlemode = false + annotation_prodigal_closed = false + annotation_prodigal_transtable = 11 + annotation_prodigal_forcenonsd = false + + annotation_pyrodigal_singlemode = false + annotation_pyrodigal_closed = false + annotation_pyrodigal_transtable = 11 + annotation_pyrodigal_forcenonsd = false + annotation_pyrodigal_usespecialstopcharacter = false + + annotation_bakta_db = null + annotation_bakta_db_downloadtype = 'full' + annotation_bakta_singlemode = false + annotation_bakta_mincontiglen = 1 + annotation_bakta_translationtable = 11 + annotation_bakta_gram = '?' 
+ annotation_bakta_complete = false + annotation_bakta_renamecontigheaders = false + annotation_bakta_compliant = false + annotation_bakta_trna = false + annotation_bakta_tmrna = false + annotation_bakta_rrna = false + annotation_bakta_ncrna = false + annotation_bakta_ncrnaregion = false + annotation_bakta_crispr = false + annotation_bakta_skipcds = false + annotation_bakta_pseudo = false + annotation_bakta_skipsorf = false + annotation_bakta_gap = false + annotation_bakta_ori = false + annotation_bakta_activate_plot = false + annotation_bakta_hmms = null + + annotation_prokka_singlemode = false + annotation_prokka_rawproduct = false + annotation_prokka_kingdom = 'Bacteria' + annotation_prokka_gcode = 11 + annotation_prokka_cdsrnaolap = false + annotation_prokka_rnammer = false + annotation_prokka_mincontiglen = 1 + annotation_prokka_evalue = 0.000001 + annotation_prokka_coverage = 80 + annotation_prokka_compliant = true + annotation_prokka_addgenes = false + annotation_prokka_retaincontigheaders = false + + // Protein annotation options + run_protein_annotation = false + protein_annotation_tool = 'InterProScan' + protein_annotation_interproscan_db = null + protein_annotation_interproscan_db_url = 'https://ftp.ebi.ac.uk/pub/software/unix/iprscan/5/5.72-103.0/interproscan-5.72-103.0-64-bit.tar.gz' + protein_annotation_interproscan_applications = 'PANTHER,ProSiteProfiles,ProSitePatterns,Pfam' + protein_annotation_interproscan_enableprecalc = false // Database downloading options - save_db = false + save_db = false // AMP options - run_amp_screening = false - - amp_skip_amplify = false - - amp_skip_macrel = false - - amp_skip_ampir = false - amp_ampir_model = 'precursor' - amp_ampir_minlength = 10 - - amp_run_hmmsearch = false - amp_hmmsearch_models = null - amp_hmmsearch_savealignments = false - amp_hmmsearch_savetargets = false - amp_hmmsearch_savedomains = false - - amp_ampcombi_db_id = 'DRAMP' - amp_ampcombi_db = null - amp_ampcombi_parsetables_cutoff = 0.6 - 
amp_ampcombi_parsetables_ampir = '.ampir.tsv' - amp_ampcombi_parsetables_amplify = '.amplify.tsv' - amp_ampcombi_parsetables_macrel = '.macrel.prediction' - amp_ampcombi_parsetables_hmmsearch = '.hmmer_hmmsearch.txt' - amp_ampcombi_parsetables_aalength = 120 - amp_ampcombi_parsetables_dbevalue = 5 - amp_ampcombi_parsetables_hmmevalue = 0.06 - amp_ampcombi_parsetables_windowstopcodon = 60 - amp_ampcombi_parsetables_windowtransport = 11 - amp_ampcombi_parsetables_removehitswostopcodons = false - amp_ampcombi_cluster_covmode = 0 - amp_ampcombi_cluster_mode = 1 - amp_ampcombi_cluster_coverage = 0.8 - amp_ampcombi_cluster_seqid = 0.4 - amp_ampcombi_cluster_sensitivity = 4.0 - amp_ampcombi_cluster_removesingletons = false - amp_ampcombi_cluster_minmembers = 0 + run_amp_screening = false + + amp_skip_amplify = false + + amp_skip_macrel = false + + amp_skip_ampir = false + amp_ampir_model = 'precursor' + amp_ampir_minlength = 10 + + amp_run_hmmsearch = false + amp_hmmsearch_models = null + amp_hmmsearch_savealignments = false + amp_hmmsearch_savetargets = false + amp_hmmsearch_savedomains = false + + amp_ampcombi_db_id = 'DRAMP' + amp_ampcombi_db = null + amp_ampcombi_parsetables_cutoff = 0.6 + amp_ampcombi_parsetables_ampir = '.ampir.tsv' + amp_ampcombi_parsetables_amplify = '.amplify.tsv' + amp_ampcombi_parsetables_macrel = '.macrel.prediction' + amp_ampcombi_parsetables_hmmsearch = '.hmmer_hmmsearch.txt' + amp_ampcombi_parsetables_aalength = 120 + amp_ampcombi_parsetables_dbevalue = 5 + amp_ampcombi_parsetables_hmmevalue = 0.06 + amp_ampcombi_parsetables_windowstopcodon = 60 + amp_ampcombi_parsetables_windowtransport = 11 + amp_ampcombi_parsetables_removehitswostopcodons = false + amp_ampcombi_cluster_covmode = 0 + amp_ampcombi_cluster_mode = 1 + amp_ampcombi_cluster_coverage = 0.8 + amp_ampcombi_cluster_seqid = 0.4 + amp_ampcombi_cluster_sensitivity = 4.0 + amp_ampcombi_cluster_removesingletons = false + amp_ampcombi_cluster_minmembers = 0 // ARG options - 
run_arg_screening = false - - arg_skip_fargene = false - arg_fargene_hmmmodel = 'class_a,class_b_1_2,class_b_3,class_c,class_d_1,class_d_2,qnr,tet_efflux,tet_rpg,tet_enzyme' - arg_fargene_savetmpfiles = false - arg_fargene_minorflength = 90 - arg_fargene_score = null - arg_fargene_translationformat = 'pearson' - arg_fargene_orffinder = false - - arg_skip_rgi = false - arg_rgi_db = null - arg_rgi_savejson = false - arg_rgi_savetmpfiles = false - arg_rgi_alignmenttool = 'BLAST' - arg_rgi_includeloose = false - arg_rgi_includenudge = false - arg_rgi_lowquality = false - arg_rgi_data = 'NA' - arg_rgi_split_prodigal_jobs = true - - arg_skip_amrfinderplus = false - arg_amrfinderplus_db = null - arg_amrfinderplus_identmin = -1 - arg_amrfinderplus_coveragemin = 0.5 - arg_amrfinderplus_translationtable = 11 - arg_amrfinderplus_plus = false - arg_amrfinderplus_name = false - - arg_skip_deeparg = false - arg_deeparg_db = null - arg_deeparg_db_version = 2 // Make sure to update on module version bump! 
- arg_deeparg_model = 'LS' - arg_deeparg_minprob = 0.8 - arg_deeparg_alignmentidentity = 50 - arg_deeparg_alignmentevalue = 1e-10 - arg_deeparg_alignmentoverlap = 0.8 - arg_deeparg_numalignmentsperentry = 1000 - - arg_skip_abricate = false - arg_abricate_db_id = 'ncbi' - arg_abricate_db = null - arg_abricate_minid = 80 - arg_abricate_mincov = 80 - - arg_hamronization_summarizeformat = 'tsv' - - arg_skip_argnorm = false + run_arg_screening = false + + arg_skip_fargene = false + arg_fargene_hmmmodel = 'class_a,class_b_1_2,class_b_3,class_c,class_d_1,class_d_2,qnr,tet_efflux,tet_rpg,tet_enzyme' + arg_fargene_savetmpfiles = false + arg_fargene_minorflength = 90 + arg_fargene_score = null + arg_fargene_translationformat = 'pearson' + arg_fargene_orffinder = false + + arg_skip_rgi = false + arg_rgi_db = null + arg_rgi_savejson = false + arg_rgi_savetmpfiles = false + arg_rgi_alignmenttool = 'BLAST' + arg_rgi_includeloose = false + arg_rgi_includenudge = false + arg_rgi_lowquality = false + arg_rgi_data = 'NA' + arg_rgi_split_prodigal_jobs = true + + arg_skip_amrfinderplus = false + arg_amrfinderplus_db = null + arg_amrfinderplus_identmin = -1 + arg_amrfinderplus_coveragemin = 0.5 + arg_amrfinderplus_translationtable = 11 + arg_amrfinderplus_plus = false + arg_amrfinderplus_name = false + + arg_skip_deeparg = false + arg_deeparg_db = null + arg_deeparg_db_version = 2 // Make sure to update on module version bump! 
+ arg_deeparg_model = 'LS' + arg_deeparg_minprob = 0.8 + arg_deeparg_alignmentidentity = 50 + arg_deeparg_alignmentevalue = 1e-10 + arg_deeparg_alignmentoverlap = 0.8 + arg_deeparg_numalignmentsperentry = 1000 + + arg_skip_abricate = false + arg_abricate_db_id = 'ncbi' + arg_abricate_db = null + arg_abricate_minid = 80 + arg_abricate_mincov = 80 + + arg_hamronization_summarizeformat = 'tsv' + + arg_skip_argnorm = false // BGC options - run_bgc_screening = false - - bgc_mincontiglength = 3000 - bgc_savefilteredcontigs = false - - bgc_skip_antismash = false - bgc_antismash_db = null - bgc_antismash_installdir = null - bgc_antismash_cbgeneral = false - bgc_antismash_cbknownclusters = false - bgc_antismash_cbsubclusters = false - bgc_antismash_smcogtrees = false - bgc_antismash_ccmibig = false - bgc_antismash_contigminlength = 3000 - bgc_antismash_hmmdetectionstrictness = 'relaxed' - bgc_antismash_pfam2go = false - bgc_antismash_rre = false - bgc_antismash_taxon = 'bacteria' - bgc_antismash_tfbs = false - - bgc_skip_deepbgc = false - bgc_deepbgc_db = null - bgc_deepbgc_score = 0.5 - bgc_deepbgc_prodigalsinglemode = false - bgc_deepbgc_mergemaxproteingap = 0 - bgc_deepbgc_mergemaxnuclgap = 0 - bgc_deepbgc_minnucl = 1 - bgc_deepbgc_minproteins = 1 - bgc_deepbgc_mindomains = 1 - bgc_deepbgc_minbiodomains = 0 - bgc_deepbgc_classifierscore = 0.5 - - bgc_skip_gecco = false - bgc_gecco_cds = 3 - bgc_gecco_threshold = 0.8 - bgc_gecco_pfilter = 0.000000001 - bgc_gecco_edgedistance = 0 - bgc_gecco_mask = false - - bgc_run_hmmsearch = false - bgc_hmmsearch_models = null - bgc_hmmsearch_savealignments = false - bgc_hmmsearch_savetargets = false - bgc_hmmsearch_savedomains = false + run_bgc_screening = false + + bgc_mincontiglength = 3000 + bgc_savefilteredcontigs = false + + bgc_skip_antismash = false + bgc_antismash_db = null + bgc_antismash_installdir = null + bgc_antismash_cbgeneral = false + bgc_antismash_cbknownclusters = false + bgc_antismash_cbsubclusters = false + 
bgc_antismash_smcogtrees = false + bgc_antismash_ccmibig = false + bgc_antismash_contigminlength = 3000 + bgc_antismash_hmmdetectionstrictness = 'relaxed' + bgc_antismash_pfam2go = false + bgc_antismash_rre = false + bgc_antismash_taxon = 'bacteria' + bgc_antismash_tfbs = false + + bgc_skip_deepbgc = false + bgc_deepbgc_db = null + bgc_deepbgc_score = 0.5 + bgc_deepbgc_prodigalsinglemode = false + bgc_deepbgc_mergemaxproteingap = 0 + bgc_deepbgc_mergemaxnuclgap = 0 + bgc_deepbgc_minnucl = 1 + bgc_deepbgc_minproteins = 1 + bgc_deepbgc_mindomains = 1 + bgc_deepbgc_minbiodomains = 0 + bgc_deepbgc_classifierscore = 0.5 + + bgc_skip_gecco = false + bgc_gecco_cds = 3 + bgc_gecco_threshold = 0.8 + bgc_gecco_pfilter = 0.000000001 + bgc_gecco_edgedistance = 0 + bgc_gecco_mask = false + + bgc_run_hmmsearch = false + bgc_hmmsearch_models = null + bgc_hmmsearch_savealignments = false + bgc_hmmsearch_savetargets = false + bgc_hmmsearch_savedomains = false // Config options - config_profile_name = null - config_profile_description = null + config_profile_name = null + config_profile_description = null - custom_config_version = 'master' - custom_config_base = "https://raw.githubusercontent.com/nf-core/configs/${params.custom_config_version}" - config_profile_contact = null - config_profile_url = null + custom_config_version = 'master' + custom_config_base = "https://raw.githubusercontent.com/nf-core/configs/${params.custom_config_version}" + config_profile_contact = null + config_profile_url = null // Schema validation default options - validate_params = true + validate_params = true } // Load base.config by default for all pipelines @@ -259,10 +267,10 @@ includeConfig 'conf/base.config' profiles { debug { - dumpHashes = true - process.beforeScript = 'echo $HOSTNAME' - cleanup = false - nextflow.enable.configProcessNamesValidation = true + dumpHashes = true + process.beforeScript = 'echo $HOSTNAME' + cleanup = false + nextflow.enable.configProcessNamesValidation = true } conda { 
conda.enabled = true diff --git a/nextflow_schema.json b/nextflow_schema.json index 8fc059f9..e5674eee 100644 --- a/nextflow_schema.json +++ b/nextflow_schema.json @@ -529,6 +529,54 @@ }, "fa_icon": "fas fa-file-signature" }, + "protein_annotation": { + "title": "Protein Annotation: INTERPROSCAN", + "type": "object", + "description": "Functionally annotates all annotated coding regions.", + "default": "", + "properties": { + "run_protein_annotation": { + "type": "boolean", + "description": "Activates the functional annotation of annotated coding regions to provide more information about the coding regions classified.", + "help_text": "Activates the annotation of annotated coding regions. " + }, + "protein_annotation_tool": { + "type": "string", + "default": "InterProScan", + "help_text": "This flag specifies which tool for protein annotation should be activated.\nAt the moment only [InterProScan](https://github.com/ebi-pf-team/interproscan) is incorporated in the pipeline. This annotates the locus tags to protein and domain levels according to the InterPro databases.\n\nMore details can be found in the tool [documentation](https://interproscan-docs.readthedocs.io/en/latest/index.html).", + "description": "Specifies the tool used for further protein annotation.", + "fa_icon": "fas fa-tools" + }, + "protein_annotation_interproscan_db_url": { + "type": "string", + "default": "https://ftp.ebi.ac.uk/pub/software/unix/iprscan/5/5.72-103.0/interproscan-5.72-103.0-64-bit.tar.gz", + "help_text": "This allows the user to change the InterProScan database version that the pipeline will download for you automatically. To instead use a pre-downloaded database, please supply its path to `--protein_annotation_interproscan_db`. Changing this URL allows for the use of the latest database release. By default this is set to `https://ftp.ebi.ac.uk/pub/software/unix/iprscan/5/5.72-103.0/interproscan-5.72-103.0-64-bit.tar.gz`. 
", + "description": "Change the database version used for annotation.", + "fa_icon": "fas fa-database" + }, + "protein_annotation_interproscan_db": { + "type": "string", + "help_text": "Use this to supply the path to a pre-downloaded InterProScan database. This can be any unzipped InterProScan version. By default the database version '5.72-103.0' is downloaded.\n\nFor more details on where to find different InterProScan databases see tool [documentation](https://interproscan-docs.readthedocs.io/en/latest/UserDocs.html#obtaining-a-copy-of-interproscan).\n", + "description": "Path to pre-downloaded InterProScan database.", + "fa_icon": "fas fa-database" + }, + "protein_annotation_interproscan_applications": { + "type": "string", + "default": "PANTHER,ProSiteProfiles,ProSitePatterns,Pfam", + "help_text": "A comma-separated string specifying the database(s) to be used to annotate the coding regions annotated during the contig annotation workflow of the pipeline. By default these include `PANTHER,ProSiteProfiles,ProSitePatterns,Pfam`.\n- PANTHER (Protein ANalysis THrough Evolutionary Relationships): genes classified by their functions, using published scientific experimental evidence and evolutionary relationships.\n- PROSITE: protein domains, families, functional sites and specific patterns and profiles to identify them.\n- PFAM: protein families, represented by multiple sequence alignments and hidden Markov models (HMMs).\n\nThese databases were chosen based on the AMP workflow; therefore, integration of the results into the AMPcombi final summary is only guaranteed with these databases.\n\nNOTE: Currently, no integration of the results is implemented for the BGC and the ARG final summary tables.\n\nFor more information about all possible databases see the tool [documentation](https://interproscan-docs.readthedocs.io/en/latest/HowToRun.html).\n\n> Modifies tool parameter(s):\n> - InterProScan: `--applications`", + "description": "Assigns the database(s) to be 
used to annotate the coding regions.", + "fa_icon": "fas fa-database" + }, + "protein_annotation_interproscan_enableprecalc": { + "type": "boolean", + "help_text": "This increases the speed of functional annotation with InterProScan by pre-calculating matches found in the UniProtKB, thereby identifying unique matches in the query sequences for faster annotation. By default this is turned off.\n\nFor more information about this flag see the tool [documentation](https://interproscan-docs.readthedocs.io/en/latest/HowToRun.html).\n\n> Modifies tool parameter(s):\n> - InterProScan: `--disable-precalc`", + "description": "Pre-calculates residue mutual matches.", + "fa_icon": "fas fa-clock" + } + }, + "help_text": "This subworkflow adds additional protein annotations to all annotated coding regions. Currently, only annotation with InterProScan is integrated in the subworkflow.", + "fa_icon": "fas fa-file-signature" + }, "database_downloading_options": { "title": "Database downloading options", "type": "object", @@ -1593,6 +1641,9 @@ { "$ref": "#/$defs/annotation_pyrodigal" }, + { + "$ref": "#/$defs/protein_annotation" + }, { "$ref": "#/$defs/database_downloading_options" }, diff --git a/subworkflows/local/amp.nf b/subworkflows/local/amp.nf index 293692a1..0b97a7d9 100644 --- a/subworkflows/local/amp.nf +++ b/subworkflows/local/amp.nf @@ -17,18 +17,18 @@ include { MERGE_TAXONOMY_AMPCOMBI } from '.. 
workflow AMP { take: - fastas // tuple val(meta), path(contigs) - faas // tuple val(meta), path(PROKKA/PRODIGAL.out.faa) - tsvs // tuple val(meta), path(MMSEQS_CREATETSV.out.tsv) - gbks // tuple val(meta), path(ANNOTATION_ANNOTATION_TOOL.out.gbk) + fastas // tuple val(meta), path(contigs) + faas // tuple val(meta), path(PROKKA/PRODIGAL.out.faa) + tsvs // tuple val(meta), path(MMSEQS_CREATETSV.out.tsv) + gbks // tuple val(meta), path(ANNOTATION_ANNOTATION_TOOL.out.gbk) + tsvs_interpro // tuple val(meta), path(INTERPROSCAN.out.tsv)' main: ch_versions = Channel.empty() ch_ampresults_for_ampcombi = Channel.empty() - ch_ampcombi_summaries = Channel.empty() ch_macrel_faa = Channel.empty() - ch_ampcombi_complete = Channel.empty() - ch_ampcombi_for_cluster = Channel.empty() + ch_ampcombi_summaries = Channel.empty() + ch_ampcombi_complete = null // When adding new tool that requires FAA, make sure to update conditions // in funcscan.nf around annotation and AMP subworkflow execution @@ -38,6 +38,7 @@ workflow AMP { ch_faa_for_ampir = faas ch_faa_for_ampcombi = faas ch_gbk_for_ampcombi = gbks + ch_interpro_for_ampcombi = tsvs_interpro // AMPLIFY if ( !params.amp_skip_amplify ) { @@ -104,30 +105,40 @@ workflow AMP { .groupTuple() .join( ch_faa_for_ampcombi ) .join( ch_gbk_for_ampcombi ) + .join( ch_interpro_for_ampcombi ) .multiMap{ input: [ it[0], it[1] ] faa: it[2] gbk: it[3] + interpro: it [4] } + // AMPCOMBI2::PARSETABLES if ( params.amp_ampcombi_db != null ) { - AMPCOMBI2_PARSETABLES ( ch_input_for_ampcombi.input, ch_input_for_ampcombi.faa, ch_input_for_ampcombi.gbk, params.amp_ampcombi_db_id, params.amp_ampcombi_db, [] ) + AMPCOMBI2_PARSETABLES ( ch_input_for_ampcombi.input, ch_input_for_ampcombi.faa, ch_input_for_ampcombi.gbk, params.amp_ampcombi_db_id, params.amp_ampcombi_db, ch_input_for_ampcombi.interpro ) } else { AMP_DATABASE_DOWNLOAD( params.amp_ampcombi_db_id ) ch_versions = ch_versions.mix( AMP_DATABASE_DOWNLOAD.out.versions ) ch_ampcombi_input_db = 
AMP_DATABASE_DOWNLOAD.out.db - AMPCOMBI2_PARSETABLES ( ch_input_for_ampcombi.input, ch_input_for_ampcombi.faa, ch_input_for_ampcombi.gbk, params.amp_ampcombi_db_id, ch_ampcombi_input_db, [] ) + AMPCOMBI2_PARSETABLES ( ch_input_for_ampcombi.input, ch_input_for_ampcombi.faa, ch_input_for_ampcombi.gbk, params.amp_ampcombi_db_id, ch_ampcombi_input_db, ch_input_for_ampcombi.interpro ) } ch_versions = ch_versions.mix( AMPCOMBI2_PARSETABLES.out.versions ) ch_ampcombi_summaries = AMPCOMBI2_PARSETABLES.out.tsv.map{ it[1] }.collect() - AMPCOMBI2_COMPLETE ( ch_ampcombi_summaries ) - ch_versions = ch_versions.mix( AMPCOMBI2_COMPLETE.out.versions ) + // AMPCOMBI2::COMPLETE + ch_summary_count = ch_ampcombi_summaries.map { it.size() }.sum() - ch_ampcombi_complete = AMPCOMBI2_COMPLETE.out.tsv + if ( ch_summary_count == 0 || ch_summary_count == 1 ) { + log.warn("[nf-core/funcscan] AMPCOMBI2: ${ch_summary_count} file(s) passed. Skipping AMPCOMBI2_COMPLETE, AMPCOMBI2_CLUSTER, and TAXONOMY MERGING steps.") + } else { + AMPCOMBI2_COMPLETE(ch_ampcombi_summaries) + ch_versions = ch_versions.mix( AMPCOMBI2_COMPLETE.out.versions ) + ch_ampcombi_complete = AMPCOMBI2_COMPLETE.out.tsv .filter { file -> file.countLines() > 1 } + } + // AMPCOMBI2::CLUSTER if ( ch_ampcombi_complete != null ) { AMPCOMBI2_CLUSTER ( ch_ampcombi_complete ) ch_versions = ch_versions.mix( AMPCOMBI2_CLUSTER.out.versions ) diff --git a/subworkflows/local/protein_annotation.nf b/subworkflows/local/protein_annotation.nf new file mode 100644 index 00000000..51930702 --- /dev/null +++ b/subworkflows/local/protein_annotation.nf @@ -0,0 +1,55 @@ +/* + RUN FUNCTIONAL CLASSIFICATION: annotate protein FASTAs with InterProScan and pad the TSV output to the 15 columns AMPcombi expects +*/ + +include { INTERPROSCAN_DATABASE } from '../../modules/local/interproscan_download' +include { INTERPROSCAN } from '../../modules/nf-core/interproscan/main' + +workflow PROTEIN_ANNOTATION { + take: + faas // tuple val(meta), path(PROKKA/PRODIGAL.out.faa) + + main: + ch_versions = Channel.empty() + ch_interproscan_tsv = Channel.empty() + 
ch_interproscan_db = Channel.empty() + ch_interproscan_tsv_modified = Channel.empty() + + ch_faa_for_interproscan = faas + + if ( params.protein_annotation_tool == 'InterProScan') { + + if ( params.protein_annotation_interproscan_db != null ) { + ch_interproscan_db = Channel + .fromPath( params.protein_annotation_interproscan_db ) + .first() + } else { + INTERPROSCAN_DATABASE ( params.protein_annotation_interproscan_db_url ) + ch_versions = ch_versions.mix( INTERPROSCAN_DATABASE.out.versions ) + ch_interproscan_db = ( INTERPROSCAN_DATABASE.out.db ) + } + + INTERPROSCAN( ch_faa_for_interproscan, ch_interproscan_db ) + ch_versions = ch_versions.mix( INTERPROSCAN.out.versions ) + ch_interproscan_tsv = ch_interproscan_tsv.mix( INTERPROSCAN.out.tsv ) + + // Current INTERPROSCAN version 5.59_91.0 only includes 13 columns and not 15 which ampcombi expects, so we added them here + ch_interproscan_tsv_modified = INTERPROSCAN.out.tsv + .map { meta, tsv_path -> + def modified_tsv = file("${workflow.workDir}/tmp/${meta.id}_interproscan.faa.tsv") + // The tmp directory is not guaranteed to exist yet, so create it before writing the padded copy + modified_tsv.parent.mkdirs() + + // A negative split limit keeps trailing empty fields, so the two padding columns always land in positions 14 and 15 + modified_tsv.text = tsv_path + .readLines() + .collect { line -> (line.split('\t', -1) + ['NA', 'NA']).join('\t') } + .join('\n') + [meta, modified_tsv] + } + } + + emit: + versions = ch_versions + tsv = ch_interproscan_tsv_modified // channel: [ val(meta), tsv ] +} diff --git a/workflows/funcscan.nf b/workflows/funcscan.nf index e2e625f2..ba8f997a 100644 --- a/workflows/funcscan.nf +++ b/workflows/funcscan.nf @@ -19,11 +19,12 @@ include { methodsDescriptionText } from '../subworkflows/local/utils_nfcore // // SUBWORKFLOW: Consisting of a mix of local and nf-core/modules // -include { ANNOTATION } from '../subworkflows/local/annotation' -include { AMP } from '../subworkflows/local/amp' -include { ARG } from '../subworkflows/local/arg' -include { BGC } from 
'../subworkflows/local/bgc' -include { TAXA_CLASS } from '../subworkflows/local/taxa_class' +include { ANNOTATION } from '../subworkflows/local/annotation' +include { PROTEIN_ANNOTATION } from '../subworkflows/local/protein_annotation' +include { AMP } from '../subworkflows/local/amp' +include { ARG } from '../subworkflows/local/arg' +include { BGC } from '../subworkflows/local/bgc' +include { TAXA_CLASS } from '../subworkflows/local/taxa_class' /* ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ @@ -34,8 +35,9 @@ include { TAXA_CLASS } from '../subworkflows/local/taxa_class' // // MODULE: Installed directly from nf-core/modules // -include { GUNZIP as GUNZIP_INPUT_PREP } from '../modules/nf-core/gunzip/main' -include { SEQKIT_SEQ } from '../modules/nf-core/seqkit/seq/main' +include { GUNZIP as GUNZIP_INPUT_PREP } from '../modules/nf-core/gunzip/main' +include { SEQKIT_SEQ as SEQKIT_SEQ_LENGTH } from '../modules/nf-core/seqkit/seq/main' +include { SEQKIT_SEQ as SEQKIT_SEQ_FILTER } from '../modules/nf-core/seqkit/seq/main' /* ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ @@ -99,17 +101,17 @@ workflow FUNCSCAN { // Duplicate and filter the duplicated file for long contigs only for BGC // This is to speed up BGC run and prevent 'no hits found' fails if (params.run_bgc_screening) { - SEQKIT_SEQ(ch_intermediate_input.fastas.map { meta, fasta, faa, gbk -> [meta, fasta] }) + SEQKIT_SEQ_LENGTH(ch_intermediate_input.fastas.map { meta, fasta, faa, gbk -> [meta, fasta] }) ch_input_for_annotation = ch_intermediate_input.fastas .map { meta, fasta, protein, gbk -> [meta, fasta] } - .mix(SEQKIT_SEQ.out.fastx.map { meta, fasta -> [meta + [category: 'long'], fasta] }) + .mix(SEQKIT_SEQ_LENGTH.out.fastx.map { meta, fasta -> [meta + [category: 'long'], fasta] }) .filter { meta, fasta -> if (fasta != [] && fasta.isEmpty()) { log.warn("[nf-core/funcscan] Sample ${meta.id} does not have contigs longer 
than ${params.bgc_mincontiglength} bp. Will not be screened for BGCs.") } !fasta.isEmpty() } - ch_versions = ch_versions.mix(SEQKIT_SEQ.out.versions) + ch_versions = ch_versions.mix(SEQKIT_SEQ_LENGTH.out.versions) } else { ch_input_for_annotation = ch_intermediate_input.fastas.map { meta, fasta, protein, gbk -> [meta, fasta] } @@ -174,6 +176,39 @@ workflow FUNCSCAN { ch_taxonomy_tsv = Channel.empty() } + /* + PROTEIN ANNOTATION + */ + if (params.run_protein_annotation) { + def filtered_faas = ch_prepped_input.faas.filter { meta, file -> + if (file != [] && file.isEmpty()) { + log.warn("[nf-core/funcscan] Annotation of the following sample produced an empty FAA file. InterProScan classification of the CDS requiring this file will not be executed: ${meta.id}") + } + !file.isEmpty() + } + + SEQKIT_SEQ_FILTER(filtered_faas) + ch_versions = ch_versions.mix(SEQKIT_SEQ_FILTER.out.versions) + ch_input_for_protein_annotation = SEQKIT_SEQ_FILTER.out.fastx + + PROTEIN_ANNOTATION ( ch_input_for_protein_annotation ) + ch_versions = ch_versions.mix(PROTEIN_ANNOTATION.out.versions) + + ch_interproscan_tsv = PROTEIN_ANNOTATION.out.tsv.map { meta, file -> + if (file == [] || file.isEmpty()) { + log.warn("[nf-core/funcscan] Protein annotation with InterProScan produced an empty TSV file. No protein annotation will be added for sample ${meta.id}.") + [meta, []] + } else { + [meta, file] + } + } + } else { + ch_interproscan_tsv = ch_prepped_input.faas.map { meta, _ -> + [meta, []] + } + } + + /* SCREENING */ @@ -192,6 +227,7 @@ workflow FUNCSCAN { }, ch_taxonomy_tsv, ch_prepped_input.gbks, + ch_interproscan_tsv ) ch_versions = ch_versions.mix(AMP.out.versions) } @@ -211,6 +247,7 @@ workflow FUNCSCAN { !file.isEmpty() }, ch_prepped_input.gbks, + ch_interproscan_tsv ) ch_versions = ch_versions.mix(AMP.out.versions) }