Skip to content
Merged
Show file tree
Hide file tree
Changes from 16 commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 7 additions & 0 deletions modules/nf-core/whatshap/phase/environment.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
---
# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/environment-schema.json
# Conda environment used by the WHATSHAP_PHASE process (referenced from main.nf
# via `conda "${moduleDir}/environment.yml"`).
# conda-forge is listed ahead of bioconda.
channels:
- conda-forge
- bioconda
# Single dependency: WhatsHap pinned to 2.8 from bioconda, the same version that
# appears in the container tags in main.nf.
# NOTE(review): main.nf also invokes `bgzip` and `tabix`, which are not listed
# here - confirm they are provided transitively by this environment.
dependencies:
- "bioconda::whatshap=2.8"
55 changes: 55 additions & 0 deletions modules/nf-core/whatshap/phase/main.nf
Original file line number Diff line number Diff line change
@@ -0,0 +1,55 @@
// Runs `whatshap phase` on a VCF using aligned reads and a reference FASTA,
// then emits the bgzipped, tabix-indexed result.
process WHATSHAP_PHASE {
tag "${meta.id}"
label 'process_low'

conda "${moduleDir}/environment.yml"
// Singularity runs use the oras:// image unless
// task.ext.singularity_pull_docker_container is set; every other case uses the
// plain registry image.
container "${workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container
? 'oras://community.wave.seqera.io/library/whatshap:2.8--c3862a4b2ad0f978'
: 'community.wave.seqera.io/library/whatshap:2.8--7fe530bc624a3e5a'}"

input:
// Three tuples: variants, alignments, reference. The index files (tbi, bai, fai)
// are declared as inputs but are never referenced by name in the script block.
tuple val(meta), path(vcf), path(tbi)
tuple val(meta2), path(bam), path(bai)
tuple val(meta3), path(fasta), path(fai)

output:
// Both file outputs are keyed by the meta map of the VCF input.
tuple val(meta), path("*.phased.vcf.gz"), emit: vcf
tuple val(meta), path("*.phased.vcf.gz.tbi"), emit: tbi
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

We usually try not to enforce file names :) see

error "Input and output names are the same, set prefix in module configuration to disambiguate!"

maybe you can do it similarly here :)

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Thank you for pointing this out. It's been fixed now.

// versions.yml is written by both the script and the stub block.
path "versions.yml", emit: versions

when:
// The task runs unless task.ext.when is set to a falsy value.
task.ext.when == null || task.ext.when

script:
// args:   extra command-line options for `whatshap phase` (task.ext.args).
// prefix: basename of every output file; defaults to the sample id.
def args = task.ext.args ?: ''
def prefix = task.ext.prefix ?: "${meta.id}"
// whatshap writes ${prefix}.phased.vcf and bgzip then turns it into
// ${prefix}.phased.vcf.gz. A staged input VCF that already carries either name
// would be clobbered or make the run fail, so stop early with a clear message.
if ("${vcf}" == "${prefix}.phased.vcf" || "${vcf}" == "${prefix}.phased.vcf.gz") {
    error "Input and output names are the same, set prefix in module configuration to disambiguate!"
}
"""
whatshap \\
    phase \\
    --output ${prefix}.phased.vcf \\
    --reference ${fasta} \\
    ${args} \\
    ${vcf} \\
    ${bam}

bgzip ${prefix}.phased.vcf
tabix -p vcf ${prefix}.phased.vcf.gz

cat <<-END_VERSIONS > versions.yml
"${task.process}":
    whatshap: \$(whatshap --version 2>&1 | sed 's/whatshap //g')
END_VERSIONS
"""

stub:
// prefix: basename of the placeholder outputs; defaults to the sample id.
def prefix = task.ext.prefix ?: "${meta.id}"
// The redirect below would overwrite a staged input that already carries the
// output name. Reject the names a real run writes (${prefix}.phased.vcf and
// ${prefix}.phased.vcf.gz) so a stub run fails in the same situations.
if ("${vcf}" == "${prefix}.phased.vcf" || "${vcf}" == "${prefix}.phased.vcf.gz") {
    error "Input and output names are the same, set prefix in module configuration to disambiguate!"
}
"""
echo "" | gzip > ${prefix}.phased.vcf.gz
touch ${prefix}.phased.vcf.gz.tbi

cat <<-END_VERSIONS > versions.yml
"${task.process}":
    whatshap: \$(whatshap --version 2>&1 | sed 's/whatshap //g')
END_VERSIONS
"""
}
99 changes: 99 additions & 0 deletions modules/nf-core/whatshap/phase/meta.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,99 @@
# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/meta-schema.json
# Module identity, search keywords and references to the wrapped tool.
name: "whatshap_phase"
description: Phase variants in a VCF file using long-read sequencing data
keywords:
  - phasing
  - haplotypes
  - vcf
  - long-reads
  - nanopore
  - pacbio
tools:
  - whatshap:
      description: |
        WhatsHap is a software for phasing genomic variants using DNA sequencing
        reads, also called read-based phasing or haplotype assembly.
      homepage: https://whatshap.readthedocs.io/
      documentation: https://whatshap.readthedocs.io/
      tool_dev_url: https://github.com/whatshap/whatshap
      doi: "10.1101/085050"
      licence:
        - "MIT"

# Input channels, in the order main.nf declares them:
#   1. [ meta,  vcf,   tbi ]
#   2. [ meta2, bam,   bai ]
#   3. [ meta3, fasta, fai ]
input:
  - - meta:
        type: map
        description: |
          Groovy Map containing sample information
          e.g. [ id:'test', single_end:false ]
    - vcf:
        type: file
        description: VCF file with unphased variants (can be gzipped)
        pattern: "*.{vcf,vcf.gz}"
    - tbi:
        type: file
        description: VCF index file (optional but recommended)
        pattern: "*.{tbi,csi}"
  - - meta2:
        type: map
        description: |
          Groovy Map containing bam information
          e.g. [ id:'test', single_end:false ]
    - bam:
        type: file
        description: BAM file with aligned reads
        pattern: "*.bam"
    - bai:
        type: file
        # WhatsHap fetches reads by region, so the alignment file has to be indexed.
        description: BAM index file (the BAM must be indexed for WhatsHap to read it)
        pattern: "*.bai"
  - - meta3:
        type: map
        description: |
          Groovy Map containing reference information
          e.g. [ id:'genome' ]
    - fasta:
        type: file
        description: Reference genome in FASTA format
        pattern: "*.{fa,fasta}"
    - fai:
        type: file
        description: Reference genome index
        pattern: "*.fai"

# Output channels, keyed by the `emit:` names used in main.nf.
output:
  vcf:
    - - meta:
          type: map
          description: |
            Groovy Map containing sample information
            e.g. `[ id:'sample1', single_end:false ]`
      - "*.phased.vcf.gz":
          type: file
          description: Bgzipped phased VCF file
          pattern: "*.phased.vcf.gz"
          ontologies:
            - edam: http://edamontology.org/format_3989 # GZIP format
  tbi:
    - - meta:
          type: map
          description: |
            Groovy Map containing sample information
            e.g. `[ id:'sample1', single_end:false ]`
      - "*.phased.vcf.gz.tbi":
          type: file
          description: Phased VCF index file
          pattern: "*.phased.vcf.gz.tbi"
          ontologies:
            - edam: http://edamontology.org/format_3616 # TBI format
  versions:
    - versions.yml:
        type: file
        description: File containing software versions
        pattern: "versions.yml"
        ontologies:
          - edam: http://edamontology.org/format_3750 # YAML

# GitHub handles of the module's author(s) and current maintainer(s).
authors:
  - "@haidyi"
maintainers:
  - "@haidyi"
104 changes: 104 additions & 0 deletions modules/nf-core/whatshap/phase/tests/main.nf.test
Original file line number Diff line number Diff line change
@@ -0,0 +1,104 @@
// nf-test suite for WHATSHAP_PHASE: one real run and one stub run, both fed the
// same PacBio VCF/BAM test files and the genome3 reference.
nextflow_process {

    name "Test Process WHATSHAP_PHASE"
    script "../main.nf"
    process "WHATSHAP_PHASE"

    tag "modules"
    tag "modules_nfcore"
    tag "whatshap"
    tag "whatshap/phase"
    tag "samtools/faidx"

    // Runs SAMTOOLS_FAIDX on genome3.fasta; the tests join its `fai` output
    // onto the FASTA tuple to build the third process input.
    setup {
        run("SAMTOOLS_FAIDX") {
            script "../../../samtools/faidx/main.nf"
            process {
                """
                input[0] = [ [ id:'test' ], // meta map
                file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/genome3.fasta', checkIfExists: true) ]
                input[1] = [[],[]]
                input[2] = false
                """
            }
        }
    }

    // Real run: snapshots the VCF summary and variant checksum plus the
    // versions file, both as a path and as parsed YAML.
    test("whatshap - phase - vcf") {

        when {
            process {
                """
                input[0] = [
                    [ id:'test' ], // meta map
                    file(params.modules_testdata_base_path + 'genomics/homo_sapiens/pacbio/vcf/NA03697B2_new.pbmm2.repeats.vcf.gz', checkIfExists: true),
                    file(params.modules_testdata_base_path + 'genomics/homo_sapiens/pacbio/vcf/NA03697B2_new.pbmm2.repeats.vcf.gz.csi', checkIfExists: true),
                ]

                input[1] = [
                    [ id:'test' ], // meta map
                    file(params.modules_testdata_base_path + 'genomics/homo_sapiens/pacbio/bam/NA03697B2_downsampled.pbmm2.repeats.bam', checkIfExists: true),
                    file(params.modules_testdata_base_path + 'genomics/homo_sapiens/pacbio/bam/NA03697B2_downsampled.pbmm2.repeats.bam.bai', checkIfExists: true),
                ]

                input[2] = Channel.of([
                    [ id:'test' ], // meta map
                    file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/genome3.fasta', checkIfExists: true)
                ]).join(SAMTOOLS_FAIDX.out.fai)
                """
            }
        }

        then {
            assertAll(
                { assert process.success },
                { assert snapshot(
                    path(process.out.vcf.get(0).get(1)).vcf.summary,
                    path(process.out.vcf.get(0).get(1)).vcf.variantsMD5,
                    process.out.versions,
                    path(process.out.versions[0]).yaml,
                ).match() }
            )
        }

    }

    // Stub run: same inputs with `-stub`; the whole output map is snapshotted.
    test("whatshap - phase - vcf - stub") {

        options "-stub"

        when {
            process {
                """
                input[0] = [
                    [ id:'test' ], // meta map
                    file(params.modules_testdata_base_path + 'genomics/homo_sapiens/pacbio/vcf/NA03697B2_new.pbmm2.repeats.vcf.gz', checkIfExists: true),
                    file(params.modules_testdata_base_path + 'genomics/homo_sapiens/pacbio/vcf/NA03697B2_new.pbmm2.repeats.vcf.gz.csi', checkIfExists: true),
                ]

                input[1] = [
                    [ id:'test' ], // meta map
                    file(params.modules_testdata_base_path + 'genomics/homo_sapiens/pacbio/bam/NA03697B2_downsampled.pbmm2.repeats.bam', checkIfExists: true),
                    file(params.modules_testdata_base_path + 'genomics/homo_sapiens/pacbio/bam/NA03697B2_downsampled.pbmm2.repeats.bam.bai', checkIfExists: true),
                ]

                input[2] = Channel.of([
                    [ id:'test' ], // meta map
                    file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/genome3.fasta', checkIfExists: true)
                ]).join(SAMTOOLS_FAIDX.out.fai)
                """
            }
        }

        then {
            assertAll(
                { assert process.success },
                { assert snapshot(process.out).match() }
            )
        }

    }

}
70 changes: 70 additions & 0 deletions modules/nf-core/whatshap/phase/tests/main.nf.test.snap
Original file line number Diff line number Diff line change
@@ -0,0 +1,70 @@
{
"whatshap - phase - vcf - stub": {
"content": [
{
"0": [
[
{
"id": "test"
},
"test.phased.vcf.gz:md5,68b329da9893e34099c7d8ad5cb9c940"
]
],
"1": [
[
{
"id": "test"
},
"test.phased.vcf.gz.tbi:md5,d41d8cd98f00b204e9800998ecf8427e"
]
],
"2": [
"versions.yml:md5,eebecfb3d7f284fc7d11c67978bf1994"
],
"tbi": [
[
{
"id": "test"
},
"test.phased.vcf.gz.tbi:md5,d41d8cd98f00b204e9800998ecf8427e"
]
],
"vcf": [
[
{
"id": "test"
},
"test.phased.vcf.gz:md5,68b329da9893e34099c7d8ad5cb9c940"
]
],
"versions": [
"versions.yml:md5,eebecfb3d7f284fc7d11c67978bf1994"
]
}
],
"meta": {
"nf-test": "0.9.3",
"nextflow": "25.10.2"
},
"timestamp": "2025-12-04T13:11:14.419732725"
},
"whatshap - phase - vcf": {
"content": [
"VcfFile [chromosomes=[chr19:45760000-45770300], sampleCount=1, variantCount=1, phased=false, phasedAutodetect=false]",
"e75d1ebbe87d6e55739cacb4e81dcd08",
[
"versions.yml:md5,eebecfb3d7f284fc7d11c67978bf1994"
],
{
"WHATSHAP_PHASE": {
"whatshap": 2.8
}
}
],
"meta": {
"nf-test": "0.9.3",
"nextflow": "25.10.2"
},
"timestamp": "2025-12-04T13:11:09.397108709"
}
}
Loading