Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

germline joint detect variants workflow #1043

Open
wants to merge 35 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from 25 commits
Commits
Show all changes
35 commits
Select commit Hold shift + click to select a range
5ef76b2
add input to control full output filename
apaul7 Jul 15, 2021
15db8c7
add minimum confidence input for gatk calls
apaul7 Jul 15, 2021
cc85818
s/all_cds/no_cds/
apaul7 Jul 15, 2021
9e8876d
add survivor merged annotsv tsv filtering
apaul7 Jul 15, 2021
47c368a
update annotsv to version 2.3
apaul7 Jul 15, 2021
70ab64a
s/SURVIVOR/survivor/ and s/CNVnator/cnvnator/
apaul7 Jul 15, 2021
43b7c2c
outputbinding change s/merged_sv_vcf/merged_vcf/
apaul7 Jul 15, 2021
0622bd9
stage secondary files in gather_to_sub_directory
apaul7 Jul 15, 2021
68f43a3
added min confidence input to genotype_gvcf step
apaul7 Jul 15, 2021
498236b
add annotated vcf as output
apaul7 Jul 15, 2021
3b4329e
add decompose and normalize step to joint genotype
apaul7 Jul 15, 2021
da8d329
add gatk soft filtering
apaul7 Jul 15, 2021
4ccb8d3
add new normalize tool
apaul7 Jul 15, 2021
8a98373
add gather to subdirectory tool for directories
apaul7 Jul 15, 2021
fb99760
add bcftools view tool
apaul7 Jul 15, 2021
f894746
add manta_germline tool
apaul7 Jul 15, 2021
1c526f8
add joint cnvnator subworkflow
apaul7 Jul 15, 2021
e6e621a
add joint cnvkit subworkflow
apaul7 Jul 15, 2021
66ac589
add joint sv read caller filtering
apaul7 Jul 15, 2021
571bab7
add joint sv filtering for depth callers
apaul7 Jul 15, 2021
c8214e3
add joint detect svs subworkflows
apaul7 Jul 15, 2021
7623779
add joint detect snps subworkflow
apaul7 Jul 15, 2021
065f8b3
add joint detect variants
apaul7 Jul 15, 2021
70005be
pass annotsv_annotations input to subworkflow
apaul7 Jul 15, 2021
0885e03
pass soft filtered annotated vcf as output
apaul7 Jul 15, 2021
e0449dc
remove doc line for easy to understand input
apaul7 Nov 17, 2021
20f96d8
ubuntu:xenial -> ubuntu:focal docker image
apaul7 Nov 17, 2021
91b0e5f
quote parameters in script
apaul7 Nov 17, 2021
8079ab4
fix quotes
apaul7 Dec 3, 2021
9b6a9cb
move script inline cwl file
apaul7 Dec 3, 2021
6e5cb23
add input option for output file basename
apaul7 Dec 3, 2021
ac09653
Update definitions/subworkflows/gatk_soft_filter.cwl
apaul7 Dec 6, 2021
1cb0254
Update definitions/tools/bcftools_view.cwl
apaul7 Dec 6, 2021
c253e6f
add doc for output type
apaul7 Dec 7, 2021
140e6eb
use bash arrays to quote multiple vars
apaul7 Dec 9, 2021
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 6 additions & 0 deletions definitions/pipelines/germline_wgs.cwl
Original file line number Diff line number Diff line change
Expand Up @@ -152,6 +152,11 @@ inputs:
disclaimer_text:
type: string?
default: 'Workflow source can be found at https://github.com/genome/analysis-workflows'
annotsv_annotations:
type:
- string
- Directory
doc: "directory/path of the annotsv annotations directory"
outputs:
cram:
type: File
Expand Down Expand Up @@ -468,6 +473,7 @@ steps:
sv_split_count: sv_filter_split_count
genome_build: vep_ensembl_assembly
blocklist_bedpe: blocklist_bedpe
annotsv_annotations: annotsv_annotations
out:
[cn_diagram, cn_scatter_plot, tumor_antitarget_coverage, tumor_target_coverage, tumor_bin_level_ratios, tumor_segmented_ratios, cnvkit_vcf, cnvnator_cn_file, cnvnator_root, cnvnator_vcf, manta_diploid_variants, manta_somatic_variants, manta_all_candidates, manta_small_candidates, manta_tumor_only_variants, smoove_output_variants, cnvkit_filtered_vcf, cnvnator_filtered_vcf, manta_filtered_vcf, smoove_filtered_vcf, survivor_merged_vcf, survivor_merged_annotated_tsv, bcftools_merged_vcf, bcftools_merged_annotated_tsv, bcftools_merged_filtered_annotated_tsv]
add_disclaimer_survivor_sv_vcf:
Expand Down
84 changes: 84 additions & 0 deletions definitions/subworkflows/gatk_soft_filter.cwl
Original file line number Diff line number Diff line change
@@ -0,0 +1,84 @@
#!/usr/bin/env cwl-runner

# Workflow: split a VCF into SNP and INDEL subsets, run variant_filtration on
# each subset with its own filter expressions, then merge and index the result.
cwlVersion: v1.0
class: Workflow
label: "apply soft filtering to a gatk called vcf using hard filter parameters"
apaul7 marked this conversation as resolved.
Show resolved Hide resolved
requirements:
  - class: SubworkflowFeatureRequirement
  - class: StepInputExpressionRequirement
  # Required by the `merge` step, which links two sources into one input.
  - class: MultipleInputFeatureRequirement
inputs:
  # Reference sequence, either a path string or a File accompanied by its
  # .fai index and .dict sequence dictionary.
  reference:
    type:
      - string
      - File
    secondaryFiles:
      - .fai
      - ^.dict
  # VCF to be filtered; a tabix index (.tbi) must sit next to it.
  vcf:
    type: File
    secondaryFiles:
      - .tbi
outputs:
  # Merged SNP + INDEL VCF after filtering, re-indexed by `index_merged`.
  filtered_vcf:
    type: File
    secondaryFiles:
      - .tbi
    outputSource: index_merged/indexed_vcf
steps:
  # NOTE(review): only select_type SNP and INDEL are requested below; confirm
  # whether other record types (e.g. MIXED) are expected in the merged output.
  split_snps:
    run: ../tools/select_variants.cwl
    in:
      reference: reference
      vcf: vcf
      output_vcf_basename:
        default: 'SNPS'
      select_type:
        default: 'SNP'
    out:
      - filtered_vcf
  split_indels:
    run: ../tools/select_variants.cwl
    in:
      reference: reference
      vcf: vcf
      output_vcf_basename:
        default: 'INDELS'
      select_type:
        default: 'INDEL'
    out:
      - filtered_vcf
  # Each filter entry is written as "<expression>;<filter name>".
  filter_snps:
    run: ../tools/variant_filtration.cwl
    in:
      reference: reference
      vcf: split_snps/filtered_vcf
      filters:
        default:
          - 'QD<2.0;QD2'
          - 'QUAL<30.0;QUAL30'
          - 'SOR>3.0;SOR3'
          - 'FS>60.0;FS60'
          - 'MQ<40.0;MQ40'
          - 'MQRankSum<-12.5;MQRankSum-12.5'
          - 'ReadPosRankSum<-8.0;ReadPosRankSum-8'
      output_vcf_basename:
        default: 'SNPS.filtered'
    out:
      - filtered_vcf
  filter_indels:
    run: ../tools/variant_filtration.cwl
    in:
      reference: reference
      vcf: split_indels/filtered_vcf
      filters:
        default:
          - 'QD<2.0;QD2'
          - 'QUAL<30.0;QUAL30'
          - 'FS>200.0;FS200'
          - 'ReadPosRankSum<-20.0;ReadPosRankSum-20'
      output_vcf_basename:
        default: 'INDELS.filtered'
    out:
      - filtered_vcf
  # Recombine the two filtered subsets into a single flat list of VCFs.
  merge:
    run: ../tools/merge_vcf.cwl
    in:
      merged_vcf_basename:
        default: 'soft_filtered'
      vcfs:
        source:
          - filter_snps/filtered_vcf
          - filter_indels/filtered_vcf
        linkMerge: merge_flattened
    out:
      - merged_vcf
  index_merged:
    run: ../tools/index_vcf.cwl
    in:
      vcf: merge/merged_vcf
    out:
      - indexed_vcf
92 changes: 92 additions & 0 deletions definitions/subworkflows/joint_cnvkit.cwl
Original file line number Diff line number Diff line change
@@ -0,0 +1,92 @@
#!/usr/bin/env cwl-runner

# Workflow: run the single-sample cnvkit subworkflow once per (bam, sample
# name) pair, then compress, rename the VCF sample column, and index each VCF.
cwlVersion: v1.0
class: Workflow
label: "jointly run cnvkit for sv calls"
requirements:
  - class: SubworkflowFeatureRequirement
  - class: StepInputExpressionRequirement
  - class: InlineJavascriptRequirement
  - class: ScatterFeatureRequirement
inputs:
  # sample_names and bams are paired by position (dotproduct scatter), so
  # both arrays must have the same length and ordering.
  sample_names:
    type: string[]
  bams:
    type: File[]
    secondaryFiles: [^.bai]
  reference_fasta:
    type:
      - string
      - File
    secondaryFiles: [.fai]
  reference_cnn:
    type: File?
    doc: "can be a flat reference or reference based on a panel of normals"
  method:
    type:
      - "null"
      - type: enum
        symbols: ["hybrid", "amplicon", "wgs"]
  segment_filter:
    type:
      - "null"
      - type: enum
        symbols: ["ampdel", "ci", "cn", "sem"]
outputs:
  # One entry per sample, in the same order as the inputs.
  vcfs:
    type: File[]
    outputSource: index_cnvkit/indexed_vcf
    secondaryFiles: [.tbi]
  cnr:
    type: File[]
    outputSource: cnvkit/tumor_bin_level_ratios
  cns:
    type: File[]
    outputSource: cnvkit/tumor_segmented_ratios
steps:
  cnvkit:
    scatter: [tumor_bam, cnvkit_vcf_name]
    scatterMethod: dotproduct
    run: cnvkit_single_sample.cwl
    in:
      method: method
      reference_cnn: reference_cnn
      tumor_bam: bams
      cnvkit_vcf_name:
        # A scalar source keeps this a plain string[] for the scatter. The
        # one-element list form is a multi-source link, which may be wrapped
        # in an extra array (merge_nested) and would need
        # MultipleInputFeatureRequirement.
        source: sample_names
        # Evaluated per scattered element: "<sample name>.cnvkit.vcf".
        valueFrom: "$(self).cnvkit.vcf"
      segment_filter: segment_filter
      fasta_reference: reference_fasta
    out:
      [tumor_bin_level_ratios, tumor_segmented_ratios, cnvkit_vcf]
  bgzip_and_index:
    scatter: [vcf]
    run: bgzip_and_index.cwl
    in:
      vcf: cnvkit/cnvkit_vcf
    out:
      [indexed_vcf]
  sample_rename:
    scatter: [input_vcf, new_sample_name]
    scatterMethod: dotproduct
    run: ../tools/replace_vcf_sample_name.cwl
    in:
      input_vcf: bgzip_and_index/indexed_vcf
      new_sample_name: sample_names
      # NOTE(review): presumably the sample name the cnvkit subworkflow
      # writes into its VCF header - confirm against cnvkit_single_sample.cwl.
      sample_to_replace:
        valueFrom: 'adjusted.tumor'
      # Output file is named "<sample name>.cnvkit.vcf.gz".
      output_name:
        valueFrom: |-
          ${
            var sample = inputs.new_sample_name;
            var name = sample + ".cnvkit.vcf.gz";
            return name;
          }
    out:
      [renamed_vcf]
  index_cnvkit:
    scatter: [vcf]
    run: ../tools/index_vcf.cwl
    in:
      vcf: sample_rename/renamed_vcf
    out:
      [indexed_vcf]
79 changes: 79 additions & 0 deletions definitions/subworkflows/joint_cnvnator.cwl
Original file line number Diff line number Diff line change
@@ -0,0 +1,79 @@
#!/usr/bin/env cwl-runner

# Workflow: run cnvnator once per (bam, sample name) pair, then compress,
# rename the VCF sample column, and index each resulting VCF.
cwlVersion: v1.0
class: Workflow
label: "run cnvnator for multiple samples"
requirements:
  - class: SubworkflowFeatureRequirement
  - class: ScatterFeatureRequirement
  - class: StepInputExpressionRequirement
  # The sample_rename step uses ${ ... } JavaScript bodies in valueFrom;
  # without this requirement only $(...) parameter references are allowed.
  - class: InlineJavascriptRequirement
inputs:
  reference:
    type:
      - string
      - File
    secondaryFiles: [.fai, ^.dict]
  # sample_names and bams are paired by position (dotproduct scatter), so
  # both arrays must have the same length and ordering.
  sample_names:
    type: string[]
  bams:
    type: File[]
    secondaryFiles: [^.bai]
  bin_size:
    type: int?
outputs:
  # One entry per sample, in the same order as the inputs.
  vcfs:
    type: File[]
    outputSource: index_cnvnator/indexed_vcf
    secondaryFiles: [.tbi]
  root_files:
    type: File[]
    outputSource: cnvnator/root_file
  cn_files:
    type: File[]
    outputSource: cnvnator/cn_file
steps:
  cnvnator:
    scatter: [bam, sample_name]
    scatterMethod: dotproduct
    run: ../tools/cnvnator.cwl
    in:
      bam: bams
      reference: reference
      sample_name: sample_names
      bin_size: bin_size
    out:
      [vcf, root_file, cn_file]
  bgzip_index:
    scatter: [vcf]
    run: bgzip_and_index.cwl
    in:
      vcf: cnvnator/vcf
    out:
      [indexed_vcf]
  sample_rename:
    scatter: [input_vcf, new_sample_name]
    scatterMethod: dotproduct
    run: ../tools/replace_vcf_sample_name.cwl
    in:
      input_vcf: bgzip_index/indexed_vcf
      new_sample_name: sample_names
      # The name to replace is the new sample name truncated at its first ".".
      # NOTE(review): presumably this matches the sample name cnvnator writes
      # into its VCF - confirm against ../tools/cnvnator.cwl.
      sample_to_replace:
        valueFrom: |-
          ${
            var old_name = inputs.new_sample_name.split(".")[0];
            return old_name;
          }
      # Output file is named "<sample name>.cnvnator.vcf.gz".
      output_name:
        valueFrom: |-
          ${
            var sample = inputs.new_sample_name;
            var name = sample + ".cnvnator.vcf.gz";
            return name;
          }
    out:
      [renamed_vcf]
  index_cnvnator:
    scatter: [vcf]
    run: ../tools/index_vcf.cwl
    in:
      vcf: sample_rename/renamed_vcf
    out:
      [indexed_vcf]
Loading