Skip to content

Commit f8e6bb2

Browse files
authored
Merge pull request #71 from dufeiyu/dragen_input
Directly use dragen align input files on storage1
2 parents 8c05555 + 21cca89 commit f8e6bb2

File tree

2 files changed

+15
-30
lines changed

2 files changed

+15
-30
lines changed

MyeloseqHD.wdl

+12-13
Original file line number | Diff line number | Diff line change
@@ -27,25 +27,24 @@ workflow MyeloseqHD {
2727
Array[String] Adapters = ["GATCGGAAGAGCACACGTCTGAACTCCAGTCAC","AGATCGGAAGAGCGTCGTGTAGGGAAA"]
2828

2929
String DragenReference = "/staging/runs/Chromoseq/refdata/dragen_hg38"
30-
String Reference = "/storage1/fs1/duncavagee/Active/SEQ/Chromoseq/process/refdata/hg38/all_sequences.fa"
31-
String ReferenceDict = "/storage1/fs1/duncavagee/Active/SEQ/Chromoseq/process/refdata/hg38/all_sequences.dict"
32-
33-
String VEP = "/storage1/fs1/gtac-mgi/Active/CLE/reference/VEP_cache"
34-
String QcMetrics = "/storage1/fs1/duncavagee/Active/SEQ/MyeloSeqHD/process/git/cle-myeloseqhd/accessory_files/MyeloseqHDQCMetrics.json"
30+
String Reference = "/storage1/fs1/duncavagee/Active/SEQ/Chromoseq/process/refdata/hg38/all_sequences.fa"
31+
String ReferenceDict = "/storage1/fs1/duncavagee/Active/SEQ/Chromoseq/process/refdata/hg38/all_sequences.dict"
32+
String VEP = "/storage1/fs1/gtac-mgi/Active/CLE/reference/VEP_cache"
3533

3634
String HaplotectBed = "/storage1/fs1/duncavagee/Active/SEQ/MyeloSeqHD/process/git/cle-myeloseqhd/accessory_files/myeloseq.haplotect_snppairs_hg38.bed"
3735
String AmpliconBed = "/storage1/fs1/duncavagee/Active/SEQ/MyeloSeqHD/process/git/cle-myeloseqhd/accessory_files/MyeloseqHD.16462-1615924889.Amplicons.hg38.bed"
3836
String CoverageBed = "/storage1/fs1/duncavagee/Active/SEQ/MyeloSeqHD/process/git/cle-myeloseqhd/accessory_files/MyeloseqHD.16462-1615924889.CoverageQC.hg38.bed"
39-
String DragenCoverageBed = "/staging/runs/MyeloSeqHD/dragen_align_inputs/MyeloseqHD.16462-1615924889.CoverageQC.hg38.bed"
40-
String DragenHotspot = "/staging/runs/MyeloSeqHD/dragen_align_inputs/myeloseq_hotspots.vcf.gz"
37+
String GenotypeVcf = "/storage1/fs1/duncavagee/Active/SEQ/MyeloSeqHD/process/git/cle-myeloseqhd/accessory_files/myeloseqhd.forcegenotype.vcf.gz"
38+
String QcMetrics = "/storage1/fs1/duncavagee/Active/SEQ/MyeloSeqHD/process/git/cle-myeloseqhd/accessory_files/MyeloseqHDQCMetrics.json"
39+
String Hotspot = "/storage1/fs1/duncavagee/Active/SEQ/MyeloSeqHD/process/git/cle-myeloseqhd/accessory_files/myeloseq_hotspots.vcf.gz"
4140

4241
String CustomAnnotationVcf = "/storage1/fs1/duncavagee/Active/SEQ/MyeloSeqHD/process/git/cle-myeloseqhd/accessory_files/myeloseq_custom_annotations.annotated.hg38.vcf.gz"
4342
String CustomAnnotationIndex = "/storage1/fs1/duncavagee/Active/SEQ/MyeloSeqHD/process/git/cle-myeloseqhd/accessory_files/myeloseq_custom_annotations.annotated.hg38.vcf.gz.tbi"
4443
String CustomAnnotationParameters = "MYELOSEQ,vcf,exact,0,TCGA_AC,MDS_AC,MYELOSEQBLACKLIST"
45-
String GenotypeVcf = "/storage1/fs1/duncavagee/Active/SEQ/MyeloSeqHD/process/git/cle-myeloseqhd/accessory_files/myeloseqhd.forcegenotype.vcf.gz"
4644

47-
String QC_pl = "/usr/local/bin/QC_metrics.pl"
45+
String QC_pl = "/usr/local/bin/QC_metrics.pl"
4846
String xfer_pl = "/storage1/fs1/duncavagee/Active/SEQ/MyeloSeqHD/process/git/cle-myeloseqhd/scripts/data_transfer.pl"
47+
4948
String DemuxFastqDir = "/storage1/fs1/gtac-mgi/Active/CLE/assay/myeloseqhd/demux_fastq"
5049

5150

@@ -85,7 +84,7 @@ workflow MyeloseqHD {
8584

8685
call dragen_align {
8786
input: DragenRef=DragenReference,
88-
DragenHotspot=DragenHotspot,
87+
Hotspot=Hotspot,
8988
fastq1=select_first([trim_reads.read1,samples[13]]),
9089
fastq2=select_first([trim_reads.read2,samples[14]]),
9190
Name=samples[1],
@@ -94,7 +93,7 @@ workflow MyeloseqHD {
9493
LB=samples[5] + '.' + samples[0],
9594
readfamilysize=readfamilysize,
9695
AmpliconBed=AmpliconBed,
97-
CoverageBed=DragenCoverageBed,
96+
CoverageBed=CoverageBed,
9897
OutputDir=OutputDir,
9998
SubDir=samples[1] + '_' + samples[0],
10099
queue=DragenQueue,
@@ -290,7 +289,7 @@ task trim_reads {
290289
task dragen_align {
291290
String Name
292291
String DragenRef
293-
String DragenHotspot
292+
String Hotspot
294293
String fastq1
295294
String fastq2
296295
String RG
@@ -322,7 +321,7 @@ task dragen_align {
322321

323322
/bin/mkdir ${LocalSampleDir} && \
324323
/bin/mkdir ${outdir} && \
325-
/opt/edico/bin/dragen -r ${DragenRef} --tumor-fastq1 ${fastq1} --tumor-fastq2 ${fastq2} --RGSM-tumor ${SM} --RGID-tumor ${RG} --RGLB-tumor ${LB} --enable-map-align true --enable-sort true --enable-map-align-output true --vc-enable-umi-liquid true --vc-combine-phased-variants-distance 3 --gc-metrics-enable=true --qc-coverage-region-1 ${CoverageBed} --qc-coverage-reports-1 full_res --umi-enable true --umi-min-supporting-reads ${readfamilysize} --umi-correction-scheme=random --umi-enable-probability-model-merging=false --umi-fuzzy-window-size=0 --enable-variant-caller=true --vc-target-bed ${CoverageBed} --enable-sv true --sv-call-regions-bed ${CoverageBed} --sv-exome true --sv-output-contigs true --vc-somatic-hotspots ${DragenHotspot} --umi-metrics-interval-file ${CoverageBed} --read-trimmers=fixed-len --trim-r1-5prime=${default=1 TrimLen} --trim-r1-3prime=${default=1 TrimLen} --trim-r2-5prime=${default=1 TrimLen} --trim-r2-3prime=${default=1 TrimLen} --output-dir ${LocalSampleDir} --output-file-prefix ${Name} --output-format BAM &> ${log} && \
324+
/opt/edico/bin/dragen -r ${DragenRef} --tumor-fastq1 ${fastq1} --tumor-fastq2 ${fastq2} --RGSM-tumor ${SM} --RGID-tumor ${RG} --RGLB-tumor ${LB} --enable-map-align true --enable-sort true --enable-map-align-output true --vc-enable-umi-liquid true --vc-combine-phased-variants-distance 3 --gc-metrics-enable=true --qc-coverage-region-1 ${CoverageBed} --qc-coverage-reports-1 full_res --umi-enable true --umi-min-supporting-reads ${readfamilysize} --umi-correction-scheme=random --umi-enable-probability-model-merging=false --umi-fuzzy-window-size=0 --enable-variant-caller=true --vc-target-bed ${CoverageBed} --enable-sv true --sv-call-regions-bed ${CoverageBed} --sv-exome true --sv-output-contigs true --vc-somatic-hotspots ${Hotspot} --umi-metrics-interval-file ${CoverageBed} --read-trimmers=fixed-len --trim-r1-5prime=${default=1 TrimLen} --trim-r1-3prime=${default=1 TrimLen} --trim-r2-5prime=${default=1 TrimLen} --trim-r2-3prime=${default=1 TrimLen} --output-dir ${LocalSampleDir} --output-file-prefix ${Name} --output-format BAM &> ${log} && \
326325
/bin/mv ${log} ./ && \
327326
/bin/mv ${LocalSampleDir} ${dragen_outdir}
328327
}

scripts/launcher.pl

+3-17
Original file line number | Diff line number | Diff line change
@@ -16,31 +16,17 @@
1616

1717
use lib "/storage1/fs1/duncavagee/Active/SEQ/Chromoseq/process/perl5/lib/perl5";
1818
use Spreadsheet::Read;
19-
use File::Copy::Recursive qw(dircopy);
2019
use JSON qw(from_json to_json);
2120
use IO::File;
2221
use File::Spec;
23-
use File::Compare;
2422

25-
##THIS LAUNCHER SCRIPT NEEDS TO BE RUN ON DRAGEN NODE compute1-dragen-2 TO BE ABLE TO CHECK DIFF ON SOME DRAGEN INPUT FILES
2623
die "Provide rundir, excel sample spreadsheet, and batch name in order" unless @ARGV == 3;
2724

2825
my ($rundir, $sample_sheet, $batch_name) = @ARGV;
2926
die "$rundir is not valid" unless -d $rundir;
3027
die "$sample_sheet is not valid" unless -s $sample_sheet;
3128

32-
my $staging_dir = '/staging/runs/MyeloSeqHD';
3329
my $dir = '/storage1/fs1/duncavagee/Active/SEQ/MyeloSeqHD';
34-
35-
#check diff on two key files
36-
for my $name (qw(MyeloseqHD.16462-1615924889.CoverageQC.hg38.bed myeloseq_hotspots.vcf.gz)) {
37-
my $staging = File::Spec->join($staging_dir, 'dragen_align_inputs', $name);
38-
my $process = File::Spec->join($dir, 'process', 'git', 'cle-myeloseqhd', 'accessory_files', $name);
39-
unless (compare($staging, $process)==0) {
40-
die "$staging and $process are not SAME !";
41-
}
42-
}
43-
4430
my $git_dir = File::Spec->join($dir, 'process', 'git', 'cle-myeloseqhd');
4531

4632
my $conf = File::Spec->join($git_dir, 'application.conf');
@@ -118,9 +104,9 @@
118104
unless (exists $all_hash{$all_id}) {
119105
die "For RESEQ $lib its MRN and accession can not be found in CoPath daily all_accession log";
120106
}
121-
$sex = $all_hash{$id}->{sex};
122-
$DOB = $all_hash{$id}->{DOB};
123-
$all_MRNs = $all_hash{$id}->{all_MRNs};
107+
$sex = $all_hash{$all_id}->{sex};
108+
$DOB = $all_hash{$all_id}->{DOB};
109+
$all_MRNs = $all_hash{$all_id}->{all_MRNs};
124110
}
125111
else { #NOTRANSFER RESEARCH They will skip query_DB and upload_DB tasks in WF
126112
($mrn, $accession, $sex, $DOB, $all_MRNs) = ('NONE') x 5;

0 commit comments

Comments
 (0)