Commit e397755

make the dedup verification step resilient to transient HTTP errors
1 parent bf50b57 commit e397755

2 files changed: +62 −23 lines

wdl/tasks/Utility/BAMutils.wdl

Lines changed: 44 additions & 21 deletions
@@ -716,53 +716,76 @@ task GetDuplicateReadnamesInQnameSortedBam {
     }
     parameter_meta {
         qns_bam: {
+            description: "Query-name-sorted BAM to be de-duplicated",
             localization_optional: true
         }
+        trial_idx: "The n-th attempt at this task (starting from 1); when this value is >= trial_max, the BAM is localized and the task uses a persistent SSD instead of a persistent HDD."
+        trial_max: "The maximum number of attempts that stream the BAM remotely; together with trial_idx, this design keeps call-caching from blocking retries."
     }
     input {
         File qns_bam
+        Int trial_idx = 1
+        Int trial_max = 3
     }

     output {
         File dup_names_txt = "dup_read_names.txt"
         Boolean result_may_be_corrupted = read_boolean("samtools.failed.txt")
     }

+    Boolean localize_bam = trial_idx >= trial_max
+
     command <<<
-        # the way this works is the following:
-        # 0) relying on the re-auth.sh script to export the credentials
-        # 1) perform the remote sam-view subsetting in the background
-        # 2) listen to the PID of the background process, while re-auth every 1200 seconds
+
         source /opt/re-auth.sh
         set -euxo pipefail

         # assumption: the input BAM is queryname-sorted
         sort_order=$(samtools view -H ~{qns_bam} | grep "^@HD" | tr '\t' '\n' | grep "^SO:" | awk -F ':' '{print $2}')
         if [[ "queryname" != "${sort_order}" ]]; then echo -e "Sort order ${sort_order} isn't the expected 'queryname'." && exit 1; fi

-        # remote grab read names
-        echo "false" > samtools.failed.txt
-        samtools view ~{qns_bam} \
-            | awk -F '\t' '{print $1}' \
-            | uniq -d \
-            > "dup_read_names.txt" \
-            || { echo "true" > samtools.failed.txt; exit 77; } &
-        pid=$!
+        if ~{localize_bam}; then
+            time \
+                gcloud storage cp ~{qns_bam} name_does_not_matter.bam

-        set +e
-        count=1
-        while true; do
-            sleep 1200 && date && source /opt/re-auth.sh
-            if [[ ${count} -gt 2 ]]; then exit 0; fi
-            if ! pgrep -x -P $pid; then exit 0; fi
-            count=$(( count+1 ))
-        done
+            samtools view name_does_not_matter.bam \
+                | awk -F '\t' '{print $1}' \
+                | uniq -d \
+                > "dup_read_names.txt"
+
+            echo "false" > samtools.failed.txt
+        else
+            # the way this works is the following:
+            # 0) rely on the re-auth.sh script to export the credentials
+            # 1) perform the remote sam-view operation in the background
+            # 2) watch the PID of the background process, re-authing every 1200 seconds
+
+            # grab read names remotely
+            echo "false" > samtools.failed.txt
+            samtools view ~{qns_bam} \
+                | awk -F '\t' '{print $1}' \
+                | uniq -d \
+                > "dup_read_names.txt" \
+                || { echo "true" > samtools.failed.txt; exit 77; } &
+            pid=$!
+
+            set +e
+            count=1
+            while true; do
+                sleep 1200 && date && source /opt/re-auth.sh
+                if [[ ${count} -gt 2 ]]; then exit 0; fi
+                if ! pgrep -x -P $pid; then exit 0; fi
+                count=$(( count+1 ))
+            done
+        fi
     >>>

+    Int disk_size = 5 + (if (localize_bam) then ceil(size(qns_bam, "GiB")) else 0)
+    String disk_type = if (localize_bam) then "SSD" else "HDD"
     runtime {
         cpu: 1
         memory: "4 GiB"
-        disks: "local-disk 10 HDD"
+        disks: "local-disk ~{disk_size} ~{disk_type}"
         preemptible: 2
         maxRetries: 1
         docker: "us.gcr.io/broad-dsp-lrma/lr-gcloud-samtools:0.1.3"
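
The streaming branch's logic is easy to lose in the +/- noise: the samtools pipeline runs in the background, a failure is recorded in samtools.failed.txt (with sentinel exit code 77) rather than killing the task outright, and the foreground loop re-sources the auth script every 1200 seconds until the background job's children are gone or the count cap is hit. Below is a minimal standalone sketch of that pattern, assuming the same /opt/re-auth.sh helper baked into the docker image; the task name and the `samtools view -c` payload are illustrative stand-ins, not part of the commit.

    version 1.0

    task StreamWithReauthSketch {
        input {
            File bam
        }
        parameter_meta {
            bam: { localization_optional: true }  # stream from GCS instead of localizing
        }
        command <<<
            source /opt/re-auth.sh
            set -euxo pipefail

            # 1) run the streaming job in the background; on failure, record a
            #    flag file and exit the background subshell with a sentinel code
            echo "false" > job.failed.txt
            samtools view -c ~{bam} > read_count.txt \
                || { echo "true" > job.failed.txt; exit 77; } &
            pid=$!

            # 2) in the foreground, refresh credentials every 1200 seconds;
            #    exit cleanly once the background job has no live children left,
            #    or after the capped number of sleep cycles
            set +e
            count=1
            while true; do
                sleep 1200 && date && source /opt/re-auth.sh
                if [[ ${count} -gt 2 ]]; then exit 0; fi
                if ! pgrep -x -P ${pid}; then exit 0; fi
                count=$(( count+1 ))
            done
        >>>
        output {
            Int n_reads = read_int("read_count.txt")
            Boolean job_failed = read_boolean("job.failed.txt")
        }
        runtime {
            docker: "us.gcr.io/broad-dsp-lrma/lr-gcloud-samtools:0.1.3"
        }
    }

The trial_idx input, meanwhile, exists only to make each retry call's inputs distinct: with identical inputs, call-caching would hand back the previous (possibly failed) result instead of re-running the check, which is exactly what the parameter_meta note guards against.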

wdl/tasks/Utility/ONTBamShardResetAndDeduplicate.wdl

Lines changed: 18 additions & 2 deletions
@@ -22,8 +22,24 @@ workflow Work {
     call BU.SamtoolsReset as Magic { input: bam = shard_bam }
     call BU.QuerynameSortBamWithPicard as SortUnaligned { input: bam = Magic.res }
     call BU.DeduplicateQuerynameSortedBam as DeQS { input: qnorder_bam = SortUnaligned.qnsort_bam }
-    call BU.GetDuplicateReadnamesInQnameSortedBam as CheckDedupShard { input: qns_bam = DeQS.dedup_bam }
-    if ( CheckDedupShard.result_may_be_corrupted || 0!=length(read_lines(CheckDedupShard.dup_names_txt)) ) {
+
+    # verify that the deduplication actually succeeded
+    call BU.GetDuplicateReadnamesInQnameSortedBam as InitialCheckDedupShard { input: qns_bam = DeQS.dedup_bam, trial_idx = 1 }
+    if ( InitialCheckDedupShard.result_may_be_corrupted ) {
+        call BU.GetDuplicateReadnamesInQnameSortedBam as RetryCheckDedupShard { input: qns_bam = DeQS.dedup_bam, trial_idx = 2 }
+        if ( RetryCheckDedupShard.result_may_be_corrupted ) {
+            call BU.GetDuplicateReadnamesInQnameSortedBam as LastCheckDedupShard { input: qns_bam = DeQS.dedup_bam, trial_idx = 3, trial_max = 3 }
+        }
+    }
+
+    # do not change the order: select_first returns the first defined value, so the latest attempt must come first
+    Boolean CheckOperationFailed = select_first([LastCheckDedupShard.result_may_be_corrupted,
+                                                 RetryCheckDedupShard.result_may_be_corrupted,
+                                                 InitialCheckDedupShard.result_may_be_corrupted])
+    Array[String] dup_names = read_lines(select_first([LastCheckDedupShard.dup_names_txt,
+                                                       RetryCheckDedupShard.dup_names_txt,
+                                                       InitialCheckDedupShard.dup_names_txt]))
+    if ( CheckOperationFailed || 0 != length(dup_names) ) {
         call Utils.StopWorkflow as DedupShardFail { input: reason = "Deduplication isn't successful for ~{shard_bam}." }
     }
 }
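
The "do not change the order" comment is load-bearing. Outputs of a call inside an if-block are optional (nullable) outside that block, and select_first scans its array left to right, returning the first non-null element; listing the attempts latest-first therefore guarantees the freshest result wins. A tiny self-contained illustration of that semantics (the workflow name and values are made up):

    version 1.0

    workflow SelectFirstOrderSketch {
        input {
            Boolean first_try_failed = true
        }

        # a declaration inside an if-block is optional (Int?) outside of it
        if (first_try_failed) {
            Int retry_result = 2
        }
        Int initial_result = 1

        output {
            # select_first returns the first non-null element, so the retry
            # must be listed before the initial attempt or it is never seen
            Int effective = select_first([retry_result, initial_result])  # 2 if the retry ran, else 1
        }
    }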
