@@ -721,6 +721,7 @@ task GetDuplicateReadnamesInQnameSortedBam {
721721 }
722722 input {
723723 File qns_bam
724+ Boolean localize_bam = false
724725 }
725726
726727 output {
@@ -729,40 +730,56 @@ task GetDuplicateReadnamesInQnameSortedBam {
729730 }
730731
731732 command <<<
732- # the way this works is the following:
733- # 0) relying on the re-auth.sh script to export the credentials
734- # 1) perform the remote sam-view subsetting in the background
735- # 2) listen to the PID of the background process, while re-auth every 1200 seconds
733+
736734 source /opt/re-auth.sh
737735 set -euxo pipefail
738736
739737 # assumption: the BAM header's @HD line declares SO:queryname; read that tag and abort with a clear message when it is anything else
740738 sort_order=$(samtools view -H ~{qns_bam } | grep "^@HD" | tr '\t' '\n' | grep "^SO:" | awk -F ':' '{print $2}' )
741739 if [[ "queryname" != "${sort_order}" ]]; then echo -e "Sort order ${sort_order} isn't the expected 'queryname'." && exit 1 ; fi
742740
743- # remote grab read names
744- echo "false" > samtools.failed.txt
745- samtools view ~{qns_bam } \
746- | awk -F '\t' '{print $1}' \
747- | uniq -d \
748- > "dup_read_names.txt" \
749- || { echo "true" > samtools.failed.txt ; exit 77 ; } &
750- pid = $!
741+ if ~{localize_bam }; then
742+ time \
743+ gcloud storage cp ~{qns_bam } name_does_not_matter.bam
751744
752- set +e
753- count = 1
754- while true ; do
755- sleep 1200 && date && source /opt/re-auth.sh
756- if [[ ${count } -gt 2 ]]; then exit 0 ; fi
757- if ! pgrep -x -P $pid ; then exit 0 ; fi
758- count = $(( count + 1 ))
759- done
745+ samtools view name_does_not_matter.bam \
746+ | awk -F '\t' '{print $1}' \
747+ | uniq -d \
748+ > "dup_read_names.txt"
749+
750+ echo "false" > samtools.failed.txt
751+ else
752+ # the way this works is the following:
753+ # 0) relying on the re-auth.sh script to export the credentials
754+ # 1) perform the remote sam-view operation in the background
755+ # 2) listen to the PID of the background process, while re-auth every 1200 seconds
756+
757+ # remote grab read names
758+ echo "false" > samtools.failed.txt
759+ samtools view ~{qns_bam } \
760+ | awk -F '\t' '{print $1}' \
761+ | uniq -d \
762+ > "dup_read_names.txt" \
763+ || { echo "true" > samtools.failed.txt ; exit 77 ; } &
764+ pid = $!
765+
766+ set +e
767+ count = 1
768+ while true ; do
769+ sleep 1200 && date && source /opt/re-auth.sh
770+ if [[ ${count } -gt 2 ]]; then exit 0 ; fi
771+ if ! pgrep -x -P $pid ; then exit 0 ; fi
772+ count = $(( count + 1 ))
773+ done
774+ fi
760775 >>>
761776
777+ Int disk_size = 5 + (if (localize_bam ) then ceil (size (qns_bam , "Gib" )) else 0 )
778+ String disk_type = if (localize_bam ) then "SSD" else "HDD"
762779 runtime {
763780 cpu : 1
764781 memory : "4 GiB"
765- disks : "local-disk 10 HDD "
782+ disks : "local-disk ~{ disk_size } ~{ disk_type } "
766783 preemptible : 2
767784 maxRetries : 1
768785 docker : "us.gcr.io/broad-dsp-lrma/lr-gcloud-samtools:0.1.3"
0 commit comments