File tree Expand file tree Collapse file tree 2 files changed +85
-0
lines changed
Expand file tree Collapse file tree 2 files changed +85
-0
lines changed Original file line number Diff line number Diff line change @@ -229,3 +229,45 @@ task PartitionManifest {
229229 }
230230}
231231
task GetBasecallModel {
    meta {
        description: "Getting the basecall model string of an ONT BAM"
    }
    parameter_meta {
        bam: {
            description: "BAM to operate on",
            localization_optional: true
        }
        runid_2_model: "The basecall model for each run."
    }
    input {
        File bam
    }
    output {
        # Two-column TSV (run id <TAB> basecall model) written by the command below.
        Map[String, String] runid_2_model = read_map("results.tsv")
    }

    command <<<
        set -eux

        # The BAM may not be localized (localization_optional), so samtools is handed
        # whatever path the backend supplies; the token is exported for that remote read.
        export GCS_OAUTH_TOKEN=$(gcloud auth application-default print-access-token)

        # One @RG header record per line. grep exits non-zero when there is no @RG
        # record at all, which (under set -e) fails the task.
        samtools view -H ~{bam} | grep "^@RG" > one_rg_per_line.txt

        : > results.unfiltered.tsv
        while IFS= read -r line
        do
            # Split the record into tags, keep the DS tag, and break its
            # space-separated key=value tokens onto separate lines.
            echo "$line" | tr '\t' '\n' | grep "^DS:" | sed "s/^DS://" | tr ' ' '\n' > tmp.txt

            # Strip only the leading "key=" so a value containing '=' is kept whole.
            runid=$(grep "^runid=" tmp.txt | sed "s/^runid=//")
            model=$(grep "^basecall_model=" tmp.txt | sed "s/^basecall_model=//")

            # printf writes the values verbatim (echo -e would expand backslash escapes).
            printf '%s\t%s\n' "${runid}" "${model}" >> results.unfiltered.tsv
        done < one_rg_per_line.txt

        # Several read groups can share a run id; collapse identical rows (keeping
        # first-seen order) so read_map is not handed repeated keys.
        awk '!seen[$0]++' results.unfiltered.tsv > results.tsv
    >>>

    runtime {
        cpu: 1
        memory: "4 GiB"
        disks: "local-disk 10 HDD"
        preemptible: 2
        maxRetries: 1
        docker: "us.gcr.io/broad-dsp-lrma/lr-gcloud-samtools:0.1.3"
    }
}
Original file line number Diff line number Diff line change @@ -1280,3 +1280,46 @@ task SummarizePBI {
12801280 docker : select_first ([runtime_attr .docker , default_attr .docker ])
12811281 }
12821282}
1283+
1284+ # todo: primrose is rebranded as jasmine, take care of that later
task VerifyPacBioBamHasAppropriatePrimroseRuns {
    meta {
        description: "Verify that a PacBio's BAM has primrose run on all its read groups"
    }
    parameter_meta {
        bam: "Path of the BAM to check; declared as String and passed to samtools as-is."
        readgroups_missing_primrose: "PU values of read groups that no primrose @PG command line mentions; empty when all are covered."
    }
    input {
        String bam
    }

    output {
        Array[String] readgroups_missing_primrose = read_lines("movies_without_primrose.txt")
    }

    command <<<
        set -eux

        export GCS_OAUTH_TOKEN=$(gcloud auth application-default print-access-token)
        samtools view -H ~{bam} > header.txt

        # get read groups' movies (the PU tag of every @RG record)
        grep "^@RG" header.txt | tr '\t' '\n' | grep "^PU:" | awk -F ':' '{print $2}' | sort > readgroup.movies.txt
        cat readgroup.movies.txt

        # get primrose PG lines (only their CL tag, i.e. the recorded command line).
        # A header without any primrose record makes the last grep exit 1; that is a
        # legitimate outcome here (every movie is then reported as missing), so it
        # must not abort the task under set -e.
        grep "^@PG" header.txt | grep -F 'ID:primrose' | tr '\t' '\n' | grep '^CL:' > primrose.pg.lines.txt || true
        # echoed one token per line purely for the task log
        tr ' ' '\n' < primrose.pg.lines.txt

        # Always create the output so read_lines succeeds when nothing is missing.
        touch movies_without_primrose.txt
        while IFS= read -r readgroup; do
            # -F: treat the movie name as a literal string, not a regular expression.
            if ! grep -qF -- "${readgroup}" primrose.pg.lines.txt; then
                echo "${readgroup}" >> movies_without_primrose.txt
            fi
        done < readgroup.movies.txt
    >>>

    runtime {
        cpu: 1
        memory: "4 GiB"
        disks: "local-disk 10 HDD"
        preemptible: 2
        maxRetries: 1
        docker: "us.gcr.io/broad-dsp-lrma/lr-gcloud-samtools:0.1.3"
    }
}
You can’t perform that action at this time.
0 commit comments