Skip to content

Commit

Permalink
handling missing AD/DP values
Browse files Browse the repository at this point in the history
  • Loading branch information
chrisamiller committed Mar 18, 2019
1 parent 297357c commit 97b1e7d
Show file tree
Hide file tree
Showing 2 changed files with 19 additions and 4 deletions.
15 changes: 11 additions & 4 deletions somatic_llr_filter.py
Original file line number Diff line number Diff line change
Expand Up @@ -278,8 +278,13 @@ def main(args_input = sys.argv[1:]):
# Fetch the value of one per-sample field (field_name) for sample_name from
# the current `entry`. `entry` is not a parameter here; it is resolved from an
# outer scope (presumably the record being processed in main -- confirm).
def getFormatField(sample_name, field_name):
# Only index into the call data when both the sample and the field exist.
if(sample_name in entry.call_for_sample and field_name in entry.call_for_sample[sample_name].data):
return entry.call_for_sample[sample_name].data[field_name]
# NOTE(review): this text comes from a rendered diff with the +/- markers
# stripped. The two statements below look like the removed line and its
# replacement (raise on a missing field vs. return the "NA" sentinel) --
# confirm against the real file which one is live; taken literally, the
# return after the raise can never execute.
raise Exception("Field {} missing in an entry for Sample {}".format(field_name, sample_name))
return("NA")

def missingVals(arr):
    """Return True if any element of *arr* is the missing-value marker "NA".

    Elements are compared with ``==`` against the exact string "NA", so
    numbers, lists and other strings (e.g. "na") never count as missing.
    An empty *arr* yields False.
    """
    # any() stops at the first match, mirroring the early return of an
    # explicit loop.
    return any(val == "NA" for val in arr)

ad_nrm = getFormatField(args.normal_sample_name,args.allele_depth_field)
ad_tum = getFormatField(args.tumor_sample_name,args.allele_depth_field)
Expand All @@ -288,14 +293,16 @@ def getFormatField(sample_name, field_name):
normal_ref = ad_nrm[0]
tumor_ref = ad_tum[0]

call = ""
llr = 0

#TODO parse out per alt, retrieve calls
call = []
for i in range(1,(len(alts)+1)): #right now, this will only ever be one, due to above check. Could be expanded to support multiple alleles - see above
normal_var = ad_nrm[i]
tumor_var = ad_tum[i]

#if neither has any depth, then fail this up front
if tumor_depth + normal_depth == 0:
#if neither has any depth, or any vals are missing, then fail this up front
if missingVals([normal_var,tumor_var,tumor_depth,normal_depth]) or (tumor_depth + normal_depth == 0):
(llr,call) = (0,"Reference")
continue

Expand Down
8 changes: 8 additions & 0 deletions tests/test_data/input.missingdepth.vcf
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
##fileformat=VCFv4.0
##INFO=<ID=AC,Number=A,Type=Integer,Description="Allele count in genotypes, for each ALT allele, in the same order as listed">
##FORMAT=<ID=GT,Number=1,Type=String,Description="Genotype">
##FORMAT=<ID=DP,Number=1,Type=Integer,Description="Read depth">
##FORMAT=<ID=AD,Number=R,Type=Integer,Description="Allelic depths for the ref and alt alleles in the order listed">
##FORMAT=<ID=AF,Number=A,Type=Float,Description="Variant-allele frequency for the alt alleles">
#CHROM POS ID REF ALT QUAL FILTER INFO FORMAT TUMOR NORMAL
22 18644673 . C T . . AC=1 GT:AD:AF 0/1:52,48:0.52,0.48 0/0:99,1:0.99,0.01

0 comments on commit 97b1e7d

Please sign in to comment.