Skip to content

Commit b88db5d

Browse files
Tobias Juhre
authored and committed
added ClinVar_to_SAPA_parser and some fixes to the main
1 parent 2f84e50 commit b88db5d

File tree

3 files changed

+51
-12
lines changed

3 files changed

+51
-12
lines changed

ClinVar_to_SAPA_parser.py

Lines changed: 45 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,45 @@
# Convert a tab-separated ClinVar variant export into the comma-separated
# layout consumed by SAPA ("Chr,Pos,Ref,Alt,Clinical significance,Gene").
#
# NOTE(review): the column positions below are taken from the indexes this
# script has always used; the actual ClinVar column titles are not visible
# here -- confirm against the export that is being converted.
import csv
import sys

INPUT_PATH = "clinvar_pathogenic_snp.txt"
OUTPUT_PATH = "converted_hg38_pathogenic.csv"
OUTPUT_HEADER = ["Chr", "Pos", "Ref", "Alt", "Clinical significance", "Gene"]

# Positions of the fields that are read from every input row.
NAME_COL = 0       # variant name; must contain exactly one "<ref>><alt>"
GENE_COL = 1
CLIN_SIG_COL = 4   # only the text before the first "(" is kept
CHROM_COL = 6
POS_COL = 7
BUILD_COL = 8      # genome build, compared against ``buildver``


def to_sapa_row(variant_line, buildver="GRCh38"):
    """Translate one input row into a SAPA row.

    :param variant_line: list of column strings as produced by csv.reader
    :param buildver: genome build a row must carry to be exported
    :return: ``[chr, pos, ref, alt, clin_sig, gene]`` or ``None`` when the
             row is too short, is not a single "X>Y" substitution, belongs
             to another build, or lacks a chromosome/position
    """
    # Blank lines come back from csv.reader as [] and truncated rows are
    # shorter than expected; both used to raise IndexError.
    if len(variant_line) <= BUILD_COL:
        return None

    ref_alt = variant_line[NAME_COL].split(">")
    # Exactly one ">" with something on both sides, otherwise there is no
    # base to pick up before/after the arrow.
    if len(ref_alt) != 2 or not ref_alt[0] or not ref_alt[1]:
        return None

    chrom = variant_line[CHROM_COL]
    pos = variant_line[POS_COL]
    if variant_line[BUILD_COL] != buildver or not chrom or not pos:
        return None

    ref = ref_alt[0][-1]   # last character before ">"
    alt = ref_alt[1][0]    # first character after ">"
    clin_sig = variant_line[CLIN_SIG_COL].split("(")[0]
    gene = variant_line[GENE_COL]
    return [chrom, pos, ref, alt, clin_sig, gene]


def input_has_header(csvfile, sample_size=100):
    """Guess from the first ``sample_size`` characters whether a header exists.

    The file is rewound to its start afterwards. If the sniffer cannot make
    sense of the sample the answer is ``False``; a header row is then simply
    rejected later by :func:`to_sapa_row`.
    """
    try:
        has_header = csv.Sniffer().has_header(csvfile.read(sample_size))
    except csv.Error:
        has_header = False
    csvfile.seek(0)  # rewind
    return has_header


def convert(in_path=INPUT_PATH, out_path=OUTPUT_PATH, limit=100, verbose=True):
    """Write every exportable variant of ``in_path`` to ``out_path``.

    :param in_path: tab-separated input file
    :param out_path: comma-separated output file (overwritten)
    :param limit: stop after this many exported variants; ``None`` exports
                  everything (the default of 100 keeps the former behavior)
    :param verbose: echo the header and each exported field to stdout
    :return: number of variants written
    """
    counter = 0
    # Both handles are closed even if a row blows up half way through.
    with open(out_path, "w") as clinvar, open(in_path) as csvfile:
        # csv.writer quotes fields that contain commas, which keeps the
        # output parseable; "\n" keeps the former line endings.
        writer = csv.writer(clinvar, lineterminator="\n")
        writer.writerow(OUTPUT_HEADER)

        # skip header if there
        has_header = input_has_header(csvfile)
        variant_lines = csv.reader(csvfile, delimiter='\t', quotechar='"')
        if has_header:
            header = next(variant_lines, None)
            if verbose and header is not None:
                print(header)

        for variant_line in variant_lines:
            if limit is not None and counter >= limit:
                break
            row = to_sapa_row(variant_line)
            if row is None:
                continue
            counter += 1
            if verbose:
                for field in row:
                    print(field)
            writer.writerow(row)
    return counter


def main():
    """Run the conversion with the default file names."""
    print("converting file to SAPA format")
    convert()
    print("END")


if __name__ == "__main__":
    main()

annovarParser_ljb26_all.py

Lines changed: 0 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -149,9 +149,3 @@ def export_tab(self):
149149
self.__phyloP46way_placental,
150150
self.__phyloP100way_vertebrate,
151151
self.__SiPhy_29way_logOdds)
152-
153-
# self.__g2014oct_all,
154-
# self.__g2014oct_afr,
155-
# self.__g2014oct_eas,
156-
# self.__g2014oct_eur,
157-
0Suchvorgang...

main.py

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -140,7 +140,7 @@
140140
variant_lines = csv.reader(csvfile, delimiter=',', quotechar='"')
141141

142142
# skip header if there
143-
has_header = csv.Sniffer().has_header(csvfile.read(100))
143+
has_header = csv.Sniffer().has_header(csvfile.read(25)) # 100
144144
csvfile.seek(0) # rewind
145145
incsv = csv.reader(csvfile)
146146
if has_header:
@@ -163,14 +163,14 @@
163163
variant_line[4], context, consequences, variant_line[7], variant_line[8], variant_line[9],
164164
int(variant_line[10]), variant_line[11], variant_line[12], variant_line[13],
165165
variant_line[14], variant_line[15]))
166-
elif len(variant_line) < 16:
166+
elif 16 > len(variant_line) > 4:
167167
success_count += 1
168168
# context = variant_line[5].split(",")
169169
# consequences = variant_line[6].split(",")
170170
snps.append(SNP(l_count, variant_line[0], int(variant_line[1]), variant_line[2], variant_line[3],
171-
".", ".", ".", ".", ".", ".", ".", ".", ".", ".", ".", "."))
171+
variant_line[4], variant_line[5], ".", ".", ".", ".", ".", ".", ".", ".", ".", "."))
172172
else:
173-
print "INVALID DATA (length < 16) in Line {}".format(l_count)
173+
print "INVALID DATA (16 >= length > 4) in Line {}".format(l_count)
174174
print variant_line
175175
fail_count += 1
176176
l_count += 1
@@ -230,7 +230,7 @@
230230
databases = ["-buildver " + buildversion + " -downdb -webfrom annovar refGene " + buildversion,
231231
"-buildver " + buildversion + " -downdb cytoBand " + buildversion,
232232
"-buildver " + buildversion + " -downdb -webfrom annovar esp6500siv2_all " + buildversion,
233-
"-buildver " + buildversion + " -downdb -webfrom annovar avsnp147 " + buildversion, # snp138
233+
"-buildver " + buildversion + " -downdb -webfrom annovar avsnp147 " + buildversion, # avsnp147
234234
"-buildver " + buildversion + " -downdb -webfrom annovar dbnsfp30a " + buildversion
235235
]
236236

@@ -294,7 +294,7 @@
294294
else:
295295
params = "amplicon_variants_tab.csv " + buildversion + " -buildver " + buildversion + " -out myanno -remove -protocol " \
296296
"refGene,cytoBand,esp6500siv2_all,avsnp147,dbnsfp30a " \
297-
"-operation g,r,f,f,f -nastring . " #snp138
297+
"-operation g,r,f,f,f -nastring . " #avsnp147
298298
if args.quiet:
299299
FNULL = open(os.devnull, 'w')
300300
p = subprocess.Popen([annovar_pl + params], shell=True, stdout=FNULL, stderr=subprocess.STDOUT)

0 commit comments

Comments
 (0)