forked from enormandeau/Scripts
-
Notifications
You must be signed in to change notification settings - Fork 0
/
haplotype_distance_table.py
executable file
·37 lines (30 loc) · 1.05 KB
/
haplotype_distance_table.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
#!/usr/bin/env python
# -*- coding: utf-8 -*-
# First, run the following terminal command:
# awk -F"\t" '{print $1, $NF}' haplotype_finder_output.txt |
# grep Marker | uniq >markers_for_distance_table.txt
import sys
# Path of the input file, taken from the first command-line argument.
# It is opened for reading further down in this script.
# An IndexError is raised here if the argument is missing.
in_file = sys.argv[1]
# Path of the output file, taken from the second command-line argument.
# It is opened in "w" mode further down, so an existing file is overwritten.
# An IndexError is raised here if the argument is missing.
out_file = sys.argv[2]
def distance(h1, h2):
    """Count the positions at which two haplotypes differ.

    Positions are compared pairwise from the start of both sequences.
    When the haplotypes have different lengths, every position that exists
    in only one of them counts as one difference. This keeps the result
    symmetric (distance(a, b) == distance(b, a)) and avoids the IndexError
    that a shorter second argument would otherwise trigger.

    h1, h2: sized sequences of nucleotides (for example strings).
    Returns the number of differing positions as an int.
    """
    # zip stops at the shorter sequence, so only shared positions are
    # compared here.
    mismatches = sum(1 for a, b in zip(h1, h2) if a != b)
    # Positions beyond the shorter sequence have no counterpart to match.
    overhang = abs(len(h1) - len(h2))
    return mismatches + overhang
# Write one pairwise distance table per non-blank line of the input file.
# Each such line must hold exactly two whitespace-separated fields: a marker
# name and a colon-separated list of haplotypes. For every marker the output
# gets the marker name on its own line, followed by one row per haplotype
# containing its space-separated distances to every haplotype of that marker.
with open(in_file) as f, open(out_file, "w") as out_f:
    for line in f:
        l = line.strip()
        if l == "":
            # Blank lines carry no marker; skip them.
            continue

        # Raises ValueError if the line does not have exactly two fields.
        marker, haplotypes = l.split()
        out_f.write(marker + "\n")

        # Split once per marker instead of once per loop iteration.
        haplotype_list = haplotypes.split(":")

        for i in haplotype_list:
            temp_dist = []
            for j in haplotype_list:
                # Echo each compared pair to stdout. A single pre-formatted
                # string in parentheses prints the same text under Python 2
                # and Python 3, whereas the statement form `print i, j` is
                # a SyntaxError under Python 3.
                print("%s %s" % (i, j))
                temp_dist.append(distance(i, j))
            # One table row: distances from haplotype i to all haplotypes.
            out_f.write(" ".join([str(x) for x in temp_dist]) + "\n")