-
Notifications
You must be signed in to change notification settings - Fork 0
/
ba5m.py
58 lines (43 loc) · 1.67 KB
/
ba5m.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
# Find a Highest-Scoring Multiple Sequence Alignment
from itertools import product
# Does this pointer/score refer to a non-negative cell in our matrix?
def valid_coord(x, pos):
return all([i >= 0 for i in prev(pos, x)])
# get previous position given a pointer
def prev(pos, ptr):
return tuple([p + d for p, d in zip(pos, ptr)])
# given sequences, a position and a pointer, calculate score.
# This represents a scoring function in which the score of an alignment column
# is 1 if all three symbols are identical and 0 otherwise.
def score(seqs, pos, ptr):
if ptr == (-1, -1, -1):
bases = [seqs[i][j] for i, j in enumerate(prev(pos, ptr))]
return all(x == bases[0] for x in bases)
else:
return 0
# generate possible previous cells relative to current cell (pointers)
def moves(n):
return list(product([0, -1], repeat=n))[1:]
def multiple_alignment(seqs):
m, p = {}, {}
m[0, 0, 0] = 0
ranges = [range(0, len(s) + 1) for s in seqs]
for pos in product(*ranges):
ptrs = list(filter(lambda x: valid_coord(x, pos), moves(3)))
if not len(ptrs):
continue
sc = [m[prev(pos, x)] + score(seqs, pos, x) for x in ptrs]
m[pos] = max(sc)
p[pos] = ptrs[sc.index(max(sc))]
# traceback to recover alignment
tot = m[pos]
aln = ["", "", ""]
while any([x > 0 for x in pos]):
ptr = p[pos]
for i, seq in enumerate(seqs):
aln[i] += seq[pos[i] - 1] if ptr[i] == -1 else "-"
pos = prev(pos, ptr)
return tot, aln[0][::-1], aln[1][::-1], aln[2][::-1]
def main(file):
seqs = open(file).read().splitlines()
print(*multiple_alignment(seqs), sep="\n")