-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathneedleman_wunsch.py
88 lines (65 loc) · 3.75 KB
/
needleman_wunsch.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
import numpy as np
class needleman_wunsch:
def __init__(self, X, Y, match, mismatch, gap):
self.sequence1 = X
self.sequence2 = Y
self.match_reward = match
self.mismatch_penalty = mismatch
self.gap_penalty = gap
def build_table(self):
print("Sequence 1:", self.sequence1)
print("Sequence 2:", self.sequence2)
# Pt I: initialize (M+1) x (N+1) table
opt = np.zeros((len(self.sequence1) + 1, len(self.sequence2) + 1))
# Pt II: initialize first row/col of alignment table
for i in range(1, len(opt[0])): # initialize first row
opt[0, i] = opt[0, i - 1] - 1 # assign progressive gap penalties
for i in range(1, len(opt)): # initialize first col
opt[i, 0] = opt[i - 1, 0] - 1 # assign progressive gap penalties
# Pt III: calculate optimal score in each cell
for i in range(1, len(opt)): # iter across rows
for j in range(1, len(opt[0])): # iter across cols
vertical_score = opt[i - 1, j] + self.gap_penalty # calc vert score
horizontal_score = opt[i, j - 1] + self.gap_penalty # calc horizontal score
if self.sequence1[i - 1] == self.sequence2[j - 1]: # check for match
diagonal_score = opt[i - 1, j - 1] + self.match_reward # diagonal score
else: # else is mismatch
diagonal_score = opt[i - 1, j - 1] + self.mismatch_penalty # diagonal score
# select highest (optimal) score from curr iteration
optimal_score = max([vertical_score, horizontal_score, diagonal_score])
# append highest score to [i,j]th mtx elt
opt[i, j] = optimal_score
opt_score = opt[-1, -1] # store optimal alignment score (final cell of mtx)
return opt, opt_score
def trace_back(self, table):
first = '' # init empty string for alignment of X
second = '' # init empty string for alignment of Y
x = len(self.sequence1) # store len of first seq
y = len(self.sequence2) # store len of second seq
while (x > 0) or (y > 0):
# Pt I: calculate vert/horizontal/diagonal scores
curr_score = table[x, y] # store curr score from curr iteration
vertical_score = table[x, y - 1] + self.gap_penalty # calc vert score
horizontal_score = table[x - 1, y] + self.gap_penalty # calc horizontal score
if self.sequence1[x - 1] == self.sequence2[y - 1]: # check for match
diagonal_score = table[x - 1, y - 1] + self.match_reward # diagonal score
else: # else is mismatch
diagonal_score = table[x - 1, y - 1] + self.mismatch_penalty # diagonal score
# Pt II: compare calculated scores to curr_score
if (y > 0) and (curr_score == vertical_score): # vert path
first = '-' + first # update alignment 1
second = self.sequence2[y - 1] + second # update alignment 2
x = x # update x idx
y = y - 1 # update y idx
elif (x > 0) and (curr_score == horizontal_score): # horizontal path
first = self.sequence1[x - 1] + first
second = '-' + second
x = x - 1 # update x idx
y = y # update y idx
elif (x > 0) and (y > 0) and (curr_score == diagonal_score): # diagonal path
first = self.sequence1[x - 1] + first # update alignment 1
second = self.sequence2[y - 1] + second # update alignment 2
x = x - 1 # update x idx
y = y - 1 # update y idx
opt_alignment = np.array([[first], [second]])
return opt_alignment