-
Notifications
You must be signed in to change notification settings - Fork 0
/
ba4b.py
35 lines (24 loc) · 860 Bytes
/
ba4b.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
# Find Substrings of a Genome Encoding a Given Amino Acid String
import re
from .ba4a import translate
from .ba1c import revcomp
def transcribe(dna):
    """Return ``dna`` with every ``T`` replaced by ``U``.

    Args:
        dna: Nucleotide string; only uppercase ``T`` characters are changed.

    Returns:
        The string with each ``T`` swapped for ``U``.
    """
    # A literal single-character swap does not need the regex engine.
    return dna.replace("T", "U")
def rev_transcribe(rna):
    """Return ``rna`` with every ``U`` replaced by ``T``.

    Args:
        rna: Nucleotide string; only uppercase ``U`` characters are changed.

    Returns:
        The string with each ``U`` swapped for ``T``.
    """
    # A literal single-character swap does not need the regex engine.
    return rna.replace("U", "T")
def find_matches(rna, pattern):
    """Yield the slices of ``rna`` whose translation contains ``pattern``.

    ``rna`` is translated starting at offsets 0, 1 and 2. Every occurrence
    of ``pattern`` in a translated frame (overlapping occurrences included)
    is mapped back to the slice of ``rna`` beginning at the matching codon
    and spanning ``3 * len(pattern)`` characters.

    NOTE(review): the index arithmetic assumes ``translate`` emits exactly
    one character per consecutive three-character codon of its input --
    confirm against the ``ba4a`` implementation.

    Args:
        rna: RNA string to search.
        pattern: Peptide to look for; it is matched as literal text.

    Yields:
        Substrings of ``rna``, one per match, frame by frame.
    """
    # Escape the peptide so characters with regex meaning (e.g. "*") are
    # matched literally. The zero-width lookahead lets finditer report
    # overlapping occurrences as well.
    matcher = re.compile(rf"(?=({re.escape(pattern)}))")
    width = len(pattern) * 3
    for frame in range(3):
        peptide = translate(rna[frame:])
        for hit in matcher.finditer(peptide):
            # Peptide index -> nucleotide index within the original string.
            start = hit.start() * 3 + frame
            yield rna[start:start + width]
def find_genome_substrings(dna, aa):
    """Yield substrings of ``dna`` that encode ``aa`` on either strand.

    Matches found on the given strand are yielded first, converted back from
    RNA to DNA letters. Matches found on the reverse complement follow; each
    is converted back to DNA letters and reverse-complemented again before
    being yielded.

    Args:
        dna: DNA string to search.
        aa: Amino-acid string passed to ``find_matches`` as the pattern.

    Yields:
        DNA substrings, forward-strand hits before reverse-strand hits.
    """
    forward_rna = transcribe(dna)
    for hit in find_matches(forward_rna, aa):
        yield rev_transcribe(hit)

    reverse_rna = transcribe(revcomp(dna))
    for hit in find_matches(reverse_rna, aa):
        as_dna = rev_transcribe(hit)
        yield revcomp(as_dna)
def main(file):
    """Read a DNA string and a peptide from ``file`` and print each match.

    Args:
        file: Path to a text file with exactly two lines: the DNA string,
            then the amino-acid string.

    Raises:
        ValueError: If the file does not split into exactly two lines.
    """
    # The context manager closes the handle instead of leaking it.
    with open(file) as handle:
        dna, aa = handle.read().splitlines()
    for seq in find_genome_substrings(dna, aa):
        print(seq)