-
Notifications
You must be signed in to change notification settings - Fork 1
/
BA1J.py
48 lines (40 loc) · 1.82 KB
/
BA1J.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
from collections import defaultdict
def neighbor(pattern, mismatch, candidate_kmers):
bases = ['A', 'T', 'G', 'C']
for i in range(len(pattern)):
for j in range(len(bases)):
new_kmer = pattern[:i] + bases[j] + pattern[i+1:]
if (mismatch <= 1):
candidate_kmers.add(new_kmer)
else:
neighbor(new_kmer, mismatch-1, candidate_kmers)
def reverse_complement(pattern):
pattern = pattern[::-1]
dic = {
'A': 'T',
'C': 'G',
'G': 'C',
'T': 'A',
}
rev_c = ""
for ch in pattern:
rev_c += dic[ch]
return rev_c
dna = "CAGCACTTTCCCCCCATCAGCACTTCAGCACTTGGTCACCATACCCTTCAGCACTTGGTCACCCAGCACTTTCCCCCCATCAGCACTTCAGCACTTGCGAATGCAGCACTTTCCCCCCATGGTCACCATACCCTTATACCCTTTCCCCCCATTCCCCCCATCAGCACTTTCCCCCCATCAGCACTTCAGCACTTGCGAATGGGTCACCGGTCACCGGTCACCATACCCTTGGTCACCTCCCCCCATGCGAATGGCGAATGATACCCTTCAGCACTTATACCCTTTCCCCCCATATACCCTTCAGCACTTGCGAATGCAGCACTTCAGCACTTGCGAATGTCCCCCCATATACCCTTGCGAATGCAGCACTTCAGCACTTTCCCCCCATTCCCCCCATGCGAATGGCGAATGTCCCCCCATATACCCTTTCCCCCCATGGTCACCCAGCACTTATACCCTTTCCCCCCATTCCCCCCATATACCCTTGGTCACCATACCCTTCAGCACTTGGTCACCATACCCTTCAGCACTTGCGAATGCAGCACTTTCCCCCCATGGTCACCGGTCACCGCGAATGCAGCACTTGGTCACCGCGAATGGCGAATGGGTCACCATACCCTTCAGCACTTGCGAATGTCCCCCCATGGTCACCGGTCACCTCCCCCCATGGTCACCTCCCCCCATATACCCTTGGTCACCCAGCACTTGGTCACCGGTCACCGGTCACCCAGCACTTGGTCACCTCCCCCCATCAGCACTTGCGAATGGCGAATGCAGCACTTGCGAATGGCGAATGATACCCTTTCCCCCCATCAGCACTTGCGAATGTCCCCCCATTCCCCCCATATACCCTTATACCCTT"
k, d = 6, 3
n = len(dna)
ans = defaultdict(int)
for i in range(n-k+1):
candidate_kmers = set()
sub = dna[i:i+k]
neighbor(sub, d, candidate_kmers)
for kmer in candidate_kmers:
ans[kmer] += 1
candidate_kmers = set()
neighbor(reverse_complement(sub), d, candidate_kmers)
for kmer in candidate_kmers:
ans[kmer] += 1
mx = max(ans.values())
for k in ans:
if mx == ans[k]:
print(k, end=" ")