-
Notifications
You must be signed in to change notification settings - Fork 0
/
Needleman-Wunsch.py
95 lines (79 loc) · 4 KB
/
Needleman-Wunsch.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
# !/usr/bin/env python3
# Needleman-Wunsch
# University of California, Santa Cruz - BME 230A
# Biomolecular Engineering and Bioinformatics
# Name: (zmmason)
# Group Members: NONE
import numpy
class NeedlemanWunsch(object):
""""
Needleman-Wunsch algorithm to compute the optimal alignment for a pair of sequences
INPUT: Sequence String 1 (string1), Sequence String 2 (string2), scores can be altered in code
OUTPUT OPTIONS: Needleman-Wunsch matrix, alignment score, alignment index
"""
def __init__(self, string1, string2, gapScore=-2, matchScore=3,
mismatchScore=-3):
""" Finds an optimal global alignment of two strings.
"""
self.editMatrix = numpy.zeros(shape=[len(string1) + 1, len(string2) + 1],
dtype=int) # Numpy matrix representing edit matrix
# Preinitialized to have zero values
# Code to complete to compute the edit matrix
self.alignmentScore = 0 # initializes variable for 'getAlignmentScore' function
def matcher(a, b):
if a == b:
return matchScore
elif a == 0 or b == 0:
return gapScore
else:
return mismatchScore
for i in range(1, len(string2) + 1):
self.editMatrix[0][i] = gapScore * i
for j in range(1, len(string1) + 1):
self.editMatrix[j][0] = gapScore * j
for i in range(1, len(string1) + 1):
for j in range(1, len(string2) + 1):
match = matcher(string2[j - 1], string1[i - 1])
diag = self.editMatrix[i - 1][j - 1] + match
delete = self.editMatrix[i - 1][j] + gapScore
insert = self.editMatrix[i][j - 1] + gapScore
self.editMatrix[i][j] = max(diag, delete, insert)
alignmentScore = max(diag, delete, insert)
if alignmentScore > self.alignmentScore: # handles max alignment score for the matrix (easy callback for getAlignmentScore function)
self.alignmentScore = alignmentScore
def getAlignmentScore(self):
""" Return the alignment score """
# Code to complete
return self.alignmentScore # see above comment for implementation
def getAlignment(self):
""" Returns an optimal global alignment of two strings. Aligned
is returned as an ordered list of aligned pairs.
e.g. For the two strings GATTACA and TACA an global alignment is
is GATTACA
---TACA
This alignment would be returned as:
[(3, 0), (4, 1), (5, 2), (6, 3)]
"""
alignedPairs = []
# Code to complete - generated by traceback through matrix to generate aligned pairs
# flipMatrix = numpy.flip(self.editMatrix) # flipped matrix to make recursion and itterations easier
alignmentSet = []
usedRow = []
for i in range(len(string1) - 1): # finding max values for each significant row
for j in range(len(string2) - 1):
maxVertical = max(numpy.flip(self.editMatrix)[i]) # find max val
maxVertIndex = numpy.where(
numpy.flip(self.editMatrix)[i] == maxVertical) # inverting matrix to use a forward iteration algo
if maxVertIndex[0][0] not in usedRow: # handles non alignments
usedRow.append(maxVertIndex[0][0])
else:
break
alignmentSet.append(len(string1) - 1 - maxVertIndex[0][0])
alignmentSet = sorted(alignmentSet)
for i in alignmentSet: # uses the found max score in significant row to find its index as a column
for j in range(1, len(string2) + 1):
if self.editMatrix[i + 1][j] == max(self.editMatrix[i + 1]):
alignedPairs.append((i, j - 1))
else:
continue
return alignedPairs