In [1]:
import numpy as np

In [13]:
class SmithWaterman:
    """Smith-Waterman local-alignment scorer with a linear gap penalty.

    Parameters
    ----------
    sub : callable, optional
        ``sub(x, y)`` returns the substitution score for aligning element
        ``x`` of the first sequence with element ``y`` of the second.
        When omitted, a match scores +1 and a mismatch scores -1.
    w : number, optional
        Cost per gap position; a gap of length ``k`` costs ``k * w``.
        Defaults to 1.
    """

    def __init__(self, sub=None, w=1):
        if sub is not None:
            self.sub = sub
        else:
            self.sub = lambda a, b: 1 if a == b else -1
        self.w = w

    def align(self, a, b):
        """Return the best local-alignment score between ``a`` and ``b``.

        Only the score is returned; the aligned segments themselves are not
        reconstructed.

        Parameters
        ----------
        a, b : sequence
            Sequences supporting ``len()`` and iteration (e.g. ``str``,
            ``list``). Elements are compared through ``self.sub``.

        Returns
        -------
        numpy.float64
            Largest value in the scoring matrix; ``0.0`` when either
            sequence is empty or no positively scoring segment exists.
        """
        n = len(a)
        m = len(b)

        # Row 0 and column 0 stay at zero: a local alignment may start anywhere.
        H = np.zeros((n + 1, m + 1))

        for i, aa in enumerate(a):
            for j, bb in enumerate(b):
                # The gap cost is linear (k * w), so the best gap ending at a
                # cell always extends from the directly adjacent cell: that
                # neighbour already dominates every longer gap into it.
                # Looking only at the neighbours therefore gives the same
                # maximum as scanning the whole column/row prefix, in
                # O(n * m) instead of O(n * m * (n + m)).
                H[i + 1, j + 1] = max(
                    H[i, j] + self.sub(aa, bb),  # match / mismatch
                    H[i, j + 1] - self.w,        # gap: element of `a` unpaired
                    H[i + 1, j] - self.w,        # gap: element of `b` unpaired
                    0,                           # restart the local alignment
                )

        return H.max()

In [7]:
# Demo: score the best local alignment of two short strings using the
# default SmithWaterman() settings (no custom substitution function or gap cost).
A = 'AAABBAC'
B = 'ABDBAC'
sw = SmithWaterman()
res = sw.align(A, B)

In [8]:
# Display the score returned by sw.align(A, B).
res

4.0

In [15]:
def SmithWaterman_matrix(SEQ_list):
    """Build the symmetric table of pairwise Smith-Waterman scores.

    Every unordered pair of sequences in ``SEQ_list`` is scored once with a
    default ``SmithWaterman()`` instance and the result is mirrored across
    the diagonal.

    Parameters
    ----------
    SEQ_list : sequence
        Indexable collection of sequences to compare.

    Returns
    -------
    list of list
        Square nested list where entry ``[i][j]`` is the value returned by
        ``align(SEQ_list[i], SEQ_list[j])`` for ``i != j``. Diagonal entries
        are left at the integer ``0`` (sequences are not scored against
        themselves).
    """
    count = len(SEQ_list)
    scorer = SmithWaterman()
    scores = [[0] * count for _ in range(count)]

    # Visit the upper triangle only; each score is copied to its mirror cell.
    for row in range(count):
        for col in range(row + 1, count):
            pair_score = scorer.align(SEQ_list[row], SEQ_list[col])
            scores[row][col] = pair_score
            scores[col][row] = pair_score

    return scores

In [16]:
# Sample sequences used as input for the pairwise score matrix.
SEQ_list=["GAATTCAGTTA","GGATCGA","GGGATCGGA"]

In [17]:
# Compute and display the pairwise score matrix for the sample sequences.
SmithWaterman_matrix(SEQ_list)

[[0, 2.0, 2.0], [2.0, 0, 6.0], [2.0, 6.0, 0]]