# BA5L Global Alignment in Linear Space Problem

In [1]:
class BLOSUM62(object):
    """Substitution-score lookup table loaded from ``BLOSUM62.txt``.

    The file is opened relative to the current working directory.  Every
    non-blank line must contain at least three whitespace-separated fields:
    the two symbols of a pair followed by their integer score.  Blank or
    whitespace-only lines are ignored.

    Scores are stored in ``self.m`` keyed by ``(first, second)`` and are
    retrieved with ``table[first, second]``.
    """

    def __init__(self):
        """Read ``BLOSUM62.txt`` and build the ``(symbol, symbol) -> score`` map.

        Raises:
            OSError: if the file cannot be opened.
            IndexError: if a non-blank line has fewer than three fields.
            ValueError: if the third field of a line is not an integer.
        """
        self.m = {}
        with open('BLOSUM62.txt') as handle:
            for line in handle:
                fields = line.split()
                if not fields:
                    # Tolerate blank lines (e.g. a trailing newline at EOF).
                    continue
                self.m[fields[0], fields[1]] = int(fields[2])

    def __getitem__(self, pair):
        """Return the score for ``pair``.

        Only ``pair[0]`` and ``pair[1]`` are read, so any indexable of
        length >= 2 (tuple, list, two-character string) is accepted.

        Raises:
            KeyError: if the pair is not present in the table.
        """
        return self.m[pair[0], pair[1]]

In [2]:
def linearSpaceGlobalAlignment(v, w, matrix, sigma):
    """Globally align strings ``v`` and ``w`` by divide and conquer on a middle edge.

    Args:
        v: first sequence (a string).
        w: second sequence (a string).
        matrix: object indexable by a ``(symbol, symbol)`` pair that returns
            the substitution score of that pair.
        sigma: penalty subtracted for every symbol aligned against ``'-'``.

    Returns:
        A tuple ``(score, v_aligned, w_aligned)`` where ``score`` is the
        alignment score rendered with ``str`` and the other two items are the
        input strings with ``'-'`` inserted.

    Side effects:
        Prints the score and both aligned strings, one per line.
    """

    def column_scores(a, b, n_cols):
        """Score the first ``n_cols`` columns of the ``a`` x ``b`` grid.

        Only the previous and the current column are kept.  Returns the last
        column together with, for each row of that column, the index of the
        winning candidate: 0 = diagonal, 1 = horizontal, 2 = vertical.
        Callers always request at least one column.
        """
        height = len(a) + 1
        prev = [-row * sigma for row in range(height)]
        cur = prev
        moves = [0] * height
        for col in range(1, n_cols + 1):
            b_char = b[col - 1]
            cur = [-col * sigma]
            for row in range(1, height):
                candidates = (
                    prev[row - 1] + matrix[a[row - 1], b_char],
                    prev[row] - sigma,
                    cur[row - 1] - sigma,
                )
                best = max(candidates)
                cur.append(best)
                # index() keeps the first of several equal candidates.
                moves[row] = candidates.index(best)
            prev = cur
        return cur, moves

    def middle_edge(a, b):
        """Return ``(middle_node, next_node)`` for the sub-grid ``a`` x ``b``.

        Both nodes are ``(row, column)`` pairs relative to the sub-grid; the
        middle node sits in column ``len(b) // 2``.
        """
        mid = len(b) // 2
        from_source, _ = column_scores(a, b, mid)
        # The pass from the sink walks the reversed strings across the
        # remaining columns; its outputs are flipped back to forward rows.
        to_sink_rev, moves_rev = column_scores(a[::-1], b[::-1], len(b) - mid)
        to_sink = to_sink_rev[::-1]
        moves = moves_rev[::-1]

        totals = [s + t for s, t in zip(from_source, to_sink)]
        best_row = totals.index(max(totals))  # first row on ties

        if best_row == len(a):
            # Bottom row: the only way forward is a horizontal edge.
            successor = (best_row, mid + 1)
        else:
            successor = (
                (best_row + 1, mid + 1),
                (best_row, mid + 1),
                (best_row + 1, mid),
            )[moves[best_row]]
        return (best_row, mid), successor

    def small_alignment(a, b):
        """Align ``a`` and ``b`` with a full score table; used for thin strips."""
        n_rows, n_cols = len(a), len(b)
        table = [[0] * (n_cols + 1) for _ in range(n_rows + 1)]
        moves = [[0] * (n_cols + 1) for _ in range(n_rows + 1)]
        for row in range(1, n_rows + 1):
            table[row][0] = -row * sigma
        for col in range(1, n_cols + 1):
            table[0][col] = -col * sigma
        for row in range(1, n_rows + 1):
            for col in range(1, n_cols + 1):
                # Candidate order fixes tie-breaking: up, then left, then diagonal.
                candidates = (
                    table[row - 1][col] - sigma,
                    table[row][col - 1] - sigma,
                    table[row - 1][col - 1] + matrix[a[row - 1], b[col - 1]],
                )
                best = max(candidates)
                table[row][col] = best
                moves[row][col] = candidates.index(best)

        # Trace back from the bottom-right corner, emitting symbols in reverse.
        a_out, b_out = [], []
        row, col = n_rows, n_cols
        while row > 0 and col > 0:
            move = moves[row][col]
            if move == 0:
                row -= 1
                a_out.append(a[row])
                b_out.append('-')
            elif move == 1:
                col -= 1
                a_out.append('-')
                b_out.append(b[col])
            else:
                row -= 1
                col -= 1
                a_out.append(a[row])
                b_out.append(b[col])
        while row > 0:
            row -= 1
            a_out.append(a[row])
            b_out.append('-')
        while col > 0:
            col -= 1
            a_out.append('-')
            b_out.append(b[col])
        return ''.join(reversed(a_out)), ''.join(reversed(b_out))

    def align(top, bottom, left, right):
        """Align ``v[top:bottom]`` with ``w[left:right]``; return the aligned pair."""
        if left == right:
            return v[top:bottom], '-' * (bottom - top)
        if top == bottom:
            return '-' * (right - left), w[left:right]
        if bottom - top == 1 or right - left == 1:
            return small_alignment(v[top:bottom], w[left:right])

        (mid_row, mid_col), (next_row, next_col) = middle_edge(
            v[top:bottom], w[left:right]
        )
        # Convert sub-grid coordinates to positions in the full strings.
        mid_row += top
        next_row += top
        mid_col += left
        next_col += left

        # The middle edge consumes a symbol only along the axes it advances.
        v_symbol = v[mid_row] if next_row > mid_row else '-'
        w_symbol = w[mid_col] if next_col > mid_col else '-'

        head = align(top, mid_row, left, mid_col)
        tail = align(next_row, bottom, next_col, right)
        return head[0] + v_symbol + tail[0], head[1] + w_symbol + tail[1]

    v_aligned, w_aligned = align(0, len(v), 0, len(w))

    score = 0
    for pair in zip(v_aligned, w_aligned):
        score += -sigma if '-' in pair else matrix[pair]

    print(score)
    print(v_aligned)
    print(w_aligned)
    return str(score), v_aligned, w_aligned

In [3]:
# Read the two sequences (first two lines of the dataset, trailing whitespace
# removed) and align them using the BLOSUM62 table with a gap penalty of 5.
with open("rosalind_ba5l.txt") as handle:
    v, w = (handle.readline().rstrip() for _ in range(2))
    linearSpaceGlobalAlignment(v, w, BLOSUM62(), 5)

18406
GKIHRLVNKRDH---RYCFHY---N-M-YFP-WQRSGCELTAPPII-------V-METDNDGDCGTCQIHEWMQYVAQSTADRNLSQFEVFF-EM-GKMLISIIIRTWLLSWENFEMDDNCGDALQNDNASFVNYDMF---K-YPW----PRIMPKEVTYGMYGAHVHTPLP-----AT----DCN-MR-I-KRL-HINA-R-H--I-E---HVCTSYENDNHCKWQA-AMDVKVQALMFCWCYPQTPANRENANHRESNNPH-----YERWLFTAHHWHSSPPIWQKTTFRYQDIYNLCMQIWVVQAFMDQSDYLQWWG-S-FHGHEHMWHN------Y-PTLYL-LPR-W--YRV--MEKITRGGNKDHFLTKFPEAIQNWIMKCRGTA-------KHFCNDVKTFGWRRWFHPCVQWTCIHTHEGFQQGPLTWVLRVPLRCCEYNCKPCNITSVHKW-WIPHE-TPEI---E--VFRMN--R----IKYATVASGDKCNTKERFQMVESLPGHEYARCWSP-PA-WCPINSERRFSGTYNNFAMYQTHYRYWVATKQWQVSYKGMERYDMD-SLRQYCGLTKYHHHIIFNFGNRIDSKPEY-CTQSTYKSRNSQVNVYTFFYWLCSSWETAADAMSEIPIKPQTRPETNFCHHEQKRRAKPCMYHDL-N-HHRNQTMDDRNNQRMNEYKAQMAGDWC--C----QIQDGCKFVKTD---C---RITICQSNCLHI------C--H--W-D-F--QI--IYIKEKLTEGRKMSCIAMPNNKMLTDVNTNIRVTAVDPSHHHDIRDDTCREAQGNMSPDPRLIADNLNSNRREQHLMKERIRGKRYWKRRGQSDQLLVHRMNVSFTYVVAGCHCMEHNHSC-WSRGCWDFP---TPYNFWWMFFNMI-I-FTDGWQ-Y-PMWHTNYQQIVIQHAEDQKML---Q---NPQG--T------STNPSHPDVFLKRMKKFRLQPLFYTYCVIRITW