# Sequence Alignment

In [1]:
import SequenceAlignment as align
import itertools

## Example

In [2]:
# Align two short example sequences using NeedlemanWunschSimple with its
# default parameters; the score matrix is the cell's displayed result.
example_scoring = align.NeedlemanWunschSimple()
sa = align.SequenceAlignment('IVVGSAAHCYGEDN', 'IVLTAAHCVSASGEHN', example_scoring)
sa.get_score_matrix_as_dataframe()

Unnamed: 0,I,V,V.1,G,S,A,A.1,H,C,Y,G.1,E,D,N
I,14.0,13.0,12.0,9.0,8.0,7.0,4.0,1.0,-2.0,-5.0,-6.0,-9.0,-12.0,-13.0
V,11.0,12.0,13.0,10.0,9.0,8.0,5.0,2.0,-1.0,-4.0,-5.0,-8.0,-11.0,-12.0
L,8.0,9.0,10.0,11.0,10.0,9.0,6.0,3.0,0.0,-3.0,-4.0,-7.0,-10.0,-11.0
T,7.0,8.0,9.0,10.0,11.0,10.0,7.0,4.0,1.0,-2.0,-3.0,-6.0,-9.0,-10.0
A,6.0,7.0,8.0,9.0,10.0,11.0,8.0,5.0,2.0,-1.0,-2.0,-5.0,-8.0,-9.0
A,7.0,6.0,5.0,6.0,7.0,8.0,9.0,6.0,3.0,0.0,-1.0,-4.0,-7.0,-8.0
H,8.0,7.0,6.0,5.0,6.0,5.0,6.0,7.0,4.0,1.0,0.0,-3.0,-6.0,-7.0
C,7.0,8.0,7.0,6.0,5.0,6.0,5.0,4.0,5.0,2.0,1.0,-2.0,-5.0,-6.0
V,6.0,7.0,8.0,7.0,6.0,5.0,6.0,5.0,4.0,3.0,2.0,-1.0,-4.0,-5.0
S,3.0,4.0,5.0,6.0,7.0,6.0,5.0,6.0,5.0,4.0,3.0,0.0,-3.0,-4.0


In [3]:
# Report the alignment value of the example pair in one summary line.
alignment_summary = f"Alignment Value of '{sa.QUERY}' and '{sa.SUBJECT}' is {sa.get_alignment_value()}"
print(alignment_summary)

Alignment Value of 'IVVGSAAHCYGEDN' and 'IVLTAAHCVSASGEHN' is 14.0


In [4]:
# For every optimal alignment of the example pair, print the list of index
# pairs first and then the rendered (gapped) sequences, one per line,
# followed by a blank line.
for alignment in sa.find_optimal_alignments():
    print(alignment)
    rendered_rows = sa.read_alignment(alignment)
    print("\n".join(map(str, rendered_rows)), end="\n\n")


[(0, 0), (1, 1), (2, 2), (3, 3), (4, 4), (4, 5), (5, 6), (6, 7), (7, 8), (8, 9), (9, 10), (10, 10), (11, 10), (12, 10), (13, 11), (14, 12), (15, 13)]
IVVGSAAHCYG---EDN
IVLTA-AHCVSASGEHN

[(0, 0), (1, 1), (2, 2), (3, 3), (4, 4), (4, 5), (5, 6), (6, 7), (7, 8), (8, 9), (9, 9), (10, 10), (11, 10), (12, 10), (13, 11), (14, 12), (15, 13)]
IVVGSAAHCY-G--EDN
IVLTA-AHCVSASGEHN

[(0, 0), (1, 1), (2, 2), (3, 3), (4, 4), (4, 5), (5, 6), (6, 7), (7, 8), (8, 9), (9, 9), (10, 9), (11, 10), (12, 10), (13, 11), (14, 12), (15, 13)]
IVVGSAAHCY--G-EDN
IVLTA-AHCVSASGEHN

[(0, 0), (1, 1), (2, 2), (3, 3), (4, 4), (4, 5), (5, 6), (6, 7), (7, 8), (8, 9), (9, 9), (10, 9), (11, 9), (12, 10), (13, 11), (14, 12), (15, 13)]
IVVGSAAHCY---GEDN
IVLTA-AHCVSASGEHN

[(0, 0), (1, 1), (2, 2), (3, 3), (3, 4), (4, 5), (5, 6), (6, 7), (7, 8), (8, 9), (9, 10), (10, 10), (11, 10), (12, 10), (13, 11), (14, 12), (15, 13)]
IVVGSAAHCYG---EDN
IVLT-AAHCVSASGEHN

[(0, 0), (1, 1), (2, 2), (3, 3), (3, 4), (4, 5), (5, 6), (6, 7), (7, 8)

---

Testen Sie Ihre Implementierung an verschiedenen Paarungen aus der Menge der PDE-Sequenzen [NP_246273](https://www.ncbi.nlm.nih.gov/protein/NP_246273), [NP_006194](https://www.ncbi.nlm.nih.gov/protein/NP_006194.2/), [NP_000913](https://www.ncbi.nlm.nih.gov/protein/NP_000913.2/) und [NP_002590](https://www.ncbi.nlm.nih.gov/protein/NP_002590.1/). Fügen Sie die Alignmentwerte der Paarungen Ihrer Abgabe hinzu.

In [5]:
# PDE protein sequences used for the pairwise comparisons below, keyed by
# their NCBI protein accession (including the version suffix). The sequences
# are stored as lower-case strings; dictionary order determines the order in
# which pairs are generated.
PDE_SEQUENCES = {
    "NP_246273.2": "mvnawfaervhtipvckegirghtescscplqqspradnsapgtptrkisasefdrplrpivvkdsegtvsflsdsekkeqmpltpprfdhdegdqcsrllelvkdisshldvtalchkiflhihglisadryslflvcedssndkflisrlfdvaegstleevsnncirlewnkgivghvaalgeplnikdayedprfnaevdqitgyktqsilcmpiknhreevvgvaqainkksgnggtftekdekdfaaylafcgivlhnaqlyetsllenkrnqvlldlaslifeeqqslevilkkiaatiisfmqvqkctifivdedcsdsfssvfhmeceelekssdtltrehdankinymyaqyvkntmeplnipdvskdkrfpwttentgnvnqqcirsllctpikngkknkvigvcqlvnkmeentgkvkpfnrndeqfleafvifcglgiqntqmyeaveramakqmvtlevlsyhasaaeeetrelqslaaavvpsaqtlkitdfsfsdfelsdletalctirmftdlnlvqnfqmkhevlcrwilsvkknyrknvayhnwrhafntaqcmfaalkagkiqnkltdleilalliaalshdldhrgvnnsyiqrsehplaqlychsimehhhfdqclmilnspgnqilsglsieeykttlkiikqailatdlalyikrrgeffelirknqfnledphqkelflamlmtacdlsaitkpwpiqqriaelvateffdqgdrerkelnieptdlmnrekknkipsmqvgfidaiclqlyealthvsedcfplldgcrknrqkwqalaeqqekmlingesgqakrn",
    "NP_006194.2": "mmhvnnfpfrrhswicfdvdngtsagrspldpmtspgsglilqanfvhsqrresflyrsdsdydlspksmsrnssiasdihgddlivtpfaqvlaslrtvrnnfaaltnlqdrapskrspmcnqpsinkatiteeayqklasetleeldwcldqletlqtrhsvsemasnkfkrmlnrelthlsemsrsgnqvsefisntfldkqheveipsptqkekekkkrpmsqisgvkklmhsssltnssiprfgvkteqedvlakeledvnkwglhvfriaelsgnrpltvimhtifqerdllktfkipvdtlitylmtledhyhadvayhnnihaadvvqsthvllstpaleavftdleilaaifasaihdvdhpgvsnqflintnselalmyndssvlenhhlavgfkllqeencdifqnltkkqrqslrkmvidivlatdmskhmnlladlktmvetkkvtssgvllldnysdriqvlqnmvhcadlsnptkplqlyrqwtdrimeeffrqgdrerergmeispmcdkhnasveksqvgfidyivhplwetwadlvhpdaqdildtlednrewyqstipqspspapddpeegrqgqtekfqfeltleedgesdtekdsgsqveedtscsdsktlctqdsesteipldeqveeeavgeeeesqpeacviddrspdt",
    "NP_000913.2": "mrrderdakamrslqppdgagsppeslrngyvkscvsplrqdpprgfffhlcrfcnvelrpppaspqqprrcspfcrarlslgalaafvlalllgaepeswaagaawlrtllsvcshslsplfsiacafffltcfltrtkrgpgpgrscgswwllalpaccylgdflvwqwwswpwgdgdagsaaphtppeaaagrlllvlscvgllltlahplrlrhcvlvlllasfvwwvsftslgslpsalrpllsglvggagcllalgldhffqireaplhprlssaaeekvpvirprrrsscvslgetaasyygsckifrrpslpcisreqmilwdwdlkqwykphyqnsgggngvdlsvlnearnmvsdlltdpslppqvisslrsisslmgafsgscrpkinpltpfpgfypcseiedpaekgdrklnkglnrnslptpqlrrssgtsgllpveqssrwdrnngkrphqefgissqgcylngpfnsnlltipkqrsssvslthhvglrragvlsslspvnssnhgpvstgsltnrspiefpdtadflnkpsvilqrslgnapntpdfyqqlrnsdsnlcnscghqmlkyvstsesdgtdccsgksgeeenifskesfklmetqqeeetekkdsrklfqegdkwlteeaqseqqtnieqevsldlilveeydsliekmsnwnfpifelvekmgeksgrilsqvmytlfqdtglleifkiptqqfmnyfralengyrdipyhnrihatdvlhavwylttrpvpglqqihngcgtgnetdsdgrinhgriayisskscsnpdesygclssnipalelmalyvaaamhdydhpgrtnaflvatnapqavlyndrsvlenhhaasawnlylsrpeynfllhldhvefkrfrflvieailatdlkkhfdflaefnakandvnsngiewsnendrllvcqvcikladingpakvrdlhlkwtegivnefyeqgdeeanlglpispfmdrsspqlaklqesfithivgplcnsydaagllpgqwleaeedndtesgddedgeeldtedeemennlnpkpprrksrrrifcqlmhhltenhkiwkeiveeeekckadgnklqvensslpqadeiqvieeadeee",
    "NP_002590.1": "mgqacghsilcrsqqypaarpaeprgqqvflkpdepppppqpcadslqdallslgsvidisglqravkealsavlprvetvytylldgesqlvcedpphelpqegkvreaiisqkrlgcnglgfsdlpgkplarlvaplapdtqvlvmpladkeagavaavilvhcgqlsdneewslqavekhtlvalrrvqvlqqrgpreapravqnppegtaedqkggaaytdrdrkilqlcgelydldasslqlkvlqylqqetrasrcclllvsednlqlsckvigdkvlgeevsfpltgclgqvvedkksiqlkdltsedvqqlqsmlgcelqamlcvpvisratdqvvalacafnklegdlftdedehviqhcfhytstvltstlafqkeqklkcecqallqvaknlfthlddvsvllqeiitearnlsnaeicsvflldqnelvakvfdggvvddesyeiripadqgiaghvattgqilnipdayahplfyrgvddstgfrtrnilcfpiknenqevigvaelvnkingpwfskfdedlatafsiycgisiahsllykkvneaqyrshlanemmmyhmkvsddeytkllhdgiqpvaaidsnfasftytprslpeddtsmailsmlqdmnfinnykidcptlarfclmvkkgyrdppyhnwmhafsvshfcyllyknleltnyledieifalfiscmchdldhrgtnnsfqvasksvlaalyssegsvmerhhfaqaiailnthgcnifdhfsrkdyqrmldlmrdiilatdlahhlrifkdlqkmaevgydrnnkqhhrlllcllmtscdlsdqtkgwkttrkiaeliykeffsqgdlekamgnrpmemmdrekayipelqisfmehiampiykllqdlfpkaaelyervasnrehwtkvshkftirglpsnnsldfldeeyevpdldgtrapingccsldae"
}

In [6]:
# Align every unordered pair of PDE sequences and keep the resulting alignment
# objects, keyed "<accession A> / <accession B>", so later cells can look up a
# specific pairing. The alignment value of each pair is printed as it is computed.
pde_comparisons = {}
for sequence_a, sequence_b in itertools.combinations(PDE_SEQUENCES, 2):
    # A new scoring object is created per pair; only the mismatch cost is
    # overridden, every other scoring parameter keeps its default.
    alignment = align.SequenceAlignment(
        PDE_SEQUENCES[sequence_a],
        PDE_SEQUENCES[sequence_b],
        align.NeedlemanWunschSimple(cost_mismatch=-1),
    )
    # Lower-case name: this value is rebound on every iteration, so it is an
    # ordinary loop variable rather than a constant.
    comparison_name = f"{sequence_a} / {sequence_b}"
    pde_comparisons[comparison_name] = alignment
    print(f"{comparison_name} = {alignment.get_alignment_value()}")
        

NP_246273.2 / NP_006194.2 = -136.0
NP_246273.2 / NP_000913.2 = -227.0
NP_246273.2 / NP_002590.1 = -14.0
NP_006194.2 / NP_000913.2 = -220.0
NP_006194.2 / NP_002590.1 = -181.0
NP_000913.2 / NP_002590.1 = -162.0


In [7]:
# Inspect a single pairing in detail: request at most one optimal alignment
# (max=1), then print its index pairs followed by the rendered sequences.
pde_alignment = pde_comparisons["NP_246273.2 / NP_002590.1"]
requested_alignments = pde_alignment.find_optimal_alignments(max=1)
first_alignment = requested_alignments[0]
print(first_alignment)
rendered_rows = pde_alignment.read_alignment(first_alignment)
print(*rendered_rows, sep="\n", end="\n\n")


[(0, 0), (1, 1), (2, 2), (3, 3), (4, 4), (5, 5), (6, 6), (7, 7), (8, 8), (9, 8), (10, 8), (11, 8), (12, 9), (13, 10), (14, 11), (15, 12), (16, 13), (17, 14), (18, 15), (19, 16), (20, 17), (21, 17), (22, 17), (23, 18), (24, 19), (24, 20), (25, 21), (26, 22), (27, 23), (28, 24), (29, 24), (30, 24), (31, 24), (32, 24), (33, 24), (34, 24), (35, 25), (36, 26), (37, 27), (38, 28), (39, 29), (40, 30), (40, 31), (41, 32), (41, 33), (41, 34), (42, 35), (43, 36), (44, 37), (45, 38), (45, 39), (46, 40), (47, 40), (48, 40), (49, 40), (50, 41), (51, 42), (52, 42), (53, 42), (54, 42), (55, 43), (56, 44), (57, 45), (58, 46), (59, 47), (59, 48), (60, 49), (61, 50), (62, 50), (63, 50), (64, 50), (65, 50), (66, 51), (67, 52), (68, 52), (69, 53), (70, 54), (71, 55), (72, 56), (73, 57), (74, 57), (75, 58), (75, 59), (76, 60), (77, 61), (78, 62), (79, 62), (80, 62), (81, 63), (82, 64), (83, 64), (84, 64), (85, 64), (86, 64), (87, 65), (88, 66), (89, 67), (89, 68), (89, 69), (89, 70), (90, 71), (91, 72), (9