# TASK 1 - Implementation of Sequence Alignment Algorithms
Zofia Łągiewka 313096

In [10]:
from utils import load_matrix, fill_matrices, traceback, print_and_save_results
import numpy as np

## Needleman-Wunsch - global alignment

In [11]:
def needleman_wunsch(sequence1, sequence2, n, path, GP, output_filename):
    """
    Runs a Needleman-Wunsch global alignment of two sequences and reports the result

    The substitution matrix is loaded from `path`, the scoring and direction
    matrices are built with `fill_matrices`, and `traceback` is started from the
    cell (len(sequence1), len(sequence2)). The collected alignments are handed
    to `print_and_save_results` under the label "Global". Nothing is returned.

    Parameters:
    - sequence1 (str): first DNA sequence
    - sequence2 (str): second DNA sequence
    - n (int): maximum number of alignments (forwarded to `traceback`)
    - path (str): filepath to the substitution matrix in CSV format
    - GP (int): gap penalty (forwarded to `fill_matrices`)
    - output_filename (str): name of the output file
    """
    scores, directions = fill_matrices(sequence1, sequence2, load_matrix(path), GP)

    # traceback fills this list in place; it starts with empty partial alignments
    found = []
    end_row, end_col = len(sequence1), len(sequence2)
    traceback(sequence1, sequence2, directions, scores, end_row, end_col, '', '', found, n)

    print_and_save_results(output_filename, found, "Global")

### Example use

In [12]:
# Global alignment of 'TATA' vs 'ATAT': up to 3 alignments, gap penalty -2,
# substitution scores read from matrix.csv, results written to output_nw.txt
needleman_wunsch(
    'TATA',
    'ATAT',
    n=3,
    path='matrix.csv',
    GP=-2,
    output_filename='output_nw.txt',
)

Global alignment no. 1:
-TATA
ATAT-
Score: 11

Global alignment no. 2:
TATA-
-ATAT
Score: 11


## Smith-Waterman - local alignment

In [13]:
def smith_waterman(sequence1, sequence2, n, path, GP, output_filename):
    """
    Runs a Smith-Waterman local alignment of two sequences and reports the result

    The substitution matrix is loaded from `path` and the scoring and direction
    matrices are built with `fill_matrices(..., global_alignment=False)`.
    `traceback` is then started once from every cell whose score equals the
    matrix maximum. The collected alignments are handed to
    `print_and_save_results` under the label "Local". Nothing is returned.

    Parameters:
        - sequence1 (str): first DNA sequence
        - sequence2 (str): second DNA sequence
        - n (int): maximum number of alignments (forwarded to `traceback`)
        - path (str): filepath to the substitution matrix in CSV format
        - GP (int): gap penalty (forwarded to `fill_matrices`)
        - output_filename (str): name of the output file
    """
    scores, directions = fill_matrices(
        sequence1, sequence2, load_matrix(path), GP, global_alignment=False
    )

    found = []
    best_cells = np.argwhere(scores == scores.max())
    # NOTE(review): the same list and the same n go to every call, so any cap on
    # the total number of alignments must be enforced inside traceback - confirm.
    for row, col in best_cells:
        traceback(
            sequence1, sequence2, directions, scores, row, col, '', '', found, n,
            global_alignment=False, start_position=(row, col),
        )

    print_and_save_results(output_filename, found, "Local")

### Example use

In [14]:
# Local alignment of 'TATA' vs 'ATAT': up to 3 alignments, gap penalty -2,
# substitution scores read from matrix.csv, results written to output_sw.txt
smith_waterman(
    'TATA',
    'ATAT',
    n=3,
    path='matrix.csv',
    GP=-2,
    output_filename='output_sw.txt',
)

Local alignment no. 1:
TAT
TAT
Score: 15

Local alignment no. 2:
ATA
ATA
Score: 15
