# Code describing the Viterbi algorithm

In [11]:
def viterbi(profile_hmm, sequence):
    num_states = len(profile_hmm)
    num_bases = len(sequence)

    # Initialize the Viterbi and path matrices
    viterbi_matrix = np.zeros((num_states, num_bases+1))
    viterbi_path = np.zeros((num_states, num_bases+1), dtype=int)

    # Initialize the first column of the Viterbi matrix
    viterbi_matrix[0, 0] = 1.0

    # Fill the Viterbi matrix
    for base_idx in range(1, num_bases+1):
        for state in range(num_states):
            transition_probs = {}
            current_type = profile_hmm[state]['type']  # Is this a 'M', I', or 'D' state?
            prev_abs_states = { t : state + rel for t, rel in prev_rel_states[current_type].items() if (t in profile_hmm[state]['transition']) and state + rel >= 0}
            prev_abs_base = base_idx -1 if (current_type in ['D','E' ]) else base_idx - 2 
            for prev_type, prev_abs_state in prev_abs_states.items():
                transition_prob = profile_hmm[prev_abs_state]['transition'][current_type]
                prev_score = viterbi_matrix[prev_abs_state, prev_abs_base]
                transition_probs[prev_abs_base] = transition_prob * prev_score
            
            if transition_probs:  # Check if the list is not empty
                max_prev_state = max(transition_probs)
                max_transition_prob = transition_probs[max_prev_state]
                emission_prob = profile_hmm[state]['emission'].get(sequence[base_idx-1], 0) if profile_hmm[state]['emission'] else 1.0
                viterbi_matrix[state, base_idx] = max_transition_prob * emission_prob
                viterbi_path[state, base_idx] = max_prev_state

    # Trace back to find the most probable path
    best_path = []
    last_state = np.argmax(viterbi_matrix[:, -1])
    best_path.append(last_state)

    for base_idx in range(num_bases, 0, -1):
        last_state = viterbi_path[last_state, base_idx]
        best_path.append(last_state)

    best_path.reverse()
    return best_path

# Example emission and transition probabilities for illustration purposes
profile_hmm = [
    {'type': "S", 'emission': {}, 'transition': {'M': 0.9, 'I': 0.05, 'D': 0.05}},  # Start State
    {'type': 'I', 'emission': {'A': 0.2, 'C': 0.3, 'G': 0.3, 'T': 0.2}, 'transition': {'M': 0.9, 'I': 0.1}},  # Insert State 1
    {'type': 'D', 'emission': {}, 'transition': {'M': 0.9, 'D': 0.1}},  # Delete State 1
    {'type': 'M', 'emission': {'A': 0.3, 'C': 0.2, 'G': 0.2, 'T': 0.3}, 'transition': {'M': 0.9, 'I': 0.05, 'D': 0.05}},  # Match State 1
    {'type': 'I', 'emission': {'A': 0.2, 'C': 0.3, 'G': 0.3, 'T': 0.2}, 'transition': {'M': 0.9, 'I': 0.1}},  # Insert State 2
    {'type': 'D', 'emission': {}, 'transition': {'M': 0.9, 'D': 0.1}},  # Delete State 2
    {'type': 'M', 'emission': {'A': 0.3, 'C': 0.2, 'G': 0.2, 'T': 0.3}, 'transition': {'M': 0.9, 'I': 0.05, 'D': 0.05}},  # Match State 2
    {'type': 'I', 'emission': {'A': 0.2, 'C': 0.3, 'G': 0.3, 'T': 0.2}, 'transition': {'M': 0.9, 'I': 0.1}},  # Insert State 3
    {'type': 'D', 'emission': {}, 'transition': {'M': 0.9, 'D': 0.1}},  # Delete State 3
    {'type': 'M', 'emission': {'A': 0.3, 'C': 0.2, 'G': 0.2, 'T': 0.3}, 'transition': {'M': 0.9, 'I': 0.05, 'D': 0.05}},  # Match State 3
    {'type': 'I', 'emission': {'A': 0.2, 'C': 0.3, 'G': 0.3, 'T': 0.2}, 'transition': {'M': 0.9, 'I': 0.1}},   # Insert State 4
    {'type': 'D', 'emission': {}, 'transition': {'M': 0.9, 'D': 0.1}},  # Delete State 4
    {'type': 'M', 'emission': {'A': 0.3, 'C': 0.2, 'G': 0.2, 'T': 0.3}, 'transition': {'M': 0.9, 'I': 0.05, 'D': 0.05}},  # Match State 4
    {'type': 'E', 'emission': {'A': 0.3, 'C': 0.2, 'G': 0.2, 'T': 0.3}, 'transition': {}},  # End State 
]

prev_rel_states = {  # Relative position to the current state of a certain type
    'S': {},
    'M': {'S': -3, 'M':-3, 'I':-2, 'D':-1},
    'I': {'S': -1, 'M':-1, 'I':0},
    'D': {'S': -2, 'M':-2, 'D':-3},
    'E': {'M':-1, 'I':-3, 'D':-2},
}

# Example sequence to be decoded
sequence = 'ACGTACGT'

# Decode the sequence using the Viterbi algorithm
decoded_path = viterbi(profile_hmm, sequence)
print("Decoded path:", decoded_path)


Decoded path: [0, 0, 0, 0, 0, 0, 0, 0, 0]
