In [1]:
class HMM:
    '''Hidden Markov model defined by an alphabet, a set of states and
    two probability tables.

    Attributes:
        emits: the symbols the model can emit.
        states: the hidden states (anything supporting len()).
        transition: nested mapping, transition[src][dst] -> probability.
        emission: nested mapping, emission[state][symbol] -> probability.

    Table entries may be numbers or numeric strings; the accessors
    convert them with float().
    '''

    def __init__(self, emits, states, transition, emission):
        self.emits = emits
        self.states = states
        self.transition = transition
        self.emission = emission

    def get_transition_prob(self, cur_state, des_state):
        '''probability of transition from original state to destination state'''
        return float(self.transition[cur_state][des_state])

    def get_emission_prob(self, cur_state, symbol):
        '''probability of emission from current state to symbol'''
        return float(self.emission[cur_state][symbol])

    def get_path_prob(self, path):
        '''probability that the HMM moves along a specific path (p(pi))

        path: sliceable sequence of states (a string of one-character
            state names or a list/tuple of states).

        Every state is equally likely to be the first one, so the product
        starts at 1/len(self.states) (1/2 for a two-state model) and is
        then multiplied by each consecutive transition probability.
        '''
        prod = 1 / len(self.states)
        # walk over consecutive (current, next) state pairs
        for cur_state, des_state in zip(path, path[1:]):
            prod *= self.get_transition_prob(cur_state, des_state)
        return prod

    def get_output_prob(self, output, path):
        '''get the probability of an outcome being generated from a
        specific path

        output: sequence of emitted symbols.
        path: sequence of states; path[i] is the state that emitted
            output[i].  It must be at least as long as output (an
            IndexError is raised otherwise); extra states are ignored.
        '''
        prod = 1
        for i, symbol in enumerate(output):
            prod *= self.get_emission_prob(path[i], symbol)
        return prod
    

In [2]:
class Viterbi(HMM):
    '''HMM decoder working on an explicit weighted Viterbi graph.

    Nodes are named state + str(position) (e.g. 'A0'), plus a source
    node 'S' and a sink node 'E'.  self.graph maps each node to a list
    of (destination, weight) tuples.  Probabilities are multiplied
    directly (no log-space), exactly as stored in the HMM tables.
    '''

    def __init__(self, graph, emits, states, transition, emission):
        '''store the adjacency dictionary and initialise the HMM part

        graph: dict node -> list of (destination, weight); it is mutated
            in place by the graph-building methods.
        emits, states, transition, emission: forwarded to HMM.__init__.
        '''
        self.graph = graph
        super().__init__(emits, states, transition, emission)

    def add_node(self, node):
        '''add a new node to the graph (no-op if it already exists)'''
        if node not in self.graph:
            self.graph[node] = []

    def add_weighted_edge(self, n1, n2, w):
        '''add new edge n1 -> n2 with weight w to the graph

        Missing end points are created.  If an edge n1 -> n2 already
        exists it is kept unchanged and no duplicate is appended.
        '''
        self.add_node(n1)
        self.add_node(n2)
        edges = self.graph[n1]
        # edges are (destination, weight) tuples, so compare destinations
        if all(edge[0] != n2 for edge in edges):
            edges.append((n2, w))

    def get_weight(self, n1, n2):
        '''get weight of the edge from n1 to n2 on a weighted graph

        Returns None when n1 has no edge to n2.
        '''
        for edge in self.graph[n1]:
            if edge[0] == n2:
                return edge[1]
        return None

    def calculate_weight(self, cur_state, des_state, symbol):
        '''get the weight for viterbi graph: probability of moving from
        cur_state to des_state times the probability that des_state
        emits symbol'''
        return (self.get_transition_prob(cur_state, des_state)
                * self.get_emission_prob(des_state, symbol))

    def construct_viterbi_graph(self, output: str) -> dict:
        '''construct directed weighted graph of hidden paths and
        initialise the score of the first column

        Input:
            output: the emitted sequence (any indexable sequence of
                symbols; must not be empty)
        Returns:
            score dictionary holding 1 for 'S' and, for every state,
            the score of its position-0 node (uniform start probability
            times the emission probability of output[0]).
        '''
        # score dictionary stores the score of each node
        score = {'S': 1}
        self.add_node('S')
        # edges leaving the start node: every state is equally likely
        for state in self.states:
            node = state + str(0)
            weight = 1 / len(self.states)
            self.add_weighted_edge('S', node, weight)
            score[node] = weight * self.get_emission_prob(state, output[0])

        # fully connect every column i to column i+1
        for i in range(len(output) - 1):
            des_symbol = output[i + 1]
            for cur_state in self.states:
                cur_node = cur_state + str(i)
                for des_state in self.states:
                    des_node = des_state + str(i + 1)
                    w = self.calculate_weight(cur_state, des_state,
                                              des_symbol)
                    self.add_weighted_edge(cur_node, des_node, w)

        # add ending node and its edges (weight 1 leaves scores unchanged)
        self.add_node('E')
        last = str(len(output) - 1)
        for state in self.states:
            self.add_weighted_edge(n1=state + last, n2='E', w=1)
        return score

    def _incoming_values(self, score: dict, i: int, cur_node) -> list:
        '''score of each predecessor in column i-1 times the weight of
        its edge into cur_node, listed in self.states order'''
        values = []
        for s in self.states:
            prev_node = s + str(i - 1)
            values.append(score[prev_node]
                          * self.get_weight(prev_node, cur_node))
        return values

    def dp(self, output: str, score: dict) -> tuple[dict, float]:
        '''fill score dictionary with the Viterbi (max) recurrence and
        remember which predecessor each score came from

        output: the emitted sequence the graph was built for.
        score: dictionary returned by construct_viterbi_graph; it is
            updated in place.
        Returns:
            (prev, max_v) where prev maps each node to its chosen
            predecessor and max_v is the largest score in the last
            column.  Ties go to the state listed first in self.states.
        '''
        prev = {}
        for i in range(1, len(output)):
            for state in self.states:
                cur_node = state + str(i)
                choices = self._incoming_values(score, i, cur_node)
                # max() returns the first maximal index, so ties resolve
                # to the earliest state
                best = max(range(len(choices)), key=choices.__getitem__)
                score[cur_node] = choices[best]
                prev[cur_node] = self.states[best] + str(i - 1)

        # handle ending node: it points at the best node of the last column
        last = str(len(output) - 1)
        finals = [score[state + last] for state in self.states]
        best = max(range(len(finals)), key=finals.__getitem__)
        max_v = finals[best]
        prev['E'] = self.states[best] + last

        # handle start node: every position-0 node comes from 'S'
        for state in self.states:
            prev[state + str(0)] = 'S'

        return prev, max_v

    def backtrack(self, prev: dict) -> list[str]:
        '''find the optimal path from end to start

        prev: predecessor dictionary returned by dp.
        Returns the node names from 'S' to 'E' inclusive.
        '''
        reversed_path = ['E']
        cur = 'E'
        # compare the whole node name: a state whose name merely starts
        # with 'S' must not be mistaken for the start node
        while cur != 'S':
            cur = prev[cur]
            reversed_path.append(cur)
        reversed_path.reverse()
        return reversed_path

    def forward(self, output: str, score: dict) -> float:
        '''fill score dictionary with the forward (sum) recurrence and
        return the total of the last column's scores

        output: the emitted sequence the graph was built for.
        score: dictionary returned by construct_viterbi_graph; it is
            updated in place.
        '''
        for i in range(1, len(output)):
            for state in self.states:
                cur_node = state + str(i)
                score[cur_node] = sum(
                    self._incoming_values(score, i, cur_node))

        # find final value for ending node
        last = str(len(output) - 1)
        final = 0
        for state in self.states:
            final += score[state + last]

        return final

In [3]:
def _read_probability_matrix(fh, row_labels, col_labels):
    '''read one probability table from the open file fh

    Skips two lines first (the separator and the column-header row),
    then reads one line per entry of row_labels; the first token of
    each line (the row name) is dropped and the remaining tokens are
    paired with col_labels in order.  Values are kept as strings.
    '''
    fh.readline()
    fh.readline()
    matrix = {}
    for row in row_labels:
        values = fh.readline().split()[1:]
        matrix[row] = dict(zip(col_labels, values))
    return matrix


def main3(inFile = None):
    '''
    solve problem 21

    Reads the emitted string, the alphabet, the states and the
    transition and emission tables from inFile, finds the most likely
    hidden path with the Viterbi algorithm and writes it to
    'p21answer.txt'.
    '''
    # read file; sections are separated by single separator lines
    with open(inFile) as fh:
        output = fh.readline().rstrip()
        fh.readline()
        emits = fh.readline().split()
        fh.readline()
        states = fh.readline().split()
        # construct dictionary of transition probability
        transition = _read_probability_matrix(fh, states, states)
        # get emission matrix
        emission = _read_probability_matrix(fh, states, emits)

    # create Viterbi object and run the recurrence once
    thisViterbi = Viterbi(graph={}, emits=emits, states=states,
                          transition=transition, emission=emission)
    score = thisViterbi.construct_viterbi_graph(output)
    prev, _ = thisViterbi.dp(output, score)

    # drop the 'S' and 'E' nodes, then keep the first character of each
    # node name (the state; assumes one-character state names)
    path = thisViterbi.backtrack(prev)[1:-1]
    res = ''.join(choice[0] for choice in path)

    # write path into file
    with open('p21answer.txt', 'w') as fw:
        fw.write(res)
    
# Script entry point: run the solver on the input file only when this
# file is executed directly, not when it is imported.
if __name__ == "__main__":
    main3(inFile = 'rosalind_ba10c.txt')

## Inspections:

### Lucy Zheng:

- Similar suggestion: use helper functions to parse the input file, since you can reuse them across all the programs.
- Very neat and organized code!

### William Gao:

- `class Viterbi(HMM)` - great use of inheritance!
- Nit: you are calling `thisViterbi.dp()` twice to get `prev` and `prob`. Consider changing:

```python
    prev = thisViterbi.dp(output, score)[0]
    prob = thisViterbi.dp(output, score)[1]
```

to:

```python
    prev, prob = thisViterbi.dp(output, score)
```
- Otherwise, this looks great to me!
