In [55]:
class Node:
    '''
    A single vertex of the graph that HMM builds.

    The object is a plain record: the constructor stores its four arguments
    unchanged (no copies are made) as attributes of the same names.
    '''
    def __init__(self, inEdges, outEdges, name, score):
        '''
        :param inEdges: list of incoming edges; each edge is a two-element list [prevNode, weight]
        :param outEdges: list of outgoing edges in the same shape, [nextNode, weight]
        :param name: identifier of the node (HMM uses strings such as 'source' or state + layer index)
        :param score: best score found so far for any path ending at this node
        '''
        # Identity first, then connectivity; the assignments are independent.
        self.name = name
        self.score = score
        self.inEdges, self.outEdges = inEdges, outEdges

class HMM():
    '''
    Hidden Markov Model (HMM) represented as a layered graph.

    The graph has a 'source' node, one layer of nodes per emitted symbol
    (one node per state in each layer, named state + str(layerIndex)) and a
    'sink' node. scoreGraph() fills in the best score of every node and
    longestPath() walks back from the sink to recover the best path.

    Node names are built by string concatenation, so state names should not
    end in digits (e.g. 'A1' in layer 0 and 'A' in layer 10 would collide).
    '''
    def __init__(self,transMatrix,emitMatrix,emissions):
        '''

        :param transMatrix: Nested dictionary representing a transition matrix
        key:value --> state: RowDictionary
        Where RowDictionary is a dictionary that represents a single row in the matrix.
        key:value --> state: log of the transition pr

        :param emitMatrix: Nested dictionary representing an emission matrix
        key:value --> state: RowDictionary
        Where RowDictionary is a dictionary that represents a single row in the matrix.
        key:value --> emitted symbol: log of the emission pr

        :param emissions: String representing the emissions from the HMM

        Matrix values are added together when scoring, so they must be
        log-probabilities rather than raw probabilities.
        '''
        self.transMatrix = transMatrix
        self.emitMatrix = emitMatrix
        self.emissions = emissions
        self.nodes = self.genNodes()


    def genNodes(self):
        '''
        Builds the graph for the emission string.

        Every state is given the same initial log-probability, log(1/len(states)).
        Edges into the sink carry weight 0. All nodes except the source start
        with score -inf; the source starts with score 0.

        :return: nodes a dictionary mapping node name to Node object
        '''
        states = list(self.transMatrix)
        initPr = math.log(1/len(states))
        lastLayer = len(self.emissions)-1

        if lastLayer < 0:
            # Nothing was emitted: the only path goes straight from source to sink.
            return {'source': Node([],[['sink',0]],'source',0),
                    'sink': Node([['source',0]],[],'sink',-math.inf)}

        nodes = {'source':Node([],[[state+str(0),initPr] for state in states],'source',0)}
        for i in range(lastLayer+1):
            for state in states:
                # The first layer hangs off the source; later layers off the previous layer.
                if i == 0:
                    inEdges = [['source',initPr]]
                else:
                    inEdges = [[prev+str(i-1),self.transMatrix[prev][state]] for prev in states]

                # The last layer feeds the sink. Checking this independently of the
                # first-layer case keeps a one-symbol emission string well formed.
                if i == lastLayer:
                    outEdges = [['sink',0]]
                else:
                    outEdges = [[nxt+str(i+1),self.transMatrix[state][nxt]] for nxt in states]

                name = state+str(i)
                nodes[name] = Node(inEdges,outEdges,name,-math.inf)

        nodes['sink'] = Node([[state+str(lastLayer),0] for state in states],[],'sink',-math.inf)

        return nodes

    def scoreGraph(self):
        '''
        populates each node in self.nodes with its score.
        score = max(prevScore + edge weight + emissionPr) across all nodes in the layer before the current node
        all probabilities are represented as log(Pr) so that they can be added together.
        The sink receives the best score among its predecessors (its in-edges weigh 0).

        returns None

        '''

        states = list(self.transMatrix)
        for i, symbol in enumerate(self.emissions):
            for state in states:
                currentNode = self.nodes[state+str(i)]
                # Use the loop's state directly instead of parsing it from the node
                # name, so multi-character state names work.
                emitPr = self.emitMatrix[state][symbol]
                for prevName, weight in currentNode.inEdges:
                    newScore = self.nodes[prevName].score + weight + emitPr
                    if newScore > currentNode.score:
                        currentNode.score = newScore

        sink = self.nodes['sink']
        sink.score = max(self.nodes[prevName].score + weight for prevName, weight in sink.inEdges)
        return

    def _predecessor(self, node, emitPr=None):
        '''
        Returns the first predecessor of node whose score plus edge weight
        (plus emitPr, when given) reproduces node.score exactly.

        The sum is formed in the same order as in scoreGraph so the floating
        point comparison is exact for a scored graph.

        :raises RuntimeError: if no in-edge explains the score of node
        '''
        for prevName, weight in node.inEdges:
            total = self.nodes[prevName].score + weight
            if emitPr is not None:
                total += emitPr
            if total == node.score:
                return self.nodes[prevName]
        raise RuntimeError('no predecessor explains the score of node %r; '
                           'call scoreGraph() before longestPath()' % (node.name,))

    def longestPath(self):
        '''
        Finds the path with the largest total weight.
        Works backwards from the sink node after the graph has been populated with scores

        :return longestPath: list of nodes from 'source' to 'sink' inclusive
        :raises RuntimeError: if the graph has not been scored (previously this looped forever)
        '''

        longestPath = [self.nodes['sink']]
        # The sink emits nothing, so only score + edge weight is compared.
        longestPath.append(self._predecessor(longestPath[-1]))

        layer = len(self.emissions)-1 # layer index of the node currently at the end of the path
        while longestPath[-1].name != 'source': # exits when the path gets back to the source node
            current = longestPath[-1]
            # Node names are state + str(layer); strip the known suffix to get the state.
            state = current.name[:len(current.name)-len(str(layer))]
            emitPr = self.emitMatrix[state][self.emissions[layer]]
            longestPath.append(self._predecessor(current, emitPr))
            layer -= 1

        return longestPath[::-1]

import math
def _logPr(text):
    '''Converts a probability written as text to its natural log; 0 becomes -inf.'''
    pr = float(text)
    if pr == 0:
        # math.log(0) raises ValueError, but a zero probability is a legal matrix entry.
        return -math.inf
    return math.log(pr)


def _parseMatrix(lines, start, rowCount, columns):
    '''
    Reads rowCount matrix rows beginning at lines[start].

    Each row is "<label> <value> <value> ..."; the values are matched to
    columns by position and stored as log-probabilities.

    :return: nested dictionary, row label --> {column: log(pr)}
    '''
    matrix = {}
    for i in range(rowCount):
        row = lines[start+i].strip().split()
        matrix[row[0]] = {column: _logPr(row[j+1]) for j, column in enumerate(columns)}
    return matrix


def main(fName=''):
    '''
    Handles input/output. Reads the emission string, alphabet, states,
    transition matrix and emission matrix from the input file, builds the HMM
    and prints the most probable sequence of hidden states.

    Expected file layout (0-based line numbers, separator lines in between):
    line 0 emissions, line 2 alphabet, line 4 states, line 6 transition header,
    lines 7.. one transition row per state, then a separator and a header,
    followed by one emission row per state.

    :param fName: path of the input file
    '''

    with open(fName) as inFile:
        lines = inFile.readlines()

    emissions = lines[0].strip()
    alphabet = lines[2].strip().split()
    states = lines[4].strip().split()

    # Transition rows start at line 7; the emission rows follow them after a
    # separator line and a header line.
    transMatrix = _parseMatrix(lines, 7, len(states), states)
    emitMatrix = _parseMatrix(lines, 9+len(states), len(states), alphabet)

    hmm = HMM(transMatrix,emitMatrix,emissions)
    hmm.scoreGraph()

    # Drop 'source' and 'sink'; what remains is one node per layer, in order.
    path = hmm.longestPath()[1:-1]
    # Node names are state + str(layer), so remove the layer suffix to get the state.
    print(''.join([node.name[:len(node.name)-len(str(layer))] for layer, node in enumerate(path)]))

# Run main on 'problem21in.txt' only when this file is executed as a script,
# not when it is imported.
if __name__ == '__main__':
    main('problem21in.txt')

CBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBDDDDDDDDBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBDDDADBBBBBBB


Decoding Problem

Given: A string x, followed by the alphabet Σ from which x was constructed, followed by the states States, transition matrix Transition, and emission matrix Emission of an HMM (Σ, States, Transition, Emission).

Return: A path that maximizes the (unconditional) probability Pr(x, π) over all possible paths π.