# Markov Chains
## November 11th, 2021
### Overview: Using Markov chains to make predictions

In [2]:
import numpy as np
from scipy import linalg as la

In [3]:
class MarkovChain:
    """A Markov chain with finitely many states.

    Attributes:
        A ((n,n) ndarray): column stochastic transition matrix; A[i, j] is
            the probability of moving from state j to state i.
        labels (list): list of state labels; labels[i] names row/column i.
        labelMap (dict): dictionary mapping state labels to their column index

    Methods:
        __init__(A,states): saves transition matrix and creates attributes using states
        transition(state): uses A, the transition matrix, to transition from the input state and returns updated state
        walk(start,N): transitions N-1 times from the start state, returning list of all states walked through
        path(start,stop): transitions from start state until reaching stop state, returning list of all states walked through
        steady_state(tol,maxiter): finds steady state vector; raises ValueError if none within tolerance reached after maxiter iterations
    """

    def __init__(self, A, states=None,stoch=True):
        """Check that A is column stochastic and construct a dictionary
        mapping a state's label to its index (the row / column of A that the
        state corresponds to). Save the transition matrix, the list of state
        labels, and the label-to-index dictionary as attributes.

        Parameters:
        A ((n,n) ndarray): the column-stochastic transition matrix for a
            Markov chain with n states.
        states (list(str)): a list of n labels corresponding to the n states.
            If not provided, the labels are the indices 0, 1, ..., n-1.
        stoch (bool): not used by this implementation; kept in the signature
            so existing calls that pass it keep working.

        Raises:
            ValueError: if A is not square, is not column stochastic, or if
                the number of labels does not match the number of states.

        Example:
            >>> MarkovChain(np.array([[.5, .8], [.5, .2]]), states=["A", "B"])
        corresponds to the Markov Chain with transition matrix
                                   from A  from B
                            to A [   .5      .8   ]
                            to B [   .5      .2   ]
        and the label-to-index dictionary is {"A":0, "B":1}.
        """
        # Accept anything array-like; an ndarray input is stored as-is.
        A = np.asarray(A)

        # checking to make sure A is square and column stochastic
        if A.ndim != 2 or A.shape[0] != A.shape[1]:
            raise ValueError("A is not square")

        n = A.shape[0]
        # Columns must be probability vectors: nonnegative entries summing to 1.
        if np.any(A < 0) or not np.allclose(A.sum(axis=0), np.ones(n)):
            raise ValueError("A is not column stochastic")

        # `is not None` (rather than `!= None`) so that array-like label
        # containers are not compared elementwise.
        if states is not None:
            labels = list(states)
            if len(labels) != n:
                raise ValueError("number of state labels does not match A")
        else:
            labels = list(range(n))

        # saving transition matrix, labels, and the label -> index map
        self.A = A
        self.labels = labels
        self.labelMap = {label: index for index, label in enumerate(labels)}

    def transition(self, state):
        """Transition to a new state by making a random draw from the outgoing
        probabilities of the state with the specified label.

        Parameters:
            state (str): the label for the current state.

        Returns:
            (str): the label of the state transitioned to.

        Raises:
            KeyError: if state is not a known label.
        """
        # getting the column index, called col
        col = self.labelMap[state]

        # One categorical draw using the state's outgoing probabilities
        # (column col of A); the draw is one-hot, so argmax is the new index.
        draw = np.random.multinomial(1, self.A[:, col])

        # labels[i] is the label of index i, so no dictionary scan is needed.
        return self.labels[np.argmax(draw)]

    def walk(self, start, N):
        """Starting at the specified state, use the transition() method to
        transition from state to state N-1 times, recording the state label at
        each step.

        Parameters:
            start (str): The starting state label.
            N (int): The number of labels to record, counting start. The
                start label is always recorded, even when N is less than 1.

        Returns:
            (list(str)): A list of N state labels, including start.

        Raises:
            KeyError: if start is not a known label.
        """
        # checking in graph
        if start not in self.labelMap:
            raise KeyError("Invalid start label")

        # the list of visited labels begins with start
        labelsWalk = [start]
        current = start

        # calling transition N-1 times and appending each result to the list
        for _ in range(N - 1):
            current = self.transition(current)
            labelsWalk.append(current)

        return labelsWalk

    def path(self, start, stop):
        """Beginning at the start state, transition from state to state until
        arriving at the stop state, recording the state label at each step.

        The loop only ends when stop is actually drawn; there is no cap on
        the number of steps.

        Parameters:
            start (str): The starting state label.
            stop (str): The stopping state label.

        Returns:
            (list(str)): A list of state labels from start to stop.

        Raises:
            KeyError: if start or stop is not a known label.
        """
        # checking in graph
        if start not in self.labelMap:
            raise KeyError("Invalid start label")
        if stop not in self.labelMap:
            raise KeyError("Invalid stop label")

        # the list of visited labels begins with start
        labelsWalk = [start]
        current = start

        # walking until achieving the stop state
        while current != stop:
            current = self.transition(current)
            labelsWalk.append(current)

        return labelsWalk

    def steady_state(self, tol=1e-12, maxiter=40):
        """Compute the steady state of the transition matrix A.

        Parameters:
            tol (float): The convergence tolerance.
            maxiter (int): The maximum number of iterations to compute.

        Returns:
            ((n,) ndarray): The steady state distribution vector of A.

        Raises:
            ValueError: if there is no convergence within maxiter iterations.
        """
        # Random starting vector, scaled to sum to 1. Multiplying by a
        # column-stochastic A keeps the entry sum unchanged, so without this
        # scaling the result would not be a probability distribution.
        x = np.random.random(self.A.shape[0])
        x = x / x.sum()

        # repeatedly multiplying x by A; return once successive iterates
        # are within tol of each other
        for _ in range(maxiter):
            previous = x
            x = self.A @ previous
            if np.linalg.norm(x - previous) < tol:
                return x

        # if did not return, the iteration did not converge
        raise ValueError("Matrix does not converge")

In [4]:
class SentenceGenerator(MarkovChain):
    """A Markov-based simulator for natural language.

    Each distinct whitespace-separated token of the training file is a state,
    together with the two sentinel states "$tart" and "$top" that mark the
    beginning and end of a sentence.

    Attributes:
        statelabels (set(str)): the distinct tokens found in the file.
        states (list(str)): "$tart", the distinct tokens in order of first
            appearance, then "$top"; states[i] labels row/column i of A.
        A ((n,n) ndarray): column-stochastic matrix of transition
            frequencies; A[i, j] is the fraction of occurrences of states[j]
            that were followed by states[i].
        labels, labelMap: set by MarkovChain.__init__ from states.
    """
    def __init__(self, filename):
        """Read the specified file and build a transition matrix from its
        contents. You may assume that the file has one complete sentence
        written on each line.

        Blank lines are skipped. The tokens "$tart" and "$top" are reserved
        for the sentinel states and are assumed not to occur in the file.

        Parameters:
            filename (str): path of the training text file.

        Raises:
            ValueError: if the file contains no sentences.
        """
        # Read the file once. Blank lines are dropped: counting them would
        # add a direct $tart -> $top transition, i.e. an empty sentence.
        with open(filename, 'r') as readfile:
            sentences = [words for words in
                         (line.split() for line in readfile) if words]
        if not sentences:
            raise ValueError("training file contains no sentences")

        # Distinct words in first-seen order, so the state ordering is the
        # same on every run (set iteration order is not).
        unique_words = list(dict.fromkeys(
            word for words in sentences for word in words))
        self.statelabels = set(unique_words)

        # getting the list of states and initializing square A
        self.states = ["$tart"] + unique_words + ["$top"]
        size = len(self.states)
        counts = np.zeros((size, size))

        # label -> index lookup built once, instead of list.index per word
        index_of = {label: i for i, label in enumerate(self.states)}

        # populating transition matrix: column = current word, row = next word
        for words in sentences:
            tokens = ["$tart"] + words + ["$top"]
            for current, following in zip(tokens, tokens[1:]):
                counts[index_of[following], index_of[current]] += 1

        # $top has no outgoing transitions in the data; make it map to
        # itself so that its column also sums to one
        counts[-1, -1] = 1

        # making A column stochastic
        self.A = counts / counts.sum(axis=0)

        # now calling MarkovChain constructor
        super().__init__(self.A, self.states)

    def babble(self):
        """Create a random sentence using MarkovChain.path().

        Returns:
            (str): A sentence generated with the transition matrix, not
                including the labels for the $tart and $top states.

        Example:
            >>> yoda = SentenceGenerator("yoda.txt")
            >>> print(yoda.babble())
            The dark side of loss is a path as one with you.
        """
        # calling path method to get word selection
        path = self.path("$tart", "$top")

        # Drop the two sentinels and join the words with single spaces; the
        # slice is empty (never "$tart") if the path has no words in between.
        return ' '.join(path[1:-1])

### Training on a txt file of Yoda's lines

In [5]:
# Train a sentence generator on 'yoda.txt'; the relative path is resolved
# against the current working directory when the file is opened.
S = SentenceGenerator('yoda.txt')

In [9]:
# Print five independently sampled sentences from the trained generator.
for _draw in range(5):
    sentence = S.babble()
    print(sentence)

Who he has.
The boy may be; nevertheless grave danger I have I have.
Much anger leads to say hmmm!
Act on you choose the horizon.
So certain, are they.
