In [2]:
from collections import defaultdict

import numpy as np
from scipy.linalg import eig

## Markov Processes:

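A Markov process is a tuple $(S, P)$: a set of states $S$ and a state transition probability matrix $P$ whose entries are $P_{ss'} = \mathbb{P}[X_{n+1} = s' \mid X_n = s]$. The process satisfies the Markov property $\mathbb{P}[X_{n+1} \mid X_n] = \mathbb{P}[X_{n+1} \mid X_1, \dots, X_n]$, i.e. the next state depends only on the current state.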

In [3]:
class MP:
    """Markov process described by its state transition matrix.

    The matrix ``P`` is stored as given in ``transition_matrix``. It must
    expose ``.T`` (e.g. a NumPy array). It is assumed to be square with
    ``P[i, j]`` the probability of moving from state ``i`` to state ``j``
    (rows summing to 1) -- this is not validated here.
    """

    def __init__(self, P):
        self.transition_matrix = P

    def stationary_distribution(self, tol=1e-8):
        """Return a stationary distribution ``pi`` with ``pi @ P == pi``.

        ``pi`` is a left eigenvector of ``P`` for eigenvalue 1, i.e. a
        (right) eigenvector of ``P.T``, rescaled so its entries sum to 1.

        Parameters
        ----------
        tol : float, optional
            Absolute tolerance used to decide which eigenvalue equals 1.

        Returns
        -------
        numpy.ndarray
            1-D real array with one entry per state, summing to 1. If
            several eigenvalues lie within ``tol`` of 1, the eigenvector of
            the first one reported by ``eig`` is used.

        Raises
        ------
        IndexError
            If no eigenvalue lies within ``tol`` of 1.
        """
        eigenvalues, eigenvectors = eig(self.transition_matrix.T)
        unit_indices = np.where(np.abs(eigenvalues - 1.) < tol)[0]
        # `eig` may hand back complex arrays; keep the real part so the result
        # is an ordinary real vector. The copy also keeps the in-place division
        # below from touching `eigenvectors`.
        stationary = np.array(
            eigenvectors[:, unit_indices[0]].real, dtype=float
        ).ravel()
        # Eigenvectors are only defined up to scale (and sign); dividing by the
        # sum fixes both.
        stationary /= np.sum(stationary)
        return stationary

## Markov Reward Processes

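A Markov reward process is a tuple $(S, P, R, \gamma)$: a set of states $S$, a state transition probability matrix $P$, a reward function $R$ with $R(s) = \mathbb{E}[R_{n+1} \mid S_n = s]$ (where $S_n$ denotes the state at step $n$), and a discount factor $\gamma \in [0, 1]$.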

The state value function $v(s) = \mathbb{E}[G_t \mid S_t = s]$ of an MRP is the expected return starting from state $s$, where $G_t = \sum_{k=0}^\infty \gamma^k R_{t+k+1}$ is the total discounted reward from time $t$

In [None]:
class MRP:
    """Markov reward process: a plain holder for its defining pieces.

    Attributes set by the constructor:
      * ``transition_probability`` -- the ``P`` argument, stored as given
      * ``reward``                 -- the ``R`` argument, stored as given
      * ``gamma``                  -- the ``gamma`` argument, stored as given
    """

    def __init__(self, P, R, gamma):
        # Inputs are kept by reference; nothing is copied or validated.
        self.transition_probability, self.reward, self.gamma = P, R, gamma

In [None]:
class MRP_2:
    """Markov reward process whose rewards are attached to transitions.

    ``P`` and ``R`` are stored as given in ``transition_probability`` and
    ``transition_reward``. ``P`` must expose ``.shape``; its first dimension
    is recorded as ``state_number``. Both are indexed as ``M[s][s_p]``.
    """

    def __init__(self, P, R, gamma):
        self.transition_probability, self.transition_reward = P, R
        self.gamma = gamma
        self.state_number = self.transition_probability.shape[0]

    def get_reward_per_state(self):
        """Fill ``self.reward`` with one probability-weighted reward per state.

        For each state ``s`` the stored value is
        ``sum(P[s][s_p] * R[s][s_p])`` over ``s_p`` in
        ``range(state_number)``. The result is a Python list kept on the
        instance; nothing is returned.
        """
        probs = self.transition_probability
        payoffs = self.transition_reward
        # Start from a fresh list, then grow it one state at a time.
        self.reward = []
        self.reward.extend(
            sum([probs[src][dst] * payoffs[src][dst]
                 for dst in range(self.state_number)])
            for src in range(self.state_number)
        )