In [1]:
from lib.util import *
from lib.mp import *

# Markov Processes

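A Markov process is a tuple $(S, P)$: a set of states $S$ and a state transition probability matrix $P$ with entries $P_{ss'} = \mathbb{P}[X_{n+1} = s' \mid X_n = s]$, where the process satisfies the Markov property $\mathbb{P}[X_{n+1} \mid X_n] = \mathbb{P}[X_{n+1} \mid X_1, \dots, X_n]$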

In [2]:
# Size argument handed to generate_stochastic_matrix in the next cell
# (the printed stationary distribution has one entry per state).
n = 5

In [3]:
# Build a transition matrix of size n with the library helper and wrap it
# in a Markov process object.
P = generate_stochastic_matrix(n)
mp = MP(P)

# Compute the stationary distribution once, then print it.
stationary_dist = mp.stationary_distribution()
print(stationary_dist)

[0.17808118 0.1719813  0.29694152 0.18273402 0.17026198]


# Markov Reward Processes

A Markov reward process is a tuple $(S, P, R, \gamma)$: a set of states $S$, a state transition probability matrix $P$, a reward function $R$ s.t. $R(s) = \mathbb{E}[R_{t+1} \mid S_t = s]$, and a discount factor $\gamma \in [0, 1]$

The state value function $v(s) = \mathbb{E}[G_t \mid S_t = s]$ of an MRP is the expected return starting from state $s$, where $G_t = \sum_{k=0}^\infty \gamma^k R_{t+k+1}$ is the total discounted reward from time $t$

Bellman Equation for MRP:
$$ v(s) = \mathbb{E}[R_{t+1} + \gamma v(S_{t+1}) \mid S_t = s] = R(s) + \gamma \sum_{s' \in S} P(s, s') v(s')$$

Matrix form of the Bellman Equation for MRP:
$$ v = R + \gamma P v$$ 

In [4]:
from lib.mrp import *

In [5]:
# Size argument passed to the matrix / reward generators in the MRP cells
# below (same value as the one set for the Markov process section).
n = 5
# Discount factor gamma handed to the MRP constructors; the definition
# above requires it to lie in [0, 1].
gamma = 0.95

In [6]:
# Ingredients of the MRP: a transition matrix and a reward vector, both
# generated with size n, combined with the discount factor gamma.
P = generate_stochastic_matrix(n)
R = generate_reward_vector(n)
mrp = MRP(P, R, gamma)

# Obtain the state value function from the MRP and print it.
value_fn = mrp.get_value_function()
print(value_fn)

[9.72162566 8.92772358 9.85245503 9.65662999 9.30640476]


In [7]:
# Second MRP variant: here the reward comes from generate_reward_matrix
# rather than generate_reward_vector, and the process is built with MRP_2.
P = generate_stochastic_matrix(n)
R = generate_reward_matrix(n)
mrp = MRP_2(P, R, gamma)

# get_reward_per_state() returns an object whose `reward` attribute is printed.
per_state = mrp.get_reward_per_state()
state_rewards = per_state.reward
print(state_rewards)

[0.352295509950974, 0.3733631543658672, 0.7887441491738059, 0.5747341026534833, 0.42679737159807213]
