# Let us create a simple example of a Markov Reward Process

<img src="mrp1.png" width=540px height=60px>

In [2]:
from mrp import MRP

# State labels; 'T' is listed last and is the only state that loops back
# onto itself with probability 1 in the matrix below.
S = ['s0', 's1', 's2', 's3', 'T']

# Transition probabilities, one row per entry of S (rows and columns follow
# the order of S). Each row sums to 1.
P = [
    [0, 0.3, 0.3, 0.4, 0],      # s0
    [0, 0, 0, 1.0, 0],          # s1
    [0, 0.5, 0, 0, 0.5],        # s2
    [0, 0, 0, 0, 1.0],          # s3
    [0, 0, 0, 0, 1.0],          # T
]

# Rewards, one entry per state in the order of S: -1 everywhere except 'T'.
R = [-1, -1, -1, -1, 0]

# Build the process that the sampling helpers below draw episodes from.
mrp = MRP(S, P, R)


In [3]:
import pandas as pd
# Show full cell contents so complete episodes are visible in the output.
# None means "no truncation"; the older -1 sentinel was deprecated in
# pandas 1.0 and is rejected by pandas 2.x.
pd.set_option('display.max_colwidth', None)

def get_return(episode, t=0, gamma=1):
    """Compute the discounted return of an episode from time step ``t``.

    The return is ``r_t + gamma * r_{t+1} + gamma**2 * r_{t+2} + ...`` where
    ``r_k`` is the element at index 1 of ``episode[k]``.

    Args:
        episode: Sequence of steps; the reward is read from index 1 of each
            step (the episodes in this notebook are
            ``(state, reward, next_state)`` tuples).
        t: Index of the first step to include in the return.
        gamma: Discount factor applied once per subsequent step.

    Returns:
        The discounted sum of rewards, or 0 when there is no step at
        index ``t`` or later.
    """
    total = 0
    # Accumulate from the last step backwards. This performs the same
    # arithmetic, in the same order, as the recursive definition
    # G_t = r_t + gamma * G_{t+1}, but without consuming one Python stack
    # frame per step, so long episodes cannot hit the recursion limit.
    for step_index in reversed(range(t, len(episode))):
        total = episode[step_index][1] + gamma * total
    return total
    
def generate_episodes(count=10,start_state=None):
    """Sample episodes from the module-level ``mrp`` and tabulate them.

    Args:
        count: Number of episodes to sample.
        start_state: Forwarded unchanged to ``mrp.generate_episodes``.

    Returns:
        A DataFrame with one row per episode: column ``0`` holds the episode
        itself, ``length`` its number of steps, ``state_t_zero`` the first
        element of its first step, and ``return_t_zero`` the value of
        ``get_return`` for the episode with default arguments.
    """
    sampled = []
    for _ in range(count):
        sampled.append(mrp.generate_episodes(start_state=start_state))

    table = pd.Series(sampled).to_frame()
    trajectories = table[0]
    table['length'] = trajectories.apply(len)
    table['state_t_zero'] = trajectories.apply(lambda steps: steps[0][0])
    table['return_t_zero'] = trajectories.apply(get_return)
    return table

In [4]:
# Sample 10 episodes (the default count) with start_state left as None.
# Judging by the output, the start state then varies from episode to episode.
episodes_df = generate_episodes()
episodes_df

Unnamed: 0,0,length,state_t_zero,return_t_zero
0,"[(s0, -1, s2), (s2, -1, T)]",2,s0,-2
1,"[(s0, -1, s2), (s2, -1, s1), (s1, -1, s3), (s3, -1, T)]",4,s0,-4
2,"[(s3, -1, T)]",1,s3,-1
3,"[(s2, -1, s1), (s1, -1, s3), (s3, -1, T)]",3,s2,-3
4,"[(s1, -1, s3), (s3, -1, T)]",2,s1,-2
5,"[(s2, -1, s1), (s1, -1, s3), (s3, -1, T)]",3,s2,-3
6,"[(s2, -1, T)]",1,s2,-1
7,"[(s3, -1, T)]",1,s3,-1
8,"[(s3, -1, T)]",1,s3,-1
9,"[(s3, -1, T)]",1,s3,-1


In [5]:
# Sample 10 episodes again, this time requesting 's2' as the start state.
episodes_df = generate_episodes(start_state='s2')
episodes_df

Unnamed: 0,0,length,state_t_zero,return_t_zero
0,"[(s2, -1, s1), (s1, -1, s3), (s3, -1, T)]",3,s2,-3
1,"[(s2, -1, T)]",1,s2,-1
2,"[(s2, -1, s1), (s1, -1, s3), (s3, -1, T)]",3,s2,-3
3,"[(s2, -1, T)]",1,s2,-1
4,"[(s2, -1, s1), (s1, -1, s3), (s3, -1, T)]",3,s2,-3
5,"[(s2, -1, s1), (s1, -1, s3), (s3, -1, T)]",3,s2,-3
6,"[(s2, -1, T)]",1,s2,-1
7,"[(s2, -1, s1), (s1, -1, s3), (s3, -1, T)]",3,s2,-3
8,"[(s2, -1, s1), (s1, -1, s3), (s3, -1, T)]",3,s2,-3
9,"[(s2, -1, s1), (s1, -1, s3), (s3, -1, T)]",3,s2,-3


In [6]:
# Monte Carlo estimate of the value of state 's2': the mean return over
# 100 sampled episodes that all start in 's2'.
episodes_df = generate_episodes(count=100,start_state='s2')
value_s2 = episodes_df.return_t_zero.mean()
# The estimate was originally stored under the misleading name `value_s0`
# even though the episodes start in 's2'; the old name is kept as an alias
# so any cell that still refers to it keeps working.
value_s0 = value_s2
value_s2

-1.98

In [7]:
# Show the first 10 of the episodes behind the estimate above.
episodes_df.head(10)

Unnamed: 0,0,length,state_t_zero,return_t_zero
0,"[(s2, -1, s1), (s1, -1, s3), (s3, -1, T)]",3,s2,-3
1,"[(s2, -1, T)]",1,s2,-1
2,"[(s2, -1, T)]",1,s2,-1
3,"[(s2, -1, s1), (s1, -1, s3), (s3, -1, T)]",3,s2,-3
4,"[(s2, -1, T)]",1,s2,-1
5,"[(s2, -1, s1), (s1, -1, s3), (s3, -1, T)]",3,s2,-3
6,"[(s2, -1, T)]",1,s2,-1
7,"[(s2, -1, T)]",1,s2,-1
8,"[(s2, -1, T)]",1,s2,-1
9,"[(s2, -1, s1), (s1, -1, s3), (s3, -1, T)]",3,s2,-3
