In [1]:
from estimators.whited_estimator import EstWhited
from solver.value_iteration import VITrainer
import matplotlib.pyplot as plt
from scipy import stats
import numpy as np

from mdps.whited_replace import cMDPWhited
from utils.distributions import ConstantDistribution, ParticleDistribution, UniformDistribution

In [2]:
# Helpers for collecting and plotting expert rollouts

def get_rollouts(solver_, config):
    """Roll out a single episode of ``cMDPWhited(config)`` driven by ``solver_``.

    Args:
        solver_: object exposing ``compute_single_action(obs)``; it is queried
            once per step with the most recent observation.
        config: configuration forwarded unchanged to ``cMDPWhited``.

    Returns:
        A tuple ``(observations, actions, rewards)``:

        * ``observations`` -- 2-D array with one row per ``env.step`` call,
          holding the observation that call returned. The observation from
          ``env.reset()`` is *not* included.
        * ``actions`` -- ``np.array`` of the actions passed to ``env.step``.
        * ``rewards`` -- ``np.array`` of the rewards returned by ``env.step``.

    NOTE(review): because the reset observation is dropped, row ``i`` of
    ``observations`` is the observation seen *after* ``actions[i]`` was taken,
    not the one the action was computed from -- confirm this alignment is what
    downstream consumers expect.
    """
    env_ = cMDPWhited(config=config)
    obs_ = env_.reset()

    # Accumulate in plain lists and stack once at the end: calling np.vstack
    # inside the loop would re-copy the whole history on every step.
    obs_history_ = [obs_]
    actions_ = []
    rewards_ = []

    done_ = False
    while not done_:
        action_ = solver_.compute_single_action(obs_)
        obs_, reward_, done_, _ = env_.step(action_)
        obs_history_.append(obs_)
        actions_.append(action_)
        rewards_.append(reward_)

    # The reset observation is kept in the stack (so dtype/shape promotion is
    # the same as before) and then sliced off, matching the original output.
    gt_obs_arr_ = np.vstack(obs_history_)
    gt_act_arr_ = np.array(actions_)
    gt_rew_arr_ = np.array(rewards_)
    return gt_obs_arr_[1:, :], gt_act_arr_, gt_rew_arr_

def plot_rollouts(gt_obs_arr_, gt_act_arr_):
    """Show two figures of a rollout, one per observation column.

    Column 0 of ``gt_obs_arr_`` is drawn with the label 'Capital(k)' and
    column 1 with 'Shock(z)'. In each figure the observation is a red line on
    the left y-axis and ``gt_act_arr_`` is a semi-transparent blue line on a
    twin right y-axis; the x-axis is the row index of ``gt_obs_arr_``.

    Args:
        gt_obs_arr_: 2-D array indexed as ``[step, column]`` with at least
            two columns.
        gt_act_arr_: sequence of actions plotted against the same x values.
    """
    steps = np.arange(start=0, stop=gt_obs_arr_.shape[0])

    # (observation column, left-axis label) for each figure, in display order.
    panels = ((0, 'Capital(k)'), (1, 'Shock(z)'))

    for column, obs_label in panels:
        figure, obs_axis = plt.subplots()
        figure.set_size_inches(10, 6, forward=True)

        # The twin axis shares x, so actions get their own y scale.
        act_axis = obs_axis.twinx()
        obs_axis.plot(steps, gt_obs_arr_[:, column], 'r-')
        act_axis.plot(steps, gt_act_arr_, 'b-', alpha=0.3)

        obs_axis.set_xlabel('time step')
        obs_axis.set_ylabel(obs_label, color='r')
        act_axis.set_ylabel('Action', color='b')
        plt.title('sample observations and actions')
        plt.show()

In [3]:
# Environment config: the context distribution is a ConstantDistribution
# built from a fixed 5-element vector.
c = {
    "context_distribution": ConstantDistribution(
        dim=5,
        constant_vector=np.array([0.98, 0.15, 0.7, 0.7, 0.15]),
    ),
}

# NOTE(review): c_trainer is defined here but is not passed to VITrainer
# below -- confirm whether the solver is meant to receive 'grid_nums'.
c_trainer = {'grid_nums': np.array([35, 15])}

solver_vi = VITrainer(env=cMDPWhited(config=c))


In [4]:
# Train the value-iteration solver. The captured log below prints a "distance"
# every 5 iterations (presumably the change between successive value
# estimates -- confirm in VITrainer).
solver_vi.train()

iteration:  5 distance:  9.653373989045917
iteration:  10 distance:  7.156935574776753
iteration:  15 distance:  6.256070452420715
iteration:  20 distance:  5.623194320213827
iteration:  25 distance:  5.078106551885696
iteration:  30 distance:  4.58947488778449
iteration:  35 distance:  4.148410716921546
iteration:  40 distance:  3.749817845369364
iteration:  45 distance:  3.3895357709150744
iteration:  50 distance:  3.0638714853414513
iteration:  55 distance:  2.769497095151223
iteration:  60 distance:  2.5034060119950254
iteration:  65 distance:  2.2628807557107393
iteration:  70 distance:  2.045464975557991
iteration:  75 distance:  1.848938330501369
iteration:  80 distance:  1.6712938089360705
iteration:  85 distance:  1.5107172314596937
iteration:  90 distance:  1.3655687236006315
iteration:  95 distance:  1.2343659687221589
iteration:  100 distance:  1.1157690699903924
iteration:  105 distance:  1.0085668667903747
iteration:  110 distance:  0.9116645658554035
iteration:  115 dist