In [None]:
%matplotlib inline

In [None]:
import collections
import glob
import os

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns
sns.set_style("whitegrid")

In [None]:
# The optimal average return of the MDP; drawn as the dashed reference line in the plots below.
# TODO: compute this value instead of hardcoding it, because the hardcoded number will get stale fast.
# It was obtained by uniformly averaging the values output by value iteration for the starting room.
_OPTIMAL_V = 0.9373168211111111

In [None]:
# Root data directory; every entry matching "muzero_*" is treated as one run
# directory and is expected to contain a stats.csv.
viz_dir = "../../data/"
# Figures produced by this notebook are written here; create it if missing.
results_dir = os.path.join(viz_dir, "results")
os.makedirs(results_dir, exist_ok=True)
# glob returns matches in filesystem-dependent order; sort so the run order
# (and the row order of everything loaded from it) is reproducible.
dirpaths = sorted(glob.glob(os.path.join(viz_dir, "muzero_*")))
print(len(dirpaths))

In [None]:
# Load the "avg_discounted_return" column of every run's stats.csv.
# Loading is best-effort: a run that cannot be read is reported and skipped
# instead of aborting the notebook.
returns = []
for dirpath in dirpaths:
    stats_path = os.path.join(dirpath, "stats.csv")
    try:
        g = np.array(pd.read_csv(stats_path)["avg_discounted_return"])
    except (OSError, KeyError, ValueError) as err:
        # OSError: missing/unreadable file; KeyError: column absent;
        # ValueError: pandas' empty-file and parser errors derive from it.
        # A bare `except:` would also swallow KeyboardInterrupt and hide the cause.
        print("failed to load {}: {!r}".format(dirpath, err))
    else:
        returns.append(g)
# Stack into one array with a row per run. Assumes every run logged the same
# number of rows -- TODO confirm; runs of differing length will not stack cleanly.
returns = np.array(returns)
print(returns.shape)

In [None]:
def plot_returns(returns, steps_each=100):
    """Plot the mean discounted return across runs with a +/- one std band.

    Note: a later cell defines another ``plot_returns`` with a different
    signature, which replaces this one once that cell has run.

    Args:
        returns: array that is averaged over axis 0; it must provide
            ``.mean(axis=0)`` (e.g. a 2-D NumPy array with one row per run).
        steps_each: number of environment steps between consecutive logged
            points; scales the x axis.

    Draws onto a new 10x6 matplotlib figure, including a dashed line at
    ``_OPTIMAL_V``. Returns nothing; saving/showing is left to the caller.
    """
    curve_color = "b"
    font_size = 14

    plt.figure(figsize=(10, 6))

    # Collapse the run axis: one mean and one std per logged point.
    mean_curve = returns.mean(axis=0)
    spread = np.std(returns, axis=0)
    steps = np.arange(len(mean_curve)) * steps_each
    lower_band = mean_curve - spread
    upper_band = mean_curve + spread

    plt.plot(steps, mean_curve, label="Tabular MuZero", c=curve_color)
    plt.fill_between(steps, y1=lower_band, y2=upper_band, color=curve_color, alpha=0.2)

    # Constant reference line at the optimal value over the same x range.
    optimal_line = np.ones_like(steps) * _OPTIMAL_V
    plt.plot(steps, optimal_line, c="g", label="Optimal Expected Value", linestyle="--")

    plt.xlabel("Steps", fontsize=font_size)
    plt.ylabel("Discounted Return", fontsize=font_size)
    plt.title("Discounted Return vs Environment Steps", fontsize=font_size)
    plt.legend(fontsize=font_size)
    plt.tight_layout()

In [None]:
# Draw the learning curve aggregated over all loaded runs, then write the
# current figure into the results directory.
plot_returns(returns)
aggregate_figure_path = os.path.join(results_dir, "average_discounted_return.png")
plt.savefig(aggregate_figure_path)

In [None]:
def extract_param(string, substring):
    """Return the token that follows ``"<substring>_"`` in ``string``.

    The token runs from just after the first occurrence of ``substring + "_"``
    up to the next underscore, or to the end of ``string`` when the parameter
    is the last one encoded (no trailing underscore).

    Args:
        string: text to search, e.g. a run directory path.
        substring: parameter name that precedes the value, without the
            trailing underscore.

    Returns:
        The value token as a string (not converted to a number).

    Raises:
        ValueError: if ``substring + "_"`` does not occur in ``string``.
    """
    prefix = substring + "_"
    start = string.index(prefix) + len(prefix)
    end = string.find("_", start)
    if end == -1:
        # Value is the final token: nothing follows it, so take the rest.
        end = len(string)
    return string[start:end]
    
def extract_params_from_dirname(d):
    """Collect the hyper-parameter values encoded in a run directory name.

    Args:
        d: directory name/path containing ``return_n_<value>`` and
            ``num_sims_<value>`` tokens.

    Returns:
        dict mapping the display label of each parameter ("return_n" and
        "monte carlo simulations") to its value as extracted by
        ``extract_param`` (a string).
    """
    # (display label, token name as it appears in the directory name)
    labelled_tokens = (
        ("return_n", "return_n"),
        ("monte carlo simulations", "num_sims"),
    )
    return {label: extract_param(d, token) for label, token in labelled_tokens}
    
# Group the per-run return curves by hyper-parameter:
#   returns[param_label][param_value] -> list of 1-D arrays, one per run.
# NOTE: this rebinds `returns`, replacing the array built by the earlier cell.
returns = collections.defaultdict(lambda: collections.defaultdict(list))
for dirpath in dirpaths:
    params = extract_params_from_dirname(dirpath)
    try:
        # Read the CSV once per run; it does not depend on which parameter is
        # being grouped, so it is hoisted out of the inner loop.
        g = np.array(pd.read_csv(os.path.join(dirpath, "stats.csv"))["avg_discounted_return"])
    except (OSError, KeyError, ValueError) as err:
        # Skip unreadable runs, as the aggregate loader does, so one broken
        # directory does not abort the per-parameter plots.
        print("failed to load {}: {!r}".format(dirpath, err))
        continue
    for k, v in params.items():
        returns[k][v].append(g)

In [None]:
def plot_returns(returns, param, steps_each=100):
    """Plot one mean +/- std learning curve per value of a hyper-parameter.

    Note: this reuses the name of the earlier single-curve ``plot_returns``
    and replaces it once this cell has run.

    Args:
        returns: mapping from parameter value (anything ``int()`` accepts,
            e.g. ``"5"``) to a list of equally long 1-D return arrays, one
            per run.
        param: human-readable parameter name, shown in the legend and title.
        steps_each: number of environment steps between consecutive logged
            points; scales the x axis.

    Raises:
        ValueError: if ``returns`` is empty, or a key cannot be parsed by
            ``int()``.

    Draws onto a new 10x6 matplotlib figure, including a dashed line at
    ``_OPTIMAL_V``. Returns nothing; saving/showing is left to the caller.
    """
    if not returns:
        # Without any curve there is no x range for the reference line either.
        raise ValueError("no returns to plot for parameter {!r}".format(param))

    plt.figure(figsize=(10, 6))

    # Sort numerically but look up with the original keys: converting to int
    # and back to str would lose keys such as "05" or non-string keys.
    longest_x = None
    for param_value in sorted(returns, key=int):
        value_returns = returns[param_value]

        means = np.mean(value_returns, axis=0)
        y_err = np.std(value_returns, axis=0)
        x_values = np.arange(len(means)) * steps_each

        plt.plot(x_values, means, label="{} = {}".format(param, param_value))
        plt.fill_between(x_values, y2=means + y_err, y1=means - y_err, alpha=0.2)

        # Remember the widest x range so the reference line covers every curve.
        if longest_x is None or len(x_values) > len(longest_x):
            longest_x = x_values

    plt.plot(longest_x, np.ones_like(longest_x) * _OPTIMAL_V, c="g", label="Optimal Expected Value", linestyle="--")

    plt.xlabel("Steps", fontsize=14)
    plt.ylabel("Discounted Return", fontsize=14)
    # The original template had no placeholder, so `param` never reached the title.
    plt.title("Discounted Return vs Environment Steps ({})".format(param), fontsize=14)
    plt.legend(fontsize=14)
    plt.tight_layout()

In [None]:
# One figure per grouped hyper-parameter; the parameter label is embedded in
# the output file name and the figure is saved at 300 dpi.
for k, v in returns.items():
    plot_returns(v, k)
    figure_name = "average_discounted_return_{}.png".format(k)
    plt.savefig(os.path.join(results_dir, figure_name), dpi=300)