In [45]:
import itertools
import os
import pickle
import warnings

import numpy as np
import pandas as pd
import matplotlib as mpl
from matplotlib import pyplot as plt
from scipy.signal import medfilt

In [70]:
def plot_learning_average(env_name, ax, experiment, window_size, var = True, OnePlayer = False):
    """Plot learning curves pooled over seeds 0-4 for each experiment.

    For every entry of ``experiment`` the pickled ``results`` file of each
    seed directory is loaded and the collected data is handed to
    ``plot_learning_curves`` with ``best=False``.

    Args:
        env_name: environment name; used as a directory name and plot title.
        ax: axes object to draw on.
        experiment: iterable of experiment sub-paths (e.g. 'RMSprop/no_noise').
        window_size: width of the averaging window passed to the plotter.
        var: if True, the plotter shades one standard deviation.
        OnePlayer: if True, read from 'models_OnePlayer/.../nr_mdp_0_1',
            otherwise from 'models/.../nr_mdp_0.1_1'.

    Returns:
        The ``(final_avg, final_std)`` pair returned by ``plot_learning_curves``.
    """
    if OnePlayer:
        base_dir = os.getcwd() + '/models_OnePlayer/' + env_name
        run_dir = '/nr_mdp_0_1/'
    else:
        base_dir = os.getcwd() + '/models/' + env_name
        run_dir = '/nr_mdp_0.1_1/'

    results = {}
    for exp in experiment:
        per_seed = {}
        for seed in range(5):
            path = base_dir + '/' + exp + run_dir + str(seed) + '/results'
            try:
                # NOTE(security): pickle can execute arbitrary code on load;
                # only point this at result files you produced yourself.
                with open(path, 'rb') as f:
                    per_seed[seed] = pickle.load(f)
            except IOError as err:
                # A missing seed is tolerated, but no longer silently: the
                # curve is then pooled over fewer runs than the reader expects.
                warnings.warn('Skipping seed %d of %r: %s' % (seed, exp, err))
        if per_seed:
            results[exp] = per_seed
        else:
            # An empty entry would make the plotting step fail on empty data.
            warnings.warn('No results could be loaded for %r in %s; experiment omitted'
                          % (exp, env_name))

    return plot_learning_curves(results, ax, env_name, window_size, var, OnePlayer, best = False)

def plot_learning_best(env_name, seed, ax, experiment, window_size, var = True, OnePlayer = False):
    """Plot the learning curve of one chosen seed per experiment.

    ``seed[i]`` names the seed directory whose pickled ``results`` file is
    loaded for ``experiment[i]``. The loaded data is handed to
    ``plot_learning_curves`` with ``best=True``.

    Args:
        env_name: environment name; used as a directory name and plot title.
        seed: sequence indexed like ``experiment`` giving the seed to load.
        ax: axes object to draw on.
        experiment: sequence of experiment sub-paths.
        window_size: width of the averaging window passed to the plotter.
        var: if True, the plotter shades one standard deviation.
        OnePlayer: if True, read from 'models_OnePlayer/.../nr_mdp_0_1',
            otherwise from 'models/.../nr_mdp_0.1_1'.

    Returns:
        The ``(final_avg, final_std)`` pair returned by ``plot_learning_curves``.
    """
    if OnePlayer:
        base_dir = os.getcwd() + '/models_OnePlayer/' + env_name
        run_dir = '/nr_mdp_0_1/'
    else:
        base_dir = os.getcwd() + '/models/' + env_name
        run_dir = '/nr_mdp_0.1_1/'

    results = {}
    for i, exp in enumerate(experiment):
        path = base_dir + '/' + exp + run_dir + str(seed[i]) + '/results'
        try:
            # NOTE(security): pickle can execute arbitrary code on load;
            # only point this at result files you produced yourself.
            with open(path, 'rb') as f:
                results[exp] = pickle.load(f)
        except IOError as err:
            # The experiment is only registered once its file is loaded. An
            # empty placeholder would fail later on the 'eval_rewards' lookup.
            warnings.warn('No results for %r (seed %s) in %s: %s; experiment omitted'
                          % (exp, seed[i], env_name, err))

    return plot_learning_curves(results, ax, env_name, window_size, var, OnePlayer, best = True)
   
def plot_learning_curves(results, ax, env_name, window_size, var, OnePlayer, best):
    """Draw one windowed learning curve per experiment on ``ax``.

    The 'eval_rewards' data is flattened and reshaped into two-column rows.
    Column 0 is the x position used for windowing (axis labelled 'Timesteps')
    and column 1 is the value that gets averaged (axis labelled 'Reward').

    Args:
        results: mapping experiment name -> loaded data. With ``best=True``
            each value is a dict holding 'eval_rewards'; otherwise each value
            maps seed -> such a dict and all seeds are pooled.
        ax: axes object to draw on.
        env_name: used as the axes title.
        window_size: window width in units of column 0.
        var: if True, shade mean +/- one standard deviation.
        OnePlayer: accepted for call compatibility; not used here.
        best: selects the layout of ``results`` described above.

    Returns:
        ``(final_avg, final_std)``: for each experiment, the mean and standard
        deviation of its last 10 window means. Experiments without data
        contribute NaN so positions stay aligned with ``results``.
    """
    colors = ['#396ab1', '#cc2529', '#3e9651', '#396ab1', '#da7c30', '#94823d', '#535154', '#006400', '#00FF00', '#800000', '#F08080', '#FFFF00', '#000000', '#C0C0C0']

    final_avg = []
    final_std = []
    for idx, exp in enumerate(results):
        runs = results[exp]
        if not runs:
            # Nothing was loaded for this experiment: skip it, don't crash.
            warnings.warn('No results for experiment %r; nothing plotted for it' % (exp,))
            final_avg.append(float('nan'))
            final_std.append(float('nan'))
            continue

        if best:
            merged = list(itertools.chain(*(runs['eval_rewards'])))
        else:
            per_seed = [list(itertools.chain(*(runs[seed]['eval_rewards']))) for seed in runs]
            merged = list(itertools.chain.from_iterable(per_seed))
        out = np.array(merged).reshape(-1, 2)
        steps = out[:, 0]
        rewards = out[:, 1]

        length = int(steps[-1] / window_size)
        x = np.zeros(length)
        y_avg = np.zeros(length)
        y_std = np.zeros(length)
        # Slot 0 is left at (0, 0); slot i covers the half-open window
        # (window_size * (i - 1), window_size * i].
        for i in range(1, length):
            in_window = (steps <= window_size * i) & (steps > window_size * (i - 1))
            data = rewards[in_window]
            x[i] = window_size * i
            y_avg[i] = np.mean(data)
            y_std[i] = np.std(data)

        tail = y_avg[-10:]
        final_avg.append(np.mean(tail))
        final_std.append(np.std(tail))

        if 'RMSprop' in exp:
            l = 'Baseline'
        else:
            l = 'SGLD'
        # Wrap around so more experiments than palette entries cannot raise.
        color = colors[idx % len(colors)]
        ax.plot(x, y_avg, color = color, label = l)
        if var:
            ax.fill_between(x, y_avg - y_std, y_avg + y_std, facecolor=color, alpha=0.4, interpolate=True)

    ax.set_title(env_name)
    ax.set_xlabel('Timesteps')
    ax.set_ylabel('Reward')
    ax.ticklabel_format(style='scientific', axis='x', scilimits=(0,0))
    return final_avg, final_std

In [55]:
def plot_learning_all(experiment, title, comparison, OnePlayer, best, seed=None):
    """Create and save 2x2 learning-curve figures, one panel per environment.

    Two figures are produced: one with the standard-deviation band and one
    without. They are written to
    ``plots/<OnePlayer|TwoPlayer>/Learning_curve/<title>_<best|average>[_fill].png``.

    Args:
        experiment: if ``comparison`` is True, a sequence with one experiment
            list per environment, in the order of ``envs`` below. Otherwise a
            single experiment list shared by all environments.
        title: prefix of the output file names.
        comparison: see ``experiment``; also puts a legend on every panel
            instead of only the first.
        OnePlayer: forwarded to the loaders and selects the output folder.
        best: if True, plot one chosen seed per experiment via
            ``plot_learning_best``; otherwise pool seeds via
            ``plot_learning_average``.
        seed: required when ``best`` is True; ``seed[i]`` is passed to
            ``plot_learning_best`` for environment ``i``.
    """
    cols = 2
    rows = 2
    envs = ['Walker2d-v2', 'HalfCheetah-v2', 'Hopper-v2', 'Ant-v2']

    if OnePlayer:
        folder = 'OnePlayer'
    else:
        folder = 'TwoPlayer'
    out_dir = 'plots/' + folder + '/Learning_curve/'
    # savefig does not create missing directories, so make sure it exists.
    if not os.path.isdir(out_dir):
        os.makedirs(out_dir)

    for f in [True, False]:
        fig, axs = plt.subplots(rows, cols, figsize=(20*cols/3, 9*rows/2))
        for i, env in enumerate(envs):
            ax = axs.flatten()[i]
            if comparison:
                exp = experiment[i]
            else:
                exp = experiment
            if best:
                plot_learning_best(env, seed[i], ax, exp, window_size = 3000, var = f, OnePlayer = OnePlayer)
            else:
                plot_learning_average(env, ax, exp, window_size = 3000, var = f, OnePlayer = OnePlayer)
            # A shared experiment list only needs a legend on the first panel.
            if comparison or i == 0:
                ax.legend()
        plt.tight_layout()

        suffix = '_best' if best else '_average'
        if f:
            suffix += '_fill'
        plt.savefig(out_dir + title + suffix + '.png')


In [60]:
## Comparison Two Player Average
# Learning curves pooled over seeds, two-player models.
# Each list pairs the RMSprop run with the SGLD run for one environment.
Walker_exp = [
    'RMSprop/no_noise',
    'SGLD_thermal_0.01/action_noise_0.01',
]
HalfCheetah_exp = [
    'RMSprop/action_noise_0.2',
    'SGLD_thermal_0.01/no_noise',
]
Hopper_exp = [
    'RMSprop/action_noise_0.2',
    'SGLD_thermal_0.001/action_noise_0.2',
]
Ant_exp = [
    'RMSprop/action_noise_0.4',
    'SGLD_thermal_0.0001/action_noise_0.2',
]
# Order follows the environment list inside plot_learning_all.
exp = [Walker_exp, HalfCheetah_exp, Hopper_exp, Ant_exp]
plot_learning_all(exp, 'Comparison', comparison=True, OnePlayer=False, best=False)

In [61]:
## Comparison One Player Average
# Learning curves pooled over seeds, one-player models.
# Each list pairs the RMSprop run with the SGLD run for one environment.
Walker_exp = [
    'RMSprop/action_noise_0.01',
    'SGLD_thermal_0.01/action_noise_0.1',
]
HalfCheetah_exp = [
    'RMSprop/action_noise_0.4',
    'SGLD_thermal_0.01/action_noise_0.01',
]
Hopper_exp = [
    'RMSprop/action_noise_0.4',
    'SGLD_thermal_1e-05/action_noise_0.3',
]
Ant_exp = [
    'RMSprop/action_noise_0.4',
    'SGLD_thermal_0.01/action_noise_0.4',
]
# Order follows the environment list inside plot_learning_all.
exp = [Walker_exp, HalfCheetah_exp, Hopper_exp, Ant_exp]
plot_learning_all(exp, 'Comparison', comparison=True, OnePlayer=True, best=False)

In [62]:
## Comparison Two Player Best
# Learning curves of one selected seed per run, two-player models.
# Each *_exp list pairs the RMSprop run with the SGLD run; the matching
# *_seed list gives the seed directory to load for each of the two runs.
Walker_exp = [
    'RMSprop/no_noise',
    'SGLD_thermal_0.01/action_noise_0.01',
]
Walker_seed = [0, 4]

HalfCheetah_exp = [
    'RMSprop/action_noise_0.2',
    'SGLD_thermal_0.01/no_noise',
]
HalfCheetah_seed = [3, 2]

Hopper_exp = [
    'RMSprop/action_noise_0.2',
    'SGLD_thermal_0.001/action_noise_0.2',
]
Hopper_seed = [3, 4]

Ant_exp = [
    'RMSprop/action_noise_0.3',
    'SGLD_thermal_0.0001/action_noise_0.2',
]
Ant_seed = [1, 3]

# Both lists follow the environment order inside plot_learning_all.
exp = [Walker_exp, HalfCheetah_exp, Hopper_exp, Ant_exp]
seed = [Walker_seed, HalfCheetah_seed, Hopper_seed, Ant_seed]
plot_learning_all(exp, 'Comparison', comparison=True, OnePlayer=False, best=True, seed=seed)

In [63]:
## Comparison One Player Best
# Learning curves of one selected seed per run, one-player models.
# Each *_exp list pairs the RMSprop run with the SGLD run; the matching
# *_seed list gives the seed directory to load for each of the two runs.
Walker_exp = [
    'RMSprop/action_noise_0.01',
    'SGLD_thermal_0.01/action_noise_0.1',
]
Walker_seed = [2, 0]

HalfCheetah_exp = [
    'RMSprop/action_noise_0.2',
    'SGLD_thermal_0.01/no_noise',
]
HalfCheetah_seed = [4, 4]

Hopper_exp = [
    'RMSprop/action_noise_0.3',
    'SGLD_thermal_1e-05/action_noise_0.3',
]
Hopper_seed = [4, 2]

Ant_exp = [
    'RMSprop/action_noise_0.4',
    'SGLD_thermal_0.01/action_noise_0.4',
]
Ant_seed = [2, 1]

# Both lists follow the environment order inside plot_learning_all.
exp = [Walker_exp, HalfCheetah_exp, Hopper_exp, Ant_exp]
seed = [Walker_seed, HalfCheetah_seed, Hopper_seed, Ant_seed]
plot_learning_all(exp, 'Comparison', comparison=True, OnePlayer=True, best=True, seed=seed)

In [127]:
def plot_env(env_name, params, ax, var, OnePlayer, best, seed=None):
    """Load the 'results_model' files of each experiment and plot them.

    With ``best=True`` only seed ``seed[i]`` is loaded for ``params[i]``;
    otherwise seeds 0-4 are loaded. The loaded data is handed to
    ``plot_model``.

    Args:
        env_name: environment name; used as a directory name and plot title.
        params: sequence of experiment sub-paths.
        ax: axes object to draw on.
        var: if True, ``plot_model`` shades one standard deviation.
        OnePlayer: if True, read from 'models_OnePlayer/.../nr_mdp_0_1',
            otherwise from 'models/.../nr_mdp_0.1_1'.
        best: selects single-seed versus all-seed loading.
        seed: required when ``best`` is True; indexed like ``params``.
    """
    if OnePlayer:
        base_dir = os.getcwd() + '/models_OnePlayer/' + env_name
        run_dir = '/nr_mdp_0_1/'
    else:
        base_dir = os.getcwd() + '/models/' + env_name
        run_dir = '/nr_mdp_0.1_1/'

    results = {}
    for i, exp in enumerate(params):
        if best:
            seeds = range(seed[i], seed[i] + 1)
        else:
            seeds = range(0, 5)

        per_seed = {}
        for s in seeds:
            path = base_dir + '/' + exp + run_dir + str(s) + '/results_model'
            try:
                # NOTE(security): pickle can execute arbitrary code on load;
                # only point this at result files you produced yourself.
                with open(path, 'rb') as f:
                    per_seed[s] = pickle.load(f)
            except IOError as err:
                # Missing seeds are tolerated, but reported.
                warnings.warn('Skipping seed %d of %r: %s' % (s, exp, err))
        if per_seed:
            results[exp] = per_seed
        else:
            # An empty entry cannot be turned into the 3-D array plot_model indexes.
            warnings.warn('No results could be loaded for %r in %s; experiment omitted'
                          % (exp, env_name))

    plot_model(results, ax, env_name, var)

def plot_model(results, ax, env_name, var):
    """Plot mean reward (and optionally its spread) over 21 mass settings.

    Args:
        results: mapping experiment -> seed -> mapping whose values are taken,
            in iteration order, as the data for successive mass settings.
            The values must stack into a regular
            (seeds, settings, samples) array with at least 21 settings.
        ax: axes object to draw on.
        env_name: used as the axes title.
        var: if True, shade mean +/- one standard deviation.

    The key 'results_model' and experiments without any loaded seed are
    skipped; the latter produces a warning.
    """
    n_settings = 21
    x = [i for i in range(n_settings)]
    colors = ['#396ab1', '#cc2529', '#3e9651', '#396ab1', '#da7c30', '#94823d', '#535154', '#006400', '#00FF00', '#800000', '#F08080', '#FFFF00', '#000000', '#C0C0C0']

    for idx, exp in enumerate(results):
        if exp in ['results_model']:
            # Previously only the statistics were skipped and the plot call
            # still ran with stale or undefined avg/std.
            continue
        if not results[exp]:
            warnings.warn('No results for experiment %r; nothing plotted for it' % (exp,))
            continue

        avg_seed = np.array([
            [results[exp][seed][mass] for mass in results[exp][seed]]
            for seed in results[exp]
        ])
        avg = []
        std = []
        for i in range(n_settings):
            # Pool every seed and every sample recorded for setting i.
            pooled = avg_seed[:, i, :].flatten()
            avg.append(np.average(pooled))
            std.append(np.std(pooled))

        if 'RMSprop' in exp:
            l = 'Baseline'
        else:
            l = 'SGLD'
        # Wrap around so more experiments than palette entries cannot raise.
        color = colors[idx % len(colors)]
        ax.plot(x, avg, label=l, color=color)
        if var:
            ax.fill_between(x, np.array(avg) - np.array(std), np.array(avg) + np.array(std), facecolor=color, alpha=0.2, interpolate=True)

    ax.set_title(env_name)
    ax.set_xticks([0, 5, 10, 15, 20])
    ax.set_xticklabels([0.5, 0.75, 1.0, 1.5, 2.0])
    ax.set_xlabel('Relative mass')
    ax.set_ylabel('Reward')

In [128]:
def plot_env_all(params, title, comparison, OnePlayer, best, seed = None):
    """Create and save 2x2 mass-uncertainty figures, one panel per environment.

    Two figures are produced: one with the standard-deviation band and one
    without. They are written to
    ``plots/<OnePlayer|TwoPlayer>/Mass_Uncertainty/<title>_<best|average>[_fill].png``.

    Args:
        params: if ``comparison`` is True, a sequence with one experiment list
            per environment, in the order of ``envs`` below. Otherwise a
            single experiment list shared by all environments.
        title: prefix of the output file names.
        comparison: see ``params``; also puts a legend on every panel instead
            of only the first.
        OnePlayer: forwarded to ``plot_env`` and selects the output folder.
        best: forwarded to ``plot_env``; selects single-seed loading.
        seed: required when ``best`` is True; ``seed[i]`` is passed to
            ``plot_env`` for environment ``i``.
    """
    cols = 2
    rows = 2
    envs = ['Walker2d-v2', 'HalfCheetah-v2', 'Hopper-v2', 'Ant-v2']

    if OnePlayer:
        folder = 'OnePlayer'
    else:
        folder = 'TwoPlayer'
    out_dir = 'plots/' + folder + '/Mass_Uncertainty/'
    # savefig does not create missing directories, so make sure it exists.
    if not os.path.isdir(out_dir):
        os.makedirs(out_dir)

    for f in [True, False]:
        fig, axs = plt.subplots(rows, cols, figsize=(20*cols/3, 9*rows/2))
        for i, env in enumerate(envs):
            ax = axs.flatten()[i]
            if comparison:
                param = params[i]
            else:
                param = params
            if best:
                plot_env(env, param, ax, var = f, OnePlayer = OnePlayer, best = best, seed = seed[i])
            else:
                plot_env(env, param, ax, var = f, OnePlayer = OnePlayer, best = best)
            # A shared experiment list only needs a legend on the first panel.
            if comparison or i == 0:
                ax.legend()
        plt.tight_layout()

        suffix = '_best' if best else '_average'
        if f:
            suffix += '_fill'
        plt.savefig(out_dir + title + suffix + '.png')

In [129]:
## Comparison Two Player Average
# Mass-uncertainty curves pooled over seeds, two-player models.
# Each list pairs the RMSprop run with the SGLD run for one environment.
Walker_exp = [
    'RMSprop/no_noise',
    'SGLD_thermal_0.01/action_noise_0.01',
]
HalfCheetah_exp = [
    'RMSprop/action_noise_0.2',
    'SGLD_thermal_0.01/no_noise',
]
Hopper_exp = [
    'RMSprop/action_noise_0.2',
    'SGLD_thermal_0.001/action_noise_0.2',
]
Ant_exp = [
    'RMSprop/action_noise_0.4',
    'SGLD_thermal_0.0001/action_noise_0.2',
]
# Order follows the environment list inside plot_env_all.
params = [Walker_exp, HalfCheetah_exp, Hopper_exp, Ant_exp]
plot_env_all(params, title='Comparison', comparison=True, OnePlayer=False, best=False)

In [130]:
## Comparison One Player Average
# Mass-uncertainty curves pooled over seeds, one-player models.
# Each list pairs the RMSprop run with the SGLD run for one environment.
Walker_exp = [
    'RMSprop/action_noise_0.01',
    'SGLD_thermal_0.01/action_noise_0.1',
]
HalfCheetah_exp = [
    'RMSprop/action_noise_0.4',
    'SGLD_thermal_0.01/action_noise_0.01',
]
Hopper_exp = [
    'RMSprop/action_noise_0.4',
    'SGLD_thermal_1e-05/action_noise_0.3',
]
Ant_exp = [
    'RMSprop/action_noise_0.4',
    'SGLD_thermal_0.01/action_noise_0.4',
]
# Order follows the environment list inside plot_env_all.
params = [Walker_exp, HalfCheetah_exp, Hopper_exp, Ant_exp]
plot_env_all(params, 'Comparison', comparison=True, OnePlayer=True, best=False)

In [131]:
## Comparison Two Player Best
# Mass-uncertainty curves of one selected seed per run, two-player models.
# Each *_exp list pairs the RMSprop run with the SGLD run; the matching
# *_seed list gives the seed directory to load for each of the two runs.
Walker_exp = [
    'RMSprop/no_noise',
    'SGLD_thermal_0.01/action_noise_0.01',
]
Walker_seed = [0, 4]

HalfCheetah_exp = [
    'RMSprop/action_noise_0.2',
    'SGLD_thermal_0.01/no_noise',
]
HalfCheetah_seed = [3, 2]

Hopper_exp = [
    'RMSprop/action_noise_0.2',
    'SGLD_thermal_0.001/action_noise_0.2',
]
Hopper_seed = [3, 4]

Ant_exp = [
    'RMSprop/action_noise_0.3',
    'SGLD_thermal_0.0001/action_noise_0.2',
]
Ant_seed = [1, 3]

# Both lists follow the environment order inside plot_env_all.
exp = [Walker_exp, HalfCheetah_exp, Hopper_exp, Ant_exp]
seed = [Walker_seed, HalfCheetah_seed, Hopper_seed, Ant_seed]
plot_env_all(exp, 'Comparison', comparison=True, OnePlayer=False, best=True, seed=seed)

In [132]:
## Comparison One Player Best
# Mass-uncertainty curves of one selected seed per run, one-player models.
# Each *_exp list pairs the RMSprop run with the SGLD run; the matching
# *_seed list gives the seed directory to load for each of the two runs.
Walker_exp = [
    'RMSprop/action_noise_0.01',
    'SGLD_thermal_0.01/action_noise_0.1',
]
Walker_seed = [2, 0]

HalfCheetah_exp = [
    'RMSprop/action_noise_0.2',
    'SGLD_thermal_0.01/no_noise',
]
HalfCheetah_seed = [4, 4]

Hopper_exp = [
    'RMSprop/action_noise_0.3',
    'SGLD_thermal_1e-05/action_noise_0.3',
]
Hopper_seed = [4, 2]

Ant_exp = [
    'RMSprop/action_noise_0.4',
    'SGLD_thermal_0.01/action_noise_0.4',
]
Ant_seed = [2, 1]

# Both lists follow the environment order inside plot_env_all.
exp = [Walker_exp, HalfCheetah_exp, Hopper_exp, Ant_exp]
seed = [Walker_seed, HalfCheetah_seed, Hopper_seed, Ant_seed]
plot_env_all(exp, 'Comparison', comparison=True, OnePlayer=True, best=True, seed=seed)

In [None]:
from mpl_toolkits.axes_grid1 import make_axes_locatable

def plot_2d(env_name, params, OnePlayer, best, seed=None):
    """Draw and save one 11x11 reward heat map per experiment.

    Each 'results_model_noise' file is read as a two-level mapping. The mean
    of every inner value fills one matrix cell (outer key -> row, inner key ->
    column, both in iteration order). Matrices are averaged over the seeds
    that could be loaded, and all heat maps share one colour scale.

    The figure is written to
    ``plots/<OnePlayer|TwoPlayer>/Heat_map/<env_name>_<best|average>.png``.

    Args:
        env_name: environment name; used in paths, titles and the file name.
        params: sequence of experiment sub-paths.
        OnePlayer: if True, read from 'models_OnePlayer/.../nr_mdp_0_1',
            otherwise from 'models/.../nr_mdp_0.1_1'.
        best: if True, load only seed ``seed[i]`` for ``params[i]``;
            otherwise load seeds 0-4.
        seed: required when ``best`` is True; indexed like ``params``.
    """
    if OnePlayer:
        base_dir = os.getcwd() + '/models_OnePlayer/' + env_name
        run_dir = '/nr_mdp_0_1/'
    else:
        base_dir = os.getcwd() + '/models/' + env_name
        run_dir = '/nr_mdp_0.1_1/'

    results = {}
    # Start from +/- infinity so the shared colour range is taken purely from
    # the data. Fixed seeds of 2000 / -2000 clipped maps outside that range.
    vmin = float('inf')
    vmax = float('-inf')
    for p_idx, param in enumerate(params):
        if best:
            seeds = range(seed[p_idx], seed[p_idx] + 1)
        else:
            seeds = range(0, 5)

        mat = np.zeros((11, 11))
        n_loaded = 0
        for s in seeds:
            path = base_dir + '/' + param + run_dir + str(s) + '/results_model_noise'
            try:
                # NOTE(security): pickle can execute arbitrary code on load;
                # only point this at result files you produced yourself.
                with open(path, 'rb') as f:
                    data = pickle.load(f)
                # Build this seed's matrix separately so a failure half-way
                # through cannot leave a partial contribution in the sum.
                seed_mat = np.zeros((11, 11))
                for row, key in enumerate(data):
                    for col, key2 in enumerate(data[key]):
                        seed_mat[row, col] = np.mean(data[key][key2])
            except Exception as err:
                # Still best-effort, but report what was skipped and why.
                warnings.warn('Skipping seed %d of %r: %s' % (s, param, err))
                continue
            mat += seed_mat
            n_loaded += 1

        if n_loaded == 0:
            warnings.warn('No heat-map data could be loaded for %r in %s; experiment omitted'
                          % (param, env_name))
            continue

        # Divide by the seeds actually summed. A fixed 5 shrank single-seed
        # ("best") maps by a factor of five.
        mat = mat / n_loaded
        results[param] = mat

        vmin = min(vmin, np.min(mat))
        vmax = max(vmax, np.max(mat))

    plt.figure(figsize=(7, 7))
    panel = 1
    for key in results:
        plt.subplot(1, 4, panel)
        panel += 1
        ax = plt.gca()
        im = ax.imshow(results[key], cmap='autumn', interpolation='nearest', vmin=vmin, vmax=vmax)

        # Every second column is labelled; the full axis is
        # [0, 0.05, 0.1, 0.15, 0.2, 0.25, 0.3, 0.35, 0.4, 0.45, 0.5]
        x = [0,2,4,6,8,10]
        labels = [0, 0.1, 0.2, 0.3, 0.4, 0.5]
        plt.xticks(x, labels, fontsize=4)

        # Every second row is labelled; the full axis is
        # [0.5, 0.6, 0.7, 0.8, 0.9, 1.0, 1.2, 1.4, 1.6, 1.8, 2.0]
        y = [0,2,4,6,8,10]
        labels = [0.5, 0.7, 0.9, 1.2, 1.6, 2.0]
        plt.yticks(y, labels, fontsize=4)

        plt.xlabel('Noise Probability', fontsize=5)
        plt.ylabel('Relative Mass', fontsize=5)
        if "RMSprop" in key:
            plt.title('Baseline' + '/' + env_name, fontsize=6)
        else:
            plt.title('SGLD' + '/' + env_name, fontsize=6)

        # Attach a narrow colour bar to the right of this panel.
        divider = make_axes_locatable(ax)
        cax = divider.append_axes("right", size="5%", pad=0.13)
        cbar = plt.colorbar(im, cax=cax)
        cbar.ax.tick_params(labelsize=4)
    plt.tight_layout()

    if OnePlayer:
        folder = 'OnePlayer'
    else:
        folder = 'TwoPlayer'
    out_dir = 'plots/' + folder + '/Heat_map/'
    # savefig does not create missing directories, so make sure it exists.
    if not os.path.isdir(out_dir):
        os.makedirs(out_dir)

    if best:
        plt.savefig(out_dir + env_name + '_best.png', bbox_inches='tight', pad_inches=0.0, dpi=300)
    else:
        plt.savefig(out_dir + env_name + '_average.png', bbox_inches='tight', pad_inches=0.0, dpi=300)

In [None]:
## Comparison Two Player Average
# Heat maps averaged over seeds, two-player models.
# Each list pairs the RMSprop run with the SGLD run for one environment.
Walker_param = [
    'RMSprop/no_noise',
    'SGLD_thermal_0.01/action_noise_0.01',
]
Hopper_param = [
    'RMSprop/action_noise_0.2',
    'SGLD_thermal_0.001/action_noise_0.2',
]
HalfCheetah_param = [
    'RMSprop/action_noise_0.2',
    'SGLD_thermal_0.01/no_noise',
]
Ant_param = [
    'RMSprop/action_noise_0.4',
    'SGLD_thermal_0.0001/action_noise_0.2',
]

# One figure per environment.
plot_2d('Walker2d-v2', Walker_param, OnePlayer=False, best=False)
plot_2d('Hopper-v2', Hopper_param, OnePlayer=False, best=False)
plot_2d('HalfCheetah-v2', HalfCheetah_param, OnePlayer=False, best=False)
plot_2d('Ant-v2', Ant_param, OnePlayer=False, best=False)

In [None]:
## Comparison One Player Average
# Heat maps averaged over seeds, one-player models.
# Each list pairs the RMSprop run with the SGLD run for one environment.
Walker_param = [
    'RMSprop/action_noise_0.01',
    'SGLD_thermal_0.01/action_noise_0.1',
]
Hopper_param = [
    'RMSprop/action_noise_0.4',
    'SGLD_thermal_1e-05/action_noise_0.3',
]
HalfCheetah_param = [
    'RMSprop/action_noise_0.4',
    'SGLD_thermal_0.01/action_noise_0.01',
]
Ant_param = [
    'RMSprop/action_noise_0.4',
    'SGLD_thermal_0.01/action_noise_0.4',
]

# One figure per environment.
plot_2d('Walker2d-v2', Walker_param, OnePlayer=True, best=False)
plot_2d('Hopper-v2', Hopper_param, OnePlayer=True, best=False)
plot_2d('HalfCheetah-v2', HalfCheetah_param, OnePlayer=True, best=False)
plot_2d('Ant-v2', Ant_param, OnePlayer=True, best=False)

In [None]:
## Comparison Two Player Best
# Heat maps of one selected seed per run, two-player models.
# Each *_param list pairs the RMSprop run with the SGLD run; the matching
# *_seed list gives the seed directory to load for each of the two runs.
Walker_param = [
    'RMSprop/no_noise',
    'SGLD_thermal_0.01/action_noise_0.01',
]
Walker_seed = [0, 4]

Hopper_param = [
    'RMSprop/action_noise_0.2',
    'SGLD_thermal_0.001/action_noise_0.2',
]
Hopper_seed = [3, 4]

HalfCheetah_param = [
    'RMSprop/action_noise_0.2',
    'SGLD_thermal_0.01/no_noise',
]
HalfCheetah_seed = [3, 2]

Ant_param = [
    'RMSprop/action_noise_0.3',
    'SGLD_thermal_0.0001/action_noise_0.2',
]
Ant_seed = [1, 3]

# One figure per environment.
plot_2d('Walker2d-v2', Walker_param, OnePlayer=False, best=True, seed=Walker_seed)
plot_2d('Hopper-v2', Hopper_param, OnePlayer=False, best=True, seed=Hopper_seed)
plot_2d('HalfCheetah-v2', HalfCheetah_param, OnePlayer=False, best=True, seed=HalfCheetah_seed)
plot_2d('Ant-v2', Ant_param, OnePlayer=False, best=True, seed=Ant_seed)

In [None]:
## Comparison One Player Best
# Heat maps of one selected seed per run, one-player models.
# Each *_param list pairs the RMSprop run with the SGLD run; the matching
# *_seed list gives the seed directory to load for each of the two runs.
Walker_param = [
    'RMSprop/action_noise_0.01',
    'SGLD_thermal_0.01/action_noise_0.1',
]
Walker_seed = [2, 0]

Hopper_param = [
    'RMSprop/action_noise_0.3',
    'SGLD_thermal_1e-05/action_noise_0.3',
]
Hopper_seed = [4, 2]

HalfCheetah_param = [
    'RMSprop/action_noise_0.2',
    'SGLD_thermal_0.01/no_noise',
]
HalfCheetah_seed = [4, 4]

Ant_param = [
    'RMSprop/action_noise_0.4',
    'SGLD_thermal_0.01/action_noise_0.4',
]
Ant_seed = [2, 1]

# One figure per environment.
plot_2d('Walker2d-v2', Walker_param, OnePlayer=True, best=True, seed=Walker_seed)
plot_2d('Hopper-v2', Hopper_param, OnePlayer=True, best=True, seed=Hopper_seed)
plot_2d('HalfCheetah-v2', HalfCheetah_param, OnePlayer=True, best=True, seed=HalfCheetah_seed)
plot_2d('Ant-v2', Ant_param, OnePlayer=True, best=True, seed=Ant_seed)