In [1]:
import os
import sys
import time
sys.path.append("..")

import torch
import numpy as np
from gym.spaces import Discrete

from models.rl import *
from models.vaes import *
from envs.wrapped_envs import make_vec_atari_envs
from load_config import load_config_from_files
from experiments.handlers.load_model import adapt_load_model
from utils.exp import get_seed, set_seed, unwrap_onehot_to_discrete

In [2]:
def load_model(_actor, save_dir):
    state_dicts_path = os.path.join(save_dir, 'state_dicts.pt')
    state_dicts = torch.load(state_dicts_path, map_location='cpu')
    
    encoder = None
    encoder_state_dicts_path = os.path.join(save_dir, 'encoding_model.pth.tar')
    if os.path.exists(encoder_state_dicts_path):
        load_res = adapt_load_model(encoder_state_dicts_path, map_location='cpu')
        encoder = load_res["model"]
    _actor.load_state_dict(state_dicts["actor"])
    
    return _actor, encoder


def encode(encoder, obs):
    if encoder:
        # encoder observation to state
        with torch.no_grad():
            encoded = encoder.reparameterize(*encoder.encode(obs))
    else:
        encoded = torch.flatten(obs).view(obs.size(0), -1)
        
    return encoded

In [3]:
save_dir = './results/checkpoints/BoxingNoFrameskip-v0'
cfg_dir = './local_exp_conf/mbpo_vae/'
model_tags = ['boxing_mlp_1',
              'boxing_convvae_1',
              'boxing_convbetavae_1',
              'boxing_convbetatcvae_1',
              'boxing_convfactorvae_1',
              'boxing_convsparsevae_1',
              'boxing_convjointvae_1']

saved_models = [os.path.join(save_dir, tag) for tag in model_tags]
model_cfgs = [os.path.join(cfg_dir, tag + '.yml') for tag in model_tags]
print(saved_models)
print(model_cfgs)
frame_interval = 0.1 / 3

['./results/checkpoints/BoxingNoFrameskip-v0\\boxing_mlp_1', './results/checkpoints/BoxingNoFrameskip-v0\\boxing_convvae_1', './results/checkpoints/BoxingNoFrameskip-v0\\boxing_convbetavae_1', './results/checkpoints/BoxingNoFrameskip-v0\\boxing_convbetatcvae_1', './results/checkpoints/BoxingNoFrameskip-v0\\boxing_convfactorvae_1', './results/checkpoints/BoxingNoFrameskip-v0\\boxing_convsparsevae_1', './results/checkpoints/BoxingNoFrameskip-v0\\boxing_convjointvae_1']
['./local_exp_conf/mbpo_vae/boxing_mlp_1.yml', './local_exp_conf/mbpo_vae/boxing_convvae_1.yml', './local_exp_conf/mbpo_vae/boxing_convbetavae_1.yml', './local_exp_conf/mbpo_vae/boxing_convbetatcvae_1.yml', './local_exp_conf/mbpo_vae/boxing_convfactorvae_1.yml', './local_exp_conf/mbpo_vae/boxing_convsparsevae_1.yml', './local_exp_conf/mbpo_vae/boxing_convjointvae_1.yml']


In [4]:
def set_up(seed, tar_idx, num_ep):
    # set seed
    set_seed(seed)

    cfg_file = model_cfgs[tar_idx]
    model_dir = saved_models[tar_idx]

    print(cfg_file)
    cfgs = load_config_from_files([cfg_file])
    rl_cfg = cfgs['rl']
    env_cfg = rl_cfg.env
    algos_cfg = rl_cfg.algos
    sac_cfg = algos_cfg['sac']
    encoding = rl_cfg.encoding_config is not None
    if encoding:
        encoding_cfg = rl_cfg.encoding_config
        encoding_model_cfg = encoding_cfg.model_config

    # results tmp dir
    log_dir = os.path.join('results', env_cfg.env_name, 'tmp', 'log')
    if not os.path.exists(log_dir):
        os.makedirs(log_dir)

    # init env
    device = torch.device('cpu')
    eval_envs = make_vec_atari_envs(
        env_cfg.env_name, get_seed(), 1, None, log_dir,
        device, allow_early_resets=True, norm_reward=True)
    action_space = eval_envs.action_space
    # init state dimension
    if encoding:
        model_args = encoding_model_cfg.model_args
        state_dim = model_args['dim_z']
    else:
        state_dim = np.prod(eval_envs.observation_space.shape)
    # init actor
    _actor = Actor(state_dim, action_space, sac_cfg['actor_hidden_dims'],
                  state_normalizer=None, use_limited_entropy=False,
                  use_tanh_squash=True, use_state_dependent_std=True)

    _actor, _encoder = load_model(_actor, model_dir)
    _actor.eval()
    if _encoder:
        _encoder.eval()
        
    return eval_envs, _actor, _encoder

### ramdom baseline

In [5]:
def idle_eps(eval_envs, num_ep, render=False):
    # idle agent
    eval_episode_rewards = []
    eval_episode_lengths = []

    obs = eval_envs.reset()
    while len(eval_episode_rewards) < num_ep:
        if render:
            eval_envs.render()
            time.sleep(frame_interval)
        obs, _, _, infos = eval_envs.step(torch.tensor([[0]]))

        eval_episode_rewards.extend([info['episode']['r'] for info in infos if 'episode' in info])
        eval_episode_lengths.extend([info['episode']['l'] for info in infos if 'episode' in info])

    return eval_episode_rewards, eval_episode_lengths

In [6]:
def random_eps(eval_envs, num_ep, render=False):
    # random acting agent
    eval_episode_rewards = []
    eval_episode_lengths = []

    obs = eval_envs.reset()
    while len(eval_episode_rewards) < num_ep:
        if render:
            eval_envs.render()
            time.sleep(frame_interval)
        obs, _, _, infos = eval_envs.step(
            torch.tensor([[eval_envs.action_space.sample()]]))
    
        eval_episode_rewards.extend([info['episode']['r'] for info in infos if 'episode' in info])
        eval_episode_lengths.extend([info['episode']['l'] for info in infos if 'episode' in info])

    return eval_episode_rewards, eval_episode_lengths

In [7]:
def test_eps(eval_envs, num_ep, _actor, _encoder, render=False):
    eval_episode_rewards = []
    eval_episode_lengths = []

    obs = eval_envs.reset()
    states = encode(_encoder, obs)
    while len(eval_episode_rewards) < num_ep:
        if render:
            eval_envs.render()
            time.sleep(frame_interval)
    
        with torch.no_grad():
            actions = _actor.act(states, deterministic=True)['actions']
        
        if isinstance(eval_envs.action_space, Discrete):
            actions = torch.tensor(unwrap_onehot_to_discrete(actions.cpu()))
        
        obs, _, _, infos = eval_envs.step(actions)
        states = encode(_encoder, obs)
        eval_episode_rewards.extend([info['episode']['r'] for info in infos if 'episode' in info])
        eval_episode_lengths.extend([info['episode']['l'] for info in infos if 'episode' in info])
    
    return eval_episode_rewards, eval_episode_lengths

In [8]:
def test_atari(seed, tar_idx, num_ep, render=False):
    envs, _actor, _encoder = set_up(seed, tar_idx, num_ep)
    if tar_idx == 'idle':
        print('run idle')
        res = idle_eps(envs, num_ep, render)
    elif tar_idx == 'random':
        print('run random')
        res = random_eps(envs, num_ep, render)
    else:
        print('run {}th agent'.format(tar_idx))
        res = test_eps(envs, num_ep, _actor, _encoder, render)
        
    envs.close()
    return res

## run tests

In [10]:
# single render test
seed = 5
tar_idx = 5
num_ep = 5
render = True

test_atari(seed, tar_idx, num_ep, render)

./local_exp_conf/mbpo_vae/boxing_convsparsevae_1.yml
run 5th agent


([4.0, 0.0, 2.0, 0.0, -3.0], [1781, 1782, 1783, 1779, 1779])

In [33]:
# statistic
seeds_range = list(range(100))
random.shuffle(seeds_range)
seeds = seeds_range[:10]
num_ep = 10
num_models = len(model_cfgs)

rewards = []
lengths = []

for i in range(num_models):
    model_rew = []
    model_len = []
    for seed in seeds:
        rews, lens = test_atari(seed, i, num_ep, False)
        model_rew.append(np.mean(rews))
        model_len.append(np.mean(lens))
    rewards.append(model_rew)
    lengths.append(model_len)

./local_exp_conf/mbpo_vae/boxing_mlp_1.yml
run 0th agent
./local_exp_conf/mbpo_vae/boxing_mlp_1.yml
run 0th agent
./local_exp_conf/mbpo_vae/boxing_mlp_1.yml
run 0th agent
./local_exp_conf/mbpo_vae/boxing_mlp_1.yml
run 0th agent
./local_exp_conf/mbpo_vae/boxing_mlp_1.yml
run 0th agent
./local_exp_conf/mbpo_vae/boxing_mlp_1.yml
run 0th agent
./local_exp_conf/mbpo_vae/boxing_mlp_1.yml
run 0th agent
./local_exp_conf/mbpo_vae/boxing_mlp_1.yml
run 0th agent
./local_exp_conf/mbpo_vae/boxing_mlp_1.yml
run 0th agent
./local_exp_conf/mbpo_vae/boxing_mlp_1.yml
run 0th agent
./local_exp_conf/mbpo_vae/boxing_convvae_1.yml
run 1th agent
./local_exp_conf/mbpo_vae/boxing_convvae_1.yml
run 1th agent
./local_exp_conf/mbpo_vae/boxing_convvae_1.yml
run 1th agent
./local_exp_conf/mbpo_vae/boxing_convvae_1.yml
run 1th agent
./local_exp_conf/mbpo_vae/boxing_convvae_1.yml
run 1th agent
./local_exp_conf/mbpo_vae/boxing_convvae_1.yml
run 1th agent
./local_exp_conf/mbpo_vae/boxing_convvae_1.yml
run 1th agent
./l



./local_exp_conf/mbpo_vae/boxing_convsparsevae_1.yml
run 5th agent
./local_exp_conf/mbpo_vae/boxing_convsparsevae_1.yml
run 5th agent
./local_exp_conf/mbpo_vae/boxing_convsparsevae_1.yml
run 5th agent
./local_exp_conf/mbpo_vae/boxing_convsparsevae_1.yml
run 5th agent
./local_exp_conf/mbpo_vae/boxing_convsparsevae_1.yml
run 5th agent
./local_exp_conf/mbpo_vae/boxing_convsparsevae_1.yml
run 5th agent
./local_exp_conf/mbpo_vae/boxing_convsparsevae_1.yml
run 5th agent
./local_exp_conf/mbpo_vae/boxing_convsparsevae_1.yml
run 5th agent
./local_exp_conf/mbpo_vae/boxing_convsparsevae_1.yml
run 5th agent
./local_exp_conf/mbpo_vae/boxing_convjointvae_1.yml
run 6th agent
./local_exp_conf/mbpo_vae/boxing_convjointvae_1.yml
run 6th agent
./local_exp_conf/mbpo_vae/boxing_convjointvae_1.yml
run 6th agent
./local_exp_conf/mbpo_vae/boxing_convjointvae_1.yml
run 6th agent
./local_exp_conf/mbpo_vae/boxing_convjointvae_1.yml
run 6th agent
./local_exp_conf/mbpo_vae/boxing_convjointvae_1.yml
run 6th agent
.

NameError: name 'model_tagss' is not defined

In [40]:
import re
tags = [ re.findall("_(.+)_", mtag)[0] for mtag in model_tags ]

In [2]:
import random
import matplotlib
import matplotlib.pyplot as plt
%matplotlib qt5

In [3]:
plt.title('Episode lengths', fontsize=20)
plt.boxplot(lengths, labels=tags)

NameError: name 'lengths' is not defined

In [42]:
plt.title('Rewards', fontsize=20)
plt.boxplot(rewards, labels=tags)

{'whiskers': [<matplotlib.lines.Line2D at 0x143764ef9b0>,
  <matplotlib.lines.Line2D at 0x143762f90f0>,
  <matplotlib.lines.Line2D at 0x14375b473c8>,
  <matplotlib.lines.Line2D at 0x14375b47e48>,
  <matplotlib.lines.Line2D at 0x14375fcb898>,
  <matplotlib.lines.Line2D at 0x14375cf8b00>,
  <matplotlib.lines.Line2D at 0x14375ceb2b0>,
  <matplotlib.lines.Line2D at 0x14375cdfeb8>,
  <matplotlib.lines.Line2D at 0x14375cd0208>,
  <matplotlib.lines.Line2D at 0x14375cc9f98>,
  <matplotlib.lines.Line2D at 0x143764f5c18>,
  <matplotlib.lines.Line2D at 0x143764f5240>,
  <matplotlib.lines.Line2D at 0x143741ae438>,
  <matplotlib.lines.Line2D at 0x14376eeea90>],
 'caps': [<matplotlib.lines.Line2D at 0x143762f99b0>,
  <matplotlib.lines.Line2D at 0x143762f9b38>,
  <matplotlib.lines.Line2D at 0x14375edf5f8>,
  <matplotlib.lines.Line2D at 0x14375edf4a8>,
  <matplotlib.lines.Line2D at 0x14375cf84a8>,
  <matplotlib.lines.Line2D at 0x14375cf3240>,
  <matplotlib.lines.Line2D at 0x14375cdfa58>,
  <matplotlib