In [1]:
import deep_rl

In [2]:
%matplotlib inline
%reload_ext autoreload
%autoreload 2

In [3]:
import torch
from torch.nn import functional as F
from torch.autograd import Variable
from torch import nn, optim
import torch.utils.data

# load as dask array
import dask.array as da
import dask
import h5py

import os
import glob
import numpy as np
from matplotlib import pyplot as plt
from tqdm import tqdm_notebook as tqdm

  from ._conv import register_converters as _register_converters


In [4]:
from world_models_sonic.models.vae import VAE6, loss_function_vae
from world_models_sonic.helpers.summarize import TorchSummarizeDf
from world_models_sonic.helpers.dataset import load_cache_data
from world_models_sonic.custom_envs.wrappers import RenderWrapper, WorldModelWrapper
from world_models_sonic.models.rnn import MDNRNN2
from world_models_sonic.models.inverse_model import InverseModel
from world_models_sonic.models.world_model import WorldModel
from world_models_sonic import config
from world_models_sonic.custom_envs.env import make_env

Importing 0 potential games...
Imported 0 games


In [5]:
"""
In jupyter notebook simple logging to console
"""
import logging
import sys

# Route log records of level INFO and above to stdout.
logging.basicConfig(stream=sys.stdout, level=logging.INFO)

# Init

In [6]:
# --- Runtime ---------------------------------------------------------------
cuda = torch.cuda.is_available()
verbose = True  # Set this true to render (and make it go slower)

# --- Run identifiers and paths ---------------------------------------------
env_name = 'sonic256'
# NAME is the prefix of the model checkpoint file names and the logger name.
NAME = 'RNN_v3b_256im_512z_1512_v5_greenfield'
ppo_save_file = './outputs/models/PPO_greenfields_256z_v2.pkl'
data_cache_file = os.path.join(config.base_vae_data_dir, 'sonic_rnn_256_v30.hdf5')

# --- Training sizes ---------------------------------------------------------
num_epochs = 200
batch_size = 2

# --- VAE --------------------------------------------------------------------
image_size = 256
z_dim = 256  # latent dimensions
# VAE loss function
lambda_vae_kld = 0.25
C = 0

# --- RNN --------------------------------------------------------------------
action_dim = 10
seq_len = 5
chunksize = seq_len * 20

# --- Loss function weights ---------------------------------------------------
lambda_vae = 1 / 100
lambda_finv = 1

# Load Data

# Load VAE

In [7]:
# Load VAE: build the model, move it to the GPU when one is available, and
# restore weights from this run's checkpoint if the file exists.
# TODO swap z and k dim, since it's inconsistent with other models
vae = VAE6(image_size=image_size, z_dim=32, conv_dim=48, code_dim=8, k_dim=z_dim)
if cuda:
    vae.cuda()

# Resume
save_file = './outputs/models/{NAME}-vae_state_dict.pkl'.format(NAME=NAME)
if os.path.isfile(save_file):
    # map_location='cpu' lets a checkpoint that was saved from a GPU load on a
    # CPU-only machine; with CUDA available the default placement is kept.
    state_dict = torch.load(save_file, map_location=None if cuda else 'cpu')
    vae.load_state_dict(state_dict)
    print('loaded save_file {save_file}'.format(save_file=save_file))

loaded save_file ./outputs/models/RNN_v3b_256im_512z_1512_v5_greenfield-vae_state_dict.pkl


# Load RNN

In [8]:
# Load MDN-RNN: build the model, move it to the GPU when one is available, and
# restore weights from this run's checkpoint if the file exists.
action_dim, hidden_size, n_mixture, temp = action_dim, 128, 3, 0.0


mdnrnn = MDNRNN2(z_dim, action_dim, hidden_size, n_mixture, temp)

if cuda:
    mdnrnn = mdnrnn.cuda()

# Resume?
save_file = './outputs/models/{NAME}-mdnrnn_state_dict.pkl'.format(NAME=NAME)
if os.path.isfile(save_file):
    # map_location='cpu' lets a checkpoint that was saved from a GPU load on a
    # CPU-only machine; with CUDA available the default placement is kept.
    state_dict = torch.load(save_file, map_location=None if cuda else 'cpu')
    mdnrnn.load_state_dict(state_dict)
    print('loaded {save_file}'.format(save_file=save_file))

loaded ./outputs/models/RNN_v3b_256im_512z_1512_v5_greenfield-mdnrnn_state_dict.pkl


# Inverse Model (finv)

In [9]:
# Build the inverse model. Only move it to the GPU when CUDA is available,
# matching how the VAE and MDN-RNN cells are guarded by the `cuda` flag.
finv = InverseModel(z_dim, action_dim, hidden_size=256)
if cuda:
    finv = finv.cuda()
finv.eval()

# Resume?
save_file = './outputs/models/{NAME}-finv_state_dict.pkl'.format(NAME=NAME)
if os.path.isfile(save_file):
    # map_location='cpu' lets a checkpoint that was saved from a GPU load on a
    # CPU-only machine; with CUDA available the default placement is kept.
    state_dict = torch.load(save_file, map_location=None if cuda else 'cpu')
    finv.load_state_dict(state_dict)
    print('loaded {save_file}'.format(save_file=save_file))

loaded ./outputs/models/RNN_v3b_256im_512z_1512_v5_greenfield-finv_state_dict.pkl


# Build world model

In [10]:
# Combine the three sub-models and switch the result to eval mode in one step;
# the bare name on the last line displays the module structure.
world_model = WorldModel(vae, mdnrnn, finv).eval()
world_model

WorldModel(
  (vae): VAE6(
    (logvar): Linear(in_features=2048, out_features=256, bias=True)
    (mu): Linear(in_features=2048, out_features=256, bias=True)
    (z): Linear(in_features=256, out_features=2048, bias=True)
    (encoder): Sequential(
      (0): BasicConv2d(
        (conv): Conv2d(3, 48, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
        (bn): BatchNorm2d(48, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (act): LeakyReLU(negative_slope=0.01, inplace)
      )
      (1): ConvBlock5(
        (conv0): InceptionA(
          (branch1x1): BasicConv2d(
            (conv): Conv2d(48, 12, kernel_size=(1, 1), stride=(1, 1))
            (bn): BatchNorm2d(12, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
            (act): LeakyReLU(negative_slope=0.01, inplace)
          )
          (branch5x5_1): BasicConv2d(
            (conv): Conv2d(48, 9, kernel_size=(1, 1), stride=(1, 1))
            (bn): BatchNorm2d(9, eps=1e-05, momentum=0.

# summarize

In [11]:
# Build a random frame and a random action to push through the models.
img = np.random.randn(image_size, image_size, 3)
action = np.array(np.random.randint(0, action_dim))[np.newaxis]
action = Variable(torch.from_numpy(action)).float()[np.newaxis]
# Add a leading axis and reorder the frame's axes with transpose(0, 3, 1, 2).
gpu_img = Variable(torch.from_numpy(img[np.newaxis].transpose(0, 3, 1, 2))).float()
# Move the inputs to the GPU only when CUDA is available, so the cell also
# runs on CPU-only machines.
if cuda:
    action = action.cuda()
    gpu_img = gpu_img.cuda()

# Record a per-layer summary while running one forward pass and one sample.
with TorchSummarizeDf(vae) as tdf:
    x, mu_vae, logvar_vae = vae.forward(gpu_img)
    z = vae.sample(mu_vae, logvar_vae)
    df_vae = tdf.make_df()

# Show only the rows whose `level` is below 2.
df_vae[df_vae.level < 2]

Total parameters 8909862
Total trainable parameters 8909862


Unnamed: 0,name,class_name,input_shape,output_shape,nb_params,level
4,encoder.0,BasicConv2d,"[(-1, 3, 256, 256)]","[(-1, 48, 256, 256)]",1440,1
42,encoder.1,ConvBlock5,"[(-1, 48, 256, 256)]","[(-1, 96, 128, 128)]",93213,1
80,encoder.2,ConvBlock5,"[(-1, 96, 128, 128)]","[(-1, 144, 64, 64)]",281034,1
118,encoder.3,ConvBlock5,"[(-1, 144, 64, 64)]","[(-1, 192, 32, 32)]",566055,1
156,encoder.4,ConvBlock5,"[(-1, 192, 32, 32)]","[(-1, 240, 16, 16)]",948276,1
194,encoder.5,ConvBlock5,"[(-1, 240, 16, 16)]","[(-1, 288, 8, 8)]",1427697,1
232,encoder.6,ConvBlock5,"[(-1, 288, 8, 8)]","[(-1, 32, 8, 8)]",351550,1
233,mu,Linear,"[(-1, 2048)]","[(-1, 256)]",524544,0
234,logvar,Linear,"[(-1, 2048)]","[(-1, 256)]",524544,0
235,z,Linear,"[(-1, 256)]","[(-1, 2048)]",526336,0


In [12]:
# Repeat the sampled latent and the action twice along the second axis, then
# summarize the MDN-RNN on that input.
z_seq = z.unsqueeze(1).repeat((1, 2, 1))
action_seq = action.repeat((1, 2))
with TorchSummarizeDf(mdnrnn) as tdf:
    pi, mu, sigma, hidden_state = mdnrnn.forward(z_seq, action_seq)
    z_next = mdnrnn.sample(pi, mu, sigma)
    df_mdnrnn = tdf.make_df()

df_mdnrnn

Total parameters 1778688
Total trainable parameters 1778688


Unnamed: 0,name,class_name,input_shape,output_shape,nb_params,level
1,rnn,LSTM,"[[(-1, 2, 266)], [[(-1, 1, 128)], [(-1, 1, 128...","[[(-1, 2, 128)], [[(-1, 1, 128)], [(-1, 1, 128...",202752,0
2,ln1,Linear,"[(-1, 128), (-1, 128)]","[(-1, 128), (-1, 128)]",16512,0
3,ln2,Linear,"[(-1, 128), (-1, 128)]","[(-1, 640), (-1, 640)]",82560,0
4,mdn,Linear,"[(-1, 640), (-1, 640)]","[(-1, 2304), (-1, 2304)]",1476864,0


In [13]:

# Summarize the inverse model on the repeated latent and the sampled next latent.
z_pair = z.repeat((1, 2, 1))
with TorchSummarizeDf(finv) as tdf:
    action_pred = finv(z_pair, z_next)
    df_finv = tdf.make_df()

df_finv

Total parameters 199690
Total trainable parameters 199690


Unnamed: 0,name,class_name,input_shape,output_shape,nb_params,level
1,ln1,Linear,"[(-1, 2, 512)]","[(-1, 2, 256)]",131328,0
2,ln2,Linear,"[(-1, 2, 256)]","[(-1, 2, 256)]",65792,0
3,ln3,Linear,"[(-1, 2, 256)]","[(-1, 2, 10)]",2570,0


In [14]:
# Summarize the combined world model on the dummy frame and action.
# Note: this rebinds `z` and `z_next` from the earlier summary cells.
with TorchSummarizeDf(world_model) as tdf:
    z_next, z, _ = world_model(gpu_img, action)
    df_world_model = tdf.make_df()

# Show only the rows whose `level` is below 2.
df_world_model[df_world_model.level < 2]

Total parameters 10888240
Total trainable parameters 10888240


Unnamed: 0,name,class_name,input_shape,output_shape,nb_params,level
233,vae.mu,Linear,"[(-1, 2048)]","[(-1, 256)]",524544,1
234,vae.logvar,Linear,"[(-1, 2048)]","[(-1, 256)]",524544,1
235,vae.z,Linear,"[(-1, 256)]","[(-1, 2048)]",526336,1
465,vae.sigmoid,Sigmoid,"[(-1, 3, 256, 256)]","[(-1, 3, 256, 256)]",0,1
466,mdnrnn.rnn,LSTM,"[[(-1, 1, 266)], [[(-1, 1, 128)], [(-1, 1, 128...","[[(-1, 1, 128)], [[(-1, 1, 128)], [(-1, 1, 128...",202752,1
467,mdnrnn.ln1,Linear,"[(-1, 128)]","[(-1, 128)]",16512,1
468,mdnrnn.ln2,Linear,"[(-1, 128)]","[(-1, 640)]",82560,1
469,mdnrnn.mdn,Linear,"[(-1, 640)]","[(-1, 2304)]",1476864,1


In [15]:
# Remove the scratch variables created by the summary cells from the namespace.
del img, action, gpu_img, x, mu, z

# Env wrappers

In [16]:
from deep_rl.utils import Config
from deep_rl.utils.logger import get_logger, get_default_log_dir

from deep_rl.network.network_heads import CategoricalActorCriticNet, QuantileNet, OptionCriticNet, DeterministicActorCriticNet, GaussianActorCriticNet
from deep_rl.network.network_bodies import FCBody

from deep_rl.agent.PPO_agent import PPOAgent
from deep_rl.component.task import ParallelizedTask, BaseTask
from deep_rl.utils.misc import run_episodes, run_iterations

In [17]:
class SonicWorldModelDeepRL(BaseTask):
    """deep_rl task that exposes the Sonic environment through the world model.

    The environment returned by ``make_env`` is wrapped in ``WorldModelWrapper``,
    passed through ``set_monitor``, and, when the notebook-level ``verbose``
    flag is truthy, wrapped in ``RenderWrapper``.

    Args:
        name: environment name passed to ``make_env``.
        max_steps: value stored on the wrapped env as ``_max_episode_steps``.
        log_dir: directory handed to ``set_monitor``.
        world_model_func: zero-argument callable returning the world model.
        state: forwarded to ``make_env``.
        game: forwarded to ``make_env``.
    """
    def __init__(self, name='sonic256', max_steps=4500, log_dir=None, world_model_func=None, state=None, game=None):
        BaseTask.__init__(self)
        self.name = name
        self.world_model = world_model_func()

        # Base env -> world-model wrapper.
        base_env = make_env(self.name, state=state, game=game)
        wrapped_env = WorldModelWrapper(base_env, self.world_model)
        wrapped_env._max_episode_steps = max_steps
        self.env = wrapped_env

        # Dimensions are read from the world-model wrapper's spaces.
        self.action_dim = wrapped_env.action_space.n
        self.state_dim = wrapped_env.observation_space.shape[0]

        monitored_env = self.set_monitor(wrapped_env, log_dir)
        # Rendering is optional and controlled by the global `verbose` flag.
        self.env = RenderWrapper(monitored_env, mode='world_model') if verbose else monitored_env  # world_model_next



# Train

In [18]:
# Log directory as returned by deep_rl's get_default_log_dir for the PPO save path.
log_dir = get_default_log_dir(ppo_save_file)

# Factory for one Sonic task on Green Hill Zone Act 1, sharing the single
# world model built earlier in the notebook.
task_fn = lambda log_dir: SonicWorldModelDeepRL(
    'sonic256',
    max_steps=200,
    log_dir=log_dir,
    world_model_func=lambda: world_model,
    state='GreenHillZone.Act1',
    game='SonicTheHedgehog-Genesis'
)

# Named `ppo_config` rather than `config` so it does not shadow the
# `world_models_sonic.config` module that the settings cell reads paths from.
ppo_config = Config()

ppo_config.num_workers = 1
ppo_config.task_fn = lambda: ParallelizedTask(
    task_fn, ppo_config.num_workers, single_process=ppo_config.num_workers == 1)
ppo_config.optimizer_fn = lambda params: torch.optim.RMSprop(params, 1e-3)
ppo_config.network_fn = lambda state_dim, action_dim: CategoricalActorCriticNet(
        state_dim, action_dim, FCBody(state_dim), gpu=-1)
ppo_config.discount = 0.99
ppo_config.logger = get_logger(NAME, level=20)
ppo_config.use_gae = True
ppo_config.gae_tau = 0.95
ppo_config.entropy_weight = 0.01
ppo_config.gradient_clip = 0.5
ppo_config.rollout_length = 128
ppo_config.optimization_epochs = 10
ppo_config.num_mini_batches = 4
ppo_config.ppo_ratio_clip = 0.2
ppo_config.iteration_log_interval = 1

agent = PPOAgent(ppo_config)
env = agent.task.tasks[0].env

# Resume the agent from its save file when one exists.
if os.path.isfile(ppo_save_file):
    print('loading', ppo_save_file)
    agent.load(ppo_save_file)

game: SonicTheHedgehog-Genesis state: GreenHillZone.Act1


AttributeError: 'Tensor' object has no attribute 'detatch'

In [None]:
# Run training; on any exception (including a keyboard interrupt) close the
# first task's environment before re-raising.
try:
    run_iterations(agent)
except BaseException:
    agent.task.tasks[0].env.close()
    raise

In [None]:
%debug

# Debug

In [None]:

# Build a fresh task and drive its environment with 100 random actions.
task = task_fn(log_dir)
debug_env = task.env
try:
    debug_env.reset()
    debug_env.render()
    for i in tqdm(range(100)):
        action = debug_env.action_space.sample()
        # The action is applied to the wrapped env and then to the unwrapped one.
        debug_env.step(action)
        debug_env.unwrapped.step(action)
        debug_env.render()
except BaseException:
    # Close the underlying environment before propagating the error.
    debug_env.unwrapped.close()
    raise

In [None]:
# Close the unwrapped environment of the debug task created in the previous cell.
task.env.unwrapped.close()

In [None]:
# Drive the environment returned by make_env (no world-model wrapper) with
# 1000 random actions, rendering each step.
env = make_env('sonic256')
try:
    env.reset()
    env.render()
    for i in tqdm(range(1000)):
        action = env.action_space.sample()
        env.step(action)
        env.render()
except BaseException:
    # Close the environment on failure or interrupt, like the wrapped-env
    # debug cell does, then re-raise.
    env.close()
    raise

In [None]:
# Close the environment bound to `env` by the previous cell.
env.close()

In [None]:
# task_fn = lambda log_dir: SonicWorldModelDeepRL('sonic256', max_steps=200, log_dir=log_dir, world_model_func=lambda :world_model)
# task = task_fn(log_dir)
# try:
#     task.env.reset()
#     task.env.render()
#     for i in tqdm(range(100)):
#         action = task.env.action_space.sample()
#         task.env.step(action)
#         task.env.unwrapped.step(action)
#         task.env.render()
# except:
#     task.env.unwrapped.close()
#     raise
# else:
#     task.env.unwrapped.close()

In [None]:
# this version renders all in one window
# import cv2
# import gym
# from gym import spaces

# # def np_sigmoid(x):
# #     return 1.0/(1.0+np.exp(-x))

# # class Action2Int(gym.ActionWrapper):
# #     def action(self, action):
# #         return np_sigmoid(action).round(0).astype(int)
    
# class RenderWrapper(gym.Wrapper):
#     def __init__(self, env, mode='world_model'):
#         """Uses the world model to give next latent state as observation."""
#         super().__init__(env)
#         self.mode = mode
        
#     def step(self, action):
#         observation, reward, done, info = self.env.step(action)
#         self.render(mode=self.mode)
#         return observation, reward, done, info
    
#     def reset(self):
#         observation = self.env.reset()
#         self.render(mode=self.mode)
#         return observation

# class WorldModelWrapper(gym.Wrapper):
#     def __init__(self, env, world_model):
#         """Uses the world model to give next latent state as observation."""
#         super().__init__(env)
#         self.world_model = world_model
#         self.img_z = None
#         self.img_z_next_pred = None
#         old_obs_space = env.observation_space
#         self.observation_space = spaces.Box(low=-1000, high=1000,
#                                             shape=(world_model.mdnrnn.z_dim,), dtype=np.float32)
        
#     def step(self, action):
#         action = action.round(0).astype(int)
        
#         observation, reward, done, info = self.env.step(action)
        
#         action = torch.from_numpy(np.array(action)).cuda().unsqueeze(0).unsqueeze(0)
#         observation = torch.from_numpy(observation).cuda().unsqueeze(0).transpose(1, 3)
        
#         z_next, z, h = self.world_model.forward(observation, action)
#         z = z.squeeze(0).cpu().data.numpy()
#         z_next = z_next.squeeze(0).cpu().data.numpy()
        
#         self.z = z
#         self.z_next = z_next
        
#         return z_next, reward, done, info
    
#     def reset(self):
#         action = torch.from_numpy(np.array(self.env.action_space.sample())).cuda().unsqueeze(0).unsqueeze(0)
#         observation = torch.from_numpy(self.env.reset()).cuda().unsqueeze(0).transpose(1, 3)
#         z_next, z, h = self.world_model.forward(observation, action)
#         z = z.squeeze(0).cpu().data.numpy()
#         z_next = z_next.squeeze(0).cpu().data.numpy()
#         self.z = z
#         self.z_next = z_next
#         return z_next
    
#     def render(self, mode='human', close=False):
#         env = self.env.unwrapped
#         if close:
#             for viewer in [self.viewer_z, self.viewer_z_next, self.viewer_img_z, self.viewer_img_z_next]:
#                 viewer.close()
#             if env.viewer:
#                 env.viewer.close()
#             return  
#         if mode=='world_model':
#             if env.viewer is None:
#                 from gym.envs.classic_control.rendering import SimpleImageViewer
#                 import pyglet
                
#                 env.viewer = SimpleImageViewer()
#                 env.viewer.window = pyglet.window.Window(width=256+256+320, height=256+256, vsync=False, resizable=True, caption='Game output')
# #                 env.viewer.window.set_location(0,0)

                
#             # to pytorch
#             zv = torch.from_numpy(self.z)[None, :]
#             zv_next = torch.from_numpy(self.z_next)[None, :]
#             if cuda:
#                 zv = zv.cuda()
#                 zv_next = zv_next.cuda()
            
#             # Decode
#             img_z = self.world_model.vae.decode(zv)
#             img_z = img_z.squeeze(0).transpose(0,2)
#             img_z = img_z.data.cpu().numpy()
#             img_z = (img_z*255).astype(np.uint8)
            
#             img_z_next = self.world_model.vae.decode(zv_next)
#             img_z_next = img_z_next.squeeze(0).transpose(0,2)
#             img_z_next = img_z_next.data.cpu().numpy()
#             img_z_next = (img_z_next*255).astype(np.uint8)
            
#             z_uint8 = ((self.z+0.5)*255).astype(np.uint8).reshape((16,16,1))
#             z_next_uint8 = ((self.z_next+0.5)*255).astype(np.uint8).reshape((16,16,1))
#             z_uint8 = cv2.resize(z_uint8, dsize=(z_dim, z_dim), interpolation=cv2.INTER_CUBIC)
#             z_next_uint8 = cv2.resize(z_next_uint8, dsize=(z_dim, z_dim), interpolation=cv2.INTER_CUBIC)
            
#             img = np.zeros((256+256+320, z_dim*3,3)).astype(np.uint8)
            
#             w1, h1, _ = env.img.shape
#             img[:w1, :h1]=env.img
#             img[z_dim*0+w1:z_dim*1+w1, :z_dim]=img_z
#             img[z_dim*1+w1:z_dim*2+w1, :z_dim]=img_z_next
            
#             # I need to resize these, then expand the amount of pixels each covers
#             img[z_dim*0+w1:z_dim*1+w1,h1:z_dim*1+h1]=z_uint8[:,:,None]
#             img[z_dim*1+w1:z_dim*2+w1,h1:z_dim*1+h1]=z_next_uint8[:,:,None]
#             env.viewer.imshow(img)
            
#             return env.viewer.isopen
#         return self.env.render(mode=mode, close=close)
              

# class SonicWorldModelDeepRL(BaseTask):
#     def __init__(self, name='sonic256', max_steps=4500, log_dir=None, world_model_func=None, state=None, game=None):
#         BaseTask.__init__(self)
#         self.name = name
#         self.world_model = world_model_func()
#         self.env = WorldModelWrapper(make_env(self.name, state=state, game=game), self.world_model)
#         self.env._max_episode_steps = max_steps
#         self.action_dim = self.env.action_space.n
#         self.state_dim = self.env.observation_space.shape[0]
#         self.env = self.set_monitor(self.env, log_dir)
#         if verbose:
#             self.env = RenderWrapper(self.env, mode='world_model') # world_model_next