<a href="https://colab.research.google.com/github/zzmtsvv/rl_task/blob/main/inference.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
# Include this at the top of your colab code.
# One-time system setup for MuJoCo (mujoco210 archive) and mujoco-py.
# The marker file `.mujoco_setup_complete`, created on the last line of the
# branch below, makes later runs of this cell skip the whole installation.
import os
if not os.path.exists('.mujoco_setup_complete'):
  # Get the prereqs: OSMesa / GL / GLFW / GLEW packages and patchelf.
  !apt-get -qq update
  !apt-get -qq install -y libosmesa6-dev libgl1-mesa-glx libglfw3 libgl1-mesa-dev libglew-dev patchelf
  # Get Mujoco: download the archive, unpack it under ~/.mujoco, drop the tarball.
  !mkdir ~/.mujoco
  !wget -q https://mujoco.org/download/mujoco210-linux-x86_64.tar.gz -O mujoco.tar.gz
  !tar -zxf mujoco.tar.gz -C "$HOME/.mujoco"
  !rm mujoco.tar.gz
  # Add it to the actively loaded path and the bashrc path (these only do so much):
  # the exports are appended to ~/.bashrc, so they only affect shells started later.
  !echo 'export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:$HOME/.mujoco/mujoco210/bin' >> ~/.bashrc 
  !echo 'export LD_PRELOAD=$LD_PRELOAD:/usr/lib/x86_64-linux-gnu/libGLEW.so' >> ~/.bashrc 
  # The annoying one: force the MuJoCo bin directory into ldconfig so the
  # libraries are actually accessible in this session.
  # NOTE(review): this path is hard-coded to /root while the archive above is
  # unpacked under $HOME - assumes the notebook runs as root; confirm for
  # hosts other than Colab.
  !echo "/root/.mujoco/mujoco210/bin" > /etc/ld.so.conf.d/mujoco_ld_lib_path.conf
  !ldconfig
  # Install Mujoco-py (any 2.1.x release).
  !pip3 install -U 'mujoco-py<2.2,>=2.1'
  # Run once: create the marker file checked by the `if` above.
  !touch .mujoco_setup_complete

def _append_env_paths(var_name, *new_paths):
  """Append `new_paths` to the colon-separated environment variable `var_name`.

  An unset or empty variable is treated as having no entries, so the result
  never starts with a stray ':' and every requested path is always added.
  """
  current = os.environ.get(var_name)
  pieces = ([current] if current else []) + list(new_paths)
  os.environ[var_name] = ':'.join(pieces)


# `_mujoco_run_once` survives in the kernel namespace between executions of
# this cell; it is only undefined the first time the cell runs.
try:
  _mujoco_run_once
except NameError:
  _mujoco_run_once = False

if not _mujoco_run_once:
  # Add it to the actively loaded path (the bashrc exports only do so much).
  # Both directories are added regardless of whether LD_LIBRARY_PATH was
  # already set; previously /usr/lib/nvidia was skipped when it was unset.
  _append_env_paths('LD_LIBRARY_PATH',
                    '/root/.mujoco/mujoco210/bin',
                    '/usr/lib/nvidia')
  _append_env_paths('LD_PRELOAD', '/usr/lib/x86_64-linux-gnu/libGLEW.so')
  # presetup so we don't see output on first env initialization
  import mujoco_py
  _mujoco_run_once = True

In [None]:
# Install the tinkoff-ai fork of d4rl (master branch) straight from GitHub.
!pip install git+https://github.com/tinkoff-ai/d4rl@master#egg=d4rl
# Clone the repository that provides the `rl_task` package imported in the
# next cell and the checkpoint folder read later in the notebook.
!git clone https://github.com/zzmtsvv/rl_task.git

In [None]:
import torch
from dataclasses import dataclass
import os
import numpy as np
import gym
import random
from imageio import mimsave
from tqdm import trange
import d4rl
from rl_task.spot.spot_ import SPOT
from rl_task.adaptive_bc.redq_bc import RandomizedEnsembles_BC
from rl_task.spot.vae import ConditionalVAE

In [13]:
@dataclass
class inference_config:
    """Settings selecting which pretrained agent to evaluate and how.

    Attributes:
        model_type: agent family, one of ``redq_bc`` or ``spot``.
        env: task name, one of ``hopper``, ``halfcheetah`` or ``walker2d``.
        pretrain_dataset: dataset the agent was pretrained on, ``medium`` or
            ``medium-replay``.
        video_dir: directory where rendered episodes are written.
        save_video: whether episodes are recorded at all.
        seed: base random seed.
        device_str: torch device string; defaults to ``cuda`` when available.
        device: ``torch.device`` built from ``device_str`` of this instance
            (not a dataclass field, so it is not a constructor argument).
    """
    model_type: str = "spot"  # [ redq_bc spot ]
    env: str = "halfcheetah"  # hopper halfcheetah walker2d
    pretrain_dataset: str = "medium"  # [ medium medium-replay ]
    video_dir: str = "video"
    save_video: bool = True
    seed: int = 100
    device_str: str = "cuda" if torch.cuda.is_available() else "cpu"
    # Class-level default, kept so `inference_config.device` still resolves.
    device = torch.device(device_str)

    def __post_init__(self):
        # The class attribute above is evaluated once, at class-definition
        # time, from the *default* device_str. Rebuild it per instance so a
        # device_str passed to the constructor is actually honoured.
        self.device = torch.device(self.device_str)

# Configuration used by every later cell; built from the defaults above.
cfg = inference_config()


# Names follow "<model_type>_<env>_<pretrain_dataset>"; the assertion below
# only lets these combinations through.
possible_variants = [
    *(f"spot_{task}" for task in (
        "halfcheetah_medium",
        "hopper_medium-replay",
        "hopper_medium",
        "walker2d_medium",
    )),
    *(f"redq_bc_{task}" for task in (
        "halfcheetah_medium-replay",
        "halfcheetah_medium",
        "hopper_medium-replay",
        "hopper_medium",
        "walker2d_medium-replay",
    )),
]

# Fail early when the requested combination is not in the list above.
assert "{}_{}_{}".format(
    cfg.model_type, cfg.env, cfg.pretrain_dataset
) in possible_variants, "No ready model for a given combination"

In [None]:
!rm -rf video

In [None]:
def make_dir(dir_path):
    """Create `dir_path` (including missing parents) if it does not exist.

    Args:
        dir_path: path of the directory to create.

    Returns:
        `dir_path`, unchanged, so the call can be used inline.

    Raises:
        OSError: if the directory cannot be created (e.g. permission denied,
            or the path exists but is not a directory). Previously every
            OSError was swallowed, so the function could return a path that
            did not exist.
    """
    os.makedirs(dir_path, exist_ok=True)
    return dir_path


# Make sure the directory for recorded episodes exists; as the last expression
# of the cell, the returned path is also displayed.
make_dir(cfg.video_dir)

In [6]:
class VideoRecorder:
    """Collects rendered frames from an environment and writes them as a video.

    Typical use per episode: ``init()`` to reset, ``record(env)`` after every
    reset/step, then ``save(file_name)``.

    Args:
        dir_name: directory the videos are written to; ``None`` disables
            recording entirely.
        height: frame height passed to ``env.render``.
        width: frame width passed to ``env.render``.
        camera_id: stored on the instance but currently not forwarded to
            ``env.render``.
        fps: frame rate passed to the video writer.
    """

    def __init__(self, dir_name, height=512, width=512, camera_id=0, fps=60):
        self.dir_name = dir_name
        self.height = height
        self.width = width
        self.camera_id = camera_id
        self.fps = fps
        self.frames = []
        # Defined here so record()/save() work even if init() was never
        # called; matches what init() computes with its default argument.
        self.enabled = dir_name is not None

    def init(self, enabled=True):
        """Drop any collected frames and (re)decide whether to record."""
        self.frames = []
        self.enabled = self.dir_name is not None and enabled

    def record(self, env: "gym.Env"):
        """Render the current state of `env` and keep the frame, if enabled."""
        if not self.enabled:
            return
        frame = env.render(
            mode='rgb_array',
            height=self.height,
            width=self.width,
        )
        self.frames.append(frame)

    def save(self, file_name):
        """Write the collected frames to `dir_name/file_name`.

        Does nothing when recording is disabled or no frame was collected,
        so the writer is never handed an empty frame list.
        """
        if not self.enabled or not self.frames:
            return
        path = os.path.join(self.dir_name, file_name)
        mimsave(path, self.frames, fps=self.fps)

In [7]:
def eval_policy(cfg: inference_config,
                iteration: int,
                recorder: VideoRecorder,
                policy,
                env_name: str,
                seed: int,
                mean: np.ndarray,
                std: np.ndarray,
                logger=None,
                eval_episodes: int = 10):
    """Roll out `policy` for several episodes and return its normalized score.

    Args:
        cfg: configuration; only `cfg.save_video` is read here.
        iteration: tag used in video file names and passed to the logger.
        recorder: receives the environment after the reset and after every
            step, and is asked to save one video per episode.
        policy: object exposing `act(state)`; it is given the normalized
            observation reshaped to a single row.
        env_name: id passed to `gym.make`.
        seed: base seed; the evaluation environment is seeded with
            `seed + 100`.
        mean: subtracted from every observation before it reaches the policy
            (anything that broadcasts against the observation row works).
        std: divisor applied after subtracting `mean`.
        logger: optional object with `log(key, value, step)`.
        eval_episodes: number of episodes to run; must be at least 1.

    Returns:
        `env.get_normalized_score` applied to the mean episode return.

    Raises:
        ValueError: if `eval_episodes` is smaller than 1.
    """
    if eval_episodes < 1:
        raise ValueError(f"eval_episodes must be >= 1, got {eval_episodes}")

    env = gym.make(env_name)
    env.seed(seed + 100)

    lengths, returns = [], []
    # Sum of rewards over all episodes; divided by the episode count below.
    total_return = 0.0

    try:
        for episode in trange(eval_episodes):
            recorder.init(enabled=cfg.save_video)
            state, done = env.reset(), False

            recorder.record(env)
            steps = 0
            episode_return = 0

            while not done:
                state = (np.array(state).reshape(1, -1) - mean) / std
                action = policy.act(state)

                state, reward, done, _ = env.step(action)
                recorder.record(env)

                total_return += reward
                episode_return += reward
                steps += 1

            lengths.append(steps)
            returns.append(episode_return)
            recorder.save(f"evaluation_{iteration}_episode{episode}_return_{episode_return}.mp4")

        mean_return = total_return / eval_episodes
        d4rl_score = env.get_normalized_score(mean_return)
    finally:
        # Release the environment even if a rollout fails midway.
        env.close()

    if logger is not None:
        logger.log('eval/lengths_mean', np.mean(lengths), iteration)
        logger.log('eval/lengths_std', np.std(lengths), iteration)
        logger.log('eval/returns_mean', np.mean(returns), iteration)
        logger.log('eval/returns_std', np.std(returns), iteration)
        logger.log('eval/d4rl_score', d4rl_score, iteration)

    return d4rl_score

In [None]:
# Folder with the pretrained checkpoints inside the cloned repository.
# NOTE(review): absolute path - assumes the repo was cloned into /content; confirm.
folder_path = "/content/rl_task/online_weights"
# In this cell the environment is only seeded and queried for the sizes of its
# observation and action spaces.
env = gym.make(f"{cfg.env}-{cfg.pretrain_dataset}-v0")
# Seed every random number generator touched by this notebook.
random.seed(cfg.seed)
np.random.seed(cfg.seed)
torch.manual_seed(cfg.seed)

env.seed(cfg.seed)
env.action_space.seed(cfg.seed)
env.observation_space.seed(cfg.seed)

state_dim = env.observation_space.shape[0]
action_dim = env.action_space.shape[0] 
# Only the first component of the upper action bound is used.
# presumably the bound is the same for every action dimension - TODO confirm
max_action = float(env.action_space.high[0])


if cfg.model_type == "redq_bc":
    # Checkpoint name: "<model_type>_<env>-<dataset>-v0_42" (the "_42" suffix is fixed).
    pattern = f"{cfg.model_type}_{cfg.env}-{cfg.pretrain_dataset}-v0_42"
    filename = os.path.join(folder_path, pattern)

    policy = RandomizedEnsembles_BC(state_dim, action_dim, max_action)
    policy.load(filename)

else:
    # Any model_type other than "redq_bc" is loaded as SPOT.
    # NOTE(review): the VAE is built with the arguments (1, 2, 3), which look
    # like placeholders; presumably it is not used when acting - confirm.
    policy = SPOT(ConditionalVAE(1, 2, 3), state_dim, action_dim, max_action)

    # Checkpoint directory: "<model_type>_<env>_<dataset>".
    model_dir = f"{cfg.model_type}_{cfg.env}_{cfg.pretrain_dataset}"
    model_dir = os.path.join(folder_path, model_dir)
    policy.load(model_dir)

# Recorder writing into the directory configured in cfg.video_dir.
video_recorder = VideoRecorder(cfg.video_dir)

In [19]:
# Evaluate the loaded policy on the configured task. With mean=0 and std=1 the
# observations reach the policy unchanged.
# NOTE(review): if the checkpoints expect normalized observations, the dataset
# statistics should be passed here instead - confirm.
score = eval_policy(
    cfg=cfg,
    iteration=0,
    recorder=video_recorder,
    policy=policy,
    env_name=f"{cfg.env}-{cfg.pretrain_dataset}-v0",
    seed=cfg.seed,
    mean=0,
    std=1,
)

100%|██████████| 10/10 [09:55<00:00, 59.51s/it]
