<a href="https://colab.research.google.com/github/yasu-k2/adapt-rl/blob/master/adapt_rl.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Adapt RL

## Requirements

In [None]:
# Refresh the apt package index, then install the system packages this notebook relies on
# (xvfb, python-opengl, libopencv-dev) -- presumably for headless rendering; TODO confirm.
!sudo apt update
!sudo apt install -y xvfb
!sudo apt install -y python-opengl
!sudo apt install -y libopencv-dev

In [None]:
!pip install matplotlib
!pip install tensorflow
!pip install torch torchvision
!pip install gym
!pip install pybullet
!pip install ray[rllib]

In [None]:
# Workaround: patch the installed torch/serialization.py in place, replacing the first
# `None` on line 486 with 'cpu'.
# NOTE(review): the python3.6 dist-packages path and the line number are tied to one specific
# environment / torch build -- confirm they still match before running this cell.
# Uncomment the next line to inspect the file with line numbers first.
#!cat -n /usr/local/lib/python3.6/dist-packages/torch/serialization.py
!sed -i -e "486s/None/'cpu'/" /usr/local/lib/python3.6/dist-packages/torch/serialization.py

In [None]:
# Clone the Ray repository; the notebook changes into this checkout before running anything else.
!git clone https://github.com/ray-project/ray.git
# Output directories for Tune logs (logdir) and rendered videos (tmp).
# -p keeps the cell re-runnable: no error when the directories already exist.
!mkdir -p ./ray/logdir ./ray/tmp

In [None]:
# Work from inside the cloned checkout so the relative ./logdir and ./tmp paths used later resolve there.
# NOTE: run this cell only once per session; a second run tries to enter ./ray relative to the new directory.
%cd ./ray

In [None]:
# Show the GPUs visible to this runtime (the training config requests GPUs via "num_gpus").
!nvidia-smi

## Function Definitions

In [None]:
import copy

import matplotlib.pyplot as plt
from IPython.display import HTML
import moviepy.editor as mpy
from base64 import b64encode

import gym
import ray
from ray import tune
from ray.tune.registry import register_env
from ray.rllib.agents import ppo

# Raise matplotlib's embed limit for animations (see the `animation.embed_limit` rcParam for units).
plt.rcParams["animation.embed_limit"] = 200.0

In [None]:
def save_video(frames, path, fps=60):
    """Encode a sequence of frames as a video file.

    Args:
        frames: Frames handed to ``mpy.ImageSequenceClip`` (``ppo_humanoid_test``
            passes the values returned by ``env.render()``).
        path: Destination file path for the encoded video.
        fps: Frame rate used both to build the clip and to write the file.
            Defaults to 60, the value that used to be hard-coded.
    """
    clip = mpy.ImageSequenceClip(frames, fps=fps)
    clip.write_videofile(path, fps=fps)

def play_mp4(path):
    """Return an inline HTML5 video player for the MP4 file at ``path``.

    The whole file is read and embedded in the markup as a base64 ``data:`` URL.

    Args:
        path: Path of the MP4 file to embed.

    Returns:
        An ``IPython.display.HTML`` object containing a ``<video>`` element.
    """
    # Context manager so the file handle is closed as soon as the bytes are read.
    with open(path, 'rb') as video_file:
        mp4 = video_file.read()
    url = "data:video/mp4;base64," + b64encode(mp4).decode()
    return HTML("""<video width=400 controls><source src="%s" type="video/mp4"></video>""" % url)

In [None]:
class HumanoidBulletEnv_alt(gym.Env):
    """Wrapper around ``HumanoidBulletEnv-v0`` that can force one action component to zero.

    The wrapped environment's action and observation spaces are exposed unchanged.
    When ``env_config["hack"]`` is true, ``step`` overrides the action component at
    ``HACKED_ACTION_INDEX`` with ``0.0`` before forwarding the action.
    """
    # Imported in the class body, as in the original notebook; presumably so that
    # "HumanoidBulletEnv-v0" is registered with gym wherever this class is used -- TODO confirm.
    import pybullet_envs

    # Index of the action component that is zeroed when the hack is enabled.
    HACKED_ACTION_INDEX = 10

    def __init__(self, env_config):
        """Create the wrapped environment.

        Args:
            env_config: Mapping that must contain a ``"hack"`` entry (truthy enables
                the action override).
        """
        self.env = gym.make("HumanoidBulletEnv-v0")
        self.action_space = self.env.action_space
        self.observation_space = self.env.observation_space
        self.hack = env_config["hack"]

    def reset(self):
        """Reset the wrapped environment and return its result."""
        return self.env.reset()

    def step(self, action):
        """Forward ``action`` to the wrapped environment, applying the override if enabled.

        The caller's ``action`` object is never modified: the override is applied to a copy.
        """
        if self.hack:
            # Hack 17 DOF
            action = copy.copy(action)
            action[self.HACKED_ACTION_INDEX] = 0.0
        return self.env.step(action)

    def render(self, mode="rgb_array"):
        """Render the wrapped environment in the given mode and return the result."""
        return self.env.render(mode)

    def close(self):
        """Close the wrapped environment."""
        self.env.close()

In [None]:
def ppo_humanoid_train(config, stop, chk_dir=None, log_dir='./logdir'):
    """Run PPO training through ``tune.run`` and return the best trial's checkpoints.

    Args:
        config: Trainer configuration passed to ``tune.run``.
        stop: Stopping criteria passed to ``tune.run``.
        chk_dir: Checkpoint to restore from. ``None`` starts a fresh run that
            checkpoints every 100 iterations; otherwise training is restored from
            this checkpoint and checkpoints every 10 iterations.
        log_dir: Directory handed to ``tune.run`` as ``local_dir``.

    Returns:
        Whatever ``get_trial_checkpoints_paths`` reports for the trial with the
        highest ``episode_reward_mean``; the value is also printed.
    """
    metric = "episode_reward_mean"
    if chk_dir is None:
        results = tune.run("PPO", config=config, stop=stop, local_dir=log_dir, checkpoint_at_end=True, checkpoint_freq=100)
    else:
        results = tune.run(ppo.PPOTrainer, config=config, stop=stop, local_dir=log_dir, checkpoint_at_end=True, checkpoint_freq=10, restore=chk_dir)
    # State the optimisation direction explicitly: newer Ray releases refuse to pick a
    # best trial when no mode is given, and a higher reward is better here.
    best_trial = results.get_best_trial(metric, mode="max")
    checkpoints = results.get_trial_checkpoints_paths(trial=best_trial, metric=metric)
    print(checkpoints)
    return checkpoints

In [None]:
def ppo_humanoid_test(config, chk_dir, video_path="./tmp/sample.mp4"):
    """Roll out one episode with a restored PPO agent and save it as a video.

    Args:
        config: Trainer configuration; its ``"env_config"`` entry is also used to
            build the environment that is rolled out and rendered.
        chk_dir: Checkpoint path passed to ``agent.restore``.
        video_path: Where the rendered episode is written. Defaults to the path
            that used to be hard-coded.

    The total episode reward is printed. The environment is closed and the trainer
    is stopped even if the rollout fails.
    """
    agent = ppo.PPOTrainer(config=config, env="HumanoidBulletEnv_alt")
    try:
        agent.restore(chk_dir)
        env = HumanoidBulletEnv_alt(config["env_config"])
        try:
            episode_reward = 0.0
            done = False
            frames = []
            obs = env.reset()
            while not done:
                # Capture the frame for the current state before acting on it.
                frames.append(env.render())
                action = agent.compute_action(obs)
                obs, reward, done, info = env.step(action)
                episode_reward += reward
            print("Episode Reward:", episode_reward)
            save_video(frames, video_path)
        finally:
            env.close()
    finally:
        # Release the trainer's workers; otherwise every call leaves them running.
        agent.stop()

## Configuration

In [None]:
# Options forwarded to HumanoidBulletEnv_alt; "hack" switches the action override on or off.
env_config = dict(hack=False)

# Baseline PPO settings, assembled in the same key order as a single literal would give.
config = dict(env="HumanoidBulletEnv_alt", env_config=env_config)
config["model"] = dict(free_log_std=True)
config.update({
    "lr": 1e-4,
    "framework": "torch",
    "gamma": 0.995,
    "lambda": 0.95,
    "clip_param": 0.2,
    "kl_coeff": 1.0,
    "num_sgd_iter": 20,
    "sgd_minibatch_size": 32768,
    "horizon": 5000,
    "train_batch_size": 320000,
    "num_workers": 16,
    "num_gpus": 4,
    "batch_mode": "complete_episodes",
    "observation_filter": "MeanStdFilter",
})

# Stopping criteria handed to tune.run.
stop = dict(episode_reward_mean=2000)

In [None]:
# Scale the run down: a single rollout worker and a single GPU.
config.update(num_workers=1, num_gpus=1)

# Lower the reward target used as the stopping criterion.
stop.update(episode_reward_mean=1000)

In [None]:
# Deep copies, so enabling "hack" here leaves the baseline config and stop dicts untouched.
stop_altenv = copy.deepcopy(stop)
config_altenv = copy.deepcopy(config)
config_altenv["env_config"].update(hack=True)

In [None]:
# Control run: the same "hack"-enabled settings, kept as independent deep copies.
# The Execution section trains this one without restoring a checkpoint.
stop_control = copy.deepcopy(stop)
config_control = copy.deepcopy(config)
config_control["env_config"].update(hack=True)

## Execution

In [None]:
# ignore_reinit_error lets this cell be re-run without restarting the kernel.
ray.init(ignore_reinit_error=True)
# The creator's parameter is named env_cfg so it does not shadow the notebook-level `config` dict.
register_env("HumanoidBulletEnv_alt", lambda env_cfg: HumanoidBulletEnv_alt(env_cfg))

In [None]:
# Baseline run: train from scratch with the unmodified environment ("hack" is False in `config`).
chks = ppo_humanoid_train(config, stop)

In [None]:
# Fill in chk_dir with a checkpoint file from the baseline training run before executing
# this cell (ppo_humanoid_train prints the available paths). The commented line shows the
# expected path layout; the empty string is only a placeholder.
#chk_dir = '/content/ray/logdir/PPO/PPO_HumanoidBulletEnv_alt_ID/checkpoint_No/checkpoint-No'
chk_dir = ''
# Roll out one episode with the restored agent, then embed the video it wrote.
ppo_humanoid_test(config, chk_dir)
play_mp4("./tmp/sample.mp4")

In [None]:
# Adaptation run: restore the baseline checkpoint in chk_dir and keep training with "hack" enabled.
chks_altenv = ppo_humanoid_train(config_altenv, stop_altenv, chk_dir=chk_dir)

In [None]:
# Fill in chk_dir_altenv with a checkpoint file from the adaptation run before executing
# this cell; the commented line shows the expected path layout.
#chk_dir_altenv = '/content/ray/logdir/PPO/PPO_HumanoidBulletEnv_alt_ID/checkpoint_No/checkpoint-No'
chk_dir_altenv = ''
# Roll out one episode with "hack" enabled, then embed the video it wrote.
ppo_humanoid_test(config_altenv, chk_dir_altenv)
play_mp4("./tmp/sample.mp4")

In [None]:
# Control run: train from scratch (no checkpoint restored) with "hack" enabled.
chks_control = ppo_humanoid_train(config_control, stop_control)

In [None]:
# Fill in chk_dir_control with a checkpoint file from the control run before executing
# this cell; the commented line shows the expected path layout.
#chk_dir_control = '/content/ray/logdir/PPO/PPO_HumanoidBulletEnv_alt_ID/checkpoint_No/checkpoint-No'
chk_dir_control = ''
# Roll out one episode with "hack" enabled, then embed the video it wrote.
ppo_humanoid_test(config_control, chk_dir_control)
play_mp4("./tmp/sample.mp4")

In [None]:
# Shut Ray down once all experiments are finished.
ray.shutdown()