```py
env = 'myoChallengeBimanual-v0'  # alternatively: 'myoChallengeRunTrackP1-v0'
```

In [None]:
# Sanity check: confirm that MyoSuite can be imported and report which
# release the kernel is using.
import myosuite
print(f"version: {myosuite.__version__}")

# 1. Install Dependencies & Define Functions

In [1]:
# !pip install myosuite==2.5.0 --quiet
!pip install "stable-baselines3[extra]" --quiet
!pip install tqdm --quiet
!pip install mujoco==3.1.2 --quiet
!pip install sk-video --quiet
!pip install torch --quiet
!pip install opencv-python-headless
# %env MUJOCO_GL=egl



In [2]:
import os
import time
from datetime import datetime
import numpy as np
from tqdm import tqdm_notebook as tqdm
from IPython.display import HTML
from base64 import b64encode
import skvideo.io
import cv2

import mujoco
import myosuite
from myosuite.utils import gym
import torch
from stable_baselines3 import PPO, SAC
from stable_baselines3.common.vec_env import SubprocVecEnv, DummyVecEnv # SubprocVecEnv is for parallel processing, DummyVecEnv is for sequential processing
from stable_baselines3.common.callbacks import EvalCallback, CallbackList, BaseCallback, CheckpointCallback, StopTrainingOnRewardThreshold
from stable_baselines3.common.monitor import Monitor
from stable_baselines3.common.logger import configure

pygame 2.6.0 (SDL 2.28.4, Python 3.10.12)
Hello from the pygame community. https://www.pygame.org/contribute.html
MyoSuite:> Registering Myo Envs


  File "/home/ta747375ki/myosuite/venv/lib/python3.10/site-packages/gymnasium/envs/registration.py", line 594, in load_plugin_envs
    fn()
  File "/home/ta747375ki/myosuite/venv/lib/python3.10/site-packages/shimmy/registration.py", line 303, in register_gymnasium_envs
    _register_dm_control_envs()
  File "/home/ta747375ki/myosuite/venv/lib/python3.10/site-packages/shimmy/registration.py", line 63, in _register_dm_control_envs
    from shimmy.dm_control_compatibility import DmControlCompatibilityV0
  File "/home/ta747375ki/myosuite/venv/lib/python3.10/site-packages/shimmy/dm_control_compatibility.py", line 20, in <module>
    from gymnasium.envs.mujoco.mujoco_rendering import MujocoRenderer
  File "/home/ta747375ki/myosuite/venv/lib/python3.10/site-packages/gymnasium/envs/mujoco/mujoco_rendering.py", line 7, in <module>
    import imageio
ModuleNotFoundError: No module named 'imageio'
[0m
  logger.warn(f"plugin: {plugin.value} raised {traceback.format_exc()}")


In [3]:
def show_video(video_path, video_width = 400):
    """Embed a video file in the notebook as an inline HTML5 player.

    The file is read in full, base64-encoded and placed in a ``data:`` URL,
    so the rendered cell does not depend on the file staying on disk.

    Args:
        video_path: Path of the video file to embed.
        video_width: Value of the ``width`` attribute of the ``<video>`` tag.

    Returns:
        An ``IPython.display.HTML`` object wrapping the ``<video>`` element.

    Raises:
        OSError: If ``video_path`` cannot be opened for reading.
    """
    # Read-only binary mode is sufficient (the file is never written), and the
    # context manager guarantees the handle is closed even if reading fails.
    with open(video_path, "rb") as video_stream:
        video_file = video_stream.read()
    video_url = f"data:video/mp4;base64,{b64encode(video_file).decode()}"
    return HTML(f"""<video autoplay width={video_width} controls><source src="{video_url}"></video>""")

def make_env(env_name, idx, seed=0):
    """Build a zero-argument factory for one seeded environment.

    The environment is not created here: construction is deferred until the
    returned callable is invoked (the list of factories is what the
    vectorised-env wrappers in this notebook are given).

    Args:
        env_name: Environment id passed to ``gym.make``.
        idx: Index of this environment; added to ``seed`` so every copy
            receives a different seed.
        seed: Base seed shared by all copies.

    Returns:
        A callable that creates the environment, seeds it with
        ``seed + idx`` and returns it.
    """
    def _build_seeded_env():
        created = gym.make(env_name)
        created.seed(seed + idx)
        return created

    return _build_seeded_env

# 2. Train Model

## 2.A. PPO

In [4]:
# Train on the GPU when PyTorch can see one, otherwise fall back to the CPU.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
print(f"using {device}")


# Using time to define the unique naming
start_time = time.time()
time_now = datetime.now().strftime("%Y_%m_%d_%H_%M_%S")

# Initiate N parallel envs and create them
num_cpu = 4
env_name = 'myoChallengeBimanual-v0'
envs = DummyVecEnv([make_env(env_name, i) for i in range(num_cpu)]) # This creates all envs in RAM

# Evaluation gets its own environment. Handing the training envs to
# EvalCallback would reset and step them in the middle of PPO's rollout
# collection, corrupting the data the policy is trained on. Index `num_cpu`
# gives the evaluation env a seed that none of the training envs use.
eval_envs = DummyVecEnv([make_env(env_name, num_cpu)])

# Define your own log path
log_path = './MPL_baselines/policy_best_model/' + env_name + '/' + time_now + '/'

# Define how frequent you want to evaluate the model, where it is logged.
# NOTE: eval_freq is counted in vectorised steps, so with `num_cpu` training
# envs an evaluation happens every eval_freq * num_cpu environment timesteps
# (every 40000 timesteps with the values used here).
eval_callback = EvalCallback(
    eval_envs,
    best_model_save_path=log_path,
    log_path=log_path,
    eval_freq=10000,
    deterministic=True,
    render=False,
)

print('Begin training')

# Define your policy parameters based on your need:
# separate 2x256 ReLU networks for the policy (pi) and the value function (vf).
policy_kwargs = {
    'activation_fn': torch.nn.modules.activation.ReLU,
    'net_arch': {'pi': [256, 256], 'vf': [256, 256]},
}

# Start the training with PPO
model = PPO(
    'MlpPolicy',
    envs,
    verbose=0,
    ent_coef=0.001,
    policy_kwargs=policy_kwargs,
    device=device,
    batch_size=512,
)
callback = CallbackList([eval_callback])
# NOTE(review): tb_log_name only has an effect when PPO is created with a
# `tensorboard_log` directory; none is configured here.
model.learn(total_timesteps=100000, tb_log_name=env_name + "_" + time_now, callback=callback)

False
[36m    MyoSuite: A contact-rich simulation suite for musculoskeletal motor control
        Vittorio Caggiano, Huawei Wang, Guillaume Durandau, Massimo Sartori, Vikash Kumar
        L4DC-2019 | https://sites.google.com/view/myosuite
    [0m


  logger.warn(


Begin training


  logger.warn(f"{pre} is not within the observation space.")


Eval num_timesteps=40000, episode_reward=-70.77 +/- 121.24
Episode length: 250.00 +/- 0.00
New best mean reward!
Eval num_timesteps=80000, episode_reward=-15.00 +/- 314.86
Episode length: 250.00 +/- 0.00
New best mean reward!


<stable_baselines3.ppo.ppo.PPO at 0x7f376594ffa0>

# Visualize

In [None]:
# Render trained policy: roll the policy out for 5 episodes of at most 200
# steps each and collect one off-screen frame per step in `frames`.
frames = []
env = gym.make(env_name)
try:
    # Indices of the geoms in group 1, which are made fully transparent before
    # every render. Looked up once — assumes geom_group does not change during
    # the rollouts.
    geom_1_indices = np.where(env.sim.model.geom_group == 1)
    for _ in range(5): # 5 random targets
        print("\n*")
        ep_rewards = []
        done = False
        # A single reset per episode; the observation it returns is not needed
        # because the observation vector is rebuilt from obs_dict below.
        env.reset()
        for i in range(1, 201):
            if i%10 == 0: print("=", end="")
            obs = env.obsdict2obsvec(env.obs_dict, env.obs_keys)[1]
            # get the next action from the policy
            action, _states = model.predict(obs, deterministic=True)
            env.sim.model.geom_rgba[geom_1_indices, 3] = 0
            frame = env.sim.renderer.render_offscreen(
                                width=800,
                                height=800,
                                camera_id=1)
            frames.append(frame)
            # take an action based on the current observation; the 5-tuple is
            # (observation, reward, terminated, truncated, info)
            obs, reward, terminated, truncated, info = env.step(action)
            ep_rewards.append(reward)
            done = terminated or truncated
            if done:
                # Do not keep stepping an episode that has already ended.
                break
finally:
    # Release the environment even if the rollout fails.
    env.close()