In [None]:
%%capture
!apt install -y python3-opengl
!apt install -y ffmpeg
!apt install -y xvfb
!pip3 install pyvirtualdisplay

In [None]:
from pyvirtualdisplay import Display

# Invisible 1400x900 virtual screen; presumably needed so the environment can
# render frames on a machine with no physical monitor.
SCREEN_SIZE = (1400, 900)
display = Display(size=SCREEN_SIZE, visible=0)
display.start()

<pyvirtualdisplay.display.Display at 0x780d75dfec80>

In [None]:
%pip install gymnasium[mujoco] stable-baselines3

clear_output()

# Task

We will use the Soft Actor-Critic (SAC) algorithm to train an agent in the **Walker2d** environment.

You can implement SAC yourself or use the Stable-Baselines3 implementation.

The Walker environment consists of a legged body whose joints the agent's actions can move. The goal is to make this body walk.

You can see more about the actions, observations and rewards [here](https://gymnasium.farama.org/environments/mujoco/walker2d/)

![Walker Image](https://gymnasium.farama.org/_images/walker2d.gif)


In [None]:
import numpy as np

import gymnasium as gym
from stable_baselines3 import SAC
from stable_baselines3.common.vec_env import DummyVecEnv
from stable_baselines3.common.evaluation import evaluate_policy
from stable_baselines3.common.noise import NormalActionNoise

from IPython.display import clear_output

import matplotlib.pyplot as plt
from matplotlib.animation import FuncAnimation
from IPython.display import HTML

### Creating the environment

In [None]:
def make_env():
    """Build one Walker2d-v4 environment that renders to RGB arrays."""
    return gym.make('Walker2d-v4', render_mode='rgb_array')


# Four copies of the environment wrapped in one vectorised env;
# adjust the count according to available RAM.
env = DummyVecEnv([make_env] * 4)
# Length of the action vector, taken from the first dimension of the action space.
num_actions = env.action_space.shape[0]

  and should_run_async(code)


In [None]:
# Per-action parameters of the Gaussian exploration noise:
# zero mean and a standard deviation of 0.1 for every action dimension.
noise_mean = np.zeros(num_actions)
noise_std = np.full(num_actions, 0.1)

### Training the model

In [None]:
# Gaussian noise applied to the actions for exploration.
exploration_noise = NormalActionNoise(mean=noise_mean, sigma=noise_std)

# SAC agent with an MLP policy, trained on the vectorised Walker2d environments.
model = SAC(
    policy="MlpPolicy",
    env=env,
    learning_rate=1e-3,
    action_noise=exploration_noise,
    verbose=1,
)

Using cuda device


  and should_run_async(code)


In [None]:
clear_output()

# Train for one million environment steps with a live progress bar.
# `learn` is the last expression, so its return value is the cell's output.
total_timesteps = 1_000_000
model.learn(total_timesteps=total_timesteps, progress_bar=True)

Output()

-----------------------------
| time/              |      |
|    episodes        | 4    |
|    fps             | 1669 |
|    time_elapsed    | 0    |
|    total_timesteps | 104  |
-----------------------------
---------------------------------
| time/              |          |
|    episodes        | 8        |
|    fps             | 565      |
|    time_elapsed    | 0        |
|    total_timesteps | 164      |
| train/             |          |
|    actor_loss      | -7.06    |
|    critic_loss     | 3.15     |
|    ent_coef        | 0.986    |
|    ent_coef_loss   | -0.139   |
|    learning_rate   | 0.001    |
|    n_updates       | 15       |
---------------------------------
---------------------------------
| time/              |          |
|    episodes        | 12       |
|    fps             | 433      |
|    time_elapsed    | 0        |
|    total_timesteps | 228      |
| train/             |          |
|    actor_loss      | -7.88    |
|    critic_loss     | 2.63     |
|    ent

KeyboardInterrupt: 

In [None]:
# Colab-only import, kept local to this cell.
from google.colab import drive

# Mount Google Drive, forcing a remount if it is already mounted.
GDRIVE_MOUNT_POINT = '/content/gdrive/'
drive.mount(GDRIVE_MOUNT_POINT, force_remount=True)

Mounted at /content/gdrive/


In [None]:
# Write the trained agent to the mounted Drive folder.
checkpoint_path = '/content/gdrive/MyDrive/Colab Notebooks/rl_summer_school/models/sac.zip'
model.save(checkpoint_path)

  and should_run_async(code)


In [None]:
# SAC.load is a classmethod that builds and returns a *new* model; it does not
# modify the instance it is called on, so the result must be bound to `model`.
# Passing `env` attaches the training environments to the restored agent.
model = SAC.load(
    '/content/gdrive/MyDrive/Colab Notebooks/rl_summer_school/models/sac.zip',
    env=env,
)

<stable_baselines3.sac.sac.SAC at 0x780d8fc27cd0>

### Evaluating and visualizing the performance of the trained model

In [None]:
# evaluate_policy returns the mean and the standard deviation of the episode
# reward; only the mean, taken over 10 evaluation episodes, is reported here.
mean_reward, _reward_std = evaluate_policy(model, env, n_eval_episodes=10)
print(f"Mean reward: {mean_reward:.2f}")



Mean reward: 823.06


In [None]:
# A separate single-environment VecEnv is used for recording one episode.
t_env = DummyVecEnv([lambda: gym.make('Walker2d-v4', render_mode="rgb_array")])
frames = []

try:
    state = t_env.reset()
    frames.append(t_env.render())  # initial pose, before any action is taken

    while True:
        # deterministic=True matches evaluate_policy's default, so the video
        # shows the same behaviour that produced the reported mean reward.
        action, _ = model.predict(state, deterministic=True)
        state, _reward, done, _info = t_env.step(action)
        if done.all():
            # The vectorised env resets itself as soon as an episode ends, so a
            # frame rendered now would show the start of the next episode.
            break
        frames.append(t_env.render())
finally:
    # Release the simulator and renderer even if the rollout is interrupted.
    t_env.close()

In [None]:
def frames_to_video(frames, fps=24):
    """Turn a sequence of image frames into an inline HTML5 video.

    Args:
        frames: Non-empty sequence of image arrays exposing ``.shape``; 2-D
            arrays are drawn as grayscale, anything else as colour. The figure
            is sized from the first frame.
        fps: Playback speed in frames per second.

    Returns:
        An ``IPython.display.HTML`` object wrapping the encoded animation.

    Raises:
        ValueError: If ``frames`` is empty.
    """
    if len(frames) == 0:
        raise ValueError("frames must contain at least one image")

    first = frames[0]
    height, width = first.shape[0], first.shape[1]

    # At dpi=100 a figure of (width / 100, height / 100) inches has the same
    # pixel dimensions as the frames.
    fig = plt.figure(figsize=(width / 100, height / 100), dpi=100)
    ax = fig.add_subplot()
    ax.set_axis_off()

    # The colormap is fixed once, when the image artist is created. set_data()
    # accepts only the pixel array, so no cmap may be passed to it later.
    cmap = 'gray' if len(first.shape) == 2 else None
    im = ax.imshow(first, cmap=cmap)

    def show(index):
        """Put frame ``index`` on the image artist and return it for blitting."""
        im.set_data(frames[index])
        return im,

    def init():
        return show(0)

    anim = FuncAnimation(
        fig,
        show,
        frames=len(frames),
        init_func=init,
        blit=True,
        interval=1000 / fps,  # delay between frames in milliseconds
    )
    # Close this specific figure so it is not also displayed as a static image.
    plt.close(fig)
    return HTML(anim.to_html5_video())

# Last expression of the cell, so the returned HTML video is rendered as the cell output.
frames_to_video(frames)