In [9]:
import os
import time
import random
import itertools
import subprocess
import distutils.util

import numpy as np
import imageio
import gymnasium as gym
import robosuite as suite
from robosuite.wrappers import GymWrapper


In [None]:
# For Ubuntu
# Configure MuJoCo to use the EGL rendering backend (requires GPU)
print('Setting environment variable to use GPU rendering:')
%env MUJOCO_GL=egl

# For mac
os.environ["MUJOCO_GL"] = "cgl"   # macOS

# Check if installation was succesful.
try:
  print('Checking that the installation succeeded:')
  import mujoco
  mujoco.MjModel.from_xml_string('<mujoco/>')
except Exception as e:
  raise e from RuntimeError(
      'Something went wrong during installation. Check the shell output above '
      'for more information.\n'
      'If using a hosted Colab runtime, make sure you enable GPU acceleration '
      'by going to the Runtime menu and selecting "Choose runtime type".')

print('Installation successful.')



# Graphics and plotting.
print('Installing mediapy:')
!command -v ffmpeg >/dev/null || (apt update && apt install -y ffmpeg)
!pip install -q mediapy
import mediapy as media
import matplotlib.pyplot as plt

# More legible printing from numpy.
np.set_printoptions(precision=3, suppress=True, linewidth=100)

from IPython.display import clear_output
clear_output()

# For macOS

In [None]:

# Create environment with Franka rob1t
env = suite.make(
    env_name="Lift",
    robots="Panda",
    has_renderer=False,
    has_offscreen_renderer=True,
    use_camera_obs=False
)

# Reset the environment
obs = env.reset()

# Get cube position from observation
cube_pos = obs['cube_pos']
print("Cube position:", cube_pos)

# Define target positions for lifting the cube:
# Stage 1: Move above the cube
above_cube_pos = cube_pos.copy()
above_cube_pos[2] += 0.1  # Position 10cm above the cube

# Stage 2: Move down to grasp the cube
grasp_pos = cube_pos.copy()
grasp_pos[2] -= 0.01  # Move slightly down to grasp

# Stage 3: Lift the cube
lift_pos = cube_pos.copy()
lift_pos[2] += 0.5  # Lift 20cm above the table
#lift_pos[0] += 0.5  # Lift 20cm above the table

print(f"Above cube target: {above_cube_pos}")
print(f"Grasp position: {grasp_pos}")
print(f"Lift position: {lift_pos}")

# Control gains
kp = 5.0  # Position gain
gripper_close_steps = 50  # Steps over which to close gripper gradually
kd = 1.0  # Velocity gain

# Run the simulation
done = False
step_count = 0
max_steps = 500
current_stage = 1
grasped = False

frames = []
while step_count < max_steps:
    current_eef = obs['robot0_eef_pos']
    current_cube_pos = obs['cube_pos']

    # Determine target
    if current_stage == 1:
        target = above_cube_pos
        if np.linalg.norm(current_eef - target) < 0.02:
            current_stage = 2
            print("Above cube reached, moving to grasp")
    elif current_stage == 2:
        target = grasp_pos
        if np.linalg.norm(current_eef - target) < 0.01:
            current_stage = 3
            print("ready to grasp")
    elif current_stage == 3:
        target = grasp_pos
        if action[-1] > 0.5:
            grasped = True
        if np.linalg.norm(current_eef - target) < 0.01 and grasped:
            current_stage = 4 
            print("Cube grasped, lifting")
    else:
        target = lift_pos
        if np.linalg.norm(current_cube_pos - target) < 0.01:
            print("Cube lifted successfully!, current_cube_pos:", current_cube_pos, ", target:", target, ", current_stage:", current_stage)
            break

    # Compute simple proportional action in end-effector space
    action = np.zeros(env.action_dim)
    if current_stage <= 2:
        action[:3] = kp * (target - current_eef)  # x,y,z
        action[3:6] = 0.0  # no rotation change
    else:
        action[:3] = kp * (target - current_cube_pos)  # x,y,z
        action[3:6] = 0.0  # no rotation change
    # Gripper control
    if current_stage <= 2:
        action[-1] = -1.0  # open
    else:
        # Gradually close over first gripper_close_steps
        action[-1] = min(1.0, (step_count % gripper_close_steps) / gripper_close_steps)

    # Step environment
    obs, reward, done, info = env.step(action)
    # Render offscreen frame
    frame = env.sim.render(camera_name="frontview", width=640, height=480)
    frame = np.flip(frame, axis=0)
    frames.append(frame)

    time.sleep(0.01)
    step_count += 1

print("cube_pos:", current_cube_pos, "current_eef:", current_eef, ", target:", target, ", current_stage:", current_stage)
env.close()
print("Simulation finished")
imageio.mimwrite("lift_cube.mp4", frames, fps=30)
print("Video saved!")

[1m[32m[robosuite INFO] [0mLoading controller configuration from: /opt/miniconda3/envs/mujoco-env/lib/python3.10/site-packages/robosuite/controllers/config/robots/default_panda.json (composite_controller_factory.py:121)
[1m[32m[robosuite INFO] [0mLoading controller configuration from: /opt/miniconda3/envs/mujoco-env/lib/python3.10/site-packages/robosuite/controllers/config/robots/default_panda.json (composite_controller_factory.py:121)


Cube position: [-0.024  0.019  0.83 ]
Above cube target: [-0.024  0.019  0.93 ]
Grasp position: [-0.024  0.019  0.82 ]
Lift position: [-0.024  0.019  1.33 ]
Above cube reached, moving to grasp
ready to grasp
Cube grasped, lifting
Cube lifted successfully!, current_cube_pos: [-0.025  0.02   1.321] , target: [-0.024  0.019  1.33 ] , current_stage: 4
cube_pos: [-0.025  0.02   1.321] current_eef: [-0.029  0.02   1.33 ] , target: [-0.024  0.019  1.33 ] , current_stage: 4
Simulation finished
Video saved!


#### Robosuite envs aren’t directly Gym-compatible, use its GymWrapper and randomize parameters.

In [16]:
class RandomizedLiftEnv(gym.Wrapper):
    """
    Robosuite Lift task with domain randomization:
    - Random cube mass
    - Random cube friction
    """
    def __init__(self):
        env = suite.make(
            env_name="Lift",
            robots="Panda",
            has_renderer=False,
            has_offscreen_renderer=False,
            use_camera_obs=False,
            horizon=200,
            control_freq=20,
            reward_shaping=True,
        )
        super().__init__(GymWrapper(env))

    def reset(self, **kwargs):
        # Sample random parameters
        mass = np.random.uniform(0.05, 0.3)      # 50g – 300g
        friction = np.random.uniform(0.2, 1.0)   # slippery – sticky

        # Access robosuite env from inside wrapper
        env = self.env.env  

        # ✅ Body for mass
        cube_body_id = env.sim.model.body_name2id("cube_main")
        env.sim.model.body_mass[cube_body_id] = mass

        # ✅ Geom for friction (use cube_g0, not cube_main)
        cube_geom_id = env.sim.model.geom_name2id("cube_g0")
        env.sim.model.geom_friction[cube_geom_id] = [friction, 0.005, 0.0001]

        return super().reset(**kwargs)


In [None]:
from stable_baselines3 import PPO

env = RandomizedLiftEnv()

model = PPO("MlpPolicy", env, verbose=1, tensorboard_log="./ppo_franka_lift/")
model.learn(total_timesteps=200_000)
model.save("ppo_franka_lift")


[1m[32m[robosuite INFO] [0mLoading controller configuration from: /opt/miniconda3/envs/mujoco-env/lib/python3.10/site-packages/robosuite/controllers/config/robots/default_panda.json (composite_controller_factory.py:121)
[1m[32m[robosuite INFO] [0mLoading controller configuration from: /opt/miniconda3/envs/mujoco-env/lib/python3.10/site-packages/robosuite/controllers/config/robots/default_panda.json (composite_controller_factory.py:121)
[1m[32m[robosuite INFO] [0mLoading controller configuration from: /opt/miniconda3/envs/mujoco-env/lib/python3.10/site-packages/robosuite/controllers/config/robots/default_panda.json (composite_controller_factory.py:121)


Using cpu device
Wrapping the env with a `Monitor` wrapper
Wrapping the env in a DummyVecEnv.
Logging to ./ppo_franka_lift/PPO_1


[1m[32m[robosuite INFO] [0mLoading controller configuration from: /opt/miniconda3/envs/mujoco-env/lib/python3.10/site-packages/robosuite/controllers/config/robots/default_panda.json (composite_controller_factory.py:121)
[1m[32m[robosuite INFO] [0mLoading controller configuration from: /opt/miniconda3/envs/mujoco-env/lib/python3.10/site-packages/robosuite/controllers/config/robots/default_panda.json (composite_controller_factory.py:121)
[1m[32m[robosuite INFO] [0mLoading controller configuration from: /opt/miniconda3/envs/mujoco-env/lib/python3.10/site-packages/robosuite/controllers/config/robots/default_panda.json (composite_controller_factory.py:121)
[1m[32m[robosuite INFO] [0mLoading controller configuration from: /opt/miniconda3/envs/mujoco-env/lib/python3.10/site-packages/robosuite/controllers/config/robots/default_panda.json (composite_controller_factory.py:121)
[1m[32m[robosuite INFO] [0mLoading controller configuration from: /opt/miniconda3/envs/mujoco-env/lib/pyt

---------------------------------
| rollout/           |          |
|    ep_len_mean     | 200      |
|    ep_rew_mean     | 2.54     |
| time/              |          |
|    fps             | 299      |
|    iterations      | 1        |
|    time_elapsed    | 6        |
|    total_timesteps | 2048     |
---------------------------------


[1m[32m[robosuite INFO] [0mLoading controller configuration from: /opt/miniconda3/envs/mujoco-env/lib/python3.10/site-packages/robosuite/controllers/config/robots/default_panda.json (composite_controller_factory.py:121)
[1m[32m[robosuite INFO] [0mLoading controller configuration from: /opt/miniconda3/envs/mujoco-env/lib/python3.10/site-packages/robosuite/controllers/config/robots/default_panda.json (composite_controller_factory.py:121)
[1m[32m[robosuite INFO] [0mLoading controller configuration from: /opt/miniconda3/envs/mujoco-env/lib/python3.10/site-packages/robosuite/controllers/config/robots/default_panda.json (composite_controller_factory.py:121)
[1m[32m[robosuite INFO] [0mLoading controller configuration from: /opt/miniconda3/envs/mujoco-env/lib/python3.10/site-packages/robosuite/controllers/config/robots/default_panda.json (composite_controller_factory.py:121)
[1m[32m[robosuite INFO] [0mLoading controller configuration from: /opt/miniconda3/envs/mujoco-env/lib/pyt

-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 200         |
|    ep_rew_mean          | 2.85        |
| time/                   |             |
|    fps                  | 295         |
|    iterations           | 2           |
|    time_elapsed         | 13          |
|    total_timesteps      | 4096        |
| train/                  |             |
|    approx_kl            | 0.010535341 |
|    clip_fraction        | 0.105       |
|    clip_range           | 0.2         |
|    entropy_loss         | -9.91       |
|    explained_variance   | 0.0642      |
|    learning_rate        | 0.0003      |
|    loss                 | 0.0123      |
|    n_updates            | 10          |
|    policy_gradient_loss | -0.0153     |
|    std                  | 0.993       |
|    value_loss           | 0.114       |
-----------------------------------------


[1m[32m[robosuite INFO] [0mLoading controller configuration from: /opt/miniconda3/envs/mujoco-env/lib/python3.10/site-packages/robosuite/controllers/config/robots/default_panda.json (composite_controller_factory.py:121)
[1m[32m[robosuite INFO] [0mLoading controller configuration from: /opt/miniconda3/envs/mujoco-env/lib/python3.10/site-packages/robosuite/controllers/config/robots/default_panda.json (composite_controller_factory.py:121)
[1m[32m[robosuite INFO] [0mLoading controller configuration from: /opt/miniconda3/envs/mujoco-env/lib/python3.10/site-packages/robosuite/controllers/config/robots/default_panda.json (composite_controller_factory.py:121)
[1m[32m[robosuite INFO] [0mLoading controller configuration from: /opt/miniconda3/envs/mujoco-env/lib/python3.10/site-packages/robosuite/controllers/config/robots/default_panda.json (composite_controller_factory.py:121)
[1m[32m[robosuite INFO] [0mLoading controller configuration from: /opt/miniconda3/envs/mujoco-env/lib/pyt

-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 200         |
|    ep_rew_mean          | 6.37        |
| time/                   |             |
|    fps                  | 292         |
|    iterations           | 3           |
|    time_elapsed         | 21          |
|    total_timesteps      | 6144        |
| train/                  |             |
|    approx_kl            | 0.014776892 |
|    clip_fraction        | 0.174       |
|    clip_range           | 0.2         |
|    entropy_loss         | -9.84       |
|    explained_variance   | 0.34        |
|    learning_rate        | 0.0003      |
|    loss                 | 0.0133      |
|    n_updates            | 20          |
|    policy_gradient_loss | -0.0234     |
|    std                  | 0.984       |
|    value_loss           | 0.18        |
-----------------------------------------


[1m[32m[robosuite INFO] [0mLoading controller configuration from: /opt/miniconda3/envs/mujoco-env/lib/python3.10/site-packages/robosuite/controllers/config/robots/default_panda.json (composite_controller_factory.py:121)
[1m[32m[robosuite INFO] [0mLoading controller configuration from: /opt/miniconda3/envs/mujoco-env/lib/python3.10/site-packages/robosuite/controllers/config/robots/default_panda.json (composite_controller_factory.py:121)
[1m[32m[robosuite INFO] [0mLoading controller configuration from: /opt/miniconda3/envs/mujoco-env/lib/python3.10/site-packages/robosuite/controllers/config/robots/default_panda.json (composite_controller_factory.py:121)
[1m[32m[robosuite INFO] [0mLoading controller configuration from: /opt/miniconda3/envs/mujoco-env/lib/python3.10/site-packages/robosuite/controllers/config/robots/default_panda.json (composite_controller_factory.py:121)
[1m[32m[robosuite INFO] [0mLoading controller configuration from: /opt/miniconda3/envs/mujoco-env/lib/pyt

-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 200         |
|    ep_rew_mean          | 6.99        |
| time/                   |             |
|    fps                  | 291         |
|    iterations           | 4           |
|    time_elapsed         | 28          |
|    total_timesteps      | 8192        |
| train/                  |             |
|    approx_kl            | 0.010633653 |
|    clip_fraction        | 0.0963      |
|    clip_range           | 0.2         |
|    entropy_loss         | -9.84       |
|    explained_variance   | 0.244       |
|    learning_rate        | 0.0003      |
|    loss                 | 0.15        |
|    n_updates            | 30          |
|    policy_gradient_loss | -0.0165     |
|    std                  | 0.988       |
|    value_loss           | 0.624       |
-----------------------------------------


[1m[32m[robosuite INFO] [0mLoading controller configuration from: /opt/miniconda3/envs/mujoco-env/lib/python3.10/site-packages/robosuite/controllers/config/robots/default_panda.json (composite_controller_factory.py:121)
[1m[32m[robosuite INFO] [0mLoading controller configuration from: /opt/miniconda3/envs/mujoco-env/lib/python3.10/site-packages/robosuite/controllers/config/robots/default_panda.json (composite_controller_factory.py:121)
[1m[32m[robosuite INFO] [0mLoading controller configuration from: /opt/miniconda3/envs/mujoco-env/lib/python3.10/site-packages/robosuite/controllers/config/robots/default_panda.json (composite_controller_factory.py:121)
[1m[32m[robosuite INFO] [0mLoading controller configuration from: /opt/miniconda3/envs/mujoco-env/lib/python3.10/site-packages/robosuite/controllers/config/robots/default_panda.json (composite_controller_factory.py:121)
[1m[32m[robosuite INFO] [0mLoading controller configuration from: /opt/miniconda3/envs/mujoco-env/lib/pyt

-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 200         |
|    ep_rew_mean          | 6.34        |
| time/                   |             |
|    fps                  | 290         |
|    iterations           | 5           |
|    time_elapsed         | 35          |
|    total_timesteps      | 10240       |
| train/                  |             |
|    approx_kl            | 0.013964478 |
|    clip_fraction        | 0.141       |
|    clip_range           | 0.2         |
|    entropy_loss         | -9.84       |
|    explained_variance   | 0.428       |
|    learning_rate        | 0.0003      |
|    loss                 | 0.0908      |
|    n_updates            | 40          |
|    policy_gradient_loss | -0.0235     |
|    std                  | 0.985       |
|    value_loss           | 0.465       |
-----------------------------------------


[1m[32m[robosuite INFO] [0mLoading controller configuration from: /opt/miniconda3/envs/mujoco-env/lib/python3.10/site-packages/robosuite/controllers/config/robots/default_panda.json (composite_controller_factory.py:121)
[1m[32m[robosuite INFO] [0mLoading controller configuration from: /opt/miniconda3/envs/mujoco-env/lib/python3.10/site-packages/robosuite/controllers/config/robots/default_panda.json (composite_controller_factory.py:121)
[1m[32m[robosuite INFO] [0mLoading controller configuration from: /opt/miniconda3/envs/mujoco-env/lib/python3.10/site-packages/robosuite/controllers/config/robots/default_panda.json (composite_controller_factory.py:121)
[1m[32m[robosuite INFO] [0mLoading controller configuration from: /opt/miniconda3/envs/mujoco-env/lib/python3.10/site-packages/robosuite/controllers/config/robots/default_panda.json (composite_controller_factory.py:121)
[1m[32m[robosuite INFO] [0mLoading controller configuration from: /opt/miniconda3/envs/mujoco-env/lib/pyt

-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 200         |
|    ep_rew_mean          | 6.89        |
| time/                   |             |
|    fps                  | 289         |
|    iterations           | 6           |
|    time_elapsed         | 42          |
|    total_timesteps      | 12288       |
| train/                  |             |
|    approx_kl            | 0.012439124 |
|    clip_fraction        | 0.116       |
|    clip_range           | 0.2         |
|    entropy_loss         | -9.83       |
|    explained_variance   | 0.406       |
|    learning_rate        | 0.0003      |
|    loss                 | 0.0719      |
|    n_updates            | 50          |
|    policy_gradient_loss | -0.0187     |
|    std                  | 0.986       |
|    value_loss           | 0.287       |
-----------------------------------------


[1m[32m[robosuite INFO] [0mLoading controller configuration from: /opt/miniconda3/envs/mujoco-env/lib/python3.10/site-packages/robosuite/controllers/config/robots/default_panda.json (composite_controller_factory.py:121)
[1m[32m[robosuite INFO] [0mLoading controller configuration from: /opt/miniconda3/envs/mujoco-env/lib/python3.10/site-packages/robosuite/controllers/config/robots/default_panda.json (composite_controller_factory.py:121)
[1m[32m[robosuite INFO] [0mLoading controller configuration from: /opt/miniconda3/envs/mujoco-env/lib/python3.10/site-packages/robosuite/controllers/config/robots/default_panda.json (composite_controller_factory.py:121)
[1m[32m[robosuite INFO] [0mLoading controller configuration from: /opt/miniconda3/envs/mujoco-env/lib/python3.10/site-packages/robosuite/controllers/config/robots/default_panda.json (composite_controller_factory.py:121)
[1m[32m[robosuite INFO] [0mLoading controller configuration from: /opt/miniconda3/envs/mujoco-env/lib/pyt

-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 200         |
|    ep_rew_mean          | 8.13        |
| time/                   |             |
|    fps                  | 288         |
|    iterations           | 7           |
|    time_elapsed         | 49          |
|    total_timesteps      | 14336       |
| train/                  |             |
|    approx_kl            | 0.013713599 |
|    clip_fraction        | 0.154       |
|    clip_range           | 0.2         |
|    entropy_loss         | -9.83       |
|    explained_variance   | 0.409       |
|    learning_rate        | 0.0003      |
|    loss                 | 0.204       |
|    n_updates            | 60          |
|    policy_gradient_loss | -0.0229     |
|    std                  | 0.984       |
|    value_loss           | 0.585       |
-----------------------------------------


[1m[32m[robosuite INFO] [0mLoading controller configuration from: /opt/miniconda3/envs/mujoco-env/lib/python3.10/site-packages/robosuite/controllers/config/robots/default_panda.json (composite_controller_factory.py:121)
[1m[32m[robosuite INFO] [0mLoading controller configuration from: /opt/miniconda3/envs/mujoco-env/lib/python3.10/site-packages/robosuite/controllers/config/robots/default_panda.json (composite_controller_factory.py:121)
[1m[32m[robosuite INFO] [0mLoading controller configuration from: /opt/miniconda3/envs/mujoco-env/lib/python3.10/site-packages/robosuite/controllers/config/robots/default_panda.json (composite_controller_factory.py:121)
[1m[32m[robosuite INFO] [0mLoading controller configuration from: /opt/miniconda3/envs/mujoco-env/lib/python3.10/site-packages/robosuite/controllers/config/robots/default_panda.json (composite_controller_factory.py:121)
[1m[32m[robosuite INFO] [0mLoading controller configuration from: /opt/miniconda3/envs/mujoco-env/lib/pyt

-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 200         |
|    ep_rew_mean          | 8.33        |
| time/                   |             |
|    fps                  | 288         |
|    iterations           | 8           |
|    time_elapsed         | 56          |
|    total_timesteps      | 16384       |
| train/                  |             |
|    approx_kl            | 0.012706419 |
|    clip_fraction        | 0.123       |
|    clip_range           | 0.2         |
|    entropy_loss         | -9.8        |
|    explained_variance   | 0.282       |
|    learning_rate        | 0.0003      |
|    loss                 | 0.113       |
|    n_updates            | 70          |
|    policy_gradient_loss | -0.0188     |
|    std                  | 0.978       |
|    value_loss           | 0.751       |
-----------------------------------------


[1m[32m[robosuite INFO] [0mLoading controller configuration from: /opt/miniconda3/envs/mujoco-env/lib/python3.10/site-packages/robosuite/controllers/config/robots/default_panda.json (composite_controller_factory.py:121)
[1m[32m[robosuite INFO] [0mLoading controller configuration from: /opt/miniconda3/envs/mujoco-env/lib/python3.10/site-packages/robosuite/controllers/config/robots/default_panda.json (composite_controller_factory.py:121)
[1m[32m[robosuite INFO] [0mLoading controller configuration from: /opt/miniconda3/envs/mujoco-env/lib/python3.10/site-packages/robosuite/controllers/config/robots/default_panda.json (composite_controller_factory.py:121)
[1m[32m[robosuite INFO] [0mLoading controller configuration from: /opt/miniconda3/envs/mujoco-env/lib/python3.10/site-packages/robosuite/controllers/config/robots/default_panda.json (composite_controller_factory.py:121)
[1m[32m[robosuite INFO] [0mLoading controller configuration from: /opt/miniconda3/envs/mujoco-env/lib/pyt

-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 200         |
|    ep_rew_mean          | 8.43        |
| time/                   |             |
|    fps                  | 288         |
|    iterations           | 9           |
|    time_elapsed         | 63          |
|    total_timesteps      | 18432       |
| train/                  |             |
|    approx_kl            | 0.013838477 |
|    clip_fraction        | 0.127       |
|    clip_range           | 0.2         |
|    entropy_loss         | -9.74       |
|    explained_variance   | 0.54        |
|    learning_rate        | 0.0003      |
|    loss                 | 0.161       |
|    n_updates            | 80          |
|    policy_gradient_loss | -0.0204     |
|    std                  | 0.969       |
|    value_loss           | 0.497       |
-----------------------------------------


[1m[32m[robosuite INFO] [0mLoading controller configuration from: /opt/miniconda3/envs/mujoco-env/lib/python3.10/site-packages/robosuite/controllers/config/robots/default_panda.json (composite_controller_factory.py:121)
[1m[32m[robosuite INFO] [0mLoading controller configuration from: /opt/miniconda3/envs/mujoco-env/lib/python3.10/site-packages/robosuite/controllers/config/robots/default_panda.json (composite_controller_factory.py:121)
[1m[32m[robosuite INFO] [0mLoading controller configuration from: /opt/miniconda3/envs/mujoco-env/lib/python3.10/site-packages/robosuite/controllers/config/robots/default_panda.json (composite_controller_factory.py:121)
[1m[32m[robosuite INFO] [0mLoading controller configuration from: /opt/miniconda3/envs/mujoco-env/lib/python3.10/site-packages/robosuite/controllers/config/robots/default_panda.json (composite_controller_factory.py:121)
[1m[32m[robosuite INFO] [0mLoading controller configuration from: /opt/miniconda3/envs/mujoco-env/lib/pyt

-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 200         |
|    ep_rew_mean          | 9.06        |
| time/                   |             |
|    fps                  | 288         |
|    iterations           | 10          |
|    time_elapsed         | 71          |
|    total_timesteps      | 20480       |
| train/                  |             |
|    approx_kl            | 0.016309353 |
|    clip_fraction        | 0.167       |
|    clip_range           | 0.2         |
|    entropy_loss         | -9.7        |
|    explained_variance   | 0.553       |
|    learning_rate        | 0.0003      |
|    loss                 | 0.0761      |
|    n_updates            | 90          |
|    policy_gradient_loss | -0.0227     |
|    std                  | 0.967       |
|    value_loss           | 0.458       |
-----------------------------------------


[1m[32m[robosuite INFO] [0mLoading controller configuration from: /opt/miniconda3/envs/mujoco-env/lib/python3.10/site-packages/robosuite/controllers/config/robots/default_panda.json (composite_controller_factory.py:121)
[1m[32m[robosuite INFO] [0mLoading controller configuration from: /opt/miniconda3/envs/mujoco-env/lib/python3.10/site-packages/robosuite/controllers/config/robots/default_panda.json (composite_controller_factory.py:121)
[1m[32m[robosuite INFO] [0mLoading controller configuration from: /opt/miniconda3/envs/mujoco-env/lib/python3.10/site-packages/robosuite/controllers/config/robots/default_panda.json (composite_controller_factory.py:121)
[1m[32m[robosuite INFO] [0mLoading controller configuration from: /opt/miniconda3/envs/mujoco-env/lib/python3.10/site-packages/robosuite/controllers/config/robots/default_panda.json (composite_controller_factory.py:121)
[1m[32m[robosuite INFO] [0mLoading controller configuration from: /opt/miniconda3/envs/mujoco-env/lib/pyt

-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 200         |
|    ep_rew_mean          | 10.1        |
| time/                   |             |
|    fps                  | 287         |
|    iterations           | 11          |
|    time_elapsed         | 78          |
|    total_timesteps      | 22528       |
| train/                  |             |
|    approx_kl            | 0.015198961 |
|    clip_fraction        | 0.155       |
|    clip_range           | 0.2         |
|    entropy_loss         | -9.7        |
|    explained_variance   | 0.451       |
|    learning_rate        | 0.0003      |
|    loss                 | 0.182       |
|    n_updates            | 100         |
|    policy_gradient_loss | -0.0199     |
|    std                  | 0.969       |
|    value_loss           | 0.665       |
-----------------------------------------


[1m[32m[robosuite INFO] [0mLoading controller configuration from: /opt/miniconda3/envs/mujoco-env/lib/python3.10/site-packages/robosuite/controllers/config/robots/default_panda.json (composite_controller_factory.py:121)
[1m[32m[robosuite INFO] [0mLoading controller configuration from: /opt/miniconda3/envs/mujoco-env/lib/python3.10/site-packages/robosuite/controllers/config/robots/default_panda.json (composite_controller_factory.py:121)
[1m[32m[robosuite INFO] [0mLoading controller configuration from: /opt/miniconda3/envs/mujoco-env/lib/python3.10/site-packages/robosuite/controllers/config/robots/default_panda.json (composite_controller_factory.py:121)
[1m[32m[robosuite INFO] [0mLoading controller configuration from: /opt/miniconda3/envs/mujoco-env/lib/python3.10/site-packages/robosuite/controllers/config/robots/default_panda.json (composite_controller_factory.py:121)
[1m[32m[robosuite INFO] [0mLoading controller configuration from: /opt/miniconda3/envs/mujoco-env/lib/pyt

-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 200         |
|    ep_rew_mean          | 11.4        |
| time/                   |             |
|    fps                  | 287         |
|    iterations           | 12          |
|    time_elapsed         | 85          |
|    total_timesteps      | 24576       |
| train/                  |             |
|    approx_kl            | 0.014234647 |
|    clip_fraction        | 0.151       |
|    clip_range           | 0.2         |
|    entropy_loss         | -9.69       |
|    explained_variance   | 0.76        |
|    learning_rate        | 0.0003      |
|    loss                 | 0.102       |
|    n_updates            | 110         |
|    policy_gradient_loss | -0.022      |
|    std                  | 0.963       |
|    value_loss           | 0.475       |
-----------------------------------------


[1m[32m[robosuite INFO] [0mLoading controller configuration from: /opt/miniconda3/envs/mujoco-env/lib/python3.10/site-packages/robosuite/controllers/config/robots/default_panda.json (composite_controller_factory.py:121)
[1m[32m[robosuite INFO] [0mLoading controller configuration from: /opt/miniconda3/envs/mujoco-env/lib/python3.10/site-packages/robosuite/controllers/config/robots/default_panda.json (composite_controller_factory.py:121)
[1m[32m[robosuite INFO] [0mLoading controller configuration from: /opt/miniconda3/envs/mujoco-env/lib/python3.10/site-packages/robosuite/controllers/config/robots/default_panda.json (composite_controller_factory.py:121)
[1m[32m[robosuite INFO] [0mLoading controller configuration from: /opt/miniconda3/envs/mujoco-env/lib/python3.10/site-packages/robosuite/controllers/config/robots/default_panda.json (composite_controller_factory.py:121)
[1m[32m[robosuite INFO] [0mLoading controller configuration from: /opt/miniconda3/envs/mujoco-env/lib/pyt

-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 200         |
|    ep_rew_mean          | 12.3        |
| time/                   |             |
|    fps                  | 287         |
|    iterations           | 13          |
|    time_elapsed         | 92          |
|    total_timesteps      | 26624       |
| train/                  |             |
|    approx_kl            | 0.012371853 |
|    clip_fraction        | 0.124       |
|    clip_range           | 0.2         |
|    entropy_loss         | -9.67       |
|    explained_variance   | 0.552       |
|    learning_rate        | 0.0003      |
|    loss                 | 0.413       |
|    n_updates            | 120         |
|    policy_gradient_loss | -0.0195     |
|    std                  | 0.964       |
|    value_loss           | 0.942       |
-----------------------------------------


[1m[32m[robosuite INFO] [0mLoading controller configuration from: /opt/miniconda3/envs/mujoco-env/lib/python3.10/site-packages/robosuite/controllers/config/robots/default_panda.json (composite_controller_factory.py:121)
[1m[32m[robosuite INFO] [0mLoading controller configuration from: /opt/miniconda3/envs/mujoco-env/lib/python3.10/site-packages/robosuite/controllers/config/robots/default_panda.json (composite_controller_factory.py:121)
[1m[32m[robosuite INFO] [0mLoading controller configuration from: /opt/miniconda3/envs/mujoco-env/lib/python3.10/site-packages/robosuite/controllers/config/robots/default_panda.json (composite_controller_factory.py:121)
[1m[32m[robosuite INFO] [0mLoading controller configuration from: /opt/miniconda3/envs/mujoco-env/lib/python3.10/site-packages/robosuite/controllers/config/robots/default_panda.json (composite_controller_factory.py:121)
[1m[32m[robosuite INFO] [0mLoading controller configuration from: /opt/miniconda3/envs/mujoco-env/lib/pyt

-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 200         |
|    ep_rew_mean          | 13.6        |
| time/                   |             |
|    fps                  | 287         |
|    iterations           | 14          |
|    time_elapsed         | 99          |
|    total_timesteps      | 28672       |
| train/                  |             |
|    approx_kl            | 0.012890615 |
|    clip_fraction        | 0.123       |
|    clip_range           | 0.2         |
|    entropy_loss         | -9.68       |
|    explained_variance   | 0.593       |
|    learning_rate        | 0.0003      |
|    loss                 | 0.759       |
|    n_updates            | 130         |
|    policy_gradient_loss | -0.023      |
|    std                  | 0.966       |
|    value_loss           | 1.25        |
-----------------------------------------


[1m[32m[robosuite INFO] [0mLoading controller configuration from: /opt/miniconda3/envs/mujoco-env/lib/python3.10/site-packages/robosuite/controllers/config/robots/default_panda.json (composite_controller_factory.py:121)
[1m[32m[robosuite INFO] [0mLoading controller configuration from: /opt/miniconda3/envs/mujoco-env/lib/python3.10/site-packages/robosuite/controllers/config/robots/default_panda.json (composite_controller_factory.py:121)
[1m[32m[robosuite INFO] [0mLoading controller configuration from: /opt/miniconda3/envs/mujoco-env/lib/python3.10/site-packages/robosuite/controllers/config/robots/default_panda.json (composite_controller_factory.py:121)
[1m[32m[robosuite INFO] [0mLoading controller configuration from: /opt/miniconda3/envs/mujoco-env/lib/python3.10/site-packages/robosuite/controllers/config/robots/default_panda.json (composite_controller_factory.py:121)
[1m[32m[robosuite INFO] [0mLoading controller configuration from: /opt/miniconda3/envs/mujoco-env/lib/pyt

-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 200         |
|    ep_rew_mean          | 15.7        |
| time/                   |             |
|    fps                  | 287         |
|    iterations           | 15          |
|    time_elapsed         | 106         |
|    total_timesteps      | 30720       |
| train/                  |             |
|    approx_kl            | 0.012828422 |
|    clip_fraction        | 0.136       |
|    clip_range           | 0.2         |
|    entropy_loss         | -9.66       |
|    explained_variance   | 0.482       |
|    learning_rate        | 0.0003      |
|    loss                 | 0.368       |
|    n_updates            | 140         |
|    policy_gradient_loss | -0.0209     |
|    std                  | 0.96        |
|    value_loss           | 1.37        |
-----------------------------------------


[1m[32m[robosuite INFO] [0mLoading controller configuration from: /opt/miniconda3/envs/mujoco-env/lib/python3.10/site-packages/robosuite/controllers/config/robots/default_panda.json (composite_controller_factory.py:121)
[1m[32m[robosuite INFO] [0mLoading controller configuration from: /opt/miniconda3/envs/mujoco-env/lib/python3.10/site-packages/robosuite/controllers/config/robots/default_panda.json (composite_controller_factory.py:121)
[1m[32m[robosuite INFO] [0mLoading controller configuration from: /opt/miniconda3/envs/mujoco-env/lib/python3.10/site-packages/robosuite/controllers/config/robots/default_panda.json (composite_controller_factory.py:121)
[1m[32m[robosuite INFO] [0mLoading controller configuration from: /opt/miniconda3/envs/mujoco-env/lib/python3.10/site-packages/robosuite/controllers/config/robots/default_panda.json (composite_controller_factory.py:121)
[1m[32m[robosuite INFO] [0mLoading controller configuration from: /opt/miniconda3/envs/mujoco-env/lib/pyt

------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 200          |
|    ep_rew_mean          | 16.9         |
| time/                   |              |
|    fps                  | 287          |
|    iterations           | 16           |
|    time_elapsed         | 114          |
|    total_timesteps      | 32768        |
| train/                  |              |
|    approx_kl            | 0.0139185665 |
|    clip_fraction        | 0.135        |
|    clip_range           | 0.2          |
|    entropy_loss         | -9.63        |
|    explained_variance   | 0.368        |
|    learning_rate        | 0.0003       |
|    loss                 | 0.776        |
|    n_updates            | 150          |
|    policy_gradient_loss | -0.0201      |
|    std                  | 0.956        |
|    value_loss           | 2.37         |
------------------------------------------


[1m[32m[robosuite INFO] [0mLoading controller configuration from: /opt/miniconda3/envs/mujoco-env/lib/python3.10/site-packages/robosuite/controllers/config/robots/default_panda.json (composite_controller_factory.py:121)
[1m[32m[robosuite INFO] [0mLoading controller configuration from: /opt/miniconda3/envs/mujoco-env/lib/python3.10/site-packages/robosuite/controllers/config/robots/default_panda.json (composite_controller_factory.py:121)
[1m[32m[robosuite INFO] [0mLoading controller configuration from: /opt/miniconda3/envs/mujoco-env/lib/python3.10/site-packages/robosuite/controllers/config/robots/default_panda.json (composite_controller_factory.py:121)
[1m[32m[robosuite INFO] [0mLoading controller configuration from: /opt/miniconda3/envs/mujoco-env/lib/python3.10/site-packages/robosuite/controllers/config/robots/default_panda.json (composite_controller_factory.py:121)
[1m[32m[robosuite INFO] [0mLoading controller configuration from: /opt/miniconda3/envs/mujoco-env/lib/pyt

------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 200          |
|    ep_rew_mean          | 17.6         |
| time/                   |              |
|    fps                  | 286          |
|    iterations           | 17           |
|    time_elapsed         | 121          |
|    total_timesteps      | 34816        |
| train/                  |              |
|    approx_kl            | 0.0148983225 |
|    clip_fraction        | 0.17         |
|    clip_range           | 0.2          |
|    entropy_loss         | -9.61        |
|    explained_variance   | 0.472        |
|    learning_rate        | 0.0003       |
|    loss                 | 0.621        |
|    n_updates            | 160          |
|    policy_gradient_loss | -0.0244      |
|    std                  | 0.954        |
|    value_loss           | 1.56         |
------------------------------------------


[1m[32m[robosuite INFO] [0mLoading controller configuration from: /opt/miniconda3/envs/mujoco-env/lib/python3.10/site-packages/robosuite/controllers/config/robots/default_panda.json (composite_controller_factory.py:121)
[1m[32m[robosuite INFO] [0mLoading controller configuration from: /opt/miniconda3/envs/mujoco-env/lib/python3.10/site-packages/robosuite/controllers/config/robots/default_panda.json (composite_controller_factory.py:121)
[1m[32m[robosuite INFO] [0mLoading controller configuration from: /opt/miniconda3/envs/mujoco-env/lib/python3.10/site-packages/robosuite/controllers/config/robots/default_panda.json (composite_controller_factory.py:121)
[1m[32m[robosuite INFO] [0mLoading controller configuration from: /opt/miniconda3/envs/mujoco-env/lib/python3.10/site-packages/robosuite/controllers/config/robots/default_panda.json (composite_controller_factory.py:121)
[1m[32m[robosuite INFO] [0mLoading controller configuration from: /opt/miniconda3/envs/mujoco-env/lib/pyt

------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 200          |
|    ep_rew_mean          | 18.9         |
| time/                   |              |
|    fps                  | 286          |
|    iterations           | 18           |
|    time_elapsed         | 128          |
|    total_timesteps      | 36864        |
| train/                  |              |
|    approx_kl            | 0.0144072585 |
|    clip_fraction        | 0.154        |
|    clip_range           | 0.2          |
|    entropy_loss         | -9.59        |
|    explained_variance   | 0.421        |
|    learning_rate        | 0.0003       |
|    loss                 | 0.451        |
|    n_updates            | 170          |
|    policy_gradient_loss | -0.0214      |
|    std                  | 0.952        |
|    value_loss           | 2.17         |
------------------------------------------


[1m[32m[robosuite INFO] [0mLoading controller configuration from: /opt/miniconda3/envs/mujoco-env/lib/python3.10/site-packages/robosuite/controllers/config/robots/default_panda.json (composite_controller_factory.py:121)
[1m[32m[robosuite INFO] [0mLoading controller configuration from: /opt/miniconda3/envs/mujoco-env/lib/python3.10/site-packages/robosuite/controllers/config/robots/default_panda.json (composite_controller_factory.py:121)
[1m[32m[robosuite INFO] [0mLoading controller configuration from: /opt/miniconda3/envs/mujoco-env/lib/python3.10/site-packages/robosuite/controllers/config/robots/default_panda.json (composite_controller_factory.py:121)
[1m[32m[robosuite INFO] [0mLoading controller configuration from: /opt/miniconda3/envs/mujoco-env/lib/python3.10/site-packages/robosuite/controllers/config/robots/default_panda.json (composite_controller_factory.py:121)
[1m[32m[robosuite INFO] [0mLoading controller configuration from: /opt/miniconda3/envs/mujoco-env/lib/pyt

-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 200         |
|    ep_rew_mean          | 20.4        |
| time/                   |             |
|    fps                  | 286         |
|    iterations           | 19          |
|    time_elapsed         | 135         |
|    total_timesteps      | 38912       |
| train/                  |             |
|    approx_kl            | 0.013433233 |
|    clip_fraction        | 0.118       |
|    clip_range           | 0.2         |
|    entropy_loss         | -9.6        |
|    explained_variance   | 0.382       |
|    learning_rate        | 0.0003      |
|    loss                 | 0.462       |
|    n_updates            | 180         |
|    policy_gradient_loss | -0.0148     |
|    std                  | 0.955       |
|    value_loss           | 2.44        |
-----------------------------------------


[1m[32m[robosuite INFO] [0mLoading controller configuration from: /opt/miniconda3/envs/mujoco-env/lib/python3.10/site-packages/robosuite/controllers/config/robots/default_panda.json (composite_controller_factory.py:121)
[1m[32m[robosuite INFO] [0mLoading controller configuration from: /opt/miniconda3/envs/mujoco-env/lib/python3.10/site-packages/robosuite/controllers/config/robots/default_panda.json (composite_controller_factory.py:121)
[1m[32m[robosuite INFO] [0mLoading controller configuration from: /opt/miniconda3/envs/mujoco-env/lib/python3.10/site-packages/robosuite/controllers/config/robots/default_panda.json (composite_controller_factory.py:121)
[1m[32m[robosuite INFO] [0mLoading controller configuration from: /opt/miniconda3/envs/mujoco-env/lib/python3.10/site-packages/robosuite/controllers/config/robots/default_panda.json (composite_controller_factory.py:121)
[1m[32m[robosuite INFO] [0mLoading controller configuration from: /opt/miniconda3/envs/mujoco-env/lib/pyt

-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 200         |
|    ep_rew_mean          | 21.5        |
| time/                   |             |
|    fps                  | 286         |
|    iterations           | 20          |
|    time_elapsed         | 142         |
|    total_timesteps      | 40960       |
| train/                  |             |
|    approx_kl            | 0.015145335 |
|    clip_fraction        | 0.17        |
|    clip_range           | 0.2         |
|    entropy_loss         | -9.6        |
|    explained_variance   | 0.167       |
|    learning_rate        | 0.0003      |
|    loss                 | 0.252       |
|    n_updates            | 190         |
|    policy_gradient_loss | -0.0205     |
|    std                  | 0.954       |
|    value_loss           | 2.23        |
-----------------------------------------


[1m[32m[robosuite INFO] [0mLoading controller configuration from: /opt/miniconda3/envs/mujoco-env/lib/python3.10/site-packages/robosuite/controllers/config/robots/default_panda.json (composite_controller_factory.py:121)
[1m[32m[robosuite INFO] [0mLoading controller configuration from: /opt/miniconda3/envs/mujoco-env/lib/python3.10/site-packages/robosuite/controllers/config/robots/default_panda.json (composite_controller_factory.py:121)
[1m[32m[robosuite INFO] [0mLoading controller configuration from: /opt/miniconda3/envs/mujoco-env/lib/python3.10/site-packages/robosuite/controllers/config/robots/default_panda.json (composite_controller_factory.py:121)
[1m[32m[robosuite INFO] [0mLoading controller configuration from: /opt/miniconda3/envs/mujoco-env/lib/python3.10/site-packages/robosuite/controllers/config/robots/default_panda.json (composite_controller_factory.py:121)
[1m[32m[robosuite INFO] [0mLoading controller configuration from: /opt/miniconda3/envs/mujoco-env/lib/pyt

-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 200         |
|    ep_rew_mean          | 22.7        |
| time/                   |             |
|    fps                  | 286         |
|    iterations           | 21          |
|    time_elapsed         | 150         |
|    total_timesteps      | 43008       |
| train/                  |             |
|    approx_kl            | 0.015207767 |
|    clip_fraction        | 0.158       |
|    clip_range           | 0.2         |
|    entropy_loss         | -9.57       |
|    explained_variance   | 0.327       |
|    learning_rate        | 0.0003      |
|    loss                 | 0.814       |
|    n_updates            | 200         |
|    policy_gradient_loss | -0.0187     |
|    std                  | 0.946       |
|    value_loss           | 2.37        |
-----------------------------------------


[1m[32m[robosuite INFO] [0mLoading controller configuration from: /opt/miniconda3/envs/mujoco-env/lib/python3.10/site-packages/robosuite/controllers/config/robots/default_panda.json (composite_controller_factory.py:121)
[1m[32m[robosuite INFO] [0mLoading controller configuration from: /opt/miniconda3/envs/mujoco-env/lib/python3.10/site-packages/robosuite/controllers/config/robots/default_panda.json (composite_controller_factory.py:121)
[1m[32m[robosuite INFO] [0mLoading controller configuration from: /opt/miniconda3/envs/mujoco-env/lib/python3.10/site-packages/robosuite/controllers/config/robots/default_panda.json (composite_controller_factory.py:121)
[1m[32m[robosuite INFO] [0mLoading controller configuration from: /opt/miniconda3/envs/mujoco-env/lib/python3.10/site-packages/robosuite/controllers/config/robots/default_panda.json (composite_controller_factory.py:121)
[1m[32m[robosuite INFO] [0mLoading controller configuration from: /opt/miniconda3/envs/mujoco-env/lib/pyt

-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 200         |
|    ep_rew_mean          | 23.3        |
| time/                   |             |
|    fps                  | 286         |
|    iterations           | 22          |
|    time_elapsed         | 157         |
|    total_timesteps      | 45056       |
| train/                  |             |
|    approx_kl            | 0.011964684 |
|    clip_fraction        | 0.119       |
|    clip_range           | 0.2         |
|    entropy_loss         | -9.53       |
|    explained_variance   | 0.561       |
|    learning_rate        | 0.0003      |
|    loss                 | 1.11        |
|    n_updates            | 210         |
|    policy_gradient_loss | -0.0165     |
|    std                  | 0.943       |
|    value_loss           | 2.45        |
-----------------------------------------


[1m[32m[robosuite INFO] [0mLoading controller configuration from: /opt/miniconda3/envs/mujoco-env/lib/python3.10/site-packages/robosuite/controllers/config/robots/default_panda.json (composite_controller_factory.py:121)
[1m[32m[robosuite INFO] [0mLoading controller configuration from: /opt/miniconda3/envs/mujoco-env/lib/python3.10/site-packages/robosuite/controllers/config/robots/default_panda.json (composite_controller_factory.py:121)
[1m[32m[robosuite INFO] [0mLoading controller configuration from: /opt/miniconda3/envs/mujoco-env/lib/python3.10/site-packages/robosuite/controllers/config/robots/default_panda.json (composite_controller_factory.py:121)
[1m[32m[robosuite INFO] [0mLoading controller configuration from: /opt/miniconda3/envs/mujoco-env/lib/python3.10/site-packages/robosuite/controllers/config/robots/default_panda.json (composite_controller_factory.py:121)
[1m[32m[robosuite INFO] [0mLoading controller configuration from: /opt/miniconda3/envs/mujoco-env/lib/pyt

-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 200         |
|    ep_rew_mean          | 23.2        |
| time/                   |             |
|    fps                  | 286         |
|    iterations           | 23          |
|    time_elapsed         | 164         |
|    total_timesteps      | 47104       |
| train/                  |             |
|    approx_kl            | 0.018347703 |
|    clip_fraction        | 0.172       |
|    clip_range           | 0.2         |
|    entropy_loss         | -9.52       |
|    explained_variance   | 0.507       |
|    learning_rate        | 0.0003      |
|    loss                 | 0.68        |
|    n_updates            | 220         |
|    policy_gradient_loss | -0.0228     |
|    std                  | 0.943       |
|    value_loss           | 1.87        |
-----------------------------------------


[1m[32m[robosuite INFO] [0mLoading controller configuration from: /opt/miniconda3/envs/mujoco-env/lib/python3.10/site-packages/robosuite/controllers/config/robots/default_panda.json (composite_controller_factory.py:121)
[1m[32m[robosuite INFO] [0mLoading controller configuration from: /opt/miniconda3/envs/mujoco-env/lib/python3.10/site-packages/robosuite/controllers/config/robots/default_panda.json (composite_controller_factory.py:121)
[1m[32m[robosuite INFO] [0mLoading controller configuration from: /opt/miniconda3/envs/mujoco-env/lib/python3.10/site-packages/robosuite/controllers/config/robots/default_panda.json (composite_controller_factory.py:121)
[1m[32m[robosuite INFO] [0mLoading controller configuration from: /opt/miniconda3/envs/mujoco-env/lib/python3.10/site-packages/robosuite/controllers/config/robots/default_panda.json (composite_controller_factory.py:121)
[1m[32m[robosuite INFO] [0mLoading controller configuration from: /opt/miniconda3/envs/mujoco-env/lib/pyt

-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 200         |
|    ep_rew_mean          | 24          |
| time/                   |             |
|    fps                  | 286         |
|    iterations           | 24          |
|    time_elapsed         | 171         |
|    total_timesteps      | 49152       |
| train/                  |             |
|    approx_kl            | 0.015257711 |
|    clip_fraction        | 0.185       |
|    clip_range           | 0.2         |
|    entropy_loss         | -9.52       |
|    explained_variance   | 0.456       |
|    learning_rate        | 0.0003      |
|    loss                 | 0.622       |
|    n_updates            | 230         |
|    policy_gradient_loss | -0.0213     |
|    std                  | 0.943       |
|    value_loss           | 2.07        |
-----------------------------------------


[1m[32m[robosuite INFO] [0mLoading controller configuration from: /opt/miniconda3/envs/mujoco-env/lib/python3.10/site-packages/robosuite/controllers/config/robots/default_panda.json (composite_controller_factory.py:121)
[1m[32m[robosuite INFO] [0mLoading controller configuration from: /opt/miniconda3/envs/mujoco-env/lib/python3.10/site-packages/robosuite/controllers/config/robots/default_panda.json (composite_controller_factory.py:121)
[1m[32m[robosuite INFO] [0mLoading controller configuration from: /opt/miniconda3/envs/mujoco-env/lib/python3.10/site-packages/robosuite/controllers/config/robots/default_panda.json (composite_controller_factory.py:121)
[1m[32m[robosuite INFO] [0mLoading controller configuration from: /opt/miniconda3/envs/mujoco-env/lib/python3.10/site-packages/robosuite/controllers/config/robots/default_panda.json (composite_controller_factory.py:121)
[1m[32m[robosuite INFO] [0mLoading controller configuration from: /opt/miniconda3/envs/mujoco-env/lib/pyt

-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 200         |
|    ep_rew_mean          | 24          |
| time/                   |             |
|    fps                  | 286         |
|    iterations           | 25          |
|    time_elapsed         | 178         |
|    total_timesteps      | 51200       |
| train/                  |             |
|    approx_kl            | 0.016468633 |
|    clip_fraction        | 0.167       |
|    clip_range           | 0.2         |
|    entropy_loss         | -9.53       |
|    explained_variance   | 0.406       |
|    learning_rate        | 0.0003      |
|    loss                 | 1.4         |
|    n_updates            | 240         |
|    policy_gradient_loss | -0.0206     |
|    std                  | 0.948       |
|    value_loss           | 1.73        |
-----------------------------------------


[1m[32m[robosuite INFO] [0mLoading controller configuration from: /opt/miniconda3/envs/mujoco-env/lib/python3.10/site-packages/robosuite/controllers/config/robots/default_panda.json (composite_controller_factory.py:121)
[1m[32m[robosuite INFO] [0mLoading controller configuration from: /opt/miniconda3/envs/mujoco-env/lib/python3.10/site-packages/robosuite/controllers/config/robots/default_panda.json (composite_controller_factory.py:121)
[1m[32m[robosuite INFO] [0mLoading controller configuration from: /opt/miniconda3/envs/mujoco-env/lib/python3.10/site-packages/robosuite/controllers/config/robots/default_panda.json (composite_controller_factory.py:121)
[1m[32m[robosuite INFO] [0mLoading controller configuration from: /opt/miniconda3/envs/mujoco-env/lib/python3.10/site-packages/robosuite/controllers/config/robots/default_panda.json (composite_controller_factory.py:121)
[1m[32m[robosuite INFO] [0mLoading controller configuration from: /opt/miniconda3/envs/mujoco-env/lib/pyt

-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 200         |
|    ep_rew_mean          | 24.7        |
| time/                   |             |
|    fps                  | 286         |
|    iterations           | 26          |
|    time_elapsed         | 185         |
|    total_timesteps      | 53248       |
| train/                  |             |
|    approx_kl            | 0.015368038 |
|    clip_fraction        | 0.167       |
|    clip_range           | 0.2         |
|    entropy_loss         | -9.54       |
|    explained_variance   | 0.323       |
|    learning_rate        | 0.0003      |
|    loss                 | 0.846       |
|    n_updates            | 250         |
|    policy_gradient_loss | -0.0221     |
|    std                  | 0.944       |
|    value_loss           | 2.54        |
-----------------------------------------


[1m[32m[robosuite INFO] [0mLoading controller configuration from: /opt/miniconda3/envs/mujoco-env/lib/python3.10/site-packages/robosuite/controllers/config/robots/default_panda.json (composite_controller_factory.py:121)
[1m[32m[robosuite INFO] [0mLoading controller configuration from: /opt/miniconda3/envs/mujoco-env/lib/python3.10/site-packages/robosuite/controllers/config/robots/default_panda.json (composite_controller_factory.py:121)
[1m[32m[robosuite INFO] [0mLoading controller configuration from: /opt/miniconda3/envs/mujoco-env/lib/python3.10/site-packages/robosuite/controllers/config/robots/default_panda.json (composite_controller_factory.py:121)
[1m[32m[robosuite INFO] [0mLoading controller configuration from: /opt/miniconda3/envs/mujoco-env/lib/python3.10/site-packages/robosuite/controllers/config/robots/default_panda.json (composite_controller_factory.py:121)
[1m[32m[robosuite INFO] [0mLoading controller configuration from: /opt/miniconda3/envs/mujoco-env/lib/pyt

-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 200         |
|    ep_rew_mean          | 25.7        |
| time/                   |             |
|    fps                  | 286         |
|    iterations           | 27          |
|    time_elapsed         | 193         |
|    total_timesteps      | 55296       |
| train/                  |             |
|    approx_kl            | 0.014564999 |
|    clip_fraction        | 0.176       |
|    clip_range           | 0.2         |
|    entropy_loss         | -9.51       |
|    explained_variance   | 0.457       |
|    learning_rate        | 0.0003      |
|    loss                 | 1.08        |
|    n_updates            | 260         |
|    policy_gradient_loss | -0.0187     |
|    std                  | 0.941       |
|    value_loss           | 2.65        |
-----------------------------------------


[1m[32m[robosuite INFO] [0mLoading controller configuration from: /opt/miniconda3/envs/mujoco-env/lib/python3.10/site-packages/robosuite/controllers/config/robots/default_panda.json (composite_controller_factory.py:121)
[1m[32m[robosuite INFO] [0mLoading controller configuration from: /opt/miniconda3/envs/mujoco-env/lib/python3.10/site-packages/robosuite/controllers/config/robots/default_panda.json (composite_controller_factory.py:121)
[1m[32m[robosuite INFO] [0mLoading controller configuration from: /opt/miniconda3/envs/mujoco-env/lib/python3.10/site-packages/robosuite/controllers/config/robots/default_panda.json (composite_controller_factory.py:121)
[1m[32m[robosuite INFO] [0mLoading controller configuration from: /opt/miniconda3/envs/mujoco-env/lib/python3.10/site-packages/robosuite/controllers/config/robots/default_panda.json (composite_controller_factory.py:121)
[1m[32m[robosuite INFO] [0mLoading controller configuration from: /opt/miniconda3/envs/mujoco-env/lib/pyt

-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 200         |
|    ep_rew_mean          | 26.5        |
| time/                   |             |
|    fps                  | 286         |
|    iterations           | 28          |
|    time_elapsed         | 200         |
|    total_timesteps      | 57344       |
| train/                  |             |
|    approx_kl            | 0.013648473 |
|    clip_fraction        | 0.153       |
|    clip_range           | 0.2         |
|    entropy_loss         | -9.5        |
|    explained_variance   | 0.413       |
|    learning_rate        | 0.0003      |
|    loss                 | 1.12        |
|    n_updates            | 270         |
|    policy_gradient_loss | -0.0173     |
|    std                  | 0.941       |
|    value_loss           | 3.75        |
-----------------------------------------


[1m[32m[robosuite INFO] [0mLoading controller configuration from: /opt/miniconda3/envs/mujoco-env/lib/python3.10/site-packages/robosuite/controllers/config/robots/default_panda.json (composite_controller_factory.py:121)
[1m[32m[robosuite INFO] [0mLoading controller configuration from: /opt/miniconda3/envs/mujoco-env/lib/python3.10/site-packages/robosuite/controllers/config/robots/default_panda.json (composite_controller_factory.py:121)
[1m[32m[robosuite INFO] [0mLoading controller configuration from: /opt/miniconda3/envs/mujoco-env/lib/python3.10/site-packages/robosuite/controllers/config/robots/default_panda.json (composite_controller_factory.py:121)
[1m[32m[robosuite INFO] [0mLoading controller configuration from: /opt/miniconda3/envs/mujoco-env/lib/python3.10/site-packages/robosuite/controllers/config/robots/default_panda.json (composite_controller_factory.py:121)
[1m[32m[robosuite INFO] [0mLoading controller configuration from: /opt/miniconda3/envs/mujoco-env/lib/pyt

-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 200         |
|    ep_rew_mean          | 26.9        |
| time/                   |             |
|    fps                  | 286         |
|    iterations           | 29          |
|    time_elapsed         | 207         |
|    total_timesteps      | 59392       |
| train/                  |             |
|    approx_kl            | 0.015852353 |
|    clip_fraction        | 0.176       |
|    clip_range           | 0.2         |
|    entropy_loss         | -9.48       |
|    explained_variance   | 0.567       |
|    learning_rate        | 0.0003      |
|    loss                 | 0.662       |
|    n_updates            | 280         |
|    policy_gradient_loss | -0.0229     |
|    std                  | 0.936       |
|    value_loss           | 2.74        |
-----------------------------------------


[1m[32m[robosuite INFO] [0mLoading controller configuration from: /opt/miniconda3/envs/mujoco-env/lib/python3.10/site-packages/robosuite/controllers/config/robots/default_panda.json (composite_controller_factory.py:121)
[1m[32m[robosuite INFO] [0mLoading controller configuration from: /opt/miniconda3/envs/mujoco-env/lib/python3.10/site-packages/robosuite/controllers/config/robots/default_panda.json (composite_controller_factory.py:121)
[1m[32m[robosuite INFO] [0mLoading controller configuration from: /opt/miniconda3/envs/mujoco-env/lib/python3.10/site-packages/robosuite/controllers/config/robots/default_panda.json (composite_controller_factory.py:121)
[1m[32m[robosuite INFO] [0mLoading controller configuration from: /opt/miniconda3/envs/mujoco-env/lib/python3.10/site-packages/robosuite/controllers/config/robots/default_panda.json (composite_controller_factory.py:121)
[1m[32m[robosuite INFO] [0mLoading controller configuration from: /opt/miniconda3/envs/mujoco-env/lib/pyt

-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 200         |
|    ep_rew_mean          | 27.1        |
| time/                   |             |
|    fps                  | 286         |
|    iterations           | 30          |
|    time_elapsed         | 214         |
|    total_timesteps      | 61440       |
| train/                  |             |
|    approx_kl            | 0.015803535 |
|    clip_fraction        | 0.159       |
|    clip_range           | 0.2         |
|    entropy_loss         | -9.47       |
|    explained_variance   | 0.389       |
|    learning_rate        | 0.0003      |
|    loss                 | 1.13        |
|    n_updates            | 290         |
|    policy_gradient_loss | -0.0165     |
|    std                  | 0.936       |
|    value_loss           | 2.85        |
-----------------------------------------


[1m[32m[robosuite INFO] [0mLoading controller configuration from: /opt/miniconda3/envs/mujoco-env/lib/python3.10/site-packages/robosuite/controllers/config/robots/default_panda.json (composite_controller_factory.py:121)
[1m[32m[robosuite INFO] [0mLoading controller configuration from: /opt/miniconda3/envs/mujoco-env/lib/python3.10/site-packages/robosuite/controllers/config/robots/default_panda.json (composite_controller_factory.py:121)
[1m[32m[robosuite INFO] [0mLoading controller configuration from: /opt/miniconda3/envs/mujoco-env/lib/python3.10/site-packages/robosuite/controllers/config/robots/default_panda.json (composite_controller_factory.py:121)
[1m[32m[robosuite INFO] [0mLoading controller configuration from: /opt/miniconda3/envs/mujoco-env/lib/python3.10/site-packages/robosuite/controllers/config/robots/default_panda.json (composite_controller_factory.py:121)
[1m[32m[robosuite INFO] [0mLoading controller configuration from: /opt/miniconda3/envs/mujoco-env/lib/pyt

-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 200         |
|    ep_rew_mean          | 28          |
| time/                   |             |
|    fps                  | 286         |
|    iterations           | 31          |
|    time_elapsed         | 221         |
|    total_timesteps      | 63488       |
| train/                  |             |
|    approx_kl            | 0.013993429 |
|    clip_fraction        | 0.155       |
|    clip_range           | 0.2         |
|    entropy_loss         | -9.45       |
|    explained_variance   | 0.298       |
|    learning_rate        | 0.0003      |
|    loss                 | 1.28        |
|    n_updates            | 300         |
|    policy_gradient_loss | -0.0178     |
|    std                  | 0.933       |
|    value_loss           | 2.92        |
-----------------------------------------


[1m[32m[robosuite INFO] [0mLoading controller configuration from: /opt/miniconda3/envs/mujoco-env/lib/python3.10/site-packages/robosuite/controllers/config/robots/default_panda.json (composite_controller_factory.py:121)
[1m[32m[robosuite INFO] [0mLoading controller configuration from: /opt/miniconda3/envs/mujoco-env/lib/python3.10/site-packages/robosuite/controllers/config/robots/default_panda.json (composite_controller_factory.py:121)
[1m[32m[robosuite INFO] [0mLoading controller configuration from: /opt/miniconda3/envs/mujoco-env/lib/python3.10/site-packages/robosuite/controllers/config/robots/default_panda.json (composite_controller_factory.py:121)
[1m[32m[robosuite INFO] [0mLoading controller configuration from: /opt/miniconda3/envs/mujoco-env/lib/python3.10/site-packages/robosuite/controllers/config/robots/default_panda.json (composite_controller_factory.py:121)
[1m[32m[robosuite INFO] [0mLoading controller configuration from: /opt/miniconda3/envs/mujoco-env/lib/pyt

----------------------------------------
| rollout/                |            |
|    ep_len_mean          | 200        |
|    ep_rew_mean          | 28.5       |
| time/                   |            |
|    fps                  | 286        |
|    iterations           | 32         |
|    time_elapsed         | 229        |
|    total_timesteps      | 65536      |
| train/                  |            |
|    approx_kl            | 0.01336121 |
|    clip_fraction        | 0.143      |
|    clip_range           | 0.2        |
|    entropy_loss         | -9.45      |
|    explained_variance   | 0.541      |
|    learning_rate        | 0.0003     |
|    loss                 | 0.435      |
|    n_updates            | 310        |
|    policy_gradient_loss | -0.0192    |
|    std                  | 0.935      |
|    value_loss           | 2.91       |
----------------------------------------


[1m[32m[robosuite INFO] [0mLoading controller configuration from: /opt/miniconda3/envs/mujoco-env/lib/python3.10/site-packages/robosuite/controllers/config/robots/default_panda.json (composite_controller_factory.py:121)
[1m[32m[robosuite INFO] [0mLoading controller configuration from: /opt/miniconda3/envs/mujoco-env/lib/python3.10/site-packages/robosuite/controllers/config/robots/default_panda.json (composite_controller_factory.py:121)
[1m[32m[robosuite INFO] [0mLoading controller configuration from: /opt/miniconda3/envs/mujoco-env/lib/python3.10/site-packages/robosuite/controllers/config/robots/default_panda.json (composite_controller_factory.py:121)
[1m[32m[robosuite INFO] [0mLoading controller configuration from: /opt/miniconda3/envs/mujoco-env/lib/python3.10/site-packages/robosuite/controllers/config/robots/default_panda.json (composite_controller_factory.py:121)
[1m[32m[robosuite INFO] [0mLoading controller configuration from: /opt/miniconda3/envs/mujoco-env/lib/pyt

-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 200         |
|    ep_rew_mean          | 28.9        |
| time/                   |             |
|    fps                  | 286         |
|    iterations           | 33          |
|    time_elapsed         | 236         |
|    total_timesteps      | 67584       |
| train/                  |             |
|    approx_kl            | 0.015031665 |
|    clip_fraction        | 0.17        |
|    clip_range           | 0.2         |
|    entropy_loss         | -9.45       |
|    explained_variance   | 0.478       |
|    learning_rate        | 0.0003      |
|    loss                 | 0.907       |
|    n_updates            | 320         |
|    policy_gradient_loss | -0.0171     |
|    std                  | 0.934       |
|    value_loss           | 2.31        |
-----------------------------------------


[1m[32m[robosuite INFO] [0mLoading controller configuration from: /opt/miniconda3/envs/mujoco-env/lib/python3.10/site-packages/robosuite/controllers/config/robots/default_panda.json (composite_controller_factory.py:121)
[1m[32m[robosuite INFO] [0mLoading controller configuration from: /opt/miniconda3/envs/mujoco-env/lib/python3.10/site-packages/robosuite/controllers/config/robots/default_panda.json (composite_controller_factory.py:121)
[1m[32m[robosuite INFO] [0mLoading controller configuration from: /opt/miniconda3/envs/mujoco-env/lib/python3.10/site-packages/robosuite/controllers/config/robots/default_panda.json (composite_controller_factory.py:121)
[1m[32m[robosuite INFO] [0mLoading controller configuration from: /opt/miniconda3/envs/mujoco-env/lib/python3.10/site-packages/robosuite/controllers/config/robots/default_panda.json (composite_controller_factory.py:121)
[1m[32m[robosuite INFO] [0mLoading controller configuration from: /opt/miniconda3/envs/mujoco-env/lib/pyt

-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 200         |
|    ep_rew_mean          | 29.7        |
| time/                   |             |
|    fps                  | 285         |
|    iterations           | 34          |
|    time_elapsed         | 243         |
|    total_timesteps      | 69632       |
| train/                  |             |
|    approx_kl            | 0.018628385 |
|    clip_fraction        | 0.234       |
|    clip_range           | 0.2         |
|    entropy_loss         | -9.43       |
|    explained_variance   | 0.747       |
|    learning_rate        | 0.0003      |
|    loss                 | 1.83        |
|    n_updates            | 330         |
|    policy_gradient_loss | -0.0245     |
|    std                  | 0.929       |
|    value_loss           | 2.15        |
-----------------------------------------


[1m[32m[robosuite INFO] [0mLoading controller configuration from: /opt/miniconda3/envs/mujoco-env/lib/python3.10/site-packages/robosuite/controllers/config/robots/default_panda.json (composite_controller_factory.py:121)
[1m[32m[robosuite INFO] [0mLoading controller configuration from: /opt/miniconda3/envs/mujoco-env/lib/python3.10/site-packages/robosuite/controllers/config/robots/default_panda.json (composite_controller_factory.py:121)
[1m[32m[robosuite INFO] [0mLoading controller configuration from: /opt/miniconda3/envs/mujoco-env/lib/python3.10/site-packages/robosuite/controllers/config/robots/default_panda.json (composite_controller_factory.py:121)
[1m[32m[robosuite INFO] [0mLoading controller configuration from: /opt/miniconda3/envs/mujoco-env/lib/python3.10/site-packages/robosuite/controllers/config/robots/default_panda.json (composite_controller_factory.py:121)
[1m[32m[robosuite INFO] [0mLoading controller configuration from: /opt/miniconda3/envs/mujoco-env/lib/pyt

-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 200         |
|    ep_rew_mean          | 30          |
| time/                   |             |
|    fps                  | 285         |
|    iterations           | 35          |
|    time_elapsed         | 250         |
|    total_timesteps      | 71680       |
| train/                  |             |
|    approx_kl            | 0.013559373 |
|    clip_fraction        | 0.162       |
|    clip_range           | 0.2         |
|    entropy_loss         | -9.41       |
|    explained_variance   | 0.506       |
|    learning_rate        | 0.0003      |
|    loss                 | 0.97        |
|    n_updates            | 340         |
|    policy_gradient_loss | -0.0202     |
|    std                  | 0.928       |
|    value_loss           | 3.35        |
-----------------------------------------


[1m[32m[robosuite INFO] [0mLoading controller configuration from: /opt/miniconda3/envs/mujoco-env/lib/python3.10/site-packages/robosuite/controllers/config/robots/default_panda.json (composite_controller_factory.py:121)
[1m[32m[robosuite INFO] [0mLoading controller configuration from: /opt/miniconda3/envs/mujoco-env/lib/python3.10/site-packages/robosuite/controllers/config/robots/default_panda.json (composite_controller_factory.py:121)
[1m[32m[robosuite INFO] [0mLoading controller configuration from: /opt/miniconda3/envs/mujoco-env/lib/python3.10/site-packages/robosuite/controllers/config/robots/default_panda.json (composite_controller_factory.py:121)
[1m[32m[robosuite INFO] [0mLoading controller configuration from: /opt/miniconda3/envs/mujoco-env/lib/python3.10/site-packages/robosuite/controllers/config/robots/default_panda.json (composite_controller_factory.py:121)
[1m[32m[robosuite INFO] [0mLoading controller configuration from: /opt/miniconda3/envs/mujoco-env/lib/pyt

-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 200         |
|    ep_rew_mean          | 30.1        |
| time/                   |             |
|    fps                  | 285         |
|    iterations           | 36          |
|    time_elapsed         | 258         |
|    total_timesteps      | 73728       |
| train/                  |             |
|    approx_kl            | 0.014414968 |
|    clip_fraction        | 0.16        |
|    clip_range           | 0.2         |
|    entropy_loss         | -9.41       |
|    explained_variance   | 0.54        |
|    learning_rate        | 0.0003      |
|    loss                 | 1.22        |
|    n_updates            | 350         |
|    policy_gradient_loss | -0.0168     |
|    std                  | 0.929       |
|    value_loss           | 4.16        |
-----------------------------------------


[1m[32m[robosuite INFO] [0mLoading controller configuration from: /opt/miniconda3/envs/mujoco-env/lib/python3.10/site-packages/robosuite/controllers/config/robots/default_panda.json (composite_controller_factory.py:121)
[1m[32m[robosuite INFO] [0mLoading controller configuration from: /opt/miniconda3/envs/mujoco-env/lib/python3.10/site-packages/robosuite/controllers/config/robots/default_panda.json (composite_controller_factory.py:121)
[1m[32m[robosuite INFO] [0mLoading controller configuration from: /opt/miniconda3/envs/mujoco-env/lib/python3.10/site-packages/robosuite/controllers/config/robots/default_panda.json (composite_controller_factory.py:121)
[1m[32m[robosuite INFO] [0mLoading controller configuration from: /opt/miniconda3/envs/mujoco-env/lib/python3.10/site-packages/robosuite/controllers/config/robots/default_panda.json (composite_controller_factory.py:121)
[1m[32m[robosuite INFO] [0mLoading controller configuration from: /opt/miniconda3/envs/mujoco-env/lib/pyt

-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 200         |
|    ep_rew_mean          | 30.2        |
| time/                   |             |
|    fps                  | 285         |
|    iterations           | 37          |
|    time_elapsed         | 265         |
|    total_timesteps      | 75776       |
| train/                  |             |
|    approx_kl            | 0.015003188 |
|    clip_fraction        | 0.155       |
|    clip_range           | 0.2         |
|    entropy_loss         | -9.41       |
|    explained_variance   | 0.641       |
|    learning_rate        | 0.0003      |
|    loss                 | 1.15        |
|    n_updates            | 360         |
|    policy_gradient_loss | -0.0162     |
|    std                  | 0.929       |
|    value_loss           | 3.84        |
-----------------------------------------


[1m[32m[robosuite INFO] [0mLoading controller configuration from: /opt/miniconda3/envs/mujoco-env/lib/python3.10/site-packages/robosuite/controllers/config/robots/default_panda.json (composite_controller_factory.py:121)
[1m[32m[robosuite INFO] [0mLoading controller configuration from: /opt/miniconda3/envs/mujoco-env/lib/python3.10/site-packages/robosuite/controllers/config/robots/default_panda.json (composite_controller_factory.py:121)
[1m[32m[robosuite INFO] [0mLoading controller configuration from: /opt/miniconda3/envs/mujoco-env/lib/python3.10/site-packages/robosuite/controllers/config/robots/default_panda.json (composite_controller_factory.py:121)
[1m[32m[robosuite INFO] [0mLoading controller configuration from: /opt/miniconda3/envs/mujoco-env/lib/python3.10/site-packages/robosuite/controllers/config/robots/default_panda.json (composite_controller_factory.py:121)
[1m[32m[robosuite INFO] [0mLoading controller configuration from: /opt/miniconda3/envs/mujoco-env/lib/pyt

-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 200         |
|    ep_rew_mean          | 30.6        |
| time/                   |             |
|    fps                  | 285         |
|    iterations           | 38          |
|    time_elapsed         | 272         |
|    total_timesteps      | 77824       |
| train/                  |             |
|    approx_kl            | 0.015339218 |
|    clip_fraction        | 0.183       |
|    clip_range           | 0.2         |
|    entropy_loss         | -9.41       |
|    explained_variance   | 0.674       |
|    learning_rate        | 0.0003      |
|    loss                 | 1.16        |
|    n_updates            | 370         |
|    policy_gradient_loss | -0.019      |
|    std                  | 0.929       |
|    value_loss           | 3.02        |
-----------------------------------------


[1m[32m[robosuite INFO] [0mLoading controller configuration from: /opt/miniconda3/envs/mujoco-env/lib/python3.10/site-packages/robosuite/controllers/config/robots/default_panda.json (composite_controller_factory.py:121)
[1m[32m[robosuite INFO] [0mLoading controller configuration from: /opt/miniconda3/envs/mujoco-env/lib/python3.10/site-packages/robosuite/controllers/config/robots/default_panda.json (composite_controller_factory.py:121)
[1m[32m[robosuite INFO] [0mLoading controller configuration from: /opt/miniconda3/envs/mujoco-env/lib/python3.10/site-packages/robosuite/controllers/config/robots/default_panda.json (composite_controller_factory.py:121)
[1m[32m[robosuite INFO] [0mLoading controller configuration from: /opt/miniconda3/envs/mujoco-env/lib/python3.10/site-packages/robosuite/controllers/config/robots/default_panda.json (composite_controller_factory.py:121)
[1m[32m[robosuite INFO] [0mLoading controller configuration from: /opt/miniconda3/envs/mujoco-env/lib/pyt

-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 200         |
|    ep_rew_mean          | 31.3        |
| time/                   |             |
|    fps                  | 285         |
|    iterations           | 39          |
|    time_elapsed         | 279         |
|    total_timesteps      | 79872       |
| train/                  |             |
|    approx_kl            | 0.016392086 |
|    clip_fraction        | 0.174       |
|    clip_range           | 0.2         |
|    entropy_loss         | -9.4        |
|    explained_variance   | 0.564       |
|    learning_rate        | 0.0003      |
|    loss                 | 1.19        |
|    n_updates            | 380         |
|    policy_gradient_loss | -0.0198     |
|    std                  | 0.925       |
|    value_loss           | 3.71        |
-----------------------------------------


[1m[32m[robosuite INFO] [0mLoading controller configuration from: /opt/miniconda3/envs/mujoco-env/lib/python3.10/site-packages/robosuite/controllers/config/robots/default_panda.json (composite_controller_factory.py:121)
[1m[32m[robosuite INFO] [0mLoading controller configuration from: /opt/miniconda3/envs/mujoco-env/lib/python3.10/site-packages/robosuite/controllers/config/robots/default_panda.json (composite_controller_factory.py:121)
[1m[32m[robosuite INFO] [0mLoading controller configuration from: /opt/miniconda3/envs/mujoco-env/lib/python3.10/site-packages/robosuite/controllers/config/robots/default_panda.json (composite_controller_factory.py:121)
[1m[32m[robosuite INFO] [0mLoading controller configuration from: /opt/miniconda3/envs/mujoco-env/lib/python3.10/site-packages/robosuite/controllers/config/robots/default_panda.json (composite_controller_factory.py:121)
[1m[32m[robosuite INFO] [0mLoading controller configuration from: /opt/miniconda3/envs/mujoco-env/lib/pyt

-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 200         |
|    ep_rew_mean          | 31.4        |
| time/                   |             |
|    fps                  | 285         |
|    iterations           | 40          |
|    time_elapsed         | 287         |
|    total_timesteps      | 81920       |
| train/                  |             |
|    approx_kl            | 0.014663945 |
|    clip_fraction        | 0.166       |
|    clip_range           | 0.2         |
|    entropy_loss         | -9.38       |
|    explained_variance   | 0.576       |
|    learning_rate        | 0.0003      |
|    loss                 | 3.19        |
|    n_updates            | 390         |
|    policy_gradient_loss | -0.0182     |
|    std                  | 0.924       |
|    value_loss           | 5.87        |
-----------------------------------------


[1m[32m[robosuite INFO] [0mLoading controller configuration from: /opt/miniconda3/envs/mujoco-env/lib/python3.10/site-packages/robosuite/controllers/config/robots/default_panda.json (composite_controller_factory.py:121)
[1m[32m[robosuite INFO] [0mLoading controller configuration from: /opt/miniconda3/envs/mujoco-env/lib/python3.10/site-packages/robosuite/controllers/config/robots/default_panda.json (composite_controller_factory.py:121)
[1m[32m[robosuite INFO] [0mLoading controller configuration from: /opt/miniconda3/envs/mujoco-env/lib/python3.10/site-packages/robosuite/controllers/config/robots/default_panda.json (composite_controller_factory.py:121)
[1m[32m[robosuite INFO] [0mLoading controller configuration from: /opt/miniconda3/envs/mujoco-env/lib/python3.10/site-packages/robosuite/controllers/config/robots/default_panda.json (composite_controller_factory.py:121)
[1m[32m[robosuite INFO] [0mLoading controller configuration from: /opt/miniconda3/envs/mujoco-env/lib/pyt

-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 200         |
|    ep_rew_mean          | 31.8        |
| time/                   |             |
|    fps                  | 285         |
|    iterations           | 41          |
|    time_elapsed         | 294         |
|    total_timesteps      | 83968       |
| train/                  |             |
|    approx_kl            | 0.016058888 |
|    clip_fraction        | 0.159       |
|    clip_range           | 0.2         |
|    entropy_loss         | -9.37       |
|    explained_variance   | 0.486       |
|    learning_rate        | 0.0003      |
|    loss                 | 0.832       |
|    n_updates            | 400         |
|    policy_gradient_loss | -0.0201     |
|    std                  | 0.924       |
|    value_loss           | 3.61        |
-----------------------------------------


[1m[32m[robosuite INFO] [0mLoading controller configuration from: /opt/miniconda3/envs/mujoco-env/lib/python3.10/site-packages/robosuite/controllers/config/robots/default_panda.json (composite_controller_factory.py:121)
[1m[32m[robosuite INFO] [0mLoading controller configuration from: /opt/miniconda3/envs/mujoco-env/lib/python3.10/site-packages/robosuite/controllers/config/robots/default_panda.json (composite_controller_factory.py:121)
[1m[32m[robosuite INFO] [0mLoading controller configuration from: /opt/miniconda3/envs/mujoco-env/lib/python3.10/site-packages/robosuite/controllers/config/robots/default_panda.json (composite_controller_factory.py:121)
[1m[32m[robosuite INFO] [0mLoading controller configuration from: /opt/miniconda3/envs/mujoco-env/lib/python3.10/site-packages/robosuite/controllers/config/robots/default_panda.json (composite_controller_factory.py:121)
[1m[32m[robosuite INFO] [0mLoading controller configuration from: /opt/miniconda3/envs/mujoco-env/lib/pyt

-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 200         |
|    ep_rew_mean          | 31.9        |
| time/                   |             |
|    fps                  | 285         |
|    iterations           | 42          |
|    time_elapsed         | 301         |
|    total_timesteps      | 86016       |
| train/                  |             |
|    approx_kl            | 0.022735434 |
|    clip_fraction        | 0.249       |
|    clip_range           | 0.2         |
|    entropy_loss         | -9.34       |
|    explained_variance   | 0.771       |
|    learning_rate        | 0.0003      |
|    loss                 | 0.862       |
|    n_updates            | 410         |
|    policy_gradient_loss | -0.0241     |
|    std                  | 0.916       |
|    value_loss           | 1.83        |
-----------------------------------------


[1m[32m[robosuite INFO] [0mLoading controller configuration from: /opt/miniconda3/envs/mujoco-env/lib/python3.10/site-packages/robosuite/controllers/config/robots/default_panda.json (composite_controller_factory.py:121)
[1m[32m[robosuite INFO] [0mLoading controller configuration from: /opt/miniconda3/envs/mujoco-env/lib/python3.10/site-packages/robosuite/controllers/config/robots/default_panda.json (composite_controller_factory.py:121)
[1m[32m[robosuite INFO] [0mLoading controller configuration from: /opt/miniconda3/envs/mujoco-env/lib/python3.10/site-packages/robosuite/controllers/config/robots/default_panda.json (composite_controller_factory.py:121)
[1m[32m[robosuite INFO] [0mLoading controller configuration from: /opt/miniconda3/envs/mujoco-env/lib/python3.10/site-packages/robosuite/controllers/config/robots/default_panda.json (composite_controller_factory.py:121)
[1m[32m[robosuite INFO] [0mLoading controller configuration from: /opt/miniconda3/envs/mujoco-env/lib/pyt

-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 200         |
|    ep_rew_mean          | 32.4        |
| time/                   |             |
|    fps                  | 285         |
|    iterations           | 43          |
|    time_elapsed         | 308         |
|    total_timesteps      | 88064       |
| train/                  |             |
|    approx_kl            | 0.016729914 |
|    clip_fraction        | 0.162       |
|    clip_range           | 0.2         |
|    entropy_loss         | -9.29       |
|    explained_variance   | 0.32        |
|    learning_rate        | 0.0003      |
|    loss                 | 0.711       |
|    n_updates            | 420         |
|    policy_gradient_loss | -0.021      |
|    std                  | 0.909       |
|    value_loss           | 4.04        |
-----------------------------------------


[1m[32m[robosuite INFO] [0mLoading controller configuration from: /opt/miniconda3/envs/mujoco-env/lib/python3.10/site-packages/robosuite/controllers/config/robots/default_panda.json (composite_controller_factory.py:121)
[1m[32m[robosuite INFO] [0mLoading controller configuration from: /opt/miniconda3/envs/mujoco-env/lib/python3.10/site-packages/robosuite/controllers/config/robots/default_panda.json (composite_controller_factory.py:121)
[1m[32m[robosuite INFO] [0mLoading controller configuration from: /opt/miniconda3/envs/mujoco-env/lib/python3.10/site-packages/robosuite/controllers/config/robots/default_panda.json (composite_controller_factory.py:121)
[1m[32m[robosuite INFO] [0mLoading controller configuration from: /opt/miniconda3/envs/mujoco-env/lib/python3.10/site-packages/robosuite/controllers/config/robots/default_panda.json (composite_controller_factory.py:121)
[1m[32m[robosuite INFO] [0mLoading controller configuration from: /opt/miniconda3/envs/mujoco-env/lib/pyt

-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 200         |
|    ep_rew_mean          | 32.5        |
| time/                   |             |
|    fps                  | 284         |
|    iterations           | 44          |
|    time_elapsed         | 316         |
|    total_timesteps      | 90112       |
| train/                  |             |
|    approx_kl            | 0.015169148 |
|    clip_fraction        | 0.173       |
|    clip_range           | 0.2         |
|    entropy_loss         | -9.25       |
|    explained_variance   | 0.553       |
|    learning_rate        | 0.0003      |
|    loss                 | 1.82        |
|    n_updates            | 430         |
|    policy_gradient_loss | -0.0218     |
|    std                  | 0.907       |
|    value_loss           | 3.06        |
-----------------------------------------


[1m[32m[robosuite INFO] [0mLoading controller configuration from: /opt/miniconda3/envs/mujoco-env/lib/python3.10/site-packages/robosuite/controllers/config/robots/default_panda.json (composite_controller_factory.py:121)
[1m[32m[robosuite INFO] [0mLoading controller configuration from: /opt/miniconda3/envs/mujoco-env/lib/python3.10/site-packages/robosuite/controllers/config/robots/default_panda.json (composite_controller_factory.py:121)
[1m[32m[robosuite INFO] [0mLoading controller configuration from: /opt/miniconda3/envs/mujoco-env/lib/python3.10/site-packages/robosuite/controllers/config/robots/default_panda.json (composite_controller_factory.py:121)
[1m[32m[robosuite INFO] [0mLoading controller configuration from: /opt/miniconda3/envs/mujoco-env/lib/python3.10/site-packages/robosuite/controllers/config/robots/default_panda.json (composite_controller_factory.py:121)
[1m[32m[robosuite INFO] [0mLoading controller configuration from: /opt/miniconda3/envs/mujoco-env/lib/pyt

-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 200         |
|    ep_rew_mean          | 31.9        |
| time/                   |             |
|    fps                  | 284         |
|    iterations           | 45          |
|    time_elapsed         | 323         |
|    total_timesteps      | 92160       |
| train/                  |             |
|    approx_kl            | 0.015955484 |
|    clip_fraction        | 0.189       |
|    clip_range           | 0.2         |
|    entropy_loss         | -9.25       |
|    explained_variance   | 0.563       |
|    learning_rate        | 0.0003      |
|    loss                 | 0.812       |
|    n_updates            | 440         |
|    policy_gradient_loss | -0.0193     |
|    std                  | 0.909       |
|    value_loss           | 3.29        |
-----------------------------------------


[1m[32m[robosuite INFO] [0mLoading controller configuration from: /opt/miniconda3/envs/mujoco-env/lib/python3.10/site-packages/robosuite/controllers/config/robots/default_panda.json (composite_controller_factory.py:121)
[1m[32m[robosuite INFO] [0mLoading controller configuration from: /opt/miniconda3/envs/mujoco-env/lib/python3.10/site-packages/robosuite/controllers/config/robots/default_panda.json (composite_controller_factory.py:121)
[1m[32m[robosuite INFO] [0mLoading controller configuration from: /opt/miniconda3/envs/mujoco-env/lib/python3.10/site-packages/robosuite/controllers/config/robots/default_panda.json (composite_controller_factory.py:121)
[1m[32m[robosuite INFO] [0mLoading controller configuration from: /opt/miniconda3/envs/mujoco-env/lib/python3.10/site-packages/robosuite/controllers/config/robots/default_panda.json (composite_controller_factory.py:121)
[1m[32m[robosuite INFO] [0mLoading controller configuration from: /opt/miniconda3/envs/mujoco-env/lib/pyt

-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 200         |
|    ep_rew_mean          | 32.4        |
| time/                   |             |
|    fps                  | 284         |
|    iterations           | 46          |
|    time_elapsed         | 331         |
|    total_timesteps      | 94208       |
| train/                  |             |
|    approx_kl            | 0.021183077 |
|    clip_fraction        | 0.25        |
|    clip_range           | 0.2         |
|    entropy_loss         | -9.25       |
|    explained_variance   | 0.556       |
|    learning_rate        | 0.0003      |
|    loss                 | 0.675       |
|    n_updates            | 450         |
|    policy_gradient_loss | -0.0286     |
|    std                  | 0.908       |
|    value_loss           | 2.17        |
-----------------------------------------


[1m[32m[robosuite INFO] [0mLoading controller configuration from: /opt/miniconda3/envs/mujoco-env/lib/python3.10/site-packages/robosuite/controllers/config/robots/default_panda.json (composite_controller_factory.py:121)
[1m[32m[robosuite INFO] [0mLoading controller configuration from: /opt/miniconda3/envs/mujoco-env/lib/python3.10/site-packages/robosuite/controllers/config/robots/default_panda.json (composite_controller_factory.py:121)
[1m[32m[robosuite INFO] [0mLoading controller configuration from: /opt/miniconda3/envs/mujoco-env/lib/python3.10/site-packages/robosuite/controllers/config/robots/default_panda.json (composite_controller_factory.py:121)
[1m[32m[robosuite INFO] [0mLoading controller configuration from: /opt/miniconda3/envs/mujoco-env/lib/python3.10/site-packages/robosuite/controllers/config/robots/default_panda.json (composite_controller_factory.py:121)
[1m[32m[robosuite INFO] [0mLoading controller configuration from: /opt/miniconda3/envs/mujoco-env/lib/pyt

-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 200         |
|    ep_rew_mean          | 32.3        |
| time/                   |             |
|    fps                  | 284         |
|    iterations           | 47          |
|    time_elapsed         | 338         |
|    total_timesteps      | 96256       |
| train/                  |             |
|    approx_kl            | 0.018394312 |
|    clip_fraction        | 0.203       |
|    clip_range           | 0.2         |
|    entropy_loss         | -9.23       |
|    explained_variance   | 0.691       |
|    learning_rate        | 0.0003      |
|    loss                 | 0.747       |
|    n_updates            | 460         |
|    policy_gradient_loss | -0.0214     |
|    std                  | 0.905       |
|    value_loss           | 2.4         |
-----------------------------------------


[1m[32m[robosuite INFO] [0mLoading controller configuration from: /opt/miniconda3/envs/mujoco-env/lib/python3.10/site-packages/robosuite/controllers/config/robots/default_panda.json (composite_controller_factory.py:121)
[1m[32m[robosuite INFO] [0mLoading controller configuration from: /opt/miniconda3/envs/mujoco-env/lib/python3.10/site-packages/robosuite/controllers/config/robots/default_panda.json (composite_controller_factory.py:121)
[1m[32m[robosuite INFO] [0mLoading controller configuration from: /opt/miniconda3/envs/mujoco-env/lib/python3.10/site-packages/robosuite/controllers/config/robots/default_panda.json (composite_controller_factory.py:121)
[1m[32m[robosuite INFO] [0mLoading controller configuration from: /opt/miniconda3/envs/mujoco-env/lib/python3.10/site-packages/robosuite/controllers/config/robots/default_panda.json (composite_controller_factory.py:121)
[1m[32m[robosuite INFO] [0mLoading controller configuration from: /opt/miniconda3/envs/mujoco-env/lib/pyt

-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 200         |
|    ep_rew_mean          | 32.4        |
| time/                   |             |
|    fps                  | 284         |
|    iterations           | 48          |
|    time_elapsed         | 345         |
|    total_timesteps      | 98304       |
| train/                  |             |
|    approx_kl            | 0.019945946 |
|    clip_fraction        | 0.208       |
|    clip_range           | 0.2         |
|    entropy_loss         | -9.24       |
|    explained_variance   | 0.664       |
|    learning_rate        | 0.0003      |
|    loss                 | 0.956       |
|    n_updates            | 470         |
|    policy_gradient_loss | -0.0212     |
|    std                  | 0.907       |
|    value_loss           | 2.67        |
-----------------------------------------


[1m[32m[robosuite INFO] [0mLoading controller configuration from: /opt/miniconda3/envs/mujoco-env/lib/python3.10/site-packages/robosuite/controllers/config/robots/default_panda.json (composite_controller_factory.py:121)
[1m[32m[robosuite INFO] [0mLoading controller configuration from: /opt/miniconda3/envs/mujoco-env/lib/python3.10/site-packages/robosuite/controllers/config/robots/default_panda.json (composite_controller_factory.py:121)
[1m[32m[robosuite INFO] [0mLoading controller configuration from: /opt/miniconda3/envs/mujoco-env/lib/python3.10/site-packages/robosuite/controllers/config/robots/default_panda.json (composite_controller_factory.py:121)
[1m[32m[robosuite INFO] [0mLoading controller configuration from: /opt/miniconda3/envs/mujoco-env/lib/python3.10/site-packages/robosuite/controllers/config/robots/default_panda.json (composite_controller_factory.py:121)
[1m[32m[robosuite INFO] [0mLoading controller configuration from: /opt/miniconda3/envs/mujoco-env/lib/pyt

----------------------------------------
| rollout/                |            |
|    ep_len_mean          | 200        |
|    ep_rew_mean          | 32.7       |
| time/                   |            |
|    fps                  | 284        |
|    iterations           | 49         |
|    time_elapsed         | 352        |
|    total_timesteps      | 100352     |
| train/                  |            |
|    approx_kl            | 0.01376545 |
|    clip_fraction        | 0.166      |
|    clip_range           | 0.2        |
|    entropy_loss         | -9.24      |
|    explained_variance   | 0.592      |
|    learning_rate        | 0.0003     |
|    loss                 | 0.994      |
|    n_updates            | 480        |
|    policy_gradient_loss | -0.0168    |
|    std                  | 0.906      |
|    value_loss           | 3.33       |
----------------------------------------


[1m[32m[robosuite INFO] [0mLoading controller configuration from: /opt/miniconda3/envs/mujoco-env/lib/python3.10/site-packages/robosuite/controllers/config/robots/default_panda.json (composite_controller_factory.py:121)
[1m[32m[robosuite INFO] [0mLoading controller configuration from: /opt/miniconda3/envs/mujoco-env/lib/python3.10/site-packages/robosuite/controllers/config/robots/default_panda.json (composite_controller_factory.py:121)
[1m[32m[robosuite INFO] [0mLoading controller configuration from: /opt/miniconda3/envs/mujoco-env/lib/python3.10/site-packages/robosuite/controllers/config/robots/default_panda.json (composite_controller_factory.py:121)
[1m[32m[robosuite INFO] [0mLoading controller configuration from: /opt/miniconda3/envs/mujoco-env/lib/python3.10/site-packages/robosuite/controllers/config/robots/default_panda.json (composite_controller_factory.py:121)
[1m[32m[robosuite INFO] [0mLoading controller configuration from: /opt/miniconda3/envs/mujoco-env/lib/pyt

----------------------------------------
| rollout/                |            |
|    ep_len_mean          | 200        |
|    ep_rew_mean          | 32.7       |
| time/                   |            |
|    fps                  | 284        |
|    iterations           | 50         |
|    time_elapsed         | 360        |
|    total_timesteps      | 102400     |
| train/                  |            |
|    approx_kl            | 0.01597356 |
|    clip_fraction        | 0.174      |
|    clip_range           | 0.2        |
|    entropy_loss         | -9.24      |
|    explained_variance   | 0.598      |
|    learning_rate        | 0.0003     |
|    loss                 | 1.56       |
|    n_updates            | 490        |
|    policy_gradient_loss | -0.017     |
|    std                  | 0.906      |
|    value_loss           | 3.92       |
----------------------------------------


[1m[32m[robosuite INFO] [0mLoading controller configuration from: /opt/miniconda3/envs/mujoco-env/lib/python3.10/site-packages/robosuite/controllers/config/robots/default_panda.json (composite_controller_factory.py:121)


In [None]:
import imageio

env = RandomizedLiftEnv()
model = PPO.load("ppo_franka_lift")

frames = []
obs = env.reset()
done = False
while not done:
    action, _ = model.predict(obs, deterministic=True)
    obs, reward, done, info = env.step(action)
    frame = env.render(mode="rgb_array")
    frames.append(frame)

env.close()
imageio.mimsave("ppo_eval.gif", frames, fps=20)
