<a href="https://colab.research.google.com/github/simpler-env/SimplerEnv/blob/main/example.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# SimplerEnv: Simulated Manipulation Policy Evaluation for Real-World Robots

- Project page: <https://simpler-env.github.io/>
- Code: <https://github.com/simpler-env/SimplerEnv>

## Create a Simulated Environment and Take Random Actions

In [None]:
import simpler_env
from simpler_env.utils.env.observation_utils import get_image_from_maniskill2_obs_dict
import mediapy
import sapien.core as sapien

# A few of the task names that can be passed to simpler_env.make:
#   "google_robot_pick_coke_can", "google_robot_move_near", "google_robot_open_drawer",
#   "google_robot_close_drawer", "widowx_spoon_on_towel", "widowx_carrot_on_plate",
#   "widowx_stack_cube", "widowx_put_eggplant_in_basket"
task_name = "google_robot_pick_coke_can"

# When this cell is re-run, shut down the environment left over from the previous
# run before building a new one.
if "env" in locals():
  print("Closing existing env")
  env.close()
  del env

env = simpler_env.make(task_name)
obs, reset_info = env.reset()
instruction = env.get_language_instruction()
print("Reset info", reset_info)
print("Instruction", instruction)

# Layout of the action vector passed to env.step:
#   action[:3]  -> delta xyz
#   action[3:6] -> delta rotation in axis-angle representation
#   action[6:7] -> gripper (the meaning of open / close depends on robot URDF)
frames = []
done, truncated = False, False
while True:
  # Render the observation the upcoming action is taken from.
  image = get_image_from_maniskill2_obs_dict(env, obs)
  action = env.action_space.sample()  # replace this with your policy inference
  obs, reward, done, truncated, info = env.step(action)
  frames.append(image)
  if done or truncated:
    break

# `info` is the dict returned by the last env.step call of the episode.
episode_stats = info.get("episode_stats", {})
print("Episode stats", episode_stats)
mediapy.show_video(frames, fps=10)

Closing existing env


[2025-03-09 16:08:49.023] [svulkan2] [error] GLFW error: X11: The DISPLAY environment variable is missing


## Run Inference on Simulated Environments

In [1]:
import os
import numpy as np
import simpler_env
from simpler_env.utils.env.observation_utils import get_image_from_maniskill2_obs_dict
import mediapy


# Short model alias -> checkpoint name used to build the checkpoint path.
RT_1_CHECKPOINTS = dict(
    rt_1_x="rt_1_x_tf_trained_for_002272480_step",
    rt_1_400k="rt_1_tf_trained_for_000400120",
    rt_1_58k="rt_1_tf_trained_for_000058240",
    rt_1_1k="rt_1_tf_trained_for_000001120",
)


def get_rt_1_checkpoint(name, ckpt_dir="./checkpoints"):
  """Build the path of the RT-1 checkpoint registered under ``name``.

  Args:
    name: Alias of the checkpoint; must be a key of ``RT_1_CHECKPOINTS``.
    ckpt_dir: Directory that the checkpoint name is joined onto.

  Returns:
    ``ckpt_dir`` joined with the checkpoint name. The path is only
    constructed here; nothing checks that it exists on disk.

  Raises:
    AssertionError: if ``name`` is not a key of ``RT_1_CHECKPOINTS``.
  """
  assert name in RT_1_CHECKPOINTS, name
  return os.path.join(ckpt_dir, RT_1_CHECKPOINTS[name])

# Show how many environment names simpler_env exposes, then one name per line.
print(len(simpler_env.ENVIRONMENTS))
for env_name in simpler_env.ENVIRONMENTS:
    print(env_name)

25
google_robot_pick_coke_can
google_robot_pick_horizontal_coke_can
google_robot_pick_vertical_coke_can
google_robot_pick_standing_coke_can
google_robot_pick_object
google_robot_move_near_v0
google_robot_move_near_v1
google_robot_move_near
google_robot_open_drawer
google_robot_open_top_drawer
google_robot_open_middle_drawer
google_robot_open_bottom_drawer
google_robot_close_drawer
google_robot_close_top_drawer
google_robot_close_middle_drawer
google_robot_close_bottom_drawer
google_robot_place_in_closed_drawer
google_robot_place_in_closed_top_drawer
google_robot_place_in_closed_middle_drawer
google_robot_place_in_closed_bottom_drawer
google_robot_place_apple_in_closed_top_drawer
widowx_spoon_on_towel
widowx_carrot_on_plate
widowx_stack_cube
widowx_put_eggplant_in_basket


In [2]:
# Select your Task
# Some of the accepted names:
#   "google_robot_pick_coke_can", "google_robot_move_near", "google_robot_open_drawer",
#   "google_robot_close_drawer", "widowx_spoon_on_towel", "widowx_carrot_on_plate",
#   "widowx_stack_cube", "widowx_put_eggplant_in_basket"
task_name = "google_robot_pick_coke_can"

# On a re-run, close the environment created by the previous execution first.
if "env" in locals():
  print("Closing existing env")
  env.close()
  del env

env = simpler_env.make(task_name)
obs, reset_info = env.reset()
instruction = env.get_language_instruction()
print("Reset info", reset_info)
print("Instruction", instruction)

# The policy setup is derived from the task name: any name containing "google"
# selects the google_robot setup, everything else the widowx_bridge setup.
policy_setup = "google_robot" if "google" in task_name else "widowx_bridge"

[2025-03-09 16:33:00.146] [svulkan2] [error] GLFW error: X11: The DISPLAY environment variable is missing


Reset info {'scene_name': 'google_pick_coke_can_1_v4', 'scene_offset': None, 'scene_pose': None, 'scene_table_height': 0.87, 'urdf_version': 'recolor_tabletop_visual_matching_1', 'rgb_overlay_path': '/nas/longleaf/home/tis/mmai/ManiSkill2_real2sim/data/real_inpainting/google_coke_can_real_eval_1.png', 'rgb_overlay_cameras': ['overhead_camera'], 'rgb_overlay_mode': 'background', 'disable_bad_material': False, 'model_id': 'opened_coke_can', 'model_scale': 1.0, 'distractor_model_ids': None, 'distractor_model_scales': None, 'obj_init_pose_wrt_robot_base': Pose([0.587925, -0.0238302, 0.840576], [0.707052, -0.0081018, -0.01162, -0.70702]), 'orientation': 'laid_vertically'}
Instruction pick coke can


  logger.warn(


In [1]:
# Select your model
# Accepted names: "rt_1_x", "rt_1_400k", "rt_1_58k", "rt_1_1k", "octo-base", "octo-small"
model_name = "octo-base"

uses_rt_1 = "rt_1" in model_name
# Reject names that match neither model family before importing anything.
if not (uses_rt_1 or "octo" in model_name):
  raise ValueError(model_name)

# Each policy class is imported only in the branch that needs it.
if uses_rt_1:
  from simpler_env.policies.rt1.rt1_model import RT1Inference

  ckpt_path = get_rt_1_checkpoint(model_name)
  model = RT1Inference(saved_model_path=ckpt_path, policy_setup=policy_setup)
else:
  from simpler_env.policies.octo.octo_model import OctoInference

  model = OctoInference(model_type=model_name, policy_setup=policy_setup, init_rng=0)


RuntimeError: jaxlib version 0.5.1 is newer than and incompatible with jax version 0.4.6. Please update your jax and/or jaxlib packages.

In [4]:
# Run inference
# Roll out one episode with the selected policy. Uses `env`, `model` and
# `get_image_from_maniskill2_obs_dict` defined by earlier cells.
obs, reset_info = env.reset()
instruction = env.get_language_instruction()
model.reset(instruction)
print(instruction)

image = get_image_from_maniskill2_obs_dict(env, obs)  # np.ndarray of shape (H, W, 3), uint8
images = [image]  # starts with the initial frame; one frame is appended per step
predicted_terminated, success, truncated = False, False, False
timestep = 0
# The episode ends when the policy predicts termination or the env reports truncation.
while not (predicted_terminated or truncated):
    # step the model; "raw_action" is raw model action output; "action" is the processed action to be sent into maniskill env
    raw_action, action = model.step(image)
    predicted_terminated = bool(action["terminate_episode"][0] > 0)
    # The env takes one flat vector: world_vector, rot_axangle and gripper concatenated in that order.
    obs, reward, success, truncated, info = env.step(
        np.concatenate([action["world_vector"], action["rot_axangle"], action["gripper"]])
    )
    print(timestep, info)
    # update image observation
    image = get_image_from_maniskill2_obs_dict(env, obs)
    images.append(image)
    timestep += 1

# Report the statistics carried by the final step's info dict; previously they
# were extracted here but never displayed.
episode_stats = info.get("episode_stats", {})
print("Episode stats", episode_stats)
print(f"Episode success: {success}")

  logger.warn(
  logger.warn(
W0000 00:00:1741551284.608872  506859 gpu_device.cc:2344] Cannot dlopen some GPU libraries. Please make sure the missing libraries mentioned above are installed properly if you would like to use GPU. Follow the guide at https://www.tensorflow.org/install/gpu for how to download and setup the required libraries for your platform.
Skipping registering GPU devices...


pick coke can




0 {'elapsed_steps': 1, 'is_grasped': False, 'consecutive_grasp': False, 'lifted_object': False, 'lifted_object_significantly': False, 'success': False, 'episode_stats': OrderedDict([('n_lift_significant', 0), ('consec_grasp', False), ('grasped', False)])}




1 {'elapsed_steps': 2, 'is_grasped': False, 'consecutive_grasp': False, 'lifted_object': False, 'lifted_object_significantly': False, 'success': False, 'episode_stats': OrderedDict([('n_lift_significant', 0), ('consec_grasp', False), ('grasped', False)])}
2 {'elapsed_steps': 3, 'is_grasped': False, 'consecutive_grasp': False, 'lifted_object': False, 'lifted_object_significantly': False, 'success': False, 'episode_stats': OrderedDict([('n_lift_significant', 0), ('consec_grasp', False), ('grasped', False)])}
3 {'elapsed_steps': 4, 'is_grasped': False, 'consecutive_grasp': False, 'lifted_object': False, 'lifted_object_significantly': False, 'success': False, 'episode_stats': OrderedDict([('n_lift_significant', 0), ('consec_grasp', False), ('grasped', False)])}
4 {'elapsed_steps': 5, 'is_grasped': False, 'consecutive_grasp': False, 'lifted_object': False, 'lifted_object_significantly': False, 'success': False, 'episode_stats': OrderedDict([('n_lift_significant', 0), ('consec_grasp', False)

In [5]:
# Label the clip with the task / model pair currently set in the kernel, then
# play back the frames collected in `images`.
print(f"{task_name} {model_name}")
mediapy.show_video(images, fps=10)

google_robot_pick_coke_can octo-base


0
This browser does not support the video tag.


## Gallery

In [None]:
# @markdown RT-1-X close drawer
# Shows whatever `task_name`, `model_name` and `images` currently hold in the kernel.
# NOTE(review): the saved output of this cell reads
# "google_robot_place_in_closed_middle_drawer rt_1_x", which does not match the
# caption above - confirm which one is intended.
print(f"{task_name} {model_name}")
mediapy.show_video(images, fps=10)


google_robot_place_in_closed_middle_drawer rt_1_x


0
This browser does not support the video tag.


In [8]:
# @markdown Octo-base widowx_put_eggplant_in_basket
# Shows whatever `task_name`, `model_name` and `images` currently hold in the kernel.
# NOTE(review): the saved output of this cell reads
# "google_robot_pick_horizontal_coke_can rt_1_x", which does not match the
# caption above - confirm which one is intended.
print(f"{task_name} {model_name}")
mediapy.show_video(images, fps=10)

google_robot_pick_horizontal_coke_can rt_1_x


0
This browser does not support the video tag.
