From b5472eac1c1ee43c5465fbe06b83ae0a6fd0000c Mon Sep 17 00:00:00 2001
From: Henri <henri123lemoine@gmail.com>
Date: Mon, 11 Nov 2024 15:01:43 -0500
Subject: [PATCH 1/9] removed duplicate fct

---
 sim/play.py          | 31 +++++--------------------------
 sim/utils/helpers.py |  3 ++-
 2 files changed, 7 insertions(+), 27 deletions(-)

diff --git a/sim/play.py b/sim/play.py
index a227c3d6..408e3104 100755
--- a/sim/play.py
+++ b/sim/play.py
@@ -24,34 +24,13 @@
 
 from sim.env import run_dir  # noqa: E402
 from sim.envs import task_registry  # noqa: E402
-from sim.utils.helpers import get_args  # noqa: E402
+from sim.utils.helpers import (  # noqa: E402
+    export_policy_as_jit,
+    export_policy_as_onnx,
+    get_args,
+)
 from sim.utils.logger import Logger  # noqa: E402
 
-import torch  # isort: skip
-
-
-def export_policy_as_jit(actor_critic: Any, path: Union[str, os.PathLike]) -> None:
-    os.makedirs(path, exist_ok=True)
-    path = os.path.join(path, "policy_1.pt")
-    model = copy.deepcopy(actor_critic.actor).to("cpu")
-    traced_script_module = torch.jit.script(model)
-    traced_script_module.save(path)
-
-
-def export_policy_as_onnx(actor_critic, path):
-    os.makedirs(path, exist_ok=True)
-    path = os.path.join(path, "policy_1.onnx")
-    model = copy.deepcopy(actor_critic.actor).to("cpu")
-
-    # Get the input dimension from the first layer of the model
-    first_layer = next(model.parameters())
-    input_dim = first_layer.shape[1]
-
-    # Create a dummy input tensor with the correct dimensions
-    dummy_input = torch.randn(1, input_dim)
-
-    torch.onnx.export(model, dummy_input, path)
-
 
 def play(args: argparse.Namespace) -> None:
     logger.info("Configuring environment and training settings...")
diff --git a/sim/utils/helpers.py b/sim/utils/helpers.py
index 675fe070..3a2558d5 100755
--- a/sim/utils/helpers.py
+++ b/sim/utils/helpers.py
@@ -34,6 +34,7 @@
 import copy
 import os
 import random
+from typing import Any, Union
 
 import numpy as np
 
@@ -244,7 +245,7 @@ def get_args() -> argparse.Namespace:
     return args
 
 
-def export_policy_as_jit(actor_critic, path) -> None:
+def export_policy_as_jit(actor_critic: Any, path: Union[str, os.PathLike]) -> None:
     os.makedirs(path, exist_ok=True)
     path = os.path.join(path, "policy_1.pt")
     model = copy.deepcopy(actor_critic.actor).to("cpu")

From e0f98b5ae47759d06e29988026f41ca3e50576d3 Mon Sep 17 00:00:00 2001
From: Henri <henri123lemoine@gmail.com>
Date: Mon, 11 Nov 2024 15:05:40 -0500
Subject: [PATCH 2/9] nit (make format)

---
 sim/resources/stompymicro/joints.py | 2 +-
 sim/sim2sim.py                      | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/sim/resources/stompymicro/joints.py b/sim/resources/stompymicro/joints.py
index b103a6bc..1f59ff1e 100644
--- a/sim/resources/stompymicro/joints.py
+++ b/sim/resources/stompymicro/joints.py
@@ -101,7 +101,7 @@ def default_standing(cls) -> Dict[str, float]:
             cls.legs.left.knee_pitch: -0.741,
             cls.legs.left.hip_yaw: 0,
             cls.legs.left.hip_roll: 0,
-            cls.legs.left.ankle_pitch: -0.5, 
+            cls.legs.left.ankle_pitch: -0.5,
             cls.legs.right.hip_pitch: -0.23,
             cls.legs.right.knee_pitch: 0.741,
             cls.legs.right.ankle_pitch: 0.5,
diff --git a/sim/sim2sim.py b/sim/sim2sim.py
index f4e1cc1a..1fa30ef9 100755
--- a/sim/sim2sim.py
+++ b/sim/sim2sim.py
@@ -316,7 +316,7 @@ def get_policy_output(policy, input_data):
             dt=0.001,
             decimation=10,
             cycle_time=0.4,
-            tau_factor=4.,
+            tau_factor=4.0,
         )
 
     run_mujoco(policy, cfg, args.keyboard_use)

From ee8b9bc9228c39ebb10c0eba1c27d2a53eb65bd5 Mon Sep 17 00:00:00 2001
From: Henri <henri123lemoine@gmail.com>
Date: Mon, 11 Nov 2024 15:07:41 -0500
Subject: [PATCH 3/9] added more easily findable DEFAULT_COMMAND constant at
 top of file

---
 sim/play.py | 10 ++++++----
 1 file changed, 6 insertions(+), 4 deletions(-)

diff --git a/sim/play.py b/sim/play.py
index 408e3104..b9f25b40 100755
--- a/sim/play.py
+++ b/sim/play.py
@@ -31,6 +31,8 @@
 )
 from sim.utils.logger import Logger  # noqa: E402
 
+DEFAULT_COMMAND = [0.4, 0.0, 0.0, 0.0]
+
 
 def play(args: argparse.Namespace) -> None:
     logger.info("Configuring environment and training settings...")
@@ -150,10 +152,10 @@ def play(args: argparse.Namespace) -> None:
             dset_actions[t] = actions.detach().numpy()
 
         if FIX_COMMAND:
-            env.commands[:, 0] = 0.2
-            env.commands[:, 1] = 0.0
-            env.commands[:, 2] = 0.0
-            env.commands[:, 3] = 0.0
+            env.commands[:, 0] = DEFAULT_COMMAND[0]
+            env.commands[:, 1] = DEFAULT_COMMAND[1]
+            env.commands[:, 2] = DEFAULT_COMMAND[2]
+            env.commands[:, 3] = DEFAULT_COMMAND[3]
         obs, critic_obs, rews, dones, infos = env.step(actions.detach())
         print(f"IMU: {obs[0, (3 * env.num_actions + 5) + 3 : (3 * env.num_actions + 5) + 2 * 3]}")
 

From 9ef08db55ab5b09dfd34f3fe4aaec837511c887b Mon Sep 17 00:00:00 2001
From: Henri <henri123lemoine@gmail.com>
Date: Mon, 11 Nov 2024 15:11:43 -0500
Subject: [PATCH 4/9] Added drawing of velocity arrows

---
 sim/play.py | 91 ++++++++++++++++++++++++++++++++++++++++++++++++++++-
 1 file changed, 90 insertions(+), 1 deletion(-)

diff --git a/sim/play.py b/sim/play.py
index b9f25b40..7bc96cf4 100755
--- a/sim/play.py
+++ b/sim/play.py
@@ -31,7 +31,84 @@
 )
 from sim.utils.logger import Logger  # noqa: E402
 
-DEFAULT_COMMAND = [0.4, 0.0, 0.0, 0.0]
+DEFAULT_COMMAND = [0.3, 0.0, 0.0, 0.0]
+
+
+def draw_velocity_arrows(gym, viewer, env_handles, robot_positions, command_xs, command_ys, actual_xs, actual_ys):
+    """Draws command and actual velocity arrows for all robots."""
+    if viewer is None:
+        return
+
+    ARROW_HEIGHT = 0.2  # Height of arrow from robot's base
+    HEAD_SCALE = 0.1  # Size of arrowhead
+
+    gym.clear_lines(viewer)
+
+    for env_handle, robot_pos, command_x, command_y, actual_x, actual_y in zip(
+        env_handles, robot_positions, command_xs, command_ys, actual_xs, actual_ys
+    ):
+        # Draw both commanded and actual velocity arrows for this robot
+        for velocity_type, (vel_x, vel_y), color in [
+            ("command", (command_x, command_y), (0.0, 1.0, 0.0)),  # Green for command
+            ("actual", (actual_x, actual_y), (1.0, 0.0, 0.0)),  # Red for actual
+        ]:
+            start = gymapi.Vec3(robot_pos[0], robot_pos[1], robot_pos[2] + ARROW_HEIGHT)
+
+            # Scale arrow length by velocity magnitude
+            vel_magnitude = np.sqrt(vel_x**2 + vel_y**2)
+            if vel_magnitude > 0:
+                arrow_scale = np.clip(vel_magnitude, 0.1, 1.0)
+                normalized_x = vel_x / vel_magnitude
+                normalized_y = vel_y / vel_magnitude
+            else:
+                arrow_scale = 0.1
+                normalized_x = 0
+                normalized_y = 0
+
+            # End position - length varies with velocity magnitude
+            end = gymapi.Vec3(start.x + normalized_x * arrow_scale, start.y + normalized_y * arrow_scale, start.z)
+
+            # Calculate arrowhead
+            perp_x = -normalized_y
+            perp_y = normalized_x
+
+            head_left = gymapi.Vec3(
+                end.x - HEAD_SCALE * (normalized_x * 0.7 + perp_x * 0.7),
+                end.y - HEAD_SCALE * (normalized_y * 0.7 + perp_y * 0.7),
+                end.z,
+            )
+
+            head_right = gymapi.Vec3(
+                end.x - HEAD_SCALE * (normalized_x * 0.7 - perp_x * 0.7),
+                end.y - HEAD_SCALE * (normalized_y * 0.7 - perp_y * 0.7),
+                end.z,
+            )
+
+            # Draw lines
+            verts = [
+                start.x,
+                start.y,
+                start.z,
+                end.x,
+                end.y,
+                end.z,
+                end.x,
+                end.y,
+                end.z,
+                head_left.x,
+                head_left.y,
+                head_left.z,
+                end.x,
+                end.y,
+                end.z,
+                head_right.x,
+                head_right.y,
+                head_right.z,
+            ]
+            colors = [color[0], color[1], color[2]] * 6
+
+            # Add the lines to the viewer
+            gym.add_lines(viewer, env_handle, 3, verts, colors)
 
 
 def play(args: argparse.Namespace) -> None:
@@ -167,6 +244,18 @@ def play(args: argparse.Namespace) -> None:
             img = np.reshape(img, (1080, 1920, 4))
             img = cv2.cvtColor(img, cv2.COLOR_RGBA2BGR)
 
+            robot_positions = env.root_states[:, 0:3].cpu().numpy()
+            draw_velocity_arrows(
+                env.gym,
+                env.viewer,
+                env.envs,
+                robot_positions,
+                env.commands[:, 0].cpu().numpy(),  # cmd x vels
+                env.commands[:, 1].cpu().numpy(),  # cmd y vels
+                env.base_lin_vel[:, 0].cpu().numpy(),  # real x vels
+                env.base_lin_vel[:, 1].cpu().numpy(),  # real y vels
+            )
+
             video.write(img[..., :3])
 
         # Log states

From e45ac7adc031d862cdca68d55e4862c5f4b28492 Mon Sep 17 00:00:00 2001
From: Henri <henri123lemoine@gmail.com>
Date: Mon, 11 Nov 2024 15:47:17 -0500
Subject: [PATCH 5/9] Added command manager

---
 sim/play.py              | 20 ++++++---
 sim/utils/cmd_manager.py | 92 ++++++++++++++++++++++++++++++++++++++++
 2 files changed, 105 insertions(+), 7 deletions(-)
 create mode 100644 sim/utils/cmd_manager.py

diff --git a/sim/play.py b/sim/play.py
index 7bc96cf4..4f059c15 100755
--- a/sim/play.py
+++ b/sim/play.py
@@ -20,10 +20,9 @@
 from isaacgym import gymapi
 from tqdm import tqdm
 
-logger = logging.getLogger(__name__)
-
 from sim.env import run_dir  # noqa: E402
 from sim.envs import task_registry  # noqa: E402
+from sim.utils.cmd_manager import CommandManager  # noqa: E402
 from sim.utils.helpers import (  # noqa: E402
     export_policy_as_jit,
     export_policy_as_onnx,
@@ -31,7 +30,10 @@
 )
 from sim.utils.logger import Logger  # noqa: E402
 
+logger = logging.getLogger(__name__)
+
 DEFAULT_COMMAND = [0.3, 0.0, 0.0, 0.0]
+CMD_MODE = "fixed"
 
 
 def draw_velocity_arrows(gym, viewer, env_handles, robot_positions, command_xs, command_ys, actual_xs, actual_ys):
@@ -223,16 +225,20 @@ def play(args: argparse.Namespace) -> None:
             os.mkdir(experiment_dir)
         video = cv2.VideoWriter(dir, fourcc, 50.0, (1920, 1080))
 
+    cmd_manager = CommandManager(
+        num_envs=env_cfg.env.num_envs,
+        mode=CMD_MODE,
+        default_cmd=DEFAULT_COMMAND,
+        device=env.device,
+    )
+
     for t in tqdm(range(stop_state_log)):
         actions = policy(obs.detach())
         if args.log_h5:
             dset_actions[t] = actions.detach().numpy()
 
-        if FIX_COMMAND:
-            env.commands[:, 0] = DEFAULT_COMMAND[0]
-            env.commands[:, 1] = DEFAULT_COMMAND[1]
-            env.commands[:, 2] = DEFAULT_COMMAND[2]
-            env.commands[:, 3] = DEFAULT_COMMAND[3]
+        env.commands[:] = cmd_manager.update(env.dt)
+
         obs, critic_obs, rews, dones, infos = env.step(actions.detach())
         print(f"IMU: {obs[0, (3 * env.num_actions + 5) + 3 : (3 * env.num_actions + 5) + 2 * 3]}")
 
diff --git a/sim/utils/cmd_manager.py b/sim/utils/cmd_manager.py
new file mode 100644
index 00000000..62e3e6e4
--- /dev/null
+++ b/sim/utils/cmd_manager.py
@@ -0,0 +1,92 @@
+import argparse
+from enum import Enum
+from typing import List
+
+import numpy as np
+import torch
+
+
+class CommandMode(Enum):
+    FIXED = "fixed"
+    OSCILLATING = "oscillating"
+    KEYBOARD = "keyboard"
+
+
+class CommandManager:
+    """Manages robot commands"""
+
+    def __init__(
+        self, num_envs: int = 1, mode: str = "oscillating", default_cmd: List[float] = [0.3, 0.0, 0.0, 0.0], device="cpu"
+    ):
+        self.num_envs = num_envs
+        self.mode = CommandMode(mode)
+        self.device = device
+        self.default_cmd = torch.tensor(default_cmd, device=self.device)
+        self.commands = self.default_cmd.repeat(num_envs, 1)
+        self.time = 0
+
+        # Mode-specific parameters
+        if self.mode == CommandMode.OSCILLATING:
+            self.osc_period = 10.0
+            self.osc_amplitude = 0.3
+        elif self.mode == CommandMode.KEYBOARD:
+            try:
+                import pygame
+
+                pygame.init()
+                # Create a minimal window to capture keyboard events
+                pygame.display.set_mode((100, 100))
+                self.x_vel_cmd = 0.0
+                self.y_vel_cmd = 0.0
+                self.yaw_vel_cmd = 0.0
+            except ImportError:
+                print("WARNING: pygame not found, falling back to fixed commands")
+                self.mode = CommandMode.FIXED
+
+    def update(self, dt: float) -> torch.Tensor:
+        """Updates and returns commands based on current mode."""
+        self.time += dt
+        if self.mode == CommandMode.FIXED:
+            return self.commands
+        elif self.mode == CommandMode.OSCILLATING:
+            # Oscillate x velocity
+            x_vel = self.osc_amplitude * torch.sin(torch.tensor(2 * np.pi * self.time / self.osc_period))
+            self.commands[:, 0] = x_vel.to(self.device)
+        elif self.mode == CommandMode.KEYBOARD:
+            self._handle_keyboard_input()
+            self.commands[:, 0] = torch.tensor(self.x_vel_cmd, device=self.device)
+            self.commands[:, 1] = torch.tensor(self.y_vel_cmd, device=self.device)
+            self.commands[:, 2] = torch.tensor(self.yaw_vel_cmd, device=self.device)
+        return self.commands
+
+    def _handle_keyboard_input(self):
+        """Handles keyboard input for command updates."""
+        import pygame
+
+        for event in pygame.event.get():
+            if event.type == pygame.QUIT:
+                pygame.quit()
+
+        keys = pygame.key.get_pressed()
+
+        # Update movement commands based on arrow keys
+        if keys[pygame.K_UP]:
+            self.x_vel_cmd = min(self.x_vel_cmd + 0.0005, 0.5)
+        if keys[pygame.K_DOWN]:
+            self.x_vel_cmd = max(self.x_vel_cmd - 0.0005, -0.5)
+        if keys[pygame.K_LEFT]:
+            self.y_vel_cmd = min(self.y_vel_cmd + 0.0005, 0.5)
+        if keys[pygame.K_RIGHT]:
+            self.y_vel_cmd = max(self.y_vel_cmd - 0.0005, -0.5)
+
+        # Yaw control
+        if keys[pygame.K_a]:
+            self.yaw_vel_cmd = min(self.yaw_vel_cmd + 0.001, 0.5)
+        if keys[pygame.K_z]:
+            self.yaw_vel_cmd = max(self.yaw_vel_cmd - 0.001, -0.5)
+
+        # Reset commands
+        if keys[pygame.K_SPACE]:
+            self.x_vel_cmd = 0.0
+            self.y_vel_cmd = 0.0
+            self.yaw_vel_cmd = 0.0

From b87f7a190a51bdc5331bfd793fce94b02f4e6362 Mon Sep 17 00:00:00 2001
From: Henri <henri123lemoine@gmail.com>
Date: Mon, 11 Nov 2024 15:51:51 -0500
Subject: [PATCH 6/9] Added draw_vector helper to helpers.py

---
 sim/utils/helpers.py | 78 +++++++++++++++++++++++++++++++++++++++++++-
 1 file changed, 77 insertions(+), 1 deletion(-)

diff --git a/sim/utils/helpers.py b/sim/utils/helpers.py
index 3a2558d5..74951e41 100755
--- a/sim/utils/helpers.py
+++ b/sim/utils/helpers.py
@@ -34,7 +34,7 @@
 import copy
 import os
 import random
-from typing import Any, Union
+from typing import Any, Tuple, Union
 
 import numpy as np
 
@@ -266,3 +266,79 @@ def export_policy_as_onnx(actor_critic, path):
     dummy_input = torch.randn(1, input_dim)
 
     torch.onnx.export(model, dummy_input, path)
+
+
+def draw_vector(
+    gym: gymapi.Gym,
+    viewer: gymapi.Viewer,
+    env_handle: gymapi.Env,
+    start_pos: np.ndarray,
+    direction: Tuple[float, float],
+    color: Tuple[float, float, float],
+    clear_lines: bool = False,
+    height: float = 0.2, head_scale: float = 0.1
+) -> None:
+    """Draws a single vector with an arrowhead."""
+    if viewer is None:
+        return
+
+    # Unpack direction and create start position
+    vel_x, vel_y = direction
+    start = gymapi.Vec3(start_pos[0], start_pos[1], start_pos[2] + height)
+
+    # Scale arrow length by magnitude
+    vel_magnitude = np.sqrt(vel_x**2 + vel_y**2)
+    if vel_magnitude > 0:
+        arrow_scale = np.clip(vel_magnitude, 0.1, 1.0)
+        normalized_x = vel_x / vel_magnitude
+        normalized_y = vel_y / vel_magnitude
+    else:
+        arrow_scale = 0.1
+        normalized_x = 0
+        normalized_y = 0
+
+    # Calculate end position and arrowhead
+    end = gymapi.Vec3(start.x + normalized_x * arrow_scale, start.y + normalized_y * arrow_scale, start.z)
+
+    # Calculate perpendicular vector for arrowhead
+    perp_x = -normalized_y
+    perp_y = normalized_x
+
+    head_left = gymapi.Vec3(
+        end.x - head_scale * (normalized_x * 0.7 + perp_x * 0.7),
+        end.y - head_scale * (normalized_y * 0.7 + perp_y * 0.7),
+        end.z,
+    )
+
+    head_right = gymapi.Vec3(
+        end.x - head_scale * (normalized_x * 0.7 - perp_x * 0.7),
+        end.y - head_scale * (normalized_y * 0.7 - perp_y * 0.7),
+        end.z,
+    )
+
+    # Create vertices and colors
+    verts = [
+        start.x,
+        start.y,
+        start.z,
+        end.x,
+        end.y,
+        end.z,
+        end.x,
+        end.y,
+        end.z,
+        head_left.x,
+        head_left.y,
+        head_left.z,
+        end.x,
+        end.y,
+        end.z,
+        head_right.x,
+        head_right.y,
+        head_right.z,
+    ]
+    colors = [color[0], color[1], color[2]] * 6
+
+    if clear_lines:
+        gym.clear_lines(viewer)
+    gym.add_lines(viewer, env_handle, 3, verts, colors)

From 2c2bfcbbf9f0624659dc1324a5f8ed1ba1fccc86 Mon Sep 17 00:00:00 2001
From: Henri <henri123lemoine@gmail.com>
Date: Mon, 11 Nov 2024 15:55:46 -0500
Subject: [PATCH 7/9] Cleanup of vector drawing (+make format)

---
 sim/play.py              | 95 ++--------------------------------------
 sim/utils/cmd_manager.py | 24 ++++++++--
 sim/utils/helpers.py     |  3 +-
 3 files changed, 26 insertions(+), 96 deletions(-)

diff --git a/sim/play.py b/sim/play.py
index 4f059c15..055a9f4f 100755
--- a/sim/play.py
+++ b/sim/play.py
@@ -8,11 +8,9 @@
 # mypy: ignore-errors
 
 import argparse
-import copy
 import logging
 import os
 from datetime import datetime
-from typing import Any, Union
 
 import cv2
 import h5py
@@ -33,84 +31,7 @@
 logger = logging.getLogger(__name__)
 
 DEFAULT_COMMAND = [0.3, 0.0, 0.0, 0.0]
-CMD_MODE = "fixed"
-
-
-def draw_velocity_arrows(gym, viewer, env_handles, robot_positions, command_xs, command_ys, actual_xs, actual_ys):
-    """Draws command and actual velocity arrows for all robots."""
-    if viewer is None:
-        return
-
-    ARROW_HEIGHT = 0.2  # Height of arrow from robot's base
-    HEAD_SCALE = 0.1  # Size of arrowhead
-
-    gym.clear_lines(viewer)
-
-    for env_handle, robot_pos, command_x, command_y, actual_x, actual_y in zip(
-        env_handles, robot_positions, command_xs, command_ys, actual_xs, actual_ys
-    ):
-        # Draw both commanded and actual velocity arrows for this robot
-        for velocity_type, (vel_x, vel_y), color in [
-            ("command", (command_x, command_y), (0.0, 1.0, 0.0)),  # Green for command
-            ("actual", (actual_x, actual_y), (1.0, 0.0, 0.0)),  # Red for actual
-        ]:
-            start = gymapi.Vec3(robot_pos[0], robot_pos[1], robot_pos[2] + ARROW_HEIGHT)
-
-            # Scale arrow length by velocity magnitude
-            vel_magnitude = np.sqrt(vel_x**2 + vel_y**2)
-            if vel_magnitude > 0:
-                arrow_scale = np.clip(vel_magnitude, 0.1, 1.0)
-                normalized_x = vel_x / vel_magnitude
-                normalized_y = vel_y / vel_magnitude
-            else:
-                arrow_scale = 0.1
-                normalized_x = 0
-                normalized_y = 0
-
-            # End position - length varies with velocity magnitude
-            end = gymapi.Vec3(start.x + normalized_x * arrow_scale, start.y + normalized_y * arrow_scale, start.z)
-
-            # Calculate arrowhead
-            perp_x = -normalized_y
-            perp_y = normalized_x
-
-            head_left = gymapi.Vec3(
-                end.x - HEAD_SCALE * (normalized_x * 0.7 + perp_x * 0.7),
-                end.y - HEAD_SCALE * (normalized_y * 0.7 + perp_y * 0.7),
-                end.z,
-            )
-
-            head_right = gymapi.Vec3(
-                end.x - HEAD_SCALE * (normalized_x * 0.7 - perp_x * 0.7),
-                end.y - HEAD_SCALE * (normalized_y * 0.7 - perp_y * 0.7),
-                end.z,
-            )
-
-            # Draw lines
-            verts = [
-                start.x,
-                start.y,
-                start.z,
-                end.x,
-                end.y,
-                end.z,
-                end.x,
-                end.y,
-                end.z,
-                head_left.x,
-                head_left.y,
-                head_left.z,
-                end.x,
-                end.y,
-                end.z,
-                head_right.x,
-                head_right.y,
-                head_right.z,
-            ]
-            colors = [color[0], color[1], color[2]] * 6
-
-            # Add the lines to the viewer
-            gym.add_lines(viewer, env_handle, 3, verts, colors)
+CMD_MODE = "oscillating"
 
 
 def play(args: argparse.Namespace) -> None:
@@ -249,19 +170,9 @@ def play(args: argparse.Namespace) -> None:
             img = env.gym.get_camera_image(env.sim, env.envs[0], h1, gymapi.IMAGE_COLOR)
             img = np.reshape(img, (1080, 1920, 4))
             img = cv2.cvtColor(img, cv2.COLOR_RGBA2BGR)
-
             robot_positions = env.root_states[:, 0:3].cpu().numpy()
-            draw_velocity_arrows(
-                env.gym,
-                env.viewer,
-                env.envs,
-                robot_positions,
-                env.commands[:, 0].cpu().numpy(),  # cmd x vels
-                env.commands[:, 1].cpu().numpy(),  # cmd y vels
-                env.base_lin_vel[:, 0].cpu().numpy(),  # real x vels
-                env.base_lin_vel[:, 1].cpu().numpy(),  # real y vels
-            )
-
+            actual_vels = np.stack([env.base_lin_vel[:, 0].cpu().numpy(), env.base_lin_vel[:, 1].cpu().numpy()], axis=1)
+            cmd_manager.draw(env.gym, env.viewer, env.envs, robot_positions, actual_vels)
             video.write(img[..., :3])
 
         # Log states
diff --git a/sim/utils/cmd_manager.py b/sim/utils/cmd_manager.py
index 62e3e6e4..2f5243b0 100644
--- a/sim/utils/cmd_manager.py
+++ b/sim/utils/cmd_manager.py
@@ -1,10 +1,11 @@
-import argparse
 from enum import Enum
 from typing import List
 
 import numpy as np
 import torch
 
+from sim.utils.helpers import draw_vector
+
 
 class CommandMode(Enum):
     FIXED = "fixed"
@@ -16,7 +17,11 @@ class CommandManager:
     """Manages robot commands"""
 
     def __init__(
-        self, num_envs: int = 1, mode: str = "oscillating", default_cmd: List[float] = [0.3, 0.0, 0.0, 0.0], device="cpu"
+        self,
+        num_envs: int = 1,
+        mode: str = "oscillating",
+        default_cmd: List[float] = [0.3, 0.0, 0.0, 0.0],
+        device="cpu",
     ):
         self.num_envs = num_envs
         self.mode = CommandMode(mode)
@@ -34,7 +39,6 @@ def __init__(
                 import pygame
 
                 pygame.init()
-                # Create a minimal window to capture keyboard events
                 pygame.display.set_mode((100, 100))
                 self.x_vel_cmd = 0.0
                 self.y_vel_cmd = 0.0
@@ -48,17 +52,31 @@ def update(self, dt: float) -> torch.Tensor:
         self.time += dt
         if self.mode == CommandMode.FIXED:
             return self.commands
+
         elif self.mode == CommandMode.OSCILLATING:
             # Oscillate x velocity
             x_vel = self.osc_amplitude * torch.sin(torch.tensor(2 * np.pi * self.time / self.osc_period))
             self.commands[:, 0] = x_vel.to(self.device)
+
         elif self.mode == CommandMode.KEYBOARD:
             self._handle_keyboard_input()
             self.commands[:, 0] = torch.tensor(self.x_vel_cmd, device=self.device)
             self.commands[:, 1] = torch.tensor(self.y_vel_cmd, device=self.device)
             self.commands[:, 2] = torch.tensor(self.yaw_vel_cmd, device=self.device)
+
         return self.commands
 
+    def draw(self, gym, viewer, env_handles, robot_positions, actual_vels) -> None:
+        """Draws command and actual velocity arrows for all robots."""
+        if viewer is None:
+            return
+
+        gym.clear_lines(viewer)
+        cmd_vels = self.commands[:, :2].cpu().numpy()
+        for env_handle, robot_pos, cmd_vel, actual_vel in zip(env_handles, robot_positions, cmd_vels, actual_vels):
+            draw_vector(gym, viewer, env_handle, robot_pos, cmd_vel, (0.0, 1.0, 0.0))  # cmd vector (green)
+            draw_vector(gym, viewer, env_handle, robot_pos, actual_vel, (1.0, 0.0, 0.0))  # vel vector (red)
+
     def _handle_keyboard_input(self):
         """Handles keyboard input for command updates."""
         import pygame
diff --git a/sim/utils/helpers.py b/sim/utils/helpers.py
index 74951e41..1f4a13cb 100755
--- a/sim/utils/helpers.py
+++ b/sim/utils/helpers.py
@@ -276,7 +276,8 @@ def draw_vector(
     direction: Tuple[float, float],
     color: Tuple[float, float, float],
     clear_lines: bool = False,
-    height: float = 0.2, head_scale: float = 0.1
+    height: float = 0.2,
+    head_scale: float = 0.1,
 ) -> None:
     """Draws a single vector with an arrowhead."""
     if viewer is None:

From e5bff1ea8751ec7d108c4a6e4a9df59f9c6424b2 Mon Sep 17 00:00:00 2001
From: Henri <henri123lemoine@gmail.com>
Date: Mon, 11 Nov 2024 16:28:13 -0500
Subject: [PATCH 8/9] nit

---
 sim/envs/humanoids/stompymicro_config.py | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/sim/envs/humanoids/stompymicro_config.py b/sim/envs/humanoids/stompymicro_config.py
index c9ad10b8..d0f3c2ae 100644
--- a/sim/envs/humanoids/stompymicro_config.py
+++ b/sim/envs/humanoids/stompymicro_config.py
@@ -1,5 +1,7 @@
 """Defines the environment configuration for the Getting up task"""
 
+import numpy as np
+
 from sim.env import robot_urdf_path
 from sim.envs.base.legged_robot_config import (  # type: ignore
     LeggedRobotCfg,
@@ -148,7 +150,7 @@ class ranges:
             lin_vel_x = [-0.05, 0.23]  # min max [m/s]
             lin_vel_y = [-0.04, 0.04]  # min max [m/s]
             ang_vel_yaw = [-0.1, 0.1]  # min max [rad/s]
-            heading = [-3.14, 3.14]
+            heading = [-np.pi, np.pi]  # min max [rad]
 
     class rewards:
         base_height_target = Robot.height

From f50edf4103459dbde22b9d2ebe71a17ffe77c945 Mon Sep 17 00:00:00 2001
From: Henri <henri123lemoine@gmail.com>
Date: Mon, 11 Nov 2024 16:30:28 -0500
Subject: [PATCH 9/9] minor refactor. Added random command, removed RENDER for
 headless argument, added env_cfg to cmd manager params

---
 sim/play.py              | 24 ++++++++++---------
 sim/utils/cmd_manager.py | 52 +++++++++++++++++++++++++++++++++-------
 sim/utils/helpers.py     |  9 +++++--
 3 files changed, 64 insertions(+), 21 deletions(-)

diff --git a/sim/play.py b/sim/play.py
index 055a9f4f..32052bec 100755
--- a/sim/play.py
+++ b/sim/play.py
@@ -30,9 +30,6 @@
 
 logger = logging.getLogger(__name__)
 
-DEFAULT_COMMAND = [0.3, 0.0, 0.0, 0.0]
-CMD_MODE = "oscillating"
-
 
 def play(args: argparse.Namespace) -> None:
     logger.info("Configuring environment and training settings...")
@@ -67,7 +64,6 @@ def play(args: argparse.Namespace) -> None:
     policy = ppo_runner.get_inference_policy(device=env.device)
 
     # Export policy if needed
-    EXPORT_POLICY = True
     if EXPORT_POLICY:
         path = os.path.join(".")
         export_policy_as_jit(ppo_runner.alg.actor_critic, path)
@@ -85,6 +81,7 @@ def play(args: argparse.Namespace) -> None:
     joint_index = 1
     stop_state_log = 1000
     now = datetime.now().strftime("%Y-%m-%d_%H-%M-%S")
+
     if args.log_h5:
         h5_file = h5py.File(f"data{now}.h5", "w")
 
@@ -117,7 +114,7 @@ def play(args: argparse.Namespace) -> None:
             "observations/euler", (max_timesteps, buf_len, 3), dtype=np.float32
         )  # root orientation
 
-    if RENDER:
+    if not args.headless:
         camera_properties = gymapi.CameraProperties()
         camera_properties.width = 1920
         camera_properties.height = 1080
@@ -151,6 +148,7 @@ def play(args: argparse.Namespace) -> None:
         mode=CMD_MODE,
         default_cmd=DEFAULT_COMMAND,
         device=env.device,
+        env_cfg=env_cfg
     )
 
     for t in tqdm(range(stop_state_log)):
@@ -163,7 +161,7 @@ def play(args: argparse.Namespace) -> None:
         obs, critic_obs, rews, dones, infos = env.step(actions.detach())
         print(f"IMU: {obs[0, (3 * env.num_actions + 5) + 3 : (3 * env.num_actions + 5) + 2 * 3]}")
 
-        if RENDER:
+        if not args.headless:
             env.gym.fetch_results(env.sim, True)
             env.gym.step_graphics(env.sim)
             env.gym.render_all_camera_sensors(env.sim)
@@ -172,7 +170,10 @@ def play(args: argparse.Namespace) -> None:
             img = cv2.cvtColor(img, cv2.COLOR_RGBA2BGR)
             robot_positions = env.root_states[:, 0:3].cpu().numpy()
             actual_vels = np.stack([env.base_lin_vel[:, 0].cpu().numpy(), env.base_lin_vel[:, 1].cpu().numpy()], axis=1)
-            cmd_manager.draw(env.gym, env.viewer, env.envs, robot_positions, actual_vels)
+
+            if args.command_arrow:
+                cmd_manager.draw(env.gym, env.viewer, env.envs, robot_positions, actual_vels)
+
             video.write(img[..., :3])
 
         # Log states
@@ -224,7 +225,7 @@ def play(args: argparse.Namespace) -> None:
     env_logger.print_rewards()
     env_logger.plot_states()
 
-    if RENDER:
+    if not args.headless:
         video.release()
 
     if args.log_h5:
@@ -233,11 +234,12 @@ def play(args: argparse.Namespace) -> None:
 
 
 if __name__ == "__main__":
-    RENDER = True
-    FIX_COMMAND = True
-
+    EXPORT_POLICY = True
     EXPORT_ONNX = True
 
+    DEFAULT_COMMAND = [0.3, 0.0, 0.0, 0.0]
+    CMD_MODE = "random"  # options: "fixed", "oscillating", "random", "keyboard"
+
     base_args = get_args()
     parser = argparse.ArgumentParser(description="Extend base arguments with log_h5")
     parser.add_argument("--log_h5", action="store_true", help="Enable HDF5 logging")
diff --git a/sim/utils/cmd_manager.py b/sim/utils/cmd_manager.py
index 2f5243b0..d223539d 100644
--- a/sim/utils/cmd_manager.py
+++ b/sim/utils/cmd_manager.py
@@ -11,6 +11,7 @@ class CommandMode(Enum):
     FIXED = "fixed"
     OSCILLATING = "oscillating"
     KEYBOARD = "keyboard"
+    RANDOM = "random"
 
 
 class CommandManager:
@@ -19,9 +20,10 @@ class CommandManager:
     def __init__(
         self,
         num_envs: int = 1,
-        mode: str = "oscillating",
+        mode: str = "fixed",
         default_cmd: List[float] = [0.3, 0.0, 0.0, 0.0],
         device="cpu",
+        env_cfg=None,
     ):
         self.num_envs = num_envs
         self.mode = CommandMode(mode)
@@ -29,15 +31,32 @@ def __init__(
         self.default_cmd = torch.tensor(default_cmd, device=self.device)
         self.commands = self.default_cmd.repeat(num_envs, 1)
         self.time = 0
+        self.env_cfg = env_cfg
 
         # Mode-specific parameters
         if self.mode == CommandMode.OSCILLATING:
-            self.osc_period = 10.0
-            self.osc_amplitude = 0.3
+            self.osc_period = 5.0  # secs
+            self.min_x_vel = env_cfg.commands.ranges.lin_vel_x[0] if env_cfg else 0.0
+            self.max_x_vel = env_cfg.commands.ranges.lin_vel_x[1] if env_cfg else 0.3
+            self.osc_amplitude = (self.max_x_vel - self.min_x_vel) / 2
+            self.osc_offset = (self.max_x_vel + self.min_x_vel) / 2
+        elif self.mode == CommandMode.RANDOM:
+            self.cmd_ranges = {
+                'lin_vel_x': env_cfg.commands.ranges.lin_vel_x,
+                'lin_vel_y': env_cfg.commands.ranges.lin_vel_y,
+                'ang_vel_yaw': env_cfg.commands.ranges.ang_vel_yaw,
+                'heading': env_cfg.commands.ranges.heading
+            } if env_cfg else {
+                'lin_vel_x': [-0.05, 0.23],
+                'lin_vel_y': [-0.05, 0.05],
+                'ang_vel_yaw': [-0.5, 0.5],
+                'heading': [-np.pi, np.pi]
+            }
+            self.resampling_time = env_cfg.commands.resampling_time if env_cfg else 8.0
+            self.last_sample_time = 0.0
         elif self.mode == CommandMode.KEYBOARD:
             try:
                 import pygame
-
                 pygame.init()
                 pygame.display.set_mode((100, 100))
                 self.x_vel_cmd = 0.0
@@ -50,14 +69,31 @@ def __init__(
     def update(self, dt: float) -> torch.Tensor:
         """Updates and returns commands based on current mode."""
         self.time += dt
+
         if self.mode == CommandMode.FIXED:
             return self.commands
-
         elif self.mode == CommandMode.OSCILLATING:
-            # Oscillate x velocity
-            x_vel = self.osc_amplitude * torch.sin(torch.tensor(2 * np.pi * self.time / self.osc_period))
+            # Oscillate x velocity between min and max
+            x_vel = self.osc_offset + self.osc_amplitude * torch.sin(
+                torch.tensor(2 * np.pi * self.time / self.osc_period)
+            )
             self.commands[:, 0] = x_vel.to(self.device)
-
+        elif self.mode == CommandMode.RANDOM:
+            if self.time - self.last_sample_time >= self.resampling_time:
+                self.last_sample_time = self.time
+                # Generate random commands within training ranges
+                new_commands = torch.tensor([
+                    np.random.uniform(*self.cmd_ranges['lin_vel_x']),
+                    np.random.uniform(*self.cmd_ranges['lin_vel_y']),
+                    0.0,
+                    np.random.uniform(*self.cmd_ranges['heading'])
+                ], device=self.device) if self.env_cfg and self.env_cfg.commands.heading_command else torch.tensor([
+                    np.random.uniform(*self.cmd_ranges['lin_vel_x']),
+                    np.random.uniform(*self.cmd_ranges['lin_vel_y']),
+                    np.random.uniform(*self.cmd_ranges['ang_vel_yaw']),
+                    0.0
+                ], device=self.device)
+                self.commands = new_commands.repeat(self.num_envs, 1)
         elif self.mode == CommandMode.KEYBOARD:
             self._handle_keyboard_input()
             self.commands[:, 0] = torch.tensor(self.x_vel_cmd, device=self.device)
diff --git a/sim/utils/helpers.py b/sim/utils/helpers.py
index 1f4a13cb..f1530163 100755
--- a/sim/utils/helpers.py
+++ b/sim/utils/helpers.py
@@ -233,6 +233,12 @@ def get_args() -> argparse.Namespace:
             "default": False,
             "help": "Enable HDF5 logging",
         },
+        {
+            "name": "--command-arrow",
+            "action": "store_true",
+            "default": False,
+            "help": "Draw command and velocity arrows during visualization",
+        },
     ]
     # parse arguments
     args = gymutil.parse_arguments(description="RL Policy", custom_parameters=custom_parameters)
@@ -276,7 +282,6 @@ def draw_vector(
     direction: Tuple[float, float],
     color: Tuple[float, float, float],
     clear_lines: bool = False,
-    height: float = 0.2,
     head_scale: float = 0.1,
 ) -> None:
     """Draws a single vector with an arrowhead."""
@@ -285,7 +290,7 @@ def draw_vector(
 
     # Unpack direction and create start position
     vel_x, vel_y = direction
-    start = gymapi.Vec3(start_pos[0], start_pos[1], start_pos[2] + height)
+    start = gymapi.Vec3(start_pos[0], start_pos[1], start_pos[2])
 
     # Scale arrow length by magnitude
     vel_magnitude = np.sqrt(vel_x**2 + vel_y**2)