In [None]:
from math import ceil
from tqdm import tqdm
import numpy as np
import imageio
import os
import pickle
import multiprocessing as mp


def collect_video(init_obs, env, policy):
    images = []
    actions = []
    rewards = []
    episode_return = 0
    done = False
    obs = init_obs[0]
    image = env.render()
    images += [image]
    
    dd = 10 ### collect a few more steps after done
    i = 0
    while dd:
        action = policy.get_action(obs)
        try:
            obs, reward, truncation, termination, info = env.step(action)
            done = info['success']
            dd -= done
            episode_return += reward
            i = i+1
            actions += [action]
            rewards += [reward]
            #print(i, done, dd)
        except Exception as e:
            print(e)
            break
        if dd != 10 and not done:
            break
        image = env.render()
        images += [image]
        
    return images, actions, rewards, episode_return

def sample_n_frames(frames, n):
    new_vid_ind = [int(i*len(frames)/(n-1)) for i in range(n-1)] + [len(frames)-1]
    return np.array([frames[i] for i in new_vid_ind])

def save_frame(path, frame):
    imageio.imwrite(path, frame)
        

In [None]:
from metaworld.envs import ALL_V2_ENVIRONMENTS_GOAL_OBSERVABLE as env_dict
import metaworld.policies as policies
from math import ceil
from tqdm import tqdm
import numpy as np
import imageio
import os
import pickle
import multiprocessing as mp

if __name__ == "__main__":
    collection_config = {
        "demos_per_camera": 25,
        "output_path": "./collected_data",
        "camera_names": ['corner', 'corner2', 'corner3'], ### possible values: "corner3, corner, corner2, topview, behindGripper", None(for random)
        "discard_ratio": 0.0, ### discard the last {ratio} of the collected videos (preventing failed episodes)
        "render_mode": "rgb_array"
    }

    # included_tasks = ["door-open"]# ["drawer-open", "door-close", "basketball", "shelf-place", "button-press", "button-press-topdown", "faucet-close", "faucet-open", "handle-press", "hammer", "assembly"]
    # included_tasks = [t + "-v2-goal-observable" for t in included_tasks]
    # #included_tasks = ['assembly-v2-goal-observable', 'basketball-v2-goal-observable', 'bin-picking-v2-goal-observable', 'box-close-v2-goal-observable', 'button-press-topdown-v2-goal-observable', 'button-press-topdown-wall-v2-goal-observable', 'button-press-v2-goal-observable', 'button-press-wall-v2-goal-observable', 'coffee-button-v2-goal-observable', 'coffee-pull-v2-goal-observable', 'coffee-push-v2-goal-observable', 'dial-turn-v2-goal-observable', 'disassemble-v2-goal-observable', 'door-close-v2-goal-observable', 'door-lock-v2-goal-observable', 'door-open-v2-goal-observable', 'door-unlock-v2-goal-observable', 'hand-insert-v2-goal-observable', 'drawer-close-v2-goal-observable', 'drawer-open-v2-goal-observable', 'faucet-open-v2-goal-observable', 'faucet-close-v2-goal-observable', 'hammer-v2-goal-observable', 'handle-press-side-v2-goal-observable', 'handle-press-v2-goal-observable', 'handle-pull-side-v2-goal-observable', 'handle-pull-v2-goal-observable', 'lever-pull-v2-goal-observable', 'pick-place-wall-v2-goal-observable', 'pick-out-of-hole-v2-goal-observable', 'reach-v2-goal-observable', 'push-back-v2-goal-observable', 'push-v2-goal-observable', 'pick-place-v2-goal-observable', 'plate-slide-v2-goal-observable', 'plate-slide-side-v2-goal-observable', 'plate-slide-back-v2-goal-observable', 'plate-slide-back-side-v2-goal-observable', 'peg-unplug-side-v2-goal-observable', 'soccer-v2-goal-observable', 'stick-push-v2-goal-observable', 'stick-pull-v2-goal-observable', 'push-wall-v2-goal-observable', 'reach-wall-v2-goal-observable', 'shelf-place-v2-goal-observable', 'sweep-into-v2-goal-observable', 'sweep-v2-goal-observable', 'window-open-v2-goal-observable', 'window-close-v2-goal-observable']
    
    # def get_policy(env_name):
    #     name = "".join(" ".join(env_name.split('-')[:-3]).title().split(" "))
    #     policy_name = "Sawyer" + name + "V2Policy"
    #     try:
    #         policy = getattr(policies, policy_name)()
    #     except:
    #         policy = None
    #     return policy


    ps = {}
    for env_name in env_dict.keys():
        policy = get_policy(env_name)
        if policy is None:
            print("Policy not found:", env_name)
        else:
            ps[env_name] = policy

    out_path = collection_config["output_path"]

    os.makedirs(out_path, exist_ok=True)
    for camera in collection_config["camera_names"]:
        demos = []
        action_seqs = []
        raw_lengths = []
        rewards = []
        for seed in tqdm(range(42, 42+ceil(collection_config["demos_per_camera"] * (1+collection_config["discard_ratio"])))):
            env = env_dict[task](render_mode=collection_config["render_mode"], camera_name=camera, seed=seed)
            obs = env.reset()
            images, action_seq, reward, _ = collect_video(obs, env, polcy)
            # assert len(images) == len(action_seq) + 1 or len(images) == 502
            raw_lengths += [len(images)]
            demos += [images]
            action_seqs += [action_seq]
            rewards += [reward]
        top_k_ind = np.argsort(raw_lengths)[:collection_config["demos_per_camera"]]
        demos = [demos[i] for i in top_k_ind]
        raw_lengths = [raw_lengths[i] for i in top_k_ind]
        action_seqs = [action_seqs[i] for i in top_k_ind]
        rewards = [rewards[i] for i in top_k_ind]
        print(f"vid length bounds: {raw_lengths[0]} ~ {raw_lengths[-1]}")
        
        ### save the collected demos
        out_dir = os.path.join(out_path, "-".join(task.split('-')[:-3]))
        os.makedirs(out_dir, exist_ok=True)
        for i, demo in enumerate(demos):
            demo_dir = os.path.join(out_dir, f"{camera}/{i:03d}")
            os.makedirs(demo_dir, exist_ok=True)
            with mp.Pool(10) as p:
                p.starmap(save_frame, [(os.path.join(demo_dir, f"{j:02d}.png"), frame) for j, frame in enumerate(demo)])
            with open(f"{demo_dir}/action.pkl", "wb") as f:
                pickle.dump(action_seqs[i], f)
            with open(f"{demo_dir}/rewards.pkl", "wb") as f:
                    pickle.dump(rewards[i], f)
        
            

        
    

In [None]:
# Copyright 2023 OmniSafeAI Team. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================

import argparse
import os
import json
from safepo.common.env import make_sa_mujoco_env
from safepo.common.model import ActorVCritic
import numpy as np
import joblib
import torch

from tqdm import tqdm
import imageio
import pickle
import multiprocessing as mp

def to_tensor(x, dtype=torch.float32):
    return torch.as_tensor(x, dtype=dtype)

# eval_done = False
# eval_obs, _ = eval_env.reset()
# eval_obs = torch.as_tensor(eval_obs, dtype=torch.float32)
# eval_rew, eval_cost, eval_len = 0.0, 0.0, 0.0
# while not eval_done:
#     with torch.no_grad():
#         act, _, _, _ = model.step(
#             eval_obs, deterministic=True
#         )
#     eval_obs, reward, cost, terminated, truncated, info = eval_env.step(
#         act.detach().squeeze().cpu().numpy()
#     )
#     eval_obs = torch.as_tensor(
#         eval_obs, dtype=torch.float32
#     )
#     eval_rew += reward[0]
#     eval_cost += cost[0]
#     eval_len += 1
#     eval_done = terminated[0] or truncated[0]

def collect_video(obs, env, model, camera, height, width):
    images = []
    actions = []
    rewards = []
    costs = []
    episode_return = 0
    done = False
    image = env.render(camera_name=camera, height=height, width=width)
    images += [image]
    
    done = False
    while done:
        with torch.no_grad():
            action, _, _, _ = model.step(
                to_tensor(obs), deterministic=True
            )
        try:
            obs, reward, cost, terminated, truncated, info = env.step(
                    act.detach().squeeze().cpu().numpy()
                )
            done = terminated[0] or truncated[0]
            episode_return += reward
            actions += [action]
            rewards += [reward]
            costs += [cost]
            #print(i, done, dd)
        except Exception as e:
            print(e)
            break
        image = env.render(camera_name=camera, height=height, width=width)
        images += [image]
        
    return obs, images, actions, rewards, costs, episode_return


def sample_n_frames(frames, n):
    new_vid_ind = [int(i*len(frames)/(n-1)) for i in range(n-1)] + [len(frames)-1]
    return np.array([frames[i] for i in new_vid_ind])


def save_frame(path, frame):
    imageio.imwrite(path, frame)
        

if __name__ == '__main__':
    
    collection_config = {
        "camera_names": ['vision', 'track', 'fixednear', 'fixedfar'], ### possible values: "corner3, corner, corner2, topview, behindGripper", None(for random)
        "discard_ratio": 0.0, ### discard the last {ratio} of the collected videos (preventing failed episodes)
        "height": 128,
        "width": 128,
    }
    
    parser = argparse.ArgumentParser()
    parser.add_argument("--exp_dir", type=str, default='', help="the directory of the evaluation")
    parser.add_argument("--episodes_per_camera", type=int, default=25, help="the number of episodes to evaluate")

    args = parser.parse_args()

    env_path = args.exp_dir
    episodes_per_cam = args.episodes_per_camera
    save_dir = env_path.replace('runs', 'collected_data')
    if not os.path.exists(save_dir):
        os.makedirs(save_dir)
    
    # torch.set_num_threads(4)
    
    config_path = env_path + '/config.json'
    config = json.load(open(config_path, 'r'))

    env_id = config['task'] if 'task' in config.keys() else config['env_name']
    env_norms = os.listdir(env_path)
    env_norms = [env_norm for env_norm in env_norms if env_norm.endswith('.pkl')]
    final_norm_name = sorted(env_norms)[-1]

    model_dir = env_path + '/torch_save'
    models = os.listdir(model_dir)
    models = [model for model in models if model.endswith('.pt')]
    final_model_name = sorted(models)[-1]

    model_path = model_dir + '/' + final_model_name
    norm_path = env_path + '/' + final_norm_name

    eval_env, obs_space, act_space = make_sa_mujoco_env(
        num_envs=config['num_envs'], env_id=env_id, seed=None
    )

    model = ActorVCritic(
            obs_dim=obs_space.shape[0],
            act_dim=act_space.shape[0],
            hidden_sizes=config['hidden_sizes'],
        )
    model.actor.load_state_dict(torch.load(model_path))

    if os.path.exists(norm_path):
        norm = joblib.load(open(norm_path, 'rb'))['Normalizer']
        eval_env.obs_rms = norm
    
    for camera in collection_config["camera_names"]:
        demos = []
        action_seqs = []
        raw_lengths = []
        rewards = []
        costs = []
        for seed in tqdm(range(42, episodes_per_cam)):
            obs, _ = env.reset()
            images, action_seq, reward, cost _ = collect_video(
                obs, env, model, camera, 
                height=collection_config["height"], width=collection_config["width"]
            )
            # assert len(images) == len(action_seq) + 1 or len(images) == 502
            raw_lengths += [len(images)]
            demos += [images]
            action_seqs += [action_seq]
            rewards += [reward]
            costs += [cost]
            
        top_k_ind = np.argsort(raw_lengths)[:collection_config["demos_per_camera"]]
        demos = [demos[i] for i in top_k_ind]
        raw_lengths = [raw_lengths[i] for i in top_k_ind]
        action_seqs = [action_seqs[i] for i in top_k_ind]
        rewards = [rewards[i] for i in top_k_ind]
        costs = [costs[i] for i in top_k_ind]
        print(f"vid length bounds: {raw_lengths[0]} ~ {raw_lengths[-1]}")
        
        ### save the collected demos
        import pdb; pdb.set_trace()
        out_dir = os.path.join(out_path, "-".join(task.split('-')[:-3]))
        os.makedirs(out_dir, exist_ok=True)
        for i, demo in enumerate(demos):
            demo_dir = os.path.join(out_dir, f"{camera}/{i:03d}")
            os.makedirs(demo_dir, exist_ok=True)
            with mp.Pool(10) as p:
                p.starmap(save_frame, [(os.path.join(demo_dir, f"{j:02d}.png"), frame) for j, frame in enumerate(demo)])
            with open(f"{demo_dir}/action.pkl", "wb") as f:
                pickle.dump(action_seqs[i], f)
            with open(f"{demo_dir}/rewards.pkl", "wb") as f:
                    pickle.dump(rewards[i], f)
            with open(f"{demo_dir}/costs.pkl", "wb") as f:
                    pickle.dump(costs[i, f])
            


    for seed in tqdm(range(42, episodes_per_cam)):

        eval_done = False
        eval_obs, _ = eval_env.reset()
        eval_obs = torch.as_tensor(eval_obs, dtype=torch.float32)
        eval_rew, eval_cost, eval_len = 0.0, 0.0, 0.0
        while not eval_done:
            with torch.no_grad():
                act, _, _, _ = model.step(
                    eval_obs, deterministic=True
                )
            eval_obs, reward, cost, terminated, truncated, info = eval_env.step(
                act.detach().squeeze().cpu().numpy()
            )
            eval_obs = torch.as_tensor(
                eval_obs, dtype=torch.float32
            )
            eval_rew += reward[0]
            eval_cost += cost[0]
            eval_len += 1
            eval_done = terminated[0] or truncated[0]
            

In [8]:
# cat all png to gif

vision = "/data/wenhao/20241027_humanoid/Safe-Policy-Optimization/safepo/collected_data/single_agent_exp/SafetyHumanoidVelocity-v1/ppo_lag/seed-000-2024-11-03-01-43-36/vision/004/"
track = "/data/wenhao/20241027_humanoid/Safe-Policy-Optimization/safepo/collected_data/single_agent_exp/SafetyHumanoidVelocity-v1/ppo_lag/seed-000-2024-11-03-01-43-36/track/004/"
fixednear = "/data/wenhao/20241027_humanoid/Safe-Policy-Optimization/safepo/collected_data/single_agent_exp/SafetyHumanoidVelocity-v1/ppo_lag/seed-000-2024-11-03-01-43-36/fixednear/004/"

import imageio
import os
import numpy as np

images = []

# get all files in the replay folder
files = os.listdir(vision)
files = [f for f in files if f.endswith(".png")]

# get the number of files
print("Number of files: ", len(files))

for i in range(len(files)):
    # Load the image
    if i < 10:
        img = imageio.imread(vision + "0" + str(i) + ".png")
    else:
        img = imageio.imread(vision + str(i) + ".png")
    
    # Rotate the image by 180 degrees
    # img_rotated = np.rot90(img, 2)  # Rotates 180 degrees (90 degrees twice)

    # # Append the rotated image to the list
    # images.append(img_rotated)
    images.append(img)

imageio.mimsave('/data/wenhao/20241027_humanoid/Safe-Policy-Optimization/safepo/collected_data/single_agent_exp/SafetyHumanoidVelocity-v1/ppo_lag/seed-000-2024-11-03-01-43-36/vision/vision004.gif', images)


images = []

# get all files in the replay folder
files = os.listdir(track)
files = [f for f in files if f.endswith(".png")]

for i in range(len(files)):
    # Load the image
    if i < 10:
        img = imageio.imread(track + "0" + str(i) + ".png")
    else:
        img = imageio.imread(track + str(i) + ".png")
    
    # # Rotate the image by 180 degrees
    # img_rotated = np.rot90(img, 2)  # Rotates 180 degrees (90 degrees twice)

    # # Append the rotated image to the list
    # images.append(img_rotated)
    
    images.append(img)


imageio.mimsave('/data/wenhao/20241027_humanoid/Safe-Policy-Optimization/safepo/collected_data/single_agent_exp/SafetyHumanoidVelocity-v1/ppo_lag/seed-000-2024-11-03-01-43-36/track/track004.gif', images)



images = []

# get all files in the replay folder
files = os.listdir(fixednear)
files = [f for f in files if f.endswith(".png")]

for i in range(len(files)):
    # Load the image
    if i < 10:
        img = imageio.imread(fixednear + "0" + str(i) + ".png")
    else:
        img = imageio.imread(fixednear + str(i) + ".png")
    
    # # Rotate the image by 180 degrees
    # img_rotated = np.rot90(img, 2)  # Rotates 180 degrees (90 degrees twice)

    # # Append the rotated image to the list
    # images.append(img_rotated)
    
    images.append(img)
    
imageio.mimsave('/data/wenhao/20241027_humanoid/Safe-Policy-Optimization/safepo/collected_data/single_agent_exp/SafetyHumanoidVelocity-v1/ppo_lag/seed-000-2024-11-03-01-43-36/fixednear/fixednear004.gif', images)

# images = []

# # get all files in the replay folder
# files = os.listdir(corner3)
# files = [f for f in files if f.endswith(".png")]

# for i in range(len(files)):
#     # Load the image
#     if i < 10:
#         img = imageio.imread(corner3 + "0" + str(i) + ".png")
#     else:
#         img = imageio.imread(corner3 + str(i) + ".png")
    
#     # Rotate the image by 180 degrees
#     img_rotated = np.rot90(img, 2)  # Rotates 180 degrees (90 degrees twice)

#     # Append the rotated image to the list
#     images.append(img_rotated)


# imageio.mimsave('/data/wenhao/20241027_humanoid/metaworld_dataset_1112_test/door-open/corner3.gif', images)



Number of files:  149


  img = imageio.imread(vision + "0" + str(i) + ".png")
  img = imageio.imread(vision + str(i) + ".png")
  img = imageio.imread(track + "0" + str(i) + ".png")
  img = imageio.imread(track + str(i) + ".png")
  img = imageio.imread(fixednear + "0" + str(i) + ".png")
  img = imageio.imread(fixednear + str(i) + ".png")


In [9]:
import pickle as pkl

with open('/data/wenhao/20241027_humanoid/Safe-Policy-Optimization/safepo/collected_data/single_agent_exp/SafetyHumanoidVelocity-v1/ppo_lag/seed-000-2024-11-03-01-43-36/fixednear/004/costs.pkl', 'rb') as f:
    costs = pkl.load(f)
    
# rewards 
with open('/data/wenhao/20241027_humanoid/Safe-Policy-Optimization/safepo/collected_data/single_agent_exp/SafetyHumanoidVelocity-v1/ppo_lag/seed-000-2024-11-03-01-43-36/fixednear/004/rewards.pkl', 'rb') as f:
    rewards = pkl.load(f)
    
print(costs)
print(rewards)

[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0]
[4.9480795691705035, 4.83886256315303, 4.927295934802406, 4.860377869841213, 4.781098108893878, 4.89238530768223, 4.906880739448507, 4.847839807110334, 4.8879599535447795, 4.920643892277639, 4.962294699567341, 4.859139668765443, 4.911462941119612, 4.9279

In [None]:
from metaworld.envs import ALL_V2_ENVIRONMENTS_GOAL_OBSERVABLE as env_dict
import metaworld.policies as policies
from math import ceil
from tqdm import tqdm
import numpy as np
import imageio
import os
import pickle
import multiprocessing as mp

if __name__ == "__main__":
    collection_config = {
        "demos_per_camera": 25,
        "output_path": "./collected_data",
        "camera_names": ['corner', 'corner2', 'corner3'], ### possible values: "corner3, corner, corner2, topview, behindGripper", None(for random)
        "discard_ratio": 0.0, ### discard the last {ratio} of the collected videos (preventing failed episodes)
        "render_mode": "rgb_array"
    }


    included_tasks = ["door-open"]# ["drawer-open", "door-close", "basketball", "shelf-place", "button-press", "button-press-topdown", "faucet-close", "faucet-open", "handle-press", "hammer", "assembly"]
    included_tasks = [t + "-v2-goal-observable" for t in included_tasks]
    #included_tasks = ['assembly-v2-goal-observable', 'basketball-v2-goal-observable', 'bin-picking-v2-goal-observable', 'box-close-v2-goal-observable', 'button-press-topdown-v2-goal-observable', 'button-press-topdown-wall-v2-goal-observable', 'button-press-v2-goal-observable', 'button-press-wall-v2-goal-observable', 'coffee-button-v2-goal-observable', 'coffee-pull-v2-goal-observable', 'coffee-push-v2-goal-observable', 'dial-turn-v2-goal-observable', 'disassemble-v2-goal-observable', 'door-close-v2-goal-observable', 'door-lock-v2-goal-observable', 'door-open-v2-goal-observable', 'door-unlock-v2-goal-observable', 'hand-insert-v2-goal-observable', 'drawer-close-v2-goal-observable', 'drawer-open-v2-goal-observable', 'faucet-open-v2-goal-observable', 'faucet-close-v2-goal-observable', 'hammer-v2-goal-observable', 'handle-press-side-v2-goal-observable', 'handle-press-v2-goal-observable', 'handle-pull-side-v2-goal-observable', 'handle-pull-v2-goal-observable', 'lever-pull-v2-goal-observable', 'pick-place-wall-v2-goal-observable', 'pick-out-of-hole-v2-goal-observable', 'reach-v2-goal-observable', 'push-back-v2-goal-observable', 'push-v2-goal-observable', 'pick-place-v2-goal-observable', 'plate-slide-v2-goal-observable', 'plate-slide-side-v2-goal-observable', 'plate-slide-back-v2-goal-observable', 'plate-slide-back-side-v2-goal-observable', 'peg-unplug-side-v2-goal-observable', 'soccer-v2-goal-observable', 'stick-push-v2-goal-observable', 'stick-pull-v2-goal-observable', 'push-wall-v2-goal-observable', 'reach-wall-v2-goal-observable', 'shelf-place-v2-goal-observable', 'sweep-into-v2-goal-observable', 'sweep-v2-goal-observable', 'window-open-v2-goal-observable', 'window-close-v2-goal-observable']
    
    
    def get_policy(env_name):
        name = "".join(" ".join(env_name.split('-')[:-3]).title().split(" "))
        policy_name = "Sawyer" + name + "V2Policy"
        try:
            policy = getattr(policies, policy_name)()
        except:
            policy = None
        return policy

    def save_frame(path, frame):
        imageio.imwrite(path, frame)
        
    ps = {}
    for env_name in env_dict.keys():
        policy = get_policy(env_name)
        if policy is None:
            print("Policy not found:", env_name)
        else:
            ps[env_name] = policy

    out_path = collection_config["output_path"]

    os.makedirs(out_path, exist_ok=True)
    for task in tqdm(included_tasks):
        print(task)
        for camera in collection_config["camera_names"]:
            demos = []
            action_seqs = []
            raw_lengths = []
            rewards = []
            for seed in tqdm(range(42, 42+ceil(collection_config["demos_per_camera"] * (1+collection_config["discard_ratio"])))):
                env = env_dict[task](render_mode=collection_config["render_mode"], camera_name=camera, seed=seed)
                obs = env.reset()
                images, action_seq, reward, _ = collect_video(obs, env, ps[task])
                # assert len(images) == len(action_seq) + 1 or len(images) == 502
                raw_lengths += [len(images)]
                demos += [images]
                action_seqs += [action_seq]
                rewards += [reward]
            top_k_ind = np.argsort(raw_lengths)[:collection_config["demos_per_camera"]]
            demos = [demos[i] for i in top_k_ind]
            raw_lengths = [raw_lengths[i] for i in top_k_ind]
            action_seqs = [action_seqs[i] for i in top_k_ind]
            rewards = [rewards[i] for i in top_k_ind]
            print(f"vid length bounds: {raw_lengths[0]} ~ {raw_lengths[-1]}")
            
            ### save the collected demos
            out_dir = os.path.join(out_path, "-".join(task.split('-')[:-3]))
            os.makedirs(out_dir, exist_ok=True)
            for i, demo in enumerate(demos):
                demo_dir = os.path.join(out_dir, f"{camera}/{i:03d}")
                os.makedirs(demo_dir, exist_ok=True)
                with mp.Pool(10) as p:
                    p.starmap(save_frame, [(os.path.join(demo_dir, f"{j:02d}.png"), frame) for j, frame in enumerate(demo)])
                with open(f"{demo_dir}/action.pkl", "wb") as f:
                    pickle.dump(action_seqs[i], f)
                with open(f"{demo_dir}/rewards.pkl", "wb") as f:
                        pickle.dump(rewards[i], f)
            
            

        
    
    
        
        

In [None]:
# Copyright 2023 OmniSafeAI Team. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================

import argparse
import os
import json
from collections import deque
from safepo.common.env import make_sa_mujoco_env, make_ma_mujoco_env, make_ma_multi_goal_env, make_ma_isaac_env
from safepo.common.model import ActorVCritic
from safepo.utils.config import multi_agent_velocity_map, multi_agent_goal_tasks, isaac_gym_map
import numpy as np
import joblib
import torch

def eval_single_agent(eval_dir, eval_episodes):

    torch.set_num_threads(4)
    config_path = eval_dir + '/config.json'
    config = json.load(open(config_path, 'r'))

    env_id = config['task'] if 'task' in config.keys() else config['env_name']
    env_norms = os.listdir(eval_dir)
    env_norms = [env_norm for env_norm in env_norms if env_norm.endswith('.pkl')]
    final_norm_name = sorted(env_norms)[-1]

    model_dir = eval_dir + '/torch_save'
    models = os.listdir(model_dir)
    models = [model for model in models if model.endswith('.pt')]
    final_model_name = sorted(models)[-1]

    model_path = model_dir + '/' + final_model_name
    norm_path = eval_dir + '/' + final_norm_name

    eval_env, obs_space, act_space = make_sa_mujoco_env(num_envs=config['num_envs'], env_id=env_id, seed=None)

    model = ActorVCritic(
            obs_dim=obs_space.shape[0],
            act_dim=act_space.shape[0],
            hidden_sizes=config['hidden_sizes'],
        )
    model.actor.load_state_dict(torch.load(model_path))

    if os.path.exists(norm_path):
        norm = joblib.load(open(norm_path, 'rb'))['Normalizer']
        eval_env.obs_rms = norm

    eval_rew_deque = deque(maxlen=50)
    eval_cost_deque = deque(maxlen=50)
    eval_len_deque = deque(maxlen=50)

    for _ in range(eval_episodes):
        eval_done = False
        eval_obs, _ = eval_env.reset()
        eval_obs = torch.as_tensor(eval_obs, dtype=torch.float32)
        eval_rew, eval_cost, eval_len = 0.0, 0.0, 0.0
        while not eval_done:
            with torch.no_grad():
                act, _, _, _ = model.step(
                    eval_obs, deterministic=True
                )
            eval_obs, reward, cost, terminated, truncated, info = eval_env.step(
                act.detach().squeeze().cpu().numpy()
            )
            eval_obs = torch.as_tensor(
                eval_obs, dtype=torch.float32
            )
            eval_rew += reward[0]
            eval_cost += cost[0]
            eval_len += 1
            eval_done = terminated[0] or truncated[0]
        eval_rew_deque.append(eval_rew)
        eval_cost_deque.append(eval_cost)
        eval_len_deque.append(eval_len)

    return sum(eval_rew_deque) / len(eval_rew_deque), sum(eval_cost_deque) / len(eval_cost_deque)


def eval_multi_agent(eval_dir, eval_episodes):

    config_path = eval_dir + '/config.json'
    config = json.load(open(config_path, 'r'))

    env_name = config['env_name']
    if env_name in multi_agent_velocity_map.keys():
        env_info = multi_agent_velocity_map[env_name]
        agent_conf = env_info['agent_conf']
        scenario = env_info['scenario']
        eval_env = make_ma_mujoco_env(
            scenario=scenario,
            agent_conf=agent_conf,
            seed=np.random.randint(0, 1000),
            cfg_train=config,
        )
    # elif env_name in isaac_gym_map:
    #     import pdb; pdb.set_trace()
    #     cfg_train=config
    #     agent_index = [[[0, 1, 2, 3, 4, 5]],
    #                 [[0, 1, 2, 3, 4, 5]]]
    #     raise NotImplementedError 
    #     # sim_params = parse_sim_params(args, cfg_env, cfg_train)
    #     # env = make_ma_isaac_env(args, cfg_env, cfg_train, sim_params, agent_index)
        
    #     # cfg_train["n_rollout_threads"] = env.num_envs
    #     # cfg_train["n_eval_rollout_threads"] = env.num_envs
    #     # eval_env = make_ma_isaac_env(
    #     #     task=env_name,
    #     #     seed=np.random.randint(0, 1000),
    #     #     cfg_train=config,
    #     # )
    else:
        eval_env = make_ma_multi_goal_env(
            task=env_name,
            seed=np.random.randint(0, 1000),
            cfg_train=config,
        )

    model_dir = eval_dir + f"/models_seed{config['seed']}"
    algo = config['algorithm_name']
    if algo == 'macpo':
        from safepo.multi_agent.macpo import Runner
    elif algo == 'mappo':
        from safepo.multi_agent.mappo import Runner
    elif algo == 'mappolag':
        from safepo.multi_agent.mappolag import Runner
    elif algo == 'happo':
        from safepo.multi_agent.happo import Runner
    else:
        raise NotImplementedError
    torch.set_num_threads(4)
    runner = Runner(
        vec_env=eval_env,
        vec_eval_env=eval_env,
        config=config,
        model_dir=model_dir,
    )
    return runner.eval(eval_episodes)


def single_runs_eval(eval_dir, eval_episodes):

    config_path = eval_dir + '/config.json'
    config = json.load(open(config_path, 'r'))
    env = config['task'] if 'task' in config.keys() else config['env_name']
    if env in multi_agent_velocity_map.keys() or env in multi_agent_goal_tasks or env in isaac_gym_map:
        reward, cost = eval_multi_agent(eval_dir, eval_episodes)
    else:
        reward, cost = eval_single_agent(eval_dir, eval_episodes)
    
    return reward, cost

def benchmark_eval():
    parser = argparse.ArgumentParser()
    parser.add_argument("--benchmark-dir", type=str, default='', help="the directory of the evaluation")
    parser.add_argument("--eval-episodes", type=int, default=3, help="the number of episodes to evaluate")
    parser.add_argument("--save-dir", type=str, default=None, help="the directory to save the evaluation result")

    args = parser.parse_args()

    benchmark_dir = args.benchmark_dir
    eval_episodes = args.eval_episodes
    if args.save_dir is not None:
        save_dir = args.save_dir
    else:
        save_dir = benchmark_dir.replace('runs', 'results')
        if os.path.exists(save_dir) is False:
            os.makedirs(save_dir)
    envs = os.listdir(benchmark_dir)
    for env in envs:
        env_path = os.path.join(benchmark_dir, env)
        algos = os.listdir(env_path)
        for algo in algos:
            print(f"Start evaluating {algo} in {env}")
            algo_path = os.path.join(env_path, algo)
            seeds = os.listdir(algo_path)
            rewards, costs = [], []
            for seed in seeds:
                seed_path = os.path.join(algo_path, seed)
                reward, cost = single_runs_eval(seed_path, eval_episodes)
                rewards.append(reward)
                costs.append(cost)
            output_file = open(f"{save_dir}/eval_result.txt", 'a')
            # two wise after point
            reward_mean = round(np.mean(rewards), 2)
            reward_std = round(np.std(rewards), 2)
            cost_mean = round(np.mean(costs), 2)
            cost_std = round(np.std(costs), 2)
            print(f"After {eval_episodes} episodes evaluation, the {algo} in {env} evaluation reward: {reward_mean}±{reward_std}, cost: {cost_mean}±{cost_std}, the reuslt is saved in {save_dir}/eval_result.txt")
            output_file.write(f"After {eval_episodes} episodes evaluation, the {algo} in {env} evaluation reward: {reward_mean}±{reward_std}, cost: {cost_mean}±{cost_std} \n")

if __name__ == '__main__':
    benchmark_eval()
