diff --git a/benchmark/generate_exp.py b/benchmark/generate_exp.py
deleted file mode 100644
index 219b3da37..000000000
--- a/benchmark/generate_exp.py
+++ /dev/null
@@ -1,41 +0,0 @@
-import argparse
-from distutils.util import strtobool
-
-parser = argparse.ArgumentParser(description='CleanRL Experiment Submission')
-# Common arguments
-parser.add_argument('--exp-script', type=str, default="exp.sh",
-                    help='the file name of this experiment')
-parser.add_argument('--algo', type=str, default="ppo.py",
-                    help='the algorithm that will be used')
-parser.add_argument('--gym-ids', nargs='+',
-                    help='the ids of the gym environment')
-parser.add_argument('--total-timesteps', type=int, default=int(1e9),
-                    help='total timesteps of the experiments')
-parser.add_argument('--other-args', type=str, default="",
-                    help="extra arguments passed through to the algorithm script")
-parser.add_argument('--wandb-project-name', type=str, default="cleanRL",
-                    help="the wandb's project name")
-
-args = parser.parse_args()
-
-template = '''
-for seed in {{1..2}}
-do
-    (sleep 0.3 && nohup xvfb-run -a python {} \\
-    --gym-id {} \\
-    --total-timesteps {} \\
-    --wandb-project-name {} \\
-    --track \\
-    {} \\
-    --capture-video \\
-    --seed $seed
-    ) >& /dev/null &
-done
-'''
-
-final_str = ""
-for env in args.gym_ids:
-    final_str += template.format(args.algo, env, args.total_timesteps, args.wandb_project_name, args.other_args)
-
-with open(f"{args.exp_script}", "w+") as f:
-    f.write(final_str)
\ No newline at end of file
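For orientation, the generator above simply rendered the template once per environment and concatenated the blocks into a shell script. A minimal, self-contained rendering for a single environment (all concrete values below are illustrative, not from a real benchmark run):

    # Render the removed template for one environment; values are illustrative.
    template = '''
    for seed in {{1..2}}
    do
        (sleep 0.3 && nohup xvfb-run -a python {} \\
        --gym-id {} \\
        --total-timesteps {} \\
        --wandb-project-name {} \\
        --track \\
        {} \\
        --capture-video \\
        --seed $seed
        ) >& /dev/null &
    done
    '''
    print(template.format("ppo.py", "CartPole-v1", 2000000,
                          "cleanrl.benchmark", "--wandb-entity cleanrl --cuda False"))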
--cuda True" - -python generate_exp.py --exp-script scripts/td3_pybullet.sh \ - --algo td3_continuous_action.py \ - --total-timesteps 2000000 \ - --gym-ids MinitaurBulletEnv-v0 MinitaurBulletDuckEnv-v0 InvertedPendulumBulletEnv-v0 InvertedDoublePendulumBulletEnv-v0 Walker2DBulletEnv-v0 HalfCheetahBulletEnv-v0 AntBulletEnv-v0 HopperBulletEnv-v0 HumanoidBulletEnv-v0 BipedalWalker-v3 LunarLanderContinuous-v2 Pendulum-v0 MountainCarContinuous-v0 \ - --wandb-project-name cleanrl.benchmark \ - --other-args "--wandb-entity cleanrl --cuda True" - -python generate_exp.py --exp-script scripts/ddpg_mujoco.sh \ - --algo ddpg_continuous_action.py \ - --total-timesteps 2000000 \ - --gym-ids Reacher-v2 Pusher-v2 Thrower-v2 Striker-v2 InvertedPendulum-v2 HalfCheetah-v2 Hopper-v2 Swimmer-v2 Walker2d-v2 Ant-v2 Humanoid-v2 \ - --wandb-project-name cleanrl.benchmark \ - --other-args "--wandb-entity cleanrl --cuda True" - -python generate_exp.py --exp-script scripts/ddpg_pybullet.sh \ - --algo ddpg_continuous_action.py \ - --total-timesteps 2000000 \ - --gym-ids MinitaurBulletEnv-v0 MinitaurBulletDuckEnv-v0 InvertedPendulumBulletEnv-v0 InvertedDoublePendulumBulletEnv-v0 Walker2DBulletEnv-v0 HalfCheetahBulletEnv-v0 AntBulletEnv-v0 HopperBulletEnv-v0 HumanoidBulletEnv-v0 BipedalWalker-v3 LunarLanderContinuous-v2 Pendulum-v0 MountainCarContinuous-v0 \ - --wandb-project-name cleanrl.benchmark \ - --other-args "--wandb-entity cleanrl --cuda True" - -python generate_exp.py --exp-script scripts/sac_mujoco.sh \ - --algo sac_continuous_action.py \ - --total-timesteps 2000000 \ - --gym-ids Reacher-v2 Pusher-v2 Thrower-v2 Striker-v2 InvertedPendulum-v2 HalfCheetah-v2 Hopper-v2 Swimmer-v2 Walker2d-v2 Ant-v2 Humanoid-v2 \ - --wandb-project-name cleanrl.benchmark \ - --other-args "--wandb-entity cleanrl --cuda True" - -python generate_exp.py --exp-script scripts/sac_pybullet.sh \ - --algo sac_continuous_action.py \ - --total-timesteps 2000000 \ - --gym-ids MinitaurBulletEnv-v0 MinitaurBulletDuckEnv-v0 InvertedPendulumBulletEnv-v0 InvertedDoublePendulumBulletEnv-v0 Walker2DBulletEnv-v0 HalfCheetahBulletEnv-v0 AntBulletEnv-v0 HopperBulletEnv-v0 HumanoidBulletEnv-v0 BipedalWalker-v3 LunarLanderContinuous-v2 Pendulum-v0 MountainCarContinuous-v0 \ - --wandb-project-name cleanrl.benchmark \ - --other-args "--wandb-entity cleanrl --cuda True" - -python generate_exp.py --exp-script scripts/dqn_atari.sh \ - --algo dqn_atari_visual.py \ - --total-timesteps 10000000 \ - --gym-ids BeamRiderNoFrameskip-v4 QbertNoFrameskip-v4 SpaceInvadersNoFrameskip-v4 PongNoFrameskip-v4 BreakoutNoFrameskip-v4 \ - --wandb-project-name cleanrl.benchmark \ - --other-args "--wandb-entity cleanrl --cuda True" - -python generate_exp.py --exp-script scripts/dqn_other.sh \ - --algo dqn.py \ - --total-timesteps 2000000 \ - --gym-ids CartPole-v1 Acrobot-v1 MountainCar-v0 LunarLander-v2 \ - --wandb-project-name cleanrl.benchmark \ - --other-args "--wandb-entity cleanrl --cuda True" - -python generate_exp.py --exp-script scripts/c51_atari.sh \ - --algo c51_atari_visual.py \ - --total-timesteps 10000000 \ - --gym-ids BeamRiderNoFrameskip-v4 QbertNoFrameskip-v4 SpaceInvadersNoFrameskip-v4 PongNoFrameskip-v4 BreakoutNoFrameskip-v4 \ - --wandb-project-name cleanrl.benchmark \ - --other-args "--wandb-entity cleanrl --cuda True" - -python generate_exp.py --exp-script scripts/c51_other.sh \ - --algo c51.py \ - --total-timesteps 2000000 \ - --gym-ids CartPole-v1 Acrobot-v1 MountainCar-v0 LunarLander-v2 \ - --wandb-project-name cleanrl.benchmark \ - --other-args 
"--wandb-entity cleanrl --cuda True" - -python generate_exp.py --exp-script scripts/apex_dqn_atari.sh \ - --algo apex_dqn_atari_visual.py \ - --total-timesteps 10000000 \ - --gym-ids BeamRiderNoFrameskip-v4 QbertNoFrameskip-v4 SpaceInvadersNoFrameskip-v4 PongNoFrameskip-v4 BreakoutNoFrameskip-v4 \ - --wandb-project-name cleanrl.benchmark \ - --other-args "--wandb-entity cleanrl --cuda True" - -python generate_exp.py --exp-script scripts/offline_dqn_atari_visual.sh \ - --algo offline_dqn_atari_visual.py \ - --total-timesteps 10000000 \ - --gym-ids BeamRiderNoFrameskip-v4 QbertNoFrameskip-v4 SpaceInvadersNoFrameskip-v4 PongNoFrameskip-v4 BreakoutNoFrameskip-v4 \ - --wandb-project-name cleanrl.benchmark \ - --other-args "--wandb-entity cleanrl --cuda True" - -python generate_exp.py --exp-script scripts/offline_dqn_cql_atari_visual.sh \ - --algo offline_dqn_cql_atari_visual.py \ - --total-timesteps 10000000 \ - --gym-ids BeamRiderNoFrameskip-v4 QbertNoFrameskip-v4 SpaceInvadersNoFrameskip-v4 PongNoFrameskip-v4 BreakoutNoFrameskip-v4 \ - --wandb-project-name cleanrl.benchmark \ - --other-args "--wandb-entity cleanrl --cuda True" \ No newline at end of file diff --git a/benchmark/jobs.py b/benchmark/jobs.py deleted file mode 100644 index 8b641f0b8..000000000 --- a/benchmark/jobs.py +++ /dev/null @@ -1,109 +0,0 @@ -# pip install boto3 -import boto3 -import re -import time -import os -import requests -import json -import argparse -from distutils.util import strtobool -client = boto3.client('batch') - -parser = argparse.ArgumentParser(description='CleanRL Experiment Submission') -# Common arguments -parser.add_argument('--exp-script', type=str, default="scripts/td3_pybullet.sh", - help='the name of this experiment') -parser.add_argument('--job-queue', type=str, default="cleanrl", - help='the name of the job queue') -parser.add_argument('--wandb-key', type=str, default="", - help='the wandb key. 
diff --git a/benchmark/jobs.py b/benchmark/jobs.py
deleted file mode 100644
index 8b641f0b8..000000000
--- a/benchmark/jobs.py
+++ /dev/null
@@ -1,109 +0,0 @@
-# pip install boto3
-import boto3
-import re
-import time
-import os
-import requests
-import json
-import argparse
-from distutils.util import strtobool
-client = boto3.client('batch')
-
-parser = argparse.ArgumentParser(description='CleanRL Experiment Submission')
-# Common arguments
-parser.add_argument('--exp-script', type=str, default="scripts/td3_pybullet.sh",
-                    help='the path of the experiment script to submit')
-parser.add_argument('--job-queue', type=str, default="cleanrl",
-                    help='the name of the job queue')
-parser.add_argument('--wandb-key', type=str, default="",
-                    help='the wandb key. If not provided, the script will try to read the env variable `WANDB_KEY`')
-parser.add_argument('--docker-repo', type=str, default="vwxyzjn/gym-microrts:latest",
-                    help='the docker repository (image) used to run the experiments')
-parser.add_argument('--job-definition', type=str, default="cleanrl",
-                    help='the name of the job definition')
-parser.add_argument('--num-seed', type=int, default=2,
-                    help='number of random seeds for experiments')
-parser.add_argument('--num-vcpu', type=int, default=1,
-                    help='number of vcpus per experiment')
-parser.add_argument('--num-memory', type=int, default=2000,
-                    help='amount of memory (MB) per experiment')
-parser.add_argument('--num-gpu', type=int, default=0,
-                    help='number of gpus per experiment')
-parser.add_argument('--num-hours', type=float, default=16.0,
-                    help='number of hours allocated per experiment')
-parser.add_argument('--upload-files-baseurl', type=str, default="",
-                    help='the baseurl of your website if you decide to upload files')
-parser.add_argument('--submit-aws', type=lambda x:bool(strtobool(x)), default=False, nargs='?', const=True,
-                    help='if toggled, the experiments will be submitted to AWS Batch')
-args = parser.parse_args()
-
-
-# get env variable values
-if not args.wandb_key:
-    args.wandb_key = os.environ['WANDB_KEY']
-assert len(args.wandb_key) > 0, "set the environment variable `WANDB_KEY` to your WANDB API key, something like `export WANDB_KEY=fdsfdsfdsfads`"
-# extract runs from bash scripts
-final_run_cmds = []
-with open(args.exp_script) as f:
-    strings = f.read()
-runs_match = re.findall('(python)(.+)((?:\n.+)+)(seed)',strings)
-for run_match in runs_match:
-    run_match_str = "".join(run_match).replace("\\\n", "")
-    # print(run_match_str)
-    for seed in range(1,1+args.num_seed):
-        final_run_cmds += [run_match_str.replace("$seed", str(seed)).split()]
-        if args.upload_files_baseurl:
-            file_name = final_run_cmds[-1][1]
-            link = args.upload_files_baseurl + '/' + file_name
-            final_run_cmds[-1] = ['curl', '-O', link, ';'] + final_run_cmds[-1]
-
-# use docker directly
-if not args.submit_aws:
-    cores = 40
-    current_core = 0
-    for final_run_cmd in final_run_cmds:
-        print(f'docker run -d --cpuset-cpus="{current_core}" -e WANDB={args.wandb_key} {args.docker_repo} ' +
-              '/bin/bash -c "' + " ".join(final_run_cmd) + '"')
-        current_core = (current_core + 1) % cores
-
-# submit jobs
-if args.submit_aws:
-    for final_run_cmd in final_run_cmds:
-        job_name = re.findall('(python)(.+)(.py)'," ".join(final_run_cmd))[0][1].strip() + str(int(time.time()))
-        job_name = job_name.replace("/", "_").replace("_param ", "")
-        resources_requirements = []
-        if args.num_gpu:
-            resources_requirements = [
-                {
-                    'value': str(args.num_gpu),
-                    'type': 'GPU'
-                },
-            ]
-
-        response = client.submit_job(
-            jobName=job_name,
-            jobQueue=args.job_queue,
-            jobDefinition=args.job_definition,
-            containerOverrides={
-                'vcpus': args.num_vcpu,
-                'memory': args.num_memory,
-                'command': ["/bin/bash", "-c", " ".join(final_run_cmd)],
-                'environment': [
-                    {
-                        'name': 'WANDB',
-                        'value': args.wandb_key
-                    }
-                ],
-                'resourceRequirements': resources_requirements,
-            },
-            retryStrategy={
-                'attempts': 1
-            },
-            timeout={
-                'attemptDurationSeconds': int(args.num_hours*60*60)
-            }
-        )
-        if response['ResponseMetadata']['HTTPStatusCode'] != 200:
-            print(response)
-            raise Exception("jobs submit failure")
-
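With --submit-aws off, jobs.py did not launch anything itself; it printed one detached docker command per run, pinning each container to a single CPU core. A condensed sketch of that path (image name and run list are illustrative):

    # Condensed sketch of jobs.py's local path: one detached container per run,
    # each pinned to one CPU core; WANDB is passed through the environment.
    cores = 40
    runs = [["python", "ppo.py", "--seed", "1"], ["python", "ppo.py", "--seed", "2"]]
    for i, cmd in enumerate(runs):
        print(f'docker run -d --cpuset-cpus="{i % cores}" -e WANDB=$WANDB_KEY '
              f'vwxyzjn/cleanrl:latest /bin/bash -c "{" ".join(cmd)}"')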
diff --git a/benchmark/jobs.sh b/benchmark/jobs.sh
deleted file mode 100644
index 01845aa14..000000000
--- a/benchmark/jobs.sh
+++ /dev/null
@@ -1,129 +0,0 @@
-SUBMIT_AWS=False
-
-python jobs.py --exp-script scripts/ppo_pybullet.sh \
-    --job-queue cleanrl \
-    --job-definition cleanrl \
-    --num-seed 2 \
-    --num-vcpu 2 \
-    --num-memory 3000 \
-    --num-hours 48.0 \
-    --submit-aws $SUBMIT_AWS
-
-python jobs.py --exp-script scripts/ppo_atari.sh \
-    --job-queue cleanrl_gpu \
-    --job-definition cleanrl \
-    --num-seed 2 \
-    --num-vcpu 4 \
-    --num-gpu 1 \
-    --num-memory 14000 \
-    --num-hours 48.0 \
-    --submit-aws $SUBMIT_AWS
-
-python jobs.py --exp-script scripts/ppo_other.sh \
-    --job-queue cleanrl \
-    --job-definition cleanrl \
-    --num-seed 2 \
-    --num-vcpu 1 \
-    --num-memory 3000 \
-    --num-hours 16.0 \
-    --submit-aws $SUBMIT_AWS
-
-python jobs.py --exp-script scripts/td3_pybullet.sh \
-    --job-queue cleanrl_gpu \
-    --job-definition cleanrl \
-    --num-seed 2 \
-    --num-vcpu 2 \
-    --num-memory 14000 \
-    --num-gpu 1 \
-    --num-hours 48.0 \
-    --submit-aws $SUBMIT_AWS
-
-python jobs.py --exp-script scripts/ddpg_pybullet.sh \
-    --job-queue cleanrl_gpu \
-    --job-definition cleanrl \
-    --num-seed 2 \
-    --num-vcpu 2 \
-    --num-memory 14000 \
-    --num-gpu 1 \
-    --num-hours 48.0 \
-    --submit-aws $SUBMIT_AWS
-
-python jobs.py --exp-script scripts/sac_pybullet.sh \
-    --job-queue cleanrl_gpu \
-    --job-definition cleanrl \
-    --num-seed 2 \
-    --num-vcpu 2 \
-    --num-memory 14000 \
-    --num-gpu 1 \
-    --num-hours 48.0 \
-    --submit-aws $SUBMIT_AWS
-
-python jobs.py --exp-script scripts/dqn_atari.sh \
-    --job-queue cleanrl_gpu_large_memory \
-    --job-definition cleanrl \
-    --num-seed 2 \
-    --num-vcpu 2 \
-    --num-gpu 1 \
-    --num-memory 63000 \
-    --num-hours 48.0 \
-    --submit-aws $SUBMIT_AWS
-
-python jobs.py --exp-script scripts/dqn_other.sh \
-    --job-queue cleanrl_gpu \
-    --job-definition cleanrl \
-    --num-seed 2 \
-    --num-vcpu 2 \
-    --num-gpu 1 \
-    --num-memory 3000 \
-    --num-hours 16.0 \
-    --submit-aws $SUBMIT_AWS
-
-python jobs.py --exp-script scripts/c51_atari.sh \
-    --job-queue cleanrl_gpu_large_memory \
-    --job-definition cleanrl \
-    --num-seed 2 \
-    --num-vcpu 2 \
-    --num-gpu 1 \
-    --num-memory 63000 \
-    --num-hours 48.0 \
-    --submit-aws $SUBMIT_AWS
-
-python jobs.py --exp-script scripts/c51_other.sh \
-    --job-queue cleanrl_gpu \
-    --job-definition cleanrl \
-    --num-seed 2 \
-    --num-vcpu 2 \
-    --num-gpu 1 \
-    --num-memory 3000 \
-    --num-hours 16.0 \
-    --submit-aws $SUBMIT_AWS
-
-python jobs.py --exp-script scripts/apex_dqn_atari.sh \
-    --job-queue cleanrl_gpu_large_memory \
-    --job-definition cleanrl \
-    --num-seed 2 \
-    --num-vcpu 16 \
-    --num-gpu 1 \
-    --num-memory 63000 \
-    --num-hours 48.0 \
-    --submit-aws $SUBMIT_AWS
-
-python jobs.py --exp-script scripts/offline_dqn_atari_visual.sh \
-    --job-queue cleanrl_gpu_large_memory \
-    --job-definition cleanrl \
-    --num-seed 2 \
-    --num-vcpu 16 \
-    --num-gpu 1 \
-    --num-memory 63000 \
-    --num-hours 48.0 \
-    --submit-aws $SUBMIT_AWS
-
-python jobs.py --exp-script scripts/offline_dqn_cql_atari_visual.sh \
-    --job-queue cleanrl_gpu_large_memory \
-    --job-definition cleanrl \
-    --num-seed 2 \
-    --num-vcpu 16 \
-    --num-gpu 1 \
-    --num-memory 63000 \
-    --num-hours 48.0 \
-    --submit-aws $SUBMIT_AWS
\ No newline at end of file
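Read together, the presets above amount to three resource tiers keyed by job queue; the grouping below is my summary, not an exhaustive map (exact numbers vary per algorithm, see the script above):

    # My reading of the removed jobs.sh resource tiers (queue names from the script).
    PRESETS = {
        "cleanrl":                  {"vcpu": 1,  "gpu": 0, "memory_mb": 3000},   # classic control
        "cleanrl_gpu":              {"vcpu": 2,  "gpu": 1, "memory_mb": 14000},  # pybullet / small runs
        "cleanrl_gpu_large_memory": {"vcpu": 16, "gpu": 1, "memory_mb": 63000},  # replay-heavy Atari
    }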
diff --git a/benchmark/plots.py b/benchmark/plots.py
deleted file mode 100755
index c0dfd7e10..000000000
--- a/benchmark/plots.py
+++ /dev/null
@@ -1,289 +0,0 @@
-from os import path
-import pickle
-import wandb
-import pandas as pd
-import numpy as np
-import seaborn as sns
-import matplotlib.pyplot as plt
-import os
-import argparse
-from distutils.util import strtobool
-
-parser = argparse.ArgumentParser(description='CleanRL Plots')
-# Common arguments
-parser.add_argument('--wandb-project', type=str, default="cleanrl/cleanrl.benchmark",
-                    help='the name of wandb project (e.g. cleanrl/cleanrl)')
-parser.add_argument('--feature-of-interest', type=str, default='charts/episodic_return',
-                    help='the feature to plot on the y-axis')
-parser.add_argument('--hyper-params-tuned', nargs='+', default=[],
-                    help='the hyper parameters tuned')
-# parser.add_argument('--scan-history', type=lambda x:bool(strtobool(x)), default=False, nargs='?', const=True,
-#                     help='if toggled, scan the full run history instead of sampling')
-parser.add_argument('--interested-exp-names', nargs='+', default=[],
-                    help='the experiment names to include in the plots')
-parser.add_argument('--samples', type=int, default=500,
-                    help='the number of sampled data points per run')
-parser.add_argument('--smooth-weight', type=float, default=0.90,
-                    help='the weight parameter of the exponential moving average')
-parser.add_argument('--last-n-episodes', type=int, default=10,
-                    help='for analysis only; the last n episodes from which the mean of the feature of interest is calculated')
-parser.add_argument('--num-points-x-axis', type=int, default=500,
-                    help='the number of points in the x-axis')
-parser.add_argument('--font-size', type=int, default=0,
-                    help='the font size of the plots')
-parser.add_argument('--x-label', type=str, default="Time Steps",
-                    help='the label of the x-axis')
-parser.add_argument('--y-label', type=str, default="Average Episode Reward",
-                    help='the label of the y-axis')
-parser.add_argument('--y-lim-bottom', type=float, default=0.0,
-                    help='the bottom limit for the y-axis')
-parser.add_argument('--output-format', type=str, default="svg",
-                    help='either `pdf`, `png`, or `svg`')
-args = parser.parse_args()
-api = wandb.Api()
-
-exp_convert_dict = {
-    # "ppo_atari_visual": "ppo",
-    # "ppo_continuous_action": "ppo"
-}
-
-# args.feature_of_interest = 'charts/episodic_return'
-feature_name = args.feature_of_interest.replace("/", "_")
-if not os.path.exists(feature_name):
-    os.makedirs(feature_name)
-
-if not path.exists(f"{feature_name}/all_df_cache.pkl"):
-    # Change oreilly-class/cifar to <entity/project-name>
-    runs = api.runs(args.wandb_project)
-    summary_list = []
-    config_list = []
-    name_list = []
-    envs = {}
-    data = []
-    exp_names = []
-
-    for idx, run in enumerate(runs):
-        if args.feature_of_interest in run.summary:
-            # if args.scan_history:
-            #     ls =
-            # else:
-            ls = run.history(keys=[args.feature_of_interest, 'global_step'], pandas=False, samples=args.samples)
-            metrics_dataframe = pd.DataFrame(ls[0])
-            for param in args.hyper_params_tuned:
-                if param in run.config:
-                    run.config['exp_name'] += "-" + param + "-" + str(run.config[param]) + "-"
-
-            metrics_dataframe.insert(len(metrics_dataframe.columns), "algo", run.config['exp_name'])
-            metrics_dataframe.insert(len(metrics_dataframe.columns), "seed", run.config['seed'])
-
-            data += [metrics_dataframe]
-            if run.config["gym_id"] not in envs:
-                envs[run.config["gym_id"]] = [metrics_dataframe]
-                envs[run.config["gym_id"]+"total_timesteps"] = run.config["total_timesteps"]
-            else:
-                envs[run.config["gym_id"]] += [metrics_dataframe]
-
-            # run.summary holds the output key/values like accuracy. We call ._json_dict to omit large files
-            summary_list.append(run.summary._json_dict)
-
-            # run.config holds the input hyperparameters. We remove special values that start with _.
-            config_list.append({k:v for k,v in run.config.items() if not k.startswith('_')})
-
-            # run.name is the name of the run.
-            name_list.append(run.name)
-
-    summary_df = pd.DataFrame.from_records(summary_list)
-    config_df = pd.DataFrame.from_records(config_list)
-    name_df = pd.DataFrame({'name': name_list})
-    all_df = pd.concat([name_df, config_df, summary_df], axis=1)
-    data = pd.concat(data, ignore_index=True)
-
-    with open(f'{feature_name}/all_df_cache.pkl', 'wb') as handle:
-        pickle.dump(all_df, handle, protocol=pickle.HIGHEST_PROTOCOL)
-    with open(f'{feature_name}/envs_cache.pkl', 'wb') as handle:
-        pickle.dump(envs, handle, protocol=pickle.HIGHEST_PROTOCOL)
-else:
-    with open(f'{feature_name}/all_df_cache.pkl', 'rb') as handle:
-        all_df = pickle.load(handle)
-    with open(f'{feature_name}/envs_cache.pkl', 'rb') as handle:
-        envs = pickle.load(handle)
-print("data loaded")
-
-# https://stackoverflow.com/questions/42281844/what-is-the-mathematics-behind-the-smoothing-parameter-in-tensorboards-scalar#_=_
-def smooth(scalars, weight):  # weight between 0 and 1
-    last = scalars[0]  # first value in the plot (first timestep)
-    smoothed = list()
-    for point in scalars:
-        smoothed_val = last * weight + (1 - weight) * point  # calculate smoothed value
-        smoothed.append(smoothed_val)  # save it
-        last = smoothed_val  # anchor the last smoothed value
-
-    return smoothed
-
-# smoothing
-for env in envs:
-    if not env.endswith("total_timesteps"):
-        for idx, metrics_dataframe in enumerate(envs[env]):
-            envs[env][idx] = metrics_dataframe.dropna(subset=[args.feature_of_interest])
-#             envs[env][idx][args.feature_of_interest] = smooth(metrics_dataframe[args.feature_of_interest], 0.85)
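An aside on the smooth function just shown: it is the TensorBoard-style exponential moving average linked in the comment, initialized at the first value (so early points are biased toward the start of the curve). A tiny worked example, restating the deleted function so the snippet runs on its own:

    # TensorBoard-style EMA, as in the deleted smooth():
    # smoothed[t] = weight * smoothed[t-1] + (1 - weight) * scalars[t]
    def smooth(scalars, weight):
        last = scalars[0]
        smoothed = []
        for point in scalars:
            last = last * weight + (1 - weight) * point
            smoothed.append(last)
        return smoothed

    # t=0: 0.9*0.0 + 0.1*0.0  = 0.0
    # t=1: 0.9*0.0 + 0.1*10.0 = 1.0
    # t=2: 0.9*1.0 + 0.1*10.0 = 1.9
    print(smooth([0.0, 10.0, 10.0], 0.9))  # -> approximately [0.0, 1.0, 1.9]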
-sns.set(style="darkgrid")
-def get_df_for_env(gym_id):
-    env_total_timesteps = envs[gym_id+"total_timesteps"]
-    env_increment = env_total_timesteps / 500
-    envs_same_x_axis = []
-    for sampled_run in envs[gym_id]:
-        df = pd.DataFrame(columns=sampled_run.columns)
-        x_axis = [i*env_increment for i in range(500-2)]
-        current_row = 0
-        for timestep in x_axis:
-            while sampled_run.iloc[current_row]["global_step"] < timestep:
-                current_row += 1
-                if current_row > len(sampled_run)-2:
-                    break
-            if current_row > len(sampled_run)-2:
-                break
-            temp_row = sampled_run.iloc[current_row].copy()
-            temp_row["global_step"] = timestep
-            df = df.append(temp_row)
-
-        envs_same_x_axis += [df]
-    return pd.concat(envs_same_x_axis, ignore_index=True)
-
-def export_legend(ax, filename="legend.pdf"):
-    # import matplotlib as mpl
-    # mpl.rcParams['text.usetex'] = True
-    # mpl.rcParams['text.latex.preamble'] = [r'\usepackage{amsmath}']  # for \text command
-    fig2 = plt.figure()
-    ax2 = fig2.add_subplot()
-    ax2.axis('off')
-    handles, labels = ax.get_legend_handles_labels()
-
-    legend = ax2.legend(handles=handles[1:], labels=labels[1:], frameon=False, loc='lower center', ncol=3, fontsize=20, handlelength=1)
-    for text in legend.get_texts():
-        if text.get_text() in exp_convert_dict:
-            text.set_text(exp_convert_dict[text.get_text()])
-    for line in legend.get_lines():
-        line.set_linewidth(4.0)
-    fig = legend.figure
-    fig.canvas.draw()
-    bbox = legend.get_window_extent().transformed(fig.dpi_scale_trans.inverted())
-    fig.savefig(filename, dpi="figure", bbox_inches=bbox)
-    fig.clf()
-
-if not os.path.exists(f"{feature_name}/data"):
-    os.makedirs(f"{feature_name}/data")
-if not os.path.exists(f"{feature_name}/plots"):
-    os.makedirs(f"{feature_name}/plots")
-if not os.path.exists(f"{feature_name}/legends"):
-    os.makedirs(f"{feature_name}/legends")
-
-interested_exp_names = sorted(list(set(all_df['exp_name'])))  # e.g. ['ppo_continuous_action', 'ppo_atari_visual']
-current_palette = sns.color_palette(n_colors=len(interested_exp_names))
-current_palette_dict = dict(zip(interested_exp_names, current_palette))
-if args.interested_exp_names:
-    interested_exp_names = args.interested_exp_names
-print(current_palette_dict)
-legend_df = pd.DataFrame()
-
-if args.font_size:
-    plt.rc('axes', titlesize=args.font_size)   # fontsize of the axes title
-    plt.rc('axes', labelsize=args.font_size)   # fontsize of the x and y labels
-    plt.rc('xtick', labelsize=args.font_size)  # fontsize of the tick labels
-    plt.rc('ytick', labelsize=args.font_size)  # fontsize of the tick labels
-    plt.rc('legend', fontsize=args.font_size)  # legend fontsize
-
-stats = {item: [] for item in ["gym_id", "exp_name", args.feature_of_interest]}
-# uncomment the following to generate all figures
-for env in set(all_df["gym_id"]):
-    if not path.exists(f"{feature_name}/data/{env}.pkl"):
-        with open(f"{feature_name}/data/{env}.pkl", 'wb') as handle:
-            data = get_df_for_env(env)
-            data["seed"] = data["seed"].astype(float)
-            data[args.feature_of_interest] = data[args.feature_of_interest].astype(float)
-            pickle.dump(data, handle, protocol=pickle.HIGHEST_PROTOCOL)
-    else:
-        with open(f"{feature_name}/data/{env}.pkl", 'rb') as handle:
-            data = pickle.load(handle)
-    print(f"{env}'s data loaded")
-    def _smooth(df):
-        df[args.feature_of_interest] = smooth(list(df[args.feature_of_interest]), args.smooth_weight)
-        return df
-
-    legend_df = legend_df.append(data)
-    ax = sns.lineplot(data=data.groupby(["seed", "algo"]).apply(_smooth).loc[data['algo'].isin(interested_exp_names)], x="global_step", y=args.feature_of_interest, hue="algo", ci='sd', palette=current_palette_dict,)
-    ax.set(xlabel=args.x_label, ylabel=args.y_label)
-    # ax.legend().remove()
-    handles, labels = ax.get_legend_handles_labels()
-    legend = ax.legend(handles=handles[1:], labels=labels[1:])
-    for text in legend.get_texts():
-        if text.get_text() in exp_convert_dict:
-            text.set_text(exp_convert_dict[text.get_text()])
-    if args.y_lim_bottom:
-        plt.ylim(bottom=args.y_lim_bottom)
-    plt.title(env)
-
-    plt.tight_layout()
-    plt.savefig(f"{feature_name}/plots/{env}.{args.output_format}")
-    plt.clf()
-
-    for algo in interested_exp_names:
-        if algo in set(data['algo']):
-            algo_data = data.loc[data['algo'].isin([algo])]
-            last_n_episodes_global_step = sorted(algo_data["global_step"].unique())[-args.last_n_episodes]
-            last_n_episodes_features = algo_data[algo_data['global_step'] > last_n_episodes_global_step].groupby(
-                ['seed']
-            ).mean()[args.feature_of_interest]
-
-            for item in last_n_episodes_features:
-                stats[args.feature_of_interest] += [item]
-                if algo in exp_convert_dict:
-                    stats['exp_name'] += [exp_convert_dict[algo]]
-                else:
-                    stats['exp_name'] += [algo]
-                stats['gym_id'] += [env]
-
-# export legend
-ax = sns.lineplot(data=legend_df, x="global_step", y=args.feature_of_interest, hue="algo", ci='sd', palette=current_palette_dict,)
-ax.set(xlabel='Time Steps', ylabel='Average Episode Reward')
-ax.legend().remove()
-export_legend(ax, f"{feature_name}/legend.{args.output_format}")
-plt.clf()
-
-
-# analysis
-stats_df = pd.DataFrame(stats)
-g = stats_df.groupby(
-    ['gym_id','exp_name']
-).agg(lambda x: f"{np.mean(x):.2f} ± {np.std(x):.2f}")
-print(g.reset_index().pivot('gym_id', 'exp_name', args.feature_of_interest).to_latex().replace("±", "$\pm$"))
-
-###############################
-# benchmark specific settings
-###############################
-final_df = g.reset_index().pivot('gym_id', 'exp_name', args.feature_of_interest)
-print("===============Atari===========") -print(final_df[["c51_atari_visual", "dqn_atari_visual", "ppo_atari_visual"]].loc[[ - "BeamRiderNoFrameskip-v4", "QbertNoFrameskip-v4", "SpaceInvadersNoFrameskip-v4", "PongNoFrameskip-v4", "BreakoutNoFrameskip-v4" -]].to_markdown()) - -print("===============Mujoco and Pybullet===========") -print(final_df[["ddpg_continuous_action", "td3_continuous_action", "ppo_continuous_action"]].loc[[ - "Ant-v2", "Humanoid-v2", "Walker2DBulletEnv-v0", "HalfCheetahBulletEnv-v0", "HopperBulletEnv-v0", "BipedalWalker-v3", "LunarLanderContinuous-v2", "Pendulum-v0", "MountainCarContinuous-v0" -]].to_markdown()) - -# print run time -runtime_df = all_df[['gym_id','exp_name', '_runtime']].copy() -for i in range(len(runtime_df)): - runtime_df.iloc[i, runtime_df.columns.get_loc('exp_name')] = exp_convert_dict[runtime_df.iloc[i]['exp_name']] - -runtime_df.groupby( - ['gym_id','exp_name'] -).agg( - lambda x: f"{np.mean(x/3600):.2f} ± {np.std(x/3600):.2f}" -).reset_index().pivot('exp_name', 'gym_id' , '_runtime') \ No newline at end of file diff --git a/benchmark/resubmit.py b/benchmark/resubmit.py deleted file mode 100755 index 2bcbbf958..000000000 --- a/benchmark/resubmit.py +++ /dev/null @@ -1,112 +0,0 @@ -# pip install boto3 -import boto3 -import re -import time -import os -import requests -import json -import argparse -import wandb -import requests -from distutils.util import strtobool -client = boto3.client('batch') - -parser = argparse.ArgumentParser(description='CleanRL Experiment Submission') -# Common arguments -parser.add_argument('--wandb-project', type=str, default="cleanrl/cleanrl.benchmark", - help='the name of wandb project (e.g. cleanrl/cleanrl)') -parser.add_argument('--run-state', type=str, default="crashed", - help='the name of this experiment') -parser.add_argument('--job-queue', type=str, default="cleanrl", - help='the name of the job queue') -parser.add_argument('--job-definition', type=str, default="cleanrl", - help='the name of the job definition') -parser.add_argument('--num-vcpu', type=int, default=2, - help='number of vcpu per experiment') -parser.add_argument('--num-memory', type=int, default=15000, - help='number of memory (MB) per experiment') -parser.add_argument('--num-gpu', type=int, default=1, - help='number of gpu per experiment') -parser.add_argument('--num-hours', type=float, default=48.0, - help='number of hours allocated experiment') -parser.add_argument('--upload_files', type=lambda x:bool(strtobool(x)), default=False, nargs='?', const=True, - help='if toggled, script will need to be uploaded') -parser.add_argument('--submit-aws', type=lambda x:bool(strtobool(x)), default=False, nargs='?', const=True, - help='if toggled, script will need to be uploaded') -args = parser.parse_args() - -api = wandb.Api() - -if args.upload_files: - response = requests.get('http://127.0.0.1:4040/api/tunnels') - content = json.loads(response.content.decode()) - assert response.status_code == 200 - url = content['tunnels'][0]['public_url'] - -# Project is specified by -runs = api.runs(args.wandb_project) -final_run_cmds = [] -for run in runs: - if run.state == args.run_state: - metadata = requests.get(url=run.file(name="wandb-metadata.json").url).json() - final_run_cmds += [["python", metadata["program"]] + metadata["args"]] - if args.upload_files: - file_name = final_run_cmds[-1][1] - link = url + '/' + file_name - final_run_cmds[-1] = ['wget', link, ';'] + final_run_cmds[-1] - -# get env variable values -wandb_key = os.environ['WANDB_KEY'] -assert 
diff --git a/benchmark/resubmit.py b/benchmark/resubmit.py
deleted file mode 100755
index 2bcbbf958..000000000
--- a/benchmark/resubmit.py
+++ /dev/null
@@ -1,112 +0,0 @@
-# pip install boto3
-import boto3
-import re
-import time
-import os
-import requests
-import json
-import argparse
-import wandb
-from distutils.util import strtobool
-client = boto3.client('batch')
-
-parser = argparse.ArgumentParser(description='CleanRL Experiment Submission')
-# Common arguments
-parser.add_argument('--wandb-project', type=str, default="cleanrl/cleanrl.benchmark",
-                    help='the name of wandb project (e.g. cleanrl/cleanrl)')
-parser.add_argument('--run-state', type=str, default="crashed",
-                    help='the run state used to select runs for resubmission (e.g. crashed)')
-parser.add_argument('--job-queue', type=str, default="cleanrl",
-                    help='the name of the job queue')
-parser.add_argument('--job-definition', type=str, default="cleanrl",
-                    help='the name of the job definition')
-parser.add_argument('--num-vcpu', type=int, default=2,
-                    help='number of vcpus per experiment')
-parser.add_argument('--num-memory', type=int, default=15000,
-                    help='amount of memory (MB) per experiment')
-parser.add_argument('--num-gpu', type=int, default=1,
-                    help='number of gpus per experiment')
-parser.add_argument('--num-hours', type=float, default=48.0,
-                    help='number of hours allocated per experiment')
-parser.add_argument('--upload_files', type=lambda x:bool(strtobool(x)), default=False, nargs='?', const=True,
-                    help='if toggled, the experiment scripts are fetched through a local tunnel (e.g. ngrok)')
-parser.add_argument('--submit-aws', type=lambda x:bool(strtobool(x)), default=False, nargs='?', const=True,
-                    help='if toggled, the experiments will be submitted to AWS Batch')
-args = parser.parse_args()
-
-api = wandb.Api()
-
-if args.upload_files:
-    response = requests.get('http://127.0.0.1:4040/api/tunnels')
-    content = json.loads(response.content.decode())
-    assert response.status_code == 200
-    url = content['tunnels'][0]['public_url']
-
-# Project is specified by <entity/project-name>
-runs = api.runs(args.wandb_project)
-final_run_cmds = []
-for run in runs:
-    if run.state == args.run_state:
-        metadata = requests.get(url=run.file(name="wandb-metadata.json").url).json()
-        final_run_cmds += [["python", metadata["program"]] + metadata["args"]]
-        if args.upload_files:
-            file_name = final_run_cmds[-1][1]
-            link = url + '/' + file_name
-            final_run_cmds[-1] = ['wget', link, ';'] + final_run_cmds[-1]
-
-# get env variable values
-wandb_key = os.environ['WANDB_KEY']
-assert len(wandb_key) > 0, "set the environment variable `WANDB_KEY` to your WANDB API key, something like `export WANDB_KEY=fdsfdsfdsfads`"
-
-# use docker directly
-if not args.submit_aws:
-    cores = 40
-    repo = "vwxyzjn/cleanrl:latest"
-    current_core = 0
-    for final_run_cmd in final_run_cmds:
-        print(f'docker run -d --cpuset-cpus="{current_core}" -e WANDB={wandb_key} {repo} ' +
-              '/bin/bash -c "' + " ".join(final_run_cmd) + '"')
-        current_core = (current_core + 1) % cores
-
-# submit jobs
-if args.submit_aws:
-    for final_run_cmd in final_run_cmds:
-        job_name = re.findall('(python)(.+)(.py)'," ".join(final_run_cmd))[0][1].strip() + str(int(time.time()))
-        job_name = job_name.replace("/", "_").replace("_param ", "")
-        resources_requirements = []
-        if args.num_gpu:
-            resources_requirements = [
-                {
-                    'value': '1',
-                    'type': 'GPU'
-                },
-            ]
-
-        response = client.submit_job(
-            jobName=job_name,
-            jobQueue=args.job_queue,
-            jobDefinition=args.job_definition,
-            containerOverrides={
-                'vcpus': args.num_vcpu,
-                'memory': args.num_memory,
-                'command': ["/bin/bash", "-c", " ".join(final_run_cmd)],
-                'environment': [
-                    {
-                        'name': 'WANDB',
-                        'value': wandb_key
-                    }
-                ],
-                'resourceRequirements': resources_requirements,
-            },
-            retryStrategy={
-                'attempts': 1
-            },
-            timeout={
-                'attemptDurationSeconds': int(args.num_hours*60*60)
-            }
-        )
-        if response['ResponseMetadata']['HTTPStatusCode'] != 200:
-            print(response)
-            raise Exception("jobs submit failure")
-
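resubmit.py boiled down to: list the project's runs, keep those whose state matches the filter (crashed by default), and rebuild each launch command from the run's stored metadata before resubmitting. A condensed sketch of that core (requires a logged-in wandb client; the project name is the script's default):

    # Condensed sketch of the resubmission idea: rebuild launch commands for
    # crashed runs from wandb's stored run metadata.
    import wandb
    import requests

    api = wandb.Api()
    for run in api.runs("cleanrl/cleanrl.benchmark"):
        if run.state == "crashed":
            meta = requests.get(url=run.file(name="wandb-metadata.json").url).json()
            print(["python", meta["program"]] + meta["args"])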
diff --git a/benchmark/docker_queue.py b/cleanrl_utils/docker_queue.py
similarity index 100%
rename from benchmark/docker_queue.py
rename to cleanrl_utils/docker_queue.py
diff --git a/benchmark/experimentals.sh b/cloud/examples/experimentals.sh
similarity index 100%
rename from benchmark/experimentals.sh
rename to cloud/examples/experimentals.sh
diff --git a/benchmark/scripts/apex_dqn_atari.sh b/cloud/examples/scripts/apex_dqn_atari.sh
similarity index 100%
rename from benchmark/scripts/apex_dqn_atari.sh
rename to cloud/examples/scripts/apex_dqn_atari.sh
diff --git a/benchmark/scripts/c51_atari.sh b/cloud/examples/scripts/c51_atari.sh
similarity index 100%
rename from benchmark/scripts/c51_atari.sh
rename to cloud/examples/scripts/c51_atari.sh
diff --git a/benchmark/scripts/c51_other.sh b/cloud/examples/scripts/c51_other.sh
similarity index 100%
rename from benchmark/scripts/c51_other.sh
rename to cloud/examples/scripts/c51_other.sh
diff --git a/benchmark/scripts/ddpg_mujoco.sh b/cloud/examples/scripts/ddpg_mujoco.sh
similarity index 100%
rename from benchmark/scripts/ddpg_mujoco.sh
rename to cloud/examples/scripts/ddpg_mujoco.sh
diff --git a/benchmark/scripts/ddpg_pybullet.sh b/cloud/examples/scripts/ddpg_pybullet.sh
similarity index 100%
rename from benchmark/scripts/ddpg_pybullet.sh
rename to cloud/examples/scripts/ddpg_pybullet.sh
diff --git a/benchmark/scripts/dqn_atari.sh b/cloud/examples/scripts/dqn_atari.sh
similarity index 100%
rename from benchmark/scripts/dqn_atari.sh
rename to cloud/examples/scripts/dqn_atari.sh
diff --git a/benchmark/scripts/dqn_other.sh b/cloud/examples/scripts/dqn_other.sh
similarity index 100%
rename from benchmark/scripts/dqn_other.sh
rename to cloud/examples/scripts/dqn_other.sh
diff --git a/benchmark/scripts/offline_dqn_atari_visual.sh b/cloud/examples/scripts/offline_dqn_atari_visual.sh
similarity index 100%
rename from benchmark/scripts/offline_dqn_atari_visual.sh
rename to cloud/examples/scripts/offline_dqn_atari_visual.sh
diff --git a/benchmark/scripts/offline_dqn_cql_atari_visual.sh b/cloud/examples/scripts/offline_dqn_cql_atari_visual.sh
similarity index 100%
rename from benchmark/scripts/offline_dqn_cql_atari_visual.sh
rename to cloud/examples/scripts/offline_dqn_cql_atari_visual.sh
diff --git a/benchmark/scripts/ppo_atari.sh b/cloud/examples/scripts/ppo_atari.sh
similarity index 100%
rename from benchmark/scripts/ppo_atari.sh
rename to cloud/examples/scripts/ppo_atari.sh
diff --git a/benchmark/scripts/ppo_mujoco.sh b/cloud/examples/scripts/ppo_mujoco.sh
similarity index 100%
rename from benchmark/scripts/ppo_mujoco.sh
rename to cloud/examples/scripts/ppo_mujoco.sh
diff --git a/benchmark/scripts/ppo_other.sh b/cloud/examples/scripts/ppo_other.sh
similarity index 100%
rename from benchmark/scripts/ppo_other.sh
rename to cloud/examples/scripts/ppo_other.sh
diff --git a/benchmark/scripts/ppo_pybullet.sh b/cloud/examples/scripts/ppo_pybullet.sh
similarity index 100%
rename from benchmark/scripts/ppo_pybullet.sh
rename to cloud/examples/scripts/ppo_pybullet.sh
diff --git a/benchmark/scripts/sac_mujoco.sh b/cloud/examples/scripts/sac_mujoco.sh
similarity index 100%
rename from benchmark/scripts/sac_mujoco.sh
rename to cloud/examples/scripts/sac_mujoco.sh
diff --git a/benchmark/scripts/sac_pybullet.sh b/cloud/examples/scripts/sac_pybullet.sh
similarity index 100%
rename from benchmark/scripts/sac_pybullet.sh
rename to cloud/examples/scripts/sac_pybullet.sh
diff --git a/benchmark/scripts/td3_mujoco.sh b/cloud/examples/scripts/td3_mujoco.sh
similarity index 100%
rename from benchmark/scripts/td3_mujoco.sh
rename to cloud/examples/scripts/td3_mujoco.sh
diff --git a/benchmark/scripts/td3_pybullet.sh b/cloud/examples/scripts/td3_pybullet.sh
similarity index 100%
rename from benchmark/scripts/td3_pybullet.sh
rename to cloud/examples/scripts/td3_pybullet.sh
diff --git a/benchmark/submit_exp.sh b/cloud/examples/submit_exp.sh
similarity index 61%
rename from benchmark/submit_exp.sh
rename to cloud/examples/submit_exp.sh
index 9587bc865..68a263864 100644
--- a/benchmark/submit_exp.sh
+++ b/cloud/examples/submit_exp.sh
@@ -1,19 +1,3 @@
-python submit_exp.py --exp-script scripts/offline_dqn_cql_atari_visual.sh \
-    --algo offline_dqn_cql_atari_visual.py \
-    --total-timesteps 10000000 \
-    --gym-ids BeamRiderNoFrameskip-v4 QbertNoFrameskip-v4 SpaceInvadersNoFrameskip-v4 PongNoFrameskip-v4 BreakoutNoFrameskip-v4 \
-    --wandb-project-name cleanrl.benchmark \
-    --other-args "--wandb-entity cleanrl --cuda True" \
-    --job-queue cleanrl_gpu_large_memory \
-    --job-definition cleanrl \
-    --num-seed 2 \
-    --num-vcpu 16 \
-    --num-gpu 1 \
-    --num-memory 63000 \
-    --num-hours 48.0 \
-    --submit-aws $SUBMIT_AWS
-
-
 python -m cleanrl.submit_exp --exp-script offline_dqn_cql_atari_visual.sh \
     --algo offline_dqn_cql_atari_visual.py \
     --total-timesteps 10000000 \
@@ -29,12 +13,6 @@ python -m cleanrl.submit_exp --exp-script offline_dqn_cql_atari_visual.sh \
     --num-hours 48.0 \
     --submit-aws $SUBMIT_AWS
 
-python ppo_procgen_impala_cnn.py --gym-id starpilot --capture-video --track --wandb-entity cleanrl --wandb-project cleanrl.benchmark --seed 1
-python ppo_procgen_impala_cnn.py --gym-id coinrun --capture-video --track --wandb-entity cleanrl --wandb-project cleanrl.benchmark --seed 1
-python ppo_procgen_impala_cnn.py --gym-id dodgeball --capture-video --track --wandb-entity cleanrl --wandb-project cleanrl.benchmark --seed 1
-python ppo_procgen_impala_cnn.py --gym-id bossfight --capture-video --track --wandb-entity cleanrl --wandb-project cleanrl.benchmark --seed 1
-
-
 python ppg_procgen_impala_cnn.py --gym-id starpilot --capture-video --track --wandb-entity cleanrl --wandb-project cleanrl.benchmark --seed 1
 
 python -m cleanrl.utils.submit_exp --exp-script ppo.sh \
diff --git a/benchmark/terminate_all.sh b/cloud/examples/terminate_all.sh
similarity index 100%
rename from benchmark/terminate_all.sh
rename to cloud/examples/terminate_all.sh