diff --git a/benchmark/generate_exp.py b/benchmark/generate_exp.py
deleted file mode 100644
index 219b3da37..000000000
--- a/benchmark/generate_exp.py
+++ /dev/null
@@ -1,41 +0,0 @@
-import argparse
-from distutils.util import strtobool
-
-parser = argparse.ArgumentParser(description='CleanRL Experiment Submission')
-# Common arguments
-parser.add_argument('--exp-script', type=str, default="exp.sh",
-                    help='the file name of this experiment')
-parser.add_argument('--algo', type=str, default="ppo.py",
-                    help='the algorithm that will be used')
-parser.add_argument('--gym-ids', nargs='+',
-                    help='the ids of the gym environment')
-parser.add_argument('--total-timesteps', type=int, default=int(1e9),
-                    help='total timesteps of the experiments')
-parser.add_argument('--other-args', type=str, default="",
-                    help="extra arguments passed through to the algorithm script")
-parser.add_argument('--wandb-project-name', type=str, default="cleanRL",
-                    help="the wandb's project name")
-
-args = parser.parse_args()
-
-template = '''
-for seed in {{1..2}}
-do
-    (sleep 0.3 && nohup xvfb-run -a python {} \\
-    --gym-id {} \\
-    --total-timesteps {} \\
-    --wandb-project-name {} \\
-    --track \\
-    {} \\
-    --capture-video \\
-    --seed $seed
-    ) >& /dev/null &
-done
-'''
-
-final_str = ""
-for env in args.gym_ids:
-    final_str += template.format(args.algo, env, args.total_timesteps, args.wandb_project_name, args.other_args)
-
-with open(f"{args.exp_script}", "w+") as f:
-    f.write(final_str)
\ No newline at end of file
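For orientation, the generator above simply rendered the template once per environment and concatenated the blocks into a shell script. A minimal, self-contained rendering for a single environment (all concrete values below are illustrative, not from a real benchmark run):

    # Render the removed template for one environment; values are illustrative.
    template = '''
    for seed in {{1..2}}
    do
        (sleep 0.3 && nohup xvfb-run -a python {} \\
        --gym-id {} \\
        --total-timesteps {} \\
        --wandb-project-name {} \\
        --track \\
        {} \\
        --capture-video \\
        --seed $seed
        ) >& /dev/null &
    done
    '''
    print(template.format("ppo.py", "CartPole-v1", 2000000,
                          "cleanrl.benchmark", "--wandb-entity cleanrl --cuda False"))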
--cuda True" - -python generate_exp.py --exp-script scripts/td3_pybullet.sh \ - --algo td3_continuous_action.py \ - --total-timesteps 2000000 \ - --gym-ids MinitaurBulletEnv-v0 MinitaurBulletDuckEnv-v0 InvertedPendulumBulletEnv-v0 InvertedDoublePendulumBulletEnv-v0 Walker2DBulletEnv-v0 HalfCheetahBulletEnv-v0 AntBulletEnv-v0 HopperBulletEnv-v0 HumanoidBulletEnv-v0 BipedalWalker-v3 LunarLanderContinuous-v2 Pendulum-v0 MountainCarContinuous-v0 \ - --wandb-project-name cleanrl.benchmark \ - --other-args "--wandb-entity cleanrl --cuda True" - -python generate_exp.py --exp-script scripts/ddpg_mujoco.sh \ - --algo ddpg_continuous_action.py \ - --total-timesteps 2000000 \ - --gym-ids Reacher-v2 Pusher-v2 Thrower-v2 Striker-v2 InvertedPendulum-v2 HalfCheetah-v2 Hopper-v2 Swimmer-v2 Walker2d-v2 Ant-v2 Humanoid-v2 \ - --wandb-project-name cleanrl.benchmark \ - --other-args "--wandb-entity cleanrl --cuda True" - -python generate_exp.py --exp-script scripts/ddpg_pybullet.sh \ - --algo ddpg_continuous_action.py \ - --total-timesteps 2000000 \ - --gym-ids MinitaurBulletEnv-v0 MinitaurBulletDuckEnv-v0 InvertedPendulumBulletEnv-v0 InvertedDoublePendulumBulletEnv-v0 Walker2DBulletEnv-v0 HalfCheetahBulletEnv-v0 AntBulletEnv-v0 HopperBulletEnv-v0 HumanoidBulletEnv-v0 BipedalWalker-v3 LunarLanderContinuous-v2 Pendulum-v0 MountainCarContinuous-v0 \ - --wandb-project-name cleanrl.benchmark \ - --other-args "--wandb-entity cleanrl --cuda True" - -python generate_exp.py --exp-script scripts/sac_mujoco.sh \ - --algo sac_continuous_action.py \ - --total-timesteps 2000000 \ - --gym-ids Reacher-v2 Pusher-v2 Thrower-v2 Striker-v2 InvertedPendulum-v2 HalfCheetah-v2 Hopper-v2 Swimmer-v2 Walker2d-v2 Ant-v2 Humanoid-v2 \ - --wandb-project-name cleanrl.benchmark \ - --other-args "--wandb-entity cleanrl --cuda True" - -python generate_exp.py --exp-script scripts/sac_pybullet.sh \ - --algo sac_continuous_action.py \ - --total-timesteps 2000000 \ - --gym-ids MinitaurBulletEnv-v0 MinitaurBulletDuckEnv-v0 InvertedPendulumBulletEnv-v0 InvertedDoublePendulumBulletEnv-v0 Walker2DBulletEnv-v0 HalfCheetahBulletEnv-v0 AntBulletEnv-v0 HopperBulletEnv-v0 HumanoidBulletEnv-v0 BipedalWalker-v3 LunarLanderContinuous-v2 Pendulum-v0 MountainCarContinuous-v0 \ - --wandb-project-name cleanrl.benchmark \ - --other-args "--wandb-entity cleanrl --cuda True" - -python generate_exp.py --exp-script scripts/dqn_atari.sh \ - --algo dqn_atari_visual.py \ - --total-timesteps 10000000 \ - --gym-ids BeamRiderNoFrameskip-v4 QbertNoFrameskip-v4 SpaceInvadersNoFrameskip-v4 PongNoFrameskip-v4 BreakoutNoFrameskip-v4 \ - --wandb-project-name cleanrl.benchmark \ - --other-args "--wandb-entity cleanrl --cuda True" - -python generate_exp.py --exp-script scripts/dqn_other.sh \ - --algo dqn.py \ - --total-timesteps 2000000 \ - --gym-ids CartPole-v1 Acrobot-v1 MountainCar-v0 LunarLander-v2 \ - --wandb-project-name cleanrl.benchmark \ - --other-args "--wandb-entity cleanrl --cuda True" - -python generate_exp.py --exp-script scripts/c51_atari.sh \ - --algo c51_atari_visual.py \ - --total-timesteps 10000000 \ - --gym-ids BeamRiderNoFrameskip-v4 QbertNoFrameskip-v4 SpaceInvadersNoFrameskip-v4 PongNoFrameskip-v4 BreakoutNoFrameskip-v4 \ - --wandb-project-name cleanrl.benchmark \ - --other-args "--wandb-entity cleanrl --cuda True" - -python generate_exp.py --exp-script scripts/c51_other.sh \ - --algo c51.py \ - --total-timesteps 2000000 \ - --gym-ids CartPole-v1 Acrobot-v1 MountainCar-v0 LunarLander-v2 \ - --wandb-project-name cleanrl.benchmark \ - --other-args 
"--wandb-entity cleanrl --cuda True" - -python generate_exp.py --exp-script scripts/apex_dqn_atari.sh \ - --algo apex_dqn_atari_visual.py \ - --total-timesteps 10000000 \ - --gym-ids BeamRiderNoFrameskip-v4 QbertNoFrameskip-v4 SpaceInvadersNoFrameskip-v4 PongNoFrameskip-v4 BreakoutNoFrameskip-v4 \ - --wandb-project-name cleanrl.benchmark \ - --other-args "--wandb-entity cleanrl --cuda True" - -python generate_exp.py --exp-script scripts/offline_dqn_atari_visual.sh \ - --algo offline_dqn_atari_visual.py \ - --total-timesteps 10000000 \ - --gym-ids BeamRiderNoFrameskip-v4 QbertNoFrameskip-v4 SpaceInvadersNoFrameskip-v4 PongNoFrameskip-v4 BreakoutNoFrameskip-v4 \ - --wandb-project-name cleanrl.benchmark \ - --other-args "--wandb-entity cleanrl --cuda True" - -python generate_exp.py --exp-script scripts/offline_dqn_cql_atari_visual.sh \ - --algo offline_dqn_cql_atari_visual.py \ - --total-timesteps 10000000 \ - --gym-ids BeamRiderNoFrameskip-v4 QbertNoFrameskip-v4 SpaceInvadersNoFrameskip-v4 PongNoFrameskip-v4 BreakoutNoFrameskip-v4 \ - --wandb-project-name cleanrl.benchmark \ - --other-args "--wandb-entity cleanrl --cuda True" \ No newline at end of file diff --git a/benchmark/jobs.py b/benchmark/jobs.py deleted file mode 100644 index 8b641f0b8..000000000 --- a/benchmark/jobs.py +++ /dev/null @@ -1,109 +0,0 @@ -# pip install boto3 -import boto3 -import re -import time -import os -import requests -import json -import argparse -from distutils.util import strtobool -client = boto3.client('batch') - -parser = argparse.ArgumentParser(description='CleanRL Experiment Submission') -# Common arguments -parser.add_argument('--exp-script', type=str, default="scripts/td3_pybullet.sh", - help='the name of this experiment') -parser.add_argument('--job-queue', type=str, default="cleanrl", - help='the name of the job queue') -parser.add_argument('--wandb-key', type=str, default="", - help='the wandb key. 
diff --git a/benchmark/jobs.py b/benchmark/jobs.py
deleted file mode 100644
index 8b641f0b8..000000000
--- a/benchmark/jobs.py
+++ /dev/null
@@ -1,109 +0,0 @@
-# pip install boto3
-import boto3
-import re
-import time
-import os
-import requests
-import json
-import argparse
-from distutils.util import strtobool
-client = boto3.client('batch')
-
-parser = argparse.ArgumentParser(description='CleanRL Experiment Submission')
-# Common arguments
-parser.add_argument('--exp-script', type=str, default="scripts/td3_pybullet.sh",
-                    help='the path of the experiment script to submit')
-parser.add_argument('--job-queue', type=str, default="cleanrl",
-                    help='the name of the job queue')
-parser.add_argument('--wandb-key', type=str, default="",
-                    help='the wandb key. If not provided, the script will try to read the env variable `WANDB_KEY`')
-parser.add_argument('--docker-repo', type=str, default="vwxyzjn/gym-microrts:latest",
-                    help='the docker repository (image) used to run the experiments')
-parser.add_argument('--job-definition', type=str, default="cleanrl",
-                    help='the name of the job definition')
-parser.add_argument('--num-seed', type=int, default=2,
-                    help='number of random seeds for experiments')
-parser.add_argument('--num-vcpu', type=int, default=1,
-                    help='number of vcpus per experiment')
-parser.add_argument('--num-memory', type=int, default=2000,
-                    help='amount of memory (MB) per experiment')
-parser.add_argument('--num-gpu', type=int, default=0,
-                    help='number of gpus per experiment')
-parser.add_argument('--num-hours', type=float, default=16.0,
-                    help='number of hours allocated per experiment')
-parser.add_argument('--upload-files-baseurl', type=str, default="",
-                    help='the baseurl of your website if you decide to upload files')
-parser.add_argument('--submit-aws', type=lambda x:bool(strtobool(x)), default=False, nargs='?', const=True,
-                    help='if toggled, the experiments will be submitted to AWS Batch')
-args = parser.parse_args()
-
-
-# get env variable values
-if not args.wandb_key:
-    args.wandb_key = os.environ['WANDB_KEY']
-assert len(args.wandb_key) > 0, "set the environment variable `WANDB_KEY` to your WANDB API key, something like `export WANDB_KEY=fdsfdsfdsfads`"
-# extract runs from bash scripts
-final_run_cmds = []
-with open(args.exp_script) as f:
-    strings = f.read()
-runs_match = re.findall('(python)(.+)((?:\n.+)+)(seed)',strings)
-for run_match in runs_match:
-    run_match_str = "".join(run_match).replace("\\\n", "")
-    # print(run_match_str)
-    for seed in range(1,1+args.num_seed):
-        final_run_cmds += [run_match_str.replace("$seed", str(seed)).split()]
-        if args.upload_files_baseurl:
-            file_name = final_run_cmds[-1][1]
-            link = args.upload_files_baseurl + '/' + file_name
-            final_run_cmds[-1] = ['curl', '-O', link, ';'] + final_run_cmds[-1]
-
-# use docker directly
-if not args.submit_aws:
-    cores = 40
-    current_core = 0
-    for final_run_cmd in final_run_cmds:
-        print(f'docker run -d --cpuset-cpus="{current_core}" -e WANDB={args.wandb_key} {args.docker_repo} ' +
-              '/bin/bash -c "' + " ".join(final_run_cmd) + '"')
-        current_core = (current_core + 1) % cores
-
-# submit jobs
-if args.submit_aws:
-    for final_run_cmd in final_run_cmds:
-        job_name = re.findall('(python)(.+)(.py)'," ".join(final_run_cmd))[0][1].strip() + str(int(time.time()))
-        job_name = job_name.replace("/", "_").replace("_param ", "")
-        resources_requirements = []
-        if args.num_gpu:
-            resources_requirements = [
-                {
-                    'value': str(args.num_gpu),
-                    'type': 'GPU'
-                },
-            ]
-
-        response = client.submit_job(
-            jobName=job_name,
-            jobQueue=args.job_queue,
-            jobDefinition=args.job_definition,
-            containerOverrides={
-                'vcpus': args.num_vcpu,
-                'memory': args.num_memory,
-                'command': ["/bin/bash", "-c", " ".join(final_run_cmd)],
-                'environment': [
-                    {
-                        'name': 'WANDB',
-                        'value': args.wandb_key
-                    }
-                ],
-                'resourceRequirements': resources_requirements,
-            },
-            retryStrategy={
-                'attempts': 1
-            },
-            timeout={
-                'attemptDurationSeconds': int(args.num_hours*60*60)
-            }
-        )
-        if response['ResponseMetadata']['HTTPStatusCode'] != 200:
-            print(response)
-            raise Exception("jobs submit failure")
-
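With --submit-aws off, jobs.py did not launch anything itself; it printed one detached docker command per run, pinning each container to a single CPU core. A condensed sketch of that path (image name and run list are illustrative):

    # Condensed sketch of jobs.py's local path: one detached container per run,
    # each pinned to one CPU core; WANDB is passed through the environment.
    cores = 40
    runs = [["python", "ppo.py", "--seed", "1"], ["python", "ppo.py", "--seed", "2"]]
    for i, cmd in enumerate(runs):
        print(f'docker run -d --cpuset-cpus="{i % cores}" -e WANDB=$WANDB_KEY '
              f'vwxyzjn/cleanrl:latest /bin/bash -c "{" ".join(cmd)}"')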
diff --git a/benchmark/jobs.sh b/benchmark/jobs.sh
deleted file mode 100644
index 01845aa14..000000000
--- a/benchmark/jobs.sh
+++ /dev/null
@@ -1,129 +0,0 @@
-SUBMIT_AWS=False
-
-python jobs.py --exp-script scripts/ppo_pybullet.sh \
-    --job-queue cleanrl \
-    --job-definition cleanrl \
-    --num-seed 2 \
-    --num-vcpu 2 \
-    --num-memory 3000 \
-    --num-hours 48.0 \
-    --submit-aws $SUBMIT_AWS
-
-python jobs.py --exp-script scripts/ppo_atari.sh \
-    --job-queue cleanrl_gpu \
-    --job-definition cleanrl \
-    --num-seed 2 \
-    --num-vcpu 4 \
-    --num-gpu 1 \
-    --num-memory 14000 \
-    --num-hours 48.0 \
-    --submit-aws $SUBMIT_AWS
-
-python jobs.py --exp-script scripts/ppo_other.sh \
-    --job-queue cleanrl \
-    --job-definition cleanrl \
-    --num-seed 2 \
-    --num-vcpu 1 \
-    --num-memory 3000 \
-    --num-hours 16.0 \
-    --submit-aws $SUBMIT_AWS
-
-python jobs.py --exp-script scripts/td3_pybullet.sh \
-    --job-queue cleanrl_gpu \
-    --job-definition cleanrl \
-    --num-seed 2 \
-    --num-vcpu 2 \
-    --num-memory 14000 \
-    --num-gpu 1 \
-    --num-hours 48.0 \
-    --submit-aws $SUBMIT_AWS
-
-python jobs.py --exp-script scripts/ddpg_pybullet.sh \
-    --job-queue cleanrl_gpu \
-    --job-definition cleanrl \
-    --num-seed 2 \
-    --num-vcpu 2 \
-    --num-memory 14000 \
-    --num-gpu 1 \
-    --num-hours 48.0 \
-    --submit-aws $SUBMIT_AWS
-
-python jobs.py --exp-script scripts/sac_pybullet.sh \
-    --job-queue cleanrl_gpu \
-    --job-definition cleanrl \
-    --num-seed 2 \
-    --num-vcpu 2 \
-    --num-memory 14000 \
-    --num-gpu 1 \
-    --num-hours 48.0 \
-    --submit-aws $SUBMIT_AWS
-
-python jobs.py --exp-script scripts/dqn_atari.sh \
-    --job-queue cleanrl_gpu_large_memory \
-    --job-definition cleanrl \
-    --num-seed 2 \
-    --num-vcpu 2 \
-    --num-gpu 1 \
-    --num-memory 63000 \
-    --num-hours 48.0 \
-    --submit-aws $SUBMIT_AWS
-
-python jobs.py --exp-script scripts/dqn_other.sh \
-    --job-queue cleanrl_gpu \
-    --job-definition cleanrl \
-    --num-seed 2 \
-    --num-vcpu 2 \
-    --num-gpu 1 \
-    --num-memory 3000 \
-    --num-hours 16.0 \
-    --submit-aws $SUBMIT_AWS
-
-python jobs.py --exp-script scripts/c51_atari.sh \
-    --job-queue cleanrl_gpu_large_memory \
-    --job-definition cleanrl \
-    --num-seed 2 \
-    --num-vcpu 2 \
-    --num-gpu 1 \
-    --num-memory 63000 \
-    --num-hours 48.0 \
-    --submit-aws $SUBMIT_AWS
-
-python jobs.py --exp-script scripts/c51_other.sh \
-    --job-queue cleanrl_gpu \
-    --job-definition cleanrl \
-    --num-seed 2 \
-    --num-vcpu 2 \
-    --num-gpu 1 \
-    --num-memory 3000 \
-    --num-hours 16.0 \
-    --submit-aws $SUBMIT_AWS
-
-python jobs.py --exp-script scripts/apex_dqn_atari.sh \
-    --job-queue cleanrl_gpu_large_memory \
-    --job-definition cleanrl \
-    --num-seed 2 \
-    --num-vcpu 16 \
-    --num-gpu 1 \
-    --num-memory 63000 \
-    --num-hours 48.0 \
-    --submit-aws $SUBMIT_AWS
-
-python jobs.py --exp-script scripts/offline_dqn_atari_visual.sh \
-    --job-queue cleanrl_gpu_large_memory \
-    --job-definition cleanrl \
-    --num-seed 2 \
-    --num-vcpu 16 \
-    --num-gpu 1 \
-    --num-memory 63000 \
-    --num-hours 48.0 \
-    --submit-aws $SUBMIT_AWS
-
-python jobs.py --exp-script scripts/offline_dqn_cql_atari_visual.sh \
-    --job-queue cleanrl_gpu_large_memory \
-    --job-definition cleanrl \
-    --num-seed 2 \
-    --num-vcpu 16 \
-    --num-gpu 1 \
-    --num-memory 63000 \
-    --num-hours 48.0 \
-    --submit-aws $SUBMIT_AWS
\ No newline at end of file
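Read together, the presets above amount to three resource tiers keyed by job queue; the grouping below is my summary, not an exhaustive map (exact numbers vary per algorithm, see the script above):

    # My reading of the removed jobs.sh resource tiers (queue names from the script).
    PRESETS = {
        "cleanrl":                  {"vcpu": 1,  "gpu": 0, "memory_mb": 3000},   # classic control
        "cleanrl_gpu":              {"vcpu": 2,  "gpu": 1, "memory_mb": 14000},  # pybullet / small runs
        "cleanrl_gpu_large_memory": {"vcpu": 16, "gpu": 1, "memory_mb": 63000},  # replay-heavy Atari
    }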
diff --git a/benchmark/plots.py b/benchmark/plots.py
deleted file mode 100755
index c0dfd7e10..000000000
--- a/benchmark/plots.py
+++ /dev/null
@@ -1,289 +0,0 @@
-from os import path
-import pickle
-import wandb
-import pandas as pd
-import numpy as np
-import seaborn as sns
-import matplotlib.pyplot as plt
-import os
-import argparse
-from distutils.util import strtobool
-
-parser = argparse.ArgumentParser(description='CleanRL Plots')
-# Common arguments
-parser.add_argument('--wandb-project', type=str, default="cleanrl/cleanrl.benchmark",
-                    help='the name of wandb project (e.g. cleanrl/cleanrl)')
-parser.add_argument('--feature-of-interest', type=str, default='charts/episodic_return',
-                    help='the feature to plot on the y-axis')
-parser.add_argument('--hyper-params-tuned', nargs='+', default=[],
-                    help='the hyper parameters tuned')
-# parser.add_argument('--scan-history', type=lambda x:bool(strtobool(x)), default=False, nargs='?', const=True,
-#                     help='if toggled, scan the full run history instead of sampling')
-parser.add_argument('--interested-exp-names', nargs='+', default=[],
-                    help='the experiment names to include in the plots')
-parser.add_argument('--samples', type=int, default=500,
-                    help='the number of sampled data points per run')
-parser.add_argument('--smooth-weight', type=float, default=0.90,
-                    help='the weight parameter of the exponential moving average')
-parser.add_argument('--last-n-episodes', type=int, default=10,
-                    help='for analysis only; the last n episodes from which the mean of the feature of interest is calculated')
-parser.add_argument('--num-points-x-axis', type=int, default=500,
-                    help='the number of points in the x-axis')
-parser.add_argument('--font-size', type=int, default=0,
-                    help='the font size of the plots')
-parser.add_argument('--x-label', type=str, default="Time Steps",
-                    help='the label of the x-axis')
-parser.add_argument('--y-label', type=str, default="Average Episode Reward",
-                    help='the label of the y-axis')
-parser.add_argument('--y-lim-bottom', type=float, default=0.0,
-                    help='the bottom limit for the y-axis')
-parser.add_argument('--output-format', type=str, default="svg",
-                    help='either `pdf`, `png`, or `svg`')
-args = parser.parse_args()
-api = wandb.Api()
-
-exp_convert_dict = {
-    # "ppo_atari_visual": "ppo",
-    # "ppo_continuous_action": "ppo"
-}
-
-# args.feature_of_interest = 'charts/episodic_return'
-feature_name = args.feature_of_interest.replace("/", "_")
-if not os.path.exists(feature_name):
-    os.makedirs(feature_name)
-
-if not path.exists(f"{feature_name}/all_df_cache.pkl"):
-    # Change oreilly-class/cifar to <entity/project-name>
-    runs = api.runs(args.wandb_project)
-    summary_list = []
-    config_list = []
-    name_list = []
-    envs = {}
-    data = []
-    exp_names = []
-
-    for idx, run in enumerate(runs):
-        if args.feature_of_interest in run.summary:
-            # if args.scan_history:
-            #     ls =
-            # else:
-            ls = run.history(keys=[args.feature_of_interest, 'global_step'], pandas=False, samples=args.samples)
-            metrics_dataframe = pd.DataFrame(ls[0])
-            for param in args.hyper_params_tuned:
-                if param in run.config:
-                    run.config['exp_name'] += "-" + param + "-" + str(run.config[param]) + "-"
-
-            metrics_dataframe.insert(len(metrics_dataframe.columns), "algo", run.config['exp_name'])
-            metrics_dataframe.insert(len(metrics_dataframe.columns), "seed", run.config['seed'])
-
-            data += [metrics_dataframe]
-            if run.config["gym_id"] not in envs:
-                envs[run.config["gym_id"]] = [metrics_dataframe]
-                envs[run.config["gym_id"]+"total_timesteps"] = run.config["total_timesteps"]
-            else:
-                envs[run.config["gym_id"]] += [metrics_dataframe]
-
-            # run.summary holds the output key/values like accuracy. We call ._json_dict to omit large files
-            summary_list.append(run.summary._json_dict)
-
-            # run.config holds the input hyperparameters. We remove special values that start with _.
-            config_list.append({k:v for k,v in run.config.items() if not k.startswith('_')})
-
-            # run.name is the name of the run.
-            name_list.append(run.name)
-
-    summary_df = pd.DataFrame.from_records(summary_list)
-    config_df = pd.DataFrame.from_records(config_list)
-    name_df = pd.DataFrame({'name': name_list})
-    all_df = pd.concat([name_df, config_df, summary_df], axis=1)
-    data = pd.concat(data, ignore_index=True)
-
-    with open(f'{feature_name}/all_df_cache.pkl', 'wb') as handle:
-        pickle.dump(all_df, handle, protocol=pickle.HIGHEST_PROTOCOL)
-    with open(f'{feature_name}/envs_cache.pkl', 'wb') as handle:
-        pickle.dump(envs, handle, protocol=pickle.HIGHEST_PROTOCOL)
-else:
-    with open(f'{feature_name}/all_df_cache.pkl', 'rb') as handle:
-        all_df = pickle.load(handle)
-    with open(f'{feature_name}/envs_cache.pkl', 'rb') as handle:
-        envs = pickle.load(handle)
-print("data loaded")
-
-# https://stackoverflow.com/questions/42281844/what-is-the-mathematics-behind-the-smoothing-parameter-in-tensorboards-scalar#_=_
-def smooth(scalars, weight):  # weight between 0 and 1
-    last = scalars[0]  # first value in the plot (first timestep)
-    smoothed = list()
-    for point in scalars:
-        smoothed_val = last * weight + (1 - weight) * point  # calculate smoothed value
-        smoothed.append(smoothed_val)  # save it
-        last = smoothed_val  # anchor the last smoothed value
-
-    return smoothed
-
-# smoothing
-for env in envs:
-    if not env.endswith("total_timesteps"):
-        for idx, metrics_dataframe in enumerate(envs[env]):
-            envs[env][idx] = metrics_dataframe.dropna(subset=[args.feature_of_interest])
-#             envs[env][idx][args.feature_of_interest] = smooth(metrics_dataframe[args.feature_of_interest], 0.85)
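An aside on the smooth function just shown: it is the TensorBoard-style exponential moving average linked in the comment, initialized at the first value (so early points are biased toward the start of the curve). A tiny worked example, restating the deleted function so the snippet runs on its own:

    # TensorBoard-style EMA, as in the deleted smooth():
    # smoothed[t] = weight * smoothed[t-1] + (1 - weight) * scalars[t]
    def smooth(scalars, weight):
        last = scalars[0]
        smoothed = []
        for point in scalars:
            last = last * weight + (1 - weight) * point
            smoothed.append(last)
        return smoothed

    # t=0: 0.9*0.0 + 0.1*0.0  = 0.0
    # t=1: 0.9*0.0 + 0.1*10.0 = 1.0
    # t=2: 0.9*1.0 + 0.1*10.0 = 1.9
    print(smooth([0.0, 10.0, 10.0], 0.9))  # -> approximately [0.0, 1.0, 1.9]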
-sns.set(style="darkgrid")
-def get_df_for_env(gym_id):
-    env_total_timesteps = envs[gym_id+"total_timesteps"]
-    env_increment = env_total_timesteps / 500
-    envs_same_x_axis = []
-    for sampled_run in envs[gym_id]:
-        df = pd.DataFrame(columns=sampled_run.columns)
-        x_axis = [i*env_increment for i in range(500-2)]
-        current_row = 0
-        for timestep in x_axis:
-            while sampled_run.iloc[current_row]["global_step"] < timestep:
-                current_row += 1
-                if current_row > len(sampled_run)-2:
-                    break
-            if current_row > len(sampled_run)-2:
-                break
-            temp_row = sampled_run.iloc[current_row].copy()
-            temp_row["global_step"] = timestep
-            df = df.append(temp_row)
-
-        envs_same_x_axis += [df]
-    return pd.concat(envs_same_x_axis, ignore_index=True)
-
-def export_legend(ax, filename="legend.pdf"):
-    # import matplotlib as mpl
-    # mpl.rcParams['text.usetex'] = True
-    # mpl.rcParams['text.latex.preamble'] = [r'\usepackage{amsmath}']  # for \text command
-    fig2 = plt.figure()
-    ax2 = fig2.add_subplot()
-    ax2.axis('off')
-    handles, labels = ax.get_legend_handles_labels()
-
-    legend = ax2.legend(handles=handles[1:], labels=labels[1:], frameon=False, loc='lower center', ncol=3, fontsize=20, handlelength=1)
-    for text in legend.get_texts():
-        if text.get_text() in exp_convert_dict:
-            text.set_text(exp_convert_dict[text.get_text()])
-    for line in legend.get_lines():
-        line.set_linewidth(4.0)
-    fig = legend.figure
-    fig.canvas.draw()
-    bbox = legend.get_window_extent().transformed(fig.dpi_scale_trans.inverted())
-    fig.savefig(filename, dpi="figure", bbox_inches=bbox)
-    fig.clf()
-
-if not os.path.exists(f"{feature_name}/data"):
-    os.makedirs(f"{feature_name}/data")
-if not os.path.exists(f"{feature_name}/plots"):
-    os.makedirs(f"{feature_name}/plots")
-if not os.path.exists(f"{feature_name}/legends"):
-    os.makedirs(f"{feature_name}/legends")
-
-interested_exp_names = sorted(list(set(all_df['exp_name'])))  # e.g. ['ppo_continuous_action', 'ppo_atari_visual']
-current_palette = sns.color_palette(n_colors=len(interested_exp_names))
-current_palette_dict = dict(zip(interested_exp_names, current_palette))
-if args.interested_exp_names:
-    interested_exp_names = args.interested_exp_names
-print(current_palette_dict)
-legend_df = pd.DataFrame()
-
-if args.font_size:
-    plt.rc('axes', titlesize=args.font_size)   # fontsize of the axes title
-    plt.rc('axes', labelsize=args.font_size)   # fontsize of the x and y labels
-    plt.rc('xtick', labelsize=args.font_size)  # fontsize of the tick labels
-    plt.rc('ytick', labelsize=args.font_size)  # fontsize of the tick labels
-    plt.rc('legend', fontsize=args.font_size)  # legend fontsize
-
-stats = {item: [] for item in ["gym_id", "exp_name", args.feature_of_interest]}
-# uncomment the following to generate all figures
-for env in set(all_df["gym_id"]):
-    if not path.exists(f"{feature_name}/data/{env}.pkl"):
-        with open(f"{feature_name}/data/{env}.pkl", 'wb') as handle:
-            data = get_df_for_env(env)
-            data["seed"] = data["seed"].astype(float)
-            data[args.feature_of_interest] = data[args.feature_of_interest].astype(float)
-            pickle.dump(data, handle, protocol=pickle.HIGHEST_PROTOCOL)
-    else:
-        with open(f"{feature_name}/data/{env}.pkl", 'rb') as handle:
-            data = pickle.load(handle)
-    print(f"{env}'s data loaded")
-    def _smooth(df):
-        df[args.feature_of_interest] = smooth(list(df[args.feature_of_interest]), args.smooth_weight)
-        return df
-
-    legend_df = legend_df.append(data)
-    ax = sns.lineplot(data=data.groupby(["seed", "algo"]).apply(_smooth).loc[data['algo'].isin(interested_exp_names)], x="global_step", y=args.feature_of_interest, hue="algo", ci='sd', palette=current_palette_dict,)
-    ax.set(xlabel=args.x_label, ylabel=args.y_label)
-    # ax.legend().remove()
-    handles, labels = ax.get_legend_handles_labels()
-    legend = ax.legend(handles=handles[1:], labels=labels[1:])
-    for text in legend.get_texts():
-        if text.get_text() in exp_convert_dict:
-            text.set_text(exp_convert_dict[text.get_text()])
-    if args.y_lim_bottom:
-        plt.ylim(bottom=args.y_lim_bottom)
-    plt.title(env)
-
-    plt.tight_layout()
-    plt.savefig(f"{feature_name}/plots/{env}.{args.output_format}")
-    plt.clf()
-
-    for algo in interested_exp_names:
-        if algo in set(data['algo']):
-            algo_data = data.loc[data['algo'].isin([algo])]
-            last_n_episodes_global_step = sorted(algo_data["global_step"].unique())[-args.last_n_episodes]
-            last_n_episodes_features = algo_data[algo_data['global_step'] > last_n_episodes_global_step].groupby(
-                ['seed']
-            ).mean()[args.feature_of_interest]
-
-            for item in last_n_episodes_features:
-                stats[args.feature_of_interest] += [item]
-                if algo in exp_convert_dict:
-                    stats['exp_name'] += [exp_convert_dict[algo]]
-                else:
-                    stats['exp_name'] += [algo]
-                stats['gym_id'] += [env]
-
-# export legend
-ax = sns.lineplot(data=legend_df, x="global_step", y=args.feature_of_interest, hue="algo", ci='sd', palette=current_palette_dict,)
-ax.set(xlabel='Time Steps', ylabel='Average Episode Reward')
-ax.legend().remove()
-export_legend(ax, f"{feature_name}/legend.{args.output_format}")
-plt.clf()
-
-
-# analysis
-stats_df = pd.DataFrame(stats)
-g = stats_df.groupby(
-    ['gym_id','exp_name']
-).agg(lambda x: f"{np.mean(x):.2f} ± {np.std(x):.2f}")
-print(g.reset_index().pivot('gym_id', 'exp_name', args.feature_of_interest).to_latex().replace("±", "$\pm$"))
-
-###############################
-# benchmark specific settings
-###############################
-final_df = g.reset_index().pivot('gym_id', 'exp_name', args.feature_of_interest)
-print("===============Atari===========") -print(final_df[["c51_atari_visual", "dqn_atari_visual", "ppo_atari_visual"]].loc[[ - "BeamRiderNoFrameskip-v4", "QbertNoFrameskip-v4", "SpaceInvadersNoFrameskip-v4", "PongNoFrameskip-v4", "BreakoutNoFrameskip-v4" -]].to_markdown()) - -print("===============Mujoco and Pybullet===========") -print(final_df[["ddpg_continuous_action", "td3_continuous_action", "ppo_continuous_action"]].loc[[ - "Ant-v2", "Humanoid-v2", "Walker2DBulletEnv-v0", "HalfCheetahBulletEnv-v0", "HopperBulletEnv-v0", "BipedalWalker-v3", "LunarLanderContinuous-v2", "Pendulum-v0", "MountainCarContinuous-v0" -]].to_markdown()) - -# print run time -runtime_df = all_df[['gym_id','exp_name', '_runtime']].copy() -for i in range(len(runtime_df)): - runtime_df.iloc[i, runtime_df.columns.get_loc('exp_name')] = exp_convert_dict[runtime_df.iloc[i]['exp_name']] - -runtime_df.groupby( - ['gym_id','exp_name'] -).agg( - lambda x: f"{np.mean(x/3600):.2f} ± {np.std(x/3600):.2f}" -).reset_index().pivot('exp_name', 'gym_id' , '_runtime') \ No newline at end of file diff --git a/benchmark/resubmit.py b/benchmark/resubmit.py deleted file mode 100755 index 2bcbbf958..000000000 --- a/benchmark/resubmit.py +++ /dev/null @@ -1,112 +0,0 @@ -# pip install boto3 -import boto3 -import re -import time -import os -import requests -import json -import argparse -import wandb -import requests -from distutils.util import strtobool -client = boto3.client('batch') - -parser = argparse.ArgumentParser(description='CleanRL Experiment Submission') -# Common arguments -parser.add_argument('--wandb-project', type=str, default="cleanrl/cleanrl.benchmark", - help='the name of wandb project (e.g. cleanrl/cleanrl)') -parser.add_argument('--run-state', type=str, default="crashed", - help='the name of this experiment') -parser.add_argument('--job-queue', type=str, default="cleanrl", - help='the name of the job queue') -parser.add_argument('--job-definition', type=str, default="cleanrl", - help='the name of the job definition') -parser.add_argument('--num-vcpu', type=int, default=2, - help='number of vcpu per experiment') -parser.add_argument('--num-memory', type=int, default=15000, - help='number of memory (MB) per experiment') -parser.add_argument('--num-gpu', type=int, default=1, - help='number of gpu per experiment') -parser.add_argument('--num-hours', type=float, default=48.0, - help='number of hours allocated experiment') -parser.add_argument('--upload_files', type=lambda x:bool(strtobool(x)), default=False, nargs='?', const=True, - help='if toggled, script will need to be uploaded') -parser.add_argument('--submit-aws', type=lambda x:bool(strtobool(x)), default=False, nargs='?', const=True, - help='if toggled, script will need to be uploaded') -args = parser.parse_args() - -api = wandb.Api() - -if args.upload_files: - response = requests.get('http://127.0.0.1:4040/api/tunnels') - content = json.loads(response.content.decode()) - assert response.status_code == 200 - url = content['tunnels'][0]['public_url'] - -# Project is specified by -runs = api.runs(args.wandb_project) -final_run_cmds = [] -for run in runs: - if run.state == args.run_state: - metadata = requests.get(url=run.file(name="wandb-metadata.json").url).json() - final_run_cmds += [["python", metadata["program"]] + metadata["args"]] - if args.upload_files: - file_name = final_run_cmds[-1][1] - link = url + '/' + file_name - final_run_cmds[-1] = ['wget', link, ';'] + final_run_cmds[-1] - -# get env variable values -wandb_key = os.environ['WANDB_KEY'] -assert 
diff --git a/benchmark/resubmit.py b/benchmark/resubmit.py
deleted file mode 100755
index 2bcbbf958..000000000
--- a/benchmark/resubmit.py
+++ /dev/null
@@ -1,112 +0,0 @@
-# pip install boto3
-import boto3
-import re
-import time
-import os
-import requests
-import json
-import argparse
-import wandb
-from distutils.util import strtobool
-client = boto3.client('batch')
-
-parser = argparse.ArgumentParser(description='CleanRL Experiment Submission')
-# Common arguments
-parser.add_argument('--wandb-project', type=str, default="cleanrl/cleanrl.benchmark",
-                    help='the name of wandb project (e.g. cleanrl/cleanrl)')
-parser.add_argument('--run-state', type=str, default="crashed",
-                    help='the run state used to select runs for resubmission (e.g. crashed)')
-parser.add_argument('--job-queue', type=str, default="cleanrl",
-                    help='the name of the job queue')
-parser.add_argument('--job-definition', type=str, default="cleanrl",
-                    help='the name of the job definition')
-parser.add_argument('--num-vcpu', type=int, default=2,
-                    help='number of vcpus per experiment')
-parser.add_argument('--num-memory', type=int, default=15000,
-                    help='amount of memory (MB) per experiment')
-parser.add_argument('--num-gpu', type=int, default=1,
-                    help='number of gpus per experiment')
-parser.add_argument('--num-hours', type=float, default=48.0,
-                    help='number of hours allocated per experiment')
-parser.add_argument('--upload_files', type=lambda x:bool(strtobool(x)), default=False, nargs='?', const=True,
-                    help='if toggled, the experiment scripts are fetched through a local tunnel (e.g. ngrok)')
-parser.add_argument('--submit-aws', type=lambda x:bool(strtobool(x)), default=False, nargs='?', const=True,
-                    help='if toggled, the experiments will be submitted to AWS Batch')
-args = parser.parse_args()
-
-api = wandb.Api()
-
-if args.upload_files:
-    response = requests.get('http://127.0.0.1:4040/api/tunnels')
-    content = json.loads(response.content.decode())
-    assert response.status_code == 200
-    url = content['tunnels'][0]['public_url']
-
-# Project is specified by <entity/project-name>
-runs = api.runs(args.wandb_project)
-final_run_cmds = []
-for run in runs:
-    if run.state == args.run_state:
-        metadata = requests.get(url=run.file(name="wandb-metadata.json").url).json()
-        final_run_cmds += [["python", metadata["program"]] + metadata["args"]]
-        if args.upload_files:
-            file_name = final_run_cmds[-1][1]
-            link = url + '/' + file_name
-            final_run_cmds[-1] = ['wget', link, ';'] + final_run_cmds[-1]
-
-# get env variable values
-wandb_key = os.environ['WANDB_KEY']
-assert len(wandb_key) > 0, "set the environment variable `WANDB_KEY` to your WANDB API key, something like `export WANDB_KEY=fdsfdsfdsfads`"
-
-# use docker directly
-if not args.submit_aws:
-    cores = 40
-    repo = "vwxyzjn/cleanrl:latest"
-    current_core = 0
-    for final_run_cmd in final_run_cmds:
-        print(f'docker run -d --cpuset-cpus="{current_core}" -e WANDB={wandb_key} {repo} ' +
-              '/bin/bash -c "' + " ".join(final_run_cmd) + '"')
-        current_core = (current_core + 1) % cores
-
-# submit jobs
-if args.submit_aws:
-    for final_run_cmd in final_run_cmds:
-        job_name = re.findall('(python)(.+)(.py)'," ".join(final_run_cmd))[0][1].strip() + str(int(time.time()))
-        job_name = job_name.replace("/", "_").replace("_param ", "")
-        resources_requirements = []
-        if args.num_gpu:
-            resources_requirements = [
-                {
-                    'value': '1',
-                    'type': 'GPU'
-                },
-            ]
-
-        response = client.submit_job(
-            jobName=job_name,
-            jobQueue=args.job_queue,
-            jobDefinition=args.job_definition,
-            containerOverrides={
-                'vcpus': args.num_vcpu,
-                'memory': args.num_memory,
-                'command': ["/bin/bash", "-c", " ".join(final_run_cmd)],
-                'environment': [
-                    {
-                        'name': 'WANDB',
-                        'value': wandb_key
-                    }
-                ],
-                'resourceRequirements': resources_requirements,
-            },
-            retryStrategy={
-                'attempts': 1
-            },
-            timeout={
-                'attemptDurationSeconds': int(args.num_hours*60*60)
-            }
-        )
-        if response['ResponseMetadata']['HTTPStatusCode'] != 200:
-            print(response)
-            raise Exception("jobs submit failure")
-
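resubmit.py boiled down to: list the project's runs, keep those whose state matches the filter (crashed by default), and rebuild each launch command from the run's stored metadata before resubmitting. A condensed sketch of that core (requires a logged-in wandb client; the project name is the script's default):

    # Condensed sketch of the resubmission idea: rebuild launch commands for
    # crashed runs from wandb's stored run metadata.
    import wandb
    import requests

    api = wandb.Api()
    for run in api.runs("cleanrl/cleanrl.benchmark"):
        if run.state == "crashed":
            meta = requests.get(url=run.file(name="wandb-metadata.json").url).json()
            print(["python", meta["program"]] + meta["args"])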
diff --git a/benchmark/docker_queue.py b/cleanrl_utils/docker_queue.py
similarity index 100%
rename from benchmark/docker_queue.py
rename to cleanrl_utils/docker_queue.py
diff --git a/benchmark/experimentals.sh b/cloud/examples/experimentals.sh
similarity index 100%
rename from benchmark/experimentals.sh
rename to cloud/examples/experimentals.sh
diff --git a/benchmark/scripts/apex_dqn_atari.sh b/cloud/examples/scripts/apex_dqn_atari.sh
similarity index 100%
rename from benchmark/scripts/apex_dqn_atari.sh
rename to cloud/examples/scripts/apex_dqn_atari.sh
diff --git a/benchmark/scripts/c51_atari.sh b/cloud/examples/scripts/c51_atari.sh
similarity index 100%
rename from benchmark/scripts/c51_atari.sh
rename to cloud/examples/scripts/c51_atari.sh
diff --git a/benchmark/scripts/c51_other.sh b/cloud/examples/scripts/c51_other.sh
similarity index 100%
rename from benchmark/scripts/c51_other.sh
rename to cloud/examples/scripts/c51_other.sh
diff --git a/benchmark/scripts/ddpg_mujoco.sh b/cloud/examples/scripts/ddpg_mujoco.sh
similarity index 100%
rename from benchmark/scripts/ddpg_mujoco.sh
rename to cloud/examples/scripts/ddpg_mujoco.sh
diff --git a/benchmark/scripts/ddpg_pybullet.sh b/cloud/examples/scripts/ddpg_pybullet.sh
similarity index 100%
rename from benchmark/scripts/ddpg_pybullet.sh
rename to cloud/examples/scripts/ddpg_pybullet.sh
diff --git a/benchmark/scripts/dqn_atari.sh b/cloud/examples/scripts/dqn_atari.sh
similarity index 100%
rename from benchmark/scripts/dqn_atari.sh
rename to cloud/examples/scripts/dqn_atari.sh
diff --git a/benchmark/scripts/dqn_other.sh b/cloud/examples/scripts/dqn_other.sh
similarity index 100%
rename from benchmark/scripts/dqn_other.sh
rename to cloud/examples/scripts/dqn_other.sh
diff --git a/benchmark/scripts/offline_dqn_atari_visual.sh b/cloud/examples/scripts/offline_dqn_atari_visual.sh
similarity index 100%
rename from benchmark/scripts/offline_dqn_atari_visual.sh
rename to cloud/examples/scripts/offline_dqn_atari_visual.sh
diff --git a/benchmark/scripts/offline_dqn_cql_atari_visual.sh b/cloud/examples/scripts/offline_dqn_cql_atari_visual.sh
similarity index 100%
rename from benchmark/scripts/offline_dqn_cql_atari_visual.sh
rename to cloud/examples/scripts/offline_dqn_cql_atari_visual.sh
diff --git a/benchmark/scripts/ppo_atari.sh b/cloud/examples/scripts/ppo_atari.sh
similarity index 100%
rename from benchmark/scripts/ppo_atari.sh
rename to cloud/examples/scripts/ppo_atari.sh
diff --git a/benchmark/scripts/ppo_mujoco.sh b/cloud/examples/scripts/ppo_mujoco.sh
similarity index 100%
rename from benchmark/scripts/ppo_mujoco.sh
rename to cloud/examples/scripts/ppo_mujoco.sh
diff --git a/benchmark/scripts/ppo_other.sh b/cloud/examples/scripts/ppo_other.sh
similarity index 100%
rename from benchmark/scripts/ppo_other.sh
rename to cloud/examples/scripts/ppo_other.sh
diff --git a/benchmark/scripts/ppo_pybullet.sh b/cloud/examples/scripts/ppo_pybullet.sh
similarity index 100%
rename from benchmark/scripts/ppo_pybullet.sh
rename to cloud/examples/scripts/ppo_pybullet.sh
diff --git a/benchmark/scripts/sac_mujoco.sh b/cloud/examples/scripts/sac_mujoco.sh
similarity index 100%
rename from benchmark/scripts/sac_mujoco.sh
rename to cloud/examples/scripts/sac_mujoco.sh
diff --git a/benchmark/scripts/sac_pybullet.sh b/cloud/examples/scripts/sac_pybullet.sh
similarity index 100%
rename from benchmark/scripts/sac_pybullet.sh
rename to cloud/examples/scripts/sac_pybullet.sh
diff --git a/benchmark/scripts/td3_mujoco.sh b/cloud/examples/scripts/td3_mujoco.sh
similarity index 100%
rename from benchmark/scripts/td3_mujoco.sh
rename to cloud/examples/scripts/td3_mujoco.sh
diff --git a/benchmark/scripts/td3_pybullet.sh b/cloud/examples/scripts/td3_pybullet.sh
similarity index 100%
rename from benchmark/scripts/td3_pybullet.sh
rename to cloud/examples/scripts/td3_pybullet.sh
diff --git a/benchmark/submit_exp.sh b/cloud/examples/submit_exp.sh
similarity index 61%
rename from benchmark/submit_exp.sh
rename to cloud/examples/submit_exp.sh
index 9587bc865..68a263864 100644
--- a/benchmark/submit_exp.sh
+++ b/cloud/examples/submit_exp.sh
@@ -1,19 +1,3 @@
-python submit_exp.py --exp-script scripts/offline_dqn_cql_atari_visual.sh \
-    --algo offline_dqn_cql_atari_visual.py \
-    --total-timesteps 10000000 \
-    --gym-ids BeamRiderNoFrameskip-v4 QbertNoFrameskip-v4 SpaceInvadersNoFrameskip-v4 PongNoFrameskip-v4 BreakoutNoFrameskip-v4 \
-    --wandb-project-name cleanrl.benchmark \
-    --other-args "--wandb-entity cleanrl --cuda True" \
-    --job-queue cleanrl_gpu_large_memory \
-    --job-definition cleanrl \
-    --num-seed 2 \
-    --num-vcpu 16 \
-    --num-gpu 1 \
-    --num-memory 63000 \
-    --num-hours 48.0 \
-    --submit-aws $SUBMIT_AWS
-
-
 python -m cleanrl.submit_exp --exp-script offline_dqn_cql_atari_visual.sh \
     --algo offline_dqn_cql_atari_visual.py \
     --total-timesteps 10000000 \
@@ -29,12 +13,6 @@ python -m cleanrl.submit_exp --exp-script offline_dqn_cql_atari_visual.sh \
     --num-hours 48.0 \
     --submit-aws $SUBMIT_AWS
 
-python ppo_procgen_impala_cnn.py --gym-id starpilot --capture-video --track --wandb-entity cleanrl --wandb-project cleanrl.benchmark --seed 1
-python ppo_procgen_impala_cnn.py --gym-id coinrun --capture-video --track --wandb-entity cleanrl --wandb-project cleanrl.benchmark --seed 1
-python ppo_procgen_impala_cnn.py --gym-id dodgeball --capture-video --track --wandb-entity cleanrl --wandb-project cleanrl.benchmark --seed 1
-python ppo_procgen_impala_cnn.py --gym-id bossfight --capture-video --track --wandb-entity cleanrl --wandb-project cleanrl.benchmark --seed 1
-
-
 python ppg_procgen_impala_cnn.py --gym-id starpilot --capture-video --track --wandb-entity cleanrl --wandb-project cleanrl.benchmark --seed 1
 
 python -m cleanrl.utils.submit_exp --exp-script ppo.sh \
diff --git a/benchmark/terminate_all.sh b/cloud/examples/terminate_all.sh
similarity index 100%
rename from benchmark/terminate_all.sh
rename to cloud/examples/terminate_all.sh