In [0]:
# Mount Google Drive at /content/gdrive (Colab only).
# The project files are copied from this mount and run outputs are copied
# back to it by import2drive().
from google.colab import drive
drive.mount('/content/gdrive')

In [0]:
# copy files to run-time machine from google gdrive
!cp gdrive/My\ Drive/Colab\ Notebooks/rl\ project/* .


In [0]:
# install ConfigSpace
!pip install ConfigSpace

In [0]:
# Function to save results in gdrive in order to have them after each run

from pathlib import Path
import shutil

# Output folders (relative to the working directory) that import2drive() copies.
folders = ['models', 'results', 'config']

def import2drive():
  """Copy the run outputs from the working directory to Google Drive.

  For every name in ``folders`` the regular files directly inside
  ``<cwd>/<name>`` are copied to
  ``<cwd>/gdrive/My Drive/Colab Notebooks/rl project/<name>``; the destination
  folder is created when needed. Sub-directories are not copied.

  A source folder that does not exist is reported and skipped, so one missing
  folder no longer aborts the copy of the remaining ones.
  """
  root = Path.cwd().resolve()
  save_parent = root / 'gdrive' / 'My Drive' / 'Colab Notebooks' / 'rl project'
  if not save_parent.is_dir():
    # mkdir(parents=True) below creates the whole chain, so without a mounted
    # Drive the files would end up in a plain local folder.
    print(f'warning: {save_parent} does not exist yet; is Google Drive mounted?')
  for folder in folders:
    dirpath = root / folder
    print(dirpath)
    if not dirpath.is_dir():
      print(f'skipping {dirpath}: no such folder')
      continue
    save_dir = save_parent / folder
    save_dir.mkdir(parents=True, exist_ok=True)
    for x in dirpath.iterdir():
      print(x)
      # iterdir() already yields dirpath / name, so x is used as-is.
      if x.is_file():
        shutil.copy(str(x), str(save_dir))

In [0]:
# Print the help text of arg_parser.py (expected in the working directory) to
# see the available command-line options.
# NOTE(review): presumably these are the flags the run_*.py scripts below accept; confirm.
!python arg_parser.py -h

In [0]:
# Fixed settings plus the raw description of the search space that the next
# cell turns into a ConfigSpace.
gamma = 0.99      # not referenced by the other cells shown here
cont_act_dim = 1  # passed as -ac to run_reinforce_continous.py

# Continuous hyperparameters: name -> [lower bound, upper bound].
float_hyper = dict(
    epsilon=[0, 1],
    trace_decay=[0, 1],
    alpha=[0.00001, 0.1],
    entropy_coeff=[0.0001, 0.1],
)

# Categorical hyperparameters: name -> list of allowed choices.
cat_hyper = {
    "episode": [1000, 2000, 3000, 4000],
    'steps': [250, 500, 750],
    'timesteps': [200, 300, 400, 500, 600],
    'update_timesteps': [128, 256, 512, 1024, 2048],
    'K_epochs': [5, 10, 15, 20],
    'eps_clip': [0.1, 0.2, 0.3, 0.4, 0.5],
    'action_std': [0.1, 0.2, 0.3, 0.4],
    'hidden_unit': [32, 64, 128, 256],
    'reward_func': ['sparse', 'carrot', 'laplace', 'slow_rotation'],
    'action_dim': [3, 5, 7],
}


In [0]:

import ConfigSpace as CS
import ConfigSpace.hyperparameters as CSH

# Search space sampled by every runner cell below (seeded with 1).
cs = CS.ConfigurationSpace(seed=1)

# Actor (a_lr) and critic (c_lr) learning rates: log-scaled floats in
# [1e-4, 1e-1] with default 1e-3.
search_dims = [
    CSH.UniformFloatHyperparameter(name=lr_name, lower=1e-4, upper=1e-1, default_value=1e-3, log=True)
    for lr_name in ('a_lr', 'c_lr')
]
# Remaining float ranges, then the categorical choices, in table order.
search_dims += [
    CSH.UniformFloatHyperparameter(name=name, lower=bounds[0], upper=bounds[1])
    for name, bounds in float_hyper.items()
]
search_dims += [
    CSH.CategoricalHyperparameter(name=name, choices=options)
    for name, options in cat_hyper.items()
]

for dim in search_dims:
  cs.add_hyperparameter(dim)

# Last expression of the cell: display the assembled space.
cs

In [0]:
# Run REINFORCE Discrete
from utils import save_config_colab
rounds = 1
for i in range(rounds):
  sample = cs.sample_configuration()
  print(f'__starting round {i}')
  for item in sample.get_dictionary().items():
    print(item)
  save_config_colab(sample.get_dictionary(), i, 'REINFORCE_discrete')
  episode = sample.get('episode') # -e
  reward = sample.get('reward_func') # -rw
  action_dim = sample.get('action_dim') # -ac
  timesteps = sample.get('timesteps') # -s
  hidden_unit = sample.get('hidden_unit') # -hd
  a_lr = sample.get('a_lr') # -alr
  c_lr = sample.get('c_lr') # -clr

  !python run_reinforce_discrete.py -e $episode -rw $reward -ac $action_dim -s $timesteps -hd $hidden_unit -alr $a_lr -clr $c_lr -ec $i
  # import2drive()

  

In [0]:
## Run PPO Continues
from utils import save_config_colab
rounds = 1
for i in range(rounds):
  sample = cs.sample_configuration()
  print(f'__starting round {i}')
  for item in sample.get_dictionary().items():
    print(item)
  save_config_colab(sample.get_dictionary(), i, 'PPO_Continues')
  episode = sample.get('episode') # -e
  reward = sample.get('reward_func') # -rw
  action_dim = sample.get('action_dim') # -ac
  timesteps = sample.get('timesteps') # -s
  hidden_unit = sample.get('hidden_unit') # -hd
  a_lr = sample.get('a_lr') # -alr
  c_lr = sample.get('c_lr') # -clr
  k_ep = sample.get('K_epochs') # -ke
  action_std = sample.get('action_std') # -ad 
  entropy_coeff = sample.get('entropy_coeff') # -eco
  epsilon_clip = sample.get('eps_clip') # -ecp
  update_timesteps = sample.get('update_timesteps') # -us

  !python run_ppo_continuous.py -e $episode -s $timesteps -rw $reward -hd $hidden_unit -alr $a_lr -clr $c_lr -ke $k_ep -ad $action_std -eco $entropy_coeff  -ecp $epsilon_clip -us $update_timesteps
  # import2drive()

In [0]:
# Run REINFORCE continues
from utils import save_config_colab
rounds = 1
for i in range(rounds):
  sample = cs.sample_configuration()
  print(f'__starting round {i}')
  for item in sample.get_dictionary().items():
    print(item)
  save_config_colab(sample.get_dictionary(), i, 'REINFORCE_continues')
  episode = sample.get('episode') # -e
  reward = sample.get('reward_func') # -rw
  action_dim = sample.get('action_dim') # -ac
  timesteps = sample.get('timesteps') # -s
  hidden_unit = sample.get('hidden_unit') # -hd
  a_lr = sample.get('a_lr') # -alr
  c_lr = sample.get('c_lr') # -clr

  !python run_reinforce_continous.py -e $episode -rw $reward -ac $cont_act_dim -s $timesteps -hd $hidden_unit -alr $a_lr -clr $c_lr -ec $i
  # import2drive()

  

In [0]:
## Run PPO discrete
from utils import save_config_colab
rounds = 1
for i in range(rounds):
  sample = cs.sample_configuration()
  print(f'__starting round {i}')
  for item in sample.get_dictionary().items():
    print(item)
  save_config_colab(sample.get_dictionary(), i, 'PPO_discrete')
  episode = sample.get('episode') # -e
  reward = sample.get('reward_func') # -rw
  action_dim = sample.get('action_dim') # -ac
  timesteps = sample.get('timesteps') # -s
  hidden_unit = sample.get('hidden_unit') # -hd
  a_lr = sample.get('a_lr') # -alr
  c_lr = sample.get('c_lr') # -clr
  k_ep = sample.get('K_epochs') # -ke
  action_std = sample.get('action_std') # -ad 
  entropy_coeff = sample.get('entropy_coeff') # -eco
  epsilon_clip = sample.get('eps_clip') # -ecp
  update_timesteps = sample.get('update_timesteps') # -us

  !python run_ppo_discrete.py -ec $i -ac $action_dim -e $episode -s $timesteps -rw $reward -hd $hidden_unit -alr $a_lr -clr $c_lr -ke $k_ep -ad $action_std -eco $entropy_coeff  -ecp $epsilon_clip -us $update_timesteps
  # import2drive()

In [0]:
## Run DDPG continuous
from utils import save_config_colab
rounds = 21
relevant_params = ['episode', 'timesteps', 'reward_func', 'hidden_unit', 'action_std', 'a_lr', 'c_lr']
for i in range(16, rounds):
  sample = cs.sample_configuration()
  print(f'__starting round {i}')
  for rel_param in relevant_params:
    print(f'{rel_param}: {sample.get(rel_param)}')

  save_config_colab(sample.get_dictionary(), i, 'DDPG_continuous')
  episode = sample.get('episode') # -e
  reward = sample.get('reward_func') # -rw
  timesteps = sample.get('timesteps') # -s
  hidden_unit = sample.get('hidden_unit') # -hd
  a_lr = sample.get('a_lr') # -alr
  c_lr = sample.get('c_lr') # -clr
  action_std = sample.get('action_std') # -ad 

  !python run_ddpg_continuous.py -ec $i -e $episode -s $timesteps -rw $reward -hd $hidden_unit -alr $a_lr -clr $c_lr -ad $action_std
  import2drive()