### Azure ML Resources
- [Tune Hyperparameters](https://docs.microsoft.com/en-us/azure/machine-learning/service/how-to-tune-hyperparameters)

In [1]:
import azureml.core
from azureml.core import Experiment
from azureml.core.workspace import Workspace
from azureml.core.compute import ComputeTarget
from azureml.train.estimator import Estimator
from azureml.train.hyperdrive import *
from azureml.widgets import RunDetails

# Workspace handle shared by the rest of the notebook; loaded from a config file
# rather than from credentials written in the notebook.
ws = Workspace.from_config()

Found the config file in: /home/common/notebooks/AML/config.json


In [2]:
# Handles to existing compute clusters in the workspace, looked up by name.

# each node has 1 P100
p100_1 = ComputeTarget(ws, 'p100-1')
# each node has 2 P100s
p100_2 = ComputeTarget(ws, 'p100-2')
# each node has 1 P100, low-priority
p100_1_low = ComputeTarget(ws, 'p100-1-low')
# each node has 1 K80
k80 = ComputeTarget(ws, 'k80')

def train(name, cluster, args, hyper_params=None,
          source_directory='./MBRL',
          entry_script='train.py',
          docker_image='zer0n/rl',
          total_runs=100):
    """Submit a single training run, or a random hyperparameter sweep, to an experiment.

    Args:
        name: Name of the experiment, created/looked up in the notebook-level
            workspace ``ws``.
        cluster: Compute target the estimator runs on.
        args: Script parameters forwarded to ``entry_script`` through the
            estimator's ``script_params``.
        hyper_params: Optional search space passed to ``RandomParameterSampling``.
            When falsy, one plain run is submitted instead of a sweep.
        source_directory: Directory handed to the estimator as its source.
        entry_script: Script the estimator runs.
        docker_image: Custom Docker image used for the run.
        total_runs: Upper bound on runs in the sweep; unused without ``hyper_params``.

    Returns:
        The value returned by ``Experiment.submit`` for the submitted configuration.
    """
    trainer = Estimator(source_directory=source_directory,
                        # Forward the caller's arguments; do not rely on a
                        # notebook-global `params` happening to exist.
                        script_params=args,
                        compute_target=cluster,
                        entry_script=entry_script,
                        custom_docker_image=docker_image,
                        use_gpu=True,
                        user_managed=True)
    experiment = Experiment(ws, name=name)

    if hyper_params:
        # Random search over `hyper_params`, maximizing "Average Return" and
        # stopping runs early according to a median stopping policy.
        sweeper = HyperDriveRunConfig(estimator=trainer,
                                      hyperparameter_sampling=RandomParameterSampling(hyper_params),
                                      primary_metric_name="Average Return",
                                      primary_metric_goal=PrimaryMetricGoal.MAXIMIZE,
                                      policy=MedianStoppingPolicy(evaluation_interval=1, delay_evaluation=5),
                                      max_total_runs=total_runs)
        return experiment.submit(sweeper)
    else:
        return experiment.submit(trainer)

def process_params(params: dict, user: str, throwaway=False):
    """Set ``params['log_dir']`` in place from the environment, user and parameters.

    Args:
        params: Run parameters; must contain the key ``'env'``. Mutated in place.
        user: User name used as a path component of the log directory.
        throwaway: When true, log to a fixed ``throwaway`` directory instead of
            one named after the parameter values.

    Returns:
        None. The result is written to ``params['log_dir']``.

    Raises:
        KeyError: If ``params`` has no ``'env'`` entry.
    """
    # Both branches need the environment name, so read it before branching.
    env = params['env']
    if throwaway:
        params['log_dir'] = f'../log/{env}/{user}/throwaway'
    else:
        # 'log_dir' is excluded so that calling this again on the same dict
        # (e.g. re-running a cell) does not embed the previous path in the new one.
        excluded = ('env', 'evaluation_frequency', 'log_dir')
        params_str = ','.join(f"{key}={val}" for key, val in params.items() if key not in excluded)
        params['log_dir'] = f'../log/{env}/{user}/{params_str}'
    
#     for key in list(params):
#         params[f'--{key}'] = params.pop(key)  # add -- to the key name

In [9]:
# Environment id strings bound to short names.
# NOTE(review): presumably these are the values used for params['env'] — confirm.
pendulum, half_cheetah, humanoid = (
    'Pendulum-v0',
    'HalfCheetah-v2',
    'Humanoid-v0',
)