# Tune Visualization

To visualize results, install `plotly` with the following command:

  `pip install plotly`

The regression summary in the final cell additionally requires `statsmodels` (`pip install statsmodels`).

In [1]:
import os
import pandas as pd
from ray.tune.visual_utils import load_results_to_df, generate_plotly_dim_dict
import plotly
import plotly.graph_objs as go
# Initialize plotly's offline notebook mode before any figure is drawn with
# plotly.offline.iplot further down.
# NOTE(review): connected=True presumably loads plotly.js from a CDN, so the
# rendered figure would need internet access to display — confirm against the plotly docs.
plotly.offline.init_notebook_mode(connected=True)

### Set `RESULTS_DIR` to the directory that contains all of your results.

In [3]:
# Location of the experiment logs; expanduser only has an effect when the path starts with "~".
RESULTS_DIR = os.path.expanduser("../../logs/marketorderenv/hpsearch-ppo-aggressive/")

# Load whatever results are found under RESULTS_DIR into a DataFrame.
df = load_results_to_df(RESULTS_DIR)

# Display the available column names so fields can be chosen for visualization.
list(df.columns)

['batch_mode',
 'callbacks:on_episode_end',
 'clip_param',
 'clip_rewards',
 'config:batch_mode',
 'config:callbacks:on_episode_end',
 'config:callbacks:on_episode_start',
 'config:callbacks:on_episode_step',
 'config:callbacks:on_sample_end',
 'config:callbacks:on_train_result',
 'config:clip_actions',
 'config:clip_param',
 'config:clip_rewards',
 'config:collect_metrics_timeout',
 'config:compress_observations',
 'config:entropy_coeff',
 'config:env',
 'config:env_config:max_episode_time',
 'config:env_config:max_sequence_skip',
 'config:env_config:random_start',
 'config:gamma',
 'config:grad_clip',
 'config:horizon',
 'config:input',
 'config:input_evaluation',
 'config:kl_coeff',
 'config:kl_target',
 'config:lambda',
 'config:local_evaluator_tf_session_args:inter_op_parallelism_threads',
 'config:local_evaluator_tf_session_args:intra_op_parallelism_threads',
 'config:log_level',
 'config:lr',
 'config:lr_schedule',
 'config:model:conv_activation',
 'config:model:conv_filters',
 

### List the fields you wish to visualize in `GOOD_FIELDS`.

In [5]:
# Columns carried into the visualization: the trial identifier, the tuned
# hyperparameters, and the metric used to rank trials.
GOOD_FIELDS = [
    'experiment_id',
    'train_batch_size',
    'sgd_minibatch_size',
    'num_sgd_iter',
    'lr',
    'custom_metrics:capital_return_mean',
]

# Keep only the chosen columns and discard rows with any missing value.
visualization_df = df[GOOD_FIELDS].dropna()

# Display the rows ranked best-first by mean capital return
# (the sorted frame is only shown, not stored).
visualization_df.sort_values(by='custom_metrics:capital_return_mean', ascending=False)

Unnamed: 0,experiment_id,train_batch_size,sgd_minibatch_size,num_sgd_iter,lr,custom_metrics:capital_return_mean
62,cb89a647b11d420996bd9e786c89c46e,10000,1024,30,0.0050,0.008753
25,e140a3ee25a947849ca39cdd2300eaca,40000,1024,5,0.0050,0.001192
87,730e2362fd7e4213bc379bc89a4984a7,20000,4096,30,0.0050,-0.000083
31,d140cadceb39435c867573f8b133b8f7,10000,1024,10,0.0050,-0.004088
1,1185a5baeddd448db9c9ddd309ff888c,20000,10000,10,0.0050,-0.004797
70,b18bde6b14d94f349c1585f1a6afd51c,20000,10000,5,0.0050,-0.005081
24,2b910ea4f05748c3bd74b3e4cb3c0728,40000,4096,30,0.0050,-0.005980
28,eabcf4c573574dfd9ad80cc7515597bb,40000,4096,10,0.0050,-0.007456
19,0d9cf9b696ee42c28ee0416c0d543faf,10000,4096,10,0.0050,-0.007978
56,18f8096a9a6e4580ac97067c86619539,10000,4096,5,0.0050,-0.008571


### Enjoy the parallel-coordinates plot.

Documentation for this Plotly visualization can be found here: https://plot.ly/python/parallel-coordinates-plot/

In [6]:
# Parallel-coordinates trace whose lines are coloured by the mean capital return,
# with the colour range pinned to the observed minimum and maximum of that metric.
data = [
    go.Parcoords(
        line={
            'color': visualization_df['custom_metrics:capital_return_mean'],
            'colorscale': 'Jet',
            'showscale': True,
            # 'reversescale': True,  # uncomment to flip the colour scale
            'cmin': visualization_df['custom_metrics:capital_return_mean'].min(),
            'cmax': visualization_df['custom_metrics:capital_return_mean'].max(),
        },
        # One dimension entry per column of visualization_df.
        dimensions=[
            generate_plotly_dim_dict(visualization_df, column)
            for column in visualization_df.columns
        ],
    )
]

# Render the figure in the notebook output.
plotly.offline.iplot(data)

In [7]:
# For each tuned hyperparameter, print summary statistics of the mean capital
# return grouped by that hyperparameter's values.
summary_stats = ['mean', 'median', 'min', 'max']
for col in ('train_batch_size', 'sgd_minibatch_size', 'num_sgd_iter', 'lr'):
    grouped_returns = visualization_df.groupby(col)['custom_metrics:capital_return_mean']
    print(grouped_returns.agg(summary_stats))

                      mean    median       min       max
train_batch_size                                        
10000            -0.096306 -0.032323 -0.533994  0.008753
20000            -0.103389 -0.029453 -0.882600 -0.000083
40000            -0.185854 -0.083351 -0.889274  0.001192
                        mean    median       min       max
sgd_minibatch_size                                        
1024               -0.149814 -0.041379 -0.882600  0.008753
4096               -0.118668 -0.040196 -0.889274 -0.000083
10000              -0.117067 -0.050763 -0.533994 -0.004797
                  mean    median       min       max
num_sgd_iter                                        
5            -0.204260 -0.115241 -0.882600  0.001192
10           -0.140919 -0.085053 -0.889274 -0.004088
30           -0.040370 -0.026710 -0.211210  0.008753
            mean    median       min       max
lr                                            
0.0005 -0.227664 -0.172021 -0.889274 -0.012198
0.0050 -0.0293

In [9]:
import statsmodels.api as sm

# Ordinary least squares of the mean capital return on the tuned
# hyperparameters; add_constant supplies the intercept column.
regressors = sm.add_constant(
    visualization_df[['train_batch_size', 'sgd_minibatch_size', 'num_sgd_iter', 'lr']]
)
ols_fit = sm.OLS(visualization_df['custom_metrics:capital_return_mean'], regressors).fit()

# Display the fitted model's summary tables as the cell output.
ols_fit.summary()


Method .ptp is deprecated and will be removed in a future version. Use numpy.ptp instead.



0,1,2,3
Dep. Variable:,custom_metrics:capital_return_mean,R-squared:,0.445
Model:,OLS,Adj. R-squared:,0.424
Method:,Least Squares,F-statistic:,20.68
Date:,"Tue, 21 May 2019",Prob (F-statistic):,1.55e-12
Time:,22:14:38,Log-Likelihood:,58.628
No. Observations:,108,AIC:,-107.3
Df Residuals:,103,BIC:,-93.85
Df Model:,4,,
Covariance Type:,nonrobust,,

0,1,2,3,4,5,6
,coef,std err,t,P>|t|,[0.025,0.975]
const,-0.2841,0.043,-6.567,0.000,-0.370,-0.198
train_batch_size,-3.148e-06,1.11e-06,-2.834,0.006,-5.35e-06,-9.45e-07
sgd_minibatch_size,3.196e-06,3.72e-06,0.859,0.392,-4.18e-06,1.06e-05
num_sgd_iter,0.0061,0.001,4.771,0.000,0.004,0.009
lr,44.0656,6.157,7.156,0.000,31.854,56.278

0,1,2,3
Omnibus:,57.261,Durbin-Watson:,2.123
Prob(Omnibus):,0.0,Jarque-Bera (JB):,177.666
Skew:,-1.959,Prob(JB):,2.63e-39
Kurtosis:,7.912,Cond. No.,11900000.0
