In [1]:
import os

import pandas as pd
import plotly
import plotly.graph_objs as go
import statsmodels.api as sm
from ray.tune.visual_utils import load_results_to_df, generate_plotly_dim_dict
# Initialise plotly's offline notebook mode up front; the figure further down
# is drawn with plotly.offline.iplot.
plotly.offline.init_notebook_mode(connected=True)

### Set `RESULTS_DIR` to the directory that contains all your results.

In [2]:
# Directory handed to load_results_to_df; expanduser resolves a leading "~"
# if the path is changed to use one.
RESULTS_DIR = os.path.expanduser("../../logs_new_longer/marketorderenv/hpsearch-ppo-4/")
df = load_results_to_df(RESULTS_DIR)

# Show every column label so the fields of interest can be picked from the list.
list(df)

['batch_mode',
 'callbacks:on_episode_end',
 'clip_param',
 'clip_rewards',
 'config:batch_mode',
 'config:callbacks:on_episode_end',
 'config:callbacks:on_episode_start',
 'config:callbacks:on_episode_step',
 'config:callbacks:on_sample_end',
 'config:callbacks:on_train_result',
 'config:clip_actions',
 'config:clip_param',
 'config:clip_rewards',
 'config:collect_metrics_timeout',
 'config:compress_observations',
 'config:entropy_coeff',
 'config:env',
 'config:env_config:max_episode_time',
 'config:env_config:max_sequence_skip',
 'config:env_config:random_start',
 'config:gamma',
 'config:grad_clip',
 'config:horizon',
 'config:input',
 'config:input_evaluation',
 'config:kl_coeff',
 'config:kl_target',
 'config:lambda',
 'config:local_evaluator_tf_session_args:inter_op_parallelism_threads',
 'config:local_evaluator_tf_session_args:intra_op_parallelism_threads',
 'config:log_level',
 'config:lr',
 'config:lr_schedule',
 'config:model:conv_activation',
 'config:model:conv_filters',
 

### List the fields you wish to visualize in `GOOD_FIELDS`.

In [5]:
# Columns carried over from the full results frame into the visualisation frame.
GOOD_FIELDS = [
    'experiment_id',
    'train_batch_size',
    'num_sgd_iter',
    'lr',
    'clip_param',
    'custom_metrics:capital_return_mean',
]

# Keep only the chosen columns and drop every row that has a missing value in any of them.
visualization_df = df[GOOD_FIELDS].dropna()

# Display the rows ordered from highest to lowest capital return.
# sort_values returns a new frame, so visualization_df itself stays unsorted.
visualization_df.sort_values(by='custom_metrics:capital_return_mean', ascending=False)

Unnamed: 0,experiment_id,train_batch_size,num_sgd_iter,lr,clip_param,custom_metrics:capital_return_mean
26,b79eec754b1c4f769d9f85fb5b38b7c3,40000,5,0.005,0.3,-0.000482
16,28b1d6f5f7584a8b892cd58b3643ca8a,40000,5,0.005,0.5,-0.000619
28,3d283e86bc2a4cd190ae6aae8a933664,40000,10,0.005,0.5,-0.000819
19,46cd6f5130564b6784814ecc968ec403,40000,10,0.005,0.5,-0.001637
5,363b88f00aa849f1a5573a34f8123c5c,80000,5,0.005,0.5,-0.002139
0,da4b915bfec44c488d2ff7b05dcca565,80000,5,0.005,0.3,-0.00217
6,0094eb31b82b474095e867551a445df9,80000,5,0.005,0.3,-0.003818
4,46273bd2662c4ab78a948d406834e8a8,40000,5,0.005,0.5,-0.003857
11,e18d3a15b48e45518855693c3d0536a6,80000,10,0.0005,0.5,-0.003872
23,7b44b2e9c2924a2cbca553418bf3f56f,40000,10,0.005,0.3,-0.004222


### Enjoy.

Documentation for this Plotly visualization can be found here: https://plot.ly/python/parallel-coordinates-plot/

In [6]:
# The metric column drives both the line colours and the colour-bar range,
# so look it up once and reuse it.
capital_return = visualization_df['custom_metrics:capital_return_mean']

line_style = {
    'color': capital_return,
    'colorscale': 'Jet',
    'showscale': True,
    # Add 'reversescale': True here to flip the colour scale.
    'cmin': capital_return.min(),
    'cmax': capital_return.max(),
}

# One parallel-coordinates axis per column of the visualisation frame.
parcoords_axes = [
    generate_plotly_dim_dict(visualization_df, column_name)
    for column_name in visualization_df.columns
]

data = [go.Parcoords(line=line_style, dimensions=parcoords_axes)]

plotly.offline.iplot(data)

In [9]:
# For each hyperparameter column, print summary statistics of the capital-return
# metric grouped by that column's distinct values.
summary_stats = ['mean', 'median', 'min', 'max']
for hyperparam in ['train_batch_size', 'num_sgd_iter', 'clip_param', 'lr']:
    return_by_value = visualization_df.groupby(hyperparam)['custom_metrics:capital_return_mean']
    print(return_by_value.agg(summary_stats))

                      mean    median       min       max
train_batch_size                                        
40000            -0.011626 -0.005869 -0.073959 -0.000482
80000            -0.031318 -0.009146 -0.128863 -0.002139
                  mean    median       min       max
num_sgd_iter                                        
5            -0.029477 -0.007591 -0.128863 -0.000482
10           -0.013467 -0.007661 -0.077784 -0.000819
                mean    median       min       max
clip_param                                        
0.3        -0.019602 -0.007045 -0.128863 -0.000482
0.5        -0.023342 -0.008675 -0.108907 -0.000619
            mean    median       min       max
lr                                            
0.0005 -0.033161 -0.012695 -0.128863 -0.003872
0.0050 -0.009783 -0.004040 -0.077784 -0.000482


In [10]:
# Ordinary least squares of the capital-return metric on the four hyperparameter
# columns, with an intercept column added by sm.add_constant.
# `sm` is statsmodels.api, imported together with the notebook's other dependencies.
#
# NOTE(review): the regressors sit on very different scales (train_batch_size in
# the tens of thousands, lr below 0.01), so raw coefficient magnitudes are not
# directly comparable; consider standardising the columns before comparing them.
ols_target = visualization_df['custom_metrics:capital_return_mean']
ols_regressors = sm.add_constant(
    visualization_df[['train_batch_size', 'num_sgd_iter', 'lr', 'clip_param']]
)

# Keep the fitted result in a variable so it can be inspected beyond the summary table.
ols_fit = sm.OLS(ols_target, ols_regressors).fit()
ols_fit.summary()


Method .ptp is deprecated and will be removed in a future version. Use numpy.ptp instead.



0,1,2,3
Dep. Variable:,custom_metrics:capital_return_mean,R-squared:,0.276
Model:,OLS,Adj. R-squared:,0.169
Method:,Least Squares,F-statistic:,2.571
Date:,"Mon, 13 May 2019",Prob (F-statistic):,0.0606
Time:,22:31:38,Log-Likelihood:,68.876
No. Observations:,32,AIC:,-127.8
Df Residuals:,27,BIC:,-120.4
Df Model:,4,,
Covariance Type:,nonrobust,,

0,1,2,3,4,5,6
,coef,std err,t,P>|t|,[0.025,0.975]
const,-0.0228,0.033,-0.696,0.492,-0.090,0.044
train_batch_size,-4.923e-07,2.71e-07,-1.819,0.080,-1.05e-06,6.29e-08
num_sgd_iter,0.0032,0.002,1.479,0.151,-0.001,0.008
lr,5.1951,2.405,2.160,0.040,0.260,10.130
clip_param,-0.0187,0.054,-0.346,0.732,-0.130,0.092

0,1,2,3
Omnibus:,10.12,Durbin-Watson:,1.633
Prob(Omnibus):,0.006,Jarque-Bera (JB):,8.969
Skew:,-1.219,Prob(JB):,0.0113
Kurtosis:,3.883,Cond. No.,28100000.0
