In [3]:
import sys
import os

# Put the parent of the current working directory on the import path
# (presumably so that the project's own packages can be imported below).
notebook_dir = os.path.abspath('./')
project_path = os.path.dirname(notebook_dir)

# Guarded so that re-running the cell does not add a duplicate entry.
if project_path not in sys.path:
    sys.path.append(project_path)

In [12]:
import pandas as pd
import numpy as np

from crome.optimize import pareto
from crome.config import config

# Value of the `project_path` entry in the DEFAULT section of crome's config
# object (presumably the repository root -- confirm against crome.config).
PROJECT_PATH = config['DEFAULT']['project_path']

In [7]:
# Read the scores report, stored in pandas' "table" JSON orientation,
# from the reports directory under PROJECT_PATH.
scores_path = '{}/reports/results/scores.json'.format(PROJECT_PATH)
df = pd.read_json(scores_path, orient='table')

In [27]:
# Inspect column dtypes once the 'N/A' placeholder strings are treated as missing.
# `np.nan` is used because the `np.NaN` alias was removed in NumPy 2.0.
df.replace('N/A', np.nan).dtypes

month                                     int64
train.basic.auc_score                   float64
train.basic.f1_score                    float64
train.basic.precision_score             float64
train.basic.recall_score                float64
valid.basic.auc_score                   float64
valid.basic.f1_score                    float64
valid.basic.precision_score             float64
valid.basic.recall_score                float64
test.basic.auc_score                    float64
test.basic.f1_score                     float64
test.basic.precision_score              float64
test.basic.recall_score                 float64
test.additional.pred.true_positives       int64
test.additional.pred.early_pred           int64
test.additional.pred.early_pred.dist    float64
test.additional.pred.early_pred.time    float64
test.additional.pred.late_pred            int64
test.additional.pred.false_positives      int64
test.additional.true.true_positives       int64
test.additional.true.early_pred.dist    

In [15]:
# Average the validation metrics over every run that shares the same setting
# (delta_s, delta_t, model, features). The string 'mean' is used instead of
# `np.mean` because passing NumPy callables to `agg` is deprecated in
# pandas >= 2.1; 'mean' keeps the current behaviour.
table = df.groupby(['delta_s', 'delta_t', 'model', 'features']).agg({
    'valid.basic.f1_score': 'mean',
    'valid.basic.precision_score': 'mean',
    'valid.basic.recall_score': 'mean',
}).reset_index()

# Objectives are passed to `eps_sort` as column positions in `table`; derive
# them from the column names rather than hard-coding 0, 1 and 4.
f1_position = table.columns.get_loc('valid.basic.f1_score')
objective_positions = [
    table.columns.get_loc('delta_s'),
    table.columns.get_loc('delta_t'),
    f1_position,
]

# Only the F1 column is listed under `maximize`; the two delta objectives are
# presumably minimised (eps_sort's default -- confirm in crome.optimize.pareto).
# One epsilon is given per objective, in the same order as the objectives.
optimal_settings = pareto.eps_sort(
    table.values.tolist(),
    objectives=objective_positions,
    maximize=[f1_position],
    epsilons=[1e3, 5, 1e-3],
)

# Rebuild a frame from the rows kept by the sort, with friendlier metric names,
# and add a 1-based rank following the order in which the rows were returned.
opt_df = pd.DataFrame(optimal_settings, columns=['delta_s', 'delta_t', 'model', 'features', 'F1 Score', 'Precision', 'Recall'])
opt_df = opt_df.assign(rank=opt_df.index + 1)

In [18]:
# Show only the setting columns of the rows kept by the Pareto sort,
# each flagged with a constant `optimal=True` column.
opt_df.loc[:, ['delta_s', 'delta_t', 'model', 'features']].assign(optimal=True)

Unnamed: 0,delta_s,delta_t,model,features,optimal
0,1000,5,cnn,precip_mean,True
1,2000,5,cnn,congestion_mean,True
2,2000,10,cnn,base,True
3,2000,15,cnn,congestion_mean,True
4,2000,30,cnn,congestion_mean,True
5,3000,5,cnn,congestion_mean,True
6,3000,10,cnn,congestion_mean,True
7,3000,30,cnn,precip_mean,True
8,4000,5,cnn,precip_mean,True
9,4000,10,cnn,congestion_mean,True


In [19]:
# Display the per-run results frame (pandas truncates the rendering of rows and columns).
df

Unnamed: 0,month,train.basic.auc_score,train.basic.f1_score,train.basic.precision_score,train.basic.recall_score,valid.basic.auc_score,valid.basic.f1_score,valid.basic.precision_score,valid.basic.recall_score,test.basic.auc_score,...,test.additional.true.early_pred.time,test.additional.true.early_pred,test.additional.true.late_pred,test.additional.true.false_negative,config.num_epochs,config.threshold,delta_t,delta_s,features,model
0,9,0.974965,0.551419,0.454142,0.701727,0.939449,0.421693,0.335446,0.570137,0.943808,...,4221.821429,294,241,424,7,0.172329,15,5000,congestion_mean+precip_mean,cnn
1,10,0.972139,0.513756,0.388988,0.756358,0.932352,0.413793,0.323702,0.575348,0.938595,...,6236.250000,418,288,410,7,0.149724,15,5000,congestion_mean+precip_mean,cnn
2,11,0.975957,0.528694,0.404983,0.761227,0.936178,0.417447,0.334167,0.559434,0.938559,...,5425.000000,374,228,354,7,0.164119,15,5000,congestion_mean+precip_mean,cnn
3,12,0.972940,0.538707,0.433000,0.712693,0.931315,0.418336,0.324676,0.592221,0.939432,...,4269.000000,299,231,396,7,0.157852,15,5000,congestion_mean+precip_mean,cnn
4,9,0.986304,0.488986,0.375716,0.700031,0.915718,0.304282,0.236842,0.443354,0.952699,...,2860.000000,189,227,543,6,0.12093,30,3000,congestion_mean+precip_mean,cnn
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
715,12,0.997284,0.637207,0.528094,0.803154,0.746145,0.172695,0.151789,0.213470,0.847984,...,1505.000000,104,146,676,3,0.091611,30,1000,congestion_mean,cnn
716,9,0.996243,0.635410,0.524995,0.804640,0.737004,0.154978,0.127422,0.203143,0.862723,...,2002.285714,132,164,663,2,0.066545,20,1000,base,cnn
717,10,0.996673,0.740162,0.631429,0.894134,0.749787,0.182456,0.168001,0.206412,0.802696,...,2624.666667,173,198,745,3,0.087811,20,1000,base,cnn
718,11,0.995570,0.593014,0.459371,0.836324,0.727509,0.170005,0.157559,0.189935,0.789775,...,2538.333333,177,188,591,2,0.077606,20,1000,base,cnn


In [None]:
# Settings kept by the Pareto sort, as plain (delta_s, delta_t, model, features)
# tuples. The original assignment had no right-hand side, which is a SyntaxError.
# A set gives constant-time membership tests in the row-wise check below.
optimal_indexes = set(
    opt_df[['delta_s', 'delta_t', 'model', 'features']].itertuples(index=False, name=None)
)

# Flag every run in `df` whose setting is one of the optimal ones.
df['is_optimal'] = df.apply(
    lambda row: (row['delta_s'], row['delta_t'], row['model'], row['features']) in optimal_indexes,
    axis=1,
)