In [1]:
# Modify the file 00_setup.py to define input/output file paths on your system
# The information in 00_setup.py will be used across notebooks
from importlib.machinery import SourceFileLoader
setup = SourceFileLoader("setup", "./00_setup.py").load_module()

# Performance Summary
Combine performance of models tested, and summarize

Also combine all metrics for easy comparisons

*This script takes about 1 minutes on my MacBook Air*

In [2]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

In [3]:
from pathlib import Path

In [4]:
from sba_gnn.sba_gnn import sg_plot 

In [5]:
sg_plot.plot_defaults()

## File Dictionaries

##### XGB basic tests

In [6]:
dict_pred_1_xgb = {
    'xgb base': Path(setup.temp_path).joinpath('03_DATA_combined_predictions.parquet'),
    'xgb mean':  Path(setup.temp_path).joinpath('04_DATA_combined_predictions.parquet'),
    'xgb one hot':  Path(setup.temp_path).joinpath('05_DATA_combined_predictions.parquet'),
    'xgb numeric naics':  Path(setup.temp_path).joinpath('06_DATA_combined_predictions.parquet'),
    'xgb mean':  Path(setup.temp_path).joinpath('07_DATA_combined_predictions.parquet'),
    'xgb hier': Path(setup.temp_path).joinpath('10_DATA_combined_predictions.parquet')
}

##### Neural network basic

In [7]:
dict_pred_2_nn_basic = {
    'nn base': Path(setup.temp_path).joinpath('21_DATA_predictions.parquet'),
    'nn emb': Path(setup.temp_path).joinpath('22_DATA_predictions.parquet'),
    'nn menc': Path(setup.temp_path).joinpath('23_DATA_predictions.parquet'),
    'nn hier': Path(setup.temp_path).joinpath('24_DATA_predictions.parquet'),
    'nn emb remap': Path(setup.temp_path).joinpath('25_DATA_predictions.parquet'),
}

##### XGB + embeddings

In [8]:
dict_pred_3_emb = {
    'xgb emb nn':  Path(setup.temp_path).joinpath('40_DATA_combined_predictions.parquet'),
    'xgb dgi base':  Path(setup.temp_path).joinpath('50_DATA_combined_predictions.parquet'),
    'xgb dgi base+label':  Path(setup.temp_path).joinpath('52_DATA_combined_predictions.parquet')
}

##### Clustering - Mean Encoding

In [9]:
dict_pred_4_clus_menc = {
    'xgb clus menc nn':  Path(setup.temp_path).joinpath('62_DATA_combined_predictions.parquet'),
    'xgb clus dgi hier':  Path(setup.temp_path).joinpath('65_DATA_combined_predictions.parquet')
}

##### Clustering - One Hot Encoding

In [10]:
dict_pred_5_oh = {
    'xgb clus oh nn':  Path(setup.temp_path).joinpath('61_DATA_combined_predictions.parquet'),
    'xgb clus oh dgi base':  Path(setup.temp_path).joinpath('64_DATA_combined_predictions.parquet'),
    'xgb clus oh dgi base+label':  Path(setup.temp_path).joinpath('71_DATA_combined_predictions.parquet')
}

## Loop Dictionaries
Some scripts have multiple models - pull these in a little differently

In [11]:
dict_loop = {
        'xgb hier vary start':  Path(setup.temp_path).joinpath('11_DATA_combined_predictions.parquet'),
        'xgb hier pair':  Path(setup.temp_path).joinpath('12_DATA_combined_predictions.parquet'),
        'xgb dgi hier v start':  Path(setup.temp_path).joinpath('66_DATA_combined_predictions.parquet'),
        'xgb dgi hier v start (alt)':  Path(setup.temp_path).joinpath('67_DATA_combined_predictions.parquet'),
        'xgb dgi hier pair':  Path(setup.temp_path).joinpath('68_DATA_combined_predictions.parquet')
}

## Combine predictions

##### Get dictionary of all files

In [12]:
all_dict_list = [dict_pred_1_xgb, dict_pred_2_nn_basic, 
                 dict_pred_3_emb, dict_pred_4_clus_menc, dict_pred_5_oh, dict_loop]

In [13]:
dict_pred= {k: v for d in all_dict_list for k, v in d.items()}

In [14]:
# Verify these all exist
all([Path(v).exists() for k, v in dict_pred.items()])

True

##### Standard metrics
Get filenames for standard metrics calculated in all scripts. These all have similar file naming

In [15]:
dict_metrics = {k:Path(setup.temp_path).joinpath(v.name[0:2] + '_REPORT_metrics.csv') \
                for k, v in dict_pred.items()}

In [16]:
# Verify these all exist
all([Path(v).exists() for k, v in dict_metrics.items()])

True

## Combine standard metrics

In [17]:
all_metrics = pd.concat([pd.read_csv(v) for k, v in dict_metrics.items()],
                        keys = dict_metrics.keys()) \
    .reset_index(level=0) \
    .drop(columns=['Unnamed: 0'], errors='ignore') \
    .rename(columns={'level_0':'model'})

In [18]:
all_metrics['script_num'] = all_metrics['model'].apply(lambda x: dict_metrics[x].name[0:2])

##### Modify loop field names

In [19]:
all_metrics[all_metrics['model'].isin(dict_loop.keys())]

Unnamed: 0,model,dset,accuracy_score,f1_score,precision_score,recall_score,average_precision_score,roc_auc_score,dset_naics_holdout,start_naics,script_num
0,xgb hier vary start,test,0.674488,0.442278,0.332824,0.659000,0.376333,0.731086,,NAICS_5,11
1,xgb hier vary start,train,0.664677,0.454880,0.340835,0.683622,0.392977,0.735772,,NAICS_5,11
2,xgb hier vary start,val,0.661267,0.451350,0.339903,0.671532,0.385388,0.726702,,NAICS_5,11
3,xgb hier vary start,test,0.660671,0.450608,0.337801,0.676533,0.382942,0.728478,0.0,NAICS_5,11
4,xgb hier vary start,test,0.694337,0.428456,0.324481,0.630488,0.367300,0.732314,1.0,NAICS_5,11
...,...,...,...,...,...,...,...,...,...,...,...
25,xgb dgi hier pair,test,0.672021,0.438179,0.329701,0.653041,0.371179,0.730046,,cluster_003,68
26,xgb dgi hier pair,train,0.666546,0.454316,0.341544,0.678267,0.393383,0.735652,,cluster_003,68
27,xgb dgi hier pair,val,0.662711,0.450020,0.340057,0.665085,0.385624,0.726945,,cluster_003,68
28,xgb dgi hier pair,test,0.661687,0.448672,0.337451,0.669250,0.383301,0.728486,0.0,cluster_003,68


In [20]:
all_metrics['model'] = np.where(all_metrics['model'].isin(dict_loop.keys()),
                                all_metrics['model'] + ':' + all_metrics['start_naics'],
                                all_metrics['model'])

In [21]:
all_metrics.tail(3)

Unnamed: 0,model,dset,accuracy_score,f1_score,precision_score,recall_score,average_precision_score,roc_auc_score,dset_naics_holdout,start_naics,script_num
27,xgb dgi hier pair:cluster_003,val,0.662711,0.45002,0.340057,0.665085,0.385624,0.726945,,cluster_003,68
28,xgb dgi hier pair:cluster_003,test,0.661687,0.448672,0.337451,0.66925,0.383301,0.728486,0.0,cluster_003,68
29,xgb dgi hier pair:cluster_003,test,0.686866,0.421077,0.317055,0.626681,0.35215,0.731225,1.0,cluster_003,68


In [22]:
all_metrics.to_csv(Path(setup.temp_path).joinpath('80_REPORT_metrics_combined.csv'), index=False)

## Summary Data
For test dataset only, show AUC and f1 scores for test dataset, along with f1 scores for holdout and low-volume NAICS

In [23]:
overall_stats =  all_metrics[(all_metrics['dset'] == 'test') & \
                             (all_metrics['dset_naics_holdout'] == 0)] \
    [['script_num', 'model', 'roc_auc_score', 'average_precision_score', 'f1_score', ]]

In [24]:
holdout_stats = all_metrics[(all_metrics['dset'] == 'test') & \
                            (all_metrics['dset_naics_holdout'] == 1)] \
    [['model', 'roc_auc_score', 'average_precision_score', 'f1_score']] \
    .rename(columns={'f1_score':'f1_score_ho', 'average_precision_score':'ap_ho',
                    'roc_auc_score':'roc_ho'})

In [25]:
overall_stats = overall_stats.merge(holdout_stats, on='model')

##### Selected stats

In [26]:
overall_stats.sort_values('average_precision_score', ascending=False).head(12)

Unnamed: 0,script_num,model,roc_auc_score,average_precision_score,f1_score,roc_ho,ap_ho,f1_score_ho
1,7,xgb mean,0.730515,0.38699,0.451451,0.73269,0.354598,0.423457
10,40,xgb emb nn,0.730254,0.385856,0.451501,0.717489,0.3311,0.391092
36,68,xgb dgi hier pair:cluster_834,0.728603,0.384275,0.449924,0.728745,0.339113,0.427372
26,66,xgb dgi hier v start:cluster_834,0.728585,0.384263,0.450125,0.712529,0.327377,0.408818
30,67,xgb dgi hier v start (alt):cluster_834,0.728585,0.384263,0.450125,0.712529,0.327377,0.408818
19,11,xgb hier vary start:NAICS_4,0.728771,0.384041,0.450069,0.737514,0.368883,0.433219
40,68,xgb dgi hier pair:cluster_010,0.728885,0.383998,0.44966,0.735481,0.356854,0.422107
34,67,xgb dgi hier v start (alt):cluster_010,0.728885,0.383998,0.44966,0.735481,0.356854,0.422107
20,11,xgb hier vary start:NAICS_3,0.728817,0.383981,0.44974,0.735148,0.358774,0.426627
24,12,xgb hier pair:NAICS_3,0.728789,0.383948,0.44966,0.735078,0.358998,0.42718


In [27]:
overall_stats.sort_values('ap_ho', ascending=False).head(12)

Unnamed: 0,script_num,model,roc_auc_score,average_precision_score,f1_score,roc_ho,ap_ho,f1_score_ho
19,11,xgb hier vary start:NAICS_4,0.728771,0.384041,0.450069,0.737514,0.368883,0.433219
23,12,xgb hier pair:NAICS_4,0.728652,0.383583,0.450148,0.737012,0.367824,0.430403
18,11,xgb hier vary start:NAICS_5,0.728478,0.382942,0.450608,0.732314,0.3673,0.428456
4,10,xgb hier,0.728478,0.382942,0.450608,0.732314,0.3673,0.428456
3,6,xgb numeric naics,0.723166,0.373796,0.446564,0.738718,0.362137,0.429141
16,64,xgb clus oh dgi base,0.711046,0.356489,0.438078,0.7376,0.360423,0.42791
8,24,nn hier,0.724727,0.375652,0.447923,0.732317,0.359838,0.430286
24,12,xgb hier pair:NAICS_3,0.728789,0.383948,0.44966,0.735078,0.358998,0.42718
20,11,xgb hier vary start:NAICS_3,0.728817,0.383981,0.44974,0.735148,0.358774,0.426627
22,12,xgb hier pair:NAICS_5,0.728569,0.383699,0.449787,0.730116,0.358658,0.42223


In [28]:
overall_stats[overall_stats['model'].isin(list(dict_pred_2_nn_basic.keys()))] \
    .sort_values('average_precision_score', ascending=False)

Unnamed: 0,script_num,model,roc_auc_score,average_precision_score,f1_score,roc_ho,ap_ho,f1_score_ho
6,22,nn emb,0.728407,0.383644,0.44933,0.716412,0.327247,0.395913
9,25,nn emb remap,0.727966,0.381034,0.451593,0.727708,0.343123,0.418752
7,23,nn menc,0.724844,0.376685,0.44795,0.725387,0.341251,0.417035
8,24,nn hier,0.724727,0.375652,0.447923,0.732317,0.359838,0.430286
5,21,nn base,0.699359,0.340217,0.432538,0.727186,0.341215,0.419558


In [29]:
overall_stats[overall_stats['model'].isin(list(dict_pred_3_emb.keys()))] \
    .sort_values('average_precision_score', ascending=False)

Unnamed: 0,script_num,model,roc_auc_score,average_precision_score,f1_score,roc_ho,ap_ho,f1_score_ho
10,40,xgb emb nn,0.730254,0.385856,0.451501,0.717489,0.3311,0.391092
11,50,xgb dgi base,0.724011,0.375218,0.447336,0.733705,0.346721,0.425681
12,52,xgb dgi base+label,0.723915,0.373773,0.446626,0.735716,0.35502,0.429074


In [30]:
overall_stats[overall_stats['model'].isin(dict_pred_4_clus_menc.keys())] \
    .sort_values('average_precision_score', ascending=False)

Unnamed: 0,script_num,model,roc_auc_score,average_precision_score,f1_score,roc_ho,ap_ho,f1_score_ho
13,62,xgb clus menc nn,0.728576,0.383574,0.449642,0.714015,0.313323,0.393603
14,65,xgb clus dgi hier,0.726191,0.379871,0.448569,0.714484,0.329468,0.391496


In [31]:
overall_stats[overall_stats['model'].isin(list(dict_pred_5_oh.keys()) + \
                                          ['xgb one hot'])] \
    .sort_values('average_precision_score', ascending=False)

Unnamed: 0,script_num,model,roc_auc_score,average_precision_score,f1_score,roc_ho,ap_ho,f1_score_ho
15,61,xgb clus oh nn,0.726223,0.374882,0.448602,0.727376,0.343861,0.162614
17,71,xgb clus oh dgi base+label,0.711826,0.359239,0.436635,0.737109,0.358599,0.428162
2,5,xgb one hot,0.713595,0.358867,0.439001,0.733087,0.350732,0.423228
16,64,xgb clus oh dgi base,0.711046,0.356489,0.438078,0.7376,0.360423,0.42791


In [32]:
overall_stats.to_csv(Path(setup.temp_path).joinpath('80_REPORT_summary_stats.csv'),
                    index=False)