# Benchmark the NodeGAM models with other GAMs (Spline, EBM, XGB-GAM)

Everything is run on a machine with a 12-core Intel(R) Xeon(R) W-2133 CPU @ 3.60GHz, 16GB RAM, and a Titan XP GPU with CUDA 11.2. I use the default hyperparameters, so the performance is lower than what the paper reported.

Compare the models on the following datasets:
- 3 classification and 2 regression datasets

|           |   N  |  P |  Domain |     Problem    |
|:---------:|:----:|:--:|:-------:|:--------------:|
|   Mimic3  |  27K | 57 |  Health | Classification |
|   Adult   |  33K | 14 | Finance | Classification |
|   Credit  | 285K | 30 |  Retail | Classification |
|    Wine   |  5K  | 16 |  Nature |   Regression   |
| Bikeshare |  17K | 12 |  Retail |   Regression   |

In [1]:
# IPython magics: enable the autoreload extension in mode 2 so that edits to
# imported modules are picked up without restarting the kernel.
%load_ext autoreload
%autoreload 2

In [2]:
import numpy as np
import pandas as pd
from sklearn.metrics import roc_auc_score

from nodegam.data import DATASETS
from nodegam.sklearn import NodeGAMRegressor, NodeGAMClassifier

# Quick benchmark to find a good default hyperparameter

In [3]:
import time

from nodegam.gams.MyEBM import MyExplainableBoostingClassifier, MyExplainableBoostingRegressor
from nodegam.gams.MySpline import MySplineGAM, MySplineLogisticGAM
from nodegam.gams.MyXGB import MyXGBOnehotClassifier, MyXGBOnehotRegressor

In [4]:
def run(data_name, model_name, fold=0, seed=31):
    """Train one model on one dataset fold and measure its test performance.

    Args:
        data_name: Dataset key; it is upper-cased and looked up in ``DATASETS``.
        model_name: One of 'nodegam', 'ebm', 'xgb-gam', 'xgb' or 'spline'.
        fold: Fold index forwarded to the dataset loader.
        seed: Only stored in the returned record.
            NOTE(review): the seed is not forwarded to any model or random
            number generator here, so varying it does not by itself change
            the result -- confirm whether the models should receive it.

    Returns:
        dict with the keys 'dataset', 'model_name', 'fold', 'seed',
        'test_perf' (ROC AUC for classification, RMSE for regression) and
        'time' (seconds spent building, fitting and evaluating the model,
        rounded to a whole number; dataset loading is excluded).

    Raises:
        NotImplementedError: If ``model_name`` is not a supported model. This
            is checked before the dataset is loaded so a typo fails fast.
    """
    known_models = ('nodegam', 'ebm', 'xgb-gam', 'xgb', 'spline')
    if model_name not in known_models:
        raise NotImplementedError(
            f'Unknown model_name {model_name!r}; expected one of {known_models}')

    dataset = DATASETS[data_name.upper()](path='./data/', fold=fold)
    is_classification = dataset['problem'] == 'classification'

    # Monotonic clock: unaffected by system clock adjustments during long runs.
    start = time.perf_counter()

    if model_name == 'nodegam':
        model_cls = NodeGAMClassifier if is_classification else NodeGAMRegressor
        model = model_cls(
            arch='GAMAtt',
            in_features=dataset['X_train'].shape[1],
            cat_features=dataset.get('cat_features', None),
            objective='negative_auc' if is_classification else 'mse',
            ga2m=1,
        )
    elif model_name == 'ebm':
        model_cls = MyExplainableBoostingClassifier if is_classification \
            else MyExplainableBoostingRegressor
        model = model_cls()
    elif model_name == 'xgb-gam':
        model_cls = MyXGBOnehotClassifier if is_classification \
            else MyXGBOnehotRegressor
        model = model_cls()
    elif model_name == 'xgb':
        # Same estimator classes as 'xgb-gam', but with max_depth=3.
        model_cls = MyXGBOnehotClassifier if is_classification \
            else MyXGBOnehotRegressor
        model = model_cls(max_depth=3)
    else:
        # Only 'spline' can reach this branch; model_name was validated above.
        model_cls = MySplineLogisticGAM if is_classification else MySplineGAM
        model = model_cls()

    model.fit(dataset['X_train'], dataset['y_train'])

    if is_classification:
        pred = model.predict_proba(dataset['X_test'])
        # Some models return one column per class; keep the second column
        # (index 1) as the score for the AUC.
        if pred.ndim == 2:
            pred = pred[:, 1]
        test_perf = roc_auc_score(dataset['y_test'], pred)
    else:
        pred = model.predict(dataset['X_test'])
        # Root mean squared error.
        test_perf = np.sqrt(np.mean((pred - dataset['y_test']) ** 2))

    return {
        'dataset': data_name,
        'model_name': model_name,
        'fold': fold,
        'seed': seed,
        'test_perf': test_perf,
        'time': round(float(time.perf_counter() - start), 0),
    }

In [5]:
# dset = 'click'
# model_name = 'nodegam'
# fold=0

In [6]:
# record = run(data_name=dset, model_name=model_name, fold=fold)

In [7]:
# Accumulates one dict per benchmark run; the loops below append to it and skip
# combinations that are already present, so this cell resets all progress.
records = []

In [8]:
# Benchmark every (dataset, model, fold) combination once. `records` persists
# between executions of this cell, so combinations already recorded are skipped.
seed = 31  # same value as run()'s default; defined here so failure records can store it
for dset in ['adult', 'mimic2', 'wine', 'bikeshare', 'credit']:
    for model_name in ['ebm', 'xgb-gam', 'xgb', 'nodegam']:
        for fold in [0, 1, 2]:
            # .get(): records written for failed runs may not carry every key.
            if any(r.get('dataset') == dset and r.get('model_name') == model_name
                   and r.get('fold') == fold for r in records):
                print(f'Already run {dset} {model_name} {fold}')
                continue

            try:
                record = run(data_name=dset, model_name=model_name, fold=fold, seed=seed)
            except Exception as e:
                # Best effort: report the failure and keep benchmarking the
                # remaining combinations. The record stores the fold so the
                # "already run" check above can recognise it.
                print(e)
                record = dict(model_name=model_name, dataset=dset, fold=fold,
                              seed=seed, error_msg=str(e))
            records.append(record)

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  iloc._setitem_with_indexer(indexer, value)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  iloc._setitem_with_indexer(indexer, value)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  iloc._setitem_with_indexer(indexer, value)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  iloc._setitem_with_indexer(indexer, val

Steps	Train Err	Val Metric (negative_auc)
100	0.3948	-0.8618
200	0.3541	-0.8812
300	0.3191	-0.8865
400	0.3187	-0.8888
500	0.331	-0.8894
600	0.3059	-0.8957
700	0.3349	-0.8972
800	0.3126	-0.9038
900	0.3029	-0.9076
1000	0.2999	-0.9093
1100	0.2955	-0.9104
1200	0.312	-0.9112
1300	0.3221	-0.9118
1400	0.33	-0.9125
1500	0.3128	-0.9131
1600	0.3261	-0.9138
1700	0.3184	-0.9144
1800	0.3025	-0.9147
1900	0.2968	-0.9147
2000	0.2951	-0.9146
2100	0.3173	-0.9144
2200	0.2939	-0.9145
2300	0.3111	-0.9148
2400	0.3072	-0.9149
2500	0.3156	-0.915
2600	0.3165	-0.9151
2700	0.3154	-0.9152
2800	0.292	-0.9152
2900	0.2965	-0.9153
3000	0.3038	-0.9153
3100	0.3173	-0.9155
3200	0.3033	-0.9156
3300	0.3101	-0.9158
3400	0.3003	-0.9159
3500	0.286	-0.916
3600	0.2891	-0.9161
3700	0.2896	-0.9162
3800	0.319	-0.9169
3900	0.2993	-0.9172
4000	0.2685	-0.9173
4100	0.2981	-0.9174
4200	0.2885	-0.9176
4300	0.3014	-0.9174
4400	0.3028	-0.9176
4500	0.2982	-0.9176
LR: 1.00e-02 -> 2.00e-03
4600	0.2744	-0.9177
4700	0.3276	-0.9176
4800	0.2795

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  iloc._setitem_with_indexer(indexer, value)


Steps	Train Err	Val Metric (negative_auc)
100	0.3938	-0.8722
200	0.3545	-0.8936
300	0.3485	-0.8974
400	0.312	-0.9004
500	0.3163	-0.9005
600	0.3148	-0.9048
700	0.3024	-0.9087
800	0.3263	-0.9122
900	0.3352	-0.9155
1000	0.315	-0.9149
1100	0.3036	-0.9158
1200	0.3139	-0.9153
1300	0.3126	-0.9153
1400	0.3049	-0.9182
1500	0.302	-0.9196
1600	0.3129	-0.9207
1700	0.3065	-0.9213
1800	0.3067	-0.9217
1900	0.2973	-0.9218
2000	0.3016	-0.9218
2100	0.3087	-0.9218
2200	0.2926	-0.9219
2300	0.3019	-0.9219
2400	0.3009	-0.9221
2500	0.2807	-0.9222
2600	0.2933	-0.9222
2700	0.2971	-0.9222
2800	0.2893	-0.9223
2900	0.3016	-0.9225
3000	0.2995	-0.923
3100	0.3025	-0.9233
3200	0.3112	-0.9233
3300	0.2758	-0.9234
3400	0.2897	-0.9233
3500	0.3241	-0.9234
3600	0.2897	-0.9233
LR: 1.00e-02 -> 2.00e-03
3700	0.2802	-0.9234
3800	0.2831	-0.9234
3900	0.2914	-0.9235
4000	0.2972	-0.9235
4100	0.3121	-0.9234
4200	0.2966	-0.9235
LR: 2.00e-03 -> 4.00e-04
4300	0.3009	-0.9235
4400	0.2987	-0.9235
4500	0.2818	-0.9235
4600	0.2901	-0.9235
4

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  iloc._setitem_with_indexer(indexer, value)


Steps	Train Err	Val Metric (negative_auc)
100	0.3825	-0.8695
200	0.375	-0.8894
300	0.3572	-0.8957
400	0.3273	-0.8992
500	0.324	-0.9016
600	0.3084	-0.9072
700	0.3072	-0.9138
800	0.3043	-0.9187
900	0.3191	-0.9196
1000	0.3119	-0.919
1100	0.2986	-0.9202
1200	0.3126	-0.9209
1300	0.3246	-0.9224
1400	0.3059	-0.9237
1500	0.3021	-0.9246
1600	0.3136	-0.9245
1700	0.3021	-0.9248
1800	0.3073	-0.9252
1900	0.3118	-0.9254
2000	0.3136	-0.9254
2100	0.2846	-0.9255
2200	0.2857	-0.9255
2300	0.3016	-0.9255
2400	0.2933	-0.9257
2500	0.2972	-0.9258
2600	0.298	-0.9258
2700	0.3155	-0.9258
2800	0.2802	-0.9257
LR: 1.00e-02 -> 2.00e-03
2900	0.3026	-0.9257
3000	0.3105	-0.9258
3100	0.2993	-0.9258
3200	0.2816	-0.926
3300	0.3081	-0.9261
3400	0.312	-0.9262
3500	0.3021	-0.9261
3600	0.2849	-0.9262
3700	0.2972	-0.9262
3800	0.2989	-0.9262
3900	0.2843	-0.9263
4000	0.2905	-0.9264
4100	0.2995	-0.9264
4200	0.2888	-0.9265
4300	0.2952	-0.9266
4400	0.2941	-0.9267
4500	0.2853	-0.9267
4600	0.2786	-0.9269
4700	0.3064	-0.927
4800	0.31

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  iloc._setitem_with_indexer(indexer, value)


Steps	Train Err	Val Metric (negative_auc)
100	0.3136	-0.8007
200	0.3063	-0.8186
300	0.2822	-0.8227
400	0.2895	-0.8252
500	0.2782	-0.8263
600	0.2732	-0.8301
700	0.2688	-0.8327
800	0.2629	-0.8349
900	0.2235	-0.836
1000	0.2936	-0.8363
1100	0.2932	-0.8379
1200	0.2799	-0.8397
1300	0.2906	-0.8406
1400	0.2791	-0.8434
1500	0.2822	-0.8446
1600	0.2883	-0.8436
1700	0.2633	-0.8437
1800	0.212	-0.8431
1900	0.2865	-0.8437
2000	0.282	-0.8442
2100	0.2913	-0.8447
2200	0.2972	-0.845
2300	0.2604	-0.845
2400	0.2759	-0.845
2500	0.2869	-0.845
2600	0.2864	-0.8455
2700	0.2926	-0.8449
2800	0.267	-0.8447
2900	0.2687	-0.8442
LR: 1.00e-02 -> 2.00e-03
3000	0.2706	-0.8438
3100	0.2569	-0.8434
3200	0.2514	-0.8439
LR: 2.00e-03 -> 4.00e-04
3300	0.2691	-0.8446
3400	0.2544	-0.8452
3500	0.2519	-0.8455
3600	0.2493	-0.8455
3700	0.2687	-0.8454
3800	0.2762	-0.8453
LR: 4.00e-04 -> 8.00e-05
3900	0.2492	-0.8451
4000	0.2525	-0.845
4100	0.2769	-0.8449
LR: 8.00e-05 -> 1.60e-05
4200	0.2645	-0.8448
4300	0.2721	-0.8449
4400	0.2613	-0.8



Steps	Train Err	Val Metric (negative_auc)
100	0.0026	-0.9968
200	0.0078	-0.9987
300	0.0077	-0.9988
400	0.0008	-0.9988
500	0.0015	-0.9987
600	0.003	-0.9987
700	0.0008	-0.9984
800	0.0064	-0.9985
900	0.001	-0.9986
1000	0.0016	-0.9987
1100	0.0028	-0.9986
1200	0.0049	-0.9986
1300	0.0037	-0.9986
1400	0.0017	-0.9985
1500	0.0038	-0.9985
1600	0.0052	-0.9985
1700	0.002	-0.9986
1800	0.0049	-0.9986
1900	0.0108	-0.9987
2000	0.008	-0.9986
2100	0.007	-0.9986
2200	0.001	-0.9986
2300	0.0009	-0.9986
LR: 1.00e-02 -> 2.00e-03
2400	0.0007	-0.9986
2500	0.0017	-0.9986
2600	0.0059	-0.9987
LR: 2.00e-03 -> 4.00e-04
2700	0.0048	-0.9987
2800	0.0005	-0.9987
2900	0.001	-0.9987
LR: 4.00e-04 -> 8.00e-05
3000	0.0005	-0.9987
3100	0.005	-0.9987
3200	0.0013	-0.9987
LR: 8.00e-05 -> 1.60e-05
3300	0.0039	-0.9987
3400	0.008	-0.9987
3500	0.0006	-0.9987
LR: 1.60e-05 -> 3.20e-06
3600	0.0047	-0.9987
3700	0.0045	-0.9987
3800	0.0025	-0.9987
LR: 3.20e-06 -> 1.00e-06
3900	0.0087	-0.9987
4000	0.0014	-0.9987
BREAK. There is no improvm

In [9]:
# Benchmark the 'year' dataset once per (model, seed). `records` persists
# between executions of this cell, so combinations already recorded are skipped.
for dset in ['year']:
    for model_name in ['ebm', 'xgb-gam', 'xgb', 'nodegam']:
        for seed in [0, 1, 2]:
            # .get(): records written for failed runs may not carry every key.
            if any(r.get('dataset') == dset and r.get('model_name') == model_name
                   and r.get('seed') == seed for r in records):
                print(f'Already run {dset} {model_name} {seed}')
                continue

            try:
                record = run(data_name=dset, model_name=model_name, seed=seed)
            except Exception as e:
                # Best effort: report the failure and keep going. run() is
                # called without `fold`, i.e. with its default of 0, so that is
                # the fold stored in the failure record.
                print(e)
                record = dict(model_name=model_name, dataset=dset, fold=0,
                              seed=seed, error_msg=str(e))
            records.append(record)

Normalize y. mean = 1998.39208984375, std = 10.92832088470459
Steps	Train Err	Val Metric (mse)
100	1.3973	147.3397
200	0.822	126.1152
300	0.6901	118.5757
400	0.7585	103.8585
500	0.8088	101.5735
600	0.7972	92.8928
700	0.719	91.6918
800	0.6853	88.1823
900	0.6953	88.4449
1000	0.7137	87.368
1100	0.7081	85.6934
1200	0.6987	84.8604
1300	0.7011	83.5634
1400	0.6817	82.9605
1500	0.6903	82.4482
1600	0.7292	82.1023
1700	0.7731	82.1228
1800	0.7712	81.7418
1900	0.7389	81.6894
2000	0.7198	81.6422
2100	0.7435	81.4722
2200	0.6648	81.4725
2300	0.6464	81.4625
2400	0.7406	81.4186
2500	0.6934	81.2779
2600	0.7473	81.2658
2700	0.6994	81.1909
2800	0.686	81.1997
2900	0.6941	81.1207
3000	0.6923	81.1089
3100	0.7375	81.1294
3200	0.6944	81.1409
3300	0.7176	81.1783
3400	0.7032	81.1642
3500	0.671	81.0931
3600	0.6597	81.0792
3700	0.6267	81.1891
3800	0.6795	81.0583
3900	0.5982	80.9935
4000	0.6596	80.9999
4100	0.6956	80.9721
4200	0.7188	80.9337
4300	0.6243	80.9184
4400	0.7028	80.9298
4500	0.7297	80.8818
4600	0.6301	80

In [10]:
# One row per entry in `records`; the bare name on the last line displays it.
df = pd.DataFrame(records)
df

Unnamed: 0,dataset,model_name,fold,seed,test_perf,time
0,adult,ebm,0,31,0.929210,24.0
1,adult,ebm,1,31,0.923043,11.0
2,adult,ebm,2,31,0.927641,11.0
3,adult,xgb-gam,0,31,0.924382,6.0
4,adult,xgb-gam,1,31,0.922595,6.0
...,...,...,...,...,...,...
67,year,xgb,0,1,9.048994,537.0
68,year,xgb,0,2,9.048994,538.0
69,year,nodegam,0,0,9.009680,337.0
70,year,nodegam,0,1,9.018009,318.0


In [11]:
# Mean and standard deviation of the test metric and of the run time for each
# (dataset, model) pair, aggregated over the repeated runs.
perf_df = df.groupby(['dataset', 'model_name']).agg(
    {column: ['mean', 'std'] for column in ('test_perf', 'time')}
)

In [12]:
# Display the aggregated statistics.
perf_df

Unnamed: 0_level_0,Unnamed: 1_level_0,test_perf,test_perf,time,time
Unnamed: 0_level_1,Unnamed: 1_level_1,mean,std,mean,std
dataset,model_name,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2
adult,ebm,0.926631,0.003205,15.333333,7.505553
adult,nodegam,0.915616,0.002189,196.333333,55.509759
adult,xgb,0.92733,0.002119,1.0,0.0
adult,xgb-gam,0.924643,0.00219,6.0,0.0
bikeshare,ebm,55.675896,0.327145,15.333333,2.516611
bikeshare,nodegam,57.438405,3.898916,223.333333,23.352373
bikeshare,xgb,45.212191,1.253863,1.666667,0.57735
bikeshare,xgb-gam,101.093015,0.94632,0.666667,0.57735
credit,ebm,0.983871,0.006659,36.666667,2.081666
credit,nodegam,0.989025,0.008174,112.666667,35.809682


In [13]:
# Work on an explicit copy: perf_df['test_perf'] is a slice of perf_df, and
# adding a column to the slice directly raises pandas' SettingWithCopyWarning.
perf = perf_df['test_perf'].copy()
# Human-readable "mean ± std" string per (dataset, model), rounded to 3 decimals.
perf['summary'] = perf.apply(
    lambda row: f"{round(row['mean'], 3)} ± {round(row['std'], 3)}",
    axis=1).values
perf

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  This is separate from the ipykernel package so we can avoid doing imports until


Unnamed: 0_level_0,Unnamed: 1_level_0,mean,std,summary
dataset,model_name,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
adult,ebm,0.926631,0.003205,0.927 ± 0.003
adult,nodegam,0.915616,0.002189,0.916 ± 0.002
adult,xgb,0.92733,0.002119,0.927 ± 0.002
adult,xgb-gam,0.924643,0.00219,0.925 ± 0.002
bikeshare,ebm,55.675896,0.327145,55.676 ± 0.327
bikeshare,nodegam,57.438405,3.898916,57.438 ± 3.899
bikeshare,xgb,45.212191,1.253863,45.212 ± 1.254
bikeshare,xgb-gam,101.093015,0.94632,101.093 ± 0.946
credit,ebm,0.983871,0.006659,0.984 ± 0.007
credit,nodegam,0.989025,0.008174,0.989 ± 0.008


In [14]:
# Reshape to one row per dataset and one column per model. The source rows come
# from a groupby on (dataset, model_name), so 'first' simply picks the single
# summary string of each cell.
perf_table = perf[['summary']].pivot_table(
    index=['dataset'],
    columns=['model_name'],
    aggfunc='first',
)
perf_table

Unnamed: 0_level_0,summary,summary,summary,summary
model_name,ebm,nodegam,xgb,xgb-gam
dataset,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2
adult,0.927 ± 0.003,0.916 ± 0.002,0.927 ± 0.002,0.925 ± 0.002
bikeshare,55.676 ± 0.327,57.438 ± 3.899,45.212 ± 1.254,101.093 ± 0.946
credit,0.984 ± 0.007,0.989 ± 0.008,0.984 ± 0.01,0.985 ± 0.008
mimic2,0.842 ± 0.019,0.844 ± 0.018,0.845 ± 0.019,0.833 ± 0.02
wine,0.69 ± 0.011,0.705 ± 0.012,0.682 ± 0.023,0.713 ± 0.006
year,9.204 ± 0.0,9.013 ± 0.004,9.049 ± 0.0,9.257 ± 0.0


In [15]:
# Drop the outer 'summary' column level and put the model columns in a fixed
# order with NodeGAM first.
perf_table = perf_table['summary'].loc[:, ['nodegam', 'ebm', 'xgb-gam', 'xgb']]
perf_table

model_name,nodegam,ebm,xgb-gam,xgb
dataset,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
adult,0.916 ± 0.002,0.927 ± 0.003,0.925 ± 0.002,0.927 ± 0.002
bikeshare,57.438 ± 3.899,55.676 ± 0.327,101.093 ± 0.946,45.212 ± 1.254
credit,0.989 ± 0.008,0.984 ± 0.007,0.985 ± 0.008,0.984 ± 0.01
mimic2,0.844 ± 0.018,0.842 ± 0.019,0.833 ± 0.02,0.845 ± 0.019
wine,0.705 ± 0.012,0.69 ± 0.011,0.713 ± 0.006,0.682 ± 0.023
year,9.013 ± 0.004,9.204 ± 0.0,9.257 ± 0.0,9.049 ± 0.0


In [16]:
# Display the rows in a fixed dataset order (the result is not assigned back).
perf_table.loc[['mimic2', 'adult', 'credit', 'wine', 'bikeshare', 'year']]

model_name,nodegam,ebm,xgb-gam,xgb
dataset,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
mimic2,0.844 ± 0.018,0.842 ± 0.019,0.833 ± 0.02,0.845 ± 0.019
adult,0.916 ± 0.002,0.927 ± 0.003,0.925 ± 0.002,0.927 ± 0.002
credit,0.989 ± 0.008,0.984 ± 0.007,0.985 ± 0.008,0.984 ± 0.01
wine,0.705 ± 0.012,0.69 ± 0.011,0.713 ± 0.006,0.682 ± 0.023
bikeshare,57.438 ± 3.899,55.676 ± 0.327,101.093 ± 0.946,45.212 ± 1.254
year,9.013 ± 0.004,9.204 ± 0.0,9.257 ± 0.0,9.049 ± 0.0


Analyze run time

In [39]:
# Work on an explicit copy: perf_df['time'] is a slice of perf_df, and adding a
# column to the slice directly raises pandas' SettingWithCopyWarning.
the_time = perf_df['time'].copy()
# Human-readable "mean ± std" run time per (dataset, model), in whole seconds.
the_time['summary'] = the_time.apply(
    lambda row: f"{round(row['mean'], 0)} ± {round(row['std'], 0)}",
    axis=1).values
the_time

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  after removing the cwd from sys.path.


Unnamed: 0_level_0,Unnamed: 1_level_0,mean,std,summary
dataset,model_name,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
adult,ebm,15.333333,7.505553,15.0 ± 8.0
adult,nodegam,196.333333,55.509759,196.0 ± 56.0
adult,xgb,1.0,0.0,1.0 ± 0.0
adult,xgb-gam,6.0,0.0,6.0 ± 0.0
bikeshare,ebm,15.333333,2.516611,15.0 ± 3.0
bikeshare,nodegam,223.333333,23.352373,223.0 ± 23.0
bikeshare,xgb,1.666667,0.57735,2.0 ± 1.0
bikeshare,xgb-gam,0.666667,0.57735,1.0 ± 1.0
credit,ebm,36.666667,2.081666,37.0 ± 2.0
credit,nodegam,112.666667,35.809682,113.0 ± 36.0


In [40]:
# Reshape to one row per dataset and one column per model. The source rows come
# from a groupby on (dataset, model_name), so 'first' simply picks the single
# summary string of each cell.
the_time_table = the_time[['summary']].pivot_table(
    index=['dataset'],
    columns=['model_name'],
    aggfunc='first',
)
the_time_table

Unnamed: 0_level_0,summary,summary,summary,summary
model_name,ebm,nodegam,xgb,xgb-gam
dataset,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2
adult,15.0 ± 8.0,196.0 ± 56.0,1.0 ± 0.0,6.0 ± 0.0
bikeshare,15.0 ± 3.0,223.0 ± 23.0,2.0 ± 1.0,1.0 ± 1.0
credit,37.0 ± 2.0,113.0 ± 36.0,16.0 ± 2.0,26.0 ± 7.0
mimic2,6.0 ± 2.0,105.0 ± 14.0,1.0 ± 1.0,0.0 ± 1.0
wine,4.0 ± 2.0,157.0 ± 86.0,0.0 ± 0.0,0.0 ± 0.0
year,501.0 ± 8.0,318.0 ± 20.0,537.0 ± 1.0,376.0 ± 1.0


In [41]:
# Drop the outer 'summary' column level and put the model columns in a fixed
# order with NodeGAM first.
the_time_table = the_time_table['summary'].loc[:, ['nodegam', 'ebm', 'xgb-gam', 'xgb']]
the_time_table

model_name,nodegam,ebm,xgb-gam,xgb
dataset,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
adult,196.0 ± 56.0,15.0 ± 8.0,6.0 ± 0.0,1.0 ± 0.0
bikeshare,223.0 ± 23.0,15.0 ± 3.0,1.0 ± 1.0,2.0 ± 1.0
credit,113.0 ± 36.0,37.0 ± 2.0,26.0 ± 7.0,16.0 ± 2.0
mimic2,105.0 ± 14.0,6.0 ± 2.0,0.0 ± 1.0,1.0 ± 1.0
wine,157.0 ± 86.0,4.0 ± 2.0,0.0 ± 0.0,0.0 ± 0.0
year,318.0 ± 20.0,501.0 ± 8.0,376.0 ± 1.0,537.0 ± 1.0


In [42]:
# Put the rows in a fixed dataset order; .loc raises KeyError if a dataset is missing.
the_time_table = the_time_table.loc[['mimic2', 'adult', 'credit', 'wine', 'bikeshare', 'year']]

In [43]:
# Display the final run-time table.
the_time_table

model_name,nodegam,ebm,xgb-gam,xgb
dataset,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
mimic2,105.0 ± 14.0,6.0 ± 2.0,0.0 ± 1.0,1.0 ± 1.0
adult,196.0 ± 56.0,15.0 ± 8.0,6.0 ± 0.0,1.0 ± 0.0
credit,113.0 ± 36.0,37.0 ± 2.0,26.0 ± 7.0,16.0 ± 2.0
wine,157.0 ± 86.0,4.0 ± 2.0,0.0 ± 0.0,0.0 ± 0.0
bikeshare,223.0 ± 23.0,15.0 ± 3.0,1.0 ± 1.0,2.0 ± 1.0
year,318.0 ± 20.0,501.0 ± 8.0,376.0 ± 1.0,537.0 ± 1.0
