# Model Evaluation
- Using Pre-computed Predictions: https://docs.deepchecks.com/stable/tabular/usage_guides/supported_models.html#using-pre-computed-predictions

In [16]:
from pathlib import Path

import numpy as np
import pandas as pd
from sklearn.metrics import log_loss, roc_auc_score
from deepchecks.tabular import Dataset
from deepchecks.tabular.suites import model_evaluation

In [2]:
# List the contents of the DAI_predict directory.
!ls DAI_predict

[34mdefault_model[m[m [34msimple_model[m[m


In [3]:
# List the files available under DAI_predict/simple_model.
!ls DAI_predict/simple_model

data---original-feature-importance.csv
h2oai_experiment_simple_model_custom_dataset_TitanicData2_dcTest_csv_predictions.csv
h2oai_experiment_simple_model_custom_dataset_TitanicData2_dcTrain_csv_predictions.csv


In [4]:
# Read the train and test prediction CSVs (in that order) into DataFrames.
df_train, df_test = map(
    pd.read_csv,
    (
        'DAI_predict/simple_model/h2oai_experiment_simple_model_custom_dataset_TitanicData2_dcTrain_csv_predictions.csv',
        'DAI_predict/simple_model/h2oai_experiment_simple_model_custom_dataset_TitanicData2_dcTest_csv_predictions.csv',
    ),
)
df_train.shape, df_test.shape

((1047, 11), (262, 11))

In [5]:
# Preview the first rows of the train predictions.
df_train.head()

Unnamed: 0,Passenger_Id,pclass,sex,age,sibsp,parch,fare,survived,survived.0,survived.1,survived.predicted(th=0.40031)
0,648,3rd,female,22.0,0,0,7.75,1,0.397761,0.602239,1
1,391,3rd,male,16.0,1,3,34.375,0,0.874194,0.125806,0
2,351,1st,female,49.0,1,0,76.7292,1,0.140861,0.859139,1
3,1163,3rd,female,24.0,0,0,7.75,0,0.397761,0.602239,1
4,105,3rd,female,18.0,0,0,9.8417,1,0.397761,0.602239,1


In [6]:
# Preview the first rows of the test predictions.
df_test.head()

Unnamed: 0,Passenger_Id,pclass,sex,age,sibsp,parch,fare,survived,survived.0,survived.1,survived.predicted(th=0.40031)
0,1298,1st,male,17.0,0,2,110.8833,1,0.528816,0.471184,1
1,575,1st,female,45.0,0,1,59.4,1,0.056927,0.943073,1
2,328,3rd,male,20.0,0,0,7.8542,0,0.883736,0.116264,0
3,408,3rd,male,25.0,0,0,7.05,0,0.883736,0.116264,0
4,844,1st,male,,0,0,35.5,1,0.636392,0.363608,0


In [18]:
# Score the 'survived.1' column against the true 'survived' label, one split at a time.
# NOTE(review): 'survived.1' is presumably the predicted probability of class 1 — confirm.
train_logloss = log_loss(df_train['survived'], df_train['survived.1'])
train_auc = roc_auc_score(df_train['survived'], df_train['survived.1'])

test_logloss = log_loss(df_test['survived'], df_test['survived.1'])
test_auc = roc_auc_score(df_test['survived'], df_test['survived.1'])

# One row per split, columns in the order: data, Logloss, AUC of ROC.
pd.DataFrame([
    {'data': 'Train', 'Logloss': train_logloss, 'AUC of ROC': train_auc},
    {'data': 'Test', 'Logloss': test_logloss, 'AUC of ROC': test_auc},
])

Unnamed: 0,data,Logloss,AUC of ROC
0,Train,0.455065,0.84346
1,Test,0.497275,0.814475


In [7]:
# Read the feature-importance CSV, skipping its first two lines.
df_fi = pd.read_csv(
    'DAI_predict/simple_model/data---original-feature-importance.csv',
    skiprows=2,
)
df_fi

Unnamed: 0,label,value
0,sex,1.0
1,pclass,0.527
2,parch,0.0


In [8]:
# Utility functions to create data for Deepchecks
def get_proba_array(df, proba_cols) -> np.ndarray:
    '''
    Return the predicted probabilities of a classification problem as a numpy array.

    Parameters
    ----------
    df : DataFrame containing the probability columns.
    proba_cols : list of column labels to extract; the output columns follow this order.

    Returns
    -------
    np.ndarray with one row per row of `df` and one column per entry of `proba_cols`.

    Raises
    ------
    KeyError (from pandas) if any label in `proba_cols` is not a column of `df`.
    '''
    return df[proba_cols].to_numpy()

def get_feature_inportance_series(df, features) -> pd.Series:
    '''
    Convert a DAI original feature importance dataframe into a Series of importance
    values indexed by feature name.

    `df` must provide the columns 'label' (feature name) and 'value' (importance).
    The result has one entry per item of `features`, in that same order; features
    without a matching 'label' row in `df` get an importance of 0.
    The order of `features` must be the same as the order of the training dataset features.
    '''
    ordered_labels = pd.DataFrame({'label': features})
    # Left join keeps every requested feature, in the requested order.
    joined = ordered_labels.merge(df, on='label', how='left')
    return joined.fillna(0.).set_index('label')['value']

In [9]:
# Extract the two probability columns from each split (train first, then test).
train_proba, test_proba = (
    get_proba_array(split_df, ['survived.0', 'survived.1'])
    for split_df in (df_train, df_test)
)
train_proba.shape, test_proba.shape

((1047, 2), (262, 2))

In [10]:
# Feature names, listed in the order of the training dataset features.
features = [
    'pclass',
    'sex',
    'age',
    'sibsp',
    'parch',
    'fare',
]
fi = get_feature_inportance_series(df=df_fi, features=features)
fi

label
pclass    0.527
sex       1.000
age       0.000
sibsp     0.000
parch     0.000
fare      0.000
Name: value, dtype: float64

In [11]:
# Columns handed to Deepchecks: the id, the features and the label
# (the prediction columns of the loaded frames are left out).
train_features = ['Passenger_Id', 'pclass', 'sex', 'age', 'sibsp', 'parch', 'fare', 'survived']

# Build the train and test Datasets with identical settings.
ds_train, ds_test = (
    Dataset(
        split_df[train_features],
        label='survived',
        index_name='Passenger_Id',
        cat_features=['pclass', 'sex'],
    )
    for split_df in (df_train, df_test)
)

In [12]:
# Check the type of the object built for the train split.
type(ds_train)

deepchecks.tabular.dataset.Dataset

In [13]:
# Run the model evaluation suite on pre-computed predictions (no model object is passed).
# NOTE(review): only probabilities are supplied; the thresholded DAI column
# 'survived.predicted(th=0.40031)' is not passed. Confirm how Deepchecks derives
# class predictions in that case and whether y_pred_* should be given as well.
result = model_evaluation().run(
    train_dataset=ds_train,      # Train data (Dataset)
    test_dataset=ds_test,        # Test data (Dataset)
    feature_importance=fi,       # Feature importance (Series)
    y_proba_train=train_proba,   # Train prediction probability (array)
    y_proba_test=test_proba,     # Test prediction probability (array)
)


feature_importance does not sum to 1. Normalizing to 1.





In [14]:
# Display the suite result in the notebook.
result.show()

Accordion(children=(VBox(children=(HTML(value='\n<h1 id="summary_AAQK53DH32UPD614MR1L3MPZF">Model Evaluation S…

In [15]:
# Save the results as HTML.
# Create the target folder first so the save does not depend on 'outputs/'
# already existing (e.g. on a fresh checkout).
Path('outputs').mkdir(parents=True, exist_ok=True)
result.save_as_html('outputs/ModelEvaluation_DAI_simp.html')

# To view the results, open the saved HTML file.

'outputs/ModelEvaluation_DAI_simp.html'