# Model Evaluation

In [18]:
import pandas as pd
import numpy as np
from sklearn.metrics import mean_squared_error, r2_score
from deepchecks.tabular import Dataset
from deepchecks.tabular.suites import model_evaluation

In [2]:
# List the contents of the DAI_predict directory via an IPython shell escape.
!ls DAI_predict

data---original-feature-importance.csv
h2oai_experiment_walmart_custom_dataset_walmart_ts_6_fcst_grp_test_csv_predictions.csv
h2oai_experiment_walmart_custom_dataset_walmart_ts_6_fcst_grp_train_csv_predictions.csv


In [3]:
# Locations of the prediction CSVs for the training and test splits.
train_csv = 'DAI_predict/h2oai_experiment_walmart_custom_dataset_walmart_ts_6_fcst_grp_train_csv_predictions.csv'
test_csv = 'DAI_predict/h2oai_experiment_walmart_custom_dataset_walmart_ts_6_fcst_grp_test_csv_predictions.csv'

# Read both splits, then show their (rows, columns) shapes as the cell output.
df_train = pd.read_csv(train_csv)
df_test = pd.read_csv(test_csv)
df_train.shape, df_test.shape

((702, 14), (36, 14))

In [4]:
# Preview the first rows of the training predictions frame.
df_train.head()

Unnamed: 0,Store,Dept,Date,Weekly_Sales,MarkDown1,MarkDown2,MarkDown3,MarkDown4,MarkDown5,IsHoliday,sample_weight,Weekly_Sales.predicted,Weekly_Sales.predicted.lower,Weekly_Sales.predicted.upper
0,4,4,2010/2/5,59554.57,-1.0,-1.0,-1.0,-1.0,-1.0,0,1,58978.074,54351.161978,63166.79161
1,4,4,2010/2/12,54069.82,-1.0,-1.0,-1.0,-1.0,-1.0,1,5,54493.715,49866.802603,58682.432235
2,4,4,2010/2/19,53939.17,-1.0,-1.0,-1.0,-1.0,-1.0,0,1,54222.51,49595.599478,58411.22911
3,4,4,2010/2/26,54687.08,-1.0,-1.0,-1.0,-1.0,-1.0,0,1,54335.207,49708.29479,58523.924422
4,4,4,2010/3/5,56959.02,-1.0,-1.0,-1.0,-1.0,-1.0,0,1,57059.055,52432.142446,61247.772078


In [5]:
# Summary statistics of the predicted Weekly_Sales on the training split.
df_train['Weekly_Sales.predicted'].describe()

count      702.000000
mean     40109.673027
std       9708.002635
min      20190.540000
25%      35416.865000
50%      40366.246500
75%      43684.260250
max      72033.920000
Name: Weekly_Sales.predicted, dtype: float64

In [6]:
# Preview the first rows of the test predictions frame.
df_test.head()

Unnamed: 0,Store,Dept,Date,Weekly_Sales,MarkDown1,MarkDown2,MarkDown3,MarkDown4,MarkDown5,IsHoliday,sample_weight,Weekly_Sales.predicted,Weekly_Sales.predicted.lower,Weekly_Sales.predicted.upper
0,4,4,2012/5/4,60576.41,13737.91,-1.0,93.65,6993.97,4541.89,0,1,66229.305,61602.392446,70418.022078
1,4,4,2012/5/11,57583.94,20499.88,86.0,103.05,5005.96,3815.16,0,1,58581.49,53954.57604,62770.205672
2,4,4,2012/5/18,60832.91,6531.12,-1.0,99.91,2290.18,2521.84,0,1,64543.867,59916.954946,68732.584578
3,4,4,2012/5/25,57781.87,4330.02,436.0,862.31,3268.41,3749.53,0,1,59194.082,54567.16979,63382.799422
4,4,4,2012/6/1,62758.52,10165.22,8.6,47.93,2676.33,4896.34,0,1,63442.332,58815.41979,67631.049422


In [7]:
# Summary statistics of the predicted Weekly_Sales on the test split.
df_test['Weekly_Sales.predicted'].describe()

count       36.000000
mean     41807.953944
std      11825.016855
min      22189.266000
25%      35618.425000
50%      41982.718500
75%      44903.335000
max      66229.305000
Name: Weekly_Sales.predicted, dtype: float64

In [21]:
# RMSE is computed as sqrt(MSE) instead of passing `squared=False`:
# that keyword is deprecated in scikit-learn 1.4 and removed in 1.6,
# while np.sqrt(mean_squared_error(...)) works on every version.
# NOTE(review): both frames carry a `sample_weight` column; the metrics below
# are unweighted — confirm that is intended.
train_rmse = np.sqrt(mean_squared_error(df_train['Weekly_Sales'], df_train['Weekly_Sales.predicted']))
test_rmse = np.sqrt(mean_squared_error(df_test['Weekly_Sales'], df_test['Weekly_Sales.predicted']))
train_r2 = r2_score(df_train['Weekly_Sales'], df_train['Weekly_Sales.predicted'])
test_r2 = r2_score(df_test['Weekly_Sales'], df_test['Weekly_Sales.predicted'])

# One row per split so train and test scores can be compared side by side.
pd.DataFrame({'data':['Train', 'Test'], 'RMSE':[train_rmse, test_rmse], 'R2':[train_r2, test_r2]})

Unnamed: 0,data,RMSE,R2
0,Train,924.604691,0.990846
1,Test,2086.180655,0.961067


In [8]:
# Load the original-feature importance table.
# skiprows=2 drops the first two lines of the file before parsing
# (presumably non-tabular header lines — confirm against the CSV).
df_fi = pd.read_csv('DAI_predict/data---original-feature-importance.csv', skiprows=2)
df_fi

Unnamed: 0,label,value
0,Store,1.0
1,Dept,0.518
2,Date,0.255
3,MarkDown3,0.005
4,MarkDown1,0.003
5,IsHoliday,0.001


In [9]:
# Utility functions to create data for Deepchecks
def get_proba_array(df, proba_cols) -> np.ndarray:    # for Classification
    '''
    Return the predicted class probabilities of a classification problem as a numpy array.

    Parameters:
        df: DataFrame holding the prediction columns.
        proba_cols: column label, or list of column labels, to extract.

    Returns:
        The selected column(s) converted with ``to_numpy()``: 1-D for a single
        label, 2-D (rows x columns) for a list of labels.
    '''
    return df[proba_cols].to_numpy()

def get_pred_array(df, pred_col) -> np.ndarray:    # for Regression
    '''
    Return the predicted values of a regression problem as a numpy array.

    Parameters:
        df: DataFrame holding the prediction column.
        pred_col: label of the column that contains the predictions.

    Returns:
        The selected column converted with ``to_numpy()``.
    '''
    return df[pred_col].to_numpy()

def get_feature_inportance_series(df, features) -> pd.Series:
    '''
    Build a feature-importance Series indexed by feature name.

    `df` is the original feature importance table with 'label' and 'value' columns.
    `features` lists the feature names in the order of the training dataset features;
    the result follows that order. Features absent from `df` get an importance of 0.
    '''
    # A left join against the ordered feature list keeps the caller's ordering.
    ordered = pd.DataFrame({'label': features})
    joined = ordered.merge(df, how='left', on='label')
    # Features with no match come back as NaN; treat them as zero importance.
    filled = joined.fillna(0.)
    return filled.set_index('label')['value']

In [10]:
# Weekly_Sales is a regression target, so use the regression helper
# (get_pred_array) rather than the classification one (get_proba_array).
train_pred = get_pred_array(df_train, 'Weekly_Sales.predicted')
test_pred = get_pred_array(df_test, 'Weekly_Sales.predicted')
train_pred.shape, test_pred.shape

((702,), (36,))

In [12]:
# Feature names, in the order used for the training dataset.
features = [
    'Store', 'Dept', 'Date',
    'MarkDown1', 'MarkDown2', 'MarkDown3', 'MarkDown4', 'MarkDown5',
    'IsHoliday',
]
# Remove the "Date" entry from the importances.
# !!! Deepchecks cannot treat a time variable as a feature !!!
fi = get_feature_inportance_series(df_fi, features).drop(index='Date')
fi

label
Store        1.000
Dept         0.518
MarkDown1    0.003
MarkDown2    0.000
MarkDown3    0.005
MarkDown4    0.000
MarkDown5    0.000
IsHoliday    0.001
Name: value, dtype: float64

In [13]:
# Columns handed to Deepchecks: the features plus the 'Weekly_Sales' label.
train_features = ['Store','Dept','Date','MarkDown1','MarkDown2','MarkDown3','MarkDown4','MarkDown5','IsHoliday','Weekly_Sales']


def _build_dataset(frame):
    '''Wrap the `train_features` columns of `frame` in a Deepchecks Dataset with the shared settings.'''
    return Dataset(frame[train_features],
                   label='Weekly_Sales',
                   datetime_name='Date',
                   cat_features=['Store','Dept','IsHoliday'])


# Train and test use identical column selection and Dataset settings.
ds_train = _build_dataset(df_train)
ds_test = _build_dataset(df_test)

In [14]:
# Display the training Dataset to check how each column was interpreted.
ds_train

Unnamed: 0,Column,DType,Kind,Additional Info
0,Date,datetime64,Datetime,
1,Weekly_Sales,floating,,
2,Store,integer,Categorical Feature,
3,Dept,integer,Categorical Feature,
4,MarkDown1,floating,Numerical Feature,
5,MarkDown2,floating,Numerical Feature,
6,MarkDown3,floating,Numerical Feature,
7,MarkDown4,floating,Numerical Feature,
8,MarkDown5,floating,Numerical Feature,
9,IsHoliday,integer,Categorical Feature,

Unnamed: 0,Date,Weekly_Sales,Store,Dept,...,MarkDown3,MarkDown4,MarkDown5,IsHoliday
0,2010-02-05,59554.57,4,4,...,-1.00,-1.00,-1.00,0
1,2010-02-12,54069.82,4,4,...,-1.00,-1.00,-1.00,1
2,2010-02-19,53939.17,4,4,...,-1.00,-1.00,-1.00,0
3,2010-02-26,54687.08,4,4,...,-1.00,-1.00,-1.00,0
4,2010-03-05,56959.02,4,4,...,-1.00,-1.00,-1.00,0
...,...,...,...,...,...,...,...,...,...
697,2012-03-30,44506.97,31,13,...,-1.00,1334.23,3691.10,0
698,2012-04-06,45709.61,31,13,...,21.86,3623.01,16629.10,0
699,2012-04-13,43005.37,31,13,...,13.89,1482.03,3076.80,0
700,2012-04-20,47289.81,31,13,...,16.05,311.94,7709.86,0


In [15]:
# Run the Deepchecks model-evaluation suite on precomputed predictions.
# This is a regression task, so predicted values are passed through y_pred_*;
# for classification, probabilities would go in via y_proba_train / y_proba_test instead.
suite = model_evaluation()
result = suite.run(
    train_dataset=ds_train,     # training data (Dataset)
    test_dataset=ds_test,       # test data (Dataset)
    feature_importance=fi,      # feature importance (Series)
    y_pred_train=train_pred,    # predictions on the training data (array)
    y_pred_test=test_pred,      # predictions on the test data (array)
)


feature_importance does not sum to 1. Normalizing to 1.



In [16]:
# Render the suite result inline in the notebook.
result.show()

Accordion(children=(VBox(children=(HTML(value='\n<h1 id="summary_FGSU4FY47BR0MJQY26QII7WPG">Model Evaluation S…

In [17]:
# Save the results as an HTML report.
# NOTE(review): the recorded output of this cell shows a ' (1)' suffix in the
# returned path, so the file actually written may differ from the requested
# name — check the returned value.
result.save_as_html('outputs/ModelEvaluation_DAI.html')

# To view the results, open the saved HTML file.

'outputs/ModelEvaluation_DAI (1).html'