<a href="https://colab.research.google.com/github/ykato27/Regression-Models/blob/main/interpretML.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
!pip install interpret

Collecting interpret
  Downloading https://files.pythonhosted.org/packages/46/ce/444e5098422d15d28db4498da608866b14d8a18a8be68630af1146c80984/interpret-0.2.4-py3-none-any.whl
Collecting interpret-core[dash,debug,decisiontree,ebm,lime,linear,notebook,plotly,required,sensitivity,shap,skoperules,treeinterpreter]>=0.2.4
[?25l  Downloading https://files.pythonhosted.org/packages/d5/b6/e90ac757fda64caaea262c9fcce2d02fb4d141236aa40ce5f62c4d66efe1/interpret_core-0.2.4-py3-none-any.whl (5.8MB)
[K     |████████████████████████████████| 5.8MB 7.7MB/s 
[?25hCollecting dash-table>=4.1.0; extra == "dash"
[?25l  Downloading https://files.pythonhosted.org/packages/97/f7/f4969a926f20a55d3e5970d01b85ff9ad510dba32de189e72dd8f4992740/dash_table-4.11.3.tar.gz (1.8MB)
[K     |████████████████████████████████| 1.8MB 43.2MB/s 
[?25hCollecting dash-cytoscape>=0.1.1; extra == "dash"
[?25l  Downloading https://files.pythonhosted.org/packages/a1/98/93b356b47aca71d4fb1065990137b6b75eb527e8d1cd0e87dc037cead1

## interpretMLのサンプルコード

In [2]:
# ライブラリーのインポート
import os

import pandas as pd
import numpy as np

import seaborn as sns
import matplotlib.pyplot as plt
%matplotlib inline

# ボストンの住宅価格データ
from sklearn.datasets import load_boston

# 前処理
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split

# interpretML
from interpret import show
from interpret.data import ClassHistogram, Marginal
from interpret.glassbox import ExplainableBoostingRegressor
from interpret.perf import RegressionPerf

# 回帰モデル
from sklearn.ensemble import RandomForestRegressor

# 評価指標
from sklearn.metrics import r2_score
from sklearn.metrics import mean_absolute_error
from sklearn.metrics import mean_squared_error

In [3]:
# データセットの読込み
boston = load_boston()
feature_names = list(boston.feature_names)

# 説明変数の格納
df = pd.DataFrame(boston.data, columns = boston.feature_names)
# 目的変数の追加
df['MEDV'] = boston.target

# データの中身を確認
df.head()

Unnamed: 0,CRIM,ZN,INDUS,CHAS,NOX,RM,AGE,DIS,RAD,TAX,PTRATIO,B,LSTAT,MEDV
0,0.00632,18.0,2.31,0.0,0.538,6.575,65.2,4.09,1.0,296.0,15.3,396.9,4.98,24.0
1,0.02731,0.0,7.07,0.0,0.469,6.421,78.9,4.9671,2.0,242.0,17.8,396.9,9.14,21.6
2,0.02729,0.0,7.07,0.0,0.469,7.185,61.1,4.9671,2.0,242.0,17.8,392.83,4.03,34.7
3,0.03237,0.0,2.18,0.0,0.458,6.998,45.8,6.0622,3.0,222.0,18.7,394.63,2.94,33.4
4,0.06905,0.0,2.18,0.0,0.458,7.147,54.2,6.0622,3.0,222.0,18.7,396.9,5.33,36.2


In [4]:
# ランダムシード値
RANDOM_STATE = 10

# 学習データと評価データの割合
TEST_SIZE = 0.2

# 学習データと評価データを作成
x_train, x_test, y_train, y_test = train_test_split(df.iloc[:, 0:df.shape[1]-1],
                                                    df.iloc[:, df.shape[1]-1],
                                                    test_size=TEST_SIZE,
                                                    random_state=RANDOM_STATE)

In [5]:
# モデルの学習
rf = RandomForestRegressor(n_estimators=100, n_jobs=-1)
rf.fit(x_train, y_train)

RandomForestRegressor(bootstrap=True, ccp_alpha=0.0, criterion='mse',
                      max_depth=None, max_features='auto', max_leaf_nodes=None,
                      max_samples=None, min_impurity_decrease=0.0,
                      min_impurity_split=None, min_samples_leaf=1,
                      min_samples_split=2, min_weight_fraction_leaf=0.0,
                      n_estimators=100, n_jobs=-1, oob_score=False,
                      random_state=None, verbose=0, warm_start=False)

In [6]:
rf_perf = RegressionPerf(rf.predict).explain_perf(x_test, y_test, name='rf')
show(rf_perf)

  detected_envs


#### Local Explanations: How an individual prediction was made

In [7]:
from interpret.blackbox import LimeTabular
from interpret import show

#Blackbox explainers need a predict function, and optionally a dataset
lime = LimeTabular(predict_fn=rf.predict, data=x_train, random_state=1)

#Pick the instances to explain, optionally pass in labels if you have them
lime_local = lime.explain_local(x_test[:5], y_test[:5], name='LIME')

show(lime_local)

In [8]:
from interpret.blackbox import ShapKernel
import numpy as np

background_val = np.median(x_train, axis=0).reshape(1, -1)
shap = ShapKernel(predict_fn=rf.predict, data=background_val, feature_names=feature_names)
shap_local = shap.explain_local(x_test[:5], y_test[:5], name='SHAP')
show(shap_local)

HBox(children=(FloatProgress(value=0.0, max=5.0), HTML(value='')))




#### Global Explanations: How the model behaves overall

In [9]:
from interpret.blackbox import MorrisSensitivity

sensitivity = MorrisSensitivity(predict_fn=rf.predict, data=x_train)
sensitivity_global = sensitivity.explain_global(name="Global Sensitivity")

show(sensitivity_global)

In [10]:
from interpret.blackbox import PartialDependence

pdp = PartialDependence(predict_fn=rf.predict, data=x_train)
pdp_global = pdp.explain_global(name='Partial Dependence')

show(pdp_global)

In [11]:
show([rf_perf, lime_local, shap_local, sensitivity_global, pdp_global])

#### EBMで学習

In [12]:
ebm = ExplainableBoostingRegressor(random_state=42)
ebm.fit(x_train, y_train)

ExplainableBoostingRegressor(binning='quantile', early_stopping_rounds=50,
                             early_stopping_tolerance=0.0001,
                             feature_names=['CRIM', 'ZN', 'INDUS', 'CHAS',
                                            'NOX', 'RM', 'AGE', 'DIS', 'RAD',
                                            'TAX', 'PTRATIO', 'B', 'LSTAT',
                                            'DIS x LSTAT', 'NOX x LSTAT',
                                            'CRIM x LSTAT', 'AGE x LSTAT',
                                            'NOX x DIS', 'DIS x B', 'NOX x RM',
                                            'TAX x LSTAT', 'INDUS x LSTAT',
                                            'RM x PTRATIO'],
                             feature_types=['continuous', 'c...
                                            'continuous', 'interaction',
                                            'interaction', 'interaction',
                                            'interaction'

In [13]:
ebm_global = ebm.explain_global(name='EBM')
show(ebm_global)

In [14]:
ebm_local = ebm.explain_local(x_test, y_test, name='EBM')
show(ebm_local)

In [15]:
ebm_perf = RegressionPerf(ebm.predict).explain_perf(x_test, y_test, name="EBM")
show(ebm_perf)