In [None]:
!pip install --upgrade azureml-sdk[notebooks,explain]
!pip install --upgrade azureml-interpret

In [1]:
import pandas as pd
import numpy as np
import joblib
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import roc_auc_score
from sklearn.metrics import roc_curve
from sklearn import preprocessing
from sklearn.preprocessing import LabelEncoder
from sklearn.preprocessing import MinMaxScaler

# load the diabetes dataset
print("Loading Data...")
data = pd.read_csv('data2/car-prediction.csv')

# applying label encoding
pre = preprocessing.LabelEncoder()
data = data.apply(pre.fit_transform)

#Normalize the numeric columns
scaler = MinMaxScaler()
num_cols = ['Selling_Price','Present_Price','Kms_Driven','car_age']
data[num_cols] = scaler.fit_transform(data[num_cols])

features = ['Selling_Price','Present_Price','Kms_Driven','Fuel_Type','Seller_Type','Transmission','car_age']
labels = ['not-Sold', 'Sold']
x, y = data[features].values, data['Owner'].values

# Split data into training set and test set
x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.20, random_state=0)

# Train a decision tree model
print('Training a decision tree model')
# model = DecisionTreeClassifier().fit(x_train, y_train)
model = LogisticRegression().fit(x_train, y_train)


# calculate accuracy
y_hat = model.predict(x_test)
acc = np.average(y_hat == y_test)
print('Accuracy:', acc)

# calculate AUC
y_scores = model.predict_proba(x_test)
auc = roc_auc_score(y_test,y_scores[:,1])
print('AUC: ' + str(auc))

print('Model trained.')

Loading Data...
Training a decision tree model
Accuracy: 0.9180327868852459
AUC: 0.6499999999999999
Model trained.


In [2]:
## model explainer

from interpret.ext.blackbox import TabularExplainer

# "features" and "classes" fields are optional
tab_explainer = TabularExplainer(model,
                             x_train, 
                             features=features, 
                             classes=labels)
print(tab_explainer, "ready!")

TabularExplainer ready!


The option feature_dependence has been renamed to feature_perturbation!
The option feature_perturbation="independent" is has been renamed to feature_perturbation="interventional"!
The feature_perturbation option is now deprecated in favor of using the appropriate masker (maskers.Independent, or maskers.Impute)


In [3]:
# Checking Global Importance of the Features

# you can use the training data or the test data here
global_tab_explanation = tab_explainer.explain_global(x_train)

# Get the top features by importance
global_tab_feature_importance = global_tab_explanation.get_feature_importance_dict()
for feature, importance in global_tab_feature_importance.items():
    print(feature,":", importance)

Seller_Type : 0.2896897612024229
Present_Price : 0.18264041815778764
Selling_Price : 0.15567785851149746
Fuel_Type : 0.12156829926425111
car_age : 0.07896360087411795
Transmission : 0.07547978951953974
Kms_Driven : 0.06417052611225581


In [4]:
# Checking the local feature Importance

# Get the observations we want to explain (the first two)
x_explain = x_test[0:2]

# Get predictions
predictions = model.predict(x_explain)

# Get local explanations
local_tab_explanation = tab_explainer.explain_local(x_explain)

# Get feature names and importance for each possible label
local_tab_features = local_tab_explanation.get_ranked_local_names()
local_tab_importance = local_tab_explanation.get_ranked_local_values()

for l in range(len(local_tab_features)):
    print('Support for', labels[l])
    label = local_tab_features[l]
    for o in range(len(label)):
        print("\tObservation", o + 1)
        feature_list = label[o]
        total_support = 0
        for f in range(len(feature_list)):
            print("\t\t", feature_list[f], ':', local_tab_importance[l][o][f])
            total_support += local_tab_importance[l][o][f]
        print("\t\t ----------\n\t\t Total:", total_support, "Prediction:", labels[predictions[o]])

Support for not-Sold
	Observation 1
		 Fuel_Type : 0.2720491850815395
		 Selling_Price : 0.19320256705624103
		 Seller_Type : 0.17354686154983903
		 Present_Price : 0.1360638668794778
		 car_age : 0.046840363755426905
		 Transmission : -0.03592691612212551
		 Kms_Driven : -0.09344326139935875
		 ----------
		 Total: 0.69233266680104 Prediction: not-Sold
	Observation 2
		 Kms_Driven : 0.10415517672950228
		 Transmission : -0.03592691612212551
		 Fuel_Type : -0.08126144489448582
		 car_age : -0.09194589922361625
		 Selling_Price : -0.2274367791968383
		 Present_Price : -0.25846028976105156
		 Seller_Type : -0.49394106748800337
		 ----------
		 Total: -1.0848172199566186 Prediction: not-Sold
Support for Sold
	Observation 1
		 Kms_Driven : 0.09344326139935875
		 Transmission : 0.03592691612212551
		 car_age : -0.046840363755426905
		 Present_Price : -0.1360638668794778
		 Seller_Type : -0.17354686154983903
		 Selling_Price : -0.19320256705624103
		 Fuel_Type : -0.2720491850815395
		 ------