In [1]:
import numpy as np
import os
from sklearn.preprocessing import OneHotEncoder
from alibi.datasets import fetch_adult
from alibi.explainers import CounterFactualProto
import pandas as pd 
import tensorflow as tf
from alibi.explainers import CounterFactual
from sklearn.ensemble import RandomForestClassifier
from sklearn import svm
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import MinMaxScaler, OneHotEncoder
from sklearn.compose import ColumnTransformer
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
from ceteris_paribus.profiles import individual_variable_profile
from ceteris_paribus.plots.plots import plot_notebook, plot
from ceteris_paribus.explainer import explain
import imgkit
import shutil
import base64
import matplotlib
import matplotlib.pyplot as plt
from pandas.plotting import table

In [2]:
data = pd.read_csv('titanic.csv')
data.rename(columns={'Survived': 'class'}, inplace=True)
data['Sex'] = data['Sex'].map({'male':'Male','female':'Female'})
data['Embarked'] = data['Embarked'].map({'S':'Southampton','C':'Cherbourg','Q':'Queenstown'})
data['Pclass'] = data['Pclass'].map({1:'First', 2:'Second', 3:'Third'})
data['Relatives'] = data['SibSp'] + data['Parch']

data = data.drop(['PassengerId', 'Name','Ticket','Cabin', 'SibSp', 'Parch'], axis=1)
data = data.dropna()

f = ['Pclass', 'Sex', 'Age', 'Fare', 'Embarked', 'Relatives']

features = data.drop('class', axis=1)
#print(features)

training_features, testing_features, training_target, testing_target = \
    train_test_split(features, data['class'].values, random_state=None)

In [3]:
numeric_features = ['Age', 'Fare', 'Relatives']
numeric_transformer = Pipeline(steps=[
    ('scaler', MinMaxScaler())])

categorical_features = ['Pclass', 'Sex', 'Embarked']
categorical_transformer = Pipeline(steps=[
    ('onehot', OneHotEncoder(handle_unknown='ignore'))])

preprocessor = ColumnTransformer(
    transformers=[
        ('num', numeric_transformer, numeric_features),
        ('cat', categorical_transformer, categorical_features)])

In [4]:
model = Pipeline(steps=[('preprocessor', preprocessor),
('classifier', svm.SVC(probability=True))])

In [5]:
model.fit(training_features, training_target)

Pipeline(memory=None,
         steps=[('preprocessor',
                 ColumnTransformer(n_jobs=None, remainder='drop',
                                   sparse_threshold=0.3,
                                   transformer_weights=None,
                                   transformers=[('num',
                                                  Pipeline(memory=None,
                                                           steps=[('scaler',
                                                                   MinMaxScaler(copy=True,
                                                                                feature_range=(0,
                                                                                               1)))],
                                                           verbose=False),
                                                  ['Age', 'Fare', 'Relatives']),
                                                 ('cat',
                                                  P

In [6]:
print("XGB {}".format(accuracy_score(testing_target, model.predict(testing_features))))

XGB 0.7808988764044944


In [7]:
explainer_xgb = explain(model, variable_names=f, data=training_features, y=training_target, label='XGB', predict_function=lambda x: model.predict_proba(x)[::, 1])

In [8]:
cp_xgb = individual_variable_profile(explainer_xgb, np.array(['Third', 'Male', 60, 10, 'Southampton', 0]), np.array([0]))

In [9]:
p = plot(cp_xgb, selected_variables=["Sex"], width=700, height=800, size=4)

In [13]:
print(p)

0


In [10]:
options = {
    'height' : '500',
    'width' : '600'
}

In [11]:
imgkit.from_file('_plot_files/plots0.html', 'temp/out.jpg', options=options)

Loading page (1/2)
Rendering (2/2)                                                    
Done                                                               


True

In [32]:
with open("temp/out.jpg", "rb") as image_file:
    encoded_string = base64.b64encode(image_file.read())

In [33]:
print(encoded_string.decode('utf-8'))

oAKKzvFXi7SfAuiS6nreqado+mwsiSXd9cpbwRs7BEBdyFBZmVRzySAOTXAfHP9sDwR+z9qWi2et6lHPdav4hsPDk0NpPA76RNeRzSQzXavIphgKQSNvOTgZCkZwAeoUVx91+0N4AsvANt4rm8c+D4fC17L5FvrD6zbLp88mSNiTl/LZsqwwGzwfSsi2/ay8BX37QGmfDK38QWNx4r1jQm8RWUMUyPFc2odVGxw2Gcq3mBRkmNWcfKCaAPR6KKKACiiigAooooAKKKKACiiigAooooAKK4X4/ftB6L+zn4f0LUtbtdUuoPEHiDT/DdutjHG7pcXswhidw7oBGGYFiCSB0U9Kd4j/aP8E+Hh4ngTxLomp6v4QsJ9R1PR7DUYJtSt44YzI4MG8MpwON20ZIyRQB3FFeefAf8Aal8EftFeANO1/wAO67pri90e01u406W9tzf6RBcwrNGLqJJG8ltjDOTjIOCRzWX4+/bS8AeC/A2jeI7HWLbxhpGt+JbHwpFceHLu21COG9u5lhQSMJQoVWYF8EsAeFPSgD1eiiigAooooAKK4b/hoLQLX4h+L9A1Bzo6eCrC01LUdTv7q1hsViuBIVO7zTIm3yzuMqIvI2lucc54B/bg+HXxC1nx/Db69Y2Wk/Dmeyh1HXby9to9IuRd2qXMUkFwJSrJscAs235sgZHNAHrlFU/D/iLT/Fui22paVfWep6deIJLe6tJlmgnU9GR1JVh7g1coAKKKKACiue034teFNZtNKns/E3h67g124ktNNkh1GGRNQmj3CSKEhsSOux8quSNjZ6GuhoAKKKKACiiigAooooAKKKKACiiigAooooAKKKKACiiigAooooAKKKKACiiigAooooAKKKKACiiigAooooAKKKKACiiigAooooAKKKKACiiigAoor56+KPjn4zaF+074Z8F6P4s+GNvpPjKx1jU7SW88FX1zcadFZPZhYpGTVo1mZhd8uFjAMf3PmwoBw

In [6]:
data = training_features.head()

In [7]:
css = """
<style type=\"text/css\">
table {
color: #333;
font-family: Helvetica, Arial, sans-serif;
width: 640px;
border-collapse:
collapse; 
border-spacing: 0;
}
td, th {
border: 1px solid transparent; /* No more visible border */
height: 30px;
}
th {
background: #DFDFDF; /* Darken header a bit */
font-weight: bold;
}
td {
background: #FAFAFA;
text-align: center;
}
table tr:nth-child(odd) td{
background-color: white;
}
</style>
"""

In [8]:
text_file = open("temp/head.html", "a")
# write the CSS
text_file.write(css)
# write the HTML-ized Pandas DataFrame
text_file.write(data.to_html())
text_file.close()

In [9]:
imgkitoptions = {"format": "jpg", 'width' : '600'}
imgkit.from_file("temp/head.html", "temp/head.jpg", options=imgkitoptions)

Loading page (1/2)
Rendering (2/2)                                                    
Done                                                               


True

In [14]:
print(not os.path.exists("temp/head.jpg"))

False
