In [1]:
import matplotlib
import matplotlib.pyplot as plt
import numpy as np
import os
from sklearn.preprocessing import OneHotEncoder
from time import time
from alibi.datasets import fetch_adult
from alibi.explainers import CounterFactualProto
import pandas as pd 
from mlobject import *
import tensorflow as tf
from alibi.explainers import CounterFactual
from sklearn.ensemble import RandomForestClassifier
from sklearn import svm

In [10]:
data = pd.read_csv('titanic.csv')
data.rename(columns={'Survived': 'class'}, inplace=True)
data['Sex'] = data['Sex'].map({'male':0,'female':1})
data['Embarked'] = data['Embarked'].map({'S':0,'C':1,'Q':2})
data['Relatives'] = data['SibSp'] + data['Parch']

data = data.drop(['PassengerId', 'Name','Ticket','Cabin', 'SibSp', 'Parch'], axis=1)
data = data.dropna()

f = ['Pclass', 'Sex', 'Age', 'Fare', 'Embarked', 'Relatives']

features = data.drop('class', axis=1).values
training_features, testing_features, training_target, testing_target = \
    train_test_split(features, data['class'].values, random_state=None)

In [5]:
train_df = pd.read_csv("titanic_train.csv")
train_df['Sex'] = train_df['Sex'].map({'Male':0,'Female':1})
train_df['Pclass'] = train_df['Pclass'].map({'First':0,'Second':1,'Third':2})
train_df['Embarked'] = train_df['Embarked'].map({'Southampton':0,'Cherbourg':1,'Queenstown':2})
#train_df = train_df.to_numpy()

test_df = pd.read_csv("titanic_test.csv")
test_df['Sex'] = test_df['Sex'].map({'Male':0,'Female':1})
test_df['Pclass'] = test_df['Pclass'].map({'First':1,'Second':2,'Third':3})
test_df['Embarked'] = test_df['Embarked'].map({'Southampton':0,'Cherbourg':1,'Queenstown':2})
#test_df = test_df.to_numpy()

X_train = train_df.loc[:, train_df.columns != 'class'].to_numpy()
y_train = train_df.loc[:, train_df.columns == 'class'].to_numpy()
X_test = test_df.loc[:, test_df.columns != 'class'].to_numpy()
y_test = test_df.loc[:, test_df.columns == 'class'].to_numpy()

In [3]:
print(training_features)

[[  1.       1.      18.     262.375    1.       4.    ]
 [  3.       1.      14.      11.2417   1.       1.    ]
 [  3.       0.       4.      11.1333   0.       2.    ]
 ...
 [  3.       0.      28.5     16.1      0.       0.    ]
 [  3.       0.      44.       8.05     0.       0.    ]
 [  3.       1.      22.       7.75     2.       0.    ]]


In [16]:
model = svm.SVC(gamma=0.001, C=100., probability=True)
    
model.fit(training_features, training_target)
certainty = metrics.accuracy_score(testing_target, model.predict(testing_features))
print("Accuracy:", certainty)

Accuracy: 0.7696629213483146


In [20]:
print(np.array([3, 0, 32, 10.0, 0, 0]))
print(model.predict_proba([np.array([1, 0, 60, 50, 0, 0])]))

[ 3.  0. 32. 10.  0.  0.]
[[0.80761428 0.19238572]]


In [8]:
predict_fn = lambda x: model.predict_proba(x)

In [9]:
shape = (1,) + X_train.shape[1:]


In [69]:
cf = CounterFactual(predict_fn, (1,6), distance_fn='l1', target_proba=0.6,
                    target_class='other', max_iter=1000, early_stop=50, lam_init=1e-1,
                    max_lam_steps=10, tol=0.05, learning_rate_init=0.1,
                    feature_range= (np.array([2, 0, 100, 100, 2, 4]), np.array([2, 1, 100, 100, 2, 4])))

In [70]:
explanation = cf.explain(np.array([2, 0, 32, 20.0, 0, 0], ndmin=2))


In [71]:
print(explanation['cf'])

{'X': array([[ 1.9992642 ,  0.55875885, 32.0012    , 20.002869  ,  0.        ,
         0.        ]], dtype=float32), 'distance': 0.5635648965835571, 'lambda': 0.055, 'index': 65, 'class': 1, 'proba': array([[0.44821809, 0.55178191]]), 'loss': 0.03332105363612798}


In [11]:
cf = CounterFactualProto(predict_fn,
                         shape,
                         beta=0.1,
                         cat_vars={0:3, 1:2, 4:3},
                         #cat_vars={0:3, 1:2, 4:3},
                         max_iterations=1000,
                         feature_range= (np.array([[1, 0, 0, 0, 0, 0]]), np.array([[3, 1, 90, 600, 2, 10]])),
                         #feature_range= (np.array([[-1, -1, -1, -1, -1, -1]]), np.array([[1, 1, 1, 1, 1, 1]])),
                         c_init=1.,
                         c_steps=5,
                         eps=(.01, .01)  # perturbation size for numerical gradients
                        )

Instructions for updating:
Colocations handled automatically by placer.
Instructions for updating:
Deprecated in favor of operator or tf.math.divide.
Instructions for updating:
Use tf.cast instead.


In [12]:
cf.fit(training_features, d_type='abdm', disc_perc=[25, 50, 75])

In [13]:
#explanation = cf.explain(testing_features[0].reshape((1,) + testing_features[0].shape))
explanation = cf.explain(np.array([1, 0, 60, 50, 0, 0], ndmin=2))

In [14]:
print(explanation['cf'])

{'X': array([[ 1.      ,  0.      , 75.774284, 46.530617,  0.      ,  8.349163]],
      dtype=float32), 'class': 1, 'proba': array([[0.48731952, 0.51268048]]), 'grads_graph': array([[ 3.6182404e-03, -7.1051586e-03,  3.2255707e+01, -7.2528000e+00,
        -0.0000000e+00,  1.7168257e+01]], dtype=float32), 'grads_num': array([[0., 0., 0., 0., 0., 0.]])}


In [22]:
print(training_features)

[[ 3.      0.     26.     14.4542  1.      1.    ]
 [ 3.      0.     27.      8.6625  0.      0.    ]
 [ 3.      0.     22.      7.225   1.      0.    ]
 ...
 [ 3.      0.     32.      7.8958  0.      0.    ]
 [ 3.      0.     25.      7.7417  2.      0.    ]
 [ 1.      0.     49.     56.9292  1.      1.    ]]


In [77]:
cat_vars = {0:3, 1:2}
#tf.constant([np.zeros(v) for _, v in cat_vars.items()])
k = [np.zeros(v) for _, v in cat_vars.items()]
print([np.zeros(v).tolist() for _, v in cat_vars.items()])
print([[0, 0, 0], [0]])
tf.ragged.constant([np.zeros(v).tolist() for _, v in cat_vars.items()])

[[0.0, 0.0, 0.0], [0.0, 0.0]]
[[0, 0, 0], [0]]


tf.RaggedTensor(values=Tensor("RaggedConstant_27/values:0", shape=(5,), dtype=float32), row_splits=Tensor("RaggedConstant_27/RaggedFromRowSplits/row_splits:0", shape=(3,), dtype=int64))

In [28]:
print(testing_features[0].reshape((1,) + testing_features[0].shape))


[[ 2.     0.    51.    12.525  0.     0.   ]]


In [43]:
print(np.array([2, 0, 32, 20.0, 0, 0], ndmin=2).shape)

(1, 6)


In [175]:
print(training_features[:,0])

[1. 1. 1. 2. 3. 3. 3. 3. 3. 2. 2. 3. 2. 3. 3. 3. 2. 3. 2. 2. 3. 3. 3. 3.
 3. 3. 3. 3. 1. 3. 3. 3. 2. 1. 3. 3. 3. 2. 3. 1. 2. 3. 1. 3. 3. 2. 2. 3.
 3. 1. 2. 2. 1. 3. 3. 2. 3. 3. 3. 3. 1. 2. 1. 1. 1. 1. 3. 2. 2. 1. 3. 2.
 3. 2. 3. 1. 3. 1. 2. 3. 3. 2. 3. 3. 1. 3. 2. 3. 2. 2. 2. 3. 3. 1. 3. 2.
 1. 1. 3. 2. 3. 3. 2. 3. 3. 3. 1. 1. 3. 1. 1. 3. 3. 3. 1. 3. 2. 1. 2. 3.
 3. 3. 3. 3. 3. 2. 2. 1. 1. 3. 3. 1. 3. 3. 2. 2. 3. 2. 2. 2. 1. 3. 1. 3.
 2. 2. 3. 3. 1. 1. 2. 2. 3. 3. 3. 3. 3. 2. 3. 3. 2. 3. 1. 2. 1. 1. 2. 3.
 1. 3. 1. 3. 1. 2. 3. 3. 3. 2. 1. 3. 3. 3. 1. 3. 1. 2. 1. 1. 2. 3. 3. 1.
 1. 1. 2. 1. 2. 1. 2. 2. 3. 3. 3. 3. 1. 1. 1. 3. 1. 2. 3. 1. 1. 1. 3. 1.
 3. 3. 1. 1. 1. 2. 1. 2. 3. 3. 3. 3. 1. 2. 3. 3. 3. 3. 3. 1. 1. 3. 1. 3.
 1. 3. 3. 3. 1. 2. 1. 1. 3. 1. 3. 1. 2. 2. 3. 2. 1. 3. 3. 3. 3. 3. 1. 1.
 3. 3. 1. 3. 2. 2. 3. 3. 2. 3. 3. 3. 3. 1. 3. 3. 2. 3. 2. 1. 3. 3. 1. 3.
 3. 2. 3. 2. 1. 3. 3. 3. 3. 3. 3. 3. 3. 3. 3. 2. 3. 3. 1. 1. 3. 2. 3. 1.
 2. 3. 2. 3. 3. 1. 1. 1. 2. 3. 1. 1. 3. 1. 1. 3. 3.