In [102]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import sklearn as sk
from sklearn.pipeline import Pipeline 
dataset = pd.read_csv('../data/uci ai4i2020.csv')
features_to_keep = ['Type','Air temperature [K]','Process temperature [K]','Rotational speed [rpm]','Torque [Nm]','Tool wear [min]']
X = dataset[features_to_keep]
y = dataset['Machine failure'].values
from sklearn.compose import ColumnTransformer
from sklearn.preprocessing import OneHotEncoder
ct = ColumnTransformer(
    transformers=[("encoder", OneHotEncoder(drop="first", handle_unknown="ignore"), ["Type"])],
    remainder="passthrough"
)
X = ct.fit_transform(X)
from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split( X, y, test_size=0.2, random_state=1)
from sklearn.impute import SimpleImputer
imputer = SimpleImputer(missing_values=np.nan, strategy='mean')
X_train = imputer.fit_transform(X_train)
X_test = imputer.transform(X_test)
from sklearn.linear_model import LogisticRegression
model = LogisticRegression()
model.fit(X_train, y_train)
print("Train Accuracy:", model.score(X_train, y_train))
print("Test Accuracy:", model.score(X_test, y_test))
print(model.coef_)
print(model.intercept_)
import dice_ml
from dice_ml.utils import helpers
features_to_keep = ['Type', 'Air temperature [K]', 
                    'Process temperature [K]', 'Rotational speed [rpm]',
                     'Torque [Nm]', 'Tool wear [min]']
df_dice = dataset[features_to_keep + ['Machine failure']]
data_dice = dice_ml.Data(
    dataframe=df_dice,
    continuous_features=['Air temperature [K]',
                          'Process temperature [K]', 'Rotational speed [rpm]',
                          'Torque [Nm]', 'Tool wear [min]'],
    categorical_features=['Type'],
    outcome_name='Machine failure'
)

target_model = Pipeline(steps=[
    ('preprocessor', ct),
    ('classifier', model)
])

model_di = dice_ml.Model(model=target_model, backend='sklearn')
explainer = dice_ml.Dice(data_dice, model_di, method='genetic')
feature_names = ['Product ID', 
    'Type', 
    'Air temperature [K]', 
    'Process temperature [K]', 
    'Rotational speed [rpm]', 
    'Torque [Nm]', 
    'Tool wear [min]']
input_datapoint = dataset.loc[[99], features_to_keep]   
features_to_vary=['Air temperature [K]','Process temperature [K]','Tool wear [min]',
                  'Torque [Nm]','Rotational speed [rpm]']
permitted_range = {f: [dataset[f].min(), dataset[f].max()] for f in features_to_vary}
print ( "gentic counterfactual  method explantion")
print ("<------------------------>")
counterfactuals = explainer.generate_counterfactuals(
    input_datapoint,
    total_CFs=4,
    desired_class="opposite",
    features_to_vary=features_to_vary,
    permitted_range=permitted_range
)
counterfactuals.visualize_as_dataframe(show_only_changes=True) 
print ( "random counterfactual method explntion")
print("<-------------------->")
explainer2 = dice_ml.Dice(data_dice, model_di, method='random')
counterfactuals = explainer2.generate_counterfactuals(
         input_datapoint, 
         total_CFs=4, 
        desired_class='opposite',
        features_to_vary=features_to_vary,
         permitted_range= permitted_range
            ,sample_size=10000)
counterfactuals.visualize_as_dataframe(show_only_changes=True)
explainer3 = dice_ml.Dice(data_dice, model_di, method='kdtree')
print ( "kdtree counterfactual method explntion")
print("<-------------------->")
counterfactuals = explainer3.generate_counterfactuals(
         input_datapoint, 
         total_CFs=4, 
        desired_class="opposite",
        features_to_vary=features_to_vary,
         permitted_range= permitted_range)
counterfactuals.visualize_as_dataframe(show_only_changes=True)


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


Train Accuracy: 0.969375
Test Accuracy: 0.9735
[[ 0.36017881 -0.0138746   0.7916988  -0.87508908  0.0108625   0.26809426
   0.01255765]]
[-0.01066651]
gentic counterfactual  method explantion
<------------------------>


100%|██████████| 1/1 [00:00<00:00,  5.01it/s]

Query instance (original outcome : 0)





Unnamed: 0,Type,Air temperature [K],Process temperature [K],Rotational speed [rpm],Torque [Nm],Tool wear [min],Machine failure
0,L,298.799988,308.799988,1523,44.400002,55,0



Diverse Counterfactual set (new outcome: 1)


Unnamed: 0,Type,Air temperature [K],Process temperature [K],Rotational speed [rpm],Torque [Nm],Tool wear [min],Machine failure
0,-,-,-,-,65.7,191,1
0,-,-,-,-,65.7,191,1
0,-,-,-,-,60.3,163,1
0,-,-,-,-,62.9,134,1


random counterfactual method explntion
<-------------------->


100%|██████████| 1/1 [00:00<00:00,  1.16it/s]

Query instance (original outcome : 0)





Unnamed: 0,Type,Air temperature [K],Process temperature [K],Rotational speed [rpm],Torque [Nm],Tool wear [min],Machine failure
0,L,298.799988,308.799988,1523,44.400002,55,0



Diverse Counterfactual set (new outcome: 1)


Unnamed: 0,Type,Air temperature [K],Process temperature [K],Rotational speed [rpm],Torque [Nm],Tool wear [min],Machine failure
0,-,-,-,2060,-,-,1
1,-,-,-,2598,-,98,1
2,-,-,-,-,71.9,-,1
3,-,-,-,-,71.8,-,1


kdtree counterfactual method explntion
<-------------------->


100%|██████████| 1/1 [00:00<00:00,  8.09it/s]

Query instance (original outcome : 0)





Unnamed: 0,Type,Air temperature [K],Process temperature [K],Rotational speed [rpm],Torque [Nm],Tool wear [min],Machine failure
0,L,298.799988,308.799988,1523,44.400002,55,0



Diverse Counterfactual set (new outcome: 1)


Unnamed: 0,Type,Air temperature [K],Process temperature [K],Rotational speed [rpm],Torque [Nm],Tool wear [min],Machine failure
2299,-,-,-,-,62.9,134,1
4354,-,-,-,-,58.1,145,1
4002,-,-,-,-,59.2,153,1
5617,-,-,-,-,61.5,127,1
