In [1]:
import sys
# Put the parent directory on the module search path; presumably this is what
# lets the `src.surrogate.shap` import below resolve when run from this folder.
sys.path.append("../")

In [2]:
from copy import deepcopy

import pandas as pd
import numpy as np
import shap
from sklearn.linear_model import LinearRegression, ElasticNet

from src.surrogate.shap import (
    LinearShap,
    ExactShap,
)

## Linear SHAP

In [3]:
"""
Build a tiny two-feature regression dataset whose target is an exact
linear function of the features: y = 4*x1 + 2*x2 + 1.
"""

# Seed the global NumPy RNG so the row sampling done later is reproducible.
np.random.seed(10)

# Feature columns, seven observations each.
x1 = np.array([3, 7, 3, 3, 6, 2, 9])
x2 = np.array([1, 9, -5, 7, 10, -2, 5])

# (7, 2) feature matrix and the noise-free target.
X = np.stack((x1, x2), axis=1)
y = 1 + 4 * x1 + 2 * x2

In [4]:
"""
Fit an ordinary least-squares linear regression on (X, y).
"""

model = LinearRegression()
# Kept as a bare expression so the cell still displays the fitted estimator.
model.fit(X=X, y=y)

In [5]:
"""
Draw the sample rows that are handed to the explainers
(the data SHAP uses to process missing values).
"""

# Five row positions from the global NumPy RNG; np.random.choice samples
# with replacement by default, so a row may appear more than once.
sampled_row = np.random.choice(np.arange(len(y)), size=5)
X_sampled = X[sampled_row]

In [6]:
"""
Initialize the Linear SHAP explainers that the following cells compare.
- `custom_explainer`: custom implementation (`LinearShap` from `src.surrogate.shap`)
- `shap_explainer`: shap library (`shap.LinearExplainer`)
Both are built from the same fitted `model` and the same `X_sampled` rows.
"""

custom_explainer = LinearShap(model, X_sampled)
shap_explainer = shap.LinearExplainer(model, X_sampled)

### Case 1. Without Missing Values

In [7]:
# Query point to explain: both features observed.
x = np.array([2.0, 1.0])

In [8]:
# Side-by-side report: baselines first, then the attributions for `x`,
# with the custom implementation listed before the shap library each time.
print(
    "Baseline",
    f"- Custom: {custom_explainer.expected_value:.4f}",
    f"- SHAP Library: {shap_explainer.expected_value:.4f}",
    "",
    "Shapley Value",
    f"- Custom: {custom_explainer.shap_values(x)}",
    f"- SHAP Library: {shap_explainer.shap_values(x)}",
    sep="\n",
)

Baseline
- Custom: 31.8000
- SHAP Library: 31.8000

Shapley Value
- Custom: [-12.   -8.8]
- SHAP Library: [-12.   -8.8]


### Case 2. With Missing Values

In [9]:
# Query point to explain: the first feature is missing (NaN).
x = np.array([np.nan, 1.0])

In [10]:
# Same side-by-side report as before, now for the `x` with a missing feature.
print(
    "Baseline",
    f"- Custom: {custom_explainer.expected_value:.4f}",
    f"- SHAP Library: {shap_explainer.expected_value:.4f}",
    "",
    "Shapley Value",
    f"- Custom: {custom_explainer.shap_values(x)}",
    f"- SHAP Library: {shap_explainer.shap_values(x)}",
    sep="\n",
)

Baseline
- Custom: 31.8000
- SHAP Library: 31.8000

Shapley Value
- Custom: [ 0.  -8.8]
- SHAP Library: [ nan -8.8]


## Exact SHAP

In [11]:
"""
Rebuild the two-feature sample dataset (y = 4*x1 + 2*x2 + 1) and reset
the random seed so this section starts from the same state.
"""

# Re-seed the global NumPy RNG before the row sampling done later.
np.random.seed(10)

# Feature columns, seven observations each.
x1 = np.array([3, 7, 3, 3, 6, 2, 9])
x2 = np.array([1, 9, -5, 7, 10, -2, 5])

# (7, 2) feature matrix and the noise-free target.
X = np.stack((x1, x2), axis=1)
y = 1 + 4 * x1 + 2 * x2

In [12]:
"""
Fit an elastic-net regression (L1 + L2 regularization) on (X, y).
"""

model = ElasticNet()
# Kept as a bare expression so the cell still displays the fitted estimator.
model.fit(X=X, y=y)

In [13]:
"""
Draw the sample rows that are handed to the explainers
(the data SHAP uses to process missing values).
"""

# Five row positions from the global NumPy RNG; np.random.choice samples
# with replacement by default, so a row may appear more than once.
sampled_row = np.random.choice(np.arange(len(y)), size=5)
X_sampled = X[sampled_row]

In [14]:
"""
Initialize the Exact SHAP explainers that the following cells compare.
- `custom_explainer`: custom implementation (`ExactShap` from `src.surrogate.shap`)
- `shap_explainer`: shap library (`shap.Explainer`)
Both are built from the same fitted `model` and the same `X_sampled` rows.
"""

# This section demonstrates Exact SHAP, so instantiate the imported ExactShap
# class; the previous code built LinearShap again, leaving ExactShap untested.
# NOTE(review): assumes ExactShap accepts (model, data) and exposes
# `expected_value` / `shap_values(x)` like LinearShap does — confirm.
custom_explainer = ExactShap(model, X_sampled)
# NOTE(review): shap.Explainer selects its algorithm automatically; verify that
# it is the intended reference for an "exact" comparison on this model.
shap_explainer = shap.Explainer(model, X_sampled)

### Case 1. Without Missing Values

In [15]:
# Query point to explain: both features observed.
x = np.array([2.0, 1.0])

In [16]:
# Side-by-side report: baselines first, then the attributions for `x`,
# with the custom implementation listed before the shap library each time.
print(
    "Baseline",
    f"- Custom: {custom_explainer.expected_value:.4f}",
    f"- SHAP Library: {shap_explainer.expected_value:.4f}",
    "",
    "Shapley Value",
    f"- Custom: {custom_explainer.shap_values(x)}",
    f"- SHAP Library: {shap_explainer.shap_values(x)}",
    sep="\n",
)

Baseline
- Custom: 28.1721
- SHAP Library: 28.1721

Shapley Value
- Custom: [-7.71805525 -8.33368842]
- SHAP Library: [-7.71805525 -8.33368842]


### Case 2. With Missing Values

In [17]:
# Query point to explain: the first feature is missing (NaN).
x = np.array([np.nan, 1.0])

In [18]:
# Same side-by-side report as before, now for the `x` with a missing feature.
print(
    "Baseline",
    f"- Custom: {custom_explainer.expected_value:.4f}",
    f"- SHAP Library: {shap_explainer.expected_value:.4f}",
    "",
    "Shapley Value",
    f"- Custom: {custom_explainer.shap_values(x)}",
    f"- SHAP Library: {shap_explainer.shap_values(x)}",
    sep="\n",
)

Baseline
- Custom: 28.1721
- SHAP Library: 28.1721

Shapley Value
- Custom: [ 0.         -8.33368842]
- SHAP Library: [        nan -8.33368842]


## References
- https://zephyrus1111.tistory.com/271
- https://towardsdatascience.com/introduction-to-shap-values-and-their-application-in-machine-learning-8003718e6827