In [1]:
import pandas as pd
import matplotlib.pyplot as plt
import shap
import joblib

In [23]:
# Load the LightGBM model
model = joblib.load('../lightgbm_model.joblib')

# Load the test dataset
X_test = pd.read_csv('../Dataset/xtest.csv')

# Randomly sample a row from the test dataset
single_record = X_test.sample(n=1)

# Predict the outcome using the model
prediction = model.predict(single_record)

print(prediction[0])

1


In [None]:
intro = "The following 3 variables most significantly influenced the prediction (in order of most to least):"
details = []
# Enumerate gives you both an index (starting from 0 by default) and the value
for count, (idx, row) in enumerate(top_features.iterrows(), start=1):
    details.append(f"{count}. {row['feature']}")
explanation = f"{intro}\n" + "\n".join(details)
print(explanation)

# Codeblock to use for Flask App

In [27]:
# Initialize the SHAP Explainer
explainer = shap.TreeExplainer(model)

# Calculate SHAP values for the sampled row
shap_values = explainer.shap_values(single_record)

# Check if the SHAP values are in the expected format (list with one element)
if isinstance(shap_values, list):
    shap_values_single = shap_values[0]  # shap_values[0] will contain the actual SHAP values for binary classification
else:
    shap_values_single = shap_values

# Create a DataFrame for SHAP values
feature_names = X_test.columns
shap_df = pd.DataFrame({
    'feature': feature_names,
    'shap_value': shap_values_single[0]  # Assuming shap_values_single is correctly formatted for single instance
})
shap_df['abs_shap_value'] = shap_df['shap_value'].abs()
top_features = shap_df.sort_values(by='shap_value', ascending=False)

intro = "The following 3 variables most significantly influenced the prediction (in order of most to least):"
details = []
# Enumerate gives you both an index (starting from 0 by default) and the value
for count, (idx, row) in enumerate(top_features.head(3).iterrows(), start=1):
    details.append(f"{count}. {row['feature']}")
explanation = f"{intro}\n" + "\n".join(details)
print(explanation)

The following 3 variables most significantly influenced the prediction (in order of most to least):
1. mean_HR_sleep
2. Heart Rate
3. RR_var_sleep




In [28]:
top_features_new = top_features.head(3)
top_features_new.loc[len(top_features.index)] = ['Other', sum(top_features['shap_value'][3:]), sum(top_features['abs_shap_value'][3:])]

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  top_features_new.loc[len(top_features.index)] = ['Other', sum(top_features['shap_value'][3:]), sum(top_features['abs_shap_value'][3:])]


Unnamed: 0,feature,shap_value,abs_shap_value
0,mean_HR_sleep,2.449665,2.449665
9,Heart Rate,1.036343,1.036343
3,RR_var_sleep,0.751956,0.751956
10,Other,-0.136952,3.270037


In [None]:
import matplotlib.pyplot as plt
import seaborn as sns

sns.set(font_scale = 1.2)
plt.figure(figsize=(8,8))

labels = top_features_new['feature']
sizes = top_features_new['relative_importance']
# colors = ['darkblue', 'mediumblue', 'dodgerblue','black']  # Different shades of green for visual appeal

# Create a pie chart with a hole in the middle (donut chart)
fig, ax = plt.subplots()
wedges, texts, autotexts = ax.pie(sizes, 
                                  labels=labels, 
                                  autopct='%0.1f%%', 
                                  startangle=90, 
                                  pctdistance=0.80,
                                  explode=[0.025, 0.025, 0.025, 0.025],
                                  colors=sns.color_palette('Set2'))

# Add a circle at the center to create a donut-like chart
hole = plt.Circle((0, 0), 0.6, facecolor='white')
plt.gcf().gca().add_artist(hole)

for text in texts:
    text.set_fontweight('bold')
    # text.set_horizontalalignment('center')
 
# Customize percent labels
for autotext in autotexts:
    # autotext.set_horizontalalignment('center')
    autotext.set_fontstyle('italic')

ax.axis('equal')  # Equal aspect ratio ensures that pie is drawn as a circle.
plt.title('Top 3 Factors Influencing Prediction')
plt.tight_layout()
plt.show()