# FastWoe Explanation

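This notebook shows how to fit a `FastWoe` encoder on categorical features and then use the `WeightOfEvidence` explainer to explain the model's predictions for individual samples.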

In [1]:
import sys
import warnings
from pathlib import Path

import pandas as pd

# Resolve the repository root: when the notebook is started from inside the
# "examples" folder the root is its parent, otherwise it is the current
# working directory.
ROOT_DIR = Path.cwd()
if ROOT_DIR.name == "examples":
    ROOT_DIR = ROOT_DIR.parent

print(f"Root directory: {ROOT_DIR}")

# Put the root directory on the import path BEFORE importing fastwoe.
# Doing it after the import (as this cell previously did) cannot influence
# which copy of the package gets loaded.
sys.path.insert(0, str(ROOT_DIR))

from fastwoe import FastWoe, WeightOfEvidence  # noqa: E402

# NOTE(review): this silences every warning for the rest of the session,
# including any emitted by pandas or fastwoe — consider narrowing the filter.
warnings.filterwarnings("ignore")

# Location of the example dataset, relative to the repository root.
data_path = ROOT_DIR / "data" / "BankCaseStudyData.csv"

Root directory: /Users/deburky/Documents/Git-xRiskLab/fastwoe


In [2]:
dataset = pd.read_csv(data_path)

# Prepare features and labels.
# The target is recoded to integers: "Accept" -> 0, "Decline" -> 1.
label = "Final_Decision"
dataset[label] = dataset[label].map({"Accept": 0, "Decline": 1})

# Numerical candidates are currently disabled; uncomment entries to use them.
num_features = [
    # "Application_Score",
    # "Bureau_Score",
    # "Loan_Amount",
    # "Time_with_Bank",
    # "Time_in_Employment",
    # "Loan_to_income",
    # "Gross_Annual_Income",
]

cat_features = [
    "Existing_Customer_Flag",
    "Loan_Payment_Frequency",
    "Residential_Status",
    "Cheque_Card_Flag",
    "Home_Telephone_Number",
]

features = num_features + cat_features

# Row masks for the two partitions recorded in the "split" column.
ix_train = dataset["split"] == "Development"
ix_test = dataset["split"] == "Validation"

# .copy() gives each split its own data, so the column assignments below
# never write into (or warn about writing into) a slice of `dataset`.
X_train = dataset.loc[ix_train, features].copy()
y_train = dataset.loc[ix_train, label]
X_test = dataset.loc[ix_test, features].copy()
y_test = dataset.loc[ix_test, label]

# Fill missing values BEFORE casting to str. With the reverse order,
# astype(str) can turn NaN into the literal string "nan", which leaves
# fillna("NA") with nothing to fill.
# Whole-column assignment (instead of .loc[:, cols] = ...) lets the columns
# take the new string dtype rather than being written into the old one.
X_train[cat_features] = X_train[cat_features].fillna("NA").astype(str)
X_test[cat_features] = X_test[cat_features].fillna("NA").astype(str)

In [3]:
# Fit the WOE encoder on the training split.
# NOTE(review): warn_on_numerical=True presumably asks FastWoe to warn when
# it receives numerical columns — confirm against the fastwoe documentation.
encoder = FastWoe(warn_on_numerical=True)
encoder.fit(X_train, y_train)


# Show the head of the mapping table for the first feature in `features`.
print("\nFirst feature mapping (first 5 bins):")
first_mapping = encoder.get_mapping(features[0])
display(first_mapping.head())

# Score the test split, keeping column 1 of the predict_proba output.
test_proba = encoder.predict_proba(X_test)
preds = test_proba[:, 1]

# Build the explainer from the fitted encoder and the training data.
explainer = WeightOfEvidence(encoder, X_train, y_train)

# Explain a single test sample.
# NOTE(review): idx = -1 presumably selects the last row — verify.
idx = -1
explanation = explainer.explain(X_test, sample_idx=idx, true_labels=y_test)

# Summary of the explanation, emitted as one block of text.
summary_lines = [
    f"\nExplanation for sample {idx}:",
    f"True label: {explanation['true_label']}",
    f"Predicted label: {explanation['predicted_label']}",
    f"WOE Evidence: {explanation['total_woe']:.3f}",
    f"Interpretation: {explanation['interpretation']}",
]
print("\n".join(summary_lines))

# Per-feature WOE contributions, when the explanation provides them.
if "feature_contributions" in explanation:
    contributions = explanation["feature_contributions"]
    print("\nFeature contributions:")
    for feature, woe_val in contributions.items():
        print(f"  {feature}: {woe_val:.3f}")


First feature mapping (first 5 bins):


Unnamed: 0,category,count,count_pct,good_count,bad_count,event_rate,woe,woe_se,woe_ci_lower,woe_ci_upper
0,N,17811,89.56102,15822,1989,0.111673,0.068096,0.02379,0.021469,0.114724
1,Y,2076,10.43898,1975,101,0.048651,-0.831337,0.102016,-1.031286,-0.631389



Explanation for sample -1:
True label: Positive
Predicted label: Positive
WOE Evidence: 3.568
Interpretation: Very strong evidence FOR the hypothesis

Feature contributions:
  Existing_Customer_Flag: 0.068
  Loan_Payment_Frequency: 0.583
  Residential_Status: 0.586
  Cheque_Card_Flag: 0.895
  Home_Telephone_Number: 1.435


In [4]:
# Print a compact view of the first feature's mapping table: a fixed subset
# of columns, rounded to three decimals.
summary_columns = ["category", "count", "event_rate", "woe", "woe_se"]

print("\nAll binned categories:")
mapping_df = encoder.get_mapping(features[0])
mapping_summary = mapping_df[summary_columns].round(3)
print(mapping_summary)


All binned categories:
  category  count  event_rate    woe  woe_se
0        N  17811       0.112  0.068   0.024
1        Y   2076       0.049 -0.831   0.102


In [5]:
# Three ways of calling explain_ci for the same test sample.
idx = 300

# Keyword arguments shared by every call below.
ci_kwargs = {"sample_idx": idx, "true_labels": y_test}

# Default settings.
explanation = explainer.explain_ci(X_test, **ci_kwargs)

# Custom confidence level: alpha=0.01, i.e. a 99% CI.
explanation = explainer.explain_ci(X_test, alpha=0.01, **ci_kwargs)

# Pretty print format; kept as the cell's last expression so that whatever
# the call returns or prints is shown as the cell output.
explainer.explain_ci(X_test, return_dict=False, **ci_kwargs)