# FastWoe Explanation

This notebook shows how to fit a `FastWoe` encoder and use the `WeightOfEvidence` explainer built on top of it to explain individual predictions.

In [1]:
import sys
import warnings
from pathlib import Path

import pandas as pd

# Resolve the repository root: when the notebook is started from the
# "examples" folder, step one level up; otherwise use the working directory.
ROOT_DIR = Path.cwd()
if ROOT_DIR.name == "examples":
    ROOT_DIR = ROOT_DIR.parent

print(f"Root directory: {ROOT_DIR}")

# Put the repository root on sys.path BEFORE importing fastwoe; modifying
# sys.path after the import (as before) cannot influence how that import is
# resolved. The membership check keeps re-running this cell from stacking
# duplicate entries.
if str(ROOT_DIR) not in sys.path:
    sys.path.insert(0, str(ROOT_DIR))

from fastwoe import FastWoe, WeightOfEvidence  # noqa: E402

# NOTE(review): this silences every warning for the rest of the notebook,
# including any that FastWoe(warn_on_numerical=True) may emit.
warnings.filterwarnings("ignore")

data_path = ROOT_DIR / "data" / "BankCaseStudyData.csv"

Root directory: /Users/deburky/Documents/Git-xRiskLab/fastwoe


In [2]:
def stringify_categoricals(frame, columns):
    """Return a copy of ``frame`` with ``columns`` converted to strings.

    Missing values are replaced by the literal ``"NA"`` *before* the string
    cast. Casting first turns NaN into the text ``"nan"`` on object/numeric
    columns, after which ``fillna`` has nothing left to fill.

    Parameters
    ----------
    frame : pandas.DataFrame
        Input features; left unmodified.
    columns : list of str
        Names of the columns to convert.

    Returns
    -------
    pandas.DataFrame
        An independent copy of ``frame`` with the converted columns.
    """
    # Work on an explicit copy so the caller's frame (and the frame it was
    # sliced from) is never written to.
    prepared = frame.copy()
    # Going through object dtype lets numeric columns accept the "NA" marker.
    prepared[columns] = prepared[columns].astype(object).fillna("NA").astype(str)
    return prepared


dataset = pd.read_csv(data_path)

# Prepare features and labels
label = "Final_Decision"
# Binary target: "Decline" is the event (1), "Accept" the non-event (0)
dataset[label] = dataset[label].map({"Accept": 0, "Decline": 1})

num_features = [
    "Application_Score",
    "Bureau_Score",
    "Loan_Amount",
    "Time_with_Bank",
    "Time_in_Employment",
    "Loan_to_income",
    "Gross_Annual_Income",
]

cat_features = [
    "Existing_Customer_Flag",
    "Loan_Payment_Frequency",
    "Residential_Status",
    "Cheque_Card_Flag",
    "Home_Telephone_Number",
]

features = num_features + cat_features

# Row masks based on the dataset's own "split" column
ix_train = dataset["split"] == "Development"
ix_test = dataset["split"] == "Validation"

X_train = stringify_categoricals(dataset.loc[ix_train, features], cat_features)
y_train = dataset.loc[ix_train, label]
X_test = stringify_categoricals(dataset.loc[ix_test, features], cat_features)
y_test = dataset.loc[ix_test, label]

In [3]:
# Train the WOE encoder on the training features and labels
encoder = FastWoe(warn_on_numerical=True)
encoder.fit(X_train, y_train)

# Preview the learned mapping for the first feature in `features`
print("\nFirst feature mapping:")
first_feature_mapping = encoder.get_mapping(features[0])
display(first_feature_mapping.head(10))

# Wrap the fitted encoder in an explainer built on the training data
explainer = WeightOfEvidence(encoder, X_train, y_train)

# Explain a single row of the test features
idx = -1
explanation = explainer.explain(X_test, sample_idx=idx, true_labels=y_test)

# Headline results, one per line
print(
    f"\nExplanation for sample {idx}:",
    f"True label: {explanation['true_label']}",
    f"Predicted label: {explanation['predicted_label']}",
    f"WOE Evidence: {explanation['total_woe']:.3f}",
    f"Interpretation: {explanation['interpretation']}",
    sep="\n",
)

# Per-feature WOE contributions, when the explanation provides them
if "feature_contributions" in explanation:
    contributions = explanation["feature_contributions"]
    print("\nFeature contributions:")
    for feature, woe_val in contributions.items():
        print(f"  {feature}: {woe_val:.3f}")


First feature mapping:


Unnamed: 0,category,count,count_pct,good_count,bad_count,event_rate,woe,woe_se,woe_ci_lower,woe_ci_upper
0,"(-∞, 870.0]",3831,19.263841,2080,1751,0.457061,1.969685,0.032433,1.906118,2.033252
1,"(870.0, 925.0]",3921,19.716398,3710,211,0.053813,-0.725063,0.070773,-0.863777,-0.58635
2,"(925.0, 960.0]",3864,19.429778,3792,72,0.018634,-1.822117,0.118965,-2.055283,-1.58895
3,"(960.0, 985.0]",3829,19.253784,3793,36,0.009402,-2.515528,0.167456,-2.843735,-2.187321
4,"(985.0, ∞)",4442,22.3362,4422,20,0.004502,-3.256749,0.224112,-3.696,-2.817498



Explanation for sample -1:
True label: Positive
Predicted label: Positive
WOE Evidence: 9.567
Interpretation: Very strong evidence FOR the hypothesis

Feature contributions:
  Application_Score: 1.970
  Bureau_Score: 1.901
  Loan_Amount: 0.493
  Time_with_Bank: 0.379
  Time_in_Employment: 0.703
  Loan_to_income: 0.081
  Gross_Annual_Income: 0.473
  Existing_Customer_Flag: 0.068
  Loan_Payment_Frequency: 0.583
  Residential_Status: 0.586
  Cheque_Card_Flag: 0.895
  Home_Telephone_Number: 1.435


In [4]:
# Re-fit the encoder with the same settings as the previous cell.
# Alternative binning left by the author for experimentation (not run here):
# encoder = FastWoe(warn_on_numerical=True, binner_kwargs={"strategy": "kmeans"})
encoder = FastWoe(warn_on_numerical=True)
encoder.fit(X_train, y_train)


# Preview the mapping learned for the first feature in `features`
print("\nFirst feature mapping:")
display(encoder.get_mapping(features[0]).head(10))

# Create explainer and get explanation
explainer = WeightOfEvidence(encoder, X_train, y_train)

# Row of X_test to explain, passed as `sample_idx`
idx = -1
explanation = explainer.explain(X_test, sample_idx=idx, true_labels=y_test)

# Print results
print(f"\nExplanation for sample {idx}:")
print(f"True label: {explanation['true_label']}")
print(f"Predicted label: {explanation['predicted_label']}")
print(f"WOE Evidence: {explanation['total_woe']:.3f}")
print(f"Interpretation: {explanation['interpretation']}")

# Show feature contributions (only when the explanation includes them)
if "feature_contributions" in explanation:
    print("\nFeature contributions:")
    for feature, woe_val in explanation["feature_contributions"].items():
        print(f"  {feature}: {woe_val:.3f}")


First feature mapping:


Unnamed: 0,category,count,count_pct,good_count,bad_count,event_rate,woe,woe_se,woe_ci_lower,woe_ci_upper
0,"(-∞, 870.0]",3831,19.263841,2080,1751,0.457061,1.969685,0.032433,1.906118,2.033252
1,"(870.0, 925.0]",3921,19.716398,3710,211,0.053813,-0.725063,0.070773,-0.863777,-0.58635
2,"(925.0, 960.0]",3864,19.429778,3792,72,0.018634,-1.822117,0.118965,-2.055283,-1.58895
3,"(960.0, 985.0]",3829,19.253784,3793,36,0.009402,-2.515528,0.167456,-2.843735,-2.187321
4,"(985.0, ∞)",4442,22.3362,4422,20,0.004502,-3.256749,0.224112,-3.696,-2.817498



Explanation for sample -1:
True label: Positive
Predicted label: Positive
WOE Evidence: 9.567
Interpretation: Very strong evidence FOR the hypothesis

Feature contributions:
  Application_Score: 1.970
  Bureau_Score: 1.901
  Loan_Amount: 0.493
  Time_with_Bank: 0.379
  Time_in_Employment: 0.703
  Loan_to_income: 0.081
  Gross_Annual_Income: 0.473
  Existing_Customer_Flag: 0.068
  Loan_Payment_Frequency: 0.583
  Residential_Status: 0.586
  Cheque_Card_Flag: 0.895
  Home_Telephone_Number: 1.435


In [5]:
# Fit a fresh encoder; `encoder` and `explainer` are rebound here
encoder = FastWoe(warn_on_numerical=True)
encoder.fit(X_train, y_train)

# Show the mapping table for the first feature in `features`
print("\nFirst feature mapping:")
first_feature_mapping = encoder.get_mapping(features[0])
display(first_feature_mapping.head(10))

# Explainer built from the fitted encoder and the training data
explainer = WeightOfEvidence(encoder, X_train, y_train)

# Explain one row of the test features
idx = -1
explanation = explainer.explain(X_test, sample_idx=idx, true_labels=y_test)

# Summary of the explanation, one item per line
print(
    f"\nExplanation for sample {idx}:",
    f"True label: {explanation['true_label']}",
    f"Predicted label: {explanation['predicted_label']}",
    f"WOE Evidence: {explanation['total_woe']:.3f}",
    f"Interpretation: {explanation['interpretation']}",
    sep="\n",
)

# List each feature's contribution if the explanation carries them
if "feature_contributions" in explanation:
    contributions = explanation["feature_contributions"]
    print("\nFeature contributions:")
    for feature, woe_val in contributions.items():
        print(f"  {feature}: {woe_val:.3f}")


First feature mapping:


Unnamed: 0,category,count,count_pct,good_count,bad_count,event_rate,woe,woe_se,woe_ci_lower,woe_ci_upper
0,"(-∞, 870.0]",3831,19.263841,2080,1751,0.457061,1.969685,0.032433,1.906118,2.033252
1,"(870.0, 925.0]",3921,19.716398,3710,211,0.053813,-0.725063,0.070773,-0.863777,-0.58635
2,"(925.0, 960.0]",3864,19.429778,3792,72,0.018634,-1.822117,0.118965,-2.055283,-1.58895
3,"(960.0, 985.0]",3829,19.253784,3793,36,0.009402,-2.515528,0.167456,-2.843735,-2.187321
4,"(985.0, ∞)",4442,22.3362,4422,20,0.004502,-3.256749,0.224112,-3.696,-2.817498



Explanation for sample -1:
True label: Positive
Predicted label: Positive
WOE Evidence: 9.567
Interpretation: Very strong evidence FOR the hypothesis

Feature contributions:
  Application_Score: 1.970
  Bureau_Score: 1.901
  Loan_Amount: 0.493
  Time_with_Bank: 0.379
  Time_in_Employment: 0.703
  Loan_to_income: 0.081
  Gross_Annual_Income: 0.473
  Existing_Customer_Flag: 0.068
  Loan_Payment_Frequency: 0.583
  Residential_Status: 0.586
  Cheque_Card_Flag: 0.895
  Home_Telephone_Number: 1.435


In [6]:
# Print a rounded summary of the mapping for the first feature
summary_columns = ["category", "count", "event_rate", "woe", "woe_se"]
print("\nAll binned categories:")
mapping_df = encoder.get_mapping(features[0])
mapping_summary = mapping_df[summary_columns].round(3)
print(mapping_summary)


All binned categories:
         category  count  event_rate    woe  woe_se
0     (-∞, 870.0]   3831       0.457  1.970   0.032
1  (870.0, 925.0]   3921       0.054 -0.725   0.071
2  (925.0, 960.0]   3864       0.019 -1.822   0.119
3  (960.0, 985.0]   3829       0.009 -2.516   0.167
4      (985.0, ∞)   4442       0.005 -3.257   0.224


In [7]:
# Retrieve the split values for one feature, first with as_array=True and
# then with as_array=False, printing each result
split_feature = "Application_Score"

splits = encoder.get_split_value_histogram(split_feature, as_array=True)
print(splits)

splits_list = encoder.get_split_value_histogram(split_feature, as_array=False)
print(splits_list)

[-inf 870. 925. 960. 985.  inf]
[-inf, 870.0, 925.0, 960.0, 985.0, inf]


In [8]:
# Explain one row of X_test with confidence intervals, using the default
# settings of explain_ci
idx = 150
explanation = explainer.explain_ci(X_test, sample_idx=idx, true_labels=y_test)

# With custom confidence level
# NOTE: this rebinds `explanation`, discarding the result of the call above.
explanation = explainer.explain_ci(
    X_test, sample_idx=idx, true_labels=y_test, alpha=0.05
)  # 95% CI, assuming alpha is the significance level (earlier note said 99%) — TODO confirm

# Pretty print format
explainer.explain_ci(X_test, sample_idx=idx, true_labels=y_test, return_dict=False)