# <span style="font-family: Arial; color:#97f788">xBooster</span>

## <span style="font-family: Arial; color:navy">SHAP Scorecards</span>

Repo: <a href="https://github.com/xRiskLab/xBooster">https://github.com/xRiskLab/xBooster</a>

This notebook shows how to use native SHAP values for scorecard construction with
XGBoost, LightGBM, and CatBoost.

Each example trains a model, builds a scorecard, and compares SHAP-based scores with the leaf-based scorecard scores and the model's predicted probabilities.

**Highlights**

- SHAP values are computed only when calling `predict_score(..., method="shap")`.
- Scorecards stay lightweight (no SHAP values are stored in them).
- Native SHAP extraction—no external `shap` library required.
- SHAP-based scoring and per-feature score decomposition (`predict_scores(..., method="shap")`) are supported.
- Suitable for deeper models, where interpretability is more challenging.


In [1]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.metrics import roc_auc_score

# Import xbooster constructors
from xbooster.xgb_constructor import XGBScorecardConstructor
from xbooster.lgb_constructor import LGBScorecardConstructor
from xbooster.cb_constructor import CBScorecardConstructor

# Import model libraries
import xgboost as xgb
import lightgbm as lgb
from catboost import CatBoostClassifier, Pool

## Generate Sample Data

We'll create a synthetic credit risk dataset for demonstration.


In [2]:
# Synthetic credit-risk data. The legacy global NumPy seed is fixed so that
# every random draw below is reproducible.
np.random.seed(42)
n_samples = 1000

# All draws consume the same global random stream, so they must happen in
# exactly this order: age, income, credit_history, debt_ratio, employment_years.
feature_draws = {}
feature_draws["age"] = np.random.randint(18, 80, n_samples)
feature_draws["income"] = np.random.randint(20000, 150000, n_samples)
feature_draws["credit_history"] = np.random.randint(0, 10, n_samples)
feature_draws["debt_ratio"] = np.random.uniform(0.1, 0.8, n_samples)
feature_draws["employment_years"] = np.random.randint(0, 30, n_samples)
X = pd.DataFrame(feature_draws)

# Binary target: three weighted risk flags (young, low income, high debt ratio)
# plus uniform noise scaled by 0.2, rounded to 0/1. credit_history and
# employment_years do not enter the target.
weighted_flags = (
    X["age"].lt(30).astype(int).mul(0.3)
    .add(X["income"].lt(40000).astype(int).mul(0.4))
    .add(X["debt_ratio"].gt(0.6).astype(int).mul(0.3))
)
noise = np.random.random(n_samples) * 0.2
y = (weighted_flags + noise).round().astype(int)

# Hold out 20% of the rows as a test set; the split is seeded and stratified on y.
split_parts = train_test_split(X, y, test_size=0.2, random_state=42, stratify=y)
X_train, X_test, y_train, y_test = split_parts

# Report partition sizes and the overall event rate of the full target.
print(
    f"Training set: {X_train.shape[0]} samples",
    f"Test set: {X_test.shape[0]} samples",
    f"Default rate: {y.mean():.2%}",
    sep="\n",
)

Training set: 800 samples
Test set: 200 samples
Default rate: 17.60%


## Example 1: XGBoost with SHAP


In [5]:
# Fit an XGBoost classifier whose trees are deeper than stumps (max_depth=3).
xgb_params = {
    "max_depth": 3,
    "n_estimators": 50,
    "learning_rate": 0.1,
    "random_state": 42,
}
xgb_model = xgb.XGBClassifier(**xgb_params)
xgb_model.fit(X_train, y_train)

# Score the hold-out rows (column 1 of predict_proba) and report
# discrimination as Gini = 2 * AUC - 1.
xgb_pred = xgb_model.predict_proba(X_test)[:, 1]
gini_xgb = 2 * roc_auc_score(y_test, xgb_pred) - 1
print(f"XGBoost Gini: {gini_xgb:.4f}")

XGBoost Gini: 0.9796


In [6]:
# Create scorecard constructor
# The constructor receives the fitted XGBoost model together with the training
# features and labels it was fitted on.
xgb_constructor = XGBScorecardConstructor(xgb_model, X_train, y_train)
# Build the scorecard. The returned object is kept in `xgb_scorecard` but is not
# used by the visible cells, which score through `xgb_constructor` instead.
# NOTE(review): presumably this call must precede predict_score() — confirm
# against the xbooster documentation.
xgb_scorecard = xgb_constructor.construct_scorecard()

In [11]:
# Number of leading test rows shown in the side-by-side preview.
n_preview = 10

# Score the test set twice: once with SHAP requested explicitly, once with
# the constructor's default method (the leaf-based scorecard).
xgb_scores_shap = xgb_constructor.predict_score(X_test, method="shap")
xgb_scores_leafs = xgb_constructor.predict_score(X_test)

# Reference values: the model's own column-1 probabilities for the same rows.
xgb_predictions = xgb_model.predict_proba(X_test)[:, 1]

# Assemble the preview table from the first rows of each result.
xgb_preview_columns = {
    "SHAP_Score": xgb_scores_shap.head(n_preview).values,
    "XAddEvidence_Score": xgb_scores_leafs.head(n_preview).values,
    "Model_Prob": xgb_predictions[:n_preview],
}
xgb_comparison_df = pd.DataFrame(xgb_preview_columns)
print("\nSample predictions (first 10):")
display(xgb_comparison_df)


Sample predictions (first 10):


Unnamed: 0,SHAP_Score,XAddEvidence_Score,Model_Prob
0,685,776.0,0.002929
1,666,758.0,0.003752
2,77,173.0,0.930465
3,26,123.0,0.964173
4,672,762.0,0.003498
5,690,782.0,0.002663
6,699,790.0,0.002376
7,-7,82.0,0.976878
8,588,679.0,0.011058
9,676,767.0,0.003311


In [12]:
# Pairwise Pearson correlation (the pandas default, spelled out here) between
# the columns of the 10-row preview table.
xgb_comparison_df.corr(method="pearson")

Unnamed: 0,SHAP_Score,XAddEvidence_Score,Model_Prob
SHAP_Score,1.0,0.999975,-0.994904
XAddEvidence_Score,0.999975,1.0,-0.994602
Model_Prob,-0.994904,-0.994602,1.0


In [13]:
# Per-feature breakdown of the SHAP-based score for every test row.
xgb_scores_decomposed = xgb_constructor.predict_scores(X_test, method="shap")

# One print call with a newline separator emits the same four lines as
# four consecutive print calls.
print(
    "=== XGBoost SHAP Score Decomposition ===",
    f"Feature-level decomposition shape: {xgb_scores_decomposed.shape}",
    f"Columns: {xgb_scores_decomposed.columns.tolist()}",
    "\nFirst 5 rows (showing feature contributions and total score):",
    sep="\n",
)
display(xgb_scores_decomposed.head(5))

=== XGBoost SHAP Score Decomposition ===
Feature-level decomposition shape: (200, 6)
Columns: ['age_score', 'income_score', 'credit_history_score', 'debt_ratio_score', 'employment_years_score', 'score']

First 5 rows (showing feature contributions and total score):


Unnamed: 0,age_score,income_score,credit_history_score,debt_ratio_score,employment_years_score,score
0,122,274,78,142,69,685
1,127,285,78,111,65,666
2,114,-161,77,-37,84,77
3,120,-188,70,-54,78,26
4,126,280,74,116,76,672


## Example 2: LightGBM with SHAP


In [14]:
# Fit a LightGBM classifier whose trees are deeper than stumps (max_depth=3);
# verbose=-1 is passed through to LightGBM unchanged.
lgb_params = {
    "max_depth": 3,
    "n_estimators": 50,
    "learning_rate": 0.1,
    "random_state": 42,
    "verbose": -1,
}
lgb_model = lgb.LGBMClassifier(**lgb_params)
lgb_model.fit(X_train, y_train)

# Score the hold-out rows (column 1 of predict_proba) and report
# discrimination as Gini = 2 * AUC - 1.
lgb_pred = lgb_model.predict_proba(X_test)[:, 1]
gini_lgb = 2 * roc_auc_score(y_test, lgb_pred) - 1
print(f"LightGBM Gini: {gini_lgb:.4f}")

LightGBM Gini: 0.9794


In [15]:
# Create scorecard constructor
# The constructor receives the fitted LightGBM model together with the training
# features and labels it was fitted on.
lgb_constructor = LGBScorecardConstructor(lgb_model, X_train, y_train)
# Build the scorecard. The returned object is kept in `lgb_scorecard` but is not
# used by the visible cells, which score through `lgb_constructor` instead.
# NOTE(review): presumably this call must precede predict_score() — confirm
# against the xbooster documentation.
lgb_scorecard = lgb_constructor.construct_scorecard()

In [17]:
# Number of leading test rows shown in the side-by-side preview.
n_preview = 10

# Score the test set twice: once with SHAP requested explicitly, once with
# the constructor's default method (the leaf-based scorecard).
lgb_scores_shap = lgb_constructor.predict_score(X_test, method="shap")
lgb_scores_leafs = lgb_constructor.predict_score(X_test)

# Reference values: the model's own column-1 probabilities for the same rows.
lgb_predictions = lgb_model.predict_proba(X_test)[:, 1]
print(f"\nModel predictions - Mean: {lgb_predictions.mean():.4f}")

# Assemble the preview table from the first rows of each result.
lgb_preview_columns = {
    "SHAP_Score": lgb_scores_shap.head(n_preview).values,
    "XAddEvidence_Score": lgb_scores_leafs.head(n_preview).values,
    "Model_Prob": lgb_predictions[:n_preview],
}
lgb_comparison_df = pd.DataFrame(lgb_preview_columns)
print("\nSample predictions (first 10):")
display(lgb_comparison_df)


Model predictions - Mean: 0.1635

Sample predictions (first 10):


Unnamed: 0,SHAP_Score,XAddEvidence_Score,Model_Prob
0,812,778.0,0.002745
1,814,779.0,0.002707
2,191,152.0,0.938611
3,133,96.0,0.971278
4,814,779.0,0.002712
5,818,783.0,0.002549
6,822,787.0,0.002433
7,41,1.0,0.991951
8,839,805.0,0.001935
9,818,783.0,0.002549


In [18]:
# Pairwise Pearson correlation (the pandas default, spelled out here) between
# the columns of the 10-row preview table.
lgb_comparison_df.corr(method="pearson")

Unnamed: 0,SHAP_Score,XAddEvidence_Score,Model_Prob
SHAP_Score,1.0,0.999998,-0.996616
XAddEvidence_Score,0.999998,1.0,-0.996564
Model_Prob,-0.996616,-0.996564,1.0


In [19]:
# Per-feature breakdown of the SHAP-based score for every test row.
lgb_scores_decomposed = lgb_constructor.predict_scores(X_test, method="shap")

# One print call with a newline separator emits the same four lines as
# four consecutive print calls.
print(
    "=== LightGBM SHAP Score Decomposition ===",
    f"Feature-level decomposition shape: {lgb_scores_decomposed.shape}",
    f"Columns: {lgb_scores_decomposed.columns.tolist()}",
    "\nFirst 5 rows (showing feature contributions and total score):",
    sep="\n",
)
display(lgb_scores_decomposed.head(5))

=== LightGBM SHAP Score Decomposition ===
Feature-level decomposition shape: (200, 6)
Columns: ['age_score', 'income_score', 'credit_history_score', 'debt_ratio_score', 'employment_years_score', 'score']

First 5 rows (showing feature contributions and total score):


Unnamed: 0,age_score,income_score,credit_history_score,debt_ratio_score,employment_years_score,score
0,151,220,135,171,135,812
1,157,222,141,159,135,814
2,174,-301,146,32,140,191
3,179,-337,132,17,142,133
4,156,220,136,163,139,814


## Example 3: CatBoost with SHAP


In [20]:
# Configure a CatBoost classifier whose trees are deeper than stumps (max_depth=3).
cb_params = {
    "max_depth": 3,
    "n_estimators": 50,
    "learning_rate": 0.1,
    "random_state": 42,
    "verbose": False,
}
cb_model = CatBoostClassifier(**cb_params)

# Wrap both partitions in Pool objects; the train pool is what the model is
# fitted on, and the test pool is what it is scored on below.
train_pool = Pool(X_train, y_train)
test_pool = Pool(X_test, y_test)

cb_model.fit(train_pool)

# Score the hold-out pool (column 1 of predict_proba) and report
# discrimination as Gini = 2 * AUC - 1.
cb_pred = cb_model.predict_proba(test_pool)[:, 1]
cb_gini = 2 * roc_auc_score(y_test, cb_pred) - 1
print(f"CatBoost Gini: {cb_gini:.4f}")

CatBoost Gini: 0.9893


In [21]:
# Unlike the XGBoost/LightGBM constructors, this one is given the fitted model
# and the training Pool (features and labels bundled together).
cb_constructor = CBScorecardConstructor(cb_model, train_pool)

# Build the scorecard table. Per the notebook, SHAP values are not stored in it;
# they are computed on demand by predict_score(method='shap').
cb_scorecard = cb_constructor.construct_scorecard()

print("Scorecard columns:", cb_scorecard.columns.tolist())
# A single print with a newline separator reproduces the three consecutive
# prints (each message already starts with its own blank line).
print(
    f"\nScorecard shape: {cb_scorecard.shape}",
    "\nNote: SHAP values are NOT stored in the scorecard. They are computed on-demand when using predict_score(method='shap')",
    "\nFirst few rows of scorecard:",
    sep="\n",
)

# Show only a readable subset of the scorecard columns for the first ten rows.
scorecard_preview_columns = ["Tree", "LeafIndex", "Feature", "XAddEvidence", "Count", "EventRate"]
display(cb_scorecard[scorecard_preview_columns].head(10))

Scorecard columns: ['Tree', 'LeafIndex', 'Feature', 'Sign', 'Split', 'CountPct', 'Count', 'NonEvents', 'Events', 'EventRate', 'XAddEvidence', 'WOE', 'IV', 'DetailedSplit']

Scorecard shape: (400, 14)

Note: SHAP values are NOT stored in the scorecard. They are computed on-demand when using predict_score(method='shap')

First few rows of scorecard:


Unnamed: 0,Tree,LeafIndex,Feature,XAddEvidence,Count,EventRate
0,0,0,income,0.097,62.0,0.790323
1,0,1,income,0.0,0.0,0.17625
2,0,2,income,-0.076,17.0,0.176471
3,0,3,income,-0.141,306.0,0.133987
4,0,4,income,0.047,69.0,0.637681
5,0,5,income,0.0,0.0,0.17625
6,0,6,income,-0.086,23.0,0.173913
7,0,7,income,-0.193,323.0,0.0
8,1,0,income,0.087,10.0,1.0
9,1,1,income,-0.131,28.0,0.0


In [22]:
# Number of leading test rows shown in the side-by-side preview.
n_preview = 10

# Score the test set twice: once with SHAP, once with the leaf-based scorecard.
# Here the leaf-based scores are requested explicitly with method="pdo"
# rather than by relying on the constructor's default method.
cb_scores_shap = cb_constructor.predict_score(X_test, method="shap")
cb_scores_leafs = cb_constructor.predict_score(X_test, method="pdo")

# Reference values: the model's own column-1 probabilities for the test pool.
cb_predictions = cb_model.predict_proba(test_pool)[:, 1]

# Assemble the preview table from the first rows of each result.
cb_preview_columns = {
    "SHAP_Score": cb_scores_shap.head(n_preview).values,
    "XAddEvidence_Score": cb_scores_leafs.head(n_preview).values,
    "Model_Prob": cb_predictions[:n_preview],
}
cb_comparison_df = pd.DataFrame(cb_preview_columns)
print("\nSample predictions (first 10):")
display(cb_comparison_df)


Sample predictions (first 10):


Unnamed: 0,SHAP_Score,XAddEvidence_Score,Model_Prob
0,699,872.0,0.013236
1,699,888.0,0.01292
2,215,329.0,0.916289
3,230,354.0,0.899126
4,670,829.0,0.01959
5,706,885.0,0.011901
6,704,884.0,0.012361
7,318,428.0,0.728069
8,646,812.0,0.026979
9,703,884.0,0.012319


In [23]:
# Pairwise Pearson correlation (the pandas default, spelled out here) between
# the columns of the 10-row preview table.
cb_comparison_df.corr(method="pearson")

Unnamed: 0,SHAP_Score,XAddEvidence_Score,Model_Prob
SHAP_Score,1.0,0.999293,-0.997071
XAddEvidence_Score,0.999293,1.0,-0.995186
Model_Prob,-0.997071,-0.995186,1.0


In [24]:
# Per-feature breakdown of the SHAP-based score for every test row.
cb_scores_decomposed = cb_constructor.predict_scores(X_test, method="shap")

# One print call with a newline separator emits the same four lines as
# four consecutive print calls.
print(
    "=== CatBoost SHAP Score Decomposition ===",
    f"Feature-level decomposition shape: {cb_scores_decomposed.shape}",
    f"Columns: {cb_scores_decomposed.columns.tolist()}",
    "\nFirst 5 rows (showing feature contributions and total score):",
    sep="\n",
)
display(cb_scores_decomposed.head(5))

=== CatBoost SHAP Score Decomposition ===
Feature-level decomposition shape: (200, 6)
Columns: ['age_score', 'income_score', 'credit_history_score', 'debt_ratio_score', 'employment_years_score', 'score']

First 5 rows (showing feature contributions and total score):


Unnamed: 0,age_score,income_score,credit_history_score,debt_ratio_score,employment_years_score,score
0,136,178,118,148,119,699
1,144,176,114,148,117,699
2,141,-194,120,27,121,215
3,146,-196,117,45,118,230
4,144,138,120,150,118,670
