# <span style="font-family: Arial; color:#97f788">xBooster</span>

## <span style="font-family: Arial; color:navy">XAddEvidence and Feature SHAP Equivalence</span>

Repo: <a href="https://github.com/xRiskLab/xBooster">https://github.com/xRiskLab/xBooster</a>

This notebook demonstrates that, for each observation, the sum of XAddEvidence values (per-tree margins)
equals the sum of Feature SHAP values (per-feature contributions) once both decompositions use the
same base value. See docs/shap_scorecards.md for details.


In [1]:
import numpy as np
import pandas as pd
import xgboost as xgb
from sklearn.model_selection import train_test_split

from xbooster.shap_scorecard import extract_shap_values_xgb
from xbooster.xgb_constructor import XGBScorecardConstructor

## Generate Sample Data

We'll create a synthetic credit risk dataset for demonstration.


In [2]:
# Data Setup
# Synthetic applicants drawn from NumPy's seeded global RNG. The draw order
# (age, income, debt_ratio, then the label noise) is fixed so the generated
# dataset is reproducible.
np.random.seed(42)
n_rows = 1000
age_values = np.random.randint(18, 80, n_rows)
income_values = np.random.randint(20000, 150000, n_rows)
debt_ratio_values = np.random.uniform(0.1, 0.8, n_rows)
X = pd.DataFrame(
    {"age": age_values, "income": income_values, "debt_ratio": debt_ratio_values}
)

# Binary target: weighted risk flags plus uniform noise in [0, 0.2),
# rounded to the nearest integer.
is_young = (X["age"] < 30).astype(int)
is_low_income = (X["income"] < 40000).astype(int)
is_high_debt = (X["debt_ratio"] > 0.6).astype(int)
label_noise = np.random.random(n_rows) * 0.2
risk_score = is_young * 0.3 + is_low_income * 0.4 + is_high_debt * 0.3 + label_noise
y = risk_score.round().astype(int)

# 80/20 split with a fixed random state.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

## Example: Train the Model and Build the Scorecard


In [3]:
# Model & Scorecard
# Fit a boosted classifier (50 estimators, max depth 3) on the training split.
model = xgb.XGBClassifier(max_depth=3, n_estimators=50, learning_rate=0.1, random_state=42)
model.fit(X_train, y_train)

# Build the scorecard table from the fitted model and the training data.
# Later cells read its "Tree", "Node" and "XAddEvidence" columns.
constructor = XGBScorecardConstructor(model, X_train, y_train)
scorecard = constructor.construct_scorecard()

# Feature SHAP: per-feature decomposition (from TreeSHAP)
# Only the first 10 test rows are explained.
# NOTE(review): the meaning of the trailing positional `False` is not visible
# in this notebook - confirm against the extract_shap_values_xgb signature.
shap_full = extract_shap_values_xgb(model, X_test.head(10), constructor.base_score, False)
feature_shap_sum = shap_full[:, :-1].sum(axis=1)  # Row-wise sum over every column except the last
shap_base_value = shap_full[0, -1]  # Last column of the first row, used as the SHAP base value

# XAddEvidence: per-tree decomposition (from scorecard)
# Leaf assignments are requested for the full test set; downstream cells only
# use the first 10 rows.
leaf_indices = constructor.get_leafs(X_test, output_type="leaf_index")
n_trees = len(scorecard["Tree"].unique())  # Number of distinct tree ids in the scorecard

# Base value adjustment (constructor.base_score vs SHAP base_value)
# The difference between the two base values; it is added to the per-tree
# sums later so both decompositions are measured from the same baseline.
base_adjustment = constructor.base_score - shap_base_value

In [4]:
# Preview the first five rows of the scorecard table.
scorecard.head(5)

Unnamed: 0,Tree,Node,Feature,Sign,Split,Count,CountPct,NonEvents,Events,EventRate,WOE,IV,XAddEvidence,DetailedSplit
0,0,4,debt_ratio,>=,0.596,47.0,0.05875,0.0,47.0,1.0,6.078716,1.9979,0.488621,"income < 40079, debt_ratio >= 0.596000433 or m..."
1,0,6,age,>=,30.0,540.0,0.675,540.0,0.0,0.0,-5.460802,4.488331,-0.120249,"income >= 40079 or missing, age >= 30 or missing"
2,0,7,age,<,32.0,18.0,0.0225,0.0,18.0,1.0,5.135757,0.646459,0.405848,"income < 40079, debt_ratio < 0.596000433, age ..."
3,0,8,age,>=,32.0,82.0,0.1025,40.0,42.0,0.512195,1.57363,0.366378,0.209722,"income < 40079, debt_ratio < 0.596000433, age ..."
4,0,9,debt_ratio,<,0.580177,77.0,0.09625,77.0,0.0,0.0,-3.518586,0.412376,-0.111869,"income >= 40079 or missing, age < 30, debt_rat..."


## XAddEvidence (with base adjustment)


In [5]:
# Sum XAddEvidence from table across all trees (with base adjustment)
# Build the (Tree, Node) -> XAddEvidence lookup once instead of re-scanning the
# whole scorecard with a boolean mask for every (row, tree) pair.
# - setdefault keeps the first row per key, i.e. the same "first match" a
#   filtered `.iloc[0]` would return.
# - Iterating the raw NumPy arrays keeps the stored values as NumPy scalars, so
#   the accumulation dtype below is unchanged.
evidence_by_leaf = {}
for tree_id, node_id, evidence in zip(
    scorecard["Tree"].to_numpy(),
    scorecard["Node"].to_numpy(),
    scorecard["XAddEvidence"].to_numpy(),
):
    evidence_by_leaf.setdefault((tree_id, node_id), evidence)

xaddevidence_sum = []
for idx in X_test.index[:10]:
    # get_loc turns the row label into its position within X_test; that
    # position is then used as the label into leaf_indices.
    # NOTE(review): presumably leaf_indices carries a default 0..n-1 index - confirm.
    obs_leafs = leaf_indices.loc[X_test.index.get_loc(idx)]
    # One leaf per tree: add up the evidence of the leaf this row lands in.
    # A (tree, leaf) pair missing from the scorecard raises KeyError.
    total = sum(evidence_by_leaf[(t, obs_leafs.iloc[t])] for t in range(n_trees))
    # Add base adjustment to make it equal to sum of feature SHAP
    xaddevidence_sum.append(total + base_adjustment)
xaddevidence_sum = np.array(xaddevidence_sum)

# PDO Scaling
# factor: points per unit of log-odds, so that `pdo` points double the odds.
# offset: anchors `target_points` at odds of `target_odds`.
pdo, target_points, target_odds = 50, 600, 19
factor = pdo / np.log(2)
offset = target_points - factor * np.log(target_odds)
intercept = factor * shap_base_value

# Higher margin -> lower score, hence the negated sums.
score_table = np.round(factor * (-xaddevidence_sum) - intercept + offset).astype(int)
score_feature = np.round(factor * (-feature_shap_sum) - intercept + offset).astype(int)

## Results


In [6]:
# Compare the two decompositions (raw sums and scaled scores) for the first
# 10 test rows.
print("XAddEvidence (adjusted) vs Feature SHAP Comparison")
comparison_columns = {
    "XAddEvidence_adj": np.round(xaddevidence_sum, 4),
    "Feature_SHAP": np.round(feature_shap_sum, 4),
    "Score_Table": score_table,
    "Score_Feature": score_feature,
    "Diff": score_table - score_feature,
}
results = pd.DataFrame(comparison_columns, index=X_test.index[:10])
display(results)

# Summarise the point difference between the two score columns.
point_diff = results["Diff"]
print(f"\nMax difference: {point_diff.abs().max()} points")
print(f"Match exactly: {point_diff.eq(0).all()}")

XAddEvidence (adjusted) vs Feature SHAP Comparison


Unnamed: 0,XAddEvidence_adj,Feature_SHAP,Score_Table,Score_Feature,Diff
521,-4.3082,-4.3082,689,689,0
737,-4.368,-4.368,693,693,0
740,2.1726,2.1726,221,221,0
660,-3.1005,-3.1005,602,602,0
411,-4.4824,-4.4824,701,701,0
678,-4.3082,-4.3082,689,689,0
626,-4.4824,-4.4824,701,701,0
513,1.5022,1.5022,270,270,0
859,-3.716,-3.716,646,646,0
136,5.1997,5.1997,3,3,0



Max difference: 0 points
Match exactly: True
