# <span style="font-family: Arial; color:#97f788">xBooster</span>

## <span style="font-family: Arial; color:navy">Trees to Indices</span>

Repo: <a href="https://github.com/xRiskLab/xBooster">https://github.com/xRiskLab/xBooster</a>

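In this notebook, we show how to convert the trees of a fitted gradient-boosting model (XGBoost, LightGBM, or CatBoost) to leaf indices: for each observation, `get_leafs(..., output_type="leaf_index")` returns one `tree_i` column per tree holding the index of the leaf assigned to that observation.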


In [1]:
from pathlib import Path
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.compose import make_column_selector as selector
from sklearn.compose import ColumnTransformer
from sklearn.preprocessing import OneHotEncoder

from sklearn import set_config

# Ask scikit-learn transformers to return pandas DataFrames rather than NumPy arrays;
# the one-hot encoded output is sliced with DataFrame indexers further down.
set_config(transform_output="pandas")

In [2]:
# The dataset is looked up in a "data" folder under the current working directory.
ROOT_DIR = Path.cwd()
DATA_DIR = ROOT_DIR.joinpath("data")

data = pd.read_csv(DATA_DIR.joinpath("train_u6lujuX_CVtuZ9i.csv"))

# Features: every column except Loan_ID, the Loan_Status target, and Gender.
X = data.drop(["Loan_ID", "Loan_Status", "Gender"], axis=1)

# Binary target derived from Loan_Status: "Y" -> 0, "N" -> 1.
loan_status_to_label = {"Y": 0, "N": 1}
y = data["Loan_Status"].map(loan_status_to_label)

# Split the feature columns by dtype: object columns are treated as categorical,
# all remaining columns as numerical.
numerical_columns_selector = selector(dtype_exclude=object)
categorical_columns_selector = selector(dtype_include=object)

numerical_columns = numerical_columns_selector(X)
categorical_columns = categorical_columns_selector(X)
categorical_preprocessor = OneHotEncoder(handle_unknown="ignore", sparse_output=False)

# Convert numerical columns to float and categorical columns to string.
X[numerical_columns] = X[numerical_columns].astype(float)
# Fill missing values *before* casting: `astype(str)` turns NaN into the literal
# string "nan", which would leave `fillna("NA")` with nothing to replace.
X[categorical_columns] = X[categorical_columns].fillna("NA").astype(str)

# One-hot encode the categorical columns and pass every other column through
# unchanged; feature names are emitted without the transformer-name prefix.
one_hot_step = ("one-hot-encoder", categorical_preprocessor, categorical_columns)

preprocessor = ColumnTransformer(
    transformers=[one_hot_step],
    remainder="passthrough",
    verbose_feature_names_out=False,
)

X_ohe = preprocessor.fit_transform(X)

# Stratified 80/20 split performed on the index labels, so that the very same rows
# can be selected from the raw frame, the one-hot encoded frame and the target.
ix_train, ix_test = train_test_split(data.index, stratify=y, test_size=0.2, random_state=42)

# `ix_train` / `ix_test` hold index *labels*, so select with `.loc`. Positional
# `.iloc` only gives the same rows while the index is the default 0..n-1 range.
X_train, X_test = X.loc[ix_train], X.loc[ix_test]
X_train_ohe, X_test_ohe = X_ohe.loc[ix_train], X_ohe.loc[ix_test]
y_train, y_test = y.loc[ix_train], y.loc[ix_test]

## XGBoost


In [3]:
import xgboost as xgb
from xbooster.xgb_constructor import XGBScorecardConstructor

# Train an XGBoost classifier on the one-hot encoded training data.
xgb_params = {"n_estimators": 100, "max_depth": 3, "learning_rate": 0.1}
model = xgb.XGBClassifier(**xgb_params)
model.fit(X_train_ohe, y_train)

# Build the scorecard from the fitted model and its training data.
constructor = XGBScorecardConstructor(model, X_train_ohe, y_train)
scorecard = constructor.construct_scorecard()

# Leaf index per test row, one `tree_i` column per tree; preview the first rows.
xgb_leaf_indices = constructor.get_leafs(X_test_ohe, output_type="leaf_index")
xgb_leaf_indices.head(5)

Unnamed: 0,tree_0,tree_1,tree_2,tree_3,tree_4,tree_5,tree_6,tree_7,tree_8,tree_9,...,tree_90,tree_91,tree_92,tree_93,tree_94,tree_95,tree_96,tree_97,tree_98,tree_99
0,5,5,7,7,9,7,11,9,11,9,...,7,7,7,13,7,6,7,9,5,7
1,8,6,9,9,10,10,12,12,12,12,...,7,11,8,12,8,6,7,9,7,8
2,7,8,9,9,10,7,12,10,12,9,...,8,7,8,14,7,6,7,9,7,8
3,8,6,10,10,10,10,12,12,12,12,...,7,13,8,14,8,6,7,9,6,8
4,6,5,8,8,9,7,11,10,11,9,...,5,7,7,12,7,6,7,9,5,8


## LightGBM


In [4]:
import lightgbm as lgb
from xbooster.lgb_constructor import LGBScorecardConstructor

# Train a LightGBM classifier on the one-hot encoded training data (logging silenced).
lgb_params = {
    "n_estimators": 100,
    "max_depth": 3,
    "learning_rate": 0.1,
    "verbose": -1,
}
model = lgb.LGBMClassifier(**lgb_params)
model.fit(X_train_ohe, y_train)

# Build the scorecard from the fitted model and its training data.
constructor = LGBScorecardConstructor(model, X_train_ohe, y_train)
scorecard = constructor.construct_scorecard()

# Leaf index per test row, one `tree_i` column per tree; preview the first rows.
lgb_leaf_indices = constructor.get_leafs(X_test_ohe, output_type="leaf_index")
lgb_leaf_indices.head(5)

Unnamed: 0,tree_0,tree_1,tree_2,tree_3,tree_4,tree_5,tree_6,tree_7,tree_8,tree_9,...,tree_90,tree_91,tree_92,tree_93,tree_94,tree_95,tree_96,tree_97,tree_98,tree_99
0,1,1,1,2,1,2,1,2,3,1,...,3,3,3,0,2,1,0,2,2,3
1,4,3,2,3,3,3,3,3,4,4,...,3,4,5,4,4,3,5,5,2,3
2,2,4,2,3,1,3,4,3,4,1,...,3,4,5,0,2,3,5,5,2,3
3,4,3,4,3,3,3,3,3,4,4,...,2,5,4,5,4,3,1,5,2,3
4,3,1,3,2,1,2,1,2,2,1,...,0,0,6,0,2,1,3,6,2,3


## CatBoost


In [5]:
import catboost as cb
from xbooster.cb_constructor import CBScorecardConstructor

# Train a CatBoost classifier on the raw (not one-hot encoded) features; the
# categorical columns are handed to CatBoost via `cat_features`.
cb_params = {
    "n_estimators": 100,
    "max_depth": 3,
    "learning_rate": 0.1,
    "allow_writing_files": False,
    "verbose": 0,
    "cat_features": categorical_columns,
}
model = cb.CatBoostClassifier(**cb_params)
model.fit(X_train, y_train)

# Build the scorecard from the fitted model and its training data.
constructor = CBScorecardConstructor(model, X_train, y_train)
scorecard = constructor.construct_scorecard()

# Leaf index per test row, one `tree_i` column per tree; preview the first rows.
cb_leaf_indices = constructor.get_leafs(X_test, output_type="leaf_index")
cb_leaf_indices.head(5)

Unnamed: 0,tree_0,tree_1,tree_2,tree_3,tree_4,tree_5,tree_6,tree_7,tree_8,tree_9,...,tree_90,tree_91,tree_92,tree_93,tree_94,tree_95,tree_96,tree_97,tree_98,tree_99
0,5,1,1,3,7,7,5,5,4,3,...,5,4,3,3,7,6,6,3,1,4
1,5,1,5,3,7,7,5,5,7,7,...,5,4,7,3,7,7,6,3,1,0
2,5,1,5,3,7,7,5,5,7,7,...,5,4,7,3,7,7,6,3,1,4
3,0,0,4,4,4,2,6,0,1,0,...,5,4,7,3,7,7,6,3,1,0
4,7,1,7,3,7,7,5,5,5,3,...,5,4,3,3,7,7,6,3,1,4
