In [27]:
import numpy as np

import shap

import xgboost as xgb

from sklearn.model_selection import train_test_split

from sklearn.metrics import accuracy_score

In [28]:
# Fetch the Adult dataset bundled with shap as a feature table (X) and a label vector (y).
# display=False is the library default, spelled out here so the choice is visible.
X, y = shap.datasets.adult(display=False)

In [29]:
# Hold out 20% of the rows for testing; the fixed seed keeps the split reproducible.
split_options = {"test_size": 0.2, "random_state": 42}

X_train, X_test, y_train, y_test = train_test_split(X, y, **split_options)

In [30]:
# Configure the baseline XGBoost classifier (all features); fitting happens in the next cell.
baseline_params = {
    "n_estimators": 100,
    "learning_rate": 0.1,
    "max_depth": 3,
    "random_state": 42,
}

model = xgb.XGBClassifier(**baseline_params)

In [31]:
# Fit the baseline classifier on the full-feature training split.
model.fit(X=X_train, y=y_train)

In [32]:
# Wrap the fitted baseline model in a SHAP explainer.
explainer = shap.Explainer(model=model)

# Explain the training rows only, so the held-out split plays no part in feature selection.
shap_values = explainer(X_train)

In [33]:
# Global importance per feature: average the absolute SHAP values over axis 0.
# (axis 0 is presumably the sample axis; the result is paired with X_train.columns below.)
shap_importance = np.mean(np.abs(shap_values.values), axis=0)

# Map each training column name to its importance score.
feature_importance = {
    column: score for column, score in zip(X_train.columns, shap_importance)
}

In [34]:
# Keep only the TOP_K features with the largest mean |SHAP| value.
# The cut-off is a named constant so it is a single, obvious knob to tune.
TOP_K = 10

# Rank feature names from most to least important.
# sorted() is stable even with reverse=True, so tied features keep their
# original order from feature_importance.
ranked_features = sorted(feature_importance, key=feature_importance.get, reverse=True)

# Slicing never raises: with fewer than TOP_K features, all of them are kept.
selected_features = ranked_features[:TOP_K]

In [35]:
# Restrict both splits to the selected columns (no training happens in this cell).
# The same indexing is applied to train and test so their columns stay aligned.
X_train_selected, X_test_selected = (
    split[selected_features] for split in (X_train, X_test)
)

In [36]:
# Instantiate a second, still-unfitted classifier for the reduced feature set.
# The hyperparameters match the baseline model so only the inputs differ.
reduced_params = {
    "n_estimators": 100,
    "learning_rate": 0.1,
    "max_depth": 3,
    "random_state": 42,
}

new_model = xgb.XGBClassifier(**reduced_params)

In [37]:
# Fit the second classifier on the selected-feature view of the training split.
new_model.fit(X=X_train_selected, y=y_train)

In [38]:
# Predict labels for the held-out rows with the reduced-feature model.
# X_test_selected carries the same columns the model was fitted on.

y_pred = new_model.predict(X_test_selected)

In [39]:
# Score the reduced-feature model on the held-out split.
accuracy = accuracy_score(y_true=y_test, y_pred=y_pred)

# Report the accuracy (4 decimal places) followed by the columns the model was given.
report = (
    f"Accuracy Score After Feature Selection : \n {accuracy :.4f}",
    f"Selected Features : {selected_features}",
)
for line in report:
    print(line)

Accuracy Score After Feature Selection : 
 0.8718
Selected Features : ['Relationship', 'Age', 'Education-Num', 'Capital Gain', 'Hours per week', 'Occupation', 'Capital Loss', 'Sex', 'Marital Status', 'Workclass']
