In [1]:
import pandas as pd
from sklearn import datasets
import lightgbm as lgb
import xgboost as xgb
from sklearn.model_selection import GridSearchCV
import shap

This means that in case of installing LightGBM from PyPI via the ``pip install lightgbm`` command, you don't need to install the gcc compiler anymore.
Instead of that, you need to install the OpenMP library, which is required for running LightGBM on the system with the Apple Clang compiler.
You can install the OpenMP library by the following command: ``brew install libomp``.


In [2]:
# Load the Boston housing data set that ships with scikit-learn.
# NOTE(review): load_boston was deprecated in scikit-learn 1.0 and removed in 1.2 —
# confirm the pinned scikit-learn version before re-running this cell.
boston = datasets.load_boston()
# Feature matrix as a DataFrame, one column per named feature
X = pd.DataFrame(boston.data, columns=boston.feature_names)
# Target as a single-column DataFrame named 'MEDV'
y = pd.DataFrame(boston.target, columns=['MEDV'])

In [3]:
# Fit a LightGBM regressor through GridSearchCV.
# The parameter grid is empty, so the search evaluates a single candidate.
parameters = {}
lgb_model = lgb.LGBMRegressor()
rgr = GridSearchCV(
    estimator=lgb_model,
    param_grid=parameters,
    cv=2,
    n_jobs=-1,
    verbose=1,
)
lgb_model_grid = rgr.fit(X, y['MEDV'])
# Explain the best estimator via its underlying Booster object
print("LightGBM type:", type(lgb_model_grid.best_estimator_.booster_))
shap.TreeExplainer(lgb_model_grid.best_estimator_.booster_).shap_values(X)

Fitting 2 folds for each of 1 candidates, totalling 2 fits


[Parallel(n_jobs=-1)]: Using backend LokyBackend with 8 concurrent workers.


LightGBM type: <class 'lightgbm.basic.Booster'>


[Parallel(n_jobs=-1)]: Done   2 out of   2 | elapsed:    1.8s remaining:    0.0s
[Parallel(n_jobs=-1)]: Done   2 out of   2 | elapsed:    1.8s finished


array([[-2.16235155e-01, -7.47436343e-02,  4.21347385e-01, ...,
         6.36263921e-01,  1.98596873e-01,  4.86600298e+00],
       [-2.05571476e-01, -3.65162885e-03, -3.24497556e-01, ...,
         2.25773801e-01,  1.96197744e-01,  1.82787165e+00],
       [ 9.27601864e-02, -1.80164834e-02, -2.21885639e-01, ...,
         1.35388007e-01, -2.83944693e-03,  7.20479950e+00],
       ...,
       [-1.92913823e-01, -9.45214800e-03, -4.30890696e-01, ...,
        -8.69442255e-01, -1.42631537e-01,  1.24285490e+00],
       [ 1.13954136e-01, -1.53728939e-02, -5.44186028e-01, ...,
        -8.73572324e-01, -4.53222683e-02,  2.02230509e+00],
       [-6.97427621e-01, -8.71539568e-03, -5.39408440e-01, ...,
        -7.55787571e-01, -1.34582969e-01,  3.34732744e-01]])

In [4]:
# Train XGBoost, reusing the (empty) `parameters` grid defined in the LightGBM cell
xgb_model = xgb.XGBRegressor()
rgr = GridSearchCV(xgb_model, parameters, n_jobs=-1, cv=2, verbose=1)
xgb_model_grid = rgr.fit(X,y.MEDV)
# Calculate SHAP values directly from the sklearn-style XGBRegressor wrapper.
# This step was originally flagged "DOESN'T WORK".
# NOTE(review): the output saved with this cell shows an array being returned —
# confirm against the installed shap/xgboost versions.
# The type print and the explainer call are run once only: a second identical
# pair recomputed the same values and discarded the first result.
print("XGBoost type:", type(xgb_model_grid.best_estimator_))
shap.TreeExplainer(xgb_model_grid.best_estimator_).shap_values(X)

Fitting 2 folds for each of 1 candidates, totalling 2 fits


[Parallel(n_jobs=-1)]: Using backend LokyBackend with 8 concurrent workers.


XGBoost type: <class 'xgboost.sklearn.XGBRegressor'>
XGBoost type: <class 'xgboost.sklearn.XGBRegressor'>


[Parallel(n_jobs=-1)]: Done   2 out of   2 | elapsed:    1.1s remaining:    0.0s
[Parallel(n_jobs=-1)]: Done   2 out of   2 | elapsed:    1.1s finished
  if getattr(data, 'base', None) is not None and \
  data.base is not None and isinstance(data, np.ndarray) \


array([[-0.51467264, -0.20504883,  0.06660128, ...,  0.4995593 ,
         0.13651758,  5.601616  ],
       [-0.33332354, -0.06292161, -0.17551032, ...,  0.71119964,
         0.07554361,  1.7569411 ],
       [-0.29614973, -0.06196661, -0.32156122, ...,  0.30347508,
         0.19353725,  6.9513354 ],
       ...,
       [-0.5108058 , -0.18954909,  0.03790583, ..., -1.4830108 ,
         0.06967882,  2.8791142 ],
       [ 0.31853032, -0.18596269,  0.0397671 , ..., -1.508986  ,
         0.15543678,  2.6643074 ],
       [-0.8217313 , -0.20032728, -0.06452336, ..., -1.2430359 ,
         0.03995529,  1.7653933 ]], dtype=float32)