In [1]:
import pandas as pd
import numpy as np

from sklearn.datasets import make_classification
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier

# Default `.feature_importances` can be different depending on the `max_depth` chosen

In [2]:
N_FEATURES = 20

X, y = \
make_classification(n_samples=1000,
                    n_features=N_FEATURES,
                    n_informative=2,
                    n_redundant=2,
                    n_classes=2,
                    flip_y=0.1,
                    shuffle=False,
                    random_state=42)

X = pd.DataFrame(X, columns=[f'column_{i+1}' for i in range(N_FEATURES)])

rfc_deep = RandomForestClassifier(random_state=42, max_depth=None).fit(X, y)
rfc_shallow = RandomForestClassifier(random_state=42, max_depth=10).fit(X, y)

In [3]:
df_imp_deep = \
(pd.DataFrame(list(zip(X.columns, rfc_deep.feature_importances_)),
              columns=['feature_name', 'shap_importance'])
 .sort_values(by='shap_importance', ascending=False)
 .reset_index(drop=True)
)

df_imp_shallow = \
(pd.DataFrame(list(zip(X.columns, rfc_shallow.feature_importances_)),
              columns=['feature_name', 'shap_importance'])
 .sort_values(by='shap_importance', ascending=False)
 .reset_index(drop=True)
)

df_imp_shallow_deep = \
df_imp_shallow.merge(df_imp_deep,
                     on='feature_name',
                     suffixes = ('_deep', '_shallow')
                    )

df_imp_shallow_deep

Unnamed: 0,feature_name,shap_importance_deep,shap_importance_shallow
0,column_2,0.28352,0.278748
1,column_3,0.22381,0.20115
2,column_4,0.090527,0.092612
3,column_1,0.085596,0.085144
4,column_10,0.02514,0.028244
5,column_6,0.024907,0.025434
6,column_9,0.02486,0.022617
7,column_13,0.021553,0.020537
8,column_14,0.020954,0.024815
9,column_7,0.020306,0.021798


# It's close, but different enough to lead to different sorting!

In [4]:
(np.array(df_imp_shallow
  .sort_values(by='shap_importance')
  .feature_name
  .to_list())
,
 np.array(df_imp_deep
  .sort_values(by='shap_importance')
  .feature_name
  .to_list()))

(array(['column_5', 'column_20', 'column_18', 'column_19', 'column_16',
        'column_8', 'column_17', 'column_12', 'column_11', 'column_15',
        'column_7', 'column_14', 'column_13', 'column_9', 'column_6',
        'column_10', 'column_1', 'column_4', 'column_3', 'column_2'],
       dtype='<U9'),
 array(['column_20', 'column_18', 'column_16', 'column_5', 'column_17',
        'column_19', 'column_13', 'column_8', 'column_7', 'column_12',
        'column_15', 'column_9', 'column_11', 'column_14', 'column_6',
        'column_10', 'column_1', 'column_4', 'column_3', 'column_2'],
       dtype='<U9'))