# MDI+: Example Usages

In [1]:
%reload_ext autoreload
%autoreload 2
import sys
sys.path.append("../")
sys.path.append("../../")

import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestRegressor, RandomForestClassifier
from sklearn.metrics import r2_score, mean_absolute_error, accuracy_score, roc_auc_score, mean_squared_error

from imodels.importance import RandomForestPlusRegressor, RandomForestPlusClassifier, \
    RidgeRegressorPPM, LassoRegressorPPM, IdentityTransformer
from imodels.importance.rf_plus import _fast_r2_score

In [2]:
def neg_mae(y_true, y_pred, **kwargs):
    """
    Evaluates negative mean absolute error
    """
    return -mean_absolute_error(y_true, y_pred, **kwargs)


def neg_log_loss(y_true, y_pred, epsilon=1e-15):
    """
    Evaluates negative log-loss
    """
    y_pred = np.clip(y_pred, epsilon, 1 - epsilon)  # Clip predicted probabilities to avoid extreme values
    return np.mean(y_true * np.log(y_pred) + (1 - y_true) * np.log(1 - y_pred))

In [3]:
# helper variables
rf_regressor = RandomForestRegressor(n_estimators=100, min_samples_leaf=5, max_features=0.33, random_state=331)
rf_classifier = RandomForestClassifier(n_estimators=100, min_samples_leaf=1, max_features="sqrt", random_state=331)

## 1. Regression Example

In [4]:
# generate data from linear model: y = x1 + x2 + N(0, 1)
n = 200
p = 10
s = 2
X = np.random.normal(size=(n, p))
# center and scale X
X = (X - np.mean(X, axis=0)) / np.std(X, axis=0)
beta = np.concatenate((np.ones(s), np.zeros(p-s)))
y = np.matmul(X, beta) + np.random.normal(size=n)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.33, random_state=12345)
# center and scale X_train and X_test
X_train = (X_train - np.mean(X_train, axis=0)) / np.std(X_train, axis=0)
X_test = (X_test - np.mean(X_test, axis=0)) / np.std(X_test, axis=0)

### 1.1 MDI+ with default settings for regression

In [5]:
# fit RF+
rf_plus_model = RandomForestPlusRegressor(rf_model=rf_regressor, include_raw=False)
rf_plus_model.fit(X_train, y_train)

In [6]:
# make predictions with RF+
preds = rf_plus_model.predict(X_test)
r2_score(y_test, preds)

0.40242828284053744

In [7]:
# get MDI+ scores (higher r^2 value = greater importance)
mdi_plus_scores = rf_plus_model.get_mdi_plus_scores(X_train, y_train, lfi = True)

In [8]:
mdi_plus_scores["global"]

Unnamed: 0,var,importance
0,0,0.160821
1,1,0.137709
2,2,0.003746
3,3,0.002064
4,4,-0.003895
5,5,0.002333
6,6,0.010807
7,7,-0.003076
8,8,0.00104
9,9,-0.002691


In [9]:
pd.DataFrame(mdi_plus_scores["lfi"])

Unnamed: 0,0,1,2,3,4,5,6,7,8,9
0,0.347431,0.656422,0.091236,0.126617,0.064319,0.085751,0.143623,0.056039,0.075497,0.074397
1,0.422863,0.683193,0.231352,0.067656,0.054619,0.069480,0.121610,0.042558,0.056922,0.090364
2,0.375842,0.608113,0.119677,0.107915,0.076017,0.133919,0.113840,0.047253,0.071883,0.095492
3,0.667774,0.431562,0.148454,0.053492,0.069511,0.084041,0.129177,0.027858,0.081832,0.061674
4,1.037731,0.412402,0.042545,0.056888,0.047786,0.082329,0.180629,0.041052,0.071779,0.067571
...,...,...,...,...,...,...,...,...,...,...
129,0.338038,0.504424,0.125996,0.068368,0.101320,0.125005,0.120638,0.035960,0.062877,0.146211
130,0.433299,0.455608,0.081813,0.070660,0.046324,0.142913,0.143550,0.047924,0.089665,0.106585
131,0.530148,0.458888,0.057952,0.254663,0.039336,0.065577,0.132535,0.051792,0.057985,0.090875
132,0.427297,0.628819,0.101507,0.078907,0.041686,0.073799,0.113929,0.051301,0.176591,0.098375


#### 1.1.1. Local Feature Importances

In [10]:
# get global MDI+ scores using default scoring function (i.e., R^2 for regression so higher = greater importance)
# and local MDI+ scores using mean squared error (so lower = greater importance)
mdi_plus_scores = rf_plus_model.get_mdi_plus_scores(
    X_train, y_train, local_scoring_fns=mean_squared_error, version = "all", lfi=True
)

In [11]:
mdi_plus_scores["global"].sort_values("importance", ascending=False)

Unnamed: 0,var,importance
0,0,0.160821
1,1,0.137709
6,6,0.010807
2,2,0.003746
5,5,0.002333
3,3,0.002064
8,8,0.00104
9,9,-0.002691
7,7,-0.003076
4,4,-0.003895


In [12]:
mdi_plus_scores["local"]

Unnamed: 0,0,1,2,3,4,5,6,7,8,9
0,5.892492,2.788309,5.360317,4.446785,4.728104,4.690573,4.543298,4.769549,4.829387,4.703560
1,2.335183,0.450097,0.841223,1.097633,1.192961,1.058881,0.926622,1.054718,1.064826,1.289984
2,1.612797,0.419604,1.373187,0.922712,0.960667,1.218743,0.909598,1.086224,0.975724,0.952273
3,1.400665,1.591003,2.314167,2.537099,2.650273,2.442327,2.244768,2.477664,2.522605,2.505772
4,3.620706,9.193288,7.790216,7.720937,7.675604,7.259464,8.317204,7.445131,7.556932,7.629363
...,...,...,...,...,...,...,...,...,...,...
129,1.353738,3.133899,1.459657,1.681560,1.533709,1.574633,1.991049,1.689754,1.777915,1.472297
130,0.250970,0.983362,0.455219,0.538388,0.536939,0.521610,0.637389,0.553528,0.601830,0.499272
131,4.423719,4.710212,6.225040,5.465345,6.246408,6.503521,6.846845,6.316022,6.400983,6.328303
132,3.851156,3.171343,5.783894,5.386039,5.370475,5.571545,5.576345,5.343736,4.776978,5.022356


In [13]:
pd.DataFrame(mdi_plus_scores["lfi"])

Unnamed: 0,0,1,2,3,4,5,6,7,8,9
0,0.347431,0.656422,0.091236,0.126617,0.064319,0.085751,0.143623,0.056039,0.075497,0.074397
1,0.422863,0.683193,0.231352,0.067656,0.054619,0.069480,0.121610,0.042558,0.056922,0.090364
2,0.375842,0.608113,0.119677,0.107915,0.076017,0.133919,0.113840,0.047253,0.071883,0.095492
3,0.667774,0.431562,0.148454,0.053492,0.069511,0.084041,0.129177,0.027858,0.081832,0.061674
4,1.037731,0.412402,0.042545,0.056888,0.047786,0.082329,0.180629,0.041052,0.071779,0.067571
...,...,...,...,...,...,...,...,...,...,...
129,0.338038,0.504424,0.125996,0.068368,0.101320,0.125005,0.120638,0.035960,0.062877,0.146211
130,0.433299,0.455608,0.081813,0.070660,0.046324,0.142913,0.143550,0.047924,0.089665,0.106585
131,0.530148,0.458888,0.057952,0.254663,0.039336,0.065577,0.132535,0.051792,0.057985,0.090875
132,0.427297,0.628819,0.101507,0.078907,0.041686,0.073799,0.113929,0.051301,0.176591,0.098375


In [14]:
# get global MDI+ scores using default scoring function (i.e., R^2 for regression so higher = greater importance)
# and local MDI+ scores using mean squared error (so lower = greater importance)
mdi_plus_scores_zach = rf_plus_model.get_mdi_plus_scores(
    X_train, y_train, local_scoring_fns=mean_squared_error, version = "sub"
)

In [15]:
mdi_plus_scores_zach["global"].sort_values("importance", ascending=False)

Unnamed: 0,var,importance
0,0,0.164535
1,1,0.14158
6,6,0.011097
2,2,0.003947
5,5,0.001953
3,3,0.001624
8,8,0.001046
9,9,-0.002618
7,7,-0.003149
4,4,-0.003651


In [16]:
mdi_plus_scores_zach["local"]

Unnamed: 0,0,1,2,3,4,5,6,7,8,9
0,5.900707,2.765728,5.356417,4.452950,4.729282,4.694918,4.535507,4.774042,4.829643,4.698227
1,2.335874,0.443189,0.841743,1.101807,1.194513,1.061911,0.922959,1.055296,1.064395,1.289640
2,1.618978,0.407810,1.373051,0.924772,0.960655,1.222726,0.908976,1.087722,0.974720,0.950162
3,1.410923,1.552239,2.314816,2.544996,2.646510,2.447284,2.237000,2.478118,2.520984,2.503571
4,3.650020,9.143536,7.789708,7.730090,7.674412,7.259317,8.305479,7.446832,7.546663,7.623937
...,...,...,...,...,...,...,...,...,...,...
129,1.330448,3.132856,1.459676,1.677516,1.534367,1.572059,1.995592,1.688778,1.779127,1.472487
130,0.242708,0.979798,0.456749,0.535494,0.537425,0.521223,0.638533,0.553581,0.601282,0.499177
131,4.368340,4.719194,6.222518,5.459919,6.248424,6.506755,6.857707,6.317306,6.406852,6.330970
132,3.805184,3.176985,5.781991,5.381202,5.365851,5.564432,5.584107,5.344798,4.777231,5.027122


### 1.2 MDI+ with custom partial prediction model and evaluation metric(s)

In [None]:
# fit RF+ with custom partial prediction model
rf_plus_model = RandomForestPlusRegressor(rf_model=rf_regressor, prediction_model=LassoRegressorPPM())
rf_plus_model.fit(X_train, y_train)

In [None]:
# get MDI+ scores with custom evaluation metrics/scorers
mdi_plus_scores = rf_plus_model.get_mdi_plus_scores(X_train, y_train, scoring_fns={"r2_score": _fast_r2_score, "negative_mae": neg_mae})
mdi_plus_scores.sort_values("r2_score", ascending=False)

Unnamed: 0,var,r2_score,negative_mae
1,1,0.355008,-1.072944
0,0,0.344606,-1.068841
8,8,0.001796,-1.300843
5,5,0.000643,-1.302655
9,9,-0.000328,-1.304342
7,7,-0.001216,-1.305684
6,6,-0.001977,-1.305188
4,4,-0.002845,-1.305564
2,2,-0.003521,-1.306609
3,3,-0.003754,-1.306165


#### 1.2.1. Local Feature Importances

In [None]:
# get global and local MDI+ scores with custom evaluation metrics/scorers
mdi_plus_scores = rf_plus_model.get_mdi_plus_scores(
    X_train, y_train,
    scoring_fns={"r2_score": _fast_r2_score, "negative_mae": neg_mae},
    local_scoring_fns=True
)

  Evaluates the negative log-loss between the observed and
  Evaluates the negative log-loss between the observed and
  Evaluates the negative log-loss between the observed and
  Evaluates the negative log-loss between the observed and
  Evaluates the negative log-loss between the observed and
  Evaluates the negative log-loss between the observed and
  Evaluates the negative log-loss between the observed and
  Evaluates the negative log-loss between the observed and
  Evaluates the negative log-loss between the observed and
  Evaluates the negative log-loss between the observed and
  Evaluates the negative log-loss between the observed and
  Evaluates the negative log-loss between the observed and
  Evaluates the negative log-loss between the observed and
  Evaluates the negative log-loss between the observed and


KeyboardInterrupt: 

In [None]:
mdi_plus_scores["global"].sort_values("negative_mae", ascending=False)

Unnamed: 0,var,r2_score,negative_mae
0,0,0.232921,-1.082321
1,1,0.28204,-1.08452
4,4,0.001701,-1.225607
2,2,-0.003384,-1.226898
5,5,-0.000294,-1.228226
6,6,-0.002832,-1.228636
3,3,-0.003271,-1.229609
8,8,-0.000396,-1.229755
7,7,-0.004269,-1.230301
9,9,-0.000468,-1.231957


In [None]:
mdi_plus_scores["local"]["negative_mae"]

Unnamed: 0,0,1,2,3,4,5,6,7,8,9
0,-0.393579,-1.214633,-1.084519,-1.091143,-1.034764,-1.090147,-1.076047,-1.091932,-1.085082,-1.139749
1,-0.790884,-1.749695,-0.482110,-0.508377,-0.472782,-0.501301,-0.496919,-0.498205,-0.526730,-0.495575
2,-0.677236,-1.009102,-0.535736,-0.532358,-0.512320,-0.539106,-0.533261,-0.511866,-0.495965,-0.421820
3,-0.261727,-1.919838,-1.528549,-1.516499,-1.457200,-1.538888,-1.487078,-1.526788,-1.539444,-1.496135
4,-0.330837,-1.417515,-0.999925,-1.015754,-0.971787,-1.027616,-1.019009,-1.025287,-0.968227,-0.989040
...,...,...,...,...,...,...,...,...,...,...
129,-0.386316,-0.068927,-0.496897,-0.500035,-0.484733,-0.487473,-0.472289,-0.483606,-0.517238,-0.486323
130,-2.806146,-2.685375,-3.714496,-3.713738,-3.728909,-3.727175,-3.714673,-3.715684,-3.727564,-3.715268
131,-0.580311,-0.179154,-0.047248,-0.043142,-0.050008,-0.059487,-0.037682,-0.035503,-0.061670,-0.142827
132,-1.677650,-1.418440,-1.008235,-0.992973,-1.006555,-1.018746,-1.007073,-1.012703,-1.038724,-1.000363


### 1.3 MDI+ with custom transformer

The example below is equivalent to running RF+ with `include_raw=True`

In [None]:
# fit RF+ with custom transformer
rf_plus_model = RandomForestPlusRegressor(rf_model=rf_regressor, include_raw=False, add_transformers=[IdentityTransformer()])
rf_plus_model.fit(X_train, y_train)

In [None]:
# get MDI+ scores
mdi_plus_scores = rf_plus_model.get_mdi_plus_scores(X_train, y_train)
mdi_plus_scores.sort_values("importance", ascending=False)

LFI MATRIX
            0         1         2         3         4         5         6  \
0    0.689113  0.133543  0.035881  0.027716  0.112682  0.037896  0.051118   
1    1.209122  1.219596  0.042923  0.068320  0.043585  0.037152  0.026171   
2    1.127865  0.445313  0.043397  0.051979  0.039214  0.035620  0.044617   
3    1.229295  0.396856  0.026889  0.033222  0.126957  0.053641  0.096983   
4    0.681569  0.357366  0.057780  0.031690  0.072309  0.052541  0.040252   
..        ...       ...       ...       ...       ...       ...       ...   
129  0.124054  0.433910  0.048079  0.039582  0.039842  0.048600  0.062956   
130  0.936911  1.038180  0.019049  0.056592  0.028408  0.024814  0.126952   
131  0.578721  0.131963  0.055507  0.034949  0.064628  0.053566  0.043184   
132  0.617103  0.328494  0.031089  0.076533  0.037318  0.034234  0.069337   
133  0.745796  0.594324  0.051269  0.060806  0.105050  0.038488  0.035610   

            7         8         9  
0    0.019435  0.067701  0.0

Unnamed: 0,var,importance
1,1,0.287724
0,0,0.238051
4,4,0.004065
9,9,0.001857
8,8,-0.000231
5,5,-0.000524
3,3,-0.003818
6,6,-0.004103
7,7,-0.006076
2,2,-0.00662


### 1.4 Choosing the GLM and scoring metric via stability score

There are many choices of GLMs and scoring metrics that can be made within the MDI+ framework.

One way to select the GLM and scoring metric in MDI+ is by evaluating the stability of the feature importances/rankings for each choice of GLM/metric and taking the GLM/metric that is the most stable across different bootstrap samples of trees. For example, we can take the GLM and metric with the highest stability score, as measured by RBO below.

In [None]:
n_bootstraps = 25
prediction_models = {"ridge": RidgeRegressorPPM(), "lasso": LassoRegressorPPM()}
scoring_fns = {"r2": _fast_r2_score, "neg_mae": neg_mae}
stability_dict = {}
for model_name, prediction_model in prediction_models.items():
    # fit RF+
    rf_plus_model = RandomForestPlusRegressor(rf_model=rf_regressor, prediction_model=prediction_model)
    rf_plus_model.fit(X_train, y_train)
    # get MDI+ scores
    mdi_plus_scores = rf_plus_model.get_mdi_plus_scores(X_train, y_train, scoring_fns=scoring_fns)
    # get MDI+ stability scores
    mdi_plus_stability_scores = rf_plus_model.get_mdi_plus_stability_scores(B=n_bootstraps)
    stability_dict[model_name] = mdi_plus_stability_scores

LFI MATRIX
            0         1         2         3         4         5         6  \
0    0.693001  0.138618  0.037185  0.031931  0.108767  0.044038  0.059631   
1    1.218989  1.228197  0.046060  0.075842  0.045172  0.045686  0.031000   
2    1.136659  0.448015  0.045802  0.059951  0.043084  0.044175  0.052556   
3    1.233886  0.405712  0.032656  0.039352  0.128188  0.058558  0.103594   
4    0.685681  0.359112  0.059503  0.037074  0.077003  0.058771  0.047674   
..        ...       ...       ...       ...       ...       ...       ...   
129  0.133084  0.435573  0.053019  0.046578  0.044339  0.055068  0.072806   
130  0.951078  1.047434  0.021512  0.062685  0.030651  0.029721  0.139449   
131  0.579655  0.133289  0.058194  0.040044  0.068581  0.062869  0.048708   
132  0.620683  0.327788  0.038446  0.086254  0.039834  0.040365  0.079147   
133  0.748782  0.593470  0.056272  0.066204  0.107013  0.044215  0.041659   

            7         8         9  
0    0.023996  0.072911  0.0

In [None]:
pd.concat(stability_dict, axis=0).reset_index().rename(columns={"level_0": "ppm"}).drop(columns=["level_1"]).sort_values("RBO", ascending=False)

Unnamed: 0,ppm,scorer,RBO,tauAP
0,ridge,r2,0.927324,0.88495
1,ridge,neg_mae,0.891125,0.789819
2,lasso,r2,0.887515,0.833474
3,lasso,neg_mae,0.886008,0.794456


### 1.5 Aggregating multiple MDI+ rankings in an ensemble

Instead of choosing a single GLM and metric to use in MDI+, it may be preferable in some cases to aggregate MDI+ feature importances/rankings across multiple choices of GLMs and metrics.

One naive method for doing this ensembling is to take the median rank across each choice of GLM and metric (as shown below). However, more creative ensembling schemes can also be explored.

In [None]:
prediction_models = {"ridge": RidgeRegressorPPM(), "lasso": LassoRegressorPPM()}
scoring_fns = {"r2": _fast_r2_score, "neg_mae": neg_mae}
mdi_plus_scores_dict = {}
for model_name, prediction_model in prediction_models.items():
    # fit RF+
    rf_plus_model = RandomForestPlusRegressor(rf_model=rf_regressor, prediction_model=prediction_model)
    rf_plus_model.fit(X_train, y_train)
    # get MDI+ scores
    mdi_plus_scores = rf_plus_model.get_mdi_plus_scores(X_train, y_train, scoring_fns=scoring_fns)
    for col in mdi_plus_scores.columns:
        if col != "var":
            mdi_plus_scores = mdi_plus_scores.rename(columns={col: model_name + "_" + col})
    mdi_plus_scores_dict[model_name] = mdi_plus_scores

LFI MATRIX
            0         1         2         3         4         5         6  \
0    0.693001  0.138618  0.037185  0.031931  0.108767  0.044038  0.059631   
1    1.218989  1.228197  0.046060  0.075842  0.045172  0.045686  0.031000   
2    1.136659  0.448015  0.045802  0.059951  0.043084  0.044175  0.052556   
3    1.233886  0.405712  0.032656  0.039352  0.128188  0.058558  0.103594   
4    0.685681  0.359112  0.059503  0.037074  0.077003  0.058771  0.047674   
..        ...       ...       ...       ...       ...       ...       ...   
129  0.133084  0.435573  0.053019  0.046578  0.044339  0.055068  0.072806   
130  0.951078  1.047434  0.021512  0.062685  0.030651  0.029721  0.139449   
131  0.579655  0.133289  0.058194  0.040044  0.068581  0.062869  0.048708   
132  0.620683  0.327788  0.038446  0.086254  0.039834  0.040365  0.079147   
133  0.748782  0.593470  0.056272  0.066204  0.107013  0.044215  0.041659   

            7         8         9  
0    0.023996  0.072911  0.0

In [None]:
mdi_plus_scores_df = pd.concat([df.set_index('var') for df in mdi_plus_scores_dict.values()], axis=1)
mdi_plus_ranks_df = mdi_plus_scores_df.rank(ascending=False).median(axis=1)
mdi_plus_ranks_df = pd.DataFrame(mdi_plus_ranks_df, columns=["median_rank"]).reset_index()
mdi_plus_ranks_df.sort_values("median_rank")

Unnamed: 0,var,median_rank
1,1,1.0
0,0,2.0
4,4,3.0
5,5,5.0
2,2,6.5
6,6,6.5
8,8,7.0
3,3,7.5
9,9,8.0
7,7,9.0


## 2. Classification Example

In [None]:
# generate data from logistic model: logit(E[Y|X]) = x1 + x2
n = 200
p = 10
s = 2
X = np.random.normal(size=(n, p))
beta = np.concatenate((np.ones(s), np.zeros(p-s)))
probs = 1 / (1 + np.exp(-np.matmul(X, beta)))
y = (np.random.uniform(size=n) < probs) * 1
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.33, random_state=12345)

### 2.1 MDI+ with default classification settings

In [None]:
# fit RF+
rf_plus_model = RandomForestPlusClassifier(rf_model=rf_classifier)
rf_plus_model.fit(X_train, y_train)

KeyboardInterrupt: 

In [None]:
# make predictions with RF+
preds = rf_plus_model.predict(X_test)
prob_preds = rf_plus_model.predict_proba(X_test)
accuracy_score(y_test, preds), roc_auc_score(y_test, prob_preds[:, 1])

(0.7424242424242424, 0.7640552995391706)

In [None]:
# get MDI+ scores (higher ngative log-loss value = greater importance)
mdi_plus_scores = rf_plus_model.get_mdi_plus_scores(X_train, y_train)
mdi_plus_scores.sort_values("importance", ascending=False)

num iterations: 100
in loop
blocked data shape: (134, 34)
lfi shape (134,)
lfi shape (134,)
lfi shape (134,)
lfi shape (134,)
lfi shape (134,)
lfi shape (134,)
lfi shape (134,)
lfi shape (134,)
lfi shape (134,)
lfi shape (134,)
in loop
blocked data shape: (134, 29)
lfi shape (134,)
lfi shape (134,)
lfi shape (134,)
lfi shape (134,)
lfi shape (134,)
lfi shape (134,)
lfi shape (134,)
lfi shape (134,)
lfi shape (134,)
lfi shape (134,)
in loop
blocked data shape: (134, 35)
lfi shape (134,)
lfi shape (134,)
lfi shape (134,)
lfi shape (134,)
lfi shape (134,)
lfi shape (134,)
lfi shape (134,)
lfi shape (134,)
lfi shape (134,)
lfi shape (134,)
in loop
blocked data shape: (134, 33)
lfi shape (134,)
lfi shape (134,)
lfi shape (134,)
lfi shape (134,)
lfi shape (134,)
lfi shape (134,)
lfi shape (134,)
lfi shape (134,)
lfi shape (134,)
lfi shape (134,)
in loop
blocked data shape: (134, 31)
lfi shape (134,)
lfi shape (134,)
lfi shape (134,)
lfi shape (134,)
lfi shape (134,)
lfi shape (134,)
lfi shap

Unnamed: 0,var,importance
0,0,-0.616593
1,1,-0.665463
2,2,-0.680308
9,9,-0.688854
4,4,-0.693649
8,8,-0.693752
5,5,-0.695553
3,3,-0.697111
7,7,-0.697527
6,6,-0.699142


#### 2.1.1. Local Feature Importances

In [None]:
# get MDI+ scores (higher ngative log-loss value = greater importance)
mdi_plus_scores = rf_plus_model.get_mdi_plus_scores(
    X_train, y_train, local_scoring_fns=neg_log_loss
)

num iterations: 100
in loop
blocked data shape: (134, 34)
lfi shape (134,)
lfi shape (134,)
lfi shape (134,)
lfi shape (134,)
lfi shape (134,)
lfi shape (134,)
lfi shape (134,)
lfi shape (134,)
lfi shape (134,)
lfi shape (134,)
in loop
blocked data shape: (134, 29)
lfi shape (134,)
lfi shape (134,)
lfi shape (134,)
lfi shape (134,)
lfi shape (134,)
lfi shape (134,)
lfi shape (134,)
lfi shape (134,)
lfi shape (134,)
lfi shape (134,)
in loop
blocked data shape: (134, 35)
lfi shape (134,)
lfi shape (134,)
lfi shape (134,)
lfi shape (134,)
lfi shape (134,)
lfi shape (134,)
lfi shape (134,)
lfi shape (134,)
lfi shape (134,)
lfi shape (134,)
in loop
blocked data shape: (134, 33)
lfi shape (134,)
lfi shape (134,)
lfi shape (134,)
lfi shape (134,)
lfi shape (134,)
lfi shape (134,)
lfi shape (134,)
lfi shape (134,)
lfi shape (134,)
lfi shape (134,)
in loop
blocked data shape: (134, 31)
lfi shape (134,)
lfi shape (134,)
lfi shape (134,)
lfi shape (134,)
lfi shape (134,)
lfi shape (134,)
lfi shap

In [None]:
mdi_plus_scores["global"].sort_values("importance", ascending=False)

Unnamed: 0,var,importance
0,0,-0.616593
1,1,-0.665463
2,2,-0.680308
9,9,-0.688854
4,4,-0.693649
8,8,-0.693752
5,5,-0.695553
3,3,-0.697111
7,7,-0.697527
6,6,-0.699142


In [None]:
mdi_plus_scores["local"]

Unnamed: 0,0,1,2,3,4,5,6,7,8,9
0,-0.406029,-1.103065,-0.677194,-0.805959,-0.871749,-0.807468,-0.813090,-0.830206,-0.802054,-0.693133
1,-0.436759,-0.567771,-0.681448,-0.848359,-0.763372,-0.795017,-0.789513,-0.840240,-0.803859,-0.810027
2,-0.515824,-0.426828,-0.497530,-0.590758,-0.589239,-0.599080,-0.643801,-0.592818,-0.599137,-0.611461
3,-1.282723,-0.509471,-0.733439,-0.805253,-0.860139,-0.822921,-0.791343,-0.804302,-0.746693,-0.805781
4,-0.280789,-0.826833,-0.609911,-0.613720,-0.601011,-0.640249,-0.760580,-0.616854,-0.542901,-0.517158
...,...,...,...,...,...,...,...,...,...,...
129,-1.321501,-0.455961,-0.767508,-0.829361,-0.789773,-0.713934,-0.841510,-0.829062,-0.768522,-0.812111
130,-0.373533,-0.481636,-0.637352,-0.572210,-0.537389,-0.573758,-0.597916,-0.573012,-0.545420,-0.625384
131,-0.412565,-0.338685,-0.541747,-0.570623,-0.602924,-0.568087,-0.585289,-0.585849,-0.577893,-0.506107
132,-0.230500,-0.688026,-0.630211,-0.604885,-0.791419,-0.584004,-0.681644,-0.586002,-0.517218,-0.614945


Note: `neg_log_loss()` was re-defined here since the default sklearn `log_loss()` metric requires vector inputs and thus cannot be directly used as a local scoring function.