In [18]:
from sklearn.datasets import load_diabetes
from sklearn.model_selection import train_test_split
from sklearn.linear_model import Ridge
from sklearn.inspection import permutation_importance
import numpy as np
import pandas as pd
# Load the diabetes regression dataset that ships with scikit-learn.
diabetes = load_diabetes()

# Hold out a validation split (default split size); the fixed seed keeps the
# split, and therefore every score below, reproducible.
X, y = diabetes.data, diabetes.target
X_train, X_val, y_train, y_val = train_test_split(X, y, random_state=0)

# Fit a lightly regularised ridge regression on the training part, then
# display its score on the held-out validation part.
model = Ridge(alpha=1e-2)
model.fit(X_train, y_train)
model.score(X_val, y_val)

0.35666062386954556

In [20]:
# Feature names of the dataset; reused later as the per-feature column
# headers of the importance summary table.
feat_col_list = diabetes.feature_names
feat_col_list

['age', 'sex', 'bmi', 'bp', 's1', 's2', 's3', 's4', 's5', 's6']

In [31]:
# Evaluate permutation importance under several metrics in one call. With a
# list of scorer names the result is keyed by scorer name, one entry per metric.
scoring = ['r2', 'neg_mean_absolute_percentage_error', 'neg_mean_squared_error']
r_multi = permutation_importance(
    model, X_val, y_val, n_repeats=30, random_state=0, scoring=scoring)

# Column layout of the summary table: three identifier columns followed by one
# column per feature. It does not depend on the metric, so it is built once
# here rather than on every loop iteration (and is defined even if the loop
# body never runs).
col_list = ["metric", "measure", "k_fold_i"] + feat_col_list

# Only a single train/validation split is evaluated here, so the fold index
# recorded in every row is 0.
k_fold_i = 0

# Collect two rows per metric: the per-feature mean importance and its
# standard deviation across the permutation repeats.
r_list = []
for metric, result in r_multi.items():
    print(metric)
    r_list.append([metric, "mean", k_fold_i, *result.importances_mean])
    r_list.append([metric, "std", k_fold_i, *result.importances_std])



    # r = r_multi[metric]
    # for i in r.importances_mean.argsort()[::-1]:
    #     if r.importances_mean[i] - 2 * r.importances_std[i] > 0:
    #         print(f"    {diabetes.feature_names[i]:<8}"
    #               f"{r.importances_mean[i]:.3f}"
    #               f" +/- {r.importances_std[i]:.3f}")


r2
    s5      0.204 +/- 0.050
    bmi     0.176 +/- 0.048
    bp      0.088 +/- 0.033
    sex     0.056 +/- 0.023
neg_mean_absolute_percentage_error
    s5      0.081 +/- 0.020
    bmi     0.064 +/- 0.015
    bp      0.029 +/- 0.010
neg_mean_squared_error
    s5      1013.903 +/- 246.460
    bmi     872.694 +/- 240.296
    bp      438.681 +/- 163.025
    sex     277.382 +/- 115.126


In [32]:
# Assemble the summary table from the rows accumulated in r_list (one "mean"
# and one "std" row per metric), labelled with the columns in col_list.
df = pd.DataFrame(r_list, columns=col_list)
df

Unnamed: 0,metric,measure,k_fold_i,age,sex,bmi,bp,s1,s2,s3,s4,s5,s6
0,r2,mean,0,-0.001993,0.055874,0.17579,0.088365,0.042211,0.002036,0.002038,0.003187,0.204234,0.002787
1,r2,std,0,0.003647,0.02319,0.048404,0.032839,0.031413,0.002622,0.013093,0.008402,0.049645,0.003158
2,neg_mean_absolute_percentage_error,mean,0,-0.001143,0.013456,0.0638,0.029115,0.013455,7.5e-05,0.005368,0.00222,0.080886,0.001165
3,neg_mean_absolute_percentage_error,std,0,0.001463,0.007807,0.014821,0.010031,0.011302,0.000947,0.006004,0.004014,0.020069,0.001074
4,neg_mean_squared_error,mean,0,-9.892416,277.381974,872.694277,438.681037,209.554527,10.108816,10.115175,15.821355,1013.902651,13.834977
5,neg_mean_squared_error,std,0,18.1033,115.126095,240.296323,163.024802,155.945284,13.016553,65.000803,41.711877,246.460266,15.678956


In [6]:
# The multi-metric result is keyed by scorer name.
r_multi.keys()

dict_keys(['r2', 'neg_mean_absolute_percentage_error', 'neg_mean_squared_error'])

In [9]:
# Mean importance per feature under the r2 scorer, in feat_col_list order.
r_multi['r2'].importances_mean

array([-0.00199267,  0.05587407,  0.17579   ,  0.08836513,  0.04221134,
        0.00203626,  0.00203754,  0.00318695,  0.20423412,  0.00278683])

In [10]:
# Standard deviation of the importance per feature under the r2 scorer,
# in feat_col_list order.
r_multi['r2'].importances_std

array([0.00364661, 0.02319027, 0.04840377, 0.03283868, 0.03141263,
       0.00262197, 0.01309335, 0.00840218, 0.04964539, 0.00315827])

In [12]:
# Raw per-repeat importances: one row per feature, one column per
# permutation repeat (n_repeats=30 above).
r_multi['r2'].importances.shape

(10, 30)

In [13]:
# The individual per-repeat importance values for the first feature ('age').
r_multi['r2'].importances[0]

array([-0.00409598,  0.00217375, -0.00166394, -0.00205083,  0.00489654,
        0.00356133,  0.00322849,  0.00124552, -0.00191488, -0.00491859,
        0.00060739, -0.0064221 , -0.00343046, -0.00195777,  0.00318553,
       -0.00734629, -0.00028103, -0.00121117, -0.00396204, -0.00066587,
       -0.0106756 , -0.00322609, -0.00508704, -0.00553144,  0.00234617,
       -0.00297843, -0.0083366 , -0.00151655, -0.00192957, -0.00182241])

In [15]:
# Sanity check: averaging the first feature's raw repeats should reproduce
# the first entry of importances_mean.
r_multi['r2'].importances[0].mean()

-0.001992665521383663

In [16]:
# Sanity check: the (population) standard deviation of the first feature's raw
# repeats should reproduce the first entry of importances_std.
r_multi['r2'].importances[0].std()

0.0036466139533947598