In [30]:
from sklearn.datasets import make_friedman1
from sklearn.feature_selection import RFECV
from sklearn.svm import SVR

import xgboost as xgb
import numpy as np

from sklearn.model_selection import cross_val_score
from sklearn.metrics import mean_absolute_error


In [53]:
def handle_XGBoost(X_train, y_train, alphas=None, cv=5, return_features=False):
    """Jointly pick an L1 penalty and a feature subset for an XGBoost regressor.

    For each candidate ``reg_alpha`` the function:

    1. runs ``RFECV`` (recursive feature elimination, one feature per step)
       around an ``XGBRegressor`` to choose a feature subset, then
    2. scores that subset with cross-validated mean absolute error (MAE).

    The alpha with the lowest CV MAE is kept, and a fresh ``XGBRegressor``
    with that alpha is fitted on all training rows restricted to the
    selected columns. The chosen alpha, the number of selected features and
    the training MAE are printed.

    Parameters
    ----------
    X_train : 2-D array
        Training features. Must support ``X_train[:, indices]`` column
        indexing (e.g. a NumPy array).
    y_train : 1-D array
        Training targets.
    alphas : iterable of float, optional
        Candidate ``reg_alpha`` values. Defaults to
        ``np.logspace(0, 2, 10)``, i.e. ten log-spaced values from 1 to 100.
    cv : int, default 5
        Cross-validation setting passed to both ``RFECV`` and
        ``cross_val_score``.
    return_features : bool, default False
        If True, also return the selected column indices.

    Returns
    -------
    bst : XGBRegressor
        Model fitted on ``X_train[:, selected]`` only; pass the same column
        subset when calling ``predict`` on new data.
    (bst, selected) : tuple
        Returned instead of ``bst`` when ``return_features`` is True;
        ``selected`` is an integer index array of the chosen columns.

    Raises
    ------
    ValueError
        If ``alphas`` is given but empty.
    """
    if alphas is None:
        # 10 log-spaced values from 10**0 = 1 to 10**2 = 100 (inclusive)
        alphas = np.logspace(0, 2, 10, endpoint=True)
    alphas = list(alphas)
    if not alphas:
        raise ValueError("alphas must contain at least one candidate value")

    error_dict = {}    # alpha -> cross-validated MAE
    feature_dict = {}  # alpha -> indices of the features RFECV kept

    # cross-validate on alpha (regularization strength)
    for alpha in alphas:
        bst = xgb.sklearn.XGBRegressor(reg_alpha=alpha)

        # cross-validate on the number of features selected using RFE
        # (RFECV uses its default scoring here, not MAE)
        selector = RFECV(bst, step=1, cv=cv).fit(X_train, y_train)

        # column indices where the boolean support mask is True
        feature = np.flatnonzero(selector.support_)
        feature_dict[alpha] = feature

        # mean CV score of the underlying estimator on the selected columns;
        # the scorer returns negative MAE, so flip the sign
        score = cross_val_score(selector.estimator_, X_train[:, feature], y_train,
                                cv=cv, scoring='neg_mean_absolute_error').mean()
        error_dict[alpha] = -score

    # smallest CV MAE wins (ties resolve to the first alpha tried)
    opt_a = min(error_dict, key=error_dict.get)
    opt_feature = feature_dict[opt_a]

    print("alpha (lambda) from CV: {}".format(opt_a))
    print("size of opt_feature from CV:", len(opt_feature))

    # refit on the full training set with the chosen alpha and report train MAE
    bst = xgb.sklearn.XGBRegressor(reg_alpha=opt_a)
    bst.fit(X_train[:, opt_feature], y_train)
    y_pred = bst.predict(X_train[:, opt_feature])
    print("Train MAE: {:.6}".format(mean_absolute_error(y_train, y_pred)))

    if return_features:
        return bst, opt_feature
    return bst

In [54]:
# Synthetic Friedman #1 regression data: 50 samples, 10 features, fixed seed.
X, y = make_friedman1(n_samples=50, n_features=10, random_state=0)

# Stand-alone RFECV run around an XGBoost regressor with reg_alpha=1.
bst = xgb.sklearn.XGBRegressor(reg_alpha=1)
selector = RFECV(bst, step=1, cv=5).fit(X, y)
selector.support_  # bare expression mid-cell: evaluated, but not displayed

# Search over alpha and feature subsets; prints the chosen alpha, the number
# of selected features and the training MAE.
new_bst = handle_XGBoost(X, y)

alpha (lambda) from CV: 1.6681005372000588
size of opt_feature from CV: 6
Train MAE: 0.220982
