## One-Hot Encoding + PCA Data Comparison

- 시계열 데이터를 one-hot encoding
- PCA 피처를 추가

In [4]:
import pandas as pd
from sklearn.decomposition import PCA
from sklearn.preprocessing import StandardScaler

# 데이터 로드
train = pd.read_csv('./train.csv')

# yymm 컬럼을 날짜 형식으로 변환 (연도는 임의로 설정)
train['yymm'] = pd.to_datetime('2024' + train['yymm'], format='%Y%m%d %H:%M')

# day, hour, minute, weekday 컬럼 생성
day = train['yymm'].dt.day
day_dummies = pd.get_dummies(day, prefix='day')

hour = train['yymm'].dt.hour
hour_dummies = pd.get_dummies(hour, prefix='hour')

minute = train['yymm'].dt.minute
minute_dummies = pd.get_dummies(minute, prefix='minute')

# weekday 컬럼 생성
weekday = day % 7
weekday = weekday.map({0:'Mon', 1:'Tue', 2:'Wed', 3:'Thu', 4:'Fri', 5:'Sat', 6:'Sun'})
weekday_dummies = pd.get_dummies(weekday)

# PCA 피처 생성
features = train.loc[:, 'V1':'V26'] # V1 ~ V26 컬럼 선택

scaler = StandardScaler()
features = scaler.fit_transform(features) # 피처 표준화

pca = PCA(n_components=3)
pca_features = pca.fit_transform(features) # PCA 피처 생성

# 생성된 주성분을 DataFrame으로 변환
pca_columns = ['PCA1', 'PCA2', 'PCA3']
pca_df = pd.DataFrame(pca_features, columns=pca_columns)

# 원본 데이터와 PCA 피처 결합
train = pd.concat([train, day_dummies, hour_dummies, minute_dummies, weekday_dummies, pca_df], axis=1)

# yymm 컬럼 삭제
train.drop('yymm', axis=1, inplace=True)

# 결과 출력
train.head(10)

Unnamed: 0,V1,V2,V3,V4,V5,V6,V7,V8,V9,V10,...,Fri,Mon,Sat,Sun,Thu,Tue,Wed,PCA1,PCA2,PCA3
0,-5.327,12.25,-3.294,-7.855,-1.196,13.824,-10.249,-3.04,-5.17,8.077,...,False,False,False,False,False,True,False,-2.616525,-2.341176,-3.016174
1,-5.267,12.916,-3.22,-7.788,-1.196,14.424,-10.249,-3.04,-4.97,8.027,...,False,False,False,False,False,True,False,-2.691167,-2.187795,-3.000616
2,-5.127,13.583,-3.13,-7.658,-1.196,15.081,-10.359,-3.04,-4.83,7.977,...,False,False,False,False,False,True,False,-2.724164,-2.076864,-2.954282
3,-5.06,14.25,-3.13,-7.532,-1.196,14.961,-10.359,-3.04,-4.83,7.927,...,False,False,False,False,False,True,False,-2.744811,-2.076954,-2.864929
4,-4.967,14.916,-3.094,-7.462,-1.196,15.454,-10.359,-3.04,-4.97,7.877,...,False,False,False,False,False,True,False,-2.827777,-1.960836,-2.942832
5,-4.967,15.583,-3.02,-7.388,-1.196,15.284,-10.419,-3.04,-4.86,7.827,...,False,False,False,False,False,True,False,-2.914471,-1.90683,-2.889545
6,-4.827,16.25,-2.92,-7.288,-1.196,15.351,-10.449,-3.04,-4.933,7.777,...,False,False,False,False,False,True,False,-3.008483,-1.854571,-2.893859
7,-4.797,16.25,-2.92,-7.222,-1.196,14.188,-10.516,-3.04,-4.86,7.727,...,False,False,False,False,False,True,False,-3.088857,-1.917965,-2.752764
8,-4.737,16.25,-2.83,-7.188,-1.196,14.048,-10.659,-3.04,-4.933,7.677,...,False,False,False,False,False,True,False,-3.212438,-1.820224,-2.834904
9,-4.9,16.25,-2.89,-7.188,-1.196,14.014,-10.659,-3.04,-4.43,7.627,...,False,False,False,False,False,True,False,-3.306377,-1.653918,-2.872736


## Base Model MAE

In [2]:
import pandas as pd
from sklearn.model_selection import train_test_split, cross_val_score
from sklearn.linear_model import LinearRegression, Ridge, Lasso, ElasticNet
from sklearn.ensemble import RandomForestRegressor, GradientBoostingRegressor
from xgboost import XGBRegressor
from lightgbm import LGBMRegressor
from sklearn.svm import SVR
from sklearn.tree import DecisionTreeRegressor
from sklearn.metrics import make_scorer, mean_absolute_error

# 데이터 분할
X = train.drop('Target', axis=1)    # Target을 제외한 모든 컬럼을 X로 지정
y = train['Target']                 # Target 컬럼을 y로 지정

# train, test split
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42)

# 모델 정의
models = {
    'Linear Regression': LinearRegression(),
    'Ridge': Ridge(random_state=42),
    'Lasso': Lasso(random_state=42),
    'ElasticNet': ElasticNet(random_state=42),
    'SVR': SVR(),
    'Gradient Boosting': GradientBoostingRegressor(random_state=42),
    'Random Forest': RandomForestRegressor(random_state=42),
    'XGBoost': XGBRegressor(random_state=42),
    'LightGBM': LGBMRegressor(random_state=42, verbose=-1),
    'Decision Tree': DecisionTreeRegressor(random_state=42)
}

# MAE를 평가 기준으로 사용하기 위해 scorer 정의
mae_scorer = make_scorer(mean_absolute_error)

# 각 모델에 대해 학습 및 5-fold 교차검증 수행
for model_name, model in models.items():
    scores = cross_val_score(model, X_train, y_train, cv=5, scoring=mae_scorer)
    print(f'{model_name}: {scores.mean()}')

Linear Regression: 12.702331197553068
Ridge: 12.698160106760502
Lasso: 12.580224346550702
ElasticNet: 12.582955143638385
SVR: 12.56434179957121
Gradient Boosting: 12.695622332027604
Random Forest: 13.208968772774359
XGBoost: 14.024569050620073
LightGBM: 13.254466529819197
Decision Tree: 16.934163402564103


## Feature Selection MAE

### Filter Method

In [3]:
import numpy as np
import pandas as pd
from sklearn.model_selection import cross_val_score
from sklearn.linear_model import LinearRegression, Ridge, Lasso, ElasticNet
from sklearn.ensemble import GradientBoostingRegressor
from sklearn.svm import SVR
from sklearn.feature_selection import SelectKBest, f_regression
from sklearn.metrics import make_scorer, mean_absolute_error

# 데이터 분할
X = train.drop('Target', axis=1)  # Target 컬럼 제외
y = train['Target']  # Target 컬럼

# 모델 정의
models = {
    'Linear Regression': LinearRegression(),
    'Ridge': Ridge(random_state=42),
    'Lasso': Lasso(random_state=42),
    'ElasticNet': ElasticNet(random_state=42),
    'SVR': SVR(),
    'Gradient Boosting': GradientBoostingRegressor(random_state=42)
}

# MAE를 평가 기준으로 사용하기 위해 scorer 정의
mae_scorer = make_scorer(mean_absolute_error)

# SelectKBest로 K-최고 특성 선택
test = SelectKBest(score_func=f_regression, k=X.shape[1])
fit = test.fit(X, y)

# 선택된 특성들의 인덱스를 내림차순으로 정렬
sorted_columns = np.argsort(fit.scores_)[::-1]

# 각 모델에 대해 최적의 특성 선택
for model_name, model in models.items():

    # 최적의 특성을 찾기 위한 변수 초기화
    best_score = float('inf')
    best_features = []

    # 최적의 특성 선택
    for i in range(1, X.shape[1] + 1):
        # 선택된 feature들의 인덱스
        fs = sorted_columns[:i]

        # 선택된 feature만 선택 (Pandas DataFrame에서 iloc 사용)
        X_selected = X.iloc[:, fs]
        
        # 선택된 feature들의 이름
        selected_feature_names = X.columns[fs].tolist()
        
        # 교차 검증
        mae = cross_val_score(model, X_selected, y, cv=5, scoring=mae_scorer).mean()

        # 가장 성능이 좋은 MAE 및 feature를 저장
        if mae < best_score:
            best_score = mae
            best_features = selected_feature_names
    
    # 결과 출력
    print(f'{model_name} best score: {best_score}, num_features: {len(best_features)}, best features: {best_features}')

Linear Regression best score: 12.491249180919453, num_features: 26, best features: ['hour_17', 'day_28', 'day_29', 'V7', 'day_20', 'day_22', 'day_3', 'hour_14', 'minute_10', 'Mon', 'day_30', 'hour_12', 'V17', 'hour_22', 'day_24', 'Wed', 'hour_11', 'V10', 'hour_7', 'day_21', 'day_16', 'day_4', 'V4', 'hour_23', 'minute_30', 'hour_13']
Ridge best score: 12.490641913362998, num_features: 28, best features: ['hour_17', 'day_28', 'day_29', 'V7', 'day_20', 'day_22', 'day_3', 'hour_14', 'minute_10', 'Mon', 'day_30', 'hour_12', 'V17', 'hour_22', 'day_24', 'Wed', 'hour_11', 'V10', 'hour_7', 'day_21', 'day_16', 'day_4', 'V4', 'hour_23', 'minute_30', 'hour_13', 'hour_20', 'hour_16']
Lasso best score: 12.535082495159886, num_features: 23, best features: ['hour_17', 'day_28', 'day_29', 'V7', 'day_20', 'day_22', 'day_3', 'hour_14', 'minute_10', 'Mon', 'day_30', 'hour_12', 'V17', 'hour_22', 'day_24', 'Wed', 'hour_11', 'V10', 'hour_7', 'day_21', 'day_16', 'day_4', 'V4']
ElasticNet best score: 12.529968

### Forward Selection

In [4]:
import numpy as np
import pandas as pd
from sklearn.model_selection import cross_val_score
from sklearn.linear_model import LinearRegression, Ridge, Lasso, ElasticNet
from sklearn.ensemble import GradientBoostingRegressor
from sklearn.svm import SVR
from sklearn.feature_selection import SequentialFeatureSelector
from sklearn.metrics import make_scorer, mean_absolute_error

# 데이터 분할
X = train.drop('Target', axis=1)  # Target 컬럼 제외
y = train['Target']  # Target 컬럼

# 모델 정의
models = {
    'Linear Regression': LinearRegression(),
    'Ridge': Ridge(random_state=42),
    'Lasso': Lasso(random_state=42),
    'ElasticNet': ElasticNet(random_state=42),
    'SVR': SVR(),
    'Gradient Boosting': GradientBoostingRegressor(random_state=42)
}

# MAE를 평가 기준으로 사용하기 위해 scorer 정의
mae_scorer = make_scorer(mean_absolute_error)

# 각 모델에 대해 최적의 특성 선택
for model_name, model in models.items():
    
    # 최적의 특성을 찾기 위한 변수 초기화
    best_score = float('inf')
    best_features = []

    # 최적의 특성 선택
    sfs = SequentialFeatureSelector(model, n_features_to_select=5, direction='forward')
    fit = sfs.fit(X, y)

    # 선택된 피처
    fs = X.columns[fit.support_].tolist()

    # 선택된 feature 데이터프레임 생성
    X_selected = X.iloc[:, fit.get_support()]

    # 교차 검증
    mae = cross_val_score(model, X_selected, y, cv=5, scoring=mae_scorer).mean()

    # 결과 출력
    print(f'{model_name} best score: {mae}, best features: {fs}')


Linear Regression best score: 12.507142870053295, best features: ['V17', 'hour_11', 'hour_17', 'minute_10', 'PCA3']
Ridge best score: 12.50727753935644, best features: ['V17', 'hour_11', 'hour_17', 'minute_10', 'PCA3']
Lasso best score: 12.534578335004287, best features: ['V1', 'V3', 'V5', 'V10', 'V17']
ElasticNet best score: 12.528527107451348, best features: ['V1', 'V8', 'V10', 'V17', 'V25']
SVR best score: 12.522103727033684, best features: ['day_16', 'day_22', 'day_30', 'hour_11', 'hour_22']
Gradient Boosting best score: 12.498082945702834, best features: ['hour_5', 'hour_7', 'hour_11', 'hour_17', 'Sat']


In [5]:
import numpy as np
import pandas as pd
from sklearn.model_selection import cross_val_score
from sklearn.linear_model import LinearRegression, Ridge, Lasso, ElasticNet
from sklearn.ensemble import GradientBoostingRegressor
from sklearn.svm import SVR
from sklearn.feature_selection import SequentialFeatureSelector
from sklearn.metrics import make_scorer, mean_absolute_error

# 데이터 분할
X = train.drop('Target', axis=1)  # Target 컬럼 제외
y = train['Target']  # Target 컬럼

# 모델 정의
models = {
    'Linear Regression': LinearRegression(),
    'Ridge': Ridge(random_state=42),
    'Lasso': Lasso(random_state=42),
    'ElasticNet': ElasticNet(random_state=42),
    'SVR': SVR(),
    'Gradient Boosting': GradientBoostingRegressor(random_state=42)
}

# MAE를 평가 기준으로 사용하기 위해 scorer 정의
mae_scorer = make_scorer(mean_absolute_error)

# 각 모델에 대해 최적의 특성 선택
for model_name, model in models.items():
    
    # 최적의 특성을 찾기 위한 변수 초기화
    best_score = float('inf')
    best_features = []

    # 최적의 특성 선택
    sfs = SequentialFeatureSelector(model, n_features_to_select=10, direction='forward')
    fit = sfs.fit(X, y)

    # 선택된 피처
    fs = X.columns[fit.support_].tolist()

    # 선택된 feature 데이터프레임 생성
    X_selected = X.iloc[:, fit.get_support()]

    # 교차 검증
    mae = cross_val_score(model, X_selected, y, cv=5, scoring=mae_scorer).mean()

    # 결과 출력
    print(f'{model_name} best score: {mae}, best features: {fs}')


Linear Regression best score: 12.489278598979428, best features: ['V17', 'V26', 'hour_7', 'hour_11', 'hour_13', 'hour_17', 'hour_22', 'minute_10', 'Wed', 'PCA3']
Ridge best score: 12.489479345807295, best features: ['V17', 'V26', 'hour_7', 'hour_11', 'hour_13', 'hour_17', 'hour_22', 'minute_10', 'Wed', 'PCA3']
Lasso best score: 12.534578335004287, best features: ['V1', 'V3', 'V5', 'V8', 'V9', 'V10', 'V12', 'V13', 'V14', 'V17']
ElasticNet best score: 12.528527107451348, best features: ['V1', 'V3', 'V5', 'V8', 'V9', 'V10', 'V12', 'V13', 'V17', 'V25']
SVR best score: 12.47460979363962, best features: ['day_14', 'day_16', 'day_22', 'day_24', 'day_30', 'hour_11', 'hour_17', 'hour_18', 'hour_22', 'Mon']
Gradient Boosting best score: 12.474189750551032, best features: ['day_3', 'hour_5', 'hour_7', 'hour_11', 'hour_12', 'hour_13', 'hour_17', 'hour_19', 'Mon', 'Sat']


In [6]:
import numpy as np
import pandas as pd
from sklearn.model_selection import cross_val_score
from sklearn.linear_model import LinearRegression, Ridge, Lasso, ElasticNet
from sklearn.ensemble import GradientBoostingRegressor
from sklearn.svm import SVR
from sklearn.feature_selection import SequentialFeatureSelector
from sklearn.metrics import make_scorer, mean_absolute_error

# 데이터 분할
X = train.drop('Target', axis=1)  # Target 컬럼 제외
y = train['Target']  # Target 컬럼

# 모델 정의
models = {
    'Linear Regression': LinearRegression(),
    'Ridge': Ridge(random_state=42),
    'Lasso': Lasso(random_state=42),
    'ElasticNet': ElasticNet(random_state=42),
    'SVR': SVR(),
    'Gradient Boosting': GradientBoostingRegressor(random_state=42)
}

# MAE를 평가 기준으로 사용하기 위해 scorer 정의
mae_scorer = make_scorer(mean_absolute_error)

# 각 모델에 대해 최적의 특성 선택
for model_name, model in models.items():
    
    # 최적의 특성을 찾기 위한 변수 초기화
    best_score = float('inf')
    best_features = []

    # 최적의 특성 선택
    sfs = SequentialFeatureSelector(model, n_features_to_select=15, direction='forward')
    fit = sfs.fit(X, y)

    # 선택된 피처
    fs = X.columns[fit.support_].tolist()

    # 선택된 feature 데이터프레임 생성
    X_selected = X.iloc[:, fit.get_support()]

    # 교차 검증
    mae = cross_val_score(model, X_selected, y, cv=5, scoring=mae_scorer).mean()

    # 결과 출력
    print(f'{model_name} best score: {mae}, best features: {fs}')


Linear Regression best score: 12.486743268065808, best features: ['V17', 'V26', 'day_13', 'day_18', 'day_24', 'day_30', 'hour_7', 'hour_11', 'hour_13', 'hour_17', 'hour_22', 'minute_10', 'minute_30', 'Wed', 'PCA3']
Ridge best score: 12.486972738196066, best features: ['V17', 'V26', 'day_13', 'day_18', 'day_24', 'day_30', 'hour_7', 'hour_11', 'hour_13', 'hour_17', 'hour_22', 'minute_10', 'minute_30', 'Wed', 'PCA3']
Lasso best score: 12.534578335004287, best features: ['V1', 'V3', 'V5', 'V8', 'V9', 'V10', 'V12', 'V13', 'V14', 'V15', 'V16', 'V17', 'V18', 'V19', 'V20']
ElasticNet best score: 12.528527107451348, best features: ['V1', 'V3', 'V5', 'V8', 'V9', 'V10', 'V12', 'V13', 'V15', 'V16', 'V17', 'V18', 'V19', 'V24', 'V25']
SVR best score: 12.459366779792328, best features: ['day_7', 'day_11', 'day_14', 'day_16', 'day_22', 'day_24', 'day_27', 'day_30', 'day_31', 'hour_10', 'hour_11', 'hour_17', 'hour_18', 'hour_22', 'Mon']
Gradient Boosting best score: 12.471963236363806, best features: [

### Backward Elimination

In [7]:
import pandas as pd
import numpy as np
from sklearn.model_selection import cross_val_score
from sklearn.linear_model import LinearRegression, Ridge, Lasso, ElasticNet
from sklearn.ensemble import GradientBoostingRegressor
from sklearn.feature_selection import RFE
from sklearn.metrics import make_scorer, mean_absolute_error

# 데이터 분할
X = train.drop('Target', axis=1)  # Target 컬럼 제외
y = train['Target']  # Target 컬럼

# 모델 정의
models = {
    'Linear Regression': LinearRegression(),
    'Ridge': Ridge(random_state=42),
    'Lasso': Lasso(random_state=42),
    'ElasticNet': ElasticNet(random_state=42),
    'Gradient Boosting': GradientBoostingRegressor(random_state=42)
}

# MAE를 평가 기준으로 사용하기 위해 scorer 정의
mae_scorer = make_scorer(mean_absolute_error)

# RFE를 사용하여 feature selection
for model_name, model in models.items():
    rfe = RFE(model, n_features_to_select=5)
    fit = rfe.fit(X, y)

    fs = X.columns[fit.support_].tolist()
    X_selected = X.iloc[:, fit.get_support()]

    # 선택된 feature로 cross-validation 수행
    score = cross_val_score(model, X_selected, y, cv=5, scoring=mae_scorer)

    print(f'{model_name} score: {score.mean()}, selected features: {fs}')

Linear Regression score: 12.520577531700535, selected features: ['day_3', 'day_20', 'day_22', 'hour_14', 'hour_17']
Ridge score: 12.5209753319046, selected features: ['day_3', 'day_13', 'day_22', 'hour_14', 'hour_17']
Lasso score: 12.53489854284295, selected features: ['V2', 'V4', 'V7', 'V10', 'V17']
ElasticNet score: 12.530659655893988, selected features: ['V4', 'V7', 'V10', 'V17', 'V25']
Gradient Boosting score: 12.707682365925155, selected features: ['V3', 'V6', 'V25', 'PCA2', 'PCA3']


In [8]:
import pandas as pd
import numpy as np
from sklearn.model_selection import cross_val_score
from sklearn.linear_model import LinearRegression, Ridge, Lasso, ElasticNet
from sklearn.ensemble import GradientBoostingRegressor
from sklearn.feature_selection import RFE
from sklearn.metrics import make_scorer, mean_absolute_error

# 데이터 분할
X = train.drop('Target', axis=1)  # Target 컬럼 제외
y = train['Target']  # Target 컬럼

# 모델 정의
models = {
    'Linear Regression': LinearRegression(),
    'Ridge': Ridge(random_state=42),
    'Lasso': Lasso(random_state=42),
    'ElasticNet': ElasticNet(random_state=42),
    'Gradient Boosting': GradientBoostingRegressor(random_state=42)
}

# MAE를 평가 기준으로 사용하기 위해 scorer 정의
mae_scorer = make_scorer(mean_absolute_error)

# RFE를 사용하여 feature selection
for model_name, model in models.items():
    rfe = RFE(model, n_features_to_select=10)
    fit = rfe.fit(X, y)

    fs = X.columns[fit.support_].tolist()
    X_selected = X.iloc[:, fit.get_support()]

    # 선택된 feature로 cross-validation 수행
    score = cross_val_score(model, X_selected, y, cv=5, scoring=mae_scorer)

    print(f'{model_name} score: {score.mean()}, selected features: {fs}')

Linear Regression score: 12.510263323509992, selected features: ['day_3', 'day_6', 'day_14', 'day_20', 'day_22', 'hour_11', 'hour_14', 'hour_17', 'hour_22', 'Mon']
Ridge score: 12.509509814234656, selected features: ['day_3', 'day_13', 'day_14', 'day_22', 'day_27', 'hour_14', 'hour_17', 'hour_22', 'Mon', 'Sun']
Lasso score: 12.542847276969237, selected features: ['V2', 'V4', 'V7', 'V10', 'V11', 'V17', 'V21', 'PCA1', 'PCA2', 'PCA3']
ElasticNet score: 12.538953353611754, selected features: ['V2', 'V4', 'V7', 'V10', 'V11', 'V17', 'V21', 'V25', 'V26', 'day_2']
Gradient Boosting score: 12.628751710566949, selected features: ['V3', 'V6', 'V7', 'V16', 'V20', 'V22', 'V24', 'V25', 'PCA2', 'PCA3']


In [9]:
import pandas as pd
import numpy as np
from sklearn.model_selection import cross_val_score
from sklearn.linear_model import LinearRegression, Ridge, Lasso, ElasticNet
from sklearn.ensemble import GradientBoostingRegressor
from sklearn.feature_selection import RFE
from sklearn.metrics import make_scorer, mean_absolute_error

# 데이터 분할
X = train.drop('Target', axis=1)  # Target 컬럼 제외
y = train['Target']  # Target 컬럼

# 모델 정의
models = {
    'Linear Regression': LinearRegression(),
    'Ridge': Ridge(random_state=42),
    'Lasso': Lasso(random_state=42),
    'ElasticNet': ElasticNet(random_state=42),
    'Gradient Boosting': GradientBoostingRegressor(random_state=42)
}

# MAE를 평가 기준으로 사용하기 위해 scorer 정의
mae_scorer = make_scorer(mean_absolute_error)

# RFE를 사용하여 feature selection
for model_name, model in models.items():
    rfe = RFE(model, n_features_to_select=15)
    fit = rfe.fit(X, y)

    fs = X.columns[fit.support_].tolist()
    X_selected = X.iloc[:, fit.get_support()]

    # 선택된 feature로 cross-validation 수행
    score = cross_val_score(model, X_selected, y, cv=5, scoring=mae_scorer)

    print(f'{model_name} score: {score.mean()}, selected features: {fs}')

Linear Regression score: 12.520775014211157, selected features: ['day_3', 'day_6', 'day_14', 'day_15', 'day_18', 'day_20', 'day_22', 'day_30', 'hour_11', 'hour_12', 'hour_14', 'hour_17', 'hour_22', 'Mon', 'Tue']
Ridge score: 12.511681929062489, selected features: ['day_3', 'day_13', 'day_14', 'day_15', 'day_18', 'day_22', 'day_27', 'hour_11', 'hour_12', 'hour_14', 'hour_17', 'hour_22', 'Mon', 'Sun', 'Tue']
Lasso score: 12.542847276969237, selected features: ['V2', 'V4', 'V7', 'V10', 'V11', 'V17', 'V21', 'Sat', 'Sun', 'Thu', 'Tue', 'Wed', 'PCA1', 'PCA2', 'PCA3']
ElasticNet score: 12.538953353611754, selected features: ['V2', 'V4', 'V7', 'V10', 'V11', 'V17', 'V21', 'V25', 'V26', 'day_1', 'day_2', 'day_3', 'day_4', 'day_5', 'day_6']
Gradient Boosting score: 12.637897085150147, selected features: ['V3', 'V6', 'V7', 'V9', 'V15', 'V16', 'V17', 'V18', 'V20', 'V21', 'V22', 'V24', 'V25', 'PCA2', 'PCA3']
