In [1]:
import pandas as pd
import numpy as np
from sklearn.preprocessing import StandardScaler
from sklearn.neural_network import MLPRegressor
from sklearn.linear_model import LinearRegression
from sklearn.linear_model import Ridge, RidgeCV, Lasso, LassoCV, ElasticNet, ElasticNetCV, SGDRegressor
from sklearn.ensemble import RandomForestRegressor
from sklearn.model_selection import TimeSeriesSplit, GridSearchCV
import tensorflow as tf
import keras
import keras_tuner as kt
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense, Dropout, Input, BatchNormalization
from tensorflow.keras.regularizers import l2
# Evaluation (Metrics & DM-Test)
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score
from itertools import islice
from math import lgamma, fabs, isnan, nan, exp, log, log1p, sqrt
from typing import Sequence, Callable, List, Tuple
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.decomposition import PCA

#### **1. Data Preparation**

In [2]:
df = pd.read_csv("df_ff_factors_100325.csv")
df = df.drop(columns=["Unnamed: 0","crsp_portno"])
df = df.sort_values(by='date')
df

df_tech = pd.read_csv("df_ff_factors_techfunds.csv")
df_tech = df_tech.drop(columns=["Unnamed: 0","crsp_portno"])
df_tech = df_tech.sort_values(by='date')

The chunk below describes the various functions involved in the training of the models, which are described in greater depth below:

(1) Generation of lagged dataframe i.e. lagging the characteristics

(2) Generation of stepped dataframe i.e. lagged characteristics being together with the 1-month ahead forecast

(3) Demeaning function for fund-level characteristics, which is important to ensure accuracy in the overall forecasting of annualised alphas

In [3]:
# Generate Lagged Dataset
def create_lagged_dataset(dataset, lag, target_var, id):
    lagged_dataset = dataset.copy()
    columns_list = list(lagged_dataset.columns)
    data_join = {}
    for column in columns_list:
        if column == target_var:
            data_join[column] = lagged_dataset[column]
        for n in range(1,lag+1):
            data_join[F'{column}_L{n}'] = lagged_dataset.groupby(id)[column].shift(n)
    lagged_dataset = pd.concat(data_join.values(), axis=1, ignore_index = True)
    lagged_dataset.columns = data_join.keys()
    return lagged_dataset.dropna()

# Generate Stepped Dataset for Training
## Steps is the number of months ahead that we are forecasting, e.g. step=2 is 2 months ahead.
## Note step=1 results in no change to dataset, i.e. use generated lagged variables to forecast current. 
def create_stepped_dataset(dataset, step, target_var, id):
    
    shifted_dataset = dataset.copy()
    shifted_dataset['shifted_target'] = shifted_dataset.groupby(id)[target_var].shift(-step + 1)
    
    # Drop rows where the shifted target is NaN (these occur due to the shift operation)
    shifted_dataset = shifted_dataset.dropna(subset=['shifted_target'])
    
    # Separate the features (X) and the target (y)
    X = shifted_dataset.drop(columns=[target_var, 'shifted_target'])
    y = shifted_dataset[['shifted_target']]
    y = y.rename(columns={'shifted_target':target_var})
    return X, y

def demeaning_fund_char(dataset, id, characteristic):
    for col in characteristic:
        dataset[f'demeaned_{col}'] = dataset[col] - dataset.groupby(id)[col].transform('mean')
    return dataset

In [4]:
import pandas as pd

# Assuming 'dataset' is your DataFrame and 'normalized_flow' is the column with NaN values
df['normalised_flow'] = df['normalised_flow'].fillna(method='ffill')
char_to_demean = ["exp_ratio", "turn_ratio", "normalised_flow", "shortrun_momentum"]
df_demeaned = demeaning_fund_char(df, id="crsp_fundno", characteristic = char_to_demean)
df_demeaned = df_demeaned.drop(columns = char_to_demean)   # drop original columns
df.head()

df_tech['normalised_flow'] = df_tech['normalised_flow'].fillna(method='ffill')
char_to_demean = ["exp_ratio", "turn_ratio", "normalised_flow", "shortrun_momentum"]
df_tech_demeaned = demeaning_fund_char(df_tech, id="crsp_fundno", characteristic = char_to_demean)
df_tech_demeaned = df_tech_demeaned.drop(columns = char_to_demean)   # drop original columns

df_tech

  df['normalised_flow'] = df['normalised_flow'].fillna(method='ffill')
  df_tech['normalised_flow'] = df_tech['normalised_flow'].fillna(method='ffill')


Unnamed: 0,crsp_fundno,date,mth_return,exp_ratio,turn_ratio,normalised_flow,gdp_to_debt_ratio,gdp_growth_rate,unm_rate,infl_rate,...,rolling_sharpe,mkt_return,rolling_alpha_3f,rolling_alpha_4f,rolling_alpha_5f,shortrun_momentum,demeaned_exp_ratio,demeaned_turn_ratio,demeaned_normalised_flow,demeaned_shortrun_momentum
43631,32553.0,1993-08-31,0.068935,0.0162,0.15,0.009529,64.101,3.5225,6.8,2.90,...,3.248937,0.056070,0.068910,0.068910,0.068910,0.012866,0.000025,-0.464384,0.001507,0.020627
43632,32553.0,1993-09-30,-0.004082,0.0162,0.15,0.014881,64.101,3.5225,6.7,2.90,...,2.216452,0.027009,-0.004108,-0.004108,-0.004108,-0.009113,0.000025,-0.464384,0.006859,-0.001352
43633,32553.0,1993-10-29,0.040984,0.0162,0.15,0.030641,64.669,3.5225,6.8,2.90,...,3.058133,0.021609,0.040962,0.040962,0.040962,0.000383,0.000025,-0.464384,0.022620,0.008144
13747,12051.0,1993-10-29,0.034712,0.0188,0.77,0.070030,64.669,3.5225,6.8,2.90,...,5.744340,0.021609,0.034690,0.034690,0.034690,0.013103,0.007662,-0.699757,0.066601,0.016976
13748,12051.0,1993-11-30,-0.075974,0.0188,0.77,0.000259,64.669,3.5225,6.6,2.90,...,-0.536935,-0.010806,-0.075999,-0.075999,-0.075999,-0.026033,0.007662,-0.699757,-0.003170,-0.022160
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
3031,4610.0,2024-06-28,0.078565,0.0217,0.23,-0.650182,120.040,2.5427,4.1,3.35,...,1.619481,0.055058,0.001428,0.002329,0.003201,-0.025985,-0.001211,-1.434538,-0.575797,-0.025378
1960,4330.0,2024-07-31,0.028831,0.0077,0.31,-0.022800,120.731,2.5427,4.2,3.35,...,0.691406,0.055058,0.004765,0.005643,0.010978,-0.024241,-0.000815,-0.039661,-0.033919,-0.013655
2113,4333.0,2024-07-31,0.028826,0.0076,0.31,-0.402964,120.731,2.5427,4.2,3.35,...,0.691738,0.055058,0.004733,0.005601,0.010953,-0.024228,-0.000737,-0.039661,-0.817815,-0.013661
1842,4327.0,2024-07-31,0.028616,0.0102,0.31,-0.032107,120.731,2.5427,4.2,3.35,...,0.676394,0.055058,0.004506,0.005384,0.010697,-0.024451,-0.000815,-0.039661,-0.042810,-0.013657


In [5]:
# Generates next date
def generate_next_date(list_of_dates, date):
    return list_of_dates[list_of_dates > date].min()

def process_factor_model(X_factor, y_factor, train_end, test_date):
    X_train = X_factor[X_factor['date_L1'] <= train_end].drop(columns='date_L1')
    X_test = X_factor[X_factor['date_L1'] == test_date].drop(columns='date_L1')

    y_train = y_factor.loc[X_train.index]
    y_test = y_factor.loc[X_test.index]
    
    scaler = StandardScaler()
    X_train_scaled = scaler.fit_transform(X_train)
    X_test_scaled = scaler.transform(X_test)

    return X_train_scaled, X_test_scaled, y_train, y_test

#### **2. Model Training (Alpha)**

Asset-Pricing Factor Models:
* 5-Factor + MOM Model: mktrf, SMB, HML, RMW, CMA, UMD

The overall_function trains the models using the final selected parameters after hyperparameter tuning.

Sample code for Regime-Switching Model

Overall Training Cycle Code

In [None]:
# OLD HYPERPARAMS
# lstm_hyperparams_dict_healthcare = dict(
#     {'2019': dict({
#         'num_layers':3,
#         'units':[96,32,32],
#         'optimizer':'Adam',
#         'drop_out':[0.2,0.2,0.1],
#         'activation':['tanh','tanh','linear'],
#         'lr':0.019962442034576384
#     }),
#      '2020': dict({
#         'num_layers':3,
#         'units':[64,32,32],
#         'optimizer':'Nadam',
#         'drop_out':[0.1,0.1,0.1],
#         'activation':['tanh','tanh','linear'],
#         'lr':0.021504061608420576
#     }),
#      '2021': dict({
#         'num_layers':3,
#         'units':[96,32,32],
#         'optimizer':'Nadam',
#         'drop_out':[0.1,0.1,0.1],
#         'activation':['tanh','tanh','linear'],
#         'lr':0.012302249555768368
#     }),
#      '2022': dict({
#         'num_layers':3,
#         'units':[96,32,32],
#         'optimizer':'Nadam',
#         'drop_out':[0.1,0.2,0.2],
#         'activation':['tanh','tanh','linear'],
#         'lr':0.010092957472413086
#     }),
#      '2023': dict({
#         'num_layers':2,
#         'units':[64,32],
#         'optimizer':'Adam',
#         'drop_out':[0.2,0.2],
#         'activation':['tanh','tanh'],
#         'lr':0.018388080425636798
#     }),
#      '2024': dict({
#         'num_layers':2,
#         'units':[32,32],
#         'optimizer':'Adam',
#         'drop_out':[0.1,0.2],
#         'activation':['tanh','tanh'],
#         'lr':0.007947588699235507
#     })}
# )

rf_hyperparams_dict_healthcare = dict(
    {'2019': dict({
        'n_estimators':300,
        'min_samples_split':5,
        'min_samples_leaf':4,
        'max_features':'log2',
        'max_depth':20
    }),
     '2020': dict({
        'n_estimators':100,
        'min_samples_split':10,
        'min_samples_leaf':4,
        'max_features':'log2',
        'max_depth':None
    }),
     '2021': dict({
        'n_estimators':300,
        'min_samples_split':5,
        'min_samples_leaf':4,
        'max_features':'log2',
        'max_depth':20
    }),
     '2022': dict({
        'n_estimators':100,
        'min_samples_split':10,
        'min_samples_leaf':4,
        'max_features':'log2',
        'max_depth':None
    }),
     '2023': dict({
        'n_estimators':300,
        'min_samples_split':5,
        'min_samples_leaf':4,
        'max_features':'log2',
        'max_depth':20
    }),
     '2024': dict({
        'n_estimators':100,
        'min_samples_split':10,
        'min_samples_leaf':4,
        'max_features':'log2',
        'max_depth':None
    })}
)

Healthcare Mutual Funds

In [6]:
from xgboost import XGBRegressor
from sklearn.model_selection import RandomizedSearchCV
from lightgbm import LGBMRegressor

# Define a simplified XGBoost parameter grid
xgb_param_grid = {
    'n_estimators': [100, 300, 500],  # Number of boosting rounds
    'learning_rate': [0.01, 0.05, 0.1],  # Learning rate (step size)
    'max_depth': [3, 5, 7],  # Maximum depth of a tree
    'subsample': [0.7, 0.8, 0.9],  # Fraction of samples for training each tree
    'colsample_bytree': [0.7, 0.8, 1.0]  # Fraction of features for each tree
}

def xgb_tuner(X_train, y_train, xgb_param_grid):
    # Initialize XGBRegressor
    xgb_model = XGBRegressor(random_state=42, n_jobs=-1)

    # Use RandomizedSearchCV for efficiency
    xgb_random_search = RandomizedSearchCV(
        estimator=xgb_model,
        param_distributions=xgb_param_grid,
        n_iter=10,  # Number of parameter settings tested
        cv=TimeSeriesSplit(n_splits=5),  # TimeSeriesSplit for time-dependent data
        scoring='neg_mean_squared_error',  # Evaluate using negative mean squared error
        verbose=2,
        random_state=42,
        n_jobs=-1
    )

    # Perform hyperparameter tuning
    xgb_random_search.fit(X_train, y_train.values.ravel())

    # Best hyperparameters
    best_xgb_params = xgb_random_search.best_params_
    print(f'Best XGBoost Parameters: {best_xgb_params}')
    return best_xgb_params

# Define a simplified LGBM parameter grid
lgbm_param_grid = {
    'n_estimators': [100, 300, 500],  # Number of boosting rounds
    'learning_rate': [0.01, 0.05, 0.1],  # Learning rate (step size)
    'max_depth': [-1, 3, 5],  # Maximum depth of a tree, -1 means no limit
    'num_leaves': [31, 63, 127],  # Number of leaves in a tree (controls complexity)
    'subsample': [0.7, 0.8, 0.9],  # Fraction of samples used for training each tree
    'colsample_bytree': [0.7, 0.8, 1.0]  # Fraction of features used for each tree
}

def lgbm_tuner(X_train, y_train, lgbm_param_grid):
    # Initialize LGBMRegressor
    lgbm_model = LGBMRegressor(random_state=42, n_jobs=-1)

    # Use RandomizedSearchCV for efficiency
    lgbm_random_search = RandomizedSearchCV(
        estimator=lgbm_model,
        param_distributions=lgbm_param_grid,
        n_iter=10,  # Number of parameter settings tested
        cv=TimeSeriesSplit(n_splits=5),  # TimeSeriesSplit for time-dependent data
        scoring='neg_mean_squared_error',  # Evaluate using negative mean squared error
        verbose=2,
        random_state=42,
        n_jobs=-1
    )

    # Perform hyperparameter tuning
    lgbm_random_search.fit(X_train, y_train.values.ravel())

    # Best hyperparameters
    best_lgbm_params = lgbm_random_search.best_params_
    print(f'Best LGBM Parameters: {best_lgbm_params}')
    return best_lgbm_params

In [7]:
# Training Cycle
from tensorflow.keras.optimizers import Adam, Nadam, Adagrad
from tensorflow.keras.models import Sequential, load_model
import xgboost as xgb
import lightgbm as lgb
from sklearn.model_selection import TimeSeriesSplit
def overall_function(dataset, outcome, 
                     lstm_hyperparams_dict_healthcare, rf_hyperparams_dict_healthcare):
    dataset['date'] = pd.to_datetime(dataset['date']) # converting to date format
    dataset = dataset.sort_values(by='date')
    df_factor = dataset.drop(columns=['mkt_return','mth_return','rf']) # remove irrelevant variables

    # Creating Lagged and Stepped Datasets
    X_dataset, y_dataset = create_stepped_dataset(create_lagged_dataset(df_factor, lag=1,target_var=outcome, id = 'crsp_fundno'),step=1,target_var=outcome, id = 'crsp_fundno_L1')
    # return(X_dataset)
    X_dataset = X_dataset.drop(columns=['crsp_fundno_L1'], errors='ignore')

    list_of_dates = pd.to_datetime(X_dataset['date_L1'])
    percentile_70 = list_of_dates.quantile(0.7) # 70-30 split
    train_end = list_of_dates.loc[(list_of_dates - percentile_70).abs().idxmin()]
    df_end = list_of_dates.max()
    # return(y_dataset)
    results = []

    ## Implement cross-validation split
    tscv = TimeSeriesSplit(n_splits = 5)
    
    while train_end != df_end:
        
        test_date = generate_next_date(list_of_dates, train_end)
        if pd.isna(test_date):
            break 

        # Process data for modeling
        X_train, X_test, y_train, y_test = process_factor_model(X_dataset, y_dataset, train_end, test_date)
        
        # For Adding Results
        df_in_loop = y_test.copy()
        
        ### Model 1: Lasso Regression
        lasso_cv = LassoCV(cv = tscv, random_state = 18, max_iter = 100000)
        lasso_cv.fit(X_train, y_train)
        
        # Create the Lasso model with the optimal alpha value
        lasso_model = Lasso(alpha = lasso_cv.alpha_)
        lasso_model.fit(X_train, y_train)
        lassopred = lasso_model.predict(X_test)
        # Adding Linear Model
        df_in_loop[f'lasso'] = lassopred
        print('Lasso Done')
        
        ### Model 1: Ridge Regression ###
        ridge_cv = RidgeCV(cv = tscv)
        ridge_cv.fit(X_train, y_train)
    
        ridge_model = Ridge(alpha = ridge_cv.alpha_)
        ridge_model.fit(X_train, y_train)
        
        ridgepred = ridge_model.predict(X_test)
        # Adding Linear Model
        df_in_loop[f'ridge'] = ridgepred
        print('Ridge Done')
        
        ### Model 2: LSTM ###
        # X_train_lstm = np.array(X_train).astype(np.float32)
        # X_test_lstm = np.array(X_test).astype(np.float32)
        # y_train_lstm = np.array(y_train).astype(np.float32)
        
        test_year = test_date.year
        # print(f'test_year is {test_year} with hyperparameters of {lstm_hyperparams_dict_healthcare[str(test_year)]}')
        # lstm_model = load_model(f'best_lstm_model_{train_end.year}.keras')
        
        # num_layers = len(lstm_hyperparams_dict_healthcare[str(test_year)]['units'])
        
        # # Building LSTM Model
        # lstm_model = Sequential()
        # for layer_num in range(num_layers):
        #     is_last_layer = (layer_num == (num_layers - 1))  # Check if it's the last layer
        #     lstm_model.add(LSTM(
        #         units=lstm_hyperparams_dict_healthcare[str(test_year)]['units'][layer_num], 
        #         return_sequences=not is_last_layer,  # Only last layer has return_sequences=False
        #         input_shape=(X_train_lstm.shape[1], 1) if layer_num == 0 else None,  # Define input shape only for the first layer
        #         activation=lstm_hyperparams_dict_healthcare[str(test_year)]['activation'][layer_num]
        #     ))
        #     lstm_model.add(Dropout(lstm_hyperparams_dict_healthcare[str(test_year)]['drop_out'][layer_num]))
        # # Output Layer
        # lstm_model.add(Dense(units=1))
        
        # # Compilation
        # if lstm_hyperparams_dict_healthcare[str(test_year)]['optimizer'] == 'Adam':
        #     lstm_model.compile(
        #         optimizer=Adam(learning_rate=lstm_hyperparams_dict_healthcare[str(test_year)]['lr']),
        #         loss='mean_squared_error',
        #         metrics=[
        #             tf.keras.metrics.RootMeanSquaredError()
        #         ]
        #     )
        # elif lstm_hyperparams_dict_healthcare[str(test_year)]['optimizer'] == 'Nadam':
        #     lstm_model.compile(
        #         optimizer=Nadam(learning_rate=lstm_hyperparams_dict_healthcare[str(test_year)]['lr']),
        #         loss='mean_squared_error',
        #         metrics=[
        #             tf.keras.metrics.RootMeanSquaredError()
        #         ]
        #     )

        # Early stopping callback
        # callback = tf.keras.callbacks.EarlyStopping(monitor='loss', patience=3)

        # # Train the model
        # lstm_model.fit(
        #     X_train_lstm, y_train_lstm,
        #     epochs=10, batch_size=512,
        #     callbacks=[callback]
        # )

        # lstmpred = lstm_model.predict(X_test_lstm)
        # df_in_loop[f'lstm'] = lstmpred
        # print('LSTM Done')
        
        optimal_param_xgb = xgb_tuner(X_train, y_train, xgb_param_grid)
        xgboost_model = xgb.XGBRegressor(subsample=optimal_param_xgb['subsample'],
                                         n_estimators=optimal_param_xgb['n_estimators'],
                                         max_depth=optimal_param_xgb['max_depth'],
                                         learning_rate=optimal_param_xgb['learning_rate'],
                                         colsample_bytree=optimal_param_xgb['colsample_bytree'],
                                         random_state=18)
        xgboost_model.fit(X_train, y_train)
        df_in_loop[f'xgboost'] = xgboost_model.predict(X_test)
        print('XGBoost done')
        #errors(xgboost_model, 'gradientboost', X_train, y_train, errors_path, window_end+1, pred_train_path)

        ## 16. LightGBM
        optimal_param_lgbm = lgbm_tuner(X_train, y_train, lgbm_param_grid)
        lightgbm_model = lgb.LGBMRegressor(objective='regression', 
                                           subsample = optimal_param_lgbm['subsample'],
                                           num_leaves = optimal_param_lgbm['num_leaves'],
                                           n_estimators = optimal_param_lgbm['n_estimators'],
                                           max_depth= optimal_param_lgbm['max_depth'],
                                           learning_rate=optimal_param_lgbm['learning_rate'],
                                           colsample_bytree=optimal_param_lgbm['colsample_bytree'],
                                           random_state=18, verbosity=-1)
        lightgbm_model.fit(X_train, y_train)
        df_in_loop[f'lgbm'] = lightgbm_model.predict(X_test)
        print('LGBM done')
                
        ### Model 3: Random Forest Regression ###
        print(f'test_year is {test_year} with hyperparameters of {rf_hyperparams_dict_healthcare[str(test_year)]}')
        rf_model = RandomForestRegressor(n_estimators=rf_hyperparams_dict_healthcare[str(test_year)]['n_estimators'], 
                                         min_samples_split=rf_hyperparams_dict_healthcare[str(test_year)]['min_samples_split'],
                                         min_samples_leaf=rf_hyperparams_dict_healthcare[str(test_year)]['min_samples_leaf'],
                                         max_features = rf_hyperparams_dict_healthcare[str(test_year)]['max_features'], 
                                         max_depth=rf_hyperparams_dict_healthcare[str(test_year)]['max_depth'],
                                         random_state=40, n_jobs=-1)

        rf_model.fit(X_train, y_train.values.ravel())
        rf_pred = rf_model.predict(X_test)
        # return(y_pred)
        df_in_loop[f'rf'] = rf_pred
        
        print('RF Done')
        
        ### Model 4: PCA ###
        X_train = pd.DataFrame(X_train)
        X_test = pd.DataFrame(X_test)

        X_train_pure = X_train
        X_test_pure = X_test
        
        pca = PCA()
        pca.fit(X_train_pure)
        cumulative_variance_ratio = np.cumsum(pca.explained_variance_ratio_)
          #to explain more than 85% of the variance
        num_components = np.where(cumulative_variance_ratio >= 0.85)[0][0] + 1 
        pca_new = PCA(n_components=num_components)
        X_train_pca = pca_new.fit_transform(X_train_pure)
        X_train_pca = pd.DataFrame(X_train_pca)
        X_train_pca.columns = X_train_pca.columns.astype(str)

        X_test_pca = pca_new.transform(X_test_pure)
        X_test_pca = pd.DataFrame(X_test_pca)
        X_test_pca.columns = X_test_pca.columns.astype(str)
        
        pure_factor_model = LinearRegression()
        pure_factor_model.fit(X_train_pca, y_train)
        y_pred = pure_factor_model.predict(X_test_pca)
        df_in_loop[f'pca'] = y_pred
        print('PCA Done')
        
        # return(y_pred)
        # Add results into loop
        results.append(df_in_loop)
        train_end = test_date
        num_remaining_dates = len(list(set(date for date in list_of_dates if date > test_date)))
        print(f'{num_remaining_dates} dates remaining')
    # return(results)
    combined_df = pd.concat(results, ignore_index=True)
    return combined_df


In [None]:
# Run to train the models
y_new = overall_function(dataset=df, outcome = "rolling_alpha_5f", 
                         lstm_hyperparams_dict_healthcare=None, rf_hyperparams_dict_healthcare=rf_hyperparams_dict_healthcare)

  y = column_or_1d(y, warn=True)


Lasso Done
Ridge Done
Fitting 5 folds for each of 10 candidates, totalling 50 fits
Best XGBoost Parameters: {'subsample': 0.7, 'n_estimators': 500, 'max_depth': 3, 'learning_rate': 0.01, 'colsample_bytree': 0.7}
XGBoost done
Fitting 5 folds for each of 10 candidates, totalling 50 fits
Best LGBM Parameters: {'subsample': 0.8, 'num_leaves': 63, 'n_estimators': 100, 'max_depth': 3, 'learning_rate': 0.05, 'colsample_bytree': 0.8}


  y = column_or_1d(y, warn=True)


LGBM done
test_year is 2019 with hyperparameters of {'n_estimators': 300, 'min_samples_split': 5, 'min_samples_leaf': 4, 'max_features': 'log2', 'max_depth': 20}
RF Done
PCA Done
57 dates remaining


  y = column_or_1d(y, warn=True)


Lasso Done
Ridge Done
Fitting 5 folds for each of 10 candidates, totalling 50 fits
Best XGBoost Parameters: {'subsample': 0.7, 'n_estimators': 500, 'max_depth': 3, 'learning_rate': 0.01, 'colsample_bytree': 0.7}
XGBoost done
Fitting 5 folds for each of 10 candidates, totalling 50 fits
Best LGBM Parameters: {'subsample': 0.8, 'num_leaves': 63, 'n_estimators': 100, 'max_depth': 3, 'learning_rate': 0.05, 'colsample_bytree': 0.8}


  y = column_or_1d(y, warn=True)


LGBM done
test_year is 2020 with hyperparameters of {'n_estimators': 100, 'min_samples_split': 10, 'min_samples_leaf': 4, 'max_features': 'log2', 'max_depth': None}
RF Done
PCA Done
56 dates remaining


  y = column_or_1d(y, warn=True)


Lasso Done
Ridge Done
Fitting 5 folds for each of 10 candidates, totalling 50 fits
Best XGBoost Parameters: {'subsample': 0.7, 'n_estimators': 500, 'max_depth': 3, 'learning_rate': 0.01, 'colsample_bytree': 0.7}
XGBoost done
Fitting 5 folds for each of 10 candidates, totalling 50 fits
Best LGBM Parameters: {'subsample': 0.8, 'num_leaves': 63, 'n_estimators': 100, 'max_depth': 3, 'learning_rate': 0.05, 'colsample_bytree': 0.8}


  y = column_or_1d(y, warn=True)


LGBM done
test_year is 2020 with hyperparameters of {'n_estimators': 100, 'min_samples_split': 10, 'min_samples_leaf': 4, 'max_features': 'log2', 'max_depth': None}
RF Done
PCA Done
55 dates remaining


  y = column_or_1d(y, warn=True)


Lasso Done
Ridge Done
Fitting 5 folds for each of 10 candidates, totalling 50 fits
Best XGBoost Parameters: {'subsample': 0.7, 'n_estimators': 500, 'max_depth': 3, 'learning_rate': 0.01, 'colsample_bytree': 0.7}
XGBoost done
Fitting 5 folds for each of 10 candidates, totalling 50 fits
Best LGBM Parameters: {'subsample': 0.8, 'num_leaves': 63, 'n_estimators': 100, 'max_depth': 3, 'learning_rate': 0.05, 'colsample_bytree': 0.8}


  y = column_or_1d(y, warn=True)


LGBM done
test_year is 2020 with hyperparameters of {'n_estimators': 100, 'min_samples_split': 10, 'min_samples_leaf': 4, 'max_features': 'log2', 'max_depth': None}
RF Done
PCA Done
54 dates remaining


  y = column_or_1d(y, warn=True)


Lasso Done
Ridge Done
Fitting 5 folds for each of 10 candidates, totalling 50 fits
Best XGBoost Parameters: {'subsample': 0.7, 'n_estimators': 500, 'max_depth': 3, 'learning_rate': 0.01, 'colsample_bytree': 0.7}
XGBoost done
Fitting 5 folds for each of 10 candidates, totalling 50 fits
Best LGBM Parameters: {'subsample': 0.7, 'num_leaves': 31, 'n_estimators': 100, 'max_depth': -1, 'learning_rate': 0.05, 'colsample_bytree': 0.7}


  y = column_or_1d(y, warn=True)


LGBM done
test_year is 2020 with hyperparameters of {'n_estimators': 100, 'min_samples_split': 10, 'min_samples_leaf': 4, 'max_features': 'log2', 'max_depth': None}
RF Done
PCA Done
53 dates remaining


  y = column_or_1d(y, warn=True)


Lasso Done
Ridge Done
Fitting 5 folds for each of 10 candidates, totalling 50 fits
Best XGBoost Parameters: {'subsample': 0.8, 'n_estimators': 300, 'max_depth': 5, 'learning_rate': 0.01, 'colsample_bytree': 1.0}
XGBoost done
Fitting 5 folds for each of 10 candidates, totalling 50 fits
Best LGBM Parameters: {'subsample': 0.7, 'num_leaves': 31, 'n_estimators': 100, 'max_depth': -1, 'learning_rate': 0.05, 'colsample_bytree': 0.7}


  y = column_or_1d(y, warn=True)


LGBM done
test_year is 2020 with hyperparameters of {'n_estimators': 100, 'min_samples_split': 10, 'min_samples_leaf': 4, 'max_features': 'log2', 'max_depth': None}
RF Done
PCA Done
52 dates remaining


  y = column_or_1d(y, warn=True)


Lasso Done
Ridge Done
Fitting 5 folds for each of 10 candidates, totalling 50 fits
Best XGBoost Parameters: {'subsample': 0.8, 'n_estimators': 300, 'max_depth': 5, 'learning_rate': 0.01, 'colsample_bytree': 1.0}
XGBoost done
Fitting 5 folds for each of 10 candidates, totalling 50 fits
Best LGBM Parameters: {'subsample': 0.7, 'num_leaves': 31, 'n_estimators': 100, 'max_depth': -1, 'learning_rate': 0.05, 'colsample_bytree': 0.7}


  y = column_or_1d(y, warn=True)


LGBM done
test_year is 2020 with hyperparameters of {'n_estimators': 100, 'min_samples_split': 10, 'min_samples_leaf': 4, 'max_features': 'log2', 'max_depth': None}
RF Done
PCA Done
51 dates remaining


  y = column_or_1d(y, warn=True)


Lasso Done
Ridge Done
Fitting 5 folds for each of 10 candidates, totalling 50 fits
Best XGBoost Parameters: {'subsample': 0.8, 'n_estimators': 300, 'max_depth': 5, 'learning_rate': 0.01, 'colsample_bytree': 1.0}
XGBoost done
Fitting 5 folds for each of 10 candidates, totalling 50 fits
Best LGBM Parameters: {'subsample': 0.7, 'num_leaves': 31, 'n_estimators': 100, 'max_depth': -1, 'learning_rate': 0.05, 'colsample_bytree': 0.7}


  y = column_or_1d(y, warn=True)


LGBM done
test_year is 2020 with hyperparameters of {'n_estimators': 100, 'min_samples_split': 10, 'min_samples_leaf': 4, 'max_features': 'log2', 'max_depth': None}
RF Done
PCA Done
50 dates remaining


  y = column_or_1d(y, warn=True)


Lasso Done
Ridge Done
Fitting 5 folds for each of 10 candidates, totalling 50 fits
Best XGBoost Parameters: {'subsample': 0.8, 'n_estimators': 300, 'max_depth': 5, 'learning_rate': 0.01, 'colsample_bytree': 1.0}
XGBoost done
Fitting 5 folds for each of 10 candidates, totalling 50 fits
Best LGBM Parameters: {'subsample': 0.9, 'num_leaves': 63, 'n_estimators': 500, 'max_depth': -1, 'learning_rate': 0.01, 'colsample_bytree': 0.8}


  y = column_or_1d(y, warn=True)


LGBM done
test_year is 2020 with hyperparameters of {'n_estimators': 100, 'min_samples_split': 10, 'min_samples_leaf': 4, 'max_features': 'log2', 'max_depth': None}
RF Done
PCA Done
49 dates remaining


  y = column_or_1d(y, warn=True)


Lasso Done
Ridge Done
Fitting 5 folds for each of 10 candidates, totalling 50 fits
Best XGBoost Parameters: {'subsample': 0.8, 'n_estimators': 300, 'max_depth': 5, 'learning_rate': 0.01, 'colsample_bytree': 1.0}
XGBoost done
Fitting 5 folds for each of 10 candidates, totalling 50 fits
Best LGBM Parameters: {'subsample': 0.7, 'num_leaves': 31, 'n_estimators': 100, 'max_depth': -1, 'learning_rate': 0.05, 'colsample_bytree': 0.7}


  y = column_or_1d(y, warn=True)


LGBM done
test_year is 2020 with hyperparameters of {'n_estimators': 100, 'min_samples_split': 10, 'min_samples_leaf': 4, 'max_features': 'log2', 'max_depth': None}
RF Done
PCA Done
48 dates remaining


  y = column_or_1d(y, warn=True)


Lasso Done
Ridge Done
Fitting 5 folds for each of 10 candidates, totalling 50 fits
Best XGBoost Parameters: {'subsample': 0.8, 'n_estimators': 300, 'max_depth': 5, 'learning_rate': 0.01, 'colsample_bytree': 1.0}
XGBoost done
Fitting 5 folds for each of 10 candidates, totalling 50 fits
Best LGBM Parameters: {'subsample': 0.7, 'num_leaves': 31, 'n_estimators': 100, 'max_depth': -1, 'learning_rate': 0.05, 'colsample_bytree': 0.7}


  y = column_or_1d(y, warn=True)


LGBM done
test_year is 2020 with hyperparameters of {'n_estimators': 100, 'min_samples_split': 10, 'min_samples_leaf': 4, 'max_features': 'log2', 'max_depth': None}
RF Done
PCA Done
47 dates remaining


  y = column_or_1d(y, warn=True)


Lasso Done
Ridge Done
Fitting 5 folds for each of 10 candidates, totalling 50 fits
Best XGBoost Parameters: {'subsample': 0.8, 'n_estimators': 300, 'max_depth': 5, 'learning_rate': 0.01, 'colsample_bytree': 1.0}
XGBoost done
Fitting 5 folds for each of 10 candidates, totalling 50 fits
Best LGBM Parameters: {'subsample': 0.9, 'num_leaves': 63, 'n_estimators': 500, 'max_depth': -1, 'learning_rate': 0.01, 'colsample_bytree': 0.8}


  y = column_or_1d(y, warn=True)


LGBM done
test_year is 2020 with hyperparameters of {'n_estimators': 100, 'min_samples_split': 10, 'min_samples_leaf': 4, 'max_features': 'log2', 'max_depth': None}
RF Done
PCA Done
46 dates remaining


  y = column_or_1d(y, warn=True)


Lasso Done
Ridge Done
Fitting 5 folds for each of 10 candidates, totalling 50 fits
Best XGBoost Parameters: {'subsample': 0.8, 'n_estimators': 300, 'max_depth': 5, 'learning_rate': 0.01, 'colsample_bytree': 1.0}
XGBoost done
Fitting 5 folds for each of 10 candidates, totalling 50 fits
Best LGBM Parameters: {'subsample': 0.7, 'num_leaves': 31, 'n_estimators': 100, 'max_depth': -1, 'learning_rate': 0.05, 'colsample_bytree': 0.7}


  y = column_or_1d(y, warn=True)


LGBM done
test_year is 2020 with hyperparameters of {'n_estimators': 100, 'min_samples_split': 10, 'min_samples_leaf': 4, 'max_features': 'log2', 'max_depth': None}
RF Done
PCA Done
45 dates remaining


  y = column_or_1d(y, warn=True)


Lasso Done
Ridge Done
Fitting 5 folds for each of 10 candidates, totalling 50 fits
Best XGBoost Parameters: {'subsample': 0.8, 'n_estimators': 300, 'max_depth': 5, 'learning_rate': 0.01, 'colsample_bytree': 1.0}
XGBoost done
Fitting 5 folds for each of 10 candidates, totalling 50 fits
Best LGBM Parameters: {'subsample': 0.9, 'num_leaves': 63, 'n_estimators': 500, 'max_depth': -1, 'learning_rate': 0.01, 'colsample_bytree': 0.8}


  y = column_or_1d(y, warn=True)


LGBM done
test_year is 2021 with hyperparameters of {'n_estimators': 300, 'min_samples_split': 5, 'min_samples_leaf': 4, 'max_features': 'log2', 'max_depth': 20}
RF Done
PCA Done
44 dates remaining


  y = column_or_1d(y, warn=True)


Lasso Done
Ridge Done
Fitting 5 folds for each of 10 candidates, totalling 50 fits
Best XGBoost Parameters: {'subsample': 0.8, 'n_estimators': 300, 'max_depth': 5, 'learning_rate': 0.01, 'colsample_bytree': 1.0}
XGBoost done
Fitting 5 folds for each of 10 candidates, totalling 50 fits
Best LGBM Parameters: {'subsample': 0.8, 'num_leaves': 63, 'n_estimators': 100, 'max_depth': 3, 'learning_rate': 0.05, 'colsample_bytree': 0.8}


  y = column_or_1d(y, warn=True)


LGBM done
test_year is 2021 with hyperparameters of {'n_estimators': 300, 'min_samples_split': 5, 'min_samples_leaf': 4, 'max_features': 'log2', 'max_depth': 20}
RF Done
PCA Done
43 dates remaining


  y = column_or_1d(y, warn=True)


Lasso Done
Ridge Done
Fitting 5 folds for each of 10 candidates, totalling 50 fits
Best XGBoost Parameters: {'subsample': 0.8, 'n_estimators': 300, 'max_depth': 5, 'learning_rate': 0.01, 'colsample_bytree': 1.0}
XGBoost done
Fitting 5 folds for each of 10 candidates, totalling 50 fits
Best LGBM Parameters: {'subsample': 0.9, 'num_leaves': 63, 'n_estimators': 500, 'max_depth': -1, 'learning_rate': 0.01, 'colsample_bytree': 0.8}


  y = column_or_1d(y, warn=True)


LGBM done
test_year is 2021 with hyperparameters of {'n_estimators': 300, 'min_samples_split': 5, 'min_samples_leaf': 4, 'max_features': 'log2', 'max_depth': 20}
RF Done
PCA Done
42 dates remaining


  y = column_or_1d(y, warn=True)


Lasso Done
Ridge Done
Fitting 5 folds for each of 10 candidates, totalling 50 fits
Best XGBoost Parameters: {'subsample': 0.8, 'n_estimators': 300, 'max_depth': 5, 'learning_rate': 0.01, 'colsample_bytree': 1.0}
XGBoost done
Fitting 5 folds for each of 10 candidates, totalling 50 fits
Best LGBM Parameters: {'subsample': 0.7, 'num_leaves': 31, 'n_estimators': 100, 'max_depth': -1, 'learning_rate': 0.05, 'colsample_bytree': 0.7}


  y = column_or_1d(y, warn=True)


LGBM done
test_year is 2021 with hyperparameters of {'n_estimators': 300, 'min_samples_split': 5, 'min_samples_leaf': 4, 'max_features': 'log2', 'max_depth': 20}
RF Done
PCA Done
41 dates remaining


  y = column_or_1d(y, warn=True)


Lasso Done
Ridge Done
Fitting 5 folds for each of 10 candidates, totalling 50 fits
Best XGBoost Parameters: {'subsample': 0.8, 'n_estimators': 300, 'max_depth': 5, 'learning_rate': 0.01, 'colsample_bytree': 1.0}
XGBoost done
Fitting 5 folds for each of 10 candidates, totalling 50 fits
Best LGBM Parameters: {'subsample': 0.7, 'num_leaves': 31, 'n_estimators': 100, 'max_depth': -1, 'learning_rate': 0.05, 'colsample_bytree': 0.7}


  y = column_or_1d(y, warn=True)


LGBM done
test_year is 2021 with hyperparameters of {'n_estimators': 300, 'min_samples_split': 5, 'min_samples_leaf': 4, 'max_features': 'log2', 'max_depth': 20}
RF Done
PCA Done
40 dates remaining


  y = column_or_1d(y, warn=True)


Lasso Done
Ridge Done
Fitting 5 folds for each of 10 candidates, totalling 50 fits
Best XGBoost Parameters: {'subsample': 0.8, 'n_estimators': 300, 'max_depth': 5, 'learning_rate': 0.01, 'colsample_bytree': 1.0}
XGBoost done
Fitting 5 folds for each of 10 candidates, totalling 50 fits
Best LGBM Parameters: {'subsample': 0.9, 'num_leaves': 63, 'n_estimators': 500, 'max_depth': -1, 'learning_rate': 0.01, 'colsample_bytree': 0.8}


  y = column_or_1d(y, warn=True)


LGBM done
test_year is 2021 with hyperparameters of {'n_estimators': 300, 'min_samples_split': 5, 'min_samples_leaf': 4, 'max_features': 'log2', 'max_depth': 20}
RF Done
PCA Done
39 dates remaining


  y = column_or_1d(y, warn=True)


Lasso Done
Ridge Done
Fitting 5 folds for each of 10 candidates, totalling 50 fits
Best XGBoost Parameters: {'subsample': 0.8, 'n_estimators': 300, 'max_depth': 5, 'learning_rate': 0.01, 'colsample_bytree': 1.0}
XGBoost done
Fitting 5 folds for each of 10 candidates, totalling 50 fits
Best LGBM Parameters: {'subsample': 0.7, 'num_leaves': 31, 'n_estimators': 100, 'max_depth': -1, 'learning_rate': 0.05, 'colsample_bytree': 0.7}


  y = column_or_1d(y, warn=True)


LGBM done
test_year is 2021 with hyperparameters of {'n_estimators': 300, 'min_samples_split': 5, 'min_samples_leaf': 4, 'max_features': 'log2', 'max_depth': 20}
RF Done
PCA Done
38 dates remaining


  y = column_or_1d(y, warn=True)


Lasso Done
Ridge Done
Fitting 5 folds for each of 10 candidates, totalling 50 fits
Best XGBoost Parameters: {'subsample': 0.8, 'n_estimators': 300, 'max_depth': 5, 'learning_rate': 0.01, 'colsample_bytree': 1.0}
XGBoost done
Fitting 5 folds for each of 10 candidates, totalling 50 fits
Best LGBM Parameters: {'subsample': 0.7, 'num_leaves': 31, 'n_estimators': 100, 'max_depth': -1, 'learning_rate': 0.05, 'colsample_bytree': 0.7}


  y = column_or_1d(y, warn=True)


LGBM done
test_year is 2021 with hyperparameters of {'n_estimators': 300, 'min_samples_split': 5, 'min_samples_leaf': 4, 'max_features': 'log2', 'max_depth': 20}
RF Done
PCA Done
37 dates remaining


  y = column_or_1d(y, warn=True)


Lasso Done
Ridge Done
Fitting 5 folds for each of 10 candidates, totalling 50 fits
Best XGBoost Parameters: {'subsample': 0.8, 'n_estimators': 300, 'max_depth': 5, 'learning_rate': 0.01, 'colsample_bytree': 1.0}
XGBoost done
Fitting 5 folds for each of 10 candidates, totalling 50 fits
Best LGBM Parameters: {'subsample': 0.8, 'num_leaves': 63, 'n_estimators': 100, 'max_depth': 3, 'learning_rate': 0.05, 'colsample_bytree': 0.8}


  y = column_or_1d(y, warn=True)


LGBM done
test_year is 2021 with hyperparameters of {'n_estimators': 300, 'min_samples_split': 5, 'min_samples_leaf': 4, 'max_features': 'log2', 'max_depth': 20}
RF Done
PCA Done
36 dates remaining


  y = column_or_1d(y, warn=True)


Lasso Done
Ridge Done
Fitting 5 folds for each of 10 candidates, totalling 50 fits
Best XGBoost Parameters: {'subsample': 0.8, 'n_estimators': 300, 'max_depth': 5, 'learning_rate': 0.01, 'colsample_bytree': 1.0}
XGBoost done
Fitting 5 folds for each of 10 candidates, totalling 50 fits
Best LGBM Parameters: {'subsample': 0.8, 'num_leaves': 63, 'n_estimators': 100, 'max_depth': 3, 'learning_rate': 0.05, 'colsample_bytree': 0.8}


  y = column_or_1d(y, warn=True)


LGBM done
test_year is 2021 with hyperparameters of {'n_estimators': 300, 'min_samples_split': 5, 'min_samples_leaf': 4, 'max_features': 'log2', 'max_depth': 20}
RF Done
PCA Done
35 dates remaining


  y = column_or_1d(y, warn=True)


Lasso Done
Ridge Done
Fitting 5 folds for each of 10 candidates, totalling 50 fits
Best XGBoost Parameters: {'subsample': 0.7, 'n_estimators': 500, 'max_depth': 3, 'learning_rate': 0.01, 'colsample_bytree': 0.7}
XGBoost done
Fitting 5 folds for each of 10 candidates, totalling 50 fits
Best LGBM Parameters: {'subsample': 0.8, 'num_leaves': 63, 'n_estimators': 100, 'max_depth': 3, 'learning_rate': 0.05, 'colsample_bytree': 0.8}


  y = column_or_1d(y, warn=True)


LGBM done
test_year is 2021 with hyperparameters of {'n_estimators': 300, 'min_samples_split': 5, 'min_samples_leaf': 4, 'max_features': 'log2', 'max_depth': 20}
RF Done
PCA Done
34 dates remaining


  y = column_or_1d(y, warn=True)


Lasso Done
Ridge Done
Fitting 5 folds for each of 10 candidates, totalling 50 fits
Best XGBoost Parameters: {'subsample': 0.8, 'n_estimators': 300, 'max_depth': 5, 'learning_rate': 0.01, 'colsample_bytree': 1.0}
XGBoost done
Fitting 5 folds for each of 10 candidates, totalling 50 fits
Best LGBM Parameters: {'subsample': 0.8, 'num_leaves': 63, 'n_estimators': 100, 'max_depth': 3, 'learning_rate': 0.05, 'colsample_bytree': 0.8}


  y = column_or_1d(y, warn=True)


LGBM done
test_year is 2021 with hyperparameters of {'n_estimators': 300, 'min_samples_split': 5, 'min_samples_leaf': 4, 'max_features': 'log2', 'max_depth': 20}
RF Done
PCA Done
33 dates remaining


  y = column_or_1d(y, warn=True)


Lasso Done
Ridge Done
Fitting 5 folds for each of 10 candidates, totalling 50 fits
Best XGBoost Parameters: {'subsample': 0.8, 'n_estimators': 300, 'max_depth': 5, 'learning_rate': 0.01, 'colsample_bytree': 1.0}
XGBoost done
Fitting 5 folds for each of 10 candidates, totalling 50 fits
Best LGBM Parameters: {'subsample': 0.7, 'num_leaves': 31, 'n_estimators': 100, 'max_depth': -1, 'learning_rate': 0.05, 'colsample_bytree': 0.7}


  y = column_or_1d(y, warn=True)


LGBM done
test_year is 2022 with hyperparameters of {'n_estimators': 100, 'min_samples_split': 10, 'min_samples_leaf': 4, 'max_features': 'log2', 'max_depth': None}
RF Done
PCA Done
32 dates remaining


  y = column_or_1d(y, warn=True)


Lasso Done
Ridge Done
Fitting 5 folds for each of 10 candidates, totalling 50 fits
Best XGBoost Parameters: {'subsample': 0.8, 'n_estimators': 300, 'max_depth': 5, 'learning_rate': 0.01, 'colsample_bytree': 1.0}
XGBoost done
Fitting 5 folds for each of 10 candidates, totalling 50 fits
Best LGBM Parameters: {'subsample': 0.8, 'num_leaves': 63, 'n_estimators': 100, 'max_depth': 3, 'learning_rate': 0.05, 'colsample_bytree': 0.8}


  y = column_or_1d(y, warn=True)


LGBM done
test_year is 2022 with hyperparameters of {'n_estimators': 100, 'min_samples_split': 10, 'min_samples_leaf': 4, 'max_features': 'log2', 'max_depth': None}
RF Done
PCA Done
31 dates remaining


  y = column_or_1d(y, warn=True)


Lasso Done
Ridge Done
Fitting 5 folds for each of 10 candidates, totalling 50 fits
Best XGBoost Parameters: {'subsample': 0.7, 'n_estimators': 100, 'max_depth': 5, 'learning_rate': 0.01, 'colsample_bytree': 0.7}
XGBoost done
Fitting 5 folds for each of 10 candidates, totalling 50 fits
Best LGBM Parameters: {'subsample': 0.7, 'num_leaves': 31, 'n_estimators': 100, 'max_depth': -1, 'learning_rate': 0.05, 'colsample_bytree': 0.7}


  y = column_or_1d(y, warn=True)


LGBM done
test_year is 2022 with hyperparameters of {'n_estimators': 100, 'min_samples_split': 10, 'min_samples_leaf': 4, 'max_features': 'log2', 'max_depth': None}
RF Done
PCA Done
30 dates remaining


  y = column_or_1d(y, warn=True)


Lasso Done
Ridge Done
Fitting 5 folds for each of 10 candidates, totalling 50 fits
Best XGBoost Parameters: {'subsample': 0.7, 'n_estimators': 500, 'max_depth': 3, 'learning_rate': 0.01, 'colsample_bytree': 0.7}
XGBoost done
Fitting 5 folds for each of 10 candidates, totalling 50 fits
Best LGBM Parameters: {'subsample': 0.7, 'num_leaves': 31, 'n_estimators': 100, 'max_depth': -1, 'learning_rate': 0.05, 'colsample_bytree': 0.7}


  y = column_or_1d(y, warn=True)


LGBM done
test_year is 2022 with hyperparameters of {'n_estimators': 100, 'min_samples_split': 10, 'min_samples_leaf': 4, 'max_features': 'log2', 'max_depth': None}
RF Done
PCA Done
29 dates remaining


  y = column_or_1d(y, warn=True)


Lasso Done
Ridge Done
Fitting 5 folds for each of 10 candidates, totalling 50 fits
Best XGBoost Parameters: {'subsample': 0.7, 'n_estimators': 500, 'max_depth': 3, 'learning_rate': 0.01, 'colsample_bytree': 0.7}
XGBoost done
Fitting 5 folds for each of 10 candidates, totalling 50 fits
Best LGBM Parameters: {'subsample': 0.7, 'num_leaves': 31, 'n_estimators': 100, 'max_depth': -1, 'learning_rate': 0.05, 'colsample_bytree': 0.7}


  y = column_or_1d(y, warn=True)


LGBM done
test_year is 2022 with hyperparameters of {'n_estimators': 100, 'min_samples_split': 10, 'min_samples_leaf': 4, 'max_features': 'log2', 'max_depth': None}
RF Done
PCA Done
28 dates remaining


  y = column_or_1d(y, warn=True)


Lasso Done
Ridge Done
Fitting 5 folds for each of 10 candidates, totalling 50 fits
Best XGBoost Parameters: {'subsample': 0.7, 'n_estimators': 500, 'max_depth': 3, 'learning_rate': 0.01, 'colsample_bytree': 0.7}
XGBoost done
Fitting 5 folds for each of 10 candidates, totalling 50 fits




Best LGBM Parameters: {'subsample': 0.7, 'num_leaves': 31, 'n_estimators': 100, 'max_depth': -1, 'learning_rate': 0.05, 'colsample_bytree': 0.7}


  y = column_or_1d(y, warn=True)


LGBM done
test_year is 2022 with hyperparameters of {'n_estimators': 100, 'min_samples_split': 10, 'min_samples_leaf': 4, 'max_features': 'log2', 'max_depth': None}
RF Done
PCA Done
27 dates remaining


  y = column_or_1d(y, warn=True)


Lasso Done
Ridge Done
Fitting 5 folds for each of 10 candidates, totalling 50 fits
Best XGBoost Parameters: {'subsample': 0.8, 'n_estimators': 300, 'max_depth': 5, 'learning_rate': 0.01, 'colsample_bytree': 1.0}
XGBoost done
Fitting 5 folds for each of 10 candidates, totalling 50 fits
Best LGBM Parameters: {'subsample': 0.7, 'num_leaves': 31, 'n_estimators': 100, 'max_depth': -1, 'learning_rate': 0.05, 'colsample_bytree': 0.7}


  y = column_or_1d(y, warn=True)


LGBM done
test_year is 2022 with hyperparameters of {'n_estimators': 100, 'min_samples_split': 10, 'min_samples_leaf': 4, 'max_features': 'log2', 'max_depth': None}
RF Done
PCA Done
26 dates remaining


  y = column_or_1d(y, warn=True)


Lasso Done
Ridge Done
Fitting 5 folds for each of 10 candidates, totalling 50 fits
Best XGBoost Parameters: {'subsample': 0.8, 'n_estimators': 300, 'max_depth': 5, 'learning_rate': 0.01, 'colsample_bytree': 1.0}
XGBoost done
Fitting 5 folds for each of 10 candidates, totalling 50 fits
Best LGBM Parameters: {'subsample': 0.9, 'num_leaves': 63, 'n_estimators': 100, 'max_depth': 5, 'learning_rate': 0.01, 'colsample_bytree': 0.8}


  y = column_or_1d(y, warn=True)


LGBM done
test_year is 2022 with hyperparameters of {'n_estimators': 100, 'min_samples_split': 10, 'min_samples_leaf': 4, 'max_features': 'log2', 'max_depth': None}
RF Done
PCA Done
25 dates remaining


  y = column_or_1d(y, warn=True)


Lasso Done
Ridge Done
Fitting 5 folds for each of 10 candidates, totalling 50 fits
Best XGBoost Parameters: {'subsample': 0.8, 'n_estimators': 300, 'max_depth': 5, 'learning_rate': 0.01, 'colsample_bytree': 1.0}
XGBoost done
Fitting 5 folds for each of 10 candidates, totalling 50 fits
Best LGBM Parameters: {'subsample': 0.9, 'num_leaves': 63, 'n_estimators': 500, 'max_depth': -1, 'learning_rate': 0.01, 'colsample_bytree': 0.8}


  y = column_or_1d(y, warn=True)


LGBM done
test_year is 2022 with hyperparameters of {'n_estimators': 100, 'min_samples_split': 10, 'min_samples_leaf': 4, 'max_features': 'log2', 'max_depth': None}
RF Done
PCA Done
24 dates remaining


  y = column_or_1d(y, warn=True)


Lasso Done
Ridge Done
Fitting 5 folds for each of 10 candidates, totalling 50 fits
Best XGBoost Parameters: {'subsample': 0.7, 'n_estimators': 500, 'max_depth': 7, 'learning_rate': 0.01, 'colsample_bytree': 0.7}
XGBoost done
Fitting 5 folds for each of 10 candidates, totalling 50 fits
Best LGBM Parameters: {'subsample': 0.7, 'num_leaves': 31, 'n_estimators': 100, 'max_depth': -1, 'learning_rate': 0.05, 'colsample_bytree': 0.7}


  y = column_or_1d(y, warn=True)


LGBM done
test_year is 2022 with hyperparameters of {'n_estimators': 100, 'min_samples_split': 10, 'min_samples_leaf': 4, 'max_features': 'log2', 'max_depth': None}
RF Done
PCA Done
23 dates remaining


  y = column_or_1d(y, warn=True)


Lasso Done
Ridge Done
Fitting 5 folds for each of 10 candidates, totalling 50 fits
Best XGBoost Parameters: {'subsample': 0.7, 'n_estimators': 500, 'max_depth': 7, 'learning_rate': 0.01, 'colsample_bytree': 0.7}
XGBoost done
Fitting 5 folds for each of 10 candidates, totalling 50 fits
Best LGBM Parameters: {'subsample': 0.7, 'num_leaves': 31, 'n_estimators': 100, 'max_depth': -1, 'learning_rate': 0.05, 'colsample_bytree': 0.7}


  y = column_or_1d(y, warn=True)


LGBM done
test_year is 2022 with hyperparameters of {'n_estimators': 100, 'min_samples_split': 10, 'min_samples_leaf': 4, 'max_features': 'log2', 'max_depth': None}
RF Done
PCA Done
22 dates remaining


  y = column_or_1d(y, warn=True)


Lasso Done
Ridge Done
Fitting 5 folds for each of 10 candidates, totalling 50 fits
Best XGBoost Parameters: {'subsample': 0.7, 'n_estimators': 500, 'max_depth': 7, 'learning_rate': 0.01, 'colsample_bytree': 0.7}
XGBoost done
Fitting 5 folds for each of 10 candidates, totalling 50 fits
Best LGBM Parameters: {'subsample': 0.7, 'num_leaves': 31, 'n_estimators': 100, 'max_depth': -1, 'learning_rate': 0.05, 'colsample_bytree': 0.7}


  y = column_or_1d(y, warn=True)


LGBM done
test_year is 2022 with hyperparameters of {'n_estimators': 100, 'min_samples_split': 10, 'min_samples_leaf': 4, 'max_features': 'log2', 'max_depth': None}
RF Done
PCA Done
21 dates remaining


  y = column_or_1d(y, warn=True)


Lasso Done
Ridge Done
Fitting 5 folds for each of 10 candidates, totalling 50 fits
Best XGBoost Parameters: {'subsample': 0.7, 'n_estimators': 500, 'max_depth': 7, 'learning_rate': 0.01, 'colsample_bytree': 0.7}
XGBoost done
Fitting 5 folds for each of 10 candidates, totalling 50 fits
Best LGBM Parameters: {'subsample': 0.7, 'num_leaves': 31, 'n_estimators': 100, 'max_depth': -1, 'learning_rate': 0.05, 'colsample_bytree': 0.7}


  y = column_or_1d(y, warn=True)


LGBM done
test_year is 2023 with hyperparameters of {'n_estimators': 300, 'min_samples_split': 5, 'min_samples_leaf': 4, 'max_features': 'log2', 'max_depth': 20}
RF Done
PCA Done
20 dates remaining


  y = column_or_1d(y, warn=True)


Lasso Done
Ridge Done
Fitting 5 folds for each of 10 candidates, totalling 50 fits
Best XGBoost Parameters: {'subsample': 0.7, 'n_estimators': 500, 'max_depth': 7, 'learning_rate': 0.01, 'colsample_bytree': 0.7}
XGBoost done
Fitting 5 folds for each of 10 candidates, totalling 50 fits
Best LGBM Parameters: {'subsample': 0.7, 'num_leaves': 31, 'n_estimators': 100, 'max_depth': -1, 'learning_rate': 0.05, 'colsample_bytree': 0.7}


  y = column_or_1d(y, warn=True)


LGBM done
test_year is 2023 with hyperparameters of {'n_estimators': 300, 'min_samples_split': 5, 'min_samples_leaf': 4, 'max_features': 'log2', 'max_depth': 20}
RF Done
PCA Done
19 dates remaining


  y = column_or_1d(y, warn=True)


Lasso Done
Ridge Done
Fitting 5 folds for each of 10 candidates, totalling 50 fits
Best XGBoost Parameters: {'subsample': 0.7, 'n_estimators': 500, 'max_depth': 7, 'learning_rate': 0.01, 'colsample_bytree': 0.7}
XGBoost done
Fitting 5 folds for each of 10 candidates, totalling 50 fits
Best LGBM Parameters: {'subsample': 0.8, 'num_leaves': 63, 'n_estimators': 100, 'max_depth': 3, 'learning_rate': 0.05, 'colsample_bytree': 0.8}


  y = column_or_1d(y, warn=True)


LGBM done
test_year is 2023 with hyperparameters of {'n_estimators': 300, 'min_samples_split': 5, 'min_samples_leaf': 4, 'max_features': 'log2', 'max_depth': 20}
RF Done
PCA Done
18 dates remaining


  y = column_or_1d(y, warn=True)


Lasso Done
Ridge Done
Fitting 5 folds for each of 10 candidates, totalling 50 fits
Best XGBoost Parameters: {'subsample': 0.8, 'n_estimators': 300, 'max_depth': 5, 'learning_rate': 0.01, 'colsample_bytree': 1.0}
XGBoost done
Fitting 5 folds for each of 10 candidates, totalling 50 fits
Best LGBM Parameters: {'subsample': 0.8, 'num_leaves': 63, 'n_estimators': 100, 'max_depth': 3, 'learning_rate': 0.05, 'colsample_bytree': 0.8}


  y = column_or_1d(y, warn=True)


LGBM done
test_year is 2023 with hyperparameters of {'n_estimators': 300, 'min_samples_split': 5, 'min_samples_leaf': 4, 'max_features': 'log2', 'max_depth': 20}
RF Done
PCA Done
17 dates remaining


  y = column_or_1d(y, warn=True)


Lasso Done
Ridge Done
Fitting 5 folds for each of 10 candidates, totalling 50 fits
Best XGBoost Parameters: {'subsample': 0.7, 'n_estimators': 500, 'max_depth': 7, 'learning_rate': 0.01, 'colsample_bytree': 0.7}
XGBoost done
Fitting 5 folds for each of 10 candidates, totalling 50 fits
Best LGBM Parameters: {'subsample': 0.8, 'num_leaves': 63, 'n_estimators': 100, 'max_depth': 3, 'learning_rate': 0.05, 'colsample_bytree': 0.8}


  y = column_or_1d(y, warn=True)


LGBM done
test_year is 2023 with hyperparameters of {'n_estimators': 300, 'min_samples_split': 5, 'min_samples_leaf': 4, 'max_features': 'log2', 'max_depth': 20}
RF Done
PCA Done
16 dates remaining


  y = column_or_1d(y, warn=True)


Lasso Done
Ridge Done
Fitting 5 folds for each of 10 candidates, totalling 50 fits
Best XGBoost Parameters: {'subsample': 0.8, 'n_estimators': 300, 'max_depth': 5, 'learning_rate': 0.01, 'colsample_bytree': 1.0}
XGBoost done
Fitting 5 folds for each of 10 candidates, totalling 50 fits
Best LGBM Parameters: {'subsample': 0.8, 'num_leaves': 63, 'n_estimators': 100, 'max_depth': 3, 'learning_rate': 0.05, 'colsample_bytree': 0.8}


  y = column_or_1d(y, warn=True)


LGBM done
test_year is 2023 with hyperparameters of {'n_estimators': 300, 'min_samples_split': 5, 'min_samples_leaf': 4, 'max_features': 'log2', 'max_depth': 20}
RF Done
PCA Done
15 dates remaining


  y = column_or_1d(y, warn=True)


Lasso Done
Ridge Done
Fitting 5 folds for each of 10 candidates, totalling 50 fits
Best XGBoost Parameters: {'subsample': 0.8, 'n_estimators': 300, 'max_depth': 5, 'learning_rate': 0.01, 'colsample_bytree': 1.0}
XGBoost done
Fitting 5 folds for each of 10 candidates, totalling 50 fits
Best LGBM Parameters: {'subsample': 0.8, 'num_leaves': 63, 'n_estimators': 100, 'max_depth': 3, 'learning_rate': 0.05, 'colsample_bytree': 0.8}


  y = column_or_1d(y, warn=True)


LGBM done
test_year is 2023 with hyperparameters of {'n_estimators': 300, 'min_samples_split': 5, 'min_samples_leaf': 4, 'max_features': 'log2', 'max_depth': 20}
RF Done
PCA Done
14 dates remaining


  y = column_or_1d(y, warn=True)


Lasso Done
Ridge Done
Fitting 5 folds for each of 10 candidates, totalling 50 fits
Best XGBoost Parameters: {'subsample': 0.8, 'n_estimators': 300, 'max_depth': 5, 'learning_rate': 0.01, 'colsample_bytree': 1.0}
XGBoost done
Fitting 5 folds for each of 10 candidates, totalling 50 fits
Best LGBM Parameters: {'subsample': 0.9, 'num_leaves': 63, 'n_estimators': 500, 'max_depth': -1, 'learning_rate': 0.01, 'colsample_bytree': 0.8}


  y = column_or_1d(y, warn=True)


LGBM done
test_year is 2023 with hyperparameters of {'n_estimators': 300, 'min_samples_split': 5, 'min_samples_leaf': 4, 'max_features': 'log2', 'max_depth': 20}
RF Done
PCA Done
13 dates remaining


  y = column_or_1d(y, warn=True)


Lasso Done
Ridge Done
Fitting 5 folds for each of 10 candidates, totalling 50 fits
Best XGBoost Parameters: {'subsample': 0.8, 'n_estimators': 300, 'max_depth': 5, 'learning_rate': 0.01, 'colsample_bytree': 1.0}
XGBoost done
Fitting 5 folds for each of 10 candidates, totalling 50 fits
Best LGBM Parameters: {'subsample': 0.8, 'num_leaves': 63, 'n_estimators': 100, 'max_depth': 3, 'learning_rate': 0.05, 'colsample_bytree': 0.8}


  y = column_or_1d(y, warn=True)


LGBM done
test_year is 2023 with hyperparameters of {'n_estimators': 300, 'min_samples_split': 5, 'min_samples_leaf': 4, 'max_features': 'log2', 'max_depth': 20}
RF Done
PCA Done
12 dates remaining


  y = column_or_1d(y, warn=True)


Lasso Done
Ridge Done
Fitting 5 folds for each of 10 candidates, totalling 50 fits
Best XGBoost Parameters: {'subsample': 0.8, 'n_estimators': 300, 'max_depth': 5, 'learning_rate': 0.01, 'colsample_bytree': 1.0}
XGBoost done
Fitting 5 folds for each of 10 candidates, totalling 50 fits
Best LGBM Parameters: {'subsample': 0.8, 'num_leaves': 63, 'n_estimators': 100, 'max_depth': 3, 'learning_rate': 0.05, 'colsample_bytree': 0.8}


  y = column_or_1d(y, warn=True)


LGBM done
test_year is 2023 with hyperparameters of {'n_estimators': 300, 'min_samples_split': 5, 'min_samples_leaf': 4, 'max_features': 'log2', 'max_depth': 20}
RF Done
PCA Done
11 dates remaining


  y = column_or_1d(y, warn=True)


Lasso Done
Ridge Done
Fitting 5 folds for each of 10 candidates, totalling 50 fits
Best XGBoost Parameters: {'subsample': 0.7, 'n_estimators': 500, 'max_depth': 3, 'learning_rate': 0.01, 'colsample_bytree': 0.7}
XGBoost done
Fitting 5 folds for each of 10 candidates, totalling 50 fits
Best LGBM Parameters: {'subsample': 0.8, 'num_leaves': 63, 'n_estimators': 100, 'max_depth': 3, 'learning_rate': 0.05, 'colsample_bytree': 0.8}


  y = column_or_1d(y, warn=True)


LGBM done
test_year is 2023 with hyperparameters of {'n_estimators': 300, 'min_samples_split': 5, 'min_samples_leaf': 4, 'max_features': 'log2', 'max_depth': 20}
RF Done
PCA Done
10 dates remaining


  y = column_or_1d(y, warn=True)


Lasso Done
Ridge Done
Fitting 5 folds for each of 10 candidates, totalling 50 fits
Best XGBoost Parameters: {'subsample': 0.7, 'n_estimators': 500, 'max_depth': 3, 'learning_rate': 0.01, 'colsample_bytree': 0.7}
XGBoost done
Fitting 5 folds for each of 10 candidates, totalling 50 fits
Best LGBM Parameters: {'subsample': 0.8, 'num_leaves': 63, 'n_estimators': 100, 'max_depth': 3, 'learning_rate': 0.05, 'colsample_bytree': 0.8}


  y = column_or_1d(y, warn=True)


LGBM done
test_year is 2023 with hyperparameters of {'n_estimators': 300, 'min_samples_split': 5, 'min_samples_leaf': 4, 'max_features': 'log2', 'max_depth': 20}
RF Done
PCA Done
9 dates remaining


  y = column_or_1d(y, warn=True)


Lasso Done
Ridge Done
Fitting 5 folds for each of 10 candidates, totalling 50 fits
Best XGBoost Parameters: {'subsample': 0.7, 'n_estimators': 500, 'max_depth': 3, 'learning_rate': 0.01, 'colsample_bytree': 0.7}
XGBoost done
Fitting 5 folds for each of 10 candidates, totalling 50 fits
Best LGBM Parameters: {'subsample': 0.7, 'num_leaves': 31, 'n_estimators': 100, 'max_depth': -1, 'learning_rate': 0.05, 'colsample_bytree': 0.7}


  y = column_or_1d(y, warn=True)


LGBM done
test_year is 2024 with hyperparameters of {'n_estimators': 100, 'min_samples_split': 10, 'min_samples_leaf': 4, 'max_features': 'log2', 'max_depth': None}
RF Done
PCA Done
8 dates remaining


  y = column_or_1d(y, warn=True)


Lasso Done
Ridge Done
Fitting 5 folds for each of 10 candidates, totalling 50 fits
Best XGBoost Parameters: {'subsample': 0.8, 'n_estimators': 300, 'max_depth': 5, 'learning_rate': 0.01, 'colsample_bytree': 1.0}
XGBoost done
Fitting 5 folds for each of 10 candidates, totalling 50 fits
Best LGBM Parameters: {'subsample': 0.8, 'num_leaves': 63, 'n_estimators': 100, 'max_depth': 3, 'learning_rate': 0.05, 'colsample_bytree': 0.8}


  y = column_or_1d(y, warn=True)


LGBM done
test_year is 2024 with hyperparameters of {'n_estimators': 100, 'min_samples_split': 10, 'min_samples_leaf': 4, 'max_features': 'log2', 'max_depth': None}
RF Done
PCA Done
7 dates remaining


  y = column_or_1d(y, warn=True)


Lasso Done
Ridge Done
Fitting 5 folds for each of 10 candidates, totalling 50 fits
Best XGBoost Parameters: {'subsample': 0.8, 'n_estimators': 300, 'max_depth': 5, 'learning_rate': 0.01, 'colsample_bytree': 1.0}
XGBoost done
Fitting 5 folds for each of 10 candidates, totalling 50 fits


  y = column_or_1d(y, warn=True)


Best LGBM Parameters: {'subsample': 0.8, 'num_leaves': 63, 'n_estimators': 100, 'max_depth': 3, 'learning_rate': 0.05, 'colsample_bytree': 0.8}
LGBM done
test_year is 2024 with hyperparameters of {'n_estimators': 100, 'min_samples_split': 10, 'min_samples_leaf': 4, 'max_features': 'log2', 'max_depth': None}
RF Done
PCA Done
6 dates remaining


  y = column_or_1d(y, warn=True)


Lasso Done
Ridge Done
Fitting 5 folds for each of 10 candidates, totalling 50 fits
Best XGBoost Parameters: {'subsample': 0.8, 'n_estimators': 300, 'max_depth': 5, 'learning_rate': 0.01, 'colsample_bytree': 1.0}
XGBoost done
Fitting 5 folds for each of 10 candidates, totalling 50 fits
Best LGBM Parameters: {'subsample': 0.8, 'num_leaves': 63, 'n_estimators': 100, 'max_depth': 3, 'learning_rate': 0.05, 'colsample_bytree': 0.8}


  y = column_or_1d(y, warn=True)


LGBM done
test_year is 2024 with hyperparameters of {'n_estimators': 100, 'min_samples_split': 10, 'min_samples_leaf': 4, 'max_features': 'log2', 'max_depth': None}
RF Done
PCA Done
5 dates remaining


  y = column_or_1d(y, warn=True)


Lasso Done
Ridge Done
Fitting 5 folds for each of 10 candidates, totalling 50 fits
Best XGBoost Parameters: {'subsample': 0.8, 'n_estimators': 300, 'max_depth': 5, 'learning_rate': 0.01, 'colsample_bytree': 1.0}
XGBoost done
Fitting 5 folds for each of 10 candidates, totalling 50 fits
Best LGBM Parameters: {'subsample': 0.8, 'num_leaves': 63, 'n_estimators': 100, 'max_depth': 3, 'learning_rate': 0.05, 'colsample_bytree': 0.8}


  y = column_or_1d(y, warn=True)


LGBM done
test_year is 2024 with hyperparameters of {'n_estimators': 100, 'min_samples_split': 10, 'min_samples_leaf': 4, 'max_features': 'log2', 'max_depth': None}
RF Done
PCA Done
4 dates remaining


  y = column_or_1d(y, warn=True)


Lasso Done
Ridge Done
Fitting 5 folds for each of 10 candidates, totalling 50 fits
Best XGBoost Parameters: {'subsample': 0.8, 'n_estimators': 300, 'max_depth': 5, 'learning_rate': 0.01, 'colsample_bytree': 1.0}
XGBoost done
Fitting 5 folds for each of 10 candidates, totalling 50 fits
Best LGBM Parameters: {'subsample': 0.8, 'num_leaves': 63, 'n_estimators': 100, 'max_depth': 3, 'learning_rate': 0.05, 'colsample_bytree': 0.8}


  y = column_or_1d(y, warn=True)


LGBM done
test_year is 2024 with hyperparameters of {'n_estimators': 100, 'min_samples_split': 10, 'min_samples_leaf': 4, 'max_features': 'log2', 'max_depth': None}
RF Done
PCA Done
3 dates remaining


  y = column_or_1d(y, warn=True)


Lasso Done
Ridge Done
Fitting 5 folds for each of 10 candidates, totalling 50 fits
Best XGBoost Parameters: {'subsample': 0.8, 'n_estimators': 300, 'max_depth': 5, 'learning_rate': 0.01, 'colsample_bytree': 1.0}
XGBoost done
Fitting 5 folds for each of 10 candidates, totalling 50 fits
Best LGBM Parameters: {'subsample': 0.8, 'num_leaves': 63, 'n_estimators': 100, 'max_depth': 3, 'learning_rate': 0.05, 'colsample_bytree': 0.8}


  y = column_or_1d(y, warn=True)


LGBM done
test_year is 2024 with hyperparameters of {'n_estimators': 100, 'min_samples_split': 10, 'min_samples_leaf': 4, 'max_features': 'log2', 'max_depth': None}
RF Done
PCA Done
2 dates remaining


  y = column_or_1d(y, warn=True)


Lasso Done
Ridge Done
Fitting 5 folds for each of 10 candidates, totalling 50 fits
Best XGBoost Parameters: {'subsample': 0.8, 'n_estimators': 300, 'max_depth': 5, 'learning_rate': 0.01, 'colsample_bytree': 1.0}
XGBoost done
Fitting 5 folds for each of 10 candidates, totalling 50 fits
Best LGBM Parameters: {'subsample': 0.8, 'num_leaves': 63, 'n_estimators': 100, 'max_depth': 3, 'learning_rate': 0.05, 'colsample_bytree': 0.8}


  y = column_or_1d(y, warn=True)


LGBM done
test_year is 2024 with hyperparameters of {'n_estimators': 100, 'min_samples_split': 10, 'min_samples_leaf': 4, 'max_features': 'log2', 'max_depth': None}
RF Done
PCA Done
1 dates remaining


  y = column_or_1d(y, warn=True)


Lasso Done
Ridge Done
Fitting 5 folds for each of 10 candidates, totalling 50 fits
Best XGBoost Parameters: {'subsample': 0.8, 'n_estimators': 300, 'max_depth': 5, 'learning_rate': 0.01, 'colsample_bytree': 1.0}
XGBoost done
Fitting 5 folds for each of 10 candidates, totalling 50 fits
Best LGBM Parameters: {'subsample': 0.8, 'num_leaves': 63, 'n_estimators': 100, 'max_depth': 3, 'learning_rate': 0.05, 'colsample_bytree': 0.8}


  y = column_or_1d(y, warn=True)


LGBM done
test_year is 2024 with hyperparameters of {'n_estimators': 100, 'min_samples_split': 10, 'min_samples_leaf': 4, 'max_features': 'log2', 'max_depth': None}
RF Done
PCA Done
0 dates remaining


In [21]:
y_new.to_csv('tuned_results_xgboostlgbm_yearly_tuned_every_round.csv')

Technology Sector

In [9]:
# lstm_hyperparams_dict_tech = dict(
#     {'2019': dict({
#         'num_layers':2,
#         'units':[32,32],
#         'optimizer':'Adam',
#         'drop_out':[0.1,0.1],
#         'activation':['linear','tanh'],
#         'lr':0.04130929884537057
#     }),
#      '2020': dict({
#         'num_layers':2,
#         'units':[96,32],
#         'optimizer':'Adam',
#         'drop_out':[0.1,0.1],
#         'activation':['tanh','tanh'],
#         'lr':0.0017545326289340611
#     }),
#      '2021': dict({
#         'num_layers':2,
#         'units':[96,32],
#         'optimizer':'Adam',
#         'drop_out':[0.1,0.2],
#         'activation':['linear','tanh'],
#         'lr':0.008514542675036612
#     }),
#      '2022': dict({
#         'num_layers':2,
#         'units':[32,32],
#         'optimizer':'Adagrad',
#         'drop_out':[0.2,0.1],
#         'activation':['linear','tanh'],
#         'lr':0.06210952184736157
#     }),
#      '2023': dict({
#         'num_layers':4,
#         'units':[128,32,32,32],
#         'optimizer':'Adam',
#         'drop_out':[0.2,0.1,0.1,0.2],
#         'activation':['tanh','tanh','linear','linear'],
#         'lr':0.019671144721368838
#     }),
#      '2024': dict({
#         'num_layers':2,
#         'units':[32,32],
#         'optimizer':'Adam',
#         'drop_out':[0.2,0.1],
#         'activation':['tanh','tanh'],
#         'lr':0.004409350423921994
#     })}
# )
lstm_hyperparams_dict_tech = dict(
    {'2019': dict({
        'num_layers':2,
        'units':[32,32],
        'optimizer':'Adam',
        'drop_out':[0.1,0.1],
        'activation':['linear','tanh'],
        'lr':0.04130929884537057
    }),
     '2020': dict({
        'num_layers':2,
        'units':[96,32],
        'optimizer':'Adam',
        'drop_out':[0.1,0.1],
        'activation':['tanh','tanh'],
        'lr':0.0017545326289340611
    }),
     '2021': dict({
        'num_layers':2,
        'units':[96,32],
        'optimizer':'Adam',
        'drop_out':[0.1,0.2],
        'activation':['linear','tanh'],
        'lr':0.008514542675036612
    }),
     '2022': dict({
        'num_layers':2,
        'units':[32,32],
        'optimizer':'Adagrad',
        'drop_out':[0.2,0.1],
        'activation':['linear','tanh'],
        'lr':0.06210952184736157
    }),
     '2023': dict({
        'num_layers':4,
        'units':[128,32,32,32],
        'optimizer':'Adam',
        'drop_out':[0.2,0.1,0.1,0.2],
        'activation':['tanh','tanh','linear','linear'],
        'lr':0.019671144721368838
    }),
     '2024': dict({
        'num_layers':2,
        'units':[32,32],
        'optimizer':'Adam',
        'drop_out':[0.2,0.1],
        'activation':['tanh','tanh'],
        'lr':0.004409350423921994
    })}
)

rf_hyperparams_dict_tech = dict(
    {'2019': dict({
        'n_estimators': 300,
        'min_samples_split': 5,
        'min_samples_leaf': 4,
        'max_features': 'log2',
        'max_depth': 20
    }),
     '2020': dict({
        'n_estimators': 300,
        'min_samples_split': 5,
        'min_samples_leaf': 4,
        'max_features': 'log2',
        'max_depth': 20
    }),
     '2021': dict({
        'n_estimators': 300,
        'min_samples_split': 5,
        'min_samples_leaf': 4,
        'max_features': 'log2',
        'max_depth': 20
    }),
     '2022': dict({
        'n_estimators': 100,
        'min_samples_split': 10,
        'min_samples_leaf': 4,
        'max_features': 'log2',
        'max_depth': None
    }),
     '2023': dict({
        'n_estimators': 300,
        'min_samples_split': 5,
        'min_samples_leaf': 4,
        'max_features': 'log2',
        'max_depth': 20
    }),
     '2024': dict({
        'n_estimators': 100,
        'min_samples_split': 5,
        'min_samples_leaf': 4,
        'max_features': 'log2',
        'max_depth': 30
    })}
)

In [10]:
from tensorflow.keras.optimizers import Adam, Adagrad
from tensorflow.keras.models import Sequential, load_model
def overall_function(dataset, outcome,lstm_hyperparams_dict_tech, rf_hyperparams_dict_tech):
    dataset['date'] = pd.to_datetime(dataset['date']) # converting to date format
    dataset = dataset.sort_values(by='date')
    df_factor = dataset.drop(columns=['mkt_return','mth_return','rf','rolling_sharpe', 'rolling_alpha_3f', 'rolling_alpha_4f']) # remove irrelevant variables

    # Creating Lagged and Stepped Datasets
    X_dataset, y_dataset = create_stepped_dataset(create_lagged_dataset(df_factor, lag=1,target_var=outcome, id = 'crsp_fundno'),step=1,target_var=outcome, id = 'crsp_fundno_L1')
    # return(X_dataset)
    X_dataset = X_dataset.drop(columns=['crsp_fundno_L1'], errors='ignore')

    list_of_dates = pd.to_datetime(X_dataset['date_L1'])
    percentile_70 = list_of_dates.quantile(0.7) # 70-30 split
    train_end = list_of_dates.loc[(list_of_dates - percentile_70).abs().idxmin()]
    df_end = list_of_dates.max()
    # return(y_dataset)
    results = []

    ## Implement cross-validation split
    tscv = TimeSeriesSplit(n_splits = 5)
    
    while train_end != df_end:
        
        test_date = generate_next_date(list_of_dates, train_end)
        if pd.isna(test_date):
            break 

        # Process data for modeling
        X_train, X_test, y_train, y_test = process_factor_model(X_dataset, y_dataset, train_end, test_date)
        
        # For Adding Results
        df_in_loop = y_test.copy()
        
        ### Model 1: Lasso Regression
        lasso_cv = LassoCV(cv = tscv, random_state = 18, max_iter = 100000)
        lasso_cv.fit(X_train, y_train)
        
        # Create the Lasso model with the optimal alpha value
        lasso_model = Lasso(alpha = lasso_cv.alpha_)
        lasso_model.fit(X_train, y_train)
        lassopred = lasso_model.predict(X_test)
        # Adding Linear Model
        df_in_loop[f'lasso'] = lassopred
        print('Lasso Done')
        
        ### Model 1: Ridge Regression ###
        ridge_cv = RidgeCV(cv = tscv)
        ridge_cv.fit(X_train, y_train)
    
        ridge_model = Ridge(alpha = ridge_cv.alpha_)
        ridge_model.fit(X_train, y_train)
        
        ridgepred = ridge_model.predict(X_test)
        # Adding Linear Model
        df_in_loop[f'ridge'] = ridgepred
        print('Ridge Done')
        
        ### Model 2: LSTM ###
        X_train_lstm = np.array(X_train).astype(np.float32)
        X_test_lstm = np.array(X_test).astype(np.float32)
        y_train_lstm = np.array(y_train).astype(np.float32)
        
        test_year = test_date.year
        print(f'test_year is {test_year} with hyperparameters of {lstm_hyperparams_dict_tech[str(test_year)]}')
        # lstm_model = load_model(f'best_lstm_model_{train_end.year}.keras')
        
        num_layers = len(lstm_hyperparams_dict_tech[str(test_year)]['units'])
        
        # Building LSTM Model
        lstm_model = Sequential()
        for layer_num in range(num_layers):
            is_last_layer = (layer_num == (num_layers - 1))  # Check if it's the last layer
            lstm_model.add(LSTM(
                units=lstm_hyperparams_dict_tech[str(test_year)]['units'][layer_num], 
                return_sequences=not is_last_layer,  # Only last layer has return_sequences=False
                input_shape=(X_train_lstm.shape[1], 1) if layer_num == 0 else None,  # Define input shape only for the first layer
                activation=lstm_hyperparams_dict_tech[str(test_year)]['activation'][layer_num]
            ))
            lstm_model.add(Dropout(lstm_hyperparams_dict_tech[str(test_year)]['drop_out'][layer_num]))
        # Output Layer
        lstm_model.add(Dense(units=1))
        
        # Compilation
        if lstm_hyperparams_dict_tech[str(test_year)]['optimizer'] == 'Adam':
            lstm_model.compile(
                optimizer=Adam(learning_rate=lstm_hyperparams_dict_tech[str(test_year)]['lr']),
                loss='mean_squared_error',
                metrics=[
                    tf.keras.metrics.RootMeanSquaredError()
                ]
            )
        elif lstm_hyperparams_dict_tech[str(test_year)]['optimizer'] == 'Nadam':
            lstm_model.compile(
                optimizer=Nadam(learning_rate=lstm_hyperparams_dict_tech[str(test_year)]['lr']),
                loss='mean_squared_error',
                metrics=[
                    tf.keras.metrics.RootMeanSquaredError()
                ]
            )
        elif lstm_hyperparams_dict_tech[str(test_year)]['optimizer'] == 'Adagrad':
            lstm_model.compile(
                optimizer=Adagrad(learning_rate=lstm_hyperparams_dict_tech[str(test_year)]['lr']),
                loss='mean_squared_error',
                metrics=[
                    tf.keras.metrics.RootMeanSquaredError()
                ]
            )

        # Early stopping callback
        callback = tf.keras.callbacks.EarlyStopping(monitor='loss', patience=3)

        # Train the model
        lstm_model.fit(
            X_train_lstm, y_train_lstm,
            epochs=10, batch_size=512,
            callbacks=[callback]
        )

        lstmpred = lstm_model.predict(X_test_lstm)
        df_in_loop[f'lstm'] = lstmpred
        print('LSTM Done')
        
        optimal_param_xgb = xgb_tuner(X_train, y_train, xgb_param_grid)
        xgboost_model = xgb.XGBRegressor(subsample=optimal_param_xgb['subsample'],
                                         n_estimators=optimal_param_xgb['n_estimators'],
                                         max_depth=optimal_param_xgb['max_depth'],
                                         learning_rate=optimal_param_xgb['learning_rate'],
                                         colsample_bytree=optimal_param_xgb['colsample_bytree'],
                                         random_state=18)
        xgboost_model.fit(X_train, y_train)
        df_in_loop[f'xgboost'] = xgboost_model.predict(X_test)
        print('XGBoost done')

        ## 16. LightGBM
        optimal_param_lgbm = lgbm_tuner(X_train, y_train, lgbm_param_grid)
        lightgbm_model = lgb.LGBMRegressor(objective='regression', 
                                           subsample = optimal_param_lgbm['subsample'],
                                           num_leaves = optimal_param_lgbm['num_leaves'],
                                           n_estimators = optimal_param_lgbm['n_estimators'],
                                           max_depth= optimal_param_lgbm['max_depth'],
                                           learning_rate=optimal_param_lgbm['learning_rate'],
                                           colsample_bytree=optimal_param_lgbm['colsample_bytree'],
                                           random_state=18, verbosity=-1)
        lightgbm_model.fit(X_train, y_train)
        df_in_loop[f'lgbm'] = lightgbm_model.predict(X_test)
        print('LGBM done')
                
        ### Model 3: Random Forest Regression ###
        print(f'test_year is {test_year} with hyperparameters of {rf_hyperparams_dict_tech[str(test_year)]}')
        rf_model = RandomForestRegressor(n_estimators=rf_hyperparams_dict_tech[str(test_year)]['n_estimators'], 
                                         min_samples_split=rf_hyperparams_dict_tech[str(test_year)]['min_samples_split'],
                                         min_samples_leaf=rf_hyperparams_dict_tech[str(test_year)]['min_samples_leaf'],
                                         max_features = rf_hyperparams_dict_tech[str(test_year)]['max_features'], 
                                         max_depth=rf_hyperparams_dict_tech[str(test_year)]['max_depth'],
                                         random_state=40, n_jobs=-1)

        rf_model.fit(X_train, y_train.values.ravel())
        rf_pred = rf_model.predict(X_test)
        # return(y_pred)
        df_in_loop[f'rf'] = rf_pred
        
        print('RF Done')
        
        ### Model 5: PCA ###
        X_train = pd.DataFrame(X_train)
        X_test = pd.DataFrame(X_test)

        X_train_pure = X_train
        X_test_pure = X_test
        
        pca = PCA()
        pca.fit(X_train_pure)
        cumulative_variance_ratio = np.cumsum(pca.explained_variance_ratio_)
          #to explain more than 85% of the variance
        num_components = np.where(cumulative_variance_ratio >= 0.85)[0][0] + 1 
        pca_new = PCA(n_components=num_components)
        X_train_pca = pca_new.fit_transform(X_train_pure)
        X_train_pca = pd.DataFrame(X_train_pca)
        X_train_pca.columns = X_train_pca.columns.astype(str)

        X_test_pca = pca_new.transform(X_test_pure)
        X_test_pca = pd.DataFrame(X_test_pca)
        X_test_pca.columns = X_test_pca.columns.astype(str)
        
        pure_factor_model = LinearRegression()
        pure_factor_model.fit(X_train_pca, y_train)
        y_pred = pure_factor_model.predict(X_test_pca)
        df_in_loop[f'pca'] = y_pred
        print('PCA Done')
        
        # Add results into loop
        results.append(df_in_loop)
        train_end = test_date
        num_remaining_dates = len(list(set(date for date in list_of_dates if date > test_date)))
        print(f'{num_remaining_dates} dates remaining')
        
    combined_df = pd.concat(results, ignore_index=True)
    return combined_df

In [11]:
# Run to train the models
y_tech = overall_function(dataset=df_tech, outcome = "rolling_alpha_5f", 
                          lstm_hyperparams_dict_tech=lstm_hyperparams_dict_tech, rf_hyperparams_dict_tech=rf_hyperparams_dict_tech)

  y = column_or_1d(y, warn=True)


Lasso Done
Ridge Done
test_year is 2019 with hyperparameters of {'num_layers': 2, 'units': [32, 32], 'optimizer': 'Adam', 'drop_out': [0.1, 0.1], 'activation': ['linear', 'tanh'], 'lr': 0.04130929884537057}


  super().__init__(**kwargs)


Epoch 1/10
[1m75/75[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m7s[0m 34ms/step - loss: 0.0207 - root_mean_squared_error: 0.1307
Epoch 2/10
[1m75/75[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 32ms/step - loss: 0.0010 - root_mean_squared_error: 0.0303    
Epoch 3/10
[1m75/75[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 32ms/step - loss: 0.0035 - root_mean_squared_error: 0.0580
Epoch 4/10
[1m75/75[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 32ms/step - loss: 6.1520e-04 - root_mean_squared_error: 0.0248
Epoch 5/10
[1m75/75[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 35ms/step - loss: 6.1096e-04 - root_mean_squared_error: 0.0247
Epoch 6/10
[1m75/75[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 35ms/step - loss: 5.9978e-04 - root_mean_squared_error: 0.0245
Epoch 7/10
[1m75/75[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 32ms/step - loss: 5.9395e-04 - root_mean_squared_error: 0.0244
Epoch 8/10
[1m75/75[0m [32m━━━━━━━━━━━━

  y = column_or_1d(y, warn=True)


LGBM done
test_year is 2019 with hyperparameters of {'n_estimators': 300, 'min_samples_split': 5, 'min_samples_leaf': 4, 'max_features': 'log2', 'max_depth': 20}
RF Done
PCA Done
64 dates remaining


  y = column_or_1d(y, warn=True)


Lasso Done
Ridge Done
test_year is 2019 with hyperparameters of {'num_layers': 2, 'units': [32, 32], 'optimizer': 'Adam', 'drop_out': [0.1, 0.1], 'activation': ['linear', 'tanh'], 'lr': 0.04130929884537057}
Epoch 1/10


  super().__init__(**kwargs)


[1m75/75[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m8s[0m 39ms/step - loss: 0.0225 - root_mean_squared_error: 0.1342
Epoch 2/10
[1m75/75[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 38ms/step - loss: 5.6206e-04 - root_mean_squared_error: 0.0237
Epoch 3/10
[1m75/75[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 38ms/step - loss: 5.5059e-04 - root_mean_squared_error: 0.0235
Epoch 4/10
[1m75/75[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 38ms/step - loss: 5.6153e-04 - root_mean_squared_error: 0.0237
Epoch 5/10
[1m75/75[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 38ms/step - loss: 5.0531e-04 - root_mean_squared_error: 0.0225
Epoch 6/10
[1m75/75[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 38ms/step - loss: 4.9360e-04 - root_mean_squared_error: 0.0222
Epoch 7/10
[1m75/75[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 39ms/step - loss: 5.0816e-04 - root_mean_squared_error: 0.0225
Epoch 8/10
[1m75/75[0m [32m━━━━━━━━━━━━━━━━━━━

  y = column_or_1d(y, warn=True)


LGBM done
test_year is 2019 with hyperparameters of {'n_estimators': 300, 'min_samples_split': 5, 'min_samples_leaf': 4, 'max_features': 'log2', 'max_depth': 20}
RF Done
PCA Done
63 dates remaining


  y = column_or_1d(y, warn=True)


Lasso Done
Ridge Done
test_year is 2019 with hyperparameters of {'num_layers': 2, 'units': [32, 32], 'optimizer': 'Adam', 'drop_out': [0.1, 0.1], 'activation': ['linear', 'tanh'], 'lr': 0.04130929884537057}
Epoch 1/10


  super().__init__(**kwargs)


[1m76/76[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m8s[0m 38ms/step - loss: 0.0144 - root_mean_squared_error: 0.1084
Epoch 2/10
[1m76/76[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 37ms/step - loss: 5.6547e-04 - root_mean_squared_error: 0.0238
Epoch 3/10
[1m76/76[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 37ms/step - loss: 5.3872e-04 - root_mean_squared_error: 0.0232
Epoch 4/10
[1m76/76[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 38ms/step - loss: 5.2007e-04 - root_mean_squared_error: 0.0228
Epoch 5/10
[1m76/76[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 37ms/step - loss: 5.1662e-04 - root_mean_squared_error: 0.0227
Epoch 6/10
[1m76/76[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 38ms/step - loss: 5.2717e-04 - root_mean_squared_error: 0.0229
Epoch 7/10
[1m76/76[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 37ms/step - loss: 5.0290e-04 - root_mean_squared_error: 0.0224
Epoch 8/10
[1m76/76[0m [32m━━━━━━━━━━━━━━━━━━━

  y = column_or_1d(y, warn=True)


LGBM done
test_year is 2019 with hyperparameters of {'n_estimators': 300, 'min_samples_split': 5, 'min_samples_leaf': 4, 'max_features': 'log2', 'max_depth': 20}
RF Done
PCA Done
62 dates remaining


  y = column_or_1d(y, warn=True)


Lasso Done
Ridge Done
test_year is 2019 with hyperparameters of {'num_layers': 2, 'units': [32, 32], 'optimizer': 'Adam', 'drop_out': [0.1, 0.1], 'activation': ['linear', 'tanh'], 'lr': 0.04130929884537057}
Epoch 1/10


  super().__init__(**kwargs)


[1m76/76[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m8s[0m 39ms/step - loss: 0.0116 - root_mean_squared_error: 0.0970
Epoch 2/10
[1m76/76[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 39ms/step - loss: 5.4368e-04 - root_mean_squared_error: 0.0233
Epoch 3/10
[1m76/76[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 39ms/step - loss: 5.4776e-04 - root_mean_squared_error: 0.0234
Epoch 4/10
[1m76/76[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 39ms/step - loss: 5.3893e-04 - root_mean_squared_error: 0.0232
Epoch 5/10
[1m76/76[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 41ms/step - loss: 5.1502e-04 - root_mean_squared_error: 0.0227
Epoch 6/10
[1m76/76[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 40ms/step - loss: 4.9556e-04 - root_mean_squared_error: 0.0223
Epoch 7/10
[1m76/76[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 38ms/step - loss: 5.0762e-04 - root_mean_squared_error: 0.0225
Epoch 8/10
[1m76/76[0m [32m━━━━━━━━━━━━━━━━━━━

  y = column_or_1d(y, warn=True)


RF Done
PCA Done
61 dates remaining


  y = column_or_1d(y, warn=True)


Lasso Done
Ridge Done
test_year is 2019 with hyperparameters of {'num_layers': 2, 'units': [32, 32], 'optimizer': 'Adam', 'drop_out': [0.1, 0.1], 'activation': ['linear', 'tanh'], 'lr': 0.04130929884537057}
Epoch 1/10


  super().__init__(**kwargs)


[1m77/77[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m8s[0m 39ms/step - loss: 0.0277 - root_mean_squared_error: 0.1482
Epoch 2/10
[1m77/77[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 39ms/step - loss: 5.7339e-04 - root_mean_squared_error: 0.0239
Epoch 3/10
[1m77/77[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 38ms/step - loss: 5.4382e-04 - root_mean_squared_error: 0.0233
Epoch 4/10
[1m77/77[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 39ms/step - loss: 5.0213e-04 - root_mean_squared_error: 0.0224
Epoch 5/10
[1m77/77[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 39ms/step - loss: 5.3706e-04 - root_mean_squared_error: 0.0232
Epoch 6/10
[1m77/77[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 40ms/step - loss: 5.2027e-04 - root_mean_squared_error: 0.0228
Epoch 7/10
[1m77/77[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 39ms/step - loss: 5.1909e-04 - root_mean_squared_error: 0.0228
Epoch 8/10
[1m77/77[0m [32m━━━━━━━━━━━━━━━━━━━

  y = column_or_1d(y, warn=True)


LGBM done
test_year is 2019 with hyperparameters of {'n_estimators': 300, 'min_samples_split': 5, 'min_samples_leaf': 4, 'max_features': 'log2', 'max_depth': 20}
RF Done
PCA Done
60 dates remaining


  y = column_or_1d(y, warn=True)


Lasso Done
Ridge Done
test_year is 2019 with hyperparameters of {'num_layers': 2, 'units': [32, 32], 'optimizer': 'Adam', 'drop_out': [0.1, 0.1], 'activation': ['linear', 'tanh'], 'lr': 0.04130929884537057}
Epoch 1/10


  super().__init__(**kwargs)


[1m77/77[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m8s[0m 40ms/step - loss: 0.0243 - root_mean_squared_error: 0.1417
Epoch 2/10
[1m77/77[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 40ms/step - loss: 0.0012 - root_mean_squared_error: 0.0339
Epoch 3/10
[1m77/77[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 39ms/step - loss: 6.5324e-04 - root_mean_squared_error: 0.0255
Epoch 4/10
[1m77/77[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 41ms/step - loss: 6.1298e-04 - root_mean_squared_error: 0.0248
Epoch 5/10
[1m77/77[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 39ms/step - loss: 5.9429e-04 - root_mean_squared_error: 0.0244
Epoch 6/10
[1m77/77[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 41ms/step - loss: 5.7073e-04 - root_mean_squared_error: 0.0239
Epoch 7/10
[1m77/77[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 40ms/step - loss: 6.0405e-04 - root_mean_squared_error: 0.0246
Epoch 8/10
[1m77/77[0m [32m━━━━━━━━━━━━━━━━━━━━[0

  y = column_or_1d(y, warn=True)


LGBM done
test_year is 2019 with hyperparameters of {'n_estimators': 300, 'min_samples_split': 5, 'min_samples_leaf': 4, 'max_features': 'log2', 'max_depth': 20}
RF Done
PCA Done
59 dates remaining


  y = column_or_1d(y, warn=True)


Lasso Done
Ridge Done
test_year is 2019 with hyperparameters of {'num_layers': 2, 'units': [32, 32], 'optimizer': 'Adam', 'drop_out': [0.1, 0.1], 'activation': ['linear', 'tanh'], 'lr': 0.04130929884537057}
Epoch 1/10


  super().__init__(**kwargs)


[1m78/78[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m9s[0m 39ms/step - loss: 0.0334 - root_mean_squared_error: 0.1633
Epoch 2/10
[1m78/78[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 39ms/step - loss: 6.1536e-04 - root_mean_squared_error: 0.0248
Epoch 3/10
[1m78/78[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 39ms/step - loss: 5.9633e-04 - root_mean_squared_error: 0.0244
Epoch 4/10
[1m78/78[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 40ms/step - loss: 0.0030 - root_mean_squared_error: 0.0446
Epoch 5/10
[1m78/78[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 39ms/step - loss: 0.0053 - root_mean_squared_error: 0.0709
Epoch 6/10
[1m78/78[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 39ms/step - loss: 6.3141e-04 - root_mean_squared_error: 0.0251
[1m8/8[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 82ms/step
LSTM Done
Fitting 5 folds for each of 10 candidates, totalling 50 fits
Best XGBoost Parameters: {'subsample': 0.7, 'n_estim

  y = column_or_1d(y, warn=True)


LGBM done
test_year is 2019 with hyperparameters of {'n_estimators': 300, 'min_samples_split': 5, 'min_samples_leaf': 4, 'max_features': 'log2', 'max_depth': 20}
RF Done
PCA Done
58 dates remaining


  y = column_or_1d(y, warn=True)


Lasso Done
Ridge Done
test_year is 2019 with hyperparameters of {'num_layers': 2, 'units': [32, 32], 'optimizer': 'Adam', 'drop_out': [0.1, 0.1], 'activation': ['linear', 'tanh'], 'lr': 0.04130929884537057}
Epoch 1/10


  super().__init__(**kwargs)


[1m78/78[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m8s[0m 40ms/step - loss: 0.0100 - root_mean_squared_error: 0.0908
Epoch 2/10
[1m78/78[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 38ms/step - loss: 5.4284e-04 - root_mean_squared_error: 0.0233
Epoch 3/10
[1m78/78[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 40ms/step - loss: 5.1177e-04 - root_mean_squared_error: 0.0226
Epoch 4/10
[1m78/78[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 39ms/step - loss: 5.3442e-04 - root_mean_squared_error: 0.0231
Epoch 5/10
[1m78/78[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 39ms/step - loss: 4.6988e-04 - root_mean_squared_error: 0.0217
Epoch 6/10
[1m78/78[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 39ms/step - loss: 4.7877e-04 - root_mean_squared_error: 0.0219
Epoch 7/10
[1m78/78[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 39ms/step - loss: 0.0015 - root_mean_squared_error: 0.0291  
Epoch 8/10
[1m78/78[0m [32m━━━━━━━━━━━━━━━━━━━━

  y = column_or_1d(y, warn=True)


RF Done
PCA Done
57 dates remaining


  y = column_or_1d(y, warn=True)


Lasso Done
Ridge Done
test_year is 2019 with hyperparameters of {'num_layers': 2, 'units': [32, 32], 'optimizer': 'Adam', 'drop_out': [0.1, 0.1], 'activation': ['linear', 'tanh'], 'lr': 0.04130929884537057}
Epoch 1/10


  super().__init__(**kwargs)


[1m79/79[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m9s[0m 39ms/step - loss: 0.0246 - root_mean_squared_error: 0.1410
Epoch 2/10
[1m79/79[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 39ms/step - loss: 5.4822e-04 - root_mean_squared_error: 0.0234
Epoch 3/10
[1m79/79[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 39ms/step - loss: 5.6633e-04 - root_mean_squared_error: 0.0238
Epoch 4/10
[1m79/79[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 39ms/step - loss: 5.3611e-04 - root_mean_squared_error: 0.0231
Epoch 5/10
[1m79/79[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 39ms/step - loss: 5.0671e-04 - root_mean_squared_error: 0.0225
Epoch 6/10
[1m79/79[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 39ms/step - loss: 5.9882e-04 - root_mean_squared_error: 0.0245
Epoch 7/10
[1m79/79[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 39ms/step - loss: 5.1138e-04 - root_mean_squared_error: 0.0226
Epoch 8/10
[1m79/79[0m [32m━━━━━━━━━━━━━━━━━━━

  y = column_or_1d(y, warn=True)


RF Done
PCA Done
56 dates remaining


  y = column_or_1d(y, warn=True)


Lasso Done
Ridge Done
test_year is 2019 with hyperparameters of {'num_layers': 2, 'units': [32, 32], 'optimizer': 'Adam', 'drop_out': [0.1, 0.1], 'activation': ['linear', 'tanh'], 'lr': 0.04130929884537057}
Epoch 1/10


  super().__init__(**kwargs)


[1m79/79[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m8s[0m 39ms/step - loss: 0.0239 - root_mean_squared_error: 0.1384
Epoch 2/10
[1m79/79[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 39ms/step - loss: 5.2579e-04 - root_mean_squared_error: 0.0229
Epoch 3/10
[1m79/79[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 39ms/step - loss: 4.9225e-04 - root_mean_squared_error: 0.0222
Epoch 4/10
[1m79/79[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 38ms/step - loss: 5.2771e-04 - root_mean_squared_error: 0.0230
Epoch 5/10
[1m79/79[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 39ms/step - loss: 5.3643e-04 - root_mean_squared_error: 0.0232
Epoch 6/10
[1m79/79[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 40ms/step - loss: 4.9413e-04 - root_mean_squared_error: 0.0222
Epoch 7/10
[1m79/79[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 39ms/step - loss: 4.8918e-04 - root_mean_squared_error: 0.0221
Epoch 8/10
[1m79/79[0m [32m━━━━━━━━━━━━━━━━━━━

  y = column_or_1d(y, warn=True)


RF Done
PCA Done
55 dates remaining


  y = column_or_1d(y, warn=True)


Lasso Done
Ridge Done
test_year is 2019 with hyperparameters of {'num_layers': 2, 'units': [32, 32], 'optimizer': 'Adam', 'drop_out': [0.1, 0.1], 'activation': ['linear', 'tanh'], 'lr': 0.04130929884537057}
Epoch 1/10


  super().__init__(**kwargs)


[1m80/80[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m9s[0m 38ms/step - loss: 0.0193 - root_mean_squared_error: 0.1240
Epoch 2/10
[1m80/80[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 38ms/step - loss: 5.4249e-04 - root_mean_squared_error: 0.0233
Epoch 3/10
[1m80/80[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 38ms/step - loss: 5.1909e-04 - root_mean_squared_error: 0.0228
Epoch 4/10
[1m80/80[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 38ms/step - loss: 5.3225e-04 - root_mean_squared_error: 0.0231
Epoch 5/10
[1m80/80[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 38ms/step - loss: 5.2654e-04 - root_mean_squared_error: 0.0229
Epoch 6/10
[1m80/80[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 39ms/step - loss: 5.0504e-04 - root_mean_squared_error: 0.0225
Epoch 7/10
[1m80/80[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 38ms/step - loss: 5.3054e-04 - root_mean_squared_error: 0.0230
Epoch 8/10
[1m80/80[0m [32m━━━━━━━━━━━━━━━━━━━

  y = column_or_1d(y, warn=True)


LGBM done
test_year is 2019 with hyperparameters of {'n_estimators': 300, 'min_samples_split': 5, 'min_samples_leaf': 4, 'max_features': 'log2', 'max_depth': 20}
RF Done
PCA Done
54 dates remaining


  y = column_or_1d(y, warn=True)


Lasso Done
Ridge Done
test_year is 2020 with hyperparameters of {'num_layers': 2, 'units': [96, 32], 'optimizer': 'Adam', 'drop_out': [0.1, 0.1], 'activation': ['tanh', 'tanh'], 'lr': 0.0017545326289340611}
Epoch 1/10


  super().__init__(**kwargs)


[1m80/80[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m11s[0m 75ms/step - loss: 7.1512e-04 - root_mean_squared_error: 0.0266
Epoch 2/10
[1m80/80[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 75ms/step - loss: 5.2293e-04 - root_mean_squared_error: 0.0229
Epoch 3/10
[1m80/80[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 75ms/step - loss: 5.0792e-04 - root_mean_squared_error: 0.0225
Epoch 4/10
[1m80/80[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 74ms/step - loss: 5.3969e-04 - root_mean_squared_error: 0.0232
Epoch 5/10
[1m80/80[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 76ms/step - loss: 5.2361e-04 - root_mean_squared_error: 0.0229
Epoch 6/10
[1m80/80[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 75ms/step - loss: 5.1758e-04 - root_mean_squared_error: 0.0227
Epoch 7/10
[1m80/80[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 75ms/step - loss: 4.8131e-04 - root_mean_squared_error: 0.0219
Epoch 8/10
[1m80/80[0m [32m━━━━━━━━━━━━━━

  y = column_or_1d(y, warn=True)


RF Done
PCA Done
53 dates remaining


  y = column_or_1d(y, warn=True)


Lasso Done
Ridge Done
test_year is 2020 with hyperparameters of {'num_layers': 2, 'units': [96, 32], 'optimizer': 'Adam', 'drop_out': [0.1, 0.1], 'activation': ['tanh', 'tanh'], 'lr': 0.0017545326289340611}
Epoch 1/10


  super().__init__(**kwargs)


[1m81/81[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m12s[0m 73ms/step - loss: 7.7106e-04 - root_mean_squared_error: 0.0276
Epoch 2/10
[1m81/81[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 74ms/step - loss: 5.1458e-04 - root_mean_squared_error: 0.0227
Epoch 3/10
[1m81/81[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 74ms/step - loss: 5.2171e-04 - root_mean_squared_error: 0.0228
Epoch 4/10
[1m81/81[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 75ms/step - loss: 5.0608e-04 - root_mean_squared_error: 0.0225
Epoch 5/10
[1m81/81[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 74ms/step - loss: 5.0334e-04 - root_mean_squared_error: 0.0224
Epoch 6/10
[1m81/81[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 73ms/step - loss: 5.3456e-04 - root_mean_squared_error: 0.0231
Epoch 7/10
[1m81/81[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 74ms/step - loss: 5.1940e-04 - root_mean_squared_error: 0.0228
Epoch 8/10
[1m81/81[0m [32m━━━━━━━━━━━━━━

  y = column_or_1d(y, warn=True)


LGBM done
test_year is 2020 with hyperparameters of {'n_estimators': 300, 'min_samples_split': 5, 'min_samples_leaf': 4, 'max_features': 'log2', 'max_depth': 20}
RF Done
PCA Done
52 dates remaining


  y = column_or_1d(y, warn=True)


Lasso Done
Ridge Done
test_year is 2020 with hyperparameters of {'num_layers': 2, 'units': [96, 32], 'optimizer': 'Adam', 'drop_out': [0.1, 0.1], 'activation': ['tanh', 'tanh'], 'lr': 0.0017545326289340611}
Epoch 1/10


  super().__init__(**kwargs)


[1m81/81[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m11s[0m 73ms/step - loss: 9.9674e-04 - root_mean_squared_error: 0.0312
Epoch 2/10
[1m81/81[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 73ms/step - loss: 5.6618e-04 - root_mean_squared_error: 0.0238
Epoch 3/10
[1m81/81[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 73ms/step - loss: 5.5250e-04 - root_mean_squared_error: 0.0235
Epoch 4/10
[1m81/81[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 73ms/step - loss: 5.1115e-04 - root_mean_squared_error: 0.0226
Epoch 5/10
[1m81/81[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 73ms/step - loss: 5.6857e-04 - root_mean_squared_error: 0.0238
Epoch 6/10
[1m81/81[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 75ms/step - loss: 5.3999e-04 - root_mean_squared_error: 0.0232
Epoch 7/10
[1m81/81[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 74ms/step - loss: 5.1053e-04 - root_mean_squared_error: 0.0226
Epoch 8/10
[1m81/81[0m [32m━━━━━━━━━━━━━━

  y = column_or_1d(y, warn=True)


LGBM done
test_year is 2020 with hyperparameters of {'n_estimators': 300, 'min_samples_split': 5, 'min_samples_leaf': 4, 'max_features': 'log2', 'max_depth': 20}
RF Done
PCA Done
51 dates remaining


  y = column_or_1d(y, warn=True)


Lasso Done
Ridge Done
test_year is 2020 with hyperparameters of {'num_layers': 2, 'units': [96, 32], 'optimizer': 'Adam', 'drop_out': [0.1, 0.1], 'activation': ['tanh', 'tanh'], 'lr': 0.0017545326289340611}
Epoch 1/10


  super().__init__(**kwargs)


[1m82/82[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m11s[0m 74ms/step - loss: 0.0010 - root_mean_squared_error: 0.0317
Epoch 2/10
[1m82/82[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 74ms/step - loss: 5.7919e-04 - root_mean_squared_error: 0.0241
Epoch 3/10
[1m82/82[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 75ms/step - loss: 5.7281e-04 - root_mean_squared_error: 0.0239
Epoch 4/10
[1m82/82[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 75ms/step - loss: 5.5987e-04 - root_mean_squared_error: 0.0237
Epoch 5/10
[1m82/82[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 74ms/step - loss: 5.7249e-04 - root_mean_squared_error: 0.0239
Epoch 6/10
[1m82/82[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 74ms/step - loss: 5.5666e-04 - root_mean_squared_error: 0.0236
Epoch 7/10
[1m82/82[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 75ms/step - loss: 5.3046e-04 - root_mean_squared_error: 0.0230
Epoch 8/10
[1m82/82[0m [32m━━━━━━━━━━━━━━━━━━

  y = column_or_1d(y, warn=True)


LGBM done
test_year is 2020 with hyperparameters of {'n_estimators': 300, 'min_samples_split': 5, 'min_samples_leaf': 4, 'max_features': 'log2', 'max_depth': 20}
RF Done
PCA Done
50 dates remaining


  y = column_or_1d(y, warn=True)


Lasso Done
Ridge Done
test_year is 2020 with hyperparameters of {'num_layers': 2, 'units': [96, 32], 'optimizer': 'Adam', 'drop_out': [0.1, 0.1], 'activation': ['tanh', 'tanh'], 'lr': 0.0017545326289340611}
Epoch 1/10


  super().__init__(**kwargs)


[1m82/82[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m11s[0m 78ms/step - loss: 7.6604e-04 - root_mean_squared_error: 0.0275
Epoch 2/10
[1m82/82[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 76ms/step - loss: 5.7275e-04 - root_mean_squared_error: 0.0239
Epoch 3/10
[1m82/82[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 76ms/step - loss: 5.4671e-04 - root_mean_squared_error: 0.0234
Epoch 4/10
[1m82/82[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 74ms/step - loss: 5.5460e-04 - root_mean_squared_error: 0.0235
Epoch 5/10
[1m82/82[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 74ms/step - loss: 5.4637e-04 - root_mean_squared_error: 0.0234
Epoch 6/10
[1m82/82[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 75ms/step - loss: 5.5408e-04 - root_mean_squared_error: 0.0235
Epoch 7/10
[1m82/82[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 78ms/step - loss: 5.2549e-04 - root_mean_squared_error: 0.0229
Epoch 8/10
[1m82/82[0m [32m━━━━━━━━━━━━━━

  y = column_or_1d(y, warn=True)


LGBM done
test_year is 2020 with hyperparameters of {'n_estimators': 300, 'min_samples_split': 5, 'min_samples_leaf': 4, 'max_features': 'log2', 'max_depth': 20}
RF Done
PCA Done
49 dates remaining


  y = column_or_1d(y, warn=True)


Lasso Done
Ridge Done
test_year is 2020 with hyperparameters of {'num_layers': 2, 'units': [96, 32], 'optimizer': 'Adam', 'drop_out': [0.1, 0.1], 'activation': ['tanh', 'tanh'], 'lr': 0.0017545326289340611}
Epoch 1/10


  super().__init__(**kwargs)


[1m83/83[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m11s[0m 75ms/step - loss: 7.6515e-04 - root_mean_squared_error: 0.0275
Epoch 2/10
[1m83/83[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 74ms/step - loss: 5.5844e-04 - root_mean_squared_error: 0.0236
Epoch 3/10
[1m83/83[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 74ms/step - loss: 5.8848e-04 - root_mean_squared_error: 0.0243
Epoch 4/10
[1m83/83[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 75ms/step - loss: 5.4716e-04 - root_mean_squared_error: 0.0234
Epoch 5/10
[1m83/83[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 74ms/step - loss: 5.5351e-04 - root_mean_squared_error: 0.0235
Epoch 6/10
[1m83/83[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 74ms/step - loss: 5.8343e-04 - root_mean_squared_error: 0.0241
Epoch 7/10
[1m83/83[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 75ms/step - loss: 5.5395e-04 - root_mean_squared_error: 0.0235
Epoch 8/10
[1m83/83[0m [32m━━━━━━━━━━━━━━

  y = column_or_1d(y, warn=True)


RF Done
PCA Done
48 dates remaining


  y = column_or_1d(y, warn=True)


Lasso Done
Ridge Done
test_year is 2020 with hyperparameters of {'num_layers': 2, 'units': [96, 32], 'optimizer': 'Adam', 'drop_out': [0.1, 0.1], 'activation': ['tanh', 'tanh'], 'lr': 0.0017545326289340611}
Epoch 1/10


  super().__init__(**kwargs)


[1m83/83[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m11s[0m 75ms/step - loss: 9.0910e-04 - root_mean_squared_error: 0.0299
Epoch 2/10
[1m83/83[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 73ms/step - loss: 5.7472e-04 - root_mean_squared_error: 0.0240
Epoch 3/10
[1m83/83[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 73ms/step - loss: 5.5336e-04 - root_mean_squared_error: 0.0235
Epoch 4/10
[1m83/83[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 73ms/step - loss: 5.4026e-04 - root_mean_squared_error: 0.0232
Epoch 5/10
[1m83/83[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 73ms/step - loss: 5.4891e-04 - root_mean_squared_error: 0.0234
Epoch 6/10
[1m83/83[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 74ms/step - loss: 5.8477e-04 - root_mean_squared_error: 0.0242
Epoch 7/10
[1m83/83[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 73ms/step - loss: 5.5191e-04 - root_mean_squared_error: 0.0235
Epoch 8/10
[1m83/83[0m [32m━━━━━━━━━━━━━━

  y = column_or_1d(y, warn=True)


RF Done
PCA Done
47 dates remaining


  y = column_or_1d(y, warn=True)


Lasso Done
Ridge Done
test_year is 2020 with hyperparameters of {'num_layers': 2, 'units': [96, 32], 'optimizer': 'Adam', 'drop_out': [0.1, 0.1], 'activation': ['tanh', 'tanh'], 'lr': 0.0017545326289340611}


  super().__init__(**kwargs)


Epoch 1/10
[1m84/84[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m11s[0m 74ms/step - loss: 8.5279e-04 - root_mean_squared_error: 0.0289
Epoch 2/10
[1m84/84[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 73ms/step - loss: 5.7006e-04 - root_mean_squared_error: 0.0239
Epoch 3/10
[1m84/84[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 73ms/step - loss: 5.6384e-04 - root_mean_squared_error: 0.0237
Epoch 4/10
[1m84/84[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 73ms/step - loss: 5.6767e-04 - root_mean_squared_error: 0.0238
Epoch 5/10
[1m84/84[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 74ms/step - loss: 5.5431e-04 - root_mean_squared_error: 0.0235
Epoch 6/10
[1m84/84[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m7s[0m 77ms/step - loss: 5.6755e-04 - root_mean_squared_error: 0.0238
Epoch 7/10
[1m84/84[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 76ms/step - loss: 5.1569e-04 - root_mean_squared_error: 0.0227
Epoch 8/10
[1m84/84[0m [32m━━━

  y = column_or_1d(y, warn=True)


RF Done
PCA Done
46 dates remaining


  y = column_or_1d(y, warn=True)


Lasso Done
Ridge Done
test_year is 2020 with hyperparameters of {'num_layers': 2, 'units': [96, 32], 'optimizer': 'Adam', 'drop_out': [0.1, 0.1], 'activation': ['tanh', 'tanh'], 'lr': 0.0017545326289340611}


  super().__init__(**kwargs)


Epoch 1/10
[1m85/85[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m11s[0m 73ms/step - loss: 0.0012 - root_mean_squared_error: 0.0335
Epoch 2/10
[1m85/85[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 73ms/step - loss: 5.5781e-04 - root_mean_squared_error: 0.0236
Epoch 3/10
[1m85/85[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 73ms/step - loss: 5.4430e-04 - root_mean_squared_error: 0.0233
Epoch 4/10
[1m85/85[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 72ms/step - loss: 5.6062e-04 - root_mean_squared_error: 0.0237
Epoch 5/10
[1m85/85[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 73ms/step - loss: 5.6922e-04 - root_mean_squared_error: 0.0239
Epoch 6/10
[1m85/85[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 73ms/step - loss: 5.7311e-04 - root_mean_squared_error: 0.0239
Epoch 7/10
[1m85/85[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 73ms/step - loss: 5.2969e-04 - root_mean_squared_error: 0.0230
Epoch 8/10
[1m85/85[0m [32m━━━━━━━

  y = column_or_1d(y, warn=True)


LGBM done
test_year is 2020 with hyperparameters of {'n_estimators': 300, 'min_samples_split': 5, 'min_samples_leaf': 4, 'max_features': 'log2', 'max_depth': 20}
RF Done
PCA Done
45 dates remaining


  y = column_or_1d(y, warn=True)


Lasso Done
Ridge Done
test_year is 2020 with hyperparameters of {'num_layers': 2, 'units': [96, 32], 'optimizer': 'Adam', 'drop_out': [0.1, 0.1], 'activation': ['tanh', 'tanh'], 'lr': 0.0017545326289340611}
Epoch 1/10


  super().__init__(**kwargs)


[1m85/85[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m11s[0m 73ms/step - loss: 7.6221e-04 - root_mean_squared_error: 0.0274
Epoch 2/10
[1m85/85[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 73ms/step - loss: 5.6518e-04 - root_mean_squared_error: 0.0238
Epoch 3/10
[1m85/85[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 73ms/step - loss: 5.6348e-04 - root_mean_squared_error: 0.0237
Epoch 4/10
[1m85/85[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 73ms/step - loss: 5.5815e-04 - root_mean_squared_error: 0.0236
Epoch 5/10
[1m85/85[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 73ms/step - loss: 5.3387e-04 - root_mean_squared_error: 0.0231
Epoch 6/10
[1m85/85[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 73ms/step - loss: 5.0944e-04 - root_mean_squared_error: 0.0226
Epoch 7/10
[1m85/85[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m7s[0m 77ms/step - loss: 5.0851e-04 - root_mean_squared_error: 0.0225
Epoch 8/10
[1m85/85[0m [32m━━━━━━━━━━━━━━

  y = column_or_1d(y, warn=True)


LGBM done
test_year is 2020 with hyperparameters of {'n_estimators': 300, 'min_samples_split': 5, 'min_samples_leaf': 4, 'max_features': 'log2', 'max_depth': 20}
RF Done
PCA Done
44 dates remaining


  y = column_or_1d(y, warn=True)


Lasso Done
Ridge Done
test_year is 2020 with hyperparameters of {'num_layers': 2, 'units': [96, 32], 'optimizer': 'Adam', 'drop_out': [0.1, 0.1], 'activation': ['tanh', 'tanh'], 'lr': 0.0017545326289340611}
Epoch 1/10


  super().__init__(**kwargs)


[1m86/86[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 71ms/step - loss: 9.6749e-04 - root_mean_squared_error: 0.0307
Epoch 2/10
[1m86/86[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 72ms/step - loss: 5.6910e-04 - root_mean_squared_error: 0.0239
Epoch 3/10
[1m86/86[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 71ms/step - loss: 5.5334e-04 - root_mean_squared_error: 0.0235
Epoch 4/10
[1m86/86[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 72ms/step - loss: 5.8690e-04 - root_mean_squared_error: 0.0242
Epoch 5/10
[1m86/86[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 72ms/step - loss: 5.4091e-04 - root_mean_squared_error: 0.0232
Epoch 6/10
[1m86/86[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 72ms/step - loss: 5.3359e-04 - root_mean_squared_error: 0.0231
Epoch 7/10
[1m86/86[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 72ms/step - loss: 5.7298e-04 - root_mean_squared_error: 0.0239
Epoch 8/10
[1m86/86[0m [32m━━━━━━━━━━━━━━

  y = column_or_1d(y, warn=True)


RF Done
PCA Done
43 dates remaining


  y = column_or_1d(y, warn=True)


Lasso Done
Ridge Done
test_year is 2020 with hyperparameters of {'num_layers': 2, 'units': [96, 32], 'optimizer': 'Adam', 'drop_out': [0.1, 0.1], 'activation': ['tanh', 'tanh'], 'lr': 0.0017545326289340611}


  super().__init__(**kwargs)


Epoch 1/10
[1m86/86[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m11s[0m 74ms/step - loss: 8.4769e-04 - root_mean_squared_error: 0.0289
Epoch 2/10
[1m86/86[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 73ms/step - loss: 5.3318e-04 - root_mean_squared_error: 0.0231
Epoch 3/10
[1m86/86[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 72ms/step - loss: 5.6912e-04 - root_mean_squared_error: 0.0239
Epoch 4/10
[1m86/86[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 72ms/step - loss: 5.4769e-04 - root_mean_squared_error: 0.0234
Epoch 5/10
[1m86/86[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 73ms/step - loss: 5.6228e-04 - root_mean_squared_error: 0.0237
Epoch 6/10
[1m86/86[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 72ms/step - loss: 5.7292e-04 - root_mean_squared_error: 0.0239
Epoch 7/10
[1m86/86[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 72ms/step - loss: 5.2740e-04 - root_mean_squared_error: 0.0230
Epoch 8/10
[1m86/86[0m [32m━━━

  y = column_or_1d(y, warn=True)


LGBM done
test_year is 2020 with hyperparameters of {'n_estimators': 300, 'min_samples_split': 5, 'min_samples_leaf': 4, 'max_features': 'log2', 'max_depth': 20}
RF Done
PCA Done
42 dates remaining


  y = column_or_1d(y, warn=True)


Lasso Done
Ridge Done
test_year is 2021 with hyperparameters of {'num_layers': 2, 'units': [96, 32], 'optimizer': 'Adam', 'drop_out': [0.1, 0.2], 'activation': ['linear', 'tanh'], 'lr': 0.008514542675036612}
Epoch 1/10


  super().__init__(**kwargs)


[1m87/87[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 70ms/step - loss: 0.0049 - root_mean_squared_error: 0.0635
Epoch 2/10
[1m87/87[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 70ms/step - loss: 5.8245e-04 - root_mean_squared_error: 0.0241
Epoch 3/10
[1m87/87[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 69ms/step - loss: 5.5775e-04 - root_mean_squared_error: 0.0236
Epoch 4/10
[1m87/87[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 69ms/step - loss: 5.5413e-04 - root_mean_squared_error: 0.0235
Epoch 5/10
[1m87/87[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 70ms/step - loss: 5.4420e-04 - root_mean_squared_error: 0.0233
Epoch 6/10
[1m87/87[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 69ms/step - loss: 5.2638e-04 - root_mean_squared_error: 0.0229
Epoch 7/10
[1m87/87[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 70ms/step - loss: 5.5471e-04 - root_mean_squared_error: 0.0235
Epoch 8/10
[1m87/87[0m [32m━━━━━━━━━━━━━━━━━━

  y = column_or_1d(y, warn=True)


LGBM done
test_year is 2021 with hyperparameters of {'n_estimators': 300, 'min_samples_split': 5, 'min_samples_leaf': 4, 'max_features': 'log2', 'max_depth': 20}
RF Done
PCA Done
41 dates remaining


  y = column_or_1d(y, warn=True)


Lasso Done
Ridge Done
test_year is 2021 with hyperparameters of {'num_layers': 2, 'units': [96, 32], 'optimizer': 'Adam', 'drop_out': [0.1, 0.2], 'activation': ['linear', 'tanh'], 'lr': 0.008514542675036612}
Epoch 1/10


  super().__init__(**kwargs)


[1m87/87[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m11s[0m 70ms/step - loss: 0.0048 - root_mean_squared_error: 0.0630
Epoch 2/10
[1m87/87[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 71ms/step - loss: 5.7017e-04 - root_mean_squared_error: 0.0239
Epoch 3/10
[1m87/87[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 70ms/step - loss: 5.3654e-04 - root_mean_squared_error: 0.0232
Epoch 4/10
[1m87/87[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 70ms/step - loss: 5.5429e-04 - root_mean_squared_error: 0.0235
Epoch 5/10
[1m87/87[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 70ms/step - loss: 5.4354e-04 - root_mean_squared_error: 0.0233
Epoch 6/10
[1m87/87[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 72ms/step - loss: 5.6824e-04 - root_mean_squared_error: 0.0238
Epoch 7/10
[1m87/87[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 70ms/step - loss: 5.6242e-04 - root_mean_squared_error: 0.0237
Epoch 8/10
[1m87/87[0m [32m━━━━━━━━━━━━━━━━━━

  y = column_or_1d(y, warn=True)


RF Done
PCA Done
40 dates remaining


  y = column_or_1d(y, warn=True)


Lasso Done
Ridge Done
test_year is 2021 with hyperparameters of {'num_layers': 2, 'units': [96, 32], 'optimizer': 'Adam', 'drop_out': [0.1, 0.2], 'activation': ['linear', 'tanh'], 'lr': 0.008514542675036612}
Epoch 1/10


  super().__init__(**kwargs)


[1m88/88[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m11s[0m 70ms/step - loss: 0.0055 - root_mean_squared_error: 0.0673
Epoch 2/10
[1m88/88[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 70ms/step - loss: 5.6833e-04 - root_mean_squared_error: 0.0238
Epoch 3/10
[1m88/88[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 70ms/step - loss: 5.8387e-04 - root_mean_squared_error: 0.0241
Epoch 4/10
[1m88/88[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 70ms/step - loss: 0.0044 - root_mean_squared_error: 0.0597
Epoch 5/10
[1m88/88[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 69ms/step - loss: 6.3776e-04 - root_mean_squared_error: 0.0252
[1m9/9[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 11ms/step
LSTM Done
Fitting 5 folds for each of 10 candidates, totalling 50 fits
Best XGBoost Parameters: {'subsample': 0.7, 'n_estimators': 500, 'max_depth': 3, 'learning_rate': 0.01, 'colsample_bytree': 0.7}
XGBoost done
Fitting 5 folds for each of 10 candidates, 

  y = column_or_1d(y, warn=True)


LGBM done
test_year is 2021 with hyperparameters of {'n_estimators': 300, 'min_samples_split': 5, 'min_samples_leaf': 4, 'max_features': 'log2', 'max_depth': 20}
RF Done
PCA Done
39 dates remaining


  y = column_or_1d(y, warn=True)


Lasso Done
Ridge Done
test_year is 2021 with hyperparameters of {'num_layers': 2, 'units': [96, 32], 'optimizer': 'Adam', 'drop_out': [0.1, 0.2], 'activation': ['linear', 'tanh'], 'lr': 0.008514542675036612}
Epoch 1/10


  super().__init__(**kwargs)


[1m88/88[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 69ms/step - loss: 0.0037 - root_mean_squared_error: 0.0557
Epoch 2/10
[1m88/88[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 70ms/step - loss: 5.6401e-04 - root_mean_squared_error: 0.0237
Epoch 3/10
[1m88/88[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 69ms/step - loss: 5.7113e-04 - root_mean_squared_error: 0.0239
Epoch 4/10
[1m88/88[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 69ms/step - loss: 5.3299e-04 - root_mean_squared_error: 0.0231
Epoch 5/10
[1m88/88[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 70ms/step - loss: 5.4994e-04 - root_mean_squared_error: 0.0234
Epoch 6/10
[1m88/88[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 71ms/step - loss: 5.4093e-04 - root_mean_squared_error: 0.0233
Epoch 7/10
[1m88/88[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 70ms/step - loss: 5.4720e-04 - root_mean_squared_error: 0.0234
Epoch 8/10
[1m88/88[0m [32m━━━━━━━━━━━━━━━━━━

  y = column_or_1d(y, warn=True)


LGBM done
test_year is 2021 with hyperparameters of {'n_estimators': 300, 'min_samples_split': 5, 'min_samples_leaf': 4, 'max_features': 'log2', 'max_depth': 20}
RF Done
PCA Done
38 dates remaining


  y = column_or_1d(y, warn=True)


Lasso Done
Ridge Done
test_year is 2021 with hyperparameters of {'num_layers': 2, 'units': [96, 32], 'optimizer': 'Adam', 'drop_out': [0.1, 0.2], 'activation': ['linear', 'tanh'], 'lr': 0.008514542675036612}
Epoch 1/10


  super().__init__(**kwargs)


[1m89/89[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m11s[0m 71ms/step - loss: 0.0055 - root_mean_squared_error: 0.0672
Epoch 2/10
[1m89/89[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 70ms/step - loss: 5.8581e-04 - root_mean_squared_error: 0.0242
Epoch 3/10
[1m89/89[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 70ms/step - loss: 5.6976e-04 - root_mean_squared_error: 0.0239
Epoch 4/10
[1m89/89[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 70ms/step - loss: 7.5407e-04 - root_mean_squared_error: 0.0273
Epoch 5/10
[1m89/89[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 69ms/step - loss: 6.5753e-04 - root_mean_squared_error: 0.0256
Epoch 6/10
[1m89/89[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 70ms/step - loss: 6.5957e-04 - root_mean_squared_error: 0.0257
[1m10/10[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 56ms/step
LSTM Done
Fitting 5 folds for each of 10 candidates, totalling 50 fits
Best XGBoost Parameters: {'subsample': 0.

  y = column_or_1d(y, warn=True)


RF Done
PCA Done
37 dates remaining


  y = column_or_1d(y, warn=True)


Lasso Done
Ridge Done
test_year is 2021 with hyperparameters of {'num_layers': 2, 'units': [96, 32], 'optimizer': 'Adam', 'drop_out': [0.1, 0.2], 'activation': ['linear', 'tanh'], 'lr': 0.008514542675036612}
Epoch 1/10


  super().__init__(**kwargs)


[1m90/90[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m11s[0m 69ms/step - loss: 0.0028 - root_mean_squared_error: 0.0490
Epoch 2/10
[1m90/90[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 69ms/step - loss: 5.5546e-04 - root_mean_squared_error: 0.0236
Epoch 3/10
[1m90/90[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 69ms/step - loss: 5.7256e-04 - root_mean_squared_error: 0.0239
Epoch 4/10
[1m90/90[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 69ms/step - loss: 5.2973e-04 - root_mean_squared_error: 0.0230
Epoch 5/10
[1m90/90[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 70ms/step - loss: 5.2949e-04 - root_mean_squared_error: 0.0230
Epoch 6/10
[1m90/90[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 69ms/step - loss: 5.5859e-04 - root_mean_squared_error: 0.0236
Epoch 7/10
[1m90/90[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 69ms/step - loss: 5.3078e-04 - root_mean_squared_error: 0.0230
Epoch 8/10
[1m90/90[0m [32m━━━━━━━━━━━━━━━━━━

  y = column_or_1d(y, warn=True)


LGBM done
test_year is 2021 with hyperparameters of {'n_estimators': 300, 'min_samples_split': 5, 'min_samples_leaf': 4, 'max_features': 'log2', 'max_depth': 20}
RF Done
PCA Done
36 dates remaining


  y = column_or_1d(y, warn=True)


Lasso Done
Ridge Done
test_year is 2021 with hyperparameters of {'num_layers': 2, 'units': [96, 32], 'optimizer': 'Adam', 'drop_out': [0.1, 0.2], 'activation': ['linear', 'tanh'], 'lr': 0.008514542675036612}
Epoch 1/10


  super().__init__(**kwargs)


[1m90/90[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m11s[0m 70ms/step - loss: 0.0057 - root_mean_squared_error: 0.0681
Epoch 2/10
[1m90/90[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 69ms/step - loss: 5.3125e-04 - root_mean_squared_error: 0.0230
Epoch 3/10
[1m90/90[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 69ms/step - loss: 5.3968e-04 - root_mean_squared_error: 0.0232
Epoch 4/10
[1m90/90[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 70ms/step - loss: 5.3277e-04 - root_mean_squared_error: 0.0231
Epoch 5/10
[1m90/90[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 69ms/step - loss: 5.5375e-04 - root_mean_squared_error: 0.0235
Epoch 6/10
[1m90/90[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 70ms/step - loss: 5.4925e-04 - root_mean_squared_error: 0.0234
Epoch 7/10
[1m90/90[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 70ms/step - loss: 5.0770e-04 - root_mean_squared_error: 0.0225
Epoch 8/10
[1m90/90[0m [32m━━━━━━━━━━━━━━━━━━

  y = column_or_1d(y, warn=True)


RF Done
PCA Done
35 dates remaining


  y = column_or_1d(y, warn=True)


Lasso Done
Ridge Done
test_year is 2021 with hyperparameters of {'num_layers': 2, 'units': [96, 32], 'optimizer': 'Adam', 'drop_out': [0.1, 0.2], 'activation': ['linear', 'tanh'], 'lr': 0.008514542675036612}
Epoch 1/10


  super().__init__(**kwargs)


[1m91/91[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m11s[0m 69ms/step - loss: 0.0037 - root_mean_squared_error: 0.0556
Epoch 2/10
[1m91/91[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 69ms/step - loss: 5.6841e-04 - root_mean_squared_error: 0.0238
Epoch 3/10
[1m91/91[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 69ms/step - loss: 5.4780e-04 - root_mean_squared_error: 0.0234
Epoch 4/10
[1m91/91[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 69ms/step - loss: 5.5369e-04 - root_mean_squared_error: 0.0235
Epoch 5/10
[1m91/91[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 69ms/step - loss: 5.4907e-04 - root_mean_squared_error: 0.0234
Epoch 6/10
[1m91/91[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 68ms/step - loss: 5.3096e-04 - root_mean_squared_error: 0.0230
Epoch 7/10
[1m91/91[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 69ms/step - loss: 5.4357e-04 - root_mean_squared_error: 0.0233
Epoch 8/10
[1m91/91[0m [32m━━━━━━━━━━━━━━━━━━

  y = column_or_1d(y, warn=True)


RF Done
PCA Done
34 dates remaining


  y = column_or_1d(y, warn=True)


Lasso Done
Ridge Done
test_year is 2021 with hyperparameters of {'num_layers': 2, 'units': [96, 32], 'optimizer': 'Adam', 'drop_out': [0.1, 0.2], 'activation': ['linear', 'tanh'], 'lr': 0.008514542675036612}
Epoch 1/10


  super().__init__(**kwargs)


[1m91/91[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m11s[0m 69ms/step - loss: 0.0051 - root_mean_squared_error: 0.0650
Epoch 2/10
[1m91/91[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 70ms/step - loss: 6.6811e-04 - root_mean_squared_error: 0.0258
Epoch 3/10
[1m91/91[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 70ms/step - loss: 5.6957e-04 - root_mean_squared_error: 0.0239
Epoch 4/10
[1m91/91[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 70ms/step - loss: 7.3906e-04 - root_mean_squared_error: 0.0272
Epoch 5/10
[1m91/91[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 69ms/step - loss: 0.0019 - root_mean_squared_error: 0.0383
Epoch 6/10
[1m91/91[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 70ms/step - loss: 0.0026 - root_mean_squared_error: 0.0492
[1m10/10[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 57ms/step
LSTM Done
Fitting 5 folds for each of 10 candidates, totalling 50 fits
Best XGBoost Parameters: {'subsample': 0.7, 'n_es

  y = column_or_1d(y, warn=True)


LGBM done
test_year is 2021 with hyperparameters of {'n_estimators': 300, 'min_samples_split': 5, 'min_samples_leaf': 4, 'max_features': 'log2', 'max_depth': 20}
RF Done
PCA Done
33 dates remaining


  y = column_or_1d(y, warn=True)


Lasso Done
Ridge Done
test_year is 2021 with hyperparameters of {'num_layers': 2, 'units': [96, 32], 'optimizer': 'Adam', 'drop_out': [0.1, 0.2], 'activation': ['linear', 'tanh'], 'lr': 0.008514542675036612}
Epoch 1/10


  super().__init__(**kwargs)


[1m92/92[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m11s[0m 69ms/step - loss: 0.0065 - root_mean_squared_error: 0.0726
Epoch 2/10
[1m92/92[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m7s[0m 71ms/step - loss: 5.6313e-04 - root_mean_squared_error: 0.0237
Epoch 3/10
[1m92/92[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m7s[0m 70ms/step - loss: 5.3626e-04 - root_mean_squared_error: 0.0231
Epoch 4/10
[1m92/92[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m7s[0m 70ms/step - loss: 5.2635e-04 - root_mean_squared_error: 0.0229
Epoch 5/10
[1m92/92[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m7s[0m 71ms/step - loss: 5.5330e-04 - root_mean_squared_error: 0.0235
Epoch 6/10
[1m92/92[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m7s[0m 71ms/step - loss: 5.6087e-04 - root_mean_squared_error: 0.0237
Epoch 7/10
[1m92/92[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m7s[0m 70ms/step - loss: 5.7336e-04 - root_mean_squared_error: 0.0239
Epoch 8/10
[1m92/92[0m [32m━━━━━━━━━━━━━━━━━━

  y = column_or_1d(y, warn=True)


LGBM done
test_year is 2021 with hyperparameters of {'n_estimators': 300, 'min_samples_split': 5, 'min_samples_leaf': 4, 'max_features': 'log2', 'max_depth': 20}
RF Done
PCA Done
32 dates remaining


  y = column_or_1d(y, warn=True)


Lasso Done
Ridge Done
test_year is 2021 with hyperparameters of {'num_layers': 2, 'units': [96, 32], 'optimizer': 'Adam', 'drop_out': [0.1, 0.2], 'activation': ['linear', 'tanh'], 'lr': 0.008514542675036612}
Epoch 1/10


  super().__init__(**kwargs)


[1m92/92[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m12s[0m 70ms/step - loss: 0.0038 - root_mean_squared_error: 0.0568
Epoch 2/10
[1m92/92[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m7s[0m 70ms/step - loss: 5.6720e-04 - root_mean_squared_error: 0.0238
Epoch 3/10
[1m92/92[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 70ms/step - loss: 5.5839e-04 - root_mean_squared_error: 0.0236
Epoch 4/10
[1m92/92[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m7s[0m 71ms/step - loss: 5.2050e-04 - root_mean_squared_error: 0.0228
Epoch 5/10
[1m92/92[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m7s[0m 72ms/step - loss: 5.5785e-04 - root_mean_squared_error: 0.0236
Epoch 6/10
[1m92/92[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m7s[0m 75ms/step - loss: 5.3938e-04 - root_mean_squared_error: 0.0232
Epoch 7/10
[1m92/92[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m7s[0m 74ms/step - loss: 5.5843e-04 - root_mean_squared_error: 0.0236
Epoch 8/10
[1m92/92[0m [32m━━━━━━━━━━━━━━━━━━

  y = column_or_1d(y, warn=True)


LGBM done
test_year is 2021 with hyperparameters of {'n_estimators': 300, 'min_samples_split': 5, 'min_samples_leaf': 4, 'max_features': 'log2', 'max_depth': 20}
RF Done
PCA Done
31 dates remaining


  y = column_or_1d(y, warn=True)


Lasso Done
Ridge Done
test_year is 2021 with hyperparameters of {'num_layers': 2, 'units': [96, 32], 'optimizer': 'Adam', 'drop_out': [0.1, 0.2], 'activation': ['linear', 'tanh'], 'lr': 0.008514542675036612}
Epoch 1/10


  super().__init__(**kwargs)


[1m93/93[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m11s[0m 71ms/step - loss: 0.0032 - root_mean_squared_error: 0.0518
Epoch 2/10
[1m93/93[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m7s[0m 71ms/step - loss: 5.3031e-04 - root_mean_squared_error: 0.0230
Epoch 3/10
[1m93/93[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m7s[0m 72ms/step - loss: 5.2981e-04 - root_mean_squared_error: 0.0230
Epoch 4/10
[1m93/93[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m7s[0m 70ms/step - loss: 5.1816e-04 - root_mean_squared_error: 0.0228
Epoch 5/10
[1m93/93[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m7s[0m 70ms/step - loss: 5.5294e-04 - root_mean_squared_error: 0.0235
Epoch 6/10
[1m93/93[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m7s[0m 70ms/step - loss: 5.4173e-04 - root_mean_squared_error: 0.0233
Epoch 7/10
[1m93/93[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m7s[0m 71ms/step - loss: 5.3465e-04 - root_mean_squared_error: 0.0231
Epoch 8/10
[1m93/93[0m [32m━━━━━━━━━━━━━━━━━━

  y = column_or_1d(y, warn=True)


LGBM done
test_year is 2021 with hyperparameters of {'n_estimators': 300, 'min_samples_split': 5, 'min_samples_leaf': 4, 'max_features': 'log2', 'max_depth': 20}
RF Done
PCA Done
30 dates remaining


  y = column_or_1d(y, warn=True)


Lasso Done
Ridge Done
test_year is 2022 with hyperparameters of {'num_layers': 2, 'units': [32, 32], 'optimizer': 'Adagrad', 'drop_out': [0.2, 0.1], 'activation': ['linear', 'tanh'], 'lr': 0.06210952184736157}


  super().__init__(**kwargs)


Epoch 1/10
[1m94/94[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m8s[0m 38ms/step - loss: 0.0017 - root_mean_squared_error: 0.0402
Epoch 2/10
[1m94/94[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 38ms/step - loss: 8.6942e-04 - root_mean_squared_error: 0.0295
Epoch 3/10
[1m94/94[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 38ms/step - loss: 7.6399e-04 - root_mean_squared_error: 0.0276
Epoch 4/10
[1m94/94[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 38ms/step - loss: 7.8825e-04 - root_mean_squared_error: 0.0281
Epoch 5/10
[1m94/94[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 38ms/step - loss: 7.3270e-04 - root_mean_squared_error: 0.0271
Epoch 6/10
[1m94/94[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 38ms/step - loss: 7.3094e-04 - root_mean_squared_error: 0.0270
Epoch 7/10
[1m94/94[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 38ms/step - loss: 7.2131e-04 - root_mean_squared_error: 0.0269
Epoch 8/10
[1m94/94[0m [32m━━━━━━━━

  y = column_or_1d(y, warn=True)


LGBM done
test_year is 2022 with hyperparameters of {'n_estimators': 100, 'min_samples_split': 10, 'min_samples_leaf': 4, 'max_features': 'log2', 'max_depth': None}
RF Done
PCA Done
29 dates remaining


  y = column_or_1d(y, warn=True)


Lasso Done
Ridge Done
test_year is 2022 with hyperparameters of {'num_layers': 2, 'units': [32, 32], 'optimizer': 'Adagrad', 'drop_out': [0.2, 0.1], 'activation': ['linear', 'tanh'], 'lr': 0.06210952184736157}
Epoch 1/10


  super().__init__(**kwargs)


[1m94/94[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m8s[0m 39ms/step - loss: 9.1077e-04 - root_mean_squared_error: 0.0302
Epoch 2/10
[1m94/94[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 37ms/step - loss: 7.6514e-04 - root_mean_squared_error: 0.0277
Epoch 3/10
[1m94/94[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 38ms/step - loss: 7.3913e-04 - root_mean_squared_error: 0.0272
Epoch 4/10
[1m94/94[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 37ms/step - loss: 7.0439e-04 - root_mean_squared_error: 0.0265
Epoch 5/10
[1m94/94[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 37ms/step - loss: 6.8566e-04 - root_mean_squared_error: 0.0262
Epoch 6/10
[1m94/94[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 37ms/step - loss: 6.6650e-04 - root_mean_squared_error: 0.0258
Epoch 7/10
[1m94/94[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 37ms/step - loss: 6.8898e-04 - root_mean_squared_error: 0.0262
Epoch 8/10
[1m94/94[0m [32m━━━━━━━━━━━━━━━

  y = column_or_1d(y, warn=True)


LGBM done
test_year is 2022 with hyperparameters of {'n_estimators': 100, 'min_samples_split': 10, 'min_samples_leaf': 4, 'max_features': 'log2', 'max_depth': None}
RF Done
PCA Done
28 dates remaining


  y = column_or_1d(y, warn=True)


Lasso Done
Ridge Done
test_year is 2022 with hyperparameters of {'num_layers': 2, 'units': [32, 32], 'optimizer': 'Adagrad', 'drop_out': [0.2, 0.1], 'activation': ['linear', 'tanh'], 'lr': 0.06210952184736157}
Epoch 1/10


  super().__init__(**kwargs)


[1m95/95[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m9s[0m 42ms/step - loss: 8.8803e-04 - root_mean_squared_error: 0.0298
Epoch 2/10
[1m95/95[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 58ms/step - loss: 8.0077e-04 - root_mean_squared_error: 0.0283
Epoch 3/10
[1m95/95[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m814s[0m 9s/step - loss: 7.5485e-04 - root_mean_squared_error: 0.0275
Epoch 4/10
[1m95/95[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 55ms/step - loss: 7.3808e-04 - root_mean_squared_error: 0.0272
Epoch 5/10
[1m95/95[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 50ms/step - loss: 7.2371e-04 - root_mean_squared_error: 0.0269
Epoch 6/10
[1m95/95[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 31ms/step - loss: 6.9602e-04 - root_mean_squared_error: 0.0264
Epoch 7/10
[1m95/95[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 37ms/step - loss: 6.6057e-04 - root_mean_squared_error: 0.0257
Epoch 8/10
[1m95/95[0m [32m━━━━━━━━━━━━━━━



Best XGBoost Parameters: {'subsample': 0.7, 'n_estimators': 500, 'max_depth': 3, 'learning_rate': 0.01, 'colsample_bytree': 0.7}
XGBoost done
Fitting 5 folds for each of 10 candidates, totalling 50 fits
Best LGBM Parameters: {'subsample': 0.9, 'num_leaves': 31, 'n_estimators': 100, 'max_depth': 3, 'learning_rate': 0.01, 'colsample_bytree': 1.0}


  y = column_or_1d(y, warn=True)


LGBM done
test_year is 2022 with hyperparameters of {'n_estimators': 100, 'min_samples_split': 10, 'min_samples_leaf': 4, 'max_features': 'log2', 'max_depth': None}
RF Done
PCA Done
27 dates remaining


  y = column_or_1d(y, warn=True)


Lasso Done
Ridge Done
test_year is 2022 with hyperparameters of {'num_layers': 2, 'units': [32, 32], 'optimizer': 'Adagrad', 'drop_out': [0.2, 0.1], 'activation': ['linear', 'tanh'], 'lr': 0.06210952184736157}
Epoch 1/10


  super().__init__(**kwargs)


[1m95/95[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m9s[0m 47ms/step - loss: 8.3985e-04 - root_mean_squared_error: 0.0290
Epoch 2/10
[1m95/95[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 47ms/step - loss: 7.6321e-04 - root_mean_squared_error: 0.0276
Epoch 3/10
[1m95/95[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 47ms/step - loss: 6.9873e-04 - root_mean_squared_error: 0.0264
Epoch 4/10
[1m95/95[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 47ms/step - loss: 7.2092e-04 - root_mean_squared_error: 0.0268
Epoch 5/10
[1m95/95[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 47ms/step - loss: 6.8386e-04 - root_mean_squared_error: 0.0261
Epoch 6/10
[1m95/95[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 46ms/step - loss: 6.8664e-04 - root_mean_squared_error: 0.0262
Epoch 7/10
[1m95/95[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 47ms/step - loss: 6.9968e-04 - root_mean_squared_error: 0.0264
Epoch 8/10
[1m95/95[0m [32m━━━━━━━━━━━━━━━

  y = column_or_1d(y, warn=True)


LGBM done
test_year is 2022 with hyperparameters of {'n_estimators': 100, 'min_samples_split': 10, 'min_samples_leaf': 4, 'max_features': 'log2', 'max_depth': None}
RF Done
PCA Done
26 dates remaining


  y = column_or_1d(y, warn=True)


Lasso Done
Ridge Done
test_year is 2022 with hyperparameters of {'num_layers': 2, 'units': [32, 32], 'optimizer': 'Adagrad', 'drop_out': [0.2, 0.1], 'activation': ['linear', 'tanh'], 'lr': 0.06210952184736157}
Epoch 1/10


  super().__init__(**kwargs)


[1m96/96[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m9s[0m 47ms/step - loss: 0.0019 - root_mean_squared_error: 0.0431
Epoch 2/10
[1m96/96[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 46ms/step - loss: 8.6198e-04 - root_mean_squared_error: 0.0294
Epoch 3/10
[1m96/96[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 47ms/step - loss: 7.9063e-04 - root_mean_squared_error: 0.0281
Epoch 4/10
[1m96/96[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 46ms/step - loss: 7.7082e-04 - root_mean_squared_error: 0.0278
Epoch 5/10
[1m96/96[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 46ms/step - loss: 7.3033e-04 - root_mean_squared_error: 0.0270
Epoch 6/10
[1m96/96[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 46ms/step - loss: 7.0816e-04 - root_mean_squared_error: 0.0266
Epoch 7/10
[1m96/96[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 47ms/step - loss: 7.1172e-04 - root_mean_squared_error: 0.0267
Epoch 8/10
[1m96/96[0m [32m━━━━━━━━━━━━━━━━━━━

  y = column_or_1d(y, warn=True)


LGBM done
test_year is 2022 with hyperparameters of {'n_estimators': 100, 'min_samples_split': 10, 'min_samples_leaf': 4, 'max_features': 'log2', 'max_depth': None}
RF Done
PCA Done
25 dates remaining


  y = column_or_1d(y, warn=True)


Lasso Done
Ridge Done
test_year is 2022 with hyperparameters of {'num_layers': 2, 'units': [32, 32], 'optimizer': 'Adagrad', 'drop_out': [0.2, 0.1], 'activation': ['linear', 'tanh'], 'lr': 0.06210952184736157}


  super().__init__(**kwargs)


Epoch 1/10
[1m96/96[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m9s[0m 47ms/step - loss: 0.0011 - root_mean_squared_error: 0.0331
Epoch 2/10
[1m96/96[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 47ms/step - loss: 7.1255e-04 - root_mean_squared_error: 0.0267
Epoch 3/10
[1m96/96[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 48ms/step - loss: 6.6025e-04 - root_mean_squared_error: 0.0257
Epoch 4/10
[1m96/96[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 47ms/step - loss: 6.8442e-04 - root_mean_squared_error: 0.0262
Epoch 5/10
[1m96/96[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 48ms/step - loss: 6.5586e-04 - root_mean_squared_error: 0.0256
Epoch 6/10
[1m96/96[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 47ms/step - loss: 6.7847e-04 - root_mean_squared_error: 0.0260
Epoch 7/10
[1m96/96[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 46ms/step - loss: 6.3480e-04 - root_mean_squared_error: 0.0252
Epoch 8/10
[1m96/96[0m [32m━━━━━━━━

  y = column_or_1d(y, warn=True)


LGBM done
test_year is 2022 with hyperparameters of {'n_estimators': 100, 'min_samples_split': 10, 'min_samples_leaf': 4, 'max_features': 'log2', 'max_depth': None}
RF Done
PCA Done
24 dates remaining


  y = column_or_1d(y, warn=True)


Lasso Done
Ridge Done
test_year is 2022 with hyperparameters of {'num_layers': 2, 'units': [32, 32], 'optimizer': 'Adagrad', 'drop_out': [0.2, 0.1], 'activation': ['linear', 'tanh'], 'lr': 0.06210952184736157}
Epoch 1/10


  super().__init__(**kwargs)


[1m97/97[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m9s[0m 46ms/step - loss: 0.0014 - root_mean_squared_error: 0.0369
Epoch 2/10
[1m97/97[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 46ms/step - loss: 7.0201e-04 - root_mean_squared_error: 0.0265
Epoch 3/10
[1m97/97[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 46ms/step - loss: 6.9728e-04 - root_mean_squared_error: 0.0264
Epoch 4/10
[1m97/97[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 46ms/step - loss: 6.7850e-04 - root_mean_squared_error: 0.0260
Epoch 5/10
[1m97/97[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 45ms/step - loss: 6.6989e-04 - root_mean_squared_error: 0.0259
Epoch 6/10
[1m97/97[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 46ms/step - loss: 6.7821e-04 - root_mean_squared_error: 0.0260
Epoch 7/10
[1m97/97[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 47ms/step - loss: 6.3531e-04 - root_mean_squared_error: 0.0252
Epoch 8/10
[1m97/97[0m [32m━━━━━━━━━━━━━━━━━━━

  y = column_or_1d(y, warn=True)


LGBM done
test_year is 2022 with hyperparameters of {'n_estimators': 100, 'min_samples_split': 10, 'min_samples_leaf': 4, 'max_features': 'log2', 'max_depth': None}
RF Done
PCA Done
23 dates remaining


  y = column_or_1d(y, warn=True)


Lasso Done
Ridge Done
test_year is 2022 with hyperparameters of {'num_layers': 2, 'units': [32, 32], 'optimizer': 'Adagrad', 'drop_out': [0.2, 0.1], 'activation': ['linear', 'tanh'], 'lr': 0.06210952184736157}
Epoch 1/10


  super().__init__(**kwargs)


[1m98/98[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m9s[0m 40ms/step - loss: 0.0011 - root_mean_squared_error: 0.0328
Epoch 2/10
[1m98/98[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 42ms/step - loss: 8.4868e-04 - root_mean_squared_error: 0.0291
Epoch 3/10
[1m98/98[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 46ms/step - loss: 7.8284e-04 - root_mean_squared_error: 0.0280
Epoch 4/10
[1m98/98[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 45ms/step - loss: 7.2496e-04 - root_mean_squared_error: 0.0269
Epoch 5/10
[1m98/98[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 46ms/step - loss: 7.2206e-04 - root_mean_squared_error: 0.0269
Epoch 6/10
[1m98/98[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 46ms/step - loss: 6.9315e-04 - root_mean_squared_error: 0.0263
Epoch 7/10
[1m98/98[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 44ms/step - loss: 6.5765e-04 - root_mean_squared_error: 0.0256
Epoch 8/10
[1m98/98[0m [32m━━━━━━━━━━━━━━━━━━━

  y = column_or_1d(y, warn=True)


LGBM done
test_year is 2022 with hyperparameters of {'n_estimators': 100, 'min_samples_split': 10, 'min_samples_leaf': 4, 'max_features': 'log2', 'max_depth': None}
RF Done
PCA Done
22 dates remaining


  y = column_or_1d(y, warn=True)


Lasso Done
Ridge Done
test_year is 2022 with hyperparameters of {'num_layers': 2, 'units': [32, 32], 'optimizer': 'Adagrad', 'drop_out': [0.2, 0.1], 'activation': ['linear', 'tanh'], 'lr': 0.06210952184736157}
Epoch 1/10


  super().__init__(**kwargs)


[1m98/98[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m8s[0m 38ms/step - loss: 8.9128e-04 - root_mean_squared_error: 0.0298
Epoch 2/10
[1m98/98[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 39ms/step - loss: 7.5552e-04 - root_mean_squared_error: 0.0275
Epoch 3/10
[1m98/98[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 39ms/step - loss: 7.2739e-04 - root_mean_squared_error: 0.0270
Epoch 4/10
[1m98/98[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 38ms/step - loss: 7.5494e-04 - root_mean_squared_error: 0.0275
Epoch 5/10
[1m98/98[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 38ms/step - loss: 6.9644e-04 - root_mean_squared_error: 0.0264
Epoch 6/10
[1m98/98[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 39ms/step - loss: 7.0928e-04 - root_mean_squared_error: 0.0266
Epoch 7/10
[1m98/98[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 39ms/step - loss: 6.4528e-04 - root_mean_squared_error: 0.0254
Epoch 8/10
[1m98/98[0m [32m━━━━━━━━━━━━━━━

  y = column_or_1d(y, warn=True)


LGBM done
test_year is 2022 with hyperparameters of {'n_estimators': 100, 'min_samples_split': 10, 'min_samples_leaf': 4, 'max_features': 'log2', 'max_depth': None}
RF Done
PCA Done
21 dates remaining


  y = column_or_1d(y, warn=True)


Lasso Done
Ridge Done
test_year is 2022 with hyperparameters of {'num_layers': 2, 'units': [32, 32], 'optimizer': 'Adagrad', 'drop_out': [0.2, 0.1], 'activation': ['linear', 'tanh'], 'lr': 0.06210952184736157}
Epoch 1/10


  super().__init__(**kwargs)


[1m99/99[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m8s[0m 38ms/step - loss: 9.0415e-04 - root_mean_squared_error: 0.0300
Epoch 2/10
[1m99/99[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 38ms/step - loss: 7.1719e-04 - root_mean_squared_error: 0.0268
Epoch 3/10
[1m99/99[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 38ms/step - loss: 6.6086e-04 - root_mean_squared_error: 0.0257
Epoch 4/10
[1m99/99[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 39ms/step - loss: 6.6361e-04 - root_mean_squared_error: 0.0258
Epoch 5/10
[1m99/99[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 38ms/step - loss: 6.6581e-04 - root_mean_squared_error: 0.0258
Epoch 6/10
[1m99/99[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 39ms/step - loss: 6.2906e-04 - root_mean_squared_error: 0.0251
Epoch 7/10
[1m99/99[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 38ms/step - loss: 6.2704e-04 - root_mean_squared_error: 0.0250
Epoch 8/10
[1m99/99[0m [32m━━━━━━━━━━━━━━━

  y = column_or_1d(y, warn=True)


LGBM done
test_year is 2022 with hyperparameters of {'n_estimators': 100, 'min_samples_split': 10, 'min_samples_leaf': 4, 'max_features': 'log2', 'max_depth': None}
RF Done
PCA Done
20 dates remaining


  y = column_or_1d(y, warn=True)


Lasso Done
Ridge Done
test_year is 2022 with hyperparameters of {'num_layers': 2, 'units': [32, 32], 'optimizer': 'Adagrad', 'drop_out': [0.2, 0.1], 'activation': ['linear', 'tanh'], 'lr': 0.06210952184736157}
Epoch 1/10


  super().__init__(**kwargs)


[1m99/99[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m8s[0m 40ms/step - loss: 0.0013 - root_mean_squared_error: 0.0359
Epoch 2/10
[1m99/99[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 40ms/step - loss: 7.8370e-04 - root_mean_squared_error: 0.0280
Epoch 3/10
[1m99/99[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 40ms/step - loss: 7.3068e-04 - root_mean_squared_error: 0.0270
Epoch 4/10
[1m99/99[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 40ms/step - loss: 7.1412e-04 - root_mean_squared_error: 0.0267
Epoch 5/10
[1m99/99[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 39ms/step - loss: 6.6873e-04 - root_mean_squared_error: 0.0259
Epoch 6/10
[1m99/99[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 39ms/step - loss: 6.7821e-04 - root_mean_squared_error: 0.0260
Epoch 7/10
[1m99/99[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 39ms/step - loss: 6.6520e-04 - root_mean_squared_error: 0.0258
Epoch 8/10
[1m99/99[0m [32m━━━━━━━━━━━━━━━━━━━

  y = column_or_1d(y, warn=True)


LGBM done
test_year is 2022 with hyperparameters of {'n_estimators': 100, 'min_samples_split': 10, 'min_samples_leaf': 4, 'max_features': 'log2', 'max_depth': None}
RF Done
PCA Done
19 dates remaining


  y = column_or_1d(y, warn=True)


Lasso Done
Ridge Done
test_year is 2022 with hyperparameters of {'num_layers': 2, 'units': [32, 32], 'optimizer': 'Adagrad', 'drop_out': [0.2, 0.1], 'activation': ['linear', 'tanh'], 'lr': 0.06210952184736157}
Epoch 1/10


  super().__init__(**kwargs)


[1m100/100[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m8s[0m 39ms/step - loss: 0.0047 - root_mean_squared_error: 0.0657
Epoch 2/10
[1m100/100[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 40ms/step - loss: 7.8220e-04 - root_mean_squared_error: 0.0280
Epoch 3/10
[1m100/100[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 39ms/step - loss: 7.4196e-04 - root_mean_squared_error: 0.0272
Epoch 4/10
[1m100/100[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 40ms/step - loss: 6.8466e-04 - root_mean_squared_error: 0.0262
Epoch 5/10
[1m100/100[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 40ms/step - loss: 6.8313e-04 - root_mean_squared_error: 0.0261
Epoch 6/10
[1m100/100[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 42ms/step - loss: 6.7085e-04 - root_mean_squared_error: 0.0259
Epoch 7/10
[1m100/100[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 42ms/step - loss: 6.7488e-04 - root_mean_squared_error: 0.0260
Epoch 8/10
[1m100/100[0m [32m━━━

  y = column_or_1d(y, warn=True)


LGBM done
test_year is 2022 with hyperparameters of {'n_estimators': 100, 'min_samples_split': 10, 'min_samples_leaf': 4, 'max_features': 'log2', 'max_depth': None}
RF Done
PCA Done
18 dates remaining


  y = column_or_1d(y, warn=True)


Lasso Done
Ridge Done
test_year is 2023 with hyperparameters of {'num_layers': 4, 'units': [128, 32, 32, 32], 'optimizer': 'Adam', 'drop_out': [0.2, 0.1, 0.1, 0.2], 'activation': ['tanh', 'tanh', 'linear', 'linear'], 'lr': 0.019671144721368838}


  super().__init__(**kwargs)


Epoch 1/10
[1m101/101[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m24s[0m 139ms/step - loss: 0.0119 - root_mean_squared_error: 0.1001
Epoch 2/10
[1m101/101[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m14s[0m 138ms/step - loss: 5.5516e-04 - root_mean_squared_error: 0.0236
Epoch 3/10
[1m101/101[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m14s[0m 138ms/step - loss: 5.0855e-04 - root_mean_squared_error: 0.0225
Epoch 4/10
[1m101/101[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m14s[0m 139ms/step - loss: 5.1951e-04 - root_mean_squared_error: 0.0228
Epoch 5/10
[1m101/101[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m14s[0m 141ms/step - loss: 5.3800e-04 - root_mean_squared_error: 0.0232
Epoch 6/10
[1m101/101[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m14s[0m 140ms/step - loss: 5.0061e-04 - root_mean_squared_error: 0.0224
Epoch 7/10
[1m101/101[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m14s[0m 140ms/step - loss: 4.9555e-04 - root_mean_squared_error: 0.0223
Epoch 8/10

  y = column_or_1d(y, warn=True)


LGBM done
test_year is 2023 with hyperparameters of {'n_estimators': 300, 'min_samples_split': 5, 'min_samples_leaf': 4, 'max_features': 'log2', 'max_depth': 20}
RF Done
PCA Done
17 dates remaining


  y = column_or_1d(y, warn=True)


Lasso Done
Ridge Done
test_year is 2023 with hyperparameters of {'num_layers': 4, 'units': [128, 32, 32, 32], 'optimizer': 'Adam', 'drop_out': [0.2, 0.1, 0.1, 0.2], 'activation': ['tanh', 'tanh', 'linear', 'linear'], 'lr': 0.019671144721368838}


  super().__init__(**kwargs)


Epoch 1/10
[1m101/101[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m23s[0m 139ms/step - loss: 0.0072 - root_mean_squared_error: 0.0761
Epoch 2/10
[1m101/101[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m14s[0m 142ms/step - loss: 5.2773e-04 - root_mean_squared_error: 0.0230
Epoch 3/10
[1m101/101[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m14s[0m 140ms/step - loss: 4.6431e-04 - root_mean_squared_error: 0.0215
Epoch 4/10
[1m101/101[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m14s[0m 140ms/step - loss: 5.1684e-04 - root_mean_squared_error: 0.0227
Epoch 5/10
[1m101/101[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m14s[0m 140ms/step - loss: 4.8882e-04 - root_mean_squared_error: 0.0221
Epoch 6/10
[1m101/101[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m14s[0m 141ms/step - loss: 4.4692e-04 - root_mean_squared_error: 0.0211
Epoch 7/10
[1m101/101[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m14s[0m 140ms/step - loss: 4.3517e-04 - root_mean_squared_error: 0.0208
Epoch 8/10

  y = column_or_1d(y, warn=True)


LGBM done
test_year is 2023 with hyperparameters of {'n_estimators': 300, 'min_samples_split': 5, 'min_samples_leaf': 4, 'max_features': 'log2', 'max_depth': 20}
RF Done
PCA Done
16 dates remaining


  y = column_or_1d(y, warn=True)


Lasso Done
Ridge Done
test_year is 2023 with hyperparameters of {'num_layers': 4, 'units': [128, 32, 32, 32], 'optimizer': 'Adam', 'drop_out': [0.2, 0.1, 0.1, 0.2], 'activation': ['tanh', 'tanh', 'linear', 'linear'], 'lr': 0.019671144721368838}


  super().__init__(**kwargs)


Epoch 1/10
[1m102/102[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m23s[0m 140ms/step - loss: 0.0146 - root_mean_squared_error: 0.1097
Epoch 2/10
[1m102/102[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m14s[0m 141ms/step - loss: 5.7691e-04 - root_mean_squared_error: 0.0240
Epoch 3/10
[1m102/102[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m14s[0m 140ms/step - loss: 5.5523e-04 - root_mean_squared_error: 0.0235
Epoch 4/10
[1m102/102[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m14s[0m 141ms/step - loss: 5.3537e-04 - root_mean_squared_error: 0.0231
Epoch 5/10
[1m102/102[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m14s[0m 141ms/step - loss: 5.2300e-04 - root_mean_squared_error: 0.0229
Epoch 6/10
[1m102/102[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m14s[0m 140ms/step - loss: 5.1973e-04 - root_mean_squared_error: 0.0228
Epoch 7/10
[1m102/102[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m14s[0m 141ms/step - loss: 4.9432e-04 - root_mean_squared_error: 0.0222
Epoch 8/10

  y = column_or_1d(y, warn=True)


LGBM done
test_year is 2023 with hyperparameters of {'n_estimators': 300, 'min_samples_split': 5, 'min_samples_leaf': 4, 'max_features': 'log2', 'max_depth': 20}
RF Done
PCA Done
15 dates remaining


  y = column_or_1d(y, warn=True)


Lasso Done
Ridge Done
test_year is 2023 with hyperparameters of {'num_layers': 4, 'units': [128, 32, 32, 32], 'optimizer': 'Adam', 'drop_out': [0.2, 0.1, 0.1, 0.2], 'activation': ['tanh', 'tanh', 'linear', 'linear'], 'lr': 0.019671144721368838}


  super().__init__(**kwargs)


Epoch 1/10
[1m102/102[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m25s[0m 159ms/step - loss: 0.0093 - root_mean_squared_error: 0.0855
Epoch 2/10
[1m102/102[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m16s[0m 159ms/step - loss: 5.3046e-04 - root_mean_squared_error: 0.0230
Epoch 3/10
[1m102/102[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m16s[0m 160ms/step - loss: 5.3546e-04 - root_mean_squared_error: 0.0231
Epoch 4/10
[1m102/102[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m16s[0m 159ms/step - loss: 5.2653e-04 - root_mean_squared_error: 0.0229
Epoch 5/10
[1m102/102[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m16s[0m 160ms/step - loss: 5.1471e-04 - root_mean_squared_error: 0.0227
Epoch 6/10
[1m102/102[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m16s[0m 160ms/step - loss: 4.8286e-04 - root_mean_squared_error: 0.0220
Epoch 7/10
[1m102/102[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m16s[0m 160ms/step - loss: 4.9576e-04 - root_mean_squared_error: 0.0223
Epoch 8/10

  y = column_or_1d(y, warn=True)


LGBM done
test_year is 2023 with hyperparameters of {'n_estimators': 300, 'min_samples_split': 5, 'min_samples_leaf': 4, 'max_features': 'log2', 'max_depth': 20}
RF Done
PCA Done
14 dates remaining


  y = column_or_1d(y, warn=True)


Lasso Done
Ridge Done
test_year is 2023 with hyperparameters of {'num_layers': 4, 'units': [128, 32, 32, 32], 'optimizer': 'Adam', 'drop_out': [0.2, 0.1, 0.1, 0.2], 'activation': ['tanh', 'tanh', 'linear', 'linear'], 'lr': 0.019671144721368838}


  super().__init__(**kwargs)


Epoch 1/10
[1m103/103[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m23s[0m 141ms/step - loss: 0.0080 - root_mean_squared_error: 0.0804
Epoch 2/10
[1m103/103[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m15s[0m 141ms/step - loss: 5.0402e-04 - root_mean_squared_error: 0.0224
Epoch 3/10
[1m103/103[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m15s[0m 141ms/step - loss: 4.7788e-04 - root_mean_squared_error: 0.0219
Epoch 4/10
[1m103/103[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m15s[0m 141ms/step - loss: 4.7776e-04 - root_mean_squared_error: 0.0219
Epoch 5/10
[1m103/103[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m15s[0m 141ms/step - loss: 4.9687e-04 - root_mean_squared_error: 0.0223
Epoch 6/10
[1m103/103[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m15s[0m 141ms/step - loss: 4.5277e-04 - root_mean_squared_error: 0.0213
Epoch 7/10
[1m103/103[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m15s[0m 140ms/step - loss: 4.2882e-04 - root_mean_squared_error: 0.0207
Epoch 8/10

  y = column_or_1d(y, warn=True)


LGBM done
test_year is 2023 with hyperparameters of {'n_estimators': 300, 'min_samples_split': 5, 'min_samples_leaf': 4, 'max_features': 'log2', 'max_depth': 20}
RF Done
PCA Done
13 dates remaining


  y = column_or_1d(y, warn=True)


Lasso Done
Ridge Done
test_year is 2023 with hyperparameters of {'num_layers': 4, 'units': [128, 32, 32, 32], 'optimizer': 'Adam', 'drop_out': [0.2, 0.1, 0.1, 0.2], 'activation': ['tanh', 'tanh', 'linear', 'linear'], 'lr': 0.019671144721368838}


  super().__init__(**kwargs)


Epoch 1/10
[1m103/103[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m26s[0m 160ms/step - loss: 0.0210 - root_mean_squared_error: 0.1306
Epoch 2/10
[1m103/103[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m17s[0m 160ms/step - loss: 5.4269e-04 - root_mean_squared_error: 0.0233
Epoch 3/10
[1m103/103[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m16s[0m 154ms/step - loss: 5.0105e-04 - root_mean_squared_error: 0.0224
Epoch 4/10
[1m103/103[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m15s[0m 141ms/step - loss: 5.1220e-04 - root_mean_squared_error: 0.0226
Epoch 5/10
[1m103/103[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m15s[0m 142ms/step - loss: 5.0828e-04 - root_mean_squared_error: 0.0225
Epoch 6/10
[1m103/103[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m15s[0m 142ms/step - loss: 5.0421e-04 - root_mean_squared_error: 0.0225
Epoch 7/10
[1m103/103[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m15s[0m 142ms/step - loss: 4.8480e-04 - root_mean_squared_error: 0.0220
Epoch 8/10

  y = column_or_1d(y, warn=True)


LGBM done
test_year is 2023 with hyperparameters of {'n_estimators': 300, 'min_samples_split': 5, 'min_samples_leaf': 4, 'max_features': 'log2', 'max_depth': 20}
RF Done
PCA Done
12 dates remaining


  y = column_or_1d(y, warn=True)


Lasso Done
Ridge Done
test_year is 2023 with hyperparameters of {'num_layers': 4, 'units': [128, 32, 32, 32], 'optimizer': 'Adam', 'drop_out': [0.2, 0.1, 0.1, 0.2], 'activation': ['tanh', 'tanh', 'linear', 'linear'], 'lr': 0.019671144721368838}


  super().__init__(**kwargs)


Epoch 1/10
[1m104/104[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m25s[0m 153ms/step - loss: 0.0118 - root_mean_squared_error: 0.0961
Epoch 2/10
[1m104/104[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m15s[0m 143ms/step - loss: 5.3778e-04 - root_mean_squared_error: 0.0232
Epoch 3/10
[1m104/104[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m15s[0m 143ms/step - loss: 5.2312e-04 - root_mean_squared_error: 0.0229
Epoch 4/10
[1m104/104[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m15s[0m 143ms/step - loss: 4.7307e-04 - root_mean_squared_error: 0.0217
Epoch 5/10
[1m104/104[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m15s[0m 143ms/step - loss: 4.9464e-04 - root_mean_squared_error: 0.0222
Epoch 6/10
[1m104/104[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m15s[0m 143ms/step - loss: 4.9623e-04 - root_mean_squared_error: 0.0223
Epoch 7/10
[1m104/104[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m15s[0m 143ms/step - loss: 5.1038e-04 - root_mean_squared_error: 0.0226
Epoch 8/10

  y = column_or_1d(y, warn=True)


LGBM done
test_year is 2023 with hyperparameters of {'n_estimators': 300, 'min_samples_split': 5, 'min_samples_leaf': 4, 'max_features': 'log2', 'max_depth': 20}
RF Done
PCA Done
11 dates remaining


  y = column_or_1d(y, warn=True)


Lasso Done
Ridge Done
test_year is 2023 with hyperparameters of {'num_layers': 4, 'units': [128, 32, 32, 32], 'optimizer': 'Adam', 'drop_out': [0.2, 0.1, 0.1, 0.2], 'activation': ['tanh', 'tanh', 'linear', 'linear'], 'lr': 0.019671144721368838}


  super().__init__(**kwargs)


Epoch 1/10
[1m104/104[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m23s[0m 141ms/step - loss: 0.0065 - root_mean_squared_error: 0.0732
Epoch 2/10
[1m104/104[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m15s[0m 142ms/step - loss: 4.8536e-04 - root_mean_squared_error: 0.0220
Epoch 3/10
[1m104/104[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m15s[0m 142ms/step - loss: 4.7460e-04 - root_mean_squared_error: 0.0218
Epoch 4/10
[1m104/104[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m15s[0m 141ms/step - loss: 4.7910e-04 - root_mean_squared_error: 0.0219
Epoch 5/10
[1m104/104[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m15s[0m 142ms/step - loss: 4.5665e-04 - root_mean_squared_error: 0.0214
Epoch 6/10
[1m104/104[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m15s[0m 143ms/step - loss: 4.7826e-04 - root_mean_squared_error: 0.0219
Epoch 7/10
[1m104/104[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m15s[0m 142ms/step - loss: 4.5600e-04 - root_mean_squared_error: 0.0213
Epoch 8/10

  y = column_or_1d(y, warn=True)


LGBM done
test_year is 2023 with hyperparameters of {'n_estimators': 300, 'min_samples_split': 5, 'min_samples_leaf': 4, 'max_features': 'log2', 'max_depth': 20}
RF Done
PCA Done
10 dates remaining


  y = column_or_1d(y, warn=True)


Lasso Done
Ridge Done
test_year is 2023 with hyperparameters of {'num_layers': 4, 'units': [128, 32, 32, 32], 'optimizer': 'Adam', 'drop_out': [0.2, 0.1, 0.1, 0.2], 'activation': ['tanh', 'tanh', 'linear', 'linear'], 'lr': 0.019671144721368838}


  super().__init__(**kwargs)


Epoch 1/10
[1m105/105[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m23s[0m 141ms/step - loss: 0.0101 - root_mean_squared_error: 0.0931
Epoch 2/10
[1m105/105[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m17s[0m 159ms/step - loss: 5.2461e-04 - root_mean_squared_error: 0.0229
Epoch 3/10
[1m105/105[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m15s[0m 140ms/step - loss: 4.9804e-04 - root_mean_squared_error: 0.0223
Epoch 4/10
[1m105/105[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m15s[0m 141ms/step - loss: 4.8045e-04 - root_mean_squared_error: 0.0219
Epoch 5/10
[1m105/105[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m15s[0m 144ms/step - loss: 5.2464e-04 - root_mean_squared_error: 0.0229
Epoch 6/10
[1m105/105[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m15s[0m 142ms/step - loss: 4.8186e-04 - root_mean_squared_error: 0.0219
Epoch 7/10
[1m105/105[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m15s[0m 141ms/step - loss: 5.0741e-04 - root_mean_squared_error: 0.0225
Epoch 8/10

  y = column_or_1d(y, warn=True)


LGBM done
test_year is 2023 with hyperparameters of {'n_estimators': 300, 'min_samples_split': 5, 'min_samples_leaf': 4, 'max_features': 'log2', 'max_depth': 20}
RF Done
PCA Done
9 dates remaining


  y = column_or_1d(y, warn=True)


Lasso Done
Ridge Done
test_year is 2023 with hyperparameters of {'num_layers': 4, 'units': [128, 32, 32, 32], 'optimizer': 'Adam', 'drop_out': [0.2, 0.1, 0.1, 0.2], 'activation': ['tanh', 'tanh', 'linear', 'linear'], 'lr': 0.019671144721368838}


  super().__init__(**kwargs)


Epoch 1/10
[1m105/105[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m23s[0m 140ms/step - loss: 0.0073 - root_mean_squared_error: 0.0762
Epoch 2/10
[1m105/105[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m15s[0m 139ms/step - loss: 5.4634e-04 - root_mean_squared_error: 0.0234
Epoch 3/10
[1m105/105[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m15s[0m 140ms/step - loss: 4.8865e-04 - root_mean_squared_error: 0.0221
Epoch 4/10
[1m105/105[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m15s[0m 140ms/step - loss: 4.7803e-04 - root_mean_squared_error: 0.0219
Epoch 5/10
[1m105/105[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m15s[0m 140ms/step - loss: 4.4368e-04 - root_mean_squared_error: 0.0211
Epoch 6/10
[1m105/105[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m15s[0m 140ms/step - loss: 4.5285e-04 - root_mean_squared_error: 0.0213
Epoch 7/10
[1m105/105[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m15s[0m 142ms/step - loss: 4.6585e-04 - root_mean_squared_error: 0.0216
Epoch 8/10

  y = column_or_1d(y, warn=True)


LGBM done
test_year is 2023 with hyperparameters of {'n_estimators': 300, 'min_samples_split': 5, 'min_samples_leaf': 4, 'max_features': 'log2', 'max_depth': 20}
RF Done
PCA Done
8 dates remaining


  y = column_or_1d(y, warn=True)


Lasso Done
Ridge Done
test_year is 2023 with hyperparameters of {'num_layers': 4, 'units': [128, 32, 32, 32], 'optimizer': 'Adam', 'drop_out': [0.2, 0.1, 0.1, 0.2], 'activation': ['tanh', 'tanh', 'linear', 'linear'], 'lr': 0.019671144721368838}


  super().__init__(**kwargs)


Epoch 1/10
[1m106/106[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m24s[0m 140ms/step - loss: 0.0110 - root_mean_squared_error: 0.0962
Epoch 2/10
[1m106/106[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m15s[0m 137ms/step - loss: 4.8674e-04 - root_mean_squared_error: 0.0221
Epoch 3/10
[1m106/106[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m15s[0m 137ms/step - loss: 5.0633e-04 - root_mean_squared_error: 0.0225
Epoch 4/10
[1m106/106[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m15s[0m 137ms/step - loss: 4.7292e-04 - root_mean_squared_error: 0.0217
Epoch 5/10
[1m106/106[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m15s[0m 140ms/step - loss: 4.5818e-04 - root_mean_squared_error: 0.0214
Epoch 6/10
[1m106/106[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m15s[0m 139ms/step - loss: 4.5847e-04 - root_mean_squared_error: 0.0214
Epoch 7/10
[1m106/106[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m15s[0m 140ms/step - loss: 4.6793e-04 - root_mean_squared_error: 0.0216
Epoch 8/10

  y = column_or_1d(y, warn=True)


LGBM done
test_year is 2023 with hyperparameters of {'n_estimators': 300, 'min_samples_split': 5, 'min_samples_leaf': 4, 'max_features': 'log2', 'max_depth': 20}
RF Done
PCA Done
7 dates remaining


  y = column_or_1d(y, warn=True)


Lasso Done
Ridge Done
test_year is 2023 with hyperparameters of {'num_layers': 4, 'units': [128, 32, 32, 32], 'optimizer': 'Adam', 'drop_out': [0.2, 0.1, 0.1, 0.2], 'activation': ['tanh', 'tanh', 'linear', 'linear'], 'lr': 0.019671144721368838}


  super().__init__(**kwargs)


Epoch 1/10
[1m106/106[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m23s[0m 139ms/step - loss: 0.0104 - root_mean_squared_error: 0.0922
Epoch 2/10
[1m106/106[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m15s[0m 140ms/step - loss: 4.9044e-04 - root_mean_squared_error: 0.0221
Epoch 3/10
[1m106/106[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m15s[0m 140ms/step - loss: 5.0453e-04 - root_mean_squared_error: 0.0225
Epoch 4/10
[1m106/106[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m15s[0m 140ms/step - loss: 5.1552e-04 - root_mean_squared_error: 0.0227
Epoch 5/10
[1m106/106[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m15s[0m 143ms/step - loss: 5.0276e-04 - root_mean_squared_error: 0.0224
Epoch 6/10
[1m106/106[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m15s[0m 143ms/step - loss: 4.7667e-04 - root_mean_squared_error: 0.0218
Epoch 7/10
[1m106/106[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m17s[0m 158ms/step - loss: 4.8313e-04 - root_mean_squared_error: 0.0220
Epoch 8/10

  y = column_or_1d(y, warn=True)


LGBM done
test_year is 2023 with hyperparameters of {'n_estimators': 300, 'min_samples_split': 5, 'min_samples_leaf': 4, 'max_features': 'log2', 'max_depth': 20}
RF Done
PCA Done
6 dates remaining


  y = column_or_1d(y, warn=True)


Lasso Done
Ridge Done
test_year is 2024 with hyperparameters of {'num_layers': 2, 'units': [32, 32], 'optimizer': 'Adam', 'drop_out': [0.2, 0.1], 'activation': ['tanh', 'tanh'], 'lr': 0.004409350423921994}
Epoch 1/10


  super().__init__(**kwargs)


[1m106/106[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m9s[0m 41ms/step - loss: 0.0010 - root_mean_squared_error: 0.0311
Epoch 2/10
[1m106/106[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 41ms/step - loss: 5.2834e-04 - root_mean_squared_error: 0.0230
Epoch 3/10
[1m106/106[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 41ms/step - loss: 4.8390e-04 - root_mean_squared_error: 0.0220
Epoch 4/10
[1m106/106[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 41ms/step - loss: 5.3116e-04 - root_mean_squared_error: 0.0230
Epoch 5/10
[1m106/106[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 39ms/step - loss: 5.2657e-04 - root_mean_squared_error: 0.0229
Epoch 6/10
[1m106/106[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 39ms/step - loss: 5.1020e-04 - root_mean_squared_error: 0.0226
Epoch 7/10
[1m106/106[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 40ms/step - loss: 4.9709e-04 - root_mean_squared_error: 0.0223
Epoch 8/10
[1m106/106[0m [32m━━━

  y = column_or_1d(y, warn=True)


LGBM done
test_year is 2024 with hyperparameters of {'n_estimators': 100, 'min_samples_split': 5, 'min_samples_leaf': 4, 'max_features': 'log2', 'max_depth': 30}
RF Done
PCA Done
5 dates remaining


  y = column_or_1d(y, warn=True)


Lasso Done
Ridge Done
test_year is 2024 with hyperparameters of {'num_layers': 2, 'units': [32, 32], 'optimizer': 'Adam', 'drop_out': [0.2, 0.1], 'activation': ['tanh', 'tanh'], 'lr': 0.004409350423921994}
Epoch 1/10


  super().__init__(**kwargs)


[1m106/106[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m9s[0m 41ms/step - loss: 9.3224e-04 - root_mean_squared_error: 0.0300
Epoch 2/10
[1m106/106[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 41ms/step - loss: 4.9680e-04 - root_mean_squared_error: 0.0223
Epoch 3/10
[1m106/106[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 40ms/step - loss: 5.0874e-04 - root_mean_squared_error: 0.0225
Epoch 4/10
[1m106/106[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 40ms/step - loss: 4.9961e-04 - root_mean_squared_error: 0.0223
Epoch 5/10
[1m106/106[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 42ms/step - loss: 5.0867e-04 - root_mean_squared_error: 0.0225
Epoch 6/10
[1m106/106[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 41ms/step - loss: 5.0503e-04 - root_mean_squared_error: 0.0225
Epoch 7/10
[1m106/106[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 40ms/step - loss: 4.8068e-04 - root_mean_squared_error: 0.0219
Epoch 8/10
[1m106/106[0m [32

  y = column_or_1d(y, warn=True)


LGBM done
test_year is 2024 with hyperparameters of {'n_estimators': 100, 'min_samples_split': 5, 'min_samples_leaf': 4, 'max_features': 'log2', 'max_depth': 30}
RF Done
PCA Done
4 dates remaining


  y = column_or_1d(y, warn=True)


Lasso Done
Ridge Done
test_year is 2024 with hyperparameters of {'num_layers': 2, 'units': [32, 32], 'optimizer': 'Adam', 'drop_out': [0.2, 0.1], 'activation': ['tanh', 'tanh'], 'lr': 0.004409350423921994}
Epoch 1/10


  super().__init__(**kwargs)


[1m106/106[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 48ms/step - loss: 9.5213e-04 - root_mean_squared_error: 0.0303
Epoch 2/10
[1m106/106[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 50ms/step - loss: 5.1206e-04 - root_mean_squared_error: 0.0226
Epoch 3/10
[1m106/106[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 48ms/step - loss: 5.0250e-04 - root_mean_squared_error: 0.0224
Epoch 4/10
[1m106/106[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 49ms/step - loss: 4.9898e-04 - root_mean_squared_error: 0.0223
Epoch 5/10
[1m106/106[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 48ms/step - loss: 4.7743e-04 - root_mean_squared_error: 0.0218
Epoch 6/10
[1m106/106[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 47ms/step - loss: 4.5726e-04 - root_mean_squared_error: 0.0214
Epoch 7/10
[1m106/106[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 53ms/step - loss: 4.7525e-04 - root_mean_squared_error: 0.0218
Epoch 8/10
[1m106/106[0m [3

  y = column_or_1d(y, warn=True)


LGBM done
test_year is 2024 with hyperparameters of {'n_estimators': 100, 'min_samples_split': 5, 'min_samples_leaf': 4, 'max_features': 'log2', 'max_depth': 30}
RF Done
PCA Done
3 dates remaining


  y = column_or_1d(y, warn=True)


Lasso Done
Ridge Done
test_year is 2024 with hyperparameters of {'num_layers': 2, 'units': [32, 32], 'optimizer': 'Adam', 'drop_out': [0.2, 0.1], 'activation': ['tanh', 'tanh'], 'lr': 0.004409350423921994}
Epoch 1/10


  super().__init__(**kwargs)


[1m107/107[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 45ms/step - loss: 8.0976e-04 - root_mean_squared_error: 0.0281
Epoch 2/10
[1m107/107[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 46ms/step - loss: 5.0498e-04 - root_mean_squared_error: 0.0225
Epoch 3/10
[1m107/107[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 47ms/step - loss: 4.9334e-04 - root_mean_squared_error: 0.0222
Epoch 4/10
[1m107/107[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 46ms/step - loss: 4.9416e-04 - root_mean_squared_error: 0.0222
Epoch 5/10
[1m107/107[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 46ms/step - loss: 4.7351e-04 - root_mean_squared_error: 0.0218
Epoch 6/10
[1m107/107[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 46ms/step - loss: 4.6817e-04 - root_mean_squared_error: 0.0216
Epoch 7/10
[1m107/107[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 47ms/step - loss: 4.9133e-04 - root_mean_squared_error: 0.0222
Epoch 8/10
[1m107/107[0m [3

  y = column_or_1d(y, warn=True)


LGBM done
test_year is 2024 with hyperparameters of {'n_estimators': 100, 'min_samples_split': 5, 'min_samples_leaf': 4, 'max_features': 'log2', 'max_depth': 30}
RF Done
PCA Done
2 dates remaining


  y = column_or_1d(y, warn=True)


Lasso Done
Ridge Done
test_year is 2024 with hyperparameters of {'num_layers': 2, 'units': [32, 32], 'optimizer': 'Adam', 'drop_out': [0.2, 0.1], 'activation': ['tanh', 'tanh'], 'lr': 0.004409350423921994}
Epoch 1/10


  super().__init__(**kwargs)


[1m107/107[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 46ms/step - loss: 9.5939e-04 - root_mean_squared_error: 0.0303
Epoch 2/10
[1m107/107[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 49ms/step - loss: 5.1212e-04 - root_mean_squared_error: 0.0226
Epoch 3/10
[1m107/107[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 46ms/step - loss: 5.1341e-04 - root_mean_squared_error: 0.0227
Epoch 4/10
[1m107/107[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 46ms/step - loss: 4.8900e-04 - root_mean_squared_error: 0.0221
Epoch 5/10
[1m107/107[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 46ms/step - loss: 4.7150e-04 - root_mean_squared_error: 0.0217
Epoch 6/10
[1m107/107[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 46ms/step - loss: 5.0419e-04 - root_mean_squared_error: 0.0225
Epoch 7/10
[1m107/107[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 46ms/step - loss: 4.8524e-04 - root_mean_squared_error: 0.0220
Epoch 8/10
[1m107/107[0m [3

  y = column_or_1d(y, warn=True)


LGBM done
test_year is 2024 with hyperparameters of {'n_estimators': 100, 'min_samples_split': 5, 'min_samples_leaf': 4, 'max_features': 'log2', 'max_depth': 30}
RF Done
PCA Done
1 dates remaining


  y = column_or_1d(y, warn=True)


Lasso Done
Ridge Done
test_year is 2024 with hyperparameters of {'num_layers': 2, 'units': [32, 32], 'optimizer': 'Adam', 'drop_out': [0.2, 0.1], 'activation': ['tanh', 'tanh'], 'lr': 0.004409350423921994}


  super().__init__(**kwargs)


Epoch 1/10
[1m107/107[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m11s[0m 46ms/step - loss: 8.4190e-04 - root_mean_squared_error: 0.0285
Epoch 2/10
[1m107/107[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 46ms/step - loss: 5.2905e-04 - root_mean_squared_error: 0.0230
Epoch 3/10
[1m107/107[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 46ms/step - loss: 5.1069e-04 - root_mean_squared_error: 0.0226
Epoch 4/10
[1m107/107[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 46ms/step - loss: 5.0424e-04 - root_mean_squared_error: 0.0225
Epoch 5/10
[1m107/107[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 45ms/step - loss: 5.0475e-04 - root_mean_squared_error: 0.0225
Epoch 6/10
[1m107/107[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 45ms/step - loss: 4.8658e-04 - root_mean_squared_error: 0.0221
Epoch 7/10
[1m107/107[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 47ms/step - loss: 4.8928e-04 - root_mean_squared_error: 0.0221
Epoch 8/10
[1m107/

  y = column_or_1d(y, warn=True)


LGBM done
test_year is 2024 with hyperparameters of {'n_estimators': 100, 'min_samples_split': 5, 'min_samples_leaf': 4, 'max_features': 'log2', 'max_depth': 30}
RF Done
PCA Done
0 dates remaining


In [12]:
y_tech.to_csv("tech_funds_forecast_tuned.csv")

Random Forest Hyperparameter Tuning (Healthcare)

In [None]:
from sklearn.model_selection import RandomizedSearchCV
tscv = TimeSeriesSplit(n_splits = 5)

df['date'] = pd.to_datetime(df['date']) # converting to date format
df = df.sort_values(by='date')
df_factor = df.drop(columns=['mkt_return','mth_return','rf']) # remove irrelevant variables

# Creating Lagged and Stepped Datasets
X_dataset, y_dataset = create_stepped_dataset(create_lagged_dataset(df_factor, lag=1,target_var='rolling_alpha_5f', id = 'crsp_fundno'),step=1,target_var='rolling_alpha_5f', id = 'crsp_fundno_L1')
# return(X_dataset)
X_dataset = X_dataset.drop(columns=['crsp_fundno_L1'], errors='ignore')

list_of_dates = pd.to_datetime(X_dataset['date_L1'])
percentile_70 = list_of_dates.quantile(0.7) # 70-30 split

train_end = list_of_dates.loc[(list_of_dates - percentile_70).abs().idxmin()]
df_end = list_of_dates.max()

# Define hyperparameter grid
rf_param_grid = {
    'n_estimators': [100, 300, 500, 1000],  # Number of trees
    'max_depth': [10, 20, 30, None],  # Depth of trees
    'min_samples_split': [2, 5, 10],  # Minimum samples for split
    'min_samples_leaf': [1, 2, 4],  # Minimum samples per leaf
    'max_features': ['sqrt', 'log2', None]  # Number of features considered per split
}

def rf_tuner(X_train, y_train, rf_param_grid):
    # Initialize RandomForestRegressor
    rf_model = RandomForestRegressor(random_state=40, n_jobs=-1)

    # Use RandomizedSearchCV for efficiency
    rf_random_search = RandomizedSearchCV(
        estimator=rf_model,
        param_distributions=rf_param_grid,
        n_iter=20,  # Number of parameter settings tested
        cv=tscv,  # TimeSeriesSplit
        scoring='neg_mean_squared_error',
        verbose=2,
        random_state=42,
        n_jobs=-1
    )

    # Perform hyperparameter tuning
    rf_random_search.fit(X_train, y_train.values.ravel())

    # Best hyperparameters
    best_rf_params = rf_random_search.best_params_
    print(f'Best RF Parameters: {best_rf_params}')
    return(best_rf_params)

In [None]:
hyperparam_lst = []

# 2019
test_date = generate_next_date(list_of_dates, train_end)
X_train, X_test, y_train, y_test = process_factor_model(X_dataset, y_dataset, train_end, test_date)
initial_best_rf_params = rf_tuner(X_train, y_train, rf_param_grid)
hyperparam_lst.append(initial_best_rf_params)
current_year = 2019

while pd.notna(test_date):
    test_date = generate_next_date(list_of_dates, train_end)
    if test_date.year != (current_year + 1): # Still same year: skip and repeat until the year is the next year
        train_end = test_date
        print(f"Train End is {train_end}, year is {train_end.year}, which is same as current year {current_year}. Repeating until following year is obtained")
        continue
    else:
        print(f"Test Date is {test_date}, year is {test_date.year}, which means it is being trained until the end of {current_year}. Conducting Tuning")
        # Process data for modeling
        X_train, X_test, y_train, y_test = process_factor_model(X_dataset, y_dataset, train_end, test_date)
        best_rf_params = rf_tuner(X_train, y_train, rf_param_grid)
        hyperparam_lst.append(best_rf_params)
        train_end = test_date # after tuning, expand the training set
        current_year = current_year + 1

Fitting 5 folds for each of 20 candidates, totalling 100 fits
Best RF Parameters: {'n_estimators': 300, 'min_samples_split': 5, 'min_samples_leaf': 4, 'max_features': 'log2', 'max_depth': 20}
Train End is 2019-12-31 00:00:00, year is 2019, which is same as current year 2019. Repeating until following year is obtained
Test Date is 2020-01-31 00:00:00, year is 2020, which means it is being trained until the end of 2019. Conducting Tuning
Fitting 5 folds for each of 20 candidates, totalling 100 fits
Best RF Parameters: {'n_estimators': 100, 'min_samples_split': 10, 'min_samples_leaf': 4, 'max_features': 'log2', 'max_depth': None}
Train End is 2020-02-28 00:00:00, year is 2020, which is same as current year 2020. Repeating until following year is obtained
Train End is 2020-03-31 00:00:00, year is 2020, which is same as current year 2020. Repeating until following year is obtained
Train End is 2020-04-30 00:00:00, year is 2020, which is same as current year 2020. Repeating until following y

LSTM Hyperparam Tuning Functions

In [25]:
from tensorflow.keras.optimizers import Adam, Adagrad, Nadam
def build_lstm_model(hp):
    lstm_model = Sequential()
    # Select the number of LSTM layers (2, 3, or 4)
    num_layers = hp.Choice('num_layers', [2, 3, 4])
    # Choose optimizer
    optimizer_name = hp.Choice('optimizer', ['Adam', 'Adagrad'])
    optimizers = {
        "Adam": Adam(learning_rate=hp.Float('learning_rate', 1e-4, 1e-1, sampling='LOG')),
        "Adagrad": Adagrad(learning_rate=hp.Float('learning_rate', 1e-4, 1e-1, sampling='LOG')),
        "Nadam": Nadam(learning_rate=hp.Float('learning_rate', 1e-4, 1e-1, sampling='LOG'))
    }
    # Layer 1
    first_layer_units = hp.Int('lstm_units_1', min_value=32, max_value=128, step=32)
    lstm_model.add(LSTM(
        units=first_layer_units,
        return_sequences=True,
        input_shape=(X_train_lstm.shape[1], 1),
        activation=hp.Choice(f'activation_1', ['tanh', 'sigmoid', 'linear'])
    ))
    lstm_model.add(Dropout(hp.Float('dropout_1', min_value=0.1, max_value=0.3, step=0.1)))
    
    # Add remaining layers with non-increasing units
    previous_units = first_layer_units  # Track previous layer units
    
    for i in range(1, num_layers):
        available_units = [u for u in [32, 64, 96, 128] if u <= previous_units]  # Only allow non-increasing sizes
        current_units = hp.Choice(f'lstm_units_{i+1}', available_units)

        lstm_model.add(LSTM(
            units=current_units,
            return_sequences=True if i < num_layers - 1 else False,  
            activation=hp.Choice(f'activation_{i+1}', ['tanh', 'sigmoid', 'linear'])
        ))
        lstm_model.add(Dropout(hp.Float(f'dropout_{i+1}', min_value=0.1, max_value=0.3, step=0.1)))
        previous_units = current_units  # Update tracking variable
    
    # Output Layer
    lstm_model.add(Dense(units=1))
    
    # Compile the model
    lstm_model.compile(
        optimizer=optimizers[optimizer_name],
        loss='mean_squared_error',
        metrics=[tf.keras.metrics.RootMeanSquaredError()]
    )
    return lstm_model

def tune_lstm(X_train_lstm, y_train_lstm, year, save_path="best_lstm_model"):
    tuner = kt.BayesianOptimization(
        build_lstm_model,
        objective='val_loss',
        max_trials=50,
        executions_per_trial=1,
        directory=f'lstm_tuning_{year}_tech',
        project_name='fund_forecasting'
    )

    # Perform the search with batch_size as a hyperparameter to tune
    tuner.search(
        X_train_lstm, y_train_lstm,
        epochs=10,
        validation_split=0.2,
        batch_size=512,
        callbacks=[tf.keras.callbacks.EarlyStopping(monitor='val_loss', patience=3)]
    )

    # Get the best hyperparameters
    best_hps = tuner.get_best_hyperparameters(num_trials=1)[0]
    
    # Print the best hyperparameters
    print("Best Hyperparameters:", best_hps.values)

    return best_hps

LSTM Hyperparameter Tuning for 2019 - 2024 --> Yearly Hyperparameter Tuning

In [32]:
df_tech['date'] = pd.to_datetime(df_tech['date']) # converting to date format
df_tech = df_tech.sort_values(by='date')
df_factor = df_tech.drop(columns=['mkt_return','mth_return','rf','rolling_sharpe', 'rolling_alpha_3f', 'rolling_alpha_4f'])

# Creating Lagged and Stepped Datasets
X_dataset, y_dataset = create_stepped_dataset(create_lagged_dataset(df_factor, lag=1,target_var='rolling_alpha_5f', id = 'crsp_fundno'),step=1,target_var='rolling_alpha_5f', id = 'crsp_fundno_L1')
# return(X_dataset)
X_dataset = X_dataset.drop(columns=['crsp_fundno_L1'], errors='ignore')

list_of_dates = pd.to_datetime(X_dataset['date_L1'])
percentile_70 = list_of_dates.quantile(0.7) # 70-30 split

train_end = list_of_dates.loc[(list_of_dates - percentile_70).abs().idxmin()]
df_end = list_of_dates.max()

hyperparam_lst = []

# 2019
current_year = 2019
test_date = generate_next_date(list_of_dates, train_end)
X_train, X_test, y_train, y_test = process_factor_model(X_dataset, y_dataset, train_end, test_date)
X_train_lstm = np.array(X_train).astype(np.float32)
X_test_lstm = np.array(X_test).astype(np.float32)
y_train_lstm = np.array(y_train).astype(np.float32)
best_model_params = tune_lstm(X_train_lstm, y_train_lstm, year = current_year)
hyperparam_lst.append(best_model_params)
# current_year = 2020

while pd.notna(test_date):
    test_date = generate_next_date(list_of_dates, train_end)
    if test_date.year != (current_year + 1): # Still same year: skip and repeat until the year is the next year
        train_end = test_date
        print(f"Train End is {train_end}, year is {train_end.year}, which is same as current year {current_year}. Repeating until following year is obtained")
        continue
    else:
        print(f"Test Date is {test_date}, year is {test_date.year}, which means it is being trained until the end of {current_year}. Conducting Tuning")
        # Process data for modeling
        X_train, X_test, y_train, y_test = process_factor_model(X_dataset, y_dataset, train_end, test_date)
        X_train_lstm = np.array(X_train).astype(np.float32)
        X_test_lstm = np.array(X_test).astype(np.float32)
        y_train_lstm = np.array(y_train).astype(np.float32)
        best_model_params = tune_lstm(X_train_lstm, y_train_lstm, year = current_year+1)
        hyperparam_lst.append(best_model_params)
        train_end = test_date # after tuning, expand the training set
        current_year = current_year + 1

Reloading Tuner from lstm_tuning_2019_tech\fund_forecasting\tuner0.json
Best Hyperparameters: {'num_layers': 2, 'optimizer': 'Adam', 'learning_rate': 0.04130929884537057, 'lstm_units_1': 32, 'activation_1': 'linear', 'dropout_1': 0.1, 'lstm_units_2': 32, 'activation_2': 'tanh', 'dropout_2': 0.1, 'lstm_units_3': 32, 'activation_3': 'tanh', 'dropout_3': 0.1, 'lstm_units_4': 32, 'activation_4': 'sigmoid', 'dropout_4': 0.2}
Train End is 2019-02-28 00:00:00, year is 2019, which is same as current year 2019. Repeating until following year is obtained
Train End is 2019-03-29 00:00:00, year is 2019, which is same as current year 2019. Repeating until following year is obtained
Train End is 2019-04-30 00:00:00, year is 2019, which is same as current year 2019. Repeating until following year is obtained
Train End is 2019-05-31 00:00:00, year is 2019, which is same as current year 2019. Repeating until following year is obtained
Train End is 2019-06-28 00:00:00, year is 2019, which is same as cur

In [29]:
from sklearn.model_selection import RandomizedSearchCV
tscv = TimeSeriesSplit(n_splits = 5)

df_tech['date'] = pd.to_datetime(df_tech['date']) # converting to date format
df_tech = df_tech.sort_values(by='date')
df_factor = df_tech.drop(columns=['mkt_return','mth_return','rf','rolling_sharpe', 'rolling_alpha_3f', 'rolling_alpha_4f'])

# Creating Lagged and Stepped Datasets
X_dataset, y_dataset = create_stepped_dataset(create_lagged_dataset(df_factor, lag=1,target_var='rolling_alpha_5f', id = 'crsp_fundno'),step=1,target_var='rolling_alpha_5f', id = 'crsp_fundno_L1')
# return(X_dataset)
X_dataset = X_dataset.drop(columns=['crsp_fundno_L1'], errors='ignore')

list_of_dates = pd.to_datetime(X_dataset['date_L1'])
percentile_70 = list_of_dates.quantile(0.7) # 70-30 split

train_end = list_of_dates.loc[(list_of_dates - percentile_70).abs().idxmin()]
df_end = list_of_dates.max()

# Define hyperparameter grid
rf_param_grid = {
    'n_estimators': [100, 300, 500, 1000],  # Number of trees
    'max_depth': [10, 20, 30, None],  # Depth of trees
    'min_samples_split': [2, 5, 10],  # Minimum samples for split
    'min_samples_leaf': [1, 2, 4],  # Minimum samples per leaf
    'max_features': ['sqrt', 'log2', None]  # Number of features considered per split
}

def rf_tuner(X_train, y_train, rf_param_grid):
    # Initialize RandomForestRegressor
    rf_model = RandomForestRegressor(random_state=40, n_jobs=-1)

    # Use RandomizedSearchCV for efficiency
    rf_random_search = RandomizedSearchCV(
        estimator=rf_model,
        param_distributions=rf_param_grid,
        n_iter=20,  # Number of parameter settings tested
        cv=tscv,  # TimeSeriesSplit
        scoring='neg_mean_squared_error',
        verbose=2,
        random_state=42,
        n_jobs=-1
    )

    # Perform hyperparameter tuning
    rf_random_search.fit(X_train, y_train.values.ravel())

    # Best hyperparameters
    best_rf_params = rf_random_search.best_params_
    print(f'Best RF Parameters: {best_rf_params}')
    return(best_rf_params)

In [30]:
hyperparam_lst_rf = []

# 2019
test_date = generate_next_date(list_of_dates, train_end)
X_train, X_test, y_train, y_test = process_factor_model(X_dataset, y_dataset, train_end, test_date)
initial_best_rf_params = rf_tuner(X_train, y_train, rf_param_grid)
hyperparam_lst_rf.append(initial_best_rf_params)
current_year = 2019

while pd.notna(test_date):
    test_date = generate_next_date(list_of_dates, train_end)
    if test_date.year != (current_year + 1): # Still same year: skip and repeat until the year is the next year
        train_end = test_date
        print(f"Train End is {train_end}, year is {train_end.year}, which is same as current year {current_year}. Repeating until following year is obtained")
        continue
    else:
        print(f"Test Date is {test_date}, year is {test_date.year}, which means it is being trained until the end of {current_year}. Conducting Tuning")
        # Process data for modeling
        X_train, X_test, y_train, y_test = process_factor_model(X_dataset, y_dataset, train_end, test_date)
        best_rf_params = rf_tuner(X_train, y_train, rf_param_grid)
        hyperparam_lst_rf.append(best_rf_params)
        train_end = test_date # after tuning, expand the training set
        current_year = current_year + 1

Fitting 5 folds for each of 20 candidates, totalling 100 fits
Best RF Parameters: {'n_estimators': 300, 'min_samples_split': 5, 'min_samples_leaf': 4, 'max_features': 'log2', 'max_depth': 20}
Train End is 2019-02-28 00:00:00, year is 2019, which is same as current year 2019. Repeating until following year is obtained
Train End is 2019-03-29 00:00:00, year is 2019, which is same as current year 2019. Repeating until following year is obtained
Train End is 2019-04-30 00:00:00, year is 2019, which is same as current year 2019. Repeating until following year is obtained
Train End is 2019-05-31 00:00:00, year is 2019, which is same as current year 2019. Repeating until following year is obtained
Train End is 2019-06-28 00:00:00, year is 2019, which is same as current year 2019. Repeating until following year is obtained
Train End is 2019-07-31 00:00:00, year is 2019, which is same as current year 2019. Repeating until following year is obtained
Train End is 2019-08-30 00:00:00, year is 2019

In [31]:
hyperparam_lst_rf

[{'n_estimators': 300,
  'min_samples_split': 5,
  'min_samples_leaf': 4,
  'max_features': 'log2',
  'max_depth': 20},
 {'n_estimators': 300,
  'min_samples_split': 5,
  'min_samples_leaf': 4,
  'max_features': 'log2',
  'max_depth': 20},
 {'n_estimators': 300,
  'min_samples_split': 5,
  'min_samples_leaf': 4,
  'max_features': 'log2',
  'max_depth': 20},
 {'n_estimators': 100,
  'min_samples_split': 10,
  'min_samples_leaf': 4,
  'max_features': 'log2',
  'max_depth': None},
 {'n_estimators': 300,
  'min_samples_split': 5,
  'min_samples_leaf': 4,
  'max_features': 'log2',
  'max_depth': 20},
 {'n_estimators': 100,
  'min_samples_split': 5,
  'min_samples_leaf': 4,
  'max_features': 'log2',
  'max_depth': 30}]

In [None]:
import pandas as pd
import numpy as np
import os
import tensorflow as tf
from tensorflow.keras.models import Sequential, load_model
from tensorflow.keras.layers import LSTM, Dense, Dropout
from tensorflow.keras.optimizers import Adam, Adagrad, Nadam
import keras_tuner as kt
tf.compat.v1.enable_eager_execution()
# Convert date column to datetime and sort
df['date'] = pd.to_datetime(df['date'])
df = df.sort_values(by='date')

# Remove irrelevant variables
df_factor = df.drop(columns=['mkt_return', 'mth_return', 'rf'], errors='ignore')

# Create lagged and stepped datasets
X_dataset, y_dataset = create_stepped_dataset(
    create_lagged_dataset(df_factor, lag=1, target_var='rolling_alpha_5f', id='crsp_fundno'),
    step=1,
    target_var='rolling_alpha_5f',
    id='crsp_fundno_L1'
)

# Drop fund ID column (if present)
X_dataset = X_dataset.drop(columns=['crsp_fundno_L1'], errors='ignore')

# Define train-test split based on date
list_of_dates = pd.to_datetime(X_dataset['date_L1'])
percentile_70 = list_of_dates.quantile(0.7)

train_end = list_of_dates.loc[(list_of_dates - percentile_70).abs().idxmin()]
df_end = list_of_dates.max()

hyperparam_lst = []

# Start from 2019
current_year = 2018
test_date = generate_next_date(list_of_dates, train_end)
pretrained_model = None

# ---- Function to Build LSTM Model ----
def build_lstm_model(hp):
    lstm_model = Sequential()
    
    num_layers = hp.Choice('num_layers', [2, 3, 4])  # Choose number of LSTM layers
    optimizer_name = hp.Choice('optimizer', ['Adam', 'Adagrad', 'Nadam'])  # Choose optimizer

    optimizers = {
        "Adam": Adam(learning_rate=hp.Float('learning_rate', 1e-4, 1e-1, sampling='LOG')),
        "Adagrad": Adagrad(learning_rate=hp.Float('learning_rate', 1e-4, 1e-1, sampling='LOG')),
        "Nadam": Nadam(learning_rate=hp.Float('learning_rate', 1e-4, 1e-1, sampling='LOG'))
    }
    
    first_layer_units = hp.Int('lstm_units_1', min_value=32, max_value=128, step=32)
    lstm_model.add(LSTM(
        units=first_layer_units,
        return_sequences=True,
        input_shape=(X_train_lstm.shape[1], 1),
        activation=hp.Choice('activation_1', ['tanh', 'sigmoid', 'linear'])
    ))
    lstm_model.add(Dropout(hp.Float('dropout_1', min_value=0.1, max_value=0.3, step=0.1)))

    previous_units = first_layer_units

    for i in range(1, num_layers):
        available_units = [u for u in [32, 64, 96, 128] if u <= previous_units]
        current_units = hp.Choice(f'lstm_units_{i+1}', available_units)

        lstm_model.add(LSTM(
            units=current_units,
            return_sequences=True if i < num_layers - 1 else False,
            activation=hp.Choice(f'activation_{i+1}', ['tanh', 'sigmoid', 'linear'])
        ))
        lstm_model.add(Dropout(hp.Float(f'dropout_{i+1}', min_value=0.1, max_value=0.3, step=0.1)))
        previous_units = current_units  

    # Output layer
    lstm_model.add(Dense(units=1))

    # Compile model
    lstm_model.compile(
        optimizer=optimizers[optimizer_name],
        loss='mean_squared_error',
        metrics=[tf.keras.metrics.RootMeanSquaredError()]
    )
    return lstm_model

# ---- Function to Tune or Fine-Tune LSTM ----
def tune_lstm(X_train_lstm, y_train_lstm, year, pretrained_model=None, save_path="best_lstm_model"):
    """Trains or fine-tunes an LSTM model with hyperparameter tuning."""
    
    model_path = f"{save_path}_{year}.keras"
    
    if pretrained_model and os.path.exists(pretrained_model):
        print(f"Loading pretrained model from {pretrained_model}...")
        lstm_model = load_model(pretrained_model)
    else:
        print("No pretrained model found. Running full hyperparameter tuning...")
        tuner = kt.BayesianOptimization(
            build_lstm_model,
            objective='val_loss',
            max_trials=50,
            executions_per_trial=1,
            directory=f'lstm_tuning_{year}',
            project_name='fund_forecasting'
        )

        tuner.search(
            X_train_lstm, y_train_lstm,
            epochs=50,
            validation_split=0.2,
            batch_size=128,
            callbacks=[tf.keras.callbacks.EarlyStopping(monitor='val_loss', patience=3)]
        )

        best_hps = tuner.get_best_hyperparameters(num_trials=1)[0]
        print(f"Best Hyperparameters for {year}: {best_hps.values}")
        lstm_model = tuner.hypermodel.build(best_hps)

    # Train (or fine-tune) the model
    lstm_model.fit(
        X_train_lstm, y_train_lstm,
        epochs=20,
        validation_split=0.2,
        batch_size=128,
        callbacks=[tf.keras.callbacks.EarlyStopping(monitor='val_loss', patience=3)]
    )

    # Save model for next year's training
    lstm_model.save(model_path)
    print(f"Model saved as {model_path}")
    
    return model_path  # Return saved model path

# ---- Loop Through Years for Transfer Learning ----
while pd.notna(test_date):
    test_date = generate_next_date(list_of_dates, train_end)
    
    if test_date.year != (current_year + 1):
        train_end = test_date
        print(f"Train End: {train_end}, same year {current_year}. Repeating until next year.")
        continue

    print(f"\nTraining {current_year} model until {test_date}. Conducting hyperparameter tuning...")
    
    # Prepare data for training
    X_train, X_test, y_train, y_test = process_factor_model(X_dataset, y_dataset, train_end, test_date)
    X_train_lstm = np.array(X_train).astype(np.float32)
    X_test_lstm = np.array(X_test).astype(np.float32)
    y_train_lstm = np.array(y_train).astype(np.float32)

    # Train or fine-tune model
    pretrained_model = tune_lstm(X_train_lstm, y_train_lstm, year=current_year+1, pretrained_model=pretrained_model)
    hyperparam_lst.append(pretrained_model)

    # Update year and training end date
    train_end = test_date
    current_year += 1


Training 2018 model until 2019-12-31 00:00:00. Conducting hyperparameter tuning...
No pretrained model found. Running full hyperparameter tuning...
Reloading Tuner from lstm_tuning_2019\fund_forecasting\tuner0.json
Best Hyperparameters for 2019: {'num_layers': 4, 'optimizer': 'Nadam', 'learning_rate': 0.007390757774868635, 'lstm_units_1': 32, 'activation_1': 'linear', 'dropout_1': 0.1, 'lstm_units_2': 32, 'activation_2': 'tanh', 'dropout_2': 0.2, 'lstm_units_3': 32, 'activation_3': 'sigmoid', 'dropout_3': 0.1, 'lstm_units_4': 32, 'activation_4': 'tanh', 'dropout_4': 0.1}


  super().__init__(**kwargs)


Epoch 1/10
[1m201/201[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m34s[0m 116ms/step - loss: 0.0245 - root_mean_squared_error: 0.1340 - val_loss: 2.4114e-04 - val_root_mean_squared_error: 0.0155
Epoch 2/10
[1m201/201[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m21s[0m 106ms/step - loss: 5.5111e-04 - root_mean_squared_error: 0.0235 - val_loss: 2.4324e-04 - val_root_mean_squared_error: 0.0156
Epoch 3/10
[1m201/201[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m21s[0m 104ms/step - loss: 5.2553e-04 - root_mean_squared_error: 0.0229 - val_loss: 2.5315e-04 - val_root_mean_squared_error: 0.0159
Epoch 4/10
[1m201/201[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m21s[0m 104ms/step - loss: 4.9586e-04 - root_mean_squared_error: 0.0223 - val_loss: 2.3901e-04 - val_root_mean_squared_error: 0.0155
Epoch 5/10
[1m201/201[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m21s[0m 106ms/step - loss: 4.8268e-04 - root_mean_squared_error: 0.0220 - val_loss: 2.5097e-04 - val_root_mean_squared_err

In [None]:
import pandas as pd
import numpy as np
import os
import tensorflow as tf
from tensorflow.keras.models import Sequential, load_model
from tensorflow.keras.layers import LSTM, Dense, Dropout
from tensorflow.keras.optimizers import Adam, Adagrad, Nadam
import keras_tuner as kt
tf.compat.v1.enable_eager_execution()
# Convert date column to datetime and sort
df_tech['date'] = pd.to_datetime(df_tech['date'])
df_tech = df_tech.sort_values(by='date')

# Remove irrelevant variables
df_factor = df_tech.drop(columns=['mkt_return', 'mth_return', 'rf'], errors='ignore')

# Create lagged and stepped datasets
X_dataset, y_dataset = create_stepped_dataset(
    create_lagged_dataset(df_factor, lag=1, target_var='rolling_alpha_5f', id='crsp_fundno'),
    step=1,
    target_var='rolling_alpha_5f',
    id='crsp_fundno_L1'
)

# Drop fund ID column (if present)
X_dataset = X_dataset.drop(columns=['crsp_fundno_L1'], errors='ignore')

# Define train-test split based on date
list_of_dates = pd.to_datetime(X_dataset['date_L1'])
percentile_70 = list_of_dates.quantile(0.7)

train_end = list_of_dates.loc[(list_of_dates - percentile_70).abs().idxmin()]
df_end = list_of_dates.max()

hyperparam_lst = []

# Start from 2019
current_year = 2018
test_date = generate_next_date(list_of_dates, train_end)
pretrained_model = None

# ---- Function to Build LSTM Model ----
def build_lstm_model(hp):
    lstm_model = Sequential()
    
    num_layers = hp.Choice('num_layers', [2, 3, 4])  # Choose number of LSTM layers
    optimizer_name = hp.Choice('optimizer', ['Adam', 'Adagrad', 'Nadam'])  # Choose optimizer

    optimizers = {
        "Adam": Adam(learning_rate=hp.Float('learning_rate', 1e-4, 1e-1, sampling='LOG')),
        "Adagrad": Adagrad(learning_rate=hp.Float('learning_rate', 1e-4, 1e-1, sampling='LOG')),
        "Nadam": Nadam(learning_rate=hp.Float('learning_rate', 1e-4, 1e-1, sampling='LOG'))
    }
    
    first_layer_units = hp.Int('lstm_units_1', min_value=32, max_value=128, step=32)
    lstm_model.add(LSTM(
        units=first_layer_units,
        return_sequences=True,
        input_shape=(X_train_lstm.shape[1], 1),
        activation=hp.Choice('activation_1', ['tanh', 'sigmoid', 'linear'])
    ))
    lstm_model.add(Dropout(hp.Float('dropout_1', min_value=0.1, max_value=0.3, step=0.1)))

    previous_units = first_layer_units

    for i in range(1, num_layers):
        available_units = [u for u in [32, 64, 96, 128] if u <= previous_units]
        current_units = hp.Choice(f'lstm_units_{i+1}', available_units)

        lstm_model.add(LSTM(
            units=current_units,
            return_sequences=True if i < num_layers - 1 else False,
            activation=hp.Choice(f'activation_{i+1}', ['tanh', 'sigmoid', 'linear'])
        ))
        lstm_model.add(Dropout(hp.Float(f'dropout_{i+1}', min_value=0.1, max_value=0.3, step=0.1)))
        previous_units = current_units  

    # Output layer
    lstm_model.add(Dense(units=1))

    # Compile model
    lstm_model.compile(
        optimizer=optimizers[optimizer_name],
        loss='mean_squared_error',
        metrics=[tf.keras.metrics.RootMeanSquaredError()]
    )
    return lstm_model

# ---- Function to Tune or Fine-Tune LSTM ----
def tune_lstm(X_train_lstm, y_train_lstm, year, pretrained_model=None, save_path="best_lstm_model"):
    """Trains or fine-tunes an LSTM model with hyperparameter tuning."""
    
    model_path = f"{save_path}_{year}_tech.keras"
    
    if pretrained_model and os.path.exists(pretrained_model):
        print(f"Loading pretrained model from {pretrained_model}...")
        lstm_model = load_model(pretrained_model)
    else:
        print("No pretrained model found. Running full hyperparameter tuning...")
        tuner = kt.BayesianOptimization(
            build_lstm_model,
            objective='val_loss',
            max_trials=50,
            executions_per_trial=1,
            directory=f'lstm_tuning_{year}',
            project_name='fund_forecasting'
        )

        tuner.search(
            X_train_lstm, y_train_lstm,
            epochs=50,
            validation_split=0.2,
            batch_size=128,
            callbacks=[tf.keras.callbacks.EarlyStopping(monitor='val_loss', patience=3)]
        )

        best_hps = tuner.get_best_hyperparameters(num_trials=1)[0]
        print(f"Best Hyperparameters for {year}: {best_hps.values}")
        lstm_model = tuner.hypermodel.build(best_hps)

    # Train (or fine-tune) the model
    lstm_model.fit(
        X_train_lstm, y_train_lstm,
        epochs=20,
        validation_split=0.2,
        batch_size=128,
        callbacks=[tf.keras.callbacks.EarlyStopping(monitor='val_loss', patience=3)]
    )

    # Save model for next year's training
    lstm_model.save(model_path)
    print(f"Model saved as {model_path}")
    
    return model_path  # Return saved model path

# ---- Loop Through Years for Transfer Learning ----
while pd.notna(test_date):
    test_date = generate_next_date(list_of_dates, train_end)
    
    if test_date.year != (current_year + 1):
        train_end = test_date
        print(f"Train End: {train_end}, same year {current_year}. Repeating until next year.")
        continue

    print(f"\nTraining {current_year} model until {test_date}. Conducting hyperparameter tuning...")
    
    # Prepare data for training
    X_train, X_test, y_train, y_test = process_factor_model(X_dataset, y_dataset, train_end, test_date)
    X_train_lstm = np.array(X_train).astype(np.float32)
    X_test_lstm = np.array(X_test).astype(np.float32)
    y_train_lstm = np.array(y_train).astype(np.float32)

    # Train or fine-tune model
    pretrained_model = tune_lstm(X_train_lstm, y_train_lstm, year=current_year+1, pretrained_model=pretrained_model)
    hyperparam_lst.append(pretrained_model)

    # Update year and training end date
    train_end = test_date
    current_year += 1

In [None]:
# lstm_hyperparams_dict_healthcare = dict(
#     {'2019': dict({
#         'num_layers':2,
#         'units':[32,32],
#         'optimizer':'Adam',
#         'drop_out':[0.2,0.2],
#         'activation':['tanh','sigmoid'],
#         'lr':0.01995625160697196
#     }),
#      '2020': dict({
#         'num_layers':3,
#         'units':[32,32,32],
#         'optimizer':'Adam',
#         'drop_out':[0.2,0.2,0.1],
#         'activation':['linear','sigmoid','sigmoid'],
#         'lr':0.0011839804874350056
#     }),
#      '2021': dict({
#         'num_layers':2,
#         'units':[128,32],
#         'optimizer':'Adam',
#         'drop_out':[0.2,0.1],
#         'activation':['tanh','linear'],
#         'lr':0.018389663547277172
#     }),
#      '2022': dict({
#         'num_layers':4,
#         'units':[64,32,32,32],
#         'optimizer':'Adam',
#         'drop_out':[0.1,0.1,0.2,0.2],
#         'activation':['linear','tanh','tanh','linear'],
#         'lr':0.011965744369339311
#     }),
#      '2023': dict({
#         'num_layers':2,
#         'units':[32,32],
#         'optimizer':'Adam',
#         'drop_out':[0.2,0.1],
#         'activation':['tanh','tanh'],
#         'lr':0.0005130193097484537
#     }),
#      '2024': dict({
#         'num_layers':2,
#         'units':[32,32],
#         'optimizer':'Adam',
#         'drop_out':[0.1,0.2],
#         'activation':['linear','tanh'],
#         'lr':0.0035163892485339547
#     })}
# )