In [38]:
# Global tools
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.linear_model import Ridge, Lasso, ElasticNet
from sklearn.metrics import mean_squared_error, r2_score, make_scorer
from sklearn.model_selection import GridSearchCV, train_test_split


Feature Selection Summary:

Benchmark OLS: ['AMD' 'ADS_Index' 'RF' 'CBBTCUSD' 'DJIA' 'MACD_Signal' 'RSI']

Ridge: ['AMD' 'ADS_Index' 'Mkt-RF' 'SMB' 'HML' 'RMW' 'CMA' 'RF' 'CBBTCUSD' 'DJIA'
 'MACD_Signal' 'RSI']

Lasso: ['RF' 'CBBTCUSD' 'DJIA' 'MACD_Signal']

Elastic Net: ['ADS_Index' 'RF' 'CBBTCUSD' 'DJIA' 'MACD_Signal' 'RSI']

LARS: ['ADS_Index' 'Mkt-RF' 'SMB' 'RMW' 'RF' 'CBBTCUSD' 'DJIA' 'MACD_Signal']


In [39]:
df = pd.read_csv('./DataSelected.csv')
target_column = 'NVDA' 
y = df[target_column]
X = df.drop(columns=[target_column])
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=0)


In [40]:
# Simple Linear Models + Grid Search 

In [41]:
# Lasso 
param_grid = {
    #'alpha': [0.001, 0.01, 0.1, 1.0, 10.0] # Lasso Best Params: {'alpha': 0.001}
    'alpha': np.linspace(0.0001, 0.1, 1000)
    }
scorer = make_scorer(mean_squared_error, greater_is_better=False)
lasso = Lasso()
grid_search_lasso = GridSearchCV(
    estimator=lasso,
    param_grid=param_grid,
    scoring=scorer,
    cv=10,              
    n_jobs=-1       
)

grid_search_lasso.fit(X_train, y_train)

print("Lasso Best Params:", grid_search_lasso.best_params_)
print("Lasso Best Score:", grid_search_lasso.best_score_)

best_lasso = grid_search_lasso.best_estimator_
test_predictions = best_lasso.predict(X_test)
test_mse = mean_squared_error(y_test, test_predictions)
print("Test MSE with Best Found Parameters:", test_mse)

Lasso Best Params: {'alpha': np.float64(0.0017000000000000001)}
Lasso Best Score: -0.16834712238655897
Test MSE with Best Found Parameters: 0.1699278032812201


In [42]:
# Ridge
param_grid_ridge = {
    #'alpha': [0.01, 0.1, 1.0, 10.0, 100.0] # Ridge Best Params: {'alpha': 0.01}
    'alpha': np.linspace(0.0001, 0.1, 1000)
}
scorer = make_scorer(mean_squared_error, greater_is_better=False)
ridge = Ridge()
grid_search_ridge = GridSearchCV(
    estimator=ridge, 
    param_grid=param_grid_ridge, 
    scoring=scorer, 
    cv=10, 
    n_jobs=-1)
grid_search_ridge.fit(X_train, y_train)

print("Ridge Best Params:", grid_search_ridge.best_params_)
print("Ridge Best Score:", grid_search_ridge.best_score_)

best_ridge = grid_search_ridge.best_estimator_
test_predictions = best_ridge.predict(X_test)
test_mse = mean_squared_error(y_test, test_predictions)
print("Test MSE with Best Found Parameters:", test_mse)

Ridge Best Params: {'alpha': np.float64(0.1)}
Ridge Best Score: -0.16836949428374867
Test MSE with Best Found Parameters: 0.16915805783224286


In [43]:
# Elastic Net
param_grid_elastic = {
    # 'alpha': [0.01, 0.1, 1.0], # Elastic Net Best Params: {'alpha': 0.01, 'l1_ratio': 0.9}
    # 'l1_ratio': [0.1, 0.5, 0.9]
    'alpha': np.logspace(-3, -1, 50), 
    'l1_ratio': np.linspace(0.1, 1.0, 10)  
}
scorer = make_scorer(mean_squared_error, greater_is_better=False)
elastic = ElasticNet()
grid_search_elastic = GridSearchCV(
    estimator=elastic, 
    param_grid=param_grid_elastic, 
    scoring=scorer, 
    cv=10, 
    n_jobs=-1)
grid_search_elastic.fit(X_train, y_train)
print("Elastic Net Best Params:", grid_search_elastic.best_params_)
print("Elastic Net Best Score:", grid_search_elastic.best_score_)

best_elastic = grid_search_elastic.best_estimator_
test_predictions = best_elastic.predict(X_test)
test_mse = mean_squared_error(y_test, test_predictions)
print("Test MSE with Best Found Parameters:", test_mse)


Elastic Net Best Params: {'alpha': np.float64(0.006551285568595509), 'l1_ratio': np.float64(0.1)}
Elastic Net Best Score: -0.16812036079581824
Test MSE with Best Found Parameters: 0.1705288136810767
