In [1]:
from sklearn.ensemble import AdaBoostRegressor, GradientBoostingRegressor, RandomForestRegressor 
from sklearn.linear_model import LinearRegression, RidgeCV, LassoCV, ElasticNetCV
from sklearn.metrics import mean_squared_error
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import PolynomialFeatures
from xgboost import XGBRegressor
import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
import regr_nn
import torch.nn as nn

In [2]:
# Load the training data: for each of the 5 folds, read the "tt" and "ho" CSVs
# produced by the preprocessing step into the parallel lists dfs_tt / dfs_ho.
# NOTE(review): "tt" / "ho" presumably mean train-test and hold-out -- confirm
# against the preprocessing code.

dfs_tt, dfs_ho = [], []
data_dict = {'dfs_tt':dfs_tt, 'dfs_ho':dfs_ho}
for i in range(5):
    for key, frames in data_dict.items():
        # The file-name tag is the suffix of the dict key ('tt' or 'ho').
        # It is computed outside the f-string because reusing double quotes
        # inside a double-quoted f-string is a SyntaxError before Python 3.12.
        tag = key.split('_')[-1]
        frames.append(pd.read_csv(f"preprocessing/preproc_{tag}_fold_{i}.csv"))

In [None]:
# Residual diagnostics: for every fold, fit a linear regression on the "tt"
# frame, predict the "ho" frame, and scatter the residuals against the
# predicted values (one figure per fold, with a reference line at zero).

linregr = LinearRegression()
for i in range(5):
    train_fold, holdout_fold = dfs_tt[i], dfs_ho[i]

    # Column 0 is skipped; the last column is used as the regression target.
    linregr.fit(train_fold.iloc[:, 1:-1], train_fold.iloc[:, -1])
    y_pred = linregr.predict(holdout_fold.iloc[:, 1:-1])
    resids = holdout_fold.iloc[:, -1] - y_pred

    # Scatter first, then the zero line, then the labelling.
    plt.scatter(y_pred, resids)
    plt.axhline(y=0, color='b', linestyle='-')
    plt.title(f"Residual Plot for fold #{i}")
    plt.xlabel(f"Predicted Values for fold #{i}")
    plt.ylabel("Residuals")
    plt.show()

In [4]:
# Baseline model: plain linear regression with the estimator's default settings.
# The estimator is only instantiated here; it is not fitted in this cell.

baseline = LinearRegression()

In [5]:
# Degree-2 polynomial models with regularization.
# Each pipeline first expands the inputs with PolynomialFeatures (no bias
# column) and then fits a cross-validated regularized linear model that picks
# its penalty strength from `alphas`.

alphas = [0.001, 0.01, 0.1, 1, 10, 100, 1000]
# 20 evenly spaced L1/L2 mixing ratios in [0, 1): 0.00, 0.05, ..., 0.95.
l1ratios = np.linspace(0, 1, 20, endpoint=False)

deg2_ridge = Pipeline(steps=[
    ('poly2', PolynomialFeatures(degree=2, include_bias=False)),
    ('ridge', RidgeCV(alphas=alphas, cv=5)),
])

# LassoCV is left on its default cross-validation setting (no explicit cv).
deg2_lasso = Pipeline(steps=[
    ('poly2', PolynomialFeatures(degree=2, include_bias=False)),
    ('lasso', LassoCV(alphas=alphas)),
])

deg2_elasticnet = Pipeline(steps=[
    ('poly2', PolynomialFeatures(degree=2, include_bias=False)),
    ('elasticnet', ElasticNetCV(alphas=alphas, l1_ratio=l1ratios, cv=5)),
])

In [6]:
# Ensemble learning: tree-based ensemble regressors with default hyperparameters.
# All three estimators involve randomness (bootstrap / feature or row sampling),
# so a shared seed is passed to make results repeat after Restart & Run All.

ENSEMBLE_SEED = 42

randomforest = RandomForestRegressor(random_state=ENSEMBLE_SEED)
adaboost = AdaBoostRegressor(random_state=ENSEMBLE_SEED)
xgradboost = XGBRegressor(random_state=ENSEMBLE_SEED)

In [7]:
# models = [baseline, deg2_ridge, deg2_lasso, deg2_elasticnet, randomforest, adaboost, xgradboost]#, neuralnetwork]
# rmses = []
# for model in models:
#     model.fit(dfs_tt[0].iloc[:,1:-1], dfs_tt[0].iloc[:,-1])
#     y_pred = model.predict(dfs_ho[0].iloc[:,1:-1])
#     rmses.append(np.sqrt(mean_squared_error(dfs_ho[0].iloc[:,-1], y_pred)))
# print(rmses)    


In [None]:
# Neural network: train regr_nn.RegressionNN on fold 0 and report the RMSE on
# the matching "ho" frame.

# Column 0 is skipped; the last column is used as the regression target.
X_train, y_train = dfs_tt[0].iloc[:, 1:-1], dfs_tt[0].iloc[:, -1]
X_holdout, y_holdout = dfs_ho[0].iloc[:, 1:-1], dfs_ho[0].iloc[:, -1]

# Size the input layer from the data rather than hard-coding the feature count,
# so the cell keeps working if preprocessing adds or drops columns.
neuralnetwork = regr_nn.RegressionNN(input_size=X_train.shape[1], hidden_size=8, output_size=1)

# NOTE(review): the saved output of this cell contains a warning fragment from
# F.mse_loss and predictions that are nearly constant across rows. That pattern
# is typical of a target/prediction shape mismatch (e.g. (N,) vs (N, 1)) inside
# the training loop -- confirm in regr_nn.RegressionNN.fit.
neuralnetwork.fit(X_train, y_train)

y_pred = pd.DataFrame(neuralnetwork.predict(X_holdout))
print(y_pred)
print(f'rmse = {(np.sqrt(mean_squared_error(y_holdout, y_pred)))}')

  return F.mse_loss(input, target, reduction=self.reduction)


y_pred at epoch 0: tensor([[0.1601],
        [0.2145],
        [0.1631],
        ...,
        [0.2463],
        [0.0976],
        [0.1700]], grad_fn=<AddmmBackward0>)
loss at epoch 0: 38996500480.0
y_pred at epoch 1000: tensor([[181151.5938],
        [184378.1094],
        [179017.0312],
        ...,
        [179981.7969],
        [176410.5938],
        [182337.3906]], grad_fn=<AddmmBackward0>)
loss at epoch 1000: 6279001600.0
y_pred at epoch 2000: tensor([[180861.7969],
        [182431.5938],
        [179932.8750],
        ...,
        [181623.4531],
        [178180.7969],
        [181787.0312]], grad_fn=<AddmmBackward0>)
loss at epoch 2000: 6276279808.0
y_pred at epoch 3000: tensor([[180757.9688],
        [181682.4531],
        [180400.6719],
        ...,
        [181782.1094],
        [179257.4844],
        [181462.7812]], grad_fn=<AddmmBackward0>)
loss at epoch 3000: 6275366912.0
y_pred at epoch 4000: tensor([[180744.5625],
        [181353.8594],
        [180654.4688],
        ...,

In [9]:
# import torch
# import torch.nn as nn
# import torch.optim as optim
# from sample import MyModel

# # Example usage
# model = MyModel(input_size=10, hidden_size=20, output_size=1)
# X = torch.randn(100, 10)
# y = torch.randn(100, 1)

# model.fit(X, y, epochs=1000)
# predictions = model.predict(X)
# print(predictions)

In [10]:
# import torch
# import torch.nn as nn
# import torch.optim as optim
# from sesample import MyNeuralNet
# model = MyNeuralNet()
# X_train = dfs_tt[0].iloc[:,1:-1]
# y_train = dfs_tt[0].iloc[:,-1]
# X_test = dfs_ho[0].iloc[:,1:-1]
# model.fit(X_train, y_train)
# pred = model.predict(X_test)
# pred