In [None]:
import pickle
import pandas as pd
import time
import numpy as np
%matplotlib inline 
from pandas.plotting import register_matplotlib_converters
register_matplotlib_converters()
from xgboost import XGBRegressor
from sklearn.ensemble import RandomForestRegressor
from sklearn.linear_model import LinearRegression
from sklearn.model_selection import train_test_split,GridSearchCV
from sklearn.metrics import mean_squared_error, r2_score, make_scorer, mean_absolute_error, mean_absolute_percentage_error
from sklearn.preprocessing import StandardScaler
from time import time

In [None]:
# Load the feature table (Stress) and the target table (Targets). Both are
# indexed by their `collect_time` column, parsed as datetimes.
Stress = pd.read_csv(
    './Dataset/stress.csv',
    parse_dates=['collect_time'],
    index_col='collect_time',
)
Targets = pd.read_csv(
    './Dataset/targets.csv',
    parse_dates=['collect_time'],
    index_col='collect_time',
)
# One shape per line.
print(Stress.shape, Targets.shape, sep='\n')

In [None]:
# Hold out 20% of the rows (shuffled, fixed seed) to test the models.
X_train, X_test, y_train, y_test = train_test_split(
    Stress, Targets, test_size=0.2, shuffle=True, random_state=7
)

# Standardise features and targets. Each scaler is fitted on the training
# split only and then applied to both splits.
scaler_x = StandardScaler().fit(X_train)
scaled_X_train, scaled_X_test = (
    scaler_x.transform(part) for part in (X_train, X_test)
)

scaler_y = StandardScaler().fit(y_train)
scaled_y_train, scaled_y_test = (
    scaler_y.transform(part) for part in (y_train, y_test)
)

# Seed NumPy's global RNG and build the scorer shared by the grid searches
# below (negated MSE, because GridSearchCV maximises its score).
np.random.seed(7)
scoring_param = make_scorer(mean_squared_error, greater_is_better=False)

# LR

In [None]:
# Fit a linear-regression baseline on the standardised data and time the fit.
t0 = time()
lin_reg = LinearRegression().fit(scaled_X_train, scaled_y_train)
t1 = time()
Time_Taken = (t1-t0)
print("Time taken to train the model: %0.2f" % Time_Taken,"seconds")

# Training-split metrics. Predictions are mapped back to the original target
# units with scaler_y before being compared with the unscaled y_train.
Y_Train_Pred = lin_reg.predict(scaled_X_train)
Y_Train_Pred = scaler_y.inverse_transform(Y_Train_Pred)
rmse = np.sqrt(mean_squared_error(y_train,Y_Train_Pred))
RSQ = r2_score(y_train,Y_Train_Pred)
MAE = mean_absolute_error(y_train, Y_Train_Pred)
# sklearn returns MAPE as a fraction; scale to percent.
MAPE = mean_absolute_percentage_error(y_train, Y_Train_Pred) * 100
print("############ Model Accuracy on Daily Training Data ############")
print("RMSE: %0.4f" % rmse)
print("R-squared: %0.4f" % RSQ)
# MAE was computed but never reported for the training split; print it so the
# training and testing reports show the same metrics.
print("MAE: %0.4f" % MAE)
print("MAPE: {:.2f}".format(MAPE))
print("########################################")

# Testing-split metrics, computed the same way.
Y_Test_Pred = lin_reg.predict(scaled_X_test)
Y_Test_Pred = scaler_y.inverse_transform(Y_Test_Pred)
rmse = np.sqrt(mean_squared_error(y_test,Y_Test_Pred))
RSQ = r2_score(y_test,Y_Test_Pred)
MAE = mean_absolute_error(y_test, Y_Test_Pred)
MAPE = mean_absolute_percentage_error(y_test, Y_Test_Pred) * 100
print("############ Model Accuracy on Daily Testing Data ############")
print("RMSE: %0.4f" % rmse)
print("R-squared: %0.4f" % RSQ)
# The value is the mean absolute error, so label it MAE (was mislabelled "MBE").
print("MAE: %0.4f" % MAE)
print("MAPE: {:.2f}".format(MAPE))
print("########################################")

# Save the model
with open('./Modelpkl/LR.pkl','wb') as f:
    pickle.dump(lin_reg, f)

# RF

In [None]:
# Grid-search a random forest on the standardised data and time the search.
# The grid has 20 x 10 x 10 = 2000 candidates, each evaluated with 4-fold CV.
t0 = time()
p_grid = dict(
    n_estimators=[int(i) for i in np.linspace(100, 2000, num=20)],
    max_depth=[int(i) for i in np.linspace(1, 10, num=10)],
    min_samples_leaf=[int(i) for i in np.linspace(1, 10, num=10)],
)

rf = GridSearchCV(estimator = RandomForestRegressor(random_state=7), param_grid = p_grid, 
                     scoring = scoring_param, cv = 4, verbose=1, n_jobs=-1)

rf.fit(scaled_X_train ,scaled_y_train)
t1 = time()
Time_Taken = (t1-t0)
print("Time taken to train the model: %0.2f" % Time_Taken,"seconds")
print("Best RF Estimators: %0.3f" % rf.best_params_.get('n_estimators'))
print("Best RF Max Depth: %0.3f" % rf.best_params_.get('max_depth'))
print("Best RF Min Samples in Leaf: %0.3f" % rf.best_params_.get('min_samples_leaf'))

# Training-split metrics in original target units (predictions are
# inverse-transformed with scaler_y first).
Y_Train_Pred = rf.predict(scaled_X_train)
Y_Train_Pred = scaler_y.inverse_transform(Y_Train_Pred)
rmse = np.sqrt(mean_squared_error(y_train,Y_Train_Pred))
RSQ = r2_score(y_train,Y_Train_Pred)
MAE = mean_absolute_error(y_train, Y_Train_Pred)
MAPE = mean_absolute_percentage_error(y_train, Y_Train_Pred) * 100
print("############ Model Accuracy on Daily Training Data ############")
print("RMSE: {:.4f}".format(rmse))
print("R-squared: {:.4f}".format(RSQ))
print("MAE: {:.4f}".format(MAE))
print("MAPE: {:.2f}".format(MAPE))
print("########################################")

# Testing-split metrics.
Y_Test_Pred = rf.predict(scaled_X_test)
Y_Test_Pred = scaler_y.inverse_transform(Y_Test_Pred)
rmse = np.sqrt(mean_squared_error(y_test,Y_Test_Pred))
RSQ = r2_score(y_test,Y_Test_Pred)
MAE = mean_absolute_error(y_test, Y_Test_Pred)
# Scale to percent like the training split; without the factor the two
# "MAPE" lines were reported in different units.
MAPE = mean_absolute_percentage_error(y_test, Y_Test_Pred) * 100
print("############ Model Accuracy on Daily Testing Data ############")
print("RMSE: {:.4f}".format(rmse))
print("R-squared: {:.4f}".format(RSQ))
print("MAE: {:.4f}".format(MAE))
print("MAPE: {:.2f}".format(MAPE))
print("########################################")
print(" ")

# Save the model (only the refitted best estimator, not the whole search).
with open('./Modelpkl/RF.pkl','wb') as f:
    pickle.dump(rf.best_estimator_, f)

# XGBoost

In [None]:
# Grid-search an XGBoost regressor on the standardised data and time the search.
# The grid has 20 x 10 x 10 = 2000 candidates, each evaluated with 4-fold CV.
t0 = time()
p_grid = dict(
    n_estimators=[int(i) for i in np.linspace(100, 2000, num=20)],
    max_depth=[int(i) for i in np.linspace(1, 10, num=10)],
    learning_rate=np.linspace(0.001, 0.1, num=10),
)

xgb = GridSearchCV(estimator = XGBRegressor(random_state=7), param_grid = p_grid, 
                     scoring = scoring_param, cv = 4, verbose=1, n_jobs=-1)
xgb.fit(scaled_X_train,scaled_y_train)
t1 = time()
Time_Taken = (t1-t0)
print("Time taken to train the model: %0.2f" % Time_Taken,"seconds")
print("Best XGB Estimators: %0.3f" % xgb.best_params_.get('n_estimators'))
print("Best XGB Max Depth: %0.3f" % xgb.best_params_.get('max_depth'))
print("Best XGB Learning Rate: %0.3f" % xgb.best_params_.get('learning_rate'))

# Training-split metrics in original target units (predictions are
# inverse-transformed with scaler_y first).
Y_Train_Pred = xgb.predict(scaled_X_train)
Y_Train_Pred = scaler_y.inverse_transform(Y_Train_Pred)
rmse = np.sqrt(mean_squared_error(y_train,Y_Train_Pred))
RSQ = r2_score(y_train,Y_Train_Pred)
MAE = mean_absolute_error(y_train, Y_Train_Pred)
MAPE = mean_absolute_percentage_error(y_train, Y_Train_Pred) * 100
print("############ Model Accuracy on Daily Training Data ############")
print("RMSE: %0.4f" % rmse)
print("R-squared: %0.4f" % RSQ)
print('MAE: {:.4f}'.format(MAE))
print('MAPE: {:.2f}'.format(MAPE))
print("########################################")

# Testing-split metrics.
Y_Test_Pred = xgb.predict(scaled_X_test)
Y_Test_Pred = scaler_y.inverse_transform(Y_Test_Pred)
rmse = np.sqrt(mean_squared_error(y_test,Y_Test_Pred))
RSQ = r2_score(y_test,Y_Test_Pred)
MAE = mean_absolute_error(y_test, Y_Test_Pred)
# Scale to percent like the training split; without the factor the two
# "MAPE" lines were reported in different units.
MAPE = mean_absolute_percentage_error(y_test, Y_Test_Pred) * 100
print("############ Model Accuracy on Daily Testing Data ############")
print("RMSE: %0.4f" % rmse)
print("R-squared: %0.4f" % RSQ)
print('MAE: {:.4f}'.format(MAE))
print('MAPE: {:.2f}'.format(MAPE))
print("########################################")

# Save the model (only the refitted best estimator, not the whole search).
with open('./Modelpkl/xgb.pkl','wb') as f:
    pickle.dump(xgb.best_estimator_, f)

In [None]:
import pandas as pd

# Read the feature and target CSVs again under the names used by the
# windowing code that follows; both are indexed by the parsed `collect_time`.
x_data = pd.read_csv(
    './Dataset/stress.csv',
    parse_dates=['collect_time'],
    index_col='collect_time',
)
y_data = pd.read_csv(
    './Dataset/targets.csv',
    parse_dates=['collect_time'],
    index_col='collect_time',
)

In [None]:
import numpy as np
import torch
from torch import nn
from torch.utils.data import TensorDataset, DataLoader
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score, make_scorer
import gc
import logging
from sklearn.preprocessing import MinMaxScaler
import warnings
from sklearn.model_selection import GridSearchCV
# Suppress every warning from here on.
warnings.filterwarnings('ignore')

# Column labels of the feature table and of the target table.
cols, target = x_data.columns, y_data.columns
# Features and targets side by side, joined on the shared index.
raw_data = x_data.merge(y_data, left_index=True, right_index=True)

# depth: number of lagged steps stacked into each window.
# prediction_horizon: how many steps ahead the label lies.
depth, batch_size, prediction_horizon = 64, 128, 1

# 60/20/20 positional split sizes; the test part absorbs the rounding remainder.
# NOTE(review): sizes derive from len(x_data) while the slices are taken from
# raw_data; the two only agree if the join keeps every row - confirm.
L = len(x_data)
train_size, val_size = int(0.6 * L), int(0.2 * L)
test_size = L - (train_size + val_size)

# Positional split: the leading train_size + val_size rows are used for
# fitting, everything after them for testing.
data_train, data_test = (
    raw_data.iloc[:train_size + val_size],
    raw_data.iloc[train_size + val_size:],
)

# Min-max scale the features to [0, 1] using statistics of the fitting rows only.
scaler_cols = MinMaxScaler(feature_range=(0, 1))
scaler_cols.fit(data_train[cols].values)
data_train_cols_scale = pd.DataFrame(scaler_cols.transform(data_train[cols].values))
data_test_cols_scale = pd.DataFrame(scaler_cols.transform(data_test[cols].values))

# Same for the targets; scaler_target is reused later to undo the scaling.
scaler_target = MinMaxScaler(feature_range=(0, 1))
scaler_target.fit(data_train[target].values)
data_train_target_scale = pd.DataFrame(scaler_target.transform(data_train[target].values))
data_test_target_scale = pd.DataFrame(scaler_target.transform(data_test[target].values))

# train
# Build sliding windows over the scaled fitting rows.
#   X1[t, j, :]     = features at row t - (depth - 1 - j)  (j = depth-1 is row t)
#   y_his1[t, j, :] = targets at the same lagged row
# Rows without enough history are back-filled from the first available value;
# they are sliced away below.
X1 = np.zeros((train_size + val_size, depth, len(cols)))
# Width comes from the data rather than a hard-coded 7.
y_his1 = np.zeros((train_size + val_size, depth, data_train_target_scale.shape[1]))

for j in range(depth):
    lag = depth - j - 1
    # .bfill() replaces the deprecated fillna(method="bfill"); shifting the
    # whole frame at once fills every column of the slice in one assignment.
    X1[:, j, :] = data_train_cols_scale.shift(lag).bfill()
    y_his1[:, j, :] = data_train_target_scale.shift(lag).bfill()
# y1[k] is the target depth + prediction_horizon - 1 rows after row k.
y1 = data_train_target_scale.shift(- depth - prediction_horizon + 1).bfill()

# Keep only windows with a full history and an available label, so that
# window i (ending at row depth-1+i) is paired with the target
# prediction_horizon rows later.
X_train = X1[depth-1:-prediction_horizon]
y_his_train = y_his1[depth-1:-prediction_horizon]
y_train = y1[:-depth-prediction_horizon+1]

del X1, y1, y_his1, data_train_cols_scale, data_train_target_scale
gc.collect()

# test
# Build sliding windows over the scaled test rows.
#   X3[t, j, :]     = features at row t - (depth - 1 - j)  (j = depth-1 is row t)
#   y_his3[t, j, :] = targets at the same lagged row
# Rows without enough history are back-filled from the first available value;
# they are sliced away below.
X3 = np.zeros((test_size, depth, len(cols)))
# Width comes from the data rather than a hard-coded 7.
y_his3 = np.zeros((test_size, depth, data_test_target_scale.shape[1]))

for j in range(depth):
    lag = depth - j - 1
    # .bfill() replaces the deprecated fillna(method="bfill"); shifting the
    # whole frame at once fills every column of the slice in one assignment.
    X3[:, j, :] = data_test_cols_scale.shift(lag).bfill()
    y_his3[:, j, :] = data_test_target_scale.shift(lag).bfill()
# y3[k] is the target depth + prediction_horizon - 1 rows after row k.
y3 = data_test_target_scale.shift(- depth - prediction_horizon + 1).bfill()

# Keep only windows with a full history and an available label, so that
# window i (ending at row depth-1+i) is paired with the target
# prediction_horizon rows later.
X_test = X3[depth-1:-prediction_horizon]
y_his_test = y_his3[depth-1:-prediction_horizon]
y_test = y3[:-depth-prediction_horizon+1]

del X3, y3, y_his3, data_test_cols_scale, data_test_target_scale
gc.collect()
# X_*: independent-variable windows, y_his_*: history of the predicted
# quantities, y_*: the true values the predictions are compared with.
X_train_t, X_test_t = torch.Tensor(X_train), torch.Tensor(X_test)
y_his_train_t, y_his_test_t = torch.Tensor(y_his_train), torch.Tensor(y_his_test)
y_train_t, y_test_t = torch.Tensor(y_train.values), torch.Tensor(y_test.values)
gc.collect()

# Append the target history to the features along the last axis, then flatten
# every window into one row; labels become plain arrays.
X_train = np.concatenate([X_train, y_his_train], axis=-1).reshape(len(X_train), -1)
y_train = y_train.to_numpy()

X_test = np.concatenate([X_test, y_his_test], axis=-1).reshape(len(X_test), -1)
y_test = y_test.to_numpy()

In [None]:
def testLoader(X, y,  input_size, batch_size):
    X = X.reshape(X.shape[0], -1, input_size + 7)
    X_test = X[:, :, :input_size]
    y_his_test = X[:, :, input_size:]
    del X

    X_test_t = torch.Tensor(X_test)
    y_his_test_t = torch.Tensor(y_his_test)
    y_test_t = torch.Tensor(y)

    del  X_test, y_his_test, y

    test_loader = DataLoader(TensorDataset(
        X_test_t, y_his_test_t, y_test_t), shuffle=True, batch_size=batch_size)
    
    del X_test_t, y_his_test_t, y_test_t
    gc.collect()

    return test_loader

def trainValLoader(X, y, input_size, batch_size, n_targets=7):
    """Split flattened windows 75/25 by position and wrap both parts in DataLoaders.

    Args:
        X: 2-D array whose rows are flattened windows; each row is reshaped to
            ``(steps, input_size + n_targets)``.
        y: 2-D array of labels, one row per window.
        input_size: number of leading columns per step that are features; the
            remaining ``n_targets`` columns are the target history.
        batch_size: batch size of both loaders.
        n_targets: number of target-history columns per step (default 7, the
            value that used to be hard-coded).

    Returns:
        ``(train_loader, val_loader)``. Each yields
        ``(features, target_history, labels)`` float tensors. The training
        loader is shuffled; the validation loader keeps row order.
    """
    X = X.reshape(X.shape[0], -1, input_size + n_targets)
    train_size = int(X.shape[0] * 0.75)

    # The training part must stop at train_size. Previously it covered every
    # row, so the validation rows were also trained on.
    X_train_t = torch.Tensor(X[:train_size, :, :input_size])
    y_his_train_t = torch.Tensor(X[:train_size, :, input_size:])
    y_train_t = torch.Tensor(y[:train_size, :])

    X_val_t = torch.Tensor(X[train_size:, :, :input_size])
    y_his_val_t = torch.Tensor(X[train_size:, :, input_size:])
    y_val_t = torch.Tensor(y[train_size:, :])

    train_loader = DataLoader(TensorDataset(
        X_train_t, y_his_train_t, y_train_t), shuffle=True, batch_size=batch_size)
    # Validation is evaluation only, so shuffling is unnecessary.
    val_loader = DataLoader(TensorDataset(
        X_val_t, y_his_val_t, y_val_t), shuffle=False, batch_size=batch_size)
    return train_loader, val_loader

In [None]:
from sklearn.model_selection import KFold, ParameterGrid
from sklearn.linear_model import SGDClassifier
from sklearn.base import clone
from networks.LSTMModel import Module_LSTM
import time
def Base_fit(model, train_len, val_len, train_loader, val_loader, device, epochs=100):
    """Train ``model`` with Adam on MSE loss and return its best validation RMSE.

    Args:
        model: module called as ``model(features, target_history)``.
        train_len: number of samples served by ``train_loader`` (used to
            normalise the logged training RMSE).
        val_len: number of samples served by ``val_loader``.
        train_loader: iterable of ``(features, target_history, labels)`` batches.
        val_loader: iterable of the same triples, used for validation.
        device: torch device the batches and the loss are moved to.
        epochs: number of passes over ``train_loader`` (default 100, the value
            that used to be hard-coded).

    Returns:
        The lowest per-sample validation RMSE seen over all epochs.
    """
    # inf instead of the old 9999 sentinel, which a large loss could exceed.
    min_val_loss = float("inf")
    loss_function = nn.MSELoss().to(device)
    opt = torch.optim.Adam(model.parameters(), lr=0.001)
    # Multiply the learning rate by 0.9 every 20 epochs.
    epoch_scheduler = torch.optim.lr_scheduler.StepLR(opt, 20, gamma=0.9)
    train_start = time.monotonic()
    for i in range(epochs):
        mse_train = 0
        model.train()
        for batch_x, batch_y_h, batch_y in train_loader:
            opt.zero_grad()
            y_pred = model(batch_x.to(device), batch_y_h.to(device))
            loss = loss_function(y_pred, batch_y.to(device))
            loss.backward()
            # The batch loss is a mean; weight it by batch size so the sum can
            # be divided by the sample count.
            mse_train += loss.item() * batch_x.shape[0]
            opt.step()
        epoch_scheduler.step()
        model.eval()
        mse_val = 0
        with torch.no_grad():
            for batch_x, batch_y_h, batch_y in val_loader:
                output = model(batch_x.to(device), batch_y_h.to(device))
                mse_val += loss_function(output, batch_y.to(device)).item() * batch_x.shape[0]
        # Normalise by the sample count so the returned value is an RMSE (it
        # used to be the square root of the *summed* error, which scales with
        # the size of the validation fold).
        val_rmse = (mse_val / val_len) ** 0.5
        min_val_loss = min(min_val_loss, val_rmse)
        if i % 10 == 0:
            logging.info("Iter: " + str(i) + " train: " + str((mse_train / train_len) ** 0.5) + " val: " + str(
                val_rmse))
    train_end = time.monotonic()
    # This routine fits both LSTM and GRU models, so the message no longer
    # names one of them.
    logging.info("Model training time: {:.4f}".format(train_end - train_start))
    return min_val_loss

def cross_verification(base_estimator, parameters, model_name, X_train, y_train, device,
                       input_size=38, n_targets=7, batch_size=128):
    """Score one parameter setting with 4-fold cross-validation.

    Args:
        base_estimator: model supporting ``clone``/``set_params`` and ``.to(device)``.
        parameters: dict of parameter values applied to a fresh clone per fold.
        model_name: key selecting the fit routine ('LSTM' or 'GRU').
        X_train: 2-D array of flattened windows; each row is reshaped to
            ``(steps, input_size + n_targets)``.
        y_train: array of labels, one row per window.
        device: torch device used for training.
        input_size: feature columns per step (default 38, previously hard-coded).
        n_targets: target-history columns per step (default 7, previously hard-coded).
        batch_size: loader batch size (default 128, previously hard-coded).

    Returns:
        Mean over the folds of the value returned by the fit routine.

    Raises:
        KeyError: if ``model_name`` is not a known model.
    """
    fit_funcs ={
        'LSTM': Base_fit,
        'GRU': Base_fit,
    }
    # Look the routine up before any training so a bad name fails immediately.
    fit_func = fit_funcs[model_name]
    width = input_size + n_targets

    skfolds = KFold(n_splits=4)
    local_loss = []
    for train_index, test_index in skfolds.split(X_train, y_train):
        # Fresh, unfitted model for every fold, configured with cloned
        # parameter values.
        estimator = clone(base_estimator)
        cloned_parameters = {k: clone(v, safe=False) for k, v in parameters.items()}
        estimator = clone(estimator.set_params(**cloned_parameters))
        estimator.to(device)

        X_train_folds = X_train[train_index]
        X_test_fold = X_train[test_index]
        train_len = X_train_folds.shape[0]
        val_len = X_test_fold.shape[0]

        # Un-flatten, then separate feature columns from target-history columns.
        X_train_folds = X_train_folds.reshape(train_len, -1, width)
        X_test_fold = X_test_fold.reshape(val_len, -1, width)

        train_loader = DataLoader(TensorDataset(
            torch.Tensor(X_train_folds[:, :, :input_size]),
            torch.Tensor(X_train_folds[:, :, input_size:]),
            torch.Tensor(y_train[train_index])), shuffle=True, batch_size=batch_size)
        # Validation is evaluation only, so shuffling is unnecessary.
        val_loader = DataLoader(TensorDataset(
            torch.Tensor(X_test_fold[:, :, :input_size]),
            torch.Tensor(X_test_fold[:, :, input_size:]),
            torch.Tensor(y_train[test_index])), shuffle=False, batch_size=batch_size)

        del X_train_folds, X_test_fold
        gc.collect()

        local_min_val_loss = fit_func(estimator, train_len, val_len, train_loader, val_loader, device)
        local_loss.append(local_min_val_loss)
        # Drop the fold's model before the next one is built.
        del estimator
        gc.collect()
    return np.mean(local_loss) 

def get_best_params(estimator, param_grid,model_name, X_train, y_train, device):
    """Exhaustively search ``param_grid`` and return the best parameter dict.

    Every combination in the grid is scored with ``cross_verification``; the
    combination with the lowest score wins.

    Args:
        estimator: model to clone for each candidate.
        param_grid: mapping accepted by ``ParameterGrid``.
        model_name: forwarded to ``cross_verification``.
        X_train: flattened windows, forwarded to ``cross_verification``.
        y_train: labels, forwarded to ``cross_verification``.
        device: torch device, forwarded to ``cross_verification``.

    Returns:
        The candidate parameter dict with the smallest cross-validation loss.
    """
    candidate_params = list(ParameterGrid(param_grid))
    base_estimator = clone(estimator)
    global_val_loss = []
    for cand_idx, parameters in enumerate(candidate_params):
        # Fixed typo in the log message ('cuurent').
        logging.info('current parameters index: {}'.format(cand_idx))
        local_val_loss = cross_verification(base_estimator, parameters, model_name, X_train, y_train, device)
        global_val_loss.append(local_val_loss)
        # Release cached GPU memory between candidates.
        torch.cuda.empty_cache()
    # Losses are logged in candidate order.
    for loss in global_val_loss:
        logging.info('val loss: {}'.format(loss))
    index = np.argmin(global_val_loss)
    best_parameters = candidate_params[index]
    return best_parameters

# LSTM

In [None]:
from networks.LSTMModel import Module_LSTM
import time
from sklearn.metrics import mean_absolute_percentage_error, mean_absolute_error, r2_score, mean_squared_error
import warnings
import numpy as np
%load_ext autoreload
%autoreload 
warnings.filterwarnings('ignore')

# Send all INFO-level log records to a fresh LSTM log file (force=True replaces
# any handlers configured earlier in the session).
filename = "./logfiles/lstm.log"
logging.basicConfig(
    filename=filename,
    filemode='w',
    level=logging.INFO,
    format='%(asctime)s %(filename)s %(levelname)s %(message)s',
    datefmt='%a %d %b %Y %H:%M:%S',
    force=True,
)

# Loaders for the final fit: 38 feature columns per step, batches of 128.
train_loader, val_loader = trainValLoader(X_train, y_train, 38, 128)
test_loader = testLoader(X_test, y_test, 38, 128)

# 4 hidden sizes x 4 dropout rates (0.1 .. 0.4).
param_grid = dict(
    hidden_size=[16, 32, 64, 128],
    dropout=[i * 0.1 for i in range(1, 5)],
)

# Use the first GPU when one is available.
if torch.cuda.is_available():
    device = torch.device("cuda:0")
else:
    device = torch.device("cpu")

base_estimator = Module_LSTM().to(device)
best_params = get_best_params(base_estimator, param_grid, 'LSTM', X_train, y_train, device)
logging.info('best params: hidden_size: {}, dropout: {}'.format(best_params['hidden_size'], best_params['dropout']))

# Train the final LSTM with the best parameters found by the grid search and
# checkpoint the weights whenever the validation RMSE improves.
model = clone(clone(base_estimator).set_params(**best_params))
model.to(device)
loss_function = nn.MSELoss().to(device)
# inf instead of the old 9999 sentinel, which a large loss could exceed and
# thereby prevent any checkpoint from being written.
min_val_loss = float("inf")
epochs = 100
opt = torch.optim.Adam(model.parameters(), lr=0.001)
# Multiply the learning rate by 0.9 every 20 epochs.
epoch_scheduler = torch.optim.lr_scheduler.StepLR(opt, 20, gamma=0.9)
# Normalise by the loaders' real sample counts. The previous
# len(X_train) * 0.75 / * 0.25 only approximated them and was wrong whenever
# the loaders were not an exact 75/25 split.
n_train_rows = len(train_loader.dataset)
n_val_rows = len(val_loader.dataset)
train_start = time.monotonic()
for i in range(epochs):
    mse_train = 0
    model.train()
    for batch_x, batch_y_h, batch_y in train_loader:
        opt.zero_grad()
        y_pred = model(batch_x.to(device), batch_y_h.to(device))
        loss = loss_function(y_pred, batch_y.to(device))
        loss.backward()
        # The batch loss is a mean; weight it by batch size so the epoch sum
        # can be divided by the sample count.
        mse_train += loss.item() * batch_x.shape[0]
        opt.step()
    epoch_scheduler.step()
    model.eval()
    with torch.no_grad():
        mse_val = 0
        for batch_x, batch_y_h, batch_y in val_loader:
            output = model(batch_x.to(device), batch_y_h.to(device))
            mse_val += loss_function(output, batch_y.to(device)).item() * batch_x.shape[0]
    val_rmse = (mse_val / n_val_rows) ** 0.5
    if val_rmse < min_val_loss:
        min_val_loss = val_rmse
        filename = "./Modelpkl/lstm.pt"
        torch.save(model.state_dict(), filename)
    if i % 10 == 0:
        logging.info("Iter: " + str(i) + " train: " + str((mse_train / n_train_rows) ** 0.5) + " val: " + str(
            val_rmse))
train_end = time.monotonic()
logging.info("LSTM training time: {:.4f}".format(train_end - train_start))

# train set metrics
# Restore the best checkpoint and score it on everything in train_loader.
filename = "./Modelpkl/lstm.pt"
model.load_state_dict(torch.load(filename))
model.eval()
mse_val = 0
preds, true = [], []
with torch.no_grad():
    for batch_x, batch_y_h, batch_y in train_loader:
        output = model(batch_x.to(device), batch_y_h.to(device))
        l = loss_function(output, batch_y.to(device)).item()
        mse_val += l
        # Predictions and labels are collected batch by batch, so they stay
        # aligned even though the loader may shuffle.
        preds.append(output.detach().cpu().numpy())
        true.append(batch_y.detach().cpu().numpy())

# Undo the min-max scaling so the metrics are in original target units.
true = scaler_target.inverse_transform(np.concatenate(true))
preds = scaler_target.inverse_transform(np.concatenate(preds))

mse, mae = mean_squared_error(true, preds), mean_absolute_error(true, preds)
mape = 100 * mean_absolute_percentage_error(true, preds)
r2 = r2_score(true, preds)

logging.info('Train data result:')
logging.info(
    'Train RMSE: {:.4f}, Train R2: {:.4f}, Train MAE: {:.4f}, Train MAPE: {:.4f}'.format(
        mse ** 0.5, r2, mae, mape))

# test set metrics
# Restore the best checkpoint and score it on the held-out test windows.
filename = "./Modelpkl/lstm.pt"
model.load_state_dict(torch.load(filename))
model.eval()
mse_val = 0
preds, true = [], []
with torch.no_grad():
    for batch_x, batch_y_h, batch_y in test_loader:
        output = model(batch_x.to(device), batch_y_h.to(device))
        l = loss_function(output, batch_y.to(device)).item()
        mse_val += l
        # Predictions and labels are collected batch by batch, so they stay
        # aligned regardless of loader order.
        preds.append(output.detach().cpu().numpy())
        true.append(batch_y.detach().cpu().numpy())

# Undo the min-max scaling so the metrics are in original target units.
true = scaler_target.inverse_transform(np.concatenate(true))
preds = scaler_target.inverse_transform(np.concatenate(preds))

mse, mae = mean_squared_error(true, preds), mean_absolute_error(true, preds)
mape = 100 * mean_absolute_percentage_error(true, preds)
r2 = r2_score(true, preds)
logging.info('Test data result:')
logging.info(
    'Test RMSE: {:.4f}, Test R2: {:.4f}, Test MAE: {:.4f}, Test MAPE: {:.4f}'.format(
        mse ** 0.5, r2, mae, mape))

# GRU

In [None]:
from networks.GRUModel import Module_GRU
import time
from sklearn.metrics import mean_absolute_percentage_error, mean_absolute_error, r2_score, mean_squared_error
import warnings
import numpy as np
%load_ext autoreload
%autoreload 
warnings.filterwarnings('ignore')

# Send all INFO-level log records to a fresh GRU log file (force=True replaces
# the handlers configured for the previous model).
filename = "./logfiles/gru.log"
logging.basicConfig(filename=filename, format='%(asctime)s %(filename)s %(levelname)s %(message)s',
                    datefmt='%a %d %b %Y %H:%M:%S', filemode='w', level=logging.INFO, force=True)

# Loaders for the final fit: 38 feature columns per step, batches of 128.
train_loader, val_loader = trainValLoader(X_train, y_train, 38, 128)
# Was misspelled `test_loder`, while the test-metrics section of this cell
# reads `test_loader`. The cell therefore only worked when an earlier cell had
# already defined that name.
test_loader = testLoader(X_test, y_test, 38, 128)
test_loder = test_loader  # misspelled alias kept in case other cells refer to it

# 4 hidden sizes x 4 dropout rates (0.1 .. 0.4).
param_grid = {
    'hidden_size':[16,32,64,128],
    'dropout': [ i * 0.1 for i in range(1, 5)],
}
# Use the first GPU when one is available.
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
base_estimator = Module_GRU().to(device)
best_params = get_best_params(base_estimator, param_grid, 'GRU', X_train, y_train, device)
logging.info('best params: hidden_size: {}, dropout: {}'.format(best_params['hidden_size'], best_params['dropout']))

# Train the final GRU with the best parameters found by the grid search and
# checkpoint the weights whenever the validation RMSE improves.
model = clone(clone(base_estimator).set_params(**best_params))
model.to(device)
loss_function = nn.MSELoss().to(device)
# inf instead of the old 9999 sentinel, which a large loss could exceed and
# thereby prevent any checkpoint from being written.
min_val_loss = float("inf")
epochs = 100
opt = torch.optim.Adam(model.parameters(), lr=0.001)
# Multiply the learning rate by 0.9 every 20 epochs.
epoch_scheduler = torch.optim.lr_scheduler.StepLR(opt, 20, gamma=0.9)
# Normalise by the loaders' real sample counts. The previous
# len(X_train) * 0.75 / * 0.25 only approximated them and was wrong whenever
# the loaders were not an exact 75/25 split.
n_train_rows = len(train_loader.dataset)
n_val_rows = len(val_loader.dataset)
train_start = time.monotonic()
for i in range(epochs):
    mse_train = 0
    model.train()
    for batch_x, batch_y_h, batch_y in train_loader:
        opt.zero_grad()
        y_pred = model(batch_x.to(device), batch_y_h.to(device))
        loss = loss_function(y_pred, batch_y.to(device))
        loss.backward()
        # The batch loss is a mean; weight it by batch size so the epoch sum
        # can be divided by the sample count.
        mse_train += loss.item() * batch_x.shape[0]
        opt.step()
    epoch_scheduler.step()
    model.eval()
    with torch.no_grad():
        mse_val = 0
        for batch_x, batch_y_h, batch_y in val_loader:
            output = model(batch_x.to(device), batch_y_h.to(device))
            mse_val += loss_function(output, batch_y.to(device)).item() * batch_x.shape[0]
    val_rmse = (mse_val / n_val_rows) ** 0.5
    if val_rmse < min_val_loss:
        min_val_loss = val_rmse
        # NOTE(review): every other model in this notebook is stored under
        # ./Modelpkl/; confirm that ./save/ exists and is intended. The path
        # is left unchanged because the metric sections load from it.
        filename = "./save/gru.pt"
        torch.save(model.state_dict(), filename)
    if i % 10 == 0:
        logging.info("Iter: " + str(i) + " train: " + str((mse_train / n_train_rows) ** 0.5) + " val: " + str(
            val_rmse))
train_end = time.monotonic()
logging.info("GRU training time: {:.4f}".format(train_end - train_start))

# train set metrics
# Restore the best checkpoint and score it on everything in train_loader.
filename = "./save/gru.pt"
model.load_state_dict(torch.load(filename))
model.eval()
print(model)
mse_val = 0
preds, true = [], []
with torch.no_grad():
    for batch_x, batch_y_h, batch_y in train_loader:
        output = model(batch_x.to(device), batch_y_h.to(device))
        l = loss_function(output, batch_y.to(device)).item()
        mse_val += l
        # Predictions and labels are collected batch by batch, so they stay
        # aligned even though the loader may shuffle.
        preds.append(output.detach().cpu().numpy())
        true.append(batch_y.detach().cpu().numpy())

# Undo the min-max scaling so the metrics are in original target units.
true = scaler_target.inverse_transform(np.concatenate(true))
preds = scaler_target.inverse_transform(np.concatenate(preds))

mse, mae = mean_squared_error(true, preds), mean_absolute_error(true, preds)
mape = 100 * mean_absolute_percentage_error(true, preds)
r2 = r2_score(true, preds)

logging.info('Train data result:')
logging.info(
    'Train RMSE: {:.4f}, Train R2: {:.4f}, Train MAE: {:.4f}, Train MAPE: {:.4f}'.format(
        mse ** 0.5, r2, mae, mape))

# test set metrics
# Restore the best checkpoint and score it on the held-out test windows.
filename = "./save/gru.pt"
model.load_state_dict(torch.load(filename))
model.eval()
print(model)
mse_val = 0
preds, true = [], []
with torch.no_grad():
    for batch_x, batch_y_h, batch_y in test_loader:
        output = model(batch_x.to(device), batch_y_h.to(device))
        l = loss_function(output, batch_y.to(device)).item()
        mse_val += l
        # Predictions and labels are collected batch by batch, so they stay
        # aligned regardless of loader order.
        preds.append(output.detach().cpu().numpy())
        true.append(batch_y.detach().cpu().numpy())

# Undo the min-max scaling so the metrics are in original target units.
true = scaler_target.inverse_transform(np.concatenate(true))
preds = scaler_target.inverse_transform(np.concatenate(preds))

mse, mae = mean_squared_error(true, preds), mean_absolute_error(true, preds)
mape = 100 * mean_absolute_percentage_error(true, preds)
r2 = r2_score(true, preds)
logging.info('Test data result:')
logging.info(
    'Test RMSE: {:.4f}, Test R2: {:.4f}, Test MAE: {:.4f}, Test MAPE: {:.4f}'.format(
        mse ** 0.5, r2, mae, mape))