In [1]:
import math
import torch
import time
import random
import torch.nn as nn
import numpy as np
import pandas as pd
from sklearn.preprocessing import StandardScaler
from sklearn.preprocessing import MinMaxScaler
from sklearn.model_selection import train_test_split
from torch.utils.data import DataLoader, TensorDataset
from torch.nn.parameter import Parameter
from torch.nn import init
from torch import Tensor
from scipy.special import gamma 
import matplotlib.pyplot as plt

In [2]:
def get_memory_usage():
    if torch.cuda.is_available():
        return torch.cuda.memory_allocated()/ (1024*1024)
    return 0

device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

def set_seed(seed=42):
    random.seed(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
    
    if torch.cuda.is_available():
        torch.cuda.manual_seed(seed)
        torch.cuda.manual_seed_all(seed)
    
    torch.backends.cudnn.deterministic = True
    torch.backends.cudnn.benchmark = False

class Fractional_Order_Matrix_Differential_Solver(torch.autograd.Function):
    @staticmethod
    def forward(ctx,input1,w,b,alpha,k,epoch):
        alpha = torch.tensor(alpha)
        k = torch.tensor(k)
        epoch = torch.tensor(epoch)
        ctx.save_for_backward(input1,w,b,alpha,k,epoch)
        outputs = input1@w + b
        return outputs

    @staticmethod
    def backward(ctx, grad_outputs):
        input1,w,b,alpha,k,epoch = ctx.saved_tensors
        x_fractional, w_fractional = Fractional_Order_Matrix_Differential_Solver.Fractional_Order_Matrix_Differential_Linear(input1,w,b,alpha,k,epoch)   
        x_grad = torch.mm(grad_outputs,x_fractional)
        w_grad = torch.mm(w_fractional,grad_outputs)
        b_grad = grad_outputs.sum(dim=0)
        return x_grad, w_grad, b_grad,None,None,None

    @staticmethod
    def Fractional_Order_Matrix_Differential_Linear(x,w,b,alpha,k,epoch):
        #w
        wf = w[:,0].view(1,-1)
        #main
        w_main = torch.mul(x,(torch.abs(wf)+1e-8)**(1-alpha)/gamma(2-alpha))
        #partial
        x_rows, x_cols = x.size()
        bias = torch.full((x_rows, x_cols),b[0].item())
        bias = bias.to(device)
        w_partial = torch.mul(torch.mm(x,wf.T).view(-1,1).expand(-1,x_cols) - torch.mul(x,wf) + bias, torch.sign(wf)*(torch.abs(wf)+1e-8)**(-alpha)/gamma(1-alpha))
        return w.T, (w_main + torch.exp(-k*epoch)*w_partial).T

class FLinear(nn.Module):
    
    __constants__ = ['in_features', 'out_features']
    in_features: int
    out_features: int
    weight: Tensor

    def __init__(self, in_features: int, out_features: int, alpha=0.9, k = 0.9, bias: bool = True,
                 device=None, dtype=None) -> None:
        factory_kwargs = {'device': device, 'dtype': dtype}
        super().__init__()
        self.in_features = in_features
        self.out_features = out_features
        self.alpha = alpha
        self.k = k

        self.weight = Parameter(torch.empty((in_features, out_features), **factory_kwargs))
        if bias:
            self.bias = Parameter(torch.empty(out_features, **factory_kwargs))
        else:
            self.register_parameter('bias', None)
        self.reset_parameters()

    def reset_parameters(self) -> None:
        init.kaiming_uniform_(self.weight, a=math.sqrt(5))
        if self.bias is not None:
            fan_in, _ = init._calculate_fan_in_and_fan_out(self.weight)
            bound = 1 / math.sqrt(fan_in) if fan_in > 0 else 0
            init.uniform_(self.bias, -bound, bound)

    def forward(self, x, epoch):
        return Fractional_Order_Matrix_Differential_Solver.apply(x, self.weight, self.bias, self.alpha, self.k, epoch)

    def extra_repr(self) -> str:
        return f"in_features={self.in_features}, out_features={self.out_features}, bias={self.bias is not None}"
    
def split(X,y):
    X_train,X_temp,y_train,y_temp = train_test_split(X,y,test_size=0.3,shuffle=False)
    X_val, X_test, y_val, y_test = train_test_split(X_temp, y_temp, test_size=0.333,shuffle=False)
    return X_train,X_val,X_test,y_train,y_val,y_test

#Mean Square Error
def MSE(pred,true):
    return np.mean((pred-true)**2)

#Mean Absolute Error
def MAE(pred, true):
    return np.mean(np.abs(pred-true))

def RMSE(pred,true):
    return np.sqrt(np.mean((pred-true)**2))

def MAPE(pred, true):
    return np.mean(np.abs((pred - true) / true))


In [3]:
slide_windows_size = 192  #i.e.,input length 192
pred_length = 384     #i.e.,prediction lengths 384
stock = 'DJI'    #ETTh2,DJI
df_DJIA = pd.read_csv(r'./data/'+stock+'.csv')
# del df_DJIA['date']        #ETT2
del df_DJIA['Date']        #DJI
scaler = MinMaxScaler(feature_range=(0, 1))

sca_DJIA = scaler.fit_transform(df_DJIA)

features_j = 4     #ETTh2:6,DJI:4
def create_sequences(data, slide_windows_size, pred_length):
    X, y = [], []
    for i in range(len(data) - slide_windows_size - pred_length + 1):
        X.append(data[i:i+slide_windows_size, :])  # sliding window size [seq_len, features]
        y.append(data[i+slide_windows_size:i+slide_windows_size+pred_length, features_j])  
    return np.array(X), np.array(y)

X, y = create_sequences(sca_DJIA, slide_windows_size, pred_length)
X = torch.Tensor(X).to(device)
y = torch.Tensor(y).to(device)

X_train,X_val,X_test,y_train,y_val,y_test = split(X,y)   #7:2:1 

In [None]:
alpha = 1.0   ####0.7,0.8,0.9,1.0
k = 0.01   #In integer order, k does not play a role.

lrs =[0.1,0.05,0.03]                     #4e-2 
momentums =[0.1,0.5,0.9]                                        #0.9 
weight_decays =[0.1,0.01,0.001,0.0001]                                            #1e-2

num_feature = 5     #ETTh1:7,DJI:5
batch_size = 256
set_seed()
train_data = TensorDataset(X_train, y_train)
train_loader = DataLoader(train_data, batch_size=batch_size, shuffle=True)

class MLP(nn.Module):
    def __init__(self, input_size, hidden_size1=256, hidden_size2=128,output_size=pred_length):   ###DJI:hidden_size=256,ETTh1:hidden_size=128.
        super().__init__()
        self.flatten = nn.Flatten()
        # self.linear1 = FLinear(input_size, hidden_size1, alpha, k)  
        self.linear1 = nn.Linear(input_size, hidden_size1)
        self.leakrelu1 = nn.LeakyReLU()                          
        # self.linear2 = FLinear(hidden_size1, hidden_size2, alpha, k) 
        self.linear2 = nn.Linear(hidden_size1, hidden_size2)
        self.leakrelu2 = nn.LeakyReLU()
        # self.linear3 = FLinear(hidden_size2, output_size, alpha, k)   
        self.linear3 = nn.Linear(hidden_size2, output_size)

    def forward(self, x, epoch=0):
        x = self.flatten(x)    # (batch_size, seq_len*num_features)
        # x = self.leakrelu1(self.linear1(x, epoch)) 
        x = self.leakrelu1(self.linear1(x))
        # x = self.leakrelu2(self.linear2(x, epoch))
        x = self.leakrelu2(self.linear2(x))
        # x = self.linear3(x, epoch)
        x = self.linear3(x)
        return x

lr_best = 0
momentum_best = 0
weight_decay_best = 0
best_evaluation = 10

for lr in lrs:
    for momentum in momentums:
        for weight_decay in weight_decays:
            set_seed()
            model = MLP(input_size=slide_windows_size*num_feature).to(device)
            num_epochs = 500   #
            best_loss = 10
            criterion = nn.MSELoss()
            # optimizer = torch.optim.Adam(model.parameters(), lr=lr,weight_decay=1e-2)
            optimizer = torch.optim.SGD(model.parameters(),lr=lr,momentum=momentum,weight_decay=weight_decay)
            for ii in range(num_epochs):
                model.train()
                loss_sum = 0
                for inputs, targets in train_loader:
                    optimizer.zero_grad()
                    outputs = model(inputs,ii)
                    loss = criterion(outputs, targets)
                    loss_sum += loss
                    loss.backward()   #The default value of retain_graph is False.
                    optimizer.step()
                # train_loss10.append(loss_sum.cpu().detach().numpy())     ###########
                
                # print(f"Epoch {ii + 1}/{num_epochs}, Train Loss: {loss_sum.cpu().detach().numpy():.4f}")
                    
                model.eval()
                with torch.no_grad():
                    Val_outputs = model(X_val)
                    MSE_val = MSE(y_val.cpu().detach().numpy(),Val_outputs.cpu().detach().numpy())
                    
                    # val_loss10.append(MSE_val)   ########################Validation_loss
                    
                    # print(f"Epoch {ii + 1}/{num_epochs}, Val Loss: {MSE_val:.4f}")
                    # print('')
                    if best_loss > MSE_val:
                        best_loss = MSE_val
                        torch.save(model.state_dict(), r'./model/table2/'+stock+'_model_fractional_'+str(alpha)+'_'+str(k)+'_'+str(lr)+'_'+str(momentum)+'_'+str(weight_decay)+'_.pth') 

            model.load_state_dict(torch.load('./model/table2/'+stock+'_model_fractional_'+str(alpha)+'_'+str(k)+'_'+str(lr)+'_'+str(momentum)+'_'+str(weight_decay)+'_.pth'))
            model.eval()
            with torch.no_grad():
                test_outputs = model(X_test)
            RMSE10 = RMSE(y_test.cpu().numpy(),test_outputs.cpu().detach().numpy())
            MAE10 = MAE(y_test.cpu().numpy(),test_outputs.cpu().detach().numpy())
            MAPE10 = MAPE(y_test.cpu().numpy(),test_outputs.cpu().detach().numpy())
            print(str(alpha)+'_'+str(k)+'_'+str(lr)+'_'+str(momentum)+'_'+str(weight_decay)+'_'+f'RMSE:{RMSE10:.6f}')
            print(str(alpha)+'_'+str(k)+'_'+str(lr)+'_'+str(momentum)+'_'+str(weight_decay)+'_'+f'MAE:{MAE10:.6f}')
            print(str(alpha)+'_'+str(k)+'_'+str(lr)+'_'+str(momentum)+'_'+str(weight_decay)+'_'+f'MAPE:{MAPE10:.6f}')
            if best_evaluation > RMSE10 + MAE10 + MAPE10:
                best_evaluation = RMSE10 + MAE10 + MAPE10
                print(str(alpha)+'_'+str(k)+'_'+str(lr)+'_'+str(momentum)+'_'+str(weight_decay))
                print('best_evaluation:',best_evaluation)

1.0_0.01_0.1_0.1_0.1_RMSE:0.865125
1.0_0.01_0.1_0.1_0.1_MAE:0.860487
1.0_0.01_0.1_0.1_0.1_MAPE:93.083916
1.0_0.01_0.1_0.1_0.01_RMSE:0.236520
1.0_0.01_0.1_0.1_0.01_MAE:0.209617
1.0_0.01_0.1_0.1_0.01_MAPE:0.336179
1.0_0.01_0.1_0.1_0.01
best_evaluation: 0.78231573
1.0_0.01_0.1_0.1_0.001_RMSE:0.187794
1.0_0.01_0.1_0.1_0.001_MAE:0.153760
1.0_0.01_0.1_0.1_0.001_MAPE:0.230558
1.0_0.01_0.1_0.1_0.001
best_evaluation: 0.57211137
1.0_0.01_0.1_0.1_0.0001_RMSE:0.212755
1.0_0.01_0.1_0.1_0.0001_MAE:0.176063
1.0_0.01_0.1_0.1_0.0001_MAPE:0.278526
1.0_0.01_0.1_0.5_0.1_RMSE:0.865119
1.0_0.01_0.1_0.5_0.1_MAE:0.860481
1.0_0.01_0.1_0.5_0.1_MAPE:93.023186
1.0_0.01_0.1_0.5_0.01_RMSE:0.235831
1.0_0.01_0.1_0.5_0.01_MAE:0.208836
1.0_0.01_0.1_0.5_0.01_MAPE:0.334580
1.0_0.01_0.1_0.5_0.001_RMSE:0.186824
1.0_0.01_0.1_0.5_0.001_MAE:0.151415
1.0_0.01_0.1_0.5_0.001_MAPE:0.228141
1.0_0.01_0.1_0.5_0.001
best_evaluation: 0.5663805
1.0_0.01_0.1_0.5_0.0001_RMSE:0.212734
1.0_0.01_0.1_0.5_0.0001_MAE:0.176203
1.0_0.01_0.1_0.5_

In [8]:
best_evaluation

0.5654673

In [9]:
alphas = [0.8,0.85,0.9,0.95]   ####0.7,0.8,0.9,1.0
ks = [0.005,0.01,0.05,0.1,0.5]   

lr = 0.1               
momentums =0.9                                       
weight_decay =1e-3                                        

num_feature = 5     #ETTh1:7,DJI:5
batch_size = 256
set_seed()
train_data = TensorDataset(X_train, y_train)
train_loader = DataLoader(train_data, batch_size=batch_size, shuffle=True)

class MLP(nn.Module):
    def __init__(self, input_size, hidden_size1=256, hidden_size2=128,output_size=pred_length):   ###DJI:hidden_size=256,ETTh1:hidden_size=128.
        super().__init__()
        self.flatten = nn.Flatten()
        self.linear1 = FLinear(input_size, hidden_size1, alpha, k)  
        self.leakrelu1 = nn.LeakyReLU()                          
        self.linear2 = FLinear(hidden_size1, hidden_size2, alpha, k) 
        self.leakrelu2 = nn.LeakyReLU()
        self.linear3 = FLinear(hidden_size2, output_size, alpha, k)   

    def forward(self, x, epoch=0):
        x = self.flatten(x)    # (batch_size, seq_len*num_features)
        x = self.leakrelu1(self.linear1(x, epoch)) 
        x = self.leakrelu2(self.linear2(x, epoch))
        x = self.linear3(x, epoch)
        return x

for alpha in alphas:
    for k in ks:
        set_seed()
        model = MLP(input_size=slide_windows_size*num_feature).to(device)
        num_epochs = 500   #
        best_loss = 10
        criterion = nn.MSELoss()
        # optimizer = torch.optim.Adam(model.parameters(), lr=lr,weight_decay=1e-2)
        optimizer = torch.optim.SGD(model.parameters(),lr=lr,momentum=momentum,weight_decay=weight_decay)
        for ii in range(num_epochs):
            model.train()
            loss_sum = 0
            for inputs, targets in train_loader:
                optimizer.zero_grad()
                outputs = model(inputs,ii)
                loss = criterion(outputs, targets)
                loss_sum += loss
                loss.backward()   #The default value of retain_graph is False.
                optimizer.step()
            # train_loss10.append(loss_sum.cpu().detach().numpy())     ###########
            
            # print(f"Epoch {ii + 1}/{num_epochs}, Train Loss: {loss_sum.cpu().detach().numpy():.4f}")
                
            model.eval()
            with torch.no_grad():
                Val_outputs = model(X_val)
                MSE_val = MSE(y_val.cpu().detach().numpy(),Val_outputs.cpu().detach().numpy())
                
                # val_loss10.append(MSE_val)   ########################Validation_loss
                
                # print(f"Epoch {ii + 1}/{num_epochs}, Val Loss: {MSE_val:.4f}")
                # print('')
                if best_loss > MSE_val:
                    best_loss = MSE_val
                    torch.save(model.state_dict(), r'./model/table2/'+stock+'_model_fractional_f_'+str(alpha)+'_'+str(k)+'_'+str(lr)+'_'+str(momentum)+'_'+str(weight_decay)+'_.pth') 

        model.load_state_dict(torch.load('./model/table2/'+stock+'_model_fractional_f_'+str(alpha)+'_'+str(k)+'_'+str(lr)+'_'+str(momentum)+'_'+str(weight_decay)+'_.pth'))
        model.eval()
        with torch.no_grad():
            test_outputs = model(X_test)
        RMSE10 = RMSE(y_test.cpu().numpy(),test_outputs.cpu().detach().numpy())
        MAE10 = MAE(y_test.cpu().numpy(),test_outputs.cpu().detach().numpy())
        MAPE10 = MAPE(y_test.cpu().numpy(),test_outputs.cpu().detach().numpy())
        print(str(alpha)+'_'+str(k)+'_'+str(lr)+'_'+str(momentum)+'_'+str(weight_decay)+'_'+f'RMSE:{RMSE10:.6f}')
        print(str(alpha)+'_'+str(k)+'_'+str(lr)+'_'+str(momentum)+'_'+str(weight_decay)+'_'+f'MAE:{MAE10:.6f}')
        print(str(alpha)+'_'+str(k)+'_'+str(lr)+'_'+str(momentum)+'_'+str(weight_decay)+'_'+f'MAPE:{MAPE10:.6f}')
        if best_evaluation > RMSE10 + MAE10 + MAPE10:
            best_evaluation = RMSE10 + MAE10 + MAPE10
            print(str(alpha)+'_'+str(k)+'_'+str(lr)+'_'+str(momentum)+'_'+str(weight_decay))
            print('best_evaluation:',best_evaluation)

0.8_0.005_0.1_0.9_0.001_RMSE:0.185478
0.8_0.005_0.1_0.9_0.001_MAE:0.154724
0.8_0.005_0.1_0.9_0.001_MAPE:0.210638
0.8_0.005_0.1_0.9_0.001
best_evaluation: 0.55084056
0.8_0.01_0.1_0.9_0.001_RMSE:0.170215
0.8_0.01_0.1_0.9_0.001_MAE:0.143907
0.8_0.01_0.1_0.9_0.001_MAPE:0.197538
0.8_0.01_0.1_0.9_0.001
best_evaluation: 0.51165974
0.8_0.05_0.1_0.9_0.001_RMSE:0.204546
0.8_0.05_0.1_0.9_0.001_MAE:0.170802
0.8_0.05_0.1_0.9_0.001_MAPE:0.262138
0.8_0.1_0.1_0.9_0.001_RMSE:0.187661
0.8_0.1_0.1_0.9_0.001_MAE:0.156263
0.8_0.1_0.1_0.9_0.001_MAPE:0.212264
0.8_0.5_0.1_0.9_0.001_RMSE:0.192001
0.8_0.5_0.1_0.9_0.001_MAE:0.161765
0.8_0.5_0.1_0.9_0.001_MAPE:0.235442
0.85_0.005_0.1_0.9_0.001_RMSE:0.188668
0.85_0.005_0.1_0.9_0.001_MAE:0.157837
0.85_0.005_0.1_0.9_0.001_MAPE:0.220020
0.85_0.01_0.1_0.9_0.001_RMSE:0.187836
0.85_0.01_0.1_0.9_0.001_MAE:0.157617
0.85_0.01_0.1_0.9_0.001_MAPE:0.218301
0.85_0.05_0.1_0.9_0.001_RMSE:0.198446
0.85_0.05_0.1_0.9_0.001_MAE:0.161253
0.85_0.05_0.1_0.9_0.001_MAPE:0.236569
0.85_0.1

In [30]:
slide_windows_size = 192  #i.e.,input length 192
pred_length = 384     #i.e.,prediction lengths 384
stock = 'ETTh2'    #ETTh2,DJI
df_DJIA = pd.read_csv(r'./data/'+stock+'.csv')
del df_DJIA['date']        #ETT2
# del df_DJIA['Date']        #DJI
scaler = MinMaxScaler(feature_range=(0, 1))

sca_DJIA = scaler.fit_transform(df_DJIA)

features_j = 6     #ETTh2:6,DJI:4
def create_sequences(data, slide_windows_size, pred_length):
    X, y = [], []
    for i in range(len(data) - slide_windows_size - pred_length + 1):
        X.append(data[i:i+slide_windows_size, :])  # sliding window size [seq_len, features]
        y.append(data[i+slide_windows_size:i+slide_windows_size+pred_length, features_j])  
    return np.array(X), np.array(y)

X, y = create_sequences(sca_DJIA, slide_windows_size, pred_length)
X = torch.Tensor(X).to(device)
y = torch.Tensor(y).to(device)

X_train,X_val,X_test,y_train,y_val,y_test = split(X,y)   #7:2:1 

In [None]:
alpha = 1.0   ####0.7,0.8,0.9,1.0
k = 0.01    #In integer order, k does not play a role.

lrs =[0.01,0.005,0.001,0.0005]                     #ETTh2
momentums =[0.1,0.5,0.9]                                         #0.9 
weight_decays =[0.01,0.001,0.0001]                                            #1e-2

num_feature = 7     #ETTh1:7,DJI:5
batch_size = 256
set_seed()
train_data = TensorDataset(X_train, y_train)
train_loader = DataLoader(train_data, batch_size=batch_size, shuffle=True)

class MLP(nn.Module):
    def __init__(self, input_size, hidden_size1=256, hidden_size2=128,output_size=pred_length):   ###DJI:hidden_size=256,ETTh1:hidden_size=128.
        super().__init__()
        self.flatten = nn.Flatten()
        # self.linear1 = FLinear(input_size, hidden_size1, alpha, k)  
        self.linear1 = nn.Linear(input_size, hidden_size1)
        self.leakrelu1 = nn.LeakyReLU()                          
        # self.linear2 = FLinear(hidden_size1, hidden_size2, alpha, k) 
        self.linear2 = nn.Linear(hidden_size1, hidden_size2)
        self.leakrelu2 = nn.LeakyReLU()
        # self.linear3 = FLinear(hidden_size2, output_size, alpha, k)   
        self.linear3 = nn.Linear(hidden_size2, output_size)

    def forward(self, x, epoch=0):
        x = self.flatten(x)    # (batch_size, seq_len*num_features)
        # x = self.leakrelu1(self.linear1(x, epoch)) 
        x = self.leakrelu1(self.linear1(x))
        # x = self.leakrelu2(self.linear2(x, epoch))
        x = self.leakrelu2(self.linear2(x))
        # x = self.linear3(x, epoch)
        x = self.linear3(x)
        return x

lr_best = 0
momentum_best = 0
weight_decay_best = 0
best_evaluation = 10

for lr in lrs:
    for momentum in momentums:
        for weight_decay in weight_decays:
            set_seed()
            model = MLP(input_size=slide_windows_size*num_feature).to(device)
            num_epochs = 500   #
            best_loss = 10
            criterion = nn.MSELoss()
            # optimizer = torch.optim.Adam(model.parameters(), lr=lr,weight_decay=1e-2)
            optimizer = torch.optim.SGD(model.parameters(),lr=lr,momentum=momentum,weight_decay=weight_decay)
            for ii in range(num_epochs):
                model.train()
                loss_sum = 0
                for inputs, targets in train_loader:
                    optimizer.zero_grad()
                    outputs = model(inputs,ii)
                    loss = criterion(outputs, targets)
                    loss_sum += loss
                    loss.backward()   #The default value of retain_graph is False.
                    optimizer.step()
                # train_loss10.append(loss_sum.cpu().detach().numpy())     ###########
                
                # print(f"Epoch {ii + 1}/{num_epochs}, Train Loss: {loss_sum.cpu().detach().numpy():.4f}")
                    
                model.eval()
                with torch.no_grad():
                    Val_outputs = model(X_val)
                    MSE_val = MSE(y_val.cpu().detach().numpy(),Val_outputs.cpu().detach().numpy())
                    
                    # val_loss10.append(MSE_val)   ########################Validation_loss
                    
                    # print(f"Epoch {ii + 1}/{num_epochs}, Val Loss: {MSE_val:.4f}")
                    # print('')
                    if best_loss > MSE_val:
                        best_loss = MSE_val
                        torch.save(model.state_dict(), r'./model/table2/'+stock+'_model_fractional_'+str(alpha)+'_'+str(k)+'_'+str(lr)+'_'+str(momentum)+'_'+str(weight_decay)+'_.pth') 

            model.load_state_dict(torch.load('./model/table2/'+stock+'_model_fractional_'+str(alpha)+'_'+str(k)+'_'+str(lr)+'_'+str(momentum)+'_'+str(weight_decay)+'_.pth'))
            model.eval()
            with torch.no_grad():
                test_outputs = model(X_test)
            RMSE10 = RMSE(y_test.cpu().numpy(),test_outputs.cpu().detach().numpy())
            MAE10 = MAE(y_test.cpu().numpy(),test_outputs.cpu().detach().numpy())
            MAPE10 = MAPE(y_test.cpu().numpy(),test_outputs.cpu().detach().numpy())
            print(str(alpha)+'_'+str(k)+'_'+str(lr)+'_'+str(momentum)+'_'+str(weight_decay)+'_'+f'RMSE:{RMSE10:.6f}')
            print(str(alpha)+'_'+str(k)+'_'+str(lr)+'_'+str(momentum)+'_'+str(weight_decay)+'_'+f'MAE:{MAE10:.6f}')
            print(str(alpha)+'_'+str(k)+'_'+str(lr)+'_'+str(momentum)+'_'+str(weight_decay)+'_'+f'MAPE:{MAPE10:.6f}')
            if best_evaluation > RMSE10 + MAE10 + MAPE10:
                best_evaluation = RMSE10 + MAE10 + MAPE10
                print(str(alpha)+'_'+str(k)+'_'+str(lr)+'_'+str(momentum)+'_'+str(weight_decay))
                print('best_evaluation:',best_evaluation)

1.0_0.01_0.01_0.1_0.01_RMSE:0.111223
1.0_0.01_0.01_0.1_0.01_MAE:0.087949
1.0_0.01_0.01_0.1_0.01_MAPE:0.167158
1.0_0.01_0.01_0.1_0.01
best_evaluation: 0.36633083
1.0_0.01_0.01_0.1_0.001_RMSE:0.107440
1.0_0.01_0.01_0.1_0.001_MAE:0.086532
1.0_0.01_0.01_0.1_0.001_MAPE:0.156864
1.0_0.01_0.01_0.1_0.001
best_evaluation: 0.3508362
1.0_0.01_0.01_0.1_0.0001_RMSE:0.107330
1.0_0.01_0.01_0.1_0.0001_MAE:0.086617
1.0_0.01_0.01_0.1_0.0001_MAPE:0.156034
1.0_0.01_0.01_0.1_0.0001
best_evaluation: 0.3499819
1.0_0.01_0.01_0.5_0.01_RMSE:0.111372
1.0_0.01_0.01_0.5_0.01_MAE:0.088036
1.0_0.01_0.01_0.5_0.01_MAPE:0.167517
1.0_0.01_0.01_0.5_0.001_RMSE:0.107191
1.0_0.01_0.01_0.5_0.001_MAE:0.086377
1.0_0.01_0.01_0.5_0.001_MAPE:0.156200
1.0_0.01_0.01_0.5_0.001
best_evaluation: 0.34976786
1.0_0.01_0.01_0.5_0.0001_RMSE:0.106825
1.0_0.01_0.01_0.5_0.0001_MAE:0.086327
1.0_0.01_0.01_0.5_0.0001_MAPE:0.154558
1.0_0.01_0.01_0.5_0.0001
best_evaluation: 0.34770986
1.0_0.01_0.01_0.9_0.01_RMSE:0.112342
1.0_0.01_0.01_0.9_0.01_MAE

In [None]:
alphas = [0.8,0.85,0.9,0.95]   ####0.7,0.8,0.9,1.0
ks = [0.005,0.01,0.05,0.1,0.5]   

lr = 0.01               
momentums =0.9                                       
weight_decay =1e-4                                        

num_feature = 7     #ETTh1:7,DJI:5
batch_size = 256
set_seed()
train_data = TensorDataset(X_train, y_train)
train_loader = DataLoader(train_data, batch_size=batch_size, shuffle=True)

class MLP(nn.Module):
    def __init__(self, input_size, hidden_size1=256, hidden_size2=128,output_size=pred_length):   ###DJI:hidden_size=256,ETTh1:hidden_size=128.
        super().__init__()
        self.flatten = nn.Flatten()
        self.linear1 = FLinear(input_size, hidden_size1, alpha, k)  
        self.leakrelu1 = nn.LeakyReLU()                          
        self.linear2 = FLinear(hidden_size1, hidden_size2, alpha, k) 
        self.leakrelu2 = nn.LeakyReLU()
        self.linear3 = FLinear(hidden_size2, output_size, alpha, k)   

    def forward(self, x, epoch=0):
        x = self.flatten(x)    # (batch_size, seq_len*num_features)
        x = self.leakrelu1(self.linear1(x, epoch)) 
        x = self.leakrelu2(self.linear2(x, epoch))
        x = self.linear3(x, epoch)
        return x

for alpha in alphas:
    for k in ks:
        set_seed()
        model = MLP(input_size=slide_windows_size*num_feature).to(device)
        num_epochs = 500   #
        best_loss = 10
        criterion = nn.MSELoss()
        # optimizer = torch.optim.Adam(model.parameters(), lr=lr,weight_decay=1e-2)
        optimizer = torch.optim.SGD(model.parameters(),lr=lr,momentum=momentum,weight_decay=weight_decay)
        for ii in range(num_epochs):
            model.train()
            loss_sum = 0
            for inputs, targets in train_loader:
                optimizer.zero_grad()
                outputs = model(inputs,ii)
                loss = criterion(outputs, targets)
                loss_sum += loss
                loss.backward()   #The default value of retain_graph is False.
                optimizer.step()
            # train_loss10.append(loss_sum.cpu().detach().numpy())     ###########
            
            # print(f"Epoch {ii + 1}/{num_epochs}, Train Loss: {loss_sum.cpu().detach().numpy():.4f}")
                
            model.eval()
            with torch.no_grad():
                Val_outputs = model(X_val)
                MSE_val = MSE(y_val.cpu().detach().numpy(),Val_outputs.cpu().detach().numpy())
                
                # val_loss10.append(MSE_val)   ########################Validation_loss
                
                # print(f"Epoch {ii + 1}/{num_epochs}, Val Loss: {MSE_val:.4f}")
                # print('')
                if best_loss > MSE_val:
                    best_loss = MSE_val
                    torch.save(model.state_dict(), r'./model/table2/'+stock+'_model_fractional_f_'+str(alpha)+'_'+str(k)+'_'+str(lr)+'_'+str(momentum)+'_'+str(weight_decay)+'_.pth') 

        model.load_state_dict(torch.load('./model/table2/'+stock+'_model_fractional_f_'+str(alpha)+'_'+str(k)+'_'+str(lr)+'_'+str(momentum)+'_'+str(weight_decay)+'_.pth'))
        model.eval()
        with torch.no_grad():
            test_outputs = model(X_test)
        RMSE10 = RMSE(y_test.cpu().numpy(),test_outputs.cpu().detach().numpy())
        MAE10 = MAE(y_test.cpu().numpy(),test_outputs.cpu().detach().numpy())
        MAPE10 = MAPE(y_test.cpu().numpy(),test_outputs.cpu().detach().numpy())
        print(str(alpha)+'_'+str(k)+'_'+str(lr)+'_'+str(momentum)+'_'+str(weight_decay)+'_'+f'RMSE:{RMSE10:.6f}')
        print(str(alpha)+'_'+str(k)+'_'+str(lr)+'_'+str(momentum)+'_'+str(weight_decay)+'_'+f'MAE:{MAE10:.6f}')
        print(str(alpha)+'_'+str(k)+'_'+str(lr)+'_'+str(momentum)+'_'+str(weight_decay)+'_'+f'MAPE:{MAPE10:.6f}')
        if best_evaluation > RMSE10 + MAE10 + MAPE10:
            best_evaluation = RMSE10 + MAE10 + MAPE10
            print(str(alpha)+'_'+str(k)+'_'+str(lr)+'_'+str(momentum)+'_'+str(weight_decay))
            print('best_evaluation:',best_evaluation)

0.8_0.005_0.01_0.9_0.0001_RMSE:0.136359
0.8_0.005_0.01_0.9_0.0001_MAE:0.110283
0.8_0.005_0.01_0.9_0.0001_MAPE:0.181609
0.8_0.01_0.01_0.9_0.0001_RMSE:0.133766
0.8_0.01_0.01_0.9_0.0001_MAE:0.108530
0.8_0.01_0.01_0.9_0.0001_MAPE:0.190440
0.8_0.05_0.01_0.9_0.0001_RMSE:0.104112
0.8_0.05_0.01_0.9_0.0001_MAE:0.084587
0.8_0.05_0.01_0.9_0.0001_MAPE:0.146903
0.8_0.05_0.01_0.9_0.0001
best_evaluation: 0.3356028
0.8_0.1_0.01_0.9_0.0001_RMSE:0.108161
0.8_0.1_0.01_0.9_0.0001_MAE:0.088050
0.8_0.1_0.01_0.9_0.0001_MAPE:0.154006
0.8_0.5_0.01_0.9_0.0001_RMSE:0.106071
0.8_0.5_0.01_0.9_0.0001_MAE:0.085806
0.8_0.5_0.01_0.9_0.0001_MAPE:0.152801
0.85_0.005_0.01_0.9_0.0001_RMSE:0.098687
0.85_0.005_0.01_0.9_0.0001_MAE:0.079301
0.85_0.005_0.01_0.9_0.0001_MAPE:0.137870
0.85_0.005_0.01_0.9_0.0001
best_evaluation: 0.3158567
0.85_0.01_0.01_0.9_0.0001_RMSE:0.105068
0.85_0.01_0.01_0.9_0.0001_MAE:0.085190
0.85_0.01_0.01_0.9_0.0001_MAPE:0.149007
0.85_0.05_0.01_0.9_0.0001_RMSE:0.107791
0.85_0.05_0.01_0.9_0.0001_MAE:0.0865