In [11]:
from IPython import display
from matplotlib import pyplot as plt
import numpy as np
import pandas as pd
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score
from sklearn.preprocessing import StandardScaler
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader

from data_preprocessing import get_data

In [2]:
X_train, y_train, X_test, y_test, y_scaler, train_loader, test_loader = get_data(
            timestep = 10,
            batch_size = 64,
            y_name = 'Closing price',
            train_size = 6627,
            path = "Data/SSE000001.csv")
y_test = y_scaler.inverse_transform(y_test)
print(X_train.shape, y_train.shape, X_test.shape, y_test.shape)

torch.Size([6618, 10, 8]) torch.Size([6618, 1]) torch.Size([490, 10, 8]) (490, 1)


In [4]:
if torch.backends.mps.is_available():
    device = torch.device("mps")
    print(device)
elif torch.cuda.is_available():
    device = torch.device("cuda")
    print(device)
else:
    print ("GPU device not found.")

device = None

GPU device not found.


# CNN

In [5]:
class CNN(nn.Module):
    def __init__(self):
        super(CNN, self).__init__()

        # Convolutional layer parameters from the table
        self.conv1d = nn.Conv1d(in_channels= 8, out_channels= 32, kernel_size= 1, padding="same")
        
        # Activation function
        self.tanh = nn.Tanh()

        # Pooling
        self.maxpool1d = nn.MaxPool1d(kernel_size= 1)

        self.relu = nn.ReLU()

    def forward(self, x):
        # Apply convolutional layer
        x = self.conv1d(x)
        x = self.tanh(x)
        x = self.maxpool1d(x)
        x = self.relu(x)
        return x


# CNN_LSTM

## LSTM

In [None]:
class LSTM(nn.Module):
    def __init__(self, input_size = 8, lstm_num_layers = 1, lstm_hidden_size = 64,  fc1_output_size = 16):
        super(LSTM, self).__init__()
        self.input_size = input_size
        self.lstm_num_layers = lstm_num_layers
        self.lstm_hidden_size = lstm_hidden_size
        # self.lstm_dropout = lstm_dropout
        self.fc1_output_size = fc1_output_size

        self.lstm = nn.LSTM(input_size = self.input_size, 
                             hidden_size = self.lstm_hidden_size,
                             num_layers = self.lstm_num_layers,
                             batch_first = True)

        # self.dropout = nn.Dropout(p = self.lstm_dropout)
        # self.fc1 = nn.Linear(self.lstm_hidden_size, self.fc1_output_size)
        # self.fc2 = nn.Linear(self.fc1_output_size, 1)
        self.fc1 = nn.Linear(self.lstm_hidden_size, 1)
        self.tanh = nn.Tanh()

    def forward(self, x):
        h0 = torch.zeros(self.lstm_num_layers, x.size(0), self.lstm_hidden_size).to(device)
        c0 = torch.zeros(self.lstm_num_layers, x.size(0), self.lstm_hidden_size).to(device)

        # x = F.relu(x)

        h_lstm, _ = self.lstm(x, (h0, c0))

        tan_h_lstm = self.tanh(h_lstm)
        # h_dropout = self.dropout(h_lstm)

        # h_fc1 = self.fc1(h_lstm)
        # h_fc1 = F.relu(h_fc1)

        # h_fc2 = self.fc2(h_fc1)
        # output = h_fc2[:, -1, :]

        h_fc1 = self.fc1(tan_h_lstm)
        output = h_fc1[:, -1, :]

        return output

## Define model

In [60]:
class CNN_LSTM(nn.Module):
    def __init__(self):
        super(CNN_LSTM, self).__init__()
        self.cnn = CNN()
        self.lstm = LSTM(input_size=32)  # Assuming the CNN output has 32 features

    def forward(self, x):
        batch_size, seq_len, features = x.shape  #[64, 10, 8]
        # CNN expects the channels in the second dimension
        x = x.permute(0, 2, 1)  #[64, 8, 10]
        x = self.cnn(x) #[64, 32, 10]
        # Permute back to (batch, seq_len, features) for the LSTM
        x = x.permute(0, 2, 1) #[64, 10, 32]
        x = self.lstm(x)
        return x

In [61]:
# setting hyperparameters
batch_size = 64 #? None?
time_steps = 10


# input_size = 8
# lstm_num_layers = 1
# lstm_hidden_size = 64
# lstm_dropout = 0.2
# fc1_output_size = 16
learning_rate = 0.0001
num_epoch = 100

model = CNN_LSTM().to(device)
optimizer = optim.Adam(model.parameters(), lr = learning_rate)
criterion = nn.L1Loss() # Mean Absolute Error

## Train

In [62]:
model.train(True)

for epoch in range(num_epoch):
    for batch_index, (X, y_true) in enumerate(train_loader):
        X, y_true = X.to(), y_true.to()
        y_pred = model(X)
        loss = criterion(y_pred, y_true)
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        if batch_index % 10 == 0:
            print(f'Epoch {epoch}, Batch {batch_index}, Loss: {loss.item()}')
            # Assuming you want to clear the output in a Jupyter notebook to avoid clutter
            # You would uncomment the next line in a Jupyter notebook environment
            # display.clear_output(wait=True)

model.train(False)

Epoch 0, Batch 0, Loss: 0.7491185665130615
Epoch 0, Batch 10, Loss: 0.7189062833786011
Epoch 0, Batch 20, Loss: 0.6353232860565186
Epoch 0, Batch 30, Loss: 0.6690065860748291
Epoch 0, Batch 40, Loss: 0.6986151337623596
Epoch 0, Batch 50, Loss: 0.6975069046020508
Epoch 0, Batch 60, Loss: 0.731777548789978
Epoch 0, Batch 70, Loss: 0.6237348318099976
Epoch 0, Batch 80, Loss: 0.6206910610198975
Epoch 0, Batch 90, Loss: 0.621310293674469
Epoch 0, Batch 100, Loss: 0.535598874092102
Epoch 1, Batch 0, Loss: 0.5707441568374634
Epoch 1, Batch 10, Loss: 0.45867636799812317
Epoch 1, Batch 20, Loss: 0.36037755012512207
Epoch 1, Batch 30, Loss: 0.20988866686820984
Epoch 1, Batch 40, Loss: 0.12761858105659485
Epoch 1, Batch 50, Loss: 0.14389510452747345
Epoch 1, Batch 60, Loss: 0.19735851883888245
Epoch 1, Batch 70, Loss: 0.12763115763664246
Epoch 1, Batch 80, Loss: 0.11140377074480057
Epoch 1, Batch 90, Loss: 0.16423824429512024
Epoch 1, Batch 100, Loss: 0.06517411023378372
Epoch 2, Batch 0, Loss: 0

CNN_LSTM(
  (cnn): CNN(
    (conv1d): Conv1d(8, 32, kernel_size=(1,), stride=(1,), padding=same)
    (tanh): Tanh()
    (maxpool1d): MaxPool1d(kernel_size=1, stride=1, padding=0, dilation=1, ceil_mode=False)
    (relu): ReLU()
  )
  (lstm): LSTM(
    (lstm): LSTM(32, 64, batch_first=True)
    (fc1): Linear(in_features=64, out_features=1, bias=True)
    (tanh): Tanh()
  )
)

## Evaluation

In [63]:
with torch.no_grad():
    y_train_pred = model(X_train.to()).to()
    y_test_pred = model(X_test.to()).to()

y_test_pred = y_scaler.inverse_transform(y_test_pred)

train_mse = mean_squared_error(y_train_pred, y_train, squared=False)
test_mse = mean_squared_error(y_test_pred, y_test, squared=False)

train_mae = mean_absolute_error(y_train_pred, y_train)
test_mae = mean_absolute_error(y_test_pred, y_test)

train_r2 = r2_score(y_train_pred, y_train)
test_r2 = r2_score(y_test_pred, y_test)

print("Train RMSE: {0:.6f}; Test RMSE {1:.6f}\n".format(train_mse, test_mse))
print("Train  MAE: {0:.6f}; Test  MAE {1:.6f}\n".format(train_mae, test_mae))
print("Train  R^2: {0:.6f}; Test  R^2 {1:.6f}\n".format(train_r2, test_r2))

Train RMSE: 0.039533; Test RMSE 41.517510

Train  MAE: 0.023022; Test  MAE 28.970661

Train  R^2: 0.998437; Test  R^2 0.962160



## save the model

In [64]:
lstm_num_layers = 1
lstm_hidden_size = 64
path = "models/CNN_LSTM_{0}Epoch_{1}Lr_{2}Layer_{3}Size.pt".format(num_epoch, learning_rate, lstm_num_layers, lstm_hidden_size)
torch.save(model.state_dict(), path)

# CNN-BILSTM

## BILSTM

In [71]:
class BILSTM(nn.Module):
    def __init__(self, input_size = 8, lstm_num_layers = 1, lstm_hidden_size = 64, lstm_dropout = 0.2, fc1_output_size = 16):
        super(BILSTM, self).__init__()
        self.input_size = input_size
        self.lstm_num_layers = lstm_num_layers
        self.lstm_hidden_size = lstm_hidden_size
        self.lstm_dropout = lstm_dropout
        self.fc1_output_size = fc1_output_size

        self.lstm = nn.LSTM(input_size = self.input_size, 
                             hidden_size = self.lstm_hidden_size,
                             num_layers = self.lstm_num_layers,
                             batch_first = True,
                             bidirectional = True)
        
        # self.fc1 = nn.Linear(self.lstm_hidden_size * 2, self.fc1_output_size)
        # self.fc2 = nn.Linear(self.fc1_output_size, 1)
        self.fc1 = nn.Linear(self.lstm_hidden_size * 2, 1)
        self.tanh = nn.Tanh()

    def forward(self, x):
        h0 = torch.zeros(self.lstm_num_layers * 2, x.size(0), self.lstm_hidden_size).to(device)
        c0 = torch.zeros(self.lstm_num_layers * 2, x.size(0), self.lstm_hidden_size).to(device)


        h_lstm, _ = self.lstm(x, (h0, c0))

        # h_fc1 = self.fc1(h_lstm)
        # h_fc1 = F.relu(h_fc1)

        # h_fc2 = self.fc2(h_fc1)
        # output = h_fc2[:, -1, :]

        tan_h_lstm = self.tanh(h_lstm)
        h_fc1 = self.fc1(tan_h_lstm)
        output = h_fc1[:, -1, :]

        return output

## Define model

In [72]:
class CNN_BILSTM(nn.Module):
    def __init__(self):
        super(CNN_BILSTM, self).__init__()
        self.cnn = CNN()
        self.bilstm = BILSTM(input_size=32)  # Assuming the CNN output has 32 features

    def forward(self, x):
        batch_size, seq_len, features = x.shape  #[64, 10, 8]
        # CNN expects the channels in the second dimension
        x = x.permute(0, 2, 1)  #[64, 8, 10]
        x = self.cnn(x)
        # Permute back to (batch, seq_len, features) for the LSTM
        x = x.permute(0, 2, 1)
        # Reshape x to fit BiLSTM input requirements
        # x = x.contiguous().view(batch_size, seq_len, -1)
        x = self.bilstm(x)
        return x

In [73]:
# setting hyperparameters
batch_size = 64 #? None?
time_steps = 10


# input_size = 8
# lstm_num_layers = 1
# lstm_hidden_size = 64
# lstm_dropout = 0.2
# fc1_output_size = 16
learning_rate = 0.0001
num_epoch = 100

model = CNN_BILSTM().to(device)
optimizer = optim.Adam(model.parameters(), lr = learning_rate)
criterion = nn.L1Loss() # Mean Absolute Error

## Train

In [74]:
model.train(True)

for epoch in range(num_epoch):
    for batch_index, (X, y_true) in enumerate(train_loader):
        X, y_true = X.to(), y_true.to()
        y_pred = model(X)
        loss = criterion(y_pred, y_true)
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        if batch_index % 10 == 0:
            print(f'Epoch {epoch}, Batch {batch_index}, Loss: {loss.item()}')
            # Assuming you want to clear the output in a Jupyter notebook to avoid clutter
            # You would uncomment the next line in a Jupyter notebook environment
            # display.clear_output(wait=True)

model.train(False)

Epoch 0, Batch 0, Loss: 0.8165919780731201
Epoch 0, Batch 10, Loss: 0.8068506717681885
Epoch 0, Batch 20, Loss: 0.7826277017593384
Epoch 0, Batch 30, Loss: 0.7410628199577332
Epoch 0, Batch 40, Loss: 0.7587448954582214
Epoch 0, Batch 50, Loss: 0.6637872457504272
Epoch 0, Batch 60, Loss: 0.7033519148826599
Epoch 0, Batch 70, Loss: 0.718801736831665
Epoch 0, Batch 80, Loss: 0.7241650819778442
Epoch 0, Batch 90, Loss: 0.6135603785514832
Epoch 0, Batch 100, Loss: 0.5913267731666565
Epoch 1, Batch 0, Loss: 0.597760021686554
Epoch 1, Batch 10, Loss: 0.5409653186798096
Epoch 1, Batch 20, Loss: 0.4647952616214752
Epoch 1, Batch 30, Loss: 0.3569648563861847
Epoch 1, Batch 40, Loss: 0.24569526314735413
Epoch 1, Batch 50, Loss: 0.17313966155052185
Epoch 1, Batch 60, Loss: 0.1711079627275467
Epoch 1, Batch 70, Loss: 0.1007409617304802
Epoch 1, Batch 80, Loss: 0.1458434909582138
Epoch 1, Batch 90, Loss: 0.14248357713222504
Epoch 1, Batch 100, Loss: 0.12932683527469635
Epoch 2, Batch 0, Loss: 0.0944

CNN_BILSTM(
  (cnn): CNN(
    (conv1d): Conv1d(8, 32, kernel_size=(1,), stride=(1,), padding=same)
    (tanh): Tanh()
    (maxpool1d): MaxPool1d(kernel_size=1, stride=1, padding=0, dilation=1, ceil_mode=False)
    (relu): ReLU()
  )
  (bilstm): BILSTM(
    (lstm): LSTM(32, 64, batch_first=True, bidirectional=True)
    (fc1): Linear(in_features=128, out_features=1, bias=True)
    (tanh): Tanh()
  )
)

## Evaluation

In [76]:
with torch.no_grad():
    y_train_pred = model(X_train.to()).to()
    y_test_pred = model(X_test.to()).to()

y_test_pred = y_scaler.inverse_transform(y_test_pred)

train_mse = mean_squared_error(y_train_pred, y_train, squared=False)
test_mse = mean_squared_error(y_test_pred, y_test, squared=False)

train_mae = mean_absolute_error(y_train_pred, y_train)
test_mae = mean_absolute_error(y_test_pred, y_test)

train_r2 = r2_score(y_train_pred, y_train)
test_r2 = r2_score(y_test_pred, y_test)

print("Train RMSE: {0:.6f}; Test RMSE {1:.6f}\n".format(train_mse, test_mse))
print("Train  MAE: {0:.6f}; Test  MAE {1:.6f}\n".format(train_mae, test_mae))
print("Train  R^2: {0:.6f}; Test  R^2 {1:.6f}\n".format(train_r2, test_r2))

Train RMSE: 0.039768; Test RMSE 44.994372

Train  MAE: 0.023019; Test  MAE 31.383991

Train  R^2: 0.998418; Test  R^2 0.959002

