In [11]:
from IPython import display
from matplotlib import pyplot as plt
import numpy as np
import pandas as pd
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score
from sklearn.preprocessing import StandardScaler
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader

from data_preprocessing import get_data

In [2]:
X_train, y_train, X_test, y_test, y_scaler, train_loader, test_loader = get_data(
            timestep = 10,
            batch_size = 64,
            y_name = 'Closing price',
            train_size = 6627,
            path = "Data/SSE000001.csv")
y_test = y_scaler.inverse_transform(y_test)
print(X_train.shape, y_train.shape, X_test.shape, y_test.shape)

torch.Size([6618, 10, 8]) torch.Size([6618, 1]) torch.Size([490, 10, 8]) (490, 1)


In [4]:
if torch.backends.mps.is_available():
    device = torch.device("mps")
    print(device)
elif torch.cuda.is_available():
    device = torch.device("cuda")
    print(device)
else:
    print ("GPU device not found.")

device = None

GPU device not found.


# CNN

In [5]:
class CNN(nn.Module):
    def __init__(self):
        super(CNN, self).__init__()

        # Convolutional layer parameters from the table
        self.conv1d = nn.Conv1d(in_channels= 8, out_channels= 32, kernel_size= 1, padding="same")
        
        # Activation function
        self.tanh = nn.Tanh()

        # Pooling
        self.maxpool1d = nn.MaxPool1d(kernel_size= 1)

        self.relu = nn.ReLU()

    def forward(self, x):
        # Apply convolutional layer
        x = self.conv1d(x)
        x = self.tanh(x)
        x = self.maxpool1d(x)
        x = self.relu(x)
        return x


# BILSTM

In [6]:
class BILSTM(nn.Module):
    def __init__(self, input_size = 8, lstm_num_layers = 1, lstm_hidden_size = 64, lstm_dropout = 0.2, fc1_output_size = 16):
        super(BILSTM, self).__init__()
        self.input_size = input_size
        self.lstm_num_layers = lstm_num_layers
        self.lstm_hidden_size = lstm_hidden_size
        self.lstm_dropout = lstm_dropout
        self.fc1_output_size = fc1_output_size

        self.lstm = nn.LSTM(input_size = self.input_size, 
                             hidden_size = self.lstm_hidden_size,
                             num_layers = self.lstm_num_layers,
                             batch_first = True,
                             bidirectional = True)
        
        self.fc1 = nn.Linear(self.lstm_hidden_size * 2, self.fc1_output_size)
        self.fc2 = nn.Linear(self.fc1_output_size, 1)

    def forward(self, x):
        h0 = torch.zeros(self.lstm_num_layers * 2, x.size(0), self.lstm_hidden_size).to(device)
        c0 = torch.zeros(self.lstm_num_layers * 2, x.size(0), self.lstm_hidden_size).to(device)


        h_lstm, _ = self.lstm(x, (h0, c0))

        h_fc1 = self.fc1(h_lstm)
        h_fc1 = F.relu(h_fc1)

        h_fc2 = self.fc2(h_fc1)
        output = h_fc2[:, -1, :]

        return output

In [7]:
class LSTM(nn.Module):
    def __init__(self, input_size = 8, lstm_num_layers = 1, lstm_hidden_size = 64,  fc1_output_size = 16):
        super(LSTM, self).__init__()
        self.input_size = input_size
        self.lstm_num_layers = lstm_num_layers
        self.lstm_hidden_size = lstm_hidden_size
        # self.lstm_dropout = lstm_dropout
        self.fc1_output_size = fc1_output_size

        self.lstm = nn.LSTM(input_size = self.input_size, 
                             hidden_size = self.lstm_hidden_size,
                             num_layers = self.lstm_num_layers,
                             batch_first = True)

        # self.dropout = nn.Dropout(p = self.lstm_dropout)
        self.fc1 = nn.Linear(self.lstm_hidden_size, self.fc1_output_size)
        self.fc2 = nn.Linear(self.fc1_output_size, 1)

    def forward(self, x):
        h0 = torch.zeros(self.lstm_num_layers, x.size(0), self.lstm_hidden_size).to(device)
        c0 = torch.zeros(self.lstm_num_layers, x.size(0), self.lstm_hidden_size).to(device)

        # x = F.relu(x)

        h_lstm, _ = self.lstm(x, (h0, c0))
        # h_dropout = self.dropout(h_lstm)

        h_fc1 = self.fc1(h_lstm)
        h_fc1 = F.relu(h_fc1)

        h_fc2 = self.fc2(h_fc1)
        output = h_fc2[:, -1, :]

        return output

# CNN_LSTM

In [8]:
class CNN_BiLSTM(nn.Module):
    def __init__(self):
        super(CNN_BiLSTM, self).__init__()
        self.cnn = CNN()
        self.lstm = LSTM(input_size=32)  # Assuming the CNN output has 32 features

    def forward(self, x):
        batch_size, seq_len, features = x.shape  #[64, 10, 8]
        # CNN expects the channels in the second dimension
        x = x.permute(0, 2, 1)  #[64, 8, 10]
        x = self.cnn(x)
        # Permute back to (batch, seq_len, features) for the LSTM
        x = x.permute(0, 2, 1)
        # Reshape x to fit BiLSTM input requirements
        x = x.contiguous().view(batch_size, seq_len, -1)
        x = self.lstm(x)
        return x

# Define model

In [15]:
# setting hyperparameters
batch_size = 64 #? None?
time_steps = 10


# input_size = 8
# lstm_num_layers = 1
# lstm_hidden_size = 64
# lstm_dropout = 0.2
# fc1_output_size = 16
learning_rate = 0.0001
num_epoch = 20

model = CNN_BiLSTM().to(device)
optimizer = optim.Adam(model.parameters(), lr = learning_rate)
criterion = nn.L1Loss() # Mean Absolute Error

# Train

In [16]:
model.train(True)

for epoch in range(num_epoch):
    for batch_index, (X, y_true) in enumerate(train_loader):
        X, y_true = X.to(), y_true.to()
        y_pred = model(X)
        loss = criterion(y_pred, y_true)
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        if batch_index % 10 == 0:
            print(f'Epoch {epoch}, Batch {batch_index}, Loss: {loss.item()}')
            # Assuming you want to clear the output in a Jupyter notebook to avoid clutter
            # You would uncomment the next line in a Jupyter notebook environment
            # display.clear_output(wait=True)

model.train(False)

Epoch 0, Batch 0, Loss: 0.7840930223464966
Epoch 0, Batch 10, Loss: 0.8014177083969116
Epoch 0, Batch 20, Loss: 0.7768257856369019
Epoch 0, Batch 30, Loss: 0.8865891695022583
Epoch 0, Batch 40, Loss: 0.8622677326202393
Epoch 0, Batch 50, Loss: 0.8164482116699219
Epoch 0, Batch 60, Loss: 0.8551117181777954
Epoch 0, Batch 70, Loss: 0.6956839561462402
Epoch 0, Batch 80, Loss: 0.7023064494132996
Epoch 0, Batch 90, Loss: 0.7838397026062012
Epoch 0, Batch 100, Loss: 0.5964908003807068
Epoch 1, Batch 0, Loss: 0.5842689275741577
Epoch 1, Batch 10, Loss: 0.7036736607551575
Epoch 1, Batch 20, Loss: 0.6841187477111816
Epoch 1, Batch 30, Loss: 0.5703263282775879
Epoch 1, Batch 40, Loss: 0.4365098476409912
Epoch 1, Batch 50, Loss: 0.3237282633781433
Epoch 1, Batch 60, Loss: 0.29942458868026733
Epoch 1, Batch 70, Loss: 0.2904644012451172
Epoch 1, Batch 80, Loss: 0.3288952112197876
Epoch 1, Batch 90, Loss: 0.320485383272171
Epoch 1, Batch 100, Loss: 0.23158499598503113
Epoch 2, Batch 0, Loss: 0.22200

CNN_BiLSTM(
  (cnn): CNN(
    (conv1d): Conv1d(8, 32, kernel_size=(1,), stride=(1,), padding=same)
    (tanh): Tanh()
    (maxpool1d): MaxPool1d(kernel_size=1, stride=1, padding=0, dilation=1, ceil_mode=False)
    (relu): ReLU()
  )
  (lstm): LSTM(
    (lstm): LSTM(32, 64, batch_first=True)
    (fc1): Linear(in_features=64, out_features=16, bias=True)
    (fc2): Linear(in_features=16, out_features=1, bias=True)
  )
)

# Evaluation

In [17]:
with torch.no_grad():
    y_train_pred = model(X_train.to()).to()
    y_test_pred = model(X_test.to()).to()

y_test_pred = y_scaler.inverse_transform(y_test_pred)

train_mse = mean_squared_error(y_train_pred, y_train, squared=False)
test_mse = mean_squared_error(y_test_pred, y_test, squared=False)

train_mae = mean_absolute_error(y_train_pred, y_train)
test_mae = mean_absolute_error(y_test_pred, y_test)

train_r2 = r2_score(y_train_pred, y_train)
test_r2 = r2_score(y_test_pred, y_test)

print("Train RMSE: {0:.6f}; Test RMSE {1:.6f}\n".format(train_mse, test_mse))
print("Train  MAE: {0:.6f}; Test  MAE {1:.6f}\n".format(train_mae, test_mae))
print("Train  R^2: {0:.6f}; Test  R^2 {1:.6f}\n".format(train_r2, test_r2))

Train RMSE: 0.048857; Test RMSE 52.625295

Train  MAE: 0.026714; Test  MAE 38.450717

Train  R^2: 0.997588; Test  R^2 0.932287

