In [41]:
import pandas as pd
import numpy as np
import torch
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F
from torch.utils.data import DataLoader
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score
from IPython import display

In [42]:
# Select the compute device once; every later cell moves tensors/modules to it.
# Preference order: Apple-Silicon MPS, then CUDA, then CPU.
if torch.backends.mps.is_available():
    device = torch.device("mps")
    print(device)
elif torch.cuda.is_available():
    device = torch.device("cuda")
    print(device)
else:
    # Fall back to CPU so that `device` is always bound; without this the
    # notebook fails with a NameError on the first `.to(device)` call.
    device = torch.device("cpu")
    print ("GPU device not found.")

mps


In [None]:
batch_size = 64

# Build one loader per split. Only the training split is reshuffled each
# epoch; the test split keeps its original order.
# `train_dataset` / `test_dataset` must already exist in the kernel.
train_loader, test_loader = (
    DataLoader(split, batch_size=batch_size, shuffle=reshuffle)
    for split, reshuffle in ((train_dataset, True), (test_dataset, False))
)

In [43]:
class BILSTM(nn.Module):
    """Bidirectional LSTM followed by a two-layer fully connected regression head.

    The network consumes batch-first sequences of shape
    ``(batch, seq_len, input_size)`` and returns one scalar per sequence,
    shape ``(batch, 1)``, computed from the LSTM output at the last time step.

    Args:
        lstm_num_layers: Number of stacked LSTM layers.
        lstm_hidden_size: Hidden units per direction; the LSTM output has
            ``2 * lstm_hidden_size`` features because it is bidirectional.
        lstm_dropout: Stored on the instance only. No dropout layer is
            currently applied anywhere in the network.
        fc1_output_size: Width of the hidden fully connected layer.
        input_size: Number of features per time step (defaults to 8).
    """

    def __init__(self, lstm_num_layers = 1, lstm_hidden_size = 64, lstm_dropout = 0.2, fc1_output_size = 16, input_size = 8):
        super().__init__()
        self.lstm_num_layers = lstm_num_layers
        self.lstm_hidden_size = lstm_hidden_size
        self.lstm_dropout = lstm_dropout  # kept for bookkeeping; not used by forward()
        self.fc1_output_size = fc1_output_size
        self.input_size = input_size

        self.lstm = nn.LSTM(input_size = self.input_size,
                            hidden_size = self.lstm_hidden_size,
                            num_layers = self.lstm_num_layers,
                            batch_first = True,
                            bidirectional = True)

        # Forward and backward hidden states are concatenated, hence "* 2".
        self.fc1 = nn.Linear(self.lstm_hidden_size * 2, self.fc1_output_size)
        self.fc2 = nn.Linear(self.fc1_output_size, 1)

    def forward(self, x):
        """Map a batch of sequences to one prediction per sequence.

        Args:
            x: Tensor of shape ``(batch, seq_len, input_size)``.

        Returns:
            Tensor of shape ``(batch, 1)``.
        """
        # With no initial state supplied, nn.LSTM starts from zeros created on
        # the same device/dtype as `x`. This removes the dependency on a
        # notebook-global `device` variable.
        h_lstm, _ = self.lstm(x)

        # Only the last time step feeds the head, so slice before the linear
        # layers instead of running them over every step and discarding the
        # rest. The linear layers act per time step, so the result is the same.
        last_step = h_lstm[:, -1, :]

        h_fc1 = F.relu(self.fc1(last_step))
        output = self.fc2(h_fc1)

        return output

In [44]:
# Smoke test: push one random batch through an untrained model to check shapes.
# The input is (64, 10, 8): the LSTM is batch-first with 8 input features, so
# this is 64 sequences of 10 time steps each.
input_tensor = torch.randn(64,10,8).to(device)
# NOTE: this binds the notebook-global `model`; later cells that call
# `model.parameters()` see this instance until `model` is reassigned.
model = BILSTM().to(device)
output_tensor = model(input_tensor)
# Last expression of the cell: displays the output shape (one value per sequence).
output_tensor.shape

torch.Size([64, 1])

#### Hyperparameters

In [30]:
# --- Training schedule ---
num_epoch = 10
learning_rate = 0.001

# --- Network architecture ---
lstm_num_layers = 1
lstm_hidden_size = 64
fc1_output_size = 16
lstm_dropout = 0.2

# --- Objective and optimiser ---
# L1 loss is the mean absolute error between prediction and target.
criterion = nn.L1Loss()
# Adam holds references to the parameters of whatever `model` is bound when
# this cell runs. If `model` is re-created afterwards, the optimiser must be
# rebuilt as well or it keeps updating the old instance.
optimizer = optim.Adam(model.parameters(), lr=learning_rate)

#### Train

In [None]:
# Build a fresh model from the declared hyperparameters so that the trained
# network matches the values later written into the checkpoint filename.
model = BILSTM(
    lstm_num_layers=lstm_num_layers,
    lstm_hidden_size=lstm_hidden_size,
    lstm_dropout=lstm_dropout,
    fc1_output_size=fc1_output_size,
).to(device)

# The optimizer must be created AFTER the model it trains: an optimizer built
# earlier holds the parameters of a previous `model` instance, and stepping it
# would leave this new model untouched.
optimizer = optim.Adam(model.parameters(), lr=learning_rate)

model.train()

# `num_epoch` is an int, so iterate over range(num_epoch).
for epoch in range(num_epoch):
    for batch_index, batch in enumerate(train_loader):
        x, y_true = batch[0].to(device), batch[1].to(device)

        # NOTE(review): y_pred has shape (batch, 1). If y_true is (batch,),
        # L1Loss broadcasts the pair to (batch, batch) - confirm target shape.
        y_pred = model(x)
        loss = criterion(y_pred, y_true)

        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        if batch_index % 100 == 0:
            # Clear first (deferred until the next output arrives), then print,
            # so each progress line replaces the previous one and the final
            # line is not wiped by a pending clear.
            display.clear_output(wait=True)
            print('Epoch {0}, Batch {1}, Loss: {2:.3f}'.format(epoch, batch_index, loss.item()))

# Switch to inference mode for the cells that follow.
model.eval()

#### Save the model

In [None]:
# Encode the main hyperparameters in the checkpoint name so that different
# runs do not overwrite each other.
path_template = "BILSTM_{0}epoch_{1}lr_{2}layer_{3}size.pt"
path = path_template.format(
    num_epoch,
    learning_rate,
    lstm_num_layers,
    lstm_hidden_size,
)

# Only the weights (state_dict) are stored, not the pickled module.
weights = model.state_dict()
torch.save(weights, path)

# To restore later, rebuild the architecture and load the weights:
# model = BILSTM()
# model.load_state_dict(torch.load(path))
# model.eval()

#### Evaluate

In [None]:
# Make sure the model is in inference mode even if this cell is run on its own.
model.eval()

# Predict on both splits without tracking gradients, then bring the results
# back to the CPU so scikit-learn can consume them.
with torch.no_grad():
    Y_train_pred = model(X_train.to(device)).to('cpu')
    Y_test_pred = model(X_test.to(device)).to('cpu')

# scikit-learn metrics take (y_true, y_pred) in that order. MSE and MAE are
# symmetric, but R^2 is not: it normalises by the variance of the first
# argument, so swapping the two reports a wrong score.
train_mse = mean_squared_error(Y_train, Y_train_pred)
test_mse = mean_squared_error(Y_test, Y_test_pred)

train_mae = mean_absolute_error(Y_train, Y_train_pred)
test_mae = mean_absolute_error(Y_test, Y_test_pred)

train_r2 = r2_score(Y_train, Y_train_pred)
test_r2 = r2_score(Y_test, Y_test_pred)

print("Train MSE: {0}; Test MSE {1:.4f}\n".format(train_mse, test_mse))
print("Train MAE: {0}; Test MAE {1:.4f}\n".format(train_mae, test_mae))
print("Train R^2: {0}; Test R^2 {1:.4f}\n".format(train_r2, test_r2))