In [1]:
import pandas as pd
import numpy as np
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader, TensorDataset
from sklearn.preprocessing import MinMaxScaler
from sklearn.model_selection import train_test_split

In [2]:
# Check if GPU is available
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(device)

cuda


In [3]:
###--------------------------- Data Preparation ---------------------------

## Read data from CSV
doc = pd.read_csv('./datasets/dataset.csv')

## Drop out timestamp
doc = doc.drop(columns= 'datatime')
doc.head()

## Convert to np array
data = doc.to_numpy()
# print(data)

## Normalize the data
scaler = MinMaxScaler()
normalized_data = scaler.fit_transform(data)
# print(normalized_data)

In [4]:
## Create data sequence
x, y = [], []
input_length = 24
output_length = 1
for i in range(len(data) - input_length - output_length + 1):
    x.append(data[i:i+input_length])  # Input sequence (24 time steps)
    y.append(data[i+input_length : i+input_length+output_length, 2]) 
x = np.array(x)
y = np.array(y)
print(x.shape, y.shape)


## Convert data to PyTorch tensor
x_ten = torch.tensor(x, dtype= torch.float32).to(device)
y_ten = torch.tensor(y, dtype= torch.float32).to(device)


## Split the data
x_train, x_val, y_train, y_val = train_test_split(x_ten, y_ten, test_size= 0.2, shuffle= False)
print(f'Train:\t{x_train.shape}, {y_train.shape}\nVal:\t{x_val.shape}, {y_val.shape}')

## Create batches for train & val
train_dataset = TensorDataset(x_train, y_train)
val_dataset = TensorDataset(x_val, y_val)

train_loader = DataLoader(train_dataset, batch_size= 32, shuffle= True)
val_loader = DataLoader(val_dataset, batch_size= 32, shuffle= False)


(87648, 24, 3) (87648, 1)
Train:	torch.Size([70118, 24, 3]), torch.Size([70118, 1])
Val:	torch.Size([17530, 24, 3]), torch.Size([17530, 1])


In [5]:
###--------------------------- Model Preparation ---------------------------
class LSTMModel(nn.Module):
    def __init__(self, n_hidden, n_lstm_layers= 1):
        super(LSTMModel, self).__init__()
        self.n_hidden = n_hidden
        self.n_lstm_layers = n_lstm_layers
        
        self.lstm = nn.LSTM(3, n_hidden, n_lstm_layers, batch_first= True)
        self.fc = nn.Linear(self.n_hidden, 1)

    def forward(self, x):
        h0 = torch.zeros(self.n_lstm_layers, x.size(0), self.n_hidden).to(device)
        c0 = torch.zeros(self.n_lstm_layers, x.size(0), self.n_hidden).to(device)
        
        out, _ = self.lstm(x, (h0, c0))
        out = self.fc(out[:, -1, :])
        return out

In [9]:
## Variables
n_hidden = 2  # Number of LSTM hidden units
n_lstm_layers = 1  # Number of LSTM layers
learning_rate = 0.001
num_epochs = 10000
batch_size = 32

In [10]:
## Initialize the model, loss function, and optimizer
model = LSTMModel(n_hidden, n_lstm_layers).to(device)
criterion = nn.MSELoss()
optimizer = optim.Adam(model.parameters(), lr=learning_rate)

## Training loop
best_val_loss = 1e9
best_epoch = 0
for epoch in range(num_epochs):
    ## Training phase
    model.train()
    train_loss = 0.0
    for batch_X, batch_y in train_loader:
        outputs = model(batch_X)
        loss = criterion(outputs, batch_y)
        
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        
        train_loss += loss.item()
    
    ## Calculate average training loss
    train_loss /= len(train_loader)
    
    
    ## Validation phase
    model.eval()
    val_loss = 0.0
    with torch.no_grad():
        for batch_x, batch_y in val_loader:
            outputs = model(batch_x)
            loss = criterion(outputs, batch_y)
            val_loss += loss.item()
    
    ## Calculate average validation loss
    val_loss /= len(val_loader)
    
    ## Print training and validation loss
    print(f'Epoch [{epoch+1}/{num_epochs}], Train Loss: {train_loss:.4f}, Val Loss: {val_loss:.4f}')
    
    ## Save the model if validation loss improves
    if val_loss < best_val_loss:
        print(f'Validation loss improved from {best_val_loss:.4f} to {val_loss:.4f}. Saving model...')
        best_val_loss = val_loss
        best_epoch = epoch + 1
        
        ## Save the model checkpoint
        checkpoint = {
            'epoch': best_epoch,
            'model_state_dict': model.state_dict(),
            'optimizer_state_dict': optimizer.state_dict(),
            'loss': best_val_loss,
        }
        torch.save(checkpoint, f'./results/model{round(best_val_loss*10000)}.pth')
        torch.save(checkpoint, './results/best_model.pth')
        
print('Training Complete...')
print(f'Best model saved at epoch {best_epoch} with validation loss: {best_val_loss:.4f}')

Epoch [1/10000], Train Loss: 1.6447, Val Loss: 0.7943
Validation loss improved from 1000000000.0000 to 0.7943. Saving model...
Epoch [2/10000], Train Loss: 0.4229, Val Loss: 0.5167
Validation loss improved from 0.7943 to 0.5167. Saving model...
Epoch [3/10000], Train Loss: 0.3684, Val Loss: 0.5206
Epoch [4/10000], Train Loss: 0.3574, Val Loss: 0.4487
Validation loss improved from 0.5167 to 0.4487. Saving model...
Epoch [5/10000], Train Loss: 0.3530, Val Loss: 0.4724
Epoch [6/10000], Train Loss: 0.3500, Val Loss: 0.5136
Epoch [7/10000], Train Loss: 0.3479, Val Loss: 0.5388
Epoch [8/10000], Train Loss: 0.3466, Val Loss: 0.4415
Validation loss improved from 0.4487 to 0.4415. Saving model...
Epoch [9/10000], Train Loss: 0.3446, Val Loss: 0.4591
Epoch [10/10000], Train Loss: 0.3439, Val Loss: 0.4179
Validation loss improved from 0.4415 to 0.4179. Saving model...
Epoch [11/10000], Train Loss: 0.3435, Val Loss: 0.4694
Epoch [12/10000], Train Loss: 0.3437, Val Loss: 0.5029
Epoch [13/10000], Tr

KeyboardInterrupt: 