In [None]:
import copy
import math
import os
import random
import time

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import torch
import torch.nn as nn
import torch.optim as optim
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler


In [None]:
# Load the raw CSV, index it by its 'index' column and remove the columns
# that are not used further down in the notebook.
data = (
    pd.read_csv('data/IONITY_temp.csv')
    .set_index('index')
    .drop(
        columns=[
            'Unnamed: 0',
            'station_id',
            'outlet_count',
            'unknown_count',
            'available_count',
            'occupied_count',
            'offline_count',
        ]
    )
)

# Quick sanity numbers: missing 'Occupancy' values, then the total row count.
print(data['Occupancy'].isna().sum())
print(len(data['Occupancy']))

# Column names, dtypes and non-null counts of the remaining frame.
data.info()


In [None]:
# Window configuration: number of rows to forecast, number of rows fed to the
# model, and the mini-batch size used by the data loaders.
future_steps = 36
seq_len = 576
batch_size = 8

# Each entry is the first row of one sample; a sample spans
# seq_len input rows followed by future_steps target rows.
indices = list(range(len(data) - future_steps - seq_len))
print(len(indices))

# Chronological 80 % / 10 % / 10 % split of the window start positions.
# NOTE(review): start positions are consecutive, so windows that start close
# to a split boundary share rows with windows of the neighbouring split.
# Consider leaving a gap of seq_len + future_steps rows between the splits.
train_end = int(len(indices) * 0.8)
val_end = int(len(indices) * 0.9)

train_i = indices[:train_end]
val_i = indices[train_end:val_end]
test_i = indices[val_end:]

In [None]:
from torch.utils.data import Dataset, DataLoader

class datasetMaker(Dataset):
    """Sliding-window dataset producing (input sequence, future target) pairs.

    Parameters
    ----------
    data : pandas.DataFrame or array-like
        Source rows. A DataFrame is unwrapped to its ``.values`` array; any
        other object is stored as passed. It must support row slicing and,
        because the target is read from column 0, is presumably
        two-dimensional (rows x features).
    indices_conversion : sequence of int
        Maps a dataset position to the row at which that sample starts.
    seq_len : int
        Number of rows in the input sequence.
    future_steps : int
        Number of rows following the input sequence used as the target.
    """

    def __init__(self, data, indices_conversion, seq_len=10, future_steps=5):
        if isinstance(data, pd.DataFrame):
            data = data.values
        self.data = data
        self.indices_conversion = indices_conversion
        self.seq_len = seq_len
        self.future_steps = future_steps

    def __len__(self):
        # One sample per provided start position.
        return len(self.indices_conversion)

    def __getitem__(self, index):
        """Return ``(seq, label)`` float tensors for the ``index``-th sample.

        ``seq`` holds ``seq_len`` rows with all columns; ``label`` holds the
        first column of the following ``future_steps`` rows, with a trailing
        axis of size 1.
        """
        start = self.indices_conversion[index]
        boundary = start + self.seq_len
        stop = boundary + self.future_steps

        seq = torch.tensor(self.data[start:boundary], dtype=torch.float)
        horizon = torch.tensor(self.data[boundary:stop], dtype=torch.float)

        # Only column 0 is the prediction target; keep it as a column vector.
        label = horizon[:, 0].unsqueeze(1)

        return seq, label
    
# One dataset / loader pair per split. All loaders shuffle and drop the last
# incomplete batch, so every batch has exactly batch_size samples.
train_dataset = datasetMaker(data, train_i, seq_len, future_steps)
train_loader = DataLoader(dataset=train_dataset, batch_size=batch_size, shuffle=True, drop_last=True)

val_dataset = datasetMaker(data, val_i, seq_len, future_steps)
val_loader = DataLoader(dataset=val_dataset, batch_size=batch_size, shuffle=True, drop_last=True)

test_dataset = datasetMaker(data, test_i, seq_len, future_steps)
# Fix: the test loader must wrap the test dataset (it previously wrapped
# val_dataset, so "test" batches were validation samples).
test_loader = DataLoader(dataset=test_dataset, batch_size=batch_size, shuffle=True, drop_last=True)

print(len(train_dataset))
print(len(val_dataset))
print(len(test_dataset))

print(len(train_loader))

print()

# Shape check on a single training batch. The loop variables deliberately do
# not reuse the name `data`, so the notebook-level DataFrame is not replaced
# by a tensor and this cell stays re-runnable.
for sample_seq, sample_label in train_loader:
    print(sample_seq.shape, sample_label.shape)
    break


In [None]:
def train_epoch(epoch, optimizer, loss_function, model, train_loader, future_steps):
    """Run one optimisation pass over ``train_loader``.

    Parameters
    ----------
    epoch : int
        Index of the current epoch (not used inside the function).
    optimizer : torch.optim.Optimizer
        Optimizer updating the parameters of ``model``.
    loss_function : callable
        Called as ``loss_function(predictions, label)``; must return a scalar
        tensor.
    model : torch.nn.Module
        Called as ``model(data, future=future_steps)``.
    train_loader : iterable of (data, label) batches
        Must support ``len()``.
    future_steps : int
        Forwarded to the model as the ``future`` keyword argument.

    Returns
    -------
    float
        Mean of the per-batch loss values.

    Raises
    ------
    ValueError
        If ``train_loader`` contains no batches.
    """
    num_batches = len(train_loader)
    if num_batches == 0:
        raise ValueError("train_loader yields no batches; cannot compute a mean loss")

    # Move batches to wherever the model lives instead of hard-coding CUDA,
    # so the function also works on CPU-only machines.
    first_param = next(model.parameters(), None)
    if first_param is not None:
        device = first_param.device
    else:
        device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

    total_loss = 0.0
    model.train()
    for data, label in train_loader:
        data = data.to(device)
        label = label.to(device)

        optimizer.zero_grad()

        predictions = model(data, future=future_steps)

        loss_value = loss_function(predictions, label)
        loss_value.backward()
        optimizer.step()

        total_loss += loss_value.item()
    return total_loss / num_batches

def validate_epoch(epoch, loss, model, val_loader, future_steps):
    """Compute the mean loss of ``model`` over ``val_loader`` without training.

    Parameters
    ----------
    epoch : int
        Index of the current epoch (not used inside the function).
    loss : callable
        Called as ``loss(predictions, label)``; must return a scalar tensor.
    model : torch.nn.Module
        Called as ``model(data, future=future_steps)``; switched to eval mode.
    val_loader : iterable of (data, label) batches
        Must support ``len()``.
    future_steps : int
        Forwarded to the model as the ``future`` keyword argument.

    Returns
    -------
    float
        Mean of the per-batch loss values.

    Raises
    ------
    ValueError
        If ``val_loader`` contains no batches.
    """
    num_batches = len(val_loader)
    if num_batches == 0:
        raise ValueError("val_loader yields no batches; cannot compute a mean loss")

    # Follow the model's device instead of hard-coding CUDA so validation
    # also runs on CPU-only machines.
    first_param = next(model.parameters(), None)
    if first_param is not None:
        device = first_param.device
    else:
        device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

    total_loss = 0.0
    model.eval()

    # No gradients are needed for validation.
    with torch.no_grad():
        for data, label in val_loader:
            data = data.to(device)
            label = label.to(device)

            predictions = model(data, future=future_steps)

            loss_value = loss(predictions, label)
            total_loss += loss_value.item()
    return total_loss / num_batches

def a_proper_training(num_epoch, model, optimizer, loss_function, loader, future_steps,
                      validation_loader=None):
    """Train ``model`` for ``num_epoch`` epochs and keep the best snapshot.

    After every epoch the model is evaluated on the validation loader; a deep
    copy of the model is stored whenever the validation loss is the lowest
    seen so far (the first epoch always counts as an improvement).

    Parameters
    ----------
    num_epoch : int
        Number of epochs to run.
    model : torch.nn.Module
        Model to train in place.
    optimizer : torch.optim.Optimizer
        Optimizer for ``model``.
    loss_function : callable
        Loss used for both training and validation.
    loader : iterable of (data, label) batches
        Training batches.
    future_steps : int
        Forecast horizon forwarded to the model.
    validation_loader : iterable of (data, label) batches, optional
        Validation batches. When omitted, the notebook-level ``val_loader``
        is used so existing calls keep working.

    Returns
    -------
    tuple
        ``(best_model, best_epoch, train_losses, val_losses)``. ``best_model``
        and ``best_epoch`` are ``None`` only when ``num_epoch`` is 0.
    """
    if validation_loader is None:
        # Backward-compatible default: the loader defined at notebook level.
        validation_loader = val_loader

    best_epoch = None
    best_model = None
    best_loss = None
    train_losses = []
    val_losses = []
    print("Begin Training")

    for epoch in range(num_epoch):
        start_time = time.time()

        # Use the loader and loss passed in by the caller rather than the
        # notebook globals `train_loader` / `criterion`.
        train_loss = train_epoch(epoch, optimizer, loss_function, model, loader, future_steps)
        val_loss = validate_epoch(epoch, loss_function, model, validation_loader, future_steps)
        train_losses.append(train_loss)
        val_losses.append(val_loss)

        # `best_loss is None` makes the first epoch a valid best snapshot, so
        # a model is returned even if validation loss never improves later.
        if best_loss is None or val_loss < best_loss:
            best_loss = val_loss
            best_model = copy.deepcopy(model)
            best_epoch = epoch

        elapsed_time = time.time() - start_time

        print(f"Epoch {epoch + 1}/{num_epoch}: Train Loss = {round(train_loss,10)} Val Loss = {round(val_loss,10)} Elapsed_time = {round(elapsed_time / 60, 2)} minutes")

    return (best_model, best_epoch, train_losses, val_losses)


In [None]:

class MultiStepLSTM(nn.Module):
    """LSTM that forecasts several steps ahead autoregressively.

    The input sequence is encoded once. Each forecast step projects the last
    hidden output through a linear layer, and that projected prediction is
    fed back as the next LSTM input while the recurrent state is carried
    forward.

    Compared with the previous implementation:

    * the *prediction* (output of ``fc``) is fed back instead of the raw
      hidden output, so ``hidden_size`` no longer has to equal
      ``input_size``; forecasts beyond the first step therefore differ from
      those of the previous implementation;
    * the recurrent state is reused instead of re-encoding the whole, growing
      sequence from a zero state at every step.

    Parameters
    ----------
    input_size : int
        Number of features per time step of the input.
    hidden_size : int
        Size of the LSTM hidden state.
    output_size : int
        Size of each per-step prediction. Must equal ``input_size`` when more
        than one step is forecast, because predictions are fed back as inputs.
    num_layers : int
        Number of stacked LSTM layers.
    """

    def __init__(self, input_size, hidden_size, output_size, num_layers=1):
        super().__init__()
        self.input_size = input_size
        self.hidden_size = hidden_size
        self.output_size = output_size
        self.num_layers = num_layers

        # Recurrent encoder; inputs are (batch, time, features).
        self.lstm = nn.LSTM(input_size, hidden_size, num_layers, batch_first=True)

        # Maps the last hidden output to a prediction of size output_size.
        self.fc = nn.Linear(hidden_size, output_size)

    def forward(self, x, future=1):
        """Forecast ``future`` steps following the sequence ``x``.

        Parameters
        ----------
        x : torch.Tensor
            Input of shape ``(batch, time, input_size)``.
        future : int
            Number of steps to forecast; must be at least 1.

        Returns
        -------
        torch.Tensor
            Per-step predictions concatenated along dim 1 with a trailing
            axis added: shape ``(batch, future * output_size, 1)``, i.e.
            ``(batch, future, 1)`` for ``output_size == 1``.

        Raises
        ------
        ValueError
            If ``future < 1``, or if ``future > 1`` while ``output_size`` and
            ``input_size`` differ.
        """
        if future < 1:
            raise ValueError("future must be at least 1")
        if future > 1 and self.output_size != self.input_size:
            raise ValueError(
                "output_size must equal input_size to feed predictions back "
                "as inputs when future > 1"
            )

        # Encode the observed sequence once; omitting the initial state makes
        # nn.LSTM start from zeros.
        out, state = self.lstm(x)
        step = self.fc(out[:, -1, :])
        predictions = [step]

        # Roll forward one step at a time, feeding the last prediction back in
        # and continuing from the carried (h, c) state.
        for _ in range(future - 1):
            out, state = self.lstm(step.unsqueeze(1), state)
            step = self.fc(out[:, -1, :])
            predictions.append(step)

        # Concatenate the per-step predictions along dim 1 and add a trailing axis.
        predictions = torch.cat(predictions, dim=1)
        return torch.unsqueeze(predictions, dim=2)

In [None]:
# Use the GPU when one is available, otherwise fall back to the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model = MultiStepLSTM(input_size=1, hidden_size=1, output_size=1, num_layers=3).to(device)

optimizer = optim.Adam(model.parameters(), lr=0.005)
criterion = nn.MSELoss()

# Train for 30 epochs and collect the per-epoch loss curves.
best_model, best_epoch, train_losses, val_losses = a_proper_training(
     30, model, optimizer, criterion, train_loader, future_steps
)

# a_proper_training can hand back None as the best model when no epoch was
# recorded as an improvement; fall back to the model in its final state so
# saving and the evaluation cells below still have a model to work with.
if best_model is None:
    best_model = model

# Make sure the target directory exists so the checkpoint is not lost to a
# missing-directory error after the whole training run has finished.
os.makedirs("models", exist_ok=True)
torch.save(best_model.state_dict(), "models/lstm_model.pth")



In [None]:
# Training and validation loss per epoch, drawn on one shared axis.
fig, ax = plt.subplots()
ax.plot(train_losses, label="train")
ax.plot(val_losses, label="val")
ax.set_title("MSE Loss")
ax.legend()

In [None]:

# Plot the forecasts of best_model against the true values for the first
# batch drawn from val_loader, one subplot per sequence in the batch.

# Feed the batch to whatever device the model's parameters are on.
plot_device = next(best_model.parameters()).device
best_model.eval()

# Only one batch is needed. Dedicated variable names keep the notebook-level
# `data` and `batch_size` from being overwritten by this cell.
first_batch = next(iter(val_loader), None)

if first_batch is not None:
    batch_inputs, batch_targets = first_batch

    # Inference only: skip building the autograd graph.
    with torch.no_grad():
        batch_forecast = best_model(batch_inputs.to(plot_device), future=future_steps)

    # Convert tensors to numpy arrays for plotting.
    predictions = batch_forecast.cpu().numpy()
    labels = batch_targets.numpy()

    # Grid layout: 4 columns, as many rows as needed for the batch.
    num_sequences = predictions.shape[0]
    num_cols = 4
    num_rows = int(math.ceil(num_sequences / num_cols))

    # squeeze=False always returns a 2-D array of axes, also for a single row.
    fig, axes = plt.subplots(num_rows, num_cols, figsize=(15, 15), squeeze=False)
    fig.suptitle("Predictions vs True Values")

    for i, ax in enumerate(axes.flat):
        if i >= num_sequences:
            # Hide grid cells that have no sequence to show.
            ax.set_visible(False)
            continue

        # Forecast and ground truth of the i-th sequence.
        ax.plot(predictions[i, :, 0], label="Predictions")
        ax.plot(labels[i, :, 0], label="True Values")
        ax.set_title(f"Sequence {i+1}")
        ax.legend()
        ax.set_ylim(0, 1)

    # Adjust layout and display the plot.
    plt.tight_layout()
    plt.show()


In [None]:
import torch.nn.functional as F

# Mean squared error of best_model over every batch of val_loader.

# Set the model to evaluation mode and find the device its parameters are on.
best_model.eval()
eval_device = next(best_model.parameters()).device

# Accumulate predictions and true labels of all batches.
all_predictions = []
all_labels = []

# Inference only: without no_grad every stored prediction would keep its
# autograd graph alive, so memory would grow with each batch.
with torch.no_grad():
    for batch_data, batch_labels in val_loader:
        batch_data = batch_data.to(eval_device)
        batch_labels = batch_labels.to(eval_device)

        # Forward pass: forecast future_steps steps for this batch.
        batch_predictions = best_model(batch_data, future=future_steps)

        all_predictions.append(batch_predictions)
        all_labels.append(batch_labels)

# Concatenate the per-batch tensors along the batch dimension.
all_predictions = torch.cat(all_predictions, dim=0)
all_labels = torch.cat(all_labels, dim=0)

# Calculate mean squared error over all collected samples.
mse = F.mse_loss(all_predictions, all_labels)

print("Mean Squared Error:", mse.item())
