# This Example shows the Prediction of Bike Flow in the NYC City using the deep learning model ST-ResNet.

Find the details of the ST-ResNet model in the <a href="https://dl.acm.org/doi/10.5555/3298239.3298479">corresponding paper</a>

Details of the dataset can be found <a href="https://github.com/FIBLAB/DeepSTN">here</a>.

### Import Modules and Define Parameters

In [1]:
import os
import time
import numpy as np
import torch
from torch.utils.data import DataLoader
from torch.utils.data.sampler import SubsetRandomSampler
import torch.nn as nn
from geotorchai.models.grid import STResNet
from geotorchai.datasets.grid import BikeNYCDeepSTN

In [2]:
## Define parameters
len_closeness = 3
len_period = 4
len_trend = 4
nb_residual_unit = 4
map_height, map_width = 21, 12
nb_flow = 2
nb_area = 81
T = 24

epoch_nums = 10
learning_rate = 0.0002
batch_size = 32
validation_ratio = 0.1
test_ratio = 0.1
params = {'batch_size': batch_size, 'shuffle': False}

## make sure that PATH_TO_DATASET exists in the running directory
PATH_TO_DATASET = "data/deepstn"
MODEL_SAVE_DIR = "model-stresnet"
MODEL_SAVE_PATH = MODEL_SAVE_DIR + "/stresnet.pth"
os.makedirs(MODEL_SAVE_DIR, exist_ok=True)

### Loading Train and Test Dataset

In [3]:
## Load training and test dataset
full_dataset = BikeNYCDeepSTN(root = PATH_TO_DATASET, download = True)

## get the min-max-difference of normalized data for future use in calculating actual losses
min_max_diff = full_dataset.get_min_max_difference()

File downloading started...


100%|█████████████████████████| 17708640/17708640 [00:00<00:00, 31568249.76it/s]


File downloading finished
File downloading started...


100%|████████████████████████████████| 18224/18224 [00:00<00:00, 7864697.61it/s]

File downloading finished





### Split Train Dataset into Train and Validation

In [4]:
## Initialize training and validation indices to split the dataset
dataset_size = len(full_dataset)
indices = list(range(dataset_size))
val_split = int(np.floor((1 - (validation_ratio + test_ratio)) * dataset_size))
test_split = int(np.floor((1 - test_ratio) * dataset_size))
train_indices, val_indices, test_indices = indices[:val_split], indices[val_split:test_split], indices[test_split:]

In [5]:
## Define training and validation data sampler
train_sampler = SubsetRandomSampler(train_indices)
valid_sampler = SubsetRandomSampler(val_indices)
test_sampler = SubsetRandomSampler(test_indices)

## Define training and validation data loader
train_loader = DataLoader(full_dataset, **params, sampler=train_sampler)
val_loader = DataLoader(full_dataset, **params, sampler=valid_sampler)
test_loader = DataLoader(full_dataset, **params, sampler=test_sampler)

### Initialize Model and Hyperparameters

In [6]:
## set device to CPU or GPU
if torch.cuda.is_available():
    device = torch.device("cuda")
elif torch.backends.mps.is_available():
    device = torch.device("mps")
else:
    device = torch.device("cpu")
## Define Model
model = STResNet((len_closeness, nb_flow, map_height, map_width),
                (len_period, nb_flow, map_height, map_width),
                (len_trend, nb_flow , map_height, map_width),
                external_dim = None, nb_residual_unit = nb_residual_unit)
## Define hyper-parameters
loss_fn = nn.MSELoss()
optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)
model.to(device)
loss_fn.to(device)

MSELoss()

### Method for Returning Validation Loss

In [7]:
## Before starting training, define a method to calculate validation loss
def get_validation_loss(model, data_loader, criterion, device):
    model.eval()
    mean_loss = []
    for i, sample in enumerate(data_loader):
        X_c = sample["x_closeness"].type(torch.FloatTensor).to(device)
        X_p = sample["x_period"].type(torch.FloatTensor).to(device)
        X_t = sample["x_trend"].type(torch.FloatTensor).to(device)
        Y_batch = sample["y_data"].type(torch.FloatTensor).to(device)

        outputs = model(X_c, X_p, X_t)
        mse= criterion(outputs, Y_batch).item()
        mean_loss.append(mse)

    mean_loss = np.mean(mean_loss)
    return mean_loss

### Train the Model

In [8]:
## Perform training and validation
min_val_loss = None
for e in range(epoch_nums):
    for i, sample in enumerate(train_loader):
        X_c = sample["x_closeness"].type(torch.FloatTensor).to(device)
        X_p = sample["x_period"].type(torch.FloatTensor).to(device)
        X_t = sample["x_trend"].type(torch.FloatTensor).to(device)
        Y_batch = sample["y_data"].type(torch.FloatTensor).to(device)

        # Forward pass
        outputs = model(X_c, X_p, X_t)
        loss = loss_fn(outputs, Y_batch)

        # Backward and optimize
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

    print('Epoch [{}/{}], Training Loss: {:.4f}'.format(e + 1, epoch_nums, loss.item()))

    ## Perform model validation after finishing each epoch training
    val_loss = get_validation_loss(model, val_loader, loss_fn, device)
    print('Mean validation loss:', val_loss)

    if min_val_loss == None or val_loss < min_val_loss:
        min_val_loss = val_loss
        torch.save(model.state_dict(), MODEL_SAVE_PATH)
        print('Best model saved!')

Epoch [1/10], Training Loss: 0.0066
Mean validation loss: 0.008179346293521425
Best model saved!
Epoch [2/10], Training Loss: 0.0019
Mean validation loss: 0.003460483121064802
Best model saved!
Epoch [3/10], Training Loss: 0.0018
Mean validation loss: 0.0025046314112842083
Best model saved!
Epoch [4/10], Training Loss: 0.0020
Mean validation loss: 0.0020039992523379624
Best model saved!
Epoch [5/10], Training Loss: 0.0014
Mean validation loss: 0.0017165297467727214
Best model saved!
Epoch [6/10], Training Loss: 0.0010
Mean validation loss: 0.0015735180738071601
Best model saved!
Epoch [7/10], Training Loss: 0.0008
Mean validation loss: 0.0014622912761600066
Best model saved!
Epoch [8/10], Training Loss: 0.0006
Mean validation loss: 0.00130579936861371
Best model saved!
Epoch [9/10], Training Loss: 0.0007
Mean validation loss: 0.001220505762224396
Best model saved!
Epoch [10/10], Training Loss: 0.0010
Mean validation loss: 0.0011776623384018119
Best model saved!


### Define a Method to Return MSE, MAE, RMSE Errors

In [9]:
## Before testing, Define a method to calculate three types of loss: MSE, MAE, RMSE
def compute_errors(preds, y_true):
    pred_mean = preds[:, 0:2]
    diff = y_true - pred_mean

    mse = np.mean(diff ** 2)
    rmse = np.sqrt(mse)
    mae = np.mean(np.abs(diff))

    return mse, mae, rmse

### Evaluate on Test Dataset

In [10]:
## Perform testing on the best model with test dataset
model.load_state_dict(torch.load(MODEL_SAVE_PATH, map_location=lambda storage, loc: storage))

rmse_list=[]
mse_list=[]
mae_list=[]
for i, sample in enumerate(test_loader):
    X_c = sample["x_closeness"].type(torch.FloatTensor).to(device)
    X_p = sample["x_period"].type(torch.FloatTensor).to(device)
    X_t = sample["x_trend"].type(torch.FloatTensor).to(device)
    Y_batch = sample["y_data"].type(torch.FloatTensor).to(device)

    outputs = model(X_c, X_p, X_t)
    mse, mae, rmse = compute_errors(outputs.cpu().data.numpy(), Y_batch.cpu().data.numpy())

    rmse_list.append(rmse)
    mse_list.append(mse)
    mae_list.append(mae)
    
rmse = np.mean(rmse_list)
mse = np.mean(mse_list)
mae = np.mean(mae_list)

print('Test mse: %.6f mae: %.6f rmse (norm): %.6f, mae (real): %.6f, rmse (real): %.6f' % (mse, mae, rmse, mae * min_max_diff/2, rmse*min_max_diff/2))

Test mse: 0.001049 mae: 0.013647 rmse (norm): 0.032330, mae (real): 5.028948, rmse (real): 11.913621
