In [1]:
import numpy as np
import torch
from torch import nn
torch.manual_seed(42)
from torch.utils.data import Dataset,DataLoader
import wandb
import argparse

In [2]:
# Open the pre-split training and validation archives; the arrays inside are
# read by key later, when the Dataset wrappers are built.
train_set = np.load("dataset/train.npz")
val_set = np.load("dataset/val.npz")

In [3]:
class Mydataset(Dataset):
    """Tensor-backed dataset built from a mapping of arrays.

    ``dataset`` is indexed with the keys ``'x'``, ``'y'``, ``'locations'`` and
    ``'times'``.  ``'x'`` and ``'y'`` are stored as float32 tensors, while
    ``'locations'`` and ``'times'`` are stored as ``torch.int`` tensors.
    Every array is copied into a tensor eagerly at construction time.
    """

    def __init__(self, dataset):
        # Continuous inputs / targets.
        self.X, self.y = (
            torch.tensor(dataset[key], dtype=torch.float32) for key in ('x', 'y')
        )
        # Categorical indices.
        self.locations, self.times = (
            torch.tensor(dataset[key], dtype=torch.int) for key in ('locations', 'times')
        )

    def __len__(self):
        """Number of samples, taken from the leading dimension of ``X`` only."""
        return len(self.X)

    def __getitem__(self, index):
        """Return the ``(X, y, locations, times)`` tuple stored at ``index``."""
        fields = (self.X, self.y, self.locations, self.times)
        return tuple(field[index] for field in fields)

def _report_first_batch(tag, loader):
    """Draw one batch from ``loader`` and print the shape of each of its four tensors."""
    X, y, locations, times = next(iter(loader))
    # Adjacent f-strings (rather than a backslash continuation inside one
    # literal) keep an explicit "; " separator between every field.
    print(f'{tag}: the shape of X: {X.shape}; the shape of y: {y.shape}; '
          f'the shape of locations: {locations.shape}; the shape of times: {times.shape};')


def generate_dataloaders(train_data, val_data, batch_size, shuffle_val=True):
    """Build the training and validation ``DataLoader`` objects.

    One batch is drawn from each loader right after it is created and the
    shapes of its tensors are printed as a sanity check.

    Args:
        train_data: dataset whose items unpack to ``(X, y, locations, times)``.
        val_data: dataset with the same item layout, used for validation.
        batch_size: number of samples per batch for both loaders.
        shuffle_val: whether the validation loader shuffles its samples.
            Defaults to ``True`` to keep the existing behaviour; pass
            ``False`` for a fixed validation order.

    Returns:
        ``(train_loader, val_loader)``; the training loader always shuffles.
    """
    train_loader = DataLoader(dataset=train_data, batch_size=batch_size, shuffle=True)
    _report_first_batch('TRAIN', train_loader)

    val_loader = DataLoader(dataset=val_data, batch_size=batch_size, shuffle=shuffle_val)
    _report_first_batch('VAL', val_loader)
    return train_loader, val_loader

In [4]:
# Wrap both archives as tensor datasets, then build the batched loaders
# (this also prints the shapes of one batch from each loader).
train_data = Mydataset(train_set)
val_data = Mydataset(val_set)
train_loader, val_loader = generate_dataloaders(train_data, val_data, batch_size=32)

TRAIN: the shape of X: torch.Size([32, 8, 49]); the shape of y: torch.Size([32, 1])the shape of locations: torch.Size([32, 2]);the shape of times: torch.Size([32]);
VAL: the shape of X: torch.Size([32, 8, 49]); the shape of y: torch.Size([32, 1])the shape of locations: torch.Size([32, 2]);the shape of times: torch.Size([32]);


In [5]:
class AdditiveAttention(nn.Module):
    """Additive (tanh-scored) attention for a single query per batch element.

    Queries and keys are projected to ``num_hiddens`` features by bias-free
    linear layers, summed, passed through ``tanh`` and reduced to one scalar
    score per key by ``w_v``.  Dropout is applied to the softmax-normalised
    weights before they are used to average ``values``.
    """

    def __init__(self, key_size, query_size, num_hiddens, dropout, **kwargs):
        super().__init__(**kwargs)
        self.W_k = nn.Linear(key_size, num_hiddens, bias=False)
        self.W_q = nn.Linear(query_size, num_hiddens, bias=False)
        self.w_v = nn.Linear(num_hiddens, 1, bias=False)
        self.dropout = nn.Dropout(dropout)

    def forward(self, queries, keys, values):
        """Return the attention-weighted sum of ``values``.

        ``queries`` must broadcast against ``keys`` after projection, e.g.
        ``(batch, 1, query_size)`` against ``(batch, num_keys, key_size)``.
        ``values`` is ``(batch, num_keys, value_dim)`` and the result is
        ``(batch, value_dim)``.
        """
        projected_q = self.W_q(queries)
        projected_k = self.W_k(keys)
        # Broadcasting the query over the key axis gives one feature vector per key.
        energy = torch.tanh(projected_q + projected_k)
        # w_v maps each feature vector to a scalar; drop the trailing size-1 axis.
        scores = self.w_v(energy).squeeze(-1)
        weights = torch.softmax(scores, dim=-1)
        # bmm: (batch, 1, num_keys) x (batch, num_keys, value_dim) -> (batch, 1, value_dim)
        pooled = torch.bmm(self.dropout(weights.unsqueeze(1)), values)
        return pooled.squeeze(1)

class Model_v2(nn.Module):
    """GRU encoder pooled by attention, with a categorical-embedding query.

    The input sequence is encoded by a multi-layer GRU.  The embeddings of the
    two location indices and of the time index are concatenated into a single
    query vector, which attends (additive attention) over the GRU outputs of
    all time steps.  The attended vector is passed through a two-layer MLP.

    ``my_confg`` must expose the attributes ``loc_dim``, ``embed_loc_size``,
    ``time_dim``, ``embed_time_size``, ``X_dim``, ``num_hiddens``,
    ``num_layers``, ``dropout``, ``l_dim`` and ``output_dim``.
    """

    def __init__(self, my_confg):
        super().__init__()
        # Separate embedding tables for the two location coordinates; both
        # share the same vocabulary size and embedding width.
        self.loc_x_embedLayer=nn.Embedding(my_confg.loc_dim,my_confg.embed_loc_size)
        self.loc_y_embedLayer=nn.Embedding(my_confg.loc_dim,my_confg.embed_loc_size)
        self.time_embedLayer=nn.Embedding(my_confg.time_dim,my_confg.embed_time_size)
        # Default (sequence-first) GRU: forward() permutes the input accordingly.
        self.rnn = nn.GRU(my_confg.X_dim, my_confg.num_hiddens,my_confg.num_layers)
        # Width of the attention query: two location embeddings + one time embedding.
        concat_dim = 2*my_confg.embed_loc_size+my_confg.embed_time_size
        self.attention = AdditiveAttention(my_confg.num_hiddens, concat_dim,
                                               my_confg.num_hiddens, my_confg.dropout)
        self.mlp = nn.Sequential(
            nn.Linear(my_confg.num_hiddens, my_confg.l_dim),
            nn.ReLU(),
            nn.Linear(my_confg.l_dim, my_confg.output_dim)
        )

    def forward(self, X, locations,times, state=None):
        """Run the model on one batch.

        Args:
            X: batch-first sequence tensor ``(batch, seq_len, X_dim)``.
            locations: integer tensor whose columns 0 and 1 index the two
                location embedding tables.
            times: integer tensor ``(batch,)`` indexing the time embedding.
            state: optional initial GRU hidden state, forwarded to ``nn.GRU``
                (which expects ``(num_layers, batch, num_hiddens)``).  ``None``
                lets the GRU start from zeros.

        Returns:
            Tensor ``(batch, output_dim)`` produced by the MLP.
        """
        # (batch, seq, feat) -> (seq, batch, feat) for the sequence-first GRU.
        X = X.permute(1,0,2)
        X = X.to(torch.float32)
        # Forward the caller-supplied initial state instead of dropping it.
        y, _ = self.rnn(X, state)
        loc_embedding = torch.cat((self.loc_x_embedLayer(locations[:,0]),
                    self.loc_y_embedLayer(locations[:,1])),dim=1)
        time_embedding = self.time_embedLayer(times)
        categorical_embedding = torch.cat((loc_embedding,time_embedding),dim=1)
        # Back to batch-first so every time step becomes one key/value entry.
        key_value = y.permute(1, 0, 2)
        # The categorical embedding acts as the single attention query.
        output = self.attention(torch.unsqueeze(categorical_embedding, dim=1),key_value,key_value)
        output = self.mlp(output)
        return output

In [6]:
# Run settings and hyper-parameters, exposed as attributes (my_confg.<name>).
# Entries marked "dataset-specific" have to match the loaded data.
my_confg = argparse.Namespace(
    # Training confg
    num_epochs=10,
    load_from_path=None,
    save_to_path="checkpoints/v1/",
    use_wandb=False,
    # Categorical Embeddings
    loc_dim=10,  # dataset-specific
    embed_loc_size=5,
    time_dim=24,  # dataset-specific
    embed_time_size=10,
    # GRU
    X_dim=49,  # dataset-specific
    num_hiddens=8,
    num_layers=2,
    # MLP
    l_dim=16,
    output_dim=1,  # dataset-specific
    # Attention
    dropout=0,
)

In [7]:
# Build the model from the shared configuration.
net = Model_v2(my_confg)
if my_confg.use_wandb:
    wandb.login()
    # Log the architecture values straight from my_confg so the tracked run
    # cannot drift from the model that was actually built.
    # NOTE(review): 'lr' is not part of my_confg; keep it in sync with the
    # optimizer wherever that is created.
    wandb.init(
        project="trial",
        config={
            'lr': 0.001,
            'num_layers': my_confg.num_layers,
            'num_hiddens': my_confg.num_hiddens,
        },
    )

In [8]:
# Trial run: push a single training batch through the freshly built network
# to check that the forward pass works end to end (targets are not needed).
X_sample, _, loc_sample, time_sample = next(iter(train_loader))
y_trail = net(X_sample, loc_sample, time_sample)