In [6]:
import os
# When the kernel is started on the TSCC cluster (detected from the working
# directory), switch to the project root so the relative "data/..." paths used
# in later cells resolve. The root can be overridden with the CSE_251B_ROOT
# environment variable; the original hard-coded location remains the default.
current_dir = os.getcwd()
if "tscc" in current_dir:
    os.chdir(os.environ.get("CSE_251B_ROOT", "/tscc/nfs/home/bax001/scratch/CSE_251B"))

In [1]:
import numpy as np
import torch
from torch.utils.data import Dataset, DataLoader

## LOAD INPUT DATA

In [9]:
# Open the training archive. For an .npz file np.load returns a dict-like
# NpzFile; individual arrays are only read when their key is accessed.
train = np.load("data/train.npz")

In [13]:
# Read the array stored under the "data" key of the archive into memory.
train_data = train["data"]

## DATASET and DATALOADER

In [7]:
class AgentTrajectoryDataset(Dataset):
    """Map-style dataset over the ``'data'`` array stored in an ``.npz`` archive.

    One item corresponds to one index along the first axis of the array. The
    last axis is split into named channels: columns 0-1 are returned as
    ``position``, columns 2-3 as ``velocity``, column 4 as ``heading`` and
    column 5 as ``object_type``.
    """

    def __init__(self, npz_file_path):
        """
        Args:
            npz_file_path (str): Path to .npz file containing a ``'data'`` array.
        """
        super().__init__()
        # np.load on an .npz returns an NpzFile that keeps the underlying file
        # open. Read the array inside a context manager so the handle is
        # released as soon as the data is in memory.
        with np.load(npz_file_path) as archive:
            self.data = archive['data']  # (N, 50, T, 6)

    def __len__(self):
        """Return the number of samples, i.e. the size of the first axis."""
        return self.data.shape[0]

    def __getitem__(self, idx):
        """Return sample ``idx`` as a dict of tensors.

        Returns:
            dict: ``'position'`` and ``'velocity'`` (float32, trailing dim 2),
            ``'heading'`` (float32) and ``'object_type'`` (int64, obtained by
            casting the stored values to ``torch.long``).
        """
        sample = self.data[idx]  # (50, T, 6)

        position = sample[..., 0:2]    # (50, T, 2)
        velocity = sample[..., 2:4]    # (50, T, 2)
        heading = sample[..., 4]       # (50, T)
        object_type = sample[..., 5]   # (50, T)

        return {
            'position': torch.tensor(position, dtype=torch.float32),
            'velocity': torch.tensor(velocity, dtype=torch.float32),
            'heading': torch.tensor(heading, dtype=torch.float32),
            'object_type': torch.tensor(object_type, dtype=torch.long)
        }

# Build one dataset per archive
train_dataset = AgentTrajectoryDataset(npz_file_path='data/train.npz')
test_dataset = AgentTrajectoryDataset(npz_file_path='data/test_input.npz')

# Wrap each dataset in a loader; only the training loader shuffles
train_loader = DataLoader(dataset=train_dataset, batch_size=64, shuffle=True)
test_loader = DataLoader(dataset=test_dataset, batch_size=64, shuffle=False)

# Sanity check: pull a single training batch and print the shape of every field,
# one per line, in the order position, velocity, heading, object_type
batch = next(iter(train_loader))
print(
    *(batch[field].shape for field in ('position', 'velocity', 'heading', 'object_type')),
    sep='\n',
)

torch.Size([64, 50, 110, 2])
torch.Size([64, 50, 110, 2])
torch.Size([64, 50, 110])
torch.Size([64, 50, 110])


## Train / Validation / Test Split

In [14]:
from torch.utils.data import Subset, random_split

# This cell rebinds `train_dataset` to a Subset. If the cell is run again,
# unwrap that Subset first so the 80/20 split is always taken from the full
# training set instead of from an already-split 80% portion.
full_train_dataset = train_dataset.dataset if isinstance(train_dataset, Subset) else train_dataset

# 80% train, 20% val split
train_size = int(0.8 * len(full_train_dataset))
val_size = len(full_train_dataset) - train_size

# Seed the split so the same samples end up in train / val on every run;
# otherwise validation metrics are not comparable between kernel restarts.
split_generator = torch.Generator().manual_seed(42)
train_dataset, val_dataset = random_split(
    full_train_dataset, [train_size, val_size], generator=split_generator
)

# Create DataLoaders
train_loader = DataLoader(train_dataset, batch_size=64, shuffle=True)
val_loader = DataLoader(val_dataset, batch_size=64, shuffle=False)
test_loader = DataLoader(test_dataset, batch_size=64, shuffle=False)

print(f"Train samples: {len(train_dataset)}")
print(f"Validation samples: {len(val_dataset)}")
print(f"Test samples: {len(test_dataset)}")


Train samples: 8000
Validation samples: 2000
Test samples: 2100


## Model Definition

## Training loop

## Run on test set

## Run on external test set and submit to Kaggle