In [1]:
%matplotlib inline

import argparse
from typing import Dict

from tempfile import gettempdir

import numpy as np
import torch
from torch import nn, optim
from torchvision.models.resnet import resnet50
from tqdm import tqdm

from avkit.configs import load_config_data
from avkit.data import LocalDataManager
from avkit.dataset import AgentDataset, EgoDataset
from avkit.dataset.utilities import build_dataloader
from avkit.rasterization import build_rasterizer
from avkit.evaluation import write_coords_as_csv, compute_mse_error_csv
from avkit.geometry import transform_points
from avkit.visualization import PREDICTED_POINTS_COLOR, TARGET_POINTS_COLOR, draw_trajectory
from matplotlib import pyplot as plt

import os

## Prepare Data path and load cfg

By setting the `AVKIT_DATA_FOLDER` variable, we can point the script to the folder where the data lie.

Then, we load our config file with relative paths and other configurations (rasterizer, training params, ...).

In [2]:
# Tell avkit where the dataset root is by exporting AVKIT_DATA_FOLDER for this process.
# NOTE(review): this is a machine-specific absolute path -- edit it before running elsewhere.
os.environ.update({"AVKIT_DATA_FOLDER": "/Users/pondruska/prediction-dataset"})
# Load the configuration file and echo it so the parameters used by the run are visible.
cfg = load_config_data("./prediction_config.yaml")
print(cfg)

OrderedDict([('format_version', 4), ('model_params', OrderedDict([('model_architecture', 'resnet50'), ('history_num_frames', 0), ('history_step_size', 1), ('history_delta_time', 0.1), ('future_num_frames', 12), ('future_step_size', 1), ('future_delta_time', 0.1)])), ('raster_params', OrderedDict([('raster_size', [224, 224]), ('pixel_size', [0.25, 0.25]), ('ego_center', [0.25, 0.5]), ('map_type', 'py_semantic'), ('satellite_map_key', 'aerial_map/aerial_map.png'), ('semantic_map_lyftbag_key', 'maps/semantic_maps/raster_test_lyftbag.lyftbag'), ('semantic_map_json_key', 'semantic_map/semantic_map.pb'), ('filter_agents_threshold', 0.5)])), ('train_data_loader', OrderedDict([('datasets', [OrderedDict([('key', 'sample_scenes/20200504_competition_sample.zarr'), ('scene_indices', [1])])]), ('perturb_probability', 0.0), ('batch_size', 12), ('shuffle', True), ('num_workers', 16)])), ('val_data_loader', OrderedDict([('datasets', [OrderedDict([('key', 'sample_scenes/20200504_competition_sample.zarr

## Model

Our baseline is a simple `resnet50` pretrained on `imagenet`. We must replace the input and the final layer to address our requirements.

In [3]:
def build_model(cfg: Dict) -> torch.nn.Module:
    """Build the resnet50 baseline with its input and output layers resized.

    Args:
        cfg: configuration mapping; reads ``cfg["model_params"]["history_num_frames"]``
            and ``cfg["model_params"]["future_num_frames"]``.

    Returns:
        A resnet50 whose first convolution accepts ``3 + (history_num_frames + 1) * 2``
        input channels and whose final linear layer emits ``2 * future_num_frames``
        values (an X and a Y per future state).
    """
    # load pre-trained Conv2D model
    model = resnet50(pretrained=True)

    # change input size
    # presumably 2 channels per frame (current frame + history) on top of 3 map channels
    # -- TODO confirm against the rasterizer output
    num_history_channels = (cfg["model_params"]["history_num_frames"] + 1) * 2
    num_in_channels = 3 + num_history_channels
    # A brand-new Conv2d is created here, so this layer starts from fresh initialisation;
    # only its geometry (out channels, kernel, stride, padding) is copied from the original.
    model.conv1 = nn.Conv2d(
        num_in_channels,
        model.conv1.out_channels,
        kernel_size=model.conv1.kernel_size,
        stride=model.conv1.stride,
        padding=model.conv1.padding,
        bias=False,
    )
    # change output size
    # X, Y  * number of future states
    num_targets = 2 * cfg["model_params"]["future_num_frames"]
    # Read the feature width from the existing head instead of hard-coding 2048, so the
    # replacement layer always matches whatever the backbone feeds into it.
    model.fc = nn.Linear(in_features=model.fc.in_features, out_features=num_targets)

    return model

In [4]:
def forward(data, model, device, criterion):
    """Run one batch through the model and reduce the criterion to a scalar.

    Args:
        data: batch mapping providing ``"image"`` and ``"target_positions"`` tensors.
        model: callable applied to the image batch.
        device: device both tensors are moved to before the forward pass.
        criterion: loss callable taking ``(outputs, targets)``.

    Returns:
        ``(loss, outputs)``: the mean of the criterion's result, and the raw model output.
    """
    images = data["image"].to(device)
    # Flatten everything after the batch axis so the targets line up with the model's flat output.
    ground_truth = data["target_positions"].to(device)
    ground_truth = ground_truth.reshape(len(ground_truth), -1)

    predictions = model(images)
    # Plain (unweighted) mean over every element the criterion returns.
    mean_loss = criterion(predictions, ground_truth).mean()
    return mean_loss, predictions

## Load the data

In [5]:
# ===== INIT DATASETS
# The data manager is created with no explicit folder (None) and shared by everything below.
dm = LocalDataManager(None)
rasterizer = build_rasterizer(cfg, dm)
# The train and validation loaders are built identically; only the split name differs.
train_dataloader, eval_dataloader = [
    build_dataloader(cfg, split_name, dm, AgentDataset, rasterizer) for split_name in ("train", "val")
]


semantic_map/semantic_map.pb is not present in local data folder /tmp/avkit_data


FileNotFoundError: semantic_map/semantic_map.pb not found

In [None]:
# ==== INIT MODEL
# Use the first CUDA device when one is available, otherwise fall back to the CPU.
device = torch.device("cuda:0") if torch.cuda.is_available() else torch.device("cpu")
model = build_model(cfg)
model = model.to(device)
optimizer = optim.Adam(params=model.parameters(), lr=1e-3)
# reduction="none" keeps the element-wise errors; forward() averages them itself.
criterion = nn.MSELoss(reduction="none")

# Training

In [None]:
# ==== TRAIN LOOP
# Training mode and autograd do not change inside the loop, so switch them on once up front
# (this also undoes the eval()/set_grad_enabled(False) left behind if the evaluation cell ran first).
model.train()
torch.set_grad_enabled(True)

tr_it = iter(train_dataloader)
progress_bar = tqdm(range(cfg["train_params"]["max_num_steps"]))
losses_train = []
for _ in progress_bar:
    try:
        data = next(tr_it)
    except StopIteration:
        # The dataloader ran out before max_num_steps was reached: start another pass over it.
        tr_it = iter(train_dataloader)
        data = next(tr_it)

    loss, _ = forward(data, model, device, criterion)

    # Backward pass
    optimizer.zero_grad()
    loss.backward()
    optimizer.step()

    # Read the scalar once and reuse it for both the history and the progress bar.
    loss_value = loss.item()
    losses_train.append(loss_value)
    progress_bar.set_description(f"loss: {loss_value} loss(avg): {np.mean(losses_train)}")

# Evaluation
We can now run inference and store the predicted and annotated trajectories.

In this example we run it on a single scene from the eval dataset because of computational constraints.

In [None]:
# ==== EVAL LOOP
# Inference only: evaluation mode, with autograd switched off globally from here on.
model.eval()
torch.set_grad_enabled(False)

# Per-batch accumulators: losses, predicted / ground-truth offsets, and the identifiers
# needed later to write one CSV row per (timestamp, agent).
losses_eval = []
future_coords_offsets_pd = []
future_coords_offsets_gt = []
timestamps = []
agent_ids = []

progress_bar = tqdm(eval_dataloader)
for data in progress_bar:
    loss, outputs = forward(data, model, device, criterion)
    losses_eval.append(loss.item())
    progress_bar.set_description(f"Running EVAL, loss: {loss.item()} loss(avg): {np.mean(losses_eval)}")

    # Unflatten to (batch, -1, 2) so each row is one coordinate pair.
    batch_size = len(outputs)
    predicted = outputs.reshape(batch_size, -1, 2)
    annotated = data["target_positions"].reshape(batch_size, -1, 2)
    future_coords_offsets_pd.append(predicted.cpu().numpy())
    future_coords_offsets_gt.append(annotated.cpu().numpy())

    timestamps.append(data["timestamp"].numpy())
    agent_ids.append(data["track_id"].numpy())
    

### Save results in the competition format and perform evaluation
After the model has predicted trajectories for our evaluation set, we can save them in a `csv` file in the competition format. To simulate a complete evaluation session we can also save the GT in another `csv` and get the score.

In [None]:
# ==== COMPUTE CSV
pred_path = f"{gettempdir()}/pred.csv"
gt_path = f"{gettempdir()}/gt.csv"

# Predictions and ground truth are written with the same layout and the same
# timestamp / agent-id columns; only the destination and the coordinates differ.
for csv_path, coords_batches in (
    (pred_path, future_coords_offsets_pd),
    (gt_path, future_coords_offsets_gt),
):
    write_coords_as_csv(
        csv_path,
        future_num_frames=cfg["model_params"]["future_num_frames"],
        future_coords_offsets=np.concatenate(coords_batches),
        timestamps=np.concatenate(timestamps),
        agent_ids=np.concatenate(agent_ids),
    )

# Score the prediction file against the ground-truth file.
print(f"current error is {compute_mse_error_csv(gt_path, pred_path)}")

### Visualise results
We can also visualise some results from the ego (AV) point of view. Let's have a look at frame number `5198`.

In [None]:
# Draw predicted and annotated agent trajectories for one frame on the AV-centred raster.
# NOTE(review): the `.dataset.datasets[0].dataset` chain assumes a specific wrapper nesting
# produced by build_dataloader -- confirm if the dataloader construction changes.
eval_agent_dataset = eval_dataloader.dataset.datasets[0].dataset
# Ego dataset built over the same underlying data and the same rasterizer as the agent dataset.
eval_ego_dataset = EgoDataset(cfg, eval_agent_dataset.dataset, rasterizer)
frame_number = 5198

# Inference only: evaluation mode, and autograd disabled globally (it stays disabled after this cell).
model.eval()
torch.set_grad_enabled(False)

# get AV point-of-view frame
data_ego = eval_ego_dataset[frame_number]
# transpose(1, 2, 0) moves the first axis last before the RGB conversion
# (presumably channels-first -> channels-last; confirm against the rasterizer).
im_ego = rasterizer.to_rgb(data_ego["image"].transpose(1, 2, 0))


# ego_center (a fraction per axis) times raster_size gives the reference point on the raster.
center = np.asarray(cfg["raster_params"]["ego_center"]) * cfg["raster_params"]["raster_size"]
# Indices into the agent dataset for the agents belonging to the chosen frame.
agent_indices = eval_agent_dataset.get_frame_indices(frame_number)

# Filled per agent inside the loop; both names are rebound to concatenated arrays after it.
predicted_positions = []
target_positions = []

for v_index in agent_indices:
    data_agent = eval_agent_dataset[v_index]

    # unsqueeze(0) adds a batch axis of size 1; out_net[0] removes it again below.
    out_net = model(torch.from_numpy(data_agent["image"]).unsqueeze(0).to(device))
    # One (x, y) row per predicted future state.
    out_pos = out_net[0].reshape(-1, 2).detach().cpu().numpy()

    # store absolute world coordinates
    # Offsets are shifted by `center` and mapped through the inverse of this agent's
    # world_to_image matrix, so every agent ends up in one shared frame.
    image_to_world = np.linalg.inv(data_agent["world_to_image"])
    predicted_positions.append(transform_points(out_pos + center, image_to_world))
    target_positions.append(transform_points(data_agent["target_positions"] + center, image_to_world))

# convert coordinates to AV point-of-view so we can draw them
# Same round trip in reverse: through the ego frame's world_to_image, then back to offsets from `center`.
predicted_positions = transform_points(np.concatenate(predicted_positions), data_ego["world_to_image"]) - center
target_positions = transform_points(np.concatenate(target_positions), data_ego["world_to_image"]) - center

# All-zero yaws, one per point, passed to both draw calls.
# NOTE(review): sized from predicted_positions and reused for target_positions -- assumes both have the same length.
yaws = np.zeros((len(predicted_positions), 1))
# NOTE(review): draw_trajectory's return value is unused, so it presumably draws onto im_ego in place -- confirm.
draw_trajectory(im_ego, center, predicted_positions, yaws, PREDICTED_POINTS_COLOR)
draw_trajectory(im_ego, center, target_positions, yaws, TARGET_POINTS_COLOR)

# [::-1] reverses the first (row) axis, i.e. the image is shown flipped vertically.
plt.imshow(im_ego[::-1])