First, develop an extension of the ReID-based tracker we created previously that makes it more robust to occlusions by allowing it to recover from missed detections.

Then implement a Message Passing Network from scratch, and use it to build a model that will learn to combine position information and reid features to directly predict associations between past tracks and detections. 


- Adapt the track management scheme of our ReIDTracker to allow it to recover from missed detections.
- Implement a Message Passing Network from scratch to operate on bipartite graphs
- Implement the pairwise feature computation to obtain features for our Message Passing Network
- Train the Message Passing Network and improve your tracker's IDF1 score



In [None]:
from google.colab import drive

# Mount Google Drive into the Colab filesystem at /content/gdrive.
drive.mount('/content/gdrive')

Mounted at /content/gdrive


In [None]:
# Base folder holding the MOT16 data and the `output` directory used by later cells.
root_dir = "gdrive/My Drive/Colab Notebooks/cv3dst_exercise/"
# Base folder holding the `src` package (added to sys.path below) and the preprocessed .pth data.
gnn_root_dir = "gdrive/My Drive/Colab Notebooks/cv3dst_gnn_exercise/"

In [None]:
!ls "gdrive/My Drive/Colab Notebooks/cv3dst_gnn_exercise/data"

preprocessed_data_test_2.pth  preprocessed_data_train_2.pth


In [None]:
%load_ext autoreload
%autoreload 2
%matplotlib inline

!pip install tqdm lap
!pip install https://github.com/timmeinhardt/py-motmetrics/archive/fix_pandas_deprecating_warnings.zip

Collecting lap
  Downloading lap-0.4.0.tar.gz (1.5 MB)
[K     |████████████████████████████████| 1.5 MB 8.0 MB/s 
[?25hBuilding wheels for collected packages: lap
  Building wheel for lap (setup.py) ... [?25l[?25hdone
  Created wheel for lap: filename=lap-0.4.0-cp37-cp37m-linux_x86_64.whl size=1590173 sha256=349557e07395409f0a1a28af752a99c565bdd76a051e53480bac0f458b49b9c6
  Stored in directory: /root/.cache/pip/wheels/b1/0b/e3/ef9daf1b5547b56389e42c80c3100f1e6479bf5fd00fd9d6ba
Successfully built lap
Installing collected packages: lap
Successfully installed lap-0.4.0
[K     - 148 kB 3.2 MB/s
Building wheels for collected packages: motmetrics
  Building wheel for motmetrics (setup.py) ... [?25l[?25hdone
  Created wheel for motmetrics: filename=motmetrics-1.1.3-py3-none-any.whl size=134199 sha256=6a81f48be4ef97d5459e2fe94a9aa42a1766946d38080659e4ab0a382117361c
  Stored in directory: /tmp/pip-ephem-wheel-cache-oakw_x61/wheels/39/60/bf/90b1b02ff42db1bf7f2d2fa3eef2fe8bc46061182cf4ce7b

In [None]:
import os
import sys
sys.path.append(os.path.join(gnn_root_dir, 'src'))


import matplotlib.pyplot as plt
import numpy as np
import time
from tqdm.autonotebook import tqdm

import torch
from torch.utils.data import DataLoader

from tracker.data_track import MOT16Sequences
from tracker.tracker import Tracker, ReIDTracker
from tracker.utils import run_tracker, cosine_distance
from scipy.optimize import linear_sum_assignment as linear_assignment
import os.path as osp

import motmetrics as mm
# Use the `lap` package (installed in the setup cell) as motmetrics' assignment solver.
mm.lap.default_solver = 'lap'


In [None]:
!ls "gdrive/My Drive/Colab Notebooks/cv3dst_exercise/data/MOT16/train"
!ls "gdrive/My Drive/Colab Notebooks/cv3dst_exercise/data/MOT16/test"

MOT16-02  MOT16-04  MOT16-05  MOT16-09	MOT16-10  MOT16-11  MOT16-13
MOT16-01  MOT16-03  MOT16-06  MOT16-07	MOT16-08  MOT16-12  MOT16-14


In [None]:
# Preprocessed training data; passed as `db` to run_tracker and to the training dataset below.
train_db = torch.load(osp.join(gnn_root_dir, 'data/preprocessed_data_train_2.pth'))


We start with a sample implementation built on the ``ReIDTracker`` from the previous ReID-based tracker.




In [None]:
# Sentinel cost: an assignment whose cost equals this value is not a real match.
_UNMATCHED_COST = 255


class ReIDHungarianTracker(ReIDTracker):
    """ReID tracker that associates tracks and detections with Hungarian matching."""

    def data_association(self, boxes, scores, pred_features):
        """Match the current detections to existing tracks.

        Distance computation is delegated to ``compute_distance_matrix`` and
        track bookkeeping to ``update_tracks``. When no tracks exist yet,
        every detection simply starts a new track.
        """
        if not self.tracks:
            # Nothing to match against: every detection becomes a track.
            self.add(boxes, scores, pred_features)
            return

        track_boxes = torch.stack([trk.box for trk in self.tracks], axis=0)
        track_features = torch.stack([trk.get_feature() for trk in self.tracks], axis=0)

        distance = self.compute_distance_matrix(
            track_features, pred_features, track_boxes, boxes,
            metric_fn=cosine_distance,
        )

        # Hungarian matching over the (tracks x detections) cost matrix.
        row_idx, col_idx = linear_assignment(distance)
        self.update_tracks(row_idx, col_idx, distance, boxes, scores, pred_features)

    def update_tracks(self, row_idx, col_idx, distance, boxes, scores, pred_features):
        """Update matched tracks, drop unmatched ones and start new tracks.

        An assignment whose cost equals ``_UNMATCHED_COST`` is treated as
        "no match": the track is removed and the detection is considered new.
        """
        ids_by_row = [trk.id for trk in self.tracks]

        assigned_ids = set()
        rejected_ids = set()
        matched_boxes = set()
        for t_idx, b_idx in zip(row_idx, col_idx):
            tid = ids_by_row[t_idx]
            assigned_ids.add(tid)
            if distance[t_idx, b_idx] == _UNMATCHED_COST:
                rejected_ids.add(tid)
                continue
            trk = self.tracks[t_idx]
            trk.box = boxes[b_idx]
            trk.add_feature(pred_features[b_idx])
            matched_boxes.add(b_idx)

        # Tracks the solver left without a column count as unmatched too.
        dropped_ids = rejected_ids | (set(ids_by_row) - assigned_ids)
        self.tracks = [trk for trk in self.tracks if trk.id not in dropped_ids]

        # Every detection that did not update a track starts a new one.
        fresh_idx = set(range(len(boxes))) - matched_boxes
        self.add(
            [boxes[i] for i in fresh_idx],
            [scores[i] for i in fresh_idx],
            [pred_features[i] for i in fresh_idx],
        )


In [None]:
# Sequences on which the trackers in the next cells are run and evaluated.
val_sequences = MOT16Sequences('MOT16-reid', root_dir = osp.join(root_dir, 'data/MOT16'), vis_threshold=0.)

In [None]:
# Baseline run: Hungarian ReID tracker without recovery of missed tracks.
# The positional None is presumably `obj_detect` (later cells pass obj_detect=None by keyword).
tracker = ReIDHungarianTracker(None)
run_tracker(val_sequences, db=train_db, tracker=tracker, output_dir=None)

Tracking: MOT16-02
Tracks found: 314
Runtime for MOT16-02: 6.8 s.
Tracking: MOT16-05
Tracks found: 295
Runtime for MOT16-05: 1.7 s.
Tracking: MOT16-09
Tracks found: 87
Runtime for MOT16-09: 1.2 s.
Tracking: MOT16-11
Tracks found: 187
Runtime for MOT16-11: 2.9 s.
Runtime for all sequences: 12.6 s.
          IDF1   IDP   IDR  Rcll  Prcn  GT  MT  PT ML   FP    FN IDs   FM  MOTA  MOTP
MOT16-02 41.6% 59.1% 32.1% 52.2% 96.1%  62  11  39 12  390  8873 203  216 49.1% 0.096
MOT16-05 57.9% 68.5% 50.1% 68.8% 94.0% 133  56  65 12  305  2156 176  146 61.9% 0.142
MOT16-09 52.2% 64.6% 43.8% 66.3% 97.7%  26  13  12  1   82  1793  72   79 63.4% 0.083
MOT16-11 63.4% 69.9% 58.0% 80.2% 96.6%  75  44  24  7  266  1871  88   90 76.4% 0.083
OVERALL  51.6% 64.8% 42.8% 63.5% 96.1% 296 124 140 32 1043 14693 539  531 59.6% 0.099


Unnamed: 0,idf1,idp,idr,recall,precision,num_unique_objects,mostly_tracked,partially_tracked,mostly_lost,num_false_positives,num_misses,num_switches,num_fragmentations,mota,motp
MOT16-02,0.416193,0.591008,0.321188,0.522469,0.961378,62,11,39,12,390,8873,203,216,0.490555,0.095583
MOT16-05,0.57882,0.684564,0.501373,0.688304,0.939795,133,56,65,12,305,2156,176,146,0.618765,0.14184
MOT16-09,0.52243,0.646099,0.438498,0.663286,0.97731,26,13,12,1,82,1793,72,79,0.634366,0.082864
MOT16-11,0.634042,0.699017,0.580119,0.801717,0.966032,75,44,24,7,266,1871,88,90,0.764201,0.082746
OVERALL,0.515792,0.648089,0.428351,0.635038,0.960803,296,124,140,32,1043,14693,539,531,0.595743,0.098641


## Part I - Long-Term ReID Tracker


Allow the tracker to keep tracks that are not matched during data association by marking them as **inactive**.

In [None]:
class LongTermReIDHungarianTracker(ReIDHungarianTracker):
    """Hungarian ReID tracker that keeps unmatched tracks alive as inactive.

    A track that is not matched has its ``inactive`` counter increased and is
    only removed once that counter exceeds ``patience``.
    """

    def __init__(self, patience, *args, **kwargs):
        """Store ``patience`` and forward the remaining arguments to the parent."""
        self.patience = patience
        super().__init__(*args, **kwargs)

    def update_results(self):
        """Record the current box of every active track, then advance the frame index."""
        frame = self.im_index
        for trk in self.tracks:
            per_track = self.results.setdefault(trk.id, {})
            if trk.inactive != 0:
                # Inactive tracks keep their entry but get no box for this frame.
                continue
            per_track[frame] = np.concatenate([trk.box.cpu().numpy(), np.array([trk.score])])

        self.im_index = frame + 1

    def update_tracks(self, row_idx, col_idx, distance, boxes, scores, pred_features):
        """Update matched tracks, age unmatched ones and start new tracks.

        An assignment whose cost equals ``_UNMATCHED_COST`` is not a match.
        Matched tracks get ``inactive`` reset to 0; unmatched tracks get it
        increased and are removed once it exceeds ``self.patience``.
        """
        ids_by_row = [trk.id for trk in self.tracks]

        assigned_ids = set()
        missed_ids = set()
        matched_boxes = set()
        for t_idx, b_idx in zip(row_idx, col_idx):
            tid = ids_by_row[t_idx]
            assigned_ids.add(tid)
            if distance[t_idx, b_idx] == _UNMATCHED_COST:
                missed_ids.add(tid)
                continue
            trk = self.tracks[t_idx]
            trk.box = boxes[b_idx]
            trk.add_feature(pred_features[b_idx])
            # A matched track is active again.
            trk.inactive = 0
            matched_boxes.add(b_idx)

        # Tracks the solver left without a column are missed as well.
        missed_ids |= set(ids_by_row) - assigned_ids

        # Age every missed track and collect those that ran out of patience.
        expired_ids = set()
        for trk in self.tracks:
            if trk.id not in missed_ids:
                continue
            trk.inactive += 1
            if trk.inactive > self.patience:
                expired_ids.add(trk.id)

        self.tracks = [trk for trk in self.tracks if trk.id not in expired_ids]

        # Every detection that did not update a track starts a new one.
        fresh_idx = set(range(len(boxes))) - matched_boxes
        self.add(
            [boxes[i] for i in fresh_idx],
            [scores[i] for i in fresh_idx],
            [pred_features[i] for i in fresh_idx],
        )

In [None]:
# Same evaluation as the baseline, but unmatched tracks are kept for up to 20 updates (patience).
tracker = LongTermReIDHungarianTracker(patience=20, obj_detect=None)
run_tracker(val_sequences, db=train_db, tracker=tracker, output_dir=None)

Tracking: MOT16-02
Tracks found: 130
Runtime for MOT16-02: 6.7 s.
Tracking: MOT16-05
Tracks found: 155
Runtime for MOT16-05: 2.0 s.
Tracking: MOT16-09
Tracks found: 51
Runtime for MOT16-09: 1.5 s.
Tracking: MOT16-11
Tracks found: 91
Runtime for MOT16-11: 3.8 s.
Runtime for all sequences: 14.0 s.
          IDF1   IDP   IDR  Rcll  Prcn  GT  MT  PT ML   FP    FN IDs   FM  MOTA  MOTP
MOT16-02 47.2% 67.0% 36.4% 52.2% 96.1%  62  11  38 13  390  8873 142  220 49.4% 0.095
MOT16-05 62.5% 74.0% 54.2% 68.8% 94.0% 133  56  65 12  305  2156 119  149 62.7% 0.142
MOT16-09 56.6% 70.0% 47.5% 66.3% 97.7%  26  12  13  1   82  1793  41   83 64.0% 0.085
MOT16-11 69.4% 76.5% 63.5% 80.2% 96.6%  75  44  25  6  266  1871  48   90 76.8% 0.083
OVERALL  56.9% 71.5% 47.3% 63.5% 96.1% 296 123 141 32 1043 14693 350  542 60.0% 0.099


Unnamed: 0,idf1,idp,idr,recall,precision,num_unique_objects,mostly_tracked,partially_tracked,mostly_lost,num_false_positives,num_misses,num_switches,num_fragmentations,mota,motp
MOT16-02,0.472122,0.67043,0.364351,0.522469,0.961378,62,11,38,13,390,8873,142,220,0.493838,0.094828
MOT16-05,0.625386,0.739637,0.541709,0.688304,0.939795,133,56,65,12,305,2156,119,149,0.627006,0.141822
MOT16-09,0.566059,0.700055,0.475117,0.663286,0.97731,26,12,13,1,82,1793,41,83,0.640188,0.085351
MOT16-11,0.693693,0.764781,0.634697,0.801717,0.966032,75,44,25,6,266,1871,48,90,0.76844,0.082838
OVERALL,0.569361,0.715397,0.472838,0.635038,0.960803,296,123,141,32,1043,14693,350,542,0.600437,0.098722


## Part II - Building a tracker based on Neural Message Passing

Problems and solutions

Our current tracker relies solely on appearance to predict similarity scores between objects.

This can be problematic whenever appearance alone is not discriminative enough, in which case it would be best to also take object position and size into account. Secondly, our tracker can only account for pairwise similarities among objects. Ideally, we would like it to also consider higher-order information.

To address these limitations, we will now build a tracker that combines both appearance and position information with a Message Passing Neural Network, inspired by the approach presented in [Learning a Neural Solver for Multiple Object Tracking, CVPR 2020](https://arxiv.org/abs/1912.07515).

The overall idea will be to build, for every tracking step, a bipartite graph containing two sets of nodes: past tracks, and detections in the current frame. We will initialize node features with ReID embeddings, and edge features with relative position features and ReID distance. We will use an MPN to refine these edge embeddings. The learning task will be to classify the edge embeddings in this graph, which is equivalent to predicting the entries of our data association similarity matrix.


### Building an MPN for Bipartite Graphs

We will first build a Neural Message Passing layer based on the Graph Networks framework introduced in [Relational inductive biases, deep learning, and graph networks, arXiv 2018](https://arxiv.org/abs/1806.01261), as explained in the *A More General Framework* slides of [Lecture 5](https://www.moodle.tum.de/pluginfile.php/2928927/mod_resource/content/1/5.MOT2.pdf) (slides 70 to 75).

We will be using a bipartite graph, i.e., we will have two sets of nodes $A$ (past tracks), and $B$ (detections), and our set of edges will be $A\times B$. That is, we will connect every pair of past tracks and detections.

We will have initial node features (i.e. reid embeddings) matrices: $X_A$ and $X_B$ and an initial edge features tensor $E$.

$X_A$ and $X_B$ have shape $|A|\times \text{node_dim}$ and $|B|\times \text{node_dim}$, respectively.

$E$ has shape $|A| \times |B| \times \text{edge_dim}$. Its $(i, j)$ entry contains the edge features of node $i$ in $A$ and node $j$ in $B$.

With the given layer, we will produce new node feature matrices $X_A'$ and $X_B'$ and edge features $E'$ with the same dimensions. 
Please refer to the formulas in the slides and figure out how to apply them in this setting.

You are asked to implement both the node and edge update steps in the class below

**NOTE 1**: Working with a bipartite graph allows us to vectorize all operations in the formulas in a straightforward manner (keep in mind that we store edge features in a matrix). Given a node in $A$, it is connected to all nodes in $B$.

**NOTE 2**: You do not need to care about batching several graphs. This implementation will only work with a single graph at a time.

In [None]:
from torch import nn
import timeit
class BipartiteNeuralMessagePassingLayer(nn.Module):
    """One message passing step on a fully connected bipartite graph.

    The graph has two node sets, A and B, and one edge for every (a, b) pair.
    Edge features are stored densely in a tensor of shape (|A|, |B|, dim).
    A step first updates all edges from their two endpoint nodes, then updates
    every node from the sum of its incident (updated) edges.

    Args:
        node_dim: size of the node embeddings (input and output).
        edge_dim: size of the edge embeddings produced by the layer. The edge
            input is expected to have 2 * edge_dim channels because callers
            concatenate the initial edge features at every step.
        dropout: dropout probability used inside both MLPs.
    """

    def __init__(self, node_dim, edge_dim, dropout=0.):
        super().__init__()

        # 2*edge_dim since we always concatenate initial edge features
        edge_in_dim = 2 * node_dim + 2 * edge_dim
        self.edge_mlp = nn.Sequential(
            nn.Linear(edge_in_dim, edge_dim), nn.ReLU(), nn.Dropout(dropout),
            nn.Linear(edge_dim, edge_dim), nn.ReLU(), nn.Dropout(dropout),
        )

        node_in_dim = node_dim + edge_dim
        self.node_mlp = nn.Sequential(
            nn.Linear(node_in_dim, node_dim), nn.ReLU(), nn.Dropout(dropout),
            nn.Linear(node_dim, node_dim), nn.ReLU(), nn.Dropout(dropout),
        )

    def edge_update(self, edge_embeds, nodes_a_embeds, nodes_b_embeds):
        """
        Node-to-edge updates, as described in slide 71, lecture 5.

        Args:
            edge_embeds: torch.Tensor with shape (|A|, |B|, 2 x edge_dim)
            nodes_a_embeds: torch.Tensor with shape (|A|, node_dim)
            nodes_b_embeds: torch.Tensor with shape (|B|, node_dim)

        returns:
            updated_edge_feats = torch.Tensor with shape (|A|, |B|, edge_dim)
        """
        n_nodes_a, n_nodes_b, _ = edge_embeds.shape

        # Run on whatever device the layer lives on instead of assuming CUDA.
        device = self.edge_mlp[0].weight.device

        # Broadcast each node set along the other axis so that entry (a, b)
        # holds [x_a, x_b]; expand() creates views, no Python loop is needed.
        nodes_a = nodes_a_embeds.to(device).unsqueeze(1).expand(-1, n_nodes_b, -1)
        nodes_b = nodes_b_embeds.to(device).unsqueeze(0).expand(n_nodes_a, -1, -1)

        # Shape (|A|, |B|, 2*node_dim + 2*edge_dim)
        edge_in = torch.cat((nodes_a, nodes_b, edge_embeds.to(device)), dim=2)

        return self.edge_mlp(edge_in)

    def node_update(self, edge_embeds, nodes_a_embeds, nodes_b_embeds):
        """
        Edge-to-node updates, as described in slide 75, lecture 5.

        Args:
            edge_embeds: torch.Tensor with shape (|A|, |B|, edge_dim)
            nodes_a_embeds: torch.Tensor with shape (|A|, node_dim)
            nodes_b_embeds: torch.Tensor with shape (|B|, node_dim)

        returns:
            tuple(
                updated_nodes_a_embeds: torch.Tensor with shape (|A|, node_dim),
                updated_nodes_b_embeds: torch.Tensor with shape (|B|, node_dim)
                )
        """
        device = self.node_mlp[0].weight.device
        edge_embeds = edge_embeds.to(device)

        # A node in A aggregates over all nodes in B (dim 1) and vice versa.
        # Each input has shape (num_nodes, node_dim + edge_dim).
        nodes_a_in = torch.cat((nodes_a_embeds.to(device), edge_embeds.sum(1)), dim=1)
        nodes_b_in = torch.cat((nodes_b_embeds.to(device), edge_embeds.sum(0)), dim=1)

        # The same node MLP is shared by both node sets.
        nodes_a = self.node_mlp(nodes_a_in)
        nodes_b = self.node_mlp(nodes_b_in)

        return nodes_a, nodes_b

    def forward(self, edge_embeds, nodes_a_embeds, nodes_b_embeds):
        """Run one edge update followed by one node update.

        The node update uses the freshly updated edge embeddings.

        returns:
            tuple(updated edge embeds, updated nodes A embeds, updated nodes B embeds)
        """
        edge_embeds_latent = self.edge_update(edge_embeds, nodes_a_embeds, nodes_b_embeds)
        nodes_a_latent, nodes_b_latent = self.node_update(edge_embeds_latent, nodes_a_embeds, nodes_b_embeds)

        return edge_embeds_latent, nodes_a_latent, nodes_b_latent

## Building the entire network to predict similarities


Given two bounding boxes $(x_i, y_i, w_i, h_i)$ and $(x_j, y_j, w_j, h_j)$ with timestamps $t_i$ and $t_j$, compute an initial 5-dimensional edge feature vector as:
$$ E_{(i, j)} = \left( \frac{2(x_j - x_i)}{h_i + h_j}, \frac{2(y_j - y_i)}{h_i + h_j}, \log{\frac{h_i}{h_j}}, \log{\frac{w_i}{w_j}}, t_j - t_i \right) $$


In [None]:
from torch.nn import functional as F
import math 
class AssignmentSimilarityNet(nn.Module):
    """Predicts track-to-detection association scores with message passing.

    Nodes are initialised from ReID embeddings and edges from relative
    position/time features plus the ReID cosine distance. A shared
    BipartiteNeuralMessagePassingLayer is applied ``num_steps`` times and the
    edge embeddings are classified after every step.

    Args:
        reid_network: stored on the module; not used by the methods below.
        node_dim: size of the node embeddings inside the graph network.
        edge_dim: size of the edge embeddings inside the graph network.
        reid_dim: size of the incoming ReID embeddings.
        edges_in_dim: size of the raw edge features (5 position/time features
            plus the ReID distance in ``forward``).
        num_steps: number of message passing steps.
        dropout: dropout probability of the edge input MLP and graph network.
    """

    def __init__(self, reid_network, node_dim, edge_dim, reid_dim, edges_in_dim, num_steps, dropout=0.):
        super().__init__()
        self.reid_network = reid_network
        self.graph_net = BipartiteNeuralMessagePassingLayer(node_dim=node_dim, edge_dim=edge_dim, dropout=dropout)
        self.num_steps = num_steps
        self.cnn_linear = nn.Linear(reid_dim, node_dim)
        self.edge_in_mlp = nn.Sequential(
            nn.Linear(edges_in_dim, edge_dim), nn.ReLU(), nn.Dropout(dropout),
            nn.Linear(edge_dim, edge_dim), nn.ReLU(), nn.Dropout(dropout),
        )
        self.classifier = nn.Sequential(nn.Linear(edge_dim, edge_dim), nn.ReLU(), nn.Linear(edge_dim, 1))

    def compute_edge_feats(self, track_coords, current_coords, track_t, curr_t):
        """
        Computes initial edge feature tensor

        Args:
            track_coords: track's frame box coordinates, given by top-left and bottom-right coordinates
                          torch.Tensor with shape (num_tracks, 4)
            current_coords: current frame box coordinates, given by top-left and bottom-right coordinates
                            has shape (num_boxes, 4)

            track_t: track's timestamps, torch.Tensor with shape (num_tracks, )
            curr_t: current frame's timestamps, torch.Tensor with shape (num_boxes,)

        Returns:
            float32 tensor with shape (num_tracks, num_boxes, 5) containing pairwise
            position and time difference features. It lives on the device of
            ``track_coords``; all other inputs are moved there first.
        """
        device = track_coords.device

        def as_float(values):
            # Inputs may arrive on different devices / with integer dtypes.
            return torch.as_tensor(values).to(device=device, dtype=torch.float32)

        def center_and_size(coords):
            # (x1, y1, x2, y2) -> box center, width and height, each of shape (N,)
            x1, y1, x2, y2 = coords[:, 0], coords[:, 1], coords[:, 2], coords[:, 3]
            return (x1 + x2) / 2, (y1 + y2) / 2, x2 - x1, y2 - y1

        # Track quantities become columns (num_tracks, 1) and detection
        # quantities rows (1, num_boxes), so every binary operation below
        # broadcasts to the full (num_tracks, num_boxes) pair grid.
        track_x, track_y, track_w, track_h = (
            v.unsqueeze(1) for v in center_and_size(as_float(track_coords)))
        curr_x, curr_y, curr_w, curr_h = (
            v.unsqueeze(0) for v in center_and_size(as_float(current_coords)))
        track_time = as_float(track_t).unsqueeze(1)
        curr_time = as_float(curr_t).unsqueeze(0)

        height_sum = curr_h + track_h
        edge_feats = torch.stack((
            2 * (curr_x - track_x) / height_sum,
            2 * (curr_y - track_y) / height_sum,
            torch.log(track_h / curr_h),
            torch.log(track_w / curr_w),
            curr_time - track_time,
        ), dim=-1)

        return edge_feats  # has shape (num_tracks, num_boxes, 5)

    def forward(self, track_app, current_app, track_coords, current_coords, track_t, curr_t):
        """
        Args:
            track_app: track's reid embeddings, torch.Tensor with shape (num_tracks, 512)
            current_app: current frame detections' reid embeddings, torch.Tensor with shape (num_boxes, 512)
            track_coords: track's frame box coordinates, given by top-left and bottom-right coordinates
                          torch.Tensor with shape (num_tracks, 4)
            current_coords: current frame box coordinates, given by top-left and bottom-right coordinates
                            has shape (num_boxes, 4)

            track_t: track's timestamps, torch.Tensor with shape (num_tracks, )
            curr_t: current frame's timestamps, torch.Tensor with shape (num_boxes,)

        Returns:
            classified edges: torch.Tensor with shape (num_steps, num_tracks, num_boxes),
                             containing at entry (step, i, j) the unnormalized probability that track i and
                             detection j are a match, according to the classifier at the given neural message passing step
        """
        # Work on the device the network's parameters live on.
        device = self.cnn_linear.weight.device
        track_app = track_app.to(device)
        current_app = current_app.to(device)

        # Initial edge embeddings: position/time features + ReID distance.
        dist_reid = cosine_distance(track_app, current_app)
        pos_edge_feats = self.compute_edge_feats(track_coords, current_coords, track_t, curr_t).to(device)
        edge_feats = torch.cat((pos_edge_feats, dist_reid.unsqueeze(-1)), dim=-1)
        edge_embeds = self.edge_in_mlp(edge_feats)
        # `edge_embeds` is rebound (never modified in place) below, so the
        # initial embeddings can be kept by reference.
        initial_edge_embeds = edge_embeds

        # Get initial node embeddings, reduce dimensionality from 512 to node_dim
        track_embeds = F.relu(self.cnn_linear(track_app))
        curr_embeds = F.relu(self.cnn_linear(current_app))

        classified_edges = []
        for _ in range(self.num_steps):
            # Re-inject the initial edge features at every step.
            edge_embeds = torch.cat((edge_embeds, initial_edge_embeds), dim=-1)
            edge_embeds, track_embeds, curr_embeds = self.graph_net(edge_embeds=edge_embeds,
                                                                    nodes_a_embeds=track_embeds,
                                                                    nodes_b_embeds=curr_embeds)

            classified_edges.append(self.classifier(edge_embeds))

        return torch.stack(classified_edges).squeeze(-1)

## Putting everything together

In [None]:
# Sentinel cost: an assignment whose cost equals this value is not a real match.
_UNMATCHED_COST = 255


class MPNTracker(LongTermReIDHungarianTracker):
    """Long-term tracker whose association costs come from a learned network.

    Args:
        assign_net: network called as
            ``assign_net(track_features, det_features, track_boxes, det_boxes, track_t, curr_t)``
            that returns per-step match logits of shape
            (num_steps, num_tracks, num_boxes).
        *args, **kwargs: forwarded to LongTermReIDHungarianTracker.
    """

    def __init__(self, assign_net, *args, **kwargs):
        self.assign_net = assign_net
        super().__init__(*args, **kwargs)

    def data_association(self, boxes, scores, pred_features):
        """Match detections to tracks using the similarities predicted by ``assign_net``."""
        if not self.tracks:
            # No tracks exist.
            self.add(boxes, scores, pred_features)
            return

        # Run on the device the network lives on instead of assuming CUDA.
        device = next(self.assign_net.parameters()).device

        track_boxes = torch.stack([t.box for t in self.tracks], dim=0).to(device)
        track_features = torch.stack([t.get_feature() for t in self.tracks], dim=0).to(device)

        # Hacky way to recover the timestamps of boxes and tracks: a track
        # that has been inactive for k updates was last seen k + 1 frames ago.
        curr_t = self.im_index * torch.ones((pred_features.shape[0],), device=device)
        track_t = torch.as_tensor([self.im_index - t.inactive - 1 for t in self.tracks], device=device)

        # Inference only: do not build an autograd graph.
        with torch.no_grad():
            classified_edges = self.assign_net(track_features, pred_features.to(device),
                                               track_boxes, torch.as_tensor(boxes).to(device),
                                               track_t, curr_t)
            # Use predictions at last message passing step
            pred_sim = torch.sigmoid(classified_edges[-1]).cpu().numpy()

        # Do not allow matches when sim < 0.5, to avoid low-confidence associations
        distance = np.where(pred_sim < 0.5, _UNMATCHED_COST, 1 - pred_sim)

        # Perform Hungarian matching.
        row_idx, col_idx = linear_assignment(distance)
        self.update_tracks(row_idx, col_idx, distance, boxes, scores, pred_features)

## Training and evaluating our model



In [None]:
from gnn.dataset import LongTrackTrainingDataset
from torch.utils.data import DataLoader
from gnn.trainer import train_one_epoch

# Training configuration.
MAX_PATIENCE = 20  # used as max_past_frames for the dataset and as the tracker's patience
MAX_EPOCHS = 5     # number of training epochs
EVAL_FREQ = 1      # run validation every EVAL_FREQ epochs


# Define our model and move it to the GPU.
assign_net = AssignmentSimilarityNet(reid_network=None, # Not needed since we work with precomputed features
                                     node_dim=32, 
                                     edge_dim=64, 
                                     reid_dim=512, 
                                     edges_in_dim=6,  # 5 position/time features + 1 ReID distance
                                     num_steps=10).cuda()

dataset = LongTrackTrainingDataset(dataset='MOT16-train_wo_val2', 
                                   db=train_db, 
                                   root_dir= osp.join(root_dir, 'data/MOT16'),
                                   max_past_frames = MAX_PATIENCE,
                                   vis_threshold=0.25)

# The identity collate_fn hands each batch over as a plain list of samples.
data_loader = DataLoader(dataset, batch_size=8, collate_fn = lambda x: x, 
                         shuffle=True, num_workers=2, drop_last=True)
# NOTE(review): `device` is not referenced by any other visible cell.
device = torch.device('cuda')
optimizer = torch.optim.Adam(assign_net.parameters(), lr=0.001)
# NOTE(review): step_size equals MAX_EPOCHS and the scheduler is stepped once per
# epoch, so the learning-rate decay only happens after the final epoch.
scheduler = torch.optim.lr_scheduler.StepLR(optimizer, step_size=5)

We only leave 2 sequences for validation in order to maximize 
the amount of training data. For your convenience, here are the
 LongTermReIDTracker results on them. Your validation IDF1 scores should show an improvement of ~0.5 over them.
```

          IDF1   IDP   IDR  Rcll  Prcn  GT MT PT ML  FP    FN IDs   FM  MOTA  MOTP
MOT16-02 47.2% 67.0% 36.4% 52.2% 96.1%  62 11 38 13 390  8873 142  220 49.4% 0.095
MOT16-11 69.4% 76.5% 63.5% 80.2% 96.6%  75 44 25  6 266  1871  48   90 76.8% 0.083
OVERALL  55.5% 71.2% 45.5% 61.7% 96.3% 137 55 63 19 656 10744 190  310 58.6% 0.090
```



Let's start training!

Note that we have observed quite a lot of noise in validation scores among epochs and runs. This can be explained by the small size of our training and
validation sets, which is why we perform early stopping to obtain the best performing model on validation. 

In [None]:
# Train for MAX_EPOCHS epochs; every EVAL_FREQ epochs run the tracker on the
# validation sequences and keep the checkpoint with the best overall IDF1.
best_idf1 = 0.

# torch.save does not create missing directories, so make sure it exists.
ckpt_dir = osp.join(root_dir, 'output')
os.makedirs(ckpt_dir, exist_ok=True)

# The validation split does not change between epochs: build it once.
val_sequences = MOT16Sequences('MOT16-val2', osp.join(root_dir, 'data/MOT16'), vis_threshold=0.)

for epoch in range(1, MAX_EPOCHS + 1):
    print(f"-------- EPOCH {epoch:2d} --------")
    # Validation below switches the model to eval mode; switch back explicitly
    # instead of relying on train_one_epoch to do it.
    assign_net.train()
    train_one_epoch(model = assign_net, data_loader=data_loader, optimizer=optimizer, print_freq=50)
    scheduler.step()

    if epoch % EVAL_FREQ == 0:
        tracker = MPNTracker(assign_net=assign_net.eval(), obj_detect=None, patience=MAX_PATIENCE)
        res = run_tracker(val_sequences, db=train_db, tracker=tracker, output_dir=None)
        idf1 = res.loc['OVERALL']['idf1']
        if idf1 > best_idf1:
            best_idf1 = idf1
            torch.save(assign_net.state_dict(), osp.join(ckpt_dir, 'best_ckpt.pth'))
        

-------- EPOCH  1 --------


0it [00:00, ?it/s]

Iter 50. Loss: 1.248. Accuracy: 0.908. Recall: 0.304. Precision: 0.157
Iter 100. Loss: 0.388. Accuracy: 0.957. Recall: 0.955. Precision: 0.636
Iter 150. Loss: 0.108. Accuracy: 0.985. Recall: 0.987. Precision: 0.821
Iter 200. Loss: 0.084. Accuracy: 0.990. Recall: 0.991. Precision: 0.870
Iter 250. Loss: 0.107. Accuracy: 0.989. Recall: 0.988. Precision: 0.885
Iter 300. Loss: 0.078. Accuracy: 0.989. Recall: 0.986. Precision: 0.873
Iter 350. Loss: 0.059. Accuracy: 0.993. Recall: 0.993. Precision: 0.892
Iter 400. Loss: 0.052. Accuracy: 0.993. Recall: 0.992. Precision: 0.913
Iter 450. Loss: 0.041. Accuracy: 0.994. Recall: 0.993. Precision: 0.918
Tracking: MOT16-02
Tracks found: 158
Runtime for MOT16-02: 141.0 s.
Tracking: MOT16-11
Tracks found: 104
Runtime for MOT16-11: 60.3 s.
Runtime for all sequences: 201.2 s.
          IDF1   IDP   IDR  Rcll  Prcn  GT MT PT ML  FP    FN IDs   FM  MOTA  MOTP
MOT16-02 48.0% 68.2% 37.1% 52.2% 96.1%  62 12 37 13 390  8873 114  213 49.5% 0.089
MOT16-11 71.7% 7

0it [00:00, ?it/s]

Iter 50. Loss: 0.052. Accuracy: 0.993. Recall: 0.991. Precision: 0.914
Iter 100. Loss: 0.054. Accuracy: 0.996. Recall: 0.992. Precision: 0.935
Iter 150. Loss: 0.036. Accuracy: 0.997. Recall: 0.988. Precision: 0.958
Iter 200. Loss: 0.042. Accuracy: 0.998. Recall: 0.994. Precision: 0.962
Iter 250. Loss: 0.015. Accuracy: 0.999. Recall: 0.998. Precision: 0.990
Iter 300. Loss: 0.016. Accuracy: 0.999. Recall: 0.998. Precision: 0.991
Iter 350. Loss: 0.015. Accuracy: 0.999. Recall: 0.999. Precision: 0.992
Iter 400. Loss: 0.022. Accuracy: 0.999. Recall: 0.997. Precision: 0.987
Iter 450. Loss: 0.017. Accuracy: 0.999. Recall: 1.000. Precision: 0.991
Tracking: MOT16-02
Tracks found: 102
Runtime for MOT16-02: 132.4 s.
Tracking: MOT16-11
Tracks found: 86
Runtime for MOT16-11: 58.3 s.
Runtime for all sequences: 190.7 s.
          IDF1   IDP   IDR  Rcll  Prcn  GT MT PT ML  FP    FN IDs   FM  MOTA  MOTP
MOT16-02 48.2% 68.4% 37.2% 52.2% 96.1%  62 11 38 13 390  8873 101  219 49.6% 0.095
MOT16-11 70.3% 77

0it [00:00, ?it/s]

Iter 50. Loss: 0.013. Accuracy: 0.999. Recall: 0.999. Precision: 0.994
Iter 100. Loss: 0.020. Accuracy: 0.999. Recall: 0.999. Precision: 0.990
Iter 150. Loss: 0.012. Accuracy: 0.999. Recall: 1.000. Precision: 0.989
Iter 200. Loss: 0.017. Accuracy: 0.999. Recall: 0.998. Precision: 0.984
Iter 250. Loss: 0.019. Accuracy: 0.999. Recall: 0.997. Precision: 0.987
Iter 300. Loss: 0.011. Accuracy: 0.999. Recall: 0.999. Precision: 0.991
Iter 350. Loss: 0.011. Accuracy: 0.999. Recall: 0.998. Precision: 0.989
Iter 400. Loss: 0.016. Accuracy: 0.999. Recall: 0.998. Precision: 0.985
Iter 450. Loss: 0.014. Accuracy: 0.999. Recall: 0.999. Precision: 0.989
Tracking: MOT16-02
Tracks found: 98
Runtime for MOT16-02: 131.9 s.
Tracking: MOT16-11
Tracks found: 88
Runtime for MOT16-11: 58.3 s.
Runtime for all sequences: 190.2 s.
          IDF1   IDP   IDR  Rcll  Prcn  GT MT PT ML  FP    FN IDs   FM  MOTA  MOTP
MOT16-02 47.9% 68.1% 37.0% 52.2% 96.1%  62 11 38 13 390  8873 100  221 49.6% 0.095
MOT16-11 70.8% 78.

0it [00:00, ?it/s]

Iter 50. Loss: 0.011. Accuracy: 0.999. Recall: 0.999. Precision: 0.989
Iter 100. Loss: 0.011. Accuracy: 0.999. Recall: 0.999. Precision: 0.987
Iter 150. Loss: 0.010. Accuracy: 1.000. Recall: 0.999. Precision: 0.993
Iter 200. Loss: 0.009. Accuracy: 0.999. Recall: 1.000. Precision: 0.993
Iter 250. Loss: 0.014. Accuracy: 0.999. Recall: 0.998. Precision: 0.990
Iter 300. Loss: 0.023. Accuracy: 0.999. Recall: 0.998. Precision: 0.986
Iter 350. Loss: 0.015. Accuracy: 0.999. Recall: 0.998. Precision: 0.985
Iter 400. Loss: 0.011. Accuracy: 0.999. Recall: 1.000. Precision: 0.989
Iter 450. Loss: 0.024. Accuracy: 0.999. Recall: 0.998. Precision: 0.987
Tracking: MOT16-02
Tracks found: 87
Runtime for MOT16-02: 132.6 s.
Tracking: MOT16-11
Tracks found: 79
Runtime for MOT16-11: 58.3 s.
Runtime for all sequences: 190.9 s.
          IDF1   IDP   IDR  Rcll  Prcn  GT MT PT ML  FP    FN IDs   FM  MOTA  MOTP
MOT16-02 47.1% 66.9% 36.3% 52.2% 96.1%  62 11 38 13 390  8873 102  219 49.6% 0.095
MOT16-11 71.4% 78.

0it [00:00, ?it/s]

Iter 50. Loss: 0.012. Accuracy: 0.999. Recall: 0.999. Precision: 0.993
Iter 100. Loss: 0.009. Accuracy: 1.000. Recall: 0.999. Precision: 0.993
Iter 150. Loss: 0.012. Accuracy: 0.999. Recall: 0.999. Precision: 0.990
Iter 200. Loss: 0.016. Accuracy: 0.998. Recall: 0.997. Precision: 0.984
Iter 250. Loss: 0.019. Accuracy: 0.999. Recall: 0.999. Precision: 0.988
Iter 300. Loss: 0.016. Accuracy: 0.998. Recall: 0.999. Precision: 0.984
Iter 350. Loss: 0.010. Accuracy: 0.999. Recall: 1.000. Precision: 0.990
Iter 400. Loss: 0.009. Accuracy: 0.999. Recall: 0.999. Precision: 0.988
Iter 450. Loss: 0.012. Accuracy: 0.998. Recall: 0.999. Precision: 0.986
Tracking: MOT16-02
Tracks found: 100
Runtime for MOT16-02: 130.6 s.
Tracking: MOT16-11
Tracks found: 87
Runtime for MOT16-11: 58.1 s.
Runtime for all sequences: 188.7 s.
          IDF1   IDP   IDR  Rcll  Prcn  GT MT PT ML  FP    FN IDs   FM  MOTA  MOTP
MOT16-02 47.7% 67.8% 36.8% 52.2% 96.1%  62 11 38 13 390  8873 104  220 49.6% 0.096
MOT16-11 70.5% 77

In [None]:
# Restore the weights saved by the training loop (best validation IDF1).
best_ckpt = torch.load(osp.join(root_dir, 'output', 'best_ckpt.pth'))
assign_net.load_state_dict(best_ckpt)

tracker =  MPNTracker(assign_net=assign_net.eval(), obj_detect=None, patience=MAX_PATIENCE)
test_db = torch.load(osp.join(gnn_root_dir, 'data/preprocessed_data_test_2.pth'))
# NOTE: despite its name, `val_sequences` holds the MOT16 *test* sequences from here on.
val_sequences = MOT16Sequences('MOT16-test', osp.join(root_dir, 'data/MOT16'), vis_threshold=0.)
# Predictions are written to the output directory.
run_tracker(val_sequences, db=test_db, tracker=tracker, output_dir=osp.join(root_dir, 'output'))

Tracking: MOT16-01
No GT evaluation data available.
Tracks found: 92
Runtime for MOT16-01: 52.1 s.
Writing predictions to: gdrive/My Drive/Colab Notebooks/cv3dst_exercise/output/MOT16-01.txt
Tracking: MOT16-03
No GT evaluation data available.
Tracks found: 651
Runtime for MOT16-03: 3080.7 s.
Writing predictions to: gdrive/My Drive/Colab Notebooks/cv3dst_exercise/output/MOT16-03.txt
Tracking: MOT16-08
No GT evaluation data available.
Tracks found: 192
Runtime for MOT16-08: 116.4 s.
Writing predictions to: gdrive/My Drive/Colab Notebooks/cv3dst_exercise/output/MOT16-08.txt
Tracking: MOT16-12
No GT evaluation data available.
Tracks found: 157
Runtime for MOT16-12: 40.7 s.
Writing predictions to: gdrive/My Drive/Colab Notebooks/cv3dst_exercise/output/MOT16-12.txt
Runtime for all sequences: 3289.8 s.
