In [9]:
import math
import os

import cv2
import numpy as np
import open3d as o3d
import torch

import fiftyone as fo
import fiftyone.utils.complex_yolo as foucy
import fiftyone.utils.kitti as fouk
import fiftyone.utils.utils_3d as fou3d
import fiftyone.zoo as foz
from fiftyone import ViewField as F

In [10]:
# Load the "kitti-multiview" dataset from the FiftyOne Dataset Zoo; the
# splits are downloaded first if they are not already on disk
dataset = foz.load_zoo_dataset("kitti-multiview")

Downloading split 'train' to '/Users/jacobmarks/fiftyone/kitti-multiview/train' if necessary
Parsing dataset metadata
Found 22443 samples
Downloading split 'test' to '/Users/jacobmarks/fiftyone/kitti-multiview/test' if necessary
Parsing dataset metadata
Found 22554 samples
Dataset info written to '/Users/jacobmarks/fiftyone/kitti-multiview/info.json'
Loading 'kitti-multiview' split 'train'
Importing samples...
 100% |█████████████| 22443/22443 [1.4s elapsed, 0s remaining, 16.4K samples/s]         
Migrating dataset 'kitti-multiview' to v0.18.0
Import complete
Loading 'kitti-multiview' split 'test'
Importing samples...
 100% |█████████████| 22554/22554 [307.4ms elapsed, 0s remaining, 73.6K samples/s]      
Migrating dataset '2022.11.22.14.46.41' to v0.18.0
Import complete
Dataset 'kitti-multiview' created


In [11]:
# View over everything tagged "train" in the full dataset; used further down
# to compute the average object height per class
train_view = dataset.match_tags("train")

In [12]:
# session = fo.launch_app(dataset)

In [13]:
# Work on a small, independent copy of the data: the first 200 entries of the
# "train" split, cloned so that the deletions below do not touch the zoo dataset
dataset = dataset.match_tags("train")[:200].clone()

# The "right" slice is never used in this notebook, so remove its samples from
# the whole subset in one bulk call (rather than group by group while iterating)
right_ids = dataset.select_group_slices("right").values("id")
dataset.delete_samples(right_ids)

## Generate feature maps

In [14]:
# Point cloud boundary used for the BEV maps: only the region in front of the
# vehicle is kept.
# NOTE(review): presumably (x, y, z) bounds in the point cloud frame — confirm
# the axis order and units against `compute_birds_eye_view_maps`.
min_bound = (0, -25, -2.73)
max_bound = (50, 25, 1.27)
# Size of the generated BEV feature maps; `bev_height` is also reused below as
# the model's `img_size`
bev_width = 608
bev_height = 608

In [23]:
# Make the point cloud slice the active one before computing the BEV maps
dataset.group_slice = "pcd"

In [24]:
# Generate one BEV feature map per point cloud, restricted to the bounds and
# map size configured above
fou3d.compute_birds_eye_view_maps(
    dataset,
    bev_width,
    bev_height,
    min_bound=min_bound,
    max_bound=max_bound,
)

Parsing samples...
 100% |█████████████████| 200/200 [17.9s elapsed, 0s remaining, 11.3 samples/s]      


In [25]:
# Object classes of interest: per-class heights are computed for these labels
# and the evaluations below only keep ground truth with these labels
class_list = ["Car", "Pedestrian", "Cyclist"]

In [26]:
# Average ground-truth object height per class, computed on the point cloud
# slice of the training view
train_view.group_slice = "pcd"

# The first entry of each detection's `dimensions` is treated as its height
height = F("dimensions")[0]
exp = F("ground_truth.detections").map(height)

heights_dict = {
    c: train_view.filter_labels("ground_truth", F("label") == c).mean(exp)
    for c in class_list
}

In [27]:
# session = fo.launch_app(train_view, auto = False)

In [28]:
# session.url

## Apply Model

In [29]:
# Thresholds handed to the zoo model as `conf_thresh` / `nms_thresh`
conf_thresh = 0.9
nms_thresh = 0.3

# The model input size is tied to the height of the BEV maps generated above
img_size = bev_height

args = dict(img_size=img_size, conf_thresh=conf_thresh, nms_thresh=nms_thresh)
model = foz.load_zoo_model("complex-yolo-v3-torch", args=args)

In [30]:
# The model has no height information of its own, so give it the average
# ground-truth height of each class computed above
model.set_class_heights(heights_dict)

In [31]:
# Make the "bev" slice the active one before applying the model
# NOTE(review): presumably this slice is created by the BEV map computation
# above — confirm.
dataset.group_slice = "bev"

In [32]:
# Run Complex-YOLO on the BEV feature maps. `foucy` is imported in the
# notebook's top import cell together with the other FiftyOne utilities.
foucy.apply_model(
    model,
    dataset,
    feature_map_field="feature_map_filepath",
    pcd_group_slice="pcd",
    min_bound=min_bound,
    max_bound=max_bound,
)



 100% |██████████████████████████████████████████████████████████████████████████████████████| 200/200 [59.4s elapsed, 0s remaining, 3.4 it/s]      


## Transform coordinates

In [36]:
def get_calib_path(sample):
    """Return the path of the calibration file that belongs to ``sample``.

    The calibration file is expected in a ``calib`` directory that is a
    sibling of the directory holding the sample's media, with the same base
    name as the media file and a ``.txt`` extension, e.g.
    ``<root>/velodyne/000001.pcd`` -> ``<root>/calib/000001.txt``.

    Args:
        sample: any object exposing a ``filepath`` string attribute

    Returns:
        the calibration file path as a string
    """
    media_dir, media_name = os.path.split(sample.filepath)
    root_dir = os.path.dirname(media_dir)
    # splitext() handles extensions of any length, not just three characters
    stem, _ = os.path.splitext(media_name)
    return os.path.join(root_dir, "calib", stem + ".txt")

In [119]:
def get_left_from_pcd(pcd_sample):
    """Return the "left" sample that shares a group with ``pcd_sample``.

    The lookup goes through ``get_group()``, which already returns the samples
    of every slice in the group, so the dataset's active ``group_slice`` is
    neither needed nor modified here.

    Args:
        pcd_sample: a sample of a grouped dataset (exposes ``_dataset`` and
            ``group.id``)

    Returns:
        the sample stored under the ``"left"`` key of the group
    """
    dataset = pcd_sample._dataset
    group = dataset.get_group(pcd_sample.group.id)
    return group["left"]

In [120]:
def get_corners3d(h, w, l, R, t):
    """Compute the eight corners of a 3D box after rotation and translation.

    In the box's local frame the corners span ``[-l/2, l/2]`` along the first
    axis, ``[-h, 0]`` along the second axis and ``[-w/2, w/2]`` along the
    third axis. The first four columns are the corners with second coordinate
    ``0``, the last four those with second coordinate ``-h``.

    Args:
        h: box extent along the second axis
        w: box extent along the third axis
        l: box extent along the first axis
        R: matrix applied to the local corners (``R @ corners``)
        t: 1D offset added to every corner

    Returns:
        an array with one corner per column (``3 x 8`` for a ``3 x 3`` ``R``)
    """
    half_l = l / 2
    half_w = w / 2

    xs = [half_l, half_l, -half_l, -half_l, half_l, half_l, -half_l, -half_l]
    ys = [0, 0, 0, 0, -h, -h, -h, -h]
    zs = [half_w, -half_w, -half_w, half_w, half_w, -half_w, -half_w, half_w]
    local_corners = np.array([xs, ys, zs])

    rotated = R @ local_corners
    return rotated + t[:, np.newaxis]

In [184]:
def get_img2d_shape(img_sample):
    """Return the ``(width, height)`` in pixels of the image of ``img_sample``.

    The size is taken from the sample's ``metadata`` when it is populated;
    otherwise the image is read from ``img_sample.filepath`` with OpenCV.

    Args:
        img_sample: an object exposing ``filepath`` and, optionally, a
            ``metadata`` attribute with ``width`` and ``height``

    Returns:
        a ``(width, height)`` tuple

    Raises:
        ValueError: if the metadata is missing and the image cannot be read
    """
    # `metadata` may be absent or None when it has not been computed yet
    metadata = getattr(img_sample, "metadata", None)
    width = getattr(metadata, "width", None)
    height = getattr(metadata, "height", None)
    if width is not None and height is not None:
        return width, height

    img = cv2.imread(img_sample.filepath)
    if img is None:
        # cv2.imread() signals failure by returning None instead of raising
        raise ValueError("Unable to read image '%s'" % img_sample.filepath)

    h, w = img.shape[:2]
    return w, h

In [190]:
def corners3d_to_img_boxes(P, corners3d, img2d_shape):
    """Project 3D box corners into the image and return relative 2D boxes.

    Each box's eight corners are projected with ``P``, the axis-aligned
    rectangle enclosing the projected points is computed, clipped to the image
    and normalized by the image size.

    Args:
        P: ``(3, 4)`` projection matrix applied to homogeneous 3D points
        corners3d: ``(N, 8, 3)`` array of box corners; an empty input is
            treated as ``N = 0``
        img2d_shape: ``(width, height)`` of the image in pixels

    Returns:
        an ``(N, 4)`` array of ``[x, y, width, height]`` rows, where ``x, y``
        is the top-left corner and all values are fractions of the image size
    """
    # reshape() also turns an empty input into a well-formed (0, 8, 3) array
    corners3d = np.asarray(corners3d, dtype=float).reshape(-1, 8, 3)
    num_boxes = corners3d.shape[0]

    ones = np.ones((num_boxes, 8, 1))
    corners3d_hom = np.concatenate((corners3d, ones), axis=2)  # (N, 8, 4)
    img_pts = np.matmul(corners3d_hom, np.asarray(P).T)  # (N, 8, 3)

    # Perspective division by the third homogeneous coordinate
    depth = img_pts[:, :, 2]
    x = img_pts[:, :, 0] / depth
    y = img_pts[:, :, 1] / depth

    img_w, img_h = img2d_shape[0], img2d_shape[1]

    # Enclosing rectangle, clipped to valid pixel indices, then normalized
    x1 = np.clip(np.min(x, axis=1), 0, img_w - 1) / img_w
    y1 = np.clip(np.min(y, axis=1), 0, img_h - 1) / img_h
    x2 = np.clip(np.max(x, axis=1), 0, img_w - 1) / img_w
    y2 = np.clip(np.max(y, axis=1), 0, img_h - 1) / img_h

    # Convert the (x1, y1, x2, y2) corners to (x, y, width, height)
    return np.stack((x1, y1, x2 - x1, y2 - y1), axis=1)

In [217]:
def add_pcd_sample_detections_to_image(pcd_sample):
    """Project the 3D predictions of ``pcd_sample`` onto its "left" image.

    For every detection in ``pcd_sample.predictions`` the 3D box corners are
    built from its ``dimensions``, ``location`` and ``rotation``, projected
    with the ``P2`` matrix of the sample's calibration file, and stored as a
    2D detection in the ``predictions`` field of the left sample of the same
    group. Samples without predictions are left untouched.

    Args:
        pcd_sample: the point cloud sample holding the 3D predictions
    """
    predictions = pcd_sample.predictions if "predictions" in pcd_sample else None
    # The field can exist on the dataset while being unset on this sample
    if predictions is None or not predictions.detections:
        return

    detections3d = predictions.detections

    # NOTE(review): `_load_calibration_matrices` and `_roty` are private
    # helpers of `fiftyone.utils.kitti` and may change between releases
    calib_mats = fouk._load_calibration_matrices(get_calib_path(pcd_sample))
    # NOTE(review): presumably the projection matrix of the left color
    # camera — confirm against the KITTI calibration format
    P = calib_mats["P2"]

    left_sample = get_left_from_pcd(pcd_sample)
    img2d_shape = get_img2d_shape(left_sample)

    corners3d = []
    for det3d in detections3d:
        h, w, l = det3d["dimensions"]
        t = np.array(det3d["location"])
        # Only the second rotation component is used to orient the box
        R = fouk._roty(det3d["rotation"][1])
        corners3d.append(get_corners3d(h, w, l, R, t).T)

    bboxes = corners3d_to_img_boxes(P, np.array(corners3d), img2d_shape)

    detections2d = [
        fo.Detection(
            label=det3d.label,
            # Store a plain list of Python floats rather than a numpy row
            bounding_box=bbox.tolist(),
            confidence=det3d.confidence,
            alpha=det3d.alpha,
        )
        for det3d, bbox in zip(detections3d, bboxes)
    ]

    left_sample["predictions"] = fo.Detections(detections=detections2d)
    left_sample.save()
        

In [222]:
def project_detections_to_images(pcd_samples):
    """Project the 3D predictions of every given point cloud sample to 2D.

    Args:
        pcd_samples: an iterable of point cloud samples; each one is passed to
            ``add_pcd_sample_detections_to_image()``
    """
    for current_sample in pcd_samples:
        add_pcd_sample_detections_to_image(current_sample)

In [224]:
# Make the point cloud slice the active one
dataset.group_slice = "pcd"
# View restricted to the samples of the "pcd" slice; these carry the 3D
# predictions that get projected onto the images
pcd_samples = dataset.select_group_slices("pcd")

In [225]:
# Write 2D `predictions` onto the left image of every group that has 3D
# predictions
project_detections_to_images(pcd_samples)

## Evaluate detection results

In [236]:
# 2D evaluation: compare the projected predictions with the ground truth of
# the left images, restricted to the classes of interest
dataset.group_slice = "left"

in_class_list = F("label").is_in(class_list)
eval_view_2d = dataset.filter_labels("ground_truth", in_class_list)

results_2d = eval_view_2d.evaluate_detections(
    "predictions", iou=0.5, compute_mAP=True
)

In [249]:
# 3D evaluation: compare the model's predictions with the ground truth on the
# point cloud slice, restricted to the classes of interest. A lower IoU
# threshold is used here than for the 2D evaluation.
dataset.group_slice = "pcd"

in_class_list = F("label").is_in(class_list)
eval_view_3d = dataset.filter_labels("ground_truth", in_class_list)

results_3d = eval_view_3d.evaluate_detections(
    "predictions", iou=0.3, compute_mAP=True
)

Evaluating detections...
 100% |█████████████████| 200/200 [649.5ms elapsed, 0s remaining, 307.9 samples/s]      
Performing IoU sweep...
 100% |█████████████████| 200/200 [955.2ms elapsed, 0s remaining, 196.5 samples/s]      


In [250]:
# Per-class precision / recall / F1 of the 2D (image) detections
results_2d.print_report()

              precision    recall  f1-score   support

         Car       0.72      0.70      0.71       733
     Cyclist       0.60      0.54      0.57        46
  Pedestrian       0.10      0.11      0.10       104

   micro avg       0.63      0.63      0.63       883
   macro avg       0.47      0.45      0.46       883
weighted avg       0.64      0.63      0.63       883



In [248]:
# Per-class precision / recall / F1 of the 3D (point cloud) detections
results_3d.print_report()

              precision    recall  f1-score   support

         Car       0.88      0.86      0.87       733
     Cyclist       0.62      0.57      0.59        46
  Pedestrian       0.34      0.37      0.35       104

   micro avg       0.80      0.79      0.79       883
   macro avg       0.61      0.60      0.60       883
weighted avg       0.80      0.79      0.80       883



## To do

* Add detections to BEV - ground truth and predictions
* Add visualizations along the way
    * BEV - only in front of car --> bounds
* Description of the model
* RGB BEV encoding
    * And how the compute_BEV function stores its data, plus WHY
* Limitations of the model - no height info --> used average height by class
* Add in average $z$ by class
* Talk about how the model could be improved with sensor fusion
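* Explain why different IoU thresholds are used in 2D vs 3D: the overlap shrinks as the number of dimensions increases (e.g. a per-axis overlap fraction of $0.8$ gives $0.8^2 = 0.64$ in 2D but only $0.8^3 \approx 0.51$ in 3D)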
* Mention that the group slice must be set whenever we want to access a particular slice of the groups, and that we can't visualize the dataset in the App while the group slice is set to `pcd`
* Mention right clicking and moving the mouse/mousepad to drag position in the $3d$ viewer