In [1]:
import torch
import os
from models.pyramid import build_pyramid_occupancy_network, build_extended_pyramid_occupancy_network
from model import UNET
import torchvision.transforms as transforms
from dataset import NuScenesDataset
from torch.utils.data import DataLoader
import albumentations as A
import numpy as np
import cv2
from experiments.ipm.ipm_utilities import ipm_transform
import matplotlib.pyplot as plt
from configs.config_utilities import load_config
from utilities.torch import detect_device

from logger import colorise
import matplotlib.pyplot as plt
from nuscenes_utilities import NUSCENES_CLASS_NAMES, flatten_labels
from torchmetrics import classification, Precision, Recall
import pandas as pd
import math
from tqdm import tqdm

In [2]:
# Device that the metric objects and the networks are moved to in the cells below.
device = detect_device()

In [3]:
def plot_seperate_classes(pred: torch.Tensor, is_label:bool = True, title:str = None):
    """Draw one heat-map panel per class on a 2 x 7 grid.

    Args:
        pred: Per-class maps, indexed by class along the first dimension.
        is_label: When False, ``pred`` is passed through a sigmoid before
            plotting; when True it is plotted unchanged.
        title: Optional figure-level title.

    The figure is left as the current matplotlib figure; nothing is returned.
    """
    figure, grid = plt.subplots(nrows=2, ncols=7, figsize=(20, 6))
    figure.suptitle(title, fontweight='bold', fontsize=18)
    panels = grid.flatten()

    # Labels are drawn as they are; anything else is squashed with a sigmoid.
    scores = pred if is_label else pred.sigmoid()

    for idx, class_name in enumerate(NUSCENES_CLASS_NAMES):
        panel = panels[idx]
        heatmap = colorise(scores[idx], "coolwarm", 0, 1)
        panel.imshow(heatmap, origin='lower')
        panel.set_title(class_name)
        panel.axis('off')


def plot_result(
    img: torch.Tensor, label: torch.Tensor, mask: torch.Tensor, pred: torch.Tensor
):
    """Show image, ground truth, masked ground truth and prediction side by side.

    Args:
        img: Image tensor; its first dimension is moved last before display.
        label: Ground-truth class maps.
        mask: Mask tensor; cells equal to -1 are kept in the masked panel and
            every other cell is zeroed.
        pred: Network output; thresholded at ``sigmoid(pred) >= 0.5``.

    The figure is left as the current matplotlib figure; nothing is returned.
    """
    def _to_colour_image(class_maps):
        # Collapse the per-class maps and colour them, then move channels last.
        coloured = colorise(
            flatten_labels(class_maps),
            "nipy_spectral",
            flatten=True,
        )
        return coloured.permute(0, 2, 3, 1).squeeze(0)

    binary_pred = (pred.sigmoid() >= 0.5).long().cpu()
    image_hwc = img.cpu().permute(1, 2, 0)
    keep = (mask.cpu() == -1).long()

    gt_image = _to_colour_image(label.cpu())
    pred_image = _to_colour_image(binary_pred)

    # (picture, title, extra imshow keyword arguments) for each of the four panels.
    panel_specs = (
        (image_hwc, "Image", {}),
        (gt_image, "Ground truth", {"origin": "lower"}),
        (keep[..., None]*gt_image, "Ground truth + Visible mask", {"origin": "lower"}),
        (pred_image, "Predicted", {"origin": "lower"}),
    )

    _, panels = plt.subplots(nrows=1, ncols=4, figsize=(20, 6))
    for panel, (picture, heading, imshow_kwargs) in zip(panels, panel_specs):
        panel.imshow(picture, **imshow_kwargs)
        panel.set_title(heading)
        panel.set_axis_off()

In [4]:
# Configuration object handed to the network builder functions in the evaluation loop.
config = load_config("configs/configs.yml")

In [5]:
# Sample tokens selecting the evaluation samples, read from the CSV as strings.
val_sample_tokens = np.loadtxt("configs/mini_val_sample_tokens.csv", dtype=str)

dataset = NuScenesDataset(
    nuscenes_dir="nuscenes",
    nuscenes_version="v1.0-mini",
    label_dir="labels",
    sample_tokens=val_sample_tokens,
    image_size=(200, 112),
)

# shuffle=False keeps the sample order fixed between runs.
loader_options = dict(
    batch_size=8,
    num_workers=2,
    pin_memory=True,
    shuffle=False,
)
dataset_loader = DataLoader(dataset, **loader_options)

--------------------------------------------------
Loading NuScenes version v1.0-mini ...
--------------------------------------------------


In [6]:
def save_result_to_csv(label, pred, mask, save_name):
    """Compute per-class IoU, precision and recall and write them to a CSV file.

    Each metric is computed twice: once over every cell and once with the
    cells where ``mask != -1`` zeroed out in both prediction and label
    (columns suffixed ``_w_mask``). Values are stored as percentages rounded
    to four decimals, one row per entry of ``NUSCENES_CLASS_NAMES``.

    Args:
        label: Ground-truth tensor with the class dimension at index 1.
        pred: Network output with the same layout as ``label``. It is treated
            as raw logits, as the plotting helpers in this notebook do.
        mask: Tensor without the class dimension; cells equal to -1 are kept.
        save_name: Destination path of the CSV file. Missing parent
            directories are created.
    """
    num_labels = pred.shape[1]

    def _per_class_percent(metric_cls, preds, target):
        # A fresh metric per call, so no state leaks between the masked and
        # unmasked evaluations; it lives on whatever device the inputs are on.
        metric = metric_cls(
            task="multilabel",
            num_classes=None,
            num_labels=num_labels,
            average="none",
        ).to(preds.device)
        scores = metric(preds, target).detach().cpu().numpy().astype(float)
        return np.round(scores * 100, 4)

    # Binarise once, up front. Multiplying raw logits by the mask only worked
    # because a logit of 0 maps to a probability of exactly 0.5, which the
    # strict ">" threshold rejects; masking hard decisions does not depend on
    # that coincidence. The strict comparison mirrors torchmetrics' default.
    binary_pred = (pred.sigmoid() > 0.5).long()

    # Broadcasts over the class dimension, so no explicit expand is needed.
    keep = (mask == -1).long().unsqueeze(1)
    masked_pred = keep * binary_pred
    masked_label = keep * label

    metric_classes = {
        "iou": classification.JaccardIndex,
        "precision": Precision,
        "recall": Recall,
    }

    data = {"classes": NUSCENES_CLASS_NAMES}
    for column, metric_cls in metric_classes.items():
        data[column] = _per_class_percent(metric_cls, binary_pred, label)
    for column, metric_cls in metric_classes.items():
        data[f"{column}_w_mask"] = _per_class_percent(metric_cls, masked_pred, masked_label)

    out_dir = os.path.dirname(save_name)
    if out_dir:
        os.makedirs(out_dir, exist_ok=True)

    pd.DataFrame(data).to_csv(f"{save_name}", header=True, index=False)


def save_pred_model_based(image, label, mask, pred, model_nm, n_scenes):
    """Save, per sample, the overview figure and the per-class figure of one model.

    Files are written to ``pred_img/{n_scenes}scene/{model_nm}/pred`` and
    ``pred_img/{n_scenes}scene/{model_nm}/pred_binary``.

    Args:
        image, label, mask, pred: Tensors indexed by sample along the first
            dimension. ``pred`` holds raw network outputs.
        model_nm: Model name used as the sub-directory name.
        n_scenes: Prefix of the scene directory name.
    """
    model_dir = f"pred_img/{str(n_scenes)}scene/{model_nm}"
    pred_dir = f"{model_dir}/pred"
    binary_dir = f"{model_dir}/pred_binary"

    # Create the directories per model. Guarding on the scene directory alone
    # skipped them for every model after the first, so savefig failed.
    for directory in (pred_dir, binary_dir):
        os.makedirs(directory, exist_ok=True)

    for i in tqdm(range(len(image))):
        plot_result(image[i], label[i], mask[i], pred[i])
        plt.savefig(f"{pred_dir}/pred_img{i}.png")
        plt.close()

        # pred is a raw network output (plot_result applies the sigmoid
        # itself), so request the sigmoid here rather than plotting logits.
        plot_seperate_classes(pred[i], is_label=False)
        plt.savefig(f"{binary_dir}/binary_img{i}.png")
        plt.close()


def save_pred_image_based(image, label, mask, pred, model_nm, n_scenes):
    """Save the prediction images of one model, grouped by sample.

    For sample ``i`` everything goes to
    ``pred_img/{n_scenes}scene_by_image/img_{i}/``. The model-independent
    files (input image, ground truth, masked ground truth) are written only
    if at least one of them is missing. The model-specific files (prediction
    and masked prediction) are always rewritten.

    Args:
        image, label, mask, pred: Tensors indexed by sample along the first
            dimension. ``pred`` is thresholded at ``sigmoid(pred) >= 0.5``;
            mask cells equal to -1 are kept, all others are zeroed.
        model_nm: Model name embedded in the prediction file names.
        n_scenes: Prefix of the output directory name.
    """
    def _to_colour_image(class_maps):
        # Collapse the per-class maps and colour them, then move channels last.
        coloured = colorise(
            flatten_labels(class_maps),
            "nipy_spectral",
            flatten=True,
        )
        return coloured.permute(0, 2, 3, 1).squeeze(0)

    def _save_panel(picture, path, **imshow_kwargs):
        # Borderless single-image figure written straight to disk.
        plt.imshow(picture, **imshow_kwargs)
        plt.axis('off')
        plt.savefig(path, bbox_inches='tight', pad_inches=0)
        plt.close()

    pred = (pred.sigmoid() >= 0.5).long().cpu()
    mask = (mask.cpu() == -1).long()

    root_dir = f"pred_img/{str(n_scenes)}scene_by_image"
    # makedirs also creates a missing "pred_img" parent, which os.mkdir could not.
    os.makedirs(root_dir, exist_ok=True)

    for i in tqdm(range(len(image))):
        sample_dir = f"{root_dir}/img_{i}"
        os.makedirs(sample_dir, exist_ok=True)
        stem = f"{sample_dir}/img_{i}"
        keep = mask[i][..., None]

        shared_files = (f"{stem}.png", f"{stem}_gt.png", f"{stem}_gt_w_mask.png")
        if not all(os.path.exists(path) for path in shared_files):
            # The ground truth is only colourised when it has to be written.
            colorised_gt = _to_colour_image(label[i].cpu())

            _save_panel(image[i].cpu().permute(1, 2, 0), f"{stem}.png")
            _save_panel(colorised_gt, f"{stem}_gt.png", origin="lower")
            _save_panel(keep*colorised_gt, f"{stem}_gt_w_mask.png", origin="lower")

        colorised_pred = _to_colour_image(pred[i])
        _save_panel(colorised_pred, f"{stem}_{model_nm}.png", origin="lower")
        _save_panel(keep*colorised_pred, f"{stem}_{model_nm}_w_mask.png", origin="lower")

In [9]:
# Checkpoint names to evaluate. Each name is the directory and file stem under
# "checkpoints/", and the evaluation loop also picks the network builder from it.
# Earlier selection, kept for reference:
# models = [
#     "Original_PON_V_multilabel_1693230110.0703723",
#     "Original_Fixed_PON_V+H-reversed_multilabel_1693329350.7664964",
#     "Original_Fixed_PON_V+H-stacked_multilabel_1693333195.990043",
# ]

models = [
    # "Full_EPON_H-collage_1693459089.077859",
    "Full_EPON_H-stack_1693466315.8951316",
    "Full_PON_1693460068.9404485"
]

In [10]:

for model_name in models:
    # The checkpoint name decides which architecture is built.
    if model_name == "Full_PON_1693460068.9404485":
        network = build_pyramid_occupancy_network(config)
    elif "collage" in model_name:
        network = build_extended_pyramid_occupancy_network(config, htfm_method="collage")
    elif "stack" in model_name:
        network = build_extended_pyramid_occupancy_network(config, htfm_method="stack")
    else:
        # Without this branch an unknown name would silently reuse the network
        # of the previous iteration (or raise NameError on the first one).
        raise ValueError(
            f"Cannot infer the network architecture from checkpoint name {model_name!r}"
        )

    # match model name to model file path
    model_path = f"checkpoints/{model_name}/{model_name}_00299.pt"

    # map_location lets a checkpoint saved on another device load on this one.
    checkpoint = torch.load(model_path, map_location=device)
    network.load_state_dict(checkpoint["model_state_dict"])
    network.to(device)
    network.eval()

    image_final = []
    pred_final = []
    mask_final = []
    label_final = []

    with torch.no_grad():
        for image, label, mask, calib in dataset_loader:
            image = image.to(device)
            label = label.to(device)
            mask = mask.to(device)
            calib = calib.to(device)

            pred = network(image, calib)

            image_final.append(image)
            pred_final.append(pred)
            mask_final.append(mask)
            label_final.append(label)

    # stack all batches along the sample dimension
    image = torch.cat(image_final, dim=0)
    pred = torch.cat(pred_final, dim=0)
    mask = torch.cat(mask_final, dim=0)
    label = torch.cat(label_final, dim=0)

    # The CSV is the first file written, so its directory must exist already.
    os.makedirs("pred_img", exist_ok=True)

    save_result_to_csv(label, pred, mask, save_name=f"pred_img/{model_name}.csv")
    # save_pred_model_based(image, label, mask, pred, model_nm=model_name, n_scenes="final_10_")
    save_pred_image_based(image, label, mask, pred, model_nm=model_name, n_scenes="final_10_")
        


100%|██████████| 122/122 [00:29<00:00,  4.17it/s]
100%|██████████| 122/122 [00:10<00:00, 11.91it/s]


In [14]:
# Per-class metrics (in percent) of the PON checkpoint, as written by save_result_to_csv.
df_pon = pd.read_csv("pred_img/Full_PON_1693460068.9404485.csv")
df_pon

Unnamed: 0,classes,iou,precision,recall,iou_w_mask,precision_w_mask,recall_w_mask
0,drivable_area,70.7482,80.4641,85.421,75.4146,82.9176,89.2868
1,ped_crossing,37.0297,57.8653,50.7001,35.5399,54.2992,50.7076
2,walkway,36.7591,56.6849,51.1176,41.8142,59.5306,58.4207
3,carpark,21.7706,60.2403,25.4237,24.7612,59.9885,29.6596
4,car,10.0151,24.9921,14.3191,11.7246,31.2417,15.8021
5,truck,3.3748,11.1768,4.6117,4.2715,10.8514,6.5808
6,bus,16.071,31.3606,24.7913,7.5653,14.4945,13.6631
7,trailer,0.0,0.0,0.0,0.0,0.0,0.0
8,construction_vehicle,2.6468,8.1281,3.7766,2.971,10.0711,4.0438
9,pedestrian,0.4907,11.7647,0.5094,0.2913,11.7647,0.2978


In [15]:
# Per-class metrics (in percent) of the EPON H-stack checkpoint, as written by save_result_to_csv.
df_stack = pd.read_csv("pred_img/Full_EPON_H-stack_1693466315.8951316.csv")
df_stack

Unnamed: 0,classes,iou,precision,recall,iou_w_mask,precision_w_mask,recall_w_mask
0,drivable_area,75.2112,86.4248,85.2868,76.7223,87.0786,86.579
1,ped_crossing,54.7307,75.412,66.6187,50.6781,71.3949,63.5899
2,walkway,48.3228,68.6236,62.0274,51.3494,72.0786,64.0998
3,carpark,35.8988,57.4729,48.884,39.5416,59.8274,53.8355
4,car,16.3792,34.1389,23.9457,18.3359,38.7155,25.8342
5,truck,11.835,32.6942,15.6473,7.7739,22.7481,10.5624
6,bus,35.6363,68.2572,42.7153,18.1267,46.3292,22.9449
7,trailer,0.0,0.0,0.0,0.0,0.0,0.0
8,construction_vehicle,1.9559,10.3,2.3575,1.5251,10.2493,1.7602
9,pedestrian,2.3943,33.0214,2.5166,2.6767,25.4902,2.9039


In [16]:
# Unweighted mean over classes of the masked metrics: PON first, then H-stack.
masked_columns = ['iou_w_mask', 'precision_w_mask', 'recall_w_mask']
for metrics_frame in (df_pon, df_stack):
    print(metrics_frame[masked_columns].mean())

iou_w_mask          14.932071
precision_w_mask    27.975514
recall_w_mask       19.534464
dtype: float64
iou_w_mask          19.541907
precision_w_mask    34.899886
recall_w_mask       24.471757
dtype: float64


In [17]:
# One LaTeX table row per class: masked IoU of PON, then of H-stack.
zip_result_iou = zip(NUSCENES_CLASS_NAMES, df_pon['iou_w_mask'], df_stack['iou_w_mask'])

for name, pon,  stack in zip_result_iou:
    # Fixed four-decimal formatting keeps the table columns uniform (no "2.971"
    # or "0.0") and matches the precision/recall rows printed in the next cell.
    print(f"{name} & {pon:.4f}  & {stack:.4f}"+" \\\\")

drivable_area & 75.4146  & 76.7223 \\
ped_crossing & 35.5399  & 50.6781 \\
walkway & 41.8142  & 51.3494 \\
carpark & 24.7612  & 39.5416 \\
car & 11.7246  & 18.3359 \\
truck & 4.2715  & 7.7739 \\
bus & 7.5653  & 18.1267 \\
trailer & 0.0  & 0.0 \\
construction_vehicle & 2.971  & 1.5251 \\
pedestrian & 0.2913  & 2.6767 \\
motorcycle & 0.3115  & 0.0 \\
bicycle & 1.4682  & 4.6584 \\
traffic_cone & 2.5606  & 0.1468 \\
barrier & 0.3551  & 2.0518 \\


In [18]:
# One LaTeX table row per class: masked precision and recall of PON, then of H-stack.
pr_series = (
    df_pon["precision_w_mask"],
    df_pon["recall_w_mask"],
    df_stack["precision_w_mask"],
    df_stack["recall_w_mask"],
)
zip_result_pr = zip(NUSCENES_CLASS_NAMES, *pr_series)

row_end = " \\\\"
for name, *scores in zip_result_pr:
    pon_p, pon_r, stack_p, stack_r = scores
    print(f"{name} & {pon_p:.4f} & {pon_r:.4f}  & {stack_p:.4f} & {stack_r:.4f}" + row_end)

drivable_area & 82.9176 & 89.2868  & 87.0786 & 86.5790 \\
ped_crossing & 54.2992 & 50.7076  & 71.3949 & 63.5899 \\
walkway & 59.5306 & 58.4207  & 72.0786 & 64.0998 \\
carpark & 59.9885 & 29.6596  & 59.8274 & 53.8355 \\
car & 31.2417 & 15.8021  & 38.7155 & 25.8342 \\
truck & 10.8514 & 6.5808  & 22.7481 & 10.5624 \\
bus & 14.4945 & 13.6631  & 46.3292 & 22.9449 \\
trailer & 0.0000 & 0.0000  & 0.0000 & 0.0000 \\
construction_vehicle & 10.0711 & 4.0438  & 10.2493 & 1.7602 \\
pedestrian & 11.7647 & 0.2978  & 25.4902 & 2.9039 \\
motorcycle & 3.6697 & 0.3393  & 0.0000 & 0.0000 \\
bicycle & 17.3077 & 1.5789  & 10.2041 & 7.8947 \\
traffic_cone & 29.8387 & 2.7246  & 33.3333 & 0.1473 \\
barrier & 5.6818 & 0.3774  & 11.1492 & 2.4528 \\


In [None]:
# print(f"-- iou score of -- ")
# for class_name, iou in zip(NUSCENES_CLASS_NAMES, iou_result):
#     print(f"{class_name}, {iou*100:.4f}")

# print(f"-- precision and recall -- ")
# for class_name, _precision, _recall in zip(NUSCENES_CLASS_NAMES, precision_result, recall_result):
#     print(f"{class_name}, {_precision*100:.4f}, {_recall*100:.4f}")

In [None]:
# plot_seperate_classes(label[image_idx], is_label=True)

In [None]:
# plot_seperate_classes(pred[image_idx], is_label=False)

In [None]:
\begin{figure*}[h]
    \centering
    \setkeys{Gin}{width=\linewidth}
\begin{subfigure}{0.25\textwidth}
    \caption*{Images}
\includegraphics{images/pred_img/img_30/img_30.png}\\[3pt]
\includegraphics{images/pred_img/img_52/img_52.png}\\[3pt]
\includegraphics{images/pred_img/img_47/img_47.png}\\[3pt]
\includegraphics{images/pred_img/img_114/img_114.png}\\[3pt]
\includegraphics{images/pred_img/img_118/img_118.png}
\end{subfigure}
\begin{subfigure}{0.1429\linewidth}
    \caption*{Ground truth}
\includegraphics{images/pred_img/img_30/img_30_gt_w_mask.png}\\[3pt]
\includegraphics{images/pred_img/img_52/img_52_gt_w_mask.png}\\[3pt]
\includegraphics{images/pred_img/img_47/img_47_gt_w_mask.png}\\[3pt]
\includegraphics{images/pred_img/img_114/img_114_gt_w_mask.png}\\[3pt]
\includegraphics{images/pred_img/img_118/img_118_gt_w_mask.png}
\end{subfigure}
\begin{subfigure}{0.1429\linewidth}
    \caption*{PON}
\includegraphics{images/pred_img/img_30/img_30_Full_PON_1693460068.9404485_w_mask.png}\\[3pt]
\includegraphics{images/pred_img/img_52/img_52_Full_PON_1693460068.9404485_w_mask.png}\\[3pt]
\includegraphics{images/pred_img/img_47/img_47_Full_PON_1693460068.9404485_w_mask.png}\\[3pt]
\includegraphics{images/pred_img/img_114/img_114_Full_PON_1693460068.9404485_w_mask.png}\\[3pt]
\includegraphics{images/pred_img/img_118/img_118_Full_PON_1693460068.9404485_w_mask.png}
\end{subfigure}
\begin{subfigure}{0.1429\linewidth}
    \caption*{H-PON}
\includegraphics{images/pred_img/img_30/img_30_Full_EPON_H-stack_1693466315.8951316_w_mask.png}\\[3pt]
\includegraphics{images/pred_img/img_52/img_52_Full_EPON_H-stack_1693466315.8951316_w_mask.png}\\[3pt]
\includegraphics{images/pred_img/img_47/img_47_Full_EPON_H-stack_1693466315.8951316_w_mask.png}\\[3pt]
\includegraphics{images/pred_img/img_114/img_114_Full_EPON_H-stack_1693466315.8951316_w_mask.png}\\[3pt]
\includegraphics{images/pred_img/img_118/img_118_Full_EPON_H-stack_1693466315.8951316_w_mask.png}
\end{subfigure}
    \caption{Comparison of PON and H-PON predictions on validation samples, shown alongside the input images and ground truths. Regions covered by the black occlusion masks were ignored during evaluation.}
    \label{fig:prediction_images}
\end{figure*}