In [None]:
import os
import sys
import argparse

# Remember the directory the kernel was started in the first time this cell
# runs. Re-running the cell then resolves the target directory from that fixed
# anchor instead of climbing two more levels on every execution.
if "NOTEBOOK_DIR" not in globals():
    NOTEBOOK_DIR = os.getcwd()
print(NOTEBOOK_DIR)

# The relative paths used in this cell and later ones are resolved from two
# levels above the notebook directory.
os.chdir(os.path.join(NOTEBOOK_DIR, "..", ".."))
print(os.getcwd())

# Emulate a command-line invocation by filling sys.argv before parsing.
# NOTE(review): presumably `config_experiments` also reads sys.argv when it is
# imported, which is why the arguments are injected globally - confirm.
sys.argv = ["view", "--config", "config/single_task_object_detection.yaml"]

parser = argparse.ArgumentParser()
parser.add_argument("--config", type=str, required=True, help="Path to the config file")
args = parser.parse_args()

print(args.config)

In [None]:
from config_experiments import config
from torchvision.transforms import transforms
from dataloader import VOC08Attr
import matplotlib.pyplot as plt
from model import ObjectDetectionModel
from utils import set_device
import torch
from bbox_transform import resize_bounding_boxes, apply_nms
import matplotlib.patches as patches
import torchvision

In [None]:
# Validation-time preprocessing: resize, convert to a tensor, then normalize.
# All parameters come from the "transform" section of the experiment config.
transform_val = transforms.Compose(
    [
        transforms.Resize(
            config["transform"]["resize_values"],
            max_size=config["transform"]["max_size"],
        ),
        transforms.ToTensor(),
        transforms.Normalize(
            config["transform"]["mean"], config["transform"]["std"]
        ),
    ]
)
# The same pipeline is also bound to the shorter name `transform`.
transform = transform_val

In [None]:
# NOTE(review): the checkpoint path is relative, so it only resolves when the
# working directory has been set up as expected.
model_path = "../dl_project/experiments/object_detection/2024-07-25_10-51-08/models/best_model_epoch_95.pth"

device = set_device(config["global"]["gpu_id"])
# This notebook only runs inference, so the model is switched to evaluation
# mode immediately (assumes ObjectDetectionModel is a torch.nn.Module, whose
# default after construction is training mode).
model = ObjectDetectionModel().to(device).eval()
# NOTE(review): torch.load unpickles the file - only load checkpoints from a
# trusted source, or pass weights_only=True if the installed torch supports it.
model.load_state_dict(torch.load(model_path, map_location=device))

In [None]:
# Two views of the same validation split: `val_data` returns untransformed
# samples, `val_data_for_model` applies the validation preprocessing pipeline.
val_data, val_data_for_model = (
    VOC08Attr(train=False, transform=None),
    VOC08Attr(train=False, transform=transform_val),
)

In [None]:
def inference(idx):
    """Run the detector on validation sample `idx` and gather ground truth and predictions.

    The sample is read twice: from `val_data_for_model` for the transformed
    image and region proposals that are fed to the network, and from
    `val_data` for the untransformed image and its annotations.

    Args:
        idx: Index of the sample in the validation datasets.

    Returns:
        Tuple `(image, gt_bbox, gt_class, pred_bbox, pred_class, pred_score)`.
        The three `pred_*` values are the outputs of `apply_nms` moved to the CPU.
    """
    img_transform, img_size_orig_transform, _, _, _, ss_rois_transform = (
        val_data_for_model[idx]
    )
    image, _, gt_class, gt_bbox, _, _ = val_data[idx]

    # Add a leading batch dimension and move the network inputs to the model's device.
    img_transform = img_transform.unsqueeze(0).to(device)
    ss_rois_transform = ss_rois_transform.to(device)

    # One zero per region proposal, shaped (num_rois, 1).
    # NOTE(review): presumably the batch index of each proposal (single image -> 0) - confirm.
    indices_batch = torch.zeros(ss_rois_transform.shape[0], device=device).unsqueeze(-1)

    # Gradients are never needed here; disabling autograd avoids building a
    # computation graph for every image that is evaluated.
    with torch.no_grad():
        cls_max_score_net, max_score_net, bboxs_net = model.prediction_img(
            img_transform, ss_rois_transform, indices_batch
        )

        # Map the predicted boxes from the network input size to the size
        # reported by the dataset for this sample.
        # NOTE(review): (shape[3], shape[2]) is (width, height) assuming an NCHW tensor - confirm.
        bboxs_net = resize_bounding_boxes(
            bboxs_net,
            orig_size=(img_transform.shape[3], img_transform.shape[2]),
            new_size=img_size_orig_transform,
        )

        pred_bbox, pred_class, pred_score = apply_nms(
            cls_max_score_net, max_score_net, bboxs_net
        )

    return (
        image,
        gt_bbox,
        gt_class,
        pred_bbox.cpu(),
        pred_class.cpu(),
        pred_score.cpu(),
    )

In [None]:
def plot_inference(image, gt_bbox, gt_class, pred_bbox, pred_class, pred_score):
    """Display `image` with its boxes overlaid and print both sets of detections.

    Ground-truth boxes are drawn in green (line width 2) and predicted boxes
    in red (line width 1). Each box is unpacked as (x_min, y_min, x_max, y_max).

    Args:
        image: Image passed to `Axes.imshow`.
        gt_bbox: Ground-truth boxes.
        gt_class: Ground-truth class labels, one per ground-truth box.
        pred_bbox: Predicted boxes.
        pred_class: Predicted class labels, one per predicted box.
        pred_score: Prediction scores, one per predicted box.
    """
    _, ax = plt.subplots()
    ax.imshow(image)

    def _draw_boxes(boxes, color, width):
        # Add one unfilled rectangle per box to the current axes.
        for x_min, y_min, x_max, y_max in boxes:
            ax.add_patch(
                patches.Rectangle(
                    (x_min, y_min),
                    x_max - x_min,
                    y_max - y_min,
                    linewidth=width,
                    edgecolor=color,
                    facecolor="none",
                )
            )

    _draw_boxes(gt_bbox, "g", 2)
    _draw_boxes(pred_bbox, "r", 1)

    # Textual summary: network output first, then the ground truth.
    print("NET")
    for coords, label, confidence in zip(pred_bbox, pred_class, pred_score):
        print(f"{coords.int()} \t class: {label.item()} \tscore: {confidence.item():.3f}")

    print("\nGT")

    for coords, label in zip(gt_bbox, gt_class):
        print(f"{coords.int()} \t class: {label.item()}")
    plt.show()

In [None]:
# Draw one validation index uniformly at random and convert it to a plain
# Python int, so the datasets are indexed with an int instead of a 1-element tensor.
idx = torch.randint(low=0, high=len(val_data), size=(1,)).item()

print(f"IDX: {idx}\n")
image, gt_bbox, gt_class, pred_bbox, pred_class, pred_score = inference(idx=idx)
plot_inference(image, gt_bbox, gt_class, pred_bbox, pred_class, pred_score)

## Recall Study


In [None]:
def count_matched_gt(gt_bbox, gt_class, pred_bbox, pred_class, iou_threshold=0.5):
    """Count the ground-truth boxes recovered by the predictions of one image.

    Predictions are visited in order. Each one claims the first still-unmatched
    ground-truth box that has the same class and an IoU of at least
    `iou_threshold` with it. A ground-truth box is matched at most once.

    Args:
        gt_bbox: Ground-truth boxes, convertible to an (N, 4) float tensor.
        gt_class: Ground-truth class labels, one per ground-truth box.
        pred_bbox: Predicted boxes, convertible to an (M, 4) float tensor.
        pred_class: Predicted class labels, one per predicted box.
        iou_threshold: Minimum IoU for a prediction to match a ground-truth box.

    Returns:
        Number of ground-truth boxes that were matched.
    """
    gt_boxes = torch.as_tensor(gt_bbox, dtype=torch.float32).detach().reshape(-1, 4)
    pred_boxes = torch.as_tensor(pred_bbox, dtype=torch.float32).detach().reshape(-1, 4)
    num_gt, num_pred = gt_boxes.shape[0], pred_boxes.shape[0]
    if num_gt == 0 or num_pred == 0:
        return 0

    gt_labels = gt_class.tolist()
    pred_labels = pred_class.tolist()
    # One batched call instead of a separate 1x1 IoU computation per pair.
    iou = torchvision.ops.box_iou(pred_boxes, gt_boxes).tolist()

    unmatched = list(range(num_gt))
    for i_pred in range(num_pred):
        for i_gt in unmatched:
            if iou[i_pred][i_gt] >= iou_threshold and gt_labels[i_gt] == pred_labels[i_pred]:
                # Safe to mutate here because the inner loop stops right away.
                unmatched.remove(i_gt)
                break
        if not unmatched:
            break
    return num_gt - len(unmatched)


# Per-image statistics over the whole validation set:
#   ratio_list - number of ground-truth boxes divided by number of predictions,
#                capped at 100.0 (the cap is also the value used when the
#                image has no predictions at all);
#   rec_list   - fraction of ground-truth boxes matched by a prediction.
ratio_list = []
rec_list = []
for idx in range(len(val_data)):
    _, gt_bbox, gt_class, pred_bbox, pred_class, _ = inference(idx=idx)
    num_gt = gt_bbox.shape[0]
    if num_gt == 0:
        # Neither quantity is defined without ground truth (division by zero).
        continue
    num_pred = pred_bbox.shape[0]
    # Use an exact 100.0 for the capped / no-prediction case: computing it as
    # num_gt / (num_gt * 0.01) is subject to floating-point rounding and can
    # land just below 100.
    ratio_list.append(min(num_gt / num_pred, 100.0) if num_pred else 100.0)
    rec_list.append(
        count_matched_gt(gt_bbox, gt_class, pred_bbox, pred_class) / num_gt
    )

In [None]:
def _mean_or_nan(values):
    """Return the arithmetic mean of `values`, or NaN when it is empty."""
    return sum(values) / len(values) if values else float("nan")


# Ratios at the cap of 100 mark images whose ground-truth count is at least
# 100x the prediction count (in practice: images with no predictions). The
# small tolerance keeps cap values that floating-point rounding pushed just
# below 100 out of the average.
ratios = [ratio for ratio in ratio_list if ratio < 100 - 1e-6]
_ = plt.hist([ratio for ratio in ratio_list if ratio < 2], bins=20)
no_pred_fraction = (
    (len(ratio_list) - len(ratios)) / len(ratio_list) if ratio_list else float("nan")
)
print(f"gt/pred {_mean_or_nan(ratios)}")
print(f"nopred/image {no_pred_fraction}")
print(f"rec avg {_mean_or_nan(rec_list)}")

In [None]:
# Histogram of the per-image values collected in rec_list (100 bins).
_ = plt.hist(rec_list, bins=100)

In [None]:
from metrics import compute_mAP, view_mAP_for_class

# Evaluate the model on the transformed validation set; the bare `mAP` on the
# last line displays the returned metrics as the cell output.
mAP = compute_mAP(val_data_for_model, model, device)
mAP

In [None]:
# Show the "map_per_class" entry of the metrics returned by compute_mAP.
mAP["map_per_class"]

In [None]:
# Mean over the per-class values.
# NOTE(review): if compute_mAP reports a sentinel (e.g. -1) for classes that
# do not occur in the data, this plain mean is biased - confirm.
torch.mean(mAP["map_per_class"])