In [1]:
from ast import literal_eval
import pandas as pd
import os
import cv2
import imagesize
import numpy as np
import albumentations as A
from albumentations.pytorch import ToTensorV2

In [2]:
from src.full_model.evaluate_bbox_variations.custom_dataset_bbox_variations import CustomDatasetBboxVariations

In [3]:
# target side length passed to the resize (LongestMaxSize) and padding (PadIfNeeded) steps in get_transforms
IMAGE_INPUT_SIZE = 512

In [4]:
# csv file that get_test_set_as_df reads
# NOTE(review): absolute path on the original author's machine - adjust before running elsewhere
path_to_partial_test_set = "/u/home/tanida/datasets/dataset-with-reference-reports-partial-1000/test-1000.csv"

In [5]:
def get_test_set_as_df(path_to_csv=None):
    """
    Read the test set csv file into a DataFrame and add two geometry columns.

    Args:
        path_to_csv: Path of the csv file to read. If None (the default), the module-level
            path_to_partial_test_set is used, so existing zero-argument calls keep working.

    Returns:
        pd.DataFrame containing the columns listed in usecols plus
            - "bbox_widths_heights": List[List[int]], one [width, height] pair per bbox of the image
            - "image_width_height": List[int] of len 2, as reported by imagesize.get for the image file
    """
    def compute_bbox_widths_heights(bbox_coordinates_single_image):
        # each bbox is stored as [x1, y1, x2, y2]
        return [[x2 - x1, y2 - y1] for x1, y1, x2, y2 in bbox_coordinates_single_image]

    def retrieve_image_width_height(mimic_image_file_path):
        width, height = imagesize.get(mimic_image_file_path)
        return [width, height]

    if path_to_csv is None:
        # resolved at call time, such that the global can still be changed after this function was defined
        path_to_csv = path_to_partial_test_set

    usecols = [
        "mimic_image_file_path",
        "bbox_coordinates",
        "bbox_labels",
        "bbox_phrases",
        "bbox_phrase_exists",
    ]

    # all of the columns below are stored as strings in the csv_file
    # however, as they are actually lists, we apply the literal_eval func to convert them to lists
    converters = {
        "bbox_coordinates": literal_eval,
        "bbox_labels": literal_eval,
        "bbox_phrases": literal_eval,
        "bbox_phrase_exists": literal_eval,
    }

    test_set_as_df = pd.read_csv(path_to_csv, usecols=usecols, converters=converters)

    # add new columns that contain the bbox_widths_heights (List[List[int]] with len(inner_list) = 2)
    # and image_width_height (List[int] of len 2)
    # each helper only needs a single column, so it is mapped over that column instead of over whole rows
    test_set_as_df["bbox_widths_heights"] = test_set_as_df["bbox_coordinates"].apply(compute_bbox_widths_heights)
    test_set_as_df["image_width_height"] = test_set_as_df["mimic_image_file_path"].apply(retrieve_image_width_height)

    return test_set_as_df

In [6]:
# only keep the first 3 rows of the loaded test set for the experiments in this notebook
dataset_as_df = get_test_set_as_df()[:3]

In [7]:
# preview the loaded rows, including the added bbox_widths_heights and image_width_height columns
dataset_as_df.head()

Unnamed: 0,mimic_image_file_path,bbox_coordinates,bbox_labels,bbox_phrases,bbox_phrase_exists,bbox_widths_heights,image_width_height
0,/u/home/tanida/datasets/mimic-cxr-jpg/files/p1...,"[[327, 231, 1200, 2114], [477, 300, 1200, 968]...","[1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14...","[There is no focal consolidation, pleural effu...","[True, False, False, True, False, False, True,...","[[873, 1883], [723, 668], [751, 423], [819, 72...","[2544, 3056]"
1,/u/home/tanida/datasets/mimic-cxr-jpg/files/p1...,"[[300, 382, 1227, 2332], [477, 436, 1227, 1118...","[1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14...",[Pulmonary vasculature is normal. Lungs are cl...,"[True, False, False, False, True, False, True,...","[[927, 1950], [750, 682], [778, 437], [859, 77...","[2544, 3056]"
2,/u/home/tanida/datasets/mimic-cxr-jpg/files/p1...,"[[229, 652, 1171, 2330], [386, 676, 1086, 1135...","[1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14...",[No acute intrathoracic process. There is no f...,"[True, False, False, False, False, True, True,...","[[942, 1678], [700, 459], [870, 434], [942, 76...","[2539, 2705]"


In [8]:
def vary_bbox_coords_position(row):
    """
    Shift each bbox of a single image by a fraction of its own width and height.

    Args:
        row: Mapping (e.g. a DataFrame row) with the keys
            - "bbox_coordinates": List[List[int]], one [x1, y1, x2, y2] per bbox
            - "bbox_widths_heights": List[List[int]], one [width, height] per bbox
            - "relative_position_variations": List[List[float]], one [x_rel, y_rel] per bbox
            - "image_width_height": two integers [image_width, image_height]

    Returns:
        List[List[int]]: the shifted [x1, y1, x2, y2] per bbox, with every coordinate
        clipped to [0, image_width] (x) or [0, image_height] (y).
    """
    def clip_to_image(value, upper_bound):
        """Keep a shifted coordinate inside the image."""
        return 0 if value < 0 else (upper_bound if value > upper_bound else value)

    boxes = row["bbox_coordinates"]
    box_sizes = row["bbox_widths_heights"]
    relative_shifts = row["relative_position_variations"]
    image_width, image_height = row["image_width_height"]

    shifted_boxes = []

    for (left, top, right, bottom), (box_width, box_height), (x_rel, y_rel) in zip(boxes, box_sizes, relative_shifts):
        # the shift is relative to the bbox size, e.g. x_rel = 0.5 and box_width = 100 give a shift of 50 pixels
        # int() truncates towards zero
        shift_x = int(box_width * x_rel)
        shift_y = int(box_height * y_rel)

        shifted_boxes.append(
            [
                clip_to_image(left + shift_x, image_width),
                clip_to_image(top + shift_y, image_height),
                clip_to_image(right + shift_x, image_width),
                clip_to_image(bottom + shift_y, image_height),
            ]
        )

    return shifted_boxes

In [9]:
num_images = len(dataset_as_df)
num_bboxes_per_image = 29  # every image has 29 bboxes (see the shape comments in vary_bbox_coords_position)

# parameters of the normal distribution from which the relative shifts are drawn
mean = 0
std = 0.1

# seeded generator, such that re-running the notebook yields the same variations
seed = 42
rng = np.random.default_rng(seed)

# one (x_rel, y_rel) pair per bbox and image
relative_position_variations = rng.normal(mean, std, size=(num_images, num_bboxes_per_image, 2))
dataset_as_df["relative_position_variations"] = relative_position_variations.tolist()

# needs the "relative_position_variations" column set in the line above
dataset_as_df["bbox_coordinates_varied"] = dataset_as_df.apply(vary_bbox_coords_position, axis=1)

In [10]:
# inspect the newly added relative_position_variations and bbox_coordinates_varied columns
dataset_as_df.head()

Unnamed: 0,mimic_image_file_path,bbox_coordinates,bbox_labels,bbox_phrases,bbox_phrase_exists,bbox_widths_heights,image_width_height,relative_position_variations,bbox_coordinates_varied
0,/u/home/tanida/datasets/mimic-cxr-jpg/files/p1...,"[[327, 231, 1200, 2114], [477, 300, 1200, 968]...","[1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14...","[There is no focal consolidation, pleural effu...","[True, False, False, True, False, False, True,...","[[873, 1883], [723, 668], [751, 423], [819, 72...","[2544, 3056]","[[0.0002484327798317826, -0.030613123338941364...","[[327, 174, 1200, 2057], [459, 207, 1182, 875]..."
1,/u/home/tanida/datasets/mimic-cxr-jpg/files/p1...,"[[300, 382, 1227, 2332], [477, 436, 1227, 1118...","[1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14...",[Pulmonary vasculature is normal. Lungs are cl...,"[True, False, False, False, True, False, True,...","[[927, 1950], [750, 682], [778, 437], [859, 77...","[2544, 3056]","[[-0.1288327000191102, -0.04847466952915247], ...","[[181, 288, 1108, 2238], [593, 428, 1343, 1110..."
2,/u/home/tanida/datasets/mimic-cxr-jpg/files/p1...,"[[229, 652, 1171, 2330], [386, 676, 1086, 1135...","[1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14...",[No acute intrathoracic process. There is no f...,"[True, False, False, False, False, True, True,...","[[942, 1678], [700, 459], [870, 434], [942, 76...","[2539, 2705]","[[0.24228792944585484, 0.05820166856769004], [...","[[457, 749, 1399, 2427], [521, 698, 1221, 1157..."


In [11]:
def get_transforms():
    """
    Build the albumentations pipeline used for the test set (no data augmentations).

    Returns:
        A.Compose applying LongestMaxSize, PadIfNeeded, Normalize and ToTensorV2 in this order,
        with bboxes in "pascal_voc" format and their labels passed in the "class_labels" field.
    """
    # see compute_mean_std_dataset.py in src/dataset
    pixel_mean, pixel_std = 0.471, 0.302

    resize_step = A.LongestMaxSize(max_size=IMAGE_INPUT_SIZE, interpolation=cv2.INTER_AREA)
    pad_step = A.PadIfNeeded(min_height=IMAGE_INPUT_SIZE, min_width=IMAGE_INPUT_SIZE, border_mode=cv2.BORDER_CONSTANT)
    normalize_step = A.Normalize(mean=pixel_mean, std=pixel_std)
    to_tensor_step = ToTensorV2()

    bbox_settings = A.BboxParams(format="pascal_voc", label_fields=["class_labels"])

    # only deterministic steps, since this pipeline is meant for the test set
    return A.Compose(
        [resize_step, pad_step, normalize_step, to_tensor_step],
        bbox_params=bbox_settings,
    )

# transform pipeline that is handed to the dataset below
test_transforms = get_transforms()

In [12]:
# wrap the (3-row) DataFrame with the varied bbox coordinates and the test transforms in the custom dataset
# NOTE(review): log=None presumably disables logging inside the dataset - confirm in CustomDatasetBboxVariations
dataset = CustomDatasetBboxVariations(dataset_as_df, test_transforms, log=None)

In [17]:
# index the dataset directly: sample_1_new is only assigned in a later cell,
# so referencing it here fails when the notebook is run top to bottom on a fresh kernel
dataset[0].keys()

dict_keys(['bboxes', 'bbox_phrases', 'bbox_phrase_exists'])

In [15]:
# fetch the first sample, a dict with the keys 'bboxes', 'bbox_phrases' and 'bbox_phrase_exists'
sample_1_new = dataset[0]
bboxes = sample_1_new["bboxes"]
# print the shape of each entry of "bboxes" (in the recorded output: one 3-dim tensor per bbox, each of different size)
for bbox in bboxes:
    print(bbox.shape)

torch.Size([1, 315, 146])
torch.Size([1, 112, 121])
torch.Size([1, 71, 126])
torch.Size([1, 121, 137])
torch.Size([1, 100, 66])
torch.Size([1, 73, 109])
torch.Size([1, 45, 46])
torch.Size([1, 50, 163])
torch.Size([1, 331, 139])
torch.Size([1, 111, 126])
torch.Size([1, 76, 123])
torch.Size([1, 132, 135])
torch.Size([1, 105, 64])
torch.Size([1, 73, 112])
torch.Size([1, 45, 45])
torch.Size([1, 50, 139])
torch.Size([1, 194, 62])
torch.Size([1, 479, 60])
torch.Size([1, 50, 158])
torch.Size([1, 48, 155])
torch.Size([1, 34, 35])
torch.Size([1, 252, 139])
torch.Size([1, 124, 68])
torch.Size([1, 82, 34])
torch.Size([1, 126, 140])
torch.Size([1, 39, 41])
torch.Size([1, 80, 41])
torch.Size([1, 20, 21])
torch.Size([1, 194, 313])


In [20]:
from src.object_detector.object_detector import ObjectDetector
import torch

In [36]:
# NOTE(review): return_feature_vectors=True presumably makes the detector also return region features - confirm in ObjectDetector
object_detector = ObjectDetector(return_feature_vectors=True)
# keep a handle on the roi heads, whose box_roi_pool is called directly further below
roi = object_detector.roi_heads

In [21]:
# random dummy batch of shape (4, 1, 512, 512) standing in for real images
images = torch.randn(size=(4,1,512,512))
# run only the backbone of the detector on the dummy batch
features = object_detector.backbone(images)

In [24]:
# convert images and features into the form that the rpn and roi heads are called with below
# NOTE: this rebinds images and features, so re-running only this cell feeds the already transformed
# objects back in - re-run the cell that creates images and features first
images, features = object_detector._transform_inputs_for_rpn_and_roi(images, features)

In [27]:
# get the region proposals from the rpn; the second return value is not needed here
# NOTE(review): the third argument (None) is presumably the targets - confirm against the rpn signature
proposals, _ = object_detector.rpn(images, features, None)

In [49]:
# display the proposals: a list with one tensor per image, each row holding 4 box coordinates
proposals

[tensor([[  0.0000, 265.1243, 214.5091, 408.9094],
         [269.9316, 475.7477, 512.0000, 512.0000],
         [196.6741,   0.0000, 221.5159,  73.7689],
         ...,
         [284.0431,   0.0000, 333.1023,   1.1170],
         [408.0223,   0.0000, 421.9564,  82.1881],
         [429.7241, 321.3693, 445.9416, 349.3703]]),
 tensor([[364.0947,  97.0798, 428.8401, 512.0000],
         [  0.0000, 396.8503,  85.6579, 512.0000],
         [343.2073,   0.0000, 512.0000, 255.4650],
         ...,
         [444.6466, 112.4482, 512.0000, 163.8498],
         [224.0168,   0.0000, 268.2040,  50.9014],
         [195.6041,   0.0000, 247.8583,  73.5272]]),
 tensor([[434.2826, 241.6065, 485.5868, 301.9100],
         [103.8180,  56.6616, 124.5713,  72.2622],
         [ 78.7888,   0.0000, 110.3002,  54.6100],
         ...,
         [125.9488, 339.9266, 202.5423, 402.4144],
         [345.4297,   0.0000, 512.0000,  29.8741],
         [215.3850, 335.2614, 226.2601, 456.3967]]),
 tensor([[341.4861, 270.6695, 392.

In [37]:
# images is the object returned by _transform_inputs_for_rpn_and_roi at this point (not the raw tensor);
# its image_sizes are passed to box_roi_pool below
image_shapes = images.image_sizes

In [48]:
# print the shape of the proposals of every image in the batch
# (looping over the list instead of hard-coding the indices 0-3 keeps this correct for any batch size)
for proposals_single_image in proposals:
    print(proposals_single_image.shape)

torch.Size([1595, 4])
torch.Size([1559, 4])
torch.Size([1567, 4])
torch.Size([1539, 4])


In [45]:
# pool one feature map per proposal from the backbone features
box_roi_pool_feature_maps = roi.box_roi_pool(features, proposals, image_shapes)

In [47]:
# the first dim is the total number of proposals over all 4 images (1595 + 1559 + 1567 + 1539 = 6260 in the recorded run)
box_roi_pool_feature_maps.shape

torch.Size([6260, 2048, 8, 8])