In [1]:
from ast import literal_eval
import logging
import os

import albumentations as A
from albumentations.pytorch import ToTensorV2
import cv2
import evaluate
import imagesize
import numpy as np
import pandas as pd
import torch
from torch.utils.data import DataLoader
from tqdm import tqdm

from src.full_model.evaluate_bbox_variations.custom_dataset_bbox_variations import CustomDatasetBboxVariations
from src.full_model.report_generation_model import ReportGenerationModel
from src.full_model.train_full_model import get_tokenizer
from src.path_datasets_and_weights import path_runs_full_model

# specify the checkpoint you want to evaluate by setting "RUN" and "CHECKPOINT"
RUN = 46
CHECKPOINT = "checkpoint_val_loss_19.793_overall_steps_155252.pt"
# side length (in pixels) that get_transforms() resizes/pads every image to
IMAGE_INPUT_SIZE = 512
BATCH_SIZE = 8
# presumably settings for text generation (beam search width, max generated tokens) — TODO confirm against the model code
NUM_BEAMS = 4
MAX_NUM_TOKENS_GENERATE = 300

# test csv file with a reduced number of images (you can create it by setting NUM_ROWS_TO_CREATE_IN_NEW_CSV_FILES in line 67 of create_dataset.py)
# NOTE(review): this comment used to say 1000 images, but the file name below is "test-200.csv" and a later cell
# iterates over 200 rows — confirm the intended number of rows
path_to_partial_test_set = "/u/home/tanida/datasets/dataset-with-reference-reports-partial/test-200.csv"

# path where "bbox_variations_results.txt" will be saved
path_results_txt_file = "/u/home/tanida/region-guided-chest-x-ray-report-generation/src/full_model/evaluate_bbox_variations/bbox_variations_results.txt"

In [2]:
def get_test_set_as_df(path_to_csv=None):
    """
    Read the (partial) test set csv into a DataFrame and add bbox size and image size columns.

    Args:
        path_to_csv: Path of the csv file to read. If None (default), the notebook-level
            path_to_partial_test_set is used (looked up at call time).

    Returns:
        pd.DataFrame with the columns "mimic_image_file_path", "bbox_coordinates", "bbox_labels"
        and "bbox_phrases" read from the csv, plus two derived columns:
            - "bbox_widths_heights": per image, one [width, height] pair per bbox
            - "image_width_height": per image, [width, height] as reported by imagesize.get
    """
    def compute_bbox_widths_heights(bbox_coordinates_single_image):
        # each bbox is [x1, y1, x2, y2], so width = x2 - x1 and height = y2 - y1
        return [[x2 - x1, y2 - y1] for x1, y1, x2, y2 in bbox_coordinates_single_image]

    def retrieve_image_width_height(mimic_image_file_path):
        width, height = imagesize.get(mimic_image_file_path)
        return [width, height]

    if path_to_csv is None:
        path_to_csv = path_to_partial_test_set

    usecols = [
        "mimic_image_file_path",
        "bbox_coordinates",
        "bbox_labels",
        "bbox_phrases"
    ]

    # all of the columns below are stored as strings in the csv_file
    # however, as they are actually lists, we apply the literal_eval func to convert them to lists
    converters = {
        "bbox_coordinates": literal_eval,
        "bbox_labels": literal_eval,
        "bbox_phrases": literal_eval
    }

    test_set_as_df = pd.read_csv(path_to_csv, usecols=usecols, converters=converters)

    # add new columns that contain the bbox_widths_heights (List[List[int]] with len(outer_list)=29 and len(inner_list) = 2)
    # and image_width_height (List[int] of len 2)
    # each new column depends on a single existing column, so map over that column instead of applying row-wise
    # (this also yields an empty column, rather than a DataFrame, when the csv has no rows)
    test_set_as_df["bbox_widths_heights"] = test_set_as_df["bbox_coordinates"].map(compute_bbox_widths_heights)
    test_set_as_df["image_width_height"] = test_set_as_df["mimic_image_file_path"].map(retrieve_image_width_height)

    return test_set_as_df

In [3]:
# load the csv at path_to_partial_test_set, including the derived bbox size and image size columns
test_set_as_df = get_test_set_as_df()

In [4]:
# tokenizer returned by get_tokenizer (imported from src.full_model.train_full_model)
tokenizer = get_tokenizer()

In [5]:
# number of rows (one per image) in the test set; used to size the random aspect ratio variations
num_images = len(test_set_as_df)

In [6]:
# parameters of the normal distribution whose samples are exponentiated into aspect ratio variation factors
# NOTE(review): only `mean` is read by the sampling cell further down, which passes a literal 1.0 as the
# standard deviation instead of `std` — confirm which value is intended
mean = 0
std = 0.5

In [7]:
def get_transforms():
    """
    Build the albumentations pipeline applied to test set images (no data augmentations).

    The pipeline resizes the longest image side to IMAGE_INPUT_SIZE, pads the image to at least
    IMAGE_INPUT_SIZE x IMAGE_INPUT_SIZE with a constant border, normalizes it and converts it to a tensor.
    Bboxes are expected in "pascal_voc" format, with their labels passed as "class_labels".

    Returns:
        The composed albumentations transform.
    """
    # dataset statistics, see compute_mean_std_dataset.py in src/dataset
    pixel_mean, pixel_std = 0.471, 0.302

    resize_longest_side = A.LongestMaxSize(max_size=IMAGE_INPUT_SIZE, interpolation=cv2.INTER_AREA)
    pad_to_input_size = A.PadIfNeeded(
        min_height=IMAGE_INPUT_SIZE,
        min_width=IMAGE_INPUT_SIZE,
        border_mode=cv2.BORDER_CONSTANT,
    )
    normalize = A.Normalize(mean=pixel_mean, std=pixel_std)
    to_tensor = ToTensorV2()

    bbox_params = A.BboxParams(format="pascal_voc", label_fields=["class_labels"])

    # the test set gets no data augmentations, only resizing, padding, normalization and tensor conversion
    return A.Compose(
        [resize_longest_side, pad_to_input_size, normalize, to_tensor],
        bbox_params=bbox_params,
    )

In [8]:
# instantiate the test set transforms defined by get_transforms()
transforms = get_transforms()

In [9]:
# sample one multiplicative aspect ratio factor per bbox (array of shape num_images x 29):
# the exponential of a normal sample is always positive
# NOTE(review): the standard deviation is the literal 1.0 here, not the `std` variable defined in an earlier cell,
# and no random seed is set, so the factors differ on every run — confirm both are intended
aspect_ratio_variations = np.exp(np.random.normal(mean, 1.0, size=(num_images, 29)))

In [10]:
# store the factors in the dataframe, one python list (one row of the array) per image
test_set_as_df["aspect_ratio_variations"] = aspect_ratio_variations.tolist()

In [11]:
# preview the first rows, including the new "aspect_ratio_variations" column
test_set_as_df.head()

Unnamed: 0,mimic_image_file_path,bbox_coordinates,bbox_labels,bbox_phrases,bbox_widths_heights,image_width_height,aspect_ratio_variations
0,/u/home/tanida/datasets/mimic-cxr-jpg/files/p1...,"[[327, 231, 1200, 2114], [477, 300, 1200, 968]...","[1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14...","[There is no focal consolidation, pleural effu...","[[873, 1883], [723, 668], [751, 423], [819, 72...","[2544, 3056]","[0.47207627009996284, 3.7298101144727123, 3.47..."
1,/u/home/tanida/datasets/mimic-cxr-jpg/files/p1...,"[[300, 382, 1227, 2332], [477, 436, 1227, 1118...","[1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14...",[Pulmonary vasculature is normal. Lungs are cl...,"[[927, 1950], [750, 682], [778, 437], [859, 77...","[2544, 3056]","[1.2130515457799664, 1.418061689779785, 1.8061..."
2,/u/home/tanida/datasets/mimic-cxr-jpg/files/p1...,"[[229, 652, 1171, 2330], [386, 676, 1086, 1135...","[1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14...",[No acute intrathoracic process. There is no f...,"[[942, 1678], [700, 459], [870, 434], [942, 76...","[2539, 2705]","[0.9991927276203627, 0.5280679905555473, 4.926..."
3,/u/home/tanida/datasets/mimic-cxr-jpg/files/p1...,"[[467, 596, 1453, 2451], [505, 648, 1206, 1154...","[1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14...",[No acute intrathoracic process. There is no f...,"[[986, 1855], [701, 506], [817, 467], [973, 83...","[2258, 2906]","[3.424713621223937, 0.5865182479234781, 0.7146..."
4,/u/home/tanida/datasets/mimic-cxr-jpg/files/p1...,"[[286, 341, 1255, 2073], [450, 368, 1255, 1036...","[1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14...","[The lungs are clear of focal consolidation, p...","[[969, 1732], [805, 668], [846, 369], [873, 66...","[2544, 3056]","[1.7741734241947746, 1.3750439836832253, 0.924..."


In [15]:
import math

In [12]:
def check_coordinate(coord, dimension):
    """
    Make sure that new (varied) coordinate is still within the image.

    Args:
        coord: Coordinate value to check.
        dimension: Upper bound for the coordinate (image width or height).

    Returns:
        0 if coord is negative, dimension if coord exceeds dimension, otherwise coord unchanged.
    """
    return 0 if coord < 0 else min(coord, dimension)

def vary_bbox_coords_by_aspect_ratio(row):
    """
    Vary the aspect ratio of every bbox of a single image while keeping its mid point and area.

    Args:
        row: Mapping (e.g. a dataframe row) with the keys
            - "bbox_coordinates": list of [x1, y1, x2, y2] per bbox
            - "bbox_widths_heights": list of [width, height] per bbox
            - "aspect_ratio_variations": list of positive floats, one factor per bbox
            - "image_width_height": [image_width, image_height]
            The three per-bbox lists are iterated in parallel (stopping at the shortest one).

    Returns:
        List of [x1, y1, x2, y2] with the varied coordinates, truncated to integers and clamped to the image.
        A bbox whose width or height is not positive has no defined aspect ratio and is returned with its
        original (clamped) coordinates instead of raising an error.
    """
    bbox_coords_single_image = row["bbox_coordinates"]  # List[List[int]] of shape 29 x 4
    bbox_widths_heights_single_image = row["bbox_widths_heights"]  # List[List[int]] of shape 29 x 2
    aspect_ratio_variations_bboxes = row["aspect_ratio_variations"]  # List[float] of len 29
    image_width, image_height = row["image_width_height"]  # two integers

    # to store the new bbox coordinates after they have been varied
    varied_bbox_coords_single_image = []

    for bbox_coords, bbox_width_height, ratio_variation in zip(bbox_coords_single_image, bbox_widths_heights_single_image, aspect_ratio_variations_bboxes):
        x1, y1, x2, y2 = bbox_coords
        bbox_width, bbox_height = bbox_width_height

        if bbox_width <= 0 or bbox_height <= 0:
            # degenerate bbox: the aspect ratio would divide by zero (or lead to the square root of a negative number),
            # so keep the bbox as it is and only make sure it lies within the image
            varied_bbox_coords_single_image.append([
                check_coordinate(x1, image_width),
                check_coordinate(y1, image_height),
                check_coordinate(x2, image_width),
                check_coordinate(y2, image_height),
            ])
            continue

        # gt_bbox_mid_point stays the same for the bbox varied in its aspect ratio, and thus serves as the "anchor point"
        # to compute the new bbox coordinates (using the new bbox width and height)
        ground_truth_bbox_mid_point_x = x1 + bbox_width / 2
        ground_truth_bbox_mid_point_y = y1 + bbox_height / 2
        bbox_area = bbox_width * bbox_height
        bbox_aspect_ratio = bbox_width / bbox_height

        # ratio_variation is a single positive float, e.g. 1.232 or 0.845
        bbox_aspect_ratio_new = ratio_variation * bbox_aspect_ratio

        # the new bbox height and width are computed by solving the 2 equations
        # 1) bbox_width_new / bbox_height_new = bbox_aspect_ratio_new
        # 2) bbox_width_new * bbox_height_new = bbox_area
        bbox_height_new = math.sqrt(bbox_area / bbox_aspect_ratio_new)
        bbox_width_new = bbox_aspect_ratio_new * bbox_height_new

        x1_new = ground_truth_bbox_mid_point_x - bbox_width_new / 2
        x2_new = ground_truth_bbox_mid_point_x + bbox_width_new / 2

        y1_new = ground_truth_bbox_mid_point_y - bbox_height_new / 2
        y2_new = ground_truth_bbox_mid_point_y + bbox_height_new / 2

        # truncate to integer pixel coordinates and clamp them to the image borders
        x1 = check_coordinate(int(x1_new), image_width)
        x2 = check_coordinate(int(x2_new), image_width)
        y1 = check_coordinate(int(y1_new), image_height)
        y2 = check_coordinate(int(y2_new), image_height)

        varied_bbox_coords_single_image.append([x1, y1, x2, y2])

    return varied_bbox_coords_single_image

In [16]:
# function used to vary the bbox coordinates of each row (applied to the dataframe in the next cell)
variation_func = vary_bbox_coords_by_aspect_ratio

In [17]:
# compute the varied bbox coordinates of every image by passing each row to variation_func
test_set_as_df["bbox_coordinates_varied"] = test_set_as_df.apply(variation_func, axis=1)

In [18]:
# preview the first rows, including the new "bbox_coordinates_varied" column
test_set_as_df.head()

Unnamed: 0,mimic_image_file_path,bbox_coordinates,bbox_labels,bbox_phrases,bbox_widths_heights,image_width_height,aspect_ratio_variations,bbox_coordinates_varied
0,/u/home/tanida/datasets/mimic-cxr-jpg/files/p1...,"[[327, 231, 1200, 2114], [477, 300, 1200, 968]...","[1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14...","[There is no focal consolidation, pleural effu...","[[873, 1883], [723, 668], [751, 423], [819, 72...","[2544, 3056]","[0.47207627009996284, 3.7298101144727123, 3.47...","[[463, 0, 1063, 2542], [140, 461, 1536, 806], ..."
1,/u/home/tanida/datasets/mimic-cxr-jpg/files/p1...,"[[300, 382, 1227, 2332], [477, 436, 1227, 1118...","[1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14...",[Pulmonary vasculature is normal. Lungs are cl...,"[[927, 1950], [750, 682], [778, 437], [859, 77...","[2544, 3056]","[1.2130515457799664, 1.418061689779785, 1.8061...","[[253, 471, 1273, 2242], [405, 490, 1298, 1063..."
2,/u/home/tanida/datasets/mimic-cxr-jpg/files/p1...,"[[229, 652, 1171, 2330], [386, 676, 1086, 1135...","[1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14...",[No acute intrathoracic process. There is no f...,"[[942, 1678], [700, 459], [870, 434], [942, 76...","[2539, 2705]","[0.9991927276203627, 0.5280679905555473, 4.926...","[[229, 651, 1170, 2330], [481, 589, 990, 1221]..."
3,/u/home/tanida/datasets/mimic-cxr-jpg/files/p1...,"[[467, 596, 1453, 2451], [505, 648, 1206, 1154...","[1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14...",[No acute intrathoracic process. There is no f...,"[[986, 1855], [701, 506], [817, 467], [973, 83...","[2258, 2906]","[3.424713621223937, 0.5865182479234781, 0.7146...","[[47, 1022, 1872, 2024], [587, 570, 1123, 1231..."
4,/u/home/tanida/datasets/mimic-cxr-jpg/files/p1...,"[[286, 341, 1255, 2073], [450, 368, 1255, 1036...","[1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14...","[The lungs are clear of focal consolidation, p...","[[969, 1732], [805, 668], [846, 369], [873, 66...","[2544, 3056]","[1.7741734241947746, 1.3750439836832253, 0.924...","[[125, 556, 1415, 1857], [380, 417, 1324, 986]..."


In [137]:
# inspect all varied bboxes of the image at position 68 (one of the images reported by the small bbox check)
test_set_as_df.iloc[68]["bbox_coordinates_varied"]

[[180, 608, 940, 1752],
 [0, 0, 2080, 1672],
 [508, 1133, 717, 1214],
 [0, 1262, 1089, 2101],
 [645, 950, 1056, 1384],
 [224, 254, 1305, 774],
 [40, 1814, 104, 1931],
 [0, 1005, 2956, 2539],
 [1473, 617, 2023, 1400],
 [1513, 531, 1943, 839],
 [1767, 1110, 1794, 1118],
 [0, 637, 2956, 2277],
 [1130, 795, 1969, 1420],
 [0, 0, 2956, 2539],
 [1957, 1362, 2475, 1883],
 [1551, 1589, 1945, 1827],
 [0, 0, 2482, 2539],
 [0, 0, 2542, 2539],
 [207, 101, 1151, 768],
 [807, 0, 2556, 1086],
 [1351, 834, 1457, 919],
 [1153, 933, 1695, 1652],
 [1188, 756, 1568, 1064],
 [785, 559, 1403, 1485],
 [410, 997, 2438, 2340],
 [896, 1331, 1068, 1504],
 [908, 1645, 1056, 1942],
 [1146, 935, 1293, 1082],
 [825, 1877, 1404, 2152]]

In [136]:
# print (image position, bbox index) for every varied bbox that is less than 10 pixels wide or high
# iterate over the column itself instead of a hard-coded range(200), so the check covers exactly the rows
# that exist (no IndexError for smaller test sets, no skipped rows for larger ones)
for i, bboxes in enumerate(test_set_as_df["bbox_coordinates_varied"]):
    for bbox_coords_index, bbox_coords in enumerate(bboxes):
        x1, y1, x2, y2 = bbox_coords
        if x2 - x1 < 10 or y2 - y1 < 10:
            print(i, bbox_coords_index)

68 10
80 27
107 6
123 27
144 18
153 27
163 27


In [4]:
# scratch: 10 samples from a normal distribution with mean 0 and standard deviation 0.5
np.random.normal(0, 0.5, size=10)

array([-0.59525181,  0.10993591, -0.10645957, -0.70574957, -0.24149051,
        0.60088104, -0.35290152,  0.01259068, -0.19556408,  0.09656956])

In [5]:
# scratch: 10 samples from a normal distribution with mean 0 and standard deviation 1.0
np.random.normal(0, 1.0, size=10)

array([ 0.34929093,  0.59118838,  0.52040987,  0.82522239,  0.42620351,
        0.19564003, -0.51411218, -3.07368882, -0.39515907, -0.79539918])

In [8]:
# scratch: another 10 samples with mean 0 and standard deviation 0.5
np.random.normal(0, 0.5, size=10)

array([ 0.79734168,  0.00930649,  0.28070434, -0.09923841,  0.23160378,
       -0.90954699,  0.06646241, -0.30853982,  0.65408975,  0.38791886])

In [9]:
# scratch: another 10 samples with mean 0 and standard deviation 1
np.random.normal(0, 1, size=10)

array([-0.2801704 , -0.72393771, -0.94331004,  0.56168624, -0.69279264,
        1.70175958, -1.05172261, -0.66115156, -1.01089971, -1.45888071])

In [6]:
# scratch: 10 multiplicative factors obtained by exponentiating normal samples with standard deviation 0.5
np.exp(np.random.normal(0, 0.5, size=10))

array([0.70356096, 0.41949808, 1.45557954, 0.39377462, 1.23275992,
       0.88899211, 1.24116889, 1.54518859, 2.73002589, 1.98552915])

In [7]:
# scratch: 10 multiplicative factors obtained by exponentiating normal samples with standard deviation 1.0
np.exp(np.random.normal(0, 1.0, size=10))

array([2.88232681, 4.24328428, 1.05003478, 1.0945349 , 0.6186245 ,
       1.3507537 , 3.04341375, 0.98087029, 0.99919273, 0.52806799])