In [None]:
#Imports
from __future__ import print_function
import os
import time
import json
import argparse
import numpy as np
import soundfile as sf
from torchvision.transforms import v2 as T
import utils.extra as extra
import cv2
from torchvision.io import read_image


import torch
import torch.nn as nn
import torch.optim as optim
import time
from torch.utils.data import DataLoader

from sklearn.metrics import accuracy_score, confusion_matrix
import matplotlib.pyplot as plt
import dataset
from utils.params import Params
from PIL import Image

In [None]:
#Set up the model, load it in, and prepare data transforms

model_name = "maskrcnn_v0"

# Hyperparameters are read from the YAML stored next to the checkpoint.
# NOTE(review): "DEFAULT" is presumably the config section name - confirm in utils.params.
params = Params("saved_models/" + model_name + "_hparams.yaml", "DEFAULT")
device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')

# Dynamically import models/resnet50.py; the non-empty fromlist makes
# __import__ return the submodule itself instead of the top-level package.
model_import = __import__('.'.join(['models', "resnet50"]),  fromlist=['object'])
model = model_import.net(params).to(device)

# map_location remaps the stored tensors onto whatever device is available,
# so a checkpoint saved on a GPU still loads on a CPU-only machine.
model.load_state_dict(
    torch.load("saved_models/" + model_name + ".ckpt", map_location=device)
)
model.eval()

loss_function = nn.BCELoss()
val = model_import.val

# Evaluation-time preprocessing only (no random augmentation):
# convert to float with value scaling, strip tensor subclasses, resize to 512x512.
transforms = [
    T.ToDtype(torch.float, scale=True),
    T.ToPureTensor(),
    T.Resize((512, 512)),
]

transformer = T.Compose(transforms)


In [4]:
#Helper functions: rasterize JSON polygon labels into mask tensors, and compute the IoU score

def json_to_mask(json_name, height, width):
    """Rasterize the polygon annotations of one test image into binary masks.

    Reads ``../Dataset/test/labels/<json_name>.json`` and the matching
    ``../Dataset/test/images/<json_name>.png``. Every entry of the JSON
    ``"shapes"`` list is filled as a polygon into its own mask, after its
    points are rescaled from the image resolution to ``(height, width)``.
    NOTE(review): the JSON layout looks like LabelMe output - confirm.

    Args:
        json_name: File stem shared by the label JSON and the image PNG.
        height: Number of rows of the returned masks.
        width: Number of columns of the returned masks.

    Returns:
        A ``torch.float32`` tensor of shape ``(num_shapes, height, width)``
        holding 1.0 inside each polygon and 0.0 elsewhere. If the annotation
        contains no shapes, a single all-zero mask of shape
        ``(1, height, width)`` is returned.

    Raises:
        FileNotFoundError: If the label file is missing or the image cannot
            be read.
    """
    # Context manager so the label file handle is always closed.
    with open("../Dataset/test/labels/"+json_name+".json") as f:
        annotation = json.load(f)

    image_path = "../Dataset/test/images/"+json_name+".png"
    im = cv2.imread(image_path)
    if im is None:
        # cv2.imread signals failure by returning None instead of raising.
        raise FileNotFoundError("Could not read image: " + image_path)

    shapes = annotation["shapes"]
    # Always return at least one (empty) mask so callers can index masks[0].
    masks = np.zeros((max(len(shapes), 1), height, width))

    # Annotation points are in source-image pixels; rescale them per axis to
    # the mask resolution (1.0 for a 512px image, 2.0 for a 1024px image when
    # the target is 512).
    scale_y = im.shape[0] / height
    scale_x = im.shape[1] / width

    for i, shape in enumerate(shapes):
        # fillPoly requires 32-bit integer points; the platform default
        # integer dtype is not int32 everywhere, so set it explicitly.
        points = np.array(
            [[int(point[0] / scale_x), int(point[1] / scale_y)] for point in shape["points"]],
            dtype=np.int32,
        )
        # masks[i, :, :] is a view, so the polygon is drawn in place.
        cv2.fillPoly(masks[i, :, :], pts=[points], color=(1, 0, 0))

    return torch.from_numpy(masks).to(dtype=torch.float32)

def iou_score(pred, actual, threshold):
    """Compute the intersection-over-union of a prediction and a ground truth mask.

    Both inputs may be NumPy arrays, torch tensors (on any device, with or
    without gradients) or anything ``torch.as_tensor`` accepts; they must have
    the same shape.

    Args:
        pred: Predicted mask (boolean or scores). Values ``>= threshold``
            count as foreground.
        actual: Ground truth mask, expected to be binary (bool or 0/1).
        threshold: Cut-off applied to ``pred``.

    Returns:
        The IoU as a Python float. Returns 1.0 when both the thresholded
        prediction and the ground truth are empty (a correct "nothing here"
        prediction), and 0.0 if the union is empty for any other reason.
    """
    # Normalize both inputs to CPU float tensors so the arithmetic below does
    # not depend on which library each mask came from.
    actual_t = torch.as_tensor(actual).detach().cpu().to(torch.float32)
    pred_t = torch.as_tensor(pred).detach().cpu().to(torch.float32)
    pred_t = (pred_t >= threshold).to(torch.float32)

    if not bool(pred_t.any()) and not bool(actual_t.any()):
        return 1.0

    intersection = pred_t * actual_t
    union = (pred_t + actual_t) >= 1
    union_size = union.sum()
    if int(union_size) == 0:
        # Only reachable with a non-binary ground truth; avoid a 0/0 NaN.
        return 0.0
    return float(intersection.sum() / union_size)

In [10]:
# Run the model over every test image, build one combined ground truth mask
# and one combined predicted mask per image, and score them with IoU.

# List the directory once, in sorted order, so the run is reproducible.
# Only .png files are kept because json_to_mask looks up "<stem>.png".
image_paths = sorted(
    name for name in os.listdir("../Dataset/test/images/")
    if name.lower().endswith(".png")
)
image_numbers = range(len(image_paths))

save_images = True
visualize = False

if visualize:
    fig, ax = plt.subplots(len(image_numbers), 3, figsize=(12, 4 * len(image_numbers)))
    fig.tight_layout(pad=0.2, w_pad=0.01)

ims = []
act_masks = []
pred_masks = []
ious = []

threshold = 0.5        # per-pixel cut-off applied to the summed predicted masks
score_threshold = 0.5  # minimum detection score for a predicted mask to be used

if save_images:
    # cv2.imwrite fails silently (returns False) when the folder is missing.
    os.makedirs("../Dataset/test/preds/", exist_ok=True)

# Inference only: skip autograd bookkeeping to save memory and time.
with torch.no_grad():
    for i in image_numbers:
        #Load in and process the image for input
        im = read_image("../Dataset/test/images/" + image_paths[i])
        ims.append(im.squeeze())
        im = transformer(im)
        im = im.squeeze()
        if im.ndim == 2:
            # Single-channel image: replicate it into three channels.
            im = torch.stack((im, im, im))

        #If there is more than one actual mask, merge them all into one
        mask = json_to_mask(os.path.splitext(image_paths[i])[0], 512, 512)
        act_mask = mask.sum(dim=0) >= 1  # any annotated polygon counts as foreground
        act_masks.append(act_mask.squeeze())

        #Finally, get the predictions and stack the masks in the same way
        out = model([im.to(device)], None)
        print("Pred scores:", out[0]["scores"])
        wanted_masks = torch.where(out[0]["scores"] > score_threshold)
        pred_mask = torch.zeros((512, 512))
        for index in wanted_masks[0]:
            # Bring each mask to the CPU so it can be saved and accumulated
            # into the CPU-side pred_mask regardless of the model's device.
            this_mask = out[0]["masks"][index].squeeze().detach().cpu()

            if save_images:
                # NOTE(review): [:-11] assumes every file name ends in a fixed
                # 11-character suffix - confirm against the dataset naming.
                cv2.imwrite(
                    "../Dataset/test/preds/" + image_paths[i][:-11] + "_" + str(int(index) + 1) + ".png",
                    255.0 * this_mask.numpy(),
                )

            if this_mask.shape[0] == 3:
                this_mask = this_mask[0, :, :].squeeze()
            pred_mask = torch.add(pred_mask, this_mask)
        pred_mask = (pred_mask >= threshold).numpy()
        pred_masks.append(pred_mask)

        ious.append(iou_score(pred_masks[i], act_masks[i], threshold))

# Draw one row per image: input | ground truth | prediction (with its IoU).
if visualize:
    # plt.subplots returns a 1-D axes array for a single row; promote it to
    # 2-D so one loop covers both the single-image and multi-image cases.
    panel_grid = np.atleast_2d(ax)
    for row in range(len(image_numbers)):
        image_ax, truth_ax, pred_ax = panel_grid[row, 0], panel_grid[row, 1], panel_grid[row, 2]

        image_ax.imshow(ims[row], cmap='gray')
        image_ax.set_title("Image (" + image_paths[row] + ")")

        truth_ax.imshow(act_masks[row])
        truth_ax.set_title("Ground Truth")

        pred_ax.imshow(pred_masks[row])
        pred_ax.set_title("Predicted (IoU:" + str(ious[row]) + ")")



Pred scores: tensor([], grad_fn=<IndexBackward0>)
Pred scores: tensor([], grad_fn=<IndexBackward0>)
Pred scores: tensor([], grad_fn=<IndexBackward0>)
Pred scores: tensor([0.3738, 0.1798], grad_fn=<IndexBackward0>)
Pred scores: tensor([], grad_fn=<IndexBackward0>)
Pred scores: tensor([0.2853], grad_fn=<IndexBackward0>)
Pred scores: tensor([], grad_fn=<IndexBackward0>)
Pred scores: tensor([], grad_fn=<IndexBackward0>)
Pred scores: tensor([], grad_fn=<IndexBackward0>)
Pred scores: tensor([0.4729, 0.1849, 0.1848], grad_fn=<IndexBackward0>)
Pred scores: tensor([0.2880, 0.1861], grad_fn=<IndexBackward0>)
Pred scores: tensor([0.3749, 0.2764], grad_fn=<IndexBackward0>)
Pred scores: tensor([], grad_fn=<IndexBackward0>)
Pred scores: tensor([], grad_fn=<IndexBackward0>)
Pred scores: tensor([], grad_fn=<IndexBackward0>)
Pred scores: tensor([], grad_fn=<IndexBackward0>)
Pred scores: tensor([0.0935, 0.0924, 0.0916], grad_fn=<IndexBackward0>)
Pred scores: tensor([], grad_fn=<IndexBackward0>)
Pred sco