In [1]:
# Weights & Biases is used for experiment tracking throughout the notebook.
# `wandb login` is run as a shell command; per the cell output it stores the API key
# in the user's netrc file, so it only needs to succeed once per machine.
import wandb
!wandb login

[34m[1mwandb[0m: Appending key for api.wandb.ai to your netrc file: /home/witold/.netrc


In [2]:
# try:
#     import torchmetrics
# except:
#     !pip install torchmetrics
#     import torchmetrics

In [3]:
# from google.colab import drive
# drive.mount('/content/drive')

In [4]:
import pandas as pd
import numpy as np
import os
from torch.utils.data import Dataset, DataLoader
from torchvision.io import read_image
import torchvision
import ast
import torch
import torch.nn as nn
from tqdm import tqdm
from torchmetrics.detection.map import MeanAveragePrecision

# Dataset locations for each environment the notebook has been run in.
# StarfishDataset defaults to the LOCAL_* pair.
# TODO: select the right pair automatically instead of hard-coding all three.
LOCAL_PATH_ANNOTATIONS = 'data/train.csv'
LOCAL_PATH_IMG_DIR = 'data/train_images/'
KAGGLE_PATH_ANNOTATIONS = '/kaggle/input/tensorflow-great-barrier-reef/train.csv'
KAGGLE_PATH_IMG_DIR = '/kaggle/input/tensorflow-great-barrier-reef/train_images/'
COLAB_PATH_ANNOTATIONS = '/content/drive/MyDrive/data/train.csv'
COLAB_PATH_IMG_DIR = '/content/drive/MyDrive/data/train_images/'

# Hyperparameters for the run; the same mapping is later passed to wandb.init(config=...).
wandb.config = dict(
    learning_rate=0.001,
    epochs=115,
    batch_size=8,
    momentum=0.9,
    weight_decay=0.0005,
    # a predicted bounding box is kept when the model's confidence is at or above this value
    confidence_threshold=0.5,
    optimizer='SGD',
)

In [5]:
class StarfishDataset(Dataset):
    """Annotated frames of the starfish detection data set.

    Only rows whose ``annotations`` column is not the literal string ``'[]'`` are exposed.
    Each item is a tuple ``(image, boxes, labels)``:

    * ``image``  - float tensor, min-max scaled to the 0-1 range,
    * ``boxes``  - float tensor with one ``[x1, y1, x2, y2]`` row per annotation,
    * ``labels`` - int64 tensor of ones (single foreground class).

    Args:
        annotations_file: path of the CSV file with the annotations.
        img_dir: directory that contains the ``video_<id>/<frame>.jpg`` images.
    """

    def __init__(self,
                 annotations_file=LOCAL_PATH_ANNOTATIONS,
                 img_dir=LOCAL_PATH_IMG_DIR
                 ):
        self.img_labels = pd.read_csv(annotations_file)
        self.annotated = self.img_labels[self.img_labels['annotations'] != '[]']  # get only annotated frames
        self.img_dir = img_dir

    def __len__(self):
        """Number of annotated frames."""
        return len(self.annotated)

    def __getitem__(self, idx):
        """Load the ``idx``-th annotated frame together with its boxes and labels."""
        row = self.annotated.iloc[idx]
        # Columns are addressed by position via .iloc: plain ``row[0]`` on a string-labelled
        # Series relies on a deprecated positional fallback in pandas.
        # Column 0 and column 2 build the path video_<col0>/<col2>.jpg
        # (presumably video id and frame number - verify against the CSV header).
        image_path = os.path.join(self.img_dir,
                                  'video_{}'.format(row.iloc[0]),
                                  '{}.jpg'.format(row.iloc[2]))
        image = read_image(image_path).float()

        # normalize image to 0-1 - required by torchvision.
        # Divide by the value *range* (max - min), not by the original max, and leave a
        # constant image at all zeros instead of dividing by zero.
        min_image = image.min()
        value_range = image.max() - min_image
        image = image - min_image
        if value_range > 0:
            image = image / value_range

        # The last column holds the annotations as the string repr of a list of dicts
        # with the keys 'x', 'y', 'width' and 'height'.
        annotations = ast.literal_eval(row.iloc[-1])
        coords = []
        for annotation in annotations:
            x1, y1 = annotation['x'], annotation['y']
            x2, y2 = x1 + annotation['width'], y1 + annotation['height']
            coords.append([x1, y1, x2, y2])

        # reshape keeps the (N, 4) layout even if the list of boxes were empty
        boxes = torch.tensor(coords, dtype=torch.float32).reshape(-1, 4)
        # label has to be integer, since we have only one label it is coded as 1 for simplicity
        labels = torch.ones(len(coords), dtype=torch.int64)
        return image, boxes, labels

In [6]:
def collate_fn(batch):
    """Turn a list of ``(image, boxes, labels)`` samples into ``(images, targets)``.

    ``images`` is the list of the samples' images and ``targets`` is a list with one
    ``{'boxes': ..., 'labels': ...}`` dict per sample, in the same order. Nothing is
    stacked, so samples may differ in size.
    """
    images = [image for image, _, _ in batch]
    targets = [{'boxes': sample_boxes, 'labels': sample_labels}
               for _, sample_boxes, sample_labels in batch]
    return images, targets


def slice_output(output: dict, confidence_threshold: float = wandb.config['confidence_threshold']) -> dict:
    """
    Trim one model output to the predictions that pass ``confidence_threshold``.

    ``output`` is a dictionary as returned by the model, namely
    {'boxes':[], 'labels':[], 'scores':[]}. The number ``k`` of scores that are
    >= ``confidence_threshold`` is counted and the first ``k`` entries of every value are
    returned in a new dictionary.

    NOTE(review): taking the *first* k entries only selects the passing predictions if the
    scores are sorted in descending order - confirm this holds for the model in use.
    When no score passes, k is forced to 1, so the first prediction (if any) is still kept.
    """
    passing = np.array(output['scores']) >= confidence_threshold
    keep_count = np.sum(passing)
    if keep_count == 0:
        # always keep one entry so callers can index element 0 of a non-empty output
        keep_count = 1
    return {name: entries[:keep_count] for name, entries in output.items()}

In [7]:
# https://towardsdatascience.com/evaluating-performance-of-an-object-detection-model-137a349c517b

# https://machinelearningmastery.com/fbeta-measure-for-machine-learning/

def calc_iou(gt_bbox, pred_bbox):
    """
    Return the intersection-over-union (IoU) ratio of a ground truth box and a predicted box.

    Both boxes are given as ``[x_topleft, y_topleft, x_bottomright, y_bottomright]``.
    Widths and heights are computed as ``bottomright - topleft + 1``, i.e. both corner
    coordinates are treated as belonging to the box, so boxes that merely touch still
    have a non-zero IoU.

    Returns:
        0.0 when the boxes do not overlap, otherwise intersection area / union area.

    Raises:
        AssertionError: if a box has its top-left corner to the right of / below its
            bottom-right corner. For the predicted box the offending coordinates are
            attached to the exception arguments.
    """
    x_topleft_gt, y_topleft_gt, x_bottomright_gt, y_bottomright_gt = gt_bbox
    x_topleft_p, y_topleft_p, x_bottomright_p, y_bottomright_p = pred_bbox

    if (x_topleft_gt > x_bottomright_gt) or (y_topleft_gt > y_bottomright_gt):
        raise AssertionError("Ground Truth Bounding Box is not correct")
    if (x_topleft_p > x_bottomright_p) or (y_topleft_p > y_bottomright_p):
        # report the *predicted* box coordinates (the original passed y_bottomright_gt here)
        raise AssertionError("Predicted Bounding Box is not correct", x_topleft_p, x_bottomright_p, y_topleft_p,
                             y_bottomright_p)

    # corners of the (possibly empty) intersection rectangle
    x_top_left = max(x_topleft_gt, x_topleft_p)
    y_top_left = max(y_topleft_gt, y_topleft_p)
    x_bottom_right = min(x_bottomright_gt, x_bottomright_p)
    y_bottom_right = min(y_bottomright_gt, y_bottomright_p)

    # if the GT bbox and predicted bbox do not overlap then iou=0
    if x_bottom_right < x_top_left or y_bottom_right < y_top_left:
        return 0.0

    GT_bbox_area = (x_bottomright_gt - x_topleft_gt + 1) * (y_bottomright_gt - y_topleft_gt + 1)
    Pred_bbox_area = (x_bottomright_p - x_topleft_p + 1) * (y_bottomright_p - y_topleft_p + 1)
    intersection_area = (x_bottom_right - x_top_left + 1) * (y_bottom_right - y_top_left + 1)
    union_area = GT_bbox_area + Pred_bbox_area - intersection_area

    return intersection_area / union_area


def calc_precision_recall(image_results):
    """Aggregate per-image detection counts into a single precision and recall.

    Args:
        image_results (dict): maps an image id to its counts, formatted like:
            {
                'img_id1': {'true_positive': int, 'false_positive': int, 'false_negative': int},
                'img_id2': ...
                ...
            }
    Returns:
        tuple: of floats of (precision, recall); a ratio whose denominator is zero
        is reported as 0.0.
    """

    def _ratio_or_zero(numerator, denominator):
        # a ratio with an empty denominator is defined as 0.0
        try:
            return numerator / denominator
        except ZeroDivisionError:
            return 0.0

    tp_total, fp_total, fn_total = 0, 0, 0
    for counts in image_results.values():
        tp_total += counts['true_positive']
        fp_total += counts['false_positive']
        fn_total += counts['false_negative']

    precision = _ratio_or_zero(tp_total, tp_total + fp_total)
    recall = _ratio_or_zero(tp_total, tp_total + fn_total)
    return (precision, recall)


def get_single_image_results(gt_boxes, pred_boxes, iou_thr):
    """Calculates number of true_pos, false_pos, false_neg from single batch of boxes.

    Predictions are matched to ground truth boxes one-to-one: all (prediction, ground truth)
    pairs with IoU >= ``iou_thr`` are visited from the highest IoU down, and a pair is
    accepted only if neither of its boxes has been matched yet. Every matched pair is a
    true positive, every unmatched prediction a false positive and every unmatched
    ground truth box a false negative, so the three counts can never be negative.

    Args:
        gt_boxes (list of list of floats): list of locations of ground truth
            objects as [xmin, ymin, xmax, ymax]
        pred_boxes (list of list of floats): list of predicted boxes, formatted
            like `gt_boxes`
        iou_thr (float): value of IoU to consider as threshold for a
            true prediction.
    Returns:
        dict: with the keys 'true_positive', 'false_positive' and 'false_negative' (ints)
    """
    num_gt = len(gt_boxes)
    num_pred = len(pred_boxes)

    # Nothing can be matched: every prediction is spurious / every ground truth box is missed.
    if num_pred == 0 or num_gt == 0:
        return {'true_positive': 0, 'false_positive': num_pred, 'false_negative': num_gt}

    # Collect every candidate pair whose overlap is large enough.
    candidates = []
    for ipb, pred_box in enumerate(pred_boxes):
        for igb, gt_box in enumerate(gt_boxes):
            iou = calc_iou(gt_box, pred_box)
            if iou >= iou_thr:
                candidates.append((iou, ipb, igb))

    # Greedy assignment, best overlap first; each box takes part in at most one match.
    candidates.sort(key=lambda candidate: candidate[0], reverse=True)
    matched_pred = set()
    matched_gt = set()
    for _, ipb, igb in candidates:
        if ipb in matched_pred or igb in matched_gt:
            continue
        matched_pred.add(ipb)
        matched_gt.add(igb)

    TP = len(matched_gt)
    return {'true_positive': TP, 'false_positive': num_pred - TP, 'false_negative': num_gt - TP}

In [8]:
# Fixed seed so the random split below (and the training shuffle) is reproducible.
torch.manual_seed(23)

dataset = StarfishDataset()
print(len(dataset))

# Fixed-size training set; the remaining frames are divided between validation and test.
train_size = 3000
assert train_size <= len(dataset), 'train_size exceeds the number of annotated frames'
test_size = (len(dataset) - train_size) // 2
val_size = len(dataset) - train_size - test_size
train_dataset, val_dataset, test_dataset = torch.utils.data.random_split(dataset, (train_size, val_size, test_size))

print(
    'Train dataset: {} instances, validation dataset: {}, test dataset: {}'.format(len(train_dataset), len(val_dataset),
                                                                                   len(test_dataset)))

# Training batches are reshuffled every epoch so the optimizer does not see the same
# batch composition and order each time; evaluation loaders keep a fixed order.
train_dataloader = DataLoader(
    train_dataset, batch_size=wandb.config['batch_size'], shuffle=True, num_workers=1, collate_fn=collate_fn)
test_dataloader = DataLoader(
    test_dataset, batch_size=wandb.config['batch_size'], shuffle=False, num_workers=1, collate_fn=collate_fn)
val_dataloader = DataLoader(
    val_dataset, batch_size=wandb.config['batch_size'], shuffle=False, num_workers=1, collate_fn=collate_fn)

device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')
cpu = torch.device('cpu')
print('Used device: {}'.format(device))


# Two classes: the dataset labels every starfish as 1, leaving 0 for "not starfish"
# (presumably the background class of the detection model - confirm in the torchvision docs).
num_classes = 2

4919
Train dataset: 3000 instances, validation dataset: 960, test dataset: 959
Used device: cuda


In [9]:
# Start the wandb run and register the hyperparameters with it.
wandb.init(project="great-barrier-reef", entity="ap-wt", config=wandb.config)

# SSD300 detector with a VGG16 backbone, trained from scratch (no pretrained weights).
model = torchvision.models.detection.ssd300_vgg16(pretrained=False, pretrained_backbone=False, num_classes=num_classes)

model.to(device)

# only parameters that require gradients are handed to the optimizer
params = [p for p in model.parameters() if p.requires_grad]

if wandb.config['optimizer'] == 'SGD':
    optimizer = torch.optim.SGD(params, lr=wandb.config['learning_rate'], momentum=wandb.config['momentum'],
                                weight_decay=wandb.config['weight_decay'])
elif wandb.config['optimizer'] == 'Adam':
    optimizer = torch.optim.Adam(params, lr=wandb.config['learning_rate'], weight_decay=wandb.config['weight_decay'])
elif wandb.config['optimizer'] == 'AdamW':
    optimizer = torch.optim.AdamW(params, lr=wandb.config['learning_rate'], weight_decay=wandb.config['weight_decay'])
else:
    # fail here with a clear message instead of a NameError on `optimizer` further down
    raise ValueError("Unsupported optimizer {!r}; expected 'SGD', 'Adam' or 'AdamW'".format(
        wandb.config['optimizer']))

# Multiplies the learning rate by 0.1 after every 3 scheduler steps.
# NOTE(review): with 115 epochs this drives the learning rate towards zero very early - confirm intended.
lr_scheduler = torch.optim.lr_scheduler.StepLR(optimizer, step_size=3, gamma=0.1)

[34m[1mwandb[0m: Currently logged in as: [33map-wt[0m (use `wandb login --relogin` to force relogin)


[W NNPACK.cpp:79] Could not initialize NNPACK! Reason: Unsupported hardware.


In [None]:
# https://pytorch.org/vision/stable/models.html#runtime-characteristics documents the torchvision detection models:
# what they require as input, what they return, etc. (the model trained here is SSD300-VGG16)

# https://github.com/pytorch/vision/blob/main/references/detection/engine.py reference training and eval loops


wandb.watch(model, log="all", log_freq=50)
for e in tqdm(range(wandb.config['epochs'])):
    print('\n')

    # ---------------------------------------------------------------- training
    model.train()

    for idx, (images, targets) in enumerate(train_dataloader):

        images = [image.to(device) for image in images]

        for d in targets:
            d['boxes'] = d['boxes'].to(device)
            d['labels'] = d['labels'].to(device)

        # called with targets in train mode, the model returns a dict of losses
        loss_dict = model(images, targets)
        loss = sum(loss_dict.values())

        # Clear stale gradients BEFORE the backward pass. Zeroing them between backward()
        # and step() (as the loop did before) wipes the fresh gradients, so the weights
        # would never receive a real update.
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        if idx % 50 == 0:
            # log a plain float rather than a tensor that is still attached to the graph
            wandb.log({"epoch": e, "train_loss": loss.item()})

        if device == torch.device("cuda"):
            torch.cuda.synchronize()
            torch.cuda.empty_cache()

    # -------------------------------------------------------------- validation
    model.eval()
    metric = MeanAveragePrecision()

    with torch.no_grad():
        image_results = {}
        for images, targets in val_dataloader:
            images = [image.to(device) for image in images]
            predictions = model(images)
            # move predictions to the CPU (targets were never moved) and apply the confidence threshold
            outputs = [slice_output({k: v.to(cpu) for k, v in t.items()}) for t in predictions]

            metric.update(outputs, targets)

            # TP / FP / FN are counted per image, using all boxes of that image, so that a
            # prediction can only be matched against ground truth of the same image.
            for target, output in zip(targets, outputs):
                image_results[len(image_results)] = get_single_image_results(
                    target['boxes'].tolist(), output['boxes'].tolist(), 0.5)

        # compute the accumulated metric once per epoch instead of after every batch
        metrics = metric.compute()

    precision, recall = calc_precision_recall(image_results)
    f2_denominator = 4 * precision + recall
    # F-beta with beta = 2; defined as 0 when precision and recall are both 0
    F2 = (5 * precision * recall) / f2_denominator if f2_denominator > 0 else 0

    wandb.log({'eval/MAP': float(metrics['map']),
               'eval/MAR_1': float(metrics['mar_1']),
               'eval/Precision': precision,
               'eval/Recall': recall,
               'eval/F2 score': F2})

    # advance the learning-rate schedule exactly once per epoch
    lr_scheduler.step()



  0%|          | 0/115 [00:00<?, ?it/s]





  return _VF.meshgrid(tensors, **kwargs)  # type: ignore[attr-defined]


In [None]:
# Final evaluation on the held-out test split; results are logged under the test/* keys.
model.eval()
metric = MeanAveragePrecision()
with torch.no_grad():
    image_results = {}
    # iterate the TEST loader - the cell previously re-used val_dataloader while
    # reporting the numbers as test metrics
    for images, targets in test_dataloader:
        images = [image.to(device) for image in images]
        predictions = model(images)
        # move predictions to the CPU (targets were never moved) and apply the confidence threshold
        outputs = [slice_output({k: v.to(cpu) for k, v in t.items()}) for t in predictions]

        metric.update(outputs, targets)

        # TP / FP / FN are counted per image, using all boxes of that image, so that a
        # prediction can only be matched against ground truth of the same image.
        for target, output in zip(targets, outputs):
            image_results[len(image_results)] = get_single_image_results(
                target['boxes'].tolist(), output['boxes'].tolist(), 0.5)

    # compute the accumulated metric once, after all batches have been seen
    metrics = metric.compute()

precision, recall = calc_precision_recall(image_results)
f2_denominator = 4 * precision + recall
# F-beta with beta = 2; defined as 0 when precision and recall are both 0
F2 = (5 * precision * recall) / f2_denominator if f2_denominator > 0 else 0

test_map = float(metrics['map'])
test_mar_1 = float(metrics['mar_1'])

columns = ['metric', 'test/value']
test_metrics = [
    ['MAP', test_map],
    ['MAR1', test_mar_1],
    ['Precision', precision],
    ['Recall', recall],
    ['F2 score', F2]
]
table = wandb.Table(columns=columns, data=test_metrics, allow_mixed_types=True)
wandb.log({"Test set metrics": table})
wandb.log({'test/MAP': test_map,
           'test/MAR_1': test_mar_1,
           'test/Precision': precision,
           'test/Recall': recall,
           'test/F2 score': F2})

In [None]:
# Close the wandb run opened by wandb.init() and ask PyTorch to release its cached GPU memory.
wandb.finish()
torch.cuda.empty_cache()

In [None]:
# Persist only the weights (state_dict); the model has to be re-created before loading them.
# NOTE(review): the file name says FasterRCNN, but the model built in this notebook is
# ssd300_vgg16 - confirm whether the file should be renamed.
torch.save(model.state_dict(), 'FasterRCNN-from-scratch.pt')