In [1]:
import os, glob
import sys
import json
from PIL import Image
from collections import Counter

import numpy as np
import pandas as pd
import plotly.express as px
import plotly.graph_objects as go
import tifffile as tiff
import matplotlib.pyplot as plt
from tqdm import tqdm

import cv2

from sklearn.model_selection import KFold

sys.path.append("detection-wheel")

In [2]:
# Number of passes over the training set; the training loop iterates `range(EPOCHS)`.
EPOCHS = 150

In [3]:

import os
import numpy as np
import torch
import albumentations as A
from albumentations.pytorch import ToTensorV2
from PIL import Image


class PennFudanDataset(torch.utils.data.Dataset):
    """Instance-segmentation dataset yielding ``(image, target)`` pairs.

    Every sample pairs an RGB image with a mask image in which pixel value 0
    is background and each other distinct grey level marks one object
    instance. The target dict uses the keys ``boxes``, ``labels``, ``masks``,
    ``image_id``, ``area`` and ``iscrowd``.

    Args:
        imgs: Sequence of image file paths.
        masks: Sequence of mask file paths, index-aligned with ``imgs``.
        mode: ``'train'`` enables random augmentation; any other value only
            converts the sample to tensors.
    """

    def __init__(self, imgs, masks, mode):
        self.imgs = imgs
        self.masks = masks
        self.mode = mode

        init_augm = []
        if mode == 'train':
            init_augm = [
                # Geometric augmentations (move pixels, so they also affect
                # masks and boxes).
                A.HorizontalFlip(p=0.25),
                A.VerticalFlip(p=0.25),
                A.Transpose(p=0.25),
                # A.GridDistortion(p=0.25),
                # A.RandomSizedCrop(min_max_height=(int(512 * 0.8), int(512 * 0.9)),
                #                     height=512, width=512, p=0.25),

                # Photometric augmentations (image only).
                A.CLAHE(p=0.2),
                A.RandomBrightnessContrast(p=0.2),
                A.RandomGamma(p=0.2),

                A.OneOf([
                        A.GaussNoise(var_limit=[10, 50]),
                        A.GaussianBlur(),
                        A.MotionBlur(),
                        ], p=0.1),
                A.MultiplicativeNoise(per_channel=True, multiplier=(0.95, 1.05), p=0.2),
            ]

        self.transform = A.Compose(
            init_augm + [ToTensorV2()], bbox_params=A.BboxParams(format='pascal_voc'))

    @staticmethod
    def _split_instances(mask):
        """Split an (H, W) instance-id mask into an (N, H, W) uint8 stack of binary masks."""
        obj_ids = np.unique(mask)
        # Drop the background id by value rather than by position, so a mask
        # without any background pixel does not lose its first instance.
        obj_ids = obj_ids[obj_ids != 0]
        return (mask[None, :, :] == obj_ids[:, None, None]).astype(np.uint8)

    @staticmethod
    def _instance_boxes(masks):
        """Return ``[xmin, ymin, xmax, ymax]`` (inclusive pixel indices) for each binary mask."""
        boxes = []
        for instance in masks:
            ys, xs = np.nonzero(instance)
            boxes.append([int(xs.min()), int(ys.min()), int(xs.max()), int(ys.max())])
        return boxes

    def __getitem__(self, idx):
        """Load sample ``idx`` and return ``(image, target)``.

        Returns:
            image: float32 tensor produced by ``ToTensorV2`` and scaled by 1/255.
            target: dict with ``boxes`` (K, 4) float32, ``labels`` (K,) int64
                (all ones, single foreground class), ``masks`` (K, H, W) uint8,
                ``image_id`` (1,) int64, ``area`` (K,) float32 and
                ``iscrowd`` (K,) int64 (all zeros).
        """
        img = np.array(Image.open(self.imgs[idx]).convert("RGB"))
        # The mask is deliberately not converted to RGB: each grey level is
        # an instance id, with 0 being background.
        mask = np.array(Image.open(self.masks[idx]).convert('L'))
        masks = self._split_instances(mask)

        # The fifth bbox field carries the instance index so that each
        # transformed box can be matched back to its mask afterwards.
        bboxes = []
        for i, (xmin, ymin, xmax, ymax) in enumerate(self._instance_boxes(masks)):
            # Skip zero-width / zero-height boxes (one-pixel-thin instances):
            # torchvision detection models require x1 < x2 and y1 < y2.
            if xmax > xmin and ymax > ymin:
                bboxes.append([xmin, ymin, xmax, ymax, i])

        # Masks go through the same pipeline as the image and the boxes, so
        # flips/transposes keep all three spatially aligned.
        transformed = self.transform(image=img, masks=list(masks), bboxes=bboxes)
        img = transformed['image'].float() / 255.0

        out_boxes = transformed['bboxes']
        kept = [int(box[4]) for box in out_boxes]
        # reshape keeps the (K, 4) layout even when K == 0.
        boxes = torch.as_tensor(
            [list(box[:4]) for box in out_boxes], dtype=torch.float32
        ).reshape(-1, 4)

        out_masks = transformed['masks']
        if kept:
            # ascontiguousarray accepts both numpy arrays and CPU tensors and
            # guards against negatively-strided views left by a flip.
            masks = torch.stack([
                torch.as_tensor(np.ascontiguousarray(out_masks[i]), dtype=torch.uint8)
                for i in kept
            ])
        else:
            masks = torch.zeros((0,) + tuple(img.shape[-2:]), dtype=torch.uint8)

        num_objs = len(kept)
        # there is only one class
        labels = torch.ones((num_objs,), dtype=torch.int64)
        image_id = torch.tensor([idx])
        area = (boxes[:, 3] - boxes[:, 1]) * (boxes[:, 2] - boxes[:, 0])
        # suppose all instances are not crowd
        iscrowd = torch.zeros((num_objs,), dtype=torch.int64)

        target = {}
        target["boxes"] = boxes
        target["labels"] = labels
        target["masks"] = masks
        target["image_id"] = image_id
        target["area"] = area
        target["iscrowd"] = iscrowd

        return img, target

    def __len__(self):
        """Number of samples (one per image path)."""
        return len(self.imgs)

In [4]:
import torchvision
from torchvision.models import list_models
# List the model builder names registered under torchvision.models.detection
# (displayed as the cell output).
detection_models = list_models(module=torchvision.models.detection)
detection_models

['fasterrcnn_mobilenet_v3_large_320_fpn',
 'fasterrcnn_mobilenet_v3_large_fpn',
 'fasterrcnn_resnet50_fpn',
 'fasterrcnn_resnet50_fpn_v2',
 'fcos_resnet50_fpn',
 'keypointrcnn_resnet50_fpn',
 'maskrcnn_resnet50_fpn',
 'maskrcnn_resnet50_fpn_v2',
 'retinanet_resnet50_fpn',
 'retinanet_resnet50_fpn_v2',
 'ssd300_vgg16',
 'ssdlite320_mobilenet_v3_large']

In [5]:
import torchvision
import torchvision
from torchvision.models.detection.faster_rcnn import FastRCNNPredictor
from torchvision.models.detection.mask_rcnn import MaskRCNNPredictor
from torchvision.models.resnet import ResNet50_Weights

def get_model_instance_segmentation(num_classes):
    """Build a Mask R-CNN (ResNet-50 FPN v2) with fresh heads for ``num_classes`` classes.

    The network is created with its default pre-trained weights; the box
    predictor and the mask predictor are then swapped for newly initialised
    ones sized for ``num_classes`` outputs.

    Args:
        num_classes: Number of output classes for both heads.

    Returns:
        The modified torchvision Mask R-CNN model.
    """
    detector = torchvision.models.detection.maskrcnn_resnet50_fpn_v2(
        weights="DEFAULT",
        weights_backbone=ResNet50_Weights.IMAGENET1K_V2,
    )
    heads = detector.roi_heads

    # Box head: keep the input width of the existing classifier, replace the predictor.
    box_in_features = heads.box_predictor.cls_score.in_features
    heads.box_predictor = FastRCNNPredictor(box_in_features, num_classes)

    # Mask head: same idea, with a 256-channel hidden layer.
    mask_in_channels = heads.mask_predictor.conv5_mask.in_channels
    mask_hidden_channels = 256
    heads.mask_predictor = MaskRCNNPredictor(
        mask_in_channels, mask_hidden_channels, num_classes
    )

    return detector

In [6]:
from engine import train_one_epoch, evaluate
import utils

In [7]:
# Use the GPU when CUDA is available, otherwise fall back to the CPU.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

In [8]:
# Count the training images. The pattern is restricted to '*.png' so the count
# always matches the '*.png' file lists that are later indexed with it; a stray
# non-PNG file in the folder would otherwise produce out-of-range split indices.
n_imgs = len(glob.glob('/home/viktor/Documents/kaggle/hubmap-2023/experiments/mask-rcnn/new-dataset/train/image/*.png'))
n_imgs



1622

In [9]:
# Gather every training image and mask path. Both lists are sorted because the
# dataset pairs image i with mask i purely by position.
all_imgs = glob.glob('/home/viktor/Documents/kaggle/hubmap-2023/experiments/mask-rcnn/new-dataset/train/image/*.png')
all_imgs.sort()
all_masks = glob.glob('/home/viktor/Documents/kaggle/hubmap-2023/experiments/mask-rcnn/new-dataset/train/mask/*.png')
all_masks.sort()
dataset_train = PennFudanDataset(imgs=all_imgs, masks=all_masks, mode='train')

In [10]:
# Smoke test: take the image tensor of the first sample and display its shape
# and value range.
x=dataset_train[0][0]
x.shape, x.min(), x.max()

(torch.Size([3, 512, 512]), tensor(0.), tensor(1.))

In [11]:
# kf = KFold(n_splits=5, shuffle=True, random_state=43)
# for i, (train_index, test_index) in enumerate(kf.split(range(n_imgs))):
#     if i!=0: continue
#     all_imgs = sorted(glob.glob('/home/viktor/Documents/kaggle/hubmap-2023/experiments/mask-rcnn/new-dataset/train/image/*.png'))
#     all_masks = sorted(glob.glob('/home/viktor/Documents/kaggle/hubmap-2023/experiments/mask-rcnn/new-dataset/train/mask/*.png'))
#     all_imgs = np.array(all_imgs)
#     all_masks = np.array(all_masks)
#     train_img = all_imgs[train_index]
#     train_mask = all_masks[train_index]
#     val_img = all_imgs[test_index]
#     val_mask = all_masks[test_index]
#     dataset_train = PennFudanDataset(train_img, train_mask, get_transform(train=True))
#     dataset_val = PennFudanDataset(val_img, val_mask, get_transform(train=False))
#     train_dl = torch.utils.data.DataLoader(
#         dataset_train, batch_size=4, shuffle=True, num_workers=os.cpu_count(), pin_memory=True, drop_last=True, collate_fn=utils.collate_fn)
#     val_dl = torch.utils.data.DataLoader(
#         dataset_val, batch_size=1, shuffle=False, num_workers=os.cpu_count(), pin_memory=True,collate_fn=utils.collate_fn)
    
#     model = get_model_instance_segmentation(num_classes=2)
#     model.to(device)
#     params = [p for p in model.parameters() if p.requires_grad]
#     optimizer = torch.optim.Adam(params, lr=0.001, weight_decay=1e-6)
#     # scheduler = torch.optim.lr_scheduler.ExponentialLR(optimizer, gamma=0.9)
#     # set linear warmup scheduler, with constant learning rate after warmup
#     scheduler = torch.optim.lr_scheduler.OneCycleLR(optimizer, max_lr=0.0001,
#                                                 steps_per_epoch=10, epochs=EPOCHS//10,
#                                                 pct_start=0.01)
    
#     for epoch in range(EPOCHS):
#         train_one_epoch(model, optimizer, train_dl, device, epoch, print_freq=50)
#         evaluate(model, val_dl, device=device)
#         scheduler.step()
#         model_path = f'fold_{i}_epoch{epoch}.pth'
#         torch.save(model.state_dict(), model_path)
        

In [12]:
# Hold-out split: 1400 randomly chosen images for training, the rest for
# validation. The generator is seeded so the split is identical after every
# kernel restart; with an unseeded draw, resuming from a saved checkpoint would
# validate on images that an earlier run had trained on.
split_rng = np.random.default_rng(43)
all_indices = np.arange(n_imgs)
train_index = split_rng.choice(all_indices, size=1400, replace=False)
# np.setdiff1d returns the sorted indices not selected for training
test_index = np.setdiff1d(all_indices, train_index)



In [13]:
# Sanity check: the number of indices shared by train_index and test_index
# (displayed as the cell output) should be 0 for a disjoint split.
len(np.intersect1d(train_index, test_index))

0

In [14]:

# --- Data -------------------------------------------------------------------
all_imgs = sorted(glob.glob('/home/viktor/Documents/kaggle/hubmap-2023/experiments/mask-rcnn/new-dataset/train/image/*.png'))
all_masks = sorted(glob.glob('/home/viktor/Documents/kaggle/hubmap-2023/experiments/mask-rcnn/new-dataset/train/mask/*.png'))
all_imgs = np.array(all_imgs)
all_masks = np.array(all_masks)
# Images and masks are paired by sort position and the split indices were drawn
# from n_imgs, so all three counts must agree before indexing.
assert len(all_imgs) == len(all_masks) == n_imgs, (
    f"file count mismatch: {len(all_imgs)} images, {len(all_masks)} masks, n_imgs={n_imgs}")
train_img = all_imgs[train_index]
train_mask = all_masks[train_index]
val_img = all_imgs[test_index]
val_mask = all_masks[test_index]
dataset_train = PennFudanDataset(train_img, train_mask, 'train')
dataset_val = PennFudanDataset(val_img, val_mask, 'test')

# os.cpu_count() may return None; fall back to loading in the main process.
n_workers = os.cpu_count() or 0
train_dl = torch.utils.data.DataLoader(
    dataset_train, batch_size=4, shuffle=True, num_workers=n_workers, pin_memory=True, drop_last=True, collate_fn=utils.collate_fn)
val_dl = torch.utils.data.DataLoader(
    dataset_val, batch_size=1, shuffle=False, num_workers=n_workers, pin_memory=True,collate_fn=utils.collate_fn)

# --- Model / optimiser --------------------------------------------------------
model = get_model_instance_segmentation(num_classes=2)
model.to(device)
params = [p for p in model.parameters() if p.requires_grad]
optimizer = torch.optim.Adam(params, lr=0.001, weight_decay=1e-6)
# scheduler = torch.optim.lr_scheduler.ExponentialLR(optimizer, gamma=0.9)
# One-cycle schedule with a very short warm-up. It is stepped once per epoch
# below, so its length is tied to EPOCHS: a fixed step count would leave the
# cycle unfinished for fewer epochs and raise once EPOCHS exceeded it.
scheduler = torch.optim.lr_scheduler.OneCycleLR(optimizer, max_lr=0.0001,
                                            total_steps=EPOCHS,
                                            pct_start=0.01)

# torch.save does not create missing directories.
os.makedirs('ckpts', exist_ok=True)

# --- Training -----------------------------------------------------------------
for epoch in range(EPOCHS):
    train_one_epoch(model, optimizer, train_dl, device, epoch, print_freq=50)
    evaluate(model, val_dl, device=device)
    scheduler.step()
    # One checkpoint per epoch; the fold number is fixed to 0 for this single split.
    model_path = f'ckpts/fold_{0}_epoch{epoch}.pth'
    torch.save(model.state_dict(), model_path)
    

Epoch: [0]  [  0/350]  eta: 0:14:23  lr: 0.000000  loss: 2.9118 (2.9118)  loss_classifier: 0.7202 (0.7202)  loss_box_reg: 0.1259 (0.1259)  loss_mask: 1.4431 (1.4431)  loss_objectness: 0.5804 (0.5804)  loss_rpn_box_reg: 0.0422 (0.0422)  time: 2.4660  data: 1.1636  max mem: 5151
Epoch: [0]  [ 50/350]  eta: 0:01:06  lr: 0.000001  loss: 3.5595 (3.4909)  loss_classifier: 0.6408 (0.6796)  loss_box_reg: 0.1029 (0.1244)  loss_mask: 1.6903 (1.6009)  loss_objectness: 0.9440 (0.9817)  loss_rpn_box_reg: 0.1175 (0.1043)  time: 0.1736  data: 0.0001  max mem: 5851
Epoch: [0]  [100/350]  eta: 0:00:49  lr: 0.000001  loss: 2.7691 (3.2410)  loss_classifier: 0.4172 (0.5828)  loss_box_reg: 0.1112 (0.1197)  loss_mask: 1.2666 (1.5127)  loss_objectness: 0.8648 (0.9220)  loss_rpn_box_reg: 0.0911 (0.1038)  time: 0.1732  data: 0.0001  max mem: 5953
Epoch: [0]  [150/350]  eta: 0:00:37  lr: 0.000002  loss: 2.6284 (3.0689)  loss_classifier: 0.2607 (0.4878)  loss_box_reg: 0.1329 (0.1244)  loss_mask: 1.2349 (1.4449) 

KeyboardInterrupt: 