In [1]:
import os, glob
import sys
import json
from PIL import Image
from collections import Counter

import numpy as np
import pandas as pd
import plotly.express as px
import plotly.graph_objects as go
import tifffile as tiff
import matplotlib.pyplot as plt
from tqdm import tqdm

import cv2

from sklearn.model_selection import KFold

# Put the local "detection-wheel" directory on the import path. Presumably it holds the
# `transforms`, `engine` and `utils` helper modules imported in later cells -- TODO confirm.
sys.path.append("detection-wheel")

In [2]:
# Number of iterations of the `for epoch in range(EPOCHS)` loop in the final cell.
# That loop currently only calls `evaluate`, so 1 means a single validation pass.
EPOCHS = 1

In [3]:
import os
import numpy as np
import torch
from PIL import Image


class PennFudanDatasetVal(torch.utils.data.Dataset):
    """Validation dataset yielding ``(image, target)`` pairs for instance segmentation.

    Every sample pairs an RGB image with an instance-mask image in which pixel
    value 0 is background and each other distinct value marks one object.
    Masks are read as 8-bit greyscale (PIL mode ``'L'``), so at most 255
    instances per image can be told apart.

    Args:
        imgs: Indexable collection of image file paths.
        masks: Indexable collection of mask file paths, aligned with ``imgs``.
        transforms: Callable ``(img, target) -> (img, target)`` applied to each
            sample, or ``None`` to return the PIL image and raw target as-is.
    """

    def __init__(self, imgs, masks, transforms):
        self.transforms = transforms
        # The caller supplies already-aligned path lists (image i <-> mask i).
        self.imgs = imgs
        self.masks = masks

    @staticmethod
    def _build_target(mask, idx):
        """Turn a 2-D instance-id array into the detection target dictionary.

        Args:
            mask: 2-D integer array; 0 is background, any other value is an
                instance id.
            idx: Sample index, stored as ``image_id``.

        Returns:
            dict with ``boxes`` (N, 4) float32 as ``[xmin, ymin, xmax, ymax]``,
            ``labels`` (N,) int64 all ones, ``masks`` (N, H, W) uint8,
            ``image_id`` (1,), ``area`` (N,) float32 and ``iscrowd`` (N,) int64
            zeros. N may be 0 for an all-background mask.
        """
        mask = np.asarray(mask)

        # Drop the background id explicitly instead of assuming it is the first
        # unique value: a mask without any background pixel must keep all ids.
        obj_ids = np.unique(mask)
        obj_ids = obj_ids[obj_ids != 0]
        num_objs = len(obj_ids)

        # One boolean plane per instance; broadcasting keeps the (0, H, W)
        # shape when there are no instances.
        instance_masks = mask[None, :, :] == obj_ids[:, None, None]

        boxes = []
        seen_boxes = set()
        for plane in instance_masks:
            ys, xs = np.nonzero(plane)
            box = [int(xs.min()), int(ys.min()), int(xs.max()), int(ys.max())]
            # Diagnostic only: report each box that repeats an earlier one.
            key = tuple(box)
            if key in seen_boxes:
                print("Duplicate box")
            seen_boxes.add(key)
            boxes.append(box)

        # reshape(-1, 4) keeps a well-formed (0, 4) tensor for empty samples.
        boxes = torch.as_tensor(boxes, dtype=torch.float32).reshape(-1, 4)

        target = {}
        target["boxes"] = boxes
        # there is only one foreground class
        target["labels"] = torch.ones((num_objs,), dtype=torch.int64)
        target["masks"] = torch.from_numpy(instance_masks.astype(np.uint8))
        target["image_id"] = torch.tensor([idx])
        target["area"] = (boxes[:, 3] - boxes[:, 1]) * (boxes[:, 2] - boxes[:, 0])
        # suppose all instances are not crowd
        target["iscrowd"] = torch.zeros((num_objs,), dtype=torch.int64)
        return target

    def __getitem__(self, idx):
        """Load sample ``idx`` and return ``(img, target)``.

        The image is converted to RGB and the mask to 8-bit greyscale before
        the target is built; ``self.transforms`` is applied last when set.
        """
        # convert() returns an independent image, so the files can be closed.
        with Image.open(self.imgs[idx]) as img_file:
            img = img_file.convert("RGB")
        with Image.open(self.masks[idx]) as mask_file:
            mask = np.array(mask_file.convert('L'))

        target = self._build_target(mask, idx)

        if self.transforms is not None:
            img, target = self.transforms(img, target)

        return img, target

    def __len__(self):
        """Number of samples (length of the image path list)."""
        return len(self.imgs)

In [4]:
# import os
# import numpy as np
# import torch
# from PIL import Image


# class PennFudanDatasetVal(torch.utils.data.Dataset):
#     def __init__(self, imgs, masks, transforms):
#         self.transforms = transforms
#         # load all image files, sorting them to
#         # ensure that they are aligned
#         self.imgs = imgs#sorted(glob.glob('/home/viktor/Documents/kaggle/hubmap-2023/experiments/mask-rcnn/new-dataset/train/image/*.png'))
#         self.masks = masks#sorted(glob.glob('/home/viktor/Documents/kaggle/hubmap-2023/experiments/mask-rcnn/new-dataset/train/mask/*.png'))

#     def __getitem__(self, idx):
#         # load images and masks
#         img_path = self.imgs[idx]
#         mask_path = self.masks[idx]
#         img = Image.open(img_path).convert("RGB")
#         print(img.shape, img.min(), img.max())
        
#         # note that we haven't converted the mask to RGB,
#         # because each color corresponds to a different instance
#         # with 0 being background
#         mask = Image.open(mask_path).convert('L')
#         # convert the PIL Image into a numpy array
#         mask = np.array(mask)
        
        
        
#         # get masks by connected components
#         num_outputs, labels, stats, centroids = cv2.connectedComponentsWithStats((mask > 0.5).astype(np.uint8)*255, 8)
#         boxes = stats[:, [cv2.CC_STAT_LEFT, cv2.CC_STAT_TOP, cv2.CC_STAT_WIDTH, cv2.CC_STAT_HEIGHT]]
#         label_masks = [labels == i for i in range(num_outputs)]
        
#         # masks = [(mask * m) for m in label_masks]
#         masks = []
#         for m in label_masks:
#             mask_m = mask * m
#             if np.sum(mask_m) > 0:
#                 masks.append(mask_m)
        
                
#         masks = np.array(masks)
#         masks = (masks>1).astype(np.int32)


        
        
#         # get bounding box coordinates for each mask
#         num_objs = len(masks)
#         boxes = []
#         for i in range(num_objs):
            
            
            
#             pos = np.nonzero(masks[i])
#             xmin = np.min(pos[1])
#             xmax = np.max(pos[1])
#             ymin = np.min(pos[0])
#             ymax = np.max(pos[0])
#             boxes.append([xmin, ymin, xmax, ymax])

#         # convert everything into a torch.Tensor
#         boxes = torch.as_tensor(boxes, dtype=torch.float32)
#         # there is only one class
#         labels = torch.ones((num_objs,), dtype=torch.int64)
#         masks = torch.as_tensor(masks, dtype=torch.uint8)

#         image_id = torch.tensor([idx])
#         try:
#             area = (boxes[:, 3] - boxes[:, 1]) * (boxes[:, 2] - boxes[:, 0])
#             #print(area,area.shape,area.dtype)
#         except:
#             area = torch.tensor([[0],[0]])
#         # suppose all instances are not crowd
#         iscrowd = torch.zeros((num_objs,), dtype=torch.int64)
        
#         #print(masks.shape)

#         target = {}
#         target["boxes"] = boxes
#         target["labels"] = labels
#         target["masks"] = masks
#         target["image_id"] = image_id
#         target["area"] = area
#         target["iscrowd"] = iscrowd

#         if self.transforms is not None:
#             img, target = self.transforms(img, target)

#         print(img.shape, img.min(), img.max())

#         return img, target

#     def __len__(self):
#         return len(self.imgs)

In [5]:
import torchvision
from torchvision.models import list_models
# List the names of every model registered under torchvision.models.detection;
# the bare expression on the last line displays the list as the cell output.
detection_models = list_models(module=torchvision.models.detection)
detection_models

['fasterrcnn_mobilenet_v3_large_320_fpn',
 'fasterrcnn_mobilenet_v3_large_fpn',
 'fasterrcnn_resnet50_fpn',
 'fasterrcnn_resnet50_fpn_v2',
 'fcos_resnet50_fpn',
 'keypointrcnn_resnet50_fpn',
 'maskrcnn_resnet50_fpn',
 'maskrcnn_resnet50_fpn_v2',
 'retinanet_resnet50_fpn',
 'retinanet_resnet50_fpn_v2',
 'ssd300_vgg16',
 'ssdlite320_mobilenet_v3_large']

In [6]:
import torchvision
import torchvision
from torchvision.models.detection.faster_rcnn import FastRCNNPredictor
from torchvision.models.detection.mask_rcnn import MaskRCNNPredictor
from torchvision.models.resnet import ResNet50_Weights

def get_model_instance_segmentation(num_classes):
    """Build a Mask R-CNN (ResNet-50 FPN v2) whose heads predict ``num_classes`` classes.

    The network is created with ``weights="DEFAULT"`` and then both ROI heads
    (box classifier/regressor and mask predictor) are replaced by freshly
    initialised ones sized for ``num_classes``.

    Args:
        num_classes: Number of output classes passed to both new heads.

    Returns:
        The modified torchvision detection model.
    """
    # NOTE(review): torchvision may ignore `weights_backbone` when `weights`
    # is also given -- confirm against the torchvision documentation.
    model = torchvision.models.detection.maskrcnn_resnet50_fpn_v2(
        weights="DEFAULT",
        weights_backbone=ResNet50_Weights.IMAGENET1K_V2,
    )
    roi_heads = model.roi_heads

    # Box head: same input width as the pre-trained classifier, new class count.
    box_in_features = roi_heads.box_predictor.cls_score.in_features
    roi_heads.box_predictor = FastRCNNPredictor(box_in_features, num_classes)

    # Mask head: same input channels as the pre-trained one, 256 hidden channels.
    mask_in_channels = roi_heads.mask_predictor.conv5_mask.in_channels
    mask_hidden_channels = 256
    roi_heads.mask_predictor = MaskRCNNPredictor(
        mask_in_channels, mask_hidden_channels, num_classes
    )

    return model

In [7]:
import transforms as T

def get_transform(train):
    """Compose the sample transform pipeline.

    The pipeline always converts the PIL image to a float tensor; when
    ``train`` is truthy the random augmentations listed below are appended
    in order.

    Args:
        train: Whether to include the random training-time augmentations.

    Returns:
        A ``T.Compose`` over the selected transforms.
    """
    pipeline = [
        T.PILToTensor(),
        T.ConvertImageDtype(torch.float),
    ]
    if train:
        pipeline += [
            T.RandomHorizontalFlip(0.5),
            T.RandomIoUCrop(),
            T.RandomZoomOut(),
            T.RandomPhotometricDistort(),
            T.ScaleJitter(),
            T.RandomShortestSize(),
        ]
    return T.Compose(pipeline)

In [8]:
from engine import train_one_epoch, evaluate
import utils

In [9]:
# Run on the GPU when one is visible to PyTorch, otherwise fall back to the CPU.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

In [10]:
# Count only the PNG images: the fold cell below builds its arrays from the
# same '*.png' pattern, and the KFold indices derived from this count must
# stay valid for (and aligned with) those arrays.
n_imgs = len(glob.glob('/home/viktor/Documents/kaggle/hubmap-2023/experiments/mask-rcnn/new-dataset/train/image/*.png'))
n_imgs


1622

In [11]:
# Evaluate the fold-0 checkpoint on the fold-0 validation split.
#
# The image/mask path lists are sorted so that position i in one list matches
# position i in the other; the 5-fold split (seed 43) is taken over exactly
# those lists so the indices can never point outside them.
all_imgs = np.array(sorted(glob.glob('/home/viktor/Documents/kaggle/hubmap-2023/experiments/mask-rcnn/new-dataset/train/image/*.png')))
all_masks = np.array(sorted(glob.glob('/home/viktor/Documents/kaggle/hubmap-2023/experiments/mask-rcnn/new-dataset/train/mask/*.png')))

# all_imgs = sorted(glob.glob('train/image/*.png'))
# all_masks = sorted(glob.glob('train/mask/*.png'))

if len(all_imgs) != len(all_masks):
    raise ValueError(
        f"Found {len(all_imgs)} images but {len(all_masks)} masks; "
        "the two lists must pair up one-to-one."
    )

kf = KFold(n_splits=5, shuffle=True, random_state=43)
for i, (train_index, test_index) in enumerate(kf.split(range(len(all_imgs)))):
    # Only fold 0 is evaluated in this notebook.
    if i != 0:
        continue

    train_img = all_imgs[train_index]
    train_mask = all_masks[train_index]

    val_img = all_imgs[test_index]
    val_mask = all_masks[test_index]

    dataset_val = PennFudanDatasetVal(val_img, val_mask, get_transform(train=False))
    val_dl = torch.utils.data.DataLoader(
        dataset_val,
        batch_size=1,
        shuffle=False,
        # os.cpu_count() can return None; fall back to loading in the main process.
        num_workers=os.cpu_count() or 0,
        pin_memory=True,
        collate_fn=utils.collate_fn,
    )

    model = get_model_instance_segmentation(num_classes=2)

    weights_path = "/home/viktor/Documents/kaggle/hubmap-2023/experiments/mask-rcnn/vanilla-mask-rcnn-2-fold-0-all-augmentations/ckpts/fold_0_epoch164.pth"
    # map_location lets a checkpoint saved from GPU load on whichever device
    # was selected above (including the CPU fallback).
    model.load_state_dict(torch.load(weights_path, map_location=device))

    model.to(device)

    # Optimizer and scheduler are only needed by the (currently disabled)
    # training lines in the epoch loop; they are kept so training can be
    # re-enabled without further changes.
    params = [p for p in model.parameters() if p.requires_grad]
    optimizer = torch.optim.Adam(params, lr=2e-5)
    # scheduler = torch.optim.lr_scheduler.ExponentialLR(optimizer, gamma=0.9)

    scheduler = torch.optim.lr_scheduler.CyclicLR(optimizer,
                                                    base_lr=2e-5,
                                                    max_lr=3e-3,
                                                    step_size_up=40,
                                                    step_size_down=40,
                                                    mode='triangular2',
                                                    cycle_momentum=False)

    # scheduler = torch.optim.lr_scheduler.LinearLR(optimizer,start_factor=0.01,
    #                                         end_factor=1,
    #                                         total_iters=10)

    for epoch in range(EPOCHS):
        # train_one_epoch(model, optimizer, train_dl, device, epoch, print_freq=50)
        evaluate(model, val_dl, device=device)
        # scheduler.step()
        # model_path = f'ckpts/fold_{i}_epoch{epoch}.pth'
        # torch.save(model.state_dict(), model_path)
        

creating index...
index created!
Test:  [  0/325]  eta: 0:08:01  model_time: 0.9561 (0.9561)  evaluator_time: 0.0280 (0.0280)  time: 1.4822  data: 0.4977  max mem: 514
Test:  [100/325]  eta: 0:00:14  model_time: 0.0232 (0.0408)  evaluator_time: 0.0101 (0.0165)  time: 0.0387  data: 0.0001  max mem: 522
Test:  [200/325]  eta: 0:00:06  model_time: 0.0235 (0.0357)  evaluator_time: 0.0121 (0.0164)  time: 0.0440  data: 0.0001  max mem: 522
Test:  [300/325]  eta: 0:00:01  model_time: 0.0218 (0.0339)  evaluator_time: 0.0110 (0.0165)  time: 0.0420  data: 0.0001  max mem: 522
Test:  [324/325]  eta: 0:00:00  model_time: 0.0226 (0.0334)  evaluator_time: 0.0120 (0.0164)  time: 0.0396  data: 0.0001  max mem: 522
Test: Total time: 0:00:16 (0.0522 s / it)
Averaged stats: model_time: 0.0226 (0.0334)  evaluator_time: 0.0120 (0.0164)
Accumulating evaluation results...
DONE (t=0.04s).
Accumulating evaluation results...
DONE (t=0.04s).
IoU metric: bbox
 Average Precision  (AP) @[ IoU=0.50:0.95 | area=   al