In [1]:
import os, glob
import sys
import json
from PIL import Image
from collections import Counter

import numpy as np
import pandas as pd
import plotly.express as px
import plotly.graph_objects as go
import tifffile as tiff
import matplotlib.pyplot as plt
from tqdm import tqdm

import cv2

from sklearn.model_selection import KFold

sys.path.append("detection-wheel")

In [2]:
# Number of epochs the training loop further down iterates over (`range(EPOCHS)`).
EPOCHS = 5

In [3]:
import os
import numpy as np
import torch
from PIL import Image


class PennFudanDataset(torch.utils.data.Dataset):
    """Instance-segmentation dataset built from parallel lists of image and mask files.

    Each mask is read as a single-channel image in which pixel value 0 is
    background and every other distinct value marks one object instance.

    Args:
        imgs: indexable collection of image file paths.
        masks: indexable collection of mask file paths; ``masks[i]`` is the
            mask for ``imgs[i]``.
        transforms: optional callable ``(img, target) -> (img, target)``
            applied to every sample; ``None`` disables it.
    """

    def __init__(self, imgs, masks, transforms):
        self.transforms = transforms
        # The caller is responsible for passing the two lists in matching order.
        self.imgs = imgs
        self.masks = masks

    @staticmethod
    def _build_target(mask, idx):
        """Turn a 2-D instance-id array into a detection target dict.

        Args:
            mask: 2-D numpy array; 0 is background, every other value is one
                instance.
            idx: sample index, stored as ``image_id``.

        Returns:
            dict with ``boxes`` (N, 4) float32 as ``[xmin, ymin, xmax, ymax]``,
            ``labels`` (N,) int64 all ones, ``masks`` (N, H, W) uint8,
            ``image_id`` (1,), ``area`` (N,) float32 and ``iscrowd`` (N,) int64
            all zeros. With no instances every tensor keeps its rank and has
            N == 0.
        """
        # Drop the background by value, not by position: a mask without any
        # background pixel must not lose its first instance.
        obj_ids = np.unique(mask)
        obj_ids = obj_ids[obj_ids != 0]
        num_objs = len(obj_ids)

        # Broadcasting yields one binary mask per instance and naturally gives
        # shape (0, H, W) when there are no instances.
        instance_masks = (mask[None, :, :] == obj_ids[:, None, None]).astype(np.uint8)

        # Tight bounding box around each instance, in inclusive pixel indices.
        # NOTE(review): an instance one pixel wide or tall therefore gets a
        # zero-width/height box; torchvision detection models are expected to
        # reject such boxes during training - confirm the dataset has none.
        box_array = np.zeros((num_objs, 4), dtype=np.float32)
        for i, instance in enumerate(instance_masks):
            rows, cols = np.nonzero(instance)
            box_array[i] = (cols.min(), rows.min(), cols.max(), rows.max())

        boxes = torch.as_tensor(box_array, dtype=torch.float32)
        # (N, 4) is guaranteed above, so this is well defined even for N == 0.
        area = (boxes[:, 3] - boxes[:, 1]) * (boxes[:, 2] - boxes[:, 0])

        return {
            "boxes": boxes,
            # there is only one foreground class
            "labels": torch.ones((num_objs,), dtype=torch.int64),
            "masks": torch.as_tensor(instance_masks, dtype=torch.uint8),
            "image_id": torch.tensor([idx]),
            "area": area,
            # suppose all instances are not crowd
            "iscrowd": torch.zeros((num_objs,), dtype=torch.int64),
        }

    def __getitem__(self, idx):
        """Load sample ``idx`` and return ``(img, target)``."""
        img = Image.open(self.imgs[idx]).convert("RGB")
        # The mask is read as 8-bit grayscale, not RGB: each gray level is an
        # instance id, with 0 being background.
        mask = np.array(Image.open(self.masks[idx]).convert('L'))

        target = self._build_target(mask, idx)

        if self.transforms is not None:
            img, target = self.transforms(img, target)

        return img, target

    def __len__(self):
        """Number of samples (one per image path)."""
        return len(self.imgs)

In [4]:
import torchvision
from torchvision.models import list_models
# List the names of the models torchvision registers under its detection module.
detection_models = list_models(module=torchvision.models.detection)
detection_models

['fasterrcnn_mobilenet_v3_large_320_fpn',
 'fasterrcnn_mobilenet_v3_large_fpn',
 'fasterrcnn_resnet50_fpn',
 'fasterrcnn_resnet50_fpn_v2',
 'fcos_resnet50_fpn',
 'keypointrcnn_resnet50_fpn',
 'maskrcnn_resnet50_fpn',
 'maskrcnn_resnet50_fpn_v2',
 'retinanet_resnet50_fpn',
 'retinanet_resnet50_fpn_v2',
 'ssd300_vgg16',
 'ssdlite320_mobilenet_v3_large']

In [5]:
import torchvision
import torchvision
from torchvision.models.detection.faster_rcnn import FastRCNNPredictor
from torchvision.models.detection.mask_rcnn import MaskRCNNPredictor
from torchvision.models.resnet import ResNet50_Weights

def get_model_instance_segmentation(num_classes):
    """Create a Mask R-CNN (ResNet-50 FPN v2) with heads sized for ``num_classes``.

    The network is instantiated with torchvision's ``"DEFAULT"`` weights, then
    its box predictor and mask predictor are replaced by freshly initialised
    ones that output ``num_classes`` classes.

    Args:
        num_classes: number of output classes for both replaced heads.

    Returns:
        The modified torchvision detection model.
    """
    net = torchvision.models.detection.maskrcnn_resnet50_fpn_v2(
        weights="DEFAULT",
        weights_backbone=ResNet50_Weights.IMAGENET1K_V2,
    )
    roi_heads = net.roi_heads

    # Box branch: keep the feature width feeding the classifier, swap the head.
    box_in_features = roi_heads.box_predictor.cls_score.in_features
    roi_heads.box_predictor = FastRCNNPredictor(box_in_features, num_classes)

    # Mask branch: same idea, with a 256-channel hidden layer in the new head.
    mask_in_channels = roi_heads.mask_predictor.conv5_mask.in_channels
    mask_hidden_channels = 256
    roi_heads.mask_predictor = MaskRCNNPredictor(
        mask_in_channels, mask_hidden_channels, num_classes
    )

    return net

In [6]:
# model = get_model_instance_segmentation(2)
# model.rpn.anchor_generator.__dict__

In [7]:
import transforms as T

def get_transform(train):
    """Return the transform pipeline applied to each ``(image, target)`` pair.

    The pipeline is ``T.PILToTensor()`` followed by
    ``T.ConvertImageDtype(torch.float)``.

    Args:
        train: currently has no effect; the train-only augmentations listed
            below are disabled, so both splits get the same pipeline.
    """
    # Disabled train-time augmentations, kept for reference:
    #   T.RandomHorizontalFlip(0.5), T.RandomIoUCrop(), T.RandomZoomOut(),
    #   T.RandomPhotometricDistort(), T.ScaleJitter(), T.RandomShortestSize()
    return T.Compose([
        T.PILToTensor(),
        T.ConvertImageDtype(torch.float),
    ])

In [8]:
from engine_2 import train_one_epoch, evaluate
import utils

In [9]:
# Run on the GPU when one is visible to PyTorch, otherwise fall back to the CPU.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

In [10]:

model = get_model_instance_segmentation(num_classes=2)
model.to(device)

# SWA: replace the model weights with the element-wise mean of the per-epoch
# checkpoints of fold 0.
epochs_to_take = np.arange(15, 40)

# Start from an empty accumulator so that every checkpoint - including the
# first one - is added exactly once. Seeding the sum with epoch 15 and then
# looping over all epochs would count epoch 15 twice while still dividing by
# len(epochs_to_take), inflating every averaged weight.
state_dict = None
for take_epoch in epochs_to_take:

    weights_path = f"ckpts/fold_0_epoch{take_epoch}.pth"
    # map_location lets checkpoints saved on a GPU load on a CPU-only machine.
    state_dict_epoch = torch.load(weights_path, map_location=device)

    if state_dict is None:
        # Clone so the running sum never aliases a loaded checkpoint.
        state_dict = {key: value.clone() for key, value in state_dict_epoch.items()}
        continue

    for key, value in state_dict_epoch.items():
        state_dict[key] += value

# True division: integer buffers become floating point here, and
# load_state_dict copies them back into the model's own dtype.
for key in state_dict.keys():
    state_dict[key] = state_dict[key] / len(epochs_to_take)

model.load_state_dict(state_dict)

<All keys matched successfully>

In [11]:
import numpy as np
# Epoch numbers 15 through 39 (the stop value 40 is exclusive).
epochs_to_take = np.arange(start=15, stop=40)
epochs_to_take

array([15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31,
       32, 33, 34, 35, 36, 37, 38, 39])

In [12]:
# import sys

# # Create a custom function to log output
# def log_output(text):
#     with open('output.log', 'a') as f:
#         f.write(text)

# # Redirect stdout to the custom log function
# sys.stdout.write = log_output


In [14]:
# Count only the PNG files: the fold indices derived from this number are
# applied to a `*.png` listing of the same directory, so counting every file
# (`*`) would produce out-of-range indices if anything else lived there.
n_imgs = len(glob.glob('/home/viktor/Documents/kaggle/hubmap-2023/experiments/mask-rcnn/new-dataset/train/image/*.png'))
n_imgs



1622

In [15]:
# The file listings do not depend on the fold, so build them once.
# Sorting both lists is what keeps image i paired with mask i.
all_imgs = np.array(sorted(glob.glob('/home/viktor/Documents/kaggle/hubmap-2023/experiments/mask-rcnn/new-dataset/train/image/*.png')))
all_masks = np.array(sorted(glob.glob('/home/viktor/Documents/kaggle/hubmap-2023/experiments/mask-rcnn/new-dataset/train/mask/*.png')))
assert len(all_imgs) == len(all_masks), (
    f"image/mask count mismatch: {len(all_imgs)} images vs {len(all_masks)} masks"
)

# Create the checkpoint directory up front so torch.save cannot fail on a
# missing folder after a whole epoch of training.
os.makedirs('ckpts-swa', exist_ok=True)

# os.cpu_count() may return None; 0 makes the DataLoader load in the main process.
n_workers = os.cpu_count() or 0

kf = KFold(n_splits=5, shuffle=True, random_state=43)
# Split over the arrays that are actually indexed below, so the fold indices
# can never run past the end of the listings.
for i, (train_index, test_index) in enumerate(kf.split(np.arange(len(all_imgs)))):
    # Only fold 0 is trained.
    if i != 0:
        continue

    train_img = all_imgs[train_index]
    train_mask = all_masks[train_index]
    val_img = all_imgs[test_index]
    val_mask = all_masks[test_index]
    dataset_train = PennFudanDataset(train_img, train_mask, get_transform(train=True))
    dataset_val = PennFudanDataset(val_img, val_mask, get_transform(train=False))
    train_dl = torch.utils.data.DataLoader(
        dataset_train, batch_size=4, shuffle=True, num_workers=n_workers,
        pin_memory=True, drop_last=True, collate_fn=utils.collate_fn)
    val_dl = torch.utils.data.DataLoader(
        dataset_val, batch_size=1, shuffle=False, num_workers=n_workers,
        pin_memory=True, collate_fn=utils.collate_fn)

    params = [p for p in model.parameters() if p.requires_grad]
    optimizer = torch.optim.SGD(params, lr=0.002)

    # start_factor == end_factor == 1 keeps the learning rate constant; the
    # scheduler is retained so a different schedule can be dropped in here.
    scheduler = torch.optim.lr_scheduler.LinearLR(optimizer, start_factor=1,
                                                  end_factor=1,
                                                  total_iters=30)

    for epoch in range(EPOCHS):
        train_one_epoch(model, optimizer, train_dl, device, epoch, print_freq=50)
        evaluate(model, val_dl, device=device)
        scheduler.step()
        # One checkpoint per epoch.
        model_path = f'ckpts-swa/fold_{i}_epoch{epoch}.pth'
        torch.save(model.state_dict(), model_path)
        

Epoch: [0]  [  0/324]  eta: 0:17:24  lr: 0.002000  loss: 0.7763 (0.7763)  loss_classifier: 0.1574 (0.1574)  loss_box_reg: 0.2935 (0.2935)  loss_mask: 0.2410 (0.2410)  loss_objectness: 0.0440 (0.0440)  loss_rpn_box_reg: 0.0405 (0.0405)  time: 3.2242  data: 1.2823  max mem: 6548
Epoch: [0]  [ 50/324]  eta: 0:00:37  lr: 0.002000  loss: 0.7048 (0.7165)  loss_classifier: 0.1434 (0.1507)  loss_box_reg: 0.2576 (0.2649)  loss_mask: 0.2388 (0.2433)  loss_objectness: 0.0290 (0.0297)  loss_rpn_box_reg: 0.0235 (0.0278)  time: 0.0733  data: 0.0001  max mem: 12508
Epoch: [0]  [100/324]  eta: 0:00:23  lr: 0.002000  loss: 0.6642 (0.7218)  loss_classifier: 0.1382 (0.1538)  loss_box_reg: 0.2624 (0.2637)  loss_mask: 0.2305 (0.2453)  loss_objectness: 0.0270 (0.0295)  loss_rpn_box_reg: 0.0204 (0.0294)  time: 0.0731  data: 0.0001  max mem: 12508
Epoch: [0]  [150/324]  eta: 0:00:16  lr: 0.002000  loss: 0.6950 (0.7238)  loss_classifier: 0.1538 (0.1549)  loss_box_reg: 0.2652 (0.2654)  loss_mask: 0.2325 (0.2442



Epoch: [1]  [  0/324]  eta: 0:07:02  lr: 0.002000  loss: 0.9134 (0.9134)  loss_classifier: 0.2239 (0.2239)  loss_box_reg: 0.3500 (0.3500)  loss_mask: 0.2614 (0.2614)  loss_objectness: 0.0499 (0.0499)  loss_rpn_box_reg: 0.0283 (0.0283)  time: 1.3035  data: 1.2200  max mem: 12592
Epoch: [1]  [ 50/324]  eta: 0:00:26  lr: 0.002000  loss: 0.7097 (0.7407)  loss_classifier: 0.1426 (0.1580)  loss_box_reg: 0.2404 (0.2809)  loss_mask: 0.2308 (0.2438)  loss_objectness: 0.0300 (0.0293)  loss_rpn_box_reg: 0.0209 (0.0288)  time: 0.0735  data: 0.0001  max mem: 12592
Epoch: [1]  [100/324]  eta: 0:00:19  lr: 0.002000  loss: 0.6985 (0.7318)  loss_classifier: 0.1451 (0.1584)  loss_box_reg: 0.2695 (0.2751)  loss_mask: 0.2303 (0.2407)  loss_objectness: 0.0255 (0.0297)  loss_rpn_box_reg: 0.0225 (0.0278)  time: 0.0733  data: 0.0001  max mem: 12622
Epoch: [1]  [150/324]  eta: 0:00:14  lr: 0.002000  loss: 0.7384 (0.7243)  loss_classifier: 0.1514 (0.1558)  loss_box_reg: 0.2871 (0.2710)  loss_mask: 0.2334 (0.240

In [None]:
all_indices = np.arange(n_imgs)
# Seeded generator so the hold-out split is the same on every run
# (same seed value as the KFold split uses).
split_rng = np.random.default_rng(43)
# take random 1400 images for training
train_index = split_rng.choice(all_indices, size=1400, replace=False)
# take the rest for validation
test_index = np.setdiff1d(all_indices, train_index)



In [None]:
# Sanity check: the number of indices shared by the two splits should be 0.
shared_indices = np.intersect1d(train_index, test_index)
len(shared_indices)