In [1]:
import os, glob
import sys
import json
from PIL import Image
from collections import Counter

import numpy as np
import pandas as pd
import plotly.express as px
import plotly.graph_objects as go
import tifffile as tiff
import matplotlib.pyplot as plt
from tqdm import tqdm

import cv2

from sklearn.model_selection import KFold

# Add the local "detection-wheel" directory to the module search path.
# NOTE(review): presumably this is where the `transforms`, `engine_2` and `utils`
# modules imported further down live - confirm.
sys.path.append("detection-wheel")

In [2]:
# Number of epochs the training loop runs (`for epoch in range(EPOCHS)`).
EPOCHS = 5

In [3]:
import os
import numpy as np
import torch
from PIL import Image


class PennFudanDataset(torch.utils.data.Dataset):
    """Instance-segmentation dataset built from parallel lists of image/mask files.

    ``imgs[k]`` and ``masks[k]`` must describe the same sample. Each mask file is
    read as a single-channel ("L") image in which pixel value 0 is background and
    every other distinct value marks one object instance.

    Args:
        imgs: sequence of image file paths.
        masks: sequence of mask file paths, same length and order as ``imgs``.
        transforms: optional callable ``(img, target) -> (img, target)``; pass
            ``None`` to return the PIL image and the raw target unchanged.

    Raises:
        ValueError: if ``imgs`` and ``masks`` differ in length.
    """

    def __init__(self, imgs, masks, transforms):
        # Samples are paired purely by position, so a length mismatch would
        # otherwise silently pair images with the wrong masks.
        if len(imgs) != len(masks):
            raise ValueError(
                f"imgs and masks must have the same length, got {len(imgs)} and {len(masks)}"
            )
        self.transforms = transforms
        self.imgs = imgs
        self.masks = masks

    def __getitem__(self, idx):
        """Load sample ``idx`` and return ``(img, target)``.

        ``target`` is a dict with the keys ``boxes`` (N, 4) float32 in
        ``[xmin, ymin, xmax, ymax]`` order, ``labels`` (N,) int64 (all ones, a
        single foreground class), ``masks`` (N, H, W) uint8, ``image_id`` (1,),
        ``area`` (N,) float32 and ``iscrowd`` (N,) int64 (all zeros).  ``N`` may
        be 0 for an image without instances.
        """
        img = Image.open(self.imgs[idx]).convert("RGB")
        # The mask is kept single-channel: each gray level encodes one instance.
        mask = np.array(Image.open(self.masks[idx]).convert('L'))

        # Drop the background id by value rather than by position: slicing off
        # the first unique value would discard a real instance whenever the
        # mask happens to contain no background pixel at all.
        obj_ids = np.unique(mask)
        obj_ids = obj_ids[obj_ids != 0]
        num_objs = len(obj_ids)

        # Broadcast comparison -> one binary mask per instance, shape (N, H, W).
        # This also yields a correctly shaped (0, H, W) array when N == 0.
        masks = (mask == obj_ids[:, None, None]).astype(np.uint8)

        # Tight bounding box around each instance mask.
        # NOTE(review): an instance that is one pixel wide or tall gives
        # xmin == xmax or ymin == ymax; confirm the detector accepts such boxes.
        boxes = np.zeros((num_objs, 4), dtype=np.float32)
        for i in range(num_objs):
            rows, cols = np.nonzero(masks[i])
            boxes[i] = (cols.min(), rows.min(), cols.max(), rows.max())

        # convert everything into a torch.Tensor
        boxes = torch.as_tensor(boxes, dtype=torch.float32)
        # there is only one class
        labels = torch.ones((num_objs,), dtype=torch.int64)
        masks = torch.as_tensor(masks, dtype=torch.uint8)

        image_id = torch.tensor([idx])
        # boxes is always (N, 4), so this is well defined for N == 0 too and no
        # exception handling is needed.
        area = (boxes[:, 3] - boxes[:, 1]) * (boxes[:, 2] - boxes[:, 0])
        # suppose all instances are not crowd
        iscrowd = torch.zeros((num_objs,), dtype=torch.int64)

        target = {
            "boxes": boxes,
            "labels": labels,
            "masks": masks,
            "image_id": image_id,
            "area": area,
            "iscrowd": iscrowd,
        }

        if self.transforms is not None:
            img, target = self.transforms(img, target)

        return img, target

    def __len__(self):
        """Number of samples (length of the image list)."""
        return len(self.imgs)

In [4]:
import torchvision
from torchvision.models import list_models
# List the names of the models registered under torchvision.models.detection.
detection_models = list_models(module=torchvision.models.detection)
detection_models

['fasterrcnn_mobilenet_v3_large_320_fpn',
 'fasterrcnn_mobilenet_v3_large_fpn',
 'fasterrcnn_resnet50_fpn',
 'fasterrcnn_resnet50_fpn_v2',
 'fcos_resnet50_fpn',
 'keypointrcnn_resnet50_fpn',
 'maskrcnn_resnet50_fpn',
 'maskrcnn_resnet50_fpn_v2',
 'retinanet_resnet50_fpn',
 'retinanet_resnet50_fpn_v2',
 'ssd300_vgg16',
 'ssdlite320_mobilenet_v3_large']

In [5]:
import torchvision
import torchvision
from torchvision.models.detection.faster_rcnn import FastRCNNPredictor
from torchvision.models.detection.mask_rcnn import MaskRCNNPredictor
from torchvision.models.resnet import ResNet50_Weights

def get_model_instance_segmentation(num_classes):
    """Build a Mask R-CNN (ResNet-50 FPN v2) whose heads predict ``num_classes`` classes.

    The network is created with ``weights="DEFAULT"`` and an ImageNet (V2)
    backbone weight spec; its box and mask predictors are then swapped for
    freshly initialised ones sized for ``num_classes``.

    Args:
        num_classes: number of output classes for both predictors.

    Returns:
        The modified torchvision detection model.
    """
    net = torchvision.models.detection.maskrcnn_resnet50_fpn_v2(
        weights="DEFAULT",
        weights_backbone=ResNet50_Weights.IMAGENET1K_V2,
    )
    heads = net.roi_heads

    # Box head: new classifier/regressor with the same input width as the old one.
    box_in_features = heads.box_predictor.cls_score.in_features
    heads.box_predictor = FastRCNNPredictor(box_in_features, num_classes)

    # Mask head: new predictor fed by the same channel count, 256 hidden channels.
    mask_in_channels = heads.mask_predictor.conv5_mask.in_channels
    heads.mask_predictor = MaskRCNNPredictor(mask_in_channels, 256, num_classes)

    # model.rpn.anchor_generator.sizes = ((8, 16, 32, 64, 128,256),)

    return net

In [6]:
# model = get_model_instance_segmentation(2)
# model.rpn.anchor_generator.__dict__

In [7]:
import transforms as T

def get_transform(train):
    """Return the preprocessing pipeline applied to every (image, target) pair.

    The pipeline converts the PIL image to a tensor and then to float dtype.
    ``train`` is accepted but currently has no effect, because the
    training-only augmentations listed below are disabled.
    """
    pipeline = [
        T.PILToTensor(),
        T.ConvertImageDtype(torch.float),
    ]
    # Disabled training-time augmentations:
    # if train:
    #     pipeline.append(T.RandomHorizontalFlip(0.5))
    #     pipeline.append(T.RandomIoUCrop())
    #     pipeline.append(T.RandomZoomOut())
    #     pipeline.append(T.RandomPhotometricDistort())
    #     pipeline.append(T.ScaleJitter())
    #     pipeline.append(T.RandomShortestSize())
    return T.Compose(pipeline)

In [8]:
from engine_2 import train_one_epoch, evaluate
import utils

In [9]:
# Run on the GPU when CUDA is available, otherwise fall back to the CPU.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

In [10]:

model = get_model_instance_segmentation(num_classes=2)
model.to(device)

# SWA: average the weights of the fold-0 checkpoints of epochs 20 through 39.
# The epoch-20 checkpoint seeds the running sum; `epochs_to_take` lists the
# epochs that are added on top of it.
epochs_to_take = np.arange(21, 40)

# map_location=device: without it torch.load restores tensors onto the device
# they were saved from, which fails for GPU-saved checkpoints when `device`
# has fallen back to the CPU.
weights_path = "ckpts/fold_0_epoch20.pth"
state_dict = torch.load(weights_path, map_location=device)

for take_epoch in epochs_to_take:
    weights_path = f"ckpts/fold_0_epoch{take_epoch}.pth"
    state_dict_epoch = torch.load(weights_path, map_location=device)

    # Accumulate entry by entry, in place, into the running sum.
    for key in state_dict_epoch.keys():
        state_dict[key] += state_dict_epoch[key]

# Turn the sum into a mean: the divisor counts the epoch-20 seed as well.
for key in state_dict.keys():
    state_dict[key] = state_dict[key] / (len(epochs_to_take) + 1)

model.load_state_dict(state_dict)

<All keys matched successfully>

In [11]:
import numpy as np
# Scratch check: the epoch numbers 15..39 (np.arange excludes the stop value).
# Note: this rebinds `epochs_to_take`, which the SWA averaging cell set to 21..39.
epochs_to_take = np.arange(15, 40)
epochs_to_take

array([15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31,
       32, 33, 34, 35, 36, 37, 38, 39])

In [12]:
# import sys

# # Create a custom function to log output
# def log_output(text):
#     with open('output.log', 'a') as f:
#         f.write(text)

# # Redirect stdout to the custom log function
# sys.stdout.write = log_output


In [13]:
# Number of training images. Only *.png files are counted, i.e. the same
# pattern that is used when the image list for the K-fold split is built; with
# a bare `*` any stray non-PNG file in the folder would inflate the count and
# make split indices point past the end of that list.
n_imgs = len(glob.glob('/home/viktor/Documents/kaggle/hubmap-2023/experiments/mask-rcnn/new-dataset/train/image/*.png'))
n_imgs



1622

In [14]:
# Train on fold 0 of a 5-fold split, evaluating and checkpointing after every epoch.

# The file lists do not depend on the fold, so they are built once up front.
all_imgs = np.array(sorted(glob.glob('/home/viktor/Documents/kaggle/hubmap-2023/experiments/mask-rcnn/new-dataset/train/image/*.png')))
all_masks = np.array(sorted(glob.glob('/home/viktor/Documents/kaggle/hubmap-2023/experiments/mask-rcnn/new-dataset/train/mask/*.png')))
# Images and masks are paired purely by sorted position; a count mismatch
# would silently pair images with the wrong masks.
assert len(all_imgs) == len(all_masks), (
    f"found {len(all_imgs)} images but {len(all_masks)} masks"
)

# Checkpoints are written here after every epoch; create the folder first so
# torch.save cannot fail after a full epoch of training.
os.makedirs('ckpts-swa', exist_ok=True)

# os.cpu_count() may return None; fall back to loading in the main process.
n_workers = os.cpu_count() or 0

kf = KFold(n_splits=5, shuffle=True, random_state=43)
# Split over the list that is actually indexed below, so the fold indices can
# never run past its end.
for i, (train_index, test_index) in enumerate(kf.split(range(len(all_imgs)))):
    if i != 0:
        continue  # only fold 0 is trained

    train_img, train_mask = all_imgs[train_index], all_masks[train_index]
    val_img, val_mask = all_imgs[test_index], all_masks[test_index]

    dataset_train = PennFudanDataset(train_img, train_mask, get_transform(train=True))
    dataset_val = PennFudanDataset(val_img, val_mask, get_transform(train=False))
    train_dl = torch.utils.data.DataLoader(
        dataset_train, batch_size=4, shuffle=True, num_workers=n_workers,
        pin_memory=True, drop_last=True, collate_fn=utils.collate_fn)
    val_dl = torch.utils.data.DataLoader(
        dataset_val, batch_size=1, shuffle=False, num_workers=n_workers,
        pin_memory=True, collate_fn=utils.collate_fn)

    params = [p for p in model.parameters() if p.requires_grad]
    optimizer = torch.optim.SGD(params, lr=0.002)

    # start_factor == end_factor == 1 keeps the learning rate constant at the
    # optimizer's lr; the scheduler is kept so another schedule can be swapped in.
    scheduler = torch.optim.lr_scheduler.LinearLR(optimizer, start_factor=1,
                                                  end_factor=1,
                                                  total_iters=30)

    for epoch in range(EPOCHS):
        train_one_epoch(model, optimizer, train_dl, device, epoch, print_freq=50)
        evaluate(model, val_dl, device=device)
        scheduler.step()
        model_path = f'ckpts-swa/fold_{i}_epoch{epoch}.pth'
        torch.save(model.state_dict(), model_path)
        

Epoch: [0]  [  0/324]  eta: 0:13:11  lr: 0.002000  loss: 0.6395 (0.6395)  loss_classifier: 0.1402 (0.1402)  loss_box_reg: 0.2301 (0.2301)  loss_mask: 0.2197 (0.2197)  loss_objectness: 0.0370 (0.0370)  loss_rpn_box_reg: 0.0125 (0.0125)  time: 2.4415  data: 1.2994  max mem: 6378
Epoch: [0]  [ 50/324]  eta: 0:00:32  lr: 0.002000  loss: 0.6867 (0.6978)  loss_classifier: 0.1513 (0.1553)  loss_box_reg: 0.2290 (0.2441)  loss_mask: 0.2400 (0.2391)  loss_objectness: 0.0310 (0.0337)  loss_rpn_box_reg: 0.0190 (0.0256)  time: 0.0733  data: 0.0001  max mem: 12474
Epoch: [0]  [100/324]  eta: 0:00:21  lr: 0.002000  loss: 0.7075 (0.7100)  loss_classifier: 0.1527 (0.1586)  loss_box_reg: 0.2551 (0.2532)  loss_mask: 0.2334 (0.2391)  loss_objectness: 0.0359 (0.0336)  loss_rpn_box_reg: 0.0191 (0.0255)  time: 0.0730  data: 0.0002  max mem: 12474
Epoch: [0]  [150/324]  eta: 0:00:15  lr: 0.002000  loss: 0.6737 (0.7117)  loss_classifier: 0.1531 (0.1593)  loss_box_reg: 0.2461 (0.2536)  loss_mask: 0.2298 (0.2391



Epoch: [1]  [  0/324]  eta: 0:07:55  lr: 0.002000  loss: 0.7273 (0.7273)  loss_classifier: 0.1774 (0.1774)  loss_box_reg: 0.2565 (0.2565)  loss_mask: 0.2481 (0.2481)  loss_objectness: 0.0185 (0.0185)  loss_rpn_box_reg: 0.0268 (0.0268)  time: 1.4672  data: 1.3856  max mem: 12602
Epoch: [1]  [ 50/324]  eta: 0:00:27  lr: 0.002000  loss: 0.6980 (0.7081)  loss_classifier: 0.1500 (0.1548)  loss_box_reg: 0.2533 (0.2533)  loss_mask: 0.2431 (0.2423)  loss_objectness: 0.0343 (0.0332)  loss_rpn_box_reg: 0.0206 (0.0246)  time: 0.0732  data: 0.0001  max mem: 12602
Epoch: [1]  [100/324]  eta: 0:00:19  lr: 0.002000  loss: 0.6729 (0.7075)  loss_classifier: 0.1479 (0.1561)  loss_box_reg: 0.2370 (0.2510)  loss_mask: 0.2336 (0.2426)  loss_objectness: 0.0322 (0.0334)  loss_rpn_box_reg: 0.0179 (0.0243)  time: 0.0731  data: 0.0004  max mem: 12602
Epoch: [1]  [150/324]  eta: 0:00:14  lr: 0.002000  loss: 0.6734 (0.7089)  loss_classifier: 0.1561 (0.1570)  loss_box_reg: 0.2358 (0.2509)  loss_mask: 0.2393 (0.242

KeyboardInterrupt: 

In [None]:
all_indices = np.arange(n_imgs)
# Seeded generator: an unseeded draw would produce a different train/validation
# split after every kernel restart, making results impossible to reproduce.
rng = np.random.default_rng(43)
# take random 1400 images for training (sampling without replacement)
train_index = rng.choice(all_indices, size=1400, replace=False)
# take the rest for validation
test_index = np.setdiff1d(all_indices, train_index)



In [None]:
# Sanity check: train_index and test_index must be mutually exclusive, so the
# size of their intersection is expected to be 0.
len(np.intersect1d(train_index, test_index))