In [19]:
from pathlib import Path
import random, cv2, os
import matplotlib.pyplot as plt

In [20]:
import numpy as np
from tqdm.auto import tqdm
import pandas as pd
import cv2
import matplotlib.pyplot as plt

import base64
import typing as t
import zlib
import json
import torch

In [21]:
class CFG:
    
    MODEL_PATH = "/home/viktor/Documents/kaggle/hubmap-2023/experiments/mvp-segformer-norm2-bce-loss/ckpts/segformer_epoch_83.pt"
    THRESHOLD = 0.4
    IMG_DIR = "/home/viktor/Documents/kaggle/hubmap-2023/kaggle-data/test"
    N_TRAIN = 1400 # Take first N_TRAIN images for training, rest for validation
    
    min_mask_area = 0 # minimum number of pixels of mask
    
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

In [22]:
import albumentations as A
from albumentations.pytorch import ToTensorV2
import json
from PIL import Image
from skimage.draw import polygon
from albumentations import Compose, Resize, HorizontalFlip, VerticalFlip, BboxParams
from torch.utils.data import Dataset, DataLoader


class HubmapDataset(Dataset):
        
    def __init__(self, image_dir):
        
        self.image_dir = image_dir
        self.image_files = os.listdir(self.image_dir)
        
        self.aug_list = [
                A.Resize(512, 512),
                A.Normalize(
                    mean= [0, 0, 0],
                    std= [1, 1, 1],
                    max_pixel_value = 255
                ),
                ToTensorV2(transpose_mask=True),
            ]
        
        # Create the augmentation pipeline
        self.augmentations = A.Compose(self.aug_list)

    def __len__(self):
        return len(self.image_files)
        
    def __getitem__(self, idx):
        
        image_path = os.path.join(self.image_dir, self.image_files[idx])
        image = Image.open(image_path)
        
        
        # # Get the mask
        # mask = np.zeros((512, 512), dtype=np.float32)
        
        # for annot in self.json_labels[idx]['annotations']:
        #     cords = annot['coordinates']
        #     if annot['type'] == "blood_vessel":
        #         for cord in cords:
        #             rr, cc = polygon(np.array([i[1] for i in cord]), np.asarray([i[0] for i in cord]))
        #             mask[rr, cc] = 1
                    
        image = np.array(image)

        # image = torch.tensor(np.array
        augmented = self.augmentations(image=image)
        image = augmented["image"]
        
        mean = torch.mean(image, dim=[1,2])
        std = torch.std(image, dim=[1,2])
        
        image = (image - mean[:, None, None]) / std[:, None, None]
        
        label_idx = self.image_files[idx]
        
        return image, label_idx


In [23]:
val_dataset = HubmapDataset(image_dir=CFG.IMG_DIR)

Some weights of the model checkpoint at nvidia/mit-b5 were not used when initializing SegformerForSemanticSegmentation: ['classifier.weight', 'classifier.bias']
- This IS expected if you are initializing SegformerForSemanticSegmentation from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing SegformerForSemanticSegmentation from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of SegformerForSemanticSegmentation were not initialized from the model checkpoint at nvidia/mit-b5 and are newly initialized: ['decode_head.batch_norm.bias', 'decode_head.linear_c.0.proj.weight', 'decode_head.batch_norm.num_batches_tracked', 'decode_head.linear_c.1.proj.weight', 'decode_head.batch_norm.running_mean', 'decode

In [25]:
def seg_to_det(
    seg: np.ndarray, 
):
    num_outputs, labels, stats, centroids = cv2.connectedComponentsWithStats(seg)
    boxes = stats[:, [cv2.CC_STAT_LEFT, cv2.CC_STAT_TOP, cv2.CC_STAT_WIDTH, cv2.CC_STAT_HEIGHT]]
    label_masks = [labels == i for i in range(num_outputs)]
    dets = {
        "boxes": np.stack([
            boxes[:, 0],
            boxes[:, 1],
            boxes[:, 0] + boxes[:, 2],
            boxes[:, 1] + boxes[:, 3],
        ], axis=1),
        "masks": [seg * m for m in label_masks],
    }
    dets["scores"] = [np.max(seg[m]) for m in label_masks]
    
    # remove dets element where 'boxes' = [0, 0, 512, 512]
    boxes_to_remove = [0, 0, 512, 512]
    indices_to_remove = np.where(np.all(dets["boxes"] == boxes_to_remove, axis=1))
    
    dets["boxes"] = np.delete(dets["boxes"], indices_to_remove, axis=0)
    dets["masks"] = [i for j, i in enumerate(dets["masks"]) if j not in indices_to_remove]
    dets["scores"] = np.delete(dets["scores"], indices_to_remove)
    
    
    # remove dets where np.sum(mask) < CFG.min_mask_area
    indices_to_remove = []
    for i, mask in enumerate(dets["masks"]):
        if np.sum(mask) < CFG.min_mask_area:
            indices_to_remove.append(i)
            
    dets["boxes"] = np.delete(dets["boxes"], indices_to_remove, axis=0)
    dets["masks"] = [i for j, i in enumerate(dets["masks"]) if j not in indices_to_remove]
    dets["scores"] = np.delete(dets["scores"], indices_to_remove)
    
    return dets

def predict(image):
    with torch.no_grad():
        pred = model(image)
        pred = torch.sigmoid(pred)
        pred = pred.squeeze().cpu().numpy()
    
    
    pred = (pred > CFG.THRESHOLD).astype(np.uint8)*255
    dets = seg_to_det(pred)
    pred_masks, pred_classes, scores, boxes = dets["masks"], [0]*len(dets["masks"]), dets["scores"], dets["boxes"]
    
    # convert pred_masks to uint8_t
    pred_masks = [m.astype(np.uint8) for m in pred_masks]
    
    return pred_masks, pred_classes, scores, boxes



### Our custom code

In [31]:
import base64
import numpy as np
from pycocotools import _mask as coco_mask
import typing as t
import zlib


def encode_binary_mask(mask: np.ndarray) -> t.Text:
  """Converts a binary mask into OID challenge encoding ascii text."""

  # check input mask --
  # if mask.dtype != np.bool:
  #   raise ValueError(
  #       "encode_binary_mask expects a binary mask, received dtype == %s" %
  #       mask.dtype)

  mask = np.squeeze(mask)
  if len(mask.shape) != 2:
    raise ValueError(
        "encode_binary_mask expects a 2d mask, received shape == %s" %
        mask.shape)

  # convert input mask to expected COCO API input --
  mask_to_encode = mask.reshape(mask.shape[0], mask.shape[1], 1)
  mask_to_encode = mask_to_encode.astype(np.uint8)
  mask_to_encode = np.asfortranarray(mask_to_encode)

  # RLE encode mask --
  encoded_mask = coco_mask.encode(mask_to_encode)[0]["counts"]

  # compress and base64 encoding --
  binary_str = zlib.compress(encoded_mask, zlib.Z_BEST_COMPRESSION)
  base64_str = base64.b64encode(binary_str)
  return base64_str


In [33]:

pbar = enumerate(val_dataset)
pbar = tqdm(pbar, total=len(val_dataset))

sub_dict = {}

# empty dataframe with columns id,height,width,prediction_string
df = pd.DataFrame(columns=['id', 'height', 'width', 'prediction_string'])

for i, (images, label_idx) in pbar:
    
    
    width, height = images.shape[2], images.shape[1]
    
    sub_dict['height'] = height
    sub_dict['width'] = width
    sub_dict['id'] = label_idx.replace(".tif", "")
    images = images.unsqueeze(0).to(CFG.device)    
    
    # make prediction
    # pred_masks, pred_classes, scores = predict(predictor, img)
    pred_masks, pred_classes, scores_pred, bboxes_pred = predict(images)
    
    
    min_area_dilation = 1e4
    # dilate all pred masks below min_area_dilation
    # for i, m in enumerate(pred_masks):
    #     if m.sum() < min_area_dilation:
    #         pred_masks[i] = binary_dilation(m/255).astype(np.uint8)*255
        
    
    min_area = 0e4
    max_area = 5e4
    min_height = 5
    min_width = 5
    # # remove pred_masks with area < 200
    remove_inds = []
    for i, m in enumerate(pred_masks):
        if (m.sum() < max_area and m.sum() > min_area) or (bboxes_pred[i,3] - bboxes_pred[i,1] < min_height or bboxes_pred[i,2] - bboxes_pred[i,0] < min_width):
            remove_inds.append(i)
    pred_masks = [m for i, m in enumerate(pred_masks) if i not in remove_inds]
    pred_classes = [c for i, c in enumerate(pred_classes) if i not in remove_inds]
    scores_pred = [s for i, s in enumerate(scores_pred) if i not in remove_inds]
    bboxes_pred = bboxes_pred[~np.isin(np.arange(len(bboxes_pred)), remove_inds)]
    
    
    prediction_string = ""
    for i, m in enumerate(pred_masks):
        # convert m to bool
        
        # rle encode m using encode_binary_mask
        pred_str = encode_binary_mask(m)
        
        prediction_string += "0 1.0 " + pred_str.decode("utf-8") + " "
    
    
    sub_dict['prediction_string'] = prediction_string
    
    print(sub_dict)
    
    
    df_sub = pd.DataFrame(sub_dict, index=[0])
    
    # concat to main dataframe
    df = pd.concat([df, df_sub], axis=0).reset_index(drop=True)
    

  0%|          | 0/1 [00:00<?, ?it/s]

{'height': 512, 'width': 512, 'id': '72e40acccadf', 'prediction_string': '0 1.0 eNoLSDA1z7Q39AdBAwMQhgAIC5lEl4XowAf8DdExCKLQgQnZZgBQhRym 0 1.0 eNpLSsoys0izN/Ux9jH2M/Y18jf0MzQAY38DMDYAU/5QQT8jENvfCKQUiE09Y2LTDABKXxDg 0 1.0 eNrLSg0yMsm2N/I3MDAEYT9DAwjLwB/EMoAygBgs5Q9U6G/oH5GVYAoAmMMN2Q== 0 1.0 eNplj8sKAjEMRX/ppq0igh3K4MpOgzIOFFy5qo/q/+9MOxVR00VKOCfhnmM2KnfGq0CsGEyDHnTk/uK658bf3XJvw80tRsvJqclmQBgwQAKL0Cd34GgfyO4aJnuktQCyrADyCs2g4qA6mL8McYnbCJ/+VbP1N1U/bDnwhtvRGoVbJFANR0GFElN7s1ttx3SiF3lqPIU= 0 1.0 eNqLywswMc6yN/Mx9jXyNwRCIz9DA0M/QzDbwN/I3wDIM/Yz9DOC8PwNDUDYH6TI2NfY2y7aPy0x3hgAjRwSHg== 0 1.0 eNoLzE8yM8uwN/Ux9jXyNfYz9jPyM/Q3NDAw8DcwBBFAYAhiA1kQDCUNDP0MMAFMiaG/uSfYIBD0MfE18jPxNfaNCUg2AADK1hom 0 1.0 eNoLzc8xsU+wN/Uy9jP1NvExNIADQ3/sbISIoT8IwtgIjMxD5yPrNPQ38osMCDECAKUWHYM= '}


In [34]:
df

Unnamed: 0,id,height,width,prediction_string
0,72e40acccadf,512,512,0 1.0 eNoLSDA1z7Q39AdBAwMQhgAIC5lEl4XowAf8DdEx...


In [36]:
df.to_csv("submission.csv", index=False)