In [1]:
# imports
!pip install segmentation_models_pytorch

import os
from tqdm.notebook import tqdm
import gc
from torch.nn import Parameter
import torch.nn.functional as F
import torch.nn as nn
import math
import timm
import pandas as pl
import torch
import numpy as np
from torch.amp import GradScaler
import cv2
import random
from tqdm.notebook import tqdm
from torch.autograd import Variable
from skimage.metrics import structural_similarity as ssim
import pandas as pd
import segmentation_models_pytorch as smp
import matplotlib.pyplot as plt

Collecting segmentation_models_pytorch
  Downloading segmentation_models_pytorch-0.4.0-py3-none-any.whl.metadata (32 kB)
Collecting efficientnet-pytorch>=0.6.1 (from segmentation_models_pytorch)
  Downloading efficientnet_pytorch-0.7.1.tar.gz (21 kB)
  Preparing metadata (setup.py) ... [?25l[?25hdone
Collecting pretrainedmodels>=0.7.1 (from segmentation_models_pytorch)
  Downloading pretrainedmodels-0.7.4.tar.gz (58 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m58.8/58.8 kB[0m [31m4.0 MB/s[0m eta [36m0:00:00[0m
[?25h  Preparing metadata (setup.py) ... [?25l[?25hdone
Collecting munch (from pretrainedmodels>=0.7.1->segmentation_models_pytorch)
  Downloading munch-4.0.0-py2.py3-none-any.whl.metadata (5.9 kB)
Downloading segmentation_models_pytorch-0.4.0-py3-none-any.whl (121 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m121.3/121.3 kB[0m [31m8.4 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading munch-4.0.0-py2.py3-none-any.w

In [2]:
def seed_everything(seed):
    """Seed every random number generator used in this notebook.

    Sets the ``PYTHONHASHSEED`` environment variable, seeds Python's
    ``random``, NumPy, and PyTorch (CPU plus current/all CUDA devices), and
    configures cuDNN for deterministic kernels with autotuning turned off.

    Args:
        seed: Integer seed shared by all generators.
    """
    os.environ['PYTHONHASHSEED'] = str(seed)

    # The generators are independent, so one pass over their seeding
    # functions is equivalent to calling each of them by hand.
    seeding_functions = (
        random.seed,
        np.random.seed,
        torch.manual_seed,
        torch.cuda.manual_seed,
        torch.cuda.manual_seed_all,
    )
    for seed_fn in seeding_functions:
        seed_fn(seed)

    # Trade cuDNN autotuning speed for run-to-run reproducibility.
    torch.backends.cudnn.benchmark = False
    torch.backends.cudnn.deterministic = True


seed_everything(56)

In [3]:
# Training masks are stored as one array indexed along axis 0.
# NOTE(review): presumably mask i belongs to the i-th training image in sorted
# filename order — confirm against the dataset description.
train_msk = np.load('/kaggle/input/ioai-2025-preparation-class-lesson-8-homework/msk_array.npy')
# File names are sorted so that the image order is deterministic.
train_images = sorted(os.listdir('/kaggle/input/ioai-2025-preparation-class-lesson-8-homework/data/train'))
test_images = sorted(os.listdir('/kaggle/input/ioai-2025-preparation-class-lesson-8-homework/data/test'))
# All-zero placeholder masks (same height/width as the training masks) so the
# test images can be fed through the same Dataset class as the training data.
test_msk = np.zeros((len(test_images), train_msk.shape[1], train_msk.shape[2]))

# Turn bare file names into full paths.
train_images = [f'/kaggle/input/ioai-2025-preparation-class-lesson-8-homework/data/train/{path}' for path in train_images]
test_images = [f'/kaggle/input/ioai-2025-preparation-class-lesson-8-homework/data/test/{path}' for path in test_images]
# Display the number of training images as the cell output.
len(train_images)

545

In [4]:
import cv2
import numpy as np
import torch
import albumentations as A

class Dataset(torch.utils.data.Dataset):
    """Image/mask pairs for binary segmentation.

    Each item is read from disk with OpenCV, paired with its mask (shifted by
    ``shift_value`` pixels, see :meth:`shift_mask`), optionally augmented, and
    returned as a pair of tensors.

    Args:
        path_image: Sequence of image file paths.
        msks: Indexable collection of masks; ``msks[i]`` belongs to
            ``path_image[i]``.
        transform: Optional albumentations-style callable invoked as
            ``transform(image=..., mask=...)`` and returning a dict with the
            keys ``"image"`` and ``"mask"``.
        shift_value: Non-negative number of pixels by which every mask is
            moved up and to the left before use.
        image_size: Side length used by :meth:`resize` (which is not called
            from ``__getitem__``).
    """

    def __init__(self, path_image, msks, transform=None, shift_value=25, image_size=512):
        self.path_image = path_image
        self.msks = msks
        self.transform = transform
        self.shift_value = shift_value
        self.image_size = image_size

    def resize(self, img, interp):
        """Resize ``img`` to ``image_size`` x ``image_size`` using the cv2 flag ``interp``."""
        return cv2.resize(img, (self.image_size, self.image_size), interpolation=interp)

    def __len__(self):
        """Number of images in the dataset."""
        return len(self.path_image)

    def shift_mask(self, mask):
        """Return a copy of ``mask`` moved up and left by ``shift_value`` pixels.

        The value at ``mask[r + s, c + s]`` ends up at ``[r, c]``; the rows and
        columns vacated at the bottom and right are filled with zeros. A shift
        of zero returns an unchanged copy, and a shift at least as large as the
        mask returns all zeros.

        Raises:
            ValueError: If ``shift_value`` is negative.
        """
        shift = self.shift_value
        if shift < 0:
            raise ValueError(f"shift_value must be non-negative, got {shift}")

        height, width = mask.shape[:2]
        shifted_mask = np.zeros_like(mask)
        # Explicit end indices instead of `[:-shift]`: a negative-zero slice
        # (`[:-0]`) is empty and would silently blank the mask when shift == 0.
        rows = max(height - shift, 0)
        cols = max(width - shift, 0)
        shifted_mask[:rows, :cols] = mask[shift:, shift:]
        return shifted_mask

    def __getitem__(self, i):
        """Load sample ``i`` and return ``(image, mask)`` as tensors.

        The image is a float32 tensor in (C, H, W) layout with values scaled
        to [-0.5, 0.5]. The mask keeps its channel-last layout: a 2-D mask is
        returned as (H, W, 1).

        Raises:
            FileNotFoundError: If OpenCV cannot read the image file.
        """
        path = self.path_image[i]
        # cv2.imread returns None (instead of raising) for a missing or
        # unreadable file; fail early with the offending path.
        # Note: OpenCV loads channels in BGR order and no conversion is done.
        img = cv2.imread(path)
        if img is None:
            raise FileNotFoundError(f"cv2.imread could not read image: {path}")

        msk = self.shift_mask(self.msks[i])

        # Ensure the mask has a channel dimension
        if msk.ndim == 2:
            msk = msk[:, :, None]

        # Apply albumentations transform if provided
        if self.transform is not None:
            augmented = self.transform(image=img, mask=msk)
            img, msk = augmented["image"], augmented["mask"]

        # Normalize image: scale pixel values to [-0.5, 0.5]
        img = (img / 255.0) - 0.5
        # Change image shape from (H, W, C) to (C, H, W) for PyTorch
        img = np.transpose(img, (2, 0, 1)).astype(np.float32)
        img = torch.from_numpy(img)
        # torch.from_numpy rejects negatively-strided views (which a flip may
        # produce); ascontiguousarray is a no-op for already contiguous data.
        msk = torch.from_numpy(np.ascontiguousarray(msk))
        return img, msk

# Training-time augmentation: a horizontal flip and a vertical flip, each
# applied independently with probability 0.5 to the image and its mask.
transform = A.Compose([A.HorizontalFlip(p=0.5), A.VerticalFlip(p=0.5)])

  check_for_updates()


In [5]:
# --- Hyperparameters -------------------------------------------------------
batch_size = 2
valid_batch_size = 2
epochs = 9
lr = 3.22e-4
clip_grad_norm = 15.28  # max gradient norm used for clipping in the training loop
DEVICE = 'cuda'

# --- DataLoader settings ---------------------------------------------------
params_train = {'batch_size': batch_size, 'shuffle': True, 'drop_last': True, 'num_workers': 2}
# Validation uses its own batch size (previously `valid_batch_size` was
# defined but the training batch size was passed here instead).
params_val = {'batch_size': valid_batch_size, 'shuffle': False, 'drop_last': False, 'num_workers': 2}

# --- Train / validation split ----------------------------------------------
# The last `val_samples` images (in sorted order) are held out for validation;
# augmentation is applied to the training split only.
val_samples = 5
train_loader = torch.utils.data.DataLoader(
    Dataset(train_images[:-val_samples], train_msk[:-val_samples], transform=transform),
    **params_train,
)
val_loader = torch.utils.data.DataLoader(
    Dataset(train_images[-val_samples:], train_msk[-val_samples:], transform=None),
    **params_val,
)

In [6]:
class Model(nn.Module):
    """Thin wrapper around a segmentation_models_pytorch U-Net.

    The U-Net is built with an ``efficientnet-b2`` encoder initialised from
    ``imagenet`` weights and a single output class. ``forward`` returns the
    U-Net output unchanged; no activation is applied in this class.
    """

    def __init__(self):
        super().__init__()

        unet_config = dict(
            encoder_weights='imagenet',
            classes=1,
            decoder_channels=[256, 128, 64, 32, 16],
        )
        self.unet = smp.Unet('efficientnet-b2', **unet_config)

    def forward(self, x):
        """Run ``x`` through the wrapped U-Net and return its output."""
        return self.unet(x)

In [7]:
def evaluate(model, val_loader, loss_fn=None, device=None):
    """Compute the sample-weighted mean loss of ``model`` over ``val_loader``.

    The model is switched to eval mode and gradients are disabled. Each batch
    loss is weighted by the batch size so a smaller final batch does not skew
    the mean.

    Args:
        model: Module to evaluate; left in eval mode on return.
        val_loader: Iterable yielding ``(img, target)`` tensor pairs.
        loss_fn: Callable ``loss_fn(outputs, target)`` returning a scalar
            tensor. Defaults to the notebook-level ``loss_func``.
        device: Device the batches are moved to. Defaults to the
            notebook-level ``DEVICE``.

    Returns:
        Tuple ``(val_dice, val_loss)`` where ``val_dice = 1 - val_loss``. This
        is a Dice score only when ``loss_fn`` is a Dice loss; it is derived
        from the loss, not from thresholded predictions. Both values are
        ``nan`` when the loader yields no samples.
    """
    # Fall back to the notebook globals at call time so existing two-argument
    # calls keep working.
    if loss_fn is None:
        loss_fn = loss_func
    if device is None:
        device = DEVICE
    # Mixed precision is only meaningful (and only requested) on CUDA.
    use_amp = torch.device(device).type == 'cuda'

    model.eval()
    total_loss = 0.0
    total_samples = 0

    with torch.no_grad():
        for img, target in val_loader:
            img = img.to(device)
            target = target.to(device)
            with torch.amp.autocast('cuda', enabled=use_amp):
                outputs = model(img)
                loss = loss_fn(outputs, target)
            n_items = img.size(0)
            total_loss += loss.item() * n_items
            total_samples += n_items

    # An empty loader has no defined mean; report nan instead of dividing by zero.
    if total_samples == 0:
        return float('nan'), float('nan')

    val_loss = total_loss / total_samples
    val_dice = 1 - val_loss
    return val_dice, val_loss



# Release memory left over from earlier runs of this cell.
gc.collect()
torch.cuda.empty_cache()

model = Model().to(DEVICE)
loss_func = smp.losses.DiceLoss(mode="binary", smooth=1.)

# The scheduler is stepped once per optimizer step, so the cosine schedule must
# span every batch of every epoch. `len(train_loader)` is already the number of
# batches per epoch and must not be divided by the batch size again.
num_train_steps = len(train_loader) * epochs

scaler = GradScaler('cuda')
optimizer = torch.optim.AdamW(model.parameters(), lr)
scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(optimizer, T_max=num_train_steps, eta_min=1e-6)

for epoch in range(epochs):
    model.train()
    average_loss = 0.0  # running sum of batch losses; divided by batch count for display
    tk0 = tqdm(enumerate(train_loader), total=len(train_loader))
    for batch_number, (img, target) in tk0:
        optimizer.zero_grad()
        img = img.to(DEVICE)
        target = target.to(DEVICE)

        with torch.amp.autocast('cuda'):
            outputs = model(img)
            loss = loss_func(outputs, target)

        scaler.scale(loss).backward()
        # Gradients must be unscaled before clipping so the threshold applies
        # to their true magnitude.
        scaler.unscale_(optimizer)
        torch.nn.utils.clip_grad_norm_(model.parameters(), clip_grad_norm)
        scaler.step(optimizer)
        scaler.update()
        scheduler.step()

        average_loss += loss.item()
        tk0.set_postfix(
            loss=average_loss / (batch_number + 1),
            lr=scheduler.get_last_lr()[0],
            stage="train",
            epoch=epoch,
        )

    # evaluate() puts the model in eval mode itself; the next epoch switches
    # back to train mode at the top of the loop.
    val_dice, val_loss = evaluate(model, val_loader)
    print(f"Val_dice: {val_dice}, val_dice_loss: {val_loss}")

Downloading: "https://github.com/lukemelas/EfficientNet-PyTorch/releases/download/1.0/efficientnet-b2-8bb594d6.pth" to /root/.cache/torch/hub/checkpoints/efficientnet-b2-8bb594d6.pth
100%|██████████| 35.1M/35.1M [00:00<00:00, 67.5MB/s]


  0%|          | 0/270 [00:00<?, ?it/s]

Val_dice: 0.4408156633377075, val_dice_loss: 0.5591843366622925


  0%|          | 0/270 [00:00<?, ?it/s]

Val_dice: 0.5579724550247193, val_dice_loss: 0.44202754497528074


  0%|          | 0/270 [00:00<?, ?it/s]

Val_dice: 0.5437312364578247, val_dice_loss: 0.45626876354217527


  0%|          | 0/270 [00:00<?, ?it/s]

Val_dice: 0.5724545955657959, val_dice_loss: 0.4275454044342041


  0%|          | 0/270 [00:00<?, ?it/s]

Val_dice: 0.47660033702850346, val_dice_loss: 0.5233996629714965


  0%|          | 0/270 [00:00<?, ?it/s]

Val_dice: 0.5512706160545349, val_dice_loss: 0.4487293839454651


  0%|          | 0/270 [00:00<?, ?it/s]

Val_dice: 0.5541805386543274, val_dice_loss: 0.44581946134567263


  0%|          | 0/270 [00:00<?, ?it/s]

Val_dice: 0.5551225423812867, val_dice_loss: 0.4448774576187134


  0%|          | 0/270 [00:00<?, ?it/s]

Val_dice: 0.5490256667137146, val_dice_loss: 0.4509743332862854


In [8]:
# Inference loader: keeps the sorted file order (no shuffling, no dropped
# batches) so predictions line up with `test_images`.
params_val = dict(batch_size=batch_size, shuffle=False, drop_last=False, num_workers=2)
test_loader = torch.utils.data.DataLoader(
    Dataset(test_images, test_msk),
    **params_val,
)

In [9]:
# Run the trained model over the test set and collect per-pixel probabilities.
model.eval()
preds = []
with torch.no_grad():
    # The second element of each batch is the all-zero placeholder mask; it is
    # not needed for prediction, so it is not copied to the GPU.
    for img, _ in test_loader:
        img = img.to(DEVICE)

        with torch.amp.autocast('cuda'):
            outputs = model(img)

        # Autocast may yield half-precision outputs; cast to float32 before the
        # sigmoid so probabilities just above 0.5 are not rounded down to 0.5.
        preds.append(outputs.float().sigmoid().cpu().numpy())

# Stack the batches and drop the channel axis (index 0 of axis 1; the model is
# built with a single output class).
preds = np.concatenate(preds)[:, 0, ...]

In [10]:
# Binarise: values strictly above 0.5 become 1, everything else 0 (uint8).
preds = np.greater(preds, 0.5).astype(np.uint8)

In [11]:
def rle_encode(x, fg_val=1):
    """
    Run-length encode the foreground pixels of a 2-D mask.

    Pixels are visited in column-major order (down, then right) and positions
    are 1-based. The result alternates ``start, length`` for every run of
    consecutive foreground pixels.

    Args:
        x:  numpy array of shape (height, width), 1 - mask, 0 - background
        fg_val: value that marks a foreground pixel
    Returns: run length encoding as a flat list of plain Python ints
        (empty list when there is no foreground pixel)
    """
    # .T sets Fortran order down-then-right
    dots = np.flatnonzero(np.asarray(x).T.flatten() == fg_val)
    if dots.size == 0:
        return []

    # A new run starts wherever two consecutive foreground indices are not adjacent.
    breaks = np.flatnonzero(np.diff(dots) > 1)
    starts = dots[np.concatenate(([0], breaks + 1))]
    ends = dots[np.concatenate((breaks, [dots.size - 1]))]

    run_lengths = np.empty(2 * starts.size, dtype=np.int64)
    run_lengths[0::2] = starts + 1          # 1-based start position
    run_lengths[1::2] = ends - starts + 1   # run length
    # tolist() converts NumPy scalars to Python ints so that later string
    # formatting does not depend on NumPy's scalar repr.
    return run_lengths.tolist()


def list_to_string(x):
    """
    Converts a list of numbers to a space-separated string
    Empty list (or None) returns '-'

    Each element is formatted with ``str()`` on its own rather than by
    stripping brackets from ``str(list)``: the latter uses the elements'
    ``repr``, which for NumPy scalars on NumPy >= 2 looks like
    ``np.int64(5)`` and would corrupt the output.
    """
    # len() instead of truthiness so that NumPy arrays are accepted as well.
    if x is None or len(x) == 0:
        return '-'
    return ' '.join(str(value) for value in x)

In [12]:
# Encode every predicted mask as a run-length string ('-' for an empty mask).
true_list = [list_to_string(rle_encode(ans)) for ans in preds]

# One row per prediction. Ids follow the zero-padded `NNN.jpg` pattern; the
# row count is taken from the predictions instead of a hard-coded 150 so the
# two columns can never disagree in length.
predict_df = pd.DataFrame({
    'Id': [f'{x:03d}.jpg' for x in range(len(true_list))],
    'Target': true_list,
})
predict_df.to_csv('submission.csv', index=False)