In [1]:
!pip install segmentation_models_pytorch

import os
from tqdm.notebook import tqdm
import gc
from torch.nn import Parameter
import torch.nn.functional as F
import torch.nn as nn
import math
import timm
import pandas as pl
import torch
import numpy as np
from torch.amp import GradScaler
import cv2
import random
from tqdm.notebook import tqdm
from torch.autograd import Variable
from skimage.metrics import structural_similarity as ssim
import pandas as pd
import segmentation_models_pytorch as smp

Collecting segmentation_models_pytorch
  Downloading segmentation_models_pytorch-0.4.0-py3-none-any.whl.metadata (32 kB)
Collecting efficientnet-pytorch>=0.6.1 (from segmentation_models_pytorch)
  Downloading efficientnet_pytorch-0.7.1.tar.gz (21 kB)
  Preparing metadata (setup.py) ... [?25l[?25hdone
Collecting pretrainedmodels>=0.7.1 (from segmentation_models_pytorch)
  Downloading pretrainedmodels-0.7.4.tar.gz (58 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m58.8/58.8 kB[0m [31m1.9 MB/s[0m eta [36m0:00:00[0m
[?25h  Preparing metadata (setup.py) ... [?25l[?25hdone
Collecting munch (from pretrainedmodels>=0.7.1->segmentation_models_pytorch)
  Downloading munch-4.0.0-py2.py3-none-any.whl.metadata (5.9 kB)
Downloading segmentation_models_pytorch-0.4.0-py3-none-any.whl (121 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m121.3/121.3 kB[0m [31m4.1 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading munch-4.0.0-py2.py3-none-any.w

In [2]:
def seed_everything(seed):
    """Seed every random number generator used by the notebook.

    Args:
        seed: Integer seed applied to Python's ``random``, NumPy and PyTorch
            (CPU, current CUDA device and all CUDA devices). It is also
            exported as the ``PYTHONHASHSEED`` environment variable.
    """
    os.environ['PYTHONHASHSEED'] = str(seed)

    # Every entry accepts the seed as its only argument.
    seeders = (
        random.seed,
        np.random.seed,
        torch.manual_seed,
        torch.cuda.manual_seed,
        torch.cuda.manual_seed_all,
    )
    for apply_seed in seeders:
        apply_seed(seed)

    # Disable cuDNN autotuning and request deterministic kernels.
    cudnn = torch.backends.cudnn
    cudnn.benchmark = False
    cudnn.deterministic = True


seed_everything(228)

In [None]:
# Locations of the mask array and of the train/test image folders.
MASK_PATH = 'data/msk_array.npy'
TRAIN_DIR = 'data/data/train'
TEST_DIR = 'data/data/test'

train_msk = np.load(MASK_PATH)

# Sort the directory listings so the file order is deterministic.
train_images = [f'{TRAIN_DIR}/{name}' for name in sorted(os.listdir(TRAIN_DIR))]
test_images = [f'{TEST_DIR}/{name}' for name in sorted(os.listdir(TEST_DIR))]

# Images and masks are paired purely by position, so a count mismatch would
# silently misalign them (or fail much later inside a DataLoader worker).
assert len(train_images) == len(train_msk), (
    f'{len(train_images)} training images but {len(train_msk)} masks'
)

# All-zero placeholder masks with the same per-sample shape as the training
# masks, one per test image.
test_msk = np.zeros((len(test_images), train_msk.shape[1], train_msk.shape[2]))

len(train_images)

FileNotFoundError: [Errno 2] No such file or directory: '/kaggle/input/ioai-2025-preparation-class-lesson-8-homework/msk_array.npy'

In [None]:
class Dataset(torch.utils.data.Dataset):
    """Pairs image files on disk with in-memory masks, matched by index.

    Args:
        path_image: Sequence of image file paths.
        msks: Indexable collection of masks in the same order as
            ``path_image``. Each mask is indexed with ``[:, :, None]``, so a
            2-D NumPy mask of shape (H, W) is returned as (H, W, 1).

    NOTE(review): images are read with ``cv2.imread`` and are not converted
    from OpenCV's BGR channel order; confirm this is intended for the
    pretrained encoder. The mask is returned channel-last while the image is
    channel-first; confirm the loss in use is insensitive to that layout.
    """

    def __init__(self, path_image, msks):
        self.path_image = path_image
        self.msks = msks
        # Side length used by ``resize``. ``__getitem__`` does not call
        # ``resize``, so samples keep their native resolution.
        self.image_size = 512

    def resize(self, img, interp):
        """Return ``img`` resized to ``image_size`` x ``image_size`` using ``interp``."""
        target_size = (self.image_size, self.image_size)
        return cv2.resize(img, target_size, interpolation=interp)

    def __len__(self):
        """Number of samples, i.e. the number of image paths."""
        return len(self.path_image)

    def __getitem__(self, i):
        """Load sample ``i``.

        Returns:
            Tuple ``(img, msk)``: ``img`` is a float32 tensor in channel-first
            layout with values scaled to [-0.5, 0.5]; ``msk`` is the mask with
            a trailing singleton axis, converted with ``torch.from_numpy``.

        Raises:
            OSError: If the image file is missing or cannot be decoded.
        """
        path = self.path_image[i]
        img = cv2.imread(path)
        # cv2.imread signals failure by returning None instead of raising;
        # fail here with the offending path rather than with an opaque
        # TypeError on the arithmetic below.
        if img is None:
            raise OSError(f'Could not read image (missing or undecodable): {path}')

        msk = self.msks[i]
        msk = msk[:, :, None]

        # Scale 8-bit pixel values to [-0.5, 0.5], then HWC -> CHW.
        img = (img / 255.) - 0.5
        img = np.transpose(img, (2, 0, 1)).astype(np.float32)
        img = torch.from_numpy(img)
        msk = torch.from_numpy(msk)

        return img, msk


In [None]:
class Model(nn.Module):
    """Single-class U-Net with an EfficientNet-B2 encoder (ImageNet weights)."""

    def __init__(self):
        super().__init__()

        unet_config = dict(
            encoder_name='efficientnet-b2',
            encoder_weights='imagenet',
            decoder_channels=[256, 128, 64, 32, 16],
            classes=1,
        )
        self.unet = smp.Unet(**unet_config)

    def forward(self, x):
        """Pass ``x`` through the U-Net and return its output unchanged."""
        return self.unet(x)

In [None]:
# Release memory left over from earlier runs before building a new model.
gc.collect()
torch.cuda.empty_cache()

# --- Hyperparameters ---
batch_size = 4
valid_batch_size = 4
epochs = 7
lr = 3.22e-4
clip_grad_norm = 15.28  # max gradient norm passed to clip_grad_norm_
DEVICE = 'cuda'
params_train = {'batch_size': batch_size, 'shuffle': True, 'drop_last': True, 'num_workers': 2}
params_val = {'batch_size': valid_batch_size, 'shuffle': False, 'drop_last': False, 'num_workers': 2}

# The last 70 samples are held out from training.
# NOTE(review): val_loader is built here but this cell never iterates it, so
# no validation metric is computed during training.
train_loader = torch.utils.data.DataLoader(Dataset(train_images[:-70], train_msk[:-70]), **params_train)
val_loader = torch.utils.data.DataLoader(Dataset(train_images[-70:], train_msk[-70:]), **params_val)

model = Model().to(DEVICE)
# len(train_loader) already counts batches, and the scheduler is stepped once
# per batch, so the total number of scheduler steps is batches * epochs.
num_train_steps = len(train_loader) * epochs
loss_func = smp.losses.DiceLoss(mode="binary", smooth=1.)

scaler = GradScaler('cuda')
optimizer = torch.optim.AdamW(model.parameters(), lr)
scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(optimizer, num_train_steps, 1e-6)
for epoch in range(epochs):
    model.train()
    average_loss = 0.0
    tk0 = tqdm(enumerate(train_loader), total=len(train_loader))
    for batch_number, (img, target) in tk0:
        optimizer.zero_grad()
        img = img.to(DEVICE)
        target = target.to(DEVICE)
        with torch.amp.autocast('cuda'):
            outputs = model(img)
            loss = loss_func(outputs, target)

        scaler.scale(loss).backward()
        # Unscale first so the clipping threshold applies to the true
        # gradient magnitudes rather than the loss-scaled ones.
        scaler.unscale_(optimizer)
        torch.nn.utils.clip_grad_norm_(model.parameters(), clip_grad_norm)
        scaler.step(optimizer)
        scaler.update()
        scheduler.step()

        # .item() yields a plain Python float for the running mean.
        average_loss += loss.item()
        tk0.set_postfix(loss=average_loss / (batch_number + 1), lr=scheduler.get_last_lr()[0], stage="train", epoch=epoch)

In [None]:
# Test-time loader: fixed order and no dropped samples.
params_val = dict(batch_size=batch_size, shuffle=False, drop_last=False, num_workers=2)
test_dataset = Dataset(test_images, test_msk)
test_loader = torch.utils.data.DataLoader(test_dataset, **params_val)

In [None]:
# Run the model over the test loader and collect per-pixel sigmoid outputs.
model.eval()
batch_probs = []
with torch.no_grad():
    # The mask half of each batch is not needed for prediction, so it is
    # never moved to the GPU.
    for img, _ in test_loader:
        img = img.to(DEVICE)

        with torch.amp.autocast('cuda'):
            outputs = model(img)

        batch_probs.append(outputs.sigmoid().to('cpu').numpy())

# Stack the batches and keep channel 0 of axis 1 (presumably the single
# class channel of an (N, 1, H, W) output; confirm against the model).
preds = np.concatenate(batch_probs)[:, 0, ...]

In [None]:
# Binarise: values strictly above 0.5 become 1, everything else 0 (uint8).
preds = np.greater(preds, 0.5).astype(np.uint8)

In [None]:
def rle_encode(x, fg_val=1):
    """Run-length encode the foreground pixels of a mask.

    The mask is scanned in column-major order (down, then right) and
    positions are 1-based.

    Args:
        x: numpy array of shape (height, width), 1 - mask, 0 - background
        fg_val: Value that marks a foreground pixel.

    Returns:
        Flat list of plain Python ints ``[start_1, length_1, start_2,
        length_2, ...]``; an empty list when there is no foreground pixel.
    """
    # .T sets Fortran order down-then-right
    flat = np.asarray(x).T.flatten()
    dots = np.flatnonzero(flat == fg_val)
    if dots.size == 0:
        return []

    # A new run begins at the first foreground index and wherever the gap to
    # the previous foreground index is larger than one.
    is_run_start = np.concatenate(([True], np.diff(dots) > 1))
    start_pos = np.flatnonzero(is_run_start)  # positions within ``dots``
    starts = dots[start_pos]
    lengths = np.diff(np.append(start_pos, dots.size))

    encoded = np.empty(2 * starts.size, dtype=np.int64)
    encoded[0::2] = starts + 1  # 1-based start positions
    encoded[1::2] = lengths
    # tolist() converts NumPy scalars to built-in ints, so str()/repr() of
    # the result is identical on every NumPy version.
    return encoded.tolist()


def list_to_string(x):
    """Convert a list to a space-separated string.

    Args:
        x: List of values, e.g. a run-length encoding.

    Returns:
        The elements joined by single spaces, or '-' for an empty list.
    """
    if not x:
        return '-'
    # Join str() of each element instead of post-processing str(list):
    # str(list) uses repr() of the elements, which for NumPy scalars under
    # NumPy >= 2 is e.g. 'np.int64(5)' rather than '5'.
    return ' '.join(str(value) for value in x)

In [None]:
# Encode every predicted mask as an RLE string ('-' for an empty mask).
true_list = [list_to_string(rle_encode(ans)) for ans in preds]

# One row per prediction. The row count follows the predictions instead of a
# hard-coded 150, so a different test-set size cannot cause a length mismatch.
# NOTE(review): Ids are generated as '000.jpg', '001.jpg', ... rather than
# read from test_images; confirm the sorted test files follow that naming.
predict_df = pd.DataFrame({
    'Id': [f'{x:03d}.jpg' for x in range(len(true_list))],
    'Target': true_list,
})
predict_df.to_csv('submission.csv', index=False)