In [1]:
# `os` is imported in this cell because the path setup below is the first
# code to run on a fresh kernel (Restart Kernel -> Run All).
import os

import pandas as pd

# All competition inputs are resolved relative to the parent of the working
# directory: <parent>/input/...
cwd = os.path.abspath('..')
data_path = os.path.join(cwd, 'input')
train_path = os.path.join(data_path, 'train')
label_path = os.path.join(data_path, 'train.csv')
test_path = os.path.join(data_path, 'test')
csv_path = os.path.join(data_path, 'sample_submission.csv')
info_path = os.path.join(data_path, 'HuBMAP-20-dataset_information.csv')

# Three CSV tables: training labels, dataset information, and the sample
# submission (loaded as test_df).
label_df = pd.read_csv(label_path)
info_df = pd.read_csv(info_path)
test_df = pd.read_csv(csv_path)

# Last expression of the cell: displays the label table.
label_df

Unnamed: 0,id,encoding
0,2f6ecfcdf,296084587 4 296115835 6 296115859 14 296147109...
1,8242609fa,96909968 56 96941265 60 96972563 64 97003861 6...
2,aaa6a05cc,30989109 59 31007591 64 31026074 68 31044556 7...
3,cb2d976f4,78144363 5 78179297 15 78214231 25 78249165 35...
4,b9a3865fc,61271840 4 61303134 13 61334428 22 61365722 30...
5,b2dc8411c,56157731 21 56172571 45 56187411 51 56202252 5...
6,0486052bb,101676003 6 101701785 8 101727568 9 101753351 ...
7,e79de561c,7334642 14 7350821 41 7367001 67 7383180 82 73...
8,095bf7a1f,113277795 21 113315936 53 113354083 87 1133922...
9,54f2eec69,124967057 36 124997425 109 125027828 147 12505...


In [2]:
import os
import sys
import cv2
import argparse
from tqdm import tqdm
import warnings
# warnings.filterwarnings("ignore")

import numpy as np
from torch.utils.data import Dataset, DataLoader

import albumentations as A
from albumentations.pytorch import ToTensorV2
import matplotlib.pyplot as plt

# Tile folders live under <working directory>/train_tiles/{images,masks}.
# NOTE: this rebinds `data_path`, which the first cell pointed at the
# competition input directory.
project_path = os.path.abspath('.')
data_path = os.path.join(project_path, 'train_tiles')
images_path, masks_path = (
    os.path.join(data_path, leaf) for leaf in ('images', 'masks')
)

# Run configuration, kept in one dict so every tunable value is in one place.
# Only 'img_size' is read by the code visible in this notebook section; the
# remaining entries are presumably consumed by training code elsewhere.
cfg = dict(
    fold_num=5,
    seed=2021,
    model='resnet34',
    img_size=512,
    epochs=20,
    train_batch_size=8,
    val_batch_size=8,
    T_0=1,
    T_mul=2,
    lr=2e-4,
    min_lr=2e-6,
    accum_iter=2,
    weight_decay=1e-6,
    num_workers=8,
    device='cuda',
)

# Starts at +infinity so any finite loss compares as smaller.
best_loss = float('inf')

def _albu_transform(preferred, legacy):
    """Look up an albumentations transform class by name, with a fallback.

    Args:
        preferred: Attribute name to try first on the ``albumentations`` module.
        legacy: Attribute name to use when ``preferred`` is not available.

    Returns:
        ``A.<preferred>`` if the installed albumentations exposes it,
        otherwise ``A.<legacy>``.

    Raises:
        AttributeError: If neither name exists on the module.
    """
    found = getattr(A, preferred, None)
    return found if found is not None else getattr(A, legacy)


# Training-time augmentation pipeline: resize to the configured square size,
# random flips / geometric / colour changes, then normalisation and
# conversion to a tensor.
train_transforms = A.Compose([
    A.Resize(cfg['img_size'], cfg['img_size']),
    A.Transpose(p=0.5),
    A.HorizontalFlip(p=0.5),
    A.VerticalFlip(p=0.5),
    A.CLAHE(clip_limit=4.0, p=0.5),
    A.ShiftScaleRotate(shift_limit=0.2, scale_limit=0.2, rotate_limit=30, p=0.5),
    A.HueSaturationValue(p=0.5),
    A.Rotate(p=0.5),
    A.RandomBrightnessContrast(p=0.5),

    # With probability 0.5, apply exactly one of the distortions below.
    A.OneOf([
        A.OpticalDistortion(distort_limit=0.8),
        A.GridDistortion(num_steps=3, distort_limit=0.5),
        A.ElasticTransform(alpha=3),
        # The imgaug-backed IAA* transforms were deprecated in albumentations
        # 1.0 and dropped from later releases; prefer the native replacements
        # and fall back to the IAA names only on older installs.
        _albu_transform('PiecewiseAffine', 'IAAPiecewiseAffine')(p=0.5),
        _albu_transform('Sharpen', 'IAASharpen')(p=0.5),
        _albu_transform('Emboss', 'IAAEmboss')(p=0.5),
    ], p=0.5),

    # Same mean/std as val_transforms so both splits are scaled identically.
    A.Normalize(mean=(0.5936, 0.4990, 0.6150), std=(0.0757, 0.0967, 0.0623)),
    ToTensorV2(),
], p=1.0)

# Validation pipeline: deterministic only. Resize to the configured square
# size, normalise with the same statistics as training, convert to a tensor.
val_transforms = A.Compose(
    transforms=[
        A.Resize(height=cfg['img_size'], width=cfg['img_size']),
        A.Normalize(
            mean=(0.5936, 0.4990, 0.6150),
            std=(0.0757, 0.0967, 0.0623),
        ),
        ToTensorV2(),
    ],
    p=1.0,
)

class KidneyDataset(Dataset):
    """Dataset of image tiles paired with same-named PNG masks.

    Indexing yields ``(image, mask, label)``. ``label`` is 1 when the maximum
    of the raw (pre-transform) mask, cast to uint8, equals 1, and 0 otherwise.

    Args:
        images_path: Directory containing the image tiles.
        masks_path: Directory containing the masks. The mask for an image
            named ``<stem>.<ext>`` is read from ``<stem>.png``.
        transform: Optional callable invoked as
            ``transform(image=img, mask=mask)`` that returns a mapping with
            ``'image'`` and ``'mask'`` entries.
    """

    def __init__(self, images_path, masks_path, transform=None):
        self.images_path = images_path
        # os.listdir returns entries in arbitrary order; sorting keeps the
        # index -> tile mapping stable across runs and machines.
        self.images = sorted(os.listdir(images_path))
        self.masks_path = masks_path
        self.masks = sorted(os.listdir(masks_path))
        self.transform = transform

    def __len__(self):
        """Return the number of entries found in the images directory."""
        return len(self.images)

    def __getitem__(self, idx):
        """Load, optionally transform, and return ``(image, mask, label)``.

        Raises:
            OSError: If the image or its mask is missing or cannot be decoded.
        """
        img_name = self.images[idx]
        img_path = os.path.join(self.images_path, img_name)
        img = cv2.imread(img_path)
        # cv2.imread signals failure by returning None instead of raising.
        if img is None:
            raise OSError(f'Could not read image (missing or unreadable): {img_path}')
        img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)

        # splitext strips only the final extension, so stems that contain
        # dots still map to the right mask file.
        img_id = os.path.splitext(img_name)[0]
        mask_path = os.path.join(self.masks_path, img_id + '.png')
        mask = cv2.imread(mask_path, cv2.IMREAD_UNCHANGED)
        if mask is None:
            raise OSError(f'Could not read mask (missing or unreadable): {mask_path}')

        # Label is computed on the raw mask, before any augmentation.
        # NOTE(review): this assumes foreground pixels are stored as 1; a
        # 0/255 mask would always yield label 0 -- confirm the tile format.
        label = int(np.array(mask, dtype=np.uint8).max() == 1)

        if self.transform is not None:
            transformed = self.transform(image=img, mask=mask)
            img = transformed['image']
            mask = transformed['mask']
        return img, mask, label

def test_aug():
    """Plot randomly chosen tiles after the training augmentations.

    Builds a KidneyDataset over ``images_path`` / ``masks_path`` with
    ``train_transforms`` minus its Normalize and ToTensorV2 steps, draws 8
    random indices (repeats possible), and shows a 3-row figure: augmented
    image, augmented mask, and the mask overlaid on the image.
    """
    dataset = KidneyDataset(images_path=images_path, masks_path=masks_path, transform=train_transforms)

    # Keep every step except Normalize / ToTensorV2, presumably so the
    # outputs remain arrays that imshow can render directly.
    kept_steps = []
    for step in dataset.transform:
        if isinstance(step, (A.Normalize, ToTensorV2)):
            continue
        kept_steps.append(step)
    dataset.transform = A.Compose(kept_steps)

    n_cols = 8
    picks = np.random.randint(0, len(dataset), n_cols)
    _, axes = plt.subplots(nrows=3, ncols=n_cols, figsize=(35, 5*3))

    for col, sample_idx in enumerate(picks):
        image, mask, _ = dataset[sample_idx]
        image_ax, mask_ax, overlay_ax = axes[:, col]
        image_ax.imshow(image)
        mask_ax.imshow(mask)
        # Overlay: draw the image first, then the mask semi-transparently.
        overlay_ax.imshow(image)
        overlay_ax.imshow(mask, alpha=0.2)

    plt.tight_layout()
    plt.show()

# Render the augmentation preview. test_aug() lists images_path and
# masks_path via KidneyDataset, so both directories must already exist.
test_aug()

FileNotFoundError: [Errno 2] No such file or directory: '/datadisk/kg/kidney/deprecated/train_tiles/images'