In [1]:
import segmentation_models_pytorch as smp
import torch
import torchvision
import numpy as np
import pandas as pd
from torch.utils.data import Dataset, DataLoader
import os
from tqdm import tqdm
from sklearn.model_selection import train_test_split
import torch.nn as nn
import torch.functional as F
import torch.optim as optim
from torch.utils.data.distributed import DistributedSampler
from torch.nn.parallel import DistributedDataParallel as DDP

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
# Run on the first CUDA GPU when PyTorch can see one; otherwise stay on the CPU.
if torch.cuda.is_available():
    device = torch.device("cuda:0")
else:
    device = torch.device("cpu")

In [3]:
# Read the annotation table, then collapse it so each ImageId appears once with
# all of its EncodedPixels values gathered into a list.
csv_file = pd.read_csv('../../files/train_ship_segmentations_v2.csv')
csv_file = (
    csv_file
    .groupby('ImageId')['EncodedPixels']
    .apply(list)
    .reset_index()
)
# Plain-Python copies of the two columns.
image_ids = csv_file['ImageId'].values.tolist()
pixels = csv_file['EncodedPixels'].values.tolist()

In [4]:
# Derive the on-disk location of every image and of its mask from the image id.
csv_file['fixed_inputs'] = [
    '../../files/train_v2/' + image_name
    for image_name in csv_file['ImageId']
]
# A mask shares the image's stem (the text before the first '.') and ends in .png.
csv_file['mask_paths'] = [
    '../../files/masks_v1/train/' + image_name.split('.')[0] + '.' + 'png'
    for image_name in csv_file['ImageId']
]

In [5]:
# NOTE(review): this cell recomputes the same two columns as the cell before it;
# re-running is idempotent, so it looks removable.
csv_file['fixed_inputs'] = [
    '../../files/train_v2/' + image_name
    for image_name in csv_file['ImageId']
]
csv_file['mask_paths'] = [
    '../../files/masks_v1/train/' + image_name.split('.')[0] + '.' + 'png'
    for image_name in csv_file['ImageId']
]

In [6]:
# Print every input image path that does not exist on disk
# (no printed paths means nothing is missing).
for x in tqdm(csv_file['fixed_inputs'].values.tolist()):
    if not os.path.exists(x):
        print(x)

100%|██████████| 192556/192556 [00:00<00:00, 463927.50it/s]


In [7]:
# Same existence check for the mask files: print each path that is missing.
for x in tqdm(csv_file['mask_paths'].values.tolist()):
    if not os.path.exists(x):
        print(x)

100%|██████████| 192556/192556 [00:00<00:00, 448485.93it/s]


In [8]:
# Show the first input path to sanity-check the path format.
csv_file['fixed_inputs'].iloc[0]

'../../files/train_v2/00003e153.jpg'

In [9]:
# Drop one specific image from the table.
# NOTE(review): the reason is not recorded here — presumably the file is
# corrupt or unreadable; confirm.
excluded_input = '../../files/train_v2/6384c3e78.jpg'
csv_file = csv_file[csv_file['fixed_inputs'].ne(excluded_input)]

In [10]:
def split_datasets(csv_file, test_size = 0.01):
    """Split a table into train, validation and test partitions.

    The test partition is carved off first. The validation partition is then
    carved from what remains using the same ``test_size``, so it is a fraction
    of the remainder rather than of the full table. Both splits use
    ``random_state=42``.

    Args:
        csv_file: The table to split (anything ``train_test_split`` accepts).
        test_size: Value forwarded as ``test_size`` to both splits.

    Returns:
        Tuple ``(train, val, test)``.
    """
    seed = 42
    remainder, test = train_test_split(csv_file, test_size=test_size, random_state=seed)
    train, val = train_test_split(remainder, test_size=test_size, random_state=seed)
    return train, val, test

In [11]:
# Partition the annotation table using split_datasets' default test_size (0.01).
train, val, test = split_datasets(csv_file)

In [12]:
class Version1Dataset(Dataset):
    """Dataset yielding ``(image, mask)`` tensor pairs read from disk.

    Args:
        csv_file: Table with a ``fixed_inputs`` column (paths decoded as JPEG)
            and a ``mask_paths`` column (paths decoded as grayscale images);
            row ``i`` of both columns describes the same sample.
        image_size: ``(height, width)`` that every image and mask is resized
            to. Defaults to ``(512, 512)``.
    """

    def __init__(self, csv_file, image_size = (512, 512)):
        self.input_images = csv_file['fixed_inputs'].values
        self.mask_images = csv_file['mask_paths'].values
        # Build the transforms once instead of on every __getitem__ call.
        self.resize_image = torchvision.transforms.Resize(image_size)
        # Nearest-neighbour resizing keeps every mask pixel at one of its
        # original values; interpolating would blend object boundaries into
        # intermediate values and turn a binary mask into soft labels.
        self.resize_mask = torchvision.transforms.Resize(
            image_size,
            interpolation=torchvision.transforms.InterpolationMode.NEAREST,
        )

    def __len__(self):
        """Return the number of samples."""
        return len(self.input_images)

    def __getitem__(self, idx):
        """Load, resize and rescale the sample at position ``idx``.

        Returns:
            Tuple ``(img, mask)``: the RGB image and its grayscale mask, both
            resized to ``image_size`` and divided by 255.
        """
        img = torchvision.io.read_file(self.input_images[idx])
        img = torchvision.io.decode_jpeg(img, torchvision.io.ImageReadMode.RGB)
        mask = torchvision.io.read_file(self.mask_images[idx])
        mask = torchvision.io.decode_image(mask, torchvision.io.ImageReadMode.GRAY)
        img = self.resize_image(img)
        mask = self.resize_mask(mask)
        # Rescale pixel values by 1/255.
        # NOTE(review): assumes masks are stored as 0/255 so they end up 0/1 — confirm.
        img = img / 255
        mask = mask / 255
        return img, mask

In [13]:
# Training split wrapped in a shuffling loader, 32 samples per batch.
# NOTE(review): both names are rebound further down with batch_size = 64, so
# this cell looks superseded.
train_dataset = Version1Dataset(csv_file=train)
train_dataloader = DataLoader(dataset=train_dataset, batch_size=32, shuffle=True)

In [14]:
# U-Net with an InceptionV4 encoder, no pretrained encoder weights,
# 3 input channels and a single output channel.
model = smp.Unet(encoder_name="inceptionv4", encoder_weights=None, in_channels=3, classes=1)

In [15]:
# create loss, optimizer and other stuff

In [16]:
# Loss function
def dice_bce_loss(inputs, targets):
    """Combined soft-Dice + binary cross-entropy loss computed from raw logits.

    Args:
        inputs: Raw model outputs (logits); the sigmoid is applied here, so the
            model must not apply one itself.
        targets: Ground-truth tensor with the same number of elements as
            ``inputs`` and values in ``[0, 1]``.

    Returns:
        Tuple ``(loss, number_of_pixels)``. ``loss`` is the scalar tensor
        ``(1 - dice) + BCE`` with BCE averaged over all elements;
        ``number_of_pixels`` is ``batch_size * 512 * 512 * 3``.
    """
    # NOTE(review): this count hard-codes a 512x512x3 sample regardless of the
    # actual tensor shape; left unchanged because it is part of the return value.
    number_of_pixels = inputs.shape[0] * (512 * 512 * 3)

    # reshape (unlike view) also accepts non-contiguous tensors.
    logits = inputs.reshape(-1)
    targets = targets.reshape(-1).to(logits.dtype)
    probs = logits.sigmoid()

    intersection = (probs * targets).sum()
    # eps in numerator and denominator makes the coefficient 1 (loss 0) instead
    # of 0/0 = NaN when both the prediction and the target sum to zero.
    eps = 1e-7
    dice = (2. * intersection + eps) / (probs.sum() + targets.sum() + eps)
    dice_loss = 1 - dice

    # Pixel-wise log loss averaged over every element. The logits form is
    # numerically stabler than applying BCE to already-squashed probabilities.
    BCE = nn.functional.binary_cross_entropy_with_logits(logits, targets, reduction='mean')
    final = dice_loss + BCE
    return final, number_of_pixels

In [17]:
# IOU metric
def iou_score(inputs, targets, threshold = 0.5):
    """Intersection-over-union between thresholded predictions and a mask.

    Args:
        inputs: Prediction scores; elements strictly greater than
            ``threshold`` count as positive.
        targets: Ground-truth mask with the same number of elements; elements
            greater than 0.5 count as positive, so masks that are not exactly
            0/1 are still scored on every pixel.
        threshold: Cut-off applied to ``inputs``. Defaults to 0.5.

    Returns:
        Scalar float tensor ``TP / (TP + FP + FN)``. When neither the
        prediction nor the target contains a positive element the score is
        1.0 rather than 0/0 = NaN.
    """
    # reshape (unlike view) also accepts non-contiguous tensors.
    predicted = (inputs > threshold).reshape(-1)
    actual = (targets > 0.5).reshape(-1)
    intersection = torch.logical_and(predicted, actual).sum()  # TP
    union = torch.logical_or(predicted, actual).sum()          # TP + FP + FN
    # clamp keeps the division defined; the empty case is patched to 1.0 below.
    iou = intersection.float() / union.clamp(min=1).float()
    return torch.where(union > 0, iou, torch.ones_like(iou))

In [18]:
# Build one dataset and one loader per split (64 samples per batch);
# only the training loader shuffles.
train_dataset = Version1Dataset(train)
val_dataset = Version1Dataset(val)
test_dataset = Version1Dataset(test)

train_dataloader = DataLoader(train_dataset, batch_size=64, shuffle=True)
val_dataloader = DataLoader(val_dataset, batch_size=64, shuffle=False)
test_dataloader = DataLoader(test_dataset, batch_size=64, shuffle=False)

In [19]:
# Number of batches the training loader yields per epoch.
len(train_dataloader)

2949

In [20]:
# Element count of one 512 x 512 x 3 sample.
pixels_per_image = 512 * 512 * 3
# NOTE(review): the train figure uses a fixed 1000 images while the val figure
# uses len(val) — confirm this is intentional.
train_image_size = 1000 * pixels_per_image
val_image_size = len(val) * pixels_per_image
# Batches per epoch for each loader.
train_batches, val_batches = len(train_dataloader), len(val_dataloader)

In [21]:
# model = nn.DataParallel(model, device_ids=[0, 1, 2, 3, 4])

In [22]:
# x = torch.rand((32, 512, 512, 3)).to(device)

In [23]:
# next(model.parameters()).is_cuda

In [24]:
# model(x)

In [25]:
def train_model(model, train_dataset, val_dataset, epochs = 10):
    """Train ``model`` with SGD, printing mean loss and IoU for each phase.

    Every epoch runs a training pass followed by a validation pass. Relies on
    the notebook-level ``device``, ``dice_bce_loss`` and ``iou_score``.

    Args:
        model: Network whose raw outputs are passed to ``dice_bce_loss``.
        train_dataset: Iterable of ``(imgs, labels)`` batches used for training.
        val_dataset: Iterable of ``(imgs, labels)`` batches used for validation.
        epochs: Upper bound on the number of epochs.
    """
    data_pointers = {
        'train': train_dataset,
        'val': val_dataset
    }
    optimizer = optim.SGD(model.parameters(), lr=0.001)
    # A model that is not on a GPU yet is wrapped in DataParallel and moved to `device`.
    if not next(model.parameters()).is_cuda:
        model = nn.DataParallel(model)
        model = model.to(device)
    for epoch in range(epochs):
        for phase in ['train', 'val']:
            is_training = phase == 'train'
            # train(True) is train mode, train(False) is eval mode.
            model.train(is_training)
            running_loss = 0.0
            running_iou = 0.0
            num_batches = 0
            for count, (imgs, labels) in enumerate(data_pointers[phase]):
                imgs = imgs.to(device)
                labels = labels.to(device)

                # Clear gradients left over from the previous step.
                optimizer.zero_grad()

                with torch.set_grad_enabled(is_training):
                    outputs = model(imgs)
                    loss, _ = dice_bce_loss(outputs, labels)
                    if is_training:
                        loss.backward()
                        optimizer.step()

                # The metric needs no gradients. The model emits logits, so
                # squash them to probabilities before iou_score thresholds them.
                with torch.no_grad():
                    iou = iou_score(torch.sigmoid(outputs.detach()), labels)

                running_loss += loss.item()
                running_iou += float(iou)
                if count % 10 == 0:
                    print(count)
                num_batches += 1

            # Average over the batches actually processed in this phase instead
            # of relying on notebook-level batch counters.
            denominator = max(num_batches, 1)
            epoch_loss = running_loss / denominator
            epoch_iou = running_iou / denominator
            print(f'{phase} Loss: {float(epoch_loss)}')
            print(f'{phase} IOU: {float(epoch_iou)}')
        # NOTE(review): hard stop after the third epoch regardless of `epochs`;
        # looks like a debugging leftover — confirm before removing.
        if epoch == 2:
            break

In [26]:
# Start training on the train/validation loaders with the default `epochs` value.
train_model(model, train_dataloader, val_dataloader)



0
10
20
30
40
50
60
70
80
90
100
110


KeyboardInterrupt: 

In [36]:
# np.random.random((5,5))

In [None]:
def make_and_compile_model(model):
    model = smp.Unet(
        encoder_name='inceptionv4',
        encoder_weights=None,
        in_channels=3,
        classes=1
    )
    criterion = 