In [1]:
import segmentation_models_pytorch as smp
import torch
import torchvision
import numpy as np
import pandas as pd
from torch.utils.data import Dataset, DataLoader
import os
from tqdm import tqdm
from sklearn.model_selection import train_test_split
import torch.nn as nn
import torch.functional as F
import torch.optim as optim
from torch.utils.data.distributed import DistributedSampler
from torch.nn.parallel import DistributedDataParallel as DDP
import PIL
import wandb

  from pandas.core.computation.check import NUMEXPR_INSTALLED


In [2]:
# Prefer the first CUDA device when one is visible, otherwise fall back to CPU.
device = torch.device("cuda:0") if torch.cuda.is_available() else torch.device("cpu")

In [3]:
# Display the selected device as the cell output.
device

device(type='cuda', index=0)

In [4]:
# Group the annotation rows by ImageId, collecting each image's EncodedPixels
# values into a single list, then flatten the result back into a regular frame.
csv_file = (
    pd.read_csv('../../files/train_ship_segmentations_v2.csv')
    .groupby('ImageId')['EncodedPixels']
    .apply(list)
    .reset_index()
)
image_ids = csv_file['ImageId'].values.tolist()
pixels = csv_file['EncodedPixels'].values.tolist()

In [5]:
# Derive the on-disk image path and the matching PNG mask path from each ImageId.
csv_file['fixed_inputs'] = csv_file['ImageId'].map(lambda image_id: f'../../files/train_v2/{image_id}')
csv_file['mask_paths'] = csv_file['ImageId'].map(
    lambda image_id: f"../../files/masks_v1/train/{image_id.split('.')[0]}.png"
)

In [6]:
# NOTE: this cell recomputes the same two columns as the cell directly above it;
# it is idempotent, so running it again leaves csv_file unchanged.
csv_file['fixed_inputs'] = ['../../files/train_v2/' + image_id for image_id in csv_file['ImageId']]
csv_file['mask_paths'] = [
    '../../files/masks_v1/train/' + image_id.split('.')[0] + '.' + 'png'
    for image_id in csv_file['ImageId']
]

In [7]:
# Print every input image path that is missing on disk (no output means all exist).
for image_path in tqdm(csv_file['fixed_inputs'].values.tolist()):
    if not os.path.exists(image_path):
        print(image_path)

100%|██████████| 192556/192556 [00:00<00:00, 425178.47it/s]


In [8]:
# Print every mask path that is missing on disk (no output means all exist).
for mask_path in tqdm(csv_file['mask_paths'].values.tolist()):
    if not os.path.exists(mask_path):
        print(mask_path)

100%|██████████| 192556/192556 [00:00<00:00, 416856.60it/s]


In [9]:
# Peek at the first generated image path.
csv_file['fixed_inputs'].iloc[0]

'../../files/train_v2/00003e153.jpg'

In [10]:
# Exclude one specific image from the dataset, identified by its path.
csv_file = csv_file.loc[csv_file['fixed_inputs'].ne('../../files/train_v2/6384c3e78.jpg')]

In [11]:
def split_datasets(csv_file, test_size = 0.01, val_size = None, random_state = 42):
    """Split a frame into train / validation / test partitions.

    The test partition is carved out first; the validation partition is then
    carved out of what remains. ``val_size`` is therefore a fraction of the
    remainder, not of the original frame.

    Args:
        csv_file: the data to split; passed straight to ``train_test_split``.
        test_size: ``test_size`` argument for the first split.
        val_size: ``test_size`` argument for the second split. ``None`` (the
            default) reuses ``test_size``, which is the original behaviour.
        random_state: seed used for both splits (42 by default, as before).

    Returns:
        A ``(train, val, test)`` tuple.
    """
    if val_size is None:
        val_size = test_size
    remainder, test = train_test_split(csv_file, test_size=test_size, random_state=random_state)
    train, val = train_test_split(remainder, test_size=val_size, random_state=random_state)
    return train, val, test

In [12]:
# Partition the cleaned frame using the default split fraction.
train, val, test = split_datasets(csv_file=csv_file)

In [13]:
class Version1Dataset(Dataset):
    """Un-augmented (image, mask) dataset read from the paths stored in a frame.

    Images are decoded as RGB and masks as single-channel grayscale. Both are
    resized to ``size`` and scaled by 1/255. The mask is then binarized with
    the same rule AugDataset uses (anything below 1.0 becomes 0.0), because
    resizing leaves fractional values along mask edges and ``iou_score``
    compares targets against exactly 0 and 1.

    Args:
        csv_file: frame with ``fixed_inputs`` (image paths) and ``mask_paths``
            (mask paths) columns.
        size: ``(height, width)`` every image and mask is resized to.
    """

    def __init__(self, csv_file, size=(512, 512)):
        self.input_images = csv_file['fixed_inputs'].values
        self.mask_images = csv_file['mask_paths'].values
        self.size = tuple(size)

    def __len__(self):
        """Return the number of (image, mask) pairs."""
        return len(self.input_images)

    def __getitem__(self, idx):
        """Load, resize and normalise the pair at position ``idx``.

        Returns:
            ``(img, mask)``: ``img`` is the image scaled by 1/255, ``mask``
            holds only 0.0 and 1.0.
        """
        read_mode = torchvision.io.ImageReadMode
        img = torchvision.io.read_file(self.input_images[idx])
        img = torchvision.io.decode_jpeg(img, read_mode.RGB)
        mask = torchvision.io.read_file(self.mask_images[idx])
        mask = torchvision.io.decode_image(mask, read_mode.GRAY)
        # Functional resize avoids building a new Resize module for every item.
        img = torchvision.transforms.functional.resize(img, self.size)
        mask = torchvision.transforms.functional.resize(mask, self.size)
        img = img / 255
        mask = mask / 255
        # Same binarization rule as AugDataset, so train and val targets agree.
        mask = torch.where(mask < 1.0, 0.0, 1.0)
        return img, mask

In [14]:
# First training loader: shuffled, 32 samples per batch.
train_dataset = Version1Dataset(train)
train_dataloader = DataLoader(dataset=train_dataset, batch_size=32, shuffle=True)

In [15]:
# U-Net with an "inceptionv4" encoder and no pretrained encoder weights,
# taking 3-channel input and producing a single output channel.
model = smp.Unet(encoder_name="inceptionv4", encoder_weights=None, in_channels=3, classes=1)

In [16]:
# create loss, optimizer and other stuff

In [17]:
# Loss function
def dice_bce_loss(inputs, targets):
    """Dice loss plus mean binary cross-entropy, computed from raw logits.

    Args:
        inputs: raw model outputs (logits). The sigmoid is applied here, so
            callers must not apply one themselves.
        targets: float tensor with the same number of elements as ``inputs``
            and values in [0, 1].

    Returns:
        ``(loss, number_of_pixels)``. ``loss`` is the scalar tensor
        ``(1 - dice) + BCE``, with the BCE averaged over every element.
        ``number_of_pixels`` is the batch size times ``512 * 512 * 3``; it is
        kept unchanged for backward compatibility (the training loops in this
        notebook discard it).
    """
    number_of_pixels = inputs.shape[0] * (512 * 512 * 3)
    # reshape (rather than view) also accepts non-contiguous tensors.
    logits = inputs.reshape(-1)
    targets = targets.reshape(-1)
    probs = torch.sigmoid(logits)
    intersection = (probs * targets).sum()
    # Clamp guards against 0/0 when both prediction and target sum to zero;
    # ordinary denominators are left untouched.
    denominator = (probs.sum() + targets.sum()).clamp_min(1e-7)
    dice_loss = 1 - (2. * intersection) / denominator
    # BCE is taken on the logits: sigmoid followed by binary_cross_entropy
    # saturates for large |logit| and yields a zero gradient there. The mean is
    # over pixels, not images; `reduction` replaces the deprecated `reduce` flag.
    BCE = nn.functional.binary_cross_entropy_with_logits(logits, targets, reduction='mean')
    final = dice_loss + BCE
    return final, number_of_pixels

In [18]:
# IOU metric
def iou_score(inputs, targets, threshold=0.5):
    """Intersection-over-union between thresholded predictions and targets.

    Args:
        inputs: prediction scores; elements strictly greater than ``threshold``
            count as positive. With the default of 0.5 this expects
            probabilities. For raw logits, apply a sigmoid first or pass
            ``threshold=0.0``.
        targets: ground-truth mask; elements greater than 0.5 count as positive
            (identical to the previous ``== 1`` / ``== 0`` test for binary
            masks, but well-defined for fractional values too).
        threshold: cut-off applied to ``inputs``.

    Returns:
        0-dim float tensor ``TP / (TP + FP + FN)``. When there are no positives
        in either tensor the score is 1.0 instead of NaN, so one empty batch
        cannot poison a running average.
    """
    predicted = (inputs > threshold).reshape(-1)
    actual = (targets > 0.5).reshape(-1)
    intersection = torch.logical_and(predicted, actual).sum()
    union = torch.logical_or(predicted, actual).sum()
    # The clamp only protects the division; torch.where selects 1.0 for the empty case.
    ratio = intersection / union.clamp(min=1)
    iou = torch.where(union > 0, ratio, torch.ones_like(ratio))
    return iou

In [19]:
# gather_datasets
# One Version1Dataset per split, then a loader for each.
train_dataset, val_dataset, test_dataset = (Version1Dataset(split) for split in (train, val, test))
train_dataloader = DataLoader(train_dataset, batch_size=88, shuffle=True, num_workers=24, prefetch_factor=2)
val_dataloader = DataLoader(val_dataset, batch_size=128, shuffle=False)
test_dataloader = DataLoader(test_dataset, batch_size=128, shuffle=False)

In [20]:
# Number of batches the training loader yields per pass.
len(train_dataloader)

2145

In [21]:
# Batch counts for both loaders, plus element counts based on 512 * 512 * 3 values per image.
train_batches, val_batches = len(train_dataloader), len(val_dataloader)
train_image_size = (512 * 512 * 3) * 1000
val_image_size = (512 * 512 * 3) * len(val)

In [22]:
# model = nn.DataParallel(model, device_ids=[0, 1, 2, 3, 4])

In [23]:
# x = torch.rand((32, 512, 512, 3)).to(device)

In [24]:
# next(model.parameters()).is_cuda

In [25]:
# model(x)

In [18]:
def train_model(model, train_dataset, val_dataset, epochs = 10):
    """Run a short train/validate loop and print the per-epoch loss and IoU.

    Args:
        model: network whose outputs are raw logits (``dice_bce_loss`` applies
            the sigmoid itself).
        train_dataset: iterable of ``(imgs, labels)`` batches used for
            training. ``len()`` of it is taken as the number of batches.
        val_dataset: iterable of ``(imgs, labels)`` batches used for validation.
        epochs: upper bound on the number of epochs. NOTE: the loop also stops
            after the third epoch (``epoch == 2``), a cap kept from the
            original code.
    """
    data_pointers = {
        'train': train_dataset,
        'val': val_dataset
    }
    optimizer = optim.SGD(model.parameters(), lr=0.001)
    if not next(model.parameters()).is_cuda:
        # model = nn.DataParallel(model)
        model = model.to(device)
    for epoch in range(epochs):
        for phase in ['train', 'val']:
            is_train = phase == 'train'
            # train(True) is training mode, train(False) is the same as eval().
            model.train(is_train)
            running_loss = 0.0
            running_iou = 0.0
            for imgs, labels in tqdm(data_pointers[phase]):
                imgs = imgs.to(device)
                labels = labels.to(device)

                # init optimizer
                optimizer.zero_grad()

                with torch.set_grad_enabled(is_train):
                    outputs = model(imgs)
                    loss, _ = dice_bce_loss(outputs, labels)
                    if is_train:
                        loss.backward()
                        optimizer.step()
                running_loss += loss.item()
                # iou_score thresholds at 0.5, so hand it probabilities rather than logits.
                running_iou += float(iou_score(torch.sigmoid(outputs.detach()), labels))

            # Average over the batches of the loader that was actually passed in,
            # not over notebook-level globals; max() avoids dividing by zero.
            num_batches = max(len(data_pointers[phase]), 1)
            epoch_loss = running_loss / num_batches
            epoch_iou = running_iou / num_batches
            print(f'{phase} Loss: {float(epoch_loss)}')
            print(f'{phase} IOU: {float(epoch_iou)}')
        if epoch == 2:
            break

In [19]:
# train_model(model, train_dataloader, val_dataloader)

Data augmentation, with some changes to the code base

In [19]:
def get_positive_samples(csv_file):
    """Add a ``sample_type`` column marking each row as positive or negative.

    A row is positive (1) when the first entry of its ``EncodedPixels`` list is
    a string, and negative (-1) otherwise, including when the list is empty.

    Args:
        csv_file: frame with an ``EncodedPixels`` column holding one list per row.

    Returns:
        The same frame object that was passed in. The column is written in
        place, so the caller's frame gains ``sample_type`` as well.
    """
    csv_file['sample_type'] = [
        1 if len(encoded) > 0 and isinstance(encoded[0], str) else -1
        for encoded in csv_file['EncodedPixels']
    ]
    return csv_file

In [20]:
# Tag each training row with sample_type (1 or -1). get_positive_samples writes
# the column onto the frame it is given and returns that same frame, so `train`
# itself gains the column too.
train_updated = get_positive_samples(train)

In [21]:
class AugDataset(Dataset):
    """(image, mask) dataset that augments a subset of the non-negative samples.

    Every sample carries a flag in ``mask_type``: -1 marks a negative sample
    (taken from the ``sample_type`` column) and is never augmented. A flag of 1
    means the sample is flipped horizontally and has its brightness and
    contrast adjusted. Any other value returns the sample un-augmented. The
    flags initially come straight from ``sample_type``;
    ``change_every_epoch`` re-draws them.

    Args:
        csv_file: frame with ``fixed_inputs``, ``mask_paths`` and
            ``sample_type`` columns.
        size: ``(height, width)`` every image and mask is resized to.
    """

    def __init__(self, csv_file, size=(512, 512)):
        self.input_images = csv_file['fixed_inputs'].values
        self.mask_images = csv_file['mask_paths'].values
        self.mask_type = csv_file['sample_type'].values
        # Positions of the negative samples; these keep the -1 flag forever.
        self.negative_index = np.where(self.mask_type == -1)[0]
        self.size = tuple(size)
        self._resample_factors()

    def __len__(self):
        """Return the number of (image, mask) pairs."""
        return len(self.input_images)

    def _resample_factors(self):
        """Draw a fresh brightness and contrast factor for every sample."""
        self.brightness_factors = np.random.uniform(1.0, 2.0, size = len(self))
        self.contrast_factors = np.random.uniform(2.0, 3.5, size = len(self))

    def change_every_epoch(self):
        """Re-draw which samples get augmented, and with which factors.

        Each sample receives a random 0/1 flag; negative samples are then
        forced back to -1.
        """
        new_values = np.random.randint(0, 2, size=len(self))
        new_values[self.negative_index] = -1
        self.mask_type = new_values
        self._resample_factors()

    def aug(self, img, mask, brightness_factor, contrast_factor):
        """Flip image and mask horizontally, then adjust the image's brightness and contrast."""
        img = torchvision.transforms.functional.hflip(img)
        mask = torchvision.transforms.functional.hflip(mask)
        img = torchvision.transforms.functional.adjust_brightness(img, brightness_factor)
        img = torchvision.transforms.functional.adjust_contrast(img, contrast_factor)
        return img, mask

    def __getitem__(self, idx):
        """Load the pair at ``idx``, augmenting it when its flag is 1.

        Returns:
            ``(img, mask)``: ``img`` is the image scaled by 1/255, ``mask``
            holds only 0.0 and 1.0.
        """
        # Explicit read modes (as in Version1Dataset) pin the channel count:
        # three for the image, one for the mask.
        read_mode = torchvision.io.ImageReadMode
        img = torchvision.io.read_file(self.input_images[idx])
        img = torchvision.io.decode_jpeg(img, read_mode.RGB)
        mask = torchvision.io.read_file(self.mask_images[idx])
        mask = torchvision.io.decode_image(mask, read_mode.GRAY)
        if self.mask_type[idx] == 1:
            img, mask = self.aug(img, mask, self.brightness_factors[idx], self.contrast_factors[idx])
        img = torchvision.transforms.functional.resize(img, self.size)
        mask = torchvision.transforms.functional.resize(mask, self.size)
        img = img / 255
        mask = mask / 255
        # Resizing leaves fractional values on mask edges; keep only full-intensity pixels.
        mask = torch.where(mask < 1.0, 0.0, 1.0)
        return img, mask

In [333]:
# Fetch sample 6 before and after re-drawing the augmentation flags so the two
# results can be compared.
data = AugDataset(train)
train_loader = DataLoader(data, shuffle=False)
img_1 = train_loader.dataset[6]
train_loader.dataset.change_every_epoch()
train_loader = DataLoader(data, shuffle=False)
img_2 = train_loader.dataset[6]

1
0




In [22]:
def train_model(model, train_dataset, val_dataset, weights_path, epochs = 100):
    """Train and validate ``model``, logging to Weights & Biases and saving weights every epoch.

    Args:
        model: network whose outputs are raw logits (``dice_bce_loss`` applies
            the sigmoid itself). It is wrapped in ``nn.DataParallel`` unless it
            already is one.
        train_dataset: iterable of ``(imgs, labels)`` batches used for
            training. ``len()`` of it is taken as the number of batches. If it
            exposes ``.dataset.change_every_epoch()``, that is called after
            every epoch.
        val_dataset: iterable of ``(imgs, labels)`` batches used for validation.
        weights_path: directory for the checkpoints; created when missing.
            Epoch ``N`` is saved as ``N.pth`` inside it.
        epochs: number of epochs to run.
    """
    # If fails reduce protobuf to lower version pip install protobuf==3.19
    wandb.init(project='ship-segmentation-pytorch-wb',
               config = {
                   'arch' : 'Unet- Incepv4',
                   # Log the epoch count actually used, not a fixed number.
                   'epochs' : epochs
               }
               )
    data_pointers = {
        'train': train_dataset,
        'val': val_dataset
    }
    optimizer = optim.SGD(model.parameters(), lr=0.001)
    # Decide on the wrapper by type rather than by where the parameters live,
    # so re-running after an interrupted run sets the model up the same way.
    if not isinstance(model, nn.DataParallel):
        model = nn.DataParallel(model)
    model = model.to(device)
    for epoch in range(epochs):
        for phase in ['train', 'val']:
            is_train = phase == 'train'
            # train(True) is training mode, train(False) is the same as eval().
            model.train(is_train)
            running_loss = 0.0
            running_iou = 0.0
            for imgs, labels in tqdm(data_pointers[phase]):
                imgs = imgs.to(device)
                labels = labels.to(device)

                # init optimizer
                optimizer.zero_grad()

                with torch.set_grad_enabled(is_train):
                    outputs = model(imgs)
                    loss, _ = dice_bce_loss(outputs, labels)
                    if is_train:
                        loss.backward()
                        optimizer.step()
                running_loss += loss.item()
                # iou_score thresholds at 0.5, so hand it probabilities rather
                # than logits; float() keeps the accumulator a plain number.
                running_iou += float(iou_score(torch.sigmoid(outputs.detach()), labels))

            # Per-batch averages; max() avoids dividing by zero on an empty loader.
            num_batches = max(len(data_pointers[phase]), 1)
            epoch_loss = running_loss / num_batches
            epoch_iou = running_iou / num_batches
            if is_train:
                wandb.log(
                    {'epoch_loss' : epoch_loss,
                    'epoch_iou' : epoch_iou}
                )
            else:
                wandb.log(
                    {'val_epoch_loss' : epoch_loss,
                    'val_epoch_iou' : epoch_iou}
                )
            print(f'{phase} Loss: {float(epoch_loss)}')
            print(f'{phase} IOU: {float(epoch_iou)}')
        # Re-draw the augmentation choices when the training data supports it.
        resample = getattr(getattr(train_dataset, 'dataset', None), 'change_every_epoch', None)
        if callable(resample):
            resample()
        os.makedirs(weights_path, exist_ok=True)
        # os.path.join keeps the file inside weights_path even without a trailing
        # slash. The saved keys carry DataParallel's "module." prefix.
        torch.save(model.state_dict(), os.path.join(weights_path, f'{epoch}.pth'))

In [29]:
# gather_datasets
# Training uses the augmenting dataset; validation and test stay un-augmented.
train_dataset = AugDataset(train_updated)
# Shuffle the training batches, matching the earlier training loader; without
# it every epoch visits the samples in the same fixed order.
train_dataloader = DataLoader(train_dataset, shuffle = True, batch_size = 84, num_workers=26, prefetch_factor=2)
val_dataset = Version1Dataset(val)
val_dataloader = DataLoader(val_dataset, shuffle = False, batch_size = 32)
test_dataset = Version1Dataset(test)
test_dataloader = DataLoader(test_dataset, shuffle = False, batch_size = 32)

In [30]:
# Launch training; one checkpoint per epoch is written under the weights directory.
train_model(
    model=model,
    train_dataset=train_dataloader,
    val_dataset=val_dataloader,
    weights_path='../../weights/torch_final_model/',
)

VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

VBox(children=(Label(value='Waiting for wandb.init()...\r'), FloatProgress(value=0.016671504183333734, max=1.0…

 21%|██        | 461/2247 [08:17<32:07,  1.08s/it]


KeyboardInterrupt: 