# Airbus Ship Detection

In [None]:
import os
import random
import math

import matplotlib.image as mpimg
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from PIL import Image
from skimage.io import imread
from sklearn.model_selection import train_test_split
#from skimage.morphology import label
from tqdm import tqdm


# Define the constants
# Root of the dataset; the image folders and the CSV below are resolved relative to it.
DATASET_PATH = "/kaggle/input/airbus-ship-detection"
TRAIN_FOLDER = os.path.join(DATASET_PATH, "train_v2")
TEST_FOLDER = os.path.join(DATASET_PATH, "test_v2")
# CSV loaded with pd.read_csv further down; the notebook reads its
# 'ImageId' and 'EncodedPixels' columns.
CSV_PATH = os.path.join(DATASET_PATH, "train_ship_segmentations_v2.csv")

# Style the plots and display them inline
plt.style.use("fivethirtyeight")
%matplotlib inline

In [None]:
# Define some helper functions for RLE encoding and decoding,
# which will be used later for converting
# the predicted ship masks to the required format

def multi_rle_encode(img):
    '''
    Encode every connected component of a mask as a separate RLE string

    Args:
        img (numpy.ndarray): Mask array with a trailing channel axis;
            only channel 0 is labelled

    Returns:
        list: One run-length encoded string per labelled component
    '''
    # Imported locally because the module-level `label` import is commented
    # out, which left this function failing with a NameError.
    from skimage.measure import label

    labels = label(img[:, :, 0])
    # Label 0 is the background, so only the positive labels are encoded
    return [rle_encode(labels == k) for k in np.unique(labels[labels > 0])]

def rle_encode(img):
    '''
    Run-length encode a binary mask

    Args:
        img (numpy.ndarray): Mask array, 1 - mask, 0 - background

    Returns:
        str: Space-separated "start length" pairs (1-based starts, pixels
            walked column by column); empty string for an all-zero mask
    '''
    # Transposing before flattening walks the pixels column by column
    column_major = img.T.flatten()
    # Zero padding on both ends guarantees every run has a start and an end
    padded = np.concatenate([[0], column_major, [0]])
    # 1-based positions where the value changes; run starts and run ends alternate
    boundaries = np.flatnonzero(padded[1:] != padded[:-1]) + 1
    # Convert every run end into a run length
    boundaries[1::2] -= boundaries[::2]
    return ' '.join(map(str, boundaries))

def rle_decode(mask_rle, shape=(768, 768)):
    '''
    Convert run-length encoded mask to a binary mask

    The encoding walks the pixels column by column (see rle_encode), so the
    flat buffer is laid out column-major.

    Args:
        mask_rle (str): Run-length encoded mask string made of
            "start length" pairs with 1-based starts
        shape (tuple): (height, width) of the output binary mask

    Returns:
        numpy.ndarray: Binary uint8 mask array of shape `shape`

    Raises:
        ValueError: If the string does not contain complete start/length pairs
    '''
    # Split the run-length encoded string
    tokens = mask_rle.split()
    if len(tokens) % 2:
        raise ValueError("RLE string must contain start/length pairs")
    starts = np.asarray(tokens[0::2], dtype=int) - 1
    lengths = np.asarray(tokens[1::2], dtype=int)
    ends = starts + lengths

    # Initialize an array for the binary mask
    img = np.zeros(shape[0] * shape[1], dtype=np.uint8)

    # Set the pixels corresponding to the mask region to 1
    for start, end in zip(starts, ends):
        img[start:end] = 1

    # The buffer is column-major: reshape to (width, height) and transpose.
    # Reshaping straight to `shape` is only correct for square masks; for any
    # other shape it returns the transposed shape with scrambled pixels.
    return img.reshape((shape[1], shape[0])).T


def masks_as_image(in_mask_list, dim = (768, 768)):
    '''
    Combine individual ship masks into a single mask array

    Args:
        in_mask_list (list): List of ship masks (run-length encoded strings);
            entries that are not strings are skipped
        dim (tuple): Shape of each decoded mask and of the combined mask

    Returns:
        numpy.ndarray: Combined int16 mask array of shape dim + (1,)
    '''
    # Initialize an array to hold the combined mask
    all_masks = np.zeros(dim, dtype=np.int16)

    # Iterate over the ship masks and add them to the combined mask
    for mask in in_mask_list:
        if isinstance(mask, str):
            # Decode at the requested size; relying on rle_decode's default
            # shape broke every call that passed a non-default `dim`.
            all_masks += rle_decode(mask, shape=dim)

    # Expand dimensions to match the expected shape
    return np.expand_dims(all_masks, -1)

In [None]:
# Function to view a random image from the training set
def view_random_image(target_dir):
    '''
    View a random image from the specified directory

    Args:
        target_dir (str): Path to the directory containing images

    Returns:
        numpy.ndarray: Image array
    '''
    # Pick one directory entry at random
    image_name = random.choice(os.listdir(target_dir))

    # Read in the image and plot it; os.path.join copes with a trailing
    # separator in target_dir
    img = mpimg.imread(os.path.join(target_dir, image_name))
    plt.imshow(img)
    plt.axis("off")

    # Print the shape of the image
    print(f"Image shape: {img.shape}")

    return img


# Dataset

In [None]:
import os

# Walk the dataset tree and report, for every directory, how many
# sub-directories and files it holds (every file is reported as an image).
for dirpath, dirnames, filenames in os.walk(DATASET_PATH):
  print(f"There are {len(dirnames)} directories and {len(filenames)} images in '{dirpath}'.")

In [None]:
# Load the segmentation CSV and preview its first rows
df = pd.read_csv(CSV_PATH)
df.head()

In [None]:
# Show one random training image; the pixel array is kept in `img`
img = view_random_image(TRAIN_FOLDER)

In [None]:
# Distinct image names in the CSV (an image can appear in several rows)
distinct_image_ids = pd.unique(df['ImageId'])
num_images = len(distinct_image_ids)
print(f"The number of images in the dataset: {num_images}")

In [None]:
# Count images, not CSV rows: an image appears once per mask, so counting the
# rows that carry a mask and subtracting that from the number of images mixes
# two different units. Both figures below are image counts.
has_mask = df['EncodedPixels'].notna() & (df['EncodedPixels'] != '')
num_ships = df.loc[has_mask, 'ImageId'].nunique()
num_non_ships = df['ImageId'].nunique() - num_ships
print('Number of images with ships:', num_ships)
print('Number of images without ships:', num_non_ships)

In [None]:
# Plot a bar chart of the ship vs. non-ship distribution
# (bar heights are num_ships and num_non_ships computed in the previous cell)
plt.figure(figsize=(10, 6))
plt.bar(['Ships', 'Non-Ships'], [num_ships, num_non_ships])
plt.xlabel('Category')
plt.ylabel('Count')
plt.title('Ship vs. Non-Ship')
plt.show()

In [None]:
# Masks per image, largest first. Only rows that really carry a mask are
# counted: value_counts() on ImageId would also count the placeholder row of
# an image without ships and report it as one mask.
mask_present = df["EncodedPixels"].notna() & (df["EncodedPixels"] != "")
number_of_masks_counts = (
    mask_present.groupby(df["ImageId"]).sum().sort_values(ascending=False)
)
number_of_masks_counts

In [None]:
# Create a copy of the working dataframe
ship_df = df.copy()
# 1 for a row that carries a mask, 0 for a row whose EncodedPixels is null
ship_df['NumberOfShips'] = ship_df['EncodedPixels'].notnull().astype(int)
# NOTE(review): replace(0, '') only touches cells equal to 0. Missing masks
# appear to be null at this point (see notnull() above), so this line looks
# like a no-op — confirm the intent (fillna('')?).
ship_df['EncodedPixels'] = ship_df['EncodedPixels'].replace(0, '')
ship_df

In [None]:
# Group by the image name
# A plain .sum() glues the RLE strings of one image together with no
# separator, fusing the last length of one mask with the first start of the
# next. Joining with a space keeps the combined value a valid RLE string;
# images without a mask end up with an empty string. NumberOfShips is summed
# as before.
ship_df = (
    ship_df.assign(EncodedPixels=ship_df['EncodedPixels'].fillna('').astype(str))
    .groupby('ImageId')
    .agg({'EncodedPixels': ' '.join, 'NumberOfShips': 'sum'})
    .reset_index()
)
ship_df

In [None]:
# Plot a histogram of the per-image counts held in number_of_masks_counts
plt.figure(figsize=(10, 6))
plt.hist(number_of_masks_counts, bins=10)  # Create a histogram with 10 bins
plt.title("Number of separate masks per image")
plt.xlabel("Count")
plt.ylabel("Frequency")
plt.show()

# Working with NaNs

In [None]:
# Encode an all-background mask and print whether the result is the empty
# string, i.e. whether "" is a consistent stand-in for "no ship".
rle_with_zeros = rle_encode(np.zeros((768, 768, 1)))
print(rle_with_zeros == "")

In [None]:
# Replace every missing value in df with an empty string
df = df.fillna("")
df.head()

## Undersampling negative samples

In [None]:
# Histogram of the per-image NumberOfShips values
ship_df['NumberOfShips'].plot.hist()

In [None]:
# How many images there are for each NumberOfShips value
ship_df['NumberOfShips'].value_counts()

In [None]:
# 80/20 split stratified by the per-image ship count. The seed is fixed so
# that the split (and every validation score computed on it) is the same on
# every notebook run.
train_ships, valid_ships = train_test_split(ship_df,
                                            test_size = 0.2,
                                            stratify = ship_df['NumberOfShips'],
                                            random_state = 42)

In [None]:
# Display the training split
train_ships

In [None]:
# Display the validation split
valid_ships

In [None]:
def undersample_zeros(df, n, random_state=None):
    '''
    Keep every row with ships and at most n randomly chosen rows without

    Args:
        df (pandas.DataFrame): Frame with a 'NumberOfShips' column
        n (int): Number of rows with NumberOfShips == 0 to keep; capped at
            the number of such rows available
        random_state: Optional seed forwarded to DataFrame.sample

    Returns:
        pandas.DataFrame: Rows with ships followed by the sampled empty rows
    '''
    is_empty = df['NumberOfShips'] == 0
    empty_rows = df[is_empty]
    # DataFrame.sample raises ValueError when asked for more rows than exist,
    # so the request is capped at what is available.
    zeros = empty_rows.sample(n=min(n, len(empty_rows)), random_state=random_state)
    nonzeros = df[~is_empty]
    return pd.concat((nonzeros, zeros))

In [None]:
# Number of empty images to keep, expressed as a fraction of each split's
# size *before* undersampling.
# NOTE(review): because n is derived from the full split length, the share of
# empty images in the result is n / (n + images with ships), which is not
# necessarily 20% — confirm this is the intended meaning of the constant.
PERCENTAGE_WITHOUT_SHIPS = 0.2
train_ships = undersample_zeros(train_ships, int(len(train_ships) * PERCENTAGE_WITHOUT_SHIPS))
valid_ships = undersample_zeros(valid_ships, int(len(valid_ships) * PERCENTAGE_WITHOUT_SHIPS))
# Histogram of ship counts in the undersampled training split, one bin per count 0..8
train_ships['NumberOfShips'].plot.hist(bins=np.arange(10))

In [None]:
# Training split: images with at least one ship (1) vs. images without (0)
(train_ships['NumberOfShips'] > 0).astype(int).value_counts().plot.bar()

In [None]:
# Validation split: images with at least one ship (1) vs. images without (0)
(valid_ships['NumberOfShips'] > 0).astype(int).value_counts().plot.bar()

In [None]:
def show(image, mask):
    '''
    Print the shapes of an image and its mask, then plot them side by side

    Args:
        image: Image array drawn in the right panel
        mask: Mask array drawn in grayscale in the left panel
    '''
    print(image.shape)
    print(mask.shape)

    # One figure, two panels: mask on the left, image on the right
    plt.figure(figsize=(12, 6))
    panels = (
        (mask, 'Ground Truth Mask', {'cmap': 'gray'}),
        (image, 'Image', {}),
    )
    for position, (pixels, title, imshow_options) in enumerate(panels, start=1):
        plt.subplot(1, 2, position)
        plt.imshow(pixels, **imshow_options)
        plt.title(title)
        plt.axis('off')

    plt.show()

In [None]:
import cv2

# Pick a random image from the training split and show it next to its mask
image_name = np.random.choice(np.array(train_ships['ImageId']))
image_path = os.path.join(TRAIN_FOLDER, image_name)
image = cv2.imread(image_path)
# Convert from BGR to RGB channel order before plotting
image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
# Merge every EncodedPixels row of this image into one mask
mask = masks_as_image(df[df['ImageId'] == image_name]['EncodedPixels'])
show(image, mask)

# Train Unet + Mix Vision Transformer

In [None]:
!pip install git+https://github.com/qubvel/segmentation_models.pytorch

In [None]:
import os
os.environ['CUDA_VISIBLE_DEVICES'] = '0'

import numpy as np
import cv2
import matplotlib.pyplot as plt
import torch
import segmentation_models_pytorch as smp
from segmentation_models_pytorch import utils
from torch.utils.data import DataLoader
from torch.utils.data import Dataset as BaseDataset

# Size that images and masks are resized to before training (passed to cv2.resize)
train_dim = (544, 544)

ENCODER = 'mit_b0'
ENCODER_WEIGHTS = 'imagenet'
CLASSES = ['ship']
ACTIVATION = 'sigmoid'
# Fall back to the CPU when no GPU is visible instead of failing when the
# model is moved to the device; the inference cell uses the same check.
DEVICE = 'cuda' if torch.cuda.is_available() else 'cpu'
BATCH_SIZE = 16

In [None]:
import albumentations as albu


def get_training_augmentation():
    """Build the training augmentation pipeline.

    Returns:
        albumentations.Compose: a horizontal flip followed by a vertical
            flip, each created with p=0.5
    """
    return albu.Compose([
        albu.HorizontalFlip(p=0.5),
        albu.VerticalFlip(p=0.5),
    ])


def get_validation_augmentation():
    """Build the validation augmentation pipeline.

    Returns:
        albumentations.Compose: a pipeline with no transforms in it
    """
    # No transforms are applied at validation time
    no_transforms = []
    return albu.Compose(no_transforms)


def to_tensor(x, **kwargs):
    """Move the last axis of a 3-D array to the front and cast to float32.

    Args:
        x (numpy.ndarray): 3-D array
        **kwargs: Accepted and ignored

    Returns:
        numpy.ndarray: float32 array with axes reordered as (2, 0, 1)
    """
    channels_first = np.transpose(x, (2, 0, 1))
    return channels_first.astype('float32')


def get_preprocessing(preprocessing_fn):
    """Construct the preprocessing transform.

    Args:
        preprocessing_fn (callable): data normalization function
            (can be specific for each pretrained neural network)

    Returns:
        albumentations.Compose: applies preprocessing_fn to the image, then
            to_tensor to both the image and the mask
    """
    normalize = albu.Lambda(image=preprocessing_fn)
    channels_first = albu.Lambda(image=to_tensor, mask=to_tensor)
    return albu.Compose([normalize, channels_first])


# helper function for data visualization
def visualize(**images):
    """Plot images in one row.

    Args:
        **images: Arrays to plot; each keyword becomes the panel title, with
            underscores turned into spaces and the words title-cased
    """
    total = len(images)
    plt.figure(figsize=(16, 5))
    for position, name in enumerate(images, start=1):
        plt.subplot(1, total, position)
        # Hide the axis ticks
        plt.xticks([])
        plt.yticks([])
        plt.title(name.replace('_', ' ').title())
        plt.imshow(images[name])
    plt.show()

In [None]:
class Dataset(BaseDataset):
    """Ship segmentation dataset.

    Images are read from TRAIN_FOLDER; the mask of an image is built from
    every 'EncodedPixels' row of `masks_set` that shares its 'ImageId'.

    Args:
        images_set: Indexable collection of image file names
        masks_set (pandas.DataFrame): Frame with 'ImageId' and
            'EncodedPixels' columns
        train_dim (tuple): Target size passed to cv2.resize
        augmentation: Optional callable taking image= and mask= keywords and
            returning a dict with 'image' and 'mask'
        preprocessing: Optional callable with the same calling convention
    """

    CLASSES = ['ship']

    def __init__(
            self,
            images_set,
            masks_set,
            train_dim,
            augmentation=None,
            preprocessing=None
    ):
        self.images = images_set
        self.masks = masks_set
        # Mask value that marks the single 'ship' class
        self.class_values = [1]
        self.augmentation = augmentation
        self.preprocessing = preprocessing
        self.train_dim = train_dim

    def __getitem__(self, i):
        """Return the (image, mask) pair for index i.

        Raises:
            FileNotFoundError: If the image file cannot be read
        """
        image_name = self.images[i]
        image_path = os.path.join(TRAIN_FOLDER, image_name)
        image = cv2.imread(image_path)
        # cv2.imread signals failure by returning None; fail with the path
        # instead of an opaque error inside cvtColor.
        if image is None:
            raise FileNotFoundError(f"Could not read image: {image_path}")
        image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
        mask = masks_as_image(self.masks[self.masks['ImageId'] == image_name]['EncodedPixels'])

        image = cv2.resize(image, self.train_dim, interpolation = cv2.INTER_AREA)
        mask = cv2.resize(mask, self.train_dim, interpolation = cv2.INTER_AREA)

        # One boolean plane per class value, stacked on a new last axis
        masks = [(mask == v) for v in self.class_values]
        mask = np.stack(masks, axis=-1).astype('float')

        # apply augmentations
        if self.augmentation:
            sample = self.augmentation(image=image, mask=mask)
            image, mask = sample['image'], sample['mask']

        # apply preprocessing
        if self.preprocessing:
            sample = self.preprocessing(image=image, mask=mask)
            image, mask = sample['image'], sample['mask']

        return image, mask

    def __len__(self):
        """Return the number of images in the dataset."""
        return len(self.images)

In [None]:
# create segmentation model with pretrained encoder
# (one output channel per entry of CLASSES)
model = smp.Unet(
    encoder_name=ENCODER,
    encoder_weights=ENCODER_WEIGHTS,
    classes=len(CLASSES),
    activation=ACTIVATION,
)

# Input normalization function for the chosen encoder / pretrained weights
preprocessing_fn = smp.encoders.get_preprocessing_fn(ENCODER, ENCODER_WEIGHTS)

In [None]:
# Both datasets take their image names from the respective split and look the
# masks up in the full `df`; only the augmentation pipeline differs.
train_dataset = Dataset(
    np.array(train_ships['ImageId']),
    df,
    train_dim,
    augmentation=get_training_augmentation(),
    preprocessing=get_preprocessing(preprocessing_fn),
)

valid_dataset = Dataset(
    np.array(valid_ships['ImageId']),
    df,
    train_dim,
    augmentation=get_validation_augmentation(),
    preprocessing=get_preprocessing(preprocessing_fn),
)

In [None]:
# Training batches are shuffled; validation batches keep the dataset order
train_loader = DataLoader(train_dataset, batch_size=BATCH_SIZE, shuffle=True, num_workers=8)
valid_loader = DataLoader(valid_dataset, batch_size=BATCH_SIZE, shuffle=False, num_workers=4)

In [None]:
# Dice loss for optimisation; IoU at a 0.5 threshold as the reported metric
loss = smp.utils.losses.DiceLoss()
metrics = [
    smp.utils.metrics.IoU(threshold=0.5),
]

# A single parameter group covering every model parameter
optimizer = torch.optim.Adam([
    dict(params=model.parameters(), lr=0.0001),
])

In [None]:
# Epoch runners: both share the model, loss, metrics and device; only the
# training runner is given the optimizer.
train_epoch = smp.utils.train.TrainEpoch(
    model,
    loss=loss,
    metrics=metrics,
    optimizer=optimizer,
    device=DEVICE,
    verbose=True,
)

valid_epoch = smp.utils.train.ValidEpoch(
    model,
    loss=loss,
    metrics=metrics,
    device=DEVICE,
    verbose=True,
)

In [None]:
# Best validation IoU seen so far
max_score = 0
EPOCHS = 1
weights_dir = "weights"
os.makedirs(weights_dir, exist_ok=True)

for epoch in range(0, EPOCHS):

    print(f'Epoch: {epoch}')
    train_logs = train_epoch.run(train_loader)
    valid_logs = valid_epoch.run(valid_loader)

    # Save the whole model object whenever the validation IoU improves;
    # the file name carries the encoder name and the epoch index.
    if max_score < valid_logs['iou_score']:
        max_score = valid_logs['iou_score']
        torch.save(model, os.path.join(weights_dir, f"{ENCODER}_{epoch}_epoch.pth"))
        print('Model saved!')

    # Lower the learning rate of the first parameter group ten epochs before
    # the end.
    # NOTE(review): with EPOCHS = 1 the target epoch is -9, so this branch
    # never runs — confirm that is intended.
    if epoch == EPOCHS - 10:
        optimizer.param_groups[0]['lr'] = 1e-5
        print('Decrease decoder learning rate to 1e-5!')

# Testing trained model

In [None]:
import os
import time
import glob
import random

import cv2
import numpy as np
import torch
import matplotlib.pyplot as plt
import torchvision.transforms as transforms
import segmentation_models_pytorch as smp

In [None]:

class ShipSegmentation:
    """Inference wrapper around a saved ship segmentation model.

    Args:
        model_path (str): Path to a checkpoint holding a whole model object
        device: Device the model and the inputs are moved to
        encoder (str): Encoder name used to look up the input preprocessing
        encoder_weights (str): Pretrained-weights name for the same lookup
        input_size (tuple): Size every image is resized to before inference;
            defaults to the previously hard-coded (544, 544)
    """

    def __init__(self, model_path, device, encoder='mit_b0', encoder_weights='imagenet',
                 input_size=(544, 544)):
        self.device = device
        self.input_size = input_size
        self._model = self._load_model(model_path)
        self._encoder = encoder
        self._encoder_weights = encoder_weights
        preprocessing_fn = smp.encoders.get_preprocessing_fn(self._encoder, self._encoder_weights)
        self.model_preprocessing = self._get_preprocessing(preprocessing_fn)

    def _load_model(self, model_path):
        """Load the pickled model, move it to the device and switch to eval mode."""
        # The checkpoint is a whole model object, not a state dict, so it needs
        # weights_only=False on PyTorch versions where torch.load defaults to
        # weights-only loading.
        # SECURITY: full unpickling can execute arbitrary code; only load
        # checkpoints from a trusted source.
        try:
            model = torch.load(model_path, map_location=self.device, weights_only=False)
        except TypeError:
            # Older PyTorch releases do not know the weights_only keyword
            model = torch.load(model_path, map_location=self.device)
        model.to(self.device)
        model.eval()
        return model

    def _to_tensor(self, x):
        """Reorder a 3-D array to axes (2, 0, 1) and cast it to float32."""
        return x.transpose(2, 0, 1).astype('float32')

    def _get_preprocessing(self, preprocessing_fn):
        """Compose the encoder normalization with the axis reordering."""
        _transform = [
            transforms.Lambda(lambda x: preprocessing_fn(x)),
            transforms.Lambda(lambda x: self._to_tensor(x)),
        ]
        return transforms.Compose(_transform)

    def preprocessing(self, image):
        """Resize and normalize an image and return it as a batch of one on the device."""
        image = cv2.resize(image, self.input_size, interpolation = cv2.INTER_AREA)
        image = self.model_preprocessing(image)
        return torch.from_numpy(image).to(self.device).unsqueeze(0)

    def predict(self, orig_image):
        """Return a uint8 mask (values 0 or 255 before resizing) at the size of orig_image."""
        image = self.preprocessing(orig_image)
        ship_mask = self._model.predict(image)
        # Round the model output to 0/1 and scale it to 0/255
        ship_mask = (ship_mask.squeeze().cpu().numpy().round() * 255).astype(np.uint8)
        # cv2.resize expects (width, height), hence the reversed shape
        ship_mask = cv2.resize(ship_mask, orig_image.shape[:2][::-1], interpolation = cv2.INTER_AREA)
        return ship_mask

In [None]:
def show(rgb_image, predicted_mask):
    '''
    Plot an image and its predicted mask side by side

    Args:
        rgb_image: Image array drawn in the left panel
        predicted_mask: Mask array drawn in grayscale in the right panel
    '''
    # One figure, two panels: image on the left, mask on the right
    plt.figure(figsize=(12, 6))
    panels = (
        (rgb_image, 'Image', {}),
        (predicted_mask, 'Predicted Mask', {'cmap': 'gray'}),
    )
    for position, (pixels, title, imshow_options) in enumerate(panels, start=1):
        plt.subplot(1, 2, position)
        plt.imshow(pixels, **imshow_options)
        plt.title(title)
        plt.axis('off')

    plt.show()

In [None]:
weights_dir = "weights"
model_path = os.path.join(weights_dir, "mit_b0_18_epoch.pth")
if not os.path.exists(model_path):
    # The training loop names checkpoints "{ENCODER}_{epoch}_epoch.pth", so the
    # epoch-18 file only exists after a long enough run. Fall back to the most
    # recently written checkpoint instead of failing inside torch.load.
    candidates = glob.glob(os.path.join(weights_dir, "mit_b0_*_epoch.pth"))
    if not candidates:
        raise FileNotFoundError(f"No checkpoint found in '{weights_dir}'")
    model_path = max(candidates, key=os.path.getmtime)
    print(f"Using checkpoint: {model_path}")
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

ship = ShipSegmentation(model_path=model_path, device=device)

# Pick a random file from the test folder
images = glob.glob(os.path.join(TEST_FOLDER, "*"))
image_path = random.choice(images)

# Load the image and convert it from BGR to RGB channel order
bgr_image = cv2.imread(image_path)
rgb_image = cv2.cvtColor(bgr_image, cv2.COLOR_BGR2RGB)

# Predict the mask, then expand the single-channel result to three channels
# NOTE(review): nothing in the visible part of the notebook uses the
# three-channel mask afterwards — confirm whether a show(...) call is missing.
predicted_mask = ship.predict(rgb_image)
predicted_mask = cv2.cvtColor(predicted_mask, cv2.COLOR_GRAY2BGR)