In [8]:
# Root directory of the Airbus ship detection dataset.
path_input = '../input/airbus-ship-detection'

# All other locations are derived from the root, so moving the dataset only
# requires editing the single line above.
path_seg = f'{path_input}/train_ship_segmentations_v2.csv'  # CSV with the encoded ship masks
path_train_images = f'{path_input}/train_v2'  # directory with training images
path_test_images = f'{path_input}/test_v2'  # directory with test images

# Import libraries for analysis, statistics and visualisation

In [90]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from skimage.io import imread
import os

# Show the number of images in the train and test directories

In [11]:
# os.listdir returns entries in arbitrary order; sorting gives a stable file
# list, so sampling from it can be reproduced once a random seed is fixed.
train_list = sorted(os.listdir(path_train_images))
print(f"Number train images: {len(train_list)}")

test_list = sorted(os.listdir(path_test_images))
print(f"Number test images: {len(test_list)}")

# Show some images from train dir

In [14]:
def load_img(filename):
    """Read the training image `filename` from `path_train_images` as a numpy array."""
    return np.asarray(imread(os.path.join(path_train_images, filename)))


width = 9
height = 3

_, axes_list = plt.subplots(height, width, figsize=(2*width, 2*height))

# Fill every cell of the height x width grid with a randomly chosen training
# image, using the file name as the title.
for ax in axes_list.flat:
    ax.axis('off')
    img = np.random.choice(train_list)
    ax.imshow(load_img(img))
    ax.set_title(img)
        

# Create important helper functions
<h3> For decoding rle => mask</h3>
<h3> For applying mask on image</h3>

In [87]:
def rle_decode(mask_rle, shape=(768, 768)):
    '''
    Decode a run-length-encoded mask into a 2-D binary array.

    mask_rle: run-length as string formatted "start length start length ...",
              where each start is a 1-based position in the column-major
              flattening of the mask. A non-string value (e.g. NaN) or an
              empty string yields an all-zero mask.
    shape: (height,width) of array to return
    Returns numpy uint8 array of that shape, 1 - mask, 0 - background
    '''
    img = np.zeros(shape[0] * shape[1], dtype=np.uint8)
    if isinstance(mask_rle, str):
        tokens = mask_rle.split()
        # Even tokens are run starts (converted to 0-based), odd tokens are run lengths.
        starts = np.asarray(tokens[0::2], dtype=int) - 1
        lengths = np.asarray(tokens[1::2], dtype=int)
        for lo, hi in zip(starts, starts + lengths):
            img[lo:hi] = 1
    # Column-major reshape: identical to reshape(shape).T for square shapes,
    # but also returns the documented (height, width) for non-square ones.
    return img.reshape(shape, order='F')
    
def masks_as_image(in_mask_list):
    """
    Merge the per-ship RLE strings of one image into a single mask.

    in_mask_list: iterable of RLE strings; entries that are not strings
                  (such as NaN) are ignored.
    Returns a (768, 768) uint16 array that is the bitwise OR of all decoded masks.
    """
    combined = np.zeros((768, 768), dtype=np.uint16)
    rle_strings = (entry for entry in in_mask_list if isinstance(entry, str))
    for rle in rle_strings:
        # In-place OR keeps the uint16 accumulator while adding this ship's pixels.
        np.bitwise_or(combined, rle_decode(rle), out=combined)
    return combined

def apply_mask(image, mask):
    """
    Return a copy of `image` with every masked pixel painted white (255).

    image: array of shape (height, width) or (height, width, channels);
           255 is used as white, which presumes 8-bit pixel values.
    mask: array with height*width elements (any shape reshapeable to
          (height, width)); non-zero entries mark the pixels to paint.
    Returns a new array with the same shape and dtype as `image`; the
    input image is not modified.
    """
    painted = np.array(image, copy=True)
    # Take the mask size from the image rather than hard-coding 768x768.
    ship_pixels = np.asarray(mask).reshape(painted.shape[:2]) > 0
    # Assign instead of adding mask*255, so values never exceed 255 and the
    # image dtype is preserved.
    painted[ship_pixels] = 255
    return painted

# Statistics about Data

In [16]:
# Load the segmentation table from path_seg and preview its first five rows.
df = pd.read_csv(filepath_or_buffer=path_seg)
df.head(n=5)

<p>An ImageId can appear in several rows of the DataFrame, because one image can contain several ships.</p>
<p>Each row of the DataFrame is one encoded ship.</p>
<p>If EncodedPixels is NaN, the image contains no ships.</p>

<h4>Let's check whether the number of pictures in the directory matches the number of images with encoded masks!</h4>

In [19]:
# An ImageId can repeat across rows, so count distinct ids rather than rows.
n_csv_images = df["ImageId"].nunique()
print(f'Total number of images(original): {n_csv_images}')

# Make the comparison with the training directory explicit instead of
# leaving it to be read off two separate cell outputs.
n_dir_images = len(train_list)
print(f'Number of images in train dir: {n_dir_images} (same: {n_csv_images == n_dir_images})')

# Number of rows per image.
unique_img_ids = df.groupby("ImageId").size().reset_index(name='counts')
unique_img_ids.head()

<h4>The number of pictures is the same: 192556</h4>
<hr>
<h3>Let's count the number of images with and without ships</h3>

In [29]:
# Images that contain ships: drop rows with missing values, then count the
# remaining rows per ImageId.
df_with_ships = (
    df.dropna()
    .groupby("ImageId")
    .size()
    .reset_index(name="number ships")
)

# Rows whose EncodedPixels value is missing.
df_without_ships = df.loc[df["EncodedPixels"].isna()]

print(
    f"Number images with ships: {len(df_with_ships)}\n"
    f"Number images without ships: {len(df_without_ships)}"
)


In [35]:
# Bar chart comparing how many images have ships and how many do not.
fig_counts, ax_counts = plt.subplots(figsize=(5, 5))
group_labels = ["With ships", "Without ships"]
group_sizes = [len(df_with_ships), len(df_without_ships)]
ax_counts.bar(group_labels, group_sizes, color=['green', 'pink'])
ax_counts.set_ylabel("Number of images")
ax_counts.set_title("Training Data")
plt.show()


<h2>Plot a histogram to show the distribution of the number of ships per image</h2>

In [38]:
# Centre one bin on every integer ship count 1..max_ships. The edges must run
# up to max_ships + 0.5; stopping at max_ships - 0.5 would drop the images
# that have the maximum number of ships.
max_ships = int(df_with_ships['number ships'].max())
bin_edges = np.arange(max_ships + 1) + 0.5
hist = df_with_ships.hist(column='number ships', bins=bin_edges)
plt.xticks(range(1, max_ships + 1))
plt.title('Histogram of ships count')
plt.xlabel("Number of ships")
plt.ylabel("Number of images")
# plt.show() does not take the axes as an argument.
plt.show()

# Show the masks of some images

In [42]:
w = 8
h = 3
_, axes_list = plt.subplots(h, w, figsize=(2*w, 2*h))
plt.subplots_adjust(wspace=0.4)

# Axes are configured only inside the loop, so the cell depends on no `ax`
# variable left over from another cell; axis limits are left to imshow.
for ax in axes_list.flat:
    ax.axis('auto')
    img = np.random.choice(train_list)
    # Merge all rows of the sampled image into one mask.
    mask = masks_as_image(df.loc[df["ImageId"] == img, "EncodedPixels"].values)
    ax.imshow(mask)

# And finally show some images with their masks
<p> Mask on image has white color

In [89]:
w = 8
h = 3
_, axes_list = plt.subplots(h, w, figsize=(2*w, 2*h))

# One randomly chosen training image per grid cell, drawn with its mask applied.
for ax in axes_list.ravel():
    ax.axis('off')
    img = np.random.choice(train_list)
    image = imread(os.path.join(path_train_images, img))
    # All EncodedPixels entries that belong to the sampled image.
    mask = masks_as_image(df.loc[df["ImageId"] == img, "EncodedPixels"].values)
    img_with_mask = apply_mask(image, mask)
    ax.imshow(img_with_mask)
    ax.set_title(img)
        