**To run this notebook with access to the competition data, open https://www.kaggle.com/voluch346/airbus-ship-detection-eda
and click "Copy & Edit".**

In [None]:
import numpy as np
import pandas as pd
import imageio
import matplotlib.pyplot as plt
import os

**Import Data**

In [None]:
# Read the per-ship segmentation table: one row per (image, ship) pair
masks = pd.read_csv(r"../input/airbus-ship-detection/train_ship_segmentations_v2.csv")
print(f"Dataframe with masks looks \n{masks.head(10)}\n\n")

# A row describes a ship only when its 'EncodedPixels' entry is a string
masks['ships'] = masks['EncodedPixels'].apply(lambda rle: int(isinstance(rle, str)))

# Collapse to one row per image: total ship count plus a 1.0/0.0 "has ship" flag
unique_img_ids = masks.groupby('ImageId')['ships'].sum().reset_index()
unique_img_ids['is_ship'] = (unique_img_ids['ships'] > 0).astype(float)

ship_flag_counts = unique_img_ids['is_ship'].value_counts()
ships_per_image_counts = unique_img_ids['ships'].value_counts()
print(f"Count of images with/withot ships \n{ship_flag_counts}\n\n")
print(f"Count of images with number (0, 1, 2 etc.) of ships \n{ships_per_image_counts}\n\n")


**Visualizing masks over original image**

In [None]:
# Function to decode a run-length-encoded mask
def rle_decode(mask_rle, IMG_SIZE = (768, 768)):
    '''
    Decode a run-length-encoded mask into a 2-D binary array.

    mask_rle: run-length string formatted as "start length start length ...",
              where starts are 1-based indices into the image flattened in
              column-major order (top to bottom, then left to right).
              Any non-string value (e.g. the NaN used for images without
              ships) is treated as "no mask".
    IMG_SIZE: (height, width) of the array to return
    Returns a numpy uint8 array of shape IMG_SIZE, 1 - mask, 0 - background
    ref: https://www.kaggle.com/paulorzp/run-length-encode-and-decode
    '''
    img = np.zeros(IMG_SIZE[0]*IMG_SIZE[1], dtype=np.uint8)
    if isinstance(mask_rle, str):
        s = mask_rle.split()
        # Even-positioned tokens are run starts, odd-positioned tokens are run lengths
        starts, lengths = [np.asarray(x, dtype=int) for x in (s[0::2], s[1::2])]
        starts -= 1  # convert 1-based starts to 0-based indices
        ends = starts + lengths
        for lo, hi in zip(starts, ends):
            img[lo:hi] = 1
    # The flat buffer is column-major, so fill the (height, width) array in
    # Fortran order. For square sizes this equals reshape(IMG_SIZE).T, but it
    # also yields the correct shape and layout for non-square sizes.
    return img.reshape(IMG_SIZE, order='F')

In [None]:
# Set 'num_ships' to the number of ships an image must contain;
# a sample of matching ImageIds is printed below.
num_ships = 5
ids_with_num_ships = unique_img_ids.loc[unique_img_ids['ships'] == num_ships, 'ImageId']
print(f"ImageId's with {num_ships} ships on it \n{ids_with_num_ships.head(5)}")

In [None]:
# Copy an ImageId from the output of the previous cell and assign it to ImageId
ImageId = '0123b84ee.jpg'

img = imageio.imread('/kaggle/input/airbus-ship-detection/train_v2/' + ImageId)
# Rows with a missing 'EncodedPixels' value (an image without ships) are
# dropped so that only real run-length strings reach rle_decode; such an
# image then simply shows an empty mask instead of raising.
img_masks = masks.loc[masks['ImageId'] == ImageId, 'EncodedPixels'].dropna().tolist()

# Take the individual ship masks and create a single mask array for all ships
all_masks = np.zeros((768, 768))
for mask in img_masks:
    all_masks += rle_decode(mask, (768, 768))

# Three panels: the image, the combined mask, and the mask overlaid on the image
fig, axarr = plt.subplots(1, 3, figsize=(15, 40))
for ax in axarr:
    ax.axis('off')
axarr[0].imshow(img)
axarr[0].set_title('Image')
axarr[1].imshow(all_masks)
axarr[1].set_title('Ship masks')
axarr[2].imshow(img)
axarr[2].imshow(all_masks, alpha=0.4)
axarr[2].set_title('Image with masks')
plt.tight_layout(h_pad=0.1, w_pad=0.1) # to adjust automatically axis to subplot area
plt.show()
