Libraries

In [1]:
import pandas as pd
import torch
import numpy as np
import torchvision.transforms as transforms
from sklearn.model_selection import train_test_split
from PIL import Image
from tqdm.notebook import tqdm
import cv2
import os
import multiprocessing
import random
from torch.utils.data.dataset import Dataset
from copy import deepcopy

In [2]:
train_imgs_folder = "understanding_cloud_organization/train_image/"
image_width, image_height = 224, 224
num_cores = multiprocessing.cpu_count()
batch_size = 64
model_class_names =  ['Fish', 'Flower', 'Sugar', 'Gravel']

One-hot encoding classes

In [12]:
train_df_orig = pd.read_csv('understanding_cloud_organization/train.csv')
train_df = pd.read_csv('understanding_cloud_organization/train.csv')

In [24]:
train_df = train_df_orig
print("first_print\n",train_df)
train_df = train_df[~train_df['EncodedPixels'].isnull()]
print("second_print\n",train_df)
train_df['Image'] = train_df['Image_Label'].map(lambda x: x.split('_')[0])
print("third_print\n",train_df)
train_df['Class'] = train_df['Image_Label'].map(lambda x: x.split('_')[1])
print("fourth_print\n",train_df)
classes = train_df['Class'].unique()
train_df = train_df.groupby('Image')['Class'].agg(set).reset_index()
print("fifth_print\n",train_df)
for class_name in classes:
    train_df[class_name] = train_df['Class'].map(lambda x: 1 if class_name in x else 0)
print("sixth_print\n",train_df)

first_print
               Image_Label                                      EncodedPixels
0        0011165.jpg_Fish  264918 937 266318 937 267718 937 269118 937 27...
1      0011165.jpg_Flower  1355565 1002 1356965 1002 1358365 1002 1359765...
2      0011165.jpg_Gravel                                                NaN
3       0011165.jpg_Sugar                                                NaN
4        002be4f.jpg_Fish  233813 878 235213 878 236613 878 238010 881 23...
...                   ...                                                ...
22179   ffd6680.jpg_Sugar                                                NaN
22180    ffea4f4.jpg_Fish                                                NaN
22181  ffea4f4.jpg_Flower  1194860 675 1196260 675 1197660 675 1199060 67...
22182  ffea4f4.jpg_Gravel                                                NaN
22183   ffea4f4.jpg_Sugar                                                NaN

[22184 rows x 2 columns]
second_print
               Image_Lab

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  train_df['Image'] = train_df['Image_Label'].map(lambda x: x.split('_')[0])
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  train_df['Class'] = train_df['Image_Label'].map(lambda x: x.split('_')[1])


In [23]:
# dictionary for fast access to ohe vectors
img_2_ohe_vector = {img:vec for img, vec in zip(train_df['Image'], train_df.iloc[:, 2:].values)}
zip(train_df['Image'], train_df.iloc[:, 2:].values)
print(train_df.columns)
print(train_df['Image'], train_df.iloc[:, 2:].values)
print(img_2_ohe_vector)

Index(['Image', 'Class', 'Fish', 'Flower', 'Sugar', 'Gravel'], dtype='object')
0       0011165.jpg
1       002be4f.jpg
2       0031ae9.jpg
3       0035239.jpg
4       003994e.jpg
           ...     
5541    ffcedf2.jpg
5542    ffd11b6.jpg
5543    ffd3dfb.jpg
5544    ffd6680.jpg
5545    ffea4f4.jpg
Name: Image, Length: 5546, dtype: object [[1 1 0 0]
 [1 1 1 0]
 [1 1 1 0]
 ...
 [0 0 1 0]
 [0 1 0 1]
 [0 1 0 0]]
{'0011165.jpg': array([1, 1, 0, 0], dtype=int64), '002be4f.jpg': array([1, 1, 1, 0], dtype=int64), '0031ae9.jpg': array([1, 1, 1, 0], dtype=int64), '0035239.jpg': array([0, 1, 0, 1], dtype=int64), '003994e.jpg': array([1, 0, 1, 1], dtype=int64), '00498ec.jpg': array([0, 0, 0, 1], dtype=int64), '006bf7c.jpg': array([1, 0, 1, 0], dtype=int64), '006c5a6.jpg': array([1, 0, 1, 0], dtype=int64), '008233e.jpg': array([0, 0, 1, 0], dtype=int64), '008a5ff.jpg': array([1, 0, 1, 0], dtype=int64), '0091591.jpg': array([0, 1, 1, 1], dtype=int64), '0095357.jpg': array([0, 0, 1, 0], dtype=int64),

Split into train and val and test

In [12]:
train_imgs, val_test_imgs = train_test_split(train_df['Image'].values, 
                                        test_size=0.4, 
                                        #stratify=train_df['Class'].map(lambda x: str(sorted(list(x)))), # sorting present classes in lexicographical order, just to be sure
                                        random_state=10)
val_imgs, test_imgs = train_test_split(val_test_imgs, 
                                        test_size=0.5, 
                                        #stratify=train_df['Class'].map(lambda x: str(sorted(list(x)))), # sorting present classes in lexicographical order, just to be sure
                                        random_state=10)

print(type(train_imgs))
print(train_imgs[0])

<class 'numpy.ndarray'>
068d881.jpg


Data preprocessing
Validation images

In [13]:
#Validation set中每个.jpg格式的图片转化为224*224的张量，存在val_imgs_np中
#val_imgs_np为一个四维张量，形状为(样本数, 高度, 宽度, 通道数)
val_imgs_np = np.empty((len(val_imgs), image_height, image_width, 3))
for img_i, img_name in enumerate(tqdm(val_imgs)):
    img_path = os.path.join(train_imgs_folder, img_name)
    val_imgs_np[img_i, :, :, :] = cv2.resize(cv2.imread(img_path), (image_height, image_width)).astype(np.float32)/255.0

  0%|          | 0/1109 [00:00<?, ?it/s]

Masks

In [14]:
def rle_decode(mask_rle: str = '', shape: tuple = (1400, 2100)):
    '''
    Decode rle encoded mask.
    
    :param mask_rle: run-length as string formatted (start length)
    :param shape: (height, width) of array to return 
    Returns numpy array, 1 - mask, 0 - background
    '''
    s = mask_rle.split()
    starts, lengths = [np.asarray(x, dtype=int) for x in (s[0:][::2], s[1:][::2])]
    starts -= 1
    ends = starts + lengths
    img = np.zeros(shape[0] * shape[1], dtype=np.uint8)
    for lo, hi in zip(starts, ends):
        img[lo:hi] = 1
    return img.reshape(shape, order='F')

def mask2rle(img):
    '''
    Convert mask to rle.
    img: numpy array, 1 - mask, 0 - background
    Returns run length as string formated
    '''
    pixels= img.T.flatten()
    pixels = np.concatenate([[0], pixels, [0]])
    runs = np.where(pixels[1:] != pixels[:-1])[0] + 1
    runs[1::2] -= runs[::2]
    return ' '.join(str(x) for x in runs)


def make_mask(df, image_label, shape: tuple = (1400, 2100)):
    """
    Create mask based on df, image name and shape.
    """
    df = df.set_index('Image_Label')
    encoded_mask = df.loc[image_label, 'EncodedPixels']
    mask = np.zeros((shape[0], shape[1]), dtype=np.float32)
    if encoded_mask is not np.nan:
        mask = rle_decode(encoded_mask)
            
    return cv2.resize(mask, (image_height, image_width))

In [15]:
#val_mask_np用于存储validation set中每个图片对应的每种分类下的mask
#val_masks_np为四维张量，形状为（4，验证样本的数量，图片高度，图片宽度）
val_masks_np = np.empty((len(model_class_names), len(val_imgs), image_height, image_width))
for class_i, class_name in enumerate(tqdm(model_class_names)):
    for img_i, img_name in enumerate(val_imgs):
        mask = make_mask(train_df_orig, img_name + '_' + class_name)
        val_masks_np[class_i][img_i] = mask


  0%|          | 0/4 [00:00<?, ?it/s]

In [18]:
print(train_df_orig)

              Image_Label                                      EncodedPixels
0        0011165.jpg_Fish  264918 937 266318 937 267718 937 269118 937 27...
1      0011165.jpg_Flower  1355565 1002 1356965 1002 1358365 1002 1359765...
2      0011165.jpg_Gravel                                                NaN
3       0011165.jpg_Sugar                                                NaN
4        002be4f.jpg_Fish  233813 878 235213 878 236613 878 238010 881 23...
...                   ...                                                ...
22179   ffd6680.jpg_Sugar                                                NaN
22180    ffea4f4.jpg_Fish                                                NaN
22181  ffea4f4.jpg_Flower  1194860 675 1196260 675 1197660 675 1199060 67...
22182  ffea4f4.jpg_Gravel                                                NaN
22183   ffea4f4.jpg_Sugar                                                NaN

[22184 rows x 2 columns]


Data Generator

In [16]:
class Cloud_Dataset(Dataset):
    def __init__(self, 
                 images_list=None, 
                 folder_imgs=train_imgs_folder, 
                 batch_size=32, 
                 shuffle=True, 
                 augmentation=None,
                 resized_height=224, 
                 resized_width=224, 
                 num_channels=3):
        
        self.batch_size = batch_size
        self.shuffle = shuffle
        self.augmentation = augmentation
        if images_list is None:
            self.images_list = os.listdir(folder_imgs)
        else:
            self.images_list = deepcopy(images_list)
            self.folder_imgs = folder_imgs
        self.len = len(self.images_list) // self.batch_size
        self.resized_height = resized_height
        self.resized_width = resized_width
        self.num_channels = num_channels
        self.num_classes = 4
        self.is_test = not 'train' in folder_imgs
        if not shuffle and not self.is_test:
            self.labels = [img_2_ohe_vector[img] for img in self.images_list[:self.len*self.batch_size]]

    def __len__(self):
        return self.len
    
    def on_epoch_start(self):
        if self.shuffle:
            random.shuffle(self.images_list)
            
    def __getitem__(self, idx):
        current_batch = self.images_list[idx * self.batch_size: (idx + 1) * self.batch_size]
        X = np.empty((self.batch_size, self.resized_height, self.resized_width, self.num_channels))
        y = np.empty((self.batch_size, self.num_classes))

        for i, image_name in enumerate(current_batch):
            path = os.path.join(self.folder_imgs, image_name)
            img = cv2.resize(cv2.imread(path), (self.resized_height, self.resized_width)).astype(np.float32)
            if not self.augmentation is None:
                augmented = self.augmentation(image=img)
                img = augmented['image']
            X[i, :, :, :] = img/255.0
            if not self.is_test:
                y[i, :] = img_2_ohe_vector[image_name]
                return X, y

    def get_labels(self):
        if self.shuffle:
            images_current = self.images_list[:self.len*self.batch_size]
            labels = [img_2_ohe_vector[img] for img in images_current]
        else:
            labels = self.labels
        return np.array(labels)