In [None]:
!pip install segmentation-models --quiet

In [None]:
! pip install --upgrade imgaug

# Importation des modules utiles

In [None]:
import numpy as np
import pandas as pd
import os
import albumentations as albu
import cv2
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
import segmentation_models as sm
import colorama
from colorama import Fore
from imgaug.augmentables.segmaps import SegmentationMapOnImage

from tensorflow.keras.optimizers import Nadam
from tensorflow.keras.callbacks import ModelCheckpoint, EarlyStopping, ReduceLROnPlateau, CSVLogger 
from tensorflow.keras import backend as K
from tensorflow.keras.utils import Sequence

### Import des scripts de fonctions utiles

In [None]:
from clouds_graph_functions import visualize_image_mask_prediction
from clouds_utilities_functions import np_resize, build_masks
from clouds_utilities_functions import dice_coef, dice_loss, bce_dice_loss, dice_coef_class

# Définition du chemin des données

In [None]:
# Root folder of the data set and the sub-folder holding the training images.
NUAGES_PATH = '/kaggle/input/understanding_cloud_organization/'
NUAGES_TRAIN_PATH = f'{NUAGES_PATH}train_images/'

# Chargement du DataFrame et modification pour le DataGenerator

In [None]:
# Load the training annotations (train.csv sits directly under NUAGES_PATH).
train_df = pd.read_csv(f'{NUAGES_PATH}train.csv')

In [None]:
# Image_Label is split on '_': the first piece becomes ImageId, the second ClassId.
train_df['ImageId'] = train_df['Image_Label'].map(lambda label: label.split('_')[0])
train_df['ClassId'] = train_df['Image_Label'].map(lambda label: label.split('_')[1])
# A row carries a mask whenever its EncodedPixels value is not missing.
train_df['hasMask'] = train_df['EncodedPixels'].notna()

In [None]:
# Sanity check: dimensions of the annotation table, then its first rows.
print(train_df.shape)
train_df.head()

In [None]:
# Number of masks available per image, images with the most masks first.
# Only the boolean `hasMask` column is summed: the other columns hold strings
# and NaN, for which a sum is meaningless (depending on the pandas version it
# is silently dropped, concatenated, or raises a TypeError).
mask_count_df = (
    train_df.groupby('ImageId', as_index=False)['hasMask']
    .sum()
    .sort_values('hasMask', ascending=False)
)

In [None]:
# Sanity check: dimensions of the per-image table, then its first rows.
print(mask_count_df.shape)
mask_count_df.head()

## Paramètres généraux

In [None]:
# Batch size and spatial size (in pixels) of the network input.
BATCH_SIZE = 6
HEIGHT, WIDTH = 320, 480
# Always keep 3 channels: the models providing the initial weights were
# trained on 3-channel colour images.
CHANNELS = 3
# True for colour images, False for black-and-white images.
COLOR_MODE = True
NB_CLASSES = 4

# Data Generator pour image et masque

In [None]:
class DataGenerator(Sequence):
    """Keras ``Sequence`` serving batches of images and, in 'fit' mode, their masks.

    Parameters
    ----------
    list_IDs : sequence of int
        Identifiers of the samples served by this generator. Each one is used
        as a *position* (``.iloc``) into ``df``.
    df : pandas.DataFrame
        Table with an ``ImageId`` column holding the image file names.
    target_df : pandas.DataFrame, optional
        Table with ``ImageId`` and ``EncodedPixels`` columns, read in 'fit'
        mode to build the masks.
    mode : {'fit', 'predict'}
        'fit' yields ``(X, y)`` batches, 'predict' yields ``X`` only.
    base_path : str
        Prefix concatenated with the image file name (no separator is added).
    batch_size : int
        Number of samples per batch.
    dim : tuple of int
        Batch spatial shape when ``reshape`` is None; also forwarded to
        ``build_masks`` as ``input_shape`` (presumably the size of the images
        on disk -- TODO confirm).
    n_channels : int
        Number of channels of the image batch; a grayscale image is repeated
        ``n_channels`` times.
    color_mode : bool
        True loads images as RGB, False as grayscale.
    reshape : tuple of int, optional
        Target size passed to ``np_resize`` / ``build_masks``.
    augment : bool
        Apply random augmentations to each (image, masks) pair in 'fit' mode.
    n_classes : int
        Number of mask channels.
    random_state : int
        Seed of the shuffling order (and of NumPy's global RNG, seeded once at
        construction as before).
    shuffle : bool
        Reshuffle the sample order at the end of every epoch.
    """

    def __init__(self, list_IDs, df, target_df=None, mode='fit',
                 base_path=NUAGES_TRAIN_PATH, batch_size=BATCH_SIZE, dim=(1400, 2100),
                 n_channels=CHANNELS, color_mode=True,reshape=None, augment=False,
                 n_classes=NB_CLASSES, random_state=222, shuffle=True):
        super().__init__()
        self.dim = dim
        self.batch_size = batch_size
        self.df = df
        self.mode = mode
        self.base_path = base_path
        self.target_df = target_df
        self.list_IDs = list_IDs
        self.reshape = reshape
        self.n_channels = n_channels
        self.color_mode = color_mode
        self.augment = augment
        self.n_classes = n_classes
        self.shuffle = shuffle
        self.random_state = random_state

        # Private generator for the shuffling order. Re-seeding the global RNG
        # at every epoch end (the previous behaviour) reproduced the exact same
        # permutation each epoch; a persistent generator yields a different,
        # yet reproducible, order per epoch. The first epoch is unchanged.
        self._rng = np.random.RandomState(self.random_state)

        # The augmentation pipeline does not depend on the sample, so it is
        # built once instead of once per image.
        self._augmenter = albu.Compose([albu.HorizontalFlip(),
                                        albu.VerticalFlip(),
                                        albu.Rotate(limit=20),
                                        albu.GridDistortion(),
                                        albu.ShiftScaleRotate(rotate_limit=45, shift_limit=0.15, scale_limit=0.15)])

        self.on_epoch_end()
        # Kept for backward compatibility: the global NumPy RNG is seeded once
        # when the generator is created.
        np.random.seed(self.random_state)

    def __len__(self):
        """Number of full batches per epoch (a trailing partial batch is dropped)."""
        return int(np.floor(len(self.list_IDs) / self.batch_size))

    def __getitem__(self, index):
        """Return batch number ``index``: ``(X, y)`` in 'fit' mode, ``X`` in 'predict' mode.

        Raises
        ------
        AttributeError
            If ``mode`` is neither 'fit' nor 'predict'.
        """
        # Validate first so that no image is read for an invalid configuration.
        if self.mode not in ('fit', 'predict'):
            raise AttributeError('The mode parameter should be set to "fit" or "predict".')

        # Positions of this batch inside the (possibly shuffled) epoch order.
        indexes = self.indexes[index*self.batch_size:(index+1)*self.batch_size]
        list_IDs_batch = [self.list_IDs[k] for k in indexes]

        X = self.__generate_X(list_IDs_batch)
        if self.mode == 'predict':
            return X

        y = self.__generate_y(list_IDs_batch)
        if self.augment:
            X, y = self.__augment_batch(X, y)
        return X, y

    def on_epoch_end(self):
        """Reset the sample order and, when shuffling is enabled, draw a new permutation."""
        self.indexes = np.arange(len(self.list_IDs))
        if self.shuffle:
            self._rng.shuffle(self.indexes)

    def __batch_shape(self):
        """Spatial shape of a batch: ``reshape`` when given, ``dim`` otherwise."""
        return self.dim if self.reshape is None else self.reshape

    def __generate_X(self, list_IDs_batch):
        """Load the images of a batch into a ``(batch_size, *shape, n_channels)`` array."""
        X = np.empty((self.batch_size, *self.__batch_shape(), self.n_channels))

        for i, ID in enumerate(list_IDs_batch):
            im_name = self.df['ImageId'].iloc[ID]
            img_path = f"{self.base_path}{im_name}"

            if self.color_mode:
                img = self.__load_rgb(img_path)
            else:
                img = self.__load_grayscale(img_path)

            if self.reshape is not None:
                img = np_resize(img, self.reshape)

            if not self.color_mode:
                # Repeat the single grayscale channel n_channels times.
                img = np.dstack((img,)*self.n_channels)

            X[i,] = img

        return X

    def __generate_y(self, list_IDs_batch):
        """Build the masks of a batch into a ``(batch_size, *shape, n_classes)`` int array."""
        y = np.empty((self.batch_size, *self.__batch_shape(), self.n_classes), dtype=int)

        for i, ID in enumerate(list_IDs_batch):
            im_name = self.df['ImageId'].iloc[ID]
            # All annotation rows of this image.
            image_df = self.target_df[self.target_df['ImageId'] == im_name]
            rles = image_df['EncodedPixels'].values

            if self.reshape is not None:
                masks = build_masks(rles, input_shape=self.dim, reshape=self.reshape)
            else:
                masks = build_masks(rles, input_shape=self.dim)

            y[i, ] = masks

        return y

    def __load_grayscale(self, img_path):
        """Read an image as grayscale, scaled to [0, 1], with a trailing channel axis.

        Raises
        ------
        FileNotFoundError
            If OpenCV cannot read ``img_path``.
        """
        img = cv2.imread(img_path, cv2.IMREAD_GRAYSCALE)
        if img is None:
            # cv2.imread signals failure by returning None instead of raising.
            raise FileNotFoundError(f"Unable to read image: {img_path}")
        img = img.astype(np.float32) / 255.
        img = np.expand_dims(img, axis=-1)
        return img

    def __load_rgb(self, img_path):
        """Read an image as RGB, scaled to [0, 1].

        Raises
        ------
        FileNotFoundError
            If OpenCV cannot read ``img_path``.
        """
        img = cv2.imread(img_path)
        if img is None:
            # cv2.imread signals failure by returning None instead of raising.
            raise FileNotFoundError(f"Unable to read image: {img_path}")
        # OpenCV loads colour images in BGR order.
        img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
        img = img.astype(np.float32) / 255.
        return img

    def __random_transform(self, img, masks):
        """Apply the same random augmentation to an image and its masks."""
        composed = self._augmenter(image=img, mask=masks)
        return composed['image'], composed['mask']

    def __augment_batch(self, img_batch, masks_batch):
        """Augment every (image, masks) pair of a batch in place and return both arrays."""
        for i in range(img_batch.shape[0]):
            img_batch[i, ], masks_batch[i, ] = self.__random_transform(img_batch[i, ], masks_batch[i, ])

        return img_batch, masks_batch

# Training

## Séparation des données d'entrainement et de validation (par ID unique)

In [None]:
# Hold out 20% of the entries of mask_count_df.index for validation (fixed seed).
# NOTE(review): these are index labels, and DataGenerator later uses them as
# positions through `.iloc` -- confirm this is intended.
train_idx, val_idx = train_test_split(mask_count_df.index, random_state=69, test_size=0.2)

## Création des instances de la classe DataGenerator

In [None]:
# Settings shared by both generators; only the sample ids and the
# augmentation flag differ between training and validation.
_shared_generator_kwargs = dict(
    df=mask_count_df,
    target_df=train_df,
    batch_size=BATCH_SIZE,
    reshape=(HEIGHT, WIDTH),
    n_channels=CHANNELS,
    color_mode=COLOR_MODE,
    n_classes=NB_CLASSES,
)

# Training batches are augmented, validation batches are not.
train_generator = DataGenerator(train_idx, augment=True, **_shared_generator_kwargs)
valid_generator = DataGenerator(val_idx, augment=False, **_shared_generator_kwargs)

## Callbacks

In [None]:
# ModelCheckpoint: write the weights to ./model.h5 whenever val_loss improves.
checkpoint = ModelCheckpoint("./model.h5",
                             monitor='val_loss',
                             mode='min',
                             save_best_only=True,
                             save_weights_only=True)

# EarlyStopping: stop after 5 epochs without a val_loss improvement of at
# least 0.01 and restore the best weights seen.
early_stopping = EarlyStopping(monitor='val_loss',
                               mode='min',
                               min_delta = 0.01,
                               patience=5,
                               restore_best_weights=True,
                               verbose=1)

# ReduceLROnPlateau: divide the learning rate by 10 (down to 1e-6) after 3
# epochs without a val_loss improvement of at least 0.01.
# The threshold keyword is `min_delta`; the former misspelled `episilon`
# keyword never reached the callback's threshold setting.
reduce_learning_rate = ReduceLROnPlateau(monitor='val_loss',
                                         mode='min',
                                         min_delta = 0.01,
                                         patience=3,
                                         factor=0.1,
                                         min_lr=1e-6,
                                         verbose=1)

# CSVLogger: append the per-epoch training history to ./training.log.
csv_logger = CSVLogger('./training.log')

## Modèle de segmentation

In [None]:
'''ATTENTION: pour le choix du modele si les images d'entrainement sont en noir et blanc,
les modèles et les poids obtenus avec des images en couleur ne sont pas compatibles'''

# Name of the encoder network handed to segmentation_models.
BACKBONE = 'resnet50'

# FPN whose encoder starts from ImageNet weights and is not frozen; the output
# has one sigmoid-activated channel per class.
model = sm.FPN(
    BACKBONE,
    input_shape=(HEIGHT, WIDTH, CHANNELS),
    classes=NB_CLASSES,
    activation='sigmoid',
    encoder_weights='imagenet',
    encoder_freeze=False,
)

# Optimised on the project's bce_dice_loss, monitored with dice_coef.
model.compile(loss=bce_dice_loss, optimizer="adam", metrics=[dice_coef])

# Chargement des poids du modèle pré-entraîné s'ils existent

In [None]:
input_model_path = "../input/clouds-segmentation-fpn/model.h5"

# Best-effort warm start: when the weights cannot be loaded, training simply
# starts from the model as built above.
try:
    model.load_weights(input_model_path)
    print("model.h5 pré entrainé chargé!")
except Exception as load_error:
    # `Exception` rather than a bare `except` so that kernel interrupts still
    # propagate; the reason is reported instead of being silently discarded.
    print("model.h5 non pré entrainé, non chargé")
    print(f"  ({type(load_error).__name__}: {load_error})")

## Entrainement

In [None]:
EPOCHS = 10

# `Model.fit` accepts `Sequence` generators directly; `fit_generator` is
# deprecated in TensorFlow 2 and absent from recent Keras releases.
model_info = model.fit(train_generator,
                       validation_data=valid_generator,
                       callbacks=[checkpoint, early_stopping, reduce_learning_rate, csv_logger],
                       epochs=EPOCHS)

## Courbes des résultats de l'entrainement

In [None]:
# One panel per monitored quantity, each showing the training and the
# validation curve taken from the Keras history.
plt.figure(figsize=(12, 4))
for position, train_key, valid_key, title in (
    (121, 'loss', 'val_loss', 'Model loss by epoch'),
    (122, 'dice_coef', 'val_dice_coef', 'Model dice_coef by epoch'),
):
    plt.subplot(position)
    plt.plot(model_info.history[train_key])
    plt.plot(model_info.history[valid_key])
    plt.title(title)
    plt.ylabel(train_key)
    plt.xlabel('epoch')
    plt.legend(['train', 'valid'], loc='right')
plt.show()

# Chargement des poids du modèle après entrainement

In [None]:
output_model_path = "./model.h5"

# Try the freshly trained weights first, then the pre-trained ones from the
# input data set. Each candidate is: (path, success message, failure message).
for weights_path, loaded_message, failed_message in (
    (output_model_path,
     "model.h5 en output chargé!",
     "model.h5 non entrainé en output, non chargé"),
    (input_model_path,
     "model.h5 pré entrainé chargé!",
     "model.h5 non pré entrainé en input, non chargé"),
):
    try:
        model.load_weights(weights_path)
    except Exception:
        # `Exception` rather than a bare `except` so that kernel interrupts
        # still propagate.
        print(failed_message)
        continue
    print(loaded_message)
    break
else:
    # Reached only when every candidate failed (this message could never be
    # printed by the former nested try/except structure).
    print("aucun model.h5 chargé")

## Résultats sur plusieurs images du jeu de validation

In [None]:
# At most the first 100 validation ids, served one image per batch and in a
# fixed order so that prediction i matches check_generator[i].
Test_images = val_idx[0:100]

check_generator = DataGenerator(Test_images,
                                df=mask_count_df,
                                target_df=train_df,
                                shuffle=False,
                                reshape=(HEIGHT, WIDTH),
                                augment=False,
                                n_channels=CHANNELS,
                                color_mode = COLOR_MODE,
                                n_classes=NB_CLASSES,
                                batch_size=1)

# `Model.predict` accepts `Sequence` generators directly; `predict_generator`
# is deprecated in TensorFlow 2 and absent from recent Keras releases.
# (`workers=1` was the default value and is therefore omitted.)
batch_pred_masks = model.predict(check_generator, verbose=1)

In [None]:
THRESHOLD = 0.5 # predicted values above this threshold are set to 1 in the mask

# Colour legend of the figures.
print(Fore.GREEN + 'Masque original')
print(Fore.YELLOW + 'Masque prédit')
print(Fore.BLUE + 'Fit du masque prédit avec le masque original')

# Show up to 5 examples without running past the available predictions.
for i in range(min(5, len(batch_pred_masks))):
    # Binarise the predicted values.
    batch_pred_masks_thr = (batch_pred_masks[i] > THRESHOLD).astype(np.uint8)

    # Fetch the batch once: every access re-reads the image from disk and
    # rebuilds its masks.
    images, true_masks = check_generator[i]

    visualize_image_mask_prediction(images[0,:,:,:],
                                    true_masks[0,:,:,:],
                                    batch_pred_masks_thr,
                                    Transparency=True)

# Calcul du dice moyen par type de nuage

In [None]:
# Mean dice per class over the predicted validation images.

dice_fish=[]
dice_flower=[]
dice_gravel=[]
dice_sugar=[]

# Iterate over the predictions actually available rather than a fixed 100,
# which would overrun when fewer images were predicted.
for i in range(len(batch_pred_masks)):
    # Binarise the predicted values.
    batch_pred_masks_thr = (batch_pred_masks[i] > THRESHOLD).astype(np.uint8)

    # Load the ground-truth masks and compute the per-class scores once per
    # image instead of once per class.
    _, true_masks = check_generator[i]
    class_scores = dice_coef_class(true_masks[0,:,:,:], batch_pred_masks_thr)

    dice_fish.append(class_scores[0])
    dice_flower.append(class_scores[1])
    dice_gravel.append(class_scores[2])
    dice_sugar.append(class_scores[3])

print("Moyen dice fish",np.mean(dice_fish))
print("Moyen dice flower",np.mean(dice_flower))
print("Moyen dice gravel",np.mean(dice_gravel))
print("Moyen dice sugar",np.mean(dice_sugar))

In [None]:
file_log = "./training.log"

# Training history as written by the CSVLogger callback.
log = pd.read_csv(file_log)
log.head()

# Two panels on the top row of a 2x2 grid: loss and dice coefficient, each
# with its training and validation curve.
plt.figure(figsize=(12, 10))
for position, train_col, valid_col, title in (
    (221, 'loss', 'val_loss', 'Model loss by epoch'),
    (222, 'dice_coef', 'val_dice_coef', 'Model dice_coef by epoch'),
):
    plt.subplot(position)
    plt.plot(log[train_col])
    plt.plot(log[valid_col])
    plt.title(title)
    plt.ylabel(train_col)
    plt.xlabel('epoch')
    plt.legend(['train', 'valid'], loc='right')
plt.show()