Imports 

In [153]:
import torch
from torch.utils.data import Dataset
from torchvision.transforms import ToTensor
import torchvision.transforms as transforms
from torch.utils.data import DataLoader, random_split

from sklearn.model_selection import train_test_split
from skimage.transform import resize
from collections import OrderedDict
from tifffile import TiffFile
from pathlib import Path
import os
import numpy as np
import matplotlib.pyplot as plt

import albumentations as A
from albumentations.pytorch import ToTensorV2

from transformers import SegformerModel, SegformerConfig
from transformers import  SegformerForSemanticSegmentation

from torchvision import transforms
import torch
import copy 
import torch.nn as nn
from torch.optim.lr_scheduler import ReduceLROnPlateau

from torchvision.transforms import ToTensor, ToPILImage
import matplotlib.pyplot as plt


On réécrit des fonctions utiles car les imports utilisent des parsers donc ça ne marche pas dans un Notebook.

In [154]:
class LandCoverData():
    """Class to represent the S2GLC Land Cover Dataset for the challenge,
    with useful metadata and statistics.
    """
    # image size of the images and label masks
    IMG_SIZE = 256
    # the images are RGB+NIR (4 channels)
    N_CHANNELS = 4
    # we have 9 classes + a 'no_data' class for pixels with no labels (absent in the dataset)
    N_CLASSES = 10
    CLASSES = [
        'no_data',
        'clouds',
        'artificial',
        'cultivated',
        'broadleaf',
        'coniferous',
        'herbaceous',
        'natural',
        'snow',
        'water'
    ]
    # classes to ignore because they are not relevant. "no_data" refers to pixels without
    # a proper class, but it is absent in the dataset; "clouds" class is not relevant, it
    # is not a proper land cover type and images and masks do not exactly match in time.
    IGNORED_CLASSES_IDX = [0, 1]

    # The training dataset contains 18491 images and masks
    # The test dataset contains 5043 images and masks
    TRAINSET_SIZE = 18491
    TESTSET_SIZE = 5043

    # for visualization of the masks: classes indices and RGB colors
    CLASSES_COLORPALETTE = {
        0: [0,0,0],
        1: [255,25,236],
        2: [215,25,28],
        3: [211,154,92],
        4: [33,115,55],
        5: [21,75,35],
        6: [118,209,93],
        7: [130,130,130],
        8: [255,255,255],
        9: [43,61,255]
        }
    CLASSES_COLORPALETTE = {c: np.asarray(color) for (c, color) in CLASSES_COLORPALETTE.items()}

    # statistics
    # the pixel class counts in the training set
    TRAIN_CLASS_COUNTS = np.array(
        [0, 20643, 60971025, 404760981, 277012377, 96473046, 333407133, 9775295, 1071, 29404605]
    )
    # the minimum and maximum value of image pixels in the training set
    TRAIN_PIXELS_MIN = 1
    TRAIN_PIXELS_MAX = 24356

In [155]:
def get_Y(mask2d):
  occurrences = np.bincount(mask2d.flatten(), minlength=10)
  Y = occurrences / np.sum(occurrences)
  return Y

In [156]:
def numpy_parse_image_mask(image_path):
    """Load an image and its segmentation mask as numpy arrays and returning a tuple
    Args:
        image_path : path to image
    Returns:
        (numpy.array[uint16], numpy.array[uint8]): the image and mask arrays
    """
    # image_path = Path(image_path)
    # get mask path from image path:
    # image should be in a images/<image_id>.tif subfolder, while the mask is at masks/<image_id>.tif
    mask_path = image_path.replace("images","masks")
    with TiffFile(image_path) as tifi, TiffFile(mask_path) as tifm:
        image = tifi.asarray()[:, :, :4] 
        mask = tifm.asarray()
        print(f"Image shape: {image.shape}")
    return image, mask

On modifie la classe LandscapeData pour pouvoir insérer de la data augmentation sous conditions. 

In [157]:
class LandscapeData(Dataset):

    def __init__(self, data_folder, transform=ToTensor(), transform_augm=None):
        self.data_folder = data_folder
        self.transform = transform
        self.transform_augm = transform_augm

        # Liste des noms de fichiers dans les dossiers
        image_files = os.listdir(os.path.join(data_folder, 'images'))

        # Utilisez numpy_parse_image_mask pour charger les images et les masques
        self.train_data = [numpy_parse_image_mask(os.path.join(data_folder, 'images', filename)) for filename in image_files]

    def __len__(self):
        return len(self.train_data)

    def __getitem__(self, idx):
        image, mask = self.train_data[idx]

        image = image.astype("float32")
        mask = mask.astype("int64")

        # Memoire seuils : 0.3 / 0.3 / 0.3 / 0.6
        seuil_per_water = 0.3
        seuil_per_city = 0.3
        seuil_per_natural = 0.3
        seuil_per_conif = 0.6

        Y = get_Y(mask) # Y[2] = 'artificial' ; Y[5] = 'coniferous' ; Y[7] = 'natural' ; Y[9] = 'water'

        if Y[2] > seuil_per_city or Y[9] > seuil_per_water or Y[5] > seuil_per_conif  or Y[7] > seuil_per_natural :
            # Augmentation de données
            augmented = self.transform_augm(image=image, mask=mask)
            image = augmented['image']
            mask = augmented['mask']

            print("AUGM:Type(image) = ", type(image), " ; image.shape = ", image.shape)
            print("AUGM:Type(mask) = ", type(mask), " ; image.shape = ", mask.shape)

        else:
            # Pas d'augmentation, transformation simple sur l'image.
            image = self.transform(image=image)['image']
            mask = mask.astype("int64")
            mask = torch.tensor(mask, dtype=torch.int64) 
            mask = mask.squeeze()
            print("Type(image) = ", type(image), " ; image.shape = ", image.shape)
            print("Type(mask) = ", type(mask), " ; image.shape = ", mask.shape)

        return image, mask

Voici nos transformées : 

In [158]:
means =  [ 418.19976217,  703.34810956,  663.22678147, 3253.46844222]
stds =  [294.73191962, 351.31328415, 484.47475774, 793.73928079]

# Transformations 
data_transforms = {
    'train': A.Compose([
        A.Normalize(means, stds),
        ToTensorV2()
    ]),
    'train_augmentation': A.Compose([
        A.HorizontalFlip(p=0.7),
        A.VerticalFlip(p=0.7),
        A.RandomRotate90(p=0.7),
        A.Transpose(p=0.7),
        # A.Normalize(means, stds),
        ToTensorV2()
    ]),
    'test': A.Compose([
        A.Normalize(means, stds),
        ToTensorV2()
    ])
}

# On pourrait rajouter dans l'augmentation cette transformation élastique, un peu violente ... ? 
# A.OneOf([
# A.ElasticTransform(alpha=120, sigma=120 * 0.05, alpha_affine=120 * 0.03, p=0.5),
# A.GridDistortion(p=0.5),
# A.OpticalDistortion(distort_limit=2, shift_limit=0.5, p=1)                  
# ], p=0.8)

On créé les datasets ```train```, ```validation```, ```test```

In [159]:
# ------------- DATASET & DATALOADER ----------- 

# Définir le chemin du dossier d'entraînement
data_folder = 'D:/my_git/landscape_data/dataset/small_dataset/'

full_dataset = LandscapeData(data_folder, transform=None)  

train_size = int(0.8 * len(full_dataset))
val_size = int(0.10 * len(full_dataset))
test_size = len(full_dataset) - train_size - val_size
train_dataset, val_dataset, test_dataset = random_split(full_dataset, [train_size, val_size, test_size])

train_dataset.dataset.transform = data_transforms['train']
train_dataset.dataset.transform_augm = data_transforms['train_augmentation']
val_dataset.dataset.transform = data_transforms['test']
test_dataset.dataset.transform = data_transforms['test']

train_loader = DataLoader(train_dataset, batch_size=4, shuffle=True)
val_loader = DataLoader(val_dataset, batch_size=4, shuffle=False)
test_loader = DataLoader(test_dataset, batch_size=4, shuffle=False)

# Number of images in train and val sets
num_train_images = len(train_dataset)
num_val_images = len(val_dataset)
num_test_images = len(test_dataset)

print(f"Number of images in the training set: {num_train_images}")
print(f"Number of images in the validation set: {num_val_images}")
print(f"Number of images in the test set: {num_test_images}")

Image shape: (256, 256, 4)
Image shape: (256, 256, 4)
Image shape: (256, 256, 4)
Image shape: (256, 256, 4)
Image shape: (256, 256, 4)
Image shape: (256, 256, 4)
Image shape: (256, 256, 4)
Image shape: (256, 256, 4)
Image shape: (256, 256, 4)
Image shape: (256, 256, 4)
Image shape: (256, 256, 4)
Image shape: (256, 256, 4)
Image shape: (256, 256, 4)
Image shape: (256, 256, 4)
Image shape: (256, 256, 4)
Image shape: (256, 256, 4)
Image shape: (256, 256, 4)
Image shape: (256, 256, 4)
Image shape: (256, 256, 4)
Image shape: (256, 256, 4)
Image shape: (256, 256, 4)
Image shape: (256, 256, 4)
Image shape: (256, 256, 4)
Image shape: (256, 256, 4)
Image shape: (256, 256, 4)
Image shape: (256, 256, 4)
Image shape: (256, 256, 4)
Image shape: (256, 256, 4)
Image shape: (256, 256, 4)
Image shape: (256, 256, 4)
Image shape: (256, 256, 4)
Image shape: (256, 256, 4)
Image shape: (256, 256, 4)
Image shape: (256, 256, 4)
Image shape: (256, 256, 4)
Image shape: (256, 256, 4)
Image shape: (256, 256, 4)
I

On lance un entrainement avec le *segformer*

In [160]:
def segformer(lr=0.0001):
    
    config = SegformerConfig(
        num_labels=10,
        num_channels=4,
        semantic_loss_ignore_index=0,
        patch_sizes = [3, 2, 2, 2],
        depths=[3, 4, 18, 3],
        hidden_sizes=[64, 128, 320, 512],
        decoder_hidden_size=768,
    )
    model_name ="SegformerMit-B3"

    # Charger les poids pré-entrainés si le chemin est spécifié
    model = SegformerForSemanticSegmentation(config)

    # define optimizer
    optimizer = torch.optim.AdamW(model.parameters(), lr)

    return model,optimizer,model_name

In [161]:
def train_model(model,model_name, optimizer,scheduler, num_epochs,data_loaders, patience=5):
    train_losses = []
    val_losses = []
    best_val_loss = float('inf')
    consecutive_epochs_no_improvement = 0
    best_model_wts = copy.deepcopy(model.state_dict())
    criterion = nn.CrossEntropyLoss(ignore_index=0)
    dataset_sizes = {phase: len(data_loaders[phase].dataset) for phase in ['train', 'val']}

    for epoch in range(num_epochs):
        print("Epoch {}/{}".format(epoch, num_epochs - 1))
        print("-" * 10)

        running_loss = {'train': 0.0, 'val': 0.0}

        for phase in ['train', 'val']:
            if phase == 'train':
                model.train()
            else:
                model.eval()

            for inputs, targets in data_loaders[phase]:
                pixel_values = inputs.to('cpu')
                labels = targets.to('cpu')
                optimizer.zero_grad()

                with torch.set_grad_enabled(phase == 'train'):
                    outputs = model(pixel_values=pixel_values, labels=labels)
                    loss, logits = outputs.loss, outputs.logits

                    if phase == 'train':
                        loss.backward()
                        optimizer.step()

                running_loss[phase] += loss.item() * inputs.size(0)

            epoch_loss = running_loss[phase] / dataset_sizes[phase]

            print('{} Loss: {:.4f}'.format(phase, epoch_loss))
            if phase == 'val':
                scheduler.step(epoch_loss)  # Step the scheduler on validation loss


            if phase == 'train':
                train_losses.append(epoch_loss)
            else:
                val_losses.append(epoch_loss)

                # Check for early stopping
                if epoch_loss < best_val_loss:
                    best_val_loss = epoch_loss
                    best_model_wts = copy.deepcopy(model.state_dict())
                    consecutive_epochs_no_improvement = 0
                else:
                    consecutive_epochs_no_improvement += 1

        if consecutive_epochs_no_improvement >= patience:
            print(f'Early stopping after {patience} consecutive epochs without improvement.')
            break

    model.load_state_dict(best_model_wts)
    return train_losses, val_losses, model

In [162]:
model,optimizer,model_name=segformer(lr=0.0001)
data_loaders = {'train': train_loader, 'val': val_loader}

Num_epoch=200
scheduler = ReduceLROnPlateau(optimizer, mode='min', patience=3, factor=0.1)

train_losses, val_losses , model  = train_model(model,model_name, optimizer,scheduler,  Num_epoch,data_loaders)

Epoch 0/199
----------
Type(image) =  <class 'torch.Tensor'>  ; image.shape =  torch.Size([4, 256, 256])
Type(mask) =  <class 'torch.Tensor'>  ; image.shape =  torch.Size([256, 256])
Type(image) =  <class 'torch.Tensor'>  ; image.shape =  torch.Size([4, 256, 256])
Type(mask) =  <class 'torch.Tensor'>  ; image.shape =  torch.Size([256, 256])
Type(image) =  <class 'torch.Tensor'>  ; image.shape =  torch.Size([4, 256, 256])
Type(mask) =  <class 'torch.Tensor'>  ; image.shape =  torch.Size([256, 256])
Type(image) =  <class 'torch.Tensor'>  ; image.shape =  torch.Size([4, 256, 256])
Type(mask) =  <class 'torch.Tensor'>  ; image.shape =  torch.Size([256, 256])
Type(image) =  <class 'torch.Tensor'>  ; image.shape =  torch.Size([4, 256, 256])
Type(mask) =  <class 'torch.Tensor'>  ; image.shape =  torch.Size([256, 256])
Type(image) =  <class 'torch.Tensor'>  ; image.shape =  torch.Size([4, 256, 256])
Type(mask) =  <class 'torch.Tensor'>  ; image.shape =  torch.Size([256, 256])
Type(image) =  <c

KeyboardInterrupt: 

Bonne nouvelle, ça tourne. Maintenant, est-ce que ça retourne bien le résultat escompté ? Oui, j'ai affiché les graphiques obtenus dans la fonction __getitem__ et on a bien la transformation sur l'image. 

# Affichage des transformées

In [None]:
import cv2
from matplotlib import pyplot as plt
import albumentations as A
from tifffile import TiffFile, TiffWriter


def show_image(image, display_min=0, display_max=2200, ax=None):

    if image.dtype == np.uint16:
        iscale = display_max - display_min
        scale = 255 / iscale
        byte_im = (image) * scale
        byte_im = (byte_im.clip(0, 255) + 0.5).astype(np.uint8)
        image = byte_im

    return image

def load_and_transform_image(image_path, transformation):

    with TiffFile(image_path) as tifi:
        image = tifi.asarray()[:,:,:3]

    print("Image shape = ", image.shape)
    transformed_image = transformation(image=image)['image']
    return image, transformed_image


# Chemin de l'image .tif
image_path = "D:/my_git/landscape_data/dataset/small_dataset/images/23.tif"
aug = A.Compose([
        A.HorizontalFlip(p=0.7),
        A.VerticalFlip(p=0.7),
        A.RandomRotate90(p=0.7),
        A.Transpose(p=0.7),
        A.OneOf([
        A.ElasticTransform(alpha=120, sigma=120 * 0.05, alpha_affine=120 * 0.03, p=1),
        A.GridDistortion(p=1),
        A.OpticalDistortion(distort_limit=2, shift_limit=0.5, p=1)                  
        ], p=0.8)])

original_image, transformed_image = load_and_transform_image(image_path, aug)

imor = show_image(original_image)
imtr = show_image(transformed_image)
plt.figure()
plt.subplot(1,2,1), plt.imshow(imor)
plt.subplot(1,2,2), plt.imshow(imtr)
plt.show()