In [12]:
import numpy as np
import matplotlib.pyplot as plt
import os
import sys
import csv
import math
import torch, torch.nn as nn, torch.nn.functional as F
from torch.utils.data import DataLoader, Dataset, random_split
from torchvision import datasets, transforms
from tqdm import tqdm
import pytorch_lightning as pl
import glob

## First, we slice the train images into 31 x 31 pixels with the ground truth in the middle

In [13]:
def ndigit(n, x):
    x = str(x)
    while(len(x) < n):
        x = "0" + x
    return x

In [78]:
def load_data(res, files = 20):
    j = 0
    path = ["02", "train"]
    res = int((res-1)/2)
    
    for p in path:
        for f in range(files):
            image = np.load(f"images_{p}/images/image_{ndigit(3, f)}.npy")
            mask = np.load(f"masks_{p}/masks/mask_{ndigit(3, f)}.npy")
            image = np.reshape(image, (1024,1024,10))
            mask = np.reshape(mask, (1024,1024,1))

            # Add padding to every image (and mask) edge in case there are ground truths which are too close to an edge
            padded_image = np.pad(image, ((res+1, res+1), (res+1, res+1), (0,0)), mode='constant')
            padded_mask = np.pad(mask, ((res+1, res+1), (res+1, res+1), (0,0)), mode='constant')

            # Extract ground truths
            ground_truths_pos = np.array(np.where(padded_mask != 0)).T
            
            # Slice and save patches around each ground truth
            for i in ground_truths_pos: 
                patch = (padded_image[i[0]-res : i[0]+res+1, i[1]-res : i[1]+res+1, :], padded_mask[i[0], i[1], 0])
                np.save(f"patches/train/patch_{p}_{ndigit(3, f)}_{ndigit(5, j)}.npy", np.array(patch, dtype="object"))                                 
                j += 1

In [79]:
# Check number of ground truths
# pos = 0
# for i in range(20):
#     mask = np.load(f"masks_02/masks/mask_{ndigit(3, i)}.npy")
#     ground_truths_pos = np.array(np.where(mask != 0)).T
#     pos = pos + len(ground_truths_pos)
# print(pos)

In [80]:
res = 15
load_data(res)

## Then, we load the data and have a look

In [81]:
batch_size = 128

In [82]:
# Load patches
directory = 'patches/train'
file_paths = glob.glob(directory + '/*.npy')
trainset0 = [np.load(file_path, allow_pickle=True) for file_path in file_paths]
trainset = []
for pic in trainset0:
    trainset.append(pic)
len(trainset)

38863

In [97]:
trainset[0]

array([array([[[1968, 2409, 2409, ..., 2564, 2564, 3006],
               [3006, 3047, 3047, ..., 2631, 2631, 2165],
               [2165, 2307, 2307, ..., 2619, 2619, 2545],
               ...,
               [2543, 2499, 2499, ..., 2129, 2129, 2124],
               [2124, 2121, 2121, ..., 2239, 2239, 2223],
               [2223, 2232, 2232, ..., 2008, 2008, 2101]],

              [[2305, 2245, 2245, ..., 2123, 2123, 2160],
               [2160, 2176, 2176, ..., 2997, 2997, 2350],
               [2350, 2226, 2226, ..., 1983, 1983, 2088],
               ...,
               [2057, 2103, 2103, ..., 2527, 2527, 2737],
               [2737, 2847, 2847, ..., 2684, 2684, 2288],
               [2288, 2296, 2296, ..., 2282, 2282, 2629]],

              [[2355, 2573, 2573, ..., 2291, 2291, 2307],
               [2307, 2341, 2341, ..., 3254, 3254, 2674],
               [2674, 2364, 2364, ..., 2120, 2120, 1991],
               ...,
               [2243, 2289, 2289, ..., 2372, 2372, 2389],
        

In [84]:
def enrich_channels(trainset, veggie, moisture):
    #trainset[pic_no][0][h][w][channel] -> pixel value
    print(f"Shape vorher: Liste mit ({res},{res},10) Bildern")
    counter = 0
    trainset = trainset.copy()  # Make a copy of the trainset

    if veggie:
        pic_no = 0
        for pic in trainset:
            counter += 1
            pixel_values = pic[0]
            channel8 = pixel_values[:, :, 7]
            channel4 = pixel_values[:, :, 3]
            channels = pic[0].shape[2]
            width = pic[0].shape[0]
            height = pic[0].shape[1]

            vegetation_array = np.divide((np.subtract(channel8, channel4)), np.add(channel8, channel4))
            trainset_transformed = np.concatenate((trainset[pic_no][0], vegetation_array[:, :, np.newaxis]), axis=2)
            trainset[pic_no] = (trainset_transformed, trainset[pic_no][1])
            pic_no += 1

        print("Added Vegetation (B8-B4)/(B8+B4)")

    if moisture:
        pic_no = 0
        for pic in trainset:
            pixel_values = pic[0]
            channel8a = pixel_values[:, :, 7]
            channel11 = pixel_values[:, :, 8]
            channels = pic[0].shape[2]
            width = pic[0].shape[0]
            height = pic[0].shape[1]

            moisture_array = np.divide((np.subtract(channel8a, channel11)), np.add(channel8a, channel11))
            trainset_transformed = np.concatenate((trainset[pic_no][0], moisture_array[:, :, np.newaxis]), axis=2)
            trainset[pic_no] = (trainset_transformed, trainset[pic_no][1])
            pic_no += 1

        print("Added Moisture (B8A-B11)/(B8A+B11)")
        
    print("shape nachher", trainset[0][0].shape)  # Print the shape of the first item

    return trainset

In [85]:
trainset_enriched = enrich_channels(trainset, True, True)
len(trainset_enriched)
#trainset[pic_no][0][h][w][channel] -> pixel value
#trainset[pic_no][1] -> Ground truth 

Shape vorher: Liste mit (15,15,10) Bildern


  vegetation_array = np.divide((np.subtract(channel8, channel4)), np.add(channel8, channel4))


Added Vegetation (B8-B4)/(B8+B4)


  moisture_array = np.divide((np.subtract(channel8a, channel11)), np.add(channel8a, channel11))


Added Moisture (B8A-B11)/(B8A+B11)
shape nachher (15, 15, 12)


38863

In [87]:
X,y = trainset_enriched[0]
X.shape

(15, 15, 12)

In [124]:
class CustomDataset(Dataset):
    def __init__(self, trainset, transform, augmentations):
        self.trainset = trainset
        self.transform = transform
        self.augmentations = augmentations

    def __len__(self):
        return len(self.trainset)

    def __getitem__(self, index):
        data, target = self.trainset[index]

        # apply each transformation jointly to each input
        if self.transform:
            data = self.transform(data)

        # apply each augmentation separately to each input
        if self.augmentations:
            for augmentation in self.augmentations:
                data = augmentation(data)


        return data, target

transform = transforms.Compose(
    [transforms.ToTensor(),
     transforms.ConvertImageDtype(torch.float64),
     transforms.Lambda(lambda x : x / 3000),
     transforms.Lambda(lambda x : torch.where(x > 1, 1, x)), # clip images between 0 and 1
     transforms.Normalize(mean=(0.5,)*12,
                          std=(0.5,)*12)
     ])

augmentations = [
     transforms.RandomRotation(360),
     transforms.RandomAffine(degrees=0, translate=(0.5,0.5)), # shift in both directions along 0.5 * height on y-axis and 0.5 * width on x-axis
     transforms.RandomAffine(0, scale=(10,45)), # scale in range 10 <= scale <= 45
     transforms.RandomAffine(0, shear=[10,30,10,30]), # shear on x- and y-axis between (10,30) 
     transforms.ElasticTransform(alpha=50.0, sigma=3.0) # displaces pixels
]

In [125]:
# Create the custom dataset
trainset_transformed = CustomDataset(trainset_enriched, transform=transform, augmentations=augmentations)

len(trainset_transformed)

38863

In [126]:
trainset_transformed[0]

(tensor([[[ 0.1355,  0.5291,  0.5447,  ...,  0.1558,  0.1412,  0.1429],
          [ 0.4109,  0.5447,  0.5447,  ...,  0.5447,  0.5447,  0.5447],
          [ 0.2089,  0.5447,  0.5447,  ...,  0.5447,  0.5447,  0.5447],
          ...,
          [ 0.5447,  0.5447,  0.5447,  ...,  0.5447,  0.5447,  0.4957],
          [ 0.2896,  0.3310,  0.4346,  ...,  0.5447,  0.5447,  0.5190],
          [ 0.0000,  0.0000,  0.0000,  ...,  0.5447,  0.4898,  0.3490]],
 
         [[ 0.1546,  0.6035,  0.6213,  ...,  0.1778,  0.1611,  0.1631],
          [ 0.4688,  0.6213,  0.6213,  ...,  0.6213,  0.6213,  0.6213],
          [ 0.2383,  0.6213,  0.6213,  ...,  0.6213,  0.6213,  0.6213],
          ...,
          [ 0.6213,  0.6213,  0.6213,  ...,  0.6213,  0.6213,  0.5654],
          [ 0.3304,  0.3776,  0.4958,  ...,  0.6213,  0.6213,  0.5920],
          [ 0.0000,  0.0000,  0.0000,  ...,  0.6213,  0.5587,  0.3981]],
 
         [[ 0.1546,  0.6035,  0.6213,  ...,  0.1778,  0.1611,  0.1631],
          [ 0.4688,  0.6213,

In [133]:
# Calculate the sizes of the training set and validation set
train_size = int(0.8 * len(trainset_transformed))
val_size = len(trainset_transformed) - train_size

# Split trainset into trainset and valset
trainset_load, valset_load = random_split(trainset_transformed, [train_size, val_size])
print(len(trainset_load), len(valset_load))

# Create data loaders for the training set and validation set
trainloader = DataLoader(trainset_load, batch_size=batch_size, shuffle=True, num_workers=4)
validloader = DataLoader(valset_load, batch_size=batch_size, shuffle=False, num_workers=4)


31090 7773


## Next, we define the model and train it

In [134]:
class MyCNNModel(pl.LightningModule): # New! def init(self, layers, lr=0.01, classes=None): super().init() # <- Very important! self.lr = lr self.classes = classes ## Build model self.layers = nn.Sequential(layers) # Create a sequential model

    def __init__(self, *layers, classes=None):
        super().__init__()

        self.lr = 0.01  # Assign the learning rate here
        self.classes = classes

        self.layers = nn.Sequential(*layers)  # Create a sequential model
        
    def forward(self, X):
        return self.layers(X)

    def predict(self, X):
        with torch.no_grad():
            y_hat = self(X).argmax(1)
        if self.classes is not None:
            y_hat = [self.classes[i] for i in y_hat]
        return y_hat

    def training_step(self, batch, batch_idx, log_prefix='train'):
        X, y = batch
        y_hat = self(X)
        loss_fn = nn.MSELoss()
        loss = loss_fn(y_hat, y)
        self.log(f"{log_prefix}_loss", loss, on_step=True, on_epoch=True, prog_bar=True, logger=True)
        return loss

    def validation_step(self, batch, batch_idx):
        with torch.no_grad():
            return self.training_step(batch, batch_idx, log_prefix='valid')

    def configure_optimizers(self):
        # Adam with Weight Decay
        optimizer = torch.optim.AdamW(self.parameters(), lr=self.lr, weight_decay=0.01)

        # Simplest scheduler is ReduceLROnPlateau. This scheduler reduces the learning rate by 0.1
        # if the val_loss has not decreased within the last 10 epochs.
        scheduler = {
            # REQUIRED: The scheduler instance
            "scheduler": torch.optim.lr_scheduler.ReduceLROnPlateau(optimizer, factor=0.1, patience=10, verbose=True),
            # The unit of the scheduler's step size, could also be 'step'.
            # 'epoch' updates the scheduler on epoch end whereas 'step'
            # updates it after a optimizer update.
            "interval": "epoch",
            # How many epochs/steps should pass between calls to
            # `scheduler.step()`. 1 corresponds to updating the learning
            # rate after every epoch/step.
            "frequency": 1,
            # Metric to to monitor for schedulers like `ReduceLROnPlateau`
            "monitor": "val_loss",
            # If set to `True`, will enforce that the value specified 'monitor'
            # is available when the scheduler is updated, thus stopping
            # training if not found. If set to `False`, it will only produce a warning
            "strict": True,
            # If using the `LearningRateMonitor` callback to monitor the
            # learning rate progress, this keyword can be used to specify
            # a custom logged name
            "name": None,
        }
        return {"optimizer": optimizer, 'lr-scheduler': scheduler}

## Implement model

In [135]:
# Implements entry to SepConv2d, see Lang et al. (2019), p. 6
class MyEntryLayer(nn.Module):
    def __init__(self, in_channels, out_channels):
        super().__init__()
        
        self.out_channels = out_channels

        self.proj_out = nn.Conv2d(in_channels, out_channels[len(out_channels)-1], (1,1))

        self.entry_blocks = nn.ModuleList()
        for i in range(len(out_channels)):
            self.entry_blocks.append(nn.Sequential(
                nn.Conv2d(in_channels, out_channels[i], (1, 1)),
                nn.BatchNorm2d(out_channels[i]),
                nn.ReLU()
            ))
            in_channels = out_channels[i]  # Update in_channels for next iteration

    def forward(self, x):
        x_entry = x
        for i in range(len(self.out_channels)):
            x_entry = self.entry_blocks[i](x_entry)
        x = self.proj_out(x)
        return (x + x_entry)

In [136]:
# Implements SepConv2D
class MySepConvLayer(nn.Module):
    def __init__(self, in_channels, out_channels, kernel, **kwargs):
        super().__init__()
        if in_channels == out_channels:
            self.proj_out = nn.Identity()
        else:
            self.proj_out = nn.Conv2d(in_channels, out_channels, (1,1), **kwargs)

        self.sep_conv_block = nn.Sequential(
            nn.ReLU(),
            nn.Conv2d(in_channels, in_channels, kernel, groups=in_channels, **kwargs), # depthwise SepConv
            nn.Conv2d(in_channels, out_channels, (1,1), **kwargs), # pointwise SepConv
            nn.BatchNorm2d(out_channels)
        )
    
    def forward(self, x):
        x_sep_conv = self.sep_conv_block(x)
        x_sep_conv_2 = self.sep_conv_block(x_sep_conv) # performs second SepConv, see Lang et al. (2019), p. 6
        x = self.proj_out(x)
        return (x + x_sep_conv_2) # adds original input and sep_conv_2 output

In [137]:
tree_model = MyCNNModel(
    MyEntryLayer(12, [128, 256]), # increase number of channels to 512
    MySepConvLayer(256, 256, (3,3), padding='same'),
    MySepConvLayer(256, 256, (3,3), padding='same'),
    MySepConvLayer(256, 256, (3,3), padding='same'),
    nn.AdaptiveMaxPool2d(1),
    nn.Flatten(1),
    nn.Linear(256, 1)
)

In [139]:
# New, we need a trainer class
from pytorch_lightning.callbacks import RichProgressBar, RichModelSummary
trainer1 = pl.Trainer(devices=1, accelerator="cpu", precision='64', max_epochs=1,
                      callbacks=[RichProgressBar(refresh_rate=50),
                                 RichModelSummary(3),
                                ])

Trainer already configured with model summary callbacks: [<class 'pytorch_lightning.callbacks.rich_model_summary.RichModelSummary'>]. Skipping setting a default `ModelSummary` callback.
GPU available: False, used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs


In [140]:
trainer1.fit(tree_model, trainloader, validloader)

  rank_zero_warn(


Output()

In [35]:
tree_model.eval()
tree_model = tree_model.float()
batch = next(iter(trainloader))
inputs = batch[0]
inputs = inputs.float()

print(inputs.shape)


with torch.no_grad():
    predictions = tree_model(inputs)


print("Predictions:", predictions)


torch.Size([128, 12, 15, 15])
forward: torch.Size([128, 12, 15, 15])
Predictions: tensor([[nan],
        [nan],
        [nan],
        [nan],
        [nan],
        [nan],
        [nan],
        [nan],
        [nan],
        [nan],
        [nan],
        [nan],
        [nan],
        [nan],
        [nan],
        [nan],
        [nan],
        [nan],
        [nan],
        [nan],
        [nan],
        [nan],
        [nan],
        [nan],
        [nan],
        [nan],
        [nan],
        [nan],
        [nan],
        [nan],
        [nan],
        [nan],
        [nan],
        [nan],
        [nan],
        [nan],
        [nan],
        [nan],
        [nan],
        [nan],
        [nan],
        [nan],
        [nan],
        [nan],
        [nan],
        [nan],
        [nan],
        [nan],
        [nan],
        [nan],
        [nan],
        [nan],
        [nan],
        [nan],
        [nan],
        [nan],
        [nan],
        [nan],
        [nan],
        [nan],
        [nan],
   