importing all packages

In [None]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here's several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)
import numpy as np
import pandas as pd
from matplotlib import pyplot as plt
import os
from sklearn.model_selection import train_test_split
from skimage.transform import resize

import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader, Dataset
from torchvision import models
from torchvision import transforms
import numpy as np
import os
from tqdm.notebook import tqdm


from torchvision.io.image import read_image
from torchvision.models.segmentation import fcn_resnet50, FCN_ResNet50_Weights
from torchvision.transforms.functional import to_pil_image

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

# import os
# for dirname, _, filenames in os.walk('/kaggle/input'):
#     for filename in filenames:
#         print(os.path.join(dirname, filename))

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

data loading

In [None]:

class CustomDataset(Dataset):
    """Paired image / segmentation-mask dataset backed by ``.npy`` files.

    Images are read from ``<data_folder>/img/<name>`` and masks from
    ``<data_folder>/seg/<name>``; both are resized to ``img_size``.

    Args:
        data_folder: Root folder containing the ``img`` and ``seg`` sub-folders.
        img_size: Target output shape handed to ``skimage.transform.resize``.
        image_files: File names that exist in both sub-folders.
        transform: Optional callable applied to the image tensor only
            (the mask is never transformed).
    """

    def __init__(self, data_folder, img_size, image_files, transform=None):
        self.data_folder = data_folder
        self.img_size = img_size
        self.image_files = image_files
        self.transform = transform

    def __len__(self):
        """Number of samples, i.e. the number of file names supplied."""
        return len(self.image_files)

    def __getitem__(self, idx):
        """Load, resize and tensorise sample ``idx``.

        Returns:
            ``(image, mask)`` where ``image`` is a float tensor with the last
            array axis moved to the front (assumes the stored array is
            height x width x channels -- TODO confirm) and ``mask`` is a long
            tensor of class ids.
        """
        file_name = self.image_files[idx]

        # NOTE(review): allow_pickle=True lets np.load unpickle arbitrary
        # objects; only keep it for trusted data files.
        image = np.load(os.path.join(self.data_folder, 'img', file_name), allow_pickle=True)
        mask = np.load(os.path.join(self.data_folder, 'seg', file_name), allow_pickle=True)

        # Resize image and mask
        image = resize(image, self.img_size, anti_aliasing=True)
        # order=0 (nearest neighbour) keeps labels discrete. preserve_range=True
        # is required as well: without it resize() rescales integer arrays the
        # way it rescales images, so class ids become fractions that the
        # .long() cast below truncates to 0.
        mask = resize(mask, self.img_size, anti_aliasing=False, order=0, preserve_range=True)

        # Convert image and mask to PyTorch tensors
        image = torch.from_numpy(image).permute(2, 0, 1).float()
        # resize() returns floats; round before the integer cast so a value
        # such as 14.999999 cannot be truncated to the wrong class.
        mask = torch.from_numpy(np.rint(mask)).long()

        if self.transform:
            image = self.transform(image)

        return (image, mask)


# Per-channel mean / std used to standardise the three input channels.
normalize = transforms.Normalize(
    mean=[0.4552, 0.4438, 0.4090],
    std=[0.2319, 0.2270, 0.2325],
)

# NOTE(review): `transform` is defined here but is not passed to any of the
# datasets created below, so images are currently fed without normalisation.
# Confirm whether that is intentional before wiring it in.
transform = transforms.Compose([normalize])


train_folder = "/kaggle/input/kul-h02a5a-computer-vision-ga2-2024/train/"
img_size = (256, 256)

# All non-hidden entries of the image folder, in lexical order so that the
# split below is reproducible.
image_files = sorted(
    name
    for name in os.listdir(os.path.join(train_folder, 'img'))
    if not name.startswith('.')
)

# 80 / 20 train / validation split with a fixed seed.
train_files, val_files = train_test_split(
    image_files,
    test_size=0.2,
    random_state=42,
)

train_dataset = CustomDataset(train_folder, img_size, train_files)
val_dataset = CustomDataset(train_folder, img_size, val_files)

# Un-split view over every training file.
full_dataset = CustomDataset(train_folder, img_size, image_files)

batch_size = 8

# Only the training loader shuffles; validation order stays fixed.
train_loader = DataLoader(train_dataset, shuffle=True, batch_size=batch_size)
val_loader = DataLoader(val_dataset, shuffle=False, batch_size=batch_size)

U-Net model

In [None]:
# Hyper-parameters shared by the cells below.
num_classes = 21
learning_rate = 1e-3
num_epochs = 100
img_size = (256, 256)

# Run on the GPU when PyTorch can see one, otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = torch.device("cuda")
else:
    device = torch.device("cpu")

In [None]:
import torch
import torch.nn as nn

def double_conv(in_channels, out_channels):
    """Build two stacked ``Conv2d(3x3, padding=1) -> BatchNorm2d -> ReLU`` stages.

    The first convolution maps ``in_channels`` to ``out_channels``; the second
    keeps ``out_channels``. Padding of 1 with a 3x3 kernel leaves the spatial
    size unchanged.

    Returns:
        An ``nn.Sequential`` holding the six layers in order.
    """
    stages = []
    stage_in = in_channels
    for _ in range(2):
        stages.extend([
            nn.Conv2d(stage_in, out_channels, 3, padding=1),
            nn.BatchNorm2d(out_channels),
            nn.ReLU(inplace=True),
        ])
        # Every stage after the first consumes the previous stage's output.
        stage_in = out_channels
    return nn.Sequential(*stages)

class UNet(nn.Module):
    """U-Net style encoder/decoder producing per-pixel class logits.

    The encoder applies four ``double_conv`` blocks (3 -> 256 -> 512 -> 1024
    -> 2048 channels) with 2x2 max-pooling between them. Dropout (p=0.5) is
    applied at the bottleneck. The decoder upsamples bilinearly, concatenates
    the matching encoder feature map along the channel axis and applies a
    ``double_conv`` three times, followed by a 1x1 convolution to
    ``num_classes`` channels.

    Args:
        num_classes: Number of output channels of the final 1x1 convolution.
    """

    def __init__(self, num_classes):
        super().__init__()

        # Encoder
        self.dconv_down1 = double_conv(3, 256)
        self.dconv_down2 = double_conv(256, 512)
        self.dconv_down3 = double_conv(512, 1024)
        self.dconv_down4 = double_conv(1024, 2048)

        self.maxpool = nn.MaxPool2d(2)
        # Kept so existing code that accesses `model.upsample` keeps working;
        # forward() resizes to the skip tensor's exact size instead (see
        # _upsample_and_concat). The module has no parameters, so checkpoints
        # are unaffected either way.
        self.upsample = nn.Upsample(scale_factor=2, mode='bilinear', align_corners=True)

        # Decoder: input channels = skip channels + upsampled channels.
        self.dconv_up3 = double_conv(1024 + 2048, 1024)
        self.dconv_up2 = double_conv(512 + 1024, 512)
        self.dconv_up1 = double_conv(256 + 512, 256)

        self.conv_last = nn.Conv2d(256, num_classes, 1)

        self.dropout = nn.Dropout(0.5)  # regularisation at the bottleneck

    @staticmethod
    def _upsample_and_concat(x, skip):
        """Bilinearly resize ``x`` to ``skip``'s spatial size and concatenate.

        Max-pooling floors odd sizes, so a fixed 2x upsample would not match
        the skip tensor when the input height/width is not a multiple of 8 and
        ``torch.cat`` would fail. Resizing to the skip's size handles every
        input size; for sizes that are multiples of 8 the target size equals
        the 2x size, and with ``align_corners=True`` the sampling grid depends
        only on input/output sizes, so results are unchanged.
        """
        x = nn.functional.interpolate(
            x, size=skip.shape[-2:], mode='bilinear', align_corners=True
        )
        return torch.cat([x, skip], dim=1)

    def forward(self, x):
        """Map an input batch to logits with ``num_classes`` channels.

        The output has the same spatial size as the input.
        """
        conv1 = self.dconv_down1(x)
        x = self.maxpool(conv1)

        conv2 = self.dconv_down2(x)
        x = self.maxpool(conv2)

        conv3 = self.dconv_down3(x)
        x = self.maxpool(conv3)

        x = self.dconv_down4(x)
        x = self.dropout(x)

        x = self.dconv_up3(self._upsample_and_concat(x, conv3))
        x = self.dconv_up2(self._upsample_and_concat(x, conv2))
        x = self.dconv_up1(self._upsample_and_concat(x, conv1))

        out = self.conv_last(x)

        return out

# Use the shared `num_classes` setting rather than repeating the literal, so
# the network head, the class weights and the config cannot drift apart.
model = UNet(num_classes=num_classes)


getting the class weights

In [None]:
import os

from collections import Counter
import numpy as np

# Folder holding the training segmentation masks.
seg_dir = '/kaggle/input/kul-h02a5a-computer-vision-ga2-2024/train/seg'

all_files = os.listdir(seg_dir)

npy_files = [file for file in all_files if file.endswith('.npy')]

N = len(npy_files)

# One entry per (mask, class present in that mask). The resulting counts are
# therefore "number of masks containing the class", not pixel counts.
all_labels = []

for i in npy_files:
    # Load mask
    mask = np.load(os.path.join(seg_dir, i))

    # Record each distinct label once for this mask (as plain ints so the
    # dictionary keys below are ordinary Python integers).
    all_labels.extend(int(label) for label in np.unique(mask))

counts = Counter(all_labels)
total_count = len(all_labels)

# Inverse-frequency weights: rarer classes get larger weights.
class_weights = {cls: total_count / count for cls, count in counts.items()}

# Normalise so the weights of the observed classes sum to 1.
norm_factor = sum(class_weights.values())
class_weights = {cls: weight / norm_factor for cls, weight in class_weights.items()}

# Dense list with exactly one weight per model output channel. Indexing by
# range(len(class_weights)) would raise KeyError (or give the wrong length for
# the loss) as soon as one class never appears in the masks; such classes get
# weight 0.0 instead, which is harmless because they contribute no pixels.
class_weights_list = [class_weights.get(i, 0.0) for i in range(num_classes)]

# Convert the list to a tensor
weights = torch.tensor(class_weights_list, dtype=torch.float32)

# Move weights to the same device as your model
weights = weights.to(device)

Setting up the loss, optimizer and learning-rate scheduler for training. The scheduler (StepLR) multiplies the learning rate by 0.1 every 30 epochs, so that later epochs focus on refining small details instead of trying to learn large features. Weight decay is L2 regularization (I also used dropout in the U-Net itself, but maybe both can be increased; I suspect the model may be slightly overfitting again after increasing the model size).

In [None]:
# Cross-entropy loss with the per-class weights computed above.
criterion = nn.CrossEntropyLoss(weight=weights)

# SGD with momentum; weight_decay adds L2 regularisation.
optimizer = torch.optim.SGD(
    model.parameters(),
    lr=learning_rate,
    momentum=0.9,
    weight_decay=0.01,
)

# Multiply the learning rate by 0.1 after every 30 scheduler steps.
scheduler = torch.optim.lr_scheduler.StepLR(
    optimizer,
    step_size=30,
    gamma=0.1,
)

Training loop below (almost identical to Thibault's; I only adapted mine to fit the U-Net and added the scheduler).

In [None]:
def compute_iou(outputs, masks):
    """Mean intersection-over-union across classes for a batch.

    The previous implementation applied bitwise ``&`` / ``|`` to the integer
    class indices, which is only meaningful for binary 0/1 masks (e.g. a
    perfect all-background prediction scored 0). This version builds a
    confusion matrix and averages the per-class IoU.

    Args:
        outputs: Logits with the class axis at ``dim=1``; the number of
            classes is taken from ``outputs.shape[1]``.
        masks: Integer class ids with the same shape as ``outputs`` minus the
            class axis. Ids outside ``[0, num_classes)`` are ignored.

    Returns:
        A 0-dim float tensor: the mean IoU over classes that occur in the
        prediction or the target. Classes absent from both are skipped, and
        0 is returned when no class is present at all.
    """
    num_classes = outputs.shape[1]

    predictions = torch.argmax(outputs, dim=1).reshape(-1)
    targets = masks.reshape(-1).long()

    # Drop pixels whose target is not a valid class id.
    valid = (targets >= 0) & (targets < num_classes)
    predictions = predictions[valid]
    targets = targets[valid]

    # confusion[t, p] = number of pixels with target t predicted as p.
    confusion = torch.bincount(
        targets * num_classes + predictions,
        minlength=num_classes * num_classes,
    ).reshape(num_classes, num_classes)

    intersection = confusion.diag()
    union = confusion.sum(dim=0) + confusion.sum(dim=1) - intersection

    present = union > 0
    # clamp avoids 0/0 for absent classes; those entries are masked out below.
    per_class_iou = intersection.float() / union.clamp(min=1).float()

    return (per_class_iou * present).sum() / present.sum().clamp(min=1)

def _run_epoch(model, loader, criterion, device, desc, label, optimizer=None):
    """Run one pass over ``loader`` and return ``(mean_loss, mean_iou)``.

    When ``optimizer`` is given the model is put in training mode and updated
    after every batch; otherwise the model is put in evaluation mode and
    gradients are disabled. Means are weighted by batch size.
    """
    training = optimizer is not None
    model.train(training)

    total_loss = 0.0
    total_iou = 0.0
    seen = 0  # samples processed so far; exact even when the last batch is short

    with torch.set_grad_enabled(training), tqdm(total=len(loader), desc=desc, unit='batch') as pbar:
        for images, masks in loader:
            images = images.to(device)
            masks = masks.to(device)

            if training:
                optimizer.zero_grad()

            outputs = model(images)
            loss = criterion(outputs, masks)
            iou = compute_iou(outputs, masks)

            if training:
                loss.backward()
                optimizer.step()

            batch_len = images.size(0)
            total_loss += loss.item() * batch_len
            total_iou += iou.item() * batch_len
            seen += batch_len

            pbar.set_postfix({f'{label} Loss': total_loss / seen,
                              f'{label} IoU': total_iou / seen})
            pbar.update()

    denom = max(seen, 1)  # guard against an empty loader
    return total_loss / denom, total_iou / denom


def train(model, train_loader, val_loader, criterion, optimizer, num_epochs, scheduler=None):
    """Train ``model`` and evaluate it on ``val_loader`` after every epoch.

    Args:
        model: Network to optimise; moved to CUDA when available, else CPU.
        train_loader: Loader yielding ``(images, masks)`` training batches.
        val_loader: Loader yielding ``(images, masks)`` validation batches.
        criterion: Loss called as ``criterion(outputs, masks)``.
        optimizer: Optimizer updating ``model``'s parameters.
        num_epochs: Number of epochs to run.
        scheduler: Optional learning-rate scheduler stepped once per epoch.
            Passed explicitly instead of being read from a notebook global so
            the function does not depend on hidden state.

    Returns:
        ``(train_losses, val_losses, train_ious, val_ious)``: four lists with
        one entry per epoch.
    """
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    model.to(device)  # Move model to the appropriate device

    train_losses = []
    val_losses = []
    train_ious = []
    val_ious = []

    for epoch in range(num_epochs):
        epoch_train_loss, epoch_train_iou = _run_epoch(
            model, train_loader, criterion, device,
            desc=f'Epoch {epoch+1}/{num_epochs}', label='Training',
            optimizer=optimizer,
        )
        train_losses.append(epoch_train_loss)
        train_ious.append(epoch_train_iou)

        epoch_val_loss, epoch_val_iou = _run_epoch(
            model, val_loader, criterion, device,
            desc='Validation', label='Validation',
        )
        val_losses.append(epoch_val_loss)
        val_ious.append(epoch_val_iou)

        if scheduler is not None:
            scheduler.step()

    return train_losses, val_losses, train_ious, val_ious



# Train the model
train_losses, val_losses, train_ious, val_ious = train(
    model, train_loader, val_loader, criterion, optimizer, num_epochs,
    scheduler=scheduler,
)


saving the model

In [None]:
# Persist only the learned parameters; the file name encodes the learning
# rate and the number of epochs.
checkpoint_name = f'unet_adam_weighted_classes{learning_rate}_{num_epochs}epoch.pth'
torch.save(model.state_dict(), checkpoint_name)

quickly plotting the losses

In [None]:
# Plot both loss histories on one labelled axis so the curves can be told
# apart when the notebook is skimmed.
fig, ax = plt.subplots()
ax.plot(train_losses, label='Training loss')
ax.plot(val_losses, label='Validation loss')
ax.set_xlabel('Epoch')
ax.set_ylabel('Loss')
ax.set_title('Training and validation loss')
ax.legend()
plt.show()

importing test data

In [None]:
import re
img_size = (256,256)
class TestDataset(Dataset):
    """Image-only dataset reading ``.npy`` files from ``<data_folder>/img``.

    File names are ordered by the first run of digits they contain,
    interpreted as an integer, so that e.g. ``x_2`` comes before ``x_10``.

    Args:
        data_folder: Root folder containing the ``img`` sub-folder.
        img_size: Target output shape handed to ``skimage.transform.resize``.
        image_files: File names inside ``<data_folder>/img``; each must
            contain at least one digit.
    """

    def __init__(self, data_folder, img_size, image_files):
        self.data_folder = data_folder
        self.img_size = img_size
        self.image_files = sorted(image_files, key=self._leading_number)

    @staticmethod
    def _leading_number(file_name):
        """Return the first digit run in ``file_name`` as an int."""
        digit_runs = re.findall(r'\d+', file_name)
        return int(digit_runs[0])

    def __len__(self):
        """Number of test images."""
        return len(self.image_files)

    def __getitem__(self, idx):
        """Load image ``idx``, resize it and return it as a float tensor.

        The last array axis is moved to the front (assumes the stored array
        is height x width x channels -- TODO confirm).
        """
        file_name = self.image_files[idx]
        raw = np.load(os.path.join(self.data_folder, 'img', file_name))
        resized = resize(raw, self.img_size, anti_aliasing=True)
        return torch.from_numpy(resized).permute(2, 0, 1).float()

# Build the test dataset and its loader.

test_folder = "/kaggle/input/kul-h02a5a-computer-vision-ga2-2024/test"

# Every non-hidden entry of the test image folder.
test_image_files = sorted(
    name
    for name in os.listdir(os.path.join(test_folder, 'img'))
    if not name.startswith('.')
)

# TestDataset re-orders the names numerically on construction.
test_dataset = TestDataset(test_folder, img_size, test_image_files)

# One image per batch, in dataset order.
batch_size = 1
test_loader = DataLoader(test_dataset, shuffle=False, batch_size=batch_size)

creating function to test the model

In [None]:
def make_predictions(model, test_loader, verbose=True):
    """Run ``model`` over every batch of ``test_loader`` and stack the outputs.

    Args:
        model: Network to evaluate; it is switched to evaluation mode.
        test_loader: Iterable yielding image batches (tensors).
        verbose: When true (the default, matching the previous behaviour) the
            1-based batch counter is printed after each batch.

    Returns:
        The per-batch outputs concatenated along dim 0, on the CPU. Outputs
        are moved off the accelerator as they are produced so that the full
        set of logits does not accumulate in GPU memory. An empty tensor is
        returned when the loader yields nothing.
    """
    # Fall back to the CPU for models that have no parameters at all.
    first_param = next(model.parameters(), None)
    device = first_param.device if first_param is not None else torch.device('cpu')

    model.eval()  # Set model to evaluation mode
    predictions = []
    with torch.no_grad():
        for batch_index, images_batch in enumerate(test_loader, start=1):
            images_batch = images_batch.to(device)  # same device as the model
            outputs = model(images_batch)
            predictions.append(outputs.cpu())
            if verbose:
                print(batch_index)

    if not predictions:
        # torch.cat([]) raises; give callers a well-defined empty result.
        return torch.empty(0)
    return torch.cat(predictions)

In [None]:
# Run the model over the whole test loader; `test_predictions` holds the
# concatenated model outputs returned by make_predictions.
test_predictions = make_predictions(model, test_loader)

plotting the predictions

In [None]:
import matplotlib.pyplot as plt
import numpy as np
from torch.utils.data import DataLoader

# Class names indexed by class id (index 0 is the background).
class_names = ["background", "aeroplane", "bicycle", "bird", "boat", "bottle", "bus", "car", "cat", "chair", 
               "cow", "diningtable", "dog", "horse", "motorbike", "person", "pottedplant", 
               "sheep", "sofa", "train", "tvmonitor"]

def plot_test_predictions(test_dataset, test_predictions, num_examples=16):
    """Show the first test images next to their predicted segmentation masks.

    Args:
        test_dataset: Dataset yielding image tensors with channels first.
        test_predictions: Per-image logits with the class axis first; entry
            ``i`` must correspond to ``test_dataset[i]``.
        num_examples: Maximum number of rows to draw. It is clamped to the
            number of available images / predictions.

    Each row shows the input image, titled with the names of the predicted
    non-background classes, and the predicted mask. Nothing is drawn when
    there are no examples.
    """
    num_examples = min(num_examples, len(test_dataset), len(test_predictions))
    if num_examples <= 0:
        return

    test_loader = DataLoader(test_dataset, batch_size=num_examples, shuffle=False)
    images_batch = next(iter(test_loader))

    # Keep the original 10 x 50 inch figure for 16 rows and scale the height
    # with the row count; squeeze=False keeps `axes` 2-D even for one row.
    fig, axes = plt.subplots(
        num_examples, 2, figsize=(10, 50 * num_examples / 16), squeeze=False
    )
    for i in range(num_examples):
        image = images_batch[i].permute(1, 2, 0).cpu().numpy()
        prediction = test_predictions[i].argmax(dim=0).cpu().numpy()  # logits -> class ids

        # Exclude the background by value. Slicing off the first unique value
        # would drop a real class whenever no background pixel was predicted.
        present = np.unique(prediction)
        foreground = present[present != 0]
        title = ', '.join(class_names[c] for c in foreground)

        axes[i, 0].imshow(image)
        axes[i, 0].axis('off')
        axes[i, 0].set_title(f'{title}')

        # Fixed colour range so a class has the same colour in every row.
        axes[i, 1].imshow(prediction, cmap='jet', vmin=0, vmax=len(class_names) - 1)
        axes[i, 1].axis('off')
        axes[i, 1].set_title('Predicted Mask')

    plt.tight_layout()
    plt.show()

# Plot some example predictions
plot_test_predictions(test_dataset, test_predictions)
