Mounting Google Drive in Colab

In [1]:
# Mount Google Drive at /content/drive; the dataset paths used later live under this mount point.
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


Loading necessary libraries

In [2]:
import os
from PIL import Image
import torch
import torch.nn as nn
from torch.utils.data import Dataset, DataLoader, random_split
import torchvision.transforms as transforms

Preparing the Dataset with Masks

In [3]:
class SegmentationDataset(Dataset):
    """Dataset of (image, mask) pairs read from two directories.

    The i-th image is paired with the i-th mask after BOTH directory listings
    have been sorted by file name. ``os.listdir`` returns entries in arbitrary
    order, so without sorting an image could silently be paired with the wrong
    mask.

    NOTE(review): pairing by sorted position assumes image and mask file names
    sort in the same order (e.g. identical names in both folders) -- confirm
    against the actual data layout.

    Args:
        image_dir: Directory containing the input images.
        mask_dir: Directory containing the segmentation masks.
        transform: Optional callable applied to the RGB image (and to the mask
            when ``mask_transform`` is not given).
        mask_transform: Optional callable applied to the grayscale mask.
            Defaults to ``transform`` so existing callers keep their behavior.

    Raises:
        ValueError: If the two directories hold a different number of entries,
            since index-based pairing is then undefined.
    """

    def __init__(self, image_dir, mask_dir, transform=None, mask_transform=None):
        self.image_dir = image_dir
        self.mask_dir = mask_dir
        self.transform = transform
        # Fall back to the image transform when no mask-specific one is supplied.
        self.mask_transform = mask_transform if mask_transform is not None else transform
        # Sort both listings so that index-based pairing is deterministic.
        self.image_files = sorted(os.listdir(image_dir))
        self.mask_files = sorted(os.listdir(mask_dir))

        if len(self.image_files) != len(self.mask_files):
            raise ValueError(
                f"Found {len(self.image_files)} images in {image_dir!r} but "
                f"{len(self.mask_files)} masks in {mask_dir!r}; cannot pair them by index."
            )

    def __len__(self):
        """Return the number of image/mask pairs."""
        return len(self.image_files)

    def __getitem__(self, idx):
        """Load the pair at position ``idx`` and return ``{'image': ..., 'mask': ...}``."""
        img_name = os.path.join(self.image_dir, self.image_files[idx])
        mask_name = os.path.join(self.mask_dir, self.mask_files[idx])

        image = Image.open(img_name).convert("RGB")
        mask = Image.open(mask_name).convert("L")  # Ensure masks are grayscale

        if self.transform is not None:
            image = self.transform(image)
        if self.mask_transform is not None:
            mask = self.mask_transform(mask)

        return {'image': image, 'mask': mask}

Transforming the Data

In [4]:
# Shared preprocessing pipeline: resize to 256x256, then convert the PIL image to a tensor.
_preprocessing_steps = [
    transforms.Resize((256, 256)),
    transforms.ToTensor(),
]
transform = transforms.Compose(_preprocessing_steps)

Specifying image and mask directories

In [5]:
# Google Drive folders that hold the input images and their segmentation masks.
image_dir, mask_dir = (
    '/content/drive/MyDrive/images',
    '/content/drive/MyDrive/masks',
)

Creating instance of SegmentationDataset

In [6]:
# Build the full dataset; it is split into train/val/test subsets further down.
segmentation_dataset = SegmentationDataset(
    image_dir=image_dir,
    mask_dir=mask_dir,
    transform=transform,
)

Segmentation Model Architecture (U-Net)

In [16]:
# Define the U-Net-style architecture for segmentation
class UNet(nn.Module):
    """Small convolutional encoder-decoder that outputs per-pixel logits.

    Despite the name, there are no skip connections: ``forward`` simply runs
    the encoder and then the decoder. The encoder applies two 2x2 max-pools
    and the decoder applies two stride-2 transposed convolutions, so the
    output height/width equals the input's when both are divisible by 4.

    No sigmoid is applied at the end; the output is raw logits.

    Args:
        in_channels: Number of channels of the input image (default 3).
        out_channels: Number of output channels (default 1).
    """

    def __init__(self, in_channels=3, out_channels=1):
        super(UNet, self).__init__()
        # Encoder: two conv-conv-pool stages (in_channels -> 64 -> 128).
        # Layer order is kept unchanged so existing state_dict keys still match.
        self.encoder = nn.Sequential(
            nn.Conv2d(in_channels, 64, kernel_size=3, padding=1),
            nn.ReLU(inplace=True),
            nn.Conv2d(64, 64, kernel_size=3, padding=1),
            nn.ReLU(inplace=True),
            nn.MaxPool2d(kernel_size=2, stride=2),
            nn.Conv2d(64, 128, kernel_size=3, padding=1),
            nn.ReLU(inplace=True),
            nn.Conv2d(128, 128, kernel_size=3, padding=1),
            nn.ReLU(inplace=True),
            nn.MaxPool2d(kernel_size=2, stride=2)
        )
        # Decoder: two stride-2 transposed convolutions undo the two poolings,
        # followed by a 1x1 convolution that produces the output channels.
        self.decoder = nn.Sequential(
            nn.ConvTranspose2d(128, 64, kernel_size=2, stride=2),
            nn.ReLU(inplace=True),
            nn.Conv2d(64, 64, kernel_size=3, padding=1),
            nn.ReLU(inplace=True),
            nn.ConvTranspose2d(64, 64, kernel_size=2, stride=2),  # Additional upsampling layer
            nn.ReLU(inplace=True),
            nn.Conv2d(64, out_channels, kernel_size=1)
        )

    def forward(self, x):
        """Run the encoder then the decoder and return the resulting logits."""
        # The per-call debug prints of tensor sizes were removed: they fired on
        # every batch during training and evaluation and flooded the output.
        x = self.encoder(x)
        x = self.decoder(x)
        return x


Defining loss function and optimizer for segmentation

In [17]:
# Instantiate the network here: `segmentation_model` is used by this and all later
# cells but was never defined in the notebook, so a fresh "Restart & Run All"
# failed with a NameError.
segmentation_model = UNet()

# BCEWithLogitsLoss applies the sigmoid internally, so it is fed the model's raw outputs.
criterion_seg = nn.BCEWithLogitsLoss()  # Binary Cross-Entropy Loss for segmentation
optimizer_seg = torch.optim.Adam(segmentation_model.parameters(), lr=0.001)

Splitting the data into training, validation and test sets

In [18]:
# 60% / 20% / 20% split; the test set takes the remainder so the three sizes
# always sum to the dataset length despite integer truncation.
train_size_seg = int(0.6 * len(segmentation_dataset))
val_size_seg = int(0.2 * len(segmentation_dataset))
test_size_seg = len(segmentation_dataset) - train_size_seg - val_size_seg

# Seed the split so the same samples land in each subset on every run;
# otherwise the reported test loss is measured on a different subset each time.
split_seed_seg = 42
train_dataset_seg, val_dataset_seg, test_dataset_seg = random_split(
    segmentation_dataset,
    [train_size_seg, val_size_seg, test_size_seg],
    generator=torch.Generator().manual_seed(split_seed_seg),
)

# Define the batch size and create the data loaders (only the training loader shuffles)
batch_size_seg = 8
train_loader_seg = DataLoader(train_dataset_seg, batch_size=batch_size_seg, shuffle=True)
val_loader_seg = DataLoader(val_dataset_seg, batch_size=batch_size_seg, shuffle=False)
test_loader_seg = DataLoader(test_dataset_seg, batch_size=batch_size_seg, shuffle=False)

Training the model

In [20]:
num_epochs_seg = 10

# Optimise the segmentation model for a fixed number of passes over the training split.
for epoch in range(num_epochs_seg):
    segmentation_model.train()
    running_loss_seg = 0.0

    for batch_idx, batch_seg in enumerate(train_loader_seg):
        images_seg = batch_seg['image']
        masks_seg = batch_seg['mask']

        optimizer_seg.zero_grad()
        outputs_seg = segmentation_model(images_seg)

        # Bring the target masks to the spatial size of the predictions before computing the loss.
        target_hw = outputs_seg.shape[2:]
        masks_seg_resized = nn.functional.interpolate(
            masks_seg,
            size=target_hw,
            mode='bilinear',
            align_corners=True,
        )

        loss_seg = criterion_seg(outputs_seg, masks_seg_resized)
        loss_seg.backward()
        optimizer_seg.step()

        # Weight each batch loss by its batch size; the sum is divided by the dataset size below.
        running_loss_seg += images_seg.size(0) * loss_seg.item()

    epoch_loss_seg = running_loss_seg / len(train_dataset_seg)
    print(f'Segmentation Epoch [{epoch + 1}/{num_epochs_seg}], Training Loss: {epoch_loss_seg:.4f}')


Segmentation Epoch [1/10], Training Loss: 0.1350
Segmentation Epoch [2/10], Training Loss: 0.0428
Segmentation Epoch [3/10], Training Loss: 0.0418
Segmentation Epoch [4/10], Training Loss: 0.0412
Segmentation Epoch [5/10], Training Loss: 0.0408
Segmentation Epoch [6/10], Training Loss: 0.0406
Segmentation Epoch [7/10], Training Loss: 0.0405
Segmentation Epoch [8/10], Training Loss: 0.0405
Segmentation Epoch [9/10], Training Loss: 0.0404
Segmentation Epoch [10/10], Training Loss: 0.0404


Evaluating the model

In [23]:
# Evaluating the segmentation model on the test set
segmentation_model.eval()
test_loss_seg = 0.0

# Inference only: disable autograd so no computation graph is built per batch,
# which saves memory and time.
with torch.no_grad():
    for batch_idx, batch_seg in enumerate(test_loader_seg):
        images_seg, masks_seg = batch_seg['image'], batch_seg['mask']

        outputs_seg = segmentation_model(images_seg)
        # Resize the predictions to the masks' own spatial size rather than a
        # hard-coded (256, 256), so this cell follows the dataset transform.
        outputs_seg_resized = nn.functional.interpolate(
            outputs_seg, size=masks_seg.shape[2:], mode='bilinear', align_corners=False
        )
        loss_seg = criterion_seg(outputs_seg_resized, masks_seg)

        # Weight each batch loss by its batch size; the sum is divided by the dataset size below.
        test_loss_seg += loss_seg.item() * images_seg.size(0)

test_loss_seg /= len(test_dataset_seg)
print(f'Segmentation Test Loss: {test_loss_seg:.4f}')

Segmentation Test Loss: 0.0414


Post Processing

In [25]:
import cv2
import numpy as np


def post_process_segmentation(output_masks):
    """Fit an ellipse to the largest contour of each mask and derive two points.

    Args:
        output_masks: Iterable of torch tensors, one per mask. Each is squeezed
            to 2-D, scaled by 255 and cast to uint8 before contour extraction.
            # NOTE(review): this presumes binary (bool or 0/1) masks; raw
            # logits would need thresholding first -- confirm with the caller.

    Returns:
        A list with one ``(center, top_point)`` tuple per input mask, where
        ``center`` is the fitted ellipse centre and ``top_point`` is the centre
        shifted up by half of the ellipse's second axis length. Masks without
        a usable contour yield ``(None, None)``.

    NOTE(review): ``top_point`` ignores the ellipse rotation angle
    (``ellipse[2]``), so it only lies on the ellipse when that axis is
    vertical, and it can fall outside the image -- confirm this is intended.
    """
    biometry_points = []

    for mask in output_masks:
        # detach() lets tensors that still require grad be converted to NumPy.
        mask_np = mask.detach().squeeze().cpu().numpy()

        contours, _ = cv2.findContours(
            (mask_np * 255).astype(np.uint8), cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE
        )

        contour = max(contours, key=cv2.contourArea) if contours else None

        # cv2.fitEllipse needs at least 5 points; a smaller contour (e.g. a
        # single speck) would raise, so it is reported as "no result" instead.
        if contour is None or len(contour) < 5:
            biometry_points.append((None, None))
            continue

        ellipse = cv2.fitEllipse(contour)
        center = ellipse[0]
        top_point = (center[0], center[1] - ellipse[1][1] / 2)
        biometry_points.append((center, top_point))

    return biometry_points


# Smoke-test the post-processing on eight random binary masks standing in for model output.
output_masks = []
for _ in range(8):
    output_masks.append(torch.rand(1, 128, 128) > 0.5)  # Example output masks

biometry_points = post_process_segmentation(output_masks)
print(biometry_points)


[((61.72146224975586, 61.57270050048828), (61.72146224975586, -9.981918334960938)), ((63.56726837158203, 62.992279052734375), (63.56726837158203, -8.581222534179688)), ((64.40462493896484, 64.58597564697266), (64.40462493896484, -7.562721252441406)), ((65.13460540771484, 62.57162857055664), (65.13460540771484, -10.819377899169922)), ((63.77444076538086, 62.554222106933594), (63.77444076538086, -9.828193664550781)), ((64.01751708984375, 63.684635162353516), (64.01751708984375, -7.604160308837891)), ((63.452945709228516, 64.12628173828125), (63.452945709228516, -6.493339538574219)), ((63.53681564331055, 64.16344451904297), (63.53681564331055, -12.480850219726562))]
