# Creating Dataset with YOLO-Segmentation Model


In [None]:
from google.colab import drive

# Attach Google Drive to the Colab VM at the conventional mount point.
drive_mount_point = '/content/drive'
drive.mount(drive_mount_point)

Mounted at /content/drive


In [None]:
!pip install ultralytics==8.0.196 opencv-python-headless

Collecting ultralytics==8.0.196
  Downloading ultralytics-8.0.196-py3-none-any.whl (631 kB)
[?25l     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.0/631.1 kB[0m [31m?[0m eta [36m-:--:--[0m[2K     [91m━━━━━━━━━━━━━━[0m[90m╺[0m[90m━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m225.3/631.1 kB[0m [31m6.8 MB/s[0m eta [36m0:00:01[0m[2K     [91m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m[91m╸[0m [32m624.6/631.1 kB[0m [31m9.1 MB/s[0m eta [36m0:00:01[0m[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m631.1/631.1 kB[0m [31m8.0 MB/s[0m eta [36m0:00:00[0m
Collecting thop>=0.1.1 (from ultralytics==8.0.196)
  Downloading thop-0.1.1.post2209072238-py3-none-any.whl (15 kB)
Collecting nvidia-cuda-nvrtc-cu12==12.1.105 (from torch>=1.8.0->ultralytics==8.0.196)
  Using cached nvidia_cuda_nvrtc_cu12-12.1.105-py3-none-manylinux1_x86_64.whl (23.7 MB)
Collecting nvidia-cuda-runtime-cu12==12.1.105 (from torch>=1.8.0->ultralytics==8.0.196)
  Using cached nvidia_

In [None]:
!pip install focal-loss segmentation-models-pytorch

Collecting focal-loss
  Downloading focal_loss-0.0.7-py3-none-any.whl (19 kB)
Collecting segmentation-models-pytorch
  Downloading segmentation_models_pytorch-0.3.3-py3-none-any.whl (106 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m106.7/106.7 kB[0m [31m5.9 MB/s[0m eta [36m0:00:00[0m
Collecting pretrainedmodels==0.7.4 (from segmentation-models-pytorch)
  Downloading pretrainedmodels-0.7.4.tar.gz (58 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m58.8/58.8 kB[0m [31m9.4 MB/s[0m eta [36m0:00:00[0m
[?25h  Preparing metadata (setup.py) ... [?25l[?25hdone
Collecting efficientnet-pytorch==0.7.1 (from segmentation-models-pytorch)
  Downloading efficientnet_pytorch-0.7.1.tar.gz (21 kB)
  Preparing metadata (setup.py) ... [?25l[?25hdone
Collecting timm==0.9.2 (from segmentation-models-pytorch)
  Downloading timm-0.9.2-py3-none-any.whl (2.2 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m2.2/2.2 MB[0m [31m24.0 MB/s

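The following code creates the dataset: it runs the YOLO segmentation model on every input image and marks the centre of each detected bounding box on both the segmented and the original image.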

In [None]:
# Upload the dataset as a folder named input_images. ImageNet is not used because of the storage limit in Colab.

import os
from ultralytics import YOLO
import cv2

# Directories
input_dir = '/content/input_images'
segmented_output_dir = '/content/segmented_images_with_middle_points'
original_output_dir = '/content/original_images_with_middle_points'

# Create output directories if they don't exist
os.makedirs(segmented_output_dir, exist_ok=True)
os.makedirs(original_output_dir, exist_ok=True)

# Load the YOLO model
model = YOLO('yolov8s-seg.pt')

# Process each image in the input directory.
# Sorted so that every run visits (and prints) the files in the same order.
for filename in sorted(os.listdir(input_dir)):
    if not filename.lower().endswith(('.png', '.jpg', '.jpeg')):
        continue
    image_path = os.path.join(input_dir, filename)

    # Load the original image first: cv2.imread returns None (it does not raise)
    # for unreadable files, and drawing on None would crash further down.
    original_image = cv2.imread(image_path)
    if original_image is None:
        print(f"Skipping {filename}: could not be read as an image.")
        continue

    # Run inference. save=True is kept so the annotated files are still written
    # under runs/segment/, but nothing below depends on that directory.
    results = model.predict(source=image_path, save=True)
    result = results[0]

    # Take the annotated image straight from the result instead of re-reading it
    # from 'runs/segment/predict': ultralytics increments that directory name
    # (predict2, predict3, ...) when it already exists, so the hard-coded path
    # could yield a stale image or None. copy() guarantees a writable array.
    segmented_image = result.plot().copy()

    # Draw the centre of every bounding box on both images
    middle_points = []
    for box in result.boxes:
        # Extract corner coordinates and convert to integers
        x1, y1, x2, y2 = map(int, box.xyxy[0])
        middle_point = ((x1 + x2) // 2, (y1 + y2) // 2)
        middle_points.append(middle_point)
        cv2.circle(segmented_image, middle_point, radius=5, color=(0, 255, 0), thickness=-1)  # Green dot on segmented image
        cv2.circle(original_image, middle_point, radius=5, color=(0, 255, 0), thickness=-1)   # Green dot on original image

    # Save the images with middle points
    segmented_output_path = os.path.join(segmented_output_dir, filename)
    original_output_path = os.path.join(original_output_dir, filename)
    cv2.imwrite(segmented_output_path, segmented_image)
    cv2.imwrite(original_output_path, original_image)

    # Optionally, print middle points coordinates
    print(f"Middle points for {filename}: {middle_points}")

print("Processing complete. Images saved to output directories.")


Downloading https://github.com/ultralytics/assets/releases/download/v0.0.0/yolov8s-seg.pt to 'yolov8s-seg.pt'...
100%|██████████| 22.8M/22.8M [00:00<00:00, 171MB/s]

image 1/1 /content/input_images/2.jpg: 448x640 1 person, 2 ties, 1 couch, 1 book, 1077.0ms
Speed: 16.6ms preprocess, 1077.0ms inference, 53.3ms postprocess per image at shape (1, 3, 448, 640)
Results saved to [1mruns/segment/predict[0m



Middle points for 2.jpg: [(678, 516), (852, 476), (698, 693), (40, 859), (791, 510)]


image 1/1 /content/input_images/8.jpg: 448x640 6 persons, 858.6ms
Speed: 4.5ms preprocess, 858.6ms inference, 25.6ms postprocess per image at shape (1, 3, 448, 640)
Results saved to [1mruns/segment/predict[0m



Middle points for 8.jpg: [(940, 825), (270, 913), (1749, 823), (2005, 585), (657, 646), (2232, 1283)]


image 1/1 /content/input_images/7.jpg: 448x640 1 person, 1 tie, 877.8ms
Speed: 4.3ms preprocess, 877.8ms inference, 11.4ms postprocess per image at shape (1, 3, 448, 640)
Results saved to [1mruns/segment/predict[0m



Middle points for 7.jpg: [(585, 626), (376, 1024)]


image 1/1 /content/input_images/6.jpg: 384x640 1 person, 1 car, 493.5ms
Speed: 2.5ms preprocess, 493.5ms inference, 6.0ms postprocess per image at shape (1, 3, 384, 640)
Results saved to [1mruns/segment/predict[0m



Middle points for 6.jpg: [(631, 276), (839, 393)]


image 1/1 /content/input_images/4.jpg: 384x640 1 person, 1 tie, 453.7ms
Speed: 2.9ms preprocess, 453.7ms inference, 6.0ms postprocess per image at shape (1, 3, 384, 640)
Results saved to [1mruns/segment/predict[0m



Middle points for 4.jpg: [(882, 580), (906, 975)]


image 1/1 /content/input_images/10.jpg: 288x640 1 person, 1 tie, 393.0ms
Speed: 2.3ms preprocess, 393.0ms inference, 5.0ms postprocess per image at shape (1, 3, 288, 640)
Results saved to [1mruns/segment/predict[0m



Middle points for 10.jpg: [(682, 284), (664, 517)]


image 1/1 /content/input_images/5.jpg: 384x640 1 person, 1 bottle, 463.2ms
Speed: 2.8ms preprocess, 463.2ms inference, 6.3ms postprocess per image at shape (1, 3, 384, 640)
Results saved to [1mruns/segment/predict[0m



Middle points for 5.jpg: [(865, 328), (357, 600)]


image 1/1 /content/input_images/3.jpg: 320x640 2 persons, 2 ties, 426.2ms
Speed: 2.4ms preprocess, 426.2ms inference, 8.7ms postprocess per image at shape (1, 3, 320, 640)
Results saved to [1mruns/segment/predict[0m



Middle points for 3.jpg: [(950, 374), (355, 349), (459, 607), (969, 498)]


image 1/1 /content/input_images/9.jpg: 384x640 5 persons, 496.5ms
Speed: 2.7ms preprocess, 496.5ms inference, 12.2ms postprocess per image at shape (1, 3, 384, 640)
Results saved to [1mruns/segment/predict[0m



Middle points for 9.jpg: [(581, 586), (1455, 537), (954, 632), (840, 628), (1204, 946)]


image 1/1 /content/input_images/1.jpg: 448x640 1 person, 1 tie, 530.9ms
Speed: 2.9ms preprocess, 530.9ms inference, 6.6ms postprocess per image at shape (1, 3, 448, 640)
Results saved to [1mruns/segment/predict[0m


Middle points for 1.jpg: [(460, 336), (493, 637)]
Processing complete. Images saved to output directories.


# Semi-supervised Training

In [None]:
# Import necessary libraries
import os
import cv2
import torch
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F
from ultralytics import YOLO
import numpy as np
from torchvision import transforms
from focal_loss import SparseCategoricalFocalLoss
from PIL import Image

# Filesystem locations used by the semi-supervised cell
labeled_input_dir, labeled_output_dir, unlabeled_input_dir = (
    '/content/input_images',  # images of the labeled set
    '/content/outputs',       # masks are read from here and pseudo-labels are written here
    '/content/unlabled',      # images without masks
)

# The mask/output directory is created up front so later listings do not fail
os.makedirs(labeled_output_dir, exist_ok=True)

# Pretrained YOLO segmentation checkpoint
model = YOLO('yolov8s-seg.pt')

# Image preprocessing: to tensor, normalise with zero mean / unit std, resize to 640x640
_preprocessing_steps = [
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.0] * 3, std=[1.0] * 3),
    transforms.Resize((640, 640)),
]
transform = transforms.Compose(_preprocessing_steps)

def pCE_loss(output, mask, gamma=2.0):
    """Partial focal cross-entropy, averaged over the labelled pixels only.

    This single definition replaces two earlier ones with the same name. The
    one that was live raised ``UnboundLocalError`` (it read the local name
    ``focal_loss`` before assigning it), ignored ``mask`` and relied on a
    TensorFlow/Keras loss that cannot back-propagate through PyTorch tensors.

    Args:
        output: ``torch.Tensor`` of raw per-pixel class logits with shape
            ``(N, C, H, W)``.
        mask: array or tensor of per-pixel class indices with shape
            ``(N, H, W)``, ``(N, 1, H, W)`` or ``(H, W)``. Values are rounded
            to the nearest integer, so a binary mask scaled to ``[0, 1]`` maps
            to classes 0 and 1. Pixels whose rounded value is 0 are treated as
            unlabelled and contribute nothing to the loss. If the spatial size
            differs from ``output`` the mask is resized with nearest-neighbour
            interpolation.
        gamma: focal exponent; ``0`` gives plain cross-entropy on the labelled
            pixels.

    Returns:
        Scalar tensor: the sum of the focal terms over the labelled pixels
        divided by their count (``0`` when no pixel is labelled, never NaN).

    Raises:
        TypeError: if ``output`` is not a tensor (for example a list of
            detection results rather than logits).
        ValueError: if ``output`` or ``mask`` has an unsupported rank.
    """
    if not torch.is_tensor(output):
        raise TypeError(
            "pCE_loss expects per-pixel class logits as a torch.Tensor of shape "
            f"(N, C, H, W), got {type(output).__name__}"
        )
    if output.dim() != 4:
        raise ValueError(f"output must have shape (N, C, H, W), got {tuple(output.shape)}")

    mask = torch.as_tensor(mask, dtype=torch.float32, device=output.device)
    if mask.dim() == 4 and mask.size(1) == 1:
        mask = mask[:, 0]            # drop a singleton channel dimension
    elif mask.dim() == 2:
        mask = mask.unsqueeze(0)     # add the batch dimension
    if mask.dim() != 3:
        raise ValueError(f"mask must have shape (N, H, W) or (N, 1, H, W), got {tuple(mask.shape)}")

    # Bring the mask to the logits' resolution; nearest keeps labels discrete.
    if tuple(mask.shape[-2:]) != tuple(output.shape[-2:]):
        mask = F.interpolate(mask.unsqueeze(1), size=tuple(output.shape[-2:]), mode="nearest").squeeze(1)

    target = mask.round().long()
    labelled = (target > 0).to(output.dtype)

    # Log-probability assigned to the target class of every pixel.
    log_probs = F.log_softmax(output, dim=1)
    log_pt = log_probs.gather(1, target.unsqueeze(1)).squeeze(1)
    pt = log_pt.exp()

    # Focal modulation down-weights pixels that are already classified well.
    focal = -((1.0 - pt) ** gamma) * log_pt

    # clamp_min avoids 0/0 when an image has no labelled pixel at all.
    n_labelled = labelled.sum().clamp_min(1.0)
    return (focal * labelled).sum() / n_labelled


def _list_images(directory):
    """Return the sorted full paths of the .png/.jpg/.jpeg files in ``directory``."""
    return sorted(
        os.path.join(directory, name)
        for name in os.listdir(directory)
        if name.lower().endswith(('.png', '.jpg', '.jpeg'))
    )


# Load labeled data
labeled_images = _list_images(labeled_input_dir)
labeled_masks = _list_images(labeled_output_dir)

# Pair every image with the mask that has the same file name (ignoring the
# extension). Zipping two independent directory listings could silently attach
# a mask to the wrong image, because os.listdir gives no ordering guarantee and
# the two folders may not hold the same files.
_masks_by_stem = {os.path.splitext(os.path.basename(path))[0]: path for path in labeled_masks}

labeled_data = []
for img in labeled_images:
    mask_path = _masks_by_stem.get(os.path.splitext(os.path.basename(img))[0])
    if mask_path is None:
        print(f"No mask found for {img}; skipping.")
        continue
    mask = cv2.imread(mask_path, cv2.IMREAD_GRAYSCALE)
    if mask is None:  # cv2.imread returns None instead of raising on unreadable files
        print(f"Could not read mask {mask_path}; skipping.")
        continue
    # Masks are stored as float32 scaled from [0, 255] to [0, 1]
    labeled_data.append((img, mask.astype(np.float32) / 255.0))

# Load unlabeled data
unlabeled_images = _list_images(unlabeled_input_dir)

# Training parameters
n_epochs = 1
batch_size = 2  # NOTE(review): not referenced by the loops visible in this cell, which feed one sample at a time

# Initialize the optimizer
optimizer = optim.Adam(model.parameters(), lr=0.001)

# Initial training on labeled data
for epoch in range(n_epochs):
    # NOTE(review): `model` is an ultralytics YOLO wrapper. In the ultralytics API
    # `.train(...)` starts the library's own training run rather than switching a
    # module to training mode, and calling the wrapper runs prediction. Confirm
    # whether this loop should operate on the underlying torch module instead.
    model.train(epochs=n_epochs)
    for image_path, mask in labeled_data:
        image = cv2.imread(image_path)
        if image is None:  # cv2.imread returns None instead of raising on unreadable files
            print(f"Could not read {image_path}; skipping.")
            continue
        image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
        image = transform(image).unsqueeze(0)

        # The mask was already scaled to [0, 1] when it was loaded, so it is
        # converted once and only gains a batch dimension. The earlier second
        # conversion added another dimension and divided by 255 again, which
        # shrank every value to at most 1/255. This now matches the retraining
        # loop further down.
        mask = torch.as_tensor(mask).unsqueeze(0)

        optimizer.zero_grad()
        outputs = model(image)
        loss = pCE_loss(outputs, mask)
        loss.backward()
        optimizer.step()

def _pseudo_label_path(image_path):
    """Return where the pseudo-label of ``image_path`` is stored (always a PNG)."""
    stem = os.path.splitext(os.path.basename(image_path))[0]
    return os.path.join(labeled_output_dir, stem + '.png')


# Pseudo-labeling on unlabeled data
# NOTE(review): `model` is an ultralytics YOLO wrapper; confirm that calling it
# returns a per-pixel score tensor that torch.max(..., 1) can reduce.
# NOTE(review): pseudo-labels are written into the same directory the labeled
# masks are read from, so a re-run of the notebook would pick them up as if they
# were ground-truth masks — consider a separate directory.
model.eval()
with torch.no_grad():
    for image_path in unlabeled_images:
        image = cv2.imread(image_path)
        if image is None:  # cv2.imread returns None instead of raising on unreadable files
            print(f"Could not read {image_path}; skipping.")
            continue
        image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
        image = transform(image).unsqueeze(0)

        outputs = model(image)
        _, pseudo_label = torch.max(outputs, 1)
        # torch.max returns int64 indices, which cv2.imwrite cannot encode.
        pseudo_label = pseudo_label.squeeze(0).cpu().numpy().astype(np.uint8)

        # Always store labels as PNG: saving under the source name (usually .jpg)
        # would JPEG-compress the label map and alter the class indices.
        cv2.imwrite(_pseudo_label_path(image_path), pseudo_label)

# Load updated pseudo-labeled data
pseudo_labeled_data = []
for img in unlabeled_images:
    pseudo_mask = cv2.imread(_pseudo_label_path(img), cv2.IMREAD_GRAYSCALE)
    if pseudo_mask is None:
        print(f"No pseudo-label available for {img}; skipping.")
        continue
    pseudo_labeled_data.append((img, pseudo_mask))
labeled_data += pseudo_labeled_data

# Retrain the model with combined dataset
# At this point labeled_data holds the original (image path, mask) pairs plus
# the pseudo-labeled pairs appended to it above.
# NOTE(review): masks of the labeled set are float32 scaled to [0, 1], while the
# pseudo-label masks are loaded as raw grayscale values — confirm pCE_loss
# expects both encodings.
for epoch in range(n_epochs):
    # NOTE(review): `model` is an ultralytics YOLO wrapper; confirm that .train()
    # here toggles training mode rather than starting the library's own training run.
    model.train()
    for image_path, mask in labeled_data:
        # Read the image as BGR, convert to RGB, apply the shared transform and
        # add a batch dimension.
        image = cv2.imread(image_path)
        image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
        image = transform(image).unsqueeze(0)
        # Add a leading batch dimension to the mask.
        mask = torch.tensor(mask).unsqueeze(0)

        # One optimisation step per sample.
        optimizer.zero_grad()
        # NOTE(review): presumably this must be a differentiable forward pass for
        # loss.backward() to work — verify what calling the YOLO wrapper returns.
        outputs = model(image)
        loss = pCE_loss(outputs, mask)
        loss.backward()
        optimizer.step()

print("Processing complete. Model trained with semi-supervised learning.")
