In [None]:
!pip install -q ultralytics

[?25l   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.0/1.2 MB[0m [31m?[0m eta [36m-:--:--[0m[2K   [91m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m[91m╸[0m [32m1.1/1.2 MB[0m [31m46.7 MB/s[0m eta [36m0:00:01[0m[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m1.2/1.2 MB[0m [31m31.1 MB/s[0m eta [36m0:00:00[0m
[?25h

In [None]:
import torch
import ultralytics

# Environment sanity check: show the PyTorch build and whether CUDA is usable.
env_report = {
    "PyTorch version:": torch.__version__,
    "GPU available:": torch.cuda.is_available(),
}
for caption, value in env_report.items():
    print(caption, value)

Creating new Ultralytics Settings v0.0.6 file ✅ 
View Ultralytics Settings with 'yolo settings' or at '/root/.config/Ultralytics/settings.json'
Update Settings with 'yolo settings key=value', i.e. 'yolo settings runs_dir=path/to/dir'. For help see https://docs.ultralytics.com/quickstart/#ultralytics-settings.
PyTorch version: 2.9.0+cu126
GPU available: True


In [None]:
from google.colab import drive
import os

# Mount Google Drive so the dataset folders below become visible under /content/drive.
drive.mount('/content/drive')

# Root of the raw dataset; images and labels live in sibling sub-folders.
DATASET_DIR = "/content/drive/MyDrive/Sight_Challenge/finger_crossed/train"
IMAGES_DIR, LABELS_DIR = (
    os.path.join(DATASET_DIR, subfolder) for subfolder in ("images", "labels")
)

# Switch the working directory to the dataset root and confirm it.
os.chdir(DATASET_DIR)
print("Current folder:", os.getcwd())


Mounted at /content/drive
Current folder: /content/drive/MyDrive/Sight_Challenge/finger_crossed/train


In [None]:
# preparing the multilabel matrix
import numpy as np

num_classes = 8

# Image extensions accepted when scanning IMAGES_DIR (compared case-insensitively).
IMAGE_EXTENSIONS = (".jpg", ".jpeg", ".png")


def _multilabel_from_file(label_path, n_classes):
    """Return a 0/1 vector of length ``n_classes`` marking the classes in a label file.

    Args:
        label_path: path of a text label file whose first whitespace-separated
            token on each line is the class id.
        n_classes: length of the returned vector.

    Returns:
        numpy array of zeros with a 1 at every class id found. A missing file
        yields an all-zero vector.

    Raises:
        ValueError: if a class id is not numeric or lies outside [0, n_classes).
    """
    multilabel = np.zeros(n_classes)
    if not os.path.exists(label_path):
        return multilabel

    with open(label_path, "r") as f:
        for line in f:
            tokens = line.split()
            if not tokens:
                # Blank lines (e.g. a trailing newline) carry no annotation.
                continue
            # Accept "3" as well as "3.0", matching how read_label parses class ids.
            cls = int(float(tokens[0]))
            if not 0 <= cls < n_classes:
                # A negative id would otherwise wrap around and mark the wrong class.
                raise ValueError(
                    f"Class id {cls} in {label_path} is outside [0, {n_classes})"
                )
            multilabel[cls] = 1
    return multilabel


image_paths = []
labels_matrix = []
for filename in sorted(os.listdir(IMAGES_DIR)):
    if not filename.lower().endswith(IMAGE_EXTENSIONS):
        continue

    # Derive the label name from the stem so .jpeg/.png/.JPG images are matched
    # to "<stem>.txt" too (a literal ".jpg" replace only works for lower-case .jpg).
    stem = os.path.splitext(filename)[0]
    label_path = os.path.join(LABELS_DIR, stem + ".txt")

    image_paths.append(os.path.join(IMAGES_DIR, filename))
    labels_matrix.append(_multilabel_from_file(label_path, num_classes))

# reshape keeps the matrix 2-D (0, num_classes) even when no image was found.
labels_matrix = np.array(labels_matrix).reshape(-1, num_classes)

print("Total images:", len(image_paths))
print("Labels matrix shape:", labels_matrix.shape)

In [None]:
!pip install -q iterative-stratification

In [None]:
# Multilabel stratified splitting Step
import os
import numpy as np
from iterstrat.ml_stratifiers import MultilabelStratifiedShuffleSplit


num_images = len(image_paths)
print("Total images found:", num_images)

# Target fractions of the whole dataset for each split.
train_size, val_size, test_size = 0.70, 0.20, 0.10

assert abs((train_size + val_size + test_size) - 1.0) < 1e-6, "Splits must sum to 1."

# 1st split: train vs. temp (val + test together).
msss_train = MultilabelStratifiedShuffleSplit(
    n_splits=1, test_size=(1 - train_size), random_state=42
)
# n_splits=1, so the generator yields exactly one (train, held-out) index pair.
train_idx, temp_idx = next(msss_train.split(image_paths, labels_matrix))

print("Train size:", len(train_idx))
print("Temp size :", len(temp_idx))

# 2nd split: temp -> val + test. The test share is expressed relative to temp.
relative_test_size = test_size / (val_size + test_size)

msss_val_test = MultilabelStratifiedShuffleSplit(
    n_splits=1, test_size=relative_test_size, random_state=42
)

temp_labels = labels_matrix[temp_idx]
temp_paths = np.array(image_paths)[temp_idx]
val_pos, test_pos = next(msss_val_test.split(temp_paths, temp_labels))

# The second split returns positions within temp; map them back to dataset indices.
val_idx = temp_idx[val_pos]
test_idx = temp_idx[test_pos]

print("\nFINAL SPLIT SIZES:")
for caption, members in (("Train:", train_idx), ("Val  :", val_idx), ("Test :", test_idx)):
    print(caption, len(members))

# Checking class distribution balance

def class_counts(split_idx, labels=None):
    """Count how many samples of a split contain each class.

    Args:
        split_idx: integer indices (list or array) selecting rows of the label matrix.
        labels: optional 2-D 0/1 label matrix (rows = samples, columns = classes).
            Defaults to the module-level ``labels_matrix`` so existing
            one-argument calls behave as before.

    Returns:
        1-D array with the column-wise sum over the selected rows.
    """
    source = labels_matrix if labels is None else labels
    return np.asarray(source)[split_idx].sum(axis=0)

# Print per-class counts for every split so the stratification can be eyeballed.
print("\nCLASS DISTRIBUTION CHECK:")
for caption, split_indices in (
    ("Train class counts:", train_idx),
    ("Val   class counts:", val_idx),
    ("Test  class counts:", test_idx),
):
    print(caption, class_counts(split_indices))

Total images found: 1050
Train size: 734
Temp size : 316

FINAL SPLIT SIZES:
Train: 734
Val  : 210
Test : 106

CLASS DISTRIBUTION CHECK:
Train class counts: [        153          88         412         532         106         109         121         173]
Val   class counts: [         44          25         118         152          30          31          35          49]
Test  class counts: [         22          13          59          77          15          16          17          25]


In [None]:
#COPYING FILES TO DRIVE
import os
import shutil

# Destination root for the split dataset.
# NOTE(review): this path uses "Sight Challenge" (with a space) while every other
# path in the notebook uses "Sight_Challenge" — confirm this is the intended folder.
FINAL_BASE = "/content/drive/MyDrive/Sight Challenge/new-change-final"

splits = ["train", "val", "test"]

# Create <FINAL_BASE>/<split>/{images,labels} for every split.
for split in splits:
    for kind in ("images", "labels"):
        os.makedirs(f"{FINAL_BASE}/{split}/{kind}", exist_ok=True)

def copy_file(src, dst):
    """Copy ``src`` to ``dst`` if ``src`` is an existing regular file.

    Args:
        src: path of the file to copy.
        dst: destination path (file or directory), as accepted by shutil.copy.

    Returns:
        True when the file was copied, False when ``src`` is missing or is not
        a regular file. Callers that ignore the return value behave as before.
    """
    if not os.path.isfile(src):
        # Missing source (or a directory): nothing to copy, report it to the caller.
        return False
    shutil.copy(src, dst)
    return True

# COPYING FILES

def process_split(indices, split_name):
    """Copy every image of one split, plus its label file, under FINAL_BASE.

    Args:
        indices: iterable of integer positions into the module-level
            ``image_paths`` list.
        split_name: sub-folder of ``FINAL_BASE`` that receives the files
            (the notebook uses "train", "val" and "test").

    The label name is derived from the image stem, so every image that was
    indexed for the split (.jpg, .jpeg, .png, any letter case) is copied and
    paired with "<stem>.txt". Missing labels are skipped by ``copy_file``.
    """
    for idx in indices:
        img_path = image_paths[idx]
        filename = os.path.basename(img_path)

        # splitext works for any extension/case, unlike a literal ".jpg" replace.
        label_name = os.path.splitext(filename)[0] + ".txt"
        label_path = os.path.join(LABELS_DIR, label_name)

        # Destination paths inside the YOLO-style folder layout.
        img_dst = f"{FINAL_BASE}/{split_name}/images/{filename}"
        label_dst = f"{FINAL_BASE}/{split_name}/labels/{label_name}"

        copy_file(img_path, img_dst)
        copy_file(label_path, label_dst)


# Materialise the three splits on Drive, in train / val / test order.
for split_indices, split_name in (
    (train_idx, "train"),
    (val_idx, "val"),
    (test_idx, "test"),
):
    process_split(split_indices, split_name)

print("Dataset is copied into YOLO format!")


Augmentation

In [None]:
!pip install albumentations==1.4.3 opencv-python

Collecting albumentations==1.4.3
  Downloading albumentations-1.4.3-py3-none-any.whl.metadata (37 kB)
Downloading albumentations-1.4.3-py3-none-any.whl (137 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m137.0/137.0 kB[0m [31m11.2 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: albumentations
  Attempting uninstall: albumentations
    Found existing installation: albumentations 2.0.8
    Uninstalling albumentations-2.0.8:
      Successfully uninstalled albumentations-2.0.8
Successfully installed albumentations-1.4.3


In [None]:
# Apply realistic marine data augmentations to the training dataset
import os
import cv2
import random
import numpy as np
from tqdm import tqdm
import albumentations as A

# Input: training images/labels to augment.
# NOTE(review): these folders live under ".../last_try", not under the FINAL_BASE
# written by the split-copy cell — confirm they hold the intended train split.
IMG_DIR = "/content/drive/MyDrive/Sight_Challenge/last_try/train/images"
LBL_DIR = "/content/drive/MyDrive/Sight_Challenge/last_try/train/labels"

# Output: augmented copies are written here, separate from the originals.
OUT_IMG = "/content/drive/MyDrive/Sight_Challenge/last_try/augmented/images"
OUT_LBL = "/content/drive/MyDrive/Sight_Challenge/last_try/augmented/labels"

for out_dir in (OUT_IMG, OUT_LBL):
    os.makedirs(out_dir, exist_ok=True)

def read_label(path):
    """Parse a label file into a list of ``[x, y, w, h, class_id]`` entries.

    Each line is split on whitespace; lines with fewer than five tokens are
    ignored and only the first five tokens of a line are used. The first token
    is the class id (stored last, as an int), the next four are kept as floats.

    Returns an empty list when ``path`` does not exist.
    """
    if not os.path.exists(path):
        return []

    parsed = []
    with open(path, "r") as handle:
        for raw in handle:
            fields = raw.strip().split()
            if len(fields) >= 5:
                cls_id, cx, cy, bw, bh = (float(tok) for tok in fields[:5])
                parsed.append([cx, cy, bw, bh, int(cls_id)])
    return parsed

def clip_boxes(boxes, min_size=1e-6):
    """Clip normalised centre-format boxes to the unit square, dropping empty ones.

    Args:
        boxes: iterable of ``[x, y, w, h, c]`` where (x, y) is the box centre,
            (w, h) its size, all expressed as fractions of the image, and ``c``
            is passed through untouched.
        min_size: boxes whose clipped width or height is not larger than this
            are discarded. The default matches the 6-decimal precision used
            when labels are written back to disk.

    Returns:
        List of ``[x, y, w, h, c]`` with corners clamped to [0, 1] and the
        centre/size recomputed from the clamped corners. Boxes that lie
        outside the image or collapse to zero area are omitted, because a
        box with max <= min is expected to fail Albumentations' bbox validation.
    """
    clipped = []
    for x, y, w, h, c in boxes:
        # Convert centre/size to corners and clamp each corner to the image.
        x_min = max(0, x - w/2)
        y_min = max(0, y - h/2)
        x_max = min(1, x + w/2)
        y_max = min(1, y + h/2)

        new_w = max(0, x_max - x_min)
        new_h = max(0, y_max - y_min)
        if new_w <= min_size or new_h <= min_size:
            # Degenerate after clipping: nothing of the box is left inside the image.
            continue

        new_x = x_min + new_w/2
        new_y = y_min + new_h/2

        clipped.append([new_x, new_y, new_w, new_h, c])
    return clipped

def _build_pipeline(rotate_limit, rotate_p, flip_p, use_clahe):
    """Assemble one augmentation pipeline with YOLO-format bbox handling.

    Both pipelines share the colour, brightness/contrast and blur steps; they
    differ only in rotation range/probability, flip probability and whether a
    CLAHE step is appended at the end.
    """
    steps = [
        A.ColorJitter(brightness=0.17, contrast=0.17, saturation=0.17, hue=0.03, p=0.4),
        A.RandomBrightnessContrast(brightness_limit=0.2, contrast_limit=0.2, p=0.5),
        A.Rotate(limit=rotate_limit, border_mode=cv2.BORDER_CONSTANT, p=rotate_p),
        A.HorizontalFlip(p=flip_p),
        A.GaussianBlur(blur_limit=2, p=0.11),
    ]
    if use_clahe:
        steps.append(A.CLAHE(clip_limit=2, p=0.15))

    return A.Compose(
        steps,
        bbox_params=A.BboxParams(format="yolo", label_fields=["class_labels"]),
    )


# Stronger pipeline, used for images that contain at least one rare class.
rare_aug_pool = _build_pipeline(rotate_limit=5, rotate_p=0.35, flip_p=0.4, use_clahe=True)

# Milder pipeline for images with only common classes (no CLAHE step).
common_aug_pool = _build_pipeline(rotate_limit=3, rotate_p=0.25, flip_p=0.3, use_clahe=False)

# Class ids treated as rare; images containing any of them get more augmented copies.
RARE_CLASSES = {1, 4, 5, 6}

# Fixed seed so the number of copies per image and the sampled transforms are
# repeatable from one run of this cell to the next.
AUG_SEED = 42
random.seed(AUG_SEED)
np.random.seed(AUG_SEED)

# Sorted so the processing order (and therefore the random draws) is stable;
# os.listdir returns entries in arbitrary order.
image_files = sorted(f for f in os.listdir(IMG_DIR) if f.endswith(".jpg"))

for img_name in tqdm(image_files):
    stem = os.path.splitext(img_name)[0]
    img_path = os.path.join(IMG_DIR, img_name)
    lbl_path = os.path.join(LBL_DIR, stem + ".txt")

    image = cv2.imread(img_path)
    if image is None:
        # Unreadable or corrupt image: nothing to augment.
        continue

    boxes = clip_boxes(read_label(lbl_path))
    if len(boxes) == 0:
        # Images without any box are not augmented.
        continue

    bboxes = [b[:4] for b in boxes]
    class_labels = [b[4] for b in boxes]

    contains_rare = any(c in RARE_CLASSES for c in class_labels)
    pipeline = rare_aug_pool if contains_rare else common_aug_pool

    # realistic numbers: 2-3 copies for rare-class images, 1-2 otherwise
    num_augs = random.randint(2, 3) if contains_rare else random.randint(1, 2)

    for i in range(num_augs):
        augmented = pipeline(image=image, bboxes=bboxes, class_labels=class_labels)

        out_img_path = os.path.join(OUT_IMG, f"{stem}_aug{i}.jpg")
        out_lbl_path = os.path.join(OUT_LBL, f"{stem}_aug{i}.txt")

        # cv2.imwrite reports failure through its return value; do not leave a
        # label file behind without its image.
        if not cv2.imwrite(out_img_path, augmented["image"]):
            print(f"WARNING: could not write {out_img_path}; skipping its label file")
            continue

        with open(out_lbl_path, "w") as f:
            for bb, cls in zip(augmented["bboxes"], augmented["class_labels"]):
                x, y, w, h = bb[:4]
                f.write(f"{cls} {x:.6f} {y:.6f} {w:.6f} {h:.6f}\n")

print("DONE — realistic marine augmentations saved!")


100%|██████████| 734/734 [01:30<00:00,  8.12it/s]

DONE — realistic marine augmentations saved!





In [None]:
import os

# Count the visible (non-hidden) entries in every split folder as a sanity check.
BASE = "/content/drive/MyDrive/Sight_Challenge/last_try"

folders = [
    f"{split}/{kind}"
    for split in ("train", "val", "augmented", "test")
    for kind in ("images", "labels")
]

for f in folders:
    path = os.path.join(BASE, f)
    count = sum(1 for x in os.listdir(path) if not x.startswith('.'))
    print(f"{f}: {count} files")

train/images: 734 files
train/labels: 734 files
val/images: 210 files
val/labels: 210 files
augmented/images: 1314 files
augmented/labels: 1314 files
test/images: 106 files
test/labels: 106 files


In [None]:
#All files copied into a final DATASET DIRECTORY
import os
import shutil

# Destination: the merged dataset that training will read from.
FINAL_DATASET = "/content/drive/MyDrive/Sight_Challenge/final_test"

TRAIN_IMAGES, TRAIN_LABELS = (
    os.path.join(FINAL_DATASET, sub) for sub in ("train/images", "train/labels")
)
VAL_IMAGES, VAL_LABELS = (
    os.path.join(FINAL_DATASET, sub) for sub in ("val/images", "val/labels")
)
TEST_IMAGES, TEST_LABELS = (
    os.path.join(FINAL_DATASET, sub) for sub in ("test/images", "test/labels")
)

# Create the destination folders (no error if they already exist).
for d in (TRAIN_IMAGES, TRAIN_LABELS, VAL_IMAGES, VAL_LABELS, TEST_IMAGES, TEST_LABELS):
    os.makedirs(d, exist_ok=True)

print("Final dataset structure created!")

# Sources: original splits and augmented training data, all under one root.
SOURCE_ROOT = "/content/drive/MyDrive/Sight_Challenge/last_try"

ORIG_TRAIN_IMAGES = os.path.join(SOURCE_ROOT, "train/images")
ORIG_TRAIN_LABELS = os.path.join(SOURCE_ROOT, "train/labels")

# Augmented folders
AUG_IMAGES = os.path.join(SOURCE_ROOT, "augmented/images")
AUG_LABELS = os.path.join(SOURCE_ROOT, "augmented/labels")

# Val + test originals
ORIG_VAL_IMAGES = os.path.join(SOURCE_ROOT, "val/images")
ORIG_VAL_LABELS = os.path.join(SOURCE_ROOT, "val/labels")

ORIG_TEST_IMAGES = os.path.join(SOURCE_ROOT, "test/images")
ORIG_TEST_LABELS = os.path.join(SOURCE_ROOT, "test/labels")


def copy_all(src, dst):
    """Copy every regular file directly inside ``src`` into ``dst``.

    Args:
        src: directory to read from (not traversed recursively).
        dst: existing directory that receives the files; same-named files
            are overwritten.

    Returns:
        Number of files copied. Sub-directories (for example a
        ``.ipynb_checkpoints`` folder) are skipped, since shutil.copy
        cannot copy a directory.
    """
    copied = 0
    for f in os.listdir(src):
        src_path = os.path.join(src, f)
        if not os.path.isfile(src_path):
            continue
        shutil.copy(src_path, os.path.join(dst, f))
        copied += 1
    return copied

# Copy original train
# (source, destination) pairs, in the order they are copied: original train
# data first, then the augmented files into the same train folders, then val
# and test.
copy_plan = [
    (ORIG_TRAIN_IMAGES, TRAIN_IMAGES),
    (ORIG_TRAIN_LABELS, TRAIN_LABELS),
    (AUG_IMAGES, TRAIN_IMAGES),
    (AUG_LABELS, TRAIN_LABELS),
    (ORIG_VAL_IMAGES, VAL_IMAGES),
    (ORIG_VAL_LABELS, VAL_LABELS),
    (ORIG_TEST_IMAGES, TEST_IMAGES),
    (ORIG_TEST_LABELS, TEST_LABELS),
]

for source_dir, target_dir in copy_plan:
    copy_all(source_dir, target_dir)

print("All files copied into /Sight_Challenge/final_test successfully!")


Final dataset structure created!
All files copied into /Sight_Challenge/final_test successfully!


In [None]:
import os

# Verify the merged dataset: count visible (non-hidden) entries per folder.
BASE = "/content/drive/MyDrive/Sight_Challenge/final_test"

folders = [
    f"{split}/{kind}"
    for split in ("train", "val", "test")
    for kind in ("images", "labels")
]

for f in folders:
    path = os.path.join(BASE, f)
    visible_entries = [x for x in os.listdir(path) if x[:1] != '.']
    count = len(visible_entries)
    print(f"{f}: {count} files")


train/images: 2048 files
train/labels: 2048 files
val/images: 210 files
val/labels: 210 files
test/images: 106 files
test/labels: 106 files
