In [1]:
# download dataset

import os

from roboflow import Roboflow

# The Roboflow API key is read from the environment so that it is never stored
# in (or shared with) the notebook itself.
# NOTE(review): any key that was previously committed in this cell should be
# treated as exposed and rotated in the Roboflow account settings.
api_key = os.environ.get("ROBOFLOW_API_KEY")
if not api_key:
    raise RuntimeError(
        "Set the ROBOFLOW_API_KEY environment variable before running this cell."
    )

rf = Roboflow(api_key=api_key)
project = rf.workspace("dentex").project("dentex-3xe7e")
version = project.version(2)
# Downloads and extracts version 2 of the project in the "yolov8" export format.
dataset = version.download("yolov8")

loading Roboflow workspace...
loading Roboflow project...


Downloading Dataset Version Zip in dentex-2 to yolov8:: 100%|██████████| 246751/246751 [00:11<00:00, 20618.58it/s]





Extracting Dataset Version Zip to dentex-2 in yolov8:: 100%|██████████| 2168/2168 [00:00<00:00, 6855.42it/s]


In [None]:
# data preprocessing

# enhance contrast
import cv2
import os
from pathlib import Path
from shutil import copy2

def apply_clahe_to_folder(input_dir, output_dir, clip_limit=2.0, tile_grid_size=(8, 8)):
    """Write a CLAHE contrast-enhanced grayscale copy of every image in a folder.

    ``input_dir`` is searched recursively for ``*.jpg`` and ``*.png`` files.
    Each file is loaded as grayscale, passed through CLAHE and saved under
    ``output_dir`` at the same relative path. Files that cannot be read or
    written are reported on stdout and excluded from the count.

    Args:
        input_dir: Folder searched recursively for images.
        output_dir: Destination root; created if it does not exist.
        clip_limit: Value passed to ``cv2.createCLAHE`` as ``clipLimit``.
        tile_grid_size: Value passed to ``cv2.createCLAHE`` as ``tileGridSize``.

    Returns:
        The number of images that were successfully written.
    """
    os.makedirs(output_dir, exist_ok=True)
    img_paths = list(Path(input_dir).rglob("*.jpg")) + list(Path(input_dir).rglob("*.png"))

    # The CLAHE settings are the same for every image, so build the object once
    # instead of once per loop iteration.
    clahe = cv2.createCLAHE(clipLimit=clip_limit, tileGridSize=tile_grid_size)

    count = 0
    for path in img_paths:
        img = cv2.imread(str(path), cv2.IMREAD_GRAYSCALE)
        if img is None:
            print(f"Skipping unreadable image: {path}")
            continue

        img_clahe = clahe.apply(img)

        # Mirror the input folder layout below output_dir.
        output_path = Path(output_dir) / path.relative_to(input_dir)
        output_path.parent.mkdir(parents=True, exist_ok=True)

        # cv2.imwrite reports success through its return value; only count
        # images that were really saved.
        if not cv2.imwrite(str(output_path), img_clahe):
            print(f"Failed to write image: {output_path}")
            continue
        count += 1

    print(f"Processed {count} images from {input_dir} → {output_dir}")
    return count

def copy_labels(input_base, output_base):
    """Copy the ``*.txt`` label files of every dataset split to a new dataset root.

    For each of the ``train``, ``valid`` and ``test`` splits, the files matching
    ``<input_base>/<split>/labels/*.txt`` are copied (with metadata, via
    ``copy2``) into ``<output_base>/<split>/labels``, which is created if needed.
    """
    src_root, dst_root = Path(input_base), Path(output_base)
    for split_name in ("train", "valid", "test"):
        target_dir = dst_root / split_name / "labels"
        target_dir.mkdir(parents=True, exist_ok=True)
        for txt_file in (src_root / split_name / "labels").glob("*.txt"):
            copy2(txt_file, target_dir)

original_base = "dentex-2"
processed_base = "dentex-2-clahe"

# Contrast-enhance the images of each split, reading from the downloaded
# dataset and writing to the parallel "-clahe" dataset root.
for split in ("train", "valid", "test"):
    input_imgs, output_imgs = (
        os.path.join(base, split, "images")
        for base in (original_base, processed_base)
    )
    apply_clahe_to_folder(input_imgs, output_imgs)

# CLAHE leaves the image geometry alone, so the labels are carried over as-is.
copy_labels(original_base, processed_base)


Processed 873 images from dentex-2/train/images → dentex-2-clahe/train/images
Processed 40 images from dentex-2/valid/images → dentex-2-clahe/valid/images
Processed 165 images from dentex-2/test/images → dentex-2-clahe/test/images


In [None]:
# data preprocessing

# jaw cropping

import cv2
import os
from pathlib import Path
from shutil import copy2

def crop_jaw_percent(input_dir, output_dir, percent=0.15):
    """Trim a fixed fraction off every border of each image in a folder.

    ``input_dir`` is searched recursively for ``*.jpg`` and ``*.png`` files.
    From each image, ``percent`` of the height is removed at the top and at the
    bottom and ``percent`` of the width at the left and at the right; the
    result is saved under ``output_dir`` at the same relative path.

    Only image files are handled here. Annotations expressed relative to the
    original image size are not adjusted by this function.

    Args:
        input_dir: Folder searched recursively for images.
        output_dir: Destination root; created if it does not exist.
        percent: Fraction (0 <= percent < 0.5) removed from each border.

    Returns:
        The number of images that were successfully written.

    Raises:
        ValueError: If ``percent`` is outside ``[0, 0.5)``; such values would
            leave nothing (or the wrong region) to save.
    """
    if not 0 <= percent < 0.5:
        raise ValueError(f"percent must be in [0, 0.5), got {percent!r}")

    os.makedirs(output_dir, exist_ok=True)
    img_paths = list(Path(input_dir).rglob("*.jpg")) + list(Path(input_dir).rglob("*.png"))
    count = 0

    for path in img_paths:
        img = cv2.imread(str(path))
        if img is None:
            print(f"Skipping unreadable image: {path}")
            continue

        # Pixel bounds of the central window that is kept.
        h, w = img.shape[:2]
        top = int(h * percent)
        bottom = int(h * (1 - percent))
        left = int(w * percent)
        right = int(w * (1 - percent))

        cropped_img = img[top:bottom, left:right]

        # Mirror the input folder layout below output_dir.
        output_path = Path(output_dir) / path.relative_to(input_dir)
        output_path.parent.mkdir(parents=True, exist_ok=True)

        # cv2.imwrite reports success through its return value; only count
        # images that were really saved.
        if not cv2.imwrite(str(output_path), cropped_img):
            print(f"Failed to write image: {output_path}")
            continue
        count += 1

    return count

def copy_labels(input_base, output_base):
    """Copy the ``*.txt`` label files of every dataset split to a new dataset root.

    For each of the ``train``, ``valid`` and ``test`` splits, the files matching
    ``<input_base>/<split>/labels/*.txt`` are copied (with metadata, via
    ``copy2``) into ``<output_base>/<split>/labels``, which is created if needed.
    """
    src_root, dst_root = Path(input_base), Path(output_base)
    for split_name in ("train", "valid", "test"):
        target_dir = dst_root / split_name / "labels"
        target_dir.mkdir(parents=True, exist_ok=True)
        for txt_file in (src_root / split_name / "labels").glob("*.txt"):
            copy2(txt_file, target_dir)

original_base = "dentex-2-clahe"
processed_base = "dentex-2-clahe-cropped"
CROP_PERCENT = 0.15  # fraction trimmed from every image border


def _remap_yolo_line_for_crop(line, percent):
    """Re-express one YOLO label line in the frame of the cropped image.

    YOLO label coordinates are normalised to the image size, so trimming
    ``percent`` off every border shifts and rescales them. A line with four
    coordinates is treated as a box (x_center y_center width height); a line
    with an even number (>= 6) of coordinates is treated as a polygon
    (x1 y1 x2 y2 ...). Coordinates are clamped to the cropped image.

    Returns the rewritten, newline-terminated line, or ``None`` for blank
    lines and for annotations that no longer overlap the cropped area.
    Lines that do not fit either layout are returned unchanged.
    """
    tokens = line.split()
    if not tokens:
        return None

    untouched = line if line.endswith("\n") else line + "\n"
    try:
        coords = [float(tok) for tok in tokens[1:]]
    except ValueError:
        return untouched

    scale = 1.0 - 2.0 * percent

    def to_cropped(value):
        # Shift by the trimmed margin, rescale to the smaller image, then clamp.
        return min(1.0, max(0.0, (value - percent) / scale))

    if len(coords) == 4:
        xc, yc, bw, bh = coords
        x1, x2 = to_cropped(xc - bw / 2), to_cropped(xc + bw / 2)
        y1, y2 = to_cropped(yc - bh / 2), to_cropped(yc + bh / 2)
        if x2 <= x1 or y2 <= y1:
            return None  # box lies entirely in the trimmed margin
        new_coords = [(x1 + x2) / 2, (y1 + y2) / 2, x2 - x1, y2 - y1]
    elif len(coords) >= 6 and len(coords) % 2 == 0:
        new_coords = [to_cropped(v) for v in coords]
        xs, ys = new_coords[0::2], new_coords[1::2]
        if max(xs) <= min(xs) or max(ys) <= min(ys):
            return None  # polygon collapsed onto the crop border
    else:
        return untouched

    return " ".join([tokens[0]] + [f"{v:.6f}" for v in new_coords]) + "\n"


# apply cropping
total_cropped = 0
for split in ["train", "valid", "test"]:
    input_imgs = os.path.join(original_base, split, "images")
    output_imgs = os.path.join(processed_base, split, "images")
    total_cropped += crop_jaw_percent(input_imgs, output_imgs, percent=CROP_PERCENT)

    # The labels cannot simply be copied: their normalised coordinates refer to
    # the uncropped image. Rewrite them for the cropped frame instead. The crop
    # bounds are truncated to whole pixels, so this mapping is accurate to
    # within about one pixel.
    label_src = Path(original_base) / split / "labels"
    label_dst = Path(processed_base) / split / "labels"
    label_dst.mkdir(parents=True, exist_ok=True)
    for label_file in label_src.glob("*.txt"):
        remapped = (
            _remap_yolo_line_for_crop(line, CROP_PERCENT)
            for line in label_file.read_text().splitlines()
        )
        (label_dst / label_file.name).write_text(
            "".join(new_line for new_line in remapped if new_line is not None)
        )

print(total_cropped)


1078

In [1]:
import os
from pathlib import Path

def keep_only_class_0_labels(label_dir):
    """Reduce every label file under ``label_dir`` to its class-0 annotations.

    ``label_dir`` is searched recursively for ``*.txt`` files. Each file is
    rewritten in place with only the lines whose first whitespace-separated
    token is ``0``; a file without any such line is deleted. A one-line
    summary is printed at the end.

    Note that this modifies and deletes files in ``label_dir`` directly.
    """
    label_paths = list(Path(label_dir).rglob("*.txt"))
    kept, total = 0, 0

    for label_path in label_paths:
        total += 1
        with open(label_path, 'r') as f:
            lines = f.readlines()

        # Compare the class token itself rather than the prefix "0 ", so lines
        # separated by tabs or repeated spaces are recognised as class 0 too.
        lines_class_0 = [line for line in lines if line.split()[:1] == ['0']]

        if lines_class_0:
            with open(label_path, 'w') as f:
                f.writelines(lines_class_0)
            kept += 1
        else:
            # Nothing of class 0 in this file: drop the label file altogether.
            os.remove(label_path)

    print(f"Cleaned {total} label files. {kept} files retained with class 0 only.")

# Filter the labels of every split of the cropped dataset down to class 0.
# This rewrites and deletes label files in place.
base_dir = "dentex-2-clahe-cropped"
for split in ("train", "valid", "test"):
    label_dir = os.path.join(base_dir, split, "labels")
    keep_only_class_0_labels(label_dir)


Cleaned 873 label files. 775 files retained with class 0 only.
Cleaned 40 label files. 38 files retained with class 0 only.
Cleaned 165 label files. 144 files retained with class 0 only.
