In [1]:
import os
import cv2
import mediapipe as mp
from tqdm import tqdm

In [7]:
# MediaPipe Hands detector shared by the annotation loop further down.
# static_image_mode=True treats every input as an unrelated still image
# (detection is run on each one rather than tracking between frames).
mp_hands = mp.solutions.hands
hands = mp_hands.Hands(
    static_image_mode=True,
    max_num_hands=2,
    min_detection_confidence=0.5,
)
def convert_to_yolo_format(img_width, img_height, x_min, y_min, x_max, y_max):
    """Turn a pixel-space corner box into a normalized centre/size box.

    Args:
        img_width: Image width used to normalize the horizontal values.
        img_height: Image height used to normalize the vertical values.
        x_min: Smaller x coordinate of the box.
        y_min: Smaller y coordinate of the box.
        x_max: Larger x coordinate of the box.
        y_max: Larger y coordinate of the box.

    Returns:
        A tuple ``(x_center, y_center, width, height)`` where the horizontal
        values are divided by ``img_width`` and the vertical values by
        ``img_height``.
    """
    # Work in pixel units first, then normalize each value once.
    mid_x = (x_min + x_max) / 2
    mid_y = (y_min + y_max) / 2
    box_w = x_max - x_min
    box_h = y_max - y_min
    return (
        mid_x / img_width,
        mid_y / img_height,
        box_w / img_width,
        box_h / img_height,
    )

# Root of the pre-split dataset: <dataset_path>/<subset>/<class_name>/<image files>.
dataset_path = r"C:/Users/fortn/OneDrive/Desktop/ISL_Text_conv/processed_dataset"
# Labels mirror the image tree: <dataset_path>/labels/<subset>/<class_name>/<image stem>.txt
output_label_path = os.path.join(dataset_path, "labels")


def _list_class_folders(subset_path):
    """Return the sorted names of the class sub-directories of ``subset_path``."""
    return sorted(
        f for f in os.listdir(subset_path)
        if os.path.isdir(os.path.join(subset_path, f))
    )


def _hands_bounding_box(multi_hand_landmarks, img_width, img_height):
    """Return one pixel box ``(x_min, y_min, x_max, y_max)`` around all hands.

    Every landmark of every detected hand contributes to the same box, so a
    two-handed sign produces a single annotation. Coordinates are clamped to
    the image because landmark positions may be estimated outside the frame,
    which would otherwise yield normalized YOLO values outside [0, 1].
    """
    x_min, y_min, x_max, y_max = img_width, img_height, 0, 0
    for hand_landmarks in multi_hand_landmarks:
        for lm in hand_landmarks.landmark:
            # Landmarks are normalized; scale to pixels, then clamp to the image.
            x = min(max(int(lm.x * img_width), 0), img_width)
            y = min(max(int(lm.y * img_height), 0), img_height)
            x_min, y_min = min(x_min, x), min(y_min, y)
            x_max, y_max = max(x_max, x), max(y_max, y)
    return x_min, y_min, x_max, y_max


# YOLO label files require a zero-based integer class index, not the folder
# name. Build the mapping once from every subset so that the same class always
# gets the same index in train, test and val.
class_names = sorted({
    name
    for split_name in ["train", "test", "val"]
    for name in _list_class_folders(os.path.join(dataset_path, split_name))
})
class_to_id = {name: idx for idx, name in enumerate(class_names)}

# Record the index -> name mapping next to the labels (line N holds class N).
os.makedirs(output_label_path, exist_ok=True)
with open(os.path.join(output_label_path, "classes.txt"), "w") as f:
    f.write("\n".join(class_names) + "\n")

labels_written = 0
images_skipped = 0  # unreadable images, no hand detected, or degenerate box

for subset in ["train", "test", "val"]:
    subset_path = os.path.join(dataset_path, subset)
    label_output_path = os.path.join(output_label_path, subset)

    # Ensure output directory exists
    os.makedirs(label_output_path, exist_ok=True)

    # Only directories are treated as classes; stray files are ignored.
    class_folders = _list_class_folders(subset_path)

    for class_name in tqdm(class_folders, desc=f"Processing {subset}"):
        class_folder = os.path.join(subset_path, class_name)
        label_class_path = os.path.join(label_output_path, class_name)
        os.makedirs(label_class_path, exist_ok=True)

        # Process all images in the class folder
        for img_name in os.listdir(class_folder):
            if not img_name.lower().endswith(('.jpg', '.jpeg', '.png')):  # Skip non-image files
                continue

            img_path = os.path.join(class_folder, img_name)
            label_file_path = os.path.join(label_class_path, img_name.rsplit(".", 1)[0] + ".txt")

            # cv2.imread returns None instead of raising when a file cannot be decoded.
            image = cv2.imread(img_path)
            if image is None:
                images_skipped += 1
                continue

            img_height, img_width = image.shape[:2]

            # OpenCV loads BGR; MediaPipe expects RGB.
            image_rgb = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
            results = hands.process(image_rgb)

            # No hand found: leave the image without a label file.
            if not results.multi_hand_landmarks:
                images_skipped += 1
                continue

            x_min, y_min, x_max, y_max = _hands_bounding_box(
                results.multi_hand_landmarks, img_width, img_height
            )

            # Reject degenerate boxes (zero width or height after clamping).
            if x_max <= x_min or y_max <= y_min:
                images_skipped += 1
                continue

            x_center, y_center, width, height = convert_to_yolo_format(
                img_width, img_height, x_min, y_min, x_max, y_max
            )

            # One line per file: "<class index> <x_center> <y_center> <width> <height>"
            with open(label_file_path, "w") as f:
                f.write(f"{class_to_id[class_name]} {x_center:.6f} {y_center:.6f} {width:.6f} {height:.6f}\n")
            labels_written += 1

print(f"Labels written: {labels_written} | images skipped (unreadable or no usable hand box): {images_skipped}")
print("✅ Annotation complete! YOLO labels saved in 'processed_dataset/labels/'")

Processing train: 100%|██████████| 35/35 [15:25<00:00, 26.45s/it]
Processing test: 100%|██████████| 35/35 [01:56<00:00,  3.34s/it]
Processing val: 100%|██████████| 35/35 [03:49<00:00,  6.56s/it]

✅ Annotation complete! YOLO labels saved in 'processed_dataset/labels/'





In [12]:
import os
import cv2
import random

# Spot-check cell: pick one random image from one random class of a subset,
# draw the box(es) stored in its YOLO label file, and show the result.

# Define dataset paths
dataset_path = r"C:/Users/fortn/OneDrive/Desktop/ISL_Text_conv/processed_dataset"
labels_path = os.path.join(dataset_path, "labels")

# Choose which subset to visualize: "train", "test", or "val"
subset = "train"  # Change to "test" or "val" if needed

# Images live in <dataset>/<subset>/<class>/, labels in <dataset>/labels/<subset>/<class>/
image_folder = os.path.join(dataset_path, subset)
label_folder = os.path.join(labels_path, subset)

# Only directories count as classes
class_folders = sorted([f for f in os.listdir(image_folder) if os.path.isdir(os.path.join(image_folder, f))])
if not class_folders:
    raise FileNotFoundError(f"No class folders found in {image_folder}")

# Select a random class folder
selected_class = random.choice(class_folders)
class_image_folder = os.path.join(image_folder, selected_class)
class_label_folder = os.path.join(label_folder, selected_class)

# Select a random image from this class.
# Raise instead of calling exit(): inside a notebook exit() shuts down the
# kernel, whereas an exception only stops this cell.
image_files = [f for f in os.listdir(class_image_folder) if f.lower().endswith(('.jpg', '.jpeg', '.png'))]
if not image_files:
    raise FileNotFoundError(f"No images found in {class_image_folder}")

selected_image = random.choice(image_files)
image_path = os.path.join(class_image_folder, selected_image)
label_path = os.path.join(class_label_folder, selected_image.rsplit(".", 1)[0] + ".txt")

# cv2.imread returns None (no exception) when the file cannot be decoded
image = cv2.imread(image_path)
if image is None:
    raise IOError(f"Failed to load image: {image_path}")

img_height, img_width = image.shape[:2]

# Read YOLO annotations
if os.path.exists(label_path):
    with open(label_path, "r") as f:
        lines = f.readlines()

    for line in lines:
        # Each line: class_id x_center y_center width height (last four normalized)
        values = line.strip().split()
        if len(values) != 5:
            if values:  # stay silent on blank lines, report anything else
                print(f"Skipping malformed annotation line: {line.strip()!r}")
            continue

        class_id = values[0]
        try:
            x_center, y_center, width, height = map(float, values[1:])
        except ValueError:
            print(f"Skipping malformed annotation line: {line.strip()!r}")
            continue

        # Convert normalized centre/size values back to pixel corner coordinates
        x1 = int((x_center - width / 2) * img_width)
        y1 = int((y_center - height / 2) * img_height)
        x2 = int((x_center + width / 2) * img_width)
        y2 = int((y_center + height / 2) * img_height)

        # Draw bounding box on the image
        cv2.rectangle(image, (x1, y1), (x2, y2), (0, 255, 0), 2)
        # Keep the caption inside the image when the box touches the top edge
        text_y = max(y1 - 10, 15)
        cv2.putText(image, f"Class {class_id}", (x1, text_y), cv2.FONT_HERSHEY_SIMPLEX, 0.6, (0, 255, 0), 2)
else:
    print(f"No label file found for this image: {label_path}")

# Show the image in a native OpenCV window; waitKey(0) blocks until a key is pressed
cv2.imshow("YOLO Annotated Image", image)
cv2.waitKey(0)
cv2.destroyAllWindows()
