Splitting.py


In [None]:
import os
import shutil

# Build a basketball-only copy of the dataset: every sequence folder whose name
# is listed in basketball.txt is copied from ORIGINAL_DATASET_PATH/<split>/ to
# NEW_DATASET_PATH/<split>/. The script is safe to re-run: sequences that were
# already copied are left alone instead of aborting the whole run.

# Define paths
ORIGINAL_DATASET_PATH = "/Users/aayushjain/Downloads/sportsmot_publish/dataset"  # Update this with your dataset path
NEW_DATASET_PATH = "/Users/aayushjain/Downloads/sportsmot_publish/basketball_dataset"  # Where the basketball-only dataset will be stored

# Read the basketball sequences from basketball.txt (one sequence name per line)
basketball_sequences = set()
with open("/Users/aayushjain/Downloads/sportsmot_publish/splits_txt/basketball.txt", "r") as f:  # Update the correct path
    for line in f:
        sequence_name = line.strip()
        if sequence_name:  # Blank lines are not sequence names
            basketball_sequences.add(sequence_name)

# Process each split (train, val, test)
for split in ["train", "val", "test"]:
    original_split_path = os.path.join(ORIGINAL_DATASET_PATH, split)
    if not os.path.isdir(original_split_path):
        # A split that is not present on disk is skipped rather than crashing os.listdir
        print(f"Skipping split '{split}' (folder not found: {original_split_path})")
        continue

    new_split_path = os.path.join(NEW_DATASET_PATH, split)
    os.makedirs(new_split_path, exist_ok=True)  # Create new split folder

    # Copy only basketball sequences
    for seq in sorted(os.listdir(original_split_path)):
        if seq not in basketball_sequences:
            continue

        destination = os.path.join(new_split_path, seq)
        if os.path.exists(destination):
            # shutil.copytree raises FileExistsError when the target exists,
            # which used to abort every re-run of this script.
            print(f"Skipping {seq} (already present in {new_split_path})")
            continue

        shutil.copytree(os.path.join(original_split_path, seq), destination)

print("Basketball dataset created successfully!")


mot_to_yoloTXT.py


In [None]:
import os
import cv2

# Convert MOT-style ground truth (<seq>/gt/gt.txt) into one YOLO label file per
# frame under DATASET_PATH/labels/<split>/<seq>/<frame>.txt.

# Define dataset paths
DATASET_PATH = "/Users/aayushjain/Downloads/sportsmot_publish/basketball_dataset"
SPLITS = ["train", "val", "test"]


def parse_mot_line(line):
    """Parse one ground-truth line into ``(frame_id, x, y, w, h)``.

    The first six comma-separated fields are read as frame number, object id,
    box left edge, box top edge, box width and box height. Any further fields
    are ignored, so files with extra trailing columns are accepted.

    Returns:
        A tuple ``(frame_id, x, y, w, h)`` with an int frame number and float
        box values, or ``None`` when the line is blank.

    Raises:
        ValueError: if the line has fewer than six fields or a field is not
            numeric.
    """
    text = line.strip()
    if not text:
        return None
    fields = text.split(",")
    if len(fields) < 6:
        raise ValueError(f"Malformed MOT annotation line: {text!r}")
    frame_id, _obj_id, x, y, w, h = (float(value) for value in fields[:6])
    return int(frame_id), x, y, w, h


def to_yolo_line(x, y, w, h, img_width, img_height):
    """Format a pixel-space box as a YOLO label line.

    ``x``/``y`` are the left/top edge of the box in pixels. The result is
    ``"0 x_center y_center width height\\n"`` with all four values divided by
    the image size. The class id is always 0 (single class).
    """
    x_center = (x + w / 2) / img_width
    y_center = (y + h / 2) / img_height
    return f"0 {x_center:.6f} {y_center:.6f} {w / img_width:.6f} {h / img_height:.6f}\n"


def group_boxes_by_frame(lines):
    """Group annotation lines by frame number.

    Returns a dict mapping ``frame_id`` to the list of ``(x, y, w, h)`` boxes
    for that frame, in the order they appear in ``lines``. Blank lines are
    skipped.
    """
    boxes_by_frame = {}
    for line in lines:
        parsed = parse_mot_line(line)
        if parsed is None:
            continue
        frame_id, *box = parsed
        boxes_by_frame.setdefault(frame_id, []).append(tuple(box))
    return boxes_by_frame


for split in SPLITS:
    data_path = os.path.join(DATASET_PATH, split)
    if not os.path.isdir(data_path):
        # A split that is not present on disk is skipped rather than crashing os.listdir
        print(f"Skipping split '{split}' (folder not found: {data_path})")
        continue

    label_path = os.path.join(DATASET_PATH, "labels", split)
    os.makedirs(label_path, exist_ok=True)

    for seq in sorted(os.listdir(data_path)):  # Loop through all folders in train/val/test
        seq_path = os.path.join(data_path, seq)
        img_path = os.path.join(seq_path, "img1")
        ann_path = os.path.join(seq_path, "gt/gt.txt")

        if not os.path.exists(ann_path):
            print(f"Skipping {seq} (No annotation file found)")
            continue  # Skip if no annotations

        print(f"Processing: {seq}")

        with open(ann_path, "r") as f:
            boxes_by_frame = group_boxes_by_frame(f)

        seq_label_path = os.path.join(label_path, seq)

        for frame_id, boxes in boxes_by_frame.items():
            img_path_full = os.path.join(img_path, f"{frame_id:06d}.jpg")

            if not os.path.exists(img_path_full):
                continue  # Skip if image does not exist

            # Load the image once per frame (not once per box) to get its size
            img = cv2.imread(img_path_full)
            if img is None:
                # cv2.imread returns None when the file cannot be decoded
                print(f"Skipping {img_path_full} (image could not be read)")
                continue
            img_height, img_width = img.shape[:2]

            os.makedirs(seq_label_path, exist_ok=True)
            label_file = os.path.join(seq_label_path, f"{frame_id:06d}.txt")

            # "w" rather than "a": re-running the script must not duplicate
            # the boxes already written for this frame.
            with open(label_file, "w") as out_file:
                out_file.writelines(
                    to_yolo_line(x, y, w, h, img_width, img_height)
                    for x, y, w, h in boxes
                )

print("✅ All MOT → YOLO conversions complete! Check 'labels/train' and 'labels/val'.")

Flattening.py


In [None]:
import os
import shutil

# Flatten the per-sequence layout: images from <split>/<seq>/img1/ and labels
# from labels/<split>/<seq>/ are moved into <split>_images/ and <split>_labels/
# with the sequence name prefixed to each file name.

# Define dataset path
dataset_path = "/Users/aayushjain/Downloads/sportsmot_publish/basketball_dataset_final"
splits = ["train", "val", "test"]  # Splits with no folder on disk are skipped


def flattened_name(seq_folder, file_name, extension):
    """Return the flattened name for ``file_name``, or ``None`` to ignore it.

    Hidden files (leading ".") and files that do not end with ``extension``
    are ignored. Otherwise the result is ``"<seq_folder>_<stem><extension>"``,
    where the stem is the file name with only the trailing extension removed,
    so names that contain extra dots stay distinct.
    """
    if file_name.startswith(".") or not file_name.endswith(extension):
        return None
    stem = file_name[: len(file_name) - len(extension)]
    return f"{seq_folder}_{stem}{extension}"


def move_flattened(src_dir, dst_dir, seq_folder, extension):
    """Move matching files from ``src_dir`` into ``dst_dir`` under flattened names.

    Returns the number of files moved; 0 when ``src_dir`` is not a directory.
    """
    if not os.path.isdir(src_dir):
        return 0
    moved = 0
    for file_name in sorted(os.listdir(src_dir)):
        new_name = flattened_name(seq_folder, file_name, extension)
        if new_name is None:
            continue  # Ignore hidden/system files and other extensions
        shutil.move(os.path.join(src_dir, file_name), os.path.join(dst_dir, new_name))
        moved += 1
    return moved


for split in splits:
    split_path = os.path.join(dataset_path, split)
    if not os.path.isdir(split_path):
        # Checked before creating output folders so absent splits leave no empty dirs
        print(f"Skipping split '{split}' (folder not found: {split_path})")
        continue

    new_img_dir = os.path.join(dataset_path, f"{split}_images")
    new_lbl_dir = os.path.join(dataset_path, f"{split}_labels")

    os.makedirs(new_img_dir, exist_ok=True)
    os.makedirs(new_lbl_dir, exist_ok=True)

    for seq_folder in sorted(os.listdir(split_path)):
        if seq_folder.startswith('.'):  # Ignore hidden files like .DS_Store
            continue

        # Move & rename images
        img1_path = os.path.join(split_path, seq_folder, "img1")
        move_flattened(img1_path, new_img_dir, seq_folder, ".jpg")

        # Move & rename labels
        label_folder = os.path.join(dataset_path, "labels", split, seq_folder)
        move_flattened(label_folder, new_lbl_dir, seq_folder, ".txt")

print("✅ Renamed and moved all images & labels successfully!")