In [None]:
import os
import shutil
import re

def extract_leading_number(filename):
    """Return the run of digits that starts ``filename``, as a string.

    Matching is anchored at the first character and stops at the first
    non-digit, so a following underscore is not required. The digits are
    returned unchanged (leading zeros are kept).

    Returns:
        The leading digit string, or ``None`` when ``filename`` does not
        begin with a digit.
    """
    leading_digits = re.match(r"(\d+)", filename)
    if leading_digits is None:
        return None
    return leading_digits.group(1)

def index_files_by_leading_number(directory):
    """Group the image files directly inside ``directory`` by leading number.

    Only entries whose name ends in ``.jpg``, ``.jpeg`` or ``.png`` are
    considered; the extension is compared case-insensitively so files such
    as ``12_scan.JPG`` are indexed too. Entries whose name does not start
    with a digit are skipped.

    Args:
        directory: Path of the directory to list (not searched recursively).

    Returns:
        A dict mapping each leading-number string (as returned by
        ``extract_leading_number``) to the list of matching file names,
        in sorted name order.
    """
    index = {}
    # os.listdir gives entries in arbitrary order; sorting keeps the
    # per-number lists reproducible from run to run.
    for file in sorted(os.listdir(directory)):
        # Lower-case only for the comparison; the original name is what
        # gets stored so later path joins still point at the real file.
        if not file.lower().endswith(('.jpg', '.jpeg', '.png')):
            continue
        number = extract_leading_number(file)
        if number:
            index.setdefault(number, []).append(file)
    return index

def copy_split(original_base, enhanced_dir, output_base):
    """Copy enhanced images into the same train/valid/test split as the originals.

    For each split in ``train``, ``valid`` and ``test``, every file in
    ``<original_base>/<split>/images`` is matched by its leading number
    against the image files in ``enhanced_dir``. All enhanced files sharing
    that number are copied (with ``shutil.copy2``) into
    ``<output_base>/<split>/images``, which is created if needed. Original
    files with no leading number, or with no enhanced counterpart, are
    reported with a warning on stdout.

    Args:
        original_base: Root of the already-split original dataset.
        enhanced_dir: Flat directory holding the enhanced images.
        output_base: Root under which the split enhanced dataset is written.

    Returns:
        None.
    """
    # The enhanced directory is the same for every split, so scan and index
    # it once instead of re-listing it on each loop iteration.
    enhanced_index = index_files_by_leading_number(enhanced_dir)

    for split in ['train', 'valid', 'test']:
        original_images_dir = os.path.join(original_base, split, 'images')
        output_images_dir = os.path.join(output_base, split, 'images')
        os.makedirs(output_images_dir, exist_ok=True)

        print(f"Processing {split} split...")

        # Several originals in one split can share a leading number; their
        # enhanced files only need to be copied once per split.
        copied_numbers = set()

        for file in os.listdir(original_images_dir):
            number = extract_leading_number(file)
            if not number or number not in enhanced_index:
                print(f"[Warning] No match found for original file: {file}")
                continue
            if number in copied_numbers:
                continue
            copied_numbers.add(number)
            for matched_file in enhanced_index[number]:
                src_file = os.path.join(enhanced_dir, matched_file)
                dst_file = os.path.join(output_images_dir, matched_file)
                shutil.copy2(src_file, dst_file)


if __name__ == "__main__":
    # Dataset that is already divided into train/valid/test; its layout
    # decides which split each enhanced image is copied into.
    original_dataset = "H:/Shared drives/Computer Vision for Energy Efficiency/Yaman/Models_Training/Yolov11_seg/Aug_Greyscale_Original/"
    # Flat folder containing the enhanced images to distribute.
    enhanced_dataset = "H:/Shared drives/Computer Vision for Energy Efficiency/Yaman/Models_Training/Yolov11_seg/Temp/"
    # Destination root; <split>/images sub-folders are created beneath it.
    output_dataset = "H:/Shared drives/Computer Vision for Energy Efficiency/Yaman/Models_Training/Yolov11_seg/Aug_Greyscale_Enhanced_x2_split"

    copy_split(
        original_base=original_dataset,
        enhanced_dir=enhanced_dataset,
        output_base=output_dataset,
    )
    print("\n✅ Done. Enhanced dataset split created.")


Processing train split...
Processing valid split...
Processing test split...

✅ Done. Enhanced dataset split created.
