In [None]:
import os
import shutil
from sklearn.model_selection import train_test_split

# Root folder of the dataset
base_path = '/content/drive/MyDrive/CubDatset'

# Image index file and the folder that holds the source images
images_file = os.path.join(base_path, 'images.txt')
image_dir = os.path.join(base_path, 'images')

# Output folders for the two splits; each is created if it does not exist yet
train_dir, test_dir = (os.path.join(base_path, split) for split in ('train', 'test'))
for split_dir in (train_dir, test_dir):
    os.makedirs(split_dir, exist_ok=True)

# Group image paths by class.  Each record in the index file is expected to be
# "<image_id> <class_name>/<file_name>"; the class is the first path component.
MAX_IMAGES_PER_CLASS = 5  # cap on how many images are kept for each class

class_to_images = {}
with open(images_file, 'r') as f:
    # Iterate the file lazily instead of loading every line with readlines()
    for line in f:
        line = line.strip()
        if not line:
            # A blank line (e.g. a trailing newline at the end of the file)
            # carries no record; unpacking it would raise ValueError.
            continue
        # Split only at the first run of whitespace so that a path which
        # itself contains spaces stays in one piece.
        image_id, image_filename = line.split(maxsplit=1)
        class_name = image_filename.split('/')[0]
        class_to_images.setdefault(class_name, []).append(image_filename)

# Keep only the first MAX_IMAGES_PER_CLASS images of each class (file order)
class_to_images = {
    class_name: images[:MAX_IMAGES_PER_CLASS]
    for class_name, images in class_to_images.items()
}

# Split each class's images into 80% training and 20% testing, and copy them
# into <split>/<class_name>/ under the respective split directory.


def copy_images(images, target_dir, class_name=None):
    """Copy images from ``image_dir`` into a class sub-folder of ``target_dir``.

    Args:
        images: Image paths relative to ``image_dir``, in the form
            "<class_name>/<file_name>".
        target_dir: Root folder of the split that receives the copies.
        class_name: Name of the sub-folder to copy into.  When omitted, each
            image goes into the folder named by the first component of its
            own path.

    Images whose source file does not exist are reported and skipped.
    """
    if class_name is not None:
        # Create the class folder up front so it exists even when this split
        # received no images for the class.
        os.makedirs(os.path.join(target_dir, class_name), exist_ok=True)

    for image_filename in images:
        label = image_filename.split('/')[0] if class_name is None else class_name
        class_dir = os.path.join(target_dir, label)
        os.makedirs(class_dir, exist_ok=True)

        source_path = os.path.join(image_dir, image_filename)
        if os.path.exists(source_path):
            # basename keeps the real file name even if the path is nested
            # more than one level deep.
            shutil.copy(source_path, os.path.join(class_dir, os.path.basename(image_filename)))
        else:
            print(f"Image {image_filename} not found in subset.")


for class_name, images in class_to_images.items():
    if len(images) < 2:
        # train_test_split raises ValueError when one side of the split would
        # be empty, which happens for a class with a single image.  Keep such
        # classes entirely in the training set.
        print(f"Class {class_name} has fewer than 2 images; using all of them for training.")
        train_images, test_images = list(images), []
    else:
        # With 5 images per class this typically yields 4 training and
        # 1 testing image; the fixed random_state keeps the shuffle repeatable.
        train_images, test_images = train_test_split(images, test_size=0.2, random_state=42)

    # Copy training and testing images
    copy_images(train_images, train_dir, class_name)
    copy_images(test_images, test_dir, class_name)

print("Dataset splitting into 80:20 complete.")


Dataset splitting into 80:20 complete.
