# In [None]:
import os
import shutil
import matplotlib.pyplot as plt
from collections import Counter

# Paths (replace the placeholders with your own locations)
normalized_dir = 'your-normalized-dataset-path'   # input: one sub-folder per background
fine_tuning_dir = 'your-finetuning-set-path'      # output: fine-tuning split
testing_dir = 'your-testing-set-path'             # output: testing split

# Backgrounds to leave for testing
testing_backgrounds = ['25', '32', '34']  ## change the list to your subsets

# Create directories for fine-tuning and testing sets.
# exist_ok=True makes the call a no-op when the directory already exists,
# so a single call per directory is sufficient.
for output_dir in (fine_tuning_dir, testing_dir):
    os.makedirs(output_dir, exist_ok=True)


def _count_objects(label_path, object_counts):
    """Add the objects listed in one annotation file to ``object_counts``."""
    with open(label_path, 'r') as f:
        for idx, label in enumerate(f):
            label = label.strip()
            # A blank line or the '0,0,0,0' placeholder means no object is present.
            if not label or label == '0,0,0,0':
                continue
            # The line index is used as the class ID.
            # NOTE(review): presumably one line per class (vehicles, male,
            # female) -- confirm against the annotation format.
            object_counts[idx] += 1


# Function to copy image/label pairs into a split and count their objects
def move_files(source, dest, backgrounds, mode='copy'):
    """Copy labelled images of the given backgrounds into ``dest`` and count objects.

    For every ``*.jpg`` in ``source/<background>`` that has a ``.txt`` label
    file with the same stem, the pair is copied (never moved) to ``dest``
    under a sequential name and the objects in the label file are tallied.

    Args:
        source: Root directory containing one sub-directory per background.
        dest: Existing directory that receives the renamed copies.
        backgrounds: Iterable of background sub-directory names to process.
        mode: ``'finetune'`` writes ``fine_tuning_NNNNNN.jpg/.txt``,
            ``'test'`` writes ``testing_NNNNNN.jpg/.txt``. Any other value
            (including the default ``'copy'``) copies nothing and only counts
            objects.

    Returns:
        collections.Counter mapping class ID (line index inside the label
        file) to the number of objects found.

    Raises:
        FileNotFoundError: If a background directory does not exist.
    """
    # File-name prefix for the modes that actually copy files.
    prefixes = {'finetune': 'fine_tuning', 'test': 'testing'}
    prefix = prefixes.get(mode)

    object_counts = Counter()
    copied = 0
    for background in backgrounds:
        print("background = ", background)
        # Images and labels live side by side; append a sub-folder to either
        # path here if your dataset separates them.
        image_dir = os.path.join(source, background)
        label_dir = os.path.join(source, background)

        # os.listdir returns entries in arbitrary order; sorting keeps the
        # sequential output names reproducible between runs.
        for file_name in sorted(os.listdir(image_dir)):
            if not file_name.endswith('.jpg'):
                continue
            image_path = os.path.join(image_dir, file_name)
            label_name = os.path.splitext(file_name)[0] + '.txt'
            label_path = os.path.join(label_dir, label_name)

            # An image without annotations can be neither copied nor counted.
            if not os.path.isfile(label_path):
                print("skipping (no label file): ", image_path)
                continue

            if prefix is not None:
                # Number only the pairs that are really written, so the
                # output sequence has no gaps.
                copied += 1
                shutil.copy(image_path, os.path.join(dest, f'{prefix}_{copied:06d}.jpg'))
                shutil.copy(label_path, os.path.join(dest, f'{prefix}_{copied:06d}.txt'))

            # Count objects from the annotation file
            _count_objects(label_path, object_counts)
    return object_counts

# Move files to new fine-tuning and testing sets
# Background names are generated zero-padded ('01'..'36'), so the held-out names
# are padded the same way before comparing; otherwise an entry such as '05'
# would not be excluded and would leak into the fine-tuning set.
held_out_backgrounds = {str(name).zfill(2) for name in testing_backgrounds}
all_backgrounds = [str(i).zfill(2) for i in range(1, 37)]
fine_tuning_backgrounds = [name for name in all_backgrounds if name not in held_out_backgrounds]
fine_tuning_counts = move_files(normalized_dir, fine_tuning_dir, fine_tuning_backgrounds, mode='finetune')
testing_counts = move_files(normalized_dir, testing_dir, testing_backgrounds, mode='test')

# Plot distribution of objects: one bar chart per split, side by side
fig, ax = plt.subplots(1, 2, figsize=(12, 6))

# Each panel: (axes, per-class counts, panel title, bar colour)
panels = (
    (ax[0], fine_tuning_counts, 'Object Distribution in Fine-Tuning Set', 'blue'),
    (ax[1], testing_counts, 'Object Distribution in Testing Set', 'green'),
)
for panel_axes, class_counts, panel_title, bar_colour in panels:
    panel_axes.bar(class_counts.keys(), class_counts.values(), color=bar_colour)
    panel_axes.set_title(panel_title)
    panel_axes.set_xlabel('Class ID')
    panel_axes.set_ylabel('Count')

plt.tight_layout()
plt.show()