In [1]:
import os
import shutil

In [6]:
# Source dataset folders (relative paths) that will be merged into one dataset
datasets = [
    "fight detection.v12i.yolov8",
    "gisc.v1i.yolov8",
    "violence detection.v2i.yolov8",
]

In [7]:
# Root folder of the merged dataset, plus one sub-folder each for images and labels
combined_dataset = 'dataset'
image_destination, label_destination = (
    os.path.join(combined_dataset, subfolder) for subfolder in ('images', 'labels')
)

In [10]:
# Create the combined dataset folder and its 'images' and 'labels' sub-folders.
# exist_ok=True lets this cell be re-run without failing on existing folders.
for folder in (combined_dataset, image_destination, label_destination):
    os.makedirs(folder, exist_ok=True)

In [11]:
# Function to copy files from source to destination
def merge_files(source_folder, destination_folder, file_type='images'):
    """Copy the files of one kind from ``source_folder`` into ``destination_folder``.

    ``source_folder`` is walked recursively and every matching file is copied
    directly into ``destination_folder`` (the sub-folder structure is not
    reproduced). A file that already exists under the same name in the
    destination is replaced by ``shutil.copy``.

    Parameters
    ----------
    source_folder : folder to search for files.
    destination_folder : folder that receives the copies; created if missing.
    file_type : ``'images'`` copies ``.jpg``/``.png`` files, ``'labels'`` copies
        ``.txt`` files. Any other value copies all three extensions.

    Returns
    -------
    int : number of files copied.
    """
    extensions_by_type = {
        'images': ('.jpg', '.png'),
        'labels': ('.txt',),
    }
    # Unrecognised file_type values fall back to accepting every supported extension.
    extensions = extensions_by_type.get(file_type, ('.jpg', '.png', '.txt'))

    # shutil.copy treats a non-existent destination as a *file* path, which would
    # make every copy overwrite the previous one; ensure it is a directory first.
    os.makedirs(destination_folder, exist_ok=True)

    copied = 0
    for root, _, files in os.walk(source_folder):
        for file in files:
            if file.endswith(extensions):
                shutil.copy(os.path.join(root, file), destination_folder)
                copied += 1
    return copied


In [12]:
# Copy every split of every source dataset into the combined images/labels folders
for dataset in datasets:
    for split in ['train', 'test', 'valid']:
        for kind, destination in (('images', image_destination), ('labels', label_destination)):
            source_folder = os.path.join(dataset, split, kind)
            # A split folder may be absent from a dataset; skip it in that case
            if os.path.exists(source_folder):
                merge_files(source_folder, destination, kind)

print(f"Files merged into {combined_dataset} successfully!")

Files merged into dataset successfully!


In [13]:
# Function to count files in a given folder
def count_files(folder, file_types):
    """Return how many files under ``folder`` end with one of ``file_types``.

    The folder is walked recursively with ``os.walk``; a file is counted when
    its name ends (case-sensitively) with any of the given suffixes.

    Parameters
    ----------
    folder : folder to scan.
    file_types : iterable of filename suffixes, e.g. ``['.jpg', '.png']``.

    Returns
    -------
    int : number of matching files (0 when nothing matches).
    """
    return sum(
        1
        for _dirpath, _dirnames, filenames in os.walk(folder)
        for filename in filenames
        if filename.endswith(tuple(file_types))
    )

In [14]:
# Running totals across all datasets, both starting at zero
total_image_count = total_label_count = 0

# Filename suffixes that identify image files and label files
image_types, label_types = ['.jpg', '.png'], ['.txt']

In [16]:
# Loop through each dataset and count files in its train, test, and valid folders.
# The combined totals are reset here so that re-running this cell on its own
# does not add the same counts a second time.
total_image_count = 0
total_label_count = 0

for dataset in datasets:
    print(f"\nCounting files in dataset: {dataset}")

    # Per-dataset subtotals, restarted for every dataset
    dataset_image_count = 0
    dataset_label_count = 0

    for split in ['train', 'test', 'valid']:
        image_folder = os.path.join(dataset, split, 'images')
        label_folder = os.path.join(dataset, split, 'labels')

        # A split folder that does not exist is skipped without printing a line
        if os.path.exists(image_folder):
            image_count = count_files(image_folder, image_types)
            dataset_image_count += image_count
            print(f" - {split.capitalize()} images: {image_count}")

        if os.path.exists(label_folder):
            label_count = count_files(label_folder, label_types)
            dataset_label_count += label_count
            print(f" - {split.capitalize()} labels: {label_count}")

    # Update combined totals
    total_image_count += dataset_image_count
    total_label_count += dataset_label_count

    # Print dataset totals
    print(f"Total images in {dataset}: {dataset_image_count}")
    print(f"Total labels in {dataset}: {dataset_label_count}")


Counting files in dataset: fight detection.v12i.yolov8
 - Train images: 213
 - Train labels: 213
 - Test images: 28
 - Test labels: 28
 - Valid images: 59
 - Valid labels: 59
Total images in fight detection.v12i.yolov8: 300
Total labels in fight detection.v12i.yolov8: 300

Counting files in dataset: gisc.v1i.yolov8
 - Train images: 636
 - Train labels: 636
 - Test images: 85
 - Test labels: 85
 - Valid images: 81
 - Valid labels: 81
Total images in gisc.v1i.yolov8: 802
Total labels in gisc.v1i.yolov8: 802

Counting files in dataset: violence detection.v2i.yolov8
 - Train images: 1089
 - Train labels: 1089
Total images in violence detection.v2i.yolov8: 1089
Total labels in violence detection.v2i.yolov8: 1089


In [17]:
# Report the image and label totals accumulated over all datasets
print(
    "\nCombined total across all datasets:",
    f"Total images: {total_image_count}",
    f"Total labels: {total_label_count}",
    sep="\n",
)


Combined total across all datasets:
Total images: 2191
Total labels: 2191
