## Rename Images

In [None]:
import os
import glob

# Root directory of the image dataset: each immediate sub-folder found here
# has its images renamed after the sub-folder's own name.
directory_path = '/home/tuhin/Desktop/image_processing/images'

# Function to rename images in a folder
def rename_images(folder_path, folder_name):
    """Rename every image in ``folder_path`` to ``<folder_name>_<n><ext>``.

    Files ending in ``.png``, ``.jpg`` or ``.jpeg`` are collected, sorted by
    path and numbered from 1. The original extension is kept.

    The rename is done in two phases (original -> unique staging name ->
    final name). A direct rename can destroy data: ``os.rename`` silently
    replaces an existing destination on POSIX, so if the folder already
    contains e.g. ``cat_1.png`` and another file is renamed to ``cat_1.png``
    first, the original ``cat_1.png`` is lost.

    Only image files are touched; any other file that shares an image's old
    stem keeps its name.

    Args:
        folder_path: Directory whose images are renamed in place.
        folder_name: Prefix used for the new file names.
    """
    import uuid  # local import: only needed to build unique staging names

    # List all image files in the folder
    image_files = []
    for pattern in ('*.png', '*.jpg', '*.jpeg'):
        image_files.extend(glob.glob(os.path.join(folder_path, pattern)))

    # glob returns matches in arbitrary order; sort so numbering is reproducible
    image_files.sort()

    # Phase 1: move every file out of the way to a name that cannot clash with
    # any final name. The staging name is not hidden, so an interrupted run can
    # simply be repeated and will pick the staged files up again.
    token = uuid.uuid4().hex
    staged = []
    for i, old_name in enumerate(image_files):
        ext = os.path.splitext(old_name)[1]
        staging_name = os.path.join(folder_path, f'__renaming_{token}_{i}{ext}')
        os.rename(old_name, staging_name)
        staged.append((old_name, staging_name, ext))

    # Phase 2: all final names are now free, so assign them
    for i, (old_name, staging_name, ext) in enumerate(staged):
        new_name = os.path.join(folder_path, f'{folder_name}_{i+1}{ext}')
        os.rename(staging_name, new_name)
        print(f'Renamed: {old_name} -> {new_name}')

# Walk the top-level entries and rename images only inside real directories
for folder_name in os.listdir(directory_path):
    folder_path = os.path.join(directory_path, folder_name)
    if not os.path.isdir(folder_path):
        # Plain files sitting next to the class folders are left untouched
        continue
    rename_images(folder_path, folder_name)

## Create Directory
#### /home/tuhin/Desktop/image_processing
Inside `image_processing` there are 5 folders: images, labels, test, train and valid.<br>
images => Put all the image folders here.<br>
labels => Put all the label folders here.<br>
test => test images and labels (create two folders inside it: images and labels)<br>
train => train images and labels (create two folders inside it: images and labels)<br>
valid => valid images and labels (create two folders inside it: images and labels)<br>
<span style="color:red">Note:</span> Before running the code below, place all the image and label folders inside images and labels, and make sure test, train and valid are empty.

## Keep only 100 images from each directory <span style="color:red">Optional</span>

In [4]:
import os

# Root directory whose sub-folders each hold a set of images
directory_path = '/home/tuhin/Desktop/image_processing/images'
# Maximum number of images to keep per sub-folder
image_limit = 100

# Collect the names of all sub-directories (plain files are ignored)
folder_names = []
for folder in os.listdir(directory_path):
    if os.path.isdir(os.path.join(directory_path, folder)):
        folder_names.append(folder)

# Trim every sub-folder down to its first `image_limit` images
for folder_name in folder_names:
    folder_path = os.path.join(directory_path, folder_name)

    # Everything currently in the folder
    files = os.listdir(folder_path)

    # Image files only, in lexicographic order
    image_files = sorted(f for f in files if f.endswith(('.jpg', '.png', '.jpeg')))

    # Whatever lies beyond the first `image_limit` entries is deleted
    surplus = image_files[image_limit:]
    for image_file in surplus:
        os.remove(os.path.join(folder_path, image_file))


## Update Label Indexing

In [14]:
import os

# Root of the label tree; its sub-folders are processed one by one below and
# the first field of every line in their .txt files is overwritten.
directory = '/home/tuhin/Desktop/image_processing/labels'

# Names of the processed sub-folders, in processing order
folder_names_list = []

# Number of processed sub-folders (incremented once per folder, not per line)
total_labels = 0

# Function to replace the first value in each line of a text file
def replace_first_value(file_path, replace_value):
    """Overwrite the first whitespace-separated field of each line in a file.

    The file is read completely, then rewritten in place. Lines that contain
    only whitespace are dropped, and the remaining fields of every kept line
    are re-joined with single spaces and terminated by a newline.

    Args:
        file_path: Path of the text file to rewrite.
        replace_value: String written as the new first field of every line.
    """
    with open(file_path, 'r') as src:
        original_lines = src.readlines()

    with open(file_path, 'w') as dst:
        # Lazily build the rewritten lines; blank lines yield no fields and are skipped
        dst.writelines(
            ' '.join([replace_value, *fields[1:]]) + '\n'
            for fields in (raw.strip().split() for raw in original_lines)
            if fields
        )

# Only real sub-directories are classes. They are selected *before* numbering
# so that stray files in the label root (e.g. a classes.txt) cannot leave gaps
# in the class ids, and sorted because os.listdir returns entries in arbitrary
# order, which would make the folder -> id mapping differ between machines.
class_folders = sorted(
    entry for entry in os.listdir(directory)
    if os.path.isdir(os.path.join(directory, entry))
)

# Iterate over each class folder; its position in the sorted list is its class id
for idx, foldername in enumerate(class_folders):
    folder_path = os.path.join(directory, foldername)
    replace_value = str(idx)
    folder_names_list.append(foldername)
    print(f"Processing folder '{foldername}', replace_value: {replace_value}")
    total_labels += 1
    # Rewrite the class id in every label file of this folder
    for filename in os.listdir(folder_path):
        if filename.endswith('.txt'):
            file_path = os.path.join(folder_path, filename)
            replace_first_value(file_path, replace_value)

# Print the list of folder names and the total number of labels
print()
print("List of folder names:", folder_names_list)
print("Total number of labels:", total_labels)

Processing folder 'pest_snail', replace_value: 0
Processing folder 'pest_weevil', replace_value: 1
List of folder names: ['pest_snail', 'pest_weevil']


## Splitting Dataset into 70% (train), 20% (valid) and 10% (test)

In [13]:
import os
import random
import shutil

# Define directory paths
image_source_dir = '/home/tuhin/Desktop/image_processing/images'
label_source_dir = '/home/tuhin/Desktop/image_processing/labels'
train_dir = '/home/tuhin/Desktop/image_processing/train'
valid_dir = '/home/tuhin/Desktop/image_processing/valid'
test_dir = '/home/tuhin/Desktop/image_processing/test'

# Create train, validation, and test directories if they don't exist.
# The loop variable is named `split_dir` so it does not overwrite the
# notebook-level `directory` variable used by the label-indexing cell.
for split_dir in [train_dir, valid_dir, test_dir]:
    os.makedirs(os.path.join(split_dir, 'images'), exist_ok=True)
    os.makedirs(os.path.join(split_dir, 'labels'), exist_ok=True)


def move_files(files, source_dir, dest_dir, label_dir=None):
    """Move images and their label files into one split directory.

    Args:
        files: Image file names (not paths) located in ``source_dir``.
        source_dir: Directory currently holding the images.
        dest_dir: Split root; images go to ``dest_dir/images`` and labels to
            ``dest_dir/labels``.
        label_dir: Directory holding the ``<stem>.txt`` label files. Defaults
            to the sub-folder of ``label_source_dir`` that has the same name
            as ``source_dir``.

    An image whose label file does not exist is still moved and a warning is
    printed, instead of raising after the image has already been moved.
    """
    if label_dir is None:
        class_name = os.path.basename(os.path.normpath(source_dir))
        label_dir = os.path.join(label_source_dir, class_name)

    for file_name in files:
        # Move the image
        source_image_path = os.path.join(source_dir, file_name)
        dest_image_path = os.path.join(dest_dir, 'images', file_name)
        shutil.move(source_image_path, dest_image_path)

        # Move the corresponding label (same stem, .txt extension) if present
        label_file_name = os.path.splitext(file_name)[0] + '.txt'
        source_label_path = os.path.join(label_dir, label_file_name)
        if os.path.isfile(source_label_path):
            dest_label_path = os.path.join(dest_dir, 'labels', label_file_name)
            shutil.move(source_label_path, dest_label_path)
        else:
            print(f"Warning: no label file for '{file_name}' (expected {source_label_path})")


# Get a list of all subdirectories in the image source directory
subdirectories = sorted(next(os.walk(image_source_dir))[1])

# Iterate over each subdirectory
for subdirectory in subdirectories:
    class_image_dir = os.path.join(image_source_dir, subdirectory)

    # Regular files only (nested directories are not images). Sorted because
    # os.listdir order is arbitrary: seeding the RNG only gives a reproducible
    # split if the list being shuffled always starts in the same order.
    image_files = sorted(
        name for name in os.listdir(class_image_dir)
        if os.path.isfile(os.path.join(class_image_dir, name))
    )

    # Shuffle the list of image files
    random.seed(42)  # For reproducibility
    random.shuffle(image_files)

    # 70% train, 20% validation; the remainder (about 10%) is the test set
    total_files = len(image_files)
    train_count = int(total_files * 0.7)
    valid_count = int(total_files * 0.2)

    # Split the list of image files into train, validation, and test sets
    train_files = image_files[:train_count]
    valid_files = image_files[train_count:train_count + valid_count]
    test_files = image_files[train_count + valid_count:]

    # Move each subset, together with its labels, into its split directory
    move_files(train_files, class_image_dir, train_dir)
    move_files(valid_files, class_image_dir, valid_dir)
    move_files(test_files, class_image_dir, test_dir)

print("Dataset split successfully into train, validation, and test sets.")

Dataset split successfully into train, validation, and test sets.
