In [2]:
import cv2
import os
import numpy as np
from tensorflow.keras.preprocessing.image import ImageDataGenerator

def _preprocess_image(image, img_size):
    """Resize, denoise and background-mask a BGR image; return the masked image."""
    resized = cv2.resize(image, img_size)

    # Bilateral filtering reduces noise while preserving edges.
    filtered = cv2.bilateralFilter(resized, d=9, sigmaColor=75, sigmaSpace=75)

    # Inverted binary threshold: pixels whose gray level is <= 127 get mask
    # value 255 (kept); brighter pixels get 0 (blacked out).
    gray = cv2.cvtColor(filtered, cv2.COLOR_BGR2GRAY)
    _, mask = cv2.threshold(gray, 127, 255, cv2.THRESH_BINARY_INV)

    # The mask is applied to the filtered image, not to the raw input.
    return cv2.bitwise_and(filtered, filtered, mask=mask)


def _save_image(path, image, action):
    """Write ``image`` to ``path`` and print the outcome; return True on success."""
    # cv2.imwrite signals failure through its return value, so check it rather
    # than reporting success unconditionally.
    if cv2.imwrite(path, image):
        print(f"{action} and saved: {path}")
        return True
    print(f"Warning: Could not write image {path}")
    return False


def preprocess_and_augment(input_dir, output_dir, img_size=(128, 128), augmentations=5):
    """Preprocess every image under ``input_dir`` and write augmented variants.

    ``input_dir`` is expected to contain one sub-directory per person. For each
    readable image in such a sub-directory the function resizes it, applies a
    bilateral filter, masks it with an inverted binary threshold, and saves the
    result as ``processed_<name>`` in the matching sub-directory of
    ``output_dir``. It then saves ``augmentations`` randomly transformed copies
    as ``aug_<i>_<name>``.

    Args:
        input_dir: Directory holding one sub-directory per person.
        output_dir: Directory that receives the results; created if missing.
        img_size: Target size handed to ``cv2.resize``, i.e. (width, height).
        augmentations: Number of augmented copies written per source image.

    Raises:
        FileNotFoundError: If ``input_dir`` does not exist.
    """
    # List the input first so that a wrong input path fails before any output
    # directory is created. Sorting makes the processing order reproducible.
    person_names = sorted(os.listdir(input_dir))
    os.makedirs(output_dir, exist_ok=True)
    output_root = os.path.abspath(output_dir)

    # Data augmentation generator
    datagen = ImageDataGenerator(
        rotation_range=30,        # Rotate images by up to 30 degrees
        width_shift_range=0.1,    # Shift width by 10%
        height_shift_range=0.1,   # Shift height by 10%
        zoom_range=0.2,           # Zoom in and out by 20%
        horizontal_flip=True,     # Flip images horizontally
        fill_mode='nearest'       # Fill missing pixels using nearest neighbors
    )

    for person_name in person_names:
        person_folder = os.path.join(input_dir, person_name)
        if not os.path.isdir(person_folder):
            continue  # Skip files, only process directories

        # Never treat the output directory as a person folder when it happens
        # to live inside the input directory.
        if os.path.abspath(person_folder) == output_root:
            continue

        output_person_folder = os.path.join(output_dir, person_name)
        os.makedirs(output_person_folder, exist_ok=True)

        for img_name in sorted(os.listdir(person_folder)):
            img_path = os.path.join(person_folder, img_name)

            # cv2.imread returns None for anything it cannot decode.
            image = cv2.imread(img_path)
            if image is None:
                print(f"Warning: Could not read image {img_path}")
                continue

            foreground = _preprocess_image(image, img_size)
            preprocessed_path = os.path.join(output_person_folder, f"processed_{img_name}")
            _save_image(preprocessed_path, foreground, "Preprocessed")

            # The generator expects a batch: (1, height, width, channels).
            batch = np.expand_dims(foreground, axis=0)
            aug_iter = datagen.flow(batch, batch_size=1)
            for i in range(augmentations):
                # Cast back to uint8 so the image can be written to disk.
                augmented_image = next(aug_iter)[0].astype(np.uint8)
                aug_img_path = os.path.join(output_person_folder, f"aug_{i}_{img_name}")
                _save_image(aug_img_path, augmented_image, "Augmented")

# Dataset locations -- adjust both paths to match your environment.
input_directory = "Indian_actors_faces"  # Source folder with one sub-folder per person
output_directory = "augmented_dataset"   # Destination for processed and augmented images

# Run the pipeline, writing two augmented copies for every source image.
preprocess_and_augment(
    input_dir=input_directory,
    output_dir=output_directory,
    augmentations=2,
)

Preprocessed and saved: augmented_dataset\pankaj_kapur\processed_090288c011.jpg
Augmented and saved: augmented_dataset\pankaj_kapur\aug_0_090288c011.jpg
Augmented and saved: augmented_dataset\pankaj_kapur\aug_1_090288c011.jpg
Preprocessed and saved: augmented_dataset\pankaj_kapur\processed_0b6f381312.jpg
Augmented and saved: augmented_dataset\pankaj_kapur\aug_0_0b6f381312.jpg
Augmented and saved: augmented_dataset\pankaj_kapur\aug_1_0b6f381312.jpg
Preprocessed and saved: augmented_dataset\pankaj_kapur\processed_12fa72e5b3.jpg
Augmented and saved: augmented_dataset\pankaj_kapur\aug_0_12fa72e5b3.jpg
Augmented and saved: augmented_dataset\pankaj_kapur\aug_1_12fa72e5b3.jpg
Preprocessed and saved: augmented_dataset\pankaj_kapur\processed_1ab9ede00c.jpg
Augmented and saved: augmented_dataset\pankaj_kapur\aug_0_1ab9ede00c.jpg
Augmented and saved: augmented_dataset\pankaj_kapur\aug_1_1ab9ede00c.jpg
Preprocessed and saved: augmented_dataset\pankaj_kapur\processed_1c74bd375f.jpg
Augmented and sa