<a href="https://colab.research.google.com/github/vipin-jangra/face-age-estimation-CNN/blob/main/UAGDDataset_prepration.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
import os
from zipfile import ZipFile
import shutil
import pandas as pd
from PIL import Image
from sklearn.model_selection import train_test_split

In [2]:
from google.colab import drive

drive.mount('/content/drive')

Mounted at /content/drive


In [3]:
#Unzipping the dataset file combined_faces.zip

faces_zip_path = "/content/drive/MyDrive/Dataset/Dataset2/uagd_dataset.zip";

with ZipFile(faces_zip_path, 'r') as myzip:
    myzip.extractall()
    print('Done unzipping faces.zip')

Done unzipping faces.zip


In [4]:
# Define age ranges
age_ranges = [(1, 2), (3, 9), (10, 20), (21, 27), (28, 45), (46, 65), (66, 116)]

In [5]:
def categorize_age(age):
    for range_name, (start, end) in enumerate(age_ranges):
        if start <= age <= end:
            return range_name
    return None

In [7]:
dataset_dir_train = '/content/uagd_dataset/train'
# Get the list of all image file paths

image_paths_train = [os.path.join(dataset_dir_train, fname) for fname in os.listdir(dataset_dir_train) if fname.endswith('.jpg')]
print(f"Images shape: {len(image_paths_train)}")

Images shape: 7851


In [9]:
dataset_dir_val = '/content/uagd_dataset/val'
# Get the list of all image file paths

image_paths_val = [os.path.join(dataset_dir_val, fname) for fname in os.listdir(dataset_dir_val) if fname.endswith('.jpg')]
print(f"Images shape: {len(image_paths_val)}")

Images shape: 4000


In [10]:
dataset_path_train = '/content/uagd_dataset/train'  # Change this to your dataset folder
# Extract age labels from filenames
ages_train = [int(fname.split('_')[1]) for fname in os.listdir(dataset_path_train) if fname.endswith('.jpg')]
print(f"Ages shape: {len(ages_train)}")

Ages shape: 7851


In [11]:
dataset_path_val = '/content/uagd_dataset/val'  # Change this to your dataset folder
# Extract age labels from filenames
ages_val = [int(fname.split('_')[1]) for fname in os.listdir(dataset_path_val) if fname.endswith('.jpg')]
print(f"Ages shape: {len(ages_val)}")

Ages shape: 4000


In [None]:
# Categorize ages into ranges
age_categories = [categorize_age(age) for age in ages]

In [12]:
import collections
# Function to print the distribution of age ranges
def print_age_distribution(labels, dataset_type):
    counter = collections.Counter(labels)
    print(f"{dataset_type} Age Distribution:")
    for age_range, count in counter.items():
        print(f"Age range {age_range}: {count} samples")

In [14]:


def preprocess_and_split_dataset(dataset_path, output_path, resize_dim=(224, 224), test_size=0.2):
    # Create output directories for training and testing
    train_dir = os.path.join(output_path, 'train')
    test_dir = os.path.join(output_path, 'test')
    os.makedirs(train_dir, exist_ok=True)
    os.makedirs(test_dir, exist_ok=True)



        # Print distribution for training set
    print_age_distribution(ages_train, "Training")

    # Print distribution for validation set
    print_age_distribution(ages_val, "Validation")

    # Save training images and create train CSV
    train_data = []
    for img_path, label in zip(image_paths_train, ages_train):
        try:
            # img = Image.open(img_path)  # Open the image
            # img = img.convert('L')  # Convert to grayscale
            # img = img.resize(resize_dim)  # Resize image
            # img.save(os.path.join(train_dir, os.path.basename(img_path)))  # Save image to train folder
            train_data.append({'filepath': os.path.join(train_dir, os.path.basename(img_path)), 'age': label})  # Full path
        except Exception as e:
            print(f"Error processing {img_path}: {e}")

    train_df = pd.DataFrame(train_data)
    train_df.to_csv(os.path.join(output_path, 'train_labels.csv'), index=False)

    # Save testing images and create test CSV
    test_data = []
    for img_path, label in zip(image_paths_val, ages_val):
        try:
            # img = Image.open(img_path)  # Open the image
            # img = img.convert('L')  # Convert to grayscale
            # img = img.resize(resize_dim)  # Resize image
            #img.save(os.path.join(test_dir, os.path.basename(img_path)))  # Save image to test folder
            test_data.append({'filepath': os.path.join(test_dir, os.path.basename(img_path)), 'age': label})  # Full path
        except Exception as e:
            print(f"Error processing {img_path}: {e}")

    test_df = pd.DataFrame(test_data)
    test_df.to_csv(os.path.join(output_path, 'test_labels.csv'), index=False)


    print(f"Dataset processed and split into '{train_dir}' and '{test_dir}' with labels saved as CSV.")




# Example usage
filepath = '/content/uagd_dataset'
output_path = '/content/UAGDDataset/UAGDDataset'   # Change this to your output folder
preprocess_and_split_dataset(filepath, output_path)


Training Age Distribution:
Age range 53: 100 samples
Age range 69: 100 samples
Age range 65: 100 samples
Age range 6: 89 samples
Age range 73: 100 samples
Age range 32: 100 samples
Age range 58: 100 samples
Age range 51: 100 samples
Age range 55: 100 samples
Age range 29: 100 samples
Age range 64: 100 samples
Age range 43: 100 samples
Age range 21: 100 samples
Age range 74: 80 samples
Age range 3: 100 samples
Age range 48: 100 samples
Age range 10: 100 samples
Age range 31: 100 samples
Age range 59: 100 samples
Age range 16: 100 samples
Age range 4: 100 samples
Age range 77: 85 samples
Age range 35: 100 samples
Age range 33: 100 samples
Age range 46: 100 samples
Age range 56: 100 samples
Age range 12: 100 samples
Age range 80: 100 samples
Age range 68: 100 samples
Age range 62: 100 samples
Age range 24: 100 samples
Age range 30: 100 samples
Age range 36: 100 samples
Age range 13: 100 samples
Age range 18: 100 samples
Age range 26: 100 samples
Age range 70: 100 samples
Age range 47: 100

In [15]:
import shutil

# Path to the output folder where 'train' and 'test' directories are created
output_path = '/content/UAGDDataset'  # Change this to your output folder path
shutil.make_archive(output_path, 'zip', output_path)

from google.colab import files
files.download(output_path + '.zip')  # Download the zip file


<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>