## Import Libraries

In [None]:
# Import standard libraries
import os
import random
import shutil
import pathlib

import pandas as pd
import numpy as np
from collections import defaultdict
import ace_tools as tools

# Import PIL for image processing
from PIL import Image

# Import matplotlib for plotting
import matplotlib.pyplot as plt

# Import TensorFlow and Keras libraries
import tensorflow as tf
from tensorflow.keras import layers, models, optimizers
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv2D, MaxPooling2D, Flatten, Dropout, Dense, GlobalAveragePooling2D
from tensorflow.keras.callbacks import ModelCheckpoint, EarlyStopping
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.applications import EfficientNetV2B0, MobileNetV2

## Data Preparation

In [None]:
# Root directory of the dataset; each immediate subdirectory is treated as one
# hair-type class.
dataset_path = 'datasets/hair-type-dataset'

# os.scandir yields entries in arbitrary, filesystem-dependent order, so the
# names are sorted to give the same class order on every machine and run.
# The `with` block closes the directory handle once the listing has been read.
with os.scandir(dataset_path) as entries:
    subfolders = sorted(entry.name for entry in entries if entry.is_dir())
print("Subfolders (hair types):", subfolders)

Count the images in each class folder and tally how many images share each resolution.

In [None]:
# Per-class summary keyed by subfolder name. Each value holds the number of
# regular files in that folder and a tally of (width, height) -> image count.
image_info = {}

for subfolder in subfolders:
    folder_path = os.path.join(dataset_path, subfolder)

    # Keep regular files only; nested directories are ignored.
    file_names = []
    for entry_name in os.listdir(folder_path):
        if os.path.isfile(os.path.join(folder_path, entry_name)):
            file_names.append(entry_name)

    # Tally how many readable images share each (width, height).
    size_tally = defaultdict(int)
    for file_name in file_names:
        file_path = os.path.join(folder_path, file_name)
        try:
            with Image.open(file_path) as img:
                w, h = img.size
                size_tally[w, h] += 1
        except Exception as e:
            # Best effort: report the unreadable file and keep scanning.
            print(f"Error opening image {file_name}: {e}")

    # "num_images" counts every regular file, including any that failed to
    # open above, so it can exceed the sum of the resolution tallies.
    summary = {}
    summary["num_images"] = len(file_names)
    summary["resolution_count"] = dict(size_tally)  # plain dict, not defaultdict
    image_info[subfolder] = summary

# Report the summary for each class folder, one blank line between classes.
for class_name, class_summary in image_info.items():
    print(f"Subfolder: {class_name}")
    print(f"Number of images: {class_summary['num_images']}")
    print("Resolutions and counts:")
    for size, n_images in class_summary['resolution_count'].items():
        print(f"  Resolution {size}: {n_images} image(s)")
    print()

In [None]:
# Flatten the per-class resolution tallies into one row per
# (subfolder, resolution) pair so they can be inspected as a single table.
rows = [
    {
        'Subfolder': subfolder,
        'Resolution': f"{resolution[0]}x{resolution[1]}",
        'Image Count': count,
    }
    for subfolder, info in image_info.items()
    for resolution, count in info['resolution_count'].items()
]

# Pass the column list explicitly so the frame keeps its schema even when no
# image could be read and `rows` is empty.
df = pd.DataFrame(rows, columns=['Subfolder', 'Resolution', 'Image Count'])

# Display the table as the cell's result. This replaces
# `tools.display_dataframe_to_user`, which comes from `ace_tools`, a helper
# that is only available inside the ChatGPT code-interpreter sandbox and
# fails in an ordinary Jupyter kernel.
# NOTE(review): the `import ace_tools as tools` line in the imports cell is no
# longer used by this cell; remove it if no other cell needs it.
df