## Data Gathering

Import libraries

In [None]:
import opendatasets as od
import pandas as pd
import os
import matplotlib.pyplot as plt
import seaborn as sns
from PIL import Image

Download the dataset

In [None]:
# Fetch the hair-type dataset from Kaggle.
hair_type_dataset_url = "https://www.kaggle.com/datasets/kavyasreeb/hair-type-dataset"
od.download(hair_type_dataset_url)

In [None]:
# Move the downloaded dataset to the datasets folder.
downloaded_dir = "hair-type-dataset"
target_dir = "datasets/hair-type-dataset"

# os.rename does not create missing parent directories, so make sure the
# "datasets" folder exists before moving anything into it.
os.makedirs("datasets", exist_ok=True)

# Skip the move only when a previous run already relocated the dataset
# (source gone, target present); this keeps the cell safe to re-run.
# Any other state still goes through os.rename so real problems surface.
if os.path.exists(downloaded_dir) or not os.path.isdir(target_dir):
    os.rename(downloaded_dir, target_dir)

## Data Visualization and Exploration

In [None]:
# Root folder of the dataset; its immediate subdirectories are listed below
# as the hair-type classes.
dataset_path = 'datasets/hair-type-dataset'

In [None]:
# Each immediate subdirectory of the dataset root is one hair-type class.
# os.scandir yields entries in arbitrary order, so the names are sorted to
# keep the class order (and every table/plot derived from it) reproducible.
# The with-block closes the directory iterator as soon as it is consumed.
with os.scandir(dataset_path) as entries:
    subfolders = sorted(entry.name for entry in entries if entry.is_dir())
print("Subfolders (hair types):", subfolders)

In [None]:
# Tally how many files each hair-type folder holds.
# Note: every regular file is counted, whatever its extension.
image_counts = {}

for subfolder in subfolders:
    folder_path = os.path.join(dataset_path, subfolder)
    num_images = sum(
        1
        for entry in os.listdir(folder_path)
        if os.path.isfile(os.path.join(folder_path, entry))
    )
    image_counts[subfolder] = num_images

In [None]:
# Tabulate the per-class counts: one row per hair type.
df = pd.DataFrame(
    {
        'Hair Type': list(image_counts.keys()),
        'Image Count': list(image_counts.values()),
    }
)
df

In [None]:
# Side-by-side overview of the class balance: absolute counts on the left,
# relative shares on the right.
fig, (count_ax, share_ax) = plt.subplots(1, 2, figsize=(12, 6))

# Left panel: bar chart of image counts per hair type.
sns.barplot(x='Hair Type', y='Image Count', data=df, ax=count_ax)
count_ax.set_title('Count of Images per Hair Type')
plt.setp(count_ax.get_xticklabels(), rotation=45)

# Right panel: pie chart of each hair type's share of the images.
share_ax.pie(df['Image Count'], labels=df['Hair Type'], autopct='%1.1f%%', startangle=140)
share_ax.set_title('Proportion of Images per Hair Type')

fig.tight_layout()
plt.show()

In [None]:
def display_images_from_folders(dataset_path, subfolders, num_images=5):
    """Show a grid of sample images, one row per subfolder.

    Args:
        dataset_path: Directory that contains the subfolders.
        subfolders: Names of the subfolders of ``dataset_path`` to sample.
        num_images: Maximum number of images (grid columns) per subfolder.

    Returns:
        None. The figure is rendered with ``plt.show()``. Nothing is drawn
        when ``subfolders`` is empty or ``num_images`` is less than 1.
    """
    # plt.subplots cannot build a grid with zero rows or columns, so there
    # is nothing to display in these cases.
    if not subfolders or num_images < 1:
        return

    n_rows = len(subfolders)
    # squeeze=False keeps axs two-dimensional even for a single row or a
    # single column; otherwise row/column indexing fails for those shapes.
    _, axs = plt.subplots(
        n_rows,
        num_images,
        figsize=(15, 3 * n_rows),
        squeeze=False,
    )

    for row_axes, subfolder in zip(axs, subfolders):
        folder_path = os.path.join(dataset_path, subfolder)
        # Sorted so the same files are previewed on every run; os.listdir
        # returns names in arbitrary order.
        images = sorted(
            name
            for name in os.listdir(folder_path)
            if os.path.isfile(os.path.join(folder_path, name))
        )

        # Hide the frame of every cell up front so cells left without an
        # image (folder has fewer files than num_images) stay blank.
        for ax in row_axes:
            ax.axis('off')
        # Label the row on its first column.
        row_axes[0].set_title(subfolder, fontsize=12)

        # zip stops at the shorter sequence, i.e. min(num_images, len(images)).
        for ax, image_name in zip(row_axes, images):
            # The context manager closes the file handle once it is drawn.
            with Image.open(os.path.join(folder_path, image_name)) as img:
                ax.imshow(img)

    plt.tight_layout()
    plt.show()

# Preview up to 5 images from each hair-type folder.
display_images_from_folders(dataset_path, subfolders, num_images=5)