In [1]:
import os
import csv
import face_recognition
import numpy as np
from PIL import Image
from sklearn.cluster import KMeans
from scipy.spatial import distance
import pandas as pd
import random

# Reference RGB values used to name the sampled hair colour; the entry closest
# to the sample (Euclidean distance in RGB space) wins.
_HAIR_COLORS = {
    'black': [0, 0, 0],
    'brown': [165, 42, 42],
    'blonde': [255, 215, 0],
    'red': [255, 0, 0],
    'gray': [128, 128, 128],
}


def _crop(image, top, bottom, left, right):
    """Return image[top:bottom, left:right] with every bound clamped to the frame.

    Clamping matters because a negative slice bound would otherwise be read by
    NumPy as "count from the end", silently yielding an empty or wrong region.
    """
    height, width = image.shape[:2]
    top = min(max(int(top), 0), height)
    bottom = min(max(int(bottom), 0), height)
    left = min(max(int(left), 0), width)
    right = min(max(int(right), 0), width)
    return image[top:bottom, left:right]


def _mean_color(region):
    """Return the per-channel mean colour of region, or black if it has no pixels.

    The mean is what a one-cluster k-means converges to, so it serves as the
    "dominant" colour without fitting a model.
    """
    if region.size == 0:
        return np.array([0, 0, 0])
    # assumes three colour channels on the last axis — TODO confirm for
    # images not loaded in RGB mode
    return region.reshape(-1, 3).mean(axis=0)


def _is_dark(region, threshold=100):
    """Return True when region has pixels and their mean intensity is below threshold."""
    return bool(region.size > 0 and np.mean(region) < threshold)


def _pixels_at(image, points):
    """Return the pixels at the given (x, y) points, clamping each point to the frame."""
    pts = np.asarray(points, dtype=int)
    height, width = image.shape[:2]
    cols = np.clip(pts[:, 0], 0, width - 1)
    rows = np.clip(pts[:, 1], 0, height - 1)
    return image[rows, cols]


def analyze_face(image_path):
    """Derive rough, heuristic facial attributes for the first face in an image.

    All attributes come from fixed pixel offsets around facial landmarks and
    arbitrary thresholds; they are placeholders, not validated measurements.

    Args:
        image_path: Path of the image file handed to
            ``face_recognition.load_image_file``.

    Returns:
        On success, a dict with the keys ``eye_distance`` (float),
        ``hair_color`` (one of the ``_HAIR_COLORS`` names), ``skin_tone``
        (list of three channel means), ``receding_hairline``, ``face_shape``
        (``'round'``, ``'oval'`` or ``'long'``), ``has_beard``,
        ``has_mustache``, ``estimated_gender`` (``'male'`` or ``'female'``),
        ``face_encoding`` (list), ``bald``, ``narrow_eyes``, ``has_glasses``
        and ``high_cheekbones``.  Note that ``narrow_eyes`` and
        ``high_cheekbones`` are raw ratios (floats), not booleans.

        On failure, a human-readable string: ``"No face found in the image."``
        or ``"No face encoding could be generated."``.
    """
    image = face_recognition.load_image_file(image_path)

    face_landmarks_list = face_recognition.face_landmarks(image)
    if len(face_landmarks_list) == 0:
        return "No face found in the image."
    # Only the first face found is analysed.
    face_landmarks = face_landmarks_list[0]

    face_encodings = face_recognition.face_encodings(image)
    if len(face_encodings) == 0:
        return "No face encoding could be generated."
    face_encoding = face_encodings[0]

    # Distance between the centroids of the two eye outlines.
    left_eye = np.mean(face_landmarks['left_eye'], axis=0)
    right_eye = np.mean(face_landmarks['right_eye'], axis=0)
    eye_distance = np.linalg.norm(left_eye - right_eye)

    # NOTE(review): the "forehead" anchor is really the first 'top_lip' point,
    # so the hair sample, forehead height and face height below are all
    # measured from the mouth.  With that anchor forehead_height is normally
    # negative and face_height is only the lip-to-chin span, which makes
    # receding_hairline and face_shape close to constant.  Kept as-is so the
    # output stays comparable; replace with a real hairline estimate before
    # trusting these fields.
    top_of_forehead = face_landmarks['top_lip'][0]
    anchor_x, anchor_y = int(top_of_forehead[0]), int(top_of_forehead[1])

    # Hair colour: 50 px tall, 50 px wide patch directly above the anchor.
    hair_sample = _crop(image, anchor_y - 50, anchor_y, anchor_x - 25, anchor_x + 25)
    dominant_color = _mean_color(hair_sample)
    hair_color = min(
        _HAIR_COLORS,
        key=lambda name: np.linalg.norm(np.array(_HAIR_COLORS[name]) - dominant_color),
    )

    # Skin tone: 20x20 px patch centred on the first nose-tip point.
    nose_tip = face_landmarks['nose_tip'][0]
    nose_x, nose_y = int(nose_tip[0]), int(nose_tip[1])
    skin_tone = _mean_color(
        _crop(image, nose_y - 10, nose_y + 10, nose_x - 10, nose_x + 10)
    )

    # Receding hairline (arbitrary threshold relative to the eye distance).
    brow_y = face_landmarks['left_eyebrow'][0][1]
    forehead_height = brow_y - top_of_forehead[1]
    receding_hairline = bool(forehead_height > eye_distance * 0.8)

    # Face shape from the height/width ratio of the jaw outline.
    jaw = face_landmarks['chin']
    chin = jaw[8]
    face_width = jaw[16][0] - jaw[0][0]
    face_height = chin[1] - top_of_forehead[1]
    face_ratio = face_height / face_width
    if face_ratio < 1.3:
        face_shape = 'round'
    elif face_ratio <= 1.7:
        face_shape = 'oval'
    else:
        face_shape = 'long'

    # Beard / mustache: a dark patch (mean intensity < 100, arbitrary) below
    # the lower lip or between nose and upper lip counts as facial hair.
    lower_lip = np.mean(face_landmarks['bottom_lip'], axis=0)
    chin_x = int(chin[0])
    has_beard = _is_dark(
        _crop(image, lower_lip[1], chin[1], chin_x - 20, chin_x + 20)
    )
    has_mustache = _is_dark(
        _crop(image, nose_y, anchor_y, nose_x - 20, nose_x + 20)
    )

    # Gender guess from jaw width relative to brow-to-chin height.
    # This is a placeholder and would need a proper model for accuracy.
    brow_to_jaw = chin[1] - brow_y
    gender_ratio = face_width / brow_to_jaw
    estimated_gender = 'male' if gender_ratio > 1.1 else 'female'

    # "Cheekbone" width is approximated by the span between the last point of
    # each eyebrow, reported relative to the mouth width.
    cheekbone_width = distance.euclidean(
        face_landmarks['left_eyebrow'][-1], face_landmarks['right_eyebrow'][-1]
    )
    mouth_width = distance.euclidean(
        face_landmarks['top_lip'][0], face_landmarks['top_lip'][6]
    )
    high_cheekbones = float(cheekbone_width / mouth_width)

    # Glasses: very bright pixels on the eye outlines (threshold depends on
    # overall image brightness).
    eye_outline = np.concatenate(
        [face_landmarks['left_eye'], face_landmarks['right_eye']]
    )
    has_glasses = bool(np.mean(_pixels_at(image, eye_outline)) > 200)

    # Eye width (outline point 0 to point 3) relative to the eye distance.
    left_eye_width = distance.euclidean(
        face_landmarks['left_eye'][0], face_landmarks['left_eye'][3]
    )
    right_eye_width = distance.euclidean(
        face_landmarks['right_eye'][0], face_landmarks['right_eye'][3]
    )
    narrow_eyes = float((left_eye_width + right_eye_width) / (2 * eye_distance))

    # Baldness: the hair patch is very bright and classified as a light colour.
    bald = bool(
        hair_sample.size > 0
        and np.mean(hair_sample) > 200
        and hair_color in ('gray', 'blonde')
    )

    return {
        'eye_distance': float(eye_distance),
        'hair_color': hair_color,
        'skin_tone': skin_tone.tolist(),
        'receding_hairline': receding_hairline,
        'face_shape': face_shape,
        'has_beard': has_beard,
        'has_mustache': has_mustache,
        'estimated_gender': estimated_gender,
        'face_encoding': face_encoding.tolist(),
        'bald': bald,
        'narrow_eyes': narrow_eyes,
        'has_glasses': has_glasses,
        'high_cheekbones': high_cheekbones
    }

# Root of the LFW (deep-funneled) dataset: one sub-directory per person.
lfw_path = 'lfw-deepfunneled/lfw-deepfunneled'

# Get all person directories
person_dirs = [d for d in os.listdir(lfw_path) if os.path.isdir(os.path.join(lfw_path, d))]

# Randomly select up to 20 people (fewer if the dataset is smaller)
selected_people = random.sample(person_dirs, min(20, len(person_dirs)))

# One analysis dict per successfully analysed person
results = []

for person in selected_people:
    person_dir = os.path.join(lfw_path, person)
    image_files = [f for f in os.listdir(person_dir) if f.endswith('.jpg')]

    # Nothing to analyse for a person without any .jpg image
    if not image_files:
        continue

    # If the person has multiple images, randomly select one
    image_path = os.path.join(person_dir, random.choice(image_files))

    try:
        analysis = analyze_face(image_path)
    except Exception as e:
        # Best effort: one unreadable image must not abort the whole run
        print(f"Error processing {image_path}: {str(e)}")
        continue

    # analyze_face returns a dict on success and a message string on failure
    if isinstance(analysis, dict):
        analysis['name'] = person
        results.append(analysis)
    else:
        print(f"Failed to analyze {image_path}: {analysis}")

# Convert results to a DataFrame
df = pd.DataFrame(results)

csv_path = 'facial_analysis_results.csv'

if results:
    # Reorder columns to have 'name' first
    columns = ['name'] + [col for col in df.columns if col != 'name']
    df = df[columns]

    # Save to CSV
    df.to_csv(csv_path, index=False)

    # Report the number of people actually analysed, which can be below 20
    print(f"Analysis complete. Results for {len(results)} random people saved to {csv_path}")
else:
    # With no successful analysis the frame has no 'name' column to reorder,
    # so there is nothing meaningful to write.
    print("Analysis complete. No faces could be analyzed; nothing was saved.")

Analysis complete. Results for 20 random people saved to facial_analysis_results.csv
