<a href="https://colab.research.google.com/github/yevgenm/face_recognition/blob/main/facial_recognition_ai_python_with_comments.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

## Preliminaries

In [1]:
%%capture
# Suppress the output of all commands in this cell.

%mkdir yearbook
# Create a 'yearbook' directory.

%cd yearbook
# Change current directory to 'yearbook'.

!pip install --upgrade --no-cache-dir gdown
# Install (or upgrades) 'gdown' library, to download files from Google Drive.

!gdown --id "1NHT8NN8ClBEnUC5VqkP3wr2KhyiIQzyU"
# Download a file from Google Drive using its ID.

!unzip PHfiles.zip
# Extract the contents of 'PHfiles.zip'.

%mkdir images
# Create an 'images' directory.

!pip install PyMuPDF
# Install PyMuPDF library for PDF processing.

!pip install dlib
# Install dlib library for image processing and face detection.

!pip install DeepFace
# Install DeepFace library for facial recognition and analysis.

import os, shutil, fitz, cv2, numpy as np, pandas as pd, dlib, tensorflow as tf
# Import various libraries for file handling, vision, and machine learning.

from os.path import dirname, join
# Import functions for working with file paths.

from deepface import DeepFace
# Import DeepFace for face analysis.

## PDF Conversion

In [None]:
# Convert every PDF in the current directory into one PNG per page, stored in
# './images/<pdf name without extension>/'.
#
# All locations are built as explicit paths instead of repeatedly changing the
# working directory, so an error part-way through cannot leave the notebook
# stranded in the wrong folder, and the cell can be re-run safely.

path = r'./'
# Base directory: the current working directory.

pdfs = [f for f in os.listdir(path) if f.endswith('.pdf')]
# All PDF files found in the base directory.

for pdf in pdfs:

    # Folder named after the PDF without its extension. splitext() removes
    # only the final '.pdf', so a name such as 'class.of.1911.pdf' is not
    # truncated at its first dot.
    newdir = os.path.join(path, 'images', os.path.splitext(pdf)[0])

    # exist_ok=True: re-running the cell must not fail on an existing folder.
    os.makedirs(newdir, exist_ok=True)

    print("Now copying images into " + (newdir))

    # Keep a copy of the PDF next to its page images (same on-disk layout as
    # before).
    shutil.copy(os.path.join(path, pdf), newdir)

    # Open the copied PDF with PyMuPDF ('fitz' when used from inside Python);
    # the 'with' block closes the document even if rendering a page fails.
    with fitz.open(os.path.join(newdir, pdf)) as doc:

        for page in doc:

            # Render the current page to a pixmap (raster image).
            pix = page.get_pixmap()

            # Save the page as 'page-<number>.png' inside the PDF's folder.
            pix.save(os.path.join(newdir, "page-%i.png" % page.number))

## Object Detection and Facial Recognition: Code

In [None]:
# Detect faces on every page image and save each one as a padded crop in
# './images/<folder>/<folder> faces/'. Page images are deleted once processed.
#
# NOTE: this cell finishes with the working directory set to 'images'; the
# smile-identification cell relies on that.

path = r'./'
# Base for relative paths (the working directory).

XPADDING = 20
# Horizontal padding (pixels) kept around each detected face.

YPADDING = 40
# Vertical padding (pixels) kept around each detected face.

if os.path.basename(os.getcwd()) != 'images':
    os.chdir(os.path.join(path, 'images'))
# Enter 'images' once (skipped when a re-run starts there already); everything
# below uses paths relative to it instead of changing directory per folder.

face_cascade = cv2.CascadeClassifier(cv2.data.haarcascades + "haarcascade_frontalface_default.xml")
# Load the Haar Cascade face detector once; it does not change between pages.

subdirs = sorted(d for d in os.listdir(path) if os.path.isdir(os.path.join(path, d)))
# Folders to process; stray files inside 'images' are ignored.

for subdir in subdirs:

    folder = os.path.join(path, subdir)
    faces_dir = os.path.join(folder, subdir + ' faces')

    if os.path.exists(faces_dir):
        # This folder was already processed by an earlier run.
        continue

    print("New 'faces' directory created in " + (subdir) + " folder")
    os.makedirs(faces_dir)

    pngs = [f for f in os.listdir(folder) if f.endswith('.png')]
    # Page images of this folder.

    for png in pngs:

        png_path = os.path.join(folder, png)
        image = cv2.imread(png_path)

        if image is None:
            # cv2.imread returns None instead of raising when a file cannot
            # be decoded; keep the file and move on.
            print("Could not read " + png_path)
            continue

        greyscale_image = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
        # The cascade detector works on a greyscale image.

        detected_faces = face_cascade.detectMultiScale(image=greyscale_image, scaleFactor=1.9, minNeighbors=4)
        # Bounding boxes (x, y, w, h) of the detected faces.

        # 'count' numbers the faces found on this page, starting at 1; it is
        # the prefix of the saved file name.
        for count, (x, y, w, h) in enumerate(detected_faces, start=1):

            # Clamp the padded box at the top/left edge. A negative start
            # index would wrap around to the other side of the image and
            # produce an empty crop, losing faces close to the border.
            # (Indices past the bottom/right edge are clipped by the slice.)
            top = max(0, y - YPADDING)
            left = max(0, x - XPADDING)
            crop_face = image[top : y + h + YPADDING, left : x + w + XPADDING]

            # No rectangle is drawn on the crop: (x, y) are page coordinates,
            # so drawing with them on the crop either did nothing or left
            # stray lines on the face (and, as the crop is a view, on 'image').

            try:
                target = os.path.join(faces_dir, str(count) + '_' + png)
                if not cv2.imwrite(target, crop_face):
                    raise IOError("could not write " + target)

            except Exception as err:
                # Report the failure and carry on with the next face.
                print("An error happened: " + str(err))
                continue

        os.remove(png_path)
        # Remove the page image once its faces have been extracted.


## Identify Smiles: Code

In [None]:
# Classify every extracted face per year with DeepFace and write the share of
# smiling ('happy') faces, non-smiling faces and failed analyses to
# 'YearbookOutput.csv' (columns: Years, Smiles, Non-Smiles, Error Weight).
#
# Each year is processed on its own, so every image is analysed exactly once
# and counted for the year whose folder it is in.

if os.path.basename(os.getcwd()) == 'images':
    os.chdir('..')
# Move to the parent directory. The face-detection cell leaves the working
# directory inside 'images'; stepping up only in that case keeps this cell
# safe to re-run.

years = ['1911', '1921', '1931', '1941', '1951', '1961']
# Years representing the dataset folders.

smile_counts = []
# Share of smiling faces, one entry per year.

nonsmile_counts = []
# Share of non-smiling faces, one entry per year.

error_counts = []
# Share of images whose analysis failed, one entry per year.

count = 0
# Number of face images in the most recently processed year.

for year in years:

    faces_dir = os.path.join('./images', year, year + ' faces')
    # Folder holding the cropped faces of this year.

    if os.path.isdir(faces_dir):
        pngs = sorted(os.path.join(faces_dir, f) for f in os.listdir(faces_dir) if f.endswith('.png'))
    else:
        # Keep the year in the output (with empty values) so every list stays
        # aligned with 'years'.
        print("No faces folder found for " + year)
        pngs = []

    number_smiles = 0
    number_nonsmiles = 0
    num_errors = 0

    for png in pngs:

        try:
            demography = DeepFace.analyze(png, actions=['emotion'])
            # Analyze the face image for emotions using DeepFace.

            print(demography)

            # Recent DeepFace versions return a list with one result per
            # detected face; older ones return a single dict.
            result = demography[0] if isinstance(demography, list) else demography

            if result['dominant_emotion'] == 'happy':
                number_smiles = number_smiles + 1
            else:
                number_nonsmiles = number_nonsmiles + 1

        except Exception as err:
            # Count the failure, report it, and continue with the next image.
            num_errors = num_errors + 1
            print("An error happened while analysing " + png + ": " + str(err))

    count = len(pngs)

    if count:
        smile_counts.append(number_smiles / count)
        nonsmile_counts.append(number_nonsmiles / count)
        error_counts.append(num_errors / count)
    else:
        # No images: the shares are undefined (written as empty CSV cells).
        smile_counts.append(float('nan'))
        nonsmile_counts.append(float('nan'))
        error_counts.append(float('nan'))

results = {'Years': years,
           'Smiles': smile_counts,
           'Non-Smiles': nonsmile_counts,
           "Error Weight": error_counts}
# Results for each year (named 'results' so the builtin 'dict' is not shadowed).

data = pd.DataFrame(results)
# Create a DataFrame from the results.

data.to_csv('YearbookOutput.csv', index=False)
# Save the results to a CSV file.

print(count)
# Print the number of face images in the last processed year.


## Download and Results

In [5]:
# Download the results file through the browser when running in Google Colab.
# 'google.colab' only exists inside Colab, so the import is guarded: in any
# other environment the cell reports where the file is instead of failing.
try:
    from google.colab import files
except ImportError:
    print("Not running in Google Colab; 'YearbookOutput.csv' is in " + os.getcwd())
else:
    files.download('YearbookOutput.csv')
    # Download the CSV file to your local system.

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>