In [3]:
from methods_data_collection import *

In [4]:
# Shared embedding model; later cells call `embedder.embeddings(...)` and `embedder.extract(...)`.
# NOTE(review): `FN` arrives through the star import above — presumably a FaceNet wrapper; confirm in methods_data_collection.
embedder = FN()

This notebook contains the generation of face embeddings for:
1. News images <br>
   a. NOS.nl <br>
   b. NU.nl <br>
2. Politicians' images, after manual correction of the folders in which the politicians' faces were stored, such that a folder carrying a politician's label contains pictures of that politician exclusively.

In [None]:
# The embedding model (`embedder`) is instantiated in an earlier cell.

# Define the path to the folder containing cropped face images
folder_path = 'datasets/images/isolated_news_faces_nos_test'

# Function to preprocess image
def preprocess_image(image_path):
    """Load an image file as a 160x160 RGB array scaled to [0, 1].

    Args:
        image_path: Path of the image file to open.

    Returns:
        Array of shape (160, 160, 3) holding the pixel values divided by 255.

    NOTE(review): if the embedder applies its own input normalization, the
    [0, 1] scaling done here would be stacked on top of it — confirm that
    this pre-scaling is what the embedder expects.
    """
    # The context manager releases the file handle as soon as the pixel data
    # has been copied into the converted image.
    with Image.open(image_path) as source_image:
        resized = source_image.convert('RGB').resize((160, 160))

    # Normalize pixel values to range [0, 1]
    return np.asarray(resized) / 255.0

# Embed every .jpg/.png file in `folder_path` one image at a time and save
# the stacked result to 'face_embeddings.npy'.

# Get list of image files. os.listdir returns names in arbitrary order, and
# the saved array carries no file names, so the list is sorted to make row i
# of the output always refer to the same file.
image_files = sorted(
    f for f in os.listdir(folder_path) if f.endswith(('.jpg', '.png'))
)

# Store embeddings in a list
embeddings = []

for image_file in image_files:
    image_path = os.path.join(folder_path, image_file)
    img = preprocess_image(image_path)
    # Expand dimensions to match the input shape for the model (1, 160, 160, 3)
    img = np.expand_dims(img, axis=0)
    # Calculate the embedding
    # NOTE(review): each call receives a batch of one image, so the stacked
    # array presumably keeps a singleton axis per image — confirm the shape
    # expected by whatever reads 'face_embeddings.npy'.
    embedding = embedder.embeddings(img)
    embeddings.append(embedding)

# Convert list of embeddings to numpy array
embeddings = np.array(embeddings)

# Optionally, save embeddings to a file
np.save('face_embeddings.npy', embeddings)

print(f'Calculated embeddings for {len(embeddings)} images.')

In [3]:
# Accumulators filled while embedding the news images
X = []                 # face embeddings
y = []                 # dataset label, one per embedding
z = []                 # source file name, one per embedding
empty_embeddings = []  # files for which no embedding was obtained

In [4]:
# Folder that is scanned for the .jpg images to embed
data_folder = 'datasets/images/isolated_news_faces_nos_test'
# Label passed to process_images and stored alongside each embedding it returns
dataset = 'NOS_test'

In [4]:
# Method to extract the face embedding of an image
def get_face_embedding(face_image):
    """Return the embedding of the first face detected in ``face_image``.

    Uses the notebook-level ``embedder`` with a detection threshold of 0.95.

    Args:
        face_image: Image handed unchanged to ``embedder.extract``.

    Returns:
        The ``'embedding'`` entry of the first detection, or ``None`` when
        the embedder reports no detections.
    """
    detections = embedder.extract(face_image, threshold=0.95)

    # Nothing detected: let the caller decide how to record the image
    if len(detections) == 0:
        return None

    # Only the first detection is used, even if several are returned
    return detections[0]['embedding']

In [None]:
# Embed every .jpg in `data_folder` one by one, collecting the embedding (X),
# its label (y) and its file name (z); files in which no face is found are
# listed in `empty_embeddings`.
for iso_news_photo in tqdm(os.listdir(data_folder), desc='Embedding, embedding, embedding on the wall...'):

    if not iso_news_photo.endswith('.jpg'):
        continue

    iso_news_photo_path = os.path.join(data_folder, iso_news_photo)

    # Decode the file once and reuse the array for both the readability
    # check and the embedding call.
    # NOTE(review): cv2.imread yields BGR channel order — confirm that
    # embedder.extract accepts BGR input.
    news_image = cv2.imread(iso_news_photo_path)
    if news_image is None:
        # Unreadable files are skipped without being recorded
        continue

    face_embedding = get_face_embedding(news_image)

    # Leave out None-embedded images
    if face_embedding is not None:
        X.append(face_embedding)
        # NOTE(review): this label is hard-coded and differs from the
        # `dataset` value ('NOS_test') — confirm which one downstream expects.
        y.append('news_faces_NOS_test')
        z.append(iso_news_photo)
    else:
        empty_embeddings.append(iso_news_photo)

In [5]:
import os
import cv2
from tqdm import tqdm

def process_images(data_folder, dataset, embedder):
    """Embed all ``.jpg`` images of a folder with one batched embedder call.

    Args:
        data_folder: Directory scanned (non-recursively) for files whose
            name ends in ``.jpg``.
        dataset: Label stored in ``y`` for every embedding.
        embedder: Object exposing ``embeddings(list_of_rgb_images)``. It is
            assumed to return one embedding per input image, in input order
            (TODO confirm against the embedder implementation).

    Returns:
        Tuple ``(X, y, z, empty_embeddings)`` where ``X`` holds the
        embeddings, ``y`` the ``dataset`` label repeated per embedding,
        ``z`` the matching file names, and ``empty_embeddings`` the full
        paths of files that could not be read or colour-converted.

    Errors raised by the embedder are caught and printed; ``X``, ``y`` and
    ``z`` are then returned with whatever was stored before the failure.
    """
    X, y, z = [], [], []
    empty_embeddings = []
    images_to_process = []
    image_paths = []

    # First pass: collect all images. Sorting makes the output order
    # reproducible, since os.listdir returns names in arbitrary order.
    for iso_news_photo in tqdm(sorted(os.listdir(data_folder)), desc='Preparing images'):
        if not iso_news_photo.endswith('.jpg'):
            continue

        iso_news_photo_path = os.path.join(data_folder, iso_news_photo)
        image = cv2.imread(iso_news_photo_path)

        if image is None:
            # Report unreadable files to the caller instead of only printing
            print(f"Warning: Unable to read image {iso_news_photo_path}")
            empty_embeddings.append(iso_news_photo_path)
            continue

        try:
            # cv2.imread yields BGR; the embedder is given RGB
            image_rgb = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
        except Exception as e:
            print(f"Error processing image {iso_news_photo_path}: {e}")
            empty_embeddings.append(iso_news_photo_path)
            continue

        images_to_process.append(image_rgb)
        image_paths.append(iso_news_photo_path)

    # Nothing to embed: skip calling the model on an empty batch
    if not images_to_process:
        print("No readable images found; skipping embedding extraction.")
        return X, y, z, empty_embeddings

    # Second pass: extract embeddings in batch
    try:
        print("Extracting embeddings...")
        embeddings = embedder.embeddings(images_to_process)
        print("Embeddings extracted.")

        # Iterate through the results and store them
        for embedding, path in zip(embeddings, image_paths):
            X.append(embedding)
            y.append(dataset)
            z.append(os.path.basename(path))

    except Exception as e:
        print(f"Error extracting embeddings: {e}")

    return X, y, z, empty_embeddings


In [None]:
# Batch-embed the folder; this rebinds X, y, z and empty_embeddings, replacing
# anything accumulated in those names by earlier cells.
X, y, z, empty_embeddings = process_images(data_folder, dataset, embedder)

Preparing images: 100%|███████████████████| 1575/1575 [00:00<00:00, 5696.00it/s]


Extracting embeddings...
