In [1]:
import cv2
import numpy as np
import pandas as pd
import os
import pyarrow as pa
import pyarrow.parquet as pq

from ast import literal_eval
from typing import List, Tuple
from deepface import DeepFace
from tqdm import tqdm
import face_recognition
from sklearn.manifold import TSNE
import sqlite3
import time

In [2]:
img_path = 'datasets/train_test/NL/Caroline_van_der_Plas/0d1e1220_Caroline van der Plas_0.jpg'
dummy_img = cv2.imread(img_path)
if dummy_img is None:
    dummy_img = (255 * np.random.rand(224, 224, 3)).astype(np.uint8)
    cv2.imwrite(img_path, dummy_img)

embedding_objs = DeepFace.represent(img_path, model_name="VGG-Face")
embedding_vector = embedding_objs[0]['embedding']
print(f"The dimensionality of the VGG-Face embedding vector is: {len(embedding_vector)}")

embedding_objs2 = DeepFace.represent(img_path, model_name="SFace")
embedding_vector2 = embedding_objs2[0]['embedding']
print(f"The dimensionality of the SFace embedding vector is: {len(embedding_vector2)}")

The dimensionality of the VGG-Face embedding vector is: 4096
The dimensionality of the SFace embedding vector is: 128


## Compute embeddings

Train/test set

In [3]:
# Set training path
training_path = 'datasets/train_test'
countries = ['NL', 'UK']

In [4]:
# For Deepface model choice
models = [[
    'VGG-Face',
    'Facenet',
    'Facenet512',
    'OpenFace',
    #'DeepFace',
    'ArcFace',
    'Dlib',
    'SFace'],
    'face_recognition'
]

In [5]:
# Embedding models for colnames
embedding_models = [
    'VGG-Face',
    'Facenet',
    'Facenet512',
    'OpenFace',
    #'DeepFace',
    'ArcFace',
    'Dlib',
    'SFace',
    'face_recognition'
]

# DF columns
embeddings_cols = ['embedding_' + embedder for embedder in embedding_models]

In [13]:
print(f'The files found in the train_test directory are listed: {os.listdir(os.path.join(training_path, "NL"))}')

The files found in the train_test directory are listed: ['nl_searchengine_imgref_results.pkl', 'Stephan_van_Baarle', 'Lilian_Marijnissen', 'Geert_Wilders', '.DS_Store', 'Caroline_van_der_Plas', 'Rob_Jetten', 'Pieter_Omtzigt', 'Laurens_Dassen', 'Dilan_Yesilgoz', 'Mirjam_Bikker', 'Joost_Eerdmans', 'Thierry_Baudet', 'Edson_Olf', 'Frans_Timmermans', 'Henri_Bontenbal', 'Esther_Ouwehand', 'Unknown', 'Chris_Stoffer', 'nl_searchengine_imgref_results.csv', 'Wybren_van_Haga']


In [18]:
def compute_face_embeddings(base_path, output_path, country, models):
    '''
    Function that computes the embeddings of the country-specific datasets with different embedder models
    '''
    input_path = os.path.join(base_path, country)
    output_file = os.path.join(output_path, f"{country}_embeddings.parquet")
    
    # Set up dataframe
    columns = ['country', 'politician', 'image_path']
    deepface_models = models[0]
    face_recognition_flag = "face_recognition" in models[1]
    
    # Create different embedding columns
    for model in deepface_models:
        columns.append(f'embedding_{model}')
    if face_recognition_flag:
        columns.append('embedding_face_recognition')
    
    df = pd.DataFrame(columns=columns)
    
    # Go over training classes, i.e. the politicians    
    politicians = [p for p in os.listdir(input_path) 
                 if os.path.isdir(os.path.join(input_path, p)) and p != 'Unknown']
    
    for politician in tqdm(politicians, desc=f"Processing {country}"):
        politician_path = os.path.join(input_path, politician)
        if not os.path.isdir(politician_path):
            continue
        
        print(f"Processing politician: {politician}")
        for img_file in os.listdir(politician_path):
            if not img_file.lower().endswith(('.jpg', '.jpeg', '.png')):
                continue
            
            img_path = os.path.join(politician_path, img_file)
            
            row_data = {
                'country': country,
                'politician': politician,
                'image_path': img_path
            }
            
            # Compute DeepFace
            for model in deepface_models:
                try:
                    embedding = DeepFace.represent(
                        img_path=img_path, 
                        model_name=model,
                        enforce_detection=False,
                        detector_backend="opencv"
                    )
                    if isinstance(embedding, list) and len(embedding) > 0:
                        row_data[f'embedding_{model}'] = embedding[0]["embedding"]
                    else:
                        row_data[f'embedding_{model}'] = None
                        print(f"No face found with {model} in {img_path}")
                except Exception as e:
                    print(f"Error with {model} on {img_path}: {e}")
                    row_data[f'embedding_{model}'] = None
            
            # Compute face_recognition
            if face_recognition_flag:
                try:
                    image = cv2.imread(img_path)
                    height, width = image.shape[:2]
                    face_encodings = face_recognition.face_encodings(
                        image, 
                        known_face_locations=[(0, width, height, 0)],
                        num_jitters=2, 
                        model="large"
                    )
                    if len(face_encodings) > 0:
                        row_data['embedding_face_recognition'] = face_encodings[0].tolist()
                    else:
                        row_data['embedding_face_recognition'] = None
                        print(f"No face found with face_recognition in {img_path}")
                except Exception as e:
                    print(f"Error with face_recognition on {img_path}: {e}")
                    row_data['embedding_face_recognition'] = None
            
            # Store
            df = pd.concat([df, pd.DataFrame([row_data])], ignore_index=True)
    
    # Write to file
    df.to_parquet(output_file, index=False)
    print(f"Saved embeddings to {output_file}")
    return df

In [19]:
def compute_unknown_embeddings(base_path, output_path, country, models):
    '''
    Function that computes the embeddings specifically for the country-specific unknown dataset with different embedder models
    '''
    input_path = os.path.join(base_path, country, 'Unknown')
    output_file = os.path.join(output_path, f"{country}_unknown_embeddings.parquet")
    
    # Set up dataframe
    columns = ['country', 'politician', 'image_path']
    deepface_models = models[0]
    face_recognition_flag = "face_recognition" in models[1]
    
    for model in deepface_models:
        columns.append(f'embedding_{model}')
    if face_recognition_flag:
        columns.append('embedding_face_recognition')
    
    df = pd.DataFrame(columns=columns)
    
    # Check if Unknown folder exists
    if not os.path.exists(input_path):
        print(f"No 'Unknown' folder found for {country}")
        return df
    
    print(f"Processing Unknown images for {country}")
    for img_file in tqdm(os.listdir(input_path), desc=f"Processing unknown images"):
        if not img_file.lower().endswith(('.jpg', '.jpeg', '.png')):
            continue
        
        img_path = os.path.join(input_path, img_file)
        
        row_data = {
            'country': country,
            'politician': 'Unknown',
            'image_path': img_path
        }
        
        # Compute DeepFace embeddings
        for model in deepface_models:
            try:
                embedding = DeepFace.represent(
                    img_path=img_path, 
                    model_name=model,
                    enforce_detection=False,
                    detector_backend="opencv"
                )
                if isinstance(embedding, list) and len(embedding) > 0:
                    row_data[f'embedding_{model}'] = embedding[0]["embedding"]
                else:
                    row_data[f'embedding_{model}'] = None
                    print(f"No face found with {model} in {img_path}")
            except Exception as e:
                print(f"Error with {model} on {img_path}: {e}")
                row_data[f'embedding_{model}'] = None
        
        # Compute face_recognition
        if face_recognition_flag:
            try:
                image = cv2.imread(img_path)
                height, width = image.shape[:2]
                face_encodings = face_recognition.face_encodings(
                    image, 
                    known_face_locations=[(0, width, height, 0)],
                    num_jitters=2, 
                    model="large"
                )
                if len(face_encodings) > 0:
                    row_data['embedding_face_recognition'] = face_encodings[0].tolist()
                else:
                    row_data['embedding_face_recognition'] = None
                    print(f"No face found with face_recognition in {img_path}")
            except Exception as e:
                print(f"Error with face_recognition on {img_path}: {e}")
                row_data['embedding_face_recognition'] = None
        
        # Store
        df = pd.concat([df, pd.DataFrame([row_data])], ignore_index=True)
    
    # Write to file
    df.to_parquet(output_file, index=False)
    print(f"Saved unknown embeddings to {output_file}")
    return df

### NL

In [24]:
compute_face_embeddings(
    base_path=training_path,
    output_path=training_path,
    country="NL",
    models=models
)

Processing NL:   0%|          | 0/17 [00:00<?, ?it/s]

Processing politician: Stephan_van_Baarle


Processing NL:   6%|▌         | 1/17 [03:10<50:51, 190.70s/it]

Processing politician: Lilian_Marijnissen


Processing NL:  12%|█▏        | 2/17 [06:47<51:33, 206.25s/it]

Processing politician: Geert_Wilders
Error with VGG-Face on datasets/train_test/NL/Geert_Wilders/ea4bd34f_Dilan Yesilgöz_4.jpg: Input image must not have non-english characters - datasets/train_test/NL/Geert_Wilders/ea4bd34f_Dilan Yesilgöz_4.jpg
Error with Facenet on datasets/train_test/NL/Geert_Wilders/ea4bd34f_Dilan Yesilgöz_4.jpg: Input image must not have non-english characters - datasets/train_test/NL/Geert_Wilders/ea4bd34f_Dilan Yesilgöz_4.jpg
Error with Facenet512 on datasets/train_test/NL/Geert_Wilders/ea4bd34f_Dilan Yesilgöz_4.jpg: Input image must not have non-english characters - datasets/train_test/NL/Geert_Wilders/ea4bd34f_Dilan Yesilgöz_4.jpg
Error with OpenFace on datasets/train_test/NL/Geert_Wilders/ea4bd34f_Dilan Yesilgöz_4.jpg: Input image must not have non-english characters - datasets/train_test/NL/Geert_Wilders/ea4bd34f_Dilan Yesilgöz_4.jpg
Error with ArcFace on datasets/train_test/NL/Geert_Wilders/ea4bd34f_Dilan Yesilgöz_4.jpg: Input image must not have non-englis

Processing NL:  18%|█▊        | 3/17 [10:39<50:53, 218.08s/it]

Processing politician: Caroline_van_der_Plas


Processing NL:  24%|██▎       | 4/17 [14:08<46:26, 214.35s/it]

Processing politician: Rob_Jetten


Processing NL:  29%|██▉       | 5/17 [17:33<42:10, 210.90s/it]

Processing politician: Pieter_Omtzigt
Error with VGG-Face on datasets/train_test/NL/Pieter_Omtzigt/7ba99f06_Dilan Yesilgöz_1.jpg: Input image must not have non-english characters - datasets/train_test/NL/Pieter_Omtzigt/7ba99f06_Dilan Yesilgöz_1.jpg
Error with Facenet on datasets/train_test/NL/Pieter_Omtzigt/7ba99f06_Dilan Yesilgöz_1.jpg: Input image must not have non-english characters - datasets/train_test/NL/Pieter_Omtzigt/7ba99f06_Dilan Yesilgöz_1.jpg
Error with Facenet512 on datasets/train_test/NL/Pieter_Omtzigt/7ba99f06_Dilan Yesilgöz_1.jpg: Input image must not have non-english characters - datasets/train_test/NL/Pieter_Omtzigt/7ba99f06_Dilan Yesilgöz_1.jpg
Error with OpenFace on datasets/train_test/NL/Pieter_Omtzigt/7ba99f06_Dilan Yesilgöz_1.jpg: Input image must not have non-english characters - datasets/train_test/NL/Pieter_Omtzigt/7ba99f06_Dilan Yesilgöz_1.jpg
Error with ArcFace on datasets/train_test/NL/Pieter_Omtzigt/7ba99f06_Dilan Yesilgöz_1.jpg: Input image must not have 

Processing NL:  35%|███▌      | 6/17 [20:51<37:50, 206.44s/it]

Processing politician: Laurens_Dassen


Processing NL:  41%|████      | 7/17 [23:48<32:47, 196.76s/it]

Processing politician: Dilan_Yesilgoz


Processing NL:  47%|████▋     | 8/17 [27:21<30:18, 202.00s/it]

Processing politician: Mirjam_Bikker
Error with VGG-Face on datasets/train_test/NL/Mirjam_Bikker/3f0432ab_Dilan Yesilgöz_1.jpg: Input image must not have non-english characters - datasets/train_test/NL/Mirjam_Bikker/3f0432ab_Dilan Yesilgöz_1.jpg
Error with Facenet on datasets/train_test/NL/Mirjam_Bikker/3f0432ab_Dilan Yesilgöz_1.jpg: Input image must not have non-english characters - datasets/train_test/NL/Mirjam_Bikker/3f0432ab_Dilan Yesilgöz_1.jpg
Error with Facenet512 on datasets/train_test/NL/Mirjam_Bikker/3f0432ab_Dilan Yesilgöz_1.jpg: Input image must not have non-english characters - datasets/train_test/NL/Mirjam_Bikker/3f0432ab_Dilan Yesilgöz_1.jpg
Error with OpenFace on datasets/train_test/NL/Mirjam_Bikker/3f0432ab_Dilan Yesilgöz_1.jpg: Input image must not have non-english characters - datasets/train_test/NL/Mirjam_Bikker/3f0432ab_Dilan Yesilgöz_1.jpg
Error with ArcFace on datasets/train_test/NL/Mirjam_Bikker/3f0432ab_Dilan Yesilgöz_1.jpg: Input image must not have non-englis

Processing NL:  53%|█████▎    | 9/17 [30:27<26:15, 196.93s/it]

Processing politician: Joost_Eerdmans


Processing NL:  59%|█████▉    | 10/17 [33:27<22:23, 191.89s/it]

Processing politician: Thierry_Baudet


Processing NL:  65%|██████▍   | 11/17 [36:03<18:05, 180.97s/it]

Processing politician: Edson_Olf


Processing NL:  71%|███████   | 12/17 [37:32<12:44, 152.88s/it]

Processing politician: Frans_Timmermans
Error with VGG-Face on datasets/train_test/NL/Frans_Timmermans/ea4bd34f_Dilan Yesilgöz_2.jpg: Input image must not have non-english characters - datasets/train_test/NL/Frans_Timmermans/ea4bd34f_Dilan Yesilgöz_2.jpg
Error with Facenet on datasets/train_test/NL/Frans_Timmermans/ea4bd34f_Dilan Yesilgöz_2.jpg: Input image must not have non-english characters - datasets/train_test/NL/Frans_Timmermans/ea4bd34f_Dilan Yesilgöz_2.jpg
Error with Facenet512 on datasets/train_test/NL/Frans_Timmermans/ea4bd34f_Dilan Yesilgöz_2.jpg: Input image must not have non-english characters - datasets/train_test/NL/Frans_Timmermans/ea4bd34f_Dilan Yesilgöz_2.jpg
Error with OpenFace on datasets/train_test/NL/Frans_Timmermans/ea4bd34f_Dilan Yesilgöz_2.jpg: Input image must not have non-english characters - datasets/train_test/NL/Frans_Timmermans/ea4bd34f_Dilan Yesilgöz_2.jpg
Error with ArcFace on datasets/train_test/NL/Frans_Timmermans/ea4bd34f_Dilan Yesilgöz_2.jpg: Input 

Processing NL:  76%|███████▋  | 13/17 [41:01<11:19, 169.75s/it]

Processing politician: Henri_Bontenbal


Processing NL:  82%|████████▏ | 14/17 [44:13<08:50, 176.71s/it]

Processing politician: Esther_Ouwehand


Processing NL:  88%|████████▊ | 15/17 [47:50<06:17, 188.80s/it]

Processing politician: Chris_Stoffer


Processing NL:  94%|█████████▍| 16/17 [51:17<03:14, 194.28s/it]

Processing politician: Wybren_van_Haga


Processing NL: 100%|██████████| 17/17 [54:46<00:00, 193.30s/it]


Saved embeddings to datasets/train_test/NL_embeddings.parquet


Unnamed: 0,country,politician,image_path,embedding_VGG-Face,embedding_Facenet,embedding_Facenet512,embedding_OpenFace,embedding_ArcFace,embedding_Dlib,embedding_SFace,embedding_face_recognition
0,NL,Stephan_van_Baarle,datasets/train_test/NL/Stephan_van_Baarle/7518...,"[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0343820583382...","[0.2907654643058777, 0.5678703784942627, -0.59...","[-0.8811557292938232, 1.1285032033920288, -0.4...","[0.015059384517371655, 0.006262484472244978, -...","[-0.14670303463935852, 0.11384416371583939, -0...","[-0.053955599665641785, 0.09319762885570526, 0...","[-0.7423117756843567, 0.5515708327293396, 1.04...","[-0.08167310059070587, 0.02302662841975689, 0...."
1,NL,Stephan_van_Baarle,datasets/train_test/NL/Stephan_van_Baarle/b5c4...,"[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0109293536791...","[0.01734083890914917, 1.0677095651626587, 0.36...","[-1.9518992900848389, 1.1397768259048462, 0.94...","[-0.02600262314081192, 0.004551914054900408, -...","[-0.18038032948970795, 0.279319167137146, -0.2...","[-0.05004735663533211, 0.0352143831551075, 0.0...","[-0.9400857090950012, -0.5621656775474548, 0.3...","[-0.07158689945936203, 0.050367504358291626, 0..."
2,NL,Stephan_van_Baarle,datasets/train_test/NL/Stephan_van_Baarle/6711...,"[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0565274535416...","[1.2932127714157104, -0.6363705396652222, 1.16...","[-0.5140327215194702, 0.7249234914779663, 0.14...","[0.02915845438838005, 0.06930511444807053, 0.0...","[-0.0846487358212471, 0.38507404923439026, -0....","[-0.0722360759973526, -0.0019726187456399202, ...","[-0.8447213768959045, -0.5323300957679749, 1.1...","[-0.06998300552368164, 0.07013103365898132, 0...."
3,NL,Stephan_van_Baarle,datasets/train_test/NL/Stephan_van_Baarle/034f...,"[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.07389121...","[-0.5141052007675171, -0.14589416980743408, -0...","[-0.4285900294780731, 0.46602827310562134, -0....","[0.10006731003522873, -0.0619647316634655, 0.0...","[-0.1664310246706009, 0.3704061508178711, 0.01...","[0.0012769631575793028, 0.06571324914693832, 0...","[1.1664515733718872, 0.22525428235530853, -0.1...","[-0.07564379274845123, 0.11386445164680481, 0...."
4,NL,Stephan_van_Baarle,datasets/train_test/NL/Stephan_van_Baarle/307a...,"[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ...","[0.310562402009964, 0.8564702272415161, -0.965...","[-0.9611704349517822, 0.8630242943763733, 0.04...","[-0.0113795455545187, 0.0975126326084137, -0.0...","[-0.10379300266504288, 0.14741967618465424, 0....","[-0.03182348236441612, 0.06024117395281792, 0....","[-0.1440213918685913, -0.03564627841114998, 1....","[-0.036771029233932495, 0.04112560674548149, 0..."
...,...,...,...,...,...,...,...,...,...,...,...
5519,NL,Wybren_van_Haga,datasets/train_test/NL/Wybren_van_Haga/b35f650...,"[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ...","[0.6773715019226074, 0.0456889271736145, -1.33...","[1.9740127325057983, -1.551148533821106, 0.282...","[0.019589707255363464, -0.03733227401971817, -...","[-0.021395064890384674, -0.29362645745277405, ...","[-0.16468967497348785, 0.14761099219322205, 0....","[-0.3655371367931366, 0.4421786367893219, 0.38...","[-0.0600978322327137, 0.12427319586277008, 0.0..."
5520,NL,Wybren_van_Haga,datasets/train_test/NL/Wybren_van_Haga/fd3f5c3...,"[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.02067234...","[-0.29086899757385254, -0.3036533296108246, -0...","[0.0494399219751358, -0.7900205850601196, -0.9...","[-0.09559787064790726, 0.0699348896741867, 0.1...","[-0.2241268754005432, 0.6755390763282776, -0.0...","[-0.05182550102472305, 0.15010982751846313, 0....","[0.8890799880027771, -0.28052738308906555, -0....","[-0.07927539944648743, 0.1523919254541397, 0.0..."
5521,NL,Wybren_van_Haga,datasets/train_test/NL/Wybren_van_Haga/2cb44dd...,"[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ...","[-0.5010250806808472, -1.0816199779510498, -0....","[1.337747573852539, 0.3847041428089142, 0.8339...","[-0.031416136771440506, 0.16814887523651123, 0...","[0.049261778593063354, -0.023974932730197906, ...","[-0.06443601101636887, 0.12508203089237213, 0....","[-1.2978324890136719, -0.01501662191003561, 0....","[-0.01838686875998974, 0.14356324076652527, -0..."
5522,NL,Wybren_van_Haga,datasets/train_test/NL/Wybren_van_Haga/62f72e7...,"[0.023594513673079572, 0.0, 0.0, 0.00440700893...","[0.9359687566757202, 0.8299816846847534, 0.041...","[1.9004838466644287, -0.8453763723373413, 0.84...","[0.033938217908144, 0.09640984237194061, -0.04...","[-0.10520578920841217, -0.12486226856708527, -...","[-0.10190367698669434, 0.11365978419780731, 0....","[0.16224084794521332, 0.09632733464241028, 1.7...","[-0.1239003986120224, 0.126318097114563, 0.064..."


In [25]:
nl_unknowns = compute_unknown_embeddings(
    base_path=training_path,
    output_path=training_path,
    country="NL",
    models=models
)

Processing Unknown images for NL


Processing unknown images: 100%|██████████| 5569/5569 [51:23<00:00,  1.81it/s]  


Saved unknown embeddings to datasets/train_test/NL_unknown_embeddings.parquet


### UK

In [22]:
compute_face_embeddings(
    base_path=training_path,
    output_path=training_path,
    country="UK",
    models=models
)

Processing UK:   0%|          | 0/17 [00:00<?, ?it/s]

Processing politician: Adrian_Ramsay


Processing UK:   6%|▌         | 1/17 [03:32<56:43, 212.71s/it]

Processing politician: Ed_Davey


Processing UK:  12%|█▏        | 2/17 [06:48<50:39, 202.65s/it]

Processing politician: Rhun_ap_Iorwerth


Processing UK:  18%|█▊        | 3/17 [09:37<43:43, 187.38s/it]

Processing politician: Patrick_Harvie


Processing UK:  24%|██▎       | 4/17 [12:32<39:33, 182.61s/it]

Processing politician: Mary_Lou_McDonald


Processing UK:  29%|██▉       | 5/17 [15:48<37:28, 187.40s/it]

Processing politician: Carla_Denyer


Processing UK:  35%|███▌      | 6/17 [18:56<34:22, 187.53s/it]

Processing politician: Doug_Beattie


Processing UK:  41%|████      | 7/17 [21:39<29:55, 179.56s/it]

Processing politician: Rishi_Sunak


Processing UK:  47%|████▋     | 8/17 [23:54<24:46, 165.19s/it]

Processing politician: George_Galloway


Processing UK:  53%|█████▎    | 9/17 [27:14<23:30, 176.35s/it]

Processing politician: Jim_Allister


Processing UK:  59%|█████▉    | 10/17 [30:22<20:59, 179.91s/it]

Processing politician: John_Swinney


Processing UK:  65%|██████▍   | 11/17 [34:05<19:17, 192.98s/it]

Processing politician: Lorna_Slater


Processing UK:  71%|███████   | 12/17 [37:14<15:58, 191.65s/it]

Processing politician: Keir_Starmer


Processing UK:  76%|███████▋  | 13/17 [40:59<13:27, 201.98s/it]

Processing politician: Nigel_Farage


Processing UK:  82%|████████▏ | 14/17 [43:59<09:45, 195.25s/it]

Processing politician: Gavin_Robinson


Processing UK:  88%|████████▊ | 15/17 [46:52<06:17, 188.56s/it]

Processing politician: Naomi_Long


Processing UK:  94%|█████████▍| 16/17 [50:05<03:09, 189.88s/it]

Processing politician: Colum_Eastwood


Processing UK: 100%|██████████| 17/17 [53:16<00:00, 188.03s/it]


Saved embeddings to datasets/train_test/UK_embeddings.parquet


Unnamed: 0,country,politician,image_path,embedding_VGG-Face,embedding_Facenet,embedding_Facenet512,embedding_OpenFace,embedding_ArcFace,embedding_Dlib,embedding_SFace,embedding_face_recognition
0,UK,Adrian_Ramsay,datasets/train_test/UK/Adrian_Ramsay/62bf5831_...,"[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ...","[-0.35135361552238464, -0.42078346014022827, 0...","[-0.17114633321762085, -0.31198209524154663, -...","[0.06984470784664154, 0.20160618424415588, -0....","[-0.3318551480770111, 0.8573698997497559, -0.2...","[-0.06081914156675339, 0.08557083457708359, 0....","[-0.058141469955444336, 0.06134376674890518, -...","[-0.08713938295841217, 0.029737146571278572, 0..."
1,UK,Adrian_Ramsay,datasets/train_test/UK/Adrian_Ramsay/c99c3e00_...,"[0.0, 0.04321540475950876, 0.0, 0.0, 0.0, 0.00...","[0.14784598350524902, 2.072838306427002, -2.40...","[-0.08605198562145233, -0.9127213954925537, -0...","[0.041459184139966965, 0.03514566645026207, -0...","[0.13335968554019928, 0.1689140945672989, 0.14...","[-0.10037916153669357, 0.13561910390853882, 0....","[-0.12224884331226349, 0.0031710208859294653, ...","[-0.05792800337076187, 0.08529047667980194, 0...."
2,UK,Adrian_Ramsay,datasets/train_test/UK/Adrian_Ramsay/c0b212ec_...,"[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ...","[-0.10455392301082611, -0.30937033891677856, -...","[0.4578588306903839, -0.6233252286911011, -1.7...","[0.03886430710554123, 0.01809820346534252, 0.1...","[-0.17943434417247772, 0.4395715594291687, 0.0...","[-0.0726579949259758, 0.07826431840658188, 0.1...","[0.9908984303474426, 0.504574716091156, -0.128...","[-0.07454099506139755, 0.0897434651851654, 0.0..."
3,UK,Adrian_Ramsay,datasets/train_test/UK/Adrian_Ramsay/fcc56925_...,"[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.05657017...","[0.9581767320632935, 2.293910264968872, -2.134...","[-0.4981761872768402, -1.6441878080368042, 0.2...","[-0.09067273885011673, 0.07162158191204071, -0...","[-0.00269246194511652, 0.210192009806633, 0.26...","[-0.10730963945388794, 0.08606453239917755, 0....","[0.17538858950138092, 0.1498776525259018, 0.26...","[-0.12896034121513367, 0.09216073155403137, 0...."
4,UK,Adrian_Ramsay,datasets/train_test/UK/Adrian_Ramsay/b5adab40_...,"[0.0, 0.01716598686508794, 0.0, 0.0, 0.0, 0.0,...","[-0.04719257354736328, 0.8895539045333862, -0....","[-0.8663128614425659, -0.8286941051483154, 0.3...","[0.03145873174071312, 0.013665544800460339, -0...","[0.20371617376804352, 0.3974902927875519, 0.31...","[-0.06672751903533936, 0.07417088747024536, 0....","[0.14213977754116058, -0.1026618555188179, -0....","[-0.06910675019025803, 0.08851906657218933, 0...."
...,...,...,...,...,...,...,...,...,...,...,...
5631,UK,Colum_Eastwood,datasets/train_test/UK/Colum_Eastwood/ca84b4ea...,"[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0745370177117...","[0.44292089343070984, -0.6873096227645874, -1....","[1.0080773830413818, -0.3393586277961731, 0.53...","[-0.03013659454882145, 0.015217386186122894, 0...","[0.05982501059770584, 0.096262127161026, 0.125...","[-0.11214412748813629, 0.05732250213623047, 0....","[-1.5556674003601074, 0.14174199104309082, 1.7...","[-0.0771929994225502, 0.01920188032090664, 0.0..."
5632,UK,Colum_Eastwood,datasets/train_test/UK/Colum_Eastwood/34ed6676...,"[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ...","[-0.5544884204864502, -0.8569743633270264, -0....","[2.184706211090088, -0.0652722418308258, 1.080...","[0.03511003777384758, 0.03452041745185852, 0.0...","[-0.008068430237472057, -0.036101341247558594,...","[-0.006040752865374088, 0.07040788978338242, 0...","[-1.4310362339019775, 0.2638978660106659, 1.30...","[-0.047399863600730896, 0.035299383103847504, ..."
5633,UK,Colum_Eastwood,datasets/train_test/UK/Colum_Eastwood/91f94194...,"[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0391710337269...","[-0.07152532041072845, -0.20605503022670746, -...","[0.9783399105072021, 2.2408640384674072, 0.569...","[0.00859181396663189, -0.05297283083200455, -0...","[0.05596322566270828, -0.03871669992804527, -0...","[-0.06889524310827255, 0.009786844253540039, 0...","[-0.8160473704338074, -0.24205103516578674, 2....","[-0.07766005396842957, 0.11862117052078247, 0...."
5634,UK,Colum_Eastwood,datasets/train_test/UK/Colum_Eastwood/e06748c3...,"[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ...","[-0.6166467666625977, -0.7695242166519165, -0....","[0.4142583906650543, 1.9967061281204224, -0.26...","[0.06115628033876419, 0.015252931974828243, 0....","[-0.0935925766825676, 0.05689245089888573, 0.0...","[-0.04928343743085861, 0.07296428084373474, -0...","[0.3500470221042633, 0.3553246855735779, 0.856...","[-0.11556211113929749, 0.05934468284249306, 0...."


In [23]:
uk_unknowns = compute_unknown_embeddings(
    base_path=training_path,
    output_path=training_path,
    country="UK",
    models=models
)

Processing Unknown images for UK


Processing unknown images: 100%|██████████| 4476/4476 [37:06<00:00,  2.01it/s]


Saved unknown embeddings to datasets/train_test/UK_unknown_embeddings.parquet
