# Compare lists of feature pairs of the Balanced Faces in the Wild (BFW) dataset.

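Load the datatable from `data/meta/bfw-v0.1.5-datatable.pkl`, load the features of every face listed in it, score each pair of features, and store the scores in the datatable. The result is saved to `data/meta/bfw-v0.1.5-datatable-scores.pkl`; an existing file is only overwritten when `overwrite_pickle` is set.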

## Add project code to PYTHONPATH, if not already there
Check that _path_package_ is set to the _code_ directory on the respective system.

In [16]:
%matplotlib inline
import numpy as np
import swifter
import pathlib
from sklearn.metrics.pairwise import cosine_similarity
# Load our custom tools for loading and processing the data
from facebias.iotools import load_bfw_datatable, save_bfw_datatable, load_features_from_image_list

# Function used to compare (i.e., score) a pair of features.
scorefun = np.dot

In [21]:
# Root of the data directory, relative to this notebook.
dir_data = '../../data/'
# Directory the face features are loaded from.
dir_features = dir_data + 'features/sphereface/'
# Input datatable (CSV) and the output datatable that also holds the scores.
f_datatable = dir_data + 'meta/bfw-v0.1.5-datatable.csv'
f_datatable_out = dir_data + 'meta/bfw-v0.1.5-datatable-scores.pkl'
# When False, an already existing output file is left untouched.
overwrite_pickle = False

In [6]:
import pandas as pd

## Load the data

Read in the data as a pandas.DataFrame and show the first few rows.

In [7]:
# Read the CSV datatable from the path configured in `f_datatable`, instead of
# re-building the same path by hand, so that the two cannot drift apart.
data = pd.read_csv(f_datatable)
data.head()

Unnamed: 0,fold,p1,p2,label,id1,id2,att1,att2,vgg16,resnet50,senet50,a1,a2,g1,g2,e1,e2
0,1,asian_females/n000009/0010_01.jpg,asian_females/n000009/0043_01.jpg,1,0,0,asian_females,asian_females,0.820039,0.703258,0.679089,AF,AF,F,F,A,A
1,1,asian_females/n000009/0010_01.jpg,asian_females/n000009/0120_01.jpg,1,0,0,asian_females,asian_females,0.719199,0.523613,0.594268,AF,AF,F,F,A,A
2,1,asian_females/n000009/0010_01.jpg,asian_females/n000009/0122_02.jpg,1,0,0,asian_females,asian_females,0.732029,0.527567,0.64368,AF,AF,F,F,A,A
3,1,asian_females/n000009/0010_01.jpg,asian_females/n000009/0188_01.jpg,1,0,0,asian_females,asian_females,0.607093,0.348211,0.458883,AF,AF,F,F,A,A
4,1,asian_females/n000009/0010_01.jpg,asian_females/n000009/0205_01.jpg,1,0,0,asian_females,asian_females,0.629153,0.384273,0.494913,AF,AF,F,F,A,A


In [11]:
# Load the datatable from the pickle stored next to the CSV
# (same file path, with the ".csv" extension swapped for ".pkl").
data = load_bfw_datatable(
    f_datatable.replace(".csv", ".pkl")
)
data.head()

Unnamed: 0,fold,p1,p2,label,id1,id2,att1,att2,vgg16,resnet50,senet50,a1,a2,g1,g2,e1,e2,score,ids1,ids2
0,1,asian_females/n000009/0010_01.jpg,asian_females/n000009/0043_01.jpg,1,0,0,asian_females,asian_females,0.820039,0.703258,0.679089,AF,AF,F,F,A,A,,0,0
1,1,asian_females/n000009/0010_01.jpg,asian_females/n000009/0120_01.jpg,1,0,0,asian_females,asian_females,0.719199,0.523613,0.594268,AF,AF,F,F,A,A,,0,0
2,1,asian_females/n000009/0010_01.jpg,asian_females/n000009/0122_02.jpg,1,0,0,asian_females,asian_females,0.732029,0.527567,0.64368,AF,AF,F,F,A,A,,0,0
3,1,asian_females/n000009/0010_01.jpg,asian_females/n000009/0188_01.jpg,1,0,0,asian_females,asian_females,0.607093,0.348211,0.458883,AF,AF,F,F,A,A,,0,0
4,1,asian_females/n000009/0010_01.jpg,asian_females/n000009/0205_01.jpg,1,0,0,asian_females,asian_females,0.629153,0.384273,0.494913,AF,AF,F,F,A,A,,0,0


In [12]:
# Number of rows (i.e., pairs) in the datatable.
len(data.index)

923898

## Load features and generate scores
First check whether scores were already calculated for each pair; if not, load the features and calculate them.

In [13]:
# Build li_images: the unique set of all face images that appear in either
# column of the pairs (p1 or p2).
li_images = list(np.unique([*data['p1'].to_list(), *data['p2'].to_list()]))

# Read the features as a dictionary: keys are the file paths of the images,
# values are the corresponding face encodings.
features = load_features_from_image_list(
    li_images,
    dir_features,
    ext_feat='npy',
)

In [14]:
# Score every pair of features, row by row, with `scorefun`.
# NOTE(review): the dot product only equals cosine similarity if the features
# are L2-normalized -- presumably they are; confirm against the feature files.
data['sphereface'] = data.swifter.apply(
    lambda row: scorefun(features[row['p1']], features[row['p2']].T),
    axis=1,
)

Pandas Apply:   0%|          | 0/923898 [00:00<?, ?it/s]

In [15]:
# Show the first rows, which now include the new 'sphereface' score column.
data.head(n=5)

Unnamed: 0,fold,p1,p2,label,id1,id2,att1,att2,vgg16,resnet50,...,a1,a2,g1,g2,e1,e2,score,ids1,ids2,sphereface
0,1,asian_females/n000009/0010_01.jpg,asian_females/n000009/0043_01.jpg,1,0,0,asian_females,asian_females,0.820039,0.703258,...,AF,AF,F,F,A,A,,0,0,0.392526
1,1,asian_females/n000009/0010_01.jpg,asian_females/n000009/0120_01.jpg,1,0,0,asian_females,asian_females,0.719199,0.523613,...,AF,AF,F,F,A,A,,0,0,0.354262
2,1,asian_females/n000009/0010_01.jpg,asian_females/n000009/0122_02.jpg,1,0,0,asian_females,asian_females,0.732029,0.527567,...,AF,AF,F,F,A,A,,0,0,0.302028
3,1,asian_females/n000009/0010_01.jpg,asian_females/n000009/0188_01.jpg,1,0,0,asian_females,asian_females,0.607093,0.348211,...,AF,AF,F,F,A,A,,0,0,-0.009217
4,1,asian_females/n000009/0010_01.jpg,asian_females/n000009/0205_01.jpg,1,0,0,asian_females,asian_females,0.629153,0.384273,...,AF,AF,F,F,A,A,,0,0,0.132534


In [22]:
# Keep an existing output file unless overwriting was explicitly requested;
# otherwise write the datatable (with the scores) to `f_datatable_out`.
if pathlib.Path(f_datatable_out).exists() and not overwrite_pickle:
    print('Scores were in datatable. Will not overwrite by default')
else:
    save_bfw_datatable(data, fpath=f_datatable_out)