In [None]:
# Mount Google Drive at /content/drive so files stored there can be read by path.
from google.colab import drive
drive.mount('/content/drive')

# Base folder on the mounted drive; the dataset and result paths in later cells are joined onto it.
general_path = "/content/drive/My Drive/Colab Notebooks/"
# Switch the working directory to the notebook folder: the trained model file is
# written and read through a relative path ("./trained_..._model.clf").
%cd /content/drive/My\ Drive/Colab Notebooks/

In [1]:
# Install the face_recognition package, which the classification cells import to compute face encodings.
!pip install face_recognition

## Evaluation Functions

### Dataframes

In [None]:
import os 
import pandas as pd

# Function that creates the list of column names for the dataframes
def set_col_names(name='Dist', k=1):
  """Return ``k`` column names: ``name`` followed by a 1-based position.

  Example: ``set_col_names('Label', 3)`` -> ``['Label1', 'Label2', 'Label3']``.
  A non-positive ``k`` yields an empty list.
  """
  return [name + str(position) for position in range(1, k + 1)]

# Function that displays the dataframe of predicted distances / labels for the test set samples
def display_knn(im_names, data_list, df_type='Dist'):
  """Build a dataframe with one row per test image and one column per neighbor.

  Args:
    im_names: Names of the test images; they become the 'Test Face' index.
    data_list: One list per test image holding the values (distances or
      labels) of its neighbors. The column count is taken from the first row;
      an empty ``data_list`` produces a dataframe without value columns.
    df_type: 'Dist' or 'Label'; used as the prefix of the column names.

  Returns:
    A pandas DataFrame indexed by 'Test Face' whose columns are
    '<df_type>1' ... '<df_type>k'.

  Raises:
    ValueError: If ``df_type`` is neither 'Dist' nor 'Label'.
  """
  # Fail with a clear message instead of an UnboundLocalError further down
  if df_type not in ('Dist', 'Label'):
    raise ValueError("df_type must be 'Dist' or 'Label', got {!r}".format(df_type))

  # Guard the empty case: there is no first row to read the neighbor count from
  k = len(data_list[0]) if len(data_list) > 0 else 0

  # The column prefix is the same string as the dataframe type
  col_names = set_col_names(name=df_type, k=k)

  df = pd.DataFrame(data_list, columns=col_names)
  df['Test Face'] = im_names

  return df.set_index('Test Face')

# Function that displays the dataframe of true labels for the test set samples
def display_true_labels(im_names, im_paths):
  """Return a one-column ('Label') dataframe indexed by 'Test Face'.

  The label of each image is the name of the directory that contains it,
  i.e. the last component of the path's parent directory.
  """
  labels = []
  for im_path in im_paths:
    containing_dir = os.path.normpath(os.path.dirname(im_path))
    labels.append(os.path.basename(containing_dir))

  label_df = pd.DataFrame(labels, columns=['Label'])
  label_df = label_df.assign(**{'Test Face': im_names})

  return label_df.set_index('Test Face')

### Metrics

In [None]:
# Function that calculates the ranked accuracy (correct prediction of at least one neighbor)
def ranked_k_accuracy(true_label_df, pred_label_df):
  """Fraction of samples whose true label appears among their predicted labels.

  Args:
    true_label_df: DataFrame whose first column holds the true label of each sample.
    pred_label_df: DataFrame with one row per sample and one column per
      predicted neighbor label; rows are matched to ``true_label_df`` by position.

  Returns:
    The ranked accuracy as a float in [0, 1]; 0.0 when there are no samples.

  Raises:
    ValueError: If the two dataframes do not have the same number of rows.
  """
  true_rows = true_label_df.values.tolist()
  pred_rows = pred_label_df.values.tolist()

  # Rows are compared pairwise, so a length mismatch means the inputs are misaligned
  if len(true_rows) != len(pred_rows):
    raise ValueError(
        "true and predicted label dataframes must have the same number of rows "
        "({} != {})".format(len(true_rows), len(pred_rows)))

  # Avoid a ZeroDivisionError when the test set is empty
  if not true_rows:
    return 0.0

  hits = sum(1 for true_row, pred_row in zip(true_rows, pred_rows)
             if true_row[0] in pred_row)

  return hits / len(true_rows)

# Function that calculates the exact accuracy (correct prediction of the nearest neighbor)
def exact_accuracy(true_label_df, pred_label_df):
  """Fraction of samples whose first predicted label equals the true label.

  Args:
    true_label_df: DataFrame whose first column holds the true label of each sample.
    pred_label_df: DataFrame with one row per sample; only its first column
      (the nearest neighbor) is compared. Rows are matched by position.

  Returns:
    The exact-match accuracy as a float in [0, 1]; 0.0 when there are no samples.

  Raises:
    ValueError: If the two dataframes do not have the same number of rows.
  """
  true_rows = true_label_df.values.tolist()
  pred_rows = pred_label_df.values.tolist()

  # Rows are compared pairwise, so a length mismatch means the inputs are misaligned
  if len(true_rows) != len(pred_rows):
    raise ValueError(
        "true and predicted label dataframes must have the same number of rows "
        "({} != {})".format(len(true_rows), len(pred_rows)))

  # Avoid a ZeroDivisionError when the test set is empty
  if not true_rows:
    return 0.0

  hits = sum(1 for true_row, pred_row in zip(true_rows, pred_rows)
             if true_row[0] == pred_row[0])

  return hits / len(true_rows)

## Face Identification

In [None]:
# Function that executes the whole face identification process and returns the accuracy metrics
def face_identification(test_emb_list, test_im_paths, clf_name, model_path, k=1, exact_flag=True):
  """Predict identities for the test faces and score the predictions.

  Args:
    test_emb_list: Test faces, forwarded unchanged to ``predict`` as its
      ``faces`` argument.
    test_im_paths: Path of each test face; the file name becomes the row index
      and the parent directory name is used as the true label.
    clf_name: Classifier name forwarded to ``predict`` ('KNN' or 'SVM').
    model_path: Path of the pickled model forwarded to ``predict``.
    k: Number of neighbors requested from ``predict``.
    exact_flag: When True the exact (nearest-neighbor) accuracy is computed too.

  Returns:
    A tuple ``(ranked_acc, acc, dist_df, pred_label_df)`` where ``acc`` is -1
    when ``exact_flag`` is False and ``dist_df`` is None for any classifier
    other than 'KNN'.
  """
  all_neigh_dist, all_neigh_label = predict(test_emb_list, test_im_paths, clf_name, model_path, k)

  # extract filenames from paths
  test_im_names = [os.path.basename(os.path.normpath(path)) for path in test_im_paths]

  if clf_name == 'KNN':
    # distances dataframe
    dist_df = display_knn(test_im_names, all_neigh_dist)
  else:
    # Only the KNN branch of predict() collects distances. Return an explicit
    # None instead of `_`, which in IPython is whatever the last cell output was.
    dist_df = None

  # predicted labels dataframe
  pred_label_df = display_knn(test_im_names, all_neigh_label, df_type='Label')

  # true labels dataframe
  true_label_df = display_true_labels(test_im_names, test_im_paths)

  # ranked k accuracy
  ranked_acc = ranked_k_accuracy(true_label_df, pred_label_df)

  # exact match accuracy; -1 marks "not computed"
  acc = exact_accuracy(true_label_df, pred_label_df) if exact_flag else -1

  return ranked_acc, acc, dist_df, pred_label_df

## Load faces

In [None]:
import os
import numpy as np
from PIL import Image

# Function that lists image arrays and paths
def get_faces(data_dir, sample=False):
  """Load every file found under ``data_dir`` (recursively) as an image array.

  Args:
    data_dir: Root directory walked with ``os.walk``.
    sample: Currently unused; kept so existing callers keep working.

  Returns:
    A tuple ``(faces, im_paths)``: ``faces`` is a list of float64 numpy arrays
    and ``im_paths`` the matching file paths, in the same order. Both are empty
    when no files are found.
  """
  faces = []
  im_paths = []

  for subdir, _dirs, files in os.walk(data_dir):
    for file in files:
      data_path = os.path.join(subdir, file)
      # Close the file handle as soon as the pixels are copied into the array;
      # astype() returns a new array that does not depend on the open image
      with Image.open(data_path) as im:
        im_arr = np.asarray(im).astype('float64')
      faces.append(im_arr)
      im_paths.append(data_path)

  return faces, im_paths

## Classification

In [None]:
from sklearn.neighbors import KNeighborsClassifier
from sklearn.svm import SVC
import pickle
from PIL import Image, ImageDraw
import face_recognition

# Function that creates a database containing clear face encodings
def train(faces, paths, clf_name='KNN', model_path="./trained_knn_model.clf"):
  """Encode the training faces, fit a classifier on them and pickle the result.

  Args:
    faces: Sequence of image arrays; each one is cast to uint8 before encoding.
    paths: Path of each face (same order as ``faces``); the name of the parent
      directory is used as the identity label.
    clf_name: 'KNN' (1-nearest-neighbor, ball tree, distance weights) or
      'SVM' (linear kernel with probability estimates).
    model_path: File the list ``[clf, paths]`` is pickled to.

  Returns:
    The fitted classifier.

  Raises:
    ValueError: If ``clf_name`` is not 'KNN' or 'SVM'.
  """
  # Validate first so an unsupported name fails before the encoding loop runs
  if clf_name not in ('KNN', 'SVM'):
    raise ValueError("clf_name must be 'KNN' or 'SVM', got {!r}".format(clf_name))

  X = []
  y = []

  # extract the encoding of every face of the database
  for i, face in enumerate(faces):
    face = face.astype(np.uint8)

    # A fixed box is passed as the face location instead of running a detector.
    # NOTE(review): presumably (0, 100, 102, 0) spans the whole pre-cropped image - confirm.
    X.append(face_recognition.face_encodings(face, known_face_locations=[(0, 100, 102, 0)])[0]) # add encoding to a list

    identity = os.path.basename(os.path.normpath(os.path.dirname(paths[i])))
    y.append(identity) # add identity ground-truth label to a list

  if clf_name == 'KNN':
    clf = KNeighborsClassifier(n_neighbors=1, algorithm='ball_tree', weights='distance')
  else:
    clf = SVC(kernel='linear', probability=True)

  clf.fit(X, y) # train classifier

  # store training results (the paths are needed later to map neighbor indices back to labels)
  with open(model_path, 'wb') as f:
    pickle.dump([clf, paths], f)

  return clf

# Function that returns the top predicted identity labels for a number of given occluded faces
def predict(faces, paths, clf_name='KNN', model_path="./trained_knn_model.clf", k=1):
  """Predict identity labels for ``faces`` with a previously trained classifier.

  Args:
    faces: Sequence of image arrays; each one is cast to uint8 before encoding.
    paths: Paths of the faces. Not used by this function; kept for the callers.
    clf_name: 'KNN' or 'SVM'; selects how the loaded classifier is queried.
    model_path: Pickle file written by ``train`` holding ``[clf, train_paths]``.
    k: Number of neighbors to retrieve (KNN only; ignored for SVM).

  Returns:
    A tuple ``(all_neigh_dist, all_neigh_label)`` with one entry per face.
    For 'KNN' each entry holds the k neighbor distances / the k neighbor labels
    (parent directory names of the matching training paths). For 'SVM'
    ``all_neigh_dist`` stays empty and each label entry holds the predicted label.

  Raises:
    ValueError: If ``clf_name`` is not 'KNN' or 'SVM'.
  """
  # Reject unsupported names up front; otherwise the loop would silently produce nothing
  if clf_name not in ('KNN', 'SVM'):
    raise ValueError("clf_name must be 'KNN' or 'SVM', got {!r}".format(clf_name))

  # load training results
  # NOTE: pickle.load can execute arbitrary code - only load model files you created yourself
  with open(model_path, 'rb') as f:
    [clf, train_paths] = pickle.load(f)

  all_neigh_dist = []
  all_neigh_label = []

  # extract the encoding of every occluded face
  for face in faces:
    face = face.astype(np.uint8)
    # Same fixed face box as used in train()
    face_enc = face_recognition.face_encodings(face, known_face_locations=[(0, 100, 102, 0)])

    if clf_name == 'KNN':
      neigh_dist, neigh_label_ind = clf.kneighbors(face_enc, n_neighbors=k)
      neigh_dist, neigh_label_ind = neigh_dist.tolist(), neigh_label_ind.tolist()
      # Neighbor indices refer to training samples; map them back to identity names
      neigh_label = [os.path.basename(os.path.normpath(os.path.dirname(train_paths[ind]))) for ind in neigh_label_ind[0]]

      all_neigh_dist.append(neigh_dist[0])
      all_neigh_label.append(neigh_label)

    else:
      neigh_label = clf.predict(face_enc)
      neigh_label = neigh_label.tolist()

      all_neigh_label.append(neigh_label)

  return all_neigh_dist, all_neigh_label

### Train Set

In [None]:
# Load the training faces from the drive folder and fit the classifier on them
train_dir = os.path.join(general_path, "datasets/small/myceleba_split/train")

train_faces, train_im_paths = get_faces(train_dir)

clf_name = 'KNN'
# The evaluation cells rebuild this same relative path to load the model
model_path = "./trained_" + clf_name + "_model.clf"
print(model_path)

# Spaces added around the classifier name (was printed as "TrainingKNNclassifier...")
print("Training " + clf_name + " classifier...")
knn_clf = train(train_faces, train_im_paths, clf_name=clf_name, model_path=model_path)
print("Training complete!")

## Execution - Classic RPCA

In [None]:
# Evaluate face identification on every "_L" entry of the Classic RPCA results folder
test_classic_lam_dir = os.path.join(general_path, "results/myceleba_classic/")

# Sorted so the report order is reproducible (os.listdir returns entries in arbitrary order)
dirs_L = sorted(f for f in os.listdir(test_classic_lam_dir) if "_L" in f)

clf_name = 'KNN'
model_path = "./trained_" + clf_name + "_model.clf"

# The loop variable is not named `dir`, so the builtin dir() stays usable in later cells
for dir_name in dirs_L:
  dir_L_path = os.path.join(test_classic_lam_dir, dir_name)
  test_classic_lam_faces, test_classic_lam_paths = get_faces(dir_L_path)

  # The lambda value is the second-to-last "_"-separated token of the entry name
  lam_list = dir_name.split('_')
  lam = lam_list[-2]

  k=3
  ranked_acc, acc, dist, pred = face_identification(test_classic_lam_faces, test_classic_lam_paths, clf_name, model_path, k=k, exact_flag=False)

  print('Ranked accuracy (Classic RPCA) = {} (lamda = {}, k = {})'.format(ranked_acc, lam, k))

  k=5
  ranked_acc, acc, dist, pred = face_identification(test_classic_lam_faces, test_classic_lam_paths, clf_name, model_path, k=k)

  print('Ranked accuracy (Classic RPCA) = {} (lamda = {}, k = {})'.format(ranked_acc, lam, k))
  print('Exact accuracy (Classic RPCA) = {} (lamda = {})'.format(acc, lam))
  print()

## Execution - PCPF

In [None]:
# Evaluate face identification on every "_L" entry of the PCPF results folder
test_U_lam_dir = os.path.join(general_path, "results/myceleba_pcpf/")

# Sorted so the report order is reproducible (os.listdir returns entries in arbitrary order)
dirs_L = sorted(f for f in os.listdir(test_U_lam_dir) if "_L" in f)

clf_name = 'KNN'
model_path = "./trained_" + clf_name + "_model.clf"

# The loop variable is not named `dir`, so the builtin dir() stays usable in later cells
for dir_name in dirs_L:
  dir_L_path = os.path.join(test_U_lam_dir, dir_name)
  test_U_lam_faces, test_U_lam_paths = get_faces(dir_L_path)

  # The lambda value is the second-to-last "_"-separated token of the entry name
  lam_list = dir_name.split('_')
  lam = lam_list[-2]

  k=3
  ranked_acc, acc, dist, pred = face_identification(test_U_lam_faces, test_U_lam_paths, clf_name, model_path, k=k, exact_flag=False)

  print('Ranked accuracy (RPCA with side information - U feature) = {} (lamda = {}, k = {})'.format(ranked_acc, lam, k))

  k=5
  ranked_acc, acc, dist, pred = face_identification(test_U_lam_faces, test_U_lam_paths, clf_name, model_path, k=k)

  print('Ranked accuracy (RPCA with side information - U feature) = {} (lamda = {}, k = {})'.format(ranked_acc, lam, k))
  print('Exact accuracy (RPCA with side information - U feature) = {} (lamda = {})'.format(acc, lam))
  print()

## Execution - PCPFM

In [None]:
# Evaluate face identification on every "_L" entry of the PCPFM results folder
test_W_U_lam_dir = os.path.join(general_path, "results/myceleba_pcpfm/")

# Sorted so the report order is reproducible (os.listdir returns entries in arbitrary order)
dirs_L = sorted(f for f in os.listdir(test_W_U_lam_dir) if "_L" in f)

clf_name = 'KNN'
model_path = "./trained_" + clf_name + "_model.clf"

# The loop variable is not named `dir`, so the builtin dir() stays usable in later cells
for dir_name in dirs_L:
  dir_L_path = os.path.join(test_W_U_lam_dir, dir_name)
  test_W_U_lam_faces, test_W_U_lam_paths = get_faces(dir_L_path)

  # The lambda value is the second-to-last "_"-separated token of the entry name
  lam_list = dir_name.split('_')
  lam = lam_list[-2]

  k=3
  ranked_acc, acc, dist, pred = face_identification(test_W_U_lam_faces, test_W_U_lam_paths, clf_name, model_path, k=k, exact_flag=False)

  print('Ranked accuracy (RPCA with missing values and side information - U feature) = {} (lamda = {}, k = {})'.format(ranked_acc, lam, k))

  k=5
  ranked_acc, acc, dist, pred = face_identification(test_W_U_lam_faces, test_W_U_lam_paths, clf_name, model_path, k=k)

  print('Ranked accuracy (RPCA with missing values and side information - U feature) = {} (lamda = {}, k = {})'.format(ranked_acc, lam, k))
  print('Exact accuracy (RPCA with missing values and side information - U feature) = {} (lamda = {})'.format(acc, lam))
  print()

## Execution - Lafin

In [None]:
# Evaluate face identification on the faces stored under results/myceleba_lafin/inp
test_lafin_dir = os.path.join(general_path, "results/myceleba_lafin/inp")

clf_name = 'KNN'
model_path = "./trained_" + clf_name + "_model.clf"

test_lafin_faces, test_lafin_paths = get_faces(test_lafin_dir)

# First pass (k = 3) reports the ranked accuracy only; the second pass (k = 5)
# also computes and reports the exact accuracy
for k in (3, 5):
  report_exact = (k == 5)
  ranked_acc, acc, dist, pred = face_identification(
      test_lafin_faces, test_lafin_paths, clf_name, model_path, k=k, exact_flag=report_exact)

  print('Ranked accuracy (Lafin) = {} (k = {})'.format(ranked_acc, k))
  if report_exact:
    print('Exact  accuracy (Lafin) = {}'.format(acc))