In [1]:
import face_recognition
from email.mime import base
from utils import IMAGE_PATH
import numpy as np
import random as rd
import os
import time
import pickle
import utils
import json
from sklearn import svm
from sklearn.decomposition import PCA
from sklearn.preprocessing import StandardScaler
from sklearn.neighbors import KNeighborsClassifier

In [2]:
N_SAMPLES = 10
PCA_N_COMPONENTS=25
N_NEIGHBORS=10

In [3]:
def get_image(path):
    picture = face_recognition.load_image_file(path)
    return face_recognition.face_encodings(picture)[0]


def generate_C1(set):
    return get_image(set[0]) - get_image(set[1])

def generate_C2(set):
    return get_image(set[0]) - get_image(set[2])

In [42]:
def new_train_svm(kernel='linear', C=0.1):
    with open("train_set.json") as f:
        individuals = json.load(f)

    C1 = []
    C2 = []

    count = 0
    for dir, set in individuals.items():
        C1.append(generate_C1(set))
        C2.append(generate_C2(set))
        count += 1
        # if count == 50: break
    print("C1 shape: {}".format(np.array(C1).shape))
    print("C2 shape: {}".format(np.array(C2).shape))

    clf = svm.SVC(kernel=kernel, C=C)
    
    print(f"Fitting {kernel} kernel...")
    start = time.time()
    clf.fit([*C1, *C2], [*[1 for individual in C1], *[0 for individual in C2]])
    end = time.time()
    print("Fitted in {} seconds".format(end - start))
    
    return clf

In [49]:
def new_train_knn_pca(n_components=PCA_N_COMPONENTS,k=N_NEIGHBORS):
    with open("train_set.json") as f:
        individuals = json.load(f)

    C1 = []
    C2 = []

    for dir, set in individuals.items():
        print("Generating C1 for {}...".format(dir))
        C1.append(generate_C1(set))
        print("Generating C2 for {}...".format(dir))
        C2.append(generate_C2(set))
    
    X = [*C1, *C2]

    scaler = StandardScaler()
    scaler.fit(X)
    X = scaler.transform(X)
    pca = PCA(n_components=n_components)
    pca.fit(X)
    X = pca.transform(X)
    knn = KNeighborsClassifier(n_neighbors=k)
    knn.fit(X, [*[1 for individual in C1], *[0 for individual in C2]])
    return knn

In [52]:
def get_test_sets():
    with open("test_set.json") as f:
        individuals = json.load(f)

    C1 = []
    C2 = []

    for dir, set in individuals.items():
        print("Generating C1 for {}...".format(dir))
        C1.append(generate_C1(set))
        print("Generating C2 for {}...".format(dir))
        C2.append(generate_C2(set))
    
    return[*C1, *C2]

In [44]:
clf = new_train_svm()

C1 shape: (100, 128)
C2 shape: (100, 128)
Fitting linear kernel...
Fitted in 0.004315614700317383 seconds


In [46]:
with open("test_set.json") as f:
        individuals = json.load(f)

switch = 0
correct = 0
for dir, set in individuals.items():
    if (switch):
        res = clf.predict([get_image(set[0]) - get_image(set[1])])
        if (res[0] == np.int64(1)): correct +=1
        # print(type(res[0]))
        switch = 0
    else:
        res = clf.predict([get_image(set[0]) - get_image(set[2])])
        if (res[0] == np.int64(0)): correct +=1
        # print(res[0])
        switch = 1

print("Accuracy = {}".format(correct/len(individuals)))



Accuracy = 0.53


In [53]:
knn = new_train_knn_pca()

Generating C1 for 333...
Generating C2 for 333...
Generating C1 for 722...
Generating C2 for 722...
Generating C1 for 320...
Generating C2 for 320...
Generating C1 for 943...
Generating C2 for 943...
Generating C1 for 309...
Generating C2 for 309...
Generating C1 for 936...
Generating C2 for 936...
Generating C1 for 324...
Generating C2 for 324...
Generating C1 for 811...
Generating C2 for 811...
Generating C1 for 442...
Generating C2 for 442...
Generating C1 for 947...
Generating C2 for 947...
Generating C1 for 141...
Generating C2 for 141...
Generating C1 for 407...
Generating C2 for 407...
Generating C1 for 147...
Generating C2 for 147...
Generating C1 for 611...
Generating C2 for 611...
Generating C1 for 59...
Generating C2 for 59...
Generating C1 for 321...
Generating C2 for 321...
Generating C1 for 640...
Generating C2 for 640...
Generating C1 for 1000...
Generating C2 for 1000...
Generating C1 for 310...
Generating C2 for 310...
Generating C1 for 592...
Generating C2 for 592...


In [54]:
with open("test_set.json") as f:
        individuals = json.load(f)

correct = 0
X = get_test_sets()
scaler = StandardScaler()
scaler.fit(X)
X = scaler.transform(X)
pca = PCA(n_components=PCA_N_COMPONENTS)
pca.fit(X)
X = pca.transform(X)
count = 0
for set in X:
    if (count < 50):
        res = knn.predict([set])
        if (res[0] == np.int64(1)): correct +=1
        count += 1
    else:
        res = knn.predict([set])
        if (res[0] == np.int64(0)): correct +=1


print("Accuracy = {}".format(correct/len(individuals)))


Generating C1 for 911...
Generating C2 for 911...
Generating C1 for 488...
Generating C2 for 488...
Generating C1 for 277...
Generating C2 for 277...
Generating C1 for 981...
Generating C2 for 981...
Generating C1 for 303...
Generating C2 for 303...
Generating C1 for 656...
Generating C2 for 656...
Generating C1 for 942...
Generating C2 for 942...
Generating C1 for 977...
Generating C2 for 977...
Generating C1 for 990...
Generating C2 for 990...
Generating C1 for 941...
Generating C2 for 941...
Generating C1 for 437...
Generating C2 for 437...
Generating C1 for 863...
Generating C2 for 863...
Generating C1 for 682...
Generating C2 for 682...
Generating C1 for 297...
Generating C2 for 297...
Generating C1 for 435...
Generating C2 for 435...
Generating C1 for 549...
Generating C2 for 549...
Generating C1 for 800...
Generating C2 for 800...
Generating C1 for 189...
Generating C2 for 189...
Generating C1 for 897...
Generating C2 for 897...
Generating C1 for 1008...
Generating C2 for 1008..

In [51]:
knn.predict([get_image("images/00911/00911fa010_960530.tif") - get_image("images/00510/00510fa010h_940519.tif")])

ValueError: X has 128 features, but KNeighborsClassifier is expecting 25 features as input.