In [58]:
import glob
import joblib
import numpy as np
from sklearn.svm import SVC
from sklearn.model_selection import train_test_split
from sklearn.utils import shuffle
from sklearn.metrics import accuracy_score
from sklearn.preprocessing import StandardScaler
from sklearn.svm import LinearSVC
from sklearn.pipeline import make_pipeline
from tensorflow.keras.layers import Dropout
from sklearn.pipeline import Pipeline
from sklearn.feature_selection import SelectFromModel

In [59]:
def load_embeddings(pattern):
    """Load every ``.npy`` file matching ``pattern`` into one array.

    Paths are sorted before loading so the row order is identical on every
    run; ``glob.glob`` on its own returns matches in a filesystem-dependent
    order.

    Parameters
    ----------
    pattern : str
        Glob pattern selecting the files, e.g. ``'face_embeddings/*.npy'``.

    Returns
    -------
    numpy.ndarray
        The loaded arrays combined with ``np.array`` (one entry per file, in
        sorted path order). Empty when no file matches the pattern.
    """
    return np.array([np.load(path) for path in sorted(glob.glob(pattern))])


# Read positive embeddings
face_embeddings = load_embeddings('face_embeddings/*.npy')

# Read negative embeddings
neg_embeddings = load_embeddings('neg_embeddings/*.npy')

In [60]:
def normalize(data):
    """Min-max scale ``data`` into the range [0, 1].

    The minimum and maximum are taken over the whole array (all axes), so
    every element is shifted and scaled by the same two constants.

    Parameters
    ----------
    data : array_like
        Numeric values to rescale; must contain at least one element.

    Returns
    -------
    numpy.ndarray
        Array of the same shape as ``data``. A constant input maps to all
        zeros instead of producing NaNs from a 0/0 division.
    """
    data = np.asarray(data)
    shifted = data - data.min()
    span = shifted.max()
    if span == 0:
        # Every value is identical: divide by 1 so the result stays all zeros.
        span = 1
    return shifted / span

In [61]:
# Flatten every embedding to a single feature row. Unlike squeeze(), reshape
# keeps the sample axis even when a class contains only one file.
face_embeddings = face_embeddings.reshape(face_embeddings.shape[0], -1)
neg_embeddings = neg_embeddings.reshape(neg_embeddings.shape[0], -1)
print(face_embeddings.shape)
print(neg_embeddings.shape)

# Rescale both classes with ONE shared min/max. Normalizing each class on its
# own applies a different offset and scale per label, which leaks the label
# into the features: the classifier can look perfect on this data and still
# fail on new inputs.
# NOTE(review): inputs at inference time need this same rescaling (or drop it
# and rely on the pipeline's StandardScaler alone) -- confirm with the caller.
n_faces = face_embeddings.shape[0]
all_embeddings = normalize(np.concatenate((face_embeddings, neg_embeddings), axis=0))
face_embeddings, neg_embeddings = all_embeddings[:n_faces], all_embeddings[n_faces:]

(793, 8631)
(793, 8631)


### Create the X and y data sets, labelling faces as 1 and negatives as 0

In [68]:
# Create labels for face embeddings (1) and negative data (0)
face_labels = np.ones(face_embeddings.shape[0])
negative_labels = np.zeros(neg_embeddings.shape[0])

# Combine face embeddings and negative data
X = np.concatenate((face_embeddings, neg_embeddings), axis=0)
y = np.concatenate((face_labels, negative_labels), axis=0)

# Shuffle the data; seeded so a fresh Restart & Run All yields the same order
X, y = shuffle(X, y, random_state=42)

# Split data into training and testing sets. stratify=y keeps the face/negative
# ratio the same in both splits.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42, stratify=y
)

[1. 1. 1. ... 0. 0. 0.]


In [63]:
# Linear SVM pipeline: standardize -> L1-based feature selection -> classifier.
# Scaling is done by the pipeline's own 'scaler' step only. Standardizing
# X_train/X_test beforehand as well would scale the data twice and, because
# that outer scaler is not part of the pickled pipeline, the saved model would
# only work on inputs that had already been scaled by hand.
svm_pipeline = Pipeline([
    ('scaler', StandardScaler()),
    ('feature_selection', SelectFromModel(LinearSVC(penalty='l1', dual=False, random_state=42))),
    ('svm_pipeline', LinearSVC(random_state=42))
])

svm_pipeline.fit(X_train, y_train)

# Keep the fitted scaler reachable under the name this cell used to define.
scaler = svm_pipeline.named_steps['scaler']

# Display the fitted pipeline as the cell output.
svm_pipeline



In [64]:
# Predict labels for the held-out test split with the fitted pipeline
y_pred = svm_pipeline.predict(X_test)

# Fraction of test samples whose predicted label equals the true label
accuracy = accuracy_score(y_true=y_test, y_pred=y_pred)
print('Accuracy:', accuracy)
# TODO: Fix the model always outputting 1.

Accuracy: 1.0


In [65]:
# Serialize the fitted pipeline to disk; joblib.dump returns the list of
# files it wrote, which is shown as the cell output.
filename = 'svm_model.pkl'
joblib.dump(value=svm_pipeline, filename=filename)

['svm_model.pkl']