In [1]:
import os
import cv2
import numpy as np
from skimage.feature import local_binary_pattern, hog
from sklearn.svm import SVC
from sklearn.preprocessing import LabelEncoder
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.metrics import classification_report
import pickle
from collections import Counter
from imblearn.over_sampling import RandomOverSampler
from multiprocessing import Pool

In [2]:
# Paths
dataset_path = 'Faces/Faces/'  # Update with your dataset path
face_cascade_path = 'data/haarcascade_frontalface_default.xml'

In [3]:
# Initialize face cascade
face_cascade = cv2.CascadeClassifier(face_cascade_path)

In [4]:
# Parameters for LBP
radius = 2
n_points = 8 * radius

In [5]:
# Define consistent face size
face_size = (128, 128)

In [6]:
# Feature extraction function using HOG and LBP
def extract_features(image):
    gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
    
    # HOG Features
    hog_features, _ = hog(gray, pixels_per_cell=(16, 16), cells_per_block=(2, 2), visualize=True, feature_vector=True)
    
    # LBP Features
    lbp = local_binary_pattern(gray, n_points, radius, method='uniform')
    lbp_hist, _ = np.histogram(lbp.ravel(), bins=np.arange(0, n_points + 3), range=(0, n_points + 2))
    lbp_hist = lbp_hist.astype('float')
    lbp_hist /= (lbp_hist.sum() + 1e-6)  # Normalize histogram
    
    return np.hstack((hog_features, lbp_hist))

In [7]:
def process_image(filename):
    label = filename.rsplit('_', 1)[0]  # Extract label from filename (everything before the last '_')
    image_path = os.path.join(dataset_path, filename)
    image = cv2.imread(image_path)
    
    # Detect faces
    faces = face_cascade.detectMultiScale(image, scaleFactor=1.1, minNeighbors=5, minSize=(30, 30))
    
    features_list = []
    labels_list = []
    
    for (x, y, w, h) in faces:
        face = image[y:y+h, x:x+w]
        
        # Resize face to a consistent size
        face = cv2.resize(face, face_size)
        
        # Extract features
        features = extract_features(face)
        
        features_list.append(features)
        labels_list.append(label)
    
    return features_list, labels_list

In [8]:
# Process images in parallel
filenames = [f for f in os.listdir(dataset_path) if f.endswith(('.jpg', '.png'))]
with Pool() as pool:
    results = pool.map(process_image, filenames)

In [9]:
# Flatten results
data = [item for sublist in [result[0] for result in results] for item in sublist]
labels = [item for sublist in [result[1] for result in results] for item in sublist]


In [10]:
# Encode labels
le = LabelEncoder()
labels = le.fit_transform(labels)

In [11]:
# Convert data and labels to numpy arrays
X = np.array(data)
y = np.array(labels)


In [12]:
# Check class distribution before resampling
print("Class distribution before resampling:", Counter(y))


Class distribution before resampling: Counter({7: 83, 18: 82, 24: 73, 1: 69, 11: 68, 15: 63, 23: 62, 10: 60, 25: 58, 4: 54, 17: 52, 2: 51, 13: 49, 9: 43, 21: 43, 6: 41, 16: 40, 28: 39, 20: 38, 8: 38, 14: 36, 5: 33, 27: 30, 30: 30, 12: 27, 0: 17, 19: 14, 3: 11, 22: 7, 29: 6, 26: 5})


In [13]:
# Resample the data
ros = RandomOverSampler(random_state=42)
X_resampled, y_resampled = ros.fit_resample(X, y)

In [14]:
# Print class distribution after resampling
print("Class distribution after resampling:", Counter(y_resampled))

Class distribution after resampling: Counter({23: 83, 17: 83, 29: 83, 9: 83, 13: 83, 11: 83, 15: 83, 12: 83, 21: 83, 16: 83, 18: 83, 2: 83, 7: 83, 25: 83, 20: 83, 19: 83, 4: 83, 6: 83, 22: 83, 24: 83, 27: 83, 1: 83, 5: 83, 0: 83, 30: 83, 10: 83, 8: 83, 14: 83, 28: 83, 3: 83, 26: 83})


In [15]:
# Hyperparameter tuning using GridSearchCV
param_grid = {
    'C': [0.1, 1, 10],
    'gamma': ['scale', 'auto'],
    'kernel': ['linear', 'rbf']
}
grid = GridSearchCV(SVC(probability=True), param_grid, refit=True, verbose=2, cv=3)
grid.fit(X_resampled, y_resampled)

Fitting 3 folds for each of 12 candidates, totalling 36 fits
[CV] END ..................C=0.1, gamma=scale, kernel=linear; total time=  14.0s
[CV] END ..................C=0.1, gamma=scale, kernel=linear; total time=  12.9s
[CV] END ..................C=0.1, gamma=scale, kernel=linear; total time=  14.7s
[CV] END .....................C=0.1, gamma=scale, kernel=rbf; total time=  37.5s
[CV] END .....................C=0.1, gamma=scale, kernel=rbf; total time=  31.3s
[CV] END .....................C=0.1, gamma=scale, kernel=rbf; total time=  25.6s
[CV] END ...................C=0.1, gamma=auto, kernel=linear; total time=  12.8s
[CV] END ...................C=0.1, gamma=auto, kernel=linear; total time=   9.5s
[CV] END ...................C=0.1, gamma=auto, kernel=linear; total time=  11.4s
[CV] END ......................C=0.1, gamma=auto, kernel=rbf; total time=  26.8s
[CV] END ......................C=0.1, gamma=auto, kernel=rbf; total time=  27.5s
[CV] END ......................C=0.1, gamma=auto

In [16]:
# Save the trained model and label encoder
with open('model/face_recognition_model.pkl', 'wb') as f:
    pickle.dump(grid, f)
with open('model/label_encoder.pkl', 'wb') as f:
    pickle.dump(le, f)

print("Model training completed and saved.")

Model training completed and saved.
