In [1]:
import os

import cv2 as cv
import joblib
import numpy as np
import pandas as pd

from sklearn.neighbors import KNeighborsClassifier
from sklearn.svm import SVC

# NOTE(review): sklearn.calibration only exposes LabelEncoder because it imports
# it for its own use; the public location is sklearn.preprocessing. The second
# import below is the effective binding, so the calibration line can be dropped.
from sklearn.calibration import LabelEncoder
from sklearn.preprocessing import LabelEncoder
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report, accuracy_score

#### Loading dataset

In [2]:
# Root folder of the resized images, one sub-folder per class.
# Built with os.path.join so the separator matches the host OS: a literal
# "..\\data\\resized" is a single (non-existent) file name on POSIX systems.
DATASET_PATH = os.path.join("..", "data", "resized")

In [3]:
def load_dataset(dataset_path, image_shape=(60, 60, 3)):
    """Load every image under ``dataset_path`` and label it by its parent folder.

    The directory tree is walked recursively. Each file is read with
    ``cv.imread`` and labelled with the name of the directory that directly
    contains it. Files that cannot be decoded, or whose array shape differs
    from ``image_shape``, have their path printed and are skipped.

    Parameters
    ----------
    dataset_path : str
        Root directory to scan.
    image_shape : tuple of int, optional
        Shape every kept image must have. Defaults to ``(60, 60, 3)``.

    Returns
    -------
    data : numpy.ndarray
        Stacked images with shape ``(n_images, *image_shape)``.
    labels : numpy.ndarray
        Parent-directory name of each kept image, in the same order as ``data``.
    """
    image_shape = tuple(image_shape)

    data = []
    labels = []
    for root, _dirs, files in os.walk(dataset_path):
        for name in files:
            image_path = os.path.join(root, name)
            image = cv.imread(image_path)
            # cv.imread signals an unreadable / non-image file by returning
            # None rather than raising, so guard before touching .shape.
            if image is None or image.shape != image_shape:
                print(image_path)
                continue
            data.append(image)
            # Parent directory name is the class label; using dirname/basename
            # avoids depending on which separator appears in the path.
            labels.append(os.path.basename(os.path.dirname(image_path)))

    if not data:
        # Keep the image dimensions even when nothing was loaded so callers
        # still receive a 4-D array.
        return np.empty((0, *image_shape), dtype=np.uint8), np.array(labels, dtype=str)

    return np.array(data), np.array(labels)

In [4]:
# Read every image under DATASET_PATH; paths of skipped files are printed below.
data, labels = load_dataset(DATASET_PATH)

# Fail fast here: an empty or misaligned dataset would otherwise only surface
# later as an obscure reshape / train_test_split error.
assert len(data) == len(labels), "images and labels are misaligned"
assert len(data) > 0, f"no usable images found under {DATASET_PATH!r}"

..\data\resized\happy\2556272.png
..\data\resized\sad\6383217.png


#### Transforming dataset

In [5]:
# Flatten each image into a single feature vector. The feature count is taken
# from whatever trailing dimensions are present, so re-running this cell on the
# already-flattened array is a no-op instead of an IndexError on data.shape[2].
data = data.reshape(len(data), int(np.prod(data.shape[1:])))

# Encode the class-name strings as integers. When `labels` is already
# integer-coded (this cell was run before) the encoder is left untouched so
# `le.classes_` keeps the original class names used by the reports below.
if labels.dtype.kind not in "iu":
    le = LabelEncoder()
    labels = le.fit_transform(labels)

In [6]:
# Hold out 25% of the samples for evaluation; the fixed seed makes the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    data,
    labels,
    test_size=0.25,
    random_state=42,
)

#### KNN Training

In [7]:
# 5-nearest-neighbour classifier trained on the flattened pixel vectors.
# fit() hands back the estimator itself, so build-and-train chains into one
# expression; the trailing name keeps the estimator repr as the cell output.
model_knn = KNeighborsClassifier(n_neighbors=5).fit(X_train, y_train)
model_knn

In [9]:
# Per-class precision / recall / F1 of the KNN model on the held-out split.
knn_test_pred = model_knn.predict(X_test)
knn_report = classification_report(y_test, knn_test_pred, target_names=le.classes_)
print(knn_report)

              precision    recall  f1-score   support

       happy       0.76      0.82      0.79       442
         sad       0.53      0.42      0.47       205

    accuracy                           0.70       647
   macro avg       0.64      0.62      0.63       647
weighted avg       0.68      0.70      0.69       647



In [10]:
# One-row summary of KNN accuracy on the held-out and training splits.
knn_scores = {
    "Test Accuracy": accuracy_score(y_test, model_knn.predict(X_test)),
    "Train Accuracy": accuracy_score(y_train, model_knn.predict(X_train)),
}
df_knn_report = pd.DataFrame([knn_scores], index=["KNN"])

df_knn_report

Unnamed: 0,Test Accuracy,Train Accuracy
KNN,0.697063,0.779154


#### Support Vector Machine

In [11]:
# Support vector classifier with scikit-learn's default hyper-parameters.
# fit() returns the estimator, so the trailing name reproduces the cell output.
model_svc = SVC().fit(X_train, y_train)
model_svc

In [13]:
# Cache the SVC predictions once; the report and accuracy cells below reuse them.
test_pred, train_pred = (model_svc.predict(split) for split in (X_test, X_train))

In [14]:
# Per-class precision / recall / F1 of the SVC model on the held-out split.
svc_report = classification_report(y_test, test_pred, target_names=le.classes_)
print(svc_report)

              precision    recall  f1-score   support

       happy       0.77      0.95      0.85       442
         sad       0.79      0.38      0.51       205

    accuracy                           0.77       647
   macro avg       0.78      0.67      0.68       647
weighted avg       0.78      0.77      0.74       647



In [15]:
# One-row summary of SVC accuracy, built from the cached predictions.
svc_scores = {
    "Test Accuracy": accuracy_score(y_test, test_pred),
    "Train Accuracy": accuracy_score(y_train, train_pred),
}
df_svc_report = pd.DataFrame([svc_scores], index=["SVC"])
df_svc_report

Unnamed: 0,Test Accuracy,Train Accuracy
SVC,0.771252,0.813725


#### Comparison

In [16]:
# Stack the two one-row summaries so both models can be compared side by side.
df_comparison = pd.concat([df_knn_report, df_svc_report])
df_comparison

Unnamed: 0,Test Accuracy,Train Accuracy
KNN,0.697063,0.779154
SVC,0.771252,0.813725


#### Save models

In [17]:
# Persist both fitted classifiers next to the notebook's parent folder.
MODELS_DIR = "../models"

# joblib.dump does not create missing directories, so make sure the target
# folder exists before writing (no-op when it is already there).
os.makedirs(MODELS_DIR, exist_ok=True)

# NOTE(review): the fitted LabelEncoder `le` is not saved here; anything that
# loads these files needs it to map integer predictions back to class names.
joblib.dump(model_knn, f"{MODELS_DIR}/model_knn.pkl")
joblib.dump(model_svc, f"{MODELS_DIR}/model_svc.pkl")

['model_svc.pkl']