In [17]:
import os
import cv2
import numpy as np
import pandas as pd

from skimage.feature import hog

from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.preprocessing import StandardScaler
from sklearn.decomposition import PCA
from sklearn.pipeline import Pipeline

from sklearn.linear_model import LogisticRegression
from sklearn.tree import DecisionTreeClassifier
from sklearn.svm import SVC
from sklearn.ensemble import RandomForestClassifier
from sklearn.naive_bayes import GaussianNB

from sklearn.metrics import accuracy_score, classification_report


In [18]:
dataset_path = r"C:\Users\My PC\Downloads\jaffe\jaffe"

emotion_labels = {
    "AN": 0,
    "DI": 1,
    "FE": 2,
    "HA": 3,
    "NE": 4,
    "SA": 5,
    "SU": 6
}

X = []
y = []

for emotion, label in emotion_labels.items():
    folder = os.path.join(dataset_path, emotion)
    for file in os.listdir(folder):
        img_path = os.path.join(folder, file)

        img = cv2.imread(img_path, cv2.IMREAD_GRAYSCALE)
        img = cv2.resize(img, (128, 128))

        features = hog(
            img,
            orientations=9,
            pixels_per_cell=(8, 8),
            cells_per_block=(2, 2),
            block_norm="L2-Hys"
        )

        X.append(features)
        y.append(label)

X = np.array(X)
y = np.array(y)

print("Dataset Shape:", X.shape, y.shape)

Dataset Shape: (213, 8100) (213,)


In [19]:
X_train, X_test, y_train, y_test = train_test_split(X, y,test_size=0.2,random_state=42,stratify=y)
print("Train shape:", X_train.shape)
print("Test shape:", X_test.shape)

Train shape: (170, 8100)
Test shape: (43, 8100)


In [20]:
pca_components = [30, 50, 80, 100, 120]

In [25]:
models = {
    "Logistic Regression": {
        "model": LogisticRegression(max_iter=5000),
        "params": {
            "pca__n_components": pca_components,
            "clf__C": [0.1, 1, 10]
        }
    },

    "Decision Tree": {
        "model": DecisionTreeClassifier(),
        "params": {
            "pca__n_components": pca_components,
            "clf__max_depth": [10, 20, None],
            "clf__min_samples_split": [2, 5]
        }
    },

    "SVM": {
    "model": SVC(kernel="rbf"),
    "params": {
        "clf__C": [1, 5, 10, 20, 50],
        "clf__gamma": ["scale", 0.01, 0.001]
    }
},


    "Random Forest": {
        "model": RandomForestClassifier(),
        "params": {
            "pca__n_components": [50, 80, 100],
            "clf__n_estimators": [200, 300],
            "clf__max_depth": [None, 20]
        }
    },

    "Naive Bayes": {
        "model": GaussianNB(),
        "params": {
            "pca__n_components": [30, 50, 80]
        }
    }
}

In [27]:
results = []

for name, mp in models.items():
    print(f"\n================ {name} =================")

    if "SVM" in name:
        pipe = Pipeline([
            ("scaler", StandardScaler()),
            ("clf", mp["model"])
        ])
    else:
        pipe = Pipeline([
            ("scaler", StandardScaler()),
            ("pca", PCA(svd_solver="randomized", whiten=True)),
            ("clf", mp["model"])
        ])

    gs = GridSearchCV(
        pipe,
        mp["params"],
        cv=5,
        scoring="accuracy",
        n_jobs=-1
    )

    gs.fit(X_train, y_train)

    best_model = gs.best_estimator_
    y_pred = best_model.predict(X_test)

    acc = accuracy_score(y_test, y_pred)

    print("Best Params:", gs.best_params_)
    print("Test Accuracy:", acc)
    print(classification_report(y_test, y_pred))

    results.append([name, acc])


Best Params: {'clf__C': 0.1, 'pca__n_components': 120}
Test Accuracy: 0.813953488372093
              precision    recall  f1-score   support

           0       0.60      1.00      0.75         6
           1       1.00      0.67      0.80         6
           2       1.00      1.00      1.00         7
           3       0.75      1.00      0.86         6
           4       0.75      0.50      0.60         6
           5       0.83      0.83      0.83         6
           6       1.00      0.67      0.80         6

    accuracy                           0.81        43
   macro avg       0.85      0.81      0.81        43
weighted avg       0.85      0.81      0.81        43


Best Params: {'clf__max_depth': 10, 'clf__min_samples_split': 2, 'pca__n_components': 50}
Test Accuracy: 0.4186046511627907
              precision    recall  f1-score   support

           0       0.29      0.33      0.31         6
           1       0.33      0.33      0.33         6
           2       0.40   

In [28]:
results_df = pd.DataFrame(results, columns=["Model", "Test Accuracy"])
results_df.sort_values(by="Test Accuracy", ascending=False)

Unnamed: 0,Model,Test Accuracy
0,Logistic Regression,0.813953
2,SVM,0.72093
3,Random Forest,0.72093
4,Naive Bayes,0.511628
1,Decision Tree,0.418605


In [29]:
import joblib
joblib.dump(best_model, "best_logistic_model.pkl")

['best_logistic_model.pkl']