In [9]:

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import os
import cv2
from skimage.feature import hog
from sklearn.model_selection import train_test_split, KFold
from sklearn.model_selection import KFold, cross_val_score, train_test_split, GridSearchCV
from sklearn.metrics import classification_report, confusion_matrix, accuracy_score, cohen_kappa_score, f1_score
from sklearn.svm import SVC
from sklearn.preprocessing import StandardScaler

# Define the folders containing the images
train_path = '/Users/ummefahmidaakter/Downloads/cars/Train'
test_path = '/Users/ummefahmidaakter/Downloads/cars/Test'
folders = ['audi', 'lamborghini', 'mercedes']

# Load the training images and labels from the dataset folder
train_images = []
train_labels = []
for folder in folders:
    folder_path = os.path.join(train_path, folder)
    for filename in os.listdir(folder_path):
        image_path = os.path.join(folder_path, filename)
        image = cv2.imread(image_path, cv2.IMREAD_GRAYSCALE)
        image = cv2.resize(image, (224, 224))
        hog_features = hog(image, orientations=9, pixels_per_cell=(8, 8), cells_per_block=(2, 2), feature_vector=True)
        train_images.append(hog_features)
        train_labels.append(folders.index(folder))

# Load the testing images and labels from the dataset folder
test_images = []
test_labels = []
for folder in folders:
    folder_path = os.path.join(test_path, folder)
    for filename in os.listdir(folder_path):
        image_path = os.path.join(folder_path, filename)
        image = cv2.imread(image_path, cv2.IMREAD_GRAYSCALE)
        image = cv2.resize(image, (224, 224))
        hog_features = hog(image, orientations=9, pixels_per_cell=(8, 8), cells_per_block=(2, 2), feature_vector=True)
        test_images.append(hog_features)
        test_labels.append(folders.index(folder))

# Convert the images and labels to numpy arrays
X_train = np.array(train_images)
y_train = np.array(train_labels)
X_test = np.array(test_images)
y_test = np.array(test_labels)

# Scale the features
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

from sklearn.linear_model import LogisticRegression

# Train the logistic regression model with default hyperparameters
logistic_regression = LogisticRegression(max_iter=2000)
logistic_regression.fit(X_train, y_train)

from sklearn.model_selection import cross_val_score

# Evaluate the logistic regression model with 10-fold cross-validation
scores = cross_val_score(logistic_regression, X_train, y_train, cv=10)
print("Accuracy: %0.2f (+/- %0.2f)" % (scores.mean(), scores.std() * 2))

from sklearn.model_selection import GridSearchCV

# Define the parameter grid
param_grid = {
    'C': [0.1, 1, 10, 100],
}

# Create a GridSearchCV object
logistic_regression_grid = GridSearchCV(LogisticRegression(max_iter=1000), param_grid, cv=10)

# Fit the GridSearchCV object to the training data
logistic_regression_grid.fit(X_train, y_train)

# Print the best hyperparameters
print("Best hyperparameters: ", logistic_regression_grid.best_params_)

from sklearn.metrics import confusion_matrix, classification_report

# Calculate confusion matrix
y_pred = logistic_regression.predict(X_test)
cm = confusion_matrix(y_test, y_pred)

from sklearn.metrics import cohen_kappa_score

# Calculate Cohen's Kappa
kappa = cohen_kappa_score(y_test, y_pred)
print("Cohen's Kappa: %.2f" % kappa)

Accuracy: 0.60 (+/- 0.56)
Best hyperparameters:  {'C': 1}
Cohen's Kappa: 0.45


In [10]:
from sklearn.model_selection import KFold
from sklearn.metrics import accuracy_score
from sklearn.metrics import cohen_kappa_score
from sklearn.model_selection import StratifiedKFold
from sklearn.metrics import accuracy_score, cohen_kappa_score, confusion_matrix
from sklearn.linear_model import LogisticRegression

# Initialize lists to store the performance metrics for each fold
confusion_matrices = []
accuracies = []
precisions = []
recalls = []
f1_scores = []
specificities = []
sensitivities = []
kappas = []

# Initialize variables to keep track of the best average metric and fold
best_metric = 0
best_fold = None

# Define the number of splits for cross-validation
n_splits = 10

# Create a KFold object for cross-validation
kf = StratifiedKFold(n_splits=n_splits, shuffle=True, random_state=42)

# Loop over the splits and train/evaluate the logistic regression model
for i, (train_index, test_index) in enumerate(kf.split(X_train, y_train)):
    # Split the data into training and testing sets
    X_train_fold, X_test_fold = X_train[train_index], X_train[test_index]
    y_train_fold, y_test_fold = y_train[train_index], y_train[test_index]

    # Train the logistic regression model with optimal hyperparameters
    lr = LogisticRegression(solver='liblinear', penalty='l2', C= 1, random_state=42)
    lr.fit(X_train, y_train)

    # Evaluate the logistic regression model on the testing set
    y_pred = lr.predict(X_test)

    # Calculate the confusion matrix for the current fold
    confusion_matrix_fold = confusion_matrix(y_test, y_pred)
    confusion_matrices.append(confusion_matrix_fold)

    # Calculate the accuracy for the current fold
    accuracy_fold = accuracy_score(y_test, y_pred)
    accuracies.append(accuracy_fold)

    # Calculate the precision, recall, f1 score, specificity, and sensitivity for each class in the current fold
    classes = np.unique(y_train)
    for class_name in classes:
        # Calculate the true positives, false positives, false negatives, and true negatives for the current class
        tp = np.sum((y_test == class_name) & (y_pred == class_name))
        fp = np.sum((y_test != class_name) & (y_pred == class_name))
        fn = np.sum((y_test == class_name) & (y_pred != class_name))
        tn = np.sum((y_test != class_name) & (y_pred != class_name))
        
        # Calculate the precision, recall, f1 score, specificity, and sensitivity for the current class
        precision_class = 0 if (tp + fp) == 0 else tp / (tp + fp)
        recall_class = 0 if (tp + fn) == 0 else tp / (tp + fn)
        f1_score_class = 2 * precision_class * recall_class / (precision_class + recall_class) if (precision_class + recall_class) != 0 else 0
        specificity_class = tn / (tn + fp)
        sensitivity_class = tp / (tp + fn)

        # Append the metrics to the lists
        precisions.append(precision_class)
        recalls.append(recall_class)
        f1_scores.append(f1_score_class)
        specificities.append(specificity_class)
        sensitivities.append(sensitivity_class)

# Calculate the Cohen's kappa score for the current fold
    kappa_fold = cohen_kappa_score(y_test, y_pred, weights='quadratic')
    kappas.append(kappa_fold)

    # Update the best average metric and fold if the current fold has a better average metric
    if accuracy_fold > best_metric:
        best_metric = accuracy_fold
        best_fold = i + 1


    # Print the performance metrics for the current fold
    print("Fold", i+1, "metrics:")
    print("Confusion matrix:\n", confusion_matrix_fold)
    print("Accuracy: %.2f" % accuracy_fold)
    print("Precision: %.2f" % precision_class)
    print("Recall: %.2f" % recall_class)
    print("F1 score: %.2f" % f1_score_class)
    print("Specificity: %.2f" % specificity_class)
    print("Sensitivity: %.2f" % sensitivity_class)
    print("Cohen's kappa: %.2f\n" % kappa_fold)

# Calculate the average performance metrics across all folds
average_confusion_matrix = np.mean(confusion_matrices, axis=0)
print("Average confusion matrix:\n", np.array2string(average_confusion_matrix, separator=', ', floatmode='fixed', precision=1, suppress_small=True))
print("Average precision: %.2f" % np.mean(precisions))
print("Average recall: %.2f" % np.mean(recalls))
print("Average F1 score: %.2f" % np.mean(f1_scores))
print("Average specificity: %.2f" % np.mean(specificities))
print("Average sensitivity: %.2f" % np.mean(sensitivities))
print("Average Cohen's kappa score: %.2f" % np.mean(kappas))
if best_fold is not None:
    print("Best fold: %d" % best_fold)
else:
    print("No best fold found.")

Fold 1 metrics:
Confusion matrix:
 [[ 5  4  0]
 [ 3 27  0]
 [ 5 10  4]]
Accuracy: 0.62
Precision: 1.00
Recall: 0.21
F1 score: 0.35
Specificity: 1.00
Sensitivity: 0.21
Cohen's kappa: 0.23

Fold 2 metrics:
Confusion matrix:
 [[ 5  4  0]
 [ 3 27  0]
 [ 5 10  4]]
Accuracy: 0.62
Precision: 1.00
Recall: 0.21
F1 score: 0.35
Specificity: 1.00
Sensitivity: 0.21
Cohen's kappa: 0.23

Fold 3 metrics:
Confusion matrix:
 [[ 5  4  0]
 [ 3 27  0]
 [ 5 10  4]]
Accuracy: 0.62
Precision: 1.00
Recall: 0.21
F1 score: 0.35
Specificity: 1.00
Sensitivity: 0.21
Cohen's kappa: 0.23

Fold 4 metrics:
Confusion matrix:
 [[ 5  4  0]
 [ 3 27  0]
 [ 5 10  4]]
Accuracy: 0.62
Precision: 1.00
Recall: 0.21
F1 score: 0.35
Specificity: 1.00
Sensitivity: 0.21
Cohen's kappa: 0.23

Fold 5 metrics:
Confusion matrix:
 [[ 5  4  0]
 [ 3 27  0]
 [ 5 10  4]]
Accuracy: 0.62
Precision: 1.00
Recall: 0.21
F1 score: 0.35
Specificity: 1.00
Sensitivity: 0.21
Cohen's kappa: 0.23

Fold 6 metrics:
Confusion matrix:
 [[ 5  4  0]
 [ 3 27  0]
 