In [8]:
import torch
import torchvision.datasets as datasets
import torchvision.transforms as transforms
import os

# Define the path to your data folders
train_data_path = '/Users/ummefahmidaakter/Downloads/cars/Train'
test_data_path = '/Users/ummefahmidaakter/Downloads/cars/Test'

# Define transforms to preprocess the data
transform = transforms.Compose([
    transforms.Resize((224, 224)),  # resize the images to 224x224 pixels
    transforms.ToTensor(),  # convert the images to PyTorch tensors
    transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))  # normalize the images
])

# Load the dataset
train_dataset = datasets.ImageFolder(root=train_data_path, transform=transform)
test_dataset = datasets.ImageFolder(root=test_data_path, transform=transform)

# Define a DataLoader to load the data in batches during training and validation
train_loader = torch.utils.data.DataLoader(train_dataset, batch_size=32, shuffle=True)
test_loader = torch.utils.data.DataLoader(test_dataset, batch_size=32, shuffle=False)

In [9]:
import torch.nn as nn
import torch.optim as optim

# Define the neural network model
class Net(nn.Module):
    def __init__(self, hidden_size=256, dropout=0.2):
        super(Net, self).__init__()
        self.fc1 = nn.Linear(224 * 224 * 3, hidden_size)
        self.fc2 = nn.Linear(hidden_size, hidden_size // 2)
        self.fc3 = nn.Linear(hidden_size // 2, 3)
        self.dropout = nn.Dropout(dropout)

    def forward(self, x):
        x = x.view(-1, 224 * 224 * 3)
        x = nn.functional.relu(self.fc1(x))
        x = self.dropout(x)
        x = nn.functional.relu(self.fc2(x))
        x = self.dropout(x)
        x = self.fc3(x)
        return x

# Instantiate the neural network model
net = Net()

# Define the optimizer
lr = 0.001
optimizer = optim.Adam(net.parameters(), lr=lr)

# Define the loss function
criterion = nn.CrossEntropyLoss()

# Train the model
for epoch in range(10):
    for i, (images, labels) in enumerate(train_loader):
        optimizer.zero_grad()
        outputs = net(images)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()
        if i % 50 == 0:
            print('Epoch [%d/%d], Iter [%d/%d], Loss: %.4f' % (epoch+1, 10, i+1, len(train_dataset)//16, loss.item()))

Epoch [1/10], Iter [1/4], Loss: 1.1020
Epoch [2/10], Iter [1/4], Loss: 2.4884
Epoch [3/10], Iter [1/4], Loss: 1.7794
Epoch [4/10], Iter [1/4], Loss: 2.1000
Epoch [5/10], Iter [1/4], Loss: 1.5948
Epoch [6/10], Iter [1/4], Loss: 0.4654
Epoch [7/10], Iter [1/4], Loss: 1.5268
Epoch [8/10], Iter [1/4], Loss: 0.2683
Epoch [9/10], Iter [1/4], Loss: 0.8797
Epoch [10/10], Iter [1/4], Loss: 0.9538


In [None]:
***
The input size of the model is 224 x 224 x 3 = 150,528, which is flattened to a 1D tensor of size 1 x 150,528. 
The model also includes dropout with a probability of 0.2, which helps in reducing overfitting. 
The number of epochs and learning rate used for training are 10 and 0.001, respectively. 
The batch size used for training is 16.
A grid search is performed to find the best hyperparameters for the model, including the number of neurons in the first hidden layer, 
dropout probability, learning rate, and batch size.
***

In [10]:
from sklearn.model_selection import GridSearchCV
import itertools

hidden_sizes = [128, 256, 512]
dropouts = [0.2, 0.4, 0.6]
learning_rates = [0.001, 0.01, 0.1]
batch_sizes = [16, 32, 64]

# Initialize variables to store the best accuracy and corresponding hyperparameters
best_accuracy = 0
best_params = {}

# Loop through all possible combinations of hyperparameters
for params in itertools.product(hidden_sizes, dropouts, learning_rates, batch_sizes):
    
    # Unpack the hyperparameters
    hidden_size, dropout, lr, batch_size = params
    # Create a neural network with the given hyperparameters
    net = Net(hidden_size=hidden_size, dropout=dropout)
    # Define an optimizer with the given learning rate
    optimizer = optim.Adam(net.parameters(), lr=lr)
    # Create data loaders for the training and test sets with the given batch size
    train_loader = torch.utils.data.DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
    test_loader = torch.utils.data.DataLoader(test_dataset, batch_size=batch_size, shuffle=False)
    # Train the network for 10 epochs on the training set
    for epoch in range(10):
        for i, (images, labels) in enumerate(train_loader):
            optimizer.zero_grad()
            outputs = net(images)
            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()
        # Evaluate the network on the test set and calculate its accuracy
        with torch.no_grad():
            correct = 0
            total = 0
            for images, labels in test_loader:
                outputs = net(images)
                _, predicted = torch.max(outputs.data, 1)
                total += labels.size(0)
                correct += (predicted == labels).sum().item()
            if total > 0:
                accuracy = 100 * correct / total
            else:
                accuracy = 0
        # Update the best accuracy and corresponding hyperparameters if the current accuracy is better
        if accuracy > best_accuracy:
            best_accuracy = accuracy
            best_params = {'hidden_size': hidden_size, 'dropout': dropout, 'lr': lr, 'batch_size': batch_size}
        
# Print the best hyperparameters and corresponding accuracy
print('Best hyperparameters: Hidden size: %d, Dropout: %.1f, Learning rate: %.3f, Batch size: %d, Accuracy: %.2f' % (best_params['hidden_size'], best_params['dropout'], best_params['lr'], best_params['batch_size'], best_accuracy))


Best hyperparameters: Hidden size: 256, Dropout: 0.2, Learning rate: 0.010, Batch size: 32, Accuracy: 67.24


In [12]:
import torch
import torch.nn as nn
import torch.optim as optim
import numpy as np
from sklearn.model_selection import KFold
from sklearn import metrics

# Define the neural network model
class Net(nn.Module):
    def __init__(self, hidden_size=256, dropout=0.2):
        super(Net, self).__init__()
        self.fc1 = nn.Linear(224 * 224 * 3, hidden_size)
        self.fc2 = nn.Linear(hidden_size, hidden_size // 2)
        self.fc3 = nn.Linear(hidden_size // 2, 3)
        self.dropout = nn.Dropout(dropout)

    def forward(self, x):
        x = x.view(-1, 224 * 224 * 3)
        x = nn.functional.relu(self.fc1(x))
        x = self.dropout(x)
        x = nn.functional.relu(self.fc2(x))
        x = self.dropout(x)
        x = self.fc3(x)
        return x

# Define the k-fold cross-validation
k = 10
kf = KFold(n_splits=k, shuffle=True)

best_metric = 0.0
best_fold = 0

# Train and evaluate the model on each fold
accuracy_list = []
precision_list = []
recall_list = []
f1_score_list = []
specificity_list = []
sensitivity_list = []
confusion_matrix_list = []
kappa_list = []
for fold, (train_indices, test_indices) in enumerate(kf.split(train_dataset)):
    train_dataset_fold = torch.utils.data.Subset(train_dataset, train_indices)
    test_dataset_fold = torch.utils.data.Subset(train_dataset, test_indices)
    train_loader_fold = torch.utils.data.DataLoader(train_dataset_fold, batch_size=32, shuffle=True)
    test_loader_fold = torch.utils.data.DataLoader(test_dataset_fold, batch_size=32, shuffle=False)

    # Train the model
    net = Net(hidden_size=256, dropout=0.2)
    optimizer = optim.Adam(net.parameters(), lr=0.010)
    criterion = nn.CrossEntropyLoss()
    for epoch in range(10):
        for i, (images, labels) in enumerate(train_loader_fold):
            optimizer.zero_grad()
            outputs = net(images)
            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()

    # Evaluate the model
    net.eval()
    total_correct = 0
    total_images = 0
    true_labels = []
    predicted_labels = []
    with torch.no_grad():
        for images, labels in test_loader_fold:
            outputs = net(images)
            _, predicted = torch.max(outputs.data, 1)
            total_correct += (predicted == labels).sum().item()
            total_images += labels.size(0)
            true_labels.extend(labels.tolist())
            predicted_labels.extend(predicted.tolist())

    # Calculate the performance metrics
    accuracy = total_correct / total_images
    precision, recall, f1_score, _ = metrics.precision_recall_fscore_support(true_labels, predicted_labels, average='weighted', zero_division=1)
    confusion_matrix = metrics.confusion_matrix(true_labels, predicted_labels, labels=[0, 1, 2])
    tp = confusion_matrix[1][1]
    tn = confusion_matrix[0][0]
    fp = confusion_matrix[0][1]
    fn = confusion_matrix[1][0]
    specificity = tn / (tn + fp + 1e-10)
    sensitivity = tp / (tp + fn + 1e-10)
    kappa = metrics.cohen_kappa_score(true_labels, predicted_labels)
    
    accuracy_list.append(accuracy)
    precision_list.append(precision)
    recall_list.append(recall)
    f1_score_list.append(f1_score)
    specificity_list.append(specificity)
    sensitivity_list.append(sensitivity)
    confusion_matrix_list.append(confusion_matrix)
    kappa_list.append(kappa)
    print('Fold %d: Confusion Matrix=%s, Accuracy=%.4f, Precision=%.4f, Recall=%.4f, F1 Score=%.4f, Specificity=%.4f, Sensitivity=%.4f, Kappa=%.4f' % (fold+1, confusion_matrix, accuracy, precision, recall, f1_score, specificity, sensitivity, kappa))
    # Update best fold if current fold has a better metric
    if accuracy > best_metric:
        best_metric = accuracy
        best_fold = fold
    
# Calculate the average performance metrics
avg_accuracy = np.mean(accuracy_list)
avg_precision = np.mean(precision_list)
avg_recall = np.mean(recall_list)
avg_f1_score = np.mean(f1_score_list)
avg_specificity = np.mean(specificity_list)
avg_sensitivity = np.mean(sensitivity_list)
avg_confusion_matrix = np.zeros((3, 3), dtype=int)
for cm in confusion_matrix_list:
    # Get the shape of the confusion matrix and use it to dynamically set the shape of the average confusion matrix
    cm_shape = cm.shape
    avg_confusion_matrix += np.pad(cm, [(0, 3 - cm_shape[0]), (0, 3 - cm_shape[1])], mode='constant', constant_values=0)
avg_confusion_matrix = avg_confusion_matrix // k
tn = avg_confusion_matrix[0, 0]
fp = avg_confusion_matrix[0, 1]
fn = avg_confusion_matrix[1, 0]
tp = avg_confusion_matrix[1, 1]
avg_kappa = metrics.cohen_kappa_score(true_labels, predicted_labels)

print('Average Performance Metrics: Accuracy=%.4f, Precision=%.4f, Recall=%.4f, F1 Score=%.4f, Specificity=%.4f, Sensitivity=%.4f, Kappa=%.4f' % (avg_accuracy, avg_precision, avg_recall, avg_f1_score, avg_specificity, avg_sensitivity, avg_kappa))
print('Average Confusion Matrix:\n', avg_confusion_matrix)
print('Best fold:', best_fold+1)

Fold 1: Confusion Matrix=[[1 0 2]
 [0 1 1]
 [0 1 1]], Accuracy=0.4286, Precision=0.6429, Recall=0.4286, F1 Score=0.4524, Specificity=1.0000, Sensitivity=1.0000, Kappa=0.1765
Fold 2: Confusion Matrix=[[2 0 0]
 [0 0 0]
 [2 1 2]], Accuracy=0.5714, Precision=0.8571, Recall=0.5714, F1 Score=0.5986, Specificity=1.0000, Sensitivity=0.0000, Kappa=0.3226
Fold 3: Confusion Matrix=[[0 1 0]
 [1 0 2]
 [2 0 1]], Accuracy=0.1429, Precision=0.1429, Recall=0.1429, F1 Score=0.1429, Specificity=0.0000, Sensitivity=0.0000, Kappa=-0.2353
Fold 4: Confusion Matrix=[[2 1 0]
 [0 2 0]
 [0 2 0]], Accuracy=0.5714, Precision=0.8286, Recall=0.5714, F1 Score=0.5061, Specificity=0.6667, Sensitivity=1.0000, Kappa=0.3636
Fold 5: Confusion Matrix=[[0 1 2]
 [0 1 1]
 [0 1 0]], Accuracy=0.1667, Precision=0.6111, Recall=0.1667, F1 Score=0.1333, Specificity=0.0000, Sensitivity=1.0000, Kappa=-0.1111
Fold 6: Confusion Matrix=[[2 0 2]
 [0 0 1]
 [0 0 1]], Accuracy=0.5000, Precision=0.8750, Recall=0.5000, F1 Score=0.5111, Specifi