In [None]:
import copy
import os

import matplotlib.pyplot as plt
import numpy as np
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
import torch.utils.data as data
import torch.utils.data as data_utils
import torchvision
import torchvision.models as models
from sklearn.model_selection import KFold
from torch.autograd import Variable
from torch.utils.data import DataLoader
from torchvision import datasets, transforms

In [None]:
# Define paths to your dataset subfolders
data_root = "C:\\Users\\ai\\Desktop\\RA 2023\\CNN_classification\\extracted features\\resized_images_long10"
class_folders = ["class1", "class2", "class3"]

# Define transformations
# NOTE(review): only ToTensor is applied (no Resize / ImageNet mean-std
# Normalize). The pretrained VGG16 used later presumably expects normalized
# inputs -- confirm whether this omission is intentional.
transform = transforms.Compose([
    transforms.ToTensor(),
])

# Load dataset
dataset = datasets.ImageFolder(root=data_root, transform=transform)

# Group sample indices by class label.
# `dataset.targets` already holds one label per sample, so there is no need to
# iterate the dataset itself (which would open and transform every image).
# It also avoids binding a loop variable called `data`, which would clobber the
# `torch.utils.data as data` module alias imported at the top of the notebook.
class_indices = [[], [], []]
for sample_idx, label in enumerate(dataset.targets):
    class_indices[label].append(sample_idx)

# Define the sample counts per class for each set
train_samples_per_class = [342, 342, 342]
val_samples_per_class = [115, 115, 115]
test_samples_per_class = [114, 114, 114]

# Build the flat train / validation / test index lists class by class.
# Each class contributes consecutive slices of its index list, in the order
# train -> validation -> test, so the three splits never overlap.
# NOTE(review): slices follow ImageFolder's sample order (no shuffling).
train_indices, val_indices, test_indices = [], [], []
for label, indices in enumerate(class_indices):
    n_train = train_samples_per_class[label]
    n_val = val_samples_per_class[label]
    n_test = test_samples_per_class[label]
    n_needed = n_train + n_val + n_test

    # Slicing past the end silently yields fewer samples; surface that.
    if len(indices) < n_needed:
        print(f"Warning: class {label} has {len(indices)} samples, "
              f"fewer than the {n_needed} requested; splits will be truncated.")

    train_indices.extend(indices[:n_train])
    val_indices.extend(indices[n_train:n_train + n_val])
    test_indices.extend(indices[n_train + n_val:n_needed])

# Create datasets and data loaders for each split
train_dataset = torch.utils.data.Subset(dataset, train_indices)
val_dataset = torch.utils.data.Subset(dataset, val_indices)
test_dataset = torch.utils.data.Subset(dataset, test_indices)

# Train and validation are merged; K-fold cross-validation re-splits them later.
train_val_dataset = data_utils.ConcatDataset([train_dataset, val_dataset])
# Create data loader for the test set
batch_size = 32   # 2
test_loader = DataLoader(test_dataset, batch_size=batch_size)

In [None]:
# Calculate the number of samples from each class in each dataset
def count_labels(subsets, num_classes=3):
    """Count samples per class across `Subset` objects without loading images.

    Args:
        subsets: iterable of `torch.utils.data.Subset` objects whose underlying
            `.dataset` exposes a `.targets` list (as `ImageFolder` does).
        num_classes: number of class labels to tally.

    Returns:
        A list of length `num_classes`; entry `c` is the number of samples
        with label `c`.
    """
    counts = [0] * num_classes
    for subset in subsets:
        base_targets = subset.dataset.targets
        for sample_idx in subset.indices:
            counts[base_targets[sample_idx]] += 1
    return counts


# Reading labels via the stored indices avoids decoding every image and avoids
# rebinding the name `data` (the `torch.utils.data` alias from the imports).
# `train_val_dataset` is a ConcatDataset; `.datasets` holds its Subset parts.
train_val_class_counts = count_labels(train_val_dataset.datasets)
test_class_counts = count_labels([test_dataset])

# Print the number of samples from each class in each dataset
# (the "Train" line covers the combined train + validation pool)
print("Train class counts:", train_val_class_counts)
print("Test class counts:", test_class_counts)


In [None]:
# Load VGG16 with pretrained weights; only its `features` sub-module is reused.
vgg16 = models.vgg16(pretrained=True)

# Extract the bottom layers
bottom_layers = vgg16.features

# Freeze the backbone so its parameters receive no gradient updates.
bottom_layers.requires_grad_(False)

In [None]:
# Define the additional fully connected layers
# Head: 512*7*7 flattened features -> 4096 -> 4096 -> 3 class logits,
# with ReLU and dropout (p=0.3) after each hidden layer.
fully_connected_layers = nn.Sequential(
    nn.Linear(512 * 7 * 7, 4096),
    nn.ReLU(inplace=True),
    nn.Dropout(0.3),
    nn.Linear(4096, 4096),   # 4096
    nn.ReLU(inplace=True),
    nn.Dropout(0.3),
    nn.Linear(4096, 3)  # 4096
)

# Combine layers to create the complete classification network
classification_network = nn.Sequential(
    bottom_layers,
    # Pool the feature map to a fixed 7x7 so the first Linear layer always
    # receives 512*7*7 inputs. For a 7x7 map this is an identity operation;
    # for other spatial sizes it prevents a shape-mismatch error.
    nn.AdaptiveAvgPool2d((7, 7)),
    nn.Flatten(),  # Flatten the output from the bottom layers
    fully_connected_layers,
)

In [None]:
# Define K for K-fold cross-validation
num_folds = 10

# Create K-fold cross-validator on train+validation data
kf = KFold(n_splits=num_folds, shuffle=True, random_state=42)

# Lists to store loss and accuracy values.
# Train histories get one entry per epoch (all folds concatenated);
# validation and test histories get one entry per fold.
train_loss_history = []
val_loss_history = []
train_acc_history = []
val_acc_history = []
test_loss_history = []
test_acc_history = []

learning_rate = 0.0001
num_epochs = 15   # 30


def run_epoch(model, loader, criterion, optimizer=None):
    """Run one full pass over `loader` and report loss and accuracy.

    Args:
        model: network mapping an image batch to class logits.
        loader: iterable yielding `(images, labels)` batches.
        criterion: loss function returning the mean loss of a batch.
        optimizer: if given, the model is put in train mode and updated after
            every batch; if None, the model is put in eval mode and gradients
            are disabled.

    Returns:
        `(mean_loss, accuracy_percent)` where the loss is averaged per sample
        (so a smaller final batch is not over-weighted) and the accuracy is in
        the range 0-100.
    """
    training = optimizer is not None
    model.train(training)

    loss_sum = 0.0
    n_correct = 0
    n_seen = 0
    with torch.set_grad_enabled(training):
        for images, labels in loader:
            if training:
                optimizer.zero_grad()
            outputs = model(images)
            loss = criterion(outputs, labels)
            if training:
                loss.backward()
                optimizer.step()

            batch_n = labels.size(0)
            # criterion yields the batch mean; rescale to a per-sample sum.
            loss_sum += loss.item() * batch_n
            n_seen += batch_n
            n_correct += (outputs.argmax(dim=1) == labels).sum().item()

    return loss_sum / n_seen, 100 * n_correct / n_seen


# Split over positions in train_val_dataset. Distinct loop-variable names keep
# the `train_indices` / `val_indices` lists from the split cell intact.
sample_positions = np.arange(len(train_val_dataset))

# Iterate through each fold
for fold_idx, (fold_train_idx, fold_val_idx) in enumerate(kf.split(sample_positions), start=1):
    print(f"Fold {fold_idx}:")

    # Create data loaders for this fold's train and validation sets
    fold_train_dataset = torch.utils.data.Subset(train_val_dataset, fold_train_idx.tolist())
    fold_val_dataset = torch.utils.data.Subset(train_val_dataset, fold_val_idx.tolist())

    fold_train_loader = DataLoader(fold_train_dataset, batch_size=batch_size, shuffle=True)
    fold_val_loader = DataLoader(fold_val_dataset, batch_size=batch_size)

    # Initialize the model, loss function, and optimizer.
    # Each fold must start from the same untrained head: reusing one model
    # object would carry weights across folds, so later folds would validate
    # on samples the model had already been trained on.
    model = copy.deepcopy(classification_network)
    criterion = nn.CrossEntropyLoss()
    trainable_params = [p for p in model.parameters() if p.requires_grad]
    optimizer = optim.RMSprop(trainable_params, lr=learning_rate)

    # Training loop
    for epoch in range(num_epochs):
        train_loss, train_acc = run_epoch(model, fold_train_loader, criterion, optimizer)
        train_loss_history.append(train_loss)
        train_acc_history.append(train_acc)

        print(f"Epoch [{epoch + 1}/{num_epochs}] - Train Loss: {train_loss:.4f} - Train Acc: {train_acc:.2f}%")

    # Validation loop
    val_loss, val_acc = run_epoch(model, fold_val_loader, criterion)
    val_loss_history.append(val_loss)
    val_acc_history.append(val_acc)

    print(f"Validation Loss: {val_loss:.4f} - Validation Acc: {val_acc:.2f}%")

    # Testing loop (for each fold)
    test_loss, test_acc = run_epoch(model, test_loader, criterion)
    test_loss_history.append(test_loss)
    test_acc_history.append(test_acc)
    print(f"Test Loss: {test_loss:.4f} - Test Acc: {test_acc:.2f}%")
    print()