In [None]:
import numpy as np
from PIL import Image
import os

import torch
import torch.nn as nn
import torch.nn.functional as F
from dataset_loader import FashionStyle14
from early_stoping import EarlyStoping

import torchvision
import torchvision.transforms as transforms
from tqdm import tqdm
import copy


In [None]:

class NeuralNet(nn.Module):
    """
    Small convolutional classifier for fashion-style images.

    Four identical stages (5x5 convolution -> batch norm -> ReLU -> 2x2 max
    pooling) are followed by a two-layer fully connected head with dropout.
    The head is sized for 3x224x224 inputs, which leave the last pooling
    step as a 256x10x10 feature map.
    """

    def __init__(self, num_classes):
        """
        Build the layers.

        Parameters:
            num_classes (int): Number of output classes (size of the final linear layer).
        """
        super().__init__()

        # Shapes in the comments are (channels, height, width) for a 3x224x224 input.
        # Stage 1: conv -> (32, 220, 220), pooled -> (32, 110, 110)
        self.conv1 = nn.Conv2d(3, 32, kernel_size=5)
        self.bn1 = nn.BatchNorm2d(32)

        # A single parameter-free pooling layer is shared by all four stages.
        self.pool = nn.MaxPool2d(kernel_size=2, stride=2)

        # Stage 2: conv -> (64, 106, 106), pooled -> (64, 53, 53)
        self.conv2 = nn.Conv2d(32, 64, kernel_size=5)
        self.bn2 = nn.BatchNorm2d(64)

        # Stage 3: conv -> (128, 49, 49), pooled -> (128, 24, 24)
        self.conv3 = nn.Conv2d(64, 128, kernel_size=5)
        self.bn3 = nn.BatchNorm2d(128)

        # Stage 4: conv -> (256, 20, 20), pooled -> (256, 10, 10)
        self.conv4 = nn.Conv2d(128, 256, kernel_size=5)
        self.bn4 = nn.BatchNorm2d(256)

        # Classification head on the flattened 256x10x10 feature map.
        self.fc1 = nn.Linear(256 * 10 * 10, 512)
        self.dropout = nn.Dropout(p=0.5)
        self.fc2 = nn.Linear(512, num_classes)

    def forward(self, x):
        """Return the raw class scores (logits) for a batch of images `x`."""
        stages = (
            (self.conv1, self.bn1),
            (self.conv2, self.bn2),
            (self.conv3, self.bn3),
            (self.conv4, self.bn4),
        )
        for conv, norm in stages:
            x = self.pool(F.relu(norm(conv(x))))

        # Keep the batch dimension, flatten everything else.
        features = torch.flatten(x, 1)
        hidden = self.dropout(F.relu(self.fc1(features)))
        return self.fc2(hidden)


In [None]:
# Data folder, resolved relative to the current working directory.
# It is passed to the dataset loader as root_dir and also holds classes.npy.
directory = os.path.join(os.getcwd(), 'FashionStyle14_v1')

# Pick the best available backend: NVIDIA CUDA, then Apple MPS, then CPU.
# Checking only MPS would silently fall back to the CPU on CUDA machines.
if torch.cuda.is_available():
    device = torch.device("cuda")
elif torch.backends.mps.is_available():
    device = torch.device("mps")
else:
    device = torch.device("cpu")

In [None]:
# Per-channel ImageNet statistics shared by both pipelines.
imagenet_mean = [0.485, 0.456, 0.406]
imagenet_std = [0.229, 0.224, 0.225]

# Transformations for images: tensor conversion, resize to 224x224, normalisation.
transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Resize((224, 224)),
    transforms.Normalize(mean=imagenet_mean, std=imagenet_std),
])

# Transformations for augmentation: random flip, small rotation and colour
# jitter are applied first, followed by the same three steps as above.
transform_aug = transforms.Compose([
    transforms.RandomHorizontalFlip(),
    transforms.RandomRotation(10),
    transforms.ColorJitter(brightness=0.2, contrast=0.2, saturation=0.2, hue=0.1),
    transforms.ToTensor(),
    transforms.Resize((224, 224)),
    transforms.Normalize(mean=imagenet_mean, std=imagenet_std),
])

In [None]:
# Load dataset and split into training and testing sets (80% / 20%).
# The seeded generator makes the split identical on every run.
dataset = FashionStyle14(csv_file='dataset.csv', root_dir=directory, transform=None)
train_data, test_data = torch.utils.data.random_split(
    dataset, [0.8, 0.2], generator=torch.Generator().manual_seed(42)
)

# Apply transformations.
# Both subsets returned by random_split wrap the same `dataset` object, so one
# assignment gives the plain preprocessing to the training and the test split.
dataset.transform = transform

# Augment training data.
# The deep copy owns a separate copy of the underlying dataset, so giving it
# the augmenting transform does not affect `train_data` or `test_data`.
train_data_aug = copy.deepcopy(train_data)
train_data_aug.dataset.transform = transform_aug
train_data = torch.utils.data.ConcatDataset([train_data, train_data_aug])

# Create DataLoaders.
# Only the training loader is shuffled; evaluation order does not influence the
# metrics, and a fixed order keeps validation and test runs reproducible.
train_loader = torch.utils.data.DataLoader(train_data, batch_size=32, shuffle=True)
test_loader = torch.utils.data.DataLoader(test_data, batch_size=32, shuffle=False)


In [None]:
# Load class names (stored as a NumPy array next to the data, converted to a plain list).
classes_file = os.path.join(directory, 'classes.npy')
# NOTE(review): allow_pickle=True unpickles the file, which can execute
# arbitrary code -- only load a classes.npy that comes from a trusted source.
class_names = np.load(classes_file, allow_pickle=True).tolist()
class_names


In [None]:
# Define Neural Network
def choose_model(model_name, class_names):
    """
    Build one of the supported classifiers and move it to the global `device`.

    Parameters:
        model_name (str): 'resnet50' for a pretrained torchvision ResNet-50 in
            which only `layer4` and a new `fc` head stay trainable, or
            'custom' for the `NeuralNet` defined in this notebook.
        class_names (list): Class labels; its length sets the number of outputs.

    Returns:
        The constructed model, already placed on `device`.

    Raises:
        ValueError: If `model_name` is neither 'resnet50' nor 'custom'.
    """
    if model_name not in ('resnet50', 'custom'):
        raise ValueError("Model not recognized")

    if model_name == 'custom':
        model = NeuralNet(num_classes=len(class_names))
    else:
        pretrained_weights = torchvision.models.ResNet50_Weights.DEFAULT
        model = torchvision.models.resnet50(weights=pretrained_weights)
        # Freeze all layers except layer4 and fc
        for param_name, param in model.named_parameters():
            keep_trainable = "layer4" in param_name or "fc" in param_name
            if not keep_trainable:
                param.requires_grad = False
        # Replace the head with a fresh (trainable) layer sized for our classes.
        model.fc = nn.Linear(model.fc.in_features, len(class_names))

    # Move model to the selected device
    return model.to(device)

# Instantiate the network used by the rest of the notebook: the 'resnet50'
# option of choose_model, with one output per entry in class_names.
net = choose_model('resnet50', class_names)

In [None]:
# Training setup: cross-entropy on the raw class scores, Adam over the model's
# parameters, and the project's early-stopping helper.
loss_function = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(params=net.parameters(), lr=1e-3)
# NOTE(review): min_delta is presumably the smallest loss improvement that
# counts as progress -- confirm against early_stoping.EarlyStoping.
early_stopping = EarlyStoping(min_delta=1e-3)

In [None]:
# Train for at most 30 epochs; each epoch runs one pass over the training data
# followed by one pass over the held-out data, then asks the early-stopping
# helper whether to stop.
for epoch in range(30):
    print(f"Training epoch {epoch + 1}")
    train_running_loss, val_running_loss = 0.0, 0.0
    correct_train, correct_val = 0, 0
    total_train, total_val = 0, 0

    # Train the model.
    # Training mode must be re-enabled every epoch: the validation pass below
    # switches the model to eval mode, which would otherwise leave BatchNorm
    # and Dropout disabled for every epoch after the first.
    net.train()
    for inputs, labels in tqdm(train_loader, desc=f"Epoch {epoch+1}, Training", leave=True):
        inputs = inputs.to(device)
        labels = labels.to(device).long()  # CrossEntropyLoss expects integer class indices

        optimizer.zero_grad()
        outputs = net(inputs)
        loss = loss_function(outputs, labels)
        loss.backward()
        optimizer.step()
        train_running_loss += loss.item()

        # Accumulate accuracy counts
        predicted = outputs.argmax(dim=1)
        total_train += labels.size(0)
        correct_train += (predicted == labels).sum().item()

    # Validate the model
    net.eval()
    with torch.no_grad():
        for inputs, labels in tqdm(test_loader, desc=f"Epoch {epoch+1}. Validating", leave=True):
            inputs = inputs.to(device)
            labels = labels.to(device).long()

            outputs = net(inputs)
            loss = loss_function(outputs, labels)
            val_running_loss += loss.item()

            # Accumulate accuracy counts
            predicted = outputs.argmax(dim=1)
            total_val += labels.size(0)
            correct_val += (predicted == labels).sum().item()

    # Epoch-level metrics (guarded so an empty loader cannot divide by zero).
    train_loss = train_running_loss / max(len(train_loader), 1)
    val_loss = val_running_loss / max(len(test_loader), 1)
    accuracy_train = 100 * correct_train / total_train if total_train else 0.0
    accuracy_val = 100 * correct_val / total_val if total_val else 0.0

    # Report before the early-stopping check so the final epoch is logged too.
    print(f"Loss: {train_loss:.4f}, Accuracy: {accuracy_train:.2f}%")
    print(f"Validation: Loss: {val_loss:.4f}, Accuracy: {accuracy_val:.2f}%")

    # Check for early stopping
    if early_stopping(net, val_loss):
        print(early_stopping.status)
        break

# Save the model
torch.save(net.state_dict(), os.path.join(directory, 'resnet_trainded.pth'))


In [None]:
from sklearn.metrics import confusion_matrix, classification_report

# Load the model for evaluation.
# map_location lets the checkpoint load on the current device even if it was
# saved from a different one.
net.load_state_dict(
    torch.load(os.path.join(directory, 'resnet_trainded.pth'), map_location=device)
)

net.eval()
all_labels = []
all_predictions = []

with torch.no_grad():
    # The progress label is a fixed string: this cell must not depend on the
    # `epoch` variable left behind by the training loop.
    for images, labels in tqdm(test_loader, desc="Evaluating", leave=False):
        images = images.to(device)

        outputs = net(images)
        predicted = outputs.argmax(dim=1)
        # Labels are only collected, so they never need to visit the device.
        all_labels.extend(labels.cpu().numpy())
        all_predictions.extend(predicted.cpu().numpy())




In [None]:
# Use the full list of class indices for both metrics so their rows line up
# with class_names even when a class never appears among the evaluated samples.
label_ids = list(range(len(class_names)))

conf_matrix = confusion_matrix(all_labels, all_predictions, labels=label_ids)
print("Confusion Matrix:")
print(conf_matrix)

# Print classification report.
# Without `labels`, target_names must match exactly the classes present in the
# data, and the call raises if any class is missing.
class_report = classification_report(
    all_labels, all_predictions, labels=label_ids, target_names=class_names
)
print("Classification Report:")
print(class_report)

**Testing on custom images**

In [None]:
# NOTE(review): absolute, machine-specific path -- point this at your own image.
image_path = "/Users/yuriizaika/Documents/python/Projects/Clothfier/dress.webp"

# Convert to RGB so the 3-channel normalisation also works for images that
# carry an alpha channel, a palette or a single grey channel. The context
# manager closes the file once the pixels have been copied.
with Image.open(image_path) as raw_image:
    image = transform(raw_image.convert("RGB"))

# Make sure BatchNorm/Dropout are in inference mode regardless of which cells ran before.
net.eval()
with torch.no_grad():
    logits = net(image.unsqueeze(0).to(device))  # add a batch dimension of 1
    label = torch.argmax(logits, dim=1)

# .item() turns the one-element tensor into a plain int suitable for list indexing.
class_names[label.item()]
