# Fashion Classification with VGG16 - Google Colab

This notebook trains a VGG16 model on fashion images.

**Dataset Structure Required:**
```
ict303_a1/
└── data/
    └── data/
        ├── train/
        ├── valid/
        └── test/unknown/
```

## 1. Mount Google Drive and Setup

In [None]:
# Mount Google Drive into the Colab filesystem at /content/drive.
from google.colab import drive
drive.mount('/content/drive')

import os

# Set working directory
# All relative paths used by later cells (./runs, ./checkpoints, ./results,
# and the local .py modules) are resolved against this folder.
project_dir = '/content/drive/MyDrive/Colab Notebooks'
os.chdir(project_dir)

# Show where we ended up and list the folder contents (IPython shell magic).
print(f"Current directory: {os.getcwd()}")
print("\nFiles in directory:")
!ls

## 2. Check GPU Availability

In [None]:
import torch

# Select CUDA when PyTorch reports it as available, otherwise fall back to CPU.
# `device` is reused by the model-creation, training and prediction cells.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
print(f'Using device: {device}')

if device.type == 'cuda':
    # Report the name and total memory (bytes converted to GB) of GPU index 0.
    print(f'GPU: {torch.cuda.get_device_name(0)}')
    print(f'Memory: {torch.cuda.get_device_properties(0).total_memory / 1e9:.2f} GB')
    print('\n✅ GPU is available!')
else:
    print('\n⚠️  GPU not available. Go to Runtime → Change runtime type → GPU')

## 3. Verify Python Files are Uploaded

In [None]:
# Check if all required Python files exist.
# These are the local modules that the import cell loads; the names are
# checked relative to the current working directory.
required_files = [
    'dataset.py',
    'trainer.py',
    'vgg16_pretrained.py',
    'vgg16_scratch.py',
    'mlp_model.py',
    'utils.py'
]

print("Checking for required files...\n")
all_found = True
for file in required_files:
    if os.path.exists(file):
        print(f"✅ {file} found")
    else:
        print(f"❌ {file} NOT FOUND - Please upload this file!")
        all_found = False

# Final summary once every file has been checked.
if all_found:
    print("\n✅ All required files found!")
else:
    print("\n⚠️  Some files are missing. Please upload them to the same folder as this notebook.")

## 4. Set Dataset Path and Verify Structure

In [None]:
# UPDATE THIS PATH to where your ict303_a1 folder is located
# Common options:
# Option 1: In Colab Notebooks folder
root_dir = "/content/drive/MyDrive/Colab Notebooks/ICT303 - Fashion Classification"

# Option 2: At root of Drive  
# root_dir = "/content/drive/MyDrive/ICT303 - Fashion Classification"

# Option 3: In a specific folder
# root_dir = "/content/drive/MyDrive/YourFolder/ICT303 - Fashion Classification"

print(f"Dataset root directory: {root_dir}")
print(f"Path exists: {os.path.exists(root_dir)}\n")

if os.path.exists(root_dir):
    # Check structure
    data_data_path = os.path.join(root_dir, 'data', 'data')
    
    if os.path.exists(data_data_path):
        print(f"‚úÖ Found data/data/ structure")
        
        # Check for train, valid, test
        for split in ['train', 'valid', 'test']:
            split_path = os.path.join(data_data_path, split)
            if os.path.exists(split_path):
                print(f"  ‚úÖ {split}/ found")
            else:
                print(f"  ‚ùå {split}/ NOT FOUND")
    else:
        print(f"‚ùå data/data/ structure not found")
        print(f"\nContents of {root_dir}:")
        !ls -la "$root_dir"
else:
    print("‚ùå Root directory not found!")
    print("\nSearching for ict303_a1 or ICT303 folders...")
    !find /content/drive/MyDrive -name "*303*" -type d 2>/dev/null | head -10

## 5. Import Modules

In [None]:
# Third-party imports used by the training cells.
import torch
import torch.nn as nn
import torch.optim as optim
# Project-local modules (the .py files verified in section 3).
from dataset import get_dataloaders
from trainer import Trainer
from vgg16_pretrained import create_vgg16_pretrained
from vgg16_scratch import VGG16Scratch
from mlp_model import create_mlp_model
import utils

print("✅ All modules imported successfully!")

## 6. Load Dataset

In [None]:
# Load data
batch_size = 32
img_size = 224

# get_dataloaders (from dataset.py) returns the three loaders plus an info
# dict; the keys read below are 'num_classes', 'classes', 'train_size',
# 'val_size' and 'test_size'.
train_loader, val_loader, test_loader, dataset_info = get_dataloaders(
    root_dir=root_dir,
    batch_size=batch_size,
    img_size=img_size,
    num_workers=2  # Good for Colab
)

print("\n" + "="*70)
print("DATASET SUMMARY")
print("="*70)
print(f"Number of classes: {dataset_info['num_classes']}")
print(f"Classes: {dataset_info['classes']}")
print(f"Training samples: {dataset_info['train_size']}")
print(f"Validation samples: {dataset_info['val_size']}")
print(f"Test samples: {dataset_info['test_size']}")
print("="*70)

# Verify data loading by pulling a single batch from the training loader.
if dataset_info['train_size'] > 0:
    images, labels = next(iter(train_loader))
    print("\n✅ Data loaded successfully!")
    print(f"Sample batch shape: {images.shape}")
    print(f"Sample labels: {labels[:5]}")
else:
    print("\n❌ No training data found! Please check your root_dir path.")

## 7. Create Model

### Choose ONE of the following options:

### Option 1: VGG16 Pretrained (RECOMMENDED - Best Performance)

In [None]:
# Create VGG16 Pretrained model.
# The number of output classes comes from the dataset loaded in section 6
# rather than a hard-coded 8, so the classifier head always matches the data.
model = create_vgg16_pretrained(
    model_type='standard',  # 'standard', 'small_classifier', or 'global_pool'
    num_classes=dataset_info['num_classes'],
    freeze_features=True,  # Freeze conv layers, train only classifier
    dropout_rate=0.5
)

# Move the model to the device selected in section 2.
model = model.to(device)

# Print model info: parameters with requires_grad=True count as trainable,
# the remainder as frozen.
total_params = sum(p.numel() for p in model.parameters())
trainable_params = sum(p.numel() for p in model.parameters() if p.requires_grad)

print("Model: VGG16 Pretrained")
print(f"Total parameters: {total_params:,}")
print(f"Trainable parameters: {trainable_params:,}")
print(f"Frozen parameters: {total_params - trainable_params:,}")

### Option 2: VGG16 from Scratch (Slower, needs more epochs)

In [None]:
# Uncomment to use VGG16 from scratch
# model = VGG16Scratch(
#     num_classes=8,
#     dropout_rate=0.5,
#     use_batch_norm=False
# )
# model = model.to(device)
# print(f"Model: VGG16 from Scratch")
# print(f"Total parameters: {sum(p.numel() for p in model.parameters()):,}")

### Option 3: MLP Model (Fastest, lower performance)

In [None]:
# Uncomment to use MLP
# model = create_mlp_model(
#     model_size='medium',
#     input_size=224*224*3,
#     num_classes=8,
#     dropout_rate=0.5
# )
# model = model.to(device)
# print(f"Model: MLP Medium")
# print(f"Total parameters: {sum(p.numel() for p in model.parameters()):,}")

## 8. Setup Training

In [None]:
# Loss function
criterion = nn.CrossEntropyLoss()

# Optimizer (use lower learning rate for pretrained models)
optimizer = optim.Adam(model.parameters(), lr=0.0001)

# Learning rate scheduler: StepLR multiplies the learning rate by gamma
# (0.5) every step_size (10) scheduler steps.
scheduler = optim.lr_scheduler.StepLR(optimizer, step_size=10, gamma=0.5)

# Create trainer (Trainer is defined in trainer.py); logs go under ./runs so
# the TensorBoard cell at the end of the notebook can read them.
trainer = Trainer(
    model=model,
    train_loader=train_loader,
    val_loader=val_loader,
    criterion=criterion,
    optimizer=optimizer,
    device=device,
    scheduler=scheduler,
    log_dir='./runs/vgg16_pretrained',
    class_names=dataset_info['classes']
)

print("✅ Trainer created successfully!")

## 9. Start Training

In [None]:
# Create checkpoint directory (single definition, reused for save_dir below).
checkpoint_dir = './checkpoints/vgg16_pretrained'
os.makedirs(checkpoint_dir, exist_ok=True)

print("\n🚀 Starting training...")
print("="*70)

# Train the model
trainer.train(
    num_epochs=25,  # Adjust as needed
    save_dir=checkpoint_dir,
    early_stopping_patience=10  # Stop if no improvement for 10 epochs
)

print("\n✅ Training completed!")
print(f"Best validation accuracy: {trainer.best_val_acc:.2f}%")

## 10. Evaluate Model

In [None]:
print("\n📊 Evaluating on validation set...")

# Load best model: restore the checkpoint path recorded by the trainer.
trainer.load_checkpoint(trainer.best_model_path)

# Evaluate; return_predictions=True is requested because later cells read
# metrics['predictions'], metrics['labels'] and metrics['confusion_matrix'].
metrics = trainer.evaluate(val_loader, return_predictions=True)

print("\n" + "="*70)
print("VALIDATION RESULTS")
print("="*70)
print(f"Accuracy: {metrics['accuracy']:.2f}%")
print(f"Precision: {metrics['precision']:.4f}")
print(f"Recall: {metrics['recall']:.4f}")
print(f"F1-Score: {metrics['f1_score']:.4f}")
print(f"Mean Average Precision (MAP): {metrics['map']:.4f}")
print("="*70)

## 11. Visualize Results

In [None]:
# Create results directory
os.makedirs('./results', exist_ok=True)

# Plot training curves
trainer.plot_training_curves(save_path='./results/training_curves.png')
print("✅ Training curves saved")

# Plot confusion matrix (taken from the metrics of the evaluation cell)
trainer.plot_confusion_matrix(
    metrics['confusion_matrix'],
    save_path='./results/confusion_matrix.png'
)
print("✅ Confusion matrix saved")

# Display the saved images inline in the notebook.
from IPython.display import Image, display
print("\nTraining Curves:")
display(Image('./results/training_curves.png'))

print("\nConfusion Matrix:")
display(Image('./results/confusion_matrix.png'))

## 12. Visualize Predictions

In [None]:
import os

from IPython.display import Image, display
from utils import visualize_predictions

# Make this cell runnable on its own: it needs Image/display and the
# ./results directory even if the "Visualize Results" cell was skipped.
os.makedirs('./results', exist_ok=True)

# Render sample predictions from the validation loader and save the figure.
visualize_predictions(
    model=model,
    data_loader=val_loader,
    device=device,
    class_names=dataset_info['classes'],
    num_images=16,
    save_path='./results/predictions.png'
)

print("\nPrediction Samples:")
display(Image('./results/predictions.png'))

## 13. Per-Class Performance

In [None]:
import os

from IPython.display import Image, display
from utils import compute_per_class_accuracy, plot_per_class_accuracy

# Make this cell runnable on its own: it needs Image/display and the
# ./results directory even if the "Visualize Results" cell was skipped.
os.makedirs('./results', exist_ok=True)

# Uses the predictions and labels returned by the evaluation cell.
per_class_acc = compute_per_class_accuracy(
    metrics['predictions'],
    metrics['labels'],
    dataset_info['classes']
)

print("\n" + "="*70)
print("PER-CLASS ACCURACY")
print("="*70)
# per_class_acc is iterated as a mapping of class name -> accuracy value.
for class_name, acc in per_class_acc.items():
    print(f"{class_name:15s}: {acc:.2f}%")
print("="*70)

# Plot
plot_per_class_accuracy(per_class_acc, save_path='./results/per_class_accuracy.png')

print("\nPer-Class Accuracy:")
display(Image('./results/per_class_accuracy.png'))

## 14. Classification Report

In [None]:
from utils import print_classification_report

# Print the classification report for the validation predictions gathered
# by the evaluation cell, labelled with the dataset's class names.
report_args = (
    metrics['predictions'],
    metrics['labels'],
    dataset_info['classes'],
)
print_classification_report(*report_args)

## 15. Make Predictions on Test Set

In [None]:
import os

import pandas as pd

print("\n🔮 Making predictions on test set...")

# Switch to inference mode and disable gradient tracking for the whole pass.
model.eval()
test_predictions = []
test_probs = []

with torch.no_grad():
    # The second element yielded by test_loader is ignored here.
    for images, _ in test_loader:
        images = images.to(device)
        outputs = model(images)
        probs = torch.softmax(outputs, dim=1)
        _, predicted = outputs.max(1)

        # Store plain Python ints rather than numpy scalars.
        test_predictions.extend(predicted.cpu().tolist())
        test_probs.extend(probs.cpu().numpy())

# Convert to class names
predicted_classes = [dataset_info['classes'][pred] for pred in test_predictions]

print(f"✅ Made predictions for {len(test_predictions)} test images")

# Save predictions.
# NOTE: image_id is the position in test_loader iteration order, not a
# file name.
results_df = pd.DataFrame({
    'image_id': range(len(test_predictions)),
    'predicted_class': predicted_classes,
    'predicted_index': test_predictions
})

# to_csv does not create missing directories, so make sure ./results exists
# even when the visualisation cells were skipped.
os.makedirs('./results', exist_ok=True)
results_df.to_csv('./results/test_predictions.csv', index=False)
print("✅ Test predictions saved to: ./results/test_predictions.csv")

# Display first few predictions
print("\nFirst 10 predictions:")
print(results_df.head(10))

## 16. Save Final Model

In [None]:
# Save final model: only the state_dict (weights) is written, not the
# full model object.
torch.save(model.state_dict(), './final_model.pth')
print("✅ Final model saved to: ./final_model.pth")

# Summary of the artefacts produced by the earlier cells.
print("\n🎉 ALL DONE!")
print("="*70)
print("Results saved in './results/' folder:")
print("  - training_curves.png")
print("  - confusion_matrix.png")
print("  - predictions.png")
print("  - per_class_accuracy.png")
print("  - test_predictions.csv")
print("\nBest model saved in './checkpoints/vgg16_pretrained/best_model.pth'")
print("="*70)

## 17. Download Results (Optional)

In [None]:
# Zip all results for easy download
!zip -r results.zip results/ checkpoints/ *.pth

print("‚úÖ Results zipped!")
print("\nTo download:")
print("1. Click on the folder icon on the left")
print("2. Right-click on 'results.zip'")
print("3. Select 'Download'")

# Or use Colab's download function
from google.colab import files
# Uncomment to download directly:
# files.download('results.zip')

## 18. View TensorBoard Logs (Optional)

In [None]:
# Load TensorBoard
# IPython magics: load the notebook extension, then open TensorBoard on the
# runs/ directory (the trainer above was given log_dir='./runs/vgg16_pretrained').
%load_ext tensorboard
%tensorboard --logdir runs/