In [1]:
import os
import time
import torch
import torch.nn as nn
from torchvision import datasets, transforms
from torchvision.models import vit_b_16, ViT_B_16_Weights
from torch.utils.data import DataLoader
from torch.utils.data.sampler import SubsetRandomSampler
from sklearn.model_selection import KFold
import pandas as pd

In [2]:
# Paths (relative to the notebook's working directory)
train_data_directory = 'data/train'
test_data_directory = 'data/test'
weights_directory = 'weights'
predictions_directory = 'predictions'

# Ensure output directories exist (no error if they are already there)
os.makedirs(weights_directory, exist_ok=True)
os.makedirs(predictions_directory, exist_ok=True)

# Model parameters
# The trailing "#N" notes are the alternative values left by the author.
model_name = 'vit_b_16'
num_folds = 2 #5
batch_size = 64  #32
num_epochs = 1 #10
learning_rate = 0.001

# Reproducibility: seed PyTorch's global RNG so that sampler shuffling and
# the initialisation of newly created layers repeat across runs.
# NOTE: some CUDA kernels are non-deterministic, so GPU runs may still
# differ slightly.
random_seed = 42
torch.manual_seed(random_seed)

# Device configuration: use the GPU when one is visible, otherwise the CPU
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

In [3]:
# Preprocessing applied to every image: resize to 224x224, convert to a
# tensor, then normalise each channel with the ImageNet mean / std.
data_transforms = transforms.Compose([
    transforms.Resize(size=(224, 224)),
    transforms.ToTensor(),
    transforms.Normalize(
        mean=[0.485, 0.456, 0.406],  # ImageNet channel means
        std=[0.229, 0.224, 0.225],   # ImageNet channel standard deviations
    ),
])

In [4]:
# Build the training dataset from the training directory, applying the
# shared preprocessing pipeline to each image.
full_dataset = datasets.ImageFolder(train_data_directory, transform=data_transforms)

# Number of classes and their names, as exposed by the dataset
num_classes = len(full_dataset.classes)
class_names = full_dataset.classes

In [5]:
# K-fold splitter over dataset indices. The shuffle is pinned with a fixed
# random_state so the same train/validation split is produced on every run.
kfold = KFold(n_splits=num_folds, shuffle=True, random_state=42)

In [6]:
# Cross-validated fine-tuning.
# For every fold: build train/validation loaders over disjoint index
# subsets, fine-tune a fresh pretrained ViT-B/16, evaluate after each epoch,
# and persist the fold's weights and final-epoch metrics.

# One summary dict per fold (metrics taken from that fold's last epoch)
performance_reports = []

# Timestamped run folder keeps artifacts of different runs apart
timestamp = time.strftime('%Y%m%d-%H%M%S')
model_folder = os.path.join(weights_directory, f"{model_name}_{timestamp}")
os.makedirs(model_folder, exist_ok=True)

for fold, (train_idx, val_idx) in enumerate(kfold.split(full_dataset)):
    fold_number = fold + 1
    print(f'Fold {fold_number}/{num_folds}')

    # Samplers restrict each loader to this fold's indices
    train_sampler = SubsetRandomSampler(train_idx)
    val_sampler = SubsetRandomSampler(val_idx)

    # Both loaders read from the same dataset; only the index subsets differ
    train_loader = DataLoader(full_dataset, batch_size=batch_size, sampler=train_sampler)
    val_loader = DataLoader(full_dataset, batch_size=batch_size, sampler=val_sampler)

    # Fresh pretrained model per fold, with the classification head replaced
    # so that its output size matches the number of dataset classes
    model = vit_b_16(weights=ViT_B_16_Weights.DEFAULT)
    model.heads.head = nn.Linear(model.heads.head.in_features, num_classes)
    model = model.to(device)

    # Loss function and optimizer
    criterion = nn.CrossEntropyLoss()
    optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)

    # Defaults so the fold report below is well-defined even when
    # num_epochs is 0 and the epoch loop never runs
    avg_train_loss = avg_val_loss = accuracy = float('nan')

    for epoch in range(num_epochs):
        # ---- Training pass ----
        model.train()
        train_loss = 0.0

        for images, labels in train_loader:
            images, labels = images.to(device), labels.to(device)

            # Forward pass
            outputs = model(images)
            loss = criterion(outputs, labels)

            # Backward pass and parameter update
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

            train_loss += loss.item()

        # ---- Validation pass (no gradients needed) ----
        model.eval()
        val_loss = 0.0
        correct = 0
        total = 0

        with torch.no_grad():
            for images, labels in val_loader:
                images, labels = images.to(device), labels.to(device)
                outputs = model(images)
                val_loss += criterion(outputs, labels).item()

                # Predicted class = index of the largest logit
                predicted = outputs.argmax(dim=1)
                total += labels.size(0)
                correct += (predicted == labels).sum().item()

        # Losses are averaged over batches; accuracy is a percentage
        avg_train_loss = train_loss / len(train_loader)
        avg_val_loss = val_loss / len(val_loader)
        accuracy = 100 * correct / total

        print(f'Epoch [{epoch+1}/{num_epochs}], '
              f'Train Loss: {avg_train_loss:.4f}, '
              f'Val Loss: {avg_val_loss:.4f}, '
              f'Accuracy: {accuracy:.2f}%')

    # Record this fold's final-epoch metrics
    performance_reports.append({
        'Fold': fold_number,
        'Train Loss': avg_train_loss,
        'Validation Loss': avg_val_loss,
        'Accuracy': accuracy
    })

    # Save model weights under a fold-specific name so that later folds do
    # not overwrite the weights of earlier ones
    weights_path = os.path.join(model_folder, f'model_weights_fold{fold_number}.pth')
    torch.save(model.state_dict(), weights_path)

    # Save this fold's report, likewise under a fold-specific name
    report_df = pd.DataFrame([performance_reports[-1]])
    report_path = os.path.join(model_folder, f'performance_report_fold{fold_number}.csv')
    report_df.to_csv(report_path, index=False)

Fold 1/2


  attn_output = scaled_dot_product_attention(q, k, v, attn_mask, dropout_p, is_causal)


In [None]:
# Destination of the combined report: one CSV per run, placed directly in
# the weights directory and named after the model and run timestamp
overall_report_path = os.path.join(
    weights_directory,
    f"{model_name}_{timestamp}_overall_performance.csv",
)

# One row per fold, built from the collected per-fold dicts
overall_performance_df = pd.DataFrame(performance_reports)
overall_performance_df.to_csv(overall_report_path, index=False)

# Show the combined table beneath a heading
print("\nOverall Performance:", overall_performance_df, sep="\n")

In [None]:
# Load test dataset with the same preprocessing used for training.
# shuffle=False keeps batches in dataset order, so predictions line up with
# test_dataset.samples.
test_dataset = datasets.ImageFolder(root=test_data_directory, transform=data_transforms)
test_loader = DataLoader(test_dataset, batch_size=batch_size, shuffle=False)

# Use the last trained model (the one left over from the final fold)
model.eval()
test_predictions = []

with torch.no_grad():
    for images, _ in test_loader:
        images = images.to(device)
        outputs = model(images)
        # Predicted class = index of the largest logit
        test_predictions.extend(outputs.argmax(dim=1).cpu().tolist())

# Collect the file paths once, after the loop. Extending this list inside
# the batch loop would append every path once per batch and leave the two
# columns with different lengths.
image_paths = [path for path, _ in test_dataset.samples]
assert len(image_paths) == len(test_predictions), \
    "number of predictions does not match number of test images"

# Save test predictions to CSV.
# PredictedClass is the index of the model's output unit, not a class name.
test_predictions_df = pd.DataFrame({
    'ImagePath': image_paths,
    'PredictedClass': test_predictions
})
test_predictions_filename = f"{model_name}_{timestamp}_test_predictions.csv"
test_predictions_path = os.path.join(predictions_directory, test_predictions_filename)
test_predictions_df.to_csv(test_predictions_path, index=False)