In [6]:
import csv
import os

import pandas as pd
import torch
import torch.nn as nn
import torch.optim as optim
from PIL import Image
from torch.optim.lr_scheduler import ReduceLROnPlateau
from torch.utils.data import Dataset, DataLoader, random_split
from torchvision import transforms, models
from torchvision.models import EfficientNet_V2_S_Weights
from tqdm.notebook import tqdm

In [13]:
class StabilityDataset(Dataset):
    def __init__(self, csv_file, img_dir, transform=None, augment=False):
        self.stability_data = pd.read_csv(csv_file)
        self.img_dir = img_dir
        self.transform = transform
        self.augment = augment
        self.label_columns = ['shapeset', 'type', 'total_height', 'instability_type', 'cam_angle', 'stable_height']
        
        # Adjust labels to start from 0
        self.stability_data['shapeset'] -= 1
        self.stability_data['type'] -= 1
        self.stability_data['total_height'] -= 2  # Subtract 2 as it starts from 2
        self.stability_data['cam_angle'] -= 1
        self.stability_data['stable_height'] -= 1
        # 'instability_type' already starts from 0, so no adjustment needed

    def __getitem__(self, idx):
        original_idx = idx // 2 if self.augment else idx
        flip = self.augment and idx % 2 == 1

        img_name = str(self.stability_data.iloc[original_idx, 0])
        img_path = os.path.join(self.img_dir, img_name)
        if not os.path.exists(img_path):
            img_path = os.path.join(self.img_dir, f"{img_name}.jpg")
        
        image = Image.open(img_path).convert('RGB')
        
        if flip:
            image = image.transpose(Image.Transpose.FLIP_LEFT_RIGHT)
        
        if self.transform:
            image = self.transform(image)

        labels = {col: torch.tensor(self.stability_data.iloc[original_idx][col], dtype=torch.long)
                  for col in self.label_columns}

        return image, labels

    def __len__(self):
        return len(self.stability_data) * (2 if self.augment else 1)


class StabilityPredictor(nn.Module):
    def __init__(self, num_classes_dict):
        super(StabilityPredictor, self).__init__()
        weights = EfficientNet_V2_S_Weights.DEFAULT
        self.efficientnet = models.efficientnet_v2_s(weights=weights)
        num_ftrs = self.efficientnet.classifier[1].in_features
        self.efficientnet.classifier = nn.Identity()
        
        self.classifiers = nn.ModuleDict({
            name: nn.Sequential(
                nn.Dropout(p=0.3),
                nn.Linear(num_ftrs, num_classes)
            ) for name, num_classes in num_classes_dict.items()
        })

    def forward(self, x):
        features = self.efficientnet(x)
        return {name: classifier(features) for name, classifier in self.classifiers.items()}

In [18]:
def train_model(model, train_loader, val_loader, criterion, optimizer, scheduler, num_epochs, patience=5):
    device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
    model.to(device)
    
    best_val_loss = float('inf')
    epochs_no_improve = 0
    best_model = None
    
    for epoch in range(num_epochs):
        print(f'Epoch {epoch+1}/{num_epochs}')
        
        # Training phase
        model.train()
        train_loss, train_acc = run_epoch(model, train_loader, criterion, optimizer, device, is_training=True)
        
        # Validation phase
        model.eval()
        val_loss, val_acc = run_epoch(model, val_loader, criterion, optimizer, device, is_training=False)
        
        # Learning rate scheduler step
        scheduler.step(sum(val_loss.values()))

        print(f'Train Loss: {sum(train_loss.values()):.4f}, Train Acc: {train_acc}')
        print(f'Val Loss: {sum(val_loss.values()):.4f}, Val Acc: {val_acc}')
        print(f'Learning Rate: {optimizer.param_groups[0]["lr"]:.6f}')
        print('-' * 60)

        # Early stopping check
        if sum(val_loss.values()) < best_val_loss:
            best_val_loss = sum(val_loss.values())
            epochs_no_improve = 0
            best_model = model.state_dict()
        else:
            epochs_no_improve += 1

        if epochs_no_improve == patience:
            print(f'Early stopping triggered after {epoch + 1} epochs')
            model.load_state_dict(best_model)
            break

    return model

def run_epoch(model, data_loader, criterion, optimizer, device, is_training=True):
    running_loss = {name: 0.0 for name in model.classifiers.keys()}
    correct = {name: 0 for name in model.classifiers.keys()}
    total = 0

    progress_bar = tqdm(data_loader, desc="Training" if is_training else "Validating")

    for inputs, labels in progress_bar:
        inputs = inputs.to(device)
        labels = {k: v.to(device) for k, v in labels.items()}
        
        if is_training:
            optimizer.zero_grad()
        
        outputs = model(inputs)
        loss = sum(criterion(outputs[name], labels[name]) for name in outputs.keys())
        
        if is_training:
            loss.backward()
            optimizer.step()
        
        for name in outputs.keys():
            running_loss[name] += criterion(outputs[name], labels[name]).item() * inputs.size(0)
            _, predicted = outputs[name].max(1)
            correct[name] += predicted.eq(labels[name]).sum().item()
        
        total += labels[list(labels.keys())[0]].size(0)

        progress_bar.set_postfix({
            'loss': f'{loss.item():.4f}',
            'acc': f'{sum(correct.values()) / (total * len(correct)):.2f}%'
        })
    
    epoch_loss = {name: loss / len(data_loader.dataset) for name, loss in running_loss.items()}
    epoch_acc = {name: 100. * corr / total for name, corr in correct.items()}

    return epoch_loss, epoch_acc

# Load pre-calculated dataset statistics
stats = torch.load('dataset_stats.pth')
mean, std = stats['mean'], stats['std']
print(f"Loaded dataset mean: {mean}")
print(f"Loaded dataset std: {std}")

# Create transform with loaded normalization
transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize(mean=mean.tolist(), std=std.tolist()),
])

# Create full dataset with augmentation and correct normalization
full_dataset = StabilityDataset(csv_file='./COMP90086_2024_Project_train/train.csv', 
                                img_dir='./COMP90086_2024_Project_train/train', 
                                transform=transform,
                                augment=True)  # Enable augmentation



# Split dataset into train and validation
val_ratio = 0.025
dataset_size = len(full_dataset)
val_size = int(val_ratio * dataset_size)
train_size = dataset_size - val_size
print(f'Splitting dataset into {(1 - val_ratio)}:{val_ratio} training/test split')
train_dataset, val_dataset = random_split(full_dataset, [train_size, val_size])

# Create data loaders
train_loader = DataLoader(train_dataset, batch_size=32, num_workers=8)
val_loader = DataLoader(val_dataset, batch_size=32, shuffle=False, num_workers=8)

# Define the number of classes for each task
num_classes_dict = {
    'shapeset': 2,
    'stack_type': 2,
    'total_height': 5,  # 2 to 6, so 5 classes
    'instability_type': 3,  # 0 to 2, so 3 classes
    'cam_angle': 2,
    'stable_height': 6
}

print("Number of classes for each task:", num_classes_dict)

criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)
scheduler = ReduceLROnPlateau(optimizer, mode='min', factor=0.5, patience=2, verbose=True)

print('Training...')
model = train_model(model, train_loader, val_loader, criterion, optimizer, scheduler, num_epochs=30, patience=3)

torch.save(model.state_dict(), 'stability_predictor_efficientnetv2_multi_task.pth')

Loaded dataset mean: tensor([0.4677, 0.4412, 0.4065])
Loaded dataset std: tensor([0.2721, 0.2285, 0.1913])
Splitting dataset into 0.975:0.025 training/test split
Number of classes for each task: {'shapeset': 2, 'stack_type': 2, 'total_height': 5, 'instability_type': 3, 'cam_angle': 2, 'stable_height': 6}
Training...


RuntimeError: CUDA error: device-side assert triggered
CUDA kernel errors might be asynchronously reported at some other API call, so the stacktrace below might be incorrect.
For debugging consider passing CUDA_LAUNCH_BLOCKING=1.
Compile with `TORCH_USE_CUDA_DSA` to enable device-side assertions.


In [15]:
def predict(model, test_loader, device):
    model.eval()
    predictions = {name: [] for name in model.classifiers.keys()}
    image_ids = []

    with torch.no_grad():
        for inputs, ids in test_loader:
            inputs = inputs.to(device)
            outputs = model(inputs)
            for name, output in outputs.items():
                _, preds = torch.max(output, 1)
                predictions[name].extend(preds.cpu().numpy() + 1)  # Add 1 to convert back to 1-6 range
            image_ids.extend(ids.numpy())

    return predictions, image_ids

# Set up device
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

# Load the unlabeled dataset
test_dataset = StabilityDataset(csv_file='./COMP90086_2024_Project_test/test.csv', 
                                img_dir='./COMP90086_2024_Project_test/test', 
                                transform=transform)
test_loader = DataLoader(test_dataset, batch_size=32, shuffle=False, num_workers=4)

# Load the trained model
model = StabilityPredictor(num_classes_dict)
model.load_state_dict(torch.load('stability_predictor_efficientnetv2_multi_task.pth'))
model.to(device)

# Make predictions
predictions, image_ids = predict(model, test_loader, device)

# Save predictions to CSV
with open('predictions.csv', 'w', newline='') as csvfile:
    writer = csv.writer(csvfile)
    header = ['id'] + [col if col != 'stack_type' else 'type' for col in predictions.keys()]
    writer.writerow(header)
    for i, img_id in enumerate(image_ids):
        row = [int(img_id) + 1]  # Assuming image IDs start from 0
        for name in predictions.keys():
            pred = int(predictions[name][i])
            if name in ['shapeset', 'type', 'cam_angle', 'stable_height']:
                pred += 1
            elif name == 'total_height':
                pred += 2
            # 'instability_type' doesn't need adjustment
            row.append(pred)
        writer.writerow(row)

print("Predictions saved to predictions.csv")


Loaded dataset mean: tensor([0.4677, 0.4412, 0.4065])
Loaded dataset std: tensor([0.2721, 0.2285, 0.1913])
shapeset: min = 1, max = 2, unique values = [1, 2]
type: min = 1, max = 2, unique values = [1, 2]
total_height: min = 2, max = 6, unique values = [2, 3, 4, 5, 6]
instability_type: min = 0, max = 2, unique values = [0, 1, 2]
cam_angle: min = 1, max = 2, unique values = [1, 2]
stable_height: min = 1, max = 6, unique values = [1, 2, 3, 4, 5, 6]
Splitting dataset into 0.975:0.025 training/test split
Training...


RuntimeError: CUDA error: device-side assert triggered
CUDA kernel errors might be asynchronously reported at some other API call, so the stacktrace below might be incorrect.
For debugging consider passing CUDA_LAUNCH_BLOCKING=1.
Compile with `TORCH_USE_CUDA_DSA` to enable device-side assertions.
