In [1]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader, random_split, Subset
from torchvision import transforms, models
from torchvision.models import EfficientNet_V2_S_Weights, convnext_base
from PIL import Image
import pandas as pd
import numpy as np
import os
import csv
from torch.optim.lr_scheduler import ReduceLROnPlateau
from tqdm.notebook import tqdm
import platform
import multiprocessing
import cv2
import json

class StabilityDataset(Dataset):
    def __init__(self, csv_file, img_dir, transform=None, augment=False, use_quantized=False, additional_columns=None):
        self.stability_data = pd.read_csv(csv_file)
        self.img_dir = img_dir
        self.transform = transform
        self.augment = augment
        self.use_quantized = use_quantized
        self.additional_columns = additional_columns or []
        self.image_files = self._get_image_files()
        self.feature_categories = self._get_feature_categories()

    def _get_image_files(self):
        image_files = []
        for idx, row in self.stability_data.iterrows():
            img_name = str(row[0])
            if self.use_quantized:
                image_files.append(f"quantized/{img_name}_quantized.jpg")
                if self.augment:
                    image_files.extend([
                        f"quantized/{img_name}_flipped_quantized.jpg",
                        f"quantized/{img_name}_zoomed_quantized.jpg",
                        f"quantized/{img_name}_zoomed_flipped_quantized.jpg"
                    ])
            else:
                image_files.append(f"{img_name}_original.jpg")
                if self.augment:
                    image_files.extend([
                        f"{img_name}_flipped.jpg",
                        f"{img_name}_zoomed.jpg",
                        f"{img_name}_zoomed_flipped.jpg"
                    ])
        return image_files

    def _get_feature_categories(self):
        feature_categories = {}
        for col in self.additional_columns:
            unique_values = self.stability_data[col].unique()
            feature_categories[col] = {
                'num_categories': len(unique_values),
                'value_to_index': {val: idx for idx, val in enumerate(unique_values)}
            }
        return feature_categories

    def __len__(self):
        return len(self.image_files)

    def __getitem__(self, idx):
        img_name = self.image_files[idx]
        img_path = os.path.join(self.img_dir, img_name)
        image = cv2.imread(img_path)
        image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)

        original_idx = idx // 4 if self.augment else idx
        stability_height = self.stability_data.iloc[original_idx, -1]
        stability_class = int(stability_height) - 1

        # Get the image ID (assuming it's the first column in the CSV)
        image_id = self.stability_data.iloc[original_idx, 0]

        if self.transform:
            image = self.transform(image)
        else:
            image = torch.from_numpy(image.transpose((2, 0, 1))).float() / 255.0

        # Get additional column values
        additional_data = []
        for col in self.additional_columns:
            value = self.stability_data.iloc[original_idx][col]
            index = self.feature_categories[col]['value_to_index'][value]
            additional_data.append(torch.tensor(index, dtype=torch.long))

        # Return image tensor first, followed by image_id, label, and additional data
        return (image, image_id, torch.tensor(stability_class, dtype=torch.long), *additional_data)

    def get_feature_dimensions(self):
        return {col: info['num_categories'] for col, info in self.feature_categories.items()}
    
class StabilityPredictor(nn.Module):
    def __init__(self, num_classes=6, dropout_rate=0.3, additional_features=None):
        super(StabilityPredictor, self).__init__()

        # Default pre-trained weights
        weights = EfficientNet_V2_S_Weights.DEFAULT
        self.efficientnet = models.efficientnet_v2_s(weights=weights)

        # Get the number of input features to the final classifier layer
        num_ftrs = self.efficientnet.classifier[1].in_features

        # Embedding layers for additional features
        self.additional_features = additional_features or {}
        self.embedding_layers = nn.ModuleDict()
        self.embedding_dim = 16  # You can adjust this value
        total_embedding_dim = 0

        for feature, num_categories in self.additional_features.items():
            self.embedding_layers[feature] = nn.Embedding(num_categories, self.embedding_dim)
            total_embedding_dim += self.embedding_dim

        # Combine image features with embeddings
        self.combined_layer = nn.Linear(num_ftrs + total_embedding_dim, num_ftrs)

        # Replace the default classifier with a custom one (Dropout + Linear layer)
        self.classifier = nn.Sequential(
            nn.Dropout(p=dropout_rate, inplace=True),
            nn.Linear(num_ftrs, num_classes)
        )

    def forward(self, x, *additional_inputs):
        # Process the image through EfficientNet
        x = self.efficientnet.features(x)
        x = self.efficientnet.avgpool(x)
        x = torch.flatten(x, 1)

        # Process additional features through embedding layers
        embeddings = []
        for i, (feature, _) in enumerate(self.additional_features.items()):
            embedding = self.embedding_layers[feature](additional_inputs[i])
            embeddings.append(embedding)

        # Concatenate image features with embeddings
        if embeddings:
            x = torch.cat([x] + embeddings, dim=1)
            x = self.combined_layer(x)

        # Final classification
        x = self.classifier(x)
        return x


class EfficientAttentionNet(nn.Module):
    def __init__(self, num_classes=6, dropout_rate=0.3, additional_features=None):
        super(EfficientAttentionNet, self).__init__()

        # Default pre-trained weights for EfficientNet V2 Small
        weights = EfficientNet_V2_S_Weights.DEFAULT
        self.efficientnet = models.efficientnet_v2_s(weights=weights)

        # Spatial attention module
        self.spatial_attention = SpatialAttentionModule(kernel_size=7)

        # Get the number of input features to the final classifier layer
        num_ftrs = self.efficientnet.classifier[1].in_features

        # Embedding layers for additional features
        self.additional_features = additional_features or {}
        self.embedding_layers = nn.ModuleDict()
        self.embedding_dim = 16  # You can adjust this value
        total_embedding_dim = 0

        for feature, num_categories in self.additional_features.items():
            self.embedding_layers[feature] = nn.Embedding(num_categories, self.embedding_dim)
            total_embedding_dim += self.embedding_dim

        # Combine image features with embeddings
        self.combined_layer = nn.Linear(num_ftrs + total_embedding_dim, num_ftrs)

        # Replace the default classifier with a custom one (Dropout + Linear layer)
        self.classifier = nn.Sequential(
            nn.Dropout(p=dropout_rate, inplace=True),
            nn.Linear(num_ftrs, num_classes)
        )

    def forward(self, x, *additional_inputs):
        # Pass through the feature extractor (EfficientNet backbone) until the last feature map
        features = self.efficientnet.features(x)  # Extract convolutional features
        
        # Apply spatial attention module to the feature maps
        features = self.spatial_attention(features)
        
        # Global average pooling
        x = self.efficientnet.avgpool(features)
        
        # Flatten the pooled features
        x = torch.flatten(x, 1)

        # Process additional features through embedding layers
        embeddings = []
        for i, (feature, _) in enumerate(self.additional_features.items()):
            embedding = self.embedding_layers[feature](additional_inputs[i])
            embeddings.append(embedding)

        # Concatenate image features with embeddings
        if embeddings:
            x = torch.cat([x] + embeddings, dim=1)
            x = self.combined_layer(x)

        # Final classification
        x = self.classifier(x)
        return x

class SpatialAttentionModule(nn.Module):
    def __init__(self, kernel_size=7):
        super(SpatialAttentionModule, self).__init__()
        self.conv = nn.Conv2d(2, 1, kernel_size, padding=kernel_size // 2, bias=False)
        self.sigmoid = nn.Sigmoid()

    def forward(self, x):
        # Channel-wise max and average pooling (along spatial dimensions)
        avg_out = torch.mean(x, dim=1, keepdim=True)
        max_out, _ = torch.max(x, dim=1, keepdim=True)
        concat = torch.cat([avg_out, max_out], dim=1)
        attention_map = self.sigmoid(self.conv(concat))
        return x * attention_map

class EfficientChannelAttentionNet(nn.Module):
    def __init__(self, num_classes=6, dropout_rate=0.0, additional_features=None):
        super(EfficientChannelAttentionNet, self).__init__()

        # Default pre-trained weights for EfficientNet V2 Small
        weights = EfficientNet_V2_S_Weights.DEFAULT
        self.efficientnet = models.efficientnet_v2_s(weights=weights)

        # Add channel attention modules after specific layers in the EfficientNet backbone
        self.channel_attention1 = ChannelAttentionModule(in_planes=24)  # After first block (features[1])
        self.channel_attention2 = ChannelAttentionModule(in_planes=48)  # After second block (features[2])

        # Get the number of input features to the final classifier layer
        num_ftrs = self.efficientnet.classifier[1].in_features

        # Embedding layers for additional features
        self.additional_features = additional_features or {}
        self.embedding_layers = nn.ModuleDict()
        self.embedding_dim = 16  # You can adjust this value
        total_embedding_dim = 0

        for feature, num_categories in self.additional_features.items():
            self.embedding_layers[feature] = nn.Embedding(num_categories, self.embedding_dim)
            total_embedding_dim += self.embedding_dim

        # Combine image features with embeddings
        self.combined_layer = nn.Linear(num_ftrs + total_embedding_dim, num_ftrs)

        # Replace the default classifier with a custom one (Dropout + Linear layer)
        self.classifier = nn.Sequential(
            nn.Dropout(p=dropout_rate, inplace=True),
            nn.Linear(num_ftrs, num_classes)
        )

    def forward(self, x, *additional_inputs):
        # Pass input through the first few layers of EfficientNet
        x = self.efficientnet.features[0](x)  # Initial convolution and stem
        x = self.efficientnet.features[1](x)  # First block (channels: 24)
        x = self.channel_attention1(x)  # Apply channel attention after the first block
        
        x = self.efficientnet.features[2](x)  # Second block (channels: 48)
        x = self.channel_attention2(x)  # Apply channel attention after the second block
        
        # Continue with the rest of the EfficientNet layers
        for i in range(3, len(self.efficientnet.features)):
            x = self.efficientnet.features[i](x)

        # Global average pooling
        x = self.efficientnet.avgpool(x)
        x = torch.flatten(x, 1)

        # Process additional features through embedding layers
        embeddings = []
        for i, (feature, _) in enumerate(self.additional_features.items()):
            embedding = self.embedding_layers[feature](additional_inputs[i])
            embeddings.append(embedding)

        # Concatenate image features with embeddings
        if embeddings:
            x = torch.cat([x] + embeddings, dim=1)
            x = self.combined_layer(x)

        # Final classification
        x = self.classifier(x)

        return x

class ChannelAttentionModule(nn.Module):
    def __init__(self, in_planes, ratio=16):
        super(ChannelAttentionModule, self).__init__()
        self.avg_pool = nn.AdaptiveAvgPool2d(1)
        self.max_pool = nn.AdaptiveMaxPool2d(1)

        self.fc = nn.Sequential(
            nn.Conv2d(in_planes, in_planes // ratio, 1, bias=False),
            nn.ReLU(),
            nn.Conv2d(in_planes // ratio, in_planes, 1, bias=False)
        )
        self.sigmoid = nn.Sigmoid()

    def forward(self, x):
        avg_out = self.fc(self.avg_pool(x))
        max_out = self.fc(self.max_pool(x))
        attention = self.sigmoid(avg_out + max_out)
        return x * attention
    
class ConvnextPredictor(nn.Module):
    def __init__(self, num_classes=6, freeze_layers=True, additional_features=None):
        super(ConvnextPredictor, self).__init__()

        # Default pre-trained weights
        weights = models.convnext.ConvNeXt_Base_Weights.DEFAULT
        self.convnextnet = convnext_base(weights=weights)

        # Get the number of input features to the final classifier layer
        num_ftrs = self.convnextnet.classifier[2].in_features

        # Embedding layers for additional features
        self.additional_features = additional_features or {}
        self.embedding_layers = nn.ModuleDict()
        self.embedding_dim = 16  # You can adjust this value
        total_embedding_dim = 0

        for feature, num_categories in self.additional_features.items():
            self.embedding_layers[feature] = nn.Embedding(num_categories, self.embedding_dim)
            total_embedding_dim += self.embedding_dim

        # Combine ConvNeXt features with embeddings
        self.combined_layer = nn.Linear(num_ftrs + total_embedding_dim, num_ftrs)

        # Replace the default classifier with a custom one
        self.classifier = nn.Sequential(
            nn.LayerNorm(num_ftrs),  # ConvNeXt uses LayerNorm instead of BatchNorm
            nn.Flatten(start_dim=1),
            nn.Linear(num_ftrs, num_classes)
        )

        if freeze_layers:
            print('Layers frozen!')
            # Freeze ConvNeXt backbone layers for quicker fine-tuning training
            for param in self.convnextnet.parameters():
                param.requires_grad = False

            # Only unfreeze the classifier layers and the combined layer
            for param in self.classifier.parameters():
                param.requires_grad = True
            for param in self.combined_layer.parameters():
                param.requires_grad = True
            for embedding_layer in self.embedding_layers.values():
                for param in embedding_layer.parameters():
                    param.requires_grad = True

    def forward(self, x, *additional_inputs):
        # Pass through ConvNeXt backbone
        x = self.convnextnet.features(x)
        x = self.convnextnet.avgpool(x)
        x = torch.flatten(x, 1)

        # Process additional features through embedding layers
        embeddings = []
        for i, (feature, _) in enumerate(self.additional_features.items()):
            embedding = self.embedding_layers[feature](additional_inputs[i])
            embeddings.append(embedding)

        # Concatenate ConvNeXt features with embeddings
        if embeddings:
            x = torch.cat([x] + embeddings, dim=1)
            x = self.combined_layer(x)

        # Final classification
        x = self.classifier(x)

        return x

def colour_quantisation(image, k=20):
    # Convert the image to 2D pixel array
    pixels = np.float32(image.reshape(-1, 3))

    # Define criteria for K-Means (stop after 10 iter or if accuracy reaches 1.0)
    criteria = (cv2.TERM_CRITERIA_EPS + cv2.TERM_CRITERIA_MAX_ITER, 10, 1.0)

    # Apply K-Means clustering
    _, labels, palette = cv2.kmeans(pixels, k, None, criteria, 10, cv2.KMEANS_RANDOM_CENTERS)

    # Convert back to 8-bit values
    quantised = np.uint8(palette)[labels.flatten()]

    # Reshape the image to original dimensions
    quantised = quantised.reshape(image.shape)
    
    return quantised

def train_model(model, train_loader, val_loader, criterion, optimizer, scheduler, num_epochs, patience, device):
    model.to(device)
    
    best_val_loss = float('inf')
    epochs_no_improve = 0
    best_model = None
    
    for epoch in range(num_epochs):
        print(f'Epoch {epoch+1}/{num_epochs}')
        
        # Training phase
        model.train()
        train_loss, train_acc = run_epoch(model, train_loader, criterion, optimizer, device, is_training=True)
        
        # Validation phase
        model.eval()
        val_loss, val_acc = run_epoch(model, val_loader, criterion, optimizer, device, is_training=False)
        
        # Learning rate scheduler step
        scheduler.step(val_loss)

        print(f'Train Loss: {train_loss:.4f}, Train Acc: {train_acc:.2f}%')
        print(f'Val Loss: {val_loss:.4f}, Val Acc: {val_acc:.2f}%')
        print(f'Learning Rate: {optimizer.param_groups[0]["lr"]:.6f}')
        print('-' * 60)

        # Early stopping check
        if val_loss < best_val_loss:
            best_val_loss = val_loss
            epochs_no_improve = 0
            best_model = model.state_dict()
        else:
            epochs_no_improve += 1

        if epochs_no_improve == patience:
            print(f'Early stopping triggered after {epoch + 1} epochs')
            model.load_state_dict(best_model)
            break

    return model

def run_epoch(model, data_loader, criterion, optimizer, device, is_training=True):
    running_loss = 0.0
    correct = 0
    total = 0

    # Create progress bar
    progress_bar = tqdm(data_loader, desc="Training" if is_training else "Validating")

    for batch in progress_bar:
        inputs = batch[0].to(device)
        labels = batch[2].to(device)
        additional_inputs = [feature.to(device) for feature in batch[3:]]  # Change this from batch[2:] to batch[3:]
        
        if is_training:
            optimizer.zero_grad()
        
        outputs = model(inputs, *additional_inputs)
        loss = criterion(outputs, labels)
        
        if is_training:
            loss.backward()
            optimizer.step()
        
        running_loss += loss.item() * inputs.size(0)
        _, predicted = outputs.max(1)
        total += labels.size(0)
        correct += predicted.eq(labels).sum().item()

        # Update progress bar
        progress_bar.set_postfix({
            'loss': f'{loss.item():.4f}',
            'acc': f'{100. * correct / total:.2f}%'
        })
    
    epoch_loss = running_loss / len(data_loader.dataset)
    epoch_acc = 100. * correct / total

    return epoch_loss, epoch_acc

def predict(model, test_loader, device):
    model.eval()
    predictions = []
    image_ids = []
    with torch.no_grad():
        for batch in test_loader:
            inputs = batch[0].to(device)
            ids = batch[1]
            additional_inputs = [feature.to(device) for feature in batch[3:]]
            outputs = model(inputs, *additional_inputs)
            _, preds = torch.max(outputs, 1)
            predictions.extend(preds.cpu().numpy() + 1)  # Add 1 to convert back to 1-6 range
            image_ids.extend(ids.numpy())  # Convert tensor to numpy array
    return predictions, image_ids

def calculate_stats(dataset):
    loader = DataLoader(dataset, batch_size=100, num_workers=get_optimal_num_workers(), shuffle=False)
    mean = 0.
    std = 0.
    total_samples = len(dataset)
    
    # Create a tqdm progress bar
    pbar = tqdm(total=total_samples, desc="Calculating Stats", unit="sample")
    
    for batch in loader:
        images = batch[0]  # Assuming images are always the first element
        batch_samples = images.size(0)
        images = images.view(batch_samples, images.size(1), -1)
        mean += images.mean(2).sum(0)
        std += images.std(2).sum(0)
        
        # Update the progress bar
        pbar.update(batch_samples)
   
    mean /= total_samples
    std /= total_samples
    
    # Close the progress bar
    pbar.close()
    
    return mean, std

def save_split_and_stats(train_indices, val_indices, train_mean, train_std, filename):
    data = {
        'train_indices': train_indices,
        'val_indices': val_indices,
        'train_mean': train_mean.tolist(),
        'train_std': train_std.tolist()
    }
    with open(filename, 'w') as f:
        json.dump(data, f)

def load_split_and_stats(filename):
    with open(filename, 'r') as f:
        data = json.load(f)
    return (
        data['train_indices'],
        data['val_indices'],
        torch.tensor(data['train_mean']),
        torch.tensor(data['train_std'])
    )

# Windows can't do multicore processing
def get_optimal_num_workers():
    if platform.system() == 'Windows':
        return 0
    else:
        return multiprocessing.cpu_count()

def train_and_save(config):
    device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

    # Check if we should load existing split and stats
    split_stats_file = config.get('split_stats_file', 'split_and_stats.json')
    if config.get('use_existing_split', False) and os.path.exists(split_stats_file):
        print(f"Loading existing split and stats from {split_stats_file}")
        train_indices, val_indices, train_mean, train_std = load_split_and_stats(split_stats_file)
    else:
        # Create a base dataset without augmentation for splitting and stats calculation
        base_dataset = StabilityDataset(csv_file=config['train_csv'], 
                                        img_dir=config['train_img_dir'], 
                                        transform=transforms.ToTensor(),
                                        augment=False,
                                        use_quantized=config['use_quantized'],
                                        additional_columns=config['additional_columns'])

        # Split dataset into train and validation
        dataset_size = len(base_dataset)
        indices = list(range(dataset_size))
        np.random.shuffle(indices)
        split = int(np.floor(config['val_ratio'] * dataset_size))
        train_indices, val_indices = indices[split:], indices[:split]

        # Calculate statistics for training set only
        train_subset = Subset(base_dataset, train_indices)

        print("Calculating training dataset statistics...")
        train_mean, train_std = calculate_stats(train_subset)
        print(f"Training dataset mean: {train_mean}")
        print(f"Training dataset std: {train_std}")

        # Save split and stats
        save_split_and_stats(train_indices, val_indices, train_mean, train_std, split_stats_file)
        print(f"Split and stats saved to {split_stats_file}")

    # Create transforms
    normalize_transform = transforms.Normalize(mean=train_mean, std=train_std)
    
    base_transform = transforms.Compose([
        transforms.ToTensor(),
        normalize_transform,
    ])

     # Create datasets with appropriate transforms
    full_dataset = StabilityDataset(csv_file=config['train_csv'], 
                                    img_dir=config['train_img_dir'], 
                                    transform=base_transform,
                                    augment=config['use_augmentation'],
                                    use_quantized=config['use_quantized'],
                                    additional_columns=config['additional_columns'])

    # Get the number of categories for each additional feature
    additional_features = full_dataset.get_feature_dimensions()

    # Apply the split, ensuring augmented images stay with their original counterparts
    if config['use_augmentation']:
        train_indices = [i for idx in train_indices for i in range(idx * 4, (idx + 1) * 4)]
        val_indices = [i for idx in val_indices for i in range(idx * 4, (idx + 1) * 4)]
    
    train_dataset = Subset(full_dataset, train_indices)
    val_dataset = Subset(full_dataset, val_indices[:len(val_indices)//4])  # Only use original images for validation

    # Create data loaders
    train_loader = DataLoader(train_dataset, batch_size=config['batch_size'], shuffle=True, num_workers=get_optimal_num_workers())
    val_loader = DataLoader(val_dataset, batch_size=config['batch_size'], shuffle=False, num_workers=get_optimal_num_workers())

    # Initialize model, criterion, optimizer, and scheduler
    if config['model'] == 'StabilityPredictor':
        model = StabilityPredictor(num_classes=config['num_classes'], dropout_rate=config['dropout_rate'], additional_features=additional_features)
        optimizer = optim.Adam(model.parameters(), lr=config['learning_rate'])
    elif config['model'] == 'EfficientAttentionNet':
        model = EfficientAttentionNet(num_classes=config['num_classes'], dropout_rate=config['dropout_rate'], additional_features=additional_features)
        optimizer = optim.Adam(model.parameters(), lr=config['learning_rate'])
    elif config['model'] == 'EfficientChannelAttentionNet':
        model = EfficientChannelAttentionNet(num_classes=config['num_classes'], dropout_rate=config['dropout_rate'], additional_features=additional_features)
        optimizer = optim.Adam(model.parameters(), lr=config['learning_rate'])
    elif config['model'] == 'ConvnextPredictor':
        model = ConvnextPredictor(num_classes=config['num_classes'], freeze_layers=config['freeze_layers'], additional_features=additional_features)
        optimizer = torch.optim.AdamW(model.parameters(), lr=config['learning_rate'], weight_decay=1e-5)
    else:
        print('Unrecognised model in config. Defaulting to StabilityPredictor (EfficientNet)')
        model = StabilityPredictor(num_classes=config['num_classes'], dropout_rate=config['dropout_rate'], additional_features=additional_features)
        optimizer = optim.Adam(model.parameters(), lr=config['learning_rate'])

    print('Model: ' + config['model'])
    print('Using quantized images: ' + str(config['use_quantized']))
    print('Additional features:', ', '.join(f"{k}: {v} categories" for k, v in additional_features.items()))

    criterion = nn.CrossEntropyLoss()
    scheduler = ReduceLROnPlateau(optimizer, mode='min', factor=config['lr_factor'], patience=config['lr_patience'], verbose=True)

    # Train model
    print('Training...')
    model = train_model(model, train_loader, val_loader, criterion, optimizer, scheduler, 
                        num_epochs=config['num_epochs'], patience=config['early_stopping_patience'], device=device)

    torch.save(model.state_dict(), config['model_save_path'])
    print("Training complete. Model saved.")

    # Prediction on test set
    test_dataset = StabilityDataset(csv_file=config['test_csv'],
                                    img_dir=config['test_img_dir'],
                                    transform=base_transform,
                                    use_quantized=config['use_quantized'],
                                    additional_columns=config['additional_columns'])
    test_loader = DataLoader(test_dataset, batch_size=config['batch_size'], shuffle=False, num_workers=get_optimal_num_workers())

    predictions, image_ids = predict(model, test_loader, device)

    # Save predictions to CSV
    with open(config['predictions_save_path'], 'w', newline='') as csvfile:
        writer = csv.writer(csvfile)
        writer.writerow(['id', 'labels'])
        for img_id, pred in zip(image_ids, predictions):
            writer.writerow([int(img_id), int(pred)])
    print(f"Predictions saved to {config['predictions_save_path']}")

In [None]:
config = {
    'model': 'EfficientAttentionNet',
    'train_csv': './COMP90086_2024_Project_train/train.csv',
    'train_img_dir': './preprocessed_images/train',
    'test_csv': './test_imputed.csv',
    'test_img_dir': './preprocessed_images/test',
    'additional_columns': ['shapeset', 'cam_angle', 'total_height'],
    'val_ratio': 0.1,
    'use_augmentation': True,
    'use_quantized': False,
    'batch_size': 80,
    'num_classes': 6,
    'dropout_rate': 0.3,
    'learning_rate': 0.001,
    'lr_factor': 0.1,
    'lr_patience': 3,
    'freeze_layers': False,
    'num_epochs': 30,
    'early_stopping_patience': 6,
    'model_save_path': 'efficient_attention_net_unquantized_multi.pth',
    'predictions_save_path': 'efficient_attention_predictions_unquantized_multi.csv',
    'use_existing_split': True,  # Set to True to use existing split and stats
    'split_stats_file': 'split_and_stats.json'  # File to save/load split and stats
}

train_and_save(config)

Loading existing split and stats from split_and_stats.json


  img_name = str(row[0])


Model: EfficientAttentionNet
Using quantized images: False
Additional features: shapeset: 2 categories, cam_angle: 2 categories, total_height: 5 categories
Training...
Epoch 1/30




Training:   0%|          | 0/346 [00:00<?, ?it/s]

Validating:   0%|          | 0/10 [00:00<?, ?it/s]

Train Loss: 1.2767, Train Acc: 43.21%
Val Loss: 1.1284, Val Acc: 48.44%
Learning Rate: 0.001000
------------------------------------------------------------
Epoch 2/30


Training:   0%|          | 0/346 [00:00<?, ?it/s]

Validating:   0%|          | 0/10 [00:00<?, ?it/s]

Train Loss: 0.9793, Train Acc: 59.69%
Val Loss: 1.1037, Val Acc: 60.29%
Learning Rate: 0.001000
------------------------------------------------------------
Epoch 3/30


Training:   0%|          | 0/346 [00:00<?, ?it/s]

Validating:   0%|          | 0/10 [00:00<?, ?it/s]

Train Loss: 0.8372, Train Acc: 66.76%
Val Loss: 0.9451, Val Acc: 57.29%
Learning Rate: 0.001000
------------------------------------------------------------
Epoch 4/30


Training:   0%|          | 0/346 [00:00<?, ?it/s]

Validating:   0%|          | 0/10 [00:00<?, ?it/s]

Train Loss: 0.7389, Train Acc: 70.91%
Val Loss: 0.9958, Val Acc: 61.85%
Learning Rate: 0.001000
------------------------------------------------------------
Epoch 5/30


Training:   0%|          | 0/346 [00:00<?, ?it/s]

Validating:   0%|          | 0/10 [00:00<?, ?it/s]

Train Loss: 0.6360, Train Acc: 75.50%
Val Loss: 1.0338, Val Acc: 61.72%
Learning Rate: 0.001000
------------------------------------------------------------
Epoch 6/30


Training:   0%|          | 0/346 [00:00<?, ?it/s]

Validating:   0%|          | 0/10 [00:00<?, ?it/s]

Train Loss: 0.5556, Train Acc: 79.15%
Val Loss: 1.0085, Val Acc: 63.93%
Learning Rate: 0.001000
------------------------------------------------------------
Epoch 7/30


Training:   0%|          | 0/346 [00:00<?, ?it/s]

Validating:   0%|          | 0/10 [00:00<?, ?it/s]

Train Loss: 0.4769, Train Acc: 82.21%
Val Loss: 1.1188, Val Acc: 64.06%
Learning Rate: 0.000100
------------------------------------------------------------
Epoch 8/30


Training:   0%|          | 0/346 [00:00<?, ?it/s]

Validating:   0%|          | 0/10 [00:00<?, ?it/s]

Train Loss: 0.2573, Train Acc: 90.81%
Val Loss: 1.3089, Val Acc: 63.54%
Learning Rate: 0.000100
------------------------------------------------------------
Epoch 9/30


Training:   0%|          | 0/346 [00:00<?, ?it/s]

Exception ignored in: <function _releaseLock at 0x728ad0b3da20>
Traceback (most recent call last):
  File "/usr/lib/python3.10/logging/__init__.py", line 228, in _releaseLock
    def _releaseLock():
KeyboardInterrupt: 
