# Libraries

In [1]:
! pip install timm --quiet
! pip install ipywidgets --quiet
! pip install -U transformers albumentations --quiet

[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m44.4/44.4 kB[0m [31m1.6 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m66.0/66.0 kB[0m [31m3.9 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m9.7/9.7 MB[0m [31m73.3 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m284.0/284.0 kB[0m [31m17.3 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m632.7/632.7 kB[0m [31m38.5 MB/s[0m eta [36m0:00:00[0m
[?25h

In [2]:
# Standard Libraries
import os
import random
from collections import Counter

# Numerical and Data Handling Libraries
import numpy as np
import pandas as pd

# Image Processing Libraries
import cv2
from PIL import Image

# Plotting and Visualization
import matplotlib.pyplot as plt

# PyTorch and Torchvision
import torch
from torch.utils.data import Dataset, DataLoader
from torch.optim import AdamW
from torchvision import transforms
from torchvision.datasets import ImageFolder

# Transformers for Vision Models
from transformers import ViTConfig, ViTForImageClassification, ViTImageProcessor

# Utility Libraries
from tqdm import tqdm
from sklearn.metrics import accuracy_score

# jupyter nbextension enable --py widgetsnbextension
from google.colab import output
output.enable_custom_widget_manager()

# Data Import

In [3]:
train_dir = "/kaggle/input/croped-processed-augmented-bird-dataset/CropImage_Dataset/CropImage_Dataset/train_images"
val_dir = "/kaggle/input/croped-processed-augmented-bird-dataset/CropImage_Dataset/CropImage_Dataset/val_images"
test_dir = "/kaggle/input/croped-processed-augmented-bird-dataset/CropImage_Dataset/CropImage_Dataset/test_images/mistery_cat"

# Model

In [4]:
# from transformers import AutoImageProcessor, SwinForImageClassification

# # # Set the model name to Emiel's fine-tuned Swin model for bird classification
# model_name = "Emiel/cub-200-bird-classifier-swin"

# # # Load the image processor automatically
# processor = AutoImageProcessor.from_pretrained(model_name, use_fast=True)

# # Load the Swin model, specifying the number of classes (200 for CUB-200)
# model = SwinForImageClassification.from_pretrained(
#     model_name,
#     num_labels=20,  # Ensure this matches the number of classes in your dataset
#     ignore_mismatched_sizes=True
# )

In [5]:
# import timm
# model = timm.create_model("vit_base_patch16_224", pretrained=True, num_classes=20)

## Freeze all feature layers of Swin

In [6]:
# # Freeze all parameters in the Swin backbone first.
# for param in model.swin.parameters():
#     param.requires_grad = False

# # Unfreeze the parameters in the last block (layer) of the backbone.
# # This assumes that your model.swin has an attribute 'layers' which is a ModuleList.
# if hasattr(model.swin, "stages"):
#     for param in model.swin.stages[-2].parameters():
#         param.requires_grad = True
#     print("Unfroze the last stage of the Swin backbone.")

# # Ensure the classification head is trainable
# for param in model.classifier.parameters():
#     param.requires_grad = True

## Set layer-wise learning rate

In [7]:
# from transformers import AdamW

# # Set layer-wise learning rate (feature layer learning rate is lower)
# optimizer = AdamW(
#     [
#         {"params": model.swin.parameters(), "lr": 1e-5},  # Feature extraction layer
#         {"params": model.classifier.parameters(), "lr": 3e-4}  # Classification header
#     ],
#     weight_decay=0.01
# )

In [8]:
# # Verify that the classification layer is randomly initialized (correct state should be True)
# print(model.classifier.weight.mean().item())  # Should be close to 0 (normal distribution initialization)
# print(model.classifier.bias.mean().item())    # Should be close to 0

In [9]:
submission_class_order = [
    'Groove_billed_Ani',
    'Red_winged_Blackbird',
    'Rusty_Blackbird',
    'Gray_Catbird',
    'Brandt_Cormorant',
    'Eastern_Towhee',
    'Indigo_Bunting',
    'Brewer_Blackbird',
    'Painted_Bunting',
    'Bobolink',
    'Lazuli_Bunting',
    'Yellow_headed_Blackbird',
    'American_Crow',
    'Fish_Crow',
    'Brown_Creeper',
    'Yellow_billed_Cuckoo',
    'Yellow_breasted_Chat',
    'Black_billed_Cuckoo',
    'Gray_crowned_Rosy_Finch',
    'Bronzed_Cowbird'
]

In [10]:
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

class BirdDataset(Dataset):
    def __init__(self, main_dir, transform=None):
        self.dataset = ImageFolder(root=main_dir, transform=transform)
        self.class_to_idx = self.dataset.class_to_idx
        
    def __len__(self):
        return len(self.dataset)
    
    def __getitem__(self, idx):
        image, label = self.dataset[idx]
        return image, label

# train_transform = transforms.Compose([
#     transforms.Resize((420, 420)),        
#     transforms.RandomResizedCrop(384, scale=(0.8, 1.0)),  
#     transforms.RandomHorizontalFlip(p=0.5),  
#     transforms.RandomVerticalFlip(p=0.2),    
#     transforms.RandomRotation(degrees=15),   
#     transforms.RandomAffine(degrees=0, translate=(0.1, 0.1)),  
#     transforms.ColorJitter(brightness=0.3, contrast=0.3, saturation=0.3, hue=0.05),  
#     transforms.RandomGrayscale(p=0.1),  
#     transforms.ToTensor(),  
#     transforms.RandomErasing(p=0.2, scale=(0.02, 0.2), ratio=(0.3, 3.3)),  
#     transforms.Normalize(mean=processor.image_mean, std=processor.image_std)
# ])


# val_transform = transforms.Compose([
#     transforms.Resize((384, 384)),  
#     transforms.ToTensor(),
#     transforms.Normalize(mean=processor.image_mean, std=processor.image_std)
# ])

In [11]:
# train_dataset = BirdDataset(train_dir, transform=train_transform)
# val_dataset = BirdDataset(val_dir, transform=val_transform)

# def validate_class_order(train_class_order, submission_order):
#     """Make sure the category names and order of both lists are exactly the same"""
#     if len(train_class_order) != len(submission_order):
#         raise ValueError(f"The number of categories does not match! Training set: {len(train_class_order)}, Submission Requirements: {len(submission_order)}")
    
#     for train_cls, sub_cls in zip(train_class_order, submission_order):
#         if train_cls != sub_cls:
#             raise ValueError(f"Inconsistent order: training set '{train_cls}' vs Submission Requirements '{sub_cls}'")
#     return True

# train_class_order = sorted(train_dataset.class_to_idx.keys())

# try:
#     validate_class_order(train_class_order, submission_class_order)
# except ValueError as e:
#     print("Category order inconsistency detected, automatically correcting...")
#     from torchvision.datasets import DatasetFolder
    
#     class OrderedImageFolder(DatasetFolder):
#         """Forces the data sets of categories to be loaded in a specified order"""
#         def __init__(self, root, class_order, transform=None):
#             self.class_order = class_order
#             super().__init__(
#                 root,
#                 loader=lambda x: Image.open(x).convert("RGB"),
#                 extensions=('jpg', 'jpeg', 'png'),
#                 transform=transform,
#                 target_transform=None
#             )
            
#         def find_classes(self, directory):
#             classes = self.class_order 
#             class_to_idx = {cls_name: i for i, cls_name in enumerate(classes)}
#             return classes, class_to_idx
    
#     train_dataset = OrderedImageFolder(
#         train_dir, 
#         class_order=submission_class_order,
#         transform=train_transform
#     )
#     val_dataset = OrderedImageFolder(
#         val_dir,
#         class_order=submission_class_order,
#         transform=val_transform
#     )
    
#     print("Corrected category order：", train_dataset.classes)
    
# # Category index validation (ensuring consistency with submission format)
# assert sorted(train_dataset.class_to_idx.keys()) == sorted(submission_class_order), "Category order mismatch！"

In [12]:
import os
import torch
import matplotlib.pyplot as plt
from torch.utils.data import DataLoader
from sklearn.metrics import precision_score, recall_score, f1_score, classification_report

# Exclude fine-grained classes from primary model evaluation
fine_grained_classes = {2, 12, 13}  # Rusty_Blackbird, American_Crow, Fish_Crow

def compute_non_fine_grained_accuracy(all_preds, all_labels):
    """Compute accuracy excluding fine-grained classes."""
    filtered_preds = [p for p, l in zip(all_preds, all_labels) if l not in fine_grained_classes]
    filtered_labels = [l for l in all_labels if l not in fine_grained_classes]
    if len(filtered_labels) == 0:
        return 0.0  # Prevent division by zero
    return sum(p == l for p, l in zip(filtered_preds, filtered_labels)) / len(filtered_labels)

# # Define dataloaders
# train_loader = DataLoader(train_dataset, batch_size=32, shuffle=True, num_workers=2)
# val_loader = DataLoader(val_dataset, batch_size=64, shuffle=False, num_workers=2)


class EarlyStopper:
    def __init__(self, patience=5, min_delta=0):
        self.patience = patience
        self.min_delta = min_delta
        self.counter = 0
        self.min_validation_loss = float('inf')

    def early_stop(self, validation_loss):
        if validation_loss < self.min_validation_loss:
            self.min_validation_loss = validation_loss
            self.counter = 0
        elif validation_loss > (self.min_validation_loss + self.min_delta):
            self.counter += 1
            if self.counter >= self.patience:
                return True
        return False

def train_epoch(model, loader, optimizer, scaler):
    model.train()
    total_loss = 0
    correct = 0
    
    for images, labels in loader:
        images, labels = images.to(device), labels.to(device)
        
        optimizer.zero_grad()
        
        with torch.amp.autocast(device_type="cuda"):
            outputs = model(images)
            loss = criterion(outputs.logits, labels)
        
        scaler.scale(loss).backward()
        scaler.step(optimizer)
        scaler.update()
        
        total_loss += loss.item() * images.size(0)
        preds = torch.argmax(outputs.logits, dim=1)
        correct += (preds == labels).sum().item()
    
    avg_loss = total_loss / len(loader.dataset)
    accuracy = correct / len(loader.dataset)
    return avg_loss, accuracy

# def validate(model, loader):
#     model.eval()
#     total_loss = 0
#     correct = 0
#     all_preds = []
#     all_labels = []
    
#     with torch.no_grad():
#         for images, labels in loader:
#             images, labels = images.to(device), labels.to(device)
            
#             with torch.amp.autocast(device_type="cuda"):  # Mixed precision
#                 outputs = model(images)
#                 loss = criterion(outputs.logits, labels)
            
#             total_loss += loss.item() * images.size(0)
#             preds = torch.argmax(outputs.logits, dim=1)
#             correct += (preds == labels).sum().item()
            
#             all_preds.extend(preds.cpu().numpy())
#             all_labels.extend(labels.cpu().numpy())
    
#     avg_loss = total_loss / len(loader.dataset)
#     accuracy = correct / len(loader.dataset)
    
#     # Compute non-fine-grained accuracy
#     non_fine_grained_acc = compute_non_fine_grained_accuracy(all_preds, all_labels)

#     # Compute Precision, Recall, and F1-score
#     precision = precision_score(all_labels, all_preds, average="macro", zero_division=1)
#     recall = recall_score(all_labels, all_preds, average="macro", zero_division=1)
#     f1 = f1_score(all_labels, all_preds, average="macro", zero_division=1)

#     # Print detailed classification report
#     print("\nClassification Report:\n", classification_report(all_labels, all_preds))

#     return avg_loss, accuracy, non_fine_grained_acc, precision, recall, f1

# scaler = torch.amp.GradScaler()
# early_stopper = EarlyStopper(patience=10, min_delta=0.0003)
# history = {'train_loss': [], 'val_loss': [], 'train_acc': [], 'val_acc': [], 'val_precision': [], 'val_recall': [], 'val_f1': []}

# # Variables to track the best validation accuracy (excluding fine-grained classes)
# best_val_acc = 0.0
# best_model_path = "/kaggle/working/best_model.pth"

# for epoch in range(2):
#     train_loss, train_acc = train_epoch(model, train_loader, optimizer, scaler)
#     val_loss, val_acc, non_fine_grained_acc, val_precision, val_recall, val_f1 = validate(model, val_loader)
    
#     history['train_loss'].append(train_loss)
#     history['val_loss'].append(val_loss)
#     history['train_acc'].append(train_acc)
#     history['val_acc'].append(val_acc)
#     history['val_precision'].append(val_precision)
#     history['val_recall'].append(val_recall)
#     history['val_f1'].append(val_f1)
    
#     print(f"Epoch {epoch+1:02d}:")
#     print(f"Train Loss: {train_loss:.4f} | Acc: {train_acc:.4f}")
#     print(f"Val Loss: {val_loss:.4f} | Acc: {val_acc:.4f} (Overall)")
#     print(f"Non-Fine-Grained Acc: {non_fine_grained_acc:.4f} (Used for saving)")
#     print(f"Precision: {val_precision:.4f} | Recall: {val_recall:.4f} | F1-score: {val_f1:.4f}\n")
    
#     if non_fine_grained_acc > best_val_acc:
#         best_val_acc = non_fine_grained_acc
#         torch.save(model.state_dict(), best_model_path)
#         print(f"New best model saved with non-fine-grained validation accuracy: {non_fine_grained_acc:.4f}")
    
#     if early_stopper.early_stop(val_loss):
#         print("Early stopping triggered!")
#         break

# # Plot training curves
# plt.figure(figsize=(12, 5))
# plt.subplot(1, 2, 1)
# plt.plot(history['train_loss'], label='Train')
# plt.plot(history['val_loss'], label='Validation')
# plt.title('Loss Curve')
# plt.legend()

# plt.subplot(1, 2, 2)
# plt.plot(history['train_acc'], label='Train')
# plt.plot(history['val_acc'], label='Validation')
# plt.title('Accuracy Curve')
# plt.legend()
# plt.show()

In [13]:
import os
import torch
import numpy as np
from sklearn.model_selection import StratifiedKFold
from torch.utils.data import Dataset, Subset, DataLoader
from torchvision import transforms
from torchvision.datasets import ImageFolder
from transformers import AutoImageProcessor, SwinForImageClassification
from sklearn.metrics import classification_report

# Create model save directory
os.makedirs("/kaggle/working/primary_dir/", exist_ok=True)

# Set the model name
model_name = "Emiel/cub-200-bird-classifier-swin"
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Define class order for submission
submission_class_order = [
    'Groove_billed_Ani', 'Red_winged_Blackbird', 'Rusty_Blackbird', 'Gray_Catbird',
    'Brandt_Cormorant', 'Eastern_Towhee', 'Indigo_Bunting', 'Brewer_Blackbird',
    'Painted_Bunting', 'Bobolink', 'Lazuli_Bunting', 'Yellow_headed_Blackbird',
    'American_Crow', 'Fish_Crow', 'Brown_Creeper', 'Yellow_billed_Cuckoo',
    'Yellow_breasted_Chat', 'Black_billed_Cuckoo', 'Gray_crowned_Rosy_Finch',
    'Bronzed_Cowbird'
]

# Map class names to submission order
submission_class_to_idx = {cls: i for i, cls in enumerate(submission_class_order)}

# Load image processor
processor = AutoImageProcessor.from_pretrained(model_name, use_fast=True)

# Fine-grained classes requiring secondary classification
fine_grained_classes = {2, 12, 13}  # Rusty_Blackbird, American_Crow, Fish_Crow

# Define dataset class with enforced class order
class OrderedBirdDataset(Dataset):
    def __init__(self, main_dir, transform=None):
        self.dataset = ImageFolder(root=main_dir, transform=transform)
        
        # **Step 1: Get the actual class-to-index mapping from dataset**
        original_class_to_idx = self.dataset.class_to_idx  # Example: {'American_Crow': 0, 'Fish_Crow': 1, ...}

        # **Step 2: Convert dataset class indices to submission order**
        self.idx_to_submission_idx = {original_class_to_idx[class_name]: submission_class_to_idx[class_name]
                                      for class_name in original_class_to_idx if class_name in submission_class_to_idx}

        # **Step 3: Remap dataset samples to match submission order**
        self.dataset.samples = [(path, self.idx_to_submission_idx[label]) for path, label in self.dataset.samples]

    def __len__(self):
        return len(self.dataset)
    
    def __getitem__(self, idx):
        image, label = self.dataset[idx]
        return image, label

# Define data augmentation and preprocessing
train_transform = transforms.Compose([
    transforms.Resize((420, 420)),        
    transforms.RandomResizedCrop(384, scale=(0.8, 1.0)),  
    transforms.RandomHorizontalFlip(p=0.5),  
    transforms.RandomVerticalFlip(p=0.2),    
    transforms.RandomRotation(degrees=15),   
    transforms.RandomAffine(degrees=0, translate=(0.1, 0.1)),  
    transforms.ColorJitter(brightness=0.3, contrast=0.3, saturation=0.3, hue=0.05),  
    transforms.RandomGrayscale(p=0.1),  
    transforms.ToTensor(),  
    transforms.RandomErasing(p=0.2, scale=(0.02, 0.2), ratio=(0.3, 3.3)),  
    transforms.Normalize(mean=processor.image_mean, std=processor.image_std)
])

val_transform = transforms.Compose([
    transforms.Resize((384, 384)),  
    transforms.ToTensor(),
    transforms.Normalize(mean=processor.image_mean, std=processor.image_std)
])

# Load the dataset with enforced class order
train_dataset = OrderedBirdDataset(train_dir, transform=train_transform)
labels = np.array([label for _, label in train_dataset])

# Define K-Fold Cross Validation
num_folds = 5
kf = StratifiedKFold(n_splits=num_folds, shuffle=True, random_state=42)

preprocessor_config.json:   0%|          | 0.00/386 [00:00<?, ?B/s]

In [14]:
from sklearn.metrics import classification_report

# Training Loop
for fold, (train_idx, val_idx) in enumerate(kf.split(np.zeros(len(labels)), labels)):
    print(f"\nFold {fold+1}/{num_folds}")

    # Create Subset for Train and Validation
    train_subset = Subset(train_dataset, train_idx)
    val_subset = Subset(train_dataset, val_idx)

    train_loader = DataLoader(train_subset, batch_size=8, shuffle=True, num_workers=1)
    val_loader = DataLoader(val_subset, batch_size=16, shuffle=False, num_workers=1)

    # Load the model
    model = SwinForImageClassification.from_pretrained(
        model_name,
        num_labels=len(submission_class_order),  # Ensure number of labels matches dataset
        ignore_mismatched_sizes=True
    ).to(device)

    # Enable Gradient Checkpointing to save memory
    model.gradient_checkpointing_enable()

    # Freeze all backbone layers except the last one
    for param in model.swin.parameters():
        param.requires_grad = False
    for param in model.swin.encoder.layers[-2].parameters():
        param.requires_grad = True
    for param in model.classifier.parameters():
        param.requires_grad = True

    # Define optimizer and loss function
    optimizer = torch.optim.AdamW(model.parameters(), lr=3e-4, weight_decay=0.01)
    criterion = torch.nn.CrossEntropyLoss()
    scaler = torch.amp.GradScaler()

    # Initialize best validation accuracy
    best_non_fg_acc = 0.0
    best_model_path = f"/kaggle/working/primary_dir/primary_swin_fold{fold+1}.pth"

    # Training loop
    for epoch in range(15):
        print(f"\nEpoch {epoch+1}/15")

        # Training phase
        model.train()
        total_loss, correct = 0, 0
        for images, labels in train_loader:
            images, labels = images.to(device), labels.to(device)
            optimizer.zero_grad()

            with torch.amp.autocast(device_type="cuda", dtype=torch.float16):
                outputs = model(images)
                loss = criterion(outputs.logits, labels)

            scaler.scale(loss).backward()
            scaler.step(optimizer)
            scaler.update()

            total_loss += loss.item() * images.size(0)
            preds = torch.argmax(outputs.logits, dim=1)
            correct += (preds == labels).sum().item()

        train_loss = total_loss / len(train_loader.dataset)
        train_acc = correct / len(train_loader.dataset)

        # Validation phase
        model.eval()
        total_loss, correct = 0, 0
        all_preds, all_labels = [], []

        with torch.no_grad():
            for images, labels in val_loader:
                images, labels = images.to(device), labels.to(device)

                with torch.amp.autocast(device_type="cuda", dtype=torch.float16):
                    outputs = model(images)
                    loss = criterion(outputs.logits, labels)

                total_loss += loss.item() * images.size(0)
                preds = torch.argmax(outputs.logits, dim=1)
                correct += (preds == labels).sum().item()
                all_preds.extend(preds.cpu().numpy())
                all_labels.extend(labels.cpu().numpy())

        val_loss = total_loss / len(val_loader.dataset)
        val_acc = correct / len(val_loader.dataset)

        # Compute per-class accuracy
        class_report = classification_report(all_labels, all_preds, output_dict=True, zero_division=1)
        class_accuracy = {int(k): v["recall"] for k, v in class_report.items() if k.isdigit()}

        # Compute non-fine-grained accuracy
        non_fg_correct = sum(1 for p, l in zip(all_preds, all_labels) if l not in fine_grained_classes and p == l)
        non_fg_total = sum(1 for l in all_labels if l not in fine_grained_classes)
        non_fg_acc = non_fg_correct / non_fg_total if non_fg_total > 0 else 0.0

        print(f"Train Loss: {train_loss:.4f}, Acc: {train_acc:.4f}")
        print(f"Val Loss: {val_loss:.4f}, Acc: {val_acc:.4f}")
        print(f"Non-FG Val Acc: {non_fg_acc:.4f}")

        # Print per-class validation accuracy
        print("\nPer-Class Validation Accuracy:")
        for class_idx, acc in sorted(class_accuracy.items()):
            print(f"  - Class {class_idx}: {acc:.4f}")

        # Save best model based on non-fine-grained accuracy
        if non_fg_acc > best_non_fg_acc:
            best_non_fg_acc = non_fg_acc
            torch.save(model.state_dict(), best_model_path)
            print(f"New best model saved for Fold {fold+1}, Non-FG Acc: {non_fg_acc:.4f}")

        torch.cuda.empty_cache()

    del model
    torch.cuda.empty_cache()
    print(f"Fold {fold+1} training completed, memory cleared!\n")


Fold 1/5


config.json:   0%|          | 0.00/6.95k [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/786M [00:00<?, ?B/s]

Some weights of SwinForImageClassification were not initialized from the model checkpoint at Emiel/cub-200-bird-classifier-swin and are newly initialized because the shapes did not match:
- classifier.bias: found shape torch.Size([200]) in the checkpoint and torch.Size([20]) in the model instantiated
- classifier.weight: found shape torch.Size([200, 1536]) in the checkpoint and torch.Size([20, 1536]) in the model instantiated
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.



Epoch 1/15




Train Loss: 1.0605, Acc: 0.7423
Val Loss: 0.3997, Acc: 0.8883
Non-FG Val Acc: 0.9434

Per-Class Validation Accuracy:
  - Class 0: 1.0000
  - Class 1: 1.0000
  - Class 2: 0.6000
  - Class 3: 0.8000
  - Class 4: 1.0000
  - Class 5: 0.9000
  - Class 6: 1.0000
  - Class 7: 0.8000
  - Class 8: 0.9000
  - Class 9: 1.0000
  - Class 10: 0.8889
  - Class 11: 1.0000
  - Class 12: 0.6000
  - Class 13: 0.5556
  - Class 14: 1.0000
  - Class 15: 1.0000
  - Class 16: 0.8889
  - Class 17: 0.8889
  - Class 18: 1.0000
  - Class 19: 1.0000
New best model saved for Fold 1, Non-FG Acc: 0.9434

Epoch 2/15




Train Loss: 0.3326, Acc: 0.9079
Val Loss: 0.2894, Acc: 0.9043
Non-FG Val Acc: 0.9371

Per-Class Validation Accuracy:
  - Class 0: 1.0000
  - Class 1: 0.8889
  - Class 2: 0.9000
  - Class 3: 0.9000
  - Class 4: 1.0000
  - Class 5: 1.0000
  - Class 6: 1.0000
  - Class 7: 0.5000
  - Class 8: 1.0000
  - Class 9: 1.0000
  - Class 10: 1.0000
  - Class 11: 1.0000
  - Class 12: 0.6000
  - Class 13: 0.6667
  - Class 14: 1.0000
  - Class 15: 1.0000
  - Class 16: 1.0000
  - Class 17: 0.8889
  - Class 18: 1.0000
  - Class 19: 0.7778

Epoch 3/15




Train Loss: 0.2474, Acc: 0.9212
Val Loss: 0.2755, Acc: 0.8936
Non-FG Val Acc: 0.9119

Per-Class Validation Accuracy:
  - Class 0: 1.0000
  - Class 1: 0.8889
  - Class 2: 0.9000
  - Class 3: 0.9000
  - Class 4: 1.0000
  - Class 5: 1.0000
  - Class 6: 0.8889
  - Class 7: 0.4000
  - Class 8: 0.8000
  - Class 9: 1.0000
  - Class 10: 1.0000
  - Class 11: 1.0000
  - Class 12: 0.9000
  - Class 13: 0.5556
  - Class 14: 1.0000
  - Class 15: 0.8889
  - Class 16: 1.0000
  - Class 17: 0.8889
  - Class 18: 1.0000
  - Class 19: 0.8889

Epoch 4/15




Train Loss: 0.2248, Acc: 0.9306
Val Loss: 0.2440, Acc: 0.9096
Non-FG Val Acc: 0.9560

Per-Class Validation Accuracy:
  - Class 0: 0.8000
  - Class 1: 1.0000
  - Class 2: 0.9000
  - Class 3: 1.0000
  - Class 4: 1.0000
  - Class 5: 1.0000
  - Class 6: 1.0000
  - Class 7: 0.8000
  - Class 8: 1.0000
  - Class 9: 1.0000
  - Class 10: 0.8889
  - Class 11: 1.0000
  - Class 12: 0.3000
  - Class 13: 0.7778
  - Class 14: 1.0000
  - Class 15: 1.0000
  - Class 16: 1.0000
  - Class 17: 0.8889
  - Class 18: 1.0000
  - Class 19: 0.8889
New best model saved for Fold 1, Non-FG Acc: 0.9560

Epoch 5/15




Train Loss: 0.1965, Acc: 0.9372
Val Loss: 0.2603, Acc: 0.9149
Non-FG Val Acc: 0.9497

Per-Class Validation Accuracy:
  - Class 0: 1.0000
  - Class 1: 1.0000
  - Class 2: 0.7000
  - Class 3: 1.0000
  - Class 4: 1.0000
  - Class 5: 1.0000
  - Class 6: 1.0000
  - Class 7: 0.8000
  - Class 8: 1.0000
  - Class 9: 0.9000
  - Class 10: 1.0000
  - Class 11: 1.0000
  - Class 12: 0.8000
  - Class 13: 0.6667
  - Class 14: 1.0000
  - Class 15: 0.8889
  - Class 16: 1.0000
  - Class 17: 0.8889
  - Class 18: 1.0000
  - Class 19: 0.6667

Epoch 6/15




Train Loss: 0.1378, Acc: 0.9519
Val Loss: 0.2885, Acc: 0.9149
Non-FG Val Acc: 0.9245

Per-Class Validation Accuracy:
  - Class 0: 1.0000
  - Class 1: 0.8889
  - Class 2: 0.9000
  - Class 3: 0.8000
  - Class 4: 1.0000
  - Class 5: 0.9000
  - Class 6: 1.0000
  - Class 7: 0.6000
  - Class 8: 1.0000
  - Class 9: 1.0000
  - Class 10: 1.0000
  - Class 11: 1.0000
  - Class 12: 0.9000
  - Class 13: 0.7778
  - Class 14: 1.0000
  - Class 15: 1.0000
  - Class 16: 0.8889
  - Class 17: 0.8889
  - Class 18: 0.8889
  - Class 19: 0.8889

Epoch 7/15




Train Loss: 0.1235, Acc: 0.9666
Val Loss: 0.2061, Acc: 0.9096
Non-FG Val Acc: 0.9623

Per-Class Validation Accuracy:
  - Class 0: 1.0000
  - Class 1: 1.0000
  - Class 2: 0.7000
  - Class 3: 1.0000
  - Class 4: 1.0000
  - Class 5: 0.9000
  - Class 6: 0.8889
  - Class 7: 0.9000
  - Class 8: 1.0000
  - Class 9: 1.0000
  - Class 10: 1.0000
  - Class 11: 1.0000
  - Class 12: 0.5000
  - Class 13: 0.6667
  - Class 14: 1.0000
  - Class 15: 1.0000
  - Class 16: 1.0000
  - Class 17: 0.6667
  - Class 18: 1.0000
  - Class 19: 1.0000
New best model saved for Fold 1, Non-FG Acc: 0.9623

Epoch 8/15




Train Loss: 0.1388, Acc: 0.9586
Val Loss: 0.2284, Acc: 0.9309
Non-FG Val Acc: 0.9560

Per-Class Validation Accuracy:
  - Class 0: 0.9000
  - Class 1: 1.0000
  - Class 2: 0.9000
  - Class 3: 1.0000
  - Class 4: 1.0000
  - Class 5: 0.9000
  - Class 6: 1.0000
  - Class 7: 0.7000
  - Class 8: 1.0000
  - Class 9: 1.0000
  - Class 10: 1.0000
  - Class 11: 1.0000
  - Class 12: 0.7000
  - Class 13: 0.7778
  - Class 14: 1.0000
  - Class 15: 1.0000
  - Class 16: 1.0000
  - Class 17: 0.8889
  - Class 18: 0.8889
  - Class 19: 1.0000

Epoch 9/15




Train Loss: 0.1186, Acc: 0.9599
Val Loss: 0.2264, Acc: 0.9202
Non-FG Val Acc: 0.9308

Per-Class Validation Accuracy:
  - Class 0: 1.0000
  - Class 1: 1.0000
  - Class 2: 1.0000
  - Class 3: 1.0000
  - Class 4: 1.0000
  - Class 5: 1.0000
  - Class 6: 0.8889
  - Class 7: 0.6000
  - Class 8: 0.9000
  - Class 9: 1.0000
  - Class 10: 0.8889
  - Class 11: 0.8889
  - Class 12: 0.8000
  - Class 13: 0.7778
  - Class 14: 1.0000
  - Class 15: 1.0000
  - Class 16: 1.0000
  - Class 17: 0.7778
  - Class 18: 1.0000
  - Class 19: 0.8889

Epoch 10/15




Train Loss: 0.1026, Acc: 0.9640
Val Loss: 0.2145, Acc: 0.9309
Non-FG Val Acc: 0.9497

Per-Class Validation Accuracy:
  - Class 0: 1.0000
  - Class 1: 0.8889
  - Class 2: 0.9000
  - Class 3: 0.9000
  - Class 4: 1.0000
  - Class 5: 1.0000
  - Class 6: 1.0000
  - Class 7: 0.6000
  - Class 8: 1.0000
  - Class 9: 1.0000
  - Class 10: 1.0000
  - Class 11: 1.0000
  - Class 12: 0.7000
  - Class 13: 0.8889
  - Class 14: 1.0000
  - Class 15: 1.0000
  - Class 16: 1.0000
  - Class 17: 0.8889
  - Class 18: 0.8889
  - Class 19: 1.0000

Epoch 11/15




Train Loss: 0.0990, Acc: 0.9666
Val Loss: 0.2475, Acc: 0.9202
Non-FG Val Acc: 0.9434

Per-Class Validation Accuracy:
  - Class 0: 1.0000
  - Class 1: 0.8889
  - Class 2: 0.9000
  - Class 3: 1.0000
  - Class 4: 1.0000
  - Class 5: 1.0000
  - Class 6: 0.8889
  - Class 7: 0.8000
  - Class 8: 1.0000
  - Class 9: 1.0000
  - Class 10: 1.0000
  - Class 11: 1.0000
  - Class 12: 0.8000
  - Class 13: 0.6667
  - Class 14: 1.0000
  - Class 15: 1.0000
  - Class 16: 0.8889
  - Class 17: 0.7778
  - Class 18: 0.8889
  - Class 19: 0.8889

Epoch 12/15




Train Loss: 0.0985, Acc: 0.9680
Val Loss: 0.2722, Acc: 0.9043
Non-FG Val Acc: 0.9371

Per-Class Validation Accuracy:
  - Class 0: 1.0000
  - Class 1: 0.8889
  - Class 2: 0.8000
  - Class 3: 0.9000
  - Class 4: 1.0000
  - Class 5: 1.0000
  - Class 6: 1.0000
  - Class 7: 0.8000
  - Class 8: 1.0000
  - Class 9: 1.0000
  - Class 10: 0.8889
  - Class 11: 1.0000
  - Class 12: 0.9000
  - Class 13: 0.4444
  - Class 14: 1.0000
  - Class 15: 1.0000
  - Class 16: 1.0000
  - Class 17: 0.7778
  - Class 18: 0.8889
  - Class 19: 0.7778

Epoch 13/15




Train Loss: 0.1243, Acc: 0.9666
Val Loss: 0.2210, Acc: 0.9149
Non-FG Val Acc: 0.9560

Per-Class Validation Accuracy:
  - Class 0: 1.0000
  - Class 1: 1.0000
  - Class 2: 0.8000
  - Class 3: 1.0000
  - Class 4: 1.0000
  - Class 5: 1.0000
  - Class 6: 0.8889
  - Class 7: 0.7000
  - Class 8: 1.0000
  - Class 9: 1.0000
  - Class 10: 1.0000
  - Class 11: 1.0000
  - Class 12: 0.4000
  - Class 13: 0.8889
  - Class 14: 1.0000
  - Class 15: 0.8889
  - Class 16: 1.0000
  - Class 17: 0.8889
  - Class 18: 0.8889
  - Class 19: 1.0000

Epoch 14/15




Train Loss: 0.0767, Acc: 0.9786
Val Loss: 0.2835, Acc: 0.9362
Non-FG Val Acc: 0.9686

Per-Class Validation Accuracy:
  - Class 0: 1.0000
  - Class 1: 1.0000
  - Class 2: 0.7000
  - Class 3: 1.0000
  - Class 4: 1.0000
  - Class 5: 1.0000
  - Class 6: 1.0000
  - Class 7: 0.7000
  - Class 8: 1.0000
  - Class 9: 1.0000
  - Class 10: 1.0000
  - Class 11: 0.8889
  - Class 12: 0.8000
  - Class 13: 0.7778
  - Class 14: 1.0000
  - Class 15: 1.0000
  - Class 16: 1.0000
  - Class 17: 0.8889
  - Class 18: 1.0000
  - Class 19: 1.0000
New best model saved for Fold 1, Non-FG Acc: 0.9686

Epoch 15/15




Train Loss: 0.0932, Acc: 0.9706
Val Loss: 0.2299, Acc: 0.9362
Non-FG Val Acc: 0.9623

Per-Class Validation Accuracy:
  - Class 0: 1.0000
  - Class 1: 1.0000
  - Class 2: 0.7000
  - Class 3: 1.0000
  - Class 4: 1.0000
  - Class 5: 1.0000
  - Class 6: 0.8889
  - Class 7: 0.8000
  - Class 8: 1.0000
  - Class 9: 1.0000
  - Class 10: 1.0000
  - Class 11: 1.0000
  - Class 12: 1.0000
  - Class 13: 0.6667
  - Class 14: 1.0000
  - Class 15: 0.8889
  - Class 16: 1.0000
  - Class 17: 0.7778
  - Class 18: 1.0000
  - Class 19: 1.0000
Fold 1 training completed, memory cleared!


Fold 2/5


Some weights of SwinForImageClassification were not initialized from the model checkpoint at Emiel/cub-200-bird-classifier-swin and are newly initialized because the shapes did not match:
- classifier.bias: found shape torch.Size([200]) in the checkpoint and torch.Size([20]) in the model instantiated
- classifier.weight: found shape torch.Size([200, 1536]) in the checkpoint and torch.Size([20, 1536]) in the model instantiated
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.



Epoch 1/15




Train Loss: 1.0636, Acc: 0.7196
Val Loss: 0.3572, Acc: 0.8670
Non-FG Val Acc: 0.8868

Per-Class Validation Accuracy:
  - Class 0: 1.0000
  - Class 1: 1.0000
  - Class 2: 0.8000
  - Class 3: 1.0000
  - Class 4: 1.0000
  - Class 5: 0.8000
  - Class 6: 1.0000
  - Class 7: 0.3333
  - Class 8: 0.7778
  - Class 9: 0.9000
  - Class 10: 1.0000
  - Class 11: 0.8889
  - Class 12: 0.9000
  - Class 13: 0.5556
  - Class 14: 0.8889
  - Class 15: 0.7500
  - Class 16: 0.9000
  - Class 17: 0.7778
  - Class 18: 1.0000
  - Class 19: 1.0000
New best model saved for Fold 2, Non-FG Acc: 0.8868

Epoch 2/15




Train Loss: 0.3409, Acc: 0.8945
Val Loss: 0.2325, Acc: 0.9362
Non-FG Val Acc: 0.9686

Per-Class Validation Accuracy:
  - Class 0: 1.0000
  - Class 1: 1.0000
  - Class 2: 0.8000
  - Class 3: 1.0000
  - Class 4: 1.0000
  - Class 5: 1.0000
  - Class 6: 1.0000
  - Class 7: 0.8889
  - Class 8: 0.8889
  - Class 9: 1.0000
  - Class 10: 1.0000
  - Class 11: 1.0000
  - Class 12: 1.0000
  - Class 13: 0.4444
  - Class 14: 0.8889
  - Class 15: 1.0000
  - Class 16: 0.9000
  - Class 17: 0.8889
  - Class 18: 1.0000
  - Class 19: 1.0000
New best model saved for Fold 2, Non-FG Acc: 0.9686

Epoch 3/15




Train Loss: 0.2594, Acc: 0.9252
Val Loss: 0.2890, Acc: 0.9096
Non-FG Val Acc: 0.9371

Per-Class Validation Accuracy:
  - Class 0: 1.0000
  - Class 1: 1.0000
  - Class 2: 0.9000
  - Class 3: 1.0000
  - Class 4: 1.0000
  - Class 5: 1.0000
  - Class 6: 0.9000
  - Class 7: 0.8889
  - Class 8: 0.6667
  - Class 9: 1.0000
  - Class 10: 1.0000
  - Class 11: 0.8889
  - Class 12: 0.8000
  - Class 13: 0.5556
  - Class 14: 0.8889
  - Class 15: 1.0000
  - Class 16: 0.9000
  - Class 17: 1.0000
  - Class 18: 1.0000
  - Class 19: 0.8000

Epoch 4/15




Train Loss: 0.2238, Acc: 0.9266
Val Loss: 0.2443, Acc: 0.9415
Non-FG Val Acc: 0.9748

Per-Class Validation Accuracy:
  - Class 0: 1.0000
  - Class 1: 1.0000
  - Class 2: 0.8000
  - Class 3: 1.0000
  - Class 4: 1.0000
  - Class 5: 1.0000
  - Class 6: 1.0000
  - Class 7: 1.0000
  - Class 8: 0.8889
  - Class 9: 1.0000
  - Class 10: 0.8889
  - Class 11: 1.0000
  - Class 12: 1.0000
  - Class 13: 0.4444
  - Class 14: 1.0000
  - Class 15: 1.0000
  - Class 16: 0.9000
  - Class 17: 1.0000
  - Class 18: 1.0000
  - Class 19: 0.9000
New best model saved for Fold 2, Non-FG Acc: 0.9748

Epoch 5/15




Train Loss: 0.1664, Acc: 0.9506
Val Loss: 0.2116, Acc: 0.9149
Non-FG Val Acc: 0.9434

Per-Class Validation Accuracy:
  - Class 0: 0.8889
  - Class 1: 1.0000
  - Class 2: 0.9000
  - Class 3: 1.0000
  - Class 4: 1.0000
  - Class 5: 1.0000
  - Class 6: 0.9000
  - Class 7: 0.6667
  - Class 8: 0.8889
  - Class 9: 1.0000
  - Class 10: 1.0000
  - Class 11: 0.7778
  - Class 12: 0.5000
  - Class 13: 0.8889
  - Class 14: 0.8889
  - Class 15: 1.0000
  - Class 16: 1.0000
  - Class 17: 1.0000
  - Class 18: 1.0000
  - Class 19: 1.0000

Epoch 6/15




Train Loss: 0.1524, Acc: 0.9466
Val Loss: 0.2162, Acc: 0.9309
Non-FG Val Acc: 0.9686

Per-Class Validation Accuracy:
  - Class 0: 1.0000
  - Class 1: 1.0000
  - Class 2: 0.8000
  - Class 3: 1.0000
  - Class 4: 1.0000
  - Class 5: 1.0000
  - Class 6: 1.0000
  - Class 7: 0.7778
  - Class 8: 1.0000
  - Class 9: 1.0000
  - Class 10: 1.0000
  - Class 11: 0.8889
  - Class 12: 0.6000
  - Class 13: 0.7778
  - Class 14: 0.8889
  - Class 15: 0.8750
  - Class 16: 1.0000
  - Class 17: 1.0000
  - Class 18: 1.0000
  - Class 19: 1.0000

Epoch 7/15




Train Loss: 0.1398, Acc: 0.9546
Val Loss: 0.2546, Acc: 0.8989
Non-FG Val Acc: 0.9371

Per-Class Validation Accuracy:
  - Class 0: 1.0000
  - Class 1: 0.9000
  - Class 2: 0.9000
  - Class 3: 0.9000
  - Class 4: 1.0000
  - Class 5: 1.0000
  - Class 6: 1.0000
  - Class 7: 0.7778
  - Class 8: 0.8889
  - Class 9: 1.0000
  - Class 10: 1.0000
  - Class 11: 0.8889
  - Class 12: 0.5000
  - Class 13: 0.6667
  - Class 14: 0.8889
  - Class 15: 0.8750
  - Class 16: 0.9000
  - Class 17: 1.0000
  - Class 18: 1.0000
  - Class 19: 0.9000

Epoch 8/15




Train Loss: 0.1440, Acc: 0.9546
Val Loss: 0.1871, Acc: 0.9149
Non-FG Val Acc: 0.9497

Per-Class Validation Accuracy:
  - Class 0: 1.0000
  - Class 1: 0.9000
  - Class 2: 0.8000
  - Class 3: 0.9000
  - Class 4: 1.0000
  - Class 5: 1.0000
  - Class 6: 1.0000
  - Class 7: 0.8889
  - Class 8: 1.0000
  - Class 9: 1.0000
  - Class 10: 1.0000
  - Class 11: 0.8889
  - Class 12: 0.7000
  - Class 13: 0.6667
  - Class 14: 0.8889
  - Class 15: 0.8750
  - Class 16: 1.0000
  - Class 17: 1.0000
  - Class 18: 1.0000
  - Class 19: 0.8000

Epoch 9/15




Train Loss: 0.1247, Acc: 0.9599
Val Loss: 0.1646, Acc: 0.9362
Non-FG Val Acc: 0.9874

Per-Class Validation Accuracy:
  - Class 0: 1.0000
  - Class 1: 1.0000
  - Class 2: 0.9000
  - Class 3: 1.0000
  - Class 4: 1.0000
  - Class 5: 1.0000
  - Class 6: 1.0000
  - Class 7: 1.0000
  - Class 8: 1.0000
  - Class 9: 1.0000
  - Class 10: 1.0000
  - Class 11: 1.0000
  - Class 12: 0.7000
  - Class 13: 0.3333
  - Class 14: 0.8889
  - Class 15: 0.8750
  - Class 16: 1.0000
  - Class 17: 1.0000
  - Class 18: 1.0000
  - Class 19: 1.0000
New best model saved for Fold 2, Non-FG Acc: 0.9874

Epoch 10/15




Train Loss: 0.1145, Acc: 0.9599
Val Loss: 0.1515, Acc: 0.9362
Non-FG Val Acc: 0.9560

Per-Class Validation Accuracy:
  - Class 0: 1.0000
  - Class 1: 1.0000
  - Class 2: 0.9000
  - Class 3: 1.0000
  - Class 4: 1.0000
  - Class 5: 0.9000
  - Class 6: 0.9000
  - Class 7: 1.0000
  - Class 8: 1.0000
  - Class 9: 0.9000
  - Class 10: 1.0000
  - Class 11: 1.0000
  - Class 12: 0.8000
  - Class 13: 0.7778
  - Class 14: 0.8889
  - Class 15: 0.8750
  - Class 16: 1.0000
  - Class 17: 0.8889
  - Class 18: 1.0000
  - Class 19: 0.9000

Epoch 11/15




Train Loss: 0.1274, Acc: 0.9599
Val Loss: 0.1460, Acc: 0.9468
Non-FG Val Acc: 0.9811

Per-Class Validation Accuracy:
  - Class 0: 1.0000
  - Class 1: 1.0000
  - Class 2: 0.9000
  - Class 3: 1.0000
  - Class 4: 1.0000
  - Class 5: 1.0000
  - Class 6: 1.0000
  - Class 7: 0.8889
  - Class 8: 1.0000
  - Class 9: 0.9000
  - Class 10: 1.0000
  - Class 11: 1.0000
  - Class 12: 0.8000
  - Class 13: 0.5556
  - Class 14: 0.8889
  - Class 15: 1.0000
  - Class 16: 1.0000
  - Class 17: 1.0000
  - Class 18: 1.0000
  - Class 19: 1.0000

Epoch 12/15




Train Loss: 0.1284, Acc: 0.9613
Val Loss: 0.2020, Acc: 0.9149
Non-FG Val Acc: 0.9686

Per-Class Validation Accuracy:
  - Class 0: 0.8889
  - Class 1: 0.9000
  - Class 2: 0.9000
  - Class 3: 1.0000
  - Class 4: 1.0000
  - Class 5: 1.0000
  - Class 6: 1.0000
  - Class 7: 0.8889
  - Class 8: 0.8889
  - Class 9: 1.0000
  - Class 10: 1.0000
  - Class 11: 1.0000
  - Class 12: 0.5000
  - Class 13: 0.4444
  - Class 14: 0.8889
  - Class 15: 1.0000
  - Class 16: 1.0000
  - Class 17: 1.0000
  - Class 18: 1.0000
  - Class 19: 1.0000

Epoch 13/15




Train Loss: 0.1207, Acc: 0.9666
Val Loss: 0.1825, Acc: 0.9202
Non-FG Val Acc: 0.9686

Per-Class Validation Accuracy:
  - Class 0: 1.0000
  - Class 1: 1.0000
  - Class 2: 0.8000
  - Class 3: 1.0000
  - Class 4: 1.0000
  - Class 5: 1.0000
  - Class 6: 1.0000
  - Class 7: 1.0000
  - Class 8: 1.0000
  - Class 9: 0.9000
  - Class 10: 1.0000
  - Class 11: 1.0000
  - Class 12: 0.5000
  - Class 13: 0.6667
  - Class 14: 0.8889
  - Class 15: 0.7500
  - Class 16: 1.0000
  - Class 17: 0.8889
  - Class 18: 1.0000
  - Class 19: 1.0000

Epoch 14/15




Train Loss: 0.1070, Acc: 0.9666
Val Loss: 0.1762, Acc: 0.9309
Non-FG Val Acc: 0.9686

Per-Class Validation Accuracy:
  - Class 0: 1.0000
  - Class 1: 1.0000
  - Class 2: 0.8000
  - Class 3: 1.0000
  - Class 4: 1.0000
  - Class 5: 1.0000
  - Class 6: 1.0000
  - Class 7: 0.7778
  - Class 8: 0.8889
  - Class 9: 1.0000
  - Class 10: 0.8889
  - Class 11: 1.0000
  - Class 12: 0.7000
  - Class 13: 0.6667
  - Class 14: 0.8889
  - Class 15: 1.0000
  - Class 16: 1.0000
  - Class 17: 1.0000
  - Class 18: 1.0000
  - Class 19: 1.0000

Epoch 15/15




Train Loss: 0.1014, Acc: 0.9613
Val Loss: 0.2093, Acc: 0.9255
Non-FG Val Acc: 0.9497

Per-Class Validation Accuracy:
  - Class 0: 1.0000
  - Class 1: 1.0000
  - Class 2: 1.0000
  - Class 3: 0.9000
  - Class 4: 1.0000
  - Class 5: 1.0000
  - Class 6: 0.9000
  - Class 7: 0.7778
  - Class 8: 1.0000
  - Class 9: 1.0000
  - Class 10: 1.0000
  - Class 11: 1.0000
  - Class 12: 0.9000
  - Class 13: 0.4444
  - Class 14: 0.8889
  - Class 15: 0.8750
  - Class 16: 0.9000
  - Class 17: 1.0000
  - Class 18: 1.0000
  - Class 19: 0.9000
Fold 2 training completed, memory cleared!


Fold 3/5


Some weights of SwinForImageClassification were not initialized from the model checkpoint at Emiel/cub-200-bird-classifier-swin and are newly initialized because the shapes did not match:
- classifier.bias: found shape torch.Size([200]) in the checkpoint and torch.Size([20]) in the model instantiated
- classifier.weight: found shape torch.Size([200, 1536]) in the checkpoint and torch.Size([20, 1536]) in the model instantiated
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.



Epoch 1/15




Train Loss: 1.0917, Acc: 0.7040
Val Loss: 0.3755, Acc: 0.8984
Non-FG Val Acc: 0.9371

Per-Class Validation Accuracy:
  - Class 0: 1.0000
  - Class 1: 0.9000
  - Class 2: 0.8889
  - Class 3: 1.0000
  - Class 4: 0.8889
  - Class 5: 1.0000
  - Class 6: 0.9000
  - Class 7: 0.8889
  - Class 8: 0.6667
  - Class 9: 0.9000
  - Class 10: 1.0000
  - Class 11: 1.0000
  - Class 12: 0.8000
  - Class 13: 0.3333
  - Class 14: 1.0000
  - Class 15: 0.8889
  - Class 16: 1.0000
  - Class 17: 0.8889
  - Class 18: 1.0000
  - Class 19: 1.0000
New best model saved for Fold 3, Non-FG Acc: 0.9371

Epoch 2/15




Train Loss: 0.3191, Acc: 0.9067
Val Loss: 0.2924, Acc: 0.9037
Non-FG Val Acc: 0.9497

Per-Class Validation Accuracy:
  - Class 0: 1.0000
  - Class 1: 1.0000
  - Class 2: 0.8889
  - Class 3: 1.0000
  - Class 4: 1.0000
  - Class 5: 1.0000
  - Class 6: 0.8000
  - Class 7: 0.7778
  - Class 8: 0.7778
  - Class 9: 1.0000
  - Class 10: 1.0000
  - Class 11: 0.8750
  - Class 12: 0.2000
  - Class 13: 0.8889
  - Class 14: 1.0000
  - Class 15: 1.0000
  - Class 16: 1.0000
  - Class 17: 1.0000
  - Class 18: 1.0000
  - Class 19: 0.9000
New best model saved for Fold 3, Non-FG Acc: 0.9497

Epoch 3/15




Train Loss: 0.2624, Acc: 0.9173
Val Loss: 0.2202, Acc: 0.9091
Non-FG Val Acc: 0.9434

Per-Class Validation Accuracy:
  - Class 0: 1.0000
  - Class 1: 1.0000
  - Class 2: 1.0000
  - Class 3: 1.0000
  - Class 4: 0.8889
  - Class 5: 1.0000
  - Class 6: 1.0000
  - Class 7: 0.6667
  - Class 8: 0.8889
  - Class 9: 1.0000
  - Class 10: 1.0000
  - Class 11: 1.0000
  - Class 12: 0.8000
  - Class 13: 0.3333
  - Class 14: 1.0000
  - Class 15: 1.0000
  - Class 16: 1.0000
  - Class 17: 0.7778
  - Class 18: 1.0000
  - Class 19: 0.8000

Epoch 4/15




Train Loss: 0.2174, Acc: 0.9347
Val Loss: 0.2005, Acc: 0.9305
Non-FG Val Acc: 0.9623

Per-Class Validation Accuracy:
  - Class 0: 1.0000
  - Class 1: 0.9000
  - Class 2: 1.0000
  - Class 3: 1.0000
  - Class 4: 1.0000
  - Class 5: 0.9000
  - Class 6: 1.0000
  - Class 7: 0.7778
  - Class 8: 0.8889
  - Class 9: 1.0000
  - Class 10: 1.0000
  - Class 11: 1.0000
  - Class 12: 0.6000
  - Class 13: 0.6667
  - Class 14: 1.0000
  - Class 15: 0.8889
  - Class 16: 1.0000
  - Class 17: 1.0000
  - Class 18: 1.0000
  - Class 19: 1.0000
New best model saved for Fold 3, Non-FG Acc: 0.9623

Epoch 5/15




Train Loss: 0.1751, Acc: 0.9427
Val Loss: 0.2295, Acc: 0.9144
Non-FG Val Acc: 0.9308

Per-Class Validation Accuracy:
  - Class 0: 1.0000
  - Class 1: 0.8000
  - Class 2: 0.7778
  - Class 3: 1.0000
  - Class 4: 1.0000
  - Class 5: 0.9000
  - Class 6: 1.0000
  - Class 7: 0.7778
  - Class 8: 0.7778
  - Class 9: 0.9000
  - Class 10: 0.8889
  - Class 11: 1.0000
  - Class 12: 0.9000
  - Class 13: 0.7778
  - Class 14: 1.0000
  - Class 15: 0.8889
  - Class 16: 1.0000
  - Class 17: 1.0000
  - Class 18: 1.0000
  - Class 19: 0.9000

Epoch 6/15




Train Loss: 0.1617, Acc: 0.9493
Val Loss: 0.2115, Acc: 0.9144
Non-FG Val Acc: 0.9497

Per-Class Validation Accuracy:
  - Class 0: 1.0000
  - Class 1: 1.0000
  - Class 2: 0.7778
  - Class 3: 1.0000
  - Class 4: 0.8889
  - Class 5: 1.0000
  - Class 6: 0.9000
  - Class 7: 0.6667
  - Class 8: 0.8889
  - Class 9: 1.0000
  - Class 10: 1.0000
  - Class 11: 1.0000
  - Class 12: 0.5000
  - Class 13: 0.8889
  - Class 14: 1.0000
  - Class 15: 0.8889
  - Class 16: 1.0000
  - Class 17: 1.0000
  - Class 18: 1.0000
  - Class 19: 0.9000

Epoch 7/15




Train Loss: 0.1223, Acc: 0.9640
Val Loss: 0.1974, Acc: 0.9358
Non-FG Val Acc: 0.9686

Per-Class Validation Accuracy:
  - Class 0: 1.0000
  - Class 1: 0.9000
  - Class 2: 0.8889
  - Class 3: 1.0000
  - Class 4: 1.0000
  - Class 5: 1.0000
  - Class 6: 0.9000
  - Class 7: 0.8889
  - Class 8: 0.8889
  - Class 9: 1.0000
  - Class 10: 1.0000
  - Class 11: 1.0000
  - Class 12: 0.8000
  - Class 13: 0.5556
  - Class 14: 1.0000
  - Class 15: 1.0000
  - Class 16: 1.0000
  - Class 17: 0.8889
  - Class 18: 1.0000
  - Class 19: 1.0000
New best model saved for Fold 3, Non-FG Acc: 0.9686

Epoch 8/15




Train Loss: 0.1053, Acc: 0.9680
Val Loss: 0.1926, Acc: 0.9358
Non-FG Val Acc: 0.9623

Per-Class Validation Accuracy:
  - Class 0: 1.0000
  - Class 1: 0.9000
  - Class 2: 0.8889
  - Class 3: 0.8889
  - Class 4: 1.0000
  - Class 5: 1.0000
  - Class 6: 0.9000
  - Class 7: 0.8889
  - Class 8: 0.8889
  - Class 9: 1.0000
  - Class 10: 1.0000
  - Class 11: 1.0000
  - Class 12: 0.9000
  - Class 13: 0.5556
  - Class 14: 1.0000
  - Class 15: 1.0000
  - Class 16: 0.9000
  - Class 17: 1.0000
  - Class 18: 1.0000
  - Class 19: 1.0000

Epoch 9/15




Train Loss: 0.0996, Acc: 0.9747
Val Loss: 0.1765, Acc: 0.9251
Non-FG Val Acc: 0.9497

Per-Class Validation Accuracy:
  - Class 0: 1.0000
  - Class 1: 0.9000
  - Class 2: 0.8889
  - Class 3: 1.0000
  - Class 4: 1.0000
  - Class 5: 1.0000
  - Class 6: 0.9000
  - Class 7: 0.7778
  - Class 8: 1.0000
  - Class 9: 1.0000
  - Class 10: 1.0000
  - Class 11: 1.0000
  - Class 12: 0.8000
  - Class 13: 0.6667
  - Class 14: 1.0000
  - Class 15: 0.6667
  - Class 16: 1.0000
  - Class 17: 1.0000
  - Class 18: 0.9000
  - Class 19: 1.0000

Epoch 10/15




Train Loss: 0.1441, Acc: 0.9480
Val Loss: 0.1993, Acc: 0.9305
Non-FG Val Acc: 0.9623

Per-Class Validation Accuracy:
  - Class 0: 1.0000
  - Class 1: 0.9000
  - Class 2: 1.0000
  - Class 3: 1.0000
  - Class 4: 1.0000
  - Class 5: 1.0000
  - Class 6: 0.8000
  - Class 7: 0.7778
  - Class 8: 1.0000
  - Class 9: 1.0000
  - Class 10: 1.0000
  - Class 11: 1.0000
  - Class 12: 0.8000
  - Class 13: 0.4444
  - Class 14: 1.0000
  - Class 15: 0.8889
  - Class 16: 1.0000
  - Class 17: 1.0000
  - Class 18: 1.0000
  - Class 19: 1.0000

Epoch 11/15




Train Loss: 0.1256, Acc: 0.9613
Val Loss: 0.1438, Acc: 0.9465
Non-FG Val Acc: 0.9874

Per-Class Validation Accuracy:
  - Class 0: 1.0000
  - Class 1: 1.0000
  - Class 2: 0.8889
  - Class 3: 1.0000
  - Class 4: 1.0000
  - Class 5: 1.0000
  - Class 6: 1.0000
  - Class 7: 0.7778
  - Class 8: 1.0000
  - Class 9: 1.0000
  - Class 10: 1.0000
  - Class 11: 1.0000
  - Class 12: 0.8000
  - Class 13: 0.4444
  - Class 14: 1.0000
  - Class 15: 1.0000
  - Class 16: 1.0000
  - Class 17: 1.0000
  - Class 18: 1.0000
  - Class 19: 1.0000
New best model saved for Fold 3, Non-FG Acc: 0.9874

Epoch 12/15




Train Loss: 0.0880, Acc: 0.9747
Val Loss: 0.1466, Acc: 0.9465
Non-FG Val Acc: 0.9811

Per-Class Validation Accuracy:
  - Class 0: 1.0000
  - Class 1: 1.0000
  - Class 2: 0.7778
  - Class 3: 1.0000
  - Class 4: 1.0000
  - Class 5: 1.0000
  - Class 6: 1.0000
  - Class 7: 0.7778
  - Class 8: 0.8889
  - Class 9: 1.0000
  - Class 10: 1.0000
  - Class 11: 1.0000
  - Class 12: 0.8000
  - Class 13: 0.6667
  - Class 14: 1.0000
  - Class 15: 1.0000
  - Class 16: 1.0000
  - Class 17: 1.0000
  - Class 18: 1.0000
  - Class 19: 1.0000

Epoch 13/15




Train Loss: 0.1020, Acc: 0.9667
Val Loss: 0.2267, Acc: 0.9198
Non-FG Val Acc: 0.9623

Per-Class Validation Accuracy:
  - Class 0: 1.0000
  - Class 1: 1.0000
  - Class 2: 0.6667
  - Class 3: 1.0000
  - Class 4: 1.0000
  - Class 5: 1.0000
  - Class 6: 1.0000
  - Class 7: 0.7778
  - Class 8: 0.8889
  - Class 9: 1.0000
  - Class 10: 1.0000
  - Class 11: 1.0000
  - Class 12: 0.7000
  - Class 13: 0.6667
  - Class 14: 1.0000
  - Class 15: 0.7778
  - Class 16: 1.0000
  - Class 17: 1.0000
  - Class 18: 1.0000
  - Class 19: 0.9000

Epoch 14/15




Train Loss: 0.0947, Acc: 0.9693
Val Loss: 0.1751, Acc: 0.9358
Non-FG Val Acc: 0.9623

Per-Class Validation Accuracy:
  - Class 0: 1.0000
  - Class 1: 0.9000
  - Class 2: 0.7778
  - Class 3: 0.8889
  - Class 4: 1.0000
  - Class 5: 1.0000
  - Class 6: 0.9000
  - Class 7: 0.7778
  - Class 8: 1.0000
  - Class 9: 1.0000
  - Class 10: 1.0000
  - Class 11: 1.0000
  - Class 12: 0.6000
  - Class 13: 1.0000
  - Class 14: 1.0000
  - Class 15: 1.0000
  - Class 16: 0.9000
  - Class 17: 1.0000
  - Class 18: 1.0000
  - Class 19: 1.0000

Epoch 15/15




Train Loss: 0.0908, Acc: 0.9747
Val Loss: 0.1976, Acc: 0.9144
Non-FG Val Acc: 0.9434

Per-Class Validation Accuracy:
  - Class 0: 1.0000
  - Class 1: 0.9000
  - Class 2: 0.8889
  - Class 3: 1.0000
  - Class 4: 0.8889
  - Class 5: 0.8000
  - Class 6: 1.0000
  - Class 7: 0.8889
  - Class 8: 0.8889
  - Class 9: 1.0000
  - Class 10: 1.0000
  - Class 11: 1.0000
  - Class 12: 0.7000
  - Class 13: 0.6667
  - Class 14: 1.0000
  - Class 15: 0.8889
  - Class 16: 1.0000
  - Class 17: 0.8889
  - Class 18: 1.0000
  - Class 19: 0.9000
Fold 3 training completed, memory cleared!


Fold 4/5


Some weights of SwinForImageClassification were not initialized from the model checkpoint at Emiel/cub-200-bird-classifier-swin and are newly initialized because the shapes did not match:
- classifier.bias: found shape torch.Size([200]) in the checkpoint and torch.Size([20]) in the model instantiated
- classifier.weight: found shape torch.Size([200, 1536]) in the checkpoint and torch.Size([20, 1536]) in the model instantiated
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.



Epoch 1/15




Train Loss: 1.1395, Acc: 0.6840
Val Loss: 0.3260, Acc: 0.9037
Non-FG Val Acc: 0.9434

Per-Class Validation Accuracy:
  - Class 0: 1.0000
  - Class 1: 0.9000
  - Class 2: 0.8889
  - Class 3: 1.0000
  - Class 4: 1.0000
  - Class 5: 1.0000
  - Class 6: 0.9000
  - Class 7: 0.8889
  - Class 8: 1.0000
  - Class 9: 0.8889
  - Class 10: 0.8889
  - Class 11: 1.0000
  - Class 12: 0.7778
  - Class 13: 0.4000
  - Class 14: 1.0000
  - Class 15: 0.8889
  - Class 16: 0.8889
  - Class 17: 0.9000
  - Class 18: 1.0000
  - Class 19: 0.8889
New best model saved for Fold 4, Non-FG Acc: 0.9434

Epoch 2/15




Train Loss: 0.2945, Acc: 0.9147
Val Loss: 0.2422, Acc: 0.9251
Non-FG Val Acc: 0.9560

Per-Class Validation Accuracy:
  - Class 0: 1.0000
  - Class 1: 0.9000
  - Class 2: 1.0000
  - Class 3: 1.0000
  - Class 4: 1.0000
  - Class 5: 1.0000
  - Class 6: 1.0000
  - Class 7: 0.8889
  - Class 8: 0.8889
  - Class 9: 0.8889
  - Class 10: 0.8889
  - Class 11: 1.0000
  - Class 12: 0.6667
  - Class 13: 0.6000
  - Class 14: 1.0000
  - Class 15: 0.7778
  - Class 16: 1.0000
  - Class 17: 1.0000
  - Class 18: 1.0000
  - Class 19: 1.0000
New best model saved for Fold 4, Non-FG Acc: 0.9560

Epoch 3/15




Train Loss: 0.2408, Acc: 0.9200
Val Loss: 0.2434, Acc: 0.9358
Non-FG Val Acc: 0.9623

Per-Class Validation Accuracy:
  - Class 0: 1.0000
  - Class 1: 1.0000
  - Class 2: 1.0000
  - Class 3: 1.0000
  - Class 4: 1.0000
  - Class 5: 1.0000
  - Class 6: 1.0000
  - Class 7: 0.8889
  - Class 8: 0.8889
  - Class 9: 1.0000
  - Class 10: 0.8889
  - Class 11: 1.0000
  - Class 12: 1.0000
  - Class 13: 0.4000
  - Class 14: 1.0000
  - Class 15: 0.6667
  - Class 16: 1.0000
  - Class 17: 1.0000
  - Class 18: 1.0000
  - Class 19: 1.0000
New best model saved for Fold 4, Non-FG Acc: 0.9623

Epoch 4/15




Train Loss: 0.2160, Acc: 0.9267
Val Loss: 0.1921, Acc: 0.9465
Non-FG Val Acc: 0.9748

Per-Class Validation Accuracy:
  - Class 0: 1.0000
  - Class 1: 1.0000
  - Class 2: 1.0000
  - Class 3: 1.0000
  - Class 4: 1.0000
  - Class 5: 1.0000
  - Class 6: 1.0000
  - Class 7: 0.8889
  - Class 8: 0.8889
  - Class 9: 1.0000
  - Class 10: 0.8889
  - Class 11: 1.0000
  - Class 12: 0.6667
  - Class 13: 0.7000
  - Class 14: 1.0000
  - Class 15: 1.0000
  - Class 16: 0.8889
  - Class 17: 1.0000
  - Class 18: 1.0000
  - Class 19: 1.0000
New best model saved for Fold 4, Non-FG Acc: 0.9748

Epoch 5/15




Train Loss: 0.1865, Acc: 0.9467
Val Loss: 0.2116, Acc: 0.9198
Non-FG Val Acc: 0.9560

Per-Class Validation Accuracy:
  - Class 0: 1.0000
  - Class 1: 0.9000
  - Class 2: 0.8889
  - Class 3: 1.0000
  - Class 4: 1.0000
  - Class 5: 0.8889
  - Class 6: 1.0000
  - Class 7: 0.7778
  - Class 8: 1.0000
  - Class 9: 1.0000
  - Class 10: 0.8889
  - Class 11: 1.0000
  - Class 12: 0.7778
  - Class 13: 0.5000
  - Class 14: 1.0000
  - Class 15: 0.8889
  - Class 16: 1.0000
  - Class 17: 1.0000
  - Class 18: 1.0000
  - Class 19: 0.8889

Epoch 6/15




Train Loss: 0.1585, Acc: 0.9547
Val Loss: 0.1532, Acc: 0.9572
Non-FG Val Acc: 0.9686

Per-Class Validation Accuracy:
  - Class 0: 1.0000
  - Class 1: 1.0000
  - Class 2: 1.0000
  - Class 3: 1.0000
  - Class 4: 0.9000
  - Class 5: 1.0000
  - Class 6: 1.0000
  - Class 7: 0.8889
  - Class 8: 0.8889
  - Class 9: 1.0000
  - Class 10: 0.8889
  - Class 11: 1.0000
  - Class 12: 0.7778
  - Class 13: 0.9000
  - Class 14: 1.0000
  - Class 15: 1.0000
  - Class 16: 1.0000
  - Class 17: 1.0000
  - Class 18: 1.0000
  - Class 19: 0.8889

Epoch 7/15




Train Loss: 0.1527, Acc: 0.9493
Val Loss: 0.1744, Acc: 0.9358
Non-FG Val Acc: 0.9686

Per-Class Validation Accuracy:
  - Class 0: 1.0000
  - Class 1: 1.0000
  - Class 2: 0.8889
  - Class 3: 1.0000
  - Class 4: 1.0000
  - Class 5: 0.8889
  - Class 6: 1.0000
  - Class 7: 0.7778
  - Class 8: 1.0000
  - Class 9: 1.0000
  - Class 10: 0.8889
  - Class 11: 1.0000
  - Class 12: 0.5556
  - Class 13: 0.8000
  - Class 14: 1.0000
  - Class 15: 0.8889
  - Class 16: 1.0000
  - Class 17: 1.0000
  - Class 18: 1.0000
  - Class 19: 1.0000

Epoch 8/15




Train Loss: 0.1765, Acc: 0.9453
Val Loss: 0.1630, Acc: 0.9465
Non-FG Val Acc: 0.9811

Per-Class Validation Accuracy:
  - Class 0: 1.0000
  - Class 1: 1.0000
  - Class 2: 1.0000
  - Class 3: 1.0000
  - Class 4: 1.0000
  - Class 5: 1.0000
  - Class 6: 0.9000
  - Class 7: 0.8889
  - Class 8: 1.0000
  - Class 9: 1.0000
  - Class 10: 1.0000
  - Class 11: 1.0000
  - Class 12: 0.5556
  - Class 13: 0.7000
  - Class 14: 1.0000
  - Class 15: 0.8889
  - Class 16: 1.0000
  - Class 17: 1.0000
  - Class 18: 1.0000
  - Class 19: 1.0000
New best model saved for Fold 4, Non-FG Acc: 0.9811

Epoch 9/15




Train Loss: 0.1458, Acc: 0.9573
Val Loss: 0.1951, Acc: 0.9412
Non-FG Val Acc: 0.9560

Per-Class Validation Accuracy:
  - Class 0: 1.0000
  - Class 1: 0.9000
  - Class 2: 1.0000
  - Class 3: 1.0000
  - Class 4: 1.0000
  - Class 5: 1.0000
  - Class 6: 1.0000
  - Class 7: 0.8889
  - Class 8: 1.0000
  - Class 9: 0.8889
  - Class 10: 0.8889
  - Class 11: 1.0000
  - Class 12: 0.7778
  - Class 13: 0.8000
  - Class 14: 1.0000
  - Class 15: 0.7778
  - Class 16: 1.0000
  - Class 17: 1.0000
  - Class 18: 1.0000
  - Class 19: 0.8889

Epoch 10/15




Train Loss: 0.1268, Acc: 0.9560
Val Loss: 0.1357, Acc: 0.9519
Non-FG Val Acc: 0.9748

Per-Class Validation Accuracy:
  - Class 0: 1.0000
  - Class 1: 1.0000
  - Class 2: 1.0000
  - Class 3: 1.0000
  - Class 4: 1.0000
  - Class 5: 1.0000
  - Class 6: 1.0000
  - Class 7: 0.7778
  - Class 8: 1.0000
  - Class 9: 1.0000
  - Class 10: 0.8889
  - Class 11: 1.0000
  - Class 12: 0.6667
  - Class 13: 0.8000
  - Class 14: 1.0000
  - Class 15: 1.0000
  - Class 16: 1.0000
  - Class 17: 0.9000
  - Class 18: 1.0000
  - Class 19: 1.0000

Epoch 11/15




Train Loss: 0.1271, Acc: 0.9627
Val Loss: 0.1870, Acc: 0.9358
Non-FG Val Acc: 0.9623

Per-Class Validation Accuracy:
  - Class 0: 1.0000
  - Class 1: 0.9000
  - Class 2: 1.0000
  - Class 3: 1.0000
  - Class 4: 1.0000
  - Class 5: 0.8889
  - Class 6: 1.0000
  - Class 7: 0.8889
  - Class 8: 1.0000
  - Class 9: 1.0000
  - Class 10: 0.8889
  - Class 11: 1.0000
  - Class 12: 0.7778
  - Class 13: 0.6000
  - Class 14: 1.0000
  - Class 15: 0.8889
  - Class 16: 1.0000
  - Class 17: 1.0000
  - Class 18: 1.0000
  - Class 19: 0.8889

Epoch 12/15




Train Loss: 0.1221, Acc: 0.9640
Val Loss: 0.1668, Acc: 0.9412
Non-FG Val Acc: 0.9686

Per-Class Validation Accuracy:
  - Class 0: 1.0000
  - Class 1: 1.0000
  - Class 2: 1.0000
  - Class 3: 1.0000
  - Class 4: 1.0000
  - Class 5: 1.0000
  - Class 6: 1.0000
  - Class 7: 0.7778
  - Class 8: 1.0000
  - Class 9: 1.0000
  - Class 10: 0.8889
  - Class 11: 1.0000
  - Class 12: 0.5556
  - Class 13: 0.8000
  - Class 14: 1.0000
  - Class 15: 0.8889
  - Class 16: 1.0000
  - Class 17: 1.0000
  - Class 18: 1.0000
  - Class 19: 0.8889

Epoch 13/15




Train Loss: 0.1142, Acc: 0.9627
Val Loss: 0.1703, Acc: 0.9251
Non-FG Val Acc: 0.9560

Per-Class Validation Accuracy:
  - Class 0: 1.0000
  - Class 1: 1.0000
  - Class 2: 1.0000
  - Class 3: 1.0000
  - Class 4: 1.0000
  - Class 5: 1.0000
  - Class 6: 1.0000
  - Class 7: 0.7778
  - Class 8: 1.0000
  - Class 9: 1.0000
  - Class 10: 0.8889
  - Class 11: 1.0000
  - Class 12: 0.4444
  - Class 13: 0.8000
  - Class 14: 1.0000
  - Class 15: 0.7778
  - Class 16: 1.0000
  - Class 17: 1.0000
  - Class 18: 1.0000
  - Class 19: 0.7778

Epoch 14/15




Train Loss: 0.1056, Acc: 0.9693
Val Loss: 0.1727, Acc: 0.9358
Non-FG Val Acc: 0.9623

Per-Class Validation Accuracy:
  - Class 0: 1.0000
  - Class 1: 1.0000
  - Class 2: 1.0000
  - Class 3: 1.0000
  - Class 4: 1.0000
  - Class 5: 1.0000
  - Class 6: 1.0000
  - Class 7: 0.6667
  - Class 8: 0.8889
  - Class 9: 1.0000
  - Class 10: 0.8889
  - Class 11: 1.0000
  - Class 12: 0.5556
  - Class 13: 0.8000
  - Class 14: 1.0000
  - Class 15: 0.8889
  - Class 16: 1.0000
  - Class 17: 1.0000
  - Class 18: 1.0000
  - Class 19: 1.0000

Epoch 15/15




Train Loss: 0.1086, Acc: 0.9667
Val Loss: 0.1742, Acc: 0.9412
Non-FG Val Acc: 0.9497

Per-Class Validation Accuracy:
  - Class 0: 0.8889
  - Class 1: 0.8000
  - Class 2: 1.0000
  - Class 3: 1.0000
  - Class 4: 1.0000
  - Class 5: 1.0000
  - Class 6: 0.9000
  - Class 7: 0.7778
  - Class 8: 1.0000
  - Class 9: 1.0000
  - Class 10: 0.8889
  - Class 11: 1.0000
  - Class 12: 0.8889
  - Class 13: 0.8000
  - Class 14: 1.0000
  - Class 15: 0.8889
  - Class 16: 1.0000
  - Class 17: 1.0000
  - Class 18: 1.0000
  - Class 19: 1.0000
Fold 4 training completed, memory cleared!


Fold 5/5


Some weights of SwinForImageClassification were not initialized from the model checkpoint at Emiel/cub-200-bird-classifier-swin and are newly initialized because the shapes did not match:
- classifier.bias: found shape torch.Size([200]) in the checkpoint and torch.Size([20]) in the model instantiated
- classifier.weight: found shape torch.Size([200, 1536]) in the checkpoint and torch.Size([20, 1536]) in the model instantiated
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.



Epoch 1/15




Train Loss: 1.0207, Acc: 0.7200
Val Loss: 0.3581, Acc: 0.8717
Non-FG Val Acc: 0.9304

Per-Class Validation Accuracy:
  - Class 0: 1.0000
  - Class 1: 1.0000
  - Class 2: 0.7000
  - Class 3: 1.0000
  - Class 4: 1.0000
  - Class 5: 1.0000
  - Class 6: 1.0000
  - Class 7: 0.9000
  - Class 8: 0.7778
  - Class 9: 1.0000
  - Class 10: 1.0000
  - Class 11: 0.8889
  - Class 12: 0.7778
  - Class 13: 0.2000
  - Class 14: 1.0000
  - Class 15: 0.8889
  - Class 16: 0.8889
  - Class 17: 0.8000
  - Class 18: 1.0000
  - Class 19: 0.6667
New best model saved for Fold 5, Non-FG Acc: 0.9304

Epoch 2/15




Train Loss: 0.3061, Acc: 0.9107
Val Loss: 0.3171, Acc: 0.8984
Non-FG Val Acc: 0.9620

Per-Class Validation Accuracy:
  - Class 0: 1.0000
  - Class 1: 1.0000
  - Class 2: 0.7000
  - Class 3: 1.0000
  - Class 4: 1.0000
  - Class 5: 1.0000
  - Class 6: 0.7778
  - Class 7: 1.0000
  - Class 8: 0.7778
  - Class 9: 1.0000
  - Class 10: 1.0000
  - Class 11: 1.0000
  - Class 12: 0.7778
  - Class 13: 0.2000
  - Class 14: 1.0000
  - Class 15: 0.8889
  - Class 16: 1.0000
  - Class 17: 1.0000
  - Class 18: 1.0000
  - Class 19: 0.8889
New best model saved for Fold 5, Non-FG Acc: 0.9620

Epoch 3/15




Train Loss: 0.2331, Acc: 0.9373
Val Loss: 0.2752, Acc: 0.9091
Non-FG Val Acc: 0.9430

Per-Class Validation Accuracy:
  - Class 0: 1.0000
  - Class 1: 0.8889
  - Class 2: 0.7000
  - Class 3: 1.0000
  - Class 4: 1.0000
  - Class 5: 1.0000
  - Class 6: 1.0000
  - Class 7: 0.8000
  - Class 8: 0.8889
  - Class 9: 1.0000
  - Class 10: 0.9000
  - Class 11: 1.0000
  - Class 12: 0.8889
  - Class 13: 0.6000
  - Class 14: 1.0000
  - Class 15: 1.0000
  - Class 16: 1.0000
  - Class 17: 0.9000
  - Class 18: 1.0000
  - Class 19: 0.6667

Epoch 4/15




Train Loss: 0.2254, Acc: 0.9360
Val Loss: 0.2892, Acc: 0.8824
Non-FG Val Acc: 0.9430

Per-Class Validation Accuracy:
  - Class 0: 1.0000
  - Class 1: 1.0000
  - Class 2: 0.7000
  - Class 3: 1.0000
  - Class 4: 1.0000
  - Class 5: 1.0000
  - Class 6: 1.0000
  - Class 7: 0.9000
  - Class 8: 0.8889
  - Class 9: 1.0000
  - Class 10: 0.9000
  - Class 11: 1.0000
  - Class 12: 0.5556
  - Class 13: 0.4000
  - Class 14: 1.0000
  - Class 15: 1.0000
  - Class 16: 1.0000
  - Class 17: 0.8000
  - Class 18: 0.8889
  - Class 19: 0.6667

Epoch 5/15




Train Loss: 0.1512, Acc: 0.9560
Val Loss: 0.2958, Acc: 0.9037
Non-FG Val Acc: 0.9620

Per-Class Validation Accuracy:
  - Class 0: 1.0000
  - Class 1: 1.0000
  - Class 2: 0.7000
  - Class 3: 1.0000
  - Class 4: 1.0000
  - Class 5: 1.0000
  - Class 6: 0.7778
  - Class 7: 1.0000
  - Class 8: 1.0000
  - Class 9: 1.0000
  - Class 10: 1.0000
  - Class 11: 0.8889
  - Class 12: 0.5556
  - Class 13: 0.5000
  - Class 14: 1.0000
  - Class 15: 0.8889
  - Class 16: 1.0000
  - Class 17: 1.0000
  - Class 18: 1.0000
  - Class 19: 0.7778

Epoch 6/15




Train Loss: 0.1840, Acc: 0.9347
Val Loss: 0.2809, Acc: 0.9037
Non-FG Val Acc: 0.9684

Per-Class Validation Accuracy:
  - Class 0: 1.0000
  - Class 1: 1.0000
  - Class 2: 0.6000
  - Class 3: 1.0000
  - Class 4: 1.0000
  - Class 5: 1.0000
  - Class 6: 0.8889
  - Class 7: 1.0000
  - Class 8: 0.8889
  - Class 9: 1.0000
  - Class 10: 0.9000
  - Class 11: 1.0000
  - Class 12: 0.7778
  - Class 13: 0.3000
  - Class 14: 1.0000
  - Class 15: 1.0000
  - Class 16: 1.0000
  - Class 17: 0.9000
  - Class 18: 1.0000
  - Class 19: 0.8889
New best model saved for Fold 5, Non-FG Acc: 0.9684

Epoch 7/15




Train Loss: 0.1461, Acc: 0.9480
Val Loss: 0.2521, Acc: 0.9144
Non-FG Val Acc: 0.9557

Per-Class Validation Accuracy:
  - Class 0: 1.0000
  - Class 1: 1.0000
  - Class 2: 0.8000
  - Class 3: 1.0000
  - Class 4: 1.0000
  - Class 5: 1.0000
  - Class 6: 1.0000
  - Class 7: 1.0000
  - Class 8: 0.8889
  - Class 9: 1.0000
  - Class 10: 0.8000
  - Class 11: 1.0000
  - Class 12: 0.6667
  - Class 13: 0.6000
  - Class 14: 1.0000
  - Class 15: 1.0000
  - Class 16: 0.8889
  - Class 17: 0.9000
  - Class 18: 1.0000
  - Class 19: 0.7778

Epoch 8/15




Train Loss: 0.1331, Acc: 0.9493
Val Loss: 0.2595, Acc: 0.9037
Non-FG Val Acc: 0.9620

Per-Class Validation Accuracy:
  - Class 0: 1.0000
  - Class 1: 1.0000
  - Class 2: 0.6000
  - Class 3: 1.0000
  - Class 4: 1.0000
  - Class 5: 0.8889
  - Class 6: 1.0000
  - Class 7: 0.9000
  - Class 8: 1.0000
  - Class 9: 1.0000
  - Class 10: 1.0000
  - Class 11: 1.0000
  - Class 12: 0.6667
  - Class 13: 0.5000
  - Class 14: 1.0000
  - Class 15: 0.8889
  - Class 16: 1.0000
  - Class 17: 0.8000
  - Class 18: 1.0000
  - Class 19: 0.8889

Epoch 9/15




Train Loss: 0.1443, Acc: 0.9467
Val Loss: 0.2922, Acc: 0.8930
Non-FG Val Acc: 0.9620

Per-Class Validation Accuracy:
  - Class 0: 1.0000
  - Class 1: 1.0000
  - Class 2: 0.6000
  - Class 3: 1.0000
  - Class 4: 1.0000
  - Class 5: 1.0000
  - Class 6: 0.8889
  - Class 7: 1.0000
  - Class 8: 1.0000
  - Class 9: 1.0000
  - Class 10: 0.9000
  - Class 11: 1.0000
  - Class 12: 0.5556
  - Class 13: 0.4000
  - Class 14: 0.9000
  - Class 15: 0.7778
  - Class 16: 1.0000
  - Class 17: 1.0000
  - Class 18: 1.0000
  - Class 19: 0.8889

Epoch 10/15




Train Loss: 0.0919, Acc: 0.9720
Val Loss: 0.2633, Acc: 0.9198
Non-FG Val Acc: 0.9747

Per-Class Validation Accuracy:
  - Class 0: 0.9000
  - Class 1: 1.0000
  - Class 2: 0.6000
  - Class 3: 1.0000
  - Class 4: 1.0000
  - Class 5: 1.0000
  - Class 6: 0.8889
  - Class 7: 1.0000
  - Class 8: 1.0000
  - Class 9: 1.0000
  - Class 10: 0.9000
  - Class 11: 1.0000
  - Class 12: 0.7778
  - Class 13: 0.5000
  - Class 14: 1.0000
  - Class 15: 1.0000
  - Class 16: 1.0000
  - Class 17: 1.0000
  - Class 18: 1.0000
  - Class 19: 0.8889
New best model saved for Fold 5, Non-FG Acc: 0.9747

Epoch 11/15




Train Loss: 0.1305, Acc: 0.9613
Val Loss: 0.2131, Acc: 0.9198
Non-FG Val Acc: 0.9684

Per-Class Validation Accuracy:
  - Class 0: 0.9000
  - Class 1: 1.0000
  - Class 2: 0.7000
  - Class 3: 1.0000
  - Class 4: 1.0000
  - Class 5: 1.0000
  - Class 6: 1.0000
  - Class 7: 1.0000
  - Class 8: 1.0000
  - Class 9: 1.0000
  - Class 10: 1.0000
  - Class 11: 1.0000
  - Class 12: 0.8889
  - Class 13: 0.4000
  - Class 14: 1.0000
  - Class 15: 0.8889
  - Class 16: 1.0000
  - Class 17: 0.8000
  - Class 18: 1.0000
  - Class 19: 0.8889

Epoch 12/15




Train Loss: 0.0977, Acc: 0.9733
Val Loss: 0.2070, Acc: 0.9358
Non-FG Val Acc: 0.9810

Per-Class Validation Accuracy:
  - Class 0: 1.0000
  - Class 1: 0.8889
  - Class 2: 0.8000
  - Class 3: 1.0000
  - Class 4: 1.0000
  - Class 5: 1.0000
  - Class 6: 1.0000
  - Class 7: 1.0000
  - Class 8: 1.0000
  - Class 9: 1.0000
  - Class 10: 1.0000
  - Class 11: 1.0000
  - Class 12: 0.6667
  - Class 13: 0.6000
  - Class 14: 1.0000
  - Class 15: 0.8889
  - Class 16: 1.0000
  - Class 17: 1.0000
  - Class 18: 1.0000
  - Class 19: 0.8889
New best model saved for Fold 5, Non-FG Acc: 0.9810

Epoch 13/15




Train Loss: 0.1245, Acc: 0.9653
Val Loss: 0.2483, Acc: 0.9198
Non-FG Val Acc: 0.9620

Per-Class Validation Accuracy:
  - Class 0: 1.0000
  - Class 1: 0.8889
  - Class 2: 0.7000
  - Class 3: 1.0000
  - Class 4: 0.8889
  - Class 5: 1.0000
  - Class 6: 1.0000
  - Class 7: 0.9000
  - Class 8: 1.0000
  - Class 9: 1.0000
  - Class 10: 0.9000
  - Class 11: 1.0000
  - Class 12: 0.7778
  - Class 13: 0.6000
  - Class 14: 1.0000
  - Class 15: 1.0000
  - Class 16: 1.0000
  - Class 17: 0.8000
  - Class 18: 1.0000
  - Class 19: 1.0000

Epoch 14/15




Train Loss: 0.1023, Acc: 0.9693
Val Loss: 0.2036, Acc: 0.9358
Non-FG Val Acc: 0.9810

Per-Class Validation Accuracy:
  - Class 0: 1.0000
  - Class 1: 1.0000
  - Class 2: 0.7000
  - Class 3: 1.0000
  - Class 4: 1.0000
  - Class 5: 1.0000
  - Class 6: 1.0000
  - Class 7: 1.0000
  - Class 8: 1.0000
  - Class 9: 1.0000
  - Class 10: 0.9000
  - Class 11: 1.0000
  - Class 12: 0.8889
  - Class 13: 0.5000
  - Class 14: 1.0000
  - Class 15: 1.0000
  - Class 16: 0.8889
  - Class 17: 1.0000
  - Class 18: 1.0000
  - Class 19: 0.8889

Epoch 15/15




Train Loss: 0.0937, Acc: 0.9707
Val Loss: 0.2338, Acc: 0.9144
Non-FG Val Acc: 0.9873

Per-Class Validation Accuracy:
  - Class 0: 1.0000
  - Class 1: 1.0000
  - Class 2: 0.6000
  - Class 3: 1.0000
  - Class 4: 1.0000
  - Class 5: 1.0000
  - Class 6: 1.0000
  - Class 7: 1.0000
  - Class 8: 1.0000
  - Class 9: 1.0000
  - Class 10: 1.0000
  - Class 11: 1.0000
  - Class 12: 0.6667
  - Class 13: 0.3000
  - Class 14: 1.0000
  - Class 15: 1.0000
  - Class 16: 1.0000
  - Class 17: 0.9000
  - Class 18: 1.0000
  - Class 19: 0.8889
New best model saved for Fold 5, Non-FG Acc: 0.9873
Fold 5 training completed, memory cleared!



## Save Best Main Model

In [15]:
# import os
# import torch

# # Define the directory where you'll save the best model in Hugging Face format.
# save_dir = "/kaggle/working/swin_model"
# os.makedirs(save_dir, exist_ok=True)

# # Load the best model state dictionary (from the training loop).
# best_state_dict = torch.load(best_model_path, map_location=torch.device("cpu"))
# model.load_state_dict(best_state_dict)

# # Save the best model in Hugging Face format.
# model.save_pretrained(save_dir)
# processor.save_pretrained(save_dir)

# # Optionally, save additional information such as class mapping and training configuration.
# class_info = {
#     "class_order": submission_class_order,
#     "class_to_idx": train_dataset.class_to_idx,
#     "idx_to_class": {v: k for k, v in train_dataset.class_to_idx.items()}
# }
# torch.save(class_info, os.path.join(save_dir, "class_info.pth"))

# train_config = {
#     "epochs_trained": len(history['train_loss']),
#     "best_val_acc": max(history['val_acc']),
#     "optimizer_state": optimizer.state_dict()
# }
# torch.save(train_config, os.path.join(save_dir, "train_config.pth"))

# # Also save the class order as a plain text file for reference.
# with open(os.path.join(save_dir, "class_info.txt"), "w") as f:
#     f.write("\n".join(submission_class_order))

# print(f"Best model saved in Hugging Face format to: {save_dir}")

# Train the Secondary Model (Rusty_Blackbird, American_Crow, Fish_Crow)

## Filter Dataset for Secondary Model

In [16]:
class FilteredBirdDataset(Dataset):
    def __init__(self, main_dataset, classes_to_keep, transform=None):
        self.dataset = main_dataset.dataset  
        self.transform = transform

        self.indices = [i for i, (_, label) in enumerate(self.dataset.samples) if label in classes_to_keep]

        self.classes_to_keep = {class_idx: i for i, class_idx in enumerate(classes_to_keep)}

    def __len__(self):
        return len(self.indices)

    def __getitem__(self, idx):
        image_path, label = self.dataset.samples[self.indices[idx]]  
        image = Image.open(image_path).convert("RGB")  

        if self.transform:
            if isinstance(self.transform, A.Compose):
                image = np.array(image)  # Albumentations 
                image = self.transform(image=image)["image"]
            else:
                image = self.transform(image)

        new_label = self.classes_to_keep[label] 
        return image, new_label

In [17]:
import albumentations as A
from albumentations.pytorch import ToTensorV2
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

albumentations_transform = A.Compose([
    # Basic preprocessing
    A.Resize(height=420, width=420),
    A.RandomResizedCrop(size=(384, 384), scale=(0.8, 1.0), ratio=(0.75, 1.33), p=1.0), 

    # Geometric transformations
    A.HorizontalFlip(p=0.5),
    A.VerticalFlip(p=0.2),
    A.Affine(scale=(0.9, 1.1), translate_percent=(0.1, 0.1), rotate=(-20, 20), shear=(-10, 10), p=0.7),  

    # Fine-grained texture enhancement
    A.GaussNoise(std_range=(0.01, 0.1), mean_range=(-0.5, 0.5), per_channel=True, noise_scale_factor=1.0, p=0.3),
    A.Sharpen(alpha=(0.2, 0.5), lightness=(0.8, 1.2), p=0.5),
    A.Emboss(alpha=(0.1, 0.3), strength=(0.3, 0.6), p=0.4),

    # Color and contrast enhancements
    A.MotionBlur(p=0.3),  
    A.CLAHE(p=0.3),  
    A.RandomBrightnessContrast(p=0.4),
    A.HueSaturationValue(hue_shift_limit=12, sat_shift_limit=25, val_shift_limit=12, p=0.4),

    # Patch-Level Occlusion
    A.CoarseDropout(
        num_holes_range=(3, 8),
        hole_height_range=(0.03, 0.12),
        hole_width_range=(0.03, 0.12),
        fill=0,
        p=0.5
    ),

    # Part-Based Attention Regularization
    A.GridDropout(ratio=0.15, p=0.4),
    A.RandomShadow(p=0.3),
    A.RandomRain(p=0.2, brightness_coefficient=0.9, drop_width=1, blur_value=3),

    # Normalize and convert to tensor
    A.Normalize(mean=processor.image_mean, std=processor.image_std),
    ToTensorV2(),
])

In [18]:
from timm.data.mixup import Mixup

mixup_fn = Mixup(
    mixup_alpha=0.4, 
    cutmix_alpha=0.4, 
    cutmix_minmax=None, 
    prob=0.7,  # Probability of applying MixUp/CutMix
    switch_prob=0.5,  # Probability of switching between MixUp & CutMix
    mode='batch', 
    label_smoothing=0.1
)

## Create the Secondary Model

In [19]:
from sklearn.model_selection import StratifiedKFold
from torch.utils.data import Subset, DataLoader
from transformers import SwinForImageClassification, AdamW
from collections import Counter

In [20]:
# # Load secondary model
# secondary_model_name = "Emiel/cub-200-bird-classifier-swin"
# id2label = {0: "Rusty_Blackbird", 1: "American_Crow", 2: "Fish_Crow"}
# label2id = {v: k for k, v in id2label.items()}

# device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
# secondary_model = SwinForImageClassification.from_pretrained(
#     secondary_model_name,
#     num_labels=3,
#     ignore_mismatched_sizes=True,
#     id2label=id2label,
#     label2id=label2id
# ).to(device)

# # Freeze backbone & fine-tune only the last block
# for param in secondary_model.swin.parameters():
#     param.requires_grad = False
# if hasattr(secondary_model.swin, "stages"):
#     for param in secondary_model.swin.stages[-2].parameters():
#         param.requires_grad = True
# for param in secondary_model.classifier.parameters():
#     param.requires_grad = True

## Prepare the Secondary Dataset & Dataloader

In [21]:
# fine_grained_classes = [2, 12, 13]  # Only keep Rusty_Blackbird, American_Crow, and Fish_Crow

# train_secondary_dataset = FilteredBirdDataset(train_dataset, fine_grained_classes, transform=albumentations_transform)
# val_secondary_dataset = FilteredBirdDataset(val_dataset, fine_grained_classes, transform=val_transform)

# train_secondary_loader = DataLoader(train_secondary_dataset, batch_size=32, shuffle=True, num_workers=0)
# val_secondary_loader = DataLoader(val_secondary_dataset, batch_size=64, shuffle=False, num_workers=0)

# # Apply MixUp to Secondary Model Training
# for images, labels in train_secondary_loader:
#     images, labels = mixup_fn(images, labels)
#     break

# print(f"Secondary dataset size: Train={len(train_secondary_dataset)}, Validation={len(val_secondary_dataset)}")

In [22]:
# Load dataset
fine_grained_classes = [2, 12, 13]
all_dataset = FilteredBirdDataset(train_dataset, fine_grained_classes, transform=albumentations_transform)

# Prepare Stratified K-Fold Cross-Validation
K = 5
skf = StratifiedKFold(n_splits=K, shuffle=True, random_state=42)
indices = np.arange(len(all_dataset))
labels = [label for _, label in all_dataset]
os.mkdir("/kaggle/working/secondard_pth")

## Train the Secondary Model

In [23]:
def validate_secondary(model, loader):
    model.eval()
    total_loss = 0
    correct = 0
    all_preds = []
    all_labels = []
    class_correct = {0: 0, 1: 0, 2: 0}  # Track per-class accuracy
    class_total = {0: 0, 1: 0, 2: 0}  # Track total samples per class
    
    with torch.no_grad():
        for images, labels in loader:
            images = images.to(device)
            labels = labels.to(device)
            
            with torch.amp.autocast(device_type="cuda"):
                outputs = model(images)
                loss = criterion(outputs.logits, labels)  # Use secondary criterion
            
            total_loss += loss.item() * images.size(0)
            preds = torch.argmax(outputs.logits, dim=1)
            correct += (preds == labels).sum().item()
            
            # Track per-class accuracy
            for label, pred in zip(labels.cpu().numpy(), preds.cpu().numpy()):
                class_correct[label] += (pred == label)
                class_total[label] += 1

            all_preds.extend(preds.cpu().numpy())
            all_labels.extend(labels.cpu().numpy())

    avg_loss = total_loss / len(loader.dataset)
    accuracy = correct / len(loader.dataset)

    # Ensure 5 values are returned
    if len(set(all_labels)) > 1:  # Avoid error if only one class is present
        precision = precision_score(all_labels, all_preds, average="macro", zero_division=1)
        recall = recall_score(all_labels, all_preds, average="macro", zero_division=1)
        f1 = f1_score(all_labels, all_preds, average="macro", zero_division=1)
    else:
        precision, recall, f1 = 0, 0, 0  # Handle edge cases where only one class exists

    # Compute per-class accuracy
    class_accuracy = {cls: class_correct[cls] / class_total[cls] if class_total[cls] > 0 else 0 for cls in class_total}

    return avg_loss, accuracy, precision, recall, f1, class_accuracy  # Returns per-class accuracy

In [24]:
# Define optimizer & loss for secondary model
# secondary_optimizer = AdamW([
#     {"params": secondary_model.swin.parameters(), "lr": 1e-5},
#     {"params": secondary_model.classifier.parameters(), "lr": 3e-4}
# ], weight_decay=0.01)

# secondary_criterion = torch.nn.CrossEntropyLoss()

# from collections import Counter

# # Count occurrences of each class in the training dataset
# train_labels = [label for _, label in train_secondary_dataset]
# class_counts = Counter(train_labels)

# # Convert to ordered list
# num_classes = 3  # You have 3 fine-grained classes
# class_counts_list = [class_counts.get(i, 1) for i in range(num_classes)]  # Avoid zero division

# # Compute class weights dynamically
# total_samples = sum(class_counts_list)
# class_weights = [total_samples / (len(class_counts_list) * count) for count in class_counts_list]
# class_weights = torch.tensor(class_weights, dtype=torch.float).to(device)

# # Use dynamically computed weights for loss function
# secondary_criterion = torch.nn.CrossEntropyLoss(weight=class_weights)

# # Training loop for the secondary model
# best_secondary_model_path = "/kaggle/working/best_secondary_model.pth"
# best_secondary_val_acc = 0.0

# secondary_model_save_dir = "/kaggle/working/secondary_swin_model"
# os.makedirs(secondary_model_save_dir, exist_ok=True)

# for epoch in range(5):  # Train for 50 epochs
#     train_loss, train_acc = train_epoch(secondary_model, train_secondary_loader, secondary_optimizer, scaler)
#     val_loss, val_acc, val_precision, val_recall, val_f1, class_accuracy = validate_secondary(secondary_model, val_secondary_loader)

#     print(f"\n[Secondary Model] Epoch {epoch+1:02d}:")
#     print(f"Train Loss: {train_loss:.4f} | Train Acc: {train_acc:.4f}")
#     print(f"Val Loss: {val_loss:.4f} | Val Acc: {val_acc:.4f}")
#     print(f"Precision: {val_precision:.4f} | Recall: {val_recall:.4f} | F1-score: {val_f1:.4f}")

#     # Print per-class accuracy
#     print("\nPer-Class Accuracy:")
#     print(f"  - Rusty_Blackbird (Class 0): {class_accuracy[0]:.4f}")
#     print(f"  - American_Crow (Class 1): {class_accuracy[1]:.4f}")
#     print(f"  - Fish_Crow (Class 2): {class_accuracy[2]:.4f}")

#     # Save the best secondary model
#     if val_acc > best_secondary_val_acc:
#         best_secondary_val_acc = val_acc
#         torch.save(secondary_model.state_dict(), best_secondary_model_path)
#         print(f"\nNew best secondary model saved with validation accuracy: {val_acc:.4f}")

#         # Save model in Hugging Face format
#         secondary_model.save_pretrained(secondary_model_save_dir)
#         processor.save_pretrained(secondary_model_save_dir)  # Save processor too

In [25]:
import os
import torch
import numpy as np
from collections import Counter
from sklearn.model_selection import StratifiedKFold
from torch.utils.data import Subset, DataLoader
from transformers import SwinForImageClassification, AdamW

# Secondary model settings
secondary_model_name = "Emiel/cub-200-bird-classifier-swin"
id2label = {0: "Rusty_Blackbird", 1: "American_Crow", 2: "Fish_Crow"}
label2id = {v: k for k, v in id2label.items()}

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
num_folds = 5
output_dir = "/kaggle/working/secondary_dir"
os.makedirs(output_dir, exist_ok=True)

# Prepare Stratified K-Fold
skf = StratifiedKFold(n_splits=num_folds, shuffle=True, random_state=42)
indices = np.arange(len(all_dataset))
labels = [label for _, label in all_dataset]

# Training with Stratified K-Fold
for fold, (train_idx, val_idx) in enumerate(skf.split(indices, labels)):
    print(f"\nTraining Fold {fold+1}/{num_folds}...")

    # Create subset datasets
    train_subset = Subset(all_dataset, train_idx)
    val_subset = Subset(all_dataset, val_idx)

    # Create DataLoaders
    train_loader = DataLoader(train_subset, batch_size=32, shuffle=True, num_workers=0)
    val_loader = DataLoader(val_subset, batch_size=64, shuffle=False, num_workers=0)

    # Reload a fresh secondary model for this fold
    secondary_model = SwinForImageClassification.from_pretrained(
        secondary_model_name,
        num_labels=3,
        ignore_mismatched_sizes=True,
        id2label=id2label,
        label2id=label2id
    ).to(device)

    # **Manually initialize classifier layer** (Fix shape mismatch)
    secondary_model.classifier = torch.nn.Linear(1536, 3).to(device)

    # Freeze backbone & fine-tune only the last block
    for param in secondary_model.swin.parameters():
        param.requires_grad = False
    if hasattr(secondary_model.swin, "stages"):
        for param in secondary_model.swin.stages[-2].parameters():
            param.requires_grad = True
    for param in secondary_model.classifier.parameters():
        param.requires_grad = True

    # Compute class weights dynamically
    train_labels = [label for _, label in train_subset]
    class_counts = Counter(train_labels)
    class_weights = [len(train_labels) / (3 * class_counts.get(i, 1)) for i in range(3)]
    class_weights = torch.tensor(class_weights, dtype=torch.float).to(device)

    # Define loss function & optimizer
    criterion = torch.nn.CrossEntropyLoss(weight=class_weights)
    optimizer = AdamW([
        {"params": secondary_model.swin.parameters(), "lr": 1e-5},
        {"params": secondary_model.classifier.parameters(), "lr": 3e-4}
    ], weight_decay=0.01)

    # **Define the missing GradScaler**
    scaler = torch.cuda.amp.GradScaler()

    best_val_acc = 0.0
    best_model_path = f"{output_dir}/secondary_swin_fold{fold+1}.pth"

    # Training loop
    for epoch in range(20):  # Train for 5 epochs per fold
        train_loss, train_acc = train_epoch(secondary_model, train_loader, optimizer, scaler)  # Fix: Pass `scaler`
        val_loss, val_acc, val_precision, val_recall, val_f1, class_accuracy = validate_secondary(secondary_model, val_loader)

        print(f"\nFold {fold+1} | Epoch {epoch+1}:")
        print(f"Train Acc: {train_acc:.4f} | Val Acc: {val_acc:.4f}")
        print(f"Precision: {val_precision:.4f} | Recall: {val_recall:.4f} | F1-score: {val_f1:.4f}")

        # Show per-class accuracy
        print("\nPer-Class Accuracy:")
        print(f"  - Rusty_Blackbird (Class 0): {class_accuracy[0]:.4f}")
        print(f"  - American_Crow (Class 1): {class_accuracy[1]:.4f}")
        print(f"  - Fish_Crow (Class 2): {class_accuracy[2]:.4f}")

        # Save best model per fold
        if val_acc > best_val_acc:
            best_val_acc = val_acc
            torch.save(secondary_model.state_dict(), best_model_path)
            print(f"\nBest model for Fold {fold+1} saved: {best_model_path}")

    # Free GPU memory
    del secondary_model
    torch.cuda.empty_cache()

print("\nAll folds completed. Models saved successfully.")


Training Fold 1/5...


Some weights of SwinForImageClassification were not initialized from the model checkpoint at Emiel/cub-200-bird-classifier-swin and are newly initialized because the shapes did not match:
- classifier.bias: found shape torch.Size([200]) in the checkpoint and torch.Size([3]) in the model instantiated
- classifier.weight: found shape torch.Size([200, 1536]) in the checkpoint and torch.Size([3, 1536]) in the model instantiated
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
  scaler = torch.cuda.amp.GradScaler()



Fold 1 | Epoch 1:
Train Acc: 0.4386 | Val Acc: 0.4138
Precision: 0.3983 | Recall: 0.4074 | F1-score: 0.4008

Per-Class Accuracy:
  - Rusty_Blackbird (Class 0): 0.6000
  - American_Crow (Class 1): 0.4000
  - Fish_Crow (Class 2): 0.2222

Best model for Fold 1 saved: /kaggle/working/secondary_dir/secondary_swin_fold1.pth

Fold 1 | Epoch 2:
Train Acc: 0.7281 | Val Acc: 0.6552
Precision: 0.6519 | Recall: 0.6519 | F1-score: 0.6519

Per-Class Accuracy:
  - Rusty_Blackbird (Class 0): 0.8000
  - American_Crow (Class 1): 0.6000
  - Fish_Crow (Class 2): 0.5556

Best model for Fold 1 saved: /kaggle/working/secondary_dir/secondary_swin_fold1.pth

Fold 1 | Epoch 3:
Train Acc: 0.7544 | Val Acc: 0.5172
Precision: 0.5296 | Recall: 0.5111 | F1-score: 0.5193

Per-Class Accuracy:
  - Rusty_Blackbird (Class 0): 0.8000
  - American_Crow (Class 1): 0.4000
  - Fish_Crow (Class 2): 0.3333

Fold 1 | Epoch 4:
Train Acc: 0.7018 | Val Acc: 0.7241
Precision: 0.7227 | Recall: 0.7222 | F1-score: 0.7185

Per-Class Ac

Some weights of SwinForImageClassification were not initialized from the model checkpoint at Emiel/cub-200-bird-classifier-swin and are newly initialized because the shapes did not match:
- classifier.bias: found shape torch.Size([200]) in the checkpoint and torch.Size([3]) in the model instantiated
- classifier.weight: found shape torch.Size([200, 1536]) in the checkpoint and torch.Size([3, 1536]) in the model instantiated
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
  scaler = torch.cuda.amp.GradScaler()



Fold 2 | Epoch 1:
Train Acc: 0.2807 | Val Acc: 0.4483
Precision: 0.4583 | Recall: 0.4481 | F1-score: 0.4486

Per-Class Accuracy:
  - Rusty_Blackbird (Class 0): 0.3000
  - American_Crow (Class 1): 0.6000
  - Fish_Crow (Class 2): 0.4444

Best model for Fold 2 saved: /kaggle/working/secondary_dir/secondary_swin_fold2.pth

Fold 2 | Epoch 2:
Train Acc: 0.5702 | Val Acc: 0.5862
Precision: 0.5937 | Recall: 0.5815 | F1-score: 0.5789

Per-Class Accuracy:
  - Rusty_Blackbird (Class 0): 0.8000
  - American_Crow (Class 1): 0.5000
  - Fish_Crow (Class 2): 0.4444

Best model for Fold 2 saved: /kaggle/working/secondary_dir/secondary_swin_fold2.pth

Fold 2 | Epoch 3:
Train Acc: 0.7632 | Val Acc: 0.7931
Precision: 0.8091 | Recall: 0.7926 | F1-score: 0.7959

Per-Class Accuracy:
  - Rusty_Blackbird (Class 0): 0.8000
  - American_Crow (Class 1): 0.8000
  - Fish_Crow (Class 2): 0.7778

Best model for Fold 2 saved: /kaggle/working/secondary_dir/secondary_swin_fold2.pth

Fold 2 | Epoch 4:
Train Acc: 0.7018 

Some weights of SwinForImageClassification were not initialized from the model checkpoint at Emiel/cub-200-bird-classifier-swin and are newly initialized because the shapes did not match:
- classifier.bias: found shape torch.Size([200]) in the checkpoint and torch.Size([3]) in the model instantiated
- classifier.weight: found shape torch.Size([200, 1536]) in the checkpoint and torch.Size([3, 1536]) in the model instantiated
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
  scaler = torch.cuda.amp.GradScaler()



Fold 3 | Epoch 1:
Train Acc: 0.3421 | Val Acc: 0.5862
Precision: 0.6098 | Recall: 0.5852 | F1-score: 0.5926

Per-Class Accuracy:
  - Rusty_Blackbird (Class 0): 0.7000
  - American_Crow (Class 1): 0.5000
  - Fish_Crow (Class 2): 0.5556

Best model for Fold 3 saved: /kaggle/working/secondary_dir/secondary_swin_fold3.pth

Fold 3 | Epoch 2:
Train Acc: 0.7018 | Val Acc: 0.5862
Precision: 0.5690 | Recall: 0.5778 | F1-score: 0.5723

Per-Class Accuracy:
  - Rusty_Blackbird (Class 0): 0.9000
  - American_Crow (Class 1): 0.5000
  - Fish_Crow (Class 2): 0.3333

Fold 3 | Epoch 3:
Train Acc: 0.7544 | Val Acc: 0.7931
Precision: 0.7864 | Recall: 0.7889 | F1-score: 0.7861

Per-Class Accuracy:
  - Rusty_Blackbird (Class 0): 1.0000
  - American_Crow (Class 1): 0.7000
  - Fish_Crow (Class 2): 0.6667

Best model for Fold 3 saved: /kaggle/working/secondary_dir/secondary_swin_fold3.pth

Fold 3 | Epoch 4:
Train Acc: 0.7807 | Val Acc: 0.6897
Precision: 0.7017 | Recall: 0.6815 | F1-score: 0.6807

Per-Class Ac

Some weights of SwinForImageClassification were not initialized from the model checkpoint at Emiel/cub-200-bird-classifier-swin and are newly initialized because the shapes did not match:
- classifier.bias: found shape torch.Size([200]) in the checkpoint and torch.Size([3]) in the model instantiated
- classifier.weight: found shape torch.Size([200, 1536]) in the checkpoint and torch.Size([3, 1536]) in the model instantiated
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
  scaler = torch.cuda.amp.GradScaler()



Fold 4 | Epoch 1:
Train Acc: 0.4174 | Val Acc: 0.6429
Precision: 0.6872 | Recall: 0.6519 | F1-score: 0.6403

Per-Class Accuracy:
  - Rusty_Blackbird (Class 0): 0.8889
  - American_Crow (Class 1): 0.6667
  - Fish_Crow (Class 2): 0.4000

Best model for Fold 4 saved: /kaggle/working/secondary_dir/secondary_swin_fold4.pth

Fold 4 | Epoch 2:
Train Acc: 0.5217 | Val Acc: 0.7500
Precision: 0.7677 | Recall: 0.7519 | F1-score: 0.7582

Per-Class Accuracy:
  - Rusty_Blackbird (Class 0): 0.8889
  - American_Crow (Class 1): 0.6667
  - Fish_Crow (Class 2): 0.7000

Best model for Fold 4 saved: /kaggle/working/secondary_dir/secondary_swin_fold4.pth

Fold 4 | Epoch 3:
Train Acc: 0.7043 | Val Acc: 0.7857
Precision: 0.7833 | Recall: 0.7889 | F1-score: 0.7844

Per-Class Accuracy:
  - Rusty_Blackbird (Class 0): 1.0000
  - American_Crow (Class 1): 0.6667
  - Fish_Crow (Class 2): 0.7000

Best model for Fold 4 saved: /kaggle/working/secondary_dir/secondary_swin_fold4.pth

Fold 4 | Epoch 4:
Train Acc: 0.6957 

Some weights of SwinForImageClassification were not initialized from the model checkpoint at Emiel/cub-200-bird-classifier-swin and are newly initialized because the shapes did not match:
- classifier.bias: found shape torch.Size([200]) in the checkpoint and torch.Size([3]) in the model instantiated
- classifier.weight: found shape torch.Size([200, 1536]) in the checkpoint and torch.Size([3, 1536]) in the model instantiated
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
  scaler = torch.cuda.amp.GradScaler()



Fold 5 | Epoch 1:
Train Acc: 0.4000 | Val Acc: 0.6071
Precision: 0.6444 | Recall: 0.6074 | F1-score: 0.6127

Per-Class Accuracy:
  - Rusty_Blackbird (Class 0): 0.5556
  - American_Crow (Class 1): 0.6667
  - Fish_Crow (Class 2): 0.6000

Best model for Fold 5 saved: /kaggle/working/secondary_dir/secondary_swin_fold5.pth

Fold 5 | Epoch 2:
Train Acc: 0.5565 | Val Acc: 0.6429
Precision: 0.6556 | Recall: 0.6519 | F1-score: 0.6378

Per-Class Accuracy:
  - Rusty_Blackbird (Class 0): 0.8889
  - American_Crow (Class 1): 0.6667
  - Fish_Crow (Class 2): 0.4000

Best model for Fold 5 saved: /kaggle/working/secondary_dir/secondary_swin_fold5.pth

Fold 5 | Epoch 3:
Train Acc: 0.6609 | Val Acc: 0.7143
Precision: 0.7185 | Recall: 0.7185 | F1-score: 0.7173

Per-Class Accuracy:
  - Rusty_Blackbird (Class 0): 0.8889
  - American_Crow (Class 1): 0.6667
  - Fish_Crow (Class 2): 0.6000

Best model for Fold 5 saved: /kaggle/working/secondary_dir/secondary_swin_fold5.pth

Fold 5 | Epoch 4:
Train Acc: 0.6870 

# Test and Generate Submission File

In [26]:
import os
import torch
import pandas as pd
from PIL import Image
from torch.utils.data import Dataset, DataLoader
from torchvision import transforms
from transformers import AutoImageProcessor, SwinForImageClassification

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Define class mapping
submission_class_order = [
    'Groove_billed_Ani', 'Red_winged_Blackbird', 'Rusty_Blackbird', 'Gray_Catbird',
    'Brandt_Cormorant', 'Eastern_Towhee', 'Indigo_Bunting', 'Brewer_Blackbird',
    'Painted_Bunting', 'Bobolink', 'Lazuli_Bunting', 'Yellow_headed_Blackbird',
    'American_Crow', 'Fish_Crow', 'Brown_Creeper', 'Yellow_billed_Cuckoo',
    'Yellow_breasted_Chat', 'Black_billed_Cuckoo', 'Gray_crowned_Rosy_Finch',
    'Bronzed_Cowbird'
]
submission_class_to_idx = {cls: idx for idx, cls in enumerate(submission_class_order)}

fine_grained_classes = {2, 12, 13}  # Fine-grained classes
fine_grained_class_map = {0: 2, 1: 12, 2: 13}  # Mapping from secondary model indices to global class indices

# Load primary models (K-Fold Ensemble)
num_folds = 5
primary_model_dir = "/kaggle/working/primary_dir"
processor = AutoImageProcessor.from_pretrained("Emiel/cub-200-bird-classifier-swin")

primary_models = []
for fold in range(num_folds):
    model_path = os.path.join(primary_model_dir, f"primary_swin_fold{fold+1}.pth")
    model = SwinForImageClassification.from_pretrained(
        "Emiel/cub-200-bird-classifier-swin",
        num_labels=len(submission_class_order),  # Ensure correct number of classes
        ignore_mismatched_sizes=True
    ).to(device)
    model.load_state_dict(torch.load(model_path, map_location=device))
    model.eval()
    primary_models.append(model)

# Load secondary models (K-Fold Ensemble)
secondary_model_name = "Emiel/cub-200-bird-classifier-swin"
id2label = {0: "Rusty_Blackbird", 1: "American_Crow", 2: "Fish_Crow"}
label2id = {v: k for k, v in id2label.items()}

secondary_model_dir = "/kaggle/working/secondary_dir"
secondary_models = []
for fold in range(num_folds):
    model_path = os.path.join(secondary_model_dir, f"secondary_swin_fold{fold+1}.pth")
    model = SwinForImageClassification.from_pretrained(
        secondary_model_name,
        num_labels=3,
        ignore_mismatched_sizes=True,
        id2label=id2label,
        label2id=label2id
    ).to(device)
    model.load_state_dict(torch.load(model_path, map_location=device))
    model.eval()
    secondary_models.append(model)

# Image preprocessing
expected_size = 384
val_transform = transforms.Compose([
    transforms.Resize((expected_size, expected_size)),
    transforms.ToTensor(),
    transforms.Normalize(mean=processor.image_mean, std=processor.image_std)
])

# Custom dataset class
class CompetitionTestDataset(Dataset):
    def __init__(self, test_dir, transform=None):
        self.image_files = sorted(os.listdir(test_dir))  # Maintain strict order
        self.image_paths = [os.path.join(test_dir, f) for f in self.image_files]
        self.transform = transform
    
    def __len__(self):
        return len(self.image_paths)

    def __getitem__(self, idx):
        image = Image.open(self.image_paths[idx]).convert('RGB')
        if self.transform:
            image = self.transform(image)
        return image, os.path.basename(self.image_paths[idx])

# Function to validate submission order
def validate_submission_order(predictions):
    """ Ensures all predicted class indices match the submission class order """
    invalid_indices = [p for p in predictions if p not in submission_class_to_idx.values()]
    if invalid_indices:
        raise ValueError(f"Invalid class indices found in predictions: {set(invalid_indices)}")
    return predictions

# Function to generate Kaggle submission
def generate_submission(test_dir, output_csv="submission.csv"):
    test_dataset = CompetitionTestDataset(test_dir, transform=val_transform)
    test_loader = DataLoader(test_dataset, batch_size=16, shuffle=False, num_workers=2)

    filenames, predictions = [], []

    with torch.no_grad():
        for images, paths in test_loader:
            images = images.to(device)
            
            # First-stage classification using primary models (Ensemble)
            ensemble_logits = torch.zeros(len(images), len(submission_class_order)).to(device)
            for model in primary_models:
                outputs = model(images)
                ensemble_logits += outputs.logits
            
            primary_preds = torch.argmax(ensemble_logits, dim=1).cpu().numpy()
            
            # Identify samples for secondary model
            secondary_indices = [i for i, pred in enumerate(primary_preds) if pred in fine_grained_classes]
            secondary_images = images[secondary_indices]
            
            if len(secondary_images) > 0:
                # Aggregate predictions from K secondary models
                ensemble_logits = torch.zeros(len(secondary_images), 3).to(device)
                for model in secondary_models:
                    outputs = model(secondary_images)
                    ensemble_logits += outputs.logits
                
                # Get final secondary predictions
                secondary_preds = torch.argmax(ensemble_logits, dim=1).cpu().numpy()
                remapped_secondary_preds = [fine_grained_class_map[pred] for pred in secondary_preds]
                
                # Replace primary predictions with secondary ones
                for i, sec_pred in zip(secondary_indices, remapped_secondary_preds):
                    primary_preds[i] = sec_pred
            
            filenames.extend(paths)
            predictions.extend(primary_preds.tolist())

    # Validate predictions before submission
    predictions = validate_submission_order(predictions)

    # Ensure submission order is respected
    submission_df = pd.DataFrame({'path': filenames, 'class_idx': predictions})

    print("\nValidation Results:")
    print(f"Total Samples: {len(submission_df)}")
    print(f"Number of unique file names: {submission_df['path'].nunique()}")
    print(f"Predicted category distribution:\n{submission_df['class_idx'].value_counts().sort_index()}")

    # Save predictions to CSV
    submission_df.to_csv(output_csv, index=False)
    print(f"\nSubmission CSV saved to: {output_csv}")

# Run submission generation
if __name__ == "__main__":
    test_dir = "/kaggle/input/croped-processed-augmented-bird-dataset/CropImage_Dataset/CropImage_Dataset/test_images/mistery_cat"
    generate_submission(test_dir)

Using a slow image processor as `use_fast` is unset and a slow processor was saved with this model. `use_fast=True` will be the default behavior in v4.48, even if the model was saved with a slow processor. This will result in minor differences in outputs. You'll still be able to use a slow processor with `use_fast=False`.
Some weights of SwinForImageClassification were not initialized from the model checkpoint at Emiel/cub-200-bird-classifier-swin and are newly initialized because the shapes did not match:
- classifier.bias: found shape torch.Size([200]) in the checkpoint and torch.Size([20]) in the model instantiated
- classifier.weight: found shape torch.Size([200, 1536]) in the checkpoint and torch.Size([20, 1536]) in the model instantiated
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
  model.load_state_dict(torch.load(model_path, map_location=device))
Some weights of SwinForImageClassification were not initialized fr


Validation Results:
Total Samples: 400
Number of unique file names: 400
Predicted category distribution:
class_idx
0     20
1     18
2     25
3     19
4     22
5     21
6     15
7     20
8     24
9     22
10    19
11    18
12    18
13    21
14    20
15    23
16    20
17    17
18    18
19    20
Name: count, dtype: int64

Submission CSV saved to: submission.csv
