# Libraries

In [1]:
! pip install transformers timm albumentations --quiet
! pip install ipywidgets --quiet

In [2]:
# Standard Libraries
import os
import random
from collections import Counter

# Numerical and Data Handling Libraries
import numpy as np
import pandas as pd

# Image Processing Libraries
import cv2
from PIL import Image

# Plotting and Visualization
import matplotlib.pyplot as plt

# PyTorch and Torchvision
import torch
from torch.utils.data import Dataset, DataLoader
from torch.optim import AdamW
from torchvision import transforms
from torchvision.datasets import ImageFolder

# Transformers for Vision Models
from transformers import ViTConfig, ViTForImageClassification, ViTImageProcessor

# Utility Libraries
from tqdm import tqdm
from sklearn.metrics import accuracy_score

# jupyter nbextension enable --py widgetsnbextension
# The custom widget manager is a Google Colab helper. Where the `google.colab`
# package is not installed, skip it instead of aborting the whole import cell;
# `output` is then left as None so the name is still defined.
try:
    from google.colab import output
except ImportError:
    output = None
else:
    output.enable_custom_widget_manager()

# Data Import

In [3]:
# Absolute Kaggle input paths for the three image splits.
# train_dir and val_dir are the roots handed to ImageFolder in the feature-extraction cell.
train_dir = "/kaggle/input/croped-processed-augmented-bird-dataset/CropImage_Dataset/CropImage_Dataset/train_images"
val_dir = "/kaggle/input/croped-processed-augmented-bird-dataset/CropImage_Dataset/CropImage_Dataset/val_images"
# NOTE(review): in the cells visible here, test_dir is only referenced by the
# commented-out submission cell; confirm whether it is still needed.
test_dir = "/kaggle/input/croped-processed-augmented-bird-dataset/CropImage_Dataset/CropImage_Dataset/test_images/mistery_cat"

In [4]:
import torch
from transformers import AutoImageProcessor, AutoModel
from torchvision import transforms, datasets
from torch.utils.data import DataLoader
from PIL import Image
import torch.nn as nn
import torch.optim as optim
import os

# Image folders for the two labelled splits
train_dir = "/kaggle/input/croped-processed-augmented-bird-dataset/CropImage_Dataset/CropImage_Dataset/train_images"
val_dir = "/kaggle/input/croped-processed-augmented-bird-dataset/CropImage_Dataset/CropImage_Dataset/val_images"

# Run configuration: loader batch size, optimizer step size, and compute device
batch_size = 16
learning_rate = 0.001
if torch.cuda.is_available():
    device = torch.device("cuda")
else:
    device = torch.device("cpu")

# Per-image preprocessing applied by the datasets: fixed 224x224 resize, then tensor conversion
transform = transforms.Compose(
    [transforms.Resize((224, 224)), transforms.ToTensor()]
)

# One ImageFolder dataset per split; only the training loader shuffles
train_dataset = datasets.ImageFolder(train_dir, transform=transform)
val_dataset = datasets.ImageFolder(val_dir, transform=transform)
train_loader = DataLoader(train_dataset, shuffle=True, batch_size=batch_size)
val_loader = DataLoader(val_dataset, shuffle=False, batch_size=batch_size)

# Pretrained DINOv2 backbone and its image processor; the backbone is moved to `device`
processor = AutoImageProcessor.from_pretrained("facebook/dinov2-small")
model = AutoModel.from_pretrained("facebook/dinov2-small")
model = model.to(device)

def extract_features(data_loader, model, processor):
    """Run the backbone over a loader and collect one embedding per image.

    Args:
        data_loader: Iterable yielding ``(images, labels)`` batches. Iterating
            ``images`` must give one per-image tensor that
            ``transforms.ToPILImage`` can convert.
        model: Backbone whose output exposes ``last_hidden_state``. It is put
            into eval mode and left in that mode.
        processor: Callable invoked as ``processor(images=..., return_tensors="pt")``;
            its result must support ``.to(device)`` and unpack as keyword
            arguments for ``model``.

    Returns:
        Tuple ``(features, labels)``: the first-token hidden state of every
        image concatenated along dim 0, and the matching labels, both on the
        device the model lives on.
    """
    model.eval()

    # Follow the device of the model that was actually passed in, rather than
    # relying on a notebook-level `device` global that may not match it.
    first_param = next(model.parameters(), None)
    model_device = first_param.device if first_param is not None else torch.device("cpu")

    # The converter is stateless, so build it once instead of once per image.
    to_pil = transforms.ToPILImage()

    features, labels_list = [], []
    with torch.no_grad():
        for images, labels in data_loader:
            # The processor is fed PIL images, so convert the batch tensor back image by image.
            pil_images = [to_pil(img) for img in images]
            inputs = processor(images=pil_images, return_tensors="pt").to(model_device)
            outputs = model(**inputs)
            features.append(outputs.last_hidden_state[:, 0, :])  # [CLS] token representation
            labels_list.append(labels.to(model_device))  # keep labels next to the features
    return torch.cat(features), torch.cat(labels_list)

# Embed both labelled splits once so the classifier head trains on cached features
train_features, train_labels = extract_features(
    data_loader=train_loader, model=model, processor=processor
)
val_features, val_labels = extract_features(
    data_loader=val_loader, model=model, processor=processor
)

preprocessor_config.json:   0%|          | 0.00/436 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/547 [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/88.2M [00:00<?, ?B/s]

In [5]:
# Number of iterations run by train_classifier; each one is a single
# full-batch optimizer step followed by a validation pass.
epochs = 50

# Define MLP Classifier
# Output width of the classifier: one logit per entry in the training dataset's class list.
target_classes = len(train_dataset.classes)
class MLPClassifier(nn.Module):
    """Two-layer perceptron head: Linear -> ReLU -> Linear.

    Args:
        input_dim: Size of each input feature vector.
        num_classes: Number of output logits.
        hidden_dim: Width of the hidden layer. Defaults to 512, the value that
            used to be hard-coded, so existing two-argument calls build the
            same network as before.
    """

    def __init__(self, input_dim, num_classes, hidden_dim=512):
        super().__init__()
        # The attribute names fc1/relu/fc2 determine the state_dict keys; keep
        # them stable so previously saved weights still load.
        self.fc1 = nn.Linear(input_dim, hidden_dim)
        self.relu = nn.ReLU()
        self.fc2 = nn.Linear(hidden_dim, num_classes)

    def forward(self, x):
        """Return the output of the final linear layer (no softmax is applied)."""
        return self.fc2(self.relu(self.fc1(x)))

# Size the head from the extracted features (input) and the class count (output), then move it to `device`
classifier = MLPClassifier(input_dim=train_features.size(1), num_classes=target_classes)
classifier = classifier.to(device)

# Cross-entropy objective, optimized with Adam over the head's parameters
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(params=classifier.parameters(), lr=learning_rate)

# Training loop
def train_classifier(classifier, train_features, train_labels, val_features, val_labels, criterion, optimizer, epochs):
    """Fit ``classifier`` on pre-extracted features using full-batch steps.

    Each epoch performs exactly one optimizer step on the entire training set,
    then evaluates on the entire validation set and prints one progress line.

    Args:
        classifier: Module mapping a feature batch to class logits; updated in place.
        train_features: Training feature tensor passed to ``classifier`` in one batch.
        train_labels: Targets for ``train_features`` as accepted by ``criterion``.
        val_features: Validation feature tensor.
        val_labels: Targets for ``val_features``.
        criterion: Loss callable invoked as ``criterion(outputs, labels)``.
        optimizer: Optimizer already bound to ``classifier``'s parameters.
        epochs: Number of train/validate iterations to run.

    Returns:
        None. Progress is reported via ``print``; the classifier is left in
        eval mode after the last epoch.
    """
    # Use the device the classifier actually lives on instead of a
    # notebook-level `device` global, so tensors and weights always match.
    first_param = next(classifier.parameters(), None)
    target_device = first_param.device if first_param is not None else torch.device("cpu")

    # The tensors do not change between epochs, so transfer them once up front
    # rather than on every iteration.
    train_features = train_features.to(target_device)
    train_labels = train_labels.to(target_device)
    val_features = val_features.to(target_device)
    val_labels = val_labels.to(target_device)

    for epoch in range(epochs):
        # One full-batch gradient step.
        classifier.train()
        optimizer.zero_grad()
        outputs = classifier(train_features)
        loss = criterion(outputs, train_labels)
        loss.backward()
        optimizer.step()

        # Validation pass without gradient tracking.
        classifier.eval()
        with torch.no_grad():
            val_outputs = classifier(val_features)
            val_loss = criterion(val_outputs, val_labels)
            val_acc = (val_outputs.argmax(dim=1) == val_labels).float().mean().item()

        print(f"Epoch {epoch+1}/{epochs}, Loss: {loss.item():.4f}, Val Loss: {val_loss.item():.4f}, Val Acc: {val_acc:.4f}")

# Fit the classifier head on the cached train/val features
train_classifier(
    classifier=classifier,
    train_features=train_features,
    train_labels=train_labels,
    val_features=val_features,
    val_labels=val_labels,
    criterion=criterion,
    optimizer=optimizer,
    epochs=epochs,
)

# Persist only the learned weights (the state dict), then report completion
trained_weights = classifier.state_dict()
torch.save(trained_weights, "mlp_classifier.pth")
print("MLP Classifier training completed and saved.")

Epoch 1/50, Loss: 3.0944, Val Loss: 2.1267, Val Acc: 0.5551
Epoch 2/50, Loss: 2.0988, Val Loss: 1.3853, Val Acc: 0.8136
Epoch 3/50, Loss: 1.3442, Val Loss: 0.8928, Val Acc: 0.8686
Epoch 4/50, Loss: 0.8399, Val Loss: 0.6000, Val Acc: 0.8983
Epoch 5/50, Loss: 0.5402, Val Loss: 0.4317, Val Acc: 0.9237
Epoch 6/50, Loss: 0.3691, Val Loss: 0.3343, Val Acc: 0.9280
Epoch 7/50, Loss: 0.2703, Val Loss: 0.2771, Val Acc: 0.9322
Epoch 8/50, Loss: 0.2113, Val Loss: 0.2421, Val Acc: 0.9407
Epoch 9/50, Loss: 0.1732, Val Loss: 0.2192, Val Acc: 0.9407
Epoch 10/50, Loss: 0.1460, Val Loss: 0.2026, Val Acc: 0.9322
Epoch 11/50, Loss: 0.1251, Val Loss: 0.1894, Val Acc: 0.9322
Epoch 12/50, Loss: 0.1079, Val Loss: 0.1782, Val Acc: 0.9322
Epoch 13/50, Loss: 0.0931, Val Loss: 0.1690, Val Acc: 0.9322
Epoch 14/50, Loss: 0.0805, Val Loss: 0.1619, Val Acc: 0.9280
Epoch 15/50, Loss: 0.0701, Val Loss: 0.1566, Val Acc: 0.9280
Epoch 16/50, Loss: 0.0617, Val Loss: 0.1529, Val Acc: 0.9280
Epoch 17/50, Loss: 0.0550, Val Lo

# Test and Generate Submission File

In [6]:
# import os
# import torch
# import pandas as pd
# from PIL import Image
# from torch.utils.data import Dataset, DataLoader
# from torchvision import transforms
# from transformers import AutoImageProcessor, SwinForImageClassification

# device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# submission_class_order = [
#     'Groove_billed_Ani',
#     'Red_winged_Blackbird',
#     'Rusty_Blackbird',
#     'Gray_Catbird',
#     'Brandt_Cormorant',
#     'Eastern_Towhee',
#     'Indigo_Bunting',
#     'Brewer_Blackbird',
#     'Painted_Bunting',
#     'Bobolink',
#     'Lazuli_Bunting',
#     'Yellow_headed_Blackbird',
#     'American_Crow',
#     'Fish_Crow',
#     'Brown_Creeper',
#     'Yellow_billed_Cuckoo',
#     'Yellow_breasted_Chat',
#     'Black_billed_Cuckoo',
#     'Gray_crowned_Rosy_Finch',
#     'Bronzed_Cowbird'
# ]

# model_dir = "/kaggle/working/swin_model" 
# processor = AutoImageProcessor.from_pretrained(model_dir)

# expected_size = 384
# val_transform = transforms.Compose([
#     transforms.Resize((expected_size, expected_size)),
#     transforms.ToTensor(),
#     transforms.Normalize(mean=processor.image_mean, std=processor.image_std)
# ])

# class CompetitionTestDataset(Dataset):
#     def __init__(self, test_dir, transform=None):
#         self.test_dir = test_dir
#         self.image_files = sorted(os.listdir(test_dir))  # keep a strict, deterministic file order
#         self.image_paths = [os.path.join(test_dir, f) for f in self.image_files]
#         self.transform = transform
        
#     def __len__(self):
#         return len(self.image_paths)
    
#     def __getitem__(self, idx):
#         image = Image.open(self.image_paths[idx]).convert('RGB')
#         if self.transform:
#             image = self.transform(image)
#         return image, os.path.basename(self.image_paths[idx])

# def load_trained_model(model_dir, class_order):
#     model = SwinForImageClassification.from_pretrained(model_dir).to(device)
    
#     config = model.config
#     if config.id2label != {i: cls for i, cls in enumerate(class_order)}:
#         print("Warning: The order of categories in the model configuration is inconsistent with the submission requirements. Overriding with submission order.")
#         config.id2label = {i: cls for i, cls in enumerate(class_order)}
#         config.label2id = {cls: i for i, cls in enumerate(class_order)}
    
#     model.eval()
#     return model

# def generate_submission(test_dir, model_dir, output_csv="submission.csv"):
#     test_dataset = CompetitionTestDataset(test_dir, transform=val_transform)
#     test_loader = DataLoader(test_dataset, batch_size=64, shuffle=False, num_workers=2)
    
#     model = load_trained_model(model_dir, submission_class_order)
    
#     filenames = []
#     predictions = []
    
#     with torch.no_grad():
#         for images, paths in test_loader:
#             outputs = model(images.to(device))
#             batch_preds = torch.argmax(outputs.logits, dim=1).cpu().numpy()
#             filenames.extend(paths)
#             predictions.extend(batch_preds.tolist())
    
#     submission_df = pd.DataFrame({
#         'path': filenames,
#         'class_idx': predictions
#     })
    
#     print("\nValidation Results:")
#     print(f"Total Samples: {len(submission_df)}")
#     print(f"Number of unique file names: {submission_df['path'].nunique()}")
#     print(f"Predicted category distribution:\n{submission_df['class_idx'].value_counts().sort_index()}")
    
#     submission_df.to_csv(output_csv, index=False)
#     print(f"\nSubmission CSV saved to: {output_csv}")

# if __name__ == "__main__":
#     test_dir = "/kaggle/input/croped-processed-augmented-bird-dataset/CropImage_Dataset/CropImage_Dataset/test_images/mistery_cat"
#     generate_submission(test_dir, model_dir)