In [50]:
import os
import random
import numpy as np
import pandas as pd
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader
from torchvision import transforms, models
from PIL import Image
from sklearn.model_selection import train_test_split
import matplotlib.pyplot as plt
import seaborn as sns
from tqdm import tqdm

In [51]:
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(f"Using device: {device}")

Using device: cpu


In [52]:
train_data = pd.read_csv("./data/train_images.csv")
train_data['image_path'] = 'data' + train_data['image_path']

train_df, temp_df = train_test_split(train_data, test_size=0.3, random_state=42)
val_df, test_df = train_test_split(temp_df, test_size=0.5, random_state=42)

print(f"Train: {len(train_df)}, Validation: {len(val_df)}, Test: {len(test_df)}")

Train: 2748, Validation: 589, Test: 589


In [53]:
class BirdieDataset(Dataset):
    def __init__(self, dataframe, root_dir, transform=None):
        self.data = dataframe
        self.root_dir = root_dir
        self.transform = transform

    def __len__(self):
        return len(self.data)

    def __getitem__(self, index):
        img_path = os.path.join(self.root_dir, self.data.iloc[index, 0])
        image = Image.open(img_path).convert("RGB")
        # 0-based indexing
        label = int(self.data.iloc[index, 1]) - 1  
        
        if self.transform:
            image = self.transform(image)
        return image, label

In [54]:
# tranformations
train_transform = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.RandomHorizontalFlip(),
    transforms.RandomRotation(15),
    transforms.RandomResizedCrop(224, scale=(0.8, 1.0)),
    transforms.ToTensor(),
    transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
])

val_test_transform = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.ToTensor(),
    transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
])

In [55]:
train_dataset = BirdieDataset(train_df, root_dir="", transform=train_transform)
val_dataset = BirdieDataset(val_df, root_dir="", transform=val_test_transform)
test_dataset = BirdieDataset(test_df, root_dir="", transform=val_test_transform)

# Create dataloaders
train_loader = DataLoader(train_dataset, batch_size=32, shuffle=True)
val_loader = DataLoader(val_dataset, batch_size=32, shuffle=False)
test_loader = DataLoader(test_dataset, batch_size=32, shuffle=False)

In [56]:
model = models.efficientnet_b0(pretrained=True)
model.classifier[1] = nn.Linear(model.classifier[1].in_features, 200) 



In [57]:
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=1e-4)

In [58]:
def train_model(model, train_loader, val_loader, criterion, optimizer, epochs=10):
    best_acc = 0.0
    for epoch in range(epochs):
        model.train()
        running_loss = 0.0
        correct = 0
        total = 0

        for images, labels in tqdm(train_loader):
            images, labels = images.to(device), labels.to(device)

            optimizer.zero_grad()
            outputs = model(images)
            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()

            running_loss += loss.item()
            _, predicted = outputs.max(1)
            total += labels.size(0)
            correct += predicted.eq(labels).sum().item()

        train_acc = 100. * correct / total
        val_acc = evaluate_model(model, val_loader)

        print(f"Epoch {epoch+1}/{epochs} - Loss: {running_loss:.4f} - Train Acc: {train_acc:.2f}% - Val Acc: {val_acc:.2f}%")
        if val_acc > best_acc:
            best_acc = val_acc
            torch.save(model.state_dict(), "best_model.pth")
    print("Training Complete. Best Validation Accuracy:", best_acc)

In [59]:
def evaluate_model(model, loader):
    model.eval()
    correct = 0
    total = 0
    with torch.no_grad():
        for images, labels in loader:
            images, labels = images.to(device), labels.to(device)
            outputs = model(images)
            _, predicted = outputs.max(1)
            total += labels.size(0)
            correct += predicted.eq(labels).sum().item()
    return 100. * correct / total

# Train the model
train_model(model, train_loader, val_loader, criterion, optimizer, epochs=10)

100%|█████████████████████████████████████████████| 86/86 [12:58<00:00,  9.05s/it]


Epoch 1/10 - Loss: 444.0411 - Train Acc: 2.73% - Val Acc: 9.00%


100%|█████████████████████████████████████████████| 86/86 [12:24<00:00,  8.66s/it]


Epoch 2/10 - Loss: 389.3887 - Train Acc: 18.74% - Val Acc: 19.69%


100%|█████████████████████████████████████████████| 86/86 [11:32<00:00,  8.05s/it]


Epoch 3/10 - Loss: 322.3734 - Train Acc: 32.57% - Val Acc: 29.71%


100%|█████████████████████████████████████████████| 86/86 [12:03<00:00,  8.42s/it]


Epoch 4/10 - Loss: 265.7648 - Train Acc: 42.36% - Val Acc: 35.65%


100%|█████████████████████████████████████████████| 86/86 [12:05<00:00,  8.44s/it]


Epoch 5/10 - Loss: 218.4249 - Train Acc: 51.31% - Val Acc: 41.09%


100%|█████████████████████████████████████████████| 86/86 [11:40<00:00,  8.15s/it]


Epoch 6/10 - Loss: 182.2168 - Train Acc: 59.02% - Val Acc: 43.12%


100%|█████████████████████████████████████████████| 86/86 [11:25<00:00,  7.97s/it]


Epoch 7/10 - Loss: 151.4660 - Train Acc: 66.78% - Val Acc: 48.05%


100%|█████████████████████████████████████████████| 86/86 [12:00<00:00,  8.38s/it]


Epoch 8/10 - Loss: 127.5468 - Train Acc: 72.53% - Val Acc: 52.46%


100%|█████████████████████████████████████████████| 86/86 [12:56<00:00,  9.03s/it]


Epoch 9/10 - Loss: 108.3412 - Train Acc: 77.91% - Val Acc: 52.29%


100%|█████████████████████████████████████████████| 86/86 [12:16<00:00,  8.56s/it]


Epoch 10/10 - Loss: 89.5271 - Train Acc: 83.30% - Val Acc: 54.50%
Training Complete. Best Validation Accuracy: 54.49915110356537


In [62]:
test_data = pd.read_csv("./data/test_images_path.csv")
class_names = []
for name in np.load("./data/class_names.npy", allow_pickle=True).item():
    class_names.append(name)

In [63]:
predictions = []

# Predict labels for all test images
for _, row in test_data.iterrows():
    image_path = f"./data/{row['image_path']}"  # Construct the full path to the image

    # Open and preprocess the image
    image = Image.open(image_path).convert("RGB")
    input_tensor = val_test_transform(image).unsqueeze(0).to(device)  # Add batch dimension and move to device

    # Predict using the model
    output = model(input_tensor)
    _, predicted_class = torch.max(output, 1)

    # Map predicted class index to class name
    predicted_label = class_names[predicted_class.item()]
    predictions.append(predicted_label)

# Save predictions to a new CSV file
test_data["predicted_label"] = predictions

In [66]:
submission = test_data[['id', 'predicted_label']]
submission['label'] = [string[0:3] for string in submission['predicted_label']]
submission['label'] = pd.to_numeric(submission['label'])
submission = submission[['id','label']]
submission.index = pd.Index(range(1, len(submission) + 1))
submission.index.name = 'id'
submission = submission[['label']] 
submission

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  submission['label'] = [string[0:3] for string in submission['predicted_label']]
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  submission['label'] = pd.to_numeric(submission['label'])


Unnamed: 0_level_0,label
id,Unnamed: 1_level_1
1,67
2,39
3,74
4,12
5,74
...,...
3996,114
3997,95
3998,135
3999,62
