In [15]:
import os
import pandas as pd
from PIL import Image
import torch
import torch.nn as nn
import torch.optim as optim
from torchvision import transforms
from torch.utils.data import DataLoader, TensorDataset, Subset
import numpy as np
from sklearn.model_selection import train_test_split
import torchvision.models as models
from torchvision.models import ResNet34_Weights
import tqdm
import time
from torch.optim import lr_scheduler

In [3]:
# Check device
device = "cuda" if torch.cuda.is_available() else "cpu"
print(f"Using {device} device")

# Load CSV data
train_df = pd.read_csv("./data/train_images.csv")
test_df = pd.read_csv("./data/test_images_path.csv")

train_df['image_path'] = 'data' + train_df['image_path']
test_df['image_path'] = 'data' + test_df['image_path']

# Paths
train_dir = "./data/train_images"  # Directory for training images
test_dir = "./data/test_images"    # Directory for test images

# Transformations
train_transform = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.RandomHorizontalFlip(),
    transforms.ToTensor(),
    transforms.Normalize((0.0, 0.0, 0.0), (1.0/255.0, 1.0/255.0, 1.0/255.0))
])

test_transform = transforms.Compose([
    transforms.Resize((128, 128)),
    transforms.ToTensor(),
    transforms.Normalize((0.0, 0.0, 0.0), (1.0/255.0, 1.0/255.0, 1.0/255.0))
])

Using cpu device


In [5]:
def prepare_data(df, transform):
    images = []
    labels = []
    for _, row in df.iterrows():
        image = Image.open(row['image_path']).convert("RGB")
        image = transform(image)
        images.append(image)
        labels.append(row['label'] - 1)
    images = torch.stack(images)
    labels = torch.tensor(labels)
    return images, labels

train_images, train_labels = prepare_data(train_df, train_transform)

In [6]:
train_labels.max()

tensor(199)

In [23]:
# Splitting into training and validation sets
train_indices, val_indices = train_test_split(range(len(train_images)), test_size=0.2, random_state=42)
train_dataset = Subset(TensorDataset(train_images, train_labels), train_indices)
val_dataset = Subset(TensorDataset(train_images, train_labels), val_indices)
# Data loaders
train_loader = DataLoader(train_dataset, batch_size=32, shuffle=True, drop_last=True, num_workers=2)
val_loader = DataLoader(val_dataset, batch_size=32, shuffle=False, num_workers=2)
batch_size = 32

In [25]:
# Training the model
def train_model(model, criterion, optimizer, scheduler, n_epochs):
    losses = []
    accuracies = []
    valid_accuracies = []

    # set the model to train mode initially
    model.train()
    for epoch in tqdm.tqdm(range(n_epochs)):
        since = time.time()
        running_loss = 0.0
        running_correct = 0.0
        for i, data in enumerate(train_loader, 0):
            
            # get the inputs and assign them to cuda
            inputs, labels = data
            inputs = inputs.to(device)
            labels = labels.to(device)
            optimizer.zero_grad()
            outputs = model(inputs)
            _, predicted = torch.max(outputs.data, 1)
            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()

            # calculate the loss/acc later
            running_loss += loss.item()
            running_correct += (labels==predicted).sum().item()

        epoch_duration = time.time() - since
        epoch_loss = running_loss / len(train_loader)
        epoch_acc = 100.0 / batch_size * running_correct / len(train_loader)
        print("\nEpoch %s, duration: %d s, loss: %.4f, acc: %.4f" % (epoch+1, epoch_duration, epoch_loss, epoch_acc))

        losses.append(epoch_loss)
        accuracies.append(epoch_acc)

        # switch the model to eval mode
        model.eval()
        valid_acc = eval_model(model)
        valid_accuracies.append(valid_acc)

        # re-set the model to train mode
        model.train()
        scheduler.step()
        since = time.time()
    print('Finished Training')
    return model, losses, accuracies, valid_accuracies

In [27]:
# Validation function
def eval_model(model):
    correct = 0.0
    total = 0.0
    with torch.no_grad():
        for i, data in enumerate(val_loader, 0):
            images, labels = data
            images = images.to(device)
            labels = labels.to(device)

            outputs = model(images)
            _, predicted = torch.max(outputs.data, 1)

            total += labels.size(0)
            correct += (predicted == labels).sum().item()

    valid_acc = 100.0 * correct / total
    print('\nAccuracy of the network on the valid images: %.4f %%' % (
        valid_acc))
    return valid_acc

In [29]:
# Define parameters
bird_classes = 200
model = models.resnet34(weights=ResNet34_Weights.IMAGENET1K_V1)
num_features = model.fc.in_features
model.fc = nn.Linear(num_features, bird_classes)

# Define the loss function
criterion = nn.CrossEntropyLoss()
# Define the optimizer
optimizer = torch.optim.SGD(model.parameters(), lr=0.001, momentum=0.9)
# Define the learning rate scheduler
lrscheduler = lr_scheduler.StepLR(optimizer, step_size=7, gamma=0.1)

In [31]:
model_trained, losses, accuracies, valid_accuracies = train_model(model, criterion, optimizer, lrscheduler, n_epochs=5)

  0%|                                                                                            | 0/5 [00:00<?, ?it/s]


Epoch 1, duration: 602 s, loss: 5.1142, acc: 4.7832


 20%|████████████████▌                                                                  | 1/5 [10:49<43:16, 649.03s/it]


Accuracy of the network on the valid images: 11.5776 %

Epoch 2, duration: 966 s, loss: 3.9572, acc: 28.5077


 40%|█████████████████████████████████▏                                                 | 2/5 [27:39<43:05, 861.84s/it]


Accuracy of the network on the valid images: 25.5725 %

Epoch 3, duration: 846 s, loss: 3.1104, acc: 46.1735


 60%|█████████████████████████████████████████████████▊                                 | 3/5 [43:23<29:58, 899.33s/it]


Accuracy of the network on the valid images: 32.9517 %

Epoch 4, duration: 1060 s, loss: 2.4800, acc: 59.5344


 80%|████████████████████████████████████████████████████████████████▊                | 4/5 [1:01:49<16:20, 980.96s/it]


Accuracy of the network on the valid images: 37.4046 %

Epoch 5, duration: 773 s, loss: 1.9870, acc: 68.0804


100%|█████████████████████████████████████████████████████████████████████████████████| 5/5 [1:16:03<00:00, 912.62s/it]


Accuracy of the network on the valid images: 43.1298 %
Finished Training



