In [1]:
# https://www.kaggle.com/datasets/gpiosenka/100-bird-species

In [1]:
import pandas as pd
import matplotlib.pyplot as plt
import matplotlib.image as img
import torch
import torch.nn.functional as F
import torchvision
import torchvision.transforms as transforms
from torch.utils.data import Dataset, DataLoader
import csv

In [84]:
import cv2

def image_loader(data_dict):
    """Load the images listed in ``data_dict`` and return them with their labels.

    Parameters
    ----------
    data_dict : dict
        Mapping that provides ``'filepaths'`` (an iterable of image paths
        handed to ``cv2.imread``) and ``'class_id'`` (an iterable of labels,
        presumably one per path -- the lengths are not checked here).

    Returns
    -------
    tuple
        ``(img_tensors, labels)`` where ``img_tensors`` is a list with one
        ``torch.Tensor`` per path, created by ``torch.from_numpy`` directly
        from the array OpenCV returned (no resizing, scaling or channel
        reordering is applied), and ``labels`` is a new list holding the
        values of ``data_dict['class_id']``.

    Raises
    ------
    FileNotFoundError
        If ``cv2.imread`` returns ``None`` for a path, i.e. the file is
        missing or cannot be decoded.
    """
    img_tensors = []
    for filepath in data_dict['filepaths']:
        pixels = cv2.imread(filepath)
        # cv2.imread signals failure by returning None instead of raising;
        # fail loudly here rather than with an opaque TypeError further down.
        if pixels is None:
            raise FileNotFoundError(f"Could not read image: {filepath!r}")
        # NOTE(review): OpenCV's documented default is BGR channel order in an
        # H x W x C uint8 array -- confirm downstream code expects that layout.
        img_tensors.append(torch.from_numpy(pixels))

    # Take the labels from the argument, not from a notebook-level global,
    # so the function works for any split (train, test, ...).
    labels = list(data_dict['class_id'])

    return img_tensors, labels

In [2]:
# Read the dataset index file into a DataFrame and preview its first rows
tabulated_data = pd.read_csv(filepath_or_buffer='birds.csv')
tabulated_data.head(n=5)

Unnamed: 0,class id,filepaths,labels,scientific label,data set
0,0,train/ABBOTTS BABBLER/001.jpg,ABBOTTS BABBLER,Malacocincla abbotti,train
1,0,train/ABBOTTS BABBLER/002.jpg,ABBOTTS BABBLER,Malacocincla abbotti,train
2,0,train/ABBOTTS BABBLER/003.jpg,ABBOTTS BABBLER,Malacocincla abbotti,train
3,0,train/ABBOTTS BABBLER/004.jpg,ABBOTTS BABBLER,Malacocincla abbotti,train
4,0,train/ABBOTTS BABBLER/005.jpg,ABBOTTS BABBLER,Malacocincla abbotti,train


In [3]:
# Give the label column an underscore name. `rename` returns a new frame,
# so the frame displayed by the loading cell is not mutated in place.
tabulated_data = tabulated_data.rename(columns={'class id': 'class_id'})

# Look for train/test data only
tabulated_data = tabulated_data[tabulated_data['data set'].isin(['train', 'test'])]

# Reduce classes for testing purposes: keep class ids 0 through 9
filtered_data = tabulated_data[tabulated_data['class_id'].isin(range(0, 10))]

# Draw a 10% subsample. The fixed seed makes Restart & Run All select the
# same rows every time; change SAMPLE_SEED to draw a different subset.
SAMPLE_SEED = 42
df = filtered_data.sample(frac=0.1, random_state=SAMPLE_SEED).reset_index(drop=True)

In [4]:
# Pick the compute device: the GPU when CUDA is available, otherwise the CPU
if torch.cuda.is_available():
    device = torch.device("cuda")
else:
    device = torch.device("cpu")

In [5]:
# Image preprocessing pipeline, applied in order:
#   1. resize to 224 x 224
#   2. convert to a tensor
#   3. normalise each of the three channels with the given mean / std
#      (these look like the commonly used ImageNet statistics -- confirm)
transform = transforms.Compose(
    [
        transforms.Resize(size=(224, 224)),
        transforms.ToTensor(),
        transforms.Normalize(
            mean=[0.485, 0.456, 0.406],
            std=[0.229, 0.224, 0.225],
        ),
    ]
)


In [95]:
# Split the sampled frame on its 'data set' column and keep only the two
# columns needed downstream (image path and class id).
df_train = df.loc[df['data set'] == 'train', ['filepaths', 'class_id']].copy()
df_test = df.loc[df['data set'] == 'test', ['filepaths', 'class_id']].copy()

# Column-oriented dicts of the form {'filepaths': [...], 'class_id': [...]}
train_data = df_train.to_dict('list')
test_data = df_test.to_dict('list')

# Flat list of training image paths. Iterating over `train_data` itself would
# walk its keys and produce one full copy of the path list per key.
filepaths = list(train_data['filepaths'])

In [105]:
# Build the training dataset from the 'train' directory, attaching the
# preprocessing pipeline defined in `transform`
train_dataset = torchvision.datasets.ImageFolder('train', transform=transform)

In [110]:
# Display the dataset summary: number of datapoints, root location and transform
train_dataset

Dataset ImageFolder
    Number of datapoints: 70626
    Root location: train
    StandardTransform
Transform: Compose(
               Resize(size=(224, 224), interpolation=bilinear, max_size=None, antialias=None)
               ToTensor()
               Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
           )

In [107]:
# Inspect the sample at index 1; the result is a tuple whose first element is
# the transformed image tensor (the full tensor is printed -- large output)
train_dataset[1]

(tensor([[[-1.4843, -1.4500, -1.4500,  ..., -1.2274, -1.2274, -1.1932],
          [-1.4843, -1.4500, -1.4672,  ..., -1.1932, -1.2103, -1.2103],
          [-1.4672, -1.4500, -1.4672,  ..., -1.1589, -1.1932, -1.2103],
          ...,
          [ 0.5193, -0.0116, -0.6281,  ..., -1.0904, -1.1075, -1.1418],
          [-0.2684, -0.6623, -0.2171,  ..., -1.0904, -1.1075, -1.1418],
          [ 0.0056,  0.6221,  1.0844,  ..., -1.0904, -1.0904, -1.1247]],
 
         [[-0.9503, -0.9153, -0.9503,  ..., -0.6176, -0.6001, -0.5651],
          [-0.9503, -0.9153, -0.9328,  ..., -0.5826, -0.5826, -0.5826],
          [-0.9328, -0.9153, -0.9328,  ..., -0.5476, -0.5651, -0.5826],
          ...,
          [ 0.2227, -0.3550, -1.0728,  ..., -0.5126, -0.5476, -0.5826],
          [-0.7052, -1.1253, -0.6877,  ..., -0.5126, -0.5476, -0.5826],
          [-0.5126,  0.1527,  0.6254,  ..., -0.5126, -0.5301, -0.5651]],
 
         [[-1.2293, -1.1944, -1.2119,  ..., -0.9330, -0.9678, -0.9330],
          [-1.2293, -1.1944,

In [109]:
# Inspect the sample at index 999; the result is a tuple whose first element is
# the transformed image tensor (the full tensor is printed -- large output)
train_dataset[999]

(tensor([[[ 1.2043,  1.1872,  1.1700,  ...,  1.0159,  1.0331,  1.0159],
          [ 1.2043,  1.2043,  1.1872,  ...,  0.9817,  0.9988,  0.9817],
          [ 1.2043,  1.2214,  1.2214,  ...,  0.9303,  0.9646,  0.9474],
          ...,
          [-0.4739, -0.2856, -0.7822,  ..., -0.1143, -0.2513,  0.1254],
          [-0.2171, -0.8678, -0.9192,  ..., -0.7479, -0.1143, -0.1143],
          [-0.9877, -1.0048, -1.3302,  ..., -0.0629,  0.2624, -0.3541]],
 
         [[ 1.1856,  1.1681,  1.1506,  ...,  0.9930,  1.0105,  0.9930],
          [ 1.1856,  1.1856,  1.1681,  ...,  0.9580,  0.9755,  0.9580],
          [ 1.1856,  1.2031,  1.2031,  ...,  0.9055,  0.9405,  0.9230],
          ...,
          [-0.4951, -0.3200, -0.8627,  ...,  0.2927,  0.1352,  0.5903],
          [-0.1975, -0.9328, -1.0028,  ..., -0.3725,  0.2577,  0.2752],
          [-0.9853, -1.0203, -1.4405,  ...,  0.2927,  0.6254,  0.0301]],
 
         [[ 0.9668,  0.9494,  0.9319,  ...,  0.2871,  0.3045,  0.3219],
          [ 0.9668,  0.9668,