In [1]:
"""
Solution for D2K Audubon project

Create images for bird detection
"""

import numpy as np
import utils.data_processing as dp
import utils.data_vis as vis
from utils.global_const import *

In [2]:
#######################################################################################
# Part 1 - Data processing

def test_get_file_names():
    ''' Run get_file_names() on the raw data folder and cache the results.

    Looks up the csv files first and the jpg files second, then stores both
    lists in FILES['dataset'] under the keys 'jpg' and 'csv'.
    '''
    raw_dir = DATA_PATH + 'raw/'
    found = {}
    for extension in ('csv', 'jpg'):
        found[extension] = dp.get_file_names(raw_dir, extension)
    FILES['dataset'] = {'jpg': found['jpg'], 'csv': found['csv']}

test_get_file_names()

In [3]:
def test_csv_to_df():
    ''' Run csv_to_df() on the first csv file listed in FILES['dataset'].

    The result is discarded; this only checks that the call completes.
    '''
    first_csv = FILES['dataset']['csv'][0]
    dp.csv_to_df(first_csv, COL_NAMES)

test_csv_to_df()

In [4]:
def test_concat_frames():
    ''' Run concat_frames() over every csv file listed in FILES['dataset'].

    The result is discarded; this only checks that the call completes.
    '''
    csv_files = FILES['dataset']['csv']
    dp.concat_frames(csv_files, COL_NAMES)

test_concat_frames()

In [5]:
def test_add_col():
    ''' Run add_col() on the concatenated csv files and cache the result.

    GROUPS is inverted so that every member listed under a key maps back to
    that key; the inverted mapping is handed to add_col() together with the
    column names 'group_id' and 'class_id'. The returned frame is stored in
    FRAMES['combined annotations'].
    '''
    # Later entries overwrite earlier ones if a member appears under two keys.
    member_to_group = {
        member: group
        for group, members in GROUPS.items()
        for member in members
    }
    annotations = dp.concat_frames(FILES['dataset']['csv'], COL_NAMES)
    FRAMES['combined annotations'] = dp.add_col(
        annotations, 'group_id', 'class_id', member_to_group)

test_add_col()

In [6]:
def test_read_jpg():
    ''' Run read_jpg() on the first jpg file listed in FILES['dataset'] '''
    dp.read_jpg(FILES['dataset']['jpg'][0])

test_read_jpg()

In [7]:
#######################################################################################
# Part 2 - Data visualization

def test_plot_distribution():
    ''' Run plot_distribution() four times on the combined annotations.

    Plots 'class_id' (with filt=100) and 'group_id' for the whole frame, then
    'class_id' again for the rows whose 'group_id' is 'BRPE' and 'LGHT'.
    '''
    annotations = FRAMES['combined annotations']

    # Whole-frame distributions
    vis.plot_distribution(
        annotations, "class_id",
        ("Frequency", "Bird Species", "Bird Species Distribution"),
        PLOTS_PATH, filt=100)
    vis.plot_distribution(
        annotations, "group_id",
        ("Frequency", "Bird Group", "Bird Group Distribution"),
        PLOTS_PATH)

    # Per-group distributions, in the same order as before
    per_group_titles = (
        ('BRPE', "BRPE Bird Species Distribution"),
        ('LGHT', "LGHT Bird Species Distribution"),
    )
    for group, title in per_group_titles:
        group_rows = annotations.loc[annotations['group_id'] == group]
        vis.plot_distribution(
            group_rows, "class_id",
            ("Frequency", "Bird Species", title),
            PLOTS_PATH)

# test_plot_distribution()

In [8]:
def test_plot_boxes():
    ''' Run plot_boxes() on the jpg and csv files at index 10.

    Assumes both file lists hold at least 11 entries and that equal indices
    refer to the same image -- TODO confirm against get_file_names().
    '''
    sample_index = 10
    jpg_file = FILES['dataset']['jpg'][sample_index]
    csv_file = FILES['dataset']['csv'][sample_index]
    # 'Annonations' [sic] is passed through unchanged: it is a runtime argument
    # whose use inside plot_boxes() is not visible from here.
    vis.plot_boxes(jpg_file, csv_file, 'Annonations', PLOTS_PATH)

# test_plot_boxes()

In [9]:
#######################################################################################
# Part 3 - Dataloader

# Split the dataset into trainset, testset, and valset
def test_split_img_annos():
    ''' Run split_img_annos() and cache the three resulting splits.

    The three returned values are stored, in order, in FILES['trainset'],
    FILES['testset'] and FILES['valset'].
    '''
    # presumably ordered like the outputs (train, test, val) -- TODO confirm
    fractions = (0.8, 0.1, 0.1)
    train_files, test_files, val_files = dp.split_img_annos(
        FILES['dataset']['jpg'], FILES['dataset']['csv'], fractions, seed=2023)
    FILES['trainset'] = train_files
    FILES['testset'] = test_files
    FILES['valset'] = val_files

test_split_img_annos()

In [10]:
# cropping
# Make a dataloader

import torch
import torchvision
from torchvision.transforms import functional as F
from utils.data_loader import BirdDataset, collate_fn

In [11]:
# Smoke-test a pretrained Faster R-CNN: one training-mode forward pass on a
# batch of our data, then one inference pass on random images.
# https://pytorch.org/vision/main/models/generated/torchvision.models.detection.fasterrcnn_resnet50_fpn.html#torchvision.models.detection.fasterrcnn_resnet50_fpn
model = torchvision.models.detection.fasterrcnn_resnet50_fpn(weights='DEFAULT')
dataset = BirdDataset(FILES['dataset'], F.to_tensor)
data_loader = torch.utils.data.DataLoader(
    dataset, batch_size=2, shuffle=True, num_workers=4,
    collate_fn=collate_fn # collate_fn is important otherwise it raises an error
)

# For Training
images, targets = next(iter(data_loader))
images = list(images)
targets = [dict(target) for target in targets]

# A freshly built module is in training mode; in that mode torchvision
# detection models return only a dict of losses (no detections).
output = model(images, targets)

# For inference
model.eval()
x = [torch.rand(3, 300, 400), torch.rand(3, 500, 400)]
# No gradients are needed for predictions, so skip building the autograd graph.
with torch.no_grad():
    predictions = model(x)
predictions

In [24]:
# Build datasets and loaders for the train and test splits.
# Options shared by both loaders; collate_fn is important otherwise it raises an error.
_loader_options = {'num_workers': 4, 'collate_fn': collate_fn}

trainset = BirdDataset(FILES['trainset'], F.to_tensor)
train_loader = torch.utils.data.DataLoader(
    trainset, batch_size=2, shuffle=True, **_loader_options)

testset = BirdDataset(FILES['testset'], F.to_tensor)
test_loader = torch.utils.data.DataLoader(
    testset, batch_size=1, shuffle=False, **_loader_options)

In [32]:
# Use the GPU when one is available, otherwise fall back to the CPU.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

# NOTE(review): num_classes is not used below -- the pretrained detection head
# is left as loaded, so this value has no effect on the model. The original
# comment ("background and person") looks copied from a tutorial; confirm the
# real number of classes and replace the box predictor if needed.
num_classes = 2

model = torchvision.models.detection.fasterrcnn_resnet50_fpn(weights='DEFAULT')
# Move the model to the selected device before building the optimizer.
model.to(device)

# Optimize only the parameters that require gradients.
params = list(filter(lambda p: p.requires_grad, model.parameters()))
optimizer = torch.optim.SGD(
    params, lr=0.005, momentum=0.9, weight_decay=0.0005)

# Multiply the learning rate by 0.1 after every 3 calls to lr_scheduler.step().
lr_scheduler = torch.optim.lr_scheduler.StepLR(
    optimizer, step_size=3, gamma=0.1)

In [30]:
def train_model(model, optimizer, train_loader, test_loader, n_epochs, device):
    ''' Train a model and report the summed loss of every epoch.

    Args:
        model: module that, in training mode, maps (images, targets) to a
            dict of loss tensors.
        optimizer: optimizer built over the model's parameters.
        train_loader: iterable yielding (images, targets) batches, where
            images is a sequence of tensors and targets a sequence of dicts
            of tensors.
        test_loader: currently unused; kept so existing calls keep working.
        n_epochs: number of passes over train_loader.
        device: device the model and every batch are moved to.

    Returns:
        A list with one float per epoch: the sum of that epoch's batch losses.
    '''
    model = model.to(device)
    epoch_losses = []
    for epoch in range(n_epochs):
        print("Epoch:", epoch)
        model.train()
        epoch_loss = 0.0
        # Iterate the loader directly: wrapping it in enumerate() would unpack
        # (batch_index, batch) into images/targets and break on the first batch.
        for images, targets in train_loader:
            images = [image.to(device) for image in images]
            targets = [{key: val.to(device) for key, val in target.items()}
                       for target in targets]

            loss_dict = model(images, targets)
            losses = sum(loss_dict.values())

            optimizer.zero_grad()
            losses.backward()
            optimizer.step()

            # Accumulate a plain float so the autograd graph of every batch
            # is released instead of being kept alive for the whole epoch.
            epoch_loss += losses.item()
        print("Loss:", epoch_loss)
        epoch_losses.append(epoch_loss)
    return epoch_losses

In [31]:
# Train for 10 epochs on the selected device. lr_scheduler is not passed to
# train_model, so the learning-rate schedule is never stepped by this call.
train_model(model, optimizer, train_loader, test_loader, 10, device)

Epoch: 0


KeyboardInterrupt: 