Term Project

Team: Rain Price, Weston Scott

ECE 523 | Engineering Applications of Machine Learning and Data Analytics

Professor Abhijit Mahalanobis

# Vehicle and People Detection with the FLIR Thermal Dataset

![Problem statement](problemStatement.png)

## TODO

- Get simple training model working
- Get resulting images and predictions showing
- Run all images through model and check error
- Rewrite the ResNet layers as our own homegrown solution?
- Survive this class ....

## Import Libraries

In [None]:
import os
import random
import math
from datetime import datetime
from collections import Counter
import pandas as pd
import numpy as np

import cv2
from PIL import Image, ImageFilter
from pathlib import Path
import matplotlib.pyplot as plt
from matplotlib.patches import Rectangle
from matplotlib.collections import PatchCollection
from sklearn.model_selection import train_test_split
import xml.etree.ElementTree as ET
from torchvision.transforms.functional import pad

import torch
from torch.utils.data import Dataset, DataLoader
import torch.optim as optim
import torch.nn as nn
import torch.nn.functional as F
from torchvision import models
from torchsummary import summary
from torch.autograd import Variable
from torchvision import transforms as T
from torchvision.transforms import ToTensor, transforms
from copy import copy
import json

## Check to see if input images are the same size

In [None]:
# Survey the training images and tally how many share each (width, height).
path = 'data/images_thermal_train/data'

imgs = os.listdir(path)
sizes = []
for img in imgs:
    filename = os.path.join(path, img)
    # Use a context manager so each file handle is released immediately;
    # otherwise PIL keeps the file open until the Image is garbage-collected.
    with Image.open(filename) as image:
        sizes.append(image.size)
print(sizes[-1])
# axis=0 keeps every (width, height) pair intact; without it np.unique
# flattens the pairs and counts individual numbers instead of image sizes.
np.unique(sizes, axis=0, return_counts=True)

## Random Seed, Device Architecture, and Hyperparameters

In [None]:
%matplotlib inline
randomSeed = 2024
np.random.seed(randomSeed)
torch.manual_seed(randomSeed)

print(f'PyTorch Version: {torch.__version__}')
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
# device = 'cpu'
print(f'PyTorch Device: {device}')

saveModel = True
loadModel = False
modelPath = './model.pt'
numWorkers = 1 
learnRate = 0.006 #2e-5
batchSize = 1
maxEpochs = 10
loadSavedModel = False

trackingLabels = ['person', 'car', 'background']
maxObjects = 83
numClasses = len(trackingLabels)

## Load Training/Testing Data

In [None]:
# Root folders of the three dataset splits; each is expected to contain a
# COCO-style annotation file named `jsonFile`.
trainPath = './data/images_thermal_train'
valPath = './data/images_thermal_val'
testPath = './data/video_thermal_test'
dataDir = 'data'
jsonFile = 'coco.json'

jsonFiles = {
              'train' : os.path.join(trainPath, jsonFile),
              'val' : os.path.join(valPath, jsonFile),
              'test' : os.path.join(testPath, jsonFile)
            }

imagePaths = {
              'train' : trainPath,
              'val' : valPath,
              'test' : testPath
            }

# Report both outcomes: a missing path would otherwise only surface later
# as an error inside the dataset constructor.
for key, val in jsonFiles.items():
    if os.path.isfile(val):
        print(f'coco.json Exists: {key}, {val}')
    else:
        print(f'WARNING: coco.json Missing: {key}, {val}')

for key, val in imagePaths.items():
    if os.path.isdir(val):
        print(f'Data Directory Exists: {key}, {val}')
    else:
        print(f'WARNING: Data Directory Missing: {key}, {val}')

# Annotation category id -> class name. The ids are sparse; ids that are not
# listed here have no name in this notebook.
labelMap = {
            0:  'background',
            1:  'person',
            2:  'bike', #(renamed from "bicycle")
            3:  'car', #(this includes pick-up trucks and vans)
            4:  'motor', #(renamed from "motorcycle" for brevity)
            6:  'bus',
            7:  'train',
            8:  'truck', #(semi/freight truck, excluding pickup truck)
            10: 'light', #(renamed from "traffic light" for brevity)
            11: 'hydrant', #(renamed "fire hydrant" for brevity)
            12: 'sign', #(renamed from "street sign" for brevity)
            17: 'dog',
            18: 'deer',
            37: 'skateboard',
            73: 'stroller', #(four-wheeled carriage for a child, also called pram)
            75: 'scooter',
            79: 'other vehicle' #(less common vehicles like construction equipment and trailers)
          }

In [None]:
def create_mask(bb, x):
    """Return a 2-D float mask, sized like the first two axes of `x`,
    that is 1 inside the box `bb` and 0 elsewhere.

    `bb` is indexed as [row_start, col_start, row_stop, col_stop] after
    being truncated to integers; the stop indices are exclusive.
    """
    n_rows, n_cols, *_ = x.shape
    box = bb.astype(int)
    row_span = slice(box[0], box[2])
    col_span = slice(box[1], box[3])
    mask = np.zeros((n_rows, n_cols))
    mask[row_span, col_span] = 1.
    return mask

def mask_to_bb(Y):
    """Return the tight box around the nonzero entries of mask `Y`.

    The result is a float32 array [min_row, min_col, max_row, max_col]
    (maxima are inclusive). An all-zero mask yields four zeros.
    """
    # np.nonzero returns the index arrays in axis order: rows, then columns.
    row_idx, col_idx = np.nonzero(Y)
    if row_idx.size == 0:
        return np.zeros(4, dtype=np.float32)
    extents = [row_idx.min(), col_idx.min(), row_idx.max(), col_idx.max()]
    return np.array(extents, dtype=np.float32)

In [None]:
class ThermalCocoDataset(Dataset):
    """Dataset of grayscale images with COCO-style box annotations.

    Each item is `(img, labels, bboxes, numObjects)`: the image resized to
    `image_size` x `image_size`, the class index of every kept object, the
    matching `[x, y, w, h]` boxes rescaled to the resized image, and the
    object count. Images without any kept object get a single full-frame box
    labelled with `labelMap[0]`.

    Args:
        json_file: path to the annotation JSON (needs 'annotations' and
            'images' entries).
        image_dir: directory that the images' 'file_name' values are
            relative to.
        labels: class names to keep; a name's position is its class index.
        labelMap: mapping from annotation 'category_id' to class name.
        transform: optional callable applied to the resized float32 image.
        image_size: side length, in pixels, of the square output image.
    """

    def __init__(self, json_file, image_dir, labels, labelMap, transform=None,
                 image_size=640):
        self.json_file = json_file
        self.image_dir = image_dir
        self.transform = transform
        self.labels = labels
        self.labelMap = labelMap
        self.image_size = image_size
        self._load_json()

    def _load_json(self):
        """Read the annotation file and group the annotations per image."""
        with open(self.json_file, 'r') as f:
            data = json.load(f)

        self.annotations = data['annotations']
        self.images = data['images']

        # Group once up front so that fetching an item does not have to scan
        # every annotation in the file.
        self._annotations_by_image = {}
        for entry in self.annotations:
            self._annotations_by_image.setdefault(entry['image_id'], []).append(entry)

    def _map_annotations_to_image(self, id, imageWidth, imageHeight):
        """Collect the kept boxes and class indices for image `id`.

        Boxes are rescaled from the original `imageWidth` x `imageHeight`
        frame to the square output frame. Returns `(bboxes, labels)` as two
        parallel lists.
        """
        size = self.image_size
        scale_x = size / imageWidth
        scale_y = size / imageHeight

        bboxes = []
        labels = []
        for entry in self._annotations_by_image.get(id, ()):
            # Category ids missing from labelMap map to None and are skipped
            # instead of raising KeyError.
            name = self.labelMap.get(entry['category_id'])
            if name not in self.labels:
                continue
            x, y, w, h = entry['bbox']
            bboxes.append([x * scale_x, y * scale_y, w * scale_x, h * scale_y])
            labels.append(self.labels.index(name))

        if not bboxes:
            # No tracked object: describe the whole frame as labelMap[0].
            bboxes.append([0, 0, size, size])
            labels.append(self.labels.index(self.labelMap[0]))

        return bboxes, labels

    def __len__(self):
        return len(self.images)

    def __getitem__(self, idx):
        """Load image `idx` and its targets.

        Raises:
            FileNotFoundError: if the image file cannot be read.
        """
        image = self.images[idx]
        image_file = os.path.join(self.image_dir, f"{image['file_name']}")
        img = cv2.imread(str(image_file), cv2.IMREAD_GRAYSCALE)
        if img is None:
            # cv2.imread signals failure by returning None, not by raising.
            raise FileNotFoundError(f'Could not read image: {image_file}')
        img = cv2.resize(img.astype(np.float32), (self.image_size, self.image_size))

        if self.transform:
            img = self.transform(img)

        bboxes, labels = self._map_annotations_to_image(image['id'], image['width'], image['height'])
        numObjects = torch.tensor(len(labels))
        labels = torch.tensor(labels)
        # NOTE: squeeze() turns a single label into a 0-dim tensor; callers
        # in this notebook already account for that shape.
        labels = labels.squeeze()
        bboxes = torch.tensor(bboxes, dtype=torch.float32)

        return img, labels, bboxes, numObjects

# The dataset hands the transform a float32 array holding 8-bit intensities
# (0..255). ToTensor only rescales uint8 input to [0, 1], so the 0..255 range
# is normalised here directly: (x - 127.5) / 127.5 maps it onto [-1, 1].
transform = transforms.Compose([
    transforms.ToTensor(),  # ndarray -> tensor with a leading channel axis
    transforms.Normalize(mean=[127.5], std=[127.5])  # 0..255 -> [-1, 1]
])

trainData = ThermalCocoDataset(jsonFiles['train'], imagePaths['train'], trackingLabels, labelMap, transform=transform)
valData = ThermalCocoDataset(jsonFiles['val'], imagePaths['val'], trackingLabels, labelMap, transform=transform)
testData = ThermalCocoDataset(jsonFiles['test'], imagePaths['test'], trackingLabels, labelMap, transform=transform)

# Shuffle only the training split so consecutive optimisation steps do not
# follow the file order; evaluation splits stay in order for repeatability.
trainLoader = DataLoader(trainData, batch_size=batchSize, shuffle=True, num_workers=numWorkers)
valLoader = DataLoader(valData, batch_size=batchSize, shuffle=False, num_workers=numWorkers)
testLoader = DataLoader(testData, batch_size=batchSize, shuffle=False, num_workers=numWorkers)

## Determine Max Objects in Image

In [None]:
# Disabled scan: walks every loader to find the largest per-image object
# count. While it stays commented out, the values printed below are the ones
# assigned in the hyperparameter cell.
# maxObjects = 1
# for loader in [trainLoader, valLoader, testLoader]:
#     for __, __, __, num in loader:
#         if num.item() > maxObjects:
#             maxObjects = num.item()
        
# numClasses = len(trackingLabels)

print(f'Maximum Number of Objects: {maxObjects}')
print(f'Number of Classes: {numClasses}')

## Helpful Function Definitions

In [None]:
def create_corner_rect(bb, color='red'):
    """Build an unfilled, 1-pt matplotlib rectangle for the box `bb`.

    `bb` is read as [x, y, width, height], with (x, y) used as the anchor
    corner passed to `plt.Rectangle`.
    """
    box = np.array(bb, dtype=np.float32)
    anchor = (box[0], box[1])
    width, height = box[2], box[3]
    return plt.Rectangle(anchor, width, height, color=color, fill=False, lw=1)

def show_corner_bb(im, bb, c=None, cLabel='', color='red', createFig=False):
    """Display image `im` in grayscale and overlay the box `bb`.

    With `createFig` set, a new 6x6 figure is opened first and, when
    `cLabel` is non-empty, titled with `cLabel` and the class `c`.
    """
    if createFig:
        plt.figure(figsize=(6,6))
        if cLabel != '':
            plt.title(f'{cLabel} Class: {c}')
    pixels = im.squeeze()
    plt.imshow(pixels, cmap="gray")
    axes = plt.gca()
    axes.add_patch(create_corner_rect(bb, color=color))
    
def plot_sample(image, labels, bboxes, num, trackingLabels):
    """Draw one sample on the current axes: the image, each box, its label.

    Args:
        image: image whose singleton dimensions are squeezed away before
            display.
        labels: class indices, one per box; a single bare index is accepted
            for a one-object sample.
        bboxes: iterable of [x, y, w, h] boxes in image pixels.
        num: object count shown in the title.
        trackingLabels: class names indexed by the entries of `labels`.
    """
    # squeeze() drops singleton dimensions so imshow receives a 2-D array.
    plt.imshow(image.squeeze(), cmap="gray")
    plt.title(f'Number of Objects: {num}')

    # A one-object sample arrives as a bare int rather than a list. Normalise
    # it explicitly instead of catching the resulting TypeError, which used
    # to hide every other error raised while drawing as well.
    label_list = np.atleast_1d(labels).tolist()

    axes = plt.gca()
    for bbox, label in zip(bboxes, label_list):
        x, y, w, h = bbox
        axes.add_patch(plt.Rectangle((x, y), w, h, linewidth=1, edgecolor='r', facecolor='none'))
        plt.text(x+2, y+25, f'{trackingLabels[label]}', color='r')

    plt.axis('off')

## Sample Imagery From Training Data

### Original Images (Training)

In [None]:
# 3x3 grid of randomly drawn training samples with their annotated boxes.
cols, rows = 3, 3
figure = plt.figure(figsize=(12, 12))
plt.suptitle('Training Data Sample')
for i in range(1, cols * rows + 1):
    idx = int(torch.randint(len(trainData), size=(1,)))
    image, labels, bboxes, num = trainData[idx]
    figure.add_subplot(rows, cols, i)
    plot_sample(image, labels.tolist(), bboxes.tolist(), num.item(), trackingLabels)
plt.show()

### Original Images (Validation)

In [None]:
# 3x3 grid of randomly drawn validation samples with their annotated boxes.
cols, rows = 3, 3
figure = plt.figure(figsize=(12, 12))
plt.suptitle('Validation Data Sample')
for i in range(1, cols * rows + 1):
    idx = int(torch.randint(len(valData), size=(1,)))
    image, labels, bboxes, num = valData[idx]
    figure.add_subplot(rows, cols, i)
    plot_sample(image, labels.tolist(), bboxes.tolist(), num.item(), trackingLabels)
plt.show()

### Original Images (Testing)

In [None]:
# 3x3 grid of randomly drawn test samples with their annotated boxes.
cols, rows = 3, 3
figure = plt.figure(figsize=(12, 12))
plt.suptitle('Testing Data Sample')
for i in range(1, cols * rows + 1):
    idx = int(torch.randint(len(testData), size=(1,)))
    image, labels, bboxes, num = testData[idx]
    figure.add_subplot(rows, cols, i)
    plot_sample(image, labels.tolist(), bboxes.tolist(), num.item(), trackingLabels)
plt.show()

## Model Definition

In [None]:
class ClassificationDetection(nn.Module):
    """Joint classifier / box regressor on a truncated ResNet-50 trunk.

    The trunk is the first six children of an untrained ResNet-50, with the
    first convolution replaced by a single-channel 3x3 one. Its globally
    average-pooled output feeds two small MLP heads that emit `max_objects`
    slots: `num_classes` scores and 4 box values per slot.

    Args:
        num_classes: number of class scores per object slot.
        max_objects: number of object slots produced per image.
    """

    def __init__(self, num_classes, max_objects):
        super(ClassificationDetection, self).__init__()
        inplace = True
        resnet = models.resnet50(weights=None)

        self.relu = nn.ReLU(inplace=inplace)
        # Single input channel instead of the stock three-channel stem.
        resnet.conv1 = nn.Conv2d(1, 64, kernel_size=3, stride=2, padding=1, bias=False)

        # Keep only the first six child modules of the ResNet as the trunk.
        layers = list(resnet.children())[:6]
        self.features = nn.Sequential(*layers)

        self.num_classes = num_classes
        self.max_objects = max_objects

        self.classifier = nn.Sequential(nn.Linear(512, 64),
                                         nn.ReLU(inplace=inplace),
                                         nn.Linear(64, num_classes*max_objects))
        self.bb = nn.Sequential(nn.Linear(512, 64),
                                nn.ReLU(inplace=inplace),
                                nn.Linear(64, 4*max_objects))

    def forward(self, x, num_available_objects=None):
        """Run the network.

        Args:
            x: input image batch.
            num_available_objects: training mode only - how many leading
                object slots to return. May be an int or a one-element
                tensor; `None` returns all `max_objects` slots.

        Returns:
            Training mode: `(scores, boxes)` as flat tensors of width
            `num_classes * n` and `4 * n` per sample.
            Eval mode: `(scores, boxes)` restricted to the slots whose top
            softmax probability exceeds 0.5, with shapes `(k, num_classes)`
            and `(k, 4)`.
        """
        x = self.features(x)
        x = self.relu(x)
        x = F.adaptive_avg_pool2d(x, (1, 1))
        x = x.view(x.size(0), -1)

        classifier_output = self.classifier(x)
        bbox_output = self.bb(x)

        if self.training:
            # The documented default (None) previously crashed the slicing
            # below; fall back to every slot. int() also accepts the
            # one-element tensor a DataLoader produces.
            if num_available_objects is None:
                num_objects = self.max_objects
            else:
                num_objects = int(num_available_objects)
            classifier = classifier_output[:, :self.num_classes*num_objects]
            bbox = bbox_output[:, :4*num_objects]
        else:
            # During evaluation, keep only the slots whose most likely class
            # has a probability above 0.5.
            classifier = classifier_output.view(-1, self.max_objects, self.num_classes)
            bbox = bbox_output.view(-1, self.max_objects, 4)
            probabilities = F.softmax(classifier, dim=2)
            top_probabilities, _ = torch.max(probabilities, dim=2)
            mask = top_probabilities > 0.5
            classifier = classifier[mask]
            bbox = bbox[mask]

        return classifier, bbox
    
model = ClassificationDetection(numClasses, maxObjects)

# Place the model on the selected device before summarising it. The previous
# `device == 'cuda'` test compared a torch.device with a str, which is never
# equal, so the model always stayed on the CPU.
model = model.to(device)

# torchsummary creates its dummy input on the device named here; passing the
# model's device type keeps the input and the weights together.
summary(model, (1,640,640), device=device.type)

## Set Criterion and Optimizer

In [None]:
class DetectionLoss(nn.Module):
    """Detection objective: cross-entropy on the class predictions plus
    smooth-L1 on the box predictions, added with equal weight."""

    def __init__(self):
        super().__init__()
        self.cls_loss = nn.CrossEntropyLoss()  # class term
        self.reg_loss = nn.SmoothL1Loss()      # bounding-box term

    def forward(self, pred_cls, pred_reg, target_cls, target_reg):
        """Return the sum of the class loss and the box loss."""
        class_term = self.cls_loss(pred_cls, target_cls)
        box_term = self.reg_loss(pred_reg, target_reg)
        return class_term + box_term
    
# Define optimizer and learning rate scheduler
# Adam over every model parameter, starting at `learnRate`.
optimizer = optim.Adam(model.parameters(), lr=learnRate)
print(f'Optimizer: {optimizer}')

# StepLR multiplies the learning rate by `gamma` every `step_size` scheduler steps.
scheduler = optim.lr_scheduler.StepLR(optimizer, step_size=5, gamma=0.1)  # Adjust the scheduler parameters
print(f'Scheduler: {scheduler}')

# Define the loss function
# Combined classification + box-regression loss defined above.
criterion = DetectionLoss()
print(f'\nCriterion: {criterion}')

# Keep the loss module on the same device selection as the rest of the notebook.
criterion = criterion.to(device)

## Load Trained Model (If Needed)

In [None]:
if loadModel:
    # Restore into the model and optimizer that already exist. Building a new
    # model here (previously with len(labelMap) for both arguments) produced
    # head sizes that do not match the saved weights and left the optimizer
    # bound to the parameters of the discarded model.
    # map_location lets a checkpoint written on a GPU load on a CPU-only host.
    checkpoint = torch.load(modelPath, map_location=device)
    model.load_state_dict(checkpoint['model_state_dict'])
    optimizer.load_state_dict(checkpoint['optimizer_state_dict'])
    epoch = checkpoint['epoch']
    # The checkpoint stores a loss *value*; keep it apart from `criterion`,
    # which must remain the loss function.
    lastLoss = checkpoint['loss']

## Train Model

In [None]:
if not loadModel:
    trainLoss = []

    # Send each batch to wherever the model's weights actually live. The old
    # `device == 'cuda'` test compared a torch.device with a str and never
    # matched, so batches were never moved.
    modelDevice = next(model.parameters()).device

    for epoch in range(1, maxEpochs+1):
        epochLoss = []
        testEpochLoss = []
        model.train()

        for i, (images, labels, bboxes, num) in enumerate(trainLoader):
            optimizer.zero_grad()

            images = images.to(modelDevice)
            bboxes = bboxes.to(modelDevice)
            labels = labels.to(modelDevice)
            # One image per batch (batchSize == 1), so the collated count is
            # a single-element tensor.
            numObjects = int(num)

            predLabels, predBoxes = model(images, numObjects)

            # Feed the raw class scores to the loss: taking the argmax first
            # is not differentiable and does not match the (N, C) input
            # CrossEntropyLoss expects.
            predLogits = predLabels.view(-1, numClasses)   # (objects, classes)
            predBoxesMod = predBoxes.view(-1, 4)           # (objects, 4)
            targetLabels = labels.view(-1)                 # (objects,)
            targetBoxes = bboxes.view(-1, 4)               # (objects, 4)

            loss = criterion(predLogits, predBoxesMod, targetLabels, targetBoxes)
            loss.backward()
            optimizer.step()
            lossVal = loss.item()
            epochLoss.append(float(lossVal))

        trainLoss.append(np.mean(epochLoss))
        # StepLR advances once per epoch and takes no metric; the loss value
        # passed here before was interpreted as an epoch number.
        scheduler.step()

        print(f'[Epoch: {epoch}/{maxEpochs}] Loss: {np.round(trainLoss[-1], 5)}')

        if saveModel and epoch % 5 == 0: ## save model every 5th epoch
            torch.save({
                    'epoch': epoch,
                    'model_state_dict': model.state_dict(),
                    'optimizer_state_dict': optimizer.state_dict(),
                    # Store the last batch loss as a plain float rather than
                    # a tensor still attached to the autograd graph.
                    'loss': float(lossVal),
                    }, modelPath)

## Save Model (If Needed)

In [None]:
# if saveModel and not loadModel:
#     torch.save({
#             'epoch': epoch,
#             'model_state_dict': model.state_dict(),
#             'optimizer_state_dict': optimizer.state_dict(),
#             'loss': criterion,
#             'loss_bbox': criterion_bbox,
#             }, modelPath)

## Learning Curve

In [None]:
# if not loadModel:
#     print(f'Final MSE ({maxEpochs} epochs): {losses[-1]}\n')
    
#     f = plt.figure(figsize=(10,8))
#     plt.plot(losses, label="train")
#     plt.xlabel("epochs")
#     plt.ylabel("cross entropy")
#     plt.title("Epochs vs. Loss Function")
#     plt.legend()
#     plt.grid()
#     plt.tight_layout()
#     plt.show()

## Sample Imagery of Model Output

### Training Images Samples

In [None]:
# trainImages, trainLabels, trainImagesFlipped = next(iter(trainLoader))
# samples = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9]

# model.eval()
# with torch.no_grad():
#     outputs, __ = model(trainImages.to(device))

# j = 0
# for i, label in enumerate(trainLabels):
#     if label.item() in samples:
#         f = plt.figure(figsize=(12, 4))      
#         ax1 = f.add_subplot(131)
#         ax1.imshow(trainImages[i].squeeze(), cmap='gray')
#         ax1.axis('off')
#         ax1.set_title(f'Original - {label.item()}')
#         ax2 = f.add_subplot(132)
#         ax2.imshow(outputs[i].detach().cpu()[0].squeeze(), cmap='gray')
#         ax2.axis('off')
#         ax2.set_title(f'Reconstruction (Flipped) - {label.item()}')
#         plt.tight_layout()
#         plt.show()
        
#         samples.remove(label.item())
#         j += 1

### Test Images Samples

In [None]:
# testImages, testLabels, testImagesFlipped = next(iter(testLoader))
# samples = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9]

# model.eval()
# with torch.no_grad():
#     outputs, __ = model(testImages.to(device))

# j = 0
# for i, label in enumerate(testLabels):
#     if label.item() in samples:
#         f = plt.figure(figsize=(12, 4))      
#         ax1 = f.add_subplot(131)
#         ax1.imshow(testImages[i].squeeze(), cmap='gray')
#         ax1.axis('off')
#         ax1.set_title(f'Original - {label.item()}')
#         ax2 = f.add_subplot(132)
#         ax2.imshow(outputs[i].detach().cpu()[0].squeeze(), cmap='gray')
#         ax2.axis('off')
#         ax2.set_title(f'Reconstruction (Flipped) - {label.item()}')
#         plt.tight_layout()
#         plt.show()
        
#         samples.remove(label.item())
#         j += 1