Term Project

Team: Rain Price, Weston Scott

ECE 523 | Engineering Applications of Machine Learning and Data Analytics

Professor Abhijit Mahalanobis

# Vehicle and People Detection with the FLIR Thermal Dataset

![Problem statement](problemStatement.png)

## TODO

- Get simple training model working
- Get resulting images and predictions showing
- Run all images through model and check error
- Rewrite the ResNet layers as our own homegrown solution?
- Survive this class ....

## Import Libraries

In [None]:
import os
import random
import math
from datetime import datetime
from collections import Counter
import pandas as pd
import numpy as np

import cv2
from PIL import Image, ImageFilter
from pathlib import Path
import matplotlib.pyplot as plt
from matplotlib.patches import Rectangle
from matplotlib.collections import PatchCollection
from sklearn.model_selection import train_test_split
import xml.etree.ElementTree as ET
from torchvision.transforms.functional import pad

import torch
from torch.utils.data import Dataset, DataLoader
import torch.optim as optim
import torch.nn as nn
import torch.nn.functional as F
from torchvision import models
from torchsummary import summary
from torch.autograd import Variable
from torchvision import transforms as T
from torchvision.transforms import ToTensor, transforms
from copy import copy
import json

## Check to see if input images are the same size

In [None]:
# Tally the (width, height) of every training image to confirm they match.
path = 'data/images_thermal_train/data'

imgs = os.listdir(path)
sizes = []
for img in imgs:
    filename = os.path.join(path, img)
    # Image.open keeps the file open until the image is closed; the context
    # manager releases each handle instead of leaking one per image.
    with Image.open(filename) as image:
        sizes.append(image.size)
print(sizes[-1])
# axis=0 compares whole (width, height) rows. Without it np.unique flattens
# the array and counts individual numbers rather than image sizes.
np.unique(sizes, axis=0, return_counts=True)

## Random Seed, Device Architecture, and Hyperparameters

In [None]:
%matplotlib inline
# Seed NumPy and PyTorch so random draws repeat across runs.
randomSeed = 2024
np.random.seed(randomSeed)
torch.manual_seed(randomSeed)

print(f'PyTorch Version: {torch.__version__}')
# Use the GPU when CUDA is available, otherwise fall back to the CPU.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
# device = 'cpu'
print(f'PyTorch Device: {device}')

# Checkpoint switches and the checkpoint file location.
saveModel = True
loadModel = False
modelPath = './model.pt'
# DataLoader worker count, optimizer learning rate, batch size and epoch count.
numWorkers = 1 
learnRate = 0.006 #2e-5
batchSize = 1
maxEpochs = 10
# NOTE(review): looks like a duplicate of loadModel and no visible cell reads
# it -- confirm before removing.
loadSavedModel = False

# Class names (values of the label map) whose annotations the datasets keep.
trackingLabels = ['person', 'car']

## Load Training, Validation, and Testing Data

In [None]:
# One directory per split; each is expected to contain an annotation file
# named `coco.json`.
dataDir = 'data'
jsonFile = 'coco.json'
trainPath = './data/images_thermal_train'
valPath = './data/images_thermal_val'
testPath = './data/video_thermal_test'

# Split name -> image directory.
imagePaths = dict(train=trainPath, val=valPath, test=testPath)

# Split name -> annotation file inside that split's directory.
jsonFiles = {
    split: os.path.join(folder, jsonFile)
    for split, folder in imagePaths.items()
}

# Report which annotation files and directories are actually present.
for key, val in jsonFiles.items():
    if not os.path.isfile(val):
        continue
    print(f'coco.json Exists: {key}, {val}')

for key, val in imagePaths.items():
    if not os.path.isdir(val):
        continue
    print(f'Data Directory Exists: {key}, {val}')

# Category id -> short class name. The ids are sparse, so look names up by
# key rather than by position.
labelMap = {
    0: 'background',
    1: 'person',
    2: 'bike',            # renamed from "bicycle"
    3: 'car',             # includes pick-up trucks and vans
    4: 'motor',           # renamed from "motorcycle" for brevity
    6: 'bus',
    7: 'train',
    8: 'truck',           # semi/freight truck, excluding pickup truck
    10: 'light',          # renamed from "traffic light" for brevity
    11: 'hydrant',        # renamed from "fire hydrant" for brevity
    12: 'sign',           # renamed from "street sign" for brevity
    17: 'dog',
    37: 'skateboard',
    73: 'stroller',       # four-wheeled carriage for a child, also called pram
    77: 'scooter',
    79: 'other vehicle',  # less common vehicles like construction equipment and trailers
}

In [None]:
class ThermalCocoDataset(Dataset):
    """Grayscale image dataset driven by a `coco.json`-style annotation file.

    Each item is a tuple ``(img, labels, bboxes)``:

    * ``img``    -- the image resized to ``image_size x image_size``, float32,
      scaled to [0, 1], then passed through ``transform`` when one is given.
    * ``labels`` -- tensor of category ids for the kept annotations. Because
      of ``squeeze()``, a single annotation yields a 0-dim tensor.
    * ``bboxes`` -- float32 tensor of ``[x, y, width, height]`` rows expressed
      in the resized image's pixel coordinates.

    Images with no kept annotation get one full-frame box with label 0.

    Args:
        json_file: Path to the annotation JSON (needs ``images`` and
            ``annotations`` lists).
        image_dir: Directory that ``file_name`` entries are relative to.
        labels: Class names to keep (values of ``labelMap``).
        labelMap: Mapping of category id -> class name.
        transform: Optional callable applied to the resized image.
        image_size: Side length, in pixels, of the square output image.
    """

    def __init__(self, json_file, image_dir, labels, labelMap, transform=None,
                 image_size=640):
        self.json_file = json_file
        self.image_dir = image_dir
        self.transform = transform
        self.labels = labels
        self.labelMap = labelMap
        self.image_size = image_size
        self._load_json()

    def _load_json(self):
        """Read the annotation file and index annotations by image id."""
        with open(self.json_file, 'r') as f:
            data = json.load(f)

        self.annotations = data['annotations']
        self.images = data['images']

        # Group annotations once so __getitem__ does a dict lookup instead of
        # scanning every annotation in the file for every image.
        self._annotations_by_image = {}
        for entry in self.annotations:
            self._annotations_by_image.setdefault(entry['image_id'], []).append(entry)

    def _map_annotations_to_image(self, id, imageWidth, imageHeight):
        """Return ``(bboxes, labels)`` for image ``id``, rescaled to the output size.

        ``imageWidth`` / ``imageHeight`` are the original image dimensions
        used to rescale the boxes. Annotations whose category is not one of
        the tracked class names (or is missing from ``labelMap``) are skipped.
        """
        size = self.image_size
        scale_x = size / imageWidth
        scale_y = size / imageHeight

        bboxes = []
        labels = []
        for entry in self._annotations_by_image.get(id, ()):
            # .get() so a category id absent from labelMap is skipped rather
            # than raising KeyError.
            if self.labelMap.get(entry['category_id']) not in self.labels:
                continue
            # Work on a copy so the loaded annotation is never mutated.
            box = list(entry['bbox'])
            box[0] = box[0] * scale_x
            box[1] = box[1] * scale_y
            box[2] = box[2] * scale_x
            box[3] = box[3] * scale_y
            bboxes.append(box)
            labels.append(entry['category_id'])

        if not bboxes:
            # Nothing tracked in this image: full-frame placeholder, label 0.
            bboxes.append([0, 0, size, size])
            labels.append(0)

        return bboxes, labels

    def __len__(self):
        return len(self.images)

    def __getitem__(self, idx):
        """Load, resize and transform image ``idx`` and return its targets.

        Raises:
            FileNotFoundError: If the image file cannot be read.
        """
        image = self.images[idx]
        image_file = os.path.join(self.image_dir, f"{image['file_name']}")
        img = cv2.imread(str(image_file), cv2.IMREAD_GRAYSCALE)
        if img is None:
            # cv2.imread signals failure by returning None, not by raising.
            raise FileNotFoundError(f'Could not read image: {image_file}')

        size = self.image_size
        img = cv2.resize(img.astype(np.float32), (size, size))
        # Scale 8-bit intensities to [0, 1]. torchvision's ToTensor only
        # rescales uint8 input, so float pixels left in 0-255 would reach
        # Normalize unscaled.
        img = img / 255.0

        if self.transform:
            img = self.transform(img)

        bboxes, labels = self._map_annotations_to_image(image['id'], image['width'], image['height'])
        labels = torch.tensor(labels)
        # Kept for compatibility: one label becomes a 0-dim tensor, several
        # stay 1-D, so callers must handle both.
        labels = labels.squeeze()
        bboxes = torch.tensor(bboxes, dtype=torch.float32)

        return img, labels, bboxes

# Preprocessing applied by the dataset to each resized image array.
# NOTE(review): Normalize(0.5, 0.5) maps inputs in [0, 1] to [-1, 1], and
# ToTensor only rescales uint8 input -- confirm the dataset supplies pixels
# in the range this normalization expects.
transform = transforms.Compose([
    transforms.ToTensor(),  # Convert the image array to a PyTorch tensor
    transforms.Normalize(mean=[0.5], std=[0.5])  # Normalize image
])

# One dataset per split, all keeping only the classes in trackingLabels.
trainData = ThermalCocoDataset(jsonFiles['train'], imagePaths['train'], trackingLabels, labelMap, transform=transform)
valData = ThermalCocoDataset(jsonFiles['val'], imagePaths['val'], trackingLabels, labelMap, transform=transform)
testData = ThermalCocoDataset(jsonFiles['test'], imagePaths['test'], trackingLabels, labelMap, transform=transform)

# NOTE(review): the number of boxes differs per image, so default batch
# collation presumably only works while batchSize == 1 -- a custom collate_fn
# would be needed for larger batches; confirm.
# NOTE(review): valLoader shuffles; validation usually does not need to.
trainLoader = DataLoader(trainData, batch_size=batchSize, shuffle=True, num_workers=numWorkers)
valLoader = DataLoader(valData, batch_size=batchSize, shuffle=True, num_workers=numWorkers)
testLoader = DataLoader(testData, batch_size=batchSize, shuffle=False, num_workers=numWorkers)

## Helpful Function Definitions

In [None]:
def create_corner_rect(bb, color='red'):
    """Build an unfilled rectangle patch from a box given as [x, y, width, height]."""
    box = np.array(bb, dtype=np.float32)
    corner = (box[0], box[1])
    width, height = box[2], box[3]
    return plt.Rectangle(corner, width, height, color=color, fill=False, lw=1)

def show_corner_bb(im, bb, c=None, cLabel='', color='red', createFig=False):
    """Show `im` in grayscale and overlay the box `bb` on the current axes.

    With `createFig` set, a new 6x6 figure is opened first and, when `cLabel`
    is non-empty, titled with the label and the class `c`.
    """
    if createFig:
        plt.figure(figsize=(6, 6))
        if cLabel != '':
            plt.title(f'{cLabel} Class: {c}')
    plt.imshow(im.squeeze(), cmap="gray")
    axes = plt.gca()
    rect = create_corner_rect(bb, color=color)
    axes.add_patch(rect)
    
def plot_sample(image, labels, bboxes, labelMap):
    """Draw one image with its bounding boxes and class names on the current axes.

    Args:
        image: Image array/tensor; singleton dimensions are squeezed away
            before display.
        labels: Category ids, either a sequence or a single scalar id (a
            sample with one object arrives as a bare number).
        bboxes: Sequence of [x, y, width, height] boxes.
        labelMap: Mapping of category id -> class name; ids missing from it
            are shown as the raw id.
    """
    plt.imshow(image.squeeze(), cmap="gray")

    # Normalise a scalar label to a one-element list up front. This replaces
    # a bare `except:` that hid unrelated errors raised while drawing.
    if np.ndim(labels) == 0:
        labels = [labels]

    axes = plt.gca()
    for bbox, label in zip(bboxes, labels):
        x, y, w, h = bbox
        axes.add_patch(plt.Rectangle((x, y), w, h, linewidth=2, edgecolor='r', facecolor='none'))
        plt.text(x, y - 5, f'{labelMap.get(label, label)}', color='r')

    plt.axis('off')

## Sample Imagery From Training Data

### Original Images (Training)

In [None]:
# Show a 3x3 grid of randomly drawn training samples with their boxes.
cols, rows = 3, 3
figure = plt.figure(figsize=(12, 12))
plt.suptitle('Training Data Sample')
for i in range(1, cols * rows + 1):
    # Draw the index from torch's RNG, then fetch the sample before adding
    # its subplot so plot_sample draws onto the new axes.
    idx = torch.randint(len(trainData), size=(1,)).item()
    image, labels, bboxes = trainData[idx]
    figure.add_subplot(rows, cols, i)
    plot_sample(image, labels.tolist(), bboxes.tolist(), labelMap)
plt.show()

### Original Images (Validation)

In [None]:
# Show a 3x3 grid of randomly drawn validation samples with their boxes.
cols, rows = 3, 3
figure = plt.figure(figsize=(12, 12))
plt.suptitle('Validation Data Sample')
for i in range(1, cols * rows + 1):
    # Draw the index from torch's RNG, then fetch the sample before adding
    # its subplot so plot_sample draws onto the new axes.
    idx = torch.randint(len(valData), size=(1,)).item()
    image, labels, bboxes = valData[idx]
    figure.add_subplot(rows, cols, i)
    plot_sample(image, labels.tolist(), bboxes.tolist(), labelMap)
plt.show()

### Original Images (Testing)

In [None]:
# Show a 3x3 grid of randomly drawn test samples with their boxes.
cols, rows = 3, 3
figure = plt.figure(figsize=(12, 12))
plt.suptitle('Testing Data Sample')
for i in range(1, cols * rows + 1):
    # Draw the index from torch's RNG, then fetch the sample before adding
    # its subplot so plot_sample draws onto the new axes.
    idx = torch.randint(len(testData), size=(1,)).item()
    image, labels, bboxes = testData[idx]
    figure.add_subplot(rows, cols, i)
    plot_sample(image, labels.tolist(), bboxes.tolist(), labelMap)
plt.show()

## Model Definition

In [None]:
# # Define the backbone network
# class Backbone(nn.Module):
#     def __init__(self):
#         super(Backbone, self).__init__()
#         # Convolutional layers for feature extraction
#         self.conv1 = nn.Conv2d(1, 16, kernel_size=3, padding=1)
#         self.conv2 = nn.Conv2d(16, 32, kernel_size=3, padding=1)
#         self.conv3 = nn.Conv2d(32, 64, kernel_size=3, padding=1)
#         self.conv4 = nn.Conv2d(64, 128, kernel_size=3, padding=1)

#     def forward(self, x):
#         # Feature extraction
#         x = F.relu(self.conv1(x))
#         x = F.max_pool2d(x, kernel_size=2, stride=2)
#         x = F.relu(self.conv2(x))
#         x = F.max_pool2d(x, kernel_size=2, stride=2)
#         x = F.relu(self.conv3(x))
#         x = F.max_pool2d(x, kernel_size=2, stride=2)
#         x = F.relu(self.conv4(x))
#         return x

# # Define the region proposal network
# class RegionProposalNetwork(nn.Module):
#     def __init__(self):
#         super(RegionProposalNetwork, self).__init__()
#         # Anchor layer
#         self.anchor_layer = nn.Conv2d(128, 9 * 4, kernel_size=1)

#     def forward(self, x):
#         # Generate anchor boxes
#         anchors = self.anchor_layer(x)
#         return anchors

# # Define the object detector model
# class ObjectDetector(nn.Module):
#     def __init__(self, num_classes):
#         super(ObjectDetector, self).__init__()
#         self.backbone = Backbone()
#         self.rpn = RegionProposalNetwork()
#         # Classifier and bbox regressor
#         self.cls_layer = nn.Linear(128 * 80 * 80, num_classes)  # Adjust the input size
#         self.bbox_layer = nn.Linear(128 * 80 * 80, num_classes * 4)  # Adjust the input size

#     def forward(self, x):
#         # Backbone feature extraction
#         features = self.backbone(x)
#         # Region Proposal Network
#         anchors = self.rpn(features)
#         # Flatten the features
#         features_flat = features.view(features.size(0), -1)
#         # Classifier and bbox regressor
#         cls_scores = self.cls_layer(features_flat)
#         bbox_deltas = self.bbox_layer(features_flat)
#         return cls_scores, bbox_deltas, anchors

# # Generate summary for ObjectDetector
# summary(ObjectDetector(len(trackingLabels)), (1, 640, 640))
# model = ObjectDetector(len(trackingLabels))

In [None]:
class ClassificationDetection(nn.Module):
    """Truncated ResNet-50 trunk with a class head and a box-regression head.

    The trunk is the first six child modules of torchvision's ResNet-50, with
    the first convolution replaced by a 1-channel one for grayscale input.
    Both heads assume the trunk emits 512 channels (``nn.Linear(512, ...)``).

    Args:
        num_classes: Number of classes. The class head outputs
            ``num_classes`` scores and the box head ``4 * num_classes`` values.
    """

    def __init__(self, num_classes):
        super().__init__()
        inplace = True
        resnet = models.resnet50(weights=None)  # no pretrained weights

        self.relu = nn.ReLU(inplace=inplace)
        # Single input channel for grayscale thermal images.
        resnet.conv1 = nn.Conv2d(1, 64, kernel_size=3, stride=2, padding=1, bias=False)

        layers = list(resnet.children())[:6]
        self.features = nn.Sequential(*layers)

        # The head ends in raw scores (logits): nn.CrossEntropyLoss applies
        # log-softmax itself, so a trailing Softmax would be applied twice
        # and flatten the gradients.
        self.classifier = nn.Sequential(nn.Linear(512, 64),
                                        nn.ReLU(inplace=inplace),
                                        nn.Linear(64, num_classes))
        self.bb = nn.Sequential(nn.Linear(512, 64),
                                nn.ReLU(inplace=inplace),
                                nn.Linear(64, 4*num_classes))

    def forward(self, x):
        """Return ``(class_logits, bbox)`` for a batch of images.

        ``class_logits`` has shape ``(batch, num_classes)``; apply
        ``softmax(dim=1)`` at inference time when probabilities are needed.
        ``bbox`` has shape ``(batch, 4 * num_classes)``.
        """
        x = self.features(x)
        x = self.relu(x)
        # Global average pool to one feature vector per image.
        x = F.adaptive_avg_pool2d(x, (1, 1))
        x = x.view(x.size(0), -1)
        classifier = self.classifier(x)
        bbox = self.bb(x)
        return classifier, bbox
    
# Summarise a throwaway instance so the dummy forward pass does not touch the
# model that gets trained. torchsummary creates its input on CUDA by default
# when a GPU is present, so the model and the `device` argument must agree or
# the forward pass fails with a device mismatch.
summary(ClassificationDetection(len(trackingLabels)).to(device), (1, 640, 640), device=device.type)

model = ClassificationDetection(len(trackingLabels))


## Set Criterion and Optimizer

In [None]:
# Hand Adam only the parameters that are actually trainable.
parameters = (p for p in model.parameters() if p.requires_grad)
optimizer = torch.optim.Adam(parameters, lr=learnRate)
print(f'Optimizer: {optimizer}')

# Class loss and box-regression loss.
criterion_classifier = nn.CrossEntropyLoss()
criterion_bbox = nn.SmoothL1Loss()
print(f'\nCriterion: {criterion_classifier}')
print(f'\nCriterion bbox: {criterion_bbox}')

In [None]:
# class DetectionLoss(nn.Module):
#     def __init__(self):
#         super(DetectionLoss, self).__init__()
#         self.cls_loss = nn.CrossEntropyLoss() ## Classifier
#         self.reg_loss = nn.SmoothL1Loss() ## Bounding Box

#     def forward(self, pred_cls, pred_reg, target_cls, target_reg):
#         classification_loss = self.cls_loss(pred_cls, target_cls)
#         regression_loss = self.reg_loss(pred_reg, target_reg)
#         return classification_loss + regression_loss
    
# # Define optimizer and learning rate scheduler
# optimizer = optim.Adam(model.parameters(), lr=learnRate)
# print(f'Optimizer: {optimizer}')

# scheduler = optim.lr_scheduler.StepLR(optimizer, step_size=5, gamma=0.1)  # Adjust the scheduler parameters
# print(f'Scheduler: {scheduler}')

# # Define the loss function
# criterion = DetectionLoss()
# print(f'\nCriterion: {criterion}')

In [None]:
# model = model.to(device)
# criterion = criterion.to(device)

# # Training loop
# for epoch in range(maxEpochs):
#     model.train()
#     running_loss = 0.0
#     for images, labels, bboxes in trainLoader:
#         images, labels, bboxes = images.to(device), labels.to(device), bboxes.to(device)
        
#         # Zero the parameter gradients
#         optimizer.zero_grad()

#         # Forward pass
#         cls_scores, bbox_deltas, anchors = model(images)
#         print(cls_scores.shape, cls_scores)
#         print(labels.shape, labels)
#         # Compute the loss
#         loss = criterion(cls_scores, bbox_deltas, labels, bboxes)
        
#         # Backward pass and optimize
#         loss.backward()
#         optimizer.step()

#         # Print statistics
#         running_loss += loss.item()

#     # Print epoch statistics
#     print(f'Epoch [{epoch+1}/{maxEpochs}], Loss: {running_loss/len(trainLoader)}')

#     # Validation
#     model.eval()
#     with torch.no_grad():
#         val_loss = 0.0
#         for images, labels, bboxes in valLoader:
#             images, labels, bboxes = images.to(device), labels.to(device), bboxes.to(device)

#             # Forward pass
#             cls_scores, bbox_deltas, anchors = model(images)

#             # Compute the loss
#             loss = criterion(cls_scores, bbox_deltas, labels, bboxes)

#             # Accumulate validation loss
#             val_loss += loss.item()

#         # Print validation statistics
#         print(f'Validation Loss: {val_loss/len(valLoader)}')

#     # Adjust learning rate
#     scheduler.step()

In [None]:
# Move the model and both loss modules to the selected device.
model = model.to(device)
criterion = criterion_classifier.to(device)
criterion_bbox = criterion_bbox.to(device)

# Mean training loss per epoch.
losses = []

# Select training mode explicitly in case an earlier cell left the model in
# eval mode.
model.train()

for epoch in range(1, maxEpochs+1):
    epoch_loss = []
    for images, labels, bboxes in trainLoader:
        images = images.to(device)
        labels = labels.to(device)
        bboxes = bboxes.to(device)

        optimizer.zero_grad()

        # Forward pass
        classifier_output, bbox_output = model(images)

        # Compute loss
        # NOTE(review): the dataset yields category ids as labels (e.g. 3 for
        # 'car') while the model is built with len(trackingLabels) outputs.
        # CrossEntropyLoss needs targets in [0, num_classes), so the ids need
        # remapping to class indices -- confirm and fix.
        # NOTE(review): bbox_output holds 4 * num_classes values per image
        # while bboxes holds one 4-vector per annotated object, so the shapes
        # only line up in special cases -- TODO decide how targets are paired
        # with predictions.
        classifier_loss = criterion_classifier(classifier_output, labels)
        bbox_loss = criterion_bbox(bbox_output, bboxes)
        total_loss = classifier_loss + bbox_loss

        # Backward pass and optimization
        total_loss.backward()
        optimizer.step()

        epoch_loss.append(total_loss.item())

    # An empty loader records NaN without triggering numpy's empty-mean warning.
    losses.append(np.mean(epoch_loss) if epoch_loss else float('nan'))
    print(f'[Epoch: {epoch}/{maxEpochs}] Loss: {np.round(losses[-1], 5)}')

## Load Trained Model (If Needed)

In [None]:
# if loadModel:
#     model = ClassificationDetection(len(labelMap), len(labelMap))
#     optimizer = optimizer
#     checkpoint = torch.load(modelPath)
#     model.load_state_dict(checkpoint['model_state_dict'])
#     optimizer.load_state_dict(checkpoint['optimizer_state_dict'])
#     epoch = checkpoint['epoch']
#     criterion = checkpoint['loss']  

## Train Model

In [None]:
# model = model.to(device)
# criterion = criterion.to(device)
# criterion_bbox = criterion_bbox.to(device)

# if not loadModel:
     
#     exp_lr_scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(optimizer, mode='min')
#     trainLoss = []

#     for epoch in range(1, maxEpochs+1):
#         epochLoss = []
#         testEpochLoss = []
#         model.train()

#         for i, (images, bbox, labels) in enumerate(trainLoader):
#             optimizer.zero_grad()
#             images = images.to(device)
#             bbox = bbox.to(device)
#             labels = labels.to(device)         
#             predLabels, predBoxes = model(images)
#             lossLabels = criterion(predLabels, labels.to(device))
#             lossBoxes = criterion_bbox(predBoxes, bbox).sum(1)
#             loss = lossLabels + lossBoxes
#             loss.backward()          
#             optimizer.step()
#             lossVal = loss.item()
#             epochLoss.append(float(lossVal))

#         trainLoss.append(np.mean(epochLoss))
#         exp_lr_scheduler.step(trainLoss[-1])
                
#         print(f'[Epoch: {epoch}/{maxEpochs}] Loss: {np.round(trainLoss[-1], 5)}')

#         if saveModel and epoch % 5 == 0: ## save model every 5th epoch
#             torch.save({
#                     'epoch': epoch,
#                     'model_state_dict': model.state_dict(),
#                     'optimizer_state_dict': optimizer.state_dict(),
#                     'loss': loss,
#                     }, modelPath)

## Save Model (If Needed)

In [None]:
# if saveModel and not loadModel:
#     torch.save({
#             'epoch': epoch,
#             'model_state_dict': model.state_dict(),
#             'optimizer_state_dict': optimizer.state_dict(),
#             'loss': criterion,
#             'loss_bbox': criterion_bbox,
#             }, modelPath)

## Learning Curve

In [None]:
# if not loadModel:
#     print(f'Final MSE ({maxEpochs} epochs): {losses[-1]}\n')
    
#     f = plt.figure(figsize=(10,8))
#     plt.plot(losses, label="train")
#     plt.xlabel("epochs")
#     plt.ylabel("cross entropy")
#     plt.title("Epochs vs. Loss Function")
#     plt.legend()
#     plt.grid()
#     plt.tight_layout()
#     plt.show()

## Sample Imagery of Model Output

### Training Image Samples

In [None]:
# trainImages, trainLabels, trainImagesFlipped = next(iter(trainLoader))
# samples = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9]

# model.eval()
# with torch.no_grad():
#     outputs, __ = model(trainImages.to(device))

# j = 0
# for i, label in enumerate(trainLabels):
#     if label.item() in samples:
#         f = plt.figure(figsize=(12, 4))      
#         ax1 = f.add_subplot(131)
#         ax1.imshow(trainImages[i].squeeze(), cmap='gray')
#         ax1.axis('off')
#         ax1.set_title(f'Original - {label.item()}')
#         ax2 = f.add_subplot(132)
#         ax2.imshow(outputs[i].detach().cpu()[0].squeeze(), cmap='gray')
#         ax2.axis('off')
#         ax2.set_title(f'Reconstruction (Flipped) - {label.item()}')
#         plt.tight_layout()
#         plt.show()
        
#         samples.remove(label.item())
#         j += 1

### Test Image Samples

In [None]:
# testImages, testLabels, testImagesFlipped = next(iter(testLoader))
# samples = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9]

# model.eval()
# with torch.no_grad():
#     outputs, __ = model(testImages.to(device))

# j = 0
# for i, label in enumerate(testLabels):
#     if label.item() in samples:
#         f = plt.figure(figsize=(12, 4))      
#         ax1 = f.add_subplot(131)
#         ax1.imshow(testImages[i].squeeze(), cmap='gray')
#         ax1.axis('off')
#         ax1.set_title(f'Original - {label.item()}')
#         ax2 = f.add_subplot(132)
#         ax2.imshow(outputs[i].detach().cpu()[0].squeeze(), cmap='gray')
#         ax2.axis('off')
#         ax2.set_title(f'Reconstruction (Flipped) - {label.item()}')
#         plt.tight_layout()
#         plt.show()
        
#         samples.remove(label.item())
#         j += 1