# Deer detection

This notebook uses the Hugging Face dataset "myyyyw/NTLNP" to train a small YOLOS model (`hustvl/yolos-tiny`) to detect deer in images.

The `training_data` folder contains the files downloaded from that dataset, extracted from the original RAR archives.

In [41]:
from transformers import YolosImageProcessor, YolosForObjectDetection, AutoModelForObjectDetection
import torch
import requests


# The detection model and its image processor are loaded from the same checkpoint.
yolos_checkpoint = "hustvl/yolos-tiny"
image_processor = YolosImageProcessor.from_pretrained(yolos_checkpoint)
model = AutoModelForObjectDetection.from_pretrained(yolos_checkpoint)

In [3]:
# Exploratory: list every attribute name exposed by the loaded model object.
dir(model)

['T_destination',
 '__annotations__',
 '__call__',
 '__class__',
 '__delattr__',
 '__dict__',
 '__dir__',
 '__doc__',
 '__eq__',
 '__format__',
 '__ge__',
 '__getattr__',
 '__getattribute__',
 '__getstate__',
 '__gt__',
 '__hash__',
 '__init__',
 '__init_subclass__',
 '__le__',
 '__lt__',
 '__module__',
 '__ne__',
 '__new__',
 '__reduce__',
 '__reduce_ex__',
 '__repr__',
 '__setattr__',
 '__setstate__',
 '__sizeof__',
 '__str__',
 '__subclasshook__',
 '__weakref__',
 '_apply',
 '_auto_class',
 '_backward_compatibility_gradient_checkpointing',
 '_backward_hooks',
 '_backward_pre_hooks',
 '_buffers',
 '_call_impl',
 '_check_and_enable_flash_attn_2',
 '_compiled_call_impl',
 '_convert_head_mask_to_5d',
 '_copy_lm_head_original_to_resized',
 '_create_repo',
 '_dispatch_accelerate_model',
 '_expand_inputs_for_generation',
 '_extract_past_from_model_output',
 '_forward_hooks',
 '_forward_hooks_always_called',
 '_forward_hooks_with_kwargs',
 '_forward_pre_hooks',
 '_forward_pre_hooks_with_kwa

In [36]:
import xml.etree.ElementTree as ET
from pathlib import Path

# Each split keeps its images (JPEGImages/) next to its XML annotations (Annotations/).

# Daytime split
day_image_folder = './deer_detection/training_data/voc_day/JPEGImages/'
day_annotation_folder = './deer_detection/training_data/voc_day/Annotations/'

# Nighttime split
night_image_folder = './deer_detection/training_data/voc_night/JPEGImages/'
night_annotation_folder = './deer_detection/training_data/voc_night/Annotations/'

def read_content(xml_file: "str | Path"):
    """Parse one VOC-style XML annotation file into a plain dictionary.

    Args:
        xml_file: Location of the annotation file, either as a string or as a
            ``pathlib.Path``.

    Returns:
        A dict with three keys:
          * ``'full_path'``: absolute path of ``xml_file`` as a string,
          * ``'size'``: ``{'width', 'height', 'depth'}`` read from ``<size>``,
          * ``'boxes'``: one dict per ``<object>`` element holding ``name``,
            ``pose``, ``truncated``, ``difficult`` and the integer
            ``xmin``/``ymin``/``xmax``/``ymax`` of its ``<bndbox>``.

    Raises:
        AttributeError: if ``<size>`` or one of the per-object tags read below
            is missing (``find`` returns ``None``).
        ValueError: if a numeric tag does not contain an integer.
    """
    # Normalise to Path so that both str and Path arguments work; ``.absolute()``
    # further down only exists on Path objects.
    xml_path = Path(xml_file)
    root = ET.parse(xml_path).getroot()

    size_node = root.find('size')
    size = {
        dimension: int(size_node.find(dimension).text)
        for dimension in ('width', 'height', 'depth')
    }

    list_with_all_boxes = []
    for annotated_object in root.iter('object'):
        list_with_all_boxes.append({
            'name': annotated_object.find("name").text,
            'pose': annotated_object.find("pose").text,
            'truncated': int(annotated_object.find("truncated").text),
            'difficult': int(annotated_object.find("difficult").text),
            'xmin': int(annotated_object.find("bndbox/xmin").text),
            'ymin': int(annotated_object.find("bndbox/ymin").text),
            'xmax': int(annotated_object.find("bndbox/xmax").text),
            'ymax': int(annotated_object.find("bndbox/ymax").text),
        })

    return {
        'full_path': str(xml_path.absolute()),
        'size': size,
        'boxes': list_with_all_boxes,
    }

def get_xml_files(folder: str):
    """Parse every ``*.xml`` file located directly inside ``folder``.

    Returns a dict that maps each file's stem (file name without extension)
    to the dictionary produced by ``read_content`` for that file.
    """
    return {
        annotation_path.stem: read_content(annotation_path)
        for annotation_path in Path(folder).glob('*.xml')
    }

# day_xml_dict = get_xml_files(day_annotation_folder)
# night_xml_dict = get_xml_files(night_annotation_folder) 

In [73]:
from pylabel import importer
import pandas as pd

# Folders holding the XML annotation files of the two splits.
day_annotation_folder = './deer_detection/training_data/voc_day/Annotations/'
night_annotation_folder = './deer_detection/training_data/voc_night/Annotations/'

# Handed to ImportVOC unchanged as the location of the images.
path_to_images = "../JPEGImages/"

# Import the day split first, then the night split, keyed by split name.
datasets = {
    split_name: importer.ImportVOC(path=annotation_folder, path_to_images=path_to_images)
    for split_name, annotation_folder in (
        ("day", day_annotation_folder),
        ("night", night_annotation_folder),
    )
}

# Direct handles to the individual datasets.
day_dataset = datasets["day"]
night_dataset = datasets["night"]

Importing VOC files...: 100%|██████████| 15314/15314 [00:05<00:00, 2555.23it/s]
Importing VOC files...: 100%|██████████| 10345/10345 [00:04<00:00, 2510.60it/s]


In [81]:
# Reference id -> name table, taken from the day dataset. The pairs are read
# row-wise from the frame so an id can never be matched with the wrong name.
day_categories = datasets["day"].df[["cat_id", "cat_name"]].dropna().drop_duplicates()
cat_mapping = dict(zip(day_categories["cat_id"], day_categories["cat_name"]))
day_id_by_name = {cat_name: cat_id for cat_id, cat_name in cat_mapping.items()}

# Align the night dataset with the day ids *by category name*.
# The names come from the annotation files and are the ground truth, so they are
# left untouched; only the ids are rewritten. Deriving the names from the ids
# instead would relabel night annotations whenever the two imports numbered
# their categories differently.
# NOTE(review): presumably ImportVOC assigns cat_id per import, so the two
# numberings are not guaranteed to agree — confirm against pylabel.
night_names = datasets["night"].df["cat_name"]
unmapped_names = sorted(set(night_names.dropna().unique()) - set(day_id_by_name))
if unmapped_names:
    # Fail loudly instead of silently writing NaN ids into the night dataset.
    raise ValueError(f"Night categories missing from the day dataset: {unmapped_names}")
datasets["night"].df["cat_id"] = night_names.map(day_id_by_name)

# id -> name table of the night dataset after alignment (a subset of cat_mapping).
night_categories = datasets["night"].df[["cat_id", "cat_name"]].dropna().drop_duplicates()
night_cat_mapping = dict(zip(night_categories["cat_id"], night_categories["cat_name"]))


In [99]:
import copy

# Tag every row with the folder its image lives in, so rows stay attributable
# to their split once both frames are merged.
datasets['day'].df['img_folder'] = 'voc_day/JPEGImages/'
datasets['night'].df['img_folder'] = 'voc_night/JPEGImages/'

# Give the night images ids that cannot clash with the day images. Without this,
# a day image and a night image can share one img_id in the combined frame.
# The shift is applied to a copy, so the night dataset itself is not modified and
# re-running this cell yields the same result.
# NOTE(review): assumes img_id is a numeric column that each ImportVOC call
# numbers independently — confirm against pylabel.
night_rows = datasets['night'].df.copy()
night_rows['img_id'] = night_rows['img_id'] + datasets['day'].df['img_id'].max() + 1

# The combined dataset starts as a deep copy of the day dataset object and then
# gets the concatenated frame. reset_index() renumbers the rows and keeps the
# previous index as a regular column.
datasets['combined'] = copy.deepcopy(datasets['day'])
datasets['combined'].df = pd.concat([datasets['day'].df, night_rows]).reset_index()

In [100]:
# Print the headline statistics exposed by each dataset's `analyze` attribute.
for dataset_name, dataset in datasets.items():
    analysis = dataset.analyze
    print(f"Dataset: {dataset_name}")
    print(f"Number of images: {analysis.num_images}")
    print(f"Number of classes: {analysis.num_classes}")
    print(f"Classes:{analysis.classes}")
    print(f"Class counts:\n{analysis.class_counts}")

Dataset: day
Number of images: 15313
Number of classes: 17
Classes:['Leopard', 'AmurTiger', 'Badger', 'BlackBear', 'Cow', 'RaccoonDog', 'Dog', 'Hare', 'LeopardCat', 'MuskDeer', 'RedFox', 'RoeDeer', 'Sable', 'SikaDeer', 'Weasel', 'WildBoar', 'Y.T.Marten']
Class counts:
SikaDeer      1849
RoeDeer       1686
WildBoar      1526
Cow           1364
Dog           1360
Leopard       1102
LeopardCat    1045
RedFox        1001
BlackBear      973
Y.T.Marten     968
Badger         955
AmurTiger      828
Weasel         755
RaccoonDog     352
MuskDeer       271
Sable          192
Hare            20
Name: cat_name, dtype: int64
Dataset: night
Number of images: 10344
Number of classes: 17
Classes:['Leopard', 'AmurTiger', 'Badger', 'BlackBear', 'Cow', 'RaccoonDog', 'Dog', 'Hare', 'LeopardCat', 'MuskDeer', 'RedFox', 'RoeDeer', 'Sable', 'SikaDeer', 'Weasel', 'WildBoar', 'Y.T.Marten']
Class counts:
MuskDeer      1264
Dog           1261
LeopardCat    1031
Hare           937
RedFox         872
WildBoar     

In [106]:
# Point the exporter at the combined dataset, then write it out as one COCO file.
combined_dataset = datasets['combined']
combined_dataset.export.dataset = combined_dataset
combined_dataset.export.ExportToCoco(output_path='./deer_detection/training_data/coco_combined.json')

Exporting to COCO file...: 100%|██████████| 26946/26946 [00:14<00:00, 1826.15it/s]


['./deer_detection/training_data/coco_combined.json']

In [None]:
from IPython.display import Image, display
# Draw the bounding boxes of one image from the combined dataset.
# NOTE(review): 1000 presumably selects the image by id/index — confirm against pylabel.
display(datasets['combined'].visualize.ShowBoundingBoxes(1000))
# NOTE(review): the commented-out call below selects an image by file name, but
# "BloodImage_00315.jpg" does not look like a file from this dataset — presumably
# a leftover from an example; consider removing it.
# display(dataset.visualize.ShowBoundingBoxes("BloodImage_00315.jpg"))

In [64]:
# Combine the two datasets


['ReindexCatIds',
 '__class__',
 '__delattr__',
 '__dict__',
 '__dir__',
 '__doc__',
 '__eq__',
 '__format__',
 '__ge__',
 '__getattribute__',
 '__gt__',
 '__hash__',
 '__init__',
 '__init_subclass__',
 '__le__',
 '__lt__',
 '__module__',
 '__ne__',
 '__new__',
 '__reduce__',
 '__reduce_ex__',
 '__repr__',
 '__setattr__',
 '__sizeof__',
 '__str__',
 '__subclasshook__',
 '__weakref__',
 'analyze',
 'df',
 'export',
 'labeler',
 'name',
 'path_to_annotations',
 'splitter',
 'visualize']

In [42]:
import torchvision
import os

class DataLoader(torchvision.datasets.CocoDetection):
    """COCO-format detection dataset that yields preprocessed samples.

    The torchvision base class reads the image and its COCO annotations and
    provides ``ids``; this subclass runs both through ``feature_extractor``.

    NOTE(review): despite its name this is a dataset, not a batch loader, and
    the name is easy to confuse with ``torch.utils.data.DataLoader``.
    """

    def __init__(self, annotations, feature_extractor, train=True, img_folder=None):
        """Create the dataset.

        Args:
            annotations: Path to the COCO annotation JSON file.
            feature_extractor: Callable invoked as
                ``feature_extractor(images=..., annotations=..., return_tensors="pt")``
                whose result provides ``"pixel_values"`` and ``"labels"``.
            train: Stored on the instance; not used by this class itself.
            img_folder: Directory the image file names in the annotation file
                are relative to. Defaults to the directory that contains
                ``annotations``.
        """
        if img_folder is None:
            img_folder = os.path.dirname(os.path.abspath(annotations))
        super().__init__(img_folder, annotations)
        self.annotations = annotations
        # __getitem__ needs the extractor, so it must be kept on the instance.
        self.feature_extractor = feature_extractor
        self.train = train

    def __getitem__(self, idx):
        """Return ``(pixel_values, target)`` for the sample at position ``idx``."""
        # The base class returns the PIL image and its list of COCO annotation dicts.
        img, target = super().__getitem__(idx)

        # Wrap the annotations in the structure the feature extractor is called with.
        # A single COCO annotation looks like
        # {"id": ..., "category_id": ..., "iscrowd": ..., "image_id": ..., "area": ...,
        #  "bbox": [x, y, width, height]}.
        image_id = self.ids[idx]
        target = {'image_id': image_id, 'annotations': target}
        encoding = self.feature_extractor(images=img, annotations=target, return_tensors="pt")

        # The extractor returns a batch of one; strip only that leading batch dimension.
        pixel_values = encoding["pixel_values"].squeeze(0)
        target = encoding["labels"][0]

        return pixel_values, target

from transformers import AutoFeatureExtractor

# Load the preprocessing pipeline of the "hustvl/yolos-small" checkpoint with custom size limits.
# NOTE(review): the model loaded at the top of the notebook comes from
# "hustvl/yolos-tiny" — confirm which checkpoint is intended.
feature_extractor = AutoFeatureExtractor.from_pretrained("hustvl/yolos-small", size=512, max_size=864)

# Build the training and validation datasets from the '/train' and '/valid' sub-folders.
# NOTE(review): no `CocoDetection` name is defined in the visible cells (the class
# defined in this cell is called `DataLoader` and takes `annotations`, not
# `img_folder`), and `dataset` is the loop variable left over from the statistics
# cell, which is not shown to have a `location` attribute. This looks adapted from
# another example and presumably fails on a fresh kernel — confirm and adapt.
train_dataset = CocoDetection(img_folder=(dataset.location + '/train'), feature_extractor=feature_extractor)
val_dataset = CocoDetection(img_folder=(dataset.location + '/valid'), feature_extractor=feature_extractor, train=False)

# Report how many samples each split contains.
print("Number of training examples:", len(train_dataset))
print("Number of validation examples:", len(val_dataset))

ImportError: Using the `Trainer` with `PyTorch` requires `accelerate>=0.20.1`: Please run `pip install transformers[torch]` or `pip install accelerate -U`