# Detecation
## Prepare requirements

In [None]:
# clone yolov7 repo
!git clone https://github.com/WongKinYiu/yolov7.git

In [None]:
# get yolov7 pretrained model
!curl -L -o ./yolov7/yolov7.pt https://github.com/WongKinYiu/yolov7/releases/download/v0.1/yolov7.pt

In [None]:
# fix requirements.txt: drop every line that starts with "numpy" and keep the
# rest unchanged (numpy is installed by its own cell further down)
with open("./yolov7/requirements.txt", "r") as requirements:
    keptLines = [entry for entry in requirements if not entry.startswith("numpy")]

fileContent = "".join(keptLines)

with open("./yolov7/requirements.txt", "w") as requirements:
    requirements.write(fileContent)

In [None]:
# install dependencies
!pip install -r ./yolov7/requirements.txt

In [None]:
# install newest numpy version manually
!pip install numpy

In [None]:
# install dependency to collect images
!pip install simple-image-download

## Acquire dataset

In [None]:
# import downloader
from simple_image_download import simple_image_download as sid

# one Downloader instance, reused by every download cell below
response = sid.Downloader()

In [None]:
# define which queries to look for online; each query also names the
# sub-folder of ./simple_images that later cells read its images from
downloadQueries = ["cat", "walking-cat", "cat-face"]

In [None]:
# download images
# The first query is fetched with a larger limit (300) than the remaining
# queries, which are fetched with limit=100 in their own cells, so a single
# loop with one shared limit is not used here.
response.download(downloadQueries[0], limit=300)

In [None]:
# second query ("walking-cat"), requested with limit=100
response.download(downloadQueries[1], limit=100)

In [None]:
# third query ("cat-face"), requested with limit=100
response.download(downloadQueries[2], limit=100)

In [None]:
from os import listdir, remove, rename, mkdir
from os.path import isdir

# drop every downloaded file that is not a .jpeg
for query in downloadQueries:
    queryDir = f"./simple_images/{query}"
    unwanted = [name for name in listdir(queryDir) if not name.endswith(".jpeg")]
    for name in unwanted:
        remove(f"{queryDir}/{name}")

# make sure the shared target folder exists
if not isdir("./images"):
    mkdir("./images")

# gather the surviving images of every query in ./images
for query in downloadQueries:
    queryDir = f"./simple_images/{query}"
    for name in listdir(queryDir):
        rename(f"{queryDir}/{name}", f"./images/{name}")

remainingCount = len(listdir("./images"))
print("Remaining images after this operation: ", remainingCount)

In [None]:
# remove identical images (files whose content has the same MD5 digest)
from hashlib import md5

# digests of all distinct images kept so far; a set gives O(1) membership tests
hashes = set()

for image in listdir("./images"):
    imagePath = f"./images/{image}"
    # the context manager closes the handle explicitly instead of relying on
    # garbage collection, so the file is never still open when it is removed
    with open(imagePath, "rb") as imageFile:
        digest = md5(imageFile.read()).hexdigest()
    if digest in hashes:
        # same content was already seen under another name -> drop this copy
        remove(imagePath)
    else:
        hashes.add(digest)

print("Remaining images after this operation: ", len(listdir("./images")))

## Label & annotate image
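For this process, you have to install labelImg; see the instructions here: https://github.com/HumanSignal/labelImg. Annotate the images in `./images` and save the labels into the same folder in YOLO format, so that every image gets a `.txt` file with the same base name and a `classes.txt` file is created next to them (the cells below rely on this layout).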

## Divide into training, validation & testing data

In [None]:
from os.path import isfile

# create the dataset folder and its three split sub-folders; each folder is
# checked on its own, so an already existing (or only partially created)
# ./dataset is completed instead of being skipped entirely
for datasetDir in ("./dataset", "./dataset/train", "./dataset/val", "./dataset/test"):
    if not isdir(datasetDir):
        mkdir(datasetDir)

# move classes file to dataset folder
if isfile("./images/classes.txt"):
    rename("./images/classes.txt", "./dataset/classes.txt")

# count the images themselves instead of halving the directory listing: the
# result is an exact integer even if an annotation file is missing or an
# unrelated file is present, and the ".jpeg" filter matches the one used when
# the images are distributed to the splits
totalImages = len([name for name in listdir("./images") if name.endswith(".jpeg")])
print("Number of images: ", totalImages)

# percentage of data allocated for each purpose
trainPercentage = 0.8
validationPercentage = 0.15
testingPercentage = 0.05


def validateSplitPercentages(train, validation, testing):
    """Raise if the three split fractions do not add up to 1 (100%).

    The sum is compared with a tolerance instead of ``!=`` because decimal
    fractions are not exact in binary floating point: for example
    0.7 + 0.2 + 0.1 evaluates to 0.9999999999999999 and would be rejected by
    an exact comparison although it is a valid split.

    Args:
        train: Fraction of the images used for training.
        validation: Fraction of the images used for validation.
        testing: Fraction of the images used for testing.

    Raises:
        ValueError: If the fractions do not sum up to 1 within tolerance.
    """
    from math import isclose

    if not isclose(train + validation + testing, 1.0):
        raise ValueError("Training-, Validation-, and Testing-Percentage must sum up to 1 (100%).")


validateSplitPercentages(trainPercentage, validationPercentage, testingPercentage)

# images per split: training and validation are rounded down, testing takes
# whatever is left (or nothing at all when its percentage is zero)
trainImages = int(trainPercentage * totalImages)
validationImages = int(validationPercentage * totalImages)
if testingPercentage > 0:
    testingImages = int(totalImages - trainImages - validationImages)
else:
    testingImages = 0

for caption, amount in (
    ("Number of training images: ", trainImages),
    ("Number of validation images: ", validationImages),
    ("Number of testing images: ", testingImages),
):
    print(caption, amount)

In [None]:
# move images according to their purpose
from random import shuffle

# all .jpeg files in ./images, in random order
listOfImages = [name for name in listdir("./images") if name.endswith(".jpeg")]
shuffle(listOfImages)

# cut the shuffled list into three consecutive slices; the test slice takes
# everything after the validation slice
trainEnd = trainImages
validationEnd = trainEnd + validationImages
listOfTrainImages = listOfImages[:trainEnd]
listOfValidationImages = listOfImages[trainEnd:validationEnd]
listOfTestImages = listOfImages[validationEnd:]

def moveImagesAndAnnotations(purpose, imageList):
    """Move images and their annotation files from ./images into ./dataset/<purpose>.

    For every image the annotation is expected in ./images as a ``.txt`` file
    with the same base name; ``rename`` raises ``FileNotFoundError`` if the
    image or its annotation does not exist.

    Args:
        purpose: Name of the target sub-folder of ./dataset (e.g. "train").
        imageList: File names (not paths) of images located in ./images.
    """
    from os.path import splitext

    for img in imageList:
        # strip only the final extension, so names containing extra dots
        # (e.g. "cat.v2.jpeg") still map to the right annotation "cat.v2.txt"
        annotation = f"{splitext(img)[0]}.txt"
        rename(f"./images/{img}", f"./dataset/{purpose}/{img}")
        rename(f"./images/{annotation}", f"./dataset/{purpose}/{annotation}")

# distribute the three image lists to their dataset sub-folders
for purpose, imageList in (
    ("train", listOfTrainImages),
    ("val", listOfValidationImages),
    ("test", listOfTestImages),
):
    moveImagesAndAnnotations(purpose, imageList)

In [None]:
# clean up leftover directories
from os import rmdir
from shutil import rmtree

# ./images is removed with rmdir (only succeeds on an empty directory),
# ./simple_images is removed together with everything inside it
for leftoverDir, deleteDir in (("./images", rmdir), ("./simple_images", rmtree)):
    if isdir(leftoverDir):
        deleteDir(leftoverDir)

## Training a YOLOv7 model on the dataset
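The config files in `./yolov7-config/` (`data.yaml`, `hyp.scratch.custom.yaml` and `yolov7-cfg-custom.yaml`) must be adjusted for our data before training.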

In [None]:
!python ./yolov7/train.py --workers 4 --device 0 --batch-size 16 --epochs 50 --img 640 640 --data ./yolov7-config/data.yaml --hyp ./yolov7-config/hyp.scratch.custom.yaml --cfg ./yolov7-config/yolov7-cfg-custom.yaml --weight ./yolov7/yolov7.pt --name detecation

## Test performance of the model

In [None]:
!python ./yolov7/detect.py --weight ./runs/train/detecation/weights/best.pt --conf 0.25 --img-size 640 --source ./dataset/test/