# Project V. Fish Detection with Deep Learning
1. Split Train and Val dataset
2. Train a detection model based on YOLOv3-tiny
3. Evaluate your model
4. Use your model to detect fish from images in data/samples

## Setup
Please install required packages and make sure the version are valid 

pip install -r requirements.txt

In [1]:
from __future__ import division

import numpy
from utils.logger import *
from utils.utils import *
from utils.datasets import *
from utils.augmentations import *
from utils.transforms import *
from utils.parse_config import *
from utils.test import evaluate
from utils.loss import compute_loss
from utils.models import *

from terminaltables import AsciiTable
from matplotlib.ticker import NullLocator

import os
import sys
import time
import datetime
import argparse
import tqdm

import torch
from torch.utils.data import DataLoader
from torchvision import datasets
from torchvision import transforms
from torch.autograd import Variable
import torch.optim as optim


# Data Preprocess
You should code this part first

In [2]:
#####################################################################################################
#                                            Your Code                                              #
#####################################################################################################
# You should generate valid Train dataset and Val dataset.
# Use data in data/custom/images and data/custom/labels to generate the path file train.txt and 
# val.txt in data/custom/
# a qualified val dataset is smaller than the train dataset and 
# most time there are no overlapped data between two sets.

data_path = os.path.join('data', 'custom', 'images')
res_path = os.path.join('data', 'custom')
files = os.listdir(data_path)
id = 1
os.remove(os.path.join(res_path, 'train.txt'))
os.remove(os.path.join(res_path, 'valid.txt'))
for file in files:
    if id % 4 == 0:
        with open(os.path.join(res_path, 'valid.txt'), 'a') as f:
            f.write(os.path.join(data_path, file) + '\n')
        with open(os.path.join(res_path, 'train.txt'), 'a') as f:
            f.write(os.path.join(data_path, file) + '\n')
    else:
        with open(os.path.join(res_path, 'train.txt'), 'a') as f:
            f.write(os.path.join(data_path, file) + '\n')
    id += 1






#####################################################################################################
#                                                End                                                #
#####################################################################################################

Make some config...

In [3]:
opt = {
    "epochs": 50,
    "model_def": "config/yolov3-tiny.cfg",
    "data_config": "config/custom.data",
    "pretrained_weights": "",
    "n_cpu": 1,
    "img_size": 416,
    "multiscale_training": True,
    "detect_image_folder": "data/samples"
}
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
os.makedirs("output", exist_ok=True)
os.makedirs("checkpoints", exist_ok=True)
# Get data configuration    
data_config = parse_data_config(opt["data_config"])
train_path = data_config["train"]
valid_path = data_config["valid"]
class_names = load_classes(data_config["names"])
print(train_path)
print(valid_path)
print(class_names)

data/custom/train.txt
data/custom/valid.txt
['Fish']


use pytorch to generate our model and dataset

In [4]:
# Initiate model
model = Darknet(opt["model_def"]).to(device)
model.apply(weights_init_normal)

# If specified we start from checkpoint
if opt["pretrained_weights"] != "":
    if opt["pretrained_weights"].endswith(".pth"):
        model.load_state_dict(torch.load(opt["pretrained_weights"]))
    else:
        model.load_darknet_weights(opt["pretrained_weights"])

# Get dataloader
dataset = ListDataset(train_path, multiscale=opt["multiscale_training"], img_size=opt["img_size"],
                      transform=AUGMENTATION_TRANSFORMS)
dataloader = torch.utils.data.DataLoader(
    dataset,
    batch_size=model.hyperparams['batch'] // model.hyperparams['subdivisions'],
    shuffle=True,
    # num_workers=opt["n_cpu"],
    pin_memory=True,
    collate_fn=dataset.collate_fn,
)

if (model.hyperparams['optimizer'] in [None, "adam"]):
    optimizer = torch.optim.Adam(
        model.parameters(),
        lr=model.hyperparams['learning_rate'],
        weight_decay=model.hyperparams['decay'],
    )
elif (model.hyperparams['optimizer'] == "sgd"):
    optimizer = torch.optim.SGD(
        model.parameters(),
        lr=model.hyperparams['learning_rate'],
        weight_decay=model.hyperparams['decay'],
        momentum=model.hyperparams['momentum'])
else:
    print("Unknown optimizer. Please choose between (adam, sgd).")


# Train your model!
You are required to complete the DL project training steps (get data batch from dataloader, forward, compute the loss and backward)
see more details in following comments.

In [5]:
for epoch in range(opt["epochs"]):
    # print("\n---- Training Model ----")
    model.train()
    Tensor = torch.cuda.FloatTensor if torch.cuda.is_available() else torch.FloatTensor
    #####################################################################################################
    #                                            Your Code                                              #
    #####################################################################################################
    # Your code need to execute forward and backward steps.
    # Use 'enumerate' to get a batch[_, images, targets]
    # some helpful function
    # - outputs = model.__call__(imgs)(use it by model(imgs))
    # - loss, _ = cumpte_loss(outputs, targets, model)
    # - loss.backward() (backward step)
    # - optimizer.step() (execute params updating)
    # - optimizer.zero_grad() (reset gradients)
    # if you want to see how loss changes in each mini-batch step:
    # -eg print(f'Epoch:{epoch+1}, Step{step+1}/{len(dataloader)}, loss:{loss.item()}')

    for _, data in enumerate(dataloader):
        imgs = Variable(data[1].type(Tensor))
        targets = Variable(data[2].type(Tensor))
        imgs = imgs.to(device)
        targets = targets.to(device)
        outputs = model(imgs)
        loss, _ = compute_loss(outputs, targets, model)
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
    print(f'Epoch:{epoch + 1}, loss:{loss.item()}')

    #####################################################################################################
    #                                                End                                                #
    #####################################################################################################



Epoch:1, loss:0.2007865309715271
Epoch:2, loss:0.17080873250961304
Epoch:3, loss:0.22521547973155975
Epoch:4, loss:0.16347791254520416
Epoch:5, loss:0.16029654443264008
Epoch:6, loss:0.4544885456562042
Epoch:7, loss:0.25425630807876587
Epoch:8, loss:0.5013631582260132
Epoch:9, loss:0.2361803650856018
Epoch:10, loss:0.17969642579555511
Epoch:11, loss:0.509188175201416
Epoch:12, loss:0.4566617012023926
Epoch:13, loss:0.22353094816207886
Epoch:14, loss:0.1903538852930069
Epoch:15, loss:0.31509724259376526
Epoch:16, loss:0.2340620458126068
Epoch:17, loss:0.15219596028327942
Epoch:18, loss:0.2099800556898117
Epoch:19, loss:0.2794630527496338
Epoch:20, loss:0.181960329413414
Epoch:21, loss:0.804013192653656
Epoch:22, loss:0.3129715025424957
Epoch:23, loss:0.22195680439472198
Epoch:24, loss:0.26359301805496216
Epoch:25, loss:0.4767688810825348
Epoch:26, loss:0.18741068243980408
Epoch:27, loss:0.581952691078186
Epoch:28, loss:0.21889112889766693
Epoch:29, loss:0.10869461297988892
Epoch:30, los

# Evaluate and save current model

In [6]:
print("\n---- Evaluating Model ----")
# Evaluate the model on the validation set
metrics_output = evaluate(
    model,
    path=valid_path,
    iou_thres=0.5,
    conf_thres=0.1,
    nms_thres=0.5,
    img_size=opt["img_size"],
    batch_size=model.hyperparams['batch'] // model.hyperparams['subdivisions'],
)

if metrics_output is not None:
    precision, recall, AP, f1, ap_class = metrics_output
    evaluation_metrics = [
        ("validation/precision", precision.mean()),
        ("validation/recall", recall.mean()),
        ("validation/mAP", AP.mean()),
        ("validation/f1", f1.mean()),
    ]
    # Print class APs and mAP
    ap_table = [["Index", "Class name", "AP"]]
    for i, c in enumerate(ap_class):
        print(class_names, c)
        ap_table += [[c, class_names[c], "%.5f" % AP[i]]]
    print(AsciiTable(ap_table).table)
    print(f"---- mAP {AP.mean()}")
else:
    print("---- mAP not measured (no detections found by model)")
torch.save(model.state_dict(), f"checkpoints/yolov3-tiny_ckpt_%d.pth" % epoch)


---- Evaluating Model ----


  return _VF.meshgrid(tensors, **kwargs)  # type: ignore[attr-defined]
Detecting objects: 100%|██████████| 30/30 [00:12<00:00,  2.40it/s]
Computing AP: 100%|██████████| 1/1 [00:00<00:00, 1144.42it/s]

['Fish'] 0
+-------+------------+---------+
| Index | Class name | AP      |
+-------+------------+---------+
| 0     | Fish       | 0.18448 |
+-------+------------+---------+
---- mAP 0.18447890739955408





# Detect and visualize results

In [7]:
model.eval()  # Set in evaluation mode
dataloader = DataLoader(
    ImageFolder(opt["detect_image_folder"], transform=
    transforms.Compose([DEFAULT_TRANSFORMS, Resize(opt["img_size"])])),
    batch_size=1,
    shuffle=False,
)
Tensor = torch.cuda.FloatTensor if torch.cuda.is_available() else torch.FloatTensor
imgs = []  # Stores image paths
img_detections = []  # Stores detections for each image index
print("\nPerforming object detection:")
for batch_i, (img_paths, input_imgs) in enumerate(dataloader):
    # Configure input
    input_imgs = Variable(input_imgs.type(Tensor))
    # Get detections
    with torch.no_grad():
        detections = model(input_imgs)
        detections = non_max_suppression(detections, 0.2, 0.7)
    imgs.extend(img_paths)
    img_detections.extend(detections)
# Bounding-box colors
cmap = plt.get_cmap("tab20b")
colors = [cmap(i) for i in np.linspace(0, 1, 20)]
print("\nSaving images:")
# Iterate through images and save plot of detections
for img_i, (path, detections) in enumerate(zip(imgs, img_detections)):
    print("(%d) Image: '%s'" % (img_i, path))
    # Create plot
    img = np.array(Image.open(path))
    plt.figure()
    fig, ax = plt.subplots(1)
    ax.imshow(img)
    # Draw bounding boxes and labels of detections
    if detections is not None:
        # Rescale boxes to original image
        detections = detections.cpu()
        detections = rescale_boxes(detections, opt["img_size"], img.shape[:2])
        unique_labels = detections[:, -1].cpu().unique()
        n_cls_preds = len(unique_labels)
        bbox_colors = random.sample(colors, n_cls_preds)
        for x1, y1, x2, y2, cls_conf, cls_pred in detections:
            print("\t+ Label: %s, Conf: %.5f" % (class_names[int(cls_pred)], cls_conf.item()))
            box_w = x2 - x1
            box_h = y2 - y1
            color = bbox_colors[int(np.where(unique_labels == int(cls_pred))[0])]
            # Create a Rectangle patch
            bbox = patches.Rectangle((x1, y1), box_w, box_h, linewidth=2, edgecolor=color, facecolor="none")
            # Add the bbox to the plot
            ax.add_patch(bbox)
            # Add label
            plt.text(
                x1,
                y1,
                s=class_names[int(cls_pred)],
                color="white",
                verticalalignment="top",
                bbox={"color": color, "pad": 0},
            )
    # Save generated image with detections
    plt.axis("off")
    plt.gca().xaxis.set_major_locator(NullLocator())
    plt.gca().yaxis.set_major_locator(NullLocator())
    filename = os.path.basename(path).split(".")[0]
    output_path = os.path.join("output", f"{filename}.jpg")
    plt.savefig(output_path, bbox_inches="tight", pad_inches=0.0)
    plt.close()


Performing object detection:

Saving images:
(0) Image: 'data/samples/test (1).jpg'
	+ Label: Fish, Conf: 0.22565
(1) Image: 'data/samples/test (10).jpg'
(2) Image: 'data/samples/test (11).jpg'
	+ Label: Fish, Conf: 0.33025
	+ Label: Fish, Conf: 0.21102
(3) Image: 'data/samples/test (2).jpg'
	+ Label: Fish, Conf: 0.26019
	+ Label: Fish, Conf: 0.21604
	+ Label: Fish, Conf: 0.20272
(4) Image: 'data/samples/test (3).jpg'
(5) Image: 'data/samples/test (4).jpg'
(6) Image: 'data/samples/test (5).jpg'
	+ Label: Fish, Conf: 0.39646
	+ Label: Fish, Conf: 0.39193
	+ Label: Fish, Conf: 0.35702
	+ Label: Fish, Conf: 0.35490
	+ Label: Fish, Conf: 0.35178
	+ Label: Fish, Conf: 0.33870
	+ Label: Fish, Conf: 0.32759
	+ Label: Fish, Conf: 0.32286
	+ Label: Fish, Conf: 0.30794
	+ Label: Fish, Conf: 0.27533
	+ Label: Fish, Conf: 0.25366
	+ Label: Fish, Conf: 0.25199
	+ Label: Fish, Conf: 0.24369
	+ Label: Fish, Conf: 0.24283
	+ Label: Fish, Conf: 0.23104
	+ Label: Fish, Conf: 0.22059
(7) Image: 'data/sa

<Figure size 432x288 with 0 Axes>

<Figure size 432x288 with 0 Axes>

<Figure size 432x288 with 0 Axes>

<Figure size 432x288 with 0 Axes>

<Figure size 432x288 with 0 Axes>

<Figure size 432x288 with 0 Axes>

<Figure size 432x288 with 0 Axes>

<Figure size 432x288 with 0 Axes>

<Figure size 432x288 with 0 Axes>

<Figure size 432x288 with 0 Axes>

<Figure size 432x288 with 0 Axes>