@@ -0,0 +1,155 @@
import numpy as np

import os
import sys
import argparse
import yaml
import time
import datetime
import json
import importlib
import logging
import shutil
import cv2
import random

import torch
import torch.nn as nn

import torchvision

import htracking
from htracking.yolo3 import ModelMain, YOLOLoss
from htracking.yolo3.common.utils import non_max_suppression, bbox_iou
from htracking.utils import read_config, draw_bbox, get_rgb_colors

from PIL import Image


# Construct the argument parser and parse the arguments.
ap = argparse.ArgumentParser()
# Fixed typo in the user-facing help text ("Configuaration" -> "Configuration").
ap.add_argument("-c", "--config", required=False, default='config.yaml', help="Configuration file")
args = vars(ap.parse_args())

config_name = args['config']


logging.basicConfig(level=logging.DEBUG,
                    format="[%(asctime)s %(filename)s] %(message)s")

# Read the configuration file, resolved relative to the current working directory.
cwd = os.getcwd()
config_path = os.path.join(cwd, config_name)
config = read_config(config_path)

# Effective batch size scales with the number of configured GPUs.
gpu_devices = config["gpu_devices"]
num_gpus = len(gpu_devices)
batch_size = config["batch_size"] * num_gpus

print("Predicting images:")
print("gpu_devices = {}".format(gpu_devices))
print("batch_size = {}".format(batch_size))

# Restrict CUDA to the configured devices before any GPU work starts.
os.environ["CUDA_VISIBLE_DEVICES"] = ','.join(str(d) for d in gpu_devices)

# Inference-mode settings pulled from the config.
is_training = False
classes = config["classes"]
num_classes = len(classes)
predict_images_path = config["predict_images_path"]
predict_output_path = config["predict_output_path"]


transform = torchvision.transforms.Compose([torchvision.transforms.ToTensor()])

# Load and initialize network
net = ModelMain(config, is_training=is_training)
net.train(is_training)

# Set data parallel
net = nn.DataParallel(net)
net = net.cuda()

# Restore pretrain model
model_pretrained = config["model_pretrained"]
if model_pretrained:
logging.info("load checkpoint from {}".format(model_pretrained))
state_dict = torch.load(model_pretrained)
net.load_state_dict(state_dict)
else:
raise Exception("missing the model pretrained!!!")

# YOLO loss with 3 scales
yolo_losses = []
for i in range(3):
yolo_losses.append(YOLOLoss(config["yolo"]["anchors"][i],
num_classes, (config["img_w"], config["img_h"])))

# prepare images path
images_name = os.listdir(predict_images_path)
images_path = [os.path.join(predict_images_path, name) for name in images_name]
#images_path = images_path[:3]

if len(images_path) == 0:
raise Exception("no image found in {}".format(predict_images_path))

# Start inference


if not os.path.isdir(predict_output_path):
os.makedirs(predict_output_path)

colors = get_rgb_colors()

# Run detection on every image and write an annotated copy to the output dir.
for path in images_path:

    logging.info("processing: {}".format(path))

    # PIL raises on an unreadable file rather than returning None (the old
    # `if image is None` check was dead code and also came after first use),
    # so guard the load with try/except instead.
    try:
        image = Image.open(path).convert('RGB')
    except (IOError, OSError):
        logging.error("read path error: {}. skip it.".format(path))
        continue

    # Keep a BGR copy for OpenCV drawing/saving (RGB -> BGR channel flip).
    open_cv_image = np.array(image)[:, :, ::-1].copy()

    image_ori = open_cv_image  # save original one for drawing
    # NOTE(review): the tensor is NOT resized to (img_w, img_h) here, yet the
    # rescale math below assumes the network saw a pre_w x pre_h input —
    # confirm that ModelMain/upstream performs the resize.
    image = transform(image)
    image = image.unsqueeze(0)  # add batch dimension

    # Inference: decode all 3 scales, concatenate, then NMS.
    with torch.no_grad():
        output = net(image)
        output_list = []
        for i in range(3):
            output_list.append(yolo_losses[i](output[i]))
        output = torch.cat(output_list, 1)
        # Pass the configured NMS threshold instead of silently relying on
        # the library default (config defines nms_thresh but it was unused).
        detections = non_max_suppression(output, num_classes,
                                         conf_thres=config["confidence_thresh"],
                                         nms_thres=config["nms_thresh"])
        detections = detections[0]

    # Write result images. Draw bounding boxes and labels of detections.
    if detections is not None:
        for x1, y1, x2, y2, conf, cls_conf, cls_pred in detections:

            # Rescale coordinates from network input size to original dimensions.
            ori_h, ori_w = image_ori.shape[:2]
            pre_h, pre_w = config["img_h"], config["img_w"]
            bbox_h = ((y2 - y1) / pre_h) * ori_h
            bbox_w = ((x2 - x1) / pre_w) * ori_w
            y1 = (y1 / pre_h) * ori_h
            x1 = (x1 / pre_w) * ori_w

            # Draw the bbox with the class label and its confidence.
            bbox = (x1, y1, x1 + bbox_w, y1 + bbox_h)
            cls_index = int(cls_pred)
            lb = "{}({:4.2f})".format(classes[cls_index], cls_conf)
            draw_bbox(image_ori, bbox, label=lb, color=colors[cls_index])

    output_path = os.path.join(predict_output_path, os.path.basename(path))
    cv2.imwrite(output_path, np.uint8(image_ori))

logging.info("done")
@@ -0,0 +1,55 @@
# Operation phase: train / eval / predict
phase: train

classes: ['index', 'middle']

# Train / eval / predict paths
train_images_path: /home/andrew/projects/datasets/fingers/train/images
train_ann_path: /home/andrew/projects/datasets/fingers/train/xml
eval_images_path: ""
eval_ann_path: ""
predict_images_path: /home/andrew/projects/datasets/fingers/train/images
predict_output_path: output

# Model settings
# Model settings
model_params:
  backbone_name: darknet_53
  backbone_pretrained: /home/andrew/projects/htracking/weights/darknet53_weights_pytorch.pth # set empty to disable

# YOLO settings
yolo:
  anchors: [[[91,38], [100,204], [103,115]],
            [[108,154], [117,219], [123,65]],
            [[131,178], [137,112], [142,235]]]

# Optimization
lr:
  backbone_lr: 0.001
  other_lr: 0.01
  freeze_backbone: False # freeze backbone weights to finetune
  decay_gamma: 0.1
  decay_step: 20 # decay lr every N epochs

optimizer:
  type: sgd
  weight_decay: 0.00004 #4e-05, note that yaml doesn't support scientific notation


batch_size: 8 #16
epochs: 30 # Training epochs
img_h: 416 # Height
img_w: 416 # Width
gpu_devices: [0, 1, 2] # config GPU devices
working_dir: /home/andrew/projects/htracking/tools/train/run # replace with your working dir

pretrain_snapshot: "" # load checkpoint
# NOTE(review): the predict script reads `model_pretrained`, which is not
# defined in this file — confirm which key is intended.
evaluate_type: ""
try: 0

confidence: 0.5
confidence_thresh: 0.5
nms_thresh: 0.4

export_onnx: False


@@ -0,0 +1,222 @@
import numpy as np

import os
import sys
import argparse
import yaml
import time
import datetime
import json
import importlib
import logging
import shutil

import torch
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F

import torchvision
from tensorboardX import SummaryWriter

import htracking
from htracking.datasets import VOCDetection
from htracking.transforms import ListToNumpy, NumpyToTensor
from htracking.utils import read_config
from htracking.yolo3 import ModelMain, YOLOLoss


def _save_checkpoint(state_dict, config, evaluate_func=None):
    """Write *state_dict* to ``model.pth`` inside the run's sub working dir.

    ``evaluate_func`` is accepted for interface compatibility but is unused.
    """
    target = os.path.join(config["sub_working_dir"], "model.pth")
    torch.save(state_dict, target)
    logging.info("Model checkpoint saved to %s" % target)


def _get_optimizer(config, net):
    """Build the optimizer described by ``config["optimizer"]``.

    Parameters are split into two learning-rate groups: the backbone
    (``backbone_lr``) and everything else (``other_lr``).  When
    ``config["lr"]["freeze_backbone"]`` is set, backbone parameters get
    ``requires_grad = False`` and only the non-backbone group is optimized.
    """
    lr_cfg = config["lr"]
    opt_cfg = config["optimizer"]
    weight_decay = opt_cfg["weight_decay"]

    # Partition parameters by object identity: backbone vs. everything else.
    backbone_ids = {id(p) for p in net.backbone.parameters()}
    head_params = [p for p in net.parameters() if id(p) not in backbone_ids]

    if lr_cfg["freeze_backbone"]:
        logging.info("freeze backbone's parameters.")
        for p in net.backbone.parameters():
            p.requires_grad = False
        params = [
            {"params": head_params, "lr": lr_cfg["other_lr"]},
        ]
    else:
        params = [
            {"params": head_params, "lr": lr_cfg["other_lr"]},
            {"params": net.backbone.parameters(), "lr": lr_cfg["backbone_lr"]},
        ]

    # Select the optimizer class; anything unrecognized falls back to SGD.
    opt_type = opt_cfg["type"]
    if opt_type == "adam":
        return optim.Adam(params, weight_decay=weight_decay)
    if opt_type == "amsgrad":
        return optim.Adam(params, weight_decay=weight_decay, amsgrad=True)
    if opt_type == "rmsprop":
        return optim.RMSprop(params, weight_decay=weight_decay)
    logging.info("Using SGD optimizer.")
    return optim.SGD(params, momentum=0.9,
                     weight_decay=weight_decay,
                     nesterov=(opt_type == "nesterov"))


# Construct the argument parser and parse the arguments.
ap = argparse.ArgumentParser()
# Fixed typo in the user-facing help text ("Configuaration" -> "Configuration").
ap.add_argument("-c", "--config", required=False, default='config.yaml', help="Configuration file")
args = vars(ap.parse_args())

config_name = args['config']


logging.basicConfig(level=logging.DEBUG,
                    format="[%(asctime)s %(filename)s] %(message)s")

# Read the configuration file
cwd = os.getcwd()
config_path = os.path.join(cwd, config_name)
config = read_config(config_path)

# Effective batch size scales with the number of configured GPUs.
gpu_devices = config["gpu_devices"]
num_gpus = len(gpu_devices)
batch_size = config["batch_size"] * num_gpus

# Show parameters
print("Start training:")
print("gpu_devices = {}".format(gpu_devices))
print("batch_size = {}".format(batch_size))

# Create sub_working_dir: one run directory per (backbone, size, try, timestamp).
sub_working_dir = '{}/{}/size{}x{}_try{}/{}'.format(
    config['working_dir'], config['model_params']['backbone_name'],
    config['img_w'], config['img_h'], config['try'],
    time.strftime("%Y%m%d%H%M%S", time.localtime()))
if not os.path.exists(sub_working_dir):
    os.makedirs(sub_working_dir)
config["sub_working_dir"] = sub_working_dir
logging.info("sub working dir: %s" % sub_working_dir)

# Create the TensorBoard summary writer for this run.
config["tensorboard_writer"] = SummaryWriter(sub_working_dir)
logging.info("Please using 'python -m tensorboard.main --logdir={}'".format(sub_working_dir))

# Restrict CUDA to the configured devices before any GPU work starts.
os.environ["CUDA_VISIBLE_DEVICES"] = ','.join(map(str, gpu_devices))


config["global_step"] = config.get("start_step", 0)
is_training = False if config.get("export_onnx") else True
classes = config['classes']
num_classes = len(classes)

# Load and initialize network
net = ModelMain(config, is_training=is_training)
net.train(is_training)

# Optimizer and learning rate
optimizer = _get_optimizer(config, net)
lr_scheduler = optim.lr_scheduler.StepLR(
optimizer,
step_size=config["lr"]["decay_step"],
gamma=config["lr"]["decay_gamma"])

# Set data parallel
net = nn.DataParallel(net)
net = net.cuda()

# Restore pretrain model
if config["pretrain_snapshot"]:
logging.info("Load pretrained weights from {}".format(config["pretrain_snapshot"]))
state_dict = torch.load(config["pretrain_snapshot"])
net.load_state_dict(state_dict)


# YOLO loss with 3 scales
yolo_losses = []
for i in range(3):
yolo_losses.append(YOLOLoss(config["yolo"]["anchors"][i],
num_classes, (config["img_w"], config["img_h"])))

# Dataset
train_images_path = config['train_images_path']
train_ann_path = config['train_ann_path']

transform = torchvision.transforms.Compose([torchvision.transforms.ToTensor()])
target_transform = torchvision.transforms.Compose([ListToNumpy(), NumpyToTensor()])
dataset = VOCDetection(train_images_path, train_ann_path, transform=transform, target_transform=target_transform)

# Data loader
dataloader = torch.utils.data.DataLoader(dataset, batch_size=batch_size,
shuffle=True, num_workers=32, pin_memory=True)


# Start the training loop
logging.info("Start training.")
for epoch in range(config["epochs"]):
    for step, samples in enumerate(dataloader):
        images, labels = samples["image"], samples["label"]
        start_time = time.time()
        config["global_step"] += 1

        # Forward and backward
        optimizer.zero_grad()
        outputs = net(images)
        losses_name = ["total_loss", "x", "y", "w", "h", "conf", "cls"]
        # BUG FIX: the original `losses = [[]] * len(losses_name)` repeats the
        # SAME list object, so every loss component was appended into one
        # shared list and each summed entry became the sum of ALL components
        # across all scales -- a wrong total loss was backpropagated.  Build
        # independent lists instead.
        losses = [[] for _ in losses_name]
        for i in range(3):
            _loss_item = yolo_losses[i](outputs[i], labels)
            for j, l in enumerate(_loss_item):
                losses[j].append(l)
        # Sum each component over the 3 scales; losses[0] is the total loss.
        losses = [sum(l) for l in losses]
        loss = losses[0]
        loss.backward()
        optimizer.step()

        # Log throughput and per-component losses every 10 steps.
        if step > 0 and step % 10 == 0:
            _loss = loss.item()
            duration = float(time.time() - start_time)
            example_per_second = batch_size / duration
            lr = optimizer.param_groups[0]['lr']
            logging.info(
                "epoch [%.3d] iter = %d loss = %.2f example/sec = %.3f lr = %.5f "%
                (epoch, step, _loss, example_per_second, lr)
            )
            config["tensorboard_writer"].add_scalar("lr",
                                                    lr,
                                                    config["global_step"])
            config["tensorboard_writer"].add_scalar("example/sec",
                                                    example_per_second,
                                                    config["global_step"])
            for i, name in enumerate(losses_name):
                value = _loss if i == 0 else losses[i]
                config["tensorboard_writer"].add_scalar(name,
                                                        value,
                                                        config["global_step"])

        # Periodic mid-epoch checkpoint.
        if step > 0 and step % 1000 == 0:
            _save_checkpoint(net.state_dict(), config)

    # Decay the learning rate once per epoch.
    lr_scheduler.step()

# Final checkpoint after the last epoch.
_save_checkpoint(net.state_dict(), config)
logging.info("Bye~")



@@ -0,0 +1,15 @@
### 0. Overview
All of these weights work with this project in PyTorch's format.

### 1. YOLO v3 weights base on darknet_53 backbone (mAP=59.66%)
* Name: yolov3_weights_pytorch.pth
* Download: [Google Drive](https://drive.google.com/open?id=1Bm_CLv9hP3mMQ5cyerKRjvt7_t1duvjI) or [Baidu Drive](https://pan.baidu.com/s/1gx-XRUE1NTfIMKkQ1L0awQ)

### 2. Backbone `darknet53` weights
* This is a pretrained model. Use it to train on your own dataset.
* Name: darknet53_weights_pytorch.pth
* Download: [Google Drive](https://drive.google.com/open?id=1VYwHUznM3jLD7ftmOSCHnpkVpBJcFIOA) or [Baidu Drive](https://pan.baidu.com/s/1axXjz6ct9Rn9GtDTust6DA)

### 3. Official weights.
* Name: official_yolov3_weights_pytorch.pth
* Download: [Google Drive](https://drive.google.com/file/d/1SnFAlSvsx37J7MDNs3WWLgeKY0iknikP/view?usp=sharing) or [Baidu Drive](https://pan.baidu.com/s/1YCcRLPWPNhsQfn5f8bs_0g)