| @@ -0,0 +1,155 @@ | ||
| import numpy as np | ||
|
|
||
| import os | ||
| import sys | ||
| import argparse | ||
| import yaml | ||
| import time | ||
| import datetime | ||
| import json | ||
| import importlib | ||
| import logging | ||
| import shutil | ||
| import cv2 | ||
| import random | ||
|
|
||
| import torch | ||
| import torch.nn as nn | ||
|
|
||
| import torchvision | ||
|
|
||
| import htracking | ||
| from htracking.yolo3 import ModelMain, YOLOLoss | ||
| from htracking.yolo3.common.utils import non_max_suppression, bbox_iou | ||
| from htracking.utils import read_config, draw_bbox, get_rgb_colors | ||
|
|
||
| from PIL import Image | ||
|
|
||
|
|
||
# ---------------------------------------------------------------------------
# Prediction-script setup: parse CLI arguments, load the YAML configuration,
# and select the GPU devices used for inference.
# ---------------------------------------------------------------------------

# Construct the argument parser and parse the arguments
ap = argparse.ArgumentParser()
ap.add_argument("-c", "--config", required=False, default='config.yaml',
                help="Configuration file")  # fixed typo: "Configuaration"
args = vars(ap.parse_args())

config_name = args['config']

logging.basicConfig(level=logging.DEBUG,
                    format="[%(asctime)s %(filename)s] %(message)s")

# Read the configuration file (path resolved relative to the current dir).
cwd = os.getcwd()
config_path = os.path.join(cwd, config_name)
config = read_config(config_path)

gpu_devices = config["gpu_devices"]
num_gpus = len(gpu_devices)
# Effective batch size scales with the number of GPUs used by DataParallel.
batch_size = config["batch_size"] * num_gpus

print("Predicting images:")
print("gpu_devices = {}".format(gpu_devices))
print("batch_size = {}".format(batch_size))

# Restrict CUDA to the configured devices before any CUDA context is created.
os.environ["CUDA_VISIBLE_DEVICES"] = ','.join(map(str, gpu_devices))

is_training = False
classes = config["classes"]
num_classes = len(classes)
predict_images_path = config["predict_images_path"]
predict_output_path = config["predict_output_path"]

# NOTE(review): only ToTensor is applied here — there is no Resize to
# (img_w, img_h), yet the bbox rescaling in the inference loop assumes the
# network input is img_w x img_h.  Confirm ModelMain resizes internally;
# otherwise a torchvision Resize transform should be added.
transform = torchvision.transforms.Compose([torchvision.transforms.ToTensor()])
|
|
||
# Load and initialize the network (inference mode disables training-only paths).
net = ModelMain(config, is_training=is_training)
net.train(is_training)

# Set data parallel across the visible GPUs.
net = nn.DataParallel(net)
net = net.cuda()

# Restore pretrained model weights; inference is meaningless without them.
model_pretrained = config["model_pretrained"]
if model_pretrained:
    logging.info("load checkpoint from {}".format(model_pretrained))
    state_dict = torch.load(model_pretrained)
    net.load_state_dict(state_dict)
else:
    # ValueError (a config-entry problem) is more specific than a bare
    # Exception, but is still caught by any existing `except Exception`.
    raise ValueError("missing the model pretrained!!!")

# YOLO loss modules, one per detection scale; in eval mode these decode raw
# feature maps into box predictions rather than computing a loss.
yolo_losses = []
for i in range(3):
    yolo_losses.append(YOLOLoss(config["yolo"]["anchors"][i],
                                num_classes, (config["img_w"], config["img_h"])))

# Prepare the list of image paths to process.
images_name = os.listdir(predict_images_path)
images_path = [os.path.join(predict_images_path, name) for name in images_name]

if len(images_path) == 0:
    # FileNotFoundError is still an Exception subclass (backward compatible).
    raise FileNotFoundError("no image found in {}".format(predict_images_path))

# Create the output directory on demand.
if not os.path.isdir(predict_output_path):
    os.makedirs(predict_output_path)

colors = get_rgb_colors()
|
|
||
for path in images_path:
    logging.info("processing: {}".format(path))
    # BUG FIX: PIL's Image.open raises on a bad/corrupt file instead of
    # returning None, and the original `if image is None` check was placed
    # AFTER np.array(image) had already used the image — it could never
    # trigger.  Catch the load error explicitly and skip the file.
    try:
        image = Image.open(path).convert('RGB')
    except (OSError, ValueError):
        logging.error("read path error: {}. skip it.".format(path))
        continue

    # Keep an OpenCV-style (BGR) copy of the original frame for drawing/saving.
    open_cv_image = np.array(image)
    open_cv_image = open_cv_image[:, :, ::-1].copy()  # RGB -> BGR

    image_ori = open_cv_image  # save original one
    image = transform(image)
    image = image.unsqueeze(0)  # add batch dimension

    # Inference: decode the three YOLO scales, concatenate, then run NMS.
    with torch.no_grad():
        output = net(image)
        output_list = []
        for i in range(3):
            output_list.append(yolo_losses[i](output[i]))
        output = torch.cat(output_list, 1)
        detections = non_max_suppression(output, num_classes,
                                         conf_thres=config["confidence_thresh"])
        detections = detections[0]  # single-image batch

    # Write result images: draw bounding boxes and labels of detections.
    if detections is not None:
        unique_labels = detections[:, -1].cpu().unique()
        n_cls_preds = len(unique_labels)
        for x1, y1, x2, y2, conf, cls_conf, cls_pred in detections:
            # Rescale coordinates from the network input size (img_w x img_h)
            # back to the original image dimensions.
            ori_h, ori_w = image_ori.shape[:2]
            pre_h, pre_w = config["img_h"], config["img_w"]
            bbox_h = ((y2 - y1) / pre_h) * ori_h
            bbox_w = ((x2 - x1) / pre_w) * ori_w
            y1 = (y1 / pre_h) * ori_h
            x1 = (x1 / pre_w) * ori_w

            # Draw the bbox with "<class>(<confidence>)" label.
            bbox = (x1, y1, x1 + bbox_w, y1 + bbox_h)
            cls_index = int(cls_pred)
            lb = "{}({:4.2f})".format(classes[cls_index], cls_conf)
            draw_bbox(image_ori, bbox, label=lb, color=colors[cls_index])

    output_path = os.path.join(predict_output_path, os.path.basename(path))
    cv2.imwrite(output_path, np.uint8(image_ori))

logging.info("done")
| @@ -0,0 +1,55 @@ | ||
# Operation phase: train / eval / predict
phase: train

classes: ['index', 'middle']

# Train / eval / predict paths
train_images_path: /home/andrew/projects/datasets/fingers/train/images
train_ann_path: /home/andrew/projects/datasets/fingers/train/xml
eval_images_path: ""
eval_ann_path: ""
predict_images_path: /home/andrew/projects/datasets/fingers/train/images
predict_output_path: output
# NOTE(review): the predict script reads `model_pretrained`, which is not
# defined here — confirm the predict config supplies it.

# Model settings
model_params:
  backbone_name: darknet_53
  backbone_pretrained: /home/andrew/projects/htracking/weights/darknet53_weights_pytorch.pth # set empty to disable

# YOLO settings
yolo:
  anchors: [[[91,38], [100,204], [103,115]],
            [[108,154], [117,219], [123,65]],
            [[131,178], [137,112], [142,235]]]

# Optimization
lr:
  backbone_lr: 0.001
  other_lr: 0.01
  freeze_backbone: False   # freeze backbone weights to finetune
  decay_gamma: 0.1
  decay_step: 20           # decay lr in every ? epochs

optimizer:
  type: sgd
  weight_decay: 0.00004    # 4e-05; note that plain yaml floats need the expanded form

batch_size: 8   # 16
epochs: 30      # Training epochs
img_h: 416      # Height
img_w: 416      # Width
gpu_devices: [0, 1, 2]  # config GPU devices
working_dir: /home/andrew/projects/htracking/tools/train/run # replace with your working dir

pretrain_snapshot: ""   # load checkpoint
evaluate_type: ""       # fixed stray quote: was `evaluate_type":`
try: 0

confidence: 0.5
confidence_thresh: 0.5
nms_thresh: 0.4

export_onnx: False
|
| @@ -0,0 +1,222 @@ | ||
| import numpy as np | ||
|
|
||
| import os | ||
| import sys | ||
| import argparse | ||
| import yaml | ||
| import time | ||
| import datetime | ||
| import json | ||
| import importlib | ||
| import logging | ||
| import shutil | ||
|
|
||
| import torch | ||
| import torch.nn as nn | ||
| import torch.optim as optim | ||
| import torch.nn.functional as F | ||
|
|
||
| import torchvision | ||
| from tensorboardX import SummaryWriter | ||
|
|
||
| import htracking | ||
| from htracking.datasets import VOCDetection | ||
| from htracking.transforms import ListToNumpy, NumpyToTensor | ||
| from htracking.utils import read_config | ||
| from htracking.yolo3 import ModelMain, YOLOLoss | ||
|
|
||
|
|
||
| def _save_checkpoint(state_dict, config, evaluate_func=None): | ||
| # global best_eval_result | ||
| checkpoint_path = os.path.join(config["sub_working_dir"], "model.pth") | ||
| torch.save(state_dict, checkpoint_path) | ||
| logging.info("Model checkpoint saved to %s" % checkpoint_path) | ||
|
|
||
|
|
||
| def _get_optimizer(config, net): | ||
| optimizer = None | ||
|
|
||
| # Assign different lr for each layer | ||
| params = None | ||
| base_params = list( | ||
| map(id, net.backbone.parameters()) | ||
| ) | ||
| logits_params = filter(lambda p: id(p) not in base_params, net.parameters()) | ||
|
|
||
| if not config["lr"]["freeze_backbone"]: | ||
| params = [ | ||
| {"params": logits_params, "lr": config["lr"]["other_lr"]}, | ||
| {"params": net.backbone.parameters(), "lr": config["lr"]["backbone_lr"]}, | ||
| ] | ||
| else: | ||
| logging.info("freeze backbone's parameters.") | ||
| for p in net.backbone.parameters(): | ||
| p.requires_grad = False | ||
| params = [ | ||
| {"params": logits_params, "lr": config["lr"]["other_lr"]}, | ||
| ] | ||
|
|
||
| # Initialize optimizer class | ||
| if config["optimizer"]["type"] == "adam": | ||
| optimizer = optim.Adam(params, weight_decay=config["optimizer"]["weight_decay"]) | ||
| elif config["optimizer"]["type"] == "amsgrad": | ||
| optimizer = optim.Adam(params, weight_decay=config["optimizer"]["weight_decay"], | ||
| amsgrad=True) | ||
| elif config["optimizer"]["type"] == "rmsprop": | ||
| optimizer = optim.RMSprop(params, weight_decay=config["optimizer"]["weight_decay"]) | ||
| else: | ||
| # Default to sgd | ||
| logging.info("Using SGD optimizer.") | ||
| optimizer = optim.SGD(params, momentum=0.9, | ||
| weight_decay=config["optimizer"]["weight_decay"], | ||
| nesterov=(config["optimizer"]["type"] == "nesterov")) | ||
|
|
||
| return optimizer | ||
|
|
||
|
|
||
# ---------------------------------------------------------------------------
# Training-script setup: CLI arguments, config, run directory, TensorBoard.
# ---------------------------------------------------------------------------

# Construct the argument parser and parse the arguments
ap = argparse.ArgumentParser()
ap.add_argument("-c", "--config", required=False, default='config.yaml',
                help="Configuration file")  # fixed typo: "Configuaration"
args = vars(ap.parse_args())

config_name = args['config']

logging.basicConfig(level=logging.DEBUG,
                    format="[%(asctime)s %(filename)s] %(message)s")

# Read the configuration file
cwd = os.getcwd()
config_path = os.path.join(cwd, config_name)
config = read_config(config_path)

gpu_devices = config["gpu_devices"]
num_gpus = len(gpu_devices)
# Effective batch size scales with the number of GPUs used by DataParallel.
batch_size = config["batch_size"] * num_gpus

# Show parameters
print("Start training:")
print("gpu_devices = {}".format(gpu_devices))
print("batch_size = {}".format(batch_size))

# Create a unique sub_working_dir per run: backbone / size / try / timestamp.
sub_working_dir = '{}/{}/size{}x{}_try{}/{}'.format(
    config['working_dir'], config['model_params']['backbone_name'],
    config['img_w'], config['img_h'], config['try'],
    time.strftime("%Y%m%d%H%M%S", time.localtime()))
if not os.path.exists(sub_working_dir):
    os.makedirs(sub_working_dir)
config["sub_working_dir"] = sub_working_dir
logging.info("sub working dir: %s" % sub_working_dir)

# Create the TensorBoard summary writer for this run.
config["tensorboard_writer"] = SummaryWriter(sub_working_dir)
logging.info("Please using 'python -m tensorboard.main --logdir={}'".format(sub_working_dir))

# Restrict CUDA to the configured devices before any CUDA context is created.
os.environ["CUDA_VISIBLE_DEVICES"] = ','.join(map(str, gpu_devices))
|
|
||
|
|
||
# Resume the global step counter if a start step is configured.
config["global_step"] = config.get("start_step", 0)
# ONNX export requires the graph in inference mode; otherwise train mode.
is_training = not config.get("export_onnx")
classes = config['classes']
num_classes = len(classes)

# Load and initialize the network.
net = ModelMain(config, is_training=is_training)
net.train(is_training)

# Optimizer and learning-rate schedule (built before the DataParallel wrap
# so the parameter groups reference the bare module's parameters).
optimizer = _get_optimizer(config, net)
lr_scheduler = optim.lr_scheduler.StepLR(
    optimizer,
    step_size=config["lr"]["decay_step"],
    gamma=config["lr"]["decay_gamma"])

# Wrap for multi-GPU execution and move to the GPUs.
net = nn.DataParallel(net)
net = net.cuda()

# Optionally restore a pretrained checkpoint.
if config["pretrain_snapshot"]:
    logging.info("Load pretrained weights from {}".format(config["pretrain_snapshot"]))
    state_dict = torch.load(config["pretrain_snapshot"])
    net.load_state_dict(state_dict)

# One YOLO loss module per detection scale (3 scales).
yolo_losses = [
    YOLOLoss(config["yolo"]["anchors"][i], num_classes,
             (config["img_w"], config["img_h"]))
    for i in range(3)
]

# Dataset with VOC-style XML annotations.
train_images_path = config['train_images_path']
train_ann_path = config['train_ann_path']

transform = torchvision.transforms.Compose([torchvision.transforms.ToTensor()])
target_transform = torchvision.transforms.Compose([ListToNumpy(), NumpyToTensor()])
dataset = VOCDetection(train_images_path, train_ann_path,
                       transform=transform, target_transform=target_transform)

# Data loader.
dataloader = torch.utils.data.DataLoader(dataset, batch_size=batch_size,
                                         shuffle=True, num_workers=32,
                                         pin_memory=True)
|
|
||
|
|
||
# Start the training loop
logging.info("Start training.")
for epoch in range(config["epochs"]):
    for step, samples in enumerate(dataloader):
        images, labels = samples["image"], samples["label"]
        start_time = time.time()
        config["global_step"] += 1

        # Forward and backward
        optimizer.zero_grad()
        outputs = net(images)
        losses_name = ["total_loss", "x", "y", "w", "h", "conf", "cls"]
        # BUG FIX: `[[]] * n` creates n references to ONE shared list, so
        # every loss component from every scale was appended to the same
        # list and each per-component "sum" summed all of them.  Build
        # independent lists instead.
        losses = [[] for _ in losses_name]
        for i in range(3):
            _loss_item = yolo_losses[i](outputs[i], labels)
            for j, l in enumerate(_loss_item):
                losses[j].append(l)
        # Sum each component over the 3 scales; index 0 is the total loss.
        losses = [sum(l) for l in losses]
        loss = losses[0]
        loss.backward()
        optimizer.step()

        # Periodic console + TensorBoard logging.
        if step > 0 and step % 10 == 0:
            _loss = loss.item()
            duration = float(time.time() - start_time)
            example_per_second = batch_size / duration
            lr = optimizer.param_groups[0]['lr']
            logging.info(
                "epoch [%.3d] iter = %d loss = %.2f example/sec = %.3f lr = %.5f "%
                (epoch, step, _loss, example_per_second, lr)
            )
            config["tensorboard_writer"].add_scalar("lr",
                                                    lr,
                                                    config["global_step"])
            config["tensorboard_writer"].add_scalar("example/sec",
                                                    example_per_second,
                                                    config["global_step"])
            for i, name in enumerate(losses_name):
                value = _loss if i == 0 else losses[i]
                config["tensorboard_writer"].add_scalar(name,
                                                        value,
                                                        config["global_step"])

        # Periodic checkpoint within an epoch.
        if step > 0 and step % 1000 == 0:
            _save_checkpoint(net.state_dict(), config)

    # Decay the learning rate once per epoch.
    lr_scheduler.step()

# Final checkpoint after the last epoch.
_save_checkpoint(net.state_dict(), config)
logging.info("Bye~")
|
|
||
|
|
||
|
|
| @@ -0,0 +1,15 @@ | ||
| ### 0. Overview | ||
| All of these weights work with this project in PyTorch format. | ||
|
|
||
| ### 1. YOLO v3 weights base on darknet_53 backbone (mAP=59.66%) | ||
| * Name: yolov3_weights_pytorch.pth | ||
| * Download: [Google Drive](https://drive.google.com/open?id=1Bm_CLv9hP3mMQ5cyerKRjvt7_t1duvjI) or [Baidu Drive](https://pan.baidu.com/s/1gx-XRUE1NTfIMKkQ1L0awQ) | ||
|
|
||
| ### 2. Backbone <darknet53> weights | ||
| * This is a pretrained model. Use it to train on your own dataset. | ||
| * Name: darknet53_weights_pytorch.pth | ||
| * Download: [Google Drive](https://drive.google.com/open?id=1VYwHUznM3jLD7ftmOSCHnpkVpBJcFIOA) or [Baidu Drive](https://pan.baidu.com/s/1axXjz6ct9Rn9GtDTust6DA) | ||
|
|
||
| ### 3. Official weights. | ||
| * Name: official_yolov3_weights_pytorch.pth | ||
| * Download: [Google Drive](https://drive.google.com/file/d/1SnFAlSvsx37J7MDNs3WWLgeKY0iknikP/view?usp=sharing) or [Baidu Drive](https://pan.baidu.com/s/1YCcRLPWPNhsQfn5f8bs_0g) |