# Load YOLOv5 repository

In [None]:
!git clone https://github.com/ultralytics/yolov5  # clone repo
!pip install -qr yolov5/requirements.txt  # install dependencies (ignore errors)
%cd yolov5

import torch

clear_output()
print('Setup complete. Using torch %s %s' % (torch.__version__, torch.cuda.get_device_properties(0) if torch.cuda.is_available() else 'CPU'))

Setup complete. Using torch 1.7.0+cu101 _CudaDeviceProperties(name='Tesla P100-PCIE-16GB', major=6, minor=0, total_memory=16280MB, multi_processor_count=56)


In [None]:
# Mount Google Drive at /content/drive; later cells read the image archives,
# the CSV annotations and the training checkpoints from /content/drive/MyDrive.
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [None]:
import os
import matplotlib.pyplot as plt
import shutil
import json
import numpy as np
import pandas as pd
import cv2
import time

# Dataset configuration

In [None]:
def get_info(data):

  '''
  Parse stringified numeric lists into lists of Python numbers
  Input:
    - data - iterable of strings such as "[304.2, 204.65, 68, 42]"
             (a column of the .csv table as numpy array); the first and
             last character of each string are treated as brackets and dropped
  Output:
    - new_data - list of lists with int and float numbers (bbox coords or keypoint coords);
                 an empty "[]" entry yields an empty list
  '''

  def _to_number(token):
    # Whole numbers stay int; everything else (decimals, exponent notation
    # such as "1e-05") falls back to float instead of crashing in int().
    try:
      return int(token)
    except ValueError:
      return float(token)

  new_data = []
  for line in data:
    body = line[1:-1].strip()
    if not body:
      # "[]" has no tokens; splitting it would hand '' to the number parser
      new_data.append([])
      continue
    new_data.append([_to_number(token.strip()) for token in body.split(',')])

  return new_data

In [None]:
def prepare_yolo_data(unique_filenames, bboxes, images_names, images_path, labels_path):

  '''
  Create textfiles for yolov5 training
  Each image gets one .txt file with one line per bbox:
    "0 x_center y_center width height", all normalised by the image size
    and rounded to 4 decimals (class id is always 0)
  Input:
    - unique_filenames - names of unique images in dataset
    - bboxes - numpy array with coordinates of hands bboxes as
               [x_min, y_min, width, height] in pixels
    - images_names - numpy array with image filenames with hands
                     (row i of bboxes belongs to images_names[i])
    - images_path - path for folder with images
    - labels_path - path for folder with textfiles
  Raises:
    - FileNotFoundError - if an image cannot be read from images_path
  '''

  # Loop through unique images
  for image_name in unique_filenames:
    image_file = os.path.join(images_path, image_name)
    img = cv2.imread(image_file)
    if img is None:
      # cv2.imread reports failure by returning None rather than raising
      raise FileNotFoundError('Cannot read image: ' + str(image_file))
    img_height, img_width = img.shape[0], img.shape[1]

    # Collect one label line per bbox that belongs to the current image
    img_boxes_list = []
    for ind in np.where(images_names == image_name)[0]:
      x_min, y_min, box_w, box_h = bboxes[ind][:4]
      # True division: floor division dropped the fractional half-size and
      # shifted the centre by up to half a pixel.
      x_center = x_min + box_w / 2
      y_center = y_min + box_h / 2
      normalised = (x_center / img_width, y_center / img_height,
                    box_w / img_width, box_h / img_height)
      img_boxes_list.append('0 ' + ' '.join(str(round(value, 4)) for value in normalised))

    # Write annotation information in .txt file for current image.
    # splitext handles extensions of any length (.jpg, .jpeg, ...).
    label_name = os.path.splitext(image_name)[0] + '.txt'
    with open(os.path.join(labels_path, label_name), 'w') as output:
      # join puts a newline between every pair of rows, including duplicate
      # rows that compare equal to the last one.
      output.write('\n'.join(img_boxes_list))

## Val

In [None]:
# Extract the validation image archive from Drive into /content/val_images/ (-q: quiet)
!unzip -q /content/drive/MyDrive/pose_estimation/hands_val.zip -d '/content/val_images/'

In [None]:
# Start from a clean slate. ignore_errors avoids the "No such file or directory"
# failure that `rm -r` reports on a first run, when the folders do not exist yet.
shutil.rmtree('/content/data/labels/valid', ignore_errors=True)
shutil.rmtree('/content/data/images/valid', ignore_errors=True)
# Make directories in the yolov5 format
os.makedirs('/content/data/labels/valid/', exist_ok=True)
os.makedirs('/content/data/images/valid/', exist_ok=True)

# Define current image folder
tmp_images_path = '/content/val_images/hands_val/'

rm: cannot remove '/content/data/labels/valid': No such file or directory
rm: cannot remove '/content/data/images/valid': No such file or directory


In [None]:
# Final locations of validation images and labels in the yolov5 layout
images_val_path = '/content/data/images/valid/'
labels_val_path = '/content/data/labels/valid/'

# Relocate every extracted file into the validation images folder
for entry in os.listdir(tmp_images_path):
  shutil.move(src=os.path.join(tmp_images_path, entry),
              dst=os.path.join(images_val_path, entry))

In [None]:
# Load the validation annotations; the preview below shows the columns
# image_name, hand_boxes and hand_keypoints (boxes/keypoints stored as strings).
df = pd.read_csv('/content/drive/MyDrive/pose_estimation/val.csv')
df.head()

Unnamed: 0,image_name,hand_boxes,hand_keypoints
0,000000425226.jpg,"[304.2, 204.65, 68.03000000000003, 42.57999999...","[308.0, 239.0, 0.1408521831035614, 321.0, 226...."
1,000000458992.jpg,"[330.59, 313.06, 91.80000000000001, 76.4100000...","[391.0, 311.0, 0.017855476588010788, 372.5, 33..."
2,000000458992.jpg,"[-20.2, 308.56, 92.57000000000001, 87.51999999...","[10.0, 323.0, 0.09969476610422134, 22.0, 355.0..."
3,000000031817.jpg,"[213.72, 165.49, 57.47999999999999, 40.9599999...","[252.0, 182.0, 0.011041253805160522, 238.5, 18..."
4,000000182611.jpg,"[266.69, 233.57, 60.78000000000003, 38.2400000...","[271.0, 243.0, 0.08970954269170761, 286.0, 246..."


In [None]:
# Convert the table to a numpy array so the next cell can slice columns by position
data_numpy = df.to_numpy()
data_numpy.shape

(1051, 3)

In [None]:
# Get bbox information: column 0 holds the image names, column 1 the
# stringified bboxes, which get_info turns into numbers
images_names_val = np.array(data_numpy[:, 0])
bboxes_val = np.array(get_info(data_numpy[:, 1]))

In [None]:
# Create text files for yolov5 training: one label file per distinct image.
# The printed number is the count of distinct validation images.
unique_filenames, filenames_counts = np.unique(images_names_val, return_counts=True)
print(len(unique_filenames))
prepare_yolo_data(unique_filenames, bboxes_val, images_names_val, images_val_path, labels_val_path)

609


## Train

In [None]:
# Extract the training image archive from Drive into /content/train_images/ (-q: quiet)
!unzip -q /content/drive/MyDrive/pose_estimation/hands_train.zip -d '/content/train_images/'

In [None]:
# Start from a clean slate. ignore_errors avoids the "No such file or directory"
# failure that `rm -r` reports on a first run, when the folders do not exist yet.
shutil.rmtree('/content/data/labels/train', ignore_errors=True)
shutil.rmtree('/content/data/images/train', ignore_errors=True)
# Make directories in the yolov5 format
os.makedirs('/content/data/labels/train/', exist_ok=True)
os.makedirs('/content/data/images/train/', exist_ok=True)

# Define current image folder
tmp_images_path = '/content/train_images/hands_train/'

rm: cannot remove '/content/data/labels/train': No such file or directory
rm: cannot remove '/content/data/images/train': No such file or directory


In [None]:
# Final locations of training images and labels in the yolov5 layout
images_train_path = '/content/data/images/train/'
labels_train_path = '/content/data/labels/train/'

# Relocate every extracted file into the training images folder
for entry in os.listdir(tmp_images_path):
  shutil.move(src=os.path.join(tmp_images_path, entry),
              dst=os.path.join(images_train_path, entry))

In [None]:
# Load the training annotations; the preview below shows the columns
# image_name, hand_boxes and hand_keypoints (boxes/keypoints stored as strings).
# Note: this rebinds `df`, which previously held the validation table.
df = pd.read_csv('/content/drive/MyDrive/pose_estimation/train.csv')
df.head()

Unnamed: 0,image_name,hand_boxes,hand_keypoints
0,000000184659.jpg,"[380.04, 93.93, 101.64999999999998, 71.06]","[477.0, 138.0, 0.2964617609977722, 464.0, 123...."
1,000000184659.jpg,"[366.14, 47.58, 93.93, 89.61]","[453.0, 69.0, 0.1306665688753128, 431.5, 89.0,..."
2,000000457254.jpg,"[292.85, 328.08, 81.5, 70.88]","[297.0, 344.0, 0.07652954012155533, 317.0, 340..."
3,000000064909.jpg,"[254.84, 432.13, 48.72, 47.56]","[289.0, 442.0, 0.05950009822845459, 277.5, 451..."
4,000000064909.jpg,"[213.33, 317.39, 61.109999999999985, 52.759999...","[214.0, 344.0, 0.13771122694015503, 231.5, 333..."


In [None]:
# Convert the table to a numpy array so the next cell can slice columns by position
data_numpy = df.to_numpy()
data_numpy.shape

(27700, 3)

In [None]:
# Get bbox information: column 0 holds the image names, column 1 the
# stringified bboxes, which get_info turns into numbers
images_names_train = np.array(data_numpy[:, 0])
bboxes_train = np.array(get_info(data_numpy[:, 1]))

In [None]:
# Create text files for yolov5 training: one label file per distinct image.
# The printed number is the count of distinct training images.
unique_filenames, filenames_counts = np.unique(images_names_train, return_counts=True)
print(len(unique_filenames))
prepare_yolo_data(unique_filenames, bboxes_train, images_names_train, images_train_path, labels_train_path)

16107


# Set model configuration

A more detailed description is available [here](https://models.roboflow.com/object-detection/yolov5).

In [None]:
# Dataset description passed to train.py via --data: image folders, class count, class names.
# Paths use a single leading slash so they match the /content/... paths used
# everywhere else in this notebook (the original wrote //content/...).
yaml_file = "train: /content/data/images/train\nval: /content/data/images/valid\n\nnc: 1\nnames: ['hand']"

with open("/content/data.yaml", "w") as f:
  f.write(yaml_file)

In [None]:
# Read the class count back from the dataset YAML; it is kept as a string
# and later substituted into the model template as {num_classes}
import yaml
with open("/content/data.yaml", 'r') as stream:
    data_config = yaml.safe_load(stream)
num_classes = str(data_config['nc'])

In [None]:
# Print the stock yolov5l model configuration for reference
%cat /content/yolov5/models/yolov5l.yaml

# parameters
nc: 80  # number of classes
depth_multiple: 1.0  # model depth multiple
width_multiple: 1.0  # layer channel multiple

# anchors
anchors:
  - [10,13, 16,30, 33,23]  # P3/8
  - [30,61, 62,45, 59,119]  # P4/16
  - [116,90, 156,198, 373,326]  # P5/32

# YOLOv5 backbone
backbone:
  # [from, number, module, args]
  [[-1, 1, Focus, [64, 3]],  # 0-P1/2
   [-1, 1, Conv, [128, 3, 2]],  # 1-P2/4
   [-1, 3, C3, [128]],
   [-1, 1, Conv, [256, 3, 2]],  # 3-P3/8
   [-1, 9, C3, [256]],
   [-1, 1, Conv, [512, 3, 2]],  # 5-P4/16
   [-1, 9, C3, [512]],
   [-1, 1, Conv, [1024, 3, 2]],  # 7-P5/32
   [-1, 1, SPP, [1024, [5, 9, 13]]],
   [-1, 3, C3, [1024, False]],  # 9
  ]

# YOLOv5 head
head:
  [[-1, 1, Conv, [512, 1, 1]],
   [-1, 1, nn.Upsample, [None, 2, 'nearest']],
   [[-1, 6], 1, Concat, [1]],  # cat backbone P4
   [-1, 3, C3, [512, False]],  # 13

   [-1, 1, Conv, [256, 1, 1]],
   [-1, 1, nn.Upsample, [None, 2, 'nearest']],
   [[-1, 4], 1, Concat, [1]],  # cat backbone P3
   [-1, 3, C3,

In [None]:
#customize iPython writefile so we can write variables
from IPython.core.magic import register_line_cell_magic

@register_line_cell_magic
def writetemplate(line, cell=None):
    """Write the cell body to a file after filling in {name} placeholders.

    Usage: put ``%%writetemplate <path>`` on the first line of a cell; the
    rest of the cell is the template.

    Args:
        line: text after the magic name, used as the output file path.
        cell: the cell body. Placeholders are filled with ``str.format``
            from this module's global variables, so literal braces in the
            template must be doubled.

    Raises:
        ValueError: if invoked as a line magic, i.e. without a cell body.
        KeyError: if the template names a variable that is not defined.
    """
    if cell is None:
        # The magic is also registered as a line magic, where no body is
        # passed; fail with a clear message instead of an opaque TypeError.
        raise ValueError('writetemplate needs a cell body: use it as a cell magic on the first line of a cell')
    # Drop stray whitespace around the path given on the magic line
    with open(line.strip(), 'w') as f:
        f.write(cell.format(**globals()))

In [None]:
%%writetemplate /content/custom_yolov5x.yaml
# Change model parameters
# (The cell magic above has to be the first line of the cell, so these
# comment lines are written into the YAML file too; keep them free of braces.)
# NOTE: depth_multiple and width_multiple of 1.0 match the yolov5l.yaml printed
# earlier, although the output file is named custom_yolov5x.yaml.

# parameters
nc: {num_classes}  # number of classes
depth_multiple: 1.0  # model depth multiple
width_multiple: 1.0  # layer channel multiple

# anchors
anchors:
  - [10,13, 16,30, 33,23]  # P3/8
  - [30,61, 62,45, 59,119]  # P4/16
  - [116,90, 156,198, 373,326]  # P5/32

# YOLOv5 backbone
backbone:
  # [from, number, module, args]
  [[-1, 1, Focus, [64, 3]],  # 0-P1/2
   [-1, 1, Conv, [128, 3, 2]],  # 1-P2/4
   [-1, 3, C3, [128]],
   [-1, 1, Conv, [256, 3, 2]],  # 3-P3/8
   [-1, 9, C3, [256]],
   [-1, 1, Conv, [512, 3, 2]],  # 5-P4/16
   [-1, 9, C3, [512]],
   [-1, 1, Conv, [1024, 3, 2]],  # 7-P5/32
   [-1, 1, SPP, [1024, [5, 9, 13]]],
   [-1, 3, C3, [1024, False]],  # 9
  ]

# YOLOv5 head
head:
  [[-1, 1, Conv, [512, 1, 1]],
   [-1, 1, nn.Upsample, [None, 2, 'nearest']],
   [[-1, 6], 1, Concat, [1]],  # cat backbone P4
   [-1, 3, C3, [512, False]],  # 13

   [-1, 1, Conv, [256, 1, 1]],
   [-1, 1, nn.Upsample, [None, 2, 'nearest']],
   [[-1, 4], 1, Concat, [1]],  # cat backbone P3
   [-1, 3, C3, [256, False]],  # 17 (P3/8-small)

   [-1, 1, Conv, [256, 3, 2]],
   [[-1, 14], 1, Concat, [1]],  # cat head P4
   [-1, 3, C3, [512, False]],  # 20 (P4/16-medium)

   [-1, 1, Conv, [512, 3, 2]],
   [[-1, 10], 1, Concat, [1]],  # cat head P5
   [-1, 3, C3, [1024, False]],  # 23 (P5/32-large)

   [[17, 20, 23], 1, Detect, [nc, anchors]],  # Detect(P3, P4, P5)
  ]

# Training

Some arguments for training:

- **img:** define input image size
- **batch:** determine batch size
- **epochs:** define the number of training epochs. (Note: often, 3000+ are common here!)
- **data:** set the path to our yaml file
- **cfg:** specify our model configuration
- **weights:** specify a custom path to weights. 
- **nosave:** only save the final checkpoint
- **cache:** cache images for faster training

In [None]:
# Train for 200 epochs at 416 px with batch size 16, caching images, saving under the Drive project folder.
# NOTE(review): the --weights path begins with /content/yolov5/content/drive/... and points at
# yolov5x_results2 - this looks like a doubled path prefix; confirm the checkpoint exists there.
!python '/content/yolov5/train.py' --img 416 --batch 16 --epochs 200 --data '/content/data.yaml' --cfg /content/custom_yolov5x.yaml --weights /content/yolov5/content/drive/MyDrive/pose_estimation/yolov5/yolov5x_results2/weights/last.pt --project /content/drive/MyDrive/pose_estimation/yolov5 --name yolov5x_results  --cache

remote: Enumerating objects: 12, done.[K
remote: Counting objects:   8% (1/12)[Kremote: Counting objects:  16% (2/12)[Kremote: Counting objects:  25% (3/12)[Kremote: Counting objects:  33% (4/12)[Kremote: Counting objects:  41% (5/12)[Kremote: Counting objects:  50% (6/12)[Kremote: Counting objects:  58% (7/12)[Kremote: Counting objects:  66% (8/12)[Kremote: Counting objects:  75% (9/12)[Kremote: Counting objects:  83% (10/12)[Kremote: Counting objects:  91% (11/12)[Kremote: Counting objects: 100% (12/12)[Kremote: Counting objects: 100% (12/12), done.[K
remote: Compressing objects:  12% (1/8)[Kremote: Compressing objects:  25% (2/8)[Kremote: Compressing objects:  37% (3/8)[Kremote: Compressing objects:  50% (4/8)[Kremote: Compressing objects:  62% (5/8)[Kremote: Compressing objects:  75% (6/8)[Kremote: Compressing objects:  87% (7/8)[Kremote: Compressing objects: 100% (8/8)[Kremote: Compressing objects: 100% (8/8), done.[K
remote: Total 12 (de

In [None]:
# Train again starting from the last.pt checkpoint of the yolov5x_results run, writing into the
# same run folder: --exist is parsed as exist_ok=True and resume stays False (see the Namespace output).
!python '/content/yolov5/train.py' --img 416 --batch 16 --epochs 200 --data '/content/data.yaml' --cfg /content/custom_yolov5x.yaml --weights /content/drive/MyDrive/pose_estimation/yolov5/yolov5x_results/weights/last.pt --project /content/drive/MyDrive/pose_estimation/yolov5 --name yolov5x_results  --cache --exist

[34m[1mgithub: [0mup to date with https://github.com/ultralytics/yolov5 ✅
YOLOv5 v4.0-69-ga5359f6 torch 1.7.0+cu101 CUDA:0 (Tesla P100-PCIE-16GB, 16280.875MB)

Namespace(adam=False, batch_size=16, bucket='', cache_images=True, cfg='/content/custom_yolov5x.yaml', data='/content/data.yaml', device='', epochs=200, evolve=False, exist_ok=True, global_rank=-1, hyp='data/hyp.scratch.yaml', image_weights=False, img_size=[416, 416], linear_lr=False, local_rank=-1, log_artifacts=False, log_imgs=16, multi_scale=False, name='yolov5x_results', noautoanchor=False, nosave=False, notest=False, project='/content/drive/MyDrive/pose_estimation/yolov5', quad=False, rect=False, resume=False, save_dir='/content/drive/MyDrive/pose_estimation/yolov5/yolov5x_results', single_cls=False, sync_bn=False, total_batch_size=16, weights='/content/drive/MyDrive/pose_estimation/yolov5/yolov5x_results/weights/last.pt', workers=8, world_size=1)
[34m[1mwandb: [0mInstall Weights & Biases for YOLOv5 logging with 'pip 

In [None]:
# Same command as the previous training cell: another 200-epoch run started from the current
# last.pt of yolov5x_results and written into the same run folder (exist_ok=True).
!python '/content/yolov5/train.py' --img 416 --batch 16 --epochs 200 --data '/content/data.yaml' --cfg /content/custom_yolov5x.yaml --weights /content/drive/MyDrive/pose_estimation/yolov5/yolov5x_results/weights/last.pt --project /content/drive/MyDrive/pose_estimation/yolov5 --name yolov5x_results  --cache --exist

[34m[1mgithub: [0mup to date with https://github.com/ultralytics/yolov5 ✅
YOLOv5 v4.0-69-ga5359f6 torch 1.7.0+cu101 CUDA:0 (Tesla P100-PCIE-16GB, 16280.875MB)

Namespace(adam=False, batch_size=16, bucket='', cache_images=True, cfg='/content/custom_yolov5x.yaml', data='/content/data.yaml', device='', epochs=200, evolve=False, exist_ok=True, global_rank=-1, hyp='data/hyp.scratch.yaml', image_weights=False, img_size=[416, 416], linear_lr=False, local_rank=-1, log_artifacts=False, log_imgs=16, multi_scale=False, name='yolov5x_results', noautoanchor=False, nosave=False, notest=False, project='/content/drive/MyDrive/pose_estimation/yolov5', quad=False, rect=False, resume=False, save_dir='/content/drive/MyDrive/pose_estimation/yolov5/yolov5x_results', single_cls=False, sync_bn=False, total_batch_size=16, weights='/content/drive/MyDrive/pose_estimation/yolov5/yolov5x_results/weights/last.pt', workers=8, world_size=1)
[34m[1mwandb: [0mInstall Weights & Biases for YOLOv5 logging with 'pip 