In [50]:
# process the input data to fit yolo training image format
import json
import numpy as np 
import os

# Directory holding the source `<name>.jpg` images and their `<name>.json` annotations.
DATA_PATH = 'dataset/all_data'
# Default root directory under which the YOLO-formatted images and labels are written.
OUTPUT_PATH = 'dataset/yolo_hand_data'
# Target (width, height) in pixels that every image is resized to.
RESIZED_IMAGE_SIZE = (416, 416)

In [51]:
# sanity check: draw the margin-padded hand bounding box on one resized sample image
from PIL import Image, ImageDraw

# Basename (no extension) shared by the sample's `.json` annotation and `.jpg` image.
file_path = f'{DATA_PATH}/000015774_01_l'

# Read the annotation first so the JSON handle is closed before any image work.
with open(f'{file_path}.json') as json_file:
    data = json.load(json_file)

# `resize` returns a new, fully loaded image, so the source file can be closed right away.
with Image.open(f'{file_path}.jpg') as img:
    original_width, original_height = img.size
    resized_img = img.resize(RESIZED_IMAGE_SIZE)

draw = ImageDraw.Draw(resized_img)

# Extreme key points (in original-image pixels) define the tight hand box.
hand_pts = data['hand_pts']
left_pt = min(hand_pts, key=lambda hand_pt: hand_pt[0])
right_pt = max(hand_pts, key=lambda hand_pt: hand_pt[0])
up_pt = min(hand_pts, key=lambda hand_pt: hand_pt[1])
low_pt = max(hand_pts, key=lambda hand_pt: hand_pt[1])

# Map original pixel coordinates onto the resized image.
horizontal_scalar = RESIZED_IMAGE_SIZE[0] / original_width
vertical_scalar = RESIZED_IMAGE_SIZE[1] / original_height
resized_upper_left_coordinate = (left_pt[0] * horizontal_scalar, up_pt[1] * vertical_scalar)
resized_lower_right_coordinate = (right_pt[0] * horizontal_scalar, low_pt[1] * vertical_scalar)

# Grow the tight box by `margin` times its own width/height on every side.
margin = 0.2
resized_bb_width = resized_lower_right_coordinate[0] - resized_upper_left_coordinate[0]
resized_bb_height = resized_lower_right_coordinate[1] - resized_upper_left_coordinate[1]

resized_upper_left_coordinate_with_margin = (
    resized_upper_left_coordinate[0] - resized_bb_width * margin,
    resized_upper_left_coordinate[1] - resized_bb_height * margin
)
resized_lower_right_coordinate_with_margin = (
    resized_lower_right_coordinate[0] + resized_bb_width * margin,
    resized_lower_right_coordinate[1] + resized_bb_height * margin
)
# The two fields are separated explicitly; adjacent f-strings are concatenated as-is.
print(f'resized upper left: {resized_upper_left_coordinate_with_margin}, '
      f'resized lower right: {resized_lower_right_coordinate_with_margin}')

draw.rectangle(
    (resized_upper_left_coordinate_with_margin, resized_lower_right_coordinate_with_margin),
    outline=(255, 255, 255)
)

resized_img.show()

resized upper left: (260.70407409667973, 296.49859320746526)resized lower right: (286.4343185424804, 334.85767035590277)


In [57]:
import glob
from sklearn.model_selection import train_test_split

def generate_image_list(data_path=DATA_PATH, test_run=False, head_size=500):
    """Split the images found in `data_path` into train and test name lists.

    Args:
        data_path: Directory searched (non-recursively) for `*.jpg` files.
        test_run: When true, only the first `head_size` images are used.
        head_size: Number of images kept when `test_run` is true.

    Returns:
        A tuple `(filenames_train, filenames_test, label_train, label_test)`.
        The `filenames_*` lists hold image basenames without extension; the
        `label_*` lists hold the matching `<basename>.json` names.

    Raises:
        ValueError: If no `.jpg` file is selected, or (from
            `train_test_split`) if there are too few to split.
    """
    seed = 1

    # glob returns paths in arbitrary, filesystem-dependent order; sorting makes
    # both the `head_size` subset and the seeded split reproducible.
    jpg_file_paths = sorted(glob.glob(f'{data_path}/*.jpg'))

    if test_run:
        jpg_file_paths = jpg_file_paths[:head_size]

    if not jpg_file_paths:
        raise ValueError(f'no .jpg files found in {data_path!r}')

    # splitext drops only the extension. str.strip('.jpg') would instead strip any
    # leading/trailing '.', 'j', 'p' or 'g' characters (e.g. 'game.jpg' -> 'ame').
    file_names = [
        os.path.splitext(os.path.basename(jpg_file_path))[0]
        for jpg_file_path in jpg_file_paths
    ]
    json_file_names = [f'{file_name}.json' for file_name in file_names]

    # partition the data into a training set and a held-out set (90% / 10%)
    filenames_train, filenames_test, label_train, label_test = train_test_split(
        file_names, json_file_names, test_size=0.1, random_state=seed)
    return filenames_train, filenames_test, label_train, label_test

In [58]:
from tqdm import tqdm

def _padded_hand_box(hand_pts, scale_x, scale_y, img_size, margin):
    """Return the margin-padded hand box, clipped to the image, as (x_min, y_min, x_max, y_max)."""
    xs = [hand_pt[0] * scale_x for hand_pt in hand_pts]
    ys = [hand_pt[1] * scale_y for hand_pt in hand_pts]
    x_min, x_max = min(xs), max(xs)
    y_min, y_max = min(ys), max(ys)

    # The margin is a fraction of the tight box's own width/height, added on every side.
    pad_x = (x_max - x_min) * margin
    pad_y = (y_max - y_min) * margin

    # Clip so the normalized label values stay inside [0, 1].
    img_width, img_height = img_size
    return (
        max(0.0, x_min - pad_x),
        max(0.0, y_min - pad_y),
        min(float(img_width), x_max + pad_x),
        min(float(img_height), y_max + pad_y),
    )


def create_yolo_standard_dataset_from_filenames(filenames,
                                 data_type='train',
                                 resized_img_size=RESIZED_IMAGE_SIZE,
                                 margin=0.15,
                                 output_path=OUTPUT_PATH,
                                 test_run=False,
                                 plot_rec=False,
                                 data_path=DATA_PATH):
    """Write resized images and YOLO label files for the given samples.

    For every name in `filenames`, `<data_path>/<name>.json` and
    `<data_path>/<name>.jpg` are read. The image is resized to
    `resized_img_size` and saved to `<output_path>/images/<data_type>/<name>.jpg`.
    A one-line label `0 x_center y_center width height` (normalized to the
    resized image) is saved to `<output_path>/labels/<data_type>/<name>.txt`.
    The box encloses all `hand_pts` of the annotation, padded by `margin`
    and clipped to the image.

    Args:
        filenames: Sample basenames without extension.
        data_type: Name of the split sub-directory (e.g. 'train').
        resized_img_size: Target (width, height) of the saved images.
        margin: Padding added on each side, as a fraction of the tight box size.
        output_path: Root directory of the generated dataset.
        test_run: When true, only the first 10 samples are processed and the
            output root is forced to 'dataset/yolo_hand_data_test'.
        plot_rec: When true together with `test_run`, the box is drawn onto
            the saved image.
        data_path: Directory containing the source `.jpg` / `.json` pairs.
    """
    if test_run:
        filenames = filenames[:10]
        output_path = 'dataset/yolo_hand_data_test'

    # np.savetxt / Image.save do not create missing directories.
    os.makedirs(os.path.join(output_path, f'labels/{data_type}'), exist_ok=True)
    os.makedirs(os.path.join(output_path, f'images/{data_type}'), exist_ok=True)

    for filename in tqdm(filenames):
        json_file_path = f'{data_path}/{filename}.json'
        jpg_file_path = f'{data_path}/{filename}.jpg'

        with open(json_file_path) as json_file:
            data = json.load(json_file)

        # `resize` returns a new, fully loaded image, so the source can be closed.
        with Image.open(jpg_file_path) as jpg_file:
            original_width, original_height = jpg_file.size
            resized_img = jpg_file.resize(resized_img_size)

        # Scale with the size actually used for resizing, not the module default.
        horizontal_scalar = resized_img_size[0] / original_width
        vertical_scalar = resized_img_size[1] / original_height

        x_min, y_min, x_max, y_max = _padded_hand_box(
            data['hand_pts'], horizontal_scalar, vertical_scalar, resized_img_size, margin)

        if test_run and plot_rec:
            # Draw exactly the box that is written to the label file.
            draw = ImageDraw.Draw(resized_img)
            draw.rectangle(((x_min, y_min), (x_max, y_max)), outline=(255, 255, 255))

        # Centre and size come from the same float corners, so they stay consistent
        # (no independent integer truncation of corners and extents).
        x_center = (x_min + x_max) / 2 / resized_img_size[0]
        y_center = (y_min + y_max) / 2 / resized_img_size[1]
        bb_width = (x_max - x_min) / resized_img_size[0]
        bb_height = (y_max - y_min) / resized_img_size[1]

        # Single class (id 0); one row per image.
        yolo_data = np.array([[0, x_center, y_center, bb_width, bb_height]])
        np.savetxt(
            os.path.join(output_path, f'labels/{data_type}/{filename}.txt'),
            yolo_data,
            fmt=['%d', '%f', '%f', '%f', '%f'])
        # write the new resized image to the formatted dataset
        resized_img.save(os.path.join(output_path, f'images/{data_type}/{filename}.jpg'))

In [60]:
# Build a reduced dataset (subset of the images) for quick initial tuning.
filenames_train, filenames_test, _, _ = generate_image_list(test_run=True)

# Same conversion for both splits; only the file list and the split folder differ.
for split_filenames, split_name in ((filenames_train, 'train'),
                                    (filenames_test, 'validation')):
    create_yolo_standard_dataset_from_filenames(
        split_filenames,
        output_path='dataset/yolo_hand_data_smaller',
        data_type=split_name,
    )

100%|██████████| 450/450 [00:15<00:00, 29.82it/s]
100%|██████████| 50/50 [00:01<00:00, 29.02it/s]


In [62]:
# Build the complete dataset used for the final training run.
filenames_train, filenames_test, _, _ = generate_image_list()

# Both splits go to the default output root; only the split folder differs.
for split_filenames, split_name in ((filenames_train, 'train'),
                                    (filenames_test, 'validation')):
    create_yolo_standard_dataset_from_filenames(split_filenames, data_type=split_name)

100%|██████████| 2482/2482 [01:52<00:00, 22.15it/s]
100%|██████████| 276/276 [00:13<00:00, 20.80it/s]
