In [None]:
import json
import numpy as np
import cv2
from pathlib import Path
import matplotlib.pyplot as plt
import os

In [None]:
def order_points(points):
    """Arrange four 2-D points as [top-left, top-right, bottom-right, bottom-left].

    The points are split into a left pair and a right pair by their x
    coordinate (index 0); within each pair the point with the smaller y
    coordinate (index 1) is taken as the top one.

    Args:
        points: Sequence of exactly four items indexable as ``p[0]`` (x) and
            ``p[1]`` (y).

    Returns:
        A list ``[tl, tr, br, bl]`` containing the original point objects.

    Raises:
        AssertionError: If ``points`` does not hold exactly four items.
    """
    assert len(points) == 4, 'Length of points must be 4'

    def x_of(pt):
        return pt[0]

    def y_of(pt):
        return pt[1]

    # One stable sort by x is enough to obtain both halves.
    by_x = sorted(points, key=x_of)
    left_pair, right_pair = by_x[:2], by_x[2:]

    top_left, bottom_left = sorted(left_pair, key=y_of)
    top_right, bottom_right = sorted(right_pair, key=y_of)
    return [top_left, top_right, bottom_right, bottom_left]

In [None]:
def get_warped_images(image, pts):
    """Cut a quadrilateral out of ``image`` and rectify it to an upright rectangle.

    The four points are ordered with ``order_points`` and mapped by a
    perspective transform onto a rectangle whose width and height are the
    (integer-truncated) lengths of the longer horizontal and vertical sides
    of the quadrilateral.

    Args:
        image: Image array passed to ``cv2.warpPerspective``.
        pts: Four ``(x, y)`` points, in any order, delimiting the region.

    Returns:
        The warped image returned by ``cv2.warpPerspective``.

    Raises:
        AssertionError: Propagated from ``order_points`` when ``pts`` does not
            contain exactly four points.
    """
    rect = order_points(pts)
    tl, tr, br, bl = rect

    def side_length(p, q):
        # Euclidean distance between two corners.
        return np.sqrt(((p[0] - q[0]) ** 2) + ((p[1] - q[1]) ** 2))

    # Truncate to whole pixels once and reuse the same integers for both the
    # destination corners and the output size. Building the corners from the
    # un-truncated floats while truncating only dsize places the far corners
    # up to one pixel outside the output canvas.
    max_width = int(max(side_length(br, bl), side_length(tr, tl)))
    max_height = int(max(side_length(tr, br), side_length(tl, bl)))
    # NOTE(review): a region whose longest side is shorter than 1 px gives a
    # zero-sized dsize here; what OpenCV does with that is not handled —
    # confirm callers never pass such a region.

    dst = np.array([
        [0, 0],
        [max_width - 1, 0],
        [max_width - 1, max_height - 1],
        [0, max_height - 1],
    ], dtype="float32")

    src = np.array(rect, dtype="float32")
    M = cv2.getPerspectiveTransform(src=src, dst=dst)
    warped = cv2.warpPerspective(image, M=M, dsize=(max_width, max_height))

    return warped

In [None]:
def is_label(label):
    """Tell whether ``label`` begins with one of the recognised prefixes.

    Args:
        label: Label string to test.

    Returns:
        True when ``label`` starts with ``'V_'``, ``'SIGN_NAME'`` or
        ``'DATE'``; False otherwise.
    """
    prefixes = ('V_', 'SIGN_NAME', 'DATE')
    # str.startswith accepts a tuple and succeeds on the first matching prefix.
    return label.startswith(prefixes)

In [None]:
# image_patterns = ['*.jpg', '*.png', '*.jpeg', '*.JPG', '*.PNG', '*.JPEG']
patterns = ['*.json']
input_dir = Path('/home/trucly/Documents/DATASET/HANDWRITTEN/completed/vtp_form_test')
output_dir = Path('/home/trucly/Documents/DATASET/HANDWRITTEN/completed/vtp_line_test_merge')

# Collect every file under input_dir (recursively) that matches a pattern.
# glob() yields entries in a filesystem-dependent order, so the matches are
# de-duplicated and sorted to make the processing order identical on every run.
paths = sorted({
    match
    for pattern in patterns
    for match in input_dir.glob(f'**/{pattern}')
})

In [None]:
# NOTE(review): this repeats the prefix list hard-coded inside is_label();
# none of the cells shown here read this module-level copy — confirm whether
# it is still needed.
start_key = ['V_', 'SIGN_NAME', 'DATE']

In [None]:
# For every annotation JSON: load its image, cut out each shape whose label
# passes is_label() and that has a non-empty 'value', then write the crop as
# <label>_<json stem>.jpg, its text as <label>_<json stem>.txt, and one
# "<file name>\t<value>" line to label.txt.
#
# NOTE(review): output names are built from the label and the JSON stem only,
# so two shapes with the same label in one JSON (or two JSONs sharing a stem)
# map to the same files and the later one overwrites the earlier.

# cv2.imwrite() and open() do not create missing directories.
output_dir.mkdir(parents=True, exist_ok=True)
label_path = output_dir.joinpath('label.txt')

# Append mode is kept, so lines from earlier runs stay in label.txt; the file
# is opened once instead of once per shape.
with open(str(label_path), 'a', encoding='utf-8') as label_file:
    for path in paths:
        with open(str(path), 'r', encoding='utf-8') as json_file:
            obj = json.load(json_file)

        image_name = obj['imagePath']
        # JSON files are globbed recursively, so look next to the JSON first
        # and only then fall back to the top-level input directory.
        image_file = path.parent.joinpath(image_name)
        if not image_file.is_file():
            image_file = input_dir.joinpath(image_name)

        print(str(image_file))
        image = cv2.imread(str(image_file))
        if image is None:
            # cv2.imread signals a missing/unreadable file by returning None.
            print(f'Skipping {path}: cannot read image {image_file}')
            continue

        for shape in obj['shapes']:
            points = shape['points']
            label = shape['label']
            # A missing key or an explicit JSON null both count as "no value".
            value = shape.get('value') or ''

            if not is_label(label) or not value:
                continue

            if len(points) == 4:
                # get_warped_images orders the corners itself.
                line = get_warped_images(image, points)
            elif len(points) == 2:
                # The two corners may be stored in any order; sort them so the
                # slice is never reversed, and clamp at 0 because a negative
                # start index would wrap around to the other side of the image.
                left, right = sorted(int(p[0]) for p in points)
                top, bottom = sorted(int(p[1]) for p in points)
                line = image[max(top, 0):max(bottom, 0), max(left, 0):max(right, 0)]
            else:
                continue

            if line is None or line.size == 0:
                print(f'Skipping empty region {label} in {path}')
                continue

            stem = label + '_' + path.stem
            image_path = output_dir.joinpath(stem + '.jpg')
            if not cv2.imwrite(str(image_path), line):
                print(f'Failed to write {image_path}')
                continue

            text_path = output_dir.joinpath(stem + '.txt')
            with open(str(text_path), 'wt', encoding='utf-8') as text_file:
                text_file.write(value)

            label_file.write(stem + '.jpg' + '\t' + value + '\n')

# Split train/val/test

In [None]:
# data_dir = output_dir
# # data_dir = Path('/home/trucly/Documents/DATASET/HANDWRITTEN/general/vtp/vtp_line')