# Different order
Trying to increase effectiveness and efficiency.

1. downscale and resave as JPG
2. rotate to landscape
3. orient upright
4. deskew
5. crop to first outer edges using Canny outlines
6. split into 8 major sections
7. slice up the letters based on a simple, manually generated template

In [1]:
from PIL import Image
from pathlib import Path
from os import path
import numpy as np
import cv2
import imutils
from pprint import pprint

In [2]:
# opencv colors, for convenience
# NOTE: the tuples are written in BGR channel order (blue first, red last), not RGB
BLUE = (255, 0, 0)
RED = (0, 0, 255)
GREEN = (0, 255, 0)
BLACK = (0, 0, 0)
PURPLE = (255, 0, 255)

## 1. downscale and resave as JPG

Everything below this point is run as a single pass through a loop.  
Each image will be:  
1. Converted to JPG
2. Oriented right side up.
3. Downscaled
4. Cropped
5. Cut into 8 major sections.
6. Saved into 8 separate dirs.

In [3]:
# Rename the image file
#    BATCH_DIR is the working directory for every artifact written by this notebook
BATCH_DIR = Path("BatchProcess")
ORIGINAL = "original.jpeg"
img_path = Path(BATCH_DIR, ORIGINAL)
old = str(img_path)
old_name = str(img_path.name)
# use the path's stem to drop the extension; str.rstrip(suffix) strips a *set of characters*
#    (here ".", "j", "p", "e", "g") and would also eat trailing letters of the base name
new_name = img_path.stem

In [4]:
# load the image and convert to grayscale
#    the result is a single-channel PIL image that is re-saved under a .jpg name in the next cell
new_img = Image.open(img_path).convert("L") # L is grayscale or "luminance"



In [5]:
# write the grayscale image back into the batch directory under a ".jpg" file name
JPG_IMG = f"{new_name}.jpg"
name = path.join(BATCH_DIR, JPG_IMG)
new_img.save(name)

In [6]:
# downscale
SCALE_PERCENT = 10 # percent of original size

# open the JPG, not JPEG
img_path = Path(BATCH_DIR, JPG_IMG)
img_name = str(img_path)
img = cv2.imread(img_name, cv2.IMREAD_UNCHANGED)
# cv2.imread reports failure by returning None instead of raising; fail loudly here rather
#    than with an opaque AttributeError on img.shape below
if img is None:
    raise FileNotFoundError(f"Could not read image: {img_name}")

# calculate new size
#    img.shape is (rows, cols, ...) while cv2.resize wants (width, height)
#    keep at least one pixel per side so a very small input never yields a zero-sized target
width = max(1, int(img.shape[1] * SCALE_PERCENT / 100))
height = max(1, int(img.shape[0] * SCALE_PERCENT / 100))
new_size = (width, height)

# downscale image
DOWNSCALED = "downscaled.jpg"
downscaled = cv2.resize(img, new_size, interpolation = cv2.INTER_AREA)
file_name = str(Path(BATCH_DIR, DOWNSCALED))
cv2.imwrite(file_name, downscaled)

# print(f"Original: {img.nbytes} bytes, downscaled: {downscaled.nbytes} bytes")

Original: 141812970 bytes, downscaled: 1417422 bytes


## 2. rotate to landscape

In [7]:
# load the downscaled image
img_path = Path(BATCH_DIR, DOWNSCALED)
img = Image.open(str(img_path))
width = img.size[0]
height = img.size[1]

# rotate to landscape if needed, don't yet know rightsideup or upsidedown
if width < height:
    landscaped = img.rotate(90, expand=True)
else:
    # already landscape (or square): pass the image through unchanged so that `landscaped`
    #    is always defined and landscaped.jpg is always (re)written for the next step
    landscaped = img
LANDSCAPED = "landscaped.jpg"
file_name = str(Path(BATCH_DIR, LANDSCAPED))
landscaped.save(file_name)

## 3. orient upright

In [8]:
# load template image
# the template image was selected manually by using the GIMP editor
TEMPLATE = "template.jpg"
img_path = BATCH_DIR / TEMPLATE
template = str(img_path)
# flag value 1 is cv2.IMREAD_COLOR: the template is loaded as a 3-channel BGR image
template_img = cv2.imread(template, cv2.IMREAD_COLOR)

In [9]:
# load target image
img_path = BATCH_DIR / LANDSCAPED
target = str(img_path)
# flag value 1 is cv2.IMREAD_COLOR: the target is loaded as a 3-channel BGR image
target_img = cv2.imread(target, cv2.IMREAD_COLOR)

# find these dimensions in GIMP
#     100x100 pixel cropping box that limits where the template is looked for
left, top = 845, 790
right, bottom = left + 100, top + 100

# cut the search window out of the target to reduce computational expense
#    arrays are indexed [rows, cols], i.e. img[y:y+h, x:x+w]
target_search_area = target_img[top:bottom, left:right]

In [10]:
# calculate the likelihood of a match
# NOTE(review): cv2.matchTemplate takes (image, templ) and needs image to be at least as large
#    as templ; here the template is passed as the image and the cropped search area as the
#    templ -- confirm this ordering is intended
method = cv2.TM_SQDIFF_NORMED
result = cv2.matchTemplate(template_img, target_search_area, method)

# NOTE(review): TM_SQDIFF_NORMED is a difference measure (0 means identical), so the best
#    match is normally the minimum; the maximum is what is used as the score here -- confirm
mn, maxVal, mnLoc, maxLoc = cv2.minMaxLoc(result)

# exaggerate the values to make it easier to set a cutoff point
score = round((maxVal*100)**2)
print(f"Template match score: {score}")

# flip if needed and always save, so the next step can rely on RIGHT_SIDE_UP and on the file
#    existing regardless of which branch ran
# 600 points seems to be a good cutoff, arbitrarily chosen for now
RIGHT_SIDE_UP = "rightSideUp.jpg"
if score < 600:
    rightsideup = cv2.rotate(target_img, cv2.ROTATE_180)
else:
    # no rotation needed: the image is already in the correct position
    rightsideup = target_img
file_name = str(Path(BATCH_DIR, RIGHT_SIDE_UP))
cv2.imwrite(file_name, rightsideup)

Template match score: 165


## 4. deskew

In [11]:
# Deskew: estimate the skew angle of the page content and rotate the image to cancel it.
# load image (the file named by RIGHT_SIDE_UP, written by step 3)
img_path = Path(BATCH_DIR, RIGHT_SIDE_UP)
img = cv2.imread(str(img_path))

# convert to grayscale, flip foreground and background
#    foreground is now "white" and the background is "black"
gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
gray = cv2.bitwise_not(gray)

# threshold the image, setting all foreground pixels to 255 and all background pixels to 0
#    the threshold value 0 is a placeholder: THRESH_OTSU picks the actual threshold
thresh = cv2.threshold(gray, 0, 255, cv2.THRESH_BINARY | cv2.THRESH_OTSU)[1]

# grab the coordinates of all pixel values that are greater than zero, then use these coordinates to
#    compute a rotated bounding box that contains all coordinates
# NOTE(review): np.where on a 2-D array yields (row, col) pairs, i.e. (y, x) rather than (x, y);
#    the angle handling below was written against that ordering -- confirm before changing it
coords = np.column_stack(np.where(thresh > 0))
angle = cv2.minAreaRect(coords)[-1]  # last element of the returned rect is its rotation angle

# the code below assumes `cv2.minAreaRect` returns values in the range [-90, 0); as the rectangle rotates
#    clockwise the returned angle trends to 0 -- in this special case we need to add 90 degrees to the angle
# NOTE(review): the angle range reported by minAreaRect is OpenCV-version dependent -- confirm that the
#    installed version still uses [-90, 0), otherwise this normalisation needs revisiting
if angle < -45:
    angle = -(90 + angle)

# otherwise, just take the inverse of the angle to make it positive
else:
    angle = -angle
print(f"Angle skew: {angle}")

# rotate the image about its centre to deskew it (scale 1.0, output keeps the input size)
(h, w) = img.shape[:2]
center = (w // 2, h // 2)
M = cv2.getRotationMatrix2D(center, angle, 1.0)

DESKEWD = "deskewd.jpg"
# BORDER_REPLICATE fills the corners exposed by the rotation with the nearest edge pixels
deskewd = cv2.warpAffine(img, M, (w, h), flags=cv2.INTER_CUBIC, borderMode=cv2.BORDER_REPLICATE)
deskewd_name = str(Path(BATCH_DIR, DESKEWD))
cv2.imwrite(deskewd_name, deskewd)

# CLEAN_DESKEWD = "cleanDeskewd.jpg"
# clean_deskewd = deskewd.copy() # for use later, without the contour lines drawn in...
# clean_deskewd_name = str(Path(BATCH_DIR, CLEAN_DESKEWD))
# cv2.imwrite(clean_deskewd_name, deskewd)

Angle skew: 0.0


True

## 5. crop to first outer edges using Canny outlines

#### 5a reduce noise while preserving boundaries

In [12]:
# load image
#    despite its name, `uppercase_img` holds the whole deskewed page read back from DESKEWD
img_path = Path(BATCH_DIR, DESKEWD)
file_name = str(img_path)
uppercase_img = cv2.imread(file_name)

# convert to grayscale
grayscaled = cv2.cvtColor(uppercase_img, cv2.COLOR_BGR2GRAY)

# bilateralFilter reduces noise while preserving boundaries
bilateral_filtered = cv2.bilateralFilter(grayscaled, 11, 17, 17)
BILATERAL_FILTERED = "bilateralFiltered.jpg"
file_name = str(Path(BATCH_DIR, BILATERAL_FILTERED))
cv2.imwrite(file_name, bilateral_filtered)


#### Don't use this image as the base image going forward. It is the cleanest (noise-reduced) image, which
####    tricks Canny into ignoring the important borders. Use the deskewd image instead...
# NOTE(review): steps 5b/5c still derive the Canny input from `bilateral_filtered`; only the final crop
#    is taken from `deskewd` -- confirm that this is the intended meaning of the warning above
# base_image = bilateral_filtered.copy()

True

#### 5b invert the image

In [13]:
# flip light and dark in the noise-reduced image before looking for bounding boxes
BILATERAL_FILTER_INVERTED = "bilateralFilteredInverted.jpg"
bilateral_filter_inverted = cv2.bitwise_not(bilateral_filtered)
file_name = str(BATCH_DIR / BILATERAL_FILTER_INVERTED)
cv2.imwrite(file_name, bilateral_filter_inverted)

True

#### 5c find the edges with Canny

In [14]:
# run Canny edge detection (thresholds 30 and 200) on the inverted image and keep a copy on disk
CANNY = "canny.jpg"
file_name = str(BATCH_DIR / CANNY)
canny_edges = cv2.Canny(bilateral_filter_inverted, 30, 200)
cv2.imwrite(file_name, canny_edges)

True

#### 5d get the contours of the edges

In [15]:
# find contours in the edged image and keep only the ten largest ones (by cv2.contourArea)
#    findContours is given a copy so that `canny_edges` itself is left untouched
contours = cv2.findContours(canny_edges.copy(), cv2.RETR_TREE, cv2.CHAIN_APPROX_SIMPLE)
# grab_contours extracts the contour list from the findContours return value
contours = imutils.grab_contours(contours) # convenience function (really simple, see https://github.com/jrosebr1/imutils/blob/master/imutils/convenience.py)
contours = sorted(contours, key = cv2.contourArea, reverse=True)[:10]
# print(f"Contours found: {len(contours)}")
# print(f"Length of first contour: {len(contours[0])}")
# print(type(contours))

#### 5e get the edges of the intended cropping area

In [16]:
# convenience functions
def four_corners(box):
    """Return the axis-aligned extent of a contour as (x_min, x_max, y_min, y_max).

    `box` is a sequence whose elements each wrap one point, so that
    `element[0][0]` is the point's x value and `element[0][1]` its y value.
    """
    points = [element[0] for element in box]
    x_values = [point[0] for point in points]
    y_values = [point[1] for point in points]
    return (min(x_values), max(x_values), min(y_values), max(y_values))

def area(corners):
    """Return the area of a box given as (x_min, x_max, y_min, y_max)."""
    x_min, x_max, y_min, y_max = corners[0], corners[1], corners[2], corners[3]
    width = x_max - x_min
    height = y_max - y_min
    return width * height

In [17]:
# record the bounding-box corners and area of every kept contour, keyed by its rank in `contours`
#    enumerate supplies the index directly; contours.index(contour) would compare numpy arrays
#    with ==, which is ambiguous for arrays and rescans the list on every iteration
boxes = {}
for box, contour in enumerate(contours):
    corners = four_corners(contour)
    boxes[box] = {"area": area(corners), "corners": corners}

# pprint(boxes)

  This is separate from the ipykernel package so we can avoid doing imports until


In [18]:
# overall extent of all boxes: each corners tuple is (x_min, x_max, y_min, y_max)
all_corners = [entry["corners"] for entry in boxes.values()]
xmin = min(corner_set[0] for corner_set in all_corners)
xmax = max(corner_set[1] for corner_set in all_corners)
ymin = min(corner_set[2] for corner_set in all_corners)
ymax = max(corner_set[3] for corner_set in all_corners)

# print(xmin, xmax, ymin, ymax)

In [19]:
# crop the image using the minmax of all the contours
#    need some padding for the auto crop of the 8 major sections... It's too strict
padding = 10
# clamp the padded top/left at 0: a negative slice start would wrap around to the far edge
#    of the array and produce an empty or wrong crop
left = max(0, xmin - padding)
top = max(0, ymin - padding)
# overshooting the right/bottom edge is harmless, slicing stops at the array bounds
right = xmax + padding
bottom = ymax + padding

# cut the padded region out of the deskewed image
#     arrays are indexed [rows, cols], i.e. img[y:y+h, x:x+w]
CROPPED = "cropped.jpg"
cropped = deskewd[top:bottom, left:right]
file_name = str(Path(BATCH_DIR, CROPPED))
cv2.imwrite(file_name, cropped)

True

#### 5f crop the image

## 6. split into 8 major sections

I think I need to crop to the largest area of usable content first because that ensures the images are aligned and sized properly for the following stages.  
From this point, I can just apply a template slicing to the sections to divide everything into 8 major pieces.  
Then, on each major piece, assuming the previous steps were successful in aligning everything properly, I can run step 5 (cropping using Canny) again on each of the smaller pieces to maintain the proper sizing and alignment on each smaller piece.  
Finally, I can use another template slicing specific to each of the 8 sections to extract the desired data.

In [20]:
# these are the box boundaries for each section, found manually using GIMP
#    all values are pixel offsets inside `cropped`
# vertical dividers (x positions), from the left edge to the right edge of the image
left = 0
x1 = 330
x2 = 470
x3 = 990
right = cropped.shape[1]  # image width

# horizontal dividers (y positions), from the top edge to the bottom edge of the image
top = 0
y1 = 110
y2 = 230
y3 = 360
y4 = 510
bottom = cropped.shape[0]  # image height

# display the cropped image's width and height as the cell output
right, bottom

(1171, 658)

In [21]:
# these 8 sections are the template for slicing up the major parts of the handwriting form
#    each entry is a one-key dict: section name -> pixel box with left/top/right/bottom edges
def _section(name, left_edge, top_edge, right_edge, bottom_edge):
    """Build one template entry mapping `name` to its box edges."""
    return {
        name: {
            "left": left_edge,
            "top": top_edge,
            "right": right_edge,
            "bottom": bottom_edge,
        }
    }


major_sections = [
    # two full-width rows
    _section("uppercase", left, top, right, y1),
    _section("lowercase", left, y1, right, y2),
    # third row, split at x2
    _section("digits", left, y2, x2, y3),
    _section("punctuation", x2, y2, right, y3),
    # fourth row
    _section("sent1", left, y3, x1, y4),
    _section("sent2", x2, y3, x3, y4),
    # fifth row
    _section("sent3", left, y4, x1, bottom),
    _section("sent4", x2, y4, x3, bottom),
]

In [22]:
#This function is a condensed version of the above (step 5)
def crop_outer_edges(img):
    """Crop `img` to the bounding box spanned by its ten largest Canny contours.

    Edges are detected directly on `img` with `cv2.Canny(img, 30, 200)`. The ten
    contours with the largest `cv2.contourArea` are kept, and the image is sliced
    to the smallest axis-aligned rectangle that contains all of their points.

    Args:
        img: image array accepted by `cv2.Canny` and sliceable as img[rows, cols].

    Returns:
        `img[ymin:ymax, xmin:xmax]` for the combined contour extent, or `img`
        itself when no contour is found.
    """
    # get edges straight from the input image
    #    the former grayscale -> bilateral filter -> invert -> Canny chain was overwritten
    #    by this call before it was ever used, so it never affected the result
    canny_edges = cv2.Canny(img, 30, 200)

    # find contours in the edged image, keep only the largest ones
    contours = cv2.findContours(canny_edges.copy(), cv2.RETR_TREE, cv2.CHAIN_APPROX_SIMPLE)
    contours = imutils.grab_contours(contours) # convenience function (really simple, see https://github.com/jrosebr1/imutils/blob/master/imutils/convenience.py)
    contours = sorted(contours, key=cv2.contourArea, reverse=True)[:10]

    # nothing detected (e.g. a blank section): there is no extent to crop to, and min()/max()
    #    below would raise on an empty sequence
    if not contours:
        return img

    def four_corners(box):
        # (x_min, x_max, y_min, y_max) over all points of one contour
        xs = [el[0][0] for el in box]
        ys = [el[0][1] for el in box]
        return (min(xs), max(xs), min(ys), max(ys))

    # extent of every kept contour
    all_corners = [four_corners(contour) for contour in contours]

    # the minmax among all the boxes' corners
    xmin = min(corners[0] for corners in all_corners)
    xmax = max(corners[1] for corners in all_corners)
    ymin = min(corners[2] for corners in all_corners)
    ymax = max(corners[3] for corners in all_corners)

    # crop the image using the minmax of all the contours (rows are y, columns are x)
    return img[ymin:ymax, xmin:xmax]

In [23]:
# slice major sections from each image and save in their proper dir
#    crop again using canny's help
# img_path = Path(BATCH_DIR, clean_cropped)
# for image in dir:
# `counter` names the output file; it is the index of the source image, so it stays at 0 while
#    only a single image is processed
counter = 0
for section in major_sections:
    for dir_name, box in section.items():
        cropped_img = cropped[box["top"]:box["bottom"], box["left"]:box["right"]]
        tighter_crop = crop_outer_edges(cropped_img)
        # cv2.imwrite does not create missing directories (it just returns False), so make
        #    sure the section directory exists under BATCH_DIR before writing
        section_dir = Path(BATCH_DIR, dir_name)
        section_dir.mkdir(parents=True, exist_ok=True)
        file_name = str(Path(section_dir, str(counter)+".jpg"))
        if not cv2.imwrite(file_name, tighter_crop):
            raise OSError(f"Could not write section image: {file_name}")

__uppercase__

![](BatchProcess/uppercase/0.jpg)

__lowercase__

![](BatchProcess/lowercase/0.jpg)

__digits__

![](BatchProcess/digits/0.jpg)

__punctuation__

![](BatchProcess/punctuation/0.jpg)

__sent1__

![](BatchProcess/sent1/0.jpg)

__sent2__

![](BatchProcess/sent2/0.jpg)

__sent3__

![](BatchProcess/sent3/0.jpg)

__sent4__

![](BatchProcess/sent4/0.jpg)

## 7. slice up the letters based on a simple, manually generated template

In [24]:
# These values were found manually by using GIMP