In [1]:
from PIL import Image
from pathlib import Path
from os import path
import numpy as np
import cv2
import imutils
from pprint import pprint

IMG_DIR = Path("SingleImageProcess")

## Convenience Functions

In [2]:
def save_as_jpg(img, save_to):
#     RGB = "RGB"
    BW = "L"
    old = str(img) #img is a path
    old_name = str(img.name)
    new_name = old_name.rstrip(str(img.suffix))
    new_img = Image.open(img).convert(BW)
    name = path.join(save_to, new_name+".jpg")
    new_img.save(name)

## 1. setup and convert images to jpg

In [3]:
# ensure saved as JPG, from JPEG and PNG
img = Path(IMG_DIR, "original.jpeg")
save_as_jpg(img, IMG_DIR)



![](SingleImageProcess/original.jpg)

## 2. rotate images to landscape

In [4]:
image_path = Path(IMG_DIR, "original.jpg")
actual_img = Image.open(str(image_path))
width = actual_img.size[0]
height = actual_img.size[1]

# rotate to landscape if needed, don't yet know up or down
if width < height:
    rotated = actual_img.rotate(90, expand=True)
rotated_name = str(Path(IMG_DIR, "rotated.jpg"))
rotated.save(rotated_name)

![](SingleImageProcess/rotated.jpg)

## 3. ensure pictures right-side up with template search

In [5]:
def find_match(template, target):
    method = cv2.TM_SQDIFF_NORMED  
    result = cv2.matchTemplate(template, target, method) 

    # minimum squared difference
    # image similarity score is maxVal
    mn, maxVal, mnLoc, maxLoc = cv2.minMaxLoc(result)  

    # exaggerate the values to make it easier to set a cutoff point
    return (maxVal*100)**2

In [6]:
# load images
TEMPLATE = "template.jpg"
template_img = cv2.imread(TEMPLATE, 1) #1 is grayscale enum flag

img_path = Path(IMG_DIR, "rotated.jpg")
test = str(img_path)
test_img = cv2.imread(test, 1) #1 is grayscale enum flag

# cropping box to reduce search area for target template
left = 8500
top = 8000
right = left + 700
bottom = top + 700

# get the target image from the template
# crop_img = img[y:y+h, x:x+w] #opencv's x and y are flipped
cropped_img = test_img[top:bottom, left:right]
score = round(find_match(template_img, cropped_img))

# flip and save in place
# 600 points seems to be a good cutoff
if score < 600:
    upright_image = cv2.rotate(test_img, cv2.ROTATE_180)
    upright_name = str(Path(IMG_DIR, "upright.jpg"))

cv2.imwrite(upright_name, upright_image)

True

![](SingleImageProcess/upright.jpg)

## 4. crop off margins

In [7]:
# cropping box to reduce search area for target template
left = 850
top = 710
right = 12560
bottom = 7300

img_path = Path(IMG_DIR, "upright.jpg")
file_name = str(img_path)
img = cv2.imread(file_name, 1) #1 is grayscale enum flag        
cropped_img = img[top:bottom, left:right]

name = str(Path(IMG_DIR, "cropped.jpg"))
cv2.imwrite(name, cropped_img)

True

![](SingleImageProcess/cropped.jpg)

## 5. downscale images

In [8]:
SCALE_PERCENT = 10 # percent of original size

img_path = Path(IMG_DIR, "cropped.jpg")
img_name = str(img_path)
img = cv2.imread(img_name, cv2.IMREAD_UNCHANGED)

# calculate new size
width = int(img.shape[1] * SCALE_PERCENT / 100)
height = int(img.shape[0] * SCALE_PERCENT / 100)
new_size = (width, height)

# resize image
resized = cv2.resize(img, new_size, interpolation = cv2.INTER_AREA)
resized_name = str(Path(IMG_DIR, "resized.jpg"))
cv2.imwrite(resized_name, resized)

print(f"cropped.jpg: {img.shape}, resized.jpg: {resized.shape}")

cropped.jpg: (6590, 11710, 3), resized.jpg: (659, 1171, 3)


![](SingleImageProcess/resized.jpg)

## 6. deskew images

In [9]:
def load(img):
    # load the image from disk
    image = cv2.imread(str(img))

    # convert to grayscale, flip foreground and background
    # foreground is now "white" and the background is "black"
    gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
    gray = cv2.bitwise_not(gray)

    # threshold the image, setting all foreground pixels to 255 and all background pixels to 0
    return image, cv2.threshold(gray, 0, 255, cv2.THRESH_BINARY | cv2.THRESH_OTSU)[1]

def deskew(image, angle):
    # rotate the image to deskew it
    (h, w) = image.shape[:2]
    center = (w // 2, h // 2)
    M = cv2.getRotationMatrix2D(center, angle, 1.0)
    return cv2.warpAffine(image, M, (w, h), flags=cv2.INTER_CUBIC, borderMode=cv2.BORDER_REPLICATE)

# load and get threshold
img_path = Path(IMG_DIR, "resized.jpg")
image, thresh = load(img_path)

# grab the (x, y) coordinates of all pixel values that
# are greater than zero, then use these coordinates to
# compute a rotated bounding box that contains all
# coordinates
coords = np.column_stack(np.where(thresh > 0))
angle = cv2.minAreaRect(coords)[-1]

# the `cv2.minAreaRect` function returns values in the
# range [-90, 0); as the rectangle rotates clockwise the
# returned angle trends to 0 -- in this special case we
# need to add 90 degrees to the angle
if angle < -45:
    angle = -(90 + angle)

# otherwise, just take the inverse of the angle to make it positive
else:
    angle = -angle

deskewd = deskew(image, angle)
deskewd_name = str(Path(IMG_DIR, "deskewd.jpg"))
cv2.imwrite(deskewd_name, deskewd)

True

![](SingleImageProcess/deskewd.jpg)

## 7. slice the images into 8 parts

In [10]:
class CroppingTemplate:
    def __init__(self):
        self.image_file = None  # posix path
        self.img = None  # actual image
        self.counter = 1
        self.dest_dir = "./major_pieces"
        
        self.left = 0
        self.x1 = 330
        self.x2 = 470
        self.x3 = 990
        self.right = None  # set in self.load()
        
        self.top = 0
        self.y1 = 110
        self.y2 = 235
        self.y3 = 360
        self.y4 = 510
        self.bottom = None  # set in self.load()

#     def load(self):
#         self.img = Image.open(str(self.image_file))
#         self.right = self.img.size[0]  # image width
#         self.bottom = self.img.size[1]  # image height
        
#         # need right and bottom values before setting the section boxes
#         self.setup_sections()

#     def crop_all(self):
#         for section in self.major_sections:
#             for dir_name, box in section.items():
#                 cropped_img = self.img.crop((box["left"], box["top"], box["right"], box["bottom"]))
#                 file_name = path.join(self.dest_dir, dir_name, str(self.counter)+".jpg")
#                 cropped_img.save(file_name)
#         self.counter += 1
        
        
    # This method was adapted from the original, just for this notebook
    def single_image_process(self):
        self.img = Image.open(str(self.image_file))
        self.right = self.img.size[0]  # image width
        self.bottom = self.img.size[1]  # image height
        self.setup_sections()
        
        # crop out the parts based on the (manually set) template's dimensions
        for section in self.major_sections:
            for dir_name, box in section.items():
                cropped_img = self.img.crop((box["left"], box["top"], box["right"], box["bottom"]))
                file_name = path.join("SingleImageProcess", dir_name+".jpg")
#                 print("file name: ", file_name)
                cropped_img.save(file_name)
        
    def setup_sections(self):
        # these are the box boundaries for each section, manually set
        self.major_sections = [
            {"uppercase": {
                "left": self.left,
                "top": self.top,
                "right": self.right,
                "bottom": self.y1}
            },
            {"lowercase": {
                "left": self.left,
                "top": self.y1,
                "right": self.right,
                "bottom": self.y2}
            },
            {"digits": {
                "left": self.left,
                "top": self.y2,
                "right": self.x2,
                "bottom": self.y3}
            },
            {"punctuation": {
                "left": self.x2,
                "top": self.y2,
                "right": self.right,
                "bottom": self.y3}
            },
            {"sent1": {
                "left": self.left,
                "top": self.y3,
                "right": self.x1,
                "bottom": self.y4}
            },
            {"sent2": {
                "left": self.x2,
                "top": self.y3,
                "right": self.x3,
                "bottom": self.y4}
            },
            {"sent3": {
                "left": self.left,
                "top": self.y4,
                "right": self.x1,
                "bottom": self.bottom}
            },
            {"sent4": {
                "left": self.x2,
                "top": self.y4,
                "right": self.x3,
                "bottom": self.bottom}
            }
        ]

In [11]:
# extract major sections from each image
img_path = Path(IMG_DIR, "deskewd.jpg")
cropper = CroppingTemplate()
cropper.image_file = img_path
cropper.single_image_process()

#### Uppercase
![](SingleImageProcess/uppercase.jpg)

#### Lowercase
![](SingleImageProcess/lowercase.jpg)

#### Digits
![](SingleImageProcess/digits.jpg)

#### Punctuation
![](SingleImageProcess/punctuation.jpg)

#### Sentence 1
![](SingleImageProcess/sent1.jpg)

#### Sentence 2
![](SingleImageProcess/sent2.jpg)

#### Sentence 3
![](SingleImageProcess/sent3.jpg)

#### Sentence 4
![](SingleImageProcess/sent4.jpg)

## 8. extract individual boxes

In [12]:
img_path = Path(IMG_DIR, "uppercase.jpg")
file_name = str(img_path)

# Read image
uppercase_img = cv2.imread(file_name)
gray = cv2.cvtColor(uppercase_img, cv2.COLOR_BGR2GRAY)

# bilateralFilter reduces noise while preserving boundaries
gray = cv2.bilateralFilter(gray, 11, 17, 17)
file_name = str(Path(IMG_DIR, "bilateralFiltered.jpg"))
cv2.imwrite(file_name, gray)

True

#### bilateralFiltered.jpg
![](SingleImageProcess/bilateralFiltered.jpg)

In [13]:
# invert the image for making bounding boxes
inverted = cv2.bitwise_not(gray)
file_name = str(Path(IMG_DIR, "bilateralFilteredInverted.jpg"))
cv2.imwrite(file_name, inverted)

True

#### bilateralFilteredInverted.jpg
![](SingleImageProcess/bilateralFilteredInverted.jpg)

In [14]:
# get edges
# edged = cv2.Canny(gray, 30, 200)
edged = cv2.Canny(inverted, 30, 200)
file_name = str(Path(IMG_DIR, "canny.jpg"))
cv2.imwrite(file_name, edged)

True

#### canny.jpg
![](SingleImageProcess/canny.jpg)

In [15]:
# find contours in the edged image, keep only the largest ones, and initialize our screen contour
contours = cv2.findContours(edged.copy(), cv2.RETR_TREE, cv2.CHAIN_APPROX_SIMPLE)
contours = imutils.grab_contours(contours) # convenience function (really simple, see https://github.com/jrosebr1/imutils/blob/master/imutils/convenience.py)
contours = sorted(contours, key = cv2.contourArea, reverse=True)[:10]
print(f"Contours found: {len(contours)}")
print(f"Length of first contour: {len(contours[0])}")
print(type(contours))

Contours found: 10
Length of first contour: 190
<class 'list'>


In [16]:
screenCnt = None
# loop over our contours
for c in contours:
    # approximate the contour
    epsilon = 0.015 * cv2.arcLength(c, True)
    approx = cv2.approxPolyDP(c, epsilon, True)
    
    # if our approximated contour has four points, then we can assume that we have found our screen
    if len(approx) == 4:
        screenCnt = approx
        break

In [17]:
# opencv colors
blue = (255, 0, 0)
red = (0, 0, 255)
green = (0, 255, 0)
black = (0, 0, 0)
purple = (255, 0, 255)

In [18]:
# cv2.drawContours(uppercase_img, screenCnt, -1, (0, 255, 0), 3)
boxed_image = cv2.drawContours(uppercase_img, contours, -1, green, 1)
boxed_name = str(Path(IMG_DIR, "boxed.jpg"))
cv2.imwrite(boxed_name, boxed_image)

True

![](SingleImageProcess/boxed.jpg)

# Experimenting with getting individual boxes

In [19]:
# Exploring how to isolate the row of handwritten letters.
# get the minmax of x/y values
# save the first contoured image
# first = contours[0]
# first.shape   # (190, 1, 2)
# first[0]   # array([[233,   6]], dtype=int32)
# first is an array of [[x, y]] points
# first[0][0][0]   # 233
# first[0][0][1]   # 6

coord = lambda point: tuple(point[0][0])  # tuple(x, y)
radius = 10
thickness = -1 # negative for filled circle

# draw a point to see where it is in the image
# image = cv.circle(image, centerOfCircle, radius, color, thickness)
dotted = cv2.circle(boxed_image, coord(contours[0]), radius, blue, thickness)
file_name = str(Path(IMG_DIR, "dotted.jpg"))
cv2.imwrite(file_name, dotted)

True

![](SingleImageProcess/dotted.jpg)

In [20]:
# get minmax of x and y values
x = lambda point: point[0][0][0]
y = lambda point: point[0][0][1]

xs = [x(contour) for contour in contours]
ys = [y(contour) for contour in contours]

x_min = min(xs)
x_max = max(xs)
y_min = min(ys)
y_max = max(ys)
print(f"Xmin {x_min}  Xmax {x_max}  Ymin {y_min}  Ymax {y_max}")

Xmin 10  Xmax 892  Ymin 6  Ymax 51


In [21]:
# Plot the four extremes of the first contour
upper_left = (x_min, y_min)
lower_left = (x_min, y_max)
upper_right = (x_max, y_min)
lower_right = (x_max, y_max)

four_corners = cv2.circle(uppercase_img, upper_left, radius, red, thickness)
four_corners = cv2.circle(uppercase_img, upper_right, radius, green, thickness)
four_corners = cv2.circle(uppercase_img, lower_left, radius, purple, thickness)
four_corners = cv2.circle(uppercase_img, lower_right, radius, black, thickness)
file_name = str(Path(IMG_DIR, "contour1.jpg"))
cv2.imwrite(file_name, four_corners)

True

![](SingleImageProcess/contour1.jpg)

In [22]:
def four_corners(box):
    xs = [el[0][0] for el in box]
    ys = [el[0][1] for el in box]
    x_min = min(xs)
    x_max = max(xs)
    y_min = min(ys)
    y_max = max(ys)
    return (x_min, x_max, y_min, y_max)

def area(corners):
    return (corners[1] - corners[0]) * (corners[3] - corners[2])

boxes = {}
for contour in contours:
    box = contours.index(contour)
    corners = four_corners(contour)
    boxes[box] = {"area": area(corners), "corners": corners}

pprint(boxes)

{0: {'area': 88150, 'corners': (133, 1158, 6, 92)},
 1: {'area': 21746, 'corners': (275, 537, 6, 89)},
 2: {'area': 6825, 'corners': (892, 1067, 50, 89)},
 3: {'area': 7000, 'corners': (892, 1067, 8, 48)},
 4: {'area': 5070, 'corners': (539, 669, 50, 89)},
 5: {'area': 3731, 'corners': (759, 850, 50, 91)},
 6: {'area': 11039, 'corners': (7, 140, 9, 92)},
 7: {'area': 2604, 'corners': (363, 425, 6, 48)},
 8: {'area': 1932, 'corners': (407, 453, 6, 48)},
 9: {'area': 1720, 'corners': (9, 52, 50, 90)}}


  from ipykernel import kernelapp as app


In [23]:
# box with the greatest area
biggest_area = 0
biggest_box = None
for box, values in boxes.items():
    if values["area"] > biggest_area:
        biggest_area = values["area"]
        biggest_box = values
print(biggest_box)

{'area': 88150, 'corners': (133, 1158, 6, 92)}


In [24]:
# crop the image again using the extremes of the largest-area box
corners = biggest_box["corners"]
left = corners[0]
top = corners[2]
right = corners[1]
bottom = corners[3]

# crop_img = img[y:y+h, x:x+w] #opencv's x and y are flipped
canny_cropped = uppercase_img[top:bottom, left:right]
file_name = str(Path(IMG_DIR, "croppedAfterCanny.jpg"))
cv2.imwrite(file_name, canny_cropped)

True

Going by just the biggest area doesn't work as there may be cut off as on the left...

![](SingleImageProcess/croppedAfterCanny.jpg)

In [25]:
# the minmax across all the box corners
# for box, values in boxes.items()
xmin = min([values["corners"][0] for box, values in boxes.items()])
xmax = max([values["corners"][1] for box, values in boxes.items()])
ymin = min([values["corners"][2] for box, values in boxes.items()])
ymax = max([values["corners"][3] for box, values in boxes.items()])

print(xmin, xmax, ymin, ymax)

7 1158 6 92


In [26]:
# load the original image
img_path = Path(IMG_DIR, "uppercase.jpg")
file_name = str(img_path)

# Read image
uppercase_img = cv2.imread(file_name)

# crop the image again using the extremes of all the contours
left = xmin
top = ymin
right = xmax
bottom = ymax

# get the target image from the template
# crop_img = img[y:y+h, x:x+w] #opencv's x and y are flipped
canny_cropped_all_extremes = uppercase_img[top:bottom, left:right]
file_name = str(Path(IMG_DIR, "croppedAfterCanny-allextremes.jpg"))
cv2.imwrite(file_name, canny_cropped_all_extremes)

True

This worked well. Getting the extreme edges using Canny and cropping the image as close as possbile to the boxes

![](SingleImageProcess/croppedAfterCanny-allextremes.jpg)  

Because this is a template image, perhaps just dividing the boxes up can be done much simpler than using opencv directly.

In [27]:
width = canny_cropped_all_extremes.shape[1]
height = canny_cropped_all_extremes.shape[0]

border = 1.5
boxes_wide = 26
rows_high = 2
l_top = round((height/2) + 2* border) # compensate for border
l_bottom = round(height - border)
#L is between 11th and 12th vertical line
l_left = round((width/boxes_wide) * 11 + (11*border))
l_right = round(width/boxes_wide) * 12

# crop out the letter L to test
left = l_left # xmin
top = l_top # ymin
right = l_right # xmax
bottom = l_bottom # ymax

# a single letter
letter_l = uppercase_img[top:bottom, left:right]
file_name = str(Path(IMG_DIR, "letter_l.jpg"))
cv2.imwrite(file_name, letter_l)

True

This is a single letter extracted from the uppercase template...  
![](SingleImageProcess/letter_l.jpg) 