# Goal
Slice each image into 8 parts based on a template, and save the parts into 8 separate directories.

# Details
1. All slices should be aligned within a tolerance that prevents cutting off important data.
2. The 8 categories are:
    * Capital letters
    * Lowercase letters
    * Digits
    * Punctuation
    * Sentence 1
    * Sentence 2
    * Sentence 3
    * Sentence 4

In [1]:
from PIL import Image
from pathlib import Path
from itertools import tee
from os import path
import cv2
import numpy as np

In [2]:
class CroppingTemplate:
    """Cut a loaded image into eight named sections using fixed boundaries.

    Typical use: assign ``image_file``, call ``load()``, then ``crop_all()``.
    ``load()`` must run before ``crop_all()`` because it sets ``img``,
    ``right``, ``bottom`` and ``major_sections``.

    Each crop is written to ``<dest_dir>/<section name>/<counter>.jpg``;
    ``counter`` starts at 1 and is incremented once per ``crop_all()`` call.
    """

    def __init__(self):
        self.image_file = None  # posix path
        self.img = None  # actual image
        self.counter = 1  # used as the output file name, bumped per image
        self.dest_dir = "./major_pieces"

        # Vertical cut lines (x coordinates handed to Image.crop).
        self.left = 0
        self.x1 = 330
        self.x2 = 470
        self.x3 = 990
        self.right = None  # set in self.load()

        # Horizontal cut lines (y coordinates handed to Image.crop).
        self.top = 0
        self.y1 = 110
        self.y2 = 235
        self.y3 = 360
        self.y4 = 510
        self.bottom = None  # set in self.load()

    def load(self):
        """Open ``self.image_file`` and rebuild the section boxes for it."""
        self.img = Image.open(str(self.image_file))
        self.right = self.img.size[0]  # image width
        self.bottom = self.img.size[1]  # image height

        # need right and bottom values before setting the section boxes
        self.setup_sections()

    def crop_all(self):
        """Crop every section out of ``self.img`` and save each one.

        The destination directory of each section is created on demand so a
        fresh ``dest_dir`` does not make ``save`` fail with a missing-directory
        error. Prints a progress line and increments ``self.counter``.
        """
        file_name = str(self.counter) + ".jpg"
        for section in self.major_sections:
            for dir_name, box in section.items():
                target_dir = path.join(self.dest_dir, dir_name)
                # Create the per-section directory (and dest_dir) if missing.
                Path(target_dir).mkdir(parents=True, exist_ok=True)
                cropped_img = self.img.crop(
                    (box["left"], box["top"], box["right"], box["bottom"])
                )
                cropped_img.save(path.join(target_dir, file_name))
        print("Finished: ", self.counter, self.image_file)
        self.counter += 1

    def setup_sections(self):
        """Build ``self.major_sections`` from the current cut lines.

        The result is a list of single-entry dicts mapping a section (and
        output directory) name to its ``left``/``top``/``right``/``bottom`` box.
        """

        def make_box(left, top, right, bottom):
            return {"left": left, "top": top, "right": right, "bottom": bottom}

        # NOTE(review): sent1/sent3 stop at x1 while sent2/sent4 start at x2
        # and stop at x3, so the strip between x1 and x2 and everything right
        # of x3 is not saved for the sentence rows -- confirm this is intended.
        self.major_sections = [
            {"uppercase": make_box(self.left, self.top, self.right, self.y1)},
            {"lowercase": make_box(self.left, self.y1, self.right, self.y2)},
            {"digits": make_box(self.left, self.y2, self.x2, self.y3)},
            {"punctuation": make_box(self.x2, self.y2, self.right, self.y3)},
            {"sent1": make_box(self.left, self.y3, self.x1, self.y4)},
            {"sent2": make_box(self.x2, self.y3, self.x3, self.y4)},
            {"sent3": make_box(self.left, self.y4, self.x1, self.bottom)},
            {"sent4": make_box(self.x2, self.y4, self.x3, self.bottom)},
        ]

In [3]:
IMG_SRC_DIR = "./jpg"

# One cropper is reused for every image so its counter keeps increasing
# and each source image gets its own output file number.
cropper = CroppingTemplate()
gen, image_paths = tee(Path(IMG_SRC_DIR).iterdir())
for img_path in image_paths:
    # Skip anything in the source directory that is not a .jpg file.
    if img_path.suffix != ".jpg":
        continue

    # Point the cropper at this image, load it, and write out its sections.
    cropper.image_file = img_path
    cropper.load()
    cropper.crop_all()

Finished:  1 jpg/alphabet00794.jpg
Finished:  2 jpg/alphabet00793.jpg
Finished:  3 jpg/alphabet00787.jpg
Finished:  4 jpg/alphabet00368.jpg
Finished:  5 jpg/alphabet00369.jpg
Finished:  6 jpg/alphabet00786.jpg
Finished:  7 jpg/alphabet00792.jpg
Finished:  8 jpg/alphabet00009.jpg
Finished:  9 jpg/alphabet00790.jpg
Finished:  10 jpg/alphabet00791.jpg
Finished:  11 jpg/alphabet00785.jpg
Finished:  12 jpg/alphabet00008.jpg
Finished:  13 jpg/alphabet00005.jpg
Finished:  14 jpg/alphabet00788.jpg
Finished:  15 jpg/alphabet00367.jpg
Finished:  16 jpg/alphabet00366.jpg
Finished:  17 jpg/alphabet00789.jpg
Finished:  18 jpg/alphabet00010.jpg
Finished:  19 jpg/alphabet00004.jpg
Finished:  20 jpg/alphabet00006.jpg
Finished:  21 jpg/alphabet00364.jpg
Finished:  22 jpg/alphabet00365.jpg
Finished:  23 jpg/alphabet00007.jpg
Finished:  24 jpg/alphabet00003.jpg
Finished:  25 jpg/alphabet00361.jpg
Finished:  26 jpg/alphabet00360.jpg
Finished:  27 jpg/alphabet00002.jpg
Finished:  28 jpg/alphabet00362.jpg
F