# Goal
Slice each image into 8 parts based on a fixed template, and save each part into its own directory (8 directories in total).

# Details
1. All of the images should be aligned within a tolerance that prevents important data from being cut off.
2. The 8 categories are:
    * Capital letters
    * Lowercase letters
    * Digits
    * Punctuation
    * Sentence 1
    * Sentence 2
    * Sentence 3
    * Sentence 4

In [1]:
from PIL import Image
from pathlib import Path
from itertools import tee
from os import path
import cv2
import numpy as np

In [2]:
class CroppingTemplate:
    """Cut one loaded image into the eight template sections and save each one.

    Typical use: assign ``image_file``, call ``load()``, then ``crop_all2()``.
    Every section is written to ``<dest_dir>/<section name>/<counter>.jpg``.
    ``counter`` starts at 1 and is advanced once per ``crop_all2()`` call, so
    all sections cut from the same image share the same file number.
    """

    def __init__(self):
        """Set up an empty cropper with the fixed section boxes."""
        self.image_file = None  # posix path of the image that load() opens
        self.img = None  # image object set by load(); crop_all2() crops it
        self.counter = 1  # number used as the output file name, bumped per image
        self.dest_dir = "./major_pieces"  # root directory for all section dirs

        # these are the box boundaries for each section
        # Each entry maps a section name (also used as the output directory
        # name) to the left/top/right/bottom values handed to Image.crop().
        self.major_sections = [
            {"uppercase": {
                "left": 720,
                "top": 700,
                "right": 12560,
                "bottom": 1900}
            },
            {"lowercase": {
                "left": 720,
                "top": 1900,
                "right": 12560,
                "bottom": 3100}
            },
            {"digits": {
                "left": 720,
                "top": 3100,
                "right": 5520,
                "bottom": 4360}
            },
            {"punctuation": {
                "left": 5520,
                "top": 3100,
                "right": 12560,
                "bottom": 4360}
            },
            {"sent1": {
                "left": 720,
                "top": 4360,
                "right": 4140,
                "bottom": 5860}
            },
            {"sent2": {
                "left": 5520,
                "top": 4360,
                "right": 10800,
                "bottom": 5860}
            },
            {"sent3": {
                "left": 720,
                "top": 5860,
                "right": 4140,
                "bottom": 7300}
            },
            {"sent4": {
                "left": 5520,
                "top": 5860,
                "right": 10800,
                "bottom": 7300}
            }
        ]

    def load(self):
        """Open ``self.image_file`` and keep the image in ``self.img``."""
        self.img = Image.open(str(self.image_file))

    def crop_all2(self):
        """Crop every template section out of ``self.img`` and save it.

        Each section is saved as ``<dest_dir>/<section name>/<counter>.jpg``.
        Missing destination directories are created on the fly. After all
        sections have been written, the counter is printed and then
        incremented so the next image gets the next file number.
        """
        for section in self.major_sections:
            for dir_name, box in section.items():
                cropped_img = self.img.crop(
                    (box["left"], box["top"], box["right"], box["bottom"])
                )

                # Saving into a directory that does not exist fails, so make
                # sure the section directory is there before writing.
                Path(self.dest_dir, dir_name).mkdir(parents=True, exist_ok=True)

                file_name = path.join(self.dest_dir, dir_name, str(self.counter)+".jpg")
                cropped_img.save(file_name)
        print("Finished: ", self.counter)
        self.counter += 1

In [3]:
rotated = "./rotated"

# A single cropper is shared by all images so its counter keeps advancing and
# every source image ends up with its own output file number.
cropper = CroppingTemplate()

# collect image paths
# iterdir() yields entries in arbitrary order; sorting keeps the mapping from
# source image to output file number the same from one run to the next.
image_paths = sorted(Path(rotated).iterdir())
for img_path in image_paths:
    # Only .jpg files are processed; everything else in the directory is skipped.
    if img_path.suffix != ".jpg":
        continue

    # extract major sections from each image
    cropper.image_file = img_path
    cropper.load()
    cropper.crop_all2()



Finished:  2
Finished:  3
Finished:  4
Finished:  5
Finished:  6
Finished:  7
Finished:  8
Finished:  9
Finished:  10
Finished:  11
Finished:  12
Finished:  13
Finished:  14
Finished:  15
Finished:  16
Finished:  17
Finished:  18
Finished:  19
Finished:  20
Finished:  21
Finished:  22
Finished:  23
Finished:  24
Finished:  25
Finished:  26
Finished:  27
Finished:  28
Finished:  29
Finished:  30
Finished:  31
