In [1]:
pip install myocr

Note: you may need to restart the kernel to use updated packages.


In [None]:
pip install pdf2image pillow

In [None]:
from myocr import Project
# Notebook-wide settings used by the cells below.
debug = False
project_name = 'demo_project'
pdf_path = "demo_input.pdf"

# Class id -> label name; ids are assigned in listing order (0, 1, 2).
my_class = dict(enumerate(('miss', 'check', 'cancel')))
# Project handle shared by every later cell: its directories (landing_dir,
# staging_dir, cropped_dir, project_dir) and helper methods are read from `p`.
# NOTE(review): what the constructor creates on disk is defined in myocr — confirm there.
p=Project(project_name,my_class)



# 1. Convert PDF to images

In [None]:
from pdf2image import convert_from_path
import os

def pdf_to_images(pdf_path, output_folder, dpi=300):
    """
    Render every page of a PDF and store each one as a PNG file.

    Files are named ``page001.png``, ``page002.png``, ... (1-based, zero-padded
    to three digits) inside ``output_folder``, which is created if missing.

    Parameters:
    - pdf_path: str, path to the input PDF file.
    - output_folder: str, directory that receives the PNG files.
    - dpi: int, optional, resolution passed to ``convert_from_path``.

    Returns:
    - A list with the path of every saved image, in page order.
    """
    # The target directory may already exist from a previous run.
    os.makedirs(output_folder, exist_ok=True)

    pages = convert_from_path(pdf_path, dpi=dpi)

    saved_paths = []
    for page_number, page in enumerate(pages, start=1):
        target = os.path.join(output_folder, f"page{page_number:03d}.png")
        page.save(target, "PNG")
        saved_paths.append(target)

    return saved_paths

# Render the configured PDF into the project's landing directory.
# (An alternative input used previously: pdf_path = "dsi310_2024_raw.pdf")
output_folder = p.landing_dir
image_paths = pdf_to_images(pdf_path, output_folder)

# Report every page image that was written.
for saved_path in image_paths:
    print(f"Saved: {saved_path}")


# 2. Register all images

In [None]:
# Path of the reference sheet. It is defined once and passed on, so changing
# the reference only requires editing this line.
ref_path = 'ref.png'

# `reference_image` is reused later when the bounding-box template is drawn.
reference_image = p.set_reference_image(ref_path)

# NOTE(review): presumably aligns every landed page to the reference image;
# the behaviour (and the effect of `debug`) lives in myocr — confirm there.
p.register_image(debug)

# 3. Create BBox template

In [None]:
# Bounding-box dimensions.
# NOTE(review): every measurement is written as "value * 2"; presumably the
# values were measured on a half-resolution image — confirm.
box_width = 33.5*2  # approximate width of each choice box
box_height = 20.8*2  # approximate height of each question row

# Top-left corner of the first choice box in the left column (questions 1-30)
left_column_start_x = 145*2
left_column_start_y = 156*2

# Top-left corner of the first choice box in the right column (questions 31-60)
right_column_start_x = 386*2
right_column_start_y = 156*2


def build_column_bboxes(first_question, start_x, start_y,
                        width, height, n_questions=30, n_choices=4):
    """Build the bounding-box records for one column of the answer sheet.

    Parameters:
    - first_question: int, question number of the first (top) row.
    - start_x, start_y: top-left corner of the first choice box of that row.
    - width, height: size of one choice box; also the horizontal step between
      choices and the vertical step between questions.
    - n_questions: int, number of question rows in the column.
    - n_choices: int, number of choice boxes per question.

    Returns:
    - A list of dicts with keys 'q' (question number), 'c' (0-based choice
      index), 'x', 'y', 'box_width' and 'box_height', ordered question by
      question and, within a question, choice by choice.
    """
    return [
        {
            'q': first_question + row,
            'c': c,
            'x': start_x + c * width,    # each choice moves right by one box
            'y': start_y + row * height,  # each question moves down by one box
            'box_width': width,
            'box_height': height,
        }
        for row in range(n_questions)
        for c in range(n_choices)
    ]


# Full template: left column (questions 1-30) followed by right column (31-60).
all_bbox = (
    build_column_bboxes(1, left_column_start_x, left_column_start_y,
                        box_width, box_height)
    + build_column_bboxes(31, right_column_start_x, right_column_start_y,
                          box_width, box_height)
)
    
# Pass the reference image and the full box template to the project's drawing helper.
# NOTE(review): presumably renders the boxes over the reference sheet as a visual
# check of the template; draw_all_bbox is implemented in myocr — confirm there.
p.draw_all_bbox(reference_image,all_bbox)

In [None]:
# Hand the box template (and the debug flag) to the project's extraction step.
# NOTE(review): presumably crops every template box out of each registered
# sheet; extract_bb is implemented in myocr — confirm there.
p.extract_bb(all_bbox, debug)

# 4. Forward

In [None]:
import torchvision.transforms as transforms
from myocr import ImageClassifier
from pathlib import Path
import pandas as pd

# One output class per entry of the project's class mapping.
num_classes = len(p.my_class)

# Preprocessing applied to every image: resize to the model's input size,
# convert to a tensor, then normalise each of the three channels with
# mean 0.5 and std 0.5.
input_image_size = (32, 32)
transform = transforms.Compose([
    transforms.Resize(input_image_size),
    transforms.ToTensor(),
    transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))
])
model_name = 'lenet'
classifier = ImageClassifier(model_name, num_classes, input_image_size, transform)

# Load pre-trained weights when the file is available. A missing file is not
# fatal (the model may be trained later in this notebook), but it must not go
# unnoticed: every prediction below would then come from an unloaded model.
pre_train = '_lenet_13200_100.pth'
if Path(pre_train).exists():
    classifier.load(pre_train)
else:
    print(f"WARNING: pre-trained weights '{pre_train}' not found; "
          "predictions will use the classifier's initial weights.")

def as_series(func):
    """Decorator that converts the return value of ``func`` into a ``pd.Series``.

    This lets a function returning a tuple be used with ``DataFrame.apply`` to
    fill several columns at once. The wrapper keeps the name and docstring of
    the decorated function.

    Parameters:
    - func: callable whose result is accepted by the ``pd.Series`` constructor.

    Returns:
    - A callable with the same call signature that returns ``pd.Series(result)``.
    """
    import functools  # local import: the notebook's import cells are outside this block

    @functools.wraps(func)
    def wrapper(*args, **kwargs):
        return pd.Series(func(*args, **kwargs))
    return wrapper
    
@as_series
def forward(image):
    """Classify one image with the notebook-level ``classifier``.

    Parameters:
    - image: the image handed to ``classifier.predict``.

    Returns:
    - The ``(class_id, confidence)`` pair from ``classifier.predict``; the
      ``as_series`` decorator delivers it as a two-element ``pd.Series``.
    """
    predicted_class, score = classifier.predict(image)
    return predicted_class, score

# Run the classifier on every row's image and store the two results in the
# 'predict' and 'confidence' columns.
df = p.get_input_df()
predictions = df.apply(lambda row: forward(row['PIL_image']), axis=1)
df[['predict', 'confidence']] = predictions
df.info()
df.head()

In [None]:
# Keep only the identification and prediction columns, with the question
# number stored as an integer so it can be compared with the template's 'q'.
result_columns = ['sheet', 'question', 'choice', 'predict', 'confidence']
df2 = df[result_columns].copy().astype({'question': 'int64'})
df2.info()
df2.head()

In [None]:
# df2[(df2['sheet']=='page047') & (df2['question']==52) & (df2['choice']==0)].iloc[0,-2]

# 5. Render Result

In [None]:
# all_bbox

In [None]:
from PIL import Image
import matplotlib.pyplot as plt
import matplotlib.patches as patches

print('Rendering Results:')
image_files = p.find_all_images(p.staging_dir)

# Output directory for the annotated sheets; also used by the export cell.
result_dir = (p.project_dir/'_result').resolve()
result_dir.mkdir(parents=True, exist_ok=True)

for image_file in image_files:
    image_path = Path(p.staging_dir)/image_file
    # Sheet name = file name up to the first dot; it is matched against df2['sheet'].
    sheet_name = image_file.split('.')[0]
    print("image_path: ",image_path)
    print("sheet_name: ",sheet_name)

    # Collect, once per sheet, the (question, choice) pairs predicted as class 1
    # ('check'). The 'choice' column holds strings, hence str(...) in the lookup
    # below. A box that has no prediction row is treated as not checked.
    sheet_rows = df2[df2['sheet'] == sheet_name]
    checked_rows = sheet_rows[sheet_rows['predict'] == 1]
    checked_boxes = set(zip(checked_rows['question'], checked_rows['choice']))

    fig, ax = plt.subplots(figsize=(10, 15))

    # Outline in red every template box that was predicted as checked.
    for bbox in all_bbox:
        if (bbox['q'], str(bbox['c'])) in checked_boxes:
            rect = patches.Rectangle(
                (bbox['x'], bbox['y']), bbox['box_width'], bbox['box_height'],
                linewidth=1, edgecolor='r', facecolor='none')
            ax.add_patch(rect)

    # Display the sheet; the file handle is released as soon as it is drawn.
    with Image.open(image_path) as sheet_image:
        ax.imshow(sheet_image)
    ax.axis('off')  # Hide the axes for better visualization

    fig.savefig(result_dir/f'{sheet_name}.png', format='png', dpi=80, bbox_inches='tight')
    # Close inside the loop: otherwise one open figure accumulates per sheet.
    plt.close(fig)

# 6. Export File

In [None]:
def get_ans(c1,c2,c3,c4):
    """Return the answer letter when exactly one choice equals 1.

    Parameters:
    - c1, c2, c3, c4: values for choices 'a', 'b', 'c' and 'd'; a choice
      counts as selected only when its value equals 1.

    Returns:
    - 'a', 'b', 'c' or 'd' when exactly one choice is selected, otherwise ''
      (no choice selected, or more than one).
    """
    selected = [
        letter
        for letter, value in zip('abcd', (c1, c2, c3, c4))
        if value == 1
    ]
    if len(selected) == 1:
        return selected[0]
    return ''
        
# One row per (sheet, question); one column per (metric, choice) pair, where
# the metric is the mean of 'predict' or 'confidence' for that choice.
mean_per_metric = {'predict': "mean", 'confidence': "mean"}
df_pivot = df2.reset_index().pivot_table(
    index=['sheet', 'question'],
    columns=['choice'],
    aggfunc=mean_per_metric,
)

# Collapse the two-level column labels into single names such as 'predict_0'.
df_pivot.columns = ['_'.join(label).strip() for label in df_pivot.columns]
df3 = df_pivot.reset_index()

# Derive the answer letter of every question from its four 'predict_*' columns.
predict_columns = [f'predict_{choice}' for choice in range(4)]
df4 = df3.assign(
    ans=lambda frame: frame.apply(
        lambda row: get_ans(*row[predict_columns]), axis=1
    )
)
df4.info()
df4.to_csv( result_dir/(f'result_{p.project_name}.csv'))
df4

# Retrain the model
## Create Label Tasks

In [None]:
# Open the project's Label Studio connection, then create the labelling tasks.
# NOTE(review): `forward` (the classifier wrapper) is passed to create_task,
# presumably to pre-fill each task with a model prediction — confirm in myocr.
p.connect_label_studio()
p.create_task(forward)

## Train

In [None]:
# Fetch the labels from Label Studio through the project's `lb` client.
# The next cell reads the 'class_name', 'image_file' and 'class_id' columns of this frame.
# NOTE(review): presumably only completed tasks are returned — confirm in myocr.
df_complete_task=p.lb.fetch_labels_from_label_studio()
# df_complete_task['image_file']=df_complete_task['image_url'].apply(lambda x: x.replace(lb.IMAGE_SERVER_URL+project_name+'/',''))
df_complete_task.info()
df_complete_task.head()

In [None]:
import json
# Bar chart of how many labelled samples each class has.
df_complete_task[[ 'class_name']].value_counts().plot(kind='bar')

# Bundle the per-image labels with the class-id -> name mapping and write
# them to disk for the training cell.
columns = ['image_file', 'class_id']
records = df_complete_task[columns].to_dict('records')
annotation = {'annotations': records, 'class_names': my_class}
with open('annotations.json', 'w') as json_file:
    json.dump(annotation, json_file, indent=4)

print("Annotations saved to 'annotations.json'.")

In [None]:
from myocr import load_data
# Inputs for retraining: the annotation file written by the previous cell and
# the project's directory of cropped images.
annotations_file = 'annotations.json'
img_dir = p.cropped_dir

# Random augmentations handed to the data loader together with `transform`.
augmentation_steps = [
    transforms.RandomHorizontalFlip(),
    transforms.RandomRotation(10),
    transforms.ColorJitter(brightness=0.2, contrast=0.2, saturation=0.2, hue=0.1),
]
augmentations = transforms.Compose(augmentation_steps)

# Train/test split: 80% training data, fixed seed, batches of 32.
loader_options = dict(train_ratio=0.8, random_seed=42, batch_size=32)
train_loader, test_loader = load_data(
    annotations_file, img_dir, transform, augmentations, **loader_options
)

# Train, evaluate on the held-out split, then persist the new weights.
classifier.train(train_loader, epochs=100)
classifier.evaluate(test_loader)
classifier.save('lenet_retrained.pth')