In [87]:
import cv2
import numpy as np
import os
import pytesseract

from ditod import add_vit_config

import torch

from detectron2.config import get_cfg
from detectron2.utils.visualizer import ColorMode, Visualizer
from detectron2.data import MetadataCatalog
from detectron2.engine import DefaultPredictor

In [127]:
# YAML file merged into the detectron2 config by predict().
config = "publaynet_configs/maskrcnn/maskrcnn_dit_base.yaml"
# Flat [KEY, VALUE] override list merged into the config after the YAML file;
# here it points MODEL.WEIGHTS at the checkpoint URL.
opts = [
    "MODEL.WEIGHTS",
    "https://layoutlm.blob.core.windows.net/dit/dit-fts/publaynet_dit-b_mrcnn.pth",
]
# Path of the page image the notebook processes.
image = "6.jpg"

In [128]:
def predict(image, device="cpu"):
    """Run the layout-detection model on one image file and render the result.

    The detectron2 config is built from the module-level ``config`` file and
    ``opts`` overrides, a ``DefaultPredictor`` is created from it, and the
    predicted instances are drawn onto the image with a ``Visualizer``.

    Args:
        image: Path of the image file; read with ``cv2.imread``.
        device: Value assigned to ``cfg.MODEL.DEVICE``. Defaults to ``"cpu"``.

    Returns:
        A tuple ``(img, result_image, instances)``: the array loaded by
        ``cv2.imread``, the visualizer rendering with its channel axis
        reversed back, and the predicted instances moved to the CPU.

    Raises:
        OSError: If ``cv2.imread`` could not read ``image``.
    """
    # Read the image first so an unreadable path fails fast, before the
    # (expensive) model construction, and with a clear message.
    img = cv2.imread(image)
    if img is None:
        raise OSError(f"Could not read image file: {image!r}")

    # Step 1: instantiate config
    cfg = get_cfg()
    add_vit_config(cfg)
    cfg.merge_from_file(config)

    # Step 2: add model weights URL to config
    cfg.merge_from_list(opts)

    # Step 3: set device
    cfg.MODEL.DEVICE = device

    # Step 4: define model
    predictor = DefaultPredictor(cfg)

    # Step 5: attach class names to the metadata of the first test dataset so
    # the visualizer can label the predictions
    test_dataset = cfg.DATASETS.TEST[0]
    md = MetadataCatalog.get(test_dataset)
    if test_dataset == 'icdar2019_test':
        md.set(thing_classes=["table"])
    else:
        md.set(thing_classes=["text","title","list","table","figure"])

    # Step 6: run inference and move the result to the CPU once
    output = predictor(img)["instances"].to("cpu")

    # Step 7: draw the predictions. The channel axis is reversed going in and
    # coming out (presumably BGR <-> RGB between OpenCV and the Visualizer).
    v = Visualizer(img[:, :, ::-1],
                md,
                scale=1.0,
                instance_mode=ColorMode.SEGMENTATION)
    result = v.draw_instance_predictions(output)
    result_image = result.get_image()[:, :, ::-1]

    return img, result_image, output

In [129]:
# Run detection on the configured image and unpack predict()'s three results:
# the loaded image, the rendered visualization and the predicted instances.
img, result_img, output = predict(image)

  "See the documentation of nn.Upsample for details.".format(mode)


In [130]:
# Write the rendered visualization to disk; as the last expression of the
# cell, imwrite's return value is what gets displayed.
cv2.imwrite("out.jpg", result_img)

True

In [131]:
# Inspect the raw predictions (boxes, scores, classes, masks) before any
# sorting or score filtering is applied.
print(output)

Instances(num_instances=10, image_height=3508, image_width=2481, fields=[pred_boxes: Boxes(tensor([[ 295.4206, 1371.1492, 2176.1472, 1793.8269],
        [ 291.8101, 2378.4170, 2068.4028, 2428.4456],
        [ 295.4668,  924.6285, 2014.5366, 1042.3282],
        [ 471.4626,  446.6908, 2024.2179,  609.0714],
        [ 519.3795, 1224.2454, 1273.1481, 1301.1660],
        [ 299.2503,  711.0563, 1291.8029,  767.3207],
        [ 294.8022, 2080.5405, 1418.4680, 2135.6948],
        [ 282.0891, 2166.0715, 2155.0469, 2345.7046],
        [ 517.4215, 1933.5024, 1291.9512, 2007.4747],
        [ 520.6734, 1934.8213, 1292.9557, 2007.6071]])), scores: tensor([0.9955, 0.9887, 0.9836, 0.9764, 0.9681, 0.9577, 0.9507, 0.9502, 0.9118,
        0.0542]), pred_classes: tensor([0, 0, 0, 1, 1, 0, 0, 0, 1, 0]), pred_masks: tensor([[[False, False, False,  ..., False, False, False],
         [False, False, False,  ..., False, False, False],
         [False, False, False,  ..., False, False, False],
         ...,
   

In [132]:
def sort_index(instance):
    """Return the box indices ordered by the second box coordinate.

    Args:
        instance: Object whose ``pred_boxes.tensor`` is a 2-D tensor with one
            row per box. Column 1 is the sort key (presumably the top ``y1``
            edge — confirm against the box format in use).

    Returns:
        A list of row indices in ascending key order. Rows with equal keys
        keep their original relative order. An input with no rows gives ``[]``.
    """
    # Extract the key column once as plain Python floats. Unlike calling
    # ``.numpy()`` on every element, ``tolist()`` also works for tensors that
    # live on a GPU or require grad, and avoids one tensor index per element.
    keys = instance.pred_boxes.tensor[:, 1].tolist()
    return sorted(range(len(keys)), key=keys.__getitem__)


In [133]:
def sort_tensor(tensor, sort_mask):
    """Return ``tensor`` indexed by ``sort_mask``.

    Args:
        tensor: Indexable object to reorder along its first axis.
        sort_mask: Index (here, a list of positions) giving the new order.

    Returns:
        The result of ``tensor[sort_mask]``.
    """
    reordered = tensor[sort_mask]
    return reordered

In [134]:
def sort(instance):
    """Reorder the prediction fields of ``instance`` in place.

    The ordering comes from ``sort_index(instance)`` and is applied through
    ``sort_tensor`` to ``pred_boxes.tensor``, ``scores``, ``pred_classes`` and
    ``pred_masks``, in that sequence.

    Returns:
        The same ``instance`` object, now reordered.
    """
    order = sort_index(instance)
    # The box coordinates sit one level down, on pred_boxes' ``tensor``.
    instance.pred_boxes.tensor = sort_tensor(instance.pred_boxes.tensor, order)
    # The remaining fields are direct attributes of the instance.
    for field in ("scores", "pred_classes", "pred_masks"):
        setattr(instance, field, sort_tensor(getattr(instance, field), order))
    return instance

def get_remove_mask(instance, conf):
    """Return the indices of the scores that are at least ``conf``.

    Despite the name, the result lists the entries to *keep*.

    Args:
        instance: Object exposing an iterable ``scores`` attribute.
        conf: Minimum score (inclusive) for an index to be returned.

    Returns:
        A list of integer positions, in their original order.
    """
    return [position for position, value in enumerate(instance.scores) if value >= conf]

def filter_tensor(tensor, mask):
    """Return the entries of ``tensor`` selected by ``mask``.

    Args:
        tensor: Indexable object to select from along its first axis.
        mask: Index (here, a list of positions to keep).

    Returns:
        The result of ``tensor[mask]``.
    """
    selected = tensor[mask]
    return selected

def remove_box_lower_than(instance, conf):
    """Drop, in place, every prediction whose score is below ``conf``.

    ``get_remove_mask`` supplies the indices to keep; ``filter_tensor``
    applies them to ``pred_boxes.tensor``, ``scores``, ``pred_classes`` and
    ``pred_masks``, in that sequence.

    Returns:
        The same ``instance`` object, now holding only the kept entries.
    """
    keep = get_remove_mask(instance, conf)
    # Filter the boxes first.
    # NOTE(review): if ``instance`` is a detectron2 ``Instances``, assigning
    # the other fields may be length-checked against the boxes — confirm
    # before reordering these statements.
    instance.pred_boxes.tensor = filter_tensor(instance.pred_boxes.tensor, keep)
    # Then the fields stored directly on the instance.
    for field in ("scores", "pred_classes", "pred_masks"):
        setattr(instance, field, filter_tensor(getattr(instance, field), keep))

    return instance

In [135]:
# Minimum detection score a box must reach to be kept.
MIN_SCORE = 0.85

# Order the detections with sort(), then drop those scoring below MIN_SCORE.
output = sort(output)
output = remove_box_lower_than(output, MIN_SCORE)

# Move to the CPU once and reuse the result instead of calling .to("cpu")
# again for every field access.
output_cpu = output.to("cpu")
boxes = output_cpu.pred_boxes if output_cpu.has("pred_boxes") else None
scores = output_cpu.scores if output_cpu.has("scores") else None
classes = output_cpu.pred_classes.tolist() if output_cpu.has("pred_classes") else None
# Class names, looked up by the integer ids stored in `classes`.
class_list = ["text","title","list","table","figure"]

print(boxes)
print(scores)
print(classes)

Boxes(tensor([[ 471.4626,  446.6908, 2024.2179,  609.0714],
        [ 299.2503,  711.0563, 1291.8029,  767.3207],
        [ 295.4668,  924.6285, 2014.5366, 1042.3282],
        [ 519.3795, 1224.2454, 1273.1481, 1301.1660],
        [ 295.4206, 1371.1492, 2176.1472, 1793.8269],
        [ 517.4215, 1933.5024, 1291.9512, 2007.4747],
        [ 294.8022, 2080.5405, 1418.4680, 2135.6948],
        [ 282.0891, 2166.0715, 2155.0469, 2345.7046],
        [ 291.8101, 2378.4170, 2068.4028, 2428.4456]]))
tensor([0.9764, 0.9577, 0.9836, 0.9681, 0.9955, 0.9118, 0.9507, 0.9502, 0.9887])
[1, 0, 0, 1, 0, 1, 0, 0, 0]


In [136]:
# Collects one {"class": ..., "text": ...} record per OCR'd region; reset to
# empty every time this cell runs.
ocr_output = []

def add_padding(input_img, padding, color):
    """Surround a 3-D image array with a solid border.

    Args:
        input_img: Array of shape ``(height, width, channels)``.
        padding: Border thickness in pixels, added on every side.
        color: Fill value for the border, passed to ``np.full``.

    Returns:
        A new ``uint8`` array of shape
        ``(height + 2 * padding, width + 2 * padding, channels)`` with
        ``input_img`` copied into its centre.
    """
    rows, cols, depth = input_img.shape

    # Start from a canvas filled entirely with the border colour.
    canvas = np.full((rows + 2 * padding, cols + 2 * padding, depth), color, dtype=np.uint8)

    # The border is equal on all sides, so the original image starts exactly
    # `padding` pixels in from the top-left corner.
    canvas[padding:padding + rows, padding:padding + cols] = input_img
    return canvas

def ocr(img_cv, idx):
    """OCR one image region and append the result to ``ocr_output``.

    Args:
        img_cv: Image array; converted with ``cv2.COLOR_BGR2RGB`` before it
            is handed to ``pytesseract.image_to_string``.
        idx: Position of the region in the module-level ``classes`` list; the
            id found there is looked up in ``class_list`` for the class name.

    Returns:
        None. A ``{"class": ..., "text": ...}`` record is appended to the
        module-level ``ocr_output`` list.
    """
    rgb = cv2.cvtColor(img_cv, cv2.COLOR_BGR2RGB)
    label = class_list[classes[idx]]
    recognised = pytesseract.image_to_string(rgb)
    ocr_output.append({"class": label, "text": recognised})

def crop_and_save_image(idx, input_img, bbox, filename, padding=25, color=(255,255,255)):
    """Crop one box out of an image, pad it, OCR it and write it to disk.

    Args:
        idx: Index of the box, forwarded to ``ocr`` to look up its class.
        input_img: Image array to crop from; rows are indexed by y and
            columns by x.
        bbox: Indexable with at least four entries, read as
            ``(x1, y1, x2, y2)`` and truncated to integers.
        filename: Destination path passed to ``cv2.imwrite``.
        padding: Border thickness in pixels added around the crop.
        color: Border fill value passed to ``add_padding``.

    Returns:
        None. The padded crop is passed to ``ocr`` and written to ``filename``.
    """
    height, width = input_img.shape[:2]

    def clamp(value, upper):
        # Keep a coordinate inside [0, upper]. A negative value would
        # otherwise count from the end of the axis and crop the wrong region.
        return max(0, min(int(value), upper))

    x1, x2 = clamp(bbox[0], width), clamp(bbox[2], width)
    y1, y2 = clamp(bbox[1], height), clamp(bbox[3], height)
    cropped_image = input_img[y1:y2, x1:x2]
    padded_image = add_padding(cropped_image, padding, color)
    ocr(padded_image, idx)
    cv2.imwrite(filename, padded_image)

def format_output():
    """Print the module-level ``ocr_output`` list as-is (no formatting is applied)."""
    print(ocr_output)

# Directory that receives one cropped JPEG per detected box.
out_folder = "out"
# exist_ok=True makes re-running the cell safe when the folder already
# exists, while real failures (e.g. missing permissions) still raise instead
# of being silently swallowed.
os.makedirs(out_folder, exist_ok=True)

# Reload the page image from disk for cropping.
img = cv2.imread(image)

# Crop, pad, OCR and save every box; each file is named after its box index.
for idx, box in enumerate(boxes):
    crop_and_save_image(idx, img, box, os.path.join(out_folder, str(idx) + ".jpg"), 25)

format_output()

[{'class': 'title', 'text': 'A study to test whether different doses of BI 655064 help\npeople with active lupus nephritis\n'}, {'class': 'text', 'text': 'This is a summary of results from 1 clinical study.\n'}, {'class': 'text', 'text': 'We thank all study participants. You helped us to answer important questions about\nBI 655064 and the treatment of lupus nephritis.\n'}, {'class': 'title', 'text': 'What was this study about?\n'}, {'class': 'text', 'text': 'The purpose of this study was to find out whether a medicine called Bl 655064 helps people\nwith lupus nephritis. Lupus nephritis is kidney inflammation caused by the autoimmune\ndisease lupus. The inflammation can be severe, leading to loss of kidney function. New\ntreatments are needed for this condition. Bl 655064 is a medicine that is being developed to\ntreat people with autoimmune disorders. When we develop a new medicine, we need to\nmake sure it works. We wanted to see if different doses of BI 655064 help improve kidney\nfu

In [137]:
# Show the collected OCR records (a list of {"class", "text"} dicts).
print(ocr_output)

[{'class': 'title', 'text': 'A study to test whether different doses of BI 655064 help\npeople with active lupus nephritis\n'}, {'class': 'text', 'text': 'This is a summary of results from 1 clinical study.\n'}, {'class': 'text', 'text': 'We thank all study participants. You helped us to answer important questions about\nBI 655064 and the treatment of lupus nephritis.\n'}, {'class': 'title', 'text': 'What was this study about?\n'}, {'class': 'text', 'text': 'The purpose of this study was to find out whether a medicine called Bl 655064 helps people\nwith lupus nephritis. Lupus nephritis is kidney inflammation caused by the autoimmune\ndisease lupus. The inflammation can be severe, leading to loss of kidney function. New\ntreatments are needed for this condition. Bl 655064 is a medicine that is being developed to\ntreat people with autoimmune disorders. When we develop a new medicine, we need to\nmake sure it works. We wanted to see if different doses of BI 655064 help improve kidney\nfu

In [138]:
# Group the OCR records into sections: every "title" record opens a new
# section keyed "header0", "header1", ..., and every other record is appended
# to the body of the most recently opened section.
final = {}
title_index = 0  # number of sections created so far

for item in ocr_output:
    if item["class"] == "title":
        final[f"header{title_index}"] = {"title": item["text"], "body": []}
        title_index += 1
    else:
        if title_index == 0:
            # Text that appears before the first title gets a single untitled
            # section. Counting it in title_index keeps later records from
            # re-creating "header0" and discarding the text already stored.
            final[f"header{title_index}"] = {"title": "", "body": []}
            title_index += 1
        final[f"header{title_index - 1}"]["body"].append(item["text"])


In [139]:
# Show the grouped result: one "headerN" entry per section, each holding a
# "title" string and a "body" list of text blocks.
print(final)

{'header0': {'title': 'A study to test whether different doses of BI 655064 help\npeople with active lupus nephritis\n', 'body': ['This is a summary of results from 1 clinical study.\n', 'We thank all study participants. You helped us to answer important questions about\nBI 655064 and the treatment of lupus nephritis.\n']}, 'header1': {'title': 'What was this study about?\n', 'body': ['The purpose of this study was to find out whether a medicine called Bl 655064 helps people\nwith lupus nephritis. Lupus nephritis is kidney inflammation caused by the autoimmune\ndisease lupus. The inflammation can be severe, leading to loss of kidney function. New\ntreatments are needed for this condition. Bl 655064 is a medicine that is being developed to\ntreat people with autoimmune disorders. When we develop a new medicine, we need to\nmake sure it works. We wanted to see if different doses of BI 655064 help improve kidney\nfunction in people with lupus nephritis.\n']}, 'header2': {'title': 'Who too