In [None]:
import argparse
import os
import sys
# Extend the module search path so the project packages imported in the later
# cells (config, data, pipeline, utils) and user-local installs can be found.
# NOTE(review): these are absolute, user-specific paths — presumably the
# pipeline checkout and a per-user site-packages tree; adjust before running
# the notebook under another account or machine.
sys.path.append('/users/vesalaia/pipelinv2')
sys.path.append("/users/vesalaia/.local/lib/python3.9/site-packages/bin")
sys.path.append("/users/vesalaia/.local/lib/python3.9/site-packages/lib/python3.9/site-packages")

In [None]:
import logging

import datetime
# Timestamp is taken once, when this cell runs; it is only used to name the log file.
now = datetime.datetime.now()

timestamp = now.strftime("%Y-%m-%d_%H-%M-%S")
# Relative file name: the log is created in the kernel's current working directory.
log_file_name = f"table_recognition_{timestamp}.log"

# Send INFO-and-above records of the root logger to the file above.
# NOTE: logging.basicConfig() is a no-op when the root logger already has
# handlers, so re-running this cell in the same kernel keeps writing to the
# file configured by the first run.
logging.basicConfig(
    filename=log_file_name,
    level=logging.INFO,
    format='[%(asctime)s] %(levelname)s - %(message)s',
    datefmt='%H:%M:%S'
)


In [None]:
from config.options import Options

from data.dataset import OCRDatasetInstanceSeg
from pipeline.engine import initFolder, extractText, pipelineTask
from utils.config_check import configuration_ok

In [None]:
def str2bool(v):
    """Convert a command-line style truth value to ``bool``.

    Args:
        v: Either a ``bool`` (returned unchanged) or any value whose string
            form is one of ``yes/true/t/y/1`` or ``no/false/f/n/0``.
            Matching is case-insensitive and ignores surrounding whitespace.

    Returns:
        bool: The parsed truth value.

    Raises:
        argparse.ArgumentTypeError: If ``v`` is not a recognised truth value.
            The failure is also written to the log.
    """
    if isinstance(v, bool):
        return v
    # Go through str() so that non-string input (None, 1, 0, ...) takes the
    # same accept/reject path instead of failing with AttributeError on
    # .lower(); strip() tolerates values such as " true ".
    token = str(v).strip().lower()
    if token in ('yes', 'true', 't', 'y', '1'):
        return True
    if token in ('no', 'false', 'f', 'n', '0'):
        return False
    logging.error('A serious error occurred. Boolean value expected.')
    raise argparse.ArgumentTypeError('Boolean value expected.')

In [None]:
import cv2
import numpy as np
import matplotlib.pyplot as plt

def crop_polygon_from_image(image, polygon):
    """Cut the bounding box of `polygon` out of `image`, blacking out pixels outside the polygon.

    Args:
        image: Image array indexed as ``image[row, column, ...]``
            (assumed to be a uint8 OpenCV image, e.g. from ``cv2.imread`` —
            TODO confirm for other dtypes, since the mask value is 255).
        polygon: Polygon vertices as (x, y) pixel coordinates; a numpy array
            or any nested sequence convertible to one. Non-integer
            coordinates are truncated to integers.

    Returns:
        The masked image restricted to the polygon's bounding rectangle,
        clipped to the image area. The input image is not modified.
    """
    # OpenCV's drawing functions only accept int32 point arrays; coercing here
    # lets plain Python lists of points work as well.
    points = np.asarray(polygon, dtype=np.int32)

    mask = np.zeros_like(image)
    cv2.fillPoly(mask, [points], (255, 255, 255))
    masked = cv2.bitwise_and(image, mask)

    x, y, w, h = cv2.boundingRect(points)
    # Clamp the rectangle to the image. Without this, a polygon that starts at
    # a negative coordinate produces a negative slice start, which numpy
    # interprets as "from the end" and yields an empty or wrong crop.
    img_h, img_w = image.shape[:2]
    x0, y0 = max(x, 0), max(y, 0)
    x1 = max(min(x + w, img_w), x0)
    y1 = max(min(y + h, img_h), y0)
    return masked[y0:y1, x0:x1]


In [None]:
import cv2
import numpy as np
import random

# Function to generate n random colors
def generate_random_colors(n_colors=20):
    """Return a reproducible list of `n_colors` pseudo-random colour triples.

    Args:
        n_colors: Number of colours to generate.

    Returns:
        list[tuple[int, int, int]]: Triples with every channel in 0..255.
        The same list is returned on every call with the same `n_colors`.
    """
    # A private generator seeded with 42 yields the same sequence as seeding
    # the module-level generator, but leaves the global `random` state alone,
    # so other random draws in the notebook are not silently reset by each call.
    rng = random.Random(42)
    return [(rng.randint(0, 255), rng.randint(0, 255), rng.randint(0, 255)) for _ in range(n_colors)]


def draw_polygons_on_image(image, polygons):
    """Return a copy of `image` with every polygon outlined and lightly tinted.

    Args:
        image: Image array to annotate; it is copied, never modified.
        polygons: Iterable of polygons in the form accepted by
            ``cv2.polylines`` / ``cv2.fillPoly``.

    Returns:
        The annotated copy. Colours come from a 20-entry palette that is
        reused cyclically when there are more polygons than colours.
    """
    fill_alpha = 0.2  # weight of the filled overlay when blending
    canvas = image.copy()
    palette = generate_random_colors(20)

    for position, outline in enumerate(polygons):
        stroke = palette[position % len(palette)]

        # Solid 2px outline first ...
        cv2.polylines(canvas, [outline], isClosed=True, color=stroke, thickness=2)

        # ... then blend a filled copy back in place for a translucent interior.
        filled = canvas.copy()
        cv2.fillPoly(filled, [outline], color=stroke)
        cv2.addWeighted(filled, fill_alpha, canvas, 1 - fill_alpha, 0, canvas)

    return canvas



In [None]:
"""
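Tasks accepted by processTask (aliases in parentheses; default XML input -> output folder):
    init (i):                        -> page
    region (detectregion, dr):       page -> pageRD
    line (detectlines, dl):          pageRD -> pageLD
    table (t):                       page -> pageTbl
    cell (c):                        pageTbl -> pageCell
    recognize (recognizetext, rt):   pageLD -> pageText
    cellrecognize (cr):              pageCell -> pageText
    update (u):                      pageLD -> pageU
    text:                            pageText -> text
    json:                            pageText -> text
    csv:                             pageText -> text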
"""

In [None]:
# Sentinel meaning "flag neither passed by the caller nor defined as a notebook variable".
_UNSET = object()

# canonical task -> (accepted names, default inpage, default outpage,
#                    caller flags forwarded to pipelineTask, fixed keyword arguments)
_PIPELINE_TASKS = {
    "region": (("detectregion", "dr", "region"), "page", "pageRD",
               ("tryMerge", "reading_order"), {}),
    "line": (("detectlines", "dl", "line"), "pageRD", "pageLD",
             ("tryMerge", "reading_order"), {"line_model": "mask r-cnn"}),
    "recognize": (("recognizetext", "rt", "recognize"), "pageLD", "pageText",
                  ("reading_order",), {}),
    "update": (("update", "u"), "pageLD", "pageU",
               ("reading_order", "combine"), {}),
    "table": (("table", "t"), "page", "pageTbl",
              (), {"reading_order": False, "combine": False}),
    "cell": (("cell", "c"), "pageTbl", "pageCell",
             (), {"reading_order": False, "combine": False}),
    "cellrecognize": (("cellrecognize", "cr"), "pageCell", "pageText",
                      (), {}),
}

# Reverse lookup: every accepted spelling -> canonical task name.
_TASK_ALIASES = {
    alias: canonical
    for canonical, spec in _PIPELINE_TASKS.items()
    for alias in spec[0]
}


def _resolve_flag(explicit, name):
    """Return `explicit` if given, else the module-level variable `name`, else `_UNSET`."""
    if explicit is not None:
        return explicit
    # Backward compatibility: these flags used to be read as free variables,
    # so a notebook-level variable of the same name is still honoured.
    return globals().get(name, _UNSET)


def processTask(cfgfile, executeTask, infolder, inpage, outpage,
                tryMerge=None, reading_order=None, combine=None):
    """Load the configuration and run one pipeline task on a folder.

    Args:
        cfgfile: Path of the configuration file passed to ``Options``. When
            ``None`` an error is logged and nothing is run.
        executeTask: Task name or alias, case-insensitive: ``init``/``i``,
            ``text``, ``json``, ``csv``, ``region``/``detectregion``/``dr``,
            ``line``/``detectlines``/``dl``, ``recognize``/``recognizetext``/``rt``,
            ``update``/``u``, ``table``/``t``, ``cell``/``c``,
            ``cellrecognize``/``cr``.
        infolder: Folder to process. When empty or ``None`` an error is
            logged and the task is not run.
        inpage: Name of the XML input folder; empty selects the task default.
        outpage: Name of the XML output folder; empty selects the task default.
        tryMerge: Forwarded to ``pipelineTask`` for the region and line tasks.
        reading_order: Forwarded for the region, line, recognize and update tasks.
        combine: Forwarded for the update task.

    For the three optional flags, ``None`` means "not specified": a
    module-level variable of the same name is used if one exists, otherwise
    the argument is omitted so that ``pipelineTask`` applies its own default.

    Returns:
        None. Progress and problems are reported through ``logging``.
    """
    logging.info(f"Configuration file: {cfgfile}")
    if cfgfile is None:
        # Previously `opts` stayed unbound here and the next line crashed.
        logging.error("No configuration file given; task not executed.")
        return
    opts = Options(cfgfile)

    requested = executeTask.lower()
    if not configuration_ok(opts, requested):
        logging.error(f"Configuration check failed for task: {executeTask}")
        return

    flag_names, fixed_kwargs = (), {}
    if requested in ("init", "i"):
        task, default_in, default_out = "init", "", "page"
    elif requested in ("text", "json", "csv"):
        task, default_in, default_out = requested, "pageText", "text"
    elif requested in _TASK_ALIASES:
        task = _TASK_ALIASES[requested]
        _, default_in, default_out, flag_names, fixed_kwargs = _PIPELINE_TASKS[task]
    else:
        logging.error(f"Task not recognized: {executeTask}")
        return

    # One uniform check for all tasks (the branches used to mix `!= ""` and
    # `!= None`, so some tasks ran with an empty folder name).
    if not infolder:
        logging.error(f"Task:{task} not executed: no input folder given")
        return

    outpage = outpage or default_out
    if task == "init":
        logging.info(f"Task:{task} Folder:{infolder} XML-output: {outpage}")
        initFolder(opts, infolder, outpage)
        return

    inpage = inpage or default_in
    logging.info(f"Task:{task} Folder:{infolder} XML-input:{inpage} XML-output: {outpage}")
    if task in ("text", "json", "csv"):
        pipelineTask(opts, task, infolder, inpage, outpage)
        return

    supplied = {"tryMerge": tryMerge, "reading_order": reading_order, "combine": combine}
    extra_kwargs = dict(fixed_kwargs)
    for flag in flag_names:
        value = _resolve_flag(supplied[flag], flag)
        if value is not _UNSET:
            extra_kwargs[flag] = value
    pipelineTask(opts, task, infolder, inpage=inpage, outpage=outpage, **extra_kwargs)


In [None]:
# Load the pipeline options object used by the cells below.
# NOTE(review): absolute, user-specific path — adjust before running elsewhere.
cfgfile = "/users/vesalaia/config/config_trocr.ini"
opts = Options(cfgfile)

In [None]:
# Print the `installed_pckgs` attribute of the options object, then run the
# configuration check for the "cellrecognize" task. As the last expression of
# the cell, the check's return value is what the notebook displays.
print(opts.installed_pckgs)
configuration_ok(opts, "cellrecognize")


In [None]:
# One [folder, folder/page] pair — presumably [image folder, XML folder];
# confirm against OCRDatasetInstanceSeg. The meaning of the second
# constructor argument ({}) is not visible in this notebook.
dataset_files = [["/scratch/project_2005488/Muutto/Official/debug",
"/scratch/project_2005488/Muutto/Official/debug/page"]]
dataset = OCRDatasetInstanceSeg(dataset_files, {})
# Number of items in the dataset.
print(dataset.__len__())

In [None]:
# Folder pairs for the three data splits. Each entry is a two-item list;
# judging by the path names the first item is an image folder and the second
# the matching XML folder — confirm against OCRDatasetInstanceSeg.
# Training split: 10 "printed" pairs followed by 5 "handdrawn" pairs.
dataset_files_train = [["/scratch/project_2005072/moving_records_htr/training-set/printed/images/sample_all_printed",
"/scratch/project_2005072/moving_records_htr/training-set/printed/xml/sample-all-printed-xml"],
                       
["/scratch/project_2005072/moving_records_htr/training-set/printed/images/sample13-all-printed",
"/scratch/project_2005072/moving_records_htr/training-set/printed/xml/sample13-all-printed-xml"],
                       
["/scratch/project_2005072/moving_records_htr/training-set/printed/images/sample_all_printed_2a",
"/scratch/project_2005072/moving_records_htr/training-set/printed/xml/sample2a-all-printed-xml"],
                       
["/scratch/project_2005072/moving_records_htr/training-set/printed/images/sample12-all-printed",
"/scratch/project_2005072/moving_records_htr/training-set/printed/xml/sample12-all-printed-xml"],
                       
["/scratch/project_2005072/moving_records_htr/training-set/printed/images/sample11-all-printed",
"/scratch/project_2005072/moving_records_htr/training-set/printed/xml/sample11-all-printed-xml"],
                       
["/scratch/project_2005072/moving_records_htr/training-set/printed/images/sample10-all-printed",
"/scratch/project_2005072/moving_records_htr/training-set/printed/xml/sample10-all-printed-xml"],
                       
["/scratch/project_2005072/moving_records_htr/training-set/printed/images/sample_all_printed_2c",
"/scratch/project_2005072/moving_records_htr/training-set/printed/xml/sample2c-all-printed-xml"],
                       
["/scratch/project_2005072/moving_records_htr/training-set/printed/images/sample99-all-printed",
"/scratch/project_2005072/moving_records_htr/training-set/printed/xml/sample99-all-printed-xml"],
                       
["/scratch/project_2005072/moving_records_htr/training-set/printed/images/sample9-all-printed",
"/scratch/project_2005072/moving_records_htr/training-set/printed/xml/sample9-all-printed-xml"],
                       
["/scratch/project_2005072/moving_records_htr/training-set/printed/images/sample_all_printed_2b",
"/scratch/project_2005072/moving_records_htr/training-set/printed/xml/sample2b-all-printed-xml"],
                       
["/scratch/project_2005072/moving_records_htr/training-set/handdrawn/images/man-ds-sample3-all-handdrawn",
"/scratch/project_2005072/moving_records_htr/training-set/handdrawn/xml/man-ds-sample3-all-handdrawn"],
                       
["/scratch/project_2005072/moving_records_htr/training-set/handdrawn/images/sample1-all-handdrawn",
"/scratch/project_2005072/moving_records_htr/training-set/handdrawn/xml/sample1-all-handdrawn-xml"],
                       
["/scratch/project_2005072/moving_records_htr/training-set/handdrawn/images/man-ds-sample4-all-handdrawn",
"/scratch/project_2005072/moving_records_htr/training-set/handdrawn/xml/man-ds-sample4-all-handdrawn"],
                       
["/scratch/project_2005072/moving_records_htr/training-set/handdrawn/images/man-ds-sample2-all-handdrawn",
"/scratch/project_2005072/moving_records_htr/training-set/handdrawn/xml/man-ds-sample2-all-handdrawn"],
                       
["/scratch/project_2005072/moving_records_htr/training-set/handdrawn/images/man-ds-sample5-all-handdrawn",
"/scratch/project_2005072/moving_records_htr/training-set/handdrawn/xml/man-ds-sample5-all-handdrawn"]]


# Test split: one "printed" and one "handdrawn" pair (test-set/...test2...).
dataset_files_test =[["/scratch/project_2005072/moving_records_htr/test-set/printed/images/man-ds-test2-all-printed",
"/scratch/project_2005072/moving_records_htr/test-set/printed/xml/man-ds-test2-all-printed"],
                     
["/scratch/project_2005072/moving_records_htr/test-set/handdrawn/images/man-ds-test2-all-handdrawn",
"/scratch/project_2005072/moving_records_htr/test-set/handdrawn/xml/man-ds-test2-all-handdrawn"]]



# Validation split: read from the development-set folders (...test1...).
dataset_files_val =[["/scratch/project_2005072/moving_records_htr/development-set/printed/images/man-ds-test1-all-printed",
"/scratch/project_2005072/moving_records_htr/development-set/printed/xml/man-ds-test1-all-printed"],
                    
["/scratch/project_2005072/moving_records_htr/development-set/handdrawn/images/man-ds-test1-all-handdrawn",
"/scratch/project_2005072/moving_records_htr/development-set/handdrawn/xml/man-ds-test1-all-handdrawn"]]

# Build one dataset object per split and print its item count.
# The meaning of the second constructor argument ({}) is not visible here.
dataset_train = OCRDatasetInstanceSeg(dataset_files_train, {})
print(dataset_train.__len__())

dataset_val = OCRDatasetInstanceSeg(dataset_files_val, {})
print(dataset_val.__len__())

dataset_test = OCRDatasetInstanceSeg(dataset_files_test, {})
print(dataset_test.__len__())

In [None]:
# Show the image paths held by the training dataset.
# (The name was previously misspelled as `fodataset_train`, which raised NameError.)
dataset_train.imgs


In [None]:
def check_base_names(file1, file2):
    """Tell whether two paths share the same file name, ignoring directory and final extension.

    Args:
        file1: First path.
        file2: Second path.

    Returns:
        bool: True when both base names, stripped of their last extension,
        are equal.
    """
    stems = [os.path.splitext(os.path.basename(path))[0] for path in (file1, file2)]
    return stems[0] == stems[1]

In [None]:
# Report every training item whose XML file and image file have different base names.
for idx in range(dataset_train.__len__()):
    x, i = dataset_train.xmls[idx], dataset_train.imgs[idx]
    if check_base_names(x, i):
        continue
    print(f"XML: {x} image: {i} do not match")

In [None]:
# Rebind `dataset` to the folder inspected by the visualisation cells below:
# the first item is `infolder`, the second its `inpage` sub-folder.
# NOTE(review): absolute, project-specific path.
infolder = "/scratch/project_2005488/Muutto/auto-deskewed/end-to-end-printed" 
inpage = "pageTbl2"
dataset_files = [[infolder, os.path.join(infolder, inpage)]]
dataset = OCRDatasetInstanceSeg(dataset_files, {})
# Number of items in the dataset.
print(dataset.__len__())

In [None]:
def get_polygons(page, ptype):
    """Collect all polygons of one kind from a parsed page.

    Args:
        page: Mapping with a ``'regions'`` list. A region may carry a
            ``'table'`` list of cells, and a cell may carry a ``'lines'`` list.
        ptype: ``"table"`` for each region's ``'polygon'``, ``"cell"`` for
            each cell's ``'polygon'``, ``"line"`` for each line's
            ``'Textline'``. Any other value yields an empty list.

    Returns:
        list: The selected polygons in document order.
    """
    want_table = ptype == "table"
    want_cell = ptype == "cell"
    want_line = ptype == "line"

    collected = []
    for region in page['regions']:
        if want_table:
            collected.append(region['polygon'])
        if 'table' not in region:
            continue
        for cell in region['table']:
            if want_cell:
                collected.append(cell['polygon'])
            if 'lines' not in cell:
                continue
            collected += [entry['Textline'] for entry in cell['lines'] if want_line]
    return collected

In [None]:
def get_polygons_of_nth_table(page, ptype, ntable):
    """Collect the polygons of one kind from a single region of a parsed page.

    Args:
        page: Mapping with a ``'regions'`` list. A region may carry a
            ``'table'`` list of cells, and a cell may carry a ``'lines'`` list.
        ptype: ``"table"`` for the region's own ``'polygon'``, ``"cell"`` for
            each cell's ``'polygon'``, ``"line"`` for each line's
            ``'Textline'``. Any other value yields an empty list.
        ntable: Index into ``page['regions']`` of the region to inspect.

    Returns:
        list: The selected polygons in document order.

    Raises:
        IndexError: If ``ntable`` is not a valid region index.
    """
    polygons = []
    # Index with the parameter; the body used to read an undefined name `n`
    # and only worked when a global loop variable of that name existed.
    reg = page['regions'][ntable]
    if ptype == "table":
        polygons.append(reg['polygon'])
    if 'table' in reg:
        for cell in reg['table']:
            if ptype == "cell":
                polygons.append(cell['polygon'])
            if 'lines' in cell:
                for line in cell['lines']:
                    if ptype == "line":
                        polygons.append(line['Textline'])
    return polygons

In [None]:
# Preview the cell polygons of up to two randomly chosen pages.
vizpage = "viz2"
# min(...) keeps random.sample from raising ValueError on datasets with fewer than two pages.
viz_items = random.sample(range(len(dataset)), min(2, len(dataset)))
for idx in viz_items:
    infile = dataset.__getfullname__(idx)
    print(infile)
    fname = os.path.basename(infile)
    vizdir = os.path.join(os.path.dirname(infile), vizpage)
#    if not os.path.exists(vizdir):
#        os.makedirs(vizdir)
    basename, _ = os.path.splitext(fname)
    vizfile = os.path.join(vizdir, basename + ".jpg")

    image = cv2.imread(infile)
    if image is None:
        # cv2.imread signals an unreadable file by returning None, not by raising.
        print(f"Could not read image: {infile}")
        continue
    page = dataset.__getXMLitem__(idx)
    polygons = get_polygons(page, "cell")
    n_image = draw_polygons_on_image(image, polygons)
    # OpenCV stores channels as BGR while matplotlib expects RGB; convert for
    # display only so that n_image stays valid for cv2.imwrite.
    plt.imshow(cv2.cvtColor(n_image, cv2.COLOR_BGR2RGB))
    plt.title(infile)
    plt.show()
    #if idx > 20: break
    #cv2.imwrite(vizfile, n_image)

In [None]:
# For the first pages of the dataset, show one figure per region with that
# region's cell polygons drawn on the page image.
vizpage = "viz"
for idx in range(len(dataset)):
    infile = dataset.__getfullname__(idx)
    print(infile)
    fname = os.path.basename(infile)
    vizdir = os.path.join(os.path.dirname(infile), vizpage)
    # exist_ok avoids the check-then-create race of a separate exists() test.
    os.makedirs(vizdir, exist_ok=True)
    basename, _ = os.path.splitext(fname)
    vizfile = os.path.join(vizdir, basename + ".jpg")

    image = cv2.imread(infile)
    if image is None:
        # cv2.imread signals an unreadable file by returning None, not by raising.
        print(f"Could not read image: {infile}")
    else:
        page = dataset.__getXMLitem__(idx)
        for n in range(len(page['regions'])):
            polygons = get_polygons_of_nth_table(page, "cell",n)
            n_image = draw_polygons_on_image(image, polygons)
            # OpenCV stores channels as BGR while matplotlib expects RGB;
            # convert for display only so n_image stays valid for cv2.imwrite.
            plt.imshow(cv2.cvtColor(n_image, cv2.COLOR_BGR2RGB))
            plt.title(vizfile)
            plt.show()
    # Stop after the page with index 6 to keep the preview short.
    if idx > 5: break
#    cv2.imwrite(vizfile, n_image)

In [None]:
# Compare the shape of each decoded image with the size the dataset reports from its XML.
for idx in range(dataset.__len__()):
    img = cv2.imread(dataset.__getfullname__(idx))
    _, size = dataset.__getsize__(idx)
    # cv2.imread returns None for an unreadable file; report that instead of
    # aborting the whole comparison with AttributeError on `.shape`.
    print(f"Image: {img.shape if img is not None else None} and from XML: {size}")