# **Workflows**

This notebook is a guide to deploying our models for document classification, optical character recognition (OCR) and information extraction once they are ready for practical use. It loads the fine-tuned models and serves them through a small Flask web application exposed with ngrok.


# Notebook Setup

In [None]:
# Hugging Face Transformers, pinned to 4.28.0.
!pip install transformers==4.28.0

# OCR back-ends: the Tesseract binary, its Python wrapper (pytesseract),
# and EasyOCR pinned to 1.6.2 (installed quietly, without a progress bar).
!sudo apt install tesseract-ocr
!pip install -q pytesseract
!pip install -qqq easyocr==1.6.2 --progress-bar off

Collecting transformers==4.28.0
  Downloading transformers-4.28.0-py3-none-any.whl (7.0 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m7.0/7.0 MB[0m [31m55.5 MB/s[0m eta [36m0:00:00[0m
Collecting huggingface-hub<1.0,>=0.11.0 (from transformers==4.28.0)
  Downloading huggingface_hub-0.18.0-py3-none-any.whl (301 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m302.0/302.0 kB[0m [31m37.6 MB/s[0m eta [36m0:00:00[0m
Collecting tokenizers!=0.11.3,<0.14,>=0.11.1 (from transformers==4.28.0)
  Downloading tokenizers-0.13.3-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (7.8 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m7.8/7.8 MB[0m [31m118.6 MB/s[0m eta [36m0:00:00[0m
Installing collected packages: tokenizers, huggingface-hub, transformers
Successfully installed huggingface-hub-0.18.0 tokenizers-0.13.3 transformers-4.28.0
Reading package lists... Done
Building dependency tree... Done
Reading state informat

In [None]:
!pip install pyngrok

Collecting pyngrok
  Downloading pyngrok-7.0.0.tar.gz (718 kB)
[?25l     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.0/718.7 kB[0m [31m?[0m eta [36m-:--:--[0m[2K     [91m━━━━━━[0m[91m╸[0m[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m122.9/718.7 kB[0m [31m3.6 MB/s[0m eta [36m0:00:01[0m[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m718.7/718.7 kB[0m [31m10.9 MB/s[0m eta [36m0:00:00[0m
[?25h  Preparing metadata (setup.py) ... [?25l[?25hdone
Building wheels for collected packages: pyngrok
  Building wheel for pyngrok (setup.py) ... [?25l[?25hdone
  Created wheel for pyngrok: filename=pyngrok-7.0.0-py3-none-any.whl size=21129 sha256=ab450aede070a8705da4e03388a2b839c911c58ca07b2fe985816a5f475f1cde
  Stored in directory: /root/.cache/pip/wheels/60/29/7b/f64332aa7e5e88fbd56d4002185ae22dcdc83b35b3d1c2cbf5
Successfully built pyngrok
Installing collected packages: pyngrok
Successfully installed pyngrok-7.0.0


In [None]:
from google.colab import drive
# Mount Google Drive at /content/drive; the fine-tuned model checkpoints loaded
# later in this notebook are read from paths under /content/drive/MyDrive.
drive.mount('/content/drive')

Mounted at /content/drive


In [None]:
from pyngrok import ngrok

In [None]:

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

import cv2
import glob
from google.colab import drive
from PIL import Image, ImageDraw, ImageFont

import numpy as np
import glob
import matplotlib.pyplot as plt

import imageio.v3 as iio
import skimage.color
import skimage.filters
import easyocr
import pytesseract

from google.colab.patches import cv2_imshow
from transformers import  LayoutLMv3ForTokenClassification,  AutoProcessor ,LayoutLMv3ForSequenceClassification

# Model Loading and Configuration for Deployment

In [None]:
# Two processors built from the same base checkpoint:
# - `classif_processor` keeps the processor defaults and is only ever called with
#   an image (see `classif`).
# - `processor` is created with apply_ocr=False and is called with explicit words
#   and bounding boxes (see `encoding_input`).
classif_processor = AutoProcessor.from_pretrained("microsoft/layoutlmv3-base")
processor = AutoProcessor.from_pretrained("microsoft/layoutlmv3-base", apply_ocr=False)


# Fine-tuned checkpoints read from the mounted Google Drive: one sequence
# classifier (document type) and two token classifiers (receipt / form).
classif_model = LayoutLMv3ForSequenceClassification.from_pretrained('/content/drive/MyDrive/PFE/saved/saved_model2')
recu_model = LayoutLMv3ForTokenClassification.from_pretrained('/content/drive/MyDrive/PFE/saved/Recu1/')
Form_model = LayoutLMv3ForTokenClassification.from_pretrained('/content/drive/MyDrive/PFE/saved/Formulaire/')

# Class-index -> tag-name tables used to decode the token classifiers' argmax
# outputs (see `form_post_trait` / `recu_post_trait`).
# NOTE(review): presumably these must match the label order used when the
# checkpoints were fine-tuned -- confirm against the training notebooks.
form_id2label = {0: 'O', 1: 'B-HEADER', 2: 'I-HEADER', 3: 'B-QUESTION', 4: 'I-QUESTION', 5: 'B-ANSWER', 6: 'I-ANSWER'}
recu_id2label = {0: 'S-ADDRESS', 1: 'S-COMPANY', 2: 'S-DATE', 3: 'S-TOTAL' , 4: 'O'}


(…)se/resolve/main/preprocessor_config.json:   0%|          | 0.00/275 [00:00<?, ?B/s]

(…)-base/resolve/main/tokenizer_config.json:   0%|          | 0.00/1.14k [00:00<?, ?B/s]

(…)layoutlmv3-base/resolve/main/config.json:   0%|          | 0.00/856 [00:00<?, ?B/s]

(…)/layoutlmv3-base/resolve/main/vocab.json:   0%|          | 0.00/899k [00:00<?, ?B/s]

(…)/layoutlmv3-base/resolve/main/merges.txt:   0%|          | 0.00/456k [00:00<?, ?B/s]

# OCR

In [None]:
### ocr
def recuproc(path):
  """Load a receipt image and binarise it before OCR.

  Args:
    path: Filesystem path of the image to read with ``cv2.imread``.

  Returns:
    A single-channel image thresholded at 128 (pixels above 128 become 255,
    the rest 0).

  Raises:
    OSError: If OpenCV cannot read an image from ``path``.
  """
  image = cv2.imread(path)
  if image is None:
    # cv2.imread reports failure by returning None instead of raising.
    raise OSError(f"cv2.imread could not read an image from {path!r}")
  # cv2.imread returns colour images in BGR channel order, so the grayscale
  # conversion must use the BGR code; the RGB code swaps the red/blue weights.
  if image.ndim == 3 and image.shape[2] == 3:
    image = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
  return cv2.threshold(image, 128, 255, cv2.THRESH_BINARY)[1]


def formproc(path):
  """Load a form image and convert it to grayscale before OCR.

  Args:
    path: Filesystem path of the image to read with ``cv2.imread``.

  Returns:
    The image as a single-channel grayscale array (returned unchanged if it
    is not a 3-channel image).

  Raises:
    OSError: If OpenCV cannot read an image from ``path``.
  """
  image = cv2.imread(path)
  if image is None:
    # cv2.imread reports failure by returning None instead of raising.
    raise OSError(f"cv2.imread could not read an image from {path!r}")
  # cv2.imread returns colour images in BGR channel order, so the grayscale
  # conversion must use the BGR code; the RGB code swaps the red/blue weights.
  if image.ndim == 3 and image.shape[2] == 3:
    image = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
  return image


def create_bounding_box(bbox_data):
    """Collapse a sequence of (x, y) corner points into an axis-aligned box.

    Args:
        bbox_data: Iterable of two-element points.

    Returns:
        ``[left, top, right, bottom]`` with every coordinate truncated to int.
    """
    # Materialise once so one-shot iterables are consumed a single time.
    corners = [(px, py) for px, py in bbox_data]
    xs = [px for px, _ in corners]
    ys = [py for _, py in corners]
    return [int(min(xs)), int(min(ys)), int(max(xs)), int(max(ys))]


# EasyOCR readers keyed by language tuple. Building a Reader loads the
# detection/recognition models, so it is done once and reused across requests
# instead of once per call.
_OCR_READERS = {}


def _get_ocr_reader(languages):
  """Return a cached ``easyocr.Reader`` for the given languages."""
  key = tuple(languages)
  if key not in _OCR_READERS:
    _OCR_READERS[key] = easyocr.Reader(list(key))
  return _OCR_READERS[key]


def extract(image, languages=('en',)):
  """Run EasyOCR on an image and split the result into parallel lists.

  Args:
    image: Image passed straight to ``easyocr.Reader.readtext``.
    languages: Language codes for the reader; defaults to English, which is
      what this function always used.

  Returns:
    ``(text, boxes, conf)``: the recognised strings, their
    ``[left, top, right, bottom]`` boxes (via ``create_bounding_box``) and the
    confidence reported for each detection, all in detection order.
  """
  reader = _get_ocr_reader(languages)
  ocr_result = reader.readtext(image)
  boxes = []
  text = []
  conf = []
  for bbox, word, confidence in ocr_result:
    boxes.append(create_bounding_box(bbox))
    text.append(word)
    conf.append(confidence)
  return text, boxes, conf


In [None]:
from flask import Flask, render_template, url_for, request, redirect , jsonify , session
from flask_sqlalchemy import SQLAlchemy
import os
from PIL import ImageDraw


# Read the ngrok token from the environment so a real token never has to be
# written into the notebook; the masked placeholder is kept as the fallback.
ngrok.set_auth_token(os.environ.get('NGROK_AUTH_TOKEN', '*********'))  # set your auth token
# Open a public tunnel to local port 5000, the port app.run() listens on by default.
public_url = ngrok.connect(5000).public_url

app = Flask(__name__)
# The secret key signs the session cookie, and the routes below open files from
# paths stored in that session, so it must not be a guessable constant.
# Without FLASK_SECRET_KEY a random key is generated, which means existing
# sessions stop validating whenever this cell is re-run.
app.secret_key = os.environ.get('FLASK_SECRET_KEY') or os.urandom(32)

### CLASSIFICATION

@app.route('/document', methods=['GET'])
def document():
    """Render the ``tables.html`` template for GET requests to ``/document``."""
    page = render_template('tables.html')
    return page

@app.route('/document',methods=['POST'])
def classif () :
    """Store an uploaded document, classify it and show the predicted type.

    Reads the ``imagefile3`` upload, saves it under ``static/docs/``, runs the
    sequence-classification model on it and records the file name, the two
    paths and the predicted type in the session for the later routes.

    Returns:
        The rendered ``tables.html`` template with ``message`` set to
        ``'Formulaire'`` or ``'Reçu'``; a 400 response when no file was
        selected; a 500 response if the model returns an unexpected class.
    """
    imagefile3 = request.files["imagefile3"]

    # Keep only the last path component of the client-supplied name so it
    # cannot point outside the directories used below.
    filename = os.path.basename((imagefile3.filename or '').replace('\\', '/'))
    if not filename:
        return 'No file selected', 400

    os.makedirs("static/docs", exist_ok=True)
    image_path = "static/docs/" + filename
    imagefile3.save(image_path)

    # Placeholder prefixes: replace with the directories that may already hold
    # the document. The freshly saved upload is used when none of them does,
    # so `doc_path` is always defined.
    search_prefixes = ('your_images_path', 'your_images_path')
    doc_path = image_path
    for prefix in search_prefixes:
        candidate = prefix + filename
        if os.path.isfile(candidate):
            doc_path = candidate
            break

    session['imageX'] = filename
    session['image_path'] = image_path
    session['doc_path'] = doc_path
    image = Image.open(doc_path).convert("RGB")

    classif_inputs = classif_processor(image, return_tensors="pt", truncation=True, padding="max_length")
    for k, v in classif_inputs.items():
        classif_inputs[k] = v.to(classif_model.device)
    outputs = classif_model(**classif_inputs)

    predicted_class_idx = outputs.logits.argmax(-1).item()
    class_names = {0: 'Formulaire', 1: 'Reçu'}
    if predicted_class_idx not in class_names:
        # Previously this fell through with `message` unbound.
        return f'Unexpected class index {predicted_class_idx}', 500
    message = class_names[predicted_class_idx]

    session['type_doc'] = message
    return render_template('tables.html', message=message)




### OCR

@app.route('/extract',methods=['GET'])
def ocr():
    """Run OCR on the document recorded in the session and show the result.

    The preprocessing step depends on the document type stored by the
    classification route: ``formproc`` for ``'Formulaire'``, ``recuproc`` for
    ``'Reçu'``. The recognised text and boxes are stored in the session for
    the ``/ident`` route.

    Returns:
        The rendered ``extraction.html`` template, or a redirect to
        ``/document`` when no classified document is present in the session.
    """
    image_path = session.get('image_path', '')
    doc_path = session.get('doc_path', '')
    message = session.get('type_doc', '')

    preprocessors = {'Formulaire': formproc, 'Reçu': recuproc}
    preprocess = preprocessors.get(message)
    if preprocess is None or not doc_path:
        # Nothing has been classified yet (or the type is unknown); without
        # this guard `text`, `boxes` and `conf` would be unbound below.
        return redirect(url_for('document'))

    image = preprocess(doc_path)
    text, boxes, conf = extract(image)

    # NOTE(review): Flask's default session lives in a signed cookie, so very
    # large OCR results may exceed the browser's cookie size limit -- confirm
    # whether a server-side store is needed.
    session['text'] = text
    session['boxes'] = boxes
    return render_template('extraction.html', message=message, image_path=image_path, text=text, conf=conf)




# ENTITY IDENTIFICATION

def normalize_boxes(bbox, image):
    """Scale a pixel-space box to the integer 0-1000 grid.

    Args:
        bbox: ``[left, top, right, bottom]`` in pixels.
        image: Object exposing ``size`` as ``(width, height)``.

    Returns:
        ``[left, top, right, bottom]`` as ints, each clamped to ``[0, 1000]``.
    """
    w, h = image.size

    def _scale(value, extent):
        # OCR boxes can fall slightly outside the image; clamp so the result
        # never leaves the 0-1000 range the layout model accepts.
        return max(0, min(1000, int(1000 * (value / extent))))

    return [
        _scale(bbox[0], w),
        _scale(bbox[1], h),
        _scale(bbox[2], w),
        _scale(bbox[3], h),
    ]
def unnormalize_box(bbox, width, height):
    """Map a 0-1000 grid box back to pixel coordinates.

    Args:
        bbox: ``[left, top, right, bottom]`` on the 0-1000 grid.
        width: Image width in pixels.
        height: Image height in pixels.

    Returns:
        ``[left, top, right, bottom]`` in pixels, truncated to int.
    """
    # x coordinates scale with the width, y coordinates with the height.
    extents = (width, height, width, height)
    return [int(extent * (bbox[idx] / 1000)) for idx, extent in enumerate(extents)]
def encoding_input(image, texts, boxes):
  """Encode an image with its OCR words and boxes for the token classifiers.

  Each box is first rescaled with ``normalize_boxes``; the module-level
  ``processor`` is then called with PyTorch tensors requested, truncation
  enabled and padding to the maximum length.

  Returns:
    Whatever ``processor`` returns for these inputs.
  """
  scaled_boxes = []
  for raw_box in boxes:
    scaled_boxes.append(normalize_boxes(raw_box, image))

  return processor(
      image,
      texts,
      boxes=scaled_boxes,
      return_tensors="pt",
      truncation=True,
      padding="max_length",
  )

def form_ident(e_inputs):
  """Run the form token classifier on an encoded input.

  Every entry of ``e_inputs`` is moved (in place) to ``Form_model``'s device
  before the forward pass.

  Returns:
    ``(e_inputs, outputs)``: the updated inputs and the model outputs.
  """
  target_device = Form_model.device
  for name in list(e_inputs.keys()):
    e_inputs[name] = e_inputs[name].to(target_device)

  return e_inputs, Form_model(**e_inputs)

def recu_ident(e_inputs):
  """Run the receipt token classifier on an encoded input.

  Every entry of ``e_inputs`` is moved (in place) to ``recu_model``'s device
  before the forward pass.

  Returns:
    ``(e_inputs, outputs)``: the updated inputs and the model outputs.
  """
  target_device = recu_model.device
  for name in list(e_inputs.keys()):
    e_inputs[name] = e_inputs[name].to(target_device)

  return e_inputs, recu_model(**e_inputs)

def prediction(e_inputs, outps, width, height):
  """Turn model outputs into per-token boxes, class ids and token ids.

  Args:
    e_inputs: Encoded inputs exposing ``bbox`` and ``input_ids``.
    outps: Model outputs exposing ``logits``.
    width: Image width in pixels, used to rescale the boxes.
    height: Image height in pixels, used to rescale the boxes.

  Returns:
    ``(true_boxes, predictions, token_words)``: boxes rescaled with
    ``unnormalize_box``, the argmax class id of each token, and the token ids.
  """
  logits = outps.logits
  predictions = logits.argmax(-1).squeeze().tolist()
  token_boxes = e_inputs.bbox.squeeze().tolist()
  token_words = e_inputs.input_ids.squeeze().tolist()

  true_boxes = []
  for grid_box in token_boxes:
    true_boxes.append(unnormalize_box(grid_box, width, height))
  return true_boxes, predictions, token_words

def form_post_trait(true_boxes, predictions,token_words):
  """Group decoded form tokens by predicted label and de-duplicate boxes.

  Args:
    true_boxes: Per-token boxes in pixel coordinates.
    predictions: Per-token class ids, looked up in ``form_id2label``.
    token_words: Per-token ids, decoded with ``processor.tokenizer``.

  Returns:
    ``(unique_boxes, unique_pred, data)``: each distinct box in first-seen
    order, the label of the first token carrying that box, and a dict mapping
    every label to the list of decoded tokens predicted with it.
  """
  tokenizer = processor.tokenizer
  # Special tokens (start/end/padding) carry no document text; skipping them
  # keeps their markers out of the extracted strings and their boxes out of
  # the drawing.
  special_ids = set(tokenizer.all_special_ids)

  unique_boxes = []
  unique_pred = []
  seen_boxes = set()
  data = {}
  for box, pred_id, token_id in zip(true_boxes, predictions, token_words):
    if token_id in special_ids:
      continue
    pred = form_id2label[pred_id]
    # The original handled 'O' and non-'O' labels in two identical branches.
    data.setdefault(pred, []).append(tokenizer.decode(token_id))
    box_key = tuple(box)
    if box_key not in seen_boxes:
      seen_boxes.add(box_key)
      unique_boxes.append(box)
      unique_pred.append(pred)
  return unique_boxes, unique_pred, data


def recu_post_trait(true_boxes, predictions,token_words):
  """Group decoded receipt tokens by predicted label and de-duplicate boxes.

  Args:
    true_boxes: Per-token boxes in pixel coordinates.
    predictions: Per-token class ids, looked up in ``recu_id2label``.
    token_words: Per-token ids, decoded with ``processor.tokenizer``.

  Returns:
    ``(unique_boxes, unique_pred, data)``: each distinct box in first-seen
    order, the label of the first token carrying that box, and a dict mapping
    every label to the list of decoded tokens predicted with it.
  """
  tokenizer = processor.tokenizer
  # Special tokens (start/end/padding) carry no document text; skipping them
  # keeps their markers out of the extracted strings and their boxes out of
  # the drawing.
  special_ids = set(tokenizer.all_special_ids)

  unique_boxes = []
  unique_pred = []
  seen_boxes = set()
  data = {}
  for box, pred_id, token_id in zip(true_boxes, predictions, token_words):
    if token_id in special_ids:
      continue
    pred = recu_id2label[pred_id]
    # The original handled 'O' and non-'O' labels in two identical branches.
    data.setdefault(pred, []).append(tokenizer.decode(token_id))
    box_key = tuple(box)
    if box_key not in seen_boxes:
      seen_boxes.add(box_key)
      unique_boxes.append(box)
      unique_pred.append(pred)
  return unique_boxes, unique_pred, data

def iob_to_label(label):
    """Drop the two-character tag prefix (e.g. ``B-``) from a label.

    Returns ``'other'`` when nothing is left after the prefix, as for ``'O'``.
    """
    return label[2:] or 'other'

@app.route('/ident',methods=['GET'])
def identif():
    """Label the OCR'd tokens, draw the labelled boxes and show the result.

    Uses the document type, OCR text and boxes stored in the session by the
    earlier routes. The matching token classifier is run, the decoded tokens
    are concatenated per label (the ``'O'`` label is dropped), and each
    distinct box is drawn on a copy of the image saved under ``static/docs/``.

    Returns:
        The rendered ``billing.html`` template; a redirect to ``/document``
        when no classified document is in the session; a redirect to
        ``/extract`` when no OCR result is available.
    """
    message = session.get('type_doc', '')
    text = session.get('text', '')
    boxes = session.get('boxes', '')
    doc_path = session.get('doc_path', '')
    image_path = session.get('image_path', '')

    # One entry per document type: model runner, post-processing, box colours.
    pipelines = {
        'Formulaire': (
            form_ident,
            form_post_trait,
            {'header':'blue', 'question':'green', 'answer':'orange', 'other':'violet'},
        ),
        'Reçu': (
            recu_ident,
            recu_post_trait,
            {'date':'blue', 'total':'green', 'address':'orange', 'other':'violet', 'company':'red'},
        ),
    }
    if message not in pipelines or not doc_path:
        # Without a known type the variables used below were left unbound.
        return redirect(url_for('document'))
    if not text or not boxes:
        # OCR has not been run (or found nothing): send the user to that step.
        return redirect(url_for('ocr'))
    run_model, post_process, label2color = pipelines[message]

    image = Image.open(doc_path).convert("RGB")
    width, height = image.size

    encoding = encoding_input(image, text, boxes)
    encoding, out = run_model(encoding)
    token_boxes, token_preds, token_ids = prediction(encoding, out, width, height)
    boxx, predx, grouped = post_process(token_boxes, token_preds, token_ids)

    # Concatenate the decoded pieces of each label; 'O' tokens are not shown.
    worx = {label: ''.join(pieces) for label, pieces in grouped.items() if label != 'O'}

    draw = ImageDraw.Draw(image)
    font = ImageFont.load_default()
    for words, box in zip(predx, boxx):
        predicted_label = iob_to_label(words).lower()
        draw.rectangle(box, outline=label2color[predicted_label])
        draw.text((box[0] + 10, box[1] - 10), text=predicted_label, fill=label2color[predicted_label], font=font)

    image_filename = session.get('imageX')
    saved_image = "static/docs/ident" + image_filename
    image.save(saved_image)

    return render_template('billing.html', image_path=image_path, image_new=saved_image, words=worx)

if __name__ == '__main__':
    # Show the public ngrok address, then start the Flask server (debug off).
    access_notice = f"To acces the Gloable link please click {public_url}"
    print(access_notice)
    app.run(debug=False)