# **CRNN Model for handwritten-text-recognition**

---



Mount your Google Drive and change to the directory where the Python source files are stored (Colab users only).

In [None]:
# Colab-only: expose Google Drive inside the runtime at /content/drive so the
# project sources and data stored there become reachable from this notebook.
from google.colab import drive
drive.mount('/content/drive')

In [None]:
%cd"drive/My Drive/OCR/src"

Import statements

In [None]:
import re
import os
import cv2
import html
import string
import numpy as np
import numba as nb
import matplotlib.pyplot as plt
from PIL import Image
import datetime

from tensorflow.keras.optimizers import RMSprop
from tensorflow.keras.callbacks import CSVLogger, TensorBoard, ModelCheckpoint
from tensorflow.keras.callbacks import EarlyStopping, ReduceLROnPlateau

from dataloader import Load_dataset
from preprocessing import preprocess_partitions
from augmentation import data_augmentation
from model import Flor_Model
from evaluation import Evaluate

from calc_loss import Loss_Calculation
from generator import DataGenerator, Tokenizer

from google.colab.patches import cv2_imshow

In [None]:
!pip install autocorrect

from autocorrect import Speller

Defining variables

In [None]:
# Per-partition counters; both are filled in once the dataset has been loaded.
size = dict.fromkeys(('train', 'test', 'valid'), 0)    # number of samples
steps = dict.fromkeys(('train', 'test', 'valid'), 0)   # number of batches

# Locations, relative to the src/ working directory.
source_path = os.path.join("..", "data")      # dataset path
output_path = os.path.join("..", "output")    # store results here
target_path = os.path.join(output_path, "checkpoint_weights2.hdf5")  # path to save model weights

# Model input shape and batch size shared by the generator and the step computation.
# NOTE(review): axis order of input_size is defined by preprocess_partitions — confirm there.
input_size = (900, 128, 1)
batch_size = 30

print(source_path, target_path, output_path, sep="\n")

# Step 1: Loading the dataset

In [None]:
# Read the dataset from source_path with the project-local loader.
# Later cells index the result as dataset[partition]['dt'] (images) and
# dataset[partition]['gt'] (labels) for the 'train', 'test' and 'valid' partitions.
ds = Load_dataset()
dataset = ds.load_dataset(source_path)

In [None]:
# Disabled shortcut: when uncommented, keeps only the 'test' partition and
# replaces 'train' and 'valid' with empty lists.
# dataset = {'train':{'dt':[], 'gt':[]}, 'test':{'dt':dataset['test']['dt'], 'gt':dataset['test']['gt']}, 'valid': {'dt':[], 'gt':[]}}

# Step 2: Preprocessing the dataset

In [None]:
# Run the project-local preprocessing over every partition for the configured
# input size. The result replaces `dataset`, so this cell should be executed
# once per freshly loaded dataset.
dataset = preprocess_partitions(input_size, dataset)

# Step 3: Augmenting the dataset

In [None]:
# Augmentation is currently disabled; uncomment the line below to enable it.
# dataset = data_augmentation(dataset)

# Step 4: Model creation & training

Create object of tokenizer class to get vocab size

In [None]:
# Character set handed to the tokenizer: the first 95 characters of
# string.printable, i.e. digits, ASCII letters, punctuation and the space.
t = Tokenizer(string.printable[:95])
# The tokenizer reports the vocabulary size, which is cast to a plain int here.
vocab_size = int(t.vocab_size)

Model Creation

In [None]:
# Build the network through the project-local Flor_Model wrapper; `md` is kept
# because the prediction step calls md.predict_model later on.
# NOTE(review): the +1 presumably reserves an extra output class for the CTC blank — confirm in model.py.
md = Flor_Model()
model = md.create_newm(vocab_size+1)

Compile and print model summary

In [None]:
# Compile with RMSprop and the project's CTC loss function, then print the
# layer-by-layer summary of the model.
optimizer = RMSprop(learning_rate=0.001)
cl = Loss_Calculation()
model.compile(optimizer=optimizer, loss=cl.ctc_loss_lambda_func)
model.summary()

Callbacks

In [None]:
# Training callbacks; checkpointing, early stopping and LR scheduling all watch val_loss.
callbacks = [
    # One row per epoch, appended so that resumed runs extend the same log file.
    CSVLogger(
        filename=os.path.join(output_path, "epochs.log"), separator=";", append=True),
    # TensorBoard event files are written next to the other outputs.
    TensorBoard(
        log_dir=output_path, histogram_freq=10, profile_batch=0, write_graph=True, write_images=False, update_freq="epoch"),
    # Keep only the weights of the best epoch (lowest val_loss) on disk.
    ModelCheckpoint(
        filepath=target_path, monitor="val_loss", save_best_only=True, save_weights_only=True, verbose=1),
    # The patience here must be larger than ReduceLROnPlateau's: both callbacks
    # use the same monitor and min_delta, so with equal patience the learning
    # rate would only be lowered on the very epoch at which training stops.
    EarlyStopping(
        monitor="val_loss", min_delta=1e-8, patience=20, restore_best_weights=True, verbose=1),
    # Multiply the learning rate by 0.2 after 15 epochs without improvement,
    # leaving a few more epochs for the lower rate to help before early stopping.
    ReduceLROnPlateau(
        monitor="val_loss", min_delta=1e-8, factor=0.2, patience=15, verbose=1),
]

Tensorboard for Visualisation

In [None]:
# Load the TensorBoard notebook extension and open it on the output directory,
# which is where the TensorBoard callback writes its logs.
%load_ext tensorboard
%tensorboard --reload_interval=300 --logdir={output_path}

Load previously saved weights if a checkpoint file already exists


In [None]:
# Resume from an earlier run: target_path is the same file ModelCheckpoint
# writes to, so an existing checkpoint is loaded before training starts.
if os.path.isfile(target_path):
    model.load_weights(target_path)

Decoding labels before training

In [None]:
for pt in ['train', 'test', 'valid']:
    # Decode sentences from bytes. Entries that are already str are kept as they
    # are, so re-running this cell does not fail with AttributeError on str.decode.
    dataset[pt]['gt'] = [x.decode() if isinstance(x, bytes) else x for x in dataset[pt]['gt']]

    # Set size and steps: number of samples, and number of batches needed to
    # cover them (the last batch may be partial, hence the ceil).
    size[pt] = len(dataset[pt]['dt'])
    steps[pt] = int(np.ceil(size[pt] / batch_size))

Creating object of DataGenerator to generate batches for training, test and validation

In [None]:
# Project-local batch generator over the decoded dataset. Later cells use its
# next_train_batch / next_valid_batch / next_test_batch methods, its tokenizer,
# and its size / batch_size / dataset attributes.
dtgen = DataGenerator(dataset, batch_size)

Training the model

In [None]:
# Wall-clock timing of the whole training run; reported in the next cell.
start_time = datetime.datetime.now()

# The batches come from dtgen, which was built with `batch_size`, so no
# batch_size is passed to fit() (Keras documents it as not applicable to inputs
# that already yield batches). steps_per_epoch / validation_steps tell Keras
# how many batches make up one pass over each partition.
# Arguments that only repeated Keras' defaults were dropped; some of them
# (workers, use_multiprocessing, max_queue_size) are rejected by newer Keras releases.
h = model.fit(
    x=dtgen.next_train_batch(),
    epochs=150,
    verbose=1,
    callbacks=callbacks,
    validation_data=dtgen.next_valid_batch(),
    steps_per_epoch=steps['train'],
    validation_steps=steps['valid'],
)

total_time = datetime.datetime.now() - start_time

Details of the model after training

In [None]:
# Per-epoch loss curves recorded by fit().
loss = h.history['loss']
val_loss = h.history['val_loss']

# Best epoch: the first one that reached the lowest validation loss.
min_val_loss = min(val_loss)
min_val_loss_i = val_loss.index(min_val_loss)

# Average cost of one epoch, and of one (train + valid) sample within an epoch.
total_item = size['train'] + size['valid']
time_epoch = total_time / len(loss)

# Plain-text training summary; lines ending in "\n" add a blank separator line.
t_corpus = "\n".join((
    f"Total train images:      {dtgen.size['train']}",
    f"Total validation images: {dtgen.size['valid']}",
    f"Batch:                   {dtgen.batch_size}\n",
    f"Total time:              {total_time}",
    f"Time per epoch:          {time_epoch}",
    f"Time per item:           {time_epoch / total_item}\n",
    f"Total epochs:            {len(loss)}",
    f"Best epoch               {min_val_loss_i + 1}\n",
    f"Training loss:           {loss[min_val_loss_i]:.8f}",
    f"Validation loss:         {min_val_loss:.8f}",
))

# Persist the summary next to the other outputs, then echo it in the notebook.
with open(os.path.join(output_path, "train.txt"), "w") as lg:
    lg.write(t_corpus)
print(t_corpus)

# Step 5: Prediction

Predict output for test images

In [None]:
# Time the whole prediction stage (inference plus decoding to text);
# total_time is overwritten here and reused by the evaluation cells.
start_time = datetime.datetime.now()

# Run the model over the test batches through the project wrapper, with CTC
# decoding enabled. The second return value is not used.
predicts, _ = md.predict_model(
    model,
    x=dtgen.next_test_batch(),
    batch_size=None,
    verbose=1,
    steps=steps['test'],
    callbacks=None,
    max_queue_size=10,
    workers=1,
    use_multiprocessing=False,
    ctc_decode=True,
)

# Decode to string: only the first entry of each prediction is used.
# NOTE(review): presumably the top CTC candidate — confirm in model.py.
predicts = [dtgen.tokenizer.decode(candidates[0]) for candidates in predicts]

total_time = datetime.datetime.now() - start_time

# Write the prediction corpus: ground-truth line (TE_L) followed by the prediction (TE_P).
with open(os.path.join(output_path, "predict.txt"), "w") as lg:
    for pred, gt in zip(predicts, dtgen.dataset['test']['gt']):
        lg.write(f"TE_L {gt}\nTE_P {pred}\n")

Applying autocorrect

In [None]:
spell = Speller("en")

# Spell-correct every prediction word by word: split on single spaces,
# correct each token, and re-join with single spaces.
autocorrect_predicts = [
    " ".join(spell(word) for word in sentence.split(" "))
    for sentence in predicts
]

# Write the corrected corpus in the same TE_L / TE_P layout as predict.txt.
with open(os.path.join(output_path, "autocorrect_predict.txt"), "w") as lg:
    for corrected, gt in zip(autocorrect_predicts, dtgen.dataset['test']['gt']):
        lg.write(f"TE_L {gt}\nTE_P {corrected}\n")

Printing some of predicted outputs

In [None]:
# Show up to 10 test samples: image, ground truth, raw prediction and
# autocorrected prediction. The count is capped by what is actually available,
# so a small test partition no longer raises IndexError.
preview_count = min(10, len(predicts), len(dataset['test']['dt']))
for i in range(preview_count):
    img = dataset['test']['dt'][i]
    lbl = dataset['test']['gt'][i]
    # The image is transposed for display.
    # NOTE(review): presumably preprocessing stores images transposed — confirm.
    cv2_imshow(cv2.transpose(img))
    print(lbl)
    print(predicts[i])
    print(autocorrect_predicts[i])

# Step 6: Evaluation

In [None]:
# Project-local evaluator; its ocr_metrics method is used by both evaluation cells below.
ev = Evaluate()

Evaluation before applying autocorrect

In [None]:
# Score the raw predictions against the test ground truth. The three values
# are reported below as character, word and sequence error rate.
evaluate = ev.ocr_metrics(
    predicts=predicts,
    ground_truth=dataset['test']['gt'],
)

# Plain-text report; total_time is the duration measured in the prediction cell.
e_corpus = "\n".join((
    f"Total test images:    {size['test']}",
    f"Total time:           {total_time}",
    f"Time per item:        {total_time / size['test']}\n",
    f"Metrics:",
    f"Character Error Rate: {evaluate[0]:.8f}",
    f"Word Error Rate:      {evaluate[1]:.8f}",
    f"Sequence Error Rate:  {evaluate[2]:.8f}",
))

# Persist the report, then echo it in the notebook.
with open(os.path.join(output_path, "evaluate.txt"), "w") as lg:
    lg.write(e_corpus)
print(e_corpus)

Evaluation after applying autocorrect

In [None]:
# Score the autocorrected predictions against the same test ground truth, so
# the numbers are directly comparable with the raw-prediction report.
autocorrect_evaluate = ev.ocr_metrics(
    predicts=autocorrect_predicts,
    ground_truth=dataset['test']['gt'],
)

# Same layout as the raw report; total_time is still the duration measured in
# the prediction cell and does not include the autocorrect pass.
autocorrect_e_corpus = "\n".join((
    f"Total test images:    {size['test']}",
    f"Total time:           {total_time}",
    f"Time per item:        {total_time / size['test']}\n",
    f"Metrics:",
    f"Character Error Rate: {autocorrect_evaluate[0]:.8f}",
    f"Word Error Rate:      {autocorrect_evaluate[1]:.8f}",
    f"Sequence Error Rate:  {autocorrect_evaluate[2]:.8f}",
))

# Persist the report, then echo it in the notebook.
with open(os.path.join(output_path, "evaluate1.txt"), "w") as lg:
    lg.write(autocorrect_e_corpus)
print(autocorrect_e_corpus)