In [None]:
# pdfplumber: reads text lines and their bounding boxes from the source PDFs; those boxes are
# used later to crop line-level images out of the page images. It only works on PDF input.
! pip install pdfplumber

# pdf2image: for converting the MTSamples PDF pages into image files.
! pip install pdf2image

In [None]:
# Core imports. Additional imports are placed in the cells where they are first needed.
import cv2
import PIL
from PIL import Image, ImageSequence
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import os
import yaml
import time
import shutil
import math
import random
import sys
import pickle
from tqdm import tqdm
import warnings
warnings.filterwarnings("ignore")

In [None]:
# Downloading MTSamples Dataset from Kaggle. Make sure kaggle.json is available in content folder of Colab.

! mkdir ~/.kaggle
! cp kaggle.json ~/.kaggle/
! chmod 600 ~/.kaggle/kaggle.json
! kaggle datasets download -d shoubhikchakra/mtsamples-dataset
! unzip mtsamples-dataset.zip

Creating the Noisy Dataset from MTSamples dataset

In [None]:
from skimage.util import random_noise
from sklearn.model_selection import train_test_split

In [None]:
# Files ending in 0-6 and 8-9 shall be used for training purposes.
# The last character of each file stem selects the noise recipe applied to the clean page image;
# stems ending in anything else (e.g. 7) are left out of this folder.

clean_image_dir = '/content/pdf2img/content/pdf2img'
noisy_image_dir = '/content/noisy_images'

# exist_ok keeps the cell re-runnable; a plain `mkdir` errors once the folder exists.
os.makedirs(noisy_image_dir, exist_ok=True)


def _to_uint8(noisy):
    """Scale the float image returned by random_noise by 255 and cast it to uint8."""
    return np.array(255 * noisy, dtype='uint8')


def _salt_then_speckle(img):
    """Add salt noise, quantise to uint8, then add speckle noise on top of the result."""
    salted = _to_uint8(random_noise(img, mode='salt', amount=0.08))
    return _to_uint8(random_noise(salted, mode='speckle'))


# Last character of the file stem -> function turning the clean image into the stored image.
noise_by_last_digit = {
    '0': lambda img: _to_uint8(random_noise(img, mode='s&p', amount=0.01)),
    '1': lambda img: _to_uint8(random_noise(img, mode='localvar')),
    '2': _salt_then_speckle,
    '8': _salt_then_speckle,
    '3': lambda img: _to_uint8(random_noise(img, mode='salt', amount=0.1)),
    '9': lambda img: _to_uint8(random_noise(img, mode='salt', amount=0.1)),
    '4': lambda img: _to_uint8(random_noise(img, mode='speckle')),
    '5': lambda img: _to_uint8(random_noise(img, mode='salt', amount=0.09)),
    '6': lambda img: img,  # stored without added noise
}

for filename in tqdm(os.listdir(clean_image_dir)):
    stem = filename.split('.')[0]
    add_noise = noise_by_last_digit.get(stem[-1:])
    if add_noise is None:
        continue
    img = cv2.imread(os.path.join(clean_image_dir, filename), cv2.IMREAD_GRAYSCALE)
    if img is None:
        # cv2.imread returns None (it does not raise) for files it cannot decode.
        tqdm.write('Skipping unreadable image: {}'.format(filename))
        continue
    cv2.imwrite(os.path.join(noisy_image_dir, filename), add_noise(img))

In [None]:
# Files ending in 7 shall be used for testing purposes.
# Each of them gets 'localvar' noise and is written to a separate folder.

clean_image_dir = '/content/pdf2img/content/pdf2img'
test_noisy_image_dir = '/content/test_noisy_images'

# exist_ok keeps the cell re-runnable; a plain `mkdir` errors once the folder exists.
os.makedirs(test_noisy_image_dir, exist_ok=True)

for filename in tqdm(os.listdir(clean_image_dir)):
    if not filename.split('.')[0].endswith('7'):
        continue
    img = cv2.imread(os.path.join(clean_image_dir, filename), cv2.IMREAD_GRAYSCALE)
    if img is None:
        # cv2.imread returns None (it does not raise) for files it cannot decode.
        tqdm.write('Skipping unreadable image: {}'.format(filename))
        continue
    noise_img = random_noise(img, mode='localvar')
    # random_noise returns a float image; scale by 255 and cast back to 8-bit before saving.
    noise_img = np.array(255*noise_img, dtype = 'uint8')
    cv2.imwrite(os.path.join(test_noisy_image_dir, filename), noise_img)

In [None]:
# Imported here, next to their only use: neither name is imported in the cells above.
import pdfplumber
from pdfminer.layout import LTTextBoxHorizontal

datapath = '/content/mtsamples_pdf/mtsamples_pdf'

# Creating Line-Level Dataset
# One row per PDF: page size plus the bounding box and text of every text line found on the
# first page (only pdf.pages[0] is read).

line_records = []
for pdfname in tqdm(os.listdir(datapath)):
    with pdfplumber.open(os.path.join(datapath, pdfname), laparams={}) as pdf:
        first_page = pdf.pages[0]
        lines = []
        for element in first_page.layout:
            if isinstance(element, LTTextBoxHorizontal):
                for line in element:
                    # Append inside the inner loop so every line of the text box is kept,
                    # not just the last one.
                    lines.append({'x0' : line.x0,
                                  'x1' : line.x1,
                                  'y0' : line.y0,
                                  'y1' : line.y1,
                                  'line' : line.get_text()})
        line_records.append({'pdf': pdfname,
                             # Plain floats, so the later scale factors multiply cleanly with the
                             # float line coordinates whatever numeric type pdfplumber returns.
                             'width': float(first_page.width),
                             'height': float(first_page.height),
                             'linelist': lines})

# Build the frame once: row-by-row DataFrame.append is quadratic and no longer exists in pandas 2.x.
df_line = pd.DataFrame(line_records, columns= ['pdf', 'width', 'height', 'linelist'])


Creating the training and testing dataframes and the image folders of cropped line-level images

In [None]:
# Hold out 10% of the pages for testing; the fixed random_state keeps the shuffled split repeatable.
train, test = train_test_split(
    df_line,
    test_size=0.1,
    shuffle=True,
    random_state=20,
)

In [None]:
# Crop every text line of each training page out of its noisy page image and collect
# (crop filename, label) pairs for training the recogniser.
noisy_data = '/content/noisy_images'
train_crop_dir = '/content/train_crop'
# cv2.imwrite does not create missing folders, so make sure the target exists first.
os.makedirs(train_crop_dir, exist_ok=True)

train_records = []
for i, row in tqdm(train.iterrows(), total=len(train)):
    filename = row['pdf'].replace('.pdf', '.tiff')
    # Left in OpenCV's native BGR channel order, which is what cv2.imwrite expects.
    img = cv2.imread(os.path.join(noisy_data, filename))
    if img is None:
        # No readable noisy image for this page, so there is nothing to crop.
        continue
    # PDF units -> pixels. Assumes page images are 800 px wide and 1000 px tall — TODO confirm.
    x_scale, y_scale = (800/row.loc['width']), (1000/row.loc['height'])
    try:
        for count, word in enumerate(row['linelist']):
            # str(bytes) looks like "b'...'"; [2:-3] drops the leading b' and the last three
            # characters (presumably the escaped trailing newline plus the closing quote —
            # verify that get_text() always ends with a newline).
            word_name = str(word["line"].encode())[2:-3]
            x1 = math.ceil(word['x0']*x_scale)
            x2 = math.ceil(word['x1']*x_scale)
            # Flip the y axis: layout coordinates are measured from the bottom of the page,
            # image rows from the top.
            y1 = 1000 - math.ceil(word['y0']*y_scale)
            y2 = 1000 - math.ceil(word['y1']*y_scale)
            cropped = img[y2:y1, x1:x2]
            if cropped.size == 0:
                # Degenerate box: skip this line but keep processing the rest of the page.
                continue
            img_name = filename[:-5] + '_' + str(count) + '_cropped.jpg'
            # Record the label only when the crop really reached the disk.
            if cv2.imwrite(os.path.join(train_crop_dir, img_name), cropped):
                train_records.append({'filename': img_name, 'words': word_name})
    except Exception as e:
        # Still best-effort per page, but report what was dropped instead of hiding it.
        tqdm.write('Skipping remaining lines of {}: {}'.format(filename, e))

# Build the frame once: row-by-row DataFrame.append is quadratic and no longer exists in pandas 2.x.
train_data = pd.DataFrame(train_records, columns=['filename', 'words'])

In [None]:
# Crop every text line of each held-out page out of its noisy page image and collect
# (crop filename, label) pairs for evaluating the recogniser.
# NOTE(review): pages are read from /content/noisy_images; /content/test_noisy_images is not
# read in this cell — confirm that is intended.
noisy_data = '/content/noisy_images'
test_crop_dir = '/content/test_crop'
# cv2.imwrite does not create missing folders, so make sure the target exists first.
os.makedirs(test_crop_dir, exist_ok=True)

test_records = []
for i, row in tqdm(test.iterrows(), total=len(test)):
    filename = row['pdf'].replace('.pdf', '.tiff')
    # Left in OpenCV's native BGR channel order, which is what cv2.imwrite expects.
    img = cv2.imread(os.path.join(noisy_data, filename))
    if img is None:
        # No readable noisy image for this page, so there is nothing to crop.
        continue
    # PDF units -> pixels. Assumes page images are 800 px wide and 1000 px tall — TODO confirm.
    x_scale, y_scale = (800/row.loc['width']), (1000/row.loc['height'])
    try:
        for count, word in enumerate(row['linelist']):
            # str(bytes) looks like "b'...'"; [2:-3] drops the leading b' and the last three
            # characters (presumably the escaped trailing newline plus the closing quote —
            # verify that get_text() always ends with a newline).
            word_name = str(word["line"].encode())[2:-3]
            x1 = math.ceil(word['x0']*x_scale)
            x2 = math.ceil(word['x1']*x_scale)
            # Flip the y axis: layout coordinates are measured from the bottom of the page,
            # image rows from the top.
            y1 = 1000 - math.ceil(word['y0']*y_scale)
            y2 = 1000 - math.ceil(word['y1']*y_scale)
            cropped = img[y2:y1, x1:x2]
            if cropped.size == 0:
                # Degenerate box: skip this line but keep processing the rest of the page.
                continue
            img_name = filename[:-5] + '_' + str(count) + '_cropped.jpg'
            # Record the label only when the crop really reached the disk.
            if cv2.imwrite(os.path.join(test_crop_dir, img_name), cropped):
                test_records.append({'filename': img_name, 'words': word_name})
    except Exception as e:
        # Still best-effort per page, but report what was dropped instead of hiding it.
        tqdm.write('Skipping remaining lines of {}: {}'.format(filename, e))

# Build the frame once: row-by-row DataFrame.append is quadratic and no longer exists in pandas 2.x.
test_data = pd.DataFrame(test_records, columns=['filename', 'words'])

In [None]:
# Confirming that the size of the cropped images folder matches with the size of the respective dataframes.
# Rows whose crop file is not on disk are removed, and the index is renumbered 0..n-1 so that
# later positional access with .loc[i] stays valid.

train_files = set(os.listdir('/content/train_crop'))
print(len(train_files))

train_data = train_data.drop_duplicates()
print(train_data.shape)

missing_train = set(train_data['filename']) - train_files
print(len(missing_train))

# One vectorised filter instead of one DataFrame.drop call per missing file.
train_data = train_data[~train_data['filename'].isin(missing_train)].reset_index(drop=True)

test_files = set(os.listdir('/content/test_crop'))
print(len(test_files))

test_data = test_data.drop_duplicates()
print(test_data.shape)

missing_test = set(test_data['filename']) - test_files
print(len(missing_test))

test_data = test_data[~test_data['filename'].isin(missing_test)].reset_index(drop=True)

Creating the .txt files in the format required for training purposes.

In [None]:
# Write one "<image path>\t<label>" line per crop.
# - Rows are iterated by value rather than by .loc[i], so gaps left in the index by earlier
#   drops cannot raise KeyError.
# - Mode 'w' rewrites the file on every run; appending would duplicate every label on re-run.
# - The with-blocks close the files even if a write fails.
with open('/content/rec_train_crop.txt', 'w') as f:
    for filename, words in zip(train_data['filename'], train_data['words']):
        f.write('train/{}\t{}\n'.format(filename, words))

with open('/content/rec_test_crop.txt', 'w') as f:
    for filename, words in zip(test_data['filename'], test_data['words']):
        f.write('test/{}\t{}\n'.format(filename, words))

Training Custom Model

In [None]:
# Upgrade pip, then install PaddlePaddle 2.0.0. The active line installs the GPU build;
# the commented line below is the CPU-only alternative.
!pip3 install --upgrade pip
!python3 -m pip install paddlepaddle-gpu==2.0.0 -i https://mirror.baidu.com/pypi/simple #if using gpu
#!python3 -m pip install paddlepaddle==2.0.0 -i https://mirror.baidu.com/pypi/simple #if non gpu

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Collecting pip
  Downloading pip-22.1.2-py3-none-any.whl (2.1 MB)
[K     |████████████████████████████████| 2.1 MB 25.0 MB/s 
[?25hInstalling collected packages: pip
  Attempting uninstall: pip
    Found existing installation: pip 21.1.3
    Uninstalling pip-21.1.3:
      Successfully uninstalled pip-21.1.3
Successfully installed pip-22.1.2
Looking in indexes: https://mirror.baidu.com/pypi/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Collecting paddlepaddle-gpu==2.0.0
  Downloading https://mirror.baidu.com/pypi/packages/d9/24/8eabaed904af5dbdf4f11adeedc0e5ff2bef527d63293b50d0ceb418da1d/paddlepaddle_gpu-2.0.0-cp37-cp37m-manylinux1_x86_64.whl (689.6 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m689.6/689.6 MB[0m [31m1.9 MB/s[0m eta [36m0:00:00[0m
Collecting gast==0.3.3
  Downloading https://mirror.baidu.com/pypi/packages/d6/84/759f5dd23fec8ba7195

In [None]:
# Clone the PaddleOCR repository; its tools/ scripts and configs/ files are used by the cells below.
# NOTE(review): this takes whatever commit is current on the default branch — consider pinning a tag or commit.
!git clone https://github.com/PaddlePaddle/PaddleOCR

Cloning into 'PaddleOCR'...
remote: Enumerating objects: 38584, done.[K
remote: Counting objects: 100% (560/560), done.[K
remote: Compressing objects: 100% (313/313), done.[K
remote: Total 38584 (delta 324), reused 420 (delta 247), pack-reused 38024[K
Receiving objects: 100% (38584/38584), 320.53 MiB | 16.75 MiB/s, done.
Resolving deltas: 100% (26834/26834), done.


In [None]:
# Install importlib-metadata, then the dependencies listed in the cloned repository's requirements.txt.
!pip install importlib-metadata
!pip3 install -r /content/PaddleOCR/requirements.txt

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
[0mLooking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Collecting imgaug==0.4.0
  Downloading imgaug-0.4.0-py2.py3-none-any.whl (948 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m948.0/948.0 kB[0m [31m29.6 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting pyclipper
  Downloading pyclipper-1.3.0.post3-cp37-cp37m-manylinux_2_5_x86_64.manylinux1_x86_64.whl (604 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m604.2/604.2 kB[0m [31m47.6 MB/s[0m eta [36m0:00:00[0m
Collecting visualdl
  Downloading visualdl-2.3.0-py3-none-any.whl (2.8 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m2.8/2.8 MB[0m [31m73.2 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting rapidfuzz
  Downloading rapidfuzz-2.1.2-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (2.0 MB)
[2K     [90m━━━━━━━━━━━━━━━━

In [None]:
# Pin PyYAML to 5.4.1 (in the recorded run this replaced the preinstalled PyYAML 3.13).
!pip install PyYAML==5.4.1

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Collecting PyYAML==5.4.1
  Downloading PyYAML-5.4.1-cp37-cp37m-manylinux1_x86_64.whl (636 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m636.6/636.6 kB[0m [31m35.4 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: PyYAML
  Attempting uninstall: PyYAML
    Found existing installation: PyYAML 3.13
    Uninstalling PyYAML-3.13:
      Successfully uninstalled PyYAML-3.13
Successfully installed PyYAML-5.4.1
[0m

In [None]:
# Directory for the dataset; the expected layout is described in the PaddleOCR documentation.
# exist_ok=True keeps the cell re-runnable (os.mkdir raises FileExistsError on a second run).
os.makedirs('/content/PaddleOCR/train_data', exist_ok=True)

In [None]:
%cd PaddleOCR/
# Download the pre-trained model of CRNN_mv3_none_bilstm
!wget -P ./pretrain_models/ https://paddleocr.bj.bcebos.com/dygraph_v2.0/en/rec_mv3_none_bilstm_ctc_v2.0_train.tar
%cd pretrain_models
!tar -xf rec_mv3_none_bilstm_ctc_v2.0_train.tar && rm -rf rec_mv3_none_bilstm_ctc_v2.0_train.tar

/content/PaddleOCR
--2022-07-04 17:53:46--  https://paddleocr.bj.bcebos.com/dygraph_v2.0/en/rec_mv3_none_bilstm_ctc_v2.0_train.tar
Resolving paddleocr.bj.bcebos.com (paddleocr.bj.bcebos.com)... 103.235.46.61, 2409:8c04:1001:1002:0:ff:b001:368a
Connecting to paddleocr.bj.bcebos.com (paddleocr.bj.bcebos.com)|103.235.46.61|:443... connected.
HTTP request sent, awaiting response... 200 OK
Length: 51200000 (49M) [application/x-tar]
Saving to: ‘./pretrain_models/rec_mv3_none_bilstm_ctc_v2.0_train.tar’


2022-07-04 17:53:48 (25.3 MB/s) - ‘./pretrain_models/rec_mv3_none_bilstm_ctc_v2.0_train.tar’ saved [51200000/51200000]

/content/PaddleOCR/pretrain_models


Before training, make the necessary changes in the configuration file (e.g. dataset directory, training parameters).

The command below trains the model starting from pretrained weights. To train without pretrained weights, omit everything from '-o' onwards.

In [None]:
# Train the recognition model from the PaddleOCR repository root.
%cd /content/PaddleOCR
# Alternative: train without pretrained weights.
#!python3 tools/train.py -c configs/rec/rec_mv3_none_bilstm_ctc.yml
# NOTE(review): Global.pretrained_model points at /content/rec/..., not at the archive that the
# download cell extracts under /content/PaddleOCR/pretrain_models — confirm this path is intended.
!python3 tools/train.py -c configs/rec/rec_mv3_none_bilstm_ctc.yml -o Global.pretrained_model=/content/rec/mv3_none_bilstm_ctc/best_accuracy.pdparams

/content/PaddleOCR
Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations
  def convert_to_list(value, n, name, dtype=np.int):
  from numpy.dual import register_func
Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations
  from numpy import (exp, inf, pi, sqrt, floor, sin, cos, around, int,
  supported_dtypes = [np.typeDict[x] for x in supported_dtypes]
[2022/07/08 02:47:01] ppocr INFO: Architecture : 
[2022/07/08 02:47:01] ppocr INFO:     Backbone : 
[2022/07/08 02:47:01] ppocr INFO:         model_name : large
[2022/07/08 02:47:01] ppocr INFO:         name : MobileNetV3
[2022/07/08 02:47:01] ppocr INFO:         scale : 0.5
[2022/07/08 02:47:01] ppocr INFO:     Head : 
[2022/07/08 02:47:01] ppocr INFO:         fc_decay : 0
[2022/07/08 02:47:01] ppocr INFO:         name : CTCHead
[2022/07/08 02:47:01] ppocr INFO:     Neck : 
[2022/07/08 02:47:01] ppocr IN

In [None]:
#infering the trained model without exporting
%cd PaddleOCR
!python3 tools/infer_rec.py -c configs/rec/rec_mv3_none_bilstm_ctc.yml -o Global.pretrained_model=/content/PaddleOCR/output/rec/mv3_none_bilstm_ctc/best_accuracy.pdparams Global.infer_img=/content/PaddleOCR/doc/imgs_words/en/word_1.png

/content/PaddleOCR
Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations
  def convert_to_list(value, n, name, dtype=np.int):
  from numpy.dual import register_func
Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations
  from numpy import (exp, inf, pi, sqrt, floor, sin, cos, around, int,
  supported_dtypes = [np.typeDict[x] for x in supported_dtypes]
[2022/07/08 05:10:44] ppocr INFO: Architecture : 
[2022/07/08 05:10:44] ppocr INFO:     Backbone : 
[2022/07/08 05:10:44] ppocr INFO:         model_name : large
[2022/07/08 05:10:44] ppocr INFO:         name : MobileNetV3
[2022/07/08 05:10:44] ppocr INFO:         scale : 0.5
[2022/07/08 05:10:44] ppocr INFO:     Head : 
[2022/07/08 05:10:44] ppocr INFO:         fc_decay : 0
[2022/07/08 05:10:44] ppocr INFO:         name : CTCHead
[2022/07/08 05:10:44] ppocr INFO:     Neck : 
[2022/07/08 05:10:44] ppocr IN

In [None]:
# Inference with the trained checkpoint on one cropped test line (1570_2_cropped.jpg).
# The relative tools/ and configs/ paths require the working directory to be the PaddleOCR root.
!python3 tools/infer_rec.py -c configs/rec/rec_mv3_none_bilstm_ctc.yml -o Global.pretrained_model=/content/PaddleOCR/output/rec/mv3_none_bilstm_ctc/best_accuracy.pdparams Global.infer_img=/content/PaddleOCR/train_data/content/test_crop/1570_2_cropped.jpg

Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations
  def convert_to_list(value, n, name, dtype=np.int):
  from numpy.dual import register_func
Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations
  from numpy import (exp, inf, pi, sqrt, floor, sin, cos, around, int,
  supported_dtypes = [np.typeDict[x] for x in supported_dtypes]
[2022/07/08 05:11:25] ppocr INFO: Architecture : 
[2022/07/08 05:11:25] ppocr INFO:     Backbone : 
[2022/07/08 05:11:25] ppocr INFO:         model_name : large
[2022/07/08 05:11:25] ppocr INFO:         name : MobileNetV3
[2022/07/08 05:11:25] ppocr INFO:         scale : 0.5
[2022/07/08 05:11:25] ppocr INFO:     Head : 
[2022/07/08 05:11:25] ppocr INFO:         fc_decay : 0
[2022/07/08 05:11:25] ppocr INFO:         name : CTCHead
[2022/07/08 05:11:25] ppocr INFO:     Neck : 
[2022/07/08 05:11:25] ppocr INFO:         encoder

In [None]:
# Inference with the trained checkpoint on one cropped test line (1570_20_cropped.jpg).
%cd /content/PaddleOCR
!python3 tools/infer_rec.py -c configs/rec/rec_mv3_none_bilstm_ctc.yml -o Global.pretrained_model=/content/PaddleOCR/output/rec/mv3_none_bilstm_ctc/best_accuracy.pdparams Global.infer_img=/content/PaddleOCR/train_data/content/test_crop/1570_20_cropped.jpg

/content/PaddleOCR
Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations
  def convert_to_list(value, n, name, dtype=np.int):
  from numpy.dual import register_func
Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations
  from numpy import (exp, inf, pi, sqrt, floor, sin, cos, around, int,
  supported_dtypes = [np.typeDict[x] for x in supported_dtypes]
[2022/07/08 05:12:13] ppocr INFO: Architecture : 
[2022/07/08 05:12:13] ppocr INFO:     Backbone : 
[2022/07/08 05:12:13] ppocr INFO:         model_name : large
[2022/07/08 05:12:13] ppocr INFO:         name : MobileNetV3
[2022/07/08 05:12:13] ppocr INFO:         scale : 0.5
[2022/07/08 05:12:13] ppocr INFO:     Head : 
[2022/07/08 05:12:13] ppocr INFO:         fc_decay : 0
[2022/07/08 05:12:13] ppocr INFO:         name : CTCHead
[2022/07/08 05:12:13] ppocr INFO:     Neck : 
[2022/07/08 05:12:13] ppocr IN

In [None]:
# Inference with the trained checkpoint on one cropped test line (1570_0_cropped.jpg).
# The relative tools/ and configs/ paths require the working directory to be the PaddleOCR root.
!python3 tools/infer_rec.py -c configs/rec/rec_mv3_none_bilstm_ctc.yml -o Global.pretrained_model=/content/PaddleOCR/output/rec/mv3_none_bilstm_ctc/best_accuracy.pdparams Global.infer_img=/content/PaddleOCR/train_data/content/test_crop/1570_0_cropped.jpg

Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations
  def convert_to_list(value, n, name, dtype=np.int):
  from numpy.dual import register_func
Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations
  from numpy import (exp, inf, pi, sqrt, floor, sin, cos, around, int,
  supported_dtypes = [np.typeDict[x] for x in supported_dtypes]
[2022/07/08 05:12:53] ppocr INFO: Architecture : 
[2022/07/08 05:12:53] ppocr INFO:     Backbone : 
[2022/07/08 05:12:53] ppocr INFO:         model_name : large
[2022/07/08 05:12:53] ppocr INFO:         name : MobileNetV3
[2022/07/08 05:12:53] ppocr INFO:         scale : 0.5
[2022/07/08 05:12:53] ppocr INFO:     Head : 
[2022/07/08 05:12:53] ppocr INFO:         fc_decay : 0
[2022/07/08 05:12:53] ppocr INFO:         name : CTCHead
[2022/07/08 05:12:53] ppocr INFO:     Neck : 
[2022/07/08 05:12:53] ppocr INFO:         encoder

In [None]:
# Export the trained checkpoint as an inference model into ./inference/CRNN_mv3/.
# Unlike the infer_rec.py cells, the checkpoint path is given here without the .pdparams suffix.
!python3 tools/export_model.py -c configs/rec/rec_mv3_none_bilstm_ctc.yml -o Global.pretrained_model=/content/PaddleOCR/output/rec/mv3_none_bilstm_ctc/best_accuracy  Global.save_inference_dir=./inference/CRNN_mv3/


Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations
  def convert_to_list(value, n, name, dtype=np.int):
  from numpy.dual import register_func
Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations
  from numpy import (exp, inf, pi, sqrt, floor, sin, cos, around, int,
  supported_dtypes = [np.typeDict[x] for x in supported_dtypes]
W0708 05:13:30.902925  1777 device_context.cc:362] Please NOTE: device: 0, GPU Compute Capability: 7.5, Driver API Version: 11.2, Runtime API Version: 10.2
W0708 05:13:30.910444  1777 device_context.cc:372] device: 0, cuDNN Version: 7.6.
[2022/07/08 05:13:34] ppocr INFO: load pretrain successful from /content/PaddleOCR/output/rec/mv3_none_bilstm_ctc/best_accuracy
[2022/07/08 05:13:35] ppocr INFO: inference model is saved to ./inference/CRNN_mv3/inference


In [None]:
# Recognition on one cropped test line using the exported inference model, on CPU.
# NOTE(review): this cell passes ppocr/utils/dict/en_dict.txt, whereas the later prediction cells pass
# ppocr/utils/en_dict.txt — confirm which dictionary matches the one the model was trained with.
!python3 tools/infer/predict_rec.py --image_dir="/content/PaddleOCR/train_data/content/test_crop/1570_0_cropped.jpg" --rec_model_dir="/content/PaddleOCR/inference/CRNN_mv3" --rec_image_shape="3, 32, 320" --rec_char_dict_path="/content/PaddleOCR/ppocr/utils/dict/en_dict.txt" --use_gpu=False

In [None]:
# Download the DB text-detection inference model, to be chained with the text recogniser.
# The ready-made inference model is downloaded directly here.
# Alternatively, a pretrained detection model could be downloaded, trained and exported for inference.
!wget  https://paddleocr.bj.bcebos.com/PP-OCRv3/english/en_PP-OCRv3_det_slim_infer.tar
!tar xf en_PP-OCRv3_det_slim_infer.tar

--2022-07-08 05:21:09--  https://paddleocr.bj.bcebos.com/PP-OCRv3/english/en_PP-OCRv3_det_slim_infer.tar
Resolving paddleocr.bj.bcebos.com (paddleocr.bj.bcebos.com)... 103.235.46.61, 2409:8c04:1001:1002:0:ff:b001:368a
Connecting to paddleocr.bj.bcebos.com (paddleocr.bj.bcebos.com)|103.235.46.61|:443... connected.
HTTP request sent, awaiting response... 200 OK
Length: 2726400 (2.6M) [application/x-tar]
Saving to: ‘en_PP-OCRv3_det_slim_infer.tar’


2022-07-08 05:21:14 (624 KB/s) - ‘en_PP-OCRv3_det_slim_infer.tar’ saved [2726400/2726400]



In [None]:
# Convert a sample TIFF page (read from the mounted Google Drive path) to JPEG for the
# detection + recognition test below.
from PIL import Image

# The with-block closes the source file once the copy has been saved.
with Image.open(r'/content/drive/MyDrive/1.tiff') as im:
    # JPEG cannot store every TIFF mode (e.g. palette or alpha images); converting to RGB first
    # keeps save() from raising on those.
    im.convert('RGB').save('test.jpeg')

In [None]:
# Recognition on the repository's sample word image using the exported inference model, on CPU.
# Relative paths: the working directory must be the PaddleOCR root.
!python3 tools/infer/predict_rec.py --image_dir="./doc/imgs_words/en/word_1.png" --rec_model_dir="./inference/CRNN_mv3/" --rec_image_shape="3, 32, 320" --rec_char_dict_path="./ppocr/utils/en_dict.txt" --use_gpu=False


Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations
  def convert_to_list(value, n, name, dtype=np.int):
  from numpy.dual import register_func
Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations
  from numpy import (exp, inf, pi, sqrt, floor, sin, cos, around, int,
  supported_dtypes = [np.typeDict[x] for x in supported_dtypes]
[2022/07/08 05:21:39] ppocr INFO: In PP-OCRv3, rec_image_shape parameter defaults to '3, 48, 320', if you are using recognition model with PP-OCRv2 or an older version, please set --rec_image_shape='3,32,320
[2022/07/08 05:21:39] ppocr INFO: Predicts of ./doc/imgs_words/en/word_1.png:('jOINT', 0.9545997381210327)


In [None]:
# End-to-end prediction: the downloaded detection model followed by the custom recognition model.
# Pass the parameters the models need (character dictionary, GPU flag, image shape, ...) explicitly.
# Relative paths: the working directory must be the PaddleOCR root, where test.jpeg was saved.
!python3 tools/infer/predict_system.py --image_dir="./test.jpeg" --det_model_dir="./en_PP-OCRv3_det_slim_infer" --rec_model_dir="./inference/CRNN_mv3" --rec_image_shape="3, 32, 320" --use_angle_cls=false --rec_char_dict_path="./ppocr/utils/en_dict.txt" --use_gpu=False

Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations
  def convert_to_list(value, n, name, dtype=np.int):
  from numpy.dual import register_func
Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations
  from numpy import (exp, inf, pi, sqrt, floor, sin, cos, around, int,
  supported_dtypes = [np.typeDict[x] for x in supported_dtypes]
[2022/07/08 05:22:07] ppocr INFO: In PP-OCRv3, rec_image_shape parameter defaults to '3, 48, 320', if you are using recognition model with PP-OCRv2 or an older version, please set --rec_image_shape='3,32,320
[2022/07/08 05:22:07] ppocr DEBUG: dt_boxes num : 48, elapse : 0.7749576568603516
[2022/07/08 05:22:12] ppocr DEBUG: rec_res num  : 48, elapse : 4.123053312301636
[2022/07/08 05:22:12] ppocr DEBUG: 0  Predict time of ./test.jpeg: 4.923s
[2022/07/08 05:22:12] ppocr DEBUG: Medical Specialty:, 1.000
[2022/07/08 05:22:12] p

# Every inference was run on the CPU.
## The best observed prediction time for detection + recognition was around 4-5 seconds.