In [31]:
from easyocr import Reader
import argparse
import cv2
import numpy as np

In [48]:
# Single source of truth for the input image so the OpenCV load and the OCR
# call cannot drift apart.
IMAGE_PATH = 'invoice-sample.jpg'

# English-only EasyOCR reader.
reader = Reader(['en'])

# cv2.imread reports a missing/unreadable file by returning None rather than
# raising, so fail loudly here instead of deep inside a later preprocessing call.
image = cv2.imread(IMAGE_PATH)
if image is None:
    raise FileNotFoundError(f"Could not read image: {IMAGE_PATH}")

# `result` accumulates one list of detections per OCR pass.
result = []
# `paragraph` is a boolean switch: pass True rather than the string 'true'
# (any non-empty string, including 'false', would be truthy).
# In paragraph mode each entry is [bbox, text] with no confidence value.
result.append(reader.readtext(IMAGE_PATH, paragraph=True))


CUDA not available - defaulting to CPU. Note: This module is much faster with a GPU.


In [51]:
# Dump the raw OCR detections collected in `result` so far.
print(result)

[[[[[652, 38], [779, 38], [779, 56], [652, 56]], 'http:/ {mrsinvoice.com'], [[[57, 71], [549, 71], [549, 203], [57, 203]], 'Invoice Your Company LLC Address 123_ State_ My Country 111-222-333,F 111-222-334'], [[[57, 313], [249, 313], [249, 405], [57, 405]], 'BILL TO: Jonn Doe Alpha Bravo Road 33 P: 111-222-333, F: 111-222-334 client@example net'], [[[425, 325], [483, 325], [483, 339], [425, 339]], 'Invoice #'], [[[649, 325], [689, 325], [689, 339], [649, 339]], 'O0001'], [[[415, 353], [491, 353], [491, 369], [415, 369]], 'Invoice Date'], [[[631, 351], [705, 351], [705, 369], [631, 369]], '12/12/2001'], [[[411, 381], [491, 381], [491, 397], [411, 397]], 'Name of Rep'], [[[657, 383], [683, 383], [683, 397], [657, 397]], 'Bob'], [[[407, 409], [499, 409], [499, 425], [407, 425]], 'Contact Phone'], [[[629, 409], [709, 409], [709, 425], [629, 425]], '101-102-103'], [[[57, 429], [249, 429], [249, 521], [57, 521]], 'SHIPPING TO: John Doe Office Office Road 38 P: 111-333-222, F: 122-222-334 off

<h3 style='color:gray'>Preprocessing </h3>
<br/>

In [40]:
# grayscale conversion
def get_grayscale(image):
    """Return `image` converted from BGR colour to grayscale via cv2.cvtColor."""
    conversion_code = cv2.COLOR_BGR2GRAY
    grayscale = cv2.cvtColor(image, conversion_code)
    return grayscale

# noise removal
def remove_noise(image, ksize=5):
    """Smooth `image` with a median filter.

    Args:
        image: Input image passed straight to cv2.medianBlur.
        ksize: Aperture size of the median filter. Defaults to 5, the value
            that used to be hard-coded. OpenCV requires an odd value greater
            than 1.

    Returns:
        The blurred image returned by cv2.medianBlur.
    """
    return cv2.medianBlur(image, ksize)
 
# thresholding
def thresholding(image):
    """Binarise `image` with cv2.threshold using the BINARY + OTSU flags.

    cv2.threshold returns a pair; only its second element (the thresholded
    image) is handed back to the caller.
    """
    mode = cv2.THRESH_BINARY + cv2.THRESH_OTSU
    outcome = cv2.threshold(image, 0, 255, mode)
    return outcome[1]

# dilation
def dilate(image, kernel_size=(5, 5), iterations=1):
    """Dilate `image` with an all-ones structuring element.

    Args:
        image: Input image passed straight to cv2.dilate.
        kernel_size: (rows, cols) shape of the all-ones uint8 kernel.
            Defaults to (5, 5), the value that used to be hard-coded.
        iterations: Number of times the dilation is applied. Defaults to 1.

    Returns:
        The dilated image returned by cv2.dilate.
    """
    kernel = np.ones(kernel_size, np.uint8)
    return cv2.dilate(image, kernel, iterations=iterations)
    
# erosion
def erode(image, kernel_size=(5, 5), iterations=1):
    """Erode `image` with an all-ones structuring element.

    Args:
        image: Input image passed straight to cv2.erode.
        kernel_size: (rows, cols) shape of the all-ones uint8 kernel.
            Defaults to (5, 5), the value that used to be hard-coded.
        iterations: Number of times the erosion is applied. Defaults to 1.

    Returns:
        The eroded image returned by cv2.erode.
    """
    kernel = np.ones(kernel_size, np.uint8)
    return cv2.erode(image, kernel, iterations=iterations)

# opening - erosion followed by dilation
def opening(image, kernel_size=(5, 5)):
    """Apply a morphological opening (cv2.MORPH_OPEN) to `image`.

    Args:
        image: Input image passed straight to cv2.morphologyEx.
        kernel_size: (rows, cols) shape of the all-ones uint8 kernel.
            Defaults to (5, 5), the value that used to be hard-coded.

    Returns:
        The opened image returned by cv2.morphologyEx.
    """
    kernel = np.ones(kernel_size, np.uint8)
    return cv2.morphologyEx(image, cv2.MORPH_OPEN, kernel)

# canny edge detection
def canny(image, threshold1=100, threshold2=200):
    """Run the Canny edge detector on `image`.

    Args:
        image: Input image passed straight to cv2.Canny.
        threshold1: First hysteresis threshold. Defaults to 100, the value
            that used to be hard-coded.
        threshold2: Second hysteresis threshold. Defaults to 200, the value
            that used to be hard-coded.

    Returns:
        The edge map returned by cv2.Canny.
    """
    return cv2.Canny(image, threshold1, threshold2)

# skew correction
def deskew(image):
    """Rotate `image` so that its non-zero pixels are axis-aligned.

    The skew is estimated from the minimum-area rectangle around every pixel
    whose value is > 0, and the image is rotated about its centre by the
    opposite amount (always within [-45, 45] degrees).

    Args:
        image: numpy image; pixels > 0 are treated as foreground.

    Returns:
        A rotated image with the same width and height as the input. If the
        image has no foreground pixels, an unrotated copy is returned.
    """
    # Coordinates come from np.where, i.e. in (row, col) order.
    coords = np.column_stack(np.where(image > 0))
    if coords.size == 0:
        # Nothing to fit a rectangle to; there is no skew to estimate.
        return image.copy()

    angle = cv2.minAreaRect(coords)[-1]
    # A rectangle's orientation is only defined modulo 90 degrees, and
    # different OpenCV releases report it in different ranges: older ones
    # use [-90, 0), newer ones (reportedly 4.5.1+) use (0, 90]. Fold either
    # form into the smallest equivalent correction; without the `> 45` branch
    # an upright page reported as 90 would be rotated by -90 degrees.
    if angle < -45:
        angle = -(90 + angle)
    elif angle > 45:
        angle = 90 - angle
    else:
        angle = -angle

    (h, w) = image.shape[:2]
    center = (w // 2, h // 2)
    M = cv2.getRotationMatrix2D(center, angle, 1.0)
    # Replicate border pixels so the corners exposed by the rotation are not
    # filled with black.
    rotated = cv2.warpAffine(image, M, (w, h), flags=cv2.INTER_CUBIC, borderMode=cv2.BORDER_REPLICATE)
    return rotated

In [41]:
gray = get_grayscale(image)
thresh = thresholding(gray)
opening = opening(gray)
canny = canny(gray)

In [45]:
result.append(reader.readtext(thresh))
result.append(reader.readtext(opening))
result.append(reader.readtext(canny))


In [46]:
resultmax=[]  # not used in this cell; kept because other cells may reference it
# Print every recognised text together with its confidence, one OCR pass at a time.
for detections in result:
    for detection in detections:
        # A detection is (bbox, text, confidence); paragraph-mode entries only
        # carry (bbox, text), so the confidence may be absent.
        text = detection[1]
        confidence = detection[2] if len(detection) > 2 else 'n/a'
        print(text, "\n confidence : ", confidence)

http:/ {mrsinvoice.com 
 confidence :  0.6191263480172272
Invoice 
 confidence :  0.8417029914472208
Your Company LLC Address 123_ 
 confidence :  0.9699016629589228
State_ 
 confidence :  0.9993438719407953
My Country 
 confidence :  0.7690886052828926
111-222-333,F 111-222-334 
 confidence :  0.8944311753337679
BILL TO: 
 confidence :  0.6356406546240435
Invoice # 
 confidence :  0.811387206387055
O0001 
 confidence :  0.26906153742125555
Jonn Doe 
 confidence :  0.6923027453876681
Alpha Bravo Road 33 
 confidence :  0.9995074579613783
Invoice Date 
 confidence :  0.9849786514553461
12/12/2001 
 confidence :  0.9999289768449527
P: 111-222-333, 
 confidence :  0.6397503523294078
F: 111-222-334 
 confidence :  0.5610792224035954
Name of Rep 
 confidence :  0.7325150235568189
Bob 
 confidence :  0.9734600232950599
client@example net 
 confidence :  0.8549727458882078
Contact Phone 
 confidence :  0.8005528273143847
101-102-103 
 confidence :  0.7248737651044478
SHIPPING TO: 
 confidence

In [47]:
# Dump every OCR pass accumulated in `result` (one list of detections per pass).
print(result)

[[([[652, 38], [779, 38], [779, 56], [652, 56]], 'http:/ {mrsinvoice.com', 0.6191263480172272), ([[115, 71], [501, 71], [501, 183], [115, 183]], 'Invoice', 0.8417029914472208), ([[57, 187], [251, 187], [251, 203], [57, 203]], 'Your Company LLC Address 123_', 0.9699016629589228), ([[255, 189], [289, 189], [289, 203], [255, 203]], 'State_', 0.9993438719407953), ([[293, 187], [367, 187], [367, 203], [293, 203]], 'My Country', 0.7690886052828926), ([[377, 187], [549, 187], [549, 203], [377, 203]], '111-222-333,F 111-222-334', 0.8944311753337679), ([[59, 313], [111, 313], [111, 327], [59, 327]], 'BILL TO:', 0.6356406546240435), ([[425, 325], [483, 325], [483, 339], [425, 339]], 'Invoice #', 0.811387206387055), ([[649, 325], [689, 325], [689, 339], [649, 339]], 'O0001', 0.26906153742125555), ([[57, 333], [117, 333], [117, 347], [57, 347]], 'Jonn Doe', 0.6923027453876681), ([[59, 351], [185, 351], [185, 367], [59, 367]], 'Alpha Bravo Road 33', 0.9995074579613783), ([[415, 353], [491, 353], [4