In [2]:
#Libraries
import numpy as np
import cv2
import imutils
import pytesseract

# Set to False to skip the intermediate cv2.imshow preview windows.
verbose = True

# Original image
image = cv2.imread('1.jpg')
if image is None:
    # cv2.imread reports a missing or unreadable file by returning None
    # instead of raising, so fail here with a clear message.
    raise FileNotFoundError("Could not read image '1.jpg'")

# Pre-processing
# Resize image - change width to 500
image = imutils.resize(image, width=500)

# Show the resized original image (waits for a key press before continuing)
if verbose:
    cv2.imshow("Original Image", image)
    cv2.waitKey(0)
    cv2.destroyAllWindows()


# BGR -> grayscale (the conversion code used is COLOR_BGR2GRAY)
gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)

# Preview the grayscale result when previews are enabled
if verbose:
    cv2.imshow("1 - Grayscale Conversion", gray)
    cv2.waitKey(0)
    cv2.destroyAllWindows()


# Image processing
# Smooth the grayscale image with a bilateral filter, which reduces noise
# while preserving edges. Positional arguments: d=11, sigmaColor=17, sigmaSpace=17.
gray = cv2.bilateralFilter(gray, 11, 17, 17)

# Preview the filtered image when previews are enabled
if verbose:
    cv2.imshow("2 - Bilateral Filter", gray)
    cv2.waitKey(0)
    cv2.destroyAllWindows()


# Detect edges in the filtered grayscale image with Canny
# (hysteresis thresholds: lower=170, upper=200)
edged = cv2.Canny(gray, 170, 200)

# Preview the edge map when previews are enabled
if verbose:
    cv2.imshow("3 - Canny Edges", edged)
    cv2.waitKey(0)
    cv2.destroyAllWindows()


# Find contours based on edges.
# cv2.findContours returns (contours, hierarchy) in OpenCV 2.4/4.x but
# (image, contours, hierarchy) in OpenCV 3.x; the contour list is always the
# second-to-last element, so index with [-2] instead of unpacking two values.
cnts = cv2.findContours(edged.copy(), cv2.RETR_LIST, cv2.CHAIN_APPROX_SIMPLE)[-2]

# Draw all outlines on a copy so the resized original stays untouched.
# The colour is BGR green (0, 255, 0); the previous (0.255, 0) was a typo
# that passed a 2-element, near-black colour.
img1 = image.copy()
cv2.drawContours(img1, cnts, -1, (0, 255, 0), 3)

if verbose:
    cv2.imshow("4 - All Contours", img1)
    cv2.waitKey(0)
    cv2.destroyAllWindows()


# Sort contours by area (largest first) and keep only the 30 largest;
# everything beyond the top 30 is discarded.
cnts = sorted(cnts, key=cv2.contourArea, reverse=True)[:30]
NumPlacaCnt = None  # plate contour; stays None until a 4-point candidate is found

# Draw the top 30 contours in BGR green on a fresh copy of the image.
# (0, 255, 0) replaces the typo (0.255, 0), which was a 2-element near-black colour.
img2 = image.copy()
cv2.drawContours(img2, cnts, -1, (0, 255, 0), 3)

if verbose:
    cv2.imshow("5 - Top 30 Contours", img2)
    cv2.waitKey(0)
    cv2.destroyAllWindows()


# Walk the candidate contours (largest first) and stop at the first one whose
# polygonal approximation has exactly four vertices.
count = 0  # not used in the visible cells
idx = 1    # suffix of the cropped image file name
for c in cnts:
    peri = cv2.arcLength(c, True)
    # Approximate the closed contour with a tolerance of 2% of its perimeter
    approx = cv2.approxPolyDP(c, 0.02 * peri, True)
    if len(approx) != 4:
        continue  # not a quadrilateral, try the next contour

    NumPlacaCnt = approx  # approximation of the plate outline

    # Crop the contour's bounding rectangle from the grayscale image and save it
    x, y, w, h = cv2.boundingRect(c)
    new_img = gray[y:y + h, x:x + w]
    cv2.imwrite(f'Board{idx}.png', new_img)
    break


# Draw the selected outline on the (resized) original image.
# The contour variable is NumPlacaCnt; the previous name NumCntPlate was never
# defined and raised NameError. The colour is BGR green (0, 255, 0), fixing the
# (0.255, 0) typo.
if NumPlacaCnt is not None:
    cv2.drawContours(image, [NumPlacaCnt], -1, (0, 255, 0), 3)
else:
    # NumPlacaCnt stays None when no 4-point contour was found among the candidates.
    print("No 4-point contour found; no outline drawn.")

if verbose:
    cv2.imshow("Final image with detected card", image)
    cv2.waitKey(0)
    cv2.destroyAllWindows()


# Path of the cropped image file, built from the same idx suffix
Cropped_img_loc = f'Board{idx}.png'

# Reload the cropped image from disk and preview it when previews are enabled
if verbose:
    cv2.imshow("Cropped Image ", cv2.imread(Cropped_img_loc))
    cv2.waitKey(0)
    cv2.destroyAllWindows()

### Tesseract Basic Commands

**pytesseract.image_to_string(InputImage, config='OCR-Language, OCR Engine Mode (oem), Page Segmentation Mode (psm)')**

For example: `pytesseract.image_to_string(img, config='-l eng --oem 3 --psm 7')`.

1. **InputImage** = Image we want to work with.
2. **OCR-Language** = The default language is English. To use another language, download its traineddata file and add it to the **'tessdata'** folder, typically located at **'C:\Program Files\tesseract-OCR\tessdata'**.
3. **OCR Engine Mode (oem)** = Tesseract has 4 OCR engine operating modes.
    * 0 Legacy engine only.
    * 1 Neural nets LSTM engine only.
    * 2 Legacy + LSTM engines.
    * 3 Default, based on what is available.
4. **Page Segmentation Mode (psm)** = PSM can be very useful when you have additional information about the text structure. There are 11 modes.
      * 0 = Orientation and script detection (OSD) only.
      * 1 = Automatic page segmentation with OSD.
      * 2 = Automatic page segmentation, but no OSD, or OCR.
      * 3 = Fully automatic page segmentation, but no OSD. (Default)
      * 4 = Assumes single column of text of variable sizes.
      * 5 = Assumes single uniform block of vertically aligned text.
      * 6 = Assumes single uniform block of text.
      * 7 = Treat the image as a single text line.
      * 8 = Treat the image as a single word.
      * 9 = Treat the image as a single word in a circle.
      * 10 = Treat the image as a single character.

In [7]:
# Point pytesseract at the Tesseract executable (needed on Windows installs).
# Inside a raw string each backslash is literal, so the path uses single
# backslashes; the doubled form put two backslashes between every component.
pytesseract.pytesseract.tesseract_cmd = r'C:\Program Files (x86)\Tesseract-OCR\tesseract.exe'

# Run OCR on the cropped plate image; image_to_string accepts a file path.
text = pytesseract.image_to_string(Cropped_img_loc)

print("Number is :", text)

Number is : L BRA 1234 1
