In [14]:
import pytesseract
import cv2
import os
import numpy as np
import re
from time import time
from skimage import color as skicolor

# Location of the Tesseract executable used by pytesseract.
# The default is the original Windows path of this notebook; set the
# TESSERACT_CMD environment variable to run on a machine where the binary
# lives elsewhere, without editing the notebook.
pytesseract.pytesseract.tesseract_cmd = os.environ.get(
    "TESSERACT_CMD", r'C:\Program Files\Tesseract-OCR\tesseract.exe'
)

def get_year(img):
    """OCR the digit-like regions of an image and annotate the image with the result.

    The image is read from disk, rotated to landscape when it is taller than
    wide, upscaled 3x, binarised with Otsu's threshold and dilated. Contours of
    the dilated mask are scanned left to right; every bounding box that passes
    the size/aspect filters is outlined on the colour image and sent to
    Tesseract restricted to the characters 0-9. The recognised pieces are
    concatenated, printed together with the file path, and drawn on the image.

    Parameters
    ----------
    img : str
        Path of the image file to process.

    Returns
    -------
    tuple
        ``(im, elapsed)`` where ``im`` is the upscaled (and possibly rotated)
        BGR image with the drawn rectangles and recognised text, and
        ``elapsed`` is the processing time in seconds.

    Raises
    ------
    ValueError
        If ``cv2.imread`` cannot load ``img`` (it returns ``None`` in that case).
    """
    start = time()

    im = cv2.imread(img)
    if im is None:
        # Without this check the failure surfaces later as an opaque
        # cvtColor assertion error.
        raise ValueError(f"Could not read image: {img!r}")

    gray = cv2.cvtColor(im, cv2.COLOR_BGR2GRAY)
    print(gray.shape)

    # Bring portrait images to landscape orientation.
    img_h, img_w = gray.shape
    if img_h > img_w:
        # Use the constant from the top-level cv2 namespace; the `cv2.cv2`
        # alias is not available in every opencv-python build.
        gray = cv2.rotate(gray, cv2.ROTATE_90_CLOCKWISE)
        im = cv2.rotate(im, cv2.ROTATE_90_CLOCKWISE)

    # Upscale both images by the same factor so contour coordinates found on
    # the grayscale pipeline can be drawn directly on the colour image.
    gray = cv2.resize(gray, None, fx=3, fy=3, interpolation=cv2.INTER_CUBIC)
    im = cv2.resize(im, None, fx=3, fy=3, interpolation=cv2.INTER_CUBIC)

    blur = cv2.GaussianBlur(gray, (5, 5), 0)
    _, thresh = cv2.threshold(blur, 0, 255, cv2.THRESH_OTSU | cv2.THRESH_BINARY)

    rect_kern = cv2.getStructuringElement(cv2.MORPH_RECT, (3, 3))
    dilation = cv2.dilate(thresh, rect_kern, iterations=1)

    # findContours returns either (contours, hierarchy) or
    # (image, contours, hierarchy) depending on the OpenCV version; the
    # contour list is the second-to-last element in both layouts.
    contours = cv2.findContours(dilation, cv2.RETR_TREE, cv2.CHAIN_APPROX_SIMPLE)[-2]
    # Process candidates from left to right.
    sorted_contours = sorted(contours, key=lambda ctr: cv2.boundingRect(ctr)[0])

    height, width = dilation.shape
    color = (0, 215, 255)
    ocr_config = '-c tessedit_char_whitelist=0123456789 --psm 8 --oem 3'

    pieces = []
    for cnt in sorted_contours:
        x, y, box_w, box_h = cv2.boundingRect(cnt)

        # Keep only boxes that are plausible characters:
        if height / float(box_h) > 3:       # shorter than a third of the image height
            continue
        if box_h / float(box_w) < 1.25:     # not clearly taller than wide
            continue
        if width / float(box_w) > 20:       # narrower than 1/20 of the image width
            continue
        if box_h * box_w < 100:             # tiny box
            continue

        # Outline the candidate on the colour image (drawn in place).
        cv2.rectangle(im, (x, y), (x + box_w, y + box_h), color, 2)

        # Cut the candidate out of the binarised image, invert and smooth it
        # before handing it to Tesseract.
        roi = thresh[y:y + box_h, x:x + box_w]
        roi = cv2.bitwise_not(roi)
        roi = cv2.medianBlur(roi, 5)

        text = pytesseract.image_to_string(roi, config=ocr_config)
        # Strip whitespace, punctuation and underscores from the OCR output.
        pieces.append(re.sub(r'[\W_]+', '', text))

    plate_num = "".join(pieces)

    print(img, plate_num)
    if plate_num:
        cv2.putText(im, plate_num, (int(width / 2), int(height / 2)),
                    cv2.FONT_HERSHEY_SIMPLEX, 2, color, 3)

    end = time() - start
    return im, end

In [15]:
# Run get_year over every file in `path`, save the annotated images under
# `path`/recognized and collect the per-image processing times in `time_lst`.
path = "./data/images"
out_dir = f"{path}/recognized"

# cv2.imwrite does not create missing directories, so make sure the output
# folder exists before the loop starts.
os.makedirs(out_dir, exist_ok=True)

time_lst = []

# os.listdir returns entries in arbitrary order; sort for reproducible runs.
for file in sorted(os.listdir(path)):
    if file == ".ipynb_checkpoints": continue
    # Skip sub-directories (including the output folder itself).
    if os.path.isfile(f"{path}/{file}"):
        img, t = get_year(f"{path}/{file}")
        # imwrite reports failure through its return value instead of raising.
        if not cv2.imwrite(f"{out_dir}/{file}", img):
            print(f"Failed to write {out_dir}/{file}")
        time_lst.append(t)

(115, 196)
./data/images/00032_2.jpg 
(246, 436)
./data/images/00040_1.jpg 
(81, 188)
./data/images/00050_2.jpg 
(70, 148)
./data/images/00052_2.jpg 
(70, 148)
./data/images/00054_2.jpg 
(53, 79)
./data/images/00060_1.jpg 7
(161, 223)
./data/images/00062_2.jpg 
(74, 171)
./data/images/00070_1.jpg 
(142, 270)
./data/images/00094_1.jpg 4
(142, 270)
./data/images/00096_1.jpg 
(40, 85)
./data/images/00100_1.jpg 4
(40, 89)
./data/images/00102_2.jpg 3
(183, 75)
./data/images/00144_2.jpg 
(49, 126)
./data/images/00146_1.jpg 
(49, 126)
./data/images/00148_1.jpg 
(23, 33)
./data/images/00156_1.jpg 
(48, 58)
./data/images/00174_1.jpg 
(98, 206)
./data/images/00212_2.jpg 
(10, 26)
./data/images/00272_2.jpg 1
(165, 62)
./data/images/00284_1.jpg 
(165, 62)
./data/images/00286_1.jpg 
(74, 152)
./data/images/00320_1.jpg 
(53, 144)
./data/images/00346_2.jpg 1
(76, 194)
./data/images/00378_2.jpg 33
(27, 59)
./data/images/00388_2.jpg 49
(180, 383)
./data/images/00390_1.jpg 
(180, 383)
./data/images/0039

./data/images/02206_2.jpg 
(32, 48)
./data/images/02220_2.jpg 
(172, 270)
./data/images/02276_2.jpg 
(141, 211)
./data/images/02302_2.jpg 
(37, 68)
./data/images/02314_2.jpg 
(37, 68)
./data/images/02316_2.jpg 
(28, 62)
./data/images/02336_2.jpg 
(171, 68)
./data/images/02338_2.jpg 194
(140, 327)
./data/images/02382_2.jpg 
(140, 327)
./data/images/02384_2.jpg 
(23, 39)
./data/images/02390_2.jpg 9
(114, 253)
./data/images/02414_2.jpg 
(114, 253)
./data/images/02416_2.jpg 
(68, 102)
./data/images/02422_2.jpg 
(75, 170)
./data/images/02430_2.jpg 28
(113, 326)
./data/images/02442_2.jpg 
(30, 64)
./data/images/02470_2.jpg 59
(51, 94)
./data/images/02476_2.jpg 69
(71, 175)
./data/images/02524_2.jpg 93
(71, 175)
./data/images/02526_2.jpg 
(46, 74)
./data/images/02546_2.jpg 
(27, 63)
./data/images/02600_2.jpg 
(111, 108)
./data/images/02622_1.jpg 
(118, 191)
./data/images/02640_2.jpg 
(118, 191)
./data/images/02642_2.jpg 
(57, 118)
./data/images/02670_2.jpg 
(77, 115)
./data/images/02678_1.jpg