In [1]:
import cv2
from PIL import Image, ImageEnhance
import numpy as np 
from matplotlib import pyplot as plt 
from craft_text_detector import read_image, load_craftnet_model, load_refinenet_model, get_prediction, export_detected_regions, empty_cuda_cache
from vietocr.tool.predictor import Predictor
from vietocr.tool.config import Cfg
import crop_img 

In [2]:
def group_h_lines(h_lines, thin_thresh):
    new_h_lines = []
    while len(h_lines) > 0:
        thresh = sorted(h_lines, key=lambda x: x[0][1])[0][0]
        lines = [line for line in h_lines if thresh[1] -
                 thin_thresh <= line[0][1] <= thresh[1] + thin_thresh]
        h_lines = [line for line in h_lines if thresh[1] - thin_thresh >
                   line[0][1] or line[0][1] > thresh[1] + thin_thresh]
        x = []
        for line in lines:
            x.append(line[0][0])
            x.append(line[0][2])
        x_min, x_max = min(x) - int(5*thin_thresh), max(x) + int(5*thin_thresh)
        new_h_lines.append([x_min, thresh[1], x_max, thresh[1]])
    return new_h_lines
def group_v_lines(v_lines, thin_thresh, img):
    new_v_lines = []
    while len(v_lines) > 0:
        thresh = sorted(v_lines, key=lambda x: x[0][0])[0][0]
        lines = [line for line in v_lines if thresh[0] -
                 thin_thresh <= line[0][0] <= thresh[0] + thin_thresh]
        v_lines = [line for line in v_lines if thresh[0] - thin_thresh >
                   line[0][0] or line[0][0] > thresh[0] + thin_thresh]
        y = []
        for line in lines:
            y.append(line[0][1])
            y.append(line[0][3])
        y_min, y_max = min(y) - int(4*thin_thresh), max(y) + int(4*thin_thresh)
        if y_max-y_min >= img.shape[0]-10:
            new_v_lines.append([thresh[0], y_min, thresh[0], y_max])
    return new_v_lines

def processImg(img):
    img = crop_img.crop_imgFunc(img)
    gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
    thresh , img_bin = cv2.threshold(gray , 90 , 255 , cv2.THRESH_BINARY | cv2.THRESH_OTSU)
    img_bin = 255-img_bin
    kernel_len = gray.shape[1]//120
    hor_kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (kernel_len, 1))
    image_horizontal = cv2.erode(img_bin, hor_kernel, iterations=3)
    horizontal_lines = cv2.dilate(image_horizontal, hor_kernel, iterations=3)
    h_lines = cv2.HoughLinesP(
        horizontal_lines, 1, np.pi/180, 100, maxLineGap=250)

    new_horizontal_lines = group_h_lines(h_lines, kernel_len)
    for i in range(len(new_horizontal_lines)):
        cv2.line(img,(new_horizontal_lines[i][0], new_horizontal_lines[i][1]), (new_horizontal_lines[i][2], new_horizontal_lines[i][3]),(0, 255, 0), 1)

    kernel_len = gray.shape[1]//120
    ver_kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (1, kernel_len))
    image_vertical = cv2.erode(img_bin, ver_kernel, iterations=3)
    vertical_lines = cv2.dilate(image_vertical, ver_kernel, iterations=3)
    v_lines = cv2.HoughLinesP(vertical_lines, 1, np.pi/180, 30, maxLineGap=250)
    new_vertical_lines = group_v_lines(v_lines, kernel_len, img)
    for i in range(len(new_vertical_lines)):
        cv2.line(img,(new_vertical_lines[i][0], new_vertical_lines[i][1]), (new_vertical_lines[i][2], new_vertical_lines[i][3]),(0, 255, 0), 1)
    def seg_intersect(line1: list, line2: list):
        a1, a2 = line1
        b1, b2 = line2
        da = a2-a1
        db = b2-b1
        dp = a1-b1

        def perp(a):
            b = np.empty_like(a)
            b[0] = -a[1]
            b[1] = a[0]
            return b

        dap = perp(da)
        denom = np.dot(dap, db)
        num = np.dot(dap, dp)
        return (num / denom.astype(float))*db + b1
    points = []
    for hline in new_horizontal_lines:
        x1A, y1A, x2A, y2A = hline
        for vline in new_vertical_lines:
            x1B, y1B, x2B, y2B = vline

            line1 = [np.array([x1A, y1A]), np.array([x2A, y2A])]
            line2 = [np.array([x1B, y1B]), np.array([x2B, y2B])]

            x, y = seg_intersect(line1, line2)
            if x1A <= x <= x2A and y1B <= y <= y2B:
                points.append([int(x), int(y)])
    cells = []
    for i in range(len(points)):
        if (i+1) % 6 == 0:
            continue
        if (i+6) == len(points):
            break
        cells.append([points[i],points[i+1], points[i+6], points[i+7]])
    return cells,img

In [3]:
def load_model():
    refine_net = load_refinenet_model(cuda=True)
    craft_net = load_craftnet_model(cuda=True)
    ##& VietOCR 
    # set device to use cpu
    config1 = Cfg.load_config_from_name('vgg_seq2seq')
    config2 = Cfg.load_config_from_file('config.yml')
    config1['cnn']['pretrained']=False
    config2['cnn']['pretrained']=False
    config2['weights'] = './weights/transformerocr.pth'
    detector2 = Predictor(config2)
    detector1 = Predictor(config1)  
    return detector1,detector2,refine_net,craft_net
def predict(img,detector1,detector2,refine_net,craft_net):
    cells,img = processImg(img)
    result2 = []
    for id,cell in enumerate(cells):
        if id>5 and (id+1)%5 ==0:
            x_min = cell[0][0] ## Top 
            x_max = cell[3][0] ##Right
            y_min = cell[0][1] 
            y_max = cell[3][1] + 10
            cell_image = img[y_min:y_max, x_min:x_max]
            img_text = Image.fromarray(cell_image)
            result = detector2.predict(img_text)
            result2.append(result)
        else:
            x_min = cell[0][0] ## Top 
            x_max = cell[3][0] ##Right
            y_min = cell[0][1] 
            y_max = cell[3][1]
            cell_image = img[y_min:y_max, x_min:x_max]
            prediction_result= get_prediction(
            image=cell_image,
            craft_net=craft_net,
            refine_net=refine_net,
            text_threshold=0.7,
            link_threshold=0.4,
            low_text=0.4,
            cuda=True,
            long_size=1280
            )
            arr = prediction_result['boxes'].transpose(2,0,1).reshape(2,-1)
            x_min = int(arr[0][np.argmin(arr,axis=1)[0]])
            y_min = int(arr[1][np.argmin(arr,axis=1)[1]])
            x_max = int(arr[0][np.argmax(arr,axis=1)[0]])
            y_max = int(arr[1][np.argmax(arr,axis=1)[1]])
            text_image = cell_image[y_min:y_max, x_min:x_max]
            img_text = Image.fromarray(text_image)
            # # # predict
            result = detector1.predict(img_text)
            result2.append(result)
    return result2

In [4]:
detector1,detector2,refine_net,craft_net = load_model()
img = cv2.imread('test/document-02.png')
result = predict(img,detector1,detector2,refine_net,craft_net )
print(result)

File exists: /home/tung/.cache/gdown/https-COLON--SLASH--SLASH-drive.google.com-SLASH-uc-QUESTION-id-EQUAL-1nTKlEog9YFK74kPyX0qLwCWi60_YHHk4
['STT', 'Mã SV', 'Họ và tên', 'Lớp SV', 'Điểm', '33', '20182548', 'Trần Hữu Minh Hoàng', 'Điện tử 03-K63', '3.0', '34', '20181506', 'Nguyễn Bá Phi Hùng', 'Tự động hóa 11-K63', '4.0', '35', '20151819', 'Phan Văn Hùng', 'Điện tử 03 K60', '7.0', '36', '20155765', 'Phan Văn Hùng', 'CN- ĐIỀU KHIỂN & TĐH 2 K60', '5.0', '37', '20155779', 'Bùi Văn Hưng', 'SPKT Kỹ thuật điện K60', '4.0', '38', '20173946', 'Đường Gia Hưng', 'Kỹ thuật điện 01 K62', '3.0', '39', '20151867', 'Nguyễn Trung Hưng', 'Điện tử 09 K60', '8.0', '40', '20142224', 'Nguyễn Thu Hương', 'Kinh tế công nghiệp 1 K59', '4.0', '41', '20161795', 'Đào Quốc Huy', 'Cơ khí động lực 2 K61', '4.0', '42', '20173955', 'Đỗ Quang Huy', 'KT ĐIỀU KHIỂN THH 09 K62', '4.5', '43', '20183340', 'Nguyễn Lâm Huy', 'Nhiệt 01-K63', '3.0', '44', '20161851', 'Phạm Công Huy', 'Kỹ thuật nhiệt 02-K61', '-', '45', '201618

In [7]:
type(img)

numpy.ndarray

In [5]:
for i in range(0,len(result2),5):
    result.append([result2[i], result2[i+1],result2[i+2],result2[i+3],result2[i+4]])
print(result)  

NameError: name 'result2' is not defined