In [1]:
import cv2
import numpy as np
import glob
import os
import matplotlib.pyplot as plt
from pprint import pprint

In [2]:
def get_all_image_input(directory):
    """Return the paths of the image files located directly inside ``directory``.

    Files are matched by extension, in this order: ``*.jpg``, ``*.png``,
    ``*.jpeg``, ``*.PNG``. Sub-folders are not searched.

    Args:
        directory: Folder to scan. A trailing path separator is optional.

    Returns:
        list[str]: The matching paths, each listed once, grouped by pattern in
        the order given above. Empty when nothing matches.
    """
    list_image = []
    seen = set()
    for pattern in ("*.jpg", "*.png", "*.jpeg", "*.PNG"):
        # os.path.join supplies the separator when the caller omitted it.
        for path in glob.glob(os.path.join(directory, pattern)):
            # On platforms where glob matching ignores case (e.g. Windows),
            # "*.png" and "*.PNG" return the same files; keep the first hit only.
            if path not in seen:
                seen.add(path)
                list_image.append(path)
    return list_image

In [3]:
def show_image(img):
    """Load the image file at path ``img`` and display it with matplotlib.

    Args:
        img: Path of the image file to show.

    Raises:
        FileNotFoundError: If OpenCV cannot read the file.
    """
    image = cv2.imread(img)
    if image is None:
        # cv2.imread signals failure by returning None instead of raising.
        raise FileNotFoundError("Cannot read image: " + str(img))
    # OpenCV loads colour images in BGR order while matplotlib expects RGB;
    # without the conversion red and blue are swapped on screen.
    plt.imshow(cv2.cvtColor(image, cv2.COLOR_BGR2RGB))
    plt.title("Image")
    plt.show()

In [4]:
def find_table_box(list_boxes, img_height):
    """Select, from left to right, the boxes that reach below all earlier picks.

    The left edges of all boxes are visited in ascending order (one visit per
    box). On each visit the boxes whose left edge equals the visited value are
    scanned in list order; a box becomes the current candidate when its top is
    above the current candidate's top (initially ``img_height``) and its bottom
    is below the lowest bottom accepted so far. The last candidate of a visit,
    if any, is added to the result.

    Args:
        list_boxes: Sequence of ``[x, y, w, h]`` boxes. It is not modified.
        img_height: Image height, used as the initial upper bound for a
            candidate's top coordinate.

    Returns:
        list: The selected boxes (the same objects as in ``list_boxes``), in
        selection order, each at most once.
    """
    results = []
    # Lowest bottom edge accepted so far. It is shared across all visits and
    # only ever grows, so a box can never be selected twice.
    lowest_bottom = 0
    for left in sorted(box[0] for box in list_boxes):
        highest_top = img_height
        chosen = None
        for box in list_boxes:
            top = box[1]
            bottom = box[1] + box[3]
            # Compare the left edge only: a membership test such as
            # `left in box` would also match y, w or h by coincidence.
            if box[0] == left and top < highest_top and bottom > lowest_bottom:
                chosen = box
                highest_top = top
                lowest_bottom = bottom
        if chosen is not None:
            results.append(chosen)
    return results

In [19]:
def get_contours(img_bin, filename):
    """Isolate the ruling lines of a binarised image and return their contours.

    Long vertical and horizontal strokes are extracted separately by eroding
    and then dilating with line-shaped kernels. The two results are blended,
    inverted, eroded with a 5x5 kernel and thresholded again before contour
    detection. Intermediate images are written to ``./output/<filename>/``.

    Args:
        img_bin: Binarised image. main() passes an inverted binary threshold of
            the grayscale input, so dark strokes are non-zero.
        filename: Name of the sub-folder of ``./output/`` that receives the
            intermediate images. The folder is expected to exist already.

    Returns:
        tuple: ``(contours, img_final)`` where ``contours`` is a list of the
        contours reported by ``cv2.findContours`` and ``img_final`` is the
        thresholded line image they were found in.
    """
    out_dir = "./output/" + filename

    # Horizontal + vertical ruling lines.
    # Kernel length is a tenth of the image width; at least 1 so that very
    # narrow images still produce a valid structuring element.
    kernel_length = max(1, np.array(img_bin).shape[1] // 10)

    ver_kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (1, kernel_length))
    img_temp1 = cv2.erode(img_bin, ver_kernel, iterations=3)
    vertical_lines_img = cv2.dilate(img_temp1, ver_kernel, iterations=3)
    cv2.imwrite(out_dir + "/vertical_lines_img.png", vertical_lines_img)

    hori_kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (kernel_length, 1))
    img_temp2 = cv2.erode(img_bin, hori_kernel, iterations=3)
    horizontal_lines_img = cv2.dilate(img_temp2, hori_kernel, iterations=3)
    cv2.imwrite(out_dir + "/horizontal_lines_img.png", horizontal_lines_img)

    # 5x5 rectangular kernel
    kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (5, 5))

    # Blend both line images with equal weight, then invert the result.
    img_final_bin = cv2.addWeighted(vertical_lines_img, 0.5, horizontal_lines_img, 0.5, 0.0)
    img_final_bin = 255 - img_final_bin
    img_final_bin = cv2.erode(img_final_bin, kernel, iterations=2)
    # Written next to the other intermediates of this image (previously this
    # always went to the fixed folder ./output/Hinh/).
    cv2.imwrite(out_dir + "/img_final_bin.png", img_final_bin)

    _, img_final = cv2.threshold(img_final_bin, 128, 255, cv2.THRESH_BINARY | cv2.THRESH_OTSU)

    # Find the contours of the image.
    # cv2.findContours returns (image, contours, hierarchy) in OpenCV 3.x and
    # (contours, hierarchy) in 4.x; contours is second from the end in both.
    contours = cv2.findContours(img_final, cv2.RETR_TREE, cv2.CHAIN_APPROX_SIMPLE)[-2]
    # Always hand back a list so callers may reorder it in place.
    return (list(contours), img_final)

In [29]:
def main(list_image):
    """Detect table regions in each image and save every region as a crop.

    For each path the image is read as grayscale, binarised and inverted, and
    passed to get_contours(). The bounding boxes of the contours are filtered,
    reduced by find_table_box(), and each remaining box is cropped from the
    grayscale image and written to ``./output/<name>/_img<i>.png``, where
    ``<name>`` is the input file name without its extension.

    Args:
        list_image: Iterable of image file paths. Unreadable files are reported
            and skipped.
    """
    for image in list_image:
        print("\nProcessing", image, "---------")

        img = cv2.imread(image, cv2.IMREAD_GRAYSCALE)
        if img is None:
            # cv2.imread returns None for a missing or undecodable file.
            print("\tSkipped: cannot read image")
            continue
        height, width = img.shape[:2]

        # Create the output folder (and ./output itself when it is missing).
        basename = os.path.basename(image)
        filename = os.path.splitext(basename)[0]
        out_dir = './output/' + filename
        os.makedirs(out_dir, exist_ok=True)

        # Binarise, then invert so that dark strokes become non-zero.
        _, img_bin = cv2.threshold(img, 200, 255, cv2.THRESH_BINARY)
        img_bin = 255 - img_bin

        (contours, img_final) = get_contours(img_bin, filename)

        boxes = []
        # Walk the contours in reverse order. reversed() works whether the
        # contours arrive as a list or as a tuple.
        for contour in reversed(contours):
            x, y, w, h = cv2.boundingRect(contour)
            # Keep only boxes that start off the left/top border and do not
            # span the full image width or height.
            if x != 0 and y != 0 and w != width and h != height:
                cv2.rectangle(img_final, (x, y), (x + w, y + h), (0, 0, 0), 1)
                boxes.append([x, y, w, h])

        cv2.imwrite("./output/" + filename + "/img_final.png", img_final)

        # ----------------
        table = find_table_box(boxes, height)
        # enumerate() numbers the crops by position, so two equal boxes do not
        # end up with the same file name.
        for index, (x, y, w, h) in enumerate(table):
            img_table = img[y:y + h, x:x + w]

            rel_name = filename + "/_img" + str(index) + ".png"
            cv2.imwrite("./output/" + rel_name, img_table)
            print("\tSaved", rel_name, end="")
            pprint(img_table.shape[:])

In [30]:
# Collect the image paths found in ./input/ and print them for inspection.
# NOTE(review): `input` shadows the Python builtin of the same name; the name is
# kept because the cell that calls main() reads this variable.
input = get_all_image_input('./input/')
print(input)

['./input/table.png', './input/Hinh.png', './input/1.png', './input/3.PNG', './input/4.PNG', './input/2.PNG']


In [31]:
# Run the table-extraction pipeline on every collected image path.
main(input)


Processing ./input/table.png ---------
	Saved table/_img0.png(243, 738)
	Saved table/_img1.png(243, 738)

Processing ./input/Hinh.png ---------
	Saved Hinh/_img0.png(312, 276)

Processing ./input/1.png ---------
	Saved 1/_img0.png(248, 222)

Processing ./input/3.PNG ---------
	Saved 3/_img0.png(446, 558)

Processing ./input/4.PNG ---------
	Saved 4/_img0.png(13, 325)
	Saved 4/_img1.png(13, 325)

Processing ./input/2.PNG ---------
	Saved 2/_img0.png(12, 326)


In [349]:
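# NOTE: disabled experiment kept for reference. It is out of date: get_contours
# takes (img_bin, filename) and returns (contours, img_final), so the call below
# would fail if this cell were simply uncommented.
#             (thresh, img_bin) = cv2.threshold(img_table, 200, 255, cv2.THRESH_BINARY)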
#             img_bin = 255 - img_bin
        
#             (_im, _contours, _hierarchy, _img) = get_contours(img_bin)
# #             contours.reverse()
#             _height, _width = _img.shape[:2]
#             for contour in _contours:
#                 x, y, w, h = cv2.boundingRect(contour)
#                 if w > h and (w/_width)<0.6 and (h/_height)<0.6:
#                     cv2.rectangle(_img, (x, y), (x + w, y + h), (0, 0, 0), 2)
            
#             print("Locate:",x, y, x+w, y+h)