In [1]:
from __future__ import print_function

import glob
import os
import shutil
import sys
import time
from scipy import ndimage

import cv2
import numpy as np
import tensorflow as tf
from tensorflow.python.platform import gfile

sys.path.append(os.getcwd())
from lib.fast_rcnn.config import cfg, cfg_from_file
from lib.fast_rcnn.test import _get_blobs
from lib.text_connector.detectors import TextDetector
from lib.text_connector.text_connect_cfg import Config as TextLineCfg
from lib.rpn_msr.proposal_layer_tf import proposal_layer


def resize_im(im, scale, max_scale=None):
    """Resize ``im`` uniformly so its shorter side is about ``scale`` pixels.

    If ``max_scale`` is given and the longer side would exceed it after that
    resize, the factor is lowered so the longer side becomes ``max_scale``
    instead. The same factor is applied to both axes.

    Args:
        im: image array; only ``im.shape[0]`` and ``im.shape[1]`` are read
            here before the array is handed to ``cv2.resize``.
        scale: target length of the shorter side.
        max_scale: optional upper bound for the longer side.

    Returns:
        ``(resized_image, factor)`` where ``factor`` is the multiplier that
        was applied to both axes.
    """
    short_side = min(im.shape[0], im.shape[1])
    long_side = max(im.shape[0], im.shape[1])
    f = float(scale) / short_side
    # Identity test against None (PEP 8) instead of `!= None`.
    if max_scale is not None and f * long_side > max_scale:
        f = float(max_scale) / long_side
    return cv2.resize(im, None, None, fx=f, fy=f, interpolation=cv2.INTER_LINEAR), f

#return 4 vertices
def text_boxes(img):
    """Run the loaded CTPN graph on ``img`` and return the detected text boxes.

    Uses the notebook-level ``sess``, ``input_img``, ``output_cls_prob`` and
    ``output_box_pred`` objects, so the session-setup cell must have been
    executed first.

    Args:
        img: image array; it is resized with ``resize_im`` before inference.

    Returns:
        A list with one entry per box returned by ``TextDetector.detect``;
        each entry is a list of that box's values divided by the resize
        factor, i.e. expressed relative to the image that was passed in.
    """
    resized, resize_factor = resize_im(
        img, scale=TextLineCfg.SCALE, max_scale=TextLineCfg.MAX_SCALE)
    blobs, im_scales = _get_blobs(resized, None)

    if cfg.TEST.HAS_RPN:
        data_shape = blobs['data'].shape
        blobs['im_info'] = np.array(
            [[data_shape[1], data_shape[2], im_scales[0]]],
            dtype=np.float32)

    cls_prob, box_pred = sess.run(
        [output_cls_prob, output_box_pred],
        feed_dict={input_img: blobs['data']})
    rois, _ = proposal_layer(
        cls_prob, box_pred, blobs['im_info'], 'TEST',
        anchor_scales=cfg.ANCHOR_SCALES)

    # Column 0 of each proposal is its score, columns 1-4 are the box.
    proposal_scores = rois[:, 0]
    proposal_boxes = rois[:, 1:5] / im_scales[0]
    detections = TextDetector().detect(
        proposal_boxes, proposal_scores[:, np.newaxis], resized.shape[:2])

    # Undo the initial resize so values refer to the caller's image.
    return [[value / resize_factor for value in detection]
            for detection in detections]

#return [x, y, w, h]
def text_boxes_simple(image):
    """Find text-like regions using thresholding and dilation only.

    The image is converted to grayscale, inverse-thresholded at 127 and
    dilated with a square kernel (15x15 below 2,000,000 pixels, 30x30
    otherwise). Bounding rectangles of the external contours are then
    filtered by area and height, relative both to the average rectangle and
    to the image size.

    Side effects: every accepted rectangle is drawn onto ``image`` in place,
    and the annotated image is written to ``result.png``.

    Args:
        image: image array, converted with ``cv2.COLOR_BGR2GRAY``.

    Returns:
        A list of ``[x, y, w, h]`` lists ordered by ``x``. The list is empty
        when no contour is found or none passes the filters.
    """
    gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
    _, thresh = cv2.threshold(gray, 127, 255, cv2.THRESH_BINARY_INV)

    im_height = image.shape[0]
    im_area = im_height * image.shape[1]
    kernel_size = 15 if im_area < 2000000 else 30
    kernel = np.ones((kernel_size, kernel_size), np.uint8)
    dilated = cv2.dilate(thresh, kernel, iterations=1)

    # findContours returns (image, contours, hierarchy) in OpenCV 3.x but
    # (contours, hierarchy) in 2.x/4.x; index -2 is the contour list in both.
    contours = cv2.findContours(
        dilated, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)[-2]

    # Compute each bounding rectangle once and order them left to right.
    rects = sorted((cv2.boundingRect(ctr) for ctr in contours),
                   key=lambda rect: rect[0])

    boxes = []
    # Guard: with no contours the averages below would divide by zero.
    if rects:
        mean_area = sum(w * h for _, _, w, h in rects) / len(rects)
        mean_height = sum(h for _, _, _, h in rects) / len(rects)

        for x, y, w, h in rects:
            area = w * h
            if (area > (mean_area / 20)) and (area < (mean_area * 10)) \
                    and (area < (im_area * 1 / 3)) and (area > (im_area / 100)) \
                    and (h < (mean_height * 10)) and (h < (im_height / 4)):
                cv2.rectangle(image, (x, y), (x + w, y + h), (90, 0, 255), 2)
                boxes.append([x, y, w, h])

    cv2.imwrite("result.png", image)
    return boxes

# Test text_boxes

In [3]:
cfg_from_file('data/text.yml')

# init session
config = tf.ConfigProto(allow_soft_placement=True)
sess = tf.Session(config=config)

# Read the serialized graph; tf.gfile.GFile replaces the deprecated FastGFile.
with tf.gfile.GFile('data/ctpn.pb', 'rb') as f:
    graph_def = tf.GraphDef()
    graph_def.ParseFromString(f.read())

# as_default() only takes effect when entered as a context manager, so the
# import is wrapped explicitly to target the session's graph.
with sess.graph.as_default():
    tf.import_graph_def(graph_def, name='')
sess.run(tf.global_variables_initializer())

# Tensors used by text_boxes(): the image placeholder and the two outputs.
input_img = sess.graph.get_tensor_by_name('Placeholder:0')
output_cls_prob = sess.graph.get_tensor_by_name('Reshape_2:0')
output_box_pred = sess.graph.get_tensor_by_name('rpn_bbox_pred/Reshape_1:0')


Instructions for updating:
Use tf.gfile.GFile.


In [5]:
im_name = '/home/topica/Desktop/test_hpt7.jpg'
img = cv2.imread(im_name)
# cv2.imread returns None instead of raising when the file cannot be read.
if img is None:
    raise IOError("Could not read image: %s" % im_name)

start_time = time.time()
boxes = text_boxes(img)
print("--- %s seconds ---" % (time.time() - start_time))

base_name = os.path.basename(im_name)
color = (255,0,0)
# Only draw boxes at least 1/25 of the image height tall.
min_height = img.shape[0] / 25
for box in boxes:
    if (box[5] - box[1]) >= min_height:
        # box holds four (x, y) corner pairs; connect them into a quadrilateral.
        cv2.line(img, (int(box[0]), int(box[1])), (int(box[2]), int(box[3])), color, 2)
        cv2.line(img, (int(box[0]), int(box[1])), (int(box[4]), int(box[5])), color, 2)
        cv2.line(img, (int(box[6]), int(box[7])), (int(box[2]), int(box[3])), color, 2)
        cv2.line(img, (int(box[4]), int(box[5])), (int(box[6]), int(box[7])), color, 2)

# cv2.imwrite fails silently (returns False) if the directory is missing.
results_dir = "data/results"
if not os.path.isdir(results_dir):
    os.makedirs(results_dir)
cv2.imwrite(os.path.join(results_dir, base_name), img)


--- 3.376528739929199 seconds ---


True

# Test text_boxes_simple

In [6]:
image_path = '/home/topica/Desktop/test_hpt7.jpg'
image = cv2.imread(image_path)
# cv2.imread returns None instead of raising when the file cannot be read.
if image is None:
    raise IOError("Could not read image: %s" % image_path)
text_boxes_simple(image)

[[171, 341, 720, 64],
 [175, 291, 727, 42],
 [180, 242, 454, 42],
 [183, 171, 392, 53]]