In [63]:
from imutils.object_detection import non_max_suppression
import numpy as np
import argparse
import time
import cv2

In [64]:
# Load the input image from disk.
# cv2.imread does not raise when the file is missing or cannot be decoded; it
# returns None. Fail early with a clear message instead of hitting an
# AttributeError on the .copy() call below.
IMAGE_PATH = "img792.jpg"
image = cv2.imread(IMAGE_PATH)
if image is None:
    raise FileNotFoundError(f"Could not read image: {IMAGE_PATH!r}")

orig = image.copy()       # copy kept at the original resolution
(H, W) = image.shape[:2]  # height and width of the image are stored in H and W

In [65]:
# Report the image height followed by its width.
print(f"{H} {W}")

540 720


In [66]:
# Width and height the image will be resized to.
# NOTE(review): EAST is commonly said to need dimensions that are multiples of 32 - confirm.
newW = 320
newH = 320
print(newW, newH)

320 320


In [67]:
# Determine the ratio between the original and the new size for both the
# width and the height.
# The original size is read from `orig` rather than from H/W because the
# resize cell overwrites H and W with the resized dimensions; re-running this
# cell after that point would otherwise yield ratios of 1.0.
(origH, origW) = orig.shape[:2]
rW = origW / newW
rH = origH / newH

In [68]:
# Scale the image to the new size (the aspect ratio is not preserved), then
# refresh H and W so they describe the resized image.
image = cv2.resize(image, (newW, newH))
H = image.shape[0]
W = image.shape[1]
print(H, W)

320 320


In [69]:
# Names of the two output layers whose feature maps are fetched from the network:
#   1. the output probabilities
#   2. the map used to derive the bounding box coordinates of text
layerNames = [
    "feature_fusion/Conv_7/Sigmoid",
    "feature_fusion/concat_3",
]

Loading the EAST text detector

In [70]:
# Announce the load, then read the neural network into memory.
print("[INFO] loading EAST text detector...")
east_model_path = "frozen_east_text_detection.pb"
net = cv2.dnn.readNet(east_model_path)

[INFO] loading EAST text detector...


In [71]:
# Convert the image to a blob and perform a forward pass of the model to
# obtain the two output layer sets named in layerNames.
# NOTE(review): the mean triple looks like the usual ImageNet per-channel
# means, given in R, G, B order to match swapRB=True - confirm.
blob = cv2.dnn.blobFromImage(image, 1.0, (W, H), (123.68, 116.78, 103.94), swapRB=True, crop=False)

# time.perf_counter() is a monotonic, high-resolution clock meant for measuring
# short durations; time.time() follows the wall clock and can jump if the
# system time is adjusted while the network is running.
start = time.perf_counter()
net.setInput(blob)  # set the blob as the network input
(scores, geometry) = net.forward(layerNames)  # returns two feature maps
end = time.perf_counter()

# The "scores" map contains the probability of a given region containing text.
# The "geometry" map is used to derive the bounding box coordinates of the text in the image.

In [72]:
# Report how long the forward pass took.
elapsed = end - start
print("[INFO] text detection took {:.6f} seconds".format(elapsed))

[INFO] text detection took 0.518517 seconds


In [73]:
np.shape(scores)  # the last two entries (80, 80) are the number of rows and columns respectively

(1, 1, 80, 80)

In [74]:
# Grid size of the score map: axis 2 is rows, axis 3 is columns.
numRows, numCols = scores.shape[2], scores.shape[3]
# Bounding box (x, y) coordinates for text regions.
rects = list()
# Probability associated with each of the bounding boxes in "rects".
confidences = list()

Loop over the number of rows and columns

In [75]:
# Minimum probability a grid cell must reach to be kept as a text detection.
MIN_CONFIDENCE = 0.5
# The output maps are 4x smaller than the network input (80x80 maps for a
# 320x320 input), so a map index is multiplied by 4 to get input-image pixels.
FEATURE_STRIDE = 4.0

# Start from empty lists every time this cell runs. Without this, re-running
# the cell would append a second copy of every detection to the existing lists.
rects = []        # bounding box (startX, startY, endX, endY) per detection
confidences = []  # probability associated with each entry of "rects"

for y in range(0, numRows):  # 'y' denotes the current row
    scoresData = scores[0, 0, y]    # probabilities for this row
    xData0 = geometry[0, 0, y]      # geometry channel 0 for this row
    xData1 = geometry[0, 1, y]      # geometry channel 1 for this row
    xData2 = geometry[0, 2, y]      # geometry channel 2 for this row
    xData3 = geometry[0, 3, y]      # geometry channel 3 for this row
    anglesData = geometry[0, 4, y]  # geometry channel 4: rotation angle

    # Loop over each column index for the current row
    for x in range(0, numCols):
        # If the score doesn't have sufficient probability, ignore it.
        if scoresData[x] < MIN_CONFIDENCE:
            continue

        # The EAST text detector reduces the volume size as the image passes
        # through the network, so map the grid position back to input-image
        # coordinates.
        (offsetX, offsetY) = (x * FEATURE_STRIDE, y * FEATURE_STRIDE)

        # Extract the rotation angle for the prediction and compute its sine and cosine
        angle = anglesData[x]
        cos = np.cos(angle)
        sin = np.sin(angle)

        # Use the geometrical data to derive the width and height of the bounding box
        h = xData0[x] + xData2[x]  # height
        w = xData1[x] + xData3[x]  # width

        # Compute both the starting and ending (x, y) coordinates for the bounding box
        endX = int(offsetX + (cos * xData1[x]) + (sin * xData2[x]))
        endY = int(offsetY - (sin * xData1[x]) + (cos * xData2[x]))
        startX = int(endX - w)
        startY = int(endY - h)

        # Add the bounding box coordinates and probability score to the respective lists
        rects.append((startX, startY, endX, endY))
        confidences.append(scoresData[x])

Apply "non-maxima suppression" to the bounding boxes to suppress weak, overlapping bounding boxes

In [76]:
# Run non-maxima suppression over the candidate boxes, passing their
# probabilities along.
boxes = non_max_suppression(np.array(rects), probs=confidences)

# Show the image in a window named "Original image"; this call is made before
# any boxes are drawn on orig.
cv2.imshow("Original image", orig)

# Scale each remaining box by the width/height ratios and outline it on orig.
for (x0, y0, x1, y1) in boxes:
    top_left = (int(x0 * rW), int(y0 * rH))
    bottom_right = (int(x1 * rW), int(y1 * rH))
    # (0, 0, 255) is red in OpenCV's BGR channel order; 2 is the line thickness.
    cv2.rectangle(orig, top_left, bottom_right, (0, 0, 255), 2)

In [77]:
# Disabled copy of the display code: the same three calls appear, enabled, in the next cell.
#cv2.imshow("Text Detection", orig) #Showing the output image with text detected
#cv2.waitKey(0)
#cv2.destroyAllWindows()

In [78]:
# Show the output image with the detected text outlined and block until a key
# is pressed. The windows are closed in a `finally` clause so they are also
# torn down if the wait is interrupted (for example by interrupting the
# kernel) instead of being left open.
try:
    cv2.imshow("Text Detection", orig)
    cv2.waitKey(0)
finally:
    cv2.destroyAllWindows()