In [1]:
import cv2
import imutils
import numpy as np
from matplotlib import pyplot as plt
import matplotlib.cm as cm

from skimage.filters import threshold_local
from craft_hw_ocr import OCR

  from .autonotebook import tqdm as notebook_tqdm


In [2]:

class ShapeDetector:
    """Find text boxes and simple geometric shapes in a single-channel image.

    Text is located and read with the ``craft_hw_ocr`` models, the text boxes
    are painted black, and the contours that remain are labelled by the number
    of vertices of their polygon approximation.
    """

    def __init__(self):
        # Loaded once per instance. Index 2 is handed to OCR.detection and
        # indices 0 and 1 to OCR.recoginition (see the ``OCR`` method).
        self.ocr_models = OCR.load_models()

    def predict_shape(self, c):
        """Label a contour by the vertex count of its approximated polygon.

        Args:
            c: A contour as accepted by ``cv2.arcLength`` / ``cv2.approxPolyDP``.

        Returns:
            ``"triangle"``, ``"square"``, ``"rectangle"``, ``"pentagon"`` or
            ``"circle"``; ``None`` when the approximation has fewer than three
            vertices.
        """
        peri = cv2.arcLength(c, True)
        # Approximation tolerance is 4% of the contour perimeter.
        approx = cv2.approxPolyDP(c, 0.04 * peri, True)
        vertices = len(approx)

        if vertices < 3:
            return None
        if vertices == 3:
            return "triangle"
        if vertices == 4:
            # A bounding box whose aspect ratio is within 5% of 1 counts as a square.
            (x, y, w, h) = cv2.boundingRect(approx)
            ar = w / float(h)
            return "square" if 0.95 <= ar <= 1.05 else "rectangle"
        if vertices == 5:
            return "pentagon"
        # Anything with more than five vertices is treated as a circle.
        return "circle"

    def OCR(self, image):
        """Run text detection followed by text recognition on ``image``.

        Args:
            image: Image passed straight to ``OCR.detection``.

        Returns:
            A tuple ``(bboxes, img_text_cleared, texts)`` where ``bboxes`` and
            ``texts`` come from ``OCR.recoginition`` and ``img_text_cleared`` is
            a copy of the image returned by ``OCR.detection``, taken before
            recognition runs. The copy is not modified here; ``detect_shapes``
            is what blanks the text out of it.
        """
        # Inside a method body the bare name ``OCR`` resolves to the imported
        # module-level object, not to this method of the same name.
        img_ocr, results = OCR.detection(image, self.ocr_models[2])

        img_text_cleared = img_ocr.copy()

        # ``recoginition`` is spelled this way by the library being called.
        bboxes, texts = OCR.recoginition(img_ocr, results, self.ocr_models[0], self.ocr_models[1])

        return bboxes, img_text_cleared, texts

    def detect_shapes(self, image):
        """Annotate ``image`` with detected shapes and text boxes.

        Args:
            image: Single-channel image; it is converted with
                ``cv2.COLOR_GRAY2RGB`` before drawing. If the image is tilted,
                straighten it first (the original author suggests
                ``OCR.process_image``).

        Returns:
            A tuple ``(image_copy, texts)``: a 3-channel copy of ``image`` with
            shape outlines, shape labels and text boxes drawn on it, and the
            recognised texts as returned by ``OCR``.
        """
        bboxes, img_text_cleared, texts = self.OCR(image)
        image_copy = image.copy()

        # Paint every text box black so handwriting is not picked up as a shape.
        for box in bboxes.astype('int32'):
            cv2.fillPoly(img_text_cleared, pts=[box], color=(0, 0, 0))

        # Contours are searched on a 100-pixel-wide copy; ``ratio`` maps the
        # small-image coordinates back to the full-size image.
        resized = imutils.resize(img_text_cleared, width=100)
        ratio = img_text_cleared.shape[0] / float(resized.shape[0])

        cnts = cv2.findContours(resized, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
        cnts = imutils.grab_contours(cnts)

        image_copy = cv2.cvtColor(image_copy, cv2.COLOR_GRAY2RGB)

        for c in cnts:
            # Ignore specks (area below 4 px on the downscaled image).
            if cv2.contourArea(c) < 4:
                continue

            M = cv2.moments(c)
            if M["m00"] <= 0:
                # The centroid is undefined for a zero-area moment.
                continue

            shape = self.predict_shape(c)
            if shape is None:
                continue

            # Contour centroid, scaled back to full-size coordinates.
            cX = int((M["m10"] / M["m00"]) * ratio)
            cY = int((M["m01"] / M["m00"]) * ratio)

            # Scale the contour itself before drawing it on the full-size image.
            c = c.astype("float")
            c *= ratio
            c = c.astype("int")

            cv2.drawContours(image_copy, [c], -1, (255, 0, 0), 2)
            cv2.putText(image_copy, shape, (cX, cY), cv2.FONT_HERSHEY_SIMPLEX,
                        0.5, (0, 0, 255), 2)

        # Outline each text box using its first and third corner points.
        for box in bboxes:
            x1, y1 = int(box[0][0]), int(box[0][1])
            x2, y2 = int(box[2][0]), int(box[2][1])
            cv2.rectangle(image_copy, (x1, y1), (x2, y2), (0, 0, 255), 2)

        return image_copy, texts
			


In [3]:

class Preprocessing:
    """Image clean-up steps that run before shape and text detection."""

    def __init__(self) -> None:
        # No state: every method works only on its arguments.
        pass

    def shadow_remove(self, img):
        """Even out the background of every channel of ``img``.

        Each channel is dilated with a 17x17 kernel and median-blurred
        (aperture 21) to build ``bg_img``. The absolute difference between the
        channel and that image is inverted and min-max normalised to the range
        40..255.

        Args:
            img: Image accepted by ``cv2.split``.

        Returns:
            The processed channels merged back into one image.
        """
        result_norm_planes = []
        for plane in cv2.split(img):
            dilated_img = cv2.dilate(plane, np.ones((17, 17), np.uint8))
            bg_img = cv2.medianBlur(dilated_img, 21)
            diff_img = 255 - cv2.absdiff(plane, bg_img)
            norm_img = cv2.normalize(diff_img, None, alpha=40, beta=255,
                                     norm_type=cv2.NORM_MINMAX, dtype=cv2.CV_8UC1)
            result_norm_planes.append(norm_img)
        return cv2.merge(result_norm_planes)

    def order_points(self, pts):
        """Order four (x, y) points for ``four_point_transform``.

        Args:
            pts: Array of shape (4, 2).

        Returns:
            A float32 array of shape (4, 2) in the order that
            ``four_point_transform`` unpacks as top-left, top-right,
            bottom-right, bottom-left.
        """
        rect = np.zeros((4, 2), dtype="float32")

        # Smallest x + y goes to slot 0, largest x + y to slot 2.
        s = pts.sum(axis=1)
        rect[0] = pts[np.argmin(s)]
        rect[2] = pts[np.argmax(s)]

        # Smallest y - x goes to slot 1, largest y - x to slot 3.
        diff = np.diff(pts, axis=1)
        rect[1] = pts[np.argmin(diff)]
        rect[3] = pts[np.argmax(diff)]

        return rect

    def four_point_transform(self, image, pts):
        """Warp the quadrilateral ``pts`` of ``image`` onto an upright rectangle.

        Args:
            image: Source image.
            pts: Array of shape (4, 2) with the quadrilateral's corners, in any
                order.

        Returns:
            The warped image. Its width and height are the longer of the two
            opposite sides of the quadrilateral, truncated to integers.
        """
        def _side(p, q):
            # Euclidean distance between two corner points.
            return np.sqrt(((p[0] - q[0]) ** 2) + ((p[1] - q[1]) ** 2))

        rect = self.order_points(pts)
        (tl, tr, br, bl) = rect

        maxWidth = max(int(_side(br, bl)), int(_side(tr, tl)))
        maxHeight = max(int(_side(tr, br)), int(_side(tl, bl)))

        # Destination corners, in the same order as ``rect``.
        dst = np.array([
            [0, 0],
            [maxWidth - 1, 0],
            [maxWidth - 1, maxHeight - 1],
            [0, maxHeight - 1]], dtype="float32")

        M = cv2.getPerspectiveTransform(rect, dst)
        return cv2.warpPerspective(image, M, (maxWidth, maxHeight))

    def scan_image(self, image):
        """Cut the largest four-sided outline out of ``image`` and straighten it.

        Edges are detected on a copy resized to a height of 500 px. The five
        largest contours are then checked in order of decreasing area, and the
        first whose polygon approximation has four vertices is used.

        Args:
            image: BGR image (it is converted with ``cv2.COLOR_BGR2GRAY``).

        Returns:
            The perspective-corrected region taken from the full-resolution
            image, with 20 px trimmed from every side.

        Raises:
            ValueError: If none of the five largest contours approximates to a
                quadrilateral. Previously this case failed with an
                ``UnboundLocalError`` on ``screenCnt``.
        """
        # Scale factor from the 500-px-high working copy back to the original.
        ratio = image.shape[0] / 500.0
        orig = image.copy()
        image = imutils.resize(image, height=500)

        gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
        gray = cv2.GaussianBlur(gray, (5, 5), 0)
        edged = cv2.Canny(gray, 100, 255)

        cnts = cv2.findContours(edged.copy(), cv2.RETR_LIST, cv2.CHAIN_APPROX_SIMPLE)
        cnts = imutils.grab_contours(cnts)
        cnts = sorted(cnts, key=cv2.contourArea, reverse=True)[:5]

        screenCnt = None
        for c in cnts:
            peri = cv2.arcLength(c, True)
            # Approximation tolerance is 2% of the contour perimeter.
            approx = cv2.approxPolyDP(c, 0.02 * peri, True)
            if len(approx) == 4:
                screenCnt = approx
                break

        if screenCnt is None:
            raise ValueError(
                "scan_image: none of the largest contours has four corners; "
                "cannot locate the region to scan"
            )

        warped = self.four_point_transform(orig, screenCnt.reshape(4, 2) * ratio)
        # Trim a 20 px margin from every side of the warped result.
        warped = warped[20:-20, 20:-20]

        return warped

    def preprocess(self, image):
        """Turn a BGR image into the binary image used for contour detection.

        Steps, in order: grayscale, erosion then dilation (2x2 kernel, three
        iterations each), division by a Gaussian-blurred copy, CLAHE plus a
        constant offset, inversion, morphological opening (5x5), Otsu
        binarisation, and a final inversion.

        Args:
            image: BGR image (it is converted with ``cv2.COLOR_BGR2GRAY``).

        Returns:
            The inverse of the Otsu-binarised image.
        """
        img_copy = cv2.cvtColor(image.copy(), cv2.COLOR_BGR2GRAY)

        # Erosion followed by dilation with the same small kernel.
        kernel = np.ones((2, 2), np.uint8)
        img_erosion = cv2.erode(img_copy, kernel, iterations=3)
        dilated = cv2.dilate(img_erosion, kernel, iterations=3)

        # Divide the image by its blurred version.
        blur = cv2.GaussianBlur(dilated, (15, 15), sigmaX=33, sigmaY=33)
        divided = cv2.divide(dilated, blur, scale=255)

        clahe = cv2.createCLAHE(clipLimit=1)
        # NOTE(review): if ``divided`` is uint8, this ``+ 30`` is NumPy uint8
        # arithmetic and wraps modulo 256 instead of saturating. The inversion
        # and threshold below operate on that wrapped result, so confirm the
        # intent before swapping in a saturating add.
        final_img = clahe.apply(divided) + 30

        final_img = cv2.bitwise_not(final_img)

        kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (5, 5))
        opening = cv2.morphologyEx(final_img, cv2.MORPH_OPEN, kernel)

        # With THRESH_OTSU set, OpenCV picks the threshold itself; only the
        # binary image is needed here.
        _, im_bw = cv2.threshold(opening, 20, 255, cv2.THRESH_BINARY | cv2.THRESH_OTSU)

        final_img = cv2.bitwise_not(im_bw)

        return final_img

In [4]:
# Build the two pipeline stages once so the cells below can reuse them.
# ShapeDetector() calls OCR.load_models() in its constructor; that is
# presumably the source of the model-loading messages in this cell's output.
preprocessing = Preprocessing()
shape_detector = ShapeDetector()

Could not find image processor class in the image processor config or the model config. Loading based on pattern matching with the model's feature extractor configuration.
Some weights of VisionEncoderDecoderModel were not initialized from the model checkpoint at microsoft/trocr-base-handwritten and are newly initialized: ['encoder.pooler.dense.bias', 'encoder.pooler.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


In [5]:
import os

# Folder that is scanned for input images and folder that receives results.
input_folder = "./images/"
output_folder = "./output/"

# Regular files only (sub-directories are skipped). Sorted because the order
# returned by os.listdir is arbitrary, and a batch run should be reproducible.
filenames = sorted(
    f for f in os.listdir(input_folder)
    if os.path.isfile(os.path.join(input_folder, f))
)

# Create the output folder itself. Going through os.path.dirname() only worked
# because the path ends in a slash: without it, dirname would return the
# parent directory and the output folder would never be created.
os.makedirs(output_folder, exist_ok=True)

# Batch Process

In [6]:

# for file in filenames:
  
#     filepath = input_folder + file
#     image = cv2.imread(filepath)

#     img_shadow_removed = preprocessing.shadow_remove(image)
#     img_scanned = preprocessing.scan_image(img_shadow_removed)
#     img_preprocess = preprocessing.preprocess(img_scanned)
#     img_shape_detected, texts = shape_detector.detect_shapes(img_preprocess)

#     fig = plt.figure(figsize=(15, 20))

#     figures = { "Original": image,
#                 "Shadow Removed": img_shadow_removed,
#                 "Scanned": img_scanned,
#                 "Shape Detection": img_shape_detected
#                 }

#     for i,key in enumerate(figures):

#         fig.add_subplot(2, 2, i+1)
#         plt.imshow(figures[key])
#         plt.axis('off')
#         plt.title(key)

    
#     fig.savefig(output_folder + file.split('.')[0]+"_figure.jpg")
#     cv2.imwrite(output_folder + file.split('.')[0]+"_detected.jpg",img_shape_detected)
#     with open(output_folder + file.split('.')[0]+".txt", "w") as f:
#         f.write(texts)


# Single Process

In [11]:

# Step-by-step experiment: median blur -> Canny -> erosion/dilation, shown in
# OpenCV windows. Each cv2.waitKey() blocks until a key is pressed (or, where a
# value is given, until that many milliseconds have passed).
image_path = "./images/1.jpg"
image = cv2.imread(image_path)
# cv2.imread reports an unreadable or missing file by returning None, which
# would otherwise only surface as an obscure error inside shadow_remove.
if image is None:
    raise FileNotFoundError("Could not read image: " + image_path)

# Remove shadow
img_shadow_removed = preprocessing.shadow_remove(image)

cv2.imshow("orjinal.jpg", img_shadow_removed)
cv2.waitKey()

# Remove background by scanning the image
img_scanned = preprocessing.scan_image(img_shadow_removed)

cv2.imshow("img_scanned.jpg", img_scanned)
cv2.waitKey()

# Convert image to grayscale
img_copy = cv2.cvtColor(img_scanned.copy(), cv2.COLOR_BGR2GRAY)

cv2.imshow("grayscale.jpg", img_copy)
cv2.waitKey()


# Each morphology kernel size is paired with the median-blur aperture at the
# same position.
kernelSizes = [(2, 2), (3, 3), (5, 5)]
blur = [3, 5, 7]

# Loop over the kernel sizes
for kernelSize, blurSize in zip(kernelSizes, blur):

    img_blur = cv2.medianBlur(img_copy, blurSize)
    cv2.imshow("blur", img_blur)
    cv2.waitKey(10000)

    edged = cv2.Canny(img_blur, 30, 30)

    cv2.imshow("edged.jpg", edged)
    cv2.waitKey(10000)

    # Invert the edge map before applying the morphology steps.
    edged = cv2.bitwise_not(edged)

    # Erosion - Dilation: removes small blobs from the image.
    # The kernel now follows the size being iterated; it used to be fixed at
    # (2, 2), so the loop over kernelSizes had no effect on this step.
    kernel = np.ones(kernelSize, np.uint8)

    img_erosion = cv2.erode(edged, kernel, iterations=1)

    cv2.imshow("erosion.jpg", img_erosion)
    cv2.waitKey()

    dilated = cv2.dilate(img_erosion, kernel, iterations=1)

    cv2.imshow("dilated.jpg", dilated)
    cv2.waitKey()

    # Close this iteration's windows before showing the next kernel size.
    cv2.destroyAllWindows()




In [13]:

# Step-by-step experiment: median blur -> Canny -> morphological opening and
# closing, shown in OpenCV windows. Each cv2.waitKey() blocks until a key is
# pressed (or, where a value is given, until that many milliseconds have passed).
image_path = "./images/1.jpg"
image = cv2.imread(image_path)
# cv2.imread reports an unreadable or missing file by returning None, which
# would otherwise only surface as an obscure error inside shadow_remove.
if image is None:
    raise FileNotFoundError("Could not read image: " + image_path)

# Remove shadow
img_shadow_removed = preprocessing.shadow_remove(image)

cv2.imshow("orjinal.jpg", img_shadow_removed)
cv2.waitKey()

# Remove background by scanning the image
img_scanned = preprocessing.scan_image(img_shadow_removed)

cv2.imshow("img_scanned.jpg", img_scanned)
cv2.waitKey()

# Convert image to grayscale
img_copy = cv2.cvtColor(img_scanned.copy(), cv2.COLOR_BGR2GRAY)

cv2.imshow("grayscale.jpg", img_copy)
cv2.waitKey()


# Each structuring-element size is paired with the median-blur aperture at the
# same position.
kernelSizes = [(2, 2), (3, 3), (5, 5)]
blur = [3, 5, 7]

# Loop over the kernel sizes
for kernelSize, blurSize in zip(kernelSizes, blur):

    img_blur = cv2.medianBlur(img_copy, blurSize)
    cv2.imshow("blur", img_blur)
    cv2.waitKey(10000)

    edged = cv2.Canny(img_blur, 30, 30)

    cv2.imshow("edged.jpg", edged)
    cv2.waitKey(10000)

    # Invert the edge map before applying the morphology steps.
    edged = cv2.bitwise_not(edged)

    # One rectangular structuring element serves both operations.
    kernel = cv2.getStructuringElement(cv2.MORPH_RECT, kernelSize)

    opening = cv2.morphologyEx(edged, cv2.MORPH_OPEN, kernel)
    cv2.imshow("Opening: ({}, {})".format(kernelSize[0], kernelSize[1]), opening)
    cv2.waitKey(0)

    # Closing is applied to the opened image, not to the raw edge map.
    closing = cv2.morphologyEx(opening, cv2.MORPH_CLOSE, kernel)
    cv2.imshow("Closing: ({}, {})".format(kernelSize[0], kernelSize[1]), closing)
    cv2.waitKey(0)

    # Close this iteration's windows before showing the next kernel size.
    cv2.destroyAllWindows()



In [8]:
# image = cv2.imread("./images/7.jpg")

# # Remove shadow
# img_shadow_removed = preprocessing.shadow_remove(image)

# cv2.imshow("orjinal.jpg",img_shadow_removed)
# cv2.waitKey()

# # Remove background by scanning the image
# img_scanned = preprocessing.scan_image(img_shadow_removed)

# cv2.imshow("img_scanned.jpg",img_scanned)
# cv2.waitKey()

# # Convert image to grayscale
# img_copy = cv2.cvtColor(img_scanned.copy(), cv2.COLOR_BGR2GRAY)

# cv2.imshow("grayscale.jpg",img_copy)
# cv2.waitKey()


# # Erosion - Dilation : Removes small blobs from image

# kernel = np.ones((2,2), np.uint8)

# img_erosion = cv2.erode(img_copy, kernel, iterations=3)

# cv2.imshow("erosion.jpg",img_erosion)
# cv2.waitKey()

# dilated = cv2.dilate(img_erosion, kernel, iterations=3)

# cv2.imshow("dilated.jpg",dilated)
# cv2.waitKey()


# # Divide image with its blurred version to clean background

# blur = cv2.GaussianBlur(dilated, (15,15), sigmaX=33, sigmaY=33)

# cv2.imshow("blur.jpg",blur)
# cv2.waitKey()

# divided = cv2.divide(dilated, blur, scale=255)

# cv2.imshow("divide.jpg",divided)
# cv2.waitKey()

# # Over-amplification of the contrast by CLAHE histogram equalization

# clahe = cv2.createCLAHE(clipLimit = 1)
# img_clahe = clahe.apply(divided) + 30

# cv2.imshow("img_clahe.jpg",img_clahe)
# cv2.waitKey()

# # Morph Open cleans small blobs from the image

# img_clahe = cv2.bitwise_not(img_clahe)

# cv2.imshow("bitwise_not.jpg",img_clahe)
# cv2.waitKey()

# kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (5, 5))
# opening = cv2.morphologyEx(img_clahe, cv2.MORPH_OPEN, kernel)

# cv2.imshow("MORPH_OPEN.jpg",opening)
# cv2.waitKey()


# #Binarize image

# (thresh, im_bw) = cv2.threshold(opening, 20, 255, cv2.THRESH_BINARY | cv2.THRESH_OTSU)

# # findContours expects white lines over black background

# final_img = cv2.bitwise_not(im_bw)

# cv2.imshow("final_img.jpg",final_img)
# cv2.waitKey()


# cv2.destroyAllWindows()