# OMR SHEET Roll Number Extraction


In [21]:
import cv2
import numpy as np

# Load the OMR sheet as a single-channel greyscale image
image = cv2.imread('screens/rsz_1rsz_1omr4.jpg', cv2.IMREAD_GRAYSCALE)
# cv2.imread returns None instead of raising when the file cannot be read,
# so fail here with a clear message rather than later inside cv2.imshow
if image is None:
    raise FileNotFoundError("Could not read image 'screens/rsz_1rsz_1omr4.jpg'")
cv2.imshow('Original', image)
cv2.waitKey(0)
cv2.destroyAllWindows()



In [22]:
# Enlarge the sheet by a factor of 1.2 along both axes (bicubic interpolation)
image2 = cv2.resize(
    image,
    None,
    fx=1.2,
    fy=1.2,
    interpolation=cv2.INTER_CUBIC,
)
cv2.imshow('Scaling (Interpolation)', image2)
cv2.waitKey(0)
cv2.destroyAllWindows()

In [23]:
# Smooth the enlarged sheet with a 5x5 Gaussian kernel to suppress noise
blur = cv2.GaussianBlur(image2, ksize=(5, 5), sigmaX=0)
cv2.imshow('Gaussian Blurring', blur)
cv2.waitKey(0)
cv2.destroyAllWindows()

In [24]:
# Inverse binary threshold of the blurred sheet.
# The cut-off value (225 here) can be tuned to suit the image.
ret, thresh = cv2.threshold(
    src=blur,
    thresh=225,
    maxval=255,
    type=cv2.THRESH_BINARY_INV,
)
cv2.imshow('Threshold Binary', thresh)
cv2.waitKey(0)
cv2.destroyAllWindows()

In [25]:
# Edge map of the binary image; Canny takes two gradient thresholds (75 and 200)
edged = cv2.Canny(thresh, threshold1=75, threshold2=200)
cv2.imshow('Canny', edged)
cv2.waitKey(0)
cv2.destroyAllWindows()

In [26]:
# Find the outer contours on the edge map and print how many were found
warpedEdged = edged.copy()
# cv2.findContours returns (contours, hierarchy) in OpenCV 2.x/4.x but
# (image, contours, hierarchy) in 3.x; the last two items are always the
# contours and the hierarchy, so slice instead of unpacking the raw result.
contours, hierarchy = cv2.findContours(
    warpedEdged.copy(), cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_NONE
)[-2:]
print ("Total Number of contours found = ", len(contours))

# Keep only the contours whose bounding box looks like a bubble:
# at least 20 px wide and tall, with an aspect ratio close to 1.
circularCnts = []
for c in contours:
    (x, y, w, h) = cv2.boundingRect(c)
    if w < 20 or h < 20:
        # too small to be a bubble
        continue
    ar = w / float(h)
    if 0.9 <= ar <= 1.1:
        circularCnts.append(c)

print ("Number of Flitered contours found = ", len(circularCnts))


Total Number of contours found =  154
Number of Flitered contours found =  61


In [27]:
# Stricter circle filter: extract a reference circle contour from a template
# image. Each filtered contour can then be compared against it with
# cv2.matchShapes so that the remaining noisy contours are removed.

# Load the circular template image as greyscale
template = cv2.imread('screens/Black_Circle.jpg', cv2.IMREAD_GRAYSCALE)
# cv2.imread returns None (it does not raise) when the file cannot be read
if template is None:
    raise FileNotFoundError("Could not read image 'screens/Black_Circle.jpg'")
cv2.imshow('Template', template)
cv2.waitKey(0)
cv2.destroyAllWindows()

# Threshold image first before using cv2.findContours
ret, thresh1 = cv2.threshold(template, 127, 255, cv2.THRESH_BINARY)

# Find contours in template; [-2:] keeps (contours, hierarchy) whether
# findContours returns two values (OpenCV 2.x/4.x) or three (3.x)
contours2, hierarchy2 = cv2.findContours(
    thresh1, cv2.RETR_TREE, cv2.CHAIN_APPROX_SIMPLE
)[-2:]

# Sort the contours by area, largest first, so that the largest contour
# (the image outline) can be skipped
sorted_contours = sorted(contours2, key=cv2.contourArea, reverse=True)

# The second largest contour is used as the template contour; fail with a
# clear message instead of a bare IndexError when it does not exist
if len(sorted_contours) < 2:
    raise ValueError(
        "Expected at least 2 contours in the template image, found %d"
        % len(sorted_contours)
    )
template_contour = sorted_contours[1]

In [28]:
# Draw on a BGR canvas: on a single-channel greyscale image only the first
# component of the colour (0 here) would be used, giving black outlines.
target = cv2.cvtColor(image2, cv2.COLOR_GRAY2BGR) if image2.ndim == 2 else image2.copy()

# Compare every candidate with the template contour and keep the close matches.
# A new list is built instead of calling circularCnts.remove(c) inside the
# loop: removing from a list while iterating over it skips the element that
# follows each removed one, so some contours were never checked at all.
matchedCnts = []
for c in circularCnts:
    # method 3 is the I3 contour-matching metric; lower score = more similar
    match = cv2.matchShapes(template_contour, c, 3, 0.0)
    if match < 0.03:
        matchedCnts.append(c)
        cv2.drawContours(target, [c], -1, (0,255,255), 1)
circularCnts = matchedCnts

#----------------------------------------
print(len(circularCnts))

cv2.imshow('Output', target)
cv2.waitKey()
cv2.destroyAllWindows()

60


In [29]:
def x_cord_contour(contours):
    """Return the X coordinate of a contour's centroid.

    Args:
        contours: a single contour, as accepted by cv2.moments.

    Returns:
        int: m10 / m00 truncated to an integer. When the contour has zero
        area (m00 == 0) the centroid is undefined, so the horizontal centre
        of the contour's bounding rectangle is returned instead.
    """
    M = cv2.moments(contours)
    if M['m00'] == 0:
        # Degenerate contour: avoid ZeroDivisionError
        x, _, w, _ = cv2.boundingRect(contours)
        return int(x + w / 2)
    return int(M['m10'] / M['m00'])
    
def y_cord_contour(contours):
    """Return the Y coordinate of a contour's centroid.

    Args:
        contours: a single contour, as accepted by cv2.moments.

    Returns:
        int: m01 / m00 truncated to an integer. When the contour has zero
        area (m00 == 0) the centroid is undefined, so the vertical centre
        of the contour's bounding rectangle is returned instead.
    """
    M = cv2.moments(contours)
    if M['m00'] == 0:
        # Degenerate contour: avoid ZeroDivisionError
        _, y, _, h = cv2.boundingRect(contours)
        return int(y + h / 2)
    return int(M['m01'] / M['m00'])
    
# The bubbles have to be read column by column:
# first order every bubble by the X coordinate of its centroid (left to
# right); each consecutive group of 10 is then sorted by y_cord_contour
# to order one column from top to bottom.
# `questionCnts` is never defined in this notebook (NameError on a fresh
# kernel); the filtered bubble contours are stored in `circularCnts`.
contours_top_to_bottom = sorted(circularCnts, key=x_cord_contour)

In [30]:
# Canvas for the final annotated result. image2 is single-channel greyscale,
# on which a BGR colour such as (0, 0, 255) is drawn using only its first
# component (black), so convert to BGR to make coloured outlines visible.
paper = cv2.cvtColor(image2, cv2.COLOR_GRAY2BGR) if image2.ndim == 2 else image2.copy()

In [31]:
warpedThresh = thresh
roll = ""
# Number of bubbles grouped into one column; the index of the marked bubble
# inside its column (0-9) is the digit appended to the roll number
BUBBLES_PER_COLUMN = 10

# A trailing partial column cannot be mapped to digits reliably, so make the
# situation visible instead of silently producing a wrong digit
if len(contours_top_to_bottom) % BUBBLES_PER_COLUMN != 0:
    print("Warning: expected a multiple of", BUBBLES_PER_COLUMN,
          "bubbles but found", len(contours_top_to_bottom))

for i in range(0, len(contours_top_to_bottom), BUBBLES_PER_COLUMN):
    # sort the contours of the current column from top to bottom, then
    # initialize the (pixel count, index) of the bubbled answer
    cnts = sorted(contours_top_to_bottom[i:i + BUBBLES_PER_COLUMN], key=y_cord_contour)
    bubbled = None

    # loop over the sorted contours
    for (j, c) in enumerate(cnts):
        # construct a mask that reveals only the current
        # "bubble" for the column
        mask = np.zeros(warpedThresh.shape, dtype="uint8")
        cv2.drawContours(mask, [c], -1, 255, -1)

        # apply the mask to the thresholded image so that only the
        # pixels inside the bubble remain
        mask = cv2.bitwise_and(warpedThresh, warpedThresh, mask=mask)

        # illustrate how the mask works for the first column only
        if i == 0:
            cv2.imshow('Outputly', warpedThresh)
            cv2.imshow('Output', mask)
            cv2.waitKey()
            cv2.destroyAllWindows()

        # count the non-zero pixels in the bubble area
        total = cv2.countNonZero(mask)

        # the bubble with the largest number of non-zero pixels is the
        # marked one (the first one wins on ties)
        if bubbled is None or total > bubbled[0]:
            bubbled = (total, j)

    # colour used to outline the detected bubble
    color = (0, 0, 255)
    k = bubbled[1]
    roll = roll + str(k)
    # draw the outline of the detected bubble on the sheet
    cv2.drawContours(paper, [cnts[k]], -1, color, 3)

In [34]:
# Show the input image and the annotated sheet until a key is pressed
for window_title, shown in (("Original", image), ("Drawed", paper)):
    cv2.imshow(window_title, shown)
cv2.waitKey(0)
cv2.destroyAllWindows()

In [33]:
# Print the roll number string assembled above (one digit per bubble column)
print(roll)

062525
