In [None]:
## Vaishu Myadam (vmyadam1208@gmail.com)
## June, 2020

## Inspired by pyimagesearch tutorial
## Additions and improvements were made in terms of using fewer modules, converting to PDF, doing calculations from scratch, and not grayscaling the end result

In [None]:
# Necessary imports

import numpy as np 
import cv2
from fpdf import FPDF

In [None]:
# Load the photo in colour (flag 1) and fail early if it could not be read:
# cv2.imread returns None instead of raising when the file is missing or unreadable.
img = cv2.imread("image.jpg", 1)
if img is None:
    raise FileNotFoundError('Could not read "image.jpg"; check that the file exists and is a valid image')

# Resize, grayscale, blur, edges -> contours

img = cv2.resize(img, (600, 800))
greyed_image = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
blurred_image = cv2.GaussianBlur(greyed_image, (5, 5), 0)
edged_image = cv2.Canny(blurred_image, 0, 50)

# Finding the biggest contour with 4 edges because this is meant to scan documents

contours = cv2.findContours(edged_image, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
# findContours returns a 2-tuple or a 3-tuple depending on the OpenCV version;
# pick whichever element holds the contour list.
contours = contours[0] if len(contours) == 2 else contours[1]
contours = sorted(contours, key=cv2.contourArea, reverse=True)

# Walk the contours from largest to smallest area and keep the first one that
# simplifies to exactly four vertices.
doc = None
for contour in contours:
    perimeter = cv2.arcLength(contour, True)
    # Approximation tolerance is 8% of the closed contour's perimeter.
    approx = cv2.approxPolyDP(contour, 0.08 * perimeter, True)

    if len(approx) == 4:
        doc = approx
        break

if doc is None:
    raise ValueError("No four-cornered contour was found in the image; cannot locate the document")

# NOTE: the outline is drawn directly onto `img`, so every later use of `img`
# (display and perspective warp) includes the green contour.
cv2.drawContours(img, [doc], -1, (0, 255, 0), 2)
doc = doc.reshape((4, 2))

In [None]:
# Order the four detected corners as top-left, top-right, bottom-right, bottom-left.
#
# For each corner (x, y):
#   * x + y is smallest at the top-left and largest at the bottom-right
#   * y - x is smallest at the top-right and largest at the bottom-left

coord_sums = doc.sum(axis=1)
coord_diffs = np.diff(doc, axis=1)

array = np.array(
    [
        doc[np.argmin(coord_sums)],   # top-left
        doc[np.argmin(coord_diffs)],  # top-right
        doc[np.argmax(coord_sums)],   # bottom-right
        doc[np.argmax(coord_diffs)],  # bottom-left
    ],
    dtype="float32",
)

tl, tr, br, bl = array

In [None]:
# Finding the maximum distances
#
# Uses the ordered corners (tl, tr, br, bl) and their float32 container `array`
# to size the output and warp the document to a top-down view.

# Despite their names, `left` and `right` are the lengths of the bottom and
# top edges; the longer one becomes the output width.
left = np.linalg.norm(br - bl)
right = np.linalg.norm(tr - tl)

length = max(int(left), int(right))

# Likewise `up` and `down` are the lengths of the right and left edges;
# the longer one becomes the output height.
up = np.linalg.norm(tr - br)
down = np.linalg.norm(tl - bl)

height = max(int(up), int(down))

# Destination corners, listed in the same tl, tr, br, bl order as the source corners.
dst = np.array([[0, 0], [length - 1, 0], [length - 1, height - 1], [0, height - 1]], dtype="float32")

# The source points are the ordered float32 corners held in `array`.
pers = cv2.getPerspectiveTransform(array, dst)
warp = cv2.warpPerspective(img, pers, (length, height))

# NOTE(review): the notebook header says the end result is not grayscaled, yet
# the warped image is converted to grayscale here - confirm which is intended.
scanned_image = cv2.cvtColor(warp, cv2.COLOR_BGR2GRAY)
scanned_image = cv2.resize(scanned_image, (600, 800))

In [None]:
# Display each stage of the pipeline in its own window

stages = [
    ("original_image.jpg", img),
    ("grey_image.jpg", greyed_image),
    ("blurred_image.jpg", blurred_image),
    ("edged_image.jpg", edged_image),
    ("contoured_image.jpg", img),
    ("scanned_image.jpg", scanned_image),
]

for window_title, frame in stages:
    cv2.imshow(window_title, frame)

# Write the final scan to disk
cv2.imwrite("scanned_image.jpg", scanned_image)

# Wait for a key press, then close every window
cv2.waitKey(0)
cv2.destroyAllWindows()

In [None]:
# Export the saved scan as a single-page PDF

pdf = FPDF()
pdf.add_page()

# Place the image at the page origin, stretched to 210 x 297 (A4 in millimetres)
pdf.image("scanned_image.jpg", x=0, y=0, w=210, h=297)
pdf.output(name="pdf.pdf", dest="F")