In [1]:
import cv2
import pytesseract
import os, re
import Levenshtein
# Tell pytesseract which Tesseract executable to launch.
# NOTE(review): this is a machine-specific Windows path; it has to be adjusted
# (or made configurable) before the notebook can run on another machine.
pytesseract.pytesseract.tesseract_cmd = r'C:/Users/prabh/Tesseract-OCR/tesseract.exe'

## Read the file paths from the folder.

In [2]:

def list_files_in_folder(folder_path):
    """Return the path of every file found under ``folder_path``, recursively.

    Each path is built by joining a directory yielded by ``os.walk`` with one
    of the file names it contains, so the returned paths keep the
    ``folder_path`` prefix exactly as it was passed in. Directories themselves
    are not included. A folder that does not exist yields an empty list.
    """
    return [
        os.path.join(current_dir, name)
        for current_dir, _subdirs, names in os.walk(folder_path)
        for name in names
    ]

# Collect every file under the relative "assets/" folder; later cells iterate
# over file_paths.
folder_path = "assets/"
file_paths = list_files_in_folder(folder_path)

## Check whether a face is visible

In [3]:
def is_face_visible(image_path):
    """Report whether at least one frontal face is detected in an image file.

    The file is loaded with OpenCV, converted to grayscale and scanned with
    the bundled ``haarcascade_frontalface_default.xml`` Haar cascade.

    Args:
        image_path: Path of the image file to inspect.

    Returns:
        True if the cascade reports one or more faces, otherwise False.
        False is also returned when the file cannot be decoded as an image.
    """
    img = cv2.imread(image_path)
    if img is None:
        # cv2.imread does not raise on an unreadable or non-image file; it
        # returns None, which would make cvtColor fail below. The folder scan
        # returns every file it finds, so treat such files as "no face".
        return False
    gray_img = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)

    face_cascade = cv2.CascadeClassifier(cv2.data.haarcascades + 'haarcascade_frontalface_default.xml')
    faces = face_cascade.detectMultiScale(gray_img, scaleFactor=1.1, minNeighbors=5, minSize=(30, 30))
    return len(faces) > 0

# Print the face-detection result for each collected file, one line per file.
for file in file_paths:
    print(is_face_visible(str(file)))

True
False
True
True


## Match the processed text against the list of required data fields to measure accuracy.

In [4]:
def find_similar_parts(target, input_text, threshold = 0.60):
    """Find the run of consecutive words in ``input_text`` most similar to ``target``.

    Every window of consecutive whitespace-separated words is joined with
    single spaces and scored as
    ``1 - Levenshtein.distance(target, window) / max(len(target), len(window))``.

    Args:
        target: The label text to look for.
        input_text: The text to search (e.g. OCR output).
        threshold: Minimum similarity a window needs to be accepted.

    Returns:
        The best-scoring window (the earliest one on ties) as a string, or
        ``False`` when no window reaches ``threshold``.
    """
    words = input_text.split()
    target_len = len(target)
    best_part = False
    best_similarity = None

    for i in range(len(words)):
        for j in range(i + 1, len(words) + 1):
            part = ' '.join(words[i:j])
            part_len = len(part)

            # The edit distance is at least the length difference, so a window
            # longer than the target can never score above this bound. The
            # bound only shrinks as the window grows, so once it drops below
            # the threshold no longer window starting at i can qualify.
            if part_len > target_len and 1 - (part_len - target_len) / part_len < threshold:
                break

            similarity = 1 - Levenshtein.distance(target, part) / max(target_len, part_len)
            # Strict ">" keeps the first window among equally good ones.
            if similarity >= threshold and (best_similarity is None or similarity > best_similarity):
                best_part = part
                best_similarity = similarity

    return best_part


## Main functions

In [5]:
def findMissMatchPercentage(image_path):
    """Print the validation report for one image file.

    The image is expanded into its pre-processed variants, the variants are
    checked against the expected left-aligned and centre-aligned field labels,
    and each entry of the resulting status dict is printed as ``key : value``
    followed by one blank line. Nothing is returned.
    """
    expected_left_aligned = ["Date of Birth", "Address", "Expiration Date"]
    expected_centered = ["Blood Type", "Agency Code", "Eyes Color"]

    variants = multyProcessImages(image_path)
    report = computeStatus(variants, expected_left_aligned, expected_centered)

    for label, value in report.items():
        print(label, ":", value)
    print()

## Executing functions

In [6]:
# Shared, module-level record of which field checks have passed: fillStatus
# flips entries to True and computeStatus reads them to build its report.
parameters = {'Date of Birth': False, 'Address': False, 'Expiration Date': False, 'Blood Type': False, 'Agency Code': False, 'Eyes Color': False}

def compare_left_allignment(part,strings,extracts):
    """Check that a date field's value starts at the same x position as its label.

    Args:
        part: The label text as it was found in the OCR output; only its first
            space-separated word is used to locate the label box.
        strings: The canonical field name. "Address" is always accepted;
            "Date of Birth" and "Expiration Date" are checked; anything else
            is rejected.
        extracts: Word-box dict as produced by ``pytesseract.image_to_data``
            with ``Output.DICT``; the 'text', 'conf' and 'left' lists are read.

    Returns:
        True when the first two located boxes (the label's first word and a
        ``YYYY/MM/DD`` token, in whichever order they occur) have left edges
        within 5 pixels of each other, or when ``strings`` is "Address".
        False otherwise, including when fewer than two boxes are found.
    """
    if strings == "Address":
        return True
    if strings not in ("Date of Birth", "Expiration Date"):
        return False

    # Raw string so that "\d" reaches the regex engine instead of being
    # treated as an (invalid) string escape.
    date_pattern = r'^(19|20)\d\d/(0[1-9]|1[012])/(0[1-9]|[12][0-9]|3[01])$'
    label_first_word = part.split(' ')[0]
    coord_list = []

    for i, text in enumerate(extracts['text']):
        # float() rather than int(): presumably confidences can arrive as
        # fractional strings such as "96.5", which int() rejects.
        if float(extracts['conf'][i]) <= 10:
            continue
        if re.match(date_pattern, text):
            coord_list.append(extracts['left'][i])
        if text == label_first_word:
            coord_list.append(extracts['left'][i])
        if len(coord_list) >= 2:
            # abs(): the boxes may be met in either order, and an offset in
            # either direction means the two are not left-aligned.
            return abs(coord_list[0] - coord_list[1]) <= 5

    return False

def compare_center_allignment(part,strings,extracts):
    """Check that a field's value is horizontally centred under/over its label.

    The value box is located with a per-field regex and the label boxes by
    exact match against the words of ``part``. ``coordinate`` accumulates the
    value's centre minus the label's first-word centre minus half the width of
    every further label word; a result near zero is treated as centred.

    Args:
        part: The label text as it was found in the OCR output.
        strings: The canonical field name ("Blood Type", "Agency Code" or
            "Eyes Color"); it selects the regex used to recognise the value.
        extracts: Word-box dict as produced by ``pytesseract.image_to_data``
            with ``Output.DICT``; the 'text', 'conf', 'left' and 'width' lists
            are read.

    Returns:
        True when the accumulated centre offset is within 5 pixels and the
        left edges of the first two located boxes differ by more than 5
        pixels (i.e. the pair is centred rather than left-aligned). False
        otherwise, including when fewer than two boxes are located.
    """
    value_patterns = {
        "Blood Type": '^(A|B|AB|O)[+-]$',
        "Agency Code": r'[A-Z][0-9]{2}',
        "Eyes Color": '^(BLACK|BLUE|GRAY)$',
    }
    pattern = value_patterns.get(strings, " ")
    label_words = part.split(' ')

    coordinate = 0
    coord_list = []
    for i, text in enumerate(extracts['text']):
        # float() rather than int(): presumably confidences can arrive as
        # fractional strings such as "96.5", which int() rejects.
        if float(extracts['conf'][i]) <= 10:
            continue
        x = extracts['left'][i]
        w = extracts['width'][i]
        if re.match(pattern, text):
            coord_list.append(x)
            coordinate += x + w/2
        if text in label_words:
            if text == label_words[0]:
                coord_list.append(x)
                coordinate -= (x+w/2)
            else:
                coordinate -= w/2

    # Without both a label box and a value box there is nothing to compare;
    # indexing coord_list here used to raise IndexError (e.g. when no box
    # matched at all and coordinate stayed at 0).
    if len(coord_list) < 2:
        return False

    # abs() on the left-edge difference: the boxes may be met in either order
    # and the value may be wider or narrower than the label.
    return abs(coordinate) <= 5 and abs(coord_list[0] - coord_list[1]) > 5

In [7]:
def isFaceVisible(gray_image):
    """Return True when the frontal-face Haar cascade finds a face in ``gray_image``.

    ``gray_image`` is passed straight to ``detectMultiScale``; the caller in
    this notebook supplies the grayscale variant of the loaded image.
    """
    cascade_file = cv2.data.haarcascades + 'haarcascade_frontalface_default.xml'
    detector = cv2.CascadeClassifier(cascade_file)
    detections = detector.detectMultiScale(
        gray_image,
        scaleFactor=1.1,
        minNeighbors=5,
        minSize=(30, 30),
    )
    return len(detections) > 0

def multyProcessImages(image_path):
    """Load an image and return several pre-processed variants of it for OCR.

    Args:
        image_path: Path of the image file to load.

    Returns:
        A list of five single-channel images, in this order: the grayscale
        image, then the truncate+Otsu thresholded versions of the
        Gaussian-blurred, grayscale, CLAHE-equalised and unsharp-masked images.

    Raises:
        ValueError: If the file cannot be read as an image.
    """
    image = cv2.imread(image_path)
    if image is None:
        # cv2.imread returns None instead of raising; fail here with a clear
        # message rather than with an opaque error inside cvtColor.
        raise ValueError("Could not read image file: {!r}".format(image_path))

    gray_image = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)

    gussian_image = cv2.GaussianBlur(src=gray_image,ksize=(3, 3), sigmaX=0,sigmaY=0)

    clahe = cv2.createCLAHE(clipLimit=2.0, tileGridSize=(12, 12))
    clached_image = clahe.apply(gray_image)

    # Unsharp mask: boost the equalised image and subtract the original.
    unsharp_image = cv2.addWeighted(clached_image, 2.0, gray_image, -1.0, 0)

    images = [gray_image]
    # NOTE(review): per the OpenCV docs, THRESH_OTSU computes its own
    # threshold, so thresh=165 is presumably ignored here; confirm.
    for source in (gussian_image, gray_image, clached_image, unsharp_image):
        _, thresholded = cv2.threshold(source, thresh=165, maxval=255, type=cv2.THRESH_TRUNC + cv2.THRESH_OTSU)
        images.append(thresholded)

    return images

def fillStatus(image,left_list,centerd_list):
    """Run OCR on one image variant and mark every field whose check passes.

    The image is OCR'd twice: once for word boxes (``image_to_data``) and once
    for plain text (``image_to_string``). Each label in ``left_list`` is
    looked up in the text and verified with ``compare_left_allignment``; each
    label in ``centerd_list`` is verified with ``compare_center_allignment``.
    A passing label sets its entry in the global ``parameters`` dict to True;
    entries are never set back to False here.
    """
    global parameters
    extracts = pytesseract.image_to_data(image, output_type=pytesseract.Output.DICT)
    extracted_text = pytesseract.image_to_string(image)

    # Left-aligned labels are processed first, then the centred ones.
    checks = (
        (left_list, compare_left_allignment),
        (centerd_list, compare_center_allignment),
    )
    for labels, is_aligned in checks:
        for strings in labels:
            part = find_similar_parts(strings, extracted_text)
            if part == False:
                # Label text was not found in this variant's OCR output.
                continue
            if is_aligned(part, strings, extracts):
                parameters[strings] = True

def computeStatus(procesd_images,left_list,centerd_list):
    """Build the validation report for one image from its pre-processed variants.

    Args:
        procesd_images: List of image variants; the first one is used for face
            detection and every one is passed through ``fillStatus``.
        left_list: Labels expected to be left-aligned with their values.
        centerd_list: Labels expected to be centre-aligned with their values.

    Returns:
        A dict with the keys 'msimatch percentage' (33.33 for a missing face
        plus 11.11 per unmatched field), 'face visibility',
        'matched parameters' and 'mismatched parameters'. The misspelt first
        key is kept as-is because it is part of the returned structure.
    """
    status = {'msimatch percentage': 0.0, 'face visibility': False, 'matched parameters': [], 'mismatched parameters': []}
    global parameters

    # The shared dict is only ever switched to True by fillStatus, so clear it
    # first; otherwise fields matched on a previous image would be reported as
    # matched for this one as well.
    for key in parameters:
        parameters[key] = False

    mmpercentage = 0.0

    face_visibility = isFaceVisible(procesd_images[0])

    if face_visibility:
        status['face visibility'] = True
    else:
        mmpercentage += 33.33

    # A field counts as matched if any one of the variants passes its check.
    for image in procesd_images:
        fillStatus(image,left_list,centerd_list)

    for key in parameters:
        if parameters[key]:
            status['matched parameters'].append(key)
        else:
            status['mismatched parameters'].append(key)
            mmpercentage += 11.11

    status['msimatch percentage'] = mmpercentage

    return status

In [8]:
## Run the full validation report for every file found in the assets folder.
# findMissMatchPercentage prints its own report and returns None, so it is
# called directly; wrapping it in print() only adds a stray "None" line.
for file in file_paths:
    findMissMatchPercentage(str(file))


msimatch percentage : 33.33
face visibility : True
matched parameters : ['Date of Birth', 'Address', 'Expiration Date']
mismatched parameters : ['Blood Type', 'Agency Code', 'Eyes Color']

None
msimatch percentage : 55.55
face visibility : False
matched parameters : ['Date of Birth', 'Address', 'Expiration Date', 'Eyes Color']
mismatched parameters : ['Blood Type', 'Agency Code']

None
msimatch percentage : 22.22
face visibility : True
matched parameters : ['Date of Birth', 'Address', 'Expiration Date', 'Eyes Color']
mismatched parameters : ['Blood Type', 'Agency Code']

None
msimatch percentage : 22.22
face visibility : True
matched parameters : ['Date of Birth', 'Address', 'Expiration Date', 'Eyes Color']
mismatched parameters : ['Blood Type', 'Agency Code']

None


In [1]:
import os

# Set the directory path
# NOTE(review): machine-specific absolute path; adjust before running elsewhere.
directory = 'W:/PersonalProject/ID_card_authentication/assets'

# Get a list of all image files in the directory (extension match ignores case)
image_extensions = ('.jpg', '.jpeg', '.png')
image_files = [f for f in os.listdir(directory) if f.lower().endswith(image_extensions)]

# Sort the list of image files alphabetically
# (plain string order, so e.g. 'image10' sorts before 'image2')
image_files.sort()

# Rename in two phases. Renaming straight to 'image<N>' can hit a name that a
# not-yet-processed file still holds (typical when the script is re-run), which
# silently replaces that file on POSIX and raises on Windows.

# Phase 1: move every image to a temporary name that cannot clash with a target.
staged_files = []
for i, image_file in enumerate(image_files):
    extension = os.path.splitext(image_file)[1]

    # Construct the temporary and final file names
    staged_file_name = '__renaming_' + str(i+1) + extension
    new_file_name = 'image' + str(i+1) + extension

    # Construct the full file paths
    old_file_path = os.path.join(directory, image_file)
    staged_file_path = os.path.join(directory, staged_file_name)

    # Refuse to overwrite a leftover temporary file from an interrupted run
    if staged_file_path != old_file_path and os.path.exists(staged_file_path):
        raise FileExistsError('Temporary name already in use: ' + staged_file_path)

    os.rename(old_file_path, staged_file_path)
    staged_files.append((staged_file_path, new_file_name))

# Phase 2: every image now has a temporary name, so the final names are free.
for staged_file_path, new_file_name in staged_files:
    new_file_path = os.path.join(directory, new_file_name)

    # Rename the file
    os.rename(staged_file_path, new_file_path)