In [None]:
from matplotlib import pyplot as plt
import numpy as np
import cv2
from skimage.io import imread
from skimage.color import rgb2gray
from skimage.morphology import (erosion, dilation, opening, area_closing)
from skimage.transform import resize
from skimage.measure import label, regionprops, regionprops_table
import pandas as pd

## Image Read and Denoise

In [None]:
def read_image(path):
    """Load an image from disk as a 2-D grayscale array.

    Both branches return intensities on the same [0, 1] float scale, so
    fixed thresholds applied later in the notebook do not depend on whether
    the file was stored as colour or as grayscale.

    Parameters
    ----------
    path : str
        Location of the image file; passed unchanged to ``imread``.

    Returns
    -------
    numpy.ndarray
        Grayscale image. Colour input goes through ``rgb2gray``; integer
        grayscale input is divided by the maximum of its integer dtype;
        any other 2-D input is returned unchanged.
    """
    image = imread(path)
    if image.ndim > 2:
        if image.shape[-1] == 4:
            # Blend the alpha channel away first so rgb2gray receives the
            # three channels it expects.
            from skimage.color import rgba2rgb
            image = rgba2rgb(image)
        return rgb2gray(image)
    if np.issubdtype(image.dtype, np.integer):
        # Bring integer grayscale onto the same scale as the colour branch.
        return image / np.iinfo(image.dtype).max
    return image

def _apply_uint8_filter(image, run_filter):
    """Run an OpenCV denoiser on ``image`` and return it on the input's scale.

    ``run_filter`` is a one-argument callable that receives the array handed
    to OpenCV. Arrays that are already ``uint8`` (or whose maximum is exactly
    255) are filtered as they are and the filter result is returned directly.
    Everything else is treated as a [0, 1] image: it is scaled to 0..255,
    cast to ``uint8``, filtered, and divided by 255 again.

    The dtype check matters: deciding only on ``image.max() != 255`` would
    multiply a ``uint8`` image whose brightest pixel is below 255 by 255,
    which wraps around in 8-bit arithmetic.
    """
    image = np.asarray(image)
    if image.dtype == np.uint8 or image.max() == 255:
        return run_filter(image)
    scaled = (image * 255).astype('uint8')
    return run_filter(scaled) / 255


def bilateral_filter(image, diameter=15, sigma_color=75, sigma_space=75):
    """Denoise ``image`` with ``cv2.bilateralFilter``.

    Parameters
    ----------
    image : numpy.ndarray
        ``uint8`` image, or an image with values in [0, 1] (float or bool).
    diameter, sigma_color, sigma_space
        Forwarded positionally to ``cv2.bilateralFilter``.

    Returns
    -------
    numpy.ndarray
        The raw filter output for ``uint8`` input, otherwise the filter
        output divided by 255.
    """
    return _apply_uint8_filter(
        image,
        lambda img: cv2.bilateralFilter(img, diameter, sigma_color, sigma_space),
    )


def median_filter(image, kernel_size=3):
    """Denoise ``image`` with ``cv2.medianBlur`` using ``kernel_size``.

    Input and output scaling follow the same rules as ``bilateral_filter``.
    """
    return _apply_uint8_filter(
        image,
        lambda img: cv2.medianBlur(img, kernel_size),
    )


def non_local_mean_filter(image, h=3):
    """Denoise ``image`` with ``cv2.fastNlMeansDenoising`` (strength ``h``).

    Input and output scaling follow the same rules as ``bilateral_filter``.
    """
    return _apply_uint8_filter(
        image,
        lambda img: cv2.fastNlMeansDenoising(img, h=h),
    )

## Image thresholding and connected components

In [None]:
# Load the test image through read_image and display it.
# `image_path` and `image` are reused by later cells.
image_path = "../Data/test_images/noisy_three_sentences.jpg"
image = read_image(image_path)
plt.figure(figsize=(15,15))
plt.imshow(image,cmap="gray")
plt.show()
# Clear the current figure once it has been shown.
plt.clf()


In [None]:
# plt.imshow(filtered_image_3, cmap="gray")
# plt.imshow(filtered_image_2, cmap="gray")

In [None]:
# Denoise the loaded image with the bilateral filter (default parameters)
# and display the result. `filtered_image` feeds the thresholding cells below.
filtered_image = bilateral_filter(image)
plt.figure(figsize=(15,15))
plt.imshow(filtered_image, cmap="gray")
plt.show()
# Clear the current figure once it has been shown.
plt.clf()

In [None]:
# Binarize with a fixed threshold: pixels darker than 0.75 become True.
# NOTE(review): the 0.75 cut-off presumably assumes `filtered_image` is on a
# [0, 1] scale - confirm for grayscale (uint8) inputs.
binarized_image = filtered_image < 0.75
# binarized_image = non_local_mean_filter(binarized_image.copy())
plt.figure(figsize=(15,15))
plt.imshow(binarized_image, cmap="gray")
plt.show()
# Clear the current figure once it has been shown.
plt.clf()

## Erosion, Dilation

In [None]:
# 3x3 structuring element of ones, used as the default footprint below.
square = np.ones((3, 3), dtype=int)


def _repeat_morphology(operation, im, num, element):
    """Apply ``operation(image, element)`` ``num`` times in a row."""
    result = im
    for _ in range(num):
        result = operation(result, element)
    return result


def multi_dil(im, num, element=square):
    """Dilate ``im`` ``num`` times with ``element`` and return the result."""
    return _repeat_morphology(dilation, im, num, element)


def multi_ero(im, num, element=square):
    """Erode ``im`` ``num`` times with ``element`` and return the result."""
    return _repeat_morphology(erosion, im, num, element)

In [None]:
def erosion_dilation(image, plot=False):
    """Clean up a binary image with a fixed morphology sequence.

    The steps are, in order: 7 dilations, an area closing with threshold 5,
    11 erosions, and a final opening. When ``plot`` is true the result is
    also drawn with ``plt.imshow``.

    Returns the processed image.
    """
    grown = multi_dil(image, 7)
    holes_filled = area_closing(grown, 5)
    shrunk = multi_ero(holes_filled, 11)
    cleaned = opening(shrunk)
    if not plot:
        return cleaned
    plt.imshow(cleaned, cmap="gray")
    return cleaned

## Otsu

In [None]:
from skimage.filters import threshold_otsu
from skimage.segmentation import clear_border
from skimage.morphology import closing

# Otsu threshold computed on the denoised image and applied to that same
# image, so the threshold and the pixels it classifies come from one source.
thresh = threshold_otsu(filtered_image)
# The 3x3 footprint is built with NumPy instead of importing
# skimage.morphology.square, which would shadow the notebook's own `square`
# array defined in the erosion/dilation section.
bw = closing(filtered_image > thresh, np.ones((3, 3), dtype=np.uint8))
# Discard components that touch the image border.
cleared = clear_border(bw)

# label image regions
label_image = label(cleared)
regions = regionprops(label_image)

plt.figure(figsize=(15,15))
plt.imshow(label_image)
plt.show()
# Clear the current figure once it has been shown.
plt.clf()

## Connected components

In [None]:
from skimage.color import label2rgb

# Label the connected components of the fixed-threshold binary image.
label_im = label(binarized_image)
# Colour overlay of the labels on top of the binary image (label 0 = background).
image_label_overlay = label2rgb(label_im, image=binarized_image, bg_label=0)
# `regions` is filtered and re-ordered by the cells that follow.
regions = regionprops(label_im)
print(len(regions))
plt.figure(figsize=(15,15))
plt.imshow(image_label_overlay)
plt.show()
# Clear the current figure once it has been shown.
plt.clf()

In [None]:
# for lbl in np.unique(label_im):
#      # 0 would always be background
#      # print(lbl)
#      plt.imshow(label_im == lbl)
#      plt.show()

In [None]:
# Per-region measurements shown as a table; intensities are read from
# `filtered_image`. The DataFrame is the cell's displayed output.
properties = ['area','convex_area','bbox_area', 'extent',  
              'mean_intensity', 'solidity', 'eccentricity', 
              'orientation', 'centroid', 'bbox']
pd.DataFrame(regionprops_table(label_im, filtered_image, 
             properties=properties))

In [None]:
# Keep only the regions that look like characters and collect their convex
# masks and bounding boxes; rejected regions are removed from `regions`
# in place so that `regions`, `masks` and `bbox` stay index-aligned.
# NOTE(review): convex_area is presumably never smaller than area, so the
# ratio test may always pass and only `area > 200` filters - confirm intent.
masks, bbox, list_of_index, remove_nums = [], [], [], []
for num, x in enumerate(regions):
    area, convex_area = x.area, x.convex_area
    is_character = (convex_area / area > 0.95) and area > 200
    if not is_character:
        remove_nums.append(num)
        continue
    print(num, area, convex_area, x.bbox, x.centroid)
    masks.append(x.convex_image)
    bbox.append(x.bbox)
    list_of_index.append(num)

# Delete from the back so earlier indexes stay valid while removing.
for i in reversed(remove_nums):
    del regions[i]

count = len(masks)
print(count, len(regions))

In [None]:
def find_space_indexes(regions):
    """Flag the characters of one text line that follow a word gap.

    For every region after the first, the horizontal gap is the region's
    ``bbox[1]`` minus the previous region's ``bbox[3]``; the first region
    gets a sentinel gap of ``-1``. The cut-off is the value found at
    position ``int(len(gaps) * 0.90)`` of the sorted gaps, and every region
    whose gap is at least that large is flagged.

    Parameters
    ----------
    regions : list
        Regions of a single line, ordered left to right. Each item must
        expose a ``bbox`` sequence.

    Returns
    -------
    list of int
        ``1`` where a space precedes the region, ``0`` otherwise; same
        length as ``regions``. An empty input yields an empty list instead
        of raising ``IndexError``.
    """
    if len(regions) == 0:
        return []
    gaps = [-1]  # sentinel: the first character has no left neighbour
    gaps.extend(
        current.bbox[1] - previous.bbox[3]
        for previous, current in zip(regions, regions[1:])
    )
    threshold = sorted(gaps)[int(len(gaps) * 0.90)]
    return [1 if gap >= threshold else 0 for gap in gaps]

In [None]:
def re_arrange_letters(bbox, regions, limit=25):
    """Order character regions in reading order (line by line, left to right).

    Boxes are sorted by their top row (``box[0]``) and grouped greedily into
    lines: a line starts at an anchor row and absorbs every box whose top row
    is less than ``limit`` rows below that anchor; the first box at or beyond
    that distance becomes the next anchor. Within each line, boxes and
    regions are sorted by their left column (``bbox[1]``).

    Parameters
    ----------
    bbox : list of tuple
        Bounding boxes, indexed the same way as the ``bbox`` attribute of
        the regions (rows at positions 0 and 2, columns at 1 and 3).
    regions : list
        Region objects exposing ``bbox`` and ``convex_image``; expected to
        correspond one-to-one with ``bbox``.
    limit : int, optional
        Maximum top-row distance from a line's anchor for a box to still
        belong to that line.

    Returns
    -------
    tuple
        ``(new_bbox, new_masks, new_regions, space_indexes,
        line_end_indexes)`` where the first three are in reading order,
        ``space_indexes`` is the concatenation of ``find_space_indexes`` for
        every line, and ``line_end_indexes`` holds the cumulative number of
        characters at the end of each line. Empty input yields five empty
        lists instead of raising ``ValueError``.
    """
    if len(bbox) == 0:
        return [], [], [], [], []

    # Stable sorts on the same key keep the two sequences aligned.
    bbox = sorted(bbox, key=lambda box: box[0])
    regions = sorted(regions, key=lambda region: region.bbox[0])

    # Identical rows always share a line, even for limit < 1.
    min_gap = max(limit, 1)
    line_starts = []
    anchor = None
    for position, box in enumerate(bbox):
        top = int(box[0])
        if anchor is None or top - anchor >= min_gap:
            anchor = top
            line_starts.append(position)
    line_bounds = zip(line_starts, line_starts[1:] + [len(bbox)])

    new_bbox, new_masks, new_regions = [], [], []
    space_indexes, line_end_indexes = [], []
    for start, end in line_bounds:
        line_region = sorted(regions[start:end], key=lambda region: region.bbox[1])
        line_box = sorted(bbox[start:end], key=lambda box: box[1])
        space_indexes.extend(find_space_indexes(regions=line_region))
        # Running total of characters seen so far marks the end of the line.
        line_end_indexes.append(len(space_indexes))
        new_bbox.extend(line_box)
        new_masks.extend(region.convex_image for region in line_region)
        new_regions.extend(line_region)
    return new_bbox, new_masks, new_regions, space_indexes, line_end_indexes


In [None]:
# Put the characters in reading order before sizing the preview grid.
bbox, masks, regions, space_indexes, line_end_indexes = re_arrange_letters(bbox, regions)
print(len(masks), line_end_indexes)

# Size the grid so every character gets an axis: rounding the column count
# down would make the loop below stop early and silently drop the trailing
# characters from `bounded_images` (and fail outright for fewer than 15).
n_rows = 15
n_cols = max(1, -(-len(masks) // n_rows))  # ceiling division
fig, ax = plt.subplots(n_rows, n_cols, figsize=(60,60), squeeze=False)
axes = ax.flatten()

bounded_images = []
for index, (box, mask) in enumerate(zip(bbox, masks)):
    # Crop the character and keep only the pixels inside its convex mask.
    bounded_image  =  binarized_image[box[0]:box[2], box[1]:box[3]] * mask
    bounded_image = np.pad(bounded_image, pad_width=10)
    bounded_image = bilateral_filter(bounded_image, 1)
    # 28x28 is the size handed to the character predictor.
    bounded_image = resize(bounded_image, (28, 28))
    # plt.imshow(bounded_image)
    # plt.savefig(f"../Data/test_images/msg_from_annie_{index}.png")
    bounded_images.append(bounded_image)
    axes[index].imshow(bounded_image, cmap="gray")

# Hide the grid cells that have no character.
for axis in axes[len(bounded_images):]:
    axis.axis("off")
fig.tight_layout()


## Model Predict

In [None]:
import os

from predict import CharacterPredictor

predictor = CharacterPredictor(model_path="../Data/model.pth")

# Build the output names from the path without its extension. A plain
# `.replace(".jpg", ...)` returns the input path unchanged for any other
# extension, and opening that path for writing would overwrite the image.
output_stem = os.path.splitext(image_path)[0]
output_filename = output_stem + "_predicted.txt"
output_filename_with_spaces = output_stem + "_predicted_with_spaces.txt"

# Context managers close both files even if a prediction raises.
with open(output_filename, "w") as output_file, \
        open(output_filename_with_spaces, "w") as output_file_with_spaces:
    for index, bounded_image in enumerate(bounded_images):
        prediction = predictor.predict(bounded_image)
        print(prediction)
        output_file.write(prediction)
        output_file_with_spaces.write(prediction)
        if index+1 < len(bounded_images):
            # Insert a separator when the next character starts a new word
            # or a new line.
            if space_indexes[index+1] == 1 or index+1 in line_end_indexes:
                output_file_with_spaces.write(" ")

In [None]:
def compare_with_gt(gt_filename, predicted_filename):
    """Score a prediction file against a ground-truth file.

    Only the first line of each file is read (surrounding whitespace is
    stripped). Characters are compared position by position, with the
    ground-truth character upper-cased before the comparison. Each compared
    pair is printed.

    Parameters
    ----------
    gt_filename : str
        Path of the ground-truth text file.
    predicted_filename : str
        Path of the predicted text file.

    Returns
    -------
    float
        Number of matching positions divided by the length of the
        ground-truth line; ``0.0`` when the ground-truth line is empty
        (instead of raising ``ZeroDivisionError``).
    """
    print(gt_filename)
    # `with` closes both files even if one of them fails to open or read.
    with open(gt_filename, "r") as gt_file, \
            open(predicted_filename, "r") as predicted_file:
        gt_line = gt_file.readline().strip()
        predicted_line = predicted_file.readline().strip()
    print(gt_line)
    if not gt_line:
        return 0.0
    count = 0
    # zip stops at the shorter line; missing predictions count as misses
    # because the denominator is the ground-truth length.
    for gt, predicted in zip(gt_line, predicted_line):
        print(gt, predicted)
        if predicted == gt.upper():
            count += 1
    return count / len(gt_line)

In [None]:
# Ground truth is addressed relative to the notebook, like `image_path`,
# instead of through an absolute home-directory path that only exists on
# one machine.
# NOTE(review): assumes the ground-truth file sits in ../Data/test_images
# next to the test image - confirm.
compare_with_gt(gt_filename="../Data/test_images/three_sentences_gt.txt",
                predicted_filename=output_filename)