In [56]:
import numpy as np

from scipy.spatial.distance import euclidean

from skimage.io import imread
from skimage.filters import threshold_otsu
from skimage.feature import canny
from skimage.transform import probabilistic_hough_line, rotate, resize
from skimage.morphology import skeletonize

# Pixel-intensity cut-off used by is_upside_down and get_rotation_for_vertical
# when comparing the two pixels sampled on either side of the detected line.
# NOTE(review): presumably on a 0-255 scale (8-bit input images) - confirm.
THRESHOLD = 150
# Number of images per test set, indexed by set number;
# get_images_characteristics iterates over range(SET_RANGES[set_number]).
SET_RANGES = [6, 20, 20, 20, 20, 200, 200, 20, 100]

In [57]:
def is_upside_down(image, line):
    """Tell whether the pixels around ``line`` indicate an upside-down image.

    The midpoint of ``line`` is computed, and the pixels 10 px above and
    10 px below it are sampled from a copy of ``image`` padded with a
    10 px zero border (so the samples never fall outside the array).

    Args:
        image: 2-D intensity array.
        line: ``((x1, y1), (x2, y2))`` end points of the detected line.

    Returns:
        Truthy when the pixel above the midpoint is below ``THRESHOLD`` and
        the pixel below it is above ``THRESHOLD``.
    """
    (ax, ay), (bx, by) = line
    mid_row = int(np.round((ay + by) / 2))
    mid_col = int(np.round((ax + bx) / 2))

    padded = np.pad(image, pad_width=10, mode='constant', constant_values=0.0)

    # In padded coordinates the midpoint sits at (mid_row + 10, mid_col + 10),
    # so row `mid_row` is 10 px above it and row `mid_row + 20` is 10 px below.
    col = mid_col + 10
    above = padded[mid_row, col]
    below = padded[mid_row + 20, col]

    return above < THRESHOLD and below > THRESHOLD

In [58]:
def get_rotation_for_vertical(image, line):
    """Pick the rotation (in degrees) to apply when ``line`` is vertical.

    The pixels 10 px to the left and 10 px to the right of the line's
    midpoint are sampled from a copy of ``image`` padded with a 10 px zero
    border.

    Args:
        image: 2-D intensity array.
        line: ``((x1, y1), (x2, y2))`` end points of the detected line.

    Returns:
        ``90.0`` when the left pixel is below ``THRESHOLD`` and the right
        pixel is above it, ``-90.0`` otherwise.
    """
    (ax, ay), (bx, by) = line
    mid_row = int(np.round((ay + by) / 2))
    mid_col = int(np.round((ax + bx) / 2))

    padded = np.pad(image, pad_width=10, mode='constant', constant_values=0.0)

    # The midpoint is at (mid_row + 10, mid_col + 10) once the border is added.
    row = mid_row + 10
    dark_on_left = padded[row, mid_col] < THRESHOLD
    bright_on_right = padded[row, mid_col + 20] > THRESHOLD

    return 90.0 if dark_on_left and bright_on_right else -90.0

In [59]:
def get_line(image):
    """Find the longest straight edge segment in ``image``.

    The image is binarised with Otsu's threshold, Canny edge detection is run
    on the result, and line segments are extracted with the probabilistic
    Hough transform. The minimum accepted segment length starts at 90% of
    half of ``image.shape[0]`` and is reduced by a further 10% on every retry
    until at least one segment is found.

    Args:
        image: 2-D intensity array.

    Returns:
        The longest detected segment as ``((x1, y1), (x2, y2))``, or ``None``
        if every detected segment has zero length.

    Raises:
        ValueError: if no segment is detected even after the minimum length
            has been reduced to zero (e.g. an image without any edges).
    """
    thresh = threshold_otsu(image)
    normalize = image > thresh

    edges = canny(normalize, 0, 1, 1)

    min_line_length = int(image.shape[0] / 2)

    while True:
        min_line_length = int(min_line_length * 0.9)
        lines = probabilistic_hough_line(edges, seed=16, line_length=min_line_length, line_gap=3)

        if lines:
            break

        # Once the length has reached zero it cannot shrink any further, so
        # another attempt would repeat the same search: stop instead of
        # spinning forever.
        if min_line_length <= 0:
            raise ValueError("no line segment could be detected in the image")

    longest_line = None
    longest_line_distance = 0.0

    for line in lines:
        point_a, point_b = line
        distance = euclidean(point_a, point_b)

        # Strict comparison: on ties the first segment found is kept.
        if longest_line_distance < distance:
            longest_line = line
            longest_line_distance = distance

    return longest_line

In [60]:
def get_rotation(image):
    """Compute the rotation angle (in degrees) for ``image``.

    The angle is derived from the slope of the line returned by ``get_line``.
    A vertical line (equal x coordinates) gets the correction chosen by
    ``get_rotation_for_vertical``; otherwise 180 degrees are added when
    ``is_upside_down`` reports that the image is flipped.

    Args:
        image: 2-D intensity array.

    Returns:
        The rotation angle in degrees.
    """
    segment = get_line(image)
    (x_start, y_start), (x_end, y_end) = segment

    run = x_end - x_start
    # A vertical segment has no finite slope; use 0 and correct below.
    gradient = (y_end - y_start) / run if run else 0

    angle = np.degrees(np.arctan(gradient))

    if x_start == x_end:
        return angle + get_rotation_for_vertical(image, segment)
    if is_upside_down(image, segment):
        return angle + 180.0
    return angle

In [61]:
def rotate_image(image):
    """Rotate ``image`` by the angle computed by ``get_rotation``.

    ``rotate`` is called with ``resize=True``.

    Args:
        image: 2-D intensity array.

    Returns:
        The rotated image.
    """
    angle = get_rotation(image)
    return rotate(image, angle, resize=True)

In [62]:
def trim_image(image):
    """Crop ``image`` in three successive passes.

    1. Drop every column whose pixels are all below 1.0.
    2. Drop every row whose pixels are all below 1.0.
    3. Drop every row whose pixels are all above 0.0.

    Args:
        image: 2-D array of pixel values.

    Returns:
        The cropped array.
    """
    # "not all below 1.0" is the same as "at least one pixel not below 1.0".
    columns_to_keep = np.logical_not(image < 1.0).any(axis=0)
    cropped = image[:, columns_to_keep]

    rows_to_keep = np.logical_not(cropped < 1.0).any(axis=1)
    cropped = cropped[rows_to_keep]

    # Likewise, "not all above 0.0" means at least one pixel is not above 0.0.
    rows_to_keep = np.logical_not(cropped > 0.0).any(axis=1)
    return cropped[rows_to_keep]

In [63]:
def resize_image(image):
    """Scale ``image`` so that its second dimension becomes 200 pixels.

    Both dimensions are multiplied by the same factor, so the aspect ratio
    is kept (up to rounding). ``resize`` is called with
    ``anti_aliasing=False``.

    Args:
        image: 2-D array of pixel values.

    Returns:
        The resized image.
    """
    rows, cols = image.shape[0], image.shape[1]
    scale = 200 / cols

    target_shape = (
        int(np.round(rows * scale)),
        int(np.round(cols * scale)),
    )

    return resize(image, target_shape, anti_aliasing=False)

In [64]:
def binarize_image(image):
    """Return a two-level copy of ``image``.

    Pixels greater than 0.5 become 1.0 and pixels less than or equal to 0.5
    become 0.0. The input array is left untouched.

    Args:
        image: array of pixel values.

    Returns:
        A new array with the same shape and dtype as ``image``.
    """
    above_half = image > 0.5
    at_most_half = image <= 0.5

    result = image.copy()
    result[at_most_half] = 0.0
    result[above_half] = 1.0
    return result

In [65]:
def approximate_values(image, bins=5):
    """Summarise the upper contour of ``image`` as ``bins`` median heights.

    For every column the height of the first non-zero pixel from the top is
    recorded as ``image.shape[0] - row_index``. A column without any non-zero
    pixel repeats the previous column's height (or ``image.shape[0]`` when it
    is the first column). The heights are split into ``bins`` chunks with
    ``np.array_split`` and each chunk is reduced to its median.

    Args:
        image: 2-D array; non-zero entries mark the contour.
        bins: number of chunks the column heights are split into.

    Returns:
        A tuple ``(medians, mirrored)`` where ``medians`` lists the chunk
        medians from left to right and ``mirrored`` lists
        ``image.shape[0] - median`` for the chunks from right to left.
    """
    height = image.shape[0]

    column_heights = []
    last_height = height
    for col_index in range(image.shape[1]):
        column = image[:, col_index]
        if np.any(column):
            # argmax yields the index of the first non-zero pixel from the top.
            last_height = height - np.argmax(column)
        column_heights.append(last_height)

    medians = [np.median(part) for part in np.array_split(column_heights, bins)]
    mirrored = [height - value for value in reversed(medians)]

    return medians, mirrored

In [66]:
def get_image_characteristic(set_number, image_number):
    """Load one test image and compute its contour characteristics.

    The image ``test_sets/set<set_number>/<image_number>.png`` is read, then
    rotated, trimmed, resized and binarised. Canny edge detection and
    skeletonisation are applied to the result, and the skeleton is summarised
    by ``approximate_values`` with 8 bins.

    Args:
        set_number: index of the test set directory.
        image_number: index of the image file inside the set.

    Returns:
        The ``(approximated_values, inverted_approximated_values)`` pair
        produced by ``approximate_values``.
    """
    source = imread(f'test_sets/set{set_number}/{image_number}.png')

    # Normalisation pipeline: rotate -> trim -> resize -> binarise.
    prepared = binarize_image(resize_image(trim_image(rotate_image(source))))

    outline = skeletonize(canny(prepared, 0, 1, 1))

    return approximate_values(outline, 8)

In [67]:
def get_images_characteristics(set_number):
    """Compute the characteristics of every image in a test set.

    Image numbers run from 0 up to (but excluding) ``SET_RANGES[set_number]``.

    Args:
        set_number: index of the test set.

    Returns:
        A list of ``(image_number, approximated_values,
        inverted_approximated_values)`` tuples, one per image.
    """
    image_count = SET_RANGES[set_number]

    return [
        (image_number, *get_image_characteristic(set_number, image_number))
        for image_number in range(image_count)
    ]

In [68]:
def compare_characteristics(characteristics):
    """Find, for every image, the best-matching other image.

    The inverted values of an image are compared with the (non-inverted)
    values of every other image using the sum of absolute differences; the
    image number with the smallest sum wins. On ties the candidate that
    appears first in ``characteristics`` is chosen.

    Args:
        characteristics: list of ``(image_number, approximated_values,
            inverted_approximated_values)`` tuples.

    Returns:
        A list with the best-matching image number for each entry, in the
        order of ``characteristics``.
    """
    best_matches = []

    for image_number, _, iav in characteristics:
        distances = {
            other_number: sum(abs(iav[k] - cav[k]) for k in range(len(iav)))
            for other_number, cav, _ in characteristics
            if other_number != image_number  # never match an image with itself
        }
        best_matches.append(min(distances, key=distances.get))

    return best_matches

In [69]:
def get_correct_results(set_number):
    """Read the expected answers for a test set.

    Every line of ``test_sets/set<set_number>/correct.txt`` is parsed as an
    integer.

    Args:
        set_number: index of the test set.

    Returns:
        A list of integers, one per line of the file.

    Raises:
        ValueError: if a line cannot be parsed as an integer.
    """
    with open(f'test_sets/set{set_number}/correct.txt', "r") as file:
        return [int(row) for row in file.readlines()]

In [70]:
def compare_results(results, correct_results):
    """Score predicted matches against the expected ones.

    Only the first ``len(correct_results)`` positions are compared.

    Args:
        results: predicted image numbers, indexed by image.
        correct_results: expected image numbers, indexed by image.

    Returns:
        A tuple ``(correct, total, incorrect_ids)``: the number of matching
        positions, the number of positions compared, and the indices that
        did not match.
    """
    total = len(correct_results)

    mismatched = [
        index for index in range(total)
        if results[index] != correct_results[index]
    ]

    return total - len(mismatched), total, mismatched

In [71]:
def test_set(set_number):
    """Run the matching pipeline on one test set and print its score.

    Prints the set number, the ``correct/total`` score and the indices of
    the images that were matched incorrectly.

    Args:
        set_number: index of the test set.
    """
    predicted = compare_characteristics(get_images_characteristics(set_number))
    expected = get_correct_results(set_number)

    hits, total, misses = compare_results(predicted, expected)

    print(f"Set number {set_number}")
    print(f"Score: {hits}/{total}")
    print("Incorrect ids:", misses)
    print()

In [72]:
def test_sets(sets_range):
    """Evaluate test sets ``0`` to ``sets_range - 1`` in order.

    Args:
        sets_range: number of test sets to evaluate.
    """
    set_numbers = range(sets_range)
    for current_set in set_numbers:
        test_set(current_set)

In [73]:
# Evaluate all nine test sets (numbers 0-8, one per SET_RANGES entry) and
# print the score of each.
test_sets(9)

Set number 0
Score: 6/6
Incorrect ids: []

Set number 1
Score: 20/20
Incorrect ids: []

Set number 2
Score: 20/20
Incorrect ids: []

Set number 3
Score: 20/20
Incorrect ids: []

Set number 4
Score: 20/20
Incorrect ids: []

Set number 5
Score: 172/200
Incorrect ids: [0, 1, 3, 5, 15, 29, 33, 42, 65, 71, 81, 95, 96, 98, 99, 103, 108, 124, 134, 154, 155, 159, 170, 180, 185, 187, 188, 194]

Set number 6
Score: 173/200
Incorrect ids: [5, 19, 20, 40, 42, 58, 59, 61, 82, 92, 96, 97, 103, 104, 115, 119, 129, 135, 144, 152, 159, 160, 166, 171, 186, 190, 193]

Set number 7
Score: 3/20
Incorrect ids: [0, 1, 3, 4, 5, 7, 8, 9, 11, 12, 13, 14, 15, 16, 17, 18, 19]

Set number 8
Score: 12/100
Incorrect ids: [0, 1, 3, 4, 5, 6, 7, 8, 9, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 34, 35, 36, 37, 40, 41, 42, 43, 44, 47, 48, 49, 50, 51, 52, 53, 55, 56, 57, 58, 59, 60, 61, 62, 64, 65, 66, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, 80, 81, 82, 83, 84, 85, 86, 87, 