# Imports

In [1]:
import os
import patoolib
import numpy as np
import matplotlib.pyplot as plt

from pathlib import Path
from collections import Counter

import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers
import cv2 as cv
# from keras.models import load_model
from tensorflow.keras.models import load_model, save_model

## Load Images

In [2]:
# --- Configuration -------------------------------------------------------
# Archive holding the captcha images; it is extracted into the working
# directory and is expected to produce the `images/` folder read below.
img = './images.rar'

# Target size every image is resized to before being fed to the model.
img_width = 250
img_height = 50

model_path = "model"
img_path = "images/"

data_dir = Path(img_path)

# --- Extraction ------------------------------------------------------------
# Only unpack when the image folder is missing, so re-running the notebook
# (Restart & Run All) does not re-extract over existing files.
# Delete the folder to force a fresh extraction.
if not data_dir.is_dir():
    patoolib.extract_archive(img, outdir=".")

# --- Index images and labels -------------------------------------------------
# Sort the paths as strings so the ordering is deterministic across runs.
images = sorted(str(p) for p in data_dir.glob("*.png"))
if not images:
    # Fail early with a clear message instead of ending up with batch_size == 0.
    raise FileNotFoundError(f"No PNG images found in {data_dir.resolve()}")

# The label of each captcha is its file name without the extension.
# `Path.stem` strips only the final suffix, whatever its letter case.
labels = [Path(p).stem for p in images]

# Sorted list of every distinct character that occurs in the labels.
characters = sorted({char for label in labels for char in label})

# The whole image set is evaluated as a single batch.
batch_size = len(images)

images, labels = np.array(images), np.array(labels)

patool: Extracting ./images.rar ...
patool: running "C:\Program Files\WinRAR\rar.EXE" x -- "g:\Tutorial 2\Code\_ Python Code\0 Captcha\captcha 4\git_files\images.rar"
patool:     with cwd=.
patool: ... ./images.rar extracted to `.'.


## Load Model

In [3]:
# Restore the saved model from `model_path`.
model = keras.models.load_model(model_path)

## Predicting

In [4]:
# Inference sub-model: runs from the layer named "image" (input) up to the
# output of the layer named "dense2".
# NOTE(review): presumably "dense2" is the per-timestep character
# distribution consumed by CTC decoding - confirm against the training code.
prediction_model = keras.models.Model(
    inputs=model.get_layer(name="image").input,
    outputs=model.get_layer(name="dense2").output,
)

In [5]:
# Forward lookup: character -> integer index.
# NOTE(review): the vocabulary is rebuilt from the labels of the images loaded
# in this notebook; it presumably has to match the vocabulary used when the
# model was trained - confirm.
char_to_num = layers.StringLookup(
    mask_token=None,
    vocabulary=list(characters),
)

# Inverse lookup: integer index -> character, sharing the forward vocabulary.
num_to_char = layers.StringLookup(
    invert=True,
    mask_token=None,
    vocabulary=char_to_num.get_vocabulary(),
)

In [6]:
def decode_batch_predictions(pred, max_length=10):
    """Turn raw model outputs into a list of predicted strings.

    Args:
        pred: Batch of network outputs; the first axis is the batch and the
            second axis is the sequence (time) dimension handed to the CTC
            decoder.
        max_length: Maximum number of decoded symbols kept per sample.
            Defaults to 10, the value that was previously hard-coded.

    Returns:
        A list with one decoded ``str`` per sample in ``pred``.
    """
    # Every sample uses the full sequence length of the prediction tensor.
    input_len = np.ones(pred.shape[0]) * pred.shape[1]
    # Use greedy search. For complex tasks, you can use beam search.
    decoded = keras.backend.ctc_decode(pred, input_length=input_len, greedy=True)[0][0]
    decoded = decoded[:, :max_length]

    output_text = []
    for res in decoded:
        # ctc_decode pads shorter sequences with -1, which is not a real
        # character index; drop the padding before mapping back to characters.
        res = tf.boolean_mask(res, tf.not_equal(res, -1))
        text = tf.strings.reduce_join(num_to_char(res)).numpy().decode("utf-8")
        output_text.append(text)
    return output_text

In [7]:
def encode_single_sample(img_path, label):
    """Load one image file and encode its label for the model.

    Args:
        img_path: Path of a PNG file to read.
        label: Text label belonging to that image.

    Returns:
        A dict with the preprocessed image under ``"image"`` and the label,
        mapped character by character through ``char_to_num``, under
        ``"label"``.
    """
    raw_bytes = tf.io.read_file(img_path)
    # Decode as a single-channel (grayscale) image.
    gray = tf.io.decode_png(raw_bytes, channels=1)
    # Convert to float32 in the [0, 1] range.
    scaled = tf.image.convert_image_dtype(gray, tf.float32)
    resized = tf.image.resize(scaled, [img_height, img_width])
    # Swap the first two axes so the time dimension corresponds to the
    # width of the image.
    transposed = tf.transpose(resized, perm=[1, 0, 2])
    # Split the label into characters and map each one to its integer index.
    label_chars = tf.strings.unicode_split(label, input_encoding="UTF-8")
    encoded_label = char_to_num(label_chars)
    # The model expects both inputs in a dict.
    return {"image": transposed, "label": encoded_label}

In [8]:
# Make sure both sequences are NumPy arrays before building the dataset.
# This repeats the conversion already done at the end of the loading cell.
images, labels = (np.array(seq) for seq in (images, labels))

In [9]:
# Input pipeline: (path, label) pairs -> encoded samples -> one batch of
# `batch_size` samples, prefetched.
test_dataset = (
    tf.data.Dataset.from_tensor_slices((images, labels))
    .map(encode_single_sample, num_parallel_calls=tf.data.AUTOTUNE)
    .batch(batch_size)
    .prefetch(buffer_size=tf.data.AUTOTUNE)
)

In [10]:
# Pull the first batch out of the dataset.
# NOTE: this rebinds `images` and `labels` from the path/label arrays to the
# batch tensors, so the dataset-building cell cannot be re-run after this one
# without re-running the loading cell first.
for batch in test_dataset.take(1):
    images, labels = batch["image"], batch["label"]

In [11]:
# Decode the model output for the whole batch into strings.
labels_predicted = decode_batch_predictions(prediction_model.predict(images))

# Count the samples whose decoded prediction differs from the true label.
error_rec = 0
for encoded_label, predicted_text in zip(labels, labels_predicted):
    # Map the encoded label back to its text form for comparison.
    true_text = (
        tf.strings.reduce_join(num_to_char(encoded_label)).numpy().decode("utf-8")
    )
    if true_text != predicted_text:
        error_rec += 1

print(f"error percnt = {error_rec / len(images) * 100}%")

error percnt = 1.3238916256157636%
