In [None]:
# !sudo apt-get install tesseract-ocr
# !pip install tensorflow tensorlayerx numpy easydict tqdm scikit-image pytesseract python-Levenshtein

In [None]:
import os
import numpy

In [None]:
# Mount Google Drive at /content/drive (only works inside Google Colab).
from google.colab import drive
drive.mount('/content/drive')

## Epoch Setting

In [None]:
# Modify it as your own.

# Folder that holds the high-resolution test images.
test_image_path = '/path/to/test/dataset/folder'

# Model identifier; used in the log file name and in the final report.
model_name = "your-model-code"

# Folder that holds the trained generator checkpoints.
checkpoint_path = '/path/to/model/folder'

# Epoch whose generator checkpoint is evaluated.
epoch_selection = 3000

# Output folders: per-image qualitative results and the SSIM maps.
save_dir = '/path/to/saving/the/testing/result/for/qualitative/testing'
ssim_map_path = "/path/to/saving/the/ssim/map/for/testing"

# Folder that receives the log files written by write_log.
log_path = "/path/to/your/log/folder"

# Modify this as you need
used_weight_g = f"{checkpoint_path}/g_{epoch_selection}.npz"

# exist_ok = True creates a missing folder and is a no-op for an existing one,
# without the check-then-create race of `if not exists: makedirs`.
# A path that exists but is a regular file now raises FileExistsError instead of
# being silently accepted.
os.makedirs(checkpoint_path, exist_ok = True)
os.makedirs(log_path, exist_ok = True)
os.makedirs(ssim_map_path, exist_ok = True)
os.makedirs(save_dir, exist_ok = True)

In [None]:
from datetime import datetime
from pathlib import Path
from skimage import io, img_as_ubyte

def write_log(log: str, type: str, namespace: str):
    """
    write_log
        Appends one formatted line to the test log of the current model.

        The file is stored in @see log_path and is named
        <Ymd>.<model_name>.test.log, so every day gets its own file.

        Example line:
        [2023-10-01 00:00:00][INFO] [NAMESPACE] Some message

        @params str log: The message to write.
        @params str type: Label written verbatim in the second bracket (e.g. INFO).
        @params str namespace: Origin of the message; written upper-cased.
    """
    time = datetime.now()
    log_location = log_path + "/" + time.strftime("%Y%m%d") + f".{model_name}.test.log"

    message = f"[{time.strftime('%Y-%m-%d %H:%M:%S')}][{type}] [{namespace.upper()}] {log}\n"

    # Append mode already creates the file when it is missing, so no separate
    # existence check is needed. `with` closes the handle even if write() fails.
    with open(log_location, "a", encoding = "utf-8") as log_file:
        log_file.write(message)

def save_plotter_image_as_file(plot):
    """
    save_plotter_image_as_file
        Saves an image array (for example an SSIM map) as a PNG file inside
        @see ssim_map_path. The file name is the current timestamp followed by
        ".ssim_map.png".

        @params plot: Image array; converted with skimage's img_as_ubyte before saving.
    """
    time = datetime.now()
    plot_ubyte = img_as_ubyte(plot)

    # Microsecond resolution (same format save_result uses for its folders):
    # this function is called once per image inside a loop, and with
    # second-resolution names several images map to the same file and
    # silently overwrite each other.
    file_name = f"{time.strftime('%Y-%m-%d %H:%M:%S.%f')}.ssim_map.png"

    io.imsave(ssim_map_path + "/" + file_name, plot_ubyte)

In [None]:
from skimage.metrics import peak_signal_noise_ratio, structural_similarity
from skimage.color import rgb2gray
from tensorlayerx import convert_to_tensor
import pytesseract
import Levenshtein
import re

In [None]:
def hard_round(number, decimal_places = 0):
    """
    hard_round
        Truncates a number to decimal_places digits behind the decimal point.
        Digits beyond that are cut off (rounded toward zero), never rounded up,
        e.g. hard_round(0.12349, 4) is 0.1234.

        The truncation works on the decimal text form of the number, so values
        such as 0.29 are not disturbed by binary floating point error.

        Non-finite values (NaN, +/- infinity) are returned unchanged as float,
        because they have no digits to truncate.

        NOTE: decimal_places is now honoured. Previously the result always kept
        up to four decimals regardless of this argument.

        @params float32 number
        @params int decimal_places

        @returns float
    """
    # Function-scope imports keep this notebook cell self-contained.
    import math
    from decimal import Decimal, InvalidOperation, ROUND_DOWN

    if(not math.isfinite(number)):
        return float(number)

    decimal_places = max(int(decimal_places), 0)

    # Decimal("0.0001") for 4 places, Decimal("1") for 0 places.
    step = Decimal(1).scaleb(-decimal_places)

    try:
        # str() gives the shortest text form, including scientific notation
        # ("1e-05"), which Decimal parses directly.
        truncated = Decimal(str(number)).quantize(step, rounding = ROUND_DOWN)
    except InvalidOperation:
        # Too many digits for the decimal context, or an unparsable text form:
        # fall back to the untouched value instead of failing.
        return float(number)

    return float(truncated)

class MetricManager():
    """
    @since July, 1st 2024
    MetricManager

    Groups the evaluation metrics of one batch (SSIM, PSNR, CER / WER) behind a
    chainable interface. The tensor -> image conversion runs once in the
    constructor instead of once per metric.

    @example
    ```python
    metrics = MetricManager(ori_image, gen_image, lr_image)
    metrics.ssim().psnr().cer_wer()

    print(f"ssim: {metrics.ssim_scores} - psnr: {metrics.psnr_scores} - cer: {metrics.cer_scores}")
    ```
    """

    def __init__(self, original_images, generated_images, lowres_images):
        """
        constructor
            Converts the three image batches from normalized NCHW into uint8 NHWC
            images (see _to_uint8_hwc) and stores them on the instance.

            @param original_images: Batch of high-resolution ground truth images.
            @param generated_images: Batch of generator outputs.
            @param lowres_images: Batch of low-resolution generator inputs.
        """
        self.original_images = self._to_uint8_hwc(original_images)
        self.generated_images = self._to_uint8_hwc(generated_images)
        self.lowres_images = self._to_uint8_hwc(lowres_images)

    @staticmethod
    def _to_uint8_hwc(images):
        """
        Converts a batch from NCHW into NHWC and maps the values back to pixels.

        1. Transpose the axes to [0, 2, 3, 1] (channels last).
        2. Undo the normalization: value * 127.5 + 127.5.
        3. Round to the nearest integer and clip to [0, 255] before the uint8
           cast. A bare cast truncates (so a pixel can come back one lower than
           it went in) and wraps around for values slightly out of range.
        """
        images = numpy.transpose(images, [0, 2, 3, 1])
        images = (images * 127.5) + 127.5

        return numpy.clip(numpy.rint(images), 0, 255).astype('uint8')

    @staticmethod
    def _error_rate(distance, reference_length):
        """
        Divides an edit distance by the reference length.

        An empty reference (OCR found nothing in the original image) would divide
        by zero, so the length is floored at 1: the rate is then 0 when the
        hypothesis is empty too, and the raw distance otherwise.
        """
        return distance / max(reference_length, 1)

    @staticmethod
    def _word_distance(reference_words, hypothesis_words):
        """
        Edit distance between two word lists.

        Every distinct word is mapped to one unique character, so that the
        string-based Levenshtein.distance counts word insertions / deletions /
        substitutions instead of character edits.
        """
        word_codes = {}

        def encode(words):
            # Codes start at 1; this stays clear of the surrogate range as long
            # as one image pair has fewer than ~55k distinct words.
            return "".join(chr(word_codes.setdefault(word, len(word_codes) + 1)) for word in words)

        return Levenshtein.distance(encode(reference_words), encode(hypothesis_words))

    @classmethod
    def _text_error_rates(cls, reference_text, hypothesis_text):
        """
        Returns (cer, wer) of hypothesis_text measured against reference_text.

        CER is the character-level edit distance over the reference character count.
        WER is the word-level edit distance over the reference word count; words
        are separated by any whitespace.
        """
        reference_words = reference_text.split()
        hypothesis_words = hypothesis_text.split()

        cer = cls._error_rate(
            Levenshtein.distance(reference_text, hypothesis_text),
            len(reference_text)
        )
        wer = cls._error_rate(
            cls._word_distance(reference_words, hypothesis_words),
            len(reference_words)
        )

        return cer, wer

    def ssim(self) -> 'MetricManager':
        """
        ssim
            Structural Similarity Index Measurement (SSIM) between every original
            image and its generated counterpart.

            This function presumes that original_images and generated_images have
            the same shape. For every image pair it:

            1. Converts both images to grayscale
            2. Calls structural_similarity by skimage
            3. Saves the SSIM map through save_plotter_image_as_file

            The truncated mean of the batch is stored in self.ssim_scores.

            @requirements: skimage

            @return MetricManager
        """
        ssim_scores = []

        for original_image, generated_image in zip(self.original_images, self.generated_images):
            # rgb2gray rescales uint8 input to floats in [0, 1].
            original_gray = rgb2gray(original_image)
            generated_gray = rgb2gray(generated_image)

            # data_range must describe the grayscale images, i.e. 1.0 and not 255:
            # with 255 the stabilizing constants dwarf the signal and every
            # score is pushed toward 1.
            ssim_value, ssim_map = structural_similarity(
                original_gray,
                generated_gray,
                win_size = 3,
                full = True,
                data_range = 1.0
            )

            ssim_scores.append(ssim_value)

            # The map holds values in [-1, 1]. Scale it to 0..255 before the
            # uint8 cast; a bare cast leaves only 0 / 1 and saves a black image.
            ssim_map = (numpy.clip(ssim_map, 0.0, 1.0) * 255).astype('uint8')

            save_plotter_image_as_file(ssim_map)

        self.ssim_scores = hard_round(numpy.mean(ssim_scores), 4)

        return self

    def psnr(self) -> 'MetricManager':
        """
        psnr
            Peak Signal-Noise Ratio (PSNR) between every original image and its
            generated counterpart, on the uint8 images (data_range = 255).

            Identical images have no noise and therefore an infinite PSNR; they are
            scored with the fixed value 99 so that the batch mean stays finite.

            The truncated mean of the batch is stored in self.psnr_scores.

            @requirements: skimage

            @returns MetricManager
        """
        psnr_scores = []

        for original_image, generated_image in zip(self.original_images, self.generated_images):
            if(numpy.array_equal(original_image, generated_image)):
                psnr_score = 99
            else:
                psnr_score = peak_signal_noise_ratio(
                    original_image,
                    generated_image,
                    data_range = 255
                )

            psnr_scores.append(psnr_score)

        self.psnr_scores = hard_round(numpy.mean(psnr_scores), 4)

        return self

    def cer_wer(self) -> 'MetricManager':
        """
        cer_wer
            Characters Error Rate (CER) and Words Error Rate (WER). The text of each
            image is extracted with Tesseract OCR through the pytesseract API; the
            OCR text of the original image is the reference.

            For every image this function:
            1. Runs OCR on the original, generated and low-resolution image
            2. Applies the cleaning pattern to the three texts
            3. Scores generated vs original (after SR) and low-resolution vs
               original (before SR)

            The truncated batch means are stored in self.cer_scores,
            self.wer_scores, self.cer_scores_before_sr and self.wer_scores_before_sr.

            @requirements: pytesseract, python-Levenshtein

            @returns MetricManager
        """
        cer_scores = []
        wer_scores = []

        cer_scores_before_sr = []
        wer_scores_before_sr = []

        # Removes a backslash together with the character that follows it.
        # NOTE(review): real control characters (newline, form feed) are not
        # matched by this pattern - confirm that this is the intended cleaning.
        pattern = r"\\."

        for original_image, generated_image, lowres_image in zip(
            self.original_images, self.generated_images, self.lowres_images
        ):
            original_text = re.sub(pattern, "", pytesseract.image_to_string(original_image))
            generated_text = re.sub(pattern, "", pytesseract.image_to_string(generated_image))
            lowres_text = re.sub(pattern, "", pytesseract.image_to_string(lowres_image))

            cer, wer = self._text_error_rates(original_text, generated_text)
            cer_scores.append(cer)
            wer_scores.append(wer)

            cer_before_sr, wer_before_sr = self._text_error_rates(original_text, lowres_text)
            cer_scores_before_sr.append(cer_before_sr)
            wer_scores_before_sr.append(wer_before_sr)

        self.cer_scores = hard_round(numpy.mean(cer_scores), 4)
        self.wer_scores = hard_round(numpy.mean(wer_scores), 4)
        self.cer_scores_before_sr = hard_round(numpy.mean(cer_scores_before_sr), 4)
        self.wer_scores_before_sr = hard_round(numpy.mean(wer_scores_before_sr), 4)

        return self

    def save_result(self) -> 'MetricManager':
        """
        save_result
            Saves every (original, generated, low-resolution) triple as
            valid_hr.png, valid_gen.png and valid_lr.png inside a new folder below
            @see save_dir. The folder is named after the current timestamp with
            microseconds.

            `tlx` is imported in a later cell of this notebook, so that cell has to
            run before this method is called.

            @returns MetricManager
        """
        for original_image, generated_image, lowres_image in zip(
            self.original_images, self.generated_images, self.lowres_images
        ):
            time = datetime.now()

            file_folder = f"{save_dir}/{time.strftime('%Y-%m-%d %H:%M:%S.%f')}"
            os.makedirs(file_folder)
            tlx.vision.save_image(original_image, file_name = f'{file_folder}/valid_hr.png', path = save_dir)
            tlx.vision.save_image(generated_image, file_name = f'{file_folder}/valid_gen.png', path = save_dir)
            tlx.vision.save_image(lowres_image, file_name = f'{file_folder}/valid_lr.png', path = save_dir)

        return self

In [None]:
from tensorlayerx.dataflow import Dataset, DataLoader
from tensorlayerx.vision import load_images
from tensorlayerx.vision.transforms import Compose, Normalize, Resize, HWC2CHW
import numpy
import cv2

In [None]:
def image_transformer_eval(image_hr):
    """
    image_transformer_eval
        Val and test images are uncropped, so this preprocessor derives the
        low-resolution (LowRes) input from the HighRes image itself:

        1. Resize the HighRes image to 1024 x 1024, so both sides divide
           evenly by 4.

        2. Generate the LowRes image by resizing the HighRes image to a quarter
           of its height and width.

        3. Apply the same value normalization (mean 127.5, std 127.5) to both
           images.

        4. Transpose both images from HWC into CHW.

        @params image_hr: HighRes image in HWC layout.

        @returns tuple: (normalized LowRes image, normalized HighRes image)
    """
    # For even division
    image_hr = Compose([Resize(size = (1024, 1024))])(image_hr)

    hr_height, hr_width = image_hr.shape[0], image_hr.shape[1]

    # cv2 takes dsize as (width, height).
    image_lr = cv2.resize(image_hr, dsize = (hr_width // 4, hr_height // 4))

    to_model_input = Compose([
        Normalize(mean=(127.5), std=(127.5), data_format='HWC'),
        HWC2CHW()
    ])

    lr_input = to_model_input(image_lr)
    hr_input = to_model_input(image_hr)

    return lr_input, hr_input

In [None]:
# Data Loader Pattern
class DatasetLoader(Dataset):
    """
    Dataset over a collection of high-resolution images. Every item is passed
    through image_transformer on access, and the transformer's return value
    is the sample.
    """

    def __init__(self, highres_image, image_transformer = image_transformer_eval):
        # Callable applied to a single high-resolution image in __getitem__.
        self.image_transformer = image_transformer
        # Indexable collection of high-resolution images.
        self.hr_data = highres_image

    def __len__(self):
        """Number of high-resolution images in the dataset."""
        return len(self.hr_data)

    def __getitem__(self, index):
        """Returns the transformed sample for the image stored at index."""
        highres_sample = self.hr_data[index]

        return self.image_transformer(highres_sample)

In [None]:
# Load data from drive
test_hr_image = load_images(path = test_image_path, n_threads = 32)

test_hr_image = numpy.array(test_hr_image).astype('uint8')

test_dataset = DatasetLoader(test_hr_image, image_transformer = image_transformer_eval)

print(f"Dataset for this batch - Test: {len(test_dataset)}")

# Evaluation has to be reproducible, so the loader must not shuffle: together
# with drop_last = True, shuffling discards a different random tail of images
# on every run, which makes the reported test metrics vary between runs.
# NOTE(review): drop_last = True still leaves the final incomplete batch out of
# the evaluation - confirm that this is intended.
test_dataset = DataLoader(test_dataset, batch_size = 16, shuffle = False, drop_last = True)

In [None]:
import tensorlayerx as tlx
# Select the TensorFlow backend for TensorLayerX and silence TensorFlow's C++ logging.
# NOTE(review): earlier cells of this notebook already import from tensorlayerx
# (e.g. `from tensorlayerx import convert_to_tensor`), and this cell imports it
# right above. These variables are presumably only read when the library is
# first imported - confirm, and if so move both assignments before the first
# tensorlayerx import.
os.environ['TL_BACKEND'] = 'tensorflow'
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3'

from tensorlayerx.nn import Module
from tensorlayerx.nn import Conv2d, BatchNorm2d, SubpixelConv2d, Sequential
from tensorlayerx import ReLU

In [None]:
class ResidualBlock(Module):
    """
    Residual block of the generator: conv -> batch norm -> conv -> batch norm,
    whose result is added to the block input. Both convolutions have 128 output
    channels, a 3 x 3 kernel, ReLU activation and no bias.

    @param int resblock_number: Index of the block; only used to build the
        layer names.
    """

    def __init__(self, resblock_number: int = 1):
        super().__init__()

        conv_prefix = f"conv2d_resblock_{resblock_number}"
        batchnorm_prefix = f"batchnorm2d_resblock_{resblock_number}"

        self.conv1 = Conv2d(
            name = f"{conv_prefix}_1",
            data_format = 'channels_first',
            out_channels = 128,
            kernel_size = (3, 3),
            stride = (1, 1),
            padding = 'SAME',
            act = ReLU,
            b_init = None
        )

        self.bn1 = BatchNorm2d(
            name = f"{batchnorm_prefix}_1",
            data_format = 'channels_first',
            num_features = 128,
            act = None
        )

        self.conv2 = Conv2d(
            name = f"{conv_prefix}_2",
            data_format = 'channels_first',
            out_channels = 128,
            kernel_size = (3, 3),
            stride = (1, 1),
            padding = 'SAME',
            act = ReLU,
            b_init = None
        )

        self.bn2 = BatchNorm2d(
            name = f"{batchnorm_prefix}_2",
            data_format = 'channels_first',
            num_features = 128,
            act = None
        )

    def forward(self, x):
        """Returns x plus the output of the conv / batch norm stack applied to x."""
        residual = self.bn1(self.conv1(x))
        residual = self.bn2(self.conv2(residual))

        return x + residual

In [None]:
class Generator(Module):
    """
    Super-resolution generator.

    Layout: an input convolution, 16 residual blocks, a convolution + batch norm
    whose result is added to the output of the input convolution, two
    convolution + sub-pixel stages with scale 2 each, and a 1 x 1 output
    convolution with Tanh activation that produces 3 channels.
    """

    def __init__(self):
        super().__init__()

        self.conv1 = Conv2d(
            name = "conv2d_G_1",
            data_format = 'channels_first',
            out_channels = 128,
            kernel_size = (3, 3),
            stride = (1, 1),
            act = ReLU
        )

        self.residual_block = self.make_layer()

        self.conv2 = Conv2d(
            name = "conv2d_G_2",
            data_format = 'channels_first',
            out_channels = 128,
            kernel_size = (3, 3),
            stride = (1, 1),
            act = ReLU,
            b_init = None
        )

        self.bn1 = BatchNorm2d(
            name = "batchnorm2d_G_1",
            data_format = 'channels_first',
            num_features = 128
        )

        # First upsampling stage: 512-channel convolution followed by sub-pixel scale 2.
        self.conv3 = Conv2d(
            name = "conv2d_G_3",
            data_format = 'channels_first',
            out_channels = 512,
            kernel_size = (3, 3),
            stride = (1, 1)
        )

        self.subpixelconv1 = SubpixelConv2d(
            name = "subpixelconv2d_G_1",
            data_format = 'channels_first',
            scale = 2,
            act = ReLU
        )

        # Second upsampling stage, same layout as the first one.
        self.conv4 = Conv2d(
            name = "conv2d_G_4",
            data_format = 'channels_first',
            out_channels = 512,
            kernel_size = (3, 3),
            stride = (1, 1)
        )

        self.subpixelconv2 = SubpixelConv2d(
            name = "subpixelconv2d_G_2",
            data_format = 'channels_first',
            scale = 2,
            act = ReLU
        )

        self.output = Conv2d(
            name = "conv2d_G_output",
            data_format = 'channels_first',
            out_channels = 3,
            kernel_size = (1, 1),
            stride = (1, 1),
            act = tlx.Tanh
        )

    def make_layer(self):
        """Builds the Sequential of 16 residual blocks, numbered 0 to 15."""
        return Sequential([ResidualBlock(block_number) for block_number in range(16)])

    def forward(self, x):
        """Runs the generator on a batch x and returns the output of the Tanh convolution."""
        shallow_features = self.conv1(x)

        deep_features = self.residual_block(shallow_features)
        deep_features = self.bn1(self.conv2(deep_features))

        # Skip connection around the whole residual trunk.
        features = deep_features + shallow_features

        features = self.subpixelconv1(self.conv3(features))
        features = self.subpixelconv2(self.conv4(features))

        return self.output(features)

## Model

In [None]:
# Instantiate the generator, build it with an input of shape (16, 3, 56, 56)
# (the layers use data_format 'channels_first'), then load the weights of the
# selected epoch from used_weight_g.
# NOTE(review): skip = False presumably makes loading fail when a weight is
# missing from the file - confirm against the TensorLayerX documentation.
generator_model = Generator()
generator_model.init_build(tlx.nn.Input(shape=(16, 3, 56, 56)))
generator_model.load_weights(used_weight_g, format = "npz_dict", skip = False)

In [None]:
from tqdm import tqdm

## Test Run

In [None]:
# Put the generator in evaluation mode before running the test batches.
generator_model.set_eval()

# One list of per-batch scores for every reported metric.
test_ssim, test_psnr, test_cer, test_wer, test_cer_non_sr, test_wer_non_sr = [], [], [], [], [], []

for lr_patch, hr_patch in tqdm(test_dataset):
    gen_image = generator_model(lr_patch)

    # Computes every metric of this batch and saves the qualitative results.
    metrics = MetricManager(hr_patch, gen_image, lr_patch)
    metrics.ssim().psnr().cer_wer().save_result()

    test_ssim.append(float(metrics.ssim_scores))
    test_psnr.append(float(metrics.psnr_scores))
    test_cer.append(float(metrics.cer_scores))
    test_wer.append(float(metrics.wer_scores))
    test_cer_non_sr.append(float(metrics.cer_scores_before_sr))
    test_wer_non_sr.append(float(metrics.wer_scores_before_sr))

# Without any batch the means below are NaN and the failure surfaces far away
# from its cause, so stop here with an explicit message.
if(len(test_ssim) == 0):
    raise RuntimeError("The test loader produced no batch; check test_image_path and the loader's batch size.")

# From here on every name holds the truncated mean over all batches.
test_ssim = hard_round(numpy.mean(test_ssim), 4)
test_psnr = hard_round(numpy.mean(test_psnr), 4)
test_cer = hard_round(numpy.mean(test_cer), 4)
test_wer = hard_round(numpy.mean(test_wer), 4)
test_cer_non_sr = hard_round(numpy.mean(test_cer_non_sr), 4)
test_wer_non_sr = hard_round(numpy.mean(test_wer_non_sr), 4)

message = f"""
MODEL {model_name} testing result:

model file: {used_weight_g}
SSIM: {test_ssim}
PSNR: {test_psnr}
CER [Before SR -> After SR] : {test_cer_non_sr} -> {test_cer}
WER [Before SR -> After SR]  : {test_wer_non_sr} -> {test_wer}
"""

# print renders the line breaks of the report; a bare `message` expression
# shows the escaped repr of the string instead.
print(message)