<a href="https://colab.research.google.com/github/vdudiev/text-recognising/blob/main/%D0%A0%D0%B0%D1%81%D0%BF%D0%BE%D0%B7%D0%BD%D0%B0%D0%B2%D0%B0%D0%BD%D0%B8%D0%B5%D0%A2%D0%B5%D0%BA%D1%81%D1%82%D0%B0%D0%9D%D0%B0%D0%9A%D0%B0%D1%80%D1%82%D0%B8%D0%BD%D0%BA%D0%B5.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [5]:
import os
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3'
!pip3 install -q idx2numpy
import cv2
import numpy as np
import tensorflow  as tf
from tensorflow import keras
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Convolution2D, MaxPooling2D, Dropout, Flatten, Dense
from tensorflow.keras.optimizers import RMSprop
import idx2numpy
from google.colab.patches import cv2_imshow
from typing import *
import time
import matplotlib.pyplot as plt


  Building wheel for idx2numpy (setup.py) ... [?25l[?25hdone


In [4]:
def cnn_print_digit(d):
    """Print a flattened 28x28 image as a text grid of one-decimal values.

    The shape of ``d`` is printed first. Each printed line then holds the 28
    values ``d[28 * row + col]`` for one fixed ``col``, i.e. the image is
    printed transposed relative to its row-major storage.

    Args:
        d: Indexable with at least 784 numeric items and a ``shape`` attribute.
    """
    side = 28
    print(d.shape)
    for col in range(side):
        cells = ["{0:.1f} ".format(d[side * row + col]) for row in range(side)]
        print("".join(cells))


def cnn_print_digit_2d(d):
    """Print a 2-D array transposed, as a text grid of one-decimal values.

    The shape of ``d`` is printed first. Each following line corresponds to
    one index ``y`` of the second axis and lists ``d[x][y]`` for every ``x``
    of the first axis, so an array of shape (R, C) produces C lines with R
    values each.

    Args:
        d: 2-D indexable with a ``shape`` attribute (e.g. a NumPy array).
    """
    n_first, n_second = d.shape[0], d.shape[1]
    print(d.shape)
    # The outer loop must run over the second axis and the inner one over the
    # first, because the element is addressed as d[x][y]; using the bounds the
    # other way round only works for square arrays.
    for y in range(n_second):
        print("".join("{0:.1f} ".format(d[x][y]) for x in range(n_first)))

In [6]:
# Character codes for the 62 output classes, in class-index order:
# '0'-'9', then 'A'-'Z', then 'a'-'z'. predict_img converts the predicted
# class index to a character via chr(emnist_labels[index]).
emnist_labels = [
    *range(ord('0'), ord('9') + 1),
    *range(ord('A'), ord('Z') + 1),
    *range(ord('a'), ord('z') + 1),
]


In [7]:
def text_from_img_writer_model():
    """Build and compile the CNN used to classify single 28x28 characters.

    Architecture: two blocks of (Conv 3x3 -> Conv 3x3 -> MaxPool 2x2 ->
    Dropout 0.25) with 32 and then 64 filters, followed by Flatten,
    Dense(512, relu), Dropout(0.5) and a softmax layer with one unit per
    entry of ``emnist_labels``.

    Returns:
        A compiled Keras ``Sequential`` model expecting input of shape
        (28, 28, 1), using categorical cross-entropy loss, the RMSprop
        optimizer and the accuracy metric.
    """
    model = Sequential([
        # Block 1: 32 filters.
        Convolution2D(filters=32, kernel_size=(3, 3), padding='same', input_shape=(28, 28, 1), activation='relu'),
        Convolution2D(filters=32, kernel_size=(3, 3), padding='same', activation='relu'),
        MaxPooling2D(pool_size=(2, 2)),
        Dropout(0.25),

        # Block 2: 64 filters.
        Convolution2D(filters=64, kernel_size=(3, 3), padding='same', activation='relu'),
        Convolution2D(filters=64, kernel_size=(3, 3), padding='same', activation='relu'),
        MaxPooling2D(pool_size=(2, 2), strides=(2, 2)),
        Dropout(0.25),

        # Classifier head.
        Flatten(),
        Dense(512, activation="relu"),
        Dropout(0.5),
        Dense(len(emnist_labels), activation="softmax"),
    ])
    # The legacy `decay` argument is intentionally not passed: a value of 0.0
    # means "no decay" anyway, and newer Keras optimizers reject the keyword.
    model.compile(loss='categorical_crossentropy',
                  optimizer=RMSprop(learning_rate=0.001, rho=0.9, epsilon=1e-08),
                  metrics=['accuracy'])
    return model

In [8]:
def train_model(model, data_dir='/content/drive/MyDrive/Colab Notebooks/gzip',
                subset_divisor=10, batch_size=64, epochs=40):
    """Train ``model`` on a leading slice of the EMNIST data stored as IDX files.

    Reads the four ``emnistByclass*`` IDX files from ``data_dir``, reshapes the
    images to (N, 28, 28, 1), keeps the first ``1 / subset_divisor`` of every
    array, scales pixels by 1/255, one-hot encodes the labels against
    ``len(emnist_labels)`` classes and calls ``model.fit``. The test split is
    passed to ``fit`` as validation data. The elapsed wall-clock time
    (including file loading) is printed at the end.

    Args:
        model: Compiled Keras model accepting (28, 28, 1) inputs.
        data_dir: Directory that contains the IDX files.
        subset_divisor: Keep only the first ``len // subset_divisor`` samples
            of each array; use 1 to train on everything.
        batch_size: Batch size forwarded to ``model.fit``.
        epochs: Number of epochs forwarded to ``model.fit``.

    Raises:
        ValueError: If ``subset_divisor`` is smaller than 1.
    """
    if subset_divisor < 1:
        raise ValueError("subset_divisor must be >= 1")

    t_start = time.time()

    def _load(file_name):
        # Read one IDX file from data_dir into a NumPy array.
        return idx2numpy.convert_from_file(os.path.join(data_dir, file_name))

    X_train = _load('emnistByclassTrainImagesIdx3Ubyte')
    y_train = _load('emnistByclassTrainLabelsIdx1Ubyte')

    X_test = _load('emnistByclassTestImagesIdx3Ubyte')
    y_test = _load('emnistByclassTestLabelsIdx1Ubyte')

    n_classes = len(emnist_labels)

    # Add the trailing channel axis expected by the convolutional layers.
    X_train = np.reshape(X_train, (X_train.shape[0], 28, 28, 1))
    X_test = np.reshape(X_test, (X_test.shape[0], 28, 28, 1))

    print(X_train.shape, y_train.shape, X_test.shape, y_test.shape, n_classes)

    # Work on a leading slice of the data to keep the run time manageable.
    X_train = X_train[:X_train.shape[0] // subset_divisor]
    y_train = y_train[:y_train.shape[0] // subset_divisor]
    X_test = X_test[:X_test.shape[0] // subset_divisor]
    y_test = y_test[:y_test.shape[0] // subset_divisor]

    # Normalize
    X_train = X_train.astype(np.float32) / 255.0
    X_test = X_test.astype(np.float32) / 255.0

    # One-hot encode the integer class labels.
    y_train_cat = keras.utils.to_categorical(y_train, n_classes)
    y_test_cat = keras.utils.to_categorical(y_test, n_classes)

    # Halve the learning rate when validation accuracy stalls for 3 epochs,
    # never going below 1e-5.
    learning_rate_reduction = keras.callbacks.ReduceLROnPlateau(monitor='val_accuracy', patience=3, verbose=1,
                                                                factor=0.5, min_lr=0.00001)

    model.fit(X_train, y_train_cat, validation_data=(X_test, y_test_cat), callbacks=[learning_rate_reduction],
              batch_size=batch_size, epochs=epochs)
    print("Training done, dT:", time.time() - t_start)


In [9]:
def predict(model, image_file):
    """Classify the single character stored in an image file.

    The file is loaded as a 28x28 grayscale image and handed to
    ``predict_img``.

    Args:
        model: Trained Keras model passed through to ``predict_img``.
        image_file: Path of the image to classify.

    Returns:
        The value returned by ``predict_img`` (the predicted character).
    """
    img = keras.preprocessing.image.load_img(image_file, target_size=(28, 28), color_mode='grayscale')
    # Propagate the prediction to the caller instead of discarding it.
    return predict_img(model, img)


def predict_img(model, img):
    """Return the character the model predicts for one 28x28 image.

    The image is turned into a batch of one, inverted and scaled with
    ``1 - pixel / 255``, has its first two axes swapped (the net effect of a
    270-degree ``rot90`` followed by ``fliplr``), and is reshaped to
    (1, 28, 28, 1) before being passed to ``model.predict``.

    Args:
        model: Object exposing a Keras-style ``predict`` method.
        img: Array-like image holding 28 * 28 pixel values.

    Returns:
        ``chr`` of the ``emnist_labels`` entry at the highest-scoring class.
    """
    batch = 1 - np.expand_dims(img, axis=0) / 255.0
    # Swap rows and columns of the single sample in the batch.
    batch[0] = np.swapaxes(batch[0], 0, 1).copy()
    batch = batch.reshape((1, 28, 28, 1))

    scores = model.predict([batch])
    best_class = np.argmax(scores, axis=1)[0]
    return chr(emnist_labels[best_class])


def letters_fetch(image_file: str, out_size=28):
    """Cut an image of text into per-character square crops, ordered by X.

    The image is converted to grayscale, thresholded at 127 and eroded with
    a 3x3 kernel; contours are then extracted with ``cv2.RETR_TREE``. Only
    contours whose parent in the hierarchy is contour 0 are treated as
    letters (presumably contour 0 is the outer frame of the page - confirm
    for images without a uniform light background). Each letter is cropped
    from the grayscale image, centred on a white square canvas and resized
    to ``out_size`` x ``out_size``.

    The eroded image and a copy of the input with green boxes drawn around
    the detected letters are displayed with ``cv2_imshow``.

    Args:
        image_file: Path of the image to process.
        out_size: Side length in pixels of every returned crop.

    Returns:
        A list of ``(x, w, crop)`` tuples sorted by ``x``, where ``x`` and
        ``w`` are the left edge and width of the letter's bounding box and
        ``crop`` is the resized square image. Empty if nothing was found.

    Raises:
        FileNotFoundError: If the image cannot be read.
    """
    img = cv2.imread(image_file)
    if img is None:
        # cv2.imread reports failure by returning None rather than raising.
        raise FileNotFoundError("Cannot read image file: {}".format(image_file))

    gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
    _, thresh = cv2.threshold(gray, 127, 255, cv2.THRESH_BINARY)
    img_erode = cv2.erode(thresh, np.ones((3, 3), np.uint8), iterations=1)

    # Get the contours. Taking the last two returned values keeps this
    # working whether findContours returns (contours, hierarchy) or
    # (image, contours, hierarchy).
    contours, hierarchy = cv2.findContours(img_erode, cv2.RETR_TREE, cv2.CHAIN_APPROX_NONE)[-2:]
    output = img.copy()

    letters = []
    # hierarchy is None when no contour was found at all.
    if hierarchy is not None:
        for idx, contour in enumerate(contours):
            if hierarchy[0][idx][3] != 0:
                continue
            (x, y, w, h) = cv2.boundingRect(contour)
            cv2.rectangle(output, (x, y), (x + w, y + h), (0, 255, 0), 3)
            letter_crop = gray[y:y + h, x:x + w]

            # Pad the crop to a white square canvas, centring the letter
            # along its shorter side (the offset is 0 along the longer one).
            size_max = max(w, h)
            letter_square = 255 * np.ones(shape=[size_max, size_max], dtype=np.uint8)
            y_pos = size_max // 2 - h // 2
            x_pos = size_max // 2 - w // 2
            letter_square[y_pos:y_pos + h, x_pos:x_pos + w] = letter_crop

            # Resize the letter to out_size x out_size and keep its X
            # coordinate and width alongside it.
            letters.append((x, w, cv2.resize(letter_square, (out_size, out_size), interpolation=cv2.INTER_AREA)))

    # Sort in place by the X coordinate (left to right).
    letters.sort(key=lambda item: item[0])

    # No cv2.waitKey call here: the images are shown through cv2_imshow, not
    # through an OpenCV window that would need a key press to close.
    cv2_imshow(img_erode)
    cv2_imshow(output)

    return letters


def img_to_concole(model: Any, image_file: str):
    """Recognise the text in an image and return it as a string.

    Letters are obtained from ``letters_fetch`` and classified one by one
    with ``predict_img``. A space is appended after a letter when the
    horizontal gap to the next letter exceeds a quarter of that letter's
    width; the last letter is never followed by a space.

    Args:
        model: Trained model forwarded to ``predict_img``.
        image_file: Path of the image to read.

    Returns:
        The recognised characters with inferred spaces.
    """
    letters = letters_fetch(image_file)
    last_pos = len(letters) - 1
    pieces = []
    for pos, (left, width, glyph) in enumerate(letters):
        # Distance from this letter's right edge to the next letter's left edge.
        if pos < last_pos:
            gap = letters[pos + 1][0] - left - width
        else:
            gap = 0
        pieces.append(predict_img(model, glyph))
        if gap > width / 4:
            pieces.append(' ')
    return "".join(pieces)



# model = text_from_img_writer_model()
# Load the test images and labels at module level so samples can be inspected.
x_test_for_show = idx2numpy.convert_from_file(
    '/content/drive/MyDrive/Colab Notebooks/gzip/emnistByclassTestImagesIdx3Ubyte')
y_test_for_show = idx2numpy.convert_from_file(
    '/content/drive/MyDrive/Colab Notebooks/gzip/emnistByclassTestLabelsIdx1Ubyte')

# Add a trailing channel axis: (N, 28, 28, 1).
x_test_for_show = x_test_for_show.reshape((len(x_test_for_show), 28, 28, 1))

# Keep only the first 1/k of the samples.
k = 10
x_test_for_show = x_test_for_show[:len(x_test_for_show) // k]
y_test_for_show = y_test_for_show[:len(y_test_for_show) // k]

# Normalize: convert to float32 and divide by 255.
x_test_for_show = x_test_for_show.astype(np.float32) / 255.0





# plt.figure(figsize=(10, 5))

# for i in range(100) :
#     plt.subplot(10, 10, i+1)
#     plt.xticks([])
#     plt.yticks([])
#     plt.imshow(X_test_for_show[i], cmap=plt.cm.binary)

# plt.show()

# if __name__ == "__main__":
#     model = text_from_img_writer_model()
#     train_model(model)
#     model.save('text_from_img_writer_model.h5')

#     model = keras.models.load_model('text_from_img_writer_model.h5')
#     s_out = img_to_concole(model, "hello.png")
#     print(s_out)

Обучение модели

In [10]:
# Run this cell if no trained model has been saved to disk yet:
# build the network, train it, and save it to 'text_from_img_writer_model.h5'.
model = text_from_img_writer_model()
train_model(model)
model.save('text_from_img_writer_model.h5')

KeyboardInterrupt: ignored

In [11]:
# Run this cell when a trained model is already saved on disk.
# The file name matches the one the training cell writes with model.save(),
# so a fresh "train, then load" run finds the file.
model_from_rep = tf.keras.models.load_model('text_from_img_writer_model.h5')

OSError: ignored

In [3]:
# result_for_show = predict_img(model_from_rep,X_test_for_show)


# print(result_for_show)


# Recognise the text in the sample image and print it.
# model_from_rep must already exist, i.e. the model-loading cell has to be
# run successfully before this one.
s_out = img_to_concole(model_from_rep, "/content/drive/MyDrive/Colab Notebooks/hello_world_test.png")
print("надпись на картинке:",s_out)

NameError: ignored