In [37]:
import os

import numpy as np
import pandas as pd
import tensorflow as tf
from PIL import Image
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from tensorflow.keras import layers, models

# Placeholder locations of the image directory and the CSV index file.
image_folder = 'path_to_images'
csv_path = 'path_to_csv'

# CSV index; later cells read its 'file' and 'label' columns.
data = pd.read_csv(csv_path)

# Accumulators for the decoded images and their labels.
X_train_list, y_train_list = [], []

In [38]:
desired_width, desired_height = 32, 32

# Start from empty lists so that re-running this cell does not append
# a second copy of every image to the accumulators.
X_train_list = []
y_train_list = []

# Walk the CSV rows and load each referenced image.
for file_name, label in zip(data['file'], data['label']):
    # os.path.join supplies the separator that plain string concatenation
    # silently omits when image_folder has no trailing slash.
    image_path = os.path.join(image_folder, file_name)
    try:
        # The context manager closes the underlying file handle as soon as
        # the converted/resized copy has been produced.
        with Image.open(image_path) as source_image:
            image = source_image.convert('RGB').resize((desired_width, desired_height))
        image_array = np.array(image)
    except Exception as e:
        # Best effort: report the unreadable image and move on to the next row.
        print(f"Ошибка при обработке изображения {image_path}: {e}")
        continue
    # Append image and label together so the two lists always stay aligned.
    X_train_list.append(image_array)
    y_train_list.append(label)

In [40]:
# Fail early with a clear message instead of an opaque error from the split.
if not X_train_list:
    raise ValueError("Не загружено ни одного изображения: проверьте image_folder и столбец 'file' в CSV.")

# Stack the images and scale pixel values from [0, 255] to [0, 1].
X_all = np.array(X_train_list).astype('float32') / 255.0

# Map the raw labels to consecutive integer class indices (0..n_classes-1).
label_encoder = LabelEncoder()
y_all = label_encoder.fit_transform(y_train_list)

# Hold out 20% of the samples for validation; the fixed seed keeps the split reproducible.
X_train, X_val, y_train, y_val = train_test_split(X_all, y_all, test_size=0.2, random_state=42)

In [41]:
# Image arrays built with np.array(PIL image) are laid out as
# (height, width, channels), so the input shape must list height first.
input_shape = (desired_height, desired_width, 3)

# One output unit per class known to the fitted label encoder.
num_classes = len(label_encoder.classes_)

# Small CNN: two conv/pool stages followed by a dense classifier head.
model = models.Sequential([
    layers.Conv2D(32, (3, 3), activation='relu', input_shape=input_shape),
    layers.MaxPooling2D((2, 2)),
    layers.Conv2D(64, (3, 3), activation='relu'),
    layers.MaxPooling2D((2, 2)),
    layers.Flatten(),
    layers.Dense(128, activation='relu'),
    layers.Dense(num_classes, activation='softmax')
])

# Sparse categorical cross-entropy matches the integer class indices
# produced by the label encoder (no one-hot encoding required).
model.compile(optimizer='adam',
              loss='sparse_categorical_crossentropy',
              metrics=['accuracy'])

model.summary()

Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 conv2d (Conv2D)             (None, 30, 30, 32)        896       
                                                                 
 max_pooling2d (MaxPooling2D  (None, 15, 15, 32)       0         
 )                                                               
                                                                 
 conv2d_1 (Conv2D)           (None, 13, 13, 64)        18496     
                                                                 
 max_pooling2d_1 (MaxPooling  (None, 6, 6, 64)         0         
 2D)                                                             
                                                                 
 flatten (Flatten)           (None, 2304)              0         
                                                                 
 dense (Dense)               (None, 128)               2

In [48]:
# Train for 12 epochs with mini-batches of 32, passing the hold-out split
# as validation data; the returned History object is kept in `history`.
history = model.fit(
    x=X_train,
    y=y_train,
    batch_size=32,
    epochs=12,
    validation_data=(X_val, y_val),
)

Epoch 1/12
Epoch 2/12
Epoch 3/12
Epoch 4/12
Epoch 5/12
Epoch 6/12
Epoch 7/12
Epoch 8/12
Epoch 9/12
Epoch 10/12
Epoch 11/12
Epoch 12/12


In [53]:
# Persist the trained network to an HDF5 file.
model.save('russian_letter_recognition_model.h5')

# Store the encoder's class array next to the model so that predicted
# class indices can be mapped back to the original labels later.
encoder_classes = label_encoder.classes_
np.save('label_encoder.npy', encoder_classes)

Точность модели на валидационном наборе данных: 0.8083156943321228


In [68]:
# Rebuild the label encoder from the class array saved during training.
saved_classes = np.load('label_encoder.npy')
label_encoder = LabelEncoder()
label_encoder.classes_ = saved_classes

# Restore the trained network from disk.
model = tf.keras.models.load_model('russian_letter_recognition_model.h5')

# Dictionary mapping a zero-based alphabet position to its Russian letter.
# Built from the alphabet string so that no letter can be mistyped or
# duplicated (index 29 is the soft sign 'Ь', not a second 'Б').
RUSSIAN_ALPHABET = 'АБВГДЕЁЖЗИЙКЛМНОПРСТУФХЦЧШЩЪЫЬЭЮЯ'
letter_mapping = dict(enumerate(RUSSIAN_ALPHABET))

def recognize_letter(image_path, target_size=(32, 32)):
    """Predict the label of the letter shown in an image file.

    Uses the module-level ``model`` and ``label_encoder``.

    Args:
        image_path: Path of the image file to classify.
        target_size: ``(width, height)`` the image is resized to before
            prediction. Must match the size the model was trained on;
            the default is the 32x32 used by the training cells.

    Returns:
        The predicted label as stored in ``label_encoder.classes_``
        (the original label value, not the encoded class index).
    """
    # The context manager closes the file handle once the converted and
    # resized copy has been created.
    with Image.open(image_path) as source_image:
        image = source_image.convert('RGB').resize(target_size)

    # Same preprocessing as in training: float32 pixels scaled to [0, 1].
    image_array = np.array(image).astype('float32') / 255.0
    # The model expects a batch dimension, so wrap the single image.
    image_array = np.expand_dims(image_array, axis=0)

    prediction = model.predict(image_array)

    # Index of the most probable class, mapped back to the original label.
    predicted_index = np.argmax(prediction)
    predicted_label = label_encoder.inverse_transform([predicted_index])[0]

    return predicted_label

# Classify a single test image (placeholder path).
image_path_to_recognize = 'path_to_test_jpeg'
recognized_letter = recognize_letter(image_path_to_recognize)

# letter_mapping is keyed from 0, so the label is shifted down by one
# (presumably the dataset labels are 1-based; verify against the CSV).
recognized_char = letter_mapping[recognized_letter - 1]
print(f"Распознанная буква: {recognized_char}")

Распознанная буква: О
