### Либы

In [1]:
import numpy as np
import pandas as pd
from tensorflow.keras.utils import to_categorical
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv2D, MaxPooling2D, Flatten, Dense, Dropout, BatchNormalization, GlobalAveragePooling2D
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.callbacks import ReduceLROnPlateau, EarlyStopping
from sklearn.model_selection import train_test_split

### Загружаем данные

In [2]:
images = np.load('dataset/images.npy')
labels = np.load('dataset/labels.npy')
images_sub = np.load('dataset/images_sub.npy')

In [3]:
# нормализуем наши датасетики
images = images.astype('float32') / 255.0
images_sub = images_sub.astype('float32') / 255.0

In [4]:
labels_cat = to_categorical(labels, num_classes=26)

### Создаём рабочие датасеты

In [5]:
X_train, X_val, y_train, y_val = train_test_split(
    images, labels_cat, test_size=0.1, random_state=42)

### Готовим аугментацию чтоб не переобучиться

In [6]:
train_datagen = ImageDataGenerator(
    rotation_range=15,
    width_shift_range=0.1,
    height_shift_range=0.1,
    zoom_range=0.1,
    horizontal_flip=True
)

In [7]:
val_datagen = ImageDataGenerator()

In [8]:
train_gen = train_datagen.flow(X_train, y_train, batch_size=64)
val_gen = val_datagen.flow(X_val, y_val, batch_size=64)

### Будем использовать CNN

> ChatGPT подсказала, как можно её улучшить для большего эффекта 🔥

In [9]:
model = Sequential([
    Conv2D(64, (3,3), activation='relu', padding='same', input_shape=(48,48,3)),
    BatchNormalization(),
    Conv2D(64, (3,3), activation='relu', padding='same'),
    BatchNormalization(),
    MaxPooling2D((2,2)),
    Dropout(0.3),

    Conv2D(128, (3,3), activation='relu', padding='same'),
    BatchNormalization(),
    Conv2D(128, (3,3), activation='relu', padding='same'),
    BatchNormalization(),
    MaxPooling2D((2,2)),
    Dropout(0.4),

    Conv2D(256, (3,3), activation='relu', padding='same'),
    BatchNormalization(),
    GlobalAveragePooling2D(),
    Dropout(0.5),

    Dense(256, activation='relu'),
    Dropout(0.4),
    Dense(26, activation='softmax')
])

  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


In [10]:
model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])

In [11]:
# колбэки - помогают повысить эффективность CNN
lr_reduce = ReduceLROnPlateau(
    monitor='val_accuracy', patience=3, factor=0.5, min_lr=1e-5, verbose=1)

early_stop = EarlyStopping(
    monitor='val_accuracy', patience=7, restore_best_weights=True)

### Обучаем

In [12]:
model.fit(
    train_gen,
    epochs=30,
    validation_data=val_gen,
    callbacks=[lr_reduce, early_stop],
    steps_per_epoch=len(train_gen),
    validation_steps=len(val_gen)
)

  self._warn_if_super_not_called()


Epoch 1/30
[1m282/282[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m273s[0m 942ms/step - accuracy: 0.0459 - loss: 3.4513 - val_accuracy: 0.0385 - val_loss: 3.2926 - learning_rate: 0.0010
Epoch 2/30
[1m282/282[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m268s[0m 951ms/step - accuracy: 0.0775 - loss: 3.1583 - val_accuracy: 0.1195 - val_loss: 3.0217 - learning_rate: 0.0010
Epoch 3/30
[1m282/282[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m267s[0m 946ms/step - accuracy: 0.1306 - loss: 2.9332 - val_accuracy: 0.1840 - val_loss: 2.6874 - learning_rate: 0.0010
Epoch 4/30
[1m282/282[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m272s[0m 964ms/step - accuracy: 0.1951 - loss: 2.6387 - val_accuracy: 0.2835 - val_loss: 2.3913 - learning_rate: 0.0010
Epoch 5/30
[1m282/282[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m266s[0m 942ms/step - accuracy: 0.2997 - loss: 2.2320 - val_accuracy: 0.4375 - val_loss: 1.7627 - learning_rate: 0.0010
Epoch 6/30
[1m282/282[0m [32m━━━━━━━━━━━━━━━━━━

<keras.src.callbacks.history.History at 0x29f49f058b0>

### Предскажем результат

In [13]:
pred_probs = model.predict(images_sub, batch_size=256)
pred_classes = np.argmax(pred_probs, axis=1)

[1m196/196[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m120s[0m 612ms/step


In [14]:
submission = pd.DataFrame({
    'Id': np.arange(len(pred_classes)),
    'Category': pred_classes
})
submission.to_csv('submission.csv', index=False)