In [73]:
# torch >= 2.0
# torchvision >= 0.15.1

import torch
from torch import nn
from torch.utils.data import DataLoader
from torchvision import transforms
from torchvision.datasets import ImageFolder

### Подготовка данных

In [74]:
batch_size = 64

train_dir = 'fer-2013/train'
test_dir = 'fer-2013/test'

# Трансформация входных изображений: преобразование в тензор и в оттенки серого
transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Grayscale(1)
])

# Переводим папку с изображениями в DataLoader
train = ImageFolder(train_dir, transform=transform)
test = ImageFolder(test_dir, transform=transform)

train_loader = DataLoader(train, batch_size=batch_size, shuffle=True)
test_loader = DataLoader(test, batch_size=batch_size, shuffle=True)

# Подключаем устройство. На cuda во много-много раз быстрее
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

In [75]:
# Информация о данных
# Print some basic information about the train and test datasets
print('Number of training examples: {}'.format(len(train)))
print('Number of test examples: {}'.format(len(test)))
print('Batch size: {}'.format(batch_size))

# Print some basic information about the train and test loaders
print('Number of training batches: {}'.format(len(train_loader)))
print('Number of test batches: {}'.format(len(test_loader)))

# Check the shape of the training images
images, labels = next(iter(train_loader))
print("Shape of training images:", images.shape)
print("Shape of training labels:", labels.shape)

# Check the range of the pixel values in the training images
print("Minimum pixel value:", torch.min(images))
print("Maximum pixel value:", torch.max(images))

print('Using device:', device)

Number of training examples: 28709
Number of test examples: 7178
Batch size: 64
Number of training batches: 449
Number of test batches: 113
Shape of training images: torch.Size([64, 1, 48, 48])
Shape of training labels: torch.Size([64])
Minimum pixel value: tensor(0.)
Maximum pixel value: tensor(0.9999)
Using device: cuda


### Модель
из примера на [Kaggle](https://www.kaggle.com/code/anushkakalwale/fer2013-cnn-lstm) только без последних слоев с LSTM

In [76]:
model = nn.Sequential(
    # 48x48, 1
    nn.Conv2d(1, 32, 3, padding='valid'),
    nn.BatchNorm2d(32),
    nn.ReLU(),
    # 46x46, 32
    
    nn.Conv2d(32, 64, 3, padding='same'),
    nn.BatchNorm2d(64),
    nn.ReLU(),
    nn.MaxPool2d(2),
    # 23x23, 64

    nn.Conv2d(64, 64, 3, padding='valid'),
    nn.BatchNorm2d(64),
    nn.ReLU(),
    # 21x21, 64

    nn.Conv2d(64, 128, 3, padding='same'),
    nn.BatchNorm2d(128),
    nn.ReLU(),
    nn.MaxPool2d(2),
    # 10x10, 128

    nn.Conv2d(128, 128, 3, padding='valid'),
    nn.BatchNorm2d(128),
    nn.ReLU(),
    nn.MaxPool2d(2),
    # 4x4, 128

    nn.Flatten(),
    # nn.LSTM(128 * 4 * 4, 128),
    # nn.Flatten(),
    # nn.LSTM(128, 64),

    nn.Linear(128 * 4 * 4, 200),
    nn.ReLU(),
    nn.Dropout(0.6),
    nn.Linear(200, 7),
    nn.Softmax(1)
)

### Тренировка

In [77]:
num_epochs = 3
learning_rate = 0.001

criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)

# Используем cuda, если есть
model.to(device)

Sequential(
  (0): Conv2d(1, 32, kernel_size=(3, 3), stride=(1, 1), padding=valid)
  (1): BatchNorm2d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (2): ReLU()
  (3): Conv2d(32, 64, kernel_size=(3, 3), stride=(1, 1), padding=same)
  (4): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (5): ReLU()
  (6): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  (7): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=valid)
  (8): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (9): ReLU()
  (10): Conv2d(64, 128, kernel_size=(3, 3), stride=(1, 1), padding=same)
  (11): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (12): ReLU()
  (13): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  (14): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=valid)
  (15): BatchNorm2d(128, eps=1e-05, momentum=0.1, affi

In [78]:
total_step = len(train_loader)
loss_list = []
acc_list = []

for epoch in range(num_epochs):
    for i, (images, labels) in enumerate(train_loader):
        # Очищаем градиенты
        optimizer.zero_grad()
        
        if (device.type == 'cuda'):
            images = images.cuda()
            labels = labels.cuda()

        # Прямой запуск
        outputs = model(images)
        loss = criterion(outputs, labels)
        loss_list.append(loss.item())

        # Обратное распространение и оптимизатор
        loss.backward()
        optimizer.step()

        # Отслеживание точности
        total = labels.size(0)
        _, predicted = torch.max(outputs.data, 1)
        correct = (predicted == labels).sum().item()
        acc_list.append(correct / total)

        if (i + 1) % 100 == 0:
            print('Epoch [{}/{}], Step [{}/{}], Loss: {:.4f}, Accuracy: {:.2f}%'
                  .format(epoch + 1, num_epochs, i + 1, total_step, loss.item(),
                          (correct / total) * 100))

Epoch [1/3], Step [100/449], Loss: 1.7947, Accuracy: 34.38%
Epoch [1/3], Step [200/449], Loss: 1.8698, Accuracy: 25.00%
Epoch [1/3], Step [300/449], Loss: 1.7719, Accuracy: 37.50%
Epoch [1/3], Step [400/449], Loss: 1.7690, Accuracy: 37.50%
Epoch [2/3], Step [100/449], Loss: 1.5891, Accuracy: 57.81%
Epoch [2/3], Step [200/449], Loss: 1.7197, Accuracy: 43.75%
Epoch [2/3], Step [300/449], Loss: 1.7053, Accuracy: 45.31%
Epoch [2/3], Step [400/449], Loss: 1.6861, Accuracy: 45.31%
Epoch [3/3], Step [100/449], Loss: 1.7270, Accuracy: 42.19%
Epoch [3/3], Step [200/449], Loss: 1.6546, Accuracy: 50.00%
Epoch [3/3], Step [300/449], Loss: 1.7015, Accuracy: 45.31%
Epoch [3/3], Step [400/449], Loss: 1.7083, Accuracy: 43.75%


### Проверка модели

In [79]:
model.eval()
with torch.no_grad():
    correct = 0
    total = 0
    for images, labels in test_loader:
        if (device.type == 'cuda'):
            images = images.cuda()
            labels = labels.cuda()

        outputs = model(images)
        _, predicted = torch.max(outputs.data, 1)
        total += labels.size(0)
        correct += (predicted == labels).sum().item()

    print('Test Accuracy of the model on the 10000 test images: {} %'.format((correct / total) * 100))

Test Accuracy of the model on the 10000 test images: 48.27249930342714 %


### Сохраняем модель

In [80]:
# Сохраняем только веса модели
torch.save(model.state_dict(), 'fer-2013/cnn_model_dict.pt')

# Сохраняем всю модель полностью
torch.save(model, 'fer-2013/entire_model.pt')