In [7]:
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader
from torchvision import transforms
from datasets import load_dataset
import matplotlib.pyplot as plt
import numpy as np

from models import MLP

# Hyperparameters and compute device
batch_size = 128
test_batch_size = 1000
learning_rates = [0.1, 0.01, 0.001, 0.0001]
nb_epochs = 3
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Load the MNIST dataset
mnist = load_dataset("mnist")

# Estimate the pixel mean / standard deviation used for normalization.
# Only the first 1000 training images are sampled, so these values are
# estimates rather than statistics of the full training split.
sample_data = torch.stack(
    [transforms.ToTensor()(mnist["train"][i]["image"]) for i in range(1000)]
)
mean, std = sample_data.mean().item(), sample_data.std().item()

# Preprocessing pipeline: convert to tensor, then normalize with the estimates
transform = transforms.Compose(
    [transforms.ToTensor(), transforms.Normalize((mean,), (std,))]
)

# Dataset preprocessing helper
def transform_dataset(dataset):
    """Return ``dataset`` with the tensor conversion attached via ``with_transform``.

    The attached function receives a batch dict. Every entry of
    ``batch["image"]`` is passed through the module-level ``transform``
    pipeline and flattened to 1-D (28x28 -> 784 per the original author's
    note), and ``batch["label"]`` is converted to a tensor.
    """
    def transform_fn(batch):
        flat_images = []
        for picture in batch["image"]:
            # view(-1) collapses the transformed image into a single dimension
            flat_images.append(transform(picture).view(-1))
        stacked = torch.stack(flat_images)
        labels = torch.tensor(batch["label"])
        return {"image": stacked, "label": labels}

    return dataset.with_transform(transform_fn)

# Attach the preprocessing transform to the train and test splits
train_dataset = transform_dataset(mnist["train"])
test_dataset = transform_dataset(mnist["test"])

# Batch iterators: training batches are shuffled, test batches keep their order
train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
test_loader = DataLoader(test_dataset, batch_size=test_batch_size, shuffle=False)

# Learning-rate sweep: for each rate, a fresh MLP is trained `repetition` times
# and the best test accuracy of each run is averaged.
repetition = 3  # runs per learning rate; constant, so assigned once up front
criterion = nn.CrossEntropyLoss()  # constructed once and shared by every run

for learning_rate in learning_rates:
    # Running sum of each run's best test accuracy (averaged after the runs).
    # The identifier's spelling is kept as-is because it is a module-level name.
    sum_of_accuray = 0

    for i in range(repetition):
        print("-" * 60)
        print(f"learning_rate={learning_rate}, epoch={nb_epochs} : {i+1}번째 학습 실행")
        # Fresh model and optimizer so every run starts from new weights
        model = MLP().to(device)
        optimizer = optim.Adam(model.parameters(), lr=learning_rate)
        best_test_acc = 0

        for epoch in range(nb_epochs):
            # ---- training pass ----
            model.train()
            running_loss = 0.0
            correct_train = 0
            total_train = 0

            for batch in train_loader:
                imgs = batch["image"].to(device)
                labels = batch["label"].to(device)

                optimizer.zero_grad()

                outputs = model(imgs)
                loss = criterion(outputs, labels)

                loss.backward()
                optimizer.step()

                running_loss += loss.item()
                # Predicted class = index of the largest output along dim 1.
                # argmax replaces the legacy `outputs.data` access.
                predicted = outputs.argmax(dim=1)
                total_train += labels.size(0)
                correct_train += (predicted == labels).sum().item()

            # Per-epoch training summary; the loss is the mean of the per-batch losses
            epoch_loss = running_loss / len(train_loader)
            epoch_train_acc = 100 * correct_train / total_train
            print(f"Epoch [{epoch+1}/{nb_epochs}], Loss: {epoch_loss:.4f}, Train Accuracy: {epoch_train_acc:.2f}%")

            # ---- evaluation pass on the test loader ----
            model.eval()
            correct_test = 0
            total_test = 0

            with torch.no_grad():
                for batch in test_loader:
                    imgs = batch["image"].to(device)
                    labels = batch["label"].to(device)

                    outputs = model(imgs)
                    predicted = outputs.argmax(dim=1)
                    total_test += labels.size(0)
                    correct_test += (predicted == labels).sum().item()

            test_acc = 100 * correct_test / total_test
            print(f"  {epoch+1}. Test Accuracy: {test_acc:.2f}%")

            # Early stopping: end the run as soon as test accuracy fails to
            # improve on the best value so far (a tie also stops the run).
            # NOTE: the stopping decision uses the test split itself, so
            # best_test_acc is selected on the same data it is reported on.
            if test_acc > best_test_acc:
                best_test_acc = test_acc
            else:
                # Keeping the best model would require saving the previous
                # epoch's weights; that is omitted because this is only a
                # quick experiment.
                print(f"Test Accuracy가 감소 : 훈련을 종료합니다. best_accuracy={best_test_acc}")
                break

        sum_of_accuray += best_test_acc
        print(f"=== learning_rate={learning_rate} : {i+1}번째 훈련 완료  ===")

    print(f" learning_rate={learning_rate}일때의 accuracy 평균={sum_of_accuray / repetition:.2f}")

------------------------------------------------------------
learning_rate=0.1, epoch=3 : 1번째 학습 실행
Epoch [1/3], Loss: 1.6005, Train Accuracy: 62.10%
  1. Test Accuracy: 63.57%
Epoch [2/3], Loss: 1.2372, Train Accuracy: 59.82%
  2. Test Accuracy: 66.88%
Epoch [3/3], Loss: 1.5342, Train Accuracy: 49.25%
  3. Test Accuracy: 37.57%
Test Accuracy가 감소 : 훈련을 종료합니다. best_accuracy=66.88
=== learning_rate=0.1 : 1번째 훈련 완료  ===
------------------------------------------------------------
learning_rate=0.1, epoch=3 : 2번째 학습 실행
Epoch [1/3], Loss: 1.7077, Train Accuracy: 55.78%
  1. Test Accuracy: 56.44%
Epoch [2/3], Loss: 1.4206, Train Accuracy: 52.67%
  2. Test Accuracy: 45.74%
Test Accuracy가 감소 : 훈련을 종료합니다. best_accuracy=56.44
=== learning_rate=0.1 : 2번째 훈련 완료  ===
------------------------------------------------------------
learning_rate=0.1, epoch=3 : 3번째 학습 실행
Epoch [1/3], Loss: 2.0027, Train Accuracy: 50.03%
  1. Test Accuracy: 51.43%
Epoch [2/3], Loss: 1.4638, Train Accuracy: 47.60%
  2. Tes