In [None]:
from google.colab import drive
import os

# Mount Google Drive inside the Colab VM, then make the assignment folder the
# working directory. Later cells use relative paths (e.g. the results pickle),
# so those resolve against this folder.
# NOTE(review): the `functions` import in the next cell presumably relies on
# this chdir as well — confirm that functions.py lives in this folder.
DRIVE_MOUNT_POINT = '/content/drive'
ASSIGNMENT_DIR = '/content/drive/MyDrive/assignment2'

drive.mount(DRIVE_MOUNT_POINT)
os.chdir(ASSIGNMENT_DIR)

Mounted at /content/drive


In [None]:
# Seed setup: import the project helpers and fix the random seed for the run.
from functions import (
    set_seed,
    experiment1_hidden_size,
    experiment2_hidden_layers,
    experiment3_dropout,
    experiment4_dropout_with_layers
)

# Single seed for the whole notebook; it is also passed to every experiment call below.
SEED = 42
# NOTE(review): set_seed comes from the local `functions` module; presumably it
# seeds the Python / NumPy / torch RNGs — confirm in functions.py.
set_seed(SEED)
print(f"random seed = {SEED}")

random seed = 42


In [None]:
# Data loader preparation
from datasets import load_dataset
from torchvision import transforms
from torch.utils.data import DataLoader
import torch

# Load MNIST.
mnist = load_dataset("mnist")

# Estimate the normalization statistics from the first 1000 training images
# only (a subsample, not the full training split).
to_tensor = transforms.ToTensor()
train_split = mnist['train']
sample_data = torch.stack(
    [to_tensor(train_split[i]['image']) for i in range(1000)]
)
mean = sample_data.mean().item()
std = sample_data.std().item()

# Per-image pipeline: convert the image to a tensor, then normalize it with
# the estimated mean/std (one value each, i.e. a single channel).
normalize = transforms.Normalize((mean,), (std,))
transform = transforms.Compose([transforms.ToTensor(), normalize])

def transform_dataset(dataset):
    """Attach the flatten-and-tensorize transform to ``dataset``.

    Args:
        dataset: Object exposing ``with_transform(fn)``. The function handed
            to it receives a batch mapping with an ``"image"`` sequence and a
            ``"label"`` sequence.

    Returns:
        Whatever ``dataset.with_transform`` returns for the batch function
        defined here.
    """
    def _to_flat_tensors(batch):
        # Run every image through the module-level `transform`, then flatten
        # each result to 1-D before stacking them into a single tensor.
        flattened = []
        for picture in batch["image"]:
            flattened.append(transform(picture).view(-1))
        stacked_images = torch.stack(flattened)
        label_tensor = torch.tensor(batch["label"])
        return {"image": stacked_images, "label": label_tensor}

    return dataset.with_transform(_to_flat_tensors)

# Run settings shared by every experiment cell below.
batch_size = 128
test_batch_size = 1000
learning_rate = 1e-3
nb_epochs = 5

# Attach the on-the-fly transform to both splits.
train_dataset = transform_dataset(mnist["train"])
test_dataset = transform_dataset(mnist["test"])

# Only the training loader shuffles; the test loader keeps dataset order.
train_loader = DataLoader(train_dataset, shuffle=True, batch_size=batch_size)
test_loader = DataLoader(test_dataset, shuffle=False, batch_size=test_batch_size)

# Use the GPU when one is available, otherwise fall back to the CPU.
device_name = "cuda" if torch.cuda.is_available() else "cpu"
device = torch.device(device_name)

The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


README.md: 0.00B [00:00, ?B/s]

mnist/train-00000-of-00001.parquet:   0%|          | 0.00/15.6M [00:00<?, ?B/s]

mnist/test-00000-of-00001.parquet:   0%|          | 0.00/2.60M [00:00<?, ?B/s]

Generating train split:   0%|          | 0/60000 [00:00<?, ? examples/s]

Generating test split:   0%|          | 0/10000 [00:00<?, ? examples/s]

In [None]:
# Experiment 1: sweep the hidden size.
# NOTE(review): only the train and test loaders are passed to the helper, so
# the "best" value is presumably picked on test accuracy — confirm in
# functions.py; a held-out validation split would avoid an optimistic estimate.

print("실험 1: Hidden Size 최적값 찾기 (50, 100~1000, 100개 단위로 증가하며 실험)")

# Candidates: 50, then 100 through 1000 in steps of 100.
hidden_sizes = [50, *range(100, 1001, 100)]

# Settings that stay fixed across the sweep.
exp1_settings = dict(
    train_loader=train_loader,
    test_loader=test_loader,
    device=device,
    learning_rate=learning_rate,
    nb_epochs=nb_epochs,
    seed=SEED,
)
results_exp1 = experiment1_hidden_size(hidden_sizes=hidden_sizes, **exp1_settings)

# The winning hidden size feeds experiments 2-4.
best_hidden_size = results_exp1['best_hidden_size']
best_acc = results_exp1['best_accuracy']

print(f"\n최고 성능: Hidden Size = {best_hidden_size}, 정확도 = {best_acc:.2f}%")

실험 1: Hidden Size 최적값 찾기 (50, 100~1000, 100개 단위로 증가하며 실험)

Hidden Size = 50
Epoch 1/5: Train=88.87%, Test=94.04%
Epoch 2/5: Train=94.86%, Test=95.57%
Epoch 3/5: Train=96.00%, Test=96.28%
Epoch 4/5: Train=96.79%, Test=96.49%
Epoch 5/5: Train=97.34%, Test=96.66%
최종 Test 정확도: 96.66%

Hidden Size = 100
Epoch 1/5: Train=90.49%, Test=94.63%
Epoch 2/5: Train=95.73%, Test=96.26%
Epoch 3/5: Train=97.02%, Test=96.90%
Epoch 4/5: Train=97.75%, Test=97.42%
Epoch 5/5: Train=98.11%, Test=97.46%
최종 Test 정확도: 97.46%

Hidden Size = 200
Epoch 1/5: Train=92.03%, Test=95.87%
Epoch 2/5: Train=96.90%, Test=97.21%
Epoch 3/5: Train=97.79%, Test=97.32%
Epoch 4/5: Train=98.36%, Test=97.45%
Epoch 5/5: Train=98.71%, Test=97.93%
최종 Test 정확도: 97.93%

Hidden Size = 300
Epoch 1/5: Train=92.84%, Test=96.46%
Epoch 2/5: Train=97.10%, Test=97.09%
Epoch 3/5: Train=98.04%, Test=97.63%
Epoch 4/5: Train=98.50%, Test=97.88%
Epoch 5/5: Train=98.82%, Test=97.89%
최종 Test 정확도: 97.89%

Hidden Size = 400
Epoch 1/5: Train=93.17%, Tes

In [None]:
# Experiment 2: sweep the number of hidden layers at the best hidden size
# found in experiment 1.

print("실험 2: Hidden Layers 개수 최적값 찾기 (1~10개)")

# Candidates: 1 through 10 hidden layers.
hidden_layers_list = list(range(1, 11))

# Settings that stay fixed across the sweep.
exp2_settings = dict(
    best_hidden_size=best_hidden_size,
    train_loader=train_loader,
    test_loader=test_loader,
    device=device,
    learning_rate=learning_rate,
    nb_epochs=nb_epochs,
    seed=SEED,
)
results_exp2 = experiment2_hidden_layers(
    hidden_layers_list=hidden_layers_list,
    **exp2_settings,
)

# The winning depth is reused by experiment 3.
best_hidden_layers = results_exp2['best_hidden_layers']
best_acc_exp2 = results_exp2['best_accuracy']

print(f"\n최고 성능: Hidden Layers = {best_hidden_layers}개, 정확도 = {best_acc_exp2:.2f}%")

실험 2: Hidden Layers 개수 최적값 찾기 (1~10개)
Hidden Size = 800 (실험1에서 구한 최적값)

Hidden Layers = 1
Epoch 1/5: Train=93.62%, Test=97.01%
Epoch 2/5: Train=97.47%, Test=97.49%
Epoch 3/5: Train=98.28%, Test=97.67%
Epoch 4/5: Train=98.78%, Test=98.12%
Epoch 5/5: Train=99.12%, Test=98.00%
최종 Test 정확도: 98.00%

Hidden Layers = 2
Epoch 1/5: Train=93.91%, Test=96.92%
Epoch 2/5: Train=97.38%, Test=97.65%
Epoch 3/5: Train=98.17%, Test=97.47%
Epoch 4/5: Train=98.62%, Test=97.26%
Epoch 5/5: Train=98.81%, Test=98.00%
최종 Test 정확도: 98.00%

Hidden Layers = 3
Epoch 1/5: Train=93.27%, Test=96.05%
Epoch 2/5: Train=97.17%, Test=97.35%
Epoch 3/5: Train=97.89%, Test=97.34%
Epoch 4/5: Train=98.39%, Test=97.57%
Epoch 5/5: Train=98.73%, Test=97.96%
최종 Test 정확도: 97.96%

Hidden Layers = 4
Epoch 1/5: Train=92.89%, Test=96.42%
Epoch 2/5: Train=96.81%, Test=97.06%
Epoch 3/5: Train=97.82%, Test=97.68%
Epoch 4/5: Train=98.26%, Test=97.73%
Epoch 5/5: Train=98.53%, Test=97.52%
최종 Test 정확도: 97.52%

Hidden Layers = 5
Epoch 1/5: Tra

In [None]:
# Experiment 3: sweep the dropout rate at the best hidden size / depth found
# in experiments 1 and 2.

print("실험 3: Dropout 비율 최적값 찾기 (0.05~0.95 사이, 0.1 단위로 증가)")

# 0.05, 0.15, ..., 0.95 — built from integer percentages so every value is
# exactly the float the corresponding decimal literal would give.
dropout_rates = [pct / 100 for pct in range(5, 100, 10)]

# Settings that stay fixed across the sweep.
exp3_settings = dict(
    best_hidden_size=best_hidden_size,
    best_hidden_layers=best_hidden_layers,
    train_loader=train_loader,
    test_loader=test_loader,
    device=device,
    learning_rate=learning_rate,
    nb_epochs=nb_epochs,
    seed=SEED,
)
results_exp3 = experiment3_dropout(dropout_rates=dropout_rates, **exp3_settings)

# The winning dropout rate is reused by experiment 4.
best_dropout = results_exp3['best_dropout']
best_acc_exp3 = results_exp3['best_accuracy']
print(f"\n최고 성능: Dropout = {best_dropout}, 정확도 = {best_acc_exp3:.2f}%")

실험 3: Dropout 비율 최적값 찾기 (0.05~0.95 사이, 0.1 단위로 증가)
Hidden Size = 800 (실험1에서 구한 최적값)
Hidden Layers = 1 (실험2에서 구한 최적값)

Dropout = 0.05
Epoch 1/5: Train=93.48%, Test=96.86%
Epoch 2/5: Train=97.34%, Test=97.43%
Epoch 3/5: Train=98.14%, Test=97.78%
Epoch 4/5: Train=98.71%, Test=98.10%
Epoch 5/5: Train=98.98%, Test=97.93%
최종 Test 정확도: 97.93%

Dropout = 0.15
Epoch 1/5: Train=93.18%, Test=96.75%
Epoch 2/5: Train=97.17%, Test=97.50%
Epoch 3/5: Train=97.91%, Test=97.69%
Epoch 4/5: Train=98.39%, Test=97.86%
Epoch 5/5: Train=98.79%, Test=97.92%
최종 Test 정확도: 97.92%

Dropout = 0.25
Epoch 1/5: Train=92.83%, Test=96.56%
Epoch 2/5: Train=96.87%, Test=97.14%
Epoch 3/5: Train=97.65%, Test=97.59%
Epoch 4/5: Train=98.24%, Test=97.88%
Epoch 5/5: Train=98.46%, Test=98.06%
최종 Test 정확도: 98.06%

Dropout = 0.35
Epoch 1/5: Train=92.56%, Test=96.42%
Epoch 2/5: Train=96.54%, Test=97.23%
Epoch 3/5: Train=97.30%, Test=97.45%
Epoch 4/5: Train=97.84%, Test=98.02%
Epoch 5/5: Train=98.22%, Test=98.07%
최종 Test 정확도: 98.07%

In [None]:
# Experiment 4: repeat the hidden-layer sweep of experiment 2, this time with
# the best dropout rate from experiment 3 applied, to compare with and
# without dropout.

print(
    "실험 4: Dropout 여부에 따른 차이 비교\n"
    "Hidden Layers 1~10개에 Dropout를 적용했을 때 실험2와의 차이를 비교합니다."
)

# Same candidates as experiment 2: 1 through 10 hidden layers.
hidden_layers_list = list(range(1, 11))

# Settings that stay fixed across the sweep.
exp4_settings = dict(
    best_hidden_size=best_hidden_size,
    best_dropout=best_dropout,
    train_loader=train_loader,
    test_loader=test_loader,
    device=device,
    learning_rate=learning_rate,
    nb_epochs=nb_epochs,
    seed=SEED,
)
results_exp4 = experiment4_dropout_with_layers(
    hidden_layers_list=hidden_layers_list,
    **exp4_settings,
)

best_hidden_layers_exp4 = results_exp4['best_hidden_layers']
best_acc_exp4 = results_exp4['best_accuracy']

print(f"\n최고 성능: Hidden Layers = {best_hidden_layers_exp4}개, 정확도 = {best_acc_exp4:.2f}%")

실험 4: Dropout 여부에 따른 차이 비교
Hidden Layers 1~10개에 Dropout를 적용했을 때 실험2와의 차이를 비교합니다.
Hidden Size = 800 (실험1에서 구한 최적값)
Dropout = 0.35 (실험3에서 구한 최적값)

Hidden Layers = 1, Dropout = 0.35
Epoch 1/5: Train=92.56%, Test=96.42%
Epoch 2/5: Train=96.54%, Test=97.23%
Epoch 3/5: Train=97.30%, Test=97.45%
Epoch 4/5: Train=97.84%, Test=98.02%
Epoch 5/5: Train=98.22%, Test=98.07%
최종 Test 정확도: 98.07%

Hidden Layers = 2, Dropout = 0.35
Epoch 1/5: Train=92.18%, Test=96.52%
Epoch 2/5: Train=96.20%, Test=97.15%
Epoch 3/5: Train=96.91%, Test=97.72%
Epoch 4/5: Train=97.36%, Test=97.82%
Epoch 5/5: Train=97.69%, Test=97.39%
최종 Test 정확도: 97.39%

Hidden Layers = 3, Dropout = 0.35
Epoch 1/5: Train=91.46%, Test=95.80%
Epoch 2/5: Train=95.77%, Test=97.01%
Epoch 3/5: Train=96.70%, Test=97.52%
Epoch 4/5: Train=97.06%, Test=97.13%
Epoch 5/5: Train=97.36%, Test=97.90%
최종 Test 정확도: 97.90%

Hidden Layers = 4, Dropout = 0.35
Epoch 1/5: Train=90.47%, Test=95.72%
Epoch 2/5: Train=95.59%, Test=96.83%
Epoch 3/5: Train=96.28%, Te

In [None]:
# Save the results so that another file can visualize them.
# (Author's note: this cell was written with AI.)

import pickle

# Bundle the four experiment result objects together with the seed and the
# selected hyperparameters.
all_results = dict(
    experiment1=results_exp1,
    experiment2=results_exp2,
    experiment3=results_exp3,
    experiment4=results_exp4,
    seed=SEED,
    best_hidden_size=best_hidden_size,
    best_hidden_layers=best_hidden_layers,
    best_dropout=best_dropout,
    best_hidden_layers_exp4=best_hidden_layers_exp4,
)

# Written relative to the current working directory.
# NOTE: unpickling can execute arbitrary code — only load this file from a
# location you trust.
with open('experiment_results.pkl', 'wb') as out_file:
    pickle.dump(all_results, out_file)

print("모든 실험 결과가 'experiment_results.pkl'에 저장되었습니다.")

# Final summary of the selected hyperparameters.
print("\n최종 최적 하이퍼파라미터:")
print(f"  - Hidden Size: {best_hidden_size}")
print(f"  - Hidden Layers (Dropout 없음): {best_hidden_layers}")
print(f"  - Dropout 비율: {best_dropout}")
print(f"  - Hidden Layers (Dropout 있음): {best_hidden_layers_exp4}")

모든 실험 결과가 'experiment_results.pkl'에 저장되었습니다.

최종 최적 하이퍼파라미터:
  - Hidden Size: 800
  - Hidden Layers (Dropout 없음): 1
  - Dropout 비율: 0.35
  - Hidden Layers (Dropout 있음): 1
