In [21]:
import torch
import torch.nn as nn
from torchvision.transforms import ToTensor
from torch.utils.data import DataLoader
from torchvision.datasets import MNIST
from tqdm import tqdm

In [36]:
# Load both MNIST splits from ./MNIST (downloaded on first use); every sample
# is passed through the ToTensor transform.
training_data = MNIST(root="./MNIST", train=True, download=True, transform=ToTensor())
test_data = MNIST(root="./MNIST", train=False, download=True, transform=ToTensor())
# Prefer the GPU, but fall back to the CPU so the notebook still runs on
# machines without CUDA instead of failing at the first `.to(device)`.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

In [43]:
def load_data(batch_size=64):
    """Build DataLoaders over the notebook-level `training_data` and `test_data`.

    Args:
        batch_size: Number of samples per mini-batch for both loaders.

    Returns:
        A `(train_loader, test_loader)` tuple. The training loader reshuffles
        its samples; the test loader keeps the dataset order.
    """
    # (dataset, shuffle?) pairs, in the order the loaders are returned.
    splits = ((training_data, True), (test_data, False))
    return tuple(
        DataLoader(dataset, batch_size=batch_size, shuffle=reshuffle)
        for dataset, reshuffle in splits
    )

In [38]:
def train(model, epochs, train_dataLoader):
    """Train `model` in place with SGD (lr=1e-2) and cross-entropy loss.

    Args:
        model: An `nn.Module` with at least one parameter. Each batch is
            flattened from dimension 1 onward before being fed to it.
        epochs: Number of full passes over `train_dataLoader`.
        train_dataLoader: Iterable yielding `(X, y)` tensor pairs.

    After every epoch the loss of the *last* mini-batch is printed; it is a
    single-batch value, not an epoch average, so it can fluctuate between
    epochs. If an epoch yields no batches a placeholder line is printed
    instead.
    """
    loss_fn = nn.CrossEntropyLoss()
    optimizer = torch.optim.SGD(model.parameters(), lr=1e-2)
    # Move batches to wherever the model actually lives rather than relying on
    # a notebook-level global. The optimizer above has already rejected an
    # empty parameter list, so `next` cannot fail here.
    model_device = next(model.parameters()).device
    # Make sure train-time behaviour is active even if the model was
    # previously switched to evaluation mode.
    model.train()
    for epoch in range(epochs):
        loss = None
        for X, y in tqdm(train_dataLoader):
            X = X.flatten(1).to(model_device)
            y = y.to(model_device)
            # The output is already on the model's device; no extra `.to()`.
            pred = model(X)
            loss = loss_fn(pred, y)
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()
        if loss is None:
            # The loader produced no batches, so there is no loss to report.
            print(f"Epoch {epoch+1} Loss: n/a (no batches)")
        else:
            print(f"Epoch {epoch+1} Loss: {loss.item()}")

In [39]:
# evaluation
def eval(model,test_dataLoader):
    """Print the classification accuracy of `model` over `test_dataLoader`.

    Args:
        model: Callable mapping a flattened batch to per-class scores. When it
            is an `nn.Module` it is switched to evaluation mode for the
            duration of the call and its previous mode is restored afterwards.
        test_dataLoader: Iterable yielding `(X, y)` tensor pairs.

    Prints `Accuracy: <correct / total>` computed with Python numbers, or a
    placeholder line when the loader yields no samples. Returns nothing.

    Note: this function shadows the built-in `eval`; the name is kept because
    other cells call it.
    """
    is_module = isinstance(model, nn.Module)
    first_param = next(model.parameters(), None) if is_module else None
    # Follow the model's own placement; only fall back to the notebook-level
    # `device` when the model exposes no parameters to inspect.
    target = first_param.device if first_param is not None else device
    was_training = is_module and model.training
    if is_module:
        model.eval()
    correct = 0
    total = 0
    try:
        with torch.no_grad():
            for X, y in tqdm(test_dataLoader):
                X = X.flatten(1).to(target)
                y = y.to(target)
                predicted = model(X).argmax(dim=1)
                # `.item()` keeps the counter a Python int, so the final
                # ratio is not rounded to float32.
                correct += (predicted == y).sum().item()
                total += y.size(0)
    finally:
        if is_module:
            # Leave the model in whatever mode the caller had it in.
            model.train(was_training)
    if total == 0:
        # Avoid dividing by zero on an empty loader.
        print("Accuracy: n/a (no samples)")
        return
    print(f"Accuracy: {correct / total}")

In [45]:
# Experiment 1 (baseline): one 128-unit hidden layer, mini-batches of 64,
# trained for 5 epochs and then scored on the test split.
train_dataLoader, test_dataLoader = load_data(64)
model_1 = nn.Sequential(
    nn.Linear(784, 128),  # 28x28 image flattened to 784 inputs
    nn.ReLU(),
    nn.Linear(128, 10),   # one output score per class
)
model_1 = model_1.to(device)
train(model_1, 5, train_dataLoader)
eval(model_1, test_dataLoader)

100%|██████████| 938/938 [00:09<00:00, 96.80it/s] 


Epoch 1 Loss: 0.6976597309112549


100%|██████████| 938/938 [00:09<00:00, 95.62it/s] 


Epoch 2 Loss: 0.4598991572856903


100%|██████████| 938/938 [00:09<00:00, 100.35it/s]


Epoch 3 Loss: 0.2616998851299286


100%|██████████| 938/938 [00:09<00:00, 96.48it/s] 


Epoch 4 Loss: 0.4770171046257019


100%|██████████| 938/938 [00:10<00:00, 91.85it/s] 


Epoch 5 Loss: 0.20967990159988403


100%|██████████| 157/157 [00:01<00:00, 122.35it/s]

Accuracy: 0.9154999852180481





In [46]:
# Experiment 2: same architecture as the baseline, but the batch size is
# doubled to 128; still 5 epochs, then scored on the test split.
train_dataLoader, test_dataLoader = load_data(128)
model_2 = nn.Sequential(
    nn.Linear(784, 128),  # 28x28 image flattened to 784 inputs
    nn.ReLU(),
    nn.Linear(128, 10),   # one output score per class
)
model_2 = model_2.to(device)
train(model_2, 5, train_dataLoader)
eval(model_2, test_dataLoader)

100%|██████████| 469/469 [00:08<00:00, 55.40it/s]


Epoch 1 Loss: 1.14773428440094


100%|██████████| 469/469 [00:07<00:00, 60.26it/s]


Epoch 2 Loss: 0.7357723712921143


100%|██████████| 469/469 [00:08<00:00, 55.82it/s]


Epoch 3 Loss: 0.5249956250190735


100%|██████████| 469/469 [00:08<00:00, 55.53it/s]


Epoch 4 Loss: 0.42631861567497253


100%|██████████| 469/469 [00:07<00:00, 60.87it/s]


Epoch 5 Loss: 0.4471134841442108


100%|██████████| 79/79 [00:01<00:00, 71.94it/s]

Accuracy: 0.8981999754905701





In [48]:
# Experiment 3: batch size back to 64, with an extra 64-unit hidden layer
# inserted before the output layer; 5 epochs, then scored on the test split.
train_dataLoader, test_dataLoader = load_data(64)
model_3 = nn.Sequential(
    nn.Linear(784, 128),  # 28x28 image flattened to 784 inputs
    nn.ReLU(),
    nn.Linear(128, 64),   # additional hidden layer
    nn.ReLU(),
    nn.Linear(64, 10),    # one output score per class
)
model_3 = model_3.to(device)
train(model_3, 5, train_dataLoader)
eval(model_3, test_dataLoader)

100%|██████████| 938/938 [00:09<00:00, 100.85it/s]


Epoch 1 Loss: 0.7620667219161987


100%|██████████| 938/938 [00:10<00:00, 90.64it/s] 


Epoch 2 Loss: 0.3453406095504761


100%|██████████| 938/938 [00:09<00:00, 95.45it/s] 


Epoch 3 Loss: 0.45932537317276


100%|██████████| 938/938 [00:09<00:00, 101.57it/s]


Epoch 4 Loss: 0.29637157917022705


100%|██████████| 938/938 [00:09<00:00, 100.01it/s]


Epoch 5 Loss: 0.28019192814826965


100%|██████████| 157/157 [00:01<00:00, 108.27it/s]

Accuracy: 0.9158999919891357



