In [66]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torch.utils.data import DataLoader
import torchvision.datasets as datasets
import torchvision.transforms as transforms

# Run on the GPU when CUDA is usable, otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = 'cuda'
else:
    device = 'cpu'

In [77]:
# --- Training configuration -------------------------------------------------
num_epochs = 50        # bound used by the epoch loop
batch_size = 128       # samples per batch handed to the DataLoader
learning_rate = 1e-2   # initial step size given to the optimizer
num_classes = 10       # width of the model's final Linear layer

In [68]:
# Fully connected classifier: 784 input features -> 50 hidden units (ReLU)
# -> one raw score per class.
model = nn.Sequential(
    *[nn.Linear(784, 50), nn.ReLU(), nn.Linear(50, num_classes)]
)
# Move the parameters to the selected device.
model = model.to(device)


In [69]:
!wget www.di.ens.fr/~lelarge/MNIST.tar.gz
!tar -zxvf MNIST.tar.gz

from torchvision.datasets import MNIST
from torchvision import transforms

train_dataset = MNIST('./', download=True,
transform=transforms.Compose([transforms.ToTensor(),]),
 train=True)

train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)

criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=learning_rate)

--2021-03-17 14:23:38--  http://www.di.ens.fr/~lelarge/MNIST.tar.gz
Resolving www.di.ens.fr (www.di.ens.fr)... 129.199.99.14
Connecting to www.di.ens.fr (www.di.ens.fr)|129.199.99.14|:80... connected.
HTTP request sent, awaiting response... 302 Found
Location: https://www.di.ens.fr/~lelarge/MNIST.tar.gz [following]
--2021-03-17 14:23:38--  https://www.di.ens.fr/~lelarge/MNIST.tar.gz
Connecting to www.di.ens.fr (www.di.ens.fr)|129.199.99.14|:443... connected.
HTTP request sent, awaiting response... 200 OK
Length: unspecified [application/x-gzip]
Saving to: ‘MNIST.tar.gz.4’

MNIST.tar.gz.4          [      <=>           ]  33.20M  29.9MB/s    in 1.1s    

2021-03-17 14:23:39 (29.9 MB/s) - ‘MNIST.tar.gz.4’ saved [34813078]

MNIST/
MNIST/raw/
MNIST/raw/train-labels-idx1-ubyte
MNIST/raw/t10k-labels-idx1-ubyte.gz
MNIST/raw/t10k-labels-idx1-ubyte
MNIST/raw/t10k-images-idx3-ubyte.gz
MNIST/raw/train-images-idx3-ubyte
MNIST/raw/train-labels-idx1-ubyte.gz
MNIST/raw/t10k-images-idx3-ubyte
MNIST/raw

In [70]:
# Multiply the learning rate by 0.1 (i.e. cut it to one tenth) once the value
# passed to scheduler.step() has not improved for more than 5 consecutive calls.
scheduler = optim.lr_scheduler.ReduceLROnPlateau(
    optimizer,
    mode='min',
    factor=0.1,
    patience=5,
    verbose=True,
)

In [80]:
# Train for exactly `num_epochs` epochs. Epochs are numbered from 1 for display,
# so the range must end at num_epochs + 1 (range(1, num_epochs) would stop one
# epoch short).
model.train()
for epoch in range(1, num_epochs + 1):
    losses = []

    for data, targets in train_loader:
        # Flatten everything except the batch dimension so each sample is a
        # single feature vector, then move the batch to the training device.
        data = data.reshape(data.shape[0], -1).to(device)
        targets = targets.to(device)

        scores = model(data)
        loss = criterion(scores, targets)
        losses.append(loss.item())

        # Clear old gradients *before* backprop so this update never includes
        # gradients left over from a previous step or from another cell.
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

    mean_loss = sum(losses) / len(losses)

    # The plateau scheduler decides whether to lower the LR from the epoch's
    # mean training loss.
    scheduler.step(mean_loss)
    print(f"Epoch: {epoch} | Loss: {mean_loss:.5f}")

Epoch: 1 | Loss: 0.18830
Epoch: 2 | Loss: 0.18599
Epoch: 3 | Loss: 0.18741
Epoch: 4 | Loss: 0.18568
Epoch: 5 | Loss: 0.18756
Epoch: 6 | Loss: 0.18373
Epoch: 7 | Loss: 0.18585
Epoch: 8 | Loss: 0.18695
Epoch: 9 | Loss: 0.18665
Epoch: 10 | Loss: 0.18514
Epoch: 11 | Loss: 0.18497
Epoch   160: reducing learning rate of group 0 to 1.0000e-03.
Epoch: 12 | Loss: 0.18707
Epoch: 13 | Loss: 0.16808
Epoch: 14 | Loss: 0.16494
Epoch: 15 | Loss: 0.16427
Epoch: 16 | Loss: 0.16406
Epoch: 17 | Loss: 0.16382
Epoch: 18 | Loss: 0.16363
Epoch: 19 | Loss: 0.16347
Epoch: 20 | Loss: 0.16325
Epoch: 21 | Loss: 0.16302
Epoch: 22 | Loss: 0.16289
Epoch: 23 | Loss: 0.16282
Epoch: 24 | Loss: 0.16285
Epoch: 25 | Loss: 0.16255
Epoch: 26 | Loss: 0.16258
Epoch: 27 | Loss: 0.16263
Epoch: 28 | Loss: 0.16248
Epoch: 29 | Loss: 0.16247
Epoch: 30 | Loss: 0.16225
Epoch: 31 | Loss: 0.16221
Epoch: 32 | Loss: 0.16228
Epoch: 33 | Loss: 0.16215
Epoch: 34 | Loss: 0.16214
Epoch: 35 | Loss: 0.16209
Epoch: 36 | Loss: 0.16183
Epoch: 37 |

In [81]:
def check_accuracy(loader, model):
    """Print the classification accuracy of ``model`` over ``loader``.

    Args:
        loader: Iterable yielding ``(inputs, labels)`` batches. Each input
            batch is flattened to ``(batch_size, -1)`` before the forward pass.
        model: ``nn.Module`` returning one score per class along dim 1.

    Returns:
        None. The result is printed as
        ``Got <correct> / <total> with accuracy <percent>``.

    Notes:
        * Batches are moved to the device holding the model's parameters, so
          the function does not depend on a notebook-global ``device``.
        * The model is switched to eval mode for the pass and its previous
          train/eval mode is restored afterwards, even if the pass raises.
        * An empty loader reports ``0.00`` instead of raising
          ``ZeroDivisionError``.
    """
    num_correct = 0
    num_samples = 0

    # Evaluate wherever the model lives; a parameter-free model defaults to CPU.
    first_param = next(model.parameters(), None)
    model_device = first_param.device if first_param is not None else torch.device('cpu')

    was_training = model.training
    model.eval()
    try:
        with torch.no_grad():
            for x, y in loader:
                x = x.reshape(x.shape[0], -1).to(model_device)
                y = y.to(model_device)

                scores = model(x)
                predicted = scores.argmax(dim=1)
                # .item() keeps the running count a plain Python int rather
                # than a tensor living on the device.
                num_correct += (predicted == y).sum().item()
                num_samples += predicted.size(0)
    finally:
        # Put the model back in whatever mode the caller had it in.
        model.train(was_training)

    accuracy = float(num_correct) / float(num_samples) * 100 if num_samples else 0.0
    print(
        f"Got {num_correct} / {num_samples} with accuracy {accuracy:.2f}"
    )

# Evaluated on the training loader, so the printed figure is training accuracy,
# not accuracy on held-out data.
check_accuracy(loader=train_loader, model=model)

Got 57284 / 60000 with accuracy 95.47
