In [47]:
import torch
import torchvision.datasets as dsets
import torchvision.transforms as transforms

import torch.nn.init
import torch.nn as nn

In [48]:
device = 'cuda' if torch.cuda.is_available() else 'cpu'

torch.manual_seed(777)
if device == 'cuda':
  torch.cuda.manual_seed_all(777)

In [49]:
#parameters
learning_rate = 0.001
training_epochs = 15
batch_size = 100

In [50]:
#MNIST dataset

mnist_train = dsets.MNIST(root = 'MNIST_data/',
                          train = True,
                          transform = transforms.ToTensor(),
                          download = True)

mnist_test = dsets.MNIST(root = 'MNIST_data/',
                         train = False,
                         transform = transforms.ToTensor(),
                         download = True
                         )

In [51]:
data_loader = torch.utils.data.DataLoader(dataset = mnist_train,
                                          batch_size = batch_size,
                                          shuffle = True,
                                          drop_last = True
                                          )

In [52]:
class CNN(nn.Module):

  def __init__(self):
    super(CNN, self).__init__()
    self.layer1 = nn.Sequential(
        nn.Conv2d(1,32,kernel_size = 3,stride = 1, padding = 1),
        nn.ReLU(),
        nn.MaxPool2d(2)
    )
    self.layer2 = nn.Sequential(
        nn.Conv2d(32,64, kernel_size =3, stride =1 , padding =1),
        nn.ReLU(),
        nn.MaxPool2d(2)
    )

    self.fc = nn.Linear(7*7*64,10, bias = True)
    torch.nn.init.xavier_uniform_(self.fc.weight)
  
  def forward(self, x):
    out = self.layer1(x)
    out = self.layer2(out)

    out = out.view(out.size(0),-1)
    out = self.fc(out)

    return out
  



In [53]:
model = CNN().to(device)

In [54]:
criterion = nn.CrossEntropyLoss().to(device)
optimizer = torch.optim.Adam(model.parameters(), lr= learning_rate)

In [55]:
# training
total_batch = len(data_loader)


for epoch in range(training_epochs):
  avg_cost = 0
  
  for X, Y in data_loader:
    X = X.to(device)
    Y = Y.to(device)

    optimizer.zero_grad()
    hypothesis = model(X)

    cost = criterion(hypothesis, Y)
    cost.backward()
    optimizer.step()

    avg_cost += cost / total_batch

  print('[Epoch:{}] cost = {}'.format(epoch+1, avg_cost))
print('Learning Finished')

[Epoch:1] cost = 0.22565898299217224
[Epoch:2] cost = 0.0629897192120552
[Epoch:3] cost = 0.04626385495066643
[Epoch:4] cost = 0.03747444972395897
[Epoch:5] cost = 0.031500980257987976
[Epoch:6] cost = 0.02605624310672283
[Epoch:7] cost = 0.021788928657770157
[Epoch:8] cost = 0.018415965139865875
[Epoch:9] cost = 0.016283579170703888
[Epoch:10] cost = 0.013325847685337067
[Epoch:11] cost = 0.010029090568423271
[Epoch:12] cost = 0.009885050356388092
[Epoch:13] cost = 0.008574051782488823
[Epoch:14] cost = 0.006150357890874147
[Epoch:15] cost = 0.0072083198465406895
Learning Finished


In [56]:
with torch.no_grad():
  X_test = mnist_test.test_data.view(len(mnist_test),1,28,28).float().to(device)
  Y_test = mnist_test.test_labels.to(device)

  prediction = model(X_test)
  correct_prediction = torch.argmax(prediction, 1) == Y_test
  accuracy = correct_prediction.float().mean()
  print('Accuracy: ', accuracy.item())



Accuracy:  0.9858999848365784
