# MNIST dataset

In [None]:
import torch
import torch.nn as nn
import torch.optim as optim
import matplotlib.pyplot as plt
import numpy as np

In [None]:
# Fix the PyTorch random number generators (CPU, current CUDA device, all CUDA devices)
# to one seed so that weight initialisation and shuffling are repeatable across runs.
SEED = 0
torch.manual_seed(SEED)
torch.cuda.manual_seed(SEED)
torch.cuda.manual_seed_all(SEED)

In [None]:
# Run on the GPU when one is available, otherwise fall back to the CPU.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

In [None]:
import torchvision
import torchvision.transforms as transforms

# Build the MNIST training split first, then the test split. Both are downloaded into
# "MNIST_data/" if missing and each sample image is converted to a tensor.
train_dataset, test_dataset = (
    torchvision.datasets.MNIST(
        root="MNIST_data/",
        train=is_train,
        transform=transforms.ToTensor(),
        download=True,
    )
    for is_train in (True, False)
)

In [None]:
batch_size = 128

# Training batches are reshuffled every epoch; test batches keep the dataset order.
train_dataloader = torch.utils.data.DataLoader(
    dataset=train_dataset,
    batch_size=batch_size,
    shuffle=True,
)
test_dataloader = torch.utils.data.DataLoader(
    dataset=test_dataset,
    batch_size=batch_size,
    shuffle=False,
)

In [None]:
# Inspect the first training sample: x is the image tensor, y its target.
# The cell displays the shape of the image tensor.
x, y = train_dataset[0]
x.shape

torch.Size([1, 28, 28])

In [None]:
class CNN(nn.Module):
    """Small convolutional classifier mapping 1x28x28 images to 10 class scores.

    Two stages of (3x3 same-padded convolution -> ReLU -> 2x2 max-pool) shrink the
    spatial size 28 -> 14 -> 7 while widening the channels 1 -> 32 -> 64. The
    flattened 64*7*7 feature vector then goes through a two-layer linear head.
    """

    def __init__(self):
        super().__init__()
        self.conv1 = nn.Conv2d(in_channels=1, out_channels=32, kernel_size=3, stride=1, padding=1)
        self.pool = nn.MaxPool2d(kernel_size=2, stride=2)
        self.conv2 = nn.Conv2d(in_channels=32, out_channels=64, kernel_size=3, stride=1, padding=1)
        self.fc1 = nn.Linear(in_features=64 * 7 * 7, out_features=128)
        self.fc2 = nn.Linear(in_features=128, out_features=10)
        self.activation = nn.ReLU()

    def forward(self, x):
        """Return the raw (un-normalised) class scores, one row of 10 per input image."""
        # Both convolutional stages share the same ReLU and pooling modules.
        for conv in (self.conv1, self.conv2):
            x = self.pool(self.activation(conv(x)))
        features = x.flatten(start_dim=1)  # keep the batch dimension, flatten the rest
        hidden = self.activation(self.fc1(features))
        return self.fc2(hidden)


In [None]:
# Build the network, then move its parameters to the selected device.
model = CNN()
model = model.to(device)

In [None]:
# Adam over every parameter of the model, with a learning rate of 1e-3.
optimizer = optim.Adam(params=model.parameters(), lr=1e-3)

In [None]:
# Cross-entropy over raw class scores, averaged over the batch.
criterion = nn.CrossEntropyLoss(reduction="mean")

In [None]:
epochs = 30

# Number of mini-batches per epoch; loop-invariant, used to average the batch losses.
total_batch_num = len(train_dataloader)

for epoch in range(epochs):
    model.train()  # training mode for this epoch
    avg_cost = 0.0

    for b_x, b_y in train_dataloader:
        logits = model(b_x.to(device))
        loss = criterion(logits, b_y.to(device))

        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        # Accumulate a plain Python float. Adding the `loss` tensor itself would keep
        # every batch's autograd history referenced until the end of the epoch.
        avg_cost += loss.item() / total_batch_num
    print(f'Epoch : {epoch + 1} / {epochs}, cost : {avg_cost}')

In [None]:
correct = 0
total = 0

model.eval()
# Evaluation needs no gradients, so the whole pass runs under no_grad.
with torch.no_grad():
    for b_x, b_y in test_dataloader:
        logits = model(b_x.to(device))
        # The predicted class is the index of the largest score. Softmax is monotonic,
        # so taking argmax over the raw logits gives the same index without computing it.
        predicts = torch.argmax(logits, dim=1)

        total += len(b_y)
        correct += (predicts == b_y.to(device)).sum().item()

# Integer (floored) percentage of correctly classified test images.
print(f'Accuracy of the network on test images :{100 * correct // total} %')

# CIFAR-10 dataset

In [1]:
import torch
import torch.nn as nn
import torch.optim as optim
import matplotlib.pyplot as plt
import numpy as np

In [2]:
# Seed the CPU generator, the current CUDA device and all CUDA devices with the same
# value so that repeated runs start from identical random state.
SEED = 0
torch.manual_seed(SEED)
torch.cuda.manual_seed(SEED)
torch.cuda.manual_seed_all(SEED)

In [3]:
# Prefer CUDA when present; otherwise computation stays on the CPU.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

In [4]:
import torchvision
import torchvision.transforms as transforms

# Build the CIFAR-10 training split first, then the test split. Both are downloaded into
# "CIFAR10/" if missing and each sample image is converted to a tensor.
train_dataset, test_dataset = (
    torchvision.datasets.CIFAR10(
        root="CIFAR10/",
        train=is_train,
        transform=transforms.ToTensor(),
        download=True,
    )
    for is_train in (True, False)
)

Downloading https://www.cs.toronto.edu/~kriz/cifar-10-python.tar.gz to CIFAR10/cifar-10-python.tar.gz


  0%|          | 0/170498071 [00:00<?, ?it/s]

Extracting CIFAR10/cifar-10-python.tar.gz to CIFAR10/
Files already downloaded and verified


In [5]:
batch_size = 128

# Training batches are reshuffled every epoch; test batches keep the dataset order.
train_dataloader = torch.utils.data.DataLoader(
    dataset=train_dataset,
    batch_size=batch_size,
    shuffle=True,
)
test_dataloader = torch.utils.data.DataLoader(
    dataset=test_dataset,
    batch_size=batch_size,
    shuffle=False,
)

In [10]:
# Inspect the first training sample: print the image tensor's shape, then its target value.
# NOTE(review): the recorded output of this cell shows "<class 'int'>", which print(y)
# would not produce. The cell was presumably edited after it last ran; re-run to refresh.
x, y = train_dataset[0]
print(x.shape)
print(y)



torch.Size([3, 32, 32])
<class 'int'>


In [None]:
class CNN(nn.Module):
    """LeNet-style classifier mapping 3x32x32 images to 10 class scores.

    Two stages of (5x5 unpadded convolution -> ReLU -> 2x2 max-pool) take the spatial
    size 32 -> 28 -> 14 -> 10 -> 5 and the channels 3 -> 6 -> 16. The flattened
    16*5*5 feature vector then goes through a three-layer linear head.
    """

    def __init__(self):
        super().__init__()
        self.conv1 = nn.Conv2d(in_channels=3, out_channels=6, kernel_size=5)
        self.pool = nn.MaxPool2d(kernel_size=2, stride=2)
        self.conv2 = nn.Conv2d(in_channels=6, out_channels=16, kernel_size=5)
        self.fc1 = nn.Linear(in_features=16 * 5 * 5, out_features=120)
        self.fc2 = nn.Linear(in_features=120, out_features=84)
        self.fc3 = nn.Linear(in_features=84, out_features=10)
        self.activation = nn.ReLU()

    def forward(self, x):
        """Return the raw (un-normalised) class scores, one row of 10 per input image."""
        # Both convolutional stages share the same ReLU and pooling modules.
        for conv in (self.conv1, self.conv2):
            x = self.pool(self.activation(conv(x)))
        hidden = x.flatten(start_dim=1)  # keep the batch dimension, flatten the rest
        # Hidden linear layers are each followed by ReLU; the output layer is not.
        for dense in (self.fc1, self.fc2):
            hidden = self.activation(dense(hidden))
        return self.fc3(hidden)


In [None]:
# Build the network, then move its parameters to the selected device.
model = CNN()
model = model.to(device)

In [None]:
# Adam over every parameter of the model, with a learning rate of 1e-3.
optimizer = optim.Adam(params=model.parameters(), lr=1e-3)

In [None]:
# Cross-entropy over raw class scores, averaged over the batch.
criterion = nn.CrossEntropyLoss(reduction="mean")

In [None]:
epochs = 30

# Number of mini-batches per epoch; loop-invariant, used to average the batch losses.
total_batch_num = len(train_dataloader)

for epoch in range(epochs):
    model.train()  # training mode for this epoch
    avg_cost = 0.0

    for b_x, b_y in train_dataloader:
        logits = model(b_x.to(device))
        loss = criterion(logits, b_y.to(device))

        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        # Accumulate a plain Python float. Adding the `loss` tensor itself would keep
        # every batch's autograd history referenced until the end of the epoch.
        avg_cost += loss.item() / total_batch_num
    print(f'Epoch : {epoch + 1} / {epochs}, cost : {avg_cost}')

Epoch : 1 / 30, cost : 1.864212989807129
Epoch : 2 / 30, cost : 1.5597267150878906
Epoch : 3 / 30, cost : 1.4491040706634521
Epoch : 4 / 30, cost : 1.3785216808319092
Epoch : 5 / 30, cost : 1.3285419940948486
Epoch : 6 / 30, cost : 1.282416820526123
Epoch : 7 / 30, cost : 1.2466343641281128
Epoch : 8 / 30, cost : 1.2131173610687256
Epoch : 9 / 30, cost : 1.1840627193450928
Epoch : 10 / 30, cost : 1.1572065353393555
Epoch : 11 / 30, cost : 1.1278780698776245
Epoch : 12 / 30, cost : 1.099143385887146
Epoch : 13 / 30, cost : 1.0852669477462769
Epoch : 14 / 30, cost : 1.062390923500061
Epoch : 15 / 30, cost : 1.0450295209884644
Epoch : 16 / 30, cost : 1.0288891792297363
Epoch : 17 / 30, cost : 1.0132269859313965
Epoch : 18 / 30, cost : 0.9957659244537354
Epoch : 19 / 30, cost : 0.9784051179885864
Epoch : 20 / 30, cost : 0.9656466245651245
Epoch : 21 / 30, cost : 0.9530317187309265
Epoch : 22 / 30, cost : 0.9360508918762207
Epoch : 23 / 30, cost : 0.9234834909439087
Epoch : 24 / 30, cost : 

In [None]:
correct = 0
total = 0

model.eval()
# Evaluation needs no gradients, so the whole pass runs under no_grad.
with torch.no_grad():
    for b_x, b_y in test_dataloader:
        logits = model(b_x.to(device))
        # The predicted class is the index of the largest score. Softmax is monotonic,
        # so taking argmax over the raw logits gives the same index without computing it.
        predicts = torch.argmax(logits, dim=1)

        total += len(b_y)
        correct += (predicts == b_y.to(device)).sum().item()

# Integer (floored) percentage of correctly classified test images.
print(f'Accuracy of the network on test images :{100 * correct // total} %')

Accuracy of the network on test images :62 %
