In [1]:
import torch
import torch.nn as nn
import torchvision
import torchvision.transforms as transforms

In [2]:
# Device configuration: run on the GPU when PyTorch reports CUDA as available,
# otherwise fall back to the CPU.
device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')

In [3]:
# Reproducibility: seed PyTorch's global random number generator so that
# anything drawing from it (e.g. parameter initialisation, shuffling) starts
# from a known state on every Restart & Run All.
# NOTE(review): some GPU kernels may still be non-deterministic; this only
# fixes the generator state.
seed = 0
torch.manual_seed(seed)

# Hyper-parameters
input_size = 784        # features per sample: one per pixel of a flattened 28x28 image
hidden_size = 500       # number of units in the hidden layer
num_classes = 10        # number of output classes
num_epochs = 5          # full passes over the training set
batch_size = 100        # samples per mini-batch
learning_rate = 0.001   # step size handed to the optimizer

In [4]:
# MNIST training split; download=True fetches the files into the root
# directory when they are not already present.
train_dataset = torchvision.datasets.MNIST(
    root='../../data',
    train=True,
    transform=transforms.ToTensor(),
    download=True,
)

# MNIST test split, read from the same root directory.
test_dataset = torchvision.datasets.MNIST(
    root='../../data',
    train=False,
    transform=transforms.ToTensor(),
)

# Mini-batch iterators. Training batches are reshuffled; test batches keep
# the dataset order.
train_loader = torch.utils.data.DataLoader(
    dataset=train_dataset,
    batch_size=batch_size,
    shuffle=True,
)

test_loader = torch.utils.data.DataLoader(
    dataset=test_dataset,
    batch_size=batch_size,
    shuffle=False,
)


Downloading http://yann.lecun.com/exdb/mnist/train-images-idx3-ubyte.gz
Downloading http://yann.lecun.com/exdb/mnist/train-images-idx3-ubyte.gz to ../../data/MNIST/raw/train-images-idx3-ubyte.gz


100%|██████████| 9912422/9912422 [00:00<00:00, 69850491.83it/s]


Extracting ../../data/MNIST/raw/train-images-idx3-ubyte.gz to ../../data/MNIST/raw

Downloading http://yann.lecun.com/exdb/mnist/train-labels-idx1-ubyte.gz
Downloading http://yann.lecun.com/exdb/mnist/train-labels-idx1-ubyte.gz to ../../data/MNIST/raw/train-labels-idx1-ubyte.gz


100%|██████████| 28881/28881 [00:00<00:00, 31521127.72it/s]

Extracting ../../data/MNIST/raw/train-labels-idx1-ubyte.gz to ../../data/MNIST/raw

Downloading http://yann.lecun.com/exdb/mnist/t10k-images-idx3-ubyte.gz
Downloading http://yann.lecun.com/exdb/mnist/t10k-images-idx3-ubyte.gz to ../../data/MNIST/raw/t10k-images-idx3-ubyte.gz



100%|██████████| 1648877/1648877 [00:00<00:00, 19966312.52it/s]


Extracting ../../data/MNIST/raw/t10k-images-idx3-ubyte.gz to ../../data/MNIST/raw

Downloading http://yann.lecun.com/exdb/mnist/t10k-labels-idx1-ubyte.gz
Downloading http://yann.lecun.com/exdb/mnist/t10k-labels-idx1-ubyte.gz to ../../data/MNIST/raw/t10k-labels-idx1-ubyte.gz


100%|██████████| 4542/4542 [00:00<00:00, 5927358.05it/s]

Extracting ../../data/MNIST/raw/t10k-labels-idx1-ubyte.gz to ../../data/MNIST/raw






The cell below defines a fully connected neural network in PyTorch with an input layer, one hidden layer, and an output layer.

The network takes `input_size` input features (784, one per pixel of a flattened 28×28 MNIST image), passes them through a hidden layer of `hidden_size` neurons (500), and produces one score for each of the `num_classes` output classes (10).

During training, it uses `CrossEntropyLoss` to measure the difference between the predicted class scores and the true labels, and the Adam optimizer to adjust the model's parameters for better performance.

The network is then trained on mini-batches of the training dataset, served by the data loader `train_loader`.

The goal is to minimize the loss and improve the model's ability to make accurate predictions.

In [6]:
class NeuralNet(nn.Module):
    """Fully connected network with a single hidden layer.

    Applies a linear layer from ``input_size`` to ``hidden_size`` features,
    a ReLU, and a second linear layer from ``hidden_size`` to
    ``num_classes`` outputs.
    """

    def __init__(self, input_size, hidden_size, num_classes):
        """Create the two linear layers and the ReLU between them."""
        super().__init__()
        # The attribute names become the state_dict key prefixes, so they
        # stay fc1 / relu / fc2.
        self.fc1 = nn.Linear(in_features=input_size, out_features=hidden_size)
        self.relu = nn.ReLU()
        self.fc2 = nn.Linear(in_features=hidden_size, out_features=num_classes)

    def forward(self, x):
        """Return the raw output of the second linear layer for ``x``.

        No softmax is applied here; the values are unnormalised scores.
        """
        hidden = self.relu(self.fc1(x))
        return self.fc2(hidden)
# Build the network and move its parameters to the configured device.
model = NeuralNet(input_size, hidden_size, num_classes)
model = model.to(device)

# Loss function and optimizer used by the training loop.
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(params=model.parameters(), lr=learning_rate)

# Number of mini-batches per epoch (used when logging training progress).
total_step = len(train_loader)

The ReLU activation function is applied via `self.relu`. ReLU introduces non-linearity into the model by setting all negative values in the output of the first linear layer to zero and leaving positive values unchanged. This helps the network learn complex patterns and relationships in the data.

In [7]:
# Train the model.
# Put the model in training mode first. NeuralNet has no mode-dependent
# layers, so results are unchanged today, but the loop stays correct if
# dropout or batch-norm layers are added, or if this cell is re-run after
# the model was switched to evaluation mode.
model.train()

for epoch in range(num_epochs):
    for i, (images, labels) in enumerate(train_loader):
        # Flatten every image to one feature vector per sample and move the
        # batch to the configured device. Keeping the batch dimension
        # explicit (instead of reshape(-1, 28*28)) means a change in image
        # size surfaces as a shape error in the first linear layer rather
        # than silently altering the batch size.
        images = images.reshape(images.size(0), -1).to(device)
        labels = labels.to(device)

        # Forward pass
        outputs = model(images)
        loss = criterion(outputs, labels)

        # Backward and optimize: clear old gradients, back-propagate, update.
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        # Log the current mini-batch loss every 100 steps.
        if (i+1) % 100 == 0:
            print('Epoch [{}/{}], Step [{}/{}], Loss: {:.4f}'
                  .format(epoch+1, num_epochs, i+1, total_step, loss.item()))

Epoch [1/5], Step [100/600], Loss: 0.3431
Epoch [1/5], Step [200/600], Loss: 0.2816
Epoch [1/5], Step [300/600], Loss: 0.2268
Epoch [1/5], Step [400/600], Loss: 0.2757
Epoch [1/5], Step [500/600], Loss: 0.1113
Epoch [1/5], Step [600/600], Loss: 0.1671
Epoch [2/5], Step [100/600], Loss: 0.1346
Epoch [2/5], Step [200/600], Loss: 0.0698
Epoch [2/5], Step [300/600], Loss: 0.1747
Epoch [2/5], Step [400/600], Loss: 0.0576
Epoch [2/5], Step [500/600], Loss: 0.0207
Epoch [2/5], Step [600/600], Loss: 0.1396
Epoch [3/5], Step [100/600], Loss: 0.1033
Epoch [3/5], Step [200/600], Loss: 0.0391
Epoch [3/5], Step [300/600], Loss: 0.0599
Epoch [3/5], Step [400/600], Loss: 0.1216
Epoch [3/5], Step [500/600], Loss: 0.1270
Epoch [3/5], Step [600/600], Loss: 0.0590
Epoch [4/5], Step [100/600], Loss: 0.1446
Epoch [4/5], Step [200/600], Loss: 0.0311
Epoch [4/5], Step [300/600], Loss: 0.0958
Epoch [4/5], Step [400/600], Loss: 0.0637
Epoch [4/5], Step [500/600], Loss: 0.0294
Epoch [4/5], Step [600/600], Loss:

In [8]:
# Test the model
# Switch to evaluation mode before measuring accuracy. NeuralNet has no
# mode-dependent layers, so the number is unchanged today, but the cell stays
# correct if dropout or batch-norm layers are added.
model.eval()

# In test phase, we don't need to compute gradients (for memory efficiency)
with torch.no_grad():
    correct = 0
    total = 0
    for images, labels in test_loader:
        # One flattened feature vector per sample, on the configured device.
        images = images.reshape(images.size(0), -1).to(device)
        labels = labels.to(device)
        outputs = model(images)
        # Predicted class = index of the largest score in each row.
        # Under no_grad there is no need to go through the legacy .data.
        predicted = outputs.argmax(dim=1)
        total += labels.size(0)
        correct += (predicted == labels).sum().item()

    print('Accuracy of the network on the 10000 test images: {} %'.format(100 * correct / total))

# Save the model checkpoint (parameters only, via state_dict)
torch.save(model.state_dict(), 'model.ckpt')

Accuracy of the network on the 10000 test images: 97.88 %
