In [10]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here are several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
# Walk the Kaggle input directory tree and print the full path of every file found.
for root, _, files in os.walk('/kaggle/input'):
    for name in files:
        print(os.path.join(root, name))

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

In [11]:
import torch
import torch.nn as nn
import torch.optim as optim
import torchvision
import torchvision.transforms as transforms

In [12]:
# Preprocessing applied to every image: convert to a tensor, then normalize
# the single channel with mean 0.5 and std 0.5.
transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize((0.5,), (0.5,)),
])

# MNIST splits, downloaded into ./data on first use.
train_dataset = torchvision.datasets.MNIST(
    root='./data',
    train=True,
    download=True,
    transform=transform,
)
test_dataset = torchvision.datasets.MNIST(
    root='./data',
    train=False,
    download=True,
    transform=transform,
)

# Mini-batch loaders: training batches are shuffled, test batches keep
# the dataset order.
train_loader = torch.utils.data.DataLoader(
    train_dataset,
    batch_size=64,
    shuffle=True,
)
test_loader = torch.utils.data.DataLoader(
    test_dataset,
    batch_size=64,
    shuffle=False,
)

In [13]:
class DenseNet(nn.Module):
    """Fully connected ReLU classifier operating on flattened inputs.

    The whole network is one ``nn.Sequential`` stored in ``self.net``:
    an input ``Linear`` + ``ReLU``, followed by ``num_hidden_layers``
    ``Linear(hidden_dim, hidden_dim)`` + ``ReLU`` pairs, followed by a final
    ``Linear(hidden_dim, output_dim)``. No activation is applied after the
    last layer, so ``forward`` returns raw scores.

    Args:
        input_dim: Number of features per sample after flattening.
        hidden_dim: Width of every hidden layer.
        output_dim: Number of output scores per sample.
        num_hidden_layers: Number of hidden-to-hidden ``Linear`` + ``ReLU``
            pairs. The default of 10 reproduces the original fixed depth, so
            the layer indices (and therefore the state_dict keys) are
            unchanged for existing callers.

    Raises:
        ValueError: If ``num_hidden_layers`` is negative.
    """

    def __init__(self, input_dim=28*28, hidden_dim=100, output_dim=10, num_hidden_layers=10):
        super().__init__()

        if num_hidden_layers < 0:
            raise ValueError(
                f"num_hidden_layers must be non-negative, got {num_hidden_layers}"
            )

        # Input layer
        layers = [nn.Linear(input_dim, hidden_dim), nn.ReLU()]

        # Hidden-to-hidden layers
        for _ in range(num_hidden_layers):
            layers.append(nn.Linear(hidden_dim, hidden_dim))
            layers.append(nn.ReLU())

        # Output layer
        layers.append(nn.Linear(hidden_dim, output_dim))

        self.net = nn.Sequential(*layers)

    def forward(self, x):
        """Flatten every sample to one dimension and run it through ``self.net``.

        Args:
            x: Tensor whose first dimension is the batch; all remaining
                dimensions are flattened into a single feature dimension.

        Returns:
            Tensor of shape ``(x.size(0), output_dim)``.
        """
        # reshape (unlike view) also accepts non-contiguous inputs.
        x = x.reshape(x.size(0), -1)
        return self.net(x)

# Select the device: the first CUDA GPU when one is available, otherwise the CPU.
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

# Build the network once and place it on the selected device, so the model
# that is printed is the same instance that is used afterwards.
model = DenseNet().to(device)

print(model)
print(device)

DenseNet(
  (net): Sequential(
    (0): Linear(in_features=784, out_features=100, bias=True)
    (1): ReLU()
    (2): Linear(in_features=100, out_features=100, bias=True)
    (3): ReLU()
    (4): Linear(in_features=100, out_features=100, bias=True)
    (5): ReLU()
    (6): Linear(in_features=100, out_features=100, bias=True)
    (7): ReLU()
    (8): Linear(in_features=100, out_features=100, bias=True)
    (9): ReLU()
    (10): Linear(in_features=100, out_features=100, bias=True)
    (11): ReLU()
    (12): Linear(in_features=100, out_features=100, bias=True)
    (13): ReLU()
    (14): Linear(in_features=100, out_features=100, bias=True)
    (15): ReLU()
    (16): Linear(in_features=100, out_features=100, bias=True)
    (17): ReLU()
    (18): Linear(in_features=100, out_features=100, bias=True)
    (19): ReLU()
    (20): Linear(in_features=100, out_features=100, bias=True)
    (21): ReLU()
    (22): Linear(in_features=100, out_features=10, bias=True)
  )
)
cpu


In [14]:
# Loss function: cross-entropy between the model outputs and the integer labels.
criterion = nn.CrossEntropyLoss()

# Optimizer: Adam over all parameters of `model`, learning rate 1e-4.
optimizer = optim.Adam(params=model.parameters(), lr=1e-4)

In [15]:
num_epochs = 25

# Make sure the model is in training mode, even if this cell is re-run
# after the evaluation cell has called model.eval().
model.train()

for epoch in range(num_epochs):
    for i, (images, labels) in enumerate(train_loader):
        # The model lives on `device`; inputs and targets must be on the same
        # device or the forward pass / loss raises a device-mismatch error.
        images = images.to(device)
        labels = labels.to(device)

        # Forward pass and loss
        outputs = model(images)
        loss = criterion(outputs, labels)

        # Backward pass and parameter update
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        # Report the current batch loss every 100 steps
        if (i+1) % 100 == 0:
            print(f"Epoch [{epoch+1}/{num_epochs}], Step [{i+1}/{len(train_loader)}], Loss: {loss.item():.4f}")


Epoch [1/25], Step [100/938], Loss: 2.2881
Epoch [1/25], Step [200/938], Loss: 1.7398
Epoch [1/25], Step [300/938], Loss: 1.7505
Epoch [1/25], Step [400/938], Loss: 1.6668
Epoch [1/25], Step [500/938], Loss: 1.8363
Epoch [1/25], Step [600/938], Loss: 1.4283
Epoch [1/25], Step [700/938], Loss: 1.5748
Epoch [1/25], Step [800/938], Loss: 1.5453
Epoch [1/25], Step [900/938], Loss: 1.5766
Epoch [2/25], Step [100/938], Loss: 1.4740
Epoch [2/25], Step [200/938], Loss: 1.3382
Epoch [2/25], Step [300/938], Loss: 1.2210
Epoch [2/25], Step [400/938], Loss: 1.0425
Epoch [2/25], Step [500/938], Loss: 1.2155
Epoch [2/25], Step [600/938], Loss: 1.2652
Epoch [2/25], Step [700/938], Loss: 1.3178
Epoch [2/25], Step [800/938], Loss: 1.0327
Epoch [2/25], Step [900/938], Loss: 0.9655
Epoch [3/25], Step [100/938], Loss: 1.0938
Epoch [3/25], Step [200/938], Loss: 1.0586
Epoch [3/25], Step [300/938], Loss: 1.1197
Epoch [3/25], Step [400/938], Loss: 0.6319
Epoch [3/25], Step [500/938], Loss: 0.9683
Epoch [3/25

In [16]:
# Switch to evaluation mode and count correct predictions over the test set.
model.eval()
with torch.no_grad():  # no gradients are needed for evaluation
    correct = 0
    total = 0
    for images, labels in test_loader:
        # Keep the batch on the same device as the model so both the forward
        # pass and the comparison with `labels` are valid.
        images = images.to(device)
        labels = labels.to(device)

        outputs = model(images)
        # Predicted class = index of the largest output score per sample.
        predicted = outputs.argmax(dim=1)
        total += labels.size(0)
        correct += (predicted == labels).sum().item()

    print(f"Accuracy of the network on the 10000 test images: {100 * correct / total} %")


Accuracy of the network on the 10000 test images: 95.53 %


In [17]:
# Persist only the model's state_dict (not the full module object) to disk.
trained_weights = model.state_dict()
torch.save(trained_weights, 'mnist_dense_model.pth')