In [3]:
# ! pip install torchvision 
! pip install -U jupyter ipywidgets

Collecting jupyter
  Using cached jupyter-1.0.0-py2.py3-none-any.whl (2.7 kB)
Collecting ipywidgets
  Downloading ipywidgets-8.0.4-py3-none-any.whl (137 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m137.8/137.8 kB[0m [31m2.8 MB/s[0m eta [36m0:00:00[0ma [36m0:00:01[0m
Collecting notebook
  Downloading notebook-6.5.2-py3-none-any.whl (439 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m439.1/439.1 kB[0m [31m8.4 MB/s[0m eta [36m0:00:00[0m00:01[0m
[?25hCollecting jupyter-console
  Downloading jupyter_console-6.4.4-py3-none-any.whl (22 kB)
Collecting qtconsole
  Downloading qtconsole-5.4.0-py3-none-any.whl (121 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m121.0/121.0 kB[0m [31m4.8 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting nbconvert
  Downloading nbconvert-7.2.7-py3-none-any.whl (273 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m273.2/273.2 kB[0m [31m8.8 MB/s[0m eta [36m0:00:00[0m
Col

In [4]:
import os
import torch
from torch import nn
from torch.utils.data import DataLoader
from torchvision import datasets, transforms

In [5]:
# Choose the compute device once; later cells refer to `device`.
if torch.cuda.is_available():
    device = "cuda"
else:
    device = "cpu"
print(f"Using {device}")

Using cpu


In [10]:
class NeuralNetwork(nn.Module):
    """Fully connected classifier: flatten -> Linear/ReLU -> Linear/ReLU -> Linear.

    With the default arguments the layer sizes are 784 -> 512 -> 512 -> 10.

    Args:
        in_features: Number of values per sample after flattening every
            dimension except the first (default ``28 * 28``).
        hidden_features: Width of both hidden linear layers (default 512).
        num_classes: Number of output logits per sample (default 10).
    """

    def __init__(self, in_features=28 * 28, hidden_features=512, num_classes=10):
        super().__init__()
        # nn.Flatten() keeps dim 0 and collapses all remaining dims into one.
        self.flatten = nn.Flatten()
        # Submodule order is kept fixed so parameter names
        # (linear_relu_stack.0 / .2 / .4) stay stable.
        self.linear_relu_stack = nn.Sequential(
            nn.Linear(in_features, hidden_features),
            nn.ReLU(),
            nn.Linear(hidden_features, hidden_features),
            nn.ReLU(),
            nn.Linear(hidden_features, num_classes),
        )

    def forward(self, X):
        """Return raw, unnormalised logits of shape ``(X.shape[0], num_classes)``.

        No softmax is applied here.
        """
        flat = self.flatten(X)
        return self.linear_relu_stack(flat)

In [11]:
# Build the network with its default layer sizes, then move its parameters
# onto the selected device before printing the module tree.
model = NeuralNetwork()
model = model.to(device)
print(model)

NeuralNetwork(
  (flatten): Flatten(start_dim=1, end_dim=-1)
  (linear_relu_stack): Sequential(
    (0): Linear(in_features=784, out_features=512, bias=True)
    (1): ReLU()
    (2): Linear(in_features=512, out_features=512, bias=True)
    (3): ReLU()
    (4): Linear(in_features=512, out_features=10, bias=True)
  )
)


In [12]:
# Push one random 28x28 input through the model and report the class with
# the highest softmax probability.
X = torch.rand(1, 28, 28, device=device)
logits = model(X)
pred_prob = torch.softmax(logits, dim=1)
y_pred = torch.argmax(pred_prob, dim=1)
print(f"Predicted class: {y_pred}")

Predicted class: tensor([7])


In [16]:
# A mini-batch of three random 28x28 "images" used to trace the layers one by one.
input_images = torch.rand((3, 28, 28))
print(input_images.shape)

torch.Size([3, 28, 28])


In [28]:
# Flatten everything except dim 0 (Flatten's defaults, spelled out here),
# turning each 28x28 image into a 784-value row.
flat_images = nn.Flatten(start_dim=1, end_dim=-1)(input_images)
print(flat_images.shape)

torch.Size([3, 784])


In [29]:
# One linear layer mapping the 784 flattened pixels to 20 hidden features.
layer1 = nn.Linear(28 * 28, 20)
hidden1 = layer1(flat_images)
print(hidden1.shape)

torch.Size([3, 20])


In [30]:
# Show the activations before and after ReLU, which clamps negatives to zero.
# `hidden1` is rebound to the activated tensor, so re-running this cell alone
# prints already-activated values under "Before ReLU".
print(f"Before ReLU: {hidden1}\n\n")
hidden1 = torch.relu(hidden1)
print(f"After ReLU: {hidden1}\n\n")

Before ReLU: tensor([[ 0.2595, -0.3603, -0.3721, -0.2069, -0.7529, -0.5117, -0.6561,  0.7084,
         -0.4847, -0.3463,  0.1685, -0.0682, -0.6611,  0.0138,  0.1667, -0.7873,
         -0.4785, -0.1551,  0.0798,  0.2295],
        [ 0.1222, -0.3165, -0.2478, -0.5065, -0.3985, -0.2821, -0.4576,  0.6606,
         -0.5157,  0.0631,  0.4221, -0.2335, -0.5419,  0.2954,  0.2555, -0.3980,
         -0.2914, -0.0600,  0.1883,  0.3362],
        [ 0.0818, -0.2787, -0.1886, -0.2042, -0.2483, -0.3967, -0.2607,  0.4131,
         -0.0096, -0.0656,  0.3074, -0.2872, -0.5163,  0.4668,  0.3661, -0.4037,
         -0.4985, -0.1691,  0.4846,  0.2514]], grad_fn=<AddmmBackward0>)


After ReLU: tensor([[0.2595, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.7084, 0.0000,
         0.0000, 0.1685, 0.0000, 0.0000, 0.0138, 0.1667, 0.0000, 0.0000, 0.0000,
         0.0798, 0.2295],
        [0.1222, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.6606, 0.0000,
         0.0631, 0.4221, 0.0000, 0.0000, 0.2954, 0.25

In [31]:
# Chain the steps shown above into one container. `layer1` is the same
# module object created earlier (shared weights); the last Linear is new.
seq_modules = nn.Sequential(
    *(
        nn.Flatten(),
        layer1,
        nn.ReLU(),
        nn.Linear(20, 10),
    )
)
logits = seq_modules(input_images)

In [32]:
# Raw (pre-softmax) outputs of `seq_modules` for the three random input images.
print(logits)

tensor([[-0.1288, -0.1107, -0.0095,  0.1742,  0.0152,  0.3659,  0.3549,  0.2874,
          0.0430, -0.0106],
        [-0.2598, -0.1467, -0.0797,  0.0963, -0.0012,  0.4053,  0.3982,  0.3792,
         -0.0487, -0.0198],
        [-0.3747, -0.1224, -0.1074,  0.0546, -0.0758,  0.3543,  0.3363,  0.3805,
         -0.1420, -0.0354]], grad_fn=<AddmmBackward0>)


In [33]:
# Normalise along dim=1 so each row of `logits` becomes a distribution that
# sums to 1.
# NOTE: this rebinds `pred_prob`, which an earlier cell already assigned.
softmax = nn.Softmax(dim=1)
pred_prob = softmax(logits)

In [34]:
# Softmax probabilities computed from `logits` in the preceding cell.
print(pred_prob)

tensor([[0.0785, 0.0799, 0.0884, 0.1062, 0.0906, 0.1287, 0.1273, 0.1190, 0.0932,
         0.0883],
        [0.0698, 0.0782, 0.0836, 0.0997, 0.0905, 0.1358, 0.1349, 0.1323, 0.0863,
         0.0888],
        [0.0650, 0.0837, 0.0849, 0.0999, 0.0877, 0.1348, 0.1324, 0.1384, 0.0820,
         0.0913]], grad_fn=<SoftmaxBackward0>)


In [35]:
# Print the module hierarchy of `model` (the NeuralNetwork instance).
print(f"Model structure: {model}")

Model structure: NeuralNetwork(
  (flatten): Flatten(start_dim=1, end_dim=-1)
  (linear_relu_stack): Sequential(
    (0): Linear(in_features=784, out_features=512, bias=True)
    (1): ReLU()
    (2): Linear(in_features=512, out_features=512, bias=True)
    (3): ReLU()
    (4): Linear(in_features=512, out_features=10, bias=True)
  )
)


In [36]:
# Walk every learnable tensor registered on `model`; print its qualified
# name, its full shape, and a preview.
for name, parameter in model.named_parameters():
    # parameter[:2] slices the first two entries along dim 0 to keep output short.
    print(f"Layer: {name} | Size: {parameter.size()} | Values: {parameter[:2]}")

Layer: linear_relu_stack.0.weight | Size: torch.Size([512, 784]) | Values: tensor([[ 0.0013,  0.0124, -0.0075,  ..., -0.0226,  0.0163, -0.0180],
        [ 0.0065, -0.0286, -0.0174,  ...,  0.0126,  0.0129,  0.0234]],
       grad_fn=<SliceBackward0>)
Layer: linear_relu_stack.0.bias | Size: torch.Size([512]) | Values: tensor([ 0.0091, -0.0158], grad_fn=<SliceBackward0>)
Layer: linear_relu_stack.2.weight | Size: torch.Size([512, 512]) | Values: tensor([[-0.0424, -0.0090,  0.0202,  ...,  0.0184, -0.0179, -0.0202],
        [-0.0037, -0.0015,  0.0179,  ..., -0.0439,  0.0272,  0.0415]],
       grad_fn=<SliceBackward0>)
Layer: linear_relu_stack.2.bias | Size: torch.Size([512]) | Values: tensor([0.0021, 0.0278], grad_fn=<SliceBackward0>)
Layer: linear_relu_stack.4.weight | Size: torch.Size([10, 512]) | Values: tensor([[ 0.0050,  0.0095,  0.0185,  ..., -0.0083, -0.0014, -0.0264],
        [-0.0414, -0.0317,  0.0161,  ..., -0.0303, -0.0156,  0.0270]],
       grad_fn=<SliceBackward0>)
Layer: linear_

In [39]:
# Fetch FashionMNIST into the local "fashion_mnist" directory.
# No transform is passed here; a later cell rebinds `mnist` with one.
mnist = datasets.FashionMNIST(root="fashion_mnist", download=True)

Downloading http://fashion-mnist.s3-website.eu-central-1.amazonaws.com/train-images-idx3-ubyte.gz
Downloading http://fashion-mnist.s3-website.eu-central-1.amazonaws.com/train-images-idx3-ubyte.gz to fashion_mnist/FashionMNIST/raw/train-images-idx3-ubyte.gz


100%|██████████| 26421880/26421880 [00:19<00:00, 1377310.17it/s]


Extracting fashion_mnist/FashionMNIST/raw/train-images-idx3-ubyte.gz to fashion_mnist/FashionMNIST/raw

Downloading http://fashion-mnist.s3-website.eu-central-1.amazonaws.com/train-labels-idx1-ubyte.gz
Downloading http://fashion-mnist.s3-website.eu-central-1.amazonaws.com/train-labels-idx1-ubyte.gz to fashion_mnist/FashionMNIST/raw/train-labels-idx1-ubyte.gz


100%|██████████| 29515/29515 [00:00<00:00, 184167.05it/s]


Extracting fashion_mnist/FashionMNIST/raw/train-labels-idx1-ubyte.gz to fashion_mnist/FashionMNIST/raw

Downloading http://fashion-mnist.s3-website.eu-central-1.amazonaws.com/t10k-images-idx3-ubyte.gz
Downloading http://fashion-mnist.s3-website.eu-central-1.amazonaws.com/t10k-images-idx3-ubyte.gz to fashion_mnist/FashionMNIST/raw/t10k-images-idx3-ubyte.gz


100%|██████████| 4422102/4422102 [00:01<00:00, 3042875.40it/s]


Extracting fashion_mnist/FashionMNIST/raw/t10k-images-idx3-ubyte.gz to fashion_mnist/FashionMNIST/raw

Downloading http://fashion-mnist.s3-website.eu-central-1.amazonaws.com/t10k-labels-idx1-ubyte.gz
Downloading http://fashion-mnist.s3-website.eu-central-1.amazonaws.com/t10k-labels-idx1-ubyte.gz to fashion_mnist/FashionMNIST/raw/t10k-labels-idx1-ubyte.gz


100%|██████████| 5148/5148 [00:00<00:00, 13206285.62it/s]

Extracting fashion_mnist/FashionMNIST/raw/t10k-labels-idx1-ubyte.gz to fashion_mnist/FashionMNIST/raw






In [47]:
# Load the training split with ToTensor applied so each sample's image is a
# tensor. download=True makes this cell self-sufficient on a fresh checkout;
# torchvision skips the download when the files already exist under `root`.
mnist = datasets.FashionMNIST(
    root="fashion_mnist",
    train=True,
    download=True,
    transform=transforms.ToTensor(),
)

In [48]:
# Fractions of the dataset held out for validation and testing.
validation_ratio = 0.2
test_ratio = 0.1
valid_num = int(validation_ratio * len(mnist))
test_num = int(test_ratio * len(mnist))
# Give training the remainder rather than truncating a third float product:
# the three sizes then always sum to len(mnist), which random_split requires.
train_num = len(mnist) - valid_num - test_num

In [85]:
# Split into train / validation / test subsets. A dedicated seeded generator
# keeps subset membership identical across kernel restarts and leaves the
# global RNG state untouched.
train_data, valid_data, test_data = torch.utils.data.random_split(
    mnist,
    [train_num, valid_num, test_num],
    generator=torch.Generator().manual_seed(42),
)

In [79]:
# Inspect what kind of object random_split returned for the training portion.
type(train_data)

torch.utils.data.dataset.Subset

In [107]:
# Shape of element 0 of the first training sample
# (presumably the image of an (image, label) pair; confirm against the dataset).
train_data[0][0].size()

torch.Size([1, 28, 28])

In [114]:
# Join the first three training images along dim 0. Each image is
# (1, 28, 28), so the result is (3, 28, 28): the size-1 leading dim of each
# image ends up acting as the batch dim.
example_images = torch.cat([train_data[i][0] for i in range(3)], dim=0)

In [115]:
# Confirm the stacked batch shape before feeding it to the network.
example_images.size()

torch.Size([3, 28, 28])

In [120]:
# Run the real FashionMNIST samples through the small demo network, then
# normalise with the `softmax` module (dim=1) defined earlier.
# NOTE: no training step appears in this part of the notebook, so these
# probabilities presumably come from randomly initialised weights.
# `pred_prob` is rebound here, replacing its earlier value.
example_logits = seq_modules(example_images)
pred_prob = softmax(example_logits)

In [121]:
# Per-class probabilities for the three example images (one row per image).
print(pred_prob)

tensor([[0.0663, 0.0798, 0.0975, 0.1016, 0.0904, 0.1272, 0.1348, 0.1234, 0.0839,
         0.0951],
        [0.0752, 0.0827, 0.0969, 0.1034, 0.0973, 0.1197, 0.1205, 0.1119, 0.0887,
         0.1037],
        [0.0700, 0.0841, 0.0984, 0.1077, 0.0899, 0.1211, 0.1274, 0.1205, 0.0856,
         0.0952]], grad_fn=<SoftmaxBackward0>)


In [119]:
# Element 1 of each of the first three training samples
# (presumably the label), for comparison with the predictions.
first_three_samples = (train_data[i] for i in range(3))
print([sample[1] for sample in first_three_samples])

[2, 5, 0]
