<a href="https://colab.research.google.com/github/underthelights/underthelights.github.io/blob/main/_posts/ai/PyTorchMS/1.%20Introduction_to_the_PyTorch_(2).ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

Automatic Differentiation


In [None]:
import torch
x = torch.ones(5)
y = torch.zeros(3)
w = torch.randn(5,3, requires_grad=True)
b = torch.randn(3, requires_grad=True)
z = torch.matmul(x,w+b)

loss = torch.nn.functional.binary_cross_entropy_with_logits(z,y)
x,y,w,b,z,loss

(tensor([1., 1., 1., 1., 1.]),
 tensor([0., 0., 0.]),
 tensor([[-1.7438,  0.4070, -1.4434],
         [ 0.2997, -0.2521, -0.1173],
         [ 0.5596,  1.5509,  0.1543],
         [-0.9715,  0.7192, -0.9757],
         [-0.8247,  2.3340, -0.2798]], requires_grad=True),
 tensor([0.6498, 1.0947, 0.5866], requires_grad=True),
 tensor([ 0.5681, 10.2327,  0.2710], grad_fn=<SqueezeBackward3>),
 tensor(4.0292, grad_fn=<BinaryCrossEntropyWithLogitsBackward>))

In [None]:
print('Gradient function for z =',z.grad_fn)
print('Gradient function for loss =', loss.grad_fn)

Gradient function for z = <SqueezeBackward3 object at 0x7f0eb8f1d590>
Gradient function for loss = <BinaryCrossEntropyWithLogitsBackward object at 0x7f0eb8f1da10>


In [None]:
loss.backward()
print(w.grad)
print(b.grad)

tensor([[0.2128, 0.3333, 0.1891],
        [0.2128, 0.3333, 0.1891],
        [0.2128, 0.3333, 0.1891],
        [0.2128, 0.3333, 0.1891],
        [0.2128, 0.3333, 0.1891]])
tensor([1.0639, 1.6666, 0.9456])


In [None]:
z = torch.matmul(x,w)+b
print(z.requires_grad)
with torch.no_grad():
  z = torch.matmul(x,w)+b

print(z.requires_grad)

True
False


In [None]:
z = torch.matmul(x,w)+b
z_det = z.detach()
print(z_det.requires_grad)

False


In [None]:
inp = torch.eye(5, requires_grad=True)
out = (inp+1). pow(2)
out.backward(torch.ones_like(inp), retain_graph = True)
print("first call\n", inp.grad)

out.backward(torch.ones_like(inp), retain_graph = True)
print("second call\n", inp.grad)

inp.grad.zero_()
out.backward(torch.ones_like(inp), retain_graph = True)
print("\n call after zering grad.\n", inp.grad)

out.backward(torch.ones_like(inp), retain_graph = True)
print("second call\n", inp.grad)


out.backward(torch.ones_like(inp), retain_graph = True)
print("third call\n", inp.grad)


first call
 tensor([[4., 2., 2., 2., 2.],
        [2., 4., 2., 2., 2.],
        [2., 2., 4., 2., 2.],
        [2., 2., 2., 4., 2.],
        [2., 2., 2., 2., 4.]])
second call
 tensor([[8., 4., 4., 4., 4.],
        [4., 8., 4., 4., 4.],
        [4., 4., 8., 4., 4.],
        [4., 4., 4., 8., 4.],
        [4., 4., 4., 4., 8.]])

 call after zering grad.
 tensor([[4., 2., 2., 2., 2.],
        [2., 4., 2., 2., 2.],
        [2., 2., 4., 2., 2.],
        [2., 2., 2., 4., 2.],
        [2., 2., 2., 2., 4.]])
second call
 tensor([[8., 4., 4., 4., 4.],
        [4., 8., 4., 4., 4.],
        [4., 4., 8., 4., 4.],
        [4., 4., 4., 8., 4.],
        [4., 4., 4., 4., 8.]])
third call
 tensor([[12.,  6.,  6.,  6.,  6.],
        [ 6., 12.,  6.,  6.,  6.],
        [ 6.,  6., 12.,  6.,  6.],
        [ 6.,  6.,  6., 12.,  6.],
        [ 6.,  6.,  6.,  6., 12.]])


In [None]:
import torch
from torch import nn
from torch.utils.data import DataLoader
from torchvision import datasets 
from torchvision.transforms import ToTensor, Lambda

training_data = datasets.FashionMNIST(
    root = "data",
    train =True,
    download=True,
    transform=ToTensor()
)
test_data = datasets.FashionMNIST(
    root = "data",
    train =False,
    download=True,
    transform=ToTensor()
)

train_dataloader = DataLoader(training_data, batch_size = 64)
test_dataloader = DataLoader(test_data, batch_size = 64)

class NN(nn.Module):
  def __init__(self):
    super(NN, self).__init__()
    self.flatten = nn.Flatten()
    self.linear_relu_stack = nn.Sequential(
        nn.Linear(28*28, 512),
        nn.ReLU(),
        nn.Linear(512, 512),
        nn.ReLU(),
        nn.Linear(512, 10),
        nn.ReLU()
    )
  def fwd(self, x):
    x = self.flatten(x)
    logits = self.linear_relu_stack(x)
    return logits

model = NN()
model

NN(
  (flatten): Flatten(start_dim=1, end_dim=-1)
  (linear_relu_stack): Sequential(
    (0): Linear(in_features=784, out_features=512, bias=True)
    (1): ReLU()
    (2): Linear(in_features=512, out_features=512, bias=True)
    (3): ReLU()
    (4): Linear(in_features=512, out_features=10, bias=True)
    (5): ReLU()
  )
)

In [None]:
learning_rate = 1e-3
batch_size = 64
epochs = 5

loss_fn = nn.CrossEntropyLoss()
loss_fn

CrossEntropyLoss()

In [None]:
optimizer = torch.optim.SGD(model.parameters(), lr=learning_rate)
optimizer

SGD (
Parameter Group 0
    dampening: 0
    lr: 0.001
    momentum: 0
    nesterov: False
    weight_decay: 0
)

In [None]:
def train_loop (dataloader, model, loss_fn, optimizer):
  size = len (dataloader.dataset)
  for batch, (X,y) in enumerate(dataloader):
    pred = model(X)
    loss = loss_fn (pred, y)

    optimizer.zero_grad()
    loss.backward()
    optimizer.step()

    if batch%100 ==0:
      loss, current = loss.item(), batch *len(X)
      print(f"loss : {loss :>7f} [{current:>5d}/{size:>5d}]")

In [None]:
def test_loop(dataloader,model, loss_fn):
  size = len(dataloader.dataset)
  test_loss, correct = 0, 0
  with torch.no_grad():
    for X,y in dataloader:
      pred = model(X)
      test_loss += loss_fn(pred, y).item()
      correct += (pred.argmax(1) == y).type(torch.float).sum().item()

  test_loss /= size
  correct /= size

  print(f"Test Error: \n Accuracy : {(100*correct):>0.1f}%, AvgLoss : {test_loss:>8f} \n")

In [None]:
loss_fn = nn.CrossEntropyLoss()
optimizer = torch.optim.SGD(model.parameters(), lr=learning_rate)

epochs = 10
for t in range(epochs):
    print(f"Epoch {t+1}\n-------------------------------")
    train_loop(train_dataloader, model, loss_fn, optimizer)
    test_loop(test_dataloader, model, loss_fn)
print("Done!")

Epoch 1
-------------------------------


NotImplementedError: ignored

In [None]:
import torch
from torch import nn
from torch.utils.data import DataLoader
from torchvision import datasets
from torchvision.transforms import ToTensor, Lambda

training_data = datasets.FashionMNIST(
    root="data",
    train=True,
    download=True,
    transform=ToTensor()
)

test_data = datasets.FashionMNIST(
    root="data",
    train=False,
    download=True,
    transform=ToTensor()
)

train_dataloader = DataLoader(training_data, batch_size=64)
test_dataloader = DataLoader(test_data, batch_size=64)

class NeuralNetwork(nn.Module):
    def __init__(self):
        super(NeuralNetwork, self).__init__()
        self.flatten = nn.Flatten()
        self.linear_relu_stack = nn.Sequential(
            nn.Linear(28*28, 512),
            nn.ReLU(),
            nn.Linear(512, 512),
            nn.ReLU(),
            nn.Linear(512, 10),
            nn.ReLU()
        )

    def forward(self, x):
        x = self.flatten(x)
        logits = self.linear_relu_stack(x)
        return logits

model = NeuralNetwork()

learning_rate = 1e-3
batch_size = 64
epochs = 5
# Initialize the loss function
loss_fn = nn.CrossEntropyLoss()

optimizer = torch.optim.SGD(model.parameters(), lr=learning_rate)

def train_loop(dataloader, model, loss_fn, optimizer):
    size = len(dataloader.dataset)
    for batch, (X, y) in enumerate(dataloader):        
        # Compute prediction and loss
        pred = model(X)
        loss = loss_fn(pred, y)
        
        # Backpropagation
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        if batch % 100 == 0:
            loss, current = loss.item(), batch * len(X)
            print(f"loss: {loss:>7f}  [{current:>5d}/{size:>5d}]")


def test_loop(dataloader, model, loss_fn):
    size = len(dataloader.dataset)
    test_loss, correct = 0, 0

    with torch.no_grad():
        for X, y in dataloader:
            pred = model(X)
            test_loss += loss_fn(pred, y).item()
            correct += (pred.argmax(1) == y).type(torch.float).sum().item()
            
    test_loss /= size
    correct /= size
    print(f"Test Error: \n Accuracy: {(100*correct):>0.1f}%, Avg loss: {test_loss:>8f} \n")
    

loss_fn = nn.CrossEntropyLoss()
optimizer = torch.optim.SGD(model.parameters(), lr=learning_rate)

epochs = 10
for t in range(epochs):
    print(f"Epoch {t+1}\n-------------------------------")
    train_loop(train_dataloader, model, loss_fn, optimizer)
    test_loop(test_dataloader, model, loss_fn)
print("Done!")

Epoch 1
-------------------------------
loss: 2.299237  [    0/60000]
loss: 2.288951  [ 6400/60000]
loss: 2.280460  [12800/60000]
loss: 2.286466  [19200/60000]
loss: 2.274889  [25600/60000]
loss: 2.260793  [32000/60000]
loss: 2.253695  [38400/60000]
loss: 2.232205  [44800/60000]
loss: 2.245378  [51200/60000]
loss: 2.228816  [57600/60000]
Test Error: 
 Accuracy: 39.3%, Avg loss: 0.035004 

Epoch 2
-------------------------------
loss: 2.220352  [    0/60000]
loss: 2.223371  [ 6400/60000]
loss: 2.188001  [12800/60000]
loss: 2.212621  [19200/60000]
loss: 2.185765  [25600/60000]
loss: 2.162212  [32000/60000]
loss: 2.148218  [38400/60000]
loss: 2.111271  [44800/60000]
loss: 2.144816  [51200/60000]
loss: 2.108546  [57600/60000]
Test Error: 
 Accuracy: 43.3%, Avg loss: 0.033282 

Epoch 3
-------------------------------
loss: 2.102835  [    0/60000]
loss: 2.120657  [ 6400/60000]
loss: 2.044794  [12800/60000]
loss: 2.094587  [19200/60000]
loss: 2.042747  [25600/60000]
loss: 2.008679  [32000/600

In [None]:
import torch
import torch.onnx as onnx
import torchvision.models as models

model = models.vgg16(pretrained=True)
torch.save(model.state_dict(), 'data/model_weights.pth')

Downloading: "https://download.pytorch.org/models/vgg16-397923af.pth" to /root/.cache/torch/hub/checkpoints/vgg16-397923af.pth


HBox(children=(FloatProgress(value=0.0, max=553433881.0), HTML(value='')))




In [None]:
model = models.vgg16()
model.load_state_dict(torch.load('data/model_weights.pth'))
model.eval()

VGG(
  (features): Sequential(
    (0): Conv2d(3, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (1): ReLU(inplace=True)
    (2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (3): ReLU(inplace=True)
    (4): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (5): Conv2d(64, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (6): ReLU(inplace=True)
    (7): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (8): ReLU(inplace=True)
    (9): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (10): Conv2d(128, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (11): ReLU(inplace=True)
    (12): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (13): ReLU(inplace=True)
    (14): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (15): ReLU(inplace=True)
    (16): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1

In [None]:
torch.save(model, 'data/vgg_model.pth')

In [None]:
model = torch.load('data/vgg_model.pth')

In [None]:
input_img = torch.zeros((1,3,244,244))
onnx.export(model, input_img, 'data/model.onnx')

  return torch.max_pool2d(input, kernel_size, stride, padding, dilation, ceil_mode)


In [None]:
import torch
from torch import nn
from torch.utils.data import DataLoader
from torchvision import datasets
from torchvision.transforms import ToTensor, Lambda, Compose
import matplotlib.pyplot as plt



In [None]:
# Download training data from open datasets.
training_data = datasets.FashionMNIST(
    root="data",
    train=True,
    download=True,
    transform=ToTensor(),
)

# Download test data from open datasets.
test_data = datasets.FashionMNIST(
    root="data",
    train=False,
    download=True,
    transform=ToTensor(),
)

In [None]:
batch_size = 64

# Create data loaders.
train_dataloader = DataLoader(training_data, batch_size=batch_size)
test_dataloader = DataLoader(test_data, batch_size=batch_size)

for X, y in test_dataloader:
    print("Shape of X [N, C, H, W]: ", X.shape)
    print("Shape of y: ", y.shape, y.dtype)
    break

Shape of X [N, C, H, W]:  torch.Size([64, 1, 28, 28])
Shape of y:  torch.Size([64]) torch.int64


In [None]:
# Get cpu or gpu device for training.
device = "cuda" if torch.cuda.is_available() else "cpu"
print("Using {} device".format(device))

# Define model
class NeuralNetwork(nn.Module):
    def __init__(self):
        super(NeuralNetwork, self).__init__()
        self.flatten = nn.Flatten()
        self.linear_relu_stack = nn.Sequential(
            nn.Linear(28*28, 512),
            nn.ReLU(),
            nn.Linear(512, 512),
            nn.ReLU(),
            nn.Linear(512, 10),
            nn.ReLU()
        )

    def forward(self, x):
        x = self.flatten(x)
        logits = self.linear_relu_stack(x)
        return logits

model = NeuralNetwork().to(device)
print(model)

Using cpu device
NeuralNetwork(
  (flatten): Flatten(start_dim=1, end_dim=-1)
  (linear_relu_stack): Sequential(
    (0): Linear(in_features=784, out_features=512, bias=True)
    (1): ReLU()
    (2): Linear(in_features=512, out_features=512, bias=True)
    (3): ReLU()
    (4): Linear(in_features=512, out_features=10, bias=True)
    (5): ReLU()
  )
)


In [None]:
loss_fn = nn.CrossEntropyLoss()
learning_rate = 1e-3
optimizer = torch.optim.SGD(model.parameters(), lr=learning_rate)

In [None]:
def train(dataloader, model, loss_fn, optimizer):
    size = len(dataloader.dataset)
    for batch, (X, y) in enumerate(dataloader):
        X, y = X.to(device), y.to(device)
        
        # Compute prediction error
        pred = model(X)
        loss = loss_fn(pred, y)
        
        # Backpropagation
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        if batch % 100 == 0:
            loss, current = loss.item(), batch * len(X)
            print(f"loss: {loss:>7f}  [{current:>5d}/{size:>5d}]")

In [None]:
def test(dataloader, model):
    size = len(dataloader.dataset)
    model.eval()
    test_loss, correct = 0, 0
    with torch.no_grad():
        for X, y in dataloader:
            X, y = X.to(device), y.to(device)
            pred = model(X)
            test_loss += loss_fn(pred, y).item()
            correct += (pred.argmax(1) == y).type(torch.float).sum().item()
    test_loss /= size
    correct /= size
    print(f"Test Error: \n Accuracy: {(100*correct):>0.1f}%, Avg loss: {test_loss:>8f} \n")

In [None]:
epochs = 15
for t in range(epochs):
    print(f"Epoch {t+1}\n-------------------------------")
    train(train_dataloader, model, loss_fn, optimizer)
    test(test_dataloader, model)
print("Done!")

Epoch 1
-------------------------------
loss: 2.300637  [    0/60000]
loss: 2.294352  [ 6400/60000]
loss: 2.281061  [12800/60000]
loss: 2.281139  [19200/60000]
loss: 2.269872  [25600/60000]
loss: 2.267615  [32000/60000]
loss: 2.256033  [38400/60000]
loss: 2.248545  [44800/60000]
loss: 2.248299  [51200/60000]
loss: 2.229171  [57600/60000]
Test Error: 
 Accuracy: 37.5%, Avg loss: 0.035088 

Epoch 2
-------------------------------
loss: 2.236879  [    0/60000]
loss: 2.233057  [ 6400/60000]
loss: 2.200495  [12800/60000]
loss: 2.210299  [19200/60000]
loss: 2.191704  [25600/60000]
loss: 2.193842  [32000/60000]
loss: 2.167375  [38400/60000]
loss: 2.153785  [44800/60000]
loss: 2.166066  [51200/60000]
loss: 2.123705  [57600/60000]
Test Error: 
 Accuracy: 36.6%, Avg loss: 0.033601 

Epoch 3
-------------------------------
loss: 2.147561  [    0/60000]
loss: 2.140870  [ 6400/60000]
loss: 2.075659  [12800/60000]
loss: 2.098066  [19200/60000]
loss: 2.067461  [25600/60000]
loss: 2.073937  [32000/600

In [None]:
torch.save(model.state_dict(), "data/model.pth")
print("Saved PyTorch Model State to model.pth")


Saved PyTorch Model State to model.pth


In [None]:
model = NeuralNetwork()
model.load_state_dict(torch.load("data/model.pth"))

<All keys matched successfully>

In [None]:
classes = [
    "T-shirt/top",
    "Trouser",
    "Pullover",
    "Dress",
    "Coat",
    "Sandal",
    "Shirt",
    "Sneaker",
    "Bag",
    "Ankle boot",
]

model.eval()
x, y = test_data[0][0], test_data[0][1]
with torch.no_grad():
    pred = model(x)
    predicted, actual = classes[pred[0].argmax(0)], classes[y]
    print(f'Predicted: "{predicted}", Actual: "{actual}"')

Predicted: "Ankle boot", Actual: "Ankle boot"
