In [8]:
import torch
import numpy as np

In [4]:
# 4x4 tensor filled with uniform random values; it is reused by the reshape cells below
x = torch.rand(4, 4)
x

tensor([[0.1400, 0.5815, 0.7113, 0.8535],
        [0.1408, 0.8632, 0.8835, 0.2878],
        [0.8213, 0.6885, 0.5938, 0.9292],
        [0.5914, 0.2575, 0.3467, 0.4289]])

### reshape

In [6]:
# flatten the 4x4 tensor into a single dimension of 16 elements
x.view(16)

tensor([0.1400, 0.5815, 0.7113, 0.8535, 0.1408, 0.8632, 0.8835, 0.2878, 0.8213,
        0.6885, 0.5938, 0.9292, 0.5914, 0.2575, 0.3467, 0.4289])

In [7]:
# -1 asks PyTorch to infer that dimension: 16 elements / 8 columns = 2 rows
x.view(-1, 8)

tensor([[0.1400, 0.5815, 0.7113, 0.8535, 0.1408, 0.8632, 0.8835, 0.2878],
        [0.8213, 0.6885, 0.5938, 0.9292, 0.5914, 0.2575, 0.3467, 0.4289]])

### tensor <=> numpy

In [9]:
x.numpy() # only works for tensors that live on the CPU

array([[0.13999838, 0.58145034, 0.7112933 , 0.8535212 ],
       [0.14078647, 0.86318314, 0.88354325, 0.28784817],
       [0.8213417 , 0.68854946, 0.5937836 , 0.9292241 ],
       [0.591387  , 0.25753373, 0.3467251 , 0.42885095]], dtype=float32)

In [11]:
# torch.from_numpy() does not copy: the tensor and the array share the same
# underlying memory, so the in-place `a += 1` is visible through `b` as well.
a = np.ones(5)
b = torch.from_numpy(a)
a += 1
# Print with two separate statements. Writing `print(a), print(b)` builds the
# tuple (None, None), which the notebook then echoes as the cell's result.
print(a)
print(b)

[2. 2. 2. 2. 2.]
tensor([2., 2., 2., 2., 2.], dtype=torch.float64)


(None, None)

## autograd

In [13]:
# Build a small computation graph on a tensor that tracks gradients.
x = torch.randn(3, requires_grad=True)
y = x+2
print(y)
z = y*y*2
z = z.mean()
print(z)
z.backward() 
# ^ computes the vector-Jacobian product, i.e. dz/dx, and stores it in x.grad.
# z is a scalar, so no gradient vector has to be passed to the backward function.
print(x.grad)

tensor([1.8210, 1.1582, 2.0674], grad_fn=<AddBackward0>)
tensor(5.9543, grad_fn=<MeanBackward0>)
tensor([2.4280, 1.5442, 2.7565])


### 3 ways to stop tracking gradients

In [14]:
x = torch.randn(3, requires_grad=True)
# way 1: switch tracking off on x itself (the trailing _ marks an in-place call)
x.requires_grad_(False)
x

tensor([ 1.6567, -0.4944,  0.4248])

In [16]:
x = torch.randn(3, requires_grad=True)
x.detach() # way 2: returns a new tensor that is detached from the graph; x itself is not modified

tensor([1.7221, 1.0317, 0.2289])

In [17]:
x = torch.randn(3, requires_grad=True)
# way 3: operations inside the no_grad block are not recorded, so the printed y carries no grad_fn
with torch.no_grad():
    y = x + 2
    print(y)

tensor([0.8453, 2.5635, 2.2527])


### gradients must be emptied out before next iteration

In [18]:
# backward() adds into .grad rather than overwriting it, so the gradient
# has to be reset at the end of every iteration.
weights = torch.ones(4, requires_grad=True)
for epoch in range(3):
    model_output = torch.sum(3 * weights)
    model_output.backward()
    print(weights.grad)
    # A trailing underscore on a PyTorch function means it modifies the tensor in place.
    weights.grad.zero_()

tensor([3., 3., 3., 3.])
tensor([3., 3., 3., 3.])
tensor([3., 3., 3., 3.])


## backpropagation

In [19]:
# One-weight linear model with a squared-error loss.
x, y = torch.tensor(1.0), torch.tensor(2.0)

# Only the weight is a trainable leaf.
w = torch.tensor(1.0, requires_grad=True)

# Forward pass: loss = (w*x - y)^2
y_hat = w * x
loss = torch.pow(y_hat - y, 2)
print(loss)

# Backward pass: d(loss)/dw = 2 * (w*x - y) * x
loss.backward()
print(w.grad)

tensor(1., grad_fn=<PowBackward0>)
tensor(-2.)


### CrossEntropy

`nn.CrossEntropyLoss` already combines `nn.LogSoftmax` and `nn.NLLLoss`, so do not add a softmax to the last layer. `Y` should hold class labels (class indices), not one-hot vectors; `Y_pred` should hold raw scores (logits), with no softmax applied.

For binary classification, use a sigmoid as the last layer of the network together with BCE loss (`nn.BCELoss`).

In [20]:
import torch.nn as nn

# The criterion receives raw logits (one row per sample) and integer class labels.
loss = nn.CrossEntropyLoss()

# True class index of the single sample.
Y = torch.tensor([0])

# "good" puts its largest logit on class 0, "bad" puts it on class 1.
Y_pred_good = torch.tensor([[2.0, 1.0, 0.1]])
Y_pred_bad = torch.tensor([[0.5, 2.0, 0.3]])

l1, l2 = loss(Y_pred_good, Y), loss(Y_pred_bad, Y)

# The better prediction yields the smaller loss.
print(l1.item(), l2.item(), sep="\n")

0.4170299470424652
1.840616226196289


In [21]:
# max along dim 1 returns (values, indices); indices holds the position of the largest score in each row
torch.max(Y_pred_good, 1)

torch.return_types.max(
values=tensor([2.]),
indices=tensor([0]))

## activation function

    - sigmoid, for binary classification, [0, 1]
    - tanh (scaled sigmoid), [-1, 1]
    - relu (most popular), max(0, x), range [0, ∞)
    - leaky relu, diminishes the vanishing gradients issue (keeps a small slope for x < 0)
    - softmax, for multi-class classification, [0, 1] (outputs sum to 1)

## transfer learning

In [22]:
from torchvision import models

In [23]:
# in this case, fine-tune on the last layer
# Loads ResNet-18 with pretrained weights (downloaded on first use).
# NOTE(review): newer torchvision releases deprecate `pretrained=` in favor of
# `weights=` — confirm the installed version before changing this call.
model = models.resnet18(pretrained=True)

Downloading: "https://download.pytorch.org/models/resnet18-5c106cde.pth" to /root/.cache/torch/checkpoints/resnet18-5c106cde.pth


HBox(children=(FloatProgress(value=0.0, max=46827520.0), HTML(value='')))




In [24]:
# input width of the model's current final fully connected layer (fc)
num_features = model.fc.in_features
print(num_features)

512


In [25]:
# display the module tree of the loaded network
model

ResNet(
  (conv1): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
  (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (relu): ReLU(inplace=True)
  (maxpool): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
  (layer1): Sequential(
    (0): BasicBlock(
      (conv1): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu): ReLU(inplace=True)
      (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    )
    (1): BasicBlock(
      (conv1): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu): ReLU(inplace=True)
  

In [33]:
# freeze parameters
# (turn off gradient tracking for every parameter currently in the model)
for param in model.parameters():
    param.requires_grad = False

In [34]:
# Replace the final layer with a fresh nn.Linear; newly constructed layers have
# requires_grad=True by default, so this head can still be trained.
# 2 output units — presumably a two-class task; TODO confirm
model.fc = nn.Linear(num_features, 2)

In [35]:
# display the module tree again after fc has been reassigned
model

ResNet(
  (conv1): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
  (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (relu): ReLU(inplace=True)
  (maxpool): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
  (layer1): Sequential(
    (0): BasicBlock(
      (conv1): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu): ReLU(inplace=True)
      (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    )
    (1): BasicBlock(
      (conv1): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu): ReLU(inplace=True)
  