### Understand Autograd

In [15]:
# 1. use numpy to finish a computation
import numpy as np

np.random.seed(0)  # fix the global NumPy RNG so the draws below are repeatable

N, D = 3, 4

# three independent (N, D) samples from the standard normal distribution,
# drawn in the order x, y, z
x, y, z = (np.random.randn(N, D) for _ in range(3))

# forward pass: c = sum(x * y + z)
a = x * y
b = a + z
c = b.sum()

In [16]:
# 2. use tensor to finish a computation
import torch as t

# wrap the NumPy arrays as tensors
# (alternatively, t.randn(N, D, requires_grad=True) creates a tensor directly)
tx, ty, tz = (t.from_numpy(arr) for arr in (x, y, z))

# only tx is tracked by autograd; without this flag no gradient is computed for it
tx.requires_grad = True

# same forward pass as the NumPy version: tc = sum(tx * ty + tz)
ta = tx * ty
tb = ta + tz
tc = tb.sum()

tc.item()  # extract the Python number from the scalar tensor

6.7170085378000675

In [17]:
# 3. use numpy to calculate gradient
# manual backward pass for c = sum(x * y + z)
grad_c = 1.0                                    # dc/dc
grad_b = grad_c * np.ones((N, D))               # the sum passes the upstream gradient to every element
grad_a, grad_z = grad_b.copy(), grad_b.copy()   # b = a + z: both inputs receive grad_b unchanged
grad_x, grad_y = grad_a * y, grad_a * x         # a = x * y: each input is scaled by the other

print(grad_x)

[[ 0.76103773  0.12167502  0.44386323  0.33367433]
 [ 1.49407907 -0.20515826  0.3130677  -0.85409574]
 [-2.55298982  0.6536186   0.8644362  -0.74216502]]


In [18]:
# 4. use autograd in pytorch to calculate gradient
# backpropagate from the scalar tc; tx is the only input flagged with requires_grad
tc.backward()
print(tx.grad)
print(ty.grad) # None because ty.requires_grad is False

tensor([[ 0.7610,  0.1217,  0.4439,  0.3337],
        [ 1.4941, -0.2052,  0.3131, -0.8541],
        [-2.5530,  0.6536,  0.8644, -0.7422]], dtype=torch.float64)
None


In [19]:
# 5. inspect the basic attributes of a tensor
t1 = t.tensor(3.0, requires_grad=True)

# one attribute per line: requires_grad, data, shape, device, grad_fn
print(
    *(getattr(t1, name) for name in ("requires_grad", "data", "shape", "device", "grad_fn")),
    sep="\n",
)
print(t1.tolist())  # a 0-dim tensor converts to a plain Python number

True
tensor(3.)
torch.Size([])
cpu
None
3.0


In [20]:
print(tx)
# indexing with None adds a new leading dimension of size 1
print(tx[None])

tensor([[ 1.7641,  0.4002,  0.9787,  2.2409],
        [ 1.8676, -0.9773,  0.9501, -0.1514],
        [-0.1032,  0.4106,  0.1440,  1.4543]], dtype=torch.float64,
       requires_grad=True)
tensor([[[ 1.7641,  0.4002,  0.9787,  2.2409],
         [ 1.8676, -0.9773,  0.9501, -0.1514],
         [-0.1032,  0.4106,  0.1440,  1.4543]]], dtype=torch.float64,
       grad_fn=<UnsqueezeBackward0>)


In [21]:
# 3 x 5 x 5 tensor filled with ones, used below to demonstrate reductions and broadcasting
img_t = t.ones(3, 5, 5)

In [22]:
# negative dims count from the end: summing over dim -2 leaves shape (3, 5)
print(img_t.sum(-2))
# averaging over dim -3 (the first of the three dims) leaves shape (5, 5)
print(img_t.mean(-3))

tensor([[5., 5., 5., 5., 5.],
        [5., 5., 5., 5., 5.],
        [5., 5., 5., 5., 5.]])
tensor([[1., 1., 1., 1., 1.],
        [1., 1., 1., 1., 1.],
        [1., 1., 1., 1., 1.],
        [1., 1., 1., 1., 1.],
        [1., 1., 1., 1., 1.]])


In [23]:
weights = t.tensor([0.2, 0.7, 0.1])
print(weights)
# append two trailing size-1 dimensions: (3,) -> (3, 1, 1)
unsqueezed_weights = weights[:, None, None]
print(unsqueezed_weights)

tensor([0.2000, 0.7000, 0.1000])
tensor([[[0.2000]],

        [[0.7000]],

        [[0.1000]]])


In [24]:
# compare the two shapes before multiplying: (3, 1, 1) against (3, 5, 5)
unsqueezed_weights.shape, img_t.shape

(torch.Size([3, 1, 1]), torch.Size([3, 5, 5]))

In [25]:
# broadcasting expands the size-1 dims, so each 5x5 slice is scaled by its own weight
img_t* unsqueezed_weights

tensor([[[0.2000, 0.2000, 0.2000, 0.2000, 0.2000],
         [0.2000, 0.2000, 0.2000, 0.2000, 0.2000],
         [0.2000, 0.2000, 0.2000, 0.2000, 0.2000],
         [0.2000, 0.2000, 0.2000, 0.2000, 0.2000],
         [0.2000, 0.2000, 0.2000, 0.2000, 0.2000]],

        [[0.7000, 0.7000, 0.7000, 0.7000, 0.7000],
         [0.7000, 0.7000, 0.7000, 0.7000, 0.7000],
         [0.7000, 0.7000, 0.7000, 0.7000, 0.7000],
         [0.7000, 0.7000, 0.7000, 0.7000, 0.7000],
         [0.7000, 0.7000, 0.7000, 0.7000, 0.7000]],

        [[0.1000, 0.1000, 0.1000, 0.1000, 0.1000],
         [0.1000, 0.1000, 0.1000, 0.1000, 0.1000],
         [0.1000, 0.1000, 0.1000, 0.1000, 0.1000],
         [0.1000, 0.1000, 0.1000, 0.1000, 0.1000],
         [0.1000, 0.1000, 0.1000, 0.1000, 0.1000]]])

In [28]:
import imageio

# NOTE(review): the relative path assumes the notebook is run from a directory containing data/cat.jpg
img_arr = imageio.imread('./data/cat.jpg') # loaded as a NumPy array
img_arr.shape # (Height, Width, Channel)

(853, 1280, 3)

In [29]:
img = t.from_numpy(img_arr)
# reorder the axes from (Height, Width, Channel) to (Channel, Height, Width)
out = img.permute(2, 0, 1)
out.shape

torch.Size([3, 853, 1280])

### Use autograd for gradient descent

In [30]:
# Simulated data for y = a*x + b with a = 5, b = 10
a, b = 5, 10
x = np.array([1.2, 4, 3.5, 17.3, 2.5, 34.9, 44.2, 14.2, 5, 9.1])
# Gaussian noise with mean 0 and standard deviation 2 (i.e. variance 4), one value per sample
error = np.random.normal(loc=0, scale=2, size=10)
y = a * x + b + error

In [48]:
# wrap the simulated NumPy data as tensors
x_t = t.from_numpy(x)
y_t = t.from_numpy(y)
# params holds [k, b] (slope, intercept) in the order model() unpacks them; initial guess k=1, b=0
params = t.tensor([1.0, 0.0], requires_grad=True)
epoch = 5000
# a learning rate of 1e-2 made the loss blow up to inf, so a smaller one is used
learning = 1e-3

# Define Model
def model(in_x, k, b):
    """Linear model: scale ``in_x`` by the slope ``k`` and shift by the intercept ``b``."""
    scaled = in_x * k
    return scaled + b

# Define loss function
def loss_fc(fit_y, real_y):
    """Mean squared error between the fitted values ``fit_y`` and the targets ``real_y``."""
    residual = fit_y - real_y
    squared = residual ** 2
    return squared.mean()

def training_loop(epoch, learning_rate, in_x, params, real_y):
    """Fit ``params`` by plain gradient descent, updating the tensor in place.

    Each of the ``epoch`` iterations evaluates ``model`` on ``in_x`` with
    ``params`` unpacked as its coefficients, scores the result against
    ``real_y`` with ``loss_fc``, back-propagates, and moves ``params`` by
    ``learning_rate`` times its gradient. The loss is printed every 500
    iterations.

    Returns:
        The same ``params`` tensor that was passed in.
    """
    for step in range(epoch):
        # gradients accumulate across backward() calls, so clear the one left
        # by the previous iteration (there is none before the first backward)
        stale_grad = params.grad
        if stale_grad is not None:
            stale_grad.zero_()

        loss = loss_fc(model(in_x, *params), real_y)
        loss.backward()

        # apply the update outside autograd tracking so params keeps its
        # requires_grad setting; an optimizer from torch.optim can do this step
        with t.no_grad():
            params.sub_(learning_rate * params.grad)

        if (step + 1) % 500 == 0:
            print('Epoch: %d, loss: %f' % (step + 1, float(loss)))
    return params

# run gradient descent; params is updated in place and also returned (shown as the cell output)
training_loop(epoch, learning, x_t, params, y_t)

Epoch: 500, loss: 25.134116
Epoch: 1000, loss: 11.383090
Epoch: 1500, loss: 6.480441
Epoch: 2000, loss: 4.732525
Epoch: 2500, loss: 4.109334
Epoch: 3000, loss: 3.887142
Epoch: 3500, loss: 3.807925
Epoch: 4000, loss: 3.779682
Epoch: 4500, loss: 3.769612
Epoch: 5000, loss: 3.766022


tensor([ 4.9135, 10.8572], requires_grad=True)

In [51]:
# Use optimizer in pyTorch
import torch.optim as optim

# NOTE: params still holds the values fitted by the previous cell, so this run
# starts from an already-converged fit (the printed loss barely changes).
optimizer = optim.SGD([params], lr=learning)

def training_loop(epoch, optimizer, params, in_x, real_y):
    """Fit ``params`` with a ``torch.optim`` optimizer performing the update step.

    NOTE: this reuses the name ``training_loop`` with a different signature,
    so it replaces the hand-written gradient-descent version once this cell
    has run.

    Each of the ``epoch`` iterations evaluates ``model`` on ``in_x``, scores
    it against ``real_y`` with ``loss_fc``, back-propagates, and calls
    ``optimizer.step()``. The loss is printed every 500 iterations.

    Returns:
        The same ``params`` tensor that was passed in.
    """
    for step in range(epoch):
        prediction = model(in_x, *params)
        loss = loss_fc(prediction, real_y)

        # clear the old gradients (any point before backward() works), then
        # back-propagate and let the optimizer apply the update
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        if (step + 1) % 500 == 0:
            print('Epoch: %d, loss: %f' % (step + 1, float(loss)))
    return params

# same fit as before, with the update step delegated to the optimizer
training_loop(epoch, optimizer, params, x_t, y_t)

Epoch: 500, loss: 3.764033
Epoch: 1000, loss: 3.764033
Epoch: 1500, loss: 3.764033
Epoch: 2000, loss: 3.764033
Epoch: 2500, loss: 3.764033
Epoch: 3000, loss: 3.764033
Epoch: 3500, loss: 3.764033
Epoch: 4000, loss: 3.764033
Epoch: 4500, loss: 3.764033
Epoch: 5000, loss: 3.764033


tensor([ 4.9113, 10.9188], requires_grad=True)

In [57]:
# split data into train and validation set (20% held out for validation)
n_samples = x_t.shape[0]
n_validation = int(0.2 * n_samples)
n_train = n_samples - n_validation

# random permutation of the sample indices
# NOTE(review): no torch seed is set in the cells shown, so the split likely differs between runs
shuffled_indices = t.randperm(n_samples)

# slice by the explicit train count: with fewer than 5 samples n_validation
# is 0, and `[:-0]` would leave the training set empty
train_indices = shuffled_indices[:n_train]
val_indices = shuffled_indices[n_train:]

# train data and validation data
train_x = x_t[train_indices]
va_x = x_t[val_indices]

In [56]:
# NOTE(review): this cell's execution count (56) precedes the split cell's (57),
# so the output shown may come from an earlier, different random permutation.
train_indices

tensor([7, 0, 1, 4, 9, 6, 8, 5])

In [79]:
# writing a loop using nn module
import torch.nn as nn

linear_model = nn.Linear(in_features=1, out_features=1)
optimizer = optim.SGD(linear_model.parameters(), lr=1e-3)

# the model takes inputs shaped B x Nin: for this regression B is the number
# of samples (rows) and Nin the number of features (columns), here 1.
# The data is float64 (double in PyTorch) and is cast to float.
train_x = x_t[train_indices].unsqueeze(1).float()
train_y = y_t[train_indices].unsqueeze(1).float()
va_x = x_t[val_indices].unsqueeze(1).float()
va_y = y_t[val_indices].unsqueeze(1).float()

def train_loop(epoches, opt, model, loss_fn, trainx, trainy, valx, valy):
    """Train ``model`` with ``opt`` while reporting train and validation loss.

    Args:
        epoches: number of full-batch training iterations.
        opt: optimizer whose ``zero_grad()`` and ``step()`` are called each iteration.
        model: callable applied to ``trainx`` and ``valx``; must expose
            ``weight`` and ``bias`` attributes, which are returned.
        loss_fn: callable invoked as ``loss_fn(prediction, target)``.
        trainx, trainy: training inputs and targets.
        valx, valy: validation inputs and targets (monitoring only).

    Returns:
        The tuple ``(model.weight, model.bias)``.
    """
    for epoch in range(1, epoches+1):
        model_train_y = model(trainx)
        loss_train = loss_fn(model_train_y, trainy)

        # the validation loss is only reported, never back-propagated, so
        # evaluate it without recording an autograd graph
        with t.no_grad():
            validation_y = model(valx)
            loss_validation = loss_fn(validation_y, valy)

        opt.zero_grad()
        loss_train.backward()
        opt.step()
        if epoch % 500 == 0:
            print('Epoch: %d, train_loss: %f, validation_loss: %f' % (epoch, float(loss_train), float(loss_validation)))
    return (model.weight, model.bias)

# train for 5000 iterations, using the held-out split only for monitoring
train_loop(5000,
          optimizer,
          linear_model,
          nn.MSELoss(), # pass an instance (note the parentheses): train_loop calls loss_fn(prediction, target)
          train_x,
          train_y,
          va_x,
          va_y)

Epoch: 500, train_loss: 27.758680, validation_loss: 13.063032
Epoch: 1000, train_loss: 12.295135, validation_loss: 3.857742
Epoch: 1500, train_loss: 7.044099, validation_loss: 1.464773
Epoch: 2000, train_loss: 5.260950, validation_loss: 1.079283
Epoch: 2500, train_loss: 4.655437, validation_loss: 1.197261
Epoch: 3000, train_loss: 4.449834, validation_loss: 1.382395
Epoch: 3500, train_loss: 4.380020, validation_loss: 1.529747
Epoch: 4000, train_loss: 4.356301, validation_loss: 1.629048
Epoch: 4500, train_loss: 4.348251, validation_loss: 1.691455
Epoch: 5000, train_loss: 4.345523, validation_loss: 1.729375


(Parameter containing:
 tensor([[4.9155]], requires_grad=True),
 Parameter containing:
 tensor([11.0975], requires_grad=True))