In [None]:
# Pytorch basics for DL newbies

[![Open In Colab]()

## numpy to tensor

In [18]:
import numpy as np
n_array = np.arange(10).reshape(2,5)
n_array

array([[0, 1, 2, 3, 4],
       [5, 6, 7, 8, 9]])

In [19]:
n_array.ndim
n_array.shape

(2, 5)

In [20]:
import torch
t_array = torch.FloatTensor(n_array)
t_array

tensor([[0., 1., 2., 3., 4.],
        [5., 6., 7., 8., 9.]])

In [21]:
t_array.ndim

2

In [30]:
print(t_array.shape)
print(t_array.ndim)
print(t_array.size())


torch.Size([2, 5])
2
torch.Size([2, 5])


In [34]:
t_array[1:]

tensor([[5., 6., 7., 8., 9.]])

In [37]:
t_array[:2, :3]

tensor([[0., 1., 2.],
        [5., 6., 7.]])

## tensor operations

In [41]:
n1 = np.arange(10).reshape(2,5)
n2 = np.arange(10).reshape(5,2)

In [45]:
t1 = torch.FloatTensor(n1)
t2 = torch.FloatTensor(n2)
print('Shape of t1: ', t1.shape)
print('Shape of t2: ', t2.shape)
print(t1.matmul(t2))

Shape of t1:  torch.Size([2, 5])
Shape of t2:  torch.Size([5, 2])
tensor([[ 60.,  70.],
        [160., 195.]])


In [46]:
n1.dot(n2)

array([[ 60,  70],
       [160, 195]])

In [48]:
t1 * t2

RuntimeError: The size of tensor a (5) must match the size of tensor b (2) at non-singleton dimension 1

In [49]:
n1 = np.arange(4).reshape(2,2)
n2 = np.arange(4).reshape(2,2)
t1 = torch.FloatTensor(n1)
t2 = torch.FloatTensor(n2)

t1 * t2

tensor([[0., 1.],
        [4., 9.]])

In [50]:
t1.mul(t2)

tensor([[0., 1.],
        [4., 9.]])

In [51]:
t1 * 5

tensor([[ 0.,  5.],
        [10., 15.]])

In [52]:
n1 = np.arange(10)
t1 = torch.FloatTensor(n1)
t1.mean()

tensor(4.5000)

In [57]:
n1 = np.arange(10).reshape(5,2)
t1 = torch.FloatTensor(n1)
t1.mean(dim=0)

tensor([4., 5.])

In [59]:
t1.mean(dim=1)

tensor([0.5000, 2.5000, 4.5000, 6.5000, 8.5000])

In [60]:
n1 = np.arange(10)
t1 = torch.FloatTensor(n1)
t1.view(-1, 2)

tensor([[0., 1.],
        [2., 3.],
        [4., 5.],
        [6., 7.],
        [8., 9.]])

In [62]:
n1.reshape(-1, 2)

array([[0, 1],
       [2, 3],
       [4, 5],
       [6, 7],
       [8, 9]])

In [66]:
t1.view(-1, 10).shape

torch.Size([1, 10])

In [68]:
t1.view(-1, 10).squeeze()

tensor([0., 1., 2., 3., 4., 5., 6., 7., 8., 9.])

In [117]:
t1.view(-1, 10).squeeze().unsqueeze(dim=0)

tensor([[0., 1., 2., 3., 4., 5., 6., 7., 8., 9.]])

## tensor operations for ML/DL formula

In [119]:
import torch
import torch.nn.functional as F

In [124]:
tensor = torch.FloatTensor([0.5, 0.7, 0.1])
h_tensor = F.softmax(tensor, dim=0)
h_tensor

tensor([0.3458, 0.4224, 0.2318])

In [138]:
y = torch.randint(5, (10,5))
y_label = y.argmax(dim=1)

In [140]:
y_label

tensor([1, 2, 2, 0, 0, 1, 1, 1, 4, 2])

In [141]:
torch.nn.functional.one_hot(y_label)


tensor([[0, 1, 0, 0, 0],
        [0, 0, 1, 0, 0],
        [0, 0, 1, 0, 0],
        [1, 0, 0, 0, 0],
        [1, 0, 0, 0, 0],
        [0, 1, 0, 0, 0],
        [0, 1, 0, 0, 0],
        [0, 1, 0, 0, 0],
        [0, 0, 0, 0, 1],
        [0, 0, 1, 0, 0]])

## torch autogard

$$
y = w^2 \\ 
z = 2*y + 5 \\
z = 2*w^2 + 5 
$$

In [83]:
w = torch.tensor(2.0, requires_grad=True)
y = w**2
z = 2*y + 5

In [84]:
z.backward()

In [85]:
w.grad

tensor(8.)

$$ Q = 3a^3 - b^2  $$

In [103]:
a = torch.tensor([2., 3.], requires_grad=True)
b = torch.tensor([6., 4.], requires_grad=True)

In [104]:
Q = 3*a**3 - b**2


In [108]:
external_grad = torch.tensor([1., 1.])
Q.backward(gradient=external_grad)

RuntimeError: Trying to backward through the graph a second time, but the saved intermediate results have already been freed. Specify retain_graph=True when calling backward the first time.

$$ \frac{\partial Q}{\partial a} = 9a^2 $$  

$$ \frac{\partial Q}{\partial b} = -2b $$

In [106]:
a.grad

tensor([ 72., 162.])

In [107]:
b.grad

tensor([-24., -16.])

In [None]:
## AutoGrad for Linear Regression
https://towardsdatascience.com/linear-regression-with-pytorch-eb6dedead817

$$ y=2x+1 $$

In [109]:
import numpy as np
# create dummy data for training
x_values = [i for i in range(11)]
x_train = np.array(x_values, dtype=np.float32)
x_train = x_train.reshape(-1, 1)

y_values = [2*i + 1 for i in x_values]
y_train = np.array(y_values, dtype=np.float32)
y_train = y_train.reshape(-1, 1)

In [112]:
import torch
from torch.autograd import Variable
class linearRegression(torch.nn.Module):
    def __init__(self, inputSize, outputSize):
        super(linearRegression, self).__init__()
        self.linear = torch.nn.Linear(inputSize, outputSize)

    def forward(self, x):
        out = self.linear(x)
        return out

In [113]:
inputDim = 1        # takes variable 'x' 
outputDim = 1       # takes variable 'y'
learningRate = 0.01 
epochs = 100

model = linearRegression(inputDim, outputDim)
##### For GPU #######
if torch.cuda.is_available():
    model.cuda()

In [114]:
criterion = torch.nn.MSELoss() 
optimizer = torch.optim.SGD(model.parameters(), lr=learningRate)

In [115]:
for epoch in range(epochs):
    # Converting inputs and labels to Variable
    if torch.cuda.is_available():
        inputs = Variable(torch.from_numpy(x_train).cuda())
        labels = Variable(torch.from_numpy(y_train).cuda())
    else:
        inputs = Variable(torch.from_numpy(x_train))
        labels = Variable(torch.from_numpy(y_train))

    # Clear gradient buffers because we don't want any gradient from previous epoch to carry forward, dont want to cummulate gradients
    optimizer.zero_grad()

    # get output from the model, given the inputs
    outputs = model(inputs)

    # get loss for the predicted output
    loss = criterion(outputs, labels)
    print(loss)
    # get gradients w.r.t to parameters
    loss.backward()

    # update parameters
    optimizer.step()

    print('epoch {}, loss {}'.format(epoch, loss.item()))

tensor(253.5460, device='cuda:0', grad_fn=<MseLossBackward>)
epoch 0, loss 253.54598999023438
tensor(20.8421, device='cuda:0', grad_fn=<MseLossBackward>)
epoch 1, loss 20.84214210510254
tensor(1.8594, device='cuda:0', grad_fn=<MseLossBackward>)
epoch 2, loss 1.8594218492507935
tensor(0.3093, device='cuda:0', grad_fn=<MseLossBackward>)
epoch 3, loss 0.3092811107635498
tensor(0.1811, device='cuda:0', grad_fn=<MseLossBackward>)
epoch 4, loss 0.18108133971691132
tensor(0.1689, device='cuda:0', grad_fn=<MseLossBackward>)
epoch 5, loss 0.16888414323329926
tensor(0.1662, device='cuda:0', grad_fn=<MseLossBackward>)
epoch 6, loss 0.16616836190223694
tensor(0.1642, device='cuda:0', grad_fn=<MseLossBackward>)
epoch 7, loss 0.16424500942230225
tensor(0.1624, device='cuda:0', grad_fn=<MseLossBackward>)
epoch 8, loss 0.162405326962471
tensor(0.1606, device='cuda:0', grad_fn=<MseLossBackward>)
epoch 9, loss 0.1605914682149887
tensor(0.1588, device='cuda:0', grad_fn=<MseLossBackward>)
epoch 10, loss 0

In [116]:
with torch.no_grad(): # we don't need gradients in the testing phase
    if torch.cuda.is_available():
        predicted = model(Variable(torch.from_numpy(x_train).cuda())).cpu().data.numpy()
    else:
        predicted = model(Variable(torch.from_numpy(x_train))).data.numpy()
    print(predicted)


[[ 0.5502644]
 [ 2.6150303]
 [ 4.679796 ]
 [ 6.744562 ]
 [ 8.809328 ]
 [10.874094 ]
 [12.93886  ]
 [15.003626 ]
 [17.068392 ]
 [19.133158 ]
 [21.197924 ]]
