<a href="https://colab.research.google.com/github/vaishak-krishnan/PyTorch/blob/main/PytorchIntro.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

## Outline
* PyTorch
* What are tensors
* Initialising, slicing, reshaping tensors
* Numpy and PyTorch interfacing
* GPU support for PyTorch + Enabling GPUs on Google Colab
* Speed comparisons, Numpy -- PyTorch -- PyTorch on GPU
* Autodiff concepts and application
* Writing a basic learning loop using autograd
* Exercises

In [None]:
import torch
import numpy as np
import matplotlib.pyplot as plt

## Initialise tensors

In [None]:
# Three basic constructors; every call below builds a tensor with 3 rows and 2 columns.

# Filled with ones.
x = torch.ones((3, 2))
print(x)

# Filled with zeros.
x = torch.zeros((3, 2))
print(x)

# Filled with random values (the saved output shows numbers between 0 and 1).
x = torch.rand((3, 2))
print(x)

tensor([[1., 1.],
        [1., 1.],
        [1., 1.]])
tensor([[0., 0.],
        [0., 0.],
        [0., 0.]])
tensor([[0.6563, 0.7902],
        [0.3246, 0.2367],
        [0.9541, 0.0618]])


In [None]:
# torch.empty only allocates the 3x2 tensor; it does not fill it, so the printed
# values are arbitrary (note the tiny numbers and the nan in the saved output).
x = torch.empty(3, 2)
print(x)
# zeros_like builds an all-zero tensor with the same shape as x.
y = torch.zeros_like(x)
print(y)

tensor([[2.2956e-35, 0.0000e+00],
        [3.3631e-44, 0.0000e+00],
        [       nan, 0.0000e+00]])
tensor([[0., 0.],
        [0., 0.],
        [0., 0.]])


In [None]:
# Five evenly spaced points from 0 to 1, both endpoints included.
x = torch.linspace(start=0, end=1, steps=5)
print(x)

tensor([0.0000, 0.2500, 0.5000, 0.7500, 1.0000])


In [None]:
# Build a 3x2 integer tensor from a nested Python list: one inner list per row.
x = torch.tensor([[1, 2], [3, 4], [5, 6]])
print(x)

tensor([[1, 2],
        [3, 4],
        [5, 6]])


## Slicing tensors

In [None]:
# x is the 3x2 integer tensor built in the previous cell.
print(x.shape)    # shape of the tensor, printed as torch.Size([rows, cols])
print(x[..., 1])  # every row, column index 1 -> the second column
print(x[0])       # row index 0 -> the first row

torch.Size([3, 2])
tensor([2, 4, 6])
tensor([1, 2])


In [None]:
# Picking out a single element still gives a tensor (printed as tensor(4)).
y = x[1][1]
print(y)
# .item() converts that one-element tensor into a plain Python number.
print(y.item())

tensor(4)
4


## Reshaping tensors

In [None]:
print(x)
# view(2, 3) presents the same six elements as a 2x3 tensor, read row by row
# (compare the two printed tensors).
y = x.view(2, 3)
print(y)

tensor([[1, 2],
        [3, 4],
        [5, 6]])
tensor([[1, 2, 3],
        [4, 5, 6]])


In [None]:
# A -1 lets PyTorch infer that dimension from the element count: 6 elements
# in 6 rows leaves 1 column, as the output shows.
y = x.view(6,-1)
print(y)

tensor([[1],
        [2],
        [3],
        [4],
        [5],
        [6]])


## Simple Tensor Operations

In [None]:
# Two 3x2 all-ones operands.
x = torch.ones(3, 2)
y = torch.ones_like(x)

# +, - and * all act element by element on tensors of the same shape.
z = x + y
print(z)

z = x - y
print(z)

z = x * y
print(z)

tensor([[2., 2.],
        [2., 2.],
        [2., 2.]])
tensor([[0., 0.],
        [0., 0.],
        [0., 0.]])
tensor([[1., 1.],
        [1., 1.],
        [1., 1.]])


In [None]:
# add() returns the sum as a new tensor; y itself is unchanged (see the second print).
z = y.add(x)
print(z)
print(y)

tensor([[2., 2.],
        [2., 2.],
        [2., 2.]])
tensor([[1., 1.],
        [1., 1.],
        [1., 1.]])


In [None]:
# The trailing underscore marks the in-place variant: add_() writes the sum into y,
# so y now prints the same values as z.
# Because y is modified, re-running this cell adds x to y again.
z = y.add_(x)
print(z)
print(y)

tensor([[2., 2.],
        [2., 2.],
        [2., 2.]])
tensor([[2., 2.],
        [2., 2.],
        [2., 2.]])


## Numpy <> PyTorch

In [None]:
print(x)
# Convert the tensor to a NumPy array; the printed types confirm the conversion.
x_np = x.numpy()
print(type(x), type(x_np))
print(x_np)

tensor([[1., 1.],
        [1., 1.],
        [1., 1.]])
<class 'torch.Tensor'> <class 'numpy.ndarray'>
[[1. 1.]
 [1. 1.]
 [1. 1.]]


In [None]:
# Start from a NumPy array of 5 random samples.
a = np.random.randn(5)
print(a)
# from_numpy turns the array into a tensor; the dtype stays float64 (see the output).
a_pt = torch.from_numpy(a)
print(type(a), type(a_pt))
print(a_pt)

[ 0.45556043 -1.36109539 -1.34309627 -1.73442186  2.14300332]
<class 'numpy.ndarray'> <class 'torch.Tensor'>
tensor([ 0.4556, -1.3611, -1.3431, -1.7344,  2.1430], dtype=torch.float64)


In [None]:
# Add 1 to the NumPy array in place (out=a). The tensor created from it with
# from_numpy prints the updated values too, so a and a_pt share the same data.
# This cell mutates a, so each re-run adds another 1; the saved output is 2 higher
# than the values printed by the previous cell, i.e. it was captured after two runs.
np.add(a, 1, out=a)
print(a)
print(a_pt)

[2.45556043 0.63890461 0.65690373 0.26557814 4.14300332]
tensor([2.4556, 0.6389, 0.6569, 0.2656, 4.1430], dtype=torch.float64)


In [None]:
%%time
# NumPy baseline: 100 rounds of creating two random 100x100 matrices and multiplying them.
# The random generation sits inside the loop, so it is included in the measured time.
for i in range(100):
  a = np.random.randn(100,100)
  b = np.random.randn(100,100)
  c = np.matmul(a,b)

CPU times: user 163 ms, sys: 108 ms, total: 271 ms
Wall time: 150 ms


In [None]:
%%time
# Same workload with PyTorch on the CPU: 100 rounds of two random 100x100 tensors
# and their matrix product. Tensor creation is part of the measured time here as well.
for i in range(100):
  a = torch.randn([100, 100])
  b = torch.randn([100, 100])
  c = torch.matmul(a,b)

CPU times: user 34 ms, sys: 871 µs, total: 34.8 ms
Wall time: 36.4 ms


In [None]:
%%time
# NumPy baseline for a large element-wise add: 10 rounds of two random
# 10000x10000 arrays and their sum. Generating the random arrays is inside the
# timed loop, so the measurement covers generation plus the addition.
# np.random.randn yields float64 values (the from_numpy output earlier shows
# dtype=torch.float64), so this is not the same element size as a default torch tensor.
for i in range(10):
  a = np.random.randn(10000,10000)
  b = np.random.randn(10000,10000)
  c = a + b

CPU times: user 1min 36s, sys: 1.5 s, total: 1min 38s
Wall time: 1min 38s


In [None]:
%%time
# PyTorch-on-CPU version of the large add: 10 rounds of two random 10000x10000
# tensors and their element-wise sum. Tensor creation is included in the timing.
for i in range(10):
  a = torch.randn([10000, 10000])
  b = torch.randn([10000, 10000])
  c = a + b

CPU times: user 22.2 s, sys: 9.46 ms, total: 22.2 s
Wall time: 22.2 s


## CUDA support

In [None]:
# Number of CUDA devices PyTorch reports; the saved run printed 1.
print(torch.cuda.device_count())

1


In [None]:
# torch.cuda.device(0) only constructs a device object, so this prints its repr
# (the "<torch.cuda.device object at ...>" line in the output), not a description of the GPU.
print(torch.cuda.device(0))
# Human-readable name of device 0.
print(torch.cuda.get_device_name(0))

<torch.cuda.device object at 0x7f1ee9dbb4a8>
Tesla P100-PCIE-16GB


In [None]:
# Handle for the first CUDA device; later cells pass it as device=cuda0.
cuda0 = torch.device('cuda:0')

In [None]:
# device=cuda0 creates both tensors on the GPU; their sum is reported with
# device='cuda:0' as well (see the output).
a = torch.ones(3, 2, device=cuda0)
b = torch.ones(3, 2, device=cuda0)
c = a + b
print(c)

tensor([[2., 2.],
        [2., 2.],
        [2., 2.]], device='cuda:0')


In [None]:
# a is the all-ones GPU tensor from the previous cell; a + b left it unchanged.
print(a)

tensor([[1., 1.],
        [1., 1.],
        [1., 1.]], device='cuda:0')


In [None]:
%%time
# NumPy reference for the CPU/GPU comparison: 10 rounds of two random
# 10000x10000 arrays followed by an element-wise add. Array generation is
# inside the timed loop.
# np.add is called without out=, so the sum is returned as a new array that is
# discarded here, whereas the PyTorch cells below use the in-place add_().
for i in range(10):
  a = np.random.randn(10000,10000)
  b = np.random.randn(10000,10000)
  np.add(b, a)

CPU times: user 1min 22s, sys: 548 ms, total: 1min 22s
Wall time: 1min 22s


In [None]:
%%time
# PyTorch on the CPU: 10 rounds of two random 10000x10000 tensors, then an
# in-place add that accumulates a_cpu into b_cpu. Tensor creation is timed too.
for i in range(10):
  a_cpu = torch.randn([10000, 10000])
  b_cpu = torch.randn([10000, 10000])
  b_cpu.add_(a_cpu)

CPU times: user 23.3 s, sys: 173 ms, total: 23.5 s
Wall time: 23.5 s


In [None]:
%%time
# PyTorch on the GPU: 10 rounds of two random 10000x10000 tensors created
# directly on cuda0, then an in-place add that accumulates a into b.
for i in range(10):
  a = torch.randn([10000, 10000], device=cuda0)
  b = torch.randn([10000, 10000], device=cuda0)
  b.add_(a)

# CUDA kernels are launched asynchronously: the Python loop above returns as
# soon as the work is queued. Wait for the GPU to finish so that %%time reports
# the time to do the work rather than just the time to enqueue it.
torch.cuda.synchronize(cuda0)

CPU times: user 3.15 ms, sys: 1.99 ms, total: 5.14 ms
Wall time: 9 ms


In [None]:
%%time
# NumPy reference for a large matrix product: 10 rounds of two random
# 10000x10000 arrays and their matmul (result discarded). Array generation is
# inside the timed loop. No timing output was saved for this cell.
for i in range(10):
  a = np.random.randn(10000,10000)
  b = np.random.randn(10000,10000)
  np.matmul(b, a)

In [None]:
%%time
# PyTorch on the CPU: 10 rounds of two random 10000x10000 tensors and their
# matrix product (result discarded). Tensor creation is inside the timed loop.
# No timing output was saved for this cell.
for i in range(10):
  a_cpu = torch.randn([10000, 10000])
  b_cpu = torch.randn([10000, 10000])
  torch.matmul(a_cpu, b_cpu)

In [None]:
%%time
# PyTorch on the GPU: 10 rounds of two random 10000x10000 tensors created on
# cuda0 and their matrix product (result discarded).
for i in range(10):
  a = torch.randn([10000, 10000], device=cuda0)
  b = torch.randn([10000, 10000], device=cuda0)
  torch.matmul(a, b)

# GPU work is queued asynchronously, so block until every queued kernel has
# finished; otherwise %%time would only measure how long it took to launch them.
torch.cuda.synchronize(cuda0)

## Autodiff

In [None]:
# requires_grad=True asks autograd to track operations on x so that gradients
# with respect to it can be computed later.
x = torch.ones(3, 2, requires_grad=True)
print(x)

tensor([[1., 1.],
        [1., 1.],
        [1., 1.]], requires_grad=True)


In [None]:
# Operations on x are recorded by autograd: the result carries a grad_fn
# (AddBackward0 in the output).
y = x + 5
print(y)

tensor([[6., 6.],
        [6., 6.],
        [6., 6.]], grad_fn=<AddBackward0>)


In [None]:
# Element-wise z = y^2 + 1; with y = 6 everywhere this gives 37.
z = y*y + 1
print(z)

tensor([[37., 37.],
        [37., 37.],
        [37., 37.]], grad_fn=<AddBackward0>)


In [None]:
# Sum all six entries of z into the scalar t (6 * 37 = 222).
t = torch.sum(z)
print(t)

tensor(222., grad_fn=<SumBackward0>)


In [None]:
# Backpropagate from the scalar t; this populates x.grad, which the next cell prints.
t.backward()

In [None]:
# dt/dx: 2 * (x + 5) = 12 for every entry, since x is all ones.
print(x.grad)

tensor([[12., 12.],
        [12., 12.],
        [12., 12.]])


$t = \sum_i z_i, \quad z_i = y_i^2 + 1, \quad y_i = x_i + 5$

$\frac{\partial t}{\partial x_i} = \frac{\partial z_i}{\partial x_i} = \frac{\partial z_i}{\partial y_i} \frac{\partial y_i}{\partial x_i} = 2y_i \times 1$


At $x_i = 1$ we have $y_i = 6$, so $\frac{\partial t}{\partial x_i} = 2 \times 6 = 12$, which matches the values of `x.grad` printed above.

In [None]:
# Fresh input so that the gradient from the earlier example does not carry over.
x = torch.ones(3, 2, requires_grad=True)
y = x + 5

# Logistic sigmoid of y, written out explicitly.
r = 1/(1 + torch.exp(-y))
print(r)

# Reduce r to the scalar s and backpropagate from it.
s = r.sum()
s.backward()

# ds/dx = r * (1 - r), roughly 0.0025 at y = 6.
print(x.grad)

tensor([[0.9975, 0.9975],
        [0.9975, 0.9975],
        [0.9975, 0.9975]], grad_fn=<MulBackward0>)
tensor([[0.0025, 0.0025],
        [0.0025, 0.0025],
        [0.0025, 0.0025]])


In [None]:
x = torch.ones(3, 2, requires_grad=True)
y = x + 5
r = 1/(1 + torch.exp(-y))

# r is not a scalar here, so backward() is handed an explicit gradient with the
# same shape as r. Using all ones gives the same x.grad as summing r first.
a = torch.ones(3, 2)
r.backward(gradient=a)
print(x.grad)

tensor([[0.0025, 0.0025],
        [0.0025, 0.0025],
        [0.0025, 0.0025]])


$\frac{\partial{s}}{\partial{x}} = \frac{\partial{s}}{\partial{r}} \cdot \frac{\partial{r}}{\partial{x}}$

In the code above, `a` plays the role of $\frac{\partial{s}}{\partial{r}}$ (the gradient passed to `r.backward(a)`), so `x.grad` directly gives $\frac{\partial{s}}{\partial{x}}$.



## Autodiff example that looks like what we have been doing: fitting a linear model

In [None]:
# Synthetic data for a one-variable linear regression: 20 random inputs and
# their exact targets under the true model y = 3x - 2.
# x is input data, not a parameter being learned, so it is created without
# requires_grad. This keeps the targets y out of the autograd graph and matches
# how the training-loop cell generates its batches.
x = torch.randn([20, 1])
y = 3*x - 2

In [None]:
# Initial guesses for slope and intercept (both 1); requires_grad=True makes
# autograd track them so the loss can be differentiated with respect to each.
w = torch.ones(1, requires_grad=True)
b = torch.ones(1, requires_grad=True)

# Model prediction for every input in x.
y_hat = w*x + b

# Sum of squared errors between predictions and targets.
loss = ((y_hat - y)**2).sum()

In [None]:
# Sum of squared errors for the initial guess w = 1, b = 1.
print(loss)

tensor(234.8742, grad_fn=<SumBackward0>)


In [None]:
# Backpropagate from the loss; the gradients end up in w.grad and b.grad.
loss.backward()

In [None]:
# Gradients of the loss with respect to w and b.
print(w.grad, b.grad)

tensor([-74.1205]) tensor([107.1691])


## Do it in a loop

In [None]:
# Plain gradient descent fitting y_hat = w*x + b to data generated by y = 3x - 2.
learning_rate = 0.01

# Both parameters start at 1 and are tracked by autograd.
w = torch.tensor([1.], requires_grad=True)
b = torch.tensor([1.], requires_grad=True)

print(w.item(), b.item())

for i in range(100):

  # A fresh batch of 20 random inputs with their exact targets on every step.
  x = torch.randn([20, 1])
  y = 3*x - 2

  # Forward pass and sum-of-squared-errors loss.
  y_hat = w*x + b
  loss = ((y_hat - y)**2).sum()

  # Backward pass: fills w.grad and b.grad.
  loss.backward()

  with torch.no_grad():
    # Parameter step (w = w - eta * delta_w), done without autograd tracking.
    w.sub_(learning_rate * w.grad)
    b.sub_(learning_rate * b.grad)

    # Gradients accumulate across backward() calls, so reset them for the next step.
    w.grad.zero_()
    b.grad.zero_()

  print(w.item(), b.item())


1.0 1.0
1.3202018737792969 0.043064236640930176
1.7389421463012695 -0.727949321269989
2.034162998199463 -1.0832536220550537
2.7286624908447266 -1.5322452783584595
2.8162684440612793 -1.717452883720398
2.9011147022247314 -1.8330681324005127
2.9513444900512695 -1.8983395099639893
2.975252151489258 -1.93825364112854
2.9765141010284424 -1.9613136053085327
2.9839746952056885 -1.976440668106079
2.9925506114959717 -1.9887511730194092
2.9942610263824463 -1.9929925203323364
2.995135545730591 -1.995285153388977
2.997545003890991 -1.9970301389694214
2.998394250869751 -1.9982649087905884
2.9987854957580566 -1.9989012479782104
2.9994699954986572 -1.9994953870773315
2.999722957611084 -1.999688982963562
2.999849796295166 -1.9998112916946411
2.999908685684204 -1.9999010562896729
2.99994158744812 -1.9999420642852783
2.9999709129333496 -1.9999589920043945
2.9999866485595703 -1.9999765157699585
2.9999911785125732 -1.9999867677688599
2.999992847442627 -1.9999921321868896
2.999994993209839 -1.9999949932098

## Do it for a large problem

In [None]:
%%time
# CPU version of the same gradient-descent loop, scaled up to a weight vector
# with N = 10,000,000 entries, used here as a timing workload.
# NOTE(review): every step sees a single N-dimensional sample, and with N this
# large a step size of 0.001 looks far too big for the updates to converge.
# Nothing is printed, so the cell appears to serve only as a benchmark - confirm.
learning_rate = 0.001
N = 10000000
epochs = 200

# N weights drawn by torch.rand, and a single bias initialised to 1.
w = torch.rand([N], requires_grad=True)
b = torch.ones([1], requires_grad=True)

# print(torch.mean(w).item(), b.item())

for i in range(epochs):

  # One random N-dimensional input per step; the target comes from the
  # "true" model with every weight equal to 3 and bias -2.
  x = torch.randn([N])
  y = torch.dot(3*torch.ones([N]), x) - 2

  # Prediction of the current model and its squared error.
  y_hat = torch.dot(w, x) + b
  loss = torch.sum((y_hat - y)**2)

  loss.backward()

  # Gradient step without autograd tracking, then reset the gradients.
  with torch.no_grad():
    w -= learning_rate * w.grad
    b -= learning_rate * b.grad

    w.grad.zero_()
    b.grad.zero_()

#   print(torch.mean(w).item(), b.item())


In [None]:
%%time
# GPU version of the large gradient-descent loop: the same computation as the
# CPU cell, with every tensor created on cuda0, used here as a timing workload.
# NOTE(review): every step sees a single N-dimensional sample, and with N this
# large a step size of 0.001 looks far too big for the updates to converge.
# Nothing is printed, so the cell appears to serve only as a benchmark - confirm.
learning_rate = 0.001
N = 10000000
epochs = 200

# N weights drawn by torch.rand, and a single bias initialised to 1, both on the GPU.
w = torch.rand([N], requires_grad=True, device=cuda0)
b = torch.ones([1], requires_grad=True, device=cuda0)

# print(torch.mean(w).item(), b.item())

for i in range(epochs):

  # One random N-dimensional input per step; the target comes from the
  # "true" model with every weight equal to 3 and bias -2.
  x = torch.randn([N], device=cuda0)
  y = torch.dot(3*torch.ones([N], device=cuda0), x) - 2

  # Prediction of the current model and its squared error.
  y_hat = torch.dot(w, x) + b
  loss = torch.sum((y_hat - y)**2)

  loss.backward()

  # Gradient step without autograd tracking, then reset the gradients.
  with torch.no_grad():
    w -= learning_rate * w.grad
    b -= learning_rate * b.grad

    w.grad.zero_()
    b.grad.zero_()

  #print(torch.mean(w).item(), b.item())

# Nothing in the loop copies a value back to the host, so the queued CUDA work
# may still be running when the loop exits. Block until it is done so that
# %%time measures the full computation and is comparable with the CPU cell.
torch.cuda.synchronize(cuda0)
