2021 Takahiro Shinozaki @ Tokyo Tech

A quick introduction to PyTorch tensors and gradient calculation

References: 
*   https://pytorch.org/tutorials/



In [None]:
import torch
import torch.nn as nn
from matplotlib import pyplot

In [None]:
# Report whether PyTorch can see a CUDA-capable GPU.
print(
    'CUDA（GPU）is available'
    if torch.cuda.is_available()
    else 'CUDA（GPU）is not available'
)

In [None]:
# scalar
# A tensor built from a single number is a 0-dimensional (scalar) tensor.
x = torch.tensor(7)
print('x =', x)

# Ordinary arithmetic operators work directly on tensors.
y = 1 + x ** 2
print('y =', y)

# Elementary functions are also available as tensor methods;
# here they are applied to a floating-point scalar.
x = torch.tensor(1.0)
y = x.exp()
print('y =', y)

y = x.cos()
print('y =', y)

In [None]:
# data type
# The dtype is inferred from the Python literal (integer vs. float)
# unless it is requested explicitly with dtype=.
print(torch.tensor(1).dtype)
print(torch.tensor(1.0).dtype)

# Explicitly ask for double precision.
x = torch.tensor(1.0, dtype=torch.float64)
print(x.dtype)

In [None]:
# vector
# A 1-D tensor; arithmetic is applied element by element.
x = torch.tensor([1, 2, 3, 4, 5], dtype=torch.float32)
print('x =', x)
print('x**0.5 =', torch.pow(x, 0.5))

# 41 evenly spaced points from 0 to 10.
x = torch.linspace(start=0, end=10, steps=41)
print('x =', x)

# Plot cos(x) over those points.
y = x.cos()
pyplot.plot(x, y)
pyplot.xlabel('x')
pyplot.ylabel('cos(x)')
pyplot.grid()
pyplot.show()

In [None]:
# matrix
# A nested list becomes a 2-D tensor; integer entries give an integer dtype.
x = torch.tensor([[1, 2],
                  [3, 4]])
print(x)
print(x.dtype)

# Multiplying by a number scales every entry.
y = 2 * x
print(y)

# Element-wise product of two tensors (same as x * x).
y = torch.mul(x, x)
print(y)

# Matrix product.
y = x.mm(x)
print('y =', y)

# Floating-point matrix and a 2x1 column vector.
x = torch.tensor([[1.0, 2.0],
                  [3.0, 4.0]])
b = torch.tensor([[10], [10]], dtype=torch.float32)
print(x)
print(x.dtype)
print(b)
print(b.dtype)

# Matrix-vector product: (2x2) times (2x1) gives (2x1).
y = x.mm(b)
print(y)

# Swap dimensions 0 and 1, i.e. transpose the matrix.
y = x.transpose(0, 1)
print(y)

In [None]:
# function
def f1(x1, x2):
    """Return 2*x1 + x2.

    Only `*` and `+` are used, so the arguments may be plain numbers
    or tensors.
    """
    doubled = 2 * x1
    return doubled + x2


# Evaluate f1 on two scalar tensors.
a, b = torch.tensor(1.0), torch.tensor(2.5)
print(f1(a, b))

def f2(x):
    """Return x squared minus 3 (element-wise when x is a tensor)."""
    squared = x ** 2
    return squared - 3

# Evaluate f2 at a single point ...
x = torch.tensor(4.0)
print('x =', x)
print('f2(x) =', f2(x))

# ... and on 21 evenly spaced points from -10 to 10.
x = torch.linspace(start=-10, end=10, steps=21)
print('x =', x)
print('f2(x) =', f2(x))

# Plot f2 over those points.
pyplot.plot(x, f2(x))
pyplot.grid()
pyplot.show()



In [None]:
# library to visualize the strucure of computation
! pip install torchviz
from torchviz import make_dot  

Derivative

$\frac{\partial}{\partial x}(x^2+3)|_{x=2} = 2x |_{x=2} = 4$

$\frac{\partial}{\partial x}(x+5w)^2|_{x=2,w=3} = 2(x+5w) |_{x=2,w=3} = 34$

$\frac{\partial}{\partial w}(x+5w)^2|_{x=2,w=3} = 10(x+5w) |_{x=2,w=3} = 170$

In [None]:
# derivative
# NOTE: f1 and f2 below rebind the names used in the earlier "# function" cell.
def f1(x):
    """Return the square of x."""
    return x ** 2


def f2(x, w):
    """Return x + 5*w."""
    return x + 5 * w


# requires_grad=True indicates to store gradient
x = torch.tensor(2.0, requires_grad=True)
w = torch.tensor(3.0, requires_grad=True)

print('x =', x)
print('w =', w)

# y = x^2 + 3, so dy/dx = 2x = 4 at x = 2
y = x.pow(2) + 3

# back-propagation (obtain gradients using the chain rule)
y.backward()
print('x.grad =', x.grad)

# reset the previously obtained gradient before differentiating a new y
x.grad = None
print('x.grad =', x.grad)

# y = (x + 5w)^2
y = f1(f2(x, w))
print('y= ', y)

# back-propagation again: dy/dx = 2(x + 5w) = 34, dy/dw = 10(x + 5w) = 170
y.backward()
print('x.grad =', x.grad)
print('w.grad =', w.grad)

# Draw the graph of the computation that produced y.
# make_dot(y) is the last expression in this cell (only comments follow),
# so its return value is what the notebook displays.
make_dot(y)

# cf.
# retain_grad : call it on an intermediate variable to retain its gradient


Higher-order derivatives

$f(x,w) = (x+5w)^2$

$f_x(x,w) = \frac{\partial}{\partial x}(x+5w)^2 = 2(x+5w)$

$f_w(x,w) = \frac{\partial}{\partial w}(x+5w)^2 = 10(x+5w)$

$f_{xx} = \frac{\partial f_x}{\partial x} = 2$

$f_{xw} = \frac{\partial f_x}{\partial w} = 10$

$f_{wx} = \frac{\partial f_w}{\partial x} = 10$

$f_{ww} = \frac{\partial f_w}{\partial w} = 50$

In [None]:
# higher order derivative
def f1(x):
    """Return the square of x."""
    return x ** 2


def f2(x, w):
    """Return x + 5*w."""
    return x + 5 * w


x = torch.tensor(2.0, requires_grad=True)
w = torch.tensor(3.0, requires_grad=True)

# y = (x + 5w)^2
y = f1(f2(x, w))

# First derivatives of y. create_graph=True builds a graph for the gradients
# themselves, so fx and fw can be differentiated again.
fx, fw = torch.autograd.grad(y, (x, w), create_graph=True)
print('fx =', fx)
print('fw =', fw)

# Differentiate fx: x.grad receives f_xx and w.grad receives f_xw.
# retain_graph=True keeps the graph so that fw can be back-propagated below.
fx.backward(retain_graph=True)
print('x.grad =', x.grad)
print('w.grad =', w.grad)

# Clear the stored gradients, then differentiate fw:
# x.grad receives f_wx and w.grad receives f_ww.
x.grad = None
w.grad = None
fw.backward()
print('x.grad =', x.grad)
print('w.grad =', w.grad)