# lesson 18 激活函数及其梯度

In [1]:
import numpy as np 
import torch 

## torch.sigmoid 

In [2]:
# Ten evenly spaced samples covering [-100, 100], used as sigmoid input.
a = torch.linspace(start=-100, end=100, steps=10)
a

tensor([-100.0000,  -77.7778,  -55.5556,  -33.3333,  -11.1111,   11.1111,
          33.3333,   55.5555,   77.7778,  100.0000])

In [3]:
# Element-wise sigmoid of a; as the displayed result shows, the
# large-magnitude entries saturate to 0 or 1.
torch.sigmoid(a)

tensor([0.0000e+00, 1.6655e-34, 7.4564e-25, 3.3382e-15, 1.4945e-05, 9.9999e-01,
        1.0000e+00, 1.0000e+00, 1.0000e+00, 1.0000e+00])

## torch.tanh 

In [4]:
# Ten evenly spaced samples covering [-1, 1], used as tanh input.
a = torch.linspace(start=-1, end=1, steps=10)
a

tensor([-1.0000, -0.7778, -0.5556, -0.3333, -0.1111,  0.1111,  0.3333,  0.5556,
         0.7778,  1.0000])

In [5]:
# Element-wise hyperbolic tangent of a.
torch.tanh(a)

tensor([-0.7616, -0.6514, -0.5047, -0.3215, -0.1107,  0.1107,  0.3215,  0.5047,
         0.6514,  0.7616])

## F.relu 

In [6]:
from torch.nn import functional as F

In [7]:
# Ten evenly spaced samples covering [-1, 1], used as ReLU input.
a = torch.linspace(start=-1, end=1, steps=10)
a

tensor([-1.0000, -0.7778, -0.5556, -0.3333, -0.1111,  0.1111,  0.3333,  0.5556,
         0.7778,  1.0000])

In [8]:
# Element-wise ReLU via the torch namespace: negative entries become 0,
# positive entries pass through unchanged.
torch.relu(a)

tensor([0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.1111, 0.3333, 0.5556, 0.7778,
        1.0000])

In [9]:
# Same ReLU through torch.nn.functional; the displayed result matches
# the torch.relu call.
F.relu(a)

tensor([0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.1111, 0.3333, 0.5556, 0.7778,
        1.0000])

## loss 及其梯度

In [12]:
import torch.nn.functional as F

In [11]:
# Input x = 1 and weight w = 2; later cells form the product x*w.
x = torch.ones(1)
# Use a float fill value: current PyTorch infers the dtype of torch.full
# from the fill value, so an integer 2 would produce an int64 tensor, and
# integer tensors cannot require gradients.
w = torch.full([1], 2.)
w, x

(tensor([2.]), tensor([1.]))

In [14]:
# Mean squared error between a constant 1 and the product x*w.
# w does not require grad yet, so the displayed loss has no grad_fn.
mse = F.mse_loss(input=torch.ones(1), target=x*w)
mse

tensor(1.)

In [15]:
# Expected failure: mse was built from tensors that do not require grad,
# so there is no graph to differentiate. The error is caught and printed
# so the notebook still runs top to bottom.
try:
    torch.autograd.grad(mse, [w])
except RuntimeError as err:
    print(f"{type(err).__name__}: {err}")

RuntimeError: element 0 of tensors does not require grad and does not have a grad_fn

In [16]:
# Turn on gradient tracking for w in place; the call returns w itself.
w.requires_grad_(True)

tensor([2.], requires_grad=True)

In [17]:
# Still fails: mse was computed before w.requires_grad_() was called, so
# the existing loss tensor has no graph connecting it to w. The error is
# caught and printed so the notebook still runs top to bottom.
try:
    torch.autograd.grad(mse, [w])
except RuntimeError as err:
    print(f"{type(err).__name__}: {err}")

RuntimeError: element 0 of tensors does not require grad and does not have a grad_fn

In [29]:
# Marking w as requiring grad is not sufficient by itself:
# the loss has to be recomputed afterwards so that it is built on a graph
# that tracks w.
# NOTE(review): mse_loss takes (input, target); the product x*w is passed
# as the target here. The value is the same either way because the squared
# error is symmetric.

mse = F.mse_loss(input=torch.ones(1), target=x*w)
mse

tensor(1., grad_fn=<MeanBackward0>)

In [24]:
# Gradient of the loss with respect to w, returned as a tuple.
# retain_graph=True keeps the graph's saved buffers alive; without it this
# call frees them and the following mse.backward(...) cell fails when the
# notebook is executed top to bottom.
torch.autograd.grad(mse, [w], retain_graph=True)

(tensor([2.]),)

In [30]:
# Backpropagate from the loss; the resulting gradient is read from w.grad
# in the next cell. retain_graph=True keeps the graph so it can be
# differentiated again.
mse.backward(retain_graph=True)

In [31]:
# Gradient stored on w by the preceding mse.backward(...) call.
w.grad

tensor([2.])

In [32]:
# Norm of w; because w requires grad, the result is tracked by autograd.
w.norm()  # displayed with grad_fn=<NormBackward0>

tensor(2., grad_fn=<NormBackward0>)

### gradient API

- torch.autograd.grad(loss, [w1, w2,...])
- loss.backward()
        
    w1.grad
    w2.grad

## F.softmax

In [33]:
# Three uniform random values in [0, 1), created directly as a leaf tensor
# that tracks gradients.
a = torch.rand(3, requires_grad=True)
a

tensor([0.8865, 0.8730, 0.6643], requires_grad=True)

In [34]:
# Softmax over the only dimension of a, giving a 3-element tensor p.
p = F.softmax(input=a, dim=0)
p

tensor([0.3588, 0.3540, 0.2873], grad_fn=<SoftmaxBackward>)

In [35]:
# Expected failure: p has three elements, and backward() can only create
# the implicit starting gradient for a scalar output. Differentiate a
# single element instead (e.g. p[1]), as the cells below do.
#
# Separate gotcha: after a successful backward() the gradients are stored
# in the .grad of the leaf tensors and the graph's buffers are freed, so
# backward() cannot be called a second time on the same graph unless the
# first call passed retain_graph=True.
try:
    p.backward()
except RuntimeError as err:
    print(f"{type(err).__name__}: {err}")

RuntimeError: grad can be implicitly created only for scalar outputs

In [37]:
# Display p again: the failed backward() call left it unchanged.
p

tensor([0.3588, 0.3540, 0.2873], grad_fn=<SoftmaxBackward>)

In [38]:
# Indexing one element gives a scalar tensor that is still part of the
# graph (displayed with grad_fn=<SelectBackward>).
p[1]

tensor(0.3540, grad_fn=<SelectBackward>)

In [39]:
# The list form in which a is passed as the inputs of autograd.grad below.
[a]

[tensor([0.8865, 0.8730, 0.6643], requires_grad=True)]

In [40]:
# The quantity being differentiated must be a single number: a scalar or
# a one-element tensor (e.g. shape [1, 1]).

# Gradient of p[1] with respect to a; the graph is kept for the next cell.
torch.autograd.grad(outputs=p[1], inputs=[a], retain_graph=True)

(tensor([-0.1270,  0.2287, -0.1017]),)

In [41]:
# Gradient of p[2] with respect to a.
torch.autograd.grad(outputs=p[2], inputs=[a])

(tensor([-0.1031, -0.1017,  0.2047]),)