#### 使用 numpy 实现两层神经网络 
1. 一个全连接 Relu神经网络，一个 隐藏层， 没有bias.   用来从x预测y, 使用L2  Loss; 两层神经网络
    - $h = W_1X + b_1$
    - $a = max(0, h)$
    - $y_{hat} = W_2a+b_2$
2. 这一实现 完全使用 numpy 来计算前向神经网络，loss 和 反向传播
    - forward pass
    - loss
    - backward pass
3. numpy ndarray 是一个简单的n维array, 它不知道任何关于深度学习，梯度，以及计算图的知识，它只是一种用来计算数学运算的数据结构

In [4]:
import numpy as np
# 64个输入，1000维，中间层100维， 输出10维
N, D_in, H, D_out = 64, 1000, 100, 10

# 随机创建一些训练数据
x = np.random.randn(N, D_in)
y = np.random.randn(N, D_out)

w1 = np.random.randn(D_in, H)
w2 = np.random.randn(H, D_out)

# 这个神经网络的结果是 将 1000维的输入向量，转为10维的向量
learning_rate = 1e-6
for t in range(500):
    # forward pass
    h = x.dot(w1) # N*H
    h_relu = np.maximum(h, 0) # h的relu
    y_pred = h_relu.dot(w2)  # N*D_out
    
    # comput loss   假设使用 MSELoss
    loss = np.square(y_pred-y).sum()
    print(t, loss)
    
    # backward pass
    # compute the gradient  这里一步步求导
    grad_y_pred = 2.0*(y_pred-y)  # 1. 先求 y_pred的导数
    grad_w2 = h_relu.T.dot(grad_y_pred)  # 2. 求grad_w2求导
    grad_h_relu = grad_y_pred.dot(w2.T)  # 3. 对 h_relu 求导
    grad_h = grad_h_relu.copy()
    grad_h[h<0] = 0
    grad_w1 = x.T.dot(grad_h)
    
    # update weights of w1 and w2
    w1 -= learning_rate*grad_w1
    w2 -= learning_rate*grad_w2

0 24251920.684185788
1 19832433.20514761
2 18772078.19636458
3 18767757.187690064
4 18250146.067622714
5 16430528.438207828
6 13289310.671567539
7 9725466.109574726
8 6549988.75517132
9 4238674.811533952
10 2726635.8336380883
11 1804983.1674783377
12 1250985.7143057846
13 915300.570883099
14 704059.0981682662
15 564574.7265305863
16 467161.77927604265
17 395442.3164665727
18 340106.67704363796
19 295851.3193919674
20 259493.1754885948
21 229011.94140357722
22 203112.90208505327
23 180847.44728414377
24 161568.43953849006
25 144774.8444829986
26 130076.78270036945
27 117154.76410060977
28 105740.8346956994
29 95627.57604356692
30 86648.03386164829
31 78646.49088693399
32 71505.27228981076
33 65116.1325164584
34 59387.92722967838
35 54241.51084472261
36 49606.51587317536
37 45426.697034839905
38 41649.25546258849
39 38231.29666521035
40 35133.18666051523
41 32322.744634271945
42 29769.017228177516
43 27444.891185884466
44 25326.903209089214
45 23395.181841681504
46 21629.59610253977
47 2

373 0.0036217771648392235
374 0.003472610421406816
375 0.0033295095258596198
376 0.0031923402035721913
377 0.0030608637000080633
378 0.0029348054044384
379 0.0028139871925420288
380 0.0026981082282520946
381 0.002586992287269249
382 0.0024804548107739167
383 0.0023783538539427504
384 0.002280518671794763
385 0.0021866458650715856
386 0.002096676743009701
387 0.0020104333608119843
388 0.0019276882147429388
389 0.0018484012520330227
390 0.0017723510418668651
391 0.0016994354602989704
392 0.0016295214346488862
393 0.0015624749202976002
394 0.0014982501709152078
395 0.0014366579077294821
396 0.0013775735498605998
397 0.0013209183057804473
398 0.0012666121078149643
399 0.001214554886824524
400 0.001164616010506487
401 0.0011167479040527905
402 0.0010708392844538698
403 0.0010268231266976399
404 0.0009846443348932956
405 0.000944191400754601
406 0.000905386425077874
407 0.0008681901048582133
408 0.0008325165657735395
409 0.0007983214052152682
410 0.0007655347038672761
411 0.00073408278835798

In [6]:
# 使用 pytporch实现
import torch
# 64个输入，1000维，中间层100维， 输出10维
N, D_in, H, D_out = 64, 1000, 100, 10

# 随机创建一些训练数据
x = torch.randn(N, D_in)
y = torch.randn(N, D_out)

w1 = torch.randn(D_in, H)
w2 = torch.randn(H, D_out)

# 这个神经网络的结果是 将 1000维的输入向量，转为10维的向量
learning_rate = 1e-6
for t in range(500):
    # forward pass
    h = x.mm(w1) # N*H
    h_relu = h.clamp(min=0) # 用一个夹子夹一下
    y_pred = h_relu.mm(w2)  # N*D_out
    
    # comput loss   假设使用 MSELoss
    loss = (y_pred-y).pow(2).sum().item()
    print(t, loss)
    
    # backward pass
    # compute the gradient  这里一步步求导
    grad_y_pred = 2.0*(y_pred-y)  # 1. 先求 y_pred的导数
    grad_w2 = h_relu.t().mm(grad_y_pred)  # 2. 求grad_w2求导
    grad_h_relu = grad_y_pred.mm(w2.t())  # 3. 对 h_relu 求导
    grad_h = grad_h_relu.clone()
    grad_h[h<0] = 0
    grad_w1 = x.t().mm(grad_h)
    
    # update weights of w1 and w2
    w1 -= learning_rate*grad_w1
    w2 -= learning_rate*grad_w2

0 29223368.0
1 26346074.0
2 27946060.0
3 29928930.0
4 28981932.0
5 23560694.0
6 15900366.0
7 9246546.0
8 5068397.0
9 2866593.25
10 1782995.5
11 1238665.625
12 942377.875
13 762039.625
14 639333.9375
15 548156.4375
16 476208.625
17 417354.125
18 368051.90625
19 326164.59375
20 290221.6875
21 259133.625
22 232112.90625
23 208694.359375
24 188161.75
25 170078.65625
26 154104.1875
27 139931.875
28 127321.6875
29 116069.515625
30 106005.9375
31 97006.3984375
32 88918.1171875
33 81628.1484375
34 75042.3359375
35 69086.3359375
36 63686.296875
37 58777.01953125
38 54307.55078125
39 50232.328125
40 46511.2109375
41 43114.27734375
42 40013.85546875
43 37166.87109375
44 34552.71484375
45 32155.880859375
46 29950.162109375
47 27916.46484375
48 26039.44921875
49 24305.42578125
50 22701.8984375
51 21218.486328125
52 19843.46875
53 18567.935546875
54 17383.96875
55 16285.802734375
56 15268.794921875
57 14321.896484375
58 13442.0625
59 12623.0078125
60 11861.046875
61 11150.1240234375
62 10486.2509765

In [7]:
# pytorch优点：可以自动求导，无需自己设置
x = torch.tensor(1., requires_grad=True)
w = torch.tensor(2., requires_grad=True)
b = torch.tensor(3., requires_grad=True)

y = w*x+b  # 1个函数 y=2*1+3

y.backward()

print(x.grad)
print(w.grad)
print(b.grad)

tensor(2.)
tensor(1.)
tensor(1.)


In [15]:
# 使用 pytporch实现
import torch
# 64个输入，1000维，中间层100维， 输出10维
N, D_in, H, D_out = 64, 1000, 100, 10

# 随机创建一些训练数据
x = torch.randn(N, D_in)
y = torch.randn(N, D_out)

w1 = torch.randn(D_in, H, requires_grad=True)
w2 = torch.randn(H, D_out,  requires_grad=True)

# 这个神经网络的结果是 将 1000维的输入向量，转为10维的向量
learning_rate = 1e-6
for t in range(500):
    # forward pass
    y_pred = x.mm(w1).clamp(min=0).mm(w2)
    
    # comput loss   假设使用 MSELoss
    loss = (y_pred-y).pow(2).sum()
    print(t, loss.item())
    
    # backward pass
    # compute the gradient  这里一步步求导
    loss.backward()
    
    # update weights of w1 and w2
    w1 = w1 - learning_rate * w1.grad
    w2 = w2 - learning_rate * w2.grad

0 33792288.0
1 31934004.0




TypeError: unsupported operand type(s) for *: 'float' and 'NoneType'