In [1]:
import torch

自定义 nn.Module

In [14]:
# Problem sizes: batch size, input features, output features, hidden units.
BATCH, D_IN, D_OUT, H = 64, 1000, 2, 500

# Random inputs and random regression targets for the toy problem.
x = torch.randn(BATCH, D_IN)
y = torch.randn(BATCH, D_OUT)

# Standalone weight tensors shaped like the two layers: (D_IN, H) and (H, D_OUT).
# The original referenced the undefined names `H_IN` and `D`, which raised
# NameError on a fresh kernel; the shapes now use the constants defined above.
# NOTE(review): nothing visible in this cell reads w1/w2 (the module owns its
# own parameters) -- they are kept in case other cells rely on them.
w1 = torch.randn(D_IN, H)
w2 = torch.randn(H, D_OUT)

class TowLayerNet(torch.nn.Module):
    """Fully connected network with one hidden layer and a ReLU in between.

    Computes ``linear2(relu(linear1(x)))`` where both layers carry a bias.
    """

    def __init__(self, D_IN, D_OUT, H):
        """Create the two linear layers.

        Args:
            D_IN: number of input features.
            D_OUT: number of output features.
            H: number of hidden units.
        """
        super().__init__()
        # Layers are created in input -> output order so parameter
        # registration order matches the data flow.
        self.linear1 = torch.nn.Linear(in_features=D_IN, out_features=H, bias=True)
        self.linear2 = torch.nn.Linear(in_features=H, out_features=D_OUT, bias=True)

    def forward(self, x):
        """Return the network's prediction for the input batch ``x``."""
        hidden = self.linear1(x)
        # Clamping at zero from below is the ReLU non-linearity.
        activated = torch.clamp(hidden, min=0)
        return self.linear2(activated)

# Train TowLayerNet on the random (x, y) batch with Adam and a summed
# squared-error loss.
learning_rate = 1e-4
model = TowLayerNet(D_IN, D_OUT, H)
loss_fn = torch.nn.MSELoss(reduction='sum')
optim = torch.optim.Adam(model.parameters(), lr=learning_rate)

# The loop variable is named `step` rather than `iter`: `iter` would shadow
# the builtin and stay in the notebook's global namespace after this cell.
for step in range(500):
    # Forward pass and loss.
    y_pred = model(x)
    loss = loss_fn(y_pred, y)
    # .item() prints a plain Python float instead of the tensor repr
    # (which would include grad_fn), matching the other cells' logs.
    print('iter: {}, loss: {}'.format(step, loss.item()))

    # Clear stale gradients through the optimizer (it owns exactly the
    # model's parameters), back-propagate, then apply the Adam update.
    optim.zero_grad()
    loss.backward()
    optim.step()

numpy实现

In [35]:
import numpy as np
 
# Two-layer ReLU network trained with hand-written gradients in NumPy.
# N: batch size; D_in: input dimension;
# H: number of hidden units; D_out: output dimension.
N, D_in, H, D_out = 64, 1000, 100, 10

# Random inputs and random targets.
x = np.random.randn(N, D_in)    # shape (64, 1000)
y = np.random.randn(N, D_out)   # shape (64, 10)

# Randomly initialised weights.
w1 = np.random.randn(D_in, H)   # input -> hidden, shape (1000, 100)
w2 = np.random.randn(H, D_out)  # hidden -> output, shape (100, 10)

learning_rate = 1e-6

for t in range(500):
    # Step 1: forward pass -- compute the prediction y_pred.
    h = x @ w1
    h_relu = np.maximum(h, 0)
    y_pred = h_relu @ w2

    # Step 2: sum of squared errors between prediction and target.
    loss = np.sum(np.square(y_pred - y))
    print(t, loss)

    # Step 3: backward pass. The derivatives are written out by hand because
    # the loss and the layers have simple closed-form gradients.
    grad_y_pred = 2.0 * (y_pred - y)
    grad_h_relu = grad_y_pred @ w2.T
    grad_w2 = h_relu.T @ grad_y_pred
    # ReLU passes gradient only where its input was not negative.
    grad_h = np.where(h < 0, 0.0, grad_h_relu)
    grad_w1 = x.T @ grad_h

    # Gradient-descent update, in place.
    w1 -= learning_rate * grad_w1
    w2 -= learning_rate * grad_w2

0 28490026.305024628
1 19857907.501338586
2 16364306.251034472
3 14691780.242684353
4 13511491.750480896
5 12091631.16892976
6 10328249.224070456
7 8279651.591877677
8 6308797.625139078
9 4593231.00904953
10 3271803.1836226936
11 2308385.7379010795
12 1644498.737958313
13 1192421.180211418
14 888271.1637964067
15 680959.0211721672
16 537617.4079713157
17 435684.4134226644
18 361316.0179612407
19 305258.8217137256
20 261726.35533791868
21 227029.2471334738
22 198691.80851544475
23 175102.06993686123
24 155185.32550048566
25 138167.89430658444
26 123472.40962177247
27 110680.44917955116
28 99474.52448076573
29 89615.6380941135
30 80907.75627984654
31 73183.69314354623
32 66312.44053255075
33 60186.13724357753
34 54713.61879977671
35 49814.533556679344
36 45413.64396900678
37 41457.070640905615
38 37893.08181320799
39 34676.63360650177
40 31770.00002061329
41 29141.89029127822
42 26759.646492689644
43 24600.980375134346
44 22638.77463277385
45 20852.007784565387
46 19223.238397180325
47 1

366 0.0007274822496380386
367 0.0006948263679731995
368 0.0006636521011267659
369 0.0006338898836249941
370 0.0006054709133970881
371 0.000578337608807114
372 0.000552432404943616
373 0.000527694577823564
374 0.0005040772226779877
375 0.00048152770141558706
376 0.00045999107938834256
377 0.00043943003959994316
378 0.0004197958589827211
379 0.0004010438669799291
380 0.0003831362485959668
381 0.000366034991862981
382 0.00034970535331773993
383 0.0003341084036479484
384 0.00031921254882729576
385 0.0003049880007383096
386 0.0002914047579351399
387 0.0002784285764765987
388 0.00026603418483162663
389 0.0002541977089508629
390 0.0002428907062171432
391 0.00023209074887928823
392 0.00022177522414673135
393 0.00021192168172272217
394 0.0002025106869955446
395 0.00019351958165515898
396 0.00018493084911616018
397 0.00017672685425253027
398 0.0001688889648138579
399 0.00016140096129180697
400 0.0001542481004263862
401 0.00014741441796794542
402 0.00014088590095552776
403 0.00013464899703375134


tensor实现

In [2]:
import numpy as np
import torch
 
# Two-layer ReLU network on torch tensors, with gradients from autograd.
# N: batch size; D_in: input dimension;
# H: number of hidden units; D_out: output dimension.
N, D_in, H, D_out = 64, 1000, 100, 10

# Random inputs and random targets.
x = torch.randn(N, D_in)    # shape (64, 1000)
y = torch.randn(N, D_out)   # shape (64, 10)

# Randomly initialised weights; requires_grad=True makes autograd track them.
w1 = torch.randn(D_in, H, requires_grad=True)    # input -> hidden, (1000, 100)
w2 = torch.randn(H, D_out, requires_grad=True)   # hidden -> output, (100, 10)

learning_rate = 1e-6

for t in range(500):
    # Step 1: forward pass -- compute the prediction y_pred.
    h = torch.mm(x, w1)
    h_relu = torch.clamp(h, min=0)
    y_pred = torch.mm(h_relu, w2)

    # Step 2: sum of squared errors, then back-propagate through the graph.
    loss = torch.sum(torch.pow(y_pred - y, 2))
    print(t, loss.item())
    loss.backward()

    # Parameter update. no_grad keeps the in-place step out of the autograd
    # graph; gradients are zeroed afterwards because backward() accumulates
    # into .grad.
    with torch.no_grad():
        w1.sub_(learning_rate * w1.grad)
        w2.sub_(learning_rate * w2.grad)
        w1.grad.zero_()
        w2.grad.zero_()

0 36625124.0
1 36385304.0
2 37450744.0
3 33723980.0
4 24110372.0
5 13813252.0
6 6969275.0
7 3632810.0
8 2163157.5
9 1492091.375
10 1141320.0
11 925723.3125
12 774435.0
13 659071.5625
14 566824.5625
15 491208.59375
16 428196.46875
17 375260.71875
18 330316.34375
19 291904.6875
20 258877.78125
21 230352.640625
22 205615.78125
23 184111.03125
24 165280.640625
25 148733.53125
26 134138.59375
27 121228.078125
28 109771.90625
29 99581.4609375
30 90480.4921875
31 82347.4375
32 75062.5078125
33 68534.703125
34 62663.0703125
35 57385.65625
36 52615.38671875
37 48296.25
38 44383.65625
39 40834.58984375
40 37609.2578125
41 34677.05859375
42 32005.12890625
43 29568.314453125
44 27342.83984375
45 25308.45703125
46 23446.142578125
47 21740.125
48 20174.39453125
49 18737.5078125
50 17416.357421875
51 16200.3583984375
52 15080.99609375
53 14048.5302734375
54 13095.9423828125
55 12216.6025390625
56 11404.064453125
57 10652.2763671875
58 9956.2939453125
59 9311.46484375
60 8713.6220703125
61 8158.923339

390 0.019282622262835503
391 0.018655497580766678
392 0.018049078062176704
393 0.01747000776231289
394 0.01690925844013691
395 0.016365624964237213
396 0.01584928296506405
397 0.015331833623349667
398 0.01484463457018137
399 0.014374001882970333
400 0.013918166980147362
401 0.01347291562706232
402 0.013053599745035172
403 0.01262296736240387
404 0.012230884283781052
405 0.01184121984988451
406 0.01146827731281519
407 0.01110571064054966
408 0.010752778500318527
409 0.01041845977306366
410 0.010096958838403225
411 0.009779078885912895
412 0.009470628574490547
413 0.0091788275167346
414 0.008895374834537506
415 0.008614964783191681
416 0.008343376219272614
417 0.008086426183581352
418 0.007833342999219894
419 0.007592307403683662
420 0.007360887713730335
421 0.007131841499358416
422 0.006908709648996592
423 0.006701624020934105
424 0.006496538873761892
425 0.006299510598182678
426 0.006111756898462772
427 0.005927704740315676
428 0.005743077490478754
429 0.005572203546762466
430 0.005407