# [PYTORCH: TENSORS AND AUTOGRAD](https://pytorch.org/tutorials/beginner/examples_autograd/two_layer_net_autograd.html)

全连接ReLU网络：含1层隐藏层、无偏置，根据𝑥预测𝑦，通过最小化预测值与真实值之间欧氏距离的平方来训练网络。

使用PyTorch张量计算网络的前向传播和损失，使用autograd自动计算梯度。

```python
x.requires_grad=True
x.grad
```

In [5]:
import torch

# All tensors below are created as 32-bit floats on the CPU.
# To run on a GPU instead, use: device = torch.device("cuda:0")
dtype = torch.float32
device = torch.device("cpu")

# Problem dimensions.
N = 64        # batch size
D_in = 1000   # input dimension
H = 100       # hidden dimension
D_out = 10    # output dimension

# Random input batch and target batch. requires_grad is left at its default
# (False), so autograd does not compute gradients with respect to the data
# during the backward pass.
x = torch.randn(N, D_in, device=device, dtype=dtype)
y = torch.randn(N, D_out, device=device, dtype=dtype)

# Random initial weights for the two layers. Turning requires_grad on makes
# autograd record operations on them so that the backward pass can fill in
# their .grad attributes.
w1 = torch.randn(D_in, H, device=device, dtype=dtype).requires_grad_(True)
w2 = torch.randn(H, D_out, device=device, dtype=dtype).requires_grad_(True)

learning_rate = 1e-6
for t in range(500):
    # Forward pass: linear layer -> ReLU (clamp at zero) -> linear layer.
    # The backward pass is not written by hand; autograd derives it from
    # the operations recorded here.
    hidden = torch.mm(x, w1).clamp(min=0)
    y_pred = torch.mm(hidden, w2)

    # Sum of squared errors. The result is a zero-dimensional tensor;
    # .item() extracts it as a plain Python number for printing.
    residual = y_pred - y
    loss = residual.pow(2).sum()
    print(t, loss.item())

    # Backward pass: populates w1.grad and w2.grad with the gradient of the
    # loss with respect to each weight tensor (the tensors created with
    # requires_grad=True).
    loss.backward()

    # Plain gradient-descent step. The in-place updates are wrapped in
    # torch.no_grad() so that they are not themselves recorded by autograd.
    # (torch.optim.SGD could be used to perform the same update.)
    with torch.no_grad():
        for weight in (w1, w2):
            weight -= learning_rate * weight.grad
            # backward() accumulates into .grad, so reset it before the
            # next iteration.
            weight.grad.zero_()


0 32181034.0
1 27363428.0
2 25042764.0
3 21873754.0
4 17196638.0
5 12014108.0
6 7707678.5
7 4772950.0
8 3009209.25
9 1999151.875
10 1419613.75
11 1071914.75
12 849309.5
13 696212.0625
14 583994.1875
15 497719.0
16 428769.3125
17 372139.40625
18 324883.28125
19 284970.59375
20 250938.5625
21 221735.75
22 196567.921875
23 174822.421875
24 155916.09375
25 139384.84375
26 124917.2578125
27 112186.9453125
28 100962.125
29 91031.1796875
30 82216.4140625
31 74383.7578125
32 67401.6640625
33 61165.66796875
34 55583.45703125
35 50586.6328125
36 46103.5859375
37 42069.46484375
38 38437.16015625
39 35159.73828125
40 32198.56640625
41 29516.841796875
42 27093.1015625
43 24892.05078125
44 22892.142578125
45 21072.3984375
46 19414.25
47 17900.259765625
48 16517.447265625
49 15252.9853515625
50 14095.67578125
51 13035.2587890625
52 12062.8173828125
53 11170.2763671875
54 10350.642578125
55 9597.4619140625
56 8904.47265625
57 8266.1171875
58 7677.85888671875
59 7135.7431640625
60 6635.23291015625
61 6

449 9.799198596738279e-05
450 9.599179611541331e-05
451 9.423636947758496e-05
452 9.257062629330903e-05
453 9.087519720196724e-05
454 8.944678120315075e-05
455 8.742039790377021e-05
456 8.553919906262308e-05
457 8.403001993428916e-05
458 8.250909741036594e-05
459 8.131912181852385e-05
460 7.963482494233176e-05
461 7.839352474547923e-05
462 7.721388101344928e-05
463 7.588497828692198e-05
464 7.465945236617699e-05
465 7.340106094488874e-05
466 7.218454265967011e-05
467 7.111355080269277e-05
468 6.999125616857782e-05
469 6.856793334009126e-05
470 6.758483505109325e-05
471 6.638457853114232e-05
472 6.567416130565107e-05
473 6.449961801990867e-05
474 6.331574695650488e-05
475 6.248529098229483e-05
476 6.157668394735083e-05
477 6.0308688262011856e-05
478 5.9341378801036626e-05
479 5.879437958355993e-05
480 5.760627027484588e-05
481 5.6728724302956834e-05
482 5.59602485736832e-05
483 5.496264566318132e-05
484 5.413146209320985e-05
485 5.327472899807617e-05
486 5.260075340629555e-05
487 5.1901