## Let’s define this network:

In [70]:
import torch
import torch.nn as nn
import torch.nn.functional as F


class Net(nn.Module):
    """Small convolutional network: two 3x3 conv layers and three linear layers.

    ``forward`` takes a batch laid out as nSamples x 1 x Height x Width and
    returns 10 values per sample. ``fc1`` is sized for 16 feature maps of 6x6,
    which is what a 32x32 input produces after both conv + 2x2 max-pool stages
    (32 -> 30 -> 15 -> 13 -> 6).
    """

    def __init__(self):
        """Create the layers; submodules assigned here are registered by nn.Module."""
        super(Net, self).__init__()
        # nn.Conv2d expects input laid out as nSamples x nChannels x Height x Width.
        # 1 input image channel, 6 output channels, 3x3 square convolution kernel.
        # The layer holds a convolution weight and a bias (y = w * x + b).
        self.conv1 = nn.Conv2d(1, 6, 3)
        # 6 input channels, 16 output channels, 3x3 kernel; weight and bias as above.
        self.conv2 = nn.Conv2d(6, 16, 3)
        # Affine operations: y = Wx + b.
        # Example: nn.Linear(20, 30) applied to a (128, 20) input returns a
        # (128, 30) output; its weight has shape (30, 20) and its bias (30,).
        self.fc1 = nn.Linear(16 * 6 * 6, 120)  # 6*6 from image dimension
        self.fc2 = nn.Linear(120, 84)
        self.fc3 = nn.Linear(84, 10)

    def forward(self, x):
        """Run the network on a batch ``x`` and return a (nSamples, 10) tensor.

        This is where the input enters the network. It is normally invoked by
        calling the instance, e.g. ``net(x)``, rather than directly.
        """
        # Max pooling over a (2, 2) window
        x = F.max_pool2d(F.relu(self.conv1(x)), (2, 2))
        # For a square window a single number is enough
        x = F.max_pool2d(F.relu(self.conv2(x)), 2)
        # Flatten every dimension except the batch dimension. Unlike
        # view(-1, n), this does not have to infer the batch size, so an
        # empty batch is flattened to (0, n) instead of raising.
        x = torch.flatten(x, 1)
        x = F.relu(self.fc1(x))
        x = F.relu(self.fc2(x))
        x = self.fc3(x)
        return x

    def num_flat_features(self, x):
        """Return the number of elements per sample in ``x``.

        This is the product of all dimension sizes except the first (batch)
        dimension; it is 1 when ``x`` has no dimensions beyond the first.
        """
        num_features = 1
        for dim_size in x.size()[1:]:  # all dimensions except the batch dimension
            num_features *= dim_size
        return num_features


# Instantiate the network and print its registered submodules.
net = Net()
print(net)

Net(
  (conv1): Conv2d(1, 6, kernel_size=(3, 3), stride=(1, 1))
  (conv2): Conv2d(6, 16, kernel_size=(3, 3), stride=(1, 1))
  (fc1): Linear(in_features=576, out_features=120, bias=True)
  (fc2): Linear(in_features=120, out_features=84, bias=True)
  (fc3): Linear(in_features=84, out_features=10, bias=True)
)


In [71]:
# Collect the learnable parameters: one weight and one bias tensor per layer.
params = [p for p in net.parameters()]
print(len(params))
print(params[0].shape)  # conv1's .weight

10
torch.Size([6, 1, 3, 3])


In [72]:
# Random batch holding a single 1-channel 32x32 image.
# NOTE(review): the name `input` shadows the Python builtin; it is kept because
# later cells in this notebook reference it.
input = torch.randn(1, 1, 32, 32)
print(input)
print(input.size())
# Calling the module instance runs the forward pass on the batch.
out = net(input)
print(out)

tensor([[[[ 0.2219,  0.4635,  0.7848,  ..., -0.1373, -0.4292,  0.1728],
          [ 0.7193, -1.0484, -0.8449,  ...,  0.2562, -0.0103, -0.8813],
          [ 1.2131,  0.8784,  1.0395,  ...,  0.7770, -1.8204, -0.8009],
          ...,
          [ 0.1643,  0.0389,  1.0596,  ...,  0.9114, -0.4790, -1.9483],
          [ 1.4696, -0.3771, -0.3443,  ..., -0.4598,  2.1772,  0.1994],
          [ 0.2922,  0.2270, -2.0304,  ...,  0.9262,  0.2154,  0.9789]]]])
torch.Size([1, 1, 32, 32])
tensor([[-0.0333,  0.0958,  0.1138,  0.1069,  0.0224, -0.0446, -0.0157, -0.0009,
          0.0402, -0.0024]], grad_fn=<AddmmBackward>)


In [73]:
# Clear gradients already stored on the parameters, then backpropagate a random
# upstream gradient. `out` is not a scalar, so backward() is given an explicit
# gradient tensor with the same (1, 10) shape as `out`.
net.zero_grad()
out.backward(torch.randn(1, 10))
print(out)

tensor([[-0.0333,  0.0958,  0.1138,  0.1069,  0.0224, -0.0446, -0.0157, -0.0009,
          0.0402, -0.0024]], grad_fn=<AddmmBackward>)


### Loss Function

In [74]:
# Fresh forward pass whose result feeds the loss computation.
output = net(input)
print(output)
# Dummy target: one random value per network output.
target = torch.randn(10)
print(target)
# Add a leading batch axis so the target has the same shape as `output`.
target = target.unsqueeze(0)
print(target)

tensor([[-0.0333,  0.0958,  0.1138,  0.1069,  0.0224, -0.0446, -0.0157, -0.0009,
          0.0402, -0.0024]], grad_fn=<AddmmBackward>)
tensor([-6.7401e-01, -2.4427e+00, -1.3613e-03,  1.2007e+00, -1.0729e+00,
         6.6124e-01, -1.0013e+00,  2.1938e+00, -9.4258e-01,  2.1258e+00])
tensor([[-6.7401e-01, -2.4427e+00, -1.3613e-03,  1.2007e+00, -1.0729e+00,
          6.6124e-01, -1.0013e+00,  2.1938e+00, -9.4258e-01,  2.1258e+00]])


In [75]:
# Mean-squared error between the network output and the target.
criterion = nn.MSELoss()
loss = criterion(output, target)
print(loss)

tensor(2.1045, grad_fn=<MseLossBackward>)


In [76]:
# Walk backwards through the autograd graph, starting from the loss.
print(loss.grad_fn)  # MSELoss
print(loss.grad_fn.next_functions[0][0])  # Linear (shown as AddmmBackward)
# The recorded output for the next line is an AccumulateGrad node, not ReLU:
# index [0] of the Linear node's inputs leads to a leaf parameter
# (presumably fc3's bias -- confirm).
print(loss.grad_fn.next_functions[0][0].next_functions[0][0])  # AccumulateGrad

<MseLossBackward object at 0x7f84b27dccc0>
<AddmmBackward object at 0x7f84b27dc358>
<AccumulateGrad object at 0x7f84b27dccc0>


## Backprop

In [77]:
# Reset the stored gradients so this backward pass does not add to earlier ones.
net.zero_grad()
loss.backward()
print('conv1.bias.grad after backward')
# Gradient shapes of the conv biases first, then the conv weights (y = w * x + b).
for conv_param in (net.conv1.bias, net.conv2.bias, net.conv1.weight, net.conv2.weight):
    print(conv_param.grad.size())

conv1.bias.grad after backward
torch.Size([6])
torch.Size([16])
torch.Size([6, 1, 3, 3])
torch.Size([16, 6, 3, 3])


In [78]:
learning_rate = 0.01
# Manual SGD step: weight = weight - learning_rate * gradient.
# The in-place update runs under no_grad() instead of going through `.data`,
# so autograd does not record it but still sees that the tensor was modified.
with torch.no_grad():
    for f in net.parameters():
        # Parameters that received no gradient are left unchanged.
        if f.grad is not None:
            f.sub_(f.grad * learning_rate)
        print(f.detach())

tensor([[[[-0.1586,  0.0067, -0.1866],
          [ 0.0964, -0.3297, -0.2780],
          [ 0.0076, -0.0465, -0.2061]]],


        [[[ 0.3104, -0.1974, -0.0385],
          [-0.1484, -0.2484,  0.3057],
          [ 0.3126, -0.0397,  0.1510]]],


        [[[-0.1758,  0.2552, -0.0886],
          [-0.2462,  0.3127, -0.1820],
          [ 0.0142, -0.2186,  0.3178]]],


        [[[-0.0966,  0.1622,  0.0640],
          [ 0.0778,  0.1683,  0.2821],
          [-0.3312,  0.3273,  0.2066]]],


        [[[ 0.0154,  0.2446,  0.2423],
          [-0.1142, -0.2801, -0.0458],
          [-0.3293,  0.3116,  0.1538]]],


        [[[-0.1638,  0.3297,  0.0954],
          [-0.1683,  0.0992, -0.1208],
          [ 0.1110,  0.1326, -0.0940]]]])
tensor([ 0.0798, -0.0218,  0.2083,  0.1275, -0.1400,  0.0713])
tensor([[[[-1.3206e-01,  2.0059e-02, -1.1063e-01],
          [-8.0788e-02, -4.0699e-02,  1.2198e-01],
          [-1.1408e-01, -9.2944e-02, -1.2255e-01]],

         [[ 7.6423e-02, -3.9137e-02,  6.6615e-02],
      

tensor([[-0.0303,  0.0403,  0.0222,  ..., -0.0229, -0.0413,  0.0272],
        [-0.0142, -0.0287, -0.0173,  ...,  0.0261, -0.0015,  0.0102],
        [ 0.0195,  0.0202, -0.0077,  ...,  0.0151, -0.0109, -0.0214],
        ...,
        [ 0.0234,  0.0031,  0.0098,  ..., -0.0208, -0.0271, -0.0321],
        [-0.0301,  0.0101,  0.0058,  ...,  0.0147,  0.0075, -0.0081],
        [ 0.0379,  0.0240, -0.0249,  ...,  0.0222,  0.0018,  0.0387]])
tensor([-2.7890e-02, -2.8649e-02, -4.1386e-02,  1.9185e-02,  3.0165e-02,
        -3.7586e-02, -3.4182e-02, -1.0102e-02, -2.0088e-02,  6.9623e-03,
         1.1304e-02,  2.1051e-02, -2.6885e-02, -3.1269e-02,  2.3910e-02,
        -8.5627e-03,  2.4425e-02,  1.8542e-02,  1.1683e-02, -4.9505e-03,
         3.2173e-02,  2.4106e-02, -3.1782e-02,  3.4291e-02,  1.0277e-02,
        -8.1558e-03,  3.5383e-02, -5.3250e-04,  1.7716e-02, -3.7894e-02,
        -1.0031e-02,  1.3800e-02,  9.6270e-04,  1.1641e-02, -2.5192e-02,
        -2.6604e-02,  2.9310e-02, -2.1298e-02, -3.7206e

## Using `torch.optim`

In [81]:
from torch import optim

# Build the optimizer once, over all of the network's parameters.
optimizer = optim.SGD(net.parameters(), lr=0.01)

# One iteration of the training loop:
optimizer.zero_grad()             # clear the gradient buffers
output = net(input)               # forward pass
loss = criterion(output, target)  # compare prediction with the target
loss.backward()                   # compute gradients
optimizer.step()                  # apply the update
