In [33]:
import torch
import torch.nn as nn
import torchvision.models as models
import torch.nn.functional as F
import numpy as np
import torch.optim as optim


### 搭建模型

In [34]:
class LinearBNAC(nn.Module):
    """Fully connected building block: a Linear layer followed by a mode-dependent tail.

    The tail appended after ``nn.Linear`` depends on the arguments:

    * hidden block (``is_output=False``): Dropout -> BatchNorm1d -> LeakyReLU
    * output block with a single unit: Sigmoid
    * output block with several units: Softmax over dim 1

    Args:
        in_channels: number of input features of the Linear layer.
        out_channels: number of output features of the Linear layer.
        bias: whether the Linear layer has a bias term.
        dropout: drop probability used by the hidden-block Dropout layer.
        is_output: build an output block instead of a hidden block.
    """

    def __init__(self, in_channels, out_channels, bias=True, dropout=0.3, is_output=False):
        super().__init__()
        # The Linear layer is always the first element of the Sequential.
        fully_connected = nn.Linear(in_channels, out_channels, bias=bias)

        if not is_output:
            tail = (
                nn.Dropout(dropout),
                nn.BatchNorm1d(out_channels),
                nn.LeakyReLU(inplace=True),
            )
        elif out_channels == 1:
            tail = (nn.Sigmoid(),)
        else:
            tail = (nn.Softmax(dim=1),)

        self.linear = nn.Sequential(fully_connected, *tail)

    def forward(self, x):
        """Run ``x`` through the Linear layer and its tail."""
        return self.linear(x)

In [35]:
class Model(nn.Module):
    """Small MLP: three hidden LinearBNAC blocks followed by an output block.

    Args:
        input_dimention: number of input features fed to the first block.
        output_classes: number of units of the output block (default 1).
    """

    def __init__(self, input_dimention, output_classes=1):
        super().__init__()
        # Hidden widths: input -> 128 -> 32 -> 32.
        self.layer1 = LinearBNAC(input_dimention, 128)
        self.layer2 = LinearBNAC(128, 32)
        self.layer3 = LinearBNAC(32, 32)
        self.output = LinearBNAC(32, output_classes, is_output=True)

    def forward(self, x):
        """Apply the three hidden blocks and then the output block, in order."""
        for stage in (self.layer1, self.layer2, self.layer3, self.output):
            x = stage(x)
        return x
        

### 準備輸入資料、優化器、標籤資料、模型輸出

In [36]:
# Build the network for 256 input features and 10 output classes.
model = Model(input_dimention=256,output_classes=10)
# Adam with its default hyperparameters over all parameters of the model.
optimizer = optim.Adam(params=model.parameters())

In [38]:
# Shape of the random batch: 4 samples with 256 features each.
batch_size, input_features = 4, 256
# Standard-normal dummy features of shape (batch_size, input_features).
dummy_input = torch.randn(batch_size, input_features)
# One integer class index per sample in the batch.
target = torch.tensor([9, 5, 4, 4], dtype=torch.long)

In [39]:
# Forward pass: run the dummy batch through the model and show the result.
output = model(dummy_input)
print(output)

tensor([[0.0778, 0.2153, 0.0870, 0.0675, 0.2296, 0.0501, 0.0284, 0.1086, 0.0644,
         0.0713],
        [0.0952, 0.1620, 0.0415, 0.0647, 0.2410, 0.0878, 0.0790, 0.0447, 0.1157,
         0.0683],
        [0.0729, 0.1166, 0.1070, 0.0704, 0.1716, 0.1372, 0.0795, 0.0773, 0.0889,
         0.0786],
        [0.1425, 0.0797, 0.0639, 0.0882, 0.1560, 0.1936, 0.1091, 0.0556, 0.0533,
         0.0580]], grad_fn=<SoftmaxBackward0>)


### 計算 CrossEntropy Loss
* 請注意哪一個 Loss最適合：我們已經使用 softmax
* 因為我們有使用 dropout，並隨機產生 dummy_input，所以各位學員得到的值會與解答不同，然而步驟與原理需要相同

In [40]:
from torch.nn import NLLLoss, LogSoftmax, CrossEntropyLoss

In [41]:
# Negative log-likelihood loss; it expects log-probabilities as its input.
criterion = NLLLoss()

In [42]:
# The model already ends in Softmax, so take the log of its probabilities
# before handing them to the criterion together with the class indices.
loss = criterion(output.log(), target)

### 完成back propagation並更新梯度

In [43]:
# Backpropagate the loss; this populates .grad on the parameters that contributed to it.
loss.backward()

In [44]:
# Inspect the first Linear layer after backward(): its weights and the gradient stored for them.
print(f'weight : {model.layer1.linear[0].weight}')
print('\n')
print(f'grad : {model.layer1.linear[0].weight.grad}')

weight : Parameter containing:
tensor([[-0.0367,  0.0324,  0.0499,  ...,  0.0319, -0.0236,  0.0232],
        [ 0.0535,  0.0526,  0.0186,  ...,  0.0344,  0.0095, -0.0515],
        [-0.0520, -0.0169, -0.0487,  ...,  0.0460, -0.0576, -0.0339],
        ...,
        [ 0.0202,  0.0337, -0.0009,  ...,  0.0566,  0.0131, -0.0353],
        [-0.0228, -0.0182, -0.0454,  ...,  0.0361, -0.0077, -0.0392],
        [-0.0530, -0.0192,  0.0044,  ...,  0.0139, -0.0194, -0.0616]],
       requires_grad=True)


grad : tensor([[-0.0040,  0.0041, -0.0047,  ..., -0.0030, -0.0072,  0.0084],
        [ 0.0181,  0.0003,  0.0054,  ...,  0.0127,  0.0087, -0.0075],
        [ 0.0190, -0.0113, -0.0533,  ...,  0.1623, -0.1211, -0.0309],
        ...,
        [ 0.0450,  0.0582,  0.0846,  ..., -0.0955,  0.1056, -0.0054],
        [ 0.0430,  0.0435,  0.0433,  ..., -0.0109,  0.0323, -0.0126],
        [-0.0521, -0.0314,  0.0407,  ..., -0.1907,  0.1420,  0.0328]])


In [45]:
# Apply one optimizer update to the parameters using the gradients currently stored in .grad.
optimizer.step()

In [46]:
# Inspect the same layer after optimizer.step(): compare the weights with the
# previous printout; the stored gradient is printed again alongside them.
print(f'weight : {model.layer1.linear[0].weight}')
print('\n')
print(f'grad : {model.layer1.linear[0].weight.grad}')

weight : Parameter containing:
tensor([[-0.0357,  0.0314,  0.0509,  ...,  0.0329, -0.0226,  0.0222],
        [ 0.0525,  0.0516,  0.0176,  ...,  0.0334,  0.0085, -0.0505],
        [-0.0530, -0.0159, -0.0477,  ...,  0.0450, -0.0566, -0.0329],
        ...,
        [ 0.0192,  0.0327, -0.0019,  ...,  0.0576,  0.0121, -0.0343],
        [-0.0238, -0.0192, -0.0464,  ...,  0.0371, -0.0087, -0.0382],
        [-0.0520, -0.0182,  0.0034,  ...,  0.0149, -0.0204, -0.0626]],
       requires_grad=True)


grad : tensor([[-0.0040,  0.0041, -0.0047,  ..., -0.0030, -0.0072,  0.0084],
        [ 0.0181,  0.0003,  0.0054,  ...,  0.0127,  0.0087, -0.0075],
        [ 0.0190, -0.0113, -0.0533,  ...,  0.1623, -0.1211, -0.0309],
        ...,
        [ 0.0450,  0.0582,  0.0846,  ..., -0.0955,  0.1056, -0.0054],
        [ 0.0430,  0.0435,  0.0433,  ..., -0.0109,  0.0323, -0.0126],
        [-0.0521, -0.0314,  0.0407,  ..., -0.1907,  0.1420,  0.0328]])


### 清空 gradient

In [47]:
# Clear the accumulated gradients before the next backward pass.
# set_to_none=False keeps each .grad as a zero-filled tensor. PyTorch >= 2.0
# defaults to set_to_none=True, which would replace .grad with None and make
# the following cell print "grad : None" instead of a tensor of zeros.
optimizer.zero_grad(set_to_none=False)

In [48]:
# Inspect the same layer after zero_grad(): print its weights and whatever is
# now stored in .grad.
print(f'weight : {model.layer1.linear[0].weight}')
print('\n')
print(f'grad : {model.layer1.linear[0].weight.grad}')

weight : Parameter containing:
tensor([[-0.0357,  0.0314,  0.0509,  ...,  0.0329, -0.0226,  0.0222],
        [ 0.0525,  0.0516,  0.0176,  ...,  0.0334,  0.0085, -0.0505],
        [-0.0530, -0.0159, -0.0477,  ...,  0.0450, -0.0566, -0.0329],
        ...,
        [ 0.0192,  0.0327, -0.0019,  ...,  0.0576,  0.0121, -0.0343],
        [-0.0238, -0.0192, -0.0464,  ...,  0.0371, -0.0087, -0.0382],
        [-0.0520, -0.0182,  0.0034,  ...,  0.0149, -0.0204, -0.0626]],
       requires_grad=True)


grad : tensor([[0., 0., 0.,  ..., 0., 0., 0.],
        [0., 0., 0.,  ..., 0., 0., 0.],
        [0., 0., 0.,  ..., 0., 0., 0.],
        ...,
        [0., 0., 0.,  ..., 0., 0., 0.],
        [0., 0., 0.,  ..., 0., 0., 0.],
        [0., 0., 0.,  ..., 0., 0., 0.]])
