In [2]:
import torch
import torch.nn as nn
import torchvision.models as models
import torch.nn.functional as F
import numpy as np
import torch.optim as optim


### 搭建模型

In [3]:
class LinearBNAC(nn.Module):
    """Linear layer followed by either a hidden-layer stack or an output activation.

    The sub-modules are stored in ``self.linear`` (an ``nn.Sequential``) whose
    first element is always the ``nn.Linear`` layer:

    * hidden block (``is_output=False``):
      Linear -> Dropout -> BatchNorm1d -> LeakyReLU (in-place)
    * output block with a single unit (``is_output=True, out_channels == 1``):
      Linear -> Sigmoid
    * output block with several units (``is_output=True``):
      Linear -> Softmax over dim 1

    Args:
        in_channels: number of input features of the linear layer.
        out_channels: number of output features of the linear layer.
        bias: whether the linear layer has a bias term.
        dropout: drop probability; only used by the hidden block.
        is_output: build an output block instead of a hidden block.
    """

    def __init__(self, in_channels, out_channels, bias=True, dropout=0.3, is_output=False):
        super().__init__()
        # The affine layer always comes first; what follows depends on the role.
        stages = [nn.Linear(in_channels, out_channels, bias=bias)]
        if not is_output:
            stages += [
                nn.Dropout(dropout),
                nn.BatchNorm1d(out_channels),
                nn.LeakyReLU(inplace=True),
            ]
        elif out_channels == 1:
            # A single output unit is squashed into (0, 1).
            stages.append(nn.Sigmoid())
        else:
            # Several output units are normalised into a distribution per row.
            stages.append(nn.Softmax(dim=1))
        self.linear = nn.Sequential(*stages)

    def forward(self, x):
        """Run ``x`` through the sequential stack and return the result."""
        return self.linear(x)

In [4]:
class Model(nn.Module):
    """Four-stage fully connected network built from ``LinearBNAC`` blocks.

    Three hidden blocks (``input_dimention`` -> 128 -> 32 -> 32) are followed
    by an output block with ``output_classes`` units. The output block is
    created with ``is_output=True``, so its activation is chosen by
    ``LinearBNAC`` from the number of output units.

    Args:
        input_dimention: number of features in each input sample.
        output_classes: number of units produced by the output block.
    """

    def __init__(self, input_dimention, output_classes=1):
        super().__init__()
        wide, narrow = 128, 32
        self.layer1 = LinearBNAC(input_dimention, wide)
        self.layer2 = LinearBNAC(wide, narrow)
        self.layer3 = LinearBNAC(narrow, narrow)
        self.output = LinearBNAC(narrow, output_classes, is_output=True)

    def forward(self, x):
        """Feed ``x`` through the three hidden blocks and then the output block."""
        for stage in (self.layer1, self.layer2, self.layer3, self.output):
            x = stage(x)
        return x
        

### 準備輸入資料、優化器、標籤資料、模型輸出

In [5]:
# Ten-way classifier over 256-dimensional feature vectors.
model = Model(input_dimention=256, output_classes=10)
# Adam with its default hyperparameters, optimising every parameter of the model.
optimizer = optim.Adam(model.parameters())

In [6]:
# Synthetic mini-batch: standard-normal noise stands in for real features.
batch_size = 4
input_features = 256
dummy_input = torch.randn((batch_size, input_features))
# One integer class index per sample in the batch.
target = torch.tensor([9, 5, 4, 4], dtype=torch.int64)

In [7]:
# Forward pass. The model was built with output_classes=10, so its output block
# ends in Softmax(dim=1): each row of `output` is a distribution over 10 classes.
# NOTE(review): no model.eval() call precedes this cell, so Dropout is presumably
# still active and repeated runs give different values.
output = model(dummy_input)
print(output)

tensor([[0.0998, 0.1164, 0.0983, 0.1148, 0.0971, 0.1342, 0.0487, 0.0877, 0.0582,
         0.1447],
        [0.1446, 0.0715, 0.0801, 0.1736, 0.0396, 0.0784, 0.1596, 0.0632, 0.0784,
         0.1112],
        [0.1191, 0.0720, 0.1310, 0.1154, 0.1131, 0.0781, 0.1157, 0.1057, 0.0584,
         0.0915],
        [0.0972, 0.0997, 0.1022, 0.1650, 0.1136, 0.0534, 0.0912, 0.1472, 0.0924,
         0.0383]], grad_fn=<SoftmaxBackward0>)


### 計算 CrossEntropy Loss
* 請注意哪一個 Loss最適合：我們已經使用 softmax
* 因為我們有使用dropout，並隨機產生dummy_input，所以各位學員得到的值會與解答不同，然而步驟原理需要相同

In [8]:
from torch.nn import NLLLoss, LogSoftmax, CrossEntropyLoss

In [9]:
# Negative log-likelihood loss with default settings; it expects log-probabilities as input.
criterion = nn.NLLLoss()

In [10]:
# `output` already holds softmax probabilities, so NLLLoss needs their logarithm.
# Clamp to the smallest positive normal value first: a probability that underflows
# to exactly 0 would give log(0) = -inf, i.e. an infinite loss and NaN gradients.
# Probabilities at or above that threshold are left unchanged.
loss = criterion(torch.log(output.clamp_min(torch.finfo(output.dtype).tiny)), target)

### 完成back propagation並更新梯度

In [11]:
# Backpropagate from the scalar loss: fills `.grad` on the model parameters
# (the next cell prints layer1's weight gradient). Weights are not changed here.
loss.backward()

In [12]:
# Inspect the first linear layer after backward(): weights plus the freshly computed gradient.
print(f'weight : {model.layer1.linear[0].weight}')
print('\n')
print(f'grad : {model.layer1.linear[0].weight.grad}')

weight : Parameter containing:
tensor([[ 0.0192,  0.0475, -0.0610,  ..., -0.0491,  0.0160, -0.0268],
        [-0.0472,  0.0356,  0.0136,  ...,  0.0271, -0.0391, -0.0019],
        [-0.0185,  0.0291,  0.0050,  ..., -0.0454,  0.0508, -0.0152],
        ...,
        [-0.0562, -0.0616,  0.0290,  ..., -0.0068, -0.0337,  0.0215],
        [ 0.0500,  0.0233,  0.0470,  ...,  0.0025, -0.0418,  0.0300],
        [-0.0151, -0.0027, -0.0221,  ...,  0.0040, -0.0377,  0.0336]],
       requires_grad=True)


grad : tensor([[-2.4493e-06, -6.0981e-07, -2.0408e-06,  ...,  2.9309e-06,
          4.0316e-07, -1.7356e-06],
        [-1.5596e-02,  3.2472e-02, -1.1739e-02,  ...,  8.6038e-03,
          1.1438e-02,  1.1598e-02],
        [ 3.0376e-03,  2.6958e-02, -1.3049e-02,  ..., -7.4004e-03,
         -2.2918e-02, -6.2787e-03],
        ...,
        [-1.8330e-06, -2.7785e-05,  1.9422e-05,  ..., -2.8468e-06,
          1.6728e-05,  1.0989e-05],
        [ 4.3595e-03, -1.3811e-01,  8.8220e-02,  ..., -1.8158e-02,
       

In [13]:
# Apply one Adam update using the gradients computed by loss.backward().
# This changes the weights; the stored gradients are left as they are
# (compare the printouts before and after this cell).
optimizer.step()

In [14]:
# Inspect the first linear layer after step(): weights have moved, the gradient is unchanged.
print(f'weight : {model.layer1.linear[0].weight}')
print('\n')
print(f'grad : {model.layer1.linear[0].weight.grad}')

weight : Parameter containing:
tensor([[ 0.0202,  0.0485, -0.0600,  ..., -0.0501,  0.0150, -0.0258],
        [-0.0462,  0.0346,  0.0146,  ...,  0.0261, -0.0401, -0.0029],
        [-0.0195,  0.0281,  0.0060,  ..., -0.0444,  0.0518, -0.0142],
        ...,
        [-0.0552, -0.0606,  0.0280,  ..., -0.0058, -0.0347,  0.0205],
        [ 0.0490,  0.0243,  0.0460,  ...,  0.0035, -0.0428,  0.0290],
        [-0.0161, -0.0017, -0.0231,  ...,  0.0050, -0.0367,  0.0346]],
       requires_grad=True)


grad : tensor([[-2.4493e-06, -6.0981e-07, -2.0408e-06,  ...,  2.9309e-06,
          4.0316e-07, -1.7356e-06],
        [-1.5596e-02,  3.2472e-02, -1.1739e-02,  ...,  8.6038e-03,
          1.1438e-02,  1.1598e-02],
        [ 3.0376e-03,  2.6958e-02, -1.3049e-02,  ..., -7.4004e-03,
         -2.2918e-02, -6.2787e-03],
        ...,
        [-1.8330e-06, -2.7785e-05,  1.9422e-05,  ..., -2.8468e-06,
          1.6728e-05,  1.0989e-05],
        [ 4.3595e-03, -1.3811e-01,  8.8220e-02,  ..., -1.8158e-02,
       

### 清空 gradient

In [15]:
# Reset the gradients so the next backward() does not accumulate onto old values.
# set_to_none=False keeps each `.grad` as a zero-filled tensor, which is what the
# printout after this cell shows; recent PyTorch releases default to
# set_to_none=True, which would replace `.grad` with None instead.
optimizer.zero_grad(set_to_none=False)

In [16]:
# Inspect the first linear layer after zero_grad(): weights keep their updated values.
print(f'weight : {model.layer1.linear[0].weight}')
print('\n')
print(f'grad : {model.layer1.linear[0].weight.grad}')

weight : Parameter containing:
tensor([[ 0.0202,  0.0485, -0.0600,  ..., -0.0501,  0.0150, -0.0258],
        [-0.0462,  0.0346,  0.0146,  ...,  0.0261, -0.0401, -0.0029],
        [-0.0195,  0.0281,  0.0060,  ..., -0.0444,  0.0518, -0.0142],
        ...,
        [-0.0552, -0.0606,  0.0280,  ..., -0.0058, -0.0347,  0.0205],
        [ 0.0490,  0.0243,  0.0460,  ...,  0.0035, -0.0428,  0.0290],
        [-0.0161, -0.0017, -0.0231,  ...,  0.0050, -0.0367,  0.0346]],
       requires_grad=True)


grad : tensor([[0., 0., 0.,  ..., 0., 0., 0.],
        [0., 0., 0.,  ..., 0., 0., 0.],
        [0., 0., 0.,  ..., 0., 0., 0.],
        ...,
        [0., 0., 0.,  ..., 0., 0., 0.],
        [0., 0., 0.,  ..., 0., 0., 0.],
        [0., 0., 0.,  ..., 0., 0., 0.]])
