# Neural Network

### Torch.nn
신경망 모델이 존재

- nn.Module : Layer, input, output 모델 존재

### Torch. nn vs Torch.nn.function
- nn.Conv2d(input_channel, out_channel, kernel)
- nn.functional.conv2d(input, weight) : 외부에서 만들어진 weight를 추가

### class을 이용한 Torch 정의
- `__init__`을 이용해 Layer 정의
- `foward` 함수 정의 (backward는 자동으로 정의 된다)
- torch.nn은 mini-batch 이용

### parameter 
- net.parameters()

### Recap
- torch.Tensor : autograd을 지지하는 multi-dimensional array
- torch.nn.Model : NN module
- torch.nn.Parameter : Tensor의 한종류로 Module 생성시 지정
- autograd.Function : forward, backward를 실행

In [2]:
import torch
import torch.nn as nn
import torch.nn.functional as F

### Neural Network 정의
- neural network 정의
- forward
- loss 정의
- weight 업데이트

In [3]:
class Net(nn.Module):
    def __init__(self):
        super(Net, self).__init__()
        
        # Conv2d(input_channel, output_channel, kernel_size)
        self.conv1 = nn.Conv2d(1, 6, 5)
        self.conv2 = nn.Conv2d(6, 16, 5)
        
        # Linear(input_feature, output_feature)
        self.fc1 = nn.Linear(16 * 5 * 5, 120)
        self.fc2 = nn.Linear(120, 84)
        self.fc3 = nn.Linear(84, 10)
        
    def forward(self, x):
        # input(32, 32, 1) -> conv1(28, 28, 6) -> relu -> max_pool2d(14, 14, 6) 
        # -> conv2(10, 10, 16) -> relu -> max_pool2d(5, 5, 16)
        # -> flatten(16 * 5 * 5)
        # -> Linear(16 * 5 * 5) -> Linear(120) -> Linear(84)
        
        
        # max_pool2d(input, kernel) 
        x = F.max_pool2d(F.relu(self.conv1(x)), (2, 2))
        
        # max_pool2d(input, kernel) : square일 때 scalar로 입력 가능
        x = F.max_pool2d(F.relu(self.conv2(x)), 2)
        
        # flatten
        x = x.view(-1, self.num_flat_features(x))
        
        # Linear
        x = F.relu(self.fc1(x))
        
        # Linear
        x = F.relu(self.fc2(x))
        
        # Linear
        x = F.relu(self.fc3(x))
        
        return x
    
    def num_flat_features(self, x):
        size = x.size()[1:]
        num_features = 1
        for s in size:
            num_features *= s
        return num_features

In [4]:
net = Net()
print(net)

Net(
  (conv1): Conv2d(1, 6, kernel_size=(5, 5), stride=(1, 1))
  (conv2): Conv2d(6, 16, kernel_size=(5, 5), stride=(1, 1))
  (fc1): Linear(in_features=400, out_features=120, bias=True)
  (fc2): Linear(in_features=120, out_features=84, bias=True)
  (fc3): Linear(in_features=84, out_features=10, bias=True)
)


In [8]:
params = list(net.parameters()) # parameter가 layer 순으로 저장 된다.
print("parameter 수 : {}".format(len(params)))
print("1 layer parameter : ", params[3].size())

parameter 수 : 10
1 layer parameter :  torch.Size([16])


In [10]:
input = torch.randn(1, 1, 32, 32)
out = net(input)
print(out)

tensor(1.00000e-02 *
       [[ 0.0000,  1.7231,  1.4467,  7.7901,  0.0000,  0.0000,  0.1344,
          2.0818,  6.8189,  0.0000]])


In [11]:
net.zero_grad() # parameter로 0으로 세팅
out.backward(torch.randn(1, 10))

### Loss fucntion

In [12]:
output = net(input)
target = torch.arange(1, 11).view(1, -1)

criterion = nn.MSELoss()
loss = criterion(output, target)

print(loss)

tensor(38.2653)


In [14]:
print(loss.grad_fn) # MSELoss grad
print(loss.grad_fn.next_functions[0][0])
print(loss.grad_fn.next_functions[0][0].next_functions[0][0])

<MseLossBackward object at 0x00000222054AE5C0>
<ReluBackward object at 0x00000222054AE710>
<AddmmBackward object at 0x00000222054AE5C0>


### Backprop

In [15]:
net.zero_grad()

print("conv1.bias.grad before backward")
print(net.conv1.bias.grad)

loss.backward()

print('conv1.bias.grad after backward')
print(net.conv1.bias.grad)

conv1.bias.grad before backward
tensor([ 0.,  0.,  0.,  0.,  0.,  0.])
conv1.bias.grad after backward
tensor(1.00000e-02 *
       [ 5.3676,  3.2286,  3.3421,  0.6790,  1.3985,  4.6406])


### Update the weight

#### 1. 직접 Update

In [16]:
#SGD
learning_rate = 0.01

for f in net.parameters():
    f.data.sub_(f.grad.data * learning_rate)

#### 2. opti function으로 update

In [17]:
import torch.optim as optim

# set optimizer
optimizer = optim.SGD(net.parameters(), lr = 0.01)

In [18]:
# train
optimizer.zero_grad()
output = net(input)
loss = criterion(output, target)
loss.backward()

optimizer.step()