# Tensors

- NumPy의 array와 PyTorch의 Tensor를 사용하면 신경망을 구현할 수 있음

### Example
- Fully Connected Network
    - 1 hidden layer
    - ReLU
- 출력과 정답 사이의 유클리드 거리의 제곱 (squared Euclidean distance)을 최소화하도록 Optimize
- 경사하강법(gradient descent) 사용

In [1]:
"""Network configuration: layer sizes and learning rate."""
N = 64        # batch size
D_in = 1000   # input dimension
H = 100       # hidden-layer dimension
D_out = 10    # output dimension

# Step size applied to the gradients when updating the weights.
lr = 1e-6

## Numpy

NumPy를 사용한 신경망 구성

In [2]:
import numpy as np

# Random input batch x, random targets y, and randomly initialised weights.
# The network has no bias terms: y_pred = relu(x @ w1) @ w2.
x = np.random.randn(N, D_in)
w1 = np.random.randn(D_in, H)
w2 = np.random.randn(H, D_out)
y = np.random.randn(N, D_out)

for t in range(500):
    # Forward pass: linear -> ReLU -> linear.
    h = np.dot(x, w1)
    h_relu = np.maximum(h, 0)
    y_pred = np.dot(h_relu, w2)

    # Loss: sum of squared errors over every element of the batch.
    loss = np.sum(np.square(y_pred - y))
    if t % 10 == 0:
        print(t, loss)

    # Backward pass: apply the chain rule by hand to obtain the gradients
    # of the loss with respect to w1 and w2.
    grad_y_pred = 2.0 * (y_pred - y)
    grad_w2 = np.dot(h_relu.T, grad_y_pred)
    grad_h_relu = np.dot(grad_y_pred, w2.T)
    grad_h = np.copy(grad_h_relu)
    # ReLU passes no gradient where its input was negative.
    grad_h[h < 0.0] = 0.0
    grad_w1 = np.dot(x.T, grad_h)

    # Gradient-descent step: update the weights in place.
    w1 -= lr * grad_w1
    w2 -= lr * grad_w2

# Check that the optimisation converged. Sum the *absolute* residuals:
# a signed sum lets positive and negative errors cancel, so it can be close
# to zero even when the predictions are far from the targets.
# NOTE: y_pred comes from the last iteration's forward pass, i.e. it was
# computed just before the final weight update.
print(f"y_pred와 y의 차이 : {np.abs(y_pred - y).sum()}")

0 41963560.14308944
10 1056446.2223585618
20 228394.62566683267
30 72842.44306823978
40 27951.10605644983
50 12040.63143443187
60 5641.212561662921
70 2803.451918716597
80 1452.372841771118
90 776.1115150693354
100 425.04174857288893
110 236.8852709751962
120 133.83674895018794
130 76.46799065812289
140 44.10720485513167
150 25.653999015677073
160 15.0327829252869
170 8.87024144471873
180 5.2678618041080725
190 3.1477875011610372
200 1.8920912013714632
210 1.14375759712079
220 0.6952105039097155
230 0.42482694073843397
240 0.2609284004540519
250 0.16105366543905508
260 0.09987699315548308
270 0.06222188613314509
280 0.03892969177658039
290 0.02445606449073281
300 0.015423034109841109
310 0.00976151051065816
320 0.0061991422978108805
330 0.0039491816224595366
340 0.0025232358022391384
350 0.0016164761243093028
360 0.0010381742857325095
370 0.0006682727291755953
380 0.0004310587748544846
390 0.00027858143833720277
400 0.00018034991942784064
410 0.00011693865189273142
420 7.59325977414547e-05

## PyTorch

PyTorch Tensor를 사용한 신경망 구성

In [3]:
# mm : matrix multiplication, [n, m] x [m, p] = [n, p]
# bmm : batch matrix multiplication, [B, n, m] x [B, m, p] = [B, n, p]
# (두 operand가 모두 batch일 때 사용)

In [4]:
import torch

# Run on the GPU when CUDA is available, otherwise fall back to the CPU.
device = "cuda" if torch.cuda.is_available() else "cpu"

# Random input batch x, random targets y, and randomly initialised weights,
# all created directly on the selected device.
# The network has no bias terms: y_pred = relu(x @ w1) @ w2.
x = torch.randn(N, D_in, device=device, dtype=torch.float)
w1 = torch.randn(D_in, H, device=device, dtype=torch.float)
w2 = torch.randn(H, D_out, device=device, dtype=torch.float)
y = torch.randn(N, D_out, device=device, dtype=torch.float)

for t in range(500):
    # Forward pass: linear -> ReLU (clamp at zero) -> linear.
    h = x.mm(w1)
    h_relu = h.clamp(min=0)
    y_pred = h_relu.mm(w2)

    # Loss: sum of squared errors; .item() extracts it as a Python number.
    loss = (y_pred - y).pow(2).sum().item()
    if t % 10 == 0:
        print(t, loss)

    # Backward pass: apply the chain rule by hand to obtain the gradients
    # of the loss with respect to w1 and w2 (autograd is not used here).
    grad_y_pred = 2.0 * (y_pred - y)
    grad_h_relu = grad_y_pred.mm(w2.t())
    grad_w2 = h_relu.t().mm(grad_y_pred)
    grad_h = grad_h_relu.clone()
    # ReLU passes no gradient where its input was negative.
    grad_h[h < 0] = 0.0
    grad_w1 = x.t().mm(grad_h)

    # Gradient-descent step: update the weights in place.
    w1 -= lr * grad_w1
    w2 -= lr * grad_w2

# Check that the optimisation converged. Sum the *absolute* residuals:
# a signed sum lets positive and negative errors cancel, so it can be close
# to zero even when the predictions are far from the targets.
# .item() prints a plain number instead of a tensor repr.
# NOTE: y_pred comes from the last iteration's forward pass, i.e. it was
# computed just before the final weight update.
print(f"y_pred와 y의 차이 : {(y_pred - y).abs().sum().item()}")

0 33062464.0
10 1217473.5
20 220373.5
30 73573.203125
40 29754.2734375
50 13405.0869140625
60 6470.486328125
70 3283.988037109375
80 1734.6171875
90 945.748779296875
100 529.30029296875
110 302.9834289550781
120 176.92657470703125
130 105.00982666015625
140 63.209625244140625
150 38.512176513671875
160 23.704349517822266
170 14.718262672424316
180 9.207853317260742
190 5.798083305358887
200 3.671607494354248
210 2.336486339569092
220 1.4932191371917725
230 0.9578414559364319
240 0.6164311766624451
250 0.397847980260849
260 0.2574297785758972
270 0.16701817512512207
280 0.10856146365404129
290 0.07072456181049347
300 0.04616745933890343
310 0.03022788278758526
320 0.019839180633425713
330 0.013080425560474396
340 0.00867203064262867
350 0.00579190906137228
360 0.003916237037628889
370 0.002679473487660289
380 0.001866974402219057
390 0.0013255344238132238
400 0.0009643493103794754
410 0.0007132808677852154
420 0.0005390228470787406
430 0.0004157178336754441
440 0.0003268640430178493
450