In [2]:
import numpy as np

In [5]:
def sigmoid(x):
    """Element-wise logistic sigmoid, 1 / (1 + exp(-x)).

    Evaluated through exp(-|x|) so that large-magnitude negative inputs do
    not overflow inside np.exp. The plain formula produces the right value
    there but emits an overflow RuntimeWarning on the way.

    Args:
        x: A real scalar or array-like of real numbers.

    Returns:
        The sigmoid of every element, with the same shape as ``x``
        (a NumPy scalar when ``x`` is a scalar).
    """
    x = np.asarray(x)
    decay = np.exp(-np.abs(x))  # exponent is never positive, so no overflow
    # x >= 0: 1 / (1 + e^-x);  x < 0: e^x / (1 + e^x). Both are the same
    # function, each written so that only exp of a non-positive value is used.
    out = np.where(x >= 0, 1 / (1 + decay), decay / (1 + decay))
    return out[()]  # unwrap 0-d results so a scalar input gives a scalar

In [3]:
X = np.array([1.0, 0.5])  # input sample, shape (2,)
W1 = np.array([[0.1, 0.3, 0.5],
               [0.2, 0.4, 0.6]])  # first-layer weights, shape (2, 3)
B1 = np.array([0.1, 0.2, 0.3])  # first-layer biases, shape (3,)

print(W1.shape)  # (2, 3)
print(X.shape)  # (2,)
print(B1.shape)  # (3,)

# Weighted sum of the first layer. The sigmoid activation and the printing of
# A1 / Z1 live in the following cell only, so they are not repeated here.
A1 = np.dot(X, W1) + B1  # shape (3,), values [0.3, 0.7, 1.1]

(2, 3)
(2,)
(3,)


In [6]:
# First hidden layer: pass the weighted sums A1 through the sigmoid.
Z1 = sigmoid(A1)  # shape (3,)

# Pre-activation and post-activation values, one array per line.
print(A1, Z1, sep="\n")  # [0.3 0.7 1.1] then [0.57444252 0.66818777 0.75026011]

[0.3 0.7 1.1]
[0.57444252 0.66818777 0.75026011]


In [7]:
# Second layer parameters (3 inputs -> 2 outputs).
W2 = np.array([
    [0.1, 0.4],
    [0.2, 0.5],
    [0.3, 0.6],
])  # shape (3, 2)
B2 = np.array([0.1, 0.2])  # shape (2,)

# Shapes of the layer input, the weights and the biases, one per line.
print(Z1.shape, W2.shape, B2.shape, sep="\n")  # (3,) / (3, 2) / (2,)

A2 = Z1.dot(W2) + B2  # weighted sum, shape (2,)
Z2 = sigmoid(A2)  # activation, shape (2,)

(3,)
(3, 2)
(2,)


In [8]:
# Implementation of the hidden layer -> output layer step.
# The identity function is used as the activation function.

def identity_function(x):
    """Identity activation: hand the argument back unchanged."""
    output = x
    return output

# Output layer parameters (2 inputs -> 2 outputs).
W3 = np.array([
    [0.1, 0.3],
    [0.2, 0.4],
])  # shape (2, 2)
B3 = np.array([0.1, 0.2])  # shape (2,)

A3 = Z2.dot(W3) + B3  # weighted sum of the output layer
Y = identity_function(A3)  # identity activation: Y carries the values of A3

In [9]:
def init_network():
    """Build the fixed parameters of the 3-layer example network.

    Returns:
        dict with weight matrices under 'W1', 'W2', 'W3' (shapes (2, 3),
        (3, 2), (2, 2)) and bias vectors under 'b1', 'b2', 'b3' (shapes
        (3,), (2,), (2,)). A fresh dict of fresh arrays is built on each call.
    """
    return {
        'W1': np.array([[0.1, 0.3, 0.5], [0.2, 0.4, 0.6]]),
        'b1': np.array([0.1, 0.2, 0.3]),
        'W2': np.array([[0.1, 0.4], [0.2, 0.5], [0.3, 0.6]]),
        'b2': np.array([0.1, 0.2]),
        'W3': np.array([[0.1, 0.3], [0.2, 0.4]]),
        'b3': np.array([0.1, 0.2]),
    }

def forward(network, x):
    """Run one forward pass through the 3-layer network.

    Two sigmoid-activated layers are followed by an output layer that uses
    the identity function.

    Args:
        network: dict holding 'W1', 'W2', 'W3', 'b1', 'b2', 'b3', in the
            form returned by init_network().
        x: Input array; its length must match the number of rows of W1.

    Returns:
        The output-layer values.
    """
    param_names = ('W1', 'W2', 'W3', 'b1', 'b2', 'b3')
    W1, W2, W3, b1, b2, b3 = (network[name] for name in param_names)

    hidden1 = sigmoid(np.dot(x, W1) + b1)
    hidden2 = sigmoid(np.dot(hidden1, W2) + b2)
    return identity_function(np.dot(hidden2, W3) + b3)

# Build the example network and push a single input sample through it.
network, x = init_network(), np.array([1.0, 0.5])  # x has shape (2,)
y = forward(network, x)
print(y)  # [0.31682708 0.69627909]
    




[0.31682708 0.69627909]


## 3.5 출력층 설계하기

### 3.5.1 항등 함수와 소프트맥스 함수 구현하기

In [11]:
a = np.array([0.3, 2.9, 4.0])

# Numerator: the exponential of every element.
exp_a = np.exp(a)
print(exp_a)  # [ 1.34985881 18.17414537 54.59815003]

# Denominator: the sum of those exponentials.
sum_exp_a = exp_a.sum()
print(sum_exp_a)  # 74.1221542101633

# Softmax output: each exponential divided by the sum.
y = exp_a / sum_exp_a
print(y)  # [0.01821127 0.24519181 0.73659691]

[ 1.34985881 18.17414537 54.59815003]
74.1221542101633
[0.01821127 0.24519181 0.73659691]


In [12]:
def softmax(a):
    """Softmax over all elements of ``a``: exp(a_i) / sum_j exp(a_j).

    The maximum element is subtracted before exponentiating. That leaves
    the result mathematically unchanged, but keeps np.exp from overflowing
    to inf (and the division from yielding nan) for large scores such as
    [1010, 1000, 990].

    Args:
        a: Array-like of real-valued scores.

    Returns:
        NumPy array with the shape of ``a`` whose elements sum to 1.
    """
    # np.subtract (rather than the ``-`` operator) keeps plain lists working,
    # as they did with the unshifted formula.
    shifted = np.subtract(a, np.max(a))  # largest exponent becomes 0
    exp_a = np.exp(shifted)
    return exp_a / np.sum(exp_a)

### 3.5.2 소프트맥스 함수 구현 시 주의점

In [13]:
# Naive softmax on large scores: np.exp overflows, so the quotient is nan.
a = np.array([1010, 1000, 990])
np.exp(a) / np.sum(np.exp(a)) # [nan nan nan]

# Subtracting the maximum first keeps every exponent at or below zero.
c = np.max(a) # 1010
a - c # [  0 -10 -20]

# Same quotient on the shifted scores. As the last expression of the cell,
# this is the value displayed as the cell's output.
np.exp(a - c) / np.sum(np.exp(a - c)) # [9.99954600e-01 4.53978686e-05 2.06106005e-09]


  np.exp(a) / np.sum(np.exp(a)) # [nan nan nan]
  np.exp(a) / np.sum(np.exp(a)) # [nan nan nan]


array([9.99954600e-01, 4.53978686e-05, 2.06106005e-09])

In [14]:
def softmax(a):
    """Softmax over all elements of ``a``, guarded against overflow.

    Args:
        a: NumPy array of real-valued scores.

    Returns:
        Array with the shape of ``a``: exp(a - max(a)) divided by its sum.
    """
    peak = np.max(a)
    numerators = np.exp(a - peak)  # overflow countermeasure: exponents are <= 0
    return numerators / np.sum(numerators)

### 3.5.3 소프트맥스 함수의 특징

In [15]:
a = np.array([0.3, 2.9, 4.0])
y = softmax(a)
print(y)  # [0.01821127 0.24519181 0.73659691]

# The softmax outputs add up to 1.
y.sum()  # 1.0

[0.01821127 0.24519181 0.73659691]


1.0

### 3.5.4 출력층의 뉴런 수 정하기

In [16]:
# exp(1000) is too large to represent as a finite float: the cell's result is
# inf, which is why softmax shifts its input before exponentiating.
np.exp(1000)

  np.exp(1000)


inf

### 3.6.1 MNIST 데이터 셋