In [56]:
import numpy as np

# 목적: 다범주 분류 문제(1~5 분류)
# 활성함수: 은닉층-ReLU, 출력층-Softmax
# 신경망 구조
#  입력층: 5*5 행렬이므로 25개 노드
#  은닉층: 3레이어 20개 노드
#  출력층: 5개 범주를 분류하므로 5개 노드
# X: 학습 데이터 변수
# D: 합습 데이터의 정답 변수
# W1: 입력층 - 은닉층1 가중치 행렬 변수
# W2: 은닉층1 - 은닉층2 가중치 행렬 변수
# W3: 은닉층2 - 은닉층3 가중치 행렬 변수
# W4: 은닉층3 - 출력층 가중치 행렬 변수
# Backpropagation algorithm: SGD
# learning rate: 0.1


# 현재 문제점
# 소프트 맥스 함수를 사용하면서 지수법칙이 이용되고..
# 지수가 높아 nan 현상이 발생하여
# 소프트 함수 개선을 했으나.. 
# 모두 음수가 되어 Relu 에서 0으로 처리함...
# 일단 이 소스는 넘김

lr = 0.1
N = 5

def softmax(x) :
    exp_x = np.exp(x)
    sum_exp_x = np.sum(exp_x)
    y = exp_x / sum_exp_x
    
    return y

# 지수함수는 지수가 커질수록 매우 큰 폭으로 증가하기 때문에
# overflow가 발생하기 쉽다.
# overflow를 방지하는 코드임
def softmax2(x):
    m = np.max(x)       #최댓값
    exp_x = np.exp(x-m) # 각각의 원소에 최댓값을 뺀 값에 exp를 취한다.
    sum_exp_x = np.sum(exp_x)
    y = exp_x / sum_exp_x
    
    return y

def derivativeofSoftmax():
    return 1

def reLU(x):
    y = np.maximum(0,x)

    return y

In [76]:
def deepReLU(W1, W2, W3, W4, X, D):
    for k in range(N):
        x = np.asmatrix(X[k])
        d = np.asmatrix(D[k])
        hidden_v1 = np.asmatrix(np.zeros(20, dtype=float).reshape(20,1))
        hidden_y1 = np.asmatrix(np.zeros(20, dtype=float).reshape(20,1))
        hidden_delta1 = np.asmatrix(np.zeros(20, dtype=float).reshape(20,1))
        
        hidden_v2 = np.asmatrix(np.zeros(20, dtype=float).reshape(20,1))
        hidden_y2 = np.asmatrix(np.zeros(20, dtype=float).reshape(20,1))
        hidden_delta2 = np.asmatrix(np.zeros(20, dtype=float).reshape(20,1))
        
        hidden_v3 = np.asmatrix(np.zeros(20, dtype=float).reshape(20,1))
        hidden_y3 = np.asmatrix(np.zeros(20, dtype=float).reshape(20,1))
        hidden_delta3 = np.asmatrix(np.zeros(20, dtype=float).reshape(20,1))
        
        output_v = np.asmatrix(np.zeros(5, dtype=float).reshape(5,1))
        output_y = np.asmatrix(np.zeros(5, dtype=float).reshape(5,1))
        output_delta = np.asmatrix(np.zeros(5, dtype=float).reshape(5,1))
        
        # 학습
        # 입력층 - 은닉층
        hidden_v1 = np.matmul(W1, np.transpose(x))
        hidden_y1 = reLU(hidden_v1)
        
        hidden_v2 = np.matmul(W2, hidden_y1)
        hidden_y2 = reLU(hidden_v2)
        
        hidden_v3 = np.matmul(W3, hidden_y2)
        hidden_y3 = reLU(hidden_v3)
        
        # 은닉층 - 출력층
        output_v = np.matmul(W4, hidden_y3)
        output_y = softmax2(output_v)
        
        print(output_v)
        
        # 오차
        # 출력층
        output_err = d.reshape(5,1) - output_y
        output_delta = output_err
        # 은닉층3 오차
        # hidden_v3 0보다 크면 참이고 아니면 거짓입니다.
        # 0보다 크면 1이고 아니면 0이다
        hidden_err3 = np.matmul(np.transpose(W4), output_delta)
        for i in range(len(hidden_v3)):
            if hidden_v3[i] > 0:
                hidden_delta3[i] = hidden_err3[i]
            else:
                hidden_delta3[i] = 0

        # 은닉층2 오차
        hidden_err2 = np.matmul(np.transpose(W3), hidden_delta3)
        for i in range(len(hidden_v2)):
            if hidden_v2[i] > 0:
                hidden_delta2[i] = hidden_err2[i]
            else:
                hidden_delta2[i] = 0
        
        # 은닉층1 오차
        hidden_err1 = np.matmul(np.transpose(W2), hidden_delta2)
        for i in range(len(hidden_v1)):
            if hidden_v1[i] > 0:
                hidden_delta1[i] = hidden_err1[i]
            else:
                hidden_delta1[i] = 0
         
        # 가중치 조정
        # 입력층 - 은닉층1
        for i in range(len(W1)):
            delta_weight1 = lr*np.matmul(hidden_delta1[i], x)
            W1[i] = W1[i] + delta_weight1
        
        # 은닉층1 - 은닉층2
        #print(hidden_delta2)
        for i in range(len(W2)):
            delta_weight2 = lr*np.matmul(hidden_delta2[i], np.transpose(hidden_y1))
            W2[i] = W2[i] + delta_weight2
        
        # 은닉층2 - 은닉층3
        for i in range(len(W3)):
            delta_weight3 = lr*np.matmul(hidden_delta3[i], np.transpose(hidden_y2))
            W3[i] = W3[i] + delta_weight3
            
        # 은닉층3 - 출력층
        for i in range(len(W4)):
            delta_weight4 = lr*np.matmul(output_delta[i], np.transpose(hidden_y3))
            W4[i] = W4[i] + delta_weight4
        
    return [W1, W2, W3, W4]

In [78]:
X = np.zeros(((N, 5, 5)), dtype=float);
D = np.zeros(((N, 1, 5)), dtype=float);
X[0] = np.array([[0,1,1,0,0],
                 [0,0,1,0,0],
                 [0,0,1,0,0],
                 [0,0,1,0,0],
                 [0,1,1,1,0]])
X[1] = np.array([[1,1,1,1,0],
                 [0,0,0,0,1],
                 [0,1,1,1,0],
                 [1,0,0,0,0],
                 [1,1,1,1,1]])
X[2] = np.array([[1,1,1,1,0],
                 [0,0,0,0,1],
                 [0,1,1,1,0],
                 [0,0,0,0,1],
                 [1,1,1,1,0]])
X[3] = np.array([[0,0,0,1,0],
                 [0,0,1,1,0],
                 [0,1,0,1,0],
                 [1,1,1,1,1],
                 [0,0,0,1,0]])
X[4] = np.array([[1,1,1,1,1],
                 [1,0,0,0,0],
                 [1,1,1,1,0],
                 [0,0,0,0,1],
                 [1,1,1,1,0]])
D = np.array([[1,0,0,0,0],
              [0,1,0,0,0],
              [0,0,1,0,0],
              [0,0,0,1,0],
              [0,0,0,0,1]])

ran_w1 = np.random.rand(500)
arr_w1 = ran_w1.reshape(20,25)
W1 = np.asmatrix(arr_w1, dtype=float)

ran_w2 = np.random.rand(400)
arr_w2 = ran_w2.reshape(20,20)
W2 = np.asmatrix(arr_w2, dtype=float)

ran_w3 = np.random.rand(400)
arr_w3 = ran_w3.reshape(20,20)
W3 = np.asmatrix(arr_w3, dtype=float)

ran_w4 = np.random.rand(100)
arr_w4 = ran_w4.reshape(5,20)
W4 = np.asmatrix(arr_w4, dtype=float)

X_reshape = X.reshape(5, 25)
for i in range(100):
    W = deepReLU(W1, W2, W3, W4, X_reshape, D)
    W1 = W[0]
    W2 = W[1]
    W3 = W[2]
    W4 = W[3]

[[4405.15069521]
 [4204.01070382]
 [5213.00397618]
 [3848.17186103]
 [4244.64332758]]
[[ 31631.55550949]
 [   348.57489184]
 [-30847.35003148]
 [   458.50991551]
 [   394.82424671]]
[[0.]
 [0.]
 [0.]
 [0.]
 [0.]]
[[0.]
 [0.]
 [0.]
 [0.]
 [0.]]
[[0.]
 [0.]
 [0.]
 [0.]
 [0.]]
[[0.]
 [0.]
 [0.]
 [0.]
 [0.]]
[[0.]
 [0.]
 [0.]
 [0.]
 [0.]]
[[0.]
 [0.]
 [0.]
 [0.]
 [0.]]
[[0.]
 [0.]
 [0.]
 [0.]
 [0.]]
[[0.]
 [0.]
 [0.]
 [0.]
 [0.]]
[[0.]
 [0.]
 [0.]
 [0.]
 [0.]]
[[0.]
 [0.]
 [0.]
 [0.]
 [0.]]
[[0.]
 [0.]
 [0.]
 [0.]
 [0.]]
[[0.]
 [0.]
 [0.]
 [0.]
 [0.]]
[[0.]
 [0.]
 [0.]
 [0.]
 [0.]]
[[0.]
 [0.]
 [0.]
 [0.]
 [0.]]
[[0.]
 [0.]
 [0.]
 [0.]
 [0.]]
[[0.]
 [0.]
 [0.]
 [0.]
 [0.]]
[[0.]
 [0.]
 [0.]
 [0.]
 [0.]]
[[0.]
 [0.]
 [0.]
 [0.]
 [0.]]
[[0.]
 [0.]
 [0.]
 [0.]
 [0.]]
[[0.]
 [0.]
 [0.]
 [0.]
 [0.]]
[[0.]
 [0.]
 [0.]
 [0.]
 [0.]]
[[0.]
 [0.]
 [0.]
 [0.]
 [0.]]
[[0.]
 [0.]
 [0.]
 [0.]
 [0.]]
[[0.]
 [0.]
 [0.]
 [0.]
 [0.]]
[[0.]
 [0.]
 [0.]
 [0.]
 [0.]]
[[0.]
 [0.]
 [0.]
 [0.]
 [0.]]
[[0.]
 [0.]


 [0.]]
[[0.]
 [0.]
 [0.]
 [0.]
 [0.]]
[[0.]
 [0.]
 [0.]
 [0.]
 [0.]]
[[0.]
 [0.]
 [0.]
 [0.]
 [0.]]
[[0.]
 [0.]
 [0.]
 [0.]
 [0.]]
[[0.]
 [0.]
 [0.]
 [0.]
 [0.]]
[[0.]
 [0.]
 [0.]
 [0.]
 [0.]]
[[0.]
 [0.]
 [0.]
 [0.]
 [0.]]
[[0.]
 [0.]
 [0.]
 [0.]
 [0.]]
[[0.]
 [0.]
 [0.]
 [0.]
 [0.]]
[[0.]
 [0.]
 [0.]
 [0.]
 [0.]]
[[0.]
 [0.]
 [0.]
 [0.]
 [0.]]
[[0.]
 [0.]
 [0.]
 [0.]
 [0.]]
[[0.]
 [0.]
 [0.]
 [0.]
 [0.]]
[[0.]
 [0.]
 [0.]
 [0.]
 [0.]]
[[0.]
 [0.]
 [0.]
 [0.]
 [0.]]
[[0.]
 [0.]
 [0.]
 [0.]
 [0.]]
[[0.]
 [0.]
 [0.]
 [0.]
 [0.]]
[[0.]
 [0.]
 [0.]
 [0.]
 [0.]]
[[0.]
 [0.]
 [0.]
 [0.]
 [0.]]
[[0.]
 [0.]
 [0.]
 [0.]
 [0.]]
[[0.]
 [0.]
 [0.]
 [0.]
 [0.]]
[[0.]
 [0.]
 [0.]
 [0.]
 [0.]]
[[0.]
 [0.]
 [0.]
 [0.]
 [0.]]
[[0.]
 [0.]
 [0.]
 [0.]
 [0.]]
[[0.]
 [0.]
 [0.]
 [0.]
 [0.]]
[[0.]
 [0.]
 [0.]
 [0.]
 [0.]]
[[0.]
 [0.]
 [0.]
 [0.]
 [0.]]
[[0.]
 [0.]
 [0.]
 [0.]
 [0.]]
[[0.]
 [0.]
 [0.]
 [0.]
 [0.]]
[[0.]
 [0.]
 [0.]
 [0.]
 [0.]]
[[0.]
 [0.]
 [0.]
 [0.]
 [0.]]
[[0.]
 [0.]
 [0.]
 [0.]
 [0.]]
[

In [70]:
print(W1)
print(W2)
print(W3)
print(W4)

[[-3.14959094e+02 -3.15201988e+02 -3.14840735e+02 -3.14026748e+02
   9.77803211e-01  3.01657212e-01  9.86553310e-01 -4.26478512e-02
   8.99074825e-01 -3.14031311e+02  1.40516350e-02 -3.14829443e+02
  -3.14915443e+02 -3.14002158e+02  2.72556966e-01 -3.14185589e+02
   9.35880042e-01 -5.97598992e-01  6.61916335e-03  1.10320895e-01
  -3.14394978e+02 -3.15656579e+02 -3.14949646e+02 -3.15223547e+02
  -3.14466226e+02]
 [-2.98226458e+02 -2.99126476e+02 -2.98375452e+02 -2.97907318e+02
   5.22074481e-01  2.74371466e-01  7.47385965e-01  2.53848639e-01
   9.57240149e-01 -2.98285515e+02  7.22768621e-01 -2.98272756e+02
  -2.98961205e+02 -2.98376414e+02  1.28364609e-01 -2.97760805e+02
   8.92035774e-01 -2.49355606e-01  3.93683024e-01  9.52965543e-01
  -2.98055826e+02 -2.98278226e+02 -2.98993138e+02 -2.98655098e+02
  -2.97660975e+02]
 [-1.89859979e+02 -1.90733507e+02 -1.90561814e+02 -1.90081839e+02
   8.91988202e-01  9.07291558e-01  2.64757505e-01 -3.14342453e-02
   3.13294433e-01 -1.89667327e+02  7.7

In [73]:
X_reshape = X.reshape(5, 25)
for k in range(N):
    x = np.asmatrix(X_reshape[k])
    d = np.asmatrix(D[k])
    hidden_v1 = np.asmatrix(np.zeros(20, dtype=float).reshape(20,1))
    hidden_y1 = np.asmatrix(np.zeros(20, dtype=float).reshape(20,1))

    hidden_v2 = np.asmatrix(np.zeros(20, dtype=float).reshape(20,1))
    hidden_y2 = np.asmatrix(np.zeros(20, dtype=float).reshape(20,1))

    hidden_v3 = np.asmatrix(np.zeros(20, dtype=float).reshape(20,1))
    hidden_y3 = np.asmatrix(np.zeros(20, dtype=float).reshape(20,1))

    output_v = np.asmatrix(np.zeros(5, dtype=float).reshape(5,1))
    output_y = np.asmatrix(np.zeros(5, dtype=float).reshape(5,1))

    hidden_v1 = np.matmul(W1, np.transpose(x))
    print(hidden_v1)
    hidden_y1 = reLU(hidden_v1)

    hidden_v2 = np.matmul(W2, hidden_y1)
    print(hidden_v2)
    hidden_y2 = reLU(hidden_v2)

    hidden_v3 = np.matmul(W3, hidden_y2)
    print(hidden_v3)
    hidden_y3 = reLU(hidden_v3)

    output_v = np.matmul(W4, hidden_y3)
    print(output_v)
    output_y = softmax(output_v)
    
    print(output_y)

[[-1891.42818467]
 [-1792.38510218]
 [-1143.54328126]
 [-2232.73963416]
 [-2061.87227751]
 [-1412.17274047]
 [-1708.06272774]
 [-1100.86649721]
 [-1437.75933805]
 [-1676.5183245 ]
 [-1433.30892194]
 [-1870.06270399]
 [ -848.16418858]
 [ -725.32444716]
 [-2209.1716716 ]
 [ -820.37160868]
 [-2100.12892969]
 [ -978.39014097]
 [-1395.73497487]
 [-1308.80501957]]
[[0.]
 [0.]
 [0.]
 [0.]
 [0.]
 [0.]
 [0.]
 [0.]
 [0.]
 [0.]
 [0.]
 [0.]
 [0.]
 [0.]
 [0.]
 [0.]
 [0.]
 [0.]
 [0.]
 [0.]]
[[0.]
 [0.]
 [0.]
 [0.]
 [0.]
 [0.]
 [0.]
 [0.]
 [0.]
 [0.]
 [0.]
 [0.]
 [0.]
 [0.]
 [0.]
 [0.]
 [0.]
 [0.]
 [0.]
 [0.]]
[[0.]
 [0.]
 [0.]
 [0.]
 [0.]]
[[0.2]
 [0.2]
 [0.2]
 [0.2]
 [0.2]]
[[-4405.68348476]
 [-4176.93566358]
 [-2662.31502217]
 [-5200.64002394]
 [-4804.2351874 ]
 [-3287.5161196 ]
 [-3975.23673789]
 [-2566.7158413 ]
 [-3347.46978712]
 [-3902.37696322]
 [-3339.0243888 ]
 [-4353.64954519]
 [-1973.95457817]
 [-1689.5238368 ]
 [-5147.15271251]
 [-1908.49587166]
 [-4891.22564789]
 [-2277.58468651]
 [-324