In [1]:
import numpy as np
from collections import OrderedDict
import copy

In [2]:
mnist = np.loadtxt('mnist.csv', delimiter=',')

In [3]:
def train_test_split(csv_dataset): # i는 0~100 중의 하나의 수로 train_set의 비율을 나타낸다. ex) 70 => train_set 70% test_set 30%
    #코드 작성
    
    train_T = csv_dataset[0:8000,0]
    train_X = csv_dataset[0:8000,1:785]
    test_T = csv_dataset[8000:10000,0]
    test_X = csv_dataset[8000:10000,1:785]
    
    train_X = train_X / 256
    test_X = test_X / 256
    
    return train_X, train_T, test_X, test_T

In [4]:
def one_hot_encoding(T): # T is data의 label
    #코드 작성
    one_hot_label = np.zeros((T.shape[0],10))
    n = 0    
    for i in T:
        one_hot_label[n][int(i)] = 1
        n += 1
    
    return one_hot_label

In [5]:
def Softmax(ScoreMatrix): # 제공.

    if ScoreMatrix.ndim == 2:
        temp = ScoreMatrix
        temp = temp - np.max(temp, axis=1, keepdims=True)
        y_predict = np.exp(temp) / np.sum(np.exp(temp), axis=1, keepdims=True)
        return y_predict
    temp = ScoreMatrix - np.max(ScoreMatrix, axis=0)
    expX = np.exp(temp)
    y_predict = expX / np.sum(expX)
    return y_predict


In [6]:
def setParam_He(neuronlist):

    np.random.seed(1) # seed값 고정을 통해 input이 같으면 언제나 같은 Weight와 bias를 출력하기 위한 함수
    #코드 작성
    #neuronlist = 784,60,30,10
    W1 = np.random.randn(neuronlist[0],neuronlist[1]) / np.sqrt(neuronlist[0]/2)
    W2 = np.random.randn(neuronlist[1],neuronlist[2]) / np.sqrt(neuronlist[1]/2)
    W3 = np.random.randn(neuronlist[2],neuronlist[3]) / np.sqrt(neuronlist[2]/2)
    
    b1 = np.zeros(neuronlist[1])
    b2 = np.zeros(neuronlist[2])
    b3 = np.zeros(neuronlist[3])
    
    return W1, W2, W3, b1, b2, b3

In [7]:
class linearLayer:
    def __init__(self, W, b):
        #backward에 필요한 X, W, b 값 저장 + dW, db값 받아오기
        
        self.X = None
        self.W = W
        self.b = b
        self.dW = None
        self.db = None
        
        
    def forward(self, x):
        self.X = x
        Z = np.dot(x, self.W) + self.b
        #내적연산을 통한 Z값 계산
        return Z
    
    def drop_forward(self, x, k):
        self.X = x
        Z = np.dot(self.X, self.W) + self.b
        u1 = np.random.rand(*Z.shape) > k
        Z *= u1
        #내적연산을 통한 Z값 계산
        return Z
    
    def backward(self, dZ):
#         dz는 dout
#         dx는 input x의 dx
        dx = np.dot(dZ, self.W.T)
        self.dW = np.dot(self.X.T,dZ)
        self.db = np.sum(dZ,axis=0)
        #백워드 함수
        
        return dx

In [8]:
class SiLU:
    def __init__(self):
        self.Z = None # 백워드 시 사용할 로컬 변수

    def sigmoid(self, x):
        return 1 / (1 +np.exp(-x))

    def forward(self, Z):
        self.Z = Z
        Activation = Z * self.sigmoid(Z)
        #수식에 따른 forward 함수 작성
        return Activation
    
#     def swish(x):
#     return x*sig(x)

# def dswish(x):
#     return swish(x) + sig(x)*(1-swish(x))

    def backward(self, dActivation):
        
#       dActivation이 dout
#       dZ가 dx
#       dZ = (self.Z + self.sigmoid(dActivation) * (1-self.Z)) * dActivation
        dZ = (self.forward(self.Z) + self.sigmoid(self.Z)*(1-self.forward(self.Z)))*dActivation
        #수식에 따른 backward 함수 작성
        return dZ

In [9]:
class SoftmaxWithLoss(): # 제공
    
    def __init__(self):
        self.loss = None
        self.softmaxScore = None
        self.label = None
        
    def forward(self, score, one_hot_label):
        
        batch_size = one_hot_label.shape[0]
        self.label = one_hot_label
        self.softmaxScore = Softmax(score)
        self.loss = -np.sum(self.label * np.log(self.softmaxScore + 1e-20)) / batch_size
        
        return self.loss
    
    def backward(self, dout=1):
        batch_size = self.label.shape[0]
        dx = (self.softmaxScore - self.label) / batch_size
        
        return dx
                                      

In [10]:
class ThreeLayerNet :
    
    def __init__(self, paramlist):
        
        W1, W2, W3, b1, b2, b3 = setParam_He(paramlist)
        self.params = {}
        self.params['W1'] = W1
        self.params['W2'] = W2
        self.params['W3'] = W3
        self.params['b1'] = b1
        self.params['b2'] = b2
        self.params['b3'] = b3
        

        self.layers = OrderedDict()
        
        self.layers['L1'] = linearLayer(self.params['W1'], self.params['b1'])
        self.layers['SiLU1'] = SiLU()
        self.layers['L2'] = linearLayer(self.params['W2'], self.params['b2'])
        self.layers['SiLU2'] = SiLU()
        self.layers['L3'] = linearLayer(self.params['W3'], self.params['b3'])
        
        self.lastLayer = SoftmaxWithLoss()
        
    def scoreFunction(self, x):
        
        for layer in self.layers.values():
            x = layer.forward(x)
            # 한 줄이 best
        score = x
        return score
        
    def forward(self, x, label):

        score = self.scoreFunction(x)
        return self.lastLayer.forward(score, label)
#     리턴은 loss
    
    def drop_forward(self, x, label,k1,k2):
        x = self.layers['L1'].drop_forward(x,k1)
        x = self.layers['SiLU1'].forward(x)
        x = self.layers['L2'].drop_forward(x,k2)
        x = self.layers['SiLU2'].forward(x)
        x = self.layers['L3'].forward(x)
        score = x
        return self.lastLayer.forward(score, label)
    
    def accuracy(self, x, label):
        
        score = self.scoreFunction(x)
        score_argmax = np.argmax(score, axis=1)
        
        if label.ndim != 1 : #label이 one_hot_encoding 된 데이터면 if문을 
            label_argmax = np.argmax(label, axis = 1)
            
        accuracy  = np.sum(score_argmax==label_argmax) / int(x.shape[0])
        
        return accuracy
    
    def backpropagation(self, x, label):
        dx = self.lastLayer.backward()
        self.layers = OrderedDict(reversed(list(self.layers.items())))
        for backlayer in self.layers.values():
            dx = backlayer.backward(dx)
        #백워드 함수 작성 스코어펑션을 참고하세요
            
        grads = {}
        grads['W1'] = self.layers['L1'].dW
        grads['b1'] = self.layers['L1'].db
        grads['W2'] = self.layers['L2'].dW
        grads['b2'] = self.layers['L2'].db
        grads['W3'] = self.layers['L3'].dW
        grads['b3'] = self.layers['L3'].db
        
        self.layers = OrderedDict(reversed(list(self.layers.items())))
        
        return grads
    
    def gradientdescent(self, grads, learning_rate):
        self.params['W1'] -= learning_rate*grads['W1']
        self.params['W2'] -= learning_rate*grads['W2']
        self.params['W3'] -= learning_rate*grads['W3']
        self.params['b1'] -= learning_rate*grads['b1']
        self.params['b2'] -= learning_rate*grads['b2']
        self.params['b3'] -= learning_rate*grads['b3']
        

In [11]:
def batchOptimization(dataset, ThreeLayerNet, learning_rate, epoch=1000):
    train_acc_list = []
    test_acc_list = []
    Loss_list = []
    for i in range(epoch+1):
        Loss = ThreeLayerNet.forward(dataset['train_X'],dataset['one_hot_train'])
        grads = ThreeLayerNet.backpropagation(dataset['train_X'],dataset['one_hot_train'])
        ThreeLayerNet.gradientdescent(grads,learning_rate)
        #코드 작성
        if i % 10 == 0:
            train_acc = ThreeLayerNet.accuracy(dataset['train_X'], dataset['one_hot_train'])
            test_acc = ThreeLayerNet.accuracy(dataset['test_X'], dataset['one_hot_test'])
            print(i, '\t번째 Loss = ', Loss)
            print(i, '\t번째 Train_Accuracy : ', train_acc)
            print(i, '\t번째 Test_Accuracy : ', test_acc)
            train_acc_list.append(train_acc)
            test_acc_list.append(test_acc)
            Loss_list.append(Loss)        
   
    return ThreeLayerNet, train_acc_list, test_acc_list, Loss_list

In [12]:
def minibatch_Optimization(dataset, ThreeLayerNet, learning_rate, epoch=100, batch_size=100):    
    train_acc_list = []
    test_acc_list = []
    Loss_list = []
    np.random.seed(5)
    for i in range(epoch+1):
        s= np.arange(dataset['train_X'].shape[0])
        np.random.shuffle(s)
        dataset['train_X'] = dataset['train_X'][s]
        dataset['one_hot_train'] = dataset['one_hot_train'][s]
        
        for j in range(int(dataset['train_X'].shape[0] / batch_size)):
            first_idx = j*batch_size
            Loss = ThreeLayerNet.forward(dataset['train_X'][first_idx:first_idx+batch_size,:],dataset['one_hot_train'][first_idx:first_idx+batch_size,:])
            grads = ThreeLayerNet.backpropagation(dataset['train_X'][first_idx:first_idx+batch_size,:],dataset['one_hot_train'][first_idx:first_idx+batch_size,:])
            ThreeLayerNet.gradientdescent(grads,learning_rate)
        # 코드 작성
        
        if i % 10 == 0:
            train_acc = ThreeLayerNet.accuracy(dataset['train_X'], dataset['one_hot_train'])
            test_acc = ThreeLayerNet.accuracy(dataset['test_X'], dataset['one_hot_test'])
            print(i, '\t번째 Loss = ', Loss)
            print(i, '\t번째 Train_Accuracy : ', train_acc)
            print(i, '\t번째 Test_Accuracy : ', test_acc)
            train_acc_list.append(train_acc)
            test_acc_list.append(test_acc)
            Loss_list.append(Loss)  

    return ThreeLayerNet, train_acc_list, test_acc_list, Loss_list

In [13]:
def dropout_use_Optimizer(dataset, ThreeLayerNet, learning_rate, epoch, kill_n_h1 = 0.25, kill_n_h2 = 0.15):
    train_acc_list = []
    test_acc_list = []
    Loss_list = []
    for i in range(epoch+1):
        Loss = ThreeLayerNet.drop_forward(dataset['train_X'],dataset['one_hot_train'],kill_n_h1,kill_n_h2)
        grads = ThreeLayerNet.backpropagation(dataset['train_X'],dataset['one_hot_train'])
        ThreeLayerNet.gradientdescent(grads,learning_rate)
        #코드 작성
        
        if i % 10 == 0:
            train_acc = ThreeLayerNet.accuracy(dataset['train_X'], dataset['one_hot_train'])
            test_acc = ThreeLayerNet.accuracy(dataset['test_X'], dataset['one_hot_test'])
            print(i, '\t번째 Loss = ', Loss)
            print(i, '\t번째 Train_Accuracy : ', train_acc)
            print(i, '\t번째 Test_Accuracy : ', test_acc)
            train_acc_list.append(train_acc)
            test_acc_list.append(test_acc)
            Loss_list.append(Loss)  
    return ThreeLayerNet, train_acc_list, test_acc_list, Loss_list

In [14]:
#과제 채점을 위한 세팅
train_X, train_label, test_X, test_label = train_test_split(mnist)

one_hot_train = one_hot_encoding(train_label)
one_hot_test = one_hot_encoding(test_label)

dataset = {}
dataset['train_X'] = train_X
dataset['test_X'] = test_X
dataset['one_hot_train'] = one_hot_train
dataset['one_hot_test'] = one_hot_test

neournlist = [784, 60, 30, 10]

TNN_batchOptimizer = ThreeLayerNet(neournlist)
TNN_minibatchOptimizer = copy.deepcopy(TNN_batchOptimizer)
TNN_dropout = copy.deepcopy(TNN_minibatchOptimizer)

In [15]:
#채점은 이 것의 결과값으로 할 예정입니다. 
print(20*'-','BATCH',20*'-')
# trained_batch, tb_train_acc_list, tb_test_acc_list, tb_loss_list =  batchOptimization(dataset, TNN_batchOptimizer, 0.1, 1000)
print(20*'-','MINI_BATCH',20*'-')
#trained_minibatch, tmb_train_acc_list, tmb_test_acc_list, tb_loss_list = minibatch_Optimization(dataset, TNN_minibatchOptimizer, 0.1, epoch=100, batch_size=100)
trained_dropout, td_train_acc_list, td_test_acc_list, td_loss_list = dropout_use_Optimizer(dataset, TNN_dropout, 0.1, 1000, 0.25, 0.15)

-------------------- BATCH --------------------
-------------------- MINI_BATCH --------------------
0 	번째 Loss =  2.3293901072622063
0 	번째 Train_Accuracy :  0.114
0 	번째 Test_Accuracy :  0.1015
10 	번째 Loss =  2.1292754228173814
10 	번째 Train_Accuracy :  0.32675
10 	번째 Test_Accuracy :  0.296
20 	번째 Loss =  1.8981063507988134
20 	번째 Train_Accuracy :  0.442125
20 	번째 Test_Accuracy :  0.444
30 	번째 Loss =  1.6637026338858005
30 	번째 Train_Accuracy :  0.558625
30 	번째 Test_Accuracy :  0.587
40 	번째 Loss =  1.4334236608181121
40 	번째 Train_Accuracy :  0.66925
40 	번째 Test_Accuracy :  0.7055
50 	번째 Loss =  1.265123271740499
50 	번째 Train_Accuracy :  0.734875
50 	번째 Test_Accuracy :  0.7775
60 	번째 Loss =  1.133680073986904
60 	번째 Train_Accuracy :  0.774
60 	번째 Test_Accuracy :  0.8165
70 	번째 Loss =  1.0322740012241796
70 	번째 Train_Accuracy :  0.799375
70 	번째 Test_Accuracy :  0.846
80 	번째 Loss =  0.9605230321497109
80 	번째 Train_Accuracy :  0.818
80 	번째 Test_Accuracy :  0.858
90 	번째 Loss =  0.901238691860

810 	번째 Loss =  0.39820482974215193
810 	번째 Train_Accuracy :  0.931625
810 	번째 Test_Accuracy :  0.94
820 	번째 Loss =  0.40095096500996025
820 	번째 Train_Accuracy :  0.932
820 	번째 Test_Accuracy :  0.9395
830 	번째 Loss =  0.4035237189306093
830 	번째 Train_Accuracy :  0.9325
830 	번째 Test_Accuracy :  0.94
840 	번째 Loss =  0.388809974716343
840 	번째 Train_Accuracy :  0.9325
840 	번째 Test_Accuracy :  0.94
850 	번째 Loss =  0.3965149452037241
850 	번째 Train_Accuracy :  0.9335
850 	번째 Test_Accuracy :  0.9415
860 	번째 Loss =  0.38067245826270135
860 	번째 Train_Accuracy :  0.933625
860 	번째 Test_Accuracy :  0.941
870 	번째 Loss =  0.38585050655810643
870 	번째 Train_Accuracy :  0.93375
870 	번째 Test_Accuracy :  0.9405
880 	번째 Loss =  0.3717529717474842
880 	번째 Train_Accuracy :  0.93425
880 	번째 Test_Accuracy :  0.9405
890 	번째 Loss =  0.37469094313097484
890 	번째 Train_Accuracy :  0.935125
890 	번째 Test_Accuracy :  0.942
900 	번째 Loss =  0.38317407911316353
900 	번째 Train_Accuracy :  0.93475
900 	번째 Test_Accuracy :  0.