<a href="https://colab.research.google.com/github/WOOZi-n/DeepLearning-From-Scratch/blob/main/DLFS_chap4.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [96]:
from sklearn.datasets import fetch_openml
mnist = fetch_openml('mnist_784', version = 1, as_frame = False)

from sklearn.preprocessing import OneHotEncoder
ohe = OneHotEncoder(sparse = False)
mnist.target = ohe.fit_transform(mnist.target.reshape(-1,1))


In [52]:
import numpy as np
from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(mnist.data, mnist.target)

In [53]:
train_size = X_train.shape[0]
batch_size = [10]
batch_mask = np.random.choice(train_size, batch_size)
x_batch = X_train[batch_mask]
y_batch = y_train[batch_mask]

In [54]:
def cross_entropy_error(y,t):
  if y.ndim == 1:
    t = t.reshape(1, t.size)
    y = y.reshape(1, y.size)
  
  batch_size = y.shape[0]
  return -np.sum(t*np.log(y + 1e-7))/batch_size

In [55]:
# 수치미분
def numerical_diff(f,x):
  h = 1e-4 # 반올림오차문제 고려한 값
  return (f(x+h)- f(x-h))/(2*h) # 선형근사로 인한 오차를 줄임

In [56]:
# 편미분
def function_2(x):
  return np.sum(x**2)

In [57]:
# 기울기벡터 구하기

def numerical_gradient(f,x):
  h = 1e-4
  grads = np.zeros_like(x)
  for idx in range(x.size): # 각 변수별
    tmp_val = x[idx] # 값 저장
    x[idx] = tmp_val + h
    fxh1 = f(x)

    x[idx] = tmp_val - h
    fxh2 = f(x)

    grads[idx] = (fxh1 - fxh2) / (2*h)
    x[idx] = tmp_val
  
  return grads


In [58]:
# 기울기벡터 구하기(다차원 변수 )

def numerical_gradient(f,x):
  h = 1e-4
  grads = np.zeros_like(x)
  for a in range(x.shape[0]): # 세로
    for b in range(x.shape[1]):
      tmp_val = x[a][b] # 값 저장
      x[a][b] = tmp_val + h
      fxh1 = f(x)

      x[a][b] = tmp_val - h
      fxh2 = f(x)

      grads[a][b] = (fxh1 - fxh2) / (2*h)
      x[a][b] = tmp_val
  
  return grads


In [59]:
# 경사하강법 구현

def gradient_descent(f, init_x, lr = 0.01, step_num = 100):
  x = init_x
  for i in range(step_num):
    grad = numerical_gradient(f, x)
    x -= lr*grad
  return x

In [61]:
# init_x = np.array([-3.0, 4.0])
# gradient_descent(function_2, init_x)

In [62]:
def softmax(x):
  c = np.max(x)
  exp_x = np.exp(x-c)
  sum_x = np.sum(exp_x)
  y = exp_x / sum_x
  return y

def cross_entropy_error(y,t):
  delta = 1e-7
  return -np.sum(t*np.log(y + delta))

def sigmoid(x):
  return 1 / (1 + np.exp(-x))

In [63]:
class simpleNet:
  def __init__(self):
    self.W = np.random.randn(2,3)

  def predict(self, x):
    return np.dot(x, self.W)
  
  def loss(self, x, t):
    z = self.predict(x)
    y = softmax(z)
    loss = cross_entropy_error(y,t) 
    return loss

In [64]:
net = simpleNet()

x = np.array([0.6, 0.9])
p = net.predict(x)
np.argmax(p)
t = np.array([1,0,0])
net.loss(x, t)

1.3970162414994172

In [65]:
def f(W): 
  return net.loss(x,t)

dW = numerical_gradient(f, net.W)
print(dW)

[[-0.45159957  0.21463168  0.23696789]
 [-0.67739935  0.32194752  0.35545183]]


In [77]:
class TwoLayerNet:
  def __init__(self, input_size, hidden_size, output_size, weight_init_std = 0.01):
    self.params = {}
    self.params['W1'] = weight_init_std * np.random.randn(input_size, hidden_size)
    self.params['b1'] = np.zeros(hidden_size)
    self.params['W2'] = weight_init_std * np.random.randn(hidden_size, output_size)
    self.params['b2'] = np.zeros(output_size)

  def predict(self, x):
    W1, W2 = self.params['W1'], self.params['W2']
    b1, b2=  self.params['b1'], self.params['b2']

    a1 = np.dot(x, W1) + b1
    z1 = sigmoid(a1)
    a2 = np.dot(z1, W2) + b2
    y = softmax(a2)

    return y

  def loss(self, x, t):
    y = self.predict(x)
    return cross_entropy_error(y,t)

  def accuracy(self, x ,t):
    y = self.predict(x)
    y = np.argmax(y, axis = 1)
    t = np.argmax(t, axis = 1)

    accuracy = np.sum(y==t) / float(x.shape[0])
    return accuracy

  def numerical_gradient(self, x,t):
    loss_W = lambda W: self.loss(x,t)
    grads = {}
    grads['W1'] = numerical_gradient(loss_W, self.params['W1'])
    grads['b1'] = numerical_gradient(loss_W, self.params['b1'])
    grads['W2'] = numerical_gradient(loss_W, self.params['W2'])
    grads['b2'] = numerical_gradient(loss_W, self.params['b2'])

    return grads
    

In [94]:
# 미니배치학습구현

iters_num = 10000
train_size = X_train.shape[0]
batch_size = 100
learning_rate = 0.1
network = TwoLayerNet(input_size = 784, hidden_size = 50, output_size = 10)
train_loss_list = []

for i in range(iters_num):
  batch_mask = np.random.choice(train_size, batch_size)
  X_batch = X_train[batch_mask]
  y_batch = y_train[batch_mask]

  grad = network.numerical_gradient(X_batch,y_train_encoded)
  
  for key in ('W1','b1','W2','b2'):
    network.params[key] -= learning_rate*grad[key]

  loss = network.loss(x_batch, y_batch)
  train_loss_list.append(loss)

IndexError: ignored

[[0. 0. 0. ... 0. 0. 0.]
 [0. 0. 0. ... 1. 0. 0.]
 [0. 0. 0. ... 0. 0. 0.]
 ...
 [0. 0. 1. ... 0. 0. 0.]
 [0. 1. 0. ... 0. 0. 0.]
 [0. 0. 0. ... 1. 0. 0.]]
