<a href="https://colab.research.google.com/github/pythonpdnp/MS_kofia/blob/main/10_gradient_descent.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Gradient descent

### 수치미분(numerical_derivative) - Gradient

In [None]:
import numpy as np

In [None]:
# f(x, y) = 2x + 3xy + y^3

def func(X):
  """
  Evaluate f(x, y) = 2x + 3xy + y^3.

  X: indexable pair whose element 0 is x and element 1 is y.
  Returns the value of the polynomial at that point.
  """
  x, y = X[0], X[1]

  linear_term = 2*x
  cross_term = 3*x*y
  cubic_term = np.power(y, 3)

  # Summed left to right, in the same order as the formula above.
  return linear_term + cross_term + cubic_term

In [None]:
# Evaluate func at (x, y) = (1, 2): 2*1 + 3*1*2 + 2**3 = 16.
X = np.array([1.0, 2.0])
func(X)

16.0

In [None]:
def gradient(f, X, h=1e-5):
    """
    Estimate the gradient of ``f`` at ``X`` with central differences.

    Each element of ``X`` is perturbed by ``+h`` and ``-h`` in turn, ``f`` is
    evaluated at both points, and the partial derivative is approximated as
    ``(f(x + h) - f(x - h)) / (2 * h)``.

    f: function to differentiate; called as ``f(X)`` and expected to return
       a scalar.
    X: point at which to differentiate (numpy array of any shape, or anything
       ``np.asarray`` accepts). A floating-point array is perturbed in place,
       so ``f`` may be a closure that reads the same array object instead of
       its argument; every element is restored afterwards, even if ``f``
       raises. Any other input (integer array, list, ...) is differentiated
       on a float64 copy and is left untouched.
    h: small step used for the finite difference.

    Returns an array with the same shape as ``X`` holding the partial
    derivatives.
    """
    X = np.asarray(X)
    if not np.issubdtype(X.dtype, np.inexact):
        # An integer (or bool) array cannot hold temp +/- h: the assignment
        # would truncate the step and the result would be garbage. Work on a
        # float copy instead.
        X = X.astype(np.float64)

    grad = np.zeros_like(X)  # same shape as X, and now a non-integer dtype

    # Visit every element index; works for arrays of any dimensionality.
    for idx in np.ndindex(X.shape):
        temp = X[idx]  # remember the current value of X[idx]
        try:
            # f(x + h)
            X[idx] = temp + h
            fxh1 = f(X)

            # f(x - h)
            X[idx] = temp - h
            fxh2 = f(X)
        finally:
            # Restore the original value, even when f raised.
            X[idx] = temp

        # Central difference formula.
        grad[idx] = (fxh1 - fxh2) / (2 * h)

    return grad

In [None]:
gradient(func, X)

array([ 8., 15.])

### 최적화 - simple regression

In [None]:
# Training set as (5, 1) column vectors: inputs 1..5 and targets 2..6.
x_data = np.array([[1], [2], [3], [4], [5]])
t_data = np.array([[2], [3], [4], [5], [6]])

In [None]:
# Seed the global NumPy RNG so the initial parameters, and therefore the
# training log below, are reproducible under Restart & Run All.
np.random.seed(42)

W = np.random.rand(1,1)  # weight, shape (1, 1), drawn uniformly from [0, 1)
b = np.random.rand(1)    # bias, shape (1,), drawn uniformly from [0, 1)

In [None]:
def loss_func(x, t):
  """
  Squared-error loss of the linear model on (x, t).

  The prediction is np.dot(x, W) + b, using the module-level W and b.
  Returns the sum of squared residuals divided by len(x).
  """
  residual = t - (np.dot(x, W) + b)
  squared_error_total = np.sum(residual**2)

  return squared_error_total / len(x)

In [None]:
learning_rate = 1e-2


def f(x):
  """Loss on the training set; the argument is ignored.

  gradient() perturbs W and b in place, and loss_func reads those same
  module-level arrays, so the loss still responds to each perturbation.
  """
  return loss_func(x_data, t_data)


for step in range(8000):
  # In-place updates: W is stepped first, so b's gradient sees the new W.
  W -= learning_rate * gradient(f, W)
  b -= learning_rate * gradient(f, b)

  # Log progress every 400 steps.
  if not step % 400:
    print(f"step = {step}, loss = {loss_func(x_data, t_data)}, W={W}, b={b}")

step = 0, loss = 0.6754743120113933, W=[[0.79380007]], b=[0.85020048]
step = 400, loss = 0.00011450961822949182, W=[[1.00694893]], b=[0.97491831]
step = 800, loss = 7.306390620880875e-06, W=[[1.00175529]], b=[0.99366441]
step = 1200, loss = 4.6619091680016175e-07, W=[[1.00044338]], b=[0.99839964]
step = 1600, loss = 2.9745736600096984e-08, W=[[1.000112]], b=[0.99959575]
step = 2000, loss = 1.897953851103611e-09, W=[[1.00002829]], b=[0.99989789]
step = 2400, loss = 1.2110067635226464e-10, W=[[1.00000715]], b=[0.99997421]
step = 2800, loss = 7.726939091790498e-12, W=[[1.00000181]], b=[0.99999348]
step = 3200, loss = 4.93024395105676e-13, W=[[1.00000046]], b=[0.99999835]
step = 3600, loss = 3.145787106056979e-14, W=[[1.00000012]], b=[0.99999958]
step = 4000, loss = 2.0071981119323005e-15, W=[[1.00000003]], b=[0.99999989]
step = 4400, loss = 1.2807110089166637e-16, W=[[1.00000001]], b=[0.99999997]
step = 4800, loss = 8.171691499141112e-18, W=[[1.]], b=[0.99999999]
step = 5200, loss = 5.214

In [None]:
def predict(X):
  """Return the linear model's output np.dot(X, W) + b for input X.

  W and b are the module-level parameters.
  """
  return np.dot(X, W) + b

# Query the model at x = 43; the training targets follow t = x + 1,
# so a fitted model should return a value close to 44.
predict(43)

array([[44.]])