In [1]:
import numpy as np

In [2]:
# Training set drawn from the line y = x + 2 (target parameters: W = 1, b = 2).
# Both arrays are column vectors of shape (5, 1), one sample per row.
x_data = np.array([[1], [2], [3], [4], [5]])
t_data = x_data + 2

In [3]:
# Start from uniformly random parameters in [0, 1):
# W is a (1, 1) weight matrix, b is a length-1 bias vector.
W = np.random.random_sample((1, 1))
b = np.random.random_sample((1,))
print("W =", W, ", b =", b)

W = [[0.22078639]] , b = [0.41146477]


In [4]:
def loss_func(x, t):
    """Return the squared-error loss of the linear model against targets ``t``.

    The prediction is ``np.dot(x, W) + b`` using the module-level parameters
    ``W`` and ``b``. The summed squared residual is divided by ``len(x)``,
    i.e. the number of rows (samples) in ``x``.
    """
    residual = t - (np.dot(x, W) + b)
    return np.sum(residual ** 2) / len(x)

In [5]:
def numerical_derivative(f, x, delta_x=1e-4):
    """Estimate the gradient of ``f`` with respect to every element of ``x``.

    Each element of ``x`` is perturbed in place by ``+delta_x`` and
    ``-delta_x``, ``f(x)`` is evaluated for both, and the central difference
    ``(f(x + h) - f(x - h)) / (2 * h)`` is stored at the same index of the
    result. Because the perturbation is applied to ``x`` itself, ``f`` may
    also observe it through another reference to the same array (for example
    a closure that ignores its argument).

    Parameters
    ----------
    f : callable
        Called as ``f(x)``; must return a scalar.
    x : numpy.ndarray
        Floating-point array holding the point of evaluation. It is modified
        temporarily and restored to its original values before returning,
        even if ``f`` raises.
    delta_x : float, optional
        Half-width of the central-difference step (default ``1e-4``).

    Returns
    -------
    numpy.ndarray
        Array with the same shape and dtype as ``x`` containing the partial
        derivatives. A zero-size ``x`` yields a zero-size gradient.

    Raises
    ------
    TypeError
        If ``x`` is not a floating-point ``numpy.ndarray``. Integer arrays
        would truncate the in-place perturbation and give a meaningless
        result, so they are rejected instead.
    """
    if not isinstance(x, np.ndarray) or not np.issubdtype(x.dtype, np.floating):
        raise TypeError(
            "numerical_derivative requires a floating-point numpy.ndarray for x"
        )

    grad = np.zeros_like(x)

    # np.ndindex visits every index tuple, including the single empty tuple of
    # a 0-d array, and simply yields nothing for zero-size arrays.
    for idx in np.ndindex(x.shape):
        tmp_val = x[idx]
        try:
            x[idx] = float(tmp_val) + delta_x
            fx1 = f(x)

            x[idx] = float(tmp_val) - delta_x
            fx2 = f(x)
        finally:
            # Always undo the perturbation so a failing f cannot leave the
            # caller's parameters shifted.
            x[idx] = tmp_val

        grad[idx] = (fx1 - fx2) / (2 * delta_x)

    return grad

In [6]:
def predict(x):
    """Apply the linear model to ``x`` and return ``np.dot(x, W) + b``.

    Uses the module-level parameters ``W`` and ``b`` as they are at call time.
    """
    return np.dot(x, W) + b

In [7]:
learning_rate = 1e-2


def f(_):
    """Loss on the training set; the argument is ignored.

    loss_func reads the global W and b, which numerical_derivative perturbs
    in place, so the perturbation is seen without using the argument.
    """
    return loss_func(x_data, t_data)


print("initial error value =", loss_func(x_data, t_data), ", initial W =", W, ", initial b =", b)

for step in range(10001):
    # Gradient-descent step; the in-place "-=" keeps W and b the same array
    # objects that loss_func reads.
    W -= learning_rate * numerical_derivative(f, W)
    b -= learning_rate * numerical_derivative(f, b)

    if step % 400 != 0:
        continue

    # Report progress every 400 steps.
    print("step =", step, ", error value =", loss_func(x_data, t_data), ", W =", W, ", b =", b)

initial error value = 16.629206143955948 , initial W = [[0.22078639]] , initial b = [0.41146477]
step = 0 , error value = 9.909922170322227 , W = [[0.4875255]] , b = [0.47398394]
step = 400 , error value = 0.02070913644619054 , W = [[1.09344966]] , b = [1.66269965]
step = 800 , error value = 0.00132136533711885 , W = [[1.02360523]] , b = [1.9147985]
step = 1200 , error value = 8.43109203841485e-05 , W = [[1.00596264]] , b = [1.97847824]
step = 1600 , error value = 5.379535164377093e-06 , W = [[1.00150615]] , b = [1.99456364]
step = 2000 , error value = 3.432461471525388e-07 , W = [[1.00038045]] , b = [1.99862678]
step = 2400 , error value = 2.1901133450038304e-08 , W = [[1.0000961]] , b = [1.99965313]
step = 2800 , error value = 1.3974217930143682e-09 , W = [[1.00002428]] , b = [1.99991238]
step = 3200 , error value = 8.916377191454847e-11 , W = [[1.00000613]] , b = [1.99997787]
step = 3600 , error value = 5.689175780338234e-12 , W = [[1.00000155]] , b = [1.99999441]
step = 4000 , erro

In [8]:
# Sanity check of the trained model: the training data follows y = x + 2,
# so an input of 10 should give a value close to 12.
predict(10)  # y = x + 2

array([[12.]])