In [1]:
import numpy as np

# Generate Dataset

In [2]:
x1 = np.random.uniform(low=0.0, high=1.0, size=100)

print(x1.shape)
x1[0:10]

(100,)


array([0.3245415 , 0.33995259, 0.53145313, 0.48824006, 0.76165161,
       0.23689749, 0.15166329, 0.23062371, 0.86347579, 0.47518454])

In [3]:
x2 = np.random.uniform(low=0.0, high=1.0, size=100)

print(x2.shape)
x2[0:10]

(100,)


array([0.79509667, 0.19750426, 0.98259336, 0.95847136, 0.52344763,
       0.46033852, 0.23068875, 0.64698783, 0.78827999, 0.18724888])

In [4]:
y = 0.3 * x1 + 0.5 * x2 + 0.1

print(y.shape)
y[0:10]

(100,)


array([0.59491078, 0.30073791, 0.75073262, 0.7257077 , 0.5902193 ,
       0.40123851, 0.26084336, 0.49268103, 0.75318273, 0.3361798 ])

### x와 y가 만들어진 공식을 알려주지 않고, 데이터만으로 공식을 얻어낼수 있는지 해보자

# 1. Random Search

In [8]:
num_epoch = 100000

best_error = 9999
best_w1 = None
best_w2 = None
best_b = None
best_epoch = None

for epoch in range(num_epoch):
    w1 = np.random.uniform(low=-1.0, high=1.0)
    w2 = np.random.uniform(low=-1.0, high=1.0)
    b = np.random.uniform(low=-1.0, high=1.0)
    
    y_predict = w1 * x1 + w2 * x2 + b
    
    error = np.abs(y_predict - y).mean()
    
    if error < best_error:
        best_error = error
        best_w1 = w1
        best_w2 = w2
        best_b = b
        best_epoch = epoch
        
        print("{} w1 = {:.6f}, w2 = {:.6f}, b = {:.6f}, error = {:.6f}".format(epoch, w1, w2, b, error))
        
        
print("---" * 10)
print("{} w1 = {:.6f}, w2 = {:.6f}, b = {:.6f}, error = {:.6f}".format(epoch, best_w1, best_w2, best_b, best_error))

0 w1 = -0.352231, w2 = 0.621201, b = -0.255448, error = 0.650663
3 w1 = -0.057070, w2 = -0.313805, b = 0.456973, error = 0.278353
6 w1 = 0.566324, w2 = 0.637105, b = -0.097082, error = 0.069649
271 w1 = 0.263510, w2 = 0.350016, b = 0.144773, error = 0.052909
338 w1 = 0.127338, w2 = 0.468527, b = 0.205609, error = 0.041807
2345 w1 = 0.309459, w2 = 0.574841, b = 0.078705, error = 0.023484
2440 w1 = 0.358304, w2 = 0.485578, b = 0.061667, error = 0.016694
12041 w1 = 0.329047, w2 = 0.485228, b = 0.102986, error = 0.012508
21494 w1 = 0.267561, w2 = 0.509483, b = 0.123583, error = 0.011278
34046 w1 = 0.314225, w2 = 0.522836, b = 0.081002, error = 0.006045
------------------------------
99999 w1 = 0.314225, w2 = 0.522836, b = 0.081002, error = 0.006045


# h-step Search

### w값을 보정할때, h값 만큼 이동(변경)하면서 값을 찾는다

In [21]:
num_epoch = 10000
h = 0.01

w1 = np.random.uniform(low=-1.0, high=1.0)
w2 = np.random.uniform(low=-1.0, high=1.0)
b = np.random.uniform(low=-1.0, high=1.0)

for epoch in range(num_epoch):
    y_predict = w1 * x1 + w2 * x2 + b
    current_error = np.abs(y_predict - y).mean()
    
    # w1 보정
    y_predict = (w1 + h) * x1 + w2 * x2 + b
    h_plus_error = np.abs(y_predict - y).mean()
    if h_plus_error < current_error:
        w1 = w1 + h
    else:
        y_predict = (w1 - h) * x1 + w2 * x2 + b
        h_minus_error = np.abs(y_predict - y).mean()
        if h_minus_error < current_error:
            w1 = w1 - h
            
#     print("{}, w1 = {:.6f}, w2 = {:.6f}, b = {:.6f}, error = {:.6f}".format(epoch, w1, w2, b, current_error))
            
    # w2 보정
    y_predict = w1 * x1 + (w2 + h) * x2 + b
    h_plus_error = np.abs(y_predict - y).mean()
    if h_plus_error < current_error:
        w2 = w2 + h
    else:
        y_predict = w1 * x1 + (w2 - h) * x2 + b
        h_minus_error = np.abs(y_predict - y).mean()
        if h_minus_error < current_error:
            w2 = w2 - h
#     print("{}, w1 = {:.6f}, w2 = {:.6f}, b = {:.6f}, error = {:.6f}".format(epoch, w1, w2, b, current_error))
    
    # b 보정
    y_predict = w1 * x1 + w2 * x2 + (b + h)
    h_plus_error = np.abs(y_predict - y).mean()
    if h_plus_error < current_error:
        b = b + h
    else:
        y_predict = w1 * x1 + w2 * x2 + (b + h)
        h_minus_error = np.abs(y_predict - y).mean()
        if h_minus_error < current_error:
            b = b - h
            
#     print("{}, w1 = {:.6f}, w2 = {:.6f}, b = {:.6f}, error = {:.6f}".format(epoch, w1, w2, b, current_error))
            
print("---" * 10)
print("{}, w1 = {:.6f}, w2 = {:.6f}, b = {:.6f}, error = {:.6f}".format(epoch, w1, w2, b, current_error))
            

------------------------------
9999, w1 = 0.306322, w2 = 0.525124, b = 0.082362, error = 0.006236


### 결론 : h(이동하는 정도) 값에 따라서 찾는 시간이 달라진다


# 3. Gradient Descent(not yet)

### w값을 보정하는 정도를 예측값과 실제값의 차이가 클때, 이동(변경)값이 크게, 작을때는 작게 적용한다

### h값을 스스로 보정한다

In [23]:
num_epoch = 100
learning_rate = 1.2

w1 = np.random.uniform(low=-1.0, high=1.0)
w2 = np.random.uniform(low=-1.0, high=1.0)
b = np.random.uniform(low=-1.0, high=1.0)

for epoch in range(num_epoch):
    y_predict = w1 * x1 + w2 * x2 + b
    
    error = np.abs(y_predict - y).mean()
    if error < 0.005:
        break
        
    w1 = w1 - learning_rate * ((y_predict - y) * x1).mean()
    w2 = w2 - learning_rate * ((y_predict - y) * x2).mean()
    b = b - learning_rate * ((y_predict - y)).mean()
    
    print("{}, w1 = {:.6f}, w2 = {:.6f}, b = {:.6f}, error = {:.6f}". format(epoch, w1, w2, b, error))
    
print("----" * 10)
print("{}, w1 = {:.6f}, w2 = {:.6f}, b = {:.6f}, error = {:.6f}". format(epoch, w1, w2, b, error))

0, w1 = -0.569994, w2 = 0.407021, b = 0.989851, error = 0.405637
1, w1 = -0.732058, w2 = 0.190066, b = 0.548336, error = 0.368741
2, w1 = -0.461240, w2 = 0.378313, b = 0.873860, error = 0.334978
3, w1 = -0.586780, w2 = 0.207291, b = 0.517597, error = 0.299550
4, w1 = -0.366690, w2 = 0.359928, b = 0.774397, error = 0.277743
5, w1 = -0.463523, w2 = 0.225408, b = 0.486627, error = 0.245741
6, w1 = -0.284409, w2 = 0.349400, b = 0.688873, error = 0.231368
7, w1 = -0.358721, w2 = 0.243861, b = 0.456166, error = 0.204679
8, w1 = -0.212737, w2 = 0.344786, b = 0.615140, error = 0.194580
9, w1 = -0.269417, w2 = 0.262227, b = 0.426728, error = 0.171919
10, w1 = -0.150243, w2 = 0.344554, b = 0.551410, error = 0.164655
11, w1 = -0.193153, w2 = 0.280196, b = 0.398657, error = 0.145180
12, w1 = -0.095698, w2 = 0.347507, b = 0.496193, error = 0.140006
13, w1 = -0.127885, w2 = 0.297542, b = 0.372171, error = 0.123412
14, w1 = -0.048046, w2 = 0.352712, b = 0.448241, error = 0.119582
15, w1 = -0.071910, 