In [1]:
# 05_scores_linear_regression

import tensorflow as tf
import numpy as np
# Fix TensorFlow's global random seed; W and b are later initialised with tf.random.normal.
tf.random.set_seed(5)

In [2]:
# Load the data set (the first row of the CSV is skipped, values are read as float32)
xy = np.loadtxt('data-02-test-score.csv', dtype=np.float32, delimiter=',', skiprows=1)

# Split into X (every column except the last) and Y (the last column, kept 2-D)
x_train, y_train = xy[:, :-1], xy[:, -1:]
print(x_train.shape, y_train.shape)

(25, 3) (25, 1)


In [3]:
# Initialise the trainable parameters: weight and bias
#   X   *   W   = Y
# (m,n) * (n,l) = (m,l)   : shape rule for matrix multiplication
# (25,3) * (3,1) = (25,1) : shapes for this data set
# The number of weight rows is taken from x_train instead of being hard-coded,
# so the cell keeps working if the CSV gains or loses feature columns.
W = tf.Variable(tf.random.normal([x_train.shape[1], 1]), name='weight')
b = tf.Variable(tf.random.normal([1]), name='bias')
print(W)
print(b)

<tf.Variable 'weight:0' shape=(3, 1) dtype=float32, numpy=
array([[-0.18030666],
       [-0.95028627],
       [-0.03964049]], dtype=float32)>
<tf.Variable 'bias:0' shape=(1,) dtype=float32, numpy=array([0.22652863], dtype=float32)>


In [4]:
# Prediction function (hypothesis): H(X1,X2,X3) = W1*X1 + W2*X2 + W3*X3 + b
def hypothesis(X):
    """Return the linear prediction for X using the notebook-level W and b."""
    weighted_sum = tf.matmul(X, W)  # matrix product of the inputs and the weights
    return weighted_sum + b

In [5]:
# Cost function: mean of (H(x) - y)^2
# tf.square()      : element-wise square
# tf.reduce_mean() : mean over all elements
def cost_func():
    """Return the mean squared error of hypothesis(x_train) against y_train."""
    residual = hypothesis(x_train) - y_train
    return tf.reduce_mean(tf.square(residual))

In [6]:
# Gradient-descent style optimizer
# Alternatives that were tried and left disabled (TF1 compat API):
# optimizer = tf.compat.v1.train.GradientDescentOptimizer(learning_rate=0.01)
# optimizer = tf.compat.v1.train.AdamOptimizer(learning_rate=0.01)
LEARNING_RATE = 0.01
optimizer = tf.keras.optimizers.Adam(learning_rate=LEARNING_RATE)

In [7]:
# Start training
# Each step records the forward pass on a GradientTape, differentiates the cost
# with respect to W and b, and lets the optimizer apply the update.  This is the
# explicit form of `optimizer.minimize(cost_func, var_list=[W, b])` and also works
# with optimizers that do not provide a `minimize` method.
print('***** Start Learning!!')
train_vars = [W, b]
for step in range(10001):
    with tf.GradientTape() as tape:
        loss = cost_func()
    grads = tape.gradient(loss, train_vars)
    optimizer.apply_gradients(zip(grads, train_vars))
    if step % 100 == 0:
        # The cost is re-evaluated after the update so the log shows the
        # post-step value, as it did before.
        print('%04d'%step,'cost:[',cost_func().numpy(),']',
             ' W:',W.numpy(),' b:',b.numpy())
print('***** Learning Finished!!')

***** Start Learning!!
0000 cost:[ 65066.566 ]  W: [[-0.17030665]
 [-0.9402863 ]
 [-0.02964049]]  b: [0.23652864]
0100 cost:[ 4095.1235 ]  W: [[ 0.61485434]
 [-0.15491977]
 [ 0.7556198 ]]  b: [1.0216748]
0200 cost:[ 47.590633 ]  W: [[0.85247976]
 [0.08370257]
 [0.99372613]]  b: [1.2592261]
0300 cost:[ 12.429815 ]  W: [[0.8754433 ]
 [0.10827628]
 [1.0174667 ]]  b: [1.2820694]
0400 cost:[ 12.382455 ]  W: [[0.87506783]
 [0.10988351]
 [1.0180485 ]]  b: [1.2815446]
0500 cost:[ 12.352392 ]  W: [[0.87393206]
 [0.11105164]
 [1.0180255 ]]  b: [1.2802325]
0600 cost:[ 12.318476 ]  W: [[0.8726458 ]
 [0.11237431]
 [1.0180017 ]]  b: [1.2787429]
0700 cost:[ 12.280793 ]  W: [[0.87121135]
 [0.11384594]
 [1.0179778 ]]  b: [1.2770784]
0800 cost:[ 12.239459 ]  W: [[0.86963135]
 [0.11546459]
 [1.017954  ]]  b: [1.2752405]
0900 cost:[ 12.194555 ]  W: [[0.8679064 ]
 [0.11722923]
 [1.0179302 ]]  b: [1.2732284]
1000 cost:[ 12.146095 ]  W: [[0.8660364 ]
 [0.11914004]
 [1.0179063 ]]  b: [1.2710406]
1100 cost:[ 1

In [8]:
# Print the current weight and bias values
weight_values, bias_values = W.numpy(), b.numpy()
print('Weight:', weight_values)
print('Bias:', bias_values)

Weight: [[0.35714296]
 [0.5368131 ]
 [1.147865  ]]
Bias: [-2.3444345]


In [9]:
# Prediction
print('***** Predict')
x_data = [
    [73., 80., 75.],
    [93., 88., 93.],
    [89., 91., 90.],
    [96., 98., 100.],
    [73., 66., 70.],
]
x_test = np.asarray(x_data, dtype=np.float32)
predictions = hypothesis(x_test)
print(predictions.numpy())
# Original rows (the three inputs above followed by a fourth value,
# presumably the actual score to compare the predictions against)
# 73,80,75,152
# 93,88,93,185
# 89,91,90,180
# 96,98,100,196
# 73,66,70,142

***** Predict
[[152.76192]
 [184.86086]
 [181.59914]
 [199.33546]
 [139.50722]]


In [10]:
# Accuracy metric: RMSE (Root Mean Squared Error)
def get_rmse(y_test,preds):
    """Return the root mean squared error between targets and predictions.

    Parameters
    ----------
    y_test : array-like
        Ground-truth values, shaped (n,) or (n, k).
    preds : array-like
        Predicted values with the same number of samples as ``y_test``.

    Returns
    -------
    numpy scalar
        RMSE computed over the sample axis.  When the inputs have several
        columns, the value for the first column is returned.

    Raises
    ------
    ValueError
        If ``y_test`` is empty or the two inputs hold a different number of
        samples.
    """
    y_true = np.atleast_1d(np.asarray(y_test))
    y_pred = np.atleast_1d(np.asarray(preds))

    n_samples = y_true.shape[0]
    if n_samples == 0:
        raise ValueError('get_rmse() requires at least one sample')
    if y_pred.shape[0] != n_samples:
        raise ValueError(
            'y_test and preds must have the same number of samples '
            '(%d != %d)' % (n_samples, y_pred.shape[0]))

    # Give both inputs an explicit column axis so that (n,) and (n, 1) inputs
    # line up sample-by-sample instead of broadcasting to an (n, n) matrix.
    y_true = y_true.reshape(n_samples, -1)
    y_pred = y_pred.reshape(n_samples, -1)

    mse = np.mean(np.square(y_pred - y_true), axis=0)  # one value per column
    rmse = np.sqrt(mse)
    return rmse[0]

# Case where the training data itself is reused as the validation data
x_test, y_test = x_train, y_train

preds = hypothesis(x_test).numpy()
rmse_value = get_rmse(y_test, preds)
print('RMSE:', rmse_value)  # a recorded run printed RMSE: 2.4112928

RMSE: 2.4112928
