In [1]:
import tensorflow as tf
import numpy as np
# Fix TensorFlow's global random seed so that values drawn with tf.random.*
# (such as randomly initialised variables) are repeatable from run to run.
tf.random.set_seed(5)

In [2]:
# Load the data: both CSV files are parsed as float32 and their first
# (header) row is skipped.
xy_train = np.loadtxt('boston_train.csv', skiprows=1, delimiter=',', dtype=np.float32)
xy_test = np.loadtxt('boston_test.csv', skiprows=1, delimiter=',', dtype=np.float32)

# Split into X and Y.
#   X: every column except the last one
#   Y: only the last column; indexing with the list [-1] keeps it 2-D, (rows, 1)
x_train, y_train = xy_train[:, :-1], xy_train[:, [-1]]
x_test, y_test = xy_test[:, :-1], xy_test[:, [-1]]

# Show the (X, Y) shapes, train first and then test.
for features, targets in ((x_train, y_train), (x_test, y_test)):
    print(features.shape, targets.shape)

(400, 9) (400, 1)
(100, 9) (100, 1)


In [3]:
# Initialise the trainable parameters: weight matrix W and bias b.
#   X   *   W   = Y
# (m,n) * (n,l) = (m,l)   : shape rule for matrix multiplication
# e.g. (400,9) * (9,1) = (400,1)
#
# The number of rows of W is read from the training data rather than being
# hard-coded, so W always matches the number of feature columns in x_train.
n_features = x_train.shape[1]
W = tf.Variable(tf.random.normal([n_features,1]),name='weight')
b = tf.Variable(tf.random.normal([1]),name='bias')
print(W)
print(b)

<tf.Variable 'weight:0' shape=(9, 1) dtype=float32, numpy=
array([[-0.18030666],
       [-0.95028627],
       [-0.03964049],
       [-0.7425406 ],
       [ 1.3231523 ],
       [-0.61854804],
       [ 0.8540664 ],
       [-0.08899953],
       [ 2.4488697 ]], dtype=float32)>
<tf.Variable 'bias:0' shape=(1,) dtype=float32, numpy=array([0.22652863], dtype=float32)>


In [4]:
# Prediction function (hypothesis): H(X1,X2,X3) = W1*X1 + W2*X2 + W3*X3 + b
def hypothesis(X):
    """Return the linear prediction ``X @ W + b``.

    Uses the module-level variables ``W`` (weights) and ``b`` (bias).

    Args:
        X: feature matrix that can be matrix-multiplied with ``W``.

    Returns:
        The result of ``tf.matmul(X, W)`` with ``b`` added (broadcast).
    """
    weighted_sum = tf.matmul(X, W)  # matrix product of the inputs and the weights
    return weighted_sum + b

In [5]:
# Cost function: the mean of (H(x) - y)^2
# tf.square()      : element-wise square
# tf.reduce_mean() : mean over all elements
def cost_func():
    """Return the mean squared error of ``hypothesis`` on the training set.

    Reads the module-level ``x_train`` and ``y_train``; takes no arguments so
    that it can be called as a zero-argument loss function.
    """
    residual = hypothesis(x_train) - y_train
    return tf.reduce_mean(tf.square(residual))

In [6]:
# Gradient-descent optimizer
# Alternatives that were tried previously (TF1 compat API):
# optimizer = tf.compat.v1.train.GradientDescentOptimizer(learning_rate=0.01)
# optimizer = tf.compat.v1.train.AdamOptimizer(learning_rate=0.01)
LEARNING_RATE = 0.01  # step size passed to Adam
optimizer = tf.keras.optimizers.Adam(learning_rate=LEARNING_RATE)

In [7]:
# Training loop
# Every step records the forward pass on a GradientTape, differentiates the
# cost with respect to W and b, and lets the optimizer apply the update.
# This explicit tape + apply_gradients form is used instead of
# optimizer.minimize(callable, var_list=...), which Keras 3 optimizers
# (bundled with newer TensorFlow releases) no longer provide.
print('***** Start Learning!!')
trainable_vars = [W, b]
for step in range(10001):
    with tf.GradientTape() as tape:
        loss = cost_func()
    grads = tape.gradient(loss, trainable_vars)
    optimizer.apply_gradients(zip(grads, trainable_vars))
    if step % 100 == 0:
        # Re-evaluate the cost so that the logged value reflects the
        # parameters *after* this step's update (same as the previous log).
        print('%04d'%step,'cost:[',cost_func().numpy(),']',
             ' W:',W.numpy(),' b:',b.numpy())
print('***** Learning Finished!!') 

***** Start Learning!!
0000 cost:[ 3380.4307 ]  W: [[-0.17030665]
 [-0.9402863 ]
 [-0.0296405 ]
 [-0.7325406 ]
 [ 1.3331523 ]
 [-0.60854805]
 [ 0.86406636]
 [-0.07899953]
 [ 2.4588697 ]]  b: [0.23652864]
0100 cost:[ 229.13736 ]  W: [[-0.15476926]
 [-0.31377742]
 [ 0.00398452]
 [-0.60582733]
 [ 1.5075017 ]
 [-0.4209615 ]
 [ 0.9844709 ]
 [-0.00817521]
 [ 2.5139847 ]]  b: [0.36212817]
0200 cost:[ 157.43752 ]  W: [[-0.04703178]
 [-0.13201119]
 [ 0.0094742 ]
 [-0.60902196]
 [ 1.4821295 ]
 [-0.27699775]
 [ 0.6235119 ]
 [-0.02565848]
 [ 2.3416998 ]]  b: [0.27564687]
0300 cost:[ 119.14844 ]  W: [[-0.01426036]
 [-0.03600126]
 [ 0.01243847]
 [-0.63644195]
 [ 1.4332793 ]
 [-0.17521016]
 [ 0.19860902]
 [-0.03165422]
 [ 2.127358  ]]  b: [0.15373822]
0400 cost:[ 96.75279 ]  W: [[-0.05111112]
 [ 0.03484004]
 [-0.00284971]
 [-0.6663554 ]
 [ 1.4074287 ]
 [-0.11243326]
 [-0.18458678]
 [-0.03038936]
 [ 1.9173867 ]]  b: [0.04282528]
0500 cost:[ 83.53447 ]  W: [[-0.10528348]
 [ 0.08501689]
 [-0.0339421 ]
 

In [8]:
# Print the learned weight and bias values
for label, variable in (('Weight:', W), ('Bias:', b)):
    print(label, variable.numpy())

Weight: [[-1.5909408e-01]
 [ 4.5220152e-02]
 [-1.2285680e-01]
 [-1.1958078e+01]
 [ 6.7748103e+00]
 [-4.5642413e-02]
 [-1.5349679e+00]
 [-1.1495309e-03]
 [-8.0022174e-01]]
Bias: [12.783213]


In [9]:
# Prediction: run the trained hypothesis on the test features
print('***** Predict')

test_predictions = hypothesis(x_test).numpy()
print(test_predictions)


***** Predict
[[22.288443]
 [34.018055]
 [23.608814]
 [31.791716]
 [34.398956]
 [25.312082]
 [24.118732]
 [14.349291]
 [28.448723]
 [21.676699]
 [27.11798 ]
 [19.928455]
 [20.617245]
 [42.513123]
 [15.641199]
 [17.402775]
 [27.89333 ]
 [18.62927 ]
 [29.603498]
 [18.413952]
 [18.230865]
 [23.329603]
 [25.594501]
 [19.528465]
 [19.135303]
 [22.500404]
 [31.157026]
 [37.50821 ]
 [24.671635]
 [21.789696]
 [23.428635]
 [22.312906]
 [32.698616]
 [33.714577]
 [34.080162]
 [22.599178]
 [20.925098]
 [39.330246]
 [22.281376]
 [26.387856]
 [23.415722]
 [22.689146]
 [16.055138]
 [21.966604]
 [23.150196]
 [17.388622]
 [17.589294]
 [22.276691]
 [18.132572]
 [20.944283]
 [ 9.122076]
 [16.078356]
 [22.558825]
 [23.031254]
 [26.63078 ]
 [24.53724 ]
 [18.450928]
 [17.728512]
 [28.797588]
 [20.56004 ]
 [20.204235]
 [17.66756 ]
 [24.062855]
 [30.999205]
 [17.158567]
 [16.00632 ]
 [31.998968]
 [24.219425]
 [29.878717]
 [29.502562]
 [27.914314]
 [23.773895]
 [21.313408]
 [21.31078 ]
 [27.811607]
 [21.044306

In [10]:
# Accuracy metric: RMSE (Root Mean Squared Error)
def get_rmse(y_test,preds):
    """Return the root mean squared error between targets and predictions.

    The computation is vectorised with NumPy instead of looping over samples.
    Both inputs are viewed as ``(n_samples, -1)`` so that 1-D vectors and
    ``(n_samples, 1)`` column matrices can be mixed freely.

    Args:
        y_test: true target values, array-like of length ``n_samples``.
        preds: predicted values, array-like of length ``n_samples``.

    Returns:
        The RMSE as a NumPy scalar. If the inputs have several columns, the
        RMSE of the first column is returned (as the original loop did).

    Raises:
        ValueError: if there are no samples, or if ``y_test`` and ``preds``
            contain a different number of samples.
    """
    y_true = np.asarray(y_test)
    y_pred = np.asarray(preds)

    n_samples = len(y_true)
    if n_samples == 0:
        raise ValueError('get_rmse requires at least one sample')
    if len(y_pred) != n_samples:
        raise ValueError(
            'y_test and preds must have the same number of samples '
            '(got %d and %d)' % (n_samples, len(y_pred)))

    # Force a 2-D layout so (n,) and (n, 1) inputs subtract row by row
    # instead of broadcasting into an (n, n) matrix.
    y_true = y_true.reshape(n_samples, -1)
    y_pred = y_pred.reshape(n_samples, -1)

    mse = np.mean(np.square(y_pred - y_true), axis=0)  # per-column MSE
    return np.sqrt(mse)[0]


# Evaluate the trained model on the test set and report its RMSE.
preds = hypothesis(x_test).numpy()
rmse_value = get_rmse(y_test, preds)
print('RMSE:', rmse_value)

RMSE: 4.157873
