In [1]:
# Binary classification model for diabetes diagnosis

import tensorflow as tf
import numpy as np
# Seed TensorFlow's global random generator so the tf.random.normal draws
# used to initialise W and b are repeatable from run to run.
tf.random.set_seed(5)

In [2]:
# Load the data: each row is one sample; the last column is the label (Y)
# and every preceding column is a feature (X).
xy = np.loadtxt('data-03-diabetes.csv',delimiter=',',dtype=np.float32)

# Derive the split point from the row count instead of hard-coding it, so the
# cell keeps working if the file grows or shrinks.
TRAIN_RATIO = 0.7
n_train = int(len(xy) * TRAIN_RATIO)   # 531 when the file has 759 rows

# Training split: the first 70% of the rows (no shuffling)
x_train = xy[:n_train,:-1]   # X
y_train = xy[:n_train,[-1]]  # Y; the list index [-1] keeps the result 2-D
print(x_train.shape,y_train.shape)  # (531,8)  (531,1)

# Test split: the remaining 30% of the rows
x_test = xy[n_train:,:-1]    # X
y_test = xy[n_train:,[-1]]   # Y
print(x_test.shape,y_test.shape)    # (228,8)  (228,1)

(531, 8) (531, 1)
(228, 8) (228, 1)


In [3]:
# Scratch check: 70% of the 759 rows; the training split uses the first 531.
759*0.7

531.3

In [4]:
# Scratch check: rows left for the test split once 531 are used for training.
759-531

228

In [5]:
# Initialise the trainable parameters: weight and bias
#   X   @   W   = Y
# (m,n) @ (n,l) = (m,l)     : matrix-product shape rule
# (531,8) @ (8,1) = (531,1)
# The weight is drawn before the bias so the seeded random sequence is
# consumed in the same order on every run.
initial_weight = tf.random.normal(shape=(8, 1))
W = tf.Variable(initial_weight, name='weight')

initial_bias = tf.random.normal(shape=(1,))
b = tf.Variable(initial_bias, name='bias')

for variable in (W, b):
    print(variable)

<tf.Variable 'weight:0' shape=(8, 1) dtype=float32, numpy=
array([[-0.18030666],
       [-0.95028627],
       [-0.03964049],
       [-0.7425406 ],
       [ 1.3231523 ],
       [-0.61854804],
       [ 0.8540664 ],
       [-0.08899953]], dtype=float32)>
<tf.Variable 'bias:0' shape=(1,) dtype=float32, numpy=array([0.22652863], dtype=float32)>


In [6]:
# Prediction function (hypothesis): H(X) = sigmoid(X @ W + b)
def hypothesis(X):
    """Return the model output for the rows of ``X``.

    Computes ``X @ W + b`` with the module-level variables ``W`` and ``b``
    and squashes the result through a sigmoid, so every output lies
    between 0 and 1.
    """
    logits = tf.matmul(X, W) + b
    return tf.sigmoid(logits)

In [7]:
# Cost function: log loss (binary cross-entropy) for the binary classifier
def cost_func():
    """Return the mean binary cross-entropy of the model on the training set.

    Reads the module-level ``x_train`` / ``y_train`` and evaluates
    ``hypothesis(x_train)`` once per call.

    The predictions are clipped to ``[eps, 1 - eps]`` before taking the
    logarithm. Without the clip, a prediction that saturates to exactly 0 or 1
    makes ``log`` return ``-inf``, and the ``0 * -inf`` product for the
    matching label turns the whole cost into NaN.

    Returns:
        A scalar tensor holding the averaged loss.
    """
    eps = 1e-7
    probs = tf.clip_by_value(hypothesis(x_train), eps, 1.0 - eps)
    per_sample = (y_train*tf.math.log(probs) +
                  (1 - y_train)*tf.math.log(1 - probs))
    return -tf.reduce_mean(per_sample)

In [8]:
# Optimizer
# Create the optimizer object: Adam with learning_rate set to 0.01
optimizer = tf.keras.optimizers.Adam(learning_rate=0.01)

In [9]:
# Start training
print('***** Start Learning!!')
trainable_vars = [W, b]
for step in range(10001):
    # One optimisation step written out explicitly: record the cost under a
    # GradientTape, differentiate it with respect to W and b, and let the
    # optimizer apply the update. This performs the same update as
    # optimizer.minimize(cost_func, var_list=[W, b]) without relying on the
    # optimizer object providing a minimize() method.
    with tf.GradientTape() as tape:
        loss = cost_func()
    grads = tape.gradient(loss, trainable_vars)
    optimizer.apply_gradients(zip(grads, trainable_vars))

    if step % 100 == 0:
        # Report the cost as it stands after this step's update.
        print('%04d'%step,'cost:[',cost_func().numpy(),']',
             ' W:',W.numpy(),' b:',b.numpy())
print('***** Learning Finished!!')

***** Start Learning!!
0000 cost:[ 0.8006097 ]  W: [[-0.19030644]
 [-0.9402895 ]
 [-0.02964124]
 [-0.7525403 ]
 [ 1.3131526 ]
 [-0.6285471 ]
 [ 0.8440665 ]
 [-0.09899934]]  b: [0.23652852]
0100 cost:[ 0.527278 ]  W: [[-0.6510297 ]
 [-1.9078268 ]
 [ 0.23280826]
 [-1.2235696 ]
 [ 0.70609117]
 [-1.4086562 ]
 [ 0.3165267 ]
 [-0.42911762]]  b: [0.64593804]
0200 cost:[ 0.50196886 ]  W: [[-0.700372  ]
 [-2.6286895 ]
 [ 0.16853541]
 [-1.2493114 ]
 [ 0.43074816]
 [-1.8856583 ]
 [ 0.0790083 ]
 [-0.26116478]]  b: [0.66892827]
0300 cost:[ 0.4933479 ]  W: [[-0.7115045 ]
 [-3.0237334 ]
 [ 0.19624364]
 [-1.1704351 ]
 [ 0.2555695 ]
 [-2.166043  ]
 [-0.1320182 ]
 [-0.10620078]]  b: [0.6788835]
0400 cost:[ 0.48995656 ]  W: [[-7.1795529e-01]
 [-3.2340927e+00]
 [ 2.1906522e-01]
 [-1.0466375e+00]
 [ 1.3929123e-01]
 [-2.3197508e+00]
 [-3.0422193e-01]
 [ 1.3162857e-03]]  b: [0.6713181]
0500 cost:[ 0.48841515 ]  W: [[-0.7261078 ]
 [-3.3358548 ]
 [ 0.22019202]
 [-0.9183983 ]
 [ 0.05383505]
 [-2.4010112 ]
 [-0.

In [10]:
# Print the learned weight and bias
for label, variable in (('Weight:', W), ('Bias:', b)):
    print(label, variable.numpy())

Weight: [[-0.76468706]
 [-3.331145  ]
 [ 0.24389529]
 [-0.5480679 ]
 [-0.21599688]
 [-2.5523162 ]
 [-0.8651065 ]
 [ 0.11381915]]
Bias: [0.3824031]


In [11]:
# Evaluation: accuracy
def predict(X):
    """Return hard 0/1 predictions (as float32) for the rows of ``X``.

    An output of ``hypothesis`` strictly greater than 0.5 maps to 1.0;
    anything else maps to 0.0.
    """
    is_positive = tf.greater(hypothesis(X), 0.5)
    return tf.cast(is_positive, dtype=tf.float32)

# Predict labels for the test set
preds = predict(x_test)

# Accuracy: fraction of test rows whose predicted label equals the true label
is_correct = tf.equal(preds, y_test)
accuracy = tf.reduce_mean(tf.cast(is_correct, dtype=tf.float32))
print('Accuracy:', accuracy.numpy())
# Uncomment to inspect the raw model outputs and the thresholded predictions:
# print('Hypothesis:\n',hypothesis(x_test).numpy())
# print('Predict:\n',preds.numpy())

Accuracy: 0.78070176
