In [1]:
# 03_logistic_regression_diabetes
# Logistic regression (binary classification) on the diabetes CSV dataset.

import tensorflow as tf
import numpy as np
# Fix TensorFlow's global random seed so the randomly initialised
# weight and bias start from the same values on every run.
tf.random.set_seed(5)

In [2]:
# Load the dataset as a float32 matrix.
xy = np.loadtxt('data-03-diabetes.csv', dtype=np.float32, delimiter=',')
# xy.shape was (759, 9) in the recorded run; the last column is the label.

# Rows before this index are used for training, the remaining rows for testing.
split_at = 556
label_col = [-1]  # indexing with a list keeps the label column two-dimensional

# Training split (roughly 70% of the rows).
x_train, y_train = xy[:split_at, :-1], xy[:split_at, label_col]
# x_train.shape -> (556, 8), y_train.shape -> (556, 1)

# Test split (roughly 30% of the rows).
x_test, y_test = xy[split_at:, :-1], xy[split_at:, label_col]
# x_test.shape -> (203, 8)

y_test.shape  # (203, 1) -- last expression, shown as the cell output

(203, 1)

In [3]:
# Trainable parameters: weight matrix and bias.
# Matrix product shape rule: (m, n) x (n, l) = (m, l),
# so (556, 8) x (8, 1) = (556, 1) for the training features.

# W is created before b so the seeded random draws keep their original order.
W = tf.Variable(initial_value=tf.random.normal(shape=[8, 1]), name='weight')
b = tf.Variable(initial_value=tf.random.normal(shape=[1]), name='bias')

In [4]:
# Prediction function: H(X) = sigmoid(X.W + b)
# where sigmoid(z) = 1 / (1 + exp(-z)).
def hypothesis(X):
    """Return the model's predicted probability for each row of X.

    X is matrix-multiplied by the global weight `W`, the global bias `b`
    is added, and the result is passed through a sigmoid, so every output
    lies between 0 and 1.
    """
    logits = tf.matmul(X, W) + b
    return tf.sigmoid(logits)

In [5]:
# Cost function: log loss (binary cross-entropy) for the binary classifier.
# (A regression model would use the mean squared error instead.)
def cost_func():
    """Return the mean binary cross-entropy of the model on the training set.

    Reads the globals `x_train` and `y_train` and calls `hypothesis`.

    The forward pass is evaluated once and reused for both log terms.
    Predicted probabilities are clipped away from exactly 0 and 1 before
    taking logarithms: without this, a saturated sigmoid produces
    log(0) = -inf, and 0 * -inf turns the whole cost into NaN.
    Probabilities inside (eps, 1 - eps) are unaffected by the clipping.
    """
    eps = 1e-7
    probs = tf.clip_by_value(hypothesis(x_train), eps, 1. - eps)
    cost = -tf.reduce_mean(y_train * tf.math.log(probs) +
                           (1 - y_train) * tf.math.log(1 - probs))
    return cost

In [6]:
# Optimizer: Adam (an adaptive variant of gradient descent).
# Create the optimizer object with a learning rate of 0.01.
# `learning_rate` is the supported keyword; `lr` is a deprecated alias
# that newer Keras releases no longer accept.
optimizer = tf.keras.optimizers.Adam(learning_rate=0.01)

In [7]:
# Start training
print('***** Start Learning!!')
trainable = [W, b]
for step in range(10001):
    # One optimisation step: record the forward pass, differentiate the cost
    # with respect to W and b, then let the optimizer update them.
    # GradientTape + apply_gradients is used instead of optimizer.minimize()
    # because minimize() is not provided by newer Keras optimizers.
    with tf.GradientTape() as tape:
        loss = cost_func()
    grads = tape.gradient(loss, trainable)
    optimizer.apply_gradients(zip(grads, trainable))

    if step % 1000 == 0:
        # Report the cost after this step's update, plus the current parameters.
        print('%04d'%step,'cost: [',cost_func().numpy(),']',
             'W:',W.numpy(),'b:',b.numpy())

print('***** Learning Finished')

***** Start Learning!!
0000 cost: [ 0.80423117 ] W: [[-0.19030644]
 [-0.9402894 ]
 [-0.02964122]
 [-0.7525403 ]
 [ 1.3131526 ]
 [-0.6285471 ]
 [ 0.8440665 ]
 [-0.09899934]] b: [0.23652852]
1000 cost: [ 0.48396263 ] W: [[-0.7941301 ]
 [-3.3146176 ]
 [ 0.2550522 ]
 [-0.5946785 ]
 [-0.1666643 ]
 [-2.6123285 ]
 [-0.85612226]
 [ 0.10218763]] b: [0.3757753]
2000 cost: [ 0.4838197 ] W: [[-0.80596256]
 [-3.2786446 ]
 [ 0.3001422 ]
 [-0.5670278 ]
 [-0.2191708 ]
 [-2.6450553 ]
 [-0.9890654 ]
 [ 0.08347502]] b: [0.24310307]
3000 cost: [ 0.4838197 ] W: [[-0.8061921 ]
 [-3.2781093 ]
 [ 0.30098844]
 [-0.5677529 ]
 [-0.2194896 ]
 [-2.6450932 ]
 [-0.99144965]
 [ 0.08311543]] b: [0.24059941]
4000 cost: [ 0.4838197 ] W: [[-0.8061921 ]
 [-3.2781086 ]
 [ 0.30098918]
 [-0.5677529 ]
 [-0.21949011]
 [-2.645095  ]
 [-0.9914505 ]
 [ 0.08311494]] b: [0.24059796]
5000 cost: [ 0.4838197 ] W: [[-0.80619204]
 [-3.2781074 ]
 [ 0.30098864]
 [-0.5677518 ]
 [-0.2194908 ]
 [-2.6450956 ]
 [-0.99145085]
 [ 0.08311501]] b:

In [8]:
# Regression coefficients: print the learned weight and bias.
for label, variable in (('Weight :', W), ('Bias:', b)):
    print(label, variable.numpy())

Weight : [[-0.806192  ]
 [-3.2781074 ]
 [ 0.30098942]
 [-0.5677526 ]
 [-0.21949084]
 [-2.645097  ]
 [-0.99145204]
 [ 0.08311433]]
Bias: [0.24059618]


In [11]:
# Accuracy computation
# Case where training and test data are kept separate (70%:30%).
def predict(X, threshold=0.5):
    """Return hard 0/1 class predictions for each row of X.

    Args:
        X: feature matrix passed straight to `hypothesis`.
        threshold: probability above which a row is labelled 1.0.
            Defaults to 0.5, the value that was previously hard-coded.

    Returns:
        A float32 tensor holding 1.0 where hypothesis(X) > threshold
        and 0.0 elsewhere.
    """
    # The boolean comparison result is cast to float32 so it can be
    # compared against the float32 labels.
    return tf.cast(hypothesis(X) > threshold, dtype=tf.float32)

preds = predict(x_test)
# Accuracy = fraction of test rows whose predicted class equals the label.
accuracy = tf.reduce_mean(tf.cast(tf.equal(preds, y_test), dtype=tf.float32))
# The result used to be stored under the misspelled name `accuarcy`;
# keep that name as an alias so any code still reading it keeps working.
accuarcy = accuracy

print('Accuracy:', accuracy.numpy())  # recorded run: Accuracy: 0.7881773

Accuracy: 0.7881773
