In [1]:
import pandas as pd
import numpy as np

In [2]:
# Location of the raw data file (UCI Machine Learning Repository archive).
data_url = (
    "http://archive.ics.uci.edu/ml/machine-learning-databases/"
    "postoperative-patient-data/post-operative.data"
)

# Column labels applied when the file is read, in file order:
# eight attributes followed by the discharge decision (the prediction target).
names = [
    "L-CORE",
    "L-SURF",
    "L-O2",
    "L-BP",
    "SURF-STBL",
    "CORE-STBL",
    "BP-STBL",
    "COMFORT",
    "decision ADM-DECS",
]

1. L-CORE (patient's internal temperature in C): 
high (> 37), mid (>= 36 and <= 37), low (< 36) 

2. L-SURF (patient's surface temperature in C):  
high (> 36.5), mid (>= 35 and <= 36.5), low (< 35) 

3. L-O2 (oxygen saturation in %): 
excellent (>= 98), good (>= 90 and < 98), 
fair (>= 80 and < 90), poor (< 80) 

4. L-BP (last measurement of blood pressure): 
high (> 130/90), mid (<= 130/90 and >= 90/70), low (< 90/70) 

5. SURF-STBL (stability of patient's surface temperature): 
stable, mod-stable, unstable 

6. CORE-STBL (stability of patient's core temperature) 
stable, mod-stable, unstable 

7. BP-STBL (stability of patient's blood pressure) 
stable, mod-stable, unstable 

8. COMFORT 
(patient's perceived comfort at discharge, measured as an integer between 0 and 20) 

9. decision ADM-DECS (discharge decision): 
I (patient sent to Intensive Care Unit), 
S (patient prepared to go home), 
A (patient sent to general hospital floor)

In [3]:
# Read the raw CSV. Column names are passed explicitly, so pandas treats the
# first line of the file as data rather than as a header.
df = pd.read_csv(data_url, names=names)

# Remove the invalid records (rows 3, 46, 48 and 70) and renumber the rest from 0.
# drop=True discards the old index instead of materialising it as an "index"
# column that would then have to be dropped in a second step.
df = df.drop([3, 46, 48, 70]).reset_index(drop=True)

# DataFrame.ix was removed in pandas 1.0; .loc performs the same label-based,
# end-inclusive slice.
df.loc[:10]

Unnamed: 0,L-CORE,L-SURF,L-O2,L-BP,SURF-STBL,CORE-STBL,BP-STBL,COMFORT,decision ADM-DECS
0,mid,low,excellent,mid,stable,stable,stable,15,A
1,mid,high,excellent,high,stable,stable,stable,10,S
2,high,low,excellent,high,stable,stable,mod-stable,10,A
3,mid,mid,excellent,high,stable,stable,stable,10,A
4,high,low,good,mid,stable,stable,unstable,15,S
5,mid,low,excellent,high,stable,stable,mod-stable,5,S
6,high,mid,excellent,mid,unstable,unstable,stable,10,S
7,mid,high,good,mid,stable,stable,stable,10,S
8,mid,low,excellent,mid,unstable,stable,mod-stable,10,S
9,mid,mid,good,mid,stable,stable,stable,15,A


In [4]:
# Columns holding categorical levels (every column except COMFORT).
names_list = [
    "L-CORE",
    "L-SURF",
    "L-O2",
    "L-BP",
    "SURF-STBL",
    "CORE-STBL",
    "BP-STBL",
    "decision ADM-DECS",
]

# Target dtype per column: "category" for the categorical columns,
# float32 for the numeric COMFORT score.
column_dtypes = dict.fromkeys(names_list, "category")
column_dtypes["COMFORT"] = np.float32

# Cast each column in place on the existing frame.
for column, dtype in column_dtypes.items():
    df[column] = df[column].astype(dtype)


In [5]:
# Feature frame: every column except the target.
x_data_df = df.drop(["decision ADM-DECS"], axis=1)
# Constant column of ones placed first; it acts as the intercept term.
x_data_df.insert(0, "bias", 1)
# DataFrame.ix was removed in pandas 1.0; .loc keeps the same label-based,
# end-inclusive row selection.
x_data_df.loc[:5, :]

Unnamed: 0,bias,L-CORE,L-SURF,L-O2,L-BP,SURF-STBL,CORE-STBL,BP-STBL,COMFORT
0,1,mid,low,excellent,mid,stable,stable,stable,15.0
1,1,mid,high,excellent,high,stable,stable,stable,10.0
2,1,high,low,excellent,high,stable,stable,mod-stable,10.0
3,1,mid,mid,excellent,high,stable,stable,stable,10.0
4,1,high,low,good,mid,stable,stable,unstable,15.0
5,1,mid,low,excellent,high,stable,stable,mod-stable,5.0


In [6]:
# One-hot encode the categorical columns; numeric columns pass through unchanged.
# get_dummies returns a new frame, so no defensive copy of x_data_df is needed.
x_data = pd.get_dummies(x_data_df)
# DataFrame.ix was removed in pandas 1.0; .loc is the label-based equivalent.
x_data.loc[:5]

Unnamed: 0,bias,COMFORT,L-CORE_high,L-CORE_low,L-CORE_mid,L-SURF_high,L-SURF_low,L-SURF_mid,L-O2_excellent,L-O2_good,...,L-BP_low,L-BP_mid,SURF-STBL_stable,SURF-STBL_unstable,CORE-STBL_mod-stable,CORE-STBL_stable,CORE-STBL_unstable,BP-STBL_mod-stable,BP-STBL_stable,BP-STBL_unstable
0,1,15.0,0.0,0.0,1.0,0.0,1.0,0.0,1.0,0.0,...,0.0,1.0,1.0,0.0,0.0,1.0,0.0,0.0,1.0,0.0
1,1,10.0,0.0,0.0,1.0,1.0,0.0,0.0,1.0,0.0,...,0.0,0.0,1.0,0.0,0.0,1.0,0.0,0.0,1.0,0.0
2,1,10.0,1.0,0.0,0.0,0.0,1.0,0.0,1.0,0.0,...,0.0,0.0,1.0,0.0,0.0,1.0,0.0,1.0,0.0,0.0
3,1,10.0,0.0,0.0,1.0,0.0,0.0,1.0,1.0,0.0,...,0.0,0.0,1.0,0.0,0.0,1.0,0.0,0.0,1.0,0.0
4,1,15.0,1.0,0.0,0.0,0.0,1.0,0.0,0.0,1.0,...,0.0,1.0,1.0,0.0,0.0,1.0,0.0,0.0,0.0,1.0
5,1,5.0,0.0,0.0,1.0,0.0,1.0,0.0,1.0,0.0,...,0.0,0.0,1.0,0.0,0.0,1.0,0.0,1.0,0.0,0.0


In [7]:
# Convert the encoded feature frame to a float64 ndarray.
# DataFrame.as_matrix() was removed in pandas 1.0. np.asarray also accepts an
# ndarray, so re-running this cell after x_data has already been converted no
# longer fails, and the explicit dtype guarantees a numeric matrix even when the
# dummy columns are stored as booleans or integers.
x_data = np.asarray(x_data, dtype=np.float64)
x_data[:5]

array([[  1.,  15.,   0.,   0.,   1.,   0.,   1.,   0.,   1.,   0.,   0.,
          0.,   1.,   1.,   0.,   0.,   1.,   0.,   0.,   1.,   0.],
       [  1.,  10.,   0.,   0.,   1.,   1.,   0.,   0.,   1.,   0.,   1.,
          0.,   0.,   1.,   0.,   0.,   1.,   0.,   0.,   1.,   0.],
       [  1.,  10.,   1.,   0.,   0.,   0.,   1.,   0.,   1.,   0.,   1.,
          0.,   0.,   1.,   0.,   0.,   1.,   0.,   1.,   0.,   0.],
       [  1.,  10.,   0.,   0.,   1.,   0.,   0.,   1.,   1.,   0.,   1.,
          0.,   0.,   1.,   0.,   0.,   1.,   0.,   0.,   1.,   0.],
       [  1.,  15.,   1.,   0.,   0.,   0.,   1.,   0.,   0.,   1.,   0.,
          0.,   1.,   1.,   0.,   0.,   1.,   0.,   0.,   0.,   1.]])

In [8]:
from sklearn import preprocessing  # provides MinMaxScaler (min-max scaling)

min_max_scaler = preprocessing.MinMaxScaler()

# Rescale column 1 of the feature matrix to the [0, 1] range.
# scikit-learn transformers expect 2-D input of shape (n_samples, n_features)
# and recent versions raise on a 1-D array, so the column is reshaped to a
# single-feature matrix for fitting and flattened again before being written back.
# NOTE(review): the scaler is fitted on all rows, before any train/test split.
column_2d = x_data[:, 1].reshape(-1, 1)
x_data[:, 1] = min_max_scaler.fit_transform(column_2d).ravel()

x_data[:5]



array([[ 1. ,  1. ,  0. ,  0. ,  1. ,  0. ,  1. ,  0. ,  1. ,  0. ,  0. ,
         0. ,  1. ,  1. ,  0. ,  0. ,  1. ,  0. ,  0. ,  1. ,  0. ],
       [ 1. ,  0.5,  0. ,  0. ,  1. ,  1. ,  0. ,  0. ,  1. ,  0. ,  1. ,
         0. ,  0. ,  1. ,  0. ,  0. ,  1. ,  0. ,  0. ,  1. ,  0. ],
       [ 1. ,  0.5,  1. ,  0. ,  0. ,  0. ,  1. ,  0. ,  1. ,  0. ,  1. ,
         0. ,  0. ,  1. ,  0. ,  0. ,  1. ,  0. ,  1. ,  0. ,  0. ],
       [ 1. ,  0.5,  0. ,  0. ,  1. ,  0. ,  0. ,  1. ,  1. ,  0. ,  1. ,
         0. ,  0. ,  1. ,  0. ,  0. ,  1. ,  0. ,  0. ,  1. ,  0. ],
       [ 1. ,  1. ,  1. ,  0. ,  0. ,  0. ,  1. ,  0. ,  0. ,  1. ,  0. ,
         0. ,  1. ,  1. ,  0. ,  0. ,  1. ,  0. ,  0. ,  0. ,  1. ]])

In [9]:
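# Unused alternative: keep the labels as a single column instead of one-hot encoding them.
# y_data = np.reshape(df["decision ADM-DECS"].values, (-1, 1))  # convert to np.array
# y_data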

In [10]:
# One-hot encode the target column: one row per sample, one column per class.
# DataFrame.ix and DataFrame.as_matrix() were both removed in pandas 1.0; plain
# column selection plus np.asarray works on every pandas version, and the
# explicit dtype keeps the labels as floats regardless of the dummy dtype.
y_data = np.asarray(pd.get_dummies(df["decision ADM-DECS"]), dtype=np.float64)
y_data[:5, :]

array([[ 1.,  0.,  0.],
       [ 0.,  0.,  1.],
       [ 1.,  0.,  0.],
       [ 1.,  0.,  0.],
       [ 0.,  0.,  1.]])

In [11]:
import random

SPLIT_SEED = 42       # fixed seed so the partition is identical on every run
TRAIN_FRACTION = 0.8  # share of rows used for training

n_samples = y_data.shape[0]
all_idx = list(range(n_samples))

# Draw the training rows without replacement from a dedicated, seeded generator
# (the global random state is left untouched).
training_idx = random.Random(SPLIT_SEED).sample(all_idx, int(n_samples * TRAIN_FRACTION))

# The test set is every row that was not drawn for training. A set gives O(1)
# membership checks and avoids removing items from a list while iterating over it.
training_idx_set = set(training_idx)
test_idx = [idx for idx in all_idx if idx not in training_idx_set]
assert len(training_idx) + len(test_idx) == n_samples

print(training_idx, "\n", test_idx)
x_training, x_test = x_data[training_idx, :], x_data[test_idx, :]
y_training, y_test = y_data[training_idx, :], y_data[test_idx, :]

x_training.shape, x_test.shape

[74, 64, 69, 6, 40, 77, 10, 72, 68, 3, 24, 21, 57, 84, 34, 52, 5, 35, 38, 81, 31, 36, 29, 22, 37, 80, 30, 85, 78, 70, 53, 7, 8, 48, 47, 82, 11, 45, 71, 55, 0, 83, 43, 1, 17, 14, 25, 20, 12, 27, 67, 9, 75, 63, 60, 33, 51, 4, 61, 39, 50, 54, 62, 16, 65, 18, 42, 73] 
 [2, 13, 15, 19, 23, 26, 28, 32, 41, 44, 46, 49, 56, 58, 59, 66, 76, 79]


((68, 21), (18, 21))

In [12]:
import tensorflow as tf

# The placeholder/session API used below is TensorFlow 1.x style. TensorFlow 2.x
# keeps it under tf.compat.v1 and requires eager execution to be switched off
# before a graph is built.
if int(tf.__version__.split(".")[0]) >= 2:
    tf = tf.compat.v1
    tf.disable_eager_execution()

n_features = x_data.shape[1]
n_classes = y_data.shape[1]

# Inputs: a batch of feature rows and the matching one-hot labels.
X = tf.placeholder("float", [None, n_features])
Y = tf.placeholder("float", [None, n_classes])

# Weight matrix (features x classes), initialised to zeros.
W = tf.Variable(tf.zeros([n_features, n_classes]))

logits = tf.matmul(X, W)
hypothesis = tf.nn.softmax(logits)  # predicted class probabilities
learning_rate = 0.001

# Mean cross-entropy over the batch. log_softmax(logits) is mathematically equal
# to log(softmax(logits)) but is computed without ever taking log(0), which
# would otherwise turn the cost into NaN once a probability underflows.
cost = tf.reduce_mean(-tf.reduce_sum(Y * tf.nn.log_softmax(logits), axis=1))
optimizer = tf.train.GradientDescentOptimizer(learning_rate).minimize(cost)

# Replaces the deprecated tf.initialize_all_variables().
init = tf.global_variables_initializer()

# The session is intentionally left open: later cells evaluate `hypothesis` with it.
sess = tf.Session()
sess.run(init)

train_feed = {X: x_training, Y: y_training}
for i in range(10000000):
    # One full-batch gradient-descent step.
    sess.run(optimizer, feed_dict=train_feed)
    if i % 40000 == 0:
        # Periodic progress report: current training cost and weights.
        print("%d 's iteration" % i)
        cost_value, weights = sess.run([cost, W], feed_dict=train_feed)
        print(cost_value, weights)

# Learned weights after the final step.
finale_theta = sess.run(W)

0 's iteration
1.09742 [[  3.57843179e-04  -3.18627484e-04  -3.92157017e-05]
 [  2.17156878e-04  -1.87254918e-04  -2.99019757e-05]
 [  4.90196144e-05  -3.92156944e-05  -9.80392542e-06]
 [  6.37254998e-05  -6.86274652e-05   4.90195907e-06]
 [  2.45098054e-04  -2.10784332e-04  -3.43137399e-05]
 [  6.86274579e-05  -4.90196180e-05  -1.96078508e-05]
 [  7.35294161e-05  -7.35294234e-05  -5.58793550e-12]
 [  2.15686290e-04  -1.96078443e-04  -1.96078545e-05]
 [  1.71568623e-04  -1.66666679e-04  -4.90196908e-06]
 [  1.86274527e-04  -1.51960790e-04  -3.43137326e-05]
 [  1.37254901e-04  -9.80392288e-05  -3.92156944e-05]
 [  9.80392269e-06  -4.90196180e-06  -4.90196180e-06]
 [  2.10784332e-04  -2.15686290e-04   4.90195225e-06]
 [  1.42156874e-04  -1.51960790e-04   9.80391815e-06]
 [  2.15686290e-04  -1.66666679e-04  -4.90196217e-05]
 [  9.80392269e-06  -4.90196180e-06  -4.90196180e-06]
 [  3.48039262e-04  -2.99019652e-04  -4.90196217e-05]
 [ -9.31322619e-13  -1.47058854e-05   1.47058827e-05]
 [  8

In [15]:
# Show the learned weight matrix fetched from W after training:
# one row per input feature, one column per output class.
print(finale_theta)

[[ 1.42709887 -2.          0.51138794]
 [ 1.41275775 -2.11049509  0.96518505]
 [ 1.62616968 -2.35525465  1.        ]
 [ 0.9010787  -1.71937561  0.75854373]
 [-1.          2.         -1.26108801]
 [ 0.9984374  -1.70976782  0.7103188 ]
 [-0.66398418  1.42779517 -0.76739448]
 [ 1.09483767 -1.86216366  0.67849666]
 [ 2.03119731 -3.96834087  1.57466722]
 [-0.71280283  1.77582836 -1.        ]
 [ 0.5302031  -2.          1.35824895]
 [ 1.75962973 -0.04205898 -1.71363759]
 [-0.90859401  0.07540716  0.9251771 ]
 [-0.30587885  0.50920886 -0.28087038]
 [ 1.85505891 -2.43659878  0.86435235]
 [ 2.         -0.09227526 -2.        ]
 [ 0.62378907 -1.98404014  1.01425731]
 [-1.2921977  -0.0716084   1.48810041]
 [ 0.97726691 -2.          1.        ]
 [ 1.10086739 -2.          0.81219757]
 [-0.61478412  2.         -1.25220323]]


In [14]:
# Predicted class probabilities for the held-out rows.
hypothesis_value = sess.run(hypothesis, feed_dict={X: x_test})

# A prediction counts as correct when the most probable class matches the
# position of the 1 in the one-hot label. argmax along axis 1 compares all rows
# at once instead of looping over them in Python.
result = np.argmax(hypothesis_value, axis=1) == np.argmax(y_test, axis=1)

# Test-set accuracy: the fraction of correct predictions.
result.mean()

0.72222222222222221