In [1]:
import pandas as pd
import numpy as np


In [2]:
data_url = "http://archive.ics.uci.edu/ml/machine-learning-databases/postoperative-patient-data/post-operative.data"
names= ['L-CORE','L-SURF','L-O2','L-BP','SURF-STBL','CORE-STBL','BP-STBL','COMFORT','ADM-DECS']

df = pd.read_csv(data_url, names=names)
df = df.drop([3])
df = df.drop([70])
df = df.drop([46])
df = df.drop([48])
df.ix[:5] 

Unnamed: 0,L-CORE,L-SURF,L-O2,L-BP,SURF-STBL,CORE-STBL,BP-STBL,COMFORT,ADM-DECS
0,mid,low,excellent,mid,stable,stable,stable,15,A
1,mid,high,excellent,high,stable,stable,stable,10,S
2,high,low,excellent,high,stable,stable,mod-stable,10,A
4,mid,mid,excellent,high,stable,stable,stable,10,A
5,high,low,good,mid,stable,stable,unstable,15,S


In [3]:
df.shape

(86, 9)

In [4]:
df.dtypes

L-CORE       object
L-SURF       object
L-O2         object
L-BP         object
SURF-STBL    object
CORE-STBL    object
BP-STBL      object
COMFORT      object
ADM-DECS     object
dtype: object

In [5]:
for i in names:
    df[i] = df[i].astype('category')
    
df['COMFORT'] = df['COMFORT'].astype(np.float32)
df.dtypes

L-CORE       category
L-SURF       category
L-O2         category
L-BP         category
SURF-STBL    category
CORE-STBL    category
BP-STBL      category
COMFORT       float32
ADM-DECS     category
dtype: object

In [6]:
y_data = np.reshape(df.pop("ADM-DECS").values, (-1.1))
y_data.shape

(86,)

In [7]:
y_data

[A, S, A, A, S, ..., A, S, A, A, S]
Length: 86
Categories (3, object): [A, I, S]

In [8]:
x_data_df = df.ix[-1:,:]
# x_data_df = df
x_data_df.insert(0,"bias",1)
x_data_df.ix[:5,:]
# x_data_df.dtypes

Unnamed: 0,bias,L-CORE,L-SURF,L-O2,L-BP,SURF-STBL,CORE-STBL,BP-STBL,COMFORT
0,1,mid,low,excellent,mid,stable,stable,stable,15.0
1,1,mid,high,excellent,high,stable,stable,stable,10.0
2,1,high,low,excellent,high,stable,stable,mod-stable,10.0
4,1,mid,mid,excellent,high,stable,stable,stable,10.0
5,1,high,low,good,mid,stable,stable,unstable,15.0


In [9]:
x_data = pd.get_dummies(x_data_df).values

x_data[:5]

array([[  1.,  15.,   0.,   0.,   1.,   0.,   1.,   0.,   1.,   0.,   0.,
          0.,   1.,   1.,   0.,   0.,   1.,   0.,   0.,   1.,   0.],
       [  1.,  10.,   0.,   0.,   1.,   1.,   0.,   0.,   1.,   0.,   1.,
          0.,   0.,   1.,   0.,   0.,   1.,   0.,   0.,   1.,   0.],
       [  1.,  10.,   1.,   0.,   0.,   0.,   1.,   0.,   1.,   0.,   1.,
          0.,   0.,   1.,   0.,   0.,   1.,   0.,   1.,   0.,   0.],
       [  1.,  10.,   0.,   0.,   1.,   0.,   0.,   1.,   1.,   0.,   1.,
          0.,   0.,   1.,   0.,   0.,   1.,   0.,   0.,   1.,   0.],
       [  1.,  15.,   1.,   0.,   0.,   0.,   1.,   0.,   0.,   1.,   0.,
          0.,   1.,   1.,   0.,   0.,   1.,   0.,   0.,   0.,   1.]])

In [10]:
y_data = pd.get_dummies(y_data).values

y_data

array([[ 1.,  0.,  0.],
       [ 0.,  0.,  1.],
       [ 1.,  0.,  0.],
       [ 1.,  0.,  0.],
       [ 0.,  0.,  1.],
       [ 0.,  0.,  1.],
       [ 0.,  0.,  1.],
       [ 0.,  0.,  1.],
       [ 0.,  0.,  1.],
       [ 1.,  0.,  0.],
       [ 1.,  0.,  0.],
       [ 1.,  0.,  0.],
       [ 1.,  0.,  0.],
       [ 0.,  0.,  1.],
       [ 1.,  0.,  0.],
       [ 1.,  0.,  0.],
       [ 1.,  0.,  0.],
       [ 1.,  0.,  0.],
       [ 1.,  0.,  0.],
       [ 1.,  0.,  0.],
       [ 0.,  0.,  1.],
       [ 1.,  0.,  0.],
       [ 1.,  0.,  0.],
       [ 1.,  0.,  0.],
       [ 1.,  0.,  0.],
       [ 1.,  0.,  0.],
       [ 1.,  0.,  0.],
       [ 1.,  0.,  0.],
       [ 0.,  0.,  1.],
       [ 1.,  0.,  0.],
       [ 1.,  0.,  0.],
       [ 1.,  0.,  0.],
       [ 1.,  0.,  0.],
       [ 1.,  0.,  0.],
       [ 1.,  0.,  0.],
       [ 0.,  0.,  1.],
       [ 0.,  0.,  1.],
       [ 0.,  0.,  1.],
       [ 1.,  0.,  0.],
       [ 1.,  0.,  0.],
       [ 0.,  0.,  1.],
       [ 0.,  0.

In [11]:
df.shape

(86, 8)

In [12]:

y_data.shape, x_data.shape


((86, 3), (86, 21))

In [13]:
from sklearn import preprocessing # Min-Max Standardzation

min_max_scaler = preprocessing.MinMaxScaler()
x_data[:, 8] = min_max_scaler.fit_transform(x_data[:, 8])

x_data[:5,:]




array([[  1.,  15.,   0.,   0.,   1.,   0.,   1.,   0.,   1.,   0.,   0.,
          0.,   1.,   1.,   0.,   0.,   1.,   0.,   0.,   1.,   0.],
       [  1.,  10.,   0.,   0.,   1.,   1.,   0.,   0.,   1.,   0.,   1.,
          0.,   0.,   1.,   0.,   0.,   1.,   0.,   0.,   1.,   0.],
       [  1.,  10.,   1.,   0.,   0.,   0.,   1.,   0.,   1.,   0.,   1.,
          0.,   0.,   1.,   0.,   0.,   1.,   0.,   1.,   0.,   0.],
       [  1.,  10.,   0.,   0.,   1.,   0.,   0.,   1.,   1.,   0.,   1.,
          0.,   0.,   1.,   0.,   0.,   1.,   0.,   0.,   1.,   0.],
       [  1.,  15.,   1.,   0.,   0.,   0.,   1.,   0.,   0.,   1.,   0.,
          0.,   1.,   1.,   0.,   0.,   1.,   0.,   0.,   0.,   1.]])

In [14]:
import numpy as np 

training_idx = np.random.randint(y_data.shape[0], size=int(y_data.shape[0] * 0.8))
test_idx = np.random.randint(y_data.shape[0], size=int(y_data.shape[0] * 0.2))

len(training_idx), len(test_idx)

(68, 17)

In [15]:
import numpy as np 

training_idx = np.random.randint(y_data.shape[0], size=int(y_data.shape[0] * 0.8))
test_idx = np.random.randint(y_data.shape[0], size=int(y_data.shape[0] * 0.2))

x_training, x_test = x_data[training_idx,:], x_data[test_idx,:]
y_training, y_test = y_data[training_idx,:], y_data[test_idx,:]

# x_training = np.transpose(x_tr)np.t
x_training.shape, x_test.shape,y_training.shape, y_test.shape

((68, 21), (17, 21), (68, 3), (17, 3))

In [16]:
import tensorflow as tf

X = tf.placeholder("float", [None, len(x_data[0])])
Y = tf.placeholder("float", [None, len(y_data[0])])

W = tf.Variable(tf.zeros([len(x_data[0]), len(y_data[0])]))

hypothesis = tf.nn.softmax(tf.matmul(X,W))
learning_rate = 0.001

cost = tf.reduce_mean(-1 * tf.reduce_sum(Y * tf.log(hypothesis), reduction_indices=1))
optimizer = tf.train.GradientDescentOptimizer(learning_rate).minimize(cost)

init = tf.initialize_all_variables()

sess = tf.Session()
sess.run(init)
    
for i in range(60001):
    sess.run(optimizer, feed_dict={X: x_training, Y:y_training})
    if i % 6000 == 0:
        print ("%d 's iteration" % i)
        print (sess.run(cost, feed_dict={X: x_training, Y:y_training}))
finale_theta = sess.run(W)


0 's iteration
1.07904
6000 's iteration
0.605059
12000 's iteration
0.533578
18000 's iteration
0.497444
24000 's iteration
0.476312
30000 's iteration
0.462736
36000 's iteration
0.453366
42000 's iteration
0.446517
48000 's iteration
0.441273
54000 's iteration
0.437105
60000 's iteration
0.433688


In [17]:
hypotehsis_value = sess.run(hypothesis, feed_dict={X:x_test})
result= [np.argmax(predict) == np.argmax(original_value)  for predict, original_value in zip(hypotehsis_value, y_test)]
sum(result) / len(result)

0.70588235294117652