# Simple Classification Using TensorFlow to Predict Whether a House Is a Good Buy

In [1]:
import pandas as pd
import numpy as np
import tensorflow as tf
import matplotlib.pyplot as plt

In [47]:
# Read the housing data and preview it: the first five rows, then (rows, columns).
df = pd.read_csv("data.csv")
print(df.head(), df.shape, sep="\n")

   index    area  bathrooms     price    sq_price
0      0  2104.0        3.0  399900.0  190.066540
1      1  1600.0        3.0  329900.0  206.187500
2      2  2400.0        3.0  369000.0  153.750000
3      3  1416.0        2.0  232000.0  163.841808
4      4  3000.0        4.0  539900.0  179.966667
(47, 5)


## Feature Selection:
"index" is only a row counter and carries no useful information, so we drop it.

We also drop "price" and "sq_price"; they are not used as features in this case.

In [48]:
# Discard the columns that are not used as features.
df = df.drop(columns=['index', 'price', 'sq_price'])

In [49]:
# Keep only the first ten rows, then display the result.
df = df.iloc[:10]
df

Unnamed: 0,area,bathrooms
0,2104.0,3.0
1,1600.0,3.0
2,2400.0,3.0
3,1416.0,2.0
4,3000.0,4.0
5,1985.0,4.0
6,1534.0,3.0
7,1427.0,3.0
8,1380.0,3.0
9,1494.0,3.0


## Add Label 
Add a target variable that indicates whether the customer will buy the house or not.

1 is a good buy and 0 is a bad buy.

In [50]:
# Hand-assigned labels, one per row of the ten-row frame.
df['target1'] = [1,1,1,0,0,1,0,1,1,1]
# target2 is the complement of target1: 1 where target1 is 0, otherwise 0.
df['target2'] = (df['target1'] == 0).astype('int64')
df

Unnamed: 0,area,bathrooms,target1,target2
0,2104.0,3.0,1,0
1,1600.0,3.0,1,0
2,2400.0,3.0,1,0
3,1416.0,2.0,0,1
4,3000.0,4.0,0,1
5,1985.0,4.0,1,0
6,1534.0,3.0,0,1
7,1427.0,3.0,1,0
8,1380.0,3.0,1,0
9,1494.0,3.0,1,0


## Prepare Data for Tensorflow
First, convert the features and labels into NumPy arrays that can be fed to TensorFlow

In [51]:
# Pull the feature columns and the two label columns out of the frame as
# NumPy arrays so they can be passed through feed_dict.
# `.values` is used instead of DataFrame.as_matrix(): as_matrix() is deprecated
# (pandas 0.23) and removed in pandas 1.0, while `.values` returns the same
# ndarray on old and new pandas versions alike.
inputX = df.loc[:, ['area', 'bathrooms']].values
inputY = df.loc[:, ['target1', 'target2']].values

  """Entry point for launching an IPython kernel.
  


## Set Hyperparameters

In [52]:
# Normaliser used in the cost denominator.
# NOTE(review): this is the total element count of inputY (rows x columns),
# not the number of rows -- confirm that is the intended meaning of "n_samples".
n_samples = np.size(inputY)

# Step size for gradient descent.
learning_rate = 0.000001
# Number of passes over the full data set.
training_epochs = 2000
# Print the cost every `display_step` epochs.
display_step = 50

## Create Computation Graph/Neural Network

In [54]:
# Feature placeholder: None lets the batch hold any number of rows, each with 2 values.
x = tf.placeholder(dtype=tf.float32, shape=[None, 2])
# Weight matrix (2 inputs -> 2 outputs), starting at zero.
W = tf.Variable(tf.zeros(shape=[2, 2]))
# Bias vector, one entry per output, starting at zero.
b = tf.Variable(tf.zeros(shape=[2]))

# Linear layer: inputs times weights, plus bias.
y_values = tf.matmul(x, W) + b
# Softmax turns the two linear outputs into values that sum to 1 per row.
y_pred = tf.nn.softmax(y_values)
# Label placeholder: a matrix with one two-column label row per input row.
y = tf.placeholder(dtype=tf.float32, shape=[None, 2])

## Perform Training 
Define the cost function, which is the mean squared error.

Use gradient descent to minimize the cost.

In [55]:
# Squared difference between labels and predictions, element by element.
squared_error = tf.pow(tf.subtract(y, y_pred), 2)
# Cost: sum of the squared differences divided by 2 * n_samples.
cost = tf.reduce_sum(squared_error) / (2 * n_samples)
# One gradient-descent update of the variables per run of this op.
optimizer = tf.train.GradientDescentOptimizer(learning_rate=learning_rate).minimize(cost)

In [58]:
# Build the op that initialises every tf.Variable, open a session, and run it.
# tf.global_variables_initializer() is the supported replacement for the
# deprecated tf.initialize_all_variables(), as the deprecation notice itself advises.
init = tf.global_variables_initializer()
# The session is kept open because the training and prediction cells below reuse it.
sess = tf.Session()
sess.run(init)

Instructions for updating:
Use `tf.global_variables_initializer` instead.


In [60]:
# Full-batch training: every step feeds the same features and labels.
train_feed = {x: inputX, y: inputY}

for step in range(training_epochs):
    sess.run(optimizer, feed_dict=train_feed)

    # Only report the cost on every display_step-th step.
    if step % display_step != 0:
        continue
    step_cost = sess.run(cost, feed_dict=train_feed)
    print("Training step:", '%04d' % step, "cost=", "{:.9f}".format(step_cost))

print("Optimization Finished!")
# Final cost and learned parameters after the last update.
training_cost = sess.run(cost, feed_dict=train_feed)
print(
    "Training cost=", training_cost,
    '\nW=', sess.run(W),
    '\nb=', sess.run(b),
    '\n',
)


Training step: 0000 cost= 0.109537281
Training step: 0050 cost= 0.109537221
Training step: 0100 cost= 0.109537147
Training step: 0150 cost= 0.109537080
Training step: 0200 cost= 0.109537005
Training step: 0250 cost= 0.109536938
Training step: 0300 cost= 0.109536879
Training step: 0350 cost= 0.109536804
Training step: 0400 cost= 0.109536745
Training step: 0450 cost= 0.109536670
Training step: 0500 cost= 0.109536611
Training step: 0550 cost= 0.109536529
Training step: 0600 cost= 0.109536462
Training step: 0650 cost= 0.109536402
Training step: 0700 cost= 0.109536327
Training step: 0750 cost= 0.109536268
Training step: 0800 cost= 0.109536193
Training step: 0850 cost= 0.109536126
Training step: 0900 cost= 0.109536052
Training step: 0950 cost= 0.109535985
Training step: 1000 cost= 0.109535925
Training step: 1050 cost= 0.109535851
Training step: 1100 cost= 0.109535776
Training step: 1150 cost= 0.109535716
Training step: 1200 cost= 0.109535649
Training step: 1250 cost= 0.109535575
Training ste

In [62]:
# Evaluate the softmax output for every training row; no labels are needed here.
sess.run(fetches=y_pred, feed_dict={x: inputX})

array([[0.7112309 , 0.2887692 ],
       [0.66498965, 0.33501032],
       [0.73654455, 0.2634555 ],
       [0.64717317, 0.3528268 ],
       [0.7833244 , 0.2166756 ],
       [0.7006994 , 0.29930058],
       [0.6586662 , 0.3413338 ],
       [0.6482943 , 0.3517057 ],
       [0.6436931 , 0.3563069 ],
       [0.65480596, 0.345194  ]], dtype=float32)