In [1]:
import tensorflow as tf

In [2]:
hello = tf.constant('Hi!')
sess = tf.Session()
print(sess.run(hello))

b'Hello, TensorFlow!'


In [3]:
import numpy as np #use number matrices

In [4]:
import pandas as pd #work with data as tables

In [5]:
import matplotlib.pyplot as plt

In [6]:
    #Step 1 load data
dataframe = pd.read_csv('data.csv') #dataframe object
#removed the features we dont care about
dataframe = dataframe.drop(['index', 'price', 'sq_price'], axis =1)
#we only use te first 10 rows
dataframe = dataframe[0:10]
dataframe

Unnamed: 0,area,bathrooms
0,2104.0,3.0
1,1600.0,3.0
2,2400.0,3.0
3,1416.0,2.0
4,3000.0,4.0
5,1985.0,4.0
6,1534.0,3.0
7,1427.0,3.0
8,1380.0,3.0
9,1494.0,3.0


In [7]:
#Step 2 - add labels
#1 is good buy and 0 is bad buy
dataframe.loc[:,('y1')] = [1,1,1,0,0,1,0,1,1,1]
#y2 is a negation of y1, opposite
dataframe.loc[:,('y2')] = dataframe['y1'] == 0
#turn TRUE/FALSE values to 1s and 0s
dataframe.loc[:,('y2')] = dataframe['y2'].astype(int)
dataframe

Unnamed: 0,area,bathrooms,y1,y2
0,2104.0,3.0,1,0
1,1600.0,3.0,1,0
2,2400.0,3.0,1,0
3,1416.0,2.0,0,1
4,3000.0,4.0,0,1
5,1985.0,4.0,1,0
6,1534.0,3.0,0,1
7,1427.0,3.0,1,0
8,1380.0,3.0,1,0
9,1494.0,3.0,1,0


In [8]:
#Step 3 - prepare data for tensorflow (tensors)
#tensors are a generic version of vectors and matrices
#vector is a list of numbers (1D tensor)
#matric is a list  of list of numbers (2D tensor)
#list of list of list of numbers (3D tensor)
#....
#convert features to input tensor
inputX = dataframe.loc[:, ['area', 'bathrooms']].as_matrix()
#convert labels to input tensors
inputY = dataframe.loc[:, ['y1', 'y2']].as_matrix()

In [9]:
inputX

array([[  2.10400000e+03,   3.00000000e+00],
       [  1.60000000e+03,   3.00000000e+00],
       [  2.40000000e+03,   3.00000000e+00],
       [  1.41600000e+03,   2.00000000e+00],
       [  3.00000000e+03,   4.00000000e+00],
       [  1.98500000e+03,   4.00000000e+00],
       [  1.53400000e+03,   3.00000000e+00],
       [  1.42700000e+03,   3.00000000e+00],
       [  1.38000000e+03,   3.00000000e+00],
       [  1.49400000e+03,   3.00000000e+00]])

In [10]:
inputY

array([[1, 0],
       [1, 0],
       [1, 0],
       [0, 1],
       [0, 1],
       [1, 0],
       [0, 1],
       [1, 0],
       [1, 0],
       [1, 0]], dtype=int64)

In [11]:
#Step 4 - write out our hyperparamenters
#Learning rate defines how fast we reach convergence
#Convergence is when our model is at its optimal fit, where the error is minimized

learning_rate = 0.000001 #controls the rate at which we learn
training_epochs = 2000 #epocas, random number, if the prediction is not accurate, the hyperparamenters are changed
display_step = 50 #how often do wen want to display the process
n_samples = inputY.size #number of samples

In [12]:
#Step 5 - Create our computation graph/neural network
#for features input tensors, none means numbers of examples
#placeholders are gateways
x = tf.placeholder(tf.float32, [None,2]) #2 because there are 2 features

#create weights
#2x2 float matrix, that we will keep updating through the
#training process -> using backpropagation
#variables in tensorflow hold and update paramenters
#in memory buffers containing tensors
W = tf.Variable(tf.zeros([2,2]))

#add biases (example is like b in y = mx + b, b is the bias)
b = tf. Variable(tf.zeros([2]))

#multiply our weights by our inputs, first calculation
#weights are how we govern how data flows in our computation graph
#multiply input by weights and add biases
y_values = tf.add(tf.matmul(x, W),b)

#apply softmax to value we just created
#softmax is our activation function
y = tf.nn.softmax(y_values)

#feed in a matrix of labels
y_ = tf.placeholder(tf.float32, [None, 2])

In [13]:
#Step 6 perform training
#create pur cost function, mean squared error
#reduce sum computes the sum of elements across dimensions of a tensor
cost = tf.reduce_sum(tf.pow(y_ - y, 2))/(2*n_samples)
#Gradientt descent
optimizer = tf.train.GradientDescentOptimizer(learning_rate).minimize(cost)

In [14]:
#initialize variables and tensorflow session
init = tf.initialize_all_variables()
sess = tf.Session()
sess.run(init)

Instructions for updating:
Use `tf.global_variables_initializer` instead.


In [15]:
#training loop
for i in range(training_epochs):
    sess.run(optimizer, feed_dict={x:inputX, y_: inputY})
    
    #write out logs of training
    if(i) % display_step == 0:
        cc = sess.run(cost, feed_dict={x:inputX, y_:inputY}) #x and y_ are tensors
        print("Training step: ", '%04d' % (i), "cost=", "{:.9f}".format(cc))
        
print("optimization Finished!")
training_cost = sess.run(cost, feed_dict={x:inputX, y_: inputY})
print("Training cost=", training_cost, "W=", sess.run(W), "b=", sess.run(b), '\n')

Training step:  0000 cost= 0.114958666
Training step:  0050 cost= 0.109539941
Training step:  0100 cost= 0.109539866
Training step:  0150 cost= 0.109539807
Training step:  0200 cost= 0.109539732
Training step:  0250 cost= 0.109539673
Training step:  0300 cost= 0.109539606
Training step:  0350 cost= 0.109539531
Training step:  0400 cost= 0.109539464
Training step:  0450 cost= 0.109539405
Training step:  0500 cost= 0.109539315
Training step:  0550 cost= 0.109539248
Training step:  0600 cost= 0.109539196
Training step:  0650 cost= 0.109539129
Training step:  0700 cost= 0.109539054
Training step:  0750 cost= 0.109538987
Training step:  0800 cost= 0.109538913
Training step:  0850 cost= 0.109538853
Training step:  0900 cost= 0.109538779
Training step:  0950 cost= 0.109538712
Training step:  1000 cost= 0.109538652
Training step:  1050 cost= 0.109538577
Training step:  1100 cost= 0.109538510
Training step:  1150 cost= 0.109538436
Training step:  1200 cost= 0.109538361
Training step:  1250 cost

In [16]:
sess.run(y, feed_dict = {x:inputX})

array([[ 0.71125221,  0.28874779],
       [ 0.66498977,  0.33501023],
       [ 0.73657656,  0.26342347],
       [ 0.64718789,  0.35281211],
       [ 0.78335613,  0.2166439 ],
       [ 0.70069474,  0.29930523],
       [ 0.65866327,  0.34133676],
       [ 0.64828628,  0.35171372],
       [ 0.64368278,  0.35631716],
       [ 0.65480113,  0.3451989 ]], dtype=float32)

In [17]:
#In the left column is y1 and right column is y2
#Its saying all houses are a good buy 7/10,
#because y1 values are closer to 1 and y2 values are close to 0
#How to improve? Add a hidden layer