# About

Chapter 9. Up and Running with TensorFlow

## First Graph

In [1]:
import tensorflow as tf

In [2]:
# resets the default graph
tf.reset_default_graph()

In [9]:
def g01():
    # creates the graph
    x = tf.Variable(3, name = "x")
    y = tf.Variable(4, name = "y")
    f = x * x * y + y + 2

    # creates session and executes graph
    sess = tf.Session()
    sess.run(x.initializer)
    sess.run(y.initializer)
    result = sess.run(f)
    print(result)
    sess.close()

g01()

42


In [10]:
def g02():
    # creates the graph
    x = tf.Variable(3, name = "x")
    y = tf.Variable(4, name = "y")
    f = x * x * y + y + 2

    # creates session and executes graph, using with syntax
    with tf.Session() as sess:
        x.initializer.run()
        y.initializer.run()
        result = f.eval()
        print(result)
    
g02()

42


In [15]:
def g03():
    # creates the graph
    x = tf.Variable(3, name = "x")
    y = tf.Variable(4, name = "y")
    f = x * x * y + y + 2
    
    # using global initializer, instead of calling initize for each variable
    init = tf.global_variables_initializer() # sets up an initializer node
    with tf.Session() as sess:
        init.run() #actually initializes all the variables
        result = f.eval()
        print(result)

g03()

42


## Managing Graphs

In [19]:
def g04():
    # any node created is automatically added to default graph
    x1 = tf.Variable(1)
    t1 = x1.graph is tf.get_default_graph()
    print(t1)
    
g04()

True


In [21]:
def g05():
    # can manage multiple independent graphs
    graph = tf.Graph()
    with graph.as_default():
        x2 = tf.Variable(2)
        
    t1 = x2.graph is graph
    print(t1)
    
    t2 = x2.graph is tf.get_default_graph()
    print(t2)
    
g05()

True
False


## Lifecycle of a Node Value

In [22]:
def g06():
    w = tf.constant(3)
    x = w + 2
    y = x + 5
    z = x * 3
    
    # this code calculates x twice
    with tf.Session() as sess:
        print(y.eval())
        print(z.eval())
    
g06()

10
15


In [23]:
def g07():
    w = tf.constant(3)
    x = w + 2
    y = x + 5
    z = x * 3
    
    # this code calculates x once
    with tf.Session() as sess:
        y_val, z_val = sess.run([y, z])
        print(y.eval())
        print(z.eval())
    
g07()

10
15


## Linear Regression with TensorFlow

In [3]:
import numpy as np
from sklearn.datasets import fetch_california_housing

we're essentially calculating the Normal Equation from chap 4

theta_hat = (X^T * X) ^ -1 * X^T * y

In [6]:
def g08():
    print("========")
    housing = fetch_california_housing()
    m, n = housing.data.shape
    print(housing.data.shape, m, n)
    print(housing.data[0:5])
    
    print("========")
    housing_data_plus_bias = np.c_[np.ones((m, 1)), housing.data]
    print(housing_data_plus_bias.shape)
    print(housing_data_plus_bias[0:5])
    
    print("========")
    X = tf.constant(housing_data_plus_bias, dtype = tf.float32, name = "X")
    y = tf.constant(housing.target.reshape(-1, 1), dtype = tf.float32, name = "y")
    print(housing.target.shape)
    print(housing.target[0:10])
    print(housing.target.reshape(-1, 1).shape)
    print(housing.target.reshape(-1, 1)[0:10])
    
    print("========")
    XT = tf.transpose(X)
    print(X.shape)
    print(X[0:3])
    print(XT.shape)
    print(XT[0:3])
    
    print("========")
    theta = tf.matmul(
        tf.matmul(
            tf.matrix_inverse(
                tf.matmul(
                    XT
                    , X
                )
            )
            , XT
        )
        , y
    )
    
    with tf.Session() as sess:
        theta_value = theta.eval()
    
    print("theta_value")
    print(theta_value)

g08()

(20640, 8) 20640 8
[[ 8.32520000e+00  4.10000000e+01  6.98412698e+00  1.02380952e+00
   3.22000000e+02  2.55555556e+00  3.78800000e+01 -1.22230000e+02]
 [ 8.30140000e+00  2.10000000e+01  6.23813708e+00  9.71880492e-01
   2.40100000e+03  2.10984183e+00  3.78600000e+01 -1.22220000e+02]
 [ 7.25740000e+00  5.20000000e+01  8.28813559e+00  1.07344633e+00
   4.96000000e+02  2.80225989e+00  3.78500000e+01 -1.22240000e+02]
 [ 5.64310000e+00  5.20000000e+01  5.81735160e+00  1.07305936e+00
   5.58000000e+02  2.54794521e+00  3.78500000e+01 -1.22250000e+02]
 [ 3.84620000e+00  5.20000000e+01  6.28185328e+00  1.08108108e+00
   5.65000000e+02  2.18146718e+00  3.78500000e+01 -1.22250000e+02]]
(20640, 9)
[[ 1.00000000e+00  8.32520000e+00  4.10000000e+01  6.98412698e+00
   1.02380952e+00  3.22000000e+02  2.55555556e+00  3.78800000e+01
  -1.22230000e+02]
 [ 1.00000000e+00  8.30140000e+00  2.10000000e+01  6.23813708e+00
   9.71880492e-01  2.40100000e+03  2.10984183e+00  3.78600000e+01
  -1.22220000e+02]
 [

In [9]:
from sklearn.linear_model import LinearRegression
def g08b():
    # sanity check to see if the actual sklearn linear regression classifier does something similar
    print("========")
    housing = fetch_california_housing()
    m, n = housing.data.shape
    
    print("========")
    housing_data_plus_bias = np.c_[np.ones((m, 1)), housing.data]
    
    print("========")
    X = housing_data_plus_bias
    y = housing.target.reshape(-1, 1)
    
    print("========")
    reg = LinearRegression().fit(X, y)
    
    print("========")
    theta_values = reg.coef_
    print(theta_values)
    
g08b()
# well actually pretty close, except for the bias term

[[ 0.00000000e+00  4.36693293e-01  9.43577803e-03 -1.07322041e-01
   6.45065694e-01 -3.97638942e-06 -3.78654265e-03 -4.21314378e-01
  -4.34513755e-01]]


## Implementing Gradient Descent

### manualy computing the gradients

In [4]:
from sklearn.preprocessing import StandardScaler

In [23]:
def g09():
    # gets the housing data
    housing = fetch_california_housing()
    m, n = housing.data.shape
    
    # scales the housing data using sklearn's StandardScaler
    scaler = StandardScaler()
    scaled_housing_data = scaler.fit_transform(housing.data)
    
    # adds the bias
    scaled_housing_data_plus_bias = np.c_[np.ones((m, 1)), scaled_housing_data]
    
    n_epochs = 1000
    learning_rate = 0.01
    
    X = tf.constant(scaled_housing_data_plus_bias, dtype = tf.float32, name = "X")
    y = tf.constant(housing.target.reshape(-1, 1), dtype = tf.float32, name = "y")
    theta = tf.Variable(tf.random_uniform([n + 1, 1], -1.0, 1.0), name = "theta")
    y_pred = tf.matmul(X, theta, name = "predictions")
    error = y_pred - y
    
    # calculates the MSE, so that we can track the error improvements every iteration of n_epochs
    mse = tf.reduce_mean(tf.square(error), name = "mse")
    
    # implements the Batch Gradient Descent step
    # theta_next_step = theta - eta * gradient of theta of MSE of theta [???]
    gradients = 2/m * tf.matmul(tf.transpose(X), error)
    training_op = tf.assign(theta, theta - learning_rate * gradients)
    
    init = tf.global_variables_initializer()
    
    with tf.Session() as sess:
        sess.run(init)
        
        # at each tick of n_epoch, runs the training_op node, which implements the Batch Gradient Descent step
        # also every 100 ticks, prints out the MSE
        for epoch in range(n_epochs):
            if epoch % 100 == 0:
                print("Epoch", epoch, "MSE =", mse.eval())
            sess.run(training_op)
    
        best_theta = theta.eval()
        print("best theta is:")
        print(best_theta)
    
g09()

Epoch 0 MSE = 9.122907
Epoch 100 MSE = 0.8448336
Epoch 200 MSE = 0.6622453
Epoch 300 MSE = 0.62959146
Epoch 400 MSE = 0.6074647
Epoch 500 MSE = 0.5904154
Epoch 600 MSE = 0.5771039
Epoch 700 MSE = 0.5666507
Epoch 800 MSE = 0.55840087
Epoch 900 MSE = 0.5518588
best theta is:
[[ 2.0685523 ]
 [ 0.94581234]
 [ 0.16511774]
 [-0.4390792 ]
 [ 0.42898908]
 [ 0.01085541]
 [-0.04572579]
 [-0.45277286]
 [-0.43472746]]


### Using Autodiff

In [8]:
def g09b():
    # gets the housing data
    housing = fetch_california_housing()
    m, n = housing.data.shape
    
    # scales the housing data using sklearn's StandardScaler
    scaler = StandardScaler()
    scaled_housing_data = scaler.fit_transform(housing.data)
    
    # adds the bias
    scaled_housing_data_plus_bias = np.c_[np.ones((m, 1)), scaled_housing_data]
    
    n_epochs = 1000
    learning_rate = 0.01
    
    X = tf.constant(scaled_housing_data_plus_bias, dtype = tf.float32, name = "X")
    y = tf.constant(housing.target.reshape(-1, 1), dtype = tf.float32, name = "y")
    theta = tf.Variable(tf.random_uniform([n + 1, 1], -1.0, 1.0), name = "theta")
    y_pred = tf.matmul(X, theta, name = "predictions")
    error = y_pred - y
    
    # calculates the MSE, so that we can track the error improvements every iteration of n_epochs
    mse = tf.reduce_mean(tf.square(error), name = "mse")
    
    # replaced the manual version with tensorflow's autodiff
    # ta.gradients() takes an op, here mse, and a list of variables, here theta, and creates a list of ops
    gradients = tf.gradients(mse, [theta])[0]
    training_op = tf.assign(theta, theta - learning_rate * gradients)
    
    init = tf.global_variables_initializer()
    
    with tf.Session() as sess:
        sess.run(init)
        
        # at each tick of n_epoch, runs the training_op node, which implements the Batch Gradient Descent step
        # also every 100 ticks, prints out the MSE
        for epoch in range(n_epochs):
            if epoch % 100 == 0:
                print("Epoch", epoch, "MSE =", mse.eval())
            sess.run(training_op)
    
        best_theta = theta.eval()
        print("best theta is:")
        print(best_theta)
    
g09b()

Epoch 0 MSE = 8.983264
Epoch 100 MSE = 0.7885715
Epoch 200 MSE = 0.6231973
Epoch 300 MSE = 0.5940987
Epoch 400 MSE = 0.57496816
Epoch 500 MSE = 0.56116766
Epoch 600 MSE = 0.55116314
Epoch 700 MSE = 0.54390293
Epoch 800 MSE = 0.5386293
Epoch 900 MSE = 0.5347946
best theta is:
[[ 2.0685523 ]
 [ 0.8159443 ]
 [ 0.14849767]
 [-0.17925768]
 [ 0.20778206]
 [ 0.0064953 ]
 [-0.04155089]
 [-0.695883  ]
 [-0.66202307]]


### Using an Optimizer

In [12]:
def g09c():
    # gets the housing data
    housing = fetch_california_housing()
    m, n = housing.data.shape
    
    # scales the housing data using sklearn's StandardScaler
    scaler = StandardScaler()
    scaled_housing_data = scaler.fit_transform(housing.data)
    
    # adds the bias
    scaled_housing_data_plus_bias = np.c_[np.ones((m, 1)), scaled_housing_data]
    
    n_epochs = 1000
    learning_rate = 0.01
    
    X = tf.constant(scaled_housing_data_plus_bias, dtype = tf.float32, name = "X")
    y = tf.constant(housing.target.reshape(-1, 1), dtype = tf.float32, name = "y")
    theta = tf.Variable(tf.random_uniform([n + 1, 1], -1.0, 1.0), name = "theta")
    y_pred = tf.matmul(X, theta, name = "predictions")
    error = y_pred - y
    
    # calculates the MSE, so that we can track the error improvements every iteration of n_epochs
    mse = tf.reduce_mean(tf.square(error), name = "mse")
    
    # replaced the separate gradients and training_op calls with the tf Gradient Descent optimizer
    # ... there are also other optimizer options, such as the tf MomentumOptimizer
#     optimizer = tf.train.GradientDescentOptimizer(learning_rate = learning_rate)
    optimizer = tf.train.MomentumOptimizer(learning_rate = learning_rate, momentum = 0.9)
    training_op = optimizer.minimize(mse)
    
    init = tf.global_variables_initializer()
    
    with tf.Session() as sess:
        sess.run(init)
        
        # at each tick of n_epoch, runs the training_op node, which implements the Batch Gradient Descent step
        # also every 100 ticks, prints out the MSE
        for epoch in range(n_epochs):
            if epoch % 100 == 0:
                print("Epoch", epoch, "MSE =", mse.eval())
            sess.run(training_op)
    
        best_theta = theta.eval()
        print("best theta is:")
        print(best_theta)
    
g09c()

Epoch 0 MSE = 9.756287
Epoch 100 MSE = 0.52962554
Epoch 200 MSE = 0.52499217
Epoch 300 MSE = 0.52441007
Epoch 400 MSE = 0.5243329
Epoch 500 MSE = 0.52432257
Epoch 600 MSE = 0.52432114
Epoch 700 MSE = 0.524321
Epoch 800 MSE = 0.524321
Epoch 900 MSE = 0.524321
best theta is:
[[ 2.068558  ]
 [ 0.82962847]
 [ 0.11875335]
 [-0.26554456]
 [ 0.30571088]
 [-0.0045025 ]
 [-0.03932662]
 [-0.89986444]
 [-0.8705207 ]]


## Feeding Data to the Training Algorithm

In [17]:
def g10():
    # trying out placeholders 
    # placeholder nodes don't do any computation, they just output the data
    # placeholders can be used to pass data into mini-batches
    # i.e. replace X and y with next mini-batch of X and y
    A = tf.placeholder(tf.float32, shape = (None, 3))
    B = A + 5
    with tf.Session() as sess:
        # adds 5 to whatever A is, as defined by our feed_dict
        # ... 1,2,3 or 4,5,6,7,8,9, etc
        B_val_1 = B.eval(feed_dict = {A: [[1, 2, 3]]})
        B_val_2 = B.eval(feed_dict = {A: [[4, 5, 6], [7, 8, 9]]})
        B_val_3 = B.eval(feed_dict = {A: [[10, 11, 12], [13, 14, 15], [16, 17, 18]]})
        print(B_val_1)
        print(B_val_2)
        print(B_val_3)
    
g10()

[[6. 7. 8.]]
[[ 9. 10. 11.]
 [12. 13. 14.]]
[[15. 16. 17.]
 [18. 19. 20.]
 [21. 22. 23.]]


In [None]:
def g11():
    # now we are going to try to use placeholders to do mini-batches
    
    # gets the housing data
    housing = fetch_california_housing()
    m, n = housing.data.shape
    
    # scales the housing data using sklearn's StandardScaler
    scaler = StandardScaler()
    scaled_housing_data = scaler.fit_transform(housing.data)
    
    # adds the bias
    scaled_housing_data_plus_bias = np.c_[np.ones((m, 1)), scaled_housing_data]
    
    n_epochs = 1000
    learning_rate = 0.01
    
    # changes the definition of X and y in construction phase to make them placeholder nodes
    X = tf.placeholder(tf.float32, shape = (None, n + 1), name = "X")
    y = tf.placeholder(tf.float32, shape = (None, 1), name = "y")
    theta = tf.Variable(tf.random_uniform([n + 1, 1], -1.0, 1.0), name = "theta")
    y_pred = tf.matmul(X, theta, name = "predictions")
    error = y_pred - y
    
    # calculates the MSE, so that we can track the error improvements every iteration of n_epochs
    mse = tf.reduce_mean(tf.square(error), name = "mse")
    
    # defines the batch size and computes total number of batches
    batch_size = 100
    n_batches = int(np.ceil(m / batch_size))
    
    # replaced the separate gradients and training_op calls with the tf Gradient Descent optimizer
    # ... there are also other optimizer options, such as the tf MomentumOptimizer
#     optimizer = tf.train.GradientDescentOptimizer(learning_rate = learning_rate)
    optimizer = tf.train.MomentumOptimizer(learning_rate = learning_rate, momentum = 0.9)
    training_op = optimizer.minimize(mse)
    
    # fetches mini-batches one by one
    def fetch_batch(epoch, batch_index, batch_size):
        # mimics load data from disk
#         X_batch = tf.constant(scaled_housing_data_plus_bias, dtype = tf.float32, name = "X")
#         y_batch = tf.constant(housing.target.reshape(-1, 1), dtype = tf.float32, name = "y")
        # uh ... TODO figure out how to do this properly!!!
        X_batch, y_batch = None, None
        return X_batch, y_batch
    
    init = tf.global_variables_initializer()
    
    # in execution, feeds in the mini-batches one by one
    with tf.Session() as sess:
        sess.run(init)
        
        for epoch in range(n_epochs):
            # also every 100 ticks, prints out the MSE
            if epoch % 100 == 0:
                print("Epoch", epoch, "MSE =", mse.eval())
                
            for batch_index in range(n_batches):
                X_batch, y_batch = fetch_batch(epoch, batch_index, batch_size)
                sess.run(training_op, feed_dict = {X: X_batch, y: y_batch})
            
        
        best_theta = theta.eval()
        print("best theta is:")
        print(best_theta)
    
g11()

## Saving and Restoring Models

In [23]:
def g12():
    # gets the housing data
    housing = fetch_california_housing()
    m, n = housing.data.shape
    
    # scales the housing data using sklearn's StandardScaler
    scaler = StandardScaler()
    scaled_housing_data = scaler.fit_transform(housing.data)
    
    # adds the bias
    scaled_housing_data_plus_bias = np.c_[np.ones((m, 1)), scaled_housing_data]
    
    n_epochs = 1000
    learning_rate = 0.01
    
    X = tf.constant(scaled_housing_data_plus_bias, dtype = tf.float32, name = "X")
    y = tf.constant(housing.target.reshape(-1, 1), dtype = tf.float32, name = "y")
    theta = tf.Variable(tf.random_uniform([n + 1, 1], -1.0, 1.0), name = "theta")
    y_pred = tf.matmul(X, theta, name = "predictions")
    error = y_pred - y
    
    # calculates the MSE, so that we can track the error improvements every iteration of n_epochs
    mse = tf.reduce_mean(tf.square(error), name = "mse")
    
    # replaced the separate gradients and training_op calls with the tf Gradient Descent optimizer
    optimizer = tf.train.GradientDescentOptimizer(learning_rate = learning_rate)
    training_op = optimizer.minimize(mse)
    
    init = tf.global_variables_initializer()
    
    # adds a saver node to the end of the construction phase
    saver = tf.train.Saver()
    
    with tf.Session() as sess:
        sess.run(init)
        
        # at each tick of n_epoch, runs the training_op node, which implements the Batch Gradient Descent step
        # also every 100 epochs, saves a checkpoint
        for epoch in range(n_epochs):
            if epoch % 100 == 0:
                print("Epoch", epoch, "MSE =", mse.eval())
                save_path = saver.save(sess, "../../models/ch09/g12_model.ckpt")
            sess.run(training_op)
    
        best_theta = theta.eval()
        print("best theta is:")
        print(best_theta)
    
g12()

Epoch 0 MSE = 5.8287187
Epoch 100 MSE = 0.672561
Epoch 200 MSE = 0.58933055
Epoch 300 MSE = 0.5702154
Epoch 400 MSE = 0.5574596
Epoch 500 MSE = 0.5482897
Epoch 600 MSE = 0.5416666
Epoch 700 MSE = 0.5368799
Epoch 800 MSE = 0.5334194
Epoch 900 MSE = 0.53091663
best theta is:
[[ 2.0685523 ]
 [ 0.7733346 ]
 [ 0.13495734]
 [-0.10913765]
 [ 0.15436631]
 [ 0.00212336]
 [-0.03949714]
 [-0.8346093 ]
 [-0.7962927 ]]


In [19]:
!pwd

/home/yangyq/workspaces/homl/notebooks/c09


In [22]:
# housing = fetch_california_housing()
# type(housing.data)
# x = housing.data
# np.save("../../models/ch09/test.txt", x)