# Install TensorFlow

In [4]:
# Recommended: install TensorFlow inside a dedicated Anaconda environment.
# NOTE(review): IPython runs each `!` line in its own subshell, so the
# `source activate` below presumably does not carry over to the `!pip` line --
# confirm, or run these three commands in a terminal instead.
!conda create -n tensorflow
!source activate tensorflow
# Installs the CPU-only TensorFlow 1.0.1 wheel for macOS / Python 3.
# For Python 2, replace "py3" with "py2" in the URL.
# For Windows, replace "mac" with "windows" in the URL (per the original note;
# verify the exact wheel name before use).
!pip install --ignore-installed --upgrade https://storage.googleapis.com/tensorflow/mac/cpu/tensorflow-1.0.1-py3-none-any.whl

Error: prefix already exists: /Users/kylemiao/anaconda/envs/tensorflow
Collecting tensorflow==1.0.1 from https://storage.googleapis.com/tensorflow/mac/cpu/tensorflow-1.0.1-py3-none-any.whl
  Downloading https://storage.googleapis.com/tensorflow/mac/cpu/tensorflow-1.0.1-py3-none-any.whl (39.3MB)
[K    100% |████████████████████████████████| 39.3MB 17kB/s 
[?25hCollecting wheel>=0.26 (from tensorflow==1.0.1)
  Using cached wheel-0.29.0-py2.py3-none-any.whl
Collecting six>=1.10.0 (from tensorflow==1.0.1)
  Using cached six-1.10.0-py2.py3-none-any.whl
Collecting numpy>=1.11.0 (from tensorflow==1.0.1)
  Downloading numpy-1.12.1-cp35-cp35m-macosx_10_6_intel.macosx_10_9_intel.macosx_10_9_x86_64.macosx_10_10_intel.macosx_10_10_x86_64.whl (4.4MB)
[K    100% |████████████████████████████████| 4.4MB 130kB/s 
[?25hCollecting protobuf>=3.1.0 (from tensorflow==1.0.1)
  Using cached protobuf-3.2.0-py2.py3-none-any.whl
Collecting setuptools (from protobuf>=3.1.0->tensorflow==1.0.1)
  Downloading s

One problem I ran into was that the environment TensorFlow was installed in was not clean; the CS guys gave me the following commands:

In [3]:
!export LC_ALL=en_US.UTF-8
!export LANG=en_US.UTF-8
!export LANGUAGE=en_US.UTF-8

Activate the tensorflow conda environment (or change the kernel after opening Jupyter), then run the following toy code.

In [1]:
# Smoke test: evaluate a constant string tensor to confirm the installation.
import tensorflow as tf

hello = tf.constant('Hello, TensorFlow!')
sess = tf.Session()
greeting = sess.run(hello)  # evaluated value of the constant node
print(greeting)

b'Hello, TensorFlow!'


# TensorFlow Core

This part introduces the lowest-level API, which is called *TensorFlow Core*. The APIs most frequently used when building a computational graph are <font color=#0099ff>tf.constant()</font>, <font color=#0099ff>tf.placeholder()</font> and <font color=#0099ff>tf.Variable()</font>. The graph is then run by creating a <font color=#0099ff>Session</font> object and invoking its <font color=#0099ff>run</font> method.

In [3]:
import tensorflow as tf
# tf.constant: nodes whose values are fixed when the graph is defined.
node1 = tf.constant(3.0, dtype=tf.float32)
node2 = tf.constant(4.0)  # dtype is also tf.float32, chosen implicitly
# Printing a node shows the Tensor object (name, shape, dtype), not its value.
print(node1, node2)

Tensor("Const_2:0", shape=(), dtype=float32) Tensor("Const_3:0", shape=(), dtype=float32)


To evaluate the nodes, we should create a session.

In [5]:
# A Session evaluates graph nodes; run() returns their values.
sess = tf.Session()
node_values = sess.run([node1, node2])
print(node_values)

# Operations are nodes as well: node3 combines the two constants.
node3 = tf.add(node1, node2)
print("node3: ", node3)
node3_value = sess.run(node3)
print("sess.run(node3): ", node3_value)

[3.0, 4.0]
node3:  Tensor("Add_1:0", shape=(), dtype=float32)
sess.run(node3):  7.0


Two other important node types are created by <font color=#0099ff>tf.placeholder()</font> and <font color=#0099ff>tf.Variable()</font>.

A **placeholder** is a promise to provide a value later.

**Variables** allow us to add trainable parameters to a graph. They are constructed with a type and an initial value. Before using **variables**, we need to run a special operation to initialize them.

In [2]:
# tf.placeholder: inputs whose values are supplied at run time via a feed dict.
a = tf.placeholder(tf.float32)
b = tf.placeholder(tf.float32)
adder_node = a + b  # the + operator is shorthand for tf.add(a, b)

# The same node can be evaluated with scalar or list inputs.
for feed in ({a: 3, b: 4.5}, {a: [1, 3], b: [2, 4]}):
    print(sess.run(adder_node, feed_dict=feed))

# Further operations can be chained onto an existing node.
add_and_triple = adder_node * 3.
print(sess.run(add_and_triple, feed_dict={a: 3, b: 4.5}))

7.5
[ 3.  7.]
22.5


In [3]:
# tf.Variable: a counter that is incremented by running an assign op.
state = tf.Variable(0, name='counter')
one = tf.constant(1)
new_v = tf.add(state, one)
update = tf.assign(state, new_v)

# Variables must be initialized by running an initializer op before use.
init_op = tf.global_variables_initializer()
sess.run(init_op)

print(sess.run(state))
step = 0
while step < 3:
    sess.run(update)  # state <- state + 1
    print(sess.run(state))
    step += 1

0
1
2
3


# Train a linear model.

In [1]:
import tensorflow as tf
import numpy as np

# Model parameters.
# The dtype is passed by keyword: the second positional parameter of
# tf.Variable is `trainable`, so `tf.Variable([.3], tf.float32)` would hand the
# dtype to the trainable flag instead of setting the variable's type.
W_l = tf.Variable([.3], dtype=tf.float32)
b_l = tf.Variable([-.3], dtype=tf.float32)
# Model input and output
x_l = tf.placeholder(tf.float32)
linear_model = W_l * x_l + b_l
y_l = tf.placeholder(tf.float32)
# Loss: sum of the squared differences between prediction and target
loss_l = tf.reduce_sum(tf.square(linear_model - y_l))
# Optimizer: gradient descent (learning rate 0.01) minimizing the loss
optimizer = tf.train.GradientDescentOptimizer(0.01)
train = optimizer.minimize(loss_l)
# Training data
x_train = [1,2,3,4]
y_train = [0,-1,-2,-3]
# Training loop.
# Variables are not initialized when they are constructed; the initializer op
# has to be run explicitly (constants need no such step).
init = tf.global_variables_initializer()
sess = tf.Session()
sess.run(init)  # sets W_l and b_l to their initial values (0.3 and -0.3)
for i in range(1000):
    sess.run(train, {x_l:x_train, y_l:y_train})

# Fetch the fitted parameters and the loss on the training data in one run()
# call; a list of fetches returns a list of values in the same order.
curr_W, curr_b, curr_loss  = sess.run([W_l, b_l, loss_l], {x_l:x_train, y_l:y_train})
print("W: %s b: %s loss: %s"%(curr_W, curr_b, curr_loss))


W: [-0.9999969] b: [ 0.99999082] loss: 5.69997e-11


# Softmax Regression for MNIST

If you want to assign probabilities to an object being one of several different things, softmax is the thing to do, because softmax gives us a list of values between 0 and 1 that add up to 1.

A softmax regression has two steps: first we add up the evidence of our input being in certain classes, and then we convert that evidence into probabilities.

The softmax function is as follows:
$$\text{softmax}(x)_i = \frac{\exp(x_i)}{\sum_j \exp(x_j)}$$

In [4]:
import tensorflow as tf
import numpy as np

# Load MNIST with one-hot encoded labels.
from tensorflow.examples.tutorials.mnist import input_data
mnist = input_data.read_data_sets("MNIST_data/", one_hot=True)

# Model: a single affine layer mapping 784 inputs to 10 outputs.
x = tf.placeholder(tf.float32, [None, 784])
W = tf.Variable(tf.zeros([784, 10]))
b = tf.Variable(tf.zeros([10]))
y = tf.matmul(x, W) + b  # for these 2-D operands tf.matmul acts like np.dot

# Target labels fed during training and evaluation.
y_ = tf.placeholder(tf.float32, [None, 10])

# Loss. The textbook cross-entropy,
#
#   tf.reduce_mean(-tf.reduce_sum(y_ * tf.log(tf.nn.softmax(y)),
#                                 reduction_indices=[1]))
#
# can be numerically unstable, so tf.nn.softmax_cross_entropy_with_logits is
# applied to the raw outputs 'y' and the result is averaged over the batch.
per_example_loss = tf.nn.softmax_cross_entropy_with_logits(labels=y_, logits=y)
cross_entropy = tf.reduce_mean(per_example_loss)
sgd = tf.train.GradientDescentOptimizer(0.5)
train_step = sgd.minimize(cross_entropy)

sess = tf.InteractiveSession()
sess.run(tf.global_variables_initializer())

# Train on 1000 mini-batches of 100 examples each.
for _ in range(1000):
    batch_xs, batch_ys = mnist.train.next_batch(100)
    train_step.run(feed_dict={x: batch_xs, y_: batch_ys})

# Evaluate: fraction of test examples whose arg-max prediction matches the label.
correct_prediction = tf.equal(tf.argmax(y, 1), tf.argmax(y_, 1))
accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))
test_feed = {x: mnist.test.images, y_: mnist.test.labels}
print(accuracy.eval(feed_dict=test_feed))


Extracting MNIST_data/train-images-idx3-ubyte.gz
Extracting MNIST_data/train-labels-idx1-ubyte.gz
Extracting MNIST_data/t10k-images-idx3-ubyte.gz
Extracting MNIST_data/t10k-labels-idx1-ubyte.gz
0.9159


# Multilayer Convolutional Network for MNIST

In [15]:
import tensorflow as tf
import numpy as np
# Load MNIST with one-hot labels and declare the graph inputs.
from tensorflow.examples.tutorials.mnist import input_data
mnist = input_data.read_data_sets("MNIST_data/", one_hot=True)
# The leading None leaves the batch dimension unspecified.
x = tf.placeholder(tf.float32, shape=[None, 784], name='x_in')
y_ = tf.placeholder(tf.float32, shape=[None, 10], name='y_in')

def weight_variable(shape):
    """Return a Variable of `shape` initialized from a truncated normal (stddev 0.1)."""
    return tf.Variable(tf.truncated_normal(shape, stddev=0.1))

def bias_variable(shape):
    """Return a Variable of `shape` with every element initialized to 0.1."""
    return tf.Variable(tf.constant(0.1, shape=shape))

def conv2d(x, W):
    """Apply tf.nn.conv2d to `x` with filter `W`, unit strides and 'SAME' padding."""
    unit_strides = [1, 1, 1, 1]
    return tf.nn.conv2d(x, W, strides=unit_strides, padding='SAME')

def max_pool_2x2(x):
    """Max-pool `x` with ksize and strides of [1, 2, 2, 1] and 'SAME' padding."""
    window = [1, 2, 2, 1]
    stride = [1, 2, 2, 1]
    return tf.nn.max_pool(x, ksize=window, strides=stride, padding='SAME')
# First convolutional layer (filter shape [5, 5, 1, 32], 32 biases).
W_conv1 = weight_variable([5, 5, 1, 32])
b_conv1 = bias_variable([32])
# Reshape the flat input to 28x28x1; -1 lets TensorFlow infer that dimension.
x_image = tf.reshape(x, [-1, 28, 28, 1])
conv1_pre_activation = conv2d(x_image, W_conv1) + b_conv1
h_conv1 = tf.nn.relu(conv1_pre_activation)
h_pool1 = max_pool_2x2(h_conv1)

# Second convolutional layer (filter shape [5, 5, 32, 64], 64 biases).
W_conv2 = weight_variable([5, 5, 32, 64])
b_conv2 = bias_variable([64])
conv2_pre_activation = conv2d(h_pool1, W_conv2) + b_conv2
h_conv2 = tf.nn.relu(conv2_pre_activation)
h_pool2 = max_pool_2x2(h_conv2)

# Densely connected layer on the flattened pooled features.
W_fc1 = weight_variable([7 * 7 * 64, 1024])
b_fc1 = bias_variable([1024])
h_pool2_flat = tf.reshape(h_pool2, [-1, 7 * 7 * 64])
h_fc1 = tf.nn.relu(tf.matmul(h_pool2_flat, W_fc1) + b_fc1)

# Dropout; the keep probability is fed at run time (0.5 for training steps,
# 1.0 for the accuracy evaluations below).
keep_prob = tf.placeholder(tf.float32)
h_fc1_drop = tf.nn.dropout(h_fc1, keep_prob)

# Readout layer producing 10 outputs per example.
W_fc2 = weight_variable([1024, 10])
b_fc2 = bias_variable([10])
y_conv = tf.matmul(h_fc1_drop, W_fc2) + b_fc2

# Loss, optimizer and accuracy metric.
per_example_loss = tf.nn.softmax_cross_entropy_with_logits(labels=y_, logits=y_conv)
cross_entropy = tf.reduce_mean(per_example_loss)
train_step = tf.train.AdamOptimizer(1e-4).minimize(cross_entropy)
correct_prediction = tf.equal(tf.argmax(y_conv, 1), tf.argmax(y_, 1))
accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))

sess = tf.InteractiveSession()
sess.run(tf.global_variables_initializer())
for i in range(20000):
    batch = mnist.train.next_batch(50)
    images, labels = batch
    if not i % 100:
        # Every 100 steps, report accuracy on the current training batch.
        train_accuracy = sess.run(
            accuracy, feed_dict={x: images, y_: labels, keep_prob: 1.0})
        print("step %d, training accuracy %g"%(i, train_accuracy))
    sess.run(train_step, feed_dict={x: images, y_: labels, keep_prob: 0.5})

test_feed = {x: mnist.test.images, y_: mnist.test.labels, keep_prob: 1.0}
print("test accuracy %g"%accuracy.eval(feed_dict=test_feed))

Extracting MNIST_data/train-images-idx3-ubyte.gz
Extracting MNIST_data/train-labels-idx1-ubyte.gz
Extracting MNIST_data/t10k-images-idx3-ubyte.gz
Extracting MNIST_data/t10k-labels-idx1-ubyte.gz
step 0, training accuracy 0.1
step 100, training accuracy 0.72
step 200, training accuracy 0.9
step 300, training accuracy 0.9
step 400, training accuracy 0.98
step 500, training accuracy 0.96
step 600, training accuracy 0.98
step 700, training accuracy 0.96
step 800, training accuracy 0.94
step 900, training accuracy 1
step 1000, training accuracy 0.96
step 1100, training accuracy 0.94
step 1200, training accuracy 0.98
step 1300, training accuracy 0.98
step 1400, training accuracy 0.92
step 1500, training accuracy 0.98
step 1600, training accuracy 0.98
step 1700, training accuracy 1
step 1800, training accuracy 1
step 1900, training accuracy 1
step 2000, training accuracy 1
step 2100, training accuracy 0.92
step 2200, training accuracy 0.96
step 2300, training accuracy 0.98
step 2400, training 

# Graph Visualization

TensorFlow computation graphs are powerful but complicated. The graph visualization can help you understand and debug them. Here's an example of the visualization at work.

Mainly, we need <font color=#0099ff>with tf.name_scope():</font> and <font color=#0099ff>tf.summary.FileWriter()</font>

In [6]:
import tensorflow as tf
import numpy as np
# The MNIST data is not loaded in this cell: the graph is only built and
# written out, never fed, so the loading code stays disabled.
# from tensorflow.examples.tutorials.mnist import input_data
# mnist = input_data.read_data_sets("MNIST_data/", one_hot=True)
with tf.name_scope('inputs'):
    # Both placeholders are grouped under the 'inputs' name scope.
    x = tf.placeholder(tf.float32, shape=[None, 784], name='x_in')
    y_ = tf.placeholder(tf.float32, shape=[None, 10], name='y_in')

def weight_variable(shape, n_layer):
    """Return a truncated-normal (stddev 0.1) Variable of `shape` named 'W<n_layer>'."""
    var_name = 'W' + '%s' % n_layer
    return tf.Variable(tf.truncated_normal(shape, stddev=0.1), name=var_name)

def bias_variable(shape, n_layer):
    """Return a Variable of `shape` filled with 0.1 and named 'b<n_layer>'."""
    var_name = 'b' + '%s' % n_layer
    return tf.Variable(tf.constant(0.1, shape=shape), name=var_name)

def conv2d(x, W):
    """Apply tf.nn.conv2d to `x` with filter `W`, unit strides and 'SAME' padding."""
    unit_strides = [1, 1, 1, 1]
    return tf.nn.conv2d(x, W, strides=unit_strides, padding='SAME')

def max_pool_2x2(x):
    """Max-pool `x` with ksize and strides of [1, 2, 2, 1] and 'SAME' padding."""
    window = [1, 2, 2, 1]
    stride = [1, 2, 2, 1]
    return tf.nn.max_pool(x, ksize=window, strides=stride, padding='SAME')

# Each stage of the network is built inside its own name scope so that it is
# drawn as a single collapsible node in the TensorBoard graph view.
with tf.name_scope('input_reshape'):
    # -1 lets TensorFlow infer that dimension from the input size.
    x_image = tf.reshape(x, [-1, 28, 28, 1])

# First convolutional layer
with tf.name_scope('First_layer'):
    with tf.name_scope('weights'):
        W_conv1 = weight_variable([5, 5, 1, 32], 1)
    with tf.name_scope('biases'):
        b_conv1 = bias_variable([32], 1)
    conv1_pre_activation = conv2d(x_image, W_conv1) + b_conv1
    h_conv1 = tf.nn.relu(conv1_pre_activation)
    h_pool1 = max_pool_2x2(h_conv1)

# Second convolutional layer
with tf.name_scope('Second_layer'):
    with tf.name_scope('weights'):
        W_conv2 = weight_variable([5, 5, 32, 64], 2)
    with tf.name_scope('biases'):
        b_conv2 = bias_variable([64], 2)
    conv2_pre_activation = conv2d(h_pool1, W_conv2) + b_conv2
    h_conv2 = tf.nn.relu(conv2_pre_activation)
    h_pool2 = max_pool_2x2(h_conv2)

# Densely connected layer
with tf.name_scope('Densely_connected_layer'):
    with tf.name_scope('weights'):
        W_fc1 = weight_variable([7 * 7 * 64, 1024], 3)
    with tf.name_scope('biases'):
        b_fc1 = bias_variable([1024], 3)
    h_pool2_flat = tf.reshape(h_pool2, [-1, 7 * 7 * 64])
    h_fc1 = tf.nn.relu(tf.matmul(h_pool2_flat, W_fc1) + b_fc1)

# Dropout
with tf.name_scope('dropout'):
    keep_prob = tf.placeholder(tf.float32, name='keep_probability')
    h_fc1_drop = tf.nn.dropout(h_fc1, keep_prob)

# Readout layer
with tf.name_scope('readout_layer'):
    with tf.name_scope('weights'):
        W_fc2 = weight_variable([1024, 10], 4)
    with tf.name_scope('biases'):
        b_fc2 = bias_variable([10], 4)
    y_conv = tf.matmul(h_fc1_drop, W_fc2) + b_fc2

# Loss
with tf.name_scope('loss'):
    per_example_loss = tf.nn.softmax_cross_entropy_with_logits(
        labels=y_, logits=y_conv)
    cross_entropy = tf.reduce_mean(per_example_loss)

# Optimizer step
with tf.name_scope('train'):
    train_step = tf.train.AdamOptimizer(1e-4).minimize(cross_entropy)

# Accuracy metric
with tf.name_scope('accuracy'):
    with tf.name_scope('correct_prediction'):
        correct_prediction = tf.equal(tf.argmax(y_conv, 1), tf.argmax(y_, 1))
    with tf.name_scope('accuracy'):
        accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))
        
# Write the graph definition to logs/ so TensorBoard can display it.
# The FileWriter queues events and writes them asynchronously, so it is closed
# explicitly here to make sure the graph is flushed to disk before TensorBoard
# is launched.
with tf.Session() as sess:
    writer = tf.summary.FileWriter("logs/", sess.graph)
    writer.close()


In [None]:
# Launch TensorBoard on the event files under logs/ (the directory passed to
# tf.summary.FileWriter in the cell above).
# NOTE(review): presumably this keeps running until the kernel is interrupted --
# confirm, or start it from a terminal instead.
!tensorboard --logdir logs

Starting TensorBoard b'41' on port 6006
(You can navigate to http://10.89.229.141:6006)


# Visualizing Learning

Some APIs under <font color=#0099ff>tf.summary</font>, such as <font color=#0099ff>tf.summary.scalar()</font>, <font color=#0099ff>tf.summary.histogram()</font> and <font color=#0099ff>tf.summary.merge_all()</font>, will be used in this part.

In [1]:
import tensorflow as tf
import numpy as np
# Load MNIST with one-hot labels and declare the graph inputs.
from tensorflow.examples.tutorials.mnist import input_data
mnist = input_data.read_data_sets("MNIST_data/", one_hot=True)
with tf.name_scope('inputs'):
    # The leading None leaves the batch dimension unspecified.
    x = tf.placeholder(tf.float32, shape=[None, 784], name='x_in')
    y_ = tf.placeholder(tf.float32, shape=[None, 10], name='y_in')

def weight_variable(shape, n_layer):
    """Return a truncated-normal (stddev 0.1) Variable of `shape` named 'W<n_layer>'."""
    var_name = 'W' + '%s' % n_layer
    return tf.Variable(tf.truncated_normal(shape, stddev=0.1), name=var_name)

def bias_variable(shape, n_layer):
    """Return a Variable of `shape` filled with 0.1 and named 'b<n_layer>'."""
    var_name = 'b' + '%s' % n_layer
    return tf.Variable(tf.constant(0.1, shape=shape), name=var_name)

def conv2d(x, W):
    """Apply tf.nn.conv2d to `x` with filter `W`, unit strides and 'SAME' padding."""
    unit_strides = [1, 1, 1, 1]
    return tf.nn.conv2d(x, W, strides=unit_strides, padding='SAME')

def max_pool_2x2(x):
    """Max-pool `x` with ksize and strides of [1, 2, 2, 1] and 'SAME' padding."""
    window = [1, 2, 2, 1]
    stride = [1, 2, 2, 1]
    return tf.nn.max_pool(x, ksize=window, strides=stride, padding='SAME')

def variable_summaries(var):
    """Attach mean, stddev, max, min and histogram summaries to `var`.

    All summary ops are created under a 'summaries' name scope.
    """
    with tf.name_scope('summaries'):
        mean = tf.reduce_mean(var)
        tf.summary.scalar('mean', mean)
        with tf.name_scope('stddev'):
            # Root of the mean squared deviation from the mean.
            squared_deviation = tf.square(var - mean)
            stddev = tf.sqrt(tf.reduce_mean(squared_deviation))
        tf.summary.scalar('stddev', stddev)
        largest = tf.reduce_max(var)
        tf.summary.scalar('max', largest)
        smallest = tf.reduce_min(var)
        tf.summary.scalar('min', smallest)
        tf.summary.histogram('histogram', var)
        
# Build the network with a name scope per stage and attach summaries so that
# weights, biases, activations and the loss can be inspected in TensorBoard.
with tf.name_scope('input_reshape'):
    # -1 lets TensorFlow infer that dimension from the input size.
    x_image = tf.reshape(x, [-1, 28, 28, 1])
    tf.summary.image('input', x_image, 10)
# first layer
with tf.name_scope('First_layer'):
    with tf.name_scope('weights'):
        W_conv1 = weight_variable([5, 5, 1, 32],1)
        variable_summaries(W_conv1)
    with tf.name_scope('biases'):
        b_conv1 = bias_variable([32],1)
        variable_summaries(b_conv1)
    h_conv1 = tf.nn.relu(conv2d(x_image, W_conv1) + b_conv1)
    tf.summary.histogram('activation', h_conv1)
    h_pool1 = max_pool_2x2(h_conv1)

# second layer
with tf.name_scope('Second_layer'):
    with tf.name_scope('weights'):
        W_conv2 = weight_variable([5, 5, 32, 64],2)
        variable_summaries(W_conv2)
    # The bias belongs in a 'biases' scope; it was previously created under a
    # second 'weights' scope, which mislabelled it in TensorBoard.
    with tf.name_scope('biases'):
        b_conv2 = bias_variable([64],2)
        variable_summaries(b_conv2)
    h_conv2 = tf.nn.relu(conv2d(h_pool1, W_conv2) + b_conv2)
    h_pool2 = max_pool_2x2(h_conv2)

# densely connected Layer
with tf.name_scope('Densely_connected_layer'):
    with tf.name_scope('weights'):
        W_fc1 = weight_variable([7 * 7 * 64, 1024],3)
        variable_summaries(W_fc1)
    with tf.name_scope('biases'):
        b_fc1 = bias_variable([1024],3)
        variable_summaries(b_fc1)
    h_pool2_flat = tf.reshape(h_pool2, [-1, 7*7*64])
    h_fc1 = tf.nn.relu(tf.matmul(h_pool2_flat, W_fc1) + b_fc1)

# dropout
with tf.name_scope('dropout'):
    keep_prob = tf.placeholder(tf.float32, name='keep_probability')
    h_fc1_drop = tf.nn.dropout(h_fc1, keep_prob)

# readout layer: scoped and summarized like the layers above
with tf.name_scope('readout_layer'):
    with tf.name_scope('weights'):
        W_fc2 = weight_variable([1024, 10],4)
        variable_summaries(W_fc2)
    with tf.name_scope('biases'):
        b_fc2 = bias_variable([10],4)
        variable_summaries(b_fc2)
    y_conv = tf.matmul(h_fc1_drop, W_fc2) + b_fc2

# train
with tf.name_scope('loss'):
    cross_entropy = tf.reduce_mean(
        tf.nn.softmax_cross_entropy_with_logits(labels=y_, logits=y_conv))
    tf.summary.scalar('cross_entropy', cross_entropy)

with tf.name_scope('train'):
    train_step = tf.train.AdamOptimizer(1e-4).minimize(cross_entropy)

with tf.name_scope('accuracy'):
    with tf.name_scope('correct_prediction'):
        correct_prediction = tf.equal(tf.argmax(y_conv,1), tf.argmax(y_,1))
    with tf.name_scope('accuracy'):
        accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))

sess = tf.InteractiveSession()
# One op that evaluates every summary registered in the graph.
merged = tf.summary.merge_all()
# graph_writer stores the graph definition; test_writer stores the summaries
# evaluated on the test set during training.
graph_writer = tf.summary.FileWriter("logs/", sess.graph)
test_writer = tf.summary.FileWriter("logs/"+"test")

# The same test-set feed is used for the periodic summaries and the final score.
test_feed = {x: mnist.test.images, y_: mnist.test.labels, keep_prob: 1.0}

try:
    sess.run(tf.global_variables_initializer())
    for i in range(2000):
        batch = mnist.train.next_batch(50)
        if i%100 == 0:
            # Every 100 steps: report accuracy on the current training batch
            # and record the merged summaries computed on the test set.
            train_accuracy = sess.run(accuracy, feed_dict={
                x:batch[0], y_: batch[1], keep_prob: 1.0})
            print("step %d, training accuracy %g"%(i, train_accuracy))
            summary = sess.run(merged, feed_dict=test_feed)
            test_writer.add_summary(summary, i)
        train_step.run(feed_dict={x: batch[0], y_: batch[1], keep_prob: 0.5})

    print("test accuracy %g"%sess.run(accuracy, feed_dict=test_feed))
finally:
    # The writers buffer events; close them (even if training is interrupted)
    # so everything is flushed to disk before TensorBoard reads logs/.
    graph_writer.close()
    test_writer.close()

Extracting MNIST_data/train-images-idx3-ubyte.gz
Extracting MNIST_data/train-labels-idx1-ubyte.gz
Extracting MNIST_data/t10k-images-idx3-ubyte.gz
Extracting MNIST_data/t10k-labels-idx1-ubyte.gz
step 0, training accuracy 0.08
step 10, training accuracy 0.26
test accuracy 0.484


In [None]:
# Launch TensorBoard on logs/, which holds the graph and the test summaries
# written by the FileWriters in the cell above.
# NOTE(review): presumably this keeps running until the kernel is interrupted --
# confirm, or start it from a terminal instead.
!tensorboard --logdir logs

Starting TensorBoard b'41' on port 6006
(You can navigate to http://10.89.229.141:6006)


The official full code can be found [here](https://github.com/tensorflow/tensorflow/blob/master/tensorflow/examples/tutorials/mnist/mnist_with_summaries.py)