# Tensorflow introduction

Tensorflow is an open-source software library for dataflow programming across a range of tasks. It is a symbolic math library, and also used for machine learning applications such as neural networks. Here we try to introduce it using some easy elementry examples.

In [1]:
import tensorflow as tf

You can define constant in different format like:

In [2]:
node1 = tf.constant(3.0, dtype=tf.float32)
node2 = tf.constant(4.0) # also tf.float32 implicitly
print(node1)
print(node2)

Tensor("Const:0", shape=(), dtype=float32)
Tensor("Const_1:0", shape=(), dtype=float32)


As mentioned before, tnesorflow is a C basec packages which has python interface. For calculation you should open a tensorflow session like:

In [3]:
with tf.Session() as sess:
    print(sess.run([node1, node2]))

[3.0, 4.0]


For example if you want to add two number using tensorflow, you can make the data flow through operations (here addition) then you have to open a session (here by with command) and run it like:

In [4]:
node3 = tf.add(node1, node2)
with tf.Session() as sess:
    print(sess.run(node3))

7.0


In [5]:
a = tf.placeholder(tf.float32)
b = tf.placeholder(tf.float32)
# adder_node = tf.add(a, b)
adder_node = a + b
with tf.Session() as sess:
    print(sess.run(adder_node, {a: 3, b:4.5}))
    print(sess.run(adder_node, {a: [1,3], b: [2, 4]}))

7.5
[ 3.  7.]


Or you can define more complicated precedures:

In [6]:
a = tf.placeholder(tf.float32)
b = tf.placeholder(tf.float32)
c = tf.placeholder(tf.float32)
adder_node = (a + b)*c
with tf.Session() as sess:
    print(sess.run(adder_node, {a: 3, b:4.5, c:2}))
    

15.0


Sinse almost every Machine learning problem is based on optimization, let's try to solve an optimization problem. For this pupose assume that there are an input set "x_train" which we want to find a simple linear model <br>
$y=W\times x+b$ <br>
to predict "y_train" vector. 

In [2]:
x_train = [1,2,3,4]
y_train = [0,-1,-2,-3]

W = tf.Variable([.3], dtype=tf.float32)
b = tf.Variable([-.3], dtype=tf.float32)
x = tf.placeholder(tf.float32)
linear_model = W * x + b
init = tf.global_variables_initializer()
with tf.Session() as sess:
    sess.run(init)
    print(sess.run(linear_model, {x:x_train}))

[ 0.          0.30000001  0.60000002  0.90000004]


In [3]:
W = tf.Variable([.3], dtype=tf.float32)
b = tf.Variable([-.3], dtype=tf.float32)
x = tf.placeholder(tf.float32)
linear_model = W * x + b

y = tf.placeholder(tf.float32)
squared_deltas = tf.square(linear_model - y)
loss = tf.reduce_sum(squared_deltas)

init = tf.global_variables_initializer()
with tf.Session() as sess:
    sess.run(init)
    print(sess.run(loss, {x:x_train, y:y_train}))

23.66


## How to train, save and restore a model

In [7]:
tf.reset_default_graph()

W = tf.Variable([.3], dtype=tf.float32,name='W')
b = tf.Variable([-.3], dtype=tf.float32,name='b')
x = tf.placeholder(tf.float32,name='x')
linear_model = W * x + b

y = tf.placeholder(tf.float32)
squared_deltas = tf.square(linear_model - y)
loss = tf.reduce_sum(squared_deltas)

tf.summary.scalar("loss", loss)

optimizer = tf.train.GradientDescentOptimizer(0.01)
train = optimizer.minimize(loss)

summary_op = tf.summary.merge_all()

init = tf.global_variables_initializer()
sess = tf.Session()
sess.run(init)

writer = tf.summary.FileWriter('./logs', graph=tf.get_default_graph())
for i in range(1000):
    _, summary = sess.run([train,summary_op], {x:x_train, y:y_train})
    writer.add_summary(summary,i)
    

print(sess.run([W, b]))

# saving model 
saver = tf.train.Saver()
saver.save(sess,'./model')

sess.close()

[array([-0.9999969], dtype=float32), array([ 0.99999082], dtype=float32)]


In [17]:
import glob
import os
import pandas as pd


# Get all event* runs from logging_dir subdirectories
logging_dir = './logs'
event_paths = glob.glob(os.path.join(logging_dir,"event*"))

# Extraction function
def sum_log(path):
    runlog = pd.DataFrame(columns=['metric', 'value'])
#    try:
    for e in tf.train.summary_iterator(path):
        for v in e.summary.value:
            r = {'metric': v.tag, 'value':v.simple_value}
            runlog = runlog.append(r, ignore_index=True)


    # Dirty catch of DataLossError
#    except:
#        print('Event file possibly corrupt: {}'.format(path))
#        return None

    runlog['epoch'] = [item for sublist in [[i]*5 for i in range(0, len(runlog)//5)] for item in sublist]
    
    return runlog


# Call & append
all_log = pd.DataFrame()
for path in event_paths:
    log = sum_log(path)
    if log is not None:
        if all_log.shape[0] == 0:
            all_log = log
        else:
            all_log = all_log.append(log)


DataLossError: truncated record at 64140

In [2]:
# reset graphs, just for sure
tf.reset_default_graph()

W = tf.Variable([.3], dtype=tf.float32,name='W')
b = tf.Variable([-.3], dtype=tf.float32,name='b')
x = tf.placeholder(tf.float32,name='x')
linear_model = W * x + b

y = tf.placeholder(tf.float32)
squared_deltas = tf.square(linear_model - y)
loss = tf.reduce_sum(squared_deltas)

optimizer = tf.train.GradientDescentOptimizer(0.01)
train = optimizer.minimize(loss)

In [3]:
# openning a new session 
sess = tf.Session()

saver = tf.train.Saver()
# restoring the session
saver.restore(sess, './model')

INFO:tensorflow:Restoring parameters from ./model


In [4]:
# evaluate training accuracy
x_train = [1,2,3,4]
y_train = [0,-1,-2,-3]

curr_W, curr_b, curr_loss = sess.run([W, b, loss], {x:x_train, y:y_train})
print("W: %s b: %s loss: %s"%(curr_W, curr_b, curr_loss))

W: [-0.50490189] b: [-0.45227283] loss: 1.4097


## You might want to train it more and save it again

In [5]:
for i in range(6):
    sess.run(train, {x:x_train, y:y_train})

print(sess.run([W, b]))

# saving model 
saver = tf.train.Saver()
saver.save(sess,'./model')

sess.close()

[array([-0.54038435], dtype=float32), array([-0.35132164], dtype=float32)]


## Others

In [27]:
import tensorflow as tf
# NumPy is often used to load, manipulate and preprocess data.
import numpy as np
tf.logging.set_verbosity(tf.logging.ERROR)

# Declare list of features. We only have one real-valued feature. There are many
# other types of columns that are more complicated and useful.
features = [tf.contrib.layers.real_valued_column("x", dimension=1)]

# An estimator is the front end to invoke training (fitting) and evaluation
# (inference). There are many predefined types like linear regression,
# logistic regression, linear classification, logistic classification, and
# many neural network classifiers and regressors. The following code
# provides an estimator that does linear regression.
estimator = tf.contrib.learn.LinearRegressor(feature_columns=features, model_dir='./model/')

# TensorFlow provides many helper methods to read and set up data sets.
# Here we use two data sets: one for training and one for evaluation
# We have to tell the function how many batches
# of data (num_epochs) we want and how big each batch should be.
x_train = np.array([1., 2., 3., 4.])
y_train = np.array([0., -1., -2., -3.])
x_eval = np.array([2., 5., 8., 1.])
y_eval = np.array([-1.01, -4.1, -7, 0.])
input_fn = tf.contrib.learn.io.numpy_input_fn({"x":x_train}, y_train,
                                              batch_size=4,
                                              num_epochs=1000)
eval_input_fn = tf.contrib.learn.io.numpy_input_fn(
    {"x":x_eval}, y_eval, batch_size=4, num_epochs=1000)

# We can invoke 1000 training steps by invoking the  method and passing the
# training data set.
estimator.fit(input_fn=input_fn, steps=1000)

# Here we evaluate how well our model did.
train_loss = estimator.evaluate(input_fn=input_fn)
eval_loss = estimator.evaluate(input_fn=eval_input_fn)
print("train loss: %r"% train_loss)
print("eval loss: %r"% eval_loss)


train loss: {'loss': 5.186962e-13, 'global_step': 2000}
eval loss: {'loss': 0.0025250686, 'global_step': 2000}


In [3]:
import numpy as np
import tensorflow as tf
tf.logging.set_verbosity(tf.logging.ERROR)

# Declare list of features, we only have one real-valued feature
def model(features, labels, mode):
    # Build a linear model and predict values
    W = tf.get_variable("W", [1], dtype=tf.float64)
    b = tf.get_variable("b", [1], dtype=tf.float64)
    y = W*features['x'] + b
    # Loss sub-graph
    loss = tf.reduce_sum(tf.square(y - labels))
    # Training sub-graph
    global_step = tf.train.get_global_step()
    optimizer = tf.train.GradientDescentOptimizer(0.01)
    train = tf.group(optimizer.minimize(loss),
                   tf.assign_add(global_step, 1))
    # ModelFnOps connects subgraphs we built to the
    # appropriate functionality.
    return tf.contrib.learn.ModelFnOps(
      mode=mode, predictions=y,
      loss=loss,
      train_op=train)

estimator = tf.contrib.learn.Estimator(model_fn=model, model_dir='./my_model/')
# define our data sets
x_train = np.array([1., 2., 3., 4.])
y_train = np.array([0., -1., -2., -3.])
x_eval = np.array([2., 5., 8., 1.])
y_eval = np.array([-1.01, -4.1, -7, 0.])
input_fn = tf.contrib.learn.io.numpy_input_fn({"x": x_train}, y_train, 4, num_epochs=1000)
eval_input_fn = tf.contrib.learn.io.numpy_input_fn({"x":x_eval}, y_eval, batch_size=4, num_epochs=1000)

# train
estimator.fit(input_fn=input_fn, steps=1000)
# Here we evaluate how well our model did. 
train_loss = estimator.evaluate(input_fn=input_fn)
eval_loss = estimator.evaluate(input_fn=eval_input_fn)
print("train loss: %r"% train_loss)
print("eval loss: %r"% eval_loss)

train loss: {'loss': 5.5466782e-31, 'global_step': 5000}
eval loss: {'loss': 0.010100022, 'global_step': 5000}
