# Understanding Tensorflow Basics

Roughly speaking, working with TensorFlow involves two main phases: (1) constructing a graph and (2) executing it. Let’s jump into our first example and create something very basic

In [2]:
import tensorflow as tf


## Nodes 

In [3]:
## Constant nodes 
a, b = tf.constant(5), tf.constant(10)

## Nodes perform arithmetic operations
d, e= tf.multiply(a, b), tf.subtract(b, a)

# logical not
z = ~(a > b)


## Session

In [4]:
sess = tf.Session()
outs = sess.run(d)
sess.close()

print 'outs = %d' % outs

# Equivalent form
with tf.Session() as sess:
    print 'outs = %d' % sess.run(d)

outs = 50
outs = 50


## Example 3-1
### A

In [5]:
a, b = tf.constant(10), tf.constant(20)
d = a + b
c = a * b
f = c + d
e = d - c
g = f / e


In [6]:
with tf.Session() as sess:
    print(sess.run(d), sess.run(c), sess.run(f), sess.run(e), sess.run(g))


(30, 200, 230, -170, -2)


### B

In [7]:
a, b = tf.constant(10, dtype=tf.float32), tf.constant(20, dtype=tf.float32)

c = a * b
d = tf.sin(c)
e = b / d


In [8]:
with tf.Session() as sess:
    print(sess.run(e))
    

-22.9017


## Constructing and Managing Our Graph

In [9]:
default_g = tf.get_default_graph()
g = tf.Graph() ## Creates new graph 
k = tf.constant(5)
k.graph is g # Node 'k' is in the default graph


False

The **with** statement can also be used to start a session without having to explicitly
close it.

In [10]:
with g.as_default():
    k = tf.constant(123)
    print k.graph is g

True


In [14]:
with tf.Session() as sess:
    fetches = [a, b, c, d, e, f]
    outs = sess.run(fetches)
print outs, type(outs[0])

[10.0, 20.0, 200.0, -0.87329727, -22.901709, 230] <type 'numpy.float32'>


### Nodes Are Operations, Edges Are Tensor Objects

**Source operations** are operations
that create data, usually without using any previously processed inputs. With these
operations we can create scalars, as we already encountered with the ``tf.constant()``
method, as well as arrays and other types of data.

### Data Types

#### Explicit Vs Implicit

In [15]:
c = tf.constant(4.0, dtype=tf.float64)
d = tf.constant(4.0)

c.dtype, d.dtype

(tf.float64, tf.float32)

#### Casting 

In [16]:
x = tf.constant([1,2,3], name='x', dtype=tf.float32)
print(x.dtype)
x = tf.cast(x, tf.int64)
print(x.dtype)

<dtype: 'float32'>
<dtype: 'int64'>


### Tensor Arrays and Shapes

#### Shapes

In [20]:
tf.constant(10).shape, tf.constant([[1,2,3], [3,5,6]]).shape

(TensorShape([]), TensorShape([Dimension(2), Dimension(3)]))

In [26]:
'{}'.format(tf.constant([10]).get_shape())

'(1,)'

#### Random Numbers

In [43]:
## Evenly Spaced values from 1.0 to 10.0

rand = tf.linspace(1.0, 10.0, 10)
sess = tf.Session()
print(sess.run(rand))
    

[  1.   2.   3.   4.   5.   6.   7.   8.   9.  10.]


#### Fill values

In [46]:
fill = tf.fill((10,10), 1)
print(sess.run(fill))

[[1 1 1 1 1 1 1 1 1 1]
 [1 1 1 1 1 1 1 1 1 1]
 [1 1 1 1 1 1 1 1 1 1]
 [1 1 1 1 1 1 1 1 1 1]
 [1 1 1 1 1 1 1 1 1 1]
 [1 1 1 1 1 1 1 1 1 1]
 [1 1 1 1 1 1 1 1 1 1]
 [1 1 1 1 1 1 1 1 1 1]
 [1 1 1 1 1 1 1 1 1 1]
 [1 1 1 1 1 1 1 1 1 1]]


In [47]:
sess.run(tf.zeros((2,2)))

array([[ 0.,  0.],
       [ 0.,  0.]], dtype=float32)

In [52]:
sess.run(tf.random_normal((2,2), 1, 0.5)) ## args: mean, stddev

array([[ 1.40343952,  0.86059642],
       [ 1.47445035,  1.12114859]], dtype=float32)

In [55]:
sess.run(tf.truncated_normal((2,2), 1, 0.5))

array([[ 1.71181309,  0.43175948],
       [ 0.69321871,  0.35593814]], dtype=float32)

In [61]:
sess.run(tf.random_uniform((2,2), 0, 10))

array([[ 1.46346688,  8.87239838],
       [ 9.19039917,  0.01349807]], dtype=float32)

### Matrix Multiplication

In [85]:
A = tf.constant([[1,2,3],
                 [4,5,6]])
x = tf.constant([[1],[0],[1]])
print(A.get_shape())
print(x.get_shape())

(2, 3)
(3, 1)


In [86]:

b = tf.matmul(A, x)
print(b.get_shape())

(2, 1)


#### Alternate way of expanding dimension

In [89]:
A = tf.constant([[1,2,3],
                 [4,5,6]])
x = tf.constant([1,0,1]) ## Shape (3,)
x = tf.expand_dims(x, 1) ## Shape (3, 1)

print(A.get_shape())
print(x.get_shape())
b = tf.matmul(A, x)
print(b.get_shape())

sess.run(b)

(2, 3)
(3, 1)
(2, 1)


array([[ 4],
       [10]], dtype=int32)

### Transposing Matix

In [94]:
a = tf.constant([[1,2, 3],
                [3,4,5]])
transpose = tf.transpose(a)
print(a.get_shape())
print(transpose.get_shape())
sess.run(transpose)

(2, 3)
(3, 2)


array([[1, 3],
       [2, 4],
       [3, 5]], dtype=int32)

## Names
Objects residing within the same graph cannot have the same name
—TensorFlow forbids it. As a consequence, it will automatically
add an underscore and a number to distinguish the two. Of course,
both objects can have the same name when they are associated with
different graphs.

In [100]:
with tf.Graph().as_default(): ## Creating a new graph
    c1 = tf.constant(4, dtype=tf.float32, name='c')
    c2 = tf.constant(5, dtype=tf.float32, name='c')
c1.name, c2.name ## name arg + index of tensor 

(u'c:0', u'c_1:0')

### Name Scope
Sometimes when dealing with a large, complicated graph, we would like to create
some node grouping to make it easier to follow and manage. For that we can hier‐
archically group nodes together by name.

* Very useful when divding graphs into subgraph

In [104]:
with tf.Graph().as_default():
    c1 = tf.constant(4, name='c')
    with tf.name_scope('prefix'):
        c2 = tf.constant(45, name='0')
        c3 = tf.constant(45, name='k')
print(c1.name, c2.name, c3.name)

(u'c:0', u'prefix/0:0', u'prefix/k:0')


### Variables, Placeholder and Simple Optimization

In [112]:
init_val = tf.random_normal((1,5), 0, 1) ## Rand tensor generator
var = tf.Variable(init_val, name='var') ## init_val: default value
print('Pre run \n{}\n'.format(var))

init = tf.global_variables_initializer()

with tf.Session() as sess:
    sess.run(init)
    post_var = sess.run(var)

print('Post run: \n{}'.format(post_var))

Pre run 
<tf.Variable 'var_4:0' shape=(1, 5) dtype=float32_ref>

Post run: 
[[ 0.59393317  1.04858518 -1.14982152 -0.41200474  0.829445  ]]


#### Placeholders
Placeholders can be thought of as empty Variables that will be filled with data later on. We use them by first constructing our graph and only when it is exe‐
cuted feeding them with the input data.
Placeholders

In [119]:
ph = tf.placeholder(tf.float32, (None, 10))

In [129]:
import numpy as np
x_data = np.random.randn(5,10)
w_data = np.random.randn(10,1)

x_data.shape, w_data.shape

with tf.Graph().as_default():
    x = tf.placeholder(tf.float32, shape=(5,10))
    w = tf.placeholder(tf.float32, shape=(10,1))
    b = tf.fill((5,1), -1) ## Fill with -1
    xw = tf.matmul(x, w)
    
    xwb = xw + tf.cast(b, tf.float32)
    s = tf.reduce_max(xwb)
    with tf.Session() as sess:
        outs = sess.run(s, feed_dict={x: x_data, w: w_data})
        
print('outs {}'.format(outs))



outs 3.52244710922


### Optimization

f(xi) = wTxi + b

yi = f(xi) + ε

In [131]:
## Variables and placeholder
x = tf.placeholder(tf.float32, shape=[None, 3])
y_true = tf.placeholder(tf.float32, shape=None)
w = tf.Variable([[0,0,0]], dtype=tf.float32, name='weights')
b = tf.Variable(0, dtype=tf.float32, name='bias')

y_pred = tf.matmul(w, tf.transpose(x)) + b

#### Defining a loss function
The most commonly used loss is **MSE**(Mean Squared Error) and **cross entropy** especially for categorical data.
* Cross entropy is a measure of similarity between two distributions. Since the classifi‐cation models used in deep learning typically output probabilities for each class, we can compare the true class (distribution p) with the probabilities of each class given by the model (distribution q). The more similar the two distributions, the smaller our cross entropy will be.

In [133]:
loss = tf.reduce_mean(tf.square(y_true - y_pred)) ## MSE

In [134]:
loss_cross_entropy = tf.nn.sigmoid_cross_entropy_with_logits(labels=y_true, logits=y_pred)
loss_cross_entropy = tf.reduce_mean(loss)


#### The gradient descent optimizer

The gradient descent algorithms work well on highly complicated network architectures and therefore are suitable for a wide variety of problems. More specifically,
recent advances make it possible to compute these gradients by utilizing massively
parallel systems, so the approach scales well with dimensionality (though it can still
be painfully time-consuming for large real-world problems). While convergence to
the global minimum is guaranteed for convex functions, for nonconvex problems
(which are essentially all problems in the world of deep learning) they can get stuck
in local minima. In practice, this is often good enough, as is evidenced by the huge
success of the field of deep learning.

###  Why need sampling
It becomes very slow and is intractable when the dataset requires more memory

A more popular technique is the **stochastic gradient descent (SGD)**, where instead of
feeding the entire dataset to the algorithm for the computation of each step, a subset
of the data is sampled sequentially. The number of samples ranges from one sample at
a time to a few hundred, but the most common sizes are between around 50 to
around 500 (usually referred to as mini-batches).

#### Tensorflow Optimizer

#### Linear Regression 

In [None]:
import numpy as np
x_data = np.random.randn(2000, 3)
w_real = [0.3, 0.5, 0.1]
b_real = -0.2

noise = np.random.randn(1,2000) * .1
y_data = np.matmul(w_real, x_data.T) + b_real + noise

In [147]:
NUM_STEPS = 10
g = tf.Graph()
wb_ = []

with g.as_default():
    x = tf.placeholder(tf.float32, shape=[None, 3])
    y_true = tf.placeholder(tf.float32, shape=None)
    
    with tf.name_scope('inference') as scope:
        w = tf.Variable([[0,0,0]], dtype=tf.float32, name='weights')
        b = tf.Variable(0, dtype=tf.float32, name='bias')
        y_pred = tf.matmul(w, tf.transpose(x)) + b
        
    with tf.name_scope('lose') as scope:
        loss =tf.reduce_mean(tf.square(y_true - y_pred)) # MSE
    
    with tf.name_scope('train') as scope:
        learning_rate = 0.5
        optimizer = tf.train.GradientDescentOptimizer(learning_rate)
        train = optimizer.minimize(loss)
    
    init = tf.global_variables_initializer()
    with tf.Session() as sess:
        sess.run(init)
        
        for step in range(NUM_STEPS + 1):
            sess.run(train, {x: x_data, y_true: y_data})
            
            if step % 5 == 0: # Every fifth step
                print(step, sess.run([w, b]))
                wb_.append(sess.run([w, b]))

(0, [array([[ 0.28649634,  0.46323544,  0.09672525]], dtype=float32), -0.18146181])
(5, [array([[ 0.29955566,  0.50041938,  0.09742881]], dtype=float32), -0.2047427])
(10, [array([[ 0.29955569,  0.50041944,  0.09742881]], dtype=float32), -0.20474274])


#### Logistic Regression


In [177]:
import numpy as np
w_real = [0.3, 0.5, 0.1]
b_real = -0.2

In [181]:
N = 20000
def sigmoid(x):
    return 1 / (1 + np.exp(-x))

x_data = np.random.randn(N, 3)

wxb = np.matmul(w_real, x_data.T) + b_real

y_data_pre_noise = sigmoid(wxb)
y_data = np.random.binomial(1, y_data_pre_noise)

`y_pred = tf.sigmoid(y_pred)  
loss = y_true * tf.log(y_pred) - (1 -y_true) * tf.log(1 - y_pred)
loss = tf.reduce_mean(loss)`

#### Equivalent to the above commented code 
`tf.nn.sigmoid_cross_entropy_with_logits(labels=, logits=)`


In [182]:
NUM_STEPS = 50
with g.as_default():
    with tf.name_scope('loss') as scope:
        loss = tf.nn.sigmoid_cross_entropy_with_logits(labels=y_true,logits=y_pred)
        loss = tf.reduce_mean(loss)
    # Before starting, initialize the variables. We will 'run' this first.
    init = tf.global_variables_initializer()
    with tf.Session() as sess:
        sess.run(init)
        for step in range(NUM_STEPS):
            sess.run(train,{x: x_data, y_true: y_data})
#             print(step, 'It is now')
            if step % 5 == 0:
                print(step, sess.run([w,b]))
            wb_.append(sess.run([w,b]))
        print(50, sess.run([w,b]))

(0, [array([[ 0.07276371,  0.11800744,  0.02292892]], dtype=float32), 0.45440653])
(5, [array([[ 0.07099903,  0.11634681,  0.02178503]], dtype=float32), 0.45349506])
(10, [array([[ 0.07099903,  0.11634681,  0.02178503]], dtype=float32), 0.45349506])
(15, [array([[ 0.07099903,  0.11634681,  0.02178503]], dtype=float32), 0.45349506])
(20, [array([[ 0.07099903,  0.11634681,  0.02178503]], dtype=float32), 0.45349506])
(25, [array([[ 0.07099903,  0.11634681,  0.02178503]], dtype=float32), 0.45349506])
(30, [array([[ 0.07099903,  0.11634681,  0.02178503]], dtype=float32), 0.45349506])
(35, [array([[ 0.07099903,  0.11634681,  0.02178503]], dtype=float32), 0.45349506])
(40, [array([[ 0.07099903,  0.11634681,  0.02178503]], dtype=float32), 0.45349506])
(45, [array([[ 0.07099903,  0.11634681,  0.02178503]], dtype=float32), 0.45349506])
(50, [array([[ 0.07099903,  0.11634681,  0.02178503]], dtype=float32), 0.45349506])
