## Computational Graphs

#### Numpy

In [16]:
import numpy as np
import tensorflow as tf
# Manual forward and backward pass through the graph c = sum(x * y + z),
# written with plain NumPy so every gradient is spelled out by hand.
np.random.seed(0)  # fixed seed so the random inputs are reproducible

N, D = 3, 4

# Inputs: three (N, D) arrays of standard-normal samples, drawn in x, y, z order.
x, y, z = (np.random.randn(N, D) for _ in range(3))

# Forward pass.
a = np.multiply(x, y)
b = np.add(a, z)
c = b.sum()

# Backward pass, walking the graph from the output back to the inputs.
grad_c = 1.0                       # dc/dc
grad_b = np.full((N, D), grad_c)   # sum passes the upstream gradient to every element
grad_a = np.array(grad_b)          # addition forwards the gradient unchanged (independent copy)
grad_z = np.array(grad_b)
grad_x = y * grad_a                # product rule: d(x*y)/dx = y
grad_y = x * grad_a                # product rule: d(x*y)/dy = x


#### TensorFlow (CPU)

In [17]:
# Same computation as the NumPy cell, c = sum(x * y + z), expressed as a
# TensorFlow graph whose forward ops are pinned to the CPU.
N, D = 3, 4

# Forward graph: ops created inside this scope are placed on the CPU.
with tf.device("/cpu:0"):
    # Inputs are fed at run time, so they are placeholders.
    x = tf.placeholder(dtype=tf.float32)
    y = tf.placeholder(dtype=tf.float32)
    z = tf.placeholder(dtype=tf.float32)

    a = tf.multiply(x, y)
    b = tf.add(a, z)
    c = tf.reduce_sum(b)

# Ask TensorFlow to add the gradient ops of c with respect to each input.
grad_x, grad_y, grad_z = tf.gradients(c, [x, y, z])

with tf.Session() as sess:
    # One random (N, D) array per placeholder, drawn in x, y, z order.
    values = {
        placeholder: np.random.randn(N, D) for placeholder in (x, y, z)
    }

    # Evaluate the output and all three gradients in a single graph run.
    out = sess.run([c, grad_x, grad_y, grad_z], feed_dict=values)
    c_val, grad_x_val, grad_y_val, grad_z_val = out

#### TensorFlow (GPU)

In [18]:
# Same graph as the CPU cell, c = sum(x * y + z), but with the forward ops
# pinned to the first GPU.
N, D = 3, 4

# Create forward computational graph
with tf.device("/gpu:0"): # Run on the GPU

    x = tf.placeholder( tf.float32 )
    y = tf.placeholder( tf.float32 )
    z = tf.placeholder( tf.float32 )

    a = x * y
    b = a + z
    c = tf.reduce_sum(b)

# TensorFlow to compute gradients
grad_x, grad_y, grad_z = tf.gradients(c, [x, y, z])

# allow_soft_placement lets TensorFlow fall back to an available device when
# "/gpu:0" does not exist, so the cell still runs on a CPU-only machine instead
# of raising a device-placement error. On a machine with a GPU it changes nothing.
with tf.Session(config=tf.ConfigProto(allow_soft_placement=True)) as sess:

    # Random (N, D) inputs for the three placeholders.
    values = {
        x: np.random.randn(N, D),
        y: np.random.randn(N, D),
        z: np.random.randn(N, D),
    }

    # Evaluate the output and all three gradients in a single graph run.
    out = sess.run([c, grad_x, grad_y, grad_z], feed_dict = values)

    c_val, grad_x_val, grad_y_val, grad_z_val = out

#### TensorFlow (Neural Network): Two-layer ReLU network on random data with L2 loss.

In [19]:
# Two-layer ReLU network trained with hand-written gradient descent.
# The weights are placeholders, so they live in NumPy and are fed on every step.
N, D, H = 64, 1000, 100

# Graph inputs: data, targets and both weight matrices.
x = tf.placeholder(tf.float32, shape=(N, D))
y = tf.placeholder(tf.float32, shape=(N, D))
w1 = tf.placeholder(tf.float32, shape=(D, H))
w2 = tf.placeholder(tf.float32, shape=(H, D))

# Forward pass: linear -> ReLU -> linear.
hidden_pre = tf.matmul(x, w1)
h = tf.maximum(hidden_pre, 0)
y_pred = tf.matmul(h, w2)

# L2 loss: squared error summed over features, averaged over the batch.
diff = y_pred - y
per_example = tf.reduce_sum(diff ** 2, axis=1)
loss = tf.reduce_mean(per_example)

grad_w1, grad_w2 = tf.gradients(loss, [w1, w2])

# The graph is fully built above; the session only executes it.
with tf.Session() as sess:

    # Random data and initial weights, drawn in x, w1, w2, y order.
    values = {
        x: np.random.randn(N, D),
        w1: np.random.randn(D, H),
        w2: np.random.randn(H, D),
        y: np.random.randn(N, D),
    }

    learning_rate = 1e-5

    # Train the network: each step fetches the gradients back into NumPy and
    # updates the weight arrays stored in the feed dict in place.
    for t in range(50):
        out = sess.run([loss, grad_w1, grad_w2], feed_dict=values)
        loss_val, grad_w1_val, grad_w2_val = out
        for weight, grad_val in ((w1, grad_w1_val), (w2, grad_w2_val)):
            values[weight] -= learning_rate * grad_val

Problem: the weights are copied between CPU and GPU on every training step!

In [20]:
# Two-layer ReLU network whose weights are tf.Variables, with the update rule
# added to the graph as assign ops.
N, D, H = 64, 1000, 100

x = tf.placeholder(tf.float32, shape=(N, D))
y = tf.placeholder(tf.float32, shape=(N, D))

# Variables (instead of placeholders) persist in the graph between run calls.
w1 = tf.Variable(tf.random_normal((D, H)))
w2 = tf.Variable(tf.random_normal((H, D)))

# Forward pass: linear -> ReLU -> linear.
hidden_pre = tf.matmul(x, w1)
h = tf.maximum(hidden_pre, 0)
y_pred = tf.matmul(h, w2)

# L2 loss: squared error summed over features, averaged over the batch.
diff = y_pred - y
per_example = tf.reduce_sum(diff ** 2, axis=1)
loss = tf.reduce_mean(per_example)

grad_w1, grad_w2 = tf.gradients(loss, [w1, w2])

# Gradient-descent step expressed as graph ops that overwrite w1 and w2.
learning_rate = 1e-5
new_w1 = w1.assign(w1 - learning_rate * grad_w1)
new_w2 = w2.assign(w2 - learning_rate * grad_w2)


with tf.Session() as sess:
    # Run the initializer once so that w1 and w2 receive their starting values.
    sess.run(tf.global_variables_initializer())

    values = {
        x: np.random.randn(N, D),
        y: np.random.randn(N, D),
    }

    # Only `loss` is fetched here, so new_w1 / new_w2 are not part of the run.
    for t in range(50):
        loss_val = sess.run(loss, feed_dict=values)
  
        

Problem: the loss is not going down! The assign ops are never actually executed, because nothing fetched by `sess.run` depends on them.

In [21]:
# Two-layer ReLU network with Variable weights; the assign ops are bundled into
# a single `updates` node that is fetched on every training step.
N, D, H = 64, 1000, 100

x = tf.placeholder(tf.float32, shape=(N, D))
y = tf.placeholder(tf.float32, shape=(N, D))
w1 = tf.Variable(tf.random_normal((D, H)))
w2 = tf.Variable(tf.random_normal((H, D)))

# Forward pass: linear -> ReLU -> linear.
hidden_pre = tf.matmul(x, w1)
h = tf.maximum(hidden_pre, 0)
y_pred = tf.matmul(h, w2)

# L2 loss: squared error summed over features, averaged over the batch.
diff = y_pred - y
per_example = tf.reduce_sum(diff ** 2, axis=1)
loss = tf.reduce_mean(per_example)

grad_w1, grad_w2 = tf.gradients(loss, [w1, w2])

# Gradient-descent step expressed as graph ops that overwrite w1 and w2.
learning_rate = 1e-5
new_w1 = w1.assign(w1 - learning_rate * grad_w1)
new_w2 = w2.assign(w2 - learning_rate * grad_w2)

# Dummy graph node that depends on both assigns; fetching it runs the updates.
updates = tf.group(new_w1, new_w2)


with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())

    values = {
        x: np.random.randn(N, D),
        y: np.random.randn(N, D),
    }

    # Fetching `updates` alongside `loss` makes every step apply the assigns.
    fetches = [loss, updates]
    for t in range(50):
        loss_val, _ = sess.run(fetches, feed_dict=values)
  
        

#### TensorFlow (Neural Network): Optimization 

In [22]:
# Two-layer ReLU network trained with a built-in optimizer instead of
# hand-written gradient and assign ops.
N, D, H = 64, 1000, 100

x = tf.placeholder(tf.float32, shape=(N, D))
y = tf.placeholder(tf.float32, shape=(N, D))
w1 = tf.Variable(tf.random_normal((D, H)))
w2 = tf.Variable(tf.random_normal((H, D)))

# Forward pass: linear -> ReLU -> linear.
hidden_pre = tf.matmul(x, w1)
h = tf.maximum(hidden_pre, 0)
y_pred = tf.matmul(h, w2)

# L2 loss: squared error summed over features, averaged over the batch.
diff = y_pred - y
per_example = tf.reduce_sum(diff ** 2, axis=1)
loss = tf.reduce_mean(per_example)

# The optimizer computes the gradients and builds the weight-update op.
optimizer = tf.train.GradientDescentOptimizer(1e-5)
updates = optimizer.minimize(loss)


with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())

    values = {
        x: np.random.randn(N, D),
        y: np.random.randn(N, D),
    }

    # Each step evaluates the loss and applies one gradient-descent update.
    fetches = [loss, updates]
    for t in range(50):
        loss_val, _ = sess.run(fetches, feed_dict=values)
  
        

#### TensorFlow (Neural Network): Loss 

In [23]:
# Two-layer ReLU network trained with a predefined loss from tf.losses.
N, D, H = 64, 1000, 100

x = tf.placeholder( tf.float32, shape = (N, D) )
y = tf.placeholder( tf.float32, shape = (N, D) )
w1 = tf.Variable( tf.random_normal((D, H)) )
w2 = tf.Variable( tf.random_normal((H, D)) )

# Forward pass: linear -> ReLU -> linear.
h = tf.maximum(tf.matmul(x, w1), 0)
y_pred = tf.matmul(h, w2)

# Common predefined losses. The signature is (labels, predictions), so the
# arguments are passed by keyword to keep targets and outputs in the right slots.
# NOTE: with its default reduction this loss averages over every element, so its
# scale differs from a per-example sum of squared errors.
loss = tf.losses.mean_squared_error(labels = y, predictions = y_pred)

# The optimizer computes the gradients and builds the weight-update op.
optimizer = tf.train.GradientDescentOptimizer(1e-5)
updates = optimizer.minimize(loss)


with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    values = {x: np.random.randn(N, D),
              y: np.random.randn(N, D),}

    # Each step evaluates the loss and applies one gradient-descent update.
    for t in range(50):
        loss_val,_ = sess.run([loss, updates], feed_dict = values)
  
        

#### TensorFlow (Neural Network): Layers

In [25]:
# Two-layer ReLU network built from tf.layers, which creates the weight and
# bias variables for each layer.
N, D, H = 64, 1000, 100

x = tf.placeholder( tf.float32, shape = (N, D) )
y = tf.placeholder( tf.float32, shape = (N, D) )

init = tf.contrib.layers.xavier_initializer() # Use xavier initializer

# tf.layers automatically sets up weight and bias
h = tf.layers.dense(inputs=x, units=H, activation=tf.nn.relu, kernel_initializer=init)
y_pred = tf.layers.dense(inputs=h, units=D, kernel_initializer=init)

# The signature is (labels, predictions), so the arguments are passed by
# keyword to keep targets and outputs in the right slots.
loss = tf.losses.mean_squared_error(labels = y, predictions = y_pred)

# The optimizer computes the gradients and builds the weight-update op.
optimizer = tf.train.GradientDescentOptimizer(1e-5)
updates = optimizer.minimize(loss)


with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    values = {x: np.random.randn(N, D),
              y: np.random.randn(N, D),}

    # Each step evaluates the loss and applies one gradient-descent update.
    for t in range(50):
        loss_val,_ = sess.run([loss, updates], feed_dict = values)
  
        