## Simple graph example

In [None]:
import numpy as np

# Hand-written forward and backward pass of a tiny computational graph.
N, D = 3, 4
np.random.seed(0)
# Three independent (N, D) inputs, drawn in the order x, y, z.
x, y, z = (np.random.randn(N, D) for _ in range(3))

# Forward pass: c = sum(x * y + z)
a = x * y  # shape (N, D)
b = a + z  # shape (N, D)
c = b.sum()

# Backward pass: apply the chain rule node by node, starting from dc/dc = 1.
grad_c = 1.0
grad_b = np.full((N, D), grad_c)  # the sum spreads its gradient to every entry of b
grad_a = grad_b.copy()            # addition passes the gradient through unchanged
grad_z = grad_b.copy()
grad_x = grad_a * y               # product rule: d(x*y)/dx = y
grad_y = grad_a * x               # product rule: d(x*y)/dy = x

for result in (c, grad_x, grad_y, grad_z):
    print(result)

In [None]:
#TENSORFLOW 1.X
import numpy as np
import tensorflow.compat.v1 as tf

tf.disable_v2_behavior() # to use former syntax of
                         # tensorflow 1.X, eg 1.14

# simple computational graph with tensorflow
N, D = 3, 4
np.random.seed(0)  # same seed and draw order as the numpy cell

# Placeholders are graph inputs; their values are supplied through feed_dict.
x = tf.placeholder(tf.float32, shape=(N, D))
y = tf.placeholder(tf.float32, shape=(N, D))
z = tf.placeholder(tf.float32, shape=(N, D))

# Forward pass: c = sum(x * y + z)
a = x * y
b = a + z
c = tf.reduce_sum(b)

# Ask TensorFlow to add the gradient nodes dc/dx, dc/dy, dc/dz to the graph.
grad_x, grad_y, grad_z = tf.gradients(c, [x, y, z])

with tf.Session() as sess:
    values = {
        x: np.random.randn(N, D),
        y: np.random.randn(N, D),
        z: np.random.randn(N, D)
    }
    out = sess.run([c, grad_x, grad_y, grad_z],
                   feed_dict=values)
    c_val, grad_x_val, grad_y_val, grad_z_val = out

    print(c_val)
    print(grad_x_val)
    print(grad_y_val)
    # grad_z_val was fetched but never shown; print it like the numpy cell does.
    print(grad_z_val)

In [None]:
import tensorflow as tf

# Look up the physical devices of each type first, then report the counts.
gpu_devices = tf.config.experimental.list_physical_devices('GPU')
cpu_devices = tf.config.experimental.list_physical_devices('CPU')
print("Num GPUs Available: ", len(gpu_devices))
print("Num CPUs Available: ", len(cpu_devices))

In [None]:
import numpy as np
import tensorflow.compat.v1 as tf

tf.disable_v2_behavior() # to use former syntax of
                         # tensorflow 1.X, eg 1.14

# simple computational graph - tensorflow with explicit device placement
N, D = 3, 4
# The graph is pinned to the CPU here; use '/GPU:0' instead to place it on the first GPU.
with tf.device('/CPU:0'):
    x = tf.placeholder(tf.float32, shape=(N, D))
    y = tf.placeholder(tf.float32, shape=(N, D))
    z = tf.placeholder(tf.float32, shape=(N, D))
    # Forward pass: c = sum(x * y + z)
    a = x * y
    b = a + z
    c = tf.reduce_sum(b)

    # Gradient nodes are created inside the same device scope.
    grad_x, grad_y, grad_z = tf.gradients(c, [x, y, z])

with tf.Session() as sess:
    values = {
        x: np.random.randn(N, D),
        y: np.random.randn(N, D),
        z: np.random.randn(N, D)
    }
    out = sess.run([c, grad_x, grad_y, grad_z],
                   feed_dict=values)
    c_val, grad_x_val, grad_y_val, grad_z_val = out

    print(c_val)
    print(grad_x_val)
    print(grad_y_val)
    # grad_z_val was fetched but never shown; print it like the numpy cell does.
    print(grad_z_val)

In [None]:
# Just as example to show how it looks in PyTorch 
# You can skip this cell if you don't want to install PyTorch
import torch
from torch.autograd import Variable

# simple computational graph - torch
# (tensors are created on the default device; nothing here moves them to a GPU)
N, D = 3, 4

# Plain tensors track gradients when created with requires_grad=True; the
# torch.autograd.Variable wrapper is deprecated and no longer needed.
x = torch.randn(N, D, requires_grad=True)
y = torch.randn(N, D, requires_grad=True)
z = torch.randn(N, D, requires_grad=True)

# Forward pass: c = sum(x * y + z)
a = x * y
b = a + z
c = torch.sum(b)

# Backpropagate from the scalar c; this fills .grad on the leaf tensors x, y, z.
c.backward()

print(x.grad)
print(y.grad)
print(z.grad)

## A more complex example of graph

### Utility function - plot history

In [None]:
import matplotlib.pyplot as plt

def plot_history(epoch_vector, losses_list, name_list, y_label):
    """Draw several curves against the same x-axis values and show the figure.

    Args:
        epoch_vector: x-axis values shared by every curve.
        losses_list: iterable of y-value series, one per curve.
        name_list: legend labels; entry ``k`` labels the ``k``-th series.
        y_label: text used for the y-axis label.
    """
    for position, series in enumerate(losses_list):
        curve_name = name_list[position]
        plt.plot(epoch_vector, series, label=curve_name)

    # Axis labelling and legend for the whole figure, then render it.
    plt.xlabel('Epochs')
    plt.ylabel(y_label)
    plt.legend()
    plt.show()

### Step 0: get some data

In [None]:
# First get some data and prepare it
# NOTE: relies on `tf` having been imported by an earlier cell.
mnist = tf.keras.datasets.mnist
(x_train, y_train_vec), (x_test, y_test_vec) = mnist.load_data()
# Rescale the pixel values by 1/255.
x_train, x_test = x_train / 255.0, x_test / 255.0
# Flatten every image into one row; the sizes come from the arrays themselves
# instead of hard-coded 60000 / 10000 / 784.
x_train = x_train.reshape(x_train.shape[0], -1)
x_test = x_test.reshape(x_test.shape[0], -1)
# convert class vectors to binary class matrices
y_train = tf.keras.utils.to_categorical(y_train_vec, 10, dtype='float64')
y_test = tf.keras.utils.to_categorical(y_test_vec, 10, dtype='float64')
N = x_train.shape[0]  # number of samples
D = x_train.shape[1]  # dimension of input sample
n_classes = y_train.shape[1] # output dim
print('MNIST data set ready. N={}, D={}, n_classes={}'.format(N,D,n_classes))

### Step 1
This graph is actually creating the necessary nodes to:
1. Do the forward pass (line 13)
2. Compute the MSE loss (lines 14 and 15)
3. Compute the gradients of the loss w.r.t. the weights (line 17)

In line 23, we initialize the weights (outside of the graph, using numpy). In line 25, we run the graph with `sess.run`, passing the whole training set and the weights in the `feed_dict` (inputs, targets, weights) and getting back from the graph the loss and the gradient values.

In [None]:
# TensorFlow 1.x - a more complex example
# step 1 - forward pass on single layer ANN with ReLu
import numpy as np
import tensorflow.compat.v1 as tf

tf.disable_v2_behavior() # to use former syntax of
                         # tensorflow 1.X, eg 1.14

x = tf.placeholder(tf.float32, shape=(N, D))  # inputs, fed at run time
y = tf.placeholder(tf.float32, shape=(N, n_classes))  # targets, fed at run time
w1 = tf.placeholder(tf.float32, shape=(D, n_classes))  # weights are fed from outside the graph too

y_pred = tf.maximum(tf.matmul(x, w1), 0) # ReLU on logit
diff = y_pred - y
loss = tf.reduce_mean(tf.pow(diff, 2))  # mean of the squared differences

grad_w1 = tf.gradients(loss, [w1])  # a list with one gradient tensor per entry of [w1]

with tf.Session() as sess:
    values = {
        x: x_train,
        y: y_train,
        w1: np.random.randn(D, n_classes)  # random initial weights drawn with numpy
    }
    out = sess.run([loss, grad_w1], feed_dict=values)  # one run evaluates both the loss and the gradient
    loss_val, grad_w1_val = out

print(loss_val, np.array(grad_w1_val).shape)  # grad_w1_val is a list, hence the np.array wrapper

In [None]:
# TensorFlow 1.x - a more complex example
# step 2 - training, but the weights are passed between numpy and the graph at every step
import numpy as np
import tensorflow.compat.v1 as tf
from matplotlib import pyplot as pl

tf.disable_v2_behavior() # to use former syntax of
                         # tensorflow 1.X, eg 1.14

# Inputs, targets and weights all enter the graph through placeholders.
x = tf.placeholder(tf.float32, shape=(N, D))
y = tf.placeholder(tf.float32, shape=(N, n_classes))
w1 = tf.placeholder(tf.float32, shape=(D, n_classes))

# Forward pass followed by the mean of the squared differences.
logits = tf.matmul(x, w1)
y_pred = tf.maximum(logits, 0) # ReLU on logit
diff = y_pred - y
loss = tf.reduce_mean(tf.pow(diff, 2))

# tf.gradients returns a list (one entry per tensor in [w1]); unpack the single entry.
(grad_w1,) = tf.gradients(loss, [w1])

alpha = 1e-2
J = []
values = {x: x_train, y: y_train, w1: np.random.randn(D, n_classes)}

with tf.Session() as sess:
    for epoch in range(20):
        loss_val, grad_w1_val = sess.run([loss, grad_w1], feed_dict=values)
        # Gradient-descent step done in numpy; the updated weights are fed
        # back into the graph on the next iteration.
        values[w1] = values[w1] - alpha * grad_w1_val
        J.append(loss_val)
        print("epoch", epoch, loss_val)
    pl.plot(J)
In [None]:
# TensorFlow 1.x - a more complex example
# step 3 - change weights from placeholders to variables
import numpy as np
import tensorflow.compat.v1 as tf
from matplotlib import pyplot as pl

tf.disable_v2_behavior() # to use former syntax of
                         # tensorflow 1.X, eg 1.14

x = tf.placeholder(tf.float32, shape=(N, D))
y = tf.placeholder(tf.float32, shape=(N, n_classes))
# The weights now live inside the graph as a variable instead of being fed in.
w1 = tf.Variable(tf.random_normal((D, n_classes)))

y_pred = tf.maximum(tf.matmul(x, w1), 0.0) # ReLU on logit
diff = y_pred - y
loss = tf.reduce_mean(tf.pow(diff, 2))
grad_w1 = tf.gradients(loss, [w1])[0] # returns a list of
                                      # sums of gradients

alpha = 1e-2
# Gradient-descent step expressed as a graph op.
new_w1 = w1.assign(w1 - alpha * grad_w1)

with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    values = { x: x_train, y: y_train}
    J = []
    for epoch in range(20):
        # NOTE: only `loss` is fetched, so `new_w1` is never executed and the
        # weights are not updated; the loss therefore does not decrease.
        # Step 3bis (next cell) addresses this by also fetching an op that
        # depends on the assignment.
        # Fetching the tensor itself (not a list) yields the scalar loss
        # rather than a one-element list.
        loss_val = sess.run(loss, feed_dict=values)
        J.append(loss_val)
        print("epoch", epoch, loss_val)
    pl.plot(J)

In [None]:
# TensorFlow 1.x - a more complex example
# step 3bis - add dummy graph node that depends on updates
import numpy as np
import tensorflow.compat.v1 as tf
from matplotlib import pyplot as pl

x = tf.placeholder(tf.float32, shape=(N, D))
y = tf.placeholder(tf.float32, shape=(N, n_classes))
w1 = tf.Variable(tf.random_normal((D, n_classes)))

y_pred = tf.maximum(tf.matmul(x, w1), 0.0) # ReLU on logit
diff = y_pred - y
loss = tf.reduce_mean(tf.pow(diff, 2))
grad_w1 = tf.gradients(loss, [w1])[0] # returns a list of
                                      # sums of gradients

alpha = 1e-2
new_w1 = w1.assign(w1 - alpha * grad_w1)
# Grouping op: fetching it forces the assignment to run without returning
# the new weight values to Python.
updates = tf.group(new_w1)

with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    values = { x: x_train, y: y_train}
    J = []
    for epoch in range(20):
        # sess.run returns one result per fetch; the group op yields None, so
        # unpack and keep only the scalar loss (previously the whole
        # [loss, None] list was stored in J and printed).
        loss_val, _ = sess.run([loss, updates], feed_dict=values)
        J.append(loss_val)
        print("epoch", epoch, loss_val)
    pl.plot(J)

In [None]:
# TensorFlow 1.x - a more complex example
# step 4 - use optimizer and pre-defined loss
import numpy as np
import tensorflow.compat.v1 as tf
from matplotlib import pyplot as pl

x = tf.placeholder(tf.float32, shape=(N, D))
y = tf.placeholder(tf.float32, shape=(N, n_classes))
w1 = tf.Variable(tf.random_normal((D, n_classes)))

y_pred = tf.maximum(tf.matmul(x, w1), 0.0) # ReLU on logit
# Pre-defined loss replaces the hand-written diff / pow / reduce_mean chain.
# Keyword arguments make the roles explicit: the signature is
# (labels, predictions), and the old positional call passed them swapped.
loss = tf.losses.mean_squared_error(labels=y, predictions=y_pred)

# The optimizer builds the gradient and weight-assignment ops for us.
optimizer = tf.train.GradientDescentOptimizer(1e-2)
updates = optimizer.minimize(loss)

with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    values = { x: x_train, y: y_train}
    J = []
    for epoch in range(20):
        # sess.run returns one result per fetch; the training op yields None,
        # so unpack and keep only the scalar loss (previously the whole
        # [loss, None] list was stored in J and printed).
        loss_val, _ = sess.run([loss, updates], feed_dict=values)
        J.append(loss_val)
        print("epoch", epoch, loss_val)
    pl.plot(J)

In [None]:
# TensorFlow 1.x - a more complex example
# step 5 - use layers and initializer
import numpy as np
import tensorflow.compat.v1 as tf
from matplotlib import pyplot as pl

x = tf.placeholder(tf.float32, shape=(N, D))
y = tf.placeholder(tf.float32, shape=(N, n_classes))

init = tf.variance_scaling_initializer(2.0) # He init
# The dense layer creates and owns its weights, replacing the explicit w1
# variable and matmul of the previous steps.
y_pred = tf.layers.dense(inputs=x, units=n_classes,
                         activation=tf.nn.relu,
                         kernel_initializer=init)

# Keyword arguments make the roles explicit: the signature is
# (labels, predictions), and the old positional call passed them swapped.
loss = tf.losses.mean_squared_error(labels=y, predictions=y_pred)

optimizer = tf.train.GradientDescentOptimizer(1e-2)
updates = optimizer.minimize(loss)

with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    values = { x: x_train, y: y_train}
    J = []
    for epoch in range(20):
        # sess.run returns one result per fetch; the training op yields None,
        # so unpack and keep only the scalar loss (previously the whole
        # [loss, None] list was appended to J).
        loss_val, _ = sess.run([loss, updates],
                               feed_dict=values)
        J.append(loss_val)
        print("epoch", epoch, loss_val)
    pl.plot(J)