## Simple graph example

In [None]:
import numpy as np

# Hand-written forward and backward pass for c = sum(x * y + z), NumPy only.
N, D = 3, 4
np.random.seed(0)  # fixed seed: every run prints the same numbers
x, y, z = (np.random.randn(N, D) for _ in range(3))  # drawn in the order x, y, z

# Forward pass.
a = np.multiply(x, y)  # elementwise product, shape (N, D)
b = np.add(a, z)       # shape (N, D)
c = b.sum()            # scalar

# Backward pass: chain rule applied from the output back to the inputs.
grad_c = 1.0                      # dc/dc
grad_b = np.full((N, D), grad_c)  # the sum hands the upstream gradient to every entry
grad_a = grad_b.copy()            # addition passes the gradient through unchanged
grad_z = grad_b.copy()
grad_x = grad_a * y               # d(x*y)/dx = y
grad_y = grad_a * x               # d(x*y)/dy = x

print(c, grad_x, grad_y, grad_z, sep='\n')

In [None]:
#TENSORFLOW 2
import numpy as np
import tensorflow as tf

# simple computational graph with tensorflow

@tf.function  # run the body as a TensorFlow graph
def simple_graph(x, y, z):
    """Evaluate c = sum(x * y + z) together with its gradients.

    Args:
        x, y, z: inputs combined elementwise as x * y + z.

    Returns:
        The tuple (c, dc/dx, dc/dy, dc/dz).
    """
    product = x * y
    shifted = product + z
    total = tf.reduce_sum(input_tensor=shifted)
    # tf.gradients yields one gradient per entry of xs, in the same order.
    d_x, d_y, d_z = tf.gradients(ys=total, xs=[x, y, z])
    return total, d_x, d_y, d_z

# Random inputs: seed 0, drawn in the order x, y, z.
N, D = 3, 4
np.random.seed(0)
x, y, z = (np.random.randn(N, D) for _ in range(3))

# A single call returns the output and all three gradients.
c_val, grad_x_val, grad_y_val, grad_z_val = simple_graph(x, y, z)

print('c_val =', c_val)
print('grad_x_val =', grad_x_val)
print('grad_y_val =', grad_y_val)
print('grad_z_val =', grad_z_val)

In [None]:
import tensorflow as tf
# Report how many physical devices of each kind TensorFlow lists.
for device_kind in ('GPU', 'CPU'):
    visible = tf.config.experimental.list_physical_devices(device_kind)
    print("Num {}s Available: ".format(device_kind), len(visible))

In [None]:
# Just an example to show how the same graph looks in PyTorch
# You can skip this cell if you don't want to install PyTorch
import torch
from torch.autograd import Variable

# simple computational graph - torch
N, D = 3, 4
torch.manual_seed(0)  # reproducible inputs, like the seeded NumPy / TensorFlow cells

# Tensors created with requires_grad=True are tracked by autograd directly;
# the deprecated torch.autograd.Variable wrapper is not needed.
x = torch.randn(N, D, requires_grad=True)
y = torch.randn(N, D, requires_grad=True)
z = torch.randn(N, D, requires_grad=True)

# Forward pass: the graph is recorded while these operations run.
a = x * y
b = a + z
c = torch.sum(b)

# Backward pass: fills in .grad on the leaf tensors x, y and z.
c.backward()

print(x.grad)
print(y.grad)
print(z.grad)

## A more complex graph example

### Utility function - plot history

In [None]:
import matplotlib.pyplot as plt

def plot_history(epoch_vector, losses_list, name_list, y_label):
    """Plot several curves against a shared epoch axis and show the figure.

    Args:
        epoch_vector: x values shared by every curve.
        losses_list: iterable of y-value sequences, one per curve.
        name_list: legend labels; name_list[i] labels the i-th curve.
        y_label: text for the y axis.
    """
    for series_index, series in enumerate(losses_list):
        series_name = name_list[series_index]
        plt.plot(epoch_vector, series, label=series_name)

    # Decorate and display whatever was drawn on the current pyplot figure.
    plt.xlabel('Epochs')
    plt.ylabel(y_label)
    plt.legend()
    plt.show()

### Step 0: get some data

In [None]:
# First get MNIST data and prepare it
mnist = tf.keras.datasets.mnist
(x_train, y_train_vec), (x_test, y_test_vec) = mnist.load_data()

# Rescale the pixel intensities by 1/255.
x_train, x_test = x_train / 255.0, x_test / 255.0

# Flatten every image into one row vector. The sizes come from the arrays
# themselves rather than from hard-coded 60000 / 10000 / 784 literals.
x_train = x_train.reshape(len(x_train), -1)
x_test = x_test.reshape(len(x_test), -1)

# convert class vectors to binary class matrices
y_train = tf.keras.utils.to_categorical(y_train_vec, 10, dtype='float64')
y_test = tf.keras.utils.to_categorical(y_test_vec, 10, dtype='float64')

N = x_train.shape[0]         # number of samples
D = x_train.shape[1]         # dimension of input sample
n_classes = y_train.shape[1] # output dim
print('MNIST data set ready. N={}, D={}, n_classes={}'.format(N,D,n_classes))

### Step 1
This graph is actually creating the necessary nodes to:
1. Do the forward pass (line 8)
2. Compute the MSE loss (lines 9 and 10)
3. Compute the gradients of the loss w.r.t. the weights (line 11)

In line 20 we initialize the weights (outside of the graph, using NumPy). In line 23 we run the graph, passing in the whole training set (inputs and targets) and getting back the predicted outputs, the loss, and the gradient values.

In [None]:
# TensorFlow 2.0 - a more complex example
# step 1 - forward pass on single layer ANN with ReLu
import numpy as np
import tensorflow as tf

@tf.function  # the body below is run as a TensorFlow graph
def simple_ann_train(x, w1, y):  # returns (predictions, MSE loss, [d loss / d w1])
    y_pred = tf.maximum(tf.matmul(x, w1), 0)  # forward pass: ReLU applied to x @ w1
    residual = y_pred - y  # prediction error against the targets
    loss = tf.reduce_mean(residual ** 2)  # mean of the squared errors (MSE)
    grads = tf.gradients(ys=loss, xs=[w1])  # list with one gradient per entry of xs
    # NOTE: the text above this cell refers to these statements by cell line number; keep the layout.
    return y_pred, loss, grads

N = x_train.shape[0]         # number of training samples (rows of x_train)
D = x_train.shape[1]         # dimension of one input sample (columns of x_train)
n_classes = y_train.shape[1] # output dimension (columns of y_train)

np.random.seed(0)  # fixed seed so the initial weights are the same on every run
w1 = np.random.randn(D, n_classes)  # weights are created outside the graph, as a NumPy array

with tf.device('/CPU:0'):  # change to /GPU:0 to move to GPU
    out = simple_ann_train(x_train, w1, y_train)  # one pass over the whole training set

# The graph call returns (predictions, loss, list of gradients).
y_pred, loss_val, grad = out
grad_w1 = grad[0]  # grad is a list with one entry per element of xs; here only w1
print(loss_val)
print(grad_w1)

In [None]:
# TensorFlow 2.0 - a more complex example
# step 2 - training, but the weights are passed back and forth between GPU and CPU
import numpy as np
import tensorflow as tf
import matplotlib.pyplot as plt

@tf.function  # the body below is run as a TensorFlow graph
def simple_ann_train(x, w1, y):
    """Single-layer ReLU network: forward pass, MSE loss and weight gradient.

    Args:
        x: input samples, multiplied on the right by w1.
        w1: weight matrix.
        y: targets, subtracted elementwise from the predictions.

    Returns:
        (y_pred, loss, grads), where grads is the list returned by
        tf.gradients for xs=[w1].
    """
    logits = tf.matmul(x, w1)
    y_pred = tf.maximum(logits, 0)  # ReLU on the logits
    loss = tf.reduce_mean((y_pred - y) ** 2)
    grads = tf.gradients(ys=loss, xs=[w1])
    return y_pred, loss, grads

# The weights live in a NumPy array: every epoch they are fed to the graph and
# then updated in place with the gradient that comes back.
np.random.seed(0)
w1 = np.random.randn(D, n_classes)
alpha = 1e-2  # learning rate
J = []        # loss history, one entry per epoch
for epoch in range(40):
    with tf.device('/CPU:0'):  # change to /GPU:0 to move it to GPU
        out = simple_ann_train(x_train, w1, y_train)
    y_pred, loss_val, grad = out
    grad_w1 = grad[0]  # grad is a list of gradients
    w1 -= alpha * grad_w1.numpy()  # plain gradient-descent step on the NumPy weights
    J.append(loss_val)
    print(f"epoch = {epoch}, loss = {loss_val}")

plt.plot(J)

In [None]:
# TensorFlow 2.0 - a more complex example
# step 3 - change weights to persistent in-graph variables
import numpy as np
import tensorflow as tf
import matplotlib.pyplot as plt

@tf.function  # the body below is run as a TensorFlow graph
def simple_ann_train(x, y, alpha):
    """Run one gradient-descent step on the global variable w1.

    w1 is not a parameter: it is read from the enclosing scope and
    overwritten with w1 - alpha * gradient.

    Args:
        x: input samples.
        y: targets.
        alpha: learning rate multiplying the gradient.

    Returns:
        (y_pred, loss), both evaluated before the assignment statement.
    """
    y_pred = tf.maximum(tf.matmul(x, w1), 0)  # ReLU on the logits
    loss = tf.reduce_mean((y_pred - y) ** 2)
    # tf.gradients returns a list with one gradient per entry of xs.
    (grad_w1,) = tf.gradients(ys=loss, xs=[w1])
    updated_weights = w1 - alpha * grad_w1
    w1.assign(updated_weights)
    return y_pred, loss

np.random.seed(0)
# w1 is drawn by tf.random.normal, which np.random.seed does not affect;
# seed TensorFlow as well so the initial weights are the same on every run.
tf.random.set_seed(0)
alpha = 1e-2  # learning rate
J = []        # loss history, one entry per epoch
# The weights are a tf.Variable that simple_ann_train reads and updates itself.
w1 = tf.Variable(tf.random.normal((D, n_classes), dtype='float64'))
for epoch in range(40):
    with tf.device('/CPU:0'):  # change to /GPU:0 to move it to GPU
        out = simple_ann_train(x_train, y_train, alpha)
    y_pred, loss_val = out
    J.append(loss_val)
    print("epoch = {}, loss = {}".format(epoch,loss_val))

plt.plot(J)

In [None]:
# TensorFlow 2.0 - a more complex example
# step 4 - use pre-defined losses and optimizers
import numpy as np
import tensorflow as tf
import matplotlib.pyplot as plt

@tf.function # this decorator tells tf that a graph is defined
def simple_ann_train(x, y, alpha):
    """Run one gradient-descent step on the global variable w1.

    Uses the predefined Keras MSE loss and the TF1-style gradient-descent
    optimizer. w1 is read from the enclosing scope and updated by the
    optimizer.

    Args:
        x: input samples.
        y: targets.
        alpha: learning rate handed to the optimizer.

    Returns:
        (y_pred, loss).
    """
    y_pred = tf.nn.relu(tf.matmul(x, w1))  # ReLU on logit
    mse = tf.keras.losses.MeanSquaredError()
    loss = mse(y, y_pred)
    # Honour the caller's learning rate instead of a hard-coded 1e-2.
    optimizer = tf.compat.v1.train.GradientDescentOptimizer(alpha)
    # The update op is created for its side effect on w1; its handle is not needed.
    optimizer.minimize(loss, var_list=[w1])
    return y_pred, loss

np.random.seed(0)
# w1 is drawn by tf.random.normal, which np.random.seed does not affect;
# seed TensorFlow as well so the initial weights are the same on every run.
tf.random.set_seed(0)
alpha = 1e-2  # learning rate
J = []        # loss history, one entry per epoch
# The weights are a tf.Variable that simple_ann_train reads and updates itself.
w1 = tf.Variable(tf.random.normal((D, n_classes), dtype='float64'))
for epoch in range(40):
    with tf.device('/CPU:0'):  # change to /GPU:0 to move it to GPU
        out = simple_ann_train(x_train, y_train, alpha)
    y_pred, loss_val = out
    J.append(loss_val)
    print("epoch = {}, loss = {}".format(epoch,loss_val))

plt.plot(J)

### Using pre-defined models with Keras 

In [None]:
# Using Keras
# Single layer network architecture, similar to the one used in the previous steps.
# Full batch SGD
import tensorflow as tf

# A single Dense layer (10 ReLU units, no bias) applied to the D-dimensional inputs.
model = tf.keras.models.Sequential([
    tf.keras.layers.Dense(10, input_shape=(D,), use_bias=False, activation='relu'),
])
model.summary()

# Full-batch training: batch_size=N puts every training sample in one batch.
sgd = tf.keras.optimizers.SGD(learning_rate=0.5)
model.compile(optimizer=sgd, loss='mse', metrics=['accuracy'])
history1 = model.fit(x_train, y_train, batch_size=N, epochs=40)

model.evaluate(x_test, y_test, verbose=2)

In [None]:
# Using Keras
# Single layer network architecture, similar to the one used in the previous steps.
# Batch=128 SGD
import tensorflow as tf


# Same single-layer architecture as the full-batch run, built from scratch.
model = tf.keras.models.Sequential([
    tf.keras.layers.Dense(10, input_shape=(D,), use_bias=False, activation='relu'),
])
model.summary()

# Mini-batch training: 128 samples per batch, learning rate 0.01.
sgd = tf.keras.optimizers.SGD(learning_rate=0.01)
model.compile(optimizer=sgd, loss='mse', metrics=['accuracy'])
history2 = model.fit(x_train, y_train, batch_size=128, epochs=40)

model.evaluate(x_test, y_test, verbose=2)

In [None]:
# Overlay the training-loss curves of the two Keras runs on one figure.
plot_history(
    history1.epoch,
    [history1.history['loss'], history2.history['loss']],
    ['full batch', 'batch=128'],
    'mse',
)