In [0]:
%matplotlib inline


TensorFlow: Static Graphs
-------------------------

A fully-connected ReLU network with one hidden layer and no biases, trained to
predict y from x by minimizing squared Euclidean distance.

This implementation uses basic TensorFlow operations to set up a computational
graph, then executes the graph many times to actually train the network.

One of the main differences between TensorFlow and PyTorch is that TensorFlow
uses static computational graphs while PyTorch uses dynamic computational
graphs.

In TensorFlow we first set up the computational graph, then execute the same
graph many times.



In [4]:
import tensorflow as tf
import numpy as np

# First we set up the computational graph:

# N: batch size, D_in: input dim, H: hidden dim, D_out: output dim
N, D_in, H, D_out = 64, 1000, 100, 10

# Create placeholders for the input and target data.
# These will be filled with real data when we execute the graph.
x = tf.placeholder(tf.float32, shape=(None, D_in))
y = tf.placeholder(tf.float32, shape=(None, D_out))

# Create Variables for the weights and initialize them with random data.
# A TensorFlow Variable persists its value across executions of the graph.
w1 = tf.Variable(tf.random_normal((D_in, H)))
w2 = tf.Variable(tf.random_normal((H, D_out)))

# Forward pass: compute the predicted y using operations on TensorFlow Tensors.
# Note that this code does not actually perform any numeric operations;
# it merely sets up the computational graph that will be executed later.
h = tf.matmul(x, w1)
h_relu = tf.nn.relu(h)
y_pred = tf.matmul(h_relu, w2)

# Compute the loss (sum of squared errors) using operations on TensorFlow
# Tensors.
loss = tf.reduce_sum((y - y_pred) ** 2.0)

# Compute the gradient of the loss with respect to w1 and w2.
grad_w1, grad_w2 = tf.gradients(loss, [w1, w2])

# Update the weights using gradient descent. To actually update the weights
# we need to evaluate new_w1 and new_w2 when executing the graph. Note that
# in TensorFlow the act of updating the value of the weights is part of the
# computational graph; in PyTorch this happens outside the computational graph.
#
# Both assignments are created under a control dependency on *both* gradients.
# grad_w1 depends on the value of w2, and TensorFlow does not by itself order
# an assign op relative to other ops that read the same Variable, so without
# this the update of w2 could be applied before grad_w1 has been computed.
learning_rate = 1e-6
with tf.control_dependencies([grad_w1, grad_w2]):
    new_w1 = w1.assign(w1 - learning_rate * grad_w1)
    new_w2 = w2.assign(w2 - learning_rate * grad_w2)

# Now we have built our computational graph, so we enter a TensorFlow session
# to actually execute the graph.
with tf.Session() as sess:
    # Run the graph once to initialize the Variables w1 and w2.
    sess.run(tf.global_variables_initializer())

    # Create numpy arrays holding the actual data for the inputs x and
    # targets y. They are cast to float32 once here so they match the
    # placeholder dtype and do not have to be converted on every step.
    x_value = np.random.randn(N, D_in).astype(np.float32)
    y_value = np.random.randn(N, D_out).astype(np.float32)

    # The same data is bound to the placeholders on every step, so the
    # feed dictionary is built once outside the loop.
    feed = {x: x_value, y: y_value}

    for _ in range(500):
        # Execute the graph many times. Each time it executes we bind
        # x_value to x and y_value to y via the feed_dict argument.
        # Each time we execute the graph we want to compute the values of
        # loss, new_w1 and new_w2; fetching new_w1 and new_w2 is what applies
        # the weight update. The fetched values are returned as numpy arrays,
        # of which only the loss is kept.
        loss_value = sess.run([loss, new_w1, new_w2], feed_dict=feed)[0]
        print(loss_value)

35269136.0
31126378.0
28563530.0
24069556.0
17564582.0
11244671.0
6649901.0
3916117.0
2433987.8
1642084.5
1198964.8
930652.25
752766.75
625063.9
527808.5
450801.38
388186.75
336317.6
292914.6
256314.45
225198.78
198556.88
175639.25
155847.81
138672.12
123718.39
110650.12
99186.94
89100.19
80195.91
72314.266
65317.727
59091.117
53540.63
48580.9
44140.246
40154.832
36572.43
33348.676
30446.52
27829.887
25469.746
23332.432
21394.152
19634.305
18034.406
16577.959
15251.022
14042.302
12938.73
11929.872
11007.061
10162.078
9387.184
8676.488
8023.966
7424.686
6873.7305
6367.1714
5900.7817
5471.3086
5075.2373
4710.012
4372.9346
4061.6575
3773.9932
3508.0369
3262.0608
3034.462
2823.767
2628.6067
2447.6963
2280.02
2124.4448
1980.1294
1846.1383
1721.7529
1606.1918
1498.8269
1398.9661
1306.0951
1219.6857
1139.3041
1064.4677
994.7883
929.87555
869.3976
813.0355
760.70483
711.9252
666.42346
623.973
584.32825
547.32275
512.773
480.4791
450.30548
422.10764
395.74847
371.10004
348.05286
326.4908
306.31