In [3]:
# This tutorial is analogous to theano1.py
# It introduces basic variables and functions
# and shows how you can optimize a function.
# I compare this to theano1.py multiple times.
# So you might want to check that out first.

# For the class Data Science: Practical Deep Learning Concepts in Theano and TensorFlow
# https://deeplearningcourses.com/c/data-science-deep-learning-in-theano-tensorflow
# https://www.udemy.com/data-science-deep-learning-in-theano-tensorflow
from __future__ import print_function, division
from builtins import range
# Note: you may need to update your version of future
# sudo pip install -U future

import numpy as np
import tensorflow as tf



In [4]:

# Placeholders are the inputs of the graph; their values get "fed" in later.

# The dtype is the one argument you must always provide...
A = tf.placeholder(dtype=tf.float32, shape=(5, 5), name='A')

# ...whereas shape and name may simply be left out.
v = tf.placeholder(dtype=tf.float32)

# Matrix product of A and v.
# TensorFlow calls it 'matmul', arguably a more appropriate name than 'dot'.
w = tf.matmul(a=A, b=v)


In [6]:

# As in Theano, the graph inputs have to be "fed" with actual values.
# In TensorFlow the "actual work" is carried out inside a "session".

with tf.Session() as session:
    # Values are passed through the aptly named "feed_dict" argument.
    # v has to be of shape=(5, 1), not just shape=(5,):
    # matmul behaves like "real" matrix multiplication.
    feed = {
        A: np.random.randn(5, 5),
        v: np.random.randn(5, 1),
    }
    output = session.run(w, feed_dict=feed)

    # What does the session hand back? Print the value together with its type.
    print(output, type(output))

    # Conveniently, the result is just a numpy array.

[[ 2.2450833]
 [-0.3730781]
 [ 1.3929094]
 [-0.6095692]
 [ 2.4458745]] <class 'numpy.ndarray'>


In [7]:

# A TensorFlow Variable plays the role of a Theano shared variable,
# while a TensorFlow placeholder plays the role of a plain Theano variable.
# (Yes, the naming is confusing.)

# The initial value of a tf Variable can be a numpy array, a tf tensor or,
# more precisely, anything that can be turned into a tf tensor.
shape = (2, 2)
x = tf.Variable(initial_value=tf.random_normal(shape))
# numpy alternative: x = tf.Variable(np.random.randn(2, 2))
t = tf.Variable(initial_value=0)  # a scalar

# Variables have to be "initialized" before they can be used.
init = tf.global_variables_initializer()

with tf.Session() as session:
    out = session.run(init)  # "run" the init operation
    print(out)  # it's just None

    # eval() in tf is like get_value() in Theano:
    # print the initial value of x, then the value of t
    for variable in (x, t):
        print(variable.eval())


None
[[ 1.5826521  -0.80077946]
 [ 0.2737102  -0.3905468 ]]
0


In [11]:
# Find the minimum of a simple cost function, just like we did in Theano.
u = tf.Variable(20.0)
cost = u*u + 4*u + 1.0

# Unlike Theano, TensorFlow does not ask you to write the updates yourself:
# you pick an optimizer that implements the algorithm you want.
# 0.3 is the learning rate; the documentation lists the other params.
optimizer = tf.train.GradientDescentOptimizer(learning_rate=0.3)
train_op = optimizer.minimize(cost)

# Run a session again.
init = tf.global_variables_initializer()
with tf.Session() as session:
    session.run(init)

    # The weight update is automated, but the loop itself is not,
    # so train_op is simply run a fixed number of times.
    # That is handy anyway, since we want to track the cost function.
    for i in range(44):
        session.run(train_op)
        # read the current cost and the current value of u in one call
        cost_val, u_val = session.run([cost, u])
        print("i = %d, cost = %.3f, u = %.3f" % (i, cost_val, u_val))



i = 0, cost = 74.440, u = 6.800
i = 1, cost = 9.390, u = 1.520
i = 2, cost = -1.018, u = -0.592
i = 3, cost = -2.683, u = -1.437
i = 4, cost = -2.949, u = -1.775
i = 5, cost = -2.992, u = -1.910
i = 6, cost = -2.999, u = -1.964
i = 7, cost = -3.000, u = -1.986
i = 8, cost = -3.000, u = -1.994
i = 9, cost = -3.000, u = -1.998
i = 10, cost = -3.000, u = -1.999
i = 11, cost = -3.000, u = -2.000
i = 12, cost = -3.000, u = -2.000
i = 13, cost = -3.000, u = -2.000
i = 14, cost = -3.000, u = -2.000
i = 15, cost = -3.000, u = -2.000
i = 16, cost = -3.000, u = -2.000
i = 17, cost = -3.000, u = -2.000
i = 18, cost = -3.000, u = -2.000
i = 19, cost = -3.000, u = -2.000
i = 20, cost = -3.000, u = -2.000
i = 21, cost = -3.000, u = -2.000
i = 22, cost = -3.000, u = -2.000
i = 23, cost = -3.000, u = -2.000
i = 24, cost = -3.000, u = -2.000
i = 25, cost = -3.000, u = -2.000
i = 26, cost = -3.000, u = -2.000
i = 27, cost = -3.000, u = -2.000
i = 28, cost = -3.000, u = -2.000
i = 29, cost = -3.000, u = -2.000

In [12]:
# 2-hidden layer NN in TensorFlow
# This code is not optimized for speed.
# It's just to get something working, using the principles we know.

# For the class Data Science: Practical Deep Learning Concepts in Theano and TensorFlow
# https://deeplearningcourses.com/c/data-science-deep-learning-in-theano-tensorflow
# https://www.udemy.com/data-science-deep-learning-in-theano-tensorflow
from __future__ import print_function, division
from builtins import range
# Note: you may need to update your version of future
# sudo pip install -U future

import numpy as np
import tensorflow as tf

import matplotlib.pyplot as plt

from util import get_normalized_data, y2indicator


def error_rate(p, t):
    """Return the fraction of entries in which predictions `p` differ from targets `t`."""
    mismatches = p != t
    return np.mean(mismatches)


# copy this first part from theano2.py
def main():
    """Train a 2-hidden-layer ReLU network with RMSprop and plot its test cost.

    The train/test split comes from ``get_normalized_data()``. A
    D -> 300 -> 100 -> 10 network is built and trained on mini-batches of 500
    samples for ``max_iter`` passes over the training set. Every
    ``print_period`` batches the cost and error rate on the test set are
    printed and the cost is recorded; the recorded costs are plotted at the end.
    """
    # step 1: get the data and define all the usual variables
    Xtrain, Xtest, Ytrain, Ytest = get_normalized_data()

    max_iter = 15
    print_period = 50

    lr = 0.00004
    # NOTE(review): reg is never used below; presumably it is the strength meant
    # for the regularization exercise posed at the end of this function - confirm.
    reg = 0.01

    # indicator-matrix versions of the targets, as fed into the cost
    Ytrain_ind = y2indicator(Ytrain)
    Ytest_ind = y2indicator(Ytest)

    N, D = Xtrain.shape
    batch_sz = 500
    # integer division: samples beyond n_batches * batch_sz are never trained on
    n_batches = N // batch_sz

    # add an extra layer just for fun
    M1 = 300
    M2 = 100
    K = 10
    # random weights scaled by 1/sqrt(fan-in), biases start at zero
    W1_init = np.random.randn(D, M1) / np.sqrt(D)
    b1_init = np.zeros(M1)
    W2_init = np.random.randn(M1, M2) / np.sqrt(M1)
    b2_init = np.zeros(M2)
    W3_init = np.random.randn(M2, K) / np.sqrt(M2)
    b3_init = np.zeros(K)

    # define variables and expressions
    X = tf.placeholder(tf.float32, shape=(None, D), name='X')
    T = tf.placeholder(tf.float32, shape=(None, K), name='T')
    W1 = tf.Variable(W1_init.astype(np.float32))
    b1 = tf.Variable(b1_init.astype(np.float32))
    W2 = tf.Variable(W2_init.astype(np.float32))
    b2 = tf.Variable(b2_init.astype(np.float32))
    W3 = tf.Variable(W3_init.astype(np.float32))
    b3 = tf.Variable(b3_init.astype(np.float32))

    # define the model
    Z1 = tf.nn.relu(tf.matmul(X, W1) + b1)
    Z2 = tf.nn.relu(tf.matmul(Z1, W2) + b2)
    # no softmax here: the cost function does the softmaxing
    Yish = tf.matmul(Z2, W3) + b3

    # softmax_cross_entropy_with_logits takes in the "logits".
    # If you wanted to know the actual output of the neural net,
    # you could pass "Yish" into tf.nn.softmax(logits).
    # The per-sample losses are summed, so the cost grows with the number of samples fed.
    cost = tf.reduce_sum(
        tf.nn.softmax_cross_entropy_with_logits_v2(logits=Yish, labels=T)
    )

    # we choose the optimizer but don't implement the algorithm ourselves
    # let's go with RMSprop, since we just learned about it.
    # it includes momentum!
    train_op = tf.train.RMSPropOptimizer(lr, decay=0.99, momentum=0.9).minimize(cost)

    # we'll use this to calculate the error rate
    predict_op = tf.argmax(Yish, 1)

    costs = []
    init = tf.global_variables_initializer()
    with tf.Session() as session:
        session.run(init)

        for i in range(max_iter):
            for j in range(n_batches):
                start = j * batch_sz
                end = start + batch_sz
                Xbatch = Xtrain[start:end]
                Ybatch = Ytrain_ind[start:end]

                session.run(train_op, feed_dict={X: Xbatch, T: Ybatch})
                if j % print_period == 0:
                    # Fetch the cost and the predictions in a single run so the
                    # test set goes through the network once per report, not twice.
                    test_cost, prediction = session.run(
                        [cost, predict_op],
                        feed_dict={X: Xtest, T: Ytest_ind},
                    )
                    err = error_rate(prediction, Ytest)
                    print("Cost / err at iteration i=%d, j=%d: %.3f / %.3f" % (i, j, test_cost, err))
                    costs.append(test_cost)

    plt.plot(costs)
    plt.xlabel("evaluation #")
    plt.ylabel("test cost (summed cross-entropy)")
    plt.title("Test cost during training")
    plt.show()
    # increase max_iter and notice how the test cost starts to increase.
    # are we overfitting by adding that extra layer?
    # how would you add regularization to this model?


# Run the training when this code is executed directly (as a script or as a
# notebook cell), but not when it is imported as a module.
if __name__ == '__main__':
    main()


Reading in and transforming data...
Looking for ../large_files/train.csv
You have not downloaded the data and/or not placed the files in the correct location.
Please get the data from: https://www.kaggle.com/c/digit-recognizer
Place train.csv in the folder large_files adjacent to the class folder


NameError: name 'exit' is not defined

In [15]:
# Theano basics.
# For the class Data Science: Practical Deep Learning Concepts in Theano and TensorFlow
# https://deeplearningcourses.com/c/data-science-deep-learning-in-theano-tensorflow
# https://www.udemy.com/data-science-deep-learning-in-theano-tensorflow
from __future__ import print_function, division
from builtins import range
# Note: you may need to update your version of future
# sudo pip install -U future

import theano.tensor as T

# theano compiles symbolic expressions into callable functions;
# numpy provides the real arrays those functions are called with
import theano
import numpy as np

# a few of the different types of symbolic variables
c = T.scalar('c')
v = T.vector('v')
A = T.matrix('A')

# a symbolic matrix multiplication
w = T.dot(A, v)

# Symbolic variables take on values through a compiled theano function:
# list the inputs and name the output expression.
matrix_times_vector = theano.function(inputs=[A, v], outputs=w)

# real arrays to call the compiled function with
A_val = np.array([[1, 2], [3, 4]])
v_val = np.array([5, 6])

w_val = matrix_times_vector(A_val, v_val)
print(w_val)

# Create a shared variable so we can do gradient descent.
# This adds another layer of complexity to the theano function.
# The first argument is its initial value, the second is its name.
x = theano.shared(value=20.0, name='x')

# a cost function that has a minimum value
cost = x*x + x + 1

# In theano you don't have to compute gradients yourself!
# One gradient-descent step with a step size of 0.3:
learning_rate = 0.3
x_update = x - learning_rate*T.grad(cost, wrt=x)

# x is not an "input", it's a thing you update.
# In later examples, data and labels would go into the inputs
# and model params would go in the updates.
# updates takes in a list of tuples, each tuple has 2 things in it:
# 1) the shared variable to update, 2) the update expression
train = theano.function(inputs=[], outputs=cost, updates=[(x, x_update)])

# Write your own loop to call the training function; it takes no arguments.
for i in range(25):
    cost_val = train()
    print(cost_val)

# print the optimal value of x
print(x.get_value())




[17. 39.]
421.0
67.99000000000001
11.508400000000002
2.4713440000000007
1.0254150400000002
0.7940664064
0.7570506250240001
0.75112810000384
0.7501804960006143
0.7500288793600982
0.7500046206976159
0.7500007393116186
0.750000118289859
0.7500000189263775
0.7500000030282203
0.7500000004845152
0.7500000000775223
0.7500000000124035
0.7500000000019845
0.7500000000003176
0.7500000000000506
0.7500000000000082
0.7500000000000013
0.7500000000000001
0.7500000000000001
-0.4999999976919052


In [14]:
# Install Theano into the kernel's environment; run this before the Theano cell.
# Pinned to the release that the recorded output below reports as installed.
%pip install theano==1.0.4

Processing c:\users\yanni\appdata\local\pip\cache\wheels\33\e0\86\12647586a15bd29c062c9996231380908fb2dcf6a5df1c6f84\theano-1.0.4-py3-none-any.whl
Installing collected packages: theano
Successfully installed theano-1.0.4
Note: you may need to restart the kernel to use updated packages.
