In [1]:
# looking at https://www.youtube.com/watch?v=u4alGiomYP4&t=1832s
# this is the first version, having just one layer of neurons

import tensorflow as tf
from tensorflow.examples.tutorials.mnist import input_data

# one_hot = True means we represent the data in a vector way, see below
mnist = input_data.read_data_sets('/tmp/data', one_hot=True)

Extracting /tmp/data/train-images-idx3-ubyte.gz
Extracting /tmp/data/train-labels-idx1-ubyte.gz
Extracting /tmp/data/t10k-images-idx3-ubyte.gz
Extracting /tmp/data/t10k-labels-idx1-ubyte.gz


In [9]:
# input of image data

# we declare a placeholder variable which will receive None (1-100 something) images of 28 by 28 with one value
# in each column
X = tf.placeholder(tf.float32, [None, 784], 'image')

# we delcare the expected correct answers placeholder which will be used to provide the expected results
# for each set of images. This will be used in the flow to calculate the error value
Y_ = tf.placeholder(tf.float32, shape=[None, 10], name='expected_value')

# we declare the initial weights of the first layer, all zeroes for simplicity
W = tf.Variable(tf.zeros([784,10]), 'weights')

# we declare the initial baiases for the first layer, all zeroes for simplicity. 
# we will have one baias for each neuron.
b = tf.Variable(tf.zeros([10]), 'baiases')

In [10]:
# We declare how the correct answers would look like, this is the output variable:
# 
# This line of code will do the following steps:
#
# 1. We process the X input (the images, which will be of size 28, 28 and have 1 value in each cell) and 
# return an array of numbers. So the image pixel matrix will become a image 1D vector
#
# 2. We multiply the resulting X matrix (which is composed by a number of images, each one being one row 
# with 28*28 values) with the weights matrix, which is a compatible matrix for multiplication
#
# 3. We add the b 1D vector with the matrix. This operation is not in mathematics. Basically, because 
# the resulting matrix from the multiplication from step 2 is the number of images as rows and 10 as columns,
# and b is a 1D vector with 10 values, it will mean b becomes a matrix with 100 rows containing the indentical
# 10 values. This is just a shortcut so we process multiple images at the same time in the matrix. 
#
# 4. For all the values resulting in the matrix, we apply softmax on them, which will force values to be 
# between 0 and 1. 
Y = tf.nn.softmax(tf.matmul(X, W) + b)

In [11]:
# We declare the loss function
# this function will calculate a value that tells us how bad we scrued up. How different
# is the value we got from the netwrok with the value we wanted. It is a function based
# on the expected result and the received result. 
# 
# Note: now it is just a declaration. A component that will serve this purpose when we hook things
# up in the tensurflow flow
cross_entropy = -tf.reduce_sum(Y_ * tf.log(Y))

In [12]:
# % of correct answers found in batch
# based on the Y and Y_ which will be geneated during the runtime of the tests
# we compose this is_correct to compute how right we are during training
is_correct = tf.equal(tf.argmax(Y,1), tf.argmax(Y_,1))

accuracy = tf.reduce_mean(tf.cast(is_correct, tf.float32))

In [13]:
# we declare the optimizer, a gradient descent which will take the
# declared corss_entropy function and based on the values fromm there
# will go down, trying to reduce the value of that function by changing the weights.
# this declaration will be used by tensorflow to change the weights from the
# W and b matrices

optimizer = tf.train.GradientDescentOptimizer(0.003)
train_step = optimizer.minimize(cross_entropy)

In [14]:
# we start our trainig session
with tf.Session() as sess:
    tf.global_variables_initializer().run()
    
    for _ in range(10):
        print('iteration', _)
        for i in range(10000):
            # load a btach of images, thanks to mnist
            batch_X, batch_Y = mnist.train.next_batch(100)
            train_data = {X:batch_X, Y_:batch_Y}

            # train the network one time
            sess.run(train_step, feed_dict=train_data)

        # get the accuracy after training
        a, c = sess.run([accuracy, cross_entropy], feed_dict=train_data)
        print('Current training accuracy', a, ' current error', c)

        a, c = sess.run([accuracy, cross_entropy], feed_dict={
            X: mnist.test.images,
            Y_: mnist.test.labels
        })
        print('Current testing accuracy', a, ' current error', c)


('iteration', 0)
('Current training accuracy', 0.95999998, ' current error', 19.65764)
('Current testing accuracy', 0.92430001, ' current error', 2727.7896)
('iteration', 1)
('Current training accuracy', 0.97000003, ' current error', 14.226048)
('Current testing accuracy', 0.92290002, ' current error', 2762.6941)
('iteration', 2)
('Current training accuracy', 0.97000003, ' current error', 29.934399)
('Current testing accuracy', 0.92460001, ' current error', 2714.0867)
('iteration', 3)
('Current training accuracy', 0.97000003, ' current error', 13.523406)
('Current testing accuracy', 0.92629999, ' current error', 2694.7781)
('iteration', 4)
('Current training accuracy', 0.93000001, ' current error', 18.873686)
('Current testing accuracy', 0.92570001, ' current error', 2733.6001)
('iteration', 5)
('Current training accuracy', 0.88999999, ' current error', 30.410782)
('Current testing accuracy', 0.92680001, ' current error', 2737.7739)
('iteration', 6)


KeyboardInterrupt: 