In [141]:
import pandas as pd
import numpy as np
import tensorflow as tf
import csv
import math

In [142]:
#Reading data file
raw_data = pd.read_csv('letter-recognition.data', sep=',',delimiter=None, header=None)

#Processing data as X_all and Y_all
X_all = raw_data.iloc[:,1:16]
Y_all = raw_data.iloc[:,0]

#Converting Y_all to hot vectors
converter = lambda x: ord(x)-ord('A')
Y_all = list(map(converter,Y_all.values.T.tolist()))
Y_all = pd.get_dummies(Y_all).values


X_all = pd.DataFrame(X_all)
Y_all = pd.DataFrame(Y_all)

In [143]:
print(X_all.shape)
print(Y_all.shape)

(20000, 15)
(20000, 26)


In [144]:
NUM_CLASSES = Y_all.shape[1]
INPUT_SIZE = X_all.shape[1]
BATCH_SIZE = 10000
NR_STEP = int(X_all.shape[0]/BATCH_SIZE)
NR_EPOCH = 1000

layer_info = [INPUT_SIZE, 20, 30, 40, NUM_CLASSES]
layer_size = len(layer_info)

print(layer_info)

[15, 20, 30, 40, 26]


In [145]:
#Inference graph construction

"""The choice of standard deviation, refere following paper:
   https://github.com/bulletcross/ML-paper-collection/blob/master/Supervised/Backprop%20thumb%20rule.pdf"""

def inference_graph(X, layer_info):
    #Define weight, bias and output under different namescope
    layer_input = X
    for i in range(1,len(layer_info)-1):
        with tf.name_scope('layer_'+str(i)):
            W = tf.Variable(tf.truncated_normal([layer_info[i-1],layer_info[i]], 
                                                stddev=1.0/math.sqrt(float(layer_info[i-1]))),
                           name = 'weight_'+str(i))
            
            b = tf.Variable(tf.zeros([layer_info[i]]),
                           name = 'bias_'+str(i))
            layer_output = tf.nn.relu(tf.matmul(layer_input, W) + b)
            
            #print(layer_output)
            
            layer_input = layer_output
    
    nr_layer = len(layer_info)
    with tf.name_scope('layer_'+str(nr_layer-1)):
        W = tf.Variable(tf.truncated_normal([layer_info[nr_layer-2],layer_info[nr_layer-1]], 
                                            stddev=1.0/math.sqrt(float(layer_info[nr_layer-2]))),
                       name = 'weight_'+str(nr_layer-1))
        b = tf.Variable(tf.zeros([layer_info[nr_layer-1]]),
                        name = 'bias_'+str(nr_layer-1))
        layer_output = tf.matmul(layer_input, W) + b

    return layer_output

In [146]:
# Training graph construction

def train_graph(logit, Y, learning_rate):
    print(logit.shape)
    with tf.name_scope('cross_entropy'):
        softmax_entropy = tf.nn.softmax_cross_entropy_with_logits(labels=Y, logits=logit, name = 'xentropy')
        loss = tf.reduce_mean(softmax_entropy, name = 'loss')
    
    with tf.name_scope('train'):
        optimizer = tf.train.GradientDescentOptimizer(learning_rate)
        train_op = optimizer.minimize(loss)
        
    with tf.name_scope('accuracy'):
        nr_correct = tf.equal(tf.argmax(Y,1), tf.argmax(logits,1))
        accuracy = tf.reduce_mean(tf.cast(nr_correct, tf.float32))
    
    return loss, train_op, accuracy

In [147]:
#Tensorflow full graph construction

model_graph = tf.Graph()
with model_graph.as_default():
    with tf.name_scope('input'):
        X = tf.placeholder(tf.float32, shape = [None, INPUT_SIZE], name = 'X_INPUT')
        Y = tf.placeholder(tf.float32, shape = [None, NUM_CLASSES], name = 'Y_INPUT')

        logits = inference_graph(X, layer_info)
        loss, train_op, accuracy = train_graph(logits, Y, 0.1)
        init = tf.initialize_all_variables()


(?, 26)
Instructions for updating:
Use `tf.global_variables_initializer` instead.


In [None]:
#Training model

with tf.Session(graph = model_graph) as sess:
    sess.run(init)
    
    for epoch in range(0, NR_EPOCH):
        for step in range(0, NR_STEP):
            feed_X = X_all.iloc[step*BATCH_SIZE:(step+1)*BATCH_SIZE, :]
            feed_Y = Y_all.iloc[step*BATCH_SIZE:(step+1)*BATCH_SIZE, :]
            _, step_accuracy, step_loss = sess.run([train_op, accuracy, loss],
                                                  feed_dict={X:feed_X, Y:feed_Y})
            
            if step % 1000 == 0:
                print('Epoch= %d, step= %d, accuracy= %.2f loss= %.2f' % (epoch, step, step_accuracy, step_loss))

Epoch= 0, step= 0, accuracy= 0.04 loss= 3.64
Epoch= 1, step= 0, accuracy= 0.06 loss= 3.24
Epoch= 2, step= 0, accuracy= 0.07 loss= 3.18
Epoch= 3, step= 0, accuracy= 0.08 loss= 3.14
Epoch= 4, step= 0, accuracy= 0.09 loss= 3.10
Epoch= 5, step= 0, accuracy= 0.11 loss= 3.06
Epoch= 6, step= 0, accuracy= 0.12 loss= 3.02
Epoch= 7, step= 0, accuracy= 0.14 loss= 2.97
Epoch= 8, step= 0, accuracy= 0.16 loss= 2.94
Epoch= 9, step= 0, accuracy= 0.14 loss= 3.01
Epoch= 10, step= 0, accuracy= 0.14 loss= 3.02
Epoch= 11, step= 0, accuracy= 0.20 loss= 2.85
Epoch= 12, step= 0, accuracy= 0.18 loss= 2.88
Epoch= 13, step= 0, accuracy= 0.16 loss= 2.90
Epoch= 14, step= 0, accuracy= 0.23 loss= 2.77
Epoch= 15, step= 0, accuracy= 0.20 loss= 2.85
Epoch= 16, step= 0, accuracy= 0.17 loss= 2.88
Epoch= 17, step= 0, accuracy= 0.12 loss= 2.85
Epoch= 18, step= 0, accuracy= 0.21 loss= 2.67
Epoch= 19, step= 0, accuracy= 0.31 loss= 2.48
Epoch= 20, step= 0, accuracy= 0.32 loss= 2.43
Epoch= 21, step= 0, accuracy= 0.14 loss= 2.9