In [None]:
import pandas as pd
import numpy as np
import tensorflow as tf
import csv
import math

In [None]:
# Read the raw data file. It has no header row; column 0 holds the letter
# label ('A'..'Z') and the remaining columns hold the numeric features.
raw_data = pd.read_csv('letter-recognition.data', sep=',', header=None)

# Split into features (X_all) and labels (Y_all).
# Take *every* column after the label. The previous slice `1:16` has an
# exclusive upper bound, so it stopped at column 15 and silently dropped the
# last of the 16 attribute columns of the UCI letter-recognition file.
X_all = raw_data.iloc[:, 1:]
Y_all = raw_data.iloc[:, 0]

# Convert Y_all to one-hot vectors: 'A' -> class 0, 'B' -> class 1, ...
converter = lambda x: ord(x)-ord('A')
Y_all = list(map(converter, Y_all.values.T.tolist()))
Y_all = pd.get_dummies(Y_all).values

# Keep both as DataFrames so that later cells can slice mini-batches with .iloc
X_all = pd.DataFrame(X_all)
Y_all = pd.DataFrame(Y_all)

In [None]:
# Sanity check: show the shape of the feature frame, then of the one-hot label frame
print(X_all.shape, Y_all.shape, sep='\n')

In [None]:
# Network input/output widths are derived from the prepared data frames
INPUT_SIZE = X_all.shape[1]
NUM_CLASSES = Y_all.shape[1]

# Training schedule: number of whole batches per epoch, and number of epochs
BATCH_SIZE = 10000
NR_EPOCH = 1000
NR_STEP = X_all.shape[0] // BATCH_SIZE

# Layer widths from input to output; the three middle entries are hidden layers
layer_info = [INPUT_SIZE, 20, 30, 40, NUM_CLASSES]
layer_size = len(layer_info)

# Directory that receives the TensorBoard event files
logs_path = '/tf_log/'

print(layer_info)

In [None]:
#Inference graph construction

# For the choice of the weight standard deviation, refer to the following paper:
#    https://github.com/bulletcross/ML-paper-collection/blob/master/Supervised/Backprop%20thumb%20rule.pdf

def _dense_layer(layer_input, index, fan_in, fan_out, activation=None):
    """Build one fully connected layer inside the name scope 'layer_<index>'.

    Args:
        layer_input: tensor fed into the layer.
        index: layer number, used as suffix for the scope, variable and
            summary names.
        fan_in: number of input units (rows of the weight matrix).
        fan_out: number of output units (columns of the weight matrix).
        activation: optional callable applied to `input @ W + b`; when None
            the pre-activation is returned unchanged.

    Returns:
        The output tensor of the layer.
    """
    suffix = str(index)
    with tf.name_scope('layer_' + suffix):
        # Weights: truncated normal with stddev 1/sqrt(fan_in)
        W = tf.Variable(tf.truncated_normal([fan_in, fan_out],
                                            stddev=1.0/math.sqrt(float(fan_in))),
                        name='weight_' + suffix)
        tf.summary.histogram('weight_histogram_' + suffix, W)
        # Biases start at zero
        b = tf.Variable(tf.zeros([fan_out]), name='bias_' + suffix)
        tf.summary.histogram('bias_histogram_' + suffix, b)

        pre_activation = tf.matmul(layer_input, W) + b
        if activation is None:
            return pre_activation
        return activation(pre_activation)


def inference_graph(X, layer_info):
    """Build the feed-forward network described by `layer_info`.

    Every layer gets its own name scope ('layer_1', 'layer_2', ...), a weight
    and a bias variable, and a histogram summary for each of them. All layers
    except the last use ReLU; the last layer is left un-activated so that its
    output can be used as logits.

    Args:
        X: input tensor whose last dimension equals layer_info[0].
        layer_info: sequence of layer widths, input width first and output
            width last. Must contain at least two entries.

    Returns:
        The output tensor of the final (linear) layer.

    Raises:
        ValueError: if `layer_info` has fewer than two entries, because no
            layer can be built without both an input and an output width.
    """
    if len(layer_info) < 2:
        raise ValueError('layer_info needs at least an input and an output size')

    last_index = len(layer_info) - 1
    layer_output = X
    for i in range(1, len(layer_info)):
        # Hidden layers are ReLU-activated, the final layer stays linear
        activation = tf.nn.relu if i < last_index else None
        layer_output = _dense_layer(layer_output, i,
                                    layer_info[i-1], layer_info[i],
                                    activation)

    return layer_output

In [None]:
# Training graph construction

def train_graph(logit, Y, learning_rate):
    """Attach loss, optimizer and accuracy ops to the inference output.

    Args:
        logit: un-normalised class scores produced by the network.
        Y: one-hot target tensor with the same shape as `logit`.
        learning_rate: step size for plain gradient descent.

    Returns:
        A tuple `(loss, train_op, accuracy)`:
        loss is the mean softmax cross-entropy over the batch, train_op is
        the gradient-descent update minimising it, and accuracy is the
        fraction of rows whose arg-max prediction equals the arg-max of `Y`.

    Side effects:
        Prints the static shape of `logit` and registers scalar summaries
        named 'loss' and 'accuracy_mean'.
    """
    print(logit.shape)
    with tf.name_scope('cross_entropy'):
        softmax_entropy = tf.nn.softmax_cross_entropy_with_logits(labels=Y, logits=logit, name = 'xentropy')
        loss = tf.reduce_mean(softmax_entropy, name = 'loss')
        tf.summary.scalar('loss', loss)

    with tf.name_scope('train'):
        optimizer = tf.train.GradientDescentOptimizer(learning_rate)
        train_op = optimizer.minimize(loss)

    with tf.name_scope('accuracy'):
        # Use the `logit` argument here. The earlier code read a global named
        # `logits`, which only worked when the caller happened to define one.
        is_correct = tf.equal(tf.argmax(Y, 1), tf.argmax(logit, 1))
        accuracy = tf.reduce_mean(tf.cast(is_correct, tf.float32))
        tf.summary.scalar('accuracy_mean', accuracy)

    return loss, train_op, accuracy

In [None]:
#Tensorflow full graph construction

# Build everything inside a dedicated graph so that re-running this cell
# starts from a clean graph instead of adding ops to the default one.
model_graph = tf.Graph()
with model_graph.as_default():
    with tf.name_scope('input'):
        # Batch dimension is left open (None) so any batch size can be fed
        X = tf.placeholder(tf.float32, shape = [None, INPUT_SIZE], name = 'X_INPUT')
        Y = tf.placeholder(tf.float32, shape = [None, NUM_CLASSES], name = 'Y_INPUT')

    logits = inference_graph(X, layer_info)
    loss, train_op, accuracy = train_graph(logits, Y, 0.1)

    # Single op that evaluates every summary registered while building the graph
    merged_summary_op = tf.summary.merge_all()

    # tf.initialize_all_variables() is deprecated; this is its replacement
    init = tf.global_variables_initializer()


In [None]:
#Training model

with tf.Session(graph = model_graph) as sess:

    sess.run(init)
    # sess.graph is the graph the session was created with (model_graph)
    summary_writer = tf.summary.FileWriter(logs_path, graph=sess.graph)

    try:
        for epoch in range(0, NR_EPOCH):
            for step in range(0, NR_STEP):
                # Consecutive, non-overlapping row windows of BATCH_SIZE rows
                feed_X = X_all.iloc[step*BATCH_SIZE:(step+1)*BATCH_SIZE, :]
                feed_Y = Y_all.iloc[step*BATCH_SIZE:(step+1)*BATCH_SIZE, :]
                _, step_accuracy, step_loss, summary = sess.run([train_op, accuracy, loss, merged_summary_op],
                                                      feed_dict={X:feed_X, Y:feed_Y})

                # One summary is written per batch, so index it by the running
                # batch count. Using `epoch` alone gave every batch of an
                # epoch the same step value in the event file.
                global_step = epoch*NR_STEP + step
                summary_writer.add_summary(summary, global_step)

                if step % 1000 == 0:
                    print('Epoch= %d, step= %d, accuracy= %.2f loss= %.2f' % (epoch, step, step_accuracy, step_loss))
    finally:
        # Flush pending events and release the file even if training is interrupted
        summary_writer.close()