In [52]:
import numpy as np
import tensorflow as tf
import pandas as pd
import random as random

In [22]:
PATH_TRAIN = './input/train.csv'
PATH_TEST = './input/test.csv'


def import_data(path_train, path_test):
    '''
    Import train data and test data from path.
    '''
    data_train = pd.read_csv(path_train).as_matrix()
    data_test = pd.read_csv(path_test).as_matrix()

    random.shuffle(data_train)
    random.shuffle(data_test)
    
    return data_train[:, 1:], data_train[:, 0:1].reshape(data_train.shape[0], 1),\
            data_test[:, 1:], data_test[:, 0:1].reshape(data_test.shape[0], 1)

def kernel_initializer(shape):
    '''
    Initial kernel in convolution layer or weight in full connection layer.
    '''
    initial = tf.truncated_normal(shape=shape, stddev=0.1, dtype="float32")
    return tf.Variable(initial_value=initial)

def bias_initializer(shape):
    initial = tf.constant(shape=shape, value=1, dtype="float32")
    return tf.Variable(initial_value=initial)
    

In [20]:
data_train, label_train, data_test, label_test = import_data(PATH_TRAIN, PATH_TEST)

# Divide to validation data
# 42000 = 4200 + 37800
data_valid = data_train[0:4200, :]
label_valid = label_train[0:4200, :].reshape([4200, 1])

data_train = data_train[4200:, :]
label_train = label_train[4200:, :].reshape([label_train.shape[0]-4200, 1])

In [69]:
## Prepare for the trainable variables.

# Convolution layer C1 trainable variables.
kernel_c1 = kernel_initializer((5, 5, 1, 6))
bias_c1 = bias_initializer((6, ))

# Subsample layer S2 trainable variables.
kernel_s2 = kernel_initializer((6,))
bias_s2 = bias_initializer((6,))

# Convolution layer C3 trainable variables.
kernel_c3 = kernel_initializer((5, 5, 6, 16))
bias_c3 = bias_initializer((16, ))

# Subsample layer S4 trainable variables.
kernel_s4 = kernel_initializer((16,))
bias_s4 = bias_initializer((16,))

# Convolution layer C5 trainable variables.
kernel_c5 = kernel_initializer((4, 4, 16, 120))
bias_c5 = bias_initializer((120,))

# Full connection layer F6 trainable variables.
kernel_f6 = kernel_initializer((1, 1, 120, 84))
bias_f6 = bias_initializer((84,))

# Full connection layer output trainable variables.
kernel_f7 = kernel_initializer((1, 1, 84, 10))
bias_f7 = bias_initializer((10,))



In [78]:
## Forward propapation (connect the graph).

batch = 420

# Make placeholder.
X = tf.placeholder(shape=(batch, 28, 28, 1), dtype="float32")
Y = tf.placeholder(shape=(1, batch), dtype="float32")

# Convolution layer 1, kernel = 5x5x6, output batchx24x24x6
X1 = tf.nn.conv2d(X, kernel_c1, strides=[1, 1, 1, 1], padding="VALID")+bias_c1

# Subsample layer 2, output batchx12x12x6
X2 = tf.nn.avg_pool(X1, [1, 2, 2, 1], strides=[1, 2, 2, 1], padding="VALID")*kernel_s2+bias_s2

# Convolution layer 3, kernel = 5x5x6x16, output batchx8x8x16
X3 = tf.nn.conv2d(X2, kernel_c3, strides=[1, 1, 1, 1], padding="VALID")+bias_c3

# Subsample layer 4, output batchx4x4x16
X4 = tf.nn.avg_pool(X3, [1, 2, 2, 1], strides=[1, 2, 2, 1], padding="VALID")*kernel_s4+bias_s4

# Convolution layer 5, kernel = 4x4x16x160, output batchx1x1x160
X5 = tf.nn.conv2d(X4, kernel_c5, strides=[1, 1, 1, 1], padding="VALID")+bias_c5

# Full Connection Layer 6, kernel = 1x1x120x84, output batchx1x1x84
X6 = tf.sigmoid(tf.nn.conv2d(X5, kernel_f6, strides=[1, 1, 1, 1], padding="VALID")+bias_f6)

# Full Connection Layer 7, kernel = 1x1x10x84, output batchx1x1x10
X7 = tf.sigmoid(tf.nn.conv2d(X6, kernel_f7, strides=[1, 1, 1, 1], padding="VALID")+bias_f7)

# Compute loss and make optimite
loss = tf.nn.softmax_cross_entropy_with_logits(labels=Y,\
                                               logits=tf.reduce_max(X7, axis=-1))
optimiter = tf.train.AdagradOptimizer(learning_rate=0.01)
optimite = optimiter.minimize(loss)

# Compute accuracy
Y_ = tf.arg_max(X7, dimension=1)
accuracy = 1-tf.count_nonzero(tf.to_int64(Y)-Y_)/batch


ValueError: Dimension 0 in both shapes must be equal, but are 420 and 1 for 'SoftmaxCrossEntropyWithLogits_19' (op: 'SoftmaxCrossEntropyWithLogits') with input shapes: [420,1], [1,420].

In [None]:
sess = tf.Session()
g_init = tf.global_variables_initializer()
sess.run(g_init)

In [68]:
# Train
for i in range(round(data_train.shape[0]/batch)):
    accuracy_epoch, _ = sess.run([accuracy, optimite], \
                                 feed_dict={X: data_train[i*batch:(i+1)*batch, :].reshape([batch, 28, 28, 1]),\
                                            Y: label_train[i*batch:(i+1)*batch].reshape([1, batch])})
    print(accuracy_epoch)

0.06904761904761902
0.08809523809523812
0.080952380952381
0.08809523809523812
0.0976190476190476
0.07857142857142863
0.09523809523809523
0.10238095238095235
0.09285714285714286
0.07380952380952377
0.07380952380952377
0.10238095238095235
0.06904761904761902
0.09285714285714286
0.07380952380952377
0.0714285714285714
0.05714285714285716
0.11904761904761907
0.09523809523809523
0.09047619047619049
0.09523809523809523
0.08809523809523812
0.08571428571428574
0.09523809523809523
0.080952380952381
0.09285714285714286
0.11190476190476195
0.10238095238095235
0.06904761904761902
0.080952380952381
0.09285714285714286
0.09047619047619049
0.08333333333333337
0.07619047619047614
0.07619047619047614
0.11190476190476195
0.08809523809523812
0.08333333333333337
0.08809523809523812
0.07380952380952377
0.08333333333333337
0.09285714285714286
0.11190476190476195
0.08809523809523812
0.06190476190476191
0.0714285714285714
0.10238095238095235
0.09285714285714286
0.1166666666666667
0.0976190476190476
0.097619047

In [40]:
i

0