In [1]:
import tensorflow as tf

from sklearn import datasets
from sklearn.cross_validation import train_test_split

import sys



In [2]:
class RNN_cell(object):
    """Simplified GRU-style recurrent cell expressed as a TensorFlow graph.

    The constructor creates all trainable variables plus the input
    placeholder (`self._inputs`, shape ``[None, None, input_size]``).
    The remaining methods build the recurrent hidden states and the
    per-step outputs on top of that placeholder.

    Both gates in `GRU` are computed from the current input only; the
    previous hidden state enters through the candidate state and the final
    interpolation.
    """

    def __init__(self, input_size, hidden_layer_size, target_size):
        """Create variables, the input placeholder and the initial hidden state.

        Args:
            input_size: size of the last axis of the input placeholder.
            hidden_layer_size: width of the hidden state.
            target_size: width of the output produced by `get_output`.
        """
        self.input_size = input_size
        self.hidden_layer_size = hidden_layer_size
        self.target_size = target_size

        input_to_hidden = [self.input_size, self.hidden_layer_size]
        hidden_only = [self.hidden_layer_size]

        # Input-side weights (candidate, reset gate, update gate) start at zero.
        self.Wx = tf.Variable(tf.zeros(input_to_hidden))
        self.Wr = tf.Variable(tf.zeros(input_to_hidden))
        self.Wz = tf.Variable(tf.zeros(input_to_hidden))

        # Gate biases are drawn from a truncated normal centred on 1.
        self.br = tf.Variable(tf.truncated_normal(hidden_only, mean=1))
        self.bz = tf.Variable(tf.truncated_normal(hidden_only, mean=1))

        # Hidden-to-hidden weights, also zero-initialised.
        self.Wh = tf.Variable(
            tf.zeros([self.hidden_layer_size, self.hidden_layer_size]))

        # Output projection: narrow truncated normal centred on 1.
        self.Wo = tf.Variable(
            tf.truncated_normal([self.hidden_layer_size, self.target_size],
                                mean=1, stddev=.01))
        self.bo = tf.Variable(
            tf.truncated_normal([self.target_size], mean=1, stddev=.01))

        self._inputs = tf.placeholder(tf.float32,
                                      shape=[None, None, self.input_size],
                                      name="inputs")

        # Rearranged copy of the inputs that `get_states` scans over.
        self.processed_input = process_batch_input_for_RNN(self._inputs)

        # Multiplying the slice at index 0 of axis 1 by an all-zero matrix
        # gives a zero tensor whose first dimension follows the fed input
        # and whose second dimension is hidden_layer_size.
        first_slice = self._inputs[:, 0, :]
        self.initial_hidden = tf.matmul(
            first_slice, tf.zeros([input_size, hidden_layer_size]))

    def GRU(self, previous_hidden_state, x):
        """Compute the next hidden state from the previous state and input `x`.

        Written as a `tf.scan` step function: takes the accumulated state
        and the current element, and returns the new state.
        """
        update_gate = tf.sigmoid(tf.matmul(x, self.Wz) + self.bz)
        reset_gate = tf.sigmoid(tf.matmul(x, self.Wr) + self.br)

        # The reset gate scales the recurrent contribution after the matmul.
        input_term = tf.matmul(x, self.Wx)
        recurrent_term = tf.matmul(previous_hidden_state, self.Wh) * reset_gate
        candidate = tf.tanh(input_term + recurrent_term)

        # Interpolate between the candidate and the previous state.
        return (1 - update_gate) * candidate + previous_hidden_state * update_gate

    def get_states(self):
        """Scan `GRU` over `self.processed_input`, starting from `self.initial_hidden`."""
        return tf.scan(self.GRU,
                       self.processed_input,
                       initializer=self.initial_hidden,
                       name="states")

    def get_output(self, hidden_state):
        """Project one hidden state through `Wo`/`bo` and apply ReLU."""
        projected = tf.matmul(hidden_state, self.Wo) + self.bo
        return tf.nn.relu(projected)

    def get_outputs(self):
        """Apply `get_output` to every hidden state returned by `get_states`."""
        return tf.map_fn(self.get_output, self.get_states())

def process_batch_input_for_RNN(batch_input):
    """Swap the first two axes of a rank-3 tensor.

    The original implementation applied ``perm=[2, 0, 1]`` followed by a
    full axis reversal; composing the two maps element ``[a, b, c]`` of the
    input to element ``[b, a, c]`` of the result, which is exactly a single
    transpose with ``perm=[1, 0, 2]``. Doing it in one op avoids an
    intermediate tensor.

    Args:
        batch_input: rank-3 tensor.

    Returns:
        Tensor with axes 0 and 1 exchanged and axis 2 unchanged, so that
        iterating over the leading axis (e.g. with `tf.scan`) walks what
        was axis 1 of the input.
    """
    return tf.transpose(batch_input, perm=[1, 0, 2])

In [3]:
# Model dimensions.
hidden_layer_size = 30
input_size = 8
target_size = 10

# Placeholder for the one-hot targets, one row per example.
# NOTE(review): this placeholder reuses name="inputs", the same name the
# RNN_cell input placeholder asks for; TensorFlow will uniquify one of
# them. Left unchanged to keep existing graph tensor names stable.
y = tf.placeholder(tf.float32, shape=[None, target_size], name="inputs")

rnn = RNN_cell(input_size, hidden_layer_size, target_size)

# Outputs for every scanned step; only the last step is used for prediction.
outputs = rnn.get_outputs()
last_output = outputs[-1]

# Class probabilities, used for the accuracy metric below.
output = tf.nn.softmax(last_output)

# Summed cross-entropy over the batch. The fused op computes
# -sum(y * log(softmax(last_output))) in a numerically stable way; the
# hand-written form produced NaN/inf as soon as any softmax probability
# underflowed to 0.
cross_entropy = tf.reduce_sum(
    tf.nn.softmax_cross_entropy_with_logits(labels=y, logits=last_output))

train_step = tf.train.AdamOptimizer().minimize(cross_entropy)

# Percentage of examples whose arg-max prediction matches the target.
correct_prediction = tf.equal(tf.argmax(y, 1), tf.argmax(output, 1))
accuracy = (tf.reduce_mean(tf.cast(correct_prediction, tf.float32))) * 100

In [None]:
# data preparation
def get_on_hot(number, depth=10):
    """Return a one-hot list of length `depth` with a 1 at index `number`.

    Args:
        number: class index, expected in ``[0, depth)``.
        depth: length of the returned list (defaults to 10).

    Returns:
        A list of `depth` ints, all 0 except a single 1 at `number`.

    Raises:
        IndexError: if `number` is outside ``[0, depth)``. Negative values
            are rejected explicitly; plain list indexing would otherwise
            wrap around and silently mark the wrong class.
    """
    if not 0 <= number < depth:
        raise IndexError(
            "class index {} out of range for depth {}".format(number, depth))
    on_hot = [0] * depth
    on_hot[number] = 1
    return on_hot

# Training configuration.
N_EPOCHS = 200
N_TRAIN = 1400      # training samples kept; a multiple of BATCH_SIZE
BATCH_SIZE = 100

digits = datasets.load_digits()
X = digits.images
Y_ = digits.target

# One-hot encode the integer labels with TensorFlow.
# (Pure-Python alternative: Y = list(map(get_on_hot, Y_)).)
sess = tf.InteractiveSession()
Y = sess.run(tf.one_hot(indices=Y_, depth=target_size))

X_train, X_test, y_train, y_test = train_test_split(X, Y, test_size=0.22, random_state=42)
X_train = X_train[:N_TRAIN]
y_train = y_train[:N_TRAIN]

# tf.initialize_all_variables() is deprecated in favour of this call.
sess.run(tf.global_variables_initializer())
for epoch in range(N_EPOCHS):
    # Mini-batches get their own names so the full X / Y arrays above are
    # not overwritten by the loop.
    for start in range(0, len(X_train), BATCH_SIZE):
        end = start + BATCH_SIZE
        X_batch = X_train[start:end]
        y_batch = y_train[start:end]
        sess.run(train_step, feed_dict={rnn._inputs: X_batch, y: y_batch})

    # The reported loss is evaluated on the last mini-batch of the epoch
    # only; accuracies are evaluated on the full train and test splits.
    Loss = str(sess.run(cross_entropy, feed_dict={rnn._inputs: X_batch, y: y_batch}))
    Train_accuracy = str(sess.run(accuracy, feed_dict={rnn._inputs: X_train, y: y_train}))
    Test_accuracy = str(sess.run(accuracy, feed_dict={rnn._inputs: X_test, y: y_test}))

    print("{} loss: {} \t tr_acc: {} \t ts_acc: {}".format((epoch+1), Loss, Train_accuracy, Test_accuracy))
    

1 loss: 228.765 	 tr_acc: 22.3571 	 ts_acc: 20.202
2 loss: 222.952 	 tr_acc: 27.0 	 ts_acc: 26.0101
3 loss: 209.629 	 tr_acc: 47.3571 	 ts_acc: 43.6869
4 loss: 189.739 	 tr_acc: 52.2857 	 ts_acc: 51.0101
5 loss: 169.972 	 tr_acc: 51.7857 	 ts_acc: 51.0101
6 loss: 152.825 	 tr_acc: 55.7143 	 ts_acc: 54.0404
7 loss: 138.068 	 tr_acc: 61.5714 	 ts_acc: 59.596
8 loss: 124.75 	 tr_acc: 64.2143 	 ts_acc: 62.6263
9 loss: 112.763 	 tr_acc: 67.5714 	 ts_acc: 66.1616
10 loss: 102.034 	 tr_acc: 68.8571 	 ts_acc: 67.6768
11 loss: 92.3516 	 tr_acc: 70.2857 	 ts_acc: 68.9394
12 loss: 83.6129 	 tr_acc: 72.0714 	 ts_acc: 69.9495
13 loss: 75.8086 	 tr_acc: 74.2857 	 ts_acc: 70.9596
14 loss: 68.8952 	 tr_acc: 76.7857 	 ts_acc: 73.2323
15 loss: 62.7923 	 tr_acc: 78.8571 	 ts_acc: 75.5051
16 loss: 57.4139 	 tr_acc: 80.2857 	 ts_acc: 76.2626
17 loss: 52.6989 	 tr_acc: 81.5714 	 ts_acc: 78.2828
18 loss: 48.5951 	 tr_acc: 82.6429 	 ts_acc: 79.5455
19 loss: 45.0632 	 tr_acc: 83.7143 	 ts_acc: 80.0505
20 loss: