In [1]:
import tensorflow as tf, numpy as np, shutil, random, os, csv
from tensorflow.python.ops import rnn, rnn_cell
import matplotlib.pyplot as plt
import glob

In [2]:
# Dataset locations: each directory is expected to hold the recordings plus a
# REFERENCE.csv file that is read into the label tables further down.
train_path = './data/td/training/'
validation_path = './data/td/validation/'
reference_train_path = os.path.join(train_path, 'REFERENCE.csv')
reference_validation_path = os.path.join(validation_path, 'REFERENCE.csv')

# Directory listing of the training set and the number of entries in it.
train_dirs = os.listdir(train_path)
train_size = len(train_dirs)

# Number of recordings per batch; also fixes the placeholder shapes below.
batch_size = 20

# One random draw of `batch_size` entries from the training directory listing.
batch_train_files = random.sample(os.listdir(train_path), batch_size)

In [3]:
def get_batch(path, batch_size):
    """Pick ``batch_size`` distinct ``.npy`` files at random from ``path``.

    Args:
        path: directory that is searched (non-recursively) for ``*.npy`` files.
        batch_size: number of file paths to draw, without replacement.

    Returns:
        A list of ``batch_size`` file paths, each prefixed with ``path``.

    Raises:
        ValueError: if ``path`` holds fewer than ``batch_size`` ``.npy`` files.
    """
    # Match on the real '.npy' extension; a bare '*npy' would also pick up any
    # unrelated entry whose name merely ends in "npy".
    # Sorting makes the draw reproducible under a seeded `random`, because
    # glob returns entries in arbitrary filesystem order.
    filenames = sorted(glob.glob(os.path.join(path, '*.npy')))
    if batch_size > len(filenames):
        raise ValueError('requested %d files but only %d .npy files found in %r'
                         % (batch_size, len(filenames), path))
    return random.sample(filenames, batch_size)

def get_batch_max_len(batch_files):
    """Return the largest first-axis length among the arrays in ``batch_files``.

    Each path is loaded with ``np.load`` and only ``shape[0]`` is inspected.
    An empty ``batch_files`` yields 0.
    """
    lengths = [np.load(name).shape[0] for name in batch_files]
    # The leading 0 reproduces the "start from zero" floor for empty input.
    return max([0] + lengths)

def get_reference(reference_path, reference):
    """Fill ``reference`` with ``first column -> second column`` from a CSV file.

    Args:
        reference_path: path of a comma-separated file (quote character ``|``).
        reference: dict updated in place; values are stored as the raw strings
            read from the file.

    Returns:
        The same ``reference`` dict, for convenience.
    """
    with open(reference_path) as csvfile:
        for row in csv.reader(csvfile, delimiter=',', quotechar='|'):
            # csv.reader yields [] for blank lines (e.g. a trailing newline);
            # skip anything without both a key and a label instead of
            # crashing with IndexError.
            if len(row) < 2:
                continue
            reference[row[0]] = row[1]
    return reference

def generate_batch(path, reference=None, n_samples=None):
    """Build one zero-padded, time-major batch and its one-hot labels.

    Args:
        path: directory to sample ``.npy`` recordings from (via ``get_batch``).
        reference: mapping ``file stem -> label``. When omitted, the module's
            ``reference_validation`` is used if ``path`` is ``validation_path``
            and ``reference_train`` otherwise.
        n_samples: number of recordings in the batch; defaults to the
            module-level ``batch_size``.

    Returns:
        ``(batchX, y_onehot)`` where ``batchX`` is float32 with shape
        ``(max_len, n_samples, 1)`` and ``y_onehot`` has shape
        ``(n_samples, 2)``: label 1 maps to ``[0, 1]``, label -1 maps to
        ``[1, 0]``, and any other label leaves the row all zeros.

    Raises:
        KeyError: if a sampled file's stem is missing from ``reference``.
    """
    if n_samples is None:
        n_samples = batch_size
    if reference is None:
        # Validation files must be looked up in the validation table; the
        # training table does not describe them.
        if path == validation_path:
            reference = reference_validation
        else:
            reference = reference_train

    batch_files = get_batch(path, n_samples)

    # Load every recording once (flattened to 1-D) and reuse it both for the
    # padding length and for filling the batch.
    signals = [np.asarray(np.load(f), dtype='float32').ravel()
               for f in batch_files]
    max_seq = max([0] + [signal.shape[0] for signal in signals])

    # Allocate directly in the (time, batch, feature) layout that was
    # previously produced by two swapaxes calls.
    batchX = np.zeros((max_seq, n_samples, 1), dtype='float32')
    batchY = np.zeros(n_samples, dtype='int32')
    for i, (f, signal) in enumerate(zip(batch_files, signals)):
        # Each recording gets its own batch column; the tail stays zero.
        batchX[:signal.shape[0], i, 0] = signal
        # Label key is the file name up to its first dot.
        batchY[i] = int(reference[os.path.basename(f).split('.')[0]])

    # convert batchY to onehot
    y_onehot = np.zeros((n_samples, 2))
    y_onehot[batchY == 1] = [0, 1]
    y_onehot[batchY == -1] = [1, 0]

    print('batchX, y_onehot, sizes', batchX.shape, y_onehot.shape)

    return batchX, y_onehot

In [4]:
# Label tables keyed by the first CSV column, one per data split.
# get_reference fills the dict it is handed in place.
reference_train = {}
reference_validation = {}
get_reference(reference_train_path, reference_train)
get_reference(reference_validation_path, reference_validation)

In [5]:
# Training hyper-parameters.
hm_epochs = 5    # number of epochs run by train_neural_network
n_classes = 2    # width of the one-hot label vector
state_size = 3   # NOTE(review): not referenced by the code visible here
n_units = 30     # hidden units of the recurrent cell

# Inputs are fed time-major: (time steps, batch_size, 1 feature); the time
# axis is left open because every batch is padded to its own maximum length.
x_placeholder = tf.placeholder(tf.float32, shape=[None, batch_size, 1])
# One-hot labels, one row per sample in the batch.
y_placeholder = tf.placeholder(tf.int32, shape=[batch_size, n_classes])


In [None]:
def recurrent_neural_network(input_data):
    """Build the RNN classifier graph and return its logits.

    Args:
        input_data: time-major float tensor fed to ``tf.nn.dynamic_rnn``
            (``time_major=True``), i.e. laid out as (time, batch, features).

    Returns:
        Logits tensor: the RNN output at the last time step projected to
        ``n_classes`` through a dense layer.
    """
    layer = {'weights': tf.Variable(tf.random_normal([n_units, n_classes])),
             'biases': tf.Variable(tf.random_normal([n_classes]))}

    # Named `cell` so the local does not shadow the imported `rnn_cell` module.
    cell = tf.nn.rnn_cell.BasicRNNCell(n_units, activation=tf.nn.relu)
    # dynamic_rnn creates the zero initial state itself when given `dtype`,
    # so no explicit zero_state tensor is needed.
    outputs, _ = tf.nn.dynamic_rnn(cell,
                                   input_data,
                                   dtype=tf.float32,
                                   time_major=True)

    # NOTE(review): generate_batch zero-pads shorter recordings, so for those
    # samples outputs[-1] is read after the padding. Passing sequence_length
    # to dynamic_rnn would stop each sample at its true end - confirm intent.
    output = tf.matmul(outputs[-1], layer['weights']) + layer['biases']

    return output

def train_neural_network():
    """Train the RNN classifier and print the loss and validation accuracy.

    Builds the loss (mean softmax cross-entropy), an Adam training op and an
    accuracy metric on top of ``recurrent_neural_network(x_placeholder)``.
    It then runs ``hm_epochs`` epochs of 10 randomly sampled training batches
    each. After every epoch one randomly sampled validation batch is scored
    and a summary line is printed.
    """
    logits = recurrent_neural_network(x_placeholder)
    labels = tf.reshape(y_placeholder, [batch_size, n_classes])
    cost = tf.reduce_mean(
        tf.nn.softmax_cross_entropy_with_logits(logits=logits, labels=labels))
    optimizer = tf.train.AdamOptimizer().minimize(cost)
    epoch_batch_itr = 10  # training batches drawn per epoch

    # Fraction of samples whose arg-max logit matches the one-hot label.
    correct = tf.equal(tf.argmax(logits, 1), tf.argmax(y_placeholder, 1))
    accuracy = tf.reduce_mean(tf.cast(correct, 'float32'))

    with tf.Session() as sess:
        tf.global_variables_initializer().run()

        for epoch in range(hm_epochs):
            # Sum (not mean) of the per-batch costs over the epoch.
            epoch_loss = 0
            for step in range(epoch_batch_itr):
                batchX, batchY = generate_batch(train_path)
                train_feed = {x_placeholder: batchX, y_placeholder: batchY}
                _, batch_cost = sess.run([optimizer, cost], feed_dict=train_feed)
                epoch_loss += batch_cost

            validation_batchX, validation_batchY = generate_batch(validation_path)
            validation_feed = {x_placeholder: validation_batchX,
                               y_placeholder: validation_batchY}
            validation_accuracy = accuracy.eval(validation_feed)
            print('Epoch', epoch, 'loss:', epoch_loss, 'Validation Accuracy:', validation_accuracy)

In [None]:
train_neural_network()

('batchX, y_onehot, sizes', (119232, 20, 1), (20, 2))
('Training Accuracy:', 0.80000001)
('batchX, y_onehot, sizes', (93824, 20, 1), (20, 2))
('Training Accuracy:', 0.75)
('batchX, y_onehot, sizes', (77824, 20, 1), (20, 2))
('Training Accuracy:', 0.60000002)
('batchX, y_onehot, sizes', (106816, 20, 1), (20, 2))
('Training Accuracy:', 0.89999998)
('batchX, y_onehot, sizes', (103634, 20, 1), (20, 2))
('Training Accuracy:', 0.75)
('batchX, y_onehot, sizes', (128458, 20, 1), (20, 2))
('Training Accuracy:', 0.75)
('batchX, y_onehot, sizes', (212099, 20, 1), (20, 2))
('Training Accuracy:', 0.69999999)
('batchX, y_onehot, sizes', (114945, 20, 1), (20, 2))
('Training Accuracy:', 0.80000001)
('batchX, y_onehot, sizes', (72447, 20, 1), (20, 2))
('Training Accuracy:', 0.80000001)
('batchX, y_onehot, sizes', (187520, 20, 1), (20, 2))
('Training Accuracy:', 0.89999998)
('Epoch', 0, 'loss:', 57.902577430009842)
('batchX, y_onehot, sizes', (82962, 20, 1), (20, 2))
('Training Accuracy:', 0.80000001)
(