In [1]:
import tensorflow as tf #version == 1.2
import numpy as np
from utils import *
import cells
from models import Stack_Layers_Model
import time
import os
import datetime

In [2]:
############## Read data ##################
# Locations of the MFCC feature arrays and the matching label arrays,
# one feature/label directory pair per split.
train_feature_dir, train_label_dir = './data/TIMIT/phn/train/mfcc/', './data/TIMIT/phn/train/label/'
test_feature_dir, test_label_dir = './data/TIMIT/phn/test/mfcc/', './data/TIMIT/phn/test/label/'

# Load every array from disk (same order as before: train features, train
# labels, test features, test labels).
# Per the original notes: each feature array is [feature_num, time_step] and
# each label array is [label_num].
train_feature_list, train_label_list = (
    read_ndarray_from(train_feature_dir),
    read_ndarray_from(train_label_dir),
)
test_feature_list, test_label_list = (
    read_ndarray_from(test_feature_dir),
    read_ndarray_from(test_label_dir),
)


In [3]:
############ have a glance of the preprocessed data ############

# Keep only a small slice of each split so that debugging runs stay quick.
train_feature_list, train_label_list = train_feature_list[:200], train_label_list[:200]
test_feature_list, test_label_list = test_feature_list[:100], test_label_list[:100]

# Report container type, number of items and element type for every split.
for _summary_fmt, _split_list in (
    ("for the train feature, type: {}, length: {}, type of each element: {}", train_feature_list),
    ("for the train label, type: {}, length: {}, type of each element: {}", train_label_list),
    ("for the test feature, type: {}, length: {}, type of each element: {}", test_feature_list),
    ("for the test label, type: {}, length: {}, type of each element: {}", test_label_list),
):
    print(_summary_fmt.format(type(_split_list), len(_split_list), type(_split_list[0])))



for the train feature, type: <type 'list'>, length: 200, type of each element: <type 'numpy.ndarray'>
for the train label, type: <type 'list'>, length: 200, type of each element: <type 'numpy.ndarray'>
for the test feature, type: <type 'list'>, length: 100, type of each element: <type 'numpy.ndarray'>
for the test label, type: <type 'list'>, length: 100, type of each element: <type 'numpy.ndarray'>


In [4]:
################### Define hyper-parameters ########################
class Argument(object):
    """Hyper-parameters and output locations for the experiment in this notebook.

    Construction reads the module-level ``train_feature_list`` and
    ``test_feature_list`` and calls ``get_max_timestep`` on them, so the
    data-loading cells must have been executed first.
    """

    def __init__(self):
        # ---- network size and training schedule ----
        self.max_epoch = 500
        self.num_layer = 2
        self.num_hidden = 128
        # Size of the first axis of one feature array. The attribute name is
        # misspelled ("featue") but is kept because other cells read it.
        self.num_featue = train_feature_list[0].shape[0]
        # presumably the phone label set plus one CTC blank class -- TODO confirm
        self.num_class = 62

        # ---- optimisation ----
        self.lr_rate = 0.001
        self.batch_size = 32
        self.max_timestep = get_max_timestep(train_feature_list, test_feature_list)
        self.layer_norm = True  # only available for LSTMCell
        # Dropout is only used between the input and the first hidden layer,
        # and between the last hidden layer and the output layer.
        self.dropout = 0.1
        # Flag telling the model whether it is training (used for dropout).
        self.isTrain = True

        # ---- architecture selection ----
        self.cell_type = 'LSTMCell'  # options: LSTMCell, RNNCell, GRUCell, HyperLSTMCell
        self.model_type = 'bidirection'  # options: unidirection, bidirection, resnet, highway, seq2seq

        # ---- output locations ----
        self.model_dir = "./model/"  # directory in which trained models are stored
        self.log_dir = "./log/"  # directory in which log files are stored
        # Timestamped log file name. strftime is used instead of
        # str(datetime.now()) because the latter contains spaces and colons,
        # which are not valid in file names on every platform (e.g. Windows).
        self.log_name = datetime.datetime.now().strftime('%Y-%m-%d_%H-%M-%S_%f') + '.txt'

In [5]:
################### Build a model #################################
# Builds the TensorFlow graph: input placeholders, the stacked recurrent
# model, the CTC loss with an Adam training op, and a beam-search decoder.
# Statement order matters here because each call adds ops to the graph.
args = Argument()

# Input features, fixed to [batch_size, max_timestep, num_featue].
# NOTE(review): the graph name "intputs" looks like a typo for "inputs"; it is
# a runtime string, so it is left unchanged here.
inputs = tf.placeholder(dtype=tf.float32, shape=[args.batch_size, args.max_timestep, args.num_featue], name="intputs")
# The label sequences are fed as the three components of a sparse tensor
# (indices, values, dense shape) and reassembled into `targets`.
targets_idx = tf.placeholder(tf.int64)
targets_val = tf.placeholder(tf.int32)
targets_shape = tf.placeholder(tf.int64)
targets = tf.SparseTensor(targets_idx, targets_val, targets_shape)
# One length entry per example in the batch.
seq_len = tf.placeholder(tf.int32, [args.batch_size], name="seq_len")
# Stack the multi-layer network.
layers_model = Stack_Layers_Model(args, inputs, targets, seq_len)
logits = layers_model.build_model() # per the original note: shape [batch_size, max_timestep, num_class]

# Swap the first two axes so the tensor is time-major:
# [max_timestep, batch_size, num_class].
logits = tf.transpose(logits, [1,0,2])


# Optimizer: mean CTC loss over the batch, minimised with Adam.
loss = tf.nn.ctc_loss(targets, logits, seq_len)
cost = tf.reduce_mean(loss)
optimizer = tf.train.AdamOptimizer(args.lr_rate).minimize(cost)
# The first [0] selects the decoder's first return value and the second [0]
# its first entry -- presumably the best-scoring decoded path; confirm against
# the TensorFlow documentation.
predictions = tf.nn.ctc_beam_search_decoder(logits, seq_len, merge_repeated=False)[0][0]


In [6]:
############# Start Training ####################
# Local shortcuts for the hyper-parameters used throughout the training cell.
batch_size, max_epoch = args.batch_size, args.max_epoch

# Label granularity handed to the batching helper (and later to the
# edit-distance helper).
level = 'phn'

# The train and test splits are batched independently of each other; only the
# first element returned by data_lists_to_batches is kept.
train_batch_list, _ = data_lists_to_batches(
    train_feature_list, train_label_list, batch_size, level, args.max_timestep)
test_batch_list, _ = data_lists_to_batches(
    test_feature_list, test_label_list, batch_size, level, args.max_timestep)

num_train_batch, num_test_batch = len(train_batch_list), len(test_batch_list)

# Histories of the per-epoch error: one list for training, one for testing.
train_error_list, test_error_list = [], []

saver = tf.train.Saver()

# Resolve and create the output locations once, before training starts, so an
# unusable path fails immediately instead of after the first (slow) epoch.
# os.makedirs also creates missing parent directories.
model_dir = args.model_dir
log_dir = args.log_dir
log_name = args.log_name
for out_dir in (model_dir, log_dir):
    if not os.path.exists(out_dir):
        os.makedirs(out_dir)
check_point_path = os.path.join(model_dir, "model.ckpt")
log_path = os.path.join(log_dir, log_name)

with tf.Session() as sess:
    # Count the trainable parameters of the graph.
    num_trainable_params = np.sum([np.prod(v.shape) for v in tf.trainable_variables()])
    print("num of trainable params is {}".format(num_trainable_params))
    sess.run(tf.global_variables_initializer())
    for epoch in range(max_epoch):
        ################ train the neural network ################
        start_time = time.time()
        train_batch_error = np.zeros(num_train_batch)  # error of every training batch
        # NOTE(review): the graph was already built before this loop. Flipping
        # this Python attribute only matters if the model consults
        # args.isTrain at run time -- confirm that dropout is really disabled
        # during evaluation.
        args.isTrain = True
        for i in range(num_train_batch):
            train_inputs, train_targets, train_seq_len = train_batch_list[i]
            train_targets_idx, train_targets_val, train_targets_shp = train_targets
            feed_dict = {
                inputs: train_inputs, targets_idx: train_targets_idx, targets_val: train_targets_val,
                targets_shape: train_targets_shp, seq_len: train_seq_len}
            # The fetched sparse values get their own names so that they do
            # not overwrite the (idx, val, shape) batch tuple unpacked above.
            _, train_cost, train_predictions, train_truth = sess.run(
                [optimizer, cost, predictions, targets], feed_dict=feed_dict)
            train_batch_error[i] = get_edit_distance(
                [train_predictions.values], [train_truth.values], True, level)
            print("during training mode, batch: {}/{}, epoch: {}, PER: {}".format(
                i+1, num_train_batch, epoch+1, train_batch_error[i]))

        train_epoch_error = np.mean(train_batch_error)  # mean over the batches of this epoch
        end_time = time.time()
        train_time = end_time - start_time
        print("in train mode, epoch: {}/{}, PER: {:.2f}, time: {:.2f}s".format(
            epoch+1, max_epoch, train_epoch_error, train_time))
        train_error_list.append(train_epoch_error)

        ################ test the neural network ################
        test_batch_error = np.zeros(num_test_batch)  # error of every test batch
        start_time = time.time()
        args.isTrain = False  # see the NOTE(review) on args.isTrain above
        for i in range(num_test_batch):
            test_inputs, test_targets, test_seq_len = test_batch_list[i]
            test_targets_idx, test_targets_val, test_targets_shp = test_targets
            feed_dict = {
                inputs: test_inputs, targets_idx: test_targets_idx, targets_val: test_targets_val,
                targets_shape: test_targets_shp, seq_len: test_seq_len}
            # No optimizer op is fetched here, so this is a pure forward pass.
            test_cost, test_predictions, test_truth = sess.run(
                [cost, predictions, targets], feed_dict=feed_dict)
            test_batch_error[i] = get_edit_distance(
                [test_predictions.values], [test_truth.values], True, level)

        test_epoch_error = np.mean(test_batch_error)  # mean over the batches of this epoch
        end_time = time.time()
        test_time = end_time - start_time
        print("in test mode, epoch: {}/{}, PER: {:.2f}, time: {:.2f}s".format(epoch+1, max_epoch, test_epoch_error, test_time))
        test_error_list.append(test_epoch_error)
        # Show the last test batch as a sample; skipped when there were no
        # test batches, because the sample variables would not exist.
        if num_test_batch > 0:
            print("test truth:\n"+output_to_sequence(test_truth))
            print("test prediction:\n"+output_to_sequence(test_predictions))

        ################ save model and log info ##################
        # Checkpoint after every epoch; global_step tags the file with the epoch index.
        saver.save(sess, check_point_path, global_step=epoch)
        print("Model has been saved in {}".format(model_dir))
        # Append this epoch's results to the log file.
        with open(log_path, 'a') as f:
            f.write("for train mode, epoch: {}, PER: {}, run time: {}\n".format(epoch+1, train_epoch_error, train_time))
            f.write("for test mode, epoch: {}, PER: {}, run time: {}\n".format(epoch+1, test_epoch_error, test_time))
            f.write("=================================================\n")
        print("log has been saved in {}".format(log_name))
        print("===========================================")
            
        
        
        
        



num of trainable params is 446270


KeyboardInterrupt: 