In [1]:
import os
import data
import utils
import numpy as np
import tensorflow as tf
import matplotlib.pyplot as plt
from IPython.display import clear_output
from pprint import pprint


In [2]:
# Ask the project-local `data` module to load the dataset.
# NOTE(review): what load() actually reads/caches is not visible here -- confirm in data.py.
data.load()

# Hyper-parameters consumed by create_graph().
hyper_params = dict(
    # One entry per conv1d layer: kernel width, output channels and stride.
    kernels_width=[3000, 1500, 750],
    channels_out=[5, 10, 20],
    strides=[300, 150, 75],
    # One LSTM cell per entry, stacked in this order.
    num_hiddens_rnn=[200, 200],
    # Width of the final projection, i.e. depth of the logits given to the CTC loss.
    num_classes=28,
    # Learning rate passed to the Adam optimizer.
    learning_rate=1e-2,
)

In [3]:
def create_graph(hyper_params=hyper_params):
    """Build the conv1d -> stacked LSTM -> dense -> CTC training graph.

    Args:
        hyper_params: dict with the keys
            'kernels_width', 'channels_out', 'strides': per-conv-layer lists
                (kernel width, output channels, stride). The number of conv
                layers is ``len(hyper_params['kernels_width'])``; the other
                two lists must be at least that long.
            'num_hiddens_rnn': list of LSTM cell sizes, one cell per entry.
            'num_classes': width of the final projection (depth of the logits
                handed to ``tf.nn.ctc_loss``).
            'learning_rate': learning rate for the Adam optimizer.

    Returns:
        ``(graph, model_vars)`` where ``graph`` is a fresh ``tf.Graph`` and
        ``model_vars`` maps names to the tensors/ops callers need: the
        placeholders ('X', 'Y', 'seq_len'), 'global_step', 'cost', 'ler',
        'summary_op', 'optimizer' (the op returned by ``minimize``),
        'decoded', 'log_prob', 'logits' (time-major) and 'out_seq_len'.

    Raises:
        ValueError: if 'channels_out' or 'strides' has fewer entries than
            'kernels_width'.
    """
    kernel_widths = hyper_params['kernels_width']
    channels_out = hyper_params['channels_out']
    strides = hyper_params['strides']
    num_classes = hyper_params['num_classes']
    rnn_sizes = hyper_params['num_hiddens_rnn']

    # Fail early with a clear message instead of an IndexError half-way
    # through graph construction (and before the default graph is reset).
    if len(channels_out) < len(kernel_widths) or len(strides) < len(kernel_widths):
        raise ValueError(
            "'channels_out' (%d) and 'strides' (%d) need one entry per "
            "'kernels_width' entry (%d)"
            % (len(channels_out), len(strides), len(kernel_widths)))

    tf.reset_default_graph()

    graph = tf.Graph()

    with graph.as_default():
        global_step = tf.Variable(0, dtype=tf.int32, trainable=False, name='global_step')

        with tf.name_scope('input'):
            # X has a single input channel; the first two dimensions are left dynamic.
            X = tf.placeholder(tf.float32, [None, None, 1], name='input_x')
            Y = tf.sparse_placeholder(tf.int32, name='input_y')
            # Kept as float32 so the per-layer divide/ceil below stays in
            # floating point; it is cast to int32 before the CTC ops.
            seq_len = tf.placeholder(tf.float32, [None], name='seq_len')

        with tf.name_scope('batch_size'):
            batch_size = tf.shape(X)[0]

        out = X
        last_channel_out = 1
        out_seq_len = seq_len

        # zip() stops at the shortest list, which is kernel_widths after the
        # validation above.
        for i, (width, n_out, stride) in enumerate(zip(kernel_widths, channels_out, strides)):
            with tf.name_scope('conv%d' % i):
                kernel = tf.get_variable('conv_kernel%d' % i,
                    [width, last_channel_out, n_out],
                    initializer=tf.truncated_normal_initializer())
                biases = tf.get_variable('conv_biases%d' % i,
                    [n_out],
                    initializer=tf.random_normal_initializer())
                out = tf.nn.conv1d(out, kernel, stride=stride, padding='SAME')
                out = tf.nn.relu(out + biases, name='conv%d' % i)
                last_channel_out = n_out

                # Track how many output frames of each sequence are valid
                # after this strided, SAME-padded convolution.
                out_seq_len = tf.ceil(out_seq_len / stride)

        with tf.name_scope('rnn'):
            cells = [tf.contrib.rnn.LSTMCell(num_hidden, state_is_tuple=True)
                     for num_hidden in rnn_sizes]
            stack = tf.contrib.rnn.MultiRNNCell(cells, state_is_tuple=True)
            out, _ = tf.nn.dynamic_rnn(stack, out, out_seq_len, dtype=tf.float32)

        with tf.name_scope('fc'):
            last_hidden = rnn_sizes[-1]
            # Flatten the batch and time dimensions so one matmul projects every frame.
            out = tf.reshape(out, [-1, last_hidden])
            # The names below label the initial-value ops only, and no longer
            # depend on the conv loop index. The Variables themselves are
            # intentionally left unnamed so their names (and therefore
            # previously written checkpoints) stay as they were.
            W = tf.Variable(tf.truncated_normal([last_hidden, num_classes], stddev=0.1, name='fc_w'))
            b = tf.Variable(tf.constant(0., shape=[num_classes], name='fc_b'))
            logits = tf.nn.xw_plus_b(out, W, b)

        with tf.name_scope('ctc_loss_function'):
            # Restore the batch dimension, then swap the first two axes so
            # the logits are time-major before they reach tf.nn.ctc_loss.
            logits = tf.reshape(logits, [batch_size, -1, num_classes])
            logits = tf.transpose(logits, (1, 0, 2))
            out_seq_len = tf.cast(out_seq_len, tf.int32)
            loss = tf.nn.ctc_loss(Y, logits, out_seq_len)
            cost = tf.reduce_mean(loss)

        optimizer = tf.train.AdamOptimizer(hyper_params['learning_rate']).minimize(cost, global_step=global_step)

        with tf.name_scope('decoding'):
            decoded, log_prob = tf.nn.ctc_greedy_decoder(logits, out_seq_len)
            # Mean edit distance between the greedy decoding and the labels.
            ler = tf.reduce_mean(tf.edit_distance(tf.cast(decoded[0], tf.int32), Y))

        with tf.name_scope('loss_by_step'):
            tf.summary.scalar('cost', cost)
            tf.summary.scalar('ler', ler)
            summary_op = tf.summary.merge_all()

    model_vars = {
        'X': X, 'Y': Y, 'seq_len': seq_len, 'global_step': global_step,
        'ler': ler, 'summary_op': summary_op, 'optimizer': optimizer, 'decoded': decoded, 'cost': cost,
        'logits': logits, 'log_prob': log_prob,
        'out_seq_len': out_seq_len
    }
    return graph, model_vars

In [4]:
# Start from a clean default graph, then build the model graph and the
# dictionary of tensors/ops that the training cells look up by name.
tf.reset_default_graph()
graph, model_vars = create_graph(hyper_params=hyper_params)

In [7]:
train_params = {
    'batch_size' : 2,
    'skip_step' : 4,    # print progress and write a checkpoint every this many steps
    'n_epochs' : 10000000
}

checkpoint_dir = './checkpoints/'
# Make sure the checkpoint directory exists before the first saver.save call.
os.makedirs(checkpoint_dir, exist_ok=True)

with tf.Session(graph=graph) as sess:
    # Setup: initialise variables, then resume from the latest checkpoint if one exists.
    tf.global_variables_initializer().run()
    writer = tf.summary.FileWriter('./graphs/', sess.graph)
    try:
        saver = tf.train.Saver()
        ckpt = tf.train.get_checkpoint_state(checkpoint_dir)
        if ckpt and ckpt.model_checkpoint_path:
            saver.restore(sess, ckpt.model_checkpoint_path)

        # Bookkeeping: where to resume from and how many steps to run in total.
        initial_step = model_vars['global_step'].eval()
        num_train, num_test = data.stats()
        batches_per_epoch = int(num_train / train_params['batch_size'])
        total_steps = int(train_params['n_epochs'] * num_train / train_params['batch_size'])
        print('num_train', num_train,
              'num_test', num_test,
              'n_batch', total_steps,
              'batch_per_epochs', batches_per_epoch)
        train_batch = data.train_batch_generator(train_params['batch_size'])

        # Training
        for i in range(initial_step, total_steps):
            # NOTE(review): the captured run of this cell failed in CTCLoss with
            # "label SparseTensor is not valid ... out of bounds", i.e. a label
            # index in Y_batch lies outside its declared dense shape. Y_batch is
            # produced by data.train_batch_generator -- verify the shape it reports.
            X_batch, seq_len_batch, Y_batch = next(train_batch)
            feed = {model_vars['X']: X_batch,
                model_vars['Y']: Y_batch,
                model_vars['seq_len']: seq_len_batch}
            batch_cost, _, batch_ler, summary = sess.run([model_vars['cost'],
                                                          model_vars['optimizer'],
                                                          model_vars['ler'],
                                                          model_vars['summary_op']], feed)
            writer.add_summary(summary, global_step=i)

            if i % train_params['skip_step'] == 0:
                print('batch', i, 'cost', batch_cost, 'ler', batch_ler)
                saver.save(sess, checkpoint_dir, i)
    finally:
        # Flush and release the event file even when training raises.
        writer.close()


num_train 240 num_test 60 n_batch 1200000000 batch_per_epochs 120


InvalidArgumentError: label SparseTensor is not valid: indices[1] = [0,1] is out of bounds: need 0 <= index < [2,1]
	 [[Node: ctc_loss_function/CTCLoss = CTCLoss[ctc_merge_repeated=true, ignore_longer_outputs_than_inputs=false, preprocess_collapse_repeated=false, _device="/job:localhost/replica:0/task:0/cpu:0"](ctc_loss_function/transpose/_45, _arg_input/input_y/indices_0_1, _arg_input/input_y/values_0_3, ctc_loss_function/Cast/_47)]]

Caused by op 'ctc_loss_function/CTCLoss', defined at:
  File "C:\Users\root\Anaconda3\lib\runpy.py", line 184, in _run_module_as_main
    "__main__", mod_spec)
  File "C:\Users\root\Anaconda3\lib\runpy.py", line 85, in _run_code
    exec(code, run_globals)
  File "C:\Users\root\Anaconda3\lib\site-packages\ipykernel\__main__.py", line 3, in <module>
    app.launch_new_instance()
  File "C:\Users\root\Anaconda3\lib\site-packages\traitlets\config\application.py", line 653, in launch_instance
    app.start()
  File "C:\Users\root\Anaconda3\lib\site-packages\ipykernel\kernelapp.py", line 474, in start
    ioloop.IOLoop.instance().start()
  File "C:\Users\root\Anaconda3\lib\site-packages\zmq\eventloop\ioloop.py", line 162, in start
    super(ZMQIOLoop, self).start()
  File "C:\Users\root\Anaconda3\lib\site-packages\tornado\ioloop.py", line 887, in start
    handler_func(fd_obj, events)
  File "C:\Users\root\Anaconda3\lib\site-packages\tornado\stack_context.py", line 275, in null_wrapper
    return fn(*args, **kwargs)
  File "C:\Users\root\Anaconda3\lib\site-packages\zmq\eventloop\zmqstream.py", line 440, in _handle_events
    self._handle_recv()
  File "C:\Users\root\Anaconda3\lib\site-packages\zmq\eventloop\zmqstream.py", line 472, in _handle_recv
    self._run_callback(callback, msg)
  File "C:\Users\root\Anaconda3\lib\site-packages\zmq\eventloop\zmqstream.py", line 414, in _run_callback
    callback(*args, **kwargs)
  File "C:\Users\root\Anaconda3\lib\site-packages\tornado\stack_context.py", line 275, in null_wrapper
    return fn(*args, **kwargs)
  File "C:\Users\root\Anaconda3\lib\site-packages\ipykernel\kernelbase.py", line 276, in dispatcher
    return self.dispatch_shell(stream, msg)
  File "C:\Users\root\Anaconda3\lib\site-packages\ipykernel\kernelbase.py", line 228, in dispatch_shell
    handler(stream, idents, msg)
  File "C:\Users\root\Anaconda3\lib\site-packages\ipykernel\kernelbase.py", line 390, in execute_request
    user_expressions, allow_stdin)
  File "C:\Users\root\Anaconda3\lib\site-packages\ipykernel\ipkernel.py", line 196, in do_execute
    res = shell.run_cell(code, store_history=store_history, silent=silent)
  File "C:\Users\root\Anaconda3\lib\site-packages\ipykernel\zmqshell.py", line 501, in run_cell
    return super(ZMQInteractiveShell, self).run_cell(*args, **kwargs)
  File "C:\Users\root\Anaconda3\lib\site-packages\IPython\core\interactiveshell.py", line 2717, in run_cell
    interactivity=interactivity, compiler=compiler, result=result)
  File "C:\Users\root\Anaconda3\lib\site-packages\IPython\core\interactiveshell.py", line 2821, in run_ast_nodes
    if self.run_code(code, result):
  File "C:\Users\root\Anaconda3\lib\site-packages\IPython\core\interactiveshell.py", line 2881, in run_code
    exec(code_obj, self.user_global_ns, self.user_ns)
  File "<ipython-input-4-4c0eefccc44f>", line 2, in <module>
    graph, model_vars = create_graph(hyper_params)
  File "<ipython-input-3-104917a899c8>", line 55, in create_graph
    loss = tf.nn.ctc_loss(Y, logits, out_seq_len)
  File "C:\Users\root\Anaconda3\lib\site-packages\tensorflow\python\ops\ctc_ops.py", line 152, in ctc_loss
    ignore_longer_outputs_than_inputs=ignore_longer_outputs_than_inputs)
  File "C:\Users\root\Anaconda3\lib\site-packages\tensorflow\python\ops\gen_ctc_ops.py", line 170, in _ctc_loss
    name=name)
  File "C:\Users\root\Anaconda3\lib\site-packages\tensorflow\python\framework\op_def_library.py", line 767, in apply_op
    op_def=op_def)
  File "C:\Users\root\Anaconda3\lib\site-packages\tensorflow\python\framework\ops.py", line 2506, in create_op
    original_op=self._default_original_op, op_def=op_def)
  File "C:\Users\root\Anaconda3\lib\site-packages\tensorflow\python\framework\ops.py", line 1269, in __init__
    self._traceback = _extract_stack()

InvalidArgumentError (see above for traceback): label SparseTensor is not valid: indices[1] = [0,1] is out of bounds: need 0 <= index < [2,1]
	 [[Node: ctc_loss_function/CTCLoss = CTCLoss[ctc_merge_repeated=true, ignore_longer_outputs_than_inputs=false, preprocess_collapse_repeated=false, _device="/job:localhost/replica:0/task:0/cpu:0"](ctc_loss_function/transpose/_45, _arg_input/input_y/indices_0_1, _arg_input/input_y/values_0_3, ctc_loss_function/Cast/_47)]]


In [None]:
# Scratch cell: re-declares the training settings (skip_step is 1 here) and
# pulls a single batch inside a fresh session.
train_params = {
    'batch_size' : 2,
    'skip_step' : 1,
    'n_epochs' : 10000000
}

with tf.Session(graph=graph) as sess:
    tf.global_variables_initializer().run()
    # `train_batch` is not created in this cell; it is the generator assigned
    # in the training cell above, so that cell must have run far enough to
    # define it before this one is executed.
    X_batch, seq_len_batch, Y_batch = next(train_batch)