diff --git a/.gitignore b/.gitignore
index c823960..5c7da07 100644
--- a/.gitignore
+++ b/.gitignore
@@ -2,3 +2,6 @@
 *.pyc
 *.log
 *.swp
+*.tmp
+*.gz
+*.txt
diff --git a/cmds/run_DNN.py b/cmds/run_DNN.py
index ceecff8..fb8dcdb 100755
--- a/cmds/run_DNN.py
+++ b/cmds/run_DNN.py
@@ -94,17 +94,17 @@
     log('> ... finetuning the model')
     while (cfg.lrate.get_rate() != 0):
-        # one epoch of sgd training 
+        # one epoch of sgd training
         train_error = train_sgd(train_fn, cfg)
         log('> epoch %d, training error %f ' % (cfg.lrate.epoch, 100*numpy.mean(train_error)) + '(%)')
-        # validation 
+        # validation
         valid_error = validate_by_minibatch(valid_fn, cfg)
         log('> epoch %d, lrate %f, validation error %f ' % (cfg.lrate.epoch, cfg.lrate.get_rate(), 100*numpy.mean(valid_error)) + '(%)')
         cfg.lrate.get_next_rate(current_error = 100*numpy.mean(valid_error))
         # output nnet parameters and lrate, for training resume
         if cfg.lrate.epoch % cfg.model_save_step == 0:
             _nnet2file(dnn.layers, filename=wdir + '/nnet.tmp')
-            _lrate2file(cfg.lrate, wdir + '/training_state.tmp') 
+            _lrate2file(cfg.lrate, wdir + '/training_state.tmp')

     # save the model and network configuration
     if cfg.param_output_file != '':
@@ -117,8 +117,8 @@
     # output the model into Kaldi-compatible format
     if cfg.kaldi_output_file != '':
         dnn.write_model_to_kaldi(cfg.kaldi_output_file)
-        log('> ... the final Kaldi model is ' + cfg.kaldi_output_file) 
+        log('> ... the final Kaldi model is ' + cfg.kaldi_output_file)

-    # remove the tmp files (which have been generated from resuming training) 
+    # remove the tmp files (which have been generated from resuming training)
     os.remove(wdir + '/nnet.tmp')
-    os.remove(wdir + '/training_state.tmp')
+    os.remove(wdir + '/training_state.tmp')
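
Note: the run_DNN.py hunks above are whitespace cleanup around the checkpointing logic: every model_save_step epochs the current weights and learning-rate state are dumped to nnet.tmp / training_state.tmp so an interrupted job can resume, and both files are removed once training finishes. For orientation only, a resumed run would restore that state roughly as in the sketch below; the loader counterparts (_file2nnet, _file2lrate) and their exact signatures are assumptions here, not part of this diff.

    import os
    # hypothetical resume check, mirroring the checkpoint files written in the loop above
    if os.path.exists(wdir + '/nnet.tmp') and os.path.exists(wdir + '/training_state.tmp'):
        _file2nnet(dnn.layers, filename = wdir + '/nnet.tmp')    # reload layer weights (assumed signature)
        cfg.lrate = _file2lrate(wdir + '/training_state.tmp')    # reload epoch and learning-rate state (assumed signature)
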
diff --git a/cmds/run_MTL.py b/cmds/run_MTL.py
index d07e1b9..e249fab 100755
--- a/cmds/run_MTL.py
+++ b/cmds/run_MTL.py
@@ -32,11 +32,11 @@
 from utils.utils import parse_arguments, parse_data_spec_mtl, parse_nnet_spec_mtl
 from utils.learn_rates import _lrate2file, _file2lrate
-from utils.network_config import NetworkConfig 
+from utils.network_config import NetworkConfig
 from learning.sgd import validate_by_minibatch

 # Implements Multi-Task Learning (MTL) in which several tasks share some lower hidden
-# layers (shared representation learning). Each task has its specific higher layers (in 
+# layers (shared representation learning). Each task has its specific higher layers (in
 # the simplest case, a task-specific softmax layer). References include:

 # J. Huang, J. Li, D. Yu, L. Deng, and Y. Gong. Cross-language knowledge transfer using
@@ -44,13 +44,13 @@
 # Y. Miao, and F. Metze. Improving language-universal feature extraction with deep maxout
 # and convolutional neural networks. Interspeech 2014.
-    
+
 if __name__ == '__main__':

     # check the arguments
     arg_elements = [sys.argv[i] for i in range(1, len(sys.argv))]
-    arguments = parse_arguments(arg_elements) 
+    arguments = parse_arguments(arg_elements)
     required_arguments = ['train_data', 'valid_data', 'task_number', 'shared_nnet_spec', 'indiv_nnet_spec', 'wdir']
     for arg in required_arguments:
@@ -64,25 +64,25 @@
     wdir = arguments['wdir']

     # various lists used in MTL
-    config_array = [] 
+    config_array = []
     train_fn_array = []; valid_fn_array = []
     dnn_array = []
-    
+
     # parse data specification
     train_data_spec_array = parse_data_spec_mtl(train_data_spec)
     valid_data_spec_array = parse_data_spec_mtl(valid_data_spec)
     if len(train_data_spec_array) != task_number or len(valid_data_spec_array) != task_number:
         print "Error: #datasets in data specification doesn't match #tasks"; exit(1)
     # split shared_spec ans indiv_spec into individual task's networks
-    nnet_spec_array, shared_layers_num = parse_nnet_spec_mtl(shared_spec, indiv_spec) 
+    nnet_spec_array, shared_layers_num = parse_nnet_spec_mtl(shared_spec, indiv_spec)
     if len(nnet_spec_array) != task_number:
         print "Error: #networks specified by --indiv-spec doesn't match #tasks"; exit(1)

     # parse network configuration from arguments, and initialize data reading
     for n in xrange(task_number):
         network_config = NetworkConfig()
         network_config.parse_config_dnn(arguments, nnet_spec_array[n])
-        network_config.init_data_reading(train_data_spec_array[n], valid_data_spec_array[n]) 
-        config_array.append(network_config) 
+        network_config.init_data_reading(train_data_spec_array[n], valid_data_spec_array[n])
+        config_array.append(network_config)

     numpy_rng = numpy.random.RandomState(89677)
     theano_rng = RandomStreams(numpy_rng.randint(2 ** 30))
@@ -104,7 +104,7 @@
         # get the training, validation and testing function for the model
         log('> ... getting the finetuning functions for task %d' % (n))
         train_fn, valid_fn = dnn.build_finetune_functions((cfg.train_x, cfg.train_y), (cfg.valid_x, cfg.valid_y), batch_size=cfg.batch_size)
-        # add dnn and the functions to the list 
+        # add dnn and the functions to the list
         dnn_array.append(dnn)
         train_fn_array.append(train_fn); valid_fn_array.append(valid_fn)
     # check the working dir to decide whether it's resuming training; if yes, load the tmp network files for initialization
@@ -137,7 +137,7 @@
             batch_numbers_per_chunk[n] = config_array[n].train_sets.cur_frame_num / config_array[n].batch_size
         # although we set one single trunk size, the actual size of data chunks we read in may differ
         # across the tasks. this is because we may reach the end of the data file. thus, we loop over
-        # the max number of mini-batches, but do the checking on each individual task 
+        # the max number of mini-batches, but do the checking on each individual task
         for batch_index in xrange(max(batch_numbers_per_chunk)): # loop over mini-batches
             for n in active_tasks:
                 if batch_index < batch_numbers_per_chunk[n]:
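
Note: the last run_MTL.py hunk keeps the comment describing the chunk-reading pattern: once a task's data file nears its end, its current chunk (and hence its entry in batch_numbers_per_chunk) can be smaller than the other tasks', so the loop runs up to the largest per-task batch count and simply skips tasks whose chunk is already exhausted. A minimal standalone sketch of that pattern, with placeholder batch counts and an empty training step:

    batch_numbers_per_chunk = [10, 10, 7]   # placeholder per-task batch counts for the current chunks
    active_tasks = range(len(batch_numbers_per_chunk))
    for batch_index in xrange(max(batch_numbers_per_chunk)): # loop over mini-batches
        for n in active_tasks:
            if batch_index < batch_numbers_per_chunk[n]:
                pass   # train task n on mini-batch batch_index; tasks with a smaller chunk are skipped here
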
diff --git a/learning/sgd.py b/learning/sgd.py
index 19376f6..b321c6b 100755
--- a/learning/sgd.py
+++ b/learning/sgd.py
@@ -28,7 +28,11 @@ def validate_by_minibatch_verbose(valid_fn, valid_sets, valid_xy, batch_size):
     while (not valid_sets.is_finish()):
         valid_sets.load_next_partition(valid_xy)
         for batch_index in xrange(valid_sets.cur_frame_num / batch_size): # loop over mini-batches
-            valid_error.append(valid_fn(index=batch_index))
+            mean_error = valid_fn(index=batch_index)
+            valid_error += [mean_error] * batch_size
+        if valid_sets.cur_frame_num % batch_size > 0:
+            mean_error = valid_fn(index = valid_sets.cur_frame_num / batch_size)
+            valid_error += [mean_error] * (valid_sets.cur_frame_num % batch_size)
     valid_sets.initialize_read()
     return valid_error

@@ -39,7 +43,11 @@ def validate_by_minibatch(valid_fn, cfg):
     while (not valid_sets.is_finish()):
         valid_sets.load_next_partition(valid_xy)
         for batch_index in xrange(valid_sets.cur_frame_num / batch_size): # loop over mini-batches
-            valid_error.append(valid_fn(index=batch_index))
+            mean_error = valid_fn(index=batch_index)
+            valid_error += [mean_error] * batch_size
+        if valid_sets.cur_frame_num % batch_size > 0:
+            mean_error = valid_fn(index = valid_sets.cur_frame_num / batch_size)
+            valid_error += [mean_error] * (valid_sets.cur_frame_num % batch_size)
     valid_sets.initialize_read()
     return valid_error

@@ -56,19 +64,27 @@ def train_sgd_verbose(train_fn, train_sets, train_xy, batch_size, learning_rate,
     while (not train_sets.is_finish()):
         train_sets.load_next_partition(train_xy)
         for batch_index in xrange(train_sets.cur_frame_num / batch_size): # loop over mini-batches
-            train_error.append(train_fn(index=batch_index, learning_rate = learning_rate, momentum = momentum))
+            mean_error = train_fn(index=batch_index, learning_rate = learning_rate, momentum = momentum)
+            train_error += [mean_error] * batch_size
+        if train_sets.cur_frame_num % batch_size > 0:
+            mean_error = train_fn(index=train_sets.cur_frame_num / batch_size, learning_rate = learning_rate, momentum = momentum)
+            train_error += [mean_error] * (train_sets.cur_frame_num % batch_size)
     train_sets.initialize_read()
     return train_error

 def train_sgd(train_fn, cfg):
     train_sets = cfg.train_sets; train_xy = cfg.train_xy
     batch_size = cfg.batch_size
-    learning_rate = cfg.lrate.get_rate(); momentum = cfg.momentum 
-    
+    learning_rate = cfg.lrate.get_rate(); momentum = cfg.momentum
+
     train_error = []
     while (not train_sets.is_finish()):
         train_sets.load_next_partition(train_xy)
         for batch_index in xrange(train_sets.cur_frame_num / batch_size): # loop over mini-batches
-            train_error.append(train_fn(index=batch_index, learning_rate = learning_rate, momentum = momentum))
+            mean_error = train_fn(index=batch_index, learning_rate = learning_rate, momentum = momentum)
+            train_error += [mean_error] * batch_size
+        if train_sets.cur_frame_num % batch_size > 0:
+            mean_error = train_fn(index=train_sets.cur_frame_num / batch_size, learning_rate = learning_rate, momentum = momentum)
+            train_error += [mean_error] * (train_sets.cur_frame_num % batch_size)
     train_sets.initialize_read()
     return train_error
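
Note: the learning/sgd.py change is the only functional one in this diff. train_fn / valid_fn return the mean error over one mini-batch, and the callers in run_DNN.py average the collected list with numpy.mean; previously every list entry counted equally and the frames left over at the end of a partition (cur_frame_num % batch_size) were never evaluated at all. The new code repeats each mini-batch mean once per frame it covers and also runs the final partial batch, so numpy.mean over the list becomes a true per-frame average. A small self-contained illustration of why the weighting matters, with made-up error values:

    import numpy

    batch_size = 4
    # per-frame errors for a 10-frame partition: two full batches plus a 2-frame leftover
    errors = [0., 0., 0., 0., 1., 1., 1., 1., 1., 1.]
    batch_means = [numpy.mean(errors[0:4]), numpy.mean(errors[4:8]), numpy.mean(errors[8:10])]
    print numpy.mean(batch_means)     # ~0.667: the 2-frame batch is weighted like a full batch
    print numpy.mean([batch_means[0]] * 4 + [batch_means[1]] * 4 + [batch_means[2]] * 2)   # 0.6: the true per-frame mean
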
diff --git a/utils/sda_config.py b/utils/sda_config.py
index 927fbcb..f6208a7 100755
--- a/utils/sda_config.py
+++ b/utils/sda_config.py
@@ -24,12 +24,12 @@ class SdAConfig():
     def __init__(self):

-        # parameters related with training 
+        # parameters related with training
         self.epochs = 5    # number of training epochs for each layer
         self.batch_size = 128    # size of mini-batches
         self.corruption_levels = [0.2 for n in xrange(100)]    # denoising factor; we use an array for future extension to layer-specific factor
         self.learning_rates = [.01 for n in xrange(100)]    # learning rate for each layer
-        self.momentum = 0    # momentum 
+        self.momentum = 0    # momentum
         self.ptr_layer_number = 0    # number of layers to be trained
         self.hidden_activation = T.nnet.sigmoid    # activation function of the hidden layer/output
@@ -37,7 +37,7 @@ def __init__(self):
         # if we normaze the input data with mean (maybe also with variance)
         # normalization, then we need the tanh activation function to reconstruct
         # the input
-        
+
         # for maxout autoencoder
         self.do_maxout = False    # whether to apply maxout on the hidden layer
         self.pool_size = 1    # pooling size of maxout
@@ -48,7 +48,7 @@ def __init__(self):
         self.train_x = None
         self.train_y = None

-        # interfaces for validation data. we don't do validation for RBM, so these variables will be None 
+        # interfaces for validation data. we don't do validation for RBM, so these variables will be None
         # we have these variables because we want to use the _cfg2file function from io_func/model_io.py
         self.valid_sets = None
         self.valid_xy = None
@@ -78,7 +78,7 @@ def init_data_reading(self, train_data_spec):
     def init_activation(self):
         self.activation = parse_activation(self.activation_text)

-    # parse the arguments to get the values for various variables 
+    # parse the arguments to get the values for various variables
    def parse_config_common(self, arguments):
        if arguments.has_key('corruption_level'):
            self.corruption_levels = [float(arguments['corruption_level']) for n in xrange(100)]
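
Note: the utils/sda_config.py hunks are also whitespace-only, but the surrounding fields show the intent of the per-layer arrays: corruption_levels and learning_rates are pre-filled with 100 identical entries so that a single command-line value applies to every layer while leaving room for layer-specific settings later. A hedged sketch of how a layer-wise pretraining loop might consume them; the loop body is a placeholder and not PDNN's actual pretraining code:

    from utils.sda_config import SdAConfig

    cfg = SdAConfig()
    cfg.ptr_layer_number = 3                    # pretrain the first three layers (placeholder value)
    for i in xrange(cfg.ptr_layer_number):
        lr = cfg.learning_rates[i]              # learning rate for layer i
        corruption = cfg.corruption_levels[i]   # denoising factor for layer i
        # ... run cfg.epochs epochs of denoising-autoencoder training for layer i with (lr, corruption)
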