adding dropout-by row #8

Open · wants to merge 12 commits into base: dropout_schedule
24 changes: 12 additions & 12 deletions egs/ami/s5b/local/chain/tuning/run_tdnn_lstm_1i_dp.sh
@@ -5,13 +5,13 @@
# same as 1i but with frame level dropout
# (num-params 1g:21309812 1i: 43447156)
# results on sdm1 using ihm ali
#System tdnn_lstm1i_sp_bi_ihmali_ld5
#WER on dev 37.6 36.7
#WER on eval 40.9 39.9
#Final train prob -0.114135 -0.118
#Final valid prob -0.245208 -0.246
#Final train prob (xent) -1.47648 -1.54
#Final valid prob (xent) -2.16365 -2.10
#System tdnn_lstm1i_sp_bi_ihmali_ld5 tdnn_lstm1i_dp_sp_bi_ihmali_ld5

When you do a new experiment you should create a different letter/number combination, e.g. 1j, and use the 'compare_wer_general.sh' script (or whatever it's called) to compare with the baseline, if possible. Please stay within the existing conventions for script naming.

... also, if the per-frame dropout turns out, in the end, not to be that useful, we might not want to check it into Kaldi. But let's see how your experiments turn out.

Author:

@danpovey it would be better if you could have a look at whether my nnet-simple-component.cc in this PR has the right format.

#WER on dev 37.6 36.5
#WER on eval 40.9 39.7
#Final train prob -0.114135 -0.124
#Final valid prob -0.245208 -0.249
#Final train prob (xent) -1.47648 -1.55
#Final valid prob (xent) -2.16365 -2.11


set -e -o pipefail
@@ -28,7 +28,7 @@ gmm=tri3_cleaned # the gmm for the target data
ihm_gmm=tri3 # the gmm for the IHM system (if --use-ihm-ali true).
num_threads_ubm=32
nnet3_affix=_cleaned # cleanup affix for nnet3 and chain dirs, e.g. _cleaned
dropout_schedule='0,0@0.20,0.5@0.50,0@0.50,0'
dropout_schedule='0,0@0.20,0.5@0.5,0@0.75,0'
chunk_width=150
chunk_left_context=40
chunk_right_context=0
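
The new schedule string is consumed by _parse_dropout_string in steps/libs/nnet3/train/common.py (patched further down). As a rough reading, each comma-separated entry is proportion[@data_fraction], the first and last entries apply at the start and end of training, bare entries in the middle default to the halfway point, and the proportion is interpolated linearly between points, so '0,0@0.20,0.5@0.5,0@0.75,0' keeps dropout at 0 until 20% of the data, ramps to 0.5 at 50%, and falls back to 0 by 75%. A minimal sketch of that interpretation (dropout_at is a hypothetical helper, not Kaldi code):

def dropout_at(schedule, data_fraction):
    """Sketch: dropout proportion after processing `data_fraction` of the data."""
    entries = schedule.split(',')
    points = []
    for i, entry in enumerate(entries):
        if '@' in entry:
            value, frac = entry.split('@')
        else:
            # bare value: first -> start, last -> end, otherwise assume mid-training
            value = entry
            frac = 0.0 if i == 0 else (1.0 if i == len(entries) - 1 else 0.5)
        points.append((float(frac), float(value)))
    points.sort()
    for (f0, v0), (f1, v1) in zip(points, points[1:]):
        if f0 <= data_fraction <= f1:
            return v1 if f1 == f0 else v0 + (v1 - v0) * (data_fraction - f0) / (f1 - f0)
    return points[-1][1]

# e.g. dropout_at('0,0@0.20,0.5@0.5,0@0.75,0', 0.35) -> 0.25 (halfway up the ramp)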
@@ -37,7 +37,7 @@ label_delay=5
# are just hardcoded at this level, in the commands below.
train_stage=-10
tree_affix= # affix for tree directory, e.g. "a" or "b", in case we change the configuration.
tlstm_affix=1i #affix for TDNN-LSTM directory, e.g. "a" or "b", in case we change the configuration.
tlstm_affix=1i_dp #affix for TDNN-LSTM directory, e.g. "a" or "b", in case we change the configuration.
common_egs_dir= # you can set this to use previously dumped egs.


@@ -193,15 +193,15 @@ if [ $stage -le 15 ]; then
relu-renorm-layer name=tdnn3 input=Append(-1,0,1) dim=1024

# check steps/libs/nnet3/xconfig/lstm.py for the other options and defaults
lstmp-layer name=lstm1 cell-dim=1024 recurrent-projection-dim=256 non-recurrent-projection-dim=256 delay=-3 dropout-proportion=0.0
lstmp-layer name=lstm1 cell-dim=1024 recurrent-projection-dim=256 non-recurrent-projection-dim=256 delay=-3 dropout-proportion=0.0 dropout-per-frame=false
relu-renorm-layer name=tdnn4 input=Append(-3,0,3) dim=1024
relu-renorm-layer name=tdnn5 input=Append(-3,0,3) dim=1024
relu-renorm-layer name=tdnn6 input=Append(-3,0,3) dim=1024
lstmp-layer name=lstm2 cell-dim=1024 recurrent-projection-dim=256 non-recurrent-projection-dim=256 delay=-3 dropout-proportion=0.0
lstmp-layer name=lstm2 cell-dim=1024 recurrent-projection-dim=256 non-recurrent-projection-dim=256 delay=-3 dropout-proportion=0.0 dropout-per-frame=false
relu-renorm-layer name=tdnn7 input=Append(-3,0,3) dim=1024
relu-renorm-layer name=tdnn8 input=Append(-3,0,3) dim=1024
relu-renorm-layer name=tdnn9 input=Append(-3,0,3) dim=1024
lstmp-layer name=lstm3 cell-dim=1024 recurrent-projection-dim=256 non-recurrent-projection-dim=256 delay=-3 dropout-proportion=0.0
lstmp-layer name=lstm3 cell-dim=1024 recurrent-projection-dim=256 non-recurrent-projection-dim=256 delay=-3 dropout-proportion=0.0 dropout-per-frame=false

## adding the layers for chain branch
output-layer name=output input=lstm3 output-delay=$label_delay include-log-softmax=false dim=$num_targets max-change=1.5
2 changes: 1 addition & 1 deletion egs/wsj/s5/steps/libs/nnet3/train/common.py
@@ -401,7 +401,7 @@ def _parse_dropout_string(num_archives_to_process, dropout_str):
value_x_pair = parts[i].split('@')
if len(value_x_pair) == 1:
# Dropout proportion at half of training
dropout_proportion = float(value_x_pair)
dropout_proportion = float(value_x_pair[0])
num_archives = int(0.5 * num_archives_to_process)
else:
assert len(value_x_pair) == 2
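
This one-line fix matters for schedule entries without an '@': split('@') returns a list, and calling float() on a list raises a TypeError, so the single element has to be indexed out first. A quick illustration of the failure and the fix:

value_x_pair = '0.5'.split('@')              # -> ['0.5'] for an entry with no '@'
# dropout_proportion = float(value_x_pair)   # raises TypeError: float() on a list
dropout_proportion = float(value_x_pair[0])  # 0.5, as intended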
12 changes: 9 additions & 3 deletions egs/wsj/s5/steps/libs/nnet3/xconfig/lstm.py
@@ -250,7 +250,8 @@ def set_default_configs(self):
'self-repair-scale-nonlinearity' : 0.00001,
'zeroing-interval' : 20,
'zeroing-threshold' : 15.0,
'dropout-proportion' : -1.0 # -1.0 stands for no dropout will be added
'dropout-proportion' : -1.0 ,# -1.0 stands for no dropout will be added
'dropout-per-frame' : 'false'
}

def set_derived_configs(self):
@@ -285,6 +286,10 @@ def check_configs(self):
self.config['dropout-proportion'] < 0.0) and
self.config['dropout-proportion'] != -1.0 ):
raise xparser_error("dropout-proportion has invalid value {0}.".format(self.config['dropout-proportion']))

if (self.config['dropout-per-frame'] != 'false' and
self.config['dropout-per-frame'] != 'true'):
raise xparser_error("dropout-per-frame has invalid value {0}.".format(self.config['dropout-per-frame']))

def auxiliary_outputs(self):
return ['c_t']
@@ -347,7 +352,8 @@ def generate_lstm_config(self):
pes_str = self.config['ng-per-element-scale-options']
lstm_dropout_value = self.config['dropout-proportion']
lstm_dropout_str = 'dropout-proportion='+str(self.config['dropout-proportion'])

lstm_dropout_per_frame_value = self.config['dropout-per-frame']
lstm_dropout_per_frame_str = 'dropout-per-frame='+str(self.config['dropout-per-frame'])
# Natural gradient per element scale parameters
# TODO: decide if we want to keep exposing these options
if re.search('param-mean', pes_str) is None and \
@@ -427,7 +433,7 @@ def generate_lstm_config(self):
# add the recurrent connections
configs.append("# projection matrices : Wrm and Wpm")
if lstm_dropout_value != -1.0:
configs.append("component name={0}.W_rp.m.dropout type=DropoutComponent dim={1} {2}".format(name, cell_dim, lstm_dropout_str))
configs.append("component name={0}.rp_t.dropout type=DropoutComponent dim={1} {2} {3}".format(name, cell_dim, lstm_dropout_str, lstm_dropout_per_frame_str))
configs.append("component name={0}.W_rp.m type=NaturalGradientAffineComponent input-dim={1} output-dim={2} {3}".format(name, cell_dim, rec_proj_dim + nonrec_proj_dim, affine_str))
configs.append("component name={0}.r type=BackpropTruncationComponent dim={1} {2}".format(name, rec_proj_dim, bptrunc_str))

1 change: 0 additions & 1 deletion egs/wsj/s5/steps/nnet3/chain/train.py
@@ -202,7 +202,6 @@ def process_args(args):
"value={0}. We recommend using the option "
"--trainer.deriv-truncate-margin.".format(
args.deriv_truncate_margin))

if (not os.path.exists(args.dir)
or not os.path.exists(args.dir+"/configs")):
raise Exception("This scripts expects {0} to exist and have a configs "