adding dropout-by row #8

Open: wants to merge 12 commits into base: dropout_schedule
24 changes: 12 additions & 12 deletions egs/ami/s5b/local/chain/tuning/run_tdnn_lstm_1i_dp.sh
@@ -5,13 +5,13 @@
# same as 1i but with frame level dropout
# (num-params 1g:21309812 1i: 43447156)
# results on sdm1 using ihm ali
-#System tdnn_lstm1i_sp_bi_ihmali_ld5
-#WER on dev 37.6 36.7
-#WER on eval 40.9 39.9
-#Final train prob -0.114135 -0.118
-#Final valid prob -0.245208 -0.246
-#Final train prob (xent) -1.47648 -1.54
-#Final valid prob (xent) -2.16365 -2.10
+#System tdnn_lstm1i_sp_bi_ihmali_ld5 tdnn_lstm1i_dp_sp_bi_ihmali_ld5

Reviewer: When you do a new experiment you should create a different letter/number combination, e.g. 1j, and use the compare_wer_general.sh script (or whatever it's called) to compare with the baseline, if possible. Please stay within the existing conventions for script naming.

Reviewer: ... also, if the per-frame dropout turns out, in the end, not to be that useful, we might not want to check it into Kaldi. But let's see how your experiments turn out.

Author: @danpovey it would be better if you could have a look at whether my nnet-simple-component.cc in this PR has the right format.

+#WER on dev 37.6 36.5
+#WER on eval 40.9 39.7
+#Final train prob -0.114135 -0.124
+#Final valid prob -0.245208 -0.249
+#Final train prob (xent) -1.47648 -1.55
+#Final valid prob (xent) -2.16365 -2.11


set -e -o pipefail
@@ -28,7 +28,7 @@ gmm=tri3_cleaned # the gmm for the target data
ihm_gmm=tri3 # the gmm for the IHM system (if --use-ihm-ali true).
num_threads_ubm=32
nnet3_affix=_cleaned # cleanup affix for nnet3 and chain dirs, e.g. _cleaned
-dropout_schedule='0,0@0.20,0.5@0.50,0@0.50,0'
+dropout_schedule='0,0@0.20,0.5@0.5,0@0.75,0'
chunk_width=150
chunk_left_context=40
chunk_right_context=0
@@ -37,7 +37,7 @@ label_delay=5
# are just hardcoded at this level, in the commands below.
train_stage=-10
tree_affix= # affix for tree directory, e.g. "a" or "b", in case we change the configuration.
-tlstm_affix=1i #affix for TDNN-LSTM directory, e.g. "a" or "b", in case we change the configuration.
+tlstm_affix=1i_dp #affix for TDNN-LSTM directory, e.g. "a" or "b", in case we change the configuration.
common_egs_dir= # you can set this to use previously dumped egs.


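A note on the schedule changed above: the dropout_schedule string lists dropout proportions at increasing fractions of the training data; the first entry applies at the start, the last at the end, middle entries are proportion@fraction (a bare middle entry applies at the halfway point), and values are linearly interpolated in between (see _parse_dropout_string in common.py below). The new schedule therefore holds dropout at 0 until 20% of training, ramps to 0.5 at 50%, decays back to 0 at 75%, and stays there. A minimal Python sketch of that interpretation, with a hypothetical helper name:

```python
def dropout_at(schedule, fraction):
    """Hypothetical helper: dropout proportion at a training fraction in
    [0, 1], for a Kaldi-style schedule such as '0,0@0.20,0.5@0.5,0@0.75,0'."""
    parts = schedule.split(',')
    points = [(0.0, float(parts[0]))]          # first value: start of training
    for part in parts[1:-1]:
        value, _, frac = part.partition('@')
        points.append((float(frac) if frac else 0.5, float(value)))
    points.append((1.0, float(parts[-1])))     # last value: end of training
    for (x0, y0), (x1, y1) in zip(points, points[1:]):
        if x0 <= fraction <= x1:
            if x1 == x0:                       # repeated point: a step change
                return y1
            return y0 + (y1 - y0) * (fraction - x0) / (x1 - x0)
    return points[-1][1]

# Midway up the ramp from 0@0.20 to 0.5@0.5:
assert abs(dropout_at('0,0@0.20,0.5@0.5,0@0.75,0', 0.35) - 0.25) < 1e-6
```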
@@ -193,15 +193,15 @@ if [ $stage -le 15 ]; then
relu-renorm-layer name=tdnn3 input=Append(-1,0,1) dim=1024

# check steps/libs/nnet3/xconfig/lstm.py for the other options and defaults
-  lstmp-layer name=lstm1 cell-dim=1024 recurrent-projection-dim=256 non-recurrent-projection-dim=256 delay=-3 dropout-proportion=0.0
+  lstmp-layer name=lstm1 cell-dim=1024 recurrent-projection-dim=256 non-recurrent-projection-dim=256 delay=-3 dropout-proportion=0.0 dropout-per-frame=false
relu-renorm-layer name=tdnn4 input=Append(-3,0,3) dim=1024
relu-renorm-layer name=tdnn5 input=Append(-3,0,3) dim=1024
relu-renorm-layer name=tdnn6 input=Append(-3,0,3) dim=1024
-  lstmp-layer name=lstm2 cell-dim=1024 recurrent-projection-dim=256 non-recurrent-projection-dim=256 delay=-3 dropout-proportion=0.0
+  lstmp-layer name=lstm2 cell-dim=1024 recurrent-projection-dim=256 non-recurrent-projection-dim=256 delay=-3 dropout-proportion=0.0 dropout-per-frame=false
relu-renorm-layer name=tdnn7 input=Append(-3,0,3) dim=1024
relu-renorm-layer name=tdnn8 input=Append(-3,0,3) dim=1024
relu-renorm-layer name=tdnn9 input=Append(-3,0,3) dim=1024
-  lstmp-layer name=lstm3 cell-dim=1024 recurrent-projection-dim=256 non-recurrent-projection-dim=256 delay=-3 dropout-proportion=0.0
+  lstmp-layer name=lstm3 cell-dim=1024 recurrent-projection-dim=256 non-recurrent-projection-dim=256 delay=-3 dropout-proportion=0.0 dropout-per-frame=false

## adding the layers for chain branch
output-layer name=output input=lstm3 output-delay=$label_delay include-log-softmax=false dim=$num_targets max-change=1.5
2 changes: 1 addition & 1 deletion egs/wsj/s5/steps/libs/nnet3/train/common.py
@@ -401,7 +401,7 @@ def _parse_dropout_string(num_archives_to_process, dropout_str):
        value_x_pair = parts[i].split('@')
        if len(value_x_pair) == 1:
            # Dropout proportion at half of training
-            dropout_proportion = float(value_x_pair)
+            dropout_proportion = float(value_x_pair[0])
            num_archives = int(0.5 * num_archives_to_process)
        else:
            assert len(value_x_pair) == 2
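The one-character change above fixes a real crash: value_x_pair is a list, and float() of a list raises TypeError, so any schedule entry without an '@' (e.g. a bare '0.5') would abort parsing. A quick illustration:

```python
value_x_pair = '0.5'.split('@')   # bare entry, no '@': -> ['0.5']

try:
    float(value_x_pair)           # old code: float() applied to the list
except TypeError:
    print('old code raises TypeError')

print(float(value_x_pair[0]))     # fixed code: index the element -> 0.5
```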
12 changes: 9 additions & 3 deletions egs/wsj/s5/steps/libs/nnet3/xconfig/lstm.py
@@ -250,7 +250,8 @@ def set_default_configs(self):
                        'self-repair-scale-nonlinearity' : 0.00001,
                        'zeroing-interval' : 20,
                        'zeroing-threshold' : 15.0,
-                       'dropout-proportion' : -1.0 # -1.0 stands for no dropout will be added
+                       'dropout-proportion' : -1.0,  # -1.0 means that no dropout will be added
+                       'dropout-per-frame' : 'false'
                        }

def set_derived_configs(self):
@@ -285,6 +286,10 @@ def check_configs(self):
            self.config['dropout-proportion'] < 0.0) and
            self.config['dropout-proportion'] != -1.0 ):
            raise xparser_error("dropout-proportion has invalid value {0}.".format(self.config['dropout-proportion']))
+
+        if (self.config['dropout-per-frame'] != 'false' and
+            self.config['dropout-per-frame'] != 'true'):
+            raise xparser_error("dropout-per-frame has invalid value {0}.".format(self.config['dropout-per-frame']))

    def auxiliary_outputs(self):
        return ['c_t']
@@ -347,7 +352,8 @@ def generate_lstm_config(self):
        pes_str = self.config['ng-per-element-scale-options']
        lstm_dropout_value = self.config['dropout-proportion']
        lstm_dropout_str = 'dropout-proportion='+str(self.config['dropout-proportion'])
-
+        lstm_dropout_per_frame_value = self.config['dropout-per-frame']
+        lstm_dropout_per_frame_str = 'dropout-per-frame='+str(self.config['dropout-per-frame'])
        # Natural gradient per element scale parameters
        # TODO: decide if we want to keep exposing these options
        if re.search('param-mean', pes_str) is None and \
@@ -427,7 +433,7 @@
        # add the recurrent connections
        configs.append("# projection matrices : Wrm and Wpm")
        if lstm_dropout_value != -1.0:
-            configs.append("component name={0}.W_rp.m.dropout type=DropoutComponent dim={1} {2}".format(name, cell_dim, lstm_dropout_str))
+            configs.append("component name={0}.rp_t.dropout type=DropoutComponent dim={1} {2} {3}".format(name, cell_dim, lstm_dropout_str, lstm_dropout_per_frame_str))
        configs.append("component name={0}.W_rp.m type=NaturalGradientAffineComponent input-dim={1} output-dim={2} {3}".format(name, cell_dim, rec_proj_dim + nonrec_proj_dim, affine_str))
        configs.append("component name={0}.r type=BackpropTruncationComponent dim={1} {2}".format(name, rec_proj_dim, bptrunc_str))

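Putting the two format strings together, the dropout line this layer now emits looks like the following (a sketch with illustrative values, using the same format call as above):

```python
name, cell_dim = 'lstm1', 1024
lstm_dropout_str = 'dropout-proportion=0.0'
lstm_dropout_per_frame_str = 'dropout-per-frame=true'
print('component name={0}.rp_t.dropout type=DropoutComponent dim={1} {2} {3}'
      .format(name, cell_dim, lstm_dropout_str, lstm_dropout_per_frame_str))
# -> component name=lstm1.rp_t.dropout type=DropoutComponent dim=1024
#    dropout-proportion=0.0 dropout-per-frame=true
```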
1 change: 0 additions & 1 deletion egs/wsj/s5/steps/nnet3/chain/train.py
@@ -202,7 +202,6 @@ def process_args(args):
"value={0}. We recommend using the option "
"--trainer.deriv-truncate-margin.".format(
args.deriv_truncate_margin))
-
if (not os.path.exists(args.dir)
or not os.path.exists(args.dir+"/configs")):
raise Exception("This scripts expects {0} to exist and have a configs "
69 changes: 52 additions & 17 deletions src/nnet3/nnet-simple-component.cc
@@ -87,27 +87,33 @@ void PnormComponent::Write(std::ostream &os, bool binary) const {
}


-void DropoutComponent::Init(int32 dim, BaseFloat dropout_proportion) {
+void DropoutComponent::Init(int32 dim, BaseFloat dropout_proportion,
+                            bool dropout_per_frame) {
  dropout_proportion_ = dropout_proportion;
+  dropout_per_frame_ = dropout_per_frame;
  dim_ = dim;
}

void DropoutComponent::InitFromConfig(ConfigLine *cfl) {
  int32 dim = 0;
  BaseFloat dropout_proportion = 0.0;
+  bool dropout_per_frame = false;
  bool ok = cfl->GetValue("dim", &dim) &&
            cfl->GetValue("dropout-proportion", &dropout_proportion);
+  // dropout-per-frame is optional here, so existing config lines that do
+  // not set it keep working and default to per-element dropout.
+  cfl->GetValue("dropout-per-frame", &dropout_per_frame);
  if (!ok || cfl->HasUnusedValues() || dim <= 0 ||
      dropout_proportion < 0.0 || dropout_proportion > 1.0)
    KALDI_ERR << "Invalid initializer for layer of type "
              << Type() << ": \"" << cfl->WholeLine() << "\"";
-  Init(dim, dropout_proportion);
+  Init(dim, dropout_proportion, dropout_per_frame);
}

std::string DropoutComponent::Info() const {
  std::ostringstream stream;
  stream << Type() << ", dim=" << dim_
-         << ", dropout-proportion=" << dropout_proportion_;
+         << ", dropout-proportion=" << dropout_proportion_
+         << ", dropout-per-frame=" << dropout_per_frame_;
  return stream.str();
}

Expand All @@ -119,16 +125,30 @@ void DropoutComponent::Propagate(const ComponentPrecomputedIndexes *indexes,

  BaseFloat dropout = dropout_proportion_;
  KALDI_ASSERT(dropout >= 0.0 && dropout <= 1.0);
-  // This const_cast is only safe assuming you don't attempt
-  // to use multi-threaded code with the GPU.
-  const_cast<CuRand<BaseFloat>&>(random_generator_).RandUniform(out);
-
-  out->Add(-dropout); // now, a proportion "dropout" will be <0.0
-  out->ApplyHeaviside(); // apply the function (x>0?1:0). Now, a proportion "dropout" will
-                         // be zero and (1 - dropout) will be 1.0.
-
-  out->MulElements(in);
+  if (!dropout_per_frame_) {
+    // Standard dropout: an independent 0/1 draw for every element.
+    // This const_cast is only safe assuming you don't attempt
+    // to use multi-threaded code with the GPU.
+    const_cast<CuRand<BaseFloat>&>(random_generator_).RandUniform(out);
+    out->Add(-dropout);     // now, a proportion "dropout" will be <0.0
+    out->ApplyHeaviside();  // apply the function (x>0?1:0). Now, a proportion
+                            // "dropout" is zero and (1 - dropout) is 1.0.
+    out->MulElements(in);
+  } else {
+    // Per-frame dropout (the point of this PR): one 0/1 draw per row,
+    // broadcast across the row, so each frame is kept or zeroed whole.
+    const_cast<CuRand<BaseFloat>&>(random_generator_).RandUniform(out);
+    out->Add(-dropout);
+    out->ApplyHeaviside();
+    // Use column 0 of the masked matrix as the per-row mask, then
+    // broadcast it back to all columns.
+    CuVector<BaseFloat> random_drop_vector(in.NumRows(), kSetZero);
+    random_drop_vector.CopyColFromMat(*out, 0);
+    out->SetZero();
+    out->AddVecToCols(1.0, random_drop_vector, 1.0);
+    out->MulElements(in);
+  }
}


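As a sanity check on the two Propagate branches, here is a minimal numpy sketch (not Kaldi code) of the masks they construct; per_frame=True mirrors the CopyColFromMat/AddVecToCols broadcast, assuming a "frame" is one row of the matrix:

```python
import numpy as np

def dropout_mask(shape, dropout, per_frame, seed=0):
    """0/1 mask with a proportion `dropout` of zeros (in expectation).

    per_frame=False: an independent draw per element (standard dropout).
    per_frame=True:  one draw per row (frame), broadcast across the row,
                     like CopyColFromMat + AddVecToCols above.
    """
    rng = np.random.default_rng(seed)
    if not per_frame:
        # uniform - dropout is > 0 with probability (1 - dropout)
        return (rng.uniform(size=shape) - dropout > 0).astype(np.float32)
    col = (rng.uniform(size=(shape[0], 1)) - dropout > 0).astype(np.float32)
    return np.broadcast_to(col, shape).copy()

x = np.ones((4, 5), dtype=np.float32)
y = x * dropout_mask(x.shape, dropout=0.5, per_frame=True)
# each row of y is now either all zeros or all ones
```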
Expand All @@ -150,11 +170,24 @@ void DropoutComponent::Backprop(const std::string &debug_info,


void DropoutComponent::Read(std::istream &is, bool binary) {
  ExpectOneOrTwoTokens(is, binary, "<DropoutComponent>", "<Dim>");
  ReadBasicType(is, binary, &dim_);
  ExpectToken(is, binary, "<DropoutProportion>");
  ReadBasicType(is, binary, &dropout_proportion_);
-  ExpectToken(is, binary, "</DropoutComponent>");
+  // Back-compatibility code: models written before this change end here,
+  // with no <DropoutPerFrame> token; default the mode to false in that
+  // case, and take care not to consume an extra token.
+  std::string token;
+  ReadToken(is, binary, &token);
+  if (token == "<DropoutPerFrame>") {
+    ReadBasicType(is, binary, &dropout_per_frame_);  // read dropout mode.
+    ExpectToken(is, binary, "</DropoutComponent>");
+  } else {
+    dropout_per_frame_ = false;
+    KALDI_ASSERT(token == "</DropoutComponent>");
+  }
}

void DropoutComponent::Write(std::ostream &os, bool binary) const {
Expand All @@ -163,6 +196,8 @@ void DropoutComponent::Write(std::ostream &os, bool binary) const {
WriteBasicType(os, binary, dim_);
WriteToken(os, binary, "<DropoutProportion>");
WriteBasicType(os, binary, dropout_proportion_);
WriteToken(os, binary, "<DropoutPerFrame>");
WriteBasicType(os, binary, dropout_per_frame_);
WriteToken(os, binary, "</DropoutComponent>");
}

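To make the back-compatibility contract concrete: a new-format model carries a <DropoutPerFrame>/value pair before the closing token, while an old-format model goes straight to </DropoutComponent>, and the reader must not consume an extra token in that case. A small Python sketch of the token protocol (not Kaldi code; it assumes Kaldi's text-mode 'T'/'F' encoding for bools):

```python
def read_dropout_tokens(tokens):
    it = iter(tokens)
    assert next(it) == '<DropoutComponent>'
    assert next(it) == '<Dim>'
    dim = int(next(it))
    assert next(it) == '<DropoutProportion>'
    proportion = float(next(it))
    token = next(it)
    if token == '<DropoutPerFrame>':   # new-format models
        per_frame = (next(it) == 'T')
        token = next(it)
    else:                              # old-format models: default to false
        per_frame = False
    assert token == '</DropoutComponent>'
    return dim, proportion, per_frame

old = '<DropoutComponent> <Dim> 1024 <DropoutProportion> 0.5 </DropoutComponent>'
print(read_dropout_tokens(old.split()))   # -> (1024, 0.5, False)
```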
17 changes: 11 additions & 6 deletions src/nnet3/nnet-simple-component.h
@@ -87,11 +87,13 @@ class PnormComponent: public Component {
// "Dropout: A Simple Way to Prevent Neural Networks from Overfitting".
class DropoutComponent : public RandomComponent {
public:
-  void Init(int32 dim, BaseFloat dropout_proportion = 0.0);
+  void Init(int32 dim, BaseFloat dropout_proportion = 0.0, bool dropout_per_frame = false);

Reviewer: please watch line length.

Reviewer: line too long

-  DropoutComponent(int32 dim, BaseFloat dropout = 0.0) { Init(dim, dropout); }
+  DropoutComponent(int32 dim, BaseFloat dropout = 0.0, bool dropout_per_frame = false) {
+    Init(dim, dropout, dropout_per_frame);
+  }

-  DropoutComponent(): dim_(0), dropout_proportion_(0.0) { }
+  DropoutComponent(): dim_(0), dropout_proportion_(0.0), dropout_per_frame_(false) { }

virtual int32 Properties() const {
return kLinearInInput|kBackpropInPlace|kSimpleComponent|kBackpropNeedsInput|kBackpropNeedsOutput;
@@ -120,17 +122,20 @@ class DropoutComponent : public RandomComponent {
Component *to_update,
CuMatrixBase<BaseFloat> *in_deriv) const;
virtual Component* Copy() const { return new DropoutComponent(dim_,
-                                                                dropout_proportion_); }
+                                                                dropout_proportion_,
+                                                                dropout_per_frame_); }
virtual std::string Info() const;

-  void SetDropoutProportion(BaseFloat dropout_proportion) { dropout_proportion_ = dropout_proportion; }
+  void SetDropoutProportion(BaseFloat dropout_proportion) {
+    dropout_proportion_ = dropout_proportion;
+  }

private:
int32 dim_;
/// dropout-proportion is the proportion that is dropped out,
/// e.g. if 0.1, we set 10% to zero value.
BaseFloat dropout_proportion_;
-
+  bool dropout_per_frame_;
};

class ElementwiseProductComponent: public Component {
Expand Down