adding dropout-by row #8

Open: wants to merge 12 commits into base: dropout_schedule
24 changes: 12 additions & 12 deletions egs/ami/s5b/local/chain/tuning/run_tdnn_lstm_1i_dp.sh
@@ -5,13 +5,13 @@
# same as 1i but with frame level dropout
# (num-params 1g:21309812 1i: 43447156)
# results on sdm1 using ihm ali
-#System tdnn_lstm1i_sp_bi_ihmali_ld5
-#WER on dev 37.6 36.7
-#WER on eval 40.9 39.9
-#Final train prob -0.114135 -0.118
-#Final valid prob -0.245208 -0.246
-#Final train prob (xent) -1.47648 -1.54
-#Final valid prob (xent) -2.16365 -2.10
+#System tdnn_lstm1i_sp_bi_ihmali_ld5 tdnn_lstm1i_dp_sp_bi_ihmali_ld5

Reviewer: When you do a new experiment you should create a different letter/number combination, e.g. 1j, and use the compare_wer_general.sh script (or whatever it's called) to compare with the baseline, if possible. Please stay within the existing conventions for script naming.

Reviewer: ... also, if the per-frame dropout turns out, in the end, not to be that useful, we might not want to check it into Kaldi. But let's see how your experiments turn out.

Author: @danpovey it would be better if you could have a look at whether my nnet-simple-component.cc in this PR has the right format.

+#WER on dev 37.6 36.5
+#WER on eval 40.9 39.7
+#Final train prob -0.114135 -0.124
+#Final valid prob -0.245208 -0.249
+#Final train prob (xent) -1.47648 -1.55
+#Final valid prob (xent) -2.16365 -2.11


set -e -o pipefail
@@ -28,7 +28,7 @@ gmm=tri3_cleaned # the gmm for the target data
ihm_gmm=tri3 # the gmm for the IHM system (if --use-ihm-ali true).
num_threads_ubm=32
nnet3_affix=_cleaned # cleanup affix for nnet3 and chain dirs, e.g. _cleaned
-dropout_schedule='0,0@0.20,0.5@0.50,0@0.50,0'
+dropout_schedule='0,0@0.20,0.5@0.5,0@0.75,0'
chunk_width=150
chunk_left_context=40
chunk_right_context=0
@@ -37,7 +37,7 @@ label_delay=5
# are just hardcoded at this level, in the commands below.
train_stage=-10
tree_affix= # affix for tree directory, e.g. "a" or "b", in case we change the configuration.
-tlstm_affix=1i #affix for TDNN-LSTM directory, e.g. "a" or "b", in case we change the configuration.
+tlstm_affix=1i_dp #affix for TDNN-LSTM directory, e.g. "a" or "b", in case we change the configuration.
common_egs_dir= # you can set this to use previously dumped egs.


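A note on the schedule changed above: the dropout_schedule string lists dropout proportions at increasing fractions of the training data; the first entry applies at the start, the last at the end, middle entries are proportion@fraction (a bare middle entry applies at the halfway point), and values are linearly interpolated in between (see _parse_dropout_string in common.py below). The new schedule therefore holds dropout at 0 until 20% of training, ramps to 0.5 at 50%, decays back to 0 at 75%, and stays there. A minimal Python sketch of that interpretation, with a hypothetical helper name:

```python
def dropout_at(schedule, fraction):
    """Hypothetical helper: dropout proportion at a training fraction in
    [0, 1], for a Kaldi-style schedule such as '0,0@0.20,0.5@0.5,0@0.75,0'."""
    parts = schedule.split(',')
    points = [(0.0, float(parts[0]))]          # first value: start of training
    for part in parts[1:-1]:
        value, _, frac = part.partition('@')
        points.append((float(frac) if frac else 0.5, float(value)))
    points.append((1.0, float(parts[-1])))     # last value: end of training
    for (x0, y0), (x1, y1) in zip(points, points[1:]):
        if x0 <= fraction <= x1:
            if x1 == x0:                       # repeated point: a step change
                return y1
            return y0 + (y1 - y0) * (fraction - x0) / (x1 - x0)
    return points[-1][1]

# Midway up the ramp from 0@0.20 to 0.5@0.5:
assert abs(dropout_at('0,0@0.20,0.5@0.5,0@0.75,0', 0.35) - 0.25) < 1e-6
```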
@@ -193,15 +193,15 @@ if [ $stage -le 15 ]; then
relu-renorm-layer name=tdnn3 input=Append(-1,0,1) dim=1024

# check steps/libs/nnet3/xconfig/lstm.py for the other options and defaults
-  lstmp-layer name=lstm1 cell-dim=1024 recurrent-projection-dim=256 non-recurrent-projection-dim=256 delay=-3 dropout-proportion=0.0
+  lstmp-layer name=lstm1 cell-dim=1024 recurrent-projection-dim=256 non-recurrent-projection-dim=256 delay=-3 dropout-proportion=0.0 dropout-per-frame=false
relu-renorm-layer name=tdnn4 input=Append(-3,0,3) dim=1024
relu-renorm-layer name=tdnn5 input=Append(-3,0,3) dim=1024
relu-renorm-layer name=tdnn6 input=Append(-3,0,3) dim=1024
-  lstmp-layer name=lstm2 cell-dim=1024 recurrent-projection-dim=256 non-recurrent-projection-dim=256 delay=-3 dropout-proportion=0.0
+  lstmp-layer name=lstm2 cell-dim=1024 recurrent-projection-dim=256 non-recurrent-projection-dim=256 delay=-3 dropout-proportion=0.0 dropout-per-frame=false
relu-renorm-layer name=tdnn7 input=Append(-3,0,3) dim=1024
relu-renorm-layer name=tdnn8 input=Append(-3,0,3) dim=1024
relu-renorm-layer name=tdnn9 input=Append(-3,0,3) dim=1024
-  lstmp-layer name=lstm3 cell-dim=1024 recurrent-projection-dim=256 non-recurrent-projection-dim=256 delay=-3 dropout-proportion=0.0
+  lstmp-layer name=lstm3 cell-dim=1024 recurrent-projection-dim=256 non-recurrent-projection-dim=256 delay=-3 dropout-proportion=0.0 dropout-per-frame=false

## adding the layers for chain branch
output-layer name=output input=lstm3 output-delay=$label_delay include-log-softmax=false dim=$num_targets max-change=1.5
2 changes: 1 addition & 1 deletion egs/wsj/s5/steps/libs/nnet3/train/common.py
@@ -401,7 +401,7 @@ def _parse_dropout_string(num_archives_to_process, dropout_str):
        value_x_pair = parts[i].split('@')
        if len(value_x_pair) == 1:
            # Dropout proportion at half of training
-            dropout_proportion = float(value_x_pair)
+            dropout_proportion = float(value_x_pair[0])
            num_archives = int(0.5 * num_archives_to_process)
        else:
            assert len(value_x_pair) == 2
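The one-character change above fixes a real crash: value_x_pair is a list, and float() of a list raises TypeError, so any schedule entry without an '@' (e.g. a bare '0.5') would abort parsing. A quick illustration:

```python
value_x_pair = '0.5'.split('@')   # bare entry, no '@': -> ['0.5']

try:
    float(value_x_pair)           # old code: float() applied to the list
except TypeError:
    print('old code raises TypeError')

print(float(value_x_pair[0]))     # fixed code: index the element -> 0.5
```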
12 changes: 9 additions & 3 deletions egs/wsj/s5/steps/libs/nnet3/xconfig/lstm.py
@@ -250,7 +250,8 @@ def set_default_configs(self):
                        'self-repair-scale-nonlinearity' : 0.00001,
                        'zeroing-interval' : 20,
                        'zeroing-threshold' : 15.0,
-                       'dropout-proportion' : -1.0 # -1.0 stands for no dropout will be added
+                       'dropout-proportion' : -1.0,  # -1.0 means that no dropout will be added
+                       'dropout-per-frame' : 'false'
                        }

def set_derived_configs(self):
@@ -285,6 +286,10 @@ def check_configs(self):
            self.config['dropout-proportion'] < 0.0) and
            self.config['dropout-proportion'] != -1.0 ):
            raise xparser_error("dropout-proportion has invalid value {0}.".format(self.config['dropout-proportion']))
+
+        if (self.config['dropout-per-frame'] != 'false' and
+            self.config['dropout-per-frame'] != 'true'):
+            raise xparser_error("dropout-per-frame has invalid value {0}.".format(self.config['dropout-per-frame']))

    def auxiliary_outputs(self):
        return ['c_t']
@@ -347,7 +352,8 @@ def generate_lstm_config(self):
        pes_str = self.config['ng-per-element-scale-options']
        lstm_dropout_value = self.config['dropout-proportion']
        lstm_dropout_str = 'dropout-proportion='+str(self.config['dropout-proportion'])
-
+        lstm_dropout_per_frame_value = self.config['dropout-per-frame']
+        lstm_dropout_per_frame_str = 'dropout-per-frame='+str(self.config['dropout-per-frame'])
        # Natural gradient per element scale parameters
        # TODO: decide if we want to keep exposing these options
        if re.search('param-mean', pes_str) is None and \
@@ -427,7 +433,7 @@
        # add the recurrent connections
        configs.append("# projection matrices : Wrm and Wpm")
        if lstm_dropout_value != -1.0:
-            configs.append("component name={0}.W_rp.m.dropout type=DropoutComponent dim={1} {2}".format(name, cell_dim, lstm_dropout_str))
+            configs.append("component name={0}.rp_t.dropout type=DropoutComponent dim={1} {2} {3}".format(name, cell_dim, lstm_dropout_str, lstm_dropout_per_frame_str))
        configs.append("component name={0}.W_rp.m type=NaturalGradientAffineComponent input-dim={1} output-dim={2} {3}".format(name, cell_dim, rec_proj_dim + nonrec_proj_dim, affine_str))
        configs.append("component name={0}.r type=BackpropTruncationComponent dim={1} {2}".format(name, rec_proj_dim, bptrunc_str))

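Putting the two format strings together, the dropout line this layer now emits looks like the following (a sketch with illustrative values, using the same format call as above):

```python
name, cell_dim = 'lstm1', 1024
lstm_dropout_str = 'dropout-proportion=0.0'
lstm_dropout_per_frame_str = 'dropout-per-frame=true'
print('component name={0}.rp_t.dropout type=DropoutComponent dim={1} {2} {3}'
      .format(name, cell_dim, lstm_dropout_str, lstm_dropout_per_frame_str))
# -> component name=lstm1.rp_t.dropout type=DropoutComponent dim=1024
#    dropout-proportion=0.0 dropout-per-frame=true
```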
1 change: 0 additions & 1 deletion egs/wsj/s5/steps/nnet3/chain/train.py
@@ -202,7 +202,6 @@ def process_args(args):
"value={0}. We recommend using the option "
"--trainer.deriv-truncate-margin.".format(
args.deriv_truncate_margin))
-
if (not os.path.exists(args.dir)
or not os.path.exists(args.dir+"/configs")):
raise Exception("This scripts expects {0} to exist and have a configs "
69 changes: 52 additions & 17 deletions src/nnet3/nnet-simple-component.cc
@@ -87,27 +87,33 @@ void PnormComponent::Write(std::ostream &os, bool binary) const {
}


-void DropoutComponent::Init(int32 dim, BaseFloat dropout_proportion) {
+void DropoutComponent::Init(int32 dim, BaseFloat dropout_proportion,
+                            bool dropout_per_frame) {
  dropout_proportion_ = dropout_proportion;
+  dropout_per_frame_ = dropout_per_frame;
  dim_ = dim;
}

void DropoutComponent::InitFromConfig(ConfigLine *cfl) {
  int32 dim = 0;
  BaseFloat dropout_proportion = 0.0;
+  bool dropout_per_frame = false;
  bool ok = cfl->GetValue("dim", &dim) &&
            cfl->GetValue("dropout-proportion", &dropout_proportion);
+  // dropout-per-frame is optional here, so existing config lines that do
+  // not set it keep working and default to per-element dropout.
+  cfl->GetValue("dropout-per-frame", &dropout_per_frame);
  if (!ok || cfl->HasUnusedValues() || dim <= 0 ||
      dropout_proportion < 0.0 || dropout_proportion > 1.0)
    KALDI_ERR << "Invalid initializer for layer of type "
              << Type() << ": \"" << cfl->WholeLine() << "\"";
-  Init(dim, dropout_proportion);
+  Init(dim, dropout_proportion, dropout_per_frame);
}

std::string DropoutComponent::Info() const {
  std::ostringstream stream;
  stream << Type() << ", dim=" << dim_
-         << ", dropout-proportion=" << dropout_proportion_;
+         << ", dropout-proportion=" << dropout_proportion_
+         << ", dropout-per-frame=" << dropout_per_frame_;
  return stream.str();
}

Expand All @@ -119,16 +125,30 @@ void DropoutComponent::Propagate(const ComponentPrecomputedIndexes *indexes,

  BaseFloat dropout = dropout_proportion_;
  KALDI_ASSERT(dropout >= 0.0 && dropout <= 1.0);
-  // This const_cast is only safe assuming you don't attempt
-  // to use multi-threaded code with the GPU.
-  const_cast<CuRand<BaseFloat>&>(random_generator_).RandUniform(out);
-
-  out->Add(-dropout); // now, a proportion "dropout" will be <0.0
-  out->ApplyHeaviside(); // apply the function (x>0?1:0). Now, a proportion "dropout" will
-                         // be zero and (1 - dropout) will be 1.0.
-
-  out->MulElements(in);
+  if (!dropout_per_frame_) {
+    // Standard dropout: an independent 0/1 draw for every element.
+    // This const_cast is only safe assuming you don't attempt
+    // to use multi-threaded code with the GPU.
+    const_cast<CuRand<BaseFloat>&>(random_generator_).RandUniform(out);
+    out->Add(-dropout);     // now, a proportion "dropout" will be <0.0
+    out->ApplyHeaviside();  // apply the function (x>0?1:0). Now, a proportion
+                            // "dropout" is zero and (1 - dropout) is 1.0.
+    out->MulElements(in);
+  } else {
+    // Per-frame dropout (the point of this PR): one 0/1 draw per row,
+    // broadcast across the row, so each frame is kept or zeroed whole.
+    const_cast<CuRand<BaseFloat>&>(random_generator_).RandUniform(out);
+    out->Add(-dropout);
+    out->ApplyHeaviside();
+    // Use column 0 of the masked matrix as the per-row mask, then
+    // broadcast it back to all columns.
+    CuVector<BaseFloat> random_drop_vector(in.NumRows(), kSetZero);
+    random_drop_vector.CopyColFromMat(*out, 0);
+    out->SetZero();
+    out->AddVecToCols(1.0, random_drop_vector, 1.0);
+    out->MulElements(in);
+  }
}


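As a sanity check on the two Propagate branches, here is a minimal numpy sketch (not Kaldi code) of the masks they construct; per_frame=True mirrors the CopyColFromMat/AddVecToCols broadcast, assuming a "frame" is one row of the matrix:

```python
import numpy as np

def dropout_mask(shape, dropout, per_frame, seed=0):
    """0/1 mask with a proportion `dropout` of zeros (in expectation).

    per_frame=False: an independent draw per element (standard dropout).
    per_frame=True:  one draw per row (frame), broadcast across the row,
                     like CopyColFromMat + AddVecToCols above.
    """
    rng = np.random.default_rng(seed)
    if not per_frame:
        # uniform - dropout is > 0 with probability (1 - dropout)
        return (rng.uniform(size=shape) - dropout > 0).astype(np.float32)
    col = (rng.uniform(size=(shape[0], 1)) - dropout > 0).astype(np.float32)
    return np.broadcast_to(col, shape).copy()

x = np.ones((4, 5), dtype=np.float32)
y = x * dropout_mask(x.shape, dropout=0.5, per_frame=True)
# each row of y is now either all zeros or all ones
```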
Expand All @@ -150,11 +170,24 @@ void DropoutComponent::Backprop(const std::string &debug_info,


void DropoutComponent::Read(std::istream &is, bool binary) {
  ExpectOneOrTwoTokens(is, binary, "<DropoutComponent>", "<Dim>");
  ReadBasicType(is, binary, &dim_);
  ExpectToken(is, binary, "<DropoutProportion>");
  ReadBasicType(is, binary, &dropout_proportion_);
-  ExpectToken(is, binary, "</DropoutComponent>");
+  // Back-compatibility code: models written before this change end here,
+  // with no <DropoutPerFrame> token; default the mode to false in that
+  // case, and take care not to consume an extra token.
+  std::string token;
+  ReadToken(is, binary, &token);
+  if (token == "<DropoutPerFrame>") {
+    ReadBasicType(is, binary, &dropout_per_frame_);  // read dropout mode.
+    ExpectToken(is, binary, "</DropoutComponent>");
+  } else {
+    dropout_per_frame_ = false;
+    KALDI_ASSERT(token == "</DropoutComponent>");
+  }
}

void DropoutComponent::Write(std::ostream &os, bool binary) const {
Expand All @@ -163,6 +196,8 @@ void DropoutComponent::Write(std::ostream &os, bool binary) const {
WriteBasicType(os, binary, dim_);
WriteToken(os, binary, "<DropoutProportion>");
WriteBasicType(os, binary, dropout_proportion_);
WriteToken(os, binary, "<DropoutPerFrame>");
WriteBasicType(os, binary, dropout_per_frame_);
WriteToken(os, binary, "</DropoutComponent>");
}

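To make the back-compatibility contract concrete: a new-format model carries a <DropoutPerFrame>/value pair before the closing token, while an old-format model goes straight to </DropoutComponent>, and the reader must not consume an extra token in that case. A small Python sketch of the token protocol (not Kaldi code; it assumes Kaldi's text-mode 'T'/'F' encoding for bools):

```python
def read_dropout_tokens(tokens):
    it = iter(tokens)
    assert next(it) == '<DropoutComponent>'
    assert next(it) == '<Dim>'
    dim = int(next(it))
    assert next(it) == '<DropoutProportion>'
    proportion = float(next(it))
    token = next(it)
    if token == '<DropoutPerFrame>':   # new-format models
        per_frame = (next(it) == 'T')
        token = next(it)
    else:                              # old-format models: default to false
        per_frame = False
    assert token == '</DropoutComponent>'
    return dim, proportion, per_frame

old = '<DropoutComponent> <Dim> 1024 <DropoutProportion> 0.5 </DropoutComponent>'
print(read_dropout_tokens(old.split()))   # -> (1024, 0.5, False)
```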
17 changes: 11 additions & 6 deletions src/nnet3/nnet-simple-component.h
@@ -87,11 +87,13 @@ class PnormComponent: public Component {
// "Dropout: A Simple Way to Prevent Neural Networks from Overfitting".
class DropoutComponent : public RandomComponent {
public:
-  void Init(int32 dim, BaseFloat dropout_proportion = 0.0);
+  void Init(int32 dim, BaseFloat dropout_proportion = 0.0, bool dropout_per_frame = false);

Reviewer: please watch line length.

Reviewer: line too long

-  DropoutComponent(int32 dim, BaseFloat dropout = 0.0) { Init(dim, dropout); }
+  DropoutComponent(int32 dim, BaseFloat dropout = 0.0, bool dropout_per_frame = false) {
+    Init(dim, dropout, dropout_per_frame);
+  }

-  DropoutComponent(): dim_(0), dropout_proportion_(0.0) { }
+  DropoutComponent(): dim_(0), dropout_proportion_(0.0), dropout_per_frame_(false) { }

virtual int32 Properties() const {
return kLinearInInput|kBackpropInPlace|kSimpleComponent|kBackpropNeedsInput|kBackpropNeedsOutput;
@@ -120,17 +122,20 @@ class DropoutComponent : public RandomComponent {
Component *to_update,
CuMatrixBase<BaseFloat> *in_deriv) const;
virtual Component* Copy() const { return new DropoutComponent(dim_,
-                                                                dropout_proportion_); }
+                                                                dropout_proportion_,
+                                                                dropout_per_frame_); }
virtual std::string Info() const;

-  void SetDropoutProportion(BaseFloat dropout_proportion) { dropout_proportion_ = dropout_proportion; }
+  void SetDropoutProportion(BaseFloat dropout_proportion) {
+    dropout_proportion_ = dropout_proportion;
+  }

private:
int32 dim_;
/// dropout-proportion is the proportion that is dropped out,
/// e.g. if 0.1, we set 10% to zero value.
BaseFloat dropout_proportion_;
-
+  bool dropout_per_frame_;
};

class ElementwiseProductComponent: public Component {
Expand Down