-
Notifications
You must be signed in to change notification settings - Fork 3
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
adding dropout-by row #8
base: dropout_schedule
Are you sure you want to change the base?
Changes from 6 commits
6548b55
23ae730
614a868
c1d1ad1
14662b6
1d22219
5b8b98b
4137c9d
d721e59
1e2adab
463a4dc
d0290c3
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -511,7 +511,7 @@ def _get_component_dropout(dropout_schedule, num_archives_processed): | |
+ initial_dropout) | ||
|
||
|
||
def apply_dropout(dropout_proportions, raw_model_string): | ||
def apply_dropout(dropout_proportions, dropout_per_frame, raw_model_string): | ||
"""Adds an nnet3-copy --edits line to modify raw_model_string to | ||
set dropout proportions according to dropout_proportions. | ||
|
||
|
@@ -523,10 +523,10 @@ def apply_dropout(dropout_proportions, raw_model_string): | |
|
||
for component_name, dropout_proportion in dropout_proportions: | ||
edit_config_lines.append( | ||
"set-dropout-proportion name={0} proportion={1}".format( | ||
component_name, dropout_proportion)) | ||
dropout_info.append("pattern/dropout-proportion={0}/{1}".format( | ||
component_name, dropout_proportion)) | ||
"set-dropout-proportion name={0} proportion={1} dropout-per-frame={2}".format( | ||
component_name, dropout_proportion, dropout_per_frame)) | ||
dropout_info.append("pattern/dropout-proportion={0}/{1} dropout-per-frame={2}".format( | ||
component_name, dropout_proportion, dropout_per_frame)) | ||
|
||
return ("""{raw_model_string} nnet3-copy --edits='{edits}' \ | ||
- - |""".format(raw_model_string=raw_model_string, | ||
|
@@ -771,6 +771,11 @@ def __init__(self): | |
lstm*=0,0.2,0'. More general should precede | ||
less general patterns, as they are applied | ||
sequentially.""") | ||
self.parser.add_argument("--trainer.dropout-per-frame", type=str, | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Is this option required? Do you expect to change whether dropout is per-frame or not during the training iterations? |
||
action=common_lib.NullstrToNoneAction, | ||
dest='dropout_per_frame', default=None, | ||
help="""this option is used to control whether | ||
using dropout by frame level or by vector level""") | ||
|
||
# General options | ||
self.parser.add_argument("--stage", type=int, default=-4, | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -64,6 +64,7 @@ void cudaF_apply_pow(dim3 Gr, dim3 Bl, float* mat, float power, MatrixDim d); | |
void cudaF_apply_pow_abs(dim3 Gr, dim3 Bl, float* mat, float power, | ||
bool include_sign, MatrixDim d); | ||
void cudaF_apply_heaviside(dim3 Gr, dim3 Bl, float* mat, MatrixDim d); | ||
void cudaF_apply_heaviside_by_row(dim3 Gr, dim3 Bl, float* mat, MatrixDim d); | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. There is no need for any of these changes in cudamatrix/; just use CopyColsFromVec(). |
||
void cudaF_apply_floor(dim3 Gr, dim3 Bl, float* mat, float floor_val, | ||
MatrixDim d); | ||
void cudaF_copy_cols(dim3 Gr, dim3 Bl, float* dst, const float* src, | ||
|
@@ -330,6 +331,7 @@ void cudaD_apply_pow(dim3 Gr, dim3 Bl, double* mat, double power, MatrixDim d); | |
void cudaD_apply_pow_abs(dim3 Gr, dim3 Bl, double* mat, double power, | ||
bool include_sign, MatrixDim d); | ||
void cudaD_apply_heaviside(dim3 Gr, dim3 Bl, double* mat, MatrixDim d); | ||
void cudaD_apply_heaviside_by_row(dim3 Gr, dim3 Bl, double* mat, MatrixDim d); | ||
void cudaD_apply_floor(dim3 Gr, dim3 Bl, double* mat, double floor_val, | ||
MatrixDim d); | ||
void cudaD_copy_cols(dim3 Gr, dim3 Bl, double* dst, const double* src, | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -1628,6 +1628,23 @@ static void _apply_heaviside(Real* mat, MatrixDim d) { | |
mat[index] = (mat[index] > 0.0 ? 1.0 : 0.0); | ||
} | ||
|
||
template<typename Real> | ||
__global__ | ||
static void _apply_heaviside_by_row(Real* mat, MatrixDim d) { | ||
int i = blockIdx.x * blockDim.x + threadIdx.x; // col index | ||
int j = blockIdx.y * blockDim.y + threadIdx.y; // row index | ||
int j_tempt = blockIdx.y * blockDim.y + threadIdx.y; // row index using to control setting heavyside() in the first rows | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Did you want to get the 0th row or something? You have given the same expression as j. |
||
int index = i + j * d.stride; | ||
if (i < d.cols && j < d.rows) | ||
if (j = j_tempt) { | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Use == (comparison) here, not = (assignment). |
||
mat[index] = (mat[index] > 0.0 ? 1.0 : 0.0); | ||
} | ||
else { | ||
mat[index] = mat[index-d.stride-d.cols]; | ||
} | ||
} | ||
|
||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. @danpovey I think there may exist some problem:
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. This is probably because of not using ==. |
||
|
||
template<typename Real> | ||
__global__ | ||
static void _apply_floor(Real* mat, Real floor_val, MatrixDim d) { | ||
|
@@ -3233,6 +3250,10 @@ void cudaF_apply_heaviside(dim3 Gr, dim3 Bl, float* mat, MatrixDim d) { | |
_apply_heaviside<<<Gr,Bl>>>(mat, d); | ||
} | ||
|
||
void cudaF_apply_heaviside_by_row(dim3 Gr, dim3 Bl, float* mat, MatrixDim d) { | ||
_apply_heaviside_by_row<<<Gr,Bl>>>(mat, d); | ||
} | ||
|
||
void cudaF_copy_cols(dim3 Gr, dim3 Bl, float* dst, const float* src, | ||
const MatrixIndexT_cuda* reorder, MatrixDim dst_dim, | ||
int src_stride) { | ||
|
@@ -3880,6 +3901,10 @@ void cudaD_apply_heaviside(dim3 Gr, dim3 Bl, double* mat, MatrixDim d) { | |
_apply_heaviside<<<Gr,Bl>>>(mat, d); | ||
} | ||
|
||
void cudaD_apply_heaviside_by_row(dim3 Gr, dim3 Bl, double* mat, MatrixDim d) { | ||
_apply_heaviside_by_row<<<Gr,Bl>>>(mat, d); | ||
} | ||
|
||
void cudaD_copy_cols(dim3 Gr, dim3 Bl, double* dst, const double* src, | ||
const MatrixIndexT_cuda* reorder, MatrixDim dst_dim, | ||
int src_stride) { | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -34,7 +34,7 @@ NnetCombiner::NnetCombiner(const NnetCombineConfig &config, | |
nnet_params_(std::min(num_nnets, config_.max_effective_inputs), | ||
NumParameters(first_nnet)), | ||
tot_input_weighting_(nnet_params_.NumRows()) { | ||
SetDropoutProportion(0, &nnet_); | ||
SetDropoutProportion(0, false, &nnet_); | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. You can remove this 'false' argument from the function; just make it a fixed property of the component that can't be changed after you initialize it. |
||
SubVector<BaseFloat> first_params(nnet_params_, 0); | ||
VectorizeNnet(nnet_, &first_params); | ||
tot_input_weighting_(0) += 1.0; | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -87,27 +87,37 @@ void PnormComponent::Write(std::ostream &os, bool binary) const { | |
} | ||
|
||
|
||
void DropoutComponent::Init(int32 dim, BaseFloat dropout_proportion) { | ||
void DropoutComponent::Init(int32 dim, BaseFloat dropout_proportion, bool dropout_per_frame) { | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. please watch line length (80-char limit) |
||
dropout_proportion_ = dropout_proportion; | ||
dropout_per_frame_ = dropout_per_frame; | ||
dim_ = dim; | ||
} | ||
|
||
void DropoutComponent::InitFromConfig(ConfigLine *cfl) { | ||
int32 dim = 0; | ||
BaseFloat dropout_proportion = 0.0; | ||
bool dropout_per_frame = false; | ||
bool ok = cfl->GetValue("dim", &dim) && | ||
cfl->GetValue("dropout-proportion", &dropout_proportion); | ||
bool ok2 = cfl->GetValue("dropout-per-frame", &dropout_per_frame); | ||
if (!ok || cfl->HasUnusedValues() || dim <= 0 || | ||
dropout_proportion < 0.0 || dropout_proportion > 1.0) | ||
KALDI_ERR << "Invalid initializer for layer of type " | ||
<< Type() << ": \"" << cfl->WholeLine() << "\""; | ||
Init(dim, dropout_proportion); | ||
if( ! ok2 ) | ||
{ | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. you don't need a branch here because dropout_per_frame defaults to false if not set (that's how you |
||
dropout_per_frame = false; | ||
Init(dim, dropout_proportion, dropout_per_frame); | ||
} else { | ||
Init(dim, dropout_proportion, dropout_per_frame); | ||
} | ||
} | ||
|
||
std::string DropoutComponent::Info() const { | ||
std::ostringstream stream; | ||
stream << Type() << ", dim=" << dim_ | ||
<< ", dropout-proportion=" << dropout_proportion_; | ||
<< ", dropout-proportion=" << dropout_proportion_ | ||
<< ", dropout-per-frame=" << dropout_per_frame_; | ||
return stream.str(); | ||
} | ||
|
||
|
@@ -119,16 +129,27 @@ void DropoutComponent::Propagate(const ComponentPrecomputedIndexes *indexes, | |
|
||
BaseFloat dropout = dropout_proportion_; | ||
KALDI_ASSERT(dropout >= 0.0 && dropout <= 1.0); | ||
if(dropout_per_frame_) | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Please use the correct code style. Should be `if (dropout_per_frame_) {`
and note the space after if. You can run misc/maintenance/cpplint.py on your code to check for style problems. |
||
{ | ||
// This const_cast is only safe assuming you don't attempt | ||
// to use multi-threaded code with the GPU. | ||
const_cast<CuRand<BaseFloat>&>(random_generator_).RandUniform(out); | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Here, you'll want to create a temporary vector with dimension equal to the num-rows in your 'in'/'out' matrices, and do the rand stuff on that, then you'll need CopyColsFromVec(). |
||
|
||
// This const_cast is only safe assuming you don't attempt | ||
// to use multi-threaded code with the GPU. | ||
const_cast<CuRand<BaseFloat>&>(random_generator_).RandUniform(out); | ||
out->Add(-dropout); // now, a proportion "dropout" will be <0.0 | ||
out->ApplyHeaviside(); // apply the function (x>0?1:0). Now, a proportion "dropout" will | ||
// be zero and (1 - dropout) will be 1.0. | ||
|
||
out->Add(-dropout); // now, a proportion "dropout" will be <0.0 | ||
out->ApplyHeaviside(); // apply the function (x>0?1:0). Now, a proportion "dropout" will | ||
// be zero and (1 - dropout) will be 1.0. | ||
out->MulElements(in); | ||
} else { | ||
|
||
out->MulElements(in); | ||
// This const_cast is only safe assuming you don't attempt | ||
// to use multi-threaded code with the GPU. | ||
const_cast<CuRand<BaseFloat>&>(random_generator_).RandUniform(out); | ||
out->Add(-dropout); // now, a proportion "dropout" will be <0.0 | ||
out->ApplyHeavisideByRow(); // apply the function (x>0?1:0). Now, a proportion "dropout" will | ||
// be zero and (1 - dropout) will be 1.0 by row. | ||
out->MulElements(in); | ||
} | ||
} | ||
|
||
|
||
|
@@ -154,6 +175,8 @@ void DropoutComponent::Read(std::istream &is, bool binary) { | |
ReadBasicType(is, binary, &dim_); | ||
ExpectToken(is, binary, "<DropoutProportion>"); | ||
ReadBasicType(is, binary, &dropout_proportion_); | ||
ExpectToken(is, binary, "<DropoutPerFrame>"); | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Make this backward compatible. Change this to ReadToken and then add an if condition to check which token is present. There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. @GaofengCheng, you need to understand what Vimal was saying here: there needs to be backward-compatibility code for the old format. Search for ReadToken() in the file for examples. However, the reason for your error is that you need to recompile in 'chainbin/' (and possibly 'chain/'). There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. ... and, of course, make this backward compatible. |
||
ReadBasicType(is, binary, &dropout_per_frame_); | ||
ExpectToken(is, binary, "</DropoutComponent>"); | ||
} | ||
|
||
|
@@ -163,6 +186,8 @@ void DropoutComponent::Write(std::ostream &os, bool binary) const { | |
WriteBasicType(os, binary, dim_); | ||
WriteToken(os, binary, "<DropoutProportion>"); | ||
WriteBasicType(os, binary, dropout_proportion_); | ||
WriteToken(os, binary, "<DropoutPerFrame>"); | ||
WriteBasicType(os, binary, dropout_per_frame_); | ||
WriteToken(os, binary, "</DropoutComponent>"); | ||
} | ||
|
||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
As Vimal says, please remove this from the training code; it does not need to be there.