Commit e6c0dd2

Fix issue #283: word_format in text features is now properly used + more defensive behaviour in seq2seq_sequence_loss
w4nderlust committed Apr 12, 2019
1 parent eaf6ca7 commit e6c0dd2
Showing 2 changed files with 9 additions and 12 deletions.
18 changes: 7 additions & 11 deletions ludwig/features/text_feature.py
@@ -55,8 +55,7 @@ def __init__(self, feature):
         }
 
     @staticmethod
-    def feature_meta(column, most_common_characters, most_common_words,
-                     lowercase):
+    def feature_meta(column, preprocessing_parameters):
         (
             char_idx2str,
             char_str2idx,
@@ -65,8 +64,8 @@ def feature_meta(column, most_common_characters, most_common_words,
         ) = create_vocabulary(
             column,
             'characters',
-            num_most_frequent=most_common_characters,
-            lowercase=lowercase
+            num_most_frequent=preprocessing_parameters['char_most_common'],
+            lowercase=preprocessing_parameters['lowercase']
         )
         (
             word_idx2str,
@@ -75,9 +74,9 @@ def feature_meta(column, most_common_characters, most_common_words,
             word_max_len
         ) = create_vocabulary(
             column,
-            'english_tokenize',
-            num_most_frequent=most_common_words,
-            lowercase=lowercase
+            preprocessing_parameters['word_format'],
+            num_most_frequent=preprocessing_parameters['word_most_common'],
+            lowercase=preprocessing_parameters['lowercase']
         )
         return (
             char_idx2str,
@@ -93,10 +92,7 @@ def feature_meta(column, most_common_characters, most_common_words,
     @staticmethod
     def get_feature_meta(column, preprocessing_parameters):
         tf_meta = TextBaseFeature.feature_meta(
-            column,
-            preprocessing_parameters['char_most_common'],
-            preprocessing_parameters['word_most_common'],
-            preprocessing_parameters['lowercase']
+            column, preprocessing_parameters
         )
         (
             char_idx2str,
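
In effect, feature_meta now pulls every setting it needs out of preprocessing_parameters, so the user-supplied word_format is honoured when building the word vocabulary instead of the hard-coded 'english_tokenize'. A minimal sketch of that flow, using a simplified stand-in for create_vocabulary (the function body and the example parameter values below are illustrative, not Ludwig's actual implementation):

from collections import Counter

def create_vocabulary(column, format_str, num_most_frequent, lowercase):
    # Simplified stand-in: split each cell into characters or
    # whitespace-separated words, then keep the most frequent tokens.
    def tokenize(text):
        text = text.lower() if lowercase else text
        return list(text) if format_str == 'characters' else text.split()

    counts = Counter(token for cell in column for token in tokenize(cell))
    idx2str = [token for token, _ in counts.most_common(num_most_frequent)]
    str2idx = {token: i for i, token in enumerate(idx2str)}
    max_len = max(len(tokenize(cell)) for cell in column)
    return idx2str, str2idx, len(idx2str), max_len

def feature_meta(column, preprocessing_parameters):
    # Character vocabulary: the format is always 'characters'.
    char_meta = create_vocabulary(
        column,
        'characters',
        num_most_frequent=preprocessing_parameters['char_most_common'],
        lowercase=preprocessing_parameters['lowercase']
    )
    # Word vocabulary: the format now comes from 'word_format'
    # instead of being hard-coded to 'english_tokenize'.
    word_meta = create_vocabulary(
        column,
        preprocessing_parameters['word_format'],
        num_most_frequent=preprocessing_parameters['word_most_common'],
        lowercase=preprocessing_parameters['lowercase']
    )
    return char_meta, word_meta

# Hypothetical parameter values, for illustration only.
char_meta, word_meta = feature_meta(
    ['Hello world', 'hello again'],
    {'char_most_common': 70, 'word_most_common': 20000,
     'lowercase': True, 'word_format': 'space'}
)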
3 changes: 2 additions & 1 deletion ludwig/models/modules/loss_modules.py
@@ -62,8 +62,9 @@ def seq2seq_sequence_loss(targets, targets_sequence_length, logits,
                           softmax_function=None):
     batch_max_targets_sequence_lenght = tf.shape(targets)[1]
     batch_max_logits_sequence_lenght = tf.shape(logits)[1]
-    difference = batch_max_targets_sequence_lenght - batch_max_logits_sequence_lenght
+    difference = tf.maximum(0, batch_max_targets_sequence_lenght - batch_max_logits_sequence_lenght)
     padded_logits = tf.pad(logits, [[0, 0], [0, difference], [0, 0]])
+    padded_logits = padded_logits[:, :batch_max_targets_sequence_lenght, :]
 
     with tf.variable_scope('sequence_loss'):
         sequence_loss = tf.contrib.seq2seq.sequence_loss(
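
The two added lines make the loss robust to a mismatch between the logits and targets time dimensions in either direction: clamping the padding width at zero handles logits that are already longer than the targets, and the final slice truncates them back to the targets length. A minimal NumPy sketch of the same pad-then-truncate logic (illustrative only, not the Ludwig module itself):

import numpy as np

def align_logits_to_targets(logits, targets):
    # logits: [batch, logits_len, vocab_size]; targets: [batch, targets_len]
    targets_len = targets.shape[1]
    logits_len = logits.shape[1]
    # Clamp at zero so the pad width is never negative when the
    # logits sequence is already longer than the targets.
    difference = max(0, targets_len - logits_len)
    padded = np.pad(logits, [(0, 0), (0, difference), (0, 0)])
    # Truncate in the opposite case, so the time dimension always
    # matches the targets exactly.
    return padded[:, :targets_len, :]

# Both directions end up with shape (2, 7, 5):
shorter = align_logits_to_targets(np.zeros((2, 3, 5)), np.zeros((2, 7)))
longer = align_logits_to_targets(np.zeros((2, 9, 5)), np.zeros((2, 7)))
assert shorter.shape == (2, 7, 5) and longer.shape == (2, 7, 5)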
