update

bob7783 · bob7783 · commit 3740a6fa26ca · 2019-12-26T03:02:24.000-05:00
diff --git a/nlp_class3/attention.py b/nlp_class3/attention.py
@@ -324,18 +324,42 @@ def stack_and_transpose(x):
   outputs=outputs
 )
 
+
+def custom_loss(y_true, y_pred):
+  # both are of shape N x T x K
+  mask = K.cast(y_true > 0, dtype='float32')
+  out = mask * y_true * K.log(y_pred)
+  return -K.sum(out) / K.sum(mask)
+
+
+def acc(y_true, y_pred):
+  # both are of shape N x T x K
+  targ = K.argmax(y_true, axis=-1)
+  pred = K.argmax(y_pred, axis=-1)
+  correct = K.cast(K.equal(targ, pred), dtype='float32')
+
+  # 0 is padding, don't include those
+  mask = K.cast(K.greater(targ, 0), dtype='float32')
+  n_correct = K.sum(mask * correct)
+  n_total = K.sum(mask)
+  return n_correct / n_total
+
+
 # compile the model
-model.compile(optimizer='rmsprop', loss='categorical_crossentropy', metrics=['accuracy'])
+model.compile(optimizer='adam', loss=custom_loss, metrics=[acc])
+# model.compile(optimizer='rmsprop', loss='categorical_crossentropy', metrics=['acc'])
 
 # train the model
-z = np.zeros((NUM_SAMPLES, LATENT_DIM_DECODER)) # initial [s, c]
+z = np.zeros((len(encoder_inputs), LATENT_DIM_DECODER)) # initial [s, c]
 r = model.fit(
   [encoder_inputs, decoder_inputs, z, z], decoder_targets_one_hot,
   batch_size=BATCH_SIZE,
   epochs=EPOCHS,
   validation_split=0.2
 )
 
+
+
 # plot some data
 plt.plot(r.history['loss'], label='loss')
 plt.plot(r.history['val_loss'], label='val_loss')
@@ -371,6 +395,9 @@ def stack_and_transpose(x):
 # combine context with last word
 decoder_lstm_input = context_last_word_concat_layer([context, decoder_inputs_single_x])
 
+
+
+
 # lstm and final dense
 o, s, c = decoder_lstm(decoder_lstm_input, initial_state=[initial_s, initial_c])
 decoder_outputs = decoder_dense(o)