refactor: working embed encoder
jimthompson5802 committed Jun 12, 2020
1 parent d26a648 commit faa4d66
Showing 2 changed files with 54 additions and 69 deletions.
ludwig/features/h3_feature.py (3 changes: 1 addition & 2 deletions)
@@ -95,9 +95,8 @@ def call(self, inputs, training=None, mask=None):
         # assert inputs.dtype == tf.float32 or inputs.dtype == tf.float64
         # assert len(inputs.shape) == 1
 
-        inputs_exp = inputs[:, tf.newaxis]
         inputs_encoded = self.encoder_obj(
-            inputs_exp, training=training, mask=mask
+            inputs, training=training, mask=mask
         )
 
         return inputs_encoded
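
Note: the `tf.newaxis` expansion is dropped because the refactored `H3Embed.call` (below) takes the raw [batch x 19] component tensor and slices out each H3 field itself, so the feature can pass `inputs` straight through. A minimal shape sketch, illustrative only and not part of the commit:

import tensorflow as tf

# A fake batch of 4 preprocessed H3 features: 19 int components per row.
batch = tf.zeros([4, 19], dtype=tf.int8)

# The same per-field slicing H3Embed.call performs after casting to int32;
# no extra axis is needed on the way in.
mode = tf.cast(batch, tf.int32)[:, 0:1]
print(mode.shape)  # (4, 1)
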
ludwig/models/modules/h3_encoders.py (120 changes: 53 additions & 67 deletions)
@@ -17,6 +17,7 @@
 import logging
 
 import tensorflow as tf
+from tensorflow.keras.layers import Layer
 
 from ludwig.models.modules.embedding_modules import Embed
 from ludwig.models.modules.fully_connected_modules import FCStack
@@ -26,7 +27,7 @@
 logger = logging.getLogger(__name__)
 
 
-class H3Embed:
+class H3Embed(Layer):
 
     def __init__(
             self,
@@ -88,6 +89,8 @@ def __init__(
                is greater than 0).
         :type regularize: Boolean
         """
+        super(H3Embed, self).__init__()
+
         self.embedding_size = embedding_size
         self.reduce_output = reduce_output
 
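Note: subclassing `Layer` and calling `super().__init__()` is what makes the new interface work: Keras routes `encoder(...)` through `Layer.__call__`, which dispatches to `call(inputs, training=..., mask=...)` and tracks sub-layers created in `__init__` as trainable variables. A minimal sketch of the pattern, with hypothetical names that are not Ludwig code:

import tensorflow as tf
from tensorflow.keras.layers import Dense, Layer

class TinyEncoder(Layer):
    def __init__(self):
        # Must run before sub-layers are assigned, so Keras can track them.
        super(TinyEncoder, self).__init__()
        self.proj = Dense(8)

    def call(self, inputs, training=None, mask=None):
        # training/mask arrive via Layer.__call__, just as in H3Embed.call.
        return self.proj(tf.cast(inputs, tf.float32))

out = TinyEncoder()(tf.ones([2, 19], dtype=tf.int8))  # shape (2, 8)
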
@@ -99,9 +102,9 @@
             pretrained_embeddings=None,
             force_embedding_size=True,
             embeddings_on_cpu=embeddings_on_cpu,
-            dropout=dropout_rate,
+            dropout_rate=dropout_rate,
             initializer=weights_initializer,
-            regularize=weights_regularizer
+            regularizer=weights_regularizer
         )
         self.embed_edge = Embed(
             [str(i) for i in range(7)],
@@ -111,9 +114,9 @@
             pretrained_embeddings=None,
             force_embedding_size=True,
             embeddings_on_cpu=embeddings_on_cpu,
-            dropout=dropout_rate,
+            dropout_rate=dropout_rate,
             initializer=weights_initializer,
-            regularize=weights_regularizer
+            regularizer=weights_regularizer
         )
         self.embed_resolution = Embed(
             [str(i) for i in range(16)],
@@ -123,9 +126,9 @@
             pretrained_embeddings=None,
             force_embedding_size=True,
             embeddings_on_cpu=embeddings_on_cpu,
-            dropout=dropout_rate,
+            dropout_rate=dropout_rate,
             initializer=weights_initializer,
-            regularize=weights_regularizer
+            regularizer=weights_regularizer
         )
         self.embed_base_cell = Embed(
             [str(i) for i in range(122)],
@@ -135,9 +138,9 @@
             pretrained_embeddings=None,
             force_embedding_size=True,
             embeddings_on_cpu=embeddings_on_cpu,
-            dropout=dropout_rate,
+            dropout_rate=dropout_rate,
             initializer=weights_initializer,
-            regularize=weights_regularizer
+            regularizer=weights_regularizer
         )
         self.embed_cells = Embed(
             [str(i) for i in range(8)],
@@ -147,9 +150,9 @@
             pretrained_embeddings=None,
             force_embedding_size=True,
             embeddings_on_cpu=embeddings_on_cpu,
-            dropout=dropout_rate,
+            dropout_rate=dropout_rate,
             initializer=weights_initializer,
-            regularize=weights_regularizer
+            regularizer=weights_regularizer
         )
 
         self.fc_stack = FCStack(
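
Note: the vocabulary sizes mirror the fields of a decomposed H3 index: 7 edge values, 16 resolutions (0 through 15), 122 base cells, and 8 possible values for each of the 15 per-level cell digits (0-6, with 7 marking an unused level); together with the mode field, whose hunk is collapsed above, these form the 19 components. An illustrative sketch of the same embed-per-field-and-concatenate structure using plain Keras `Embedding` layers rather than Ludwig's `Embed` module:

import tensorflow as tf
from tensorflow.keras.layers import Embedding

embedding_size = 10
embed_edge = Embedding(7, embedding_size)         # edge field: values 0-6
embed_resolution = Embedding(16, embedding_size)  # resolutions 0-15
embed_base_cell = Embedding(122, embedding_size)  # 122 icosahedral base cells
embed_cells = Embedding(8, embedding_size)        # digits 0-6, 7 = unused

vector = tf.zeros([4, 19], dtype=tf.int32)  # a [batch x 19] component batch
parts = [
    embed_edge(vector[:, 1:2]),
    embed_resolution(vector[:, 2:3]),
    embed_base_cell(vector[:, 3:4]),
    embed_cells(vector[:, 4:]),  # all 15 per-level digits at once
]
concatenated = tf.concat(parts, axis=1)  # [4, 18, 10]; mode omitted here
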
@@ -170,62 +173,49 @@ def __init__(
             default_dropout_rate=dropout_rate,
         )
 
-    def __call__(
+    def call(
             self,
-            input_vector,
-            regularizer,
-            dropout_rate,
-            is_training=True
+            inputs,
+            training=None,
+            mask=None
     ):
         """
             :param input_vector: The input vector fed into the encoder.
                    Shape: [batch x 19], type tf.int8
             :type input_vector: Tensor
-            :param regularizer: The regularizer to use for the weights
-                   of the encoder.
-            :type regularizer:
-            :param dropout_rate: Tensor (tf.float) of the probability of dropout
-            :type dropout_rate: Tensor
-            :param is_training: Tesnor (tf.bool) specifying if in training mode
-                   (important for dropout)
-            :type is_training: Tensor
-        """
+            :param training: bool specifying if in training mode (important for dropout)
+            :type training: bool
+            :param mask: bool specifying masked values
+            :type mask: bool
+        """
+        input_vector = tf.cast(inputs, tf.int32)
+
         # ================ Embeddings ================
-        with tf.variable_scope('mode', reuse=tf.AUTO_REUSE):
-            embedded_mode, _ = self.embed_mode(
-                input_vector[:, 0:1],
-                regularizer,
-                dropout_rate,
-                is_training=is_training
-            )
-        with tf.variable_scope('edge', reuse=tf.AUTO_REUSE):
-            embedded_edge, _ = self.embed_edge(
-                input_vector[:, 1:2],
-                regularizer,
-                dropout_rate,
-                is_training=is_training
-            )
-        with tf.variable_scope('resolution', reuse=tf.AUTO_REUSE):
-            embedded_resolution, _ = self.embed_resolution(
-                input_vector[:, 2:3],
-                regularizer,
-                dropout_rate,
-                is_training=True
-            )
-        with tf.variable_scope('base_cell', reuse=tf.AUTO_REUSE):
-            embedded_base_cell, _ = self.embed_base_cell(
-                input_vector[:, 3:4],
-                regularizer,
-                dropout_rate,
-                is_training=True
-            )
-        with tf.variable_scope('cells', reuse=tf.AUTO_REUSE):
-            embedded_cells, _ = self.embed_cells(
-                input_vector[:, 4:],
-                regularizer,
-                dropout_rate,
-                is_training=is_training
-            )
+        embedded_mode = self.embed_mode(
+            input_vector[:, 0:1],
+            training=training,
+            mask=mask
+        )
+        embedded_edge = self.embed_edge(
+            input_vector[:, 1:2],
+            training=training,
+            mask=mask
+        )
+        embedded_resolution = self.embed_resolution(
+            input_vector[:, 2:3],
+            training=training,
+            mask=mask
+        )
+        embedded_base_cell = self.embed_base_cell(
+            input_vector[:, 3:4],
+            training=training,
+            mask=mask
+        )
+        embedded_cells = self.embed_cells(
+            input_vector[:, 4:],
+            training=training,
+            mask=mask
+        )
 
         # ================ Masking ================
         resolution = input_vector[:, 2]
@@ -245,19 +235,15 @@ def __call__(
         hidden = reduce_sequence(concatenated, self.reduce_output)
 
         # ================ FC Stack ================
-        hidden_size = hidden.shape.as_list()[-1]
         logger.debug(' flatten hidden: {0}'.format(hidden))
 
         hidden = self.fc_stack(
             hidden,
-            hidden_size,
-            regularizer=regularizer,
-            dropout_rate=dropout_rate,
-            is_training=is_training
+            training=training,
+            mask=mask
         )
-        hidden_size = hidden.shape.as_list()[-1]
 
-        return hidden, hidden_size
+        return {'encoder_output': hidden}
 
 
 class H3WeightedSum:
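
Note: after this refactor the encoder is invoked like any other Keras layer and returns a dict keyed by 'encoder_output' instead of a (hidden, hidden_size) tuple, so callers no longer thread regularizer, dropout_rate, and is_training through every call. A hypothetical usage sketch; it assumes the collapsed constructor arguments have usable defaults:

import tensorflow as tf
from ludwig.models.modules.h3_encoders import H3Embed

encoder = H3Embed()  # assumption: constructor defaults are sufficient
h3_vectors = tf.zeros([2, 19], dtype=tf.int8)  # preprocessed H3 components
result = encoder(h3_vectors, training=False)   # Layer.__call__ -> call()
hidden = result['encoder_output']              # replaces (hidden, hidden_size)
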
