# Masking and padding with Keras
- Masking: 데이터가 누락되었을 때 이를 건너뛰고 처리하도록 하는 방식 (sequence data -> 누락된 시점을 건너뛰도록 함)
- Padding: a special form of masking where the masked steps are at the start or at the beginning of a sequence

In [1]:
import numpy as np
import tensorflow as tf
from tensorflow import keras 
from tensorflow.keras import layers

sample마다 데이터 길이가 다른 경우가 종종 있음
- 딥러닝 모델 input 데이터는 단일 shape이므로 -> 길이가 짧은 샘플은 패딩 처리 
- tf.keras.preprocessing.sequence.pad_sequences

In [4]:
raw_inputs = [
    [711, 632, 71], # 길이 3
    [73, 8, 3215, 55, 927], # 길이 5
    [83, 91, 1, 645, 1253, 927], # 길이 6
]

padded_inputs = tf.keras.preprocessing.sequence.pad_sequences(
        raw_inputs, padding='post' # pre or post 둘 중 하나 선택 가능 -> post 추천
)
print(padded_inputs)

[[ 711  632   71    0    0    0]
 [  73    8 3215   55  927    0]
 [  83   91    1  645 1253  927]]


모델 생성 시, input data가 Padding 되었음을 알려야 함 -> 그래야 padding된 데이터를 무시할 수 있으므로 (masking)
- keras.layers.Masking
- keras.layers.Embeddin w/ mask_zero=True
- pass a 'mask' argument manually..?

In [6]:
# create a mask tensor (2D tensor with shape - (batch, sequence_length))
embedding = layers.Embedding(input_dim=5000, output_dim=16, mask_zero=True)
masked_output = embedding(padded_inputs)

masking_layer = layers.Masking()
unmasked_embedding = tf.cast( # expanding 2D input to 3D 
    tf.tile(tf.expand_dims(padded_inputs, axis=-1), [1, 1, 10]), tf.float32
)

masked_embedding = masking_layer(unmasked_embedding)

In [8]:
# padding input을 Embedding하거나 Masking하면 mask 값도 같이 전달
masked_output._keras_mask

<tf.Tensor: shape=(3, 6), dtype=bool, numpy=
array([[ True,  True,  True, False, False, False],
       [ True,  True,  True,  True,  True, False],
       [ True,  True,  True,  True,  True,  True]])>

In [7]:
masked_embedding._keras_mask

<tf.Tensor: shape=(3, 6), dtype=bool, numpy=
array([[ True,  True,  True, False, False, False],
       [ True,  True,  True,  True,  True, False],
       [ True,  True,  True,  True,  True,  True]])>

a mask generated by an Embedding or Masking layer -> 어느 레이어에서도 활용 가능

In [9]:
# Sequential model
model = keras.Sequential([
    layers.Embedding(input_dim=5000, output_dim=16, mask_zero=True), # Embedding 레이어에서 알아서 Padding
    layers.LSTM(32)
])

In [10]:
# Functional API
inputs = keras.Input(shape=(None,), dtype='int32')
x = layers.Embedding(input_dim=5000, output_dim=16, mask_zero=True)(inputs)  # Embedding 레이어에서 알아서 Padding
outputs = layers.LSTM(32)(x)

model = keras.Model(inputs, outputs)

### Passing mask tensors directly to layers
- LSTM 같은 레이어(mask-consuming layer)에서는 __call__ 메서드에 mask를 argument로 주어 직접 다룰 수 있음
- Embedding 같은 레이어(mask-producing layer)에서는 mask를 만듦 -> compute_mask(input, previous mask) 메서드를 LTST 레이어에 전달

In [13]:
class MyLayer(layers.Layer):
    def __init__(self, **kwargs):
        super(MyLayer, self).__init__(**kwargs)
        self.embedding = layers.Embedding(input_dim=5000, output_dim=6, mask_zero=True)
        self.lstm = layers.LSTM(32)
    
    def call(self, inputs):
        x = self.embedding(inputs)
        mask = self.embedding.compute_mask(inputs)
        output = self.lstm(x, mask=mask)
        return output
    
layer = MyLayer()
x = np.random.random((32, 10)) * 100
x = x.astype('int32')
layer(x)

<tf.Tensor: shape=(32, 32), dtype=float32, numpy=
array([[-5.0492650e-03,  1.0275564e-03, -1.5185910e-03, ...,
        -3.2672107e-03,  1.2886111e-03, -1.5768912e-03],
       [-8.9791315e-03,  1.5349587e-03,  3.8674767e-03, ...,
         3.2387141e-03, -1.6375385e-04,  1.5844356e-03],
       [-3.3967171e-04,  1.5671948e-03, -7.3049875e-04, ...,
         3.5871912e-03, -7.8019494e-04, -2.8426962e-03],
       ...,
       [ 1.2105663e-03,  1.2680746e-03, -4.0347474e-03, ...,
        -1.2906976e-03,  8.3061773e-04, -4.2560901e-03],
       [-4.4841692e-03,  2.2434755e-03,  4.8386324e-03, ...,
         2.9819105e-03,  6.5205015e-05,  3.2284940e-03],
       [ 7.7692280e-04,  5.7852559e-04,  1.1240725e-03, ...,
         7.0996182e-03, -3.2469053e-03,  2.2084371e-03]], dtype=float32)>

In [14]:
class TemporalSplit(keras.layers.Layer):
    def call(self, inputs):
        return tf.split(inputs, 2, axis=1)
    
    def compute_mask(self, inputs, mask=None):
        if mask is None:
            return None
        return tf.split(mask, 2, axis=1)


In [15]:
first_half, second_half = TemporalSplit()(masked_embedding)
print(first_half._keras_mask)
print(second_half._keras_mask)

tf.Tensor(
[[ True  True  True]
 [ True  True  True]
 [ True  True  True]], shape=(3, 3), dtype=bool)
tf.Tensor(
[[False False False]
 [ True  True False]
 [ True  True  True]], shape=(3, 3), dtype=bool)


In [16]:
class CustomEmbedding(keras.layers.Layer):
    def __init__(self, input_dim, output_dim, mask_zero=False, **kwargs):
        super(CustomEmbedding, self).__init__(**kwargs)
        self.input_dim = input_dim
        self.output_dim = output_dim
        self.mask_zero = mask_zero
    
    def build(self, input_shape):
        self.embeddings = self.add_weight(
            shape=(self.input_dim, self.output_dim),
            initializer='random_normal',
            dtype='float32'
        )
    
    def call(self, inputs):
        return tf.nn.embedding_lookup(self.embeddings, inputs)

In [17]:
layer = CustomEmbedding(10, 32, mask_zero=True)
x = np.random.random((3, 10)) * 9
x = x.astype("int32")

y = layer(x)
mask = layer.compute_mask(x)

print(mask)

None


In [19]:
class MyActivation(keras.layers.Layer):
    def __init__(self, **kwargs):
        super(MyActivation, self).__init__(**kwargs)
        # Signal that the layer is safe for mask propagation
        self.supports_masking = True

    def call(self, inputs):
        return tf.nn.relu(inputs)

inputs = keras.Input(shape=(None,), dtype="int32")
x = layers.Embedding(input_dim=5000, output_dim=16, mask_zero=True)(inputs)
x = MyActivation()(x)  # supports_masking = True
print("Mask found:", x._keras_mask)
outputs = layers.LSTM(32)(x)  # Will receive the mask

model = keras.Model(inputs, outputs)

Mask found: KerasTensor(type_spec=TensorSpec(shape=(None, None), dtype=tf.bool, name=None), name='Placeholder_1:0')


In [20]:
class TemporalSoftmax(keras.layers.Layer):
    def call(self, inputs, mask=None):
        broadcast_float_mask = tf.expand_dims(tf.cast(mask, "float32"), -1)
        inputs_exp = tf.exp(inputs) * broadcast_float_mask
        inputs_sum = tf.reduce_sum(inputs * broadcast_float_mask, axis=1, keepdims=True)
        return inputs_exp / inputs_sum


inputs = keras.Input(shape=(None,), dtype="int32")
x = layers.Embedding(input_dim=10, output_dim=32, mask_zero=True)(inputs)
x = layers.Dense(1)(x)
outputs = TemporalSoftmax()(x)

model = keras.Model(inputs, outputs)
y = model(np.random.randint(0, 10, size=(32, 100)), np.random.random((32, 100, 1)))
