Recover bert_encoder_transformer.h (PaddlePaddle#1029)
Remove useless parameters, remove the redefinition of the pretrained model's encoder, and update the layer replacement so that each layer is considered independently.

Co-authored-by: Zeyu Chen <chenzeyu01@baidu.com>
LiuChiachi and ZeyuChen committed Sep 17, 2021
1 parent 729198e commit c63a9ad
Showing 2 changed files with 36 additions and 57 deletions.
examples/semantic_indexing/faster_predict.py (23 changes: 2 additions & 21 deletions)
@@ -21,7 +21,6 @@
 import paddle
 import paddle.nn as nn
 import paddle.nn.functional as F
-from paddle.nn import TransformerEncoder, TransformerEncoderLayer

 from paddlenlp.transformers import ErnieTokenizer, ErnieModel
 from paddlenlp.data import Pad, Tuple
@@ -73,20 +72,8 @@ def parse_args():


 class SemanticIndexingPredictor(nn.Layer):
-    def __init__(self,
-                 pretrained_model,
-                 output_emb_size,
-                 n_layer=12,
-                 n_head=12,
-                 hidden_size=768,
-                 dim_feedforward=3072,
-                 activation="relu",
-                 bos_id=0,
-                 dropout=0,
-                 max_seq_len=128,
-                 is_gelu=False):
+    def __init__(self, pretrained_model, output_emb_size, bos_id=0, dropout=0):
         super(SemanticIndexingPredictor, self).__init__()
-        size_per_head = hidden_size // n_head
         self.bos_id = bos_id
         self.ptm = pretrained_model
         self.dropout = nn.Dropout(dropout if dropout is not None else 0.0)
@@ -96,9 +83,6 @@ def __init__(self,
             initializer=paddle.nn.initializer.TruncatedNormal(std=0.02))
         self.emb_reduce_linear = paddle.nn.Linear(
             768, output_emb_size, weight_attr=weight_attr)
-        encoder_layer = TransformerEncoderLayer(
-            hidden_size, n_head, dim_feedforward, dropout=dropout)
-        self.ptm.encoder = TransformerEncoder(encoder_layer, n_layer)

     def get_pooled_embedding(self,
                              input_ids,
@@ -189,10 +173,7 @@ def do_predict(args):
     pretrained_model = ErnieModel.from_pretrained("ernie-1.0")

     model = SemanticIndexingPredictor(
-        pretrained_model,
-        args.output_emb_size,
-        max_seq_len=args.max_seq_length,
-        dropout=args.dropout)
+        pretrained_model, args.output_emb_size, dropout=args.dropout)
     model.eval()
     model.load(args.params_path)
     model = enable_faster_encoder(model)
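Note (not part of the diff): with the trimmed constructor, the encoder configuration comes entirely from the pretrained ErnieModel, so the call site only passes the embedding size and dropout. A minimal usage sketch, assuming SemanticIndexingPredictor is the class defined in this file and that FasterTransformer can be JIT-compiled locally; the output embedding size of 256 is an illustrative value:

    from paddlenlp.transformers import ErnieModel
    from paddlenlp.ops import enable_faster_encoder, disable_faster_encoder

    pretrained_model = ErnieModel.from_pretrained("ernie-1.0")
    model = SemanticIndexingPredictor(pretrained_model, 256, dropout=0.0)  # 256 is a placeholder
    model.eval()
    model = enable_faster_encoder(model)   # use the fused encoder forward where supported
    # ... run batched inference here ...
    model = disable_faster_encoder(model)  # restore the original Paddle forward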
paddlenlp/ops/faster_transformer/transformer/encoder.py (70 changes: 34 additions & 36 deletions)
@@ -120,7 +120,7 @@ def encoder_layer_forward(self,
     After inference, `disable_faster_encoder` could be called to restore the
     `forward` function of `paddle.nn.TransformerEncoder` and
-    `paddle.nn.TransformerEncoder`.
+    `paddle.nn.TransformerEncoderLayer`.

     Args:
         src (Tensor):
@@ -130,14 +130,13 @@ def encoder_layer_forward(self,
         src_mask (Tensor, optional):
             A tensor used in multi-head attention to prevents attention to some
             unwanted positions, usually the paddings or the subsequent
-            positions. It is a tensor with shape broadcasted to
-            `[batch_size, n_head, sequence_length, sequence_length]`. When the
-            data type is bool, the unwanted positions have `False` values and
-            the others have `True` values. When the data type is int, the
-            unwanted positions have 0 values and the others have 1 values. When
-            the data type is float, the unwanted positions have `-INF` values
-            and the others have 0 values. It can be None when nothing wanted or
-            needed to be prevented attention to. Defaults to None.
+            positions. It is a tensor with shape `[batch_size, 1, 1, sequence_length]`.
+            When the data type is bool, the unwanted positions have `False`
+            values and the others have `True` values. When the data type is int,
+            the unwanted positions have 0 values and the others have 1 values.
+            When the data type is float, the unwanted positions have `-INF`
+            values and the others have 0 values. It can be None when nothing
+            wanted or needed to be prevented attention to. Defaults to None.

     Returns:
         src(Tensor|tuple):
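Note (an illustration, not code from this commit): the updated docstring fixes the expected mask shape to `[batch_size, 1, 1, sequence_length]`. A minimal sketch of building such a mask from padded `input_ids`, assuming a pad token id of 0 and the float convention where unwanted positions get a large negative value:

    import paddle

    def build_src_mask(input_ids, pad_token_id=0):
        # Padding positions get -1e4 (effectively -INF after softmax), real tokens get 0.
        # Resulting shape: [batch_size, 1, 1, sequence_length].
        mask = (input_ids == pad_token_id).astype("float32") * -1e4
        return paddle.unsqueeze(mask, axis=[1, 2])

    input_ids = paddle.to_tensor([[1, 5, 7, 9], [1, 5, 0, 0]])
    print(build_src_mask(input_ids).shape)  # [2, 1, 1, 4]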
@@ -192,7 +191,7 @@ def encoder_forward(self, src, src_mask=None, cache=None):
     After inference, `disable_faster_encoder` could be called to restore the
     `forward` function of `paddle.nn.TransformerEncoder` and
-    `paddle.nn.TransformerEncoder`.
+    `paddle.nn.TransformerEncoderLayer`.

     Args:
         src (Tensor):
@@ -202,14 +201,14 @@ def encoder_forward(self, src, src_mask=None, cache=None):
         src_mask (Tensor, optional):
             A tensor used in multi-head attention to prevents attention to
             some unwanted positions, usually the paddings or the subsequent
-            positions. It is a tensor with shape broadcasted to
-            `[batch_size, n_head, sequence_length, sequence_length]`. When the
-            data type is bool, the unwanted positions have `False` values and
-            the others have `True` values. When the data type is int, the
-            unwanted positions have 0 values and the others have 1 values.
-            When the data type is float, the unwanted positions have `-INF`
-            values and the others have 0 values. It can be None when nothing
-            wanted or needed to be prevented attention to. Default None.
+            positions. It is a tensor with shape `[batch_size, 1, 1, sequence_length]`.
+            When the data type is bool, the unwanted positions have `False`
+            values and the others have `True` values. When the data type is
+            int, the unwanted positions have 0 values and the others have 1
+            values. When the data type is float, the unwanted positions have
+            `-INF` values and the others have 0 values. It can be None when
+            nothing wanted or needed to be prevented attention to. Defaults
+            to None.

     Returns:
         output (Tensor|tuple):
@@ -252,35 +251,34 @@ def enable_faster_encoder(self):
             model = disable_faster_encoder(model)
     """

-    def check_if_usable(layer):
-        for sub_layer in layer.children():
-            if isinstance(sub_layer,
-                          TransformerEncoderLayer) and sub_layer._config[
-                              'bias_attr'] == False:
+    def init_func(layer):
+        if isinstance(layer, TransformerEncoderLayer):
+            is_usable = True
+            if layer._config['bias_attr'] == False:
                 logger.warning("`False` for paddle.nn.TransformerEncoder's" \
                                " parameter `bias_attr` is not supported in " \
-                               "FasterTransformer by now. Original Paddle API " \
-                               "would be called.")
-                return False
-            elif not check_if_usable(sub_layer):
-                return False
-        return True
-
-    def init_func(layer):
-        if isinstance(layer, (TransformerEncoderLayer, TransformerEncoder)):
+                               "FasterTransformer by now. The original forward" \
+                               " will be involved.")
+                is_usable = False
+            if layer._config['activation'] not in ('relu', 'gelu'):
+                logger.warning("Only 'relu' or 'gelu' is supported by now. " \
+                               "The original forward will be involved.")
+                is_usable = False
+            if is_usable:
+                layer.forward = layer._ft_forward
+        elif isinstance(layer, TransformerEncoder):
             layer.forward = layer._ft_forward

     if not self.training:
-        if not check_if_usable(self):
-            return self
         try:
             load("FasterTransformer", verbose=True)
+            for layer in self.children():
+                layer.apply(init_func)
         except Exception:
             logger.warning(
                 "Exception occurs when using Faster Transformer. " \
                 "The original forward will be involved. ")
             return self
-        for layer in self.children():
-            layer.apply(init_func)
     return self

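Note (a simplified paraphrase under assumptions, not the library source): the rewritten enable_faster_encoder checks every TransformerEncoderLayer independently, so one unsupported layer no longer disables the replacement for the whole model. The per-layer policy looks roughly like this, assuming paddlenlp has already attached `_ft_forward` to these layer types:

    import paddle.nn as nn

    def replace_supported_layers(model):
        def init_func(layer):
            if isinstance(layer, nn.TransformerEncoderLayer):
                # Only layers with a bias and a relu/gelu activation use the fused forward.
                usable = (layer._config['bias_attr'] is not False and
                          layer._config['activation'] in ('relu', 'gelu'))
                if usable:
                    layer.forward = layer._ft_forward
            elif isinstance(layer, nn.TransformerEncoder):
                layer.forward = layer._ft_forward

        for child in model.children():
            child.apply(init_func)  # apply() visits each sublayer recursively
        return model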
