update testings
zheyuye committed Jul 13, 2020
1 parent 2719e81 commit 4defc7a
Showing 9 changed files with 39 additions and 21 deletions.
3 changes: 2 additions & 1 deletion src/gluonnlp/models/albert.py
@@ -607,7 +607,8 @@ def list_pretrained_albert():

 def get_pretrained_albert(model_name: str = 'google_albert_base_v2',
                           root: str = get_model_zoo_home_dir(),
-                          load_backbone=True, load_mlm=False)\
+                          load_backbone: bool = True,
+                          load_mlm: bool = False)\
         -> Tuple[CN, SentencepieceTokenizer, str, str]:
     """Get the pretrained Albert weights
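The same bool-annotation change lands in bert.py and mobilebert.py below. As a minimal usage sketch of the updated signature (illustrative only, not part of the commit; it mirrors the albert test further down and assumes the default model-zoo cache directory):

from gluonnlp.models.albert import AlbertModel, AlbertForMLM, get_pretrained_albert

cfg, tokenizer, backbone_params_path, mlm_params_path = \
    get_pretrained_albert('google_albert_base_v2',
                          load_backbone=True, load_mlm=True)
albert_model = AlbertModel.from_cfg(cfg)
albert_model.load_parameters(backbone_params_path)
# With load_mlm=False the returned mlm_params_path is None,
# so callers should guard before loading the MLM head.
if mlm_params_path is not None:
    albert_mlm_model = AlbertForMLM(cfg)
    albert_mlm_model.load_parameters(mlm_params_path)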
3 changes: 2 additions & 1 deletion src/gluonnlp/models/bert.py
@@ -598,7 +598,8 @@ def list_pretrained_bert():

 def get_pretrained_bert(model_name: str = 'google_en_cased_bert_base',
                         root: str = get_model_zoo_home_dir(),
-                        load_backbone=True, load_mlm=False)\
+                        load_backbone: bool = True,
+                        load_mlm: bool = False)\
         -> Tuple[CN, HuggingFaceWordPieceTokenizer, str, str]:
     """Get the pretrained bert weights
3 changes: 2 additions & 1 deletion src/gluonnlp/models/mobilebert.py
@@ -909,7 +909,8 @@ def list_pretrained_mobilebert():

 def get_pretrained_mobilebert(model_name: str = 'google_uncased_mobilebert',
                               root: str = get_model_zoo_home_dir(),
-                              load_backbone=True, load_mlm=True)\
+                              load_backbone: bool = True,
+                              load_mlm: bool = False)\
         -> Tuple[CN, HuggingFaceWordPieceTokenizer, str, str]:
     """Get the pretrained mobile bert weights
2 changes: 1 addition & 1 deletion src/gluonnlp/models/roberta.py
@@ -55,7 +55,7 @@
         'merges': 'fairseq_roberta_base/gpt2-396d4d8e.merges',
         'vocab': 'fairseq_roberta_base/gpt2-f1335494.vocab',
         'params': 'fairseq_roberta_base/model-09a1520a.params',
-        'mlm_params': 'google_uncased_mobilebert/model_mlm-29889e2b.params',
+        'mlm_params': 'fairseq_roberta_base/model_mlm-29889e2b.params',
     },
     'fairseq_roberta_large': {
         'cfg': 'fairseq_roberta_large/model-6e66dc4a.yml',
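The fixed entry above is a copy-paste bug (the roberta MLM weights pointed into the mobilebert directory) that could be caught mechanically. A hypothetical sanity check, not in this commit, assuming the mapping shown is the module-level PRETRAINED_URL dict in roberta.py:

from gluonnlp.models.roberta import PRETRAINED_URL

for model_name, files in PRETRAINED_URL.items():
    for key, path in files.items():
        # Every remote file should live under its own model's directory.
        if isinstance(path, str):
            assert path.startswith(model_name + '/'), \
                '%s/%s points at a foreign path: %s' % (model_name, key, path)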
5 changes: 3 additions & 2 deletions tests/test_models_albert.py
@@ -108,12 +108,13 @@ def test_albert_get_pretrained(model_name):
     assert len(list_pretrained_albert()) > 0
     with tempfile.TemporaryDirectory() as root:
         cfg, tokenizer, backbone_params_path, mlm_params_path =\
-            get_pretrained_albert(model_name, root=root, load_mlm=True)
+            get_pretrained_albert(model_name, load_backbone=True, load_mlm=True, root=root)
         assert cfg.MODEL.vocab_size == len(tokenizer.vocab)
         albert_model = AlbertModel.from_cfg(cfg)
         albert_model.load_parameters(backbone_params_path)
         albert_mlm_model = AlbertForMLM(cfg)
-        albert_mlm_model.load_parameters(mlm_params_path)
+        if mlm_params_path is not None:
+            albert_mlm_model.load_parameters(mlm_params_path)
         # Just load the backbone
         albert_mlm_model = AlbertForMLM(cfg)
         albert_mlm_model.backbone_model.load_parameters(backbone_params_path)
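The new None guard recurs in the roberta and xlmr tests below; a tiny hypothetical helper (not part of the commit) that would factor out the pattern:

def load_mlm_if_available(mlm_model, mlm_params_path):
    # get_pretrained_* now returns None for the MLM path when
    # load_mlm=False, so only load when a path actually came back.
    if mlm_params_path is not None:
        mlm_model.load_parameters(mlm_params_path)
    return mlm_model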
2 changes: 1 addition & 1 deletion tests/test_models_bert.py
@@ -18,7 +18,7 @@ def test_bert_get_pretrained(model_name):
     assert len(list_pretrained_bert()) > 0
     with tempfile.TemporaryDirectory() as root:
         cfg, tokenizer, backbone_params_path, mlm_params_path =\
-            get_pretrained_bert(model_name, root=root)
+            get_pretrained_bert(model_name, load_backbone=True, load_mlm=True, root=root)
         assert cfg.MODEL.vocab_size == len(tokenizer.vocab)
         bert_model = BertModel.from_cfg(cfg)
         bert_model.load_parameters(backbone_params_path)
2 changes: 1 addition & 1 deletion tests/test_models_mobilebert.py
@@ -17,7 +17,7 @@ def test_list_pretrained_mobilebert():
 def test_bert_get_pretrained(model_name):
     with tempfile.TemporaryDirectory() as root:
         cfg, tokenizer, backbone_params_path, mlm_params_path =\
-            get_pretrained_mobilebert(model_name, root=root)
+            get_pretrained_mobilebert(model_name, load_backbone=True, load_mlm=True, root=root)
         assert cfg.MODEL.vocab_size == len(tokenizer.vocab)
         mobilebert_model = MobileBertModel.from_cfg(cfg)
         mobilebert_model.load_parameters(backbone_params_path)
21 changes: 14 additions & 7 deletions tests/test_models_roberta.py
@@ -2,7 +2,7 @@
 import numpy as np
 import mxnet as mx
 import tempfile
-from gluonnlp.models.roberta import RobertaModel,\
+from gluonnlp.models.roberta import RobertaModel, RobertaForMLM, \
     list_pretrained_roberta, get_pretrained_roberta
 from gluonnlp.loss import LabelSmoothCrossEntropyLoss

@@ -19,12 +19,12 @@ def test_roberta(model_name):
     # test from pretrained
     assert len(list_pretrained_roberta()) > 0
     with tempfile.TemporaryDirectory() as root:
-        cfg, tokenizer, params_path =\
-            get_pretrained_roberta(model_name, root=root)
+        cfg, tokenizer, params_path, mlm_params_path =\
+            get_pretrained_roberta(model_name, load_backbone=True, load_mlm=True, root=root)
         assert cfg.MODEL.vocab_size == len(tokenizer.vocab)
         roberta_model = RobertaModel.from_cfg(cfg)
         roberta_model.load_parameters(params_path)

         # test forward
         batch_size = 3
         seq_length = 32
@@ -45,12 +45,19 @@ def test_roberta(model_name):
             ),
             dtype=np.int32
         )
-        x = roberta_model(input_ids, valid_length)
+        contextual_embeddings, pooled_out = roberta_model(input_ids, valid_length)
         mx.npx.waitall()
         # test backward
         label_smooth_loss = LabelSmoothCrossEntropyLoss(num_labels=vocab_size)
         with mx.autograd.record():
-            x = roberta_model(input_ids, valid_length)
-            loss = label_smooth_loss(x, input_ids)
+            contextual_embeddings, pooled_out = roberta_model(input_ids, valid_length)
+            loss = label_smooth_loss(contextual_embeddings, input_ids)
             loss.backward()
         mx.npx.waitall()
+
+        # test for mlm model
+        roberta_mlm_model = RobertaForMLM(cfg)
+        if mlm_params_path is not None:
+            roberta_mlm_model.load_parameters(mlm_params_path)
+        roberta_mlm_model = RobertaForMLM(cfg)
+        roberta_mlm_model.backbone_model.load_parameters(params_path)
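The unpacking change reflects the backbone's two-output contract, which the xlmr test below mirrors. A short illustrative sketch of the expected shapes (assuming fairseq_roberta_base and a config with the pooler enabled):

import mxnet as mx
import numpy as np
from gluonnlp.models.roberta import RobertaModel, get_pretrained_roberta

cfg, tokenizer, params_path, _ = get_pretrained_roberta('fairseq_roberta_base')
roberta_model = RobertaModel.from_cfg(cfg)
roberta_model.load_parameters(params_path)

batch_size, seq_length = 2, 16
input_ids = mx.np.random.randint(
    0, cfg.MODEL.vocab_size, (batch_size, seq_length), dtype=np.int32)
valid_length = mx.np.full((batch_size,), seq_length, dtype=np.int32)
# contextual_embeddings: (batch_size, seq_length, units)
# pooled_out:            (batch_size, units)
contextual_embeddings, pooled_out = roberta_model(input_ids, valid_length)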
19 changes: 13 additions & 6 deletions tests/test_models_xlmr.py
@@ -2,7 +2,7 @@
 import numpy as np
 import mxnet as mx
 import tempfile
-from gluonnlp.models.xlmr import XLMRModel,\
+from gluonnlp.models.xlmr import XLMRModel, XLMRForMLM, \
     list_pretrained_xlmr, get_pretrained_xlmr
 from gluonnlp.loss import LabelSmoothCrossEntropyLoss

@@ -19,8 +19,8 @@ def test_xlmr():
     assert len(list_pretrained_xlmr()) > 0
     for model_name in list_pretrained_xlmr():
         with tempfile.TemporaryDirectory() as root:
-            cfg, tokenizer, params_path =\
-                get_pretrained_xlmr(model_name, root=root)
+            cfg, tokenizer, params_path, mlm_params_path =\
+                get_pretrained_xlmr(model_name, load_backbone=True, load_mlm=True, root=root)
             assert cfg.MODEL.vocab_size == len(tokenizer.vocab)
             xlmr_model = XLMRModel.from_cfg(cfg)
             xlmr_model.load_parameters(params_path)
@@ -44,12 +44,19 @@ def test_xlmr():
                 ),
                 dtype=np.int32
             )
-            x = xlmr_model(input_ids, valid_length)
+            contextual_embeddings, pooled_out = xlmr_model(input_ids, valid_length)
             mx.npx.waitall()
             # test backward
             label_smooth_loss = LabelSmoothCrossEntropyLoss(num_labels=vocab_size)
             with mx.autograd.record():
-                x = xlmr_model(input_ids, valid_length)
-                loss = label_smooth_loss(x, input_ids)
+                contextual_embeddings, pooled_out = xlmr_model(input_ids, valid_length)
+                loss = label_smooth_loss(contextual_embeddings, input_ids)
                 loss.backward()
             mx.npx.waitall()
+
+            # test for mlm model
+            xlmr = XLMRForMLM(cfg)
+            if mlm_params_path is not None:
+                xlmr.load_parameters(mlm_params_path)
+            xlmr = XLMRForMLM(cfg)
+            xlmr.backbone_model.load_parameters(params_path)
