In [13]:
import os
from pprint import pprint

import torch
from transformers import pipeline

# Allow duplicate OpenMP runtimes to coexist in this process.
os.environ["KMP_DUPLICATE_LIB_OK"] = "TRUE"

# Local directory holding the Chinese MacBERT checkpoint; reused for both
# the model weights and the tokenizer below.
model = "./models/hfl/chinese-macbert-base"

# Run on CUDA when it is available, otherwise stay on the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Fill-mask pipeline: given a sentence containing the mask token, it returns
# the candidate fillers (score, full sequence, token id, token string).
nlp = pipeline(
    "fill-mask",
    model=model,
    tokenizer=model,
    device=device,  # the torch.device chosen above
)

# Build every demo sentence with the tokenizer's own mask token.
mask = nlp.tokenizer.mask_token
prompts = [
    f"明天天{mask}很好?",
    f"明天心{mask}很好?",
    f"张亮在哪里任{mask}?",
    f"少先队员{mask}该为老人让座位。",
]

# Pretty-print the predictions for each prompt, with a row of asterisks
# between consecutive results (none before the first or after the last).
for idx, prompt in enumerate(prompts):
    if idx:
        print("*" * 42)
    pprint(nlp(prompt))

Some weights of the model checkpoint at ./models/hfl/chinese-macbert-base were not used when initializing BertForMaskedLM: ['bert.pooler.dense.bias', 'bert.pooler.dense.weight', 'cls.seq_relationship.bias', 'cls.seq_relationship.weight']
- This IS expected if you are initializing BertForMaskedLM from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForMaskedLM from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


[{'score': 0.30065131187438965,
  'sequence': '明 天 天 气 很 好?',
  'token': 3698,
  'token_str': '气'},
 {'score': 0.10563581436872482,
  'sequence': '明 天 天 会 很 好?',
  'token': 833,
  'token_str': '会'},
 {'score': 0.09691523760557175,
  'sequence': '明 天 天 还 很 好?',
  'token': 6820,
  'token_str': '还'},
 {'score': 0.08303287625312805,
  'sequence': '明 天 天 就 很 好?',
  'token': 2218,
  'token_str': '就'},
 {'score': 0.08257950097322464,
  'sequence': '明 天 天 都 很 好?',
  'token': 6963,
  'token_str': '都'}]
******************************************
[{'score': 0.6035325527191162,
  'sequence': '明 天 心 情 很 好?',
  'token': 2658,
  'token_str': '情'},
 {'score': 0.20563046634197235,
  'sequence': '明 天 心 会 很 好?',
  'token': 833,
  'token_str': '会'},
 {'score': 0.05586212873458862,
  'sequence': '明 天 心 也 很 好?',
  'token': 738,
  'token_str': '也'},
 {'score': 0.026620039716362953,
  'sequence': '明 天 心 就 很 好?',
  'token': 2218,
  'token_str': '就'},
 {'score': 0.015123298391699791,
  'sequence': '明 天 心 态 很 好?

In [14]:
from transformers import AutoModelForMaskedLM, AutoTokenizer
import torch

# Manual equivalent of the fill-mask pipeline: tokenize a sentence containing
# one mask token, run the masked-LM head, and print the sentence completed
# with each of the 5 highest-scoring tokens.

# `model` is the checkpoint path string the first time this cell runs, and is
# rebound to the loaded model at the end of it. Recover the path in either
# case so the cell can be re-executed without re-running earlier cells.
model_path = model if isinstance(model, str) else model.name_or_path

tokenizer = AutoTokenizer.from_pretrained(model_path)
# AutoModelWithLMHead is deprecated; AutoModelForMaskedLM is the dedicated
# auto class for masked-LM checkpoints.
model = AutoModelForMaskedLM.from_pretrained(model_path)

# Use this cell's own tokenizer for the mask token instead of reaching into
# the pipeline object created in another cell.
sequence = f"明天心{tokenizer.mask_token}很好."

# Named `input_ids` rather than `input` so the builtin `input()` is not shadowed.
input_ids = tokenizer.encode(sequence, return_tensors="pt")

# torch.where on the 2-D id tensor returns (row_indices, col_indices);
# element [1] is the position of the mask token within the sequence.
mask_token_index = torch.where(input_ids == tokenizer.mask_token_id)[1]

# Inference only: skip autograd bookkeeping.
with torch.no_grad():
    token_logits = model(input_ids).logits

# Vocabulary logits at the masked position of the single sequence in the batch.
mask_token_logits = token_logits[0, mask_token_index, :]
top_5_tokens = torch.topk(mask_token_logits, 5, dim=1).indices[0].tolist()

for token in top_5_tokens:
    print(sequence.replace(tokenizer.mask_token, tokenizer.decode([token])))

Some weights of the model checkpoint at ./models/hfl/chinese-macbert-base were not used when initializing BertForMaskedLM: ['bert.pooler.dense.bias', 'bert.pooler.dense.weight', 'cls.seq_relationship.bias', 'cls.seq_relationship.weight']
- This IS expected if you are initializing BertForMaskedLM from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForMaskedLM from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


明天心会很好.
明天心情很好.
明天心也很好.
明天心就很好.
明天心还很好.
