示例来源：[huggingface](https://huggingface.co/gpt2)


In [None]:
from huggingface_hub import snapshot_download
# snapshot_download(repo_id="gpt2-large", ignore_regex=["*.h5", "*.ot", "*.msgpack"])

In [None]:
from transformers import GPT2Tokenizer, GPT2Model ,GPT2Config
import torch


In [None]:
# Load the GPT-2 tokenizer (vocabulary + BPE merges) identified by 'gpt-2'.
# NOTE(review): the Hugging Face Hub id linked at the top of this notebook is
# 'gpt2'; 'gpt-2' is presumably a local directory name — confirm.
tokenizer = GPT2Tokenizer.from_pretrained('gpt-2')


In [None]:
# Load the configuration and the pretrained weights identified by 'gpt-2'.
#
# The language-modelling head variant is used on purpose: the bare GPT2Model
# returns hidden states (one vector of the embedding width per position), not
# scores over the vocabulary, so taking an argmax over its output and decoding
# it as a token id would be meaningless. GPT2LMHeadModel returns vocabulary
# logits as the first element of its output, which is what next-word
# prediction needs.
from transformers import GPT2LMHeadModel

modelConfig = GPT2Config.from_pretrained('gpt-2')
model = GPT2LMHeadModel.from_pretrained('gpt-2',config=modelConfig)


In [None]:
# Prompt that is tokenized and fed to the model in the following cells
# (note the trailing space, which is part of the prompt).
text = "i am "


In [None]:
# Tokenize the prompt; return_tensors='pt' asks the tokenizer for PyTorch
# tensors. The result is later unpacked as keyword arguments to the model.
encoded_input = tokenizer(text, return_tensors='pt')


In [None]:
# Switch the model to evaluation mode and run one forward pass without
# gradient tracking (inference only).
model.eval()
with torch.no_grad():
    output = model(**encoded_input)
    # First element of the model output: hidden states for a bare GPT2Model,
    # vocabulary logits for GPT2LMHeadModel.
    predictions = output[0]
# Print the tensor shape as a sanity check.
print(predictions.size())

In [None]:

# Pick the highest-scoring entry at the LAST input position. Indexing with -1
# (instead of a fixed position) keeps this correct for prompts of any token
# length.
# NOTE(review): the result is a vocabulary id only if `predictions` holds
# LM-head logits; with a bare GPT2Model it would be an index into the hidden
# state — confirm which model class produced `predictions`.
predicted_index = torch.argmax(predictions[0, -1, :]).item()
print(predicted_index)
# Decode the id back into text.
predicted_text = tokenizer.decode([predicted_index])
print(predicted_text)
# Show the prompt followed by the predicted continuation.
print(text + predicted_text)

In [None]:
import torch
from transformers import GPT2Tokenizer, GPT2LMHeadModel
 
# Example: use the GPT-2 model from the Transformers library for next-word
# prediction, i.e. predict the word most likely to follow an unfinished sentence.
# Next-word prediction is a common task that many Transformers models support
# (BERT could be used as well). GPT-2 is chosen here mainly to demonstrate how
# to load multiple vocabulary files manually.

# 1.1 Load the vocabulary files

# Automatic loading of the pretrained tokenizer:
# tokenizer = GPT2Tokenizer.from_pretrained('gpt2')
# Manual loading of the vocabulary files gpt2-merges.txt and gpt2-vocab.json.
# from_pretrained can load several vocabulary files from a local directory, but
# the file names must match the names defined in the VOCAB_FILES_NAMES dict of
# the source file tokenization_gpt2.py.
# The downloaded files gpt2-vocab.json and gpt2-merges.txt therefore have to be
# renamed to vocab.json and merges.txt inside the directory passed below.
tokenizer = GPT2Tokenizer.from_pretrained('./models/gpt-2') 

# Encode the input
indexed_tokens = tokenizer.encode("Who is Li BiGor ? Li BiGor is a")
print("输入语句为：",tokenizer.decode(indexed_tokens))
tokens_tensor = torch.tensor([indexed_tokens])  # wrap in a list to build a tensor with a leading batch dimension of 1

# Automatic loading of the pretrained weights:
# model = GPT2LMHeadModel.from_pretrained('gpt2')
# Manual loading: config file gpt2-config.json and weight file
# gpt2-pytorch_model.bin (stored here as config.json / pytorch_model.bin).
model = GPT2LMHeadModel.from_pretrained('./models/gpt-2/pytorch_model.bin',config='./models/gpt-2/config.json')

# Put the model in evaluation mode and move model and input to the same device
model.eval()
DEVICE = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
tokens_tensor = tokens_tensor.to(DEVICE)
model.to(DEVICE)

# Run the model on all input positions without tracking gradients
with torch.no_grad():
    outputs = model(tokens_tensor)
    predictions = outputs[0]

# Take the highest-scoring id at the last position as the predicted next word
predicted_index = torch.argmax(predictions[0, -1, :]).item()
predicted_text = tokenizer.decode(indexed_tokens + [predicted_index])
print("输出语句为：",predicted_text) # The GPT-2 model does not add special tokens to the input text.
# Output recorded by the author: Who is Li BiGor? Li BiGor is a Chinese
 

In [1]:
from transformers import GPT2Tokenizer
from my_gpt2 import GPT2LMHeadModel
import random
import torch


# Directory holding the local gpt2-large files; the trailing slash matters
# because file names are appended to it by plain string concatenation below.
model_path = './models/gpt2-large/'
# Data directory (not referenced in the visible part of this notebook).
data_path  = './data/'
# Tokenizer loaded from the same local model directory.
tokenizer = GPT2Tokenizer.from_pretrained(model_path) 
# Device selection. GPU index 3 is the preferred device, but a machine with
# CUDA and fewer than four GPUs would otherwise end up with an invalid
# 'cuda:3' device and fail as soon as something is moved onto it. Fall back to
# GPU 0 in that case, and to the CPU when CUDA is not available at all.
_PREFERRED_GPU = 3
if not torch.cuda.is_available():
    DEVICE = torch.device('cpu')
elif torch.cuda.device_count() > _PREFERRED_GPU:
    DEVICE = torch.device('cuda', _PREFERRED_GPU)
else:
    DEVICE = torch.device('cuda', 0)
# Seed Python's `random` module so that random candidate selection is
# reproducible between runs.
random.seed(42)
 
def select_top_k(predictions, k=10):
    '''
    Pick the next token id uniformly at random among the k highest-scoring
    candidates. Sampling among several candidates (instead of always taking
    the single best one) helps to avoid the model repeating words in a loop.

    @params predictions: model scores, indexed as predictions[0, -1, :]
                         (first batch item, last sequence position)
    @params     k      : number of top candidates to choose from (default 10);
                         values larger than the number of candidates simply
                         select among all of them
    @return            : the chosen index as a Python int
    @raises ValueError : if k is smaller than 1
    '''
    if k < 1:
        raise ValueError('k must be at least 1, got {}'.format(k))

    last_step_scores = predictions[0, -1, :]
    # Slice with k so that the argument is actually honoured (k=1 is greedy).
    top_indices = last_step_scores.sort(descending=True)[1][:k]
    # Index explicitly rather than calling random.choice on the tensor: some
    # Python releases test the truthiness of the sequence inside choice(),
    # which is ambiguous for a multi-element tensor. randrange draws from the
    # same seeded generator.
    chosen = top_indices[random.randrange(len(top_indices))]
    return chosen.item()


class BeamHypotheses:
    """Bounded list of (score, hypothesis) pairs that keeps the best `num_beams` entries."""

    def __init__(self, num_beams, max_length, length_penalty, early_stopping = False, src_length=0):
        """
        Create an empty n-best list.

        num_beams:      maximum number of hypotheses kept.
        max_length:     stored minus one (the bos token is not counted).
        length_penalty: exponent applied to the length when normalising scores.
        early_stopping: when true, the list reports itself done as soon as it is full.
        src_length:     number of leading tokens excluded from the length used in `add`.
        """
        self.beams = []
        self.worst_score = 1e9
        self.num_beams = num_beams
        self.src_length = src_length
        self.length_penalty = length_penalty
        self.early_stopping = early_stopping
        self.max_length = max_length - 1  # ignoring bos_token

    def __len__(self):
        """Return how many hypotheses are currently stored."""
        return len(self.beams)

    def add(self, hyp, sum_logprobs):
        """
        Offer a hypothesis to the list.

        The score is `sum_logprobs` divided by the hypothesis length (without
        the first `src_length` tokens) raised to `length_penalty`. The
        hypothesis is stored when there is still room or when it beats the
        current worst score; if the list then exceeds `num_beams`, the
        lowest-scoring entry is dropped.
        """
        effective_len = len(hyp) - self.src_length
        score = sum_logprobs / effective_len ** self.length_penalty

        has_room = len(self) < self.num_beams
        if not has_room and not score > self.worst_score:
            return

        self.beams.append((score, hyp))
        if len(self) <= self.num_beams:
            self.worst_score = min(score, self.worst_score)
            return

        # Over capacity: rank by score, evict the lowest entry, and remember
        # the runner-up as the new worst score.
        ranked = sorted((s, pos) for pos, (s, _) in enumerate(self.beams))
        lowest_pos = ranked[0][1]
        del self.beams[lowest_pos]
        self.worst_score = ranked[1][0]

    def is_done(self, best_sum_logprobs):
        """
        Tell whether the search for this sentence can stop.

        Not done while fewer than `num_beams` hypotheses are stored. Once
        full, done immediately under early stopping; otherwise done when the
        worst stored score is at least `best_sum_logprobs` normalised by
        `max_length ** length_penalty`.
        """
        if len(self) < self.num_beams:
            return False
        if self.early_stopping:
            return True
        best_reachable = best_sum_logprobs / self.max_length ** self.length_penalty
        return self.worst_score >= best_reachable

def do_test(text,length,model):
    '''
    Generate a continuation of `text` token by token and print the result.

    On every step the current context is fed to the model, one token id is
    picked via select_top_k(..., k=1), its decoded text is appended to the
    output, and the id is appended to the context. Generation stops after
    `length` tokens or as soon as a generated piece contains the end-of-text
    marker. The final text (prompt plus continuation) is printed; nothing is
    returned.

    @params text: the prompt
    @params length: maximum number of tokens to generate
    @params model: the pretrained model (GPT-2) used for prediction; it is
                   expected to already live on DEVICE
    @raises ValueError: if tokens are requested for a prompt that encodes to
                        no tokens at all
    '''
    # The original author notes a model limit of 1024 positions; the context
    # is kept to the most recent 1023 tokens.
    max_context = 1023
    eos_marker = '<|endoftext|>'

    # Truncate up front as well, so that an over-long prompt cannot exceed the
    # model's context on the very first forward pass.
    indexed_tokens = tokenizer.encode(text)[-max_context:]
    if length > 0 and not indexed_tokens:
        raise ValueError('text must encode to at least one token')

    model.eval()
    total_predicted_text = text

    # Let the model make up to `length` successive predictions.
    for _ in range(length):
        tokens_tensor = torch.tensor([indexed_tokens]).to(DEVICE)

        with torch.no_grad():
            predictions = model(tokens_tensor)[0]

        predicted_index = select_top_k(predictions, k=1)

        piece = tokenizer.decode(predicted_index)
        total_predicted_text += piece
        # Only inspect the newly generated piece: a marker that is already
        # part of the prompt must not end generation after a single token.
        if eos_marker in piece:
            break

        # Append the new token and keep only the most recent context window.
        indexed_tokens = (indexed_tokens + [predicted_index])[-max_context:]

    print(total_predicted_text)

## Load the model
# GPT2LMHeadModel here is the project-local class imported from my_gpt2.
# Weights and configuration are read from explicit file paths built by
# appending the file names to model_path.
model = GPT2LMHeadModel.from_pretrained(model_path+'pytorch_model.bin',config=model_path+'config.json')
# Move the model onto the selected device.
model.to(DEVICE)

  from .autonotebook import tqdm as notebook_tqdm


GPT2LMHeadModel(
  (shared_parameters): ModuleDict()
  (transformer): GPT2Model(
    (shared_parameters): ModuleDict()
    (invertible_adapters): ModuleDict()
    (wte): Embedding(50257, 1280)
    (wpe): Embedding(1024, 1280)
    (drop): Dropout(p=0.1, inplace=False)
    (h): ModuleList(
      (0): GPT2Block(
        (ln_1): LayerNorm((1280,), eps=1e-05, elementwise_affine=True)
        (attn): GPT2Attention(
          (c_attn): Conv1D()
          (c_proj): Conv1D()
          (attn_dropout): Dropout(p=0.1, inplace=False)
          (resid_dropout): Dropout(p=0.1, inplace=False)
          (prefix_tuning): PrefixTuningShim(
            (pool): PrefixTuningPool(
              (prefix_tunings): ModuleDict()
            )
          )
        )
        (ln_2): LayerNorm((1280,), eps=1e-05, elementwise_affine=True)
        (mlp): GPT2MLP(
          (c_fc): Conv1D()
          (c_proj): Conv1D()
          (act): NewGELUActivation()
          (dropout): Dropout(p=0.1, inplace=False)
        )
   

In [8]:
## Run a test
# Prompt: a small table of name/profession records followed by two answered
# questions and a third, unanswered one; the model generates two more tokens.
do_test(text="|Name : David , Profession : Docter | Name : Tom , Profession : Teacher | Name : Cat , Profession : Student | What is David's profession? A: Docter. What is Tom's profession? A: Teacher. What is Cat's profession? A:",length=2,model=model)

|Name : David , Profession : Docter | Name : Tom , Profession : Teacher | Name : Cat , Profession : Student | What is David's profession? A: Docter. What is Tom's profession? A: Teacher. What is Cat's profession? A: Student What
