In [1]:
import torch
from transformers import AutoTokenizer, AutoModelForCausalLM


# Local checkpoint directory (the path name suggests Llama-2 7B in HF format).
model_path = "/home/xwj/Model/llama2-7b-hf"

# Load the tokenizer and the causal language model from the same directory.
tokenizer = AutoTokenizer.from_pretrained(model_path)
model = AutoModelForCausalLM.from_pretrained(model_path)

# Two short sample sentences used as the input batch in the cells below.
texts = ["the quick brown fox", "the lazy dog sleeps"]

Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

In [6]:
# Reuse the EOS token as the pad token so that padding=True has a token to
# pad shorter sequences with.
tokenizer.pad_token = tokenizer.eos_token

# Tokenize the whole batch into PyTorch tensors, padded to the longest text.
# truncation=True is what makes max_length take effect: with padding=True and
# no truncation strategy the tokenizer ignores max_length and emits a warning.
encoded_texts = tokenizer(
    texts,
    return_tensors="pt",
    padding=True,
    truncation=True,
    max_length=128,
)
print(encoded_texts)
print(encoded_texts["input_ids"].shape)

{'input_ids': tensor([[    1,   278,  4996, 17354,  1701, 29916],
        [    1,   278, 17366, 11203, 12844,  8961]]), 'attention_mask': tensor([[1, 1, 1, 1, 1, 1],
        [1, 1, 1, 1, 1, 1]])}
torch.Size([2, 6])




In [7]:
# Pull the token ids and the attention mask out of the tokenizer output so
# the following cells can refer to them directly.
input_ids, attention_mask = (
    encoded_texts["input_ids"],
    encoded_texts["attention_mask"],
)

In [13]:
# Forward pass only: gradient tracking is disabled for the model call.
with torch.no_grad():
    outputs = model(
        input_ids=input_ids,
        attention_mask=attention_mask,
    )

# Keep the raw logits for the next cells, then show the full output object.
logits = outputs.logits
print(outputs)

CausalLMOutputWithPast(loss=None, logits=tensor([[[-12.9832,  -7.4134,  -0.4327,  ...,  -6.8297,  -8.0880,  -7.5863],
         [-14.1865, -12.8350,  -7.1458,  ...,  -9.7179, -10.8815,  -9.9526],
         [-11.4141, -13.2432,  -2.1016,  ...,  -7.7475,  -8.8215,  -7.6209],
         [ -9.7292,  -6.5780,   5.4661,  ...,  -4.5612,  -5.7276,  -5.6714],
         [ -7.4323,  -7.9920,   9.5942,  ...,  -0.8915,  -2.7065,  -0.3414],
         [-11.2905,  -9.2060,   2.6828,  ...,  -5.9094,  -7.0724,  -5.8816]],

        [[-12.9832,  -7.4134,  -0.4327,  ...,  -6.8297,  -8.0880,  -7.5863],
         [-14.1865, -12.8350,  -7.1458,  ...,  -9.7179, -10.8815,  -9.9526],
         [ -9.3224,  -7.6790,   0.1042,  ...,  -8.0256,  -6.5289,  -7.0641],
         [-13.4260, -13.3056,   1.9306,  ...,  -7.2037,  -8.3358, -10.3643],
         [ -4.3707,  -2.0274,   7.6155,  ...,   1.2817,  -0.4330,   0.7789],
         [ -7.7859,  -7.8256,   8.4216,  ...,  -3.0686,  -5.1709,  -6.4460]]]), past_key_values=<transformers.

In [15]:
# Shift by one so the logits at position i line up with the token at i + 1:
# drop the last position's logits and the first token id.
next_token_logits = logits[:, :-1]
target_ids = input_ids[:, 1:]

# Normalise over the last dimension to turn the logits into probabilities.
probs = next_token_logits.softmax(dim=-1)
print(probs)

tensor([[[4.3589e-10, 1.1438e-07, 1.2302e-04,  ..., 2.0504e-07,
          5.8260e-08, 9.6210e-08],
         [2.0143e-09, 7.7822e-09, 2.3008e-06,  ..., 1.7573e-07,
          5.4891e-08, 1.3896e-07],
         [5.3176e-10, 8.5377e-11, 5.8896e-06,  ..., 2.0801e-08,
          7.1065e-09, 2.3608e-08],
         [7.4380e-11, 1.7379e-09, 2.9559e-04,  ..., 1.3059e-08,
          4.0676e-09, 4.3027e-09],
         [2.1137e-13, 1.2077e-13, 5.2427e-06,  ..., 1.4645e-10,
          2.3847e-11, 2.5384e-10]],

        [[4.3589e-10, 1.1438e-07, 1.2302e-04,  ..., 2.0504e-07,
          5.8260e-08, 9.6210e-08],
         [2.0144e-09, 7.7821e-09, 2.3008e-06,  ..., 1.7573e-07,
          5.4891e-08, 1.3896e-07],
         [1.0697e-09, 5.5328e-09, 1.3278e-05,  ..., 3.9125e-09,
          1.7476e-08, 1.0233e-08],
         [1.4699e-10, 1.6580e-10, 6.8635e-04,  ..., 7.4057e-08,
          2.3874e-08, 3.1402e-09],
         [4.6585e-12, 4.8524e-11, 7.4784e-07,  ..., 1.3276e-09,
          2.3899e-10, 8.0300e-10]]])


In [None]:
# Walk through the first few positions of the first sentence and compare the
# model's most likely next token with the token that actually follows.
first_sentence = texts[0]
print(f"\n原句: {first_sentence}")

# zip stops at the shortest slice, so at most 3 positions are shown and never
# more than there are (input, target) pairs.
shown_positions = zip(input_ids[0][:3], target_ids[0][:3], probs[0][:3])
for i, (current_id, next_id, next_probs) in enumerate(shown_positions):
    current_text = tokenizer.decode([current_id])
    expected_text = tokenizer.decode([next_id])
    # Highest-probability token id at this position, decoded back to text.
    best_id = torch.argmax(next_probs).item()
    best_text = tokenizer.decode([best_id])

    print(f"位置 {i} 输入token: '{current_text}'")
    print(f"        模型预测: '{best_text}'，真实下一个: '{expected_text}'\n")


原句：the quick brown fox
位置 0 输入token: '<s>'
        模型预测: '#'，真实下一个: 'the'

位置 1 输入token: 'the'
        模型预测: ''，真实下一个: 'quick'

位置 2 输入token: 'quick'
        模型预测: 'est'，真实下一个: 'brown'

