In [1]:
import torch
from transformers import LlamaTokenizer, LlamaForCausalLM, LlamaTokenizerFast
from transformers import AutoTokenizer

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
# Local directory holding the pretrained open_llama_3b_v2 checkpoint.
# NOTE(review): absolute, machine-specific path — adjust before running elsewhere.
model_path = r"G:\code\pretrain_model_dir\open_llama_3b_v2"
# Load the (non-fast) Llama tokenizer from that directory and inspect it.
tokenizer = LlamaTokenizer.from_pretrained(model_path)
print(type(tokenizer))
print(tokenizer)

You are using the default legacy behaviour of the <class 'transformers.models.llama.tokenization_llama.LlamaTokenizer'>. If you see this, DO NOT PANIC! This is expected, and simply means that the `legacy` (previous) behavior will be used so nothing changes for you. If you want to use the new behaviour, set `legacy=True`. This should only be set if you understand what it means, and thouroughly read the reason why this was added as explained in https://github.com/huggingface/transformers/pull/24565


<class 'transformers.models.llama.tokenization_llama.LlamaTokenizer'>
LlamaTokenizer(name_or_path='G:\code\pretrain_model_dir\open_llama_3b_v2', vocab_size=32000, model_max_length=2048, is_fast=False, padding_side='right', truncation_side='right', special_tokens={'bos_token': AddedToken("<s>", rstrip=False, lstrip=False, single_word=False, normalized=True), 'eos_token': AddedToken("</s>", rstrip=False, lstrip=False, single_word=False, normalized=True), 'unk_token': AddedToken("<unk>", rstrip=False, lstrip=False, single_word=False, normalized=True)}, clean_up_tokenization_spaces=False)


In [3]:
# Load the model in float16; device_map='auto' leaves device placement to the library.
model = LlamaForCausalLM.from_pretrained(
    model_path, torch_dtype=torch.float16, device_map='auto',
)
# Confirm the dtype and the device the model reports.
print(model.dtype, model.device)

torch.float16 cuda:0


In [4]:
# GPU memory usage: 10 GB (author's observation)
prompt = "I look forward to"
# Tokenize to PyTorch tensors and move them to the model's device.
inputs = tokenizer(prompt, return_tensors="pt").to(model.device)
# Generate at most 40 new tokens with the model's default generation settings.
outputs = model.generate(**inputs, max_new_tokens=40)
# Decode the generated ids back to text, dropping special tokens such as <s>.
tokenizer.batch_decode(outputs, skip_special_tokens=True)

['I look forward to the day when I can say that I have been a part of the world of blogging for a year. I have been blogging for a year now, and I have to say that I have enjoyed it']

In [5]:
# Same tokenize -> generate -> decode pipeline with a different prompt.
# The resulting text is the reference that the manual decoding loop later reproduces.
prompt = "I love beijing , because"
inputs = tokenizer(prompt, return_tensors="pt").to(model.device)
outputs = model.generate(**inputs, max_new_tokens=40)
tokenizer.batch_decode(outputs, skip_special_tokens=True)

['I love beijing , because it is a city of contrasts.\nI love beijing , because it is a city of contrasts.\nI love beijing , because it is a city of contrasts.\nI']

In [6]:
# Show the generation defaults attached to the model.
model.generation_config

GenerationConfig {
  "_from_model_config": true,
  "bos_token_id": 1,
  "eos_token_id": 2,
  "pad_token_id": 0,
  "transformers_version": "4.32.1"
}

In [7]:
# Inspect a few model/config attributes alongside the current tokenized inputs.
print(model.config.is_encoder_decoder)  # encoder-decoder flag from the config
print(model.main_input_name)  # name of the model's primary input argument
print(inputs)  # encoding of the most recently tokenized prompt
print(model.config.max_position_embeddings)  # position-embedding limit from the config

False
input_ids
{'input_ids': tensor([[    1,   306,  1219,   339, 17336,  1518,   940]], device='cuda:0'), 'attention_mask': tensor([[1, 1, 1, 1, 1, 1, 1]], device='cuda:0')}
2048


In [8]:
# Show the config's use_return_dict setting.
model.config.use_return_dict

True

# 试试看每层的输入

In [9]:
# Re-tokenize the prompt and look at exactly what will be passed to the model:
# the encoding itself, its Python type, and the shape of the input ids.
prompt = "I love beijing , because"
inputs = tokenizer(prompt, return_tensors="pt").to(model.device)
print(inputs)
print(type(inputs))
print(inputs["input_ids"].shape)

{'input_ids': tensor([[    1,   306,  1219,   339, 17336,  1518,   940]], device='cuda:0'), 'attention_mask': tensor([[1, 1, 1, 1, 1, 1, 1]], device='cuda:0')}
<class 'transformers.tokenization_utils_base.BatchEncoding'>
torch.Size([1, 7])


In [10]:
# Check the tokenizer's special-token handling: whether BOS / EOS are added
# automatically, their string forms, and the pad token id.
print(tokenizer.add_bos_token)
print(tokenizer.add_eos_token)
print(tokenizer.bos_token)
print(tokenizer.eos_token)
print(tokenizer.pad_token_id)

True
False
<s>
</s>
None


In [11]:
# Single forward pass over the tokenized prompt (no generation loop).
# NOTE(review): this runs with autograd enabled — the logits displayed further down
# carry a grad_fn — so torch.no_grad() would avoid building a graph for inference-only use.
result = model(**inputs)
print(type(result))
print(result.keys())

<class 'transformers.modeling_outputs.CausalLMOutputWithPast'>
odict_keys(['logits', 'past_key_values'])


In [12]:
# Logits shape; the recorded [1, 7, 32000] lines up with the 7 input ids and vocab_size=32000.
result["logits"].shape

torch.Size([1, 7, 32000])

In [13]:
# Display the raw logits tensor.
result["logits"]

tensor([[[-86.4375, -82.7500, -75.1875,  ..., -83.2500, -83.6875, -84.0625],
         [-81.6250, -78.5000, -70.3750,  ..., -80.6250, -80.2500, -79.1250],
         [-84.3125, -78.5000, -73.7500,  ..., -81.5625, -81.4375, -81.3750],
         ...,
         [-70.0625, -68.0625, -57.2500,  ..., -71.3750, -66.6250, -66.7500],
         [-79.5625, -77.9375, -66.6875,  ..., -77.6875, -77.1250, -74.8750],
         [-82.5625, -82.7500, -71.1875,  ..., -81.5625, -81.1250, -80.6875]]],
       device='cuda:0', grad_fn=<ToCopyBackward0>)

In [14]:
# Only the last position needs to be selected here.
logits = result["logits"][:, -1, :]
# Greedy choice: index of the largest logit along the vocabulary axis.
torch.argmax(logits, dim=-1)

tensor([358], device='cuda:0')

In [15]:
# Turn the chosen token id back into text.
tokenizer.batch_decode(torch.argmax(logits, dim=-1), skip_special_tokens=True)

['it']

In [24]:
# Extend the prompt by hand: original text + a manually inserted space + the decoded token.
new_prompt = "I love beijing , because" + " " +  tokenizer.batch_decode(torch.argmax(logits, dim=-1), skip_special_tokens=True)[0]
print(new_prompt)

I love beijing , because it


In [67]:
def generate_one(prompt, verbose=False):
    """Extend ``prompt`` by exactly one greedily chosen token.

    The prompt is encoded with the notebook-level ``tokenizer``, passed once
    through the notebook-level ``model``, and the highest-logit token at the
    last position is appended to the input ids. The full id sequence is then
    decoded back to text, so the tokenizer (not this function) decides whether
    the new piece is preceded by a space.

    Args:
        prompt: Text to continue.
        verbose: When true, print the encoded inputs (value, type, shape) and
            the type and keys of the model output.

    Returns:
        The decoded text of the prompt plus the one new token, with special
        tokens removed.
    """
    inputs = tokenizer(prompt, return_tensors="pt").to(model.device)
    if verbose:
        print(inputs)
        print(type(inputs))
        print(inputs["input_ids"].shape)

    # Inference only: disable autograd so no graph is built and no
    # activations are retained on the GPU for a backward pass that never runs.
    with torch.no_grad():
        result = model(**inputs)
    if verbose:
        print(type(result))
        print(result.keys())

    # Only the last position scores the next token.
    logits = result["logits"][:, -1, :]
    token_id = torch.argmax(logits, dim=-1)
    # How does decoding know whether a space should be added?
    # When a single token id is decoded, a piece prefixed with ▁ marks the start of
    # a word and needs a space; a piece without ▁ is the middle or end of a word
    # and takes no space. Decoding the whole sequence at once handles this for us.
    # unsqueeze(-1) turns the per-row token ids into a column so they can be
    # appended along the sequence axis (same result as unsqueeze(0) for one row).
    new_input_ids = torch.cat([inputs["input_ids"], token_id.unsqueeze(-1)], dim=-1)
    new_prompt = tokenizer.batch_decode(new_input_ids, skip_special_tokens=True)[0]

    return new_prompt

In [65]:
# Extend the hand-built prompt by one more token.
# NOTE(review): the recorded output below includes a printed tensor that the
# generate_one definition shown above does not print — it presumably comes from
# an earlier version of the function (execution counts are out of order).
generate_one(new_prompt)

tensor([325], device='cuda:0')


'I love beijing , because it is'

In [70]:
# Look up the raw vocabulary pieces for two token ids; a leading ▁ marks a
# piece that starts a new word, while a bare piece continues the current word.
print(tokenizer.convert_ids_to_tokens(7352))
print(tokenizer.convert_ids_to_tokens(29508))

▁contrast
s


In [48]:
# It is a special symbol, not an ordinary underscore: compare the code points.
ord("▁"), ord("_")

(9601, 95)

In [71]:
# Reference output of model.generate for the same prompt, for comparison:
# ['I love beijing , because it is a city of contrasts.\nI love beijing , because it is a city of contrasts.\nI love beijing , because it is a city of contrasts.\nI']
# Rebuild it one token at a time: each step feeds the extended text back into
# generate_one and prints the text so far. `prompt` comes from an earlier cell.
cur_prompt = prompt
for i in range(10):
    cur_prompt = generate_one(cur_prompt)
    print(cur_prompt)

I love beijing , because it
I love beijing , because it is
I love beijing , because it is a
I love beijing , because it is a city
I love beijing , because it is a city of
I love beijing , because it is a city of contrast
I love beijing , because it is a city of contrasts
I love beijing , because it is a city of contrasts.
I love beijing , because it is a city of contrasts.

I love beijing , because it is a city of contrasts.
I
