In [1]:
from transformers import AutoTokenizer
from thuglm.modeling_chatglm import ChatGLMForConditionalGeneration
import torch
from peft import get_peft_model, LoraConfig, TaskType


In [2]:

# Load the base ChatGLM weights from the local "thuglm" directory, cast them to
# half precision and move the model onto the GPU.
model = ChatGLMForConditionalGeneration.from_pretrained("thuglm").half().cuda()

# Wrap the base model with LoRA adapters on the fused query/key/value projection.
# These hyper-parameters presumably have to match the ones used when the
# checkpoint below was trained -- verify against the training script.
peft_config = LoraConfig(
    task_type=TaskType.CAUSAL_LM,
    inference_mode=False,
    r=8,
    lora_alpha=32,
    lora_dropout=0.1,
    target_modules=["query_key_value"],
)
model = get_peft_model(model, peft_config)

# Load the LoRA weights here; remember to point this at the right checkpoint.
peft_path = "test004/checkpoint-400/chatglm-lora.pt"

# NOTE: torch.load unpickles the file, so only load checkpoints you trust.
# map_location="cpu" keeps the load independent of the device the tensors were
# saved from; load_state_dict then copies them into the parameters already on
# the GPU.
lora_state_dict = torch.load(peft_path, map_location="cpu")

# strict=False is required because the checkpoint is expected to hold only the
# LoRA tensors, so every base-model key is reported as "missing". Keys that the
# model does not recognise are a different matter: they would be dropped
# silently, leaving the adapters at their initial values.
load_result = model.load_state_dict(lora_state_dict, strict=False)
if load_result.unexpected_keys:
    raise RuntimeError(
        f"{len(load_result.unexpected_keys)} key(s) in {peft_path!r} do not match "
        f"the PEFT-wrapped model, e.g. {load_result.unexpected_keys[:3]}; "
        "the LoRA weights were not fully loaded."
    )

# Switch to evaluation mode (disables dropout); as the cell's last expression
# this also displays the module tree.
model.eval()

Loading checkpoint shards: 100%|██████████| 8/8 [00:06<00:00,  1.16it/s]


PeftModelForCausalLM(
  (base_model): LoraModel(
    (model): ChatGLMForConditionalGeneration(
      (transformer): ChatGLMModel(
        (word_embeddings): Embedding(150528, 4096)
        (layers): ModuleList(
          (0): GLMBlock(
            (input_layernorm): LayerNorm((4096,), eps=1e-05, elementwise_affine=True)
            (attention): SelfAttention(
              (rotary_emb): RotaryEmbedding()
              (query_key_value): MergedLinear(
                in_features=4096, out_features=12288, bias=True
                (lora_dropout): Dropout(p=0.1, inplace=False)
                (lora_A): Linear(in_features=4096, out_features=16, bias=False)
                (lora_B): Conv1d(16, 8192, kernel_size=(1,), stride=(1,), groups=2, bias=False)
              )
              (dense): Linear(in_features=4096, out_features=4096, bias=True)
            )
            (post_attention_layernorm): LayerNorm((4096,), eps=1e-05, elementwise_affine=True)
            (mlp): GLU(
              (d

In [3]:
# Build the tokenizer from the same local "thuglm" directory as the model.
# trust_remote_code=True allows the custom tokenizer code shipped with that
# checkpoint to be executed, so only use it with a source you trust.
tokenizer = AutoTokenizer.from_pretrained(
    pretrained_model_name_or_path="thuglm",
    trust_remote_code=True,
)

Explicitly passing a `revision` is encouraged when loading a model with custom code to ensure no malicious code has been contributed in a newer revision.


In [4]:
# Prompt (Chinese): "Why do iced black tea and lemon tea taste the same?"
text = "为什么冰红茶和柠檬茶的味道一样？"

# Run a single chat turn under CUDA autocast. `history` is returned alongside
# the reply and kept so it is available for a follow-up turn.
# The generation kwargs echoed in this cell's output show do_sample=True, so
# the reply can differ from run to run.
with torch.autocast("cuda"):
    res, history = model.chat(
        tokenizer=tokenizer,
        query=text,
        max_length=300,
    )

print(res)

{'input_ids': tensor([[ 87193,  85093,  96131,  83826,  91661,  84328,  88253,  84042,  20031,
         150001, 150004]], device='cuda:0'), 'max_length': 300, 'num_beams': 1, 'do_sample': True, 'top_p': 0.7, 'temperature': 0.95, 'eos_token_id': 150005}
冰红茶和柠檬茶的味道之所以相似,可能是因为它们都使用了茶和柠檬汁作为主要调味剂。茶和柠檬汁都是天然的味道,在相同的温度下,它们的味道会非常相似。此外,它们都使用了糖和其他添加剂来增强味道。因此,尽管它们使用的材料不同,但它们的味道可能会非常相似。
