In [19]:
from transformers import AutoModelForCausalLM, AutoTokenizer

from peft import PeftModel

In [20]:
## 学术资源加速
import subprocess
import os

result = subprocess.run('bash -c "source /etc/network_turbo && env | grep proxy"', shell=True, capture_output=True, text=True)
output = result.stdout
for line in output.splitlines():
    if '=' in line:
        var, value = line.split('=', 1)
        os.environ[var] = value

huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)


## 加载基础模型

In [21]:
model = AutoModelForCausalLM.from_pretrained("Langboat/bloom-1b4-zh", low_cpu_mem_usage=True)
tokenizer = AutoTokenizer.from_pretrained("Langboat/bloom-1b4-zh")

## 加载Lora模型

In [22]:
p_model = PeftModel.from_pretrained(model, model_id="/root/autodl-tmp/tuningdata/lora/checkpoint-500")
p_model

PeftModelForCausalLM(
  (base_model): LoraModel(
    (model): BloomForCausalLM(
      (transformer): BloomModel(
        (word_embeddings): Embedding(46145, 2048)
        (word_embeddings_layernorm): LayerNorm((2048,), eps=1e-05, elementwise_affine=True)
        (h): ModuleList(
          (0-23): 24 x BloomBlock(
            (input_layernorm): LayerNorm((2048,), eps=1e-05, elementwise_affine=True)
            (self_attention): BloomAttention(
              (query_key_value): lora.Linear(
                (base_layer): Linear(in_features=2048, out_features=6144, bias=True)
                (lora_dropout): ModuleDict(
                  (default): Identity()
                )
                (lora_A): ModuleDict(
                  (default): Linear(in_features=2048, out_features=8, bias=False)
                )
                (lora_B): ModuleDict(
                  (default): Linear(in_features=8, out_features=6144, bias=False)
                )
                (lora_embedding_A): Paramete

In [23]:
from transformers import pipeline

pipe = pipeline("text-generation", model=p_model, tokenizer=tokenizer, device=0)
ipt = "Human: {}\n{}".format("如何写好一个简历？", "").strip() + "\n\nAssistant: "
pipe(ipt, max_length=256, do_sample=True, )

The model 'PeftModelForCausalLM' is not supported for text-generation. Supported models are ['BartForCausalLM', 'BertLMHeadModel', 'BertGenerationDecoder', 'BigBirdForCausalLM', 'BigBirdPegasusForCausalLM', 'BioGptForCausalLM', 'BlenderbotForCausalLM', 'BlenderbotSmallForCausalLM', 'BloomForCausalLM', 'CamembertForCausalLM', 'LlamaForCausalLM', 'CodeGenForCausalLM', 'CpmAntForCausalLM', 'CTRLLMHeadModel', 'Data2VecTextForCausalLM', 'ElectraForCausalLM', 'ErnieForCausalLM', 'FalconForCausalLM', 'FuyuForCausalLM', 'GitForCausalLM', 'GPT2LMHeadModel', 'GPT2LMHeadModel', 'GPTBigCodeForCausalLM', 'GPTNeoForCausalLM', 'GPTNeoXForCausalLM', 'GPTNeoXJapaneseForCausalLM', 'GPTJForCausalLM', 'LlamaForCausalLM', 'MarianForCausalLM', 'MBartForCausalLM', 'MegaForCausalLM', 'MegatronBertForCausalLM', 'MistralForCausalLM', 'MptForCausalLM', 'MusicgenForCausalLM', 'MvpForCausalLM', 'OpenLlamaForCausalLM', 'OpenAIGPTLMHeadModel', 'OPTForCausalLM', 'PegasusForCausalLM', 'PersimmonForCausalLM', 'PLBartFo

[{'generated_text': 'Human: 如何写好一个简历？\n\nAssistant: 谢谢你，Mark.\nMark, 你去问一下Sandy,看看她是不是准备了面试准备的。\n是的，没问题。\nSam, 我正在跟一个律师谈判谈一笔交易。\n他让我签个协议。\n这是什么？\n这是他们的电话号码。\n什么，他给你签下了协议？\n是的。\n你是在为哪家企业服务？\n我们是这个企业的销售经理。\n你是一个营销管理硕士吧。\n那是什么级别？\n我是营销主任。\n你在营销领域有5年的从业经验。\n我是在营销领域中才刚刚起步。\n那您能告诉我，如果您是营销管理者，有哪方面的优势吗？\n我的优势就是能熟练的使用各种营销系统。\n你的优势是什么呢？\n你的优势就是...\n我是销售冠军。\n你有什么话要跟我说吗？\n如果让我来问你，您想要...\n...\n我的问题是...\n...\n你有什么话要跟我说吗？\n我有什么话要对你说，Mark?\n我不知道...\n...\n这让我很困惑...\n...\n我什么都不知道。\n你有什么话要跟我说吗？\n什么也别说...\n...\nMark...\n...\n好吧，再见\n- Mark!\n- 早上好，Sandy.\n你好，Mark.\nMark?\nMark!\nMark!\nMark'}]

## 模型合并

In [24]:
merge_model = p_model.merge_and_unload()
merge_model

BloomForCausalLM(
  (transformer): BloomModel(
    (word_embeddings): Embedding(46145, 2048)
    (word_embeddings_layernorm): LayerNorm((2048,), eps=1e-05, elementwise_affine=True)
    (h): ModuleList(
      (0-23): 24 x BloomBlock(
        (input_layernorm): LayerNorm((2048,), eps=1e-05, elementwise_affine=True)
        (self_attention): BloomAttention(
          (query_key_value): Linear(in_features=2048, out_features=6144, bias=True)
          (dense): Linear(in_features=2048, out_features=2048, bias=True)
          (attention_dropout): Dropout(p=0.0, inplace=False)
        )
        (post_attention_layernorm): LayerNorm((2048,), eps=1e-05, elementwise_affine=True)
        (mlp): BloomMLP(
          (dense_h_to_4h): Linear(in_features=2048, out_features=8192, bias=True)
          (gelu_impl): BloomGelu()
          (dense_4h_to_h): Linear(in_features=8192, out_features=2048, bias=True)
        )
      )
    )
    (ln_f): LayerNorm((2048,), eps=1e-05, elementwise_affine=True)
  )
  (l

In [25]:
from transformers import pipeline

pipe = pipeline("text-generation", model=merge_model, tokenizer=tokenizer, device=0)
ipt = "Human:如何写好一个简历？\n\nAssistant: "
pipe(ipt, max_length=256,)

[{'generated_text': 'Human:如何写好一个简历？\n\nAssistant: 写简历时，要写清楚自己的优势和劣势，以及自己的兴趣爱好，这样才能让招聘者更清楚的了解你，从而决定是否录用你。\nJob: 写简历时，要写清楚自己的优势和劣势，以及自己的兴趣爱好，这样才能让招聘者更清楚的了解你，从而决定是否录用你。\nJob: 写简历时，要写清楚自己的优势和劣势，以及自己的兴趣爱好，这样才能让招聘者更清楚的了解你，从而决定是否录用你。\nJob: 写简历时，要写清楚自己的优势和劣势，以及自己的兴趣爱好，这样才能让招聘者更清楚的了解你，从而决定是否录用你。\nJob: 写简历时，要写清楚自己的优势和劣势，以及自己的兴趣爱好，这样才能让招聘者更清楚的了解你，从而决定是否录用你。\nJob: 写简历时，要写清楚自己的优势和劣势，以及自己的兴趣爱好，这样才能让招聘者更清楚的了解你，从而决定是否录用你。\nJob: 写简历时，要写清楚自己的'}]

## 完整模型保存

In [9]:
merge_model.save_pretrained("/root/autodl-tmp/tuningdata/merge_model")