<a href="https://colab.research.google.com/github/yuyu990116/transformers_tutorials/blob/main/P3_IA3.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
from google.colab import drive
drive.mount('/content/drive')
import os
os.chdir("/content/drive/MyDrive/nlp")
!pip install datasets
!pip install accelerate==0.22.0
!pip install transformers==4.33.1
!pip install peft==0.5.0
from transformers import AutoTokenizer,AutoModelForCausalLM,DataCollatorForSeq2Seq,TrainingArguments,Trainer,pipeline
from datasets import Dataset,load_dataset
from peft import IA3Config, TaskType, get_peft_model, PeftModel
ds = load_dataset("zhengr/alpaca-chinese-dataset")
model = AutoModelForCausalLM.from_pretrained("Langboat/bloom-1b4-zh",low_cpu_mem_usage=True,device_map="auto",torch_dtype='auto')
#low_cpu_mem_usage=True + 'torch_dtype'='auto'  会节省内存，注意low cpu mem 在ZeRO stage 3不能用
tokenizer = AutoTokenizer.from_pretrained("Langboat/bloom-1b4-zh")

In [2]:
def data_process(example): #这次数据处理不进行batched，只处理单个的数据，因为label部分不太容易做
  max_length=256
  tokenized_input=tokenizer("\n".join(["User:"+example["instruction"],example["input"]]).strip()+"\nAssistant:")
  tokenized_output=tokenizer(example["output"]+tokenizer.eos_token)
  input_ids=tokenized_input["input_ids"]+tokenized_output["input_ids"]
  attention_mask=tokenized_input["attention_mask"]+tokenized_output["attention_mask"]
  labels= [-100]*len(tokenized_input["input_ids"])+tokenized_output["input_ids"]
  if len(input_ids)>max_length:
    input_ids=input_ids[:max_length]
    attention_mask=attention_mask[:max_length]
    labels=labels[:max_length]
  return {
      "input_ids":input_ids,
      "attention_mask":attention_mask,
      "labels":labels
  }
tokenized_ds=ds.map(data_process,remove_columns=ds["train"].column_names)
tokenized_ds

Map:   0%|          | 0/1000 [00:00<?, ? examples/s]

DatasetDict({
    train: Dataset({
        features: ['input_ids', 'attention_mask', 'labels'],
        num_rows: 1000
    })
})

In [3]:
model

BloomForCausalLM(
  (transformer): BloomModel(
    (word_embeddings): Embedding(46145, 2048)
    (word_embeddings_layernorm): LayerNorm((2048,), eps=1e-05, elementwise_affine=True)
    (h): ModuleList(
      (0-23): 24 x BloomBlock(
        (input_layernorm): LayerNorm((2048,), eps=1e-05, elementwise_affine=True)
        (self_attention): BloomAttention(
          (query_key_value): Linear(in_features=2048, out_features=6144, bias=True)
          (dense): Linear(in_features=2048, out_features=2048, bias=True)
          (attention_dropout): Dropout(p=0.0, inplace=False)
        )
        (post_attention_layernorm): LayerNorm((2048,), eps=1e-05, elementwise_affine=True)
        (mlp): BloomMLP(
          (dense_h_to_4h): Linear(in_features=2048, out_features=8192, bias=True)
          (gelu_impl): BloomGelu()
          (dense_4h_to_h): Linear(in_features=8192, out_features=2048, bias=True)
        )
      )
    )
    (ln_f): LayerNorm((2048,), eps=1e-05, elementwise_affine=True)
  )
  (l

In [5]:
config = IA3Config(task_type=TaskType.CAUSAL_LM)
#target_modules一般来说是k,v 但是bloom比较特殊，qkv是一起的，所以这里针对bloom的话config里面target_modules就是qkv，feedforward_module就是ff
#IA3是对K,V,FF，三部分的值进行调整（抑制或放大内部激活，通过可学习的向量对激活值进行抑制或者放大）
config

IA3Config(peft_type=<PeftType.IA3: 'IA3'>, auto_mapping=None, base_model_name_or_path=None, revision=None, task_type=<TaskType.CAUSAL_LM: 'CAUSAL_LM'>, inference_mode=False, target_modules=None, feedforward_modules=None, fan_in_fan_out=False, modules_to_save=None, init_ia3_weights=True)

In [6]:
model = get_peft_model(model, config)

In [7]:
model
#query_key_value和dense_4h_to_h后面多了ia3_l

PeftModelForCausalLM(
  (base_model): IA3Model(
    (model): BloomForCausalLM(
      (transformer): BloomModel(
        (word_embeddings): Embedding(46145, 2048)
        (word_embeddings_layernorm): LayerNorm((2048,), eps=1e-05, elementwise_affine=True)
        (h): ModuleList(
          (0-23): 24 x BloomBlock(
            (input_layernorm): LayerNorm((2048,), eps=1e-05, elementwise_affine=True)
            (self_attention): BloomAttention(
              (query_key_value): Linear(
                in_features=2048, out_features=6144, bias=True
                (ia3_l): ParameterDict(  (default): Parameter containing: [torch.cuda.FloatTensor of size 6144x1 (cuda:0)])
              )
              (dense): Linear(in_features=2048, out_features=2048, bias=True)
              (attention_dropout): Dropout(p=0.0, inplace=False)
            )
            (post_attention_layernorm): LayerNorm((2048,), eps=1e-05, elementwise_affine=True)
            (mlp): BloomMLP(
              (dense_h_to_4h)

In [8]:
config

IA3Config(peft_type=<PeftType.IA3: 'IA3'>, auto_mapping=None, base_model_name_or_path='Langboat/bloom-1b4-zh', revision=None, task_type=<TaskType.CAUSAL_LM: 'CAUSAL_LM'>, inference_mode=False, target_modules=['query_key_value', 'mlp.dense_4h_to_h'], feedforward_modules=['mlp.dense_4h_to_h'], fan_in_fan_out=False, modules_to_save=None, init_ia3_weights=True)

In [10]:
model.print_trainable_parameters()
#参与微调的参数很少，比Lora还少

trainable params: 344,064 || all params: 1,303,455,744 || trainable%: 0.026396293206254036


In [None]:
# args = TrainingArguments(
#     output_dir="./chatbot",
#     per_device_train_batch_size=1,
#     gradient_accumulation_steps=8,
#     logging_steps=10,
#     num_train_epochs=1,
#     learning_rate=3e-3
# )

In [None]:
args = TrainingArguments(
    output_dir="./IA3",
    per_device_train_batch_size=4,
    gradient_accumulation_steps=8,
    logging_steps=5,
    num_train_epochs=1,
    save_steps=5,
    learning_rate=3e-3 #官方推荐3e-3
)
trainer = Trainer(
    model=model,
    args=args,
    train_dataset=tokenized_ds["train"],
    data_collator=DataCollatorForSeq2Seq(tokenizer=tokenizer, padding=True)
)
trainer.train()

In [20]:
model = AutoModelForCausalLM.from_pretrained("Langboat/bloom-1b4-zh",low_cpu_mem_usage=True,torch_dtype='auto')
p_model = PeftModel.from_pretrained(model, model_id="/content/drive/MyDrive/nlp/IA3/checkpoint-10")
model = p_model.cuda()
ipt = tokenizer("Human: {}\n{}".format("考试有哪些技巧？", "").strip() + "\n\nAssistant: ", return_tensors="pt").to(model.device)
tokenizer.decode(model.generate(**ipt)[0], skip_special_tokens=True)



'Human: 考试有哪些技巧？\n\nAssistant: 考试技巧有很多，比如：\n'

In [23]:
ipt = tokenizer("Human: {}\n{}".format("考试有哪些技巧？", "").strip() + "\n\nAssistant: ", return_tensors="pt").to(model.device)
tokenizer.decode(model.generate(**ipt,max_length=128,do_sample=True,repetition_penalty=1.3)[0], skip_special_tokens=True)

'Human: 考试有哪些技巧？\n\nAssistant: 面试和考前计划对成功至关重要，在准备过程中应密切关注重要信息并确定主要内容。最重要的是找到自己的优势并为即将发生的冲突创建良好的情况模型。您还将学习必要的沟通礼仪并与他人进行有效的互动。\n参加英语语言专业学位 (LLM)学生应该具有熟练的听、说及其它交流技能以及批判性思维能力和科学分析能力以便有效地与世界其他地方的人交谈并获得他人的尊重并在竞争激烈的工作环境中获得职业机会。\n通过提供专门研究主题的特定课程来帮助您的教育项目保持竞争力并且培养更具相关性和创造'

In [12]:
model.generate(**ipt, max_length=128)

tensor([[26283,    29,   210, 12913, 26620, 16012,  1518,   189,   189,  4340,
         17245,    29,   210, 12913, 26620, 16012,  1518,   189,  4340, 17245,
            29,   210, 12913, 26620, 16012,  1518,   189,  4340, 17245,    29,
           210, 12913, 26620, 16012,  1518,   189,  4340, 17245,    29,   210,
         12913, 26620, 16012,  1518,   189,  4340, 17245,    29,   210, 12913,
         26620, 16012,  1518,   189,  4340, 17245,    29,   210, 12913, 26620,
         16012,  1518,   189,  4340, 17245,    29,   210, 12913, 26620, 16012,
          1518,   189,  4340, 17245,    29,   210, 12913, 26620, 16012,  1518,
           189,  4340, 17245,    29,   210, 12913, 26620, 16012,  1518,   189,
          4340, 17245,    29,   210, 12913, 26620, 16012,  1518,   189,  4340,
         17245,    29,   210, 12913, 26620, 16012,  1518,   189,  4340, 17245,
            29,   210, 12913, 26620, 16012,  1518,   189,  4340, 17245,    29,
           210, 12913, 26620, 16012,  1518,   189,  