In [None]:
!git clone https://github.com/mymusise/ChatGLM-Tuning.git
%cd  ChatGLM-Tuning
!pip install -r requirements.txt 

In [None]:
# Run the repository's tokenize_dataset_rows.py script on data/alpaca_data.jsonl,
# passing data/alpaca as the save path and 128 as the maximum sequence length.
# A later cell loads a dataset from "data/alpaca/" with datasets.load_from_disk,
# so this step presumably has to run first -- TODO confirm the script's output format.
!python tokenize_dataset_rows.py \
    --jsonl_path data/alpaca_data.jsonl \
    --save_path data/alpaca \
    --max_seq_length 128

In [1]:
from transformers import AutoTokenizer, AutoModel, TrainingArguments, AutoConfig
from modeling_chatglm import ChatGLMForConditionalGeneration
import torch
import torch.nn as nn
from peft import get_peft_model, LoraConfig, TaskType


class CastOutputToFloat(nn.Sequential):
    """Sequential container whose forward result is converted to float32."""

    def forward(self, x):
        """Run the wrapped modules on ``x`` and return the result as float32."""
        result = super().forward(x)
        return result.to(torch.float32)


# Load ChatGLM-6B from a local checkpoint directory with 8-bit weights and let
# the library place the weights on the available devices (device_map="auto").
# NOTE: the checkpoint path is machine-specific; adjust it for your environment.
# `trust_remote_code` is intentionally not passed: this is a concrete model
# class, and the library warns that the flag only applies to Auto classes and
# is ignored here.
model = ChatGLMForConditionalGeneration.from_pretrained(
    "/root/autodl-tmp/chatglm-6b",
    load_in_8bit=True,
    device_map="auto",
)
# Prepare the model for fine-tuning: turn on gradient checkpointing and make
# the inputs require grads.
model.gradient_checkpointing_enable()
model.enable_input_require_grads()
# Wrap the LM head so its output is cast to float32 (see CastOutputToFloat).
model.lm_head = CastOutputToFloat(model.lm_head)
model.config.use_cache = False  # silence the warnings. Please re-enable for inference!

  from .autonotebook import tqdm as notebook_tqdm
The argument `trust_remote_code` is to be used with Auto classes. It has no effect here and is ignored.



Welcome to bitsandbytes. For bug reports, please submit your error trace to: https://github.com/TimDettmers/bitsandbytes/issues
CUDA SETUP: CUDA runtime path found: /usr/local/cuda-11.5/targets/x86_64-linux/lib/libcudart.so
CUDA SETUP: Highest compute capability among GPUs detected: 8.6
CUDA SETUP: Detected CUDA version 115
CUDA SETUP: Loading binary /home/mymusise/pro/stable-diffusion-webui/venv/lib/python3.8/site-packages/bitsandbytes/libbitsandbytes_cuda115.so...


Loading checkpoint shards: 100%|██████████| 8/8 [00:06<00:00,  1.17it/s]


In [2]:
# LoRA configuration for a causal-LM task in training mode (inference_mode=False):
# rank r=8, lora_alpha=32, and dropout 0.1 on the LoRA layers.
peft_config = LoraConfig(
    task_type=TaskType.CAUSAL_LM, inference_mode=False,
    r=8,
    lora_alpha=32, lora_dropout=0.1,
)
# Explicitly set `enable_lora` to None on the config and echo it back.
# NOTE(review): the effect of this attribute depends on the installed peft
# version -- confirm it is still read by get_peft_model.
peft_config.enable_lora = None
print(peft_config.enable_lora)
# Wrap the base model with the LoRA adapters. This rebinds `model`, so
# re-running this cell without reloading the base model would wrap it again.
model = get_peft_model(model, peft_config)

None


In [3]:
import datasets

# Directory of the dataset saved on disk; it matches the --save_path
# (data/alpaca) passed to tokenize_dataset_rows.py in the earlier cell.
dataset_path = "data/alpaca/"

# Load the saved dataset; it is later handed to the trainer as train_dataset.
dataset = datasets.load_from_disk(dataset_path)

In [4]:
from transformers import Trainer, HfArgumentParser

# Mark the model as parallelizable / model-parallel before it is handed to the
# trainer.
model.is_parallelizable = True
model.model_parallel = True

# Training hyper-parameters are spelled out inline here rather than being
# parsed from command-line arguments with HfArgumentParser.
training_args = TrainingArguments(
    output_dir="output",
    fp16=True,
    gradient_accumulation_steps=1,
    per_device_train_batch_size=1,
    learning_rate=2e-5,
    max_steps=1000,
    logging_steps=10,
)


def data_collator(features: list, pad_token_id: int = 150004) -> dict:
    """Pad a batch of tokenized examples to a common length and stack them.

    Args:
        features: Non-empty list of mappings, each holding an ``"input_ids"``
            sequence of integer token ids.
        pad_token_id: Token id appended to sequences shorter than the longest
            one in the batch. Defaults to 150004, the value that used to be
            hard-coded here.

    Returns:
        A dict with the single key ``"input_ids"`` mapping to a LongTensor of
        shape ``(len(features), longest_sequence_length)``. Rows are ordered
        from longest to shortest sequence; equal-length sequences keep their
        original relative order.

    Raises:
        ValueError: If ``features`` is empty.
    """
    if not features:
        # Same exception type the previous max()-on-empty raised, but with a
        # message that points at the actual problem.
        raise ValueError("data_collator received an empty batch")

    # reverse=True keeps the sort stable, so ties stay in input order.
    ordered = sorted(
        (feature["input_ids"] for feature in features), key=len, reverse=True
    )
    longest = len(ordered[0])
    padded = [
        torch.LongTensor(list(ids) + [pad_token_id] * (longest - len(ids)))
        for ids in ordered
    ]
    return {"input_ids": torch.stack(padded)}


class ModifiedTrainer(Trainer):
    """Trainer that calls the model with an explicit attention mask and uses
    the input ids as the labels."""

    def compute_loss(self, model, inputs, return_outputs=False):
        """Compute the language-modelling loss for one batch.

        Args:
            model: The model being trained; called with ``input_ids``,
                ``attention_mask`` and ``labels`` keyword arguments.
            inputs: Batch dict containing an ``"input_ids"`` tensor.
            return_outputs: When true, return ``(loss, outputs)`` instead of
                only the loss, as the base ``Trainer`` expects from
                ``compute_loss`` (e.g. during evaluation).

        Returns:
            The loss, or a ``(loss, outputs)`` tuple if ``return_outputs`` is
            true.
        """
        input_ids = inputs["input_ids"]
        seq_len = input_ids.shape[-1]
        # Build the mask directly on the inputs' device instead of on the CPU.
        # NOTE(review): this is an all-True (1, 1, seq, seq) mask -- confirm
        # the model's mask convention (True = attend vs. True = masked).
        attention_mask = torch.ones(
            1, 1, seq_len, seq_len, dtype=torch.bool, device=input_ids.device
        )
        # NOTE(review): labels are the raw input ids, so any padding positions
        # added by the collator contribute to the loss -- confirm intended.
        outputs = model(
            input_ids=input_ids,
            attention_mask=attention_mask,
            labels=input_ids,
        )
        loss = outputs.loss
        return (loss, outputs) if return_outputs else loss

In [5]:
# Raise the step budget to 2000, replacing the max_steps=1000 that was passed
# when training_args was constructed. This mutates training_args in place, so
# it must run before the trainer is created to take effect.
training_args.max_steps=2000

In [None]:
# Assemble the trainer from the LoRA-wrapped model, the dataset loaded from
# disk, the training arguments, and the padding collator defined above, then
# start training.
trainer = ModifiedTrainer(
    model=model,
    train_dataset=dataset,
    args=training_args,
    data_collator=data_collator,
)
trainer.train()

In [7]:
import os


def save_tunable_parameters(model, path):
    """Write the model's trainable parameters to ``path``.

    Walks ``model.named_parameters()``, keeps only the entries whose
    ``requires_grad`` flag is set, moves each one to the CPU, and serialises
    the resulting name -> tensor dict with ``torch.save``.
    """
    trainable = {}
    for name, param in model.named_parameters():
        if not param.requires_grad:
            continue
        trainable[name] = param.to("cpu")
    torch.save(trainable, path)


# Save only the parameters that require gradients to output/chatglm-lora.pt.
# "output" is the same directory name passed to TrainingArguments.
save_tunable_parameters(model, os.path.join("output", "chatglm-lora.pt"))