In [1]:
from datasets import load_dataset, DatasetDict
from transformers import AutoTokenizer, AutoModelForCausalLM
import string
from trl import SFTTrainer, SFTConfig, DataCollatorForCompletionOnlyLM
from peft import LoraConfig

In [2]:
## モデル名

model_name = "cyberagent/calm2-7b-chat"

In [3]:
## トークナイザーのインポート

tokenizer = AutoTokenizer.from_pretrained(
    model_name,
    use_fast = True
)

tokenizer_config.json:   0%|          | 0.00/678 [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/3.27M [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/585 [00:00<?, ?B/s]

In [4]:
## モデルのインポート
## リソースが足りないので、1/4量子化

model = AutoModelForCausalLM.from_pretrained(
    model_name,
    device_map = 'auto', # 複数のGPUがあるときに、それらを均等に使用する
    load_in_8bit = True, # 精度と引き換えに、パラメータを8bitに省略したLLMを使用して計算を軽量化
)

config.json:   0%|          | 0.00/669 [00:00<?, ?B/s]

The `load_in_4bit` and `load_in_8bit` arguments are deprecated and will be removed in the future versions. Please, pass a `BitsAndBytesConfig` object in `quantization_config` argument instead.


model.safetensors.index.json:   0%|          | 0.00/25.1k [00:00<?, ?B/s]

Downloading shards:   0%|          | 0/2 [00:00<?, ?it/s]

model-00001-of-00002.safetensors:   0%|          | 0.00/9.98G [00:00<?, ?B/s]

model-00002-of-00002.safetensors:   0%|          | 0.00/4.04G [00:00<?, ?B/s]

Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

generation_config.json:   0%|          | 0.00/132 [00:00<?, ?B/s]

In [5]:
chat_template = string.Template(
    "USER:${user}\nASSISTANT:${assistant}"
)
response_key = "ASSISTANT:"

In [6]:
sample_text = chat_template.safe_substitute({
    "user": "人気タレント・タモリの本名は何でしょう？",
     "assistant": "" # AIに回答させるために空けておく
})

In [7]:
def generate(model, text):
    input_ids = tokenizer.encode(
        text,
        return_tensors = 'pt',
        add_special_tokens = True
    ).to(model.device)
    output_ids = model.generate(
        input_ids,
        max_new_tokens = 100,
        do_sample = True,
        temperature = 0.8,
    )
    print(
        tokenizer.decode(
            output_ids[0],
            skip_special_tokens = True
        )
    )

In [8]:
%%time
generate(model, sample_text)

USER:人気タレント・タモリの本名は何でしょう？
ASSISTANT: タレントのタモリの本名は森田一義であり、「タモリ」は芸名である。
CPU times: user 3.68 s, sys: 412 ms, total: 4.09 s
Wall time: 4.47 s


In [9]:
## 外部のデータセットをインポート

rs = "izumi-lab/llm-japanese-dataset"
datasets = load_dataset(
    rs,
    split = 'train'
)

README.md:   0%|          | 0.00/3.21k [00:00<?, ?B/s]

data-cc-by-sa.jsonl:   0%|          | 0.00/2.38G [00:00<?, ?B/s]

Generating train split:   0%|          | 0/9074340 [00:00<?, ? examples/s]

In [10]:
## 学習データセットの整形プロセス

def update_datasets(example):
    output_texts = []
    for instruction_, input_, output_ in zip(example['instruction'], example['input'], example['output']):
        text = chat_template.safe_substitute({
            "user": "{0}\n{1}".format(instruction_, input_),
            "assistant": output_
        })
        output_texts.append(text)
    return output_texts

In [11]:
## ??

collator = DataCollatorForCompletionOnlyLM(
    response_key,
    tokenizer = tokenizer
)

In [12]:
peft_config = LoraConfig(
    lora_alpha = 16,
    lora_dropout = 0.1,
    r = 16,
    bias = "lora_only",
    task_type = "CAUSAL_LM",
)

In [13]:
## アダプタの付与
## 量子化したモデルをそのままファインチューニングできないため
## 全体チューニングではなく、出力換算するための行列を修飾する(Lora)
## 実際にtrainされるのは行列の部分だけ

model.add_adapter(peft_config)

In [14]:
%%time
generate(model, sample_text)

USER:人気タレント・タモリの本名は何でしょう？
ASSISTANT: タレントのタモリは本名を森田豊（もりた ゆたか）さんといいます。
CPU times: user 3.61 s, sys: 45 ms, total: 3.66 s
Wall time: 3.83 s


In [15]:
%%time

## 訓練オブジェクトの定義

trainer = SFTTrainer(
    model,
    train_dataset = datasets.select(range(10000)),
    args = SFTConfig(
        output_dir = "./tmp",
        per_device_train_batch_size = 16
    ),
    formatting_func = update_datasets,
    data_collator = collator,
    max_seq_length = 128,
)


Deprecated positional argument(s) used in SFTTrainer, please use the SFTConfig to set these arguments instead.


Map:   0%|          | 0/10000 [00:00<?, ? examples/s]

CPU times: user 3.1 s, sys: 97.6 ms, total: 3.2 s
Wall time: 1.03 s


In [16]:
%%time

## 訓練実行

trainer.train()

Step,Training Loss
500,14.5831
1000,14.6365
1500,14.6381



ASSISTANT:(お)太鼓結び<|padding|><|padding|><|padding|><|padding|><|padding|><|padding|><|padding|><|padding|><|padding|><|padding|><|padding|><|padding|><|padding|><|padding|><|padding|><|padding|><|padding|><|padding|><|padding|><|padding|><|padding|><|padding|><|padding|><|padding|><|padding|><|padding|><|padding|><|padding|> This instance will be ignored in loss calculation. Note, if this happens often, consider increasing the `max_seq_length`.

ASSISTANT:(浅草)花やしき<|padding|><|padding|><|padding|><|padding|><|padding|><|padding|><|padding|><|padding|><|padding|><|padding|><|padding|><|padding|><|padding|> This instance will be ignored in loss calculation. Note, if this happens often, consider increasing the `max_seq_length`.

ASSISTANT:(ロバート・)ピアリ(ー)<|padding|><|padding|><|padding|><|padding|><|padding|><|padding|><|padding|><|padding|><|padding|><|padding|><|padding|><|padding|><|padding|><|padding|> This instance will be ignored in loss calculation. Note, if this happens often, conside

CPU times: user 17min 38s, sys: 4min 2s, total: 21min 41s
Wall time: 21min 43s


TrainOutput(global_step=1875, training_loss=14.616333072916667, metrics={'train_runtime': 1303.7761, 'train_samples_per_second': 23.01, 'train_steps_per_second': 1.438, 'total_flos': 6.322045482329702e+16, 'train_loss': 14.616333072916667, 'epoch': 3.0})

In [17]:
%%time
generate(model, sample_text)

USER:人気タレント・タモリの本名は何でしょう？
ASSISTANT:USER:USER:USER:USER:USER:USER:USER:USER:USER:USER:USER:USER:USER:USER:USER:USER:USER:USER:USER:USER:USER USER:USER:USER:USER:USER:USER:USER:USER:USER:USER:USER:USER:USER:USER:USER:USER:USER:USER:USER:USER:USER:USER:USER:USER:USER:USER:USER:USER:USER:
CPU times: user 16.5 s, sys: 22.7 ms, total: 16.5 s
Wall time: 16.5 s
