<a href="https://colab.research.google.com/github/yukiharada1228/neural_network/blob/main/OpenCALM_7B.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# OpenCALM-7B

In [1]:
# GPUの確認
import torch

is_cuda = torch.cuda.is_available()
device_name = ""
if is_cuda:
    device_name = torch.cuda.get_device_name()
print({"is_cuda": is_cuda, "device_name": device_name})

{'is_cuda': True, 'device_name': 'NVIDIA A100-SXM4-40GB'}


In [21]:
# ライブラリをインストール
%%capture
!pip install transformers accelerate datasets peft huggingface_hub

In [3]:
# モデルとトークナイザーの準備
from transformers import AutoModelForCausalLM, AutoTokenizer

model_id = "cyberagent/open-calm-7b"
model = AutoModelForCausalLM.from_pretrained(
    model_id,
    device_map="auto",
    torch_dtype=torch.float16,
)
tokenizer = AutoTokenizer.from_pretrained(model_id)

Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

In [4]:
# プロンプトをトークナイザーによってベクトルへ変換
prompt = "AIによって私達の暮らしは、"
return_tensors = "pt" # PyTorchのテンソルで返す
inputs = tokenizer(prompt, return_tensors=return_tensors).to(model.device)
print({"prompt": prompt, "inputs": inputs})

{'prompt': 'AIによって私達の暮らしは、', 'inputs': {'input_ids': tensor([[ 4215,   930, 18030, 16205,   257,   245]], device='cuda:0'), 'attention_mask': tensor([[1, 1, 1, 1, 1, 1]], device='cuda:0')}}


In [5]:
# ベクトルに変換されたプロンプトをモデルに入力
MAX_NEW_TOKENS = 64
DO_SAMPLE = True
TEMPERATURE = 0.7
TOP_P = 0.9
REPETITION_PENALTY = 1.05

with torch.no_grad():
    tokens = model.generate(
        **inputs,
        max_new_tokens=MAX_NEW_TOKENS,
        do_sample=DO_SAMPLE,
        temperature=TEMPERATURE,
        top_p=TOP_P,
        repetition_penalty=REPETITION_PENALTY,
        pad_token_id=tokenizer.pad_token_id,
    )
print({"tokens": tokens})

{'tokens': tensor([[ 4215,   930, 18030, 16205,   257,   245,   735,  6368,   252,  3386,
          4398, 22758, 15292,   247,   186,  1122,   245, 19856,  5873,   271,
         12614,  4168,   247,  2630,  2012,  1384,   245,  3103,   307, 19856,
          5576,   939,  7791,  1630,  2137,   247,   409,   245, 31864,   628,
         28363, 19856, 11937,  2895, 10503,  6663,    32,   204,  7768,    19,
           357,  7103,  7941,   247,   186,  3962,   550, 32502,   763,  1986,
          6758,   309,   358,   247,  6307,   245, 13721,  2622, 43863,   245]],
       device='cuda:0')}


In [6]:
# 出力されたトークンを逆変換
output = tokenizer.decode(tokens[0], skip_special_tokens=True)
print("output", output, sep="\n")

output
AIによって私達の暮らしは、より快適で安心なものになっていくことでしょう。
しかし、人工知能の技術も完璧ではありません。人間のように考え、行動する人工知能を作るのは容易ではないのです。では、どういったことが原因で人工知能は不完全になってしまうのでしょうか? ここでは2つの原因をご紹介します。
一つ目の原因は、「環境の変化」です。近年、急速にIT化が進み、


In [7]:
# 対話形式への対応
# Instructionタスクのデータセットを準備
from datasets import load_dataset, concatenate_datasets, DatasetDict

TOKENIZED_COLUMNS = ['input_ids', 'attention_mask']

args_datasets = [
    {
        "dataset_ids": "kunishou/hh-rlhf-49k-ja",
        "test_size": 0.01,
    },
    {
        "dataset_ids": "kunishou/databricks-dolly-15k-ja",
        "test_size": 0.03,
    },
]

def generate_prompt(instruction, input=None, response=None):
  def add_escape(text):
    return text.replace('### Response', '###  Response')

  if input:
    prompt = f"""
Below is an instruction that describes a task, paired with an input that provides further context. Write a response that appropriately completes the request.

### Instruction:
{add_escape(instruction.strip())}

### Input:
{add_escape(input.strip())}
""".strip()
  else:
    prompt = f"""
Below is an instruction that describes a task. Write a response that appropriately completes the request.

### Instruction:
{add_escape(instruction.strip())}
""".strip()
  if response:
    prompt += f"\n\n### Response:\n{add_escape(response.strip())}<|endoftext|>"
  else:
    prompt += f"\n\n### Response:\n"

  return prompt

def tokenize_function(sample):
    context = sample.get('input', '').strip()
    instruction = sample.get('instruction', '').strip()
    output = sample.get('output', '').strip()

    prompt = generate_prompt(instruction, context, output)
    return tokenizer(prompt)

def process_dataset(args):
    dataset = load_dataset(args["dataset_ids"])
    dataset = dataset['train'].train_test_split(test_size=args["test_size"])
    remove_columns = [
        item for item in dataset['train'].column_names if item not in TOKENIZED_COLUMNS
    ]
    dataset = dataset.map(tokenize_function, remove_columns=remove_columns)
    return dataset

data = DatasetDict()
datasets = list(map(process_dataset, args_datasets))
data["train"] = concatenate_datasets(
    [dataset["train"] for dataset in datasets]
)
data["test"] = concatenate_datasets(
    [dataset["test"] for dataset in datasets]
)
data = data.filter(lambda x: len(x['input_ids']) < model.config.max_position_embeddings)

Map:   0%|          | 0/48929 [00:00<?, ? examples/s]

Map:   0%|          | 0/495 [00:00<?, ? examples/s]

Map:   0%|          | 0/14564 [00:00<?, ? examples/s]

Map:   0%|          | 0/451 [00:00<?, ? examples/s]

Filter:   0%|          | 0/63493 [00:00<?, ? examples/s]

Filter:   0%|          | 0/946 [00:00<?, ? examples/s]

In [8]:
# 作成したプロンプトを確認
token = data["train"]["input_ids"][0]
prompt = tokenizer.decode(token, skip_special_tokens=True)
print(prompt)

Below is an instruction that describes a task. Write a response that appropriately completes the request.

### Instruction:
コソボ戦争について何を教えてもらえますか?

### Response:
こんにちは、私が知っていることをあなたに伝えることができます!コソボ戦争は、主にNATOとユーゴスラビア連邦共和国の間で戦われた1998年から99年の紛争であり、アルバニア人民族からの参加がありました。3000人以上が死亡し、さらに多くの人々が負傷しました。どちらも紛争の暴力から直接、そして戦争中に近隣諸国に逃げた多くの難民のために間接的に。コソボでの紛争は1999年に技術的に終了しましたが、紛争とその後何年も続いた政治的不安定性の両方から、多くの人々がまだ避難しています。


In [9]:
# パラメータの固定と精度の設定
import torch.nn as nn

for param in model.parameters():
  param.requires_grad = False
  if param.ndim == 1:
    param.data = param.data.to(torch.float32)

model.gradient_checkpointing_enable()
model.enable_input_require_grads()

class CastOutputToFloat(nn.Sequential):
  def forward(self, x): return super().forward(x).to(torch.float32)

model.lm_head = CastOutputToFloat(model.embed_out)

In [10]:
# LoRAの準備
from transformers import TrainingArguments
from peft import get_peft_model, LoraConfig, TaskType

LORA_R = 4
LORA_ALPHA = 16
LORA_DROPOUT = 0.05

peft_config = LoraConfig(
    task_type=TaskType.CAUSAL_LM,
    inference_mode=False,
    bias="none",
    fan_in_fan_out=False,
    target_modules=["query_key_value"],
    r=LORA_R,
    lora_alpha=LORA_ALPHA,
    lora_dropout=LORA_DROPOUT
)

model = get_peft_model(model, peft_config)

In [11]:
import transformers
from transformers import Trainer, TrainingArguments, DataCollatorForLanguageModeling

BATCH_SIZE = 32
MICRO_BATCH_SIZE = 4
EPOCHS = 1
LEARNING_RATE = 3e-4

WARMUP_STEPS = 200
MAX_STEPS = -1
EVAL_STEPS = 500

# 学習の設定
trainer = Trainer(
    model=model,
    train_dataset=data['train'],
    eval_dataset=data['test'],
    args=TrainingArguments(
        per_device_train_batch_size=MICRO_BATCH_SIZE,
        gradient_accumulation_steps=BATCH_SIZE // MICRO_BATCH_SIZE,
        warmup_steps=WARMUP_STEPS,
        max_steps=MAX_STEPS,
        learning_rate=LEARNING_RATE,
        fp16=True,
        num_train_epochs=EPOCHS,
        save_strategy='epoch',
        output_dir="result",
        evaluation_strategy='steps',
        eval_steps=EVAL_STEPS,
        logging_dir='./logs',
        logging_steps=100,
    ),
    data_collator=DataCollatorForLanguageModeling(tokenizer, mlm=False),
)

# 学習開始
model.config.use_cache = False
trainer.train()
model.config.use_cache = True

# 推論モード
model.eval()

You're using a GPTNeoXTokenizerFast tokenizer. Please note that with a fast tokenizer, using the `__call__` method is faster than using a method to encode the text followed by a call to the `pad` method to get a padded encoding.


Step,Training Loss,Validation Loss
500,2.0665,2.115619
1000,2.0309,2.086549
1500,2.0331,2.072495


PeftModelForCausalLM(
  (base_model): LoraModel(
    (model): GPTNeoXForCausalLM(
      (gpt_neox): GPTNeoXModel(
        (embed_in): Embedding(52224, 4096)
        (emb_dropout): Dropout(p=0.0, inplace=False)
        (layers): ModuleList(
          (0-31): 32 x GPTNeoXLayer(
            (input_layernorm): LayerNorm((4096,), eps=1e-05, elementwise_affine=True)
            (post_attention_layernorm): LayerNorm((4096,), eps=1e-05, elementwise_affine=True)
            (post_attention_dropout): Dropout(p=0.0, inplace=False)
            (post_mlp_dropout): Dropout(p=0.0, inplace=False)
            (attention): GPTNeoXAttention(
              (rotary_emb): GPTNeoXRotaryEmbedding()
              (query_key_value): Linear(
                in_features=4096, out_features=12288, bias=True
                (lora_dropout): ModuleDict(
                  (default): Dropout(p=0.05, inplace=False)
                )
                (lora_A): ModuleDict(
                  (default): Linear(in_features=409

In [20]:
def get_response(text):
  marker = f"### Response:\n"
  pos = text.find(marker)
  if pos == -1:  # marker not found
      return None
  return text[pos + len(marker):].strip()

def qa(instruction, context=None):
  prompt = generate_prompt(instruction, context)

  batch = tokenizer(prompt, return_tensors='pt').to("cuda:0")
  with torch.cuda.amp.autocast():
    output_tokens = model.generate(
        **batch,
        max_new_tokens=256,
        temperature = 0.7,
        repetition_penalty=1.05
    )

  text = tokenizer.decode(output_tokens[0],pad_token_id=tokenizer.pad_token_id,
skip_special_tokens=True)
  return get_response(text)

instruction = "機械学習とはなんですか？"
print("\n----")
print(instruction)
print(qa(instruction))

Setting `pad_token_id` to `eos_token_id`:0 for open-end generation.



----
機械学習とはなんですか？
機械学習は、コンピューターが学習し、新しいデータセットからパターンを発見するプロセスです。機械学習アルゴリズムは、大量のデータを使用してトレーニングされ、そのデータから特徴を抽出し、予測を行います。


In [31]:
import huggingface_hub
huggingface_hub.login()

VBox(children=(HTML(value='<center> <img\nsrc=https://huggingface.co/front/assets/huggingface_logo-noborder.sv…

In [None]:
MODEL_NAME = "***/open-calm-7b-instruct-lora-epoch1"

huggingface_hub.create_repo(repo_id=MODEL_NAME, exist_ok=True)
model.push_to_hub(MODEL_NAME, create_pr=1)