# Install the necessary packages

In [None]:
!pip install transformers
!pip install datasets
!pip install accelerate
!pip install peft
!pip install bitsandbytes
!pip install sentencePiece

Collecting datasets
  Downloading datasets-3.2.0-py3-none-any.whl.metadata (20 kB)
Collecting dill<0.3.9,>=0.3.0 (from datasets)
  Downloading dill-0.3.8-py3-none-any.whl.metadata (10 kB)
Collecting xxhash (from datasets)
  Downloading xxhash-3.5.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (12 kB)
Collecting multiprocess<0.70.17 (from datasets)
  Downloading multiprocess-0.70.16-py310-none-any.whl.metadata (7.2 kB)
Collecting fsspec<=2024.9.0,>=2023.1.0 (from fsspec[http]<=2024.9.0,>=2023.1.0->datasets)
  Downloading fsspec-2024.9.0-py3-none-any.whl.metadata (11 kB)
Downloading datasets-3.2.0-py3-none-any.whl (480 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m480.6/480.6 kB[0m [31m28.2 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading dill-0.3.8-py3-none-any.whl (116 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m116.3/116.3 kB[0m [31m10.8 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading fsspec-2024.9.0-py3-none-any.whl 

# 1. Clean the Data
1. Please download the data from GitHub and save it on your local drive.
2. At the end of this step, the data should be a single JSON file in the following format.
```
            json_entry = {
                'instruction': 'What is diabetes?',
                'input': '',
                'output': 'Diabetes is ...'
            }
```
3. Save the JSON file on your computer.

In [None]:
# Connect Google Drive: mount it at /content/drive so later cells can read and write files there
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


# 구글 드라이브 자체에 다음과 같은 폴더를 먼저 생성합니다.
'/content/drive/MyDrive/Colab Notebooks/transformer_learn/'

'/content/drive/MyDrive/Colab Notebooks/transformer_learn/dataset'

In [None]:
### Code added by the translator
# Not present in the original code, but this additional install must be run
!pip install xmltodict

Collecting xmltodict
  Downloading xmltodict-0.14.2-py2.py3-none-any.whl.metadata (8.0 kB)
Downloading xmltodict-0.14.2-py2.py3-none-any.whl (10.0 kB)
Installing collected packages: xmltodict
Successfully installed xmltodict-0.14.2


In [None]:
### ***만약 데이터가 이미 base_directory 관련 경로에 있다면 이 코드는 실행하지 말고 넘어가세요.***

# 데이터를 인터넷 url에서 가져와 압축을 풀고 base_directory 관련 경로에 저장하는 과정을 포함하는 코드 블록
# 런타임 수 분 소요

import os
import glob
import requests
import xmltodict
import json
from datasets import load_dataset
from transformers import AutoTokenizer

import zipfile

# Base directory on Google Drive that receives the archive and the extracted dataset
base0_directory = '/content/drive/MyDrive/Colab Notebooks/transformer_learn/'

# MedQuAD download location and the paths derived from the base directory
medquad_url = "https://github.com/abachaa/MedQuAD/archive/refs/heads/master.zip"
medquad_zip_path = base0_directory + "MedQuAD.zip"
medquad_extract_path = base0_directory + "MedQuAD-master/"

# Make sure the target folder exists before anything is written into it
os.makedirs(base0_directory, exist_ok=True)

# Download the archive only when it is not already on disk
if not os.path.exists(medquad_zip_path):
    print("Downloading MedQuAD dataset...")
    response = requests.get(medquad_url)
    # Fail loudly on an HTTP error instead of saving an error page as a .zip file
    response.raise_for_status()
    with open(medquad_zip_path, "wb") as f:
        f.write(response.content)

# Extract only when the extracted folder is not already present.
# The archive is unpacked into base0_directory: this cell never defines
# `base_directory`, and the extract path checked above is built from base0_directory.
if not os.path.exists(medquad_extract_path):
    with zipfile.ZipFile(medquad_zip_path, 'r') as zip_ref:
        zip_ref.extractall(base0_directory)

In [None]:
# 이 코드를 실행하기 전에 MedQuAD-master 데이터셋을 아래 base_directory에 저장하는 추가 코드 필요!

import xmltodict
import json
import glob
import os

# Set base_directory to the actual Google Drive path (the MedQuAD-master folder)
base_directory = '/content/drive/MyDrive/Colab Notebooks/transformer_learn/MedQuAD-master/'

def convert_xml_to_json(xml_file):
    """Convert one MedQuAD XML file into Alpaca-style instruction records.

    Args:
        xml_file: Path of the XML file; it is read as UTF-8 text.

    Returns:
        A list of dicts with the keys 'instruction' (question text),
        'input' (always an empty string) and 'output' (answer text).
        QA pairs without a non-blank string answer or without question
        text are skipped. When the parsed document has no usable
        'Document' / 'QAPairs' / 'QAPair' content, a notice is printed
        and an empty list is returned.
    """
    with open(xml_file, 'r', encoding='utf-8') as f:
        xml_data = f.read()

    xml_dict = xmltodict.parse(xml_data)

    # Walk down Document -> QAPairs -> QAPair, tolerating a missing or empty level
    document = xml_dict.get('Document') if isinstance(xml_dict, dict) else None
    qa_pairs = document.get('QAPairs') if isinstance(document, dict) else None
    questions = qa_pairs.get('QAPair') if isinstance(qa_pairs, dict) else None

    if questions is None:
        print(f"Missing or invalid 'Document' or 'QAPairs' key in {xml_file}")
        return []

    # A single QAPair is not wrapped in a list, so normalise to a list
    if not isinstance(questions, list):
        questions = [questions]

    json_data = []

    for question in questions:
        if not isinstance(question, dict):
            continue

        # Keep only entries whose answer is a non-blank string
        answer = question.get('Answer')
        if not isinstance(answer, str) or not answer.strip():
            continue

        # The question is either a dict carrying its text under '#text'
        # or, when the element has no attributes, the text itself
        question_field = question.get('Question')
        if isinstance(question_field, dict):
            question_text = question_field.get('#text')
        else:
            question_text = question_field
        if not isinstance(question_text, str) or not question_text.strip():
            continue

        json_data.append({
            'instruction': question_text,
            'input': '',
            'output': answer
        })

    return json_data

# Root of the MedQuAD-master folder that is searched for XML files
files_path = base_directory

# Records collected from every XML file found below files_path
combined_json_data = []

# Visit directories and files in sorted order. os.walk yields entries in
# filesystem order, and the later "train[:90%]" / "train[90%:]" split depends
# on record order, so sorting keeps the split the same from run to run.
for root, dirs, files in os.walk(files_path):
    dirs.sort()  # sorting in place controls the order in which os.walk descends
    for file in sorted(files):
        if file.endswith('.xml'):  # only XML files are processed
            xml_file_path = os.path.join(root, file)
            combined_json_data.extend(convert_xml_to_json(xml_file_path))

# Save everything as one JSON file; ensure_ascii=False keeps non-ASCII text readable
output_file = os.path.join(base_directory, 'alpaca_data.json')
with open(output_file, 'w', encoding='utf-8') as f:
    json.dump(combined_json_data, f, indent=4, ensure_ascii=False)

print(f"JSON 데이터가 저장되었습니다: {output_file}")


Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).
Missing or invalid 'Document' or 'QAPairs' key in /content/drive/MyDrive/Colab Notebooks/transformer_learn/MedQuAD-master/5_NIDDK_QA/0000056.xml
Missing or invalid 'Document' or 'QAPairs' key in /content/drive/MyDrive/Colab Notebooks/transformer_learn/MedQuAD-master/5_NIDDK_QA/0000064.xml
Missing or invalid 'Document' or 'QAPairs' key in /content/drive/MyDrive/Colab Notebooks/transformer_learn/MedQuAD-master/5_NIDDK_QA/0000065.xml
Missing or invalid 'Document' or 'QAPairs' key in /content/drive/MyDrive/Colab Notebooks/transformer_learn/MedQuAD-master/5_NIDDK_QA/0000077.xml
Missing or invalid 'Document' or 'QAPairs' key in /content/drive/MyDrive/Colab Notebooks/transformer_learn/MedQuAD-master/5_NIDDK_QA/0000175.xml
Missing or invalid 'Document' or 'QAPairs' key in /content/drive/MyDrive/Colab Notebooks/transformer_learn/MedQuAD-master/5_NIDDK_QA/0000177.xml
M

#2. Declare the Model and Tokenizer
We will utilize the llama-7b-hf model created by Meta. To obtain the model weights from Meta, you must submit a request through https://ai.facebook.com/blog/large-language-model-llama-meta-ai/. However, the Llama model's weights were inadvertently leaked and incorporated into Hugging Face's decapoda-research/llama-7b-hf. As a result, we will employ the Llama model from decapoda-research rather than requesting the weights from Meta and waiting.

In [None]:
# Run this cell in Colab to set up the Hugging Face token
# In the widget this cell displays, enter your Hugging Face token and click the [Login] button
# For how to create a Hugging Face token, see pp. 132-134 of the book
# "Introduction to LLMs Learned Through 25 Problems, with Python" (Lim Seon-jip et al., Rubypaper)
# A Hugging Face read token is sufficient for everything up to loading the model
from huggingface_hub import login

login()


VBox(children=(HTML(value='<center> <img\nsrc=https://huggingface.co/front/assets/huggingface_logo-noborder.sv…

In [None]:
# 코랩 유료 버전 기준 런타임 2분 소요
import torch
from transformers import LlamaTokenizer, LlamaForCausalLM

# BitsAndBytesConfig carries the 8-bit setting; passing load_in_8bit directly
# to from_pretrained is deprecated (see the warning this cell used to print).
from transformers import BitsAndBytesConfig

# Device selection: GPU when available, otherwise CPU
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

#BASE_MODEL = "decapoda-research/llama-7b-hf"
BASE_MODEL = "baffo32/decapoda-research-llama-7B-hf"

# Load the base model with 8-bit quantized weights; device_map="auto" lets
# the library decide where each layer is placed.
model = LlamaForCausalLM.from_pretrained(
    BASE_MODEL,
    quantization_config=BitsAndBytesConfig(load_in_8bit=True),
    device_map="auto",
)

tokenizer = LlamaTokenizer.from_pretrained(BASE_MODEL)

# NOTE(review): id 0 is presumably the <unk> token, chosen so padding differs
# from the EOS token (alpaca-lora convention) — confirm against the tokenizer.
tokenizer.pad_token_id = 0
# Pad on the left so the real tokens sit at the end of each sequence
tokenizer.padding_side = "left"

The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


config.json:   0%|          | 0.00/428 [00:00<?, ?B/s]

The `load_in_4bit` and `load_in_8bit` arguments are deprecated and will be removed in the future versions. Please, pass a `BitsAndBytesConfig` object in `quantization_config` argument instead.


pytorch_model.bin.index.json:   0%|          | 0.00/25.5k [00:00<?, ?B/s]

Downloading shards:   0%|          | 0/33 [00:00<?, ?it/s]

pytorch_model-00001-of-00033.bin:   0%|          | 0.00/405M [00:00<?, ?B/s]

pytorch_model-00002-of-00033.bin:   0%|          | 0.00/405M [00:00<?, ?B/s]

pytorch_model-00003-of-00033.bin:   0%|          | 0.00/405M [00:00<?, ?B/s]

pytorch_model-00004-of-00033.bin:   0%|          | 0.00/405M [00:00<?, ?B/s]

pytorch_model-00005-of-00033.bin:   0%|          | 0.00/405M [00:00<?, ?B/s]

pytorch_model-00006-of-00033.bin:   0%|          | 0.00/405M [00:00<?, ?B/s]

pytorch_model-00007-of-00033.bin:   0%|          | 0.00/405M [00:00<?, ?B/s]

pytorch_model-00008-of-00033.bin:   0%|          | 0.00/405M [00:00<?, ?B/s]

pytorch_model-00009-of-00033.bin:   0%|          | 0.00/405M [00:00<?, ?B/s]

pytorch_model-00010-of-00033.bin:   0%|          | 0.00/405M [00:00<?, ?B/s]

pytorch_model-00011-of-00033.bin:   0%|          | 0.00/405M [00:00<?, ?B/s]

pytorch_model-00012-of-00033.bin:   0%|          | 0.00/405M [00:00<?, ?B/s]

pytorch_model-00013-of-00033.bin:   0%|          | 0.00/405M [00:00<?, ?B/s]

pytorch_model-00014-of-00033.bin:   0%|          | 0.00/405M [00:00<?, ?B/s]

pytorch_model-00015-of-00033.bin:   0%|          | 0.00/405M [00:00<?, ?B/s]

pytorch_model-00016-of-00033.bin:   0%|          | 0.00/405M [00:00<?, ?B/s]

pytorch_model-00017-of-00033.bin:   0%|          | 0.00/405M [00:00<?, ?B/s]

pytorch_model-00018-of-00033.bin:   0%|          | 0.00/405M [00:00<?, ?B/s]

pytorch_model-00019-of-00033.bin:   0%|          | 0.00/405M [00:00<?, ?B/s]

pytorch_model-00020-of-00033.bin:   0%|          | 0.00/405M [00:00<?, ?B/s]

pytorch_model-00021-of-00033.bin:   0%|          | 0.00/405M [00:00<?, ?B/s]

pytorch_model-00022-of-00033.bin:   0%|          | 0.00/405M [00:00<?, ?B/s]

pytorch_model-00023-of-00033.bin:   0%|          | 0.00/405M [00:00<?, ?B/s]

pytorch_model-00024-of-00033.bin:   0%|          | 0.00/405M [00:00<?, ?B/s]

pytorch_model-00025-of-00033.bin:   0%|          | 0.00/405M [00:00<?, ?B/s]

pytorch_model-00026-of-00033.bin:   0%|          | 0.00/405M [00:00<?, ?B/s]

pytorch_model-00027-of-00033.bin:   0%|          | 0.00/405M [00:00<?, ?B/s]

pytorch_model-00028-of-00033.bin:   0%|          | 0.00/405M [00:00<?, ?B/s]

pytorch_model-00029-of-00033.bin:   0%|          | 0.00/405M [00:00<?, ?B/s]

pytorch_model-00030-of-00033.bin:   0%|          | 0.00/405M [00:00<?, ?B/s]

pytorch_model-00031-of-00033.bin:   0%|          | 0.00/405M [00:00<?, ?B/s]

pytorch_model-00032-of-00033.bin:   0%|          | 0.00/405M [00:00<?, ?B/s]

pytorch_model-00033-of-00033.bin:   0%|          | 0.00/524M [00:00<?, ?B/s]



Loading checkpoint shards:   0%|          | 0/33 [00:00<?, ?it/s]

generation_config.json:   0%|          | 0.00/124 [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/142 [00:00<?, ?B/s]

tokenizer.model:   0%|          | 0.00/500k [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/2.00 [00:00<?, ?B/s]

You are using the default legacy behaviour of the <class 'transformers.models.llama.tokenization_llama.LlamaTokenizer'>. This is expected, and simply means that the `legacy` (previous) behavior will be used so nothing changes for you. If you want to use the new behaviour, set `legacy=False`. This should only be set if you understand what it means, and thoroughly read the reason why this was added as explained in https://github.com/huggingface/transformers/pull/24565 - if you loaded a llama tokenizer from a GGUF file you can ignore this message


#3. Data Preprocessing
1. We allocate 90% of the data for training and 10% for validation purposes.
2. The generate_prompt function establishes the prompt format. Reference: https://github.com/tloen/alpaca-lora
  * Here, Instruction ==> Question, Input ==> Context, Output ==> Answer
  * If there is context, the prompt will have three keys: [Instruction, Input,Output ]
  * If there is no context, the prompt will have two keys: [Instruction,Output ]
3. We create both training and validation datasets.
4. Initially, we generate a prompt and subsequently tokenize it.
5. The training process requires input_ids and attention_mask. It is not necessary to explicitly define the label.
6. This step should produce training and validation dataset with format:
```
Dataset({
    features: ['instruction', 'input', 'output', 'input_ids', 'attention_mask'],
    num_rows: 14762
})
```

In [None]:
# 코랩 유료 버전 기준 런타임 1분 소요

from datasets import load_dataset
from transformers import AutoTokenizer, AutoModelForCausalLM, Trainer, TrainingArguments
from torch.utils.data import DataLoader

# Load the generated JSON file: the first 90% of the records for training and
# the remaining 10% for validation.
# NOTE(review): train_data / valid_data are not referenced again in the visible
# notebook (the same splits are reloaded as train_data1 / valid_data2 below).
train_data = load_dataset("json", data_files=base_directory+"alpaca_data.json", split="train[:90%]")
valid_data = load_dataset("json", data_files=base_directory+"alpaca_data.json", split="train[90%:]")

def generate_prompt(data_point):
    """Build the training prompt text for a single record.

    Args:
        data_point: Mapping providing "instruction", "input" and "output".

    Returns:
        The prompt string. The input section is included only when
        data_point["input"] is truthy; otherwise the two-section
        (instruction / response) form is returned.
    """
    has_context = bool(data_point["input"])

    # No context: instruction followed directly by the response
    if not has_context:
        return f"""Below is an instruction that describes a task. Write a response that appropriately completes the request.

### 지시(Instruction):
{data_point["instruction"]}

### 출력(Response):
{data_point["output"]}"""

    # Context present: instruction, input, then response
    return f"""Below is an instruction that describes a task, paired with an input that provides further context. Write a response that appropriately completes the request.

### 지시(Instruction):
{data_point["instruction"]}

### 입력(Input):
{data_point["input"]}

### 출력(Response):
{data_point["output"]}"""

# Settings shared by the training and validation tokenization
MAX_PROMPT_TOKENS = 1000  # prompts are truncated / padded to this many tokens
SHUFFLE_SEED = 42  # fixed seed so the shuffled order is the same on every run


def _tokenize_prompt(data_point):
    """Render one record with generate_prompt and tokenize it to a fixed length."""
    return tokenizer(
        generate_prompt(data_point),
        truncation=True,
        max_length=MAX_PROMPT_TOKENS,
        padding="max_length",
    )


# 90% / 10% split of the generated JSON file
train_data1 = load_dataset("json", data_files=base_directory+"alpaca_data.json", split="train[:90%]")
valid_data2 = load_dataset("json", data_files=base_directory+"alpaca_data.json", split="train[90%:]")

# Shuffle each split reproducibly, then add the tokenizer output columns to every record
data_train = train_data1.shuffle(seed=SHUFFLE_SEED).map(_tokenize_prompt)
data_valid = valid_data2.shuffle(seed=SHUFFLE_SEED).map(_tokenize_prompt)

Generating train split: 0 examples [00:00, ? examples/s]

Map:   0%|          | 0/14762 [00:00<?, ? examples/s]

Map:   0%|          | 0/1640 [00:00<?, ? examples/s]

In [None]:
### Code added by the translator
# Write the tokenized training split to disk under base_directory
data_train_save_path = os.path.join(base_directory, "data_train_saved")
data_train.save_to_disk(data_train_save_path)

# Write the tokenized validation split next to it
data_valid_save_path = os.path.join(base_directory, "data_valid_saved")
data_valid.save_to_disk(data_valid_save_path)

# Report where each split was stored
print(f"data_train saved to: {data_train_save_path}")
print(f"data_valid saved to: {data_valid_save_path}")


Saving the dataset (0/1 shards):   0%|          | 0/14762 [00:00<?, ? examples/s]

Saving the dataset (0/1 shards):   0%|          | 0/1640 [00:00<?, ? examples/s]

data_train saved to: /content/drive/MyDrive/Colab Notebooks/transformer_learn/MedQuAD-master/data_train_saved
data_valid saved to: /content/drive/MyDrive/Colab Notebooks/transformer_learn/MedQuAD-master/data_valid_saved


# Model Training With PEFT

### Declaring LoRA Variables

In [None]:
# --- LoRA adapter settings ---
LORA_R = 8  # LoRA dimension
LORA_ALPHA = 16  # alpha parameter used for LoRA scaling
LORA_DROPOUT = 0.05
# Modules whose parameters will be trained
LORA_TARGET_MODULES = ["q_proj", "v_proj"]

# --- Training schedule ---
MICRO_BATCH_SIZE = 4
BATCH_SIZE = 128
# Number of micro-batches accumulated to reach BATCH_SIZE
GRADIENT_ACCUMULATION_STEPS = BATCH_SIZE // MICRO_BATCH_SIZE
TRAIN_STEPS = 50
LEARNING_RATE = 4e-4
OUTPUT_DIR = base_directory

The output below shows that we are training only about 0.06% of the parameters, which greatly speeds up the fine-tuning process.

In [None]:
### Translator's note: in recent versions of the peft library, prepare_model_for_int8_training
###        was renamed to prepare_model_for_kbit_training, so the original code is changed as shown below.
#from peft import LoraConfig, get_peft_model, get_peft_model_state_dict, prepare_model_for_int8_training
from peft import LoraConfig, get_peft_model, get_peft_model_state_dict
from peft import prepare_model_for_kbit_training

# LoRA configuration assembled from the constants declared above
config = LoraConfig(
    task_type="CAUSAL_LM",
    target_modules=LORA_TARGET_MODULES,
    r=LORA_R,
    lora_alpha=LORA_ALPHA,
    lora_dropout=LORA_DROPOUT,
    bias="none",
)

# Prepare the quantized model for training, then attach the LoRA adapter
#model = prepare_model_for_int8_training(model)
model = get_peft_model(prepare_model_for_kbit_training(model), config)

# Show how many parameters are trainable
model.print_trainable_parameters()

trainable params: 4,194,304 || all params: 6,742,609,920 || trainable%: 0.0622


In [None]:
import transformers

# Training configuration, grouped by concern
training_arguments = transformers.TrainingArguments(
    # output location and reporting
    output_dir=OUTPUT_DIR,
    report_to="tensorboard",
    logging_steps=10,
    # batching
    per_device_train_batch_size=MICRO_BATCH_SIZE,
    gradient_accumulation_steps=GRADIENT_ACCUMULATION_STEPS,
    # optimisation schedule
    optim="adamw_torch",
    learning_rate=LEARNING_RATE,
    warmup_steps=10,
    max_steps=TRAIN_STEPS,
    fp16=True,
    # evaluation and checkpointing run on the same step interval
    evaluation_strategy="steps",
    eval_steps=50,
    save_strategy="steps",
    save_steps=50,
    save_total_limit=3,
    load_best_model_at_end=True,
)



## Train and save the chatbot

In [None]:
### Translator's note: the original book code below takes about 2 hours even to get through 1/10 of the whole process.
###        Running the simplified code block that follows is therefore recommended instead of this code.
# The code is kept inside a string literal so that it is not executed.
"""
trainer = transformers.Trainer(
    model=model,
    train_dataset=data_train,
    eval_dataset=data_valid,
    args=training_arguments,
    data_collator=transformers.DataCollatorForLanguageModeling(tokenizer, mlm=False),
)
model.config.use_cache = False
trainer.train(resume_from_checkpoint=False)

model.save_pretrained(base_directory+"chatbot")
"""

In [None]:
### Code added by the translator: drastically shortens the training time
### Runtime under 1 hour (on paid Colab PRO)

LORA_R = 8  # lora dimension
LORA_ALPHA = 16  # (`float`): The alpha parameter for Lora scaling
LORA_DROPOUT = 0.05
# this defines what parameters need to be trained
LORA_TARGET_MODULES = [
    "q_proj",
    "v_proj",
]

BATCH_SIZE = 128
MICRO_BATCH_SIZE = 4
GRADIENT_ACCUMULATION_STEPS = BATCH_SIZE // MICRO_BATCH_SIZE
LEARNING_RATE = 4e-4
TRAIN_STEPS = 50  # original setting: TRAIN_STEPS = 50
OUTPUT_DIR = base_directory

### Translator's note: in recent versions of the peft library, prepare_model_for_int8_training
###        was renamed to prepare_model_for_kbit_training, so the original code is changed as shown below.
#from peft import LoraConfig, get_peft_model, get_peft_model_state_dict, prepare_model_for_int8_training
from peft import LoraConfig, PeftModel, get_peft_model, get_peft_model_state_dict
from peft import prepare_model_for_kbit_training

config = LoraConfig(
    r=LORA_R,
    lora_alpha=LORA_ALPHA,
    target_modules=LORA_TARGET_MODULES,
    lora_dropout=LORA_DROPOUT,
    bias="none",
    task_type="CAUSAL_LM",
)

# Wrap the model only once. The earlier PEFT cell may already have turned
# `model` into a PeftModel; calling get_peft_model on it again would nest a
# second wrapper around the first, which changes the adapter's saved layout.
if not isinstance(model, PeftModel):
    # model = prepare_model_for_int8_training(model)
    model = prepare_model_for_kbit_training(model)
    model = get_peft_model(model, config)
model.print_trainable_parameters()

import transformers

# Reduced TrainingArguments (efficiency-oriented). The values of the full run
# are the ones in the TrainingArguments cell above: MICRO_BATCH_SIZE,
# GRADIENT_ACCUMULATION_STEPS, warmup_steps=10, max_steps=TRAIN_STEPS,
# LEARNING_RATE, evaluation/saving every 50 steps, TensorBoard reporting.
training_arguments = transformers.TrainingArguments(
    per_device_train_batch_size=2,  # smaller batch size (original: 4)
    gradient_accumulation_steps=16,  # gradient accumulation for the reduced run
    warmup_steps=5,  # fewer warm-up steps
    max_steps=25,  # fewer training steps (original: 50)
    learning_rate=2e-4,  # lower learning rate (more stable)
    fp16=True,  # keep FP16
    logging_steps=5,  # log more often
    optim="adamw_torch",
    evaluation_strategy="no",  # evaluation disabled (faster training)
    save_strategy="no",  # checkpoint saving disabled (faster training)
    output_dir=OUTPUT_DIR,
    report_to="none",  # TensorBoard disabled
)

# Train on a smaller slice of the data (the original Trainer used the full
# data_train / data_valid). min() keeps select() valid for short datasets.
small_data_train = data_train.select(range(min(500, len(data_train))))  # reduced training set
small_data_valid = data_valid.select(range(min(100, len(data_valid))))  # reduced validation set

trainer = transformers.Trainer(
    model=model,
    train_dataset=small_data_train,  # reduced data
    eval_dataset=small_data_valid,
    args=training_arguments,
    data_collator=transformers.DataCollatorForLanguageModeling(tokenizer, mlm=False),
)

# Disable the model's generation cache during training
model.config.use_cache = False

# Train
trainer.train(resume_from_checkpoint=False)

# Save the adapter
model.save_pretrained(base_directory + "chatbot")


trainable params: 4,194,304 || all params: 6,742,609,920 || trainable%: 0.0622


  return fn(*args, **kwargs)


Step,Training Loss
5,1.5286
10,1.4499
15,1.2744
20,1.4527
25,1.1264


#Loading Model for inference

### Process

In [None]:
import torch
# Release cached GPU memory that PyTorch is holding but no longer using
torch.cuda.empty_cache()


In [None]:
from transformers import AutoModel
from peft import PeftModel, PeftConfig
import torch
from transformers import LlamaTokenizer, LlamaForCausalLM

# Base directory
base_directory = '/content/drive/MyDrive/Colab Notebooks/transformer_learn/MedQuAD-master/'

# Model locations: the base checkpoint on the Hub and the adapter saved by the training cell
BASE_MODEL = "baffo32/decapoda-research-llama-7B-hf"
peft_model_id = base_directory + "chatbot"

# Load the PEFT config stored with the adapter
config = PeftConfig.from_pretrained(peft_model_id)

# Load the base model in float16; device_map="auto" decides the placement of every layer
model = LlamaForCausalLM.from_pretrained(
    BASE_MODEL,
    torch_dtype=torch.float16,
    device_map="auto"  # automatic GPU / CPU assignment
)

# Attach the trained adapter (buffers are offloaded to avoid running out of GPU memory)
model = PeftModel.from_pretrained(
    model,
    peft_model_id,
    torch_dtype=torch.float16,
    offload_buffers=True  # offload buffers to avoid GPU out-of-memory
)

# Load the tokenizer
tokenizer = LlamaTokenizer.from_pretrained(BASE_MODEL)

# No explicit model.to(...) here: device_map="auto" has already placed the
# weights, and moving a dispatched model afterwards is redundant at best and
# can fail when some layers were offloaded.
if not torch.cuda.is_available():
    print("CUDA가 사용 불가능합니다. 모델을 CPU에 올립니다.")

# Switch to evaluation mode
model.eval()

# Example input text
input_text = "Hello, how can I assist you today?"
input_ids = tokenizer.encode(input_text, return_tensors="pt").to(model.device)

# Generate text (input_ids is passed by keyword, as the inference function below does)
outputs = model.generate(input_ids=input_ids, max_length=50)
print(tokenizer.decode(outputs[0], skip_special_tokens=True))


Loading checkpoint shards:   0%|          | 0/33 [00:00<?, ?it/s]

Hello, how can I assist you today?
I'm calling to cancel my account.
I'm calling to cancel my account. I'm not happy with the service.
I'm calling to cancel my account. I


#Inference Function

In [None]:
import torch

# Use the GPU when one is present; otherwise fall back to the CPU so that the
# inference code can still run on a CPU-only runtime.
DEVICE = 'cuda' if torch.cuda.is_available() else 'cpu'

In [None]:
import textwrap
from peft import PeftModel
from transformers import LlamaTokenizer, LlamaForCausalLM, GenerationConfig
from transformers.generation.utils import GreedySearchDecoderOnlyOutput

def ask_ai_doctor(instruction: str, model: PeftModel) -> str:
    """Ask the fine-tuned model a question and return its answer as wrapped text.

    The prompt reproduces the no-input template that generate_prompt builds
    for training (same section labels, no leading indentation), so the model
    is queried in the format it was fine-tuned on.

    Args:
        instruction: The question to ask.
        model: The PEFT model used for generation. The module-level
            `tokenizer` is used for encoding and decoding.

    Returns:
        The generated answer (prompt excluded), line-wrapped with textwrap.
    """
    # Same wording and section labels as the training prompt without input
    prompt = (
        "Below is an instruction that describes a task. "
        "Write a response that appropriately completes the request.\n\n"
        "### 지시(Instruction):\n"
        f"{instruction}\n\n"
        "### 출력(Response):\n"
    )

    # Encode the prompt and move it to the device the model lives on
    encoding = tokenizer(prompt, return_tensors="pt")
    input_ids = encoding["input_ids"].to(model.device)

    # Generation settings:
    #   temperature (0.1): randomness of the generated text; lower is more
    #       deterministic, higher is more random
    #   top_p (0.75): nucleus sampling; only tokens making up the top 75% of
    #       probability mass are considered for the next word
    #   repetition_penalty (1.1): values above 1 reduce repeated phrases
    # NOTE(review): do_sample is not set, so per the transformers generation
    # docs decoding is greedy and temperature/top_p have no effect — confirm
    # whether sampling was intended before enabling it.
    generation_config = GenerationConfig(
        temperature=0.1,
        top_p=0.75,
        repetition_penalty=1.1,
    )

    # Generate the response without tracking gradients
    with torch.inference_mode():
        response = model.generate(
            input_ids=input_ids,
            generation_config=generation_config,
            return_dict_in_generate=True,
            max_new_tokens=250,
        )

    # Decode only the newly generated tokens: this drops the prompt without
    # relying on a marker string being present in the decoded text, and
    # skip_special_tokens keeps markers such as end-of-sequence out of the answer.
    generated_ids = response.sequences[0][input_ids.shape[1]:]
    formatted_response = tokenizer.decode(generated_ids, skip_special_tokens=True).strip()

    # Return the answer wrapped to textwrap's default width
    return "\n".join(textwrap.wrap(formatted_response))


In [None]:
### Translator's note: because of Colab memory limits the model and dataset were simplified compared with the original book, so the result may differ from the book.
print(ask_ai_doctor('What are symptoms of Cirrhosis?', model))

From v4.47 onwards, when a model cache is to be returned, `generate` will return a `Cache` instance instead by default (as opposed to the legacy tuple of tuples format). If you want to keep returning the legacy format, please set `return_legacy_cache=True`.


Symptoms of cirrhosis include:         * Jaundice (yellowing of skin
and eyes)         * Abdominal pain         * Weight loss         *
Fatigue         * Nausea         * Itchiness         * Swelling in
legs, feet, abdomen, or scrotum         * Bleeding from veins
* Easy bruising         * Sensitivity to light         * Muscle
weakness         * Confusion         * Loss of appetite         *
Drowsiness         * Constipation         * Menstrual irregularities
* Impotence         * Hair loss         * Skin rashes         *
Pruritus (itchy skin)         * Fluid retention         * Ascites
(fluid buildup in abdomen)         * Encephalopathy (brain damage)
* Hepatomegaly (enlarged liver)         * Spider angiomas (red spots
on skin)         * Portal hypertension (high blood


In [None]:
### Final result of the original book's code (the output obtained when the original code is run as written, without memory constraints)
"""
print(ask_ai_doctor('What are symptoms of Cirrhosis?', model))
"""

The following list of signs and symptoms may be associated with
cirrhosis.  Some people with cirrhosis do not have any of these
symptoms.   If you are concerned about how your general health is
affected by cirrhosis, talk to your doctor or nurse practitioner.
Signs and Symptoms of Cirrhosis   ------------------------   Abdominal
swelling (ascites)   Bleeding problems   Blurred vision   Breath odor
Confusion   Constipation   Difficulty concentrating   Dizziness
Fatigue   Fluid retention   Gallstones   Gout   Hair loss   Headache
Itching   Jaundice   Liver cancer   Memory loss   Muscle weakness
Nausea   Neuropathy   Night sweats   Pain in the upper right abdomen
Poor appetite   Skin itching   Sleepiness   Stomach pain   Swollen
legs and feet   Tiredness   Weight gain   Yellow skin and eyes
------------------------   How common are these symptoms?   These
symptoms can occur at different times


In [None]:
### The code below is for reference only

In [None]:
from huggingface_hub import notebook_login

In [None]:
# Show the Hugging Face login widget so an access token can be entered
notebook_login()

VBox(children=(HTML(value='<center> <img\nsrc=https://huggingface.co/front/assets/huggingface_logo-noborder.sv…

In [None]:
# This model name and Hugging Face path belong to the original author, so this is for reference only
# `token=True` replaces the deprecated `use_auth_token=True`; it uses the token stored by the login above
model.push_to_hub("prem-timsina/alpaca-ai-doctor", token=True)

Upload 1 LFS files:   0%|          | 0/1 [00:00<?, ?it/s]

adapter_model.bin:   0%|          | 0.00/16.8M [00:00<?, ?B/s]

CommitInfo(commit_url='https://huggingface.co/prem-timsina/alpaca-ai-doctor/commit/be92fbce9e96a94dc5e8d3cbaed190acc7c03462', commit_message='Upload model', commit_description='', oid='be92fbce9e96a94dc5e8d3cbaed190acc7c03462', pr_url=None, pr_revision=None, pr_num=None)