## 학습 및 추론 환경
- GPU: H100 80GB 3way

- OS: Ubuntu 22.04
- CUDA Version: CUDA 12.1
- Python Version: 3.10.12
- Library Version
    - Pytorch: 2.2.0
    - trl: 0.9.6
    - peft: 0.12.0
    - accelerate: 0.33.0
    - bitsandbytes: 0.43.3
    - pandas: 2.2.2
    - datasets: 2.21.0
    - kiwipiepy: 0.18.0
    - pymupdf: 1.24.9
    - langchain_community: 0.2.14
    - langchain: 0.2.15
    - faiss: 1.7.2
    - transformers: 4.44.2
    - sentence-transformers: 3.0.1


In [1]:
!pip install trl peft accelerate bitsandbytes torch pandas transformers pandas

Collecting unsloth@ git+https://github.com/unslothai/unsloth.git (from unsloth[cu121-torch220]@ git+https://github.com/unslothai/unsloth.git)
  Cloning https://github.com/unslothai/unsloth.git to /tmp/pip-install-lz1wpt70/unsloth_659027197d22457f8ce52f4cfe0ccd4a
  Running command git clone --filter=blob:none --quiet https://github.com/unslothai/unsloth.git /tmp/pip-install-lz1wpt70/unsloth_659027197d22457f8ce52f4cfe0ccd4a
  Resolved https://github.com/unslothai/unsloth.git to commit 4e570be9ae4ced8cdc64e498125708e34942befc
  Installing build dependencies ... [?25ldone
[?25h  Getting requirements to build wheel ... [?25ldone
[?25h  Preparing metadata (pyproject.toml) ... [?25ldone
[?25hCollecting bitsandbytes (from unsloth@ git+https://github.com/unslothai/unsloth.git->unsloth[cu121-torch220]@ git+https://github.com/unslothai/unsloth.git)
  Downloading bitsandbytes-0.43.3-py3-none-manylinux_2_24_x86_64.whl.metadata (3.5 kB)
Collecting xformers@ https://download.pytorch.org/whl/cu1

In [1]:
from huggingface_hub import login
login(token="<Your Token>")

The token has not been saved to the git credentials helper. Pass `add_to_git_credential=True` in this function directly or `--add-to-git-credential` if using via `huggingface-cli` if you want to set the git credential as well.
Token is valid (permission: read).
Your token has been saved to /root/.cache/huggingface/token
Login successful


In [1]:
import warnings
warnings.filterwarnings("ignore")
import os
import pandas as pd
import torch
from datasets import Dataset
from trl import SFTTrainer
from transformers import TrainingArguments, BitsAndBytesConfig, AutoModelForCausalLM, AutoTokenizer
from peft import LoraConfig, get_peft_model, prepare_model_for_kbit_training

os.environ["CUDA_VISIBLE_DEVICES"] = "0, 1, 2"

### 폴더구조
- workspace
    - test_source [폴더]
    - train_source [폴더]
    - llama3.1_qlora [폴더]
    - train.csv
    - test.csv
    - sample_submission.csv
    - Model Finetuning.ipynb
    - RAG Pipeline.ipynb

In [2]:
base_directory = './' # pdf의 상위 폴더 경로
df = pd.read_csv('./train.csv')

In [3]:
MODEL_ID = "meta-llama/Meta-Llama-3.1-70B-Instruct" # 사용할 오픈소스 hugging face 주소
bnb_config = BitsAndBytesConfig( # 4bit로 모델 양자화
    load_in_4bit=True,
    bnb_4bit_use_double_quant=False,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_compute_dtype=torch.bfloat16
)

model = AutoModelForCausalLM.from_pretrained( # 모델 load
    MODEL_ID,
    device_map="auto",
    torch_dtype=torch.bfloat16,
    quantization_config=bnb_config,
)

model.config.use_cache = False
model.config.pretraining_tp = 1

tokenizer = AutoTokenizer.from_pretrained(MODEL_ID)

tokenizer.pad_token_id = tokenizer.eos_token_id

Loading checkpoint shards:   0%|          | 0/30 [00:00<?, ?it/s]

In [4]:
def formatting_prompts_func(examples):
    prompts = []
    for question, answer in zip(examples["Question"], examples["Answer"]):
        prompt = f"""
        <|start_header_id|>user<|end_header_id|>
        질문: {question}
        <|eot_id|>
        <|start_header_id|>model<|end_header_id|>
        답변: {answer}
        <|eot_id|><|end_of_text|>
        """
        prompts.append(prompt)
    return {"text": prompts}

In [5]:
dataset = Dataset.from_pandas(df)
dataset = dataset.map(
    formatting_prompts_func,
    batched=True,
    remove_columns=dataset.column_names,
    desc="Formatting prompts"
)

Formatting prompts:   0%|          | 0/496 [00:00<?, ? examples/s]

In [6]:
peft_config = LoraConfig(
    r=32,
    lora_alpha=64,
    lora_dropout=0.05,
    bias="none",
    task_type="CAUSAL_LM",
    target_modules=["q_proj", "k_proj", "v_proj", "up_proj", "down_proj", "o_proj", "gate_proj"]
)

model = prepare_model_for_kbit_training(model)
model = get_peft_model(model, peft_config)

In [7]:
training_args = TrainingArguments(
    per_device_train_batch_size=8, # 배치 사이즈
    gradient_accumulation_steps=1,
    warmup_ratio=0.1,
    num_train_epochs=3, # 3 에포크
    learning_rate=2e-4, # 학습률을 2e-4로 설정
    fp16=True, 
    logging_steps=10,
    optim="adamw_8bit", # 옵티마이저를 adamw_8bit로 설정
    weight_decay=0.01, # 가중치 감소를 0.01로 설정
    lr_scheduler_type="cosine",
    seed=601, # 시드 값 고정
    output_dir="outputs",
)
    
trainer = SFTTrainer(
    model=model,
    tokenizer=tokenizer,
    train_dataset=dataset,
    dataset_text_field="text", # dataset 안의 text 열
    max_seq_length=4096,
    dataset_num_proc=2,
    packing=False,
    args=training_args
)
    
trainer.train()

Map (num_proc=2):   0%|          | 0/496 [00:00<?, ? examples/s]

Step,Training Loss
10,3.3543
20,1.7935
30,1.4351
40,1.2482
50,1.2162
60,1.2353
70,0.859
80,0.8563
90,0.8847
100,0.8389


TrainOutput(global_step=186, training_loss=0.9674547385143978, metrics={'train_runtime': 646.8799, 'train_samples_per_second': 2.3, 'train_steps_per_second': 0.288, 'total_flos': 1.0092914401266893e+17, 'train_loss': 0.9674547385143978, 'epoch': 3.0})

In [8]:
peft_model = trainer.model
peft_model.save_pretrained("./llama3.1_qlora/") # 학습이 완료된 Lora Weight 저장