In [None]:
!python -m pip install --upgrade pip

In [1]:
!pip install typing_extensions pydantic openai

Collecting typing_extensions
  Using cached typing_extensions-4.12.2-py3-none-any.whl.metadata (3.0 kB)
Using cached typing_extensions-4.12.2-py3-none-any.whl (37 kB)
Installing collected packages: typing_extensions
  Attempting uninstall: typing_extensions
    Found existing installation: typing_extensions 4.7.1
    Uninstalling typing_extensions-4.7.1:
      Successfully uninstalled typing_extensions-4.7.1
Successfully installed typing_extensions-4.12.2


In [2]:
!pip install datasets transformers peft trl bitsandbytes

Collecting trl
  Downloading trl-0.15.2-py3-none-any.whl.metadata (11 kB)
Downloading trl-0.15.2-py3-none-any.whl (318 kB)
Installing collected packages: trl
Successfully installed trl-0.15.2


In [None]:
import os
import torch
import json
import time
import random
from transformers import pipeline, AutoModelForCausalLM, AutoTokenizer, TrainingArguments, Trainer, DataCollatorForSeq2Seq, BitsAndBytesConfig, GenerationConfig, AutoModelForSequenceClassification
from datasets import Dataset
from peft import LoraConfig, get_peft_model, prepare_model_for_kbit_training
from trl import ORPOTrainer, ORPOConfig, AutoModelForCausalLMWithValueHead
from trl.trainer.utils import DPODataCollatorWithPadding
from tqdm import tqdm

  from .autonotebook import tqdm as notebook_tqdm


In [None]:
# Process-wide switches for the rest of the notebook.
os.environ.update(
    {
        "WANDB_DISABLED": "true",           # disable wandb
        "TOKENIZERS_PARALLELISM": "false",  # avoid the parallel-tokenizer warning
    }
)

# Device string: "cuda" when torch can see a GPU, otherwise "cpu".
if torch.cuda.is_available():
    device = "cuda"
else:
    device = "cpu"

---

In [4]:
# Load the poem JSON file and convert it into a Dataset.
dataset_path = "./korean_poetry_dataset.json"

with open(dataset_path, "r", encoding="utf-8") as json_file:
    poem_data = json.load(json_file)

# Flatten each record: the topic and poem are read from its nested "text" entry.
processed_data = []
for record in poem_data:
    text_fields = record["text"]
    processed_data.append(
        {"topic": text_fields["topic"], "poem": text_fields["poem"]}
    )

train_dataset = Dataset.from_list(processed_data)

In [5]:
# Load the tokenizer that belongs to the base model.
model_name = "Bllossom/llama-3.2-Korean-Bllossom-3B"
tokenizer = AutoTokenizer.from_pretrained(model_name)

# Padding needs a pad token; when the tokenizer defines none, reuse EOS.
pad_token_missing = tokenizer.pad_token is None
if pad_token_missing:
    tokenizer.pad_token = tokenizer.eos_token

In [6]:
# Preprocessing function (tokenization + labels)
def preprocess_text(sample):
    """Tokenize a batch of (topic, poem) pairs and attach causal-LM labels.

    Args:
        sample: Batch mapping with parallel "topic" and "poem" sequences of
            strings, as passed in by `Dataset.map(..., batched=True)`.

    Returns:
        The tokenizer output for the formatted texts (padded/truncated to
        512 tokens) with an added "labels" entry: the "input_ids" with every
        padding position replaced by -100 so those positions can be excluded
        from the loss.
    """
    input_texts = [
        f"주제: {t}\n시: {p}" for t, p in zip(sample["topic"], sample["poem"])
    ]
    model_inputs = tokenizer(
        input_texts,
        padding="max_length",
        max_length=512,
        truncation=True,
        return_attention_mask=True,  # needed below to locate padding
    )

    # Mask padding by position (attention_mask == 0), not by token id.
    # The pad token may be an alias of EOS, in which case an id comparison
    # would also drop every genuine EOS token from the loss.
    model_inputs["labels"] = [
        [token_id if attended else -100 for token_id, attended in zip(ids, mask)]
        for ids, mask in zip(
            model_inputs["input_ids"], model_inputs["attention_mask"]
        )
    ]

    return model_inputs

In [None]:
# Tokenize the dataset in batches and drop the raw text columns.
# NOTE(review): the cell rebinds `train_dataset`; re-running it without
# re-running the loading cell would presumably fail because "topic"/"poem"
# are already removed — confirm.
train_dataset = train_dataset.map(
    function=preprocess_text,
    batched=True,
    remove_columns=["topic", "poem"],
)

In [None]:
# Data collator that batches the tokenized examples for the Trainer.
data_collator = DataCollatorForSeq2Seq(tokenizer=tokenizer, model=None)

In [None]:
# 4-bit quantization settings to reduce VRAM usage.
bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_use_double_quant=True,
    bnb_4bit_compute_dtype=torch.float16,
)

In [None]:
# Load the base model with the quantization settings defined in `bnb_config`.
model_load_kwargs = {
    "quantization_config": bnb_config,
    "device_map": "auto",
}
model = AutoModelForCausalLM.from_pretrained(model_name, **model_load_kwargs)

In [None]:
# LoRA adapter settings.
lora_config = LoraConfig(
    task_type="CAUSAL_LM",
    r=16,
    lora_alpha=32,
    lora_dropout=0.05,
    bias="none",
)

In [None]:
# Prepare the quantized model for training.
# NOTE(review): this rebinds `model`, so the cell is presumably meant to run
# exactly once per model load — confirm before re-running.
model = prepare_model_for_kbit_training(model)

In [None]:
# Wrap the model with LoRA adapters and report the trainable parameters.
model = get_peft_model(model=model, peft_config=lora_config)
model.print_trainable_parameters()

# Put the model in training mode.
model.train()

In [None]:
# Trainer configuration
training_args = TrainingArguments(
    output_dir="./q_lora_poem",
    # `evaluation_strategy` is the deprecated spelling of this argument;
    # `eval_strategy` is the current name.
    eval_strategy="no",
    save_strategy="epoch",
    per_device_train_batch_size=2,
    # 2 sequences x 16 accumulation steps = 32 sequences per optimizer step
    # on each device.
    gradient_accumulation_steps=16,
    learning_rate=2e-4,
    num_train_epochs=3,
    logging_dir='./logs',
    logging_steps=100,
    save_total_limit=2,
    optim="adamw_bnb_8bit",
    report_to="none",
)

trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=train_dataset,
    # `processing_class` supersedes the deprecated `tokenizer=` argument.
    processing_class=tokenizer,
    data_collator=data_collator,
)

In [None]:
# Run the fine-tuning loop with the Trainer configured in the previous cell.
trainer.train()