In [1]:
import os
from dotenv import load_dotenv
import huggingface_hub

# Load variables from a .env file (if one is found) into the process environment.
load_dotenv()

# Hugging Face access token; this is None when HF_TOKEN is not defined.
hf_token = os.environ.get('HF_TOKEN')

# Log this session in to the Hugging Face Hub with that token.
huggingface_hub.login(token=hf_token)

Note: Environment variable`HF_TOKEN` is set and is the current active token independently from the token you've just configured.


## 1) 모델 설정 + 양자화

In [None]:
import os

from datasets import load_dataset
from dotenv import load_dotenv
from peft import LoraConfig, get_peft_model, prepare_model_for_kbit_training
from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig, TrainingArguments
from trl import SFTTrainer

# Read the Hugging Face token from the environment / .env file.
# (Same lookup as the login cell; repeating it here is harmless.)
load_dotenv()
hf_token = os.getenv("HF_TOKEN")

# Base checkpoint to fine-tune.
model_id = "meta-llama/Llama-3.2-1B-Instruct"

# Load the tokenizer. Llama is implemented natively in transformers, so
# trust_remote_code is not needed; leaving it off avoids executing any
# Python code shipped inside the Hub repository.
tokenizer = AutoTokenizer.from_pretrained(model_id, token=hf_token)
# Reuse EOS as the padding token (needed for training).
# NOTE(review): depending on the data collator, padding with EOS can mask the
# EOS labels during loss computation — confirm the model still learns to stop.
tokenizer.pad_token = tokenizer.eos_token

# 4-bit NF4 quantization with double quantization.
# The compute dtype is bfloat16 so that it agrees with bf16=True in the
# TrainingArguments cell instead of mixing two half-precision formats.
bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_use_double_quant=True,
    bnb_4bit_compute_dtype="bfloat16",
)

# Load the quantized base model; device_map="auto" lets the library place the weights.
model = AutoModelForCausalLM.from_pretrained(
    model_id,
    token=hf_token,
    device_map="auto",
    quantization_config=bnb_config,
)



## 2) 데이터 형식 변경

In [None]:
# Load the training split from a local JSON file (e.g. Alpaca-style records).
# NOTE(review): the SFT cell reads a "text" field — confirm train.json provides one.
dataset = load_dataset("json", data_files="./data/train.json", split="train")

## 3) LoRA 설정

In [None]:
# LoRA hyperparameters: rank-8 adapters on the attention projection layers.
lora_config = LoraConfig(
    r=8,
    lora_alpha=16,
    target_modules=["q_proj", "k_proj", "v_proj", "o_proj"],  # adjust to the module names of the actual model
    lora_dropout=0.05,
    bias="none",
    task_type="CAUSAL_LM"
)

# The base model was loaded with 4-bit quantization, so run PEFT's k-bit
# preparation step before attaching adapters.
model = prepare_model_for_kbit_training(model)

# Wrap the prepared model with the LoRA adapters.
model = get_peft_model(model, lora_config)

## 4) SFT TrainingArguments 설정

In [None]:
# Hyperparameters for the supervised fine-tuning run.
training_args = TrainingArguments(
    # Where checkpoints go: one save per epoch, keeping at most one on disk.
    output_dir="./llama3-lora-sft",
    save_strategy="epoch",
    save_total_limit=1,
    # Batch size 1 with 4 accumulation steps (4 samples per optimizer step per device).
    per_device_train_batch_size=1,
    gradient_accumulation_steps=4,
    # Schedule, optimizer and precision.
    num_train_epochs=3,
    learning_rate=2e-4,
    optim="adamw_torch_fused",
    bf16=True,
    # Log every 10 steps; no external experiment tracker.
    logging_steps=10,
    report_to="none",
)

## 5) SFT

In [None]:
# Directory that receives the trained model and the tokenizer files.
adapter_dir = "./llama3-lora-sft/checkpoint"

# Build the trainer from the LoRA-wrapped model, the training arguments and the dataset.
# NOTE(review): the tokenizer= / dataset_text_field= keywords depend on the
# installed TRL version — confirm they match the pinned release.
trainer = SFTTrainer(
    model=model,
    args=training_args,
    train_dataset=dataset,
    tokenizer=tokenizer,
    dataset_text_field="text",  # or whichever field matches the prompt-response layout
)

# Run training.
trainer.train()

# Persist the trained model together with its tokenizer.
trainer.model.save_pretrained(adapter_dir)
tokenizer.save_pretrained(adapter_dir)