In [1]:
!pip install transformers peft datasets



In [2]:
import os
import torch

os.environ["NVIDIA_VISIBLE_DEVICES"] = "0"
os.environ["CUDA_VISIBLE_DEVICES"] = "0"

print(torch.__version__)
print(torch.version.cuda)
print(torch.backends.cudnn.version())
print(torch.cuda.is_available())

2.8.0+cu126
12.6
91002
True


In [3]:
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer

# 튜닝할 기본 모델 로드
model_name = "kakaocorp/kanana-nano-2.1b-base" # 또는 "meta-llama/Llama-3.1-8B"
base_model = AutoModelForCausalLM.from_pretrained(
    model_name,
    torch_dtype=torch.bfloat16,
    trust_remote_code=True
).to("cuda")

# 토크나이저 로드
tokenizer = AutoTokenizer.from_pretrained(model_name, padding_side="left")
# 토크나이저 설정: llama 타입 모델은 아래와 같이 pad_token 지정 필요
tokenizer.pad_token = tokenizer.eos_token

The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


config.json:   0%|          | 0.00/692 [00:00<?, ?B/s]

`torch_dtype` is deprecated! Use `dtype` instead!


model.safetensors:   0%|          | 0.00/4.17G [00:00<?, ?B/s]

tokenizer_config.json: 0.00B [00:00, ?B/s]

tokenizer.json: 0.00B [00:00, ?B/s]

special_tokens_map.json:   0%|          | 0.00/444 [00:00<?, ?B/s]

In [4]:
!wget https://objectstorage.kr-central-2.kakaocloud.com/v1/c11fcba415bd4314b595db954e4d4422/public/tutorial/kubeflow/kubeflow-tensorboard/data/sample_train_data.csv

--2025-11-17 21:03:24--  https://objectstorage.kr-central-2.kakaocloud.com/v1/c11fcba415bd4314b595db954e4d4422/public/tutorial/kubeflow/kubeflow-tensorboard/data/sample_train_data.csv
Resolving objectstorage.kr-central-2.kakaocloud.com (objectstorage.kr-central-2.kakaocloud.com)... 61.109.235.79, 61.109.235.224
Connecting to objectstorage.kr-central-2.kakaocloud.com (objectstorage.kr-central-2.kakaocloud.com)|61.109.235.79|:443... connected.
HTTP request sent, awaiting response... 200 OK
Length: 34703 (34K) [text/csv]
Saving to: ‘sample_train_data.csv’


2025-11-17 21:03:24 (262 MB/s) - ‘sample_train_data.csv’ saved [34703/34703]



In [5]:
from datasets import Dataset

dataset = Dataset.from_csv('sample_train_data.csv')
dataset

Generating train split: 0 examples [00:00, ? examples/s]

Dataset({
    features: ['Unnamed: 0', 'instruction', 'output', 'input'],
    num_rows: 111
})

In [7]:
def formatting_prompts_func(examples):
    alpaca_prompt = """Below is an instruction that describes a task, paired with an input that provides further context. Write a response that appropriately completes the request.

### Instruction:
{}

### Input:
{}

### Response:
{}"""

    instructions = examples["instruction"]
    inputs = examples["input"]
    outputs = examples["output"]

    EOS_TOKEN = tokenizer.eos_token  # Must add EOS_TOKEN

    texts = []
    for instruction, input, output in zip(instructions, inputs, outputs):
        # Must add EOS_TOKEN, otherwise your generation will go on forever!
        text = alpaca_prompt.format(instruction, input, output) + EOS_TOKEN
        texts.append(text)

    return {"text": texts}


dataset = dataset.map(formatting_prompts_func, batched=True)
dataset = dataset.remove_columns(['Unnamed: 0'])
dataset

Map:   0%|          | 0/111 [00:00<?, ? examples/s]

Dataset({
    features: ['instruction', 'output', 'input', 'text'],
    num_rows: 111
})

In [8]:
def tokenize_function(examples):
    tokens = tokenizer(examples["text"], padding=True, return_tensors="pt")
    tokens["labels"] = tokens["input_ids"]
    return tokens

dataset = dataset.map(tokenize_function, batched=True, remove_columns=["text"])

Map:   0%|          | 0/111 [00:00<?, ? examples/s]

In [9]:
from peft import LoraConfig, get_peft_model

lora_config = LoraConfig(
    task_type="CAUSAL_LM",
    r=8,
    lora_alpha=32,
    lora_dropout=0.1,
    target_modules=[
        "q_proj",
        "k_proj",
        "v_proj"]
)
model = get_peft_model(base_model, lora_config)

In [10]:
from transformers import Trainer, TrainingArguments

trainer = Trainer(
    model=model,
    train_dataset=dataset,
    args=TrainingArguments(
        per_device_train_batch_size = 2,
        gradient_accumulation_steps = 4,
        warmup_steps = 5,
        max_steps = 60,
        learning_rate = 2e-4,
        bf16 = True,
        logging_steps = 1,
        weight_decay = 0.01,
        lr_scheduler_type = "linear",
        seed = 1234,
        output_dir = "outputs",
        report_to = "none"
    )
)

In [11]:
# 현재 메모리 상태를 보여주는 코드
gpu_stats = torch.cuda.get_device_properties(0)  # GPU 속성 가져오기
start_gpu_memory = round(
    torch.cuda.max_memory_reserved() / 1024 / 1024 / 1024, 3
)  # 시작 시 예약된 GPU 메모리 계산
max_memory = round(
    gpu_stats.total_memory / 1024 / 1024 / 1024, 3
)  # GPU의 최대 메모리 계산
print(
    f"GPU = {gpu_stats.name}. Max memory = {max_memory} GB."
)  # GPU 이름과 최대 메모리 출력
print(f"{start_gpu_memory} GB of memory reserved.")  # 예약된 메모리 양 출력

GPU = Tesla T4. Max memory = 14.741 GB.
8.121 GB of memory reserved.


In [12]:
# 모델 훈련
trainer_stats = trainer.train()

Step,Training Loss
1,8.4921
2,7.0885
3,8.4488
4,8.1746
5,5.8274
6,7.2151
7,7.4518
8,7.5778
9,7.3272
10,6.8858
