<a href="https://colab.research.google.com/github/rickiepark/llm-from-scratch/blob/main/ch06/01_main-chapter-code/load-finetuned-model.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

<table style="width:100%">
<tr>
<td style="vertical-align:middle; text-align:left;">
<font size="2">
세바스찬 라시카(Sebastian Raschka)가 쓴 <a href="http://mng.bz/orYv">Build a Large Language Model From Scratch</a>의 번역서 예제 코드입니다.<br>
<br>코드 저장소: <a href="https://github.com/rickiepark/llm-from-scratch">https://github.com/rickiepark/llm-from-scratch</a>
</font>
</td>
<td style="vertical-align:middle; text-align:left;">
<a href="http://mng.bz/orYv"><img src="https://sebastianraschka.com/images/LLMs-from-scratch-images/cover-small.webp" width="100px"></a>
</td>
</tr>
</table>


# 미세 튜닝된 모델 로딩 및 사용


이 노트북에는 6장의 [ch06.ipynb](ch06.ipynb)를 통해 생성 및 저장된 미세 튜닝된 모델을 로드하는 최소한의 코드가 포함되어 있습니다.


In [1]:
from importlib.metadata import version

# Packages whose installed versions are reported for reproducibility.
pkgs = [
    "tiktoken",    # tokenizer
    "torch",       # deep learning library
]
for p in pkgs:
    installed = version(p)
    print(f"{p} 버전: {installed}")

tiktoken 버전: 0.9.0
torch 버전: 2.6.0+cu124


In [2]:
from pathlib import Path

# Location of the fine-tuned classifier checkpoint.
finetuned_model_path = Path("review_classifier.pth")

# Only warn (do not raise) when the checkpoint is missing, so the user is told
# which notebook produces it.
if not finetuned_model_path.exists():
    missing_msg = (
        f"'{finetuned_model_path}'을(를) 찾을 수 없습니다.\n"
        "`ch06.ipynb` 노트북을 실행하여 미세 튜닝한 모델을 저장하세요."
    )
    print(missing_msg)

In [3]:
# Fetch previous_chapters.py into the working directory; GPTModel is imported from it below.
!wget https://bit.ly/4esl8dj -O previous_chapters.py

--2025-06-19 04:14:13--  https://bit.ly/4esl8dj
Resolving bit.ly (bit.ly)... 67.199.248.10, 67.199.248.11
Connecting to bit.ly (bit.ly)|67.199.248.10|:443... connected.
HTTP request sent, awaiting response... 301 Moved Permanently
Location: https://raw.githubusercontent.com/rickiepark/llm-from-scratch/refs/heads/main/ch06/01_main-chapter-code/previous_chapters.py [following]
--2025-06-19 04:14:13--  https://raw.githubusercontent.com/rickiepark/llm-from-scratch/refs/heads/main/ch06/01_main-chapter-code/previous_chapters.py
Resolving raw.githubusercontent.com (raw.githubusercontent.com)... 185.199.110.133, 185.199.108.133, 185.199.109.133, ...
Connecting to raw.githubusercontent.com (raw.githubusercontent.com)|185.199.110.133|:443... connected.
HTTP request sent, awaiting response... 200 OK
Length: 12067 (12K) [text/plain]
Saving to: ‘previous_chapters.py’


2025-06-19 04:14:13 (44.0 MB/s) - ‘previous_chapters.py’ saved [12067/12067]



In [4]:
from previous_chapters import GPTModel


# Architecture hyperparameters for each available GPT-2 size.
model_configs = {
    "gpt2-small (124M)": dict(emb_dim=768, n_layers=12, n_heads=12),
    "gpt2-medium (355M)": dict(emb_dim=1024, n_layers=24, n_heads=16),
    "gpt2-large (774M)": dict(emb_dim=1280, n_layers=36, n_heads=20),
    "gpt2-xl (1558M)": dict(emb_dim=1600, n_layers=48, n_heads=25),
}

CHOOSE_MODEL = "gpt2-small (124M)"

# Settings shared by every size, followed by the selected size's hyperparameters.
BASE_CONFIG = {
    "vocab_size": 50257,     # vocabulary size
    "context_length": 1024,  # context length
    "drop_rate": 0.0,        # dropout rate
    "qkv_bias": True,        # query-key-value bias
    **model_configs[CHOOSE_MODEL],
}

# Initialize the base model
model = GPTModel(BASE_CONFIG)

In [5]:
import torch

# Turn the model into a classifier as in section 6.5: replace the output head
# with a linear layer producing `num_classes` logits per position.
num_classes = 2
model.out_head = torch.nn.Linear(in_features=BASE_CONFIG["emb_dim"], out_features=num_classes)

# Then load the fine-tuned classifier weights. Reuse `finetuned_model_path`
# (the path whose existence was checked in an earlier cell) instead of
# repeating the file name, so the check and the load cannot drift apart.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model.load_state_dict(torch.load(finetuned_model_path, map_location=device, weights_only=True))
model.to(device)
# The trailing semicolon keeps the notebook from echoing the module repr.
model.eval();

In [6]:
import tiktoken

# Tokenizer for the "gpt2" encoding; classify_review calls its encode() method.
tokenizer = tiktoken.get_encoding("gpt2")

In [7]:
# This function was implemented in ch06.ipynb.
def classify_review(text, model, tokenizer, device, max_length=None, pad_token_id=50256):
    """Classify a single text as "spam" or "not spam".

    Args:
        text: Raw input string to classify.
        model: Classifier that exposes ``pos_emb.weight`` and, when called on
            a ``(1, seq_len)`` tensor of token IDs, returns logits that can be
            indexed as ``[:, -1, :]``.
        tokenizer: Object whose ``encode(text)`` returns a list of token IDs.
        device: Device on which the input tensor is created.
        max_length: Length the token sequence is truncated and right-padded
            to. Values larger than the model's supported context length are
            clamped to it; ``None`` means "use the supported context length".
        pad_token_id: Token ID used for right-padding.

    Returns:
        ``"spam"`` if the predicted class index is 1, otherwise ``"not spam"``.
    """
    model.eval()

    # Prepare the model input
    input_ids = tokenizer.encode(text)
    supported_context_length = model.pos_emb.weight.shape[0]

    # Resolve one target length used for BOTH truncation and padding, so the
    # padded sequence can never exceed the positional embedding table and a
    # missing max_length no longer raises a TypeError.
    if max_length is None:
        target_length = supported_context_length
    else:
        target_length = min(max_length, supported_context_length)

    # Truncate sequences that are too long
    input_ids = input_ids[:target_length]

    # Right-pad up to the target length
    input_ids = input_ids + [pad_token_id] * (target_length - len(input_ids))
    input_tensor = torch.tensor(input_ids, device=device).unsqueeze(0)  # add batch dimension

    # Model inference
    with torch.no_grad():
        logits = model(input_tensor)[:, -1, :]  # logits of the last output token
    predicted_label = torch.argmax(logits, dim=-1).item()

    # Return the classification result
    return "spam" if predicted_label == 1 else "not spam"

In [8]:
# Example message classified with the fine-tuned model.
text_1 = (
    "You are a winner you have been specially"
    " selected to receive $1000 cash or a $2000 award."
)

prediction_1 = classify_review(text_1, model, tokenizer, device, max_length=120)
print(prediction_1)

spam


In [9]:
# Second example message classified with the fine-tuned model.
text_2 = (
    "Hey, just wanted to check if we're still on"
    " for dinner tonight? Let me know!"
)

prediction_2 = classify_review(text_2, model, tokenizer, device, max_length=120)
print(prediction_2)

not spam
