# 🚀 2강 (CPU-Optimized, Hugging Face): **DeiT-Tiny** Transfer Learning

**GPU 없이도 동작**하지만, CPU에서는 시간이 걸릴 수 있습니다. 그래서 다음과 같이 최적화했습니다.
- 모델: `facebook/deit-tiny-patch16-224` (작고 빠름)
- **Feature Extraction(백본 freeze)** 기본, **Fine-tuning**은 선택
- **Subset 학습 옵션**(기본 20%) + 작은 배치/에폭
- `TrainingArguments(no_cuda=True)`로 CPU 실행을 강제할 수 있음 (노트북 기본값은 GPU 유무에 따라 자동 선택)


In [None]:
# !pip install -q transformers datasets accelerate evaluate

import os, numpy as np, random, matplotlib.pyplot as plt, torch
from torchvision import datasets as tvdatasets
from transformers import AutoImageProcessor, AutoModelForImageClassification, Trainer, TrainingArguments
from datasets import Dataset, DatasetDict
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, f1_score

# Prefer the GPU whenever PyTorch can see one; otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = 'cuda'
else:
    device = 'cpu'
print('Device:', device)

def set_seed(seed=42):
    """Seed the Python, NumPy and PyTorch random generators with one value.

    Args:
        seed: Integer seed handed to every generator (default 42).
    """
    for seeder in (random.seed, np.random.seed, torch.manual_seed):
        seeder(seed)
    # CUDA generators are seeded only when CUDA is actually available.
    if torch.cuda.is_available():
        torch.cuda.manual_seed_all(seed)


set_seed(42)

In [None]:
# Load CIFAR-10 -> convert to numpy -> split into train/val/test
root = './data'
# Build the train split first, then the test split, downloading if missing.
train_full, test_set = (
    tvdatasets.CIFAR10(root=root, train=is_train, download=True)
    for is_train in (True, False)
)
class_names = train_full.classes

def to_numpy_list(tv_dataset):
    """Split an iterable of (image, label) pairs into two parallel lists.

    Args:
        tv_dataset: Iterable yielding `(img, lab)` pairs.

    Returns:
        Tuple `(imgs, labs)`: `imgs` holds `np.array(img)` for every pair and
        `labs` holds the labels unchanged, both in iteration order.
    """
    # Walk the dataset exactly once so one-shot iterables work too.
    pairs = [(np.array(picture), target) for picture, target in tv_dataset]
    imgs = [picture for picture, _ in pairs]
    labs = [target for _, target in pairs]
    return imgs, labs

# Materialise both torchvision splits as parallel (images, labels) lists.
images_train, labels_train = to_numpy_list(train_full)
images_test, labels_test = to_numpy_list(test_set)

# Subset option (for CPU): train on only a fraction of the training set
SUBSET_FRACTION = 0.2   # use 20% only (adjust within 0.1 ~ 0.3 if needed)
subset_len = int(len(images_train) * SUBSET_FRACTION)
# Fixed-seed permutation so the same subset is picked on every run.
idx = np.random.RandomState(42).permutation(len(images_train))
sub_idx = idx[:subset_len]
images_train, labels_train = (
    [images_train[i] for i in sub_idx],
    [labels_train[i] for i in sub_idx],
)

# Hold out 20% of the subset for validation, stratified by label.
tr_imgs, va_imgs, tr_lbls, va_lbls = train_test_split(
    images_train,
    labels_train,
    test_size=0.2,
    random_state=42,
    stratify=labels_train,
)

# Pair every split name with its (images, labels) lists, then wrap each pair
# in a Dataset exposing "image" and "label" columns.
_split_data = {
    "train": (tr_imgs, tr_lbls),
    "validation": (va_imgs, va_lbls),
    "test": (images_test, labels_test),
}
ds = DatasetDict({
    split: Dataset.from_dict({"image": split_imgs, "label": split_lbls})
    for split, (split_imgs, split_lbls) in _split_data.items()
})

In [None]:
# Small model (DeiT-Tiny) + its image processor
model_name = "facebook/deit-tiny-patch16-224"
image_processor = AutoImageProcessor.from_pretrained(model_name)
model = AutoModelForImageClassification.from_pretrained(
    model_name,
    # Size the new classification head from the dataset's class list.
    num_labels=len(class_names),
    id2label={i: c for i, c in enumerate(class_names)},
    label2id={c: i for i, c in enumerate(class_names)},
    # The pretrained checkpoint ships an ImageNet (1000-class) head. Without
    # this flag, loading it into a head of a different size raises a
    # size-mismatch error; with it, the head is re-initialised instead.
    ignore_mismatched_sizes=True,
).to(device)

def preprocess_examples(examples):
    """Convert a batch of raw examples into model-ready tensors.

    Args:
        examples: Batch dict with an "image" list and a "label" list.

    Returns:
        The image processor's output (holding "pixel_values") with an added
        "labels" tensor built from `examples["label"]`.
    """
    # Images stored through Dataset.from_dict come back as nested Python
    # lists, which the image processor does not accept as images. Rebuild
    # uint8 arrays first (also harmless for numpy / PIL inputs).
    images = [np.asarray(img, dtype=np.uint8) for img in examples["image"]]
    inputs = image_processor(images=images, return_tensors="pt")
    inputs["labels"] = torch.tensor(examples["label"])
    return inputs


# Apply the preprocessing lazily, each time examples are accessed.
ds = ds.with_transform(preprocess_examples)

In [None]:
# Feature extraction (freeze the backbone): turn off gradients for every
# parameter of the base model in a single call.
model.base_model.requires_grad_(False)

def collate_fn(batch):
    """Merge a list of per-example dicts into one batch dict.

    Args:
        batch: List of dicts, each with a "pixel_values" tensor and a
            "labels" value.

    Returns:
        Dict with "pixel_values" stacked along a new leading dimension and
        "labels" gathered into a single tensor.
    """
    pixel_values = torch.stack([example["pixel_values"] for example in batch])
    labels = torch.tensor([example["labels"] for example in batch])
    return {"pixel_values": pixel_values, "labels": labels}

def compute_metrics(eval_pred):
    """Compute accuracy and macro-averaged F1 from evaluation outputs.

    Args:
        eval_pred: Pair `(logits, labels)`.

    Returns:
        Dict with the keys "accuracy" and "f1_macro".
    """
    logits, labels = eval_pred
    # The predicted class is the index of the largest logit along axis 1.
    predicted = np.argmax(logits, axis=1)
    metrics = {}
    metrics["accuracy"] = accuracy_score(labels, predicted)
    metrics["f1_macro"] = f1_score(labels, predicted, average="macro")
    return metrics

# Training configuration for the feature-extraction (frozen backbone) run.
# NOTE: newer transformers releases rename `evaluation_strategy` to
# `eval_strategy` and `no_cuda` to `use_cpu`; switch the names if the
# installed version rejects the ones used here.
args = TrainingArguments(
    output_dir="./deit_tiny_fe",
    learning_rate=5e-4,
    per_device_train_batch_size=32,
    per_device_eval_batch_size=32,
    num_train_epochs=2,
    weight_decay=0.01,
    evaluation_strategy="epoch",
    save_strategy="epoch",
    logging_steps=50,
    load_best_model_at_end=True,
    metric_for_best_model="accuracy",
    report_to="none",
    # The dataset is preprocessed lazily via `with_transform`, which needs the
    # raw "image" column. By default Trainer drops columns that the model's
    # forward() does not accept, which would remove it before the transform.
    remove_unused_columns=False,
    no_cuda=not torch.cuda.is_available(),  # can be used to force CPU
)

# Gather the Trainer wiring in one place: model, arguments, the train and
# validation splits, the image processor, the batch collator and the metrics.
trainer_kwargs = dict(
    model=model,
    args=args,
    train_dataset=ds["train"],
    eval_dataset=ds["validation"],
    tokenizer=image_processor,
    data_collator=collate_fn,
    compute_metrics=compute_metrics,
)
trainer = Trainer(**trainer_kwargs)

# Train, then evaluate on the validation split; the bare name on the last
# line shows the metrics as the notebook cell's output.
trainer.train()
fe_metrics = trainer.evaluate()
fe_metrics

In [None]:
# (Optional) Fine-tuning: unfreeze only the top blocks (light extra training)
DO_FINETUNE = False  # False recommended on CPU; change to True on GPU

if DO_FINETUNE:
    # Re-enable gradients for parameters whose name contains "layer.10" or
    # "layer.11" (presumably the last two blocks of a 12-layer encoder --
    # confirm if the checkpoint changes). These two patterns also match the
    # longer "encoder.layer.10" / "encoder.layer.11" names.
    for name, p in model.base_model.named_parameters():
        if any(k in name for k in ("layer.10", "layer.11")):
            p.requires_grad = True

    # NOTE: newer transformers releases rename `evaluation_strategy` to
    # `eval_strategy` and `no_cuda` to `use_cpu`.
    args_ft = TrainingArguments(
        output_dir="./deit_tiny_ft",
        learning_rate=1e-5,
        per_device_train_batch_size=16,
        per_device_eval_batch_size=16,
        num_train_epochs=2,
        weight_decay=0.01,
        evaluation_strategy="epoch",
        save_strategy="epoch",
        logging_steps=50,
        load_best_model_at_end=True,
        metric_for_best_model="accuracy",
        report_to="none",
        # Keep the raw "image" column: the lazy `with_transform` preprocessing
        # needs it, and Trainer would otherwise drop it as unused.
        remove_unused_columns=False,
        no_cuda=not torch.cuda.is_available(),
    )

    trainer_ft = Trainer(
        model=model,
        args=args_ft,
        train_dataset=ds["train"],
        eval_dataset=ds["validation"],
        tokenizer=image_processor,
        data_collator=collate_fn,
        compute_metrics=compute_metrics,
    )
    trainer_ft.train()
    ft_metrics = trainer_ft.evaluate()
    print(ft_metrics)

In [None]:
# 테스트 평가 + 혼동행렬
from sklearn.metrics import confusion_matrix, classification_report
from torch.utils.data import DataLoader

# Evaluate on the test split and report the accuracy entry of the metrics.
test_metrics = trainer.evaluate(ds["test"])
test_accuracy = test_metrics['eval_accuracy']
print('[Test] Accuracy:', test_accuracy)

@torch.no_grad()
def predict_logits(dset):
    """Run the global `model` over `dset` and collect logits and labels.

    Args:
        dset: Dataset whose items are dicts with "pixel_values" and "labels",
            as consumed by `collate_fn`.

    Returns:
        Tuple `(logits, labels)` of numpy arrays concatenated over all
        batches, in dataset order (the loader does not shuffle).
    """
    loader = DataLoader(dset, batch_size=64, shuffle=False, collate_fn=collate_fn)
    model.eval()
    # Follow the device the model currently lives on instead of the global
    # `device`: the two can differ, e.g. when training was forced onto the
    # CPU on a machine that also has CUDA.
    model_device = next(model.parameters()).device
    preds, labels = [], []
    for batch in loader:
        # Only the pixels go to the model; passing labels would just make it
        # compute a loss that is never used here.
        pixel_values = batch["pixel_values"].to(model_device)
        out = model(pixel_values=pixel_values)
        preds.append(out.logits.cpu().numpy())
        labels.append(batch["labels"].cpu().numpy())
    return np.concatenate(preds), np.concatenate(labels)

# Predict on the test split and take the arg-max class per example.
logits, y_true = predict_logits(ds["test"])
y_pred = logits.argmax(1)

# Derive the number of classes from the label names instead of hard-coding it.
n_classes = len(class_names)
cm = confusion_matrix(y_true, y_pred, labels=list(range(n_classes)))
plt.figure(figsize=(6, 5))
plt.imshow(cm, interpolation='nearest')
plt.title('Confusion Matrix (DeiT-Tiny)')
plt.colorbar()
plt.xticks(range(n_classes), class_names, rotation=45)
plt.yticks(range(n_classes), class_names)
plt.xlabel('Pred')
plt.ylabel('True')
# tight_layout() runs after the axis labels are set so the layout accounts
# for them; calling it earlier can leave the labels clipped.
plt.tight_layout()
plt.show()

print('\n[Classification Report]\n', classification_report(y_true, y_pred, target_names=class_names))

In [None]:
# Save the model and the image processor side by side in one directory.
save_dir = 'deit_tiny_fe_cpu_opt'
trainer.save_model(save_dir)
image_processor.save_pretrained(save_dir)
print('saved: deit_tiny_fe_cpu_opt/')