# Multi-SKU Analysis Notebook

Day 3: SKU001/002/003 다중 SKU 파이프라인 분석과 베이스라인 생성 데모를 진행합니다. 셀을 위에서 아래로 순서대로 실행하세요.

In [None]:
# 1. 환경 설정 (SkuConfigManager 임포트)
from pathlib import Path
import sys
import time
import cv2
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

# Apply the "seaborn-v0_8" matplotlib style sheet to the figures drawn in this notebook.
# NOTE(review): this style name appears to require matplotlib >= 3.6 (older
# releases call it "seaborn") — confirm against the pinned matplotlib version.
plt.style.use("seaborn-v0_8")

# Derive the project root from the current working directory: when the kernel
# runs inside a directory named "notebooks", the root is its parent; otherwise
# the working directory itself is used.
PROJECT_ROOT = Path.cwd().resolve()
PROJECT_ROOT = PROJECT_ROOT.parent if PROJECT_ROOT.name == "notebooks" else PROJECT_ROOT
# Put the root first on sys.path so the `src.*` imports that follow can resolve.
sys.path.insert(0, str(PROJECT_ROOT))

from src.pipeline import InspectionPipeline
from src.sku_manager import SkuConfigManager
from src.utils.file_io import read_json

# Input locations and the SKU codes analysed throughout this notebook.
DATA_DIR = PROJECT_ROOT.joinpath("data", "raw_images")
SKU_DB = PROJECT_ROOT.joinpath("config", "sku_db")
SKU_CODES = ["SKU001", "SKU002", "SKU003"]

# Fetch every SKU configuration once up front; later cells look them up by code.
manager = SkuConfigManager(SKU_DB)
sku_configs = {sku_code: manager.get_sku(sku_code) for sku_code in SKU_CODES}

def bgr_to_rgb(img: np.ndarray) -> np.ndarray:
    """Convert an image from BGR to RGB channel order with OpenCV.

    ``None`` is handed back unchanged instead of being passed to
    ``cv2.cvtColor``.
    """
    if img is None:
        return img
    return cv2.cvtColor(img, cv2.COLOR_BGR2RGB)

# Sanity check: echo the resolved project root and the SKU codes that were loaded.
print(f"PROJECT_ROOT: {PROJECT_ROOT}")
print(f"Loaded SKUs: {list(sku_configs)}")

## 2. SKU 목록 조회

In [None]:
# Tabulate whatever manager.list_all_skus() returns and show it with the
# notebook's display() helper.
skus_df = pd.DataFrame(manager.list_all_skus())
display(skus_df)

## 3. SKU 비교 시각화 (SKU001 vs SKU002 vs SKU003)

In [None]:
# Flatten every SKU's per-zone settings into one row per (sku, zone) pair.
# A missing L/a/b/threshold value becomes NaN so an incomplete zone still
# shows up in the table.
zone_rows = []
for sku_code, cfg in sku_configs.items():
    for zone_name, zone_cfg in cfg.get("zones", {}).items():
        zone_rows.append({
            "sku": sku_code,
            "zone": zone_name,
            "L": zone_cfg.get("L", np.nan),
            "a": zone_cfg.get("a", np.nan),
            "b": zone_cfg.get("b", np.nan),
            "threshold": zone_cfg.get("threshold", np.nan),
        })

# Naming the columns explicitly keeps the schema stable even when no SKU
# defines a zone; an empty row list would otherwise give a column-less frame
# and any later sku_zone_df["sku"] lookup would raise KeyError.
sku_zone_df = pd.DataFrame(
    zone_rows,
    columns=["sku", "zone", "L", "a", "b", "threshold"],
)
display(sku_zone_df)

if sku_zone_df.empty:
    # Nothing to plot; same message the zone-pattern cell prints for this case.
    print("Zone 데이터가 없습니다.")
else:
    # matplotlib places equal category strings at the same x position, so a SKU
    # with several zones would have its points/bars drawn on top of each other.
    # Switch to "<sku>-<zone>" labels only when SKU codes actually repeat, which
    # leaves the one-zone-per-SKU charts exactly as before.
    x_labels = sku_zone_df["sku"]
    if x_labels.duplicated().any():
        x_labels = sku_zone_df["sku"] + "-" + sku_zone_df["zone"]

    fig, axes = plt.subplots(1, 2, figsize=(14, 5))

    # Left panel: one line per LAB component.
    ax = axes[0]
    for metric in ["L", "a", "b"]:
        ax.plot(x_labels, sku_zone_df[metric], marker="o", label=metric)
    ax.set_title("Zone LAB 비교")
    ax.set_ylabel("Value")
    ax.grid(True, alpha=0.3)
    ax.legend()

    # Right panel: configured threshold per row.
    ax = axes[1]
    ax.bar(x_labels, sku_zone_df["threshold"], color="orange")
    ax.set_title("Threshold 비교")
    ax.set_ylabel("ΔE Threshold")
    ax.grid(True, axis="y", alpha=0.3)

    plt.tight_layout()
    plt.show()

## 4. 베이스라인 생성 데모 (임시 SKU_DEMO)

In [None]:
# Throw-away SKU database used only by this demo, kept apart from config/sku_db.
demo_db = PROJECT_ROOT / "data" / "exports" / "sku_demo_db"
demo_manager = SkuConfigManager(demo_db)
demo_sku_code = "SKU_DEMO"
# NOTE(review): assumes the manager stores each SKU as <db>/<code>.json, as the
# existence check and the final message below both rely on that — confirm.
demo_sku_path = demo_db / f"{demo_sku_code}.json"

# Remove the SKU left behind by a previous run so the demo starts clean.
if demo_sku_path.exists():
    demo_manager.delete_sku(demo_sku_code)

# Use at most the first five SKU002 "OK" images (in sorted path order) as samples.
demo_samples = sorted(DATA_DIR.glob("SKU002_OK_*.jpg"))[:5]
print(f"사용 샘플 수: {len(demo_samples)}")

# A baseline cannot be derived from zero images; stop here with a clear message
# instead of handing an empty list to generate_baseline().
if not demo_samples:
    raise FileNotFoundError(
        f"{DATA_DIR} 에서 SKU002_OK_*.jpg 샘플 이미지를 찾을 수 없습니다."
    )

demo_sku = demo_manager.generate_baseline(
    sku_code=demo_sku_code,
    ok_images=demo_samples,
    description="Notebook demo baseline",
    threshold_method="mean_plus_2std",
)

# Flatten the nested result into dotted column names for a one-row overview.
display(pd.json_normalize(demo_sku, sep="."))
print(f"Saved to: {demo_sku_path}")

## 5. 다중 SKU 배치 처리

In [None]:
def collect_paths_by_sku(data_dir: Path, sku_codes: list) -> dict:
    """Group the ``.jpg`` files directly inside *data_dir* by SKU code.

    Args:
        data_dir: Directory whose entries are matched (no recursion).
        sku_codes: SKU codes; each is matched against ``<code>_*.jpg``.

    Returns:
        A dict with one key per code, in input order, mapping to the sorted
        list of matching paths (an empty list when nothing matches).
    """
    return {sku: sorted(data_dir.glob(f"{sku}_*.jpg")) for sku in sku_codes}

paths_by_sku = collect_paths_by_sku(DATA_DIR, SKU_CODES)

# Run every image through a pipeline built from its own SKU's configuration and
# record the result fields together with the elapsed processing time.
batch_records = []
for sku_code, paths in paths_by_sku.items():
    pipeline = InspectionPipeline(sku_configs[sku_code])
    for p in paths:
        # perf_counter() is a monotonic, high-resolution clock meant for
        # measuring durations; time.time() can jump when the system clock is
        # adjusted and may have coarse resolution.
        start = time.perf_counter()
        result = pipeline.process(str(p), sku_code)
        elapsed_ms = (time.perf_counter() - start) * 1000  # seconds -> ms
        batch_records.append({
            "image": p.name,
            "sku": sku_code,
            "judgment": result.judgment,
            "overall_delta_e": result.overall_delta_e,
            "confidence": result.confidence,
            "elapsed_ms": elapsed_ms,
        })

# One row per processed image; preview the first rows only.
batch_df = pd.DataFrame(batch_records)
display(batch_df.head())

## 6. SKU별 통계

In [None]:
# Per-SKU summary: number of images, share of "OK" judgments, mean ΔE and
# mean processing time.
summary_by_sku = (
    batch_df
    .groupby("sku")
    .agg(
        samples=("image", "count"),
        ok_ratio=("judgment", lambda verdicts: (verdicts == "OK").mean()),
        mean_delta_e=("overall_delta_e", "mean"),
        mean_time_ms=("elapsed_ms", "mean"),
    )
    .reset_index()
)

# Image count per (sku, judgment) pair; combinations that never occur show 0.
judgment_table = batch_df.pivot_table(
    index="sku",
    columns="judgment",
    values="image",
    aggfunc="count",
    fill_value=0,
)

display(summary_by_sku)
display(judgment_table)

# One bar chart per summary metric, side by side:
# (summary column, title, y-axis label, bar colour).
fig, axes = plt.subplots(1, 2, figsize=(12, 4))
panels = [
    ("mean_delta_e", "SKU별 평균 ΔE", "ΔE", "steelblue"),
    ("mean_time_ms", "SKU별 평균 처리시간 (ms)", "ms", "seagreen"),
]
for ax, (column, title, ylabel, color) in zip(axes, panels):
    ax.bar(summary_by_sku["sku"], summary_by_sku[column], color=color)
    ax.set_title(title)
    ax.set_ylabel(ylabel)
    ax.grid(True, axis="y", alpha=0.3)

plt.tight_layout()
plt.show()

## 7. Zone 패턴 분석

In [None]:
# Per-zone view of the configured LAB values and ΔE thresholds.
if sku_zone_df.empty:
    print("Zone 데이터가 없습니다.")
else:
    # One "<sku>-<zone>" label per row, shared by both charts.
    zone_labels = sku_zone_df["sku"] + "-" + sku_zone_df["zone"]

    # Chart 1: grouped bars, with L*, a*, b* side by side around each row's slot.
    fig, ax = plt.subplots(1, 1, figsize=(8, 4))
    width = 0.2
    x = np.arange(len(sku_zone_df))
    bar_specs = ((-width, "L", "L*"), (0, "a", "a*"), (width, "b", "b*"))
    for shift, column, legend_name in bar_specs:
        ax.bar(x + shift, sku_zone_df[column], width, label=legend_name)

    ax.set_xticks(x)
    ax.set_xticklabels(zone_labels)
    ax.set_ylabel("LAB")
    ax.set_title("Zone LAB 패턴")
    ax.legend()
    ax.grid(True, axis="y", alpha=0.3)
    plt.xticks(rotation=20)
    plt.show()

    # Chart 2: the threshold configured for each row.
    fig, ax = plt.subplots(1, 1, figsize=(6, 4))
    ax.bar(zone_labels, sku_zone_df["threshold"], color="orange")
    ax.set_ylabel("ΔE Threshold")
    ax.set_title("Zone Threshold 패턴")
    ax.grid(True, axis="y", alpha=0.3)
    plt.xticks(rotation=20)
    plt.show()

## 8. 대시보드 (요약 지표)

In [None]:
# Presentation copy of the per-SKU summary: OK ratio as a percentage with one
# decimal, both means rounded to two decimals. summary_by_sku stays untouched.
dashboard = summary_by_sku.copy()
dashboard["ok_ratio"] = (dashboard["ok_ratio"] * 100).round(1)
for rounded_column in ("mean_delta_e", "mean_time_ms"):
    dashboard[rounded_column] = dashboard[rounded_column].round(2)
display(dashboard)

print("\nJudgment 분포:")
print(judgment_table)

# Overall figures across every processed image, regardless of SKU.
total_images = len(batch_df)
overall_mean_delta_e = batch_df["overall_delta_e"].mean()
overall_mean_time_ms = batch_df["elapsed_ms"].mean()

print("\n총 처리 이미지 수:", total_images)
print("평균 ΔE:", overall_mean_delta_e)
print("평균 처리시간 (ms):", overall_mean_time_ms)