In [100]:
# -*- coding: utf-8 -*-
from __future__ import annotations
import pandas as pd
import numpy as np
import math, re, ast, operator
from typing import Any, Dict, List, Optional, Tuple
import ast
from pathlib import Path

In [101]:
# Input files, given as paths relative to the current working directory.
RULE_CSV = Path("rulebased.csv")  # rule table later passed to recommend_products_for_user

# Product metadata export (the file name is Korean for "product metadata final").
ITEM_CSV = Path("제품 메타데이터 최종 - 제품 메타데이터 최종.csv")

# Load both tables with pandas' default CSV parsing options.
rules = pd.read_csv(RULE_CSV)
items = pd.read_csv(ITEM_CSV)

In [102]:
# =========================================================
# 0) 규칙/피처 표준화 매핑 & 룰 엔진 (이전 답변 버전 포함)
# =========================================================

# Maps the `variable` name used in a rule row to the key under which the same
# attribute is stored in the user dict. _eval_condition falls back to the rule
# variable name itself when it has no entry here.
RULE_VAR_TO_USER_KEY = {
    "age": "age",
    "user_gender": "gender",
    "weight_kg": "weight",
    "lbm_kg": "lean_mass",
    "diet_type": "diet_phase",
    "training_experience": "training_experience",
    "training_intensity": "training_intensity",
    "workout_duration": "training_duration",
    "workout_time": "training_time",
    "workout_environment": "environment_heat_humid",
    "exercise_type": "exercise_type",
    "user_goal": "user_goal",
    "health_conditions": "health_conditions",  # matches the CSV name; NOTE(review): the original note says disease_flag is also supported as a fallback, but no such fallback is visible in this mapping - confirm
    "allergy": "allergy",
    "is_dehydrated": "is_dehydrated",
    "current_stack": "current_stack",
    "intake_period": "intake_period",
    "cvd_risk": "cvd_risk",
    "oral_hygiene": "oral_hygiene",
    "workout_type": "workout_type",
}

# Ordinal encodings for categorical attributes, so that ordered comparisons
# (>, >=, <, <=) can be applied to them. The outer key is looked up with the
# *user-dict* key (see _coerce_for_compare / _eval_condition); the inner dict
# maps a category label (Korean or English) to its rank.
ORDINAL_MAPS = {
    "training_experience": {
        "초보": 0,
        "중급": 1,
        "숙련": 2,
        "숙련자": 2,
        "beginner": 0,
        "intermediate": 1,
        "advanced": 2,
    },
    "training_duration": {
        "short": 0,
        "medium": 1,
        "long": 2,
        "<60": 0,
        "60-90": 1,
        "90+": 2,
    },
    # Evening ("저녁") and night ("밤") share the same rank.
    "training_time": {"오전": 0, "오후": 1, "저녁": 2, "밤": 2},
    "diet_phase": {
        "체지방 감소": 0,
        "유지": 1,
        "벌크업": 2,
        "cutting": 0,
        "maintenance": 1,
        "bulking": 2,
    },
    "environment_heat_humid": {"실내": 0, "고온": 1, "고온다습": 2},
    # NOTE(review): RULE_VAR_TO_USER_KEY maps "user_gender" to "gender", so a
    # lookup by the mapped user key does not reach this entry - confirm intent.
    "user_gender": {"남성": 0, "여성": 1, "male": 0, "female": 1},
    # NOTE(review): the visible lookups use str(value).strip() as the key, which
    # cannot match the bool keys False/True below - confirm they are needed.
    "cvd_risk": {"아니오": 0, "예": 1, False: 0, True: 1},
}

# Previous version (kept for reference):
# OPS = {"==": op.eq, "!=": op.ne, ">": op.gt, ">=": op.ge, "<": op.lt, "<=": op.le}

# Changed: two-character operators are listed first so that ">=" / "<=" are
# detected before the ">" / "<" they contain.
OPS_ORDERED = [
    (">=", operator.ge),
    ("<=", operator.le),
    ("==", operator.eq),
    ("!=", operator.ne),
    (">", operator.gt),
    ("<", operator.lt),
]


def _norm_text(x: Any) -> Optional[str]:
    if x is None or (isinstance(x, float) and math.isnan(x)):
        return None
    s = str(x).strip()
    return s if s else None


def _as_number(x: Any) -> Optional[float]:
    try:
        return float(x)
    except:
        return None


def _in_list_or_set(value: str, container: Any) -> bool:
    """Loosely test whether ``value`` occurs in ``container``.

    Matching is done on whitespace-stripped text and succeeds when either
    string contains the other (so "회복" matches "회복 지원" and vice versa);
    an exact match is just a special case of that.

    Args:
        value: The text to look for.
        container: A list/set/frozenset/tuple of candidates, a string holding
            candidates separated by ``,``, ``/`` or ``|``, or a single scalar.

    Returns:
        True on a match. False when ``value`` or ``container`` is missing or
        blank (None, NaN, empty string) or nothing matches.
    """
    if container is None:
        return False

    needle = _norm_text(value)
    if not needle:
        return False

    def _matches(candidate: Any) -> bool:
        text = _norm_text(candidate)
        return bool(text) and (needle in text or text in needle)

    if isinstance(container, (list, set, frozenset, tuple)):
        return any(_matches(candidate) for candidate in container)

    haystack = _norm_text(container)
    if haystack is None:
        # NaN / blank scalar: nothing to match against. Without this guard the
        # substring test below would raise TypeError on None.
        return False

    if "," in haystack or "/" in haystack or "|" in haystack:
        return any(_matches(tok) for tok in re.split(r"[,\|/]+", haystack))

    # Single scalar value.
    return needle in haystack or haystack in needle


def _duration_to_minutes(x):
    if x is None:
        return None
    s = str(x).strip()
    m = re.match(r"^(\d+)\s*-\s*(\d+)$", s)  # "60-90" -> 75
    if m:
        a, b = float(m.group(1)), float(m.group(2))
        return (a + b) / 2.0
    m = re.match(r"^<\s*(\d+)$", s)  # "<60" -> 59
    if m:
        return float(m.group(1)) - 1
    m = re.match(r"^(>=\s*)?(\d+)\+?$", s)  # "90+" or ">=60" -> 90 or 60
    if m:
        return float(m.group(2))
    try:
        return float(s)
    except:
        return None


def _coerce_for_compare(var_key: str, user_value, rhs_hint: str | None = None):
    """Turn a user attribute into something comparable against a rule value.

    Resolution order:
        1. duration keys are parsed with ``_duration_to_minutes``;
        2. anything ``float()`` accepts becomes a float (bools become 0.0/1.0);
        3. keys that have an entry in ``ORDINAL_MAPS`` are mapped to their rank
           (None when the label is unknown);
        4. otherwise the stripped string form is returned.

    Args:
        var_key: User-dict key of the attribute being compared.
        user_value: The raw value from the user dict.
        rhs_hint: Accepted for call compatibility; not used.

    Returns:
        A float, an ordinal rank, a string, or None when the value is missing
        or has no ordinal rank.
    """
    if user_value is None:
        return None

    # Workout/training duration labels are compared as minutes.
    if var_key in ("training_duration", "workout_duration"):
        minutes = _duration_to_minutes(user_value)
        if minutes is not None:
            return minutes

    # Plain numbers.
    try:
        return float(user_value)
    except (TypeError, ValueError, OverflowError):
        pass

    # Category label -> ordinal rank.
    ordinal = ORDINAL_MAPS.get(var_key)
    if ordinal:
        return ordinal.get(str(user_value).strip())

    # No numeric meaning: hand back the text; the caller decides whether a
    # string comparison makes sense for its operator.
    return str(user_value).strip()


# Answers that count as "no" for TRUE/FALSE conditions (compared lower-cased).
_NEGATIVE_ANSWERS = frozenset({"", "아니오", "아니요", "false", "no", "n", "0"})


def _is_affirmative(value: Any) -> bool:
    """Interpret a user attribute as a yes/no flag.

    None, NaN and the textual negatives in ``_NEGATIVE_ANSWERS`` are False;
    every other value uses its ordinary truthiness.
    """
    if value is None:
        return False
    if isinstance(value, float) and math.isnan(value):
        return False
    if isinstance(value, str):
        return value.strip().lower() not in _NEGATIVE_ANSWERS
    return bool(value)


def _eval_condition(user: Dict[str, Any], variable: str, condition: str) -> bool:
    """Evaluate one rule condition against the user dict.

    Supported condition forms:
        ``TRUE`` / ``FALSE``   - yes/no flag test on the attribute;
        ``CONTAINS "a/b/c"``   - any listed token loosely matches the attribute;
        ``value`` or ``a/b/c`` - equality against any of the listed values;
        ``<op> value``         - with op in >=, <=, ==, !=, >, <.

    Args:
        user: User attributes keyed by user-dict key.
        variable: Rule variable name; translated through RULE_VAR_TO_USER_KEY.
        condition: Condition text from the rule row (None means empty).

    Returns:
        True when the condition holds. Missing attributes, unparseable
        conditions and ordered comparisons on non-numeric values are False.
    """
    var_key = RULE_VAR_TO_USER_KEY.get(variable, variable)
    uval = user.get(var_key, None)
    cond = condition.strip() if condition is not None else ""

    # TRUE / FALSE. A textual "아니오" must not count as true merely because it
    # is a non-empty string.
    flag = cond.upper()
    if flag == "TRUE":
        return _is_affirmative(uval)
    if flag == "FALSE":
        return not _is_affirmative(uval)

    # CONTAINS "a/b/c" (slash, comma and pipe are all accepted separators).
    m = re.match(r'CONTAINS\s+"(.+)"', cond, flags=re.IGNORECASE)
    if m:
        token_blob = m.group(1).strip()
        tokens = re.split(r"[\/\|,]+", token_blob)
        return any(_in_list_or_set(tok, uval) for tok in tokens)

    # Bare value -> treated as ==, with multi-values such as "저녁/밤" allowed.
    if (
        cond
        and all(sym not in cond for sym, _ in OPS_ORDERED)
        and not cond.upper().startswith("CONTAINS")
    ):
        rhs_blob = cond.strip("\"' ")
        rhs_opts = re.split(r"[\/\|,]+", rhs_blob)

        # Collections use the loose (substring-tolerant) matcher.
        if isinstance(uval, (set, list, tuple)):
            return any(_in_list_or_set(opt, uval) for opt in rhs_opts)

        # Scalars must match exactly.
        left = _norm_text(uval) or ""
        return any(left == _norm_text(opt) for opt in rhs_opts)

    # Binary comparison; OPS_ORDERED lists >= and <= before > and <.
    for sym, fn in OPS_ORDERED:
        if sym not in cond:
            continue
        rhs_raw = cond.split(sym, 1)[1].strip().strip("\"' ")
        is_equality = fn in (operator.eq, operator.ne)

        # For collections, == means "contains" and != means "does not contain".
        if is_equality and isinstance(uval, (set, list, tuple)):
            contains = _in_list_or_set(rhs_raw, uval)
            return contains if fn is operator.eq else not contains

        lhs = _coerce_for_compare(var_key, uval, rhs_hint=cond)
        # No usable user value -> the condition is not satisfied.
        if lhs is None:
            return False

        # Right-hand side: number, then ordinal label, then duration label.
        rhs = _as_number(rhs_raw)
        if rhs is None:
            rhs = ORDINAL_MAPS.get(var_key, {}).get(rhs_raw, None)
            if rhs is None:
                rhs = _duration_to_minutes(rhs_raw)  # e.g. "60-90"

        if rhs is None:
            # Ordered comparison against a non-numeric value is meaningless.
            if not is_equality:
                return False
            return fn(str(lhs), str(rhs_raw))

        try:
            return fn(float(lhs), float(rhs))
        except (TypeError, ValueError):
            # lhs is non-numeric text. Ordering it lexicographically against a
            # number would be arbitrary, so only ==/!= fall back to text.
            if not is_equality:
                return False
            return fn(str(lhs), str(rhs_raw))
    return False


def _safe_eval_expression(expr: str, var_scope: Dict[str, Any]) -> Optional[float]:
    """
    수식을 안전하게 평가합니다.
    변수가 None인 경우 None을 반환합니다.
    """
    try:
        # 변수 중 하나라도 None이면 None 반환
        for var_name in ['age', 'weight_kg', 'lbm_kg']:
            if var_name in expr and var_scope.get(var_name) is None:
                return None
        result = eval(expr, {"__builtins__": {}}, var_scope)
        return float(result) if result is not None else None
    except:
        return None


_DOSE_RANGE_RE = re.compile(r"^\s*(\d+(\.\d+)?)\s*-\s*(\d+(\.\d+)?)\s*$")


def _parse_dose_range(text: str) -> Optional[Tuple[float, float]]:
    """Parse "lo-hi" (non-negative decimals) into ``(lo, hi)``; None if malformed."""
    m = _DOSE_RANGE_RE.match(text)
    if not m:
        return None
    return (float(m.group(1)), float(m.group(3)))


def _current_dose(entry: Dict[str, Any]) -> Optional[float]:
    """Return the entry's exact dose, else the midpoint of its range, else None."""
    if entry["dose_value"] is not None:
        return float(entry["dose_value"])
    if entry["dose_range"] is not None:
        lo, hi, _unit = entry["dose_range"]
        return (lo + hi) / 2.0
    return None


def _assign_dose(entry: Dict[str, Any], dose: float, unit: Optional[str]) -> None:
    """Store an exact dose, clearing any range; keep the old unit if none is given."""
    entry["dose_value"] = float(dose)
    entry["dose_unit"] = unit or entry["dose_unit"]
    entry["dose_range"] = None


def _apply_dose_operation(
    entry: Dict[str, Any],
    ingredient: str,
    operation: Optional[str],
    value: Optional[str],
    unit: Optional[str],
    var_scope: Dict[str, Any],
) -> bool:
    """Apply set / set_range / set_min / expression, shared by base_dose and adjustment.

    Returns True when ``operation`` was one of those four and ``value`` was
    present (whether or not the entry actually changed), False otherwise.
    """
    if not value or operation not in ("set", "set_range", "set_min", "expression"):
        return False

    if operation == "set":
        amount = _as_number(value)
        if amount is not None:  # a non-numeric value is ignored instead of crashing
            _assign_dose(entry, amount, unit)
    elif operation == "set_range":
        bounds = _parse_dose_range(value)
        if bounds:
            resolved_unit = unit or entry["dose_unit"]
            entry["dose_value"] = None
            entry["dose_unit"] = resolved_unit
            entry["dose_range"] = (bounds[0], bounds[1], resolved_unit)
    elif operation == "set_min":
        floor = _as_number(value)
        current = _current_dose(entry)
        if floor is not None and (current is None or current < floor):
            _assign_dose(entry, floor, unit)
    else:  # "expression"
        dose = _safe_eval_expression(value, var_scope)
        # whey_protein special case: when lean body mass is unknown, fall back
        # to a body-weight based estimate (weight * 1.8).
        if dose is None and ingredient == "whey_protein" and "lbm_kg" in value:
            if var_scope.get("weight_kg") is not None:
                dose = var_scope["weight_kg"] * 1.8
        if dose is not None:
            _assign_dose(entry, dose, unit)
    return True


def apply_rules_to_user(
    rules_df: pd.DataFrame, user: Dict[str, Any]
) -> Dict[str, Dict[str, Any]]:
    """Run the rule table against one user and collect per-ingredient results.

    Rules are processed in ascending ``(priority, rule_type)`` order. A row is
    skipped when it has no ingredient or rule type, or when its condition does
    not hold for the user. Every ingredient that appears in a row gets an
    entry, even if none of its rules fire.

    Args:
        rules_df: Rule table with columns ``ingredient``, ``rule_type``,
            ``priority``, ``variable``, ``condition``, ``operation``,
            ``value`` and ``unit``.
        user: User attributes; ``age``, ``weight`` and ``lean_mass`` are exposed
            to dose expressions as ``age``, ``weight_kg`` and ``lbm_kg``.

    Returns:
        ``{ingredient: entry}`` where each entry has the keys ``dose_value``,
        ``dose_unit``, ``dose_range`` (``(lo, hi, unit)`` or None),
        ``prohibit``, ``alternatives``, ``warnings``, ``suggestions`` and
        ``synergy`` (list of ``(value, unit)`` pairs).
    """
    result: Dict[str, Dict[str, Any]] = {}
    var_scope = {
        "age": user.get("age"),
        "weight_kg": user.get("weight"),
        "lbm_kg": user.get("lean_mass"),
    }
    rules_sorted = rules_df.sort_values(by=["priority", "rule_type"]).reset_index(
        drop=True
    )

    for _, row in rules_sorted.iterrows():
        ingredient = _norm_text(row.get("ingredient"))
        rule_type = _norm_text(row.get("rule_type"))
        variable = _norm_text(row.get("variable"))
        condition = _norm_text(row.get("condition"))
        operation = _norm_text(row.get("operation"))
        value = _norm_text(row.get("value"))
        unit = _norm_text(row.get("unit"))

        if not ingredient or not rule_type:
            continue

        entry = result.setdefault(
            ingredient,
            {
                "dose_value": None,
                "dose_unit": unit,
                "dose_range": None,
                "prohibit": False,
                "alternatives": [],
                "warnings": [],
                "suggestions": [],
                "synergy": [],
            },
        )

        cond_ok = True
        if variable and condition:
            cond_ok = _eval_condition(user, variable, condition)
        elif condition in ("TRUE", "FALSE"):
            # NOTE(review): with no variable the lookup yields None, so a bare
            # "TRUE" row never fires and a bare "FALSE" row always fires -
            # confirm this is the intended meaning.
            cond_ok = _eval_condition(user, "", condition)
        if not cond_ok:
            continue

        if rule_type == "base_dose":
            _apply_dose_operation(entry, ingredient, operation, value, unit, var_scope)

        elif rule_type == "adjustment":
            handled = _apply_dose_operation(
                entry, ingredient, operation, value, unit, var_scope
            )
            if handled:
                pass
            elif operation == "add" and value:
                increment = _as_number(value)
                if increment is not None:
                    _assign_dose(entry, (_current_dose(entry) or 0.0) + increment, unit)
            elif operation == "multiply":
                # A missing factor means "leave unchanged"; an unparseable one
                # is ignored instead of raising on float(None).
                factor = 1.0 if value is None else _as_number(value)
                current = _current_dose(entry)
                if factor is not None and current is not None:
                    _assign_dose(entry, current * factor, unit)
            elif operation == "add_suggestion":
                entry["suggestions"].append(f"{variable or ''}: 추가 제안")
            elif operation == "recommend_alternative" and value:
                entry["alternatives"].append(value)

        elif rule_type == "exception":
            if operation == "prohibit":
                entry["prohibit"] = True
                entry["dose_value"] = 0.0
                entry["dose_range"] = None
                entry["dose_unit"] = unit or entry["dose_unit"]
            elif operation == "add_warning":
                entry["warnings"].append(f"주의: {variable} {condition or ''}")
            elif operation == "recommend_alternative" and value:
                # Only whey_protein is prohibited for lactose intolerance; other
                # ingredients (e.g. casein) just get the alternative suggested.
                if ingredient == "whey_protein" and "유당 불내증" in str(condition):
                    entry["prohibit"] = True
                    entry["dose_value"] = 0.0
                    entry["dose_range"] = None
                    entry["dose_unit"] = unit or entry["dose_unit"]
                entry["alternatives"].append(value)

        elif rule_type == "interaction":
            if operation == "add_synergy" and value:
                # Use the normalised unit: the raw cell may be NaN, which is
                # truthy and would otherwise leak into the output.
                entry["synergy"].append((value, unit or ""))
            elif operation == "add_suggestion":
                entry["suggestions"].append(f"시너지 제안: {value}")

        if entry["dose_value"] is not None and not entry["dose_unit"]:
            entry["dose_unit"] = unit

    return result


# =========================================================
# 1) 제품 DF 정규화 (JSON 문자열 파싱, list/str 통일)
# =========================================================


def _to_list(x):
    if x is None or (isinstance(x, float) and math.isnan(x)):
        return []
    if isinstance(x, list):
        return x
    if isinstance(x, (set, tuple)):
        return list(x)
    s = str(x).strip()
    if not s:
        return []
    try:
        obj = ast.literal_eval(s)
        if isinstance(obj, list):
            return obj
        return [obj]
    except:  # 쉼표 구분 텍스트 등
        # 대괄호 없이 'a, b, c'
        parts = re.split(r"[,\|/]+", s)
        return [p.strip() for p in parts if p.strip()]


def _parse_ingredients(value):
    """Flatten an ``ingredients`` cell into ``(names, details)`` string sets.

    The cell is first coerced with ``_to_list``. For a dict item, its
    ``"ingredient"`` value goes into ``names`` and its ``"details"`` goes into
    ``details`` (the keys of a dict, the items of a list, or a plain string).
    Any non-dict item is added to ``names`` as text. Blank strings are dropped,
    and an item that cannot be processed is skipped.
    """
    names = set()
    details = set()

    def _add(bucket, raw):
        text = str(raw).strip()
        if text:
            bucket.add(text)

    for item in _to_list(value):
        try:
            if not isinstance(item, dict):
                _add(names, item)
                continue

            name = item.get("ingredient")
            if name:
                _add(names, name)

            detail = item.get("details")
            if isinstance(detail, dict):
                for key in detail.keys():
                    _add(details, key)
            elif isinstance(detail, list):
                for element in detail:
                    _add(details, element)
            elif isinstance(detail, str):
                _add(details, detail)
        except Exception:
            # Best effort: one malformed item must not discard the whole cell.
            continue
    return names, details


def normalize_products_df(df_products: pd.DataFrame) -> pd.DataFrame:
    """Return a copy of the product table with parsed list/token columns added.

    Added columns:
        ``<col>_list`` for intake_timing, sensory_tags, functional_tags,
        feature_tags and allergens (each cell coerced by ``_to_list``);
        ``ingredient_names`` / ``ingredient_details`` (sorted lists);
        ``ingredient_all_tokens`` (lower-cased set of names + details);
        ``functional_tokens`` / ``feature_tokens`` / ``allergen_tokens``
        (lower-cased sets of the matching ``*_list`` column).
    """
    out = df_products.copy()

    list_sources = (
        "intake_timing",
        "sensory_tags",
        "functional_tags",
        "feature_tags",
        "allergens",
    )
    for source in list_sources:
        out[f"{source}_list"] = out[source].apply(_to_list)

    name_detail_pairs = out["ingredients"].apply(_parse_ingredients)
    out["ingredient_names"] = name_detail_pairs.apply(lambda pair: sorted(pair[0]))
    out["ingredient_details"] = name_detail_pairs.apply(lambda pair: sorted(pair[1]))

    # Lower-cased token set for ingredient lookups.
    out["ingredient_all_tokens"] = out.apply(
        lambda r: {
            tok.lower() for tok in r["ingredient_names"] + r["ingredient_details"]
        },
        axis=1,
    )

    token_columns = (
        ("functional_tokens", "functional_tags_list"),
        ("feature_tokens", "feature_tags_list"),
        ("allergen_tokens", "allergens_list"),
    )
    for target, source in token_columns:
        out[target] = out[source].apply(
            lambda values: {str(v).lower() for v in values}
        )
    return out


# =========================================================
# 2) 안전 필터링 (알러지/질환/룰-prohibit)
# =========================================================

# Keywords for matching a rule ingredient name against product ingredient
# tokens (simple version). product_has_ingredient tests each keyword for
# membership in the product's token collection.
INGREDIENT_KEYWORDS = {
    "creatine": {"creatine", "creatine monohydrate"},
    "caffeine": {"caffeine", "카페인"},
    "arginine": {"arginine", "l-arginine"},
    "l-theanine": {"theanine", "l-theanine"},
    "glycerol": {"glycerol"},
    "nitrate": {"nitrate", "비트", "beet"},
    "taurine": {"taurine"},
    "whey_protein": {"whey", "wpc", "wpi", "wph"},
    "casein": {"casein", "micellar casein"},
    "bcaa": {"bcaa", "leucine", "isoleucine", "valine"},
    "betaine": {"betaine"},
    "glutamine": {"glutamine"},
    "l-carnitine": {"carnitine"},
}


def product_has_ingredient(product_row, rule_ingredient: str) -> bool:
    """Tell whether a product's ingredient tokens include a rule ingredient.

    The keywords come from ``INGREDIENT_KEYWORDS``; an ingredient without an
    entry is matched by its own lower-cased name. Each lower-cased keyword is
    tested for membership in ``product_row["ingredient_all_tokens"]``.
    """
    product_tokens = product_row["ingredient_all_tokens"]
    fallback = {rule_ingredient.lower()}
    keywords = INGREDIENT_KEYWORDS.get(rule_ingredient, fallback)
    for keyword in keywords:
        if keyword.lower() in product_tokens:
            return True
    return False


def is_product_safe_for_user(
    product_row, user: Dict[str, Any], rule_result: Dict[str, Dict[str, Any]]
) -> Tuple[bool, List[str]]:
    """Check one product against the user's allergies, conditions and rule bans.

    Args:
        product_row: Normalised product row; reads ``allergen_tokens`` and,
            through ``product_has_ingredient``, ``ingredient_all_tokens``.
        user: User attributes; reads ``allergy`` and ``health_conditions``
            (collection, delimited string, or None).
        rule_result: Output of ``apply_rules_to_user``.

    Returns:
        ``(is_safe, reasons)``: ``is_safe`` is True when no reason was found,
        and ``reasons`` lists why the product was rejected.
    """
    reasons: List[str] = []
    allergen_tokens = product_row["allergen_tokens"]

    # Allergies: any overlap between user allergies and product allergens.
    user_allergies = {str(a).lower() for a in _to_list(user.get("allergy"))}
    if user_allergies and allergen_tokens & user_allergies:
        reasons.append("알러지 교집합")

    # Condition example: lactose intolerance -> milk allergen. Matching runs on
    # the normalised set so a missing (None) value cannot raise, and ignores
    # spaces so "유당불내증" and "유당 불내증" are treated alike.
    diseases = {
        str(d).strip().lower() for d in _to_list(user.get("health_conditions"))
    }
    lactose_intolerant = any(
        "유당불내증" in d.replace(" ", "") or "lactose intolerance" in d
        for d in diseases
    )
    if lactose_intolerant and "우유" in allergen_tokens:
        reasons.append("유당불내증-우유")

    # Ingredients the rules prohibit for this user.
    for ing, info in rule_result.items():
        if info.get("prohibit", False) and product_has_ingredient(product_row, ing):
            reasons.append(f"룰 금지 성분 포함: {ing}")

    return (len(reasons) == 0), reasons


# =========================================================
# 3) 점수화 (룰 권고 성분 매칭 + 목표/기능 태그 매칭 + 섭취 타이밍 적합 등)
# =========================================================

GOAL_TO_TOKENS = {
    # User goal (an element of the user's user_goal collection) -> keywords
    # that are searched for inside the product's functional tags.
    "퍼포먼스": {"퍼포먼스", "performance", "집중", "focus", "에너지"},
    "근성장": {"근육", "muscle", "성장", "벌크업"},
    "회복": {"회복", "recovery"},
    "다이어트": {"체지방", "다이어트", "cut", "fat loss"},
}


def _tokens_for_goal(goal: Any) -> set:
    """Resolve a user goal to the functional-tag keywords to look for.

    An exact ``GOAL_TO_TOKENS`` key wins. Otherwise a goal is associated with
    every key that contains it, is contained in it, or has a keyword occurring
    in it - so "회복 지원" resolves via "회복" and "체지방 감소" via the "체지방"
    keyword of "다이어트".
    """
    if goal in GOAL_TO_TOKENS:
        return GOAL_TO_TOKENS[goal]

    text = str(goal).strip().lower()
    if not text:
        return set()

    matched = set()
    for key, tokens in GOAL_TO_TOKENS.items():
        key_text = key.lower()
        if (
            key_text in text
            or text in key_text
            or any(tok.lower() in text for tok in tokens)
        ):
            matched |= tokens
    return matched


def _float_or_nan(raw: Any) -> float:
    """Parse a nutrition cell as float; missing or unparseable values give NaN."""
    try:
        return float(raw) if pd.notna(raw) else np.nan
    except (ValueError, TypeError):
        return np.nan


def score_product_for_user(
    product_row, user: Dict[str, Any], rule_result: Dict[str, Dict[str, Any]]
) -> Dict[str, Any]:
    """Score one product for one user.

    Scoring:
        +2.0 for each non-prohibited rule ingredient that has a non-zero dose
             or a dose range and is contained in the product;
        +1.0 for each user goal whose keywords occur in a functional tag;
        +0.5 when the user has a training time and the product lists any
             intake timing (a deliberately loose placeholder match);
        -1.0 each for sugars > 20 and calories > 300, applied only when the
             goals include "체지방 감소" or "다이어트".

    Returns:
        ``{"score": float, "reasons": [str, ...]}``.
    """
    score = 0.0
    reasons = []

    # 1) Rule-recommended ingredients contained in the product.
    for ing, info in rule_result.items():
        if info.get("prohibit"):
            continue
        is_recommended = (info.get("dose_value") not in (None, 0.0)) or (
            info.get("dose_range") is not None
        )
        if is_recommended and product_has_ingredient(product_row, ing):
            score += 2.0
            reasons.append(f"권고 성분 포함(+2): {ing}")

    # 2) Goal <-> functional tag matching (keyword is a substring of a tag).
    user_goals = set(_to_list(user.get("user_goal", [])))
    prod_func = {t.lower() for t in product_row["functional_tokens"]}
    for g in user_goals:
        tokens = _tokens_for_goal(g)
        if any(tok in tag for tok in tokens for tag in prod_func):
            score += 1.0
            reasons.append(f"목표 매칭(+1): {g}")

    # 3) Intake timing: any listed timing counts as suitable for now; this can
    #    be refined per training time later.
    user_time = _norm_text(user.get("training_time"))
    timing_list = [str(x) for x in product_row["intake_timing_list"]]
    if user_time and timing_list:
        score += 0.5
        reasons.append("타이밍 적합(+0.5)")

    # 4) Sugar / calorie penalties for fat-loss goals (thresholds are tunable).
    if "체지방 감소" in user_goals or "다이어트" in user_goals:
        sugar_num = _float_or_nan(product_row.get("sugars", np.nan))
        cals_num = _float_or_nan(product_row.get("calories", np.nan))

        if pd.notna(sugar_num) and sugar_num > 20:
            score -= 1.0
            reasons.append("당↑(-1)")
        if pd.notna(cals_num) and cals_num > 300:
            score -= 1.0
            reasons.append("칼로리↑(-1)")

    return {"score": score, "reasons": reasons}


# =========================================================
# 4) 메인: 사용자 × 룰 × 제품 → 추천 랭킹
# =========================================================


def recommend_products_for_user(
    user: Dict[str, Any],
    rules_df: pd.DataFrame,
    df_products_raw: pd.DataFrame,
    topk: int = 10,
) -> Tuple[pd.DataFrame, Dict[str, Dict[str, Any]]]:
    """Rank products for a user: apply rules, drop unsafe products, score the rest.

    Args:
        user: User attribute dict.
        rules_df: Rule table for ``apply_rules_to_user``.
        df_products_raw: Raw product table for ``normalize_products_df``.
        topk: Maximum number of products to return.

    Returns:
        ``(recommendations, rule_result)``. ``recommendations`` holds up to
        ``topk`` safe products ordered by descending score, with ties kept in
        input order; ``rule_result`` is the per-ingredient rule output.
    """
    # 1) Apply rules.
    rule_result = apply_rules_to_user(rules_df, user)

    # 2) Normalise products.
    dfp = normalize_products_df(df_products_raw)

    # 3) Safety filter.
    safety = [
        is_product_safe_for_user(row, user, rule_result) for _, row in dfp.iterrows()
    ]
    dfp = dfp.assign(
        is_safe=[ok for ok, _ in safety],
        safe_reasons=[why for _, why in safety],
    )
    dfp_safe = dfp[dfp["is_safe"]].copy()

    # 4) Scoring.
    scored = [
        score_product_for_user(row, user, rule_result)
        for _, row in dfp_safe.iterrows()
    ]
    dfp_safe["score"] = [s["score"] for s in scored]
    dfp_safe["reasons"] = [s["reasons"] for s in scored]

    # 5) Ranking. A stable sort keeps equal scores in a reproducible order.
    output_columns = [
        "product_name",
        "brand_name",
        "category",
        "sub_category",
        "score",
        "reasons",
        "functional_tags",
        "feature_tags",
        "allergens",
    ]
    rec = (
        dfp_safe.sort_values(["score"], ascending=False, kind="mergesort")
        .head(topk)
        .loc[:, output_columns]
        .reset_index(drop=True)
    )
    return rec, rule_result

In [103]:
# =========================================================
# 5) Rule-based ingredient recommendation
# =========================================================


def get_ingredient_recommendations(
    rule_result: Dict[str, Dict[str, Any]],
) -> Dict[str, Any]:
    """Build the ingredient recommendation lists from the rule output.

    A prohibited ingredient is reported only under ``prohibited_ingredients``
    (together with its alternatives). Any other ingredient is listed as
    recommended when it has a positive exact dose or a dose range, and its
    alternatives, warnings, suggestions and synergies are appended to the
    matching lists. A missing unit is rendered as an empty string.

    Args:
        rule_result: Result of ``apply_rules_to_user()``.

    Returns:
        Dict with the keys ``recommended_ingredients``,
        ``prohibited_ingredients``, ``alternative_ingredients``, ``warnings``,
        ``suggestions`` and ``synergy_combinations``, each a list of dicts.
    """
    recommendations = {
        "recommended_ingredients": [],
        "prohibited_ingredients": [],
        "alternative_ingredients": [],
        "warnings": [],
        "suggestions": [],
        "synergy_combinations": [],
    }

    for ingredient, info in rule_result.items():
        # Prohibited ingredients are reported and nothing else is listed for them.
        if info.get("prohibit", False):
            recommendations["prohibited_ingredients"].append(
                {
                    "ingredient": ingredient,
                    "reason": "룰에 의해 금지됨",
                    "alternatives": info.get("alternatives", []),
                }
            )
            continue

        dose_value = info.get("dose_value")
        dose_range = info.get("dose_range")
        # The key may exist with a None value; never render that as "None".
        dose_unit = info.get("dose_unit") or ""

        if dose_value is not None and dose_value > 0:
            recommendations["recommended_ingredients"].append(
                {
                    "ingredient": ingredient,
                    "dose": f"{dose_value}{dose_unit}",
                    "unit": dose_unit,
                    "type": "정확한 용량",
                }
            )
        elif dose_range is not None:
            lo, hi, range_unit = dose_range
            range_unit = range_unit or dose_unit
            recommendations["recommended_ingredients"].append(
                {
                    "ingredient": ingredient,
                    "dose": f"{lo}-{hi}{range_unit}",
                    "unit": range_unit,
                    "type": "용량 범위",
                }
            )

        recommendations["alternative_ingredients"].extend(
            {"ingredient": alt, "original": ingredient}
            for alt in info.get("alternatives") or []
        )
        recommendations["warnings"].extend(
            {"ingredient": ingredient, "warning": warning}
            for warning in info.get("warnings") or []
        )
        recommendations["suggestions"].extend(
            {"ingredient": ingredient, "suggestion": suggestion}
            for suggestion in info.get("suggestions") or []
        )
        recommendations["synergy_combinations"].extend(
            {
                "ingredient": ingredient,
                "synergy_with": synergy[0],
                "unit": synergy[1],
            }
            for synergy in info.get("synergy") or []
        )

    return recommendations


def display_ingredient_recommendations(recommendations: Dict[str, Any]) -> None:
    """Print the ingredient recommendation result as a formatted report.

    Sections are printed in a fixed order between two 60-character rules:
    recommended (or a "none" notice), prohibited (with alternatives),
    alternatives, warnings, suggestions and synergy combinations. Apart from
    the recommended section, empty sections are omitted.
    """
    banner = "=" * 60
    print(banner)
    print("🏋️‍♂️ 개인 맞춤 원료 추천 결과")
    print(banner)

    # Recommended ingredients (the only section with an "empty" message).
    recommended = recommendations["recommended_ingredients"]
    if not recommended:
        print("\n❌ 권장 성분이 없습니다.")
    else:
        print("\n✅ 권장 성분:")
        for rec in recommended:
            print(f"  • {rec['ingredient']}: {rec['dose']} ({rec['type']})")

    # Prohibited ingredients, each optionally followed by its alternatives.
    prohibited = recommendations["prohibited_ingredients"]
    if prohibited:
        print("\n🚫 금지 성분:")
        for banned in prohibited:
            print(f"  • {banned['ingredient']}: {banned['reason']}")
            substitutes = banned["alternatives"]
            if substitutes:
                print(f"    → 대체제: {', '.join(substitutes)}")

    # Remaining sections share one shape: heading + one line per entry.
    simple_sections = (
        (
            "alternative_ingredients",
            "\n🔄 대체 성분:",
            lambda e: f"  • {e['original']} → {e['ingredient']}",
        ),
        (
            "warnings",
            "\n⚠️ 주의사항:",
            lambda e: f"  • {e['ingredient']}: {e['warning']}",
        ),
        (
            "suggestions",
            "\n💡 제안사항:",
            lambda e: f"  • {e['ingredient']}: {e['suggestion']}",
        ),
        (
            "synergy_combinations",
            "\n🤝 시너지 조합:",
            lambda e: f"  • {e['ingredient']} + {e['synergy_with']} {e['unit']}",
        ),
    )
    for key, heading, render in simple_sections:
        entries = recommendations[key]
        if entries:
            print(heading)
            for entry in entries:
                print(render(entry))

    print("\n" + banner)

In [104]:
# Input 형식 (CSV 룰베이스와 일치)

# user = {
#     "age": age,                                   # float (나이)
#     "gender": gender,                             # str ("남성"/"여성")
#     "weight": weight,                             # float (체중 kg)
#     "lean_mass": lbm,                             # Optional[float] (제지방량 kg)
#     "training_experience": training_experience,   # str ("초보"/"중급"/"숙련자")
#     "training_duration": training_duration,       # str or float ("60-90", "<60", "90+", 또는 분수치)
#     "training_time": training_time,               # str ("오전"/"오후"/"저녁"/"밤")
#     "activity_level": activity_level,             # int (1~3)
#     "diet_phase": diet_phase,                     # str ("체지방 감소"/"유지"/"벌크업")
#     "diet_timing": diet_timing,                   # str (원문 보존)
#     "weekly_freq": weekly_freq,                   # Optional (주간 운동 빈도)

#     "meat_freq": None,                            # 필요시 추가
#     "caffeine_intake": None,                      # 필요시 추가
#     "training_intensity": None,                   # Optional[str] ("고"/"중"/"저")

#     "allergy": allergy,                           # set (알러지 목록)
#     "health_conditions": health_conditions,       # set (질환 목록: "수면장애", "유당 불내증" 등)
#     "current_stack": current_stack,               # set (현재 복용 중인 성분)
#     "is_dehydrated": is_dehydrated,               # bool (탈수 여부)

#     "user_goal": user_goal,                       # set {"퍼포먼스", "회복", "근성장", "체지방 감소", "유지" 등}
#     
#     # 선택적 필드
#     "diet_type": diet_type,                       # str ("일반식"/"채식 위주" 등)
#     "oral_hygiene": oral_hygiene,                 # str ("해당 없음"/"구강청결제 사용" 등)
#     "workout_type": workout_type,                 # str ("유산소"/"무산소" 등)
#     "exercise_type": exercise_type,               # str ("장시간_고강도_지구력" 등)
#     "environment_heat_humid": environment,        # str ("실내"/"고온"/"고온다습")
#     "intake_period": intake_period,               # str ("만성" 등)
#     "cvd_risk": cvd_risk,                         # bool or str ("예"/"아니오")
# }

In [106]:
# Test with a new user profile. Keys follow the user-dict schema documented in
# the commented block above; lean_mass is deliberately left as None.
user_new = {
    'age': 29.0,
    'gender': '여성',
    'weight': 60.0,
    'lean_mass': None,
    'training_experience': '초보',
    'training_duration': '60-90',
    'training_time': '저녁',
    'diet_phase': '체지방 감소',
    'user_goal': {'체지방 감소', '회복 지원'},
    'health_conditions': {'유당 불내증', '수면장애'},
    'current_stack': set(),
    'is_dehydrated': False,
    'diet_type': '일반식',
    'oral_hygiene': '해당 없음'
}


In [121]:
# Run the pipeline for the new test user: returns the top-40 product ranking
# and the per-ingredient rule output.
rec_df_new, rule_out_new = recommend_products_for_user(user_new, rules, items, topk=40)

# Ingredient recommendations derived from the rule output.
ingredient_recs_new = get_ingredient_recommendations(rule_out_new)


In [122]:
# Print the ingredient recommendations computed in the previous cell.
display_ingredient_recommendations(ingredient_recs_new)


🏋️‍♂️ 개인 맞춤 원료 추천 결과

✅ 권장 성분:
  • creatine: 3.0g (정확한 용량)
  • l-theanine: 50.0mg (정확한 용량)
  • glycerol: 6.0g (정확한 용량)
  • nitrate: 372.0mg (정확한 용량)
  • taurine: 2.0g (정확한 용량)
  • casein: 40.0-45.0g (용량 범위)
  • bcaa: 10.0-20.0g (용량 범위)
  • betaine: 2.5g (정확한 용량)
  • l-carnitine: 1.0g (정확한 용량)

🚫 금지 성분:
  • caffeine: 룰에 의해 금지됨
  • whey_protein: 룰에 의해 금지됨
    → 대체제: WPI/WPH

🔄 대체 성분:
  • casein → WPI/WPH

🤝 시너지 조합:
  • bcaa + taurine 2g



In [124]:
# Show the raw recommendation dictionary as the cell's output value.
ingredient_recs_new

{'recommended_ingredients': [{'ingredient': 'creatine',
   'dose': '3.0g',
   'unit': 'g',
   'type': '정확한 용량'},
  {'ingredient': 'l-theanine',
   'dose': '50.0mg',
   'unit': 'mg',
   'type': '정확한 용량'},
  {'ingredient': 'glycerol', 'dose': '6.0g', 'unit': 'g', 'type': '정확한 용량'},
  {'ingredient': 'nitrate', 'dose': '372.0mg', 'unit': 'mg', 'type': '정확한 용량'},
  {'ingredient': 'taurine', 'dose': '2.0g', 'unit': 'g', 'type': '정확한 용량'},
  {'ingredient': 'casein', 'dose': '40.0-45.0g', 'unit': 'g', 'type': '용량 범위'},
  {'ingredient': 'bcaa', 'dose': '10.0-20.0g', 'unit': 'g', 'type': '용량 범위'},
  {'ingredient': 'betaine', 'dose': '2.5g', 'unit': 'g', 'type': '정확한 용량'},
  {'ingredient': 'l-carnitine',
   'dose': '1.0g',
   'unit': 'g',
   'type': '정확한 용량'}],
 'prohibited_ingredients': [{'ingredient': 'caffeine',
   'reason': '룰에 의해 금지됨',
   'alternatives': []},
  {'ingredient': 'whey_protein',
   'reason': '룰에 의해 금지됨',
   'alternatives': ['WPI/WPH']}],
 'alternative_ingredients': [{'ingredient