<a href="https://colab.research.google.com/github/yneun/Amore_trial/blob/main/vob_api_use.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import sys

# Make the project folder importable from this notebook.
PROJECT_ROOT = "/content/drive/MyDrive/innisfree-vob-voc-agent"
# Re-running the cell must not stack duplicate entries on sys.path.
if PROJECT_ROOT not in sys.path:
    sys.path.append(PROJECT_ROOT)

In [4]:
# Install the third-party packages used below (IPython shell magic, not plain Python).
!pip install openai pandas
from google.colab import drive
# Mount Google Drive; every path used below lives under /content/drive.
drive.mount('/content/drive')

import os
import json
import pandas as pd
import openai
import re

# Read the API key
# The key is kept in a text file on Drive; surrounding whitespace/newlines are stripped.
API_KEY_FILE = "/content/drive/MyDrive/innisfree-vob-voc-agent/data/seeds/OPEN_API_KEY.txt"
with open(API_KEY_FILE, "r") as f:
    openai.api_key = f.read().strip()

# Path configuration
# UPLOAD_DIR is where the input CSV files are read from;
# OUTPUT_DIR is where the per-product JSON files are written.
UPLOAD_DIR = "/content/drive/MyDrive/innisfree-vob-voc-agent/data/seeds"
OUTPUT_DIR = "/content/drive/MyDrive/innisfree-vob-voc-agent/data/vob_jsons"
# Create the output folder if it does not exist yet (no error when it already does).
os.makedirs(OUTPUT_DIR, exist_ok=True)


# Helper: Safe JSON Parsing
def safe_json_parse(text):
    """Best-effort extraction of a JSON value from model output.

    The text is scanned for the first ``{`` from which a complete JSON
    object can be decoded; anything before it (prose, code fences) or after
    it (trailing notes, further braces) is ignored. If no object can be
    decoded, the whole text is tried as JSON.

    Args:
        text: Raw text returned by the model.

    Returns:
        The decoded JSON value, or ``{"raw_output": text}`` when nothing
        could be decoded (including when ``text`` is not a string).
        This function does not raise on malformed input.
    """
    if not isinstance(text, str):
        return {"raw_output": text}

    decoder = json.JSONDecoder()
    start = text.find("{")
    while start != -1:
        try:
            # raw_decode stops at the end of the first complete value, so
            # trailing text or later braces cannot invalidate a good object.
            parsed, _ = decoder.raw_decode(text, start)
        except (ValueError, RecursionError):
            start = text.find("{", start + 1)
        else:
            return parsed

    # No decodable object: the text may still be another JSON value
    # (for example a list or a number).
    try:
        return json.loads(text)
    except (ValueError, RecursionError):
        return {"raw_output": text}

# Slot Evidence Builder
def build_slot_evidence(description, image_urls):
    """Group a product description and its images into evidence slots.

    A slot receives the full description as text evidence when the
    lower-cased description contains any of that slot's keywords (plain
    substring match). Every image URL is attached to every slot.

    Args:
        description: Product description text. A non-string value (for
            example None or NaN) is treated as having no text, so no slot
            receives text evidence.
        image_urls: Iterable of image URLs, or None for no images.

    Returns:
        A dict keyed by "Usage", "Ingredients", "Claims" and "Safety"; each
        value is a dict with "text_evidence" and "image_evidence" lists.
    """
    slot_keywords = {
        "Usage": ("apply", "use"),
        "Ingredients": ("extract", "ingredient"),
        "Claims": ("whitening", "hydration"),
        "Safety": ("avoid", "sensitive"),
    }

    # Only real strings can be searched; anything else carries no text.
    searchable = description.lower() if isinstance(description, str) else ""
    images = [] if image_urls is None else list(image_urls)

    slots = {}
    for slot_name, keywords in slot_keywords.items():
        matched = any(keyword in searchable for keyword in keywords)
        slots[slot_name] = {
            "text_evidence": [description] if matched else [],
            # Each slot gets its own copy so later edits to one slot's list
            # do not leak into the others.
            "image_evidence": list(images),
        }
    return slots

# GPT API: VoB extraction
def extract_vob_gpt(product_name, description, image_urls):
    """Ask the chat model to extract a Voice of Brand (VoB) JSON for one product.

    The product name, description and image URLs are interpolated into a
    fixed prompt that tells the model to answer with JSON only. The request
    uses temperature 0.

    Args:
        product_name: Product title; also echoed into the requested JSON.
        description: Product description text.
        image_urls: Image URLs; interpolated into the prompt as text only.

    Returns:
        The value produced by ``safe_json_parse`` for the model's reply.
        An empty reply is parsed as an empty string.

    Raises:
        Whatever the OpenAI client raises for network or API failures;
        those errors are not handled here.
    """
    prompt = f"""
You are an AI agent specialized in extracting Voice of Brand (VoB) from e-commerce product detail pages.


Product Name: {product_name}
Description: {description}
Images: {image_urls}

IMPORTANT:
- Use ONLY the provided product name, description, and image URLs.
- Do NOT use external knowledge or assumptions.
- Do NOT explain your reasoning.
- Output JSON ONLY.

Step 1: Product Type Identification (internal reasoning only)
- From the product name: "{product_name}", determine the product type.
- Choose ONLY ONE from the following fixed list:
  ["serum/essence", "cleansing", "sun care", "toner", "cream/lotion", "sheet mask", "body/hair care", "make up"]
- Do NOT output this step separately.
- Use the result ONLY to fill the `product_type` field in the final JSON.

Step 2: Voice of Brand (VoB) Extraction
- Adapt your extraction focus based on the identified product type:

  - serum/essence:
    • key active ingredients
    • functional claims (e.g. hydration, brightening, soothing, anti-aging)
    • concentration or strength cues

  - sheet mask:
    • soothing or calming effects
    • texture and material
    • hydration and relaxation experience

  - toner:
    • pH balance
    • skin preparation for next steps
    • skin type suitability

  - cream/lotion:
    • moisturizing level
    • barrier protection
    • texture (light / heavy / neutral)

  - cleansing:
    • cleansing mechanism (foam, gel, oil, clay, etc.)
    • types of impurities removed (sebum, makeup, dirt)

  - make up:
    • color, finish, coverage
    • aesthetic or styling benefits

- Base all extractions strictly on:
  • Product description text
  • Image URLs (visual cues only, no assumptions)

Return the result strictly in the following JSON format:

{{
  "product_name": "{product_name}",
  "product_type": "",
  "vob": {{
    "core_claims": [],
    "key_selling_points": [],
    "target_customer": {{
      "skin_type": [],
      "concerns": [],
      "demographic": ""
    }},
    "usage_context": [],
    "emotional_tone": [],
    "credibility_signals": [],
    "visual_messages": []
  }}
}}
"""
    response = openai.chat.completions.create(
        model="gpt-4.1-mini",
        messages=[
            {"role": "system", "content": "You are a product VoB extractor."},
            {"role": "user", "content": prompt}
        ],
        temperature=0
    )

    # The message content can be None; treat that as an empty reply instead
    # of failing on .strip().
    result_text = (response.choices[0].message.content or "").strip()
    return safe_json_parse(result_text)


# Read the CSV and run the extraction for every row
CSV_PATH = os.path.join(UPLOAD_DIR, "VOB_serum_image.csv")
df = pd.read_csv(CSV_PATH, encoding="latin1")

for idx, row in df.iterrows():
    product_name = row["Title"]
    # Without a title there is nothing to name the output file after;
    # skip the row instead of aborting the whole batch.
    if pd.isna(product_name):
        print(f"Skipping row {idx}: missing Title\n")
        continue
    product_name = str(product_name)

    # An empty cell is read as NaN; pass an empty description instead.
    description = row["Product_Description"]
    if pd.isna(description):
        description = ""

    # An empty Image cell must yield no URLs rather than the string "nan".
    raw_images = row.get("Image", "")
    if pd.isna(raw_images):
        raw_images = ""
    image_files = [part.strip() for part in str(raw_images).split(";") if part.strip()]

    vob_json = extract_vob_gpt(product_name, description, image_files)

    # Save to file. "/" is replaced too, so a title containing it is not
    # interpreted as a sub-directory of OUTPUT_DIR.
    file_stem = product_name.replace(' ', '_').replace('/', '_')
    out_path = os.path.join(OUTPUT_DIR, f"{file_stem}.json")
    with open(out_path, "w", encoding="utf-8") as f:
        json.dump(vob_json, f, ensure_ascii=False, indent=2)

    print(f"Saved VoB JSON for {product_name}\n")

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).
Saved VoB JSON for [INNISFREE Official] Green Tea Seed Hyaluronic Serum 80ml - Hyaluronic Acid, Instant Hydration, Inner Dryness Relief



In [5]:
# Read the sunblock CSV and run the extraction for every row
CSV_PATH = os.path.join(UPLOAD_DIR, "VOB_sunblock.csv")
df = pd.read_csv(CSV_PATH, encoding="latin1")

for idx, row in df.iterrows():
    product_name = row["Title"]
    # Without a title there is nothing to name the output file after;
    # skip the row instead of aborting the whole batch.
    if pd.isna(product_name):
        print(f"Skipping row {idx}: missing Title\n")
        continue
    product_name = str(product_name)

    # An empty cell is read as NaN; pass an empty description instead.
    description = row["Product_Description"]
    if pd.isna(description):
        description = ""

    # An empty Image cell must yield no URLs rather than the string "nan".
    raw_images = row.get("Image", "")
    if pd.isna(raw_images):
        raw_images = ""
    image_files = [part.strip() for part in str(raw_images).split(";") if part.strip()]

    vob_json = extract_vob_gpt(product_name, description, image_files)

    # Save to file. "/" is replaced too, so a title containing it is not
    # interpreted as a sub-directory of OUTPUT_DIR.
    file_stem = product_name.replace(' ', '_').replace('/', '_')
    out_path = os.path.join(OUTPUT_DIR, f"{file_stem}.json")
    with open(out_path, "w", encoding="utf-8") as f:
        json.dump(vob_json, f, ensure_ascii=False, indent=2)

    print(f"Saved VoB JSON for {product_name}\n")

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).
Saved VoB JSON for [INNISFREE Official] Vitamin C Tone Up Sunscreen 50ml - Brightening Sun Care, Vita C, Tinted Sunscreen, Tinted suncream



In [7]:
# Read the cleansing-foam CSV and run the extraction for every row
CSV_PATH = os.path.join(UPLOAD_DIR, "VOB_cleansingfoam.csv")
df = pd.read_csv(CSV_PATH, encoding="latin1")

for idx, row in df.iterrows():
    product_name = row["Title"]
    # Without a title there is nothing to name the output file after;
    # skip the row instead of aborting the whole batch.
    if pd.isna(product_name):
        print(f"Skipping row {idx}: missing Title\n")
        continue
    product_name = str(product_name)

    # An empty cell is read as NaN; pass an empty description instead.
    description = row["Product_Description"]
    if pd.isna(description):
        description = ""

    # An empty Image cell must yield no URLs rather than the string "nan".
    raw_images = row.get("Image", "")
    if pd.isna(raw_images):
        raw_images = ""
    image_files = [part.strip() for part in str(raw_images).split(";") if part.strip()]

    vob_json = extract_vob_gpt(product_name, description, image_files)

    # Save to file. "/" is replaced too, so a title containing it is not
    # interpreted as a sub-directory of OUTPUT_DIR.
    file_stem = product_name.replace(' ', '_').replace('/', '_')
    out_path = os.path.join(OUTPUT_DIR, f"{file_stem}.json")
    with open(out_path, "w", encoding="utf-8") as f:
        json.dump(vob_json, f, ensure_ascii=False, indent=2)

    print(f"Saved VoB JSON for {product_name}\n")

Saved VoB JSON for [INNISFREE Official] Green Tea Amino Hydrating Cleansing Foam 150g - Hyaluronic Acid, Hydrating Face Wash, DeadSkin Care

