In [56]:
import os
import json
from dotenv import load_dotenv
from langchain.chat_models import ChatOpenAI
from langchain.prompts import PromptTemplate
from langchain.chains import LLMChain

# Load variables from the .env file into the process environment.
load_dotenv()

# Read the OpenAI API key from the environment.
openai_api_key = os.getenv("OPENAI_API_KEY")

# Configure the LangChain ChatOpenAI model.
# temperature=0 is used because this is a labelling task: the same
# description should get the same category on every run, not a sampled one.
chat_model = ChatOpenAI(api_key=openai_api_key, model="gpt-3.5-turbo", temperature=0)

# Load the fragrance-ingredient data.
# classify_fragrances_batch reads a 'name' and a 'content' key from each entry.
file_path = "spice.json"
with open(file_path, "r", encoding="utf-8") as file:
    fragrances = json.load(file)

# Fragrance families the model is allowed to choose from.
categories = [
    "Spicy", "Chypre", "Fruity", "Citrus", "Green", "Aldehyde", "Aquatic",
    "Fougere", "Gourmand", "Woody", "Oriental", "Floral", "Musk",
    "Powdery", "Tobacco Leather", "Amber"
]

# Prompt template that asks the model to classify the fragrance descriptions.
# The last paragraph pins the reply to a bare JSON object because the reply is
# parsed as JSON; a code fence or commentary around it can break that parsing.
# NOTE: the template is format-style, so it must not contain literal braces
# other than the two input variables.
prompt_template = """Classify the following fragrance descriptions into one of these categories: {categories}.

Descriptions:
{descriptions}

Ensure the classification strictly follows the given categories. Return only a single JSON object that maps each fragrance name to its category, with no Markdown code fences and no text before or after it.
"""

# Build the PromptTemplate with its two input variables.
prompt = PromptTemplate(input_variables=["categories", "descriptions"], template=prompt_template)

# Create the LLMChain that ties the model to the prompt.
llm_chain = LLMChain(llm=chat_model, prompt=prompt)

# Classify fragrance descriptions with the LLM and parse its JSON reply.
def _load_json_lenient(text):
    """Parse ``text`` as JSON, tolerating wrappers around the payload.

    Chat models often wrap JSON in a Markdown code fence or add a sentence
    before or after it. The text is parsed as-is first; only if that fails is
    the span from the first opening bracket to the last closing bracket tried.

    Args:
        text: Model reply, already stripped of surrounding whitespace.

    Returns:
        The decoded JSON value.

    Raises:
        json.JSONDecodeError: If neither attempt yields valid JSON. The error
            from the strict (whole-text) attempt is the one that propagates.
    """
    try:
        return json.loads(text)
    except json.JSONDecodeError as strict_error:
        openers = [pos for pos in (text.find("{"), text.find("[")) if pos != -1]
        closer = max(text.rfind("}"), text.rfind("]"))
        if not openers or closer <= min(openers):
            # No bracketed span to fall back on (e.g. a truncated reply).
            raise
        try:
            return json.loads(text[min(openers):closer + 1])
        except json.JSONDecodeError:
            # Report the original problem, not the one from the fallback slice.
            raise strict_error from None


def classify_fragrances_batch(fragrances, categories, *, chain=None):
    """Ask the LLM to classify every fragrance and return the parsed reply.

    Args:
        fragrances: Sequence of mappings; each must provide a 'name' and a
            'content' key, which are rendered as one "- name: content" line.
        categories: Iterable of category names, joined with ", " for the prompt.
        chain: Optional object with a ``run(categories=..., descriptions=...)``
            method returning the model's text. Defaults to the module-level
            ``llm_chain``.

    Returns:
        The decoded JSON value from the model's reply, or ``None`` when there
        is nothing to classify, the reply is empty, or it is not valid JSON.

    Raises:
        KeyError: If a fragrance entry lacks 'name' or 'content'.
    """
    # Avoid an API call (and an invented answer) when there is no input.
    if not fragrances:
        print("No fragrances to classify.")
        return None

    if chain is None:
        chain = llm_chain

    descriptions = [f"- {item['name']}: {item['content']}" for item in fragrances]

    # Run the chain, filling the prompt's categories and descriptions slots.
    result = chain.run(categories=', '.join(categories), descriptions='\n'.join(descriptions))

    # Echo the raw reply for debugging.
    print("Result from model:", result)

    text = (result or "").strip()
    if not text:
        print("Result is empty.")
        return None

    try:
        return _load_json_lenient(text)
    except json.JSONDecodeError as e:
        print(f"JSON decoding error: {e}")
        return None

# Run the classification over every loaded fragrance.
result = classify_fragrances_batch(fragrances, categories)

# Report a failed or empty run; otherwise write the result to disk and confirm.
if not result:
    print("분류 결과가 없거나 JSON 형식이 잘못되었습니다.")
else:
    output_file_path = "classified_fragrances.json"
    with open(output_file_path, "w", encoding="utf-8") as output_file:
        # Keep non-ASCII names readable in the file instead of \u escapes.
        output_file.write(json.dumps(result, ensure_ascii=False, indent=4))

    print(f"분류 결과가 {output_file_path}에 저장되었습니다!")


Result from model: {
  "Bergamot": "Citrus",
  "Bitter Orange": "Citrus",
  "Blood Orange": "Citrus",
  "Buddha's hand": "Citrus",
  "Chinotto": "Citrus",
  "Citron": "Citrus",
  "Clementine": "Citrus",
  "Finger Lime": "Citrus",
  "Grapefruit": "Citrus",
  "Hassaku": "Citrus",
  "Lemon": "Citrus",
  "Lemon Balm": "Citrus",
  "Lemon Tree": "Citrus",
  "Lemon Verbena": "Citrus",
  "Lemongrass": "Citrus",
  "Limetta": "Citrus",
  "Litsea Cubeba": "Citrus",
  "Mandora": "Citrus",
  "Methyl Pamplemousse": "Citrus",
  "Neroli": "Citrus",
  "Orange": "Citrus",
  "Palestinian Sweet Lime": "Citrus",
  "Rangpur": "Citrus",
  "Sudachi citrus": "Citrus",
  "Tangelo": "Citrus",
  "Tangerine": "Citrus",
  "Yuzu": "Citrus",
  "Almond": "Gourmand",
  "Artichoke": "Green",
  "Banana": "Fruity",
  "Barberry": "Fruity",
  "Bearberry": "Fruity",
  "Black Sapote": "Fruity",
  "Blackberry": "Fruity",
  "Breadnut": "Fruity",
  "Carambola (Star Fruit)": "Fruity",
  "Carrot": "Powdery",
  "Cepes": "Musk",
  "