In [1]:
import os
import sys
import pandas as pd
import json
from langchain_openai import ChatOpenAI
from langchain.prompts import ChatPromptTemplate
from langchain.schema.output_parser import StrOutputParser

KeyboardInterrupt: 

#### Setting up environment

In [None]:
# Put the parent of the current working directory first on the import path,
# so the `config` module imported on the next line is looked up there first.
sys.path.insert(0, os.path.abspath('..'))
from config import set_environment
# NOTE(review): presumably exports the API credentials that ChatOpenAI needs;
# confirm against config.py.
set_environment()

#### Initialize LLM

In [None]:
# Chat model client used by generate_product_summary; pinned to the 'gpt-4-0613' model.
llm = ChatOpenAI(model_name='gpt-4-0613')

#### Load dataset

In [None]:
# Input table (path is relative to the current working directory). Its
# 'Product Category' column is what process_product_categories reads.
csv_path = './intermediate_data/Product_Article_Matching.csv'
df = pd.read_csv(csv_path) 

#### Generate summary for one product category

In [None]:
def generate_product_summary(category):
    """Ask the chat model for a short technical summary of one product category.

    Args:
        category: Product category name; it is substituted into the
            ``{category}`` placeholder of the user message.

    Returns:
        The model's reply, parsed to a plain string by ``StrOutputParser``.

    Note:
        Uses the module-level ``llm`` client, so that cell must have been run first.
    """
    system_message = ('system', '''You are an expert technical writer specializing in electronic components and products.
         Create a comprehensive summary that includes 2-3 lines about what the product category is, how it is used, and why it is popular.
        Keep the summary concise, informative, and technical but accessible.''')
    user_message = ('user', '''{category}
        Please provide a summary following the specified format.''')
    prompt = ChatPromptTemplate.from_messages([system_message, user_message])

    # prompt -> chat model -> plain-string output
    pipeline = prompt | llm | StrOutputParser()
    return pipeline.invoke({"category": category})

#### Generate summaries for all the product categories

In [None]:
def process_product_categories(df, output_path='./intermediate_data/Product_Description.json'):
    """Summarize every unique product category in ``df`` and save the result as JSON.

    Args:
        df: DataFrame with a 'Product Category' column. Missing values are
            dropped and each distinct category is summarized exactly once.
        output_path: Where the JSON file is written. The default is the
            path this notebook writes its product descriptions to.

    Returns:
        dict mapping each category to ``{"category": ..., "summary": ...}``,
        where the summary is the whitespace-stripped text returned by
        ``generate_product_summary``.

    Raises:
        KeyError: if ``df`` has no 'Product Category' column.
    """
    category_column = 'Product Category'
    unique_categories = df[category_column].dropna().unique()
    print(f"Processing {len(unique_categories)} product categories...")

    product_summaries = {}
    for category in unique_categories:
        summary = generate_product_summary(category)
        product_summaries[category] = {
            "category": category,
            "summary": summary.strip(),
        }

    # Create the directory the file is actually written into. Creating an
    # unrelated directory would leave open() below to fail on a fresh
    # checkout, after all the LLM calls have already been made.
    output_dir = os.path.dirname(output_path)
    if output_dir:  # a bare filename has no directory component to create
        os.makedirs(output_dir, exist_ok=True)

    # Save to JSON; ensure_ascii=False keeps non-ASCII characters readable.
    with open(output_path, 'w', encoding='utf-8') as f:
        json.dump(product_summaries, f, indent=2, ensure_ascii=False)
    print(f"Product summaries saved to {output_path}")

    return product_summaries


#### Generate and save product category descriptions

In [None]:
# Run the whole pipeline on the loaded table: summarize each unique category
# and write the JSON file. In a notebook kernel __name__ is "__main__", so
# this guard passes whenever the cell is executed.
if __name__ == "__main__":
    summaries = process_product_categories(df)

Processing 51 product categories...
Product summaries saved to ./intermediate_data/Product_Description.json
