## Label Generation
Here we try to build up an efficient and collective way to generate labels for wine. Two approaches are used:
1. **By knowledge**: We input knowledge and promotion articles to generate labels for wine.
2. **By wine**: We input specific wine's basic information and description to generate labels for wine.



## 0. Setup
### 0.1 Libraries

In [None]:
import csv
import os

import openai
import pandas as pd
import requests

### 0.2 File Path

In [None]:
# Input files: domain knowledge, promotional copy, and example labels.
knowledge_path, demo_path, example_path = (
    "info/knowledge.txt",
    "info/demo.txt",
    "info/example.txt",
)
# Date stamp (YYYYMMDD) embedded in the output file names.
today = pd.Timestamp("today").strftime("%Y%m%d")
# Both outputs share the same dated stem and differ only in extension.
output_csv, output_txt = (
    f"output/labels_{today}.{ext}" for ext in ("csv", "txt")
)

### 0.3 Constants and Variables

In [None]:
# wine-labeling-202412 (presumably the name of the API key/project -- confirm)
# SECURITY: the secret key must never be stored in the notebook. It is read
# from the OPENAI_API_KEY environment variable instead. Any key that was
# previously hard-coded here has been exposed and must be revoked.
MY_KEY = os.environ.get("OPENAI_API_KEY", "")
if not MY_KEY:
    # Fail fast with a clear message instead of a late HTTP 401 from the API.
    raise RuntimeError(
        "OPENAI_API_KEY environment variable is not set; "
        "export it before running this notebook."
    )
openai.api_key = MY_KEY
OPENAI_API_URL = "https://api.openai.com/v1/chat/completions"
APPROACH = 1  # 1: by knowledge; 2: by wine

# APPROACH 1
# Seed label categories; main() may ask the model to suggest additional ones.
category_list = ["產區標籤", "國家標籤", "品種標籤", "風味標籤", "口感標籤", "製成標籤"]
ADD_CATEGORY = True

# APPROACH 2
# Minimum number of labels requested per wine.
NUM_LABELS = 5

In [None]:
def read_file(file_path, approach):
    """Read the content of an Excel or text file.

    Args:
        file_path: Path of the file to read. The extension is matched
            case-insensitively.
        approach: 1 returns the file content as a single string (``.xlsx``
            is rendered with ``DataFrame.to_string(index=False)``, ``.txt``
            is read as UTF-8). 2 returns the ``.xlsx`` content as a
            DataFrame.

    Returns:
        A string for approach 1, a pandas DataFrame for approach 2.

    Raises:
        ValueError: If the file extension is not supported for the given
            approach, or if the approach is neither 1 nor 2.
    """
    lowered = file_path.lower()
    is_excel = lowered.endswith(".xlsx")

    if approach == 1:
        if is_excel:
            return pd.read_excel(file_path).to_string(index=False)
        if lowered.endswith(".txt"):
            with open(file_path, "r", encoding="utf-8") as file:
                return file.read()
        raise ValueError("Unsupported file format")

    if approach == 2:
        if is_excel:
            return pd.read_excel(file_path)
        raise ValueError("Unsupported file format")

    # Previously an unknown approach silently returned None.
    raise ValueError(f"Unsupported approach: {approach!r}")

def send_request_to_openai(prompt, max_tokens, timeout=120):
    """Send a single-turn chat completion request to the OpenAI API.

    Args:
        prompt: Text sent as the only user message.
        max_tokens: Upper bound on the number of tokens in the completion.
        timeout: Seconds to wait for the server before giving up. Without a
            timeout ``requests`` may block indefinitely on a stalled
            connection.

    Returns:
        The content of the first choice's message, stripped of surrounding
        whitespace.

    Raises:
        RuntimeError: If the HTTP request fails, times out, returns an error
            status, returns a body that is not valid JSON, or returns JSON
            that does not have the expected structure.
    """
    headers = {
        "Authorization": f"Bearer {MY_KEY}",
        "Content-Type": "application/json"
    }
    data = {
        "model": "gpt-4",
        "messages": [{"role": "user", "content": prompt}],
        "max_tokens": max_tokens
    }
    try:
        response = requests.post(
            OPENAI_API_URL, headers=headers, json=data, timeout=timeout
        )
        response.raise_for_status()  # Raise an exception for HTTP errors
        response_data = response.json()
    except (requests.exceptions.RequestException, ValueError) as e:
        # ValueError covers JSON decoding errors on older requests versions.
        raise RuntimeError(f"API request failed: {e}") from e

    try:
        return response_data["choices"][0]["message"]["content"].strip()
    except (KeyError, IndexError, TypeError, AttributeError) as e:
        # The payload was JSON but not shaped like a chat completion.
        raise RuntimeError(f"Unexpected API response format: {e!r}") from e

def suggest_new_categories(knowledge, category_list):
    """Ask the model for additional label categories based on the knowledge text.

    Args:
        knowledge: Wine knowledge text embedded in the prompt.
        category_list: Categories that already exist; they are listed in the
            prompt and excluded from the result. The list is not modified.

    Returns:
        A list of suggested category names, one per non-empty response line,
        stripped of surrounding whitespace, without entries already present
        in ``category_list`` and without duplicates (first occurrence wins).

    Raises:
        RuntimeError: Propagated from ``send_request_to_openai``.
    """
    prompt = (
        f"目前的標籤類別包括：\n\n{', '.join(category_list)}\n\n"
        f"以下是關於葡萄酒的專業知識：\n\n{knowledge}\n\n"
        "為了達到從各種角度去描述葡萄酒的目的，請根據以上內容建議新的標籤類別（新增的標籤類別格式以換行隔開，不應該有其他文字，單傳回傳建議的標籤類別）。"
    )
    response = send_request_to_openai(prompt, max_tokens=200)
    suggested_categories = response.split("\n")
    print(f"Suggested categories: {suggested_categories}")

    # Compare the *stripped* name so padded copies of an existing category
    # are filtered out, and track what was already accepted to drop repeats.
    seen = set(category_list)
    new_categories = []
    for raw in suggested_categories:
        cat = raw.strip()
        if cat and cat not in seen:
            seen.add(cat)
            new_categories.append(cat)
    return new_categories

def determine_reference_and_number(knowledge, category):
    """Ask the model which source to use and how many labels to generate.

    Args:
        knowledge: Wine knowledge text embedded in the prompt.
        category: Label category the decision is made for.

    Returns:
        A ``(reference, target_number)`` tuple. ``reference`` is one of
        ``"專業知識"``, ``"酒款文案"`` or ``"All"``; ``target_number`` is the
        requested number of labels as an int.

    Raises:
        ValueError: If the response does not consist of exactly two
            non-empty lines, the reference is not one of the allowed values,
            or the second line is not an integer.
        RuntimeError: Propagated from ``send_request_to_openai``.
    """
    prompt = (
        f"以下是關於葡萄酒的知識：\n\n{knowledge}\n\n"
        f"請針對目前的標籤類別「{category}」決定參考資料應該為何者？\n"
        "適合參考專業知識，回傳:專業知識；適合參考酒款文案，回傳:酒款文案；兩者皆要參考，回傳:All。\n"
        "請決定該類標籤需生成多少之數量，所有類別數量應該大於20？\n"
        "格式請回傳如下（僅會有2行）：\n"
        "(第一行參考資料）All\n"
        "(第二行目標數量）50"
    )
    response = send_request_to_openai(prompt, max_tokens=100)

    # Ignore blank lines and handle "\r\n" so harmless formatting differences
    # in the reply do not fail the two-line check.
    lines = [line.strip() for line in response.splitlines() if line.strip()]
    if len(lines) != 2:
        raise ValueError("Invalid response format: Expected two lines")

    reference, raw_number = lines
    if reference not in ("專業知識", "酒款文案", "All"):
        raise ValueError("Invalid reference type")
    try:
        target_number = int(raw_number)
    except ValueError as e:
        raise ValueError(f"Invalid target number: {raw_number!r}") from e

    print(f"Suggested reference: {reference}, Target number: {target_number}")
    return reference, target_number

def generate_labels(knowledge, examples, category, num_labels, approach):
    """Generate labels with the model from the given wine information.

    Args:
        knowledge: Text describing wine knowledge (approach 1) or a single
            wine (approach 2); embedded in the prompt.
        examples: Example labels embedded in the prompt; used by approach 1
            only.
        category: Label category to generate for; used by approach 1 only.
        num_labels: Minimum number of labels requested in the prompt.
        approach: 1 to generate labels for a category, 2 to generate labels
            for one wine.

    Returns:
        A list with one entry per non-empty line of the model's reply, each
        stripped of surrounding whitespace.

    Raises:
        ValueError: If ``approach`` is neither 1 nor 2.
        RuntimeError: Propagated from ``send_request_to_openai``.
    """
    # Validate first: previously an unknown approach left `prompt` unbound
    # and surfaced as an UnboundLocalError.
    if approach not in (1, 2):
        raise ValueError(f"Unsupported approach: {approach!r}")

    if approach == 1:
        prompt = (
            f"以下是關於葡萄酒的資訊：\n\n{knowledge}\n\n"
            f"和範例標籤：\n\n{examples}\n\n"
            f"請根據以上內容，為「{category}」生成至少 {num_labels} 個標籤。"
        )
    else:
        prompt = (
            f"請根據以下這款葡萄酒的資訊：\n\n{knowledge}\n\n"
            f"發想和這款酒切合、相關、相反等至少 {num_labels} 個葡萄酒標籤。"
        )

    response = send_request_to_openai(prompt, max_tokens=500)
    return [label.strip() for label in response.split("\n") if label.strip()]

def save_to_csv(category, labels, output_path):
    """Append one ``category,label`` row per label to a CSV file.

    Args:
        category: Value written in the first column of every row.
        labels: Iterable of labels; each becomes the second column of a row.
        output_path: CSV file to append to. The file and any missing parent
            directories are created if needed.
    """
    # Create the output directory up front so a missing folder does not
    # discard labels that were already generated.
    parent = os.path.dirname(output_path)
    if parent:
        os.makedirs(parent, exist_ok=True)

    with open(output_path, mode="a", newline="", encoding="utf-8") as file:
        csv.writer(file).writerows([category, label] for label in labels)

def save_to_txt(input_csv, output_txt):
    """Extract the label column from a CSV file and write it to a text file.

    Args:
        input_csv: CSV file whose second column holds the labels.
        output_txt: Text file to (over)write with one label per line.

    Rows with fewer than two columns (for example blank lines) are skipped
    instead of raising IndexError.
    """
    # newline="" lets the csv module handle line endings inside quoted fields.
    with open(input_csv, mode="r", newline="", encoding="utf-8") as file:
        labels = [row[1] for row in csv.reader(file) if len(row) > 1]
    with open(output_txt, mode="w", encoding="utf-8") as file:
        file.write("\n".join(labels))


In [None]:
def main(APPROACH):
    """Run the label-generation pipeline and write the CSV and TXT outputs.

    Args:
        APPROACH: 1 generates labels per category from the knowledge and
            demo files; 2 generates labels per wine from a spreadsheet.

    Raises:
        ValueError: If ``APPROACH`` is neither 1 nor 2, or propagated from
            the helpers on unsupported files or malformed model replies.
        RuntimeError: Propagated from ``send_request_to_openai``.
    """
    if APPROACH == 1:
        knowledge = read_file(knowledge_path, APPROACH)
        examples = read_file(example_path, APPROACH)

        # Work on a copy so re-running does not keep growing the
        # module-level category_list.
        categories = list(category_list)
        if ADD_CATEGORY:
            categories.extend(suggest_new_categories(knowledge, categories))

        demo = None  # Loaded lazily, at most once.
        for category in categories:
            # Always decide from the knowledge text; the per-category source
            # is kept in its own variable so it cannot leak into the next
            # iteration.
            reference, num_labels = determine_reference_and_number(knowledge, category)
            if reference == "專業知識":
                source_text = knowledge
            else:
                if demo is None:
                    demo = read_file(demo_path, APPROACH)
                if reference == "酒款文案":
                    source_text = demo
                else:
                    # "All": the prompt asks for both sources to be used.
                    source_text = f"{knowledge}\n\n{demo}"
            labels = generate_labels(source_text, examples, category, num_labels, APPROACH)
            save_to_csv(category, labels, output_csv)
    elif APPROACH == 2:
        # NOTE(review): read_file only accepts .xlsx for approach 2, while
        # example_path points at a .txt file, so this branch raises
        # ValueError as configured -- confirm the intended spreadsheet path.
        df = read_file(example_path, APPROACH)
        for i in range(len(df)):
            knowledge = df.iloc[i].to_string(index=False)
            labels = generate_labels(knowledge, None, None, NUM_LABELS, APPROACH)
            save_to_csv(f"酒款{i+1}", labels, output_csv)
    else:
        raise ValueError(f"Unsupported approach: {APPROACH!r}")
    save_to_txt(output_csv, output_txt)

In [None]:
# Run the pipeline with the module-level APPROACH setting.
main(APPROACH)