# In [2]:
import csv
import os

from dotenv import load_dotenv
from openai import OpenAI

# Load variables from a local .env file into the process environment so the
# API key can be read from it below.
load_dotenv()

# Shared OpenAI client used by the rest of this file; the key is read from the
# OPENAI_API_KEY environment variable (None is passed when it is unset).
client = OpenAI(
    api_key=os.environ.get("OPENAI_API_KEY"),
)


def extract_entities_keywords(p_summary):
    """Extract named entities and keywords from a Romanian sports news summary.

    The summary is interpolated into a prompt and sent through the
    module-level ``client`` to the chat-completions endpoint. The reply is
    parsed as JSON and normalized before being returned.

    Args:
        p_summary: Summary text to analyse; inserted verbatim into the prompt.

    Returns:
        A dict whose "entities" and "keywords" values are always lists of
        strings. Both lists are empty when the reply is empty, cannot be
        parsed as JSON, or is not a JSON object.

    Raises:
        Any exception raised by ``client.chat.completions.create``; request
        errors are not caught here.
    """
    # Local import so the function does not depend on a file-level `import json`.
    import json

    prompt = f"""
    Extract named entities and keywords from the following Romanian sports news summary.
    Only extract named entities such as people, organizations, teams, competitions, and locations. Do NOT include dates, weekdays, or generic terms.
    Extract relevant keywords that describe the themes, actions, or context of the summary. Avoid repeating named entities or generic filler words.

    Summary: "{p_summary}"

    Return a valid JSON object with two fields: "entities" and "keywords".
    Each field should be a list of strings.
    """
    response = client.chat.completions.create(
        model="gpt-4",
        messages=[
            {"role": "user", "content": prompt}
        ],
        temperature=0.7
    )

    # The message content may be None; treat that the same as an empty reply
    # instead of letting json.loads raise a TypeError.
    result_text = response.choices[0].message.content
    text = (result_text or "").strip()

    # Try the reply as-is first. If that fails, retry on the outermost
    # {...} span, which recovers replies wrapped in Markdown code fences or
    # surrounded by explanatory prose.
    candidates = [text]
    start, end = text.find("{"), text.rfind("}")
    if 0 <= start < end:
        snippet = text[start:end + 1]
        if snippet != text:
            candidates.append(snippet)

    last_error = None
    for candidate in candidates:
        try:
            parsed = json.loads(candidate)
            break
        except json.JSONDecodeError as err:
            last_error = err
    else:
        print(f"JSON decode error: {last_error}")
        print(f"Raw response: {result_text}")
        return {"entities": [], "keywords": []}

    # Valid JSON that is not an object (list, string, null, ...) cannot carry
    # the two expected fields.
    if not isinstance(parsed, dict):
        print(f"Unexpected JSON shape (expected an object): {result_text}")
        return {"entities": [], "keywords": []}

    # Guarantee both fields exist and hold lists of strings, so callers can
    # index them and str.join them without further checks.
    normalized = dict(parsed)
    for field in ("entities", "keywords"):
        value = parsed.get(field)
        if isinstance(value, str):
            value = [value]
        elif not isinstance(value, list):
            value = []
        normalized[field] = [str(item) for item in value if item is not None]
    return normalized


input_file = "storage/test_gpt_input_corrupt.csv"
output_file = "storage/test_gpt_output_corrupt.csv"

# Read summaries from the third column of the input CSV, run entity/keyword
# extraction on each, and write one fully-quoted output row per summary.
# newline="" on both files is what the csv module requires so that newlines
# embedded in quoted fields are handled correctly.
with open(input_file, encoding="utf-8", newline="") as infile, \
        open(output_file, mode="w", newline="", encoding="utf-8") as outfile:
    reader = csv.reader(infile)
    # Skip the header row; the default keeps an empty input file from
    # raising StopIteration.
    next(reader, None)
    writer = csv.writer(outfile, quoting=csv.QUOTE_ALL)
    writer.writerow(["summary", "entities", "keywords"])  # Header

    for row in reader:
        if len(row) < 3:
            continue  # Skip malformed rows
        summary = row[2].strip()
        try:
            result = extract_entities_keywords(summary)
            entities = ",".join(result["entities"])
            keywords = ",".join(result["keywords"])
            writer.writerow([summary, entities, keywords])
        except Exception as e:
            # Best-effort batch run: report the failing row and continue with
            # the next one. The failed row is not written to the output.
            print(f"Error processing row: {summary[:50]}... → {e}")