In [3]:
%pip install openai

Collecting openai
  Using cached openai-1.77.0-py3-none-any.whl.metadata (25 kB)
Collecting jiter<1,>=0.4.0 (from openai)
  Using cached jiter-0.9.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (5.2 kB)
Collecting tqdm>4 (from openai)
  Downloading tqdm-4.67.1-py3-none-any.whl.metadata (57 kB)
Using cached openai-1.77.0-py3-none-any.whl (662 kB)
Using cached jiter-0.9.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (352 kB)
Downloading tqdm-4.67.1-py3-none-any.whl (78 kB)
Installing collected packages: tqdm, jiter, openai
Successfully installed jiter-0.9.0 openai-1.77.0 tqdm-4.67.1
Note: you may need to restart the kernel to use updated packages.


In [4]:
from openai import OpenAI

In [None]:
OPENAI_API_KEY = '...'

client = OpenAI(api_key=OPENAI_API_KEY)

In [7]:
import os
import json
import csv
import re
from openai import OpenAI

# Batch settings
BATCH_SIZE = 10

# System prompt for batched sentiment analysis
system_instruction = """
You are an expert sentiment analyst. You will be given a JSON array of review objects.
Each object has:
  - review_id: a unique identifier for the review
  - review_snippet: the full text of the review
For each object, return a new object with keys:
  - review_id: same identifier
  - review_snippet: the original snippet (full text)
  - sentiment: one of "Positive", "Neutral", or "Negative"
  - score: an integer from 0 to 10 representing how positive it is
Return only a JSON array of these objects, without any additional text.
"""

# Helper to split list into batches
def chunk(lst, size):
    for i in range(0, len(lst), size):
        yield lst[i:i + size]

# Load dataset
with open("sample_200_review.json", "r", encoding="utf-8") as f:
    reviews = json.load(f)

all_results = []
for batch_index, batch in enumerate(chunk(reviews, BATCH_SIZE), start=1):
    payload = [
        {
            "review_id": entry["review_id"],
            "review_snippet": entry.get("text", "")
        }
        for entry in batch
    ]
    user_message = json.dumps(payload, ensure_ascii=False)

    messages = [
        {"role": "system", "content": system_instruction},
        {"role": "user",   "content": user_message}
    ]

    response = client.chat.completions.create(
        model="gpt-3.5-turbo",
        messages=messages
    )

    raw = response.choices[0].message.content.strip()

    # === Option 1: Extract & clean JSON ===
    m = re.search(r"\[.*\]", raw, re.DOTALL)
    if not m:
        print(f"Batch {batch_index}: no JSON array found\n{raw}")
        continue
    arr_str = m.group(0)

    # remove any trailing commas before } or ]
    arr_str = re.sub(r",\s*([\]\}])", r"\1", arr_str)

    try:
        batch_results = json.loads(arr_str)
    except json.JSONDecodeError as e:
        print(f"Batch {batch_index}: JSON parse failed after cleaning: {e}")
        continue

    all_results.extend(batch_results)
    print(f"Processed batch {batch_index}, got {len(batch_results)} results.")

# Write to CSV, truncating snippet to first 50 chars
csv_file = "sentiment_results.csv"
with open(csv_file, "w", newline="", encoding="utf-8") as csvfile:
    fieldnames = ["review_id", "review_snippet", "sentiment", "score"]
    writer = csv.DictWriter(csvfile, fieldnames=fieldnames)
    writer.writeheader()
    for item in all_results:
        writer.writerow({
            "review_id":      item["review_id"],
            "review_snippet": item["review_snippet"][:50],
            "sentiment":      item["sentiment"],
            "score":          item["score"]
        })

print(f"Sentiment analysis complete. Total items: {len(all_results)}. Results saved to {csv_file}.")

Processed batch 1, got 10 results.
Processed batch 2, got 10 results.
Processed batch 3, got 10 results.
Processed batch 4, got 10 results.
Batch 5: no JSON array found
[{"review_id": "q2zPKtW-thQuAXUiTU4Uqw", "review_snippet": "I'm always on the hunt for a tasty dairy-free latte and theirs (almond/coconut milk blend) is definitely one of the best in Nashville! Will definitely be back.", "sentiment": "Positive", "score": 8}, {"review_id": "Tb07UEPbKhNmKf7Py_popA", "review_snippet": "Let me start off by saying we are Not big drinkers so a night out for us consist of two  to three drinks and we start to feel a good buzz. So it's our first time to Nashville we head straight to Broadway on Saturday night. Want to get a good buzz going and enjoy the atmosphere and town. . The place is beautiful, very modern. Sat in the second level bar had the Buckshot and 2 jack and cokes to follow.  All I have to say is SUPER weak drinks!! Paid about $75 and felt Like we drank water!!!", "sentiment": "Neg