In [104]:
import os
import json
import time
import pandas as pd
from tqdm import tqdm
from openrouter import OpenRouter
from google.api_core.exceptions import TooManyRequests





In [105]:
load_dotenv()
client = OpenRouter(api_key=os.getenv("OPENROUTER_API_KEY"))

In [122]:
# Load Yelp dataset (change path accordingly)
df = pd.read_csv("/yelp.csv")  # put your correct local path here
df = df[['text', 'stars']]

# Sample size to limit API calls (adjust as needed)
SAMPLE_SIZE = 10
sample_df = df.sample(SAMPLE_SIZE, random_state=42).reset_index(drop=True)


In [107]:
# Load Yelp dataset (change path accordingly)
df = pd.read_csv("/yelp.csv")  # put your correct local path here
df = df[['text', 'stars']]

# Sample size to limit API calls (adjust as needed)
SAMPLE_SIZE = 3
sample_df = df.sample(SAMPLE_SIZE, random_state=42).reset_index(drop=True)


In [108]:
CACHE_FILE = "openrouter_cache.json"

# Load existing cache or create new
if os.path.exists(CACHE_FILE):
    with open(CACHE_FILE, "r") as f:
        response_cache = json.load(f)
else:
    response_cache = {}

def save_cache():
    with open(CACHE_FILE, "w") as f:
        json.dump(response_cache, f, indent=2)

def safe_call(func, *args, retries=5, initial_wait=5, **kwargs):
    wait = initial_wait
    for attempt in range(retries):
        try:
            return func(*args, **kwargs)
        except TooManyRequests:
            print(f"Rate limit hit, retry {attempt+1}/{retries}, waiting {wait}s...")
            time.sleep(wait)
            wait *= 2
        except Exception as e:
            print(f"Error: {e}. Retrying {attempt+1}/{retries}...")
            time.sleep(wait)
            wait *= 2
    raise Exception("Max retries exceeded")

def call_with_cache(func, prompt):
    if prompt in response_cache:
        print("Using cached response")
        return response_cache[prompt]
    response = safe_call(func, prompt)
    response_cache[prompt] = response
    save_cache()
    return response


In [114]:
def call_openrouter(prompt: str) -> str:
    response = client.chat.send(
        model="mistralai/mistral-7b-instruct",
        messages=[{"role": "user", "content": prompt}],
        max_tokens=150,
        temperature=0.7,
    )
    return response.choices[0].message.content


In [110]:
def prompt_zero_shot(review):
    prompt = (
        f"Classify the following Yelp review into 1 to 5 stars. Return JSON like:\n"
        f'{{"predicted_stars": int, "explanation": str}}\n\n'
        f"Review: \"{review}\"\n"
    )
    return call_with_cache(call_openrouter, prompt)

def prompt_few_shot(review):
    few_shot_example = """
Review: "The food was amazing and the service was excellent."
{"predicted_stars": 5, "explanation": "Positive sentiment about food and service."}

Review: "Terrible experience. Waited an hour and food was cold."
{"predicted_stars": 1, "explanation": "Negative experience with waiting time and food quality."}
"""
    prompt = (
        f"Classify the following Yelp review into 1 to 5 stars. Return JSON like:\n"
        f'{{"predicted_stars": int, "explanation": str}}\n\n'
        f"Examples:\n{few_shot_example}\n"
        f"Review: \"{review}\"\n"
    )
    return call_with_cache(call_openrouter, prompt)

def prompt_chain_of_thought(review):
    prompt = (
        f"Read the Yelp review and think step-by-step to assign stars from 1 to 5.\n"
        f"Explain your reasoning and return JSON like:\n"
        f'{{"predicted_stars": int, "explanation": str}}\n\n'
        f"Review: \"{review}\"\n"
    )
    return call_with_cache(call_openrouter, prompt)


In [119]:
import json
import re

def safe_json_parse(text):
    """
    Extract the first JSON object substring from text and parse it.
    Returns a dict if successful, else None.
    """
    if not text or not isinstance(text, str):
        return None

    # Regex to extract JSON object {...} - non-greedy match
    pattern = re.compile(r'\{.*?\}', re.DOTALL)
    matches = pattern.findall(text)
    for match in matches:
        try:
            return json.loads(match)
        except json.JSONDecodeError:
            continue
    return None


In [123]:
results = {
    "zero_shot": [],
    "few_shot": [],
    "cot": []
}

for _, row in tqdm(sample_df.iterrows(), total=len(sample_df)):
    review = row["text"]
    true_rating = row["stars"]

    for method, func in [
        ("zero_shot", prompt_zero_shot),
        ("few_shot", prompt_few_shot),
        ("cot", prompt_chain_of_thought),
    ]:
        output = func(review)
        time.sleep(2)  # small delay between calls

        parsed = safe_json_parse(output)

        if parsed:
            predicted = parsed.get("predicted_stars", None)
            valid_json = True
        else:
            predicted = None
            valid_json = False

        results[method].append({
            "true": true_rating,
            "predicted": predicted,
            "valid_json": valid_json,
            "raw_output": output
        })


  0%|          | 0/10 [00:00<?, ?it/s]

Using cached response
Using cached response
Using cached response


 10%|█         | 1/10 [00:06<00:54,  6.00s/it]

Using cached response
Using cached response
Using cached response


 20%|██        | 2/10 [00:12<00:48,  6.00s/it]

Using cached response
Using cached response
Using cached response


100%|██████████| 10/10 [01:41<00:00, 10.18s/it]


In [124]:
import json

def evaluate_results_json(results):
    output = {}
    for method, entries in results.items():
        total = len(entries)
        if total == 0:
            accuracy_percent = 0.0
            json_validity_percent = 0.0
        else:
            correct = sum(1 for e in entries if e["valid_json"] and e["true"] == e["predicted"])
            valid_jsons = sum(1 for e in entries if e["valid_json"])
            accuracy_percent = round((correct / total) * 100, 2)
            json_validity_percent = round((valid_jsons / total) * 100, 2)

        # Build method summary
        output[method] = {
            "total_samples": total,
            "accuracy_percent": accuracy_percent,
            "json_validity_percent": json_validity_percent,
            "results": entries  # include full details
        }

    return output


# Example usage
summary_json = evaluate_results_json(results)

# Print formatted JSON string (optional)
print(json.dumps(summary_json, indent=2))



{
  "zero_shot": {
    "total_samples": 10,
    "accuracy_percent": 20.0,
    "json_validity_percent": 30.0,
    "results": [
      {
        "true": 4,
        "predicted": null,
        "valid_json": false,
        "raw_output": " "
      },
      {
        "true": 5,
        "predicted": null,
        "valid_json": false,
        "raw_output": " "
      },
      {
        "true": 3,
        "predicted": null,
        "valid_json": false,
        "raw_output": " "
      },
      {
        "true": 1,
        "predicted": 1,
        "valid_json": true,
        "raw_output": " <s> [OUT] {\"predicted_stars\": 1, \"explanation\": \"The review expresses extreme disappointment with the service at Coulter Infiniti, mentioning that the car was overcharged for an unnecessary repair. The customer had to seek a second opinion elsewhere, which resolved the issue for a much lower cost. The negative experience led to a decision not to choose Infiniti again, indicating a very poor rating.\"}"
      