In [15]:
!pip install -q -U google-genai pandas

from google import genai
import os, pandas as pd, numpy as np, re, json, time

# SECURITY: never embed an API key in notebook source -- notebooks are routinely
# shared and committed. The key that used to be hard-coded on this line must be
# treated as leaked and revoked/rotated in the provider console.
#
# The key is taken from the GEMINI_API_KEY environment variable when it is
# already set; otherwise the user is prompted once (input is not echoed and is
# not stored in the notebook).
if not os.environ.get("GEMINI_API_KEY"):
    from getpass import getpass

    os.environ["GEMINI_API_KEY"] = getpass("Enter your Gemini API key: ").strip()

# The client is created without explicit credentials, exactly as before, so it
# resolves the key from the environment.
client = genai.Client()


In [13]:
# Load the Yelp export, keep only the review text and its star rating, and
# discard rows where either of those two values is missing.
data = pd.read_csv("/content/yelp.csv").loc[:, ["text", "stars"]].dropna()

# Retain only rows whose rating lies in the closed interval [1, 5].
stars_in_range = data["stars"].between(1, 5)
data = data.loc[stars_in_range]

def clean_text(t):
    """Normalise a raw review string for prompting.

    Steps, applied in order: lowercase the text, delete every run of
    non-whitespace characters that starts with ``http``, replace each
    character that is neither an ASCII letter nor whitespace with a space,
    collapse whitespace runs to a single space, and trim both ends.

    Parameters
    ----------
    t : str
        Raw review text.

    Returns
    -------
    str
        The cleaned, lowercase text.
    """
    cleaned = t.lower()
    # (pattern, replacement) pairs; order matters because URLs must be removed
    # before their punctuation is turned into spaces.
    substitutions = (
        (r"http\S+", ""),
        (r"[^a-zA-Z\s]", " "),
        (r"\s+", " "),
    )
    for pattern, replacement in substitutions:
        cleaned = re.sub(pattern, replacement, cleaned)
    return cleaned.strip()

# Number of reviews to evaluate and the seed that makes the draw repeatable.
SAMPLE_SIZE = 10
RANDOM_SEED = 42

# Build the modelling frame in one chained expression. `.assign` returns a new
# frame, so no column is written onto the filtered slice produced earlier
# (writing to such a slice is what triggers pandas' SettingWithCopyWarning).
data = (
    data.assign(review=data["text"].apply(clean_text))
    .loc[:, ["review", "stars"]]
    .rename(columns={"stars": "actual"})
    .astype({"actual": int})
)

df = data.reset_index(drop=True)

# Cap the request at the number of available rows so a small input file does
# not make `sample` raise.
sample = df.sample(min(SAMPLE_SIZE, len(df)), random_state=RANDOM_SEED).reset_index(drop=True)

sample.head()


Unnamed: 0,review,actual
0,we got here around midnight last friday the pl...,4
1,brought a friend from louisiana here she says ...,5
2,every friday my dad and i eat here we order th...,3
3,my husband and i were really really disappoint...,1
4,love this place was in phoenix weeks for work ...,5


In [14]:
# Prompt templates for the three rating strategies being compared.
#
# Each template contains a single `{review}` placeholder and shows the expected
# JSON reply using doubled braces (`{{` / `}}`), which is how `str.format`
# represents literal braces.
# NOTE(review): the code that fills these templates is not in this part of the
# notebook -- confirm it uses `str.format(review=...)`.

# p1: "strict rater" framing with an exact-format instruction.
_PROMPT_STRICT_RATER = """
You are a strict review rating assistant.

Task: Read the restaurant review and rate it from 1 to 5 stars.
1 = extremely negative
2 = negative
3 = mixed or neutral
4 = positive
5 = very positive and enthusiastic

Return only valid JSON in this exact format:
{{"predicted_stars": 4, "explanation": "Brief reasoning for the assigned rating."}}

Review:
{review}
"""

# p2: "customer satisfaction" framing.
_PROMPT_SATISFACTION = """
You are analysing customer satisfaction.

Read the restaurant review and choose a star rating from 1 to 5.
1 = very bad experience
2 = bad
3 = okay or mixed
4 = good
5 = excellent and highly recommended

Respond only with JSON:
{{"predicted_stars": 4, "explanation": "Short reason for the rating."}}

Review:
{review}
"""

# p3: "sentiment classifier" framing that also mentions intensity.
_PROMPT_SENTIMENT = """
You are a sentiment classifier for restaurant reviews.

Decide a rating from 1 to 5 stars based on the overall sentiment and intensity.
Use this mapping:
1 = very negative
2 = somewhat negative
3 = neutral or balanced
4 = clearly positive
5 = extremely positive and enthusiastic

Return only JSON in this format:
{{"predicted_stars": 4, "explanation": "One sentence reason."}}

Review:
{review}
"""

PROMPTS = {
    "p1": _PROMPT_STRICT_RATER,
    "p2": _PROMPT_SATISFACTION,
    "p3": _PROMPT_SENTIMENT,
}


In [16]:
def call_gemini(prompt_text, model="gemini-2.0-flash"):
    """Send one prompt to Gemini and return the reply text.

    This is a best-effort call: any exception raised while requesting or
    reading the response is printed and converted into an empty string, so a
    single failed request does not abort a batch run.

    Parameters
    ----------
    prompt_text : str
        The fully formatted prompt to send as ``contents``.
    model : str, optional
        Model identifier passed to ``generate_content``. Defaults to
        ``"gemini-2.0-flash"``, the value that was previously hard-coded.

    Returns
    -------
    str
        The response text, or ``""`` when the request failed or the response
        carried no text.
    """
    try:
        response = client.models.generate_content(
            model=model,
            contents=prompt_text,
        )
        # `.text` is read inside the try block on purpose: if accessing it
        # raises, that is handled like any other failed call.
        return response.text or ""
    except Exception as e:  # deliberate catch-all; the failure is reported below
        print("Error from Gemini:", e)
        return ""

def extract_json(s):
    """Return the first JSON object embedded in ``s``, or ``None``.

    Model replies often wrap the JSON in prose or code fences. Instead of
    slicing from the first ``{`` to the last ``}`` (which fails as soon as any
    other brace appears before or after the object), every ``{`` is tried in
    turn as the start of a JSON value and the first one that decodes to an
    object wins.

    Parameters
    ----------
    s : str
        Raw reply text. Non-string input yields ``None``.

    Returns
    -------
    dict or None
        The decoded object, or ``None`` when no JSON object can be found.
    """
    if not isinstance(s, str):
        return None

    decoder = json.JSONDecoder()
    start = s.find("{")
    while start != -1:
        try:
            # raw_decode parses one JSON value beginning at `start` and
            # ignores whatever text follows it.
            candidate, _ = decoder.raw_decode(s, start)
        except (ValueError, RecursionError):
            # Not valid JSON from this brace (JSONDecodeError is a ValueError);
            # move on to the next one.
            pass
        else:
            if isinstance(candidate, dict):
                return candidate
        start = s.find("{", start + 1)
    return None

def parse_response(s):
    """Turn a raw model reply into ``(stars, explanation, is_valid)``.

    The reply is valid only when it contains a JSON object whose
    ``predicted_stars`` is a whole number from 1 to 5. Integers, integral
    floats (``4.0``) and integer strings (``"4"``) are accepted. Booleans and
    fractional values are rejected rather than being coerced, because
    ``int(True)`` is 1 and ``int(4.7)`` is 4, which would otherwise be counted
    as valid ratings.

    Parameters
    ----------
    s : str
        Raw reply text, passed to ``extract_json``.

    Returns
    -------
    tuple
        ``(stars, explanation, is_valid)``:

        * no JSON object found: ``(np.nan, None, False)``
        * rating missing, malformed or out of range:
          ``(np.nan, explanation, False)``
        * otherwise: ``(int, explanation, True)``

        ``explanation`` defaults to ``""`` when the key is absent.
    """
    obj = extract_json(s)
    if not isinstance(obj, dict):
        return np.nan, None, False

    expl = obj.get("explanation", "")
    raw_stars = obj.get("predicted_stars", None)

    # bool is a subclass of int, so it must be excluded explicitly.
    if isinstance(raw_stars, bool):
        return np.nan, expl, False
    # Refuse to truncate fractional ratings (also filters NaN / infinity).
    if isinstance(raw_stars, float) and not raw_stars.is_integer():
        return np.nan, expl, False

    try:
        stars = int(raw_stars)
    except (TypeError, ValueError, OverflowError):
        return np.nan, expl, False

    if not 1 <= stars <= 5:
        return np.nan, expl, False
    return stars, expl, True

def _keyword_pattern(phrases):
    """Compile a regex matching any of ``phrases`` as whole words/phrases."""
    alternatives = "|".join(re.escape(phrase) for phrase in phrases)
    return re.compile(r"\b(?:" + alternatives + r")\b")


# Ordered (stars, pattern) rules: the first pattern that matches decides the
# rating, so negative cues take precedence over positive ones.
_FALLBACK_RULES = (
    (1, _keyword_pattern(["worst", "awful", "terrible", "disgusting", "horrible", "never again"])),
    (2, _keyword_pattern(["bad", "poor", "rude", "cold food", "dirty", "slow service"])),
    (3, _keyword_pattern(["ok", "okay", "average", "fine", "nothing special", "decent"])),
    (4, _keyword_pattern(["good", "great", "nice", "tasty", "friendly", "quick service", "liked"])),
    (5, _keyword_pattern(["amazing", "excellent", "perfect", "love this place", "fantastic", "best", "highly recommend"])),
)


def fallback_predict(text):
    """Guess a 1-5 star rating from keywords when no model rating is usable.

    Keywords are matched on word boundaries. Plain substring tests fire inside
    unrelated words ("ok" in "looked" / "took", "bad" in "badge", "fine" in
    "refined"), which pushed many reviews to the wrong bucket.

    Parameters
    ----------
    text : str
        Review text; matching is case-insensitive.

    Returns
    -------
    int
        The rating of the first rule (checked in the order 1, 2, 3, 4, 5)
        whose keywords occur in the text, or 4 when nothing matches.
    """
    lowered = text.lower()
    for stars, pattern in _FALLBACK_RULES:
        if pattern.search(lowered):
            return stars
    return 4


In [17]:
# This cell used to repeat the PROMPTS dictionary word for word. The templates
# are already defined in the earlier prompt cell of this notebook; keeping a
# second copy means an edit to one copy is silently overridden (or ignored)
# depending on which cell ran last. The single earlier definition is now the
# source of truth, and this cell only checks that it is loaded.
_expected_prompt_keys = {"p1", "p2", "p3"}
_missing_prompt_keys = _expected_prompt_keys - set(PROMPTS)
assert not _missing_prompt_keys, (
    f"PROMPTS is missing templates: {sorted(_missing_prompt_keys)}"
)


In [18]:
def acc(p):
    """Exact-match accuracy of the predictions made with prompt ``p``.

    Parameters
    ----------
    p : str
        Prompt key to select in the ``prompt`` column of ``res``.

    Returns
    -------
    float
        Fraction of selected rows where ``predicted`` equals ``actual``, or
        ``0.0`` when ``res`` has no rows for ``p`` (the same convention
        ``json_rate`` uses, instead of NaN from the mean of an empty series).
    """
    sub = res[res["prompt"] == p]
    if len(sub) == 0:
        return 0.0
    # A missing (NaN) prediction never equals the actual rating, so it counts
    # as a miss.
    return (sub["predicted"] == sub["actual"]).mean()

def json_rate(p):
    """Share of replies for prompt ``p`` that were flagged as valid JSON.

    Parameters
    ----------
    p : str
        Prompt key to select in the ``prompt`` column of ``res``.

    Returns
    -------
    float
        Mean of the ``json_valid`` column over the selected rows, or ``0.0``
        when there are no rows for ``p``.
    """
    valid_flags = res.loc[res["prompt"] == p, "json_valid"]
    return valid_flags.mean() if len(valid_flags) else 0.0

# One row per prompt variant: exact-match accuracy and the share of replies
# that were flagged as valid JSON.
prompt_keys = ["p1", "p2", "p3"]

summary = pd.DataFrame(
    {
        "accuracy": [acc(key) for key in prompt_keys],
        "json_valid": [json_rate(key) for key in prompt_keys],
    },
    index=["Prompt 1", "Prompt 2", "Prompt 3"],
)

summary


Unnamed: 0,accuracy,json_valid
Prompt 1,0.375,0.11
Prompt 2,0.355,0.1
Prompt 3,0.38,0.09
