In [14]:
import pandas as pd

# The call passes header=None, so the first spreadsheet row is read as data
# and the two columns are named explicitly here.
df = pd.read_excel(
    "rating.xlsx",
    names=["review", "rating"],
    header=None,
)

# Quick sanity check: (rows, columns) followed by the first few rows.
print(df.shape)
df.head()


(50, 2)


Unnamed: 0,review,rating
0,We recently visited Raised by Wolves and unfor...,2
1,Service has gone down while drink prices have ...,1
2,Let me just start off by saying wow! This no-f...,4
3,"The fries are fresh cut, the burger is juicy, ...",4
4,burger was good fries tasted lik oil,4


In [15]:
import re

def preprocess_review(text: str) -> str:
    """Normalize one raw review into lowercase, single-spaced text.

    Args:
        text: Raw cell value; non-string values are converted with ``str``.

    Returns:
        The cleaned review, or an empty string when the value is missing
        (``None`` / NaN).
    """
    # Missing cells become an empty review instead of the string "nan".
    if pd.isna(text):
        return ""

    cleaned = str(text).lower()

    # Turn line breaks and tabs into plain spaces.
    for control_char in ("\n", "\r", "\t"):
        cleaned = cleaned.replace(control_char, " ")

    # Squeeze every run of whitespace to one space, then trim both ends.
    cleaned = re.sub(r"\s+", " ", cleaned)
    return cleaned.strip()

# Clean every review in place (lowercase, whitespace normalized).
df["review"] = df["review"].map(preprocess_review)

# Ratings are cast to plain integers.
df["rating"] = df["rating"].astype(int)

df.head()


Unnamed: 0,review,rating
0,we recently visited raised by wolves and unfor...,2
1,service has gone down while drink prices have ...,1
2,let me just start off by saying wow! this no-f...,4
3,"the fries are fresh cut, the burger is juicy, ...",4
4,burger was good fries tasted lik oil,4


In [16]:
from sklearn.model_selection import train_test_split

# Hold out 30% of the reviews for testing; stratifying on the rating keeps
# the star distribution similar in both splits, and the fixed seed makes the
# split repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    df["review"],
    df["rating"],
    stratify=df["rating"],
    test_size=0.3,
    random_state=42,
)


In [17]:
from sklearn.metrics import accuracy_score, mean_squared_error

def keyword_baseline(review: str) -> int:
    """Predict a 1-5 star rating from simple sentiment keyword counts.

    Each keyword is counted at most once, however often it occurs. A keyword
    only counts when it appears at the start of a word, so inflected forms
    such as "loved" or "rudely" still match, while accidental hits buried
    inside another word ("gem" in "management", "love" in "glove", "cold" in
    "scolded") are ignored. Negation ("not good") is not handled.

    Args:
        review: Review text; case is ignored.

    Returns:
        5 when positive keywords lead by two or more, 4 when they lead by
        one, 1 when negative keywords lead by two or more, 2 when they lead
        by one, and 3 on a tie.
    """
    text = review.lower()

    positive_words = [
        "amazing", "great", "delicious", "excellent", "love",
        "perfect", "tasty", "friendly", "good", "wonderful", "back", "gem"
    ]
    negative_words = [
        "worst", "rude", "terrible", "awful", "disgusting", "horrible", "bad",
        "overpriced", "never", "bland", "cold", "fake"
    ]

    def count_present(words):
        # \b anchors the keyword to the beginning of a word; a plain
        # substring test would also fire in the middle of unrelated words.
        return sum(
            re.search(r"\b" + re.escape(word), text) is not None
            for word in words
        )

    # number of distinct positive / negative keywords present
    num_pos = count_present(positive_words)
    num_neg = count_present(negative_words)

    # convert the keyword balance to a rating from 1-5
    if num_pos >= num_neg + 2:
        return 5
    elif num_pos > num_neg:
        return 4
    elif num_neg >= num_pos + 2:
        return 1
    elif num_neg > num_pos:
        return 2
    else:
        # tie
        return 3

# Run the keyword baseline over the held-out reviews. The result is a plain
# list ordered like X_test.
baseline_preds = list(map(keyword_baseline, X_test))

# Exact-match accuracy plus MSE on the predicted star values.
baseline_acc = accuracy_score(y_true=y_test, y_pred=baseline_preds)
baseline_mse = mean_squared_error(y_true=y_test, y_pred=baseline_preds)

print(f"Baseline accuracy on test set: {baseline_acc:.3f}")
print(f"Baseline MSE on test set:      {baseline_mse:.3f}")


Baseline accuracy on test set: 0.267
Baseline MSE on test set:      2.467


In [18]:
# Load the pretrained review model as a text-classification pipeline

import os
# NOTE(review): presumably this must be set before transformers is imported
# to have any effect - confirm; the assignment is kept ahead of the import.
os.environ["TRANSFORMERS_NO_TORCHVISION"] = "1"

from transformers import pipeline

model_name = "Festooned/Multilingual-Restaurant-Reviews-Sentiment"

# Model weights and tokenizer are both taken from the same checkpoint name.
clf = pipeline("text-classification", model=model_name, tokenizer=model_name)

print("Model loaded!")


Device set to use cpu


Model loaded!


In [19]:
def hf_predict_stars(text: str) -> int:
    """Predict a 1-5 star rating for one review with the Hugging Face pipeline.

    Args:
        text: Review text to score.

    Returns:
        The ``score`` of the pipeline's first result, rounded with Python's
        ``round`` and clamped to the range 1-5.
    """
    # truncation=True clips reviews that exceed the model's maximum sequence
    # length instead of letting the forward pass fail on them.
    out = clf(text, truncation=True)[0]

    # NOTE(review): "score" is used directly as the predicted star value,
    # which presumably holds only if the model has a regression-style head -
    # confirm against the model card (for a classifier, "score" would be a
    # confidence and "label" would carry the class).
    score = out["score"]

    # Round to a whole star and keep the result inside the valid 1-5 range.
    stars = round(score)
    stars = max(1, min(5, stars))
    return int(stars)

In [20]:
# Score every held-out review with the transformer model. The result is a
# plain list ordered like X_test.
hf_preds = list(map(hf_predict_stars, X_test))
hf_acc = accuracy_score(y_true=y_test, y_pred=hf_preds)

# Side-by-side accuracy of the two approaches.
print(f"Baseline accuracy:     {baseline_acc:.3f}")
print(f"Hugging Face accuracy: {hf_acc:.3f}")


Baseline accuracy:     0.267
Hugging Face accuracy: 0.267


In [21]:
from sklearn.metrics import mean_squared_error

# MSE penalizes predictions by how far off they are in stars, unlike
# exact-match accuracy.
baseline_mse = mean_squared_error(y_true=y_test, y_pred=baseline_preds)
hf_mse = mean_squared_error(y_true=y_test, y_pred=hf_preds)

print(f"Baseline MSE:     {baseline_mse:.3f}")
print(f"Hugging Face MSE: {hf_mse:.3f}")


Baseline MSE:     2.467
Hugging Face MSE: 1.333


In [31]:
# Example 1

# Row label (from the original DataFrame index) of the review to inspect.
example_label = 26
# X_test / y_test are looked up by row label, but the prediction lists are
# ordered by position in X_test, so translate the label into that position
# instead of hard-coding it.
example_pos = X_test.index.get_loc(example_label)

print(X_test.loc[example_label])
print()
print("true rating: ", y_test.loc[example_label])
print("baseline rating: ",baseline_preds[example_pos])
print("pipeline rating: ", hf_preds[example_pos])


i've been a regular customer since before this restaurant changed ownership. i'm a big fan of having a local restaurant in our university city community. during my last visit to the restaurant i purchased a shawarma to-go. i was sadly surprised when i got home to see that it had very little meat. when i got home i called the restaurant to let them know. it's disappointing to see food quality change and i hope this local restaurant can address this issue. during this time of rising prices i expect to pay more but i also expect the same quality.

true rating:  2
baseline rating:  3
pipeline rating:  2


In [33]:
# Example 2

# Row label (from the original DataFrame index) of the review to inspect.
example_label = 24
# X_test / y_test are looked up by row label, but the prediction lists are
# ordered by position in X_test, so translate the label into that position
# instead of hard-coding it.
example_pos = X_test.index.get_loc(example_label)

print(X_test.loc[example_label])
print()
print("true rating: ", y_test.loc[example_label])
print("baseline rating: ",baseline_preds[example_pos])
print("pipeline rating: ", hf_preds[example_pos])


no i wouldn’t recommend it the food was not good and the hostess did not care for my satisfaction. i told her that my chicken was cheery and she responded with “i don’t know i’m not the cook”. i know your not the cook but i was not the only one who returned the food, the proper response could of been “sorry for that, i’ll let the cook know” .........that’s probably why this place it’s soo empty. sorry guys i wouldn’t come again

true rating:  1
baseline rating:  4
pipeline rating:  1


In [34]:
# Example 3

# Row label (from the original DataFrame index) of the review to inspect.
example_label = 33
# X_test / y_test are looked up by row label, but the prediction lists are
# ordered by position in X_test, so translate the label into that position
# instead of hard-coding it.
example_pos = X_test.index.get_loc(example_label)

print(X_test.loc[example_label])
print()
print("true rating: ", y_test.loc[example_label])
print("baseline rating: ",baseline_preds[example_pos])
print("pipeline rating: ", hf_preds[example_pos])


chill local vibes in this patio brewpub. the lamb chops were the standout of our meal followed by their seasoned fries. although we didn't try them, their burgers were a popular choice at tables all around us.

true rating:  4
baseline rating:  3
pipeline rating:  3
