In [None]:

import openai
import os
from openai.embeddings_utils import cosine_similarity, get_embedding

# The API key is read from the environment; create one at https://beta.openai.com/account/api-keys
openai.api_key = os.environ.get("OPENAI_API_KEY")
# Available model IDs are listed at https://beta.openai.com/models
EMBEDDING_MODEL = "text-embedding-ada-002"

# Embed the "positive review" and "negative review" anchor phrases.
# NOTE(review): no engine argument is passed to get_embedding in this cell, so
# EMBEDDING_MODEL does not select the model here -- confirm that is intended.
positive_review = get_embedding("Positive Feedback")
negative_review = get_embedding("Negative Feedback")

# Two sample reviews (one favourable, one unfavourable) to score against the anchors.
positive_example = get_embedding("""
The silver version I bought is really nice-looking, and it arrived in just one day. 
I started using it at night and found the system to be very smooth and responsive. 
The build quality is solid, and the touch feeling is delicate and exquisite. 
Apple has always provided good quality products.
""")
negative_example = get_embedding(
    "The price drop is significant, but the price protection policy is unreasonable. I do not recommend it."
)

def get_score(sample_embedding):
  """Return how much closer an embedding sits to the positive anchor than to the negative one.

  The result is the cosine similarity with the module-level ``positive_review``
  embedding minus the cosine similarity with ``negative_review``; a value above
  zero leans positive, a value below zero leans negative.
  """
  toward_positive = cosine_similarity(sample_embedding, positive_review)
  toward_negative = cosine_similarity(sample_embedding, negative_review)
  return toward_positive - toward_negative

# Score both sample reviews, in order: the favourable one first, then the unfavourable one.
positive_score, negative_score = (
    get_score(sample) for sample in (positive_example, negative_example)
)

print("Positive review rating : %f" % positive_score)
print("negative review rating : %f" % negative_score)

In [None]:
# Two mirrored restaurant reviews that share vocabulary ("delicious", "not ... at all")
# but carry opposite sentiment.
good_restaurant = get_embedding("This restaurant is so delicious, not bad at all.")
bad_restaurant = get_embedding("This restaurant is so terrible, not delicious at all.")

# Score the embeddings defined just above; the names must match those assignments
# exactly, otherwise the cell fails with a NameError.
good_score = get_score(good_restaurant)
bad_score = get_score(bad_restaurant)
print("Positive Restaurant Rating : %f" % (good_score))
print("Negative Restaurant Rating : %f" % (bad_score))

In [None]:

import pandas as pd
import numpy as np
import os

from sklearn.metrics import classification_report

current_directory = os.getcwd()

# Build the path with os.path.join instead of manual string concatenation.
datafile_path = os.path.join(current_directory, "data", "food_reviews_with_embeddings_1k.csv")

print(datafile_path)

df = pd.read_csv(datafile_path)
# Each value in the "embedding" column is text; it is evaluated and then wrapped
# in a NumPy array.
# NOTE(review): eval() executes whatever expression the CSV cell contains. If the
# column only holds plain list literals, ast.literal_eval would be a safer
# parser -- confirm the file format before switching.
df["embedding"] = df.embedding.apply(eval).apply(np.array)

# Convert the 5-star rating to a binary sentiment label.
# 3-star rows are dropped; .copy() makes the filtered frame independent so the
# column assignment below does not write into a slice of the original frame.
df = df[df.Score != 3].copy()
df["sentiment"] = df.Score.replace({1: "negative", 2: "negative", 4: "positive", 5: "positive"})

In [None]:

from sklearn.metrics import PrecisionRecallDisplay

def evaluate_embeddings_approach(
    labels = ['negative', 'positive'],
    model = EMBEDDING_MODEL,
):
    """Classify the reviews in the module-level ``df`` by label embeddings and report quality.

    Args:
        labels: Label texts to embed. Index 0 is used as the negative label and
            index 1 as the positive label.
        model: Passed to ``get_embedding`` as ``engine`` for every label.

    Each row's ``embedding`` is scored as (cosine similarity to the positive
    label) minus (cosine similarity to the negative label); scores above zero
    are predicted 'positive', everything else 'negative'. Prints a
    classification report against ``df.sentiment`` and draws a
    precision-recall curve from the raw scores. Returns None.
    """
    embedded_labels = []
    for label_text in labels:
        embedded_labels.append(get_embedding(label_text, engine=model))

    def margin(review_embedding):
        # Positive-minus-negative similarity; the sign decides the predicted class.
        toward_positive = cosine_similarity(review_embedding, embedded_labels[1])
        toward_negative = cosine_similarity(review_embedding, embedded_labels[0])
        return toward_positive - toward_negative

    scores = df["embedding"].apply(margin)
    predicted = scores.apply(lambda score: 'positive' if score > 0 else 'negative')

    print(classification_report(df.sentiment, predicted))

    # The raw margins (not the thresholded labels) drive the precision-recall curve.
    pr_display = PrecisionRecallDisplay.from_predictions(df.sentiment, scores, pos_label='positive')
    pr_display.ax_.set_title("2-class Precision-Recall curve")

# Run the evaluation with full-sentence label prompts (negative first, positive
# second) in place of the bare default labels.
evaluate_embeddings_approach(
    labels=[
        'An Amazon review with a negative sentiment.',
        'An Amazon review with a positive sentiment.',
    ]
)