In [1]:
import instructor
import openai
from openai import OpenAI
import enum 
from datasets import load_dataset
import numpy as np
from tqdm import tqdm

# Load variables from a local .env file into the process environment.
from dotenv import load_dotenv
load_dotenv()

# Read the OpenAI API key from the environment. os.getenv returns None when
# OPENAI_API_KEY is unset, so a missing key is not reported at this point.
import os
openai.api_key = os.getenv("OPENAI_API_KEY")

In [2]:
from pydantic import BaseModel

# Wrap the OpenAI client with instructor; classify() below calls this patched
# client with a `response_model=` argument.
client = instructor.patch(OpenAI())

In [25]:
# Closed set of sentiment labels. Mixing in str makes each member compare equal
# to its plain string value (e.g. Labels.POSITIVE == "positive").
# NOTE(review): documented with comments rather than a docstring on purpose —
# a docstring here may end up in the schema sent to the model; confirm before adding one.
class Labels(str, enum.Enum):
    POSITIVE = "positive"
    NEGATIVE = "negative"
    
# Structured result requested by classify(): a single sentiment label.
# NOTE(review): the class docstring is presumably forwarded to the model as the
# schema description by instructor/pydantic, so treat it as prompt text —
# confirm before rewording it.
class SinglePrediction(BaseModel):
    """
    Correct class label for the given text
    """

    class_label: Labels  # annotated as one of the Labels members

In [26]:
def classify(
    data: str,
    model: str = "gpt-3.5-turbo-0613",
    temperature: float = 0.4,
) -> SinglePrediction:
    """Classify the sentiment of a single movie review.

    Args:
        data: Raw review text; interpolated verbatim into the user message.
        model: Chat model name passed to the API. Defaults to the snapshot
            that was previously hard-coded here.
        temperature: Sampling temperature passed to the API. Defaults to the
            previously hard-coded 0.4.

    Returns:
        The object produced by the patched client for
        ``response_model=SinglePrediction``.
    """
    # NOTE(review): the default model is a dated snapshot; check the provider's
    # deprecation schedule and pass `model=` explicitly if it is retired.
    return client.chat.completions.create(
        model=model,
        temperature=temperature,
        response_model=SinglePrediction,
        messages=[
            {
                "role": "system",
                # Plain literal: there is nothing to interpolate in this message.
                "content": "You are a world class algorithm to identify the sentiment of movie reviews.",
            },
            {
                "role": "user",
                "content": f"Classify the sentiment of the following movie review: {data}",
            },
        ],
    )

In [27]:
# Lookup table from the dataset's integer labels to their string names
# (one direction only: int -> label).
itol = {
    0: "negative",
    1: "positive",
}

In [6]:
def accuracy(preds, targets):
    """Return the fraction of predictions that equal their target.

    Args:
        preds: Sequence of predicted labels.
        targets: Sequence of ground-truth labels, aligned index-by-index
            with ``preds``.

    Returns:
        Number of matching positions divided by ``len(preds)``.

    Raises:
        ValueError: If the two sequences differ in length or are empty.
    """
    # Without this check NumPy may broadcast a length-1 sequence against a
    # longer one and report an "accuracy" greater than 1.
    if len(preds) != len(targets):
        raise ValueError(
            f"preds and targets must have the same length, "
            f"got {len(preds)} and {len(targets)}"
        )
    # Avoid a silent 0/0 -> nan on empty input.
    if len(preds) == 0:
        raise ValueError("cannot compute accuracy of empty predictions")
    return np.sum(np.array(preds) == np.array(targets)) / len(preds)

In [36]:
# Grab the rotten_tomatoes movie-review dataset from Hugging Face and take a
# random subset of reviews from its train split.
SUBSET_SIZE = 100  # number of reviews to evaluate (one classify() call each)
SHUFFLE_SEED = 42  # fixed seed so the same subset is drawn on every run
dataset = load_dataset("rotten_tomatoes")
subset = dataset["train"].shuffle(seed=SHUFFLE_SEED).select(range(SUBSET_SIZE))

In [38]:
# One classify() call per review text, in order, with a tqdm progress bar;
# keep only the predicted label's string value.
preds = [
    prediction.class_label.value
    for prediction in map(classify, tqdm(subset["text"]))
]
# Ground-truth labels converted from integers to strings via itol.
targets = [itol[label_id] for label_id in subset["label"]]

100%|██████████| 100/100 [01:13<00:00,  1.36it/s]


In [39]:
# Recorded run: 0.88 — much better than random guessing, and obtained without
# any training. The exact value may differ between runs.
accuracy(preds, targets)

0.88