In [1]:
import instructor
import openai
from openai import OpenAI
import enum 
from datasets import load_dataset
import numpy as np
from tqdm import tqdm

# Load variables from a local .env file into the process environment, so the
# API key does not have to be written into the notebook itself.
from dotenv import load_dotenv
load_dotenv()

# Read the API key from the environment and set it on the `openai` module.
# os.getenv returns None when OPENAI_API_KEY is unset, so a missing key is not
# reported at this point.
# NOTE(review): the `OpenAI()` client created further down presumably reads the
# same environment variable on its own -- confirm whether this assignment is
# still needed.
import os
openai.api_key = os.getenv("OPENAI_API_KEY")

In [2]:
from pydantic import BaseModel

# Wrap the OpenAI client with instructor. `classify` relies on this patched
# client accepting the extra `response_model` argument.
# NOTE(review): newer instructor releases presumably prefer
# `instructor.from_openai(...)` over `instructor.patch(...)` -- confirm against
# the installed version before changing this line.
client = instructor.patch(OpenAI())

In [3]:
# Closed set of sentiment classes a prediction may take. The string values are
# the same strings used as values in the `itol` mapping, which is what makes
# predictions and targets directly comparable.
# NOTE(review): documented with `#` comments on purpose -- a class docstring
# would presumably end up in the schema sent to the model; confirm before
# adding one.
class Labels(str, enum.Enum):
    POSITIVE = "positive"
    NEUTRAL = "neutral"
    NEGATIVE = "negative"
    
# Structured-output schema that `classify` passes as `response_model`.
# NOTE(review): the docstring below is presumably forwarded to the model as the
# schema description, so treat it as prompt text rather than documentation --
# editing it may change model behaviour.
class SinglePrediction(BaseModel):
    """
    Correct class label for the given text
    """

    # Predicted sentiment, restricted to the members of `Labels`.
    class_label: Labels

In [14]:
def classify(
    data: str,
    model: str = "gpt-3.5-turbo-0613",
    temperature: float = 0.4,
) -> SinglePrediction:
    """Classify the sentiment of a single tweet with one chat-completion call.

    Args:
        data: Tweet text; it is interpolated verbatim into the user message.
        model: Model name forwarded to the chat-completions API. The default is
            the model this notebook was originally run with.
        temperature: Sampling temperature forwarded to the API. The default
            keeps the original value of 0.4.

    Returns:
        The response object produced for ``response_model=SinglePrediction``;
        callers read the predicted class from its ``class_label`` field.
    """
    return client.chat.completions.create(
        model=model,
        temperature=temperature,
        response_model=SinglePrediction,
        messages=[
            {
                "role": "system",
                # Plain string: there is nothing to interpolate here.
                "content": "You are a world class algorithm to identify the sentiment of tweets.",
            },
            {
                "role": "user",
                "content": f"Classify the sentiment of the following tweet: {data}",
            },
        ],
    )

In [5]:
# Lookup tables between the dataset's integer class ids and label strings.
# The position of each label in the tuple is its integer id.
itol = dict(enumerate(("negative", "neutral", "positive")))
# Reverse direction: label string -> integer id.
ltoi = {label: idx for idx, label in itol.items()}

In [6]:
def accuracy(preds, targets):
    """Return the fraction of predictions that equal their target.

    Args:
        preds: Sequence of predicted labels.
        targets: Sequence of ground-truth labels, aligned element-wise with
            ``preds``.

    Returns:
        The accuracy as a plain ``float``; ``nan`` when both inputs are empty.

    Raises:
        ValueError: If ``preds`` and ``targets`` do not have the same shape.
    """
    preds_arr = np.asarray(preds)
    targets_arr = np.asarray(targets)

    # Without this check, broadcast-compatible inputs (e.g. lengths 3 and 1)
    # would be compared silently and produce a meaningless score.
    if preds_arr.shape != targets_arr.shape:
        raise ValueError(
            f"preds and targets must have the same shape, "
            f"got {preds_arr.shape} and {targets_arr.shape}"
        )

    # Accuracy of nothing is undefined; return nan explicitly instead of
    # relying on a 0/0 division that also emits a runtime warning.
    if preds_arr.size == 0:
        return float("nan")

    return float(np.mean(preds_arr == targets_arr))

In [7]:
# Load the "sentiment" configuration of the tweet_eval dataset from the
# Hugging Face hub. Later cells read the "text" and "label" columns of its
# "train" split.
dataset = load_dataset("tweet_eval", "sentiment")

In [15]:
# Evaluate on a fixed window of 100 training examples. Slicing the split once
# fetches only those rows (as a dict of column -> list) and guarantees that
# texts and labels come from exactly the same range.
EVAL_SLICE = slice(200, 300)
eval_batch = dataset["train"][EVAL_SLICE]

# One API call per tweet; keep the predicted label as its plain string value.
preds = [classify(t).class_label.value for t in tqdm(eval_batch["text"])]
# Convert the integer class ids to the same label strings for comparison.
targets = [itol[l] for l in eval_batch["label"]]

100%|██████████| 100/100 [01:18<00:00,  1.28it/s]


In [17]:
# Accuracy on the 100-example slice: 0.66 in the recorded run. That is better
# than random guessing -- and remember this is without any training.
# NOTE(review): `classify` samples with temperature 0.4, so the exact number
# may vary between runs.
accuracy(preds, targets)

0.66