# ChatGPT for Sentiment

## Load Libraries and Data

In [1]:
# Installing OpenAI on Colab:
# !pip install openai

# Installing OpenAI locally:
# pip install openai
# conda install openai

import os
import io
import requests
import re
import numpy as np
import openai
from sklearn.metrics import accuracy_score

# Note: see instructions for OpenAI setup at
# https://github.com/openai/openai-python
openai.api_key = os.getenv('OPENAI_API_KEY')

### Data Source

IMDB movie reviews via: http://ai.stanford.edu/~amaas/data/sentiment/

In [2]:
train_url = 'https://raw.githubusercontent.com/natecraig/aiml/main/Data/movie_train.txt'
test_url = 'https://raw.githubusercontent.com/natecraig/aiml/main/Data/movie_test.txt'

train_download = requests.get(train_url).content
test_download = requests.get(test_url).content

# The first 12,500 reviews are positive,
# and the second 12,500 reviews are negative
    
X_test_raw = []
for l in io.StringIO(test_download.decode('utf-8')):
    X_test_raw.append(l.strip())
    
categories = ['Negative', 'Positive']
y_test = [1 if i < 12500 else 0 for i in range(25000)]

In [3]:
# Drop HTML line breaks
regex = re.compile("(<br\s*/><br\s*/>)|(\-)|(\/)")
X_test = [regex.sub(' ', x) for x in X_test_raw]

In [4]:
print(X_test[1000])



In [5]:
print(y_test[1000])

1


## Assess Sentiment Using ChatGPT

In [6]:
# Randomly sample the testing data
ntest = 50
randidx = np.random.choice(len(X_test), ntest, replace=False)
X_test_sub = [X_test[i] for i in randidx]
y_test_sub = [y_test[i] for i in randidx]

In [7]:
# Inference using ChatGPT
prompt_pre = ("Decide whether a movie review's sentiment is positive or negative.\n"
              'Statement: ')
prompt_post = '\nSentiment:'

y_pred = [0]*ntest

for i in range(ntest):
    prompt = prompt_pre + X_test_sub[i] + prompt_post
    messages=[{
        'role': 'assistant',
        'content': prompt
    }]
    
    completion = openai.ChatCompletion.create(model='gpt-3.5-turbo', 
                                              messages=messages)
    
    sentiment = completion['choices'][0]['message']['content']
    y_pred[i] = 1 if sentiment == 'Positive' else 0
    

In [8]:
accuracy_score(y_test_sub, y_pred)

0.92