In [None]:
# Used to access the openai API and send requests to it
!pip install pandas openai requests
# Used to create a progress tracker while the API calls are being made
!pip install tqdm
# Used to output the results to Word format
!pip install python-docx



In [None]:
# Import all the dependencies
import os
import pandas as pd
import openai
import requests
from tqdm import tqdm
import time
import docx

# Set up the OpenAI API key.
# Prefer the OPENAI_API_KEY environment variable so the real key never has to
# be saved inside the notebook; the placeholder remains as a fallback and can
# still be replaced by hand.
openai.api_key = os.environ.get("OPENAI_API_KEY", "OPENAI-API-KEY")
# Chat Completions REST endpoint (not referenced by the cells shown here).
GPT_API_URL = "https://api.openai.com/v1/chat/completions"

In [None]:
# Pin openai to 0.28: analyze_review() calls openai.ChatCompletion.create.
# NOTE: `import openai` runs in an earlier cell; if a different openai version
# was already imported there, restart the kernel after this install so the
# pinned version is the one actually loaded.
!pip install openai==0.28



In [None]:
import pandas as pd
def analyze_review(review, max_retries=3, request_delay=4):
    """Classify the sentiment of a single product review via gpt-3.5-turbo.

    The model is asked to answer with one word. Its reply is normalised
    (surrounding whitespace/punctuation removed, upper-cased) before being
    validated, so answers such as "Positive" or "NEGATIVE." are accepted
    instead of burning a retry.

    Args:
        review: Text of the review to classify.
        max_retries: Maximum number of API calls made while waiting for a
            valid label. Defaults to 3.
        request_delay: Seconds to sleep before returning, used to stay under
            the OpenAI free-tier rate limit. Defaults to 4.

    Returns:
        "POSITIVE", "NEGATIVE" or "NEUTRAL". "NEUTRAL" is also the fallback
        when no valid label was obtained within ``max_retries`` attempts, so
        the result column only ever contains these three spellings.

    Raises:
        Any exception raised by ``openai.ChatCompletion.create`` propagates
        to the caller unchanged.
    """
    valid_labels = ("POSITIVE", "NEGATIVE", "NEUTRAL")
    # Fallback label; same casing as the labels returned by the model.
    sentiment = "NEUTRAL"

    # The prompt does not change between attempts, so build it once.
    messages = [
        {"role": "system", "content": "You are an AI language model trained to analyze and detect the sentiment of product reviews."},
        {"role": "user", "content": f"Analyze the following product review and determine if the sentiment is: positive, negative or neutral. Return only a single word, either POSITIVE, NEGATIVE or NEUTRAL: {review}"}
    ]

    for _ in range(max_retries):
        completion = openai.ChatCompletion.create(
            model="gpt-3.5-turbo",
            messages=messages,
            max_tokens=3,
            n=1,
            stop=None,
            temperature=0
        )

        raw_reply = completion.choices[0].message.content or ""
        # Tolerate harmless formatting differences in the model's answer.
        label = raw_reply.strip(" \t\r\n.,:;!\"'").upper()
        if label in valid_labels:
            sentiment = label
            break

        # Invalid answer: wait briefly before asking again.
        time.sleep(1)

    # Delay between requests to avoid hitting the OpenAI free-tier rate limit.
    time.sleep(request_delay)

    return sentiment


# Read the input CSV file containing user reviews (a "review" column is required)
input_file = "reviews.csv"
df = pd.read_csv(input_file)

# Number of reviews analysed, saved and reported together
batch_size = 500
# Ceiling division: a trailing partial batch (or a file with fewer rows than
# one batch) must still be processed. Plain floor division dropped those rows.
batches = -(-len(df) // batch_size)

# Iterates over each batch to analyze reviews
for i in range(batches):

  start = i * batch_size
  end = start + batch_size
  # Work on an independent copy so adding the "sentiment" column does not
  # write into a slice of df (avoids SettingWithCopyWarning).
  batch_df = df.iloc[start:end].copy()

  # Analyzes each review in the batch by calling analyze_review()
  sentiments = [
    analyze_review(review)
    for review in tqdm(batch_df["review"], desc="Processing reviews")
  ]

  # Adds the sentiments list as a column in the batch DataFrame
  batch_df["sentiment"] = sentiments

  # Save the results to a new Excel file
  output_file = f"reviews_analyzed_{i}.xlsx"
  batch_df.to_excel(output_file, index=False)

  # Write batch results to Word doc as a two-column table
  doc = docx.Document()
  table = doc.add_table(rows=1, cols=2)
  header_cells = table.rows[0].cells
  header_cells[0].text = "Review"
  header_cells[1].text = "Sentiment"

  for review, sentiment in zip(batch_df["review"], batch_df["sentiment"]):
    row_cells = table.add_row().cells
    # Cell text must be a string; str() guards against non-text values
    # such as NaN for a missing review.
    row_cells[0].text = str(review)
    row_cells[1].text = str(sentiment)

  doc.save(f"batch_{i}.docx")

  # Delete batch DataFrame to conserve memory
  del batch_df

  # 300 second pause between batches; pointless after the final one
  if i < batches - 1:
    time.sleep(300)
