# BJJ Sentiment Analyzer GPT

The goal of this notebook is to create a fine-tuned version of the ChatGPT model that analyzes whether a YouTube video title belongs to a BJJ instructional video, returning a boolean value. Previous attempts at prompt engineering yielded inconsistent results.

### 1. Setup

In [331]:
# Load API keys from the environment and initialize the YouTube and OpenAI API clients
import os
from openai import OpenAI
from dotenv import load_dotenv
from googleapiclient.discovery import build
from sklearn.metrics import classification_report, confusion_matrix
# Load variables from a local .env file into the process environment.
load_dotenv()

# Initialize YouTube API Client
YOUTUBE_API_KEY = os.getenv("YOUTUBE_API_KEY")
youtube = build('youtube', 'v3', developerKey=YOUTUBE_API_KEY)

# Initialize OpenAI API Client
OPENAI_API_KEY = os.getenv("OPENAI_API_KEY")
# Pass the key read above; the previously used name `API_KEY` was never defined,
# so this line raised NameError on a fresh kernel.
client = OpenAI(api_key=OPENAI_API_KEY)

### 2. Helper Methods

In [361]:
def yt_get_titles(query):
    '''
    Retrieve the titles of the top 10 YouTube video search results for a query.

    Args:
        query: Search string sent to the YouTube search endpoint.

    Returns:
        A list of dicts shaped like {"prompt": <video title>, "completion": ""},
        one per video in the response. Empty when the response has no items.
    '''
    import html  # Local import keeps this helper self-contained.

    print(f"Getting Seach Results for {query}")
    youtube = build('youtube', 'v3', developerKey=YOUTUBE_API_KEY)
    request = youtube.search().list(part='snippet', type='video', q=query, maxResults=10)
    response = request.execute()

    # Titles arrive HTML-escaped. html.unescape decodes every entity
    # (&quot;, &lt;, numeric references, ...) rather than only &#39; and &amp;.
    return [
        {"prompt": html.unescape(video["snippet"]["title"]), "completion": ""}
        for video in response.get("items", [])
    ]

def yt_query_search_terms(search_terms, all_titles=None):
    '''
    Collect YouTube search-result titles for every search term.

    For each term, the results of `yt_get_titles` are appended to the output
    list. Terms are queried in the order given, and the caller's
    `search_terms` list is left untouched.

    Args:
        search_terms: Iterable of query strings. May be empty.
        all_titles: Optional list of previously collected title records to
            extend. It is copied, never modified.

    Returns:
        A new list containing the records from `all_titles` (if any) followed
        by the records fetched for each search term.
    '''
    # Copy instead of sharing a mutable default or the caller's list.
    collected = list(all_titles) if all_titles else []

    # A plain loop replaces the earlier recursion, which popped terms off the
    # caller's list and failed on an empty list.
    for term in search_terms:
        collected.extend(yt_get_titles(term))

    return collected

def openai_perform_request(messages, model="gpt-3.5-turbo", validation=False):
    '''
    Send each conversation to the chat completions endpoint and collect replies.

    Args:
        messages: List of dicts, each holding the chat messages for one
            request under the "messages" key.
        model: Name of the chat model to query.
        validation: When falsy (labelling mode), each reply is appended to its
            conversation as an assistant message. When truthy, conversations
            are left untouched and each reply is converted to a 0/1 prediction.

    Returns:
        The same `messages` list (updated in place) in labelling mode,
        otherwise a list of ints where 1 means the model answered "True".
    '''
    print("Getting responses. This may take a few minutes...")
    predictions = []

    for message in messages:
        # Perform API Call
        completion = client.chat.completions.create(
            model=model,
            messages=message["messages"]
        )

        response = completion.choices[0].message.content

        if validation:
            # Normalise before comparing so replies such as "True.", "'True'"
            # or " true" are not silently scored as 0. A missing reply
            # (None) also counts as 0.
            answer = (response or "").strip().strip("'\".").lower()
            predictions.append(1 if answer == "true" else 0)
        else:
            message["messages"].append({"role": "assistant", "content": response})

    print("Finished")
    return predictions if validation else messages
    
def print_accuracy_reports(predictions, labels):
    '''
    Print a confusion matrix followed by a classification report.

    Args:
        predictions: Predicted labels.
        labels: True labels; passed as the first argument to both metrics.
    '''
    sections = (
        ("Confusion Matrix:", confusion_matrix),
        ("Classification Report:", classification_report),
    )

    for position, (heading, metric) in enumerate(sections):
        # Blank separator line between consecutive sections only.
        if position:
            print("")
        print(heading)
        print("")
        print(metric(labels, predictions))

### 3. Collect & Create Dataset of Titles from YouTube

In [None]:
# Search queries used to build the dataset. Each query is sent to YouTube
# search and its returned video titles become unlabelled examples.
# NOTE(review): "b team" and "gordon ryan vs dillon danis" look like
# non-instructional queries, presumably included to pull in negative
# examples — confirm.
search_terms = [
    "kimura trap attacks", 
    "ashi garami entries", 
    "bjj triangle escape", 
    "bjj americana", 
    "john danaher ankle lock",
    "guillotine from turtle",
    "darce choke",
    "double leg takedown",
    "b team",
    "gordon ryan vs dillon danis",
    "uchi mata judo",
    "blast double leg takedown",
    "arm drag to single leg takedown",
    "ankle pick takedown",
    "cross collar takedown"
]

# Fetch the titles for every query above (one YouTube API request per term).
video_titles = yt_query_search_terms(search_terms)

### 4. Format and Split Data into Test & Training sets

In [294]:
# Format into acceptable format for fine-tuning
data = []
# Kept verbatim (typos included): this exact text is stored as the system
# message of every example written to the train/test files, so editing it
# would change what the model is trained and evaluated on.
system_role = "You are helpful sentiment analysis assistant whose sole purpose is to determine if the provided YouTube video titles are Brazilian Ju-Jitsu, Judo, or Wrestling instructionial videos. I only want you to give 'True' or 'False' answers with no additional information."

for video in video_titles:
    # yt_get_titles returns {"prompt": title, "completion": ""} records, but a
    # chat message's content must be the title text itself. Plain strings are
    # still accepted in case the titles were collected in that form.
    title = video["prompt"] if isinstance(video, dict) else video
    system = {"role": "system", "content": system_role}
    user = {"role": "user", "content": title}
    data.append({"messages": [system, user]})

# Use ChatGPT to generate initial label outputs
data = openai_perform_request(data)

Getting responses. This may take a few minutes...
Finished


In [306]:
# Separate into train and test sets and write each to its own JSONL file.
import json
import math
import os
import random

SPLIT_SEED = 42        # Fixed seed so the shuffle, and therefore the split, is reproducible.
TRAIN_FRACTION = 0.80  # Share of examples used for training; the rest is held out for testing.

random.Random(SPLIT_SEED).shuffle(data)

# Use a single cut point so the two sets can never overlap and together cover
# every example. The earlier hard-coded `data[:120]` ignored `train_size` and
# only matched an 80/20 split for exactly 150 examples.
train_size = math.ceil(len(data) * TRAIN_FRACTION)
test_size = len(data) - train_size

train_data = data[:train_size]
test_data = data[train_size:]

# Create train & test jsonl files (one JSON object per line).
os.makedirs("data", exist_ok=True)

for path, rows in (("data/test.jsonl", test_data), ("data/train.jsonl", train_data)):
    # The context manager closes the file; no explicit close() is needed.
    with open(path, "w") as file:
        for line in rows:
            file.write(json.dumps(line))
            file.write("\n")

At this point, I have cleanly separated my test and training data. However, because I used ChatGPT to quickly create the initial labels, they may not be accurate. I will need to perform a manual check and correct them where applicable.

### 5. Fine-Tune ChatGPT Model

In [319]:
# First, we need to upload our training data to OpenAI.
# Open the file in a context manager so the handle is closed once the upload
# call returns, instead of leaking an open file object.
with open("data/train.jsonl", "rb") as training_jsonl:
    uploaded_training_file = client.files.create(
        file=training_jsonl,
        purpose="fine-tune"
    )

# Display the returned file object (its id identifies the upload).
uploaded_training_file

FileObject(id='file-VYonMe3i4o7J6arAINwUcGsO', bytes=52903, created_at=1702868720, filename='train.jsonl', object='file', purpose='fine-tune', status='processed', status_details=None)

In [323]:
# Second, launch a fine-tuning job on the uploaded training file.
fine_tune_settings = {
    "training_file": "file-VYonMe3i4o7J6arAINwUcGsO",  # id of the uploaded train.jsonl
    "model": "gpt-3.5-turbo",
    "suffix": "grapple-genius",
}
client.fine_tuning.jobs.create(**fine_tune_settings)

FineTuningJob(id='ftjob-p0ZHj0lWb2UPDvWsOcEjsCOT', created_at=1702869054, error=None, fine_tuned_model=None, finished_at=None, hyperparameters=Hyperparameters(n_epochs='auto', batch_size='auto', learning_rate_multiplier='auto'), model='gpt-3.5-turbo-0613', object='fine_tuning.job', organization_id='org-yL8oUKm7DmHq1u5AfS2zkJL7', result_files=[], status='validating_files', trained_tokens=None, training_file='file-VYonMe3i4o7J6arAINwUcGsO', validation_file=None)

In [330]:
# Look up the fine-tuning job to see its current status and, once it has
# succeeded, the name of the resulting fine-tuned model.
fine_tune_job_id = "ftjob-p0ZHj0lWb2UPDvWsOcEjsCOT"
client.fine_tuning.jobs.retrieve(fine_tune_job_id)

FineTuningJob(id='ftjob-p0ZHj0lWb2UPDvWsOcEjsCOT', created_at=1702869054, error=None, fine_tuned_model='ft:gpt-3.5-turbo-0613:personal:grapple-genius:8WyX4haA', finished_at=1702869873, hyperparameters=Hyperparameters(n_epochs=3, batch_size=1, learning_rate_multiplier=2), model='gpt-3.5-turbo-0613', object='fine_tuning.job', organization_id='org-yL8oUKm7DmHq1u5AfS2zkJL7', result_files=['file-oDDKPJlZfDXTNwieTcaDOuoD'], status='succeeded', trained_tokens=28638, training_file='file-VYonMe3i4o7J6arAINwUcGsO', validation_file=None)

### 6. Validate & Measure Performance of Fine-Tuned Model 

In [359]:
# Get Test Labels
# Imported here so this cell can run without re-running the split cell, which
# would reshuffle the data and overwrite the (manually checked) JSONL files.
import json

with open("data/test.jsonl") as f:
    # Skip blank lines (e.g. a trailing newline left by manual editing).
    test_data = [json.loads(line) for line in f if line.strip()]

y_test = []
x_test = []

for message in test_data:
    system = message["messages"][0]
    user = message["messages"][1]
    # Normalise the stored assistant label so variants such as "True." or
    # " true" are not silently counted as the negative class.
    label = str(message["messages"][2]["content"]).strip().strip("'\".").lower()
    y_test.append(1 if label == "true" else 0)
    # Only the system and user messages are sent to the model at evaluation
    # time; the assistant message is the expected answer.
    x_test.append({"messages": [system, user]})

In [362]:
# Evaluate fine-tuned GPT predictions against normal GPT predictions
FINE_TUNED_MODEL = "ft:gpt-3.5-turbo-0613:personal:grapple-genius:8WyX4haA"
BASE_MODEL = "gpt-3.5-turbo"

# In validation mode the helper does not modify x_test, so the same inputs can
# be sent to both models.
ft_predictions = openai_perform_request(x_test, FINE_TUNED_MODEL, validation=True)
# The baseline must query the base model; it previously reused the fine-tuned
# model id, so both runs measured the same model.
norm_predictions = openai_perform_request(x_test, BASE_MODEL, validation=True)

Getting responses. This may take a few minutes...
Finished


In [365]:
# Report the fine-tuned model's performance on the held-out test labels.
# (`predictions` was never defined in this notebook; the fine-tuned results
# are stored in `ft_predictions`.)
print_accuracy_reports(ft_predictions, y_test)

Confusion Matrix:

[[ 5  2]
 [ 1 22]]

Classification Report:

              precision    recall  f1-score   support

           0       0.83      0.71      0.77         7
           1       0.92      0.96      0.94        23

    accuracy                           0.90        30
   macro avg       0.88      0.84      0.85        30
weighted avg       0.90      0.90      0.90        30

