# Finetuned GPT-4o-mini

In [None]:
!pip install --upgrade openai

Collecting openai
  Downloading openai-1.54.3-py3-none-any.whl.metadata (24 kB)
Downloading openai-1.54.3-py3-none-any.whl (389 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m389.6/389.6 kB[0m [31m5.1 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: openai
  Attempting uninstall: openai
    Found existing installation: openai 1.52.2
    Uninstalling openai-1.52.2:
      Successfully uninstalled openai-1.52.2
Successfully installed openai-1.54.3


In [None]:
import getpass
import json
import os
import time

from openai import OpenAI

In [None]:
# Colab only: upload the local .jsonl files that later cells open by name.
# On a regular Jupyter server, put the files in the notebook's working directory
# instead; the Colab import is unavailable there and this cell does nothing.
try:
  from google.colab import files
except ImportError:
  uploaded = None  # not running in Colab, so there is nothing to upload
else:
  # A "Choose Files" button appears. Select the files and wait until each one
  # reaches 100% and its name is shown before running the next cell.
  uploaded = files.upload()

In [None]:
def str_to_bool(string):
  """Convert a "True"/"False" label string into a bool.

  Surrounding whitespace and letter case are ignored, so a reply such as
  "True" followed by a newline, or "true", is still read as True. Any other
  text, and any non-string value such as None, maps to False.

  Args:
    string: Label text, for example a dataset label or the content of a
      chat completion message.

  Returns:
    True if the normalised text equals "true", otherwise False.
  """
  if not isinstance(string, str):
    return False
  return string.strip().lower() == "true"

In [None]:
# test_set: one prompt (list of chat messages) per example.
# y_test: the matching ground-truth labels as booleans.
test_set = []
y_test = []

# JSON Lines files are UTF-8; state it explicitly instead of relying on the platform default.
with open("test_set.jsonl", "r", encoding="utf-8") as file:
  for line in file:
    if not line.strip():
      continue  # tolerate blank lines, e.g. an empty line at the end of the file
    data = json.loads(line)
    # messages[2] is read as the ground-truth label, so only the messages before it
    # form the prompt. Sending the full list would hand the model the answer it is
    # being evaluated on.
    # NOTE(review): assumes every example has exactly the prompt messages at
    # indices 0-1 followed by the answer at index 2 — confirm against the dataset.
    test_set.append(data['messages'][:2])
    y_test.append(str_to_bool(data['messages'][2]['content']))

In [None]:
# Never store the key in the notebook. Reuse OPENAI_API_KEY when it is already set
# in the environment; otherwise ask for it with a prompt that does not echo the input.
api_key = os.environ.get('OPENAI_API_KEY') or getpass.getpass("OpenAI API key: ")
os.environ['OPENAI_API_KEY'] = api_key

In [None]:
# Base model that the fine-tuning job is started from.
our_model = "gpt-4o-mini-2024-07-18"

# The client is created without arguments; the API key is expected to come from
# the OPENAI_API_KEY environment variable set in an earlier cell.
client = OpenAI()

## Finetuning

In [None]:
# Upload the training data. The context manager closes the file handle as soon as
# the upload call returns instead of leaving it open for the rest of the session.
with open("training_set.jsonl", "rb") as training_file:
  response = client.files.create(
    file=training_file,
    purpose="fine-tune"
  )

In [None]:
response

FileObject(id='file-vss7MURIlec6uNgnXhPkL420', bytes=1029890, created_at=1731103442, filename='training_set.jsonl', object='file', purpose='fine-tune', status='processed', status_details=None)

In [None]:
# Keep the uploaded training file's ID; it is passed as `training_file` when the job is created.
training_file_id = response.id
training_file_id

'file-vss7MURIlec6uNgnXhPkL420'

In [None]:
# Upload the validation data. The context manager closes the file handle as soon as
# the upload call returns instead of leaving it open for the rest of the session.
with open("validation_set.jsonl", "rb") as validation_file:
  response = client.files.create(
    file=validation_file,
    purpose="fine-tune"
  )

In [None]:
response

FileObject(id='file-cMPI6AtlUJAlLrLo7P6xyNwJ', bytes=249182, created_at=1731103442, filename='validation_set.jsonl', object='file', purpose='fine-tune', status='processed', status_details=None)

In [None]:
# Keep the uploaded validation file's ID; it is passed as `validation_file` when the job is created.
validation_file_id = response.id
validation_file_id

'file-cMPI6AtlUJAlLrLo7P6xyNwJ'

In [None]:
# Start the wall-clock timer just before submitting the job; a later cell reads
# `start` to report how long fine-tuning took.
start = time.time()

# Submit the fine-tuning job on the base model, using the two uploaded files.
response = client.fine_tuning.jobs.create(
  training_file=training_file_id,
  validation_file=validation_file_id,
  model=our_model
)

In [None]:
# Keep the job ID; later cells use it to look up the job's status.
fine_tuning_job_id = response.id
fine_tuning_job_id

'ftjob-EjcreIsAxmtkDsqYlnqjPWQu'

In [None]:
# Fetch the job's current record and display it.
status = client.fine_tuning.jobs.retrieve(fine_tuning_job_id)
status

FineTuningJob(id='ftjob-EjcreIsAxmtkDsqYlnqjPWQu', created_at=1731103443, error=Error(code=None, message=None, param=None), fine_tuned_model=None, finished_at=None, hyperparameters=Hyperparameters(n_epochs='auto', batch_size='auto', learning_rate_multiplier='auto'), model='gpt-4o-mini-2024-07-18', object='fine_tuning.job', organization_id='org-tI0WpKootnbW2KXQbHxLn75u', result_files=[], seed=561201982, status='validating_files', trained_tokens=None, training_file='file-vss7MURIlec6uNgnXhPkL420', validation_file='file-cMPI6AtlUJAlLrLo7P6xyNwJ', estimated_finish=None, integrations=[], user_provided_suffix=None)

## Run the Finetuned Model

You can check whether the fine-tuning job has finished in the [fine-tuning UI](https://platform.openai.com/finetune/).

In [None]:
# Poll the job until it reaches a terminal state. "cancelled" is terminal as well:
# without it, a job cancelled from the UI or API would keep this loop running forever.
status_status = client.fine_tuning.jobs.retrieve(fine_tuning_job_id).status
while status_status not in ["succeeded", "failed", "cancelled"]:
  time.sleep(2)  # short pause between status requests
  status_status = client.fine_tuning.jobs.retrieve(fine_tuning_job_id).status

In [None]:
# Report the wall-clock time elapsed since `start` was set, just before the job was submitted.
end = time.time()
print(f"{end - start:.4f} seconds")

2656.2100 seconds


In [None]:
# Fetch the job record again now that polling has stopped, and display it.
status = client.fine_tuning.jobs.retrieve(fine_tuning_job_id)
status

FineTuningJob(id='ftjob-EjcreIsAxmtkDsqYlnqjPWQu', created_at=1731103443, error=Error(code=None, message=None, param=None), fine_tuned_model='ft:gpt-4o-mini-2024-07-18:personal::ARS3xEQw', finished_at=1731106091, hyperparameters=Hyperparameters(n_epochs=3, batch_size=2, learning_rate_multiplier=1.8), model='gpt-4o-mini-2024-07-18', object='fine_tuning.job', organization_id='org-tI0WpKootnbW2KXQbHxLn75u', result_files=['file-9WkgYLtjPEQZK96txvPdeQaZ'], seed=561201982, status='succeeded', trained_tokens=683061, training_file='file-vss7MURIlec6uNgnXhPkL420', validation_file='file-cMPI6AtlUJAlLrLo7P6xyNwJ', estimated_finish=None, integrations=[], user_provided_suffix=None)

In [None]:
# Name of the fine-tuned model; the prediction cell passes it as `model`.
# NOTE(review): presumably None unless the job succeeded — confirm before running inference.
fine_tuned_model = status.fine_tuned_model
fine_tuned_model

'ft:gpt-4o-mini-2024-07-18:personal::ARS3xEQw'

In [None]:
# Query the fine-tuned model once per test example, collect the parsed boolean
# predictions in order, and time the whole pass.
y_pred = []
start = time.time()

for messages in test_set:
  completion = client.chat.completions.create(model=fine_tuned_model, messages=messages)
  reply_text = completion.choices[0].message.content
  y_pred.append(str_to_bool(reply_text))

end = time.time()
print(f"{end - start:.4f} seconds")

174.5430 seconds


## Evaluation

In [None]:
from sklearn.metrics import confusion_matrix, accuracy_score, f1_score, recall_score

# Results are stored under names that differ from the imported metric functions.
# Rebinding `confusion_matrix` or `f1_score` to a result would leave those names
# uncallable for any other cell run afterwards in the same kernel.
conf_matrix = confusion_matrix(y_test, y_pred)
print(conf_matrix)

accuracy = accuracy_score(y_test, y_pred)
print(f"Accuracy: {accuracy}")

f1 = f1_score(y_test, y_pred)
print(f"F1 Score: {f1}")

recall = recall_score(y_test, y_pred)
print(f"Recall: {recall}")

[[ 49  16]
 [  8 121]]
Accuracy: 0.8762886597938144
F1 Score: 0.9097744360902256
Recall: 0.937984496124031
