# Finetuned GPT-4o-mini

## Preparation

In [None]:
!pip install --upgrade openai

Collecting openai
  Downloading openai-1.55.3-py3-none-any.whl.metadata (24 kB)
Downloading openai-1.55.3-py3-none-any.whl (389 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m389.6/389.6 kB[0m [31m10.0 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: openai
  Attempting uninstall: openai
    Found existing installation: openai 1.54.4
    Uninstalling openai-1.54.4:
      Successfully uninstalled openai-1.54.4
Successfully installed openai-1.55.3


In [None]:
import time
import json
import os
from openai import OpenAI
from sklearn.metrics import confusion_matrix, accuracy_score, precision_score, f1_score, recall_score

In [None]:
# Fetch the per-project datasets only once: the download is skipped when a
# local "data" directory already exists.
if not os.path.exists("data"):
  # github-clone provides the `ghclone` command, which is pointed at the
  # repository's data/ subdirectory and clones it into ./data.
  !pip install github-clone
  !ghclone https://github.com/yiw008/nondet-project/tree/main/data

Collecting github-clone
  Downloading github_clone-1.2.0-py3-none-any.whl.metadata (3.7 kB)
Collecting docopt>=0.6.2 (from github-clone)
  Downloading docopt-0.6.2.tar.gz (25 kB)
  Preparing metadata (setup.py) ... [?25l[?25hdone
Downloading github_clone-1.2.0-py3-none-any.whl (9.1 kB)
Building wheels for collected packages: docopt
  Building wheel for docopt (setup.py) ... [?25l[?25hdone
  Created wheel for docopt: filename=docopt-0.6.2-py2.py3-none-any.whl size=13706 sha256=8a33219692b8e18eca28dbc6f76484569b2afe6edc7c15d6685118f607d9e87a
  Stored in directory: /root/.cache/pip/wheels/fc/ab/d4/5da2067ac95b36618c629a5f93f809425700506f72c9732fac
Successfully built docopt
Installing collected packages: docopt, github-clone
Successfully installed docopt-0.6.2 github-clone-1.2.0
Cloning into 'data'...
done.


In [None]:
def str_to_bool(string):
  """Convert a textual "True"/"False" label into a Python bool.

  Surrounding whitespace and letter case are ignored, so a model reply such as
  "true" or "True" followed by a newline is still recognised as True.

  Args:
    string: The label text (a dataset label or a model reply).

  Returns:
    True only when the normalised text equals "true". Everything else --
    "False", unrecognised text, None or any non-string value -- maps to False,
    which keeps the original fallback behaviour.
  """
  if not isinstance(string, str):
    # e.g. a completion whose message content is None
    return False
  return string.strip().lower() == "true"

In [None]:
# Paste a key here only for ad-hoc runs and never commit it. Leaving this empty
# keeps whatever OPENAI_API_KEY is already set in the environment.
api_key = "" # TODO
if api_key:
  # Export only a real key: writing the empty placeholder would overwrite a
  # valid key that is already present in the environment.
  os.environ['OPENAI_API_KEY'] = api_key

In [None]:
# No key is passed explicitly; the OpenAI client falls back to the
# OPENAI_API_KEY environment variable.
client = OpenAI()
# Base model snapshot that every per-project fine-tuning job starts from.
our_model = "gpt-4o-mini-2024-07-18"

In [None]:
def test_project(project_name, poll_interval=10):
  """Fine-tune ``our_model`` on one project's training set and evaluate it.

  Reads ``data/<project_name>/test_set.jsonl``, uploads
  ``data/<project_name>/training_set.jsonl``, runs a fine-tuning job, waits for
  it to finish, queries the fine-tuned model once per test example and prints
  and returns the classification metrics.

  Args:
    project_name: Name of the sub-directory of ``data/`` holding the
      ``training_set.jsonl`` and ``test_set.jsonl`` files.
    poll_interval: Seconds to sleep between status checks of the fine-tuning
      job.

  Returns:
    A tuple ``(accuracy, precision, f1, recall)`` computed with scikit-learn
    on the boolean labels and predictions.

  Raises:
    RuntimeError: If the fine-tuning job ends in any state other than
      "succeeded" (there is then no fine-tuned model to evaluate).
  """
  print(f"Test project: {project_name}")
  test_set = []
  y_test = []
  with open(f"data/{project_name}/test_set.jsonl", "r", encoding="utf-8") as file:
    for line in file:
      if not line.strip():
        continue  # tolerate blank / trailing lines in the JSONL file
      data = json.loads(line)
      # messages[2] holds the expected answer (the label). Only the messages
      # before it are kept as the prompt, so the label is never sent to the
      # model at inference time.
      test_set.append(data['messages'][:2])
      y_test.append(str_to_bool(data['messages'][2]['content']))

  # Context manager so the upload handle is closed once the file is sent.
  with open(f"data/{project_name}/training_set.jsonl", "rb") as training_file:
    training_response = client.files.create(
      file=training_file,
      purpose="fine-tune"
    )
  training_file_id = training_response.id

  start = time.time()

  fine_tuning_job_response = client.fine_tuning.jobs.create(
    training_file=training_file_id,
    model=our_model
  )

  fine_tuning_job_id = fine_tuning_job_response.id
  print(f"Fine tuning job ID: {fine_tuning_job_id}")

  # "cancelled" is terminal too; without it a cancelled job would be polled forever.
  terminal_statuses = ("succeeded", "failed", "cancelled")
  status = client.fine_tuning.jobs.retrieve(fine_tuning_job_id)
  while status.status not in terminal_statuses:
    time.sleep(poll_interval)
    status = client.fine_tuning.jobs.retrieve(fine_tuning_job_id)

  end = time.time()
  # Wall-clock time including queueing and polling granularity, hence "not accurate".
  print(f"Finetuning time (Not accurate): {end - start:.4f} seconds")

  print(f"Status: {status}")
  if status.status != "succeeded":
    # No fine-tuned model exists in this case; stop instead of evaluating with model=None.
    raise RuntimeError(
      f"Fine-tuning job {fine_tuning_job_id} ended with status "
      f"'{status.status}': {status.error}"
    )
  print(f"Created at: {status.created_at}")
  print(f"Finished at: {status.finished_at}")
  print(f"Duration: {status.finished_at - status.created_at}")
  print(f"Hyperparams: {status.hyperparameters}")

  fine_tuned_model = status.fine_tuned_model

  y_pred = []
  start = time.time()

  for prompt_messages in test_set:
    completion = client.chat.completions.create(
      model=fine_tuned_model,
      messages=prompt_messages
    )
    y_pred.append(str_to_bool(completion.choices[0].message.content))

  end = time.time()
  print(f"Testing time: {end - start:.4f} seconds")

  print("Y_test:")
  print(y_test)
  print("Y_pred:")
  print(y_pred)

  # Fixed label order so the matrix is always 2x2 with rows/columns [False, True].
  confusion_matrix_res = confusion_matrix(y_test, y_pred, labels=[False, True])
  print("Confusion Matrix:")
  print(confusion_matrix_res)

  accuracy = accuracy_score(y_test, y_pred)
  print(f"Accuracy: {accuracy}")

  precision = precision_score(y_test, y_pred)
  print(f"Precision: {precision}")

  f1 = f1_score(y_test, y_pred)
  print(f"F1 Score: {f1}")

  recall = recall_score(y_test, y_pred)
  print(f"Recall: {recall}")

  return accuracy, precision, f1, recall

## Run fine-tuning and evaluation for each project

In [None]:
# One result list per metric, with one slot per evaluated project (10 projects).
# Each slot starts at 0 and is overwritten by the matching test_project call.
accuracy_values, precision_values, f1_values, recall_values = (
  [0] * 10 for _ in range(4)
)

In [None]:
# Fine-tune and evaluate on Butter.MAS.PythonAPI; metrics go into slot 0 of each result list.
accuracy_values[0], precision_values[0], f1_values[0], recall_values[0] = test_project("Butter.MAS.PythonAPI")

Test project: Butter.MAS.PythonAPI
Fine tuning job ID: ftjob-4Dg1JGruBlQ4p3KxatexyZeB
Finetuning time (Not accurate): 1850.7017 seconds
Status: FineTuningJob(id='ftjob-4Dg1JGruBlQ4p3KxatexyZeB', created_at=1733078102, error=Error(code=None, message=None, param=None), fine_tuned_model='ft:gpt-4o-mini-2024-07-18:personal::AZjYG09H', finished_at=1733079942, hyperparameters=Hyperparameters(n_epochs=3, batch_size=4, learning_rate_multiplier=1.8), model='gpt-4o-mini-2024-07-18', object='fine_tuning.job', organization_id='org-tI0WpKootnbW2KXQbHxLn75u', result_files=['file-RzexZcTTiUsisxki2oT3X9'], seed=34039770, status='succeeded', trained_tokens=955854, training_file='file-2dKWa7AhcZWTV5DbskQFkt', validation_file=None, estimated_finish=None, integrations=[], user_provided_suffix=None)
Created at: 1733078102
Finished at: 1733079942
Duration: 1840
Hyperparams: Hyperparameters(n_epochs=3, batch_size=4, learning_rate_multiplier=1.8)
Testing time: 42.2603 seconds
Y_test:
[True, True, True, True, 

In [None]:
# Fine-tune and evaluate on flask-multi-redis; metrics go into slot 1 of each result list.
accuracy_values[1], precision_values[1], f1_values[1], recall_values[1] = test_project("flask-multi-redis")

Test project: flask-multi-redis
Fine tuning job ID: ftjob-u6vyXFAuZCSuSLgqouKe17GM
Finetuning time (Not accurate): 1829.4719 seconds
Status: FineTuningJob(id='ftjob-u6vyXFAuZCSuSLgqouKe17GM', created_at=1733079995, error=Error(code=None, message=None, param=None), fine_tuned_model='ft:gpt-4o-mini-2024-07-18:personal::AZk2TydM', finished_at=1733081815, hyperparameters=Hyperparameters(n_epochs=3, batch_size=4, learning_rate_multiplier=1.8), model='gpt-4o-mini-2024-07-18', object='fine_tuning.job', organization_id='org-tI0WpKootnbW2KXQbHxLn75u', result_files=['file-Dc2HyvQX9Fm3hZYb6nwJ7m'], seed=830087391, status='succeeded', trained_tokens=931974, training_file='file-5eMPgzutVXxPS7ftqdEN7M', validation_file=None, estimated_finish=None, integrations=[], user_provided_suffix=None)
Created at: 1733079995
Finished at: 1733081815
Duration: 1820
Hyperparams: Hyperparameters(n_epochs=3, batch_size=4, learning_rate_multiplier=1.8)
Testing time: 17.4228 seconds
Y_test:
[True, True, True, True, Tr

In [None]:
# Fine-tune and evaluate on centreon-sdk-python; metrics go into slot 2 of each result list.
accuracy_values[2], precision_values[2], f1_values[2], recall_values[2] = test_project("centreon-sdk-python")

Test project: centreon-sdk-python
Fine tuning job ID: ftjob-ew8Lh0ZSfKqP5LJewfYNxYzB
Finetuning time (Not accurate): 2165.0766 seconds
Status: FineTuningJob(id='ftjob-ew8Lh0ZSfKqP5LJewfYNxYzB', created_at=1733082667, error=Error(code=None, message=None, param=None), fine_tuned_model='ft:gpt-4o-mini-2024-07-18:personal::AZkoyEAY', finished_at=1733084822, hyperparameters=Hyperparameters(n_epochs=3, batch_size=4, learning_rate_multiplier=1.8), model='gpt-4o-mini-2024-07-18', object='fine_tuning.job', organization_id='org-tI0WpKootnbW2KXQbHxLn75u', result_files=['file-2GZKX635cHfUzbxF3VUkgH'], seed=747279891, status='succeeded', trained_tokens=924984, training_file='file-XYPPMb7DSRGPa2n1beAhHD', validation_file=None, estimated_finish=None, integrations=[], user_provided_suffix=None)
Created at: 1733082667
Finished at: 1733084822
Duration: 2155
Hyperparams: Hyperparameters(n_epochs=3, batch_size=4, learning_rate_multiplier=1.8)
Testing time: 32.8394 seconds
Y_test:
[True, True, True, True, 

In [None]:
# Fine-tune and evaluate on cloudnetpy; metrics go into slot 3 of each result list.
accuracy_values[3], precision_values[3], f1_values[3], recall_values[3] = test_project("cloudnetpy")

Test project: cloudnetpy
Fine tuning job ID: ftjob-vw5DnAiZ5NeG6RpHp28CKlpI
Finetuning time (Not accurate): 2224.4830 seconds
Status: FineTuningJob(id='ftjob-vw5DnAiZ5NeG6RpHp28CKlpI', created_at=1733084876, error=Error(code=None, message=None, param=None), fine_tuned_model='ft:gpt-4o-mini-2024-07-18:personal::AZlPZh7b', finished_at=1733087091, hyperparameters=Hyperparameters(n_epochs=3, batch_size=3, learning_rate_multiplier=1.8), model='gpt-4o-mini-2024-07-18', object='fine_tuning.job', organization_id='org-tI0WpKootnbW2KXQbHxLn75u', result_files=['file-MJDJjTbzs2ubomg1h6B2cN'], seed=1951613179, status='succeeded', trained_tokens=731553, training_file='file-EmNLGfxNf3NSTG4d5M9gMr', validation_file=None, estimated_finish=None, integrations=[], user_provided_suffix=None)
Created at: 1733084876
Finished at: 1733087091
Duration: 2215
Hyperparams: Hyperparameters(n_epochs=3, batch_size=3, learning_rate_multiplier=1.8)
Testing time: 166.4945 seconds
Y_test:
[False, True, True, True, True, 

In [None]:
# Fine-tune and evaluate on crom; metrics go into slot 4 of each result list.
accuracy_values[4], precision_values[4], f1_values[4], recall_values[4] = test_project("crom")

Test project: crom
Fine tuning job ID: ftjob-J40zkeN7RF41v4DMaFp7xxr7
Finetuning time (Not accurate): 1770.5959 seconds
Status: FineTuningJob(id='ftjob-J40zkeN7RF41v4DMaFp7xxr7', created_at=1733087268, error=Error(code=None, message=None, param=None), fine_tuned_model='ft:gpt-4o-mini-2024-07-18:personal::AZlupLkK', finished_at=1733089030, hyperparameters=Hyperparameters(n_epochs=3, batch_size=4, learning_rate_multiplier=1.8), model='gpt-4o-mini-2024-07-18', object='fine_tuning.job', organization_id='org-tI0WpKootnbW2KXQbHxLn75u', result_files=['file-4LjU7WC6mqEJaXxsX7Revn'], seed=1290515692, status='succeeded', trained_tokens=883272, training_file='file-Gpbyuowr1DEy3v1GYX8Zaq', validation_file=None, estimated_finish=None, integrations=[], user_provided_suffix=None)
Created at: 1733087268
Finished at: 1733089030
Duration: 1762
Hyperparams: Hyperparameters(n_epochs=3, batch_size=4, learning_rate_multiplier=1.8)
Testing time: 63.8474 seconds
Y_test:
[True, False, False, False, True, False

In [None]:
# Fine-tune and evaluate on easypy; metrics go into slot 5 of each result list.
accuracy_values[5], precision_values[5], f1_values[5], recall_values[5] = test_project("easypy")

Test project: easypy
Fine tuning job ID: ftjob-WnfNYDlmzVlscUGNQSAWTaAX
Finetuning time (Not accurate): 2103.1076 seconds
Status: FineTuningJob(id='ftjob-WnfNYDlmzVlscUGNQSAWTaAX', created_at=1733089103, error=Error(code=None, message=None, param=None), fine_tuned_model='ft:gpt-4o-mini-2024-07-18:personal::AZmTmWcd', finished_at=1733091197, hyperparameters=Hyperparameters(n_epochs=3, batch_size=3, learning_rate_multiplier=1.8), model='gpt-4o-mini-2024-07-18', object='fine_tuning.job', organization_id='org-tI0WpKootnbW2KXQbHxLn75u', result_files=['file-JDQsokcAR8Qdo4K1CxQsuF'], seed=703816759, status='succeeded', trained_tokens=865029, training_file='file-GytEmcBM3GayZGmQkQmzg8', validation_file=None, estimated_finish=None, integrations=[], user_provided_suffix=None)
Created at: 1733089103
Finished at: 1733091197
Duration: 2094
Hyperparams: Hyperparameters(n_epochs=3, batch_size=3, learning_rate_multiplier=1.8)
Testing time: 104.6540 seconds
Y_test:
[False, False, False, False, False, F

In [None]:
# Fine-tune and evaluate on eppy; metrics go into slot 6 of each result list.
accuracy_values[6], precision_values[6], f1_values[6], recall_values[6] = test_project("eppy")

Test project: eppy
Fine tuning job ID: ftjob-YiVwOhwYEMbdsglZDXHr94YF
Finetuning time (Not accurate): 2517.8165 seconds
Status: FineTuningJob(id='ftjob-YiVwOhwYEMbdsglZDXHr94YF', created_at=1733091312, error=Error(code=None, message=None, param=None), fine_tuned_model='ft:gpt-4o-mini-2024-07-18:personal::AZnA6FH4', finished_at=1733093820, hyperparameters=Hyperparameters(n_epochs=3, batch_size=3, learning_rate_multiplier=1.8), model='gpt-4o-mini-2024-07-18', object='fine_tuning.job', organization_id='org-tI0WpKootnbW2KXQbHxLn75u', result_files=['file-7PdiaqxsYvRuWR79xoVwXk'], seed=157843580, status='succeeded', trained_tokens=730995, training_file='file-4dmXovt7pYuCF5ywKCNXui', validation_file=None, estimated_finish=None, integrations=[], user_provided_suffix=None)
Created at: 1733091312
Finished at: 1733093820
Duration: 2508
Hyperparams: Hyperparameters(n_epochs=3, batch_size=3, learning_rate_multiplier=1.8)
Testing time: 131.5819 seconds
Y_test:
[True, True, False, True, False, True, 

In [None]:
# Fine-tune and evaluate on pykicad; metrics go into slot 7 of each result list.
accuracy_values[7], precision_values[7], f1_values[7], recall_values[7] = test_project("pykicad")

Test project: pykicad
Fine tuning job ID: ftjob-qiKJ3caQLZP0aeKTV81Qg9Ag
Finetuning time (Not accurate): 2092.6796 seconds
Status: FineTuningJob(id='ftjob-qiKJ3caQLZP0aeKTV81Qg9Ag', created_at=1733093962, error=Error(code=None, message=None, param=None), fine_tuned_model='ft:gpt-4o-mini-2024-07-18:personal::AZnk0XJA', finished_at=1733096046, hyperparameters=Hyperparameters(n_epochs=3, batch_size=4, learning_rate_multiplier=1.8), model='gpt-4o-mini-2024-07-18', object='fine_tuning.job', organization_id='org-tI0WpKootnbW2KXQbHxLn75u', result_files=['file-YBYFzgwFpS1DZWqJ4TZDrZ'], seed=699512230, status='succeeded', trained_tokens=931356, training_file='file-RXbHo7vrtxuWg9MeRN9ZC9', validation_file=None, estimated_finish=None, integrations=[], user_provided_suffix=None)
Created at: 1733093962
Finished at: 1733096046
Duration: 2084
Hyperparams: Hyperparameters(n_epochs=3, batch_size=4, learning_rate_multiplier=1.8)
Testing time: 45.3176 seconds
Y_test:
[False, False, False, True, False, Tr

In [None]:
# Fine-tune and evaluate on reframe; metrics go into slot 8 of each result list.
accuracy_values[8], precision_values[8], f1_values[8], recall_values[8] = test_project("reframe")

Test project: reframe
Fine tuning job ID: ftjob-PjdkNOHxi6qWo3zbFZTwecjR
Finetuning time (Not accurate): 2401.8701 seconds
Status: FineTuningJob(id='ftjob-PjdkNOHxi6qWo3zbFZTwecjR', created_at=1733096102, error=Error(code=None, message=None, param=None), fine_tuned_model='ft:gpt-4o-mini-2024-07-18:personal::AZoNUgRR', finished_at=1733098495, hyperparameters=Hyperparameters(n_epochs=3, batch_size=2, learning_rate_multiplier=1.8), model='gpt-4o-mini-2024-07-18', object='fine_tuning.job', organization_id='org-tI0WpKootnbW2KXQbHxLn75u', result_files=['file-BCoSCnu7yinWHpYWPurqQR'], seed=1159480762, status='succeeded', trained_tokens=565647, training_file='file-WbdJvTLwPz3HawXW9JKxPt', validation_file=None, estimated_finish=None, integrations=[], user_provided_suffix=None)
Created at: 1733096102
Finished at: 1733098495
Duration: 2393
Hyperparams: Hyperparameters(n_epochs=3, batch_size=2, learning_rate_multiplier=1.8)
Testing time: 325.7708 seconds
Y_test:
[False, True, False, False, False, 

In [None]:
# Fine-tune and evaluate on webssh; metrics go into slot 9 of each result list.
accuracy_values[9], precision_values[9], f1_values[9], recall_values[9] = test_project("webssh")

Test project: webssh
Fine tuning job ID: ftjob-fSiFSdGrQOp8APi7udxENgmY
Finetuning time (Not accurate): 2034.3964 seconds
Status: FineTuningJob(id='ftjob-fSiFSdGrQOp8APi7udxENgmY', created_at=1733098830, error=Error(code=None, message=None, param=None), fine_tuned_model='ft:gpt-4o-mini-2024-07-18:personal::AZozaUZf', finished_at=1733100856, hyperparameters=Hyperparameters(n_epochs=3, batch_size=4, learning_rate_multiplier=1.8), model='gpt-4o-mini-2024-07-18', object='fine_tuning.job', organization_id='org-tI0WpKootnbW2KXQbHxLn75u', result_files=['file-Ao2ASjucmsz8chy3BSkgMb'], seed=1044403182, status='succeeded', trained_tokens=877932, training_file='file-6ZRsU7iL5jZqAmoiUpcYZz', validation_file=None, estimated_finish=None, integrations=[], user_provided_suffix=None)
Created at: 1733098830
Finished at: 1733100856
Duration: 2026
Hyperparams: Hyperparameters(n_epochs=3, batch_size=4, learning_rate_multiplier=1.8)
Testing time: 64.2903 seconds
Y_test:
[True, False, False, True, False, Fal

In [None]:
# Unweighted mean of each metric over all slots of its result list
# (every project counts equally, regardless of its test-set size).
avg_accuracy, avg_precision, avg_f1, avg_recall = (
  sum(scores) / len(scores)
  for scores in (accuracy_values, precision_values, f1_values, recall_values)
)

print(f"Average Accuracy: {avg_accuracy}")
print(f"Average Precision: {avg_precision}")
print(f"Average F1 Score: {avg_f1}")
print(f"Average Recall: {avg_recall}")

Average Accuracy: 0.7411859727983818
Average Precision: 0.6138565659299831
Average F1 Score: 0.4792946109476574
Average Recall: 0.4313793859176706
