In [1]:
!pip install openai datasets



In [2]:
from datasets import load_dataset

# Hub dataset of banking customer requests paired with candidate responses.
dataset_name = "argilla/llama-2-banking-fine-tune"
ds = load_dataset(dataset_name, split="train")

The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


In [3]:
# Number of examples in the loaded split.
len(ds)

100

In [4]:
# Inspect the first record to see which fields each example carries.
ds[0]

{'request': 'I tried to make a transfer but it failed',
 'response-1': " Thank you for choosing the Bank of America for your financial needs! I'm so sorry to hear that your transfer failed. Can you please provide me with more details about the transfer, such as the amount and the account it was being transferred from/to? This will help me better understand the issue and assist you in resolving it as quickly as possible. Additionally, have you tried checking your account balance or recent transactions to see if the transfer has been processed? If you have any further questions or concerns, please don't hesitate to ask. Thank you for trusting the Bank of America with your financial matters!",
 'response-2': " Hello there! Thank you for choosing the Bank of America for your financial needs. I'm so sorry to hear that your transfer failed. Can you please provide me with more details about the transfer, such as the amount and the account it was being transferred from/to? Additionally, do you

In [3]:
def _to_chat_example(row):
    """Convert one dataset row into a chat-format fine-tuning example.

    The row's "request" becomes the user turn and its "response-1" becomes the
    assistant turn, preceded by a fixed system prompt.
    """
    system_turn = {"role": "system", "content": "You are a customer service representative from Bank of America. Please reply customer requests using polite and respectful language."}
    user_turn = {'role': 'user', 'content': row["request"]}
    assistant_turn = {'role': 'assistant', 'content': row["response-1"]}
    return {"messages": [system_turn, user_turn, assistant_turn]}


# One {"messages": [...]} example per dataset row, in dataset order.
ds_formatted = list(map(_to_chat_example, ds))

In [4]:
# Spot-check the first converted example.
ds_formatted[0]

{'messages': [{'role': 'system',
   'content': 'You are a customer service representative from Bank of America. Please reply customer requests using polite and respectful language.'},
  {'role': 'user', 'content': 'I tried to make a transfer but it failed'},
  {'role': 'assistant',
   'content': " Thank you for choosing the Bank of America for your financial needs! I'm so sorry to hear that your transfer failed. Can you please provide me with more details about the transfer, such as the amount and the account it was being transferred from/to? This will help me better understand the issue and assist you in resolving it as quickly as possible. Additionally, have you tried checking your account balance or recent transactions to see if the transfer has been processed? If you have any further questions or concerns, please don't hesitate to ask. Thank you for trusting the Bank of America with your financial matters!"}]}

In [4]:
import random

# Shuffle with a fixed seed so the train/validation split is the same on every
# Restart & Run All. A dedicated Random instance leaves the global RNG untouched.
SHUFFLE_SEED = 42
random.Random(SHUFFLE_SEED).shuffle(ds_formatted)

In [5]:
# The first 80 shuffled examples are used for training, the remainder for validation.
n_train = 80
ds_train, ds_val = ds_formatted[:n_train], ds_formatted[n_train:]

In [6]:
import json


def write_jsonl(path, records):
    """Write `records` to `path` in JSON Lines format.

    Args:
        path: Destination file path; an existing file is overwritten.
        records: Iterable of JSON-serialisable objects, written one per line.
    """
    # Explicit encoding so the file does not depend on the platform default.
    with open(path, "w", encoding="utf-8") as f:
        for record in records:
            f.write(json.dumps(record) + "\n")


write_jsonl("train.jsonl", ds_train)
write_jsonl("val.jsonl", ds_val)

In [7]:
ls

[0m[01;34msample_data[0m/  train.jsonl  val.jsonl


In [8]:
# Peek at the first two lines of the training file.
!head -n 2 train.jsonl

{"messages": [{"role": "system", "content": "You are a customer service representative from Bank of America. Please reply customer requests using polite and respectful language."}, {"role": "user", "content": "I accidentally made a transaction to the wrong account."}, {"role": "assistant", "content": " Of course, I'd be happy to help you with that! Thank you for trusting the Bank of America with your financial matters. I apologize to hear that you accidentally made a transaction to the wrong account. Don't worry, we're here to help you resolve the issue as quickly and efficiently as possible.\nTo correct the mistake, please provide me with the following information:\n1. The account number of the account you accidentally transferred the funds to.\n2. The amount of the transaction.\n3. The date of the transaction.\nWith this information, we can process a reversal of the transaction and transfer the funds back to the correct account. If you have any further questions or concerns, please f

In [9]:
# Peek at the first two lines of the validation file.
!head -n 2 val.jsonl

{"messages": [{"role": "system", "content": "You are a customer service representative from Bank of America. Please reply customer requests using polite and respectful language."}, {"role": "user", "content": "What kind of currencies can I use to add money?"}, {"role": "assistant", "content": " Hello there! Thank you for choosing the Bank of America for your financial needs. I'm happy to help you with any questions you may have. To answer your question, the Bank of America accepts a variety of currencies for adding money to your account, including:\n* US Dollars (USD)\nWe also offer the convenience of adding money from other foreign currencies, but please note that there may be fees associated with these transactions. If you have any further questions or concerns, please don't hesitate to ask. Thank you for trusting the Bank of America with your financial needs!"}]}
{"messages": [{"role": "system", "content": "You are a customer service representative from Bank of America. Please reply

# Upload data

In [10]:
import os
from getpass import getpass

import openai

# Never hardcode the key in the notebook: read it from the environment and
# fall back to an interactive prompt whose input is not echoed.
openai.api_key = os.environ.get("OPENAI_API_KEY") or getpass("OpenAI API key: ")


In [11]:
pip install openai==0.28



In [12]:
# Upload training data
# The context manager closes the file handle once the upload call returns.
with open("train.jsonl", "rb") as train_file:
    train = openai.File.create(
        file=train_file,
        purpose='fine-tune'
    )
train

<File file id=file-sHsK5clBEOVhirD4LnTRjhtT at 0x7b9b3d2ee3e0> JSON: {
  "object": "file",
  "id": "file-sHsK5clBEOVhirD4LnTRjhtT",
  "purpose": "fine-tune",
  "filename": "file",
  "bytes": 86930,
  "created_at": 1714741093,
  "status": "processed",
  "status_details": null
}

In [13]:
# Keep the uploaded training file's ID; the fine-tuning job request refers to it.
train_id = train['id']

In [14]:
# Upload validation data
# The context manager closes the file handle once the upload call returns.
with open("val.jsonl", "rb") as val_file:
    val = openai.File.create(
        file=val_file,
        purpose='fine-tune'
    )
val

<File file id=file-58fMMEfj48RrhefwNnl9RJUV at 0x7b9b3cdb28e0> JSON: {
  "object": "file",
  "id": "file-58fMMEfj48RrhefwNnl9RJUV",
  "purpose": "fine-tune",
  "filename": "file",
  "bytes": 21996,
  "created_at": 1714741100,
  "status": "processed",
  "status_details": null
}

In [15]:
# Keep the uploaded validation file's ID; the fine-tuning job request refers to it.
val_id = val['id']

# Fine-tuning

In [26]:
# Start a fine-tuning job on the uploaded training and validation files.
job_request = {
    "training_file": train_id,
    "validation_file": val_id,
    "model": "gpt-3.5-turbo",
}
response = openai.FineTuningJob.create(**job_request)
response

<FineTuningJob fine_tuning.job id=ftjob-rtWAreEKJwn53L68exykrZN1 at 0x7b9b3cd73f10> JSON: {
  "object": "fine_tuning.job",
  "id": "ftjob-rtWAreEKJwn53L68exykrZN1",
  "model": "gpt-3.5-turbo-0125",
  "created_at": 1714741819,
  "finished_at": null,
  "fine_tuned_model": null,
  "organization_id": "org-zTbtP7D5muX7kI6oqtsYYww1",
  "result_files": [],
  "status": "validating_files",
  "validation_file": "file-58fMMEfj48RrhefwNnl9RJUV",
  "training_file": "file-sHsK5clBEOVhirD4LnTRjhtT",
  "hyperparameters": {
    "n_epochs": "auto",
    "batch_size": "auto",
    "learning_rate_multiplier": "auto"
  },
  "trained_tokens": null,
  "error": {},
  "user_provided_suffix": null,
  "seed": 850923500,
  "estimated_finish": null,
  "integrations": []
}

In [27]:
# Keep the job ID so the job can be looked up in the following cells.
job_id = response['id']


In [28]:
import time

# Wait for the fine-tuning job to reach a terminal state.
# A single retrieve right after creation returns a non-terminal status (e.g.
# "validating_files") with "fine_tuned_model" still null, which breaks the
# later cells when the notebook is run top to bottom.
TERMINAL_STATUSES = {"succeeded", "failed", "cancelled"}
POLL_INTERVAL_SECONDS = 30

response = openai.FineTuningJob.retrieve(job_id)
while response["status"] not in TERMINAL_STATUSES:
    time.sleep(POLL_INTERVAL_SECONDS)
    response = openai.FineTuningJob.retrieve(job_id)
response

<FineTuningJob fine_tuning.job id=ftjob-rtWAreEKJwn53L68exykrZN1 at 0x7b9b3cdb24d0> JSON: {
  "object": "fine_tuning.job",
  "id": "ftjob-rtWAreEKJwn53L68exykrZN1",
  "model": "gpt-3.5-turbo-0125",
  "created_at": 1714741819,
  "finished_at": 1714742540,
  "fine_tuned_model": "ft:gpt-3.5-turbo-0125:personal::9Kn9hbZp",
  "organization_id": "org-zTbtP7D5muX7kI6oqtsYYww1",
  "result_files": [
    "file-qZOCMEie1GgwOVusLuUQFZxU"
  ],
  "status": "succeeded",
  "validation_file": "file-58fMMEfj48RrhefwNnl9RJUV",
  "training_file": "file-sHsK5clBEOVhirD4LnTRjhtT",
  "hyperparameters": {
    "n_epochs": 3,
    "batch_size": 1,
    "learning_rate_multiplier": 2
  },
  "trained_tokens": 49281,
  "error": {},
  "user_provided_suffix": null,
  "seed": 850923500,
  "estimated_finish": null,
  "integrations": []
}

In [29]:
# Retrieve the job again to refresh its state (same call as the previous cell).
response = openai.FineTuningJob.retrieve(job_id)
response

<FineTuningJob fine_tuning.job id=ftjob-rtWAreEKJwn53L68exykrZN1 at 0x7b9b3c409e90> JSON: {
  "object": "fine_tuning.job",
  "id": "ftjob-rtWAreEKJwn53L68exykrZN1",
  "model": "gpt-3.5-turbo-0125",
  "created_at": 1714741819,
  "finished_at": 1714742540,
  "fine_tuned_model": "ft:gpt-3.5-turbo-0125:personal::9Kn9hbZp",
  "organization_id": "org-zTbtP7D5muX7kI6oqtsYYww1",
  "result_files": [
    "file-qZOCMEie1GgwOVusLuUQFZxU"
  ],
  "status": "succeeded",
  "validation_file": "file-58fMMEfj48RrhefwNnl9RJUV",
  "training_file": "file-sHsK5clBEOVhirD4LnTRjhtT",
  "hyperparameters": {
    "n_epochs": 3,
    "batch_size": 1,
    "learning_rate_multiplier": 2
  },
  "trained_tokens": 49281,
  "error": {},
  "user_provided_suffix": null,
  "seed": 850923500,
  "estimated_finish": null,
  "integrations": []
}

In [30]:
# List up to 10 events from a fine-tuning job.
# NOTE: this rebinds `response`, which held the job object until now, to the event list.
response = openai.FineTuningJob.list_events(id=job_id, limit=10)
response

<OpenAIObject list at 0x7b9b3cd730b0> JSON: {
  "object": "list",
  "data": [
    {
      "object": "fine_tuning.job.event",
      "id": "ftevent-35J1d9UIXY41FqDtX4QfW3ef",
      "created_at": 1714742545,
      "level": "info",
      "message": "The job has successfully completed",
      "data": {},
      "type": "message"
    },
    {
      "object": "fine_tuning.job.event",
      "id": "ftevent-s35UlyvKbZ1wBX0Blx4L9R1j",
      "created_at": 1714742542,
      "level": "info",
      "message": "New fine-tuned model created: ft:gpt-3.5-turbo-0125:personal::9Kn9hbZp",
      "data": {},
      "type": "message"
    },
    {
      "object": "fine_tuning.job.event",
      "id": "ftevent-OFAok1t1zen8jEPKFxidlSMH",
      "created_at": 1714742542,
      "level": "info",
      "message": "Checkpoint created at step 160 with Snapshot ID: ft:gpt-3.5-turbo-0125:personal::9Kn9hh9e:ckpt-step-160",
      "data": {},
      "type": "message"
    },
    {
      "object": "fine_tuning.job.event",
      "i

In [32]:
# The events arrive newest-first; build a reversed copy so they print oldest-first.
# Reversing in place would mutate response["data"] and flip the order again on
# every re-run of this cell.
events = list(reversed(response["data"]))

for event in events:
    print(event["message"])

The job has successfully completed
New fine-tuned model created: ft:gpt-3.5-turbo-0125:personal::9Kn9hbZp
Checkpoint created at step 160 with Snapshot ID: ft:gpt-3.5-turbo-0125:personal::9Kn9hh9e:ckpt-step-160
Checkpoint created at step 80 with Snapshot ID: ft:gpt-3.5-turbo-0125:personal::9Kn9hSs7:ckpt-step-80
Step 240/240: training loss=0.32, validation loss=0.69, full validation loss=0.64
Step 239/240: training loss=0.41
Step 238/240: training loss=0.41
Step 237/240: training loss=0.61
Step 236/240: training loss=0.38
Step 235/240: training loss=0.60


In [33]:
# Re-fetch the job object: `response` was last bound to the event list, and the
# next cell reads "fine_tuned_model" from it.
response = openai.FineTuningJob.retrieve(job_id)
response

<FineTuningJob fine_tuning.job id=ftjob-rtWAreEKJwn53L68exykrZN1 at 0x7b9b3c2b5710> JSON: {
  "object": "fine_tuning.job",
  "id": "ftjob-rtWAreEKJwn53L68exykrZN1",
  "model": "gpt-3.5-turbo-0125",
  "created_at": 1714741819,
  "finished_at": 1714742540,
  "fine_tuned_model": "ft:gpt-3.5-turbo-0125:personal::9Kn9hbZp",
  "organization_id": "org-zTbtP7D5muX7kI6oqtsYYww1",
  "result_files": [
    "file-qZOCMEie1GgwOVusLuUQFZxU"
  ],
  "status": "succeeded",
  "validation_file": "file-58fMMEfj48RrhefwNnl9RJUV",
  "training_file": "file-sHsK5clBEOVhirD4LnTRjhtT",
  "hyperparameters": {
    "n_epochs": 3,
    "batch_size": 1,
    "learning_rate_multiplier": 2
  },
  "trained_tokens": 49281,
  "error": {},
  "user_provided_suffix": null,
  "seed": 850923500,
  "estimated_finish": null,
  "integrations": []
}

In [34]:
# ID of the fine-tuned model. The job reports null here until it has succeeded,
# so fail with a clear message instead of passing None to the chat API below.
model_id = response["fine_tuned_model"]
if model_id is None:
    raise RuntimeError(
        f"Fine-tuned model not available (job status: {response.get('status')!r}); "
        "retrieve the job again once it has succeeded."
    )

# Inference

In [35]:
# Test conversation: the same system prompt used in the training examples,
# followed by a single user request.
system_message = dict(
    role="system",
    content="You are a customer service representative from Bank of America. Please reply customer requests using polite and respectful language.",
)
user_message = dict(role="user", content="My card is stolen!")
test_messages = [system_message, user_message]

In [36]:
# Ask the fine-tuned model to answer the test conversation.
completion = openai.ChatCompletion.create(messages=test_messages, model=model_id)


In [37]:
# Show the fine-tuned model's reply (the message of the first choice).
print(completion.choices[0].message)

{
  "role": "assistant",
  "content": " Oh no, I'm so sorry to hear that your card has been stolen! Thank you for trusting the Bank of America to assist you with this issue. Can you please provide me with more details about the theft, such as when and where it occurred, so that I can help you resolve the problem as quickly and securely as possible? Additionally, have you already reported the theft to the local authorities and have you cancelled your card with us? If not, please do so right away to protect your account from further unauthorized activity. Thank you for choosing the Bank of America to help you with this matter."
}


In [38]:
# Send the same conversation to the base model as a baseline for comparison.
completion1 = openai.ChatCompletion.create(messages=test_messages, model="gpt-3.5-turbo")


In [39]:
# Show the base model's reply to the same conversation.
print(completion1.choices[0].message)

{
  "role": "assistant",
  "content": "I am sorry to hear that your card has been stolen. I recommend that you contact our customer service immediately at 1-800-432-1000 to report the theft and block your card to prevent any unauthorized transactions. Thank you for bringing this to our attention."
}
