In [None]:
%env OPENAI_API_KEY=<PUT_YOUR_API_KEY_HERE>


In [None]:
import os
import pandas as pd
from openai import OpenAI

# Shared OpenAI client; the API key is read from the OPENAI_API_KEY environment variable.
client = OpenAI(api_key=os.environ.get("OPENAI_API_KEY"))

# Ingredients for the story prompts used to build the first training set.
houses = [
    'Stark',
    'Lannister',
    'Targaryen',
    'Baratheon',
    'Tyrell',
]
roles = [
    'Knight',
    'Schemer',
    'Magician',
    'Assassin',
    'Ranger',
]
events = [
    "The Battle of Blackwater",
    "The Battle of the Bastards",
    "The Destruction of the Iron Throne",
]

def gpt35(messages, max_tokens=2048, temperature=0.5, top_p=1, frequency_penalty=0, presence_penalty=0):
    """Send `messages` to gpt-3.5-turbo and return the text of the first choice.

    Args:
        messages: Chat messages passed unchanged to the chat-completions call.
        max_tokens, temperature, top_p, frequency_penalty, presence_penalty:
            Sampling options forwarded as-is to the API.

    Returns:
        The `content` of the first returned choice's message.
    """
    sampling_options = {
        "max_tokens": max_tokens,
        "temperature": temperature,
        "top_p": top_p,
        "frequency_penalty": frequency_penalty,
        "presence_penalty": presence_penalty,
    }
    completion = client.chat.completions.create(
        model="gpt-3.5-turbo",
        messages=messages,
        **sampling_options,
    )
    first_choice = completion.choices[0]
    return first_choice.message.content

def prepare_stories(houses, roles, events, repeat=3, output_file="./data/game_of_thrones_stories.csv"):
    """Generate stories with `gpt35` for every house/role/event combination and save them as CSV.

    Args:
        houses: Iterable of house names.
        roles: Iterable of role names.
        events: Iterable of event names.
        repeat: Number of stories requested per (house, role, event) combination.
        output_file: Path of the CSV file to write. Missing parent directories
            are created first.

    Returns:
        None. The result is written to `output_file` with the columns
        house, role, event, story (plus pandas' default index column).

    One `gpt35` call is made per row, i.e.
    len(houses) * len(roles) * len(events) * repeat calls in total.
    """
    from itertools import product

    records = []
    # product() iterates in the same order as four nested loops would.
    for house, role, event, _ in product(houses, roles, events, range(repeat)):
        prompt = f"""Please describe in 140 characters a segment from Game of Thrones where a person as a {role} from {house} participates in the famous {event} event."""
        # The instruction is sent as the only message, using the "system" role.
        story = gpt35([{"role": "system", "content": prompt}])
        records.append({"house": house, "role": role, "event": event, "story": story})

    # Build the frame once instead of concatenating row by row (which is quadratic).
    # Explicit columns keep the CSV header stable even when no rows were generated.
    df = pd.DataFrame(records, columns=["house", "role", "event", "story"])

    parent_dir = os.path.dirname(output_file)
    if parent_dir:
        os.makedirs(parent_dir, exist_ok=True)
    df.to_csv(output_file)

# Generate the first training set: 5 houses x 5 roles x 3 events x 3 repeats (the default)
# = 225 stories, each requiring one chat-completion request.
prepare_stories(houses, roles, events)


In [17]:
import subprocess

# Load the generated stories and derive prompt/completion pairs for fine-tuning.
df = pd.read_csv("./data/game_of_thrones_stories.csv")
# The prompt is the comma-joined "house,role,event" triple; the story is the completion.
df['sub_prompt'] = df['house'] + "," + df['role'] + "," + df['event']
# rename() returns a new frame, so nothing is modified in place on a derived selection.
prepared_data = df[['sub_prompt', 'story']].rename(
    columns={'sub_prompt': 'prompt', 'story': 'completion'}
)
prepared_data.to_csv('./data/prepared_data.csv', index=False)

# Run the OpenAI CLI data-preparation tool on the CSV.
# check=True raises CalledProcessError if the tool exits with a non-zero status,
# instead of silently continuing with missing or stale prepared data.
subprocess.run(
    ['openai', 'tools', 'fine_tunes.prepare_data', '--file', 'data/prepared_data.csv', '--quiet'],
    check=True,
)


Analyzing...

- Based on your file extension, your file is formatted as a CSV file
- Your file contains 225 prompt-completion pairs
- Your data does not contain a common separator at the end of your prompts. Having a separator string appended to the end of the prompt makes it clearer to the fine-tuned model where the completion should begin. See https://platform.openai.com/docs/guides/fine-tuning/preparing-your-dataset for more detail and examples. If you intend to do open-ended generation, then you should leave the prompts empty
- Your data does not contain a common ending at the end of your completions. Having a common ending string appended to the end of the completion makes it clearer to the fine-tuned model where the completion should end. See https://platform.openai.com/docs/guides/fine-tuning/preparing-your-dataset for more detail and examples.
- The completion should start with a whitespace character (` `). This tends to produce better results due to the tokenization we use. Se

CompletedProcess(args=['openai', 'tools', 'fine_tunes.prepare_data', '--file', 'data/prepared_data.csv', '--quiet'], returncode=0)

# Upload a training file

In [19]:
# Upload the prepared JSONL file as fine-tuning training data.
# The context manager closes the file handle once the upload call returns.
with open("./data/prepared_data_prepared.jsonl", "rb") as training_file:
    uploaded_file = client.files.create(
        file=training_file,
        purpose="fine-tune"
    )

# Show the returned file object for this cell's output.
uploaded_file


FileObject(id='file-asb1zV2mq5BohXrEaoHSXxiv', bytes=58173, created_at=1701173679, filename='prepared_data_prepared.jsonl', object='file', purpose='fine-tune', status='uploaded', status_details=None)

# Create a fine-tuned model

In [20]:
from openai import OpenAI

# A fresh client is created here without an explicit api_key argument.
client = OpenAI()

# Start a fine-tuning job on the davinci-002 base model with the given training file id.
client.fine_tuning.jobs.create(
    model="davinci-002",
    training_file="file-asb1zV2mq5BohXrEaoHSXxiv",
)


FineTuningJob(id='ftjob-c3lQmMXsuPjGQ738qFT0BVXm', created_at=1701173806, error=None, fine_tuned_model=None, finished_at=None, hyperparameters=Hyperparameters(n_epochs='auto', batch_size='auto', learning_rate_multiplier='auto'), model='davinci-002', object='fine_tuning.job', organization_id='org-DBwVg7sysubJfocwfalit1g7', result_files=[], status='validating_files', trained_tokens=None, training_file='file-asb1zV2mq5BohXrEaoHSXxiv', validation_file=None)

# List fine-tuning jobs

In [25]:
# List fine-tuning jobs, requesting at most 10 entries.
client.fine_tuning.jobs.list(limit=10)


SyncCursorPage[FineTuningJob](data=[FineTuningJob(id='ftjob-c3lQmMXsuPjGQ738qFT0BVXm', created_at=1701173806, error=None, fine_tuned_model=None, finished_at=None, hyperparameters=Hyperparameters(n_epochs=3, batch_size=1, learning_rate_multiplier=2), model='davinci-002', object='fine_tuning.job', organization_id='org-DBwVg7sysubJfocwfalit1g7', result_files=[], status='running', trained_tokens=None, training_file='file-asb1zV2mq5BohXrEaoHSXxiv', validation_file=None), FineTuningJob(id='ftjob-FQfCf8AdC94DI6Ct4jZaKWkp', created_at=1701172961, error=None, fine_tuned_model='ft:davinci-002:personal::8PrBHlJf', finished_at=1701173318, hyperparameters=Hyperparameters(n_epochs=3, batch_size=1, learning_rate_multiplier=2), model='davinci-002', object='fine_tuning.job', organization_id='org-DBwVg7sysubJfocwfalit1g7', result_files=['file-wlOKBSKZbNcn5Nmhv1WStpMV'], status='succeeded', trained_tokens=36816, training_file='file-50wAPxerWbHsKf380S8agt9F', validation_file=None)], object='list', has_mor

# Retrieve the state of a fine-tuning job

In [4]:
# Fetch the current state of a single fine-tuning job by its id.
client.fine_tuning.jobs.retrieve("ftjob-c3lQmMXsuPjGQ738qFT0BVXm")


FineTuningJob(id='ftjob-c3lQmMXsuPjGQ738qFT0BVXm', created_at=1701173806, error=None, fine_tuned_model='ft:davinci-002:personal::8PrPuy9v', finished_at=1701174225, hyperparameters=Hyperparameters(n_epochs=3, batch_size=1, learning_rate_multiplier=2), model='davinci-002', object='fine_tuning.job', organization_id='org-DBwVg7sysubJfocwfalit1g7', result_files=['file-NK6CMKDrrSNCLvDIxrdnUdUE'], status='succeeded', trained_tokens=36816, training_file='file-asb1zV2mq5BohXrEaoHSXxiv', validation_file=None)

# Use the fine-tuned model

In [5]:
def write_a_story(prompt, model="ft:davinci-002:personal::8PrPuy9v", temperature=0.7, max_tokens=2000):
    """Complete `prompt` with a fine-tuned completions model and return the generated text.

    Args:
        prompt: Text to complete. The examples in this notebook use the form
            "house,role,event ->".
        model: Id of the model to query. Defaults to the fine-tuned model this
            function was originally hard-wired to; pass a different id (for
            example the result of a later fine-tuning job) to query that model.
        temperature: Sampling temperature forwarded to the API.
        max_tokens: Upper bound on generated tokens forwarded to the API.

    Returns:
        The text of the first returned choice. Generation is stopped at the
        first newline character.
    """
    response = client.completions.create(
        model=model,
        prompt=prompt,
        temperature=temperature,
        max_tokens=max_tokens,
        top_p=1,
        stop=["\n"])
    return response.choices[0].text

# The prompt is a "house,role,event" triple followed by " ->"
# (presumably the separator added by the data-preparation tool — confirm in the prepared JSONL).
# 'Soldier' is not one of the roles in the original `roles` list.
prompt = "Targaryen,Soldier,The Battle of the Bastards ->"
story = write_a_story(prompt=prompt)
print(story)


 A Targaryen soldier, wielding a dragon-glass spear, joins the Battle of the Bastards. Amidst chaos, they unleash fire and fury upon their enemies, turning the tide of the battle and proving their loyalty to the Mother of Dragons. #GameOfThrones #BattleOfTheBastards


In [41]:
# Probe with a house, role and event that do not appear in the original houses/roles/events lists.
prompt = "Arryn,Soldier,The Slaughter of the Starks ->"
story = write_a_story(prompt=prompt)
print(story)


 A Soldier from House Arryn joins The Slaughter of the Starks, fighting valiantly for his house, but his loyalty is soon tested as the chaos unfolds. #GameOfThrones


# Analyzing your fine-tuned model's performance

In [42]:
# Fetch the fine-tuning job record over the REST API; the bearer token comes from $OPENAI_API_KEY.
!curl https://api.openai.com/v1/fine_tuning/jobs/ftjob-c3lQmMXsuPjGQ738qFT0BVXm -H "Authorization: Bearer $OPENAI_API_KEY"


14575.22s - pydevd: Sending message related to process being replaced timed-out after 5 seconds


{
  "object": "fine_tuning.job",
  "id": "ftjob-c3lQmMXsuPjGQ738qFT0BVXm",
  "model": "davinci-002",
  "created_at": 1701173806,
  "finished_at": 1701174225,
  "fine_tuned_model": "ft:davinci-002:personal::8PrPuy9v",
  "organization_id": "org-DBwVg7sysubJfocwfalit1g7",
  "result_files": [
    "file-NK6CMKDrrSNCLvDIxrdnUdUE"
  ],
  "status": "succeeded",
  "validation_file": null,
  "training_file": "file-asb1zV2mq5BohXrEaoHSXxiv",
  "hyperparameters": {
    "n_epochs": 3,
    "batch_size": 1,
    "learning_rate_multiplier": 2
  },
  "trained_tokens": 36816,
  "error": null
}

In [43]:
# Download the content of the job's result file and redirect it into ./data/model_metrics.csv.
# NOTE(review): curl runs without --fail here, so an HTTP error body would presumably be
# written into the CSV as well — confirm and consider adding --fail.
!curl https://api.openai.com/v1/files/file-NK6CMKDrrSNCLvDIxrdnUdUE/content -H "Authorization: Bearer $OPENAI_API_KEY" > ./data/model_metrics.csv


15255.29s - pydevd: Sending message related to process being replaced timed-out after 5 seconds


  % Total    % Received % Xferd  Average Speed   Time    Time     Time  Current
                                 Dload  Upload   Total   Spent    Left  Speed
100 14342    0 14342    0     0   9208      0 --:--:--  0:00:01 --:--:--  9205


# Incremental Training

In [53]:
# Second batch of prompt ingredients for incremental training.
# Note: this rebinds the notebook-level names houses, roles and events.
houses = [
    'Martell',
    'Arryn',
    'Greyjoy',
]
roles = [
    'Soldier',
    'Watchman',
    'Wildling',
]
events = [
    "Ned Stark's Investigation into Jon Arryn's Death",
    "The Slaughter of the Starks",
    "Daenerys Targaryen's Sack of King's Landing",
]

# 3 houses x 3 roles x 3 events x 3 repeats (the default) = 81 generated stories.
new_stories = "./data/game_of_thrones_stories_more.csv"
prepare_stories(houses, roles, events, output_file=new_stories)


In [54]:
# Load the second batch of stories and derive prompt/completion pairs for fine-tuning.
df = pd.read_csv(new_stories)
# The prompt is the comma-joined "house,role,event" triple; the story is the completion.
df['sub_prompt'] = df['house'] + "," + df['role'] + "," + df['event']
# rename() returns a new frame, so nothing is modified in place on a derived selection.
prepared_data = df[['sub_prompt', 'story']].rename(
    columns={'sub_prompt': 'prompt', 'story': 'completion'}
)
new_stories_prepared = './data/prepared_data_more.csv'
prepared_data.to_csv(new_stories_prepared, index=False)

# Run the OpenAI CLI data-preparation tool on the file that was just written.
# The path variable is reused so the CSV and the CLI argument cannot drift apart;
# check=True raises CalledProcessError if the tool exits with a non-zero status.
subprocess.run(
    ['openai', 'tools', 'fine_tunes.prepare_data', '--file', new_stories_prepared, '--quiet'],
    check=True,
)


Analyzing...

- Based on your file extension, your file is formatted as a CSV file
- Your file contains 81 prompt-completion pairs. In general, we recommend having at least a few hundred examples. We've found that performance tends to linearly increase for every doubling of the number of examples
- Your data does not contain a common separator at the end of your prompts. Having a separator string appended to the end of the prompt makes it clearer to the fine-tuned model where the completion should begin. See https://platform.openai.com/docs/guides/fine-tuning/preparing-your-dataset for more detail and examples. If you intend to do open-ended generation, then you should leave the prompts empty
- Your data does not contain a common ending at the end of your completions. Having a common ending string appended to the end of the completion makes it clearer to the fine-tuned model where the completion should end. See https://platform.openai.com/docs/guides/fine-tuning/preparing-your-dataset 

CompletedProcess(args=['openai', 'tools', 'fine_tunes.prepare_data', '--file', './data/prepared_data_more.csv', '--quiet'], returncode=0)

In [55]:
# Upload the second prepared JSONL file as fine-tuning training data.
# The context manager closes the file handle once the upload call returns.
with open("./data/prepared_data_more_prepared.jsonl", "rb") as training_file:
    uploaded_more_file = client.files.create(
        file=training_file,
        purpose="fine-tune"
    )

# Show the returned file object for this cell's output.
uploaded_more_file


FileObject(id='file-Uc2Zaur7AbifRfNqU76xaBPt', bytes=21143, created_at=1701180109, filename='prepared_data_more_prepared.jsonl', object='file', purpose='fine-tune', status='uploaded', status_details=None)

In [57]:
from openai import OpenAI
from openai.types import fine_tuning

# A fresh client is created here without an explicit api_key argument.
client = OpenAI()

# Only the learning-rate multiplier is set explicitly for the follow-up job.
hyper_parameters = dict(learning_rate_multiplier=0.2)

# Incremental training: the starting model is the previously fine-tuned model,
# trained further on the newly uploaded file.
client.fine_tuning.jobs.create(
    model="ft:davinci-002:personal::8PrPuy9v",
    training_file="file-Uc2Zaur7AbifRfNqU76xaBPt",
    hyperparameters=hyper_parameters,
)


FineTuningJob(id='ftjob-t2SppqlqEQ5fZ16HFESHUaUP', created_at=1701180231, error=None, fine_tuned_model=None, finished_at=None, hyperparameters=Hyperparameters(n_epochs='auto', batch_size='auto', learning_rate_multiplier='auto'), model='ft:davinci-002:personal::8PrPuy9v', object='fine_tuning.job', organization_id='org-DBwVg7sysubJfocwfalit1g7', result_files=[], status='validating_files', trained_tokens=None, training_file='file-Uc2Zaur7AbifRfNqU76xaBPt', validation_file=None)

In [58]:
# NOTE(review): write_a_story targets the model "ft:davinci-002:personal::8PrPuy9v", which is
# the starting model of the incremental job, not its result. This output therefore does not
# appear to come from the incrementally trained model — confirm and query the new model id
# once that job has finished.
fine_tuned = write_a_story("Greyjoy,Watchman,The Slaughter of the Starks ->")
print(fine_tuned)


 In a shocking twist, a Watchman from House Greyjoy joins The Slaughter of the Starks, turning on his own kin to claim vengeance for lost loved ones. #GameOfThrones


In [59]:
def write_a_story_by_stream(prompt, model="ft:davinci-002:personal::8PrPuy9v", temperature=0.7, max_tokens=2000):
    """Request a streamed completion for `prompt` and return the stream.

    Args:
        prompt: Text to complete. The examples in this notebook use the form
            "house,role,event ->".
        model: Id of the model to query. Defaults to the fine-tuned model this
            function was originally hard-wired to; pass a different id (for
            example the result of a later fine-tuning job) to query that model.
        temperature: Sampling temperature forwarded to the API.
        max_tokens: Upper bound on generated tokens forwarded to the API.

    Returns:
        The object returned by the completions call with stream=True. Iterate
        over it and read `choices[0].text` from each item to get the text
        pieces. Generation is stopped at the first newline character.
    """
    response = client.completions.create(
        model=model,
        prompt=prompt,
        temperature=temperature,
        max_tokens=max_tokens,
        stream=True,
        top_p=1,
        stop=["\n"])
    return response

response = write_a_story_by_stream("Martell,Soldier,Daenerys Targaryen's Sack of King's Landing ->")

# Print each streamed piece of text as it arrives, without adding line breaks between pieces.
for chunk in response:
    print(chunk.choices[0].text, end='')


 A Martell soldier, joins the Sack of King's Landing, wielding a flaming spear, slicing through enemies, seeking revenge for House Martell, but questioning their own loyalty amidst the chaos. #GameOfThrones