# Detailed Article Explanation
The detailed code explanation for this article is available at the following link:

https://www.daniweb.com/programming/computer-science/tutorials/542333/how-to-fine-tune-the-openai-gpt-4o-model-the-wait-is-finally-over

For my other articles on Daniweb.com, please see this link:

https://www.daniweb.com/members/1235222/usmanmalik57

## Installing and Importing Required Libraries

In [3]:
!pip install openai
!pip install rouge-score
!pip install --upgrade openpyxl
!pip install pandas openpyxl



In [21]:
import os
import json
import time
import pandas as pd
from rouge_score import rouge_scorer
from sklearn.metrics import accuracy_score
from openai import OpenAI

## Fine-tuning GPT-4o for Text Classification

In [41]:

# Load the airline tweets CSV from a local Windows path (adjust for your machine).
dataset = pd.read_csv(r"D:\Datasets\Tweets.csv")
# Show the first rows to inspect the available columns.
dataset.head()

Unnamed: 0,tweet_id,airline_sentiment,airline_sentiment_confidence,negativereason,negativereason_confidence,airline,airline_sentiment_gold,name,negativereason_gold,retweet_count,text,tweet_coord,tweet_created,tweet_location,user_timezone
0,570306133677760513,neutral,1.0,,,Virgin America,,cairdin,,0,@VirginAmerica What @dhepburn said.,,2015-02-24 11:35:52 -0800,,Eastern Time (US & Canada)
1,570301130888122368,positive,0.3486,,0.0,Virgin America,,jnardino,,0,@VirginAmerica plus you've added commercials t...,,2015-02-24 11:15:59 -0800,,Pacific Time (US & Canada)
2,570301083672813571,neutral,0.6837,,,Virgin America,,yvonnalynn,,0,@VirginAmerica I didn't today... Must mean I n...,,2015-02-24 11:15:48 -0800,Lets Play,Central Time (US & Canada)
3,570301031407624196,negative,1.0,Bad Flight,0.7033,Virgin America,,jnardino,,0,@VirginAmerica it's really aggressive to blast...,,2015-02-24 11:15:36 -0800,,Pacific Time (US & Canada)
4,570300817074462722,negative,1.0,Can't Tell,1.0,Virgin America,,jnardino,,0,@VirginAmerica and it's a really big bad thing...,,2015-02-24 11:14:45 -0800,,Pacific Time (US & Canada)


In [42]:


def preprocess_data(dataset, n, records):
    """Return a class-balanced slice of the tweets with only text and label.

    Rows whose 'airline_sentiment' or 'text' is missing or blank are discarded.
    The remaining rows are grouped by sentiment, and from each group the rows
    at positions ``n`` to ``n + records - 1`` are kept.

    Args:
        dataset: DataFrame with at least 'text' and 'airline_sentiment' columns.
        n: Zero-based starting position inside each sentiment group.
        records: Maximum number of rows to take from each sentiment group.

    Returns:
        A new DataFrame with a fresh 0..N-1 index and the columns
        ['text', 'airline_sentiment'], ordered neutral, positive, negative.
        A group with fewer rows than requested contributes what it has.
    """
    # Discard rows with a missing label or missing text.
    cleaned = dataset.dropna(subset=['airline_sentiment', 'text'])

    # Discard rows whose label or text is empty after trimming whitespace.
    label_present = cleaned['airline_sentiment'].str.strip() != ''
    text_present = cleaned['text'].str.strip() != ''
    cleaned = cleaned[label_present & text_present]

    # The same positional window is applied to every sentiment class.
    window = slice(n, n + records)
    per_class = [
        cleaned[cleaned['airline_sentiment'] == label][window]
        for label in ('neutral', 'positive', 'negative')
    ]

    # Stack the per-class samples and renumber the rows from zero.
    combined = pd.concat(per_class).reset_index(drop=True)

    return combined[["text", "airline_sentiment"]]


In [76]:
# Training split: up to 200 tweets per sentiment class, taken from position 0 of each class.
training_data = preprocess_data(dataset, 0, 200)
print("Training data value counts:\n", training_data["airline_sentiment"].value_counts())
print("===========================")
# Test split: up to 33 tweets per class starting at position 600 of each class,
# so it does not overlap the 0-199 window used for training.
test_data = preprocess_data(dataset, 600, 33)
print("Test data value counts:\n", test_data["airline_sentiment"].value_counts())

Training data value counts:
 airline_sentiment
neutral     200
positive    200
negative    200
Name: count, dtype: int64
Test data value counts:
 airline_sentiment
neutral     33
positive    33
negative    33
Name: count, dtype: int64


In [44]:
# Output path for the sentiment training examples; this cell writes one JSON
# object per line to it (local Windows path, adjust for your machine).
json_file_path = r"D:\Datasets\airline_sentiments.json"

def create_json_structure(row):
    """Build one chat-format training example from a tweet row.

    Args:
        row: Mapping-like object (e.g. a DataFrame row) providing 'text' and
            'airline_sentiment'.

    Returns:
        A dict with a single 'messages' key holding the system, user and
        assistant messages, in that order. The tweet text is the user message
        and its sentiment label is the assistant message.
    """
    system_message = {
        "role": "system",
        "content": "You are a Twitter sentiment analysis expert who can predict sentiment expressed in the tweets about an airline. You select sentiment value from positive, negative, or neutral.",
    }
    user_message = {"role": "user", "content": row['text']}
    assistant_message = {"role": "assistant", "content": row['airline_sentiment']}

    return {"messages": [system_message, user_message, assistant_message]}

# Turn every training row into a chat-format example
json_structures = training_data.apply(create_json_structure, axis=1).tolist()

# Emit one JSON document per line
with open(json_file_path, 'w') as f:
    for json_structure in json_structures:
        print(json.dumps(json_structure), file=f)

print(f"Data has been written to {json_file_path}")


Data has been written to D:\Datasets\airline_sentiments.json


In [45]:
# Create the OpenAI client; the API key is read from the OPENAI_API_KEY
# environment variable so that no secret is stored in the notebook.
client = OpenAI(
    # This is the default and can be omitted
    api_key = os.environ.get('OPENAI_API_KEY'),
)

# Upload the training examples for fine-tuning. The file is opened in a
# context manager so the handle is closed as soon as the upload call returns.
with open(json_file_path, "rb") as training_fh:
    training_file = client.files.create(
        file=training_fh,
        purpose="fine-tune"
    )

In [46]:
# Start a fine-tuning job on the uploaded training file, with the
# gpt-4o-2024-08-06 model as the base.
fine_tuning_job_gpt4o = client.fine_tuning.jobs.create(
  training_file=training_file.id, 
  model="gpt-4o-2024-08-06"
)

In [None]:
# Print up to 10 events of the sentiment fine-tuning job; re-run this cell to
# follow the job's progress.
print(client.fine_tuning.jobs.list_events(fine_tuning_job_id = fine_tuning_job_gpt4o.id,
                                    limit=10))

In [74]:
# Fetch the current state of the sentiment fine-tuning job.
job_gpt4o = client.fine_tuning.jobs.retrieve(fine_tuning_job_gpt4o.id)
ft_model_id = job_gpt4o.fine_tuned_model

# The job only reports a fine-tuned model name once it has finished
# successfully. Stop here instead of passing None to the inference cells.
if ft_model_id is None:
    raise RuntimeError(
        f"Fine-tuning job {job_gpt4o.id} has no fine-tuned model yet "
        f"(status: {job_gpt4o.status}). Re-run this cell once the job has succeeded."
    )

In [77]:
def find_sentiment(client, model, dataset, max_retries=5, retry_delay=2.0):
    """Classify every tweet in ``dataset`` with a chat model and report accuracy.

    Each tweet is sent in its own chat-completion request. A failed request is
    retried up to ``max_retries`` times with a pause in between; if it still
    fails the function raises instead of retrying forever.

    Args:
        client: OpenAI client exposing ``chat.completions.create``.
        model: Name or id of the chat model to query.
        dataset: DataFrame with a 'text' column (tweets) and an
            'airline_sentiment' column (true labels).
        max_retries: Maximum number of attempts per tweet (at least 1).
        retry_delay: Seconds to wait between failed attempts.

    Returns:
        The accuracy of the predicted labels against
        ``dataset['airline_sentiment']``, as returned by ``accuracy_score``.

    Raises:
        RuntimeError: If a tweet could not be classified after
            ``max_retries`` attempts; the last error is chained as the cause.
    """
    tweets_list = dataset["text"].tolist()
    attempts_allowed = max(1, int(max_retries))

    all_sentiments = []

    for position, tweet in enumerate(tweets_list, start=1):
        content = """What is the sentiment expressed in the following tweet about an airline?
            Select sentiment value from positive, negative, or neutral. Return only the sentiment value in small letters.
            tweet: {}""".format(tweet)

        for attempt in range(1, attempts_allowed + 1):
            try:
                response = client.chat.completions.create(
                    model=model,
                    temperature=0,
                    max_tokens=10,
                    messages=[
                        {"role": "user", "content": content}
                    ]
                )
                raw_value = response.choices[0].message.content
                break

            except Exception as e:
                print("===================")
                print("Exception occurred:", e)
                # A persistent failure (bad key, unknown model, ...) must not
                # loop forever: give up once the attempts are used up.
                if attempt == attempts_allowed:
                    raise RuntimeError(
                        f"Giving up on tweet {position} after {attempts_allowed} failed attempts"
                    ) from e
                time.sleep(retry_delay)

        # Normalise the reply so stray whitespace or capitalisation does not
        # count as a wrong prediction; a missing reply becomes an empty label.
        sentiment_value = (raw_value or "").strip().lower()

        all_sentiments.append(sentiment_value)
        print(position, sentiment_value)

    # accuracy_score expects (y_true, y_pred) in that order.
    accuracy = accuracy_score(dataset["airline_sentiment"], all_sentiments)
    print(f"Accuracy: {accuracy}")
    return accuracy
    
# Run the fine-tuned model over the test tweets and print its accuracy.
find_sentiment(client,ft_model_id, test_data)

1 neutral
2 neutral
3 neutral
4 positive
5 neutral
6 neutral
7 neutral
8 neutral
9 positive
10 positive
11 neutral
12 positive
13 neutral
14 neutral
15 neutral
16 neutral
17 neutral
18 neutral
19 neutral
20 positive
21 neutral
22 neutral
23 neutral
24 neutral
25 neutral
26 neutral
27 neutral
28 neutral
29 neutral
30 neutral
31 neutral
32 neutral
33 neutral
34 positive
35 positive
36 positive
37 positive
38 positive
39 positive
40 neutral
41 positive
42 positive
43 positive
44 positive
45 positive
46 positive
47 positive
48 positive
49 positive
50 positive
51 positive
52 positive
53 positive
54 positive
55 positive
56 positive
57 positive
58 positive
59 positive
60 positive
61 positive
62 positive
63 positive
64 positive
65 positive
66 positive
67 neutral
68 negative
69 negative
70 negative
71 negative
72 negative
73 negative
74 negative
75 negative
76 negative
77 negative
78 negative
79 negative
80 negative
81 negative
82 negative
83 negative
84 negative
85 negative
86 negative
87 nega

## Fine-tuning GPT-4o for Text Summarization

In [24]:
# Load the news articles and their human-written summaries from a local
# Excel file (adjust the path for your machine).
dataset = pd.read_excel(r"D:\Datasets\dataset.xlsx")
# Shuffle the rows. The fixed seed makes the shuffle, and therefore the
# position-based training and evaluation slices taken later, reproducible.
dataset = dataset.sample(frac=1, random_state=42)
# Character length of each summary; .str.len() yields NaN for a missing
# summary instead of raising, and mean() skips NaN.
dataset['summary_length'] = dataset['human_summary'].str.len()
average_length = dataset['summary_length'].mean()
print(f"Average length of summaries: {average_length:.2f} characters")
print(dataset.shape)
dataset.head()

Average length of summaries: 1168.78 characters
(1000, 11)


Unnamed: 0.1,Unnamed: 0,id,human_summary,publication,author,date,year,month,theme,content,summary_length
800,259,18240,His decision to withdraw comes the same week a...,New York Times,Susanne Craig,2017-02-06,2017.0,2.0,politics,"Vincent Viola, a billionaire Wall Street trade...",945
622,259,18015,President Trump spoke by telephone with the ac...,New York Times,Michael D. Shear and Maggie Haberman,2017-01-27,2017.0,1.0,politics,WASHINGTON — President Trump spoke by telep...,1392
858,259,18307,Among the major brands that used their commerc...,New York Times,Sapna Maheshwari,2017-02-06,2017.0,2.0,sports,While many Super Bowl advertisers chose to be ...,896
172,0,17493,"A year later, the reason for the strike remain...",New York Times,Joseph Goldstein,2017-01-10,2017.0,1.0,lifestyle,"On an overcast Friday morning last January, lo...",1412
996,259,18461,Signing balotelli was not just a way to garner...,New York Times,Rory Smith,2017-02-10,2017.0,2.0,sports,"NICE, France — Rivère accepts the complim...",834


In [28]:
# Rows at positions 101-200 of the shuffled frame (up to 100 articles) become
# the fine-tuning examples.
selected_data = dataset.iloc[101:201]

def create_json_structure(row):
    """Build one chat-format training example from a news-article row.

    This definition replaces the tweet-oriented function of the same name
    defined earlier in the notebook.

    Args:
        row: Mapping-like object (e.g. a DataFrame row) providing 'content'
            and 'human_summary'.

    Returns:
        A dict with a single 'messages' key holding the system, user and
        assistant messages, in that order. The article content is the user
        message and the human summary is the assistant message.
    """
    system_message = {
        "role": "system",
        "content": "You are analyzing news articles. Use the provided content to generate a concise summary.",
    }
    user_message = {"role": "user", "content": row['content']}
    assistant_message = {"role": "assistant", "content": row['human_summary']}

    return {"messages": [system_message, user_message, assistant_message]}

# Turn every selected article into a chat-format training example
json_structures = selected_data.apply(create_json_structure, axis=1).tolist()

# Destination file for the summarization training examples
json_file_path = r"D:\Datasets\news_summaries.json"

# Emit one JSON document per line
with open(json_file_path, 'w') as f:
    for json_structure in json_structures:
        print(json.dumps(json_structure), file=f)

print(f"Data has been written to {json_file_path}")

Data has been written to D:\Datasets\news_summaries.json


In [29]:
# Upload the summarization training examples for fine-tuning. The file is
# opened in a context manager so the handle is closed once the upload returns.
with open(json_file_path, "rb") as training_fh:
    training_file = client.files.create(
        file=training_fh,
        purpose="fine-tune"
    )

In [30]:
# Start a fine-tuning job on the uploaded summarization training file, with
# the gpt-4o-2024-08-06 model as the base.
fine_tuning_job_gpt4o_ts = client.fine_tuning.jobs.create(
  training_file=training_file.id, 
  model="gpt-4o-2024-08-06"
)

In [None]:
# Print up to 10 events of the summarization fine-tuning job; re-run this cell
# to follow the job's progress.
print(client.fine_tuning.jobs.list_events(fine_tuning_job_id = fine_tuning_job_gpt4o_ts.id,
                                    limit=10))

In [37]:
# Fetch the current state of the summarization fine-tuning job.
job_gpt4o_ts = client.fine_tuning.jobs.retrieve(fine_tuning_job_gpt4o_ts.id)
ft_model_id = job_gpt4o_ts.fine_tuned_model

# The job only reports a fine-tuned model name once it has finished
# successfully. Stop here instead of passing None to the summarization loop.
if ft_model_id is None:
    raise RuntimeError(
        f"Fine-tuning job {job_gpt4o_ts.id} has no fine-tuned model yet "
        f"(status: {job_gpt4o_ts.status}). Re-run this cell once the job has succeeded."
    )

In [38]:
def calculate_rouge(reference, candidate):
    """Score ``candidate`` against ``reference`` with ROUGE-1, ROUGE-2 and ROUGE-L.

    Args:
        reference: The reference text, passed first to the scorer.
        candidate: The text being evaluated, passed second to the scorer.

    Returns:
        A dict mapping each metric name returned by the scorer to the
        ``fmeasure`` attribute of its score.
    """
    metric_names = ['rouge1', 'rouge2', 'rougeL']
    rouge = rouge_scorer.RougeScorer(metric_names, use_stemmer=True)

    f_measures = {}
    for metric, result in rouge.score(reference, candidate).items():
        f_measures[metric] = result.fmeasure
    return f_measures

In [39]:

%%time

# One record per evaluated article: its id, the generated summary and the
# ROUGE scores against the human summary.
results = []

# Evaluate on the first 20 rows of the shuffled frame; the fine-tuning
# examples were taken from positions 101-200, so the two sets do not overlap.
for article_number, (_, row) in enumerate(dataset[:20].iterrows(), start=1):
    article = row['content']
    human_summary = row['human_summary']

    print(f"Summarizing article {article_number}.")

    prompt = f"Summarize the following article in 1150 characters. The summary should look like human created:\n\n{article}\n\nSummary:"

    response = client.chat.completions.create(
        model=ft_model_id,
        messages=[{"role": "user", "content": prompt}],
        max_tokens=1150,
        temperature=0.7
    )
    # A reply without text content is scored as an empty summary rather than
    # handing None to the ROUGE scorer.
    generated_summary = response.choices[0].message.content or ""
    rouge_scores = calculate_rouge(human_summary, generated_summary)

    results.append({
        # Explicit column lookup instead of attribute-style access.
        'article_id': row['id'],
        'generated_summary': generated_summary,
        'rouge1': rouge_scores['rouge1'],
        'rouge2': rouge_scores['rouge2'],
        'rougeL': rouge_scores['rougeL']
    })


Summarizing article 1.
Summarizing article 2.
Summarizing article 3.
Summarizing article 4.
Summarizing article 5.
Summarizing article 6.
Summarizing article 7.
Summarizing article 8.
Summarizing article 9.
Summarizing article 10.
Summarizing article 11.
Summarizing article 12.
Summarizing article 13.
Summarizing article 14.
Summarizing article 15.
Summarizing article 16.
Summarizing article 17.
Summarizing article 18.
Summarizing article 19.
Summarizing article 20.
CPU times: total: 625 ms
Wall time: 1min 32s


In [40]:

# Collect the per-article records into a DataFrame.
results_df = pd.DataFrame(results)
# Average each ROUGE column across the evaluated articles.
mean_values = results_df[["rouge1", "rouge2", "rougeL"]].mean()
print(mean_values)


rouge1    0.579758
rouge2    0.417515
rougeL    0.431266
dtype: float64
