In [None]:
import pandas as pd
import numpy as np
import getpass
import time
import openai
from tqdm import tqdm

In [None]:
# Ask for the OpenAI API key through getpass (input is not echoed) instead of
# hardcoding it in the notebook, then register it on the openai module.
openai.api_key = getpass.getpass('Enter your openai key:')

In [None]:
def send_request(prompt:str) -> str:
    """Send one single-turn chat request and return the reply text.

    The prompt is sent as the user message, preceded by a fixed system
    message, to the ``gpt-3.5-turbo`` model through
    ``openai.ChatCompletion.create``. All sampling options are fixed in this
    function (temperature 0.0, top_p 1, a single completion, at most 1024
    tokens, and a ``"11."`` stop sequence).

    Args:
        prompt: Text placed in the user message.

    Returns:
        The ``content`` field of the first choice's message.
    """
    conversation = [
        {"role": "system", "content": "Assistant is a large language model trained by OpenAI."},
        {"role": "user", "content": prompt},
    ]
    request_options = dict(
        model='gpt-3.5-turbo',
        messages=conversation,
        max_tokens=1024,
        n=1,
        temperature=0.0,
        top_p=1,
        frequency_penalty=0.52,
        presence_penalty=0.5,
        stop=["11."],
    )

    completion = openai.ChatCompletion.create(**request_options)
    first_choice = completion['choices'][0]
    return first_choice['message']['content']

In [None]:
# Read the training CSV (the later cells use its `text` and `target` columns)
# and show the first five rows as the cell output.
train_df = pd.read_csv(filepath_or_buffer="nlp-getting-started/train.csv")
train_df.head(n=5)

In [None]:
MAX_ATTEMPTS = 3         # API calls allowed per tweet before it is recorded as skipped
RETRY_DELAY_SECONDS = 5  # pause after a failed API call before trying again


def parse_label(response):
    """Extract the 0/1 answer from a model reply, or return None.

    Surrounding whitespace and trailing '.' or '!' characters are ignored, so
    replies such as "1", "0." or "1" followed by a newline all parse. A reply
    whose last remaining character is not 0 or 1 yields None, so it is never
    coerced into an out-of-range label.
    """
    cleaned = response.strip().rstrip('.!').rstrip()
    if cleaned and cleaned[-1] in ('0', '1'):
        return int(cleaned[-1])
    return None


prediction = []
y_true = []
skipped = []  # row positions that produced no usable prediction

for i in tqdm(range(len(train_df))):
    text = train_df.text.iloc[i]
    prompt = f"Predict whether the following tweet is about a real disaster or not. If yes return 1 else return 0. Write nothing else. : {text}"

    # Retry failed API calls a bounded number of times so a single transient
    # error does not remove the tweet from the evaluation.
    response = None
    for attempt in range(MAX_ATTEMPTS):
        try:
            response = send_request(prompt=prompt)
            break
        except Exception as e:
            print('Error: ', e)
            time.sleep(RETRY_DELAY_SECONDS)
    if response is None:
        skipped.append(i)
        continue

    label = parse_label(response)
    if label is None:
        print('Error: ', f'could not parse reply for row {i}: {response!r}')
        skipped.append(i)
        continue

    # Append both lists together so they always stay the same length.
    prediction.append(label)
    y_true.append(train_df.target.iloc[i])

# Make dropped rows visible: the metrics below only cover the collected rows.
print(f'{len(prediction)} predictions collected, {len(skipped)} rows skipped')

In [None]:
# Put the ground-truth labels next to the model's predictions, one row per
# evaluated tweet, and write the table to disk without the index column.
prediction_df = pd.DataFrame(dict(target=y_true, prediction=prediction))

prediction_df.to_csv(path_or_buf="prediction_chatgpt_3.csv", index=False)

In [None]:
import matplotlib.pyplot as plt
from sklearn.metrics import confusion_matrix
import seaborn as sns
import pandas as pd
import numpy as np

sns.set(style='white')
# Rows are actual classes and columns are predicted classes. The positive
# class (1) is listed first so that cell [0, 0] holds the true-positive share,
# which is what the 'True'/'False' tick order and the per-cell captions below
# assume. normalize='all' turns counts into fractions of all evaluated rows.
cm = confusion_matrix(prediction_df['target'], prediction_df['prediction'], normalize='all', labels=[1, 0])
#apply rounding
cm = np.around(cm, 2)

fig, ax = plt.subplots(figsize=(8, 6))
# The identity matrix only drives the colouring (diagonal = correct cells get
# the second palette colour, off-diagonal cells the first); the numbers shown
# come from `annot=cm`.
sns.heatmap(np.eye(2), annot=cm, fmt='g', annot_kws={'size': 50},
            cmap=sns.color_palette(['tomato', 'palegreen'], as_cmap=True), cbar=False,
            yticklabels=['True', 'False'], xticklabels=['True', 'False'], ax=ax)
ax.xaxis.tick_top()
ax.xaxis.set_label_position('top')
ax.tick_params(labelsize=20, length=0)

ax.set_title('Confusion Matrix for GPT-3.5', size=24, pad=20)
ax.set_xlabel('Predicted Values', size=20)
ax.set_ylabel('Actual Values', size=20)

# Captions follow the heatmap's row-major cell order:
# (actual True, predicted True), (actual True, predicted False), ...
additional_texts = ['(True Positive)', '(False Negative)', '(False Positive)', '(True Negative)']
# Snapshot the existing annotations first: ax.text() below adds new entries to
# ax.texts, and the loop should only visit the four heatmap numbers.
for text_elt, additional_text in zip(list(ax.texts), additional_texts):
    ax.text(*text_elt.get_position(), '\n' + additional_text, color=text_elt.get_color(),
            ha='center', va='top', size=24, weight='bold')
plt.tight_layout()
plt.savefig('gpt35.png', bbox_inches='tight')
plt.show()