In [None]:
import pandas as pd
import os
from openai import OpenAI
from tqdm import tqdm
import time
import random
# Fixed seed for the `random` module so runs draw the same random numbers.
SEED = 666
random.seed(SEED)
print("load done")

In [None]:
## This code block reads the training and testing datasets directly from the GitHub data source


def _read_jsonl(url):
    """Load a JSON-lines file (one JSON record per line) into a DataFrame."""
    return pd.read_json(url, lines=True)


def _full_seq(frame):
    """Return the prompt text column: stringified context followed by the response."""
    context_text = frame['context'].astype(str)
    response_text = frame['response'].astype(str)
    return "context: " + context_text + " response: " + response_text


# Per-source splits, fetched in the same order as before (Twitter, then Reddit).
twitter_training = _read_jsonl('https://raw.githubusercontent.com/EducationalTestingService/sarcasm/master/twitter/sarcasm_detection_shared_task_twitter_training.jsonl')
twitter_testing = _read_jsonl('https://raw.githubusercontent.com/EducationalTestingService/sarcasm/master/twitter/sarcasm_detection_shared_task_twitter_testing.jsonl')
reddit_training = _read_jsonl('https://raw.githubusercontent.com/EducationalTestingService/sarcasm/master/reddit/sarcasm_detection_shared_task_reddit_training.jsonl')
reddit_testing = _read_jsonl('https://raw.githubusercontent.com/EducationalTestingService/sarcasm/master/reddit/sarcasm_detection_shared_task_reddit_testing.jsonl')

# Stack the Reddit rows ahead of the Twitter rows and renumber the index from 0.
combined_training = pd.concat([reddit_training, twitter_training], ignore_index=True)
combined_testing = pd.concat([reddit_testing, twitter_testing], ignore_index=True)

# Add the text that is later sent to the model for each row.
combined_training['full_seq'] = _full_seq(combined_training)
combined_testing['full_seq'] = _full_seq(combined_testing)



In [None]:
## Set up the client account and write the prompt function

# The API key is read from the OPENAI_API_KEY environment variable, not hard-coded.
client = OpenAI(api_key=os.getenv("OPENAI_API_KEY"))

def api_predict(i, model = "gpt-3.5-turbo", temperature = 0, frequency_penalty = 0.0, toc = False, one_shot = False):
    """Ask a chat model to classify one test example as SARCASM or NOT_SARCASM.

    The prompt is a task instruction (optionally preceded by one randomly
    drawn labelled example from the module-level `combined_training` frame)
    followed by the `full_seq` text of one row of the module-level
    `combined_testing` frame. The request is sent through the module-level
    `client`.

    Args:
        i: Position (0-based row number) of the test example in
            `combined_testing`.
        model: Model name passed to the chat completions endpoint.
        temperature: Sampling temperature passed to the endpoint.
        frequency_penalty: Frequency penalty passed to the endpoint.
        toc: If True, the instruction asks for an explanation instead of
            restricting the output to the bare label.
        one_shot: If True, prepend one random training example (label and
            text) to the instruction.

    Returns:
        The text of the first completion choice. If the API returns no text
        content (`None`), an empty string is returned so callers can always
        treat the result as a string.
    """
    if toc:
        task = "Classify the user's response given context as either 'SARCASM' or 'NOT_SARCASM'. Give the explanation of your answer. "
    else:
        task = "Classify the user's response given context as either 'SARCASM' or 'NOT_SARCASM'. Only output 'SARCASM' or 'NOT_SARCASM' without anything else. "

    if one_shot:
        # Same RNG call as before, so a seeded run picks the same examples.
        example_pos = random.randint(0, len(combined_training) - 1)
        example_label = combined_training["label"].iloc[example_pos]
        example_text = combined_training["full_seq"].iloc[example_pos]
        task = (
            "Below is an example of a " + example_label
            + " response given its context: " + example_text
            + ". The example is finished. " + task
        )

    # `.iloc` because `i` is a row position; it does not depend on the
    # frame's index labels.
    prompt = task + combined_testing["full_seq"].iloc[i]

    chat_completion = client.chat.completions.create(
        model=model,
        temperature=temperature,
        frequency_penalty=frequency_penalty,
        max_tokens=4096,
        messages=[{"role": "user", "content": prompt}],
    )
    content = chat_completion.choices[0].message.content
    return content if content is not None else ""

In [None]:
## Run the predictions via the GPT API for different scenarios:

def make_predictions(model, toc, one_shot):
    """Query the model once per row of `combined_testing` and collect the answers.

    Args:
        model: Model name forwarded to `api_predict`.
        toc: Forwarded to `api_predict` (explanation-style prompt when True).
        one_shot: Forwarded to `api_predict` (prepend one example when True).

    Returns:
        A list with one `api_predict` result per test row, in row order.
    """
    n_examples = len(combined_testing)
    return [
        api_predict(row, model = model, toc = toc, one_shot = one_shot)
        for row in tqdm(range(n_examples))
    ]

def _report_run(name, raw_answers, predicted_labels=None):
    """Save the raw answers of one run to `<name>.csv` and print its accuracy.

    Args:
        name: Run identifier; used as the CSV file stem and in the printed line.
        raw_answers: Model outputs, one per row of `combined_testing`.
        predicted_labels: Labels to score against `combined_testing["label"]`.
            Defaults to `raw_answers`, i.e. the raw output must equal the
            gold label exactly to count as correct.

    Returns:
        The fraction of rows whose predicted label equals the gold label.
    """
    # Write the (expensive) API output first so it survives a scoring error.
    pd.DataFrame(raw_answers, columns=['Element']).to_csv(f'{name}.csv', index=False)

    if predicted_labels is None:
        predicted_labels = raw_answers
    gold = combined_testing["label"]
    # `.mean()` of the boolean matches divides by the actual number of test
    # rows instead of a hard-coded count.
    accuracy = (pd.Series(predicted_labels, index=gold.index) == gold).mean()
    print(f"{name} accuracy: {accuracy:.4f}")
    return accuracy


# gpt3.5, no TOC, 0-shot
pre_dict_gpt3shot_0 = make_predictions(model = "gpt-3.5-turbo", toc = False, one_shot = False)
_report_run('pre_dict_gpt3shot_0', pre_dict_gpt3shot_0)

# gpt4, no TOC, 0-shot
pre_dict_gpt4shot_0 = make_predictions(model = "gpt-4-turbo", toc = False, one_shot = False)
_report_run('pre_dict_gpt4shot_0', pre_dict_gpt4shot_0)

# gpt3.5, TOC, 0-shot
pre_dict_gpt3shot_0_toc = make_predictions(model = "gpt-3.5-turbo", toc = True, one_shot = False)
# Answers with explanations are free text: any answer containing "NOT_SARCASM"
# is scored as NOT_SARCASM and every other answer as SARCASM. `answer or ""`
# keeps the substring check from failing on a missing (None) answer.
process_pre_dict_gpt3shot_0_toc = ["NOT_SARCASM" if "NOT_SARCASM" in (answer or "") else "SARCASM" for answer in pre_dict_gpt3shot_0_toc]
_report_run('pre_dict_gpt3shot_0_toc', pre_dict_gpt3shot_0_toc, predicted_labels=process_pre_dict_gpt3shot_0_toc)

# gpt3.5, no TOC, 1-shot
pre_dict_gpt3shot_1 = make_predictions(model = "gpt-3.5-turbo", toc = False, one_shot = True)
_report_run('pre_dict_gpt3shot_1', pre_dict_gpt3shot_1)
