In [None]:
import openai
import copy
import pandas as pd
import numpy as np
import os
import json
import time
from helper_functions import openai_prediction

# An OpenAI API key is required to run this notebook. Create one on the OpenAI
# platform and store it under the "OPENAI_API_KEY" field of the provided
# creds.json file before executing this cell.
with open("creds.json") as creds_file:
    credentials = json.load(creds_file)

# Register the key on the openai module so that later API calls are authenticated.
api_key = credentials['OPENAI_API_KEY']
openai.api_key = api_key

In [None]:
# Test set used by the prompting experiments below; later cells read its
# `prompt` column to build the model inputs.
test_original = pd.read_csv("data/semeval_test.csv")
# Separate test file that is passed to the fine-tuned models at the end of the notebook.
test_finetuning = pd.read_csv("data/openai/semeval_gpt3_test.csv") # Use this test set to evaluate fine-tuned models

# Candidate instructions that are prepended to every statement.
# Each one ends with a blank line so the statement text starts on its own line.

# Variant 1: bare instruction plus answer format.
prompt1 = (
    "Return the TARGET [Trump/Clinton] and STANCE [Favor/Against/None]. "
    "Answer: {TARGET, STANCE}\n\n"
)

# Variant 2: same instruction, preceded by a one-sentence framing of the task.
prompt2 = (
    "This statement may express a STANCE about a TARGET. "
    "Return the TARGET [Trump/Clinton] and STANCE [Favor/Against/None]. "
    "Answer: {TARGET, STANCE}\n\n"
)

# Variant 3: fully spelled-out description of targets, stances and answer format.
prompt3 = (
    "This statement contains a TARGET and a STANCE. "
    "The target is a politician and the stance represents the attitude expressed about them. "
    "The target options are Trump or Clinton and stance options are Favor, Against or None. "
    "Provide the answer in the following format: {TARGET, STANCE}\n\n"
)

# Prompt engineering using GPT3 Davinci

In [None]:
# Build the zero-shot inputs from an independent copy of the test set.
# Every prompt variant gets its own column: instruction + statement text + a
# closing newline. The bare `prompt` column is dropped afterwards, so a later
# rename can promote exactly one variant to the name `prompt`.
test_zero = (
    test_original
    .copy(deep=True)
    .assign(
        prompt1=lambda frame: prompt1 + frame['prompt'] + "\n",
        prompt2=lambda frame: prompt2 + frame['prompt'] + "\n",
        prompt3=lambda frame: prompt3 + frame['prompt'] + "\n",
    )
    .drop(columns='prompt')
)

In [None]:
# Prompt-engineering sweep: run the same model once per prompt variant.
# The three runs differ only in which prompt column is promoted to `prompt`
# and in the suffix of the result name, so they are driven from one loop;
# model and chunking settings are stated once and cannot drift apart.
for prompt_column in ("prompt1", "prompt2", "prompt3"):
    # openai_prediction returns two values; neither is needed here.
    _, _ = openai_prediction(
        model="text-davinci-003",
        test_data=test_zero.rename(columns={prompt_column: "prompt"}),
        n_chunks=17,
        # Yields semeval_davinci_zero_prompt1 / _prompt2 / _prompt3.
        result_file_name=f"semeval_davinci_zero_{prompt_column}")

# Zero-shot

In [None]:
## Zero-shot with prompt 3
# NOTE(review): with the default values below this issues the same request as
# the prompt3 run in the prompt-engineering cell and reuses its result name;
# confirm whether that repeat is intended or whether this cell is meant to be
# run with the ada alternatives noted in the trailing comments.
zero_shot_input = test_zero.rename(columns={"prompt3": "prompt"})

_, _ = openai_prediction(
    model="text-davinci-003",  # alternative: "text-ada-001"
    test_data=zero_shot_input,
    n_chunks=17,
    result_file_name="semeval_davinci_zero_prompt3",  # "ada" (presumably in place of "davinci" when switching model)
)

# Few-shot prediction: one in-context example per run, using each of the 100 Clinton and 100 Trump training tweets in turn

In [None]:
train_100 = pd.read_csv("data/semeval_train_100.csv")

# One full pass over the test set per training row. The value in the first
# column of that row is inserted between "###" delimiters as a worked example,
# after the prompt3 instruction and ahead of every test statement.
for run_idx, (_row_label, example) in enumerate(train_100.iterrows()):
    demonstration = example.iloc[0]

    # Start from a fresh copy each time so examples never accumulate.
    test_few = test_original.copy(deep=True)
    test_few['prompt'] = prompt3 + "###\n" + demonstration + "\n###\n" + test_few['prompt'] + "\n"

    _, _ = openai_prediction(
        model="text-davinci-003",  # alternative: "text-ada-001"
        test_data=test_few,
        n_chunks=17,
        result_file_name=f"semeval_davinci_one_eg_{run_idx}",  # ada
    )

# Fine-tuning

In [None]:
# Create a fine-tuning job on the "davinci" base model ("ada" is the noted alternative).
# "YOUR_FILE_ID" is a placeholder and must be replaced with a real training-file id.
# NOTE(review): the variable name suggests the 10-example training set — confirm.
finetune_10 = openai.FineTune.create(model="davinci", training_file="YOUR_FILE_ID")

# Display the API response for this job.
finetune_10

In [None]:
# Create a fine-tuning job on the "davinci" base model ("ada" is the noted alternative).
# "YOUR_FILE_ID" is a placeholder and must be replaced with a real training-file id.
# NOTE(review): the variable name suggests the 100-example training set — confirm.
finetune_100 = openai.FineTune.create(model="davinci", training_file="YOUR_FILE_ID")

# Display the API response for this job.
finetune_100

In [None]:
# Create a fine-tuning job on the "davinci" base model ("ada" is the noted alternative).
# "YOUR_FILE_ID" is a placeholder and must be replaced with a real training-file id.
# NOTE(review): the variable name suggests the full training set — confirm.
finetune_all = openai.FineTune.create(model="davinci", training_file="YOUR_FILE_ID")

# Display the API response for this job.
finetune_all

### Prediction with fine-tuned models

In [None]:
# Run the fine-tuning test set through a fine-tuned model.
# "MODEL10" is a placeholder — presumably the model name produced by the
# finetune_10 job; confirm and substitute before running.
_, _ = openai_prediction(
    model="MODEL10",
    test_data=test_finetuning,
    n_chunks=20,
    result_file_name="semeval_davinci_10",  # "ada"
)

In [None]:
# Run the fine-tuning test set through a fine-tuned model.
# "MODEL100" is a placeholder — presumably the model name produced by the
# finetune_100 job; confirm and substitute before running.
_, _ = openai_prediction(
    model="MODEL100",
    test_data=test_finetuning,
    n_chunks=20,
    result_file_name="semeval_davinci_100",  # "ada"
)

In [None]:
# Run the fine-tuning test set through a fine-tuned model.
# "MODEL_all" is a placeholder — presumably the model name produced by the
# finetune_all job; confirm and substitute before running.
_, _ = openai_prediction(
    model="MODEL_all",
    test_data=test_finetuning,
    n_chunks=20,
    result_file_name="semeval_davinci_all",  # "ada"
)