In [1]:
import csv
import getpass
import json
import os
import pickle

import matplotlib.pyplot as plt
import numpy as np
import openai
import pandas as pd
from openai import OpenAI
from scipy.stats import pearsonr

# Paths used throughout the notebook, all derived from the current working directory.
CWD = os.path.abspath("")  # get jupyter notebook path
# Data folder: the CSV inputs are read from here and the JSON/JSONL outputs are written here.
path_ = os.path.join(CWD, "batch")
# Pickle file inside the data folder (presumably where model ratings are stored -- confirm against the cells that use it).
file_path_ratings = os.path.join(path_, "cgpt_ratings.pkl")

## Instructions for the input files
About the training/validation CSV files:
- `rated_training.csv` and `rated_validation.csv` together contain the full set of annotator-rated sentences.
- `rated_training_more_neg.csv` contains a subset of the samples in `rated_training.csv` and `rated_validation.csv`, and is the file actually used for fine-tuning (`more_neg` stands for more critical-sentiment sentences). The rated sentences are picked manually so that this file contains a somewhat larger share of critical sentences; this makes the data more balanced and lets the LLM see a fair share of what critical sentiment looks like. Otherwise critical sentiment may be too rare for the fine-tuning to be effective.
- `rated_validation_more_neg.csv` is generated from the three files above (by `make_json`); it is the holdout dataset used to make the validation figures.

In [2]:
# Use the OPENAI_API_KEY environment variable when it is set; otherwise prompt
# for the key without echoing it, so the secret is never stored in the
# notebook's saved cell output.
api_key = os.environ.get("OPENAI_API_KEY") or getpass.getpass("OpenAI API key: ")

client = OpenAI(api_key=api_key)


# System prompt: the rating instructions sent with every request and embedded in
# every fine-tuning example. Keep the text unchanged between fine-tuning and
# inference so the model sees the same instructions in both.
system_prompt = """For each in-text citation, the rater should measure the sentiment of the citing research toward the cited research (represented as the character ✪), on a scale of -1 to 1. 
The rater should assign a positive score (+1) to statements depicting the cited research as positive, corroborative, consistent with, similar to, or in common with the citing research.
Conversely, the rater should assign a negative score (-1) to statements depicting the cited research as negative, refuting, inconsistent with, dissimilar to, or different from the citing research.
If the statements are neutral or do not belong to the aforementioned categories, then the rater should assign 0 to the statements. 
When you are given a sentence only answer with the numerical results without explanation. """


# Prefix placed in front of each sentence to form the user message.
user_prompt = "The sentence to analyze is : " 

def get_rating(citation_text, model='PUT_MODEL_NUM', temperature=0.01):
    """Ask the chat model to rate the sentiment of one citation sentence.

    Parameters
    ----------
    citation_text : str
        Sentence to rate; it is appended to ``user_prompt`` to form the user
        message.
    model : str, optional
        Model identifier sent to the chat-completions endpoint. The default is
        a placeholder that must be replaced with the fine-tuned model id.
    temperature : float, optional
        Sampling temperature sent with the request.

    Returns
    -------
    str or None
        The content of the first returned choice, or ``None`` when the request
        failed with one of the OpenAI errors handled below (details are
        printed instead of raised).
    """
    messages = [
        {"role": "system", "content": system_prompt},
        {"role": "user", "content": user_prompt + citation_text},
    ]
    try:
        chat_completion = client.chat.completions.create(
            model=model,
            temperature=temperature,
            messages=messages,
        )
    except openai.APIConnectionError as e:
        print("The server could not be reached")
        print(e.__cause__)
        return None
    except openai.RateLimitError as e:
        print("A 429 status code was received; we should back off a bit.")
        print(e)
        return None
    except openai.APIStatusError as e:
        print("Another non-200-range status code was received")
        print(e.status_code)
        print(e.response)
        return None

    return chat_completion.choices[0].message.content


def transform_rating(ratings):
    """Collapse several annotator ratings into one label in {-1, 0, 1}.

    The ratings are averaged; a mean of at least 2/5 maps to 1, a mean of at
    most -2/5 maps to -1, and anything in between maps to 0.
    """
    cutoff = 2 / 5
    mean_rating = np.mean(ratings)
    if mean_rating >= cutoff:
        return 1
    if mean_rating <= -cutoff:
        return -1
    return 0

In [4]:
def make_json(path_):
    """Build the hold-out CSV and the chat-format fine-tuning files in ``path_``.

    Reads ``rated_training.csv``, ``rated_validation.csv`` and
    ``rated_training_more_neg.csv`` from ``path_``. Every row of the first two
    files whose sentence appears in ``rated_training_more_neg.csv`` is removed
    and the remaining rows are written to ``rated_validation_more_neg.csv``.
    ``training_more_neg.json`` and ``validation_more_neg.json`` are then
    written with one JSON object per line, each holding the system prompt, the
    user prompt plus the cleaned sentence, and the label computed by
    ``transform_rating`` from all non-sentence columns of the row.

    Parameters
    ----------
    path_ : str
        Data folder containing the three input CSV files; outputs are written
        to the same folder.

    Returns
    -------
    list
        ``[df_train, df_valid]``: the fine-tuning frame and the hold-out frame.

    Raises
    ------
    Exception
        If a fine-tuning sentence does not match exactly one row of the
        combined rated data, or if the two resulting frames do not add up to
        the size of the combined rated data.
    """
    f1 = os.path.join(path_, "rated_training.csv")
    f2 = os.path.join(path_, "rated_validation.csv")
    # Full set of rated sentences; rows also present in rated_training_more_neg are dropped below.
    df_all = pd.concat([pd.read_csv(f1, sep=","), pd.read_csv(f2, sep=",")], ignore_index=True)
    num_ = len(df_all)
    f_train_more_neg = os.path.join(path_, "rated_training_more_neg.csv")
    df_train = pd.read_csv(f_train_more_neg, sep=",")

    idxs2drop = []
    for sentence in df_train["sentences"]:
        is_match = df_all["sentences"] == sentence
        idx = is_match[is_match].index
        # Each fine-tuning sentence must come from exactly one rated row.
        if len(idx) != 1:
            raise Exception("Something went wrong #1.")
        idxs2drop.append(idx[0])
    df_valid = df_all.drop(index=idxs2drop)

    # The two splits must partition the combined data.
    if (len(df_train) + len(df_valid)) != num_:
        raise Exception("Something went wrong #2.")
    df_valid.to_csv(os.path.join(path_, "rated_validation_more_neg.csv"), index=False)  # this is ones that are not in rated_training_more_neg

    for df, suffix in zip([df_train, df_valid], ["training_more_neg", "validation_more_neg"]):
        # Build every record before opening the output file, so a failure while
        # labelling does not leave a partially written file behind.
        lines = []
        for _, row in df.iterrows():
            ratings = np.array(row.drop("sentences"))
            sent = row["sentences"].replace("(|)", "").replace("[]", "")
            rate = transform_rating(ratings)
            record = {
                "messages": [
                    {"role": "system", "content": system_prompt},
                    {"role": "user", "content": user_prompt + sent},
                    {"role": "assistant", "content": str(rate)},
                ]
            }
            lines.append(json.dumps(record) + " \n")

        # Context manager guarantees the handle is closed even if a write fails.
        with open(os.path.join(path_, f"{suffix}.json"), "w") as f:
            f.writelines(lines)

    return [df_train, df_valid]

In [5]:
# Build the fine-tuning / hold-out splits and write the CSV and JSON files into the data folder.
df_train, df_valid = make_json(path_)

## Batching method

The cells below submit the sentences for rating through the OpenAI batch endpoint.

In [10]:
# Number of rows in the fine-tuning split and in the hold-out split.
print(len(df_train), len(df_valid))

144 156


In [24]:
# Maximum number of requests per batch input file (presumably chosen to stay
# just under the Batch API's per-file request limit -- confirm).
batch_size = 49999

# Row indices of df_both that should be skipped (presumably ones already rated
# in an earlier run -- confirm); an empty list means every row is submitted.
saved_data = []
df_both = pd.concat([df_train, df_valid], ignore_index=True)
list_sentences = df_both["sentences"].tolist()

# Set lookup keeps the filter linear in the number of rows.
already_saved = set(saved_data)
list_todo = [index for index in df_both.index if index not in already_saved]

# Split the pending indices into consecutive chunks of at most batch_size.
# Step slicing covers every pending index, however many batches that takes.
list_of_batch = [list_todo[start : start + batch_size] for start in range(0, len(list_todo), batch_size)]
if not list_of_batch:
    # Keep a single empty batch when nothing is pending so list_of_batch[0] stays valid.
    list_of_batch = [[]]

In [25]:
# Sanity check: size of every batch, then the total number of sentences.
print(list(map(len, list_of_batch)))
# print(len(set(list_of_batch[0]).union(set(list_of_batch[1]))) / (2 * batch_size))  # needed only if we have 2+ batches
print(len(list_sentences))

[300]
300


In [None]:
# Write the request file for the selected batch and upload it.
# Only batch 0 is processed; widen the range to handle further batches.
for batch_todo in range(0, 1):

    # One chat-completions request per pending sentence; custom_id encodes the
    # batch number and the row index so results can be mapped back.
    tasks = [
        {
            "custom_id": f"task-{batch_todo}-{index_sent}",
            "method": "POST",
            "url": "/v1/chat/completions",
            "body": {
                # "model": "gpt-3.5-turbo-1106",
                "model": "PUT_MODEL_NUM",  # Trained model.
                "temperature": 0.01,
                "messages": [
                    {"role": "system", "content": system_prompt},
                    {"role": "user", "content": user_prompt + list_sentences[index_sent]},
                ],
            },
        }
        for index_sent in list_of_batch[batch_todo]
    ]

    file_name = os.path.join(path_, f"sentiment_batch_{batch_todo}.jsonl")  # Name the file.
    with open(file_name, "w") as file:
        for obj in tasks:
            file.write(json.dumps(obj) + "\n")

    # Upload inside a context manager so the read handle is closed afterwards.
    with open(file_name, "rb") as upload_handle:
        batch_file = client.files.create(file=upload_handle, purpose="batch")

    print(batch_file)

In [28]:
# Start a batch job on the uploaded request file; uses the `batch_file` left by the upload cell.
batch_job = client.batches.create(input_file_id=batch_file.id, endpoint="/v1/chat/completions", completion_window="24h")