In [43]:
import os
from io import StringIO

import datasets
import evaluate
import pandas as pd
from openai import AzureOpenAI
from sklearn.metrics import classification_report, f1_score, precision_score, recall_score

# Azure OpenAI client. The API key is read from the environment so that it is
# never stored in the notebook. "2023-12-01-preview" was the highest API
# version number available when this was written (1/12/23).
client = AzureOpenAI(
    azure_endpoint='https://oai-cbipm-01.openai.azure.com/',
    api_key=os.environ["OPENAI_API_KEY"],
    api_version="2023-12-01-preview",
)

# Name of the model deployment that is passed as `model=` to the chat API.
deployment = "Deployment"

In [None]:
# metrics function
def compute_metrics(predictions, references, labels=None, pos_label=1, average="weighted", sample_weight=None, zero_division='warn'):
    """Print a classification report and return F1, precision and recall.

    Args:
        predictions: Predicted labels.
        references: Ground-truth labels, in the same order as ``predictions``.
        labels: Forwarded to ``f1_score``, ``precision_score``,
            ``recall_score`` and ``classification_report``.
        pos_label: Forwarded to the three score functions.
        average: Forwarded to the three score functions.
        sample_weight: Forwarded to the three score functions.
        zero_division: Forwarded to all four scikit-learn calls so that every
            reported number treats empty classes the same way.

    Returns:
        dict with the keys ``"f1"``, ``"precision"`` and ``"recall"``. A
        single-valued score is converted to a Python ``float``; anything
        holding more than one value (for example per-class scores) is returned
        unchanged.
    """
    def _scalar_if_single(score):
        # Depending on the scikit-learn version the score may be a NumPy
        # scalar/array (which has ``.size``) or a plain Python float (which
        # does not); treat an object without ``.size`` as a single value.
        if getattr(score, "size", 1) == 1:
            return float(score)
        return score

    score_kwargs = dict(
        labels=labels,
        pos_label=pos_label,
        average=average,
        sample_weight=sample_weight,
        zero_division=zero_division,
    )
    f1 = f1_score(references, predictions, **score_kwargs)
    p = precision_score(references, predictions, **score_kwargs)
    r = recall_score(references, predictions, **score_kwargs)

    # The per-class report is printed as a side effect; only the aggregate
    # scores are returned.
    report = classification_report(
        references, predictions, labels=labels, zero_division=zero_division
    )
    print(report)

    return {"f1": _scalar_if_single(f1),
            "precision": _scalar_if_single(p),
            "recall": _scalar_if_single(r)}

In [26]:
# training sentence selection function
def train_sentence_selection(df, n_neutral, n_positive, n_negative, label, random_state=42):
    """Sample example sentences for each sentiment class.

    Args:
        df: DataFrame containing a ``"<label>_label"`` column whose values
            include "neutral", "positive" and "negative".
        n_neutral: Number of "neutral" rows to sample.
        n_positive: Number of "positive" rows to sample.
        n_negative: Number of "negative" rows to sample.
        label: Prefix of the label column, e.g. "MD" selects ``"MD_label"``.
        random_state: Seed passed to every ``DataFrame.sample`` call. The
            default of 42 reproduces the previously hard-coded seed.

    Returns:
        DataFrame with the sampled neutral rows first, then positive, then
        negative, re-indexed from 0.

    Note:
        Sampling is without replacement, so pandas raises an error when a
        class has fewer rows than requested.
    """
    # Keep the caller's argument intact; build the column name separately.
    label_column = f"{label}_label"

    # Order matters: the result is concatenated in exactly this sequence.
    requested = (
        ("neutral", n_neutral),
        ("positive", n_positive),
        ("negative", n_negative),
    )
    sampled = [
        df[df[label_column] == sentiment].sample(count, replace=False, random_state=random_state)
        for sentiment, count in requested
    ]
    return pd.concat(sampled, ignore_index=True)

In [36]:
# Load the labelled train / test splits.
train = pd.read_csv("../data/train_PT_MD_labels.csv")
test = pd.read_csv("../data/test_PT_MD_labels.csv")

# Number of few-shot example sentences to draw for each MD label.
n_neutral_s = 1
n_positive_s = 1
n_negative_s = 1

# Draw the few-shot examples from the training split using the MD labels.
train_sentences = train_sentence_selection(
    train,
    n_neutral=n_neutral_s,
    n_positive=n_positive_s,
    n_negative=n_negative_s,
    label="MD",
)

# Serialise the example sentences and their labels as JSON text for the prompt.
json_train_sentences = train_sentences["language"].to_json()
json_train_labels = train_sentences["MD_label"].to_json()

# Serialise every test sentence as JSON text as well.
json_test_sentences = test["language"].to_json()

In [37]:
# Few-shot conversation: the first user turn gives the task plus the example
# sentences, the assistant turn supplies their labels, and the final user turn
# asks for the same treatment of the test sentences.
# NOTE: the prompt says "three example sentences"; keep it in sync with
# n_neutral_s + n_positive_s + n_negative_s if those counts change.
messages = [{"role": "system", "content": "You are a medical doctor."},
            {"role": "user", "content": "As a medical doctor, you write many clinical notes about patients.\n"
                                        "Your task is to analyze the sentiment of a series of sentences you wrote about patients.\n"
                                        "For each sentence, what is your attitude towards the patient you wrote about?\n"
                                        "Please assign a sentiment score of negative, neutral, or positive for each sentence.\n"
                                        "Below are three example sentences in JSON format:\n"
                                        # Newline keeps the example JSON separate from the instruction that follows.
                                        f"{json_train_sentences}\n"
                                        "Please provide your answer in JSON format."
             },
            {"role": "assistant", "content": f"{json_train_labels}"},
            {"role": "user", "content": f"Complete the same task with each of these sentences:\n{json_test_sentences}"}]

# temperature=0 and a fixed seed are requested to make the run as repeatable as the service allows.
response = client.chat.completions.create(model=deployment, messages=messages, temperature=0, seed=42)

# Raw model reply (JSON text); it is parsed and scored in the next cell.
output = response.choices[0].message.content
print(output)

{"0":"negative","1":"negative","2":"negative","3":"negative","4":"negative","5":"negative","6":"negative","7":"negative","8":"negative","9":"negative","10":"neutral","11":"negative","12":"positive","13":"positive","14":"positive","15":"positive","16":"negative","17":"positive","18":"negative","19":"neutral","20":"negative","21":"neutral","22":"positive","23":"negative","24":"neutral","25":"negative","26":"positive"}


In [38]:
# Parse the model's JSON reply into a one-column frame (column 0 holds the
# predicted label for each sentence). The text is wrapped in StringIO because
# passing a literal JSON string to pd.read_json is deprecated.
predictions = pd.read_json(StringIO(output), orient="index")

# Score the predictions against the MD labels of the test split.
compute_metrics(predictions[0], test["MD_label"])

              precision    recall  f1-score   support

    negative       0.56      0.82      0.67        11
     neutral       0.75      0.27      0.40        11
    positive       0.71      1.00      0.83         5

    accuracy                           0.63        27
   macro avg       0.68      0.70      0.63        27
weighted avg       0.67      0.63      0.59        27



  predictions = pd.read_json(output, orient="index")


{'f1': 0.5888888888888888,
 'precision': 0.6669973544973545,
 'recall': 0.6296296296296297}