In [69]:
import os
from io import StringIO
from itertools import combinations

import datasets
import evaluate
import pandas as pd
from openai import AzureOpenAI
from sklearn.metrics import f1_score, precision_score, recall_score, classification_report
from sklearn.model_selection import ParameterGrid

# Azure OpenAI client. The API key is read from the OPENAI_API_KEY environment
# variable; os.environ[...] raises KeyError here if that variable is not set.
client = AzureOpenAI(azure_endpoint='https://oai-cbipm-01.openai.azure.com/',
                     api_key=os.environ["OPENAI_API_KEY"],
                     api_version="2023-12-01-preview")  # 2023-12-01-preview <- highest version number as of 1/12/23

# Value passed as `model=` to client.chat.completions.create in the cells below.
deployment = "Deployment"

In [70]:
# metrics function
def compute_metrics(predictions, references, labels=None, pos_label=1, average="weighted", sample_weight=None, zero_division='warn'):
    """Print a classification report and return F1, precision and recall.

    Args:
        predictions: Predicted labels (passed to scikit-learn as ``y_pred``).
        references: Ground-truth labels (passed to scikit-learn as ``y_true``).
        labels, pos_label, average, sample_weight: Forwarded unchanged to
            ``f1_score``, ``precision_score`` and ``recall_score``.
        zero_division: Forwarded to all three scores and to
            ``classification_report`` so that every metric treats
            zero-denominator classes the same way.

    Returns:
        dict with keys ``"f1"``, ``"precision"`` and ``"recall"``. A score of
        size 1 is converted to a built-in ``float``; anything larger (e.g. the
        per-class array produced by ``average=None``) is returned as-is.
    """
    def _as_scalar(value):
        # Objects without a `.size` attribute (e.g. a built-in float) are
        # treated as scalars instead of raising AttributeError.
        return float(value) if getattr(value, "size", 1) == 1 else value

    # One shared keyword set keeps the three scores configured identically;
    # in particular zero_division now reaches f1_score as well.
    score_kwargs = dict(labels=labels, pos_label=pos_label, average=average,
                        sample_weight=sample_weight, zero_division=zero_division)

    f1 = f1_score(references, predictions, **score_kwargs)
    p = precision_score(references, predictions, **score_kwargs)
    r = recall_score(references, predictions, **score_kwargs)

    # The report is printed for inspection only; it is not part of the return value.
    c = classification_report(
        references, predictions, labels=labels, zero_division=zero_division
    )
    print(c)

    return {"f1": _as_scalar(f1),
            "precision": _as_scalar(p),
            "recall": _as_scalar(r)}

In [71]:
# training sentence selection function
def train_sentence_selection(df, n_neutral, n_positive, n_negative, label, seed):
    """Sample context sentences per sentiment class and stack them into one frame.

    Args:
        df: DataFrame containing a ``"<label>_label"`` column.
        n_neutral, n_positive, n_negative: Number of rows to draw (without
            replacement) from the rows labelled "neutral", "positive" and
            "negative" respectively.
        label: Prefix of the label column to filter on; the column read is
            ``f"{label}_label"``.
        seed: ``random_state`` used for each of the three draws.

    Returns:
        DataFrame with the neutral rows first, then positive, then negative,
        re-indexed from 0.
    """
    label_column = f"{label}_label"
    # Order matters: it fixes the row order of the returned frame.
    requested = (("neutral", n_neutral),
                 ("positive", n_positive),
                 ("negative", n_negative))
    picks = [
        df[df[label_column] == sentiment].sample(count, replace=False, random_state=seed)
        for sentiment, count in requested
    ]
    return pd.concat(picks, ignore_index=True)

In [67]:
# Load the train/test splits; both files are read relative to the notebook's directory.
train = pd.read_csv("../data/train_PT_MD_labels.csv")
test = pd.read_csv("../data/test_PT_MD_labels.csv")

# Disabled experiment: randomly selecting 1-2 training sentences per MD label.
# With the lines below commented out, the WHOLE training set is serialised instead.
# n_neutral_s = 2
# n_positive_s = 0
# n_negative_s = 0
# train_sentences = train_sentence_selection(train, n_neutral_s, n_positive_s, n_negative_s, "MD", 42)
# Series.to_json() produces a JSON object string keyed by the DataFrame index.
json_train_sentences = train["language"].to_json()
json_train_labels = train["MD_label"].to_json()
# print(train_sentences, json_train_labels)

# converting test sentences to json format
json_test_sentences = test["language"].to_json()

In [72]:
# Every unordered pair of values from the train "idx" column, in column order.
list(combinations(train["idx"], 2))

[(8, 28),
 (8, 6),
 (8, 0),
 (8, 16),
 (8, 5),
 (8, 18),
 (8, 3),
 (8, 32),
 (8, 10),
 (8, 21),
 (8, 4),
 (28, 6),
 (28, 0),
 (28, 16),
 (28, 5),
 (28, 18),
 (28, 3),
 (28, 32),
 (28, 10),
 (28, 21),
 (28, 4),
 (6, 0),
 (6, 16),
 (6, 5),
 (6, 18),
 (6, 3),
 (6, 32),
 (6, 10),
 (6, 21),
 (6, 4),
 (0, 16),
 (0, 5),
 (0, 18),
 (0, 3),
 (0, 32),
 (0, 10),
 (0, 21),
 (0, 4),
 (16, 5),
 (16, 18),
 (16, 3),
 (16, 32),
 (16, 10),
 (16, 21),
 (16, 4),
 (5, 18),
 (5, 3),
 (5, 32),
 (5, 10),
 (5, 21),
 (5, 4),
 (18, 3),
 (18, 32),
 (18, 10),
 (18, 21),
 (18, 4),
 (3, 32),
 (3, 10),
 (3, 21),
 (3, 4),
 (32, 10),
 (32, 21),
 (32, 4),
 (10, 21),
 (10, 4),
 (21, 4)]

In [62]:
# Few-shot prompt: the first user turn carries the example sentences, the assistant
# turn carries their labels, and the final user turn carries the test sentences.
# NOTE(review): the prompt text says "three example sentences", but in this notebook
# json_train_sentences is built from the entire train["language"] column — confirm the
# wording matches the number of examples actually sent.
# NOTE(review): there is no "\n" between the example JSON and "Please provide your
# answer in JSON format.", so the two run together in the prompt.
messages = [{"role": "system", "content": "You are a medical doctor."},
            {"role": "user", "content": "As a medical doctor, you write many clinical notes about patients.\n"
                                        "Your task is to analyze the sentiment of a series of sentences you wrote about patients.\n"
                                        "For each sentence, what is your attitude towards the patient you wrote about?\n"
                                        "Please assign a sentiment score of negative, neutral, or positive for each sentence.\n"
                                        "Below are three example sentences in JSON format:\n"
                                        f"{json_train_sentences}"
                                        "Please provide your answer in JSON format."},
            {"role": "assistant", "content": f"{json_train_labels}"},
            {"role": "user", "content": f"Complete the same task with each of these sentences:\n{json_test_sentences}"}]

# temperature=0 and a fixed seed are passed to make the reply as repeatable as the service allows.
response = client.chat.completions.create(model=deployment, messages=messages, temperature=0, seed=42)

# Raw text of the first choice; expected to be a JSON object mapping row index -> label.
output = response.choices[0].message.content
# Disabled post-processing draft (the last line is incomplete and would not run as written):
# predictions = pd.read_json(output, orient="index")
# predictions.columns.values[0] = "predictions"
# predictions["predictions"] = predictions[].map(replace)
print(output)

{"0":"negative","1":"negative","2":"negative","3":"negative","4":"negative","5":"negative","6":"negative","7":"negative","8":"negative","9":"negative","10":"neutral","11":"negative","12":"positive","13":"positive","14":"positive","15":"positive","16":"negative","17":"positive","18":"negative","19":"neutral","20":"negative","21":"neutral","22":"positive","23":"negative","24":"neutral","25":"negative","26":"positive"}


In [45]:
# Parse the model's JSON reply into a one-column DataFrame (column 0 holds the labels).
# The text is wrapped in StringIO because passing a literal JSON string to
# pd.read_json is deprecated and emits a FutureWarning.
predictions = pd.read_json(StringIO(output), orient="index")
# Last expression of the cell: the metrics dict is displayed as the cell output.
compute_metrics(predictions[0], test["MD_label"])

              precision    recall  f1-score   support

    negative       0.56      0.82      0.67        11
     neutral       0.75      0.27      0.40        11
    positive       0.71      1.00      0.83         5

    accuracy                           0.63        27
   macro avg       0.68      0.70      0.63        27
weighted avg       0.67      0.63      0.59        27



  predictions = pd.read_json(output, orient="index")


{'f1': 0.5888888888888888,
 'precision': 0.6669973544973545,
 'recall': 0.6296296296296297}

In [48]:
# Re-score the parsed predictions (column 0) against the MD labels of the test set.
# compute_metrics prints the classification report itself; the dict is printed afterwards.
results = compute_metrics(predictions[0], test["MD_label"])
print(results)

              precision    recall  f1-score   support

    negative       0.56      0.82      0.67        11
     neutral       0.75      0.27      0.40        11
    positive       0.71      1.00      0.83         5

    accuracy                           0.63        27
   macro avg       0.68      0.70      0.63        27
weighted avg       0.67      0.63      0.59        27

{'f1': 0.5888888888888888, 'precision': 0.6669973544973545, 'recall': 0.6296296296296297}


In [None]:
# Hyperparameters (for best configuration selection): how many context sentences
# of each MD label are shown to the model as few-shot examples.
params = {
    'seed': [42],
    'n_neutral_sentences': [1, 2, 3, 4, 5, 6],
    'n_positive_sentences': [0, 1, 2],
    'n_negative_sentences': [0, 1, 2]
}

metrics_file = 'MD_context_metrics.csv'
# Results are appended to an existing sheet; only a newly created file gets the header row.
write_header = not os.path.isfile(metrics_file)

# The data splits are the same for every configuration, so they are read once
# instead of re-reading both CSVs on each iteration.
train = pd.read_csv("../data/train_PT_MD_labels.csv")
test = pd.read_csv("../data/test_PT_MD_labels.csv")
# converting test sentences to json format
json_test_sentences = test["language"].to_json()

best_model = []  # not used in this cell; kept in case another cell reads it
best_f1 = 0.0
best_comb, best_results = None, None

# `with` guarantees the metrics file is flushed and closed even if a request or
# the parsing of a reply raises part-way through the grid.
with open(metrics_file, 'a') as f:
    if write_header:
        f.write('seed,n_neutral_sentences,n_positive_sentences,n_negative_sentences,f1,precision,recall\n')

    for comb in ParameterGrid(params):
        # randomly selecting context sentences in json format
        train_sentences = train_sentence_selection(train,
                                                   comb['n_neutral_sentences'],
                                                   comb['n_positive_sentences'],
                                                   comb['n_negative_sentences'],
                                                   "MD",
                                                   comb['seed'])
        json_train_sentences = train_sentences["language"].to_json()
        json_train_labels = train_sentences["MD_label"].to_json()

        # creating context prompt
        # NOTE(review): the prompt text says "three example sentences" while the grid sends
        # between 1 and 10 examples, and there is no "\n" between the example JSON and
        # "Please provide your answer in JSON format." The wording is left untouched so that
        # new rows stay comparable with rows already in the metrics file — confirm whether
        # it should be corrected.
        messages = [{"role": "system", "content": "You are a medical doctor."},
                    {"role": "user", "content": "As a medical doctor, you write many clinical notes about patients.\n"
                                                "Your task is to analyze the sentiment of a series of sentences you wrote about patients.\n"
                                                "For each sentence, what is your attitude towards the patient you wrote about?\n"
                                                "Please assign a sentiment score of negative, neutral, or positive for each sentence.\n"
                                                "Below are three example sentences in JSON format:\n"
                                                f"{json_train_sentences}"
                                                "Please provide your answer in JSON format."},
                    {"role": "assistant", "content": f"{json_train_labels}"},
                    {"role": "user", "content": f"Complete the same task with each of these sentences:\n{json_test_sentences}"}]

        # running chat completion
        response = client.chat.completions.create(model=deployment, messages=messages, temperature=0, seed=comb['seed'])
        # saving response to json format
        output = response.choices[0].message.content
        # StringIO wrapper: passing a literal JSON string to pd.read_json is deprecated.
        predictions = pd.read_json(StringIO(output), orient="index")
        # computing metrics
        results = compute_metrics(predictions[0], test["MD_label"])
        # saving results to metrics sheet
        v = [comb['seed'], comb['n_neutral_sentences'], comb['n_positive_sentences'], comb['n_negative_sentences'],
             results['f1'], results['precision'], results['recall']]
        f.write(','.join([str(el) for el in v]) + '\n')

        # Strict ">" keeps the first configuration that reaches a given F1.
        if results['f1'] > best_f1:
            best_f1 = results['f1']
            best_comb = comb
            best_results = results
            # Keep only the misclassified test rows of the current best configuration;
            # the file is overwritten each time a better configuration is found.
            error_analysis = pd.concat([test[["idx", "language", "PT_MD_labels", "MD_label"]], predictions], axis=1)
            error_analysis = error_analysis.rename(columns={0:"pred", "MD_label":"true"})
            mask = error_analysis["pred"] == error_analysis["true"]
            error_analysis = error_analysis[~ mask]
            error_analysis.to_csv('error_analysis.csv', index=False)
        print('-' * 100)
        print('\n\n')

if best_comb is not None:
    print(f'Best combination of context sentences: {best_comb}')
    print('\n')
    print(f'Best results: {best_results}')

In [66]:
# Hyperparameters (for best configuration selection): how many context sentences
# of each MD label are shown to the model as few-shot examples.
params = {
    'seed': [42],
    'n_neutral_sentences': [1, 2, 3, 4, 5, 6],
    'n_positive_sentences': [0, 1, 2],
    'n_negative_sentences': [0, 1, 2]
}

metrics_file = 'MD_context_metrics.csv'
# Results are appended to an existing sheet; only a newly created file gets the header row.
write_header = not os.path.isfile(metrics_file)

# The data splits are the same for every configuration, so they are read once
# instead of re-reading both CSVs on each iteration.
train = pd.read_csv("../data/train_PT_MD_labels.csv")
test = pd.read_csv("../data/test_PT_MD_labels.csv")
# converting test sentences to json format
json_test_sentences = test["language"].to_json()

best_model = []  # not used in this cell; kept in case another cell reads it
best_f1 = 0.0
best_comb, best_results = None, None

# `with` guarantees the metrics file is flushed and closed even if a request or
# the parsing of a reply raises part-way through the grid.
with open(metrics_file, 'a') as f:
    if write_header:
        f.write('seed,n_neutral_sentences,n_positive_sentences,n_negative_sentences,f1,precision,recall\n')

    for comb in ParameterGrid(params):
        # randomly selecting context sentences in json format
        train_sentences = train_sentence_selection(train,
                                                   comb['n_neutral_sentences'],
                                                   comb['n_positive_sentences'],
                                                   comb['n_negative_sentences'],
                                                   "MD",
                                                   comb['seed'])
        json_train_sentences = train_sentences["language"].to_json()
        json_train_labels = train_sentences["MD_label"].to_json()

        # creating context prompt
        # NOTE(review): the prompt text says "three example sentences" while the grid sends
        # between 1 and 10 examples, and there is no "\n" between the example JSON and
        # "Please provide your answer in JSON format." The wording is left untouched so that
        # new rows stay comparable with rows already in the metrics file — confirm whether
        # it should be corrected.
        messages = [{"role": "system", "content": "You are a medical doctor."},
                    {"role": "user", "content": "As a medical doctor, you write many clinical notes about patients.\n"
                                                "Your task is to analyze the sentiment of a series of sentences you wrote about patients.\n"
                                                "For each sentence, what is your attitude towards the patient you wrote about?\n"
                                                "Please assign a sentiment score of negative, neutral, or positive for each sentence.\n"
                                                "Below are three example sentences in JSON format:\n"
                                                f"{json_train_sentences}"
                                                "Please provide your answer in JSON format."},
                    {"role": "assistant", "content": f"{json_train_labels}"},
                    {"role": "user", "content": f"Complete the same task with each of these sentences:\n{json_test_sentences}"}]

        # running chat completion
        response = client.chat.completions.create(model=deployment, messages=messages, temperature=0, seed=comb['seed'])
        # saving response to json format
        output = response.choices[0].message.content
        # StringIO wrapper: passing a literal JSON string to pd.read_json is deprecated.
        predictions = pd.read_json(StringIO(output), orient="index")
        # computing metrics
        results = compute_metrics(predictions[0], test["MD_label"])
        # saving results to metrics sheet
        v = [comb['seed'], comb['n_neutral_sentences'], comb['n_positive_sentences'], comb['n_negative_sentences'],
             results['f1'], results['precision'], results['recall']]
        f.write(','.join([str(el) for el in v]) + '\n')

        # Strict ">" keeps the first configuration that reaches a given F1.
        if results['f1'] > best_f1:
            best_f1 = results['f1']
            best_comb = comb
            best_results = results
            # Keep only the misclassified test rows of the current best configuration;
            # the file is overwritten each time a better configuration is found.
            error_analysis = pd.concat([test[["idx", "language", "PT_MD_labels", "MD_label"]], predictions], axis=1)
            error_analysis = error_analysis.rename(columns={0:"pred", "MD_label":"true"})
            mask = error_analysis["pred"] == error_analysis["true"]
            error_analysis = error_analysis[~ mask]
            error_analysis.to_csv('error_analysis.csv', index=False)
        print('-' * 100)
        print('\n\n')

if best_comb is not None:
    print(f'Best combination of context sentences: {best_comb}')
    print('\n')
    print(f'Best results: {best_results}')

  predictions = pd.read_json(output, orient="index")


              precision    recall  f1-score   support

    negative       0.56      0.82      0.67        11
     neutral       0.57      0.36      0.44        11
    positive       1.00      0.80      0.89         5

    accuracy                           0.63        27
   macro avg       0.71      0.66      0.67        27
weighted avg       0.65      0.63      0.62        27

----------------------------------------------------------------------------------------------------





  predictions = pd.read_json(output, orient="index")
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


              precision    recall  f1-score   support

    negative       0.53      0.82      0.64        11
     neutral       0.00      0.00      0.00        11
    positive       0.50      1.00      0.67         5

    accuracy                           0.52        27
   macro avg       0.34      0.61      0.44        27
weighted avg       0.31      0.52      0.39        27

----------------------------------------------------------------------------------------------------





  predictions = pd.read_json(output, orient="index")


              precision    recall  f1-score   support

    negative       0.53      0.82      0.64        11
     neutral       0.67      0.18      0.29        11
    positive       0.71      1.00      0.83         5

    accuracy                           0.59        27
   macro avg       0.64      0.67      0.59        27
weighted avg       0.62      0.59      0.53        27

----------------------------------------------------------------------------------------------------





  predictions = pd.read_json(output, orient="index")


              precision    recall  f1-score   support

    negative       0.60      0.82      0.69        11
     neutral       0.71      0.45      0.56        11
    positive       1.00      1.00      1.00         5

    accuracy                           0.70        27
   macro avg       0.77      0.76      0.75        27
weighted avg       0.72      0.70      0.69        27

----------------------------------------------------------------------------------------------------





  predictions = pd.read_json(output, orient="index")


              precision    recall  f1-score   support

    negative       0.56      0.82      0.67        11
     neutral       0.67      0.36      0.47        11
    positive       1.00      1.00      1.00         5

    accuracy                           0.67        27
   macro avg       0.74      0.73      0.71        27
weighted avg       0.69      0.67      0.65        27

----------------------------------------------------------------------------------------------------





  predictions = pd.read_json(output, orient="index")


              precision    recall  f1-score   support

    negative       0.56      0.82      0.67        11
     neutral       0.67      0.36      0.47        11
    positive       1.00      1.00      1.00         5

    accuracy                           0.67        27
   macro avg       0.74      0.73      0.71        27
weighted avg       0.69      0.67      0.65        27

----------------------------------------------------------------------------------------------------





  predictions = pd.read_json(output, orient="index")


              precision    recall  f1-score   support

    negative       0.56      0.82      0.67        11
     neutral       0.57      0.36      0.44        11
    positive       1.00      0.80      0.89         5

    accuracy                           0.63        27
   macro avg       0.71      0.66      0.67        27
weighted avg       0.65      0.63      0.62        27

----------------------------------------------------------------------------------------------------





  predictions = pd.read_json(output, orient="index")


              precision    recall  f1-score   support

    negative       0.56      0.82      0.67        11
     neutral       0.57      0.36      0.44        11
    positive       1.00      0.80      0.89         5

    accuracy                           0.63        27
   macro avg       0.71      0.66      0.67        27
weighted avg       0.65      0.63      0.62        27

----------------------------------------------------------------------------------------------------





  predictions = pd.read_json(output, orient="index")


              precision    recall  f1-score   support

    negative       0.56      0.82      0.67        11
     neutral       0.57      0.36      0.44        11
    positive       1.00      0.80      0.89         5

    accuracy                           0.63        27
   macro avg       0.71      0.66      0.67        27
weighted avg       0.65      0.63      0.62        27

----------------------------------------------------------------------------------------------------





  predictions = pd.read_json(output, orient="index")


              precision    recall  f1-score   support

    negative       0.56      0.82      0.67        11
     neutral       0.57      0.36      0.44        11
    positive       1.00      0.80      0.89         5

    accuracy                           0.63        27
   macro avg       0.71      0.66      0.67        27
weighted avg       0.65      0.63      0.62        27

----------------------------------------------------------------------------------------------------





  predictions = pd.read_json(output, orient="index")


              precision    recall  f1-score   support

    negative       0.56      0.82      0.67        11
     neutral       0.57      0.36      0.44        11
    positive       1.00      0.80      0.89         5

    accuracy                           0.63        27
   macro avg       0.71      0.66      0.67        27
weighted avg       0.65      0.63      0.62        27

----------------------------------------------------------------------------------------------------





  predictions = pd.read_json(output, orient="index")


              precision    recall  f1-score   support

    negative       0.56      0.82      0.67        11
     neutral       0.67      0.36      0.47        11
    positive       1.00      1.00      1.00         5

    accuracy                           0.67        27
   macro avg       0.74      0.73      0.71        27
weighted avg       0.69      0.67      0.65        27

----------------------------------------------------------------------------------------------------





  predictions = pd.read_json(output, orient="index")


              precision    recall  f1-score   support

    negative       0.53      0.82      0.64        11
     neutral       0.50      0.18      0.27        11
    positive       0.83      1.00      0.91         5

    accuracy                           0.59        27
   macro avg       0.62      0.67      0.61        27
weighted avg       0.57      0.59      0.54        27

----------------------------------------------------------------------------------------------------





  predictions = pd.read_json(output, orient="index")


              precision    recall  f1-score   support

    negative       0.56      0.82      0.67        11
     neutral       0.57      0.36      0.44        11
    positive       1.00      0.80      0.89         5

    accuracy                           0.63        27
   macro avg       0.71      0.66      0.67        27
weighted avg       0.65      0.63      0.62        27

----------------------------------------------------------------------------------------------------





  predictions = pd.read_json(output, orient="index")


              precision    recall  f1-score   support

    negative       0.56      0.82      0.67        11
     neutral       0.57      0.36      0.44        11
    positive       1.00      0.80      0.89         5

    accuracy                           0.63        27
   macro avg       0.71      0.66      0.67        27
weighted avg       0.65      0.63      0.62        27

----------------------------------------------------------------------------------------------------





  predictions = pd.read_json(output, orient="index")


              precision    recall  f1-score   support

    negative       0.53      0.82      0.64        11
     neutral       0.67      0.18      0.29        11
    positive       0.71      1.00      0.83         5

    accuracy                           0.59        27
   macro avg       0.64      0.67      0.59        27
weighted avg       0.62      0.59      0.53        27

----------------------------------------------------------------------------------------------------





  predictions = pd.read_json(output, orient="index")


              precision    recall  f1-score   support

    negative       0.56      0.82      0.67        11
     neutral       0.57      0.36      0.44        11
    positive       1.00      0.80      0.89         5

    accuracy                           0.63        27
   macro avg       0.71      0.66      0.67        27
weighted avg       0.65      0.63      0.62        27

----------------------------------------------------------------------------------------------------





  predictions = pd.read_json(output, orient="index")


              precision    recall  f1-score   support

    negative       0.56      0.82      0.67        11
     neutral       0.57      0.36      0.44        11
    positive       1.00      0.80      0.89         5

    accuracy                           0.63        27
   macro avg       0.71      0.66      0.67        27
weighted avg       0.65      0.63      0.62        27

----------------------------------------------------------------------------------------------------





  predictions = pd.read_json(output, orient="index")


              precision    recall  f1-score   support

    negative       0.56      0.82      0.67        11
     neutral       0.60      0.27      0.37        11
    positive       0.83      1.00      0.91         5

    accuracy                           0.63        27
   macro avg       0.67      0.70      0.65        27
weighted avg       0.63      0.63      0.59        27

----------------------------------------------------------------------------------------------------





  predictions = pd.read_json(output, orient="index")


              precision    recall  f1-score   support

    negative       0.56      0.82      0.67        11
     neutral       0.75      0.27      0.40        11
    positive       0.71      1.00      0.83         5

    accuracy                           0.63        27
   macro avg       0.68      0.70      0.63        27
weighted avg       0.67      0.63      0.59        27

----------------------------------------------------------------------------------------------------





  predictions = pd.read_json(output, orient="index")


              precision    recall  f1-score   support

    negative       0.56      0.82      0.67        11
     neutral       0.75      0.27      0.40        11
    positive       0.71      1.00      0.83         5

    accuracy                           0.63        27
   macro avg       0.68      0.70      0.63        27
weighted avg       0.67      0.63      0.59        27

----------------------------------------------------------------------------------------------------





  predictions = pd.read_json(output, orient="index")


              precision    recall  f1-score   support

    negative       0.56      0.82      0.67        11
     neutral       0.60      0.27      0.37        11
    positive       0.83      1.00      0.91         5

    accuracy                           0.63        27
   macro avg       0.67      0.70      0.65        27
weighted avg       0.63      0.63      0.59        27

----------------------------------------------------------------------------------------------------





  predictions = pd.read_json(output, orient="index")
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


              precision    recall  f1-score   support

    negative       0.50      0.82      0.62        11
     neutral       0.00      0.00      0.00        11
    positive       0.56      1.00      0.71         5

    accuracy                           0.52        27
   macro avg       0.35      0.61      0.44        27
weighted avg       0.31      0.52      0.39        27

----------------------------------------------------------------------------------------------------





  predictions = pd.read_json(output, orient="index")
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


              precision    recall  f1-score   support

    negative       0.53      0.82      0.64        11
     neutral       0.00      0.00      0.00        11
    positive       0.50      1.00      0.67         5

    accuracy                           0.52        27
   macro avg       0.34      0.61      0.44        27
weighted avg       0.31      0.52      0.39        27

----------------------------------------------------------------------------------------------------





  predictions = pd.read_json(output, orient="index")


              precision    recall  f1-score   support

    negative       0.56      0.82      0.67        11
     neutral       0.57      0.36      0.44        11
    positive       1.00      0.80      0.89         5

    accuracy                           0.63        27
   macro avg       0.71      0.66      0.67        27
weighted avg       0.65      0.63      0.62        27

----------------------------------------------------------------------------------------------------





  predictions = pd.read_json(output, orient="index")


              precision    recall  f1-score   support

    negative       0.56      0.82      0.67        11
     neutral       0.57      0.36      0.44        11
    positive       1.00      0.80      0.89         5

    accuracy                           0.63        27
   macro avg       0.71      0.66      0.67        27
weighted avg       0.65      0.63      0.62        27

----------------------------------------------------------------------------------------------------





  predictions = pd.read_json(output, orient="index")


              precision    recall  f1-score   support

    negative       0.56      0.82      0.67        11
     neutral       0.80      0.36      0.50        11
    positive       0.83      1.00      0.91         5

    accuracy                           0.67        27
   macro avg       0.73      0.73      0.69        27
weighted avg       0.71      0.67      0.64        27

----------------------------------------------------------------------------------------------------





  predictions = pd.read_json(output, orient="index")


              precision    recall  f1-score   support

    negative       0.53      0.82      0.64        11
     neutral       0.33      0.27      0.30        11
    positive       1.00      0.20      0.33         5

    accuracy                           0.48        27
   macro avg       0.62      0.43      0.43        27
weighted avg       0.54      0.48      0.45        27

----------------------------------------------------------------------------------------------------





  predictions = pd.read_json(output, orient="index")


              precision    recall  f1-score   support

    negative       0.56      0.82      0.67        11
     neutral       0.57      0.36      0.44        11
    positive       1.00      0.80      0.89         5

    accuracy                           0.63        27
   macro avg       0.71      0.66      0.67        27
weighted avg       0.65      0.63      0.62        27

----------------------------------------------------------------------------------------------------





  predictions = pd.read_json(output, orient="index")


              precision    recall  f1-score   support

    negative       0.56      0.82      0.67        11
     neutral       0.67      0.36      0.47        11
    positive       0.80      0.80      0.80         5

    accuracy                           0.63        27
   macro avg       0.68      0.66      0.65        27
weighted avg       0.65      0.63      0.61        27

----------------------------------------------------------------------------------------------------





  predictions = pd.read_json(output, orient="index")


              precision    recall  f1-score   support

    negative       0.53      0.82      0.64        11
     neutral       0.33      0.27      0.30        11
    positive       1.00      0.20      0.33         5

    accuracy                           0.48        27
   macro avg       0.62      0.43      0.43        27
weighted avg       0.54      0.48      0.45        27

----------------------------------------------------------------------------------------------------





  predictions = pd.read_json(output, orient="index")


              precision    recall  f1-score   support

    negative       0.56      0.82      0.67        11
     neutral       0.40      0.36      0.38        11
    positive       1.00      0.20      0.33         5

    accuracy                           0.52        27
   macro avg       0.65      0.46      0.46        27
weighted avg       0.58      0.52      0.49        27

----------------------------------------------------------------------------------------------------





  predictions = pd.read_json(output, orient="index")


              precision    recall  f1-score   support

    negative       0.53      0.82      0.64        11
     neutral       0.67      0.18      0.29        11
    positive       0.71      1.00      0.83         5

    accuracy                           0.59        27
   macro avg       0.64      0.67      0.59        27
weighted avg       0.62      0.59      0.53        27

----------------------------------------------------------------------------------------------------





  predictions = pd.read_json(output, orient="index")


              precision    recall  f1-score   support

    negative       0.56      0.82      0.67        11
     neutral       0.40      0.36      0.38        11
    positive       1.00      0.20      0.33         5

    accuracy                           0.52        27
   macro avg       0.65      0.46      0.46        27
weighted avg       0.58      0.52      0.49        27

----------------------------------------------------------------------------------------------------





  predictions = pd.read_json(output, orient="index")


              precision    recall  f1-score   support

    negative       0.56      0.82      0.67        11
     neutral       0.57      0.36      0.44        11
    positive       1.00      0.80      0.89         5

    accuracy                           0.63        27
   macro avg       0.71      0.66      0.67        27
weighted avg       0.65      0.63      0.62        27

----------------------------------------------------------------------------------------------------





  predictions = pd.read_json(output, orient="index")


              precision    recall  f1-score   support

    negative       0.56      0.82      0.67        11
     neutral       0.57      0.36      0.44        11
    positive       1.00      0.80      0.89         5

    accuracy                           0.63        27
   macro avg       0.71      0.66      0.67        27
weighted avg       0.65      0.63      0.62        27

----------------------------------------------------------------------------------------------------





  predictions = pd.read_json(output, orient="index")


              precision    recall  f1-score   support

    negative       0.56      0.82      0.67        11
     neutral       0.60      0.27      0.37        11
    positive       0.83      1.00      0.91         5

    accuracy                           0.63        27
   macro avg       0.67      0.70      0.65        27
weighted avg       0.63      0.63      0.59        27

----------------------------------------------------------------------------------------------------





  predictions = pd.read_json(output, orient="index")


              precision    recall  f1-score   support

    negative       0.56      0.82      0.67        11
     neutral       0.60      0.27      0.37        11
    positive       0.83      1.00      0.91         5

    accuracy                           0.63        27
   macro avg       0.67      0.70      0.65        27
weighted avg       0.63      0.63      0.59        27

----------------------------------------------------------------------------------------------------





  predictions = pd.read_json(output, orient="index")


              precision    recall  f1-score   support

    negative       0.56      0.82      0.67        11
     neutral       0.75      0.27      0.40        11
    positive       0.71      1.00      0.83         5

    accuracy                           0.63        27
   macro avg       0.68      0.70      0.63        27
weighted avg       0.67      0.63      0.59        27

----------------------------------------------------------------------------------------------------





  predictions = pd.read_json(output, orient="index")


              precision    recall  f1-score   support

    negative       0.56      0.82      0.67        11
     neutral       0.60      0.27      0.37        11
    positive       0.83      1.00      0.91         5

    accuracy                           0.63        27
   macro avg       0.67      0.70      0.65        27
weighted avg       0.63      0.63      0.59        27

----------------------------------------------------------------------------------------------------





  predictions = pd.read_json(output, orient="index")


              precision    recall  f1-score   support

    negative       0.53      0.82      0.64        11
     neutral       0.50      0.18      0.27        11
    positive       0.83      1.00      0.91         5

    accuracy                           0.59        27
   macro avg       0.62      0.67      0.61        27
weighted avg       0.57      0.59      0.54        27

----------------------------------------------------------------------------------------------------





  predictions = pd.read_json(output, orient="index")
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


              precision    recall  f1-score   support

    negative       0.53      0.82      0.64        11
     neutral       0.00      0.00      0.00        11
    positive       0.50      1.00      0.67         5

    accuracy                           0.52        27
   macro avg       0.34      0.61      0.44        27
weighted avg       0.31      0.52      0.39        27

----------------------------------------------------------------------------------------------------





  predictions = pd.read_json(output, orient="index")


              precision    recall  f1-score   support

    negative       0.53      0.82      0.64        11
     neutral       0.67      0.18      0.29        11
    positive       0.71      1.00      0.83         5

    accuracy                           0.59        27
   macro avg       0.64      0.67      0.59        27
weighted avg       0.62      0.59      0.53        27

----------------------------------------------------------------------------------------------------





  predictions = pd.read_json(output, orient="index")


              precision    recall  f1-score   support

    negative       0.53      0.82      0.64        11
     neutral       0.40      0.18      0.25        11
    positive       0.80      0.80      0.80         5

    accuracy                           0.56        27
   macro avg       0.58      0.60      0.56        27
weighted avg       0.53      0.56      0.51        27

----------------------------------------------------------------------------------------------------





  predictions = pd.read_json(output, orient="index")
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


              precision    recall  f1-score   support

    negative       0.53      0.82      0.64        11
     neutral       0.00      0.00      0.00        11
    positive       0.50      1.00      0.67         5

    accuracy                           0.52        27
   macro avg       0.34      0.61      0.44        27
weighted avg       0.31      0.52      0.39        27

----------------------------------------------------------------------------------------------------





  predictions = pd.read_json(output, orient="index")


              precision    recall  f1-score   support

    negative       0.53      0.82      0.64        11
     neutral       0.33      0.27      0.30        11
    positive       1.00      0.20      0.33         5

    accuracy                           0.48        27
   macro avg       0.62      0.43      0.43        27
weighted avg       0.54      0.48      0.45        27

----------------------------------------------------------------------------------------------------





  predictions = pd.read_json(output, orient="index")


              precision    recall  f1-score   support

    negative       0.53      0.82      0.64        11
     neutral       0.50      0.18      0.27        11
    positive       0.83      1.00      0.91         5

    accuracy                           0.59        27
   macro avg       0.62      0.67      0.61        27
weighted avg       0.57      0.59      0.54        27

----------------------------------------------------------------------------------------------------





  predictions = pd.read_json(output, orient="index")


              precision    recall  f1-score   support

    negative       0.56      0.82      0.67        11
     neutral       0.67      0.36      0.47        11
    positive       0.80      0.80      0.80         5

    accuracy                           0.63        27
   macro avg       0.68      0.66      0.65        27
weighted avg       0.65      0.63      0.61        27

----------------------------------------------------------------------------------------------------





  predictions = pd.read_json(output, orient="index")


              precision    recall  f1-score   support

    negative       0.56      0.82      0.67        11
     neutral       0.40      0.36      0.38        11
    positive       1.00      0.20      0.33         5

    accuracy                           0.52        27
   macro avg       0.65      0.46      0.46        27
weighted avg       0.58      0.52      0.49        27

----------------------------------------------------------------------------------------------------





  predictions = pd.read_json(output, orient="index")


              precision    recall  f1-score   support

    negative       0.56      0.82      0.67        11
     neutral       0.67      0.36      0.47        11
    positive       1.00      1.00      1.00         5

    accuracy                           0.67        27
   macro avg       0.74      0.73      0.71        27
weighted avg       0.69      0.67      0.65        27

----------------------------------------------------------------------------------------------------





  predictions = pd.read_json(output, orient="index")


              precision    recall  f1-score   support

    negative       0.53      0.82      0.64        11
     neutral       0.67      0.18      0.29        11
    positive       0.71      1.00      0.83         5

    accuracy                           0.59        27
   macro avg       0.64      0.67      0.59        27
weighted avg       0.62      0.59      0.53        27

----------------------------------------------------------------------------------------------------





  predictions = pd.read_json(output, orient="index")
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


              precision    recall  f1-score   support

    negative       0.56      0.82      0.67        11
     neutral       0.36      0.36      0.36        11
    positive       0.00      0.00      0.00         5

    accuracy                           0.48        27
   macro avg       0.31      0.39      0.34        27
weighted avg       0.38      0.48      0.42        27

----------------------------------------------------------------------------------------------------





  predictions = pd.read_json(output, orient="index")


              precision    recall  f1-score   support

    negative       0.56      0.82      0.67        11
     neutral       0.57      0.36      0.44        11
    positive       1.00      0.80      0.89         5

    accuracy                           0.63        27
   macro avg       0.71      0.66      0.67        27
weighted avg       0.65      0.63      0.62        27

----------------------------------------------------------------------------------------------------



              precision    recall  f1-score   support

    negative       0.56      0.82      0.67        11
     neutral       0.60      0.27      0.37        11
    positive       0.83      1.00      0.91         5

    accuracy                           0.63        27
   macro avg       0.67      0.70      0.65        27
weighted avg       0.63      0.63      0.59        27

----------------------------------------------------------------------------------------------------



Best combination of context se

  predictions = pd.read_json(output, orient="index")
