In [39]:
import evaluate
from openai import AzureOpenAI
import os
import pandas as pd
import datasets
from sklearn.metrics import f1_score, precision_score, recall_score, classification_report
from sklearn.model_selection import ParameterGrid
import evaluate

client = AzureOpenAI(azure_endpoint='https://oai-cbipm-01.openai.azure.com/',
                     api_key=os.environ["OPENAI_API_KEY"],
                     api_version="2023-12-01-preview")  # 2023-12-01-preview <- highest version number as of 1/12/23

deployment = "Deployment"

In [40]:
# metrics function
def compute_metrics(predictions, references, labels=None, pos_label=1, average="weighted", sample_weight=None, zero_division='warn'):
        f1 = f1_score(
            references, predictions, labels=labels, pos_label=pos_label, average=average, sample_weight=sample_weight
        )
        p = precision_score(
            references, predictions, labels=labels, pos_label=pos_label, average=average, sample_weight=sample_weight,
            zero_division=zero_division
        )
        r = recall_score(
            references, predictions, labels=labels, pos_label=pos_label, average=average, sample_weight=sample_weight,
            zero_division=zero_division
        )
        c = classification_report(
            references, predictions, labels=labels
        )
        print(c)
        return {"f1": float(f1) if f1.size == 1 else f1,
                "precision": float(p) if p.size == 1 else p,
                "recall": float(r) if r.size == 1 else r}

In [41]:
# training sentence selection function
def train_sentence_selection(df, n_neutral, n_positive, n_negative, label, seed):
    # saving column name given PT or MD label
    label = f"{label}_label"
    # randomly selecting 1-2 sentences per label
    neutral_sentences = df[df[label] == "neutral"].sample(n_neutral, replace=False, random_state=seed)
    positive_sentences = df[df[label] == "positive"].sample(n_positive, replace=False, random_state=seed)
    negative_sentences = df[df[label] == "negative"].sample(n_negative, replace=False, random_state=seed)
    all_sentences = pd.concat([neutral_sentences,
                               positive_sentences,
                               negative_sentences], ignore_index = True)
    return all_sentences

In [42]:
train = pd.read_csv("../data/train_PT_MD_labels.csv")
test = pd.read_csv("../data/test_PT_MD_labels.csv")

# randomly selecting 1-2 training sentences per MD label
n_neutral_s = 1
n_positive_s = 1
n_negative_s = 1
train_sentences = train_sentence_selection(train, n_neutral_s, n_positive_s, n_negative_s, "PT", 42)
json_train_sentences = train_sentences["language"].to_json()
json_train_labels = train_sentences["PT_label"].to_json()
# print(train_sentences, json_train_labels)

# converting test sentences to json format
json_test_sentences = test["language"].to_json()

In [43]:
messages = [{"role": "system", "content": "You are a patient."},
            {"role": "user", "content": "As a patient at a medical center, medical doctors write lots of clinical notes about you.\n"
                                        "Your task is to analyze the sentiment of a series of sentences your doctor wrote about you.\n"
                                        "For each sentence, how do you feel reading this description of you?\n"
                                        "Please assign a sentiment score of negative, neutral, or positive for each sentence.\n"
                                        "Below are three example sentences in JSON format:\n"
                                        f"{json_train_sentences}"
                                        "Please provide your answer in JSON format."},
            {"role": "assistant", "content": f"{json_train_labels}"},
            {"role": "user", "content": f"Complete the same task with each of these sentences:\n{json_test_sentences}"}]

response = client.chat.completions.create(model=deployment, messages=messages, temperature=0, seed=42)

output = response.choices[0].message.content
# predictions = pd.read_json(output, orient="index")
# predictions.columns.values[0] = "predictions"
# predictions["predictions"] = predictions[].map(replace)
print(output)

{"0":"negative","1":"negative","2":"negative","3":"negative","4":"negative","5":"negative","6":"negative","7":"negative","8":"negative","9":"negative","10":"negative","11":"negative","12":"positive","13":"neutral","14":"positive","15":"positive","16":"negative","17":"positive","18":"negative","19":"positive","20":"negative","21":"negative","22":"positive","23":"negative","24":"positive","25":"negative","26":"positive"}


In [44]:
predictions = pd.read_json(output, orient="index")
compute_metrics(predictions[0], test["PT_label"])

              precision    recall  f1-score   support

    negative       0.61      0.92      0.73        12
     neutral       1.00      0.12      0.22         8
    positive       0.62      0.71      0.67         7

    accuracy                           0.63        27
   macro avg       0.75      0.59      0.54        27
weighted avg       0.73      0.63      0.56        27



  predictions = pd.read_json(output, orient="index")


{'f1': 0.5646090534979424,
 'precision': 0.7299382716049384,
 'recall': 0.6296296296296297}

In [46]:
# Hyperparameters (for best configuration selection)
params = {
    'seed': [40, 41, 42],
    'n_neutral_sentences': [1, 2, 3, 4],
    'n_positive_sentences': [1, 2],
    'n_negative_sentences': [1, 2, 3, 4]
}

metrics_file = f'PT_context_metrics.csv'
if os.path.isfile(metrics_file):
    f = open(metrics_file, 'a')
else:
    f = open(metrics_file, 'w')
    f.write('seed,n_neutral_sentences,n_positive_sentences,n_negative_sentences,f1,precision,recall\n')

best_model = []
best_f1 = 0.0
best_comb, best_results = None, None
for comb in list(ParameterGrid(params)):
    train = pd.read_csv(f"../data/train_PT_MD_labels_{comb['seed']}.csv")
    test = pd.read_csv(f"../data/test_PT_MD_labels_{comb['seed']}.csv")
    # randomly selecting context sentences in json format
    train_sentences = train_sentence_selection(train,
                                               comb['n_neutral_sentences'],
                                               comb['n_positive_sentences'],
                                               comb['n_negative_sentences'],
                                               "PT",
                                               comb['seed'])
    json_train_sentences = train_sentences["language"].to_json()
    json_train_labels = train_sentences["PT_label"].to_json()

    # converting test sentences to json format
    json_test_sentences = test["language"].to_json()

    # creating context prompt
    messages = [{"role": "system", "content": "You are a patient."},
                {"role": "user", "content": "As a patient at a medical center, medical doctors write lots of clinical notes about you.\n"
                                            "Your task is to analyze the sentiment of a series of sentences your doctor wrote about you.\n"
                                            "For each sentence, how do you feel reading this description of you?\n"
                                            "Please assign a sentiment score of negative, neutral, or positive for each sentence.\n"
                                            "Below are three example sentences in JSON format:\n"
                                            f"{json_train_sentences}"
                                            "Please provide your answer in JSON format."},
                {"role": "assistant", "content": f"{json_train_labels}"},
                {"role": "user", "content": f"Complete the same task with each of these sentences:\n{json_test_sentences}"}]

    # running chat completion
    response = client.chat.completions.create(model=deployment, messages=messages, temperature=0, seed=comb['seed'])
    # saving response to json format
    output = response.choices[0].message.content
    predictions = pd.read_json(output, orient="index")
    # computing metrics
    results = compute_metrics(predictions[0], test["PT_label"])
    # saving results to metrics sheet
    v = [comb['seed'], comb['n_neutral_sentences'], comb['n_positive_sentences'], comb['n_negative_sentences'],
         results['f1'], results['precision'], results['recall']]
    f.write(','.join([str(el) for el in v]) + '\n')

    if results['f1'] > best_f1:
        best_f1 = results['f1']
        best_comb = comb
        best_results = results
    print('-' * 100)
    print('\n\n')

    if best_comb is not None:
        print(f'Best combination of context sentences: {best_comb}')
        print('\n')
        print(f'Best results: {best_results}')
f.close()

  predictions = pd.read_json(output, orient="index")


              precision    recall  f1-score   support

    negative       0.59      0.83      0.69        12
     neutral       1.00      0.12      0.22         8
    positive       0.56      0.71      0.63         7

    accuracy                           0.59        27
   macro avg       0.71      0.56      0.51        27
weighted avg       0.70      0.59      0.53        27

----------------------------------------------------------------------------------------------------



Best combination of context sentences: {'n_negative_sentences': 1, 'n_neutral_sentences': 1, 'n_positive_sentences': 1, 'seed': 40}


Best results: {'f1': 0.5343940683978998, 'precision': 0.7017671266037279, 'recall': 0.5925925925925926}


  predictions = pd.read_json(output, orient="index")


              precision    recall  f1-score   support

    negative       0.56      0.75      0.64        12
     neutral       0.00      0.00      0.00         8
    positive       0.56      0.71      0.63         7

    accuracy                           0.52        27
   macro avg       0.37      0.49      0.42        27
weighted avg       0.39      0.52      0.45        27

----------------------------------------------------------------------------------------------------



Best combination of context sentences: {'n_negative_sentences': 1, 'n_neutral_sentences': 1, 'n_positive_sentences': 1, 'seed': 40}


Best results: {'f1': 0.5343940683978998, 'precision': 0.7017671266037279, 'recall': 0.5925925925925926}


  predictions = pd.read_json(output, orient="index")


              precision    recall  f1-score   support

    negative       0.61      0.92      0.73        12
     neutral       1.00      0.12      0.22         8
    positive       0.62      0.71      0.67         7

    accuracy                           0.63        27
   macro avg       0.75      0.59      0.54        27
weighted avg       0.73      0.63      0.56        27

----------------------------------------------------------------------------------------------------



Best combination of context sentences: {'n_negative_sentences': 1, 'n_neutral_sentences': 1, 'n_positive_sentences': 1, 'seed': 42}


Best results: {'f1': 0.5646090534979424, 'precision': 0.7299382716049384, 'recall': 0.6296296296296297}


  predictions = pd.read_json(output, orient="index")
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


              precision    recall  f1-score   support

    negative       0.56      0.75      0.64        12
     neutral       0.00      0.00      0.00         8
    positive       0.45      0.71      0.56         7

    accuracy                           0.52        27
   macro avg       0.34      0.49      0.40        27
weighted avg       0.37      0.52      0.43        27

----------------------------------------------------------------------------------------------------



Best combination of context sentences: {'n_negative_sentences': 1, 'n_neutral_sentences': 1, 'n_positive_sentences': 1, 'seed': 42}


Best results: {'f1': 0.5646090534979424, 'precision': 0.7299382716049384, 'recall': 0.6296296296296297}


  predictions = pd.read_json(output, orient="index")


              precision    recall  f1-score   support

    negative       0.56      0.75      0.64        12
     neutral       0.00      0.00      0.00         8
    positive       0.56      0.71      0.63         7

    accuracy                           0.52        27
   macro avg       0.37      0.49      0.42        27
weighted avg       0.39      0.52      0.45        27

----------------------------------------------------------------------------------------------------



Best combination of context sentences: {'n_negative_sentences': 1, 'n_neutral_sentences': 1, 'n_positive_sentences': 1, 'seed': 42}


Best results: {'f1': 0.5646090534979424, 'precision': 0.7299382716049384, 'recall': 0.6296296296296297}


  predictions = pd.read_json(output, orient="index")


              precision    recall  f1-score   support

    negative       0.61      0.92      0.73        12
     neutral       1.00      0.12      0.22         8
    positive       0.62      0.71      0.67         7

    accuracy                           0.63        27
   macro avg       0.75      0.59      0.54        27
weighted avg       0.73      0.63      0.56        27

----------------------------------------------------------------------------------------------------



Best combination of context sentences: {'n_negative_sentences': 1, 'n_neutral_sentences': 1, 'n_positive_sentences': 1, 'seed': 42}


Best results: {'f1': 0.5646090534979424, 'precision': 0.7299382716049384, 'recall': 0.6296296296296297}


  predictions = pd.read_json(output, orient="index")
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


              precision    recall  f1-score   support

    negative       0.56      0.75      0.64        12
     neutral       0.00      0.00      0.00         8
    positive       0.45      0.71      0.56         7

    accuracy                           0.52        27
   macro avg       0.34      0.49      0.40        27
weighted avg       0.37      0.52      0.43        27

----------------------------------------------------------------------------------------------------



Best combination of context sentences: {'n_negative_sentences': 1, 'n_neutral_sentences': 1, 'n_positive_sentences': 1, 'seed': 42}


Best results: {'f1': 0.5646090534979424, 'precision': 0.7299382716049384, 'recall': 0.6296296296296297}


  predictions = pd.read_json(output, orient="index")


              precision    recall  f1-score   support

    negative       0.56      0.75      0.64        12
     neutral       0.00      0.00      0.00         8
    positive       0.56      0.71      0.63         7

    accuracy                           0.52        27
   macro avg       0.37      0.49      0.42        27
weighted avg       0.39      0.52      0.45        27

----------------------------------------------------------------------------------------------------



Best combination of context sentences: {'n_negative_sentences': 1, 'n_neutral_sentences': 1, 'n_positive_sentences': 1, 'seed': 42}


Best results: {'f1': 0.5646090534979424, 'precision': 0.7299382716049384, 'recall': 0.6296296296296297}


  predictions = pd.read_json(output, orient="index")


              precision    recall  f1-score   support

    negative       0.62      0.83      0.71        12
     neutral       0.67      0.50      0.57         8
    positive       0.80      0.57      0.67         7

    accuracy                           0.67        27
   macro avg       0.70      0.63      0.65        27
weighted avg       0.68      0.67      0.66        27

----------------------------------------------------------------------------------------------------



Best combination of context sentences: {'n_negative_sentences': 1, 'n_neutral_sentences': 2, 'n_positive_sentences': 1, 'seed': 42}


Best results: {'f1': 0.6596119929453264, 'precision': 0.682716049382716, 'recall': 0.6666666666666666}


  predictions = pd.read_json(output, orient="index")
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


              precision    recall  f1-score   support

    negative       0.59      0.83      0.69        12
     neutral       0.00      0.00      0.00         8
    positive       0.50      0.71      0.59         7

    accuracy                           0.56        27
   macro avg       0.36      0.52      0.43        27
weighted avg       0.39      0.56      0.46        27

----------------------------------------------------------------------------------------------------



Best combination of context sentences: {'n_negative_sentences': 1, 'n_neutral_sentences': 2, 'n_positive_sentences': 1, 'seed': 42}


Best results: {'f1': 0.6596119929453264, 'precision': 0.682716049382716, 'recall': 0.6666666666666666}


  predictions = pd.read_json(output, orient="index")


              precision    recall  f1-score   support

    negative       0.59      0.83      0.69        12
     neutral       0.00      0.00      0.00         8
    positive       0.56      0.71      0.63         7

    accuracy                           0.56        27
   macro avg       0.38      0.52      0.44        27
weighted avg       0.41      0.56      0.47        27

----------------------------------------------------------------------------------------------------



Best combination of context sentences: {'n_negative_sentences': 1, 'n_neutral_sentences': 2, 'n_positive_sentences': 1, 'seed': 42}


Best results: {'f1': 0.6596119929453264, 'precision': 0.682716049382716, 'recall': 0.6666666666666666}


  predictions = pd.read_json(output, orient="index")
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


              precision    recall  f1-score   support

    negative       0.61      0.92      0.73        12
     neutral       0.00      0.00      0.00         8
    positive       0.56      0.71      0.63         7

    accuracy                           0.59        27
   macro avg       0.39      0.54      0.45        27
weighted avg       0.42      0.59      0.49        27

----------------------------------------------------------------------------------------------------



Best combination of context sentences: {'n_negative_sentences': 1, 'n_neutral_sentences': 2, 'n_positive_sentences': 1, 'seed': 42}


Best results: {'f1': 0.6596119929453264, 'precision': 0.682716049382716, 'recall': 0.6666666666666666}


  predictions = pd.read_json(output, orient="index")


              precision    recall  f1-score   support

    negative       0.59      0.83      0.69        12
     neutral       1.00      0.12      0.22         8
    positive       0.56      0.71      0.63         7

    accuracy                           0.59        27
   macro avg       0.71      0.56      0.51        27
weighted avg       0.70      0.59      0.53        27

----------------------------------------------------------------------------------------------------



Best combination of context sentences: {'n_negative_sentences': 1, 'n_neutral_sentences': 2, 'n_positive_sentences': 1, 'seed': 42}


Best results: {'f1': 0.6596119929453264, 'precision': 0.682716049382716, 'recall': 0.6666666666666666}


  predictions = pd.read_json(output, orient="index")


              precision    recall  f1-score   support

    negative       0.57      0.67      0.62        12
     neutral       0.20      0.12      0.15         8
    positive       0.50      0.57      0.53         7

    accuracy                           0.48        27
   macro avg       0.42      0.45      0.43        27
weighted avg       0.44      0.48      0.46        27

----------------------------------------------------------------------------------------------------



Best combination of context sentences: {'n_negative_sentences': 1, 'n_neutral_sentences': 2, 'n_positive_sentences': 1, 'seed': 42}


Best results: {'f1': 0.6596119929453264, 'precision': 0.682716049382716, 'recall': 0.6666666666666666}


  predictions = pd.read_json(output, orient="index")


              precision    recall  f1-score   support

    negative       0.62      0.83      0.71        12
     neutral       0.67      0.50      0.57         8
    positive       0.80      0.57      0.67         7

    accuracy                           0.67        27
   macro avg       0.70      0.63      0.65        27
weighted avg       0.68      0.67      0.66        27

----------------------------------------------------------------------------------------------------



Best combination of context sentences: {'n_negative_sentences': 1, 'n_neutral_sentences': 2, 'n_positive_sentences': 1, 'seed': 42}


Best results: {'f1': 0.6596119929453264, 'precision': 0.682716049382716, 'recall': 0.6666666666666666}


  predictions = pd.read_json(output, orient="index")


              precision    recall  f1-score   support

    negative       0.62      0.83      0.71        12
     neutral       1.00      0.12      0.22         8
    positive       0.50      0.71      0.59         7

    accuracy                           0.59        27
   macro avg       0.71      0.56      0.51        27
weighted avg       0.70      0.59      0.54        27

----------------------------------------------------------------------------------------------------



Best combination of context sentences: {'n_negative_sentences': 1, 'n_neutral_sentences': 2, 'n_positive_sentences': 1, 'seed': 42}


Best results: {'f1': 0.6596119929453264, 'precision': 0.682716049382716, 'recall': 0.6666666666666666}


  predictions = pd.read_json(output, orient="index")


              precision    recall  f1-score   support

    negative       0.59      0.83      0.69        12
     neutral       0.00      0.00      0.00         8
    positive       0.56      0.71      0.63         7

    accuracy                           0.56        27
   macro avg       0.38      0.52      0.44        27
weighted avg       0.41      0.56      0.47        27

----------------------------------------------------------------------------------------------------



Best combination of context sentences: {'n_negative_sentences': 1, 'n_neutral_sentences': 2, 'n_positive_sentences': 1, 'seed': 42}


Best results: {'f1': 0.6596119929453264, 'precision': 0.682716049382716, 'recall': 0.6666666666666666}


  predictions = pd.read_json(output, orient="index")


              precision    recall  f1-score   support

    negative       0.62      0.83      0.71        12
     neutral       0.60      0.38      0.46         8
    positive       0.67      0.57      0.62         7

    accuracy                           0.63        27
   macro avg       0.63      0.59      0.60        27
weighted avg       0.63      0.63      0.61        27

----------------------------------------------------------------------------------------------------



Best combination of context sentences: {'n_negative_sentences': 1, 'n_neutral_sentences': 2, 'n_positive_sentences': 1, 'seed': 42}


Best results: {'f1': 0.6596119929453264, 'precision': 0.682716049382716, 'recall': 0.6666666666666666}


  predictions = pd.read_json(output, orient="index")


              precision    recall  f1-score   support

    negative       0.59      0.83      0.69        12
     neutral       1.00      0.12      0.22         8
    positive       0.56      0.71      0.63         7

    accuracy                           0.59        27
   macro avg       0.71      0.56      0.51        27
weighted avg       0.70      0.59      0.53        27

----------------------------------------------------------------------------------------------------



Best combination of context sentences: {'n_negative_sentences': 1, 'n_neutral_sentences': 2, 'n_positive_sentences': 1, 'seed': 42}


Best results: {'f1': 0.6596119929453264, 'precision': 0.682716049382716, 'recall': 0.6666666666666666}


  predictions = pd.read_json(output, orient="index")


              precision    recall  f1-score   support

    negative       0.60      0.75      0.67        12
     neutral       0.50      0.25      0.33         8
    positive       0.62      0.71      0.67         7

    accuracy                           0.59        27
   macro avg       0.58      0.57      0.56        27
weighted avg       0.58      0.59      0.57        27

----------------------------------------------------------------------------------------------------



Best combination of context sentences: {'n_negative_sentences': 1, 'n_neutral_sentences': 2, 'n_positive_sentences': 1, 'seed': 42}


Best results: {'f1': 0.6596119929453264, 'precision': 0.682716049382716, 'recall': 0.6666666666666666}


  predictions = pd.read_json(output, orient="index")


              precision    recall  f1-score   support

    negative       0.62      0.83      0.71        12
     neutral       0.67      0.50      0.57         8
    positive       0.80      0.57      0.67         7

    accuracy                           0.67        27
   macro avg       0.70      0.63      0.65        27
weighted avg       0.68      0.67      0.66        27

----------------------------------------------------------------------------------------------------



Best combination of context sentences: {'n_negative_sentences': 1, 'n_neutral_sentences': 2, 'n_positive_sentences': 1, 'seed': 42}


Best results: {'f1': 0.6596119929453264, 'precision': 0.682716049382716, 'recall': 0.6666666666666666}


  predictions = pd.read_json(output, orient="index")


              precision    recall  f1-score   support

    negative       0.59      0.83      0.69        12
     neutral       1.00      0.12      0.22         8
    positive       0.56      0.71      0.63         7

    accuracy                           0.59        27
   macro avg       0.71      0.56      0.51        27
weighted avg       0.70      0.59      0.53        27

----------------------------------------------------------------------------------------------------



Best combination of context sentences: {'n_negative_sentences': 1, 'n_neutral_sentences': 2, 'n_positive_sentences': 1, 'seed': 42}


Best results: {'f1': 0.6596119929453264, 'precision': 0.682716049382716, 'recall': 0.6666666666666666}


  predictions = pd.read_json(output, orient="index")


              precision    recall  f1-score   support

    negative       0.59      0.83      0.69        12
     neutral       0.50      0.12      0.20         8
    positive       0.62      0.71      0.67         7

    accuracy                           0.59        27
   macro avg       0.57      0.56      0.52        27
weighted avg       0.57      0.59      0.54        27

----------------------------------------------------------------------------------------------------



Best combination of context sentences: {'n_negative_sentences': 1, 'n_neutral_sentences': 2, 'n_positive_sentences': 1, 'seed': 42}


Best results: {'f1': 0.6596119929453264, 'precision': 0.682716049382716, 'recall': 0.6666666666666666}


  predictions = pd.read_json(output, orient="index")


              precision    recall  f1-score   support

    negative       0.62      0.83      0.71        12
     neutral       0.60      0.38      0.46         8
    positive       0.67      0.57      0.62         7

    accuracy                           0.63        27
   macro avg       0.63      0.59      0.60        27
weighted avg       0.63      0.63      0.61        27

----------------------------------------------------------------------------------------------------



Best combination of context sentences: {'n_negative_sentences': 1, 'n_neutral_sentences': 2, 'n_positive_sentences': 1, 'seed': 42}


Best results: {'f1': 0.6596119929453264, 'precision': 0.682716049382716, 'recall': 0.6666666666666666}


  predictions = pd.read_json(output, orient="index")


              precision    recall  f1-score   support

    negative       0.59      0.83      0.69        12
     neutral       1.00      0.12      0.22         8
    positive       0.56      0.71      0.63         7

    accuracy                           0.59        27
   macro avg       0.71      0.56      0.51        27
weighted avg       0.70      0.59      0.53        27

----------------------------------------------------------------------------------------------------



Best combination of context sentences: {'n_negative_sentences': 1, 'n_neutral_sentences': 2, 'n_positive_sentences': 1, 'seed': 42}


Best results: {'f1': 0.6596119929453264, 'precision': 0.682716049382716, 'recall': 0.6666666666666666}


  predictions = pd.read_json(output, orient="index")


              precision    recall  f1-score   support

    negative       0.56      0.75      0.64        12
     neutral       0.00      0.00      0.00         8
    positive       0.56      0.71      0.63         7

    accuracy                           0.52        27
   macro avg       0.37      0.49      0.42        27
weighted avg       0.39      0.52      0.45        27

----------------------------------------------------------------------------------------------------



Best combination of context sentences: {'n_negative_sentences': 1, 'n_neutral_sentences': 2, 'n_positive_sentences': 1, 'seed': 42}


Best results: {'f1': 0.6596119929453264, 'precision': 0.682716049382716, 'recall': 0.6666666666666666}


  predictions = pd.read_json(output, orient="index")
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


              precision    recall  f1-score   support

    negative       0.61      0.92      0.73        12
     neutral       0.00      0.00      0.00         8
    positive       0.56      0.71      0.63         7

    accuracy                           0.59        27
   macro avg       0.39      0.54      0.45        27
weighted avg       0.42      0.59      0.49        27

----------------------------------------------------------------------------------------------------



Best combination of context sentences: {'n_negative_sentences': 1, 'n_neutral_sentences': 2, 'n_positive_sentences': 1, 'seed': 42}


Best results: {'f1': 0.6596119929453264, 'precision': 0.682716049382716, 'recall': 0.6666666666666666}


  predictions = pd.read_json(output, orient="index")
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


              precision    recall  f1-score   support

    negative       0.59      0.83      0.69        12
     neutral       0.00      0.00      0.00         8
    positive       0.50      0.71      0.59         7

    accuracy                           0.56        27
   macro avg       0.36      0.52      0.43        27
weighted avg       0.39      0.56      0.46        27

----------------------------------------------------------------------------------------------------



Best combination of context sentences: {'n_negative_sentences': 1, 'n_neutral_sentences': 2, 'n_positive_sentences': 1, 'seed': 42}


Best results: {'f1': 0.6596119929453264, 'precision': 0.682716049382716, 'recall': 0.6666666666666666}


  predictions = pd.read_json(output, orient="index")


              precision    recall  f1-score   support

    negative       0.59      0.83      0.69        12
     neutral       0.00      0.00      0.00         8
    positive       0.56      0.71      0.63         7

    accuracy                           0.56        27
   macro avg       0.38      0.52      0.44        27
weighted avg       0.41      0.56      0.47        27

----------------------------------------------------------------------------------------------------



Best combination of context sentences: {'n_negative_sentences': 1, 'n_neutral_sentences': 2, 'n_positive_sentences': 1, 'seed': 42}


Best results: {'f1': 0.6596119929453264, 'precision': 0.682716049382716, 'recall': 0.6666666666666666}


  predictions = pd.read_json(output, orient="index")
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


              precision    recall  f1-score   support

    negative       0.65      0.92      0.76        12
     neutral       0.00      0.00      0.00         8
    positive       0.50      0.71      0.59         7

    accuracy                           0.59        27
   macro avg       0.38      0.54      0.45        27
weighted avg       0.42      0.59      0.49        27

----------------------------------------------------------------------------------------------------



Best combination of context sentences: {'n_negative_sentences': 1, 'n_neutral_sentences': 2, 'n_positive_sentences': 1, 'seed': 42}


Best results: {'f1': 0.6596119929453264, 'precision': 0.682716049382716, 'recall': 0.6666666666666666}


  predictions = pd.read_json(output, orient="index")
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


              precision    recall  f1-score   support

    negative       0.56      0.75      0.64        12
     neutral       0.00      0.00      0.00         8
    positive       0.45      0.71      0.56         7

    accuracy                           0.52        27
   macro avg       0.34      0.49      0.40        27
weighted avg       0.37      0.52      0.43        27

----------------------------------------------------------------------------------------------------



Best combination of context sentences: {'n_negative_sentences': 1, 'n_neutral_sentences': 2, 'n_positive_sentences': 1, 'seed': 42}


Best results: {'f1': 0.6596119929453264, 'precision': 0.682716049382716, 'recall': 0.6666666666666666}


  predictions = pd.read_json(output, orient="index")


              precision    recall  f1-score   support

    negative       0.62      0.83      0.71        12
     neutral       0.50      0.12      0.20         8
    positive       0.56      0.71      0.63         7

    accuracy                           0.59        27
   macro avg       0.56      0.56      0.51        27
weighted avg       0.57      0.59      0.54        27

----------------------------------------------------------------------------------------------------



Best combination of context sentences: {'n_negative_sentences': 1, 'n_neutral_sentences': 2, 'n_positive_sentences': 1, 'seed': 42}


Best results: {'f1': 0.6596119929453264, 'precision': 0.682716049382716, 'recall': 0.6666666666666666}


  predictions = pd.read_json(output, orient="index")


              precision    recall  f1-score   support

    negative       0.65      0.92      0.76        12
     neutral       0.80      0.50      0.62         8
    positive       0.80      0.57      0.67         7

    accuracy                           0.70        27
   macro avg       0.75      0.66      0.68        27
weighted avg       0.73      0.70      0.69        27

----------------------------------------------------------------------------------------------------



Best combination of context sentences: {'n_negative_sentences': 2, 'n_neutral_sentences': 2, 'n_positive_sentences': 1, 'seed': 42}


Best results: {'f1': 0.6923404394668761, 'precision': 0.7320261437908497, 'recall': 0.7037037037037037}


  predictions = pd.read_json(output, orient="index")


              precision    recall  f1-score   support

    negative       0.59      0.83      0.69        12
     neutral       1.00      0.12      0.22         8
    positive       0.56      0.71      0.63         7

    accuracy                           0.59        27
   macro avg       0.71      0.56      0.51        27
weighted avg       0.70      0.59      0.53        27

----------------------------------------------------------------------------------------------------



Best combination of context sentences: {'n_negative_sentences': 2, 'n_neutral_sentences': 2, 'n_positive_sentences': 1, 'seed': 42}


Best results: {'f1': 0.6923404394668761, 'precision': 0.7320261437908497, 'recall': 0.7037037037037037}


  predictions = pd.read_json(output, orient="index")


              precision    recall  f1-score   support

    negative       0.62      0.83      0.71        12
     neutral       0.50      0.12      0.20         8
    positive       0.56      0.71      0.63         7

    accuracy                           0.59        27
   macro avg       0.56      0.56      0.51        27
weighted avg       0.57      0.59      0.54        27

----------------------------------------------------------------------------------------------------



Best combination of context sentences: {'n_negative_sentences': 2, 'n_neutral_sentences': 2, 'n_positive_sentences': 1, 'seed': 42}


Best results: {'f1': 0.6923404394668761, 'precision': 0.7320261437908497, 'recall': 0.7037037037037037}


  predictions = pd.read_json(output, orient="index")
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


              precision    recall  f1-score   support

    negative       0.61      0.92      0.73        12
     neutral       0.00      0.00      0.00         8
    positive       0.56      0.71      0.63         7

    accuracy                           0.59        27
   macro avg       0.39      0.54      0.45        27
weighted avg       0.42      0.59      0.49        27

----------------------------------------------------------------------------------------------------



Best combination of context sentences: {'n_negative_sentences': 2, 'n_neutral_sentences': 2, 'n_positive_sentences': 1, 'seed': 42}


Best results: {'f1': 0.6923404394668761, 'precision': 0.7320261437908497, 'recall': 0.7037037037037037}


  predictions = pd.read_json(output, orient="index")


              precision    recall  f1-score   support

    negative       0.56      0.75      0.64        12
     neutral       0.50      0.12      0.20         8
    positive       0.56      0.71      0.63         7

    accuracy                           0.56        27
   macro avg       0.54      0.53      0.49        27
weighted avg       0.54      0.56      0.51        27

----------------------------------------------------------------------------------------------------



Best combination of context sentences: {'n_negative_sentences': 2, 'n_neutral_sentences': 2, 'n_positive_sentences': 1, 'seed': 42}


Best results: {'f1': 0.6923404394668761, 'precision': 0.7320261437908497, 'recall': 0.7037037037037037}


  predictions = pd.read_json(output, orient="index")


              precision    recall  f1-score   support

    negative       0.60      0.75      0.67        12
     neutral       0.33      0.25      0.29         8
    positive       0.50      0.43      0.46         7

    accuracy                           0.52        27
   macro avg       0.48      0.48      0.47        27
weighted avg       0.50      0.52      0.50        27

----------------------------------------------------------------------------------------------------



Best combination of context sentences: {'n_negative_sentences': 2, 'n_neutral_sentences': 2, 'n_positive_sentences': 1, 'seed': 42}


Best results: {'f1': 0.6923404394668761, 'precision': 0.7320261437908497, 'recall': 0.7037037037037037}


  predictions = pd.read_json(output, orient="index")


              precision    recall  f1-score   support

    negative       0.62      0.83      0.71        12
     neutral       0.67      0.50      0.57         8
    positive       0.80      0.57      0.67         7

    accuracy                           0.67        27
   macro avg       0.70      0.63      0.65        27
weighted avg       0.68      0.67      0.66        27

----------------------------------------------------------------------------------------------------



Best combination of context sentences: {'n_negative_sentences': 2, 'n_neutral_sentences': 2, 'n_positive_sentences': 1, 'seed': 42}


Best results: {'f1': 0.6923404394668761, 'precision': 0.7320261437908497, 'recall': 0.7037037037037037}


  predictions = pd.read_json(output, orient="index")


              precision    recall  f1-score   support

    negative       0.60      0.75      0.67        12
     neutral       0.50      0.12      0.20         8
    positive       0.50      0.71      0.59         7

    accuracy                           0.56        27
   macro avg       0.53      0.53      0.48        27
weighted avg       0.54      0.56      0.51        27

----------------------------------------------------------------------------------------------------



Best combination of context sentences: {'n_negative_sentences': 2, 'n_neutral_sentences': 2, 'n_positive_sentences': 1, 'seed': 42}


Best results: {'f1': 0.6923404394668761, 'precision': 0.7320261437908497, 'recall': 0.7037037037037037}


  predictions = pd.read_json(output, orient="index")


              precision    recall  f1-score   support

    negative       0.59      0.83      0.69        12
     neutral       0.00      0.00      0.00         8
    positive       0.56      0.71      0.63         7

    accuracy                           0.56        27
   macro avg       0.38      0.52      0.44        27
weighted avg       0.41      0.56      0.47        27

----------------------------------------------------------------------------------------------------



Best combination of context sentences: {'n_negative_sentences': 2, 'n_neutral_sentences': 2, 'n_positive_sentences': 1, 'seed': 42}


Best results: {'f1': 0.6923404394668761, 'precision': 0.7320261437908497, 'recall': 0.7037037037037037}


  predictions = pd.read_json(output, orient="index")


              precision    recall  f1-score   support

    negative       0.62      0.83      0.71        12
     neutral       0.60      0.38      0.46         8
    positive       0.67      0.57      0.62         7

    accuracy                           0.63        27
   macro avg       0.63      0.59      0.60        27
weighted avg       0.63      0.63      0.61        27

----------------------------------------------------------------------------------------------------



Best combination of context sentences: {'n_negative_sentences': 2, 'n_neutral_sentences': 2, 'n_positive_sentences': 1, 'seed': 42}


Best results: {'f1': 0.6923404394668761, 'precision': 0.7320261437908497, 'recall': 0.7037037037037037}


  predictions = pd.read_json(output, orient="index")


              precision    recall  f1-score   support

    negative       0.56      0.75      0.64        12
     neutral       0.50      0.12      0.20         8
    positive       0.56      0.71      0.63         7

    accuracy                           0.56        27
   macro avg       0.54      0.53      0.49        27
weighted avg       0.54      0.56      0.51        27

----------------------------------------------------------------------------------------------------



Best combination of context sentences: {'n_negative_sentences': 2, 'n_neutral_sentences': 2, 'n_positive_sentences': 1, 'seed': 42}


Best results: {'f1': 0.6923404394668761, 'precision': 0.7320261437908497, 'recall': 0.7037037037037037}


  predictions = pd.read_json(output, orient="index")


              precision    recall  f1-score   support

    negative       0.60      0.75      0.67        12
     neutral       0.40      0.25      0.31         8
    positive       0.57      0.57      0.57         7

    accuracy                           0.56        27
   macro avg       0.52      0.52      0.52        27
weighted avg       0.53      0.56      0.54        27

----------------------------------------------------------------------------------------------------



Best combination of context sentences: {'n_negative_sentences': 2, 'n_neutral_sentences': 2, 'n_positive_sentences': 1, 'seed': 42}


Best results: {'f1': 0.6923404394668761, 'precision': 0.7320261437908497, 'recall': 0.7037037037037037}


  predictions = pd.read_json(output, orient="index")


              precision    recall  f1-score   support

    negative       0.62      0.83      0.71        12
     neutral       0.60      0.38      0.46         8
    positive       0.67      0.57      0.62         7

    accuracy                           0.63        27
   macro avg       0.63      0.59      0.60        27
weighted avg       0.63      0.63      0.61        27

----------------------------------------------------------------------------------------------------



Best combination of context sentences: {'n_negative_sentences': 2, 'n_neutral_sentences': 2, 'n_positive_sentences': 1, 'seed': 42}


Best results: {'f1': 0.6923404394668761, 'precision': 0.7320261437908497, 'recall': 0.7037037037037037}


  predictions = pd.read_json(output, orient="index")


              precision    recall  f1-score   support

    negative       0.59      0.83      0.69        12
     neutral       1.00      0.12      0.22         8
    positive       0.56      0.71      0.63         7

    accuracy                           0.59        27
   macro avg       0.71      0.56      0.51        27
weighted avg       0.70      0.59      0.53        27

----------------------------------------------------------------------------------------------------



Best combination of context sentences: {'n_negative_sentences': 2, 'n_neutral_sentences': 2, 'n_positive_sentences': 1, 'seed': 42}


Best results: {'f1': 0.6923404394668761, 'precision': 0.7320261437908497, 'recall': 0.7037037037037037}


  predictions = pd.read_json(output, orient="index")


              precision    recall  f1-score   support

    negative       0.60      0.75      0.67        12
     neutral       0.40      0.25      0.31         8
    positive       0.57      0.57      0.57         7

    accuracy                           0.56        27
   macro avg       0.52      0.52      0.52        27
weighted avg       0.53      0.56      0.54        27

----------------------------------------------------------------------------------------------------



Best combination of context sentences: {'n_negative_sentences': 2, 'n_neutral_sentences': 2, 'n_positive_sentences': 1, 'seed': 42}


Best results: {'f1': 0.6923404394668761, 'precision': 0.7320261437908497, 'recall': 0.7037037037037037}


  predictions = pd.read_json(output, orient="index")


              precision    recall  f1-score   support

    negative       0.65      0.92      0.76        12
     neutral       1.00      0.12      0.22         8
    positive       0.56      0.71      0.63         7

    accuracy                           0.63        27
   macro avg       0.73      0.59      0.54        27
weighted avg       0.73      0.63      0.57        27

----------------------------------------------------------------------------------------------------



Best combination of context sentences: {'n_negative_sentences': 2, 'n_neutral_sentences': 2, 'n_positive_sentences': 1, 'seed': 42}


Best results: {'f1': 0.6923404394668761, 'precision': 0.7320261437908497, 'recall': 0.7037037037037037}


  predictions = pd.read_json(output, orient="index")


              precision    recall  f1-score   support

    negative       0.59      0.83      0.69        12
     neutral       1.00      0.12      0.22         8
    positive       0.56      0.71      0.63         7

    accuracy                           0.59        27
   macro avg       0.71      0.56      0.51        27
weighted avg       0.70      0.59      0.53        27

----------------------------------------------------------------------------------------------------



Best combination of context sentences: {'n_negative_sentences': 2, 'n_neutral_sentences': 2, 'n_positive_sentences': 1, 'seed': 42}


Best results: {'f1': 0.6923404394668761, 'precision': 0.7320261437908497, 'recall': 0.7037037037037037}


  predictions = pd.read_json(output, orient="index")


              precision    recall  f1-score   support

    negative       0.59      0.83      0.69        12
     neutral       0.00      0.00      0.00         8
    positive       0.56      0.71      0.63         7

    accuracy                           0.56        27
   macro avg       0.38      0.52      0.44        27
weighted avg       0.41      0.56      0.47        27

----------------------------------------------------------------------------------------------------



Best combination of context sentences: {'n_negative_sentences': 2, 'n_neutral_sentences': 2, 'n_positive_sentences': 1, 'seed': 42}


Best results: {'f1': 0.6923404394668761, 'precision': 0.7320261437908497, 'recall': 0.7037037037037037}


  predictions = pd.read_json(output, orient="index")
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


              precision    recall  f1-score   support

    negative       0.65      0.92      0.76        12
     neutral       0.00      0.00      0.00         8
    positive       0.50      0.71      0.59         7

    accuracy                           0.59        27
   macro avg       0.38      0.54      0.45        27
weighted avg       0.42      0.59      0.49        27

----------------------------------------------------------------------------------------------------



Best combination of context sentences: {'n_negative_sentences': 2, 'n_neutral_sentences': 2, 'n_positive_sentences': 1, 'seed': 42}


Best results: {'f1': 0.6923404394668761, 'precision': 0.7320261437908497, 'recall': 0.7037037037037037}


  predictions = pd.read_json(output, orient="index")
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


              precision    recall  f1-score   support

    negative       0.56      0.75      0.64        12
     neutral       0.00      0.00      0.00         8
    positive       0.45      0.71      0.56         7

    accuracy                           0.52        27
   macro avg       0.34      0.49      0.40        27
weighted avg       0.37      0.52      0.43        27

----------------------------------------------------------------------------------------------------



Best combination of context sentences: {'n_negative_sentences': 2, 'n_neutral_sentences': 2, 'n_positive_sentences': 1, 'seed': 42}


Best results: {'f1': 0.6923404394668761, 'precision': 0.7320261437908497, 'recall': 0.7037037037037037}


  predictions = pd.read_json(output, orient="index")


              precision    recall  f1-score   support

    negative       0.62      0.83      0.71        12
     neutral       0.50      0.12      0.20         8
    positive       0.56      0.71      0.63         7

    accuracy                           0.59        27
   macro avg       0.56      0.56      0.51        27
weighted avg       0.57      0.59      0.54        27

----------------------------------------------------------------------------------------------------



Best combination of context sentences: {'n_negative_sentences': 2, 'n_neutral_sentences': 2, 'n_positive_sentences': 1, 'seed': 42}


Best results: {'f1': 0.6923404394668761, 'precision': 0.7320261437908497, 'recall': 0.7037037037037037}


  predictions = pd.read_json(output, orient="index")
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


              precision    recall  f1-score   support

    negative       0.61      0.92      0.73        12
     neutral       0.00      0.00      0.00         8
    positive       0.56      0.71      0.63         7

    accuracy                           0.59        27
   macro avg       0.39      0.54      0.45        27
weighted avg       0.42      0.59      0.49        27

----------------------------------------------------------------------------------------------------



Best combination of context sentences: {'n_negative_sentences': 2, 'n_neutral_sentences': 2, 'n_positive_sentences': 1, 'seed': 42}


Best results: {'f1': 0.6923404394668761, 'precision': 0.7320261437908497, 'recall': 0.7037037037037037}


  predictions = pd.read_json(output, orient="index")


              precision    recall  f1-score   support

    negative       0.56      0.75      0.64        12
     neutral       1.00      0.12      0.22         8
    positive       0.50      0.71      0.59         7

    accuracy                           0.56        27
   macro avg       0.69      0.53      0.48        27
weighted avg       0.68      0.56      0.50        27

----------------------------------------------------------------------------------------------------



Best combination of context sentences: {'n_negative_sentences': 2, 'n_neutral_sentences': 2, 'n_positive_sentences': 1, 'seed': 42}


Best results: {'f1': 0.6923404394668761, 'precision': 0.7320261437908497, 'recall': 0.7037037037037037}


  predictions = pd.read_json(output, orient="index")


              precision    recall  f1-score   support

    negative       0.59      0.83      0.69        12
     neutral       0.00      0.00      0.00         8
    positive       0.56      0.71      0.63         7

    accuracy                           0.56        27
   macro avg       0.38      0.52      0.44        27
weighted avg       0.41      0.56      0.47        27

----------------------------------------------------------------------------------------------------



Best combination of context sentences: {'n_negative_sentences': 2, 'n_neutral_sentences': 2, 'n_positive_sentences': 1, 'seed': 42}


Best results: {'f1': 0.6923404394668761, 'precision': 0.7320261437908497, 'recall': 0.7037037037037037}


  predictions = pd.read_json(output, orient="index")


              precision    recall  f1-score   support

    negative       0.61      0.92      0.73        12
     neutral       1.00      0.12      0.22         8
    positive       0.62      0.71      0.67         7

    accuracy                           0.63        27
   macro avg       0.75      0.59      0.54        27
weighted avg       0.73      0.63      0.56        27

----------------------------------------------------------------------------------------------------



Best combination of context sentences: {'n_negative_sentences': 2, 'n_neutral_sentences': 2, 'n_positive_sentences': 1, 'seed': 42}


Best results: {'f1': 0.6923404394668761, 'precision': 0.7320261437908497, 'recall': 0.7037037037037037}


  predictions = pd.read_json(output, orient="index")
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


              precision    recall  f1-score   support

    negative       0.56      0.75      0.64        12
     neutral       0.00      0.00      0.00         8
    positive       0.45      0.71      0.56         7

    accuracy                           0.52        27
   macro avg       0.34      0.49      0.40        27
weighted avg       0.37      0.52      0.43        27

----------------------------------------------------------------------------------------------------



Best combination of context sentences: {'n_negative_sentences': 2, 'n_neutral_sentences': 2, 'n_positive_sentences': 1, 'seed': 42}


Best results: {'f1': 0.6923404394668761, 'precision': 0.7320261437908497, 'recall': 0.7037037037037037}


  predictions = pd.read_json(output, orient="index")
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


              precision    recall  f1-score   support

    negative       0.59      0.83      0.69        12
     neutral       0.00      0.00      0.00         8
    positive       0.60      0.86      0.71         7

    accuracy                           0.59        27
   macro avg       0.40      0.56      0.47        27
weighted avg       0.42      0.59      0.49        27

----------------------------------------------------------------------------------------------------



Best combination of context sentences: {'n_negative_sentences': 2, 'n_neutral_sentences': 2, 'n_positive_sentences': 1, 'seed': 42}


Best results: {'f1': 0.6923404394668761, 'precision': 0.7320261437908497, 'recall': 0.7037037037037037}


  predictions = pd.read_json(output, orient="index")
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


              precision    recall  f1-score   support

    negative       0.65      0.92      0.76        12
     neutral       0.00      0.00      0.00         8
    positive       0.50      0.71      0.59         7

    accuracy                           0.59        27
   macro avg       0.38      0.54      0.45        27
weighted avg       0.42      0.59      0.49        27

----------------------------------------------------------------------------------------------------



Best combination of context sentences: {'n_negative_sentences': 2, 'n_neutral_sentences': 2, 'n_positive_sentences': 1, 'seed': 42}


Best results: {'f1': 0.6923404394668761, 'precision': 0.7320261437908497, 'recall': 0.7037037037037037}


  predictions = pd.read_json(output, orient="index")


              precision    recall  f1-score   support

    negative       0.56      0.75      0.64        12
     neutral       1.00      0.12      0.22         8
    positive       0.50      0.71      0.59         7

    accuracy                           0.56        27
   macro avg       0.69      0.53      0.48        27
weighted avg       0.68      0.56      0.50        27

----------------------------------------------------------------------------------------------------



Best combination of context sentences: {'n_negative_sentences': 2, 'n_neutral_sentences': 2, 'n_positive_sentences': 1, 'seed': 42}


Best results: {'f1': 0.6923404394668761, 'precision': 0.7320261437908497, 'recall': 0.7037037037037037}


  predictions = pd.read_json(output, orient="index")


              precision    recall  f1-score   support

    negative       0.62      0.83      0.71        12
     neutral       0.00      0.00      0.00         8
    positive       0.56      0.71      0.63         7

    accuracy                           0.56        27
   macro avg       0.39      0.52      0.45        27
weighted avg       0.42      0.56      0.48        27

----------------------------------------------------------------------------------------------------



Best combination of context sentences: {'n_negative_sentences': 2, 'n_neutral_sentences': 2, 'n_positive_sentences': 1, 'seed': 42}


Best results: {'f1': 0.6923404394668761, 'precision': 0.7320261437908497, 'recall': 0.7037037037037037}


  predictions = pd.read_json(output, orient="index")


              precision    recall  f1-score   support

    negative       0.62      0.83      0.71        12
     neutral       0.60      0.38      0.46         8
    positive       0.67      0.57      0.62         7

    accuracy                           0.63        27
   macro avg       0.63      0.59      0.60        27
weighted avg       0.63      0.63      0.61        27

----------------------------------------------------------------------------------------------------



Best combination of context sentences: {'n_negative_sentences': 2, 'n_neutral_sentences': 2, 'n_positive_sentences': 1, 'seed': 42}


Best results: {'f1': 0.6923404394668761, 'precision': 0.7320261437908497, 'recall': 0.7037037037037037}


  predictions = pd.read_json(output, orient="index")


              precision    recall  f1-score   support

    negative       0.56      0.75      0.64        12
     neutral       0.50      0.12      0.20         8
    positive       0.56      0.71      0.63         7

    accuracy                           0.56        27
   macro avg       0.54      0.53      0.49        27
weighted avg       0.54      0.56      0.51        27

----------------------------------------------------------------------------------------------------



Best combination of context sentences: {'n_negative_sentences': 2, 'n_neutral_sentences': 2, 'n_positive_sentences': 1, 'seed': 42}


Best results: {'f1': 0.6923404394668761, 'precision': 0.7320261437908497, 'recall': 0.7037037037037037}


  predictions = pd.read_json(output, orient="index")


              precision    recall  f1-score   support

    negative       0.56      0.83      0.67        12
     neutral       0.00      0.00      0.00         8
    positive       0.62      0.71      0.67         7

    accuracy                           0.56        27
   macro avg       0.39      0.52      0.44        27
weighted avg       0.41      0.56      0.47        27

----------------------------------------------------------------------------------------------------



Best combination of context sentences: {'n_negative_sentences': 2, 'n_neutral_sentences': 2, 'n_positive_sentences': 1, 'seed': 42}


Best results: {'f1': 0.6923404394668761, 'precision': 0.7320261437908497, 'recall': 0.7037037037037037}


  predictions = pd.read_json(output, orient="index")


              precision    recall  f1-score   support

    negative       0.65      0.92      0.76        12
     neutral       0.67      0.25      0.36         8
    positive       0.57      0.57      0.57         7

    accuracy                           0.63        27
   macro avg       0.63      0.58      0.56        27
weighted avg       0.63      0.63      0.59        27

----------------------------------------------------------------------------------------------------



Best combination of context sentences: {'n_negative_sentences': 2, 'n_neutral_sentences': 2, 'n_positive_sentences': 1, 'seed': 42}


Best results: {'f1': 0.6923404394668761, 'precision': 0.7320261437908497, 'recall': 0.7037037037037037}


  predictions = pd.read_json(output, orient="index")


              precision    recall  f1-score   support

    negative       0.56      0.75      0.64        12
     neutral       0.50      0.12      0.20         8
    positive       0.56      0.71      0.63         7

    accuracy                           0.56        27
   macro avg       0.54      0.53      0.49        27
weighted avg       0.54      0.56      0.51        27

----------------------------------------------------------------------------------------------------



Best combination of context sentences: {'n_negative_sentences': 2, 'n_neutral_sentences': 2, 'n_positive_sentences': 1, 'seed': 42}


Best results: {'f1': 0.6923404394668761, 'precision': 0.7320261437908497, 'recall': 0.7037037037037037}


  predictions = pd.read_json(output, orient="index")


              precision    recall  f1-score   support

    negative       0.60      0.75      0.67        12
     neutral       0.50      0.25      0.33         8
    positive       0.62      0.71      0.67         7

    accuracy                           0.59        27
   macro avg       0.58      0.57      0.56        27
weighted avg       0.58      0.59      0.57        27

----------------------------------------------------------------------------------------------------



Best combination of context sentences: {'n_negative_sentences': 2, 'n_neutral_sentences': 2, 'n_positive_sentences': 1, 'seed': 42}


Best results: {'f1': 0.6923404394668761, 'precision': 0.7320261437908497, 'recall': 0.7037037037037037}


  predictions = pd.read_json(output, orient="index")


              precision    recall  f1-score   support

    negative       0.62      0.83      0.71        12
     neutral       0.60      0.38      0.46         8
    positive       0.67      0.57      0.62         7

    accuracy                           0.63        27
   macro avg       0.63      0.59      0.60        27
weighted avg       0.63      0.63      0.61        27

----------------------------------------------------------------------------------------------------



Best combination of context sentences: {'n_negative_sentences': 2, 'n_neutral_sentences': 2, 'n_positive_sentences': 1, 'seed': 42}


Best results: {'f1': 0.6923404394668761, 'precision': 0.7320261437908497, 'recall': 0.7037037037037037}


  predictions = pd.read_json(output, orient="index")


              precision    recall  f1-score   support

    negative       0.56      0.75      0.64        12
     neutral       0.50      0.12      0.20         8
    positive       0.56      0.71      0.63         7

    accuracy                           0.56        27
   macro avg       0.54      0.53      0.49        27
weighted avg       0.54      0.56      0.51        27

----------------------------------------------------------------------------------------------------



Best combination of context sentences: {'n_negative_sentences': 2, 'n_neutral_sentences': 2, 'n_positive_sentences': 1, 'seed': 42}


Best results: {'f1': 0.6923404394668761, 'precision': 0.7320261437908497, 'recall': 0.7037037037037037}


  predictions = pd.read_json(output, orient="index")


              precision    recall  f1-score   support

    negative       0.60      0.75      0.67        12
     neutral       0.00      0.00      0.00         8
    positive       0.50      0.71      0.59         7

    accuracy                           0.52        27
   macro avg       0.37      0.49      0.42        27
weighted avg       0.40      0.52      0.45        27

----------------------------------------------------------------------------------------------------



Best combination of context sentences: {'n_negative_sentences': 2, 'n_neutral_sentences': 2, 'n_positive_sentences': 1, 'seed': 42}


Best results: {'f1': 0.6923404394668761, 'precision': 0.7320261437908497, 'recall': 0.7037037037037037}


  predictions = pd.read_json(output, orient="index")


              precision    recall  f1-score   support

    negative       0.65      0.92      0.76        12
     neutral       0.67      0.25      0.36         8
    positive       0.57      0.57      0.57         7

    accuracy                           0.63        27
   macro avg       0.63      0.58      0.56        27
weighted avg       0.63      0.63      0.59        27

----------------------------------------------------------------------------------------------------



Best combination of context sentences: {'n_negative_sentences': 2, 'n_neutral_sentences': 2, 'n_positive_sentences': 1, 'seed': 42}


Best results: {'f1': 0.6923404394668761, 'precision': 0.7320261437908497, 'recall': 0.7037037037037037}


  predictions = pd.read_json(output, orient="index")


              precision    recall  f1-score   support

    negative       0.59      0.83      0.69        12
     neutral       1.00      0.12      0.22         8
    positive       0.56      0.71      0.63         7

    accuracy                           0.59        27
   macro avg       0.71      0.56      0.51        27
weighted avg       0.70      0.59      0.53        27

----------------------------------------------------------------------------------------------------



Best combination of context sentences: {'n_negative_sentences': 2, 'n_neutral_sentences': 2, 'n_positive_sentences': 1, 'seed': 42}


Best results: {'f1': 0.6923404394668761, 'precision': 0.7320261437908497, 'recall': 0.7037037037037037}


  predictions = pd.read_json(output, orient="index")


              precision    recall  f1-score   support

    negative       0.56      0.83      0.67        12
     neutral       0.00      0.00      0.00         8
    positive       0.62      0.71      0.67         7

    accuracy                           0.56        27
   macro avg       0.39      0.52      0.44        27
weighted avg       0.41      0.56      0.47        27

----------------------------------------------------------------------------------------------------



Best combination of context sentences: {'n_negative_sentences': 2, 'n_neutral_sentences': 2, 'n_positive_sentences': 1, 'seed': 42}


Best results: {'f1': 0.6923404394668761, 'precision': 0.7320261437908497, 'recall': 0.7037037037037037}


  predictions = pd.read_json(output, orient="index")
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


              precision    recall  f1-score   support

    negative       0.61      0.92      0.73        12
     neutral       0.00      0.00      0.00         8
    positive       0.56      0.71      0.63         7

    accuracy                           0.59        27
   macro avg       0.39      0.54      0.45        27
weighted avg       0.42      0.59      0.49        27

----------------------------------------------------------------------------------------------------



Best combination of context sentences: {'n_negative_sentences': 2, 'n_neutral_sentences': 2, 'n_positive_sentences': 1, 'seed': 42}


Best results: {'f1': 0.6923404394668761, 'precision': 0.7320261437908497, 'recall': 0.7037037037037037}


  predictions = pd.read_json(output, orient="index")


              precision    recall  f1-score   support

    negative       0.59      0.83      0.69        12
     neutral       1.00      0.12      0.22         8
    positive       0.56      0.71      0.63         7

    accuracy                           0.59        27
   macro avg       0.71      0.56      0.51        27
weighted avg       0.70      0.59      0.53        27

----------------------------------------------------------------------------------------------------



Best combination of context sentences: {'n_negative_sentences': 2, 'n_neutral_sentences': 2, 'n_positive_sentences': 1, 'seed': 42}


Best results: {'f1': 0.6923404394668761, 'precision': 0.7320261437908497, 'recall': 0.7037037037037037}


  predictions = pd.read_json(output, orient="index")


              precision    recall  f1-score   support

    negative       0.56      0.83      0.67        12
     neutral       0.00      0.00      0.00         8
    positive       0.62      0.71      0.67         7

    accuracy                           0.56        27
   macro avg       0.39      0.52      0.44        27
weighted avg       0.41      0.56      0.47        27

----------------------------------------------------------------------------------------------------



Best combination of context sentences: {'n_negative_sentences': 2, 'n_neutral_sentences': 2, 'n_positive_sentences': 1, 'seed': 42}


Best results: {'f1': 0.6923404394668761, 'precision': 0.7320261437908497, 'recall': 0.7037037037037037}


  predictions = pd.read_json(output, orient="index")
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


              precision    recall  f1-score   support

    negative       0.61      0.92      0.73        12
     neutral       0.00      0.00      0.00         8
    positive       0.56      0.71      0.63         7

    accuracy                           0.59        27
   macro avg       0.39      0.54      0.45        27
weighted avg       0.42      0.59      0.49        27

----------------------------------------------------------------------------------------------------



Best combination of context sentences: {'n_negative_sentences': 2, 'n_neutral_sentences': 2, 'n_positive_sentences': 1, 'seed': 42}


Best results: {'f1': 0.6923404394668761, 'precision': 0.7320261437908497, 'recall': 0.7037037037037037}


  predictions = pd.read_json(output, orient="index")


              precision    recall  f1-score   support

    negative       0.56      0.75      0.64        12
     neutral       1.00      0.12      0.22         8
    positive       0.50      0.71      0.59         7

    accuracy                           0.56        27
   macro avg       0.69      0.53      0.48        27
weighted avg       0.68      0.56      0.50        27

----------------------------------------------------------------------------------------------------



Best combination of context sentences: {'n_negative_sentences': 2, 'n_neutral_sentences': 2, 'n_positive_sentences': 1, 'seed': 42}


Best results: {'f1': 0.6923404394668761, 'precision': 0.7320261437908497, 'recall': 0.7037037037037037}


  predictions = pd.read_json(output, orient="index")


              precision    recall  f1-score   support

    negative       0.59      0.83      0.69        12
     neutral       0.33      0.12      0.18         8
    positive       0.57      0.57      0.57         7

    accuracy                           0.56        27
   macro avg       0.50      0.51      0.48        27
weighted avg       0.51      0.56      0.51        27

----------------------------------------------------------------------------------------------------



Best combination of context sentences: {'n_negative_sentences': 2, 'n_neutral_sentences': 2, 'n_positive_sentences': 1, 'seed': 42}


Best results: {'f1': 0.6923404394668761, 'precision': 0.7320261437908497, 'recall': 0.7037037037037037}


  predictions = pd.read_json(output, orient="index")
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


              precision    recall  f1-score   support

    negative       0.61      0.92      0.73        12
     neutral       0.00      0.00      0.00         8
    positive       0.56      0.71      0.63         7

    accuracy                           0.59        27
   macro avg       0.39      0.54      0.45        27
weighted avg       0.42      0.59      0.49        27

----------------------------------------------------------------------------------------------------



Best combination of context sentences: {'n_negative_sentences': 2, 'n_neutral_sentences': 2, 'n_positive_sentences': 1, 'seed': 42}


Best results: {'f1': 0.6923404394668761, 'precision': 0.7320261437908497, 'recall': 0.7037037037037037}


  predictions = pd.read_json(output, orient="index")
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


              precision    recall  f1-score   support

    negative       0.56      0.75      0.64        12
     neutral       0.00      0.00      0.00         8
    positive       0.45      0.71      0.56         7

    accuracy                           0.52        27
   macro avg       0.34      0.49      0.40        27
weighted avg       0.37      0.52      0.43        27

----------------------------------------------------------------------------------------------------



Best combination of context sentences: {'n_negative_sentences': 2, 'n_neutral_sentences': 2, 'n_positive_sentences': 1, 'seed': 42}


Best results: {'f1': 0.6923404394668761, 'precision': 0.7320261437908497, 'recall': 0.7037037037037037}


  predictions = pd.read_json(output, orient="index")
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


              precision    recall  f1-score   support

    negative       0.59      0.83      0.69        12
     neutral       0.00      0.00      0.00         8
    positive       0.60      0.86      0.71         7

    accuracy                           0.59        27
   macro avg       0.40      0.56      0.47        27
weighted avg       0.42      0.59      0.49        27

----------------------------------------------------------------------------------------------------



Best combination of context sentences: {'n_negative_sentences': 2, 'n_neutral_sentences': 2, 'n_positive_sentences': 1, 'seed': 42}


Best results: {'f1': 0.6923404394668761, 'precision': 0.7320261437908497, 'recall': 0.7037037037037037}


  predictions = pd.read_json(output, orient="index")
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


              precision    recall  f1-score   support

    negative       0.65      0.92      0.76        12
     neutral       0.00      0.00      0.00         8
    positive       0.50      0.71      0.59         7

    accuracy                           0.59        27
   macro avg       0.38      0.54      0.45        27
weighted avg       0.42      0.59      0.49        27

----------------------------------------------------------------------------------------------------



Best combination of context sentences: {'n_negative_sentences': 2, 'n_neutral_sentences': 2, 'n_positive_sentences': 1, 'seed': 42}


Best results: {'f1': 0.6923404394668761, 'precision': 0.7320261437908497, 'recall': 0.7037037037037037}


  predictions = pd.read_json(output, orient="index")


              precision    recall  f1-score   support

    negative       0.56      0.75      0.64        12
     neutral       1.00      0.12      0.22         8
    positive       0.50      0.71      0.59         7

    accuracy                           0.56        27
   macro avg       0.69      0.53      0.48        27
weighted avg       0.68      0.56      0.50        27

----------------------------------------------------------------------------------------------------



Best combination of context sentences: {'n_negative_sentences': 2, 'n_neutral_sentences': 2, 'n_positive_sentences': 1, 'seed': 42}


Best results: {'f1': 0.6923404394668761, 'precision': 0.7320261437908497, 'recall': 0.7037037037037037}


  predictions = pd.read_json(output, orient="index")


              precision    recall  f1-score   support

    negative       0.62      0.83      0.71        12
     neutral       0.50      0.12      0.20         8
    positive       0.56      0.71      0.63         7

    accuracy                           0.59        27
   macro avg       0.56      0.56      0.51        27
weighted avg       0.57      0.59      0.54        27

----------------------------------------------------------------------------------------------------



Best combination of context sentences: {'n_negative_sentences': 2, 'n_neutral_sentences': 2, 'n_positive_sentences': 1, 'seed': 42}


Best results: {'f1': 0.6923404394668761, 'precision': 0.7320261437908497, 'recall': 0.7037037037037037}


  predictions = pd.read_json(output, orient="index")
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


              precision    recall  f1-score   support

    negative       0.61      0.92      0.73        12
     neutral       0.00      0.00      0.00         8
    positive       0.56      0.71      0.63         7

    accuracy                           0.59        27
   macro avg       0.39      0.54      0.45        27
weighted avg       0.42      0.59      0.49        27

----------------------------------------------------------------------------------------------------



Best combination of context sentences: {'n_negative_sentences': 2, 'n_neutral_sentences': 2, 'n_positive_sentences': 1, 'seed': 42}


Best results: {'f1': 0.6923404394668761, 'precision': 0.7320261437908497, 'recall': 0.7037037037037037}


  predictions = pd.read_json(output, orient="index")
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


              precision    recall  f1-score   support

    negative       0.60      0.75      0.67        12
     neutral       0.00      0.00      0.00         8
    positive       0.42      0.71      0.53         7

    accuracy                           0.52        27
   macro avg       0.34      0.49      0.40        27
weighted avg       0.37      0.52      0.43        27

----------------------------------------------------------------------------------------------------



Best combination of context sentences: {'n_negative_sentences': 2, 'n_neutral_sentences': 2, 'n_positive_sentences': 1, 'seed': 42}


Best results: {'f1': 0.6923404394668761, 'precision': 0.7320261437908497, 'recall': 0.7037037037037037}


  predictions = pd.read_json(output, orient="index")


              precision    recall  f1-score   support

    negative       0.62      0.83      0.71        12
     neutral       0.00      0.00      0.00         8
    positive       0.50      0.71      0.59         7

    accuracy                           0.56        27
   macro avg       0.38      0.52      0.43        27
weighted avg       0.41      0.56      0.47        27

----------------------------------------------------------------------------------------------------



Best combination of context sentences: {'n_negative_sentences': 2, 'n_neutral_sentences': 2, 'n_positive_sentences': 1, 'seed': 42}


Best results: {'f1': 0.6923404394668761, 'precision': 0.7320261437908497, 'recall': 0.7037037037037037}


  predictions = pd.read_json(output, orient="index")


              precision    recall  f1-score   support

    negative       0.65      0.92      0.76        12
     neutral       0.67      0.25      0.36         8
    positive       0.57      0.57      0.57         7

    accuracy                           0.63        27
   macro avg       0.63      0.58      0.56        27
weighted avg       0.63      0.63      0.59        27

----------------------------------------------------------------------------------------------------



Best combination of context sentences: {'n_negative_sentences': 2, 'n_neutral_sentences': 2, 'n_positive_sentences': 1, 'seed': 42}


Best results: {'f1': 0.6923404394668761, 'precision': 0.7320261437908497, 'recall': 0.7037037037037037}


  predictions = pd.read_json(output, orient="index")


              precision    recall  f1-score   support

    negative       0.56      0.75      0.64        12
     neutral       0.50      0.12      0.20         8
    positive       0.56      0.71      0.63         7

    accuracy                           0.56        27
   macro avg       0.54      0.53      0.49        27
weighted avg       0.54      0.56      0.51        27

----------------------------------------------------------------------------------------------------



Best combination of context sentences: {'n_negative_sentences': 2, 'n_neutral_sentences': 2, 'n_positive_sentences': 1, 'seed': 42}


Best results: {'f1': 0.6923404394668761, 'precision': 0.7320261437908497, 'recall': 0.7037037037037037}


  predictions = pd.read_json(output, orient="index")


              precision    recall  f1-score   support

    negative       0.60      0.75      0.67        12
     neutral       0.40      0.25      0.31         8
    positive       0.57      0.57      0.57         7

    accuracy                           0.56        27
   macro avg       0.52      0.52      0.52        27
weighted avg       0.53      0.56      0.54        27

----------------------------------------------------------------------------------------------------



Best combination of context sentences: {'n_negative_sentences': 2, 'n_neutral_sentences': 2, 'n_positive_sentences': 1, 'seed': 42}


Best results: {'f1': 0.6923404394668761, 'precision': 0.7320261437908497, 'recall': 0.7037037037037037}


  predictions = pd.read_json(output, orient="index")


              precision    recall  f1-score   support

    negative       0.65      0.92      0.76        12
     neutral       0.67      0.25      0.36         8
    positive       0.57      0.57      0.57         7

    accuracy                           0.63        27
   macro avg       0.63      0.58      0.56        27
weighted avg       0.63      0.63      0.59        27

----------------------------------------------------------------------------------------------------



Best combination of context sentences: {'n_negative_sentences': 2, 'n_neutral_sentences': 2, 'n_positive_sentences': 1, 'seed': 42}


Best results: {'f1': 0.6923404394668761, 'precision': 0.7320261437908497, 'recall': 0.7037037037037037}


  predictions = pd.read_json(output, orient="index")


              precision    recall  f1-score   support

    negative       0.56      0.75      0.64        12
     neutral       0.50      0.12      0.20         8
    positive       0.56      0.71      0.63         7

    accuracy                           0.56        27
   macro avg       0.54      0.53      0.49        27
weighted avg       0.54      0.56      0.51        27

----------------------------------------------------------------------------------------------------



Best combination of context sentences: {'n_negative_sentences': 2, 'n_neutral_sentences': 2, 'n_positive_sentences': 1, 'seed': 42}


Best results: {'f1': 0.6923404394668761, 'precision': 0.7320261437908497, 'recall': 0.7037037037037037}


  predictions = pd.read_json(output, orient="index")


              precision    recall  f1-score   support

    negative       0.60      0.75      0.67        12
     neutral       0.40      0.25      0.31         8
    positive       0.57      0.57      0.57         7

    accuracy                           0.56        27
   macro avg       0.52      0.52      0.52        27
weighted avg       0.53      0.56      0.54        27

----------------------------------------------------------------------------------------------------



Best combination of context sentences: {'n_negative_sentences': 2, 'n_neutral_sentences': 2, 'n_positive_sentences': 1, 'seed': 42}


Best results: {'f1': 0.6923404394668761, 'precision': 0.7320261437908497, 'recall': 0.7037037037037037}
              precision    recall  f1-score   support

    negative       0.62      0.83      0.71        12
     neutral       0.60      0.38      0.46         8
    positive       0.67      0.57      0.62         7

    accuracy                           0.63        27
   ma

  predictions = pd.read_json(output, orient="index")
