# LLM Classifiers

**Goal:** Given a sentence as input, classify it as either a prediction or non-prediction.

In [1]:
import os
import sys
import pprint

import pandas as pd

# from tqdm import tqdm
from tqdm.notebook import tqdm


# Get the current working directory of the notebook
notebook_dir = os.getcwd()
# Add the parent directory to the system path
sys.path.append(os.path.join(notebook_dir, '../'))

# import log_files
from metrics import EvaluationMetric
from data_processing import DataProcessing
from text_generation_models import TextGenerationModelFactory
from prediction_properties import PredictionProperties

In [2]:
pd.set_option('max_colwidth', 800)
pd.set_option('display.max_columns', None)
pd.set_option('display.max_rows', None)

## Load Data

In [3]:
print("======= LOAD DATA =======")



In [39]:
base_data_path = os.path.join(notebook_dir, '../data')
combine_data_path = os.path.join(base_data_path, 'financial_phrase_bank/combined_generated_fin_phrase_bank')
# X_test_set_path = os.path.join(combine_data_path, 'x_test_set-v16.csv') 
X_test_set_path = os.path.join(combine_data_path, 'ml_classifiers-v10.csv') 
y_sentence_test_set_path = os.path.join(combine_data_path, 'y_sentence_test_df-v5.csv')
y_author_test_set_path = os.path.join(combine_data_path, 'y_author_test_df-v5.csv')

In [40]:
X_test_df = DataProcessing.load_from_file(X_test_set_path, 'csv', sep=',')
# X_test_df.drop(columns=['Unnamed: 0'], inplace=True)
X_test_df.shape

(565, 12)

In [41]:
X_test_df.head(3)

Unnamed: 0,Base Sentence,Sentence Label,Author Type,Embedding,Normalized Embeddings,perceptron,sgd_classifier,logistic_regression,ridge_classifier,decision_tree_classifier,random_forest_classifier,gradient_boosting_classifier
0,Aspocomp intends to set up a plant to manufacture printed circuit boards with an investment of Rs310 crore .,0,1,[ 3.10125723e-02 1.48145273e-01 1.18822167e-02 -4.00886834e-02\n 2.18477882e-02 -7.52243698e-02 -6.43576309e-02 -1.18781418e-01\n -9.42035094e-02 1.69367039e+00 -1.97394520e-01 6.25598105e-03\n 4.40751575e-02 -4.00502793e-02 -4.52028252e-02 -1.13160096e-01\n 8.78665820e-02 1.32680905e+00 -4.84675802e-02 -3.56700621e-03\n 5.54845780e-02 9.94077399e-02 -1.34666905e-01 1.01711877e-01\n 6.37347773e-02 5.76260537e-02 -8.55392031e-03 -5.17340861e-02\n -9.67892632e-02 -4.00202125e-02 -4.66998853e-02 2.48341095e-02\n -1.75986104e-02 2.24689487e-02 4.00095731e-02 -1.50968343e-01\n 3.12316287e-02 8.40115175e-02 6.51817173e-02 -1.11669578e-01\n 4.81054671e-02 9.21993256e-02 1.43066615e-01 -2.96952873e-01\n -6.12954162e-02 -9.03191715e-02 3.97677906e-02 -3.97728458e-02\n -7....,[ 1.4120030e+00 -1.1126934e+00 8.9782916e-02 -6.6743813e-02\n -3.7037823e-01 -6.0530519e-01 -9.2802191e-01 -1.2312902e+00\n -1.7777984e+00 -3.0181044e-01 9.9699152e-01 -2.8869405e-01\n -4.5153311e-01 -2.6786438e-01 -5.6393570e-03 -1.2585411e+00\n 1.6161768e+00 1.3745358e+00 1.0518684e+00 4.4895628e-01\n 5.3809673e-01 7.0892632e-01 -1.8697438e+00 2.5000875e+00\n 4.8316771e-01 -7.1109168e-02 1.5739738e+00 -1.1464823e+00\n -1.6519629e+00 -1.5582638e+00 -8.2016826e-01 3.2399574e-01\n -2.7373379e-01 -8.1674218e-01 -2.3484817e-02 -2.1448696e+00\n 6.6440696e-01 6.2469262e-01 1.1777695e+00 -1.1532152e+00\n 6.7729610e-01 7.1531069e-01 8.3312702e-01 -4.0233145e+00\n -1.2297469e+00 -1.5874187e+00 1.4470621e+00 2.0973889e-02\n -1.5879422e+00 3.4062797e-01 -3.7409711e-01 -4.989...,0,0,0,0,0,0,0
1,"Finnish consulting and engineering group Poyry Plc ( OMX Helsinki : POY ) said on Wednesday ( 1 October ) that it has been awarded a contract by Tanqia Dibba FZC as owner-engineer for the wastewater system of Dibba , Emirate of Fujairah , UAE .",0,1,[ 4.70041996e-03 1.10228069e-01 -4.77573602e-03 -9.56104398e-02\n 5.52534722e-02 -7.00817332e-02 -2.05142405e-02 -1.88917577e-01\n -1.04249775e-01 1.33263934e+00 -2.44876042e-01 6.75175413e-02\n -1.49942748e-02 -4.06556427e-02 -5.41962422e-02 -5.62920608e-02\n -5.34963198e-02 8.25075388e-01 -4.66980301e-02 -1.90773290e-02\n 9.21309367e-02 -4.51208055e-02 -1.37235259e-03 2.78238151e-02\n 5.95768094e-02 7.14695230e-02 -6.53408244e-02 -2.12525483e-02\n 1.03413671e-01 4.48262393e-02 4.36710976e-02 -1.38843171e-02\n 3.21916752e-02 8.28981474e-02 3.51038612e-02 6.20842911e-03\n 1.43408813e-02 3.44430916e-02 -3.06746140e-02 -1.25587946e-02\n 5.69351614e-02 2.96466853e-02 3.29996310e-02 3.74354306e-03\n -6.46679699e-02 -1.41989226e-02 -7.01283365e-02 2.36671865e-02\n 2....,[ 1.09362805e+00 -1.62451851e+00 -1.65100798e-01 -9.56259787e-01\n 4.15408053e-02 -5.33732831e-01 -1.78139552e-01 -2.06649685e+00\n -1.91614807e+00 -1.50415599e+00 4.32611406e-01 4.96168882e-01\n -1.36563015e+00 -2.76857466e-01 -9.52787399e-02 -3.11303347e-01\n -7.22661614e-01 -1.60111678e+00 1.07873595e+00 2.00796470e-01\n 1.14176524e+00 -1.31319058e+00 6.96443990e-02 1.48120904e+00\n 4.19971019e-01 1.25214577e-01 8.07624400e-01 -6.86450183e-01\n 1.40184307e+00 -3.70196342e-01 7.16911912e-01 -2.34501705e-01\n 5.22707343e-01 1.74109451e-02 -9.85290855e-02 4.73820418e-01\n 4.31317925e-01 -1.41181558e-01 -1.94443032e-01 1.55737445e-01\n 8.26632380e-01 -2.50532269e-01 -5.49674511e-01 6.59554422e-01\n -1.27693951e+00 -3.35813284e-01 -1.00587562e-01 7.66250908e-01\n -1....,0,0,0,0,0,0,0
2,Diluted EPS rose to EUR3 .68 from EUR0 .50 .,0,1,[-2.69290894e-01 4.16594967e-02 -1.22851051e-01 -2.70933006e-02\n 3.50714996e-02 -1.15661696e-01 -6.43265843e-02 1.75575197e-01\n -1.25396490e-01 7.68171012e-01 -2.24086478e-01 7.85705000e-02\n -2.09025033e-02 -5.41161001e-02 1.49662405e-01 -1.13877594e-01\n 4.11872976e-02 8.62753034e-01 -1.40215501e-01 -1.27536893e-01\n 4.39678207e-02 -1.11634195e-01 -1.07523814e-01 -1.85717091e-01\n 9.68070105e-02 1.18459016e-02 -2.06287235e-01 -5.30498102e-02\n 5.26940357e-03 6.65820986e-02 2.13034455e-05 -3.96589972e-02\n -1.49358094e-01 -1.68723017e-02 -2.74695046e-02 -7.49292001e-02\n -2.03455035e-02 -7.13056028e-02 9.15624946e-02 7.88197964e-02\n 7.60333985e-02 -6.34257048e-02 1.22492693e-01 1.20451011e-01\n 1.57845110e-01 4.86440361e-02 -2.06761993e-02 -1.56963691e-01\n -1....,[-2.22164416e+00 -2.55009079e+00 -1.97177386e+00 1.41455680e-01\n -2.07319200e-01 -1.16808939e+00 -9.27490950e-01 2.27401471e+00\n -2.20736480e+00 -3.38401103e+00 6.79722607e-01 6.37775719e-01\n -1.45705986e+00 -4.76822168e-01 1.93662560e+00 -1.27049232e+00\n 8.43871653e-01 -1.37766039e+00 -3.41158807e-01 -1.53451908e+00\n 3.48383397e-01 -2.24378777e+00 -1.47482145e+00 -1.46341038e+00\n 9.85830665e-01 -7.20348954e-01 -1.09447253e+00 -1.16633940e+00\n -9.52056646e-02 -6.55586347e-02 -2.55081672e-02 -6.06290877e-01\n -2.38134718e+00 -1.35980129e+00 -1.05573428e+00 -8.77996981e-01\n -4.73473296e-02 -1.77508843e+00 1.55541837e+00 1.36257136e+00\n 1.14964020e+00 -1.68761480e+00 5.74651241e-01 2.47708774e+00\n 1.83672142e+00 6.97480619e-01 5.95839024e-01 -1.35575378e+00\n -2....,0,0,0,0,0,0,0


In [7]:
y_sentence_test_df = DataProcessing.load_from_file(y_sentence_test_set_path, 'csv', sep=',')
# y_sentence_test_df.drop(columns=['Unnamed: 0'], inplace=True)
y_sentence_test_df.head(3)

Unnamed: 0,Sentence Label
0,0
1,0
2,0


In [8]:
y_author_test_df = DataProcessing.load_from_file(y_author_test_set_path, 'csv', sep=',')
# y_sentence_test_df.drop(columns=['Unnamed: 0'], inplace=True)
y_author_test_df.head(3)

Unnamed: 0,Author Type
0,1
1,1
2,1


## Load Prompt

1. Sentence Label ('non_prediction': 0, 'prediction': 1)
    - Zero-Shot
    - Few-Shot
    - Chain-of-Thought
    - Potential
        1. Prediction properties vs without predicton properties. Why? We didn't have annotators go off our properties. But, we have the synthetic based off them. Or, use both or at least both examples.

2. Author ('llm': 0, 'human': 1)
    - Zero-Shot
    - Few-Shot
    - Chain-of-Thought

In [9]:
system_identity_prompt = "You are an expert at identifying specific types of sentences called prediction."
prediction_requirements = PredictionProperties.get_requirements()
sentence_label_task = """Classify the sentence "label" as either a "non-prediction": 0, "prediction": 1."""
sentence_label_format_output = """Respond ONLY with valid JSON in this exact format: {"predicted_sentence_label": 0}. Do NOT reason or provide anything other than {"predicted_sentence_label": 0}. """

sentence_author_task = """Classify the sentence "label" as either a "llm": 0, "human": 1."""
sentence_author_format_output = """Respond ONLY with valid JSON in this exact format: {"predicted_sentence_author": 0}."""

In [10]:
prediction_properties = PredictionProperties.get_prediction_properties()
prediction_properties_base_prompt = f"""{system_identity_prompt} For each prediction, the format is based on: 
    
    {prediction_properties}
    Enforce the {prediction_requirements}.

"""
prediction_properties_base_prompt

'You are an expert at identifying specific types of sentences called prediction. For each prediction, the format is based on: \n\n     A prediction <p> = (<p_s>, <p_t>, <p_d>, <p_o>), where it consists of the following four properties:\n\n            1. <p_s>\n                - Defined as: \n                    - Source entity that states the <p>\n                - Characteristics:\n                    - A person with either: a name only, profile name only, geneder only, domain specific title only or any combination of these.\n                    - An associated organization\n                    - Named entity: Person, organization\n                    - Part of speech: Noun\n\n            2. <p_t>\n                - Defined as: \n                    - Target entity that the <p> is about\n                - Characteristics:\n                    - Same and <p_s>\n                    \n            3. <p_d>\n                - Defined as: \n                    - Date when the <p> is made\n 

In [11]:
no_prediction_properties_base_prompt = f"""{system_identity_prompt}.

    Enforce the {prediction_requirements}.

"""
no_prediction_properties_base_prompt

'You are an expert at identifying specific types of sentences called prediction..\n\n    Enforce the requirements of a prediction: \n            1. Usage of synonyms to the word "prediction", such as [\'forecast\', \'projection\', \'estimate\', \'outlook\', \'expectation\', \'anticipation\', \'prophecy\', \'prognosis\', \'guess\', \'speculation\', \'forecasting\', \'projection\', \'foretelling\', \'forecasted outcome\', \'forecast estimate\'].\n            2. Usage of the future verb tense, such as: [\'will\', \'shall\', \'would\', \'going\', \'might\', \'should\', \'could\', \'may\', \'must\', \'can\'].\n            3. Do NOT use past or present tense verbs.\n\n        .\n\n'

In [12]:
sentence_label_prompts = [prediction_properties_base_prompt, no_prediction_properties_base_prompt]
sentence_label_prompts

['You are an expert at identifying specific types of sentences called prediction. For each prediction, the format is based on: \n\n     A prediction <p> = (<p_s>, <p_t>, <p_d>, <p_o>), where it consists of the following four properties:\n\n            1. <p_s>\n                - Defined as: \n                    - Source entity that states the <p>\n                - Characteristics:\n                    - A person with either: a name only, profile name only, geneder only, domain specific title only or any combination of these.\n                    - An associated organization\n                    - Named entity: Person, organization\n                    - Part of speech: Noun\n\n            2. <p_t>\n                - Defined as: \n                    - Target entity that the <p> is about\n                - Characteristics:\n                    - Same and <p_s>\n                    \n            3. <p_d>\n                - Defined as: \n                    - Date when the <p> is made\n

## Models

In [13]:
tgmf = TextGenerationModelFactory()

# Option 1: Specific models
# models = tgmf.create_instances(['llama-3.1-8b-instant', 'llama-3.3-70b-versatile', 'llama-3.3-70b-instruct', 'openai/gpt-oss-20b'])
# models = tgmf.create_instances(['openai/gpt-oss-20b'])

# Option 2: All Groq models
# models = tgmf.create_instances(tgmf.get_groq_model_names())

# Option 3: All NaviGator models
models = tgmf.create_instances(tgmf.get_navigator_model_names())

# Option 4: All available models
# models = tgmf.create_instances()

# Option 5: Mix and match
# custom_models = ['llama-3.1-70b-instruct', 'mistral-small-3.1', 'llama-3.1-8b-instant']
# models = tgmf.create_instances(custom_models)
models



[<text_generation_models.Llama3170BInstructTextGenerationModel at 0x103ee9f90>,
 <text_generation_models.Llama318BInstructTextGenerationModel at 0x152419d90>,
 <text_generation_models.Llama3370BInstructTextGenerationModel at 0x1524584d0>,
 <text_generation_models.Mistral7BInstructTextGenerationModel at 0x1523a9a50>,
 <text_generation_models.MistralSmall31TextGenerationModel at 0x15507d990>,
 <text_generation_models.Codestral22BTextGenerationModel at 0x15507ec50>,
 <text_generation_models.GptOss20TextGenerationModel at 0x15507f6d0>,
 <text_generation_models.GptOss120TextGenerationModel at 0x15507f910>,
 <text_generation_models.Granite338BInstructTextGenerationModel at 0x15508c790>]

In [14]:
import json
import re

def parse_json_response(response, reasoning=False):
    """Parse JSON response from LLM to extract label and reasoning"""
    try:
        # Extract JSON if there's extra text
        json_match = re.search(r'\{.*\}', response, re.DOTALL)
        if json_match:
            data = json.loads(json_match.group())
            if reasoning:
                return data.get('predicted_sentence_label'), data.get('reasoning')
            else:
                return data.get('predicted_sentence_label')  # Return single value, not tuple
    except Exception as e:
        print(f"Error parsing JSON: {e}")
        if reasoning:
            return None, None
        else:
            return None  # Return single None when reasoning=False

In [15]:
def llm_certifier(sentence_to_classify: str, base_prompt: str, model, task, format_output: str):
    
      prompt = f"""{base_prompt}
      
      Sentence to label: '{sentence_to_classify}'

      {task}
      
      {format_output}
      """
      idx = 1
      if idx == 1:
            # print(f"\tPrompt: {prompt}")
            idx = idx + 1
      input_prompt = model.user(prompt)
      raw_text_llm_generation = model.chat_completion([input_prompt])
      # print(f"Raw response: {raw_text_llm_generation}")

      # Parse the JSON response
      label = parse_json_response(raw_text_llm_generation, reasoning=False)
      
      return raw_text_llm_generation, label

In [16]:
# print("======= PROMPT + MODEL -> LABEL and REASONING =======")
print("======= PROMPT + MODEL -> LABEL =======")
# sentence_label_prompts



In [17]:
results = []

for idx, row in tqdm(X_test_df.iterrows(), total=len(X_test_df), desc="Processing"):
    text = row['Base Sentence']
    # print(f"{idx} --- Sentence: {text}")
    for model in models:
        # print(model.__name__())
        raw_response, llm_label = llm_certifier(text, sentence_label_prompts[0], model, sentence_label_task, sentence_label_format_output)
        result = (text, raw_response, llm_label, model.__name__())
        results.append(result)

        if idx < 3:
            # print(f"{idx} --- Sentence: {text}")
            print(f"\n--- Result {idx} ---")
            pprint.pprint(result, width=120)
    
    # print()

Processing:   0%|          | 0/565 [00:00<?, ?it/s]


--- Result 0 ---
('Aspocomp intends to set up a plant to manufacture printed circuit boards with an investment of Rs310 crore .',
 '{"predicted_sentence_label": 0}',
 0,
 'llama-3.1-70b-instruct')

--- Result 0 ---
('Aspocomp intends to set up a plant to manufacture printed circuit boards with an investment of Rs310 crore .',
 '{"predicted_sentence_label": 0}',
 0,
 'llama-3.1-8b-instruct')

--- Result 0 ---
('Aspocomp intends to set up a plant to manufacture printed circuit boards with an investment of Rs310 crore .',
 '{"predicted_sentence_label": 0}',
 0,
 'llama-3.3-70b-instruct')

--- Result 0 ---
('Aspocomp intends to set up a plant to manufacture printed circuit boards with an investment of Rs310 crore .',
 ' {"predicted_sentence_label": 0}',
 0,
 'mistral-7b-instruct')

--- Result 0 ---
('Aspocomp intends to set up a plant to manufacture printed circuit boards with an investment of Rs310 crore .',
 '```json\n{"predicted_sentence_label": 0}\n```',
 0,
 'mistral-small-3.1')

---

In [18]:
results

[('Aspocomp intends to set up a plant to manufacture printed circuit boards with an investment of Rs310 crore .',
  '{"predicted_sentence_label": 0}',
  0,
  'llama-3.1-70b-instruct'),
 ('Aspocomp intends to set up a plant to manufacture printed circuit boards with an investment of Rs310 crore .',
  '{"predicted_sentence_label": 0}',
  0,
  'llama-3.1-8b-instruct'),
 ('Aspocomp intends to set up a plant to manufacture printed circuit boards with an investment of Rs310 crore .',
  '{"predicted_sentence_label": 0}',
  0,
  'llama-3.3-70b-instruct'),
 ('Aspocomp intends to set up a plant to manufacture printed circuit boards with an investment of Rs310 crore .',
  ' {"predicted_sentence_label": 0}',
  0,
  'mistral-7b-instruct'),
 ('Aspocomp intends to set up a plant to manufacture printed circuit boards with an investment of Rs310 crore .',
  '```json\n{"predicted_sentence_label": 0}\n```',
  0,
  'mistral-small-3.1'),
 ('Aspocomp intends to set up a plant to manufacture printed circuit 

In [None]:
# groupby text

In [38]:
results_with_llm_label_df = pd.DataFrame(results, columns=['text', 'raw_response', 'llm_label', 'llm_name'])
results_with_llm_label_df.head(7)
# results_with_llm_label_df

Unnamed: 0,text,raw_response,llm_label,llm_name
0,Aspocomp intends to set up a plant to manufacture printed circuit boards with an investment of Rs310 crore .,"{""predicted_sentence_label"": 0}",0,llama-3.1-70b-instruct
1,Aspocomp intends to set up a plant to manufacture printed circuit boards with an investment of Rs310 crore .,"{""predicted_sentence_label"": 0}",0,llama-3.1-8b-instruct
2,Aspocomp intends to set up a plant to manufacture printed circuit boards with an investment of Rs310 crore .,"{""predicted_sentence_label"": 0}",0,llama-3.3-70b-instruct
3,Aspocomp intends to set up a plant to manufacture printed circuit boards with an investment of Rs310 crore .,"{""predicted_sentence_label"": 0}",0,mistral-7b-instruct
4,Aspocomp intends to set up a plant to manufacture printed circuit boards with an investment of Rs310 crore .,"```json\n{""predicted_sentence_label"": 0}\n```",0,mistral-small-3.1
5,Aspocomp intends to set up a plant to manufacture printed circuit boards with an investment of Rs310 crore .,"{""predicted_sentence_label"": 0}",0,codestral-22b
6,Aspocomp intends to set up a plant to manufacture printed circuit boards with an investment of Rs310 crore .,"{""predicted_sentence_label"": 0}",0,gpt-oss-20b


In [25]:
# DataProcessing.save_to_file(results_with_llm_label_df, combine_data_path, 'llm_classifiers_with_results', '.csv')

Using file number: 1
Saving CSV file to: /Users/detraviousjamaribrinkley/Documents/Development/research_labs/uf_ds/predictions/prediction_classification_experiments-v2/../data/financial_phrase_bank/combined_generated_fin_phrase_bank/llm_classifiers_with_results-v1.csv


### Align Test Sentences with Predicted Sentence Label from LLMs

In [42]:
def get_llm_labels(df, model_name):
    filt_llama = (df['llm_name'] == model_name)
    filt_df = df[filt_llama]
    return filt_df['llm_label']

X_test_with_results_df = X_test_df.copy()
for model in models:
    llm_model_name = model.__name__()
    # print(f"Model Name: {llm_model_name}")
    model_labels = get_llm_labels(results_with_llm_label_df, llm_model_name)
    # print(model_labels)
    X_test_with_results_df[llm_model_name] = model_labels.to_numpy().ravel()

X_test_with_results_df.head(3)

Unnamed: 0,Base Sentence,Sentence Label,Author Type,Embedding,Normalized Embeddings,perceptron,sgd_classifier,logistic_regression,ridge_classifier,decision_tree_classifier,random_forest_classifier,gradient_boosting_classifier,llama-3.1-70b-instruct,llama-3.1-8b-instruct,llama-3.3-70b-instruct,mistral-7b-instruct,mistral-small-3.1,codestral-22b,gpt-oss-20b,gpt-oss-120b,granite-3.3-8b-instruct
0,Aspocomp intends to set up a plant to manufacture printed circuit boards with an investment of Rs310 crore .,0,1,[ 3.10125723e-02 1.48145273e-01 1.18822167e-02 -4.00886834e-02\n 2.18477882e-02 -7.52243698e-02 -6.43576309e-02 -1.18781418e-01\n -9.42035094e-02 1.69367039e+00 -1.97394520e-01 6.25598105e-03\n 4.40751575e-02 -4.00502793e-02 -4.52028252e-02 -1.13160096e-01\n 8.78665820e-02 1.32680905e+00 -4.84675802e-02 -3.56700621e-03\n 5.54845780e-02 9.94077399e-02 -1.34666905e-01 1.01711877e-01\n 6.37347773e-02 5.76260537e-02 -8.55392031e-03 -5.17340861e-02\n -9.67892632e-02 -4.00202125e-02 -4.66998853e-02 2.48341095e-02\n -1.75986104e-02 2.24689487e-02 4.00095731e-02 -1.50968343e-01\n 3.12316287e-02 8.40115175e-02 6.51817173e-02 -1.11669578e-01\n 4.81054671e-02 9.21993256e-02 1.43066615e-01 -2.96952873e-01\n -6.12954162e-02 -9.03191715e-02 3.97677906e-02 -3.97728458e-02\n -7....,[ 1.4120030e+00 -1.1126934e+00 8.9782916e-02 -6.6743813e-02\n -3.7037823e-01 -6.0530519e-01 -9.2802191e-01 -1.2312902e+00\n -1.7777984e+00 -3.0181044e-01 9.9699152e-01 -2.8869405e-01\n -4.5153311e-01 -2.6786438e-01 -5.6393570e-03 -1.2585411e+00\n 1.6161768e+00 1.3745358e+00 1.0518684e+00 4.4895628e-01\n 5.3809673e-01 7.0892632e-01 -1.8697438e+00 2.5000875e+00\n 4.8316771e-01 -7.1109168e-02 1.5739738e+00 -1.1464823e+00\n -1.6519629e+00 -1.5582638e+00 -8.2016826e-01 3.2399574e-01\n -2.7373379e-01 -8.1674218e-01 -2.3484817e-02 -2.1448696e+00\n 6.6440696e-01 6.2469262e-01 1.1777695e+00 -1.1532152e+00\n 6.7729610e-01 7.1531069e-01 8.3312702e-01 -4.0233145e+00\n -1.2297469e+00 -1.5874187e+00 1.4470621e+00 2.0973889e-02\n -1.5879422e+00 3.4062797e-01 -3.7409711e-01 -4.989...,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
1,"Finnish consulting and engineering group Poyry Plc ( OMX Helsinki : POY ) said on Wednesday ( 1 October ) that it has been awarded a contract by Tanqia Dibba FZC as owner-engineer for the wastewater system of Dibba , Emirate of Fujairah , UAE .",0,1,[ 4.70041996e-03 1.10228069e-01 -4.77573602e-03 -9.56104398e-02\n 5.52534722e-02 -7.00817332e-02 -2.05142405e-02 -1.88917577e-01\n -1.04249775e-01 1.33263934e+00 -2.44876042e-01 6.75175413e-02\n -1.49942748e-02 -4.06556427e-02 -5.41962422e-02 -5.62920608e-02\n -5.34963198e-02 8.25075388e-01 -4.66980301e-02 -1.90773290e-02\n 9.21309367e-02 -4.51208055e-02 -1.37235259e-03 2.78238151e-02\n 5.95768094e-02 7.14695230e-02 -6.53408244e-02 -2.12525483e-02\n 1.03413671e-01 4.48262393e-02 4.36710976e-02 -1.38843171e-02\n 3.21916752e-02 8.28981474e-02 3.51038612e-02 6.20842911e-03\n 1.43408813e-02 3.44430916e-02 -3.06746140e-02 -1.25587946e-02\n 5.69351614e-02 2.96466853e-02 3.29996310e-02 3.74354306e-03\n -6.46679699e-02 -1.41989226e-02 -7.01283365e-02 2.36671865e-02\n 2....,[ 1.09362805e+00 -1.62451851e+00 -1.65100798e-01 -9.56259787e-01\n 4.15408053e-02 -5.33732831e-01 -1.78139552e-01 -2.06649685e+00\n -1.91614807e+00 -1.50415599e+00 4.32611406e-01 4.96168882e-01\n -1.36563015e+00 -2.76857466e-01 -9.52787399e-02 -3.11303347e-01\n -7.22661614e-01 -1.60111678e+00 1.07873595e+00 2.00796470e-01\n 1.14176524e+00 -1.31319058e+00 6.96443990e-02 1.48120904e+00\n 4.19971019e-01 1.25214577e-01 8.07624400e-01 -6.86450183e-01\n 1.40184307e+00 -3.70196342e-01 7.16911912e-01 -2.34501705e-01\n 5.22707343e-01 1.74109451e-02 -9.85290855e-02 4.73820418e-01\n 4.31317925e-01 -1.41181558e-01 -1.94443032e-01 1.55737445e-01\n 8.26632380e-01 -2.50532269e-01 -5.49674511e-01 6.59554422e-01\n -1.27693951e+00 -3.35813284e-01 -1.00587562e-01 7.66250908e-01\n -1....,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
2,Diluted EPS rose to EUR3 .68 from EUR0 .50 .,0,1,[-2.69290894e-01 4.16594967e-02 -1.22851051e-01 -2.70933006e-02\n 3.50714996e-02 -1.15661696e-01 -6.43265843e-02 1.75575197e-01\n -1.25396490e-01 7.68171012e-01 -2.24086478e-01 7.85705000e-02\n -2.09025033e-02 -5.41161001e-02 1.49662405e-01 -1.13877594e-01\n 4.11872976e-02 8.62753034e-01 -1.40215501e-01 -1.27536893e-01\n 4.39678207e-02 -1.11634195e-01 -1.07523814e-01 -1.85717091e-01\n 9.68070105e-02 1.18459016e-02 -2.06287235e-01 -5.30498102e-02\n 5.26940357e-03 6.65820986e-02 2.13034455e-05 -3.96589972e-02\n -1.49358094e-01 -1.68723017e-02 -2.74695046e-02 -7.49292001e-02\n -2.03455035e-02 -7.13056028e-02 9.15624946e-02 7.88197964e-02\n 7.60333985e-02 -6.34257048e-02 1.22492693e-01 1.20451011e-01\n 1.57845110e-01 4.86440361e-02 -2.06761993e-02 -1.56963691e-01\n -1....,[-2.22164416e+00 -2.55009079e+00 -1.97177386e+00 1.41455680e-01\n -2.07319200e-01 -1.16808939e+00 -9.27490950e-01 2.27401471e+00\n -2.20736480e+00 -3.38401103e+00 6.79722607e-01 6.37775719e-01\n -1.45705986e+00 -4.76822168e-01 1.93662560e+00 -1.27049232e+00\n 8.43871653e-01 -1.37766039e+00 -3.41158807e-01 -1.53451908e+00\n 3.48383397e-01 -2.24378777e+00 -1.47482145e+00 -1.46341038e+00\n 9.85830665e-01 -7.20348954e-01 -1.09447253e+00 -1.16633940e+00\n -9.52056646e-02 -6.55586347e-02 -2.55081672e-02 -6.06290877e-01\n -2.38134718e+00 -1.35980129e+00 -1.05573428e+00 -8.77996981e-01\n -4.73473296e-02 -1.77508843e+00 1.55541837e+00 1.36257136e+00\n 1.14964020e+00 -1.68761480e+00 5.74651241e-01 2.47708774e+00\n 1.83672142e+00 6.97480619e-01 5.95839024e-01 -1.35575378e+00\n -2....,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0


## Save Output

In [44]:
DataProcessing.save_to_file(X_test_with_results_df, combine_data_path, 'sentence_label-all_classifiers_with_results', '.csv')

Using file number: 1
Saving CSV file to: /Users/detraviousjamaribrinkley/Documents/Development/research_labs/uf_ds/predictions/prediction_classification_experiments-v2/../data/financial_phrase_bank/combined_generated_fin_phrase_bank/sentence_label-all_classifiers_with_results-v1.csv


## Evaluation

In [32]:
print("======= EVALUATION/RESULTS =======")



In [34]:
get_metrics = EvaluationMetric()
get_metrics

<metrics.EvaluationMetric at 0x152418e10>

In [35]:
eval_reports = {}

actual_labels = X_test_with_results_df['Sentence Label'].values
print(len(actual_labels))
for model in models:
    llm_model_name = model.__name__()
    print(f"Actual Label:\t\t{actual_labels}")
    llm_model_predictions = X_test_with_results_df[llm_model_name].values
    print(f"{llm_model_name}:\t\t{len(llm_model_predictions)}")
    eval_report = get_metrics.eval_classification_report(actual_labels, llm_model_predictions)
    eval_reports[llm_model_name] = eval_report

565
Actual Label:		[0 0 0 1 0 0 1 1 0 0 0 0 0 1 0 0 0 0 1 1 1 0 0 0 1 0 0 0 0 0 0 0 0 0 1 0 0
 0 0 0 0 0 1 1 0 0 1 0 0 0 0 0 1 0 0 0 1 1 1 0 0 0 1 0 1 0 0 0 0 1 0 0 0 0
 0 1 1 0 0 0 0 1 0 0 0 0 0 1 0 0 0 1 0 1 0 0 0 0 1 1 0 0 0 1 0 0 0 0 0 0 0
 0 0 0 0 0 1 1 1 0 1 0 0 0 0 0 1 0 0 0 0 0 0 0 0 1 1 0 0 0 1 0 0 0 0 1 1 0
 0 0 0 0 0 0 0 0 1 0 0 1 0 1 0 0 0 0 0 0 0 0 0 0 0 1 1 0 0 0 1 0 1 1 0 0 0
 1 0 0 0 0 1 1 0 1 0 1 0 0 1 1 0 1 0 1 0 0 0 0 1 0 0 1 1 0 0 0 0 0 0 1 1 0
 0 0 0 0 0 1 1 1 0 0 0 0 1 0 1 0 1 0 1 0 0 0 0 0 0 0 1 1 0 0 0 0 0 1 0 0 1
 0 0 0 1 0 0 0 0 0 0 1 1 1 0 0 0 0 1 1 1 0 0 1 0 0 1 0 0 0 0 1 0 1 0 1 0 0
 0 0 0 0 0 1 0 0 0 1 0 0 1 0 1 1 0 1 1 0 0 0 0 0 0 0 1 1 0 0 0 0 0 1 0 0 1
 0 1 1 0 0 1 1 0 0 0 1 0 0 1 0 0 0 0 0 0 0 0 0 0 0 1 0 0 1 0 0 1 0 0 0 0 0
 0 0 0 0 0 1 0 1 0 0 0 1 0 1 0 1 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 1 0 0 1 1
 1 0 1 0 1 1 0 1 0 0 0 0 0 0 1 0 1 0 0 0 0 0 0 0 0 1 1 1 0 1 0 0 1 0 0 0 0
 1 0 0 0 0 0 0 0 1 1 0 0 0 0 1 0 0 0 0 1 1 1 1 0 0 0 0 1 0 1 0 0 0 1 1 0 0
 0 1 1

In [36]:
eval_reports_df = pd.DataFrame(eval_reports)
eval_reports_df.to_latex()

"\\begin{tabular}{llllllllll}\n\\toprule\n & llama-3.1-70b-instruct & llama-3.1-8b-instruct & llama-3.3-70b-instruct & mistral-7b-instruct & mistral-small-3.1 & codestral-22b & gpt-oss-20b & gpt-oss-120b & granite-3.3-8b-instruct \\\\\n\\midrule\n0 & {'precision': 0.972568578553616, 'recall': 0.9605911330049262, 'f1-score': 0.966542750929368, 'support': 406.0} & {'precision': 0.854978354978355, 'recall': 0.9729064039408867, 'f1-score': 0.9101382488479263, 'support': 406.0} & {'precision': 0.9768041237113402, 'recall': 0.9334975369458128, 'f1-score': 0.9546599496221663, 'support': 406.0} & {'precision': 0.8410041841004184, 'recall': 0.9901477832512315, 'f1-score': 0.9095022624434389, 'support': 406.0} & {'precision': 0.9, 'recall': 0.9975369458128078, 'f1-score': 0.9462616822429907, 'support': 406.0} & {'precision': 0.9533169533169533, 'recall': 0.9556650246305419, 'f1-score': 0.9544895448954489, 'support': 406.0} & {'precision': 0.8640350877192983, 'recall': 0.9704433497536946, 'f1-sco