# LLM Classifiers

**Goal:** Given a sentence as input, classify it as either a prediction or non-prediction.

In [1]:
import os
import re
import sys
import json
import pprint

import pandas as pd

# from tqdm import tqdm
from tqdm.notebook import tqdm


# Get the current working directory of the notebook
notebook_dir = os.getcwd()
# Add the parent directory to the system path
sys.path.append(os.path.join(notebook_dir, '../'))

# import log_files
from metrics import EvaluationMetric
from data_processing import DataProcessing
from text_generation_models import TextGenerationModelFactory
from prediction_properties import PredictionProperties

In [2]:
pd.set_option('max_colwidth', 800)
pd.set_option('display.max_columns', None)
pd.set_option('display.max_rows', None)

## Load Data

In [3]:
print("======= LOAD DATA =======")



In [4]:
base_data_path = os.path.join(notebook_dir, '../data')
combine_data_path = os.path.join(base_data_path, 'combined_datasets')
# X_test_set_path = os.path.join(combine_data_path, 'x_test_set-v16.csv') 
X_test_set_path = os.path.join(combine_data_path, 'ml_classifiers-v1.csv') 
y_sentence_test_set_path = os.path.join(combine_data_path, 'y_sentence_test_df-v1.csv')
# y_author_test_set_path = os.path.join(combine_data_path, 'y_author_test_df-v1.csv')

In [5]:
X_test_df = DataProcessing.load_from_file(X_test_set_path, 'csv', sep=',')
# X_test_df.drop(columns=['Unnamed: 0'], inplace=True)
X_test_df.shape

(1846, 12)

In [6]:
X_test_df.head(3)

Unnamed: 0,Base Sentence,Sentence Label,Author Type,Embedding,Normalized Embeddings,perceptron,sgd_classifier,logistic_regression,ridge_classifier,decision_tree_classifier,random_forest_classifier,gradient_boosting_classifier
0,"PARIS — President Emmanuel Macron declared his candidacy for a second five-year term in the presidential election next month, formalizing his decision with a low-key letter in several newspapers that exhorted the French to let him guide “this beautiful and collective adventure that is called France.”",1,1,[ 5.66046238e-02 1.04988851e-01 -3.34139578e-02 1.98167879e-02\n 1.52485088e-01 -6.53142408e-02 -1.45529425e-02 -6.41713664e-02\n -3.75037752e-02 2.14759326e+00 -2.14621708e-01 2.73683928e-02\n 2.88958307e-02 -1.24519587e-01 -6.89217523e-02 -4.13673334e-02\n -5.35880737e-02 7.76791871e-01 -4.43464518e-02 1.01134705e-03\n 1.09494086e-02 -4.75250259e-02 -2.06354335e-02 -6.48403764e-02\n 1.07812867e-01 9.46720596e-03 -1.09430917e-01 -4.65166196e-03\n -8.32740441e-02 5.66849113e-03 -2.98308004e-02 6.16043173e-02\n 2.14268006e-02 5.82768060e-02 6.74477890e-02 -1.06455544e-02\n -7.22951535e-03 -8.13032016e-02 -8.00553635e-02 -4.17012870e-02\n 1.55045995e-02 5.84505014e-02 2.86313556e-02 -1.45766467e-01\n 2.51089260e-02 -3.74014415e-02 -2.69140229e-02 3.26086022e-02\n 4....,[ 9.95389938e-01 -8.30533266e-01 -1.43044465e-03 8.73336315e-01\n 1.09862113e+00 -4.87613559e-01 -1.22331895e-01 -3.02105606e-01\n -6.34055793e-01 3.96464318e-01 1.20431840e-01 1.43495783e-01\n -6.45723879e-01 -1.23308265e+00 1.21950001e-01 9.57266837e-02\n -3.60498607e-01 -1.11962736e+00 1.05052781e+00 5.26727080e-01\n 1.28631011e-01 -6.31512940e-01 -1.30328745e-01 -2.20445901e-01\n 1.18003213e+00 -3.56904298e-01 -9.83796865e-02 -1.43321365e-01\n -1.27634919e+00 -1.40238732e-01 -2.59166718e-01 5.60881197e-01\n 5.56570828e-01 -5.19165695e-02 -8.78698155e-02 4.34541374e-01\n -1.03092425e-01 -1.60028136e+00 -8.79279971e-01 -1.84806854e-01\n 1.09652579e-01 -9.91694033e-02 -2.68077195e-01 -1.27014589e+00\n 7.04476759e-02 -8.28510642e-01 9.05415952e-01 7.40090787e-01\n 3....,0,0,1,0,0,1,1
1,"This time, the plot — about a ray gun that turns humans into monsters, and vice versa — seems to acknowledge the need to goose characters out of their inertia.",0,1,[-2.23119743e-02 6.97671250e-02 -9.84913930e-02 3.75865086e-04\n -3.14215869e-02 9.29637328e-02 -2.15256251e-02 1.64190568e-02\n 3.75561090e-03 2.09514165e+00 -9.81769562e-02 -2.71825790e-02\n 6.37708604e-02 2.52576079e-02 -1.64901182e-01 -1.31515667e-01\n -8.57630968e-02 1.00542879e+00 -1.91387057e-01 -1.72457062e-02\n -1.96998157e-02 1.58039983e-02 -8.47747996e-02 -5.93274459e-02\n -3.87448259e-02 2.83081476e-02 -6.39593303e-02 -3.22961658e-02\n -1.75291598e-02 -5.21216244e-02 -5.90514541e-02 8.25655013e-02\n -1.87435567e-01 1.76751390e-01 1.82304636e-01 -5.84071316e-02\n 6.34762719e-02 7.96191171e-02 -4.05625440e-02 -6.58209398e-02\n 5.19772992e-02 1.05586648e-02 -6.87460601e-02 -1.15110271e-01\n 8.34729597e-02 5.28117083e-03 -7.90077299e-02 3.89454179e-02\n -6....,[ 1.02042906e-01 -1.25779295e+00 -8.07256043e-01 5.91519654e-01\n -1.25950062e+00 1.64591324e+00 -2.32750431e-01 5.62637687e-01\n -5.82166910e-02 2.33458817e-01 1.34504688e+00 -5.37074924e-01\n -1.18046746e-01 9.42714691e-01 -8.83851290e-01 -1.40953076e+00\n -8.80104840e-01 -4.82799970e-02 -8.90786171e-01 2.49257118e-01\n -3.30653459e-01 1.78774640e-01 -1.01988363e+00 -1.42349273e-01\n -1.00427949e+00 -8.71060789e-02 4.76414889e-01 -5.45713067e-01\n -3.79090428e-01 -7.61182785e-01 -7.15413988e-01 8.47321272e-01\n -2.47394013e+00 1.62751734e+00 1.49586022e+00 -2.60521144e-01\n 8.91075373e-01 8.00682306e-01 -3.06060821e-01 -5.04338622e-01\n 6.78358138e-01 -8.19013655e-01 -1.57371426e+00 -8.22251201e-01\n 9.33124840e-01 -1.94198340e-01 1.62035823e-01 8.18059087e-01\n -1....,0,0,0,0,0,1,1
2,"In his first weeks as mayor, that challenge has risen to meet him.",0,1,[ 5.81765212e-02 2.07019195e-01 -7.69932643e-02 -6.81760013e-02\n 1.22693665e-01 -1.35694534e-01 -8.19419175e-02 -3.39005925e-02\n 5.66506805e-03 2.72540665e+00 -1.78715482e-01 2.04635300e-02\n 4.16096002e-02 -2.71960557e-03 -1.61515608e-01 1.82891320e-02\n 1.80162247e-02 7.59893358e-01 -1.08826131e-01 1.53591344e-02\n 6.34927768e-03 -7.43348673e-02 -4.02560048e-02 -7.23461360e-02\n 7.84059986e-02 9.49361399e-02 -9.24548283e-02 1.06267445e-02\n -1.70880035e-02 -8.75466838e-02 -6.63756654e-02 1.13582596e-01\n -7.43332654e-02 -5.63022820e-03 1.33150846e-01 -1.48050874e-01\n 4.73999567e-02 -3.72759961e-02 5.08240648e-02 -1.01549730e-01\n 2.34233841e-04 -1.08329961e-02 4.20196690e-02 -1.49904892e-01\n -5.08043952e-02 7.94086978e-02 -1.34930000e-01 -6.85446570e-03\n -6....,[ 1.0131841 0.4071531 -0.54105407 -0.4022119 0.7166242 -1.436313\n -1.1894954 0.02270282 -0.03156724 2.1921527 0.498047 0.05735163\n -0.45335802 0.5362927 -0.8483727 1.0918443 0.7958655 -1.1988105\n 0.19923028 0.7447842 0.05969716 -0.9745417 -0.4024483 -0.32677308\n 0.741749 0.8669924 0.11621065 0.07907018 -0.37306973 -1.1418184\n -0.8297732 1.271178 -0.83287007 -0.95782936 0.81809187 -1.5650845\n 0.66503227 -0.9433948 1.0203716 -0.9776657 -0.1284527 -1.1405437\n -0.08856661 -1.3306093 -1.051625 0.90742135 -0.63597816 0.2545358\n -0.35334235 -1.1255441 -0.09057999 0.48243684 0.25384033 0.21164928\n 0.3576934 1.7142646 -0.36263907 1.1403483 1.0692748 -0.89533913\n -0.03675979 0.46403384 0.37106022 0.396437 0.4069825 0...,0,0,0,0,1,0,0


In [7]:
y_sentence_test_df = DataProcessing.load_from_file(y_sentence_test_set_path, 'csv', sep=',')
# y_sentence_test_df.drop(columns=['Unnamed: 0'], inplace=True)
y_sentence_test_df.head(3)

Unnamed: 0,Sentence Label
0,1
1,0
2,0


In [8]:
# y_author_test_df = DataProcessing.load_from_file(y_author_test_set_path, 'csv', sep=',')
# # y_sentence_test_df.drop(columns=['Unnamed: 0'], inplace=True)
# y_author_test_df.head(3)z

## Load Prompt

1. Sentence Label ('non_prediction': 0, 'prediction': 1)
    - Zero-Shot
    - Few-Shot
    - Chain-of-Thought
    - Potential
        1. Prediction properties vs without predicton properties. Why? We didn't have annotators go off our properties. But, we have the synthetic based off them. Or, use both or at least both examples.

2. Author ('llm': 0, 'human': 1)
    - Zero-Shot
    - Few-Shot
    - Chain-of-Thought

In [9]:
system_identity_prompt = "You are an expert at identifying specific types of sentences called prediction."
prediction_requirements = PredictionProperties.get_requirements()
sentence_label_task = """Classify the sentence "label" as either a "non-prediction": 0, "prediction": 1."""
sentence_label_format_output = """Respond ONLY with valid JSON in this exact format: {"predicted_sentence_label": 0}. Do NOT reason or provide anything other than {"predicted_sentence_label": 0}. """

sentence_author_task = """Classify the sentence "label" as either a "llm": 0, "human": 1."""
sentence_author_format_output = """Respond ONLY with valid JSON in this exact format: {"predicted_sentence_author": 0}."""

In [10]:
prediction_properties = PredictionProperties.get_prediction_properties()
prediction_properties_base_prompt = f"""{system_identity_prompt} For each prediction, the format is based on: 
    
    {prediction_properties}
    Enforce the {prediction_requirements}.

"""
prediction_properties_base_prompt

'You are an expert at identifying specific types of sentences called prediction. For each prediction, the format is based on: \n\n     A prediction <p> = (<p_s>, <p_t>, <p_d>, <p_o>), where it consists of the following four properties:\n\n            1. <p_s>\n                - Defined as: \n                    - Source entity that states the <p>\n                - Characteristics:\n                    - A person with either: a name only, profile name only, geneder only, domain specific title only or any combination of these.\n                    - An associated organization\n                    - Named entity: Person, organization\n                    - Part of speech: Noun\n\n            2. <p_t>\n                - Defined as: \n                    - Target entity that the <p> is about\n                - Characteristics:\n                    - Same and <p_s>\n                    \n            3. <p_d>\n                - Defined as: \n                    - Date when the <p> is made\n 

In [11]:
no_prediction_properties_base_prompt = f"""{system_identity_prompt}.

    Enforce the {prediction_requirements}.

"""
no_prediction_properties_base_prompt

'You are an expert at identifying specific types of sentences called prediction..\n\n    Enforce the requirements of a prediction: \n            1. Usage of synonyms to the word "prediction", such as [\'forecast\', \'projection\', \'estimate\', \'outlook\', \'expectation\', \'anticipation\', \'prophecy\', \'prognosis\', \'guess\', \'speculation\', \'forecasting\', \'projection\', \'foretelling\', \'forecasted outcome\', \'forecast estimate\'].\n            2. Usage of the future verb tense, such as: [\'will\', \'shall\', \'would\', \'going\', \'might\', \'should\', \'could\', \'may\', \'must\', \'can\'].\n            3. Do NOT use past or present tense verbs.\n\n        .\n\n'

In [12]:
sentence_label_prompts = [prediction_properties_base_prompt, no_prediction_properties_base_prompt]
sentence_label_prompts

['You are an expert at identifying specific types of sentences called prediction. For each prediction, the format is based on: \n\n     A prediction <p> = (<p_s>, <p_t>, <p_d>, <p_o>), where it consists of the following four properties:\n\n            1. <p_s>\n                - Defined as: \n                    - Source entity that states the <p>\n                - Characteristics:\n                    - A person with either: a name only, profile name only, geneder only, domain specific title only or any combination of these.\n                    - An associated organization\n                    - Named entity: Person, organization\n                    - Part of speech: Noun\n\n            2. <p_t>\n                - Defined as: \n                    - Target entity that the <p> is about\n                - Characteristics:\n                    - Same and <p_s>\n                    \n            3. <p_d>\n                - Defined as: \n                    - Date when the <p> is made\n

## Models

In [13]:
tgmf = TextGenerationModelFactory()

# Option 1: Specific models
# models = tgmf.create_instances(['llama-3.1-8b-instant', 'llama-3.3-70b-versatile', 'llama-3.3-70b-instruct', 'openai/gpt-oss-20b'])
# models = tgmf.create_instances(['openai/gpt-oss-20b'])

# Option 2: All Groq models
# models = tgmf.create_instances(tgmf.get_groq_model_names())

# Option 3: All NaviGator models
models = tgmf.create_instances(tgmf.get_navigator_model_names())

# Option 4: All available models
# models = tgmf.create_instances()

# Option 5: Mix and match
# custom_models = ['llama-3.1-70b-instruct', 'mistral-small-3.1', 'llama-3.1-8b-instant']
# models = tgmf.create_instances(custom_models)
models



[<text_generation_models.Llama3170BInstructTextGenerationModel at 0x32299be90>,
 <text_generation_models.Llama318BInstructTextGenerationModel at 0x32299a0d0>,
 <text_generation_models.Llama3370BInstructTextGenerationModel at 0x3229a0d10>,
 <text_generation_models.Mistral7BInstructTextGenerationModel at 0x322a45250>,
 <text_generation_models.MistralSmall31TextGenerationModel at 0x322a45e50>,
 <text_generation_models.Codestral22BTextGenerationModel at 0x322a470d0>,
 <text_generation_models.GptOss20TextGenerationModel at 0x322a47b90>,
 <text_generation_models.GptOss120TextGenerationModel at 0x112430650>,
 <text_generation_models.Granite338BInstructTextGenerationModel at 0x322a54a10>]

In [14]:
def parse_json_response(response, reasoning=False):
    """Parse JSON response from LLM to extract label and reasoning"""
    try:
        # Extract JSON if there's extra text
        json_match = re.search(r'\{.*\}', response, re.DOTALL)
        if json_match:
            data = json.loads(json_match.group())
            if reasoning:
                return data.get('predicted_sentence_label'), data.get('reasoning')
            else:
                return data.get('predicted_sentence_label')  # Return single value, not tuple
    except Exception as e:
        print(f"Error parsing JSON: {e}")
        if reasoning:
            return None, None
        else:
            return None  # Return single None when reasoning=False

In [15]:
def llm_certifier(sentence_to_classify: str, base_prompt: str, model, task, format_output: str):
    
      prompt = f"""{base_prompt}
      
      Sentence to label: '{sentence_to_classify}'

      {task}
      
      {format_output}
      """
      idx = 1
      if idx == 1:
            # print(f"\tPrompt: {prompt}")
            idx = idx + 1
      input_prompt = model.user(prompt)
      raw_text_llm_generation = model.chat_completion([input_prompt])
      # print(f"Raw response: {raw_text_llm_generation}")

      # Parse the JSON response
      label = parse_json_response(raw_text_llm_generation, reasoning=False)
      
      return raw_text_llm_generation, label

In [16]:
# print("======= PROMPT + MODEL -> LABEL and REASONING =======")
print("======= PROMPT + MODEL -> LABEL =======")
# sentence_label_prompts



In [17]:
results = []

for idx, row in tqdm(X_test_df.iterrows(), total=len(X_test_df), desc="Processing"):
    text = row['Base Sentence']
    if idx < 3:
        print("Classify sentence as either prediction (1) or non-prediction (0)")
        print(f"   {idx} --- Sentence: {text}")
    for models_idx in range(len(models)):
        # print(model.__name__())
        model = models[models_idx]
        raw_response, llm_label = llm_certifier(text, sentence_label_prompts[0], model, sentence_label_task, sentence_label_format_output)
        result = (text, raw_response, llm_label, model.__name__())
        results.append(result)

        if idx < 3:
            # print(f"{idx} --- Sentence: {text}")
            print(f"\tLabel: {llm_label} via Model: { model.__name__()}")

Processing:   0%|          | 0/1846 [00:00<?, ?it/s]

Classify sentence as either prediction (1) or non-prediction (0)
   0 --- Sentence: PARIS — President Emmanuel Macron declared his candidacy for a second five-year term in the presidential election next month, formalizing his decision with a low-key letter in several newspapers that exhorted the French to let him guide “this beautiful and collective adventure that is called France.”
	Label: 0 via Model: llama-3.1-70b-instruct
	Label: 0 via Model: llama-3.1-8b-instruct
	Label: 0 via Model: llama-3.3-70b-instruct
	Label: 0 via Model: mistral-7b-instruct
	Label: 0 via Model: mistral-small-3.1
	Label: 0 via Model: codestral-22b
	Label: 0 via Model: gpt-oss-20b
	Label: 0 via Model: gpt-oss-120b
	Label: 0 via Model: granite-3.3-8b-instruct
Classify sentence as either prediction (1) or non-prediction (0)
   1 --- Sentence: This time, the plot — about a ray gun that turns humans into monsters, and vice versa — seems to acknowledge the need to goose characters out of their inertia.
	Label: 0 vi

KeyboardInterrupt: 

In [18]:
results

[('PARIS — President Emmanuel Macron declared his candidacy for a second five-year term in the presidential election next month, formalizing his decision with a low-key letter in several newspapers that exhorted the French to let him guide “this beautiful and collective adventure that is called France.”',
  '{"predicted_sentence_label": 0}',
  0,
  'llama-3.1-70b-instruct'),
 ('PARIS — President Emmanuel Macron declared his candidacy for a second five-year term in the presidential election next month, formalizing his decision with a low-key letter in several newspapers that exhorted the French to let him guide “this beautiful and collective adventure that is called France.”',
  '{"predicted_sentence_label": 0}',
  0,
  'llama-3.1-8b-instruct'),
 ('PARIS — President Emmanuel Macron declared his candidacy for a second five-year term in the presidential election next month, formalizing his decision with a low-key letter in several newspapers that exhorted the French to let him guide “this

In [19]:
# groupby text

In [69]:
results_with_llm_label_df = pd.DataFrame(results, columns=['text', 'raw_response', 'llm_label', 'llm_name'])
results_with_llm_label_df.tail(10)
# results_with_llm_label_df

Unnamed: 0,text,raw_response,llm_label,llm_name
13015,The basketball player's impressive stats earned them the title of MVP.,"{""predicted_sentence_label"": 0}",0,llama-3.1-8b-instruct
13016,The basketball player's impressive stats earned them the title of MVP.,"{""predicted_sentence_label"": 0}",0,llama-3.3-70b-instruct
13017,The basketball player's impressive stats earned them the title of MVP.,"{""predicted_sentence_label"": 0}",0,mistral-7b-instruct
13018,The basketball player's impressive stats earned them the title of MVP.,"{""predicted_sentence_label"": 0}",0,mistral-small-3.1
13019,The basketball player's impressive stats earned them the title of MVP.,"{""predicted_sentence_label"": 0}",0,codestral-22b
13020,The basketball player's impressive stats earned them the title of MVP.,"{""predicted_sentence_label"": 0}",0,gpt-oss-20b
13021,The basketball player's impressive stats earned them the title of MVP.,"{""predicted_sentence_label"": 0}",0,gpt-oss-120b
13022,The basketball player's impressive stats earned them the title of MVP.,"{""predicted_sentence_label"": 0}",0,granite-3.3-8b-instruct
13023,"From on high, a camera zooms through a cloud of vape smoke and along a row of buzzing high school bathroom stalls.","{""predicted_sentence_label"": 0}",0,llama-3.1-70b-instruct
13024,"From on high, a camera zooms through a cloud of vape smoke and along a row of buzzing high school bathroom stalls.","{""predicted_sentence_label"": 0}",0,llama-3.1-8b-instruct


In [70]:
results_with_llm_label_df.shape

(13025, 4)

In [71]:
len(results_with_llm_label_df) - 2

13023

In [72]:
results_with_llm_label_df = results_with_llm_label_df.loc[: len(results_with_llm_label_df) - 3]
results_with_llm_label_df.tail(10)

Unnamed: 0,text,raw_response,llm_label,llm_name
13013,"Finland 's national carrier Finnair PLC carried a record 8.5 million passengers in 2005 , an increase of 4.5 percent on the previous year , the airline reported Tuesday .","{""predicted_sentence_label"": 0}",0,granite-3.3-8b-instruct
13014,The basketball player's impressive stats earned them the title of MVP.,"{""predicted_sentence_label"": 0}",0,llama-3.1-70b-instruct
13015,The basketball player's impressive stats earned them the title of MVP.,"{""predicted_sentence_label"": 0}",0,llama-3.1-8b-instruct
13016,The basketball player's impressive stats earned them the title of MVP.,"{""predicted_sentence_label"": 0}",0,llama-3.3-70b-instruct
13017,The basketball player's impressive stats earned them the title of MVP.,"{""predicted_sentence_label"": 0}",0,mistral-7b-instruct
13018,The basketball player's impressive stats earned them the title of MVP.,"{""predicted_sentence_label"": 0}",0,mistral-small-3.1
13019,The basketball player's impressive stats earned them the title of MVP.,"{""predicted_sentence_label"": 0}",0,codestral-22b
13020,The basketball player's impressive stats earned them the title of MVP.,"{""predicted_sentence_label"": 0}",0,gpt-oss-20b
13021,The basketball player's impressive stats earned them the title of MVP.,"{""predicted_sentence_label"": 0}",0,gpt-oss-120b
13022,The basketball player's impressive stats earned them the title of MVP.,"{""predicted_sentence_label"": 0}",0,granite-3.3-8b-instruct


In [73]:
results_with_llm_label_df.shape

(13023, 4)

In [74]:
DataProcessing.save_to_file(results_with_llm_label_df, combine_data_path, 'llm_classifiers_with_results', '.csv')

Using file number: 2
Saving CSV file to: /Users/detraviousjamaribrinkley/Documents/Development/research_labs/uf_ds/predictions/prediction_classification_experiments-v2/../data/combined_datasets/llm_classifiers_with_results-v2.csv


### Align Test Sentences with Predicted Sentence Label from LLMs

In [82]:
def get_llm_labels(df, model_name):
    filt_llama = (df['llm_name'] == model_name)
    filt_df = df[filt_llama]
    return filt_df['llm_label']

X_test_with_results_df = X_test_df.copy()
print(X_test_with_results_df.shape)
X_test_with_results_df = X_test_with_results_df.loc[:1446, :]
print(X_test_with_results_df.shape)
for model in models:
    llm_model_name = model.__name__()
    # print(f"Model Name: {llm_model_name}")
    model_labels = get_llm_labels(results_with_llm_label_df, llm_model_name)
    print(len(model_labels))
    X_test_with_results_df[llm_model_name] = model_labels.to_numpy().ravel()
    print(X_test_with_results_df.shape)
X_test_with_results_df.head(3)

(1846, 12)
(1447, 12)
1447
(1447, 13)
1447
(1447, 14)
1447
(1447, 15)
1447
(1447, 16)
1447
(1447, 17)
1447
(1447, 18)
1447
(1447, 19)
1447
(1447, 20)
1447
(1447, 21)


Unnamed: 0,Base Sentence,Sentence Label,Author Type,Embedding,Normalized Embeddings,perceptron,sgd_classifier,logistic_regression,ridge_classifier,decision_tree_classifier,random_forest_classifier,gradient_boosting_classifier,llama-3.1-70b-instruct,llama-3.1-8b-instruct,llama-3.3-70b-instruct,mistral-7b-instruct,mistral-small-3.1,codestral-22b,gpt-oss-20b,gpt-oss-120b,granite-3.3-8b-instruct
0,"PARIS — President Emmanuel Macron declared his candidacy for a second five-year term in the presidential election next month, formalizing his decision with a low-key letter in several newspapers that exhorted the French to let him guide “this beautiful and collective adventure that is called France.”",1,1,[ 5.66046238e-02 1.04988851e-01 -3.34139578e-02 1.98167879e-02\n 1.52485088e-01 -6.53142408e-02 -1.45529425e-02 -6.41713664e-02\n -3.75037752e-02 2.14759326e+00 -2.14621708e-01 2.73683928e-02\n 2.88958307e-02 -1.24519587e-01 -6.89217523e-02 -4.13673334e-02\n -5.35880737e-02 7.76791871e-01 -4.43464518e-02 1.01134705e-03\n 1.09494086e-02 -4.75250259e-02 -2.06354335e-02 -6.48403764e-02\n 1.07812867e-01 9.46720596e-03 -1.09430917e-01 -4.65166196e-03\n -8.32740441e-02 5.66849113e-03 -2.98308004e-02 6.16043173e-02\n 2.14268006e-02 5.82768060e-02 6.74477890e-02 -1.06455544e-02\n -7.22951535e-03 -8.13032016e-02 -8.00553635e-02 -4.17012870e-02\n 1.55045995e-02 5.84505014e-02 2.86313556e-02 -1.45766467e-01\n 2.51089260e-02 -3.74014415e-02 -2.69140229e-02 3.26086022e-02\n 4....,[ 9.95389938e-01 -8.30533266e-01 -1.43044465e-03 8.73336315e-01\n 1.09862113e+00 -4.87613559e-01 -1.22331895e-01 -3.02105606e-01\n -6.34055793e-01 3.96464318e-01 1.20431840e-01 1.43495783e-01\n -6.45723879e-01 -1.23308265e+00 1.21950001e-01 9.57266837e-02\n -3.60498607e-01 -1.11962736e+00 1.05052781e+00 5.26727080e-01\n 1.28631011e-01 -6.31512940e-01 -1.30328745e-01 -2.20445901e-01\n 1.18003213e+00 -3.56904298e-01 -9.83796865e-02 -1.43321365e-01\n -1.27634919e+00 -1.40238732e-01 -2.59166718e-01 5.60881197e-01\n 5.56570828e-01 -5.19165695e-02 -8.78698155e-02 4.34541374e-01\n -1.03092425e-01 -1.60028136e+00 -8.79279971e-01 -1.84806854e-01\n 1.09652579e-01 -9.91694033e-02 -2.68077195e-01 -1.27014589e+00\n 7.04476759e-02 -8.28510642e-01 9.05415952e-01 7.40090787e-01\n 3....,0,0,1,0,0,1,1,0,0,0,0,0,0,0,0,0
1,"This time, the plot — about a ray gun that turns humans into monsters, and vice versa — seems to acknowledge the need to goose characters out of their inertia.",0,1,[-2.23119743e-02 6.97671250e-02 -9.84913930e-02 3.75865086e-04\n -3.14215869e-02 9.29637328e-02 -2.15256251e-02 1.64190568e-02\n 3.75561090e-03 2.09514165e+00 -9.81769562e-02 -2.71825790e-02\n 6.37708604e-02 2.52576079e-02 -1.64901182e-01 -1.31515667e-01\n -8.57630968e-02 1.00542879e+00 -1.91387057e-01 -1.72457062e-02\n -1.96998157e-02 1.58039983e-02 -8.47747996e-02 -5.93274459e-02\n -3.87448259e-02 2.83081476e-02 -6.39593303e-02 -3.22961658e-02\n -1.75291598e-02 -5.21216244e-02 -5.90514541e-02 8.25655013e-02\n -1.87435567e-01 1.76751390e-01 1.82304636e-01 -5.84071316e-02\n 6.34762719e-02 7.96191171e-02 -4.05625440e-02 -6.58209398e-02\n 5.19772992e-02 1.05586648e-02 -6.87460601e-02 -1.15110271e-01\n 8.34729597e-02 5.28117083e-03 -7.90077299e-02 3.89454179e-02\n -6....,[ 1.02042906e-01 -1.25779295e+00 -8.07256043e-01 5.91519654e-01\n -1.25950062e+00 1.64591324e+00 -2.32750431e-01 5.62637687e-01\n -5.82166910e-02 2.33458817e-01 1.34504688e+00 -5.37074924e-01\n -1.18046746e-01 9.42714691e-01 -8.83851290e-01 -1.40953076e+00\n -8.80104840e-01 -4.82799970e-02 -8.90786171e-01 2.49257118e-01\n -3.30653459e-01 1.78774640e-01 -1.01988363e+00 -1.42349273e-01\n -1.00427949e+00 -8.71060789e-02 4.76414889e-01 -5.45713067e-01\n -3.79090428e-01 -7.61182785e-01 -7.15413988e-01 8.47321272e-01\n -2.47394013e+00 1.62751734e+00 1.49586022e+00 -2.60521144e-01\n 8.91075373e-01 8.00682306e-01 -3.06060821e-01 -5.04338622e-01\n 6.78358138e-01 -8.19013655e-01 -1.57371426e+00 -8.22251201e-01\n 9.33124840e-01 -1.94198340e-01 1.62035823e-01 8.18059087e-01\n -1....,0,0,0,0,0,1,1,0,0,0,0,0,0,0,0,0
2,"In his first weeks as mayor, that challenge has risen to meet him.",0,1,[ 5.81765212e-02 2.07019195e-01 -7.69932643e-02 -6.81760013e-02\n 1.22693665e-01 -1.35694534e-01 -8.19419175e-02 -3.39005925e-02\n 5.66506805e-03 2.72540665e+00 -1.78715482e-01 2.04635300e-02\n 4.16096002e-02 -2.71960557e-03 -1.61515608e-01 1.82891320e-02\n 1.80162247e-02 7.59893358e-01 -1.08826131e-01 1.53591344e-02\n 6.34927768e-03 -7.43348673e-02 -4.02560048e-02 -7.23461360e-02\n 7.84059986e-02 9.49361399e-02 -9.24548283e-02 1.06267445e-02\n -1.70880035e-02 -8.75466838e-02 -6.63756654e-02 1.13582596e-01\n -7.43332654e-02 -5.63022820e-03 1.33150846e-01 -1.48050874e-01\n 4.73999567e-02 -3.72759961e-02 5.08240648e-02 -1.01549730e-01\n 2.34233841e-04 -1.08329961e-02 4.20196690e-02 -1.49904892e-01\n -5.08043952e-02 7.94086978e-02 -1.34930000e-01 -6.85446570e-03\n -6....,[ 1.0131841 0.4071531 -0.54105407 -0.4022119 0.7166242 -1.436313\n -1.1894954 0.02270282 -0.03156724 2.1921527 0.498047 0.05735163\n -0.45335802 0.5362927 -0.8483727 1.0918443 0.7958655 -1.1988105\n 0.19923028 0.7447842 0.05969716 -0.9745417 -0.4024483 -0.32677308\n 0.741749 0.8669924 0.11621065 0.07907018 -0.37306973 -1.1418184\n -0.8297732 1.271178 -0.83287007 -0.95782936 0.81809187 -1.5650845\n 0.66503227 -0.9433948 1.0203716 -0.9776657 -0.1284527 -1.1405437\n -0.08856661 -1.3306093 -1.051625 0.90742135 -0.63597816 0.2545358\n -0.35334235 -1.1255441 -0.09057999 0.48243684 0.25384033 0.21164928\n 0.3576934 1.7142646 -0.36263907 1.1403483 1.0692748 -0.89533913\n -0.03675979 0.46403384 0.37106022 0.396437 0.4069825 0...,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0


## Save Output

In [84]:
DataProcessing.save_to_file(X_test_with_results_df, combine_data_path, 'sentence_label-subset-all_classifiers_with_results', '.csv')

Using file number: 1
Saving CSV file to: /Users/detraviousjamaribrinkley/Documents/Development/research_labs/uf_ds/predictions/prediction_classification_experiments-v2/../data/combined_datasets/sentence_label-subset-all_classifiers_with_results-v1.csv


## Evaluation

In [85]:
print("======= EVALUATION/RESULTS =======")



In [86]:
get_metrics = EvaluationMetric()
get_metrics

<metrics.EvaluationMetric at 0x3353150d0>

In [87]:
eval_reports = {}

actual_labels = X_test_with_results_df['Sentence Label'].values
print(len(actual_labels))
for model in models:
    llm_model_name = model.__name__()
    print(f"Actual Label:\t\t{actual_labels}")
    llm_model_predictions = X_test_with_results_df[llm_model_name].values
    print(f"{llm_model_name}:\t\t{len(llm_model_predictions)}")
    eval_report = get_metrics.eval_classification_report(actual_labels, llm_model_predictions)
    eval_reports[llm_model_name] = eval_report

1447
Actual Label:		[1 0 0 ... 0 0 0]
llama-3.1-70b-instruct:		1447
              precision    recall  f1-score   support

           0       0.74      0.98      0.84       817
           1       0.95      0.55      0.69       630

    accuracy                           0.79      1447
   macro avg       0.84      0.76      0.77      1447
weighted avg       0.83      0.79      0.78      1447

Actual Label:		[1 0 0 ... 0 0 0]
llama-3.1-8b-instruct:		1447
              precision    recall  f1-score   support

           0       0.62      0.98      0.76       817
           1       0.91      0.23      0.36       630

    accuracy                           0.65      1447
   macro avg       0.77      0.60      0.56      1447
weighted avg       0.75      0.65      0.59      1447

Actual Label:		[1 0 0 ... 0 0 0]
llama-3.3-70b-instruct:		1447
              precision    recall  f1-score   support

           0       0.74      0.97      0.84       817
           1       0.93      0.55      0.69 

In [88]:
eval_reports_df = pd.DataFrame(eval_reports)
eval_reports_df.to_latex()

"\\begin{tabular}{llllllllll}\n\\toprule\n & llama-3.1-70b-instruct & llama-3.1-8b-instruct & llama-3.3-70b-instruct & mistral-7b-instruct & mistral-small-3.1 & codestral-22b & gpt-oss-20b & gpt-oss-120b & granite-3.3-8b-instruct \\\\\n\\midrule\n0 & {'precision': 0.7368421052631579, 'recall': 0.9767441860465116, 'f1-score': 0.84, 'support': 817.0} & {'precision': 0.6219984508133231, 'recall': 0.9828641370869033, 'f1-score': 0.7618595825426945, 'support': 817.0} & {'precision': 0.7360594795539034, 'recall': 0.9694002447980417, 'f1-score': 0.8367670364500792, 'support': 817.0} & {'precision': 0.6300309597523219, 'recall': 0.996328029375765, 'f1-score': 0.7719298245614035, 'support': 817.0} & {'precision': 0.6347352024922118, 'recall': 0.9975520195838433, 'f1-score': 0.7758210376011423, 'support': 817.0} & {'precision': 0.6734177215189874, 'recall': 0.9767441860465116, 'f1-score': 0.7972027972027972, 'support': 817.0} & {'precision': 0.6813186813186813, 'recall': 0.9865361077111383, 'f1-