# LLM Classifiers

**Goal:** Given a sentence as input, classify it as either a prediction or a non-prediction.

In [1]:
import os
import sys

import pandas as pd

from tqdm import tqdm

# Get the current working directory of the notebook
notebook_dir = os.getcwd()
# Add the parent directory to the system path
sys.path.append(os.path.join(notebook_dir, '../'))

# import log_files
from data_processing import DataProcessing
from classification_models import EvaluationMetric
from text_generation_models import TextGenerationModelFactory

In [2]:
# Widen the pandas display limits so long sentences are not truncated and
# every row and column of a displayed frame is shown.
for option_name, option_value in (
    ('max_colwidth', 800),
    ('display.max_columns', None),
    ('display.max_rows', None),
):
    pd.set_option(option_name, option_value)

## Load Data

In [3]:
# Progress banner: marks the start of the data-loading stage in the cell output.
print("======= LOAD DATA =======")



In [4]:
# Locations of the held-out test split, resolved relative to the notebook's
# working directory.
base_data_path = os.path.join(notebook_dir, '../data/')
combine_data_path = os.path.join(base_data_path, 'combined_generated_fin_phrase_bank')
X_test_set_path, y_test_set_path = (
    os.path.join(combine_data_path, file_name)
    for file_name in ('x_test_set-v1.csv', 'y_test_set-v1.csv')
)

In [5]:
# Read the test features, discard the 'Unnamed: 0' column
# (presumably an index saved with the CSV — confirm), and preview the first rows.
X_test_df = DataProcessing.load_from_file(X_test_set_path, 'csv', sep=',')
X_test_df = X_test_df.drop(columns=['Unnamed: 0'])
X_test_df.head(7)

Unnamed: 0,Base Sentence,Sentence Label,Embedding,Normalized Embeddings
0,"Dr. Maria Rodriguez, a renowned climate scientist, noted on 2025-02-14 that the wind speed in Chicago fell.",0,[-5.33820353e-02 2.96053916e-01 4.16866988e-02 -1.40752643e-03\n 6.82408139e-02 7.03541860e-02 5.54416962e-02 -4.47977930e-02\n -1.14564508e-01 1.61363328e+00 -2.44681358e-01 6.63914764e-03\n -5.85275004e-03 -4.76306081e-02 -2.46703267e-01 -4.53357548e-02\n 2.61321273e-02 1.04844749e+00 -5.91918267e-02 -1.12772949e-01\n 9.06320885e-02 7.04300851e-02 3.07557527e-02 -1.40855715e-01\n 6.73192963e-02 -3.19030066e-03 -1.22142859e-01 1.88190360e-02\n 1.03679232e-01 5.15614264e-02 5.91920912e-02 1.00362174e-01\n -7.44822100e-02 -3.64652388e-02 9.92875174e-02 -4.76585925e-02\n 5.13628684e-02 1.03396410e-02 -8.16966295e-02 -5.59681691e-02\n 1.17678136e-01 2.33407039e-02 1.11587368e-01 -4.07175086e-02\n 2.27868054e-02 5.68023510e-02 -1.29113361e-01 -8.67207572e-02\n 8....,[ 0.3908352 0.88384986 0.54582196 0.5529683 0.20168522 1.4207762\n 1.1209849 -0.35026643 -2.0581949 -0.5683589 0.43492547 -0.28378505\n -1.2241656 -0.38047555 -2.0140388 -0.12880665 0.59478503 -0.2763546\n 0.88904005 -1.2983011 1.117075 0.3034963 0.53709686 -0.84479505\n 0.5376486 -0.9335876 0.04107084 -0.08168354 1.4058938 -0.2758865\n 0.98090166 1.413457 -1.1836389 -1.6302584 0.88330925 -0.423647\n 0.94221413 -0.5136003 -0.9248387 -0.41756865 1.8539761 -0.34789968\n 0.43764463 -0.03285579 -0.05317093 0.83162355 -0.9312643 -0.530558\n 0.6130175 0.3930604 -0.19065279 2.0767083 -0.67761785 0.48515055\n -0.41890967 0.5868992 0.44187045 0.7070218 1.0301542 -1.2024007\n -0.19246452 0.2535662 0.4490124 0.31755912 -0.5983049 -1....
1,"We succeeded in increasing our market share of sold apartment '' , comments Mr Kari Kauniskangas , Head of YIT International Construction Services .",0,[-4.71338071e-02 2.51612782e-01 2.61100605e-02 -1.26865417e-01\n 1.76884234e-01 6.63294748e-04 5.20298518e-02 -3.61273251e-02\n 2.52827350e-02 1.79729569e+00 -2.72558719e-01 -1.54113322e-02\n 1.13873892e-01 -1.15707785e-01 -1.11250550e-01 -4.33727466e-02\n -2.12675184e-02 9.76092160e-01 -1.39500961e-01 6.79444596e-02\n 4.63146679e-02 1.58941254e-01 2.58127805e-02 -1.83992460e-02\n 7.52350911e-02 2.80956198e-02 -5.96148474e-03 4.35078591e-02\n 1.56810004e-02 1.25586882e-01 9.25043598e-02 -2.61787921e-02\n -4.60829176e-02 8.23754594e-02 3.49306278e-02 -1.32550314e-01\n 4.81028110e-03 3.48891094e-02 1.02087855e-04 1.98335554e-02\n -1.02561712e-01 -6.57700896e-02 6.17823340e-02 -4.99959737e-02\n -1.31786028e-02 1.17884390e-01 1.76247712e-02 1.19141666e-02\n 1....,[ 0.46643826 0.28396165 0.3074835 -1.4569969 1.5413463 0.4508572\n 1.0626299 -0.24701537 -0.13232419 0.04329398 0.10356648 -0.56628853\n 0.6285996 -1.3918105 -0.6639517 -0.09610927 -0.18943849 -0.70547533\n -0.33030978 1.5931151 0.38704258 1.5418671 0.4651784 0.84381676\n 0.6579603 -0.4899005 1.6089592 0.29092404 0.06360818 0.76065934\n 1.5474954 -0.41184446 -0.7293663 0.01019586 -0.1011791 -1.8380105\n 0.29979753 -0.1342902 0.24613483 0.5835421 -1.8709322 -1.7238133\n -0.18806958 -0.17735313 -0.5564406 1.8359636 1.1352255 0.6281795\n 0.8216415 -0.28341526 0.23557287 -0.6368683 -0.16734943 1.7343459\n 0.63090867 0.507011 0.01599794 -0.76570714 0.40616304 0.9434384\n 0.26858085 -0.6297688 -0.63668746 -1.6477914 0.96522474 ...
2,"The Centers for Disease Control and Prevention reported that on 4/12/2032, the prevalence of obesity among children increased.",0,[-1.51246399e-01 2.89643884e-01 -6.67415932e-02 -1.25964195e-01\n -1.45232335e-01 -1.85876876e-01 9.91560966e-02 -1.26654934e-02\n 1.91853829e-02 2.25272012e+00 -2.79949337e-01 -1.65273547e-02\n 1.05223402e-01 1.76420361e-02 -3.08898333e-02 -1.08919702e-01\n 6.20932877e-02 1.31970799e+00 -2.89755166e-01 -2.24788003e-02\n -5.98008223e-02 -7.73710106e-03 -1.55985981e-01 -1.21320404e-01\n 5.34137487e-02 -3.02765016e-02 -3.34187560e-02 2.55857743e-02\n 5.20368591e-02 -7.90180359e-03 -2.90252268e-02 4.36594449e-02\n 4.22030035e-03 -1.84887592e-02 1.78330347e-01 1.65097918e-02\n -1.63025200e-01 -3.18169072e-02 3.57161760e-02 1.80087343e-01\n 2.78689675e-02 8.03094357e-02 -1.36023849e-01 -1.48923516e-01\n 9.46817547e-02 -1.39645055e-01 -1.75419390e-01 1.97325438e-01\n 1....,[-7.9331547e-01 7.9732412e-01 -1.1132418e+00 -1.4425584e+00\n -2.4306109e+00 -2.1453054e+00 1.8686610e+00 3.2376587e-02\n -2.1629231e-01 1.5599988e+00 1.5719289e-02 -5.8058661e-01\n 4.9473360e-01 5.8919609e-01 1.3702141e-01 -1.1879098e+00\n 1.1897597e+00 1.3324213e+00 -2.6116498e+00 1.4637411e-01\n -1.3609776e+00 -7.9015052e-01 -2.1799288e+00 -5.7541311e-01\n 3.2629901e-01 -1.3177156e+00 1.2384183e+00 2.0441137e-02\n 6.1816418e-01 -1.1085240e+00 -5.1954782e-01 5.9554350e-01\n 7.5279631e-02 -1.3821145e+00 2.0924530e+00 6.4544934e-01\n -2.0162990e+00 -1.1649547e+00 7.5596136e-01 2.7000082e+00\n 3.3503696e-01 5.3172505e-01 -2.6731625e+00 -1.7179923e+00\n 9.5286661e-01 -2.3984585e+00 -1.5833848e+00 2.8063431e+00\n 8.9468050e-01 -8.8889211e-01 -2.4910569e+00 -8.142...
3,"On 03/20/2024, the Weather Channel reported that the barometric pressure at Miami changed.",0,[ 3.48531380e-02 3.58599305e-01 -5.18614389e-02 -4.23194021e-02\n -7.46684596e-02 5.07630892e-02 -2.43870132e-02 2.42493348e-03\n -2.25098021e-02 1.82668257e+00 -2.15822726e-01 3.17645865e-03\n 1.50864601e-01 -1.60792619e-01 -2.20737353e-01 -8.11893269e-02\n -8.53867978e-02 1.20214665e+00 -1.46414340e-01 -1.33492529e-01\n 5.20259934e-03 2.18567941e-02 -5.50443381e-02 -9.44017321e-02\n 6.14962727e-02 3.20132636e-02 -2.23811090e-01 -5.55672646e-02\n -4.84851450e-02 4.07049023e-02 4.95031811e-02 6.88390732e-02\n -5.26359342e-02 6.08487613e-02 4.87640034e-03 9.49592441e-02\n -3.83603238e-02 1.24828115e-01 2.09228005e-02 -3.21878679e-02\n -1.51473260e-03 -3.34284715e-02 4.64837290e-02 2.54078265e-02\n 9.58596617e-02 -1.70406699e-02 -1.66295096e-01 -1.10882364e-01\n -7....,[ 1.4584736 1.7281182 -0.8855602 -0.10248223 -1.5605016 1.1481183\n -0.24437813 0.21207884 -0.79048693 0.14116141 0.77794814 -0.3281479\n 1.2010294 -2.061578 -1.7552309 -0.7260114 -1.2502872 0.6351954\n -0.43527701 -1.6298072 -0.29018864 -0.37609866 -0.71126384 -0.20421849\n 0.4491448 -0.43434173 -1.33096 -1.2043332 -0.91515344 -0.42790562\n 0.8161073 0.95874923 -0.8341878 -0.28695458 -0.5609284 1.9524796\n -0.2959483 1.2553437 0.54418963 -0.10350303 -0.16192845 -1.2244431\n -0.38027012 0.99694115 0.96934927 -0.38253865 -1.4548888 -0.81440234\n -1.536361 0.05232666 -0.00443031 1.7689508 -0.01639216 1.8893493\n -2.2216747 0.5347387 -0.0617071 -0.09946396 2.0520315 -1.0236064\n 0.73235124 2.8227727 -0.02113104 1.1123948 -0.86049247 -0...
4,"In the second quarter of 2010 , the group 's pretax loss narrowed to EUR 400,000 from EUR 600,000 .",0,[-2.45986860e-02 2.46378332e-01 4.28778417e-02 1.64580747e-01\n 5.33807501e-02 -1.30629987e-01 -8.77950564e-02 8.16475451e-02\n 7.44690150e-02 1.77330434e+00 -2.82453984e-01 1.24339357e-01\n 1.82428971e-01 -3.59319076e-02 1.86506361e-01 1.75051447e-02\n 2.13300111e-03 8.87328982e-01 -8.11287686e-02 -1.31561354e-01\n -2.19257381e-02 5.10508046e-02 -3.68447453e-02 -7.11802170e-02\n 6.44546971e-02 8.50140154e-02 -1.03577718e-01 -5.81524149e-02\n 1.79102570e-02 9.14583430e-02 7.49998242e-02 -7.27816448e-02\n -8.59871060e-02 2.16175988e-01 6.90384433e-02 -5.89544605e-03\n -5.09812832e-02 2.34330893e-02 6.60841912e-02 -1.12782359e-01\n 5.84061034e-02 7.27141947e-02 1.03027344e-01 -7.95887262e-02\n -5.49835488e-02 -5.91239333e-02 6.07552938e-02 1.15602724e-02\n 3....,[ 0.73911136 0.21330449 0.5640477 3.2122722 0.01844863 -1.3764098\n -1.3288877 1.1554903 0.5450321 -0.03660466 -0.01405172 1.2241513\n 1.6894867 -0.20668301 2.3038576 0.9179199 0.19772132 -1.2319068\n 0.555967 -1.5989091 -0.73706853 0.03235838 -0.44646627 0.11599427\n 0.49410972 0.31729832 0.29161072 -1.2433487 0.09761225 0.28277245\n 1.2497685 -1.0840715 -1.3676703 1.8571528 0.42057925 0.27216026\n -0.4701151 -0.31129542 1.1906887 -1.1679116 0.8515104 0.41445082\n 0.3301027 -0.6382132 -1.141424 -1.0744917 1.7426258 0.62402207\n -0.03017995 1.1488822 -1.5507549 -1.8189467 -1.0319893 0.70013916\n 0.9130713 1.4019715 -1.5612488 1.5183505 -1.0937321 0.58286417\n 1.9577196 -0.2102693 -1.1888937 -0.03464834 -1.5276066...
5,"Amazon stock price increased 2024/08/21, according to Morgan Stanley.",0,[-3.56238246e-01 3.16215336e-01 1.96363567e-03 -8.59227255e-02\n 3.29226367e-02 -6.33549988e-02 -9.11083892e-02 -8.00719261e-02\n -4.53244545e-04 1.28432000e+00 -3.49033445e-01 1.52690187e-01\n 1.43907636e-01 -2.38911808e-02 -9.98224542e-02 -2.16362830e-02\n -6.89776391e-02 1.18546176e+00 -1.20027252e-02 1.36906356e-02\n -1.72718223e-02 2.12408453e-01 6.95294514e-02 -2.02160180e-01\n -1.41534194e-01 1.02402717e-01 -2.07431659e-01 5.40152676e-02\n 2.75310874e-02 1.53083712e-01 1.21358827e-01 1.27936387e-02\n 1.50855199e-01 1.58824638e-01 2.46511865e-02 2.99012661e-02\n 7.75047168e-02 7.23923668e-02 -2.02139150e-02 -1.90545738e-01\n -1.73116382e-02 -6.62020892e-02 2.40570083e-01 -5.07421494e-02\n 9.18421820e-02 2.66550351e-02 -1.35689914e-01 -9.05295461e-02\n 5....,[-3.2736998 1.1559986 -0.06198151 -0.8010526 -0.23381642 -0.44011387\n -1.3855579 -0.7703236 -0.48674068 -1.6650745 -0.8054359 1.5873728\n 1.0933707 -0.02780943 -0.5500453 0.26595002 -0.97879887 0.5362416\n 1.6055218 0.72507256 -0.6604055 2.2899332 1.1012404 -1.6901522\n -2.636703 0.5638994 -1.1099167 0.4495035 0.2443641 1.1456854\n 2.0382683 0.15031688 2.4208388 1.0654857 -0.25842705 0.8685619\n 1.3029665 0.44516692 -0.04469495 -2.1949298 -0.42910093 -1.7304835\n 2.0580897 -0.18897364 0.913132 0.33592698 -1.0238808 -0.57530266\n 0.15368779 0.21496727 -0.39875057 -0.5956941 -0.4495656 1.3976265\n 1.6302358 -2.3716974 -0.99496704 -0.91918945 -1.4588856 0.12469574\n -0.41796833 -0.52981603 0.37002143 -1.1927913 -0.10816096 -1...
6,"With the acquisition , the company will expand its offering to North , Central and South America , it said .",0,[ 4.47663330e-02 2.42985860e-01 -4.02790979e-02 -1.43336698e-01\n 2.41053894e-01 -2.37727966e-02 -1.15989529e-01 -2.13672388e-02\n -4.97141555e-02 2.39729977e+00 -2.43915528e-01 -8.70632753e-02\n 1.63370475e-01 -9.79692340e-02 -1.41119704e-01 -1.39058188e-01\n -5.64219505e-02 1.29654527e+00 -1.14797533e-01 -9.06679481e-02\n 7.16461986e-02 1.25767395e-01 7.32074901e-02 -7.21520036e-02\n -3.43216397e-02 1.50832376e-02 -8.73322636e-02 1.62054729e-02\n 1.07962464e-04 2.92290486e-02 7.21188337e-02 7.94769451e-02\n -2.69783381e-03 1.74049884e-01 3.91005240e-02 -1.03789151e-01\n -7.66879618e-02 1.29591329e-02 -5.99452779e-02 -9.48793739e-02\n -3.63441296e-02 7.07981437e-02 1.01982944e-01 -1.43393442e-01\n 1.20310942e-02 -1.66210309e-02 -9.87050086e-02 -8.65358189e-02\n -6....,[ 1.5784224 0.16751124 -0.70833856 -1.7208838 2.33261 0.11076922\n -1.8111163 -0.07124694 -1.1651247 2.041494 0.44402838 -1.4842696\n 1.3945572 -1.1282917 -0.9616642 -1.6899197 -0.77106595 1.1950492\n 0.04476736 -0.9446287 0.80432403 1.0777276 1.1547545 0.10259382\n -1.0071858 -0.67443806 0.510846 -0.12112784 -0.17393598 -0.58859694\n 1.2007669 1.1121962 -0.03538225 1.2756522 -0.03739084 -1.3588266\n -0.8248622 -0.47312692 -0.6134615 -0.93146753 -0.75099677 0.3848661\n 0.31698164 -1.6318702 -0.20367733 -0.37563875 -0.50302833 -0.5283854\n -1.3577423 0.18473631 0.7102287 1.0391486 -1.3216136 1.6999981\n 0.6178434 1.4238505 -1.1198261 1.1614077 -0.79754144 1.3300304\n 1.2568449 0.54746306 0.7087244 1.457231 -0.07412171 -0...


In [6]:
# Read the test labels and discard the 'Unnamed: 0' column
# (presumably an index saved with the CSV — confirm).
y_test_df = DataProcessing.load_from_file(y_test_set_path, 'csv', sep=',')
y_test_df = y_test_df.drop(columns=['Unnamed: 0'])

In [7]:
# Report the shape and the first three rows of the feature frame, then of the label frame.
for frame in (X_test_df, y_test_df):
    print(f"\tShape: {frame.shape}, \nSubset of Data:{frame.head(3)}")

	Shape: (565, 4), 
Subset of Data:                                                                                                                                          Base Sentence  \
0                                           Dr. Maria Rodriguez, a renowned climate scientist, noted on 2025-02-14 that the wind speed in Chicago fell.   
1  We succeeded in increasing our market share of sold apartment '' , comments Mr Kari Kauniskangas , Head of YIT International Construction Services .   
2                        The Centers for Disease Control and Prevention reported that on 4/12/2032, the prevalence of obesity among children increased.   

   Sentence Label  \
0               0   
1               0   
2               0   

                                                                                                                                                                                                                                                                    

In [8]:
# for idx, row in X_test_df.iterrows():
#     text = row['Base Sentence']
#     label = row['Sentence Label']
#     embedding = row['Embedding']
#     print(type(embedding))
#     norm_embedding = row['Normalized Embeddings']
#     if idx < 7:
#         print(f"{idx}\n Sentence: {text}\n Label: {label}\n Embeddings Shape: {embedding}\n\t Embeddings Subset [:6]: {embedding[:6]} \n Norm Embeddings: {norm_embedding.shape}, \n\tNorm Embeddings Subset [:6]: {norm_embedding[:6]}")
# print(f"\tShape: {y_test_df.shape}, \nSubset of Data:{y_test_df.head(7)}")

## Load Prompt

In [9]:
# prediction_properties = PredictionProperties.get_prediction_properties()
# prediction_requirements = PredictionProperties.get_requirements()
# system_identity_prompt = "You are an expert at identifying specific types of sentences by knowing the sentence format."
# prediction_examples_prompt = """Some examples of predictions in the PhraseBank dataset are
#     1. According to Gran , the company has no plans to move all production to Russia , although that is where the company is growing .
#     2. According to the company 's updated strategy for the years 2009-2012 , Basware targets a long-term net sales growth in the range of 20 % -40 % with an operating profit margin of 10 % -20 % of net sales .
#     3. Its board of directors will propose a dividend of EUR0 .12 per share for 2010 , up from the EUR0 .08 per share paid in 2009 .
# """
# non_prediction_examples_prompt = """Some examples of non-predictions in the PhraseBank dataset are
#     1. Net sales increased to EUR193 .3 m from EUR179 .9 m and pretax profit rose by 34.2 % to EUR43 .1 m. ( EUR1 = USD1 .4 )
#     2. Net sales surged by 18.5 % to EUR167 .8 m. Teleste said that EUR20 .4 m , or 12.2 % , of the sales came from the acquisitions made in 2009 .
#     3. STORA ENSO , NORSKE SKOG , M-REAL , UPM-KYMMENE Credit Suisse First Boston ( CFSB ) raised the fair value for shares in four of the largest Nordic forestry groups .
# """
# # goal_prompt = "Given the above, identify the prediction."

# base_prompt = f"""{system_identity_prompt} The sentence format is based on: 
    
#     {prediction_properties}
#     Enforce: {prediction_requirements}
#     Know: {prediction_examples_prompt}
#     Know: {non_prediction_examples_prompt}

# """
# base_prompt

In [10]:
# Shared role/background preamble. It is passed to llm_certifier as `base_prompt`
# and interpolated into the per-sentence instruction sent to each model.
# The fourth example now uses the same straight-quote `"sentence" (Yes|No)` shape
# as the other three (it previously had curly quotes and misplaced spaces).
prompt_1 = """ 

Role: 
You are a linguist expert. You are acting as a prediction detector. Your task is to identify if a given sentence is a prediction about the future.

Background:
A prediction is a statement about what someone thinks will happen in the future.
Examples of predictions:
- "It will rain tomorrow." (Yes)
- "The stock market is expected to rise next quarter." (Yes)
- "I am going to the store." (No)
- "Lakers will win the championship." (Yes)

A prediction may contain: source, target, date, outcome.
"""

## Models

In [11]:
# Build one text-generation client per model under comparison.
tgmf = TextGenerationModelFactory()

# Option 1 (active): an explicit list of model names.
selected_model_names = [
    'llama-3.1-8b-instant',
    'llama-3.3-70b-versatile',
    'llama-3.3-70b-instruct',
]
models = tgmf.create_instances(selected_model_names)

# Alternative selections (uncomment exactly one to replace Option 1):
#   Option 2, all Groq models:
#       models = tgmf.create_instances(tgmf.get_groq_model_names())
#   Option 3, all NaviGator models:
#       models = tgmf.create_instances(tgmf.get_navigator_model_names())
#   Option 4, all available models:
#       models = tgmf.create_instances()
#   Option 5, mix and match:
#       custom_models = ['llama-3.1-70b-instruct', 'mistral-small-3.1', 'llama-3.1-8b-instant']
#       models = tgmf.create_instances(custom_models)
models

[<text_generation_models.LlamaInstantTextGenerationModel at 0x316c7fe90>,
 <text_generation_models.LlamaVersatileTextGenerationModel at 0x316d75310>,
 <text_generation_models.Llama3370BInstructTextGenerationModel at 0x316c86a50>]

In [12]:
import json
import re

def parse_json_response(response):
    """Extract the ``label`` and ``reasoning`` fields from an LLM reply.

    The reply may wrap the JSON object in extra text; the span from the first
    ``{`` to the last ``}`` is taken and decoded as JSON.

    Args:
        response: Raw text returned by the model.

    Returns:
        A ``(label, reasoning)`` tuple. Either element is ``None`` when the
        corresponding key is absent. ``(None, None)`` is returned, and a
        message printed, when the reply is not a string, contains no
        ``{...}`` span, or the span is not valid JSON. The function always
        returns a 2-tuple so callers can unpack the result unconditionally.
    """
    if not isinstance(response, str):
        print(f"Error parsing JSON: expected a string response, got {type(response).__name__}")
        return None, None

    # Greedy match with DOTALL: tolerates leading/trailing chatter and newlines
    # inside the object.
    json_match = re.search(r'\{.*\}', response, re.DOTALL)
    if json_match is None:
        # Previously this path fell off the end of the function and returned a
        # bare None, which broke tuple-unpacking in the caller.
        print("Error parsing JSON: no JSON object found in response")
        return None, None

    try:
        data = json.loads(json_match.group())
    except (ValueError, RecursionError) as e:  # json.JSONDecodeError subclasses ValueError
        print(f"Error parsing JSON: {e}")
        return None, None

    return data.get('label'), data.get('reasoning')

In [13]:
def llm_certifier(data: str, base_prompt: str, model):
    """Ask one model whether a sentence is a prediction and parse its answer.

    Args:
        data: The sentence to classify.
        base_prompt: Background/role text placed at the start of the prompt.
        model: Object exposing ``user(prompt)`` to wrap the prompt as a user
            message and ``chat_completion(messages)`` to obtain the reply.

    Returns:
        A ``(raw_text, label, reasoning)`` tuple: the model's unparsed reply
        followed by the values parsed from its JSON. ``label`` and
        ``reasoning`` are ``None`` when the reply could not be parsed.
    """
    # The text inside this literal, including the leading whitespace of its
    # continuation lines, is kept exactly as before so the prompt sent to the
    # model is unchanged.
    prompt = f""" Given this: {base_prompt}. Also given the sentence '{data}', your task is to analyze the sentence and determine if it is a prediction. If prediction, generate label as 1 and if non-prediction generate label as 0.
        Respond ONLY with valid JSON in this exact format:
        {{"label": 0, "reasoning": "your explanation here"}}
        Examples:
        - "It will rain tomorrow." → {{"label": 1, "reasoning": "Contains the future tense words 'will' and 'tomorrow'"}}
        - "The stock market is expected to rise next quarter." → {{"label": 1, "reasoning": "Contains future tense words 'is expected'"}}
        - "I am going to the store." → {{"label": 0, "reasoning": "Does not contain a future tense word"}}
        - "Lakers will win the championship." → {{"label": 1, "reasoning": "Contains the future tense word 'will'"}}
        """
    input_prompt = model.user(prompt)
    raw_text_llm_generation = model.chat_completion([input_prompt])

    # Parse the JSON response. Guard against a parser result of None (instead
    # of a pair) so a single unparseable reply cannot abort a long batch run.
    parsed = parse_json_response(raw_text_llm_generation)
    label, reasoning = parsed if parsed is not None else (None, None)

    return raw_text_llm_generation, label, reasoning

In [14]:
# Progress banner: marks the start of the per-sentence, per-model classification stage.
print("======= PROMPT + MODEL -> LABEL and REASONING =======")



In [None]:
    # content : meta :: text : meta_data
# Classify every test sentence with every model.
# Each entry of `results` is a tuple
#   (text, raw_response, llm_label, llm_reasoning, llm_name)
# in the same order as the column list used when the results frame is built.
results = []
for idx, text in X_test_df['Base Sentence'].items():
    print(f"{idx} --- Sentence: {text}")
    for model in models:
        model_name = model.__name__()  # looked up once per model instead of three times
        raw_response, llm_label, llm_reasoning = llm_certifier(text, prompt_1, model)
        # The format string previously carried leftover "'," fragments
        # (it printed "Label:', 0"); they are removed here.
        print(f"\tModel: {model_name}\n\t\tLabel: {llm_label}\n\t\tReasoning: {llm_reasoning}")
        results.append((text, raw_response, llm_label, llm_reasoning, model_name))


0 --- Sentence: Dr. Maria Rodriguez, a renowned climate scientist, noted on 2025-02-14 that the wind speed in Chicago fell.
	Model: llama-3.1-8b-instant
		Label:', 0
		Reasoning:', The sentence describes a past event, 'the wind speed in Chicago fell', and does not contain any future tense words or phrases.
	Model: llama-3.3-70b-versatile
		Label:', 0
		Reasoning:', The sentence is in the past tense, as indicated by the verb 'noted' and the date '2025-02-14', and it describes a past event, 'the wind speed in Chicago fell', without any reference to a future event or outcome.
	Model: llama-3.3-70b-instruct
		Label:', 0
		Reasoning:', The sentence is in the past tense, indicated by the word 'noted' and the specific date '2025-02-14', and it describes a completed action, 'the wind speed in Chicago fell', which suggests a statement about a past event rather than a prediction about the future.
1 --- Sentence: We succeeded in increasing our market share of sold apartment '' , comments Mr Kari 

In [None]:
# Display the collected (text, raw_response, llm_label, llm_reasoning, llm_name) tuples as the cell output.
results

In [None]:
# groupby text

In [None]:
# Turn the collected result tuples into a frame with one row per (sentence, model) pair.
result_columns = ['text', 'raw_response', 'llm_label', 'llm_reasoning', 'llm_name']
results_with_llm_label_df = pd.DataFrame(data=results, columns=result_columns)
results_with_llm_label_df

In [None]:
# Give the ground-truth column a name distinct from the 'Sentence Label' column in X_test_df.
y_test_df = y_test_df.rename(columns={'Sentence Label': 'Actual Label'})

In [None]:
def get_llm_labels(df, model_name):
    """Return the ``llm_label`` values of the rows produced by one model.

    Args:
        df: Frame with at least the columns ``llm_name`` and ``llm_label``.
        model_name: Value of ``llm_name`` to select.

    Returns:
        The ``llm_label`` Series restricted to rows whose ``llm_name`` equals
        ``model_name``, keeping the original index of those rows.
    """
    return df.loc[df['llm_name'] == model_name, 'llm_label']


# Put the test features and the ground-truth labels side by side, then add one
# column of predicted labels per model. Labels are attached by position, so the
# results for each model must be in the same order as the test rows.
test_and_models_df = pd.concat([X_test_df, y_test_df], axis=1)
for model in models:
    model_name = model.__name__()
    print(model_name)
    test_and_models_df[model_name] = (
        get_llm_labels(results_with_llm_label_df, model_name).to_numpy().ravel()
    )
test_and_models_df

## Save Output

In [None]:
# Persist the combined frame (test data, actual labels, per-model LLM labels).
# NOTE(review): presumably written as 'llm_classifiers.csv' inside combine_data_path — confirm against DataProcessing.save_to_file.
DataProcessing.save_to_file(test_and_models_df, combine_data_path, 'llm_classifiers', '.csv')

## Evaluation

In [None]:
# Progress banner: marks the start of the evaluation stage in the cell output.
print("======= EVALUATION/RESULTS =======")

In [None]:
# Instantiate the project's evaluation helper; the bare name on the last line
# displays the object's repr as the cell output.
get_metrics = EvaluationMetric()
get_metrics

In [None]:
# Print the ground truth next to each model's predicted labels and produce a
# classification report per model.
# NOTE(review): the report receives the whole y_test_df rather than
# `actual_label` — confirm that this is what eval_classification_report expects.
actual_label = test_and_models_df['Actual Label'].values
for ml_model_name in (ml_model.__name__() for ml_model in models):
    print(f"Actual Label:\t\t{actual_label}")
    ml_model_predictions = test_and_models_df[ml_model_name].values
    print(f"{ml_model_name}:\t\t{ml_model_predictions}")
    get_metrics.eval_classification_report(y_test_df, ml_model_predictions)