# LLM Classifiers

**Goal:** Given a sentence as input, classify it as either a prediction or non-prediction.

In [1]:
import os
import sys

import pandas as pd

from tqdm import tqdm

# Get the current working directory of the notebook
notebook_dir = os.getcwd()
# Make the parent directory importable so the project-local modules imported below resolve.
# The membership check keeps sys.path free of duplicate entries when this cell is re-run.
parent_dir = os.path.join(notebook_dir, '../')
if parent_dir not in sys.path:
    sys.path.append(parent_dir)

# import log_files
from data_processing import DataProcessing
from classification_models import EvaluationMetric
from text_generation_models import TextGenerationModelFactory

In [2]:
# Widen pandas' display limits so long sentences and every row/column are shown in full.
for option_name, option_value in (
    ('max_colwidth', 800),
    ('display.max_columns', None),
    ('display.max_rows', None),
):
    pd.set_option(option_name, option_value)

## Load Data

In [3]:
# Section banner marking the start of the data-loading stage in the printed output.
section_banner = "======= LOAD DATA ======="
print(section_banner)



In [4]:
# Locations of the held-out test split, resolved relative to the notebook directory.
base_data_path = os.path.join(notebook_dir, '../data/')
combine_data_path = os.path.join(base_data_path, 'combined_generated_fin_phrase_bank')
X_test_set_path, y_test_set_path = (
    os.path.join(combine_data_path, file_name)
    for file_name in ('x_test_set-v1.csv', 'y_test_set-v1.csv')
)

In [5]:
# Load the test features; the 'Unnamed: 0' column (presumably a saved index) is not needed.
X_test_df = DataProcessing.load_from_file(X_test_set_path, 'csv').drop(columns=['Unnamed: 0'])
X_test_df.head(7)

Unnamed: 0,Base Sentence,Sentence Label,Embedding,Normalized Embeddings
0,"In 08/2024, Coach Michael Brown envisions that the touchdown rate at the New England Patriots has some probability to remain stable.",1,[-2.95873899e-02 1.89737260e-01 -1.18197471e-01 -4.13006060e-02\n 1.10420622e-01 -4.95679379e-02 1.91853363e-02 3.95639287e-03\n 1.09274819e-01 1.97997952e+00 -1.85330868e-01 -4.54250947e-02\n 1.08167037e-01 2.54211240e-02 -9.49313119e-03 -6.84713572e-02\n 1.55237708e-02 1.00765920e+00 -6.00808859e-03 -4.15705629e-02\n -7.41642118e-02 -1.50733357e-02 1.34443464e-02 -8.06241706e-02\n -7.69194737e-02 1.14793696e-01 -2.15540364e-01 6.73782602e-02\n 3.61875258e-02 1.65809989e-02 -4.40017954e-02 2.61284299e-02\n 4.48011309e-02 -9.68891475e-03 3.49024460e-02 9.65972338e-03\n -2.66898591e-02 7.46985758e-03 -6.69648051e-02 -3.72406021e-02\n 7.12334365e-02 6.75264001e-02 7.85919055e-02 -7.79033080e-03\n 4.30065207e-02 -5.40831983e-02 -1.17614612e-01 -2.35915799e-02\n -4....,[ 0.94400704 -0.8645446 -2.054663 -0.3882444 0.8747154 -0.2001761\n 0.3727082 0.06485146 0.7578419 0.74855906 1.5445073 -0.9450778\n 0.8223399 0.4856363 0.34803256 -0.53150874 0.5283372 -0.55164915\n 1.8045051 -0.24819341 -2.180726 -1.0896391 0.08680368 0.22307044\n -1.5891142 0.5755748 -1.1823585 0.6673478 0.25934806 -0.86385673\n -0.6393052 0.24252476 0.7617 -1.2495198 -0.25059918 0.46805078\n -0.01315914 -0.6343914 -0.95103335 -0.23913547 1.234197 0.26890773\n -0.0123065 0.36895898 0.34772688 -0.8687144 -0.7102191 0.17720816\n -1.3530828 0.6399917 0.0253048 1.1315668 -0.82290274 0.01780647\n 0.33573022 0.84572476 0.5561635 0.630834 0.5291998 1.5564367\n 0.74758047 1.1630359 0.18509752 -1.5357434 -0.7491160...
1,Coach Rachel Thompson forecasts that the points per game at the New York Knicks potentially decrease in August 2024.,1,[-7.31271803e-02 2.57879347e-01 -1.00163361e-02 -2.91225519e-02\n 6.20316342e-02 -1.89948324e-02 2.83652991e-02 -2.36207973e-02\n 1.25035286e-01 1.68172586e+00 -3.08204234e-01 3.09939720e-02\n 5.19324541e-02 -4.09708358e-03 5.32346480e-02 -9.79294349e-03\n -1.77208520e-02 9.74500477e-01 -6.05918542e-02 -1.19134128e-01\n 6.85500652e-02 -1.13969576e-02 1.40849143e-01 -1.98314548e-01\n -2.55904496e-02 2.24934310e-01 -2.85059363e-01 1.34536862e-01\n -5.01615815e-02 1.06635571e-01 -1.34736553e-01 6.85500056e-02\n 9.54245403e-03 1.24958850e-01 8.98100063e-03 5.74925914e-02\n 3.36430110e-02 3.12536396e-02 -2.97347642e-02 -5.55007998e-03\n -7.19834417e-02 1.25202790e-01 3.19119208e-02 -8.32804516e-02\n -8.05644970e-03 -9.72918496e-02 -1.58620656e-01 7.15503991e-02\n -3....,[ 0.4060981 0.06459576 -0.33290786 -0.18525055 0.23426409 0.28097287\n 0.5438155 -0.29924256 0.9618827 -0.30452695 -0.16134328 0.04894692\n -0.02327521 -0.02009717 1.0117191 0.54240227 -0.07506002 -0.78002316\n 0.8962295 -1.5484266 0.80796844 -1.033549 2.02265 -1.4457239\n -0.84145296 2.089933 -2.1967309 1.8129327 -1.2062268 0.43286043\n -2.3416817 0.92772835 0.19314055 0.6750809 -0.73656416 1.299064\n 0.8058958 -0.26324862 -0.31853175 0.24293266 -1.204331 1.1874502\n -0.641938 -0.9460187 -0.5985572 -1.5994209 -1.2832663 1.3151182\n -0.65960604 -0.5158133 0.5862492 -0.48699406 0.34597123 -1.3362038\n -1.5870239 -0.51357347 0.21139102 1.4604084 1.0736531 -0.19801451\n -1.9606705 1.1726837 -0.5702353 -2.2427983 0.5018804 ...
2,JPMorgan forecasts that the revenue at Microsoft potentially decrease in Q3 of 2027.,1,[-2.24677563e-01 2.28302136e-01 -3.20941433e-02 -1.78482141e-02\n -5.66348508e-02 2.05190871e-02 -3.81574295e-02 1.41838089e-01\n 1.64982721e-01 1.45077002e+00 -3.47716570e-01 4.82952334e-02\n 1.09031864e-01 4.18408737e-02 6.93667382e-02 7.94899929e-03\n -2.80997157e-02 1.02238500e+00 -1.17930911e-01 -1.29385918e-01\n 4.74877171e-02 1.93608850e-01 6.20140657e-02 -1.79264918e-01\n 6.11357428e-02 1.66994065e-01 -9.25592855e-02 5.74763976e-02\n 2.44720709e-02 6.20023720e-02 -3.91931273e-02 -3.54092242e-03\n -2.73218546e-02 1.84805289e-01 -8.36593192e-03 -5.23123983e-03\n 3.49834897e-02 1.63400516e-01 1.24472668e-02 1.79933552e-02\n 2.80983802e-02 1.24136940e-01 1.31817430e-01 1.38525711e-02\n 4.25122082e-02 -9.02827159e-02 -2.97950823e-02 -7.99663663e-02\n -1....,[-1.4662192e+00 -3.3869946e-01 -6.8428683e-01 2.6793862e-03\n -1.3363433e+00 9.0282923e-01 -6.9611585e-01 1.8852654e+00\n 1.4790570e+00 -1.1199952e+00 -7.0989293e-01 2.7399394e-01\n 8.3534455e-01 7.6695478e-01 1.1824033e+00 8.6710888e-01\n -2.6343867e-01 -4.5022815e-01 -5.7894431e-02 -1.7202821e+00\n 3.6688486e-01 2.0942023e+00 8.2479399e-01 -1.1756091e+00\n 4.2180544e-01 1.2932943e+00 6.1209482e-01 4.9844283e-01\n 6.0505498e-02 -2.0982359e-01 -5.4908437e-01 -2.3670182e-01\n -4.0131029e-01 1.5305017e+00 -1.0617776e+00 2.0934603e-01\n 8.2409364e-01 1.7988863e+00 3.9809933e-01 6.0106939e-01\n 4.9974424e-01 1.1704757e+00 7.0561284e-01 7.4596107e-01\n 3.3856642e-01 -1.4808886e+00 5.1703244e-01 -4.9704137e-01\n -2.3291378e+00 5.7871742e-03 -3.6583024e-01 -8.420...
3,"The Brazilian unit of Finnish security solutions provider F-Secure signed up 1,500 new clients last year , online news source Reseller Web quoted the division 's commercial director , Vladimir Brand+_o , as saying .",0,[-9.49790627e-02 1.75042719e-01 -4.31647301e-02 -4.51112948e-02\n 1.72519922e-01 -2.92447265e-02 -1.74319204e-02 -2.38732591e-01\n -4.68273982e-02 1.85996163e+00 -2.00454473e-01 -6.07615486e-02\n 8.27170834e-02 -1.10286549e-01 -1.05571873e-01 -1.35309473e-02\n -8.58865678e-04 1.12296283e+00 -5.33046685e-02 -5.39846495e-02\n 5.32859191e-02 5.21327853e-02 4.24367469e-03 -4.33036722e-02\n -6.59389868e-02 1.87109818e-03 -1.60044193e-01 -3.14699784e-02\n 8.60306025e-02 1.97323579e-02 3.05023193e-02 -3.53931487e-02\n 9.71114933e-02 1.66937694e-01 1.44137610e-02 -1.90161038e-02\n -6.34789988e-02 2.72928309e-02 3.54067907e-02 -1.58601403e-02\n 5.33533562e-03 9.40666571e-02 1.13477565e-01 -1.08105645e-01\n -7.01851994e-02 2.89511457e-02 -3.51462960e-02 2.04022620e-02\n 9....,[ 0.13613077 -1.0649096 -0.86048055 -0.45176408 1.6966292 0.11966357\n -0.30980867 -3.1393042 -1.263115 0.3247951 1.3345463 -1.1445675\n 0.43964183 -1.8394341 -0.66852117 0.4739907 0.23098873 0.24248143\n 1.0174885 -0.45629638 0.48830968 -0.06428263 -0.05299549 0.75225765\n -1.4291718 -0.9770338 -0.3725974 -1.0187958 1.1053182 -0.81847954\n 0.75855 -0.7511867 1.6052241 1.2751096 -0.63471293 -0.03014214\n -0.5125938 -0.32505655 0.788159 0.08609865 0.11216199 0.6915825\n 0.4582401 -1.3784537 -1.749909 0.5354891 0.44225088 0.70338005\n 0.9248192 0.05322725 -0.7257571 -0.4146794 0.07898946 1.125015\n -1.4763968 1.195394 -0.0841465 -1.0040774 1.2092102 1.1303582\n 2.0370452 0.5636679 -0.25583628 -1.6240209 1.8427348 ...
4,"On August 15, 2027, Dr. John Lee speculates that the average physical activity levels at U.S. high schools will likely increase.",1,[-1.65867433e-01 3.57482523e-01 7.28247538e-02 -3.14977057e-02\n -3.20128016e-02 2.44006794e-02 -3.71189825e-02 -6.11495972e-03\n 5.43026142e-02 1.84919739e+00 -2.53526360e-01 -3.40995230e-02\n 7.54815936e-02 3.72875221e-02 -7.54721314e-02 5.80832064e-02\n -1.76085960e-02 1.20470536e+00 -1.56163409e-01 -7.61056989e-02\n 1.80367492e-02 7.72641674e-02 7.94613808e-02 -1.32992119e-01\n -3.02432496e-02 1.56555027e-02 -1.88882783e-01 2.65764166e-02\n -7.17546232e-03 8.97909626e-02 -6.21524043e-02 6.56070113e-02\n -6.33275881e-03 5.11547066e-02 3.18643376e-02 -3.88401784e-02\n -8.96424800e-03 1.03460431e-01 -4.25934931e-03 3.29740532e-02\n -2.31975634e-02 -4.60627079e-02 3.91621627e-02 6.83518276e-02\n 7.78002962e-02 -5.75087452e-03 -1.13928251e-01 1.78448763e-02\n 6....,[-7.39654064e-01 1.42271864e+00 9.85548079e-01 -2.24841565e-01\n -1.01045871e+00 9.63916361e-01 -6.76760018e-01 -6.81178197e-02\n 4.61514480e-02 2.86788255e-01 5.97749472e-01 -7.97759831e-01\n 3.30839753e-01 6.88942492e-01 -3.50053191e-01 1.78464699e+00\n -7.30225444e-02 8.05466533e-01 -6.94084466e-01 -8.27121615e-01\n -2.49871537e-01 3.19144130e-01 1.08989644e+00 -5.19482493e-01\n -9.09225821e-01 -7.87507594e-01 -7.93389797e-01 -2.86460631e-02\n -4.76637661e-01 1.90310910e-01 -9.79849100e-01 8.80192399e-01\n -6.28532246e-02 -3.79845709e-01 -3.07556450e-01 -3.74551147e-01\n 2.27476686e-01 8.63529027e-01 1.14270397e-01 8.28951955e-01\n -3.73662561e-01 -1.54008961e+00 -5.44144869e-01 1.69529462e+00\n 9.92515028e-01 -5.13608567e-02 -6.58703268e-01 6.72793388e-01\n 3....
5,Analyst Emma Taylor noted that the home run count at the Chicago Cubs remained stable in 2024.,0,[-2.64777802e-02 1.62194327e-01 -6.83643995e-03 4.88223322e-02\n 7.62382820e-02 1.14460206e-02 -5.89646101e-02 -1.04382239e-01\n 1.83703244e-01 1.89310622e+00 -1.79091990e-01 -6.69373795e-02\n 5.82357794e-02 9.85373370e-03 -7.01477230e-02 2.99751665e-02\n 2.18661092e-02 8.88447881e-01 -1.64659292e-01 -5.07162213e-02\n 3.26223765e-03 -2.32030936e-02 9.22632739e-02 -9.55311134e-02\n -9.47977826e-02 7.93419480e-02 -1.47108674e-01 5.80902882e-02\n -2.33686138e-02 5.19458652e-02 -3.62452157e-02 -1.53706027e-02\n 8.48071575e-02 1.05495445e-01 -3.96783315e-02 9.21180006e-03\n 4.12575155e-02 -4.33583446e-02 -3.84721495e-02 -4.78743613e-02\n 5.88028366e-03 1.01354003e-01 -3.68710863e-03 2.61255540e-02\n 1.05573274e-01 -2.55232155e-02 -1.38972521e-01 -1.31551251e-01\n 5....,[ 0.9824245 -1.2401017 -0.28229827 1.1139989 0.42229584 0.7600405\n -1.0839454 -1.3655136 1.7214198 0.44182336 1.6311216 -1.2248999\n 0.07150966 0.21892124 -0.2937187 1.2702239 0.643452 -1.3726931\n -0.83545625 -0.40150625 -0.55927646 -1.213674 1.2844143 0.01169699\n -1.8495307 0.08813752 -0.18385144 0.50891453 -0.7514787 -0.35462964\n -0.49377528 -0.42777768 1.4068122 0.39687887 -1.64881 0.46026888\n 0.9092673 -1.4275589 -0.4669713 -0.4008935 0.12144072 0.8076393\n -1.1221067 0.9597466 1.5071958 -0.38573322 -1.0086895 -1.1140031\n 0.31486785 -0.6657662 -0.6382753 -0.18856792 -0.6044813 -1.4846716\n -0.5640565 0.22765079 -0.928093 -0.12893389 1.3433449 0.20920368\n -1.0305243 1.7044654 -0.05661023 -0.32602778 -1.4305439 ...
6,"According to Goldman Sachs, the research and development expenses at Facebook would fall in 2025.",1,[-1.21063471e-01 2.51935005e-01 -2.94214804e-02 -4.79285903e-02\n -2.75685582e-02 -1.82558745e-02 -2.92567778e-02 -6.38376474e-02\n 4.33391780e-02 1.99582398e+00 -4.16828781e-01 9.97788087e-03\n 1.58754244e-01 1.18489295e-01 -6.33422360e-02 3.04343104e-02\n -4.03524712e-02 1.07175887e+00 -1.86021462e-01 6.64371178e-02\n 1.12161785e-01 1.61022589e-01 -3.45068164e-02 -1.36160821e-01\n 5.12972996e-02 1.39190540e-01 -9.68207866e-02 1.42498359e-01\n 6.11764472e-03 4.63660844e-02 5.58225587e-02 -1.04840025e-02\n 5.23718782e-02 2.01317146e-01 4.69320081e-02 -1.29268587e-01\n 2.12263362e-03 1.14207558e-01 -9.96199772e-02 -4.00746167e-02\n -1.65041629e-02 9.89470035e-02 1.30141778e-02 -1.81327816e-02\n 3.88694070e-02 1.02692857e-01 -6.69377595e-02 -1.15642011e-01\n 1....,[-1.86127022e-01 -1.64573379e-02 -6.41750097e-01 -4.98725086e-01\n -9.51637030e-01 2.92602330e-01 -5.30214727e-01 -8.30214500e-01\n -9.57852453e-02 8.04503322e-01 -1.66937757e+00 -2.24420890e-01\n 1.58303380e+00 2.08016706e+00 -2.21713752e-01 1.27862692e+00\n -4.85828876e-01 -1.10175587e-01 -1.19092357e+00 1.56238830e+00\n 1.72127664e+00 1.59703732e+00 -6.41788065e-01 -5.64413190e-01\n 2.78498143e-01 9.11014855e-01 5.49914002e-01 1.94873917e+00\n -2.51018137e-01 -4.34974223e-01 1.23361146e+00 -3.48848134e-01\n 8.83781075e-01 1.76651561e+00 -2.50737220e-02 -1.94558835e+00\n 3.77987742e-01 1.03123653e+00 -1.50581253e+00 -2.82245785e-01\n -2.59695232e-01 7.69305944e-01 -8.96835506e-01 1.88801721e-01\n 2.71059096e-01 1.78254378e+00 -2.02530343e-03 -9.23726618e-01\n 1....


In [6]:
# Load the ground-truth labels; the 'Unnamed: 0' column (presumably a saved index) is not needed.
y_test_df = DataProcessing.load_from_file(y_test_set_path, 'csv').drop(columns=['Unnamed: 0'])

In [7]:
# Print the shape and first three rows of the feature frame, then of the label frame.
for frame in (X_test_df, y_test_df):
    print(f"\tShape: {frame.shape}, \nSubset of Data:{frame.head(3)}")

	Shape: (24, 4), 
Subset of Data:                                                                                                                          Base Sentence  \
0  In 08/2024, Coach Michael Brown envisions that the touchdown rate at the New England Patriots has some probability to remain stable.   
1                  Coach Rachel Thompson forecasts that the points per game at the New York Knicks potentially decrease in August 2024.   
2                                                  JPMorgan forecasts that the revenue at Microsoft potentially decrease in Q3 of 2027.   

   Sentence Label  \
0               1   
1               1   
2               1   

                                                                                                                                                                                                                                                                                                                                     

In [8]:
# for idx, row in X_test_df.iterrows():
#     text = row['Base Sentence']
#     label = row['Sentence Label']
#     embedding = row['Embedding']
#     print(type(embedding))
#     norm_embedding = row['Normalized Embeddings']
#     if idx < 7:
#         print(f"{idx}\n Sentence: {text}\n Label: {label}\n Embeddings Shape: {embedding}\n\t Embeddings Subset [:6]: {embedding[:6]} \n Norm Embeddings: {norm_embedding.shape}, \n\tNorm Embeddings Subset [:6]: {norm_embedding[:6]}")
# print(f"\tShape: {y_test_df.shape}, \nSubset of Data:{y_test_df.head(7)}")

## Load Prompt

In [9]:
# prediction_properties = PredictionProperties.get_prediction_properties()
# prediction_requirements = PredictionProperties.get_requirements()
# system_identity_prompt = "You are an expert at identifying specific types of sentences by knowing the sentence format."
# prediction_examples_prompt = """Some examples of predictions in the PhraseBank dataset are
#     1. According to Gran , the company has no plans to move all production to Russia , although that is where the company is growing .
#     2. According to the company 's updated strategy for the years 2009-2012 , Basware targets a long-term net sales growth in the range of 20 % -40 % with an operating profit margin of 10 % -20 % of net sales .
#     3. Its board of directors will propose a dividend of EUR0 .12 per share for 2010 , up from the EUR0 .08 per share paid in 2009 .
# """
# non_prediction_examples_prompt = """Some examples of non-predictions in the PhraseBank dataset are
#     1. Net sales increased to EUR193 .3 m from EUR179 .9 m and pretax profit rose by 34.2 % to EUR43 .1 m. ( EUR1 = USD1 .4 )
#     2. Net sales surged by 18.5 % to EUR167 .8 m. Teleste said that EUR20 .4 m , or 12.2 % , of the sales came from the acquisitions made in 2009 .
#     3. STORA ENSO , NORSKE SKOG , M-REAL , UPM-KYMMENE Credit Suisse First Boston ( CFSB ) raised the fair value for shares in four of the largest Nordic forestry groups .
# """
# # goal_prompt = "Given the above, identify the prediction."

# base_prompt = f"""{system_identity_prompt} The sentence format is based on: 
    
#     {prediction_properties}
#     Enforce: {prediction_requirements}
#     Know: {prediction_examples_prompt}
#     Know: {non_prediction_examples_prompt}

# """
# base_prompt

In [10]:
# Base prompt shared by every model call: it sets the role, defines what counts as a
# prediction, and lists a few labelled examples. Passed to llm_certifier as base_prompt.
prompt_1 = """ 

Role: 
You are a linguist expert. You are acting as a prediction detector. Your task is to identify if a given sentence is a prediction about the future.

Background:
A prediction is a statement about what someone thinks will happen in the future.
Examples of predictions:
- "It will rain tomorrow." (Yes)
- "The stock market is expected to rise next quarter." (Yes)
- "I am going to the store." (No)
- “Lakers will win the championship. ”(Yes)

A prediction may contain: source, target, date, outcome.
"""

## Models

In [11]:
tgmf = TextGenerationModelFactory()

# Models under evaluation, chosen by explicit name. Alternatives to an explicit list:
#   - all Groq models:       tgmf.create_instances(tgmf.get_groq_model_names())
#   - all NaviGator models:  tgmf.create_instances(tgmf.get_navigator_model_names())
#   - every available model: tgmf.create_instances()
#   - any custom mix, e.g.   ['llama-3.1-70b-instruct', 'mistral-small-3.1', 'llama-3.1-8b-instant']
selected_model_names = ['llama-3.1-8b-instant', 'llama-3.3-70b-versatile', 'llama-3.3-70b-instruct']
models = tgmf.create_instances(selected_model_names)
models

[<text_generation_models.LlamaInstantTextGenerationModel at 0x32c629610>,
 <text_generation_models.LlamaVersatileTextGenerationModel at 0x32c6d4dd0>,
 <text_generation_models.Llama3370BInstructTextGenerationModel at 0x32c6d7290>]

In [12]:
import json
import re

def parse_json_response(response):
    """Extract ``label`` and ``reasoning`` from an LLM reply that should contain a JSON object.

    The reply may wrap the JSON in extra text, so the span from the first ``{`` to the
    last ``}`` is what gets parsed.

    Args:
        response: Raw text returned by the model.

    Returns:
        A ``(label, reasoning)`` tuple taken from the parsed object (either element is
        ``None`` if its key is absent), or ``(None, None)`` when no JSON object can be
        found or parsed. A tuple is always returned so callers can unpack the result.
    """
    try:
        # Greedy match with DOTALL: tolerate leading/trailing chatter and multi-line JSON.
        json_match = re.search(r'\{.*\}', response, re.DOTALL)
        if json_match is None:
            # Previously this path fell through and returned a bare None, which broke
            # tuple unpacking at the call site.
            print("Error parsing JSON: no JSON object found in response")
            return None, None
        data = json.loads(json_match.group())
        return data.get('label'), data.get('reasoning')
    except Exception as e:
        # Best-effort parsing: report the problem and let the caller continue.
        print(f"Error parsing JSON: {e}")
        return None, None

In [13]:
def llm_certifier(data: str, base_prompt: str, model):
    """Ask ``model`` whether ``data`` is a prediction and parse its JSON verdict.

    Args:
        data: The sentence to classify.
        base_prompt: Role/background text interpolated at the start of the prompt.
        model: Object providing ``user(prompt)`` and ``chat_completion(messages)``.

    Returns:
        A tuple ``(raw_text_llm_generation, label, reasoning)``: the unparsed model
        output followed by the two values extracted by ``parse_json_response``.
    """
    # The leading whitespace on the continuation lines is part of the prompt text sent
    # to the model, so it is kept exactly as it was.
    prompt = f""" Given this: {base_prompt}. Also given the sentence '{data}', your task is to analyze the sentence and determine if it is a prediction. If prediction, generate label as 1 and if non-prediction generate label as 0.
        Respond ONLY with valid JSON in this exact format:
        {{"label": 0, "reasoning": "your explanation here"}}
        Examples:
        - "It will rain tomorrow." → {{"label": 1, "reasoning": "Contains the future tense words 'will' and 'tomorrow'"}}
        - "The stock market is expected to rise next quarter." → {{"label": 1, "reasoning": "Contains future tense words 'is expected'"}}
        - "I am going to the store." → {{"label": 0, "reasoning": "Does not contain a future tense word"}}
        - "Lakers will win the championship." → {{"label": 1, "reasoning": "Contains the future tense word 'will'"}}
        """
    input_prompt = model.user(prompt)
    raw_text_llm_generation = model.chat_completion([input_prompt])

    # Parse the JSON response
    label, reasoning = parse_json_response(raw_text_llm_generation)

    return raw_text_llm_generation, label, reasoning

In [14]:
# Section banner marking the start of the model-labelling stage in the printed output.
section_banner = "======= PROMPT + MODEL -> LABEL and REASONING ======="
print(section_banner)



In [15]:
    # content : meta :: text : meta_data
# Query every model for every test sentence, collecting one record per (sentence, model) pair.
# Record layout: (text, raw_response, llm_label, llm_reasoning, llm_name).
results = []
for idx, row in X_test_df.iterrows():
    text = row['Base Sentence']
    print(f"{idx} --- Sentence: {text}")
    for model in models:
        raw_response, llm_label, llm_reasoning = llm_certifier(text, prompt_1, model)
        # The model objects expose their name through a callable __name__; fetch it once.
        model_name = model.__name__()
        # Fixed: the previous f-string carried stray "'," fragments after "Label:" and "Reasoning:".
        print(f"\tModel: {model_name}\n\t\tLabel: {llm_label}\n\t\tReasoning: {llm_reasoning}")
        result = (text, raw_response, llm_label, llm_reasoning, model_name)
        results.append(result)


0 --- Sentence: In 08/2024, Coach Michael Brown envisions that the touchdown rate at the New England Patriots has some probability to remain stable.
	Model: llama-3.1-8b-instant
		Label:', 1
		Reasoning:', The sentence contains a future date '08/2024' and the phrase 'envision that', which suggests a prediction about the future.
	Model: llama-3.3-70b-versatile
		Label:', 1
		Reasoning:', Contains the future tense words 'envisions' and a specific date '08/2024', indicating a statement about a potential future outcome regarding the touchdown rate at the New England Patriots
	Model: llama-3.3-70b-instruct
		Label:', 1
		Reasoning:', Contains the future tense phrase 'envisions that' and a specific date '08/2024', indicating a statement about a potential future outcome regarding the touchdown rate at the New England Patriots
1 --- Sentence: Coach Rachel Thompson forecasts that the points per game at the New York Knicks potentially decrease in August 2024.
	Model: llama-3.1-8b-instant
		Label

In [16]:
# Display the raw (text, raw_response, llm_label, llm_reasoning, llm_name) tuples collected by the loop.
results

[('In 08/2024, Coach Michael Brown envisions that the touchdown rate at the New England Patriots has some probability to remain stable.',
  '{"label": 1, "reasoning": "The sentence contains a future date \'08/2024\' and the phrase \'envision that\', which suggests a prediction about the future."}',
  1,
  "The sentence contains a future date '08/2024' and the phrase 'envision that', which suggests a prediction about the future.",
  'llama-3.1-8b-instant'),
 ('In 08/2024, Coach Michael Brown envisions that the touchdown rate at the New England Patriots has some probability to remain stable.',
  '{"label": 1, "reasoning": "Contains the future tense words \'envisions\' and a specific date \'08/2024\', indicating a statement about a potential future outcome regarding the touchdown rate at the New England Patriots"}',
  1,
  "Contains the future tense words 'envisions' and a specific date '08/2024', indicating a statement about a potential future outcome regarding the touchdown rate at the 

In [17]:
# groupby text

In [18]:
# One row per (sentence, model) record; the column order mirrors the tuple layout in `results`.
result_columns = ['text', 'raw_response', 'llm_label', 'llm_reasoning', 'llm_name']
results_with_llm_label_df = pd.DataFrame(results, columns=result_columns)
results_with_llm_label_df

Unnamed: 0,text,raw_response,llm_label,llm_reasoning,llm_name
0,"In 08/2024, Coach Michael Brown envisions that the touchdown rate at the New England Patriots has some probability to remain stable.","{""label"": 1, ""reasoning"": ""The sentence contains a future date '08/2024' and the phrase 'envision that', which suggests a prediction about the future.""}",1,"The sentence contains a future date '08/2024' and the phrase 'envision that', which suggests a prediction about the future.",llama-3.1-8b-instant
1,"In 08/2024, Coach Michael Brown envisions that the touchdown rate at the New England Patriots has some probability to remain stable.","{""label"": 1, ""reasoning"": ""Contains the future tense words 'envisions' and a specific date '08/2024', indicating a statement about a potential future outcome regarding the touchdown rate at the New England Patriots""}",1,"Contains the future tense words 'envisions' and a specific date '08/2024', indicating a statement about a potential future outcome regarding the touchdown rate at the New England Patriots",llama-3.3-70b-versatile
2,"In 08/2024, Coach Michael Brown envisions that the touchdown rate at the New England Patriots has some probability to remain stable.","{""label"": 1, ""reasoning"": ""Contains the future tense phrase 'envisions that' and a specific date '08/2024', indicating a statement about a potential future outcome regarding the touchdown rate at the New England Patriots""}",1,"Contains the future tense phrase 'envisions that' and a specific date '08/2024', indicating a statement about a potential future outcome regarding the touchdown rate at the New England Patriots",llama-3.3-70b-instruct
3,Coach Rachel Thompson forecasts that the points per game at the New York Knicks potentially decrease in August 2024.,"{""label"": 1, ""reasoning"": ""Contains the future tense word 'will' and also a specific date 'August 2024' which indicates a prediction about the future.""}",1,Contains the future tense word 'will' and also a specific date 'August 2024' which indicates a prediction about the future.,llama-3.1-8b-instant
4,Coach Rachel Thompson forecasts that the points per game at the New York Knicks potentially decrease in August 2024.,"{""label"": 1, ""reasoning"": ""Contains the future tense words 'forecasts', 'potentially decrease', and a specific future date 'August 2024', indicating a prediction about the future.""}",1,"Contains the future tense words 'forecasts', 'potentially decrease', and a specific future date 'August 2024', indicating a prediction about the future.",llama-3.3-70b-versatile
5,Coach Rachel Thompson forecasts that the points per game at the New York Knicks potentially decrease in August 2024.,"{""label"": 1, ""reasoning"": ""Contains the future tense words 'forecasts', 'potentially decrease', and a specific future date 'August 2024'""}",1,"Contains the future tense words 'forecasts', 'potentially decrease', and a specific future date 'August 2024'",llama-3.3-70b-instruct
6,JPMorgan forecasts that the revenue at Microsoft potentially decrease in Q3 of 2027.,"{""label"": 1, ""reasoning"": ""Contains the future tense words 'will', 'potentially' and 'Q3 of 2027' which indicates a prediction about the future""}",1,"Contains the future tense words 'will', 'potentially' and 'Q3 of 2027' which indicates a prediction about the future",llama-3.1-8b-instant
7,JPMorgan forecasts that the revenue at Microsoft potentially decrease in Q3 of 2027.,"{""label"": 1, ""reasoning"": ""Contains the future tense words 'forecasts' and 'potentially decrease', along with a specific future date 'Q3 of 2027', indicating a prediction about the future revenue at Microsoft""}",1,"Contains the future tense words 'forecasts' and 'potentially decrease', along with a specific future date 'Q3 of 2027', indicating a prediction about the future revenue at Microsoft",llama-3.3-70b-versatile
8,JPMorgan forecasts that the revenue at Microsoft potentially decrease in Q3 of 2027.,"{""label"": 1, ""reasoning"": ""Contains the future tense words 'forecasts' and 'potentially decrease in Q3 of 2027', indicating a prediction about a future event""}",1,"Contains the future tense words 'forecasts' and 'potentially decrease in Q3 of 2027', indicating a prediction about a future event",llama-3.3-70b-instruct
9,"The Brazilian unit of Finnish security solutions provider F-Secure signed up 1,500 new clients last year , online news source Reseller Web quoted the division 's commercial director , Vladimir Brand+_o , as saying .","{""label"": 0, ""reasoning"": ""The sentence reports past events and does not contain any future tense words or phrases that indicate a prediction about the future.""}",0,The sentence reports past events and does not contain any future tense words or phrases that indicate a prediction about the future.,llama-3.1-8b-instant


In [19]:
# Rename the ground-truth column so it stays distinguishable once combined with the feature frame.
label_column_mapping = {'Sentence Label': 'Actual Label'}
y_test_df.rename(columns=label_column_mapping, inplace=True)

In [20]:
def get_llm_labels(df, model_name):
    """Return the `llm_label` values of the rows that belong to one model.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame holding at least an `llm_name` and an `llm_label` column.
    model_name
        Value compared for equality against the `llm_name` column.

    Returns
    -------
    pandas.Series
        The `llm_label` column restricted to the matching rows. The rows keep
        the index labels they had in `df`; the result is empty when nothing
        matches.
    """
    return df.loc[df['llm_name'] == model_name, 'llm_label']


# Build one wide frame: the test features, the ground-truth label, and one
# prediction column per LLM, each named after its model.
test_and_models_df = pd.concat([X_test_df, y_test_df], axis=1)
for model in models:
    llm_column = model.__name__()
    print(llm_column)
    model_labels = get_llm_labels(results_with_llm_label_df, llm_column)
    # The labels are attached by position (the index from the results frame is
    # discarded), so each model must have exactly one label per test row.
    # NOTE(review): this also assumes the per-model rows are in test-set order
    # — confirm against the cell that builds results_with_llm_label_df.
    if len(model_labels) != len(test_and_models_df):
        raise ValueError(
            f"{llm_column}: expected {len(test_and_models_df)} labels, "
            f"got {len(model_labels)}"
        )
    test_and_models_df[llm_column] = model_labels.to_numpy().ravel()
test_and_models_df

llama-3.1-8b-instant
llama-3.3-70b-versatile
llama-3.3-70b-instruct


Unnamed: 0,Base Sentence,Sentence Label,Embedding,Normalized Embeddings,Actual Label,llama-3.1-8b-instant,llama-3.3-70b-versatile,llama-3.3-70b-instruct
0,"In 08/2024, Coach Michael Brown envisions that the touchdown rate at the New England Patriots has some probability to remain stable.",1,[-2.95873899e-02 1.89737260e-01 -1.18197471e-01 -4.13006060e-02\n 1.10420622e-01 -4.95679379e-02 1.91853363e-02 3.95639287e-03\n 1.09274819e-01 1.97997952e+00 -1.85330868e-01 -4.54250947e-02\n 1.08167037e-01 2.54211240e-02 -9.49313119e-03 -6.84713572e-02\n 1.55237708e-02 1.00765920e+00 -6.00808859e-03 -4.15705629e-02\n -7.41642118e-02 -1.50733357e-02 1.34443464e-02 -8.06241706e-02\n -7.69194737e-02 1.14793696e-01 -2.15540364e-01 6.73782602e-02\n 3.61875258e-02 1.65809989e-02 -4.40017954e-02 2.61284299e-02\n 4.48011309e-02 -9.68891475e-03 3.49024460e-02 9.65972338e-03\n -2.66898591e-02 7.46985758e-03 -6.69648051e-02 -3.72406021e-02\n 7.12334365e-02 6.75264001e-02 7.85919055e-02 -7.79033080e-03\n 4.30065207e-02 -5.40831983e-02 -1.17614612e-01 -2.35915799e-02\n -4....,[ 0.94400704 -0.8645446 -2.054663 -0.3882444 0.8747154 -0.2001761\n 0.3727082 0.06485146 0.7578419 0.74855906 1.5445073 -0.9450778\n 0.8223399 0.4856363 0.34803256 -0.53150874 0.5283372 -0.55164915\n 1.8045051 -0.24819341 -2.180726 -1.0896391 0.08680368 0.22307044\n -1.5891142 0.5755748 -1.1823585 0.6673478 0.25934806 -0.86385673\n -0.6393052 0.24252476 0.7617 -1.2495198 -0.25059918 0.46805078\n -0.01315914 -0.6343914 -0.95103335 -0.23913547 1.234197 0.26890773\n -0.0123065 0.36895898 0.34772688 -0.8687144 -0.7102191 0.17720816\n -1.3530828 0.6399917 0.0253048 1.1315668 -0.82290274 0.01780647\n 0.33573022 0.84572476 0.5561635 0.630834 0.5291998 1.5564367\n 0.74758047 1.1630359 0.18509752 -1.5357434 -0.7491160...,1,1,1,1
1,Coach Rachel Thompson forecasts that the points per game at the New York Knicks potentially decrease in August 2024.,1,[-7.31271803e-02 2.57879347e-01 -1.00163361e-02 -2.91225519e-02\n 6.20316342e-02 -1.89948324e-02 2.83652991e-02 -2.36207973e-02\n 1.25035286e-01 1.68172586e+00 -3.08204234e-01 3.09939720e-02\n 5.19324541e-02 -4.09708358e-03 5.32346480e-02 -9.79294349e-03\n -1.77208520e-02 9.74500477e-01 -6.05918542e-02 -1.19134128e-01\n 6.85500652e-02 -1.13969576e-02 1.40849143e-01 -1.98314548e-01\n -2.55904496e-02 2.24934310e-01 -2.85059363e-01 1.34536862e-01\n -5.01615815e-02 1.06635571e-01 -1.34736553e-01 6.85500056e-02\n 9.54245403e-03 1.24958850e-01 8.98100063e-03 5.74925914e-02\n 3.36430110e-02 3.12536396e-02 -2.97347642e-02 -5.55007998e-03\n -7.19834417e-02 1.25202790e-01 3.19119208e-02 -8.32804516e-02\n -8.05644970e-03 -9.72918496e-02 -1.58620656e-01 7.15503991e-02\n -3....,[ 0.4060981 0.06459576 -0.33290786 -0.18525055 0.23426409 0.28097287\n 0.5438155 -0.29924256 0.9618827 -0.30452695 -0.16134328 0.04894692\n -0.02327521 -0.02009717 1.0117191 0.54240227 -0.07506002 -0.78002316\n 0.8962295 -1.5484266 0.80796844 -1.033549 2.02265 -1.4457239\n -0.84145296 2.089933 -2.1967309 1.8129327 -1.2062268 0.43286043\n -2.3416817 0.92772835 0.19314055 0.6750809 -0.73656416 1.299064\n 0.8058958 -0.26324862 -0.31853175 0.24293266 -1.204331 1.1874502\n -0.641938 -0.9460187 -0.5985572 -1.5994209 -1.2832663 1.3151182\n -0.65960604 -0.5158133 0.5862492 -0.48699406 0.34597123 -1.3362038\n -1.5870239 -0.51357347 0.21139102 1.4604084 1.0736531 -0.19801451\n -1.9606705 1.1726837 -0.5702353 -2.2427983 0.5018804 ...,1,1,1,1
2,JPMorgan forecasts that the revenue at Microsoft potentially decrease in Q3 of 2027.,1,[-2.24677563e-01 2.28302136e-01 -3.20941433e-02 -1.78482141e-02\n -5.66348508e-02 2.05190871e-02 -3.81574295e-02 1.41838089e-01\n 1.64982721e-01 1.45077002e+00 -3.47716570e-01 4.82952334e-02\n 1.09031864e-01 4.18408737e-02 6.93667382e-02 7.94899929e-03\n -2.80997157e-02 1.02238500e+00 -1.17930911e-01 -1.29385918e-01\n 4.74877171e-02 1.93608850e-01 6.20140657e-02 -1.79264918e-01\n 6.11357428e-02 1.66994065e-01 -9.25592855e-02 5.74763976e-02\n 2.44720709e-02 6.20023720e-02 -3.91931273e-02 -3.54092242e-03\n -2.73218546e-02 1.84805289e-01 -8.36593192e-03 -5.23123983e-03\n 3.49834897e-02 1.63400516e-01 1.24472668e-02 1.79933552e-02\n 2.80983802e-02 1.24136940e-01 1.31817430e-01 1.38525711e-02\n 4.25122082e-02 -9.02827159e-02 -2.97950823e-02 -7.99663663e-02\n -1....,[-1.4662192e+00 -3.3869946e-01 -6.8428683e-01 2.6793862e-03\n -1.3363433e+00 9.0282923e-01 -6.9611585e-01 1.8852654e+00\n 1.4790570e+00 -1.1199952e+00 -7.0989293e-01 2.7399394e-01\n 8.3534455e-01 7.6695478e-01 1.1824033e+00 8.6710888e-01\n -2.6343867e-01 -4.5022815e-01 -5.7894431e-02 -1.7202821e+00\n 3.6688486e-01 2.0942023e+00 8.2479399e-01 -1.1756091e+00\n 4.2180544e-01 1.2932943e+00 6.1209482e-01 4.9844283e-01\n 6.0505498e-02 -2.0982359e-01 -5.4908437e-01 -2.3670182e-01\n -4.0131029e-01 1.5305017e+00 -1.0617776e+00 2.0934603e-01\n 8.2409364e-01 1.7988863e+00 3.9809933e-01 6.0106939e-01\n 4.9974424e-01 1.1704757e+00 7.0561284e-01 7.4596107e-01\n 3.3856642e-01 -1.4808886e+00 5.1703244e-01 -4.9704137e-01\n -2.3291378e+00 5.7871742e-03 -3.6583024e-01 -8.420...,1,1,1,1
3,"The Brazilian unit of Finnish security solutions provider F-Secure signed up 1,500 new clients last year , online news source Reseller Web quoted the division 's commercial director , Vladimir Brand+_o , as saying .",0,[-9.49790627e-02 1.75042719e-01 -4.31647301e-02 -4.51112948e-02\n 1.72519922e-01 -2.92447265e-02 -1.74319204e-02 -2.38732591e-01\n -4.68273982e-02 1.85996163e+00 -2.00454473e-01 -6.07615486e-02\n 8.27170834e-02 -1.10286549e-01 -1.05571873e-01 -1.35309473e-02\n -8.58865678e-04 1.12296283e+00 -5.33046685e-02 -5.39846495e-02\n 5.32859191e-02 5.21327853e-02 4.24367469e-03 -4.33036722e-02\n -6.59389868e-02 1.87109818e-03 -1.60044193e-01 -3.14699784e-02\n 8.60306025e-02 1.97323579e-02 3.05023193e-02 -3.53931487e-02\n 9.71114933e-02 1.66937694e-01 1.44137610e-02 -1.90161038e-02\n -6.34789988e-02 2.72928309e-02 3.54067907e-02 -1.58601403e-02\n 5.33533562e-03 9.40666571e-02 1.13477565e-01 -1.08105645e-01\n -7.01851994e-02 2.89511457e-02 -3.51462960e-02 2.04022620e-02\n 9....,[ 0.13613077 -1.0649096 -0.86048055 -0.45176408 1.6966292 0.11966357\n -0.30980867 -3.1393042 -1.263115 0.3247951 1.3345463 -1.1445675\n 0.43964183 -1.8394341 -0.66852117 0.4739907 0.23098873 0.24248143\n 1.0174885 -0.45629638 0.48830968 -0.06428263 -0.05299549 0.75225765\n -1.4291718 -0.9770338 -0.3725974 -1.0187958 1.1053182 -0.81847954\n 0.75855 -0.7511867 1.6052241 1.2751096 -0.63471293 -0.03014214\n -0.5125938 -0.32505655 0.788159 0.08609865 0.11216199 0.6915825\n 0.4582401 -1.3784537 -1.749909 0.5354891 0.44225088 0.70338005\n 0.9248192 0.05322725 -0.7257571 -0.4146794 0.07898946 1.125015\n -1.4763968 1.195394 -0.0841465 -1.0040774 1.2092102 1.1303582\n 2.0370452 0.5636679 -0.25583628 -1.6240209 1.8427348 ...,0,0,0,0
4,"On August 15, 2027, Dr. John Lee speculates that the average physical activity levels at U.S. high schools will likely increase.",1,[-1.65867433e-01 3.57482523e-01 7.28247538e-02 -3.14977057e-02\n -3.20128016e-02 2.44006794e-02 -3.71189825e-02 -6.11495972e-03\n 5.43026142e-02 1.84919739e+00 -2.53526360e-01 -3.40995230e-02\n 7.54815936e-02 3.72875221e-02 -7.54721314e-02 5.80832064e-02\n -1.76085960e-02 1.20470536e+00 -1.56163409e-01 -7.61056989e-02\n 1.80367492e-02 7.72641674e-02 7.94613808e-02 -1.32992119e-01\n -3.02432496e-02 1.56555027e-02 -1.88882783e-01 2.65764166e-02\n -7.17546232e-03 8.97909626e-02 -6.21524043e-02 6.56070113e-02\n -6.33275881e-03 5.11547066e-02 3.18643376e-02 -3.88401784e-02\n -8.96424800e-03 1.03460431e-01 -4.25934931e-03 3.29740532e-02\n -2.31975634e-02 -4.60627079e-02 3.91621627e-02 6.83518276e-02\n 7.78002962e-02 -5.75087452e-03 -1.13928251e-01 1.78448763e-02\n 6....,[-7.39654064e-01 1.42271864e+00 9.85548079e-01 -2.24841565e-01\n -1.01045871e+00 9.63916361e-01 -6.76760018e-01 -6.81178197e-02\n 4.61514480e-02 2.86788255e-01 5.97749472e-01 -7.97759831e-01\n 3.30839753e-01 6.88942492e-01 -3.50053191e-01 1.78464699e+00\n -7.30225444e-02 8.05466533e-01 -6.94084466e-01 -8.27121615e-01\n -2.49871537e-01 3.19144130e-01 1.08989644e+00 -5.19482493e-01\n -9.09225821e-01 -7.87507594e-01 -7.93389797e-01 -2.86460631e-02\n -4.76637661e-01 1.90310910e-01 -9.79849100e-01 8.80192399e-01\n -6.28532246e-02 -3.79845709e-01 -3.07556450e-01 -3.74551147e-01\n 2.27476686e-01 8.63529027e-01 1.14270397e-01 8.28951955e-01\n -3.73662561e-01 -1.54008961e+00 -5.44144869e-01 1.69529462e+00\n 9.92515028e-01 -5.13608567e-02 -6.58703268e-01 6.72793388e-01\n 3....,1,1,1,1
5,Analyst Emma Taylor noted that the home run count at the Chicago Cubs remained stable in 2024.,0,[-2.64777802e-02 1.62194327e-01 -6.83643995e-03 4.88223322e-02\n 7.62382820e-02 1.14460206e-02 -5.89646101e-02 -1.04382239e-01\n 1.83703244e-01 1.89310622e+00 -1.79091990e-01 -6.69373795e-02\n 5.82357794e-02 9.85373370e-03 -7.01477230e-02 2.99751665e-02\n 2.18661092e-02 8.88447881e-01 -1.64659292e-01 -5.07162213e-02\n 3.26223765e-03 -2.32030936e-02 9.22632739e-02 -9.55311134e-02\n -9.47977826e-02 7.93419480e-02 -1.47108674e-01 5.80902882e-02\n -2.33686138e-02 5.19458652e-02 -3.62452157e-02 -1.53706027e-02\n 8.48071575e-02 1.05495445e-01 -3.96783315e-02 9.21180006e-03\n 4.12575155e-02 -4.33583446e-02 -3.84721495e-02 -4.78743613e-02\n 5.88028366e-03 1.01354003e-01 -3.68710863e-03 2.61255540e-02\n 1.05573274e-01 -2.55232155e-02 -1.38972521e-01 -1.31551251e-01\n 5....,[ 0.9824245 -1.2401017 -0.28229827 1.1139989 0.42229584 0.7600405\n -1.0839454 -1.3655136 1.7214198 0.44182336 1.6311216 -1.2248999\n 0.07150966 0.21892124 -0.2937187 1.2702239 0.643452 -1.3726931\n -0.83545625 -0.40150625 -0.55927646 -1.213674 1.2844143 0.01169699\n -1.8495307 0.08813752 -0.18385144 0.50891453 -0.7514787 -0.35462964\n -0.49377528 -0.42777768 1.4068122 0.39687887 -1.64881 0.46026888\n 0.9092673 -1.4275589 -0.4669713 -0.4008935 0.12144072 0.8076393\n -1.1221067 0.9597466 1.5071958 -0.38573322 -1.0086895 -1.1140031\n 0.31486785 -0.6657662 -0.6382753 -0.18856792 -0.6044813 -1.4846716\n -0.5640565 0.22765079 -0.928093 -0.12893389 1.3433449 0.20920368\n -1.0305243 1.7044654 -0.05661023 -0.32602778 -1.4305439 ...,0,0,0,0
6,"According to Goldman Sachs, the research and development expenses at Facebook would fall in 2025.",1,[-1.21063471e-01 2.51935005e-01 -2.94214804e-02 -4.79285903e-02\n -2.75685582e-02 -1.82558745e-02 -2.92567778e-02 -6.38376474e-02\n 4.33391780e-02 1.99582398e+00 -4.16828781e-01 9.97788087e-03\n 1.58754244e-01 1.18489295e-01 -6.33422360e-02 3.04343104e-02\n -4.03524712e-02 1.07175887e+00 -1.86021462e-01 6.64371178e-02\n 1.12161785e-01 1.61022589e-01 -3.45068164e-02 -1.36160821e-01\n 5.12972996e-02 1.39190540e-01 -9.68207866e-02 1.42498359e-01\n 6.11764472e-03 4.63660844e-02 5.58225587e-02 -1.04840025e-02\n 5.23718782e-02 2.01317146e-01 4.69320081e-02 -1.29268587e-01\n 2.12263362e-03 1.14207558e-01 -9.96199772e-02 -4.00746167e-02\n -1.65041629e-02 9.89470035e-02 1.30141778e-02 -1.81327816e-02\n 3.88694070e-02 1.02692857e-01 -6.69377595e-02 -1.15642011e-01\n 1....,[-1.86127022e-01 -1.64573379e-02 -6.41750097e-01 -4.98725086e-01\n -9.51637030e-01 2.92602330e-01 -5.30214727e-01 -8.30214500e-01\n -9.57852453e-02 8.04503322e-01 -1.66937757e+00 -2.24420890e-01\n 1.58303380e+00 2.08016706e+00 -2.21713752e-01 1.27862692e+00\n -4.85828876e-01 -1.10175587e-01 -1.19092357e+00 1.56238830e+00\n 1.72127664e+00 1.59703732e+00 -6.41788065e-01 -5.64413190e-01\n 2.78498143e-01 9.11014855e-01 5.49914002e-01 1.94873917e+00\n -2.51018137e-01 -4.34974223e-01 1.23361146e+00 -3.48848134e-01\n 8.83781075e-01 1.76651561e+00 -2.50737220e-02 -1.94558835e+00\n 3.77987742e-01 1.03123653e+00 -1.50581253e+00 -2.82245785e-01\n -2.59695232e-01 7.69305944e-01 -8.96835506e-01 1.88801721e-01\n 2.71059096e-01 1.78254378e+00 -2.02530343e-03 -9.23726618e-01\n 1....,1,1,1,1
7,"According to Goldman Sachs, the research and development expenses at Alphabet would fall in 2029 Q2.",1,[-1.16282433e-01 2.23263055e-01 -4.77008447e-02 -2.47607771e-02\n -6.04338851e-03 4.04301733e-02 -3.23314071e-02 -9.68332111e-04\n 3.38746645e-02 1.72199607e+00 -3.18866283e-01 -2.87969969e-02\n 1.62754014e-01 9.87205058e-02 -2.19526701e-02 1.81081221e-02\n -4.70123328e-02 9.82071459e-01 -1.30064502e-01 3.20672477e-03\n 1.02472685e-01 1.69037238e-01 -1.51881082e-02 -7.41784945e-02\n 3.36250067e-02 1.51269838e-01 -8.02540258e-02 1.11201882e-01\n 3.38878855e-02 9.41396952e-02 -2.40037050e-02 -2.10562851e-02\n 7.50183314e-02 1.79007813e-01 1.05513550e-01 -6.57542273e-02\n 7.32603222e-02 9.95843560e-02 -1.04272313e-01 -4.63360501e-03\n 7.77145103e-02 1.30581558e-01 -3.02988812e-02 -4.71362621e-02\n 3.43839973e-02 6.63174689e-02 -1.30910538e-02 -8.30507874e-02\n 7....,[-1.2706007e-01 -4.0740904e-01 -9.3267500e-01 -1.1254490e-01\n -6.6674119e-01 1.2161831e+00 -5.8752340e-01 -1.6831385e-04\n -2.1831633e-01 -1.6233927e-01 -3.0936447e-01 -7.2878700e-01\n 1.6431794e+00 1.7414697e+00 2.1620537e-01 1.0530375e+00\n -6.0670686e-01 -7.2787952e-01 -2.5979781e-01 5.0242859e-01\n 1.5183694e+00 1.7193159e+00 -3.4825081e-01 3.1446710e-01\n 2.1082578e-02 1.0770969e+00 7.9164451e-01 1.4148868e+00\n 2.2031702e-01 2.5292936e-01 -2.6409858e-01 -5.1961416e-01\n 1.2489637e+00 1.4476351e+00 1.0731899e+00 -8.4213614e-01\n 1.3437246e+00 8.0304337e-01 -1.5848511e+00 2.5687385e-01\n 1.3445492e+00 1.2731113e+00 -1.4810530e+00 -3.1641573e-01\n 1.8793680e-01 1.1673954e+00 7.5046623e-01 -5.3393143e-01\n 5.4321671e-01 -6.9263977e-01 -7.2093889e-02 -1.725...,1,1,1,1
8,"According to the company 's updated strategy for the years 2009-2012 , Basware targets a long-term net sales growth in the range of 20 % -40 % with an operating profit margin of 10 % -20 % of net sales .",1,[-2.63320029e-01 3.28540474e-01 4.48558182e-02 -1.09422831e-02\n 3.39385755e-02 -1.25387803e-01 2.09340192e-02 8.32975954e-02\n 1.10356383e-01 1.74510765e+00 -3.98153275e-01 1.41264707e-01\n -8.98099765e-02 -1.47727029e-02 5.77163734e-02 -5.09223454e-02\n -4.27257568e-02 1.41277170e+00 -2.09747717e-01 1.56779997e-02\n -3.64826852e-03 5.97854443e-02 -7.89400712e-02 -1.39893517e-02\n 1.13595366e-01 9.83026922e-02 -5.14178127e-02 7.06899390e-02\n 2.47105230e-02 1.06609218e-01 -4.19648038e-03 -1.85196903e-02\n -1.48957130e-02 9.90139246e-02 1.10973224e-01 -2.59710420e-02\n -6.08825274e-02 4.48305123e-02 4.36691009e-02 -9.27021950e-02\n 6.36729002e-02 1.98753644e-02 2.10891932e-01 -8.54000822e-02\n 4.67465706e-02 -2.04764325e-02 -3.69948000e-02 1.16207667e-01\n 1....,[-1.9436246 1.028084 0.54040897 0.11779313 -0.1375609 -1.3934029\n 0.4053023 1.1123714 0.7718442 -0.08073597 -1.4101056 1.4832989\n -2.1546957 -0.20300184 1.0591376 -0.21033312 -0.5289045 2.2384815\n -1.5857292 0.7114902 -0.703995 0.05247315 -1.316927 1.167921\n 1.1859351 0.34883478 1.2124021 0.7238381 0.06455266 0.43248096\n 0.10752695 -0.47864252 -0.20093407 0.30423462 1.1755456 -0.15097216\n -0.47734514 -0.05138328 0.9285275 -1.0828031 1.1054652 -0.48997307\n 1.7721897 -0.982941 0.4170364 -0.3003865 0.41641855 1.8492247\n 1.2027824 1.2562562 0.23552838 0.1251814 -0.7560077 -0.32334074\n 0.27053404 -0.18198027 -0.9100257 -1.1144073 -0.7384371 -0.37289226\n 1.8860408 -0.8132241 -0.8562722 1.3129568 0.47118616 -...,1,1,1,1
9,"The Lithuanian beer market made up 14.41 million liters in January , a rise of 0.8 percent from the year-earlier figure , the Lithuanian Brewers ' Association reporting citing the results from its members .",0,[-6.91080689e-02 2.09023327e-01 7.64123956e-03 1.30530549e-02\n 1.09097742e-01 -2.65002474e-02 -6.33090129e-03 6.94910735e-02\n 4.81527932e-02 1.83084953e+00 -2.99082041e-01 4.21392843e-02\n 1.01302572e-01 -1.01742432e-01 4.97623309e-02 -1.04363754e-01\n 1.66779663e-02 9.43629920e-01 -8.64167511e-02 -1.25849351e-01\n 6.18927404e-02 -2.40510739e-02 1.94686186e-02 -2.09785700e-02\n 7.02814385e-02 3.05853784e-02 -1.16309822e-01 3.40735577e-02\n 7.60149164e-03 -2.91519240e-03 4.94787134e-02 5.18007539e-02\n -1.03770290e-02 3.81884836e-02 8.20324868e-02 1.25274405e-01\n -1.00585103e-01 -1.71076860e-02 9.11845416e-02 3.09042092e-02\n -1.15173072e-01 8.83789510e-02 1.18721716e-01 -2.96848472e-02\n -1.36763854e-02 2.32736655e-02 -1.64267607e-02 -4.31141667e-02\n -3....,[ 0.4557519 -0.6015726 -0.05187901 0.51776713 0.8572064 0.16285524\n -0.10289441 0.9300877 -0.03346641 0.2220049 -0.03469991 0.19392012\n 0.71911705 -1.6930484 0.9749804 -1.1883984 0.5492861 -0.9926378\n 0.46650228 -1.660997 0.66855204 -1.2266115 0.17833921 1.0688171\n 0.5550221 -0.582232 0.26554373 0.09923944 -0.22583328 -1.144587\n 1.1145874 0.6571904 -0.1280686 -0.56517965 0.6329762 2.476656\n -1.0163314 -1.0179209 1.735768 0.79746604 -1.939713 0.60100126\n 0.52897453 -0.01242603 -0.7027042 0.43947652 0.70385075 -0.05628444\n -1.1568725 -0.41462633 -1.6672571 0.2371596 -0.4023661 0.5949108\n 0.37038162 -1.0722686 0.39552927 -0.39975938 -0.18745846 -0.05191825\n 0.33842385 -1.30149 -0.94034696 1.1220416 -1.0827272 -0...,0,0,0,0


## Save Output

In [21]:
# Persist the combined test/prediction frame. The recorded output shows the
# file landing at <combine_data_path>/llm_classifiers-v1.csv, so the helper
# appears to add the "-v1" suffix itself — confirm in DataProcessing.save_to_file.
DataProcessing.save_to_file(test_and_models_df, combine_data_path, 'llm_classifiers', '.csv')

Saved to: 
	/Users/detraviousjamaribrinkley/Documents/Development/research_labs/uf_ds/predictions/prediction_classification_experiments-v2/../data/combined_generated_fin_phrase_bank/llm_classifiers-v1.csv


## Evaluation

In [22]:
# Banner marking the start of the evaluation output, in the same style as the
# "LOAD DATA" banner printed near the top of the notebook.
print("======= EVALUATION/RESULTS =======")



In [23]:
# Instantiate the project's metrics helper; the evaluation loop below calls
# its eval_classification_report method once per model.
get_metrics = EvaluationMetric()
# Bare expression so the notebook echoes the object's repr as the cell output.
get_metrics

<classification_models.EvaluationMetric at 0x32cf82510>

In [24]:
# Ground truth as stored next to the predictions in the combined frame.
actual_label = test_and_models_df['Actual Label'].to_numpy()
for ml_model in models:
    ml_model_name = ml_model.__name__()
    ml_model_predictions = test_and_models_df[ml_model_name].to_numpy()
    # Truth and predictions are echoed on consecutive lines so the two arrays
    # can be compared by eye before the report.
    print(f"Actual Label:\t\t{actual_label}")
    print(f"{ml_model_name}:\t\t{ml_model_predictions}")
    # NOTE(review): the report is scored against y_test_df, while the truth
    # printed above is read from test_and_models_df — confirm both hold the
    # same labels in the same row order.
    get_metrics.eval_classification_report(y_test_df, ml_model_predictions)

Actual Label:		[1 1 1 0 1 0 1 1 1 0 1 1 1 1 1 1 0 1 0 1 1 1 1 1]
llama-3.1-8b-instant:		[1 1 1 0 1 0 1 1 1 0 1 0 1 1 0 1 0 1 0 1 1 1 1 0]
              precision    recall  f1-score   support

           0       0.62      1.00      0.77         5
           1       1.00      0.84      0.91        19

    accuracy                           0.88        24
   macro avg       0.81      0.92      0.84        24
weighted avg       0.92      0.88      0.88        24

Actual Label:		[1 1 1 0 1 0 1 1 1 0 1 1 1 1 1 1 0 1 0 1 1 1 1 1]
llama-3.3-70b-versatile:		[1 1 1 0 1 0 1 1 1 0 1 0 1 1 0 1 0 1 0 1 1 1 1 1]
              precision    recall  f1-score   support

           0       0.71      1.00      0.83         5
           1       1.00      0.89      0.94        19

    accuracy                           0.92        24
   macro avg       0.86      0.95      0.89        24
weighted avg       0.94      0.92      0.92        24

Actual Label:		[1 1 1 0 1 0 1 1 1 0 1 1 1 1 1 1 0 1 0 1 1 1 1 1]
ll