# LLM Classifiers

**Goal:** Given a sentence as input, classify it as either a prediction or non-prediction.

In [1]:
import os
import sys

import pandas as pd

from tqdm import tqdm

# Working directory of the running kernel; used as the anchor for the relative
# paths built from it.
notebook_dir = os.getcwd()
# Expose the parent directory to the import system so the local modules
# imported next can be resolved. The membership check keeps the cell
# idempotent: re-running it no longer appends duplicate sys.path entries.
project_root = os.path.join(notebook_dir, '../')
if project_root not in sys.path:
    sys.path.append(project_root)

# import log_files
from data_processing import DataProcessing
from classification_models import EvaluationMetric
from text_generation_models import TextGenerationModelFactory

In [2]:
# Notebook-wide pandas display settings: allow up to 800 characters per cell
# and lift the limits on how many columns and rows are rendered.
for option_name, option_value in (
    ('max_colwidth', 800),
    ('display.max_columns', None),
    ('display.max_rows', None),
):
    pd.set_option(option_name, option_value)

## Load Data

In [3]:
# Banner marking the start of the data-loading section in the cell output.
print("======= LOAD DATA =======")



In [4]:
# Locations of the test split, all resolved relative to the notebook directory.
base_data_path = os.path.join(notebook_dir, '../data/')
combine_data_path = os.path.join(base_data_path, 'combined_generated_fin_phrase_bank')
# Feature file and label file live side by side in the combined dataset folder.
X_test_set_path, y_test_set_path = (
    os.path.join(combine_data_path, csv_name)
    for csv_name in ('x_test_set-v1.csv', 'y_test_set-v1.csv')
)

In [5]:
# Read the test features and discard the 'Unnamed: 0' column
# (presumably an index saved along with the CSV - confirm against the export step).
X_test_df = (
    DataProcessing
    .load_from_file(X_test_set_path, 'csv')
    .drop(columns=['Unnamed: 0'])
)
X_test_df.head(7)

Unnamed: 0,Base Sentence,Sentence Label,Embedding,Normalized Embeddings
0,"On August 15, 2024, marketing expert David Lee speculates that the customer satisfaction ratings at Amazon will likely increase.",1,[-2.67128736e-01 3.99917126e-01 1.51789542e-02 -8.48717690e-02\n -9.08976942e-02 -2.59267748e-03 5.57870697e-03 -1.11984558e-01\n 5.50340861e-02 1.75547993e+00 -2.54974365e-01 -1.80750433e-02\n 2.53512263e-02 -5.75582273e-02 -1.17895454e-01 -3.12662870e-02\n -7.41217807e-02 1.10751092e+00 -1.34572908e-01 -2.01131892e-03\n 5.15997782e-02 1.16336204e-01 -8.87980685e-03 -1.33108839e-01\n 4.78714556e-02 5.70764579e-02 -1.72738910e-01 4.16884683e-02\n 2.07518153e-02 4.11881283e-02 7.36815408e-02 -1.50440494e-02\n 3.74726136e-03 1.82219267e-01 -6.20155334e-02 -4.09787893e-03\n 5.32624172e-03 8.10612142e-02 -4.86277193e-02 -1.43699899e-01\n -7.53155202e-02 -2.91622225e-02 8.86417702e-02 -5.73523762e-03\n 5.72941788e-02 5.58102839e-02 -1.44644111e-01 -6.10220470e-02\n 6....,[-1.92454374e+00 1.91188228e+00 6.52823076e-02 -1.09791911e+00\n -1.71294856e+00 5.92892647e-01 1.92068860e-01 -1.46676123e+00\n 1.23968937e-01 -3.15836072e-02 6.55651987e-01 -5.98727763e-01\n -3.60853940e-01 -9.42990124e-01 -7.83263743e-01 9.00564417e-02\n -1.02269506e+00 3.85037921e-02 -3.46616179e-01 3.87996107e-01\n 4.86750692e-01 8.39973629e-01 -1.23200588e-01 -6.20292664e-01\n 1.05327025e-01 -1.63882062e-01 -7.20112205e-01 2.79392928e-01\n -3.39587964e-02 -5.77544093e-01 1.50556660e+00 -4.31914985e-01\n 1.02688305e-01 1.54282737e+00 -2.37315607e+00 2.74423778e-01\n 4.23447758e-01 4.65204388e-01 -6.98811769e-01 -1.86601353e+00\n -1.40664899e+00 -1.21409535e+00 4.51989844e-02 3.71334434e-01\n 7.22978592e-01 9.42269504e-01 -1.18480968e+00 -2.19736397e-01\n 5....
1,"Operating profit for the three-month period increased from EUR1 .2 m , while revenue increased from EUR20 .2 m , as compared to the corresponding period in 2005 .",0,[-8.39660242e-02 1.80937588e-01 -4.57216762e-02 1.31758088e-02\n -1.15853874e-02 -5.86525165e-02 -8.70665710e-04 3.26534510e-02\n 1.16215296e-01 1.84254622e+00 -2.33168826e-01 1.67165652e-01\n -6.99797019e-05 -2.04802342e-02 4.41387780e-02 -8.78530294e-02\n 3.87666002e-02 1.38155854e+00 -1.93677381e-01 2.67565437e-02\n 8.30405392e-03 2.85588354e-02 -9.71042439e-02 -2.32131258e-02\n 4.73141745e-02 -5.10274470e-02 -9.36960801e-02 4.17356864e-02\n -1.44721614e-02 1.21533826e-01 -6.50100559e-02 -2.13543437e-02\n -8.56587365e-02 2.31346134e-02 4.07231599e-02 -7.77563825e-02\n -1.08038373e-01 -2.93932352e-02 2.08473653e-02 7.65744643e-03\n -1.03914235e-02 4.45170105e-02 7.58761391e-02 -4.45002131e-03\n -9.02475566e-02 1.67677191e-03 -5.21407127e-02 7.16749728e-02\n -2....,[ 0.32650337 -1.0380536 -0.91864634 0.51113987 -0.65212625 -0.29796603\n 0.07093044 0.41100782 0.9220639 0.2662444 0.95830584 1.7495745\n -0.7324779 -0.3097109 0.88611746 -0.9876781 1.0547875 1.9621172\n -1.3324827 0.875457 -0.40856755 -0.4626243 -1.5807649 0.94958717\n 0.0967818 -1.6057067 0.50081563 0.2801942 -0.6300226 0.58435524\n -1.0352651 -0.5324218 -1.302615 -0.7046011 -0.29519314 -0.9691\n -1.0828011 -1.2725768 0.4982816 0.4064646 -0.23578587 -0.05474434\n -0.12900919 0.39317027 -2.0725603 0.0272575 0.11121429 1.404259\n -0.9829128 0.3590206 0.32491672 -1.1381576 -0.5637724 0.83665544\n 0.79479074 -0.45303088 -0.28690377 -1.7320254 -1.0912753 1.3369948\n 0.07836758 -0.26163587 -0.870195 0.77505666 -1.885778 -0.8...
2,"Cybersecurity threats should stay same in 2026-06-01, according to the National Security Agency director, Dr. Sophia Rodriguez.",1,[-9.97022167e-02 2.27282479e-01 8.88563171e-02 1.93849038e-02\n 8.27627033e-02 -6.11012615e-03 6.32572249e-02 -1.25566736e-01\n -9.99972895e-02 1.72420835e+00 -3.31175238e-01 -4.78917100e-02\n 4.07287441e-02 -8.05337653e-02 -1.63093254e-01 -3.94880436e-02\n -1.27118714e-02 8.93040419e-01 -5.53227402e-02 7.60172233e-02\n 1.45164803e-01 1.45717368e-01 6.82817474e-02 -7.75945783e-02\n 5.19648008e-02 2.36863736e-02 -1.45081267e-01 -3.98935899e-02\n 4.41102982e-02 8.85714293e-02 8.26337561e-03 5.78368790e-02\n 1.08856373e-02 4.49474491e-02 7.63862580e-02 -9.51796025e-03\n 6.30498528e-02 4.74843718e-02 -4.52780761e-02 -3.98732908e-02\n 5.54662012e-02 -1.95597503e-02 1.21327959e-01 -5.83715141e-02\n -6.98307455e-02 2.14324836e-02 -4.13710661e-02 1.91193689e-02\n 7....,[ 1.33107498e-01 -4.13728386e-01 1.25563562e+00 6.13037348e-01\n 6.09803617e-01 5.36996126e-01 1.27544320e+00 -1.64309239e+00\n -1.89838004e+00 -1.38554424e-01 -4.01991487e-01 -9.76714611e-01\n -1.36055201e-01 -1.33540440e+00 -1.24892056e+00 -6.65327311e-02\n 1.07430309e-01 -1.46692193e+00 9.75282013e-01 1.71016121e+00\n 2.42159462e+00 1.27598393e+00 1.15159273e+00 1.72738448e-01\n 1.68093503e-01 -6.09218836e-01 -2.92900920e-01 -1.10500216e+00\n 3.61315876e-01 1.07677765e-01 3.07105035e-01 7.28891611e-01\n 2.14890882e-01 -3.96445721e-01 4.26118314e-01 1.82920426e-01\n 1.19040775e+00 -6.30604923e-02 -6.41095579e-01 -3.07161570e-01\n 9.51912880e-01 -1.06299937e+00 4.91256148e-01 -5.22954881e-01\n -1.68571389e+00 3.61185819e-01 2.62103021e-01 7.61063993e-01\n 6....
3,"According to Gran , the company has no plans to move all production to Russia , although that is where the company is growing .",1,[ 2.75615864e-02 1.79783881e-01 -1.21603109e-01 -1.75067633e-01\n 9.83876437e-02 -2.84420680e-02 7.04421336e-03 -1.00048631e-01\n -7.77818933e-02 2.37184715e+00 -2.96325952e-01 4.51707616e-02\n 7.32708871e-02 3.28064598e-02 -9.75645557e-02 -1.34704620e-01\n -1.46236047e-02 1.19266689e+00 -1.20833769e-01 -2.11537201e-02\n 4.45679054e-02 2.58689113e-02 -2.11139992e-02 -4.58938554e-02\n -1.82265490e-02 6.69947565e-02 -9.96832177e-02 1.51548507e-02\n 1.27175581e-02 -8.47315788e-02 -2.66740546e-02 2.18095612e-02\n 6.70658797e-02 1.45969734e-01 1.41264692e-01 6.51224284e-04\n 4.73446399e-02 -1.93405268e-03 3.11274361e-02 -1.58938453e-01\n 2.45406702e-02 6.88584819e-02 1.01522245e-01 -1.50205001e-01\n -1.53623046e-02 4.58216853e-03 -6.17812127e-02 -9.76008475e-02\n 3....,[ 1.697164 -1.0535955 -2.1446092 -2.5781236 0.81879115 0.18211456\n 0.21959545 -1.3118026 -1.6085852 2.0768263 0.08170533 0.2030413\n 0.33966735 0.60040736 -0.57380164 -1.8799998 0.07224872 0.63623625\n -0.1174464 0.06363511 0.34133762 -0.50254226 -0.32532284 0.6255889\n -0.9082055 -0.03159778 0.4083359 -0.1708655 -0.16991533 -2.3985007\n -0.3329491 0.15506867 1.0979444 1.0307212 1.7383305 0.35459948\n 0.9817363 -0.8405612 0.6754128 -2.0948052 0.39419135 0.32827145\n 0.22097439 -2.0832043 -0.6536761 0.07636706 -0.02385446 -0.6674014\n 0.10471608 1.1564453 -0.03240437 -0.64257437 -0.21797852 1.4896353\n 0.58393174 -0.34075728 0.2807242 -0.95936525 0.8074142 -0.94765747\n 1.8481007 1.4760867 0.9476612 0.77331966 0.91316414 ...
4,"Policy analyst David Lee predicts on 2025-02-10, the renewable energy investments at Tesla may rise.",1,[-2.79465199e-01 4.11210299e-01 1.06271811e-01 5.34210540e-02\n -1.00248531e-02 -3.97931300e-02 -1.52785862e-02 -4.05352302e-02\n -1.21523902e-01 1.57009149e+00 -2.84787118e-01 5.81696490e-03\n 1.12172319e-02 -1.10521093e-02 -1.35719270e-01 7.48538272e-03\n -3.06803323e-02 1.14132047e+00 -1.23263232e-01 3.10381427e-02\n 1.34334370e-01 1.28715515e-01 4.87737637e-03 -1.80470183e-01\n 4.74754125e-02 1.61268070e-01 -9.78351012e-02 1.12551756e-01\n 3.64820994e-02 1.41820818e-01 -3.54899913e-02 5.98004870e-02\n -3.36620472e-02 1.26319811e-01 1.17480211e-01 5.64799979e-02\n 2.10556556e-02 1.38339773e-01 -2.56157555e-02 -8.52385089e-02\n -6.11055233e-02 6.10947609e-02 1.52429134e-01 7.80272558e-02\n -6.91597238e-02 4.46621925e-02 -6.68734610e-02 -1.33346260e-01\n 5....,[-2.07615733e+00 2.06401587e+00 1.53700554e+00 1.17160475e+00\n -6.31253719e-01 1.73244963e-03 -1.99693218e-01 -5.39167345e-01\n -2.17918968e+00 -6.65742636e-01 2.41860628e-01 -2.95848042e-01\n -5.67474008e-01 -1.48681775e-01 -9.66896176e-01 8.28109324e-01\n -2.23242909e-01 2.75821984e-01 -1.57969996e-01 9.48007107e-01\n 2.19763064e+00 1.02367997e+00 1.04083136e-01 -1.29685795e+00\n 9.92541835e-02 1.22576272e+00 4.36882645e-01 1.48189735e+00\n 2.32230693e-01 8.77730548e-01 -4.94457155e-01 7.60166824e-01\n -4.85319495e-01 7.53121912e-01 1.25727260e+00 1.29711664e+00\n 6.32440746e-01 1.36636877e+00 -3.02303135e-01 -9.88274634e-01\n -1.15038109e+00 2.06108406e-01 9.15683210e-01 1.79445755e+00\n -1.67299974e+00 7.53834724e-01 -9.51996893e-02 -1.10486698e+00\n 3....
5,"Dr. Smith predicts on November 20, 2029, the nutritional intake at schools may rise.",1,[-1.23342037e-01 4.48737800e-01 7.77667686e-02 4.12153527e-02\n -9.24057066e-02 5.28984889e-02 -6.34194389e-02 1.32564818e-02\n 8.23703483e-02 1.64521945e+00 -3.48226160e-01 1.44820707e-02\n 1.01476766e-01 5.43523207e-02 -8.24817196e-02 1.30305788e-03\n -1.05997045e-02 1.17163539e+00 -9.93407145e-02 -5.30648306e-02\n -1.68292969e-02 1.81135200e-02 2.31271181e-02 -1.52221113e-01\n -1.81868635e-02 3.84828746e-02 -1.88596129e-01 4.00289409e-02\n 1.39729038e-01 7.81415477e-02 -1.83683988e-02 4.06915322e-02\n 2.74938680e-02 8.73017088e-02 8.22432935e-02 1.05383517e-02\n -3.04893889e-02 1.69457823e-01 -3.25031132e-02 9.37876850e-02\n -8.89982954e-02 -1.00480475e-01 -3.74401733e-02 2.11318731e-02\n 5.85598685e-02 6.55854791e-02 -1.82851776e-01 -1.90664139e-02\n 6....,[-1.57422990e-01 2.56955957e+00 1.07646954e+00 9.71296906e-01\n -1.73311865e+00 1.47471440e+00 -1.10392177e+00 1.59185871e-01\n 4.80564237e-01 -4.08752114e-01 -6.38652802e-01 -1.86000213e-01\n 7.51999557e-01 9.68403220e-01 -4.18408573e-01 7.10362613e-01\n 1.46300480e-01 4.88610536e-01 2.41059229e-01 -4.77086991e-01\n -9.28303540e-01 -6.17630601e-01 4.05588835e-01 -8.93314898e-01\n -9.07597005e-01 -4.11872029e-01 -9.65049267e-01 2.51231819e-01\n 1.97938609e+00 -4.31513749e-02 -1.80789396e-01 4.55810100e-01\n 4.75942761e-01 2.01903462e-01 5.44580996e-01 5.21516740e-01\n -5.24260439e-02 1.85594940e+00 -4.20976043e-01 1.69962347e+00\n -1.65340889e+00 -2.33629584e+00 -1.67539787e+00 8.27806175e-01\n 7.46960223e-01 1.10749841e+00 -1.72012031e+00 2.93732226e-01\n 5....
6,"Technopolis plans to develop in stages an area of no less than 100,000 square meters in order to host companies working in computer technologies and telecommunications , the statement said .",1,[ 2.36152317e-02 1.75637215e-01 -4.75921258e-02 -1.00200571e-01\n 8.89781415e-02 -6.43718988e-02 -2.69504562e-02 -4.83372882e-02\n 8.36279336e-03 2.23455858e+00 -2.08360046e-01 -7.23112794e-03\n 8.49666148e-02 1.64331440e-02 -6.58765361e-02 -4.59754653e-03\n 4.57342863e-02 1.37366998e+00 -2.09634945e-01 -1.05597727e-01\n 5.03051467e-03 6.17915466e-02 -7.04549477e-02 -7.91235566e-02\n 3.26408111e-02 -1.15197964e-01 -5.03080897e-02 5.12997098e-02\n 2.39542034e-02 -1.31541165e-02 5.15889144e-03 -1.93684064e-02\n 2.15651724e-03 3.74105498e-02 1.53509766e-01 -1.50099948e-01\n -2.85472199e-02 7.28019401e-02 1.21796988e-02 -5.34533598e-02\n 3.01733967e-02 1.60360977e-01 9.51667279e-02 -1.36446133e-01\n -4.18400392e-02 -9.26042721e-03 -9.38620940e-02 -4.67620976e-02\n 6....,[ 1.6486638e+00 -1.1094565e+00 -9.4886589e-01 -1.3494802e+00\n 6.9293666e-01 -3.8885394e-01 -4.1892567e-01 -6.4045787e-01\n -4.8484746e-01 1.6072028e+00 1.3026437e+00 -4.6125913e-01\n 5.1064324e-01 3.2075682e-01 -2.4733125e-01 5.9798145e-01\n 1.1830138e+00 1.9067453e+00 -1.5986559e+00 -1.3672378e+00\n -4.7626153e-01 3.0542215e-02 -1.1404893e+00 1.5089671e-01\n -1.2821639e-01 -2.4615743e+00 1.1710043e+00 4.4248950e-01\n 2.0232348e-02 -1.3634009e+00 2.5023085e-01 -5.0079095e-01\n 7.7684633e-02 -5.0292134e-01 1.9859959e+00 -2.1904244e+00\n -2.6620910e-02 3.3526114e-01 3.4893301e-01 -5.1105261e-01\n 4.9577385e-01 1.7680734e+00 1.3424283e-01 -1.8494413e+00\n -1.1553615e+00 -1.5761261e-01 -4.7332543e-01 -4.5217842e-02\n -4.4650114e-01 -7.2861034e-01 -2.2781046e-01 -8.796...


In [6]:
# Read the test labels and discard the 'Unnamed: 0' column
# (presumably an index saved along with the CSV - confirm against the export step).
y_test_df = (
    DataProcessing
    .load_from_file(y_test_set_path, 'csv')
    .drop(columns=['Unnamed: 0'])
)

In [7]:
# Report the dimensions and the first three rows of the feature frame,
# then of the label frame.
for frame in (X_test_df, y_test_df):
    print(f"\tShape: {frame.shape}, \nSubset of Data:{frame.head(3)}")

	Shape: (21, 4), 
Subset of Data:                                                                                                                                                        Base Sentence  \
0                                    On August 15, 2024, marketing expert David Lee speculates that the customer satisfaction ratings at Amazon will likely increase.   
1  Operating profit for the three-month period increased from EUR1 .2 m , while revenue increased from EUR20 .2 m , as compared to the corresponding period in 2005 .   
2                                     Cybersecurity threats should stay same in 2026-06-01, according to the National Security Agency director, Dr. Sophia Rodriguez.   

   Sentence Label  \
0               1   
1               0   
2               1   

                                                                                                                                                                                                             

In [8]:
# for idx, row in X_test_df.iterrows():
#     text = row['Base Sentence']
#     label = row['Sentence Label']
#     embedding = row['Embedding']
#     print(type(embedding))
#     norm_embedding = row['Normalized Embeddings']
#     if idx < 7:
#         print(f"{idx}\n Sentence: {text}\n Label: {label}\n Embeddings Shape: {embedding}\n\t Embeddings Subset [:6]: {embedding[:6]} \n Norm Embeddings: {norm_embedding.shape}, \n\tNorm Embeddings Subset [:6]: {norm_embedding[:6]}")
# print(f"\tShape: {y_test_df.shape}, \nSubset of Data:{y_test_df.head(7)}")

## Load Prompt

In [9]:
# prediction_properties = PredictionProperties.get_prediction_properties()
# prediction_requirements = PredictionProperties.get_requirements()
# system_identity_prompt = "You are an expert at identifying specific types of sentences by knowing the sentence format."
# prediction_examples_prompt = """Some examples of predictions in the PhraseBank dataset are
#     1. According to Gran , the company has no plans to move all production to Russia , although that is where the company is growing .
#     2. According to the company 's updated strategy for the years 2009-2012 , Basware targets a long-term net sales growth in the range of 20 % -40 % with an operating profit margin of 10 % -20 % of net sales .
#     3. Its board of directors will propose a dividend of EUR0 .12 per share for 2010 , up from the EUR0 .08 per share paid in 2009 .
# """
# non_prediction_examples_prompt = """Some examples of non-predictions in the PhraseBank dataset are
#     1. Net sales increased to EUR193 .3 m from EUR179 .9 m and pretax profit rose by 34.2 % to EUR43 .1 m. ( EUR1 = USD1 .4 )
#     2. Net sales surged by 18.5 % to EUR167 .8 m. Teleste said that EUR20 .4 m , or 12.2 % , of the sales came from the acquisitions made in 2009 .
#     3. STORA ENSO , NORSKE SKOG , M-REAL , UPM-KYMMENE Credit Suisse First Boston ( CFSB ) raised the fair value for shares in four of the largest Nordic forestry groups .
# """
# # goal_prompt = "Given the above, identify the prediction."

# base_prompt = f"""{system_identity_prompt} The sentence format is based on: 
    
#     {prediction_properties}
#     Enforce: {prediction_requirements}
#     Know: {prediction_examples_prompt}
#     Know: {non_prediction_examples_prompt}

# """
# base_prompt

In [10]:
# Base prompt shared by every model: role, a definition of "prediction",
# a few labelled examples, and the optional components of a prediction.
# All examples use the same straight-quote `"sentence" (Yes/No)` layout.
prompt_1 = """ 

Role: 
You are a linguist expert. You are acting as a prediction detector. Your task is to identify if a given sentence is a prediction about the future.

Background:
A prediction is a statement about what someone thinks will happen in the future.
Examples of predictions:
- "It will rain tomorrow." (Yes)
- "The stock market is expected to rise next quarter." (Yes)
- "I am going to the store." (No)
- "Lakers will win the championship." (Yes)

A prediction may contain: source, target, date, outcome.
"""

## Models

In [11]:
# Build one text-generation model instance per name under comparison.
tgmf = TextGenerationModelFactory()

# Option 1: Specific models
selected_model_names = [
    'llama-3.1-8b-instant',
    'llama-3.3-70b-versatile',
    'llama-3.3-70b-instruct',
]
models = tgmf.create_instances(selected_model_names)

# Alternative selections, kept for reference:
# Option 2: All Groq models
# models = tgmf.create_instances(tgmf.get_groq_model_names())

# Option 3: All NaviGator models
# models = tgmf.create_instances(tgmf.get_navigator_model_names())

# Option 4: All available models
# models = tgmf.create_instances()

# Option 5: Mix and match
# custom_models = ['llama-3.1-70b-instruct', 'mistral-small-3.1', 'llama-3.1-8b-instant']
# models = tgmf.create_instances(custom_models)
models

[<text_generation_models.LlamaInstantTextGenerationModel at 0x17ddaa1d0>,
 <text_generation_models.LlamaVersatileTextGenerationModel at 0x17e751190>,
 <text_generation_models.Llama3370BInstructTextGenerationModel at 0x17e7516d0>]

In [12]:
import json
import re

def parse_json_response(response):
    """Extract the ``label`` and ``reasoning`` fields from an LLM reply.

    The reply may wrap its JSON object in extra text; the span from the first
    ``{`` to the last ``}`` is decoded.

    Args:
        response: Raw text returned by the model.

    Returns:
        A ``(label, reasoning)`` tuple. An element is ``None`` when its key is
        missing from the decoded object. ``(None, None)`` is returned when no
        JSON object is found or it cannot be decoded, so callers can always
        unpack the result into two names.
    """
    try:
        # Greedy match with DOTALL: tolerates text around the object and
        # newlines inside it.
        json_match = re.search(r'\{.*\}', response, re.DOTALL)
        if json_match:
            data = json.loads(json_match.group())
            return data.get('label'), data.get('reasoning')
    except Exception as e:
        # Best effort: report the problem and fall through to the empty result.
        print(f"Error parsing JSON: {e}")
    # Reached when nothing matched or decoding failed.
    return None, None

In [13]:
def llm_certifier(data: str, base_prompt: str, model):
    """Ask one model whether a sentence is a prediction.

    Args:
        data: The sentence to classify.
        base_prompt: Background text placed ahead of the task instructions.
        model: Object providing ``user(prompt)`` to wrap the prompt as a
            message and ``chat_completion(messages)`` to obtain the reply.

    Returns:
        A ``(raw_text_llm_generation, label, reasoning)`` tuple: the unmodified
        model reply plus the ``label`` and ``reasoning`` values parsed from it.
        ``label`` and ``reasoning`` are ``None`` when the reply could not be
        parsed.
    """
    prompt = f""" Given this: {base_prompt}. Also given the sentence '{data}', your task is to analyze the sentence and determine if it is a prediction. If prediction, generate label as 1 and if non-prediction generate label as 0.
        Respond ONLY with valid JSON in this exact format:
        {{"label": 0, "reasoning": "your explanation here"}}
        Examples:
        - "It will rain tomorrow." → {{"label": 1, "reasoning": "Contains the future tense words 'will' and 'tomorrow'"}}
        - "The stock market is expected to rise next quarter." → {{"label": 1, "reasoning": "Contains future tense words 'is expected'"}}
        - "I am going to the store." → {{"label": 0, "reasoning": "Does not contain a future tense word"}}
        - "Lakers will win the championship." → {{"label": 1, "reasoning": "Contains the future tense word 'will'"}}
        """
    input_prompt = model.user(prompt)
    raw_text_llm_generation = model.chat_completion([input_prompt])

    # Parse the JSON response. Guard against a parser that returns nothing for
    # an unparseable reply, so one bad reply does not abort the whole run.
    parsed = parse_json_response(raw_text_llm_generation)
    label, reasoning = parsed if parsed is not None else (None, None)

    return raw_text_llm_generation, label, reasoning

In [14]:
# Banner marking the start of the classification run in the cell output.
print("======= PROMPT + MODEL -> LABEL and REASONING =======")



In [15]:
    # content : meta :: text : meta_data
# One record per (sentence, model) pair, appended sentence by sentence and,
# within a sentence, in the order of `models`.
results = []
for idx, row in X_test_df.iterrows():
    text = row['Base Sentence']
    print(f"{idx} --- Sentence: {text}")
    for model in models:
        model_name = model.__name__()
        raw_response, llm_label, llm_reasoning = llm_certifier(text, prompt_1, model)
        # Plain "Label: <value>" / "Reasoning: <value>" lines, without stray quote/comma characters.
        print(f"\tModel: {model_name}\n\t\tLabel: {llm_label}\n\t\tReasoning: {llm_reasoning}")
        results.append((text, raw_response, llm_label, llm_reasoning, model_name))


0 --- Sentence: On August 15, 2024, marketing expert David Lee speculates that the customer satisfaction ratings at Amazon will likely increase.
	Model: llama-3.1-8b-instant
		Label:', 1
		Reasoning:', Contains future tense words 'will likely increase' and specifies a date 'August 15, 2024', a target 'customer satisfaction ratings at Amazon', and a source 'marketing expert David Lee'
	Model: llama-3.3-70b-versatile
		Label:', 1
		Reasoning:', Contains the future tense words 'will likely increase' and a specific date 'August 15, 2024', indicating a prediction about the future
	Model: llama-3.3-70b-instruct
		Label:', 1
		Reasoning:', Contains the future tense words 'will likely increase' and a specific future date 'August 15, 2024', indicating a prediction about the future.
1 --- Sentence: Operating profit for the three-month period increased from EUR1 .2 m , while revenue increased from EUR20 .2 m , as compared to the corresponding period in 2005 .
	Model: llama-3.1-8b-instant
		Label:

In [16]:
# Inspect the raw (text, raw_response, llm_label, llm_reasoning, model name) tuples collected by the loop.
results

[('On August 15, 2024, marketing expert David Lee speculates that the customer satisfaction ratings at Amazon will likely increase.',
  '{"label": 1, "reasoning": "Contains future tense words \'will likely increase\' and specifies a date \'August 15, 2024\', a target \'customer satisfaction ratings at Amazon\', and a source \'marketing expert David Lee\'"}',
  1,
  "Contains future tense words 'will likely increase' and specifies a date 'August 15, 2024', a target 'customer satisfaction ratings at Amazon', and a source 'marketing expert David Lee'",
  'llama-3.1-8b-instant'),
 ('On August 15, 2024, marketing expert David Lee speculates that the customer satisfaction ratings at Amazon will likely increase.',
  '{"label": 1, "reasoning": "Contains the future tense words \'will likely increase\' and a specific date \'August 15, 2024\', indicating a prediction about the future"}',
  1,
  "Contains the future tense words 'will likely increase' and a specific date 'August 15, 2024', indicati

In [17]:
# groupby text

In [18]:
# Tabulate the collected tuples; the column order follows the tuple layout
# used when each result was appended.
result_columns = ['text', 'raw_response', 'llm_label', 'llm_reasoning', 'llm_name']
results_with_llm_label_df = pd.DataFrame(results, columns=result_columns)
results_with_llm_label_df

Unnamed: 0,text,raw_response,llm_label,llm_reasoning,llm_name
0,"On August 15, 2024, marketing expert David Lee speculates that the customer satisfaction ratings at Amazon will likely increase.","{""label"": 1, ""reasoning"": ""Contains future tense words 'will likely increase' and specifies a date 'August 15, 2024', a target 'customer satisfaction ratings at Amazon', and a source 'marketing expert David Lee'""}",1,"Contains future tense words 'will likely increase' and specifies a date 'August 15, 2024', a target 'customer satisfaction ratings at Amazon', and a source 'marketing expert David Lee'",llama-3.1-8b-instant
1,"On August 15, 2024, marketing expert David Lee speculates that the customer satisfaction ratings at Amazon will likely increase.","{""label"": 1, ""reasoning"": ""Contains the future tense words 'will likely increase' and a specific date 'August 15, 2024', indicating a prediction about the future""}",1,"Contains the future tense words 'will likely increase' and a specific date 'August 15, 2024', indicating a prediction about the future",llama-3.3-70b-versatile
2,"On August 15, 2024, marketing expert David Lee speculates that the customer satisfaction ratings at Amazon will likely increase.","{""label"": 1, ""reasoning"": ""Contains the future tense words 'will likely increase' and a specific future date 'August 15, 2024', indicating a prediction about the future.""}",1,"Contains the future tense words 'will likely increase' and a specific future date 'August 15, 2024', indicating a prediction about the future.",llama-3.3-70b-instruct
3,"Operating profit for the three-month period increased from EUR1 .2 m , while revenue increased from EUR20 .2 m , as compared to the corresponding period in 2005 .","{""label"": 0, ""reasoning"": ""Does not contain a future tense word, it is a statement about a past comparison""}",0,"Does not contain a future tense word, it is a statement about a past comparison",llama-3.1-8b-instant
4,"Operating profit for the three-month period increased from EUR1 .2 m , while revenue increased from EUR20 .2 m , as compared to the corresponding period in 2005 .","{""label"": 0, ""reasoning"": ""The sentence is in the past tense, discussing increases in operating profit and revenue compared to a period in 2005, and does not contain any future tense words or phrases indicating a prediction about the future.""}",0,"The sentence is in the past tense, discussing increases in operating profit and revenue compared to a period in 2005, and does not contain any future tense words or phrases indicating a prediction about the future.",llama-3.3-70b-versatile
5,"Operating profit for the three-month period increased from EUR1 .2 m , while revenue increased from EUR20 .2 m , as compared to the corresponding period in 2005 .","{""label"": 0, ""reasoning"": ""The sentence is in the past tense, discussing increases in operating profit and revenue compared to a period in 2005, and does not contain any future tense words or phrases indicating a prediction about what will happen.""}",0,"The sentence is in the past tense, discussing increases in operating profit and revenue compared to a period in 2005, and does not contain any future tense words or phrases indicating a prediction about what will happen.",llama-3.3-70b-instruct
6,"Cybersecurity threats should stay same in 2026-06-01, according to the National Security Agency director, Dr. Sophia Rodriguez.","{""label"": 1, ""reasoning"": ""Contains the future tense phrase 'should stay same in 2026-06-01' indicating an expectation for the future, and a named source 'the National Security Agency director, Dr. Sophia Rodriguez'.""}",1,"Contains the future tense phrase 'should stay same in 2026-06-01' indicating an expectation for the future, and a named source 'the National Security Agency director, Dr. Sophia Rodriguez'.",llama-3.1-8b-instant
7,"Cybersecurity threats should stay same in 2026-06-01, according to the National Security Agency director, Dr. Sophia Rodriguez.","{""label"": 1, ""reasoning"": ""Contains the future tense phrase 'should stay' and a specific future date '2026-06-01', indicating a statement about what is expected to happen in the future according to the National Security Agency director, Dr. Sophia Rodriguez.""}",1,"Contains the future tense phrase 'should stay' and a specific future date '2026-06-01', indicating a statement about what is expected to happen in the future according to the National Security Agency director, Dr. Sophia Rodriguez.",llama-3.3-70b-versatile
8,"Cybersecurity threats should stay same in 2026-06-01, according to the National Security Agency director, Dr. Sophia Rodriguez.","{""label"": 1, ""reasoning"": ""Contains a future date '2026-06-01' and expresses an expectation about the state of cybersecurity threats, indicating a prediction about the future""}",1,"Contains a future date '2026-06-01' and expresses an expectation about the state of cybersecurity threats, indicating a prediction about the future",llama-3.3-70b-instruct
9,"According to Gran , the company has no plans to move all production to Russia , although that is where the company is growing .","{""label"": 0, ""reasoning"": ""The sentence does not contain any future tense words or phrases that indicate a prediction about the future.""}",0,The sentence does not contain any future tense words or phrases that indicate a prediction about the future.,llama-3.1-8b-instant


In [19]:
# Name the ground-truth column 'Actual Label' in the label frame.
y_test_df = y_test_df.rename(columns={'Sentence Label': 'Actual Label'})

In [20]:
def get_llm_labels(df, model_name):
    """Return the `llm_label` values of the rows produced by one model.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame with at least the columns `llm_name` and `llm_label`.
    model_name : str
        Value compared for equality against the `llm_name` column.

    Returns
    -------
    pandas.Series
        The `llm_label` entries of the matching rows, in their original
        order and with their original index (empty when nothing matches).
    """
    rows_for_model = df['llm_name'].eq(model_name)
    return df.loc[rows_for_model, 'llm_label']


# Build one wide frame: test features + ground-truth label + one column of
# predicted labels per LLM (column header = the model's name).
test_and_models_df = pd.concat([X_test_df, y_test_df], axis=1)

for model in models:
    model_name = model.__name__()  # resolve once; reused as the column header
    print(model_name)
    model_labels = get_llm_labels(results_with_llm_label_df, model_name)

    # The labels are attached by position (the filtered Series' index is
    # discarded), so each model must have exactly one label per test row.
    # NOTE(review): this also presumes the results frame lists sentences in
    # test-set order — confirm against how results_with_llm_label_df is built.
    if len(model_labels) != len(test_and_models_df):
        raise ValueError(
            f"{model_name}: expected {len(test_and_models_df)} labels, "
            f"got {len(model_labels)}"
        )
    test_and_models_df[model_name] = model_labels.to_numpy()
test_and_models_df

llama-3.1-8b-instant
llama-3.3-70b-versatile
llama-3.3-70b-instruct


Unnamed: 0,Base Sentence,Sentence Label,Embedding,Normalized Embeddings,Actual Label,llama-3.1-8b-instant,llama-3.3-70b-versatile,llama-3.3-70b-instruct
0,"On August 15, 2024, marketing expert David Lee speculates that the customer satisfaction ratings at Amazon will likely increase.",1,[-2.67128736e-01 3.99917126e-01 1.51789542e-02 -8.48717690e-02\n -9.08976942e-02 -2.59267748e-03 5.57870697e-03 -1.11984558e-01\n 5.50340861e-02 1.75547993e+00 -2.54974365e-01 -1.80750433e-02\n 2.53512263e-02 -5.75582273e-02 -1.17895454e-01 -3.12662870e-02\n -7.41217807e-02 1.10751092e+00 -1.34572908e-01 -2.01131892e-03\n 5.15997782e-02 1.16336204e-01 -8.87980685e-03 -1.33108839e-01\n 4.78714556e-02 5.70764579e-02 -1.72738910e-01 4.16884683e-02\n 2.07518153e-02 4.11881283e-02 7.36815408e-02 -1.50440494e-02\n 3.74726136e-03 1.82219267e-01 -6.20155334e-02 -4.09787893e-03\n 5.32624172e-03 8.10612142e-02 -4.86277193e-02 -1.43699899e-01\n -7.53155202e-02 -2.91622225e-02 8.86417702e-02 -5.73523762e-03\n 5.72941788e-02 5.58102839e-02 -1.44644111e-01 -6.10220470e-02\n 6....,[-1.92454374e+00 1.91188228e+00 6.52823076e-02 -1.09791911e+00\n -1.71294856e+00 5.92892647e-01 1.92068860e-01 -1.46676123e+00\n 1.23968937e-01 -3.15836072e-02 6.55651987e-01 -5.98727763e-01\n -3.60853940e-01 -9.42990124e-01 -7.83263743e-01 9.00564417e-02\n -1.02269506e+00 3.85037921e-02 -3.46616179e-01 3.87996107e-01\n 4.86750692e-01 8.39973629e-01 -1.23200588e-01 -6.20292664e-01\n 1.05327025e-01 -1.63882062e-01 -7.20112205e-01 2.79392928e-01\n -3.39587964e-02 -5.77544093e-01 1.50556660e+00 -4.31914985e-01\n 1.02688305e-01 1.54282737e+00 -2.37315607e+00 2.74423778e-01\n 4.23447758e-01 4.65204388e-01 -6.98811769e-01 -1.86601353e+00\n -1.40664899e+00 -1.21409535e+00 4.51989844e-02 3.71334434e-01\n 7.22978592e-01 9.42269504e-01 -1.18480968e+00 -2.19736397e-01\n 5....,1,1,1,1
1,"Operating profit for the three-month period increased from EUR1 .2 m , while revenue increased from EUR20 .2 m , as compared to the corresponding period in 2005 .",0,[-8.39660242e-02 1.80937588e-01 -4.57216762e-02 1.31758088e-02\n -1.15853874e-02 -5.86525165e-02 -8.70665710e-04 3.26534510e-02\n 1.16215296e-01 1.84254622e+00 -2.33168826e-01 1.67165652e-01\n -6.99797019e-05 -2.04802342e-02 4.41387780e-02 -8.78530294e-02\n 3.87666002e-02 1.38155854e+00 -1.93677381e-01 2.67565437e-02\n 8.30405392e-03 2.85588354e-02 -9.71042439e-02 -2.32131258e-02\n 4.73141745e-02 -5.10274470e-02 -9.36960801e-02 4.17356864e-02\n -1.44721614e-02 1.21533826e-01 -6.50100559e-02 -2.13543437e-02\n -8.56587365e-02 2.31346134e-02 4.07231599e-02 -7.77563825e-02\n -1.08038373e-01 -2.93932352e-02 2.08473653e-02 7.65744643e-03\n -1.03914235e-02 4.45170105e-02 7.58761391e-02 -4.45002131e-03\n -9.02475566e-02 1.67677191e-03 -5.21407127e-02 7.16749728e-02\n -2....,[ 0.32650337 -1.0380536 -0.91864634 0.51113987 -0.65212625 -0.29796603\n 0.07093044 0.41100782 0.9220639 0.2662444 0.95830584 1.7495745\n -0.7324779 -0.3097109 0.88611746 -0.9876781 1.0547875 1.9621172\n -1.3324827 0.875457 -0.40856755 -0.4626243 -1.5807649 0.94958717\n 0.0967818 -1.6057067 0.50081563 0.2801942 -0.6300226 0.58435524\n -1.0352651 -0.5324218 -1.302615 -0.7046011 -0.29519314 -0.9691\n -1.0828011 -1.2725768 0.4982816 0.4064646 -0.23578587 -0.05474434\n -0.12900919 0.39317027 -2.0725603 0.0272575 0.11121429 1.404259\n -0.9829128 0.3590206 0.32491672 -1.1381576 -0.5637724 0.83665544\n 0.79479074 -0.45303088 -0.28690377 -1.7320254 -1.0912753 1.3369948\n 0.07836758 -0.26163587 -0.870195 0.77505666 -1.885778 -0.8...,0,0,0,0
2,"Cybersecurity threats should stay same in 2026-06-01, according to the National Security Agency director, Dr. Sophia Rodriguez.",1,[-9.97022167e-02 2.27282479e-01 8.88563171e-02 1.93849038e-02\n 8.27627033e-02 -6.11012615e-03 6.32572249e-02 -1.25566736e-01\n -9.99972895e-02 1.72420835e+00 -3.31175238e-01 -4.78917100e-02\n 4.07287441e-02 -8.05337653e-02 -1.63093254e-01 -3.94880436e-02\n -1.27118714e-02 8.93040419e-01 -5.53227402e-02 7.60172233e-02\n 1.45164803e-01 1.45717368e-01 6.82817474e-02 -7.75945783e-02\n 5.19648008e-02 2.36863736e-02 -1.45081267e-01 -3.98935899e-02\n 4.41102982e-02 8.85714293e-02 8.26337561e-03 5.78368790e-02\n 1.08856373e-02 4.49474491e-02 7.63862580e-02 -9.51796025e-03\n 6.30498528e-02 4.74843718e-02 -4.52780761e-02 -3.98732908e-02\n 5.54662012e-02 -1.95597503e-02 1.21327959e-01 -5.83715141e-02\n -6.98307455e-02 2.14324836e-02 -4.13710661e-02 1.91193689e-02\n 7....,[ 1.33107498e-01 -4.13728386e-01 1.25563562e+00 6.13037348e-01\n 6.09803617e-01 5.36996126e-01 1.27544320e+00 -1.64309239e+00\n -1.89838004e+00 -1.38554424e-01 -4.01991487e-01 -9.76714611e-01\n -1.36055201e-01 -1.33540440e+00 -1.24892056e+00 -6.65327311e-02\n 1.07430309e-01 -1.46692193e+00 9.75282013e-01 1.71016121e+00\n 2.42159462e+00 1.27598393e+00 1.15159273e+00 1.72738448e-01\n 1.68093503e-01 -6.09218836e-01 -2.92900920e-01 -1.10500216e+00\n 3.61315876e-01 1.07677765e-01 3.07105035e-01 7.28891611e-01\n 2.14890882e-01 -3.96445721e-01 4.26118314e-01 1.82920426e-01\n 1.19040775e+00 -6.30604923e-02 -6.41095579e-01 -3.07161570e-01\n 9.51912880e-01 -1.06299937e+00 4.91256148e-01 -5.22954881e-01\n -1.68571389e+00 3.61185819e-01 2.62103021e-01 7.61063993e-01\n 6....,1,1,1,1
3,"According to Gran , the company has no plans to move all production to Russia , although that is where the company is growing .",1,[ 2.75615864e-02 1.79783881e-01 -1.21603109e-01 -1.75067633e-01\n 9.83876437e-02 -2.84420680e-02 7.04421336e-03 -1.00048631e-01\n -7.77818933e-02 2.37184715e+00 -2.96325952e-01 4.51707616e-02\n 7.32708871e-02 3.28064598e-02 -9.75645557e-02 -1.34704620e-01\n -1.46236047e-02 1.19266689e+00 -1.20833769e-01 -2.11537201e-02\n 4.45679054e-02 2.58689113e-02 -2.11139992e-02 -4.58938554e-02\n -1.82265490e-02 6.69947565e-02 -9.96832177e-02 1.51548507e-02\n 1.27175581e-02 -8.47315788e-02 -2.66740546e-02 2.18095612e-02\n 6.70658797e-02 1.45969734e-01 1.41264692e-01 6.51224284e-04\n 4.73446399e-02 -1.93405268e-03 3.11274361e-02 -1.58938453e-01\n 2.45406702e-02 6.88584819e-02 1.01522245e-01 -1.50205001e-01\n -1.53623046e-02 4.58216853e-03 -6.17812127e-02 -9.76008475e-02\n 3....,[ 1.697164 -1.0535955 -2.1446092 -2.5781236 0.81879115 0.18211456\n 0.21959545 -1.3118026 -1.6085852 2.0768263 0.08170533 0.2030413\n 0.33966735 0.60040736 -0.57380164 -1.8799998 0.07224872 0.63623625\n -0.1174464 0.06363511 0.34133762 -0.50254226 -0.32532284 0.6255889\n -0.9082055 -0.03159778 0.4083359 -0.1708655 -0.16991533 -2.3985007\n -0.3329491 0.15506867 1.0979444 1.0307212 1.7383305 0.35459948\n 0.9817363 -0.8405612 0.6754128 -2.0948052 0.39419135 0.32827145\n 0.22097439 -2.0832043 -0.6536761 0.07636706 -0.02385446 -0.6674014\n 0.10471608 1.1564453 -0.03240437 -0.64257437 -0.21797852 1.4896353\n 0.58393174 -0.34075728 0.2807242 -0.95936525 0.8074142 -0.94765747\n 1.8481007 1.4760867 0.9476612 0.77331966 0.91316414 ...,1,0,0,0
4,"Policy analyst David Lee predicts on 2025-02-10, the renewable energy investments at Tesla may rise.",1,[-2.79465199e-01 4.11210299e-01 1.06271811e-01 5.34210540e-02\n -1.00248531e-02 -3.97931300e-02 -1.52785862e-02 -4.05352302e-02\n -1.21523902e-01 1.57009149e+00 -2.84787118e-01 5.81696490e-03\n 1.12172319e-02 -1.10521093e-02 -1.35719270e-01 7.48538272e-03\n -3.06803323e-02 1.14132047e+00 -1.23263232e-01 3.10381427e-02\n 1.34334370e-01 1.28715515e-01 4.87737637e-03 -1.80470183e-01\n 4.74754125e-02 1.61268070e-01 -9.78351012e-02 1.12551756e-01\n 3.64820994e-02 1.41820818e-01 -3.54899913e-02 5.98004870e-02\n -3.36620472e-02 1.26319811e-01 1.17480211e-01 5.64799979e-02\n 2.10556556e-02 1.38339773e-01 -2.56157555e-02 -8.52385089e-02\n -6.11055233e-02 6.10947609e-02 1.52429134e-01 7.80272558e-02\n -6.91597238e-02 4.46621925e-02 -6.68734610e-02 -1.33346260e-01\n 5....,[-2.07615733e+00 2.06401587e+00 1.53700554e+00 1.17160475e+00\n -6.31253719e-01 1.73244963e-03 -1.99693218e-01 -5.39167345e-01\n -2.17918968e+00 -6.65742636e-01 2.41860628e-01 -2.95848042e-01\n -5.67474008e-01 -1.48681775e-01 -9.66896176e-01 8.28109324e-01\n -2.23242909e-01 2.75821984e-01 -1.57969996e-01 9.48007107e-01\n 2.19763064e+00 1.02367997e+00 1.04083136e-01 -1.29685795e+00\n 9.92541835e-02 1.22576272e+00 4.36882645e-01 1.48189735e+00\n 2.32230693e-01 8.77730548e-01 -4.94457155e-01 7.60166824e-01\n -4.85319495e-01 7.53121912e-01 1.25727260e+00 1.29711664e+00\n 6.32440746e-01 1.36636877e+00 -3.02303135e-01 -9.88274634e-01\n -1.15038109e+00 2.06108406e-01 9.15683210e-01 1.79445755e+00\n -1.67299974e+00 7.53834724e-01 -9.51996893e-02 -1.10486698e+00\n 3....,1,1,1,1
5,"Dr. Smith predicts on November 20, 2029, the nutritional intake at schools may rise.",1,[-1.23342037e-01 4.48737800e-01 7.77667686e-02 4.12153527e-02\n -9.24057066e-02 5.28984889e-02 -6.34194389e-02 1.32564818e-02\n 8.23703483e-02 1.64521945e+00 -3.48226160e-01 1.44820707e-02\n 1.01476766e-01 5.43523207e-02 -8.24817196e-02 1.30305788e-03\n -1.05997045e-02 1.17163539e+00 -9.93407145e-02 -5.30648306e-02\n -1.68292969e-02 1.81135200e-02 2.31271181e-02 -1.52221113e-01\n -1.81868635e-02 3.84828746e-02 -1.88596129e-01 4.00289409e-02\n 1.39729038e-01 7.81415477e-02 -1.83683988e-02 4.06915322e-02\n 2.74938680e-02 8.73017088e-02 8.22432935e-02 1.05383517e-02\n -3.04893889e-02 1.69457823e-01 -3.25031132e-02 9.37876850e-02\n -8.89982954e-02 -1.00480475e-01 -3.74401733e-02 2.11318731e-02\n 5.85598685e-02 6.55854791e-02 -1.82851776e-01 -1.90664139e-02\n 6....,[-1.57422990e-01 2.56955957e+00 1.07646954e+00 9.71296906e-01\n -1.73311865e+00 1.47471440e+00 -1.10392177e+00 1.59185871e-01\n 4.80564237e-01 -4.08752114e-01 -6.38652802e-01 -1.86000213e-01\n 7.51999557e-01 9.68403220e-01 -4.18408573e-01 7.10362613e-01\n 1.46300480e-01 4.88610536e-01 2.41059229e-01 -4.77086991e-01\n -9.28303540e-01 -6.17630601e-01 4.05588835e-01 -8.93314898e-01\n -9.07597005e-01 -4.11872029e-01 -9.65049267e-01 2.51231819e-01\n 1.97938609e+00 -4.31513749e-02 -1.80789396e-01 4.55810100e-01\n 4.75942761e-01 2.01903462e-01 5.44580996e-01 5.21516740e-01\n -5.24260439e-02 1.85594940e+00 -4.20976043e-01 1.69962347e+00\n -1.65340889e+00 -2.33629584e+00 -1.67539787e+00 8.27806175e-01\n 7.46960223e-01 1.10749841e+00 -1.72012031e+00 2.93732226e-01\n 5....,1,1,1,1
6,"Technopolis plans to develop in stages an area of no less than 100,000 square meters in order to host companies working in computer technologies and telecommunications , the statement said .",1,[ 2.36152317e-02 1.75637215e-01 -4.75921258e-02 -1.00200571e-01\n 8.89781415e-02 -6.43718988e-02 -2.69504562e-02 -4.83372882e-02\n 8.36279336e-03 2.23455858e+00 -2.08360046e-01 -7.23112794e-03\n 8.49666148e-02 1.64331440e-02 -6.58765361e-02 -4.59754653e-03\n 4.57342863e-02 1.37366998e+00 -2.09634945e-01 -1.05597727e-01\n 5.03051467e-03 6.17915466e-02 -7.04549477e-02 -7.91235566e-02\n 3.26408111e-02 -1.15197964e-01 -5.03080897e-02 5.12997098e-02\n 2.39542034e-02 -1.31541165e-02 5.15889144e-03 -1.93684064e-02\n 2.15651724e-03 3.74105498e-02 1.53509766e-01 -1.50099948e-01\n -2.85472199e-02 7.28019401e-02 1.21796988e-02 -5.34533598e-02\n 3.01733967e-02 1.60360977e-01 9.51667279e-02 -1.36446133e-01\n -4.18400392e-02 -9.26042721e-03 -9.38620940e-02 -4.67620976e-02\n 6....,[ 1.6486638e+00 -1.1094565e+00 -9.4886589e-01 -1.3494802e+00\n 6.9293666e-01 -3.8885394e-01 -4.1892567e-01 -6.4045787e-01\n -4.8484746e-01 1.6072028e+00 1.3026437e+00 -4.6125913e-01\n 5.1064324e-01 3.2075682e-01 -2.4733125e-01 5.9798145e-01\n 1.1830138e+00 1.9067453e+00 -1.5986559e+00 -1.3672378e+00\n -4.7626153e-01 3.0542215e-02 -1.1404893e+00 1.5089671e-01\n -1.2821639e-01 -2.4615743e+00 1.1710043e+00 4.4248950e-01\n 2.0232348e-02 -1.3634009e+00 2.5023085e-01 -5.0079095e-01\n 7.7684633e-02 -5.0292134e-01 1.9859959e+00 -2.1904244e+00\n -2.6620910e-02 3.3526114e-01 3.4893301e-01 -5.1105261e-01\n 4.9577385e-01 1.7680734e+00 1.3424283e-01 -1.8494413e+00\n -1.1553615e+00 -1.5761261e-01 -4.7332543e-01 -4.5217842e-02\n -4.4650114e-01 -7.2861034e-01 -2.2781046e-01 -8.796...,1,0,1,1
7,"According to economist Emily Patel, the unemployment rate at the United States would fall in January 2029.",1,[-1.08750269e-01 2.72238493e-01 3.78524251e-02 -1.44298999e-02\n -7.86941499e-03 -8.09559822e-02 5.32063805e-02 -2.51004640e-02\n 6.56329840e-02 1.97280908e+00 -4.00870115e-01 -1.77489463e-02\n 5.58234155e-02 1.25552053e-02 -7.33267888e-02 -4.50718440e-02\n -3.89597900e-02 8.34333241e-01 -7.61679709e-02 2.89964732e-02\n 9.25693288e-02 6.12872951e-02 1.35242745e-01 -1.65925056e-01\n -7.12398365e-02 5.60551584e-02 -1.58236861e-01 -2.81694834e-03\n -3.52622867e-02 1.37697384e-01 -3.92289199e-02 7.84029439e-02\n 5.65246828e-02 3.25252116e-02 5.15987910e-02 -1.62812620e-02\n -8.79744515e-02 4.32295687e-02 -1.34526670e-01 6.80115493e-03\n 6.51678368e-02 8.81631300e-03 9.56089050e-03 -1.14474200e-01\n 8.91714171e-02 7.47566074e-02 -3.57211567e-02 -1.92767218e-01\n 1....,[ 2.19080448e-02 1.91886961e-01 4.31601644e-01 5.81025220e-02\n -6.02424145e-01 -6.52395010e-01 1.08665836e+00 -3.38784814e-01\n 2.62229145e-01 7.11835086e-01 -1.36933386e+00 -5.94593823e-01\n 8.46086815e-02 2.54522979e-01 -3.24088573e-01 -1.72880158e-01\n -3.75609607e-01 -1.87900329e+00 6.27582133e-01 9.13411796e-01\n 1.33396566e+00 2.30592266e-02 2.25786185e+00 -1.08907819e+00\n -1.72109985e+00 -1.77503526e-01 -4.96107638e-01 -4.75835383e-01\n -9.81835246e-01 8.18100512e-01 -5.62954366e-01 1.05645645e+00\n 9.32255566e-01 -5.71937740e-01 -7.52258003e-02 6.87404573e-02\n -8.16216469e-01 -1.30001366e-01 -2.17889738e+00 3.93608242e-01\n 1.12687540e+00 -6.16498828e-01 -1.03399050e+00 -1.47613847e+00\n 1.32697082e+00 1.26251686e+00 3.41261387e-01 -1.83208275e+00\n 1....,1,1,1,1
8,"The Centers for Disease Control and Prevention's health screening participation should stay the same in late 2025, according to a report by the CDC.",1,[-5.03036678e-02 1.87489763e-01 -3.05069238e-02 -5.69656156e-02\n -1.62138101e-02 -9.16014537e-02 3.42771076e-02 -1.09264076e-01\n 1.87090393e-02 2.24549818e+00 -3.22809726e-01 -1.73693094e-02\n 1.24907926e-01 1.06444255e-01 -5.95280081e-02 -9.59863365e-02\n 4.98264506e-02 1.26422679e+00 -2.03949749e-01 -4.86984774e-02\n -1.55044356e-02 -2.76404414e-02 -6.09812587e-02 -8.41173008e-02\n 4.86957803e-02 1.62519962e-02 -7.08130524e-02 -2.27988921e-02\n 1.03943340e-01 -4.24967669e-02 -1.00552015e-01 -2.39724852e-02\n 8.43803957e-02 6.75017014e-02 1.52697787e-01 -2.53763963e-02\n -2.70479475e-03 3.22124735e-02 -1.43974777e-02 -1.51846251e-02\n 3.38993147e-02 1.55861691e-01 -5.80707975e-02 -1.77395374e-01\n 4.27497774e-02 -7.44838715e-02 -1.42478839e-01 1.11677259e-01\n 2....,[ 0.7402096 -0.94978744 -0.6728323 -0.6399512 -0.71403253 -0.82156444\n 0.7311103 -1.4314425 -0.34988302 1.6446239 -0.2858809 -0.58978117\n 1.0945317 1.8581153 -0.18192463 -1.1425827 1.2583218 1.1385343\n -1.5038265 -0.40310073 -0.9009066 -1.29661 -0.98397356 0.07956019\n 0.11796705 -0.7083741 0.85427624 -0.81491613 1.3738167 -1.7877324\n -1.6863942 -0.57412213 1.3700978 -0.07781618 1.969573 -0.08480617\n 0.3167413 -0.3033335 -0.10900614 0.06351389 0.5629683 1.6972766\n -1.956937 -2.5451682 0.44739935 -1.2600764 -1.154473 1.8938218\n -0.14515889 -1.2174587 -0.95868355 -0.8788726 1.2204343 -0.28456232\n -0.06379237 -0.01516396 0.34762335 1.4988242 0.3776272 0.7422517\n 0.55150896 1.9247674 1.0427295 0.01515606 1.8798522 ...,1,1,1,1
9,"In 08/2024, Coach Michael Brown envisions that the touchdown rate at the New England Patriots has some probability to remain stable.",1,[-2.95873899e-02 1.89737260e-01 -1.18197471e-01 -4.13006060e-02\n 1.10420622e-01 -4.95679379e-02 1.91853363e-02 3.95639287e-03\n 1.09274819e-01 1.97997952e+00 -1.85330868e-01 -4.54250947e-02\n 1.08167037e-01 2.54211240e-02 -9.49313119e-03 -6.84713572e-02\n 1.55237708e-02 1.00765920e+00 -6.00808859e-03 -4.15705629e-02\n -7.41642118e-02 -1.50733357e-02 1.34443464e-02 -8.06241706e-02\n -7.69194737e-02 1.14793696e-01 -2.15540364e-01 6.73782602e-02\n 3.61875258e-02 1.65809989e-02 -4.40017954e-02 2.61284299e-02\n 4.48011309e-02 -9.68891475e-03 3.49024460e-02 9.65972338e-03\n -2.66898591e-02 7.46985758e-03 -6.69648051e-02 -3.72406021e-02\n 7.12334365e-02 6.75264001e-02 7.85919055e-02 -7.79033080e-03\n 4.30065207e-02 -5.40831983e-02 -1.17614612e-01 -2.35915799e-02\n -4....,[ 9.94810045e-01 -9.19510782e-01 -2.08958673e+00 -3.82872701e-01\n 9.79735315e-01 -1.53601065e-01 4.47641909e-01 3.84470709e-02\n 8.31526935e-01 7.36362994e-01 1.62228131e+00 -9.45445299e-01\n 8.49802375e-01 4.74268407e-01 3.33566934e-01 -6.18540466e-01\n 6.27050281e-01 -6.62382126e-01 1.79785383e+00 -2.82320827e-01\n -2.11394024e+00 -1.11011660e+00 2.45618850e-01 1.29460186e-01\n -1.80819011e+00 6.05915606e-01 -1.38124096e+00 7.15332150e-01\n 2.27245882e-01 -9.33393896e-01 -6.50393546e-01 2.23857194e-01\n 7.47982144e-01 -1.16830730e+00 -4.12921190e-01 5.06683528e-01\n -1.94259407e-03 -6.92609370e-01 -1.01476967e+00 -2.67634422e-01\n 1.23626447e+00 3.07310998e-01 -9.19480622e-02 3.36418450e-01\n 4.52263981e-01 -9.15246308e-01 -8.06111395e-01 2.38351554e-01\n -1....,1,1,1,1


## Save Output

In [21]:
# Persist the combined truth-vs-prediction table in the same directory as the
# test split. NOTE(review): the recorded output shows the file written as
# 'llm_classifiers-v1.csv', so save_to_file presumably appends a version
# suffix to the stem — confirm in DataProcessing.
output_stem, output_ext = 'llm_classifiers', '.csv'
DataProcessing.save_to_file(test_and_models_df, combine_data_path, output_stem, output_ext)

Saved to: 
	/Users/detraviousjamaribrinkley/Documents/Development/research_labs/uf_ds/predictions/prediction_classification_experiments-v2/../data/combined_generated_fin_phrase_bank/llm_classifiers-v1.csv


## Evaluation

In [22]:
# Banner marking where the evaluation section starts in the captured output.
evaluation_banner = "======= EVALUATION/RESULTS ======="
print(evaluation_banner)



In [23]:
# Evaluator instance used by the per-model reporting loop in the next cell.
# The name reads like a function but is kept because later cells reference it.
# The instance is deliberately not echoed: its default repr only shows a
# memory address, which differs on every run and adds noise to the output.
get_metrics = EvaluationMetric()

<classification_models.EvaluationMetric at 0x318798950>

In [24]:
# Ground-truth labels; the same line is shown above every model's predictions
# so the two arrays can be compared by eye.
actual_label = test_and_models_df['Actual Label'].values
actual_label_line = f"Actual Label:\t\t{actual_label}"

for ml_model in models:
    ml_model_name = ml_model.__name__()
    print(actual_label_line)

    # Predictions were stored under a column named after the model.
    ml_model_predictions = test_and_models_df[ml_model_name].values
    print(f"{ml_model_name}:\t\t{ml_model_predictions}")

    # NOTE(review): the report is scored against y_test_df, not against the
    # actual_label array printed above. They should hold the same values, but
    # passing actual_label would keep the printed and scored truth in sync —
    # confirm which input type eval_classification_report expects.
    get_metrics.eval_classification_report(y_test_df, ml_model_predictions)

Actual Label:		[1 0 1 1 1 1 1 1 1 1 1 1 0 1 1 1 1 1 1 1 0]
llama-3.1-8b-instant:		[1 0 1 0 1 1 0 1 1 1 1 1 0 1 1 1 1 1 1 1 0]
              precision    recall  f1-score   support

           0       0.60      1.00      0.75         3
           1       1.00      0.89      0.94        18

    accuracy                           0.90        21
   macro avg       0.80      0.94      0.85        21
weighted avg       0.94      0.90      0.91        21

Actual Label:		[1 0 1 1 1 1 1 1 1 1 1 1 0 1 1 1 1 1 1 1 0]
llama-3.3-70b-versatile:		[1 0 1 0 1 1 1 1 1 1 1 1 0 1 1 1 1 1 1 1 0]
              precision    recall  f1-score   support

           0       0.75      1.00      0.86         3
           1       1.00      0.94      0.97        18

    accuracy                           0.95        21
   macro avg       0.88      0.97      0.91        21
weighted avg       0.96      0.95      0.96        21

Actual Label:		[1 0 1 1 1 1 1 1 1 1 1 1 0 1 1 1 1 1 1 1 0]
llama-3.3-70b-instruct:		[1 0 1 