# LLM Classifiers

**Goal:** Given a sentence as input, classify it as either a prediction or non-prediction.

In [1]:
import os
import sys

import pandas as pd

from tqdm import tqdm

# Directory the notebook kernel is running in; the data paths further down
# are built relative to it as well.
notebook_dir = os.getcwd()
# Put the parent directory on sys.path so the project-local imports that
# follow (data_processing, classification_models, text_generation_models)
# can be resolved — presumably those modules live one level up; verify if
# the notebook is moved or launched from a different working directory.
sys.path.append(os.path.join(notebook_dir, '../'))

# import log_files
from data_processing import DataProcessing
from classification_models import EvaluationMetric
from text_generation_models import TextGenerationModelFactory

In [2]:
# Relax pandas' display limits so long sentences are not truncated and no
# rows or columns are elided when a DataFrame is rendered.
for option_name, option_value in (
    ('max_colwidth', 800),
    ('display.max_columns', None),
    ('display.max_rows', None),
):
    pd.set_option(option_name, option_value)

## Load Data

In [3]:
# Console banner marking the start of the data-loading stage.
print("======= LOAD DATA =======")



In [4]:
# Locations of the held-out test split, resolved relative to the notebook's
# working directory.
base_data_path = os.path.join(notebook_dir, '../data/')
combine_data_path = os.path.join(base_data_path, 'combined_generated_fin_phrase_bank')
# Features (X) and labels (y) are stored as two separate CSV files.
X_test_set_path, y_test_set_path = (
    os.path.join(combine_data_path, file_name)
    for file_name in ('x_test_set-v1.csv', 'y_test_set-v1.csv')
)

In [5]:
# Load the test-set features through the project's DataProcessing helper.
X_test_df = DataProcessing.load_from_file(X_test_set_path, 'csv')
# Remove the 'Unnamed: 0' column — presumably the index that was written out
# when the CSV was saved; confirm against the code that exported the file.
X_test_df.drop(columns=['Unnamed: 0'], inplace=True)
# Preview the first 7 rows.
X_test_df.head(7)

Unnamed: 0,Base Sentence,Sentence Label,Embedding,Normalized Embeddings
0,"Clothing retail chain Sepp+Æl+Æ 's sales increased by 8 % to EUR 155.2 mn , and operating profit rose to EUR 31.1 mn from EUR 17.1 mn in 2004 .",0,[-1.81379244e-01 3.26961249e-01 1.23493910e-01 -6.83715194e-02\n 2.05705479e-01 -1.32566988e-01 6.46815300e-02 4.80639301e-02\n 1.11243963e-01 1.16633594e+00 -3.26421767e-01 4.92487364e-02\n -4.69185673e-02 7.74623454e-03 1.52571976e-01 -6.91895038e-02\n 2.16319803e-02 1.07760060e+00 -1.00143224e-01 -9.74733476e-03\n 3.12185083e-02 8.63965750e-02 -5.83441146e-02 -1.37795210e-01\n 1.45358860e-01 7.57031664e-02 -1.95762575e-01 -4.27063033e-02\n -4.27523181e-02 1.41373962e-01 -1.76752619e-02 -6.47352412e-02\n -7.01081008e-02 4.31978516e-02 4.62445021e-02 -1.18927144e-01\n -6.29041046e-02 -6.16270676e-02 7.53541365e-02 1.11510776e-01\n -4.29402962e-02 -5.15806749e-02 2.08475545e-01 -7.44214728e-02\n -3.59057635e-02 5.50432652e-02 4.90117073e-02 2.19748504e-02\n 5....,[-8.70693088e-01 9.29072917e-01 1.81525087e+00 -8.27133536e-01\n 2.25419474e+00 -1.47255635e+00 1.30219591e+00 6.11075044e-01\n 8.57213974e-01 -2.04687071e+00 -3.36014867e-01 2.54737914e-01\n -1.41734147e+00 1.72387421e-01 2.00326610e+00 -6.32218063e-01\n 7.39459515e-01 -1.71444774e-01 2.27673307e-01 2.56912142e-01\n 6.52836114e-02 3.95675719e-01 -9.40405071e-01 -6.87238336e-01\n 1.60017776e+00 8.45497325e-02 -1.07574511e+00 -1.15273201e+00\n -1.10858238e+00 8.71268451e-01 -1.68091118e-01 -1.22336841e+00\n -1.05818677e+00 -4.21162724e-01 -1.83520094e-01 -1.66415644e+00\n -4.83112693e-01 -1.77971220e+00 1.43746281e+00 1.96571779e+00\n -8.22783291e-01 -1.56685221e+00 1.68052936e+00 -7.95643389e-01\n -1.04292214e+00 9.29304719e-01 1.52841580e+00 7.96010494e-01\n 2....
1,"On 2027/08/20, the Federal Reserve speculates that interest rates at major banks will likely increase.",1,[-1.81962401e-01 2.51789659e-01 -6.82267621e-02 -1.99264623e-02\n -8.90381262e-02 -6.99648187e-02 -1.15359366e-01 -4.32647616e-02\n -7.00312927e-02 2.39203525e+00 -2.53967375e-01 4.79923822e-02\n 1.87749177e-01 3.72870415e-02 -6.69317171e-02 -6.66592419e-02\n -1.47542655e-01 1.03909945e+00 -1.73058122e-01 -3.13351192e-02\n -1.90051142e-02 1.15966164e-01 -1.18214618e-02 -8.65336806e-02\n -1.89228803e-02 1.55890629e-01 -1.02456011e-01 5.62909395e-02\n -1.22471154e-03 5.37542142e-02 -3.22786272e-02 9.06065181e-02\n 1.37776569e-01 -3.36506404e-03 3.07458919e-02 5.67997433e-02\n -5.82847483e-02 1.10756606e-01 -4.76526432e-02 -3.98236886e-02\n -5.81108145e-02 3.09332926e-02 8.60598236e-02 -6.81905076e-02\n 1.33367687e-01 4.05994616e-02 -1.25284176e-02 -8.88468400e-02\n -2....,[-0.87786 -0.08358515 -1.2822452 -0.03210158 -1.6880763 -0.47773218\n -2.0795083 -0.57460356 -1.5074805 2.1458838 0.66962874 0.23881108\n 2.0131867 0.6769342 -0.2582024 -0.5840274 -2.3738577 -0.44169456\n -0.98855215 -0.10888498 -0.9732976 0.8344823 -0.17179996 0.04504178\n -0.9188829 1.1540416 0.3655062 0.52718747 -0.40584785 -0.39582273\n -0.43562493 1.2508281 2.2093909 -1.0789686 -0.49699044 1.3025148\n -0.4217364 0.93240243 -0.6820107 -0.30641684 -1.0963736 -0.26848567\n 0.00996405 -0.6897794 2.1643772 0.6851629 0.6662045 -0.56026655\n -0.91432357 1.1897789 0.43965554 -0.77815723 -0.19758885 2.0060427\n 0.28865618 -0.09320762 0.3796414 -2.1365964 -1.618398 1.0528172\n 1.4662083 0.41146848 0.81602204 3.0982656 -0.9026434...
2,"Consolidated net sales increased 16 % to reach EUR74 .8 m , while operating profit amounted to EUR0 .9 m compared to a loss of EUR0 .7 m in the prior year period .",0,[-1.54190257e-01 2.46810824e-01 -6.17762655e-02 7.55247250e-02\n -2.77188867e-02 -1.83458388e-01 9.04082379e-04 7.04187900e-02\n 1.32708490e-01 1.65489316e+00 -3.38965923e-01 1.12873256e-01\n -4.17059436e-02 1.10604092e-02 2.69969776e-02 -9.27351117e-02\n 1.27250105e-02 1.32193303e+00 -2.12390900e-01 7.88436830e-02\n 7.81772956e-02 6.12183416e-04 -7.47607872e-02 1.64486654e-02\n 1.09230407e-01 4.25606733e-03 -8.09814483e-02 5.87075278e-02\n 2.88513191e-02 8.61533359e-02 -3.19531746e-02 -5.02092279e-02\n -7.97794461e-02 -1.97548941e-02 1.43823205e-02 -1.42352000e-01\n -5.04354797e-02 -2.86521036e-02 5.53551735e-03 -1.37289437e-02\n -6.20589417e-04 -4.15615886e-02 1.55246615e-01 7.41739478e-03\n -1.70165207e-02 1.47167966e-02 -2.34167334e-02 6.58851191e-02\n -1....,[-5.36543846e-01 -1.50656462e-01 -1.17802906e+00 1.53434801e+00\n -8.67915928e-01 -2.28128219e+00 1.04265496e-01 9.01297927e-01\n 1.13721383e+00 -3.75661194e-01 -5.10123670e-01 1.06130803e+00\n -1.34114003e+00 2.28992373e-01 7.09511518e-01 -1.08066082e+00\n 5.75544715e-01 1.54358983e+00 -1.64462554e+00 1.75805449e+00\n 1.03635085e+00 -8.77346814e-01 -1.21162653e+00 1.51616299e+00\n 1.04619193e+00 -8.68368506e-01 6.97211027e-01 5.68195403e-01\n 1.03101850e-01 7.27091059e-02 -4.29662645e-01 -9.92006242e-01\n -1.21020305e+00 -1.31051183e+00 -8.27955246e-01 -2.05962157e+00\n -3.17445040e-01 -1.26091659e+00 2.34450147e-01 8.53694826e-02\n -5.95753379e-02 -1.40920079e+00 9.54132438e-01 5.94797432e-01\n -6.85019314e-01 2.47671410e-01 5.13653159e-01 1.33340061e+00\n -7....
3,"The American Heart Association predicts on November 1, 2029, the obesity rates at the national level may rise.",1,[-1.44551620e-01 3.84713501e-01 4.10299003e-02 -1.51987616e-02\n -1.00560952e-02 -6.73255771e-02 1.77811161e-02 4.57233824e-02\n 8.39890912e-02 2.01573181e+00 -3.67580205e-01 -5.70408031e-02\n 1.22891583e-01 6.99080601e-02 1.97983291e-02 -5.66798002e-02\n 4.76624072e-03 1.21926570e+00 -1.45055830e-01 -3.63234729e-02\n 1.35041941e-02 -3.28723826e-02 -2.80004255e-02 -1.84945613e-01\n 3.26813050e-02 8.54878053e-02 -1.49334148e-01 4.50391369e-03\n 3.68039422e-02 1.91185996e-01 2.18118466e-02 1.32588789e-01\n 8.98898989e-02 1.37999743e-01 1.21656165e-01 6.33542910e-02\n -1.06478065e-01 8.26832876e-02 -4.30808812e-02 1.46262258e-01\n -2.19029468e-02 4.71463166e-02 -2.72156689e-02 -2.27241516e-02\n 8.14019889e-02 -8.10244754e-02 -7.50189573e-02 -1.04098104e-01\n 6....,[-0.41808617 1.70707 0.48293787 0.04548473 -0.63167155 -0.43579143\n 0.42126647 0.5806888 0.5016804 0.8586611 -0.9072807 -1.092698\n 1.0650555 1.2340899 0.6353464 -0.39396188 0.42907965 0.8229404\n -0.52147204 -0.19341081 -0.3010331 -1.3742509 -0.43909413 -1.3607903\n -0.12759547 0.21505134 -0.35859266 -0.35160506 0.23767695 1.5916128\n 0.55531317 1.9194982 1.4566973 0.9181272 1.3417342 1.4131705\n -1.0620697 0.4907245 -0.60323673 2.487476 -0.44338858 -0.01337191\n -1.5358676 0.08269311 1.17976 -1.3706315 -0.20932262 -0.7469172\n 0.5761304 -0.17463458 -1.0923884 0.02983202 0.9992851 -0.7784116\n 0.1491646 0.0532804 1.4128524 1.1143323 -0.23840122 -0.9047452\n -0.31026492 1.8315382 1.4846493 0.20744875 -1.2032983 -0.0...
4,"In Q4 of 2026, a fitness expert envisions that national physical activity levels have some probability to remain stable.",1,[-2.56676301e-02 2.35884875e-01 -5.70623763e-02 9.25376266e-03\n 3.80679183e-02 1.99117120e-02 -2.20596306e-02 3.64422277e-02\n 4.38902490e-02 1.99586248e+00 -2.19309464e-01 4.75243432e-03\n 6.30175620e-02 1.40980601e-01 2.74290405e-02 5.14714513e-03\n 1.75932813e-02 1.19517374e+00 -2.62635082e-01 4.77114553e-03\n -2.17727609e-02 -5.66069875e-03 -4.13805014e-03 -5.45928031e-02\n -5.20204119e-02 8.13232362e-02 2.63057854e-02 4.15404513e-02\n 1.15059577e-01 5.07648522e-03 -5.61902821e-02 -6.78571919e-03\n 6.34850040e-02 -3.96144316e-02 8.60374272e-02 -4.96014059e-02\n 1.31226555e-01 6.61965758e-02 -4.36082631e-02 -5.92541881e-02\n 1.17349532e-02 9.58479047e-02 5.51239252e-02 1.70102005e-03\n -2.17258595e-02 -8.00878834e-03 -4.81235199e-02 1.45418672e-02\n 3....,[ 1.0429834e+00 -2.9784304e-01 -1.1018701e+00 4.4677514e-01\n 1.1999425e-02 9.5051461e-01 -3.2706141e-01 4.6019578e-01\n -2.1399880e-02 7.9069394e-01 1.1506693e+00 -3.0934313e-01\n 1.8977746e-01 2.4479842e+00 7.1396297e-01 7.8357601e-01\n 6.6513538e-01 6.5383250e-01 -2.4827020e+00 5.0292248e-01\n -1.0305301e+00 -9.7043514e-01 -4.4861581e-02 5.0132287e-01\n -1.4263933e+00 1.5950683e-01 2.3544133e+00 2.7688116e-01\n 1.5619268e+00 -1.0997636e+00 -8.7368679e-01 -3.0038095e-01\n 1.0416594e+00 -1.5910726e+00 6.2132007e-01 -4.9377966e-01\n 2.0962553e+00 2.3133886e-01 -6.1232382e-01 -5.9814620e-01\n 1.6324870e-01 7.5295305e-01 -4.1220751e-01 4.9767631e-01\n -7.7424926e-01 -1.3645633e-01 1.6749740e-01 7.0504284e-01\n -4.8316368e-03 -1.2965175e+00 -1.5054924e+00 -5.146...
5,JPMorgan forecasts that the revenue at Microsoft potentially decrease in Q3 of 2027.,1,[-2.24677563e-01 2.28302136e-01 -3.20941433e-02 -1.78482141e-02\n -5.66348508e-02 2.05190871e-02 -3.81574295e-02 1.41838089e-01\n 1.64982721e-01 1.45077002e+00 -3.47716570e-01 4.82952334e-02\n 1.09031864e-01 4.18408737e-02 6.93667382e-02 7.94899929e-03\n -2.80997157e-02 1.02238500e+00 -1.17930911e-01 -1.29385918e-01\n 4.74877171e-02 1.93608850e-01 6.20140657e-02 -1.79264918e-01\n 6.11357428e-02 1.66994065e-01 -9.25592855e-02 5.74763976e-02\n 2.44720709e-02 6.20023720e-02 -3.91931273e-02 -3.54092242e-03\n -2.73218546e-02 1.84805289e-01 -8.36593192e-03 -5.23123983e-03\n 3.49834897e-02 1.63400516e-01 1.24472668e-02 1.79933552e-02\n 2.80983802e-02 1.24136940e-01 1.31817430e-01 1.38525711e-02\n 4.25122082e-02 -9.02827159e-02 -2.97950823e-02 -7.99663663e-02\n -1....,[-1.40282404e+00 -3.99992287e-01 -6.98475897e-01 2.00455543e-03\n -1.25467432e+00 9.60166514e-01 -6.29426062e-01 1.82850206e+00\n 1.55822384e+00 -1.07390594e+00 -6.31579876e-01 2.42650330e-01\n 8.62444937e-01 7.54712045e-01 1.14603209e+00 8.36939335e-01\n -1.75751880e-01 -5.59017777e-01 -6.90265298e-02 -1.77031994e+00\n 4.01716799e-01 1.98668361e+00 1.04804373e+00 -1.27964044e+00\n 3.08718741e-01 1.30213261e+00 5.18375039e-01 5.47303915e-01\n 2.89957542e-02 -2.76544064e-01 -5.62298656e-01 -2.48699635e-01\n -3.85662943e-01 1.57936072e+00 -1.28805482e+00 2.55290061e-01\n 8.17496598e-01 1.76064968e+00 3.53543341e-01 5.61647832e-01\n 4.58352298e-01 1.19808412e+00 6.34402335e-01 7.04130948e-01\n 4.42898035e-01 -1.52712226e+00 4.24288988e-01 -4.51584041e-01\n -2....
6,"Dr. David Kim predicts on 21 August 2024, the average salary at Google may rise.",1,[-2.19678044e-01 3.58231366e-01 1.11611769e-01 8.27183574e-02\n -4.10397351e-03 5.94087541e-02 8.42853040e-02 -1.26476273e-01\n 1.93848222e-01 1.58560538e+00 -3.83215278e-01 -6.82659000e-02\n 2.65631787e-02 3.18739861e-02 -1.55770555e-01 7.61794150e-02\n -3.25269401e-02 1.08684051e+00 -5.73244579e-02 -4.11688425e-02\n 1.93334278e-02 1.30204648e-01 -2.55622957e-02 -9.50331837e-02\n 4.36590202e-02 7.60001764e-02 -2.54368067e-01 2.70699663e-03\n 1.01018883e-01 1.83981180e-01 1.99133120e-02 3.77997048e-02\n 1.31281056e-02 8.22375417e-02 -5.96388383e-03 8.24570842e-03\n -4.85837013e-02 1.42140388e-01 -4.78982292e-02 1.32265864e-02\n -6.44472912e-02 -6.19467013e-02 9.69798863e-02 1.17050722e-01\n 5.60283400e-02 -1.26295071e-02 -9.29180533e-02 -1.13482483e-01\n 1....,[-1.3413806 1.3503217 1.6232795 1.6524028 -0.5520604 1.5781704\n 1.6704131 -1.6549006 1.9347678 -0.6126742 -1.1242905 -1.234999\n -0.34313682 0.58448106 -1.1734774 2.1364357 -0.257226 -0.10658737\n 0.9418932 -0.27551383 -0.1804896 1.0457784 -0.39881358 -0.07637514\n 0.04073445 0.08851107 -1.9809895 -0.38209757 1.3243288 1.4874222\n 0.520532 0.4097507 0.2501385 0.13036071 -1.2394717 0.48281172\n -0.2928409 1.4261639 -0.6862423 0.49007958 -1.2106477 -1.7299627\n 0.15898639 2.4574656 0.6989942 -0.21455975 -0.4600989 -0.8617667\n 1.7205554 0.7295758 0.62539285 1.5202314 -0.06971423 -1.8725423\n -1.384728 0.40215185 0.64634436 0.6856444 0.09397434 -2.1451356\n -1.291805 -0.5888874 0.942006 -0.07066136 0.75399065 -0....


In [6]:
# Load the test-set labels through the project's DataProcessing helper.
y_test_df = DataProcessing.load_from_file(y_test_set_path, 'csv')
# Remove the 'Unnamed: 0' column — presumably the index that was written out
# when the CSV was saved; confirm against the code that exported the file.
y_test_df.drop(columns=['Unnamed: 0'], inplace=True)
# print(f"\t{y_test_df.head(7)}")

In [None]:
# Report the dimensions and the first three rows of the features frame,
# then of the labels frame.
for split_df in (X_test_df, y_test_df):
    print(f"\tShape: {split_df.shape}, \nSubset of Data:{split_df.head(3)}")

In [None]:
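# NOTE: disabled — after loading from CSV the 'Embedding' and
# 'Normalized Embeddings' cells are plain str, so the `.shape` access below
# raises AttributeError (see the recorded output of this cell).
# for idx, row in X_test_df.iterrows():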
#     text = row['Base Sentence']
#     label = row['Sentence Label']
#     embedding = row['Embedding']
#     print(type(embedding))
#     norm_embedding = row['Normalized Embeddings']
#     if idx < 7:
#         print(f"{idx}\n Sentence: {text}\n Label: {label}\n Embeddings Shape: {embedding}\n\t Embeddings Subset [:6]: {embedding[:6]} \n Norm Embeddings: {norm_embedding.shape}, \n\tNorm Embeddings Subset [:6]: {norm_embedding[:6]}")
# print(f"\tShape: {y_test_df.shape}, \nSubset of Data:{y_test_df.head(7)}")

<class 'str'>


AttributeError: 'str' object has no attribute 'shape'

## Load Prompt

In [None]:
# prediction_properties = PredictionProperties.get_prediction_properties()
# prediction_requirements = PredictionProperties.get_requirements()
# system_identity_prompt = "You are an expert at identifying specific types of sentences by knowing the sentence format."
# prediction_examples_prompt = """Some examples of predictions in the PhraseBank dataset are
#     1. According to Gran , the company has no plans to move all production to Russia , although that is where the company is growing .
#     2. According to the company 's updated strategy for the years 2009-2012 , Basware targets a long-term net sales growth in the range of 20 % -40 % with an operating profit margin of 10 % -20 % of net sales .
#     3. Its board of directors will propose a dividend of EUR0 .12 per share for 2010 , up from the EUR0 .08 per share paid in 2009 .
# """
# non_prediction_examples_prompt = """Some examples of non-predictions in the PhraseBank dataset are
#     1. Net sales increased to EUR193 .3 m from EUR179 .9 m and pretax profit rose by 34.2 % to EUR43 .1 m. ( EUR1 = USD1 .4 )
#     2. Net sales surged by 18.5 % to EUR167 .8 m. Teleste said that EUR20 .4 m , or 12.2 % , of the sales came from the acquisitions made in 2009 .
#     3. STORA ENSO , NORSKE SKOG , M-REAL , UPM-KYMMENE Credit Suisse First Boston ( CFSB ) raised the fair value for shares in four of the largest Nordic forestry groups .
# """
# # goal_prompt = "Given the above, identify the prediction."

# base_prompt = f"""{system_identity_prompt} The sentence format is based on: 
    
#     {prediction_properties}
#     Enforce: {prediction_requirements}
#     Know: {prediction_examples_prompt}
#     Know: {non_prediction_examples_prompt}

# """
# base_prompt

In [None]:
# Shared instruction text handed to llm_certifier as `base_prompt`: it sets
# the model's role, defines what a prediction is, and lists labelled example
# sentences.
# NOTE(review): the last example uses curly quotes and has no space before
# "(Yes)", unlike the three examples above it — confirm whether that is
# intentional before changing it, since this text is sent to the models.
prompt_1 = """ 

Role: 
You are a linguist expert. You are acting as a prediction detector. Your task is to identify if a given sentence is a prediction about the future.

Background:
A prediction is a statement about what someone thinks will happen in the future.
Examples of predictions:
- "It will rain tomorrow." (Yes)
- "The stock market is expected to rise next quarter." (Yes)
- "I am going to the store." (No)
- “Lakers will win the championship. ”(Yes)

A prediction may contain: source, target, date, outcome.
"""

## Models

In [None]:
# Factory used to build the model objects that are queried further down.
tgmf = TextGenerationModelFactory()

# Choose which models to run. Alternatives that can be swapped in:
#   - specific models:
#       models = tgmf.create_instances(['llama-3.1-8b-instant', 'llama-3.3-70b-versatile'])
#   - all Groq models:
#       models = tgmf.create_instances(tgmf.get_groq_model_names())
#   - all available models:
#       models = tgmf.create_instances()
#   - mix and match:
#       custom_models = ['llama-3.1-70b-instruct', 'mistral-small-3.1', 'llama-3.1-8b-instant']
#       models = tgmf.create_instances(custom_models)
# Active choice: all NaviGator models.
navigator_model_names = tgmf.get_navigator_model_names()
models = tgmf.create_instances(navigator_model_names)
models

In [None]:
import json
import re

def parse_json_response(response):
    """Extract ``label`` and ``reasoning`` from an LLM reply.

    The reply may wrap the JSON object in extra text. Every ``{`` is tried
    in turn as the start of a JSON value, and the first one that decodes to
    an object (dict) is used, so trailing prose or stray braces after the
    object do not break parsing.

    Args:
        response: Raw text returned by the model.

    Returns:
        A ``(label, reasoning)`` tuple taken from the decoded object; a key
        that is absent yields ``None`` in its slot. ``(None, None)`` is
        returned when ``response`` is not a string or contains no decodable
        JSON object, so callers can always unpack two values.
    """
    if not isinstance(response, str):
        print(f"Error parsing JSON: expected str, got {type(response).__name__}")
        return None, None

    decoder = json.JSONDecoder()
    last_error = None
    start = response.find('{')
    while start != -1:
        try:
            # raw_decode stops at the end of the first complete JSON value,
            # so whatever follows the object is simply ignored.
            data, _ = decoder.raw_decode(response, start)
        except json.JSONDecodeError as e:
            last_error = e
        else:
            if isinstance(data, dict):
                return data.get('label'), data.get('reasoning')
        start = response.find('{', start + 1)

    if last_error is not None:
        print(f"Error parsing JSON: {last_error}")
    return None, None

In [None]:
def llm_certifier(data: str, base_prompt: str, model):
    """Ask one LLM whether a sentence is a prediction.

    Embeds ``base_prompt`` and the sentence ``data`` in an instruction that
    requests a JSON reply, sends it to ``model``, and parses the reply with
    ``parse_json_response``.

    Args:
        data: Sentence to classify.
        base_prompt: Background/role text placed at the start of the prompt.
        model: Object providing ``user(prompt)`` and
            ``chat_completion(messages)`` — presumably one of the instances
            built by the notebook's model factory; verify.

    Returns:
        A tuple ``(raw_text_llm_generation, label, reasoning)``: whatever
        ``chat_completion`` returned, plus the two values
        ``parse_json_response`` extracted from it.
    """
    # NOTE: the continuation lines of this literal keep their original
    # leading whitespace because it is part of the text sent to the model.
    prompt = f""" Given this: {base_prompt}. Also given the sentence '{data}', your task is to analyze the sentence and determine if it is a prediction. If prediction, generate label as 1 and if non-prediction generate label as 0.
        Respond ONLY with valid JSON in this exact format:
        {{"label": 0, "reasoning": "your explanation here"}}
        Examples:
        - "It will rain tomorrow." → {{"label": 1, "reasoning": "Contains the future tense words 'will' and 'tomorrow'"}}
        - "The stock market is expected to rise next quarter." → {{"label": 1, "reasoning": "Contains future tense words 'is expected'"}}
        - "I am going to the store." → {{"label": 0, "reasoning": "Does not contain a future tense word"}}
        - "Lakers will win the championship." → {{"label": 1, "reasoning": "Contains the future tense word 'will'"}}
        """
    input_prompt = model.user(prompt)
    raw_text_llm_generation = model.chat_completion([input_prompt])

    # Parse the JSON response
    label, reasoning = parse_json_response(raw_text_llm_generation)

    return raw_text_llm_generation, label, reasoning

In [None]:
# Console banner marking the start of the LLM labeling stage.
print("======= PROMPT + MODEL -> LABEL and REASONING =======")

In [None]:
    # content : meta :: text : meta_data
# Query every model for every test sentence and collect one record per
# (sentence, model) pair, in row-major order: all models for the first
# sentence, then all models for the second, and so on.
results = []
for idx, row in X_test_df.iterrows():
    text = row['Base Sentence']
    print(f"{idx} --- Sentence: {text}")
    for model in models:
        # Look the name up once; it is needed for both the log line and the record.
        model_name = model.__name__()
        raw_response, llm_label, llm_reasoning = llm_certifier(text, prompt_1, model)
        print(f"\tModel: {model_name}\n\t\tLabel: {llm_label}\n\t\tReasoning: {llm_reasoning}")
        result = (text, raw_response, llm_label, llm_reasoning, model_name)
        results.append(result)


In [None]:
results

In [None]:
# groupby text

In [None]:
# Tabulate the collected tuples; the column order mirrors the order in which
# each result tuple was assembled.
result_columns = ['text', 'raw_response', 'llm_label', 'llm_reasoning', 'llm_name']
results_with_llm_label_df = pd.DataFrame(results, columns=result_columns)
results_with_llm_label_df

In [None]:
# Rename the label column to 'Actual Label', the name the evaluation cell
# reads back once the frames have been concatenated.
y_test_df.rename(columns={'Sentence Label' : 'Actual Label'}, inplace=True)

In [None]:
def get_llm_labels(df, model_name):
    """Return the ``llm_label`` values of the rows produced by one model.

    Args:
        df: Frame with at least ``llm_name`` and ``llm_label`` columns.
        model_name: Value of ``llm_name`` whose rows are kept.

    Returns:
        The ``llm_label`` column restricted to the matching rows, with the
        original index labels preserved.
    """
    rows_for_model = df['llm_name'] == model_name
    return df.loc[rows_for_model, 'llm_label']


# Put the test features and the ground-truth labels side by side, then add
# one column per model holding that model's labels.
test_and_models_df = pd.concat([X_test_df, y_test_df], axis=1)
for model in models:
    print(model.__name__())
    labels_for_model = get_llm_labels(results_with_llm_label_df, model.__name__())
    # Assign by position (plain array), not by index: the labels come from
    # the results frame, whose index does not match this frame's index.
    test_and_models_df[model.__name__()] = labels_for_model.to_numpy().ravel()
test_and_models_df

## Evaluation

In [None]:
# Console banner marking the start of the evaluation stage.
print("======= EVALUATION/RESULTS =======")

In [None]:
# Metrics helper from the project's classification_models module; the
# evaluation loop calls its eval_classification_report method.
get_metrics = EvaluationMetric()
get_metrics

In [None]:
# Ground-truth labels as an array, in the row order of the combined frame.
actual_label = test_and_models_df['Actual Label'].values
for ml_model in models:
    # Each model's labels are stored in the column named after the model.
    ml_model_name = ml_model.__name__()
    print(f"Actual Label:\t\t{actual_label}")
    ml_model_predictions = test_and_models_df[ml_model_name].values
    print(f"{ml_model_name}:\t\t{ml_model_predictions}")
    # NOTE(review): the report receives the y_test_df DataFrame rather than
    # the actual_label array printed above — confirm that this is what
    # eval_classification_report expects as its first argument.
    get_metrics.eval_classification_report(y_test_df, ml_model_predictions)