# Entailment Ratio

- PAPER: [Factuality Enhanced Language Models for Open-Ended Text Generation](https://proceedings.neurips.cc/paper_files/paper/2022/hash/df438caa36714f69277daa92d608dd63-Abstract-Conference.html)


In [1]:
import os, sys

import numpy as np
import pandas as pd

from tqdm import tqdm

# Get the current working directory of the notebook
notebook_dir = os.getcwd()
# Add the parent directory to the system path
sys.path.append(os.path.join(notebook_dir, '../'))

import log_files
from log_files import LogData
from data_processing import DataProcessing
from feature_extraction import SpacyFeatureExtraction, TfidfFeatureExtraction

In [2]:
# Notebook display settings: allow up to 800 characters per cell so long
# sentences are not cut off, and lift the caps on displayed columns and rows.
pd.set_option(
    'max_colwidth', 800,
    'display.max_columns', None,
    'display.max_rows', None,
)

## Read the CSV log files and load them as DataFrames

In [3]:
# Load the saved prediction batches found under the log directory into a DataFrame.
log_file_path = "data/prediction_logs"
# The flag gets its own name instead of `predictions`: a later cell binds
# `predictions` to the list of sentences, and re-running this cell would
# otherwise silently replace that list with a boolean.
# NOTE(review): the flag presumably tells read_data to look for the
# "batch_N-prediction" directories (see the log output) — confirm in log_files.
is_prediction_log = True
predictions_df = log_files.read_data(notebook_dir, log_file_path, is_prediction_log)
# Preview the first rows only.
predictions_df.head(7)

Start logging batch
log_directory: /Users/detraviousjamaribrinkley/Documents/Development/research_labs/uf_ds/predictions/notebook_experiments/../data/prediction_logs
save_batch_directory: /Users/detraviousjamaribrinkley/Documents/Development/research_labs/uf_ds/predictions/notebook_experiments/../data/prediction_logs/batch_1-prediction
CSV to DF
Load saved csv: /Users/detraviousjamaribrinkley/Documents/Development/research_labs/uf_ds/predictions/notebook_experiments/../data/prediction_logs/batch_1-prediction/batch_1-from_df.csv
save_batch_directory: /Users/detraviousjamaribrinkley/Documents/Development/research_labs/uf_ds/predictions/notebook_experiments/../data/prediction_logs/batch_2-prediction
CSV to DF
Load saved csv: /Users/detraviousjamaribrinkley/Documents/Development/research_labs/uf_ds/predictions/notebook_experiments/../data/prediction_logs/batch_2-prediction/batch_2-from_df.csv
save_batch_directory: /Users/detraviousjamaribrinkley/Documents/Development/research_labs/uf_ds/pr

Unnamed: 0,Base Sentence,Sentence Label,Domain,Model Name,API Name,Batch ID,Template Number
0,JPMorgan Chase forecasts that the net profit at Amazon potentially decrease in Q3 of 2027.,1,finance,llama-3.1-70b-instruct,NAVI_GATOR,0,1
1,"On August 21, 2024, Bank of America speculates the revenue at Microsoft will likely increase.",1,finance,llama-3.1-70b-instruct,NAVI_GATOR,0,2
2,"Citigroup predicts on 2024-08-21, the operating income at Alphabet may rise.",1,finance,llama-3.1-70b-instruct,NAVI_GATOR,0,3
3,"According to Goldman Sachs, the research and development expenses at Facebook would fall in 2025.",1,finance,llama-3.1-70b-instruct,NAVI_GATOR,0,4
4,"In 21 August 2024, Morgan Stanley envisions that the gross profit at Johnson & Johnson has some probability to remain stable.",1,finance,llama-3.1-70b-instruct,NAVI_GATOR,0,5
5,"The stock price at Visa should stay same in Q2 of 2026, according to Wells Fargo.",1,finance,llama-3.1-70b-instruct,NAVI_GATOR,0,6
6,JPMorgan forecasts that the revenue at Microsoft potentially decrease in Q3 of 2027.,1,finance,llama-3.3-70b-instruct,NAVI_GATOR,0,1


In [4]:
# Load the saved observation batches found under the log directory into a DataFrame.
log_file_path = "data/observation_logs"
# The flag gets its own name instead of `predictions`: a later cell binds
# `predictions` to the list of sentences, and re-running this cell would
# otherwise silently replace that list with a boolean.
# NOTE(review): False presumably tells read_data to look for the
# "batch_N-observation" directories (see the log output) — confirm in log_files.
is_prediction_log = False
observations_df = log_files.read_data(notebook_dir, log_file_path, is_prediction_log)
# Preview the first rows only.
observations_df.head(7)

Start logging batch
log_directory: /Users/detraviousjamaribrinkley/Documents/Development/research_labs/uf_ds/predictions/notebook_experiments/../data/observation_logs
save_batch_directory: /Users/detraviousjamaribrinkley/Documents/Development/research_labs/uf_ds/predictions/notebook_experiments/../data/observation_logs/batch_1-observation
CSV to DF
Load saved csv: /Users/detraviousjamaribrinkley/Documents/Development/research_labs/uf_ds/predictions/notebook_experiments/../data/observation_logs/batch_1-observation/batch_1-from_df.csv
save_batch_directory: /Users/detraviousjamaribrinkley/Documents/Development/research_labs/uf_ds/predictions/notebook_experiments/../data/observation_logs/batch_2-observation
CSV to DF
Load saved csv: /Users/detraviousjamaribrinkley/Documents/Development/research_labs/uf_ds/predictions/notebook_experiments/../data/observation_logs/batch_2-observation/batch_2-from_df.csv
save_batch_directory: /Users/detraviousjamaribrinkley/Documents/Development/research_labs

Unnamed: 0,Base Sentence,Sentence Label,Domain,Model Name,API Name,Batch ID,Template Number
0,The financial analyst at Goldman Sachs observed that the operating income at Tesla had increased in the first quarter of 2024.,0,finance,llama-3.1-70b-instruct,NAVI_GATOR,0,1
1,"On 2024-08-20 to 2025-08-20, Morgan Stanley speculates the stock price at Amazon will likely rise.",0,finance,llama-3.1-70b-instruct,NAVI_GATOR,0,2
2,"A young investor predicts on 2025-03-15, the S&P 500 index may rise.",0,finance,llama-3.1-70b-instruct,NAVI_GATOR,0,3
3,"According to Bank of America, the net profit at Microsoft would fall in the second quarter of 2026.",0,finance,llama-3.1-70b-instruct,NAVI_GATOR,0,4
4,"In 2027-01-01 to 2027-12-31, Wells Fargo envisions that the interest rates at the Federal Reserve have some probability to remain stable.",0,finance,llama-3.1-70b-instruct,NAVI_GATOR,0,5
5,"The trading volume at Apple should stay same in the fourth quarter of 2025, according to a financial expert at JPMorgan Chase.",0,finance,llama-3.1-70b-instruct,NAVI_GATOR,0,6
6,JPMorgan observed that the net profit at Microsoft had risen in September 2023.,0,finance,llama-3.3-70b-instruct,NAVI_GATOR,0,1


In [5]:
# Pull the sentence text out of each frame so it can be fed to the NLI model.
# df_to_list is a project helper; presumably it returns the column's values
# as a plain list — confirm in data_processing.
col_name = 'Base Sentence'
predictions = DataProcessing.df_to_list(predictions_df, col_name)
observations = DataProcessing.df_to_list(observations_df, col_name)
# Show how many prediction and observation sentences were loaded.
len(predictions), len(observations)

(694, 1891)

In [6]:
import torch
import pandas as pd

from tqdm import tqdm
from transformers import RobertaTokenizer, RobertaForSequenceClassification

def get_entailment(
    predictions: list,
    observations: list,
    max_predictions: "int | None" = 3,
    max_observations: "int | None" = 33,
) -> pd.DataFrame:
    """Label (prediction, observation) sentence pairs with an MNLI classifier.

    Every selected prediction is paired with every selected observation. Each
    pair is run through ``roberta-large-mnli`` and the name of the
    highest-scoring class (taken from the model config's ``id2label`` mapping)
    is recorded for that pair.

    NOTE(review): the prediction is passed as the first text of the pair and
    the observation as the second. NLI models conventionally treat the first
    text as the premise — confirm this direction is the intended one.

    Args:
        predictions: Prediction sentences (first text of each pair).
        observations: Observation sentences (second text of each pair).
        max_predictions: Score only the first ``max_predictions`` predictions.
            Defaults to 3 (the previously hard-coded limit); ``None`` scores all.
        max_observations: Score only the first ``max_observations``
            observations. Defaults to 33 (the previously hard-coded limit);
            ``None`` scores all.

    Returns:
        A DataFrame with one row per scored pair and the columns
        "Prediction", "Observation" and "Entailment Label".
    """
    column_names = ["Prediction", "Observation", "Entailment Label"]

    # Slicing with None as the stop keeps the whole sequence.
    selected_predictions = predictions[:max_predictions]
    selected_observations = observations[:max_observations]

    # Nothing to score: return the empty result without loading the checkpoint.
    if len(selected_predictions) == 0 or len(selected_observations) == 0:
        return pd.DataFrame(columns=column_names)

    # Load a tokenizer and a model fine-tuned for sequence classification on MNLI
    tokenizer = RobertaTokenizer.from_pretrained('roberta-large-mnli')
    model = RobertaForSequenceClassification.from_pretrained('roberta-large-mnli')
    model.eval()

    rows = []
    # Inference only, so gradient tracking is switched off for the whole loop.
    with torch.no_grad():
        for prediction in tqdm(selected_predictions):
            for observation in selected_observations:
                # Tokenize the pair; truncation keeps an over-long pair within
                # the model's maximum input length instead of failing.
                inputs = tokenizer(
                    prediction,
                    observation,
                    return_tensors='pt',
                    truncation=True,
                )
                logits = model(**inputs).logits

                # One pair per forward pass -> logits has a single row; take
                # the class with the highest score along the class axis.
                predicted_class_id = logits.argmax(dim=-1).item()

                # The model's config has a mapping from class ID to label
                # For roberta-large-mnli: 0 -> contradiction, 1 -> neutral, 2 -> entailment
                entailment_label = model.config.id2label[predicted_class_id]
                rows.append((prediction, observation, entailment_label))

    return pd.DataFrame(rows, columns=column_names)

In [None]:
# Label prediction/observation pairs with the MNLI model. The checkpoint is
# loaded inside get_entailment, so every run of this cell reloads it.
entailment_df = get_entailment(predictions, observations)

Some weights of the model checkpoint at roberta-large-mnli were not used when initializing RobertaForSequenceClassification: ['roberta.pooler.dense.bias', 'roberta.pooler.dense.weight']
- This IS expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
100%|██████████| 3/3 [00:05<00:00,  1.94s/it]


Unnamed: 0,Prediction,Observation,Entailment Label
0,JPMorgan Chase forecasts that the net profit at Amazon potentially decrease in Q3 of 2027.,The financial analyst at Goldman Sachs observed that the operating income at Tesla had increased in the first quarter of 2024.,CONTRADICTION
1,JPMorgan Chase forecasts that the net profit at Amazon potentially decrease in Q3 of 2027.,"On 2024-08-20 to 2025-08-20, Morgan Stanley speculates the stock price at Amazon will likely rise.",CONTRADICTION
2,JPMorgan Chase forecasts that the net profit at Amazon potentially decrease in Q3 of 2027.,"A young investor predicts on 2025-03-15, the S&P 500 index may rise.",CONTRADICTION
3,JPMorgan Chase forecasts that the net profit at Amazon potentially decrease in Q3 of 2027.,"According to Bank of America, the net profit at Microsoft would fall in the second quarter of 2026.",CONTRADICTION
4,JPMorgan Chase forecasts that the net profit at Amazon potentially decrease in Q3 of 2027.,"In 2027-01-01 to 2027-12-31, Wells Fargo envisions that the interest rates at the Federal Reserve have some probability to remain stable.",CONTRADICTION
5,JPMorgan Chase forecasts that the net profit at Amazon potentially decrease in Q3 of 2027.,"The trading volume at Apple should stay same in the fourth quarter of 2025, according to a financial expert at JPMorgan Chase.",NEUTRAL
6,JPMorgan Chase forecasts that the net profit at Amazon potentially decrease in Q3 of 2027.,JPMorgan observed that the net profit at Microsoft had risen in September 2023.,CONTRADICTION


In [10]:
# Display the labelled pairs (every row is rendered because display.max_rows is None).
entailment_df

Unnamed: 0,Prediction,Observation,Entailment Label
0,JPMorgan Chase forecasts that the net profit at Amazon potentially decrease in Q3 of 2027.,The financial analyst at Goldman Sachs observed that the operating income at Tesla had increased in the first quarter of 2024.,CONTRADICTION
1,JPMorgan Chase forecasts that the net profit at Amazon potentially decrease in Q3 of 2027.,"On 2024-08-20 to 2025-08-20, Morgan Stanley speculates the stock price at Amazon will likely rise.",CONTRADICTION
2,JPMorgan Chase forecasts that the net profit at Amazon potentially decrease in Q3 of 2027.,"A young investor predicts on 2025-03-15, the S&P 500 index may rise.",CONTRADICTION
3,JPMorgan Chase forecasts that the net profit at Amazon potentially decrease in Q3 of 2027.,"According to Bank of America, the net profit at Microsoft would fall in the second quarter of 2026.",CONTRADICTION
4,JPMorgan Chase forecasts that the net profit at Amazon potentially decrease in Q3 of 2027.,"In 2027-01-01 to 2027-12-31, Wells Fargo envisions that the interest rates at the Federal Reserve have some probability to remain stable.",CONTRADICTION
5,JPMorgan Chase forecasts that the net profit at Amazon potentially decrease in Q3 of 2027.,"The trading volume at Apple should stay same in the fourth quarter of 2025, according to a financial expert at JPMorgan Chase.",NEUTRAL
6,JPMorgan Chase forecasts that the net profit at Amazon potentially decrease in Q3 of 2027.,JPMorgan observed that the net profit at Microsoft had risen in September 2023.,CONTRADICTION
7,JPMorgan Chase forecasts that the net profit at Amazon potentially decrease in Q3 of 2027.,"On November 10, 2022, to November 10, 2023, Citigroup speculates the research and development expenses at Amazon will likely increase.",NEUTRAL
8,JPMorgan Chase forecasts that the net profit at Amazon potentially decrease in Q3 of 2027.,"Bank of America predicts on October 15, 2024, the operating income at Johnson & Johnson may rise.",CONTRADICTION
9,JPMorgan Chase forecasts that the net profit at Amazon potentially decrease in Q3 of 2027.,"According to Google, the gross profit at Facebook would fall in the second quarter of 2026.",CONTRADICTION


In [21]:
# Boolean mask that is True for every pair NOT labelled CONTRADICTION,
# so NEUTRAL rows are kept alongside any ENTAILMENT rows.
contridiction_filt = entailment_df['Entailment Label'].ne('CONTRADICTION')
entails_df = entailment_df[contridiction_filt]
entails_df

Unnamed: 0,Prediction,Observation,Entailment Label
5,JPMorgan Chase forecasts that the net profit at Amazon potentially decrease in Q3 of 2027.,"The trading volume at Apple should stay same in the fourth quarter of 2025, according to a financial expert at JPMorgan Chase.",NEUTRAL
7,JPMorgan Chase forecasts that the net profit at Amazon potentially decrease in Q3 of 2027.,"On November 10, 2022, to November 10, 2023, Citigroup speculates the research and development expenses at Amazon will likely increase.",NEUTRAL
10,JPMorgan Chase forecasts that the net profit at Amazon potentially decrease in Q3 of 2027.,"In the fourth quarter of 2027, Wells Fargo envisions that the operating cash flow at Intel has some probability to remain stable.",NEUTRAL
11,JPMorgan Chase forecasts that the net profit at Amazon potentially decrease in Q3 of 2027.,"Intel stock price should stay the same in January 2028, according to a financial expert at Harvard University.",NEUTRAL
26,JPMorgan Chase forecasts that the net profit at Amazon potentially decrease in Q3 of 2027.,"According to the financial top executive at BlackRock, the stock price at Amazon may rise in Q4 2028.",NEUTRAL
28,JPMorgan Chase forecasts that the net profit at Amazon potentially decrease in Q3 of 2027.,"On March 1, 2029, the financial advisor at Wells Fargo envisions that the inflation rate at the Federal Reserve has some probability to remain stable.",NEUTRAL
36,"On August 21, 2024, Bank of America speculates the revenue at Microsoft will likely increase.","According to Bank of America, the net profit at Microsoft would fall in the second quarter of 2026.",NEUTRAL
38,"On August 21, 2024, Bank of America speculates the revenue at Microsoft will likely increase.","The trading volume at Apple should stay same in the fourth quarter of 2025, according to a financial expert at JPMorgan Chase.",NEUTRAL
44,"On August 21, 2024, Bank of America speculates the revenue at Microsoft will likely increase.","Intel stock price should stay the same in January 2028, according to a financial expert at Harvard University.",NEUTRAL
56,"On August 21, 2024, Bank of America speculates the revenue at Microsoft will likely increase.","Apple stock price decreased in August 2024, according to Roger.",NEUTRAL


In [32]:
# Build a frame whose MultiIndex holds the unique, sorted
# (prediction, observation, label) combinations. This is done explicitly
# rather than through pivot_table with the value column doubling as an index
# key, which only produces this index as a by-product of aggregating nothing.
pair_keys = ['Prediction', 'Observation', 'Entailment Label']
by_prediction_df = (
    entails_df[pair_keys]
    .drop_duplicates()
    .set_index(pair_keys)
    .sort_index()
)
by_prediction_df

Prediction,Observation,Entailment Label
"Citigroup predicts on 2024-08-21, the operating income at Alphabet may rise.","In Q2 2025, a financial research advisor envisions that the research and development expenses at Google has some probability to remain stable.",NEUTRAL
JPMorgan Chase forecasts that the net profit at Amazon potentially decrease in Q3 of 2027.,"According to the financial top executive at BlackRock, the stock price at Amazon may rise in Q4 2028.",NEUTRAL
JPMorgan Chase forecasts that the net profit at Amazon potentially decrease in Q3 of 2027.,"In the fourth quarter of 2027, Wells Fargo envisions that the operating cash flow at Intel has some probability to remain stable.",NEUTRAL
JPMorgan Chase forecasts that the net profit at Amazon potentially decrease in Q3 of 2027.,"Intel stock price should stay the same in January 2028, according to a financial expert at Harvard University.",NEUTRAL
JPMorgan Chase forecasts that the net profit at Amazon potentially decrease in Q3 of 2027.,"On March 1, 2029, the financial advisor at Wells Fargo envisions that the inflation rate at the Federal Reserve has some probability to remain stable.",NEUTRAL
JPMorgan Chase forecasts that the net profit at Amazon potentially decrease in Q3 of 2027.,"On November 10, 2022, to November 10, 2023, Citigroup speculates the research and development expenses at Amazon will likely increase.",NEUTRAL
JPMorgan Chase forecasts that the net profit at Amazon potentially decrease in Q3 of 2027.,"The trading volume at Apple should stay same in the fourth quarter of 2025, according to a financial expert at JPMorgan Chase.",NEUTRAL
"On August 21, 2024, Bank of America speculates the revenue at Microsoft will likely increase.","According to Bank of America, the net profit at Microsoft would fall in the second quarter of 2026.",NEUTRAL
"On August 21, 2024, Bank of America speculates the revenue at Microsoft will likely increase.","Apple stock price decreased in August 2024, according to Roger.",NEUTRAL
"On August 21, 2024, Bank of America speculates the revenue at Microsoft will likely increase.","Intel stock price should stay the same in January 2028, according to a financial expert at Harvard University.",NEUTRAL


In [56]:
# Flatten the MultiIndex into a list of (prediction, observation, label) tuples.
entailment_pairings = by_prediction_df.index.tolist()
entailment_pairings

[('Citigroup predicts on 2024-08-21, the operating income at Alphabet may rise.',
  'In Q2 2025, a financial research advisor envisions that the research and development expenses at Google has some probability to remain stable.',
  'NEUTRAL'),
 ('JPMorgan Chase forecasts that the net profit at Amazon potentially decrease in Q3 of 2027.',
  'According to the financial top executive at BlackRock, the stock price at Amazon may rise in Q4 2028.',
  'NEUTRAL'),
 ('JPMorgan Chase forecasts that the net profit at Amazon potentially decrease in Q3 of 2027.',
  'In the fourth quarter of 2027, Wells Fargo envisions that the operating cash flow at Intel has some probability to remain stable.',
  'NEUTRAL'),
 ('JPMorgan Chase forecasts that the net profit at Amazon potentially decrease in Q3 of 2027.',
  'Intel stock price should stay the same in January 2028, according to a financial expert at Harvard University.',
  'NEUTRAL'),
 ('JPMorgan Chase forecasts that the net profit at Amazon potentiall

In [62]:
from sklearn.metrics.pairwise import cosine_similarity
from sentence_transformers import SentenceTransformer

# Sentence-embedding model used to score how close each retained
# (prediction, observation) pair is in embedding space.
model = SentenceTransformer("all-MiniLM-L6-v2")

# One cosine similarity per entry of entailment_pairings, in the same order.
sims = []

# Each pairing is a (prediction, observation, label) tuple; the label is not needed here.
for prediction, observation, _label in tqdm(entailment_pairings):
    # show_progress_bar=False: a single sentence is encoded per call, so the
    # per-call "Batches" bar only floods the cell output; the outer tqdm bar
    # already reports progress.
    pred_embedding = model.encode(prediction, show_progress_bar=False)
    obser_embedding = model.encode(observation, show_progress_bar=False)

    # make them (1 × vector_dim) for sklearn, then read the single score out
    # of the resulting 1 × 1 similarity matrix
    sim = cosine_similarity(
        pred_embedding.reshape(1, -1),
        obser_embedding.reshape(1, -1),
    )[0, 0]
    sims.append(sim)

  0%|          | 0/12 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

 42%|████▏     | 5/12 [00:00<00:00, 45.00it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

 83%|████████▎ | 10/12 [00:00<00:00, 47.71it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

100%|██████████| 12/12 [00:00<00:00, 48.35it/s]


In [63]:
# Cosine similarity of each (prediction, observation) pairing, in the order of entailment_pairings.
sims

[np.float32(0.32882446),
 np.float32(0.64473987),
 np.float32(0.31158242),
 np.float32(0.32173368),
 np.float32(0.28276524),
 np.float32(0.49003255),
 np.float32(0.48939294),
 np.float32(0.7715675),
 np.float32(0.3755531),
 np.float32(0.39405707),
 np.float32(0.15972918),
 np.float32(0.40385187)]