In [1]:
import pandas as pd
from pandarallel import pandarallel
from evaluate import load
import re

# Start pandarallel so DataFrame.parallel_apply is available below;
# progress_bar=True asks it to render a progress display while the workers run.
pandarallel.initialize(progress_bar=True)

INFO: Pandarallel will run on 2 workers.
INFO: Pandarallel will use Memory file system to transfer data between the main process and workers.


In [2]:
# Read the Mistral test-set predictions and trim surrounding whitespace from
# each prediction so exact string comparisons against "IMPOSSIBLE" work.
df = pd.read_parquet(
    'gs://scraped-news-article-data-null/mistral-final-test-predicted.parquet'
).assign(predicted=lambda frame: frame["predicted"].str.strip())
df.head()

Unnamed: 0,body,summary,question,predicted
0,"This spring, millionaires felt relatively pess...","Millionaires see market volatility, inflation ...",What are the top three threats to personal wea...,Millionaires are bracing for a market downturn...
1,"TOKYO, Feb 7 (Reuters) - Toshiba Corp (6502.T)...","Toshiba now plans to split into two, bumps up ...",What businesses is Toshiba splitting off and s...,"Toshiba to split into two companies, offers mo..."
2,"BERLIN, Jan 19 (Reuters) - Germany's Greens pa...",Germany's Greens must squeeze coalition on cli...,What did the Greens secure in exchange for sup...,Analysis: Germany's Greens face pressure to de...
3,"PARIS/ROME, Nov 11 (Reuters) - A charity-run s...",IMPOSSIBLE,What are the potential economic and political ...,IMPOSSIBLE
4,Feb 24 (Reuters) - European shares slid on Fri...,IMPOSSIBLE,What factors contributed to the 0.4% shrinkage...,IMPOSSIBLE


In [3]:
from sklearn.metrics import classification_report

# Binary "is this IMPOSSIBLE?" flags for the reference (y) and the model (y_hat).
# Comparing directly produces a real boolean column in one step: no intermediate
# NaN-filled object column, no reliance on fillna() downcasting, and it still
# works when no row matches the marker.
df["y"] = df["summary"] == "IMPOSSIBLE"
df["y_hat"] = df["predicted"] == "IMPOSSIBLE"

# classification_report returns preformatted text, so print() is the right display here.
print(classification_report(df["y"], df["y_hat"]))

              precision    recall  f1-score   support

       False       0.96      0.37      0.54       500
        True       0.61      0.98      0.75       500

    accuracy                           0.68      1000
   macro avg       0.79      0.68      0.65      1000
weighted avg       0.79      0.68      0.65      1000



In [4]:
# Show the frame, which at this point also carries the y / y_hat flag columns.
df

Unnamed: 0,body,summary,question,predicted,y,y_hat
0,"This spring, millionaires felt relatively pess...","Millionaires see market volatility, inflation ...",What are the top three threats to personal wea...,Millionaires are bracing for a market downturn...,False,False
1,"TOKYO, Feb 7 (Reuters) - Toshiba Corp (6502.T)...","Toshiba now plans to split into two, bumps up ...",What businesses is Toshiba splitting off and s...,"Toshiba to split into two companies, offers mo...",False,False
2,"BERLIN, Jan 19 (Reuters) - Germany's Greens pa...",Germany's Greens must squeeze coalition on cli...,What did the Greens secure in exchange for sup...,Analysis: Germany's Greens face pressure to de...,False,False
3,"PARIS/ROME, Nov 11 (Reuters) - A charity-run s...",IMPOSSIBLE,What are the potential economic and political ...,IMPOSSIBLE,True,True
4,Feb 24 (Reuters) - European shares slid on Fri...,IMPOSSIBLE,What factors contributed to the 0.4% shrinkage...,IMPOSSIBLE,True,True
...,...,...,...,...,...,...
995,European Central Bank member Jens Weidmann ann...,IMPOSSIBLE,How will Weidmann's departure affect the futur...,IMPOSSIBLE,True,True
996,## In this article\n\nFollow your favorite sto...,Google is offering an on-campus hotel 'special...,Is Google facing any financial difficulties?,IMPOSSIBLE,False,True
997,"INCHEON, May 2 (Reuters) - Asian finance leade...",Asian finance leaders look to improve market s...,What are the benefits of expanding the Chiang ...,IMPOSSIBLE,False,True
998,A factory at the mobile phone plant of Rising ...,Apple supplier Foxconn cautious despite beatin...,What are the specific factors contributing to ...,IMPOSSIBLE,False,True


In [5]:
# Load the "rouge" metric through evaluate.load; compute_metrics reads this
# notebook-level `metric` object when scoring a prediction.
metric = load("rouge")

In [6]:
import nltk
import numpy as np


def compute_metrics(pred, label):
    """Score one prediction against one reference with the loaded ROUGE metric.

    Both texts are normalised the same way before scoring: every newline gets
    a "." placed in front of it, the text is stripped, split into sentences
    with ``nltk.sent_tokenize`` and re-joined with one sentence per line
    (ROUGE expects a newline after each sentence).

    Args:
        pred: Predicted text (str).
        label: Reference text (str).

    Returns:
        dict mapping each score name returned by ``metric.compute`` to its
        value multiplied by 100 and rounded to 4 decimal places.
    """

    def _one_sentence_per_line(text):
        # Same effect as splitting on "\n" and joining with ".\n".
        terminated = text.replace("\n", ".\n")
        return "\n".join(nltk.sent_tokenize(terminated.strip()))

    # A single prediction/reference pair is wrapped in one-element lists.
    # Note that other metrics may not have a `use_aggregator` parameter
    # and thus will return a list, computing a metric for each sentence.
    scores = metric.compute(
        predictions=[_one_sentence_per_line(pred)],
        references=[_one_sentence_per_line(label)],
        use_stemmer=True,
        use_aggregator=True,
    )

    # Scale to percentages, then round.
    return {name: round(score * 100, 4) for name, score in scores.items()}

In [7]:
#metric.compute(predictions="test", references="test")

In [8]:
# ROUGE is only computed for rows where neither the reference summary nor the
# prediction is the "IMPOSSIBLE" marker. Each row yields a dict of scores,
# and the dicts are expanded into one column per score.
rouge_results = pd.DataFrame(
    df.loc[df["summary"].ne("IMPOSSIBLE") & df["predicted"].ne("IMPOSSIBLE")]
    .parallel_apply(
        lambda row: compute_metrics(row["predicted"], row["summary"]),
        axis=1,
    )
    .tolist()
)
rouge_results.head()

VBox(children=(HBox(children=(IntProgress(value=0, description='0.00%', max=94), Label(value='0 / 94'))), HBox…

Unnamed: 0,rouge1,rouge2,rougeL,rougeLsum
0,44.7552,17.0213,30.7692,39.1608
1,35.9551,11.4943,24.7191,33.7079
2,45.0704,26.087,39.4366,42.2535
3,66.3462,48.5437,57.6923,62.5
4,56.8421,34.4086,50.5263,54.7368


In [9]:
# Write the per-row ROUGE scores computed from the mistral-final predictions
# to an Excel file at this gs:// path.
rouge_results.to_excel('gs://scraped-news-article-data-null/mistral-final-sum-rouge.xlsx')

In [10]:
df = pd.read_parquet('gs://scraped-news-article-data-null/llama-chat-test-predicted.parquet')

# Collapse free-form refusals into the single "IMPOSSIBLE" label used for scoring.
# The match is a case-insensitive substring test. na=False makes a missing
# prediction count as "no match"; without it str.contains returns NaN for that
# row and the boolean .loc indexer raises.
df.loc[df.predicted.str.contains("IMPOSSIBLE", case=False, na=False), "predicted"] = "IMPOSSIBLE"
df.loc[df.predicted.str.contains("The context does not provide", case=False, na=False), "predicted"] = "IMPOSSIBLE"
df.head()

Unnamed: 0,body,summary,question,predicted
0,"This spring, millionaires felt relatively pess...","Millionaires see market volatility, inflation ...",What are the top three threats to personal wea...,"Based on the given context, the top three th..."
1,"TOKYO, Feb 7 (Reuters) - Toshiba Corp (6502.T)...","Toshiba now plans to split into two, bumps up ...",What businesses is Toshiba splitting off and s...,"Based on the given context, Toshiba is plann..."
2,"BERLIN, Jan 19 (Reuters) - Germany's Greens pa...",Germany's Greens must squeeze coalition on cli...,What did the Greens secure in exchange for sup...,"Based on the given context, the Greens secur..."
3,"PARIS/ROME, Nov 11 (Reuters) - A charity-run s...",IMPOSSIBLE,What are the potential economic and political ...,The potential economic and political implica...
4,Feb 24 (Reuters) - European shares slid on Fri...,IMPOSSIBLE,What factors contributed to the 0.4% shrinkage...,"Based on the provided context, the factors t..."


In [11]:
# Binary "is this IMPOSSIBLE?" flags for the reference (y) and the model (y_hat).
# Comparing directly produces a real boolean column in one step: no intermediate
# NaN-filled object column, no reliance on fillna() downcasting, and it still
# works when no row matches the marker.
df["y"] = df["summary"] == "IMPOSSIBLE"
df["y_hat"] = df["predicted"] == "IMPOSSIBLE"

# classification_report returns preformatted text, so print() is the right display here.
print(classification_report(df["y"], df["y_hat"]))

              precision    recall  f1-score   support

       False       0.54      0.84      0.65       500
        True       0.63      0.28      0.39       500

    accuracy                           0.56      1000
   macro avg       0.59      0.56      0.52      1000
weighted avg       0.59      0.56      0.52      1000



In [12]:
# ROUGE is only computed for rows where neither the reference summary nor the
# prediction is the "IMPOSSIBLE" marker. Each row yields a dict of scores,
# and the dicts are expanded into one column per score.
rouge_results = pd.DataFrame(
    df.loc[df["summary"].ne("IMPOSSIBLE") & df["predicted"].ne("IMPOSSIBLE")]
    .parallel_apply(
        lambda row: compute_metrics(row["predicted"], row["summary"]),
        axis=1,
    )
    .tolist()
)
rouge_results.head()

VBox(children=(HBox(children=(IntProgress(value=0, description='0.00%', max=209), Label(value='0 / 209'))), HB…

Unnamed: 0,rouge1,rouge2,rougeL,rougeLsum
0,40.0,17.4863,27.027,36.7568
1,28.125,13.6842,23.9583,28.125
2,25.974,8.0,15.5844,20.7792
3,15.3846,0.0,11.5385,11.5385
4,20.0803,1.6194,12.0482,19.2771


In [13]:
# Write the per-row ROUGE scores computed from the llama-chat predictions
# to an Excel file at this gs:// path.
rouge_results.to_excel('gs://scraped-news-article-data-null/llama-chat-rouge.xlsx')

In [14]:
# Spot-check 20 predictions from rows whose reference summary is "IMPOSSIBLE".
# random_state pins the draw so a Restart & Run All shows the same examples.
df.loc[df.summary == "IMPOSSIBLE"].predicted.sample(20, random_state=42).tolist()

['IMPOSSIBLE',
 '  Based on the provided context, it is likely that the rise in heat-related deaths will have a significant impact on the insurance industry. Here are some potential implications:\n\n1. Increased claims: As heatwaves become more frequent and severe, there will likely be an increase in claims related to heat-related illnesses and deaths. This could result in higher payouts for insurance companies, which could lead to increased premiums for policyholders.\n2. Shift in risk assessment: As the frequency and severity of heatwaves increase, insurance companies may need to reassess their risk assessments and adjust their underwriting strategies to account for the changing climate. This could lead to higher premiums for policyholders in areas that are particularly vulnerable to heatwaves.\n3. New products and services: Insurance companies may need to develop new products and services to address the growing risk of heat-related illnesses and deaths. This could include new polici