In [1]:
import pandas as pd
from pandarallel import pandarallel
from sklearn.metrics import classification_report
from finetune.common import create_summarization_metrics


pandarallel.initialize(progress_bar=True)

INFO: Pandarallel will run on 6 workers.
INFO: Pandarallel will use Memory file system to transfer data between the main process and workers.


In [2]:
df = pd.read_parquet('gs://scraped-news-article-data-null/text-bison-test-predicted.parquet')
df["predicted"] = df.predicted.str.strip()
df.head()

Unnamed: 0,context,published,question,summary,predicted
0,"Published: 2022-12-16\nPARIS, Dec 16 (Reuters)...",2022-12-16 15:39:00+00:00,"How are French, German, and Polish crops expec...",French crops predicted to withstand wintry wea...,"* In France, the cold weather is not expected ..."
1,Published: 2023-10-25\n## In this article\nFol...,2023-10-25 20:12:41+00:00,What caused the surge in Endeavor stock?,Silver Lake's interest in taking Endeavor priv...,Endeavor stock surged after the company announ...
2,"Published: 2023-10-02\n""In mitigating climate ...",2023-10-02 11:54:30+00:00,What specific climate criticisms and accusatio...,Impossible to answer with given information,The fossil fuel industry has been accused of i...
3,Published: 2023-10-18\nA report published Tues...,2023-10-18 19:49:14+00:00,Which specific projects or companies will rece...,Impossible to answer with given information,Specific projects that will receive funding in...
4,"Published: 2022-03-04\nMeanwhile, a new law th...",2022-03-04 18:46:53+00:00,What were the reasons cited by Russia for bloc...,Russia blocks Meta-owned Facebook amid invasio...,- Russia's media regulator said it was blockin...


In [3]:
df["y"] = False
df["y_hat"] = False
df.loc[df.summary.str.contains("Impossible", case=False), "y"] = True
df["y"] = df.y.fillna(False)
df.loc[df.predicted.str.contains("Impossible", case=False), "y_hat"] = True
df["y_hat"] = df.y_hat.fillna(False)
print(classification_report(df.y, df.y_hat))

              precision    recall  f1-score   support

       False       0.53      0.99      0.69       985
        True       0.96      0.19      0.32      1093

    accuracy                           0.57      2078
   macro avg       0.74      0.59      0.51      2078
weighted avg       0.76      0.57      0.50      2078



In [4]:
_, rouge_metric, _ = create_summarization_metrics(None, None)
pure_df = df.loc[(~df.summary.str.contains("Impossible", case=False)) &
                 (~df.predicted.str.contains("Impossible", case=False))]
rouge_results = pure_df.parallel_apply(lambda row: rouge_metric([row["summary"]], [row["predicted"]]), axis=1)
rouge_results = pd.DataFrame(rouge_results.tolist())
rouge_results.head()

[nltk_data] Downloading package punkt to /home/sdai/nltk_data...
[nltk_data]   Package punkt is already up-to-date!
[nltk_data] Downloading package wordnet to /home/sdai/nltk_data...
[nltk_data]   Package wordnet is already up-to-date!
[nltk_data] Downloading package stopwords to /home/sdai/nltk_data...
[nltk_data]   Package stopwords is already up-to-date!


VBox(children=(HBox(children=(IntProgress(value=0, description='0.00%', max=163), Label(value='0 / 163'))), HB…

Unnamed: 0,rouge1,rouge2,rougeL,rougeLsum
0,23.6842,2.7027,15.7895,21.0526
1,33.3333,15.7895,20.5128,23.0769
2,42.4242,10.3093,22.2222,36.3636
3,31.8182,23.8095,31.8182,31.8182
4,33.3333,17.0732,26.1905,26.1905


In [5]:
rouge_results.to_excel('gs://scraped-news-article-data-null/text-bison-rouge.xlsx')

In [2]:
df = pd.read_parquet('gs://scraped-news-article-data-null/mistral-sum-test-predicted.parquet')
df["predicted"] = df.predicted.str.strip()
df.head()

Unnamed: 0,body,published,question,summary,predicted
0,"Published: 2022-12-16\nPARIS, Dec 16 (Reuters)...",2022-12-16 15:39:00+00:00,"How are French, German, and Polish crops expec...",French crops predicted to withstand wintry wea...,"Wintry weather poses minimal risk to French, G..."
1,Published: 2023-10-25\n## In this article\nFol...,2023-10-25 20:12:41+00:00,What caused the surge in Endeavor stock?,Silver Lake's interest in taking Endeavor priv...,Endeavor stock surges as major shareholder Sil...
2,"Published: 2023-10-02\n""In mitigating climate ...",2023-10-02 11:54:30+00:00,What specific climate criticisms and accusatio...,Impossible to answer with given information,Impossible to answer with given information
3,Published: 2023-10-18\nA report published Tues...,2023-10-18 19:49:14+00:00,Which specific projects or companies will rece...,Impossible to answer with given information,Impossible to answer with given information
4,"Published: 2022-03-04\nMeanwhile, a new law th...",2022-03-04 18:46:53+00:00,What were the reasons cited by Russia for bloc...,Russia blocks Meta-owned Facebook amid invasio...,Russia blocks Meta-owned Facebook in the count...


In [3]:
from sklearn.metrics import classification_report

df["y"] = False
df["y_hat"] = False
df.loc[df.summary.str.contains("Impossible", case=False), "y"] = True
df["y"] = df.y.fillna(False)
df.loc[df.predicted.str.contains("Impossible", case=False), "y_hat"] = True
df["y_hat"] = df.y_hat.fillna(False)
print(classification_report(df.y, df.y_hat))

              precision    recall  f1-score   support

       False       0.95      0.94      0.94       985
        True       0.94      0.95      0.95      1093

    accuracy                           0.94      2078
   macro avg       0.94      0.94      0.94      2078
weighted avg       0.94      0.94      0.94      2078



In [4]:
df

Unnamed: 0,body,published,question,summary,predicted,y,y_hat
0,"Published: 2022-12-16\nPARIS, Dec 16 (Reuters)...",2022-12-16 15:39:00+00:00,"How are French, German, and Polish crops expec...",French crops predicted to withstand wintry wea...,"Wintry weather poses minimal risk to French, G...",False,False
1,Published: 2023-10-25\n## In this article\nFol...,2023-10-25 20:12:41+00:00,What caused the surge in Endeavor stock?,Silver Lake's interest in taking Endeavor priv...,Endeavor stock surges as major shareholder Sil...,False,False
2,"Published: 2023-10-02\n""In mitigating climate ...",2023-10-02 11:54:30+00:00,What specific climate criticisms and accusatio...,Impossible to answer with given information,Impossible to answer with given information,True,True
3,Published: 2023-10-18\nA report published Tues...,2023-10-18 19:49:14+00:00,Which specific projects or companies will rece...,Impossible to answer with given information,Impossible to answer with given information,True,True
4,"Published: 2022-03-04\nMeanwhile, a new law th...",2022-03-04 18:46:53+00:00,What were the reasons cited by Russia for bloc...,Russia blocks Meta-owned Facebook amid invasio...,Russia blocks Meta-owned Facebook in the count...,False,False
...,...,...,...,...,...,...,...
2073,"Published: 2022-07-03\nALMATY, July 3 (Reuters...",2017-09-01 12:35:39+00:00,What types of products are recommended to shop...,Impossible to answer with given information,Impossible to answer with given information,True,True
2074,"Published: 2022-11-21\nNEW YORK, Nov 21 (Reute...",2023-04-19 19:37:40+00:00,How will the defamation lawsuits filed against...,Impossible to answer with given information,Impossible to answer with given information,True,True
2075,Published: 2022-03-08\n## In this article\nFol...,2020-04-07 20:14:20+00:00,How many paying customers does Google's G Suit...,Impossible to answer with given information,Impossible to answer with given information,True,True
2076,Published: 2023-10-18\nAnalyst Jos Versteeg of...,2022-07-13 23:57:39+00:00,How are the wildfires in Portugal and Spain af...,Impossible to answer with given information,Impossible to answer with given information,True,True


In [5]:
import finetune.config as config
from finetune.common import create_summarization_metrics

_, rouge_metric, _ = create_summarization_metrics(None, None)
pure_df = df.loc[(~df.summary.str.contains("Impossible", case=False)) &
                 (~df.predicted.str.contains("Impossible", case=False))]
rouge_results = pure_df.parallel_apply(lambda row: rouge_metric([row["summary"]], [row["predicted"]]), axis=1)
rouge_results = pd.DataFrame(rouge_results.tolist())
rouge_results.head()

[nltk_data] Downloading package punkt to /home/sdai/nltk_data...
[nltk_data]   Package punkt is already up-to-date!
[nltk_data] Downloading package wordnet to /home/sdai/nltk_data...
[nltk_data]   Package wordnet is already up-to-date!
[nltk_data] Downloading package stopwords to /home/sdai/nltk_data...
[nltk_data]   Package stopwords is already up-to-date!


VBox(children=(HBox(children=(IntProgress(value=0, description='0.00%', max=154), Label(value='0 / 154'))), HB…

Unnamed: 0,rouge1,rouge2,rougeL,rougeLsum
0,46.3415,12.5,26.8293,26.8293
1,59.3407,31.4607,50.5495,50.5495
2,44.6809,19.5652,31.9149,38.2979
3,45.0,25.641,32.5,35.0
4,50.4673,26.6667,41.1215,41.1215


In [6]:
rouge_results.to_excel('gs://scraped-news-article-data-null/mistral-sum-rouge.xlsx')

In [7]:
df = pd.read_parquet('gs://scraped-news-article-data-null/llama-chat-test-predicted.parquet')
df.loc[df.predicted.str.contains("IMPOSSIBLE", case=False), "predicted"] = "IMPOSSIBLE"
df.loc[df.predicted.str.contains("The context does not provide", case=False), "predicted"] = "IMPOSSIBLE"
df.head()

Unnamed: 0,body,published,question,summary,predicted
0,"Published: 2022-12-16\nPARIS, Dec 16 (Reuters)...",2022-12-16 15:39:00+00:00,"How are French, German, and Polish crops expec...",French crops predicted to withstand wintry wea...,"Based on the provided context, it is possibl..."
1,Published: 2023-10-25\n## In this article\nFol...,2023-10-25 20:12:41+00:00,What caused the surge in Endeavor stock?,Silver Lake's interest in taking Endeavor priv...,IMPOSSIBLE
2,"Published: 2023-10-02\n""In mitigating climate ...",2023-10-02 11:54:30+00:00,What specific climate criticisms and accusatio...,Impossible to answer with given information,"Based on the given context, the specific cli..."
3,Published: 2023-10-18\nA report published Tues...,2023-10-18 19:49:14+00:00,Which specific projects or companies will rece...,Impossible to answer with given information,IMPOSSIBLE
4,"Published: 2022-03-04\nMeanwhile, a new law th...",2022-03-04 18:46:53+00:00,What were the reasons cited by Russia for bloc...,Russia blocks Meta-owned Facebook amid invasio...,IMPOSSIBLE


In [8]:
df["y"] = False
df["y_hat"] = False
df.loc[df.summary.str.contains("IMPOSSIBLE", case=False), "y"] = True
df["y"] = df.y.fillna(False)
df.loc[df.predicted == "IMPOSSIBLE", "y_hat"] = True
df["y_hat"] = df.y_hat.fillna(False)
print(classification_report(df.y, df.y_hat))

              precision    recall  f1-score   support

       False       0.63      0.48      0.55       985
        True       0.62      0.75      0.68      1093

    accuracy                           0.62      2078
   macro avg       0.62      0.61      0.61      2078
weighted avg       0.62      0.62      0.61      2078



In [9]:
pure_df = df.loc[df.y == df.y_hat]
rouge_results = pure_df.parallel_apply(lambda row: rouge_metric([row["predicted"]], [row["summary"]]), axis=1)
rouge_results = pd.DataFrame(rouge_results.tolist())
rouge_results.head()

VBox(children=(HBox(children=(IntProgress(value=0, description='0.00%', max=216), Label(value='0 / 216'))), HB…

Unnamed: 0,rouge1,rouge2,rougeL,rougeLsum
0,21.2766,2.8777,12.766,18.4397
1,40.0,0.0,40.0,40.0
2,40.0,0.0,40.0,40.0
3,25.4237,10.3448,15.2542,20.339
4,40.0,0.0,40.0,40.0


In [10]:
rouge_results.to_excel('gs://scraped-news-article-data-null/llama-chat-rouge.xlsx')

In [11]:
df = pd.read_parquet('gs://scraped-news-article-data-null/orca-mistral-test-predicted.parquet')
df.loc[df.predicted.str.contains("IMPOSSIBLE", case=False), "predicted"] = "IMPOSSIBLE"
df.loc[df.predicted.str.contains("does not provide", case=False), "predicted"] = "IMPOSSIBLE"
df.head()

Unnamed: 0,body,published,question,summary,predicted
0,"Published: 2022-12-16\nPARIS, Dec 16 (Reuters)...",2022-12-16 15:39:00+00:00,"How are French, German, and Polish crops expec...",French crops predicted to withstand wintry wea...,French crops are expected to be unaffected by...
1,Published: 2023-10-25\n## In this article\nFol...,2023-10-25 20:12:41+00:00,What caused the surge in Endeavor stock?,Silver Lake's interest in taking Endeavor priv...,The surge in Endeavor stock was caused by the...
2,"Published: 2023-10-02\n""In mitigating climate ...",2023-10-02 11:54:30+00:00,What specific climate criticisms and accusatio...,Impossible to answer with given information,IMPOSSIBLE
3,Published: 2023-10-18\nA report published Tues...,2023-10-18 19:49:14+00:00,Which specific projects or companies will rece...,Impossible to answer with given information,The Biden Administration is investing $3.46 b...
4,"Published: 2022-03-04\nMeanwhile, a new law th...",2022-03-04 18:46:53+00:00,What were the reasons cited by Russia for bloc...,Russia blocks Meta-owned Facebook amid invasio...,Russia's media regulator blocked Meta-owned F...


In [12]:
df["y"] = False
df["y_hat"] = False
df.loc[df.summary.str.contains("IMPOSSIBLE", case=False), "y"] = True
df["y"] = df.y.fillna(False)
df.loc[df.predicted == "IMPOSSIBLE", "y_hat"] = True
df["y_hat"] = df.y_hat.fillna(False)
print(classification_report(df.y, df.y_hat))

              precision    recall  f1-score   support

       False       0.58      0.95      0.72       985
        True       0.90      0.39      0.55      1093

    accuracy                           0.66      2078
   macro avg       0.74      0.67      0.63      2078
weighted avg       0.75      0.66      0.63      2078



In [13]:
pure_df = df.loc[df.y == df.y_hat]
rouge_results = pure_df.parallel_apply(lambda row: rouge_metric([row["predicted"]], [row["summary"]]), axis=1)
rouge_results = pd.DataFrame(rouge_results.tolist())
rouge_results.head()

VBox(children=(HBox(children=(IntProgress(value=0, description='0.00%', max=228), Label(value='0 / 228'))), HB…

Unnamed: 0,rouge1,rouge2,rougeL,rougeLsum
0,28.5714,12.5,20.4082,28.5714
1,34.2105,16.2162,23.6842,28.9474
2,40.0,0.0,40.0,40.0
3,37.6471,21.6867,28.2353,35.2941
4,43.1373,28.5714,35.2941,43.1373


In [14]:
rouge_results.to_excel('gs://scraped-news-article-data-null/orca-mistral-rouge.xlsx')

In [2]:
df = pd.read_parquet('gs://scraped-news-article-data-null/llama-chat-qa-test-predicted.parquet')
df.loc[df.predicted.str.contains("IMPOSSIBLE", case=False), "predicted"] = "IMPOSSIBLE"
df.loc[df.predicted.str.contains("The context does not provide", case=False), "predicted"] = "IMPOSSIBLE"
df.head()

Unnamed: 0,question,context,answer,predicted
0,How are dilutive RSUs calculated?,In periods with a net income from continuing o...,The calculation of dilutive RSUs is based on t...,"According to the given context, dilutive RSU..."
1,What does the table show?,The income tax expense consists of:,The table illustrates the income tax expense.,The table shows the income tax expense for t...
2,What is a debutante and what does she do?,"A debutante, also spelled débutante, (/ˈdɛbjʊt...",A debutante is a young woman of aristocratic o...,A debutante is a young woman of aristocratic...
3,"What are ""chemtrails"" and why are some people ...",Reading the chemtrail conspiracy really remind...,Chemtrails refers to the theory that governmen...,"Based on the given context, ""chemtrails"" ref..."
4,"As of December 2019, which is the best perform...",The following graph compares the cumulative to...,"Answer: As of December 2019, the best performi...",Based on the information provided in the con...


In [4]:
df["y"] = False
df["y_hat"] = False
df.loc[df.answer.str.contains("IMPOSSIBLE", case=False), "y"] = True
df["y"] = df.y.fillna(False)
df.loc[df.predicted == "IMPOSSIBLE", "y_hat"] = True
df["y_hat"] = df.y_hat.fillna(False)
print(classification_report(df.y, df.y_hat))

              precision    recall  f1-score   support

       False       0.95      0.99      0.97      1986
        True       0.27      0.08      0.12       114

    accuracy                           0.94      2100
   macro avg       0.61      0.53      0.55      2100
weighted avg       0.91      0.94      0.92      2100



In [6]:
_, rouge_metric, _ = create_summarization_metrics(None, None)
pure_df = df.loc[df.y == df.y_hat]
rouge_results = pure_df.parallel_apply(lambda row: rouge_metric([row["predicted"]], [row["answer"]]), axis=1)
rouge_results = pd.DataFrame(rouge_results.tolist())
rouge_results.head()

[nltk_data] Downloading package punkt to /home/sdai/nltk_data...
[nltk_data]   Package punkt is already up-to-date!
[nltk_data] Downloading package wordnet to /home/sdai/nltk_data...
[nltk_data]   Package wordnet is already up-to-date!
[nltk_data] Downloading package stopwords to /home/sdai/nltk_data...
[nltk_data]   Package stopwords is already up-to-date!


VBox(children=(HBox(children=(IntProgress(value=0, description='0.00%', max=329), Label(value='0 / 329'))), HB…

Unnamed: 0,rouge1,rouge2,rougeL,rougeLsum
0,52.1739,38.0952,52.1739,52.1739
1,40.0,22.2222,40.0,40.0
2,72.1649,52.6316,70.1031,70.1031
3,30.6748,12.4224,20.8589,28.2209
4,40.0,22.2222,30.0,30.0


In [7]:
rouge_results.to_excel('gs://scraped-news-article-data-null/llama-chat-qa-rouge.xlsx')

In [8]:
df = pd.read_parquet('gs://scraped-news-article-data-null/orca-mistral-qa-test-predicted.parquet')
df.loc[df.predicted.str.contains("IMPOSSIBLE", case=False), "predicted"] = "IMPOSSIBLE"
df.loc[df.predicted.str.contains("The context does not provide", case=False), "predicted"] = "IMPOSSIBLE"
df.head()

Unnamed: 0,question,context,answer,predicted
0,How are dilutive RSUs calculated?,In periods with a net income from continuing o...,The calculation of dilutive RSUs is based on t...,Dilutive RSUs are calculated based on the ave...
1,What does the table show?,The income tax expense consists of:,The table illustrates the income tax expense.,The table shows the components of income tax ...
2,What is a debutante and what does she do?,"A debutante, also spelled débutante, (/ˈdɛbjʊt...",A debutante is a young woman of aristocratic o...,A debutante is a young woman of aristocratic ...
3,"What are ""chemtrails"" and why are some people ...",Reading the chemtrail conspiracy really remind...,Chemtrails refers to the theory that governmen...,Chemtrails refer to the theory that governmen...
4,"As of December 2019, which is the best perform...",The following graph compares the cumulative to...,"Answer: As of December 2019, the best performi...","As of December 2019, the best performing stoc..."


In [9]:
df["y"] = False
df["y_hat"] = False
df.loc[df.answer.str.contains("IMPOSSIBLE", case=False), "y"] = True
df["y"] = df.y.fillna(False)
df.loc[df.predicted == "IMPOSSIBLE", "y_hat"] = True
df["y_hat"] = df.y_hat.fillna(False)
print(classification_report(df.y, df.y_hat))

              precision    recall  f1-score   support

       False       0.95      0.99      0.97      1986
        True       0.04      0.01      0.01       114

    accuracy                           0.93      2100
   macro avg       0.49      0.50      0.49      2100
weighted avg       0.90      0.93      0.91      2100



In [10]:
pure_df = df.loc[df.y == df.y_hat]
rouge_results = pure_df.parallel_apply(lambda row: rouge_metric([row["predicted"]], [row["answer"]]), axis=1)
rouge_results = pd.DataFrame(rouge_results.tolist())
rouge_results.head()

VBox(children=(HBox(children=(IntProgress(value=0, description='0.00%', max=328), Label(value='0 / 328'))), HB…

Unnamed: 0,rouge1,rouge2,rougeL,rougeLsum
0,92.3077,83.3333,92.3077,92.3077
1,72.7273,44.4444,72.7273,72.7273
2,46.729,30.4762,41.1215,46.729
3,60.9524,48.0769,56.1905,60.9524
4,47.0588,40.0,47.0588,47.0588


In [11]:
rouge_results.to_excel('gs://scraped-news-article-data-null/orca-mistral-qa-rouge.xlsx')