In [52]:
import os
import json
import pandas as pd

import difflib
import nltk
import re

from rouge_s import py_rouge_scores
from bert_score import score

%matplotlib inline

In [53]:
# Maps an experiment id (as a string) to the label printed for that experiment.
name_method = {
    '1': '1_bart_noprompt',
    '2': '2_bart_topic',
    '3': '3_bart_length',
    '4': '4_bart_topic_length',
    '5': '5_bart_contrastive_random',
    '6': '6_bart_contrastive_synonym',
    '7': '7_bart_contrastive_combine',
    '8': '8_bart_contrastive_combine_word_tagger',
    '9': '9_bart-contrastive-combine-prompt-tagger',
}

In [54]:
# Number of experiments whose results are evaluated.
total_method = 9

# Experiment ids are 1-based: 1 .. total_method inclusive.
list_method = [*range(1, total_method + 1)]

In [55]:
def load_gold_summary(file_path):
    """Load the gold (reference) JSONL file into a column-oriented dict.

    Each non-blank line of ``file_path`` is parsed as one JSON object that must
    contain the keys ``fname``, ``dialogue``, ``summary1``-``summary3`` and
    ``topic1``-``topic3``.

    :param file_path: path of the JSONL file to read
    :return: dict mapping each of the keys above to the list of that key's
        values, in file order (suitable for ``pd.DataFrame.from_dict``)
    :raises KeyError: if a record lacks one of the expected keys
    :raises json.JSONDecodeError: if a non-blank line is not valid JSON
    """
    fields = (
        "fname",
        "dialogue",
        "summary1",
        "summary2",
        "summary3",
        "topic1",
        "topic2",
        "topic3",
    )

    # Read as UTF-8 explicitly so the result does not depend on the platform's
    # default encoding, and skip blank lines (e.g. a trailing newline at the
    # end of the file) instead of failing in json.loads.
    with open(file_path, "r", encoding="utf-8") as f:
        data = [json.loads(line) for line in f if line.strip()]

    return {field: [sample[field] for sample in data] for field in fields}

In [56]:
def load_result(file_path):
    """Load one experiment's generated-summary JSONL file into a column-oriented dict.

    Each non-blank line of ``file_path`` is parsed as one JSON object that must
    contain the keys ``fname`` and ``gen_summary1``-``gen_summary3``.

    :param file_path: path of the JSONL file to read
    :return: dict mapping each of the keys above to the list of that key's
        values, in file order (suitable for ``pd.DataFrame.from_dict``)
    :raises KeyError: if a record lacks one of the expected keys
    :raises json.JSONDecodeError: if a non-blank line is not valid JSON
    """
    fields = ("fname", "gen_summary1", "gen_summary2", "gen_summary3")

    # Read as UTF-8 explicitly so the result does not depend on the platform's
    # default encoding, and skip blank lines (e.g. a trailing newline at the
    # end of the file) instead of failing in json.loads.
    with open(file_path, "r", encoding="utf-8") as f:
        data = [json.loads(line) for line in f if line.strip()]

    return {field: [sample[field] for sample in data] for field in fields}

In [57]:
# Reference summaries and topics of the test split, as a dict of column lists.
gold_summary = load_gold_summary("./data/dialogsum/dialogsum.test.jsonl")

In [58]:
# One row per test sample, one column per key of the loaded dict.
gold_summary_df = pd.DataFrame(gold_summary)

In [59]:
# Preview the first rows of the gold reference table.
gold_summary_df.head()

Unnamed: 0,fname,dialogue,summary1,summary2,summary3,topic1,topic2,topic3
0,test_0,"#Person1#: Ms. Dawson, I need you to take a di...",Ms. Dawson helps #Person1# to write a memo to ...,In order to prevent employees from wasting tim...,Ms. Dawson takes a dictation for #Person1# abo...,communication method,company policy,dictation
1,test_1,#Person1#: You're finally here! What took so l...,#Person2# arrives late because of traffic jam....,#Person2# decides to follow #Person1#'s sugges...,#Person2# complains to #Person1# about the tra...,public transportation,transportation,discuss transportation
2,test_2,"#Person1#: Kate, you never believe what's happ...",#Person1# tells Kate that Masha and Hero get d...,#Person1# tells Kate that Masha and Hero are g...,#Person1# and Kate talk about the divorce betw...,divorce,divorce,discuss divorce
3,test_3,"#Person1#: Happy Birthday, this is for you, Br...",#Person1# and Brian are at the birthday party ...,#Person1# attends Brian's birthday party. Bria...,#Person1# has a dance with Brian at Brian's bi...,birthday party,birthday party,birthday party
4,test_4,#Person1#: This Olympic park is so big!\n#Pers...,#Person1# is surprised at the Olympic Stadium'...,#Person2# shows #Person1# around the construct...,#Person2# introduces the Olympic Stadium's fin...,Olympic Stadium,sports stadium,Olympic Stadium


In [60]:
# Export the gold reference table to an Excel workbook in the working directory.
gold_summary_df.to_excel(excel_writer="dataset.xlsx")

In [61]:
# Load every experiment's generated summaries: experiment id -> DataFrame.
result_all_df = {}
for i in list_method:
    result_all_df[i] = pd.DataFrame(load_result(f"./result/{i}.jsonl"))

In [62]:
# Preview the first rows of experiment 1's generated summaries.
result_all_df[1].head()

Unnamed: 0,fname,gen_summary1,gen_summary2,gen_summary3
0,test_0,#Person1# asks Ms. Dawson to take a dictation ...,#Person1# asks Ms. Dawson to take a dictation ...,#Person1# asks Ms. Dawson to take a dictation ...
1,test_1,#Person2# got stuck in traffic again. #Person1...,#Person2# got stuck in traffic again. #Person1...,#Person2# got stuck in traffic again. #Person1...
2,test_2,#Person1# tells Kate that Masha and Hero are g...,#Person1# tells Kate that Masha and Hero are g...,#Person1# tells Kate that Masha and Hero are g...
3,test_3,#Person1# celebrates Brian's birthday and danc...,#Person1# celebrates Brian's birthday and danc...,#Person1# celebrates Brian's birthday and danc...
4,test_4,#Person1# and #Person2# are in the Olympic sta...,#Person1# and #Person2# are in the Olympic sta...,#Person1# and #Person2# are in the Olympic sta...


In [63]:
# Defaults for inspecting a single experiment / sample / summary slot.
num_method = 1
num_example = 1
gen_summary_num = 0

# Column names of the three reference summaries, the three reference topics
# and the three generated summaries; index k in one list pairs with index k
# in the others.
list_gold_summary = [f"summary{slot}" for slot in range(1, 4)]
list_gold_topic = [f"topic{slot}" for slot in range(1, 4)]
list_gen_summary = [f"gen_summary{slot}" for slot in range(1, 4)]

In [64]:
# Score every experiment's generated summaries against the gold references.
# For experiment i and summary slot k (1..3), gen_summary<k> is compared with
# gold summary<k>, and four per-sample columns are added to result_all_df[i]:
# gen<k>_rouge_1, gen<k>_rouge_2, gen<k>_rouge_l (ROUGE F-scores) and
# gen<k>_bert_score (BERTScore F1).
for i in list_method:
    method_df = result_all_df[i]
    # Use the actual number of rows instead of a hard-coded 500, so the new
    # columns always have exactly one value per row of this experiment's table.
    n_samples = len(method_df)

    for num, (gold_col, gen_col) in enumerate(zip(list_gold_summary, list_gen_summary)):
        gold_list = [gold_summary_df.loc[idx, gold_col] for idx in range(n_samples)]
        gen_list = [method_df.loc[idx, gen_col] for idx in range(n_samples)]

        rouge_1_list = []
        rouge_2_list = []
        rouge_l_list = []
        for gold, gen in zip(gold_list, gen_list):
            # One call per pair; the three metrics are read from the same result.
            # NOTE(review): assumes py_rouge_scores is a pure function of its
            # arguments (the original called it three times with the same inputs).
            rouge = py_rouge_scores(gold, gen)
            rouge_1_list.append(rouge['rouge-1']['f'])
            rouge_2_list.append(rouge['rouge-2']['f'])
            rouge_l_list.append(rouge['rouge-l']['f'])

        # bert_score.score expects (candidates, references). Passing them the
        # other way round swaps precision and recall; F1, the only value kept
        # here, is symmetric in the two with the default (non-idf) settings.
        P, R, F1 = score(gen_list, gold_list, lang="en", verbose=True)

        method_df[f"gen{num+1}_rouge_1"] = rouge_1_list
        method_df[f"gen{num+1}_rouge_2"] = rouge_2_list
        method_df[f"gen{num+1}_rouge_l"] = rouge_l_list
        method_df[f"gen{num+1}_bert_score"] = F1.tolist()

Some weights of RobertaModel were not initialized from the model checkpoint at roberta-large and are newly initialized: ['roberta.pooler.dense.bias', 'roberta.pooler.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


calculating scores...
computing bert embedding.


  0%|          | 0/16 [00:00<?, ?it/s]

computing greedy matching.


  0%|          | 0/8 [00:00<?, ?it/s]

done in 1.53 seconds, 327.32 sentences/sec


Some weights of RobertaModel were not initialized from the model checkpoint at roberta-large and are newly initialized: ['roberta.pooler.dense.bias', 'roberta.pooler.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


calculating scores...
computing bert embedding.


  0%|          | 0/16 [00:00<?, ?it/s]

computing greedy matching.


  0%|          | 0/8 [00:00<?, ?it/s]

done in 1.52 seconds, 329.33 sentences/sec


Some weights of RobertaModel were not initialized from the model checkpoint at roberta-large and are newly initialized: ['roberta.pooler.dense.bias', 'roberta.pooler.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


calculating scores...
computing bert embedding.


  0%|          | 0/16 [00:00<?, ?it/s]

computing greedy matching.


  0%|          | 0/8 [00:00<?, ?it/s]

done in 1.50 seconds, 334.00 sentences/sec


Some weights of RobertaModel were not initialized from the model checkpoint at roberta-large and are newly initialized: ['roberta.pooler.dense.bias', 'roberta.pooler.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


calculating scores...
computing bert embedding.


  0%|          | 0/16 [00:00<?, ?it/s]

computing greedy matching.


  0%|          | 0/8 [00:00<?, ?it/s]

done in 1.50 seconds, 332.47 sentences/sec


Some weights of RobertaModel were not initialized from the model checkpoint at roberta-large and are newly initialized: ['roberta.pooler.dense.bias', 'roberta.pooler.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


calculating scores...
computing bert embedding.


  0%|          | 0/16 [00:00<?, ?it/s]

computing greedy matching.


  0%|          | 0/8 [00:00<?, ?it/s]

done in 1.49 seconds, 336.16 sentences/sec


Some weights of RobertaModel were not initialized from the model checkpoint at roberta-large and are newly initialized: ['roberta.pooler.dense.bias', 'roberta.pooler.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


calculating scores...
computing bert embedding.


  0%|          | 0/16 [00:00<?, ?it/s]

computing greedy matching.


  0%|          | 0/8 [00:00<?, ?it/s]

done in 1.50 seconds, 334.26 sentences/sec


Some weights of RobertaModel were not initialized from the model checkpoint at roberta-large and are newly initialized: ['roberta.pooler.dense.bias', 'roberta.pooler.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


calculating scores...
computing bert embedding.


  0%|          | 0/16 [00:00<?, ?it/s]

computing greedy matching.


  0%|          | 0/8 [00:00<?, ?it/s]

done in 1.46 seconds, 342.93 sentences/sec


Some weights of RobertaModel were not initialized from the model checkpoint at roberta-large and are newly initialized: ['roberta.pooler.dense.bias', 'roberta.pooler.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


calculating scores...
computing bert embedding.


  0%|          | 0/16 [00:00<?, ?it/s]

computing greedy matching.


  0%|          | 0/8 [00:00<?, ?it/s]

done in 1.44 seconds, 348.01 sentences/sec


Some weights of RobertaModel were not initialized from the model checkpoint at roberta-large and are newly initialized: ['roberta.pooler.dense.bias', 'roberta.pooler.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


calculating scores...
computing bert embedding.


  0%|          | 0/16 [00:00<?, ?it/s]

computing greedy matching.


  0%|          | 0/8 [00:00<?, ?it/s]

done in 1.44 seconds, 347.97 sentences/sec


Some weights of RobertaModel were not initialized from the model checkpoint at roberta-large and are newly initialized: ['roberta.pooler.dense.bias', 'roberta.pooler.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


calculating scores...
computing bert embedding.


  0%|          | 0/16 [00:00<?, ?it/s]

computing greedy matching.


  0%|          | 0/8 [00:00<?, ?it/s]

done in 2.59 seconds, 192.85 sentences/sec


Some weights of RobertaModel were not initialized from the model checkpoint at roberta-large and are newly initialized: ['roberta.pooler.dense.bias', 'roberta.pooler.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


calculating scores...
computing bert embedding.


  0%|          | 0/16 [00:00<?, ?it/s]

computing greedy matching.


  0%|          | 0/8 [00:00<?, ?it/s]

done in 2.60 seconds, 192.67 sentences/sec


Some weights of RobertaModel were not initialized from the model checkpoint at roberta-large and are newly initialized: ['roberta.pooler.dense.bias', 'roberta.pooler.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


calculating scores...
computing bert embedding.


  0%|          | 0/16 [00:00<?, ?it/s]

computing greedy matching.


  0%|          | 0/8 [00:00<?, ?it/s]

done in 2.77 seconds, 180.23 sentences/sec


Some weights of RobertaModel were not initialized from the model checkpoint at roberta-large and are newly initialized: ['roberta.pooler.dense.bias', 'roberta.pooler.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


calculating scores...
computing bert embedding.


  0%|          | 0/16 [00:00<?, ?it/s]

computing greedy matching.


  0%|          | 0/8 [00:00<?, ?it/s]

done in 1.56 seconds, 320.88 sentences/sec


Some weights of RobertaModel were not initialized from the model checkpoint at roberta-large and are newly initialized: ['roberta.pooler.dense.bias', 'roberta.pooler.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


calculating scores...
computing bert embedding.


  0%|          | 0/16 [00:00<?, ?it/s]

computing greedy matching.


  0%|          | 0/8 [00:00<?, ?it/s]

done in 1.50 seconds, 332.30 sentences/sec


Some weights of RobertaModel were not initialized from the model checkpoint at roberta-large and are newly initialized: ['roberta.pooler.dense.bias', 'roberta.pooler.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


calculating scores...
computing bert embedding.


  0%|          | 0/16 [00:00<?, ?it/s]

computing greedy matching.


  0%|          | 0/8 [00:00<?, ?it/s]

done in 1.48 seconds, 338.84 sentences/sec


Some weights of RobertaModel were not initialized from the model checkpoint at roberta-large and are newly initialized: ['roberta.pooler.dense.bias', 'roberta.pooler.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


calculating scores...
computing bert embedding.


  0%|          | 0/16 [00:00<?, ?it/s]

computing greedy matching.


  0%|          | 0/8 [00:00<?, ?it/s]

done in 1.44 seconds, 347.85 sentences/sec


Some weights of RobertaModel were not initialized from the model checkpoint at roberta-large and are newly initialized: ['roberta.pooler.dense.bias', 'roberta.pooler.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


calculating scores...
computing bert embedding.


  0%|          | 0/16 [00:00<?, ?it/s]

computing greedy matching.


  0%|          | 0/8 [00:00<?, ?it/s]

done in 1.42 seconds, 351.25 sentences/sec


Some weights of RobertaModel were not initialized from the model checkpoint at roberta-large and are newly initialized: ['roberta.pooler.dense.bias', 'roberta.pooler.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


calculating scores...
computing bert embedding.


  0%|          | 0/16 [00:00<?, ?it/s]

computing greedy matching.


  0%|          | 0/8 [00:00<?, ?it/s]

done in 1.41 seconds, 355.10 sentences/sec


Some weights of RobertaModel were not initialized from the model checkpoint at roberta-large and are newly initialized: ['roberta.pooler.dense.bias', 'roberta.pooler.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


calculating scores...
computing bert embedding.


  0%|          | 0/16 [00:00<?, ?it/s]

computing greedy matching.


  0%|          | 0/8 [00:00<?, ?it/s]

done in 1.48 seconds, 337.93 sentences/sec


Some weights of RobertaModel were not initialized from the model checkpoint at roberta-large and are newly initialized: ['roberta.pooler.dense.bias', 'roberta.pooler.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


calculating scores...
computing bert embedding.


  0%|          | 0/16 [00:00<?, ?it/s]

computing greedy matching.


  0%|          | 0/8 [00:00<?, ?it/s]

done in 1.46 seconds, 342.71 sentences/sec


Some weights of RobertaModel were not initialized from the model checkpoint at roberta-large and are newly initialized: ['roberta.pooler.dense.bias', 'roberta.pooler.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


calculating scores...
computing bert embedding.


  0%|          | 0/16 [00:00<?, ?it/s]

computing greedy matching.


  0%|          | 0/8 [00:00<?, ?it/s]

done in 1.46 seconds, 343.18 sentences/sec


Some weights of RobertaModel were not initialized from the model checkpoint at roberta-large and are newly initialized: ['roberta.pooler.dense.bias', 'roberta.pooler.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


calculating scores...
computing bert embedding.


  0%|          | 0/16 [00:00<?, ?it/s]

computing greedy matching.


  0%|          | 0/8 [00:00<?, ?it/s]

done in 1.51 seconds, 330.23 sentences/sec


Some weights of RobertaModel were not initialized from the model checkpoint at roberta-large and are newly initialized: ['roberta.pooler.dense.bias', 'roberta.pooler.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


calculating scores...
computing bert embedding.


  0%|          | 0/16 [00:00<?, ?it/s]

computing greedy matching.


  0%|          | 0/8 [00:00<?, ?it/s]

done in 1.51 seconds, 330.71 sentences/sec


Some weights of RobertaModel were not initialized from the model checkpoint at roberta-large and are newly initialized: ['roberta.pooler.dense.bias', 'roberta.pooler.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


calculating scores...
computing bert embedding.


  0%|          | 0/16 [00:00<?, ?it/s]

computing greedy matching.


  0%|          | 0/8 [00:00<?, ?it/s]

done in 1.57 seconds, 318.17 sentences/sec


Some weights of RobertaModel were not initialized from the model checkpoint at roberta-large and are newly initialized: ['roberta.pooler.dense.bias', 'roberta.pooler.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


calculating scores...
computing bert embedding.


  0%|          | 0/16 [00:00<?, ?it/s]

computing greedy matching.


  0%|          | 0/8 [00:00<?, ?it/s]

done in 1.49 seconds, 335.23 sentences/sec


Some weights of RobertaModel were not initialized from the model checkpoint at roberta-large and are newly initialized: ['roberta.pooler.dense.bias', 'roberta.pooler.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


calculating scores...
computing bert embedding.


  0%|          | 0/16 [00:00<?, ?it/s]

computing greedy matching.


  0%|          | 0/8 [00:00<?, ?it/s]

done in 1.47 seconds, 340.15 sentences/sec


Some weights of RobertaModel were not initialized from the model checkpoint at roberta-large and are newly initialized: ['roberta.pooler.dense.bias', 'roberta.pooler.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


calculating scores...
computing bert embedding.


  0%|          | 0/16 [00:00<?, ?it/s]

computing greedy matching.


  0%|          | 0/8 [00:00<?, ?it/s]

done in 1.44 seconds, 346.56 sentences/sec


In [65]:
# Show experiment 1's table, which at this point also carries the per-sample
# ROUGE and BERTScore columns added by the scoring loop.
result_all_df[1]

Unnamed: 0,fname,gen_summary1,gen_summary2,gen_summary3,gen1_rouge_1,gen1_rouge_2,gen1_rouge_l,gen1_bert_score,gen2_rouge_1,gen2_rouge_2,gen2_rouge_l,gen2_bert_score,gen3_rouge_1,gen3_rouge_2,gen3_rouge_l,gen3_bert_score
0,test_0,#Person1# asks Ms. Dawson to take a dictation ...,#Person1# asks Ms. Dawson to take a dictation ...,#Person1# asks Ms. Dawson to take a dictation ...,0.350000,0.051282,0.317362,0.891681,0.426966,0.160920,0.239912,0.890968,0.425000,0.230769,0.394919,0.884799
1,test_1,#Person2# got stuck in traffic again. #Person1...,#Person2# got stuck in traffic again. #Person1...,#Person2# got stuck in traffic again. #Person1...,0.400000,0.052632,0.466322,0.910078,0.486486,0.171429,0.444932,0.917976,0.514286,0.181818,0.466024,0.919479
2,test_2,#Person1# tells Kate that Masha and Hero are g...,#Person1# tells Kate that Masha and Hero are g...,#Person1# tells Kate that Masha and Hero are g...,0.871795,0.702703,0.892000,0.971663,0.650000,0.526316,0.698384,0.963978,0.634146,0.205128,0.549823,0.936696
3,test_3,#Person1# celebrates Brian's birthday and danc...,#Person1# celebrates Brian's birthday and danc...,#Person1# celebrates Brian's birthday and danc...,0.342857,0.000000,0.292332,0.903291,0.466667,0.142857,0.400809,0.913248,0.555556,0.176471,0.343960,0.919319
4,test_4,#Person1# and #Person2# are in the Olympic sta...,#Person1# and #Person2# are in the Olympic sta...,#Person1# and #Person2# are in the Olympic sta...,0.344828,0.148148,0.411817,0.899916,0.444444,0.160000,0.437425,0.927322,0.413793,0.148148,0.411817,0.904353
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
495,test_495,Jack invites Charlie to come to his house and ...,Jack invites Charlie to come to his house and ...,Jack invites Charlie to come to his house and ...,0.689655,0.296296,0.672541,0.954458,0.533333,0.214286,0.592425,0.946067,0.580645,0.275862,0.576319,0.935164
496,test_496,#Person2# tells #Person1# how #Person 2# got i...,#Person2# tells #Person1# how #Person 2# got i...,#Person2# tells #Person1# how #Person 2# got i...,0.611111,0.294118,0.663958,0.963076,0.277778,0.058824,0.285779,0.889773,0.291667,0.130435,0.315593,0.921176
497,test_497,Alice shows #Person1# how to use the washing m...,Alice shows #Person1# how to use the washing m...,Alice shows #Person1# how to use the washing m...,0.387097,0.166667,0.389635,0.899229,0.484848,0.187500,0.400312,0.888136,0.441176,0.212121,0.390688,0.886449
498,test_498,Matthew and Steve haven't seen each other for ...,Matthew and Steve haven't seen each other for ...,Matthew and Steve haven't seen each other for ...,0.428571,0.222222,0.463273,0.905874,0.507463,0.276923,0.512569,0.919481,0.584615,0.412698,0.526070,0.917013


In [66]:
# Create result.xlsx with experiment 1's scores on a sheet named '1'.
result_all_df[1].to_excel("result.xlsx", sheet_name='1')

In [67]:
# Append experiments 2..total_method to result.xlsx, one sheet per experiment.
# The workbook is opened once for all sheets instead of being re-opened (and
# fully re-saved) for every experiment. if_sheet_exists='replace' lets this
# cell be re-run: an existing sheet is overwritten rather than raising.
with pd.ExcelWriter('result.xlsx', mode='a', if_sheet_exists='replace') as writer:
    for i in range(2, total_method + 1):
        result_all_df[i].to_excel(writer, sheet_name=f'{i}')

In [50]:
# import torch
# gen_dict = {}
# for i in list_method:
#     metric = {}
#     gold_1 = gold_summary_df['summary1'].tolist()
#     gold_2 = gold_summary_df['summary2'].tolist()
#     gold_3 = gold_summary_df['summary3'].tolist()
#     gen_1 = result_all_df[i]['gen_summary1'].tolist()
#     gen_2 = result_all_df[i]['gen_summary2'].tolist()
#     gen_3 = result_all_df[i]['gen_summary3'].tolist()
#     P_1, R_1, F1_1 = score(gold_1, gen_1, lang="en", verbose=True)
#     P_2, R_2, F1_2 = score(gold_2, gen_2, lang="en", verbose=True)
#     P_3, R_3, F1_3 = score(gold_3, gen_3, lang="en", verbose=True)
#     metric = {'bert_score_1' : F1_1,
#               'bert_score_2' : F1_2,
#               'bert_score_3' : F1_3}
#     gen_dict[i] = metric

In [51]:
# for i in list_method:
#     print(name_method[str(i)])
#     print((torch.mean(gen_dict[i]['bert_score_1']).item() + torch.mean(gen_dict[i]['bert_score_2']).item() + torch.mean(gen_dict[i]['bert_score_3']).item())/3)

In [33]:
# Mean BERTScore F1 of the most recent score(...) call.
# Uses the tensor's own .mean() so the cell does not need the name `torch`,
# whose only import in this notebook is commented out.
# NOTE(review): F1 is whatever the last scoring call left behind — confirm
# which experiment / summary slot this is meant to report.
F1.mean()

tensor(0.9195)

In [118]:
# Pick one test sample and one of the three summary slots, then print the gold
# summary and topic followed by what every experiment generated for that slot.
num_example = 7
gen_summary_num = 0

list_gold_summary = ['summary1', 'summary2', 'summary3']
list_gold_topic = ['topic1', 'topic2', 'topic3']
list_gen_summary = ['gen_summary1', 'gen_summary2', 'gen_summary3']

# Resolve the column names for the chosen slot once.
gold_col = list_gold_summary[gen_summary_num]
topic_col = list_gold_topic[gen_summary_num]
gen_col = list_gen_summary[gen_summary_num]

print("Gold Summary: ", gold_summary_df.loc[num_example, gold_col])
print("Gold Topic: ", gold_summary_df.loc[num_example, topic_col])
print(150 * "=")
for num_method in list_method:
    print("Experiment: ", name_method[str(num_method)])
    print(gen_col, result_all_df[num_method].loc[num_example, gen_col])
    print(150 * "-")

Gold Summary:  #Person2# is checking out and asks #Person1# for the bill. #Person1# gives #Person2# a wrong bill at first then corrects it.
Gold Topic:  bill
Experiment:  1_bart_noprompt
gen_summary1 #Person1# helps #Person2# check out and finds the charge for laundry service on Nov. 20th has been added to someone else's. They'll correct the bill.
------------------------------------------------------------------------------------------------------------------------------------------------------
Experiment:  2_bart_topic
gen_summary1 #Person2# is checking out and finds the bill is wrong. #Person1# will correct it with the department concerned.
------------------------------------------------------------------------------------------------------------------------------------------------------
Experiment:  3_bart_length
gen_summary1 #Person1# helps #Person2# check out and finds the charge for laundry service on Nov. 20th has been added to someone else's bill.
----------------------------

In [127]:
# Sample summary text whose length is measured in the next cell.
length_prompt = (
    "#Person2# is checking out and finds the bill is wrong. "
    "#Person1# will correct it with the department concerned."
)

In [128]:
# Number of single-space-separated tokens in the prompt.
len(length_prompt.split(" "))

18

In [46]:
def _split_around_matches(text, spans):
    """Cut ``text`` into pieces around ``spans`` (ordered ``(start, size)`` pairs).

    Every span is kept verbatim as a single piece; the text before, between
    and after the spans is split with ``nltk.sent_tokenize``.
    """
    pieces = []
    cursor = 0
    for start, size in spans:
        pieces.extend(nltk.sent_tokenize(text[cursor:start]))
        pieces.append(text[start:start + size])
        cursor = start + size
    # Tail after the last span (or the whole text when there are no spans).
    pieces.extend(nltk.sent_tokenize(text[cursor:]))
    return pieces


def utils_split_sentences(a, b, min_size=20):
    '''
    Find the matching substrings in 2 strings.
    :parameter
        :param a: string - raw text
        :param b: string - raw text
        :param min_size: int - only matching blocks longer than this many
            characters are kept (default 20, the previously hard-coded value)
    :return
        2 lists (one per input) used to display matches: each kept matching
        block appears verbatim in both lists, and the remaining text of each
        input is split into sentences. With no kept block, each list is simply
        the sentence split of its input.
    '''
    ## find clean matches
    matcher = difflib.SequenceMatcher(isjunk=None, a=a, b=b, autojunk=True)
    lst_match = [block for block in matcher.get_matching_blocks() if block.size > min_size]

    ## the same cutting procedure is applied to both texts; only the start
    ## offset of each block differs between a and b
    lst_a = _split_around_matches(a, [(m.a, m.size) for m in lst_match])
    lst_b = _split_around_matches(b, [(m.b, m.size) for m in lst_match])
    return lst_a, lst_b


def _normalize_piece(piece):
    """Lower-case ``piece`` and drop every character that is not a word character or whitespace."""
    return re.sub(r'[^\w\s]', '', piece.lower())


def _highlight_pieces(pieces, other_normalized):
    """Wrap each piece whose normalized form is in ``other_normalized`` in a highlighting span."""
    return [
        '<span style="background-color:rgba(255,215,0,0.1);">' + piece + '</span>'
        if _normalize_piece(piece) in other_normalized else piece
        for piece in pieces
    ]


def display_string_matching(a, b, both=True, sentences=True, titles=None):
    '''
    Highlights the matched strings in text.
    :parameter
        :param a: string - raw text
        :param b: string - raw text
        :param both: bool - search a in b and, if True, viceversa
        :param sentences: bool - if False matches single words
        :param titles: list of strings - optional headings; titles[0] is put
            above the first text and titles[1] above the second. When fewer
            than two are given, the second text is preceded by a dashed rule.
    :return
        text html, it can be visualized on notebook with display(HTML(text))
    '''
    # None instead of a mutable default list shared between calls.
    titles = [] if titles is None else titles

    if sentences is True:
        lst_a, lst_b = utils_split_sentences(a, b)
    else:
        lst_a, lst_b = a.split(), b.split()

    ## normalize each side once, instead of once per compared piece
    normalized_a = {_normalize_piece(piece) for piece in lst_a}
    normalized_b = {_normalize_piece(piece) for piece in lst_b}

    ## highlight a
    first_text = ' '.join(_highlight_pieces(lst_a, normalized_b))

    ## highlight b (or leave it untouched when only a is searched)
    if both is True:
        second_text = ' '.join(_highlight_pieces(lst_b, normalized_a))
    else:
        second_text = b

    ## concatenate
    if len(titles) > 0:
        first_text = "<strong>"+titles[0]+"</strong><br>"+first_text
    if len(titles) > 1:
        second_text = "<strong>"+titles[1]+"</strong><br>"+second_text
    else:
        second_text = "---"*65+"<br><br>"+second_text
    final_text = first_text +'<br><br>'+ second_text
    return final_text

In [47]:
# Rebuild the list of experiment ids and print each one.
# NOTE(review): this also rebinds result_all_df to an empty dict, discarding
# the loaded results and their score columns — re-run the loading and scoring
# cells before using result_all_df again.
list_method = list(range(1 ,total_method + 1))
result_all_df = {}
for i in list_method:
    print(i)

1
2
3
4
5
6
7
8
9


In [None]:
# Render the word-level overlap between two texts as highlighted HTML.
# NOTE(review): text_a and text_b are not defined in any visible cell of this
# notebook — assign the two strings to compare before running this cell.
# display/HTML are imported from the public IPython.display module; importing
# `display` from IPython.core.display is deprecated.
from IPython.display import display, HTML

match = display_string_matching(text_a, text_b, both=True, sentences=False, titles=["Full Text", "Actual Text"])
display(HTML(match))