In [6]:
import os
import json
import pandas as pd

import difflib
import nltk
import re

from rouge_s import py_rouge_scores
from bert_score import score

%matplotlib inline

In [7]:
# Experiment label for each method id.  The ids are the ones used to locate
# ./result/<id>.jsonl and to title the per-method printouts further down.
# NOTE(review): ids 10-14 carry labels whose numeric prefix (5_, 6_, 7_, 4_)
# differs from the id, and ids 13 and 14 share the same label — confirm that
# this is intended and not a copy-paste slip.
name_method = {
    1: '1_bart_noprompt',
    2: '2_bart_topic',
    3: '3_bart_length',
    4: '4_bart_topic_length',
    5: '5_bart_contrastive_random',
    6: '6_bart_contrastive_synonym',
    7: '7_bart_contrastive_combine',
    8: '8_bart_contrastive_combine_word_tagger_dialog',
    9: '9_bart_contrastive_combine_prompt_tagger_dialog',
    10: '5_bart_contrastive_random_negative',
    11: '6_bart_contrastive_synonym_positive',
    12: '7_bart_contrastive_combine_pos_neg',
    13: '4_bart_topic_length_word_tagger',
    14: '4_bart_topic_length_word_tagger',
    15: '15_bart_topic_word_tagger',
    16: '16_bart_topic_contrastive_combine',
    17: '17_bart_topic_contrastive_combine_word_tagger',
}

In [8]:
# Highest method id that exists.
total_method = 17

# Method ids evaluated in this run: 15 through total_method inclusive.
# Start the range at 1 instead of 15 to evaluate every method.
list_method = [*range(15, total_method + 1)]

In [9]:
# Display the method ids selected for evaluation in this run.
list_method

[15, 16, 17]

In [10]:
def load_gold_summary(file_path):
    """Load gold (reference) annotations from a JSONL file, column-wise.

    Each non-blank line of the file is parsed as one JSON object that must
    contain the keys ``fname``, ``dialogue``, ``summary1``-``summary3`` and
    ``topic1``-``topic3``.

    Args:
        file_path: Path to the JSONL file.

    Returns:
        dict mapping each of the eight keys above to a list holding that
        field for every record, in file order.  The key order matches the
        column order expected when the dict is turned into a DataFrame.

    Raises:
        KeyError: if a record is missing one of the required keys.
        json.JSONDecodeError: if a non-blank line is not valid JSON.
    """
    columns = (
        "fname",
        "dialogue",
        "summary1",
        "summary2",
        "summary3",
        "topic1",
        "topic2",
        "topic3",
    )

    # JSON text is UTF-8; do not depend on the platform's default encoding.
    with open(file_path, "r", encoding="utf-8") as f:
        # Skip blank lines (e.g. a stray newline at the end of the file),
        # which json.loads would otherwise reject.
        data = [json.loads(line) for line in f if line.strip()]

    return {column: [sample[column] for sample in data] for column in columns}

In [11]:
def load_result(file_path):
    """Load generated summaries from a result JSONL file, column-wise.

    Each non-blank line of the file is parsed as one JSON object that must
    contain the keys ``fname`` and ``gen_summary1``-``gen_summary3``.

    Args:
        file_path: Path to the JSONL file.

    Returns:
        dict mapping ``fname``, ``gen_summary1``, ``gen_summary2`` and
        ``gen_summary3`` to a list holding that field for every record, in
        file order.

    Raises:
        KeyError: if a record is missing one of the required keys.
        json.JSONDecodeError: if a non-blank line is not valid JSON.
    """
    columns = ("fname", "gen_summary1", "gen_summary2", "gen_summary3")

    # JSON text is UTF-8; do not depend on the platform's default encoding.
    with open(file_path, "r", encoding="utf-8") as f:
        # Skip blank lines (e.g. a stray newline at the end of the file),
        # which json.loads would otherwise reject.
        data = [json.loads(line) for line in f if line.strip()]

    return {column: [sample[column] for sample in data] for column in columns}

In [12]:
# Load the gold annotations from the DialogSum test JSONL file.
# (Plain string literal: the path has no placeholders, so no f-string is needed.)
gold_summary = load_gold_summary("./data/dialogsum/dialogsum.test.jsonl")

In [13]:
# One row per example, one column per key of the loaded gold dict.
gold_summary_df = pd.DataFrame(gold_summary)

In [14]:
# Preview the first rows of the gold annotations as a sanity check.
gold_summary_df.head()

Unnamed: 0,fname,dialogue,summary1,summary2,summary3,topic1,topic2,topic3
0,test_0,"#Person1#: Ms. Dawson, I need you to take a di...",Ms. Dawson helps #Person1# to write a memo to ...,In order to prevent employees from wasting tim...,Ms. Dawson takes a dictation for #Person1# abo...,communication method,company policy,dictation
1,test_1,#Person1#: You're finally here! What took so l...,#Person2# arrives late because of traffic jam....,#Person2# decides to follow #Person1#'s sugges...,#Person2# complains to #Person1# about the tra...,public transportation,transportation,discuss transportation
2,test_2,"#Person1#: Kate, you never believe what's happ...",#Person1# tells Kate that Masha and Hero get d...,#Person1# tells Kate that Masha and Hero are g...,#Person1# and Kate talk about the divorce betw...,divorce,divorce,discuss divorce
3,test_3,"#Person1#: Happy Birthday, this is for you, Br...",#Person1# and Brian are at the birthday party ...,#Person1# attends Brian's birthday party. Bria...,#Person1# has a dance with Brian at Brian's bi...,birthday party,birthday party,birthday party
4,test_4,#Person1#: This Olympic park is so big!\n#Pers...,#Person1# is surprised at the Olympic Stadium'...,#Person2# shows #Person1# around the construct...,#Person2# introduces the Olympic Stadium's fin...,Olympic Stadium,sports stadium,Olympic Stadium


In [15]:
# gold_summary_df.to_excel("dataset.xlsx") 

In [16]:
# Map each selected method id to a DataFrame of its generated summaries,
# read from ./result/<id>.jsonl.
result_all_df = {
    method_id: pd.DataFrame.from_dict(load_result(f"./result/{method_id}.jsonl"))
    for method_id in list_method
}

In [17]:
# result_all_df[1].head()

In [18]:
# Default selection: method id, example row and summary slot.
num_method, num_example, gen_summary_num = 1, 1, 0

# Column names for the three annotation slots; position k in each list refers
# to the same slot (gold summary k+1, gold topic k+1, generated summary k+1).
list_gold_summary = [f"summary{slot}" for slot in (1, 2, 3)]
list_gold_topic = [f"topic{slot}" for slot in (1, 2, 3)]
list_gen_summary = [f"gen_summary{slot}" for slot in (1, 2, 3)]

In [19]:
# Score every generated summary against its gold reference.  For each selected
# method and each of the three (gold, generated) summary slots, this adds
# per-example ROUGE-1/2/L F-scores, BERTScore F1 and space-split word counts
# as new columns on that method's result frame.
# NOTE(review): gold and generated rows are paired by positional index only;
# this assumes both files list the examples in the same order — confirm
# against the `fname` columns.
for i in list_method:
    # The column assignments below need exactly one value per row of this
    # frame, so take the example count from the frame rather than hard-coding it.
    num_examples = len(result_all_df[i])
    for num in range(len(list_gen_summary)):
        rouge_1_list = []
        rouge_2_list = []
        rouge_l_list = []
        gold_list = []
        gen_list = []
        gold_length_list = []
        gen_length_list = []
        diff_length_list = []
        for idx in range(num_examples):
            gold = gold_summary_df.loc[idx, list_gold_summary[num]]
            gen = result_all_df[i].loc[idx, list_gen_summary[num]]
            # Lengths are counted in space-separated tokens.
            gold_len = len(gold.split(" "))
            gen_len = len(gen.split(" "))
            # Positive when the generated summary is longer than the gold one.
            diff_len = gen_len - gold_len
            # Compute ROUGE once per pair and read all three F-scores from the
            # same result instead of re-scoring the pair for every metric.
            rouge_scores = py_rouge_scores(gold, gen)
            rouge_1 = rouge_scores['rouge-1']['f']
            rouge_2 = rouge_scores['rouge-2']['f']
            rouge_l = rouge_scores['rouge-l']['f']
            rouge_1_list.append(rouge_1)
            rouge_2_list.append(rouge_2)
            rouge_l_list.append(rouge_l)
            gold_list.append(gold)
            gen_list.append(gen)
            gold_length_list.append(gold_len)
            gen_length_list.append(gen_len)
            diff_length_list.append(diff_len)
        # bert_score.score expects (candidates, references).  F1 is symmetric
        # in the two arguments, so the stored values are unaffected, but P and
        # R now carry their documented meaning.
        P, R, F1 = score(gen_list, gold_list, lang="en", verbose=True)
        bert_score = F1.tolist()
        result_all_df[i][f"gen{num+1}_rouge_1"] = rouge_1_list
        result_all_df[i][f"gen{num+1}_rouge_2"] = rouge_2_list
        result_all_df[i][f"gen{num+1}_rouge_l"] = rouge_l_list
        result_all_df[i][f"gen{num+1}_bert_score"] = bert_score
        result_all_df[i][f"gen{num+1}_gold_length"] = gold_length_list
        result_all_df[i][f"gen{num+1}_gen_length"] = gen_length_list
        result_all_df[i][f"gen{num+1}_diff_length"] = diff_length_list

Some weights of RobertaModel were not initialized from the model checkpoint at roberta-large and are newly initialized: ['roberta.pooler.dense.bias', 'roberta.pooler.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


calculating scores...
computing bert embedding.


  0%|          | 0/16 [00:00<?, ?it/s]

computing greedy matching.


  0%|          | 0/8 [00:00<?, ?it/s]

done in 9.22 seconds, 54.22 sentences/sec


Some weights of RobertaModel were not initialized from the model checkpoint at roberta-large and are newly initialized: ['roberta.pooler.dense.bias', 'roberta.pooler.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


calculating scores...
computing bert embedding.


  0%|          | 0/16 [00:00<?, ?it/s]

computing greedy matching.


  0%|          | 0/8 [00:00<?, ?it/s]

done in 9.22 seconds, 54.24 sentences/sec


Some weights of RobertaModel were not initialized from the model checkpoint at roberta-large and are newly initialized: ['roberta.pooler.dense.bias', 'roberta.pooler.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


calculating scores...
computing bert embedding.


  0%|          | 0/16 [00:00<?, ?it/s]

computing greedy matching.


  0%|          | 0/8 [00:00<?, ?it/s]

done in 8.22 seconds, 60.86 sentences/sec


Some weights of RobertaModel were not initialized from the model checkpoint at roberta-large and are newly initialized: ['roberta.pooler.dense.bias', 'roberta.pooler.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


calculating scores...
computing bert embedding.


  0%|          | 0/16 [00:00<?, ?it/s]

computing greedy matching.


  0%|          | 0/8 [00:00<?, ?it/s]

done in 5.32 seconds, 94.06 sentences/sec


Some weights of RobertaModel were not initialized from the model checkpoint at roberta-large and are newly initialized: ['roberta.pooler.dense.bias', 'roberta.pooler.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


calculating scores...
computing bert embedding.


  0%|          | 0/16 [00:00<?, ?it/s]

computing greedy matching.


  0%|          | 0/8 [00:00<?, ?it/s]

done in 4.70 seconds, 106.40 sentences/sec


Some weights of RobertaModel were not initialized from the model checkpoint at roberta-large and are newly initialized: ['roberta.pooler.dense.bias', 'roberta.pooler.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


calculating scores...
computing bert embedding.


  0%|          | 0/16 [00:00<?, ?it/s]

computing greedy matching.


  0%|          | 0/8 [00:00<?, ?it/s]

done in 4.69 seconds, 106.66 sentences/sec


Some weights of RobertaModel were not initialized from the model checkpoint at roberta-large and are newly initialized: ['roberta.pooler.dense.bias', 'roberta.pooler.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


calculating scores...
computing bert embedding.


  0%|          | 0/16 [00:00<?, ?it/s]

computing greedy matching.


  0%|          | 0/8 [00:00<?, ?it/s]

done in 9.31 seconds, 53.71 sentences/sec


Some weights of RobertaModel were not initialized from the model checkpoint at roberta-large and are newly initialized: ['roberta.pooler.dense.bias', 'roberta.pooler.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


calculating scores...
computing bert embedding.


  0%|          | 0/16 [00:00<?, ?it/s]

computing greedy matching.


  0%|          | 0/8 [00:00<?, ?it/s]

done in 9.34 seconds, 53.54 sentences/sec


Some weights of RobertaModel were not initialized from the model checkpoint at roberta-large and are newly initialized: ['roberta.pooler.dense.bias', 'roberta.pooler.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


calculating scores...
computing bert embedding.


  0%|          | 0/16 [00:00<?, ?it/s]

computing greedy matching.


  0%|          | 0/8 [00:00<?, ?it/s]

done in 6.65 seconds, 75.21 sentences/sec


In [21]:
# result_all_df[1]

In [25]:
# Inspect the result frame for method 15, including the gen{k}_* metric
# columns added by the scoring loop.
result_all_df[15]

Unnamed: 0,fname,gen_summary1,gen_summary2,gen_summary3,gen1_rouge_1,gen1_rouge_2,gen1_rouge_l,gen1_bert_score,gen1_gold_length,gen1_gen_length,...,gen2_gold_length,gen2_gen_length,gen2_diff_length,gen3_rouge_1,gen3_rouge_2,gen3_rouge_l,gen3_bert_score,gen3_gold_length,gen3_gen_length,gen3_diff_length
0,test_0,#Person1# asks Ms. Dawson to take a dictation ...,#Person1# asks Ms. Dawson to take a dictation ...,#Person1# asks Ms. Dawson to take adictation f...,0.310680,0.039604,0.280912,0.880892,27,76,...,36,64,28,0.313725,0.140000,0.304386,0.863925,27,75,48
1,test_1,#Person2# got stuck in traffic again. #Person1...,#Person2# got stuck in traffic again. #Person1...,#Person2# got stuck in traffic again. #Person1...,0.361111,0.057143,0.376401,0.907797,22,49,...,18,35,17,0.384615,0.080000,0.377708,0.907712,17,35,18
2,test_2,#Person1# tells Kate Masha and Hero are gettin...,#Person1# tells Kate Masha and Hero are gettin...,#Person1# tells Kate Masha and Hero are gettin...,0.923077,0.756757,0.935516,0.976268,19,20,...,20,20,0,0.682927,0.358974,0.640040,0.941496,21,20,-1
3,test_3,#Person1# gives Brian a birthday party for him...,#Person1# gives Brian a birthday party for him...,#Person1# gives Brian a birthday party for him...,0.325581,0.048780,0.297108,0.896426,18,25,...,12,25,13,0.272727,0.047619,0.291308,0.900162,18,25,7
4,test_4,#Person1# and #Person2# are in the Olympic sta...,#Person1# and #Person2# are in the Olympic par...,#Person1# and #Person2# are in the Olympic sta...,0.363636,0.129032,0.370341,0.907939,13,19,...,12,27,15,0.363636,0.129032,0.370341,0.904505,13,19,6
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
495,test_495,Jack invites Charlie to his house to play a ne...,Jack invites Charlie to come to his house and ...,Jack invites Charlie to come to his house and ...,0.645161,0.551724,0.695312,0.943142,13,18,...,14,19,5,0.470588,0.187500,0.477852,0.921216,15,19,4
496,test_496,#Person2# tells #Person1# how #Person2 # got i...,#Person2# tells #Person1# how #Person2 # got i...,#Person2# tells #Person1# how #Person2 # got i...,0.685714,0.424242,0.730592,0.962334,16,20,...,15,20,5,0.340426,0.133333,0.365433,0.921195,27,20,-7
497,test_497,#Person1# asks Alice how to use the washing ma...,#Person1# asks Alice how to use the washing ma...,#Person1# asks Alice how to use the washing ma...,0.430380,0.129870,0.398069,0.894714,29,47,...,31,39,8,0.467532,0.293333,0.378570,0.902635,34,39,5
498,test_498,Steve tells Matthew he's looking for a place t...,Steve tells Matthew he's been looking for a pl...,Steve tells Matthew he's been looking for a pl...,0.412698,0.229508,0.484737,0.894851,17,42,...,28,35,7,0.435897,0.236842,0.428691,0.903670,27,47,20


In [26]:
# Create the workbook with method 15's scored frame as sheet "15".
result_all_df[15].to_excel("result_15_17.xlsx", sheet_name="15")

In [27]:
# Add one sheet per remaining method (16..total_method) to the workbook that
# already exists on disk; the writer is opened in append mode (mode="a").
for i in range(16, total_method + 1):
    with pd.ExcelWriter("result_15_17.xlsx", mode="a") as writer:
        result_all_df[i].to_excel(writer, sheet_name=str(i))

In [None]:
# Qualitative check: print one gold summary/topic and, for every selected
# method, the generated summary for the same example and summary slot.
num_example = 7
gen_summary_num = 0

list_gold_summary = ['summary1', 'summary2', 'summary3']
list_gold_topic = ['topic1', 'topic2', 'topic3']
list_gen_summary = ['gen_summary1', 'gen_summary2', 'gen_summary3']

# Resolve the column names for the chosen slot once.
gold_summary_col = list_gold_summary[gen_summary_num]
gold_topic_col = list_gold_topic[gen_summary_num]
gen_summary_col = list_gen_summary[gen_summary_num]

print("Gold Summary: ", gold_summary_df.loc[num_example, gold_summary_col])
print("Gold Topic: ", gold_summary_df.loc[num_example, gold_topic_col])
print("=" * 150)
for num_method in list_method:
    print("Experiment: ", name_method[num_method])
    print(gen_summary_col, result_all_df[num_method].loc[num_example, gen_summary_col])
    print("-" * 100)