In [14]:
from os import listdir
from os.path import isfile, join
from sumeval.metrics.rouge import RougeCalculator
import numpy as np

# example

In [2]:

from sumeval.metrics.rouge import RougeCalculator
# Demo of the sumeval API: score one candidate summary with each ROUGE variant.
rouge = RougeCalculator(stopwords=True, lang="en")

candidate = "I went to the Mars from my living town."
# Stored as a tuple; every call below receives its own fresh list built from it.
reference_pair = ("I went to Mars", "It's my living town")

# ROUGE-1 is scored against a single reference string.
rouge_1 = rouge.rouge_n(summary=candidate, references="I went to Mars", n=1)

# ROUGE-2 and ROUGE-L are scored against both references.
rouge_2 = rouge.rouge_n(summary=candidate, references=list(reference_pair), n=2)
rouge_l = rouge.rouge_l(summary=candidate, references=list(reference_pair))

# You need spaCy to calculate ROUGE-BE
rouge_be = rouge.rouge_be(summary=candidate, references=list(reference_pair))

# Format everything on one line, then break at each ", " so that every metric
# ends up on its own output line.
report = "ROUGE-1: {}, ROUGE-2: {}, ROUGE-L: {}, ROUGE-BE: {}".format(
    rouge_1, rouge_2, rouge_l, rouge_be
)
print(report.replace(", ", "\n"))
    

ROUGE-1: 0.5
ROUGE-2: 0.4
ROUGE-L: 0.6666666666666666
ROUGE-BE: 0.4


In [15]:
# relationship small dataset
path_pre = "/home/ubuntu/cs224u/processed_relationships/baseline"
ref_path = path_pre + '/reference'
dec_path = path_pre + '/decoded'

# Keep only the entries that are regular files; the names stay in the order
# listdir returned them (no sorting is applied here).
reference_files = list(filter(lambda name: isfile(join(ref_path, name)), listdir(ref_path)))
decoded_files = list(filter(lambda name: isfile(join(dec_path, name)), listdir(dec_path)))


In [3]:
# Display the reference file names collected from the reference directory.
reference_files

['t3_4bcuhi_reference.txt',
 't3_3yzftu_reference.txt',
 '6ejvy9_reference.txt',
 'd5axozy_reference.txt',
 't3_29tth3_reference.txt',
 'di3h5hh_reference.txt',
 'd62d0ho_reference.txt',
 '64tr5t_reference.txt',
 't3_1udtww_reference.txt']

In [36]:
def _list_files_sorted(directory):
    '''Return the names of the regular files directly inside `directory`, sorted.'''
    return sorted(f for f in listdir(directory) if isfile(join(directory, f)))


def _read_text(path):
    '''Return the whole content of the text file at `path`; the handle is closed on exit.'''
    with open(path, 'r') as handle:
        return handle.read()


def computeRougeScore(path_pre, alpha_input, max_files=5000):
    '''
    Print the average ROUGE-1, ROUGE-2, ROUGE-L and ROUGE-BE of a decode run.

    The files under `<path_pre>/reference` and `<path_pre>/decoded` are each
    sorted by name and paired by position: the i-th decoded file is scored
    against the i-th reference file.

    input:
        path_pre: working directory that contains the `reference` and
            `decoded` sub-directories
        alpha_input: forwarded unchanged as `alpha` to every ROUGE call
            (presumably the precision/recall weight of the F-measure, 0.5
            being balanced -- confirm against the sumeval documentation)
        max_files: only the first `max_files` files (in sorted order) of each
            directory are scored; None scores all of them. Defaults to the
            previously hard-coded 5000.

    returns: None -- the four averages are printed.

    raises:
        ValueError: if the two directories hold a different number of files
            (positional pairing would then compare unrelated summaries), or
            if there is nothing to score.
    '''
    # setup working directory
    ref_path = join(path_pre, 'reference')
    dec_path = join(path_pre, 'decoded')
    all_reference = _list_files_sorted(ref_path)
    all_decoded = _list_files_sorted(dec_path)

    # Pairing is purely positional, so a missing or stray file on either side
    # would silently shift every later pair. Refuse to score in that case.
    if len(all_reference) != len(all_decoded):
        raise ValueError(
            "reference/decoded file counts differ (%d in %s vs %d in %s)"
            % (len(all_reference), ref_path, len(all_decoded), dec_path))

    reference_files = all_reference[:max_files]
    decoded_files = all_decoded[:max_files]
    num_file = len(reference_files)
    if num_file == 0:
        # np.mean of an empty array would only yield nan plus a warning.
        raise ValueError("no reference/decoded files to score under %s" % path_pre)

    rouge = RougeCalculator(stopwords=False, lang="en")

    # One row per file; columns are ROUGE-1, ROUGE-2, ROUGE-L, ROUGE-BE.
    scores = np.zeros((num_file, 4))
    for i, (ref_name, dec_name) in enumerate(zip(reference_files, decoded_files)):
        f_ref = _read_text(join(ref_path, ref_name))
        f_decode = _read_text(join(dec_path, dec_name))
        scores[i, 0] = rouge.rouge_n(
            summary=f_decode, references=f_ref, n=1, alpha=alpha_input)
        scores[i, 1] = rouge.rouge_n(
            summary=f_decode, references=f_ref, n=2, alpha=alpha_input)
        scores[i, 2] = rouge.rouge_l(
            summary=f_decode, references=f_ref, alpha=alpha_input)
        scores[i, 3] = rouge.rouge_be(
            summary=f_decode, references=f_ref, alpha=alpha_input)

    print("rouge_1_average score:", np.mean(scores[:, 0]))
    print("rouge_2_average score:", np.mean(scores[:, 1]))
    print("rouge_l_average score:", np.mean(scores[:, 2]))
    print("rouge_be_average score:", np.mean(scores[:, 3]))


In [32]:
# relationship small dataset
path_pre = "/home/ubuntu/cs224u/processed_relationships/baseline"


computeRougeScore(path_pre,alpha_input = 0.5) # prints the average ROUGE-1, ROUGE-2, ROUGE-L and ROUGE-BE scores; nothing is returned

rouge_1_average score: 0.17448384037694867
rouge_2_average score: 0.028741521495144688
rouge_l_average score: 0.12528579276117954
rouge_be_average score: 0.05478750640040962


In [33]:
# pretrained model
path_pre = '/home/ubuntu/cs224u/pointer/log/pretrained_model/decode_test_400maxenc_4beam_35mindec_120maxdec_ckpt-238410'
computeRougeScore(path_pre,alpha_input = 0.5) # prints the average ROUGE-1, ROUGE-2, ROUGE-L and ROUGE-BE scores; nothing is returned

rouge_1_average score: 0.14524432050248826
rouge_2_average score: 0.0262261693517867
rouge_l_average score: 0.10016417609387036
rouge_be_average score: 0.021919220954800978


In [34]:
# our training: decode output of the reddit_exp run (directory name carries ckpt-39995)
path_pre = '/home/ubuntu/cs224u/pointer/log/reddit_exp/decode_test_400maxenc_4beam_35mindec_120maxdec_ckpt-39995_cov_init_cov_init'

computeRougeScore(path_pre,alpha_input = 0.5)

rouge_1_average score: 0.13774893754386922
rouge_2_average score: 0.031008597706054207
rouge_l_average score: 0.10987995815993751
rouge_be_average score: 0.02278000647831357


In [37]:
# lead-3 baseline
# NOTE: unlike the other runs this is a relative path, so it is resolved
# against the working directory of the notebook kernel.
path_pre = '../AskReddit_baseline_summarization'

computeRougeScore(path_pre,alpha_input = 0.5)

rouge_1_average score: 0.1518306825952093
rouge_2_average score: 0.027848149178214408
rouge_l_average score: 0.10711039495037253
rouge_be_average score: 0.027821014579900872


# not relevant below

In [23]:
def rouge_eval(ref_dir, dec_dir, rouge_home='/home/ubuntu/pyrouge/pyrouge/ROUGE-1.5.5'):
  """Evaluate the files in ref_dir and dec_dir with pyrouge, returning results_dict

  Args:
    ref_dir: directory of the reference summaries; files are matched with the
      pattern '#ID#_reference.txt'
    dec_dir: directory of the decoded summaries; files are matched with the
      regex '(\\d+)_decoded.txt', i.e. the ID captured is a run of digits
    rouge_home: ROUGE-1.5.5 directory handed to pyrouge.Rouge155; defaults to
      the location that used to be hard-coded here

  Returns:
    whatever Rouge155.output_to_dict builds from the output of
    Rouge155.convert_and_evaluate
  """
  # Imported here because the notebook only ever runs `from pyrouge import Rouge155`,
  # which does not bind the name `pyrouge` used below.
  import pyrouge

  r = pyrouge.Rouge155(rouge_home)
  r.model_filename_pattern = '#ID#_reference.txt'
  # Raw string: '\d' inside a plain literal is an invalid escape sequence.
  r.system_filename_pattern = r'(\d+)_decoded.txt'
  r.model_dir = ref_dir
  r.system_dir = dec_dir
  #logging.getLogger('global').setLevel(logging.WARNING) # silence pyrouge logging
  rouge_results = r.convert_and_evaluate()
  return r.output_to_dict(rouge_results)

In [24]:
# compute rouge score on the relationship's small dataset
#ref_dir = '/home/ubuntu/cs224u/processed_relationships/baseline/reference'
#dec_dir = '/home/ubuntu/cs224u/processed_relationships/baseline/decoded'

In [20]:
# reference / decoded directories of the test_exp decode run (directory name carries ckpt-2315_cov_init)
ref_dir = '/home/ubuntu/cs224u/pointer/log/test_exp/decode_test_400maxenc_4beam_35mindec_120maxdec_ckpt-2315_cov_init/reference'
dec_dir = '/home/ubuntu/cs224u/pointer/log/test_exp/decode_test_400maxenc_4beam_35mindec_120maxdec_ckpt-2315_cov_init/decoded'

In [21]:
# Run the pyrouge evaluation on ref_dir / dec_dir and keep the results dictionary.
# The recorded run of this cell ended in a CalledProcessError (ROUGE-1.5.5.pl exit status 255).
result = rouge_eval(ref_dir, dec_dir)

2019-05-27 05:54:10,748 [MainThread  ] [INFO ]  Set ROUGE home directory to /home/ubuntu/pyrouge/pyrouge/ROUGE-1.5.5.
2019-05-27 05:54:10,748 [MainThread  ] [INFO ]  Writing summaries.
2019-05-27 05:54:10,749 [MainThread  ] [INFO ]  Processing summaries. Saving system files to /tmp/tmph0cnzion/system and model files to /tmp/tmph0cnzion/model.
2019-05-27 05:54:10,750 [MainThread  ] [INFO ]  Processing files in /home/ubuntu/cs224u/pointer/log/test_exp/decode_test_400maxenc_4beam_35mindec_120maxdec_ckpt-2315_cov_init/decoded.
2019-05-27 05:54:10,751 [MainThread  ] [INFO ]  Processing 000005_decoded.txt.
2019-05-27 05:54:10,752 [MainThread  ] [INFO ]  Processing 000004_decoded.txt.
2019-05-27 05:54:10,752 [MainThread  ] [INFO ]  Processing 000003_decoded.txt.
2019-05-27 05:54:10,753 [MainThread  ] [INFO ]  Processing 000000_decoded.txt.
2019-05-27 05:54:10,754 [MainThread  ] [INFO ]  Processing 000002_decoded.txt.
2019-05-27 05:54:10,755 [MainThread  ] [INFO ]  Processing 000007_decoded.tx

CalledProcessError: Command '['/home/ubuntu/pyrouge/pyrouge/ROUGE-1.5.5/ROUGE-1.5.5.pl', '-e', '/home/ubuntu/pyrouge/pyrouge/ROUGE-1.5.5/data', '-c', '95', '-2', '-1', '-U', '-r', '1000', '-n', '4', '-w', '1.2', '-a', '-m', '/tmp/tmpmkj1n5cs/rouge_conf.xml']' returned non-zero exit status 255.

In [None]:
# example

r = Rouge155()
# Same assignment as in rouge_eval: system_dir receives the decoded summaries,
# model_dir receives the references (the two were swapped here before).
# NOTE(review): every other cell uses /home/ubuntu/cs224u/... -- confirm these paths.
r.system_dir = '/cs224u/processed_relationships/baseline/decoded'
r.model_dir = '/cs224u/processed_relationships/baseline/reference'
# Raw string: '\d' inside a plain literal is an invalid escape sequence.
# NOTE(review): these look like placeholder patterns; the reference files listed
# earlier in this notebook end in "_reference.txt" -- adjust before running.
r.system_filename_pattern = r'some_name.(\d+).txt'
r.model_filename_pattern = 'some_name.[A-Z].#ID#.txt'

output = r.convert_and_evaluate()
print(output)
output_dict = r.output_to_dict(output)

In [None]:
# Evaluate ref_dir against dec_dir with rouge_eval and keep the results dictionary.
output_result = rouge_eval(ref_dir, dec_dir)

In [None]:
def rouge_log(results_dict, dir_to_write):
  """Log ROUGE results to screen and write to file.

  For each of ROUGE-1, ROUGE-2 and ROUGE-L, and for each of f_score, recall and
  precision, one line is produced with the value and its confidence interval.
  The values are read from results_dict under the keys "rouge_<x>_<y>",
  "rouge_<x>_<y>_cb" and "rouge_<x>_<y>_ce".

  Args:
    results_dict: the dictionary returned by pyrouge
    dir_to_write: the directory where we will write the results to; the file
      is named ROUGE_results.txt and is overwritten if it exists

  Raises:
    KeyError: if one of the expected keys is missing from results_dict
  """
  # The notebook imports names *from* os / os.path but never the os module itself.
  import os

  # NOTE(review): `tf` is not imported anywhere in the visible notebook; this
  # function presumably expects TensorFlow 1.x (tf.logging) to be bound already.
  pieces = []
  for x in ["1","2","l"]:
    pieces.append("\nROUGE-%s:\n" % x)
    for y in ["f_score", "recall", "precision"]:
      key = "rouge_%s_%s" % (x,y)
      pieces.append(
          "%s: %.4f with confidence interval (%.4f, %.4f)\n"
          % (key, results_dict[key], results_dict[key + "_cb"], results_dict[key + "_ce"]))
  log_str = "".join(pieces)
  tf.logging.info(log_str) # log to screen
  results_file = os.path.join(dir_to_write, "ROUGE_results.txt")
  tf.logging.info("Writing final ROUGE results to %s...", results_file)
  with open(results_file, "w") as f:
    f.write(log_str)

In [7]:
from pyrouge import Rouge155
from pprint import pprint

import subprocess
import sys

rouge = Rouge155('/home/ubuntu/pyrouge/tools/ROUGE-1.5.5')

# "python -m pyrouge.test" is a shell command; written bare inside a Python cell
# it is a SyntaxError. Run the pyrouge self-test module in a child process of the
# interpreter that backs this kernel, and echo its combined stdout/stderr here.
self_test = subprocess.run(
    [sys.executable, "-m", "pyrouge.test"],
    stdout=subprocess.PIPE,
    stderr=subprocess.STDOUT,
    universal_newlines=True,
)
print(self_test.stdout)

2019-05-27 05:18:39,170 [MainThread  ] [INFO ]  Set ROUGE home directory to /home/ubuntu/pyrouge/tools/ROUGE-1.5.5.


AttributeError: 'Rouge155' object has no attribute 'score_summary'