In [13]:
from nltk.translate.bleu_score import corpus_bleu
from nltk.translate.bleu_score import sentence_bleu
import rouge
import yaml
import re
import glob
import os
import numpy as np

In [14]:
# Experiment configuration file; it provides the dataset split sizes used below.
config_path = '../configs/dpng_transformer.yaml'

# Parse the YAML config and echo it so the run settings are visible in the output.
with open(config_path) as config_file:
    config = yaml.load(config_file, Loader=yaml.FullLoader)
print(config)

# Split sizes are used by read_ref to slice the shuffled data file.
train_size, val_size, test_size = (
    config[key] for key in ('train_size', 'val_size', 'test_size')
)

{'save_model_path': '../models/DNPG_base_transformer.pth', 'log_file': '../logs/DNPG_base_transformer_training.txt', 'test_output_file': '../outputs/test_DNPG_base_transformer.txt', 'val_output_file': '../outputs/val_DNPG_base_transformer.txt', 'dataset': 'quora_dataset', 'num_epochs': 50, 'batch_size': 128, 'd_model': 450, 'd_inner_hid': 512, 'd_k': 50, 'd_v': 50, 'n_head': 9, 'n_layers': 3, 'n_warmup_steps': 12000, 'dropout': 0.1, 'embs_share_weight': True, 'proj_share_weight': True, 'label_smoothing': False, 'train_size': 100000, 'val_size': 4000, 'test_size': 20000, 'is_bow': False, 'lr': '1e-3'}


In [15]:
# Random seeds to evaluate: each one selects a ./fixseed/seed<N> directory of
# prediction files and is passed to np.random.seed before the data is shuffled.
seeds = [0, 777, 33333]
# Tab-separated data file handed to read_ref; its first column is used as the reference.
ref_path = '../data/quora_train.txt'

In [16]:
# Map every seed to the list of .txt files found in ./fixseed/seed<seed>/.
seed_root = './fixseed'
seed_dir_dic = {
    seed: glob.glob(os.path.join(seed_root, 'seed{}'.format(seed), '*.txt'))
    for seed in seeds
}

In [21]:
def read_ref(ref_path, train_size=None, val_size=None, test_size=None):
    """Load the test slice of the data file and prepare it as BLEU/ROUGE references.

    The file is read line by line, shuffled in place with the *global* NumPy
    random state (callers must seed ``np.random`` beforehand to get a
    reproducible split), and the lines at positions
    ``[train_size + val_size, train_size + val_size + test_size)`` are kept.
    Only the first tab-separated column of each kept line is used, i.e. the
    input sentence, so scores computed against it are "BLEU-ori" style scores.

    Args:
        ref_path: Path of the tab-separated text file to read.
        train_size: Number of leading shuffled lines belonging to the training
            split. Defaults to the module-level ``train_size``.
        val_size: Number of lines belonging to the validation split. Defaults
            to the module-level ``val_size``.
        test_size: Number of lines to return. Defaults to the module-level
            ``test_size``.

    Returns:
        A tuple ``(reference_text, reference_corpus)`` where ``reference_text``
        is a list of normalized sentences and ``reference_corpus`` is a list of
        single-reference lists of tokens (``[[tok, ...]]`` per sentence), the
        shape ``nltk`` ``corpus_bleu`` expects for references.
    """
    # Fall back to the notebook-level split sizes when not given explicitly,
    # so the historical call read_ref(ref_path) keeps working unchanged.
    module_vars = globals()
    if train_size is None:
        train_size = module_vars['train_size']
    if val_size is None:
        val_size = module_vars['val_size']
    if test_size is None:
        test_size = module_vars['test_size']

    # Context manager guarantees the file handle is closed.
    with open(ref_path, 'r') as ref_file:
        lines = ref_file.readlines()

    # In-place shuffle driven by the global NumPy RNG state.
    np.random.shuffle(lines)
    test_start = train_size + val_size
    lines = lines[test_start:test_start + test_size]

    # use input sentence to calculate bleu-ori
    reference_text = [line.strip().split('\t')[0] for line in lines]

    # normalize reference corpus: separate '.', '!' and '?' from the preceding word
    reference_text = [re.sub(r"([.!?])", r" \1", seq) for seq in reference_text]

    reference_corpus = [[text.split()] for text in reference_text]
    return reference_text, reference_corpus




In [22]:
def evaluate(reference_corpus, prediction_corpus, reference_text=None, prediction_text=None):
    """Compute and print corpus-level BLEU-1..4 and ROUGE-1/2/L scores.

    Args:
        reference_corpus: List with one entry per sentence; each entry is a
            list of tokenized references (``[[tok, ...], ...]``), as expected
            by ``corpus_bleu``.
        prediction_corpus: List of tokenized predictions (``[tok, ...]`` per
            sentence), aligned with ``reference_corpus``.
        reference_text: Optional list of reference sentences (strings) for
            ROUGE. When omitted it is rebuilt by joining the tokens of the
            first reference of every ``reference_corpus`` entry with spaces.
        prediction_text: Optional list of predicted sentences (strings) for
            ROUGE. When omitted it is rebuilt by joining the tokens of every
            ``prediction_corpus`` entry with spaces.

    Returns:
        A tuple ``(bleu1, bleu2, bleu3, bleu4, rouge_scores)`` where
        ``rouge_scores`` is the object returned by ``Rouge.get_scores``; this
        function reads its ``"rouge-1"``, ``"rouge-2"`` and ``"rouge-l"``
        entries' ``"f"`` values.
    """
    # Previously the ROUGE call silently read `prediction_text` and
    # `reference_text` from notebook globals. Derive them from the arguments
    # instead so the function only depends on what it is given.
    if reference_text is None:
        reference_text = [' '.join(references[0]) for references in reference_corpus]
    if prediction_text is None:
        prediction_text = [' '.join(tokens) for tokens in prediction_corpus]

    print("[Info] Calculating BLEU 1...")
    bleu1 = corpus_bleu(reference_corpus, prediction_corpus, weights=(1, 0, 0, 0))
    print("[Info] Calculating BLEU 2...")
    bleu2 = corpus_bleu(reference_corpus, prediction_corpus, weights=(0.5, 0.5, 0, 0))
    print("[Info] Calculating BLEU 3...")
    # NOTE(review): weights are (0.33, 0.33, 0.34) rather than uniform 1/3 each;
    # kept as-is so results stay comparable with previously written scores.
    bleu3 = corpus_bleu(reference_corpus, prediction_corpus, weights=(0.33, 0.33, 0.34, 0))
    print("[Info] Calculating BLEU 4...")
    bleu4 = corpus_bleu(reference_corpus, prediction_corpus, weights=(0.25, 0.25, 0.25, 0.25))
    print("[Info] Done")

    print("[Info] BLEU1 Score: {}".format(bleu1))
    print("[Info] BLEU2 Score: {}".format(bleu2))
    print("[Info] BLEU3 Score: {}".format(bleu3))
    print("[Info] BLEU4 Score: {}".format(bleu4))

    rouge_evaluator = rouge.Rouge(metrics=['rouge-n', 'rouge-l'], max_n=2)
    rouge_scores = rouge_evaluator.get_scores(prediction_text, reference_text)

    print("[Info] Rouge 1 score: {}".format(rouge_scores["rouge-1"]["f"]))
    print("[Info] Rouge 2 score: {}".format(rouge_scores["rouge-2"]["f"]))
    print("[Info] Rouge l score: {}".format(rouge_scores["rouge-l"]["f"]))

    return bleu1, bleu2, bleu3, bleu4, rouge_scores

In [23]:
# Score every prediction file of every seed against the input sentences
# ("ori" scores) and write the BLEU / ROUGE results under ../scores/fixseed/.
for seed in seeds:
    print("### Evaluating seed {} ###".format(seed))
    # Seed the global NumPy RNG so the shuffle inside read_ref is reproducible
    # for this seed.
    np.random.seed(seed)
    reference_text, reference_corpus = read_ref(ref_path)
    print(reference_corpus[0])

    # Evaluate every prediction file collected for this seed.
    for file in seed_dir_dic[seed]:
        # Outputs of the 'bow' and 'wordnet' variants are not scored here.
        if 'bow' in file or 'wordnet' in file:
            continue
        print("Calculating file: ", file)

        # Keep only the "Predict: ..." lines and strip the prefix.
        with open(file, 'r') as prediction_file:
            prediction_text = [
                line.replace('Predict: ', '').strip()
                for line in prediction_file
                if 'Predict: ' in line
            ]
        prediction_corpus = [text.split() for text in prediction_text]
        assert len(reference_text) == len(prediction_text), "len mismatch, ref: {}, pred: {}".format(len(reference_text), len(prediction_text))

        # evaluate score
        bleu1, bleu2, bleu3, bleu4, rouge_scores = evaluate(reference_corpus, prediction_corpus)

        # write to output; makedirs also creates missing parent directories,
        # which os.mkdir would fail on.
        score_dir = '../scores/fixseed/seed{}/ori'.format(seed)
        os.makedirs(score_dir, exist_ok=True)
        score_path = os.path.join(score_dir, os.path.basename(file))
        print("Writing to file:", score_path)

        # The context manager closes the score file even if a write fails.
        with open(score_path, 'w') as score_file:
            score_file.write("[Info] BLEU1 Score: {}\n".format(bleu1))
            score_file.write("[Info] BLEU2 Score: {}\n".format(bleu2))
            score_file.write("[Info] BLEU3 Score: {}\n".format(bleu3))
            score_file.write("[Info] BLEU4 Score: {}\n".format(bleu4))

            score_file.write("\n\n[Info] Rouge 1 score: {}\n".format(rouge_scores["rouge-1"]["f"]))
            score_file.write("[Info] Rouge 2 score: {}\n".format(rouge_scores["rouge-2"]["f"]))
            score_file.write("[Info] Rouge l score: {}\n".format(rouge_scores["rouge-l"]["f"]))

        print("==============================")

### Evaluating seed 0 ###
[['What', 'are', 'the', 'benefits', 'of', 'using', 'digital', 'signage', 'for', 'your', 'business', '?']]
Calculating file:  ./fixseed/seed0/test_DNPG_base_transformer_epoch70.txt
[Info] Calculating BLEU 1...
[Info] Calculating BLEU 2...
[Info] Calculating BLEU 3...
[Info] Calculating BLEU 4...
[Info] Done
[Info] BLEU1 Score: 0.6445214424573718
[Info] BLEU2 Score: 0.5266553765143346
[Info] BLEU3 Score: 0.4415410065807005
[Info] BLEU4 Score: 0.3778980355290477
[Info] Rouge 1 score: 0.6877523016748484
[Info] Rouge 2 score: 0.487186817342597
[Info] Rouge l score: 0.670222217730458
Writing to file: ../scores/fixseed/seed0/ori/test_DNPG_base_transformer_epoch70.txt
Calculating file:  ./fixseed/seed0/transformer_key_enc_bert_val_attention_alpha0.5_softmax.txt
[Info] Calculating BLEU 1...
[Info] Calculating BLEU 2...
[Info] Calculating BLEU 3...
[Info] Calculating BLEU 4...
[Info] Done
[Info] BLEU1 Score: 0.6065280300710959
[Info] BLEU2 Score: 0.47987224407050855
[In