In [None]:
import pandas as pd
from sacrebleu import sentence_bleu
import bert_score
from rouge_score import rouge_scorer
from scipy.stats import spearmanr
from tqdm import tqdm

# Register tqdm's pandas integration so that `.progress_apply` is available
# on pandas objects (it is used further down on the grouped frame).
tqdm.pandas()

# Names of the grade columns that the correlation analysis iterates over.
DIMENSIONS = [
    "correctness_topical",
    "coherence_logical",
    "coherence_stylistic",
    "coverage_broad",
    "coverage_deep",
    "consistency_internal",
    "quality_overall",
]

In [None]:
# Shared ROUGE scorer (ROUGE-1, ROUGE-2 and ROUGE-L) with stemming enabled;
# built once so it can be reused for every scored text pair.
rouge = rouge_scorer.RougeScorer(
    ["rouge1", "rouge2", "rougeL"],
    use_stemmer=True,
)

def content_overlap_correlation(group):
    """Correlate grades with content-overlap metrics for one group of texts.

    For every dimension in ``DIMENSIONS`` the rows of ``group`` are sorted by
    that dimension's grade, highest first. The top-graded ``cleaned_text`` is
    taken as the reference and all remaining texts as hypotheses. Each
    hypothesis is scored against the reference with sentence-level BLEU,
    ROUGE-L F-measure and BERTScore F1, and Spearman's rho is computed between
    the hypotheses' grades and their metric scores.

    Two variants are reported per metric:

    * ``"full"`` -- all hypotheses;
    * ``"bw"``   -- only the first and the last hypothesis in sorted order
      (i.e. the best- and worst-graded hypothesis).

    Parameters
    ----------
    group : pandas.DataFrame
        Rows belonging to one group. Must contain a ``"cleaned_text"`` column
        and one grade column per entry of ``DIMENSIONS``.

    Returns
    -------
    pandas.Series
        Correlations keyed by ``(dimension, "full" | "bw", metric)`` with
        ``metric`` in ``{"bleu", "rougeL", "bertscore"}``. All entries of a
        dimension are NaN when the group holds fewer than two hypotheses,
        because a rank correlation is undefined in that case.
    """
    metrics = ("bleu", "rougeL", "bertscore")
    corr = {}
    for dim in DIMENSIONS:
        data = group.sort_values(dim, ascending=False)
        texts = data["cleaned_text"].tolist()
        reference, hypotheses = (texts[0], texts[1:]) if texts else (None, [])
        ranking = data[dim].iloc[1:].values

        if len(hypotheses) < 2:
            # Too few hypotheses to correlate; emit NaN under the same keys so
            # every group yields an identically shaped Series.
            for scope in ("full", "bw"):
                for metric in metrics:
                    corr[(dim, scope, metric)] = float("nan")
            continue

        # sacrebleu expects (hypothesis, references) -- BLEU is not symmetric,
        # so the argument order matters.
        scores_bleu = [sentence_bleu(hyp, [reference]).score for hyp in hypotheses]
        # rouge_score expects (target, prediction).
        scores_rouge = [
            rouge.score(reference, hyp)["rougeL"].fmeasure for hyp in hypotheses
        ]
        # bert_score expects (candidates, references) of equal length; repeat
        # the reference once per hypothesis instead of a hard-coded count.
        # NOTE(review): bert_score's documented language codes are two-letter
        # (e.g. "en"); confirm that "eng" selects the intended model.
        _, _, bert_f1 = bert_score.score(
            hypotheses, [reference] * len(hypotheses), lang="eng"
        )
        scores_bertscore = bert_f1.tolist()

        scores_by_metric = {
            "bleu": scores_bleu,
            "rougeL": scores_rouge,
            "bertscore": scores_bertscore,
        }
        for metric in metrics:
            corr[(dim, "full", metric)] = spearmanr(ranking, scores_by_metric[metric])[0]

        # Best-vs-worst: correlate only the two extreme hypotheses.
        extremes_ranking = [ranking[0], ranking[-1]]
        for metric in metrics:
            scores = scores_by_metric[metric]
            corr[(dim, "bw", metric)] = spearmanr(
                extremes_ranking, [scores[0], scores[-1]]
            )[0]

    return pd.Series(corr)

In [None]:
# Load the responses and their grades, attach the grades to the responses,
# and keep only rows that actually carry an overall quality grade.
responses = pd.read_json("../data/artifacts/responses.jsonl.gz", lines=True)
grades = pd.read_json("../data/artifacts/grades.jsonl.gz", lines=True)
graded_responses = responses.merge(grades, on="response", how="left")
graded_responses = graded_responses.dropna(subset="quality_overall")

# One row of correlations per topic (with a progress bar).
df_corr = graded_responses.groupby("topic").progress_apply(
    content_overlap_correlation
)

In [None]:
# Average every correlation over the rows of df_corr, then reshape the result
# so that the first key level forms the rows and the remaining two key levels
# form the columns.
mean_corr_long = df_corr.mean(axis=0).reset_index()
mean_corr_table = mean_corr_long.pivot(
    index=["level_1", "level_2"],
    columns=["level_0"],
    values=0,
)
mean_corr_table.round(3).T