In [None]:
import json

# Maps a run name to the JSONL file holding that run's results.
dicts = {
    'regular': './results/regular.jsonl',
    'cocolex': './results/cocolex.jsonl',
}

def longest_common_starting_words(str1, str2, reference, max_ref_check=4):
    """Return the leading run of whitespace-separated words shared by two strings.

    Words of ``str1`` and ``str2`` are compared position by position and the
    scan stops at the first position where they differ.

    While the scan is still inside the first ``max_ref_check`` positions, a
    word of ``str1`` that equals the word at the same position in
    ``reference`` rejects the pair and ``None`` is returned.

    The length of ``reference`` only limits how many positions can be checked
    against it; it never shortens the returned prefix.

    Args:
        str1: First text; its words are the ones compared with ``reference``.
        str2: Second text.
        reference: Text whose leading words must not coincide with ``str1``.
        max_ref_check: Number of leading positions checked against
            ``reference``. Defaults to 4.

    Returns:
        The shared prefix joined by single spaces (``''`` when the first
        words already differ or either string is empty), or ``None`` when the
        pair is rejected by the reference check.
    """
    ref_words = reference.split()
    common_words = []
    for index, (w1, w2) in enumerate(zip(str1.split(), str2.split())):
        # The reference check comes first, so a rejection takes priority over
        # a mismatch between the two strings at the same position.
        if (
            index < max_ref_check
            and index < len(ref_words)
            and w1 == ref_words[index]
        ):
            return None
        if w1 != w2:
            break
        common_words.append(w1)
    return ' '.join(common_words)

# Walk both result files in lockstep (line i of one file is paired with line i
# of the other) and keep every pair where the cocolex record scores strictly
# higher than the regular record on both align_score faithfulness (passages)
# and align_score correctness, and where the two generations share a non-empty
# leading run of words.
records = []
with open(dicts['regular'], 'r', encoding='utf-8') as reg_file, \
        open(dicts['cocolex'], 'r', encoding='utf-8') as coco_file:
    for index, (reg_line, coco_line) in enumerate(zip(reg_file, coco_file)):
        # Blank lines (e.g. a trailing newline) are not valid JSON; skip them
        # while still counting them so `index` stays a physical line offset.
        if not reg_line.strip() or not coco_line.strip():
            continue
        reg_record = json.loads(reg_line)
        coco_record = json.loads(coco_line)
        if not (reg_record and coco_record):
            continue
        if "results" not in reg_record or "results" not in coco_record:
            continue
        # .get() so that a record without a 'scores' key is skipped like one
        # whose scores are empty, instead of raising KeyError.
        reg_scores = reg_record.get('scores')
        coco_scores = coco_record.get('scores')
        if not (reg_scores and coco_scores):
            continue

        reg_faithfulness = reg_scores['align_score']['faithfulness']['passages']
        coco_faithfulness = coco_scores['align_score']['faithfulness']['passages']
        reg_correctness = reg_scores['align_score']['correctness']
        coco_correctness = coco_scores['align_score']['correctness']
        better_faithfulness = coco_faithfulness > reg_faithfulness
        better_correctness = coco_correctness > reg_correctness
        if not (better_faithfulness and better_correctness):
            continue

        reg_text = reg_record['results']['gen']
        coco_text = coco_record['results']['gen']
        common_words = longest_common_starting_words(
            reg_text, coco_text, reg_record['results']['meta']['previous_text']
        )
        # None (rejected by the reference check) and '' (no shared prefix)
        # are both dropped.
        if not common_words:
            continue

        increase_faithfulness = round(coco_faithfulness - reg_faithfulness, 2)
        increase_correctness = round(coco_correctness - reg_correctness, 2)
        print(f"Index: {index} — ID: {coco_record['id']} — ({increase_faithfulness}, {increase_correctness}) — {common_words}")
        records.append({
            'id': coco_record['id'],
            'index': index,
            'increase_faithfulness': increase_faithfulness,
            'increase_correctness': increase_correctness,
        })


In [None]:
def get_generated_texts_by_index(index):
    """Load the pair of records stored on line ``index`` of both result files.

    The files named by ``dicts['regular']`` and ``dicts['cocolex']`` are read
    in lockstep from the start; only the requested line pair is JSON-decoded.

    Args:
        index: Zero-based line offset, counted the same way in both files.

    Returns:
        A ``(reg_record, coco_record)`` tuple of decoded JSON values, or
        ``(None, None)`` when no line pair exists at ``index``.
    """
    # JSON text is UTF-8; do not depend on the platform's default encoding.
    with open(dicts['regular'], 'r', encoding='utf-8') as reg_file, \
            open(dicts['cocolex'], 'r', encoding='utf-8') as coco_file:
        for i, (reg_line, coco_line) in enumerate(zip(reg_file, coco_file)):
            if i == index:
                return json.loads(reg_line), json.loads(coco_line)
    return None, None

case_study_file = './results/case_study.txt'

# Largest faithfulness gain first; ties are ordered by correctness gain.
records = sorted(records, key=lambda x: (
    x['increase_faithfulness'],
    x['increase_correctness']
    ), reverse=True)

section_rule = "-------------------------------------\n"
entry_rule = "=====================================\n\n\n\n"

# Open once in write mode: this truncates any previous report (even when there
# are no records) and avoids reopening the file for every entry.
with open(case_study_file, 'w', encoding='utf-8') as file:
    for record in records:
        index = record['index']
        reg_record, coco_record = get_generated_texts_by_index(index)
        if reg_record is None or coco_record is None:
            # The result files no longer contain this line; report and move on
            # rather than failing with a TypeError on the lookups below.
            print(f"Index: {index} not found in result files; skipping")
            continue
        meta = reg_record['results']['meta']
        previous_text = meta['previous_text']
        reg_text = reg_record['results']['gen']
        coco_text = coco_record['results']['gen']
        sections = [
            f"Document ID: {meta['docid']}\n",
            f"Context: {coco_record['results']['meta']['context']}\n",
            f"Prompt: {meta['prompt']}\n",
            f"Faithfulness: {record['increase_faithfulness']}\n"
            f"Correctness: {record['increase_correctness']}\n",
            f"Previous: {previous_text}\n",
            f"Regular: {reg_text}\n",
            f"Cocolex: {coco_text}\n",
            f"Reference: {meta['gold_text']}\n",
        ]
        file.write(section_rule.join(sections) + entry_rule)