In [None]:
import json
import multiprocessing
from datetime import datetime
from pathlib import Path
from typing import Type

from text_summary_eval_interface import (
    TextSummaryEvalInterface,
    DummySummarizer,
    ReferenceSummarizer,
    LatentReferenceSummarizer,
    CostlyReferenceSummarizer,
    SameSameSummarizer,
    HalfSummarizer,
)
from eval_vis import plot_eval_comparison
from data_classes import EvalComparison, DataSetSplit
from md_rules_summary import MarkdownRulesSummarizer
from flan_t5_summary import FlanT5Summarizer
from langchain_summary import AgenticOpenaiSummarizer
from openai_summary import OpenaiSingleCallSummarizer

In [None]:

# Summarizer classes that full_eval iterates over. Entries that are commented
# out are simply not part of the list and therefore not evaluated.
summary_classes = [
    # DummySummarizer,
    ReferenceSummarizer,
    # LatentReferenceSummarizer,
    # CostlyReferenceSummarizer,
    # SameSameSummarizer,
    # HalfSummarizer,
    # MarkdownRulesSummarizer,
    AgenticOpenaiSummarizer,
    FlanT5Summarizer,
    OpenaiSingleCallSummarizer,
]

def _eval_worker(cls: Type[TextSummaryEvalInterface], data_set, n, n_worker_inside):
    """Build one summarizer and evaluate it on ``data_set``.

    This is the task function that ``full_eval`` hands to
    ``multiprocessing.Pool.starmap`` and also calls directly when running in
    a single process.

    Args:
        cls: Summarizer class; instantiated with no arguments.
        data_set: Passed through unchanged as the first argument of
            ``eval_dataset``.
        n: Passed through as the second argument of ``eval_dataset``.
        n_worker_inside: Passed through as the third argument of
            ``eval_dataset``.

    Returns:
        Whatever ``cls().eval_dataset(data_set, n, n_worker_inside)`` returns.
    """
    instance = cls()
    # Banner so the output shows which summarizer is being evaluated.
    print(f"--- {cls.__name__} ---")
    return instance.eval_dataset(data_set, n, n_worker_inside)

def full_eval(
    data_set,
    n_workers,
    n_data=None,
    n_worker_inside=None,
    summarizer_classes=None,
    output_dir='/home/jonathan/PycharmProjects/tavily/evals',
):
    """Evaluate a set of summarizers on one dataset, then save and plot the comparison.

    Each summarizer class is evaluated via ``_eval_worker``. The collected
    results are wrapped in an ``EvalComparison``, dumped as JSON to
    ``<output_dir>/eval_comparison_<data_set.name>_<timestamp>.json`` and
    passed to ``plot_eval_comparison``.

    Args:
        data_set: Dataset to evaluate on. Must expose a ``name`` attribute,
            which is used in the output file name.
        n_workers: Size of the process pool (one task per summarizer class).
            Exactly ``1`` runs everything sequentially in the current process.
        n_data: Forwarded to ``eval_dataset`` as its second argument
            (presumably the number of samples to evaluate; confirm against
            ``TextSummaryEvalInterface``).
        n_worker_inside: Forwarded to ``eval_dataset`` as its third argument
            (presumably the per-summarizer parallelism; confirm against
            ``TextSummaryEvalInterface``).
        summarizer_classes: Optional iterable of summarizer classes to
            evaluate. When ``None`` the module-level ``summary_classes`` is
            used, looked up at call time.
        output_dir: Directory the JSON report is written to. It is created
            (including parents) if it does not exist. Defaults to the
            location that was previously hard-coded.

    Returns:
        None.
    """
    classes = list(summary_classes if summarizer_classes is None else summarizer_classes)

    # Prepare the output location *before* evaluating, so that an unusable
    # path fails immediately instead of after all evaluation work is done.
    out_dir = Path(output_dir)
    out_dir.mkdir(parents=True, exist_ok=True)

    tasks = [(cls, data_set, n_data, n_worker_inside) for cls in classes]
    if n_workers == 1:
        eval_results = [_eval_worker(*task) for task in tasks]
    else:
        with multiprocessing.Pool(n_workers) as pool:
            eval_results = pool.starmap(_eval_worker, tasks)

    eval_comp = EvalComparison(evals=eval_results)
    eval_comparison_dict = eval_comp.to_dict()
    now_str = datetime.now().strftime("%Y%m%d_%H%M%S")
    out_path = out_dir / f'eval_comparison_{data_set.name}_{now_str}.json'
    # Write-only access is sufficient; the file is never read back here.
    with open(out_path, 'w', encoding='utf-8') as f:
        json.dump(eval_comparison_dict, f)
    plot_eval_comparison(eval_comp)

In [None]:
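# Optional quick run on the train split: n_workers=1 (sequential), n_data=2,
# n_worker_inside=1. Uncomment to execute.
# full_eval(DataSetSplit.train, 1, 2, 1)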


In [None]:
# Selection of summarizers for the test-split run below; full_eval reads this
# module-level list when it is called.
summary_classes = [
    # DummySummarizer,
    ReferenceSummarizer,
    # LatentReferenceSummarizer,
    # CostlyReferenceSummarizer,
    # SameSameSummarizer,
    # HalfSummarizer,
    # MarkdownRulesSummarizer,
    AgenticOpenaiSummarizer,
    FlanT5Summarizer,
    OpenaiSingleCallSummarizer,
]
full_eval(
    DataSetSplit.test,
    n_workers=4,
    n_data=3,
    n_worker_inside=1,
)
