In [None]:
### 🚀 RUN ME BEFORE YOU START WORKING ON THIS NOTEBOOK ⚠️

import helpers
# Path of this notebook. `__vsc_ipynb_file__` is not defined anywhere in this
# file; NOTE(review): presumably injected by the VS Code Jupyter extension —
# confirm before running the notebook in another front end.
NOTEBOOK_PATH = __vsc_ipynb_file__ # type: ignore
# Hand the notebook path to the project's `helpers` module for its setup step.
helpers.initialize(notebook_path=NOTEBOOK_PATH)

### 🚀 RUN ME BEFORE YOU START WORKING ON THIS NOTEBOOK ⚠️

In [2]:

import glob
import pandas as pd
from pathlib import Path

# Collect the HTML articles that sit directly inside assets/articles_short
# (the pattern is NOT recursive). The result is sorted because glob's order
# depends on the filesystem, and a stable row order keeps re-runs reproducible.
html_files = sorted(glob.glob("assets/articles_short/*.html"))

# One record per article: the bare file name plus the raw HTML content.
file_data = []

for file_path in html_files:
    # Path.read_text opens, reads and closes the file, so no handle is leaked.
    content = Path(file_path).read_text(encoding='utf-8')
    file_data.append({
        'file_name': Path(file_path).name,
        'text': content
    })

# Build the DataFrame. Naming the columns explicitly keeps the schema stable
# ('file_name', 'text') even when no HTML files were found.
df = pd.DataFrame(file_data, columns=['file_name', 'text'])


In [None]:
from textsummarizer import SerializableArticleSummarizerModel

# Instantiate the summarizer with its default constructor arguments.
m = SerializableArticleSummarizerModel()
# Store the model output for the whole frame in a new "summaries" column.
# NOTE(review): presumably predict returns one summary per row, in row order — confirm.
df["summaries"] = m.predict(df)

# Bare last expression: renders the frame as this cell's output.
df

In [None]:
import mlflow

# Evaluate the precomputed "summaries" column inside an MLflow run named
# "LLM Evaluation", using the "text-summarization" model type and adding
# token_count as an extra metric. The result is kept in `eval_result`.
# NOTE(review): `targets` points at the "text" column, i.e. the raw article
# HTML rather than a reference summary — confirm this is the intended ground truth.
with mlflow.start_run(run_name="LLM Evaluation") as run:
    eval_result = mlflow.evaluate(
        data=df,
        predictions="summaries",
        targets="text",
        model_type="text-summarization",
        extra_metrics=[
            mlflow.metrics.token_count(),
        ],
    )


In [None]:
from mlflow.metrics import MetricValue, make_metric
from mlflow.metrics.base import standard_aggregations
from textsummarizer import REFUSAL_TEXT

def model_refused_to_respond(predictions, targets):
    """Flag every prediction that is exactly the refusal text.

    Args:
        predictions: Iterable of model outputs to inspect.
        targets: Not read by this function; present in the signature only.

    Returns:
        A MetricValue whose ``scores`` hold 1 for each prediction equal to
        ``REFUSAL_TEXT`` and 0 otherwise, and whose ``aggregate_results`` are
        the standard aggregations computed over those scores.
    """
    refusal_flags = [1 if output == REFUSAL_TEXT else 0 for output in predictions]
    aggregated = standard_aggregations(refusal_flags)
    return MetricValue(scores=refusal_flags, aggregate_results=aggregated)


# Wrap the scoring function as an MLflow metric named "refusal_rate";
# greater_is_better=False marks lower values as the better outcome.
refusal_rate_metric = make_metric(
    eval_fn=model_refused_to_respond,
    greater_is_better=False,
    name="refusal_rate",
)

# Sanity check: call the scoring function directly on the notebook's data.
print(model_refused_to_respond(df["summaries"], df["text"]))

In [None]:
import mlflow

# Second evaluation run, again named "LLM Evaluation": same data, columns and
# model type as before, but with the custom refusal_rate_metric as the only
# extra metric. Rebinds `run` and `eval_result` to this run's values.
with mlflow.start_run(run_name="LLM Evaluation") as run:
    eval_result = mlflow.evaluate(
        data=df,
        predictions="summaries",
        targets="text",
        model_type="text-summarization",
        extra_metrics=[
            refusal_rate_metric
        ],
    )
