In [None]:
import numpy as np
import polars as pl
import plotly.io as pio
import plotly.express as px
import plot_theme as pt

# Default template for every figure created below. Plotly's "+" syntax merges
# the built-in "plotly_white" template with a second template named "cc".
# NOTE(review): "cc" is presumably registered as a side effect of importing
# plot_theme — confirm.
pio.templates.default = "plotly_white+cc"

# Load dataframes

In [None]:
# Read the merged metrics table from disk.
df = pl.read_parquet("../merged_metrics_consistency.parquet")
# Round the parameter-count column to two decimals; the expression keeps the
# column's name, so the rounded values replace the original column.
df = df.with_columns(pl.col("Param. count (B)").round(decimals=2))
# Show (rows, columns) as the cell output.
df.shape

In [None]:
# Ten rows with the largest values in the "f1" column, largest first.
(
    df.sort(by="f1", descending=True)
    .head(n=10)
)

In [None]:
# Scatter of the "f1" column against inference duration (log-scaled x axis),
# with one colour per value of the "model" column.
fig = px.scatter(
    data_frame=df,
    x="Inference duration (s)",
    y="f1",
    color="model",
    labels=dict(model="LLM", f1="F1 score (weighted)"),
    log_x=True,
)
# Hand the figure to the plot_theme save helper under this name.
pt.save(fig, "inference_speed_vs_f1")

In [None]:
# Scatter of the "f1" column against the parameter-count column (log-scaled
# x axis), with one colour per value of the "model" column.
fig = px.scatter(
    data_frame=df,
    x="Param. count (B)",
    y="f1",
    color="model",
    labels=dict(model="LLM", f1="F1 score (weighted)"),
    log_x=True,
)
# Hand the figure to the plot_theme save helper under this name.
pt.save(fig, "param_count_vs_f1")

In [None]:
# Scatter of accuracy against the parameter-count column (log-scaled x axis).
# The "accuracy" column is multiplied by 100 to match the "Accuracy (%)" label
# (assumes the stored values are fractions — TODO confirm).
fig = px.scatter(
    data_frame=df.with_columns(pl.col("accuracy") * 100),
    x="Param. count (B)",
    y="accuracy",
    color="model",
    labels=dict(model="LLM", accuracy="Accuracy (%)"),
    log_x=True,
)
# Hand the figure to the plot_theme save helper under this name.
pt.save(fig, "param_count_vs_accuracy")

In [None]:
# Scatter of entropy against the parameter-count column (log-scaled x axis).
# Rows whose "entropy" value is null are dropped before plotting.
fig = px.scatter(
    data_frame=df.drop_nulls(subset=["entropy"]),
    x="Param. count (B)",
    y="entropy",
    color="model",
    labels=dict(model="LLM", entropy="Entropy (nats)"),
    log_x=True,
)
# Hand the figure to the plot_theme save helper under this name.
pt.save(fig, "param_count_vs_entropy")

In [None]:
# 3-D scatter of inference duration (log-scaled x), entropy (y) and f1 (z),
# restricted to rows that have an entropy value; one colour per model.
fig = px.scatter_3d(
    data_frame=df.drop_nulls(subset=["entropy"]),
    x="Inference duration (s)",
    y="entropy",
    z="f1",
    color="model",
    log_x=True,
    height=550,
    labels={
        "model": "LLM",
        "f1": "F1 score",
        "entropy": "Entropy",
        "Inference duration (s)": "Inference dur.",
    },
)

# Add an arrow-less text label inside the scene, then switch the camera to an
# orthographic projection. Both update calls return the figure, so they chain.
fig = fig.update_layout(
    scene={
        "annotations": [
            {
                "showarrow": False,
                # x is given as log10 of the data value, presumably because
                # the x axis is log-scaled — TODO confirm placement.
                "x": np.log10(0.3),
                "y": 0.3,
                "z": 0.6,
                "text": "Optimal<br>zone",
                "xanchor": "center",
                "font": {"color": "mediumseagreen", "weight": "bold"},
                "bgcolor": "rgba(0.4, 0.6, 0.4, 0.1)",
            }
        ],
    }
).update_scenes(
    camera_projection_type="orthographic",
)
# Hand the figure to the plot_theme save helper under this name.
pt.save(fig, "entropy_vs_f1_vs_inference_speed")

In [None]:
def minmax(col: pl.Expr) -> pl.Expr:
    """Rescale an expression linearly so its minimum maps to 0 and its maximum to 1.

    Args:
        col: Expression to rescale.

    Returns:
        ``(col - min) / (max - min)`` as a new expression.

    Note:
        No guard is applied when the maximum equals the minimum, so the
        denominator is zero in that case.
    """
    lowest = col.min()
    value_range = col.max() - lowest
    return (col - lowest) / value_range

In [None]:
# Build a single "lower is better" score per row, using only rows that have an
# entropy value.
subset = (
    df.drop_nulls(subset=["entropy"])
    # Min-max normalise each metric. Duration and entropy are kept as-is,
    # while f1 and accuracy are flipped (1 - x) so that for every column
    # the best observed value becomes 0.
    .with_columns(
        minmax(pl.col("Inference duration (s)")),
        minmax(pl.col("entropy")),
        (1 - minmax(pl.col("f1"))).alias("f1"),
        (1 - minmax(pl.col("accuracy"))).alias("accuracy"),
    )
    # Weighted average of the normalised metrics; the weights sum to 1.
    .with_columns(
        (
            0.3 * pl.col("Inference duration (s)")
            + 0.2 * pl.col("entropy")
            + 0.35 * pl.col("f1")
            + 0.15 * pl.col("accuracy")
        ).alias("Weighted avg.")
    )
)

# Five rows with the smallest weighted average, smallest first.
subset.select(["model", "Weighted avg."]).sort(by="Weighted avg.").head(n=5)