This notebook produces the plots for failure rate and inference duration for all language models.

In [None]:
import polars as pl
import plotly.io as pio
import plotly.express as px
import plot_theme as pt

# Set the default plotly template for every figure created below.
# "plotly_white+cc" layers the "cc" template on top of the built-in
# "plotly_white" one; "cc" is presumably registered by importing
# `plot_theme` -- TODO confirm.
pio.templates.default = "plotly_white+cc"

# Load dataframe

In [None]:
# Read the merged metrics table and overwrite the parameter-count column
# with its values rounded to two decimal places.
rounded_param_count = pl.col("Param. count (B)").round(2)
df = pl.read_parquet("../merged_metrics_v5.parquet").with_columns(rounded_param_count)

# Plot inference speed

In [None]:
# Model names ordered from the longest to the shortest inference duration.
inference_sorting = df.sort("Inference duration (s)", descending=True)["model"]

# Bar chart of inference duration per model on a log-scaled y-axis.
fig = px.bar(
    df,
    x="model",
    y="Inference duration (s)",
    labels={"model": "LLM"},
    # plotly documents `category_orders` values as plain lists, so hand it a
    # Python list rather than the polars Series itself.
    category_orders={"model": inference_sorting.to_list()},
    log_y=True,
)
# Only show tick labels at 1 and 10 on the log axis.
fig.update_yaxes(tickvals=[1, 10])
# Draw all bars in a single dark grey.
fig.update_traces(marker_color="#333")
# `pt.save` comes from the project's `plot_theme` module; presumably it
# writes the figure to disk under the given name -- TODO confirm.
pt.save(fig, "inference_speed_plotly")

In [None]:
# Scatter plot of inference duration against parameter count, one colour per
# model, with both axes log-scaled.
scatter_options = {
    "x": "Param. count (B)",
    "y": "Inference duration (s)",
    "color": "model",
    "log_x": True,
    "log_y": True,
    "labels": {"model": "LLM"},
}
fig = px.scatter(df, **scatter_options)
pt.save(fig, "inference_speed_vs_param_count")