In [1]:
# ruff: noqa: F401, ANN201

In [2]:
# Load the IPython extensions this notebook relies on during development.
# NOTE(review): no pyinstrument magic is used in the cells visible here —
# confirm the extension is still needed.
%load_ext autoreload
%load_ext pyinstrument

# Mode 2: reload imported modules before each cell runs, so edits to the
# project's .py files take effect without restarting the kernel.
%autoreload 2

In [3]:
import sys

from dotenv import load_dotenv

# Put the parent of the working directory at the front of the import path
# (presumably so the `tsdb_benchmarks` package resolves — confirm).
# Guarded so that re-running this cell does not stack duplicate ".." entries;
# ".." still ends up first on the path, exactly as before.
if sys.path[:1] != [".."]:
    sys.path.insert(0, "..")

# Load variables from a .env file into the process environment (python-dotenv).
# NOTE(review): presumably consumed by the project settings imported later — confirm.
load_dotenv()

True

In [4]:
from datetime import datetime, timedelta
from pathlib import Path
from typing import Literal

import altair as alt
import duckdb
import hvplot.polars
import numpy as np
import polars as pl

# Lift Altair's limit on the number of rows a chart dataset may hold, so the
# result frames plotted in this notebook are not rejected for their size.
alt.data_transformers.disable_max_rows()

DataTransformerRegistry.enable('default')

In [5]:
from tsdb_benchmarks.settings import SETTINGS, DatabaseName, Operation, SuiteName

In [6]:
# Benchmark suite to analyse. It is bound as the `suite = (?)` parameter of
# every query in this notebook; the heatmap cell additionally special-cases
# the "clickbench" suite when ordering queries.
SUITE: SuiteName = "time_series"

In [7]:
# Open the results replica read-only: the queries in this notebook only read,
# so there is no reason to take DuckDB's write lock on the file. Read-only mode
# also fails immediately if the file is missing instead of silently creating an
# empty database that later breaks with "table does not exist".
db = duckdb.connect(SETTINGS.results_directory / "results-replica.db", read_only=True)

In [8]:
# Overview table: every finished, non-deleted benchmark run of the selected
# suite, oldest first.
finished_runs_sql = """
        select * from benchmark
        where finished_at is not null
            and deleted_at is null
            and suite = (?)
        order by started_at
    """
finished_runs = db.execute(finished_runs_sql, [SUITE])
finished_runs.pl()

id,suite,db,operation,started_at,finished_at,deleted_at,notes
i32,str,str,str,datetime[μs],datetime[μs],datetime[μs],str
125,"""time_series""","""monetdb""","""populate""",2025-07-31 12:36:31.237963,2025-07-31 12:41:59.918723,,
126,"""time_series""","""monetdb""","""run""",2025-07-31 12:42:18.913060,2025-07-31 12:42:35.177421,,
129,"""time_series""","""clickhouse""","""populate""",2025-07-31 12:47:24.823897,2025-07-31 12:51:18.812919,,
130,"""time_series""","""clickhouse""","""run""",2025-07-31 12:51:31.087906,2025-07-31 12:51:56.888311,,
131,"""time_series""","""duckdb""","""populate""",2025-07-31 12:52:12.104823,2025-07-31 12:53:08.812614,,
132,"""time_series""","""duckdb""","""run""",2025-07-31 12:53:24.180009,2025-07-31 12:53:28.642636,,
133,"""time_series""","""timescaledb""","""populate""",2025-07-31 12:53:55.527614,2025-08-01 06:00:00,,
134,"""time_series""","""timescaledb""","""run""",2025-08-01 07:25:11.548055,2025-08-01 07:25:13.026509,,


In [12]:
# Mean wall-clock duration (finished_at - started_at, in seconds) for each
# (database, operation) pair of the selected suite, finished runs only.
avg_duration_sql = """
SELECT
	db,
	operation,
	ROUND(AVG(epoch(finished_at - started_at)), 2) AS avg_duration_seconds
FROM
	benchmark
WHERE
	suite = (?)
    and deleted_at is null
	AND finished_at IS NOT NULL
GROUP BY
	db,
	operation
ORDER BY
	operation,
	avg_duration_seconds;
"""
df = db.execute(avg_duration_sql, [SUITE]).pl()

# Scale the chart with the number of databases: 100 px per distinct db.
width = 100 * df["db"].n_unique()

# Grouped bars: one group per database, one bar per operation inside a group.
bar_encoding = {
    "x": alt.X("db:N", title="Database"),
    "xOffset": alt.XOffset("operation:N"),
    "y": alt.Y("avg_duration_seconds:Q", title="Avg Duration (s)"),
    "color": alt.Color("operation:N", title="Operation"),
    "tooltip": ["db:N", "operation:N", "avg_duration_seconds:Q"],
}
bars = alt.Chart(df).mark_bar().encode(**bar_encoding)
chart = bars.properties(width=width, height=400).interactive()

chart

In [13]:
# Per-(database, query) latency statistics.
#   query_events: start/end events named `query_<...>_iteration_<n>` for
#                 non-deleted benchmarks of the selected suite.
#   paired:       matches each start event with the end event of the same
#                 benchmark and name, yielding one duration per iteration.
#   aggregated:   iteration 1 is reported separately (first_seconds); all later
#                 iterations are averaged (rest_avg_seconds) with their sample
#                 standard deviation and count.
# The final SELECT converts seconds to milliseconds.
df = db.execute(
    """
WITH query_events AS (
    SELECT
        b.db,
        e.benchmark_id,
        e.name,
        e.time,
        e.type
    FROM event e
    JOIN benchmark b ON e.benchmark_id = b.id
    WHERE b.suite = (?)
      AND b.deleted_at is NULL
      AND e.name ~ '^query_.*_iteration_[0-9]+$'
),
paired AS (
    SELECT
        s.db,
        REGEXP_REPLACE(s.name, '_iteration_[0-9]+$', '') AS base_query,
        CAST(REGEXP_EXTRACT(s.name, '_iteration_([0-9]+)$', 1) AS INTEGER) AS iteration,
        epoch(e.time - s.time) AS duration_seconds
    FROM query_events s
    JOIN query_events e
      ON s.db = e.db
     AND s.benchmark_id = e.benchmark_id
     AND s.name = e.name
     AND s.type = 'start'
     AND e.type = 'end'
),
aggregated AS (
    SELECT
        db,
        base_query,
        MAX(CASE WHEN iteration = 1 THEN duration_seconds END) AS first_seconds,
        AVG(CASE WHEN iteration > 1 THEN duration_seconds END) AS rest_avg_seconds,
        STDDEV_SAMP(CASE WHEN iteration > 1 THEN duration_seconds END) AS rest_stddev_seconds,
        COUNT(*) FILTER (WHERE iteration > 1) AS rest_runs
    FROM paired
    GROUP BY db, base_query
)
SELECT
	db,
	base_query as query,
	1000 * ROUND(first_seconds, 4) AS first_ms,
	1000 * ROUND(rest_avg_seconds, 4) AS rest_avg_ms,
	1000 * ROUND(rest_stddev_seconds, 4) AS rest_stddev_ms,
	rest_runs
FROM
	aggregated
ORDER BY
	query;

""",
    [SUITE],
).pl()

# Shorten `query_<id>_<rest>` to `<id>` for axis labels, and report the total
# number of runs (the first iteration plus the "rest" iterations).
df = df.with_columns(
    pl.col.query.str.strip_prefix("query_").str.split("_").list.first().alias("query"),
    (pl.col.rest_runs + 1).alias("runs"),
)

# Which statistic drives the colour scale and the "fastest" highlight.
metric: Literal["first", "rest"] = "rest"
metric_col = "rest_avg_ms" if metric == "rest" else "first_ms"
metric_title = "Rest avg (ms)" if metric == "rest" else "First (ms)"

# Flag, per query, the database(s) with the lowest value of the chosen metric.
# A window expression is used instead of group_by + join because group_by does
# not preserve row order; this keeps the SQL `ORDER BY query` ordering, so the
# x-axis order is the same on every run.
dff = df.with_columns((pl.col(metric_col) == pl.col(metric_col).min().over("query")).alias("is_fastest"))

if SUITE == "clickbench":
    # ClickBench labels look like "Q<n>"; order them numerically, not lexically.
    dff = dff.sort(pl.col.query.str.strip_prefix("Q").cast(pl.Int32))

# One entry per query, in row order (dff holds one row per database per query).
query_order = dff.get_column("query").unique(maintain_order=True).to_list()

# Databases ordered by ascending number of "fastest" cells, so those with no
# wins come first; ties are broken alphabetically to keep the order stable.
db_order = (
    dff.group_by("db").agg(pl.col("is_fastest").sum().alias("wins")).sort("wins", "db").get_column("db").to_list()
)

heatmap = (
    alt.Chart(dff)
    .mark_rect()
    .encode(
        x=alt.X("query:N", title="Query", sort=query_order),
        y=alt.Y("db:N", title="Database", sort=db_order),
        # Log colour scale, reversed so that low latencies are green.
        color=alt.Color(
            f"{metric_col}:Q",
            title=metric_title,
            scale=alt.Scale(scheme="redyellowgreen", type="log", reverse=True),
        ),
        # Outline the fastest database for each query.
        stroke=alt.condition(
            "datum.is_fastest",
            alt.value("magenta"),
            alt.value("none"),
        ),
        strokeWidth=alt.condition(
            "datum.is_fastest",
            alt.value(3),
            alt.value(0),
        ),
        tooltip=[
            alt.Tooltip("db:N", title="Database"),
            alt.Tooltip("query:N", title="Query"),
            alt.Tooltip("rest_avg_ms:Q", title="Rest avg (ms)"),
            alt.Tooltip("rest_stddev_ms:Q", title="Rest stddev (ms)"),
            alt.Tooltip("first_ms:Q", title="First run (ms)"),
            alt.Tooltip("runs:Q", title="Number of runs"),
        ],
    )
    .properties(width=1200, height=200)
)


heatmap

In [14]:
# Resource samples (CPU, memory, disk) for every non-deleted benchmark of the
# selected suite, with time expressed as seconds since that benchmark's first
# metric sample.
df = db.execute(
    """
WITH first_metric_time AS (
    SELECT
        benchmark_id,
        MIN(time) AS start_time
    FROM metric
    GROUP BY benchmark_id
)
SELECT
    b.db,
    b.suite,
    b.operation,
    epoch(m.time - fm.start_time) AS seconds_since_start,
    m.cpu_percent,
    m.mem_mb,
    m.disk_mb
FROM metric m
JOIN first_metric_time fm ON m.benchmark_id = fm.benchmark_id
JOIN benchmark b ON m.benchmark_id = b.id
WHERE b.suite = (?)
and b.deleted_at is null
ORDER BY b.id, seconds_since_start;
""",
    [SUITE],
).pl()

# Upper bound applied to CPU samples before plotting.
# NOTE(review): presumably 25 cores x 100 % on the benchmark host — confirm.
CPU_PERCENT_CAP = 25 * 100

df = df.with_columns(pl.col.cpu_percent.clip(0, CPU_PERCENT_CAP))

# Long format: one row per (sample, metric) so each metric can be filtered out
# and charted on its own.
df_long = df.unpivot(
    on=["cpu_percent", "mem_mb", "disk_mb"],
    index=["db", "suite", "operation", "seconds_since_start"],
    variable_name="metric",
    value_name="value",
)

df_long = df_long.with_columns(
    [
        pl.col("db").cast(pl.Categorical),
        pl.col("operation").cast(pl.Categorical),
        pl.col("metric").cast(pl.Categorical),
    ]
)

# Clicking a database in the legend highlights it in all three charts.
selector = alt.selection_point(fields=["db"], bind="legend")

width = 800
height = 200


def _resource_chart(
    data: pl.DataFrame,
    metric_name: str,
    y_title: str,
    *,
    x_title: str | None = None,
    seconds_format: str = ".2f",
    value_format: str = ".1f",
) -> alt.FacetChart:
    """Build the line chart for one resource metric, faceted by operation.

    Uses the notebook-level `selector`, `width` and `height` so that all
    charts share the same legend selection and panel size.

    Args:
        data: Long-format metrics frame (`metric` / `value` columns).
        metric_name: Value of the `metric` column to plot.
        y_title: Title of the y axis.
        x_title: Title of the x axis. When None the x axis keeps its ticks and
            grid but shows neither title nor labels (for charts stacked above
            the one that carries the time axis).
        seconds_format: Tooltip number format for `seconds_since_start`.
        value_format: Tooltip number format for `value`.

    Returns:
        The faceted chart, one column per operation with independent x scales.
    """
    if x_title is None:
        x_axis = alt.Axis(title=None, ticks=True, grid=True, labels=False)
    else:
        x_axis = alt.Axis(title=x_title, ticks=True, grid=True)

    return (
        alt.Chart(data.filter(pl.col("metric") == metric_name))
        .mark_line(point=True)
        .encode(
            x=alt.X("seconds_since_start:Q", axis=x_axis),
            y=alt.Y("value:Q", axis=alt.Axis(title=y_title, ticks=True, grid=True), scale=alt.Scale(zero=False)),
            color=alt.Color("db:N"),
            tooltip=[
                "db",
                "suite",
                "operation",
                "metric",
                alt.Tooltip("seconds_since_start:Q", format=seconds_format),
                alt.Tooltip("value:Q", format=value_format),
            ],
            # Fade every database that is not selected in the legend.
            opacity=alt.condition(selector, alt.value(1.0), alt.value(0.1)),
        )
        .add_params(selector)
        .properties(width=width, height=height)
        .facet(column=alt.Column("operation:N", title=None))
        # Operations differ widely in duration, so each facet gets its own x scale.
        .resolve_scale(x="independent")
    )


cpu_chart = _resource_chart(df_long, "cpu_percent", "CPU %", seconds_format=".0f", value_format=".0f")
mem_chart = _resource_chart(df_long, "mem_mb", "Memory MB")
# Only the bottom chart carries the x-axis title and labels.
disk_chart = _resource_chart(df_long, "disk_mb", "Disk MB", x_title="seconds")

chart = alt.vconcat(cpu_chart, mem_chart, disk_chart).interactive(bind_y=False)

chart