In [1]:
# ruff: noqa: F401

In [2]:
# Load the IPython extensions this notebook relies on.
# NOTE(review): no visible cell uses a pyinstrument magic -- confirm it is still needed.
%load_ext autoreload
%load_ext pyinstrument

# Mode 2: re-import modules before each cell runs, so edits to the local package
# are picked up without restarting the kernel.
%autoreload 2

In [3]:
import sys

from dotenv import load_dotenv

# Put the parent directory at the front of the module search path.
# NOTE(review): presumably this is what makes `tsdb_benchmarks` importable; it relies
# on the kernel's working directory being this notebook's folder -- confirm.
sys.path.insert(0, "..")

# Load environment variables from a .env file (python-dotenv). As the last
# expression of the cell, the call's return value is what the cell displays.
load_dotenv()

True

In [4]:
from datetime import datetime, timedelta
from pathlib import Path

import hvplot.polars
import numpy as np
import polars as pl
from sqlalchemy import text

In [5]:
from tsdb_benchmarks.dbs.monetdb import MonetDB
from tsdb_benchmarks.dbs.monetdb.fetch import fetch_binary, fetch_pymonetdb

# Single MonetDB handle for the whole notebook; later cells call db.connect() on it.
db = MonetDB()

In [6]:
# Build the benchmark fixture: (re)create table `large` and populate one column per
# data type of interest (float, timestamp, text, varchar, blob, intervals, json).
# The low ids are deliberately left NULL / empty in places so that NULL and
# empty-string handling is part of what gets fetched.
con = db.connect()

# MonetDB's generate_series(first, limit) does NOT include `limit`, so the upper
# bound must be 10000001 to create ids 1..10000000. With a bound of 10000000 the
# table holds only 9,999,999 rows and a "limit 10_000_000" query can never
# return the number of rows it is labelled with.
statement = """
drop table if exists large;

create table large
(id int primary key, value float, ts timestamp, txt text, txt_20 varchar(20),
bb blob, s_interval interval second, h_interval interval hour, d_interval interval day, json_data json);

insert into large(id) (select value from generate_series(1, 10000001));

update large set bb = '1111111111111111111111111111111111' where id > 5 or id = 3;

update large set json_data = '{}' where id > 5 or id = 3;

update large set json_data = '{"asd": [1, 2, 3]}' where id > 15;

update large set json_data = '{"dsa": [1, 2, 3]}' where id > 25;

update large set s_interval = interval '2' second where id > 5 or id = 3;

update large set h_interval = interval '2' hour where id > 5 or id = 3;

update large set d_interval = interval '2' day where id > 5 or id = 3;

update large set value = id * 2;

update large set ts = '2025-01-01';

update large set txt = concat('this is index ', cast(id as text)) where id > 5;

update large set txt_20 = concat('idx  ', cast(id as text)) where id > 5;

update large set txt = 'this is not null' where id = 2;

update large set txt_20 = 'this is not null' where id = 2;

update large set txt = null where id = 7;

update large set txt_20 = null where id = 7;

update large set txt = '' where id = 9;

update large set txt_20 = '' where id = 9;

"""

# The whole script is sent as one text() statement, then committed explicitly.
con.execute(text(statement))
con.commit()

  import pkg_resources


In [7]:
from time import perf_counter


def _timed_fetch(fetch, query):
    """Return the seconds spent opening a connection and running ``fetch(query, connection)``.

    The timed region is connect + fetch. The connection is then closed explicitly,
    outside the timed region, instead of being left for garbage collection.

    NOTE(review): assumes ``db.connect()`` returns an object whose ``close()`` is
    safe to call even if ``fetch`` already closed it (the setup cell uses it as a
    SQLAlchemy connection) -- confirm.
    """
    start = perf_counter()
    connection = db.connect()
    try:
        fetch(query, connection)
        return perf_counter() - start
    finally:
        connection.close()


# Column-oriented results: one entry per benchmarked limit, timings in seconds.
data = {"limit": [], "binary": [], "pymonetdb": []}

for limit in [
    1,
    10,
    100,
    1_000,
    5_000,
    10_000,
    50_000,
    100_000,
    500_000,
    1_000_000,
    5_000_000,
    6_000_000,
    7_000_000,
    8_000_000,
    9_000_000,
    10_000_000,
]:
    q = f"select * from large order by id limit {limit}"

    # Measure pymonetdb first, then the binary path.
    pymonetdb_seconds = _timed_fetch(fetch_pymonetdb, q)
    binary_seconds = _timed_fetch(fetch_binary, q)

    # Record the row only once both timings exist, so the three columns always
    # have equal length even if a fetch raises or the cell is interrupted.
    data["limit"].append(limit)
    data["binary"].append(binary_seconds)
    data["pymonetdb"].append(pymonetdb_seconds)

    print(f"Fetched with limit {limit:_}")

Fetched with limit 1
Fetched with limit 10
Fetched with limit 100
Fetched with limit 1_000
Fetched with limit 5_000
Fetched with limit 10_000
Fetched with limit 50_000
Fetched with limit 100_000
Fetched with limit 500_000
Fetched with limit 1_000_000
Fetched with limit 5_000_000
Fetched with limit 6_000_000
Fetched with limit 7_000_000
Fetched with limit 8_000_000
Fetched with limit 9_000_000
Fetched with limit 10_000_000


In [8]:
# One row per benchmarked limit, with the seconds measured for each fetch path.
df = pl.DataFrame(data)

# x is the row limit; every other column (binary, pymonetdb) is drawn as its own
# series. The title and axis labels let the figure stand alone -- without them the
# y-axis unit (seconds) cannot be read off the plot.
df.hvplot(
    x="limit",
    kind="scatter",
    grid=True,
    width=900,
    height=600,
    xlabel="row limit",
    ylabel="elapsed time (s)",
    title="MonetDB fetch time: binary vs pymonetdb",
)

In [9]:
# Show the raw timings (seconds) per limit that the plot is drawn from.
df

limit,binary,pymonetdb
i64,f64,f64
1,0.024092,0.077169
10,0.017691,0.008928
100,0.016082,0.007147
1000,0.021115,0.015956
5000,0.028448,0.048932
…,…,…
6000000,7.953836,38.155444
7000000,9.050251,45.129646
8000000,10.425993,52.10298
9000000,11.760476,59.3534
