In [1]:
# ruff: noqa: F401

In [2]:
%load_ext autoreload
%load_ext pyinstrument

%autoreload 2

In [3]:
import sys

sys.path.insert(0, "..")

In [4]:
from dotenv import load_dotenv

load_dotenv()

True

In [5]:
from datetime import datetime, timedelta
from pathlib import Path

import hvplot.polars
import numpy as np
import polars as pl

In [6]:
from utils import compare

from tsdb_benchmarks.monetdb import MonetDB
from tsdb_benchmarks.monetdb.fetch import fetch_binary, fetch_pymonetdb
from tsdb_benchmarks.monetdb.insert import insert, upsert
from tsdb_benchmarks.monetdb.utils import drop_table

db = MonetDB()

In [7]:
df = pl.read_parquet("../data/input/time-series/data_0.2M_0.5k.parquet").with_columns(
    (100 * pl.col.col_1 - 50).cast(pl.Int8).alias("col_1_int8"),
    (100 * pl.col.col_1 - 50).cast(pl.Int16).alias("col_1_int16"),
    (100 * pl.col.col_1 - 50).cast(pl.Int32).alias("col_1_int32"),
    (100 * pl.col.col_1 - 50).cast(pl.Int64).alias("col_1_int64"),
    pl.col.col_1.cast(pl.Float32).alias("col_1_float32"),
    pl.col.col_1.cast(pl.Float64).alias("col_1_float64"),
    pl.col.col_1.cast(pl.Decimal(10, 2)).alias("col_1_decimal_10_2"),
    pl.col.col_1.cast(pl.Decimal(18, 3)).alias("col_1_decimal_18_3"),
    pl.col.col_1.cast(pl.String).alias("col_1_str"),
    (pl.col.col_1 > 0.5).cast(pl.Boolean).alias("col_1_bool"),
    pl.col.col_1.cast(pl.Binary).alias("col_1_blob"),
    ('{ "val": ' + pl.col.col_1.cast(pl.String) + "}").str.json_decode().alias("col_1_json"),
    pl.col.time.cast(pl.Time).alias("time_time"),
    pl.col.time.cast(pl.Date).alias("time_date"),
)


df = df.with_columns(
    pl.when((pl.col.time > df.get_column("time")[4]) | (pl.col.time == df.get_column("time")[0]))
    .then(pl.col(n))
    .otherwise(None)
    for n in df.columns
    if n != "time"
)

# df = df.select([n for n in df.columns if n == "time" or n.count("_") == 2])

cutoff = 50
overlap = 50

df_first = df.with_row_index().filter(pl.col.index < len(df) - cutoff).drop("index")
df_second = df.with_row_index().filter(pl.col.index > len(df) - (cutoff + overlap)).drop("index")
df_second = df_second.with_columns(pl.selectors.float() * 1_000)

In [8]:
TABLE_NAME = "test_upsert"

In [16]:
drop_table(TABLE_NAME, db.connect())

In [17]:
insert(df_first, TABLE_NAME, db.connect(), primary_key="time")

In [18]:
fetch_binary(f"select time, col_125 from {TABLE_NAME} order by time", db.connect())

time,col_125
datetime[ms],f32
2024-08-15 02:41:00,0.15993
2024-08-15 02:42:00,
2024-08-15 02:43:00,
2024-08-15 02:44:00,
2024-08-15 02:45:00,
…,…
2024-12-31 23:06:00,0.554483
2024-12-31 23:07:00,0.410327
2024-12-31 23:08:00,0.032196
2024-12-31 23:09:00,0.683604


In [20]:
upsert(df_second, TABLE_NAME, db.connect(), primary_key="time")

In [19]:
# %%pyinstrument

for idx in range(len(df_second)):
    upsert(df_second[idx], TABLE_NAME, db.connect(), primary_key="time")

    print(fetch_pymonetdb(f"select max(time) from {TABLE_NAME}", db.connect()))

shape: (1, 1)
┌─────────────────────┐
│ %1                  │
│ ---                 │
│ datetime[ms]        │
╞═════════════════════╡
│ 2024-12-31 23:10:00 │
└─────────────────────┘
shape: (1, 1)
┌─────────────────────┐
│ %1                  │
│ ---                 │
│ datetime[ms]        │
╞═════════════════════╡
│ 2024-12-31 23:10:00 │
└─────────────────────┘
shape: (1, 1)
┌─────────────────────┐
│ %1                  │
│ ---                 │
│ datetime[ms]        │
╞═════════════════════╡
│ 2024-12-31 23:10:00 │
└─────────────────────┘
shape: (1, 1)
┌─────────────────────┐
│ %1                  │
│ ---                 │
│ datetime[ms]        │
╞═════════════════════╡
│ 2024-12-31 23:10:00 │
└─────────────────────┘
shape: (1, 1)
┌─────────────────────┐
│ %1                  │
│ ---                 │
│ datetime[ms]        │
╞═════════════════════╡
│ 2024-12-31 23:10:00 │
└─────────────────────┘
shape: (1, 1)
┌─────────────────────┐
│ %1                  │
│ ---                 │
│ da

In [21]:
dff = fetch_binary(f"select * from {TABLE_NAME} order by time", db.connect())

dff

time,col_1,col_2,col_3,col_4,col_5,col_6,col_7,col_8,col_9,col_10,col_11,col_12,col_13,col_14,col_15,col_16,col_17,col_18,col_19,col_20,col_21,col_22,col_23,col_24,col_25,col_26,col_27,col_28,col_29,col_30,col_31,col_32,col_33,col_34,col_35,col_36,…,col_478,col_479,col_480,col_481,col_482,col_483,col_484,col_485,col_486,col_487,col_488,col_489,col_490,col_491,col_492,col_493,col_494,col_495,col_496,col_497,col_498,col_499,col_500,col_1_int8,col_1_int16,col_1_int32,col_1_int64,col_1_float32,col_1_float64,col_1_decimal_10_2,col_1_decimal_18_3,col_1_str,col_1_bool,col_1_blob,col_1_json,time_time,time_date
datetime[ms],f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,…,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,i8,i16,i32,i64,f32,f64,"decimal[10,2]","decimal[18,3]",str,bool,binary,struct[1],time,date
2024-08-15 02:41:00,0.558972,0.528642,0.995832,0.268113,0.379835,0.824283,0.850447,0.898939,0.656081,0.462798,0.538847,0.100355,0.082282,0.405463,0.568379,0.809475,0.034152,0.093551,0.148555,0.827094,0.427706,0.452292,0.085811,0.392709,0.839373,0.441632,0.272149,0.048649,0.79554,0.32123,0.044465,0.742381,0.657496,0.430154,0.157876,0.998165,…,0.069214,0.462403,0.016247,0.717631,0.539519,0.167658,0.440894,0.843609,0.337872,0.65543,0.918049,0.929099,0.527873,0.88279,0.998202,0.855403,0.366723,0.613186,0.726142,0.552907,0.481775,0.563608,0.560218,5,5,5,5,0.558972,0.558972,0.55,0.558,"""0.558972""",true,"b""0.558972""",{0.558972},02:41:00,2024-08-15
2024-08-15 02:42:00,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,…,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
2024-08-15 02:43:00,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,…,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
2024-08-15 02:44:00,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,…,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
2024-08-15 02:45:00,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,…,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…
2024-12-31 23:56:00,411.647186,53.16177,654.97406,562.809509,959.890991,643.470581,781.333984,964.650574,710.035767,179.699341,775.966736,437.762115,98.131393,6.750329,364.134094,623.712219,457.880249,270.429901,21.475601,179.030899,550.986206,905.915894,415.68985,400.59903,387.41571,183.939804,313.719635,157.728058,606.044861,45.963425,318.292328,157.569138,207.104263,226.027237,737.767151,687.92334,…,231.922913,91.755142,397.516968,796.589355,630.121338,944.559265,275.287842,789.8526,746.835876,35.700787,793.809143,75.652985,471.013855,244.707962,340.754822,509.384857,440.586334,472.109619,174.617889,520.404053,903.723206,203.868713,221.991989,-8,-8,-8,-8,411.647186,411.647201,0.41,0.411,"""0.4116472""",false,"b""0.4116472""",{0.4116472},23:56:00,2024-12-31
2024-12-31 23:57:00,242.912384,104.939819,370.701813,348.109406,194.449432,117.225082,511.168793,161.151138,476.904938,130.560257,299.409851,701.450745,393.145874,873.990662,910.132019,760.06897,586.082031,382.490234,6.829069,595.778992,343.492859,513.669373,22.049112,631.286987,421.593964,461.386444,764.465332,546.163452,759.463196,537.641235,90.395233,611.805054,504.205658,753.463867,289.341766,391.62793,…,92.90377,658.98822,474.921204,956.614868,937.096252,713.155884,861.694641,328.572906,697.955017,133.383911,381.006989,864.886108,311.496033,334.252441,387.226959,864.351257,101.283417,154.611816,502.734009,109.684853,476.924652,200.789368,718.234253,-25,-25,-25,-25,242.912384,242.912382,0.24,0.242,"""0.24291238""",false,"b""0.24291238""",{0.242912},23:57:00,2024-12-31
2024-12-31 23:58:00,326.850128,358.204529,559.104614,240.451752,735.21814,202.289688,179.661682,24.149717,911.761169,607.425415,834.743286,541.173645,850.1203,693.316711,474.126007,711.376709,709.11499,323.383942,109.653008,513.606445,418.032837,978.530212,52.972065,136.063034,730.774597,489.151581,920.777466,681.626587,108.511833,255.100067,359.125,968.93335,264.578278,885.062012,73.900154,221.398987,…,235.669693,19.500971,17.216879,853.580444,37.123314,860.528809,833.854919,442.416809,855.286194,395.743744,640.678894,761.704712,318.167358,683.499146,806.667542,169.837906,461.263947,687.076904,335.435181,28.527025,304.91861,336.197845,319.714294,-17,-17,-17,-17,326.850128,326.850116,0.32,0.326,"""0.32685012""",false,"b""0.32685012""",{0.32685},23:58:00,2024-12-31
2024-12-31 23:59:00,610.789429,441.108063,786.977417,785.830444,693.327332,305.296387,548.664612,566.806458,393.284088,454.533813,291.45163,617.040527,318.393463,935.318909,629.47467,580.795349,968.456726,718.42865,402.014252,931.69989,460.657196,887.06488,480.032135,424.212219,690.168518,569.796448,663.41626,11.654084,663.245667,52.170021,19.568193,978.730591,272.147461,810.016846,654.290955,587.451355,…,35.721184,232.838364,371.081055,950.949585,715.884583,692.939575,952.726929,624.250305,44.714115,24.367092,944.974243,647.229309,822.929688,559.308044,770.961792,615.078369,607.378418,399.539368,977.371826,727.094238,889.190857,252.32666,645.979553,11,11,11,11,610.789429,610.789418,0.61,0.610,"""0.6107894""",true,"b""0.6107894""",{0.6107894},23:59:00,2024-12-31


In [22]:
compare(dff, df)

✅ Checking DataFrames...
❌ DataFrames are NOT equal.

✅ Column names match.

⚠️ Columns with differences: ['col_1', 'col_2', 'col_3', 'col_4', 'col_5', 'col_6', 'col_7', 'col_8', 'col_9', 'col_10', 'col_11', 'col_12', 'col_13', 'col_14', 'col_15', 'col_16', 'col_17', 'col_18', 'col_19', 'col_20', 'col_21', 'col_22', 'col_23', 'col_24', 'col_25', 'col_26', 'col_27', 'col_28', 'col_29', 'col_30', 'col_31', 'col_32', 'col_33', 'col_34', 'col_35', 'col_36', 'col_37', 'col_38', 'col_39', 'col_40', 'col_41', 'col_42', 'col_43', 'col_44', 'col_45', 'col_46', 'col_47', 'col_48', 'col_49', 'col_50', 'col_51', 'col_52', 'col_53', 'col_54', 'col_55', 'col_56', 'col_57', 'col_58', 'col_59', 'col_60', 'col_61', 'col_62', 'col_63', 'col_64', 'col_65', 'col_66', 'col_67', 'col_68', 'col_69', 'col_70', 'col_71', 'col_72', 'col_73', 'col_74', 'col_75', 'col_76', 'col_77', 'col_78', 'col_79', 'col_80', 'col_81', 'col_82', 'col_83', 'col_84', 'col_85', 'col_86', 'col_87', 'col_88', 'col_89', 'col_90', 'c