Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Fix for casting tdigest to text and vice versa #1

Merged
merged 3 commits into from
Jun 10, 2020
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
20 changes: 9 additions & 11 deletions tdigest.c
Original file line number Diff line number Diff line change
Expand Up @@ -1689,16 +1689,15 @@ tdigest_in(PG_FUNCTION_ARGS)
tdigest_t *digest = NULL;

/* t-digest header fields */
int32 flags;
int64 count;
int compression;
int ncentroids;
char *centroids;
int header_length;
char *ptr;

centroids = palloc(strlen(str));

r = sscanf(str, "count %ld compression %d centroids %d%s",
&count, &compression, &ncentroids, centroids);
r = sscanf(str, "flags %d count %ld compression %d centroids %d%n",
&flags, &count, &compression, &ncentroids, &header_length);

if (r != 4)
elog(ERROR, "failed to parse t-digest value");
Expand All @@ -1725,11 +1724,12 @@ tdigest_in(PG_FUNCTION_ARGS)

digest = tdigest_allocate(ncentroids);

digest->flags = flags;
digest->count = count;
digest->ncentroids = ncentroids;
digest->compression = compression;

ptr = centroids;
ptr = str + header_length;

for (i = 0; i < digest->ncentroids; i++)
{
Expand All @@ -1750,9 +1750,7 @@ tdigest_in(PG_FUNCTION_ARGS)
ptr = strchr(ptr, ')') + 1;
}

Assert(ptr == centroids + strlen(centroids));

pfree(centroids);
Assert(ptr == str + strlen(str));

AssertCheckTDigest(digest);

Expand Down Expand Up @@ -1803,7 +1801,7 @@ tdigest_recv(PG_FUNCTION_ARGS)
if (flags != 0)
elog(ERROR, "unsupported t-digest on-disk format");

count = pq_getmsgint(buf, sizeof(int64));
count = pq_getmsgint64(buf);
compression = pq_getmsgint(buf, sizeof(int32));
ncentroids = pq_getmsgint(buf, sizeof(int32));

Expand All @@ -1817,7 +1815,7 @@ tdigest_recv(PG_FUNCTION_ARGS)
for (i = 0; i < digest->ncentroids; i++)
{
digest->centroids[i].sum = pq_getmsgfloat8(buf);
digest->centroids[i].count = pq_getmsgint(buf, sizeof(int64));
digest->centroids[i].count = pq_getmsgint64(buf);
}

PG_RETURN_POINTER(digest);
Expand Down
62 changes: 62 additions & 0 deletions test/expected/tdigest.out
Original file line number Diff line number Diff line change
Expand Up @@ -1196,3 +1196,65 @@ SELECT * FROM (
---+---+---
(0 rows)

-- some basic tests to verify transforming from and to text work
-- 10 centroids (tiny)
WITH data AS (SELECT i / 1000000.0 AS x FROM generate_series(1,1000000) s(i)),
intermediate AS (SELECT tdigest(x, 10)::text AS intermediate_x FROM data),
tdigest_parsed AS (SELECT tdigest_percentile(intermediate_x::tdigest, ARRAY[0.01, 0.05, 0.1, 0.9, 0.95, 0.99]) AS a FROM intermediate),
pg_percentile AS (SELECT percentile_cont(ARRAY[0.01, 0.05, 0.1, 0.9, 0.95, 0.99]) WITHIN GROUP (ORDER BY x) AS b FROM data)
SELECT
p,
abs(a - b) < 0.01, -- arbitrary threshold of 1%
(CASE WHEN abs(a - b) < 0.01 THEN NULL ELSE (a - b) END) AS err
FROM (
SELECT
unnest(ARRAY[0.01, 0.05, 0.1, 0.9, 0.95, 0.99]) AS p,
unnest(a) AS a,
unnest(b) AS b
FROM tdigest_parsed,
pg_percentile
) foo;
p | ?column? | err
------+----------+-----
0.01 | t |
0.05 | t |
0.1 | t |
0.9 | t |
0.95 | t |
0.99 | t |
(6 rows)

-- verify we can store tdigest in a summary table
CREATE TABLE intermediate_tdigest (grouping int, summary tdigest);
WITH data AS (SELECT row_number() OVER () AS i, pow(z, 4) AS x FROM random_normal(1000000) s(z))
INSERT INTO intermediate_tdigest
SELECT
i % 10 AS grouping,
tdigest(x, 100) AS summary
FROM data
GROUP BY i % 10;
WITH data AS (SELECT pow(z, 4) AS x FROM random_normal(1000000) s(z)),
intermediate AS (SELECT tdigest_percentile(summary, ARRAY[0.01, 0.05, 0.1, 0.9, 0.95, 0.99]) AS a FROM intermediate_tdigest),
pg_percentile AS (SELECT percentile_cont(ARRAY[0.01, 0.05, 0.1, 0.9, 0.95, 0.99]) WITHIN GROUP (ORDER BY x) AS b FROM data)
SELECT
p,
abs(a - b) < 0.01, -- arbitrary threshold of 1%
(CASE WHEN abs(a - b) < 0.01 THEN NULL ELSE (a - b) END) AS err
FROM (
SELECT
unnest(ARRAY[0.01, 0.05, 0.1, 0.9, 0.95, 0.99]) AS p,
unnest(a) AS a,
unnest(b) AS b
FROM intermediate,
pg_percentile
) foo;
p | ?column? | err
------+----------+-----
0.01 | t |
0.05 | t |
0.1 | t |
0.9 | t |
0.95 | t |
0.99 | t |
(6 rows)

46 changes: 46 additions & 0 deletions test/sql/tdigest.sql
Original file line number Diff line number Diff line change
Expand Up @@ -892,3 +892,49 @@ SELECT * FROM (
unnest(tdigest_percentile(x, 1000, (SELECT p FROM perc))) AS a
FROM data
) foo ) bar WHERE a <= b;

-- some basic tests to verify transforming from and to text work
-- 10 centroids (tiny)
WITH data AS (SELECT i / 1000000.0 AS x FROM generate_series(1,1000000) s(i)),
intermediate AS (SELECT tdigest(x, 10)::text AS intermediate_x FROM data),
tdigest_parsed AS (SELECT tdigest_percentile(intermediate_x::tdigest, ARRAY[0.01, 0.05, 0.1, 0.9, 0.95, 0.99]) AS a FROM intermediate),
pg_percentile AS (SELECT percentile_cont(ARRAY[0.01, 0.05, 0.1, 0.9, 0.95, 0.99]) WITHIN GROUP (ORDER BY x) AS b FROM data)
SELECT
p,
abs(a - b) < 0.01, -- arbitrary threshold of 1%
(CASE WHEN abs(a - b) < 0.01 THEN NULL ELSE (a - b) END) AS err
FROM (
SELECT
unnest(ARRAY[0.01, 0.05, 0.1, 0.9, 0.95, 0.99]) AS p,
unnest(a) AS a,
unnest(b) AS b
FROM tdigest_parsed,
pg_percentile
) foo;

-- verify we can store tdigest in a summary table
CREATE TABLE intermediate_tdigest (grouping int, summary tdigest);

WITH data AS (SELECT row_number() OVER () AS i, pow(z, 4) AS x FROM random_normal(1000000) s(z))
INSERT INTO intermediate_tdigest
SELECT
i % 10 AS grouping,
tdigest(x, 100) AS summary
FROM data
GROUP BY i % 10;

WITH data AS (SELECT pow(z, 4) AS x FROM random_normal(1000000) s(z)),
intermediate AS (SELECT tdigest_percentile(summary, ARRAY[0.01, 0.05, 0.1, 0.9, 0.95, 0.99]) AS a FROM intermediate_tdigest),
pg_percentile AS (SELECT percentile_cont(ARRAY[0.01, 0.05, 0.1, 0.9, 0.95, 0.99]) WITHIN GROUP (ORDER BY x) AS b FROM data)
SELECT
p,
abs(a - b) < 0.01, -- arbitrary threshold of 1%
(CASE WHEN abs(a - b) < 0.01 THEN NULL ELSE (a - b) END) AS err
FROM (
SELECT
unnest(ARRAY[0.01, 0.05, 0.1, 0.9, 0.95, 0.99]) AS p,
unnest(a) AS a,
unnest(b) AS b
FROM intermediate,
pg_percentile
) foo;