Skip to content

Commit

Permalink
Optimize by rewriting declarative pure SQL code into imperative PL/pgSQL
Browse files Browse the repository at this point in the history
PostgreSQL's SQL language isn't terribly well suited to execute
a fundamentally stepwise, imperative algorithm like CBOR.

Rather than hacking up cute tricks with LATERAL, we should just use
a language that *is* well suited, a PL, like PL/pgSQL.

-- Pure SQL (before):
select * from pg_stat_xact_user_functions ;
 schemaname |  funcname  | calls | total_time | self_time
------------+------------+-------+------------+-----------
 cbor       | next_item  |    14 |   48.91024 | 38.964918
 cbor       | next_array |     1 |   7.297435 |  1.816102
 cbor       | next_map   |     2 |  40.844352 |    7.8957
 cbor       | to_jsonb   |     1 |  50.222183 |  1.311943

-- PL/pgSQL (after):
select * from pg_stat_xact_user_functions ;
 schemaname |   funcname   | calls | total_time | self_time
------------+--------------+-------+------------+-----------
 cbor       | next_item    |    14 |   8.021371 |  3.358271
 cbor       | next_array   |     1 |   0.565398 |  0.353071
 cbor       | next_map     |     2 |   5.607702 |  1.324057
 cbor       | to_jsonb     |     1 |   8.823691 |   0.80232
  • Loading branch information
joelonsql committed Dec 22, 2020
1 parent 5a71b4f commit 7ea7640
Show file tree
Hide file tree
Showing 11 changed files with 304 additions and 86 deletions.
23 changes: 23 additions & 0 deletions FUNCTIONS/major_type_0.sql
Original file line number Diff line number Diff line change
@@ -0,0 +1,23 @@
CREATE OR REPLACE FUNCTION cbor.major_type_0(
cbor bytea,
encode_binary_format text,
additional_type integer,
length_bytes integer,
data_value numeric
)
RETURNS cbor.next_state
IMMUTABLE
LANGUAGE plpgsql
AS $$
BEGIN
IF additional_type <= 27 THEN
RETURN ROW(substring(cbor,2+length_bytes), pg_catalog.to_jsonb(data_value));
ELSIF additional_type >= 28 AND additional_type <= 30 THEN
RAISE EXCEPTION 'a reserved value is used for additional information(%)', additional_type;
ELSIF additional_type = 31 THEN
RAISE EXCEPTION 'additional information 31 used with major type 0';
ELSE
RAISE EXCEPTION 'not implemented, major_type %, additional_type %', 0, additional_type;
END IF;
END;
$$;
23 changes: 23 additions & 0 deletions FUNCTIONS/major_type_1.sql
Original file line number Diff line number Diff line change
@@ -0,0 +1,23 @@
CREATE OR REPLACE FUNCTION cbor.major_type_1(
cbor bytea,
encode_binary_format text,
additional_type integer,
length_bytes integer,
data_value numeric
)
RETURNS cbor.next_state
IMMUTABLE
LANGUAGE plpgsql
AS $$
BEGIN
IF additional_type <= 27 THEN
RETURN ROW(substring(cbor,2+length_bytes), pg_catalog.to_jsonb(-1-data_value));
ELSIF additional_type >= 28 AND additional_type <= 30 THEN
RAISE EXCEPTION 'a reserved value is used for additional information(%)', additional_type;
ELSIF additional_type = 31 THEN
RAISE EXCEPTION 'additional information 31 used with major type 1';
ELSE
RAISE EXCEPTION 'not implemented, major_type %, additional_type %', 1, additional_type;
END IF;
END;
$$;
23 changes: 23 additions & 0 deletions FUNCTIONS/major_type_2.sql
Original file line number Diff line number Diff line change
@@ -0,0 +1,23 @@
CREATE OR REPLACE FUNCTION cbor.major_type_2(
cbor bytea,
encode_binary_format text,
additional_type integer,
length_bytes integer,
data_value numeric
)
RETURNS cbor.next_state
IMMUTABLE
LANGUAGE plpgsql
AS $$
BEGIN
IF additional_type <= 27 THEN
RETURN ROW(substring(cbor,2+length_bytes+data_value::integer), pg_catalog.to_jsonb(cbor.encode(substring(cbor,2+length_bytes,data_value::integer),encode_binary_format)));
ELSIF additional_type = 31 THEN
RETURN cbor.next_indefinite_byte_string(substring(cbor,2), encode_binary_format);
ELSIF additional_type >= 28 AND additional_type <= 30 THEN
RAISE EXCEPTION 'a reserved value is used for additional information(%)', additional_type;
ELSE
RAISE EXCEPTION 'not implemented, major_type %, additional_type %', 2, additional_type;
END IF;
END;
$$;
23 changes: 23 additions & 0 deletions FUNCTIONS/major_type_3.sql
Original file line number Diff line number Diff line change
@@ -0,0 +1,23 @@
CREATE OR REPLACE FUNCTION cbor.major_type_3(
cbor bytea,
encode_binary_format text,
additional_type integer,
length_bytes integer,
data_value numeric
)
RETURNS cbor.next_state
IMMUTABLE
LANGUAGE plpgsql
AS $$
BEGIN
IF additional_type <= 27 THEN
RETURN ROW(substring(cbor,2+length_bytes+data_value::integer), pg_catalog.to_jsonb(convert_from(substring(cbor,2+length_bytes,data_value::integer),'utf8')));
ELSIF additional_type = 31 THEN
RETURN cbor.next_indefinite_text_string(substring(cbor,2), encode_binary_format);
ELSIF additional_type >= 28 AND additional_type <= 30 THEN
RAISE EXCEPTION 'a reserved value is used for additional information(%)', additional_type;
ELSE
RAISE EXCEPTION 'not implemented, major_type %, additional_type %', 3, additional_type;
END IF;
END;
$$;
23 changes: 23 additions & 0 deletions FUNCTIONS/major_type_4.sql
Original file line number Diff line number Diff line change
@@ -0,0 +1,23 @@
CREATE OR REPLACE FUNCTION cbor.major_type_4(
cbor bytea,
encode_binary_format text,
additional_type integer,
length_bytes integer,
data_value numeric
)
RETURNS cbor.next_state
IMMUTABLE
LANGUAGE plpgsql
AS $$
BEGIN
IF additional_type <= 27 THEN
RETURN cbor.next_array(substring(cbor,2+length_bytes), data_value::integer, encode_binary_format);
ELSIF additional_type = 31 THEN
RETURN cbor.next_indefinite_array(substring(cbor,2), encode_binary_format);
ELSIF additional_type >= 28 AND additional_type <= 30 THEN
RAISE EXCEPTION 'a reserved value is used for additional information(%)', additional_type;
ELSE
RAISE EXCEPTION 'not implemented, major_type %, additional_type %', 4, additional_type;
END IF;
END;
$$;
23 changes: 23 additions & 0 deletions FUNCTIONS/major_type_5.sql
Original file line number Diff line number Diff line change
@@ -0,0 +1,23 @@
CREATE OR REPLACE FUNCTION cbor.major_type_5(
cbor bytea,
encode_binary_format text,
additional_type integer,
length_bytes integer,
data_value numeric
)
RETURNS cbor.next_state
IMMUTABLE
LANGUAGE plpgsql
AS $$
BEGIN
IF additional_type <= 27 THEN
RETURN cbor.next_map(substring(cbor,2+length_bytes), data_value::integer, encode_binary_format);
ELSIF additional_type = 31 THEN
RETURN cbor.next_indefinite_map(substring(cbor,2), encode_binary_format);
ELSIF additional_type >= 28 AND additional_type <= 30 THEN
RAISE EXCEPTION 'a reserved value is used for additional information(%)', additional_type;
ELSE
RAISE EXCEPTION 'not implemented, major_type %, additional_type %', 5, additional_type;
END IF;
END;
$$;
40 changes: 40 additions & 0 deletions FUNCTIONS/major_type_6.sql
Original file line number Diff line number Diff line change
@@ -0,0 +1,40 @@
CREATE OR REPLACE FUNCTION cbor.major_type_6(
cbor bytea,
encode_binary_format text,
additional_type integer,
length_bytes integer,
data_value numeric
)
RETURNS cbor.next_state
IMMUTABLE
LANGUAGE plpgsql
AS $$
BEGIN
IF additional_type = 2 THEN
RETURN (
SELECT
ROW(tag_item.remainder, pg_catalog.to_jsonb(cbor.bytea_to_numeric(decode(tag_item.item#>>'{}','hex'))))
FROM cbor.next_item(substring(cbor,2), encode_binary_format) AS tag_item
);
ELSIF additional_type = 3 THEN
RETURN (
SELECT ROW(tag_item.remainder, pg_catalog.to_jsonb(-1-cbor.bytea_to_numeric(decode(tag_item.item#>>'{}','hex'))))
FROM cbor.next_item(substring(cbor,2), encode_binary_format) AS tag_item
);
ELSIF additional_type = 21 THEN
RETURN cbor.next_item(substring(cbor,2), 'base64url');
ELSIF additional_type = 22 THEN
RETURN cbor.next_item(substring(cbor,2), 'base64');
ELSIF additional_type = 23 THEN
RETURN cbor.next_item(substring(cbor,2), 'hex');
ELSIF additional_type = ANY(ARRAY[0,1,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,24,25,26,27]) THEN
RETURN cbor.next_tag(substring(cbor,2+length_bytes), data_value, encode_binary_format);
ELSIF additional_type >= 28 AND additional_type <= 30 THEN
RAISE EXCEPTION 'a reserved value is used for additional information(%)', additional_type;
ELSIF additional_type = 31 THEN
RAISE EXCEPTION 'additional information 31 used with major type 6';
ELSE
RAISE EXCEPTION 'not implemented, major_type %, additional_type %', 6, additional_type;
END IF;
END;
$$;
43 changes: 43 additions & 0 deletions FUNCTIONS/major_type_7.sql
Original file line number Diff line number Diff line change
@@ -0,0 +1,43 @@
CREATE OR REPLACE FUNCTION cbor.major_type_7(
cbor bytea,
encode_binary_format text,
additional_type integer,
length_bytes integer,
data_value numeric
)
RETURNS cbor.next_state
IMMUTABLE
LANGUAGE plpgsql
AS $$
BEGIN
IF additional_type = 20 THEN
RETURN ROW(substring(cbor,2), pg_catalog.to_jsonb(false));
ELSIF additional_type = 21 THEN
RETURN ROW(substring(cbor,2), pg_catalog.to_jsonb(true));
ELSIF additional_type = 22 THEN
RETURN ROW(substring(cbor,2), 'null'::jsonb);
ELSIF additional_type = 25 THEN
RETURN cbor.next_float_half(substring(cbor,2));
ELSIF additional_type = 26 THEN
RETURN cbor.next_float_single(substring(cbor,2));
ELSIF additional_type = 27 THEN
RETURN cbor.next_float_double(substring(cbor,2));
ELSIF additional_type = 23 THEN
RETURN ROW(substring(cbor,2), cbor.undefined_value());
ELSIF additional_type = 24 AND data_value >= 32 THEN
RETURN ROW(substring(cbor,2+length_bytes), pg_catalog.to_jsonb(data_value));
ELSIF additional_type <= 19 THEN
RETURN ROW(substring(cbor,2+length_bytes), pg_catalog.to_jsonb(data_value));
ELSIF additional_type > 27 AND additional_type < 31 THEN
RETURN ROW(substring(cbor,2), cbor.substitute_value(substring(cbor,2), 7, additional_type));
ELSIF additional_type = 31 THEN
RAISE EXCEPTION '"break" stop code appeared where a data item is expected, the enclosing item is not well-formed';
ELSIF additional_type = 24 AND data_value < 32 THEN
RAISE EXCEPTION 'major type 7, additional information 24, data_value(%) < 32 (incorrect)', data_value;
ELSIF additional_type >= 28 AND additional_type <= 30 THEN
RAISE EXCEPTION 'a reserved value is used for additional information(%)', additional_type;
ELSE
RAISE EXCEPTION 'not implemented, major_type %, additional_type %', 7, additional_type;
END IF;
END;
$$;
109 changes: 36 additions & 73 deletions FUNCTIONS/next_item.sql
Original file line number Diff line number Diff line change
@@ -1,79 +1,42 @@
CREATE OR REPLACE FUNCTION cbor.next_item(cbor bytea, encode_binary_format text)
RETURNS cbor.next_state
IMMUTABLE
LANGUAGE sql
LANGUAGE plpgsql
AS $$
WITH
data_item_header(major_type,additional_type) AS MATERIALIZED (
DECLARE
major_type constant integer := (get_byte(cbor,0) >> 5) & '111'::bit(3)::integer;
additional_type constant integer := get_byte(cbor,0) & '11111'::bit(5)::integer;
length_bytes constant integer := NULLIF(LEAST(floor(2 ^ (additional_type - 24))::integer,16),16);
data_value numeric := 0;
BEGIN
IF additional_type <= 23 THEN
data_value := additional_type::numeric;
ELSIF additional_type BETWEEN 24 AND 27 THEN
/*
FOR byte_pos IN 1..length_bytes LOOP
data_value := data_value + get_byte(cbor,byte_pos) * 2::numeric^(8*(length_bytes-byte_pos));
END LOOP;
data_value := floor(data_value);
*/
SELECT
(get_byte(cbor,0) >> 5) & '111'::bit(3)::integer,
get_byte(cbor,0) & '11111'::bit(5)::integer
),
calc_length_bytes(length_bytes) AS MATERIALIZED (
SELECT
NULLIF(LEAST(floor(2 ^ (additional_type - 24))::integer,16),16)
FROM data_item_header
),
calc_data_value(data_value) AS MATERIALIZED (
SELECT
CASE
WHEN additional_type <= 23 THEN
additional_type::numeric
WHEN additional_type BETWEEN 24 AND 27 THEN (
SELECT
floor(SUM(get_byte(cbor,byte_pos) * 2::numeric^(8*(length_bytes-byte_pos))))
FROM calc_length_bytes
CROSS JOIN generate_series(1,length_bytes) AS byte_pos
)
END
FROM data_item_header
)
SELECT
CASE
WHEN major_type = 0 AND additional_type <= 27 THEN ROW(substring(cbor,2+length_bytes), pg_catalog.to_jsonb(data_value))::cbor.next_state
WHEN major_type = 1 AND additional_type <= 27 THEN ROW(substring(cbor,2+length_bytes), pg_catalog.to_jsonb(-1-data_value))::cbor.next_state
WHEN major_type = 2 AND additional_type <= 27 THEN ROW(substring(cbor,2+length_bytes+data_value::integer), pg_catalog.to_jsonb(cbor.encode(substring(cbor,2+length_bytes,data_value::integer),encode_binary_format)))::cbor.next_state
WHEN major_type = 2 AND additional_type = 31 THEN cbor.next_indefinite_byte_string(substring(cbor,2), encode_binary_format)
WHEN major_type = 3 AND additional_type <= 27 THEN ROW(substring(cbor,2+length_bytes+data_value::integer), pg_catalog.to_jsonb(convert_from(substring(cbor,2+length_bytes,data_value::integer),'utf8')))::cbor.next_state
WHEN major_type = 3 AND additional_type = 31 THEN cbor.next_indefinite_text_string(substring(cbor,2), encode_binary_format)
WHEN major_type = 4 AND additional_type <= 27 THEN cbor.next_array(substring(cbor,2+length_bytes), data_value::integer, encode_binary_format)
WHEN major_type = 4 AND additional_type = 31 THEN cbor.next_indefinite_array(substring(cbor,2), encode_binary_format)
WHEN major_type = 5 AND additional_type <= 27 THEN cbor.next_map(substring(cbor,2+length_bytes), data_value::integer, encode_binary_format)
WHEN major_type = 5 AND additional_type = 31 THEN cbor.next_indefinite_map(substring(cbor,2), encode_binary_format)
WHEN major_type = 6 AND additional_type = 2 THEN (SELECT ROW(tag_item.remainder, pg_catalog.to_jsonb(cbor.bytea_to_numeric(decode(tag_item.item#>>'{}','hex'))))::cbor.next_state FROM cbor.next_item(substring(cbor,2), encode_binary_format) AS tag_item)
WHEN major_type = 6 AND additional_type = 3 THEN (SELECT ROW(tag_item.remainder, pg_catalog.to_jsonb(-1-cbor.bytea_to_numeric(decode(tag_item.item#>>'{}','hex'))))::cbor.next_state FROM cbor.next_item(substring(cbor,2), encode_binary_format) AS tag_item)
WHEN major_type = 6 AND additional_type = 21 THEN cbor.next_item(substring(cbor,2), 'base64url')
WHEN major_type = 6 AND additional_type = 22 THEN cbor.next_item(substring(cbor,2), 'base64')
WHEN major_type = 6 AND additional_type = 23 THEN cbor.next_item(substring(cbor,2), 'hex')
WHEN major_type = 7 AND additional_type <= 19 THEN ROW(substring(cbor,2+length_bytes), pg_catalog.to_jsonb(data_value))::cbor.next_state
WHEN major_type = 7 AND additional_type = 20 THEN ROW(substring(cbor,2), pg_catalog.to_jsonb(false))::cbor.next_state
WHEN major_type = 7 AND additional_type = 21 THEN ROW(substring(cbor,2), pg_catalog.to_jsonb(true))::cbor.next_state
WHEN major_type = 7 AND additional_type = 22 THEN ROW(substring(cbor,2), 'null'::jsonb)::cbor.next_state
WHEN major_type = 7 AND additional_type = 23 THEN ROW(substring(cbor,2), cbor.undefined_value())::cbor.next_state
WHEN major_type = 7 AND additional_type = 24
AND data_value >= 32 THEN ROW(substring(cbor,2+length_bytes), pg_catalog.to_jsonb(data_value))::cbor.next_state
WHEN major_type = 7 AND additional_type = 25 THEN cbor.next_float_half(substring(cbor,2))
WHEN major_type = 7 AND additional_type = 26 THEN cbor.next_float_single(substring(cbor,2))
WHEN major_type = 7 AND additional_type = 27 THEN cbor.next_float_double(substring(cbor,2))
WHEN major_type = 7 AND additional_type > 27
AND additional_type < 31 THEN ROW(substring(cbor,2), cbor.substitute_value(substring(cbor,2), major_type, additional_type))::cbor.next_state
WHEN major_type = 7 AND additional_type = 31 THEN cbor.raise('"break" stop code appeared where a data item is expected, the enclosing item is not well-formed',NULL,NULL::cbor.next_state)
WHEN major_type = 7
AND additional_type = 24
AND data_value < 32
THEN cbor.raise('major type 7, additional information 24, value < 32 (incorrect)',json_build_object('data_value',data_value),NULL::cbor.next_state)
WHEN additional_type >= 28
AND additional_type <= 30
THEN cbor.raise('a reserved value is used for additional information (28, 29, 30)',json_build_object('major_type',major_type,'additional_type',additional_type),NULL::cbor.next_state)
WHEN major_type = ANY(ARRAY[0,1,6])
AND additional_type = 31
THEN cbor.raise(format('additional information 31 used with major type %s',major_type),NULL,NULL::cbor.next_state)
WHEN major_type = 6
AND additional_type = ANY(ARRAY[0,1,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,24,25,26,27])
THEN cbor.next_tag(substring(cbor,2+length_bytes), data_value, encode_binary_format)
ELSE cbor.raise('not implemented',json_build_object('major_type',major_type,'additional_type',additional_type),NULL::cbor.next_state)
END::cbor.next_state
FROM data_item_header
CROSS JOIN calc_length_bytes
CROSS JOIN calc_data_value
floor(SUM(get_byte(cbor,byte_pos) * 2::numeric^(8*(length_bytes-byte_pos))))
INTO data_value
FROM generate_series(1,length_bytes) AS byte_pos;
END IF;
--
-- Sorted by observed frequency from real-life WebAuthn examples
-- to hit the matching case as early as possible.
--
IF major_type = 3 THEN RETURN cbor.major_type_3(cbor,encode_binary_format,additional_type,length_bytes,data_value);
ELSIF major_type = 5 THEN RETURN cbor.major_type_5(cbor,encode_binary_format,additional_type,length_bytes,data_value);
ELSIF major_type = 1 THEN RETURN cbor.major_type_1(cbor,encode_binary_format,additional_type,length_bytes,data_value);
ELSIF major_type = 2 THEN RETURN cbor.major_type_2(cbor,encode_binary_format,additional_type,length_bytes,data_value);
ELSIF major_type = 4 THEN RETURN cbor.major_type_4(cbor,encode_binary_format,additional_type,length_bytes,data_value);
ELSIF major_type = 0 THEN RETURN cbor.major_type_0(cbor,encode_binary_format,additional_type,length_bytes,data_value);
ELSIF major_type = 6 THEN RETURN cbor.major_type_6(cbor,encode_binary_format,additional_type,length_bytes,data_value);
ELSIF major_type = 7 THEN RETURN cbor.major_type_7(cbor,encode_binary_format,additional_type,length_bytes,data_value);
ELSE
RAISE EXCEPTION 'not implemented, major_type %, additional_type %', major_type, additional_type;
END IF;
END;
$$;
8 changes: 8 additions & 0 deletions Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,14 @@ SQL_SRC = \
FUNCTIONS/next_float_half.sql \
FUNCTIONS/next_float_single.sql \
FUNCTIONS/next_float_double.sql \
FUNCTIONS/major_type_0.sql \
FUNCTIONS/major_type_1.sql \
FUNCTIONS/major_type_2.sql \
FUNCTIONS/major_type_3.sql \
FUNCTIONS/major_type_4.sql \
FUNCTIONS/major_type_5.sql \
FUNCTIONS/major_type_6.sql \
FUNCTIONS/major_type_7.sql \
FUNCTIONS/next_item.sql \
FUNCTIONS/next_array.sql \
FUNCTIONS/next_map.sql \
Expand Down
Loading

0 comments on commit 7ea7640

Please sign in to comment.