Skip to content

Commit

Permalink
[BACKPORT 2.20][#20831] YSQL: Show distinct prefix keys explicitly in…
Browse files Browse the repository at this point in the history
… the explain output

Summary:
Original commit: e75e20d / D33042

Before this change, the EXPLAIN output looked as follows:

```
yugabyte=# explain select distinct  col_int_key from mm;
                                            QUERY PLAN
---------------------------------------------------------------------------------------------------
 Unique  (cost=0.00..12.00 rows=93 width=4)
   ->  Distinct Index Only Scan using idx_mm_col_int_key on mm  (cost=0.00..12.00 rows=93 width=4)
         Distinct Prefix: 1
(3 rows)
```

We displayed the length of the distinct prefix because that is the sole parameter necessary for the HybridScan on the DocDB side.

However, users may find it more useful if we displayed the keys corresponding to the index columns instead. The new output looks as follows:

```
yugabyte=# explain select distinct k1 from t;
                                    QUERY PLAN
----------------------------------------------------------------------------------
 Unique  (cost=0.00..22.80 rows=200 width=4)
   ->  Distinct Index Scan using t_pkey on t  (cost=0.00..22.80 rows=200 width=4)
         Distinct Keys: k1
(3 rows)
```

Pick the first `yb_distinct_prefixlen` leading columns from the index target list and print them in the EXPLAIN output.
Also, change the regression test output to reflect the change.
Jira: DB-9820

Test Plan:
Jenkins
./yb_build.sh --java-test TestPgRegressDistinctPushdown

Reviewers: tnayak, smishra

Reviewed By: tnayak

Subscribers: yql

Tags: #jenkins-ready

Differential Revision: https://phorge.dev.yugabyte.com/D33234
  • Loading branch information
pao214 committed Mar 17, 2024
1 parent 8595d65 commit 30d1b69
Show file tree
Hide file tree
Showing 7 changed files with 414 additions and 116 deletions.
66 changes: 55 additions & 11 deletions src/postgres/src/backend/commands/explain.c
Original file line number Diff line number Diff line change
Expand Up @@ -143,7 +143,8 @@ static void
YbAggregateExplainableRPCRequestStat(ExplainState *es,
const YbInstrumentation *instr);
static void YbExplainDistinctPrefixLen(
int yb_distinct_prefixlen, ExplainState *es);
PlanState *planstate, List *indextlist, int yb_distinct_prefixlen,
ExplainState *es, List *ancestors);

typedef enum YbStatLabel
{
Expand Down Expand Up @@ -2391,7 +2392,8 @@ ExplainNode(PlanState *planstate, List *ancestors,
* that's currently the order of operations in DocDB.
*/
YbExplainDistinctPrefixLen(
((IndexScan *) plan)->yb_distinct_prefixlen, es);
planstate, ((IndexScan *) plan)->indextlist,
((IndexScan *) plan)->yb_distinct_prefixlen, es, ancestors);
show_scan_qual(((IndexScan *) plan)->yb_idx_pushdown.quals,
"Remote Index Filter", planstate, ancestors, es);
show_scan_qual(((IndexScan *) plan)->yb_rel_pushdown.quals,
Expand All @@ -2405,10 +2407,10 @@ ExplainNode(PlanState *planstate, List *ancestors,
if (es->verbose && yb_enable_base_scans_cost_model)
{
ExplainPropertyFloat(
"Estimated Seeks", NULL,
"Estimated Seeks", NULL,
((IndexScan *) plan)->estimated_num_seeks, 0, es);
ExplainPropertyFloat(
"Estimated Nexts", NULL,
"Estimated Nexts", NULL,
((IndexScan *) plan)->estimated_num_nexts, 0, es);
}
break;
Expand All @@ -2426,7 +2428,8 @@ ExplainNode(PlanState *planstate, List *ancestors,
* that's currently the order of operations in DocDB.
*/
YbExplainDistinctPrefixLen(
((IndexOnlyScan *) plan)->yb_distinct_prefixlen, es);
planstate, ((IndexOnlyScan *) plan)->indextlist,
((IndexOnlyScan *) plan)->yb_distinct_prefixlen, es, ancestors);
/*
* Remote filter is applied first, so it is output first.
*/
Expand All @@ -2444,10 +2447,10 @@ ExplainNode(PlanState *planstate, List *ancestors,
if (es->verbose && yb_enable_base_scans_cost_model)
{
ExplainPropertyFloat(
"Estimated Seeks", NULL,
"Estimated Seeks", NULL,
((IndexOnlyScan *) plan)->estimated_num_seeks, 0, es);
ExplainPropertyFloat(
"Estimated Nexts", NULL,
"Estimated Nexts", NULL,
((IndexOnlyScan *) plan)->estimated_num_nexts, 0, es);
}
break;
Expand Down Expand Up @@ -4959,15 +4962,56 @@ YbAggregateExplainableRPCRequestStat(ExplainState *es,
* --------------
* Distinct Index Scan
* ...
* Distinct Prefix: <prefix length>
* Distinct Keys: <Index Prefix Keys>
* ...
*
* Adds the distinct prefix keys to the explain info
*/
static void
YbExplainDistinctPrefixLen(int yb_distinct_prefixlen, ExplainState *es)
YbExplainDistinctPrefixLen(PlanState *planstate, List *indextlist,
int yb_distinct_prefixlen, ExplainState *es,
List *ancestors)
{
if (yb_distinct_prefixlen > 0)
ExplainPropertyInteger(
"Distinct Prefix", NULL, yb_distinct_prefixlen, es);
{
/* Print distinct prefix keys. */
List *context;
List *result = NIL;
StringInfoData distinct_prefix_key_buf;
bool useprefix;
int keyno;
ListCell *tlelc;

initStringInfo(&distinct_prefix_key_buf);

/* Set up deparsing context */
context = set_deparse_context_planstate(es->deparse_cxt,
(Node *) planstate,
ancestors);
useprefix = (list_length(es->rtable) > 1 || es->verbose);

keyno = 0;
foreach(tlelc, indextlist)
{
TargetEntry *indextle;
char *exprstr;

if (keyno >= yb_distinct_prefixlen)
break;

indextle = (TargetEntry *) lfirst(tlelc);

/* Deparse the expression, showing any top-level cast */
exprstr = deparse_expression((Node *) indextle->expr, context,
useprefix, true);
resetStringInfo(&distinct_prefix_key_buf);
appendStringInfoString(&distinct_prefix_key_buf, exprstr);
/* Emit one property-list item per key */
result = lappend(result, pstrdup(distinct_prefix_key_buf.data));

keyno++;
}

ExplainPropertyList("Distinct Keys", result, es);
}
}
2 changes: 1 addition & 1 deletion src/postgres/src/test/regress/expected/yb_aggregates.out
Original file line number Diff line number Diff line change
Expand Up @@ -715,7 +715,7 @@ EXPLAIN (COSTS OFF) SELECT DISTINCT int_8 FROM ybaggtest;
HashAggregate
Group Key: int_8
-> Distinct Index Only Scan using ybaggtestindex on ybaggtest
Distinct Prefix: 2
Distinct Keys: int_8, int_2
(4 rows)

EXPLAIN (COSTS OFF) SELECT COUNT(distinct int_4), SUM(int_4) FROM ybaggtest;
Expand Down
254 changes: 254 additions & 0 deletions src/postgres/src/test/regress/expected/yb_bitmap_scans_distinct.out
Original file line number Diff line number Diff line change
@@ -0,0 +1,254 @@
--
-- test distinct bitmap scans (distinct pushdown is not supported by bitmap scans)
--
SET yb_explain_hide_non_deterministic_fields = true;
CREATE TABLE test_distinct (r1 INT, r2 INT, r3 INT, v INT, PRIMARY KEY(r1 ASC, r2 ASC, r3 ASC)) SPLIT AT VALUES ((1, 1, 500));
INSERT INTO test_distinct (SELECT 1, i%3, i, i/3 FROM GENERATE_SERIES(1, 1000) AS i);
-- Add one more distinct value to catch bugs that arise only with more than one distinct value.
INSERT INTO test_distinct (SELECT 2, i%3, i, i/3 FROM GENERATE_SERIES(1, 1000) AS i);
SET yb_enable_distinct_pushdown = true;
SET enable_bitmapscan = false;
EXPLAIN (ANALYZE, DIST, COSTS OFF, SUMMARY OFF)
SELECT DISTINCT r1 FROM test_distinct WHERE r1 < 2 ORDER BY r1;
QUERY PLAN
---------------------------------------------------------------------------------------------
Unique (actual rows=1 loops=1)
-> Distinct Index Scan using test_distinct_pkey on test_distinct (actual rows=2 loops=1)
Index Cond: (r1 < 2)
Distinct Keys: r1
Storage Table Read Requests: 2
Storage Table Rows Scanned: 2
(6 rows)

SELECT DISTINCT r1 FROM test_distinct WHERE r1 < 2 ORDER BY r1;
r1
----
1
(1 row)

EXPLAIN (ANALYZE, COSTS OFF, SUMMARY OFF)
SELECT DISTINCT r1, r2 FROM test_distinct WHERE r1 < 2 OR r2 < 3 ORDER BY r1, r2;
QUERY PLAN
---------------------------------------------------------------------------------------------
Unique (actual rows=6 loops=1)
-> Distinct Index Scan using test_distinct_pkey on test_distinct (actual rows=7 loops=1)
Distinct Keys: r1, r2
Storage Filter: ((r1 < 2) OR (r2 < 3))
(4 rows)

SELECT DISTINCT r1, r2 FROM test_distinct WHERE r1 < 2 OR r2 < 3 ORDER BY r1, r2;
r1 | r2
----+----
1 | 0
1 | 1
1 | 2
2 | 0
2 | 1
2 | 2
(6 rows)

EXPLAIN (ANALYZE, COSTS OFF, SUMMARY OFF)
SELECT DISTINCT r1, r2 FROM test_distinct WHERE r1 < 2 OR r2 < 3 OR r3 < 2 ORDER BY r1, r2;
QUERY PLAN
------------------------------------------------------------------
Sort (actual rows=6 loops=1)
Sort Key: r1, r2
Sort Method: quicksort
-> HashAggregate (actual rows=6 loops=1)
Group Key: r1, r2
-> Seq Scan on test_distinct (actual rows=2000 loops=1)
Storage Filter: ((r1 < 2) OR (r2 < 3) OR (r3 < 2))
(7 rows)

SELECT DISTINCT r1, r2 FROM test_distinct WHERE r1 < 2 OR r2 < 3 OR r3 < 2 ORDER BY r1, r2;
r1 | r2
----+----
1 | 0
1 | 1
1 | 2
2 | 0
2 | 1
2 | 2
(6 rows)

RESET enable_bitmapscan;
/*+ BitmapScan(test_distinct) */ EXPLAIN (ANALYZE, DIST, COSTS OFF, SUMMARY OFF)
SELECT DISTINCT r1 FROM test_distinct WHERE r1 < 2 ORDER BY r1;
QUERY PLAN
--------------------------------------------------------------------------------------
Unique (actual rows=1 loops=1)
-> Sort (actual rows=1000 loops=1)
Sort Key: r1
Sort Method: quicksort
-> YB Bitmap Table Scan on test_distinct (actual rows=1000 loops=1)
Storage Table Read Requests: 1
Storage Table Rows Scanned: 1000
-> Bitmap Index Scan on test_distinct_pkey (actual rows=1000 loops=1)
Index Cond: (r1 < 2)
Storage Table Read Requests: 2
Storage Table Rows Scanned: 1000
(11 rows)

/*+ BitmapScan(test_distinct) */
SELECT DISTINCT r1 FROM test_distinct WHERE r1 < 2 ORDER BY r1;
r1
----
1
(1 row)

/*+ BitmapScan(test_distinct) */ EXPLAIN (ANALYZE, COSTS OFF, SUMMARY OFF)
SELECT DISTINCT r1, r2 FROM test_distinct WHERE r1 < 2 OR r2 < 3 ORDER BY r1, r2;
QUERY PLAN
--------------------------------------------------------------------------------------------
Unique (actual rows=6 loops=1)
-> Sort (actual rows=2000 loops=1)
Sort Key: r1, r2
Sort Method: quicksort
-> YB Bitmap Table Scan on test_distinct (actual rows=2000 loops=1)
-> BitmapOr (actual rows=2000 loops=1)
-> Bitmap Index Scan on test_distinct_pkey (actual rows=1000 loops=1)
Index Cond: (r1 < 2)
-> Bitmap Index Scan on test_distinct_pkey (actual rows=2000 loops=1)
Index Cond: (r2 < 3)
(10 rows)

/*+ BitmapScan(test_distinct) */
SELECT DISTINCT r1, r2 FROM test_distinct WHERE r1 < 2 OR r2 < 3 ORDER BY r1, r2;
r1 | r2
----+----
1 | 0
1 | 1
1 | 2
2 | 0
2 | 1
2 | 2
(6 rows)

/*+ BitmapScan(test_distinct) */ EXPLAIN (ANALYZE, COSTS OFF, SUMMARY OFF)
SELECT DISTINCT r1, r2 FROM test_distinct WHERE r1 < 2 OR r2 < 3 OR r3 < 2 ORDER BY r1, r2;
QUERY PLAN
--------------------------------------------------------------------------------------------
Unique (actual rows=6 loops=1)
-> Sort (actual rows=2000 loops=1)
Sort Key: r1, r2
Sort Method: quicksort
-> YB Bitmap Table Scan on test_distinct (actual rows=2000 loops=1)
-> BitmapOr (actual rows=2000 loops=1)
-> Bitmap Index Scan on test_distinct_pkey (actual rows=1000 loops=1)
Index Cond: (r1 < 2)
-> Bitmap Index Scan on test_distinct_pkey (actual rows=2000 loops=1)
Index Cond: (r2 < 3)
-> Bitmap Index Scan on test_distinct_pkey (actual rows=2 loops=1)
Index Cond: (r3 < 2)
(12 rows)

/*+ BitmapScan(test_distinct) */
SELECT DISTINCT r1, r2 FROM test_distinct WHERE r1 < 2 OR r2 < 3 OR r3 < 2 ORDER BY r1, r2;
r1 | r2
----+----
1 | 0
1 | 1
1 | 2
2 | 0
2 | 1
2 | 2
(6 rows)

SET yb_enable_distinct_pushdown TO false;
SET enable_bitmapscan = false;
EXPLAIN (ANALYZE, DIST, COSTS OFF, SUMMARY OFF)
SELECT DISTINCT r1 FROM test_distinct WHERE r1 < 2 ORDER BY r1;
QUERY PLAN
---------------------------------------------------------------------------------------
Unique (actual rows=1 loops=1)
-> Index Scan using test_distinct_pkey on test_distinct (actual rows=1000 loops=1)
Index Cond: (r1 < 2)
Storage Table Read Requests: 2
Storage Table Rows Scanned: 1000
(5 rows)

SELECT DISTINCT r1 FROM test_distinct WHERE r1 < 2 ORDER BY r1;
r1
----
1
(1 row)

EXPLAIN (ANALYZE, COSTS OFF, SUMMARY OFF)
SELECT DISTINCT r1, r2 FROM test_distinct WHERE r1 < 2 OR r2 < 3 ORDER BY r1, r2;
QUERY PLAN
------------------------------------------------------------------
Sort (actual rows=6 loops=1)
Sort Key: r1, r2
Sort Method: quicksort
-> HashAggregate (actual rows=6 loops=1)
Group Key: r1, r2
-> Seq Scan on test_distinct (actual rows=2000 loops=1)
Storage Filter: ((r1 < 2) OR (r2 < 3))
(7 rows)

SELECT DISTINCT r1, r2 FROM test_distinct WHERE r1 < 2 OR r2 < 3 ORDER BY r1, r2;
r1 | r2
----+----
1 | 0
1 | 1
1 | 2
2 | 0
2 | 1
2 | 2
(6 rows)

RESET enable_bitmapscan;
/*+ BitmapScan(test_distinct) */ EXPLAIN (ANALYZE, DIST, COSTS OFF, SUMMARY OFF)
SELECT DISTINCT r1 FROM test_distinct WHERE r1 < 2 ORDER BY r1;
QUERY PLAN
--------------------------------------------------------------------------------------
Unique (actual rows=1 loops=1)
-> Sort (actual rows=1000 loops=1)
Sort Key: r1
Sort Method: quicksort
-> YB Bitmap Table Scan on test_distinct (actual rows=1000 loops=1)
Storage Table Read Requests: 1
Storage Table Rows Scanned: 1000
-> Bitmap Index Scan on test_distinct_pkey (actual rows=1000 loops=1)
Index Cond: (r1 < 2)
Storage Table Read Requests: 2
Storage Table Rows Scanned: 1000
(11 rows)

/*+ BitmapScan(test_distinct) */
SELECT DISTINCT r1 FROM test_distinct WHERE r1 < 2 ORDER BY r1;
r1
----
1
(1 row)

/*+ BitmapScan(test_distinct) */ EXPLAIN (ANALYZE, COSTS OFF, SUMMARY OFF)
SELECT DISTINCT r1, r2 FROM test_distinct WHERE r1 < 2 OR r2 < 3 ORDER BY r1, r2;
QUERY PLAN
--------------------------------------------------------------------------------------------
Unique (actual rows=6 loops=1)
-> Sort (actual rows=2000 loops=1)
Sort Key: r1, r2
Sort Method: quicksort
-> YB Bitmap Table Scan on test_distinct (actual rows=2000 loops=1)
-> BitmapOr (actual rows=2000 loops=1)
-> Bitmap Index Scan on test_distinct_pkey (actual rows=1000 loops=1)
Index Cond: (r1 < 2)
-> Bitmap Index Scan on test_distinct_pkey (actual rows=2000 loops=1)
Index Cond: (r2 < 3)
(10 rows)

/*+ BitmapScan(test_distinct) */
SELECT DISTINCT r1, r2 FROM test_distinct WHERE r1 < 2 OR r2 < 3 ORDER BY r1, r2;
r1 | r2
----+----
1 | 0
1 | 1
1 | 2
2 | 0
2 | 1
2 | 2
(6 rows)

RESET yb_enable_distinct_pushdown;
DROP TABLE test_distinct;
Loading

0 comments on commit 30d1b69

Please sign in to comment.