Skip to content

Commit 09d607b

Browse files
author
Jan Wedvik
committed
Bug#34787357 Hypergraph: row estimates for field=non_field_term ignores indexes and histogram [2/2]
This patch concerns selectivity estimates for the Hypergraph optimizer, for predicates like: table.field <op> <expression>, where <expression> is something other than a field, and independent of 'table'. If <op> is '=', the selectivity of these predicates was previously estimated as 1.0 or 0.1, depending on the specific predicate. But if we have statistics (index or histogram) on the number of distinct values for 'field', and make the assumption that the value of <expression> is uniformly distributed over the values of 'field', we can make a better estimate: selectivity = 1.0 / <number of distinct values>. This patch makes such estimates if a suitable histogram or index can be found. The patch also makes a similar change when <op> is '<>' (i.e. not equal). Previously, we estimated the selectivity to be 1.0 or 0.9, depending on the specific predicate. Now, if we have a suitable index or histogram, we make the following estimate: selectivity = 1.0 - 1.0 / <number of distinct values>. For the '<', '>', '<=' and '>=' operators, we previously estimated the selectivity to be 1.0 if <expression> was an (independent) subquery or non-deterministic. Now we use the same default value that is used for other types of expressions with these operators (i.e. 0.33). Change-Id: I34c307b12216025bedf6e995ee89a58793f59aac
1 parent 44f859b commit 09d607b

21 files changed

+789
-242
lines changed

mysql-test/include/index_merge_ror_cpk.inc

+1-1
Original file line numberDiff line numberDiff line change
@@ -77,7 +77,7 @@ analyze table t1;
7777
--echo # Verify that range scan on CPK is ROR
7878
--echo # (use index_intersection because it is impossible to check that for index union)
7979
--echo # Column 9, rows, can change depending on innodb-page-size.
80-
--replace_regex $elide_costs
80+
--replace_regex $elide_costs_and_rows
8181
explain select * from t1 where pk1 = 1 and pk2 < 80 and key1=0;
8282
--echo # CPK scan + 1 ROR range scan is a special case
8383
--sorted_result

mysql-test/r/explain_json_hypergraph.result

+20-20
Original file line numberDiff line numberDiff line change
@@ -9,11 +9,11 @@ Table Op Msg_type Msg_text
99
test.t1 analyze status OK
1010
EXPLAIN FORMAT=TREE SELECT * FROM t1 WHERE f1 = ( SELECT MIN(f1) FROM t1 AS i WHERE i.f1 > t1.f1 );
1111
EXPLAIN
12-
-> Filter: (t1.f1 = (select #2)) (cost=3.1..3.1 rows=1)
12+
-> Filter: (t1.f1 = (select #2)) (cost=2.5..2.5 rows=1)
1313
-> Table scan on t1 (cost=0.0833..0.25 rows=3)
1414
-> Select #2 (subquery in condition; dependent)
15-
-> Aggregate: min(i.f1) (cost=0.85..0.85 rows=1)
16-
-> Filter: (i.f1 > t1.f1) (cost=0.183..0.55 rows=3)
15+
-> Aggregate: min(i.f1) (cost=0.65..0.65 rows=1)
16+
-> Filter: (i.f1 > t1.f1) (cost=0.55..0.55 rows=1)
1717
-> Table scan on i (cost=0.0833..0.25 rows=3)
1818

1919
Warnings:
@@ -47,9 +47,9 @@ EXPLAIN
4747
"condition": "(i.f1 > t1.f1)",
4848
"operation": "Filter: (i.f1 > t1.f1)",
4949
"access_type": "filter",
50-
"estimated_rows": 3.0,
50+
"estimated_rows": 0.9998999834060669,
5151
"estimated_total_cost": 0.55,
52-
"estimated_first_row_cost": 0.18333333333333335
52+
"estimated_first_row_cost": 0.55
5353
}
5454
],
5555
"heading": "Select #2 (subquery in condition; dependent)",
@@ -62,22 +62,22 @@ EXPLAIN
6262
"access_type": "aggregate",
6363
"estimated_rows": 1.0,
6464
"subquery_location": "condition",
65-
"estimated_total_cost": 0.8500000000000001,
66-
"estimated_first_row_cost": 0.8500000000000001
65+
"estimated_total_cost": 0.6499899983406068,
66+
"estimated_first_row_cost": 0.6499899983406068
6767
}
6868
],
6969
"condition": "(t1.f1 = (select #2))",
7070
"operation": "Filter: (t1.f1 = (select #2))",
7171
"access_type": "filter",
7272
"estimated_rows": 1.0,
73-
"estimated_total_cost": 3.1000000000000005,
74-
"estimated_first_row_cost": 3.1000000000000005
73+
"estimated_total_cost": 2.4999699950218206,
74+
"estimated_first_row_cost": 2.4999699950218206
7575
}
7676
Warnings:
7777
Note 1276 Field or reference 'test.t1.f1' of SELECT #2 was resolved in SELECT #1
7878
EXPLAIN FORMAT=TREE SELECT * FROM t1 WHERE f1 > ( SELECT f1 FROM t1 LIMIT 1 );
7979
EXPLAIN
80-
-> Filter: (t1.f1 > (select #2)) (cost=0.267..0.633 rows=3)
80+
-> Filter: (t1.f1 > (select #2)) (cost=0.633..0.633 rows=1)
8181
-> Table scan on t1 (cost=0.0833..0.25 rows=3)
8282
-> Select #2 (subquery in condition; run only once)
8383
-> Limit: 1 row(s) (cost=0.0833..0.0833 rows=1)
@@ -123,9 +123,9 @@ EXPLAIN
123123
"condition": "(t1.f1 > (select #2))",
124124
"operation": "Filter: (t1.f1 > (select #2))",
125125
"access_type": "filter",
126-
"estimated_rows": 3.0,
126+
"estimated_rows": 0.9998999834060669,
127127
"estimated_total_cost": 0.6333333333333333,
128-
"estimated_first_row_cost": 0.26666666666666666
128+
"estimated_first_row_cost": 0.6333333333333333
129129
}
130130
drop table t1;
131131
#
@@ -800,7 +800,7 @@ EXPLAIN
800800
-> Table scan on x2 (rows=1)
801801
-> Select #2 (subquery in condition; dependent)
802802
-> Aggregate: min(x3.a) (rows=1)
803-
-> Filter: (x1.a = x3.a) (rows=1)
803+
-> Filter: (x1.a = x3.a) (rows=0.1)
804804
-> Table scan on x3 (rows=1)
805805

806806
Warnings:
@@ -844,7 +844,7 @@ EXPLAIN
844844
"condition": "(x1.a = x3.a)",
845845
"operation": "Filter: (x1.a = x3.a)",
846846
"access_type": "filter",
847-
"estimated_rows": 1.0,
847+
"estimated_rows": 0.10000000149011612,
848848
"estimated_total_cost": 0.35,
849849
"estimated_first_row_cost": 0.35
850850
}
@@ -859,8 +859,8 @@ EXPLAIN
859859
"access_type": "aggregate",
860860
"estimated_rows": 1.0,
861861
"subquery_location": "condition",
862-
"estimated_total_cost": 0.44999999999999996,
863-
"estimated_first_row_cost": 0.44999999999999996
862+
"estimated_total_cost": 0.36000000014901157,
863+
"estimated_first_row_cost": 0.36000000014901157
864864
}
865865
],
866866
"join_type": "inner join",
@@ -885,7 +885,7 @@ EXPLAIN
885885
-> Table scan on x2 (rows=1)
886886
-> Select #2 (subquery in extra conditions; dependent)
887887
-> Aggregate: min(x3.a) (rows=1)
888-
-> Filter: (x1.a = x3.a) (rows=1)
888+
-> Filter: (x1.a = x3.a) (rows=0.1)
889889
-> Table scan on x3 (rows=1)
890890

891891
Warnings:
@@ -929,7 +929,7 @@ EXPLAIN
929929
"condition": "(x1.a = x3.a)",
930930
"operation": "Filter: (x1.a = x3.a)",
931931
"access_type": "filter",
932-
"estimated_rows": 1.0,
932+
"estimated_rows": 0.10000000149011612,
933933
"estimated_total_cost": 0.35,
934934
"estimated_first_row_cost": 0.35
935935
}
@@ -944,8 +944,8 @@ EXPLAIN
944944
"access_type": "aggregate",
945945
"estimated_rows": 1.0,
946946
"subquery_location": "extra conditions",
947-
"estimated_total_cost": 0.44999999999999996,
948-
"estimated_first_row_cost": 0.44999999999999996
947+
"estimated_total_cost": 0.36000000014901157,
948+
"estimated_first_row_cost": 0.36000000014901157
949949
}
950950
],
951951
"join_type": "inner join",

mysql-test/r/explain_tree_hypergraph.result

+15-15
Original file line numberDiff line numberDiff line change
@@ -11,32 +11,32 @@ test.t1 analyze status OK
1111
EXPLAIN FORMAT=TREE SELECT * FROM t1 x1
1212
WHERE x1.b = (SELECT MAX(b) FROM t1 x2 WHERE x2.a>x1.a);
1313
EXPLAIN
14-
-> Filter: (x1.b = (select #2)) (cost=2.38..23.8 rows=10)
14+
-> Filter: (x1.b = (select #2)) (cost=1.71..17.1 rows=10)
1515
-> Table scan on x1 (cost=0.025..0.25 rows=10)
1616
-> Select #2 (subquery in condition; dependent)
17-
-> Aggregate: max(x2.b) (cost=2.25..2.25 rows=1)
18-
-> Filter: (x2.a > x1.a) (cost=0.125..1.25 rows=10)
17+
-> Aggregate: max(x2.b) (cost=1.58..1.58 rows=1)
18+
-> Filter: (x2.a > x1.a) (cost=0.375..1.25 rows=3.33)
1919
-> Table scan on x2 (cost=0.025..0.25 rows=10)
2020

2121
Warnings:
2222
Note 1276 Field or reference 'test.x1.a' of SELECT #2 was resolved in SELECT #1
2323
EXPLAIN FORMAT=TREE SELECT * FROM t1 x1
2424
WHERE x1.b IN (SELECT MAX(b) FROM t1 x2 WHERE x2.a>x1.a);
2525
EXPLAIN
26-
-> Filter: <in_optimizer>(x1.b,<exists>(select #2)) (cost=2.48..24.8 rows=10)
26+
-> Filter: <in_optimizer>(x1.b,<exists>(select #2)) (cost=1.81..18.1 rows=10)
2727
-> Table scan on x1 (cost=0.025..0.25 rows=10)
2828
-> Select #2 (subquery in condition; dependent)
29-
-> Filter: (<cache>(x1.b) = <ref_null_helper>(max(x2.b))) (cost=2.35..2.35 rows=1)
30-
-> Aggregate: max(x2.b) (cost=2.25..2.25 rows=1)
31-
-> Filter: (x2.a > x1.a) (cost=0.125..1.25 rows=10)
29+
-> Filter: (<cache>(x1.b) = <ref_null_helper>(max(x2.b))) (cost=1.68..1.68 rows=1)
30+
-> Aggregate: max(x2.b) (cost=1.58..1.58 rows=1)
31+
-> Filter: (x2.a > x1.a) (cost=0.375..1.25 rows=3.33)
3232
-> Table scan on x2 (cost=0.025..0.25 rows=10)
3333

3434
Warnings:
3535
Note 1276 Field or reference 'test.x1.a' of SELECT #2 was resolved in SELECT #1
3636
EXPLAIN FORMAT=TREE SELECT * FROM t1 x1
3737
WHERE x1.b = (SELECT MAX(b) FROM t1 x2 WHERE x2.a>5);
3838
EXPLAIN
39-
-> Filter: (x1.b = (select #2)) (cost=1.19..2.31 rows=10)
39+
-> Filter: (x1.b = (select #2)) (cost=2.31..2.31 rows=1)
4040
-> Table scan on x1 (cost=0.025..0.25 rows=10)
4141
-> Select #2 (subquery in condition; run only once)
4242
-> Aggregate: max(x2.b) (cost=1.06..1.06 rows=1)
@@ -58,22 +58,22 @@ EXPLAIN
5858
EXPLAIN FORMAT=TREE SELECT * FROM t1 x1
5959
WHERE x1.b = (SELECT MAX(b) FROM t1 x2 WHERE x2.a>10*rand(0));
6060
EXPLAIN
61-
-> Filter: (x1.b = (select #2)) (cost=2.38..23.8 rows=10)
61+
-> Filter: (x1.b = (select #2)) (cost=1.71..17.1 rows=10)
6262
-> Table scan on x1 (cost=0.025..0.25 rows=10)
6363
-> Select #2 (subquery in condition; uncacheable)
64-
-> Aggregate: max(x2.b) (cost=2.25..2.25 rows=1)
65-
-> Filter: (x2.a > (10 * rand(0))) (cost=0.125..1.25 rows=10)
64+
-> Aggregate: max(x2.b) (cost=1.58..1.58 rows=1)
65+
-> Filter: (x2.a > (10 * rand(0))) (cost=0.375..1.25 rows=3.33)
6666
-> Table scan on x2 (cost=0.025..0.25 rows=10)
6767

6868
EXPLAIN FORMAT=TREE SELECT * FROM t1 x1
6969
WHERE x1.b IN (SELECT MAX(b) FROM t1 x2 WHERE x2.a>10*rand(0));
7070
EXPLAIN
71-
-> Filter: <in_optimizer>(x1.b,<exists>(select #2)) (cost=2.48..24.8 rows=10)
71+
-> Filter: <in_optimizer>(x1.b,<exists>(select #2)) (cost=1.81..18.1 rows=10)
7272
-> Table scan on x1 (cost=0.025..0.25 rows=10)
7373
-> Select #2 (subquery in condition; dependent)
74-
-> Filter: (<cache>(x1.b) = <ref_null_helper>(max(x2.b))) (cost=2.35..2.35 rows=1)
75-
-> Aggregate: max(x2.b) (cost=2.25..2.25 rows=1)
76-
-> Filter: (x2.a > (10 * rand(0))) (cost=0.125..1.25 rows=10)
74+
-> Filter: (<cache>(x1.b) = <ref_null_helper>(max(x2.b))) (cost=1.68..1.68 rows=1)
75+
-> Aggregate: max(x2.b) (cost=1.58..1.58 rows=1)
76+
-> Filter: (x2.a > (10 * rand(0))) (cost=0.375..1.25 rows=3.33)
7777
-> Table scan on x2 (cost=0.025..0.25 rows=10)
7878

7979
EXPLAIN FORMAT=TREE SELECT SUM(x1.a) s FROM t1 x1

mysql-test/r/hash_join_hypergraph.result

+8-8
Original file line numberDiff line numberDiff line change
@@ -684,7 +684,7 @@ WHERE
684684
t1.col1 = t2.col1
685685
AND t1.col1 = FLOOR(RAND() / 2 + 2);
686686
EXPLAIN
687-
-> Filter: (t1.col1 = floor(((rand() / 2) + 2))) (rows=0.4)
687+
-> Filter: (t1.col1 = floor(((rand() / 2) + 2))) (rows=0.04)
688688
-> Inner hash join (t1.col1 = t2.col1) (rows=0.4)
689689
-> Table scan on t1 (rows=2)
690690
-> Hash
@@ -1021,10 +1021,10 @@ WHERE (NOT EXISTS
10211021
ON alias4.col_varchar = alias3.col_varchar_key)
10221022
WHERE alias3.pk >= table1.pk));
10231023
EXPLAIN
1024-
-> Hash antijoin (no condition), extra conditions: (alias3.pk >= table1.pk) (rows=1.07)
1025-
-> Nested loop inner join (rows=1.6)
1024+
-> Hash antijoin (no condition), extra conditions: (alias3.pk >= table1.pk) (rows=1.33)
1025+
-> Nested loop inner join (rows=2)
10261026
-> Table scan on table2 (rows=1)
1027-
-> Filter: ((table2.pk = table1.pk) or (table1.col_varchar < 'D')) (rows=1.6)
1027+
-> Filter: ((table2.pk = table1.pk) or (table1.col_varchar < 'D')) (rows=2)
10281028
-> Table scan on table1 (rows=4)
10291029
-> Hash
10301030
-> Inner hash join (alias4.col_varchar = alias3.col_varchar_key) (rows=1)
@@ -2202,10 +2202,10 @@ WHERE t2.pk = 9
22022202
EXPLAIN
22032203
-> Table scan on t1 (rows=2)
22042204
-> Select #2 (subquery in projection; dependent)
2205-
-> Nested loop inner join (rows=0.01)
2206-
-> Nested loop left join (rows=0.1)
2207-
-> Single-row covering index lookup on t2 using PRIMARY (pk=9) (rows=0.1)
2208-
-> Filter: ((t3.pk = 9) and (t2.pk = 9)) (rows=0.01)
2205+
-> Nested loop inner join (rows=0.1)
2206+
-> Nested loop left join (rows=1)
2207+
-> Single-row covering index lookup on t2 using PRIMARY (pk=9) (rows=1)
2208+
-> Filter: ((t3.pk = 9) and (t2.pk = 9)) (rows=0.1)
22092209
-> Table scan on t3 (rows=1)
22102210
-> Filter: (t4.pk = 9) (rows=0.1)
22112211
-> Table scan on t4 (rows=1)

mysql-test/r/histograms.result

+23
Original file line numberDiff line numberDiff line change
@@ -3980,4 +3980,27 @@ Note 1003 /* select#1 */ select `test`.`ten_thousand`.`x` AS `x` from `test`.`te
39803980
DROP TABLE ten;
39813981
DROP TABLE hundred;
39823982
DROP TABLE ten_thousand;
3983+
#
3984+
# Bug#34787357 Hypergraph: row estimates for
3985+
# field=non_field_term ignores indexes and histogram.
3986+
#
3987+
CREATE TABLE t1 (col1 INT);
3988+
INSERT INTO t1 VALUES (1),(1),(2),(2),(3),(3),(NULL),(NULL);
3989+
ANALYZE TABLE t1 UPDATE HISTOGRAM ON col1;
3990+
Table Op Msg_type Msg_text
3991+
test.t1 histogram status Histogram statistics created for column 'col1'.
3992+
ANALYZE TABLE t1;
3993+
Table Op Msg_type Msg_text
3994+
test.t1 analyze status OK
3995+
EXPLAIN SELECT * FROM t1 WHERE col1 = FLOOR(RAND(0));
3996+
id select_type table partitions type possible_keys key key_len ref rows filtered Extra
3997+
1 SIMPLE t1 NULL ALL NULL NULL NULL NULL 8 25.00 Using where
3998+
Warnings:
3999+
Note 1003 /* select#1 */ select `test`.`t1`.`col1` AS `col1` from `test`.`t1` where (`test`.`t1`.`col1` = floor(rand(0)))
4000+
EXPLAIN SELECT * FROM t1 WHERE col1 <> FLOOR(RAND(0));
4001+
id select_type table partitions type possible_keys key key_len ref rows filtered Extra
4002+
1 SIMPLE t1 NULL ALL NULL NULL NULL NULL 8 50.00 Using where
4003+
Warnings:
4004+
Note 1003 /* select#1 */ select `test`.`t1`.`col1` AS `col1` from `test`.`t1` where (`test`.`t1`.`col1` <> floor(rand(0)))
4005+
DROP TABLE t1;
39834006
# restart:

0 commit comments

Comments
 (0)