You signed in with another tab or window. Reload to refresh your session. You signed out in another tab or window. Reload to refresh your session. You switched accounts on another tab or window. Reload to refresh your session. Dismiss alert
Implement final duplicate removal by means of filesort.
Normally, deduplication (SELECT DISTINCT) happens by adding a unique index to
the final temporary table. However, in the cases where we do aggregation
directly into a temporary table, we cannot use such an index, since rows change
during query execution and thus cannot be deduplicated on-the-fly.
(E.g., consider SELECT DISTINCT COUNT(*) FROM t1 GROUP BY f1.) The old executor solves
this by adding a post-pass that actually deletes rows from the temporary table;
for small tables, it uses a hash table to deduplicate, but for larger ones, it
uses an O(n^2) algorithm based on pairwise comparison, which is extremely slow.
Neither fits very well in an iterator design, and thus, we replace this
step by filesort, which is consistently O(n log n) with a small constant
factor. Filesort needs to be extended with support for deduplicating rows,
which is done as part of this work.
Note that this removes a determinism-by-accident that was in filesort earlier
(if the algorithm decided to sort by row ID, the row ID would be part of the
key). This also requires new functionality in the test framework, so that we
can test for partially ordered results (--partial_sorted_result).
Change-Id: I985f6d1f30630d6e8e20767c67ba8b4382144df6
Copy file name to clipboard. Expand all lines: mysql-test/r/distinct.result
+47-9
Original file line number
Diff line number
Diff line change
@@ -929,6 +929,14 @@ c1 c2 COUNT(*)
929
929
2 2 1
930
930
3 1 2
931
931
4 4 1
932
+
EXPLAIN FORMAT=tree SELECT DISTINCT c2 FROM t1 GROUP BY c1 HAVING COUNT(*) > 1;
933
+
EXPLAIN
934
+
-> Sort with duplicate removal: <temporary>.c2
935
+
-> Filter: (count(0) > 1)
936
+
-> Table scan on <temporary>
937
+
-> Aggregate using temporary table
938
+
-> Table scan on t1
939
+
932
940
SELECT DISTINCT c2 FROM t1 GROUP BY c1 HAVING COUNT(*) > 1;
933
941
c2
934
942
1
@@ -1096,13 +1104,13 @@ b
1096
1104
1
1097
1105
select distinct min(b) from t1 group by a order by min(c);
1098
1106
min(b)
1099
-
2
1100
1107
1
1108
+
2
1101
1109
explain select distinct min(b) from t1 group by a order by min(c);
1102
1110
id select_type table partitions type possible_keys key key_len ref rows filtered Extra
1103
1111
1 SIMPLE t1 NULL ALL NULL NULL NULL # 4 100.00 Using temporary; Using filesort
1104
-
Warnings:
1105
1112
Note 1003 /* select#1 */ select distinct min(`test`.`t1`.`b`) AS `min(b)` from `test`.`t1` group by `test`.`t1`.`a` order by min(`test`.`t1`.`c`)
1113
+
Warnings:
1106
1114
Insert rows in different order:
1107
1115
delete from t1;
1108
1116
insert into t1 values(200,1,1),(100,1,2),(400,2,2),(300,2,1);
@@ -1324,14 +1332,14 @@ b
1324
1332
1
1325
1333
select distinct min(b) from v1 group by a order by min(c);
1326
1334
min(b)
1327
-
2
1328
1335
1
1336
+
2
1329
1337
explain select distinct min(b) from v1 group by a order by min(c);
1330
1338
id select_type table partitions type possible_keys key key_len ref rows filtered Extra
1331
1339
1 SIMPLE t1 NULL ALL NULL NULL NULL # 4 100.00 Using temporary; Using filesort
1332
1340
1 SIMPLE t2 NULL ALL NULL NULL NULL # 1 100.00 Using where; Using join buffer (Block Nested Loop)
1333
-
Warnings:
1334
1341
Note 1003 /* select#1 */ select distinct min(`test`.`t1`.`b`) AS `min(b)` from `test`.`t1` left join `test`.`t2` on(true) where true group by `test`.`t1`.`a` order by min(`test`.`t1`.`c`)
1342
+
Warnings:
1335
1343
Insert rows in different order:
1336
1344
delete from t1;
1337
1345
insert into t1 values(200,1,1),(100,1,2),(400,2,2),(300,2,1);
@@ -1554,13 +1562,13 @@ b
1554
1562
2
1555
1563
select distinct min(b) from v1 group by a order by min(c);
1556
1564
min(b)
1557
-
4
1558
1565
2
1566
+
4
1559
1567
explain select distinct min(b) from v1 group by a order by min(c);
1560
1568
id select_type table partitions type possible_keys key key_len ref rows filtered Extra
1561
1569
1 SIMPLE t1 NULL ALL NULL NULL NULL # 4 100.00 Using temporary; Using filesort
1562
-
Warnings:
1563
1570
Note 1003 /* select#1 */ select distinct min((`test`.`t1`.`b` * 2)) AS `min(b)` from `test`.`t1` group by (`test`.`t1`.`a` * 2) order by min((`test`.`t1`.`c` * 2))
1571
+
Warnings:
1564
1572
Insert rows in different order:
1565
1573
delete from t1;
1566
1574
insert into t1 values(200,1,1),(100,1,2),(400,2,2),(300,2,1);
@@ -1782,14 +1790,14 @@ b
1782
1790
1
1783
1791
select distinct min(b) from (SELECT t1.* FROM t1 left join t2 on 1) AS derived group by a order by min(c);
1784
1792
min(b)
1785
-
2
1786
1793
1
1794
+
2
1787
1795
explain select distinct min(b) from (SELECT t1.* FROM t1 left join t2 on 1) AS derived group by a order by min(c);
1788
1796
id select_type table partitions type possible_keys key key_len ref rows filtered Extra
1789
1797
1 SIMPLE t1 NULL ALL NULL NULL NULL # 4 100.00 Using temporary; Using filesort
1790
1798
1 SIMPLE t2 NULL ALL NULL NULL NULL # 1 100.00 Using where; Using join buffer (Block Nested Loop)
1791
-
Warnings:
1792
1799
Note 1003 /* select#1 */ select distinct min(`test`.`t1`.`b`) AS `min(b)` from `test`.`t1` left join `test`.`t2` on(true) where true group by `test`.`t1`.`a` order by min(`test`.`t1`.`c`)
1800
+
Warnings:
1793
1801
Insert rows in different order:
1794
1802
delete from t1;
1795
1803
insert into t1 values(200,1,1),(100,1,2),(400,2,2),(300,2,1);
@@ -2106,7 +2114,37 @@ pk
2106
2114
12
2107
2115
EXPLAIN FORMAT=tree SELECT /*+JOIN_ORDER(t2,t3,t1) */ DISTINCT t2.pk FROM t1 LEFT JOIN t2 RIGHT OUTER JOIN t3 ON t2.f1 = t3.f3 ON t1.pk = t3.f2 WHERE t3.pk <> t2.pk;
2108
2116
EXPLAIN
2109
-
<not executable by iterator executor>
2117
+
-> Limit: 1 row(s)
2118
+
-> Table scan on <temporary>
2119
+
-> Temporary table
2120
+
-> Filter: ((t3.f2 = '3') and (t3.f3 = '4') and (t3.pk <> '12'))
2121
+
-> Table scan on t3
2110
2122
2111
2123
DROP TABLE t1, t2, t3;
2112
2124
SET optimizer_switch=@old_optimizer_switch;
2125
+
#
2126
+
# Test that DISTINCT-by-filesort manages to deduplicate across sort chunks.
2127
+
#
2128
+
SET @old_sort_buffer_size = @@sort_buffer_size;
2129
+
SET @@sort_buffer_size = 32768;
2130
+
CREATE TABLE t1 ( f FLOAT );
2131
+
INSERT INTO t1 VALUES (0.0);
2132
+
INSERT INTO t1 SELECT RAND() FROM t1 AS t1, t1 AS t2;
2133
+
INSERT INTO t1 SELECT RAND() FROM t1 AS t1, t1 AS t2;
2134
+
INSERT INTO t1 SELECT RAND() FROM t1 AS t1, t1 AS t2;
2135
+
INSERT INTO t1 SELECT RAND() FROM t1 AS t1, t1 AS t2;
2136
+
INSERT INTO t1 SELECT RAND() FROM t1;
2137
+
INSERT INTO t1 SELECT RAND() FROM t1;
2138
+
EXPLAIN FORMAT=tree SELECT DISTINCT COUNT(*) AS num FROM t1 GROUP BY f HAVING num=1;
2139
+
EXPLAIN
2140
+
-> Sort with duplicate removal: <temporary>.num
2141
+
-> Filter: (num = 1)
2142
+
-> Table scan on <temporary>
2143
+
-> Aggregate using temporary table
2144
+
-> Table scan on t1
2145
+
2146
+
SELECT DISTINCT COUNT(*) AS num FROM t1 GROUP BY f HAVING num=1;
0 commit comments