Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Bug#67718: InnoDB drastically under-fills pages in certain conditions
The problem arises from InnoDB's B+ tree page split algorithm that attempts to optimize for sequential inserts but might end up causing poor space utilization depending on the distribution pattern of index key values throughout the index. If an insert that causes a page to be split is inserting a key value that is an immediate successor or predecessor to the last inserted key value in the same page, the insertion point is used as the split point irrespective of the actual distribution of values in the page. For example, further inserts into the page with a key value that is greater than any key value already in the page, but lower than the minimum key value of its sibling page, will lead to a series of drastically under-filled pages. This page split behavior is especially a problem for workloads with a random distribution of inserts across the pages. Since any given insert into a page might be an immediate successor or predecessor to the last insert, a pattern of sequential inserts might be incorrectly assumed and lead to an unbalanced distribution of data through page splits. The solution is to use the standard B+ tree split algorithm while still preserving some form of optimization for sequential inserts. When a page needs to be split, the median key value in a page is used as the split point so that the data is distributed in a symmetric fashion. In order to improve space utilization for sequential inserts, if a pattern of sequential inserts is detected, the insert point might be used as the split point if the page is the right-most or left-most page in the tree and the key value being inserted is above the largest or below the smallest value in the index. A new variable named "innodb_index_page_split_mode" is introduced to provide a way to control the page split behavior. The variable accepts a set of permitted values: "symmetric", "lower" and "upper". Currently, "symmetric" is always required and will be added if not stated. If its value is empty, the original aforementioned behavior is used instead. Using "symmetric" alone, pages are always split roughly in the middle. When "symmetric,lower" or "symmetric,upper" are set, sequential inserts into the left-most or right-most page in the tree will cause the insertion point to be used as the split point. Using "symmetric,upper, lower" will cause sequential-insert behavior to split at the insertion point (as above) for both the left-most and right-most pages in the tree (or more precisely, at their level).
- Loading branch information
Davi Arnaut
committed
Dec 5, 2012
1 parent
88a6812
commit 48996ca
Showing
9 changed files
with
627 additions
and
11 deletions.
There are no files selected for viewing
134 changes: 134 additions & 0 deletions
134
mysql-test/suite/innodb/r/innodb_index_page_split_mode.result
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,134 @@ | ||
# | ||
# Bug#67718: InnoDB drastically under-fills pages in certain conditions | ||
# | ||
SET @old_innodb_file_per_table = @@GLOBAL.innodb_file_per_table; | ||
SET GLOBAL innodb_file_per_table = ON; | ||
SET @old_innodb_index_page_split_mode = @@GLOBAL.innodb_index_page_split_mode; | ||
SET GLOBAL innodb_index_page_split_mode = "symmetric,lower,upper"; | ||
CREATE TABLE page_split_test | ||
( | ||
id BIGINT UNSIGNED NOT NULL, | ||
payload1 CHAR(255) NOT NULL, | ||
payload2 CHAR(255) NOT NULL, | ||
payload3 CHAR(255) NOT NULL, | ||
payload4 CHAR(255) NOT NULL, | ||
PRIMARY KEY (`id`) | ||
) ENGINE=INNODB; | ||
# Fill up the root page, but don't split it. | ||
INSERT INTO page_split_test VALUES (1, REPEAT("A", 255), REPEAT("B", 255), | ||
REPEAT("C", 255), REPEAT("D", 255)); | ||
INSERT INTO page_split_test VALUES (2, REPEAT("A", 255), REPEAT("B", 255), | ||
REPEAT("C", 255), REPEAT("D", 255)); | ||
INSERT INTO page_split_test VALUES (3, REPEAT("A", 255), REPEAT("B", 255), | ||
REPEAT("C", 255), REPEAT("D", 255)); | ||
INSERT INTO page_split_test VALUES (4, REPEAT("A", 255), REPEAT("B", 255), | ||
REPEAT("C", 255), REPEAT("D", 255)); | ||
INSERT INTO page_split_test VALUES (5, REPEAT("A", 255), REPEAT("B", 255), | ||
REPEAT("C", 255), REPEAT("D", 255)); | ||
INSERT INTO page_split_test VALUES (6, REPEAT("A", 255), REPEAT("B", 255), | ||
REPEAT("C", 255), REPEAT("D", 255)); | ||
INSERT INTO page_split_test VALUES (7, REPEAT("A", 255), REPEAT("B", 255), | ||
REPEAT("C", 255), REPEAT("D", 255)); | ||
INSERT INTO page_split_test VALUES (8, REPEAT("A", 255), REPEAT("B", 255), | ||
REPEAT("C", 255), REPEAT("D", 255)); | ||
INSERT INTO page_split_test VALUES (9, REPEAT("A", 255), REPEAT("B", 255), | ||
REPEAT("C", 255), REPEAT("D", 255)); | ||
INSERT INTO page_split_test VALUES (10, REPEAT("A", 255), REPEAT("B", 255), | ||
REPEAT("C", 255), REPEAT("D", 255)); | ||
INSERT INTO page_split_test VALUES (11, REPEAT("A", 255), REPEAT("B", 255), | ||
REPEAT("C", 255), REPEAT("D", 255)); | ||
INSERT INTO page_split_test VALUES (12, REPEAT("A", 255), REPEAT("B", 255), | ||
REPEAT("C", 255), REPEAT("D", 255)); | ||
INSERT INTO page_split_test VALUES (13, REPEAT("A", 255), REPEAT("B", 255), | ||
REPEAT("C", 255), REPEAT("D", 255)); | ||
INSERT INTO page_split_test VALUES (14, REPEAT("A", 255), REPEAT("B", 255), | ||
REPEAT("C", 255), REPEAT("D", 255)); | ||
# The next insert will trigger the root page to be split into a root | ||
# non-leaf page and two leaf pages, both about half full. | ||
INSERT INTO page_split_test VALUES (15, REPEAT("A", 255), REPEAT("B", 255), | ||
REPEAT("C", 255), REPEAT("D", 255)); | ||
# Insert enough records to fill up the right-most leaf page, but don't | ||
# split it. | ||
INSERT INTO page_split_test VALUES (16, REPEAT("A", 255), REPEAT("B", 255), | ||
REPEAT("C", 255), REPEAT("D", 255)); | ||
INSERT INTO page_split_test VALUES (17, REPEAT("A", 255), REPEAT("B", 255), | ||
REPEAT("C", 255), REPEAT("D", 255)); | ||
INSERT INTO page_split_test VALUES (18, REPEAT("A", 255), REPEAT("B", 255), | ||
REPEAT("C", 255), REPEAT("D", 255)); | ||
INSERT INTO page_split_test VALUES (19, REPEAT("A", 255), REPEAT("B", 255), | ||
REPEAT("C", 255), REPEAT("D", 255)); | ||
INSERT INTO page_split_test VALUES (20, REPEAT("A", 255), REPEAT("B", 255), | ||
REPEAT("C", 255), REPEAT("D", 255)); | ||
INSERT INTO page_split_test VALUES (21, REPEAT("A", 255), REPEAT("B", 255), | ||
REPEAT("C", 255), REPEAT("D", 255)); | ||
# Increase the id by some amount, and insert a few records in descending | ||
# order from that point. Each row inserted would split the "full" page | ||
# from above, and create a new page for the single record. | ||
INSERT INTO page_split_test VALUES (30, REPEAT("A", 255), REPEAT("B", 255), | ||
REPEAT("C", 255), REPEAT("D", 255)); | ||
INSERT INTO page_split_test VALUES (29, REPEAT("A", 255), REPEAT("B", 255), | ||
REPEAT("C", 255), REPEAT("D", 255)); | ||
INSERT INTO page_split_test VALUES (28, REPEAT("A", 255), REPEAT("B", 255), | ||
REPEAT("C", 255), REPEAT("D", 255)); | ||
# Dump the page information about the test table. The right-most page | ||
# has only one record as the record (key 30) was inserted in ascending | ||
# order. The remaining nodes are split in half and filled by the later | ||
# inserts. | ||
SELECT page_number, page_type, number_records | ||
FROM information_schema.innodb_buffer_page | ||
WHERE table_name = "test/page_split_test" AND index_name = "PRIMARY" | ||
ORDER BY page_number; | ||
page_number page_type number_records | ||
3 INDEX 4 | ||
4 INDEX 7 | ||
5 INDEX 7 | ||
6 INDEX 1 | ||
7 INDEX 9 | ||
DROP TABLE page_split_test; | ||
# | ||
# Test page split behavior when keys are being inserted in order. | ||
# | ||
CREATE TABLE page_split_test | ||
( | ||
id BIGINT UNSIGNED NOT NULL, | ||
payload CHAR(255) NOT NULL, | ||
PRIMARY KEY (`id`) | ||
) ENGINE=INNODB; | ||
# Populate table with 300 rows in ascending key order. | ||
|
||
# Space utilization is the same across leaf pages, except at the end | ||
# since it wasn't completely filled up. | ||
SELECT page_number, page_type, number_records | ||
FROM information_schema.innodb_buffer_page | ||
WHERE table_name = "test/page_split_test" AND index_name = "PRIMARY" | ||
ORDER BY page_number; | ||
page_number page_type number_records | ||
3 INDEX 7 | ||
4 INDEX 27 | ||
5 INDEX 54 | ||
6 INDEX 54 | ||
7 INDEX 54 | ||
8 INDEX 54 | ||
9 INDEX 54 | ||
10 INDEX 3 | ||
TRUNCATE TABLE page_split_test; | ||
# Populate table with 300 rows in descending key order. | ||
|
||
# Space utilization is the same across leaf pages, except at the end | ||
# since it wasn't completely filled up. | ||
SELECT page_number, page_type, number_records | ||
FROM information_schema.innodb_buffer_page | ||
WHERE table_name = "test/page_split_test" AND index_name = "PRIMARY" | ||
ORDER BY page_number; | ||
page_number page_type number_records | ||
3 INDEX 7 | ||
4 INDEX 54 | ||
5 INDEX 27 | ||
6 INDEX 54 | ||
7 INDEX 54 | ||
8 INDEX 54 | ||
9 INDEX 54 | ||
10 INDEX 3 | ||
DROP TABLE page_split_test; | ||
SET GLOBAL innodb_file_per_table = @old_innodb_file_per_table; | ||
SET GLOBAL innodb_index_page_split_mode = @old_innodb_index_page_split_mode; |
130 changes: 130 additions & 0 deletions
130
mysql-test/suite/innodb/t/innodb_index_page_split_mode.test
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,130 @@ | ||
--echo # | ||
--echo # Bug#67718: InnoDB drastically under-fills pages in certain conditions | ||
--echo # | ||
|
||
source include/have_innodb.inc; | ||
|
||
SET @old_innodb_file_per_table = @@GLOBAL.innodb_file_per_table; | ||
SET GLOBAL innodb_file_per_table = ON; | ||
|
||
SET @old_innodb_index_page_split_mode = @@GLOBAL.innodb_index_page_split_mode; | ||
SET GLOBAL innodb_index_page_split_mode = "symmetric,lower,upper"; | ||
|
||
CREATE TABLE page_split_test | ||
( | ||
id BIGINT UNSIGNED NOT NULL, | ||
payload1 CHAR(255) NOT NULL, | ||
payload2 CHAR(255) NOT NULL, | ||
payload3 CHAR(255) NOT NULL, | ||
payload4 CHAR(255) NOT NULL, | ||
PRIMARY KEY (`id`) | ||
) ENGINE=INNODB; | ||
|
||
--echo # Fill up the root page, but don't split it. | ||
let $i = 1; | ||
while ($i < 15) | ||
{ | ||
eval | ||
INSERT INTO page_split_test VALUES ($i, REPEAT("A", 255), REPEAT("B", 255), | ||
REPEAT("C", 255), REPEAT("D", 255)); | ||
inc $i; | ||
} | ||
|
||
--echo # The next insert will trigger the root page to be split into a root | ||
--echo # non-leaf page and two leaf pages, both about half full. | ||
|
||
INSERT INTO page_split_test VALUES (15, REPEAT("A", 255), REPEAT("B", 255), | ||
REPEAT("C", 255), REPEAT("D", 255)); | ||
|
||
--echo # Insert enough records to fill up the right-most leaf page, but don't | ||
--echo # split it. | ||
|
||
INSERT INTO page_split_test VALUES (16, REPEAT("A", 255), REPEAT("B", 255), | ||
REPEAT("C", 255), REPEAT("D", 255)); | ||
INSERT INTO page_split_test VALUES (17, REPEAT("A", 255), REPEAT("B", 255), | ||
REPEAT("C", 255), REPEAT("D", 255)); | ||
INSERT INTO page_split_test VALUES (18, REPEAT("A", 255), REPEAT("B", 255), | ||
REPEAT("C", 255), REPEAT("D", 255)); | ||
INSERT INTO page_split_test VALUES (19, REPEAT("A", 255), REPEAT("B", 255), | ||
REPEAT("C", 255), REPEAT("D", 255)); | ||
INSERT INTO page_split_test VALUES (20, REPEAT("A", 255), REPEAT("B", 255), | ||
REPEAT("C", 255), REPEAT("D", 255)); | ||
INSERT INTO page_split_test VALUES (21, REPEAT("A", 255), REPEAT("B", 255), | ||
REPEAT("C", 255), REPEAT("D", 255)); | ||
|
||
--echo # Increase the id by some amount, and insert a few records in descending | ||
--echo # order from that point. Each row inserted would split the "full" page | ||
--echo # from above, and create a new page for the single record. | ||
|
||
INSERT INTO page_split_test VALUES (30, REPEAT("A", 255), REPEAT("B", 255), | ||
REPEAT("C", 255), REPEAT("D", 255)); | ||
INSERT INTO page_split_test VALUES (29, REPEAT("A", 255), REPEAT("B", 255), | ||
REPEAT("C", 255), REPEAT("D", 255)); | ||
INSERT INTO page_split_test VALUES (28, REPEAT("A", 255), REPEAT("B", 255), | ||
REPEAT("C", 255), REPEAT("D", 255)); | ||
|
||
--echo # Dump the page information about the test table. The right-most page | ||
--echo # has only one record as the record (key 30) was inserted in ascending | ||
--echo # order. The remaining nodes are split in half and filled by the later | ||
--echo # inserts. | ||
|
||
SELECT page_number, page_type, number_records | ||
FROM information_schema.innodb_buffer_page | ||
WHERE table_name = "test/page_split_test" AND index_name = "PRIMARY" | ||
ORDER BY page_number; | ||
|
||
DROP TABLE page_split_test; | ||
|
||
--echo # | ||
--echo # Test page split behavior when keys are being inserted in order. | ||
--echo # | ||
|
||
CREATE TABLE page_split_test | ||
( | ||
id BIGINT UNSIGNED NOT NULL, | ||
payload CHAR(255) NOT NULL, | ||
PRIMARY KEY (`id`) | ||
) ENGINE=INNODB; | ||
|
||
--echo # Populate table with 300 rows in ascending key order. | ||
disable_query_log; | ||
let $i = 0; | ||
while ($i < 300) | ||
{ | ||
eval INSERT INTO page_split_test VALUES ($i, REPEAT("A", 255)); | ||
inc $i; | ||
} | ||
enable_query_log; | ||
--echo | ||
|
||
--echo # Space utilization is the same across leaf pages, except at the end | ||
--echo # since it wasn't completely filled up. | ||
SELECT page_number, page_type, number_records | ||
FROM information_schema.innodb_buffer_page | ||
WHERE table_name = "test/page_split_test" AND index_name = "PRIMARY" | ||
ORDER BY page_number; | ||
|
||
TRUNCATE TABLE page_split_test; | ||
|
||
--echo # Populate table with 300 rows in descending key order. | ||
disable_query_log; | ||
let $i = 300; | ||
while ($i > 0) | ||
{ | ||
eval INSERT INTO page_split_test VALUES ($i, REPEAT("A", 255)); | ||
dec $i; | ||
} | ||
enable_query_log; | ||
--echo | ||
|
||
--echo # Space utilization is the same across leaf pages, except at the end | ||
--echo # since it wasn't completely filled up. | ||
SELECT page_number, page_type, number_records | ||
FROM information_schema.innodb_buffer_page | ||
WHERE table_name = "test/page_split_test" AND index_name = "PRIMARY" | ||
ORDER BY page_number; | ||
|
||
DROP TABLE page_split_test; | ||
|
||
SET GLOBAL innodb_file_per_table = @old_innodb_file_per_table; | ||
SET GLOBAL innodb_index_page_split_mode = @old_innodb_index_page_split_mode; |
100 changes: 100 additions & 0 deletions
100
mysql-test/suite/sys_vars/r/innodb_index_page_split_mode_basic.result
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,100 @@ | ||
SET @old_innodb_index_page_split_mode = @@GLOBAL.innodb_index_page_split_mode; | ||
SELECT @old_innodb_index_page_split_mode; | ||
@old_innodb_index_page_split_mode | ||
|
||
# | ||
# Default value. | ||
# | ||
SELECT @@GLOBAL.innodb_index_page_split_mode; | ||
@@GLOBAL.innodb_index_page_split_mode | ||
|
||
SELECT @@SESSION.innodb_index_page_split_mode; | ||
@@SESSION.innodb_index_page_split_mode | ||
|
||
SET GLOBAL innodb_index_page_split_mode = 'symmetric'; | ||
SELECT @@GLOBAL.innodb_index_page_split_mode; | ||
@@GLOBAL.innodb_index_page_split_mode | ||
symmetric | ||
SET GLOBAL innodb_index_page_split_mode = 'lower'; | ||
SELECT @@GLOBAL.innodb_index_page_split_mode; | ||
@@GLOBAL.innodb_index_page_split_mode | ||
symmetric,lower | ||
SET GLOBAL innodb_index_page_split_mode = 'upper'; | ||
SELECT @@GLOBAL.innodb_index_page_split_mode; | ||
@@GLOBAL.innodb_index_page_split_mode | ||
symmetric,upper | ||
SET GLOBAL innodb_index_page_split_mode = 'lower,symmetric'; | ||
SELECT @@GLOBAL.innodb_index_page_split_mode; | ||
@@GLOBAL.innodb_index_page_split_mode | ||
symmetric,lower | ||
SET GLOBAL innodb_index_page_split_mode = 'upper,symmetric'; | ||
SELECT @@GLOBAL.innodb_index_page_split_mode; | ||
@@GLOBAL.innodb_index_page_split_mode | ||
symmetric,upper | ||
SET GLOBAL innodb_index_page_split_mode = 'upper,lower,symmetric'; | ||
SELECT @@GLOBAL.innodb_index_page_split_mode; | ||
@@GLOBAL.innodb_index_page_split_mode | ||
symmetric,upper,lower | ||
# | ||
# Scope. | ||
# | ||
SET GLOBAL innodb_index_page_split_mode = 'symmetric'; | ||
SET SESSION innodb_index_page_split_mode = 'lower'; | ||
SELECT @@GLOBAL.innodb_index_page_split_mode; | ||
@@GLOBAL.innodb_index_page_split_mode | ||
symmetric | ||
SELECT @@SESSION.innodb_index_page_split_mode; | ||
@@SESSION.innodb_index_page_split_mode | ||
symmetric,lower | ||
SHOW GLOBAL VARIABLES LIKE 'innodb_index_page_split_mode'; | ||
Variable_name Value | ||
innodb_index_page_split_mode symmetric | ||
SHOW SESSION VARIABLES LIKE 'innodb_index_page_split_mode'; | ||
Variable_name Value | ||
innodb_index_page_split_mode symmetric,lower | ||
SELECT * FROM INFORMATION_SCHEMA.GLOBAL_VARIABLES WHERE | ||
VARIABLE_NAME = 'innodb_index_page_split_mode'; | ||
VARIABLE_NAME VARIABLE_VALUE | ||
INNODB_INDEX_PAGE_SPLIT_MODE symmetric | ||
SELECT * FROM INFORMATION_SCHEMA.SESSION_VARIABLES WHERE | ||
VARIABLE_NAME = 'innodb_index_page_split_mode'; | ||
VARIABLE_NAME VARIABLE_VALUE | ||
INNODB_INDEX_PAGE_SPLIT_MODE symmetric,lower | ||
SET GLOBAL innodb_index_page_split_mode = 'symmetric'; | ||
SET SESSION innodb_index_page_split_mode = 'lower'; | ||
SELECT @@GLOBAL.innodb_index_page_split_mode; | ||
@@GLOBAL.innodb_index_page_split_mode | ||
symmetric | ||
SELECT @@SESSION.innodb_index_page_split_mode; | ||
@@SESSION.innodb_index_page_split_mode | ||
symmetric,lower | ||
SET GLOBAL innodb_index_page_split_mode = 'symmetric'; | ||
SET SESSION innodb_index_page_split_mode = ''; | ||
SELECT @@GLOBAL.innodb_index_page_split_mode; | ||
@@GLOBAL.innodb_index_page_split_mode | ||
symmetric | ||
SELECT @@SESSION.innodb_index_page_split_mode; | ||
@@SESSION.innodb_index_page_split_mode | ||
|
||
SET GLOBAL innodb_index_page_split_mode = ''; | ||
SET SESSION innodb_index_page_split_mode = 'lower'; | ||
SELECT @@GLOBAL.innodb_index_page_split_mode; | ||
@@GLOBAL.innodb_index_page_split_mode | ||
|
||
SELECT @@SESSION.innodb_index_page_split_mode; | ||
@@SESSION.innodb_index_page_split_mode | ||
symmetric,lower | ||
# | ||
# Invalid values. | ||
# | ||
SET GLOBAL innodb_index_page_split_mode = 1.1; | ||
ERROR 42000: Incorrect argument type to variable 'innodb_index_page_split_mode' | ||
SET GLOBAL innodb_index_page_split_mode = 1e1; | ||
ERROR 42000: Incorrect argument type to variable 'innodb_index_page_split_mode' | ||
SET GLOBAL innodb_index_page_split_mode = 'inexistent'; | ||
ERROR 42000: Variable 'innodb_index_page_split_mode' can't be set to the value of 'inexistent' | ||
# Cleanup. | ||
SET @@GLOBAL.innodb_index_page_split_mode = @old_innodb_index_page_split_mode; | ||
SELECT @@GLOBAL.innodb_index_page_split_mode; | ||
@@GLOBAL.innodb_index_page_split_mode | ||
|
Oops, something went wrong.