Skip to content

Commit cbb2c76

Browse files
author
Debarun Banerjee
committed
WL#14717 - InnoDB: Bulk Ingest Feature V1
WL#14772 InnoDB: Bulk Load - Parallel build Infrastructure WL#15131 Innodb: Support Bulk Load with Sorted data WL#15132 Innodb: Support Bulk Load with Unsorted Data WL#15133 Innodb: Support Bulk Load from OCI Object Store WL#15612 Innodb: Support Bulk Load from AWS S3 LOAD DATA FROM INFILE | URL | S3 'file_prefix' [COUNT N] INTO TABLE table_name [CHARACTER SET charset_name] [{COLUMNS | FIELDS} [TERMINATED BY 'string'] [[OPTIONALLY] ENCLOSED BY 'char'] [ESCAPED BY 'char'] ] [LINES TERMINATED BY 'line_term_string'] [IGNORE number {LINES | ROWS}] [PARALLEL = C] [MEMORY = M] [ALGORITHM = BULK]; Developed By: Niksa Skeledzija <niksa.skeledzija@oracle.com> Annamalai Gurusami <annamalai.gurusami@oracle.com> Debarun Banerjee <debarun.banerjee@oracle.com> Change-Id: Id131b86fe39daece93a9d08150731c04ab552193
1 parent 4207b6c commit cbb2c76

File tree

150 files changed

+14191
-1150
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

150 files changed

+14191
-1150
lines changed

include/mysql/components/services/bulk_data_service.h

+456
Large diffs are not rendered by default.
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,156 @@
1+
/* Copyright (c) 2022, 2023, Oracle and/or its affiliates.
2+
3+
This program is free software; you can redistribute it and/or modify
4+
it under the terms of the GNU General Public License, version 2.0,
5+
as published by the Free Software Foundation.
6+
7+
This program is also distributed with certain software (including
8+
but not limited to OpenSSL) that is licensed under separate terms,
9+
as designated in a particular file or component or in included license
10+
documentation. The authors of MySQL hereby grant you an additional
11+
permission to link the program and your derivative works with the
12+
separately licensed software that they have included with MySQL.
13+
14+
This program is distributed in the hope that it will be useful,
15+
but WITHOUT ANY WARRANTY; without even the implied warranty of
16+
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17+
GNU General Public License, version 2.0, for more details.
18+
19+
You should have received a copy of the GNU General Public License
20+
along with this program; if not, write to the Free Software
21+
Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA */
22+
23+
#pragma once
24+
25+
/**
26+
@file
27+
This service provides an interface for loading data in bulk from CSV files.
28+
29+
*/
30+
31+
#include <mysql/components/service.h>
32+
#include <string>
33+
34+
/* Forward declaration for opaque types. */
35+
class THD;
36+
struct TABLE;
37+
struct CHARSET_INFO;
38+
39+
using Bulk_loader = void;
40+
41+
/** Bulk loader source. */
42+
enum class Bulk_source {
43+
/** Local file system. */
44+
LOCAL,
45+
/** OCI object store. */
46+
OCI,
47+
/** Amazon S3. */
48+
S3
49+
};
50+
51+
/** Bulk loader string attributes. */
52+
enum class Bulk_string {
53+
/** Schema name */
54+
SCHEMA_NAME,
55+
/** Table name */
56+
TABLE_NAME,
57+
/** File prefix URL */
58+
FILE_PREFIX,
59+
/** Column terminator */
60+
COLUMN_TERM,
61+
/** Row terminator */
62+
ROW_TERM,
63+
};
64+
65+
/** Bulk loader boolean attributes. */
66+
enum class Bulk_condition {
67+
/** The algorithm used is different based on whether the data is in sorted
68+
primary key order. This option tells whether to expect sorted input. */
69+
ORDERED_DATA,
70+
/** If enclosing is optional. */
71+
OPTIONAL_ENCLOSE
72+
};
73+
74+
/** Bulk loader size attributes. */
75+
enum class Bulk_size {
76+
/** Number of input files. */
77+
COUNT_FILES,
78+
/** Number of rows to skip. */
79+
COUNT_ROW_SKIP,
80+
/** Number of columns in the table. */
81+
COUNT_COLUMNS,
82+
/** Number of concurrent loaders to use. */
83+
CONCURRENCY,
84+
/** Total memory size to use for LOAD in bytes. */
85+
MEMORY
86+
};
87+
88+
/** Bulk loader single byte attributes. */
89+
enum class Bulk_char {
90+
/** Escape character. */
91+
ESCAPE_CHAR,
92+
/** Column enclosing character. */
93+
ENCLOSE_CHAR
94+
};
95+
96+
/** Bulk load driver service. */
97+
BEGIN_SERVICE_DEFINITION(bulk_load_driver)
98+
99+
/**
100+
Create bulk loader.
101+
@param[in] thd mysql THD
102+
@param[in] table mysql TABLE object
103+
@param[in] src bulk loader source
104+
@param[in] charset source data character set
105+
@return bulk loader object, opaque type.
106+
*/
107+
DECLARE_METHOD(Bulk_loader *, create_bulk_loader,
108+
(THD * thd, const TABLE *table, Bulk_source src,
109+
const CHARSET_INFO *charset));
110+
/**
111+
Set string attribute for loading data.
112+
@param[in,out] loader bulk loader
113+
@param[in] type attribute type
114+
@param[in] value attribute value
115+
*/
116+
DECLARE_METHOD(void, set_string,
117+
(Bulk_loader * loader, Bulk_string type, std::string value));
118+
/**
119+
Set single byte character attribute for loading data.
120+
@param[in,out] loader bulk loader
121+
@param[in] type attribute type
122+
@param[in] value attribute value
123+
*/
124+
DECLARE_METHOD(void, set_char,
125+
(Bulk_loader * loader, Bulk_char type, unsigned char value));
126+
/**
127+
Set size attribute for loading data.
128+
@param[in,out] loader bulk loader
129+
@param[in] type attribute type
130+
@param[in] value attribute value
131+
*/
132+
DECLARE_METHOD(void, set_size,
133+
(Bulk_loader * loader, Bulk_size type, size_t value));
134+
/**
135+
Set boolean condition attribute for loading data.
136+
@param[in,out] loader bulk loader
137+
@param[in] type attribute type
138+
@param[in] value attribute value
139+
*/
140+
DECLARE_METHOD(void, set_condition,
141+
(Bulk_loader * loader, Bulk_condition type, bool value));
142+
/**
143+
Load data from CSV files.
144+
@param[in,out] loader bulk loader
@param[out] affected_rows presumably the number of rows loaded; passed by non-const reference - TODO confirm against the implementation
145+
@return true if successful.
146+
*/
147+
DECLARE_METHOD(bool, load, (Bulk_loader * loader, size_t &affected_rows));
148+
149+
/**
150+
Drop bulk loader.
151+
@param[in,out] thd mysql THD
152+
@param[in,out] loader loader object to drop
153+
*/
154+
DECLARE_METHOD(void, drop_bulk_loader, (THD * thd, Bulk_loader *loader));
155+
156+
END_SERVICE_DEFINITION(bulk_load_driver)

mysql-test/include/subquery_sj.inc

+6-2
Original file line numberDiff line numberDiff line change
@@ -3609,14 +3609,18 @@ create table t3 as select * from t2 limit 0;
36093609
insert into t3 select @a:=@a+1, t2.a from t2, t0;
36103610
insert into t3 select @a:=@a+1, t2.a from t2, t0;
36113611
insert into t3 select @a:=@a+1, t2.a from t2, t0;
3612-
36133612
alter table t3 add primary key(id), add key(a);
3613+
3614+
# The number of leaf pages in the primary key is less than 60.
3615+
SET GLOBAL innodb_stats_persistent_sample_pages = 60;
3616+
ANALYZE TABLE t2;
3617+
ANALYZE TABLE t3;
36143618
--echo The following must use loose index scan over t3, key a:
36153619
explain select count(a) from t2 where a in ( SELECT a FROM t3);
36163620
select count(a) from t2 where a in ( SELECT a FROM t3);
36173621

36183622
drop table t0,t1,t2,t3;
3619-
3623+
SET GLOBAL innodb_stats_persistent_sample_pages = DEFAULT;
36203624

36213625
--echo #
36223626
--echo # Bug#33062: subquery in stored routine cause crash

mysql-test/r/information_schema_keywords.result

+2
Original file line numberDiff line numberDiff line change
@@ -443,6 +443,7 @@ OVER 1
443443
OWNER 0
444444
PACK_KEYS 0
445445
PAGE 0
446+
PARALLEL 0
446447
PARSER 0
447448
PARSE_TREE 0
448449
PARTIAL 0
@@ -554,6 +555,7 @@ ROW_COUNT 0
554555
ROW_FORMAT 0
555556
ROW_NUMBER 1
556557
RTREE 0
558+
S3 0
557559
SAVEPOINT 0
558560
SCHEDULE 0
559561
SCHEMA 1

mysql-test/r/loaddata.result

+22-1
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,3 @@
1-
drop table if exists t1, t2;
21
create table t1 (a date, b date, c date not null, d date);
32
load data infile '../../std_data/loaddata1.dat' ignore into table t1 fields terminated by ',';
43
Warnings:
@@ -20,6 +19,28 @@ a b c d
2019
2003-03-03 2003-03-03 2003-03-03 NULL
2120
2003-03-03 2003-03-03 2003-03-03 NULL
2221
truncate table t1;
22+
load data from infile '../../std_data/loaddata1.dat'
23+
into table t1 fields terminated by ',' ignore 2 lines;
24+
Warnings:
25+
Warning 4096 Delimiter ' ' in position 0 in datetime value ' 20030303' at row 1 is superfluous and is deprecated. Please remove.
26+
SELECT * from t1;
27+
a b c d
28+
2003-03-03 2003-03-03 2003-03-03 NULL
29+
truncate table t1;
30+
load data infile '../../std_data/loaddata1.dat' in primary key order
31+
into table t1 fields terminated by ',' ignore 2 lines;
32+
Warnings:
33+
Warning 4096 Delimiter ' ' in position 0 in datetime value ' 20030303' at row 1 is superfluous and is deprecated. Please remove.
34+
SELECT * from t1;
35+
a b c d
36+
2003-03-03 2003-03-03 2003-03-03 NULL
37+
truncate table t1;
38+
load data infile '../../std_data/loaddata1.dat' count 1 into table t1;
39+
ERROR HY000: Incorrect usage of LOAD DATA without BULK Algorithm and multiple files
40+
load data infile '../../std_data/loaddata1.dat' count 10 into table t1;
41+
ERROR HY000: Incorrect usage of LOAD DATA without BULK Algorithm and multiple files
42+
load data url '../../std_data/loaddata1.dat' into table t1;
43+
ERROR HY000: Incorrect usage of LOAD DATA without BULK Algorithm and URL source
2344
load data infile '../../std_data/loaddata1.dat' ignore into table t1 fields terminated by ',' LINES STARTING BY ',' (b,c,d);
2445
Warnings:
2546
Warning 1265 Data truncated for column 'c' at row 1

mysql-test/r/partition.result

+3-3
Original file line numberDiff line numberDiff line change
@@ -2823,7 +2823,7 @@ SUBPARTITION BY HASH (to_days("col2"))
28232823
SUBPARTITION s1 TABLESPACE = "innodb_file_per_table" ENGINE = InnoDB),
28242824
PARTITION p1 VALUES LESS THAN MAXVALUE
28252825
(SUBPARTITION s2 TABLESPACE = "innodb_file_per_table" ENGINE = InnoDB,
2826-
SUBPARTITION s3 TABLESPACE = "innodb_file_per_table" ENGINE = InnoDB)) */
2826+
SUBPARTITION "s3" TABLESPACE = "innodb_file_per_table" ENGINE = InnoDB)) */
28272827
SELECT partition_expression FROM information_schema.partitions
28282828
WHERE table_schema = 'test' AND table_name = 't1';
28292829
PARTITION_EXPRESSION
@@ -2846,7 +2846,7 @@ SUBPARTITION BY HASH (to_days(`col2`))
28462846
SUBPARTITION s1 TABLESPACE = `innodb_file_per_table` ENGINE = InnoDB),
28472847
PARTITION p1 VALUES LESS THAN MAXVALUE
28482848
(SUBPARTITION s2 TABLESPACE = `innodb_file_per_table` ENGINE = InnoDB,
2849-
SUBPARTITION s3 TABLESPACE = `innodb_file_per_table` ENGINE = InnoDB)) */
2849+
SUBPARTITION `s3` TABLESPACE = `innodb_file_per_table` ENGINE = InnoDB)) */
28502850
SELECT partition_expression FROM information_schema.partitions
28512851
WHERE table_schema = 'test' AND table_name = 't1';
28522852
PARTITION_EXPRESSION
@@ -2868,7 +2868,7 @@ SUBPARTITION BY HASH (to_days(col2))
28682868
SUBPARTITION s1 TABLESPACE = innodb_file_per_table ENGINE = InnoDB),
28692869
PARTITION p1 VALUES LESS THAN MAXVALUE
28702870
(SUBPARTITION s2 TABLESPACE = innodb_file_per_table ENGINE = InnoDB,
2871-
SUBPARTITION s3 TABLESPACE = innodb_file_per_table ENGINE = InnoDB)) */
2871+
SUBPARTITION `s3` TABLESPACE = innodb_file_per_table ENGINE = InnoDB)) */
28722872
SELECT partition_expression FROM information_schema.partitions
28732873
WHERE table_schema = 'test' AND table_name = 't1';
28742874
PARTITION_EXPRESSION

mysql-test/r/partition_exchange.result

+5-5
Original file line numberDiff line numberDiff line change
@@ -1907,7 +1907,7 @@ SUBPARTITION BY HASH (to_days(`purchased`))
19071907
SUBPARTITION s1 ENGINE = InnoDB),
19081908
PARTITION p1 VALUES LESS THAN (2000)
19091909
(SUBPARTITION s2 ENGINE = InnoDB,
1910-
SUBPARTITION s3 DATA DIRECTORY = 'MYSQL_TMP_DIR/sp3_dir/' ENGINE = InnoDB)) */
1910+
SUBPARTITION `s3` DATA DIRECTORY = 'MYSQL_TMP_DIR/sp3_dir/' ENGINE = InnoDB)) */
19111911
# List files from t1_dir/test
19121912
t1.ibd
19131913
# List files from sp0_dir/test
@@ -1950,7 +1950,7 @@ SUBPARTITION BY HASH (to_days(`purchased`))
19501950
SUBPARTITION s1 ENGINE = InnoDB),
19511951
PARTITION p1 VALUES LESS THAN (2000)
19521952
(SUBPARTITION s2 ENGINE = InnoDB,
1953-
SUBPARTITION s3 DATA DIRECTORY = 'MYSQL_TMP_DIR/sp3_dir/' ENGINE = InnoDB)) */
1953+
SUBPARTITION `s3` DATA DIRECTORY = 'MYSQL_TMP_DIR/sp3_dir/' ENGINE = InnoDB)) */
19541954
ALTER TABLE t2 EXCHANGE PARTITION s0 WITH TABLE t1;
19551955
SHOW CREATE TABLE t1;
19561956
Table Create Table
@@ -1971,7 +1971,7 @@ SUBPARTITION BY HASH (to_days(`purchased`))
19711971
SUBPARTITION s1 ENGINE = InnoDB),
19721972
PARTITION p1 VALUES LESS THAN (2000)
19731973
(SUBPARTITION s2 ENGINE = InnoDB,
1974-
SUBPARTITION s3 DATA DIRECTORY = 'MYSQL_TMP_DIR/sp3_dir/' ENGINE = InnoDB)) */
1974+
SUBPARTITION `s3` DATA DIRECTORY = 'MYSQL_TMP_DIR/sp3_dir/' ENGINE = InnoDB)) */
19751975
# List files from t1_dir/test
19761976
t1.ibd
19771977
# List files from sp0_dir/test
@@ -2018,7 +2018,7 @@ SUBPARTITION BY HASH (to_days(`purchased`))
20182018
SUBPARTITION s1 ENGINE = InnoDB),
20192019
PARTITION p1 VALUES LESS THAN (2000)
20202020
(SUBPARTITION s2 ENGINE = InnoDB,
2021-
SUBPARTITION s3 DATA DIRECTORY = 'MYSQL_TMP_DIR/t1_dir/' ENGINE = InnoDB)) */
2021+
SUBPARTITION `s3` DATA DIRECTORY = 'MYSQL_TMP_DIR/t1_dir/' ENGINE = InnoDB)) */
20222022
ALTER TABLE t2 EXCHANGE PARTITION s3 WITH TABLE t1;
20232023
SHOW CREATE TABLE t1;
20242024
Table Create Table
@@ -2039,7 +2039,7 @@ SUBPARTITION BY HASH (to_days(`purchased`))
20392039
SUBPARTITION s1 ENGINE = InnoDB),
20402040
PARTITION p1 VALUES LESS THAN (2000)
20412041
(SUBPARTITION s2 ENGINE = InnoDB,
2042-
SUBPARTITION s3 DATA DIRECTORY = 'MYSQL_TMP_DIR/sp3_dir/' ENGINE = InnoDB)) */
2042+
SUBPARTITION `s3` DATA DIRECTORY = 'MYSQL_TMP_DIR/sp3_dir/' ENGINE = InnoDB)) */
20432043
# List files from t1_dir/test
20442044
t1.ibd
20452045
# List files from sp0_dir/test

mysql-test/r/partition_innodb_tablespace.result

+2-2
Original file line numberDiff line numberDiff line change
@@ -236,7 +236,7 @@ SUBPARTITION BY HASH (to_days(`purchased`))
236236
SUBPARTITION s1 DATA DIRECTORY = 'MYSQL_TMP_DIR/alternate_dir/data2/' ENGINE = InnoDB),
237237
PARTITION p1 VALUES LESS THAN (2000)
238238
(SUBPARTITION s2 DATA DIRECTORY = 'MYSQL_TMP_DIR/alternate_dir/data/' ENGINE = InnoDB,
239-
SUBPARTITION s3 DATA DIRECTORY = 'MYSQL_TMP_DIR/alternate_dir/data2/' ENGINE = InnoDB),
239+
SUBPARTITION `s3` DATA DIRECTORY = 'MYSQL_TMP_DIR/alternate_dir/data2/' ENGINE = InnoDB),
240240
PARTITION p2 VALUES LESS THAN MAXVALUE
241241
(SUBPARTITION s4 DATA DIRECTORY = 'MYSQL_TMP_DIR/alternate_dir/data/' ENGINE = InnoDB,
242242
SUBPARTITION s5 DATA DIRECTORY = 'MYSQL_TMP_DIR/alternate_dir/data2/' ENGINE = InnoDB)) */
@@ -326,7 +326,7 @@ SUBPARTITION BY HASH (to_days(`purchased`))
326326
SUBPARTITION s1 DATA DIRECTORY = 'MYSQL_TMP_DIR/alternate_dir/data2/' ENGINE = InnoDB),
327327
PARTITION p1 VALUES LESS THAN (2000)
328328
(SUBPARTITION s2 DATA DIRECTORY = 'MYSQL_TMP_DIR/alternate_dir/data/' ENGINE = InnoDB,
329-
SUBPARTITION s3 DATA DIRECTORY = 'MYSQL_TMP_DIR/alternate_dir/data2/' ENGINE = InnoDB),
329+
SUBPARTITION `s3` DATA DIRECTORY = 'MYSQL_TMP_DIR/alternate_dir/data2/' ENGINE = InnoDB),
330330
PARTITION p2 VALUES LESS THAN MAXVALUE
331331
(SUBPARTITION s4 DATA DIRECTORY = 'MYSQL_TMP_DIR/alternate_dir/data/' ENGINE = InnoDB,
332332
SUBPARTITION s5 DATA DIRECTORY = 'MYSQL_TMP_DIR/alternate_dir/data2/' ENGINE = InnoDB)) */

mysql-test/r/subquery_sj_all.result

+8
Original file line numberDiff line numberDiff line change
@@ -8344,6 +8344,13 @@ insert into t3 select @a:=@a+1, t2.a from t2, t0;
83448344
Warnings:
83458345
Warning 1287 Setting user variables within expressions is deprecated and will be removed in a future release. Consider alternatives: 'SET variable=expression, ...', or 'SELECT expression(s) INTO variables(s)'.
83468346
alter table t3 add primary key(id), add key(a);
8347+
SET GLOBAL innodb_stats_persistent_sample_pages = 60;
8348+
ANALYZE TABLE t2;
8349+
Table Op Msg_type Msg_text
8350+
test.t2 analyze status OK
8351+
ANALYZE TABLE t3;
8352+
Table Op Msg_type Msg_text
8353+
test.t3 analyze status OK
83478354
The following must use loose index scan over t3, key a:
83488355
explain select count(a) from t2 where a in ( SELECT a FROM t3);
83498356
id select_type table partitions type possible_keys key key_len ref rows filtered Extra
@@ -8355,6 +8362,7 @@ select count(a) from t2 where a in ( SELECT a FROM t3);
83558362
count(a)
83568363
1000
83578364
drop table t0,t1,t2,t3;
8365+
SET GLOBAL innodb_stats_persistent_sample_pages = DEFAULT;
83588366
#
83598367
# Bug#33062: subquery in stored routine cause crash
83608368
#

mysql-test/r/subquery_sj_all_bka.result

+8
Original file line numberDiff line numberDiff line change
@@ -8346,6 +8346,13 @@ insert into t3 select @a:=@a+1, t2.a from t2, t0;
83468346
Warnings:
83478347
Warning 1287 Setting user variables within expressions is deprecated and will be removed in a future release. Consider alternatives: 'SET variable=expression, ...', or 'SELECT expression(s) INTO variables(s)'.
83488348
alter table t3 add primary key(id), add key(a);
8349+
SET GLOBAL innodb_stats_persistent_sample_pages = 60;
8350+
ANALYZE TABLE t2;
8351+
Table Op Msg_type Msg_text
8352+
test.t2 analyze status OK
8353+
ANALYZE TABLE t3;
8354+
Table Op Msg_type Msg_text
8355+
test.t3 analyze status OK
83498356
The following must use loose index scan over t3, key a:
83508357
explain select count(a) from t2 where a in ( SELECT a FROM t3);
83518358
id select_type table partitions type possible_keys key key_len ref rows filtered Extra
@@ -8357,6 +8364,7 @@ select count(a) from t2 where a in ( SELECT a FROM t3);
83578364
count(a)
83588365
1000
83598366
drop table t0,t1,t2,t3;
8367+
SET GLOBAL innodb_stats_persistent_sample_pages = DEFAULT;
83608368
#
83618369
# Bug#33062: subquery in stored routine cause crash
83628370
#

mysql-test/r/subquery_sj_all_bka_nobnl.result

+8
Original file line numberDiff line numberDiff line change
@@ -8344,6 +8344,13 @@ insert into t3 select @a:=@a+1, t2.a from t2, t0;
83448344
Warnings:
83458345
Warning 1287 Setting user variables within expressions is deprecated and will be removed in a future release. Consider alternatives: 'SET variable=expression, ...', or 'SELECT expression(s) INTO variables(s)'.
83468346
alter table t3 add primary key(id), add key(a);
8347+
SET GLOBAL innodb_stats_persistent_sample_pages = 60;
8348+
ANALYZE TABLE t2;
8349+
Table Op Msg_type Msg_text
8350+
test.t2 analyze status OK
8351+
ANALYZE TABLE t3;
8352+
Table Op Msg_type Msg_text
8353+
test.t3 analyze status OK
83478354
The following must use loose index scan over t3, key a:
83488355
explain select count(a) from t2 where a in ( SELECT a FROM t3);
83498356
id select_type table partitions type possible_keys key key_len ref rows filtered Extra
@@ -8355,6 +8362,7 @@ select count(a) from t2 where a in ( SELECT a FROM t3);
83558362
count(a)
83568363
1000
83578364
drop table t0,t1,t2,t3;
8365+
SET GLOBAL innodb_stats_persistent_sample_pages = DEFAULT;
83588366
#
83598367
# Bug#33062: subquery in stored routine cause crash
83608368
#

0 commit comments

Comments
 (0)