maria-developers team mailing list archive
-
maria-developers team
-
Mailing list archive
-
Message #03305
Please review: MWL#121: DS-MRR support for clustered primary keys
Hello Igor,
Please find below the combined patch for MWL#121. It is ready for review.
diff -urN --exclude='.*' maria-5.3-dsmrr-for-cpk-clean/mysql-test/r/innodb_mrr_cpk.result maria-5.3-dsmrr-for-cpk-noc/mysql-test/r/innodb_mrr_cpk.result
--- maria-5.3-dsmrr-for-cpk-clean/mysql-test/r/innodb_mrr_cpk.result 1970-01-01 03:00:00.000000000 +0300
+++ maria-5.3-dsmrr-for-cpk-noc/mysql-test/r/innodb_mrr_cpk.result 2010-06-22 23:28:02.000000000 +0400
@@ -0,0 +1,148 @@
+drop table if exists t0,t1,t2,t3;
+set @save_join_cache_level=@@join_cache_level;
+set join_cache_level=6;
+set @save_storage_engine=@@storage_engine;
+set storage_engine=innodb;
+create table t0(a int);
+insert into t0 values (0),(1),(2),(3),(4),(5),(6),(7),(8),(9);
+create table t1(a char(8), b char(8), filler char(100), primary key(a));
+show create table t1;
+Table Create Table
+t1 CREATE TABLE `t1` (
+ `a` char(8) NOT NULL DEFAULT '',
+ `b` char(8) DEFAULT NULL,
+ `filler` char(100) DEFAULT NULL,
+ PRIMARY KEY (`a`)
+) ENGINE=InnoDB DEFAULT CHARSET=latin1
+insert into t1 select
+concat('a-', 1000 + A.a + B.a*10 + C.a*100, '=A'),
+concat('b-', 1000 + A.a + B.a*10 + C.a*100, '=B'),
+'filler'
+from t0 A, t0 B, t0 C;
+create table t2 (a char(8));
+insert into t2 values ('a-1010=A'), ('a-1030=A'), ('a-1020=A');
+This should use join buffer:
+explain select * from t1, t2 where t1.a=t2.a;
+id select_type table type possible_keys key key_len ref rows Extra
+1 SIMPLE t2 ALL NULL NULL NULL NULL 3
+1 SIMPLE t1 eq_ref PRIMARY PRIMARY 8 test.t2.a 1 Using join buffer
+This output must be sorted by value of t1.a:
+select * from t1, t2 where t1.a=t2.a;
+a b filler a
+a-1010=A b-1010=B filler a-1010=A
+a-1020=A b-1020=B filler a-1020=A
+a-1030=A b-1030=B filler a-1030=A
+drop table t1, t2;
+create table t1(
+a char(8) character set utf8, b int, filler char(100),
+primary key(a,b)
+);
+insert into t1 select
+concat('a-', 1000 + A.a + B.a*10 + C.a*100, '=A'),
+1000 + A.a + B.a*10 + C.a*100,
+'filler'
+from t0 A, t0 B, t0 C;
+create table t2 (a char(8) character set utf8, b int);
+insert into t2 values ('a-1010=A', 1010), ('a-1030=A', 1030), ('a-1020=A', 1020);
+explain select * from t1, t2 where t1.a=t2.a and t1.b=t2.b;
+id select_type table type possible_keys key key_len ref rows Extra
+1 SIMPLE t2 ALL NULL NULL NULL NULL 3
+1 SIMPLE t1 eq_ref PRIMARY PRIMARY 28 test.t2.a,test.t2.b 1 Using join buffer
+select * from t1, t2 where t1.a=t2.a and t1.b=t2.b;
+a b filler a b
+a-1010=A 1010 filler a-1010=A 1010
+a-1020=A 1020 filler a-1020=A 1020
+a-1030=A 1030 filler a-1030=A 1030
+insert into t2 values ('a-1030=A', 1030), ('a-1020=A', 1020);
+explain select * from t1, t2 where t1.a=t2.a and t1.b=t2.b;
+id select_type table type possible_keys key key_len ref rows Extra
+1 SIMPLE t2 ALL NULL NULL NULL NULL 5
+1 SIMPLE t1 eq_ref PRIMARY PRIMARY 28 test.t2.a,test.t2.b 1 Using join buffer
+select * from t1, t2 where t1.a=t2.a and t1.b=t2.b;
+a b filler a b
+a-1010=A 1010 filler a-1010=A 1010
+a-1020=A 1020 filler a-1020=A 1020
+a-1020=A 1020 filler a-1020=A 1020
+a-1030=A 1030 filler a-1030=A 1030
+a-1030=A 1030 filler a-1030=A 1030
+drop table t1, t2;
+create table t1(
+a varchar(8) character set utf8, b int, filler char(100),
+primary key(a,b)
+);
+insert into t1 select
+concat('a-', 1000 + A.a + B.a*10 + C.a*100, '=A'),
+1000 + A.a + B.a*10 + C.a*100,
+'filler'
+from t0 A, t0 B, t0 C;
+create table t2 (a char(8) character set utf8, b int);
+insert into t2 values ('a-1010=A', 1010), ('a-1030=A', 1030), ('a-1020=A', 1020);
+explain select * from t1, t2 where t1.a=t2.a and t1.b=t2.b;
+id select_type table type possible_keys key key_len ref rows Extra
+1 SIMPLE t2 ALL NULL NULL NULL NULL 3
+1 SIMPLE t1 eq_ref PRIMARY PRIMARY 30 test.t2.a,test.t2.b 1 Using index condition(BKA); Using join buffer
+select * from t1, t2 where t1.a=t2.a and t1.b=t2.b;
+a b filler a b
+a-1010=A 1010 filler a-1010=A 1010
+a-1020=A 1020 filler a-1020=A 1020
+a-1030=A 1030 filler a-1030=A 1030
+explain select * from t1, t2 where t1.a=t2.a;
+id select_type table type possible_keys key key_len ref rows Extra
+1 SIMPLE t2 ALL NULL NULL NULL NULL 3
+1 SIMPLE t1 ref PRIMARY PRIMARY 26 test.t2.a 1 Using index condition(BKA); Using join buffer
+select * from t1, t2 where t1.a=t2.a;
+a b filler a b
+a-1010=A 1010 filler a-1010=A 1010
+a-1020=A 1020 filler a-1020=A 1020
+a-1030=A 1030 filler a-1030=A 1030
+drop table t1, t2;
+create table t1 (a int, b int, c int, filler char(100), primary key(a,b,c));
+insert into t1 select A.a, B.a, C.a, 'filler' from t0 A, t0 B, t0 C;
+insert into t1 values (11, 11, 11, 'filler');
+insert into t1 values (11, 11, 12, 'filler');
+insert into t1 values (11, 11, 13, 'filler');
+insert into t1 values (11, 22, 1234, 'filler');
+insert into t1 values (11, 33, 124, 'filler');
+insert into t1 values (11, 33, 125, 'filler');
+create table t2 (a int, b int);
+insert into t2 values (11,33), (11,22), (11,11);
+explain select * from t1, t2 where t1.a=t2.a and t1.b=t2.b;
+id select_type table type possible_keys key key_len ref rows Extra
+1 SIMPLE t2 ALL NULL NULL NULL NULL 3
+1 SIMPLE t1 ref PRIMARY PRIMARY 8 test.t2.a,test.t2.b 1 Using join buffer
+select * from t1, t2 where t1.a=t2.a and t1.b=t2.b;
+a b c filler a b
+11 11 11 filler 11 11
+11 11 12 filler 11 11
+11 11 13 filler 11 11
+11 22 1234 filler 11 22
+11 33 124 filler 11 33
+11 33 125 filler 11 33
+set join_cache_level=0;
+select * from t1, t2 where t1.a=t2.a and t1.b=t2.b;
+a b c filler a b
+11 33 124 filler 11 33
+11 33 125 filler 11 33
+11 22 1234 filler 11 22
+11 11 11 filler 11 11
+11 11 12 filler 11 11
+11 11 13 filler 11 11
+set join_cache_level=6;
+explain select * from t1, t2 where t1.a=t2.a and t2.b + t1.b > 100;
+id select_type table type possible_keys key key_len ref rows Extra
+1 SIMPLE t2 ALL NULL NULL NULL NULL 3
+1 SIMPLE t1 ref PRIMARY PRIMARY 4 test.t2.a 1 Using index condition(BKA); Using join buffer
+select * from t1, t2 where t1.a=t2.a and t2.b + t1.b > 100;
+a b c filler a b
+set optimizer_switch='index_condition_pushdown=off';
+explain select * from t1, t2 where t1.a=t2.a and t2.b + t1.b > 100;
+id select_type table type possible_keys key key_len ref rows Extra
+1 SIMPLE t2 ALL NULL NULL NULL NULL 3
+1 SIMPLE t1 ref PRIMARY PRIMARY 4 test.t2.a 1 Using where; Using join buffer
+select * from t1, t2 where t1.a=t2.a and t2.b + t1.b > 100;
+a b c filler a b
+set optimizer_switch='index_condition_pushdown=on';
+drop table t1,t2;
+set @@join_cache_level= @save_join_cache_level;
+set storage_engine=@save_storage_engine;
+drop table t0;
diff -urN --exclude='.*' maria-5.3-dsmrr-for-cpk-clean/mysql-test/r/innodb_mrr_cpk.result.moved maria-5.3-dsmrr-for-cpk-noc/mysql-test/r/innodb_mrr_cpk.result.moved
--- maria-5.3-dsmrr-for-cpk-clean/mysql-test/r/innodb_mrr_cpk.result.moved 1970-01-01 03:00:00.000000000 +0300
+++ maria-5.3-dsmrr-for-cpk-noc/mysql-test/r/innodb_mrr_cpk.result.moved 2010-06-22 19:23:18.000000000 +0400
@@ -0,0 +1,122 @@
+drop table if exists t0,t1,t2,t3;
+set @save_join_cache_level=@@join_cache_level;
+set join_cache_level=6;
+set @save_storage_engine=@@storage_engine;
+set storage_engine=innodb;
+create table t0(a int);
+insert into t0 values (0),(1),(2),(3),(4),(5),(6),(7),(8),(9);
+create table t1(a char(8), b char(8), filler char(100), primary key(a));
+show create table t1;
+Table Create Table
+t1 CREATE TABLE `t1` (
+ `a` char(8) NOT NULL DEFAULT '',
+ `b` char(8) DEFAULT NULL,
+ `filler` char(100) DEFAULT NULL,
+ PRIMARY KEY (`a`)
+) ENGINE=InnoDB DEFAULT CHARSET=latin1
+insert into t1 select
+concat('a-', 1000 + A.a + B.a*10 + C.a*100, '=A'),
+concat('b-', 1000 + A.a + B.a*10 + C.a*100, '=B'),
+'filler'
+from t0 A, t0 B, t0 C;
+create table t2 (a char(8));
+insert into t2 values ('a-1010=A'), ('a-1030=A'), ('a-1020=A');
+This should use join buffer:
+explain select * from t1, t2 where t1.a=t2.a;
+id select_type table type possible_keys key key_len ref rows Extra
+1 SIMPLE t2 ALL NULL NULL NULL NULL 3
+1 SIMPLE t1 eq_ref PRIMARY PRIMARY 8 test.t2.a 1 Using join buffer
+This output must be sorted by value of t1.a:
+select * from t1, t2 where t1.a=t2.a;
+a b filler a
+a-1010=A b-1010=B filler a-1010=A
+a-1020=A b-1020=B filler a-1020=A
+a-1030=A b-1030=B filler a-1030=A
+drop table t1, t2;
+create table t1(
+a char(8) character set utf8, b int, filler char(100),
+primary key(a,b)
+);
+insert into t1 select
+concat('a-', 1000 + A.a + B.a*10 + C.a*100, '=A'),
+1000 + A.a + B.a*10 + C.a*100,
+'filler'
+from t0 A, t0 B, t0 C;
+create table t2 (a char(8) character set utf8, b int);
+insert into t2 values ('a-1010=A', 1010), ('a-1030=A', 1030), ('a-1020=A', 1020);
+explain select * from t1, t2 where t1.a=t2.a and t1.b=t2.b;
+id select_type table type possible_keys key key_len ref rows Extra
+1 SIMPLE t2 ALL NULL NULL NULL NULL 3
+1 SIMPLE t1 eq_ref PRIMARY PRIMARY 28 test.t2.a,test.t2.b 1 Using join buffer
+select * from t1, t2 where t1.a=t2.a and t1.b=t2.b;
+a b filler a b
+a-1010=A 1010 filler a-1010=A 1010
+a-1020=A 1020 filler a-1020=A 1020
+a-1030=A 1030 filler a-1030=A 1030
+drop table t1, t2;
+create table t1(
+a varchar(8) character set utf8, b int, filler char(100),
+primary key(a,b)
+);
+insert into t1 select
+concat('a-', 1000 + A.a + B.a*10 + C.a*100, '=A'),
+1000 + A.a + B.a*10 + C.a*100,
+'filler'
+from t0 A, t0 B, t0 C;
+create table t2 (a char(8) character set utf8, b int);
+insert into t2 values ('a-1010=A', 1010), ('a-1030=A', 1030), ('a-1020=A', 1020);
+explain select * from t1, t2 where t1.a=t2.a and t1.b=t2.b;
+id select_type table type possible_keys key key_len ref rows Extra
+1 SIMPLE t2 ALL NULL NULL NULL NULL 3
+1 SIMPLE t1 eq_ref PRIMARY PRIMARY 30 test.t2.a,test.t2.b 1 Using index condition(BKA); Using join buffer
+select * from t1, t2 where t1.a=t2.a and t1.b=t2.b;
+a b filler a b
+a-1010=A 1010 filler a-1010=A 1010
+a-1020=A 1020 filler a-1020=A 1020
+a-1030=A 1030 filler a-1030=A 1030
+explain select * from t1, t2 where t1.a=t2.a;
+id select_type table type possible_keys key key_len ref rows Extra
+1 SIMPLE t2 ALL NULL NULL NULL NULL 3
+1 SIMPLE t1 ref PRIMARY PRIMARY 26 test.t2.a 1 Using index condition(BKA); Using join buffer
+select * from t1, t2 where t1.a=t2.a;
+a b filler a b
+a-1010=A 1010 filler a-1010=A 1010
+a-1020=A 1020 filler a-1020=A 1020
+a-1030=A 1030 filler a-1030=A 1030
+drop table t1, t2;
+create table t1 (a int, b int, c int, filler char(100), primary key(a,b,c));
+insert into t1 select A.a, B.a, C.a, 'filler' from t0 A, t0 B, t0 C;
+insert into t1 values (11, 11, 11, 'filler');
+insert into t1 values (11, 11, 12, 'filler');
+insert into t1 values (11, 11, 13, 'filler');
+insert into t1 values (11, 22, 1234, 'filler');
+insert into t1 values (11, 33, 124, 'filler');
+insert into t1 values (11, 33, 125, 'filler');
+create table t2 (a int, b int);
+insert into t2 values (11,33), (11,22), (11,11);
+explain select * from t1, t2 where t1.a=t2.a and t1.b=t2.b;
+id select_type table type possible_keys key key_len ref rows Extra
+1 SIMPLE t2 ALL NULL NULL NULL NULL 3
+1 SIMPLE t1 ref PRIMARY PRIMARY 8 test.t2.a,test.t2.b 1 Using join buffer
+select * from t1, t2 where t1.a=t2.a and t1.b=t2.b;
+a b c filler a b
+11 11 11 filler 11 11
+11 11 12 filler 11 11
+11 11 13 filler 11 11
+11 22 1234 filler 11 22
+11 33 124 filler 11 33
+11 33 125 filler 11 33
+set join_cache_level=0;
+select * from t1, t2 where t1.a=t2.a and t1.b=t2.b;
+a b c filler a b
+11 33 124 filler 11 33
+11 33 125 filler 11 33
+11 22 1234 filler 11 22
+11 11 11 filler 11 11
+11 11 12 filler 11 11
+11 11 13 filler 11 11
+set join_cache_level=6;
+drop table t1,t2;
+set @@join_cache_level= @save_join_cache_level;
+set storage_engine=@save_storage_engine;
+drop table t0;
diff -urN --exclude='.*' maria-5.3-dsmrr-for-cpk-clean/mysql-test/t/innodb_mrr_cpk.test maria-5.3-dsmrr-for-cpk-noc/mysql-test/t/innodb_mrr_cpk.test
--- maria-5.3-dsmrr-for-cpk-clean/mysql-test/t/innodb_mrr_cpk.test 1970-01-01 03:00:00.000000000 +0300
+++ maria-5.3-dsmrr-for-cpk-noc/mysql-test/t/innodb_mrr_cpk.test 2010-06-22 23:28:02.000000000 +0400
@@ -0,0 +1,137 @@
+#
+# Tests for DS-MRR over clustered primary key. The only engine that supports
+# this is InnoDB/XtraDB.
+#
+# Basic idea about testing
+# - DS-MRR/CPK works only with BKA
+# - Should also test index condition pushdown
+# - Should also test whatever uses RANGE_SEQ_IF::skip_record() for filtering
+# - Also test access using prefix of primary key
+#
+# - Forget about cost model, BKA's multi_range_read_info() call passes 10 for
+# #rows, the call is there at all only for applicability check
+#
+-- source include/have_innodb.inc
+
+--disable_warnings
+drop table if exists t0,t1,t2,t3;
+--enable_warnings
+
+set @save_join_cache_level=@@join_cache_level;
+set join_cache_level=6;
+
+set @save_storage_engine=@@storage_engine;
+set storage_engine=innodb;
+
+create table t0(a int);
+insert into t0 values (0),(1),(2),(3),(4),(5),(6),(7),(8),(9);
+create table t1(a char(8), b char(8), filler char(100), primary key(a));
+show create table t1;
+
+insert into t1 select
+ concat('a-', 1000 + A.a + B.a*10 + C.a*100, '=A'),
+ concat('b-', 1000 + A.a + B.a*10 + C.a*100, '=B'),
+ 'filler'
+from t0 A, t0 B, t0 C;
+
+create table t2 (a char(8));
+insert into t2 values ('a-1010=A'), ('a-1030=A'), ('a-1020=A');
+
+--echo This should use join buffer:
+explain select * from t1, t2 where t1.a=t2.a;
+
+--echo This output must be sorted by value of t1.a:
+select * from t1, t2 where t1.a=t2.a;
+drop table t1, t2;
+
+# Try multi-column indexes
+create table t1(
+ a char(8) character set utf8, b int, filler char(100),
+ primary key(a,b)
+);
+
+insert into t1 select
+ concat('a-', 1000 + A.a + B.a*10 + C.a*100, '=A'),
+ 1000 + A.a + B.a*10 + C.a*100,
+ 'filler'
+from t0 A, t0 B, t0 C;
+
+create table t2 (a char(8) character set utf8, b int);
+insert into t2 values ('a-1010=A', 1010), ('a-1030=A', 1030), ('a-1020=A', 1020);
+explain select * from t1, t2 where t1.a=t2.a and t1.b=t2.b;
+select * from t1, t2 where t1.a=t2.a and t1.b=t2.b;
+
+# Try with dataset that causes identical lookup keys:
+insert into t2 values ('a-1030=A', 1030), ('a-1020=A', 1020);
+explain select * from t1, t2 where t1.a=t2.a and t1.b=t2.b;
+select * from t1, t2 where t1.a=t2.a and t1.b=t2.b;
+
+drop table t1, t2;
+
+create table t1(
+ a varchar(8) character set utf8, b int, filler char(100),
+ primary key(a,b)
+);
+
+insert into t1 select
+ concat('a-', 1000 + A.a + B.a*10 + C.a*100, '=A'),
+ 1000 + A.a + B.a*10 + C.a*100,
+ 'filler'
+from t0 A, t0 B, t0 C;
+
+create table t2 (a char(8) character set utf8, b int);
+insert into t2 values ('a-1010=A', 1010), ('a-1030=A', 1030), ('a-1020=A', 1020);
+explain select * from t1, t2 where t1.a=t2.a and t1.b=t2.b;
+select * from t1, t2 where t1.a=t2.a and t1.b=t2.b;
+
+#
+# Try scanning on a CPK prefix
+#
+explain select * from t1, t2 where t1.a=t2.a;
+select * from t1, t2 where t1.a=t2.a;
+drop table t1, t2;
+
+#
+# The above example is not very interesting, as CPK prefix has
+# only one match. Create a dataset where scan on CPK prefix
+# would produce multiple matches:
+#
+create table t1 (a int, b int, c int, filler char(100), primary key(a,b,c));
+insert into t1 select A.a, B.a, C.a, 'filler' from t0 A, t0 B, t0 C;
+
+insert into t1 values (11, 11, 11, 'filler');
+insert into t1 values (11, 11, 12, 'filler');
+insert into t1 values (11, 11, 13, 'filler');
+insert into t1 values (11, 22, 1234, 'filler');
+insert into t1 values (11, 33, 124, 'filler');
+insert into t1 values (11, 33, 125, 'filler');
+
+create table t2 (a int, b int);
+insert into t2 values (11,33), (11,22), (11,11);
+
+explain select * from t1, t2 where t1.a=t2.a and t1.b=t2.b;
+select * from t1, t2 where t1.a=t2.a and t1.b=t2.b;
+
+# Check a real resultset for comaprison:
+set join_cache_level=0;
+select * from t1, t2 where t1.a=t2.a and t1.b=t2.b;
+set join_cache_level=6;
+
+
+#
+# Check that Index Condition Pushdown (BKA) actually works:
+#
+explain select * from t1, t2 where t1.a=t2.a and t2.b + t1.b > 100;
+select * from t1, t2 where t1.a=t2.a and t2.b + t1.b > 100;
+
+set optimizer_switch='index_condition_pushdown=off';
+explain select * from t1, t2 where t1.a=t2.a and t2.b + t1.b > 100;
+select * from t1, t2 where t1.a=t2.a and t2.b + t1.b > 100;
+set optimizer_switch='index_condition_pushdown=on';
+
+drop table t1,t2;
+
+set @@join_cache_level= @save_join_cache_level;
+set storage_engine=@save_storage_engine;
+drop table t0;
+
diff -urN --exclude='.*' maria-5.3-dsmrr-for-cpk-clean/mysql-test/t/innodb_mrr_cpk.test.moved maria-5.3-dsmrr-for-cpk-noc/mysql-test/t/innodb_mrr_cpk.test.moved
--- maria-5.3-dsmrr-for-cpk-clean/mysql-test/t/innodb_mrr_cpk.test.moved 1970-01-01 03:00:00.000000000 +0300
+++ maria-5.3-dsmrr-for-cpk-noc/mysql-test/t/innodb_mrr_cpk.test.moved 2010-06-22 19:23:18.000000000 +0400
@@ -0,0 +1,128 @@
+#
+# Tests for DS-MRR over clustered primary key. The only engine that supports
+# this is InnoDB/XtraDB.
+#
+# Basic idea about testing
+# - DS-MRR/CPK works only with BKA
+# - Should also test index condition pushdown
+# - Should also test whatever uses RANGE_SEQ_IF::skip_record() for filtering
+# - Also test access using prefix of primary key
+#
+# - Forget about cost model, BKA's multi_range_read_info() call passes 10 for
+# #rows, the call is there at all only for applicability check
+#
+-- source include/have_innodb.inc
+
+--disable_warnings
+drop table if exists t0,t1,t2,t3;
+--enable_warnings
+
+set @save_join_cache_level=@@join_cache_level;
+set join_cache_level=6;
+
+set @save_storage_engine=@@storage_engine;
+set storage_engine=innodb;
+
+create table t0(a int);
+insert into t0 values (0),(1),(2),(3),(4),(5),(6),(7),(8),(9);
+create table t1(a char(8), b char(8), filler char(100), primary key(a));
+show create table t1;
+
+insert into t1 select
+ concat('a-', 1000 + A.a + B.a*10 + C.a*100, '=A'),
+ concat('b-', 1000 + A.a + B.a*10 + C.a*100, '=B'),
+ 'filler'
+from t0 A, t0 B, t0 C;
+
+create table t2 (a char(8));
+insert into t2 values ('a-1010=A'), ('a-1030=A'), ('a-1020=A');
+
+--echo This should use join buffer:
+explain select * from t1, t2 where t1.a=t2.a;
+
+--echo This output must be sorted by value of t1.a:
+select * from t1, t2 where t1.a=t2.a;
+drop table t1, t2;
+
+# Try multi-column indexes
+create table t1(
+ a char(8) character set utf8, b int, filler char(100),
+ primary key(a,b)
+);
+
+insert into t1 select
+ concat('a-', 1000 + A.a + B.a*10 + C.a*100, '=A'),
+ 1000 + A.a + B.a*10 + C.a*100,
+ 'filler'
+from t0 A, t0 B, t0 C;
+
+create table t2 (a char(8) character set utf8, b int);
+insert into t2 values ('a-1010=A', 1010), ('a-1030=A', 1030), ('a-1020=A', 1020);
+explain select * from t1, t2 where t1.a=t2.a and t1.b=t2.b;
+select * from t1, t2 where t1.a=t2.a and t1.b=t2.b;
+drop table t1, t2;
+
+create table t1(
+ a varchar(8) character set utf8, b int, filler char(100),
+ primary key(a,b)
+);
+
+insert into t1 select
+ concat('a-', 1000 + A.a + B.a*10 + C.a*100, '=A'),
+ 1000 + A.a + B.a*10 + C.a*100,
+ 'filler'
+from t0 A, t0 B, t0 C;
+
+create table t2 (a char(8) character set utf8, b int);
+insert into t2 values ('a-1010=A', 1010), ('a-1030=A', 1030), ('a-1020=A', 1020);
+explain select * from t1, t2 where t1.a=t2.a and t1.b=t2.b;
+select * from t1, t2 where t1.a=t2.a and t1.b=t2.b;
+
+#
+# Try scanning on a CPK prefix
+#
+explain select * from t1, t2 where t1.a=t2.a;
+select * from t1, t2 where t1.a=t2.a;
+drop table t1, t2;
+
+#
+# The above example is not very interesting, as CPK prefix has
+# only one match. Create a dataset where scan on CPK prefix
+# would produce multiple matches:
+#
+create table t1 (a int, b int, c int, filler char(100), primary key(a,b,c));
+insert into t1 select A.a, B.a, C.a, 'filler' from t0 A, t0 B, t0 C;
+
+insert into t1 values (11, 11, 11, 'filler');
+insert into t1 values (11, 11, 12, 'filler');
+insert into t1 values (11, 11, 13, 'filler');
+insert into t1 values (11, 22, 1234, 'filler');
+insert into t1 values (11, 33, 124, 'filler');
+insert into t1 values (11, 33, 125, 'filler');
+
+create table t2 (a int, b int);
+insert into t2 values (11,33), (11,22), (11,11);
+
+explain select * from t1, t2 where t1.a=t2.a and t1.b=t2.b;
+select * from t1, t2 where t1.a=t2.a and t1.b=t2.b;
+
+set join_cache_level=0;
+select * from t1, t2 where t1.a=t2.a and t1.b=t2.b;
+set join_cache_level=6;
+
+drop table t1,t2;
+
+#
+# Check that Index Condition Pushdown (BKA) actually works:
+#
+
+# TODO
+
+#
+# Check that record-check-func is done:
+#
+
+set @@join_cache_level= @save_join_cache_level;
+set storage_engine=@save_storage_engine;
+drop table t0;
+
diff -urN --exclude='.*' maria-5.3-dsmrr-for-cpk-clean/r/innodb_mrr_cpk.result maria-5.3-dsmrr-for-cpk-noc/r/innodb_mrr_cpk.result
--- maria-5.3-dsmrr-for-cpk-clean/r/innodb_mrr_cpk.result 1970-01-01 03:00:00.000000000 +0300
+++ maria-5.3-dsmrr-for-cpk-noc/r/innodb_mrr_cpk.result 2010-06-22 19:23:14.000000000 +0400
@@ -0,0 +1,122 @@
+drop table if exists t0,t1,t2,t3;
+set @save_join_cache_level=@@join_cache_level;
+set join_cache_level=6;
+set @save_storage_engine=@@storage_engine;
+set storage_engine=innodb;
+create table t0(a int);
+insert into t0 values (0),(1),(2),(3),(4),(5),(6),(7),(8),(9);
+create table t1(a char(8), b char(8), filler char(100), primary key(a));
+show create table t1;
+Table Create Table
+t1 CREATE TABLE `t1` (
+ `a` char(8) NOT NULL DEFAULT '',
+ `b` char(8) DEFAULT NULL,
+ `filler` char(100) DEFAULT NULL,
+ PRIMARY KEY (`a`)
+) ENGINE=InnoDB DEFAULT CHARSET=latin1
+insert into t1 select
+concat('a-', 1000 + A.a + B.a*10 + C.a*100, '=A'),
+concat('b-', 1000 + A.a + B.a*10 + C.a*100, '=B'),
+'filler'
+from t0 A, t0 B, t0 C;
+create table t2 (a char(8));
+insert into t2 values ('a-1010=A'), ('a-1030=A'), ('a-1020=A');
+This should use join buffer:
+explain select * from t1, t2 where t1.a=t2.a;
+id select_type table type possible_keys key key_len ref rows Extra
+1 SIMPLE t2 ALL NULL NULL NULL NULL 3
+1 SIMPLE t1 eq_ref PRIMARY PRIMARY 8 test.t2.a 1 Using join buffer
+This output must be sorted by value of t1.a:
+select * from t1, t2 where t1.a=t2.a;
+a b filler a
+a-1010=A b-1010=B filler a-1010=A
+a-1020=A b-1020=B filler a-1020=A
+a-1030=A b-1030=B filler a-1030=A
+drop table t1, t2;
+create table t1(
+a char(8) character set utf8, b int, filler char(100),
+primary key(a,b)
+);
+insert into t1 select
+concat('a-', 1000 + A.a + B.a*10 + C.a*100, '=A'),
+1000 + A.a + B.a*10 + C.a*100,
+'filler'
+from t0 A, t0 B, t0 C;
+create table t2 (a char(8) character set utf8, b int);
+insert into t2 values ('a-1010=A', 1010), ('a-1030=A', 1030), ('a-1020=A', 1020);
+explain select * from t1, t2 where t1.a=t2.a and t1.b=t2.b;
+id select_type table type possible_keys key key_len ref rows Extra
+1 SIMPLE t2 ALL NULL NULL NULL NULL 3
+1 SIMPLE t1 eq_ref PRIMARY PRIMARY 28 test.t2.a,test.t2.b 1 Using join buffer
+select * from t1, t2 where t1.a=t2.a and t1.b=t2.b;
+a b filler a b
+a-1010=A 1010 filler a-1010=A 1010
+a-1020=A 1020 filler a-1020=A 1020
+a-1030=A 1030 filler a-1030=A 1030
+drop table t1, t2;
+create table t1(
+a varchar(8) character set utf8, b int, filler char(100),
+primary key(a,b)
+);
+insert into t1 select
+concat('a-', 1000 + A.a + B.a*10 + C.a*100, '=A'),
+1000 + A.a + B.a*10 + C.a*100,
+'filler'
+from t0 A, t0 B, t0 C;
+create table t2 (a char(8) character set utf8, b int);
+insert into t2 values ('a-1010=A', 1010), ('a-1030=A', 1030), ('a-1020=A', 1020);
+explain select * from t1, t2 where t1.a=t2.a and t1.b=t2.b;
+id select_type table type possible_keys key key_len ref rows Extra
+1 SIMPLE t2 ALL NULL NULL NULL NULL 3
+1 SIMPLE t1 eq_ref PRIMARY PRIMARY 30 test.t2.a,test.t2.b 1 Using index condition(BKA); Using join buffer
+select * from t1, t2 where t1.a=t2.a and t1.b=t2.b;
+a b filler a b
+a-1010=A 1010 filler a-1010=A 1010
+a-1020=A 1020 filler a-1020=A 1020
+a-1030=A 1030 filler a-1030=A 1030
+explain select * from t1, t2 where t1.a=t2.a;
+id select_type table type possible_keys key key_len ref rows Extra
+1 SIMPLE t2 ALL NULL NULL NULL NULL 3
+1 SIMPLE t1 ref PRIMARY PRIMARY 26 test.t2.a 1 Using index condition(BKA); Using join buffer
+select * from t1, t2 where t1.a=t2.a;
+a b filler a b
+a-1010=A 1010 filler a-1010=A 1010
+a-1020=A 1020 filler a-1020=A 1020
+a-1030=A 1030 filler a-1030=A 1030
+drop table t1, t2;
+create table t1 (a int, b int, c int, filler char(100), primary key(a,b,c));
+insert into t1 select A.a, B.a, C.a, 'filler' from t0 A, t0 B, t0 C;
+insert into t1 values (11, 11, 11, 'filler');
+insert into t1 values (11, 11, 12, 'filler');
+insert into t1 values (11, 11, 13, 'filler');
+insert into t1 values (11, 22, 1234, 'filler');
+insert into t1 values (11, 33, 124, 'filler');
+insert into t1 values (11, 33, 125, 'filler');
+create table t2 (a int, b int);
+insert into t2 values (11,33), (11,22), (11,11);
+explain select * from t1, t2 where t1.a=t2.a and t1.b=t2.b;
+id select_type table type possible_keys key key_len ref rows Extra
+1 SIMPLE t2 ALL NULL NULL NULL NULL 3
+1 SIMPLE t1 ref PRIMARY PRIMARY 8 test.t2.a,test.t2.b 1 Using join buffer
+select * from t1, t2 where t1.a=t2.a and t1.b=t2.b;
+a b c filler a b
+11 11 11 filler 11 11
+11 11 12 filler 11 11
+11 11 13 filler 11 11
+11 22 1234 filler 11 22
+11 33 124 filler 11 33
+11 33 125 filler 11 33
+set join_cache_level=0;
+select * from t1, t2 where t1.a=t2.a and t1.b=t2.b;
+a b c filler a b
+11 33 124 filler 11 33
+11 33 125 filler 11 33
+11 22 1234 filler 11 22
+11 11 11 filler 11 11
+11 11 12 filler 11 11
+11 11 13 filler 11 11
+set join_cache_level=6;
+drop table t1,t2;
+set @@join_cache_level= @save_join_cache_level;
+set storage_engine=@save_storage_engine;
+drop table t0;
diff -urN --exclude='.*' maria-5.3-dsmrr-for-cpk-clean/sql/handler.h maria-5.3-dsmrr-for-cpk-noc/sql/handler.h
--- maria-5.3-dsmrr-for-cpk-clean/sql/handler.h 2010-06-22 19:10:46.000000000 +0400
+++ maria-5.3-dsmrr-for-cpk-noc/sql/handler.h 2010-06-22 23:28:40.000000000 +0400
@@ -1168,9 +1168,9 @@
COST_VECT *cost);
/*
- The below two are not used (and not handled) in this milestone of this WL
- entry because there seems to be no use for them at this stage of
- implementation.
+ Indicates that all scanned ranges will be singlepoint (aka equality) ranges.
+ The ranges may not use the full key but all of them will use the same number
+ of key parts.
*/
#define HA_MRR_SINGLE_POINT 1
#define HA_MRR_FIXED_KEY 2
@@ -1752,9 +1752,10 @@
uint n_ranges, uint *bufsz,
uint *flags, COST_VECT *cost);
virtual ha_rows multi_range_read_info(uint keyno, uint n_ranges, uint keys,
- uint *bufsz, uint *flags, COST_VECT *cost);
+ uint key_parts, uint *bufsz,
+ uint *flags, COST_VECT *cost);
virtual int multi_range_read_init(RANGE_SEQ_IF *seq, void *seq_init_param,
- uint n_ranges, uint mode,
+ uint n_ranges, uint mode,
HANDLER_BUFFER *buf);
virtual int multi_range_read_next(char **range_info);
virtual int read_range_first(const key_range *start_key,
diff -urN --exclude='.*' maria-5.3-dsmrr-for-cpk-clean/sql/multi_range_read.cc maria-5.3-dsmrr-for-cpk-noc/sql/multi_range_read.cc
--- maria-5.3-dsmrr-for-cpk-clean/sql/multi_range_read.cc 2010-06-22 19:10:46.000000000 +0400
+++ maria-5.3-dsmrr-for-cpk-noc/sql/multi_range_read.cc 2010-06-22 23:28:40.000000000 +0400
@@ -1,4 +1,5 @@
#include "mysql_priv.h"
+#include <my_bit.h>
#include "sql_select.h"
/****************************************************************************
@@ -136,10 +137,16 @@
*/
ha_rows handler::multi_range_read_info(uint keyno, uint n_ranges, uint n_rows,
- uint *bufsz, uint *flags, COST_VECT *cost)
+ uint key_parts, uint *bufsz,
+ uint *flags, COST_VECT *cost)
{
- *bufsz= 0; /* Default implementation doesn't need a buffer */
+ /*
+ Currently we expect this function to be called only in preparation of scan
+ with HA_MRR_SINGLE_POINT property.
+ */
+ DBUG_ASSERT(*flags | HA_MRR_SINGLE_POINT);
+ *bufsz= 0; /* Default implementation doesn't need a buffer */
*flags |= HA_MRR_USE_DEFAULT_IMPL;
cost->zero();
@@ -316,25 +323,39 @@
{
use_default_impl= TRUE;
const int retval=
- h->handler::multi_range_read_init(seq_funcs, seq_init_param,
- n_ranges, mode, buf);
+ h->handler::multi_range_read_init(seq_funcs, seq_init_param, n_ranges,
+ mode, buf);
DBUG_RETURN(retval);
}
- rowids_buf= buf->buffer;
+ mrr_buf= buf->buffer;
is_mrr_assoc= !test(mode & HA_MRR_NO_ASSOCIATION);
if (is_mrr_assoc)
status_var_increment(table->in_use->status_var.ha_multi_range_read_init_count);
- rowids_buf_end= buf->buffer_end;
+ mrr_buf_end= buf->buffer_end;
+
+ if ((doing_cpk_scan= check_cpk_scan(h->active_index, mode)))
+ {
+ /* It's a DS-MRR/CPK scan */
+ cpk_tuple_length= 0; /* dummy value telling it needs to be inited */
+ cpk_have_range= FALSE;
+ use_default_impl= FALSE;
+ h->mrr_iter= seq_funcs->init(seq_init_param, n_ranges, mode);
+ h->mrr_funcs= *seq_funcs;
+ dsmrr_fill_buffer_cpk();
+ if (dsmrr_eof)
+ buf->end_of_used_area= mrr_buf_last;
+ DBUG_RETURN(0); /* nothing could go wrong while filling the buffer */
+ }
+
+ /* In regular DS-MRR, buffer stores {rowid, range_id} pairs */
elem_size= h->ref_length + (int)is_mrr_assoc * sizeof(void*);
- rowids_buf_last= rowids_buf +
- ((rowids_buf_end - rowids_buf)/ elem_size)*
- elem_size;
- rowids_buf_end= rowids_buf_last;
+ mrr_buf_last= mrr_buf + ((mrr_buf_end - mrr_buf)/ elem_size)* elem_size;
+ mrr_buf_end= mrr_buf_last;
- /*
+ /*
There can be two cases:
- This is the first call since index_init(), h2==NULL
Need to setup h2 then.
@@ -406,8 +427,8 @@
goto error;
}
- if (h2->handler::multi_range_read_init(seq_funcs, seq_init_param, n_ranges,
- mode, buf) ||
+ if (h2->handler::multi_range_read_init(seq_funcs, seq_init_param, n_ranges,
+ mode, buf) ||
dsmrr_fill_buffer())
{
goto error;
@@ -417,7 +438,7 @@
adjust *buf to indicate that the remaining buffer space will not be used.
*/
if (dsmrr_eof)
- buf->end_of_used_area= rowids_buf_last;
+ buf->end_of_used_area= mrr_buf_last;
/*
h->inited == INDEX may occur when 'range checked for each record' is
@@ -473,6 +494,9 @@
rowid and return.
The function assumes that rowids buffer is empty when it is invoked.
+
+ dsmrr_eof is set to indicate whether we've exhausted the list of ranges we're
+ scanning.
@param h Table handler
@@ -487,8 +511,8 @@
int res;
DBUG_ENTER("DsMrr_impl::dsmrr_fill_buffer");
- rowids_buf_cur= rowids_buf;
- while ((rowids_buf_cur < rowids_buf_end) &&
+ mrr_buf_cur= mrr_buf;
+ while ((mrr_buf_cur < mrr_buf_end) &&
!(res= h2->handler::multi_range_read_next(&range_info)))
{
KEY_MULTI_RANGE *curr_range= &h2->handler::mrr_cur_range;
@@ -498,13 +522,13 @@
/* Put rowid, or {rowid, range_id} pair into the buffer */
h2->position(table->record[0]);
- memcpy(rowids_buf_cur, h2->ref, h2->ref_length);
- rowids_buf_cur += h2->ref_length;
+ memcpy(mrr_buf_cur, h2->ref, h2->ref_length);
+ mrr_buf_cur += h2->ref_length;
if (is_mrr_assoc)
{
- memcpy(rowids_buf_cur, &range_info, sizeof(void*));
- rowids_buf_cur += sizeof(void*);
+ memcpy(mrr_buf_cur, &range_info, sizeof(void*));
+ mrr_buf_cur += sizeof(void*);
}
}
@@ -514,16 +538,224 @@
/* Sort the buffer contents by rowid */
uint elem_size= h->ref_length + (int)is_mrr_assoc * sizeof(void*);
- uint n_rowids= (rowids_buf_cur - rowids_buf) / elem_size;
+ uint n_rowids= (mrr_buf_cur - mrr_buf) / elem_size;
- my_qsort2(rowids_buf, n_rowids, elem_size, (qsort2_cmp)rowid_cmp,
+ my_qsort2(mrr_buf, n_rowids, elem_size, (qsort2_cmp)rowid_cmp,
(void*)h);
- rowids_buf_last= rowids_buf_cur;
- rowids_buf_cur= rowids_buf;
+ mrr_buf_last= mrr_buf_cur;
+ mrr_buf_cur= mrr_buf;
DBUG_RETURN(0);
}
+/*
+ my_qsort2-compatible function to compare key tuples
+*/
+
+int DsMrr_impl::key_tuple_cmp(void* arg, uchar* key1, uchar* key2)
+{
+ DsMrr_impl *dsmrr= (DsMrr_impl*)arg;
+ TABLE *table= dsmrr->h->table;
+
+ KEY_PART_INFO *part= table->key_info[table->s->primary_key].key_part;
+ uchar *key1_end= key1 + dsmrr->cpk_tuple_length;
+
+ while (key1 < key1_end)
+ {
+ Field* f = part->field;
+ int len = part->store_length;
+ int res = f->cmp(key1, key2);
+ if (res)
+ return res;
+ key1 += len;
+ key2 += len;
+ part++;
+ }
+ return 0;
+}
+
+
+/*
+ DS-MRR/CPK: Fill the buffer with (lookup_tuple, range_id) pairs and sort
+
+ SYNOPSIS
+ DsMrr_impl::dsmrr_fill_buffer_cpk()
+
+ DESCRIPTION
+ DS-MRR/CPK: Fill the buffer with (lookup_tuple, range_id) pairs and sort
+
+ dsmrr_eof is set to indicate whether we've exhausted the list of ranges
+ we're scanning.
+*/
+
+void DsMrr_impl::dsmrr_fill_buffer_cpk()
+{
+ int res;
+ KEY_MULTI_RANGE cur_range;
+ DBUG_ENTER("DsMrr_impl::dsmrr_fill_buffer_cpk");
+
+ mrr_buf_cur= mrr_buf;
+ while ((mrr_buf_cur < mrr_buf_end) &&
+ !(res= h->mrr_funcs.next(h->mrr_iter, &cur_range)))
+ {
+ DBUG_ASSERT(cur_range.range_flag & EQ_RANGE);
+ DBUG_ASSERT(!cpk_tuple_length ||
+ cpk_tuple_length == cur_range.start_key.length);
+ if (!cpk_tuple_length)
+ {
+ cpk_tuple_length= cur_range.start_key.length;
+ cpk_is_unique_scan= test(table->key_info[h->active_index].key_parts ==
+ my_count_bits(cur_range.start_key.keypart_map));
+ uint elem_size= cpk_tuple_length + (int)is_mrr_assoc * sizeof(void*);
+ mrr_buf_last= mrr_buf + ((mrr_buf_end - mrr_buf)/elem_size) * elem_size;
+ mrr_buf_end= mrr_buf_last;
+ }
+
+ /* Put key, or {key, range_id} pair into the buffer */
+ memcpy(mrr_buf_cur, cur_range.start_key.key, cpk_tuple_length);
+ mrr_buf_cur += cpk_tuple_length;
+
+ if (is_mrr_assoc)
+ {
+ memcpy(mrr_buf_cur, &cur_range.ptr, sizeof(void*));
+ mrr_buf_cur += sizeof(void*);
+ }
+ }
+
+ dsmrr_eof= test(res);
+
+ /* Sort the buffer contents by rowid */
+ uint elem_size= cpk_tuple_length + (int)is_mrr_assoc * sizeof(void*);
+ uint n_rowids= (mrr_buf_cur - mrr_buf) / elem_size;
+
+ my_qsort2(mrr_buf, n_rowids, elem_size,
+ (qsort2_cmp)DsMrr_impl::key_tuple_cmp, (void*)this);
+ mrr_buf_last= mrr_buf_cur;
+ mrr_buf_cur= mrr_buf;
+ DBUG_VOID_RETURN;
+}
+
+
+/*
+ DS-MRR/CPK: multi_range_read_next() function
+
+ DESCRIPTION
+ DsMrr_impl::dsmrr_next_cpk()
+ range_info OUT identifier of range that the returned record belongs to
+
+ DESCRIPTION
+ DS-MRR/CPK: multi_range_read_next() function.
+ This is similar to DsMrr_impl::dsmrr_next(), the differences are that
+ - we get records with index_read(), not with rnd_pos()
+ - we may get multiple records for one key (=element of the buffer)
+ - unlike dsmrr_fill_buffer(), dsmrr_fill_buffer_cpk() never fails.
+
+ RETURN
+ 0 OK, next record was successfully read
+ HA_ERR_END_OF_FILE End of records
+ Other Some other error
+*/
+
+int DsMrr_impl::dsmrr_next_cpk(char **range_info)
+{
+ int res;
+
+ while (cpk_have_range)
+ {
+
+ if (h->mrr_funcs.skip_record &&
+ h->mrr_funcs.skip_record(h->mrr_iter, cpk_saved_range_info, NULL))
+ {
+ cpk_have_range= FALSE;
+ break;
+ }
+
+ res= h->index_next_same(table->record[0], mrr_buf_cur, cpk_tuple_length);
+
+ if (h->mrr_funcs.skip_index_tuple &&
+ h->mrr_funcs.skip_index_tuple(h->mrr_iter, cpk_saved_range_info))
+ continue;
+
+ if (res != HA_ERR_END_OF_FILE)
+ {
+ if (is_mrr_assoc)
+ memcpy(range_info, &cpk_saved_range_info, sizeof(void*));
+ return res;
+ }
+
+ /* No more records in this range. Exit this loop and go get another range */
+ cpk_have_range= FALSE;
+ }
+
+ do
+ {
+ /* First, make sure we have a range at start of the buffer */
+ if (mrr_buf_cur == mrr_buf_last)
+ {
+ if (dsmrr_eof)
+ {
+ res= HA_ERR_END_OF_FILE;
+ goto end;
+ }
+ dsmrr_fill_buffer_cpk();
+ }
+ if (mrr_buf_cur == mrr_buf_last)
+ {
+ res= HA_ERR_END_OF_FILE;
+ goto end;
+ }
+
+ /* Ok, got the range. Try making a lookup. */
+ uchar *lookup_tuple= mrr_buf_cur;
+ mrr_buf_cur += cpk_tuple_length;
+ if (is_mrr_assoc)
+ {
+ memcpy(&cpk_saved_range_info, mrr_buf_cur, sizeof(void*));
+ mrr_buf_cur += sizeof(void*) * test(is_mrr_assoc);
+ }
+
+ if (h->mrr_funcs.skip_record &&
+ h->mrr_funcs.skip_record(h->mrr_iter, cpk_saved_range_info, NULL))
+ continue;
+
+ res= h->index_read(table->record[0], lookup_tuple, cpk_tuple_length,
+ HA_READ_KEY_EXACT);
+
+ /*
+ Check pushed index condition. Performance-wise, it does not make any
+ sense to put this call here (the above call has already accessed the full
+ record). That's the best I could do, though, because:
+ - ha_innobase doesn't support IndexConditionPushdown on clustered PK
+ - MRR interface doesn't allow the storage engine to refuse a pushed index
+ condition.
+ Having this call here is not fully harmless: EXPLAIN shows "pushed index
+ condition", which is technically true but doesn't bring the benefits that
+ one might expect.
+ */
+ if (h->mrr_funcs.skip_index_tuple &&
+ h->mrr_funcs.skip_index_tuple(h->mrr_iter, cpk_saved_range_info))
+ continue;
+
+ if (res && res != HA_ERR_END_OF_FILE)
+ goto end;
+
+ if (!res)
+ {
+ memcpy(range_info, &cpk_saved_range_info, sizeof(void*));
+ /*
+ Attempt reading more rows from this range only if there actually can
+ be multiple matches:
+ */
+ cpk_have_range= !cpk_is_unique_scan;
+ break;
+ }
+ } while (true);
+
+end:
+ return res;
+}
+
+
/**
DS-MRR implementation: multi_range_read_next() function
*/
@@ -536,10 +768,13 @@
if (use_default_impl)
return h->handler::multi_range_read_next(range_info);
+
+ if (doing_cpk_scan)
+ return dsmrr_next_cpk(range_info);
do
{
- if (rowids_buf_cur == rowids_buf_last)
+ if (mrr_buf_cur == mrr_buf_last)
{
if (dsmrr_eof)
{
@@ -552,17 +787,17 @@
}
/* return eof if there are no rowids in the buffer after re-fill attempt */
- if (rowids_buf_cur == rowids_buf_last)
+ if (mrr_buf_cur == mrr_buf_last)
{
res= HA_ERR_END_OF_FILE;
goto end;
}
- rowid= rowids_buf_cur;
+ rowid= mrr_buf_cur;
if (is_mrr_assoc)
- memcpy(&cur_range_info, rowids_buf_cur + h->ref_length, sizeof(uchar**));
+ memcpy(&cur_range_info, mrr_buf_cur + h->ref_length, sizeof(uchar**));
- rowids_buf_cur += h->ref_length + sizeof(void*) * test(is_mrr_assoc);
+ mrr_buf_cur += h->ref_length + sizeof(void*) * test(is_mrr_assoc);
if (h2->mrr_funcs.skip_record &&
h2->mrr_funcs.skip_record(h2->mrr_iter, (char *) cur_range_info, rowid))
continue;
@@ -582,7 +817,8 @@
/**
DS-MRR implementation: multi_range_read_info() function
*/
-ha_rows DsMrr_impl::dsmrr_info(uint keyno, uint n_ranges, uint rows,
+ha_rows DsMrr_impl::dsmrr_info(uint keyno, uint n_ranges, uint rows,
+ uint key_parts,
uint *bufsz, uint *flags, COST_VECT *cost)
{
ha_rows res;
@@ -590,8 +826,8 @@
uint def_bufsz= *bufsz;
/* Get cost/flags/mem_usage of default MRR implementation */
- res= h->handler::multi_range_read_info(keyno, n_ranges, rows, &def_bufsz,
- &def_flags, cost);
+ res= h->handler::multi_range_read_info(keyno, n_ranges, rows, key_parts,
+ &def_bufsz, &def_flags, cost);
DBUG_ASSERT(!res);
if ((*flags & HA_MRR_USE_DEFAULT_IMPL) ||
@@ -683,7 +919,33 @@
return FALSE;
}
-/**
+
+/*
+ Check if key/flags allow DS-MRR/CPK strategy to be used
+
+ SYNOPSIS
+ DsMrr_impl::check_cpk_scan()
+ keyno Index that will be used
+ mrr_flags
+
+ DESCRIPTION
+ Check if key/flags allow DS-MRR/CPK strategy to be used.
+
+ RETURN
+ TRUE DS-MRR/CPK should be used
+ FALSE Otherwise
+*/
+
+bool DsMrr_impl::check_cpk_scan(uint keyno, uint mrr_flags)
+{
+ return test((mrr_flags & HA_MRR_SINGLE_POINT) &&
+ !(mrr_flags & HA_MRR_SORTED) &&
+ keyno == table->s->primary_key &&
+ h->primary_key_is_clustered());
+}
+
+
+/*
DS-MRR Internals: Choose between Default MRR implementation and DS-MRR
Make the choice between using Default MRR implementation and DS-MRR.
@@ -706,14 +968,18 @@
@retval FALSE DS-MRR implementation should be used
*/
+
bool DsMrr_impl::choose_mrr_impl(uint keyno, ha_rows rows, uint *flags,
uint *bufsz, COST_VECT *cost)
{
COST_VECT dsmrr_cost;
bool res;
THD *thd= current_thd;
+
+ doing_cpk_scan= check_cpk_scan(keyno, *flags);
if (thd->variables.optimizer_use_mrr == 2 || *flags & HA_MRR_INDEX_ONLY ||
- (keyno == table->s->primary_key && h->primary_key_is_clustered()) ||
+ (keyno == table->s->primary_key && h->primary_key_is_clustered() &&
+ !doing_cpk_scan) ||
key_uses_partial_cols(table, keyno))
{
/* Use the default implementation */
diff -urN --exclude='.*' maria-5.3-dsmrr-for-cpk-clean/sql/multi_range_read.h maria-5.3-dsmrr-for-cpk-noc/sql/multi_range_read.h
--- maria-5.3-dsmrr-for-cpk-clean/sql/multi_range_read.h 2010-06-22 19:10:46.000000000 +0400
+++ maria-5.3-dsmrr-for-cpk-noc/sql/multi_range_read.h 2010-06-22 23:28:40.000000000 +0400
@@ -1,16 +1,76 @@
/*
- This file contains declarations for
- - Disk-Sweep MultiRangeRead (DS-MRR) implementation
+ This file contains declarations for Disk-Sweep MultiRangeRead (DS-MRR)
+ implementation
*/
/**
- A Disk-Sweep MRR interface implementation
+ A Disk-Sweep implementation of MRR Interface (DS-MRR for short)
- This implementation makes range (and, in the future, 'ref') scans to read
- table rows in disk sweeps.
-
- Currently it is used by MyISAM and InnoDB. Potentially it can be used with
- any table handler that has non-clustered indexes and on-disk rows.
+ This is a "plugin"(*) for storage engines that allows make index scans
+ read table rows in rowid order. For disk-based storage engines, this is
+ faster than reading table rows in whatever-SQL-layer-makes-calls-in order.
+
+ (*) - only conceptually. No dynamic loading or binary compatibility of any
+ kind.
+
+ General scheme of things:
+
+ SQL Layer code
+ | | |
+ -v---v---v---- handler->multi_range_read_XXX() function calls
+ | | |
+ ____________________________________
+ / DS-MRR module \
+ | (scan indexes, order rowids, do |
+ | full record reads in rowid order) |
+ \____________________________________/
+ | | |
+ -|---|---|----- handler->read_range_first()/read_range_next(),
+ | | | handler->index_read(), handler->rnd_pos() calls.
+ | | |
+ v v v
+ Storage engine internals
+
+ Currently DS-MRR is used by MyISAM, InnoDB/XtraDB and Maria storage engines.
+ Potentially it can be used with any table handler that has disk-based data
+ storage and has better performance when reading data in rowid order.
+*/
+
+
+/*
+ DS-MRR implementation for one table. Create/use one object of this class for
+ each ha_{myisam/innobase/etc} object. That object will be further referred to
+ as "the handler"
+
+ There are actually three strategies
+ S1. Bypass DS-MRR, pass all calls to default implementation (i.e. to
+ MRR-to-non-MRR calls converter)
+ S2. Regular DS-MRR
+ S3. DS-MRR/CPK for doing scans on clustered primary keys.
+
+ S1 is used for cases which DS-MRR is unable to handle for some reason.
+
+ S2 is the actual DS-MRR. The basic algorithm is as follows:
+ 1. Scan the index (and only index, that is, with HA_EXTRA_KEYREAD on) and
+ fill the buffer with {rowid, range_id} pairs
+ 2. Sort the buffer by rowid
+ 3. for each {rowid, range_id} pair in the buffer
+ get record by rowid and return the {record, range_id} pair
+ 4. Repeat the above steps until we've exhausted the list of ranges we're
+ scanning.
+
+ S3 is the variant of DS-MRR for use with clustered primary keys (or any
+ clustered index). The idea is that in clustered index it is sufficient to
+ access the index in index order, and we don't need an intermediate steps to
+ get rowid (like step #1 in S2).
+
+ DS-MRR/CPK's basic algorithm is as follows:
+ 1. Collect a number of ranges (=lookup keys)
+ 2. Sort them so that they follow in index order.
+ 3. for each {lookup_key, range_id} pair in the buffer
+ get record(s) matching the lookup key and return {record, range_id} pairs
+ 4. Repeat the above steps until we've exhausted the list of ranges we're
+ scanning.
*/
class DsMrr_impl
@@ -21,21 +81,38 @@
DsMrr_impl()
: h2(NULL) {};
+ void init(handler *h_arg, TABLE *table_arg)
+ {
+ h= h_arg;
+ table= table_arg;
+ }
+ int dsmrr_init(handler *h, RANGE_SEQ_IF *seq_funcs, void *seq_init_param,
+ uint n_ranges, uint mode, HANDLER_BUFFER *buf);
+ void dsmrr_close();
+ int dsmrr_next(char **range_info);
+
+ ha_rows dsmrr_info(uint keyno, uint n_ranges, uint keys, uint key_parts,
+ uint *bufsz, uint *flags, COST_VECT *cost);
+
+ ha_rows dsmrr_info_const(uint keyno, RANGE_SEQ_IF *seq,
+ void *seq_init_param, uint n_ranges, uint *bufsz,
+ uint *flags, COST_VECT *cost);
+private:
/*
The "owner" handler object (the one that calls dsmrr_XXX functions.
It is used to retrieve full table rows by calling rnd_pos().
*/
handler *h;
TABLE *table; /* Always equal to h->table */
-private:
+
/* Secondary handler object. It is used for scanning the index */
handler *h2;
/* Buffer to store rowids, or (rowid, range_id) pairs */
- uchar *rowids_buf;
- uchar *rowids_buf_cur; /* Current position when reading/writing */
- uchar *rowids_buf_last; /* When reading: end of used buffer space */
- uchar *rowids_buf_end; /* End of the buffer */
+ uchar *mrr_buf;
+ uchar *mrr_buf_cur; /* Current position when reading/writing */
+ uchar *mrr_buf_last; /* When reading: end of used buffer space */
+ uchar *mrr_buf_end; /* End of the buffer */
bool dsmrr_eof; /* TRUE <=> We have reached EOF when reading index tuples */
@@ -43,28 +120,31 @@
bool is_mrr_assoc;
bool use_default_impl; /* TRUE <=> shortcut all calls to default MRR impl */
-public:
- void init(handler *h_arg, TABLE *table_arg)
- {
- h= h_arg;
- table= table_arg;
- }
- int dsmrr_init(handler *h, RANGE_SEQ_IF *seq_funcs, void *seq_init_param,
- uint n_ranges, uint mode, HANDLER_BUFFER *buf);
- void dsmrr_close();
- int dsmrr_fill_buffer();
- int dsmrr_next(char **range_info);
- ha_rows dsmrr_info(uint keyno, uint n_ranges, uint keys, uint *bufsz,
- uint *flags, COST_VECT *cost);
+ bool doing_cpk_scan; /* TRUE <=> DS-MRR/CPK variant is used */
+
+ /** DS-MRR/CPK variables start */
+
+ /* Length of lookup tuple being used, in bytes */
+ uint cpk_tuple_length;
+ /*
+ TRUE <=> We're scanning on a full primary key (and not on prefix), and so
+ can get max. one match for each key
+ */
+ bool cpk_is_unique_scan;
+ /* TRUE<=> we're in a middle of enumerating records from a range */
+ bool cpk_have_range;
+ /* Valid if cpk_have_range==TRUE: range_id of the range we're enumerating */
+ char *cpk_saved_range_info;
- ha_rows dsmrr_info_const(uint keyno, RANGE_SEQ_IF *seq,
- void *seq_init_param, uint n_ranges, uint *bufsz,
- uint *flags, COST_VECT *cost);
-private:
bool choose_mrr_impl(uint keyno, ha_rows rows, uint *flags, uint *bufsz,
COST_VECT *cost);
bool get_disk_sweep_mrr_cost(uint keynr, ha_rows rows, uint flags,
uint *buffer_size, COST_VECT *cost);
+ bool check_cpk_scan(uint keyno, uint mrr_flags);
+ static int key_tuple_cmp(void* arg, uchar* key1, uchar* key2);
+ int dsmrr_fill_buffer();
+ void dsmrr_fill_buffer_cpk();
+ int dsmrr_next_cpk(char **range_info);
};
diff -urN --exclude='.*' maria-5.3-dsmrr-for-cpk-clean/sql/opt_range.cc maria-5.3-dsmrr-for-cpk-noc/sql/opt_range.cc
--- maria-5.3-dsmrr-for-cpk-clean/sql/opt_range.cc 2010-06-22 19:10:46.000000000 +0400
+++ maria-5.3-dsmrr-for-cpk-noc/sql/opt_range.cc 2010-06-22 23:28:40.000000000 +0400
@@ -8006,6 +8006,7 @@
quick->mrr_buf_size= thd->variables.mrr_buff_size;
if (table->file->multi_range_read_info(quick->index, 1, (uint)records,
+ uint(-1),
&quick->mrr_buf_size,
&quick->mrr_flags, &cost))
goto err;
diff -urN --exclude='.*' maria-5.3-dsmrr-for-cpk-clean/sql/sql_join_cache.cc maria-5.3-dsmrr-for-cpk-noc/sql/sql_join_cache.cc
--- maria-5.3-dsmrr-for-cpk-clean/sql/sql_join_cache.cc 2010-06-22 19:10:46.000000000 +0400
+++ maria-5.3-dsmrr-for-cpk-noc/sql/sql_join_cache.cc 2010-06-22 23:28:40.000000000 +0400
@@ -2376,8 +2376,8 @@
*/
if (!file->inited)
file->ha_index_init(join_tab->ref.key, 1);
- if ((error= file->multi_range_read_init(seq_funcs, (void*) this, ranges,
- mrr_mode, &mrr_buff)))
+ if ((error= file->multi_range_read_init(seq_funcs, (void*) this, ranges,
+ mrr_mode, &mrr_buff)))
rc= error < 0 ? NESTED_LOOP_NO_MORE_ROWS: NESTED_LOOP_ERROR;
return rc;
diff -urN --exclude='.*' maria-5.3-dsmrr-for-cpk-clean/sql/sql_select.cc maria-5.3-dsmrr-for-cpk-noc/sql/sql_select.cc
--- maria-5.3-dsmrr-for-cpk-clean/sql/sql_select.cc 2010-06-22 19:10:46.000000000 +0400
+++ maria-5.3-dsmrr-for-cpk-noc/sql/sql_select.cc 2010-06-22 19:06:54.000000000 +0400
@@ -7318,10 +7318,11 @@
case JT_EQ_REF:
if (cache_level <= 4)
return 0;
- flags= HA_MRR_NO_NULL_ENDPOINTS;
+ flags= HA_MRR_NO_NULL_ENDPOINTS | HA_MRR_SINGLE_POINT;
if (tab->table->covering_keys.is_set(tab->ref.key))
flags|= HA_MRR_INDEX_ONLY;
rows= tab->table->file->multi_range_read_info(tab->ref.key, 10, 20,
+ tab->ref.key_parts,
&bufsz, &flags, &cost);
if ((rows != HA_POS_ERROR) && !(flags & HA_MRR_USE_DEFAULT_IMPL) &&
(!(flags & HA_MRR_NO_ASSOCIATION) || cache_level > 6) &&
diff -urN --exclude='.*' maria-5.3-dsmrr-for-cpk-clean/storage/maria/ha_maria.cc maria-5.3-dsmrr-for-cpk-noc/storage/maria/ha_maria.cc
--- maria-5.3-dsmrr-for-cpk-clean/storage/maria/ha_maria.cc 2010-06-22 19:10:46.000000000 +0400
+++ maria-5.3-dsmrr-for-cpk-noc/storage/maria/ha_maria.cc 2010-06-22 23:28:40.000000000 +0400
@@ -3501,8 +3501,8 @@
***************************************************************************/
int ha_maria::multi_range_read_init(RANGE_SEQ_IF *seq, void *seq_init_param,
- uint n_ranges, uint mode,
- HANDLER_BUFFER *buf)
+ uint n_ranges, uint mode,
+ HANDLER_BUFFER *buf)
{
return ds_mrr.dsmrr_init(this, seq, seq_init_param, n_ranges, mode, buf);
}
@@ -3528,11 +3528,11 @@
}
ha_rows ha_maria::multi_range_read_info(uint keyno, uint n_ranges, uint keys,
- uint *bufsz, uint *flags,
- COST_VECT *cost)
+ uint key_parts, uint *bufsz,
+ uint *flags, COST_VECT *cost)
{
ds_mrr.init(this, table);
- return ds_mrr.dsmrr_info(keyno, n_ranges, keys, bufsz, flags, cost);
+ return ds_mrr.dsmrr_info(keyno, n_ranges, keys, key_parts, bufsz, flags, cost);
}
/* MyISAM MRR implementation ends */
diff -urN --exclude='.*' maria-5.3-dsmrr-for-cpk-clean/storage/maria/ha_maria.h maria-5.3-dsmrr-for-cpk-noc/storage/maria/ha_maria.h
--- maria-5.3-dsmrr-for-cpk-clean/storage/maria/ha_maria.h 2010-06-22 19:10:46.000000000 +0400
+++ maria-5.3-dsmrr-for-cpk-noc/storage/maria/ha_maria.h 2010-06-22 23:28:40.000000000 +0400
@@ -181,7 +181,8 @@
uint n_ranges, uint *bufsz,
uint *flags, COST_VECT *cost);
ha_rows multi_range_read_info(uint keyno, uint n_ranges, uint keys,
- uint *bufsz, uint *flags, COST_VECT *cost);
+ uint key_parts, uint *bufsz,
+ uint *flags, COST_VECT *cost);
/* Index condition pushdown implementation */
Item *idx_cond_push(uint keyno, Item* idx_cond);
diff -urN --exclude='.*' maria-5.3-dsmrr-for-cpk-clean/storage/myisam/ha_myisam.cc maria-5.3-dsmrr-for-cpk-noc/storage/myisam/ha_myisam.cc
--- maria-5.3-dsmrr-for-cpk-clean/storage/myisam/ha_myisam.cc 2010-06-22 19:10:46.000000000 +0400
+++ maria-5.3-dsmrr-for-cpk-noc/storage/myisam/ha_myisam.cc 2010-06-22 23:28:40.000000000 +0400
@@ -2244,11 +2244,11 @@
}
ha_rows ha_myisam::multi_range_read_info(uint keyno, uint n_ranges, uint keys,
- uint *bufsz, uint *flags,
- COST_VECT *cost)
+ uint key_parts, uint *bufsz,
+ uint *flags, COST_VECT *cost)
{
ds_mrr.init(this, table);
- return ds_mrr.dsmrr_info(keyno, n_ranges, keys, bufsz, flags, cost);
+ return ds_mrr.dsmrr_info(keyno, n_ranges, keys, key_parts, bufsz, flags, cost);
}
/* MyISAM MRR implementation ends */
diff -urN --exclude='.*' maria-5.3-dsmrr-for-cpk-clean/storage/myisam/ha_myisam.h maria-5.3-dsmrr-for-cpk-noc/storage/myisam/ha_myisam.h
--- maria-5.3-dsmrr-for-cpk-clean/storage/myisam/ha_myisam.h 2010-06-22 19:10:46.000000000 +0400
+++ maria-5.3-dsmrr-for-cpk-noc/storage/myisam/ha_myisam.h 2010-06-22 23:28:40.000000000 +0400
@@ -169,7 +169,8 @@
uint n_ranges, uint *bufsz,
uint *flags, COST_VECT *cost);
ha_rows multi_range_read_info(uint keyno, uint n_ranges, uint keys,
- uint *bufsz, uint *flags, COST_VECT *cost);
+ uint key_parts, uint *bufsz,
+ uint *flags, COST_VECT *cost);
/* Index condition pushdown implementation */
Item *idx_cond_push(uint keyno, Item* idx_cond);
diff -urN --exclude='.*' maria-5.3-dsmrr-for-cpk-clean/storage/xtradb/handler/ha_innodb.cc maria-5.3-dsmrr-for-cpk-noc/storage/xtradb/handler/ha_innodb.cc
--- maria-5.3-dsmrr-for-cpk-clean/storage/xtradb/handler/ha_innodb.cc 2010-06-22 19:10:46.000000000 +0400
+++ maria-5.3-dsmrr-for-cpk-noc/storage/xtradb/handler/ha_innodb.cc 2010-06-22 23:28:40.000000000 +0400
@@ -11025,7 +11025,8 @@
*/
int ha_innobase::multi_range_read_init(RANGE_SEQ_IF *seq, void *seq_init_param,
- uint n_ranges, uint mode, HANDLER_BUFFER *buf)
+ uint n_ranges, uint mode,
+ HANDLER_BUFFER *buf)
{
return ds_mrr.dsmrr_init(this, seq, seq_init_param, n_ranges, mode, buf);
}
@@ -11052,12 +11053,13 @@
return res;
}
-ha_rows ha_innobase::multi_range_read_info(uint keyno, uint n_ranges,
- uint keys, uint *bufsz,
+ha_rows ha_innobase::multi_range_read_info(uint keyno, uint n_ranges, uint keys,
+ uint key_parts, uint *bufsz,
uint *flags, COST_VECT *cost)
{
ds_mrr.init(this, table);
- ha_rows res= ds_mrr.dsmrr_info(keyno, n_ranges, keys, bufsz, flags, cost);
+ ha_rows res= ds_mrr.dsmrr_info(keyno, n_ranges, keys, key_parts, bufsz,
+ flags, cost);
return res;
}
diff -urN --exclude='.*' maria-5.3-dsmrr-for-cpk-clean/storage/xtradb/handler/ha_innodb.h maria-5.3-dsmrr-for-cpk-noc/storage/xtradb/handler/ha_innodb.h
--- maria-5.3-dsmrr-for-cpk-clean/storage/xtradb/handler/ha_innodb.h 2010-06-22 19:10:46.000000000 +0400
+++ maria-5.3-dsmrr-for-cpk-noc/storage/xtradb/handler/ha_innodb.h 2010-06-22 23:28:40.000000000 +0400
@@ -217,7 +217,8 @@
uint n_ranges, uint *bufsz,
uint *flags, COST_VECT *cost);
ha_rows multi_range_read_info(uint keyno, uint n_ranges, uint keys,
- uint *bufsz, uint *flags, COST_VECT *cost);
+ uint key_parts, uint *bufsz,
+ uint *flags, COST_VECT *cost);
DsMrr_impl ds_mrr;
Item *idx_cond_push(uint keyno, Item* idx_cond);
BR
Sergey
--
Sergey Petrunia, Software Developer
Monty Program AB, http://askmonty.org
Blog: http://s.petrunia.net/blog