maria-developers team mailing list archive
-
maria-developers team
-
Mailing list archive
-
Message #03537
DS-MRR improvements patch ready for review
Hello Igor,
Please find attached the combined patch of DS-MRR for clustered PKs and key
sorting.
The tree is also available on Launchpad and in buildbot:
https://code.launchpad.net/~maria-captains/maria/5.3-dsmrr-cpk
and all observed buildbot failures in the tree are known to occur without the
new code as well.
BR
Sergey
--
Sergey Petrunia, Software Developer
Monty Program AB, http://askmonty.org
Blog: http://s.petrunia.net/blog
diff -urN --exclude='.*' 5.3-noc/mysql-test/r/innodb_mrr_cpk.result maria-5.3-dsmrr-cpk-r5-noc/mysql-test/r/innodb_mrr_cpk.result
--- 5.3-noc/mysql-test/r/innodb_mrr_cpk.result 1970-01-01 01:00:00.000000000 +0100
+++ maria-5.3-dsmrr-cpk-r5-noc/mysql-test/r/innodb_mrr_cpk.result 2010-08-14 17:28:23.000000000 +0200
@@ -0,0 +1,148 @@
+drop table if exists t0,t1,t2,t3;
+set @save_join_cache_level=@@join_cache_level;
+set join_cache_level=6;
+set @save_storage_engine=@@storage_engine;
+set storage_engine=innodb;
+create table t0(a int);
+insert into t0 values (0),(1),(2),(3),(4),(5),(6),(7),(8),(9);
+create table t1(a char(8), b char(8), filler char(100), primary key(a));
+show create table t1;
+Table Create Table
+t1 CREATE TABLE `t1` (
+ `a` char(8) NOT NULL DEFAULT '',
+ `b` char(8) DEFAULT NULL,
+ `filler` char(100) DEFAULT NULL,
+ PRIMARY KEY (`a`)
+) ENGINE=InnoDB DEFAULT CHARSET=latin1
+insert into t1 select
+concat('a-', 1000 + A.a + B.a*10 + C.a*100, '=A'),
+concat('b-', 1000 + A.a + B.a*10 + C.a*100, '=B'),
+'filler'
+from t0 A, t0 B, t0 C;
+create table t2 (a char(8));
+insert into t2 values ('a-1010=A'), ('a-1030=A'), ('a-1020=A');
+This should use join buffer:
+explain select * from t1, t2 where t1.a=t2.a;
+id select_type table type possible_keys key key_len ref rows Extra
+1 SIMPLE t2 ALL NULL NULL NULL NULL 3
+1 SIMPLE t1 eq_ref PRIMARY PRIMARY 8 test.t2.a 1 Using join buffer
+This output must be sorted by value of t1.a:
+select * from t1, t2 where t1.a=t2.a;
+a b filler a
+a-1010=A b-1010=B filler a-1010=A
+a-1020=A b-1020=B filler a-1020=A
+a-1030=A b-1030=B filler a-1030=A
+drop table t1, t2;
+create table t1(
+a char(8) character set utf8, b int, filler char(100),
+primary key(a,b)
+);
+insert into t1 select
+concat('a-', 1000 + A.a + B.a*10 + C.a*100, '=A'),
+1000 + A.a + B.a*10 + C.a*100,
+'filler'
+from t0 A, t0 B, t0 C;
+create table t2 (a char(8) character set utf8, b int);
+insert into t2 values ('a-1010=A', 1010), ('a-1030=A', 1030), ('a-1020=A', 1020);
+explain select * from t1, t2 where t1.a=t2.a and t1.b=t2.b;
+id select_type table type possible_keys key key_len ref rows Extra
+1 SIMPLE t2 ALL NULL NULL NULL NULL 3
+1 SIMPLE t1 eq_ref PRIMARY PRIMARY 28 test.t2.a,test.t2.b 1 Using join buffer
+select * from t1, t2 where t1.a=t2.a and t1.b=t2.b;
+a b filler a b
+a-1010=A 1010 filler a-1010=A 1010
+a-1020=A 1020 filler a-1020=A 1020
+a-1030=A 1030 filler a-1030=A 1030
+insert into t2 values ('a-1030=A', 1030), ('a-1020=A', 1020);
+explain select * from t1, t2 where t1.a=t2.a and t1.b=t2.b;
+id select_type table type possible_keys key key_len ref rows Extra
+1 SIMPLE t2 ALL NULL NULL NULL NULL 5
+1 SIMPLE t1 eq_ref PRIMARY PRIMARY 28 test.t2.a,test.t2.b 1 Using join buffer
+select * from t1, t2 where t1.a=t2.a and t1.b=t2.b;
+a b filler a b
+a-1010=A 1010 filler a-1010=A 1010
+a-1020=A 1020 filler a-1020=A 1020
+a-1020=A 1020 filler a-1020=A 1020
+a-1030=A 1030 filler a-1030=A 1030
+a-1030=A 1030 filler a-1030=A 1030
+drop table t1, t2;
+create table t1(
+a varchar(8) character set utf8, b int, filler char(100),
+primary key(a,b)
+);
+insert into t1 select
+concat('a-', 1000 + A.a + B.a*10 + C.a*100, '=A'),
+1000 + A.a + B.a*10 + C.a*100,
+'filler'
+from t0 A, t0 B, t0 C;
+create table t2 (a char(8) character set utf8, b int);
+insert into t2 values ('a-1010=A', 1010), ('a-1030=A', 1030), ('a-1020=A', 1020);
+explain select * from t1, t2 where t1.a=t2.a and t1.b=t2.b;
+id select_type table type possible_keys key key_len ref rows Extra
+1 SIMPLE t2 ALL NULL NULL NULL NULL 3
+1 SIMPLE t1 eq_ref PRIMARY PRIMARY 30 test.t2.a,test.t2.b 1 Using index condition(BKA); Using join buffer
+select * from t1, t2 where t1.a=t2.a and t1.b=t2.b;
+a b filler a b
+a-1010=A 1010 filler a-1010=A 1010
+a-1020=A 1020 filler a-1020=A 1020
+a-1030=A 1030 filler a-1030=A 1030
+explain select * from t1, t2 where t1.a=t2.a;
+id select_type table type possible_keys key key_len ref rows Extra
+1 SIMPLE t2 ALL NULL NULL NULL NULL 3
+1 SIMPLE t1 ref PRIMARY PRIMARY 26 test.t2.a 1 Using index condition(BKA); Using join buffer
+select * from t1, t2 where t1.a=t2.a;
+a b filler a b
+a-1010=A 1010 filler a-1010=A 1010
+a-1020=A 1020 filler a-1020=A 1020
+a-1030=A 1030 filler a-1030=A 1030
+drop table t1, t2;
+create table t1 (a int, b int, c int, filler char(100), primary key(a,b,c));
+insert into t1 select A.a, B.a, C.a, 'filler' from t0 A, t0 B, t0 C;
+insert into t1 values (11, 11, 11, 'filler');
+insert into t1 values (11, 11, 12, 'filler');
+insert into t1 values (11, 11, 13, 'filler');
+insert into t1 values (11, 22, 1234, 'filler');
+insert into t1 values (11, 33, 124, 'filler');
+insert into t1 values (11, 33, 125, 'filler');
+create table t2 (a int, b int);
+insert into t2 values (11,33), (11,22), (11,11);
+explain select * from t1, t2 where t1.a=t2.a and t1.b=t2.b;
+id select_type table type possible_keys key key_len ref rows Extra
+1 SIMPLE t2 ALL NULL NULL NULL NULL 3
+1 SIMPLE t1 ref PRIMARY PRIMARY 8 test.t2.a,test.t2.b 1 Using join buffer
+select * from t1, t2 where t1.a=t2.a and t1.b=t2.b;
+a b c filler a b
+11 11 11 filler 11 11
+11 11 12 filler 11 11
+11 11 13 filler 11 11
+11 22 1234 filler 11 22
+11 33 124 filler 11 33
+11 33 125 filler 11 33
+set join_cache_level=0;
+select * from t1, t2 where t1.a=t2.a and t1.b=t2.b;
+a b c filler a b
+11 33 124 filler 11 33
+11 33 125 filler 11 33
+11 22 1234 filler 11 22
+11 11 11 filler 11 11
+11 11 12 filler 11 11
+11 11 13 filler 11 11
+set join_cache_level=6;
+explain select * from t1, t2 where t1.a=t2.a and t2.b + t1.b > 100;
+id select_type table type possible_keys key key_len ref rows Extra
+1 SIMPLE t2 ALL NULL NULL NULL NULL 3
+1 SIMPLE t1 ref PRIMARY PRIMARY 4 test.t2.a 1 Using index condition(BKA); Using join buffer
+select * from t1, t2 where t1.a=t2.a and t2.b + t1.b > 100;
+a b c filler a b
+set optimizer_switch='index_condition_pushdown=off';
+explain select * from t1, t2 where t1.a=t2.a and t2.b + t1.b > 100;
+id select_type table type possible_keys key key_len ref rows Extra
+1 SIMPLE t2 ALL NULL NULL NULL NULL 3
+1 SIMPLE t1 ref PRIMARY PRIMARY 4 test.t2.a 1 Using where; Using join buffer
+select * from t1, t2 where t1.a=t2.a and t2.b + t1.b > 100;
+a b c filler a b
+set optimizer_switch='index_condition_pushdown=on';
+drop table t1,t2;
+set @@join_cache_level= @save_join_cache_level;
+set storage_engine=@save_storage_engine;
+drop table t0;
diff -urN --exclude='.*' 5.3-noc/mysql-test/r/join_nested_jcl6.result maria-5.3-dsmrr-cpk-r5-noc/mysql-test/r/join_nested_jcl6.result
--- 5.3-noc/mysql-test/r/join_nested_jcl6.result 2010-08-14 17:29:22.000000000 +0200
+++ maria-5.3-dsmrr-cpk-r5-noc/mysql-test/r/join_nested_jcl6.result 2010-08-14 17:28:23.000000000 +0200
@@ -865,12 +865,12 @@
(t1,t2)
ON t3.a=1 AND t3.b=t2.b AND t2.b=t4.b;
a b a b a b
-4 2 1 2 3 2
4 2 1 2 4 2
4 2 1 2 3 2
4 2 1 2 4 2
4 2 1 2 3 2
4 2 1 2 4 2
+4 2 1 2 3 2
NULL NULL 2 2 3 2
NULL NULL 2 2 4 2
EXPLAIN EXTENDED
@@ -1105,8 +1105,8 @@
(t8.b=t9.b OR t8.c IS NULL) AND
(t9.a=1);
a b a b a b a b a b a b a b a b a b a b
-1 2 3 2 4 2 1 2 3 2 2 2 6 2 2 2 0 2 1 2
1 2 3 2 4 2 1 2 4 2 2 2 6 2 2 2 0 2 1 2
+1 2 3 2 4 2 1 2 3 2 2 2 6 2 2 2 0 2 1 2
1 2 3 2 4 2 1 2 3 2 3 1 6 2 1 1 NULL NULL 1 1
1 2 3 2 4 2 1 2 4 2 3 1 6 2 1 1 NULL NULL 1 1
1 2 3 2 4 2 1 2 3 2 3 1 6 2 1 1 NULL NULL 1 2
@@ -1785,8 +1785,8 @@
ON t6.b >= 2 AND t5.b=t7.b AND
(t8.a > 0 OR t8.c IS NULL);
a b a b a b a b
-2 2 1 2 2 2 1 2
2 2 3 2 2 2 1 2
+2 2 1 2 2 2 1 2
1 1 1 2 1 1 NULL NULL
1 1 3 2 1 1 NULL NULL
3 3 NULL NULL NULL NULL NULL NULL
diff -urN --exclude='.*' 5.3-noc/mysql-test/r/join_outer_jcl6.result maria-5.3-dsmrr-cpk-r5-noc/mysql-test/r/join_outer_jcl6.result
--- 5.3-noc/mysql-test/r/join_outer_jcl6.result 2010-08-14 17:29:22.000000000 +0200
+++ maria-5.3-dsmrr-cpk-r5-noc/mysql-test/r/join_outer_jcl6.result 2010-08-14 17:28:23.000000000 +0200
@@ -352,14 +352,14 @@
Lilliana Angelovska NULL NULL NULL
select t1.name, t2.name, t2.id,t3.id from t1 right join t2 on (t1.id = t2.owner) right join t1 as t3 on t3.id=t2.owner;
name name id id
-Antonio Paz El Gato 1 1
Antonio Paz Perrito 2 1
+Antonio Paz El Gato 1 1
Thimble Smith Happy 3 3
NULL NULL NULL 2
select t1.name, t2.name, t2.id, t2.owner, t3.id from t1 left join t2 on (t1.id = t2.owner) right join t1 as t3 on t3.id=t2.owner;
name name id owner id
-Antonio Paz El Gato 1 1 1
Antonio Paz Perrito 2 1 1
+Antonio Paz El Gato 1 1 1
Thimble Smith Happy 3 3 3
NULL NULL NULL NULL 2
drop table t1,t2;
@@ -413,9 +413,9 @@
select t1.*, t2.* from t1 left join t2 on t1.n = t2.n and
t1.m = t2.m where t1.n = 1;
n m o n m o
-1 2 11 1 2 3
-1 2 7 1 2 3
1 2 9 1 2 3
+1 2 7 1 2 3
+1 2 11 1 2 3
1 3 9 NULL NULL NULL
select t1.*, t2.* from t1 left join t2 on t1.n = t2.n and
t1.m = t2.m where t1.n = 1 order by t1.o;
diff -urN --exclude='.*' 5.3-noc/mysql-test/r/optimizer_switch.result maria-5.3-dsmrr-cpk-r5-noc/mysql-test/r/optimizer_switch.result
--- 5.3-noc/mysql-test/r/optimizer_switch.result 2010-08-14 17:29:22.000000000 +0200
+++ maria-5.3-dsmrr-cpk-r5-noc/mysql-test/r/optimizer_switch.result 2010-08-14 17:28:23.000000000 +0200
@@ -4,19 +4,19 @@
#
select @@optimizer_switch;
@@optimizer_switch
-index_merge=on,index_merge_union=on,index_merge_sort_union=on,index_merge_intersection=on,index_condition_pushdown=on,firstmatch=on,loosescan=on,materialization=on,semijoin=on,partial_match_rowid_merge=on,partial_match_table_scan=on,subquery_cache=on
+index_merge=on,index_merge_union=on,index_merge_sort_union=on,index_merge_intersection=on,index_condition_pushdown=on,firstmatch=on,loosescan=on,materialization=on,semijoin=on,partial_match_rowid_merge=on,partial_match_table_scan=on,subquery_cache=on,mrr_sort_keys=on
set optimizer_switch='index_merge=off,index_merge_union=off';
select @@optimizer_switch;
@@optimizer_switch
-index_merge=off,index_merge_union=off,index_merge_sort_union=on,index_merge_intersection=on,index_condition_pushdown=on,firstmatch=on,loosescan=on,materialization=on,semijoin=on,partial_match_rowid_merge=on,partial_match_table_scan=on,subquery_cache=on
+index_merge=off,index_merge_union=off,index_merge_sort_union=on,index_merge_intersection=on,index_condition_pushdown=on,firstmatch=on,loosescan=on,materialization=on,semijoin=on,partial_match_rowid_merge=on,partial_match_table_scan=on,subquery_cache=on,mrr_sort_keys=on
set optimizer_switch='index_merge_union=on';
select @@optimizer_switch;
@@optimizer_switch
-index_merge=off,index_merge_union=on,index_merge_sort_union=on,index_merge_intersection=on,index_condition_pushdown=on,firstmatch=on,loosescan=on,materialization=on,semijoin=on,partial_match_rowid_merge=on,partial_match_table_scan=on,subquery_cache=on
+index_merge=off,index_merge_union=on,index_merge_sort_union=on,index_merge_intersection=on,index_condition_pushdown=on,firstmatch=on,loosescan=on,materialization=on,semijoin=on,partial_match_rowid_merge=on,partial_match_table_scan=on,subquery_cache=on,mrr_sort_keys=on
set optimizer_switch='default,index_merge_sort_union=off';
select @@optimizer_switch;
@@optimizer_switch
-index_merge=on,index_merge_union=on,index_merge_sort_union=off,index_merge_intersection=on,index_condition_pushdown=on,firstmatch=on,loosescan=on,materialization=on,semijoin=on,partial_match_rowid_merge=on,partial_match_table_scan=on,subquery_cache=on
+index_merge=on,index_merge_union=on,index_merge_sort_union=off,index_merge_intersection=on,index_condition_pushdown=on,firstmatch=on,loosescan=on,materialization=on,semijoin=on,partial_match_rowid_merge=on,partial_match_table_scan=on,subquery_cache=on,mrr_sort_keys=on
set optimizer_switch=4;
ERROR 42000: Variable 'optimizer_switch' can't be set to the value of '4'
set optimizer_switch=NULL;
@@ -43,57 +43,57 @@
set optimizer_switch='index_merge=off,index_merge_union=off,default';
select @@optimizer_switch;
@@optimizer_switch
-index_merge=off,index_merge_union=off,index_merge_sort_union=on,index_merge_intersection=on,index_condition_pushdown=on,firstmatch=on,loosescan=on,materialization=on,semijoin=on,partial_match_rowid_merge=on,partial_match_table_scan=on,subquery_cache=on
+index_merge=off,index_merge_union=off,index_merge_sort_union=on,index_merge_intersection=on,index_condition_pushdown=on,firstmatch=on,loosescan=on,materialization=on,semijoin=on,partial_match_rowid_merge=on,partial_match_table_scan=on,subquery_cache=on,mrr_sort_keys=on
set optimizer_switch=default;
select @@global.optimizer_switch;
@@global.optimizer_switch
-index_merge=on,index_merge_union=on,index_merge_sort_union=on,index_merge_intersection=on,index_condition_pushdown=on,firstmatch=on,loosescan=on,materialization=on,semijoin=on,partial_match_rowid_merge=on,partial_match_table_scan=on,subquery_cache=on
+index_merge=on,index_merge_union=on,index_merge_sort_union=on,index_merge_intersection=on,index_condition_pushdown=on,firstmatch=on,loosescan=on,materialization=on,semijoin=on,partial_match_rowid_merge=on,partial_match_table_scan=on,subquery_cache=on,mrr_sort_keys=on
set @@global.optimizer_switch=default;
select @@global.optimizer_switch;
@@global.optimizer_switch
-index_merge=on,index_merge_union=on,index_merge_sort_union=on,index_merge_intersection=on,index_condition_pushdown=on,firstmatch=on,loosescan=on,materialization=on,semijoin=on,partial_match_rowid_merge=on,partial_match_table_scan=on,subquery_cache=on
+index_merge=on,index_merge_union=on,index_merge_sort_union=on,index_merge_intersection=on,index_condition_pushdown=on,firstmatch=on,loosescan=on,materialization=on,semijoin=on,partial_match_rowid_merge=on,partial_match_table_scan=on,subquery_cache=on,mrr_sort_keys=on
#
# Check index_merge's @@optimizer_switch flags
#
select @@optimizer_switch;
@@optimizer_switch
-index_merge=on,index_merge_union=on,index_merge_sort_union=on,index_merge_intersection=on,index_condition_pushdown=on,firstmatch=on,loosescan=on,materialization=on,semijoin=on,partial_match_rowid_merge=on,partial_match_table_scan=on,subquery_cache=on
+index_merge=on,index_merge_union=on,index_merge_sort_union=on,index_merge_intersection=on,index_condition_pushdown=on,firstmatch=on,loosescan=on,materialization=on,semijoin=on,partial_match_rowid_merge=on,partial_match_table_scan=on,subquery_cache=on,mrr_sort_keys=on
BUG#37120 optimizer_switch allowable values not according to specification
select @@optimizer_switch;
@@optimizer_switch
-index_merge=on,index_merge_union=on,index_merge_sort_union=on,index_merge_intersection=on,index_condition_pushdown=on,firstmatch=on,loosescan=on,materialization=on,semijoin=on,partial_match_rowid_merge=on,partial_match_table_scan=on,subquery_cache=on
+index_merge=on,index_merge_union=on,index_merge_sort_union=on,index_merge_intersection=on,index_condition_pushdown=on,firstmatch=on,loosescan=on,materialization=on,semijoin=on,partial_match_rowid_merge=on,partial_match_table_scan=on,subquery_cache=on,mrr_sort_keys=on
set optimizer_switch='default,materialization=off';
select @@optimizer_switch;
@@optimizer_switch
-index_merge=on,index_merge_union=on,index_merge_sort_union=on,index_merge_intersection=on,index_condition_pushdown=on,firstmatch=on,loosescan=on,materialization=off,semijoin=on,partial_match_rowid_merge=on,partial_match_table_scan=on,subquery_cache=on
+index_merge=on,index_merge_union=on,index_merge_sort_union=on,index_merge_intersection=on,index_condition_pushdown=on,firstmatch=on,loosescan=on,materialization=off,semijoin=on,partial_match_rowid_merge=on,partial_match_table_scan=on,subquery_cache=on,mrr_sort_keys=on
set optimizer_switch='default,semijoin=off';
select @@optimizer_switch;
@@optimizer_switch
-index_merge=on,index_merge_union=on,index_merge_sort_union=on,index_merge_intersection=on,index_condition_pushdown=on,firstmatch=on,loosescan=on,materialization=on,semijoin=off,partial_match_rowid_merge=on,partial_match_table_scan=on,subquery_cache=on
+index_merge=on,index_merge_union=on,index_merge_sort_union=on,index_merge_intersection=on,index_condition_pushdown=on,firstmatch=on,loosescan=on,materialization=on,semijoin=off,partial_match_rowid_merge=on,partial_match_table_scan=on,subquery_cache=on,mrr_sort_keys=on
set optimizer_switch='default,loosescan=off';
select @@optimizer_switch;
@@optimizer_switch
-index_merge=on,index_merge_union=on,index_merge_sort_union=on,index_merge_intersection=on,index_condition_pushdown=on,firstmatch=on,loosescan=off,materialization=on,semijoin=on,partial_match_rowid_merge=on,partial_match_table_scan=on,subquery_cache=on
+index_merge=on,index_merge_union=on,index_merge_sort_union=on,index_merge_intersection=on,index_condition_pushdown=on,firstmatch=on,loosescan=off,materialization=on,semijoin=on,partial_match_rowid_merge=on,partial_match_table_scan=on,subquery_cache=on,mrr_sort_keys=on
set optimizer_switch='default,semijoin=off,materialization=off';
select @@optimizer_switch;
@@optimizer_switch
-index_merge=on,index_merge_union=on,index_merge_sort_union=on,index_merge_intersection=on,index_condition_pushdown=on,firstmatch=on,loosescan=on,materialization=off,semijoin=off,partial_match_rowid_merge=on,partial_match_table_scan=on,subquery_cache=on
+index_merge=on,index_merge_union=on,index_merge_sort_union=on,index_merge_intersection=on,index_condition_pushdown=on,firstmatch=on,loosescan=on,materialization=off,semijoin=off,partial_match_rowid_merge=on,partial_match_table_scan=on,subquery_cache=on,mrr_sort_keys=on
set optimizer_switch='default,materialization=off,semijoin=off';
select @@optimizer_switch;
@@optimizer_switch
-index_merge=on,index_merge_union=on,index_merge_sort_union=on,index_merge_intersection=on,index_condition_pushdown=on,firstmatch=on,loosescan=on,materialization=off,semijoin=off,partial_match_rowid_merge=on,partial_match_table_scan=on,subquery_cache=on
+index_merge=on,index_merge_union=on,index_merge_sort_union=on,index_merge_intersection=on,index_condition_pushdown=on,firstmatch=on,loosescan=on,materialization=off,semijoin=off,partial_match_rowid_merge=on,partial_match_table_scan=on,subquery_cache=on,mrr_sort_keys=on
set optimizer_switch='default,semijoin=off,materialization=off,loosescan=off';
select @@optimizer_switch;
@@optimizer_switch
-index_merge=on,index_merge_union=on,index_merge_sort_union=on,index_merge_intersection=on,index_condition_pushdown=on,firstmatch=on,loosescan=off,materialization=off,semijoin=off,partial_match_rowid_merge=on,partial_match_table_scan=on,subquery_cache=on
+index_merge=on,index_merge_union=on,index_merge_sort_union=on,index_merge_intersection=on,index_condition_pushdown=on,firstmatch=on,loosescan=off,materialization=off,semijoin=off,partial_match_rowid_merge=on,partial_match_table_scan=on,subquery_cache=on,mrr_sort_keys=on
set optimizer_switch='default,semijoin=off,loosescan=off';
select @@optimizer_switch;
@@optimizer_switch
-index_merge=on,index_merge_union=on,index_merge_sort_union=on,index_merge_intersection=on,index_condition_pushdown=on,firstmatch=on,loosescan=off,materialization=on,semijoin=off,partial_match_rowid_merge=on,partial_match_table_scan=on,subquery_cache=on
+index_merge=on,index_merge_union=on,index_merge_sort_union=on,index_merge_intersection=on,index_condition_pushdown=on,firstmatch=on,loosescan=off,materialization=on,semijoin=off,partial_match_rowid_merge=on,partial_match_table_scan=on,subquery_cache=on,mrr_sort_keys=on
set optimizer_switch='default,materialization=off,loosescan=off';
select @@optimizer_switch;
@@optimizer_switch
-index_merge=on,index_merge_union=on,index_merge_sort_union=on,index_merge_intersection=on,index_condition_pushdown=on,firstmatch=on,loosescan=off,materialization=off,semijoin=on,partial_match_rowid_merge=on,partial_match_table_scan=on,subquery_cache=on
+index_merge=on,index_merge_union=on,index_merge_sort_union=on,index_merge_intersection=on,index_condition_pushdown=on,firstmatch=on,loosescan=off,materialization=off,semijoin=on,partial_match_rowid_merge=on,partial_match_table_scan=on,subquery_cache=on,mrr_sort_keys=on
set optimizer_switch=default;
diff -urN --exclude='.*' 5.3-noc/mysql-test/suite/vcol/r/vcol_misc.result maria-5.3-dsmrr-cpk-r5-noc/mysql-test/suite/vcol/r/vcol_misc.result
--- 5.3-noc/mysql-test/suite/vcol/r/vcol_misc.result 2010-08-14 17:29:22.000000000 +0200
+++ maria-5.3-dsmrr-cpk-r5-noc/mysql-test/suite/vcol/r/vcol_misc.result 2010-08-19 22:41:38.000000000 +0200
@@ -13,8 +13,8 @@
1 SIMPLE t2 ref idx idx 5 test.t1.b 2 Using where; Using join buffer
select * from t1,t2 where t1.b=t2.c and d <= 100;
a b c d v
-4 20 20 100 101
1 20 20 100 101
3 30 30 100 101
+4 20 20 100 101
set join_cache_level=default;
drop table t1, t2;
diff -urN --exclude='.*' 5.3-noc/mysql-test/suite/vcol/t/vcol_misc.test maria-5.3-dsmrr-cpk-r5-noc/mysql-test/suite/vcol/t/vcol_misc.test
--- 5.3-noc/mysql-test/suite/vcol/t/vcol_misc.test 2010-08-14 17:29:22.000000000 +0200
+++ maria-5.3-dsmrr-cpk-r5-noc/mysql-test/suite/vcol/t/vcol_misc.test 2010-08-19 22:41:38.000000000 +0200
@@ -17,7 +17,8 @@
explain
select * from t1,t2 where t1.b=t2.c and d <= 100;
+--sorted_result
select * from t1,t2 where t1.b=t2.c and d <= 100;
set join_cache_level=default;
-drop table t1, t2;
\ No newline at end of file
+drop table t1, t2;
diff -urN --exclude='.*' 5.3-noc/mysql-test/t/innodb_mrr_cpk.test maria-5.3-dsmrr-cpk-r5-noc/mysql-test/t/innodb_mrr_cpk.test
--- 5.3-noc/mysql-test/t/innodb_mrr_cpk.test 1970-01-01 01:00:00.000000000 +0100
+++ maria-5.3-dsmrr-cpk-r5-noc/mysql-test/t/innodb_mrr_cpk.test 2010-08-14 17:28:23.000000000 +0200
@@ -0,0 +1,137 @@
+#
+# Tests for DS-MRR over clustered primary key. The only engine that supports
+# this is InnoDB/XtraDB.
+#
+# Basic idea about testing
+# - DS-MRR/CPK works only with BKA
+# - Should also test index condition pushdown
+# - Should also test whatever uses RANGE_SEQ_IF::skip_record() for filtering
+# - Also test access using prefix of primary key
+#
+# - Forget about the cost model: BKA's multi_range_read_info() call passes 10 for
+# #rows, and the call is made only as an applicability check
+#
+-- source include/have_innodb.inc
+
+--disable_warnings
+drop table if exists t0,t1,t2,t3;
+--enable_warnings
+
+set @save_join_cache_level=@@join_cache_level;
+set join_cache_level=6;
+
+set @save_storage_engine=@@storage_engine;
+set storage_engine=innodb;
+
+create table t0(a int);
+insert into t0 values (0),(1),(2),(3),(4),(5),(6),(7),(8),(9);
+create table t1(a char(8), b char(8), filler char(100), primary key(a));
+show create table t1;
+
+insert into t1 select
+ concat('a-', 1000 + A.a + B.a*10 + C.a*100, '=A'),
+ concat('b-', 1000 + A.a + B.a*10 + C.a*100, '=B'),
+ 'filler'
+from t0 A, t0 B, t0 C;
+
+create table t2 (a char(8));
+insert into t2 values ('a-1010=A'), ('a-1030=A'), ('a-1020=A');
+
+--echo This should use join buffer:
+explain select * from t1, t2 where t1.a=t2.a;
+
+--echo This output must be sorted by value of t1.a:
+select * from t1, t2 where t1.a=t2.a;
+drop table t1, t2;
+
+# Try multi-column indexes
+create table t1(
+ a char(8) character set utf8, b int, filler char(100),
+ primary key(a,b)
+);
+
+insert into t1 select
+ concat('a-', 1000 + A.a + B.a*10 + C.a*100, '=A'),
+ 1000 + A.a + B.a*10 + C.a*100,
+ 'filler'
+from t0 A, t0 B, t0 C;
+
+create table t2 (a char(8) character set utf8, b int);
+insert into t2 values ('a-1010=A', 1010), ('a-1030=A', 1030), ('a-1020=A', 1020);
+explain select * from t1, t2 where t1.a=t2.a and t1.b=t2.b;
+select * from t1, t2 where t1.a=t2.a and t1.b=t2.b;
+
+# Try with dataset that causes identical lookup keys:
+insert into t2 values ('a-1030=A', 1030), ('a-1020=A', 1020);
+explain select * from t1, t2 where t1.a=t2.a and t1.b=t2.b;
+select * from t1, t2 where t1.a=t2.a and t1.b=t2.b;
+
+drop table t1, t2;
+
+create table t1(
+ a varchar(8) character set utf8, b int, filler char(100),
+ primary key(a,b)
+);
+
+insert into t1 select
+ concat('a-', 1000 + A.a + B.a*10 + C.a*100, '=A'),
+ 1000 + A.a + B.a*10 + C.a*100,
+ 'filler'
+from t0 A, t0 B, t0 C;
+
+create table t2 (a char(8) character set utf8, b int);
+insert into t2 values ('a-1010=A', 1010), ('a-1030=A', 1030), ('a-1020=A', 1020);
+explain select * from t1, t2 where t1.a=t2.a and t1.b=t2.b;
+select * from t1, t2 where t1.a=t2.a and t1.b=t2.b;
+
+#
+# Try scanning on a CPK prefix
+#
+explain select * from t1, t2 where t1.a=t2.a;
+select * from t1, t2 where t1.a=t2.a;
+drop table t1, t2;
+
+#
+# The above example is not very interesting, as CPK prefix has
+# only one match. Create a dataset where scan on CPK prefix
+# would produce multiple matches:
+#
+create table t1 (a int, b int, c int, filler char(100), primary key(a,b,c));
+insert into t1 select A.a, B.a, C.a, 'filler' from t0 A, t0 B, t0 C;
+
+insert into t1 values (11, 11, 11, 'filler');
+insert into t1 values (11, 11, 12, 'filler');
+insert into t1 values (11, 11, 13, 'filler');
+insert into t1 values (11, 22, 1234, 'filler');
+insert into t1 values (11, 33, 124, 'filler');
+insert into t1 values (11, 33, 125, 'filler');
+
+create table t2 (a int, b int);
+insert into t2 values (11,33), (11,22), (11,11);
+
+explain select * from t1, t2 where t1.a=t2.a and t1.b=t2.b;
+select * from t1, t2 where t1.a=t2.a and t1.b=t2.b;
+
+# Check a real resultset for comparison:
+set join_cache_level=0;
+select * from t1, t2 where t1.a=t2.a and t1.b=t2.b;
+set join_cache_level=6;
+
+
+#
+# Check that Index Condition Pushdown (BKA) actually works:
+#
+explain select * from t1, t2 where t1.a=t2.a and t2.b + t1.b > 100;
+select * from t1, t2 where t1.a=t2.a and t2.b + t1.b > 100;
+
+set optimizer_switch='index_condition_pushdown=off';
+explain select * from t1, t2 where t1.a=t2.a and t2.b + t1.b > 100;
+select * from t1, t2 where t1.a=t2.a and t2.b + t1.b > 100;
+set optimizer_switch='index_condition_pushdown=on';
+
+drop table t1,t2;
+
+set @@join_cache_level= @save_join_cache_level;
+set storage_engine=@save_storage_engine;
+drop table t0;
+
diff -urN --exclude='.*' 5.3-noc/sql/handler.h maria-5.3-dsmrr-cpk-r5-noc/sql/handler.h
--- 5.3-noc/sql/handler.h 2010-08-14 17:29:22.000000000 +0200
+++ maria-5.3-dsmrr-cpk-r5-noc/sql/handler.h 2010-08-14 17:28:23.000000000 +0200
@@ -1278,9 +1278,9 @@
COST_VECT *cost);
/*
- The below two are not used (and not handled) in this milestone of this WL
- entry because there seems to be no use for them at this stage of
- implementation.
+ Indicates that all scanned ranges will be singlepoint (aka equality) ranges.
+ The ranges may not use the full key but all of them will use the same number
+ of key parts.
*/
#define HA_MRR_SINGLE_POINT 1
#define HA_MRR_FIXED_KEY 2
@@ -1322,6 +1322,12 @@
*/
#define HA_MRR_NO_NULL_ENDPOINTS 128
+/*
+ The MRR user has materialized range keys somewhere in the user's buffer.
+ This can be used for optimization of the procedure that sorts these keys
+ since in this case key values don't have to be copied into the MRR buffer.
+*/
+#define HA_MRR_MATERIALIZED_KEYS 256
/*
@@ -1806,9 +1812,10 @@
uint n_ranges, uint *bufsz,
uint *flags, COST_VECT *cost);
virtual ha_rows multi_range_read_info(uint keyno, uint n_ranges, uint keys,
- uint *bufsz, uint *flags, COST_VECT *cost);
+ uint key_parts, uint *bufsz,
+ uint *flags, COST_VECT *cost);
virtual int multi_range_read_init(RANGE_SEQ_IF *seq, void *seq_init_param,
- uint n_ranges, uint mode,
+ uint n_ranges, uint mode,
HANDLER_BUFFER *buf);
virtual int multi_range_read_next(char **range_info);
virtual int read_range_first(const key_range *start_key,
diff -urN --exclude='.*' 5.3-noc/sql/multi_range_read.cc maria-5.3-dsmrr-cpk-r5-noc/sql/multi_range_read.cc
--- 5.3-noc/sql/multi_range_read.cc 2010-08-14 17:29:22.000000000 +0200
+++ maria-5.3-dsmrr-cpk-r5-noc/sql/multi_range_read.cc 2010-08-19 22:41:38.000000000 +0200
@@ -1,4 +1,5 @@
#include "mysql_priv.h"
+#include <my_bit.h>
#include "sql_select.h"
/****************************************************************************
@@ -136,10 +137,16 @@
*/
ha_rows handler::multi_range_read_info(uint keyno, uint n_ranges, uint n_rows,
- uint *bufsz, uint *flags, COST_VECT *cost)
+ uint key_parts, uint *bufsz,
+ uint *flags, COST_VECT *cost)
{
- *bufsz= 0; /* Default implementation doesn't need a buffer */
+ /*
+ Currently we expect this function to be called only in preparation of scan
+ with HA_MRR_SINGLE_POINT property.
+ */
+ DBUG_ASSERT(*flags | HA_MRR_SINGLE_POINT);
+ *bufsz= 0; /* Default implementation doesn't need a buffer */
*flags |= HA_MRR_USE_DEFAULT_IMPL;
cost->zero();
@@ -280,6 +287,84 @@
* DS-MRR implementation
***************************************************************************/
+void SimpleBuffer::write(const uchar *data, size_t bytes)
+{
+ DBUG_ASSERT(have_space_for(bytes));
+
+ if (direction == -1)
+ write_pos -= bytes;
+
+ memcpy(write_pos, data, bytes);
+
+ if (direction == 1)
+ write_pos += bytes;
+}
+
+bool SimpleBuffer::have_space_for(size_t bytes)
+{
+ if (direction == 1)
+ return (write_pos + bytes < end);
+ else
+ return (write_pos - bytes >= start);
+}
+
+size_t SimpleBuffer::used_size()
+{
+ return (direction == 1)? write_pos - read_pos : read_pos - write_pos;
+}
+
+uchar *SimpleBuffer::read(size_t bytes)
+{
+ DBUG_ASSERT(have_data(bytes));
+ uchar *res;
+ if (direction == 1)
+ {
+ res= read_pos;
+ read_pos += bytes;
+ return res;
+ }
+ else
+ {
+ read_pos= read_pos - bytes;
+ return read_pos;
+ }
+}
+
+bool SimpleBuffer::have_data(size_t bytes)
+{
+ return (direction == 1)? (write_pos - read_pos >= (ptrdiff_t)bytes) :
+ (read_pos - write_pos >= (ptrdiff_t)bytes);
+}
+
+void SimpleBuffer::reset_for_writing()
+{
+ if (direction == 1)
+ write_pos= read_pos= start;
+ else
+ write_pos= read_pos= end;
+}
+
+void SimpleBuffer::reset_for_reading()
+{
+/*
+Do we need this at all?
+ if (direction == 1)
+ pos= start;
+ else
+ pos= end;
+//end?
+*/
+}
+
+uchar *SimpleBuffer::end_of_space()
+{
+ if (direction == 1)
+ return start;
+ else
+ return end;
+//TODO: check this.
+}
+
/**
DS-MRR: Initialize and start MRR scan
@@ -302,9 +387,9 @@
void *seq_init_param, uint n_ranges, uint mode,
HANDLER_BUFFER *buf)
{
- uint elem_size;
Item *pushed_cond= NULL;
handler *new_h2= 0;
+ THD *thd= current_thd;
DBUG_ENTER("DsMrr_impl::dsmrr_init");
/*
@@ -316,25 +401,82 @@
{
use_default_impl= TRUE;
const int retval=
- h->handler::multi_range_read_init(seq_funcs, seq_init_param,
- n_ranges, mode, buf);
+ h->handler::multi_range_read_init(seq_funcs, seq_init_param, n_ranges,
+ mode, buf);
DBUG_RETURN(retval);
}
- rowids_buf= buf->buffer;
-
+ use_default_impl= FALSE;
is_mrr_assoc= !test(mode & HA_MRR_NO_ASSOCIATION);
+
+ /*
+ Figure out what steps we'll need to do
+ */
+ do_sort_keys= FALSE;
+ if ((mode & HA_MRR_SINGLE_POINT) &&
+ optimizer_flag(thd, OPTIMIZER_SWITCH_MRR_SORT_KEYS))
+ {
+ do_sort_keys= TRUE;
+ use_key_pointers= test(mode & HA_MRR_MATERIALIZED_KEYS);
+ }
+
+ do_rowid_fetch= FALSE;
+ doing_cpk_scan= check_cpk_scan(h->inited == handler::INDEX?
+ h->active_index: h2->active_index, mode);
+ if (!doing_cpk_scan /* && !index_only_read */)
+ {
+ /* Will use rowid buffer to store/sort rowids, etc */
+ do_rowid_fetch= TRUE;
+ }
+ DBUG_ASSERT(do_sort_keys || do_rowid_fetch);
+
+ full_buf= buf->buffer;
+ full_buf_end= buf->buffer_end;
+
+ /*
+ At start, alloc all of the buffer for rowids. Key sorting code will grab a
+ piece if necessary.
+ */
+ rowid_buffer.set_buffer_space(full_buf, full_buf_end, 1);
if (is_mrr_assoc)
status_var_increment(table->in_use->status_var.ha_multi_range_read_init_count);
-
- rowids_buf_end= buf->buffer_end;
- elem_size= h->ref_length + (int)is_mrr_assoc * sizeof(void*);
- rowids_buf_last= rowids_buf +
- ((rowids_buf_end - rowids_buf)/ elem_size)*
- elem_size;
- rowids_buf_end= rowids_buf_last;
+
+ /*
+ psergey2-todo: for CPK scans:
+ - use MRR irrespective of the @@mrr_sort_keys setting,
+ - don't do rowid retrieval.
+ */
+ if (do_sort_keys)
+ {
+ /* It's a DS-MRR/CPK scan */
+ key_tuple_length= 0; /* dummy value telling it needs to be inited */
+ key_buff_elem_size= 0;
+ in_index_range= FALSE;
+ h->mrr_iter= seq_funcs->init(seq_init_param, n_ranges, mode);
+ h->mrr_funcs= *seq_funcs;
+ keyno= (h->inited == handler::INDEX)? h->active_index : h2->active_index;
+ dsmrr_fill_key_buffer();
+
+ if (dsmrr_eof && !do_rowid_fetch)
+ buf->end_of_used_area= key_buffer.end_of_space();
+ }
- /*
+ if (!do_rowid_fetch)
+ {
+ /*
+ We have the keys and won't need to fetch rowids, as key lookup will be
+ the last operation, done in multi_range_read_next().
+ */
+ DBUG_RETURN(0);
+ }
+
+ rowid_buff_elem_size= h->ref_length + (is_mrr_assoc? sizeof(char*) : 0);
+ /*
+ psergey2: this is only needed when
+ - doing a rowid-to-row scan
+ - the buffer wasn't exhausted on the first pass.
+ */
+ /*
There can be two cases:
- This is the first call since index_init(), h2==NULL
Need to setup h2 then.
@@ -344,8 +486,7 @@
*/
if (!h2)
{
- /* Create a separate handler object to do rndpos() calls. */
- THD *thd= current_thd;
+ /* Create a separate handler object to do rnd_pos() calls. */
/*
::clone() takes up a lot of stack, especially on 64 bit platforms.
The constant 5 is an empiric result.
@@ -353,9 +494,9 @@
if (check_stack_overrun(thd, 5*STACK_MIN_SIZE, (uchar*) &new_h2))
DBUG_RETURN(1);
DBUG_ASSERT(h->active_index != MAX_KEY);
- uint mrr_keyno= h->active_index;
+ keyno= h->active_index;
- /* Create a separate handler object to do rndpos() calls. */
+ /* Create a separate handler object to do rnd_pos() calls. */
if (!(new_h2= h->clone(thd->mem_root)) ||
new_h2->ha_external_lock(thd, F_RDLCK))
{
@@ -363,7 +504,7 @@
DBUG_RETURN(1);
}
- if (mrr_keyno == h->pushed_idx_cond_keyno)
+ if (keyno == h->pushed_idx_cond_keyno)
pushed_cond= h->pushed_idx_cond;
/*
@@ -376,16 +517,18 @@
goto error;
}
+ use_default_impl= FALSE;
h2= new_h2; /* Ok, now can put it into h2 */
table->prepare_for_position();
h2->extra(HA_EXTRA_KEYREAD);
-
- if (h2->ha_index_init(mrr_keyno, FALSE))
+ h2->mrr_funcs= *seq_funcs; //psergey3-todo: sort out where to store
+ h2->mrr_iter= h->mrr_iter;
+
+ if (h2->ha_index_init(keyno, FALSE))
goto error;
- use_default_impl= FALSE;
if (pushed_cond)
- h2->idx_cond_push(mrr_keyno, pushed_cond);
+ h2->idx_cond_push(keyno, pushed_cond);
}
else
{
@@ -401,14 +544,18 @@
h2= NULL;
int res= (h->inited == handler::INDEX && h->ha_index_end());
h2= save_h2;
- use_default_impl= FALSE;
if (res)
goto error;
}
+
+ if (!do_sort_keys &&
+ h2->handler::multi_range_read_init(seq_funcs, seq_init_param, n_ranges,
+ mode, buf))
+ {
+ goto error;
+ }
- if (h2->handler::multi_range_read_init(seq_funcs, seq_init_param, n_ranges,
- mode, buf) ||
- dsmrr_fill_buffer())
+ if (dsmrr_fill_rowid_buffer())
{
goto error;
}
@@ -416,8 +563,8 @@
If the above call has scanned through all intervals in *seq, then
adjust *buf to indicate that the remaining buffer space will not be used.
*/
- if (dsmrr_eof)
- buf->end_of_used_area= rowids_buf_last;
+// if (dsmrr_eof)
+// buf->end_of_used_area= rowid_buffer.end_of_space();
/*
h->inited == INDEX may occur when 'range checked for each record' is
@@ -428,7 +575,6 @@
(h->ha_rnd_init(FALSE))))
goto error;
- use_default_impl= FALSE;
h->mrr_funcs= *seq_funcs;
DBUG_RETURN(0);
@@ -465,65 +611,474 @@
/**
- DS-MRR: Fill the buffer with rowids and sort it by rowid
+ DS-MRR: Fill and sort the rowid buffer
{This is an internal function of DiskSweep MRR implementation}
+
Scan the MRR ranges and collect ROWIDs (or {ROWID, range_id} pairs) into
buffer. When the buffer is full or scan is completed, sort the buffer by
rowid and return.
- The function assumes that rowids buffer is empty when it is invoked.
-
+ If do_sort_keys is FALSE, dsmrr_eof is set to indicate whether we've exhausted
+ the list of ranges we're scanning. This function never returns HA_ERR_END_OF_FILE.
+
+ post-condition:
+ rowid buffer is not empty, or key source is exhausted.
+
@param h Table handler
@retval 0 OK, the next portion of rowids is in the buffer,
properly ordered
@retval other Error
+
*/
-int DsMrr_impl::dsmrr_fill_buffer()
+int DsMrr_impl::dsmrr_fill_rowid_buffer()
{
char *range_info;
int res;
- DBUG_ENTER("DsMrr_impl::dsmrr_fill_buffer");
+ DBUG_ENTER("DsMrr_impl::dsmrr_fill_rowid_buffer");
+
+ DBUG_ASSERT(rowid_buffer.is_empty());
+ rowid_buffer.reset_for_writing();
+ identical_rowid_ptr= NULL;
+
+ if (do_sort_keys && key_buffer.is_reverse())
+ key_buffer.flip();
+ /* NOTE(review): 'res' is read after this loop but stays unset if it never iterates; confirm */
+ while (rowid_buffer.have_space_for(rowid_buff_elem_size))
+ {
+ if (do_sort_keys)
+ res= dsmrr_next_from_index(&range_info);
+ else
+ res= h2->handler::multi_range_read_next(&range_info);
+
+ if (res)
+ break;
- rowids_buf_cur= rowids_buf;
- while ((rowids_buf_cur < rowids_buf_end) &&
- !(res= h2->handler::multi_range_read_next(&range_info)))
- {
KEY_MULTI_RANGE *curr_range= &h2->handler::mrr_cur_range;
- if (h2->mrr_funcs.skip_index_tuple &&
+ if (!do_sort_keys && /* If keys are sorted then this check is already done */
+ h2->mrr_funcs.skip_index_tuple &&
h2->mrr_funcs.skip_index_tuple(h2->mrr_iter, curr_range->ptr))
continue;
-
+
/* Put rowid, or {rowid, range_id} pair into the buffer */
h2->position(table->record[0]);
- memcpy(rowids_buf_cur, h2->ref, h2->ref_length);
- rowids_buf_cur += h2->ref_length;
+ rowid_buffer.write(h2->ref, h2->ref_length);
if (is_mrr_assoc)
- {
- memcpy(rowids_buf_cur, &range_info, sizeof(void*));
- rowids_buf_cur += sizeof(void*);
- }
+ rowid_buffer.write((uchar*)&range_info, sizeof(void*));
}
if (res && res != HA_ERR_END_OF_FILE)
DBUG_RETURN(res);
- dsmrr_eof= test(res == HA_ERR_END_OF_FILE);
+
+ if (!do_sort_keys)
+ dsmrr_eof= test(res == HA_ERR_END_OF_FILE);
/* Sort the buffer contents by rowid */
uint elem_size= h->ref_length + (int)is_mrr_assoc * sizeof(void*);
- uint n_rowids= (rowids_buf_cur - rowids_buf) / elem_size;
+ uint n_rowids= rowid_buffer.used_size() / elem_size;
- my_qsort2(rowids_buf, n_rowids, elem_size, (qsort2_cmp)rowid_cmp,
- (void*)h);
- rowids_buf_last= rowids_buf_cur;
- rowids_buf_cur= rowids_buf;
+ my_qsort2(rowid_buffer.used_area(), n_rowids, elem_size,
+ (qsort2_cmp)rowid_cmp, (void*)h);
+
DBUG_RETURN(0);
}
+/*
+ my_qsort2-compatible function to compare two key tuples, keypart by keypart
+ (a NULL keypart sorts before a non-NULL one; arg is the DsMrr_impl object).
+ If dsmrr->use_key_pointers==TRUE, key1/key2 point to stored key pointers.
+*/
+
+int DsMrr_impl::key_tuple_cmp(void* arg, uchar* key1, uchar* key2)
+{
+ DsMrr_impl *dsmrr= (DsMrr_impl*)arg;
+ TABLE *table= dsmrr->h->table;
+ int res;
+ KEY_PART_INFO *part= table->key_info[dsmrr->keyno].key_part;
+
+ if (dsmrr->use_key_pointers)
+ {
+ /* the buffer stores pointers to keys, get to the keys */
+ key1= *((uchar**)key1);
+ key2= *((uchar**)key2); // todo is this alignment-safe?
+ }
+
+ uchar *key1_end= key1 + dsmrr->key_tuple_length;
+
+ while (key1 < key1_end)
+ {
+ Field* f = part->field;
+ int len = part->store_length;
+ if (part->null_bit)
+ {
+ if (*key1) // key1 == NULL
+ {
+ if (!*key2) // key1(NULL) < key2(notNULL)
+ return -1;
+ goto equals;
+ }
+ else if (*key2) // key1(notNULL) > key2 (NULL)
+ return 1;
+ // Step over the NULL-indicator byte before calling f->key_cmp().
+ key1++;
+ key2++;
+ len--;
+ }
+
+ if ((res= f->key_cmp(key1, key2)))
+ return res;
+equals:
+ key1 += len;
+ key2 += len;
+ part++;
+ }
+ return 0;
+}
+
+
+/*
+ Setup key/rowid buffer sizes based on sample_key
+
+ DESCRIPTION
+ Record the length and keypart map of sample_key, then split the space
+ [full_buf, full_buf_end) between key_buffer and rowid_buffer (all of it goes
+ to key_buffer when do_rowid_fetch is FALSE).
+ This function must be called when all buffer space is empty.
+*/
+
+void DsMrr_impl::setup_buffer_sizes(key_range *sample_key)
+{
+ key_tuple_length= sample_key->length;
+ key_tuple_map= sample_key->keypart_map;
+ key_size_in_keybuf= use_key_pointers ? sizeof(char*) :
+ key_tuple_length;
+ key_buff_elem_size= key_size_in_keybuf +
+ (int)is_mrr_assoc * sizeof(void*);
+
+ KEY *key_info= &h->table->key_info[keyno];
+ index_ranges_unique= test(key_info->flags & HA_NOSAME &&
+ key_info->key_parts ==
+ my_count_bits(sample_key->keypart_map));
+ if (!do_rowid_fetch)
+ {
+ /* Give all space to key buffer. */
+ key_buffer.set_buffer_space(full_buf, full_buf_end, 1);
+
+ /* Just in case, tell rowid buffer that it has zero size: */
+ rowid_buffer.set_buffer_space(full_buf_end, full_buf_end, 1);
+ return;
+ }
+
+ /*
+ Ok if we got here we need to allocate one part of the buffer
+ for keys and another part for rowids.
+ */
+ uint rowid_buf_elem_size= h->ref_length +
+ (int)is_mrr_assoc * sizeof(char*);
+
+ /*
+ Use rec_per_key statistics as a basis to find out how many rowids
+ we'll get for each key value.
+ TODO: are we guaranteed to get rec_per_key==1 for unique keys?
+ TODO: what should be the default value to use when there is no
+ statistics?
+ */
+ uint parts= my_count_bits(key_tuple_map);
+ ulong rpc;
+ if ((rpc= key_info->rec_per_key[parts - 1]))
+ {
+ rowid_buf_elem_size *= rpc;
+ }
+
+ double fraction_for_rowids=
+ ((double) rowid_buf_elem_size /
+ ((double)rowid_buf_elem_size + key_buff_elem_size));
+
+ size_t bytes_for_rowids=
+ round(fraction_for_rowids * (full_buf_end - full_buf));
+
+ uint bytes_for_keys= (full_buf_end - full_buf) - bytes_for_rowids;
+
+ if (bytes_for_keys < key_buff_elem_size + 1)
+ {
+ uint add= key_buff_elem_size + 1 - bytes_for_keys;
+ bytes_for_rowids -= add;
+ DBUG_ASSERT(bytes_for_rowids >=
+ (h->ref_length + (int)is_mrr_assoc * sizeof(char*) + 1));
+ }
+
+ rowid_buffer_end= full_buf + bytes_for_rowids;
+ rowid_buffer.set_buffer_space(full_buf, rowid_buffer_end, 1);
+ key_buffer.set_buffer_space(rowid_buffer_end, full_buf_end, -1);
+}
+
+
+/*
+ DS-MRR/CPK: Fill the buffer with (lookup_tuple, range_id) pairs and sort
+
+ SYNOPSIS
+ DsMrr_impl::dsmrr_fill_key_buffer()
+
+ DESCRIPTION
+ DS-MRR/CPK: Enumerate the input range (=key) sequence, fill the key buffer
+ with (lookup_key, range_id) pairs and sort them by key value.
+
+ dsmrr_eof is set to indicate whether we've exhausted the list of ranges
+ we're scanning.
+
+ post-condition (one of the following holds):
+ - key buffer is non-empty, or
+ - key buffer is empty and source range sequence is exhausted
+*/
+
+void DsMrr_impl::dsmrr_fill_key_buffer()
+{
+ int res= 0; /* stays 0 (not eof) if the loop below never calls next() */
+ KEY_MULTI_RANGE cur_range;
+ DBUG_ENTER("DsMrr_impl::dsmrr_fill_key_buffer");
+
+ DBUG_ASSERT(!key_tuple_length || key_buffer.is_empty());
+
+ if (key_tuple_length)
+ {
+ if (do_rowid_fetch && rowid_buffer.is_empty())
+ {
+ /*
+ We're using two buffers and both of them are empty now. Restore the
+ original sizes
+ */
+ rowid_buffer.set_buffer_space(full_buf, rowid_buffer_end, 1);
+ key_buffer.set_buffer_space(rowid_buffer_end, full_buf_end, -1);
+ }
+ key_buffer.reset_for_writing();
+ }
+
+ while ((key_tuple_length == 0 ||
+ key_buffer.have_space_for(key_buff_elem_size)) &&
+ !(res= h->mrr_funcs.next(h->mrr_iter, &cur_range)))
+ {
+ DBUG_ASSERT(cur_range.range_flag & EQ_RANGE);
+ if (!key_tuple_length)
+ {
+ /* This only happens when we've just started filling the buffer */
+ setup_buffer_sizes(&cur_range.start_key);
+ }
+
+ if (key_buffer.is_reverse() && is_mrr_assoc)
+ key_buffer.write((uchar*)&cur_range.ptr, sizeof(void*));
+
+ /* Put key, or {key, range_id} pair into the buffer */
+ if (use_key_pointers)
+ key_buffer.write((uchar*)&cur_range.start_key.key, sizeof(char*));
+ else
+ key_buffer.write(cur_range.start_key.key, key_tuple_length);
+
+ if (!key_buffer.is_reverse() && is_mrr_assoc)
+ key_buffer.write((uchar*)&cur_range.ptr, sizeof(void*));
+ }
+
+ dsmrr_eof= test(res);
+
+ /* Sort the buffer contents by key value */
+ uint key_elem_size= key_size_in_keybuf + (int)is_mrr_assoc * sizeof(void*);
+ uint n_keys= key_buffer.used_size() / key_elem_size;
+
+ my_qsort2(key_buffer.used_area(), n_keys, key_elem_size,
+ (qsort2_cmp)DsMrr_impl::key_tuple_cmp, (void*)this);
+
+ last_identical_key_ptr= NULL;
+ in_identical_keys_range= FALSE;
+
+ DBUG_VOID_RETURN;
+}
+
+
+/*
+ DS-MRR/CPK: multi_range_read_next() function
+
+ SYNOPSIS
+ DsMrr_impl::dsmrr_next_from_index()
+ range_info_arg OUT identifier of range that the returned record belongs to
+
+ DESCRIPTION
+
+ This function walks over key buffer and does index reads, i.e. it produces
+ {current_record, range_id} pairs.
+
+ The function has the same call contract as multi_range_read_next().
+
+ We actually iterate nested sequences:
+
+ - a disjoint sequence of index ranges
+ - each range has multiple records
+ - each record goes into multiple identical ranges.
+
+ RETURN
+ 0 OK, next record was successfully read
+ HA_ERR_END_OF_FILE End of records
+ Other Some other error
+*/
+
+int DsMrr_impl::dsmrr_next_from_index(char **range_info_arg)
+{
+ int res;
+ uchar *key_in_buf;
+ handler *file= do_rowid_fetch? h2: h;
+
+ while (in_identical_keys_range)
+ {
+ /* Read record/key pointer from the buffer */
+ key_in_buf= identical_key_it.get_next(key_size_in_keybuf);
+ if (is_mrr_assoc)
+ cur_range_info= (char*)identical_key_it.get_next(sizeof(void*));
+
+ if (key_in_buf == last_identical_key_ptr)
+ {
+ /* We're looking at the last of the identical keys */
+ in_identical_keys_range= FALSE;
+ }
+check_record:
+ if ((h->mrr_funcs.skip_index_tuple &&
+ h->mrr_funcs.skip_index_tuple(h->mrr_iter, *(char**)cur_range_info)) ||
+ (h->mrr_funcs.skip_record &&
+ h->mrr_funcs.skip_record(h->mrr_iter, *(char**)cur_range_info, NULL)))
+ {
+ continue;
+ }
+ memcpy(range_info_arg, cur_range_info, sizeof(void*));
+ /* NOTE(review): cur_range_info is read above but only assigned if is_mrr_assoc; confirm */
+ return 0;
+ }
+
+ /* Try returning next record from the current range */
+ while (in_index_range)
+ {
+ res= file->ha_index_next_same(table->record[0], cur_index_tuple,
+ key_tuple_length);
+
+ if (res)
+ {
+ if (res != HA_ERR_END_OF_FILE && res != HA_ERR_KEY_NOT_FOUND)
+ return res; /* Fatal error */
+
+ in_index_range= FALSE; /* no more records here */
+ break;
+ }
+
+ if (last_identical_key_ptr)
+ {
+ in_identical_keys_range= TRUE;
+ identical_key_it.init(&key_buffer);
+ cur_range_info= first_identical_range_info;
+ }
+
+ goto check_record;
+ }
+
+ while(1)
+ {
+ DBUG_ASSERT(!in_identical_keys_range && !in_index_range);
+
+ /* Jump over the keys that were handled by identical key processing */
+ if (last_identical_key_ptr)
+ {
+ while (key_buffer.read(key_size_in_keybuf) != last_identical_key_ptr)
+ {
+ if (is_mrr_assoc)
+ key_buffer.read(sizeof(void*));
+ }
+ if (is_mrr_assoc)
+ key_buffer.read(sizeof(void*));
+ last_identical_key_ptr= NULL;
+ }
+
+ /* First, make sure we have a range at start of the buffer */
+ if (key_buffer.is_empty())
+ {
+ if (dsmrr_eof)
+ {
+ res= HA_ERR_END_OF_FILE;
+ goto end;
+ }
+ /*
+ When rowid fetching is used, it controls all buffer refills. When we're
+ on our own, try refilling our buffer.
+ */
+ if (!do_rowid_fetch)
+ dsmrr_fill_key_buffer();
+
+ if (key_buffer.is_empty())
+ {
+ res= HA_ERR_END_OF_FILE;
+ goto end;
+ }
+ }
+
+ if (do_rowid_fetch)
+ {
+ /*
+ At this point we're not using anything we've read from the key
+ buffer. Cut off unused key buffer space and give it to the rowid
+ buffer.
+ */
+ uchar *unused_start, *unused_end;
+ key_buffer.remove_unused_space(&unused_start, &unused_end);
+ rowid_buffer.grow(unused_start, unused_end);
+ }
+
+ /* Get the next range to scan */
+ cur_index_tuple= key_in_buf= key_buffer.read(key_size_in_keybuf);
+ if (use_key_pointers)
+ cur_index_tuple= *((uchar**)cur_index_tuple);
+
+ if (is_mrr_assoc)
+ cur_range_info= (char*)key_buffer.read(sizeof(void*));
+
+
+ /* Do index lookup */
+ if ((res= file->ha_index_read_map(table->record[0], cur_index_tuple,
+ key_tuple_map, HA_READ_KEY_EXACT)))
+ {
+ if (res != HA_ERR_END_OF_FILE && res != HA_ERR_KEY_NOT_FOUND)
+ return res;
+ continue; /* to next key and make another lookup */
+ }
+
+ /* Check if subsequent keys in the key buffer are the same as this one */
+ {
+ uchar *ptr;
+ identical_key_it.init(&key_buffer);
+ last_identical_key_ptr= NULL;
+ while ((ptr= identical_key_it.get_next(key_size_in_keybuf)))
+ {
+ if (is_mrr_assoc)
+ identical_key_it.get_next(sizeof(void*));
+
+ if (key_tuple_cmp(this, key_in_buf, ptr))
+ break;
+
+ last_identical_key_ptr= ptr;
+ }
+ if (last_identical_key_ptr)
+ {
+ in_identical_keys_range= TRUE;
+ identical_key_it.init(&key_buffer);
+ first_identical_range_info= cur_range_info;
+ }
+ }
+
+ in_index_range= !index_ranges_unique;
+ goto check_record;
+ }
+
+end:
+ return res;
+}
+
+
/**
DS-MRR implementation: multi_range_read_next() function
*/
@@ -533,48 +1088,125 @@
int res;
uchar *cur_range_info= 0;
uchar *rowid;
+ uchar *range_id;
if (use_default_impl)
return h->handler::multi_range_read_next(range_info);
+
+ if (!do_rowid_fetch)
+ return dsmrr_next_from_index(range_info);
- do
+ while (identical_rowid_ptr)
+ {
+ /*
+ Current record (the one we've returned in previous call) was obtained
+ from a rowid that matched multiple range_ids. Return this record again,
+ with next matching range_id.
+ */
+ rowid= rowid_buffer.read(h->ref_length);
+ if (is_mrr_assoc)
+ {
+ uchar *range_ptr= rowid_buffer.read(sizeof(uchar*));
+ memcpy(range_info, range_ptr, sizeof(uchar*));
+ }
+
+ if (rowid == identical_rowid_ptr)
+ {
+ identical_rowid_ptr= NULL; /* reached the last of identical rowids */
+ }
+
+ if (!h2->mrr_funcs.skip_record ||
+ !h2->mrr_funcs.skip_record(h2->mrr_iter, (char *) *range_info, rowid))
+ {
+ return 0;
+ }
+ }
+
+ while (1)
{
- if (rowids_buf_cur == rowids_buf_last)
+ if (rowid_buffer.is_empty())
{
- if (dsmrr_eof)
+ if (do_sort_keys)
{
- res= HA_ERR_END_OF_FILE;
- goto end;
+ if (!key_buffer.is_empty() || in_index_range)
+ {
+ /* There are some sorted keys left. Use them to get rowids */
+ if ((res= dsmrr_fill_rowid_buffer()))
+ return res; /* for fatal errors */
+ }
+ while (rowid_buffer.is_empty())
+ {
+ if (dsmrr_eof)
+ return HA_ERR_END_OF_FILE;
+ dsmrr_fill_key_buffer();
+ if ((res= dsmrr_fill_rowid_buffer()))
+ return res;
+ }
+ }
+ else
+ {
+ /*
+ There is no buffer with sorted keys. If fill_rowid_buffer() hasn't
+ reached eof condition before, try refilling the buffer.
+ */
+ if (dsmrr_eof)
+ return HA_ERR_END_OF_FILE;
+
+ if ((res= dsmrr_fill_rowid_buffer()))
+ return res;
}
- res= dsmrr_fill_buffer();
- if (res)
- goto end;
}
- /* return eof if there are no rowids in the buffer after re-fill attempt */
- if (rowids_buf_cur == rowids_buf_last)
- {
- res= HA_ERR_END_OF_FILE;
- goto end;
- }
- rowid= rowids_buf_cur;
+ /* Return eof if there are no rowids in the buffer after re-fill attempt */
+ if (rowid_buffer.is_empty())
+ return HA_ERR_END_OF_FILE;
- if (is_mrr_assoc)
- memcpy(&cur_range_info, rowids_buf_cur + h->ref_length, sizeof(uchar**));
+ rowid= rowid_buffer.read(h->ref_length);
+ identical_rowid_ptr= NULL;
- rowids_buf_cur += h->ref_length + sizeof(void*) * test(is_mrr_assoc);
+ if (is_mrr_assoc)
+ {
+ range_id= rowid_buffer.read(sizeof(uchar*));
+ memcpy(&cur_range_info, range_id, sizeof(uchar*));
+ memcpy(range_info, range_id, sizeof(uchar*));
+ }
+
if (h2->mrr_funcs.skip_record &&
h2->mrr_funcs.skip_record(h2->mrr_iter, (char *) cur_range_info, rowid))
continue;
+
res= h->ha_rnd_pos(table->record[0], rowid);
- break;
- } while (true);
-
- if (is_mrr_assoc)
- {
- memcpy(range_info, rowid + h->ref_length, sizeof(void*));
+
+ if (res == HA_ERR_RECORD_DELETED)
+ continue;
+
+ /*
+ Check if subsequent buffer elements have the same rowid value as this
+ one. If yes, remember this fact so that we don't make any more rnd_pos()
+ calls with this value.
+ */
+ if (!res)
+ {
+ /*
+ Note: this implies that SQL layer doesn't touch table->record[0]
+ between calls.
+ */
+ uchar *ptr;
+ SimpleBuffer::PeekIterator identical_rowid_it;
+ identical_rowid_it.init(&rowid_buffer);
+ while ((ptr= identical_rowid_it.get_next(h->ref_length)))
+ {
+ if (is_mrr_assoc)
+ identical_rowid_it.get_next(sizeof(void*));
+
+ if (h2->cmp_ref(rowid, ptr))
+ break;
+ identical_rowid_ptr= ptr;
+ }
+ }
+ return res; /* 0 on success; a failed rnd_pos() must not be reported as success */
}
-end:
+
return res;
}
@@ -582,7 +1214,8 @@
/**
DS-MRR implementation: multi_range_read_info() function
*/
-ha_rows DsMrr_impl::dsmrr_info(uint keyno, uint n_ranges, uint rows,
+ha_rows DsMrr_impl::dsmrr_info(uint keyno, uint n_ranges, uint rows,
+ uint key_parts,
uint *bufsz, uint *flags, COST_VECT *cost)
{
ha_rows res;
@@ -590,8 +1223,8 @@
uint def_bufsz= *bufsz;
/* Get cost/flags/mem_usage of default MRR implementation */
- res= h->handler::multi_range_read_info(keyno, n_ranges, rows, &def_bufsz,
- &def_flags, cost);
+ res= h->handler::multi_range_read_info(keyno, n_ranges, rows, key_parts,
+ &def_bufsz, &def_flags, cost);
DBUG_ASSERT(!res);
if ((*flags & HA_MRR_USE_DEFAULT_IMPL) ||
@@ -683,7 +1316,33 @@
return FALSE;
}
-/**
+
+/*
+ Check if key/flags allow DS-MRR/CPK strategy to be used
+
+ SYNOPSIS
+ DsMrr_impl::check_cpk_scan()
+ keyno Index that will be used
+ mrr_flags HA_MRR_* flags of the scan
+
+ DESCRIPTION
+ DS-MRR/CPK is allowed when HA_MRR_SINGLE_POINT is set, HA_MRR_SORTED is not
+ set, and keyno is the table's primary key and that key is clustered.
+ RETURN
+ TRUE DS-MRR/CPK should be used
+ FALSE Otherwise
+*/
+
+bool DsMrr_impl::check_cpk_scan(uint keyno, uint mrr_flags)
+{
+ return test((mrr_flags & HA_MRR_SINGLE_POINT) &&
+ !(mrr_flags & HA_MRR_SORTED) &&
+ keyno == table->s->primary_key &&
+ h->primary_key_is_clustered());
+}
+
+
+/*
DS-MRR Internals: Choose between Default MRR implementation and DS-MRR
Make the choice between using Default MRR implementation and DS-MRR.
@@ -706,21 +1365,25 @@
@retval FALSE DS-MRR implementation should be used
*/
+
bool DsMrr_impl::choose_mrr_impl(uint keyno, ha_rows rows, uint *flags,
uint *bufsz, COST_VECT *cost)
{
COST_VECT dsmrr_cost;
bool res;
THD *thd= current_thd;
+
+ doing_cpk_scan= check_cpk_scan(keyno, *flags);
if (thd->variables.optimizer_use_mrr == 2 || *flags & HA_MRR_INDEX_ONLY ||
- (keyno == table->s->primary_key && h->primary_key_is_clustered()) ||
+ (keyno == table->s->primary_key && h->primary_key_is_clustered() &&
+ !doing_cpk_scan) ||
key_uses_partial_cols(table, keyno))
{
/* Use the default implementation */
*flags |= HA_MRR_USE_DEFAULT_IMPL;
return TRUE;
}
-
+
uint add_len= table->key_info[keyno].key_length + h->ref_length;
*bufsz -= add_len;
if (get_disk_sweep_mrr_cost(keyno, rows, *flags, bufsz, &dsmrr_cost))
@@ -744,6 +1407,10 @@
*flags &= ~HA_MRR_SORTED; /* We will return unordered output */
*cost= dsmrr_cost;
res= FALSE;
+
+ if ((*flags & HA_MRR_SINGLE_POINT) &&
+ optimizer_flag(thd, OPTIMIZER_SWITCH_MRR_SORT_KEYS))
+ *flags |= HA_MRR_MATERIALIZED_KEYS;
}
else
{
diff -urN --exclude='.*' 5.3-noc/sql/multi_range_read.h maria-5.3-dsmrr-cpk-r5-noc/sql/multi_range_read.h
--- 5.3-noc/sql/multi_range_read.h 2010-08-14 17:29:22.000000000 +0200
+++ maria-5.3-dsmrr-cpk-r5-noc/sql/multi_range_read.h 2010-08-19 22:41:38.000000000 +0200
@@ -1,18 +1,213 @@
/*
- This file contains declarations for
- - Disk-Sweep MultiRangeRead (DS-MRR) implementation
+ This file contains declarations for Disk-Sweep MultiRangeRead (DS-MRR)
+ implementation
*/
/**
- A Disk-Sweep MRR interface implementation
+ A Disk-Sweep implementation of MRR Interface (DS-MRR for short)
- This implementation makes range (and, in the future, 'ref') scans to read
- table rows in disk sweeps.
+ This is a "plugin"(*) for storage engines that lets index scans
+ read table rows in rowid order. For disk-based storage engines, this is
+ faster than reading table rows in whatever-SQL-layer-makes-calls-in order.
+
+ (*) - only conceptually. No dynamic loading or binary compatibility of any
+ kind.
+
+ General scheme of things:
+
+ SQL Layer code
+ | | |
+ -v---v---v---- handler->multi_range_read_XXX() function calls
+ | | |
+ ____________________________________
+ / DS-MRR module \
+ | (scan indexes, order rowids, do |
+ | full record reads in rowid order) |
+ \____________________________________/
+ | | |
+ -|---|---|----- handler->read_range_first()/read_range_next(),
+ | | | handler->index_read(), handler->rnd_pos() calls.
+ | | |
+ v v v
+ Storage engine internals
+
+ Currently DS-MRR is used by MyISAM, InnoDB/XtraDB and Maria storage engines.
+ Potentially it can be used with any table handler that has disk-based data
+ storage and has better performance when reading data in rowid order.
+*/
+
+
+/*
+ A simple memory buffer for reading and writing.
+
+ When writing, there is no user-visible "current" position, although
+ internally 'write_pos' points to just after the end of used area (or at the
+ start of it for reverse buffer).
+
+ When reading, there is current position pointing at start (for reverse
+ buffer, end) of the element that will be read next.
+ ^^ why end for reverse? it's more logical to point at start
+*/
+
+class SimpleBuffer
+{
+ uchar *start;
+ uchar *end;
+ uchar *read_pos;
+ uchar *write_pos;
+
+ /*
+ 1 <=> buffer grows/is filled/is read from start to end
+ -1 <=> everything is done from end to start instead.
+ */
+ int direction;
+public:
+ /* Write-mode functions */
+ void reset_for_writing();
+ void write(const uchar *data, size_t bytes);
+ bool have_space_for(size_t bytes);
+
+ uchar *used_area() { return (direction == 1)? read_pos : write_pos; }
+ size_t used_size();
+ bool is_empty() { return used_size() == 0; }
+
+ /* Read-mode functions */
+ void reset_for_reading();
+
+ uchar *read(size_t bytes);
+ bool have_data(size_t bytes);
+ uchar *end_of_space();
+
+ /* Control functions */
+ void set_buffer_space(uchar *start_arg, uchar *end_arg, int direction_arg)
+ {
+ start= start_arg;
+ end= end_arg;
+ direction= direction_arg;
+ reset_for_writing();
+ }
+
+ /*
+ Cut off and return the unneeded space (the part that we have written to
+ and have already read from).
+ */
+ void remove_unused_space(uchar **unused_start, uchar **unused_end)
+ {
+ if (direction == 1)
+ {
+ *unused_start= start;
+ *unused_end= read_pos;
+ start= read_pos;
+ }
+ else
+ {
+ *unused_start= read_pos;
+ *unused_end= end;
+ end= read_pos;
+ }
+ }
+
+ void flip()
+ {
+ uchar *tmp= read_pos;
+ read_pos= write_pos;
+ write_pos= tmp;
+ direction= -direction;
+ }
+ bool is_reverse() { return direction == -1; }
+
+ void grow(uchar *unused_start, uchar *unused_end)
+ {
+ /*
+ Passed memory area can be meaningfully used for growing the buffer if:
+ - it is adjacent to buffer space we're using
+ - it is on the end towards which we grow.
+ */
+ if (direction == 1 && end == unused_start)
+ {
+ end= unused_end;
+ }
+ else if (direction == -1 && start == unused_end)
+ {
+ start= unused_start;
+ }
+ else
+ DBUG_ASSERT(0); /* Attempt to grow buffer in wrong direction */
+ }
- Currently it is used by MyISAM and InnoDB. Potentially it can be used with
- any table handler that has non-clustered indexes and on-disk rows.
+ //friend class PeekIterator;
+ class PeekIterator
+ {
+ // if direction==1 : pointer to what to return next
+ // if direction==-1: pointer to the end of what is to be returned next
+ uchar *pos;
+ SimpleBuffer *sb;
+ public:
+ void init(SimpleBuffer *sb_arg)
+ {
+ sb= sb_arg;
+ pos= sb->read_pos;
+ }
+ /* Return pointer to next chunk of nbytes bytes and advance over it */
+ uchar *get_next(size_t nbytes)
+ {
+ if (sb->direction == 1)
+ {
+ if (pos + nbytes > sb->write_pos)
+ return NULL;
+ uchar *res= pos;
+ pos += nbytes;
+ return res;
+ }
+ else
+ {
+ if (pos - nbytes < sb->write_pos)
+ return NULL;
+ pos -= nbytes;
+ return pos;
+ }
+ }
+ };
+};
+
+/*
+ DS-MRR implementation for one table. Create/use one object of this class for
+ each ha_{myisam/innobase/etc} object. That object will be further referred to
+ as "the handler"
+
+ There are actually three strategies
+ S1. Bypass DS-MRR, pass all calls to default implementation (i.e. to
+ MRR-to-non-MRR calls converter)
+ S2. Regular DS-MRR
+ S3. DS-MRR/CPK for doing scans on clustered primary keys.
+
+ S1 is used for cases which DS-MRR is unable to handle for some reason.
+
+ S2 is the actual DS-MRR. The basic algorithm is as follows:
+ 1. Scan the index (and only index, that is, with HA_EXTRA_KEYREAD on) and
+ fill the buffer with {rowid, range_id} pairs
+ 2. Sort the buffer by rowid
+ 3. for each {rowid, range_id} pair in the buffer
+ get record by rowid and return the {record, range_id} pair
+ 4. Repeat the above steps until we've exhausted the list of ranges we're
+ scanning.
+
+ S3 is the variant of DS-MRR for use with clustered primary keys (or any
+ clustered index). The idea is that in clustered index it is sufficient to
+ access the index in index order, and we don't need an intermediate step to
+ get rowid (like step #1 in S2).
+
+ DS-MRR/CPK's basic algorithm is as follows:
+ 1. Collect a number of ranges (=lookup keys)
+ 2. Sort them so that they follow in index order.
+ 3. for each {lookup_key, range_id} pair in the buffer
+ get record(s) matching the lookup key and return {record, range_id} pairs
+ 4. Repeat the above steps until we've exhausted the list of ranges we're
+ scanning.
*/
+
+
class DsMrr_impl
{
public:
@@ -21,50 +216,118 @@
DsMrr_impl()
: h2(NULL) {};
+ void init(handler *h_arg, TABLE *table_arg)
+ {
+ h= h_arg;
+ table= table_arg;
+ }
+ int dsmrr_init(handler *h, RANGE_SEQ_IF *seq_funcs, void *seq_init_param,
+ uint n_ranges, uint mode, HANDLER_BUFFER *buf);
+ void dsmrr_close();
+ int dsmrr_next(char **range_info);
+
+ ha_rows dsmrr_info(uint keyno, uint n_ranges, uint keys, uint key_parts,
+ uint *bufsz, uint *flags, COST_VECT *cost);
+
+ ha_rows dsmrr_info_const(uint keyno, RANGE_SEQ_IF *seq,
+ void *seq_init_param, uint n_ranges, uint *bufsz,
+ uint *flags, COST_VECT *cost);
+private:
/*
The "owner" handler object (the one that calls dsmrr_XXX functions.
It is used to retrieve full table rows by calling rnd_pos().
*/
handler *h;
TABLE *table; /* Always equal to h->table */
-private:
+
/* Secondary handler object. It is used for scanning the index */
handler *h2;
+ uchar *full_buf;
+ uchar *full_buf_end;
+
+ /* Valid when using both rowid and key buffer: the original bound between them */
+ uchar *rowid_buffer_end;
+
/* Buffer to store rowids, or (rowid, range_id) pairs */
- uchar *rowids_buf;
- uchar *rowids_buf_cur; /* Current position when reading/writing */
- uchar *rowids_buf_last; /* When reading: end of used buffer space */
- uchar *rowids_buf_end; /* End of the buffer */
+ SimpleBuffer rowid_buffer;
+
+ uchar *identical_rowid_ptr;
+
+ /* Identical keys */
+ bool in_identical_keys_range;
+ uchar *last_identical_key_ptr;
+ SimpleBuffer::PeekIterator identical_key_it;
+
+ SimpleBuffer key_buffer;
+
+ uint keyno;
+
+ /* Execution control */
+ bool do_sort_keys;
+ bool use_key_pointers;
+ bool do_rowid_fetch;
bool dsmrr_eof; /* TRUE <=> We have reached EOF when reading index tuples */
+
+ /*
+ TRUE <=> key buffer is exhausted (we need this because we may have a situation
+ where we've read everything from the key buffer but haven't finished with
+ scanning the last range)
+ */
+ bool key_eof;
/* TRUE <=> need range association, buffer holds {rowid, range_id} pairs */
bool is_mrr_assoc;
bool use_default_impl; /* TRUE <=> shortcut all calls to default MRR impl */
-public:
- void init(handler *h_arg, TABLE *table_arg)
- {
- h= h_arg;
- table= table_arg;
- }
- int dsmrr_init(handler *h, RANGE_SEQ_IF *seq_funcs, void *seq_init_param,
- uint n_ranges, uint mode, HANDLER_BUFFER *buf);
- void dsmrr_close();
- int dsmrr_fill_buffer();
- int dsmrr_next(char **range_info);
- ha_rows dsmrr_info(uint keyno, uint n_ranges, uint keys, uint *bufsz,
- uint *flags, COST_VECT *cost);
+ bool doing_cpk_scan; /* TRUE <=> DS-MRR/CPK variant is used */
+
+ /** DS-MRR/CPK variables start */
+
+ /* Length of lookup tuple being used, in bytes */
+ uint key_tuple_length;
+ key_part_map key_tuple_map;
+ /*
+ This is
+ = key_tuple_length if we copy keys to buffer
+ = sizeof(void*) if we're using pointers to materialized keys.
+ */
+ uint key_size_in_keybuf;
+
+ /* = key_size_in_keybuf [ + sizeof(range_assoc_info) ] */
+ uint key_buff_elem_size;
+
+ /* = h->ref_length [ + sizeof(range_assoc_info) ] */
+ uint rowid_buff_elem_size;
+
+ /*
+ TRUE <=> We're scanning on a full primary key (and not on prefix), and so
+ can get max. one match for each key
+ */
+ bool index_ranges_unique;
+ /* TRUE<=> we're in a middle of enumerating records from a range */
+ bool in_index_range;
+ uchar *cur_index_tuple;
+ /* if in_index_range==TRUE: range_id of the range we're enumerating */
+ char *cur_range_info;
+
+ char *first_identical_range_info;
- ha_rows dsmrr_info_const(uint keyno, RANGE_SEQ_IF *seq,
- void *seq_init_param, uint n_ranges, uint *bufsz,
- uint *flags, COST_VECT *cost);
-private:
bool choose_mrr_impl(uint keyno, ha_rows rows, uint *flags, uint *bufsz,
COST_VECT *cost);
bool get_disk_sweep_mrr_cost(uint keynr, ha_rows rows, uint flags,
uint *buffer_size, COST_VECT *cost);
+ bool check_cpk_scan(uint keyno, uint mrr_flags);
+ static int key_tuple_cmp(void* arg, uchar* key1, uchar* key2);
+ int dsmrr_fill_rowid_buffer();
+ void dsmrr_fill_key_buffer();
+ int dsmrr_next_from_index(char **range_info);
+
+ void setup_buffer_sizes(key_range *sample_key);
+
+ static range_seq_t key_buf_seq_init(void *init_param, uint n_ranges, uint flags);
+ static uint key_buf_seq_next(range_seq_t rseq, KEY_MULTI_RANGE *range);
};
diff -urN --exclude='.*' 5.3-noc/sql/mysqld.cc maria-5.3-dsmrr-cpk-r5-noc/sql/mysqld.cc
--- 5.3-noc/sql/mysqld.cc 2010-08-14 17:29:22.000000000 +0200
+++ maria-5.3-dsmrr-cpk-r5-noc/sql/mysqld.cc 2010-08-14 17:28:23.000000000 +0200
@@ -308,6 +308,7 @@
"partial_match_rowid_merge",
"partial_match_table_scan",
"subquery_cache",
+ "mrr_sort_keys",
#ifndef DBUG_OFF
"table_elimination",
#endif
@@ -329,6 +330,7 @@
sizeof("partial_match_rowid_merge") - 1,
sizeof("partial_match_table_scan") - 1,
sizeof("subquery_cache") - 1,
+ sizeof("mrr_sort_keys") - 1,
#ifndef DBUG_OFF
sizeof("table_elimination") - 1,
#endif
@@ -415,7 +417,8 @@
"semijoin=on,"
"partial_match_rowid_merge=on,"
"partial_match_table_scan=on,"
- "subquery_cache=on"
+ "subquery_cache=on,"
+ "mrr_sort_keys=on"
#ifndef DBUG_OFF
",table_elimination=on";
#else
diff -urN --exclude='.*' 5.3-noc/sql/mysql_priv.h maria-5.3-dsmrr-cpk-r5-noc/sql/mysql_priv.h
--- 5.3-noc/sql/mysql_priv.h 2010-08-14 17:29:22.000000000 +0200
+++ maria-5.3-dsmrr-cpk-r5-noc/sql/mysql_priv.h 2010-08-14 17:28:23.000000000 +0200
@@ -571,12 +571,13 @@
#define OPTIMIZER_SWITCH_PARTIAL_MATCH_ROWID_MERGE 512
#define OPTIMIZER_SWITCH_PARTIAL_MATCH_TABLE_SCAN 1024
#define OPTIMIZER_SWITCH_SUBQUERY_CACHE (1<<11)
+#define OPTIMIZER_SWITCH_MRR_SORT_KEYS (1<<12)
#ifdef DBUG_OFF
-# define OPTIMIZER_SWITCH_LAST (1<<12)
-#else
-# define OPTIMIZER_SWITCH_TABLE_ELIMINATION (1<<12)
# define OPTIMIZER_SWITCH_LAST (1<<13)
+#else
+# define OPTIMIZER_SWITCH_TABLE_ELIMINATION (1<<13)
+# define OPTIMIZER_SWITCH_LAST (1<<14)
#endif
#ifdef DBUG_OFF
@@ -592,7 +593,8 @@
OPTIMIZER_SWITCH_SEMIJOIN | \
OPTIMIZER_SWITCH_PARTIAL_MATCH_ROWID_MERGE|\
OPTIMIZER_SWITCH_PARTIAL_MATCH_TABLE_SCAN|\
- OPTIMIZER_SWITCH_SUBQUERY_CACHE)
+ OPTIMIZER_SWITCH_SUBQUERY_CACHE|\
+ OPTIMIZER_SWITCH_MRR_SORT_KEYS)
#else
# define OPTIMIZER_SWITCH_DEFAULT (OPTIMIZER_SWITCH_INDEX_MERGE | \
OPTIMIZER_SWITCH_INDEX_MERGE_UNION | \
@@ -606,7 +608,8 @@
OPTIMIZER_SWITCH_SEMIJOIN | \
OPTIMIZER_SWITCH_PARTIAL_MATCH_ROWID_MERGE|\
OPTIMIZER_SWITCH_PARTIAL_MATCH_TABLE_SCAN|\
- OPTIMIZER_SWITCH_SUBQUERY_CACHE)
+ OPTIMIZER_SWITCH_SUBQUERY_CACHE|\
+ OPTIMIZER_SWITCH_MRR_SORT_KEYS)
#endif
/*
diff -urN --exclude='.*' 5.3-noc/sql/opt_range.cc maria-5.3-dsmrr-cpk-r5-noc/sql/opt_range.cc
--- 5.3-noc/sql/opt_range.cc 2010-08-14 17:29:22.000000000 +0200
+++ maria-5.3-dsmrr-cpk-r5-noc/sql/opt_range.cc 2010-08-14 17:28:23.000000000 +0200
@@ -8005,6 +8005,7 @@
quick->mrr_buf_size= thd->variables.mrr_buff_size;
if (table->file->multi_range_read_info(quick->index, 1, (uint)records,
+ uint(-1),
&quick->mrr_buf_size,
&quick->mrr_flags, &cost))
goto err;
diff -urN --exclude='.*' 5.3-noc/sql/sql_join_cache.cc maria-5.3-dsmrr-cpk-r5-noc/sql/sql_join_cache.cc
--- 5.3-noc/sql/sql_join_cache.cc 2010-08-14 17:29:22.000000000 +0200
+++ maria-5.3-dsmrr-cpk-r5-noc/sql/sql_join_cache.cc 2010-08-14 17:28:23.000000000 +0200
@@ -651,6 +651,9 @@
use_emb_key= check_emb_key_usage();
+ if (use_emb_key)
+ mrr_mode|= HA_MRR_MATERIALIZED_KEYS;
+
create_remaining_fields(FALSE);
set_constants();
@@ -2390,8 +2393,8 @@
*/
if (!file->inited)
file->ha_index_init(join_tab->ref.key, 1);
- if ((error= file->multi_range_read_init(seq_funcs, (void*) this, ranges,
- mrr_mode, &mrr_buff)))
+ if ((error= file->multi_range_read_init(seq_funcs, (void*) this, ranges,
+ mrr_mode, &mrr_buff)))
rc= error < 0 ? NESTED_LOOP_NO_MORE_ROWS: NESTED_LOOP_ERROR;
return rc;
@@ -2631,6 +2634,8 @@
data_fields_offset+= copy->length;
}
+ mrr_mode|= HA_MRR_MATERIALIZED_KEYS;
+
DBUG_RETURN(rc);
}
diff -urN --exclude='.*' 5.3-noc/sql/sql_select.cc maria-5.3-dsmrr-cpk-r5-noc/sql/sql_select.cc
--- 5.3-noc/sql/sql_select.cc 2010-08-14 17:29:22.000000000 +0200
+++ maria-5.3-dsmrr-cpk-r5-noc/sql/sql_select.cc 2010-08-14 17:28:23.000000000 +0200
@@ -7508,10 +7508,11 @@
case JT_EQ_REF:
if (cache_level <= 4)
return 0;
- flags= HA_MRR_NO_NULL_ENDPOINTS;
+ flags= HA_MRR_NO_NULL_ENDPOINTS | HA_MRR_SINGLE_POINT;
if (tab->table->covering_keys.is_set(tab->ref.key))
flags|= HA_MRR_INDEX_ONLY;
rows= tab->table->file->multi_range_read_info(tab->ref.key, 10, 20,
+ tab->ref.key_parts,
&bufsz, &flags, &cost);
if ((rows != HA_POS_ERROR) && !(flags & HA_MRR_USE_DEFAULT_IMPL) &&
(!(flags & HA_MRR_NO_ASSOCIATION) || cache_level > 6) &&
diff -urN --exclude='.*' 5.3-noc/storage/maria/ha_maria.cc maria-5.3-dsmrr-cpk-r5-noc/storage/maria/ha_maria.cc
--- 5.3-noc/storage/maria/ha_maria.cc 2010-08-14 17:29:22.000000000 +0200
+++ maria-5.3-dsmrr-cpk-r5-noc/storage/maria/ha_maria.cc 2010-08-14 17:28:23.000000000 +0200
@@ -3503,8 +3503,8 @@
***************************************************************************/
int ha_maria::multi_range_read_init(RANGE_SEQ_IF *seq, void *seq_init_param,
- uint n_ranges, uint mode,
- HANDLER_BUFFER *buf)
+ uint n_ranges, uint mode,
+ HANDLER_BUFFER *buf)
{
return ds_mrr.dsmrr_init(this, seq, seq_init_param, n_ranges, mode, buf);
}
@@ -3530,11 +3530,11 @@
}
ha_rows ha_maria::multi_range_read_info(uint keyno, uint n_ranges, uint keys,
- uint *bufsz, uint *flags,
- COST_VECT *cost)
+ uint key_parts, uint *bufsz,
+ uint *flags, COST_VECT *cost)
{
ds_mrr.init(this, table);
- return ds_mrr.dsmrr_info(keyno, n_ranges, keys, bufsz, flags, cost);
+ return ds_mrr.dsmrr_info(keyno, n_ranges, keys, key_parts, bufsz, flags, cost);
}
/* MyISAM MRR implementation ends */
diff -urN --exclude='.*' 5.3-noc/storage/maria/ha_maria.h maria-5.3-dsmrr-cpk-r5-noc/storage/maria/ha_maria.h
--- 5.3-noc/storage/maria/ha_maria.h 2010-08-14 17:29:22.000000000 +0200
+++ maria-5.3-dsmrr-cpk-r5-noc/storage/maria/ha_maria.h 2010-08-14 17:28:23.000000000 +0200
@@ -183,7 +183,8 @@
uint n_ranges, uint *bufsz,
uint *flags, COST_VECT *cost);
ha_rows multi_range_read_info(uint keyno, uint n_ranges, uint keys,
- uint *bufsz, uint *flags, COST_VECT *cost);
+ uint key_parts, uint *bufsz,
+ uint *flags, COST_VECT *cost);
/* Index condition pushdown implementation */
Item *idx_cond_push(uint keyno, Item* idx_cond);
diff -urN --exclude='.*' 5.3-noc/storage/myisam/ha_myisam.cc maria-5.3-dsmrr-cpk-r5-noc/storage/myisam/ha_myisam.cc
--- 5.3-noc/storage/myisam/ha_myisam.cc 2010-08-14 17:29:22.000000000 +0200
+++ maria-5.3-dsmrr-cpk-r5-noc/storage/myisam/ha_myisam.cc 2010-08-14 17:28:23.000000000 +0200
@@ -2206,11 +2206,11 @@
}
ha_rows ha_myisam::multi_range_read_info(uint keyno, uint n_ranges, uint keys,
- uint *bufsz, uint *flags,
- COST_VECT *cost)
+ uint key_parts, uint *bufsz,
+ uint *flags, COST_VECT *cost)
{
ds_mrr.init(this, table);
- return ds_mrr.dsmrr_info(keyno, n_ranges, keys, bufsz, flags, cost);
+ return ds_mrr.dsmrr_info(keyno, n_ranges, keys, key_parts, bufsz, flags, cost);
}
/* MyISAM MRR implementation ends */
diff -urN --exclude='.*' 5.3-noc/storage/myisam/ha_myisam.h maria-5.3-dsmrr-cpk-r5-noc/storage/myisam/ha_myisam.h
--- 5.3-noc/storage/myisam/ha_myisam.h 2010-08-14 17:29:22.000000000 +0200
+++ maria-5.3-dsmrr-cpk-r5-noc/storage/myisam/ha_myisam.h 2010-08-14 17:28:23.000000000 +0200
@@ -168,7 +168,8 @@
uint n_ranges, uint *bufsz,
uint *flags, COST_VECT *cost);
ha_rows multi_range_read_info(uint keyno, uint n_ranges, uint keys,
- uint *bufsz, uint *flags, COST_VECT *cost);
+ uint key_parts, uint *bufsz,
+ uint *flags, COST_VECT *cost);
/* Index condition pushdown implementation */
Item *idx_cond_push(uint keyno, Item* idx_cond);
diff -urN --exclude='.*' 5.3-noc/storage/xtradb/handler/ha_innodb.cc maria-5.3-dsmrr-cpk-r5-noc/storage/xtradb/handler/ha_innodb.cc
--- 5.3-noc/storage/xtradb/handler/ha_innodb.cc 2010-08-14 17:29:22.000000000 +0200
+++ maria-5.3-dsmrr-cpk-r5-noc/storage/xtradb/handler/ha_innodb.cc 2010-08-14 17:28:23.000000000 +0200
@@ -11207,7 +11207,8 @@
*/
int ha_innobase::multi_range_read_init(RANGE_SEQ_IF *seq, void *seq_init_param,
- uint n_ranges, uint mode, HANDLER_BUFFER *buf)
+ uint n_ranges, uint mode,
+ HANDLER_BUFFER *buf)
{
return ds_mrr.dsmrr_init(this, seq, seq_init_param, n_ranges, mode, buf);
}
@@ -11234,12 +11235,13 @@
return res;
}
-ha_rows ha_innobase::multi_range_read_info(uint keyno, uint n_ranges,
- uint keys, uint *bufsz,
+ha_rows ha_innobase::multi_range_read_info(uint keyno, uint n_ranges, uint keys,
+ uint key_parts, uint *bufsz,
uint *flags, COST_VECT *cost)
{
ds_mrr.init(this, table);
- ha_rows res= ds_mrr.dsmrr_info(keyno, n_ranges, keys, bufsz, flags, cost);
+ ha_rows res= ds_mrr.dsmrr_info(keyno, n_ranges, keys, key_parts, bufsz,
+ flags, cost);
return res;
}
diff -urN --exclude='.*' 5.3-noc/storage/xtradb/handler/ha_innodb.h maria-5.3-dsmrr-cpk-r5-noc/storage/xtradb/handler/ha_innodb.h
--- 5.3-noc/storage/xtradb/handler/ha_innodb.h 2010-08-14 17:29:22.000000000 +0200
+++ maria-5.3-dsmrr-cpk-r5-noc/storage/xtradb/handler/ha_innodb.h 2010-08-14 17:28:23.000000000 +0200
@@ -219,7 +219,8 @@
uint n_ranges, uint *bufsz,
uint *flags, COST_VECT *cost);
ha_rows multi_range_read_info(uint keyno, uint n_ranges, uint keys,
- uint *bufsz, uint *flags, COST_VECT *cost);
+ uint key_parts, uint *bufsz,
+ uint *flags, COST_VECT *cost);
DsMrr_impl ds_mrr;
Item *idx_cond_push(uint keyno, Item* idx_cond);
Follow ups