maria-developers team mailing list archive
-
maria-developers team
-
Mailing list archive
-
Message #01801
bzr commit into Mariadb 5.2, with Maria 2.0:maria/5.2 branch (igor:2742)
#At lp:maria/5.2 based on revid:igor@xxxxxxxxxxxx-20091222151209-8ijjadlltdvmr1dy
2742 Igor Babaev 2009-12-22 [merge]
Merge
removed:
sql/ds_mrr.cc
sql/ds_mrr.h
added:
sql/multi_range_read.cc
sql/multi_range_read.h
sql/opt_index_cond_pushdown.cc
sql/opt_range_mrr.cc
modified:
include/my_handler.h
libmysqld/Makefile.am
mysql-test/include/common-tests.inc
mysql-test/r/ctype_cp1251.result
mysql-test/r/index_merge_myisam.result
mysql-test/r/innodb_mrr.result
mysql-test/r/myisam_mrr.result
mysql-test/t/ctype_cp1251.test
mysql-test/t/innodb_mrr.test
mysql-test/t/myisam_mrr.test
sql/Makefile.am
sql/handler.h
sql/mysql_priv.h
sql/mysqld.cc
sql/opt_range.cc
sql/opt_range.h
sql/set_var.cc
sql/sql_class.h
storage/maria/ha_maria.cc
storage/maria/ha_maria.h
storage/maria/ma_key.c
storage/maria/maria_def.h
storage/myisam/mi_key.c
storage/myisam/mi_rkey.c
storage/myisam/mi_rnext.c
storage/myisam/mi_rnext_same.c
storage/xtradb/handler/ha_innodb.cc
storage/xtradb/include/row0mysql.h
storage/xtradb/row/row0sel.c
=== modified file 'include/my_handler.h'
--- a/include/my_handler.h 2009-12-15 07:16:46 +0000
+++ b/include/my_handler.h 2009-12-22 12:33:21 +0000
@@ -138,11 +138,13 @@ extern void my_handler_error_unregister(
*/
typedef enum icp_result {
- ICP_NO_MATCH,
- ICP_MATCH,
- ICP_OUT_OF_RANGE
+ ICP_ERROR=-1,
+ ICP_NO_MATCH=0,
+ ICP_MATCH=1,
+ ICP_OUT_OF_RANGE=2
} ICP_RESULT;
+
#ifdef __cplusplus
}
#endif
=== modified file 'libmysqld/Makefile.am'
--- a/libmysqld/Makefile.am 2009-12-21 02:26:15 +0000
+++ b/libmysqld/Makefile.am 2009-12-22 12:49:15 +0000
@@ -45,7 +45,7 @@ libmysqlsources = errmsg.c get_password.
noinst_HEADERS = embedded_priv.h emb_qcache.h
-sqlsources = ds_mrr.cc derror.cc field.cc field_conv.cc strfunc.cc filesort.cc \
+sqlsources = derror.cc field.cc field_conv.cc strfunc.cc filesort.cc \
ha_ndbcluster.cc ha_ndbcluster_cond.cc \
ha_ndbcluster_binlog.cc ha_partition.cc \
handler.cc sql_handler.cc \
@@ -77,7 +77,8 @@ sqlsources = ds_mrr.cc derror.cc field.c
rpl_filter.cc sql_partition.cc sql_builtin.cc sql_plugin.cc \
sql_tablespace.cc \
rpl_injector.cc my_user.c partition_info.cc \
- sql_servers.cc event_parse_data.cc opt_table_elimination.cc
+ sql_servers.cc event_parse_data.cc opt_table_elimination.cc \
+ multi_range_read.cc opt_index_cond_pushdown.cc
libmysqld_int_a_SOURCES= $(libmysqld_sources)
nodist_libmysqld_int_a_SOURCES= $(libmysqlsources) $(sqlsources)
=== modified file 'mysql-test/include/common-tests.inc'
--- a/mysql-test/include/common-tests.inc 2006-06-09 07:23:59 +0000
+++ b/mysql-test/include/common-tests.inc 2009-12-22 12:33:21 +0000
@@ -1332,7 +1332,7 @@ explain select fld1 from t2 where fld1=2
# Search with a key with LIKE constant
# If the like starts with a certain letter key will be used.
#
-
+--sorted_result
select fld1,fld3 from t2 where companynr = 37 and fld3 like 'f%';
select fld3 from t2 where fld3 like "L%" and fld3 = "ok";
select fld3 from t2 where (fld3 like "C%" and fld3 = "Chantilly");
=== modified file 'mysql-test/r/ctype_cp1251.result'
--- a/mysql-test/r/ctype_cp1251.result 2009-12-15 07:16:46 +0000
+++ b/mysql-test/r/ctype_cp1251.result 2009-12-22 12:33:21 +0000
@@ -65,8 +65,8 @@ insert into t1 (a) values ('air'),
('tn_fakira'),('vw_silvia'),('vw_starshi'),('vw_geo'),('vw_b0x1');
select * from t1 where a like 'we_%';
a b
-we_toshko NULL
-we_ivo NULL
we_iliyan NULL
+we_ivo NULL
we_martin NULL
+we_toshko NULL
drop table t1;
=== modified file 'mysql-test/r/index_merge_myisam.result'
--- a/mysql-test/r/index_merge_myisam.result 2009-12-21 02:26:15 +0000
+++ b/mysql-test/r/index_merge_myisam.result 2009-12-22 12:49:15 +0000
@@ -1419,19 +1419,19 @@ drop table t1;
#
select @@optimizer_switch;
@@optimizer_switch
-index_merge=on,index_merge_union=on,index_merge_sort_union=on,index_merge_intersection=on
+index_merge=on,index_merge_union=on,index_merge_sort_union=on,index_merge_intersection=on,index_condition_pushdown=on
set optimizer_switch='index_merge=off,index_merge_union=off';
select @@optimizer_switch;
@@optimizer_switch
-index_merge=off,index_merge_union=off,index_merge_sort_union=on,index_merge_intersection=on
+index_merge=off,index_merge_union=off,index_merge_sort_union=on,index_merge_intersection=on,index_condition_pushdown=on
set optimizer_switch='index_merge_union=on';
select @@optimizer_switch;
@@optimizer_switch
-index_merge=off,index_merge_union=on,index_merge_sort_union=on,index_merge_intersection=on
+index_merge=off,index_merge_union=on,index_merge_sort_union=on,index_merge_intersection=on,index_condition_pushdown=on
set optimizer_switch='default,index_merge_sort_union=off';
select @@optimizer_switch;
@@optimizer_switch
-index_merge=on,index_merge_union=on,index_merge_sort_union=off,index_merge_intersection=on
+index_merge=on,index_merge_union=on,index_merge_sort_union=off,index_merge_intersection=on,index_condition_pushdown=on
set optimizer_switch=4;
ERROR 42000: Variable 'optimizer_switch' can't be set to the value of '4'
set optimizer_switch=NULL;
@@ -1458,21 +1458,21 @@ set optimizer_switch=default;
set optimizer_switch='index_merge=off,index_merge_union=off,default';
select @@optimizer_switch;
@@optimizer_switch
-index_merge=off,index_merge_union=off,index_merge_sort_union=on,index_merge_intersection=on
+index_merge=off,index_merge_union=off,index_merge_sort_union=on,index_merge_intersection=on,index_condition_pushdown=on
set optimizer_switch=default;
select @@global.optimizer_switch;
@@global.optimizer_switch
-index_merge=on,index_merge_union=on,index_merge_sort_union=on,index_merge_intersection=on
+index_merge=on,index_merge_union=on,index_merge_sort_union=on,index_merge_intersection=on,index_condition_pushdown=on
set @@global.optimizer_switch=default;
select @@global.optimizer_switch;
@@global.optimizer_switch
-index_merge=on,index_merge_union=on,index_merge_sort_union=on,index_merge_intersection=on
+index_merge=on,index_merge_union=on,index_merge_sort_union=on,index_merge_intersection=on,index_condition_pushdown=on
#
# Check index_merge's @@optimizer_switch flags
#
select @@optimizer_switch;
@@optimizer_switch
-index_merge=on,index_merge_union=on,index_merge_sort_union=on,index_merge_intersection=on
+index_merge=on,index_merge_union=on,index_merge_sort_union=on,index_merge_intersection=on,index_condition_pushdown=on
create table t0 (a int);
insert into t0 values (0),(1),(2),(3),(4),(5),(6),(7),(8),(9);
create table t1 (a int, b int, c int, filler char(100),
@@ -1582,5 +1582,5 @@ id select_type table type possible_keys
set optimizer_switch=default;
show variables like 'optimizer_switch';
Variable_name Value
-optimizer_switch index_merge=on,index_merge_union=on,index_merge_sort_union=on,index_merge_intersection=on
+optimizer_switch index_merge=on,index_merge_union=on,index_merge_sort_union=on,index_merge_intersection=on,index_condition_pushdown=on
drop table t0, t1;
=== modified file 'mysql-test/r/innodb_mrr.result'
--- a/mysql-test/r/innodb_mrr.result 2009-12-19 19:54:54 +0000
+++ b/mysql-test/r/innodb_mrr.result 2009-12-22 12:33:21 +0000
@@ -292,10 +292,10 @@ NULL 9 0
NULL 9 0
drop table t1, t2;
set storage_engine= @save_storage_engine;
-set @read_rnd_buffer_size_save= @@read_rnd_buffer_size;
-set read_rnd_buffer_size=64;
+set @mrr_buffer_size_save= @@mrr_buffer_size;
+set mrr_buffer_size=64;
Warnings:
-Warning 1292 Truncated incorrect read_rnd_buffer_size value: '64'
+Warning 1292 Truncated incorrect mrr_buffer_size value: '64'
create table t1(a int);
insert into t1 values (0),(1),(2),(3),(4),(5),(6),(7),(8),(9);
create table t2(a char(8), b char(8), c char(8), filler char(100), key(a,b,c) ) engine=InnoDB;
@@ -318,10 +318,10 @@ filler char(10), key(d), primary key (a,
insert into t2 select A.a, B.a, B.a, A.a, 'filler' from t1 A, t1 B;
explain select * from t2 force index (d) where d < 10;
id select_type table type possible_keys key key_len ref rows Extra
-1 SIMPLE t2 range d d 5 NULL 53 Using index condition; Using MRR
+1 SIMPLE t2 range d d 5 NULL # Using index condition; Using MRR
drop table t2;
drop table t1;
-set @@read_rnd_buffer_size= @read_rnd_buffer_size_save;
+set @@mrr_buffer_size= @mrr_buffer_size_save;
create table t1 (f1 int not null, f2 int not null,f3 int not null, f4 char(1), primary key (f1,f2), key ix(f3))Engine=InnoDB;
select * from t1 where (f3>=5 and f3<=10) or (f3>=1 and f3<=4);
f1 f2 f3 f4
=== modified file 'mysql-test/r/myisam_mrr.result'
--- a/mysql-test/r/myisam_mrr.result 2009-12-15 17:23:55 +0000
+++ b/mysql-test/r/myisam_mrr.result 2009-12-22 14:43:00 +0000
@@ -1,8 +1,8 @@
drop table if exists t1, t2, t3;
-set @read_rnd_buffer_size_save= @@read_rnd_buffer_size;
-set read_rnd_buffer_size=79;
+set @mrr_buffer_size_save= @@mrr_buffer_size;
+set mrr_buffer_size=79;
Warnings:
-Warning 1292 Truncated incorrect read_rnd_buffer_size value: '79'
+Warning 1292 Truncated incorrect mrr_buffer_size value: '79'
create table t1(a int);
show create table t1;
Table Create Table
@@ -293,7 +293,7 @@ NULL 7 0
NULL 9 0
NULL 9 0
drop table t1, t2;
-set @@read_rnd_buffer_size= @read_rnd_buffer_size_save;
+set @@mrr_buffer_size= @mrr_buffer_size_save;
CREATE TABLE t1 (
ID int(10) unsigned NOT NULL AUTO_INCREMENT,
col1 int(10) unsigned DEFAULT NULL,
@@ -388,3 +388,29 @@ explain select * from t1 where a < 20 o
id select_type table type possible_keys key key_len ref rows Extra
1 SIMPLE t1 range a a 5 NULL 20 Using index condition
drop table t0, t1;
+#
+# Part of MWL#67: DS-MRR backport: add an @@optimizer_switch flag for
+# index_condition pushdown:
+# - engine_condition_pushdown does not affect ICP
+select @@optimizer_switch;
+@@optimizer_switch
+index_merge=on,index_merge_union=on,index_merge_sort_union=on,index_merge_intersection=on,index_condition_pushdown=on
+create table t0 (a int);
+insert into t0 values (0),(1),(2),(3),(4),(5),(6),(7),(8),(9);
+create table t1 (a int, b int, key(a));
+insert into t1 select A.a + 10 *(B.a + 10*C.a), A.a + 10 *(B.a + 10*C.a) from t0 A, t0 B, t0 C;
+A query that will use ICP:
+explain select * from t1 where a < 20;
+id select_type table type possible_keys key key_len ref rows Extra
+1 SIMPLE t1 range a a 5 NULL 20 Using index condition; Using MRR
+set @save_optimizer_switch=@@optimizer_switch;
+set optimizer_switch='index_condition_pushdown=off';
+explain select * from t1 where a < 20;
+id select_type table type possible_keys key key_len ref rows Extra
+1 SIMPLE t1 range a a 5 NULL 20 Using where; Using MRR
+set optimizer_switch='index_condition_pushdown=on';
+explain select * from t1 where a < 20;
+id select_type table type possible_keys key key_len ref rows Extra
+1 SIMPLE t1 range a a 5 NULL 20 Using index condition; Using MRR
+set optimizer_switch=@save_optimizer_switch;
+drop table t0, t1;
=== modified file 'mysql-test/t/ctype_cp1251.test'
--- a/mysql-test/t/ctype_cp1251.test 2005-10-13 14:16:19 +0000
+++ b/mysql-test/t/ctype_cp1251.test 2009-12-22 12:33:21 +0000
@@ -44,6 +44,7 @@ insert into t1 (a) values ('air'),
('we_martin'),('vw_grado'),('vw_vasko'),('tn_vili'),('tn_kalina'),
('tn_fakira'),('vw_silvia'),('vw_starshi'),('vw_geo'),('vw_b0x1');
+--sorted_result
select * from t1 where a like 'we_%';
drop table t1;
=== modified file 'mysql-test/t/innodb_mrr.test'
--- a/mysql-test/t/innodb_mrr.test 2009-12-15 07:16:46 +0000
+++ b/mysql-test/t/innodb_mrr.test 2009-12-22 12:33:21 +0000
@@ -12,8 +12,8 @@ set storage_engine=InnoDB;
set storage_engine= @save_storage_engine;
# Try big rowid sizes
-set @read_rnd_buffer_size_save= @@read_rnd_buffer_size;
-set read_rnd_buffer_size=64;
+set @mrr_buffer_size_save= @@mrr_buffer_size;
+set mrr_buffer_size=64;
# By default InnoDB will fill values only for key parts used by the query,
# which will cause DS-MRR to supply an invalid tuple on scan restoration.
@@ -38,11 +38,12 @@ drop table t2;
create table t2 (a char(100), b char(100), c char(100), d int,
filler char(10), key(d), primary key (a,b,c)) engine= innodb;
insert into t2 select A.a, B.a, B.a, A.a, 'filler' from t1 A, t1 B;
+--replace_column 9 #
explain select * from t2 force index (d) where d < 10;
drop table t2;
drop table t1;
-set @@read_rnd_buffer_size= @read_rnd_buffer_size_save;
+set @@mrr_buffer_size= @mrr_buffer_size_save;
#
# BUG#33033 "MySQL/InnoDB crashes with simple select range query"
=== modified file 'mysql-test/t/myisam_mrr.test'
--- a/mysql-test/t/myisam_mrr.test 2009-12-15 17:23:55 +0000
+++ b/mysql-test/t/myisam_mrr.test 2009-12-22 14:43:00 +0000
@@ -6,12 +6,12 @@
drop table if exists t1, t2, t3;
--enable_warnings
-set @read_rnd_buffer_size_save= @@read_rnd_buffer_size;
-set read_rnd_buffer_size=79;
+set @mrr_buffer_size_save= @@mrr_buffer_size;
+set mrr_buffer_size=79;
-- source include/mrr_tests.inc
-set @@read_rnd_buffer_size= @read_rnd_buffer_size_save;
+set @@mrr_buffer_size= @mrr_buffer_size_save;
#
# BUG#30622: Incorrect query results for MRR + filesort
@@ -96,3 +96,32 @@ insert into t1 select A.a + 10 *(B.a + 1
explain select * from t1 where a < 20 order by a;
drop table t0, t1;
+-- echo #
+-- echo # Part of MWL#67: DS-MRR backport: add an @@optimizer_switch flag for
+-- echo # index_condition pushdown:
+-- echo # - engine_condition_pushdown does not affect ICP
+
+
+# Check that optimizer_switch is present
+--replace_regex /,table_elimination=o[nf]*//
+select @@optimizer_switch;
+
+# Check if it affects ICP
+create table t0 (a int);
+insert into t0 values (0),(1),(2),(3),(4),(5),(6),(7),(8),(9);
+create table t1 (a int, b int, key(a));
+insert into t1 select A.a + 10 *(B.a + 10*C.a), A.a + 10 *(B.a + 10*C.a) from t0 A, t0 B, t0 C;
+
+-- echo A query that will use ICP:
+explain select * from t1 where a < 20;
+
+set @save_optimizer_switch=@@optimizer_switch;
+set optimizer_switch='index_condition_pushdown=off';
+explain select * from t1 where a < 20;
+
+set optimizer_switch='index_condition_pushdown=on';
+explain select * from t1 where a < 20;
+
+set optimizer_switch=@save_optimizer_switch;
+
+drop table t0, t1;
=== modified file 'sql/Makefile.am'
--- a/sql/Makefile.am 2009-12-21 02:26:15 +0000
+++ b/sql/Makefile.am 2009-12-22 14:43:00 +0000
@@ -47,7 +47,7 @@ mysqld_LDADD = libndb.la \
$(LDADD) $(CXXLDFLAGS) $(WRAPLIBS) @LIBDL@ \
$(yassl_libs) $(openssl_libs) @MYSQLD_EXTRA_LIBS@
-noinst_HEADERS = ds_mrr.h item.h item_func.h item_sum.h item_cmpfunc.h \
+noinst_HEADERS = item.h item_func.h item_sum.h item_cmpfunc.h \
item_strfunc.h item_timefunc.h \
item_xmlfunc.h \
item_create.h item_subselect.h item_row.h \
@@ -77,9 +77,10 @@ noinst_HEADERS = ds_mrr.h item.h item_fu
sql_plugin.h authors.h event_parse_data.h \
event_data_objects.h event_scheduler.h \
sql_partition.h partition_info.h partition_element.h \
- contributors.h sql_servers.h
+ contributors.h sql_servers.h \
+ multi_range_read.h
-mysqld_SOURCES = ds_mrr.cc sql_lex.cc sql_handler.cc sql_partition.cc \
+mysqld_SOURCES = sql_lex.cc sql_handler.cc sql_partition.cc \
item.cc item_sum.cc item_buff.cc item_func.cc \
item_cmpfunc.cc item_strfunc.cc item_timefunc.cc \
thr_malloc.cc item_create.cc item_subselect.cc \
@@ -123,7 +124,9 @@ mysqld_SOURCES = ds_mrr.cc sql_lex.cc sq
sql_plugin.cc sql_binlog.cc \
sql_builtin.cc sql_tablespace.cc partition_info.cc \
sql_servers.cc event_parse_data.cc \
- opt_table_elimination.cc
+ opt_table_elimination.cc \
+ multi_range_read.cc \
+ opt_index_cond_pushdown.cc
nodist_mysqld_SOURCES = mini_client_errors.c pack.c client.c my_time.c my_user.c
@@ -151,7 +154,7 @@ BUILT_SOURCES = $(BUILT_MAINT_SRC) lex_
EXTRA_DIST = udf_example.c udf_example.def $(BUILT_MAINT_SRC) \
nt_servc.cc nt_servc.h \
message.mc message.h message.rc MSG00001.bin \
- CMakeLists.txt
+ CMakeLists.txt opt_range_mrr.cc
CLEANFILES = lex_hash.h sql_yacc.output link_sources
DISTCLEANFILES = $(EXTRA_PROGRAMS)
=== removed file 'sql/ds_mrr.cc'
--- a/sql/ds_mrr.cc 2009-12-21 02:26:15 +0000
+++ b/sql/ds_mrr.cc 1970-01-01 00:00:00 +0000
@@ -1,1337 +0,0 @@
-#include "mysql_priv.h"
-#include "sql_select.h"
-
-/* **************************************************************************
- * DS-MRR implementation
- ***************************************************************************/
-
-/**
- DS-MRR: Initialize and start MRR scan
-
- Initialize and start the MRR scan. Depending on the mode parameter, this
- may use default or DS-MRR implementation.
-
- @param h Table handler to be used
- @param key Index to be used
- @param seq_funcs Interval sequence enumeration functions
- @param seq_init_param Interval sequence enumeration parameter
- @param n_ranges Number of ranges in the sequence.
- @param mode HA_MRR_* modes to use
- @param buf INOUT Buffer to use
-
- @retval 0 Ok, Scan started.
- @retval other Error
-*/
-
-int DsMrr_impl::dsmrr_init(handler *h_arg, RANGE_SEQ_IF *seq_funcs,
- void *seq_init_param, uint n_ranges, uint mode,
- HANDLER_BUFFER *buf)
-{
- uint elem_size;
- Item *pushed_cond= NULL;
- handler *new_h2= 0;
- DBUG_ENTER("DsMrr_impl::dsmrr_init");
-
- /*
- index_merge may invoke a scan on an object for which dsmrr_info[_const]
- has not been called, so set the owner handler here as well.
- */
- h= h_arg;
- if (mode & HA_MRR_USE_DEFAULT_IMPL || mode & HA_MRR_SORTED)
- {
- use_default_impl= TRUE;
- const int retval=
- h->handler::multi_range_read_init(seq_funcs, seq_init_param,
- n_ranges, mode, buf);
- DBUG_RETURN(retval);
- }
- rowids_buf= buf->buffer;
-
- is_mrr_assoc= !test(mode & HA_MRR_NO_ASSOCIATION);
-
- if (is_mrr_assoc)
- status_var_increment(table->in_use->status_var.ha_multi_range_read_init_count);
-
- rowids_buf_end= buf->buffer_end;
- elem_size= h->ref_length + (int)is_mrr_assoc * sizeof(void*);
- rowids_buf_last= rowids_buf +
- ((rowids_buf_end - rowids_buf)/ elem_size)*
- elem_size;
- rowids_buf_end= rowids_buf_last;
-
- /*
- There can be two cases:
- - This is the first call since index_init(), h2==NULL
- Need to setup h2 then.
- - This is not the first call, h2 is initialized and set up appropriately.
- The caller might have called h->index_init(), need to switch h to
- rnd_pos calls.
- */
- if (!h2)
- {
- /* Create a separate handler object to do rndpos() calls. */
- THD *thd= current_thd;
- /*
- ::clone() takes up a lot of stack, especially on 64 bit platforms.
- The constant 5 is an empiric result.
- */
- if (check_stack_overrun(thd, 5*STACK_MIN_SIZE, (uchar*) &new_h2))
- DBUG_RETURN(1);
- DBUG_ASSERT(h->active_index != MAX_KEY);
- uint mrr_keyno= h->active_index;
-
- /* Create a separate handler object to do rndpos() calls. */
- if (!(new_h2= h->clone(thd->mem_root)) ||
- new_h2->ha_external_lock(thd, F_RDLCK))
- {
- delete new_h2;
- DBUG_RETURN(1);
- }
-
- if (mrr_keyno == h->pushed_idx_cond_keyno)
- pushed_cond= h->pushed_idx_cond;
-
- /*
- Caution: this call will invoke this->dsmrr_close(). Do not put the
- created secondary table handler into this->h2 or it will delete it.
- */
- if (h->ha_index_end())
- {
- h2=new_h2;
- goto error;
- }
-
- h2= new_h2; /* Ok, now can put it into h2 */
- table->prepare_for_position();
- h2->extra(HA_EXTRA_KEYREAD);
-
- if (h2->ha_index_init(mrr_keyno, FALSE))
- goto error;
-
- use_default_impl= FALSE;
- if (pushed_cond)
- h2->idx_cond_push(mrr_keyno, pushed_cond);
- }
- else
- {
- /*
- We get here when the access alternates between MRR scan(s) and non-MRR
- scans.
-
- Calling h->index_end() will invoke dsmrr_close() for this object,
- which will delete h2. We need to keep it, so put it away and don't
- let it be deleted:
- */
- handler *save_h2= h2;
- h2= NULL;
- int res= (h->inited == handler::INDEX && h->ha_index_end());
- h2= save_h2;
- use_default_impl= FALSE;
- if (res)
- goto error;
- }
-
- if (h2->handler::multi_range_read_init(seq_funcs, seq_init_param, n_ranges,
- mode, buf) ||
- dsmrr_fill_buffer())
- {
- goto error;
- }
- /*
- If the above call has scanned through all intervals in *seq, then
- adjust *buf to indicate that the remaining buffer space will not be used.
- */
- if (dsmrr_eof)
- buf->end_of_used_area= rowids_buf_last;
-
- /*
- h->inited == INDEX may occur when 'range checked for each record' is
- used.
- */
- if ((h->inited != handler::RND) &&
- ((h->inited==handler::INDEX? h->ha_index_end(): FALSE) ||
- (h->ha_rnd_init(FALSE))))
- goto error;
-
- use_default_impl= FALSE;
- h->mrr_funcs= *seq_funcs;
-
- DBUG_RETURN(0);
-error:
- h2->ha_index_or_rnd_end();
- h2->ha_external_lock(current_thd, F_UNLCK);
- h2->close();
- delete h2;
- h2= NULL;
- DBUG_RETURN(1);
-}
-
-
-void DsMrr_impl::dsmrr_close()
-{
- DBUG_ENTER("DsMrr_impl::dsmrr_close");
- if (h2)
- {
- h2->ha_index_or_rnd_end();
- h2->ha_external_lock(current_thd, F_UNLCK);
- h2->close();
- delete h2;
- h2= NULL;
- }
- use_default_impl= TRUE;
- DBUG_VOID_RETURN;
-}
-
-
-static int rowid_cmp(void *h, uchar *a, uchar *b)
-{
- return ((handler*)h)->cmp_ref(a, b);
-}
-
-
-/**
- DS-MRR: Fill the buffer with rowids and sort it by rowid
-
- {This is an internal function of DiskSweep MRR implementation}
- Scan the MRR ranges and collect ROWIDs (or {ROWID, range_id} pairs) into
- buffer. When the buffer is full or scan is completed, sort the buffer by
- rowid and return.
-
- The function assumes that rowids buffer is empty when it is invoked.
-
- @param h Table handler
-
- @retval 0 OK, the next portion of rowids is in the buffer,
- properly ordered
- @retval other Error
-*/
-
-int DsMrr_impl::dsmrr_fill_buffer()
-{
- char *range_info;
- int res;
- DBUG_ENTER("DsMrr_impl::dsmrr_fill_buffer");
-
- rowids_buf_cur= rowids_buf;
- while ((rowids_buf_cur < rowids_buf_end) &&
- !(res= h2->handler::multi_range_read_next(&range_info)))
- {
- KEY_MULTI_RANGE *curr_range= &h2->handler::mrr_cur_range;
- if (h2->mrr_funcs.skip_index_tuple &&
- h2->mrr_funcs.skip_index_tuple(h2->mrr_iter, curr_range->ptr))
- continue;
-
- /* Put rowid, or {rowid, range_id} pair into the buffer */
- h2->position(table->record[0]);
- memcpy(rowids_buf_cur, h2->ref, h2->ref_length);
- rowids_buf_cur += h2->ref_length;
-
- if (is_mrr_assoc)
- {
- memcpy(rowids_buf_cur, &range_info, sizeof(void*));
- rowids_buf_cur += sizeof(void*);
- }
- }
-
- if (res && res != HA_ERR_END_OF_FILE)
- DBUG_RETURN(res);
- dsmrr_eof= test(res == HA_ERR_END_OF_FILE);
-
- /* Sort the buffer contents by rowid */
- uint elem_size= h->ref_length + (int)is_mrr_assoc * sizeof(void*);
- uint n_rowids= (rowids_buf_cur - rowids_buf) / elem_size;
-
- my_qsort2(rowids_buf, n_rowids, elem_size, (qsort2_cmp)rowid_cmp,
- (void*)h);
- rowids_buf_last= rowids_buf_cur;
- rowids_buf_cur= rowids_buf;
- DBUG_RETURN(0);
-}
-
-
-/**
- DS-MRR implementation: multi_range_read_next() function
-*/
-
-int DsMrr_impl::dsmrr_next(char **range_info)
-{
- int res;
- uchar *cur_range_info= 0;
- uchar *rowid;
-
- if (use_default_impl)
- return h->handler::multi_range_read_next(range_info);
-
- do
- {
- if (rowids_buf_cur == rowids_buf_last)
- {
- if (dsmrr_eof)
- {
- res= HA_ERR_END_OF_FILE;
- goto end;
- }
- res= dsmrr_fill_buffer();
- if (res)
- goto end;
- }
-
- /* return eof if there are no rowids in the buffer after re-fill attempt */
- if (rowids_buf_cur == rowids_buf_last)
- {
- res= HA_ERR_END_OF_FILE;
- goto end;
- }
- rowid= rowids_buf_cur;
-
- if (is_mrr_assoc)
- memcpy(&cur_range_info, rowids_buf_cur + h->ref_length, sizeof(uchar**));
-
- rowids_buf_cur += h->ref_length + sizeof(void*) * test(is_mrr_assoc);
- if (h2->mrr_funcs.skip_record &&
- h2->mrr_funcs.skip_record(h2->mrr_iter, (char *) cur_range_info, rowid))
- continue;
- res= h->rnd_pos(table->record[0], rowid);
- break;
- } while (true);
-
- if (is_mrr_assoc)
- {
- memcpy(range_info, rowid + h->ref_length, sizeof(void*));
- }
-end:
- return res;
-}
-
-
-/**
- DS-MRR implementation: multi_range_read_info() function
-*/
-ha_rows DsMrr_impl::dsmrr_info(uint keyno, uint n_ranges, uint rows,
- uint *bufsz, uint *flags, COST_VECT *cost)
-{
- ha_rows res;
- uint def_flags= *flags;
- uint def_bufsz= *bufsz;
-
- /* Get cost/flags/mem_usage of default MRR implementation */
- res= h->handler::multi_range_read_info(keyno, n_ranges, rows, &def_bufsz,
- &def_flags, cost);
- DBUG_ASSERT(!res);
-
- if ((*flags & HA_MRR_USE_DEFAULT_IMPL) ||
- choose_mrr_impl(keyno, rows, &def_flags, &def_bufsz, cost))
- {
- /* Default implementation is chosen */
- DBUG_PRINT("info", ("Default MRR implementation choosen"));
- *flags= def_flags;
- *bufsz= def_bufsz;
- }
- else
- {
- /* *flags and *bufsz were set by choose_mrr_impl */
- DBUG_PRINT("info", ("DS-MRR implementation choosen"));
- }
- return 0;
-}
-
-
-/**
- DS-MRR Implementation: multi_range_read_info_const() function
-*/
-
-ha_rows DsMrr_impl::dsmrr_info_const(uint keyno, RANGE_SEQ_IF *seq,
- void *seq_init_param, uint n_ranges,
- uint *bufsz, uint *flags, COST_VECT *cost)
-{
- ha_rows rows;
- uint def_flags= *flags;
- uint def_bufsz= *bufsz;
- /* Get cost/flags/mem_usage of default MRR implementation */
- rows= h->handler::multi_range_read_info_const(keyno, seq, seq_init_param,
- n_ranges, &def_bufsz,
- &def_flags, cost);
- if (rows == HA_POS_ERROR)
- {
- /* Default implementation can't perform MRR scan => we can't either */
- return rows;
- }
-
- /*
- If HA_MRR_USE_DEFAULT_IMPL has been passed to us, that is an order to
- use the default MRR implementation (we need it for UPDATE/DELETE).
- Otherwise, make a choice based on cost and @@optimizer_use_mrr.
- */
- if ((*flags & HA_MRR_USE_DEFAULT_IMPL) ||
- choose_mrr_impl(keyno, rows, flags, bufsz, cost))
- {
- DBUG_PRINT("info", ("Default MRR implementation choosen"));
- *flags= def_flags;
- *bufsz= def_bufsz;
- }
- else
- {
- /* *flags and *bufsz were set by choose_mrr_impl */
- DBUG_PRINT("info", ("DS-MRR implementation choosen"));
- }
- return rows;
-}
-
-
-/**
- Check if key has partially-covered columns
-
- We can't use DS-MRR to perform range scans when the ranges are over
- partially-covered keys, because we'll not have full key part values
- (we'll have their prefixes from the index) and will not be able to check
- if we've reached the end of the range.
-
- @param keyno Key to check
-
- @todo
- Allow use of DS-MRR in cases where the index has partially-covered
- components but they are not used for scanning.
-
- @retval TRUE Yes
- @retval FALSE No
-*/
-
-bool key_uses_partial_cols(TABLE *table, uint keyno)
-{
- KEY_PART_INFO *kp= table->key_info[keyno].key_part;
- KEY_PART_INFO *kp_end= kp + table->key_info[keyno].key_parts;
- for (; kp != kp_end; kp++)
- {
- if (!kp->field->part_of_key.is_set(keyno))
- return TRUE;
- }
- return FALSE;
-}
-
-/**
- DS-MRR Internals: Choose between Default MRR implementation and DS-MRR
-
- Make the choice between using Default MRR implementation and DS-MRR.
- This function contains common functionality factored out of dsmrr_info()
- and dsmrr_info_const(). The function assumes that the default MRR
- implementation's applicability requirements are satisfied.
-
- @param keyno Index number
- @param rows E(full rows to be retrieved)
- @param flags IN MRR flags provided by the MRR user
- OUT If DS-MRR is chosen, flags of DS-MRR implementation
- else the value is not modified
- @param bufsz IN If DS-MRR is chosen, buffer use of DS-MRR implementation
- else the value is not modified
- @param cost IN Cost of default MRR implementation
- OUT If DS-MRR is chosen, cost of DS-MRR scan
- else the value is not modified
-
- @retval TRUE Default MRR implementation should be used
- @retval FALSE DS-MRR implementation should be used
-*/
-
-bool DsMrr_impl::choose_mrr_impl(uint keyno, ha_rows rows, uint *flags,
- uint *bufsz, COST_VECT *cost)
-{
- COST_VECT dsmrr_cost;
- bool res;
- THD *thd= current_thd;
- if (thd->variables.optimizer_use_mrr == 2 || *flags & HA_MRR_INDEX_ONLY ||
- (keyno == table->s->primary_key && h->primary_key_is_clustered()) ||
- key_uses_partial_cols(table, keyno))
- {
- /* Use the default implementation */
- *flags |= HA_MRR_USE_DEFAULT_IMPL;
- return TRUE;
- }
-
- uint add_len= table->key_info[keyno].key_length + h->ref_length;
- *bufsz -= add_len;
- if (get_disk_sweep_mrr_cost(keyno, rows, *flags, bufsz, &dsmrr_cost))
- return TRUE;
- *bufsz += add_len;
-
- bool force_dsmrr;
- /*
- If @@optimizer_use_mrr==force, then set cost of DS-MRR to be minimum of
- DS-MRR and Default implementations cost. This allows one to force use of
- DS-MRR whenever it is applicable without affecting other cost-based
- choices.
- */
- if ((force_dsmrr= (thd->variables.optimizer_use_mrr == 1)) &&
- dsmrr_cost.total_cost() > cost->total_cost())
- dsmrr_cost= *cost;
-
- if (force_dsmrr || dsmrr_cost.total_cost() <= cost->total_cost())
- {
- *flags &= ~HA_MRR_USE_DEFAULT_IMPL; /* Use the DS-MRR implementation */
- *flags &= ~HA_MRR_SORTED; /* We will return unordered output */
- *cost= dsmrr_cost;
- res= FALSE;
- }
- else
- {
- /* Use the default MRR implementation */
- res= TRUE;
- }
- return res;
-}
-
-
-static void get_sort_and_sweep_cost(TABLE *table, ha_rows nrows, COST_VECT *cost);
-
-
-/**
- Get cost of DS-MRR scan
-
- @param keynr Index to be used
- @param rows E(Number of rows to be scanned)
- @param flags Scan parameters (HA_MRR_* flags)
- @param buffer_size INOUT Buffer size
- @param cost OUT The cost
-
- @retval FALSE OK
- @retval TRUE Error, DS-MRR cannot be used (the buffer is too small
- for even 1 rowid)
-*/
-
-bool DsMrr_impl::get_disk_sweep_mrr_cost(uint keynr, ha_rows rows, uint flags,
- uint *buffer_size, COST_VECT *cost)
-{
- ulong max_buff_entries, elem_size;
- ha_rows rows_in_full_step, rows_in_last_step;
- uint n_full_steps;
- double index_read_cost;
-
- elem_size= h->ref_length + sizeof(void*) * (!test(flags & HA_MRR_NO_ASSOCIATION));
- max_buff_entries = *buffer_size / elem_size;
-
- if (!max_buff_entries)
- return TRUE; /* Buffer has not enough space for even 1 rowid */
-
- /* Number of iterations we'll make with full buffer */
- n_full_steps= (uint)floor(rows2double(rows) / max_buff_entries);
-
- /*
- Get numbers of rows we'll be processing in
- - non-last sweep, with full buffer
- - last iteration, with non-full buffer
- */
- rows_in_full_step= max_buff_entries;
- rows_in_last_step= rows % max_buff_entries;
-
- /* Adjust buffer size if we expect to use only part of the buffer */
- if (n_full_steps)
- {
- get_sort_and_sweep_cost(table, rows, cost);
- cost->multiply(n_full_steps);
- }
- else
- {
- cost->zero();
- *buffer_size= max(*buffer_size,
- (size_t)(1.2*rows_in_last_step) * elem_size +
- h->ref_length + table->key_info[keynr].key_length);
- }
-
- COST_VECT last_step_cost;
- get_sort_and_sweep_cost(table, rows_in_last_step, &last_step_cost);
- cost->add(&last_step_cost);
-
- if (n_full_steps != 0)
- cost->mem_cost= *buffer_size;
- else
- cost->mem_cost= (double)rows_in_last_step * elem_size;
-
- /* Total cost of all index accesses */
- index_read_cost= h->index_only_read_time(keynr, (double)rows);
- cost->add_io(index_read_cost, 1 /* Random seeks */);
- return FALSE;
-}
-
-
-/*
- Get cost of one sort-and-sweep step
-
- SYNOPSIS
- get_sort_and_sweep_cost()
- table Table being accessed
- nrows Number of rows to be sorted and retrieved
- cost OUT The cost
-
- DESCRIPTION
- Get cost of these operations:
- - sort an array of #nrows ROWIDs using qsort
- - read #nrows records from table in a sweep.
-*/
-
-static
-void get_sort_and_sweep_cost(TABLE *table, ha_rows nrows, COST_VECT *cost)
-{
- if (nrows)
- {
- get_sweep_read_cost(table, nrows, FALSE, cost);
- /* Add cost of qsort call: n * log2(n) * cost(rowid_comparison) */
- double cmp_op= rows2double(nrows) * (1.0 / TIME_FOR_COMPARE_ROWID);
- if (cmp_op < 3)
- cmp_op= 3;
- cost->cpu_cost += cmp_op * log2(cmp_op);
- }
- else
- cost->zero();
-}
-
-
-/**
- Get cost of reading nrows table records in a "disk sweep"
-
- A disk sweep read is a sequence of handler->rnd_pos(rowid) calls made
- for an ordered sequence of rowids.
-
- We assume hard disk IO. The read is performed as follows:
-
- 1. The disk head is moved to the needed cylinder
- 2. The controller waits for the plate to rotate
- 3. The data is transferred
-
- Time to do #3 is insignificant compared to #2+#1.
-
- Time to move the disk head is proportional to head travel distance.
-
- Time to wait for the plate to rotate depends on whether the disk head
- was moved or not.
-
- If disk head wasn't moved, the wait time is proportional to distance
- between the previous block and the block we're reading.
-
- If the head was moved, we don't know how much we'll need to wait for the
- plate to rotate. We assume the wait time to be a variate with a mean of
- 0.5 of full rotation time.
-
- Our cost units are "random disk seeks". The cost of random disk seek is
- actually not a constant, it depends on the range of cylinders we're going
- to access. We make it constant by introducing a fuzzy concept of "typical
- datafile length" (it's fuzzy as it's hard to tell whether it should
- include index file, temp.tables etc). Then random seek cost is:
-
- 1 = half_rotation_cost + move_cost * 1/3 * typical_data_file_length
-
- We define half_rotation_cost as DISK_SEEK_BASE_COST=0.9.
-
- @param table Table to be accessed
- @param nrows Number of rows to retrieve
- @param interrupted TRUE <=> Assume that the disk sweep will be
- interrupted by other disk IO. FALSE - otherwise.
- @param cost OUT The cost.
-*/
-
-void get_sweep_read_cost(TABLE *table, ha_rows nrows, bool interrupted,
- COST_VECT *cost)
-{
- DBUG_ENTER("get_sweep_read_cost");
-
- cost->zero();
- if (table->file->primary_key_is_clustered())
- {
- cost->io_count= table->file->read_time(table->s->primary_key,
- (uint) nrows, nrows);
- }
- else
- {
- double n_blocks=
- ceil(ulonglong2double(table->file->stats.data_file_length) / IO_SIZE);
- double busy_blocks=
- n_blocks * (1.0 - pow(1.0 - 1.0/n_blocks, rows2double(nrows)));
- if (busy_blocks < 1.0)
- busy_blocks= 1.0;
-
- DBUG_PRINT("info",("sweep: nblocks=%g, busy_blocks=%g", n_blocks,
- busy_blocks));
- cost->io_count= busy_blocks;
-
- if (!interrupted)
- {
- /* Assume reading is done in one 'sweep' */
- cost->avg_io_cost= (DISK_SEEK_BASE_COST +
- DISK_SEEK_PROP_COST*n_blocks/busy_blocks);
- }
- }
- DBUG_PRINT("info",("returning cost=%g", cost->total_cost()));
- DBUG_VOID_RETURN;
-}
-
-
-/* **************************************************************************
- * DS-MRR implementation ends
- ***************************************************************************/
-
-/* **************************************************************************
- * Index Condition Pushdown code starts
- ***************************************************************************/
-/*
- Check if given expression uses only table fields covered by the given index
-
- SYNOPSIS
- uses_index_fields_only()
- item Expression to check
- tbl The table having the index
- keyno The index number
- other_tbls_ok TRUE <=> Fields of other non-const tables are allowed
-
- DESCRIPTION
- Check if given expression only uses fields covered by index #keyno in the
- table tbl. The expression can use any fields in any other tables.
-
- The expression is guaranteed not to be AND or OR - those constructs are
- handled outside of this function.
-
- RETURN
- TRUE Yes
- FALSE No
-*/
-
-bool uses_index_fields_only(Item *item, TABLE *tbl, uint keyno,
- bool other_tbls_ok)
-{
- if (item->const_item())
- return TRUE;
-
- /*
- Don't push down the triggered conditions. Nested outer joins execution
- code may need to evaluate a condition several times (both triggered and
- untriggered), and there is no way to put thi
- TODO: Consider cloning the triggered condition and using the copies for:
- 1. push the first copy down, to have most restrictive index condition
- possible
- 2. Put the second copy into tab->select_cond.
- */
- if (item->type() == Item::FUNC_ITEM &&
- ((Item_func*)item)->functype() == Item_func::TRIG_COND_FUNC)
- return FALSE;
-
- if (!(item->used_tables() & tbl->map))
- return other_tbls_ok;
-
- Item::Type item_type= item->type();
- switch (item_type) {
- case Item::FUNC_ITEM:
- {
- /* This is a function, apply condition recursively to arguments */
- Item_func *item_func= (Item_func*)item;
- Item **child;
- Item **item_end= (item_func->arguments()) + item_func->argument_count();
- for (child= item_func->arguments(); child != item_end; child++)
- {
- if (!uses_index_fields_only(*child, tbl, keyno, other_tbls_ok))
- return FALSE;
- }
- return TRUE;
- }
- case Item::COND_ITEM:
- {
- /*
- This is an AND/OR condition. Regular AND/OR clauses are handled by
- make_cond_for_index() which will chop off the part that can be
- checked with index. This code is for handling non-top-level AND/ORs,
- e.g. func(x AND y).
- */
- List_iterator<Item> li(*((Item_cond*)item)->argument_list());
- Item *item;
- while ((item=li++))
- {
- if (!uses_index_fields_only(item, tbl, keyno, other_tbls_ok))
- return FALSE;
- }
- return TRUE;
- }
- case Item::FIELD_ITEM:
- {
- Item_field *item_field= (Item_field*)item;
- if (item_field->field->table != tbl)
- return TRUE;
- /*
- The below is probably a repetition - the first part checks the
- other two, but let's play it safe:
- */
- return item_field->field->part_of_key.is_set(keyno) &&
- item_field->field->type() != MYSQL_TYPE_GEOMETRY &&
- item_field->field->type() != MYSQL_TYPE_BLOB;
- }
- case Item::REF_ITEM:
- return uses_index_fields_only(item->real_item(), tbl, keyno,
- other_tbls_ok);
- default:
- return FALSE; /* Play it safe, don't push unknown non-const items */
- }
-}
-
-#define ICP_COND_USES_INDEX_ONLY 10
-
-/*
- Get a part of the condition that can be checked using only index fields
-
- SYNOPSIS
- make_cond_for_index()
- cond The source condition
- table The table that is partially available
- keyno The index in the above table. Only fields covered by the index
- are available
- other_tbls_ok TRUE <=> Fields of other non-const tables are allowed
-
- DESCRIPTION
- Get a part of the condition that can be checked when for the given table
- we have values only of fields covered by some index. The condition may
- refer to other tables, it is assumed that we have values of all of their
- fields.
-
- Example:
- make_cond_for_index(
- "cond(t1.field) AND cond(t2.key1) AND cond(t2.non_key) AND cond(t2.key2)",
- t2, keyno(t2.key1))
- will return
- "cond(t1.field) AND cond(t2.key2)"
-
- RETURN
- Index condition, or NULL if no condition could be inferred.
-*/
-
-Item *make_cond_for_index(Item *cond, TABLE *table, uint keyno,
- bool other_tbls_ok)
-{
- if (!cond)
- return NULL;
- if (cond->type() == Item::COND_ITEM)
- {
- uint n_marked= 0;
- if (((Item_cond*) cond)->functype() == Item_func::COND_AND_FUNC)
- {
- table_map used_tables= 0;
- Item_cond_and *new_cond=new Item_cond_and;
- if (!new_cond)
- return (COND*) 0;
- List_iterator<Item> li(*((Item_cond*) cond)->argument_list());
- Item *item;
- while ((item=li++))
- {
- Item *fix= make_cond_for_index(item, table, keyno, other_tbls_ok);
- if (fix)
- {
- new_cond->argument_list()->push_back(fix);
- used_tables|= fix->used_tables();
- }
- n_marked += test(item->marker == ICP_COND_USES_INDEX_ONLY);
- }
- if (n_marked ==((Item_cond*)cond)->argument_list()->elements)
- cond->marker= ICP_COND_USES_INDEX_ONLY;
- switch (new_cond->argument_list()->elements) {
- case 0:
- return (COND*) 0;
- case 1:
- new_cond->used_tables_cache= used_tables;
- return new_cond->argument_list()->head();
- default:
- new_cond->quick_fix_field();
- new_cond->used_tables_cache= used_tables;
- return new_cond;
- }
- }
- else /* It's OR */
- {
- Item_cond_or *new_cond=new Item_cond_or;
- if (!new_cond)
- return (COND*) 0;
- List_iterator<Item> li(*((Item_cond*) cond)->argument_list());
- Item *item;
- while ((item=li++))
- {
- Item *fix= make_cond_for_index(item, table, keyno, other_tbls_ok);
- if (!fix)
- return (COND*) 0;
- new_cond->argument_list()->push_back(fix);
- n_marked += test(item->marker == ICP_COND_USES_INDEX_ONLY);
- }
- if (n_marked ==((Item_cond*)cond)->argument_list()->elements)
- cond->marker= ICP_COND_USES_INDEX_ONLY;
- new_cond->quick_fix_field();
- new_cond->used_tables_cache= ((Item_cond_or*) cond)->used_tables_cache;
- new_cond->top_level_item();
- return new_cond;
- }
- }
-
- if (!uses_index_fields_only(cond, table, keyno, other_tbls_ok))
- return (COND*) 0;
- cond->marker= ICP_COND_USES_INDEX_ONLY;
- return cond;
-}
-
-
-Item *make_cond_remainder(Item *cond, bool exclude_index)
-{
- if (exclude_index && cond->marker == ICP_COND_USES_INDEX_ONLY)
- return 0; /* Already checked */
-
- if (cond->type() == Item::COND_ITEM)
- {
- table_map tbl_map= 0;
- if (((Item_cond*) cond)->functype() == Item_func::COND_AND_FUNC)
- {
- /* Create new top level AND item */
- Item_cond_and *new_cond=new Item_cond_and;
- if (!new_cond)
- return (COND*) 0;
- List_iterator<Item> li(*((Item_cond*) cond)->argument_list());
- Item *item;
- while ((item=li++))
- {
- Item *fix= make_cond_remainder(item, exclude_index);
- if (fix)
- {
- new_cond->argument_list()->push_back(fix);
- tbl_map |= fix->used_tables();
- }
- }
- switch (new_cond->argument_list()->elements) {
- case 0:
- return (COND*) 0;
- case 1:
- return new_cond->argument_list()->head();
- default:
- new_cond->quick_fix_field();
- ((Item_cond*)new_cond)->used_tables_cache= tbl_map;
- return new_cond;
- }
- }
- else /* It's OR */
- {
- Item_cond_or *new_cond=new Item_cond_or;
- if (!new_cond)
- return (COND*) 0;
- List_iterator<Item> li(*((Item_cond*) cond)->argument_list());
- Item *item;
- while ((item=li++))
- {
- Item *fix= make_cond_remainder(item, FALSE);
- if (!fix)
- return (COND*) 0;
- new_cond->argument_list()->push_back(fix);
- tbl_map |= fix->used_tables();
- }
- new_cond->quick_fix_field();
- ((Item_cond*)new_cond)->used_tables_cache= tbl_map;
- new_cond->top_level_item();
- return new_cond;
- }
- }
- return cond;
-}
-
-
-/*
- Try to extract and push the index condition
-
- SYNOPSIS
- push_index_cond()
- tab A join tab that has tab->table->file and its condition
- in tab->select_cond
- keyno Index for which extract and push the condition
- other_tbls_ok TRUE <=> Fields of other non-const tables are allowed
-
- DESCRIPTION
- Try to extract and push the index condition down to table handler
-*/
-
-void push_index_cond(JOIN_TAB *tab, uint keyno, bool other_tbls_ok)
-{
- DBUG_ENTER("push_index_cond");
- Item *idx_cond;
- bool do_index_cond_pushdown=
- ((tab->table->file->index_flags(keyno, 0, 1) &
- HA_DO_INDEX_COND_PUSHDOWN) &&
- tab->join->thd->variables.engine_condition_pushdown);
-
- /*
- Do not try index condition pushdown on indexes which have partially-covered
- columns. Unpacking from a column prefix into index tuple is not a supported
- operation in some engines, see e.g. MySQL BUG#42991.
- TODO: a better solution would be not to consider partially-covered columns
- as parts of the index and still produce/check index condition for
- fully-covered index columns.
- */
- KEY *key_info= tab->table->key_info + keyno;
- for (uint kp= 0; kp < key_info->key_parts; kp++)
- {
- if ((key_info->key_part[kp].key_part_flag & HA_PART_KEY_SEG))
- {
- do_index_cond_pushdown= FALSE;
- break;
- }
- }
-
- /*
- When WL#5116 is done this DBUG statement must be removed. It's just a
- temporary hack to allow us to discriminate whether a test failure relates
- to *Engine* or *Index* Condition Pushdown.
- */
- DBUG_EXECUTE_IF("optimizer_no_icp", do_index_cond_pushdown= false;);
- if (do_index_cond_pushdown)
- {
- DBUG_EXECUTE("where",
- print_where(tab->select_cond, "full cond", QT_ORDINARY););
-
- idx_cond= make_cond_for_index(tab->select_cond, tab->table, keyno,
- other_tbls_ok);
-
- DBUG_EXECUTE("where",
- print_where(idx_cond, "idx cond", QT_ORDINARY););
-
- if (idx_cond)
- {
- Item *idx_remainder_cond= 0;
- tab->pre_idx_push_select_cond= tab->select_cond;
- /*
- For BKA cache we store condition to special BKA cache field
- because evaluation of the condition requires additional operations
- before the evaluation. This condition is used in
- JOIN_CACHE_BKA[_UNIQUE]::skip_index_tuple() functions.
- */
- if (tab->use_join_cache &&
- /*
- if cache is used then the value is TRUE only
- for BKA[_UNIQUE] cache (see check_join_cache_usage func).
- In this case other_tbls_ok is an equivalent of
- cache->is_key_access().
- */
- other_tbls_ok &&
- (idx_cond->used_tables() &
- ~(tab->table->map | tab->join->const_table_map)))
- tab->cache_idx_cond= idx_cond;
- else
- idx_remainder_cond= tab->table->file->idx_cond_push(keyno, idx_cond);
-
- /*
- Disable eq_ref's "lookup cache" if we've pushed down an index
- condition.
- TODO: This check happens to work on current ICP implementations, but
- there may exist a compliant implementation that will not work
- correctly with it. Sort this out when we stabilize the condition
- pushdown APIs.
- */
- if (idx_remainder_cond != idx_cond)
- tab->ref.disable_cache= TRUE;
-
- Item *row_cond= make_cond_remainder(tab->select_cond, TRUE);
-
- DBUG_EXECUTE("where",
- print_where(row_cond, "remainder cond", QT_ORDINARY););
-
- if (row_cond)
- {
- if (!idx_remainder_cond)
- tab->select_cond= row_cond;
- else
- {
- COND *new_cond= new Item_cond_and(row_cond, idx_remainder_cond);
- tab->select_cond= new_cond;
- tab->select_cond->quick_fix_field();
- ((Item_cond_and*)tab->select_cond)->used_tables_cache=
- row_cond->used_tables() | idx_remainder_cond->used_tables();
- }
- }
- else
- tab->select_cond= idx_remainder_cond;
- if (tab->select)
- {
- DBUG_EXECUTE("where",
- print_where(tab->select->cond,
- "select_cond",
- QT_ORDINARY););
-
- tab->select->cond= tab->select_cond;
- }
- }
- }
- DBUG_VOID_RETURN;
-}
-
-/* **************************************************************************
- * Default MRR implementation starts
- ***************************************************************************/
-
-
-/****************************************************************************
- * Default MRR implementation (MRR to non-MRR converter)
- ***************************************************************************/
-
-/**
- Get cost and other information about MRR scan over a known list of ranges
-
- Calculate estimated cost and other information about an MRR scan for given
- sequence of ranges.
-
- @param keyno Index number
- @param seq Range sequence to be traversed
- @param seq_init_param First parameter for seq->init()
- @param n_ranges_arg Number of ranges in the sequence, or 0 if the caller
- can't efficiently determine it
- @param bufsz INOUT IN: Size of the buffer available for use
- OUT: Size of the buffer that is expected to be actually
- used, or 0 if buffer is not needed.
- @param flags INOUT A combination of HA_MRR_* flags
- @param cost OUT Estimated cost of MRR access
-
- @note
- This method (or an overriding one in a derived class) must check for
- thd->killed and return HA_POS_ERROR if it is not zero. This is required
- for a user to be able to interrupt the calculation by killing the
- connection/query.
-
- @retval
- HA_POS_ERROR Error or the engine is unable to perform the requested
- scan. Values of OUT parameters are undefined.
- @retval
- other OK, *cost contains cost of the scan, *bufsz and *flags
- contain scan parameters.
-*/
-
-ha_rows
-handler::multi_range_read_info_const(uint keyno, RANGE_SEQ_IF *seq,
- void *seq_init_param, uint n_ranges_arg,
- uint *bufsz, uint *flags, COST_VECT *cost)
-{
- KEY_MULTI_RANGE range;
- range_seq_t seq_it;
- ha_rows rows, total_rows= 0;
- uint n_ranges=0;
- THD *thd= current_thd;
-
- /* Default MRR implementation doesn't need buffer */
- *bufsz= 0;
-
- seq_it= seq->init(seq_init_param, n_ranges, *flags);
- while (!seq->next(seq_it, &range))
- {
- if (unlikely(thd->killed != 0))
- return HA_POS_ERROR;
-
- n_ranges++;
- key_range *min_endp, *max_endp;
- if (range.range_flag & GEOM_FLAG)
- {
- /* In this case tmp_min_flag contains the handler-read-function */
- range.start_key.flag= (ha_rkey_function) (range.range_flag ^ GEOM_FLAG);
- min_endp= &range.start_key;
- max_endp= NULL;
- }
- else
- {
- min_endp= range.start_key.length? &range.start_key : NULL;
- max_endp= range.end_key.length? &range.end_key : NULL;
- }
- if ((range.range_flag & UNIQUE_RANGE) && !(range.range_flag & NULL_RANGE))
- rows= 1; /* there can be at most one row */
- else
- {
- if (HA_POS_ERROR == (rows= this->records_in_range(keyno, min_endp,
- max_endp)))
- {
- /* Can't scan one range => can't do MRR scan at all */
- total_rows= HA_POS_ERROR;
- break;
- }
- }
- total_rows += rows;
- }
-
- if (total_rows != HA_POS_ERROR)
- {
- /* The following calculation is the same as in multi_range_read_info(): */
- *flags |= HA_MRR_USE_DEFAULT_IMPL;
- cost->zero();
- cost->avg_io_cost= 1; /* assume random seeks */
- if ((*flags & HA_MRR_INDEX_ONLY) && total_rows > 2)
- cost->io_count= index_only_read_time(keyno, (uint)total_rows);
- else
- cost->io_count= read_time(keyno, n_ranges, total_rows);
- cost->cpu_cost= (double) total_rows / TIME_FOR_COMPARE + 0.01;
- }
- return total_rows;
-}
-
-
-/**
- Get cost and other information about MRR scan over some sequence of ranges
-
- Calculate estimated cost and other information about an MRR scan for some
- sequence of ranges.
-
- The ranges themselves will be known only at execution phase. When this
- function is called we only know number of ranges and a (rough) E(#records)
- within those ranges.
-
- Currently this function is only called for "n-keypart singlepoint" ranges,
- i.e. each range is "keypart1=someconst1 AND ... AND keypartN=someconstN"
-
- The flags parameter is a combination of those flags: HA_MRR_SORTED,
- HA_MRR_INDEX_ONLY, HA_MRR_NO_ASSOCIATION, HA_MRR_LIMITS.
-
- @param keyno Index number
- @param n_ranges Estimated number of ranges (i.e. intervals) in the
- range sequence.
- @param n_rows Estimated total number of records contained within all
- of the ranges
- @param bufsz INOUT IN: Size of the buffer available for use
- OUT: Size of the buffer that will be actually used, or
- 0 if buffer is not needed.
- @param flags INOUT A combination of HA_MRR_* flags
- @param cost OUT Estimated cost of MRR access
-
- @retval
- 0 OK, *cost contains cost of the scan, *bufsz and *flags contain scan
- parameters.
- @retval
- other Error or can't perform the requested scan
-*/
-
-ha_rows handler::multi_range_read_info(uint keyno, uint n_ranges, uint n_rows,
- uint *bufsz, uint *flags, COST_VECT *cost)
-{
- *bufsz= 0; /* Default implementation doesn't need a buffer */
-
- *flags |= HA_MRR_USE_DEFAULT_IMPL;
-
- cost->zero();
- cost->avg_io_cost= 1; /* assume random seeks */
-
- /* Produce the same cost as non-MRR code does */
- if (*flags & HA_MRR_INDEX_ONLY)
- cost->io_count= index_only_read_time(keyno, n_rows);
- else
- cost->io_count= read_time(keyno, n_ranges, n_rows);
- return 0;
-}
-
-
-/**
- Initialize the MRR scan
-
- Initialize the MRR scan. This function may do heavyweight scan
- initialization like row prefetching/sorting/etc (NOTE: but better not do
- it here as we may not need it, e.g. if we never satisfy WHERE clause on
- previous tables. For many implementations it would be natural to do such
- initializations in the first multi_range_read_next() call)
-
- mode is a combination of the following flags: HA_MRR_SORTED,
- HA_MRR_INDEX_ONLY, HA_MRR_NO_ASSOCIATION
-
- @param seq Range sequence to be traversed
- @param seq_init_param First parameter for seq->init()
- @param n_ranges Number of ranges in the sequence
- @param mode Flags, see the description section for the details
- @param buf INOUT: memory buffer to be used
-
- @note
- One must have called index_init() before calling this function. Several
- multi_range_read_init() calls may be made in course of one query.
-
- Until WL#2623 is done (see its text, section 3.2), the following will
- also hold:
- The caller will guarantee that if "seq->init == mrr_ranges_array_init"
- then seq_init_param is an array of n_ranges KEY_MULTI_RANGE structures.
- This property will only be used by NDB handler until WL#2623 is done.
-
- Buffer memory management is done according to the following scenario:
- The caller allocates the buffer and provides it to the callee by filling
- the members of HANDLER_BUFFER structure.
- The callee consumes all or some fraction of the provided buffer space, and
- sets the HANDLER_BUFFER members accordingly.
- The callee may use the buffer memory until the next multi_range_read_init()
- call is made, all records have been read, or until index_end() call is
- made, whichever comes first.
-
- @retval 0 OK
- @retval 1 Error
-*/
-
-int
-handler::multi_range_read_init(RANGE_SEQ_IF *seq_funcs, void *seq_init_param,
- uint n_ranges, uint mode, HANDLER_BUFFER *buf)
-{
- DBUG_ENTER("handler::multi_range_read_init");
- mrr_iter= seq_funcs->init(seq_init_param, n_ranges, mode);
- mrr_funcs= *seq_funcs;
- mrr_is_output_sorted= test(mode & HA_MRR_SORTED);
- mrr_have_range= FALSE;
- DBUG_RETURN(0);
-}
-
-
-/**
- Get next record in MRR scan
-
- Default MRR implementation: read the next record
-
- @param range_info OUT Undefined if HA_MRR_NO_ASSOCIATION flag is in effect
- Otherwise, the opaque value associated with the range
- that contains the returned record.
-
- @retval 0 OK
- @retval other Error code
-*/
-
-int handler::multi_range_read_next(char **range_info)
-{
- int UNINIT_VAR(result);
- int range_res;
- DBUG_ENTER("handler::multi_range_read_next");
-
- if (!mrr_have_range)
- {
- mrr_have_range= TRUE;
- goto start;
- }
-
- do
- {
- /* Save a call if there can be only one row in range. */
- if (mrr_cur_range.range_flag != (UNIQUE_RANGE | EQ_RANGE))
- {
- result= read_range_next();
- /* On success or non-EOF errors jump to the end. */
- if (result != HA_ERR_END_OF_FILE)
- break;
- }
- else
- {
- if (was_semi_consistent_read())
- goto scan_it_again;
- /*
- We need to set this for the last range only, but checking this
- condition is more expensive than just setting the result code.
- */
- result= HA_ERR_END_OF_FILE;
- }
-
-start:
- /* Try the next range(s) until one matches a record. */
- while (!(range_res= mrr_funcs.next(mrr_iter, &mrr_cur_range)))
- {
-scan_it_again:
- result= read_range_first(mrr_cur_range.start_key.keypart_map ?
- &mrr_cur_range.start_key : 0,
- mrr_cur_range.end_key.keypart_map ?
- &mrr_cur_range.end_key : 0,
- test(mrr_cur_range.range_flag & EQ_RANGE),
- mrr_is_output_sorted);
- if (result != HA_ERR_END_OF_FILE)
- break;
- }
- }
- while ((result == HA_ERR_END_OF_FILE) && !range_res);
-
- *range_info= mrr_cur_range.ptr;
- DBUG_PRINT("exit",("handler::multi_range_read_next result %d", result));
- DBUG_RETURN(result);
-}
-
=== removed file 'sql/ds_mrr.h'
--- a/sql/ds_mrr.h 2009-12-15 21:35:55 +0000
+++ b/sql/ds_mrr.h 1970-01-01 00:00:00 +0000
@@ -1,71 +0,0 @@
-/*
- This file contains declarations for
- - Disk-Sweep MultiRangeRead (DS-MRR) implementation
- - Index Condition Pushdown helper functions
-*/
-
-/**
- A Disk-Sweep MRR interface implementation
-
- This implementation makes range (and, in the future, 'ref') scans to read
- table rows in disk sweeps.
-
- Currently it is used by MyISAM and InnoDB. Potentially it can be used with
- any table handler that has non-clustered indexes and on-disk rows.
-*/
-
-class DsMrr_impl
-{
-public:
- typedef void (handler::*range_check_toggle_func_t)(bool on);
-
- DsMrr_impl()
- : h2(NULL) {};
-
- /*
- The "owner" handler object (the one that calls dsmrr_XXX functions).
- It is used to retrieve full table rows by calling rnd_pos().
- */
- handler *h;
- TABLE *table; /* Always equal to h->table */
-private:
- /* Secondary handler object. It is used for scanning the index */
- handler *h2;
-
- /* Buffer to store rowids, or (rowid, range_id) pairs */
- uchar *rowids_buf;
- uchar *rowids_buf_cur; /* Current position when reading/writing */
- uchar *rowids_buf_last; /* When reading: end of used buffer space */
- uchar *rowids_buf_end; /* End of the buffer */
-
- bool dsmrr_eof; /* TRUE <=> We have reached EOF when reading index tuples */
-
- /* TRUE <=> need range association, buffer holds {rowid, range_id} pairs */
- bool is_mrr_assoc;
-
- bool use_default_impl; /* TRUE <=> shortcut all calls to default MRR impl */
-public:
- void init(handler *h_arg, TABLE *table_arg)
- {
- h= h_arg;
- table= table_arg;
- }
- int dsmrr_init(handler *h, RANGE_SEQ_IF *seq_funcs, void *seq_init_param,
- uint n_ranges, uint mode, HANDLER_BUFFER *buf);
- void dsmrr_close();
- int dsmrr_fill_buffer();
- int dsmrr_next(char **range_info);
-
- ha_rows dsmrr_info(uint keyno, uint n_ranges, uint keys, uint *bufsz,
- uint *flags, COST_VECT *cost);
-
- ha_rows dsmrr_info_const(uint keyno, RANGE_SEQ_IF *seq,
- void *seq_init_param, uint n_ranges, uint *bufsz,
- uint *flags, COST_VECT *cost);
-private:
- bool choose_mrr_impl(uint keyno, ha_rows rows, uint *flags, uint *bufsz,
- COST_VECT *cost);
- bool get_disk_sweep_mrr_cost(uint keynr, ha_rows rows, uint flags,
- uint *buffer_size, COST_VECT *cost);
-};
-
=== modified file 'sql/handler.h'
--- a/sql/handler.h 2009-12-15 17:23:55 +0000
+++ b/sql/handler.h 2009-12-22 12:33:21 +0000
@@ -2314,7 +2314,7 @@ private:
friend class DsMrr_impl;
};
-#include "ds_mrr.h"
+#include "multi_range_read.h"
/* Some extern variables used with handlers */
=== added file 'sql/multi_range_read.cc'
--- a/sql/multi_range_read.cc 1970-01-01 00:00:00 +0000
+++ b/sql/multi_range_read.cc 2009-12-22 12:33:21 +0000
@@ -0,0 +1,944 @@
+#include "mysql_priv.h"
+#include "sql_select.h"
+
+/****************************************************************************
+ * Default MRR implementation (MRR to non-MRR converter)
+ ***************************************************************************/
+
+/**
+ Get cost and other information about MRR scan over a known list of ranges
+
+ Calculate estimated cost and other information about an MRR scan for given
+ sequence of ranges.
+
+ @param keyno Index number
+ @param seq Range sequence to be traversed
+ @param seq_init_param First parameter for seq->init()
+ @param n_ranges_arg Number of ranges in the sequence, or 0 if the caller
+ can't efficiently determine it
+ @param bufsz INOUT IN: Size of the buffer available for use
+ OUT: Size of the buffer that is expected to be actually
+ used, or 0 if buffer is not needed.
+ @param flags INOUT A combination of HA_MRR_* flags
+ @param cost OUT Estimated cost of MRR access
+
+ @note
+ This method (or an overriding one in a derived class) must check for
+ thd->killed and return HA_POS_ERROR if it is not zero. This is required
+ for a user to be able to interrupt the calculation by killing the
+ connection/query.
+
+ @retval
+ HA_POS_ERROR Error or the engine is unable to perform the requested
+ scan. Values of OUT parameters are undefined.
+ @retval
+ other OK, *cost contains cost of the scan, *bufsz and *flags
+ contain scan parameters.
+*/
+
+ha_rows
+handler::multi_range_read_info_const(uint keyno, RANGE_SEQ_IF *seq,
+                                     void *seq_init_param, uint n_ranges_arg,
+                                     uint *bufsz, uint *flags, COST_VECT *cost)
+{
+  KEY_MULTI_RANGE cur;
+  ha_rows row_sum= 0;
+  uint ranges_seen= 0;
+  THD *thd= current_thd;
+
+  /* Nothing is buffered by the default MRR implementation */
+  *bufsz= 0;
+
+  /* The sequence is opened with a range count of 0; n_ranges_arg is unused */
+  range_seq_t it= seq->init(seq_init_param, ranges_seen, *flags);
+  for (;;)
+  {
+    if (seq->next(it, &cur))
+      break;
+
+    /* Let a killed connection/query interrupt the estimation */
+    if (unlikely(thd->killed != 0))
+      return HA_POS_ERROR;
+
+    ranges_seen++;
+
+    key_range *lo_end, *hi_end;
+    if (!(cur.range_flag & GEOM_FLAG))
+    {
+      /* An end point of zero length means "unbounded on that side" */
+      lo_end= cur.start_key.length ? &cur.start_key : NULL;
+      hi_end= cur.end_key.length ? &cur.end_key : NULL;
+    }
+    else
+    {
+      /* With GEOM_FLAG stripped, range_flag is the handler read function */
+      cur.start_key.flag= (ha_rkey_function) (cur.range_flag ^ GEOM_FLAG);
+      lo_end= &cur.start_key;
+      hi_end= NULL;
+    }
+
+    ha_rows est;
+    if ((cur.range_flag & UNIQUE_RANGE) && !(cur.range_flag & NULL_RANGE))
+      est= 1;                       /* at most one row can match */
+    else
+    {
+      est= this->records_in_range(keyno, lo_end, hi_end);
+      if (est == HA_POS_ERROR)
+        return HA_POS_ERROR;        /* one range unscannable => no MRR scan */
+    }
+    row_sum += est;
+  }
+
+  if (row_sum == HA_POS_ERROR)
+    return row_sum;
+
+  /* Cost model of the default implementation */
+  *flags |= HA_MRR_USE_DEFAULT_IMPL;
+  cost->zero();
+  cost->avg_io_cost= 1;             /* every access counts as a random seek */
+  if ((*flags & HA_MRR_INDEX_ONLY) && row_sum > 2)
+    cost->io_count= index_only_read_time(keyno, (uint)row_sum);
+  else
+    cost->io_count= read_time(keyno, ranges_seen, row_sum);
+  cost->cpu_cost= (double) row_sum / TIME_FOR_COMPARE + 0.01;
+  return row_sum;
+}
+
+
+/**
+ Get cost and other information about MRR scan over some sequence of ranges
+
+ Calculate estimated cost and other information about an MRR scan for some
+ sequence of ranges.
+
+ The ranges themselves will be known only at execution phase. When this
+ function is called we only know number of ranges and a (rough) E(#records)
+ within those ranges.
+
+ Currently this function is only called for "n-keypart singlepoint" ranges,
+ i.e. each range is "keypart1=someconst1 AND ... AND keypartN=someconstN"
+
+ The flags parameter is a combination of those flags: HA_MRR_SORTED,
+ HA_MRR_INDEX_ONLY, HA_MRR_NO_ASSOCIATION, HA_MRR_LIMITS.
+
+ @param keyno Index number
+ @param n_ranges Estimated number of ranges (i.e. intervals) in the
+ range sequence.
+ @param n_rows Estimated total number of records contained within all
+ of the ranges
+ @param bufsz INOUT IN: Size of the buffer available for use
+ OUT: Size of the buffer that will be actually used, or
+ 0 if buffer is not needed.
+ @param flags INOUT A combination of HA_MRR_* flags
+ @param cost OUT Estimated cost of MRR access
+
+ @retval
+ 0 OK, *cost contains cost of the scan, *bufsz and *flags contain scan
+ parameters.
+ @retval
+ other Error or can't perform the requested scan
+*/
+
+ha_rows handler::multi_range_read_info(uint keyno, uint n_ranges, uint n_rows,
+                                       uint *bufsz, uint *flags, COST_VECT *cost)
+{
+  /* The default implementation reads row by row and needs no buffer */
+  *bufsz= 0;
+  *flags |= HA_MRR_USE_DEFAULT_IMPL;
+
+  const bool index_only= (*flags & HA_MRR_INDEX_ONLY) != 0;
+
+  cost->zero();
+  cost->avg_io_cost= 1;             /* every access counts as a random seek */
+
+  /* Charge exactly what the non-MRR access path would be charged */
+  if (index_only)
+    cost->io_count= index_only_read_time(keyno, n_rows);
+  else
+    cost->io_count= read_time(keyno, n_ranges, n_rows);
+  return 0;
+}
+
+
+/**
+ Initialize the MRR scan
+
+ Initialize the MRR scan. This function may do heavyweight scan
+ initialization like row prefetching/sorting/etc (NOTE: but better not do
+ it here as we may not need it, e.g. if we never satisfy WHERE clause on
+ previous tables. For many implementations it would be natural to do such
+ initializations in the first multi_range_read_next() call)
+
+ mode is a combination of the following flags: HA_MRR_SORTED,
+ HA_MRR_INDEX_ONLY, HA_MRR_NO_ASSOCIATION
+
+ @param seq Range sequence to be traversed
+ @param seq_init_param First parameter for seq->init()
+ @param n_ranges Number of ranges in the sequence
+ @param mode Flags, see the description section for the details
+ @param buf INOUT: memory buffer to be used
+
+ @note
+ One must have called index_init() before calling this function. Several
+ multi_range_read_init() calls may be made in course of one query.
+
+ Until WL#2623 is done (see its text, section 3.2), the following will
+ also hold:
+ The caller will guarantee that if "seq->init == mrr_ranges_array_init"
+ then seq_init_param is an array of n_ranges KEY_MULTI_RANGE structures.
+ This property will only be used by NDB handler until WL#2623 is done.
+
+ Buffer memory management is done according to the following scenario:
+ The caller allocates the buffer and provides it to the callee by filling
+ the members of HANDLER_BUFFER structure.
+ The callee consumes all or some fraction of the provided buffer space, and
+ sets the HANDLER_BUFFER members accordingly.
+ The callee may use the buffer memory until the next multi_range_read_init()
+ call is made, all records have been read, or until index_end() call is
+ made, whichever comes first.
+
+ @retval 0 OK
+ @retval 1 Error
+*/
+
+int
+handler::multi_range_read_init(RANGE_SEQ_IF *seq_funcs, void *seq_init_param,
+                               uint n_ranges, uint mode, HANDLER_BUFFER *buf)
+{
+  DBUG_ENTER("handler::multi_range_read_init");
+  /* buf is not touched here: the default implementation does no buffering */
+  const bool want_sorted= test(mode & HA_MRR_SORTED);
+
+  /* Open the range sequence and keep a private copy of its callbacks */
+  mrr_iter= seq_funcs->init(seq_init_param, n_ranges, mode);
+  mrr_funcs= *seq_funcs;
+  mrr_is_output_sorted= want_sorted;
+  mrr_have_range= FALSE;            /* no range has been fetched yet */
+  DBUG_RETURN(0);
+}
+
+
+/**
+ Get next record in MRR scan
+
+ Default MRR implementation: read the next record
+
+ @param range_info OUT Undefined if HA_MRR_NO_ASSOCIATION flag is in effect
+ Otherwise, the opaque value associated with the range
+ that contains the returned record.
+
+ @retval 0 OK
+ @retval other Error code
+*/
+
+int handler::multi_range_read_next(char **range_info)
+{
+ int UNINIT_VAR(result);
+ int range_res;
+ DBUG_ENTER("handler::multi_range_read_next");
+
+ /*
+ First call after multi_range_read_init(): no range has been fetched yet,
+ so jump straight to the code that pulls ranges from the sequence.
+ */
+ if (!mrr_have_range)
+ {
+ mrr_have_range= TRUE;
+ goto start;
+ }
+
+ do
+ {
+ /* Save a call if there can be only one row in range. */
+ if (mrr_cur_range.range_flag != (UNIQUE_RANGE | EQ_RANGE))
+ {
+ result= read_range_next();
+ /* On success or non-EOF errors jump to the end. */
+ if (result != HA_ERR_END_OF_FILE)
+ break;
+ }
+ else
+ {
+ /*
+ Re-read the current range after a semi-consistent read. This jumps
+ into the body of the while loop below without evaluating its
+ condition, so range_res is not assigned on that path; it is only
+ inspected afterwards if result is HA_ERR_END_OF_FILE, in which case
+ the while condition has been evaluated.
+ */
+ if (was_semi_consistent_read())
+ goto scan_it_again;
+ /*
+ We need to set this for the last range only, but checking this
+ condition is more expensive than just setting the result code.
+ */
+ result= HA_ERR_END_OF_FILE;
+ }
+
+start:
+ /* Try the next range(s) until one matches a record. */
+ while (!(range_res= mrr_funcs.next(mrr_iter, &mrr_cur_range)))
+ {
+scan_it_again:
+ /* A zero keypart_map means that end point is absent (open-ended) */
+ result= read_range_first(mrr_cur_range.start_key.keypart_map ?
+ &mrr_cur_range.start_key : 0,
+ mrr_cur_range.end_key.keypart_map ?
+ &mrr_cur_range.end_key : 0,
+ test(mrr_cur_range.range_flag & EQ_RANGE),
+ mrr_is_output_sorted);
+ if (result != HA_ERR_END_OF_FILE)
+ break;
+ }
+ }
+ /* Keep going only while ranges are empty and the sequence is not exhausted */
+ while ((result == HA_ERR_END_OF_FILE) && !range_res);
+
+ /* Assigned unconditionally, also when result is an error or EOF */
+ *range_info= mrr_cur_range.ptr;
+ DBUG_PRINT("exit",("handler::multi_range_read_next result %d", result));
+ DBUG_RETURN(result);
+}
+
+/****************************************************************************
+ * DS-MRR implementation
+ ***************************************************************************/
+
+/**
+ DS-MRR: Initialize and start MRR scan
+
+ Initialize and start the MRR scan. Depending on the mode parameter, this
+ may use default or DS-MRR implementation.
+
+ @param h Table handler to be used
+ @param key Index to be used
+ @param seq_funcs Interval sequence enumeration functions
+ @param seq_init_param Interval sequence enumeration parameter
+ @param n_ranges Number of ranges in the sequence.
+ @param mode HA_MRR_* modes to use
+ @param buf INOUT Buffer to use
+
+ @retval 0 Ok, Scan started.
+ @retval other Error
+*/
+
+int DsMrr_impl::dsmrr_init(handler *h_arg, RANGE_SEQ_IF *seq_funcs,
+ void *seq_init_param, uint n_ranges, uint mode,
+ HANDLER_BUFFER *buf)
+{
+ uint elem_size;
+ Item *pushed_cond= NULL;
+ handler *new_h2= 0;
+ DBUG_ENTER("DsMrr_impl::dsmrr_init");
+
+ /*
+ index_merge may invoke a scan on an object for which dsmrr_info[_const]
+ has not been called, so set the owner handler here as well.
+ */
+ h= h_arg;
+ /*
+ Delegate to the default implementation when it was explicitly requested
+ or when sorted output is required.
+ */
+ if (mode & HA_MRR_USE_DEFAULT_IMPL || mode & HA_MRR_SORTED)
+ {
+ use_default_impl= TRUE;
+ const int retval=
+ h->handler::multi_range_read_init(seq_funcs, seq_init_param,
+ n_ranges, mode, buf);
+ DBUG_RETURN(retval);
+ }
+ rowids_buf= buf->buffer;
+
+ is_mrr_assoc= !test(mode & HA_MRR_NO_ASSOCIATION);
+
+ if (is_mrr_assoc)
+ status_var_increment(table->in_use->status_var.ha_multi_range_read_init_count);
+
+ /*
+ Trim the usable buffer area to a whole number of elements, where one
+ element is a rowid optionally followed by a range_id pointer.
+ */
+ rowids_buf_end= buf->buffer_end;
+ elem_size= h->ref_length + (int)is_mrr_assoc * sizeof(void*);
+ rowids_buf_last= rowids_buf +
+ ((rowids_buf_end - rowids_buf)/ elem_size)*
+ elem_size;
+ rowids_buf_end= rowids_buf_last;
+
+ /*
+ There can be two cases:
+ - This is the first call since index_init(), h2==NULL
+ Need to setup h2 then.
+ - This is not the first call, h2 is initialized and set up appropriately.
+ The caller might have called h->index_init(), need to switch h to
+ rnd_pos calls.
+ */
+ if (!h2)
+ {
+ /*
+ Set up the secondary handler h2, which will scan the index; the owner
+ handler h is switched to rnd_pos() calls further below.
+ */
+ THD *thd= current_thd;
+ /*
+ ::clone() takes up a lot of stack, especially on 64 bit platforms.
+ The constant 5 is an empiric result.
+ */
+ if (check_stack_overrun(thd, 5*STACK_MIN_SIZE, (uchar*) &new_h2))
+ DBUG_RETURN(1);
+ DBUG_ASSERT(h->active_index != MAX_KEY);
+ uint mrr_keyno= h->active_index;
+
+ /* Clone the owner handler and take a read lock on the clone */
+ if (!(new_h2= h->clone(thd->mem_root)) ||
+ new_h2->ha_external_lock(thd, F_RDLCK))
+ {
+ delete new_h2;
+ DBUG_RETURN(1);
+ }
+
+ /* Remember a condition pushed to this index so it can be re-pushed to h2 */
+ if (mrr_keyno == h->pushed_idx_cond_keyno)
+ pushed_cond= h->pushed_idx_cond;
+
+ /*
+ Caution: this call will invoke this->dsmrr_close(). Do not put the
+ created secondary table handler into this->h2 or it will delete it.
+ */
+ if (h->ha_index_end())
+ {
+ /* Hand the clone over to h2 so that the error path releases it */
+ h2=new_h2;
+ goto error;
+ }
+
+ h2= new_h2; /* Ok, now can put it into h2 */
+ table->prepare_for_position();
+ h2->extra(HA_EXTRA_KEYREAD);
+
+ if (h2->ha_index_init(mrr_keyno, FALSE))
+ goto error;
+
+ use_default_impl= FALSE;
+ if (pushed_cond)
+ h2->idx_cond_push(mrr_keyno, pushed_cond);
+ }
+ else
+ {
+ /*
+ We get here when the access alternates between MRR scan(s) and non-MRR
+ scans.
+
+ Calling h->index_end() will invoke dsmrr_close() for this object,
+ which will delete h2. We need to keep it, so put it away and don't
+ let it be deleted:
+ */
+ handler *save_h2= h2;
+ h2= NULL;
+ int res= (h->inited == handler::INDEX && h->ha_index_end());
+ h2= save_h2;
+ use_default_impl= FALSE;
+ if (res)
+ goto error;
+ }
+
+ /* Start the range scan on h2 and collect the first batch of rowids */
+ if (h2->handler::multi_range_read_init(seq_funcs, seq_init_param, n_ranges,
+ mode, buf) ||
+ dsmrr_fill_buffer())
+ {
+ goto error;
+ }
+ /*
+ If the above call has scanned through all intervals in *seq, then
+ adjust *buf to indicate that the remaining buffer space will not be used.
+ */
+ if (dsmrr_eof)
+ buf->end_of_used_area= rowids_buf_last;
+
+ /*
+ h->inited == INDEX may occur when 'range checked for each record' is
+ used.
+ */
+ if ((h->inited != handler::RND) &&
+ ((h->inited==handler::INDEX? h->ha_index_end(): FALSE) ||
+ (h->ha_rnd_init(FALSE))))
+ goto error;
+
+ use_default_impl= FALSE;
+ h->mrr_funcs= *seq_funcs;
+
+ DBUG_RETURN(0);
+error:
+ /*
+ Release the secondary handler. h2 is non-NULL on every path that jumps
+ here.
+ */
+ h2->ha_index_or_rnd_end();
+ h2->ha_external_lock(current_thd, F_UNLCK);
+ h2->close();
+ delete h2;
+ h2= NULL;
+ DBUG_RETURN(1);
+}
+
+
+void DsMrr_impl::dsmrr_close()
+{
+  DBUG_ENTER("DsMrr_impl::dsmrr_close");
+  handler *const secondary= h2;
+  if (secondary != NULL)
+  {
+    /* End the scan, drop the lock, then close and free the secondary handler */
+    secondary->ha_index_or_rnd_end();
+    secondary->ha_external_lock(current_thd, F_UNLCK);
+    secondary->close();
+    delete secondary;
+    h2= NULL;
+  }
+  /* With no secondary handler, calls are routed to the default MRR code */
+  use_default_impl= TRUE;
+  DBUG_VOID_RETURN;
+}
+
+
+/* qsort2-style comparator: orders two rowids via the handler passed as h */
+static int rowid_cmp(void *h, uchar *a, uchar *b)
+{
+  handler *const file= (handler*) h;
+  return file->cmp_ref(a, b);
+}
+
+
+/**
+  DS-MRR: Fill the buffer with rowids and sort it by rowid
+
+  {This is an internal function of DiskSweep MRR implementation}
+  Scan the MRR ranges through the secondary handler (h2) and collect ROWIDs
+  (or {ROWID, range_id} pairs) into the buffer. When the buffer is full or
+  the scan is completed, sort the buffer by rowid and return.
+
+  The function assumes that rowids buffer is empty when it is invoked.
+
+  On success rowids_buf_cur points at the first collected entry,
+  rowids_buf_last points just past the last one, and dsmrr_eof tells whether
+  the index scan has reached its end.
+
+  @retval 0      OK, the next portion of rowids is in the buffer,
+                 properly ordered
+  @retval other  Error
+*/
+
+int DsMrr_impl::dsmrr_fill_buffer()
+{
+  char *range_info;
+  /*
+    Must start at 0: if the buffer cannot hold even one entry, the loop below
+    never calls multi_range_read_next() and res would otherwise be read
+    uninitialized.
+  */
+  int res= 0;
+  DBUG_ENTER("DsMrr_impl::dsmrr_fill_buffer");
+
+  rowids_buf_cur= rowids_buf;
+  while ((rowids_buf_cur < rowids_buf_end) &&
+         !(res= h2->handler::multi_range_read_next(&range_info)))
+  {
+    /* Give the range sequence a chance to reject this index tuple */
+    KEY_MULTI_RANGE *curr_range= &h2->handler::mrr_cur_range;
+    if (h2->mrr_funcs.skip_index_tuple &&
+        h2->mrr_funcs.skip_index_tuple(h2->mrr_iter, curr_range->ptr))
+      continue;
+
+    /* Put rowid, or {rowid, range_id} pair into the buffer */
+    h2->position(table->record[0]);
+    memcpy(rowids_buf_cur, h2->ref, h2->ref_length);
+    rowids_buf_cur += h2->ref_length;
+
+    if (is_mrr_assoc)
+    {
+      memcpy(rowids_buf_cur, &range_info, sizeof(void*));
+      rowids_buf_cur += sizeof(void*);
+    }
+  }
+
+  if (res && res != HA_ERR_END_OF_FILE)
+    DBUG_RETURN(res);
+  dsmrr_eof= test(res == HA_ERR_END_OF_FILE);
+
+  /* Sort the buffer contents by rowid */
+  uint elem_size= h->ref_length + (int)is_mrr_assoc * sizeof(void*);
+  uint n_rowids= (rowids_buf_cur - rowids_buf) / elem_size;
+
+  my_qsort2(rowids_buf, n_rowids, elem_size, (qsort2_cmp)rowid_cmp,
+            (void*)h);
+
+  /* Switch the buffer from "being written" to "being read" */
+  rowids_buf_last= rowids_buf_cur;
+  rowids_buf_cur= rowids_buf;
+  DBUG_RETURN(0);
+}
+
+
+/**
+ DS-MRR implementation: multi_range_read_next() function
+*/
+
+int DsMrr_impl::dsmrr_next(char **range_info)
+{
+  if (use_default_impl)
+    return h->handler::multi_range_read_next(range_info);
+
+  for (;;)
+  {
+    if (rowids_buf_cur == rowids_buf_last)
+    {
+      /* Buffer exhausted: stop at end of scan, otherwise fetch a new batch */
+      if (dsmrr_eof)
+        return HA_ERR_END_OF_FILE;
+      int fill_err= dsmrr_fill_buffer();
+      if (fill_err)
+        return fill_err;
+      /* A refill that produced no rowids also means end of data */
+      if (rowids_buf_cur == rowids_buf_last)
+        return HA_ERR_END_OF_FILE;
+    }
+
+    uchar *entry= rowids_buf_cur;
+    uchar *assoc= 0;
+    if (is_mrr_assoc)
+      memcpy(&assoc, entry + h->ref_length, sizeof(uchar**));
+
+    /* Step over this entry before deciding whether to skip it */
+    rowids_buf_cur += h->ref_length + sizeof(void*) * test(is_mrr_assoc);
+    if (h2->mrr_funcs.skip_record &&
+        h2->mrr_funcs.skip_record(h2->mrr_iter, (char *) assoc, entry))
+      continue;
+
+    /* Fetch the full row through the owner handler */
+    int res= h->rnd_pos(table->record[0], entry);
+    if (is_mrr_assoc)
+      memcpy(range_info, entry + h->ref_length, sizeof(void*));
+    return res;
+  }
+}
+
+
+/**
+  DS-MRR implementation: multi_range_read_info() function
+
+  Computes the cost of the default MRR implementation and then lets
+  choose_mrr_impl() decide whether DS-MRR should be used instead.
+
+  @param keyno     Index number
+  @param n_ranges  Estimated number of ranges in the range sequence
+  @param rows      Estimated total number of rows within those ranges
+  @param bufsz     INOUT IN:  Size of the buffer available for use
+                         OUT: Buffer size of the chosen implementation
+  @param flags     INOUT A combination of HA_MRR_* flags; on return they
+                         describe the chosen implementation
+  @param cost      OUT   Cost of the chosen implementation
+
+  @retval 0  Always
+*/
+ha_rows DsMrr_impl::dsmrr_info(uint keyno, uint n_ranges, uint rows,
+                               uint *bufsz, uint *flags, COST_VECT *cost)
+{
+  ha_rows res;
+  uint def_flags= *flags;
+  uint def_bufsz= *bufsz;
+
+  /* Get cost/flags/mem_usage of default MRR implementation */
+  res= h->handler::multi_range_read_info(keyno, n_ranges, rows, &def_bufsz,
+                                         &def_flags, cost);
+  DBUG_ASSERT(!res);
+  (void) res;                       /* only inspected by the assertion */
+
+  /*
+    choose_mrr_impl() must work on the caller's *flags and *bufsz, as in
+    dsmrr_info_const(): when DS-MRR is chosen those are the values that have
+    to carry the DS-MRR flags and buffer size back to the caller. Passing the
+    default-implementation copies instead would silently drop that result.
+  */
+  if ((*flags & HA_MRR_USE_DEFAULT_IMPL) ||
+      choose_mrr_impl(keyno, rows, flags, bufsz, cost))
+  {
+    /* Default implementation is choosen */
+    DBUG_PRINT("info", ("Default MRR implementation choosen"));
+    *flags= def_flags;
+    *bufsz= def_bufsz;
+  }
+  else
+  {
+    /* *flags and *bufsz were set by choose_mrr_impl */
+    DBUG_PRINT("info", ("DS-MRR implementation choosen"));
+  }
+  return 0;
+}
+
+
+/**
+ DS-MRR Implementation: multi_range_read_info_const() function
+*/
+
+ha_rows DsMrr_impl::dsmrr_info_const(uint keyno, RANGE_SEQ_IF *seq,
+                                     void *seq_init_param, uint n_ranges,
+                                     uint *bufsz, uint *flags, COST_VECT *cost)
+{
+  /* The default implementation is costed on copies of the caller's values */
+  uint dflt_flags= *flags;
+  uint dflt_bufsz= *bufsz;
+
+  const ha_rows est_rows=
+    h->handler::multi_range_read_info_const(keyno, seq, seq_init_param,
+                                            n_ranges, &dflt_bufsz,
+                                            &dflt_flags, cost);
+
+  /* If the default implementation cannot do the scan, neither can DS-MRR */
+  if (est_rows == HA_POS_ERROR)
+    return est_rows;
+
+  /*
+    A caller-supplied HA_MRR_USE_DEFAULT_IMPL is an order (needed for
+    UPDATE/DELETE); otherwise the choice is made from cost and
+    @@optimizer_use_mrr.
+  */
+  bool use_default= (*flags & HA_MRR_USE_DEFAULT_IMPL) != 0;
+  if (!use_default)
+    use_default= choose_mrr_impl(keyno, est_rows, flags, bufsz, cost);
+
+  if (use_default)
+  {
+    DBUG_PRINT("info", ("Default MRR implementation choosen"));
+    *flags= dflt_flags;
+    *bufsz= dflt_bufsz;
+  }
+  else
+  {
+    /* choose_mrr_impl() has already filled in *flags and *bufsz */
+    DBUG_PRINT("info", ("DS-MRR implementation choosen"));
+  }
+  return est_rows;
+}
+
+
+/**
+ Check if key has partially-covered columns
+
+ We can't use DS-MRR to perform range scans when the ranges are over
+ partially-covered keys, because we'll not have full key part values
+ (we'll have their prefixes from the index) and will not be able to check
+ if we've reached the end of the range.
+
+ @param keyno Key to check
+
+ @todo
+ Allow use of DS-MRR in cases where the index has partially-covered
+ components but they are not used for scanning.
+
+ @retval TRUE Yes
+ @retval FALSE No
+*/
+
+bool key_uses_partial_cols(TABLE *table, uint keyno)
+{
+  KEY_PART_INFO *parts= table->key_info[keyno].key_part;
+  const uint n_parts= table->key_info[keyno].key_parts;
+
+  /* One key part whose field is not marked as part of this key is enough */
+  for (uint i= 0; i < n_parts; i++)
+  {
+    if (!parts[i].field->part_of_key.is_set(keyno))
+      return TRUE;
+  }
+  return FALSE;
+}
+
+/**
+  DS-MRR Internals: Choose between Default MRR implementation and DS-MRR
+
+  Make the choice between using Default MRR implementation and DS-MRR.
+  This function contains common functionality factored out of dsmrr_info()
+  and dsmrr_info_const(). The function assumes that the default MRR
+  implementation's applicability requirements are satisfied.
+
+  @param keyno       Index number
+  @param rows        E(full rows to be retrieved)
+  @param flags  IN   MRR flags provided by the MRR user
+                OUT  If DS-MRR is chosen, flags of DS-MRR implementation.
+                     If the default implementation is forced by settings,
+                     index-only access, a clustered primary key or a
+                     partially covered key, HA_MRR_USE_DEFAULT_IMPL is added.
+                     Otherwise the value is not modified.
+  @param bufsz  IN   Size of the buffer available for use
+                OUT  If DS-MRR is chosen, buffer use of DS-MRR implementation
+                     else the value is not modified
+  @param cost   IN   Cost of default MRR implementation
+                OUT  If DS-MRR is chosen, cost of DS-MRR scan
+                     else the value is not modified
+
+  @retval TRUE   Default MRR implementation should be used
+  @retval FALSE  DS-MRR implementation should be used
+*/
+
+bool DsMrr_impl::choose_mrr_impl(uint keyno, ha_rows rows, uint *flags,
+                                 uint *bufsz, COST_VECT *cost)
+{
+  COST_VECT dsmrr_cost;
+  THD *thd= current_thd;
+  if (thd->variables.optimizer_use_mrr == 2 || (*flags & HA_MRR_INDEX_ONLY) ||
+      (keyno == table->s->primary_key && h->primary_key_is_clustered()) ||
+      key_uses_partial_cols(table, keyno))
+  {
+    /* Use the default implementation */
+    *flags |= HA_MRR_USE_DEFAULT_IMPL;
+    return TRUE;
+  }
+
+  /*
+    Cost DS-MRR with the buffer minus room for one key value and one rowid.
+    The computation is done on a local copy so that *bufsz stays untouched on
+    every path that ends up choosing the default implementation.
+    NOTE(review): if *bufsz < add_len the unsigned subtraction wraps around;
+    callers are presumably expected to supply a larger buffer - confirm.
+  */
+  uint add_len= table->key_info[keyno].key_length + h->ref_length;
+  uint dsmrr_bufsz= *bufsz - add_len;
+  if (get_disk_sweep_mrr_cost(keyno, rows, *flags, &dsmrr_bufsz, &dsmrr_cost))
+    return TRUE;
+  dsmrr_bufsz += add_len;
+
+  /*
+    If @@optimizer_use_mrr==force, then set cost of DS-MRR to be minimum of
+    DS-MRR and Default implementations cost. This allows one to force use of
+    DS-MRR whenever it is applicable without affecting other cost-based
+    choices.
+  */
+  bool force_dsmrr= (thd->variables.optimizer_use_mrr == 1);
+  if (force_dsmrr && dsmrr_cost.total_cost() > cost->total_cost())
+    dsmrr_cost= *cost;
+
+  if (!force_dsmrr && dsmrr_cost.total_cost() > cost->total_cost())
+  {
+    /* Use the default MRR implementation; outputs are left as they were */
+    return TRUE;
+  }
+
+  *flags &= ~HA_MRR_USE_DEFAULT_IMPL;  /* Use the DS-MRR implementation */
+  *flags &= ~HA_MRR_SORTED;            /* We will return unordered output */
+  *bufsz= dsmrr_bufsz;
+  *cost= dsmrr_cost;
+  return FALSE;
+}
+
+
+static void get_sort_and_sweep_cost(TABLE *table, ha_rows nrows, COST_VECT *cost);
+
+
+/**
+  Get cost of DS-MRR scan
+
+  The scan is modelled as n_full_steps sort-and-sweep passes over a full
+  rowid buffer, followed by one pass over the remaining rows, plus the cost
+  of reading the index entries themselves.
+
+  @param keynr              Index to be used
+  @param rows               E(Number of rows to be scanned)
+  @param flags              Scan parameters (HA_MRR_* flags)
+  @param buffer_size INOUT  Buffer size
+  @param cost        OUT    The cost
+
+  @retval FALSE  OK
+  @retval TRUE   Error, DS-MRR cannot be used (the buffer is too small
+                 for even 1 rowid)
+*/
+
+bool DsMrr_impl::get_disk_sweep_mrr_cost(uint keynr, ha_rows rows, uint flags,
+                                         uint *buffer_size, COST_VECT *cost)
+{
+  ulong max_buff_entries, elem_size;
+  ha_rows rows_in_full_step, rows_in_last_step;
+  uint n_full_steps;
+  double index_read_cost;
+
+  /* One buffer element is a rowid, plus a range_id pointer if associated */
+  elem_size= h->ref_length + sizeof(void*) * (!test(flags & HA_MRR_NO_ASSOCIATION));
+  max_buff_entries = *buffer_size / elem_size;
+
+  if (!max_buff_entries)
+    return TRUE; /* Buffer has not enough space for even 1 rowid */
+
+  /* Number of iterations we'll make with full buffer */
+  n_full_steps= (uint)floor(rows2double(rows) / max_buff_entries);
+
+  /*
+    Get numbers of rows we'll be processing in
+     - non-last sweep, with full buffer
+     - last iteration, with non-full buffer
+  */
+  rows_in_full_step= max_buff_entries;
+  rows_in_last_step= rows % max_buff_entries;
+
+  /* Adjust buffer size if we expect to use only part of the buffer */
+  if (n_full_steps)
+  {
+    /*
+      Cost of ONE full-buffer pass, multiplied by the number of such passes.
+      The pass must be costed with rows_in_full_step, not with the total row
+      count, or the multiplication counts every row n_full_steps times.
+    */
+    get_sort_and_sweep_cost(table, rows_in_full_step, cost);
+    cost->multiply(n_full_steps);
+  }
+  else
+  {
+    cost->zero();
+    *buffer_size= max(*buffer_size,
+                      (size_t)(1.2*rows_in_last_step) * elem_size +
+                      h->ref_length + table->key_info[keynr].key_length);
+  }
+
+  COST_VECT last_step_cost;
+  get_sort_and_sweep_cost(table, rows_in_last_step, &last_step_cost);
+  cost->add(&last_step_cost);
+
+  if (n_full_steps != 0)
+    cost->mem_cost= *buffer_size;
+  else
+    cost->mem_cost= (double)rows_in_last_step * elem_size;
+
+  /* Total cost of all index accesses */
+  index_read_cost= h->index_only_read_time(keynr, (double)rows);
+  cost->add_io(index_read_cost, 1 /* Random seeks */);
+  return FALSE;
+}
+
+
+/*
+ Get cost of one sort-and-sweep step
+
+ SYNOPSIS
+ get_sort_and_sweep_cost()
+ table Table being accessed
+ nrows Number of rows to be sorted and retrieved
+ cost OUT The cost
+
+ DESCRIPTION
+ Get cost of these operations:
+ - sort an array of #nrows ROWIDs using qsort
+ - read #nrows records from table in a sweep.
+*/
+
+static
+void get_sort_and_sweep_cost(TABLE *table, ha_rows nrows, COST_VECT *cost)
+{
+  /* Nothing to sort or read */
+  if (!nrows)
+  {
+    cost->zero();
+    return;
+  }
+
+  /* Cost of the table sweep itself */
+  get_sweep_read_cost(table, nrows, FALSE, cost);
+
+  /* Add cost of qsort call: n * log2(n) * cost(rowid_comparison) */
+  double n_cmp= rows2double(nrows) * (1.0 / TIME_FOR_COMPARE_ROWID);
+  if (n_cmp < 3)
+    n_cmp= 3;
+  cost->cpu_cost += n_cmp * log2(n_cmp);
+}
+
+
+/**
+ Get cost of reading nrows table records in a "disk sweep"
+
+ A disk sweep read is a sequence of handler->rnd_pos(rowid) calls that are
+ made for an ordered sequence of rowids.
+
+ We assume hard disk IO. The read is performed as follows:
+
+ 1. The disk head is moved to the needed cylinder
+ 2. The controller waits for the plate to rotate
+ 3. The data is transferred
+
+ Time to do #3 is insignificant compared to #2+#1.
+
+ Time to move the disk head is proportional to head travel distance.
+
+ Time to wait for the plate to rotate depends on whether the disk head
+ was moved or not.
+
+ If disk head wasn't moved, the wait time is proportional to distance
+ between the previous block and the block we're reading.
+
+ If the head was moved, we don't know how much we'll need to wait for the
+ plate to rotate. We assume the wait time to be a variate with a mean of
+ 0.5 of full rotation time.
+
+ Our cost units are "random disk seeks". The cost of random disk seek is
+ actually not a constant, it depends on the range of cylinders we're going
+ to access. We make it constant by introducing a fuzzy concept of "typical
+ datafile length" (it's fuzzy as it's hard to tell whether it should
+ include index file, temp.tables etc). Then random seek cost is:
+
+ 1 = half_rotation_cost + move_cost * 1/3 * typical_data_file_length
+
+ We define half_rotation_cost as DISK_SEEK_BASE_COST=0.9.
+
+ @param table Table to be accessed
+ @param nrows Number of rows to retrieve
+ @param interrupted TRUE <=> Assume that the disk sweep will be
+ interrupted by other disk IO. FALSE - otherwise.
+ @param cost OUT The cost.
+*/
+
+void get_sweep_read_cost(TABLE *table, ha_rows nrows, bool interrupted,
+                         COST_VECT *cost)
+{
+  DBUG_ENTER("get_sweep_read_cost");
+
+  cost->zero();
+  if (!table->file->primary_key_is_clustered())
+  {
+    /* Number of IO_SIZE blocks in the data file */
+    const double total_blocks=
+      ceil(ulonglong2double(table->file->stats.data_file_length) / IO_SIZE);
+    /* Expected number of distinct blocks hit by nrows rows, at least one */
+    double touched_blocks=
+      total_blocks * (1.0 - pow(1.0 - 1.0/total_blocks, rows2double(nrows)));
+    if (touched_blocks < 1.0)
+      touched_blocks= 1.0;
+
+    DBUG_PRINT("info",("sweep: nblocks=%g, busy_blocks=%g", total_blocks,
+                       touched_blocks));
+    cost->io_count= touched_blocks;
+
+    if (!interrupted)
+    {
+      /* Assume reading is done in one 'sweep' */
+      cost->avg_io_cost= (DISK_SEEK_BASE_COST +
+                          DISK_SEEK_PROP_COST*total_blocks/touched_blocks);
+    }
+  }
+  else
+  {
+    /* Clustered primary key: cost the reads as primary key lookups */
+    cost->io_count= table->file->read_time(table->s->primary_key,
+                                           (uint) nrows, nrows);
+  }
+  DBUG_PRINT("info",("returning cost=%g", cost->total_cost()));
+  DBUG_VOID_RETURN;
+}
+
+
+/* **************************************************************************
+ * DS-MRR implementation ends
+ ***************************************************************************/
+
+
=== added file 'sql/multi_range_read.h'
--- a/sql/multi_range_read.h 1970-01-01 00:00:00 +0000
+++ b/sql/multi_range_read.h 2009-12-22 12:33:21 +0000
@@ -0,0 +1,70 @@
+/*
+ This file contains declarations for
+ - Disk-Sweep MultiRangeRead (DS-MRR) implementation
+*/
+
+/**
+ A Disk-Sweep MRR interface implementation
+
+ This implementation makes range (and, in the future, 'ref') scans to read
+ table rows in disk sweeps.
+
+ Currently it is used by MyISAM and InnoDB. Potentially it can be used with
+ any table handler that has non-clustered indexes and on-disk rows.
+*/
+
+class DsMrr_impl
+{
+public:
+ /* Pointer to a handler member function taking a bool; not used in this class */
+ typedef void (handler::*range_check_toggle_func_t)(bool on);
+
+ /*
+ Only h2 is initialized here; h and table are set by init(), the remaining
+ members are not given a value by the constructor.
+ */
+ DsMrr_impl()
+ : h2(NULL) {};
+
+ /*
+ The "owner" handler object (the one that calls dsmrr_XXX functions).
+ It is used to retrieve full table rows by calling rnd_pos().
+ */
+ handler *h;
+ TABLE *table; /* Always equal to h->table */
+private:
+ /* Secondary handler object. It is used for scanning the index */
+ handler *h2;
+
+ /* Buffer to store rowids, or (rowid, range_id) pairs */
+ uchar *rowids_buf;
+ uchar *rowids_buf_cur; /* Current position when reading/writing */
+ uchar *rowids_buf_last; /* When reading: end of used buffer space */
+ uchar *rowids_buf_end; /* End of the buffer */
+
+ bool dsmrr_eof; /* TRUE <=> We have reached EOF when reading index tuples */
+
+ /* TRUE <=> need range association, buffer holds {rowid, range_id} pairs */
+ bool is_mrr_assoc;
+
+ bool use_default_impl; /* TRUE <=> shortcut all calls to default MRR impl */
+public:
+ /* Attach this object to its owner handler and that handler's table */
+ void init(handler *h_arg, TABLE *table_arg)
+ {
+ h= h_arg;
+ table= table_arg;
+ }
+ /* Start an MRR scan; returns 0 on success, non-zero on error */
+ int dsmrr_init(handler *h, RANGE_SEQ_IF *seq_funcs, void *seq_init_param,
+ uint n_ranges, uint mode, HANDLER_BUFFER *buf);
+ /* Release the secondary handler, if any, and fall back to the default impl */
+ void dsmrr_close();
+ /* Collect the next batch of rowids into the buffer and sort it by rowid */
+ int dsmrr_fill_buffer();
+ /* Read the next row of the scan; returns 0 or a handler error code */
+ int dsmrr_next(char **range_info);
+
+ /* Cost/flags/buffer estimate when only the number of ranges is known */
+ ha_rows dsmrr_info(uint keyno, uint n_ranges, uint keys, uint *bufsz,
+ uint *flags, COST_VECT *cost);
+
+ /* Cost/flags/buffer estimate for a known sequence of ranges */
+ ha_rows dsmrr_info_const(uint keyno, RANGE_SEQ_IF *seq,
+ void *seq_init_param, uint n_ranges, uint *bufsz,
+ uint *flags, COST_VECT *cost);
+private:
+ /* Returns TRUE if the default MRR implementation should be used */
+ bool choose_mrr_impl(uint keyno, ha_rows rows, uint *flags, uint *bufsz,
+ COST_VECT *cost);
+ /* Returns TRUE if the buffer is too small for DS-MRR, else fills *cost */
+ bool get_disk_sweep_mrr_cost(uint keynr, ha_rows rows, uint flags,
+ uint *buffer_size, COST_VECT *cost);
+};
+
=== modified file 'sql/mysql_priv.h'
--- a/sql/mysql_priv.h 2009-12-15 07:16:46 +0000
+++ b/sql/mysql_priv.h 2009-12-22 12:33:21 +0000
@@ -540,12 +540,13 @@ protected:
#define OPTIMIZER_SWITCH_INDEX_MERGE_UNION 2
#define OPTIMIZER_SWITCH_INDEX_MERGE_SORT_UNION 4
#define OPTIMIZER_SWITCH_INDEX_MERGE_INTERSECT 8
+#define OPTIMIZER_SWITCH_INDEX_COND_PUSHDOWN 16
#ifdef DBUG_OFF
-# define OPTIMIZER_SWITCH_LAST 16
-#else
-# define OPTIMIZER_SWITCH_TABLE_ELIMINATION 16
# define OPTIMIZER_SWITCH_LAST 32
+#else
+# define OPTIMIZER_SWITCH_TABLE_ELIMINATION 32
+# define OPTIMIZER_SWITCH_LAST 64
#endif
#ifdef DBUG_OFF
@@ -553,12 +554,14 @@ protected:
# define OPTIMIZER_SWITCH_DEFAULT (OPTIMIZER_SWITCH_INDEX_MERGE | \
OPTIMIZER_SWITCH_INDEX_MERGE_UNION | \
OPTIMIZER_SWITCH_INDEX_MERGE_SORT_UNION | \
- OPTIMIZER_SWITCH_INDEX_MERGE_INTERSECT)
+ OPTIMIZER_SWITCH_INDEX_MERGE_INTERSECT | \
+ OPTIMIZER_SWITCH_INDEX_COND_PUSHDOWN)
#else
# define OPTIMIZER_SWITCH_DEFAULT (OPTIMIZER_SWITCH_INDEX_MERGE | \
OPTIMIZER_SWITCH_INDEX_MERGE_UNION | \
OPTIMIZER_SWITCH_INDEX_MERGE_SORT_UNION | \
OPTIMIZER_SWITCH_INDEX_MERGE_INTERSECT | \
+ OPTIMIZER_SWITCH_INDEX_COND_PUSHDOWN | \
OPTIMIZER_SWITCH_TABLE_ELIMINATION)
#endif
=== modified file 'sql/mysqld.cc'
--- a/sql/mysqld.cc 2009-12-21 02:26:15 +0000
+++ b/sql/mysqld.cc 2009-12-22 12:49:15 +0000
@@ -300,6 +300,7 @@ static const char *optimizer_switch_name
{
"index_merge","index_merge_union","index_merge_sort_union",
"index_merge_intersection",
+ "index_condition_pushdown",
#ifndef DBUG_OFF
"table_elimination",
#endif
@@ -313,6 +314,7 @@ static const unsigned int optimizer_swit
sizeof("index_merge_union") - 1,
sizeof("index_merge_sort_union") - 1,
sizeof("index_merge_intersection") - 1,
+ sizeof("index_condition_pushdown") - 1,
#ifndef DBUG_OFF
sizeof("table_elimination") - 1,
#endif
@@ -391,7 +393,8 @@ static const char *sql_mode_str= "OFF";
/* Text representation for OPTIMIZER_SWITCH_DEFAULT */
static const char *optimizer_switch_str="index_merge=on,index_merge_union=on,"
"index_merge_sort_union=on,"
- "index_merge_intersection=on"
+ "index_merge_intersection=on,"
+ "index_condition_pushdown=on"
#ifndef DBUG_OFF
",table_elimination=on";
#else
@@ -5767,7 +5770,7 @@ enum options_mysqld
OPT_MAX_SEEKS_FOR_KEY, OPT_MAX_TMP_TABLES, OPT_MAX_USER_CONNECTIONS,
OPT_MAX_LENGTH_FOR_SORT_DATA,
OPT_MAX_WRITE_LOCK_COUNT, OPT_BULK_INSERT_BUFFER_SIZE,
- OPT_MAX_ERROR_COUNT, OPT_MULTI_RANGE_COUNT, OPT_MYISAM_DATA_POINTER_SIZE,
+ OPT_MAX_ERROR_COUNT, OPT_MRR_BUFFER_SIZE, OPT_MYISAM_DATA_POINTER_SIZE,
OPT_MYISAM_BLOCK_SIZE, OPT_MYISAM_MAX_EXTRA_SORT_FILE_SIZE,
OPT_MYISAM_MAX_SORT_FILE_SIZE, OPT_MYISAM_SORT_BUFFER_SIZE,
@@ -6968,6 +6971,12 @@ The minimum value for this variable is 4
(uchar**) &global_system_variables.min_examined_row_limit,
(uchar**) &max_system_variables.min_examined_row_limit, 0, GET_ULONG,
REQUIRED_ARG, 0, 0, (longlong) ULONG_MAX, 0, 1L, 0},
+ {"mrr_buffer_size", OPT_MRR_BUFFER_SIZE,
+ "Size of buffer to use when using MRR with range access",
+ (uchar**) &global_system_variables.mrr_buff_size,
+ (uchar**) &max_system_variables.mrr_buff_size, 0,
+ GET_ULONG, REQUIRED_ARG, 256*1024L, IO_SIZE*2+MALLOC_OVERHEAD,
+ INT_MAX32, MALLOC_OVERHEAD, 1 /* Small to be able to do tests */ , 0},
{"myisam_block_size", OPT_MYISAM_BLOCK_SIZE,
"Block size to be used for MyISAM index pages.",
(uchar**) &opt_myisam_block_size,
@@ -7047,7 +7056,8 @@ The minimum value for this variable is 4
0, GET_ULONG, OPT_ARG, MAX_TABLES+1, 0, MAX_TABLES+2, 0, 1, 0},
{"optimizer_switch", OPT_OPTIMIZER_SWITCH,
"optimizer_switch=option=val[,option=val...], where option={index_merge, "
- "index_merge_union, index_merge_sort_union, index_merge_intersection"
+ "index_merge_union, index_merge_sort_union, index_merge_intersection, "
+ "index_condition_pushdown"
#ifndef DBUG_OFF
", table_elimination"
#endif
@@ -7131,7 +7141,7 @@ The minimum value for this variable is 4
(uchar**) &global_system_variables.read_rnd_buff_size,
(uchar**) &max_system_variables.read_rnd_buff_size, 0,
GET_ULONG, REQUIRED_ARG, 256*1024L, IO_SIZE*2+MALLOC_OVERHEAD,
- INT_MAX32, MALLOC_OVERHEAD, 1 /* Small overhead to be able to test MRR, was: IO_SIZE*/ , 0},
+ INT_MAX32, MALLOC_OVERHEAD, IO_SIZE, 0},
{"record_buffer", OPT_RECORD_BUFFER,
"Alias for read_buffer_size",
(uchar**) &global_system_variables.read_buff_size,
=== added file 'sql/opt_index_cond_pushdown.cc'
--- a/sql/opt_index_cond_pushdown.cc 1970-01-01 00:00:00 +0000
+++ b/sql/opt_index_cond_pushdown.cc 2009-12-22 12:49:15 +0000
@@ -0,0 +1,387 @@
+#include "mysql_priv.h"
+#include "sql_select.h"
+
+/****************************************************************************
+ * Index Condition Pushdown code starts
+ ***************************************************************************/
+/*
+ Check if given expression uses only table fields covered by the given index
+
+ SYNOPSIS
+ uses_index_fields_only()
+ item Expression to check
+ tbl The table having the index
+ keyno The index number
+ other_tbls_ok TRUE <=> Fields of other non-const tables are allowed
+
+ DESCRIPTION
+ Check if given expression only uses fields covered by index #keyno in table
+ tbl. Non-constant parts not referring to tbl are accepted iff other_tbls_ok.
+
+ The expression is guaranteed not to be AND or OR - those constructs are
+ handled outside of this function.
+
+ RETURN
+ TRUE Yes
+ FALSE No
+*/
+
+bool uses_index_fields_only(Item *item, TABLE *tbl, uint keyno,
+ bool other_tbls_ok)
+{
+ if (item->const_item())
+ return TRUE;
+
+ /*
+ Don't push down the triggered conditions. Nested outer joins execution
+ code may need to evaluate a condition several times (both triggered and
+ untriggered), and that cannot be done once the condition is pushed down.
+ TODO: Consider cloning the triggered condition and using the copies for:
+ 1. push the first copy down, to have most restrictive index condition
+ possible
+ 2. Put the second copy into tab->select_cond.
+ */
+ if (item->type() == Item::FUNC_ITEM &&
+ ((Item_func*)item)->functype() == Item_func::TRIG_COND_FUNC)
+ return FALSE;
+
+ if (!(item->used_tables() & tbl->map))
+ return other_tbls_ok;
+
+ Item::Type item_type= item->type();
+ switch (item_type) {
+ case Item::FUNC_ITEM:
+ {
+ /* This is a function, apply condition recursively to arguments */
+ Item_func *item_func= (Item_func*)item;
+ Item **child;
+ Item **item_end= (item_func->arguments()) + item_func->argument_count();
+ for (child= item_func->arguments(); child != item_end; child++)
+ {
+ if (!uses_index_fields_only(*child, tbl, keyno, other_tbls_ok))
+ return FALSE;
+ }
+ return TRUE;
+ }
+ case Item::COND_ITEM:
+ {
+ /*
+ This is an AND/OR condition. Regular AND/OR clauses are handled by
+ make_cond_for_index() which will chop off the part that can be
+ checked with index. This code is for handling non-top-level AND/ORs,
+ e.g. func(x AND y).
+ */
+ List_iterator<Item> li(*((Item_cond*)item)->argument_list());
+ Item *item;
+ while ((item=li++))
+ {
+ if (!uses_index_fields_only(item, tbl, keyno, other_tbls_ok))
+ return FALSE;
+ }
+ return TRUE;
+ }
+ case Item::FIELD_ITEM:
+ {
+ Item_field *item_field= (Item_field*)item;
+ if (item_field->field->table != tbl)
+ return TRUE;
+ /*
+ The below is probably a repetition - the first part checks the
+ other two, but let's play it safe:
+ */
+ return item_field->field->part_of_key.is_set(keyno) &&
+ item_field->field->type() != MYSQL_TYPE_GEOMETRY &&
+ item_field->field->type() != MYSQL_TYPE_BLOB;
+ }
+ case Item::REF_ITEM:
+ return uses_index_fields_only(item->real_item(), tbl, keyno,
+ other_tbls_ok);
+ default:
+ return FALSE; /* Play it safe, don't push unknown non-const items */
+ }
+}
+
+#define ICP_COND_USES_INDEX_ONLY 10
+
+/*
+ Get a part of the condition that can be checked using only index fields
+
+ SYNOPSIS
+ make_cond_for_index()
+ cond The source condition
+ table The table that is partially available
+ keyno The index in the above table. Only fields covered by the index
+ are available
+ other_tbls_ok TRUE <=> Fields of other non-const tables are allowed
+
+ DESCRIPTION
+ Get the part of the condition that can be checked when, for the given
+ table, only values of the fields covered by index keyno are available. The
+ condition may refer to other tables; it is assumed that values of all of
+ their fields are available.
+
+ Example:
+ make_cond_for_index(
+ "cond(t1.field) AND cond(t2.key1) AND cond(t2.non_key) AND cond(t2.key2)",
+ t2, keyno(t2.key1), TRUE)
+ will return
+ "cond(t1.field) AND cond(t2.key1)"
+
+ RETURN
+ Index condition, or NULL if no condition could be inferred.
+*/
+
+Item *make_cond_for_index(Item *cond, TABLE *table, uint keyno,
+ bool other_tbls_ok)
+{
+ if (!cond)
+ return NULL;
+ if (cond->type() == Item::COND_ITEM)
+ {
+ uint n_marked= 0;
+ if (((Item_cond*) cond)->functype() == Item_func::COND_AND_FUNC)
+ {
+ table_map used_tables= 0;
+ Item_cond_and *new_cond=new Item_cond_and;
+ if (!new_cond)
+ return (COND*) 0;
+ List_iterator<Item> li(*((Item_cond*) cond)->argument_list());
+ Item *item;
+ while ((item=li++))
+ {
+ Item *fix= make_cond_for_index(item, table, keyno, other_tbls_ok);
+ if (fix)
+ {
+ new_cond->argument_list()->push_back(fix);
+ used_tables|= fix->used_tables();
+ }
+ n_marked += test(item->marker == ICP_COND_USES_INDEX_ONLY);
+ }
+ if (n_marked ==((Item_cond*)cond)->argument_list()->elements)
+ cond->marker= ICP_COND_USES_INDEX_ONLY;
+ switch (new_cond->argument_list()->elements) {
+ case 0:
+ return (COND*) 0;
+ case 1:
+ new_cond->used_tables_cache= used_tables;
+ return new_cond->argument_list()->head();
+ default:
+ new_cond->quick_fix_field();
+ new_cond->used_tables_cache= used_tables;
+ return new_cond;
+ }
+ }
+ else /* It's OR */
+ {
+ Item_cond_or *new_cond=new Item_cond_or;
+ if (!new_cond)
+ return (COND*) 0;
+ List_iterator<Item> li(*((Item_cond*) cond)->argument_list());
+ Item *item;
+ while ((item=li++))
+ {
+ Item *fix= make_cond_for_index(item, table, keyno, other_tbls_ok);
+ if (!fix)
+ return (COND*) 0;
+ new_cond->argument_list()->push_back(fix);
+ n_marked += test(item->marker == ICP_COND_USES_INDEX_ONLY);
+ }
+ if (n_marked ==((Item_cond*)cond)->argument_list()->elements)
+ cond->marker= ICP_COND_USES_INDEX_ONLY;
+ new_cond->quick_fix_field();
+ new_cond->used_tables_cache= ((Item_cond_or*) cond)->used_tables_cache;
+ new_cond->top_level_item();
+ return new_cond;
+ }
+ }
+
+ if (!uses_index_fields_only(cond, table, keyno, other_tbls_ok))
+ return (COND*) 0;
+ cond->marker= ICP_COND_USES_INDEX_ONLY;
+ return cond;
+}
+
+
+Item *make_cond_remainder(Item *cond, bool exclude_index)
+{
+ if (exclude_index && cond->marker == ICP_COND_USES_INDEX_ONLY)
+ return 0; /* Already checked */
+
+ if (cond->type() == Item::COND_ITEM)
+ {
+ table_map tbl_map= 0;
+ if (((Item_cond*) cond)->functype() == Item_func::COND_AND_FUNC)
+ {
+ /* Create new top level AND item */
+ Item_cond_and *new_cond=new Item_cond_and;
+ if (!new_cond)
+ return (COND*) 0;
+ List_iterator<Item> li(*((Item_cond*) cond)->argument_list());
+ Item *item;
+ while ((item=li++))
+ {
+ Item *fix= make_cond_remainder(item, exclude_index);
+ if (fix)
+ {
+ new_cond->argument_list()->push_back(fix);
+ tbl_map |= fix->used_tables();
+ }
+ }
+ switch (new_cond->argument_list()->elements) {
+ case 0:
+ return (COND*) 0;
+ case 1:
+ return new_cond->argument_list()->head();
+ default:
+ new_cond->quick_fix_field();
+ ((Item_cond*)new_cond)->used_tables_cache= tbl_map;
+ return new_cond;
+ }
+ }
+ else /* It's OR */
+ {
+ Item_cond_or *new_cond=new Item_cond_or;
+ if (!new_cond)
+ return (COND*) 0;
+ List_iterator<Item> li(*((Item_cond*) cond)->argument_list());
+ Item *item;
+ while ((item=li++))
+ {
+ Item *fix= make_cond_remainder(item, FALSE);
+ if (!fix)
+ return (COND*) 0;
+ new_cond->argument_list()->push_back(fix);
+ tbl_map |= fix->used_tables();
+ }
+ new_cond->quick_fix_field();
+ ((Item_cond*)new_cond)->used_tables_cache= tbl_map;
+ new_cond->top_level_item();
+ return new_cond;
+ }
+ }
+ return cond;
+}
+
+
+/*
+ Try to extract and push the index condition
+
+ SYNOPSIS
+ push_index_cond()
+ tab A join tab that has tab->table->file and its condition
+ in tab->select_cond
+ keyno Index for which to extract and push the condition
+ other_tbls_ok TRUE <=> Fields of other non-const tables are allowed
+
+ DESCRIPTION
+ Try to extract and push the index condition down to table handler
+*/
+
+void push_index_cond(JOIN_TAB *tab, uint keyno, bool other_tbls_ok)
+{
+ DBUG_ENTER("push_index_cond");
+ Item *idx_cond;
+ bool do_index_cond_pushdown=
+ ((tab->table->file->index_flags(keyno, 0, 1) &
+ HA_DO_INDEX_COND_PUSHDOWN) &&
+ optimizer_flag(tab->join->thd, OPTIMIZER_SWITCH_INDEX_COND_PUSHDOWN));
+
+ /*
+ Do not try index condition pushdown on indexes which have partially-covered
+ columns. Unpacking from a column prefix into index tuple is not a supported
+ operation in some engines, see e.g. MySQL BUG#42991.
+ TODO: a better solution would be not to consider partially-covered columns
+ as parts of the index and still produce/check index condition for
+ fully-covered index columns.
+ */
+ KEY *key_info= tab->table->key_info + keyno;
+ for (uint kp= 0; kp < key_info->key_parts; kp++)
+ {
+ if ((key_info->key_part[kp].key_part_flag & HA_PART_KEY_SEG))
+ {
+ do_index_cond_pushdown= FALSE;
+ break;
+ }
+ }
+
+ if (do_index_cond_pushdown)
+ {
+ DBUG_EXECUTE("where",
+ print_where(tab->select_cond, "full cond", QT_ORDINARY););
+
+ idx_cond= make_cond_for_index(tab->select_cond, tab->table, keyno,
+ other_tbls_ok);
+
+ DBUG_EXECUTE("where",
+ print_where(idx_cond, "idx cond", QT_ORDINARY););
+
+ if (idx_cond)
+ {
+ Item *idx_remainder_cond= 0;
+ tab->pre_idx_push_select_cond= tab->select_cond;
+ /*
+ For a BKA cache we store the condition in a special BKA cache field,
+ because additional operations are required before the condition
+ can be evaluated. This condition is used in the
+ JOIN_CACHE_BKA[_UNIQUE]::skip_index_tuple() functions.
+ */
+ if (tab->use_join_cache &&
+ /*
+ If a join cache is used, other_tbls_ok is TRUE only
+ for a BKA[_UNIQUE] cache (see check_join_cache_usage()).
+ In this case other_tbls_ok is equivalent to
+ cache->is_key_access().
+ */
+ other_tbls_ok &&
+ (idx_cond->used_tables() &
+ ~(tab->table->map | tab->join->const_table_map)))
+ tab->cache_idx_cond= idx_cond;
+ else
+ idx_remainder_cond= tab->table->file->idx_cond_push(keyno, idx_cond);
+
+ /*
+ Disable eq_ref's "lookup cache" if we've pushed down an index
+ condition.
+ TODO: This check happens to work on current ICP implementations, but
+ there may exist a compliant implementation that will not work
+ correctly with it. Sort this out when we stabilize the condition
+ pushdown APIs.
+ */
+ if (idx_remainder_cond != idx_cond)
+ tab->ref.disable_cache= TRUE;
+
+ Item *row_cond= make_cond_remainder(tab->select_cond, TRUE);
+
+ DBUG_EXECUTE("where",
+ print_where(row_cond, "remainder cond", QT_ORDINARY););
+
+ if (row_cond)
+ {
+ if (!idx_remainder_cond)
+ tab->select_cond= row_cond;
+ else
+ {
+ COND *new_cond= new Item_cond_and(row_cond, idx_remainder_cond);
+ tab->select_cond= new_cond;
+ tab->select_cond->quick_fix_field();
+ ((Item_cond_and*)tab->select_cond)->used_tables_cache=
+ row_cond->used_tables() | idx_remainder_cond->used_tables();
+ }
+ }
+ else
+ tab->select_cond= idx_remainder_cond;
+ if (tab->select)
+ {
+ DBUG_EXECUTE("where",
+ print_where(tab->select->cond,
+ "select_cond",
+ QT_ORDINARY););
+
+ tab->select->cond= tab->select_cond;
+ }
+ }
+ }
+ DBUG_VOID_RETURN;
+}
+
+
=== modified file 'sql/opt_range.cc'
--- a/sql/opt_range.cc 2009-12-15 07:16:46 +0000
+++ b/sql/opt_range.cc 2009-12-22 12:33:21 +0000
@@ -720,6 +720,7 @@ public:
uint8 first_null_comp; /* first null component if any, 0 - otherwise */
};
+
class TABLE_READ_PLAN;
class TRP_RANGE;
class TRP_ROR_INTERSECT;
@@ -789,7 +790,9 @@ static SEL_ARG null_element(SEL_ARG::IMP
static bool null_part_in_key(KEY_PART *key_part, const uchar *key,
uint length);
bool sel_trees_can_be_ored(SEL_TREE *tree1, SEL_TREE *tree2, RANGE_OPT_PARAM* param);
+static bool is_key_scan_ror(PARAM *param, uint keynr, uint8 nparts);
+#include "opt_range_mrr.cc"
/*
SEL_IMERGE is a list of possible ways to do index merge, i.e. it is
@@ -1165,7 +1168,7 @@ QUICK_RANGE_SELECT::QUICK_RANGE_SELECT(T
my_init_dynamic_array(&ranges, sizeof(QUICK_RANGE*), 16, 16);
/* 'thd' is not accessible in QUICK_RANGE_SELECT::reset(). */
- mrr_buf_size= thd->variables.read_rnd_buff_size;
+ mrr_buf_size= thd->variables.mrr_buff_size;
mrr_buf_desc= NULL;
if (!no_alloc && !parent_alloc)
@@ -4875,7 +4878,6 @@ static TRP_RANGE *get_key_scans_params(P
uint UNINIT_VAR(best_mrr_flags), /* protected by key_to_read */
UNINIT_VAR(best_buf_size); /* protected by key_to_read */
TRP_RANGE* read_plan= NULL;
- bool pk_is_clustered= param->table->file->primary_key_is_clustered();
DBUG_ENTER("get_key_scans_params");
/*
Note that there may be trees that have type SEL_TREE::KEY but contain no
@@ -7281,284 +7283,6 @@ void SEL_ARG::test_use_count(SEL_ARG *ro
}
#endif
-
-/****************************************************************************
- MRR Range Sequence Interface implementation that walks a SEL_ARG* tree.
- ****************************************************************************/
-
-/* MRR range sequence, SEL_ARG* implementation: stack entry */
-typedef struct st_range_seq_entry
-{
- /*
- Pointers in min and max keys. They point to right-after-end of key
- images. The 0-th entry has these pointing to key tuple start.
- */
- uchar *min_key, *max_key;
-
- /*
- Flags, for {keypart0, keypart1, ... this_keypart} subtuple.
- min_key_flag may have NULL_RANGE set.
- */
- uint min_key_flag, max_key_flag;
-
- /* Number of key parts */
- uint min_key_parts, max_key_parts;
- SEL_ARG *key_tree;
-} RANGE_SEQ_ENTRY;
-
-
-/*
- MRR range sequence, SEL_ARG* implementation: SEL_ARG graph traversal context
-*/
-typedef struct st_sel_arg_range_seq
-{
- uint keyno; /* index of used tree in SEL_TREE structure */
- uint real_keyno; /* Number of the index in tables */
- PARAM *param;
- SEL_ARG *start; /* Root node of the traversed SEL_ARG* graph */
-
- RANGE_SEQ_ENTRY stack[MAX_REF_PARTS];
- int i; /* Index of last used element in the above array */
-
- bool at_start; /* TRUE <=> The traversal has just started */
-} SEL_ARG_RANGE_SEQ;
-
-
-/*
- Range sequence interface, SEL_ARG* implementation: Initialize the traversal
-
- SYNOPSIS
- init()
- init_params SEL_ARG tree traversal context
- n_ranges [ignored] The number of ranges obtained
- flags [ignored] HA_MRR_SINGLE_POINT, HA_MRR_FIXED_KEY
-
- RETURN
- Value of init_param
-*/
-
-range_seq_t sel_arg_range_seq_init(void *init_param, uint n_ranges, uint flags)
-{
- SEL_ARG_RANGE_SEQ *seq= (SEL_ARG_RANGE_SEQ*)init_param;
- seq->at_start= TRUE;
- seq->stack[0].key_tree= NULL;
- seq->stack[0].min_key= seq->param->min_key;
- seq->stack[0].min_key_flag= 0;
- seq->stack[0].min_key_parts= 0;
-
- seq->stack[0].max_key= seq->param->max_key;
- seq->stack[0].max_key_flag= 0;
- seq->stack[0].max_key_parts= 0;
- seq->i= 0;
- return init_param;
-}
-
-
-static void step_down_to(SEL_ARG_RANGE_SEQ *arg, SEL_ARG *key_tree)
-{
- RANGE_SEQ_ENTRY *cur= &arg->stack[arg->i+1];
- RANGE_SEQ_ENTRY *prev= &arg->stack[arg->i];
-
- cur->key_tree= key_tree;
- cur->min_key= prev->min_key;
- cur->max_key= prev->max_key;
- cur->min_key_parts= prev->min_key_parts;
- cur->max_key_parts= prev->max_key_parts;
-
- uint16 stor_length= arg->param->key[arg->keyno][key_tree->part].store_length;
- cur->min_key_parts += key_tree->store_min(stor_length, &cur->min_key,
- prev->min_key_flag);
- cur->max_key_parts += key_tree->store_max(stor_length, &cur->max_key,
- prev->max_key_flag);
-
- cur->min_key_flag= prev->min_key_flag | key_tree->min_flag;
- cur->max_key_flag= prev->max_key_flag | key_tree->max_flag;
-
- if (key_tree->is_null_interval())
- cur->min_key_flag |= NULL_RANGE;
- (arg->i)++;
-}
-
-
-/*
- Range sequence interface, SEL_ARG* implementation: get the next interval
-
- SYNOPSIS
- sel_arg_range_seq_next()
- rseq Value returned from sel_arg_range_seq_init
- range OUT Store information about the range here
-
- DESCRIPTION
- This is "get_next" function for Range sequence interface implementation
- for SEL_ARG* tree.
-
- IMPLEMENTATION
- The traversal also updates those param members:
- - is_ror_scan
- - range_count
- - max_key_part
-
- RETURN
- 0 Ok
- 1 No more ranges in the sequence
-*/
-
-//psergey-merge-todo: support check_quick_keys:max_keypart
-uint sel_arg_range_seq_next(range_seq_t rseq, KEY_MULTI_RANGE *range)
-{
- SEL_ARG *key_tree;
- SEL_ARG_RANGE_SEQ *seq= (SEL_ARG_RANGE_SEQ*)rseq;
- if (seq->at_start)
- {
- key_tree= seq->start;
- seq->at_start= FALSE;
- goto walk_up_n_right;
- }
-
- key_tree= seq->stack[seq->i].key_tree;
- /* Ok, we're at some "full tuple" position in the tree */
-
- /* Step down if we can */
- if (key_tree->next && key_tree->next != &null_element)
- {
- //step down; (update the tuple, we'll step right and stay there)
- seq->i--;
- step_down_to(seq, key_tree->next);
- key_tree= key_tree->next;
- seq->param->is_ror_scan= FALSE;
- goto walk_right_n_up;
- }
-
- /* Ok, can't step down, walk left until we can step down */
- while (1)
- {
- if (seq->i == 1) // can't step left
- return 1;
- /* Step left */
- seq->i--;
- key_tree= seq->stack[seq->i].key_tree;
-
- /* Step down if we can */
- if (key_tree->next && key_tree->next != &null_element)
- {
- // Step down; update the tuple
- seq->i--;
- step_down_to(seq, key_tree->next);
- key_tree= key_tree->next;
- break;
- }
- }
-
- /*
- Ok, we've stepped down from the path to previous tuple.
- Walk right-up while we can
- */
-walk_right_n_up:
- while (key_tree->next_key_part && key_tree->next_key_part != &null_element &&
- key_tree->next_key_part->part == key_tree->part + 1 &&
- key_tree->next_key_part->type == SEL_ARG::KEY_RANGE)
- {
- {
- RANGE_SEQ_ENTRY *cur= &seq->stack[seq->i];
- uint min_key_length= cur->min_key - seq->param->min_key;
- uint max_key_length= cur->max_key - seq->param->max_key;
- uint len= cur->min_key - cur[-1].min_key;
- if (!(min_key_length == max_key_length &&
- !memcmp(cur[-1].min_key, cur[-1].max_key, len) &&
- !key_tree->min_flag && !key_tree->max_flag))
- {
- seq->param->is_ror_scan= FALSE;
- if (!key_tree->min_flag)
- cur->min_key_parts +=
- key_tree->next_key_part->store_min_key(seq->param->key[seq->keyno],
- &cur->min_key,
- &cur->min_key_flag);
- if (!key_tree->max_flag)
- cur->max_key_parts +=
- key_tree->next_key_part->store_max_key(seq->param->key[seq->keyno],
- &cur->max_key,
- &cur->max_key_flag);
- break;
- }
- }
-
- /*
- Ok, current atomic interval is in form "t.field=const" and there is
- next_key_part interval. Step right, and walk up from there.
- */
- key_tree= key_tree->next_key_part;
-
-walk_up_n_right:
- while (key_tree->prev && key_tree->prev != &null_element)
- {
- /* Step up */
- key_tree= key_tree->prev;
- }
- step_down_to(seq, key_tree);
- }
-
- /* Ok got a tuple */
- RANGE_SEQ_ENTRY *cur= &seq->stack[seq->i];
- uint min_key_length= cur->min_key - seq->param->min_key;
-
- range->ptr= (char*)(int)(key_tree->part);
- if (cur->min_key_flag & GEOM_FLAG)
- {
- range->range_flag= cur->min_key_flag;
-
- /* Here minimum contains also function code bits, and maximum is +inf */
- range->start_key.key= seq->param->min_key;
- range->start_key.length= min_key_length;
- range->start_key.flag= (ha_rkey_function) (cur->min_key_flag ^ GEOM_FLAG);
- }
- else
- {
- range->range_flag= cur->min_key_flag | cur->max_key_flag;
-
- range->start_key.key= seq->param->min_key;
- range->start_key.length= cur->min_key - seq->param->min_key;
- range->start_key.keypart_map= make_prev_keypart_map(cur->min_key_parts);
- range->start_key.flag= (cur->min_key_flag & NEAR_MIN ? HA_READ_AFTER_KEY :
- HA_READ_KEY_EXACT);
-
- range->end_key.key= seq->param->max_key;
- range->end_key.length= cur->max_key - seq->param->max_key;
- range->end_key.flag= (cur->max_key_flag & NEAR_MAX ? HA_READ_BEFORE_KEY :
- HA_READ_AFTER_KEY);
- range->end_key.keypart_map= make_prev_keypart_map(cur->max_key_parts);
-
- if (!(cur->min_key_flag & ~NULL_RANGE) && !cur->max_key_flag &&
- (uint)key_tree->part+1 == seq->param->table->key_info[seq->real_keyno].key_parts &&
- (seq->param->table->key_info[seq->real_keyno].flags & (HA_NOSAME | HA_END_SPACE_KEY)) ==
- HA_NOSAME &&
- range->start_key.length == range->end_key.length &&
- !memcmp(seq->param->min_key,seq->param->max_key,range->start_key.length))
- range->range_flag= UNIQUE_RANGE | (cur->min_key_flag & NULL_RANGE);
-
- if (seq->param->is_ror_scan)
- {
- /*
- If we get here, the condition on the key was converted to form
- "(keyXpart1 = c1) AND ... AND (keyXpart{key_tree->part - 1} = cN) AND
- somecond(keyXpart{key_tree->part})"
- Check if
- somecond is "keyXpart{key_tree->part} = const" and
- uncovered "tail" of KeyX parts is either empty or is identical to
- first members of clustered primary key.
- */
- if (!(!(cur->min_key_flag & ~NULL_RANGE) && !cur->max_key_flag &&
- (range->start_key.length == range->end_key.length) &&
- !memcmp(range->start_key.key, range->end_key.key, range->start_key.length) &&
- is_key_scan_ror(seq->param, seq->real_keyno, key_tree->part + 1)))
- seq->param->is_ror_scan= FALSE;
- }
- }
- seq->param->range_count++;
- seq->param->max_key_part=max(seq->param->max_key_part,key_tree->part);
- return 0;
-}
-
-
/*
Calculate cost and E(#rows) for a given index and intervals tree
@@ -7633,7 +7357,7 @@ ha_rows check_quick_select(PARAM *param,
if (current_thd->lex->sql_command != SQLCOM_SELECT)
*mrr_flags |= HA_MRR_USE_DEFAULT_IMPL;
- *bufsize= param->thd->variables.read_rnd_buff_size;
+ *bufsize= param->thd->variables.mrr_buff_size;
rows= file->multi_range_read_info_const(keynr, &seq_if, (void*)&seq, 0,
bufsize, mrr_flags, cost);
if (rows != HA_POS_ERROR)
@@ -8148,7 +7872,7 @@ QUICK_RANGE_SELECT *get_quick_select_for
quick->mrr_flags |= HA_MRR_NO_NULL_ENDPOINTS;
#endif
- quick->mrr_buf_size= thd->variables.read_rnd_buff_size;
+ quick->mrr_buf_size= thd->variables.mrr_buff_size;
if (table->file->multi_range_read_info(quick->index, 1, (uint)records,
&quick->mrr_buf_size,
&quick->mrr_flags, &cost))
@@ -8518,75 +8242,6 @@ int QUICK_RANGE_SELECT::reset()
/*
- Range sequence interface implementation for array<QUICK_RANGE>: initialize
-
- SYNOPSIS
- quick_range_seq_init()
- init_param Caller-opaque paramenter: QUICK_RANGE_SELECT* pointer
- n_ranges Number of ranges in the sequence (ignored)
- flags MRR flags (currently not used)
-
- RETURN
- Opaque value to be passed to quick_range_seq_next
-*/
-
-range_seq_t quick_range_seq_init(void *init_param, uint n_ranges, uint flags)
-{
- QUICK_RANGE_SELECT *quick= (QUICK_RANGE_SELECT*)init_param;
- quick->qr_traversal_ctx.first= (QUICK_RANGE**)quick->ranges.buffer;
- quick->qr_traversal_ctx.cur= (QUICK_RANGE**)quick->ranges.buffer;
- quick->qr_traversal_ctx.last= quick->qr_traversal_ctx.cur +
- quick->ranges.elements;
- return &quick->qr_traversal_ctx;
-}
-
-
-/*
- Range sequence interface implementation for array<QUICK_RANGE>: get next
-
- SYNOPSIS
- quick_range_seq_next()
- rseq Value returned from quick_range_seq_init
- range OUT Store information about the range here
-
- RETURN
- 0 Ok
- 1 No more ranges in the sequence
-*/
-
-uint quick_range_seq_next(range_seq_t rseq, KEY_MULTI_RANGE *range)
-{
- QUICK_RANGE_SEQ_CTX *ctx= (QUICK_RANGE_SEQ_CTX*)rseq;
-
- if (ctx->cur == ctx->last)
- return 1; /* no more ranges */
-
- QUICK_RANGE *cur= *(ctx->cur);
- key_range *start_key= &range->start_key;
- key_range *end_key= &range->end_key;
-
- start_key->key= cur->min_key;
- start_key->length= cur->min_length;
- start_key->keypart_map= cur->min_keypart_map;
- start_key->flag= ((cur->flag & NEAR_MIN) ? HA_READ_AFTER_KEY :
- (cur->flag & EQ_RANGE) ?
- HA_READ_KEY_EXACT : HA_READ_KEY_OR_NEXT);
- end_key->key= cur->max_key;
- end_key->length= cur->max_length;
- end_key->keypart_map= cur->max_keypart_map;
- /*
- We use HA_READ_AFTER_KEY here because if we are reading on a key
- prefix. We want to find all keys with this prefix.
- */
- end_key->flag= (cur->flag & NEAR_MAX ? HA_READ_BEFORE_KEY :
- HA_READ_AFTER_KEY);
- range->range_flag= cur->flag;
- ctx->cur++;
- return 0;
-}
-
-
-/*
Get next possible record using quick-struct.
SYNOPSIS
@@ -9658,7 +9313,7 @@ get_best_group_min_max(PARAM *param, SEL
uint mrr_flags= HA_MRR_USE_DEFAULT_IMPL;
uint mrr_bufsize=0;
cur_quick_prefix_records= check_quick_select(param, cur_param_idx,
- FALSE /*don't care(*/,
+ FALSE /*don't care*/,
cur_index_tree, TRUE,
&mrr_flags, &mrr_bufsize,
&dummy_cost);
=== modified file 'sql/opt_range.h'
--- a/sql/opt_range.h 2009-12-15 07:16:46 +0000
+++ b/sql/opt_range.h 2009-12-22 12:33:21 +0000
@@ -317,7 +317,7 @@ protected:
public:
uint mrr_flags; /* Flags to be used with MRR interface */
protected:
- uint mrr_buf_size; /* copy from thd->variables.read_rnd_buff_size */
+ uint mrr_buf_size; /* copy from thd->variables.mrr_buff_size */
HANDLER_BUFFER *mrr_buf_desc; /* the handler buffer */
/* Info about index we're scanning */
=== added file 'sql/opt_range_mrr.cc'
--- a/sql/opt_range_mrr.cc 1970-01-01 00:00:00 +0000
+++ b/sql/opt_range_mrr.cc 2009-12-22 12:33:21 +0000
@@ -0,0 +1,349 @@
+
+/****************************************************************************
+ MRR Range Sequence Interface implementation that walks a SEL_ARG* tree.
+ ****************************************************************************/
+
+/* MRR range sequence, SEL_ARG* implementation: stack entry */
+typedef struct st_range_seq_entry
+{
+ /*
+ Pointers in min and max keys. They point to right-after-end of key
+ images. The 0-th entry has these pointing to key tuple start.
+ */
+ uchar *min_key, *max_key;
+
+ /*
+ Flags, for {keypart0, keypart1, ... this_keypart} subtuple.
+ min_key_flag may have NULL_RANGE set.
+ */
+ uint min_key_flag, max_key_flag;
+
+ /* Number of key parts */
+ uint min_key_parts, max_key_parts;
+ SEL_ARG *key_tree;
+} RANGE_SEQ_ENTRY;
+
+
+/*
+ MRR range sequence, SEL_ARG* implementation: SEL_ARG graph traversal context
+*/
+typedef struct st_sel_arg_range_seq
+{
+ uint keyno; /* index of used tree in SEL_TREE structure */
+ uint real_keyno; /* Number of the index in tables */
+ PARAM *param;
+ SEL_ARG *start; /* Root node of the traversed SEL_ARG* graph */
+
+ RANGE_SEQ_ENTRY stack[MAX_REF_PARTS];
+ int i; /* Index of last used element in the above array */
+
+ bool at_start; /* TRUE <=> The traversal has just started */
+} SEL_ARG_RANGE_SEQ;
+
+
+/*
+ Range sequence interface, SEL_ARG* implementation: Initialize the traversal
+
+ SYNOPSIS
+ sel_arg_range_seq_init()
+ init_param SEL_ARG tree traversal context
+ n_ranges [ignored] The number of ranges obtained
+ flags [ignored] HA_MRR_SINGLE_POINT, HA_MRR_FIXED_KEY
+
+ RETURN
+ Value of init_param
+*/
+
+range_seq_t sel_arg_range_seq_init(void *init_param, uint n_ranges, uint flags)
+{
+ SEL_ARG_RANGE_SEQ *seq= (SEL_ARG_RANGE_SEQ*)init_param;
+ seq->at_start= TRUE;
+ seq->stack[0].key_tree= NULL;
+ seq->stack[0].min_key= seq->param->min_key;
+ seq->stack[0].min_key_flag= 0;
+ seq->stack[0].min_key_parts= 0;
+
+ seq->stack[0].max_key= seq->param->max_key;
+ seq->stack[0].max_key_flag= 0;
+ seq->stack[0].max_key_parts= 0;
+ seq->i= 0;
+ return init_param;
+}
+
+
+static void step_down_to(SEL_ARG_RANGE_SEQ *arg, SEL_ARG *key_tree)
+{
+ RANGE_SEQ_ENTRY *cur= &arg->stack[arg->i+1];
+ RANGE_SEQ_ENTRY *prev= &arg->stack[arg->i];
+
+ cur->key_tree= key_tree;
+ cur->min_key= prev->min_key;
+ cur->max_key= prev->max_key;
+ cur->min_key_parts= prev->min_key_parts;
+ cur->max_key_parts= prev->max_key_parts;
+
+ uint16 stor_length= arg->param->key[arg->keyno][key_tree->part].store_length;
+ cur->min_key_parts += key_tree->store_min(stor_length, &cur->min_key,
+ prev->min_key_flag);
+ cur->max_key_parts += key_tree->store_max(stor_length, &cur->max_key,
+ prev->max_key_flag);
+
+ cur->min_key_flag= prev->min_key_flag | key_tree->min_flag;
+ cur->max_key_flag= prev->max_key_flag | key_tree->max_flag;
+
+ if (key_tree->is_null_interval())
+ cur->min_key_flag |= NULL_RANGE;
+ (arg->i)++;
+}
+
+
+/*
+ Range sequence interface, SEL_ARG* implementation: get the next interval
+
+ SYNOPSIS
+ sel_arg_range_seq_next()
+ rseq Value returned from sel_arg_range_seq_init
+ range OUT Store information about the range here
+
+ DESCRIPTION
+ This is "get_next" function for Range sequence interface implementation
+ for SEL_ARG* tree.
+
+ IMPLEMENTATION
+ The traversal also updates the following param members:
+ - is_ror_scan
+ - range_count
+ - max_key_part
+
+ RETURN
+ 0 Ok
+ 1 No more ranges in the sequence
+*/
+
+uint sel_arg_range_seq_next(range_seq_t rseq, KEY_MULTI_RANGE *range)
+{
+ SEL_ARG *key_tree;
+ SEL_ARG_RANGE_SEQ *seq= (SEL_ARG_RANGE_SEQ*)rseq;
+ if (seq->at_start)
+ {
+ key_tree= seq->start;
+ seq->at_start= FALSE;
+ goto walk_up_n_right;
+ }
+
+ key_tree= seq->stack[seq->i].key_tree;
+ /* Ok, we're at some "full tuple" position in the tree */
+
+ /* Step down if we can */
+ if (key_tree->next && key_tree->next != &null_element)
+ {
+ //step down; (update the tuple, we'll step right and stay there)
+ seq->i--;
+ step_down_to(seq, key_tree->next);
+ key_tree= key_tree->next;
+ seq->param->is_ror_scan= FALSE;
+ goto walk_right_n_up;
+ }
+
+ /* Ok, can't step down, walk left until we can step down */
+ while (1)
+ {
+ if (seq->i == 1) // can't step left
+ return 1;
+ /* Step left */
+ seq->i--;
+ key_tree= seq->stack[seq->i].key_tree;
+
+ /* Step down if we can */
+ if (key_tree->next && key_tree->next != &null_element)
+ {
+ // Step down; update the tuple
+ seq->i--;
+ step_down_to(seq, key_tree->next);
+ key_tree= key_tree->next;
+ break;
+ }
+ }
+
+ /*
+ Ok, we've stepped down from the path to previous tuple.
+ Walk right-up while we can
+ */
+walk_right_n_up:
+ while (key_tree->next_key_part && key_tree->next_key_part != &null_element &&
+ key_tree->next_key_part->part == key_tree->part + 1 &&
+ key_tree->next_key_part->type == SEL_ARG::KEY_RANGE)
+ {
+ {
+ RANGE_SEQ_ENTRY *cur= &seq->stack[seq->i];
+ uint min_key_length= cur->min_key - seq->param->min_key;
+ uint max_key_length= cur->max_key - seq->param->max_key;
+ uint len= cur->min_key - cur[-1].min_key;
+ if (!(min_key_length == max_key_length &&
+ !memcmp(cur[-1].min_key, cur[-1].max_key, len) &&
+ !key_tree->min_flag && !key_tree->max_flag))
+ {
+ seq->param->is_ror_scan= FALSE;
+ if (!key_tree->min_flag)
+ cur->min_key_parts +=
+ key_tree->next_key_part->store_min_key(seq->param->key[seq->keyno],
+ &cur->min_key,
+ &cur->min_key_flag);
+ if (!key_tree->max_flag)
+ cur->max_key_parts +=
+ key_tree->next_key_part->store_max_key(seq->param->key[seq->keyno],
+ &cur->max_key,
+ &cur->max_key_flag);
+ break;
+ }
+ }
+
+ /*
+ Ok, current atomic interval is in form "t.field=const" and there is
+ next_key_part interval. Step right, and walk up from there.
+ */
+ key_tree= key_tree->next_key_part;
+
+walk_up_n_right:
+ while (key_tree->prev && key_tree->prev != &null_element)
+ {
+ /* Step up */
+ key_tree= key_tree->prev;
+ }
+ step_down_to(seq, key_tree);
+ }
+
+ /* Ok got a tuple */
+ RANGE_SEQ_ENTRY *cur= &seq->stack[seq->i];
+ uint min_key_length= cur->min_key - seq->param->min_key;
+
+ range->ptr= (char*)(int)(key_tree->part);
+ if (cur->min_key_flag & GEOM_FLAG)
+ {
+ range->range_flag= cur->min_key_flag;
+
+ /* Here minimum contains also function code bits, and maximum is +inf */
+ range->start_key.key= seq->param->min_key;
+ range->start_key.length= min_key_length;
+ range->start_key.flag= (ha_rkey_function) (cur->min_key_flag ^ GEOM_FLAG);
+ }
+ else
+ {
+ range->range_flag= cur->min_key_flag | cur->max_key_flag;
+
+ range->start_key.key= seq->param->min_key;
+ range->start_key.length= cur->min_key - seq->param->min_key;
+ range->start_key.keypart_map= make_prev_keypart_map(cur->min_key_parts);
+ range->start_key.flag= (cur->min_key_flag & NEAR_MIN ? HA_READ_AFTER_KEY :
+ HA_READ_KEY_EXACT);
+
+ range->end_key.key= seq->param->max_key;
+ range->end_key.length= cur->max_key - seq->param->max_key;
+ range->end_key.flag= (cur->max_key_flag & NEAR_MAX ? HA_READ_BEFORE_KEY :
+ HA_READ_AFTER_KEY);
+ range->end_key.keypart_map= make_prev_keypart_map(cur->max_key_parts);
+
+ if (!(cur->min_key_flag & ~NULL_RANGE) && !cur->max_key_flag &&
+ (uint)key_tree->part+1 == seq->param->table->key_info[seq->real_keyno].key_parts &&
+ (seq->param->table->key_info[seq->real_keyno].flags & (HA_NOSAME | HA_END_SPACE_KEY)) ==
+ HA_NOSAME &&
+ range->start_key.length == range->end_key.length &&
+ !memcmp(seq->param->min_key,seq->param->max_key,range->start_key.length))
+ range->range_flag= UNIQUE_RANGE | (cur->min_key_flag & NULL_RANGE);
+
+ if (seq->param->is_ror_scan)
+ {
+ /*
+ If we get here, the condition on the key was converted to form
+ "(keyXpart1 = c1) AND ... AND (keyXpart{key_tree->part - 1} = cN) AND
+ somecond(keyXpart{key_tree->part})"
+ Check if
+ somecond is "keyXpart{key_tree->part} = const" and
+ uncovered "tail" of KeyX parts is either empty or is identical to
+ first members of clustered primary key.
+ */
+ if (!(!(cur->min_key_flag & ~NULL_RANGE) && !cur->max_key_flag &&
+ (range->start_key.length == range->end_key.length) &&
+ !memcmp(range->start_key.key, range->end_key.key, range->start_key.length) &&
+ is_key_scan_ror(seq->param, seq->real_keyno, key_tree->part + 1)))
+ seq->param->is_ror_scan= FALSE;
+ }
+ }
+ seq->param->range_count++;
+ seq->param->max_key_part=max(seq->param->max_key_part,key_tree->part);
+ return 0;
+}
+
+/****************************************************************************
+ MRR Range Sequence Interface implementation that walks array<QUICK_RANGE>
+ ****************************************************************************/
+
+/*
+ Range sequence interface implementation for array<QUICK_RANGE>: initialize
+
+ SYNOPSIS
+ quick_range_seq_init()
+ init_param Caller-opaque parameter: QUICK_RANGE_SELECT* pointer
+ n_ranges Number of ranges in the sequence (ignored)
+ flags MRR flags (currently not used)
+
+ RETURN
+ Opaque value to be passed to quick_range_seq_next
+*/
+
+range_seq_t quick_range_seq_init(void *init_param, uint n_ranges, uint flags)
+{
+ QUICK_RANGE_SELECT *quick= (QUICK_RANGE_SELECT*)init_param;
+ quick->qr_traversal_ctx.first= (QUICK_RANGE**)quick->ranges.buffer;
+ quick->qr_traversal_ctx.cur= (QUICK_RANGE**)quick->ranges.buffer;
+ quick->qr_traversal_ctx.last= quick->qr_traversal_ctx.cur +
+ quick->ranges.elements;
+ return &quick->qr_traversal_ctx;
+}
+
+
+/*
+ Range sequence interface implementation for array<QUICK_RANGE>: get next
+
+ SYNOPSIS
+ quick_range_seq_next()
+ rseq Value returned from quick_range_seq_init
+ range OUT Store information about the range here
+
+ RETURN
+ 0 Ok
+ 1 No more ranges in the sequence
+*/
+
+uint quick_range_seq_next(range_seq_t rseq, KEY_MULTI_RANGE *range)
+{
+ QUICK_RANGE_SEQ_CTX *ctx= (QUICK_RANGE_SEQ_CTX*)rseq;
+
+ if (ctx->cur == ctx->last)
+ return 1; /* no more ranges */
+
+ QUICK_RANGE *cur= *(ctx->cur);
+ key_range *start_key= &range->start_key;
+ key_range *end_key= &range->end_key;
+
+ start_key->key= cur->min_key;
+ start_key->length= cur->min_length;
+ start_key->keypart_map= cur->min_keypart_map;
+ start_key->flag= ((cur->flag & NEAR_MIN) ? HA_READ_AFTER_KEY :
+ (cur->flag & EQ_RANGE) ?
+ HA_READ_KEY_EXACT : HA_READ_KEY_OR_NEXT);
+ end_key->key= cur->max_key;
+ end_key->length= cur->max_length;
+ end_key->keypart_map= cur->max_keypart_map;
+ /*
+ We use HA_READ_AFTER_KEY here because, if we are reading on a key
+ prefix, we want to find all keys with this prefix.
+ */
+ end_key->flag= (cur->flag & NEAR_MAX ? HA_READ_BEFORE_KEY :
+ HA_READ_AFTER_KEY);
+ range->range_flag= cur->flag;
+ ctx->cur++;
+ return 0;
+}
+
+
=== modified file 'sql/set_var.cc'
--- a/sql/set_var.cc 2009-12-21 02:26:15 +0000
+++ b/sql/set_var.cc 2009-12-22 12:49:15 +0000
@@ -528,6 +528,8 @@ static sys_var_bool_ptr sys_user
static sys_var_thd_ulong sys_read_rnd_buff_size(&vars, "read_rnd_buffer_size",
&SV::read_rnd_buff_size);
+static sys_var_thd_ulong sys_mrr_buff_size(&vars, "mrr_buffer_size",
+ &SV::mrr_buff_size);
static sys_var_thd_ulong sys_div_precincrement(&vars, "div_precision_increment",
&SV::div_precincrement);
static sys_var_long_ptr sys_rpl_recovery_rank(&vars, "rpl_recovery_rank",
=== modified file 'sql/sql_class.h'
--- a/sql/sql_class.h 2009-12-21 02:26:15 +0000
+++ b/sql/sql_class.h 2009-12-22 12:49:15 +0000
@@ -340,6 +340,7 @@ struct system_variables
ulong query_cache_type;
ulong read_buff_size;
ulong read_rnd_buff_size;
+ ulong mrr_buff_size;
ulong div_precincrement;
ulong sortbuff_size;
ulong thread_handling;
=== modified file 'storage/maria/ha_maria.cc'
--- a/storage/maria/ha_maria.cc 2009-12-15 07:16:46 +0000
+++ b/storage/maria/ha_maria.cc 2009-12-22 12:33:21 +0000
@@ -2022,16 +2022,15 @@ int ha_maria::delete_row(const uchar * b
C_MODE_START
-my_bool index_cond_func_maria(void *arg)
+ICP_RESULT index_cond_func_maria(void *arg)
{
ha_maria *h= (ha_maria*)arg;
- /*if (h->in_range_read)*/
if (h->end_range)
{
if (h->compare_key2(h->end_range) > 0)
- return 2; /* caller should return HA_ERR_END_OF_FILE already */
+ return ICP_OUT_OF_RANGE; /* caller should return HA_ERR_END_OF_FILE already */
}
- return (my_bool)h->pushed_idx_cond->val_int();
+ return h->pushed_idx_cond->val_int() ? ICP_MATCH : ICP_NO_MATCH;
}
C_MODE_END
=== modified file 'storage/maria/ha_maria.h'
--- a/storage/maria/ha_maria.h 2009-12-15 07:16:46 +0000
+++ b/storage/maria/ha_maria.h 2009-12-22 12:33:21 +0000
@@ -29,7 +29,7 @@
#define HA_RECOVER_QUICK 8 /* Don't check rows in data file */
C_MODE_START
-my_bool index_cond_func_maria(void *arg);
+ICP_RESULT index_cond_func_maria(void *arg);
C_MODE_END
extern ulong maria_sort_buffer_size;
@@ -187,5 +187,5 @@ public:
Item *idx_cond_push(uint keyno, Item* idx_cond);
private:
DsMrr_impl ds_mrr;
- friend my_bool index_cond_func_maria(void *arg);
+ friend ICP_RESULT index_cond_func_maria(void *arg);
};
=== modified file 'storage/maria/ma_key.c'
--- a/storage/maria/ma_key.c 2009-12-15 07:16:46 +0000
+++ b/storage/maria/ma_key.c 2009-12-22 12:33:21 +0000
@@ -669,10 +669,10 @@ int _ma_read_key_record(MARIA_HA *info,
will look for column values there)
RETURN
- -1 Error
- 0 Index condition is not satisfied, continue scanning
- 1 Index condition is satisfied
- 2 Index condition is not satisfied, end the scan.
+ ICP_ERROR Error
+ ICP_NO_MATCH Index condition is not satisfied, continue scanning
+ ICP_MATCH Index condition is satisfied
+ ICP_OUT_OF_RANGE Index condition is not satisfied, end the scan.
*/
int ma_check_index_cond(register MARIA_HA *info, uint keynr, uchar *record)
=== modified file 'storage/maria/maria_def.h'
--- a/storage/maria/maria_def.h 2009-12-15 07:16:46 +0000
+++ b/storage/maria/maria_def.h 2009-12-22 12:33:21 +0000
@@ -477,8 +477,7 @@ typedef struct st_maria_block_scan
MARIA_RECORD_POS row_base_page;
} MARIA_BLOCK_SCAN;
-/*psergey-todo: do really need to have copies of this all over the place?*/
-typedef my_bool (*index_cond_func_t)(void *param);
+typedef ICP_RESULT (*index_cond_func_t)(void *param);
struct st_maria_handler
{
=== modified file 'storage/myisam/mi_key.c'
--- a/storage/myisam/mi_key.c 2009-12-15 07:16:46 +0000
+++ b/storage/myisam/mi_key.c 2009-12-22 12:33:21 +0000
@@ -504,10 +504,10 @@ int _mi_read_key_record(MI_INFO *info, m
will look for column values there)
RETURN
- -1 Error
- 0 Index condition is not satisfied, continue scanning
- 1 Index condition is satisfied
- 2 Index condition is not satisfied, end the scan.
+ ICP_ERROR Error
+ ICP_NO_MATCH Index condition is not satisfied, continue scanning
+ ICP_MATCH Index condition is satisfied
+ ICP_OUT_OF_RANGE Index condition is not satisfied, end the scan.
*/
int mi_check_index_cond(register MI_INFO *info, uint keynr, uchar *record)
@@ -516,7 +516,7 @@ int mi_check_index_cond(register MI_INFO
{
mi_print_error(info->s, HA_ERR_CRASHED);
my_errno=HA_ERR_CRASHED;
- return -1;
+ return ICP_ERROR;
}
return info->index_cond_func(info->index_cond_func_arg);
}
=== modified file 'storage/myisam/mi_rkey.c'
--- a/storage/myisam/mi_rkey.c 2009-12-15 07:16:46 +0000
+++ b/storage/myisam/mi_rkey.c 2009-12-22 12:33:21 +0000
@@ -29,7 +29,7 @@ int mi_rkey(MI_INFO *info, uchar *buf, i
MI_KEYDEF *keyinfo;
HA_KEYSEG *last_used_keyseg;
uint pack_key_length, use_key_length, nextflag;
- int res= 0;
+ ICP_RESULT res= ICP_NO_MATCH;
DBUG_ENTER("mi_rkey");
DBUG_PRINT("enter", ("base: 0x%lx buf: 0x%lx inx: %d search_flag: %d",
(long) info, (long) buf, inx, search_flag));
@@ -118,7 +118,7 @@ int mi_rkey(MI_INFO *info, uchar *buf, i
(search_flag != HA_READ_KEY_EXACT ||
last_used_keyseg != keyinfo->seg + keyinfo->keysegs)) ||
(info->index_cond_func &&
- !(res= mi_check_index_cond(info, inx, buf))))
+ (res= mi_check_index_cond(info, inx, buf)) == ICP_NO_MATCH))
{
uint not_used[2];
/*
@@ -146,7 +146,7 @@ int mi_rkey(MI_INFO *info, uchar *buf, i
break;
}
}
- if (res == 2)
+ if (res == ICP_OUT_OF_RANGE)
{
info->lastpos= HA_OFFSET_ERROR;
if (share->concurrent_insert)
=== modified file 'storage/myisam/mi_rnext.c'
--- a/storage/myisam/mi_rnext.c 2009-12-15 07:16:46 +0000
+++ b/storage/myisam/mi_rnext.c 2009-12-22 12:33:21 +0000
@@ -28,7 +28,7 @@ int mi_rnext(MI_INFO *info, uchar *buf,
{
int error,changed;
uint flag;
- int res= 0;
+ ICP_RESULT res= 0;
DBUG_ENTER("mi_rnext");
if ((inx = _mi_check_index(info,inx)) < 0)
@@ -87,7 +87,7 @@ int mi_rnext(MI_INFO *info, uchar *buf,
while ((info->s->concurrent_insert &&
info->lastpos >= info->state->data_file_length) ||
(info->index_cond_func &&
- !(res= mi_check_index_cond(info, inx, buf))))
+ (res= mi_check_index_cond(info, inx, buf)) == ICP_NO_MATCH))
{
/*
Skip rows that are either inserted by other threads since
@@ -100,7 +100,7 @@ int mi_rnext(MI_INFO *info, uchar *buf,
info->s->state.key_root[inx])))
break;
}
- if (!error && res == 2)
+ if (!error && res == ICP_OUT_OF_RANGE)
{
if (info->s->concurrent_insert)
rw_unlock(&info->s->key_root_lock[inx]);
=== modified file 'storage/myisam/mi_rnext_same.c'
--- a/storage/myisam/mi_rnext_same.c 2009-12-15 07:16:46 +0000
+++ b/storage/myisam/mi_rnext_same.c 2009-12-22 12:33:21 +0000
@@ -75,9 +75,13 @@ int mi_rnext_same(MI_INFO *info, uchar *
info->lastpos= HA_OFFSET_ERROR;
break;
}
- /* Skip rows that are inserted by other threads since we got a lock */
+ /*
+ Skip
+ - rows that are inserted by other threads since we got a lock
+ - rows that don't match index condition */
if (info->lastpos < info->state->data_file_length &&
- (!info->index_cond_func || mi_check_index_cond(info, inx, buf)))
+ (!info->index_cond_func ||
+ mi_check_index_cond(info, inx, buf) != ICP_NO_MATCH))
break;
}
}
=== modified file 'storage/xtradb/handler/ha_innodb.cc'
--- a/storage/xtradb/handler/ha_innodb.cc 2009-12-15 07:16:46 +0000
+++ b/storage/xtradb/handler/ha_innodb.cc 2009-12-22 12:33:21 +0000
@@ -114,7 +114,7 @@ static pthread_mutex_t commit_cond_m;
static bool innodb_inited = 0;
C_MODE_START
-static uint index_cond_func_innodb(void *arg);
+static int index_cond_func_innodb(void *arg);
C_MODE_END
@@ -10765,24 +10765,12 @@ ha_rows ha_innobase::multi_range_read_in
{
/* See comments in ha_myisam::multi_range_read_info_const */
ds_mrr.init(this, table);
- //psergey-mrr-fix:
+
if (prebuilt->select_lock_type != LOCK_NONE)
*flags |= HA_MRR_USE_DEFAULT_IMPL;
- uint orig_flags= *flags;
-
ha_rows res= ds_mrr.dsmrr_info_const(keyno, seq, seq_init_param, n_ranges,
bufsz, flags, cost);
-
- bool disable_ds_mrr= true;
- disable_ds_mrr= false;
-// DBUG_EXECUTE_IF("optimizer_innodb_ds_mrr", disable_ds_mrr= false;);
- if (!disable_ds_mrr)
- return res;
-
- /* Disable DS-MRR: enable MS-MRR only after critical bugs are fixed */
- *bufsz= 0;
- *flags = orig_flags | HA_MRR_USE_DEFAULT_IMPL;
return res;
}
@@ -10791,17 +10779,7 @@ ha_rows ha_innobase::multi_range_read_in
uint *flags, COST_VECT *cost)
{
ds_mrr.init(this, table);
- uint orig_flags= *flags;
-
ha_rows res= ds_mrr.dsmrr_info(keyno, n_ranges, keys, bufsz, flags, cost);
- bool disable_ds_mrr= false;
- // DBUG_EXECUTE_IF("optimizer_innodb_ds_mrr", disable_ds_mrr= false;);
- if (!disable_ds_mrr)
- return res;
-
- /* Disable DS-MRR: enable MS-MRR only after critical bugs are fixed */
- *bufsz= 0;
- *flags = orig_flags | HA_MRR_USE_DEFAULT_IMPL;
return res;
}
@@ -10818,15 +10796,15 @@ C_MODE_START
See note on ICP_RESULT for return values description.
*/
-static uint index_cond_func_innodb(void *arg)
+static int index_cond_func_innodb(void *arg)
{
ha_innobase *h= (ha_innobase*)arg;
if (h->end_range)
{
if (h->compare_key2(h->end_range) > 0)
- return 2; /* caller should return HA_ERR_END_OF_FILE already */
+ return ICP_OUT_OF_RANGE; /* caller should return HA_ERR_END_OF_FILE already */
}
- return test(h->pushed_idx_cond->val_int());
+ return h->pushed_idx_cond->val_int()? ICP_MATCH : ICP_NO_MATCH;
}
C_MODE_END
@@ -10834,8 +10812,7 @@ C_MODE_END
Item *ha_innobase::idx_cond_push(uint keyno_arg, Item* idx_cond_arg)
{
- // V :psergey-mrrr-merge: V
- if (keyno_arg != primary_key && (prebuilt->select_lock_type == LOCK_NONE))
+ if ((keyno_arg != primary_key) && (prebuilt->select_lock_type == LOCK_NONE))
{
pushed_idx_cond_keyno= keyno_arg;
pushed_idx_cond= idx_cond_arg;
=== modified file 'storage/xtradb/include/row0mysql.h'
--- a/storage/xtradb/include/row0mysql.h 2009-12-15 07:16:46 +0000
+++ b/storage/xtradb/include/row0mysql.h 2009-12-22 12:33:21 +0000
@@ -564,7 +564,7 @@ struct mysql_row_templ_struct {
#define ROW_PREBUILT_ALLOCATED 78540783
#define ROW_PREBUILT_FREED 26423527
-typedef uint (*index_cond_func_t)(void *param);
+typedef int (*index_cond_func_t)(void *param);
/* A struct for (sometimes lazily) prebuilt structures in an Innobase table
handle used within MySQL; these are used to save CPU time. */
=== modified file 'storage/xtradb/row/row0sel.c'
--- a/storage/xtradb/row/row0sel.c 2009-12-15 07:16:46 +0000
+++ b/storage/xtradb/row/row0sel.c 2009-12-22 12:33:21 +0000
@@ -3116,10 +3116,14 @@ row_sel_pop_cached_row_for_mysql(
/* Copy NULL bit of the current field from cached_rec
to buf */
if (templ->mysql_null_bit_mask) {
- buf[templ->mysql_null_byte_offset]
+ /*buf[templ->mysql_null_byte_offset]
^= (buf[templ->mysql_null_byte_offset]
^ cached_rec[templ->mysql_null_byte_offset])
- & (byte)templ->mysql_null_bit_mask;
+ & (byte)templ->mysql_null_bit_mask;*/
+ byte *null_byte= buf + templ->mysql_null_byte_offset;
+ (*null_byte)&= ~templ->mysql_null_bit_mask;
+ (*null_byte)|= cached_rec[templ->mysql_null_byte_offset] &
+ templ->mysql_null_bit_mask;
}
}
}
@@ -3354,10 +3358,8 @@ row_search_for_mysql(
mem_heap_t* heap = NULL;
ulint offsets_[REC_OFFS_NORMAL_SIZE];
ulint* offsets = offsets_;
- /*psergey-mrr:*/
ibool some_fields_in_buffer;
ibool get_clust_rec = 0;
- /*:psergey-mrr*/
rec_offs_init(offsets_);
@@ -4210,11 +4212,8 @@ no_gap_lock:
information via the clustered index record. */
ut_ad(index != clust_index);
- /*psergey-mrr:*/
get_clust_rec = TRUE;
goto idx_cond_check;
- /**goto requires_clust_rec;**/
- /*:psergey-mrr*/
}
}
@@ -4260,22 +4259,20 @@ no_gap_lock:
idx_cond_check:
- if (prebuilt->idx_cond_func)
- {
- int res;
- ut_ad(prebuilt->template_type != ROW_MYSQL_DUMMY_TEMPLATE);
- offsets = rec_get_offsets(rec, index, offsets, ULINT_UNDEFINED, &heap);
- row_sel_store_mysql_rec(buf, prebuilt, rec,
- offsets, 0, prebuilt->n_index_fields);
- res= prebuilt->idx_cond_func(prebuilt->idx_cond_func_arg);
- if (res == 0)
- goto next_rec;
- if (res == 2)
- {
- err = DB_RECORD_NOT_FOUND;
- goto idx_cond_failed;
- }
- }
+ if (prebuilt->idx_cond_func) {
+ int res;
+ ut_ad(prebuilt->template_type != ROW_MYSQL_DUMMY_TEMPLATE);
+ offsets = rec_get_offsets(rec, index, offsets, ULINT_UNDEFINED, &heap);
+ row_sel_store_mysql_rec(buf, prebuilt, rec,
+ offsets, 0, prebuilt->n_index_fields);
+ res= prebuilt->idx_cond_func(prebuilt->idx_cond_func_arg);
+ if (res == 0)
+ goto next_rec;
+ if (res == 2) {
+ err = DB_RECORD_NOT_FOUND;
+ goto idx_cond_failed;
+ }
+ }
/* Get the clustered index record if needed, if we did not do the
search using the clustered index. */