maria-developers team mailing list archive
-
maria-developers team
-
Mailing list archive
-
Message #02601
Rev 2774: BUG#45174: XOR in subqueries produces differing results in 5.1 and 5.4 in file:///home/psergey/dev/maria-5.3-subqueries-r7/
At file:///home/psergey/dev/maria-5.3-subqueries-r7/
------------------------------------------------------------
revno: 2774
revision-id: psergey@xxxxxxxxxxxx-20100313200452-kq4dxayp7b45zum1
parent: psergey@xxxxxxxxxxxx-20100307154145-ksby2b1l0sqm1xne
committer: Sergey Petrunya <psergey@xxxxxxxxxxxx>
branch nick: maria-5.3-subqueries-r7
timestamp: Sat 2010-03-13 23:04:52 +0300
message:
BUG#45174: XOR in subqueries produces differing results in 5.1 and 5.4
BUG#50019: Wrong result for IN-subquery with materialization
- Fix equality substitution in presense of semi-join materialization, lookup and scan variants
(started off from fix by Evgen Potemkin, then modified it to work in all cases)
=== modified file 'mysql-test/r/subselect_mat.result'
--- a/mysql-test/r/subselect_mat.result 2010-01-17 14:51:10 +0000
+++ b/mysql-test/r/subselect_mat.result 2010-03-13 20:04:52 +0000
@@ -1219,3 +1219,28 @@
pk
2
DROP TABLE t1, t2;
+#
+# BUG#50019: Wrong result for IN-subquery with materialization
+#
+create table t1(i int);
+insert into t1 values (1), (2), (3), (4), (5), (6), (7), (8), (9), (10);
+create table t2(i int);
+insert into t2 values (1), (2), (3), (4), (5), (6), (7), (8), (9), (10);
+create table t3(i int);
+insert into t3 values (1), (2), (3), (4), (5), (6), (7), (8), (9), (10);
+select * from t1 where t1.i in (select t2.i from t2 join t3 where t2.i + t3.i = 5);
+i
+1
+2
+3
+4
+set @save_optimizer_switch=@@optimizer_switch;
+set session optimizer_switch='materialization=off';
+select * from t1 where t1.i in (select t2.i from t2 join t3 where t2.i + t3.i = 5);
+i
+1
+2
+3
+4
+set session optimizer_switch=@save_optimizer_switch;
+drop table t1, t2, t3;
=== modified file 'mysql-test/r/subselect_sj.result'
--- a/mysql-test/r/subselect_sj.result 2010-02-24 11:33:42 +0000
+++ b/mysql-test/r/subselect_sj.result 2010-03-13 20:04:52 +0000
@@ -871,3 +871,39 @@
DROP TABLE t1, t2, t3;
DROP VIEW v2, v3;
# End of Bug#49198
+#
+# Bug#45174: Incorrectly applied equality propagation caused wrong
+# result on a query with a materialized semi-join.
+#
+CREATE TABLE `t1` (
+`pk` int(11) NOT NULL AUTO_INCREMENT,
+`varchar_key` varchar(1) NOT NULL,
+`varchar_nokey` varchar(1) NOT NULL,
+PRIMARY KEY (`pk`),
+KEY `varchar_key` (`varchar_key`)
+);
+INSERT INTO `t1` VALUES (11,'m','m'),(12,'j','j'),(13,'z','z'),(14,'a','a'),(15,'',''),(16,'e','e'),(17,'t','t'),(19,'b','b'),(20,'w','w'),(21,'m','m'),(23,'',''),(24,'w','w'),(26,'e','e'),(27,'e','e'),(28,'p','p');
+CREATE TABLE `t2` (
+`varchar_nokey` varchar(1) NOT NULL
+);
+INSERT INTO `t2` VALUES ('v'),('u'),('n'),('l'),('h'),('u'),('n'),('j'),('k'),('e'),('i'),('u'),('n'),('b'),('x'),(''),('q'),('u');
+EXPLAIN EXTENDED SELECT varchar_nokey
+FROM t2
+WHERE ( `varchar_nokey` , `varchar_nokey` ) IN (
+SELECT `varchar_key` , `varchar_nokey`
+FROM t1
+WHERE `varchar_nokey` < 'n' XOR `pk` ) ;
+id select_type table type possible_keys key key_len ref rows filtered Extra
+1 PRIMARY t2 ALL NULL NULL NULL NULL 18 100.00
+1 PRIMARY t1 ALL varchar_key NULL NULL NULL 15 100.00 Using where; Materialize
+Warnings:
+Note 1003 select `test`.`t2`.`varchar_nokey` AS `varchar_nokey` from `test`.`t2` semi join (`test`.`t1`) where ((`test`.`t1`.`varchar_nokey` = `test`.`t1`.`varchar_key`) and ((`test`.`t1`.`varchar_nokey` < 'n') xor `test`.`t1`.`pk`))
+SELECT varchar_nokey
+FROM t2
+WHERE ( `varchar_nokey` , `varchar_nokey` ) IN (
+SELECT `varchar_key` , `varchar_nokey`
+FROM t1
+WHERE `varchar_nokey` < 'n' XOR `pk` ) ;
+varchar_nokey
+DROP TABLE t1, t2;
+# End of the test for bug#45174.
=== modified file 'mysql-test/r/subselect_sj_jcl6.result'
--- a/mysql-test/r/subselect_sj_jcl6.result 2010-03-07 15:41:45 +0000
+++ b/mysql-test/r/subselect_sj_jcl6.result 2010-03-13 20:04:52 +0000
@@ -876,6 +876,42 @@
DROP VIEW v2, v3;
# End of Bug#49198
#
+# Bug#45174: Incorrectly applied equality propagation caused wrong
+# result on a query with a materialized semi-join.
+#
+CREATE TABLE `t1` (
+`pk` int(11) NOT NULL AUTO_INCREMENT,
+`varchar_key` varchar(1) NOT NULL,
+`varchar_nokey` varchar(1) NOT NULL,
+PRIMARY KEY (`pk`),
+KEY `varchar_key` (`varchar_key`)
+);
+INSERT INTO `t1` VALUES (11,'m','m'),(12,'j','j'),(13,'z','z'),(14,'a','a'),(15,'',''),(16,'e','e'),(17,'t','t'),(19,'b','b'),(20,'w','w'),(21,'m','m'),(23,'',''),(24,'w','w'),(26,'e','e'),(27,'e','e'),(28,'p','p');
+CREATE TABLE `t2` (
+`varchar_nokey` varchar(1) NOT NULL
+);
+INSERT INTO `t2` VALUES ('v'),('u'),('n'),('l'),('h'),('u'),('n'),('j'),('k'),('e'),('i'),('u'),('n'),('b'),('x'),(''),('q'),('u');
+EXPLAIN EXTENDED SELECT varchar_nokey
+FROM t2
+WHERE ( `varchar_nokey` , `varchar_nokey` ) IN (
+SELECT `varchar_key` , `varchar_nokey`
+FROM t1
+WHERE `varchar_nokey` < 'n' XOR `pk` ) ;
+id select_type table type possible_keys key key_len ref rows filtered Extra
+1 PRIMARY t2 ALL NULL NULL NULL NULL 18 100.00
+1 PRIMARY t1 ALL varchar_key NULL NULL NULL 15 100.00 Using where; Materialize
+Warnings:
+Note 1003 select `test`.`t2`.`varchar_nokey` AS `varchar_nokey` from `test`.`t2` semi join (`test`.`t1`) where ((`test`.`t1`.`varchar_nokey` = `test`.`t1`.`varchar_key`) and ((`test`.`t1`.`varchar_nokey` < 'n') xor `test`.`t1`.`pk`))
+SELECT varchar_nokey
+FROM t2
+WHERE ( `varchar_nokey` , `varchar_nokey` ) IN (
+SELECT `varchar_key` , `varchar_nokey`
+FROM t1
+WHERE `varchar_nokey` < 'n' XOR `pk` ) ;
+varchar_nokey
+DROP TABLE t1, t2;
+# End of the test for bug#45174.
+#
# BUG#49129: Wrong result with IN-subquery with join_cache_level=6 and firstmatch=off
#
CREATE TABLE t0 (a INT);
=== modified file 'mysql-test/t/subselect_mat.test'
--- a/mysql-test/t/subselect_mat.test 2010-01-17 14:51:10 +0000
+++ b/mysql-test/t/subselect_mat.test 2010-03-13 20:04:52 +0000
@@ -889,3 +889,19 @@
SELECT pk FROM t1 WHERE (b,c,d) IN (SELECT b,c,d FROM t2 WHERE pk > 0);
DROP TABLE t1, t2;
+--echo #
+--echo # BUG#50019: Wrong result for IN-subquery with materialization
+--echo #
+create table t1(i int);
+insert into t1 values (1), (2), (3), (4), (5), (6), (7), (8), (9), (10);
+create table t2(i int);
+insert into t2 values (1), (2), (3), (4), (5), (6), (7), (8), (9), (10);
+create table t3(i int);
+insert into t3 values (1), (2), (3), (4), (5), (6), (7), (8), (9), (10);
+select * from t1 where t1.i in (select t2.i from t2 join t3 where t2.i + t3.i = 5);
+set @save_optimizer_switch=@@optimizer_switch;
+set session optimizer_switch='materialization=off';
+select * from t1 where t1.i in (select t2.i from t2 join t3 where t2.i + t3.i = 5);
+set session optimizer_switch=@save_optimizer_switch;
+drop table t1, t2, t3;
+
=== modified file 'mysql-test/t/subselect_sj.test'
--- a/mysql-test/t/subselect_sj.test 2010-02-24 11:33:42 +0000
+++ b/mysql-test/t/subselect_sj.test 2010-03-13 20:04:52 +0000
@@ -770,3 +770,42 @@
DROP VIEW v2, v3;
--echo # End of Bug#49198
+
+--echo #
+--echo # Bug#45174: Incorrectly applied equality propagation caused wrong
+--echo # result on a query with a materialized semi-join.
+--echo #
+
+CREATE TABLE `t1` (
+ `pk` int(11) NOT NULL AUTO_INCREMENT,
+ `varchar_key` varchar(1) NOT NULL,
+ `varchar_nokey` varchar(1) NOT NULL,
+ PRIMARY KEY (`pk`),
+ KEY `varchar_key` (`varchar_key`)
+);
+
+INSERT INTO `t1` VALUES (11,'m','m'),(12,'j','j'),(13,'z','z'),(14,'a','a'),(15,'',''),(16,'e','e'),(17,'t','t'),(19,'b','b'),(20,'w','w'),(21,'m','m'),(23,'',''),(24,'w','w'),(26,'e','e'),(27,'e','e'),(28,'p','p');
+
+CREATE TABLE `t2` (
+ `varchar_nokey` varchar(1) NOT NULL
+);
+
+INSERT INTO `t2` VALUES ('v'),('u'),('n'),('l'),('h'),('u'),('n'),('j'),('k'),('e'),('i'),('u'),('n'),('b'),('x'),(''),('q'),('u');
+
+EXPLAIN EXTENDED SELECT varchar_nokey
+FROM t2
+WHERE ( `varchar_nokey` , `varchar_nokey` ) IN (
+SELECT `varchar_key` , `varchar_nokey`
+FROM t1
+WHERE `varchar_nokey` < 'n' XOR `pk` ) ;
+
+SELECT varchar_nokey
+FROM t2
+WHERE ( `varchar_nokey` , `varchar_nokey` ) IN (
+SELECT `varchar_key` , `varchar_nokey`
+FROM t1
+WHERE `varchar_nokey` < 'n' XOR `pk` ) ;
+
+DROP TABLE t1, t2;
+
+--echo # End of the test for bug#45174.
=== modified file 'sql/item.cc'
--- a/sql/item.cc 2010-02-24 11:33:42 +0000
+++ b/sql/item.cc 2010-03-13 20:04:52 +0000
@@ -4761,7 +4761,7 @@
return this;
return const_item;
}
- Item_field *subst= item_equal->get_first();
+ Item_field *subst= item_equal->get_first(this);
if (subst && field->table != subst->field->table && !field->eq(subst->field))
return subst;
}
=== modified file 'sql/item_cmpfunc.cc'
--- a/sql/item_cmpfunc.cc 2010-02-17 10:05:27 +0000
+++ b/sql/item_cmpfunc.cc 2010-03-13 20:04:52 +0000
@@ -5369,7 +5369,7 @@
void Item_equal::fix_length_and_dec()
{
- Item *item= get_first();
+ Item *item= get_first(NULL);
eval_item= cmp_item::get_comparator(item->result_type(),
item->collation.collation);
}
@@ -5432,3 +5432,128 @@
str->append(')');
}
+
+/*
+ @brief Get the first equal field of multiple equality.
+ @param[in] field the field to get equal field to
+
+ @details Get the first field of multiple equality that is equal to the
+ given field. In order to make semi-join materialization strategy work
+ correctly we can't propagate equal fields from upper select to a
+ materialized semi-join.
+ Thus the fields is returned according to following rules:
+
+ 1) If the given field belongs to a semi-join then the first field in
+ multiple equality which belong to the same semi-join is returned.
+ Otherwise NULL is returned.
+ 2) If the given field doesn't belong to a semi-join then
+ the first field in the multiple equality that doesn't belong to any
+ semi-join is returned.
+ If all fields in the equality are belong to semi-join(s) then NULL
+ is returned.
+ 3) If no field is given then the first field in the multiple equality
+ is returned without regarding whether it belongs to a semi-join or not.
+
+ @retval Found first field in the multiple equality.
+ @retval 0 if no field found.
+*/
+
+Item_field* Item_equal::get_first(Item_field *field)
+{
+ List_iterator<Item_field> it(fields);
+ Item_field *item;
+ JOIN_TAB *field_tab;
+
+ if (!field)
+ return fields.head();
+
+ /*
+ Of all equal fields, return the first one we can use. Normally, this is the
+ field which belongs to the table that is the first in the join order.
+
+ There is one exception to this: When semi-join materialization strategy is
+ used, and the given field belongs to a table within the semi-join nest, we
+ must pick the first field in the semi-join nest.
+
+ Example: suppose we have a join order:
+
+ ot1 ot2 SJ-Mat(it1 it2 it3) ot3
+
+ and equality ot2.col = it1.col = it2.col
+ If we're looking for best substitute for 'it2.col', we should pick it1.col
+ and not ot2.col.
+
+ eliminate_item_equal() also has code that deals with equality substitution
+ in presense of SJM nests.
+ */
+
+ field_tab= field->field->table->reginfo.join_tab;
+
+ TABLE_LIST *emb_nest= field->field->table->pos_in_table_list->embedding;
+
+ if (emb_nest && emb_nest->sj_mat_info && emb_nest->sj_mat_info->is_used)
+ {
+ /*
+ It's a field from an materialized semi-join. We can substitute it only
+ for a field from the same semi-join.
+ */
+ JOIN_TAB *first;
+ JOIN *join= field_tab->join;
+ uint tab_idx= field_tab - field_tab->join->join_tab;
+
+ /* Find the first table of this semi-join nest */
+ for (uint i= tab_idx; i != join->const_tables; i--)
+ {
+ if (join->join_tab[i].table->map & emb_nest->sj_inner_tables)
+ first= join->join_tab + i;
+ else
+ // Found first tab that doesn't belong to current SJ.
+ break;
+ }
+ /* Find an item to substitute for. */
+ while ((item= it++))
+ {
+ if (item->field->table->reginfo.join_tab >= first)
+ {
+ /*
+ If we found given field then return NULL to avoid unnecessary
+ substitution.
+ */
+ return (item != field) ? item : NULL;
+ }
+ }
+ }
+ else
+ {
+#if 0
+ /*
+ The field is not in SJ-Materialization nest. We must return the first
+ field that's not embedded in a SJ-Materialization nest.
+ Example: suppose we have a join order:
+
+ SJ-Mat(it1 it2) ot1 ot2
+
+ and equality ot2.col = ot1.col = it2.col
+ If we're looking for best substitute for 'ot2.col', we should pick ot1.col
+ and not it2.col, because when we run a join between ot1 and ot2
+ execution of SJ-Mat(...) has already finished and we can't rely on the
+ value of it*.*.
+ psergey-fix-fix: ^^ THAT IS INCORRECT ^^. Pick the first, whatever that
+ is.
+ */
+ while ((item= it++))
+ {
+ TABLE_LIST *emb_nest= item->field->table->pos_in_table_list->embedding;
+ if (!emb_nest || !emb_nest->sj_mat_info ||
+ !emb_nest->sj_mat_info->is_used)
+ {
+ return item;
+ }
+ }
+#endif
+ return fields.head();
+ }
+ // Shouldn't get here.
+ DBUG_ASSERT(0);
+ return NULL;
+}
=== modified file 'sql/item_cmpfunc.h'
--- a/sql/item_cmpfunc.h 2010-02-17 10:05:27 +0000
+++ b/sql/item_cmpfunc.h 2010-03-13 20:04:52 +0000
@@ -1589,7 +1589,7 @@
void add(Item_field *f);
uint members();
bool contains(Field *field);
- Item_field* get_first() { return fields.head(); }
+ Item_field* get_first(Item_field *field);
uint n_fields() { return fields.elements; }
void merge(Item_equal *item);
void update_const();
=== modified file 'sql/opt_subselect.cc'
--- a/sql/opt_subselect.cc 2010-03-07 15:41:45 +0000
+++ b/sql/opt_subselect.cc 2010-03-13 20:04:52 +0000
@@ -2159,6 +2159,8 @@
if (tablenr != first)
pos->sj_strategy= SJ_OPT_NONE;
remaining_tables |= s->table->map;
+ //s->sj_strategy= pos->sj_strategy;
+ join->join_tab[first].sj_strategy= join->best_positions[first].sj_strategy;
}
}
=== modified file 'sql/sql_select.cc'
--- a/sql/sql_select.cc 2010-03-07 15:41:45 +0000
+++ b/sql/sql_select.cc 2010-03-13 20:04:52 +0000
@@ -8867,6 +8867,15 @@
}
+static TABLE_LIST* embedding_sjm(Item_field *item_field)
+{
+ TABLE_LIST *nest= item_field->field->table->pos_in_table_list->embedding;
+ if (nest && nest->sj_mat_info && nest->sj_mat_info->is_used)
+ return nest;
+ else
+ return NULL;
+}
+
/**
Generate minimal set of simple equalities equivalent to a multiple equality.
@@ -8900,6 +8909,23 @@
So only t1.a=t3.c should be left in the lower level.
If cond is equal to 0, then not more then one equality is generated
and a pointer to it is returned as the result of the function.
+
+ Equality substutution and semi-join materialization nests:
+
+ In case join order looks like this:
+
+ outer_tbl1 outer_tbl2 SJM (inner_tbl1 inner_tbl2) outer_tbl3
+
+ We must not construct equalities like
+
+ outer_tbl1.col = inner_tbl1.col
+
+ because they would get attached to inner_tbl1 and will get evaluated
+ during materialization phase, when we don't have current value of
+ outer_tbl1.col.
+
+ Item_equal::get_first() also takes similar measures for dealing with
+ equality substitution in presense of SJM nests.
@return
- The condition with generated simple equalities or
@@ -8917,18 +8943,44 @@
Item *item_const= item_equal->get_const();
Item_equal_iterator it(*item_equal);
Item *head;
+ TABLE_LIST *current_sjm= NULL;
+ Item *current_sjm_head= NULL;
+
+ /*
+ Pick the "head" item: the constant one or the first in the join order
+ that's not inside some SJM nest.
+ */
if (item_const)
head= item_const;
else
{
- head= item_equal->get_first();
+ TABLE_LIST *emb_nest;
+ Item_field *item_field;
+ head= item_field= item_equal->get_first(NULL);
it++;
+ if ((emb_nest= embedding_sjm(item_field)))
+ {
+ current_sjm= emb_nest;
+ current_sjm_head= head;
+ }
}
+
Item_field *item_field;
+ /*
+ For each other item, generate "item=head" equality (except the tables that
+ are within SJ-Materialization nests, for those "head" is defined
+ differently)
+ */
while ((item_field= it++))
{
Item_equal *upper= item_field->find_item_equal(upper_levels);
Item_field *item= item_field;
+ TABLE_LIST *field_sjm= embedding_sjm(item_field);
+
+ /*
+ Check if "item_field=head" equality is already guaranteed to be true
+ on upper AND-levels.
+ */
if (upper)
{
if (item_const && upper->get_const())
@@ -8943,65 +8995,29 @@
}
}
}
- if (item == item_field)
+
+ bool produce_equality= test(item == item_field);
+ if (!item_const && field_sjm && field_sjm != current_sjm)
+ {
+ /* Entering an SJM nest */
+ current_sjm_head= item_field;
+ if (!field_sjm->sj_mat_info->is_sj_scan)
+ produce_equality= FALSE;
+ }
+
+ if (produce_equality)
{
if (eq_item)
eq_list.push_back(eq_item);
- /*
- item_field might refer to a table that is within a semi-join
- materialization nest. In that case, the join order looks like this:
-
- outer_tbl1 outer_tbl2 SJM (inner_tbl1 inner_tbl2) outer_tbl3
-
- We must not construct equalities like
-
- outer_tbl1.col = inner_tbl1.col
-
- because they would get attached to inner_tbl1 and will get evaluated
- during materialization phase, when we don't have current value of
- outer_tbl1.col.
- */
- TABLE_LIST *emb_nest=
- item_field->field->table->pos_in_table_list->embedding;
- if (!item_const && emb_nest && emb_nest->sj_mat_info &&
- emb_nest->sj_mat_info->is_used)
- {
- /*
- Find the first equal expression that refers to a table that is
- within the semijoin nest. If we can't find it, do nothing
- */
- List_iterator<Item_field> fit(item_equal->fields);
- Item_field *head_in_sjm;
- bool found= FALSE;
- while ((head_in_sjm= fit++))
- {
- if (head_in_sjm->used_tables() & emb_nest->sj_inner_tables)
- {
- if (head_in_sjm == item_field)
- {
- /* This is the first table inside the semi-join*/
- eq_item= new Item_func_eq(item_field, head);
- /* Tell make_cond_for_table don't use this. */
- eq_item->marker=3;
- }
- else
- {
- eq_item= new Item_func_eq(item_field, head_in_sjm);
- found= TRUE;
- }
- break;
- }
- }
- if (!found)
- continue;
- }
- else
- eq_item= new Item_func_eq(item_field, head);
+
+ eq_item= new Item_func_eq(item_field, current_sjm? current_sjm_head: head);
+
if (!eq_item)
return 0;
eq_item->set_cmp_func();
eq_item->quick_fix_field();
}
+ current_sjm= field_sjm;
}
if (!cond && !eq_list.head())
=== modified file 'sql/sql_select.h'
--- a/sql/sql_select.h 2010-03-05 18:54:48 +0000
+++ b/sql/sql_select.h 2010-03-13 20:04:52 +0000
@@ -279,6 +279,13 @@
/* NestedOuterJoins: Bitmap of nested joins this table is part of */
nested_join_map embedding_map;
+ /*
+ Semi-join strategy to be used for this join table. This is a copy of
+ POSITION::sj_strategy field. This field is set up by the
+ fix_semijion_strategies_for_picked_join_order.
+ */
+ uint sj_strategy;
+
void cleanup();
inline bool is_using_loose_index_scan()
{