← Back to team overview

maria-developers team mailing list archive

Rev 2774: BUG#45174: XOR in subqueries produces differing results in 5.1 and 5.4 in file:///home/psergey/dev/maria-5.3-subqueries-r7/

 

At file:///home/psergey/dev/maria-5.3-subqueries-r7/

------------------------------------------------------------
revno: 2774
revision-id: psergey@xxxxxxxxxxxx-20100313200452-kq4dxayp7b45zum1
parent: psergey@xxxxxxxxxxxx-20100307154145-ksby2b1l0sqm1xne
committer: Sergey Petrunya <psergey@xxxxxxxxxxxx>
branch nick: maria-5.3-subqueries-r7
timestamp: Sat 2010-03-13 23:04:52 +0300
message:
  BUG#45174: XOR in subqueries produces differing results in 5.1 and 5.4
  BUG#50019: Wrong result for IN-subquery with materialization
  - Fix equality substitution in presense of semi-join materialization, lookup and scan variants
    (started off from fix by Evgen Potemkin, then modified it to work in all cases)
=== modified file 'mysql-test/r/subselect_mat.result'
--- a/mysql-test/r/subselect_mat.result	2010-01-17 14:51:10 +0000
+++ b/mysql-test/r/subselect_mat.result	2010-03-13 20:04:52 +0000
@@ -1219,3 +1219,28 @@
 pk
 2
 DROP TABLE t1, t2;
+#
+# BUG#50019: Wrong result for IN-subquery with materialization
+#
+create table t1(i int);
+insert into t1 values (1), (2), (3), (4), (5), (6), (7), (8), (9), (10);
+create table t2(i int);
+insert into t2 values (1), (2), (3), (4), (5), (6), (7), (8), (9), (10);
+create table t3(i int);
+insert into t3 values (1), (2), (3), (4), (5), (6), (7), (8), (9), (10);
+select * from t1 where t1.i in (select t2.i from t2 join t3 where t2.i + t3.i = 5);
+i
+1
+2
+3
+4
+set @save_optimizer_switch=@@optimizer_switch;
+set session optimizer_switch='materialization=off';
+select * from t1 where t1.i in (select t2.i from t2 join t3 where t2.i + t3.i = 5);
+i
+1
+2
+3
+4
+set session optimizer_switch=@save_optimizer_switch;
+drop table t1, t2, t3;

=== modified file 'mysql-test/r/subselect_sj.result'
--- a/mysql-test/r/subselect_sj.result	2010-02-24 11:33:42 +0000
+++ b/mysql-test/r/subselect_sj.result	2010-03-13 20:04:52 +0000
@@ -871,3 +871,39 @@
 DROP TABLE t1, t2, t3;
 DROP VIEW v2, v3;
 # End of Bug#49198
+#
+# Bug#45174: Incorrectly applied equality propagation caused wrong
+# result on a query with a materialized semi-join.
+#
+CREATE TABLE `t1` (
+`pk` int(11) NOT NULL AUTO_INCREMENT,
+`varchar_key` varchar(1) NOT NULL,
+`varchar_nokey` varchar(1) NOT NULL,
+PRIMARY KEY (`pk`),
+KEY `varchar_key` (`varchar_key`)
+);
+INSERT INTO `t1` VALUES (11,'m','m'),(12,'j','j'),(13,'z','z'),(14,'a','a'),(15,'',''),(16,'e','e'),(17,'t','t'),(19,'b','b'),(20,'w','w'),(21,'m','m'),(23,'',''),(24,'w','w'),(26,'e','e'),(27,'e','e'),(28,'p','p');
+CREATE TABLE `t2` (
+`varchar_nokey` varchar(1) NOT NULL
+);
+INSERT INTO `t2` VALUES ('v'),('u'),('n'),('l'),('h'),('u'),('n'),('j'),('k'),('e'),('i'),('u'),('n'),('b'),('x'),(''),('q'),('u');
+EXPLAIN EXTENDED SELECT varchar_nokey
+FROM t2  
+WHERE ( `varchar_nokey`  , `varchar_nokey`  )  IN (  
+SELECT `varchar_key`  , `varchar_nokey`  
+FROM t1  
+WHERE `varchar_nokey`  < 'n' XOR `pk`  )   ;
+id	select_type	table	type	possible_keys	key	key_len	ref	rows	filtered	Extra
+1	PRIMARY	t2	ALL	NULL	NULL	NULL	NULL	18	100.00	
+1	PRIMARY	t1	ALL	varchar_key	NULL	NULL	NULL	15	100.00	Using where; Materialize
+Warnings:
+Note	1003	select `test`.`t2`.`varchar_nokey` AS `varchar_nokey` from `test`.`t2` semi join (`test`.`t1`) where ((`test`.`t1`.`varchar_nokey` = `test`.`t1`.`varchar_key`) and ((`test`.`t1`.`varchar_nokey` < 'n') xor `test`.`t1`.`pk`))
+SELECT varchar_nokey
+FROM t2  
+WHERE ( `varchar_nokey`  , `varchar_nokey`  )  IN (  
+SELECT `varchar_key`  , `varchar_nokey`  
+FROM t1  
+WHERE `varchar_nokey`  < 'n' XOR `pk`  )   ;
+varchar_nokey
+DROP TABLE t1, t2;
+# End of the test for bug#45174.

=== modified file 'mysql-test/r/subselect_sj_jcl6.result'
--- a/mysql-test/r/subselect_sj_jcl6.result	2010-03-07 15:41:45 +0000
+++ b/mysql-test/r/subselect_sj_jcl6.result	2010-03-13 20:04:52 +0000
@@ -876,6 +876,42 @@
 DROP VIEW v2, v3;
 # End of Bug#49198
 #
+# Bug#45174: Incorrectly applied equality propagation caused wrong
+# result on a query with a materialized semi-join.
+#
+CREATE TABLE `t1` (
+`pk` int(11) NOT NULL AUTO_INCREMENT,
+`varchar_key` varchar(1) NOT NULL,
+`varchar_nokey` varchar(1) NOT NULL,
+PRIMARY KEY (`pk`),
+KEY `varchar_key` (`varchar_key`)
+);
+INSERT INTO `t1` VALUES (11,'m','m'),(12,'j','j'),(13,'z','z'),(14,'a','a'),(15,'',''),(16,'e','e'),(17,'t','t'),(19,'b','b'),(20,'w','w'),(21,'m','m'),(23,'',''),(24,'w','w'),(26,'e','e'),(27,'e','e'),(28,'p','p');
+CREATE TABLE `t2` (
+`varchar_nokey` varchar(1) NOT NULL
+);
+INSERT INTO `t2` VALUES ('v'),('u'),('n'),('l'),('h'),('u'),('n'),('j'),('k'),('e'),('i'),('u'),('n'),('b'),('x'),(''),('q'),('u');
+EXPLAIN EXTENDED SELECT varchar_nokey
+FROM t2  
+WHERE ( `varchar_nokey`  , `varchar_nokey`  )  IN (  
+SELECT `varchar_key`  , `varchar_nokey`  
+FROM t1  
+WHERE `varchar_nokey`  < 'n' XOR `pk`  )   ;
+id	select_type	table	type	possible_keys	key	key_len	ref	rows	filtered	Extra
+1	PRIMARY	t2	ALL	NULL	NULL	NULL	NULL	18	100.00	
+1	PRIMARY	t1	ALL	varchar_key	NULL	NULL	NULL	15	100.00	Using where; Materialize
+Warnings:
+Note	1003	select `test`.`t2`.`varchar_nokey` AS `varchar_nokey` from `test`.`t2` semi join (`test`.`t1`) where ((`test`.`t1`.`varchar_nokey` = `test`.`t1`.`varchar_key`) and ((`test`.`t1`.`varchar_nokey` < 'n') xor `test`.`t1`.`pk`))
+SELECT varchar_nokey
+FROM t2  
+WHERE ( `varchar_nokey`  , `varchar_nokey`  )  IN (  
+SELECT `varchar_key`  , `varchar_nokey`  
+FROM t1  
+WHERE `varchar_nokey`  < 'n' XOR `pk`  )   ;
+varchar_nokey
+DROP TABLE t1, t2;
+# End of the test for bug#45174.
+#
 # BUG#49129: Wrong result with IN-subquery with join_cache_level=6 and firstmatch=off
 #
 CREATE TABLE t0 (a INT);

=== modified file 'mysql-test/t/subselect_mat.test'
--- a/mysql-test/t/subselect_mat.test	2010-01-17 14:51:10 +0000
+++ b/mysql-test/t/subselect_mat.test	2010-03-13 20:04:52 +0000
@@ -889,3 +889,19 @@
 SELECT pk FROM t1 WHERE (b,c,d) IN (SELECT b,c,d FROM t2 WHERE pk > 0);
 DROP TABLE t1, t2;
 
+--echo #
+--echo # BUG#50019: Wrong result for IN-subquery with materialization
+--echo #
+create table t1(i int);
+insert into t1 values (1), (2), (3), (4), (5), (6), (7), (8), (9), (10);
+create table t2(i int);
+insert into t2 values (1), (2), (3), (4), (5), (6), (7), (8), (9), (10);
+create table t3(i int);
+insert into t3 values (1), (2), (3), (4), (5), (6), (7), (8), (9), (10);
+select * from t1 where t1.i in (select t2.i from t2 join t3 where t2.i + t3.i = 5);
+set @save_optimizer_switch=@@optimizer_switch;
+set session optimizer_switch='materialization=off';
+select * from t1 where t1.i in (select t2.i from t2 join t3 where t2.i + t3.i = 5);
+set session optimizer_switch=@save_optimizer_switch;
+drop table t1, t2, t3;
+

=== modified file 'mysql-test/t/subselect_sj.test'
--- a/mysql-test/t/subselect_sj.test	2010-02-24 11:33:42 +0000
+++ b/mysql-test/t/subselect_sj.test	2010-03-13 20:04:52 +0000
@@ -770,3 +770,42 @@
 DROP VIEW v2, v3;
 
 --echo # End of Bug#49198
+
+--echo #
+--echo # Bug#45174: Incorrectly applied equality propagation caused wrong
+--echo # result on a query with a materialized semi-join.
+--echo #
+
+CREATE TABLE `t1` (
+  `pk` int(11) NOT NULL AUTO_INCREMENT,
+  `varchar_key` varchar(1) NOT NULL,
+  `varchar_nokey` varchar(1) NOT NULL,
+  PRIMARY KEY (`pk`),
+  KEY `varchar_key` (`varchar_key`)
+);
+
+INSERT INTO `t1` VALUES (11,'m','m'),(12,'j','j'),(13,'z','z'),(14,'a','a'),(15,'',''),(16,'e','e'),(17,'t','t'),(19,'b','b'),(20,'w','w'),(21,'m','m'),(23,'',''),(24,'w','w'),(26,'e','e'),(27,'e','e'),(28,'p','p');
+
+CREATE TABLE `t2` (
+  `varchar_nokey` varchar(1) NOT NULL
+);
+
+INSERT INTO `t2` VALUES ('v'),('u'),('n'),('l'),('h'),('u'),('n'),('j'),('k'),('e'),('i'),('u'),('n'),('b'),('x'),(''),('q'),('u');
+
+EXPLAIN EXTENDED SELECT varchar_nokey
+FROM t2  
+WHERE ( `varchar_nokey`  , `varchar_nokey`  )  IN (  
+SELECT `varchar_key`  , `varchar_nokey`  
+FROM t1  
+WHERE `varchar_nokey`  < 'n' XOR `pk`  )   ;
+
+SELECT varchar_nokey
+FROM t2  
+WHERE ( `varchar_nokey`  , `varchar_nokey`  )  IN (  
+SELECT `varchar_key`  , `varchar_nokey`  
+FROM t1  
+WHERE `varchar_nokey`  < 'n' XOR `pk`  )   ;
+
+DROP TABLE t1, t2;
+
+--echo # End of the test for bug#45174.

=== modified file 'sql/item.cc'
--- a/sql/item.cc	2010-02-24 11:33:42 +0000
+++ b/sql/item.cc	2010-03-13 20:04:52 +0000
@@ -4761,7 +4761,7 @@
         return this;
       return const_item;
     }
-    Item_field *subst= item_equal->get_first();
+    Item_field *subst= item_equal->get_first(this);
     if (subst && field->table != subst->field->table && !field->eq(subst->field))
       return subst;
   }

=== modified file 'sql/item_cmpfunc.cc'
--- a/sql/item_cmpfunc.cc	2010-02-17 10:05:27 +0000
+++ b/sql/item_cmpfunc.cc	2010-03-13 20:04:52 +0000
@@ -5369,7 +5369,7 @@
 
 void Item_equal::fix_length_and_dec()
 {
-  Item *item= get_first();
+  Item *item= get_first(NULL);
   eval_item= cmp_item::get_comparator(item->result_type(),
                                       item->collation.collation);
 }
@@ -5432,3 +5432,128 @@
   str->append(')');
 }
 
+
+/*
+  @brief Get the first equal field of multiple equality.
+  @param[in] field   the field to get equal field to
+
+  @details Get the first field of multiple equality that is equal to the
+  given field. In order to make semi-join materialization strategy work
+  correctly we can't propagate equal fields from upper select to a
+  materialized semi-join.
+  Thus the fields is returned according to following rules:
+
+  1) If the given field belongs to a semi-join then the first field in
+     multiple equality which belong to the same semi-join is returned.
+     Otherwise NULL is returned.
+  2) If the given field doesn't belong to a semi-join then
+     the first field in the multiple equality that doesn't belong to any
+     semi-join is returned.
+     If all fields in the equality are belong to semi-join(s) then NULL
+     is returned.
+  3) If no field is given then the first field in the multiple equality
+     is returned without regarding whether it belongs to a semi-join or not.
+
+  @retval Found first field in the multiple equality.
+  @retval 0 if no field found.
+*/
+
+Item_field* Item_equal::get_first(Item_field *field)
+{
+  List_iterator<Item_field> it(fields);
+  Item_field *item;
+  JOIN_TAB *field_tab;
+
+  if (!field)
+    return fields.head();
+
+  /*
+    Of all equal fields, return the first one we can use. Normally, this is the
+    field which belongs to the table that is the first in the join order.
+
+    There is one exception to this: When semi-join materialization strategy is
+    used, and the given field belongs to a table within the semi-join nest, we
+    must pick the first field in the semi-join nest.
+
+    Example: suppose we have a join order:
+
+       ot1 ot2  SJ-Mat(it1  it2  it3)  ot3
+
+    and equality ot2.col = it1.col = it2.col
+    If we're looking for best substitute for 'it2.col', we should pick it1.col
+    and not ot2.col.
+    
+    eliminate_item_equal() also has code that deals with equality substitution
+    in presense of SJM nests.
+  */
+
+  field_tab= field->field->table->reginfo.join_tab;
+
+  TABLE_LIST *emb_nest= field->field->table->pos_in_table_list->embedding;
+
+  if (emb_nest && emb_nest->sj_mat_info && emb_nest->sj_mat_info->is_used)
+  {
+    /*
+      It's a field from an materialized semi-join. We can substitute it only
+      for a field from the same semi-join.
+    */
+    JOIN_TAB *first;
+    JOIN *join= field_tab->join;
+    uint tab_idx= field_tab - field_tab->join->join_tab;
+
+    /* Find the first table of this semi-join nest */
+    for (uint i= tab_idx; i != join->const_tables; i--)
+    {
+      if (join->join_tab[i].table->map & emb_nest->sj_inner_tables)
+        first= join->join_tab + i;
+      else
+        // Found first tab that doesn't belong to current SJ.
+        break;
+    }
+    /* Find an item to substitute for. */
+    while ((item= it++))
+    {
+      if (item->field->table->reginfo.join_tab >= first)
+      {
+        /*
+          If we found given field then return NULL to avoid unnecessary
+          substitution.
+        */
+        return (item != field) ? item : NULL;
+      }
+    }
+  }
+  else
+  {
+#if 0    
+    /*
+      The field is not in SJ-Materialization nest. We must return the first
+      field that's not embedded in a SJ-Materialization nest.
+      Example: suppose we have a join order:
+
+          SJ-Mat(it1  it2)  ot1  ot2
+
+      and equality ot2.col = ot1.col = it2.col
+      If we're looking for best substitute for 'ot2.col', we should pick ot1.col
+      and not it2.col, because when we run a join between ot1 and ot2
+      execution of SJ-Mat(...) has already finished and we can't rely on the
+      value of it*.*.
+      psergey-fix-fix: ^^ THAT IS INCORRECT ^^. Pick the first, whatever that
+      is.
+    */
+    while ((item= it++))
+    {
+      TABLE_LIST *emb_nest= item->field->table->pos_in_table_list->embedding;
+      if (!emb_nest || !emb_nest->sj_mat_info || 
+          !emb_nest->sj_mat_info->is_used)
+      {
+        return item;
+      }
+    }
+#endif
+    return fields.head();
+  }
+  // Shouldn't get here.
+  DBUG_ASSERT(0);
+  return NULL;
+}

=== modified file 'sql/item_cmpfunc.h'
--- a/sql/item_cmpfunc.h	2010-02-17 10:05:27 +0000
+++ b/sql/item_cmpfunc.h	2010-03-13 20:04:52 +0000
@@ -1589,7 +1589,7 @@
   void add(Item_field *f);
   uint members();
   bool contains(Field *field);
-  Item_field* get_first() { return fields.head(); }
+  Item_field* get_first(Item_field *field);
   uint n_fields() { return fields.elements; }
   void merge(Item_equal *item);
   void update_const();

=== modified file 'sql/opt_subselect.cc'
--- a/sql/opt_subselect.cc	2010-03-07 15:41:45 +0000
+++ b/sql/opt_subselect.cc	2010-03-13 20:04:52 +0000
@@ -2159,6 +2159,8 @@
     if (tablenr != first)
       pos->sj_strategy= SJ_OPT_NONE;
     remaining_tables |= s->table->map;
+    //s->sj_strategy= pos->sj_strategy;
+    join->join_tab[first].sj_strategy= join->best_positions[first].sj_strategy;
   }
 }
 

=== modified file 'sql/sql_select.cc'
--- a/sql/sql_select.cc	2010-03-07 15:41:45 +0000
+++ b/sql/sql_select.cc	2010-03-13 20:04:52 +0000
@@ -8867,6 +8867,15 @@
 }
 
 
+static TABLE_LIST* embedding_sjm(Item_field *item_field)
+{
+  TABLE_LIST *nest= item_field->field->table->pos_in_table_list->embedding;
+  if (nest && nest->sj_mat_info && nest->sj_mat_info->is_used)
+    return nest;
+  else
+    return NULL;
+}
+
 /**
   Generate minimal set of simple equalities equivalent to a multiple equality.
 
@@ -8900,6 +8909,23 @@
     So only t1.a=t3.c should be left in the lower level.
     If cond is equal to 0, then not more then one equality is generated
     and a pointer to it is returned as the result of the function.
+    
+    Equality substutution and semi-join materialization nests:
+
+       In case join order looks like this:
+
+          outer_tbl1 outer_tbl2 SJM (inner_tbl1 inner_tbl2) outer_tbl3 
+
+        We must not construct equalities like 
+
+           outer_tbl1.col = inner_tbl1.col 
+
+        because they would get attached to inner_tbl1 and will get evaluated
+        during materialization phase, when we don't have current value of
+        outer_tbl1.col.
+
+        Item_equal::get_first() also takes similar measures for dealing with
+        equality substitution in presense of SJM nests.
 
   @return
     - The condition with generated simple equalities or
@@ -8917,18 +8943,44 @@
   Item *item_const= item_equal->get_const();
   Item_equal_iterator it(*item_equal);
   Item *head;
+  TABLE_LIST *current_sjm= NULL;
+  Item *current_sjm_head= NULL;
+
+  /* 
+    Pick the "head" item: the constant one or the first in the join order
+    that's not inside some SJM nest.
+  */
   if (item_const)
     head= item_const;
   else
   {
-    head= item_equal->get_first();
+    TABLE_LIST *emb_nest;
+    Item_field *item_field;
+    head= item_field= item_equal->get_first(NULL);
     it++;
+    if ((emb_nest= embedding_sjm(item_field)))
+    {
+      current_sjm= emb_nest;
+      current_sjm_head= head;
+    }
   }
+
   Item_field *item_field;
+  /*
+    For each other item, generate "item=head" equality (except the tables that 
+    are within SJ-Materialization nests, for those "head" is defined
+    differently)
+  */
   while ((item_field= it++))
   {
     Item_equal *upper= item_field->find_item_equal(upper_levels);
     Item_field *item= item_field;
+    TABLE_LIST *field_sjm= embedding_sjm(item_field);
+
+    /* 
+      Check if "item_field=head" equality is already guaranteed to be true 
+      on upper AND-levels.
+    */
     if (upper)
     { 
       if (item_const && upper->get_const())
@@ -8943,65 +8995,29 @@
         }
       }
     }
-    if (item == item_field)
+    
+    bool produce_equality= test(item == item_field);
+    if (!item_const && field_sjm && field_sjm != current_sjm)
+    {
+      /* Entering an SJM nest */
+      current_sjm_head= item_field;
+      if (!field_sjm->sj_mat_info->is_sj_scan)
+        produce_equality= FALSE;
+    }
+
+    if (produce_equality)
     {
       if (eq_item)
         eq_list.push_back(eq_item);
-      /*
-        item_field might refer to a table that is within a semi-join
-        materialization nest. In that case, the join order looks like this:
-
-          outer_tbl1 outer_tbl2 SJM (inner_tbl1 inner_tbl2) outer_tbl3 
-
-        We must not construct equalities like 
-
-           outer_tbl1.col = inner_tbl1.col 
-
-        because they would get attached to inner_tbl1 and will get evaluated
-        during materialization phase, when we don't have current value of
-        outer_tbl1.col.
-      */
-      TABLE_LIST *emb_nest= 
-        item_field->field->table->pos_in_table_list->embedding;
-      if (!item_const && emb_nest && emb_nest->sj_mat_info &&
-          emb_nest->sj_mat_info->is_used)
-      {
-        /* 
-          Find the first equal expression that refers to a table that is
-          within the semijoin nest. If we can't find it, do nothing
-        */
-        List_iterator<Item_field> fit(item_equal->fields);
-        Item_field *head_in_sjm;
-        bool found= FALSE;
-        while ((head_in_sjm= fit++))
-        {
-          if (head_in_sjm->used_tables() & emb_nest->sj_inner_tables)
-          {
-            if (head_in_sjm == item_field)
-            {
-              /* This is the first table inside the semi-join*/
-              eq_item= new Item_func_eq(item_field, head);
-              /* Tell make_cond_for_table don't use this. */
-              eq_item->marker=3;
-            }
-            else
-            {
-              eq_item= new Item_func_eq(item_field, head_in_sjm);
-              found= TRUE;
-            }
-            break;
-          }
-        }
-        if (!found)
-          continue;
-      }
-      else
-        eq_item= new Item_func_eq(item_field, head);
+      
+      eq_item= new Item_func_eq(item_field, current_sjm? current_sjm_head: head);
+
       if (!eq_item)
         return 0;
       eq_item->set_cmp_func();
       eq_item->quick_fix_field();
     }
+    current_sjm= field_sjm;
   }
 
   if (!cond && !eq_list.head())

=== modified file 'sql/sql_select.h'
--- a/sql/sql_select.h	2010-03-05 18:54:48 +0000
+++ b/sql/sql_select.h	2010-03-13 20:04:52 +0000
@@ -279,6 +279,13 @@
   /* NestedOuterJoins: Bitmap of nested joins this table is part of */
   nested_join_map embedding_map;
 
+  /*
+    Semi-join strategy to be used for this join table. This is a copy of
+    POSITION::sj_strategy field. This field is set up by the
+    fix_semijion_strategies_for_picked_join_order.
+  */
+  uint sj_strategy;
+
   void cleanup();
   inline bool is_using_loose_index_scan()
   {