← Back to team overview

maria-developers team mailing list archive

Rev 2820: Apply Evgen's fix: in file:///home/psergey/dev/mysql-next-fix-subq-r2/

 

At file:///home/psergey/dev/mysql-next-fix-subq-r2/

------------------------------------------------------------
revno: 2820
revision-id: psergey@xxxxxxxxxxxx-20090729161849-ynumr03ety244ueu
parent: psergey@xxxxxxxxxxxx-20090708174703-dz9uf5b0m6pcvtl6
committer: Sergey Petrunya <psergey@xxxxxxxxxxxx>
branch nick: mysql-next-fix-subq-r2
timestamp: Wed 2009-07-29 20:18:49 +0400
message:
  Apply Evgen's fix: 
  
  Bug#45174: Incorrectly applied equality propagation caused wrong result
  on a query with a materialized semi-join.
  
  Equality propagation is done after query execution plan is chosen. It
  substitutes fields from tables being retrieved later for fields from tables
  being retrieved earlier. Materialized semi-joins are exception to this rule.
  For field which belongs to a table within a materialized semi-join, we can
  only pick fields from the same semi-join.
  
  Example: suppose we have a join order:
  
     ot1 ot2  SJ-Mat(it1  it2  it3)  ot3
  
  and equality ot2.col = it1.col = it2.col
  If we're looking for best substitute for 'it2.col', we should pick it1.col
  and not ot2.col.
  
  For a field that is not in a materialized semi-join we must pick a field
  that's not embedded in a materialized semi-join.
  
  Example: suppose we have a join order:
  
      SJ-Mat(it1  it2)  ot1  ot2
  
  and equality ot2.col = ot1.col = it2.col
  If we're looking for best substitute for 'ot2.col', we should pick ot1.col
  and not it2.col, because when we run a join between ot1 and ot2
  execution of SJ-Mat(...) has already finished and we can't rely on the value
  of it*.*.
  
  Now the Item_equal::get_first function accepts as a parameter a field being
  substituted and checks whether it belongs to a materialized semi-join.
  Depending on the check result a field to substitute for or  NULL is returned.
  
  The is_sj_materialization_strategy method is added to the JOIN_TAB class to
  check whether JOIN_TAB belongs to a materialized semi-join.
=== modified file 'mysql-test/r/subselect3.result'
--- a/mysql-test/r/subselect3.result	2009-04-30 19:37:21 +0000
+++ b/mysql-test/r/subselect3.result	2009-07-29 16:18:49 +0000
@@ -1081,8 +1081,8 @@
 insert into t3 select A.a + 10*B.a, 'filler' from t0 A, t0 B;
 explain select * from t3 where a in (select a from t2) and (a > 5 or a < 10);
 id	select_type	table	type	possible_keys	key	key_len	ref	rows	Extra
-1	PRIMARY	t2	ALL	NULL	NULL	NULL	NULL	2	Using where; Materialize; Scan
-1	PRIMARY	t3	ref	a	a	5	test.t2.a	1	
+1	PRIMARY	t2	ALL	NULL	NULL	NULL	NULL	2	Materialize; Scan
+1	PRIMARY	t3	ref	a	a	5	test.t2.a	1	Using index condition
 select * from t3 where a in (select a from t2);
 a	filler
 1	filler
@@ -1129,8 +1129,8 @@
 explain select * from t1, t3 where t3.a in (select a from t2) and (t3.a < 10 or t3.a >30) and t1.a =3;
 id	select_type	table	type	possible_keys	key	key_len	ref	rows	Extra
 1	PRIMARY	t1	ALL	NULL	NULL	NULL	NULL	10	Using where
-1	PRIMARY	t2	ALL	NULL	NULL	NULL	NULL	10	Using where; Materialize; Scan
-1	PRIMARY	t3	ref	a	a	5	test.t2.a	10	
+1	PRIMARY	t2	ALL	NULL	NULL	NULL	NULL	10	Materialize; Scan
+1	PRIMARY	t3	ref	a	a	5	test.t2.a	10	Using index condition
 explain select straight_join * from t1 A, t1 B where A.a in (select a from t2);
 id	select_type	table	type	possible_keys	key	key_len	ref	rows	Extra
 1	PRIMARY	A	ALL	NULL	NULL	NULL	NULL	10	Using where
@@ -1158,14 +1158,14 @@
 explain select * from t0, t3 where t3.a in (select a from t2) and (t3.a < 10 or t3.a >30);
 id	select_type	table	type	possible_keys	key	key_len	ref	rows	Extra
 1	PRIMARY	t0	system	NULL	NULL	NULL	NULL	1	
-1	PRIMARY	t2	ALL	NULL	NULL	NULL	NULL	10	Using where; Materialize; Scan
-1	PRIMARY	t3	ref	a	a	5	test.t2.a	10	
+1	PRIMARY	t2	ALL	NULL	NULL	NULL	NULL	10	Materialize; Scan
+1	PRIMARY	t3	ref	a	a	5	test.t2.a	10	Using index condition
 create table t4 as select a as x, a as y from t1;
 explain select * from t0, t3 where (t3.a, t3.b) in (select x,y from t4) and (t3.a < 10 or t3.a >30);
 id	select_type	table	type	possible_keys	key	key_len	ref	rows	Extra
 1	PRIMARY	t0	system	NULL	NULL	NULL	NULL	1	
-1	PRIMARY	t4	ALL	NULL	NULL	NULL	NULL	10	Using where; Materialize; Scan
-1	PRIMARY	t3	ref	a	a	5	test.t4.x	10	Using where
+1	PRIMARY	t4	ALL	NULL	NULL	NULL	NULL	10	Materialize; Scan
+1	PRIMARY	t3	ref	a	a	5	test.t4.x	10	Using index condition; Using where
 drop table t0,t1,t2,t3,t4;
 create table t0 (a int);
 insert into t0 values (0),(1),(2),(3),(4),(5),(6),(7),(8),(9);

=== modified file 'mysql-test/r/subselect3_jcl6.result'
--- a/mysql-test/r/subselect3_jcl6.result	2009-04-30 19:37:21 +0000
+++ b/mysql-test/r/subselect3_jcl6.result	2009-07-29 16:18:49 +0000
@@ -1086,8 +1086,8 @@
 insert into t3 select A.a + 10*B.a, 'filler' from t0 A, t0 B;
 explain select * from t3 where a in (select a from t2) and (a > 5 or a < 10);
 id	select_type	table	type	possible_keys	key	key_len	ref	rows	Extra
-1	PRIMARY	t2	ALL	NULL	NULL	NULL	NULL	2	Using where; Materialize; Scan
-1	PRIMARY	t3	ref	a	a	5	test.t2.a	1	Using join buffer
+1	PRIMARY	t2	ALL	NULL	NULL	NULL	NULL	2	Materialize; Scan
+1	PRIMARY	t3	ref	a	a	5	test.t2.a	1	Using index condition; Using join buffer
 select * from t3 where a in (select a from t2);
 a	filler
 1	filler
@@ -1134,8 +1134,8 @@
 explain select * from t1, t3 where t3.a in (select a from t2) and (t3.a < 10 or t3.a >30) and t1.a =3;
 id	select_type	table	type	possible_keys	key	key_len	ref	rows	Extra
 1	PRIMARY	t1	ALL	NULL	NULL	NULL	NULL	10	Using where
-1	PRIMARY	t2	ALL	NULL	NULL	NULL	NULL	10	Using where; Materialize; Scan
-1	PRIMARY	t3	ref	a	a	5	test.t2.a	10	Using join buffer
+1	PRIMARY	t2	ALL	NULL	NULL	NULL	NULL	10	Materialize; Scan
+1	PRIMARY	t3	ref	a	a	5	test.t2.a	10	Using index condition; Using join buffer
 explain select straight_join * from t1 A, t1 B where A.a in (select a from t2);
 id	select_type	table	type	possible_keys	key	key_len	ref	rows	Extra
 1	PRIMARY	A	ALL	NULL	NULL	NULL	NULL	10	Using where
@@ -1163,14 +1163,14 @@
 explain select * from t0, t3 where t3.a in (select a from t2) and (t3.a < 10 or t3.a >30);
 id	select_type	table	type	possible_keys	key	key_len	ref	rows	Extra
 1	PRIMARY	t0	system	NULL	NULL	NULL	NULL	1	
-1	PRIMARY	t2	ALL	NULL	NULL	NULL	NULL	10	Using where; Materialize; Scan
-1	PRIMARY	t3	ref	a	a	5	test.t2.a	10	Using join buffer
+1	PRIMARY	t2	ALL	NULL	NULL	NULL	NULL	10	Materialize; Scan
+1	PRIMARY	t3	ref	a	a	5	test.t2.a	10	Using index condition; Using join buffer
 create table t4 as select a as x, a as y from t1;
 explain select * from t0, t3 where (t3.a, t3.b) in (select x,y from t4) and (t3.a < 10 or t3.a >30);
 id	select_type	table	type	possible_keys	key	key_len	ref	rows	Extra
 1	PRIMARY	t0	system	NULL	NULL	NULL	NULL	1	
-1	PRIMARY	t4	ALL	NULL	NULL	NULL	NULL	10	Using where; Materialize; Scan
-1	PRIMARY	t3	ref	a	a	5	test.t4.x	10	Using where; Using join buffer
+1	PRIMARY	t4	ALL	NULL	NULL	NULL	NULL	10	Materialize; Scan
+1	PRIMARY	t3	ref	a	a	5	test.t4.x	10	Using index condition; Using where; Using join buffer
 drop table t0,t1,t2,t3,t4;
 create table t0 (a int);
 insert into t0 values (0),(1),(2),(3),(4),(5),(6),(7),(8),(9);

=== modified file 'mysql-test/r/subselect_sj.result'
--- a/mysql-test/r/subselect_sj.result	2009-07-06 07:57:39 +0000
+++ b/mysql-test/r/subselect_sj.result	2009-07-29 16:18:49 +0000
@@ -372,3 +372,39 @@
 3
 2
 drop table t1, t2, t3;
+#
+# Bug#45174: Incorrectly applied equality propagation caused wrong
+# result on a query with a materialized semi-join.
+#
+CREATE TABLE `CC` (
+`pk` int(11) NOT NULL AUTO_INCREMENT,
+`varchar_key` varchar(1) NOT NULL,
+`varchar_nokey` varchar(1) NOT NULL,
+PRIMARY KEY (`pk`),
+KEY `varchar_key` (`varchar_key`)
+);
+INSERT INTO `CC` VALUES (11,'m','m'),(12,'j','j'),(13,'z','z'),(14,'a','a'),(15,'',''),(16,'e','e'),(17,'t','t'),(19,'b','b'),(20,'w','w'),(21,'m','m'),(23,'',''),(24,'w','w'),(26,'e','e'),(27,'e','e'),(28,'p','p');
+CREATE TABLE `C` (
+`varchar_nokey` varchar(1) NOT NULL
+);
+INSERT INTO `C` VALUES ('v'),('u'),('n'),('l'),('h'),('u'),('n'),('j'),('k'),('e'),('i'),('u'),('n'),('b'),('x'),(''),('q'),('u');
+EXPLAIN EXTENDED SELECT varchar_nokey
+FROM C  
+WHERE ( `varchar_nokey`  , `varchar_nokey`  )  IN (  
+SELECT `varchar_key`  , `varchar_nokey`  
+FROM CC  
+WHERE `varchar_nokey`  < 'n' XOR `pk`  )   ;
+id	select_type	table	type	possible_keys	key	key_len	ref	rows	filtered	Extra
+1	PRIMARY	C	ALL	NULL	NULL	NULL	NULL	18	100.00	
+1	PRIMARY	CC	ALL	varchar_key	NULL	NULL	NULL	15	100.00	Using where; Materialize
+Warnings:
+Note	1003	select `test`.`C`.`varchar_nokey` AS `varchar_nokey` from `test`.`C` semi join (`test`.`CC`) where ((`test`.`CC`.`varchar_key` = `test`.`C`.`varchar_nokey`) and (`test`.`CC`.`varchar_nokey` = `test`.`CC`.`varchar_key`) and ((`test`.`CC`.`varchar_nokey` < 'n') xor `test`.`CC`.`pk`))
+SELECT varchar_nokey
+FROM C  
+WHERE ( `varchar_nokey`  , `varchar_nokey`  )  IN (  
+SELECT `varchar_key`  , `varchar_nokey`  
+FROM CC  
+WHERE `varchar_nokey`  < 'n' XOR `pk`  )   ;
+varchar_nokey
+DROP TABLE CC, C;
+# End of the test for bug#45174.

=== modified file 'mysql-test/r/subselect_sj_jcl6.result'
--- a/mysql-test/r/subselect_sj_jcl6.result	2009-07-06 07:57:39 +0000
+++ b/mysql-test/r/subselect_sj_jcl6.result	2009-07-29 16:18:49 +0000
@@ -376,6 +376,42 @@
 3
 2
 drop table t1, t2, t3;
+#
+# Bug#45174: Incorrectly applied equality propagation caused wrong
+# result on a query with a materialized semi-join.
+#
+CREATE TABLE `CC` (
+`pk` int(11) NOT NULL AUTO_INCREMENT,
+`varchar_key` varchar(1) NOT NULL,
+`varchar_nokey` varchar(1) NOT NULL,
+PRIMARY KEY (`pk`),
+KEY `varchar_key` (`varchar_key`)
+);
+INSERT INTO `CC` VALUES (11,'m','m'),(12,'j','j'),(13,'z','z'),(14,'a','a'),(15,'',''),(16,'e','e'),(17,'t','t'),(19,'b','b'),(20,'w','w'),(21,'m','m'),(23,'',''),(24,'w','w'),(26,'e','e'),(27,'e','e'),(28,'p','p');
+CREATE TABLE `C` (
+`varchar_nokey` varchar(1) NOT NULL
+);
+INSERT INTO `C` VALUES ('v'),('u'),('n'),('l'),('h'),('u'),('n'),('j'),('k'),('e'),('i'),('u'),('n'),('b'),('x'),(''),('q'),('u');
+EXPLAIN EXTENDED SELECT varchar_nokey
+FROM C  
+WHERE ( `varchar_nokey`  , `varchar_nokey`  )  IN (  
+SELECT `varchar_key`  , `varchar_nokey`  
+FROM CC  
+WHERE `varchar_nokey`  < 'n' XOR `pk`  )   ;
+id	select_type	table	type	possible_keys	key	key_len	ref	rows	filtered	Extra
+1	PRIMARY	C	ALL	NULL	NULL	NULL	NULL	18	100.00	
+1	PRIMARY	CC	ALL	varchar_key	NULL	NULL	NULL	15	100.00	Using where; Materialize
+Warnings:
+Note	1003	select `test`.`C`.`varchar_nokey` AS `varchar_nokey` from `test`.`C` semi join (`test`.`CC`) where ((`test`.`CC`.`varchar_key` = `test`.`C`.`varchar_nokey`) and (`test`.`CC`.`varchar_nokey` = `test`.`CC`.`varchar_key`) and ((`test`.`CC`.`varchar_nokey` < 'n') xor `test`.`CC`.`pk`))
+SELECT varchar_nokey
+FROM C  
+WHERE ( `varchar_nokey`  , `varchar_nokey`  )  IN (  
+SELECT `varchar_key`  , `varchar_nokey`  
+FROM CC  
+WHERE `varchar_nokey`  < 'n' XOR `pk`  )   ;
+varchar_nokey
+DROP TABLE CC, C;
+# End of the test for bug#45174.
 set join_cache_level=default;
 show variables like 'join_cache_level';
 Variable_name	Value

=== modified file 'mysql-test/t/subselect_sj.test'
--- a/mysql-test/t/subselect_sj.test	2009-07-06 07:57:39 +0000
+++ b/mysql-test/t/subselect_sj.test	2009-07-29 16:18:49 +0000
@@ -22,7 +22,6 @@
 create table t12 like t10;
 insert into t12 select * from t10;
 
-
 --echo Flattened because of dependency, t10=func(t1)
 explain select * from t1 where a in (select pk from t10);
 select * from t1 where a in (select pk from t10);
@@ -252,3 +251,43 @@
 where a in (select c from t2 where d >= some(select e from t3 where b=e));
 
 drop table t1, t2, t3;
+
+--echo #
+--echo # Bug#45174: Incorrectly applied equality propagation caused wrong
+--echo # result on a query with a materialized semi-join.
+--echo #
+
+CREATE TABLE `CC` (
+  `pk` int(11) NOT NULL AUTO_INCREMENT,
+  `varchar_key` varchar(1) NOT NULL,
+  `varchar_nokey` varchar(1) NOT NULL,
+  PRIMARY KEY (`pk`),
+  KEY `varchar_key` (`varchar_key`)
+);
+
+INSERT INTO `CC` VALUES (11,'m','m'),(12,'j','j'),(13,'z','z'),(14,'a','a'),(15,'',''),(16,'e','e'),(17,'t','t'),(19,'b','b'),(20,'w','w'),(21,'m','m'),(23,'',''),(24,'w','w'),(26,'e','e'),(27,'e','e'),(28,'p','p');
+
+CREATE TABLE `C` (
+  `varchar_nokey` varchar(1) NOT NULL
+);
+
+INSERT INTO `C` VALUES ('v'),('u'),('n'),('l'),('h'),('u'),('n'),('j'),('k'),('e'),('i'),('u'),('n'),('b'),('x'),(''),('q'),('u');
+
+EXPLAIN EXTENDED SELECT varchar_nokey
+FROM C  
+WHERE ( `varchar_nokey`  , `varchar_nokey`  )  IN (  
+SELECT `varchar_key`  , `varchar_nokey`  
+FROM CC  
+WHERE `varchar_nokey`  < 'n' XOR `pk`  )   ;
+
+SELECT varchar_nokey
+FROM C  
+WHERE ( `varchar_nokey`  , `varchar_nokey`  )  IN (  
+SELECT `varchar_key`  , `varchar_nokey`  
+FROM CC  
+WHERE `varchar_nokey`  < 'n' XOR `pk`  )   ;
+
+DROP TABLE CC, C;
+
+--echo # End of the test for bug#45174.
+

=== modified file 'sql/item.cc'
--- a/sql/item.cc	2009-07-06 07:57:39 +0000
+++ b/sql/item.cc	2009-07-29 16:18:49 +0000
@@ -4895,7 +4895,7 @@
         return this;
       return const_item;
     }
-    Item_field *subst= item_equal->get_first();
+    Item_field *subst= item_equal->get_first(this);
     if (subst && field->table != subst->field->table && !field->eq(subst->field))
       return subst;
   }

=== modified file 'sql/item_cmpfunc.cc'
--- a/sql/item_cmpfunc.cc	2009-07-06 07:57:39 +0000
+++ b/sql/item_cmpfunc.cc	2009-07-29 16:18:49 +0000
@@ -5377,7 +5377,7 @@
 
 void Item_equal::fix_length_and_dec()
 {
-  Item *item= get_first();
+  Item *item= get_first(NULL);
   eval_item= cmp_item::get_comparator(item->result_type(),
                                       item->collation.collation);
 }
@@ -5440,3 +5440,107 @@
   str->append(')');
 }
 
+
+/*
+  @brief Get the first field of multiple equality.
+  @param[in] field   the field to get equal field to
+
+  @details Get the first field of multiple equality that is equal to the
+  given field. In order to make semi-join materialization strategy work
+  correctly we can't propagate equal fields from upper select to the semi-join.
+  Thus the fields is returned according to following rules:
+
+  1) If the given field belongs to a semi-join then the first field in
+     multiple equality which belong to the same semi-join is returned.
+     Otherwise NULL is returned.
+  2) If no field is given or the field doesn't belong to a semi-join then
+     the first field in the multiple equality is returned.
+
+  @retval Found first field in the multiple equality.
+  @retval 0 if no field found.
+*/
+
+Item_field* Item_equal::get_first(Item_field *field)
+{
+  List_iterator<Item_field> it(fields);
+  Item_field *item;
+  JOIN_TAB *field_tab;
+
+  if (!field)
+    return fields.head();
+  /*
+    Of all equal fields, return the first one we can use. Normally, this is the
+    field which belongs to the table that is the first in the join order.
+
+    There is one exception to this: When semi-join materialization strategy is
+    used, and the given field belongs to a table within the semi-join nest, we
+    must pick the first field in the semi-join nest.
+
+    Example: suppose we have a join order:
+
+       ot1 ot2  SJ-Mat(it1  it2  it3)  ot3
+
+    and equality ot2.col = it1.col = it2.col
+    If we're looking for best substitute for 'it2.col', we should pick it1.col
+    and not ot2.col.
+  */
+
+  field_tab= field->field->table->reginfo.join_tab;
+  if (field_tab->is_sj_materialization_strategy())
+  {
+    /*
+      It's a field from an materialized semi-join. We can substitute it only
+      for a field from the same semi-join.
+    */
+    JOIN_TAB *first;
+    JOIN *join= field_tab->join;
+    uint tab_idx= field_tab - field_tab->join->join_tab;
+    /* Find first table of this semi-join. */
+    for (int i=tab_idx; i >= join->const_tables; i--)
+    {
+      if (join->best_positions[i].sj_strategy == SJ_OPT_MATERIALIZE ||
+          join->best_positions[i].sj_strategy == SJ_OPT_MATERIALIZE_SCAN)
+        first= join->join_tab + i;
+      else
+        // Found first tab that doesn't belong to current SJ.
+        break;
+    }
+    /* Find an item to substitute for. */
+    while ((item= it++))
+    {
+      if (item->field->table->reginfo.join_tab >= first)
+      {
+        /*
+          If we found given field then return NULL to avoid unnecessary
+          substitution.
+        */
+        return (item != field) ? item : NULL;
+      }
+    }
+  }
+  else
+  {
+    /*
+      The field is not in SJ-Materialization nest. We must return the first field
+      that's not embedded in a SJ-Materialization nest.
+      Example: suppose we have a join order:
+
+          SJ-Mat(it1  it2)  ot1  ot2
+
+      and equality ot2.col = ot1.col = it2.col
+      If we're looking for best substitute for 'ot2.col', we should pick ot1.col
+      and not it2.col, because when we run a join between ot1 and ot2
+      execution of SJ-Mat(...) has already finished and we can't rely on the
+      value of it*.*.
+    */
+    while ((item= it++))
+    {
+      field_tab= item->field->table->reginfo.join_tab;
+      if (!field_tab->is_sj_materialization_strategy())
+        return item;
+    }
+  }
+  // Shouldn't get here.
+  DBUG_ASSERT(0);
+  return NULL;
+}

=== modified file 'sql/item_cmpfunc.h'
--- a/sql/item_cmpfunc.h	2009-07-06 07:57:39 +0000
+++ b/sql/item_cmpfunc.h	2009-07-29 16:18:49 +0000
@@ -1593,7 +1593,7 @@
   void add(Item_field *f);
   uint members();
   bool contains(Field *field);
-  Item_field* get_first() { return fields.head(); }
+  Item_field* get_first(Item_field *field);
   void merge(Item_equal *item);
   void update_const();
   enum Functype functype() const { return MULT_EQUAL_FUNC; }

=== modified file 'sql/sql_select.cc'
--- a/sql/sql_select.cc	2009-07-06 14:33:29 +0000
+++ b/sql/sql_select.cc	2009-07-29 16:18:49 +0000
@@ -10379,6 +10379,21 @@
 
 
 /**
+  Check whether the JOIN_TAB belongs to a materialized semi-join.
+*/
+
+bool JOIN_TAB::is_sj_materialization_strategy()
+{
+  uint tab_idx= this - join->join_tab;
+
+  return (emb_sj_nest &&
+          ((join->best_positions[tab_idx].sj_strategy == SJ_OPT_MATERIALIZE ||
+            join->best_positions[tab_idx].sj_strategy == SJ_OPT_MATERIALIZE_SCAN)));
+
+}
+
+
+/**
   Partially cleanup JOIN after it has executed: close index or rnd read
   (table cursors), free quick selects.
 
@@ -11720,7 +11735,7 @@
     head= item_const;
   else
   {
-    head= item_equal->get_first();
+    head= item_equal->get_first(NULL);
     it++;
   }
   Item_field *item_field;

=== modified file 'sql/sql_select.h'
--- a/sql/sql_select.h	2009-05-07 20:48:24 +0000
+++ b/sql/sql_select.h	2009-07-29 16:18:49 +0000
@@ -332,6 +332,7 @@
       return first_inner;
     return first_sj_inner_tab; 
   }
+  bool is_sj_materialization_strategy();
 } JOIN_TAB;
 
 /*