← Back to team overview

maria-developers team mailing list archive

[psergey@xxxxxxxxxxxx: Rev 2750: BUG#31480: Incorrect result for nested subquery when executed via semi join: in file:///home/psergey/dev/maria-5.3-subqueries-r3/]

 

Hello Igor,

I believe that the following patch has the property that 

  item_subselect->update_used_tables()

will handle table-bit re-assignments, i.e. it is what we have discussed earlier
today. I've also pushed this into 5.3-sj-subqueries tree.

----- Forwarded message from Sergey Petrunya <psergey@xxxxxxxxxxxx> -----

From: Sergey Petrunya <psergey@xxxxxxxxxxxx>
To: maria-developers@xxxxxxxxxxxxxxxxxxx
X-Mailer: mail (GNU Mailutils 1.2)
Date: Tue,  2 Feb 2010 23:00:49 +0300 (MSK)
Subject: [Maria-developers] Rev 2750: BUG#31480: Incorrect result for nested
	subquery when executed via semi join: in
	file:///home/psergey/dev/maria-5.3-subqueries-r3/

At file:///home/psergey/dev/maria-5.3-subqueries-r3/

------------------------------------------------------------
revno: 2750
revision-id: psergey@xxxxxxxxxxxx-20100202200045-13q0nb5dwzm739j6
parent: psergey@xxxxxxxxxxxx-20100128134833-9000udjp5wa3tsff
committer: Sergey Petrunya <psergey@xxxxxxxxxxxx>
branch nick: maria-5.3-subqueries-r3
timestamp: Tue 2010-02-02 23:00:45 +0300
message:
  BUG#31480: Incorrect result for nested subquery when executed via semi join:
        
  A mark-2 fix that can survive FROM subquery handling and has some code 
  unification with table elimination:
        
  Each subquery predicate now stores a (flat) list of all references from inside 
  to outside the subquery. We actually store (select, referred_item) pairs which
  allows Item_subselect::fix_after_pullout() to recalculate subquery predicate's 
  attributes after a broad range of  FROM- and IN-subselect flattening operations.
=== modified file 'mysql-test/r/subselect_sj.result'
--- a/mysql-test/r/subselect_sj.result	2010-01-17 14:51:10 +0000
+++ b/mysql-test/r/subselect_sj.result	2010-02-02 20:00:45 +0000
@@ -779,3 +779,48 @@
 1	PRIMARY	it2	ALL	NULL	NULL	NULL	NULL	20	Using where; End temporary
 DROP TABLE ot1, it1, it2;
 # End of BUG#38075
+#
+# BUG#31480: Incorrect result for nested subquery when executed via semi join
+#
+create table t1 (a int not null, b int not null);
+create table t2 (c int not null, d int not null);
+create table t3 (e int not null);
+insert into t1 values (1,10);
+insert into t1 values (2,10);
+insert into t1 values (1,20);
+insert into t1 values (2,20);
+insert into t1 values (3,20);
+insert into t1 values (2,30);
+insert into t1 values (4,40);
+insert into t2 values (2,10);
+insert into t2 values (2,20);
+insert into t2 values (4,10);
+insert into t2 values (5,10);
+insert into t2 values (3,20);
+insert into t2 values (2,40);
+insert into t3 values (10);
+insert into t3 values (30);
+insert into t3 values (10);
+insert into t3 values (20);
+explain extended
+select a from t1
+where a in (select c from t2 where d >= some(select e from t3 where b=e));
+id	select_type	table	type	possible_keys	key	key_len	ref	rows	filtered	Extra
+1	PRIMARY	t2	ALL	NULL	NULL	NULL	NULL	6	100.00	Start temporary
+1	PRIMARY	t1	ALL	NULL	NULL	NULL	NULL	7	100.00	Using where; End temporary; Using join buffer
+3	DEPENDENT SUBQUERY	t3	ALL	NULL	NULL	NULL	NULL	4	100.00	Using where
+Warnings:
+Note	1276	Field or reference 'test.t1.b' of SELECT #3 was resolved in SELECT #1
+Note	1003	select `test`.`t1`.`a` AS `a` from `test`.`t1` semi join (`test`.`t2`) where ((`test`.`t1`.`a` = `test`.`t2`.`c`) and <nop>(<in_optimizer>(`test`.`t2`.`d`,<exists>(select 1 AS `Not_used` from `test`.`t3` where ((`test`.`t1`.`b` = `test`.`t3`.`e`) and (<cache>(`test`.`t2`.`d`) >= `test`.`t3`.`e`))))))
+show warnings;
+Level	Code	Message
+Note	1276	Field or reference 'test.t1.b' of SELECT #3 was resolved in SELECT #1
+Note	1003	select `test`.`t1`.`a` AS `a` from `test`.`t1` semi join (`test`.`t2`) where ((`test`.`t1`.`a` = `test`.`t2`.`c`) and <nop>(<in_optimizer>(`test`.`t2`.`d`,<exists>(select 1 AS `Not_used` from `test`.`t3` where ((`test`.`t1`.`b` = `test`.`t3`.`e`) and (<cache>(`test`.`t2`.`d`) >= `test`.`t3`.`e`))))))
+select a from t1
+where a in (select c from t2 where d >= some(select e from t3 where b=e));
+a
+2
+2
+3
+2
+drop table t1, t2, t3;

=== modified file 'mysql-test/r/subselect_sj_jcl6.result'
--- a/mysql-test/r/subselect_sj_jcl6.result	2010-01-17 14:51:10 +0000
+++ b/mysql-test/r/subselect_sj_jcl6.result	2010-02-02 20:00:45 +0000
@@ -783,6 +783,51 @@
 1	PRIMARY	it2	ALL	NULL	NULL	NULL	NULL	20	Using where; End temporary; Using join buffer
 DROP TABLE ot1, it1, it2;
 # End of BUG#38075
+#
+# BUG#31480: Incorrect result for nested subquery when executed via semi join
+#
+create table t1 (a int not null, b int not null);
+create table t2 (c int not null, d int not null);
+create table t3 (e int not null);
+insert into t1 values (1,10);
+insert into t1 values (2,10);
+insert into t1 values (1,20);
+insert into t1 values (2,20);
+insert into t1 values (3,20);
+insert into t1 values (2,30);
+insert into t1 values (4,40);
+insert into t2 values (2,10);
+insert into t2 values (2,20);
+insert into t2 values (4,10);
+insert into t2 values (5,10);
+insert into t2 values (3,20);
+insert into t2 values (2,40);
+insert into t3 values (10);
+insert into t3 values (30);
+insert into t3 values (10);
+insert into t3 values (20);
+explain extended
+select a from t1
+where a in (select c from t2 where d >= some(select e from t3 where b=e));
+id	select_type	table	type	possible_keys	key	key_len	ref	rows	filtered	Extra
+1	PRIMARY	t2	ALL	NULL	NULL	NULL	NULL	6	100.00	Start temporary
+1	PRIMARY	t1	ALL	NULL	NULL	NULL	NULL	7	100.00	Using where; End temporary; Using join buffer
+3	DEPENDENT SUBQUERY	t3	ALL	NULL	NULL	NULL	NULL	4	100.00	Using where
+Warnings:
+Note	1276	Field or reference 'test.t1.b' of SELECT #3 was resolved in SELECT #1
+Note	1003	select `test`.`t1`.`a` AS `a` from `test`.`t1` semi join (`test`.`t2`) where ((`test`.`t1`.`a` = `test`.`t2`.`c`) and <nop>(<in_optimizer>(`test`.`t2`.`d`,<exists>(select 1 AS `Not_used` from `test`.`t3` where ((`test`.`t1`.`b` = `test`.`t3`.`e`) and (<cache>(`test`.`t2`.`d`) >= `test`.`t3`.`e`))))))
+show warnings;
+Level	Code	Message
+Note	1276	Field or reference 'test.t1.b' of SELECT #3 was resolved in SELECT #1
+Note	1003	select `test`.`t1`.`a` AS `a` from `test`.`t1` semi join (`test`.`t2`) where ((`test`.`t1`.`a` = `test`.`t2`.`c`) and <nop>(<in_optimizer>(`test`.`t2`.`d`,<exists>(select 1 AS `Not_used` from `test`.`t3` where ((`test`.`t1`.`b` = `test`.`t3`.`e`) and (<cache>(`test`.`t2`.`d`) >= `test`.`t3`.`e`))))))
+select a from t1
+where a in (select c from t2 where d >= some(select e from t3 where b=e));
+a
+2
+2
+3
+2
+drop table t1, t2, t3;
 set join_cache_level=default;
 show variables like 'join_cache_level';
 Variable_name	Value

=== modified file 'mysql-test/t/subselect_sj.test'
--- a/mysql-test/t/subselect_sj.test	2010-01-17 14:51:10 +0000
+++ b/mysql-test/t/subselect_sj.test	2010-02-02 20:00:45 +0000
@@ -681,3 +681,41 @@
 DROP TABLE ot1, it1, it2;
 
 --echo # End of BUG#38075
+
+--echo #
+--echo # BUG#31480: Incorrect result for nested subquery when executed via semi join
+--echo #
+create table t1 (a int not null, b int not null);
+create table t2 (c int not null, d int not null);
+create table t3 (e int not null);
+
+insert into t1 values (1,10);
+insert into t1 values (2,10);
+insert into t1 values (1,20);
+insert into t1 values (2,20);
+insert into t1 values (3,20);
+insert into t1 values (2,30);
+insert into t1 values (4,40);
+
+insert into t2 values (2,10);
+insert into t2 values (2,20);
+insert into t2 values (4,10);
+insert into t2 values (5,10);
+insert into t2 values (3,20);
+insert into t2 values (2,40);
+
+insert into t3 values (10);
+insert into t3 values (30);
+insert into t3 values (10);
+insert into t3 values (20);
+
+explain extended
+select a from t1
+where a in (select c from t2 where d >= some(select e from t3 where b=e));
+show warnings;
+
+select a from t1
+where a in (select c from t2 where d >= some(select e from t3 where b=e));
+
+drop table t1, t2, t3;
+

=== modified file 'sql/item.cc'
--- a/sql/item.cc	2010-01-17 14:55:08 +0000
+++ b/sql/item.cc	2010-02-02 20:00:45 +0000
@@ -3646,7 +3646,7 @@
                          substitution)
 */
 
-static void mark_as_dependent(THD *thd, SELECT_LEX *last, SELECT_LEX *current,
+static bool mark_as_dependent(THD *thd, SELECT_LEX *last, SELECT_LEX *current,
                               Item_ident *resolved_item,
                               Item_ident *mark_item)
 {
@@ -3657,7 +3657,8 @@
   /* store pointer on SELECT_LEX from which item is dependent */
   if (mark_item)
     mark_item->depended_from= last;
-  current->mark_as_dependent(last, resolved_item);
+  if (current->mark_as_dependent(thd, last, resolved_item))
+    return TRUE;
   if (thd->lex->describe & DESCRIBE_EXTENDED)
   {
     push_warning_printf(thd, MYSQL_ERROR::WARN_LEVEL_NOTE,
@@ -3667,6 +3668,7 @@
                  resolved_item->field_name,
                  current->select_number, last->select_number);
   }
+  return FALSE;
 }
 
 
@@ -4118,6 +4120,7 @@
                             ((ref_type == REF_ITEM || ref_type == FIELD_ITEM) ?
                              (Item_ident*) (*reference) :
                              0));
+                            
           /*
             A reference to a view field had been found and we
             substituted it instead of this Item (find_field_in_tables
@@ -6437,7 +6440,7 @@
   if (depended_from == new_parent)
   {
     *ref= outer_ref;
-    outer_ref->fix_after_pullout(new_parent, ref);
+    (*ref)->fix_after_pullout(new_parent, ref);
   }
 }
 

=== modified file 'sql/item_subselect.cc'
--- a/sql/item_subselect.cc	2010-01-28 13:48:33 +0000
+++ b/sql/item_subselect.cc	2010-02-02 20:00:45 +0000
@@ -39,8 +39,8 @@
 Item_subselect::Item_subselect():
   Item_result_field(), value_assigned(0), thd(0), substitution(0),
   engine(0), old_engine(0), used_tables_cache(0), have_to_be_excluded(0),
-  const_item_cache(1), in_fix_fields(0), engine_changed(0), changed(0),
-  is_correlated(FALSE)
+  const_item_cache(1), inside_first_fix_fields(0), done_first_fix_fields(FALSE),
+  engine_changed(0), changed(0), is_correlated(FALSE)
 {
   with_subselect= 1;
   reset();
@@ -167,18 +167,23 @@
 
   DBUG_ASSERT(fixed == 0);
   engine->set_thd((thd= thd_param));
-  if (!in_fix_fields)
-    refers_to.empty();
+  if (!done_first_fix_fields)
+  {
+    done_first_fix_fields= TRUE;
+    inside_first_fix_fields= TRUE;
+  }
+
   eliminated= FALSE;
+  parent_select= thd_param->lex->current_select;
 
   if (check_stack_overrun(thd, STACK_MIN_SIZE, (uchar*)&res))
     return TRUE;
   
-  in_fix_fields++;
   res= engine->prepare();
 
   // all transformation is done (used by prepared statements)
   changed= 1;
+  inside_first_fix_fields= FALSE;
 
   if (!res)
   {
@@ -210,14 +215,12 @@
       if (!(*ref)->fixed)
 	ret= (*ref)->fix_fields(thd, ref);
       thd->where= save_where;
-      in_fix_fields--;
       return ret;
     }
     // Is it one field subselect?
     if (engine->cols() > max_columns)
     {
       my_error(ER_OPERAND_COLUMNS, MYF(0), 1);
-      in_fix_fields--;
       return TRUE;
     }
     fix_length_and_dec();
@@ -234,7 +237,6 @@
   fixed= 1;
 
 err:
-  in_fix_fields--;
   thd->where= save_where;
   return res;
 }
@@ -242,11 +244,12 @@
 
 bool Item_subselect::enumerate_field_refs_processor(uchar *arg)
 {
-  List_iterator<Item> it(refers_to);
-  Item *item;
-  while ((item= it++))
+  List_iterator<Ref_to_outside> it(upper_refs);
+  Ref_to_outside *upper;
+  
+  while ((upper= it++))
   {
-    if (item->walk(&Item::enumerate_field_refs_processor, FALSE, arg))
+    if (upper->item->walk(&Item::enumerate_field_refs_processor, FALSE, arg))
       return TRUE;
   }
   return FALSE;
@@ -258,6 +261,115 @@
   return FALSE;
 }
 
+
+bool Item_subselect::mark_as_dependent(THD *thd, st_select_lex *select, 
+                                       Item *item)
+{
+  if (inside_first_fix_fields)
+  {
+    is_correlated= TRUE;
+    Ref_to_outside *upper;
+    if (!(upper= new (thd->stmt_arena->mem_root) Ref_to_outside()))
+      return TRUE;
+    upper->select= select;
+    upper->item= item;
+    if (upper_refs.push_back(upper, thd->stmt_arena->mem_root))
+      return TRUE;
+  }
+  return FALSE;
+}
+
+/*
+  Adjust attributes after our parent select has been merged into grandparent
+
+  DESCRIPTION
+    Subquery is a composite object which may be correlated, that is, it may
+    have
+    1. references to tables of the parent select (i.e. one that has the clause
+      with the subquery predicate)
+    2. references to tables of the grandparent select
+    3. references to tables of further ancestors.
+    
+    Before the pullout, this item indicates:
+    - #1 with table bits in used_tables()
+    - #2 and #3 with OUTER_REF_TABLE_BIT.
+
+    After parent has been merged with grandparent:
+    - references to parent and grandparent tables should be indicated with 
+      table bits.
+    - references to greatgrandparent and further ancestors - with
+      OUTER_REF_TABLE_BIT.
+*/
+
+void Item_subselect::fix_after_pullout(st_select_lex *new_parent, Item **ref)
+{
+  recalc_used_tables(new_parent, TRUE);
+  parent_select= new_parent;
+}
+
+
+/*
+  Recalculate used_tables_cache 
+*/
+
+void Item_subselect::recalc_used_tables(st_select_lex *new_parent, 
+                                        bool after_pullout)
+{
+  List_iterator<Ref_to_outside> it(upper_refs);
+  Ref_to_outside *upper;
+  
+  used_tables_cache= 0;
+  while ((upper= it++))
+  {
+    bool found= FALSE;
+    /*
+      Check if
+        1. the upper reference refers to the new immediate parent select, or
+        2. one of the further ancestors.
+
+      We rely on the fact that the tree of selects is modified by some kind of
+      'flattening', i.e. a process where child selects are merged into their
+      parents.
+      The merged selects are removed from the select tree but keep pointers to
+      their parents.
+    */
+    for (st_select_lex *sel= upper->select; sel; sel= sel->outer_select())
+    {
+      /* 
+        If we've reached the new parent select by walking upwards from
+        reference's original select, this means that the reference is now 
+        referring to the direct parent:
+      */
+      if (sel == new_parent)
+      {
+        found= TRUE;
+        /* 
+          upper->item may be NULL when we've referred to a grouping function,
+          in which case we don't care about what it's table_map really is,
+          because item->with_sum_func==1 will ensure correct placement of the
+          item.
+        */
+        if (upper->item)
+        {
+          if (after_pullout)
+            upper->item->fix_after_pullout(new_parent, &(upper->item));
+          upper->item->update_used_tables();
+          used_tables_cache |= upper->item->used_tables();
+        }
+      }
+    }
+    if (!found)
+      used_tables_cache|= OUTER_REF_TABLE_BIT;
+  }
+  /* 
+    Don't update const_tables_cache yet as we don't yet know which of the
+    parent's tables are constant. Parent will call update_used_tables() after
+    he has done const table detection, and that will be our chance to update
+    const_tables_cache.
+  */
+}
+
+
 bool Item_subselect::walk(Item_processor processor, bool walk_subquery,
                           uchar *argument)
 {
@@ -397,6 +509,7 @@
 
 void Item_subselect::update_used_tables()
 {
+  recalc_used_tables(parent_select, FALSE);
   if (!engine->uncacheable())
   {
     // did all used tables become static?
@@ -1843,6 +1956,18 @@
   return result || Item_subselect::fix_fields(thd_arg, ref);
 }
 
+void Item_in_subselect::fix_after_pullout(st_select_lex *new_parent, Item **ref)
+{
+  left_expr->fix_after_pullout(new_parent, &left_expr);
+  Item_subselect::fix_after_pullout(new_parent, ref);
+}
+
+void Item_in_subselect::update_used_tables()
+{
+  Item_subselect::update_used_tables();
+  left_expr->update_used_tables();
+  used_tables_cache |= left_expr->used_tables();
+}
 
 /**
   Try to create an engine to compute the subselect via materialization,

=== modified file 'sql/item_subselect.h'
--- a/sql/item_subselect.h	2010-01-28 13:48:33 +0000
+++ b/sql/item_subselect.h	2010-02-02 20:00:45 +0000
@@ -67,14 +67,32 @@
   bool have_to_be_excluded;
   /* cache of constant state */
   bool const_item_cache;
-
+  
+  bool inside_first_fix_fields;
+  bool done_first_fix_fields;
 public:
-  /* 
-    References from inside the subquery to the select that this predicate is
-    in.  References to parent selects not included.
+  /* A reference from inside subquery predicate to somewhere outside of it */
+  class Ref_to_outside : public Sql_alloc
+  {
+  public:
+    st_select_lex *select; /* Select where the reference is pointing to */
+    /* 
+      What is being referred. This may be NULL when we're referring to an
+      aggregate function.
+    */ 
+    Item *item; 
+  };
+  /*
+    References from within this subquery to somewhere outside of it (i.e. to
+    parent select, grandparent select, etc)
   */
-  List<Item> refers_to;
-  int in_fix_fields;
+  List<Ref_to_outside> upper_refs;
+  st_select_lex *parent_select;
+  
+ /*
+   TRUE<=>Table Elimination has made it redundant to evaluate this select
+          (and so it is not part of QEP, etc)
+ */
   bool eliminated;
   
   /* changed engine indicator */
@@ -117,6 +135,9 @@
     return null_value;
   }
   bool fix_fields(THD *thd, Item **ref);
+  bool mark_as_dependent(THD *thd, st_select_lex *select, Item *item);
+  void fix_after_pullout(st_select_lex *new_parent, Item **ref);
+  void recalc_used_tables(st_select_lex *new_parent, bool after_pullout);
   virtual bool exec();
   virtual void fix_length_and_dec();
   table_map used_tables() const;
@@ -396,6 +417,8 @@
   bool test_limit(st_select_lex_unit *unit);
   virtual void print(String *str, enum_query_type query_type);
   bool fix_fields(THD *thd, Item **ref);
+  void fix_after_pullout(st_select_lex *new_parent, Item **ref);
+  void update_used_tables();
   bool setup_engine();
   bool init_left_expr_cache();
   bool is_expensive_processor(uchar *arg);

=== modified file 'sql/item_sum.cc'
--- a/sql/item_sum.cc	2009-10-15 21:38:29 +0000
+++ b/sql/item_sum.cc	2010-02-02 20:00:45 +0000
@@ -350,7 +350,7 @@
          sl= sl->master_unit()->outer_select() )
       sl->master_unit()->item->with_sum_func= 1;
   }
-  thd->lex->current_select->mark_as_dependent(aggr_sel, NULL);
+  thd->lex->current_select->mark_as_dependent(thd, aggr_sel, NULL);
   return FALSE;
 }
 

=== modified file 'sql/sql_lex.cc'
--- a/sql/sql_lex.cc	2010-01-28 13:48:33 +0000
+++ b/sql/sql_lex.cc	2010-02-02 20:00:45 +0000
@@ -1841,9 +1841,8 @@
     'last' should be reachable from this st_select_lex_node
 */
 
-void st_select_lex::mark_as_dependent(st_select_lex *last, Item *dependency)
+bool st_select_lex::mark_as_dependent(THD *thd, st_select_lex *last, Item *dependency)
 {
-  SELECT_LEX *next_to_last;
   /*
     Mark all selects from resolved to 1 before select where was
     found table as depended (of select where was found table)
@@ -1867,12 +1866,15 @@
           sl->uncacheable|= UNCACHEABLE_UNITED;
       }
     }
-    next_to_last= s;
+
+    Item_subselect *subquery_expr= s->master_unit()->item;
+    if (subquery_expr && subquery_expr->mark_as_dependent(thd, last, 
+                                                          dependency))
+      return TRUE;
   }
   is_correlated= TRUE;
   this->master_unit()->item->is_correlated= TRUE;
-  if (dependency)
-    next_to_last->master_unit()->item->refers_to.push_back(dependency);
+  return FALSE;
 }
 
 bool st_select_lex_node::set_braces(bool value)      { return 1; }

=== modified file 'sql/sql_lex.h'
--- a/sql/sql_lex.h	2010-01-28 13:48:33 +0000
+++ b/sql/sql_lex.h	2010-02-02 20:00:45 +0000
@@ -747,7 +747,7 @@
     return master_unit()->return_after_parsing();
   }
 
-  void mark_as_dependent(st_select_lex *last, Item *dependency);
+  bool mark_as_dependent(THD *thd, st_select_lex *last, Item *dependency);
 
   bool set_braces(bool value);
   bool inc_in_sum_expr();

=== modified file 'sql/sql_select.h'
--- a/sql/sql_select.h	2010-01-28 13:48:33 +0000
+++ b/sql/sql_select.h	2010-02-02 20:00:45 +0000
@@ -282,13 +282,11 @@
   }
   bool check_rowid_field()
   {
-/* !!!NB igor: enable the code in this comment after backporting the SJ code 
     if (keep_current_rowid && !used_rowid_fields)
     {
       used_rowid_fields= 1;
       used_fieldlength+= table->file->ref_length;
     }
-*/    
     return test(used_rowid_fields);
   }
   bool is_inner_table_of_semi_join_with_first_match()


_______________________________________________
Mailing list: https://launchpad.net/~maria-developers
Post to     : maria-developers@xxxxxxxxxxxxxxxxxxx
Unsubscribe : https://launchpad.net/~maria-developers
More help   : https://help.launchpad.net/ListHelp

----- End forwarded message -----

-- 
BR
 Sergey
-- 
Sergey Petrunia, Software Developer
Monty Program AB, http://askmonty.org
Blog: http://s.petrunia.net/blog