← Back to team overview

maria-developers team mailing list archive

Rev 2743: MWL#17: Table elimination: last fixes in file:///home/psergey/dev/maria-5.1-table-elim-r11/

 

At file:///home/psergey/dev/maria-5.1-table-elim-r11/

------------------------------------------------------------
revno: 2743
revision-id: psergey@xxxxxxxxxxxx-20090824191048-xev0gm1miw6ezz2r
parent: psergey@xxxxxxxxxxxx-20090824081242-32o90vv8awk27sut
committer: Sergey Petrunya <psergey@xxxxxxxxxxxx>
branch nick: maria-5.1-table-elim-r11
timestamp: Mon 2009-08-24 22:10:48 +0300
message:
  MWL#17: Table elimination: last fixes
  - Add an @@optimizer_switch flag for table_elimination for debug build
  - Better comments 
=== modified file 'mysql-test/t/index_merge_myisam.test'
--- a/mysql-test/t/index_merge_myisam.test	2009-03-14 18:58:23 +0000
+++ b/mysql-test/t/index_merge_myisam.test	2009-08-24 19:10:48 +0000
@@ -25,15 +25,19 @@
 --echo #  we get another @@optimizer_switch user)
 --echo #
 
+--replace_regex /,table_elimination=on//
 select @@optimizer_switch;
 
 set optimizer_switch='index_merge=off,index_merge_union=off';
+--replace_regex /,table_elimination=on//
 select @@optimizer_switch;
 
 set optimizer_switch='index_merge_union=on';
+--replace_regex /,table_elimination=on//
 select @@optimizer_switch;
 
 set optimizer_switch='default,index_merge_sort_union=off';
+--replace_regex /,table_elimination=on//
 select @@optimizer_switch;
 
 --error ER_WRONG_VALUE_FOR_VAR
@@ -71,17 +75,21 @@
 
 set optimizer_switch=default;
 set optimizer_switch='index_merge=off,index_merge_union=off,default';
+--replace_regex /,table_elimination=on//
 select @@optimizer_switch;
 set optimizer_switch=default;
 
 # Check setting defaults for global vars
+--replace_regex /,table_elimination=on//
 select @@global.optimizer_switch;
 set @@global.optimizer_switch=default;
+--replace_regex /,table_elimination=on//
 select @@global.optimizer_switch;
 
 --echo #
 --echo # Check index_merge's @@optimizer_switch flags
 --echo #
+--replace_regex /,table_elimination.on//
 select @@optimizer_switch;
 
 create table t0 (a int);
@@ -182,6 +190,7 @@
 explain select * from t1 where a=10 and b=10 or c=10;
 
 set optimizer_switch=default;
+--replace_regex /,table_elimination.on//
 show variables like 'optimizer_switch';
 
 drop table t0, t1;

=== modified file 'sql/mysql_priv.h'
--- a/sql/mysql_priv.h	2009-04-25 10:05:32 +0000
+++ b/sql/mysql_priv.h	2009-08-24 19:10:48 +0000
@@ -528,14 +528,27 @@
 #define OPTIMIZER_SWITCH_INDEX_MERGE_UNION 2
 #define OPTIMIZER_SWITCH_INDEX_MERGE_SORT_UNION 4
 #define OPTIMIZER_SWITCH_INDEX_MERGE_INTERSECT 8
-#define OPTIMIZER_SWITCH_LAST 16
-
+
+#ifdef DBUG_OFF
+#  define OPTIMIZER_SWITCH_LAST 16
+#else
+#  define OPTIMIZER_SWITCH_TABLE_ELIMINATION 16
+#  define OPTIMIZER_SWITCH_LAST 32
+#endif
+
+#ifdef DBUG_OFF 
 /* The following must be kept in sync with optimizer_switch_str in mysqld.cc */
-#define OPTIMIZER_SWITCH_DEFAULT (OPTIMIZER_SWITCH_INDEX_MERGE | \
-                                  OPTIMIZER_SWITCH_INDEX_MERGE_UNION | \
-                                  OPTIMIZER_SWITCH_INDEX_MERGE_SORT_UNION | \
-                                  OPTIMIZER_SWITCH_INDEX_MERGE_INTERSECT)
-
+#  define OPTIMIZER_SWITCH_DEFAULT (OPTIMIZER_SWITCH_INDEX_MERGE | \
+                                    OPTIMIZER_SWITCH_INDEX_MERGE_UNION | \
+                                    OPTIMIZER_SWITCH_INDEX_MERGE_SORT_UNION | \
+                                    OPTIMIZER_SWITCH_INDEX_MERGE_INTERSECT)
+#else 
+#  define OPTIMIZER_SWITCH_DEFAULT (OPTIMIZER_SWITCH_INDEX_MERGE | \
+                                    OPTIMIZER_SWITCH_INDEX_MERGE_UNION | \
+                                    OPTIMIZER_SWITCH_INDEX_MERGE_SORT_UNION | \
+                                    OPTIMIZER_SWITCH_INDEX_MERGE_INTERSECT | \
+                                    OPTIMIZER_SWITCH_TABLE_ELIMINATION)
+#endif
 
 /*
   Replication uses 8 bytes to store SQL_MODE in the binary log. The day you

=== modified file 'sql/mysqld.cc'
--- a/sql/mysqld.cc	2009-05-19 09:28:05 +0000
+++ b/sql/mysqld.cc	2009-08-24 19:10:48 +0000
@@ -297,9 +297,14 @@
 
 static const char *optimizer_switch_names[]=
 {
-  "index_merge","index_merge_union","index_merge_sort_union", 
-  "index_merge_intersection", "default", NullS
+  "index_merge","index_merge_union","index_merge_sort_union",
+  "index_merge_intersection",
+#ifndef DBUG_OFF
+  "table_elimination",
+#endif
+  "default", NullS
 };
+
 /* Corresponding defines are named OPTIMIZER_SWITCH_XXX */
 static const unsigned int optimizer_switch_names_len[]=
 {
@@ -307,6 +312,9 @@
   sizeof("index_merge_union") - 1,
   sizeof("index_merge_sort_union") - 1,
   sizeof("index_merge_intersection") - 1,
+#ifndef DBUG_OFF
+  sizeof("table_elimination") - 1,
+#endif
   sizeof("default") - 1
 };
 TYPELIB optimizer_switch_typelib= { array_elements(optimizer_switch_names)-1,"",
@@ -382,7 +390,10 @@
 /* Text representation for OPTIMIZER_SWITCH_DEFAULT */
 static const char *optimizer_switch_str="index_merge=on,index_merge_union=on,"
                                         "index_merge_sort_union=on,"
-                                        "index_merge_intersection=on";
+                                        "index_merge_intersection=on"
+#ifndef DBUG_OFF                                        
+                                        ",table_elimination=on";
+#endif
 static char *mysqld_user, *mysqld_chroot, *log_error_file_ptr;
 static char *opt_init_slave, *language_ptr, *opt_init_connect;
 static char *default_character_set_name;
@@ -6929,8 +6940,11 @@
    0, GET_ULONG, OPT_ARG, MAX_TABLES+1, 0, MAX_TABLES+2, 0, 1, 0},
   {"optimizer_switch", OPT_OPTIMIZER_SWITCH,
    "optimizer_switch=option=val[,option=val...], where option={index_merge, "
-   "index_merge_union, index_merge_sort_union, index_merge_intersection} and "
-   "val={on, off, default}.",
+   "index_merge_union, index_merge_sort_union, index_merge_intersection"
+#ifndef DBUG_OFF
+   ", table_elimination"
+#endif 
+   "} and val={on, off, default}.",
    (uchar**) &optimizer_switch_str, (uchar**) &optimizer_switch_str, 0, GET_STR, REQUIRED_ARG, 
    /*OPTIMIZER_SWITCH_DEFAULT*/0,
    0, 0, 0, 0, 0},

=== modified file 'sql/opt_table_elimination.cc'
--- a/sql/opt_table_elimination.cc	2009-08-24 08:12:42 +0000
+++ b/sql/opt_table_elimination.cc	2009-08-24 19:10:48 +0000
@@ -18,6 +18,7 @@
 
 /*
   OVERVIEW
+  ========
 
   This file contains table elimination module. The idea behind table
   elimination is as follows: suppose we have a left join
@@ -36,7 +37,9 @@
   null-complemented one) and we don't care about what that record combination 
   is.
 
+
   MODULE INTERFACE
+  ================
 
   The module has one entry point - eliminate_tables() function, which one 
   needs to call (once) at some point before the join optimization.
@@ -58,23 +61,25 @@
 
   Table elimination is redone on every PS re-execution.
 
+
   TABLE ELIMINATION ALGORITHM FOR ONE OUTER JOIN
+  ==============================================
 
-  As said above, we can remove inner side of an outer join if it is 
+  As described above, we can remove inner side of an outer join if it is 
 
     1. not referred to from any other parts of the query
     2. always produces one matching record combination.
 
-  We check #1 by doing a recursive descent down the join->join_list while 
-  maintaining a union of used_tables() attribute of all expressions we've seen
-  "elsewhere". When we encounter an outer join, we check if the bitmap of 
-  tables on its inner side has an intersection with tables that are used 
+  We check #1 by doing a recursive descent down the join->join_list while
+  maintaining a union of used_tables() attribute of all Item expressions in
+  other parts of the query. When we encounter an outer join, we check if the
+  bitmap of tables on its inner side has intersection with tables that are used
   elsewhere. No intersection means that inner side of the outer join could 
   potentially be eliminated.
 
   In order to check #2, one needs to prove that inner side of an outer join 
-  is functionally dependent on the outside. We prove dependency by proving
-  functional dependency of intermediate objects:
+  is functionally dependent on the outside. The proof is constructed from
+  functional dependencies of intermediate objects:
 
   - Inner side of outer join is functionally dependent when each of its tables
     are functionally dependent. (We assume a table is functionally dependent 
@@ -91,14 +96,15 @@
     
     where expr is functionally-depdendent.
 
-  Apparently the above rules can be applied recursively. Also, certain entities
-  depend on multiple other entities. We model this by a bipartite graph which
-  has two kinds of nodes:
+  These relationships are modeled as a bipartite directed graph that has
+  dependencies as edges and two kinds of nodes:
 
   Value nodes:
    - Table column values (each is a value of tblX.columnY)
-   - Table nodes (each node represents a table inside an eliminable join nest).
-  each value is either bound (i.e. functionally dependent) or not.
+   - Table values (each node represents a table inside the join nest we're
+     trying to eliminate).
+  A value has one attribute, it is either bound (i.e. functionally dependent) 
+  or not.
 
   Module nodes:
    - Modules representing tblX.colY=expr equalities. Equality module has 
@@ -118,11 +124,54 @@
   (their expressions are either constant or depend only on tables that are
   outside of the outer join in question) and performns a breadth-first
   traversal. If we reach the outer join nest node, it means outer join is
-  functionally-dependant and can be eliminated. Otherwise it cannot.
+  functionally-dependant and can be eliminated. Otherwise it cannot be.
  
-  HANDLING MULTIPLE NESTED OUTER JOINS 
-  (TODO : explanations why 'local bottom up is sufficient')
-
+  HANDLING MULTIPLE NESTED OUTER JOINS
+  ====================================
+
+  Outer joins that are not nested one within another are eliminated
+  independently. For nested outer joins we have the following considerations:
+  
+  1. ON expressions from children outer joins must be taken into account 
+   
+  Consider this example:
+
+    SELECT t0.* 
+    FROM 
+      t0  
+    LEFT JOIN 
+      (t1 LEFT JOIN t2 ON t2.primary_key=t1.col1)
+    ON 
+      t1.primary_key=t0.col AND t2.col1=t1.col2
+
+  Here we cannot eliminate the "... LEFT JOIN t2 ON ..." part alone because the
+  ON clause of top level outer join has references to table t2. 
+  We can eliminate the entire  "... LEFT JOIN (t1 LEFT JOIN t2) ON .." part,
+  but in order to do that, we must look at both ON expressions.
+  
+  2. ON expressions of parent outer joins are useless.
+  Consider an example:
+
+    SELECT t0.* 
+    FROM
+      t0 
+    LEFT JOIN 
+      (t1 LEFT JOIN t2 ON some_expr)
+    ON
+      t2.primary_key=t1.col  -- (*)
+  
+  Here the uppermost ON expression has a clause that gives us functional
+  dependency of table t2 on t1 and hence could be used to eliminate the
+  "... LEFT JOIN t2 ON..." part.
+  However, we would not actually encounter this situation, because before the
+  table elimination we run simplify_joins(), which, among other things, upon
+  seeing a functional dependency condition like (*) will convert the outer join
+  of
+    
+    "... LEFT JOIN t2 ON ..."
+  
+  into inner join and thus make table elimination not to consider eliminating
+  table t2.
 */
 
 class Value_dep;
@@ -157,7 +206,7 @@
 
 /*
   A table field value. There is exactly only one such object for any tblX.fieldY
-  - the field epends on its table and equalities
+  - the field depends on its table and equalities
   - expressions that use the field are its dependencies
 */
 class Field_value : public Value_dep
@@ -175,26 +224,23 @@
     field_index 
   */
   Field_value *next_table_field;
-  /* 
-    Offset of our part of the bitmap psergey-todo: better comment!
+  /*
+    Offset to bits in Func_dep_analyzer::expr_deps
   */
   uint bitmap_offset;
   
-  /*
-    Field became known. Check out
-    - unique keys we belong to
-    - expressions that depend on us.
-  */
   void now_bound(Func_dep_analyzer *fda, Module_dep **bound_modules);
   void signal_from_field_to_exprs(Func_dep_analyzer* fda, 
                                   Module_dep **bound_modules);
 };
 
+
 /*
   A table value. There is one Table_value object for every table that can
   potentially be eliminated.
+  Dependencies:
   - table depends on any of its unique keys
-  - has its fields and embedding outer join as dependency.
+  - has its fields and embedding outer join as dependency
 */
 class Table_value : public Value_dep
 {
@@ -205,13 +251,13 @@
   TABLE *table;
   Field_value *fields; /* Ordered list of fields that belong to this table */
   Key_module *keys; /* Ordered list of Unique keys in this table */
-  //Outer_join_module *outer_join_dep;
   void now_bound(Func_dep_analyzer *fda, Module_dep **bound_modules);
 };
 
 
 /*
-  A 'module'. Module has dependencies
+  A 'module'. Module has unsatisfied dependencies, number of whose is stored in
+  unknown_args. Modules also can be linked together in a list.
 */
 
 class Module_dep : public Sql_alloc
@@ -232,10 +278,11 @@
 
 /*
   This represents either
-   - "tbl.column= expr" equality dependency, i.e. tbl.column depends on fields 
+   - "tbl.column= expr" equality dependency, i.e. tbl.column depends on fields
      used in the expression, or
    - tbl1.col1=tbl2.col2=... multi-equality.
 */
+
 class Equality_module : public Module_dep
 {
 public:
@@ -314,7 +361,11 @@
   /* Element for the outer join we're attempting to eliminate */
   Outer_join_module *outer_join_dep;
 
-  /* Bitmap of how expressions depend on bits */
+  /* 
+    Bitmap of how expressions depend on bits. Given a Field_value object,
+    one can check bitmap_is_set(expr_deps, field_val->bitmap_offset + expr_no)
+    to see if expression equality_mods[expr_no] depends on the given field.
+  */
   MY_BITMAP expr_deps;
 };
 
@@ -453,11 +504,23 @@
   }
   case Item_func::MULT_EQUAL_FUNC:
   {
+    /*
+      The condition is a 
+
+        tbl1.field1 = tbl2.field2 = tbl3.field3 [= const_expr]
+
+      multiple-equality. Do two things:
+       - Collect an ordered List<Field_value> of tblX.colY where tblX is one
+         of those that we're trying to eliminate.
+       - rembember if there was a const_expr or tblY.colZ that we can consider
+         bound.
+      Store all collected information in a Equality_module object.
+    */
     Item_equal *item_equal= (Item_equal*)cond;
     List<Field_value> *fvl;
     if (!(fvl= new List<Field_value>))
       break;
-    
+
     Item_equal_iterator it(*item_equal);
     Item_field *item;
     Item *bound_item= item_equal->get_const();
@@ -554,9 +617,9 @@
                                  Equality_module *end, uint and_level)
 {
   if (start == new_fields)
-    return start;                                // Impossible or
+    return start;  /*  (nothing) OR (...) -> (nothing) */
   if (new_fields == end)
-    return start;                                // No new fields, skip all
+    return start;  /*  (...) OR (nothing) -> (nothing) */
 
   Equality_module *first_free=new_fields;
 
@@ -564,28 +627,6 @@
   {
     for (Equality_module *old=start ; old != first_free ; old++)
     {
-      /* 
-        Merge multiple-equalities:
-        A: YES.  
-          (a=b=c) OR (a=b=d)  produce "a=b".
-
-        TODO:
-          sort by (table_ptr, column_index)
-          then run along the two and produce an intersection
-
-        Q: What about constants?
-          a=b=3  OR a=b=5 -> a=b= (either 3 or 5)
-
-          a=b  OR a=b=5 -> a=b= (any constant)
-        A: keep the constant iff it is present in both sides and is the same.
-        
-        class Multi_equality
-        {
-          Item *const_item;
-          List<...) list;
-        };
-
-      */
       if (old->field == new_fields->field)
       {
         if (!old->field)
@@ -616,7 +657,7 @@
           }
           else
           {
-            // no single constant/bound item.
+            /* no single constant/bound item. */
             old->expression= NULL;
           }
            
@@ -975,7 +1016,7 @@
     }
     else 
     {
-      /* It's a multi-equality*/
+      /* It's a multi-equality */
       eq_mod->unknown_args= !test(eq_mod->expression);
       List_iterator<Field_value> it(*eq_mod->mult_equal_fields);
       Field_value* field_val;
@@ -1050,6 +1091,11 @@
   if (!join->outer_join)
     DBUG_VOID_RETURN;
 
+#ifndef DBUG_OFF
+  if (!optimizer_flag(thd, OPTIMIZER_SWITCH_TABLE_ELIMINATION))
+    DBUG_VOID_RETURN;
+#endif
+
   /* Find the tables that are referred to from WHERE/HAVING */
   used_tables= (join->conds?  join->conds->used_tables() : 0) | 
                (join->having? join->having->used_tables() : 0);
@@ -1188,7 +1234,7 @@
 
 
 /*
-  Check if condition makes the a set of tables functionally-dependent
+  Check if given condition makes given set of tables functionally-dependent
 
   SYNOPSIS
     check_func_dependency()
@@ -1197,8 +1243,8 @@
       cond     Condition to use
 
   DESCRIPTION
-    Check if condition allows to conclude that the table set is functionally
-    dependent on everything else.
+    Check if we can use given condition to infer that the set of given tables
+    is functionally-dependent on everything else.
 
   RETURN 
     TRUE  - Yes, functionally dependent
@@ -1216,7 +1262,10 @@
   
   Func_dep_analyzer fda(join);
   
-  /* Start value */
+  /* 
+    Pre-alloc some Equality_module structures. We don't need this to be
+    guaranteed upper bound.
+  */
   fda.n_equality_mods_alloced= 
     join->thd->lex->current_select->max_equal_elems +
     (join->thd->lex->current_select->cond_count+1)*2 +
@@ -1249,7 +1298,7 @@
   fda.usable_tables= dep_tables;
   /*
     Analyze the the ON expression and create Equality_module objects and
-    Field_value objects for their left parts.
+      Field_value objects for the used fields.
   */
   uint and_level=0;
   build_eq_mods_for_cond(&fda, &last_eq_mod, &and_level, cond);
@@ -1264,14 +1313,14 @@
   
   DBUG_EXECUTE("test", dbug_print_deps(&fda); );
 
-  /* The running wave algorithm itself: */
+  /* The forward running wave algorithm: */
   Value_dep *bound_values= NULL;
   while (bound_modules)
   {
     for (;bound_modules; bound_modules= bound_modules->next)
     {
       if (bound_modules->now_bound(&fda, &bound_values))
-        return TRUE; /* Dependent! */
+        return TRUE; /* Dependent */
     }
     for (;bound_values; bound_values=bound_values->next)
       bound_values->now_bound(&fda, &bound_modules);
@@ -1280,17 +1329,12 @@
 }
 
 
-/*
-  Table is known means that
-  - one more element in outer join nest is known
-  - all its fields are known
-*/
-
 void Table_value::now_bound(Func_dep_analyzer *fda, 
                             Module_dep **bound_modules)
 {
   DBUG_PRINT("info", ("table %s is now bound", table->alias));
-
+  
+  /* Signal to all fields that they are now bound */
   for (Field_value *field_dep= fields; field_dep;
        field_dep= field_dep->next_table_field)
   {
@@ -1302,6 +1346,7 @@
     }
   }
   
+  /* Signal to outer join that one more table is known */
   if (fda->outer_join_dep->unknown_args && 
       !--fda->outer_join_dep->unknown_args)
   {
@@ -1318,6 +1363,7 @@
   DBUG_PRINT("info", ("field %s.%s is now bound", field->table->alias,
                        field->field_name));
 
+  /* Signal to unique keys and expressions that use this field*/
   for (Key_module *key_dep= table->keys; key_dep; 
        key_dep= key_dep->next_table_key)
   {
@@ -1337,8 +1383,8 @@
 
 
 /*
-  Walk through expressions that depend on this field and 'notify' them 
-  that this field is no longer unknown.
+  Walk through expressions that depend on this field and notify them 
+  that this field is now known.
 */
 void Field_value::signal_from_field_to_exprs(Func_dep_analyzer* fda, 
                                              Module_dep **bound_modules)
@@ -1362,16 +1408,16 @@
                                   Value_dep **bound_values)
 {
   DBUG_PRINT("info", ("Outer join eliminated"));
-  return TRUE; /* Signal to finish the process */
+  return TRUE; /* Signal out that the search is finished */
 }
 
 
 bool Equality_module::now_bound(Func_dep_analyzer *fda, 
                                 Value_dep **bound_values)
 {
-  /* For field=expr and we got to know the expr, so we know the field */
   if (mult_equal_fields)
   {
+    /* It's a=b=c=... multiple equality. Mark all equality members as known. */
     List_iterator<Field_value> it(*mult_equal_fields);
     Field_value *fv;
     while ((fv= it++))
@@ -1387,6 +1433,7 @@
   }
   else
   {
+    /* It's a fieldX=exprY equality. Mark exprY as known */
     if (!field->bound)
     {
       /* Mark as bound and add to the list */
@@ -1398,7 +1445,9 @@
   return FALSE;
 }
 
+
 /* Unique key is known means its  table is known */
+
 bool Key_module::now_bound(Func_dep_analyzer *fda, Value_dep **bound_values)
 {
   if (!table->bound)