← Back to team overview

maria-developers team mailing list archive

bzr commit into MariaDB 5.1, with Maria 1.5:maria branch (psergey:2706)

 

#At lp:maria based on revid:knielsen@xxxxxxxxxxxxxxx-20090522175325-xpwm83ilnhqoqjz0

 2706 Sergey Petrunia	2009-06-03
      MWL#17: Table elimination
      - First code. Elimination works for simple cases, passes the testsuite.
      - Known issues:
        = No elimination is done for aggregate functions.
        = EXPLAIN EXTENDED shows eliminated tables (I think it better not)
        = No benchmark yet
        = The code needs some polishing.
      added:
        mysql-test/r/table_elim.result
        mysql-test/t/table_elim.test
      modified:
        sql/sql_select.cc
        sql/sql_select.h
        sql/table.h

per-file messages:
  mysql-test/r/table_elim.result
    MWL#17: Table elimination
    - Testcases
  mysql-test/t/table_elim.test
    MWL#17: Table elimination
    - Testcases
  sql/sql_select.cc
    MWL#17: Table elimination
  sql/sql_select.h
    MWL#17: Table elimination
    - Added JOIN_TAB::eliminated (is JOIN_TAB the best place to store this flag?)
  sql/table.h
    MWL#17: Table elimination
    - ADded NESTED_JOIN::n_tables. We need to have the number of real tables remaining in an outer join nest.
=== added file 'mysql-test/r/table_elim.result'
--- a/mysql-test/r/table_elim.result	1970-01-01 00:00:00 +0000
+++ b/mysql-test/r/table_elim.result	2009-06-03 13:10:45 +0000
@@ -0,0 +1,56 @@
+drop table if exists t0, t1, t2, t3;
+create table t1 (a int);
+insert into t1 values (0),(1),(2),(3);
+create table t0 as select * from t1;
+create table t2 (a int primary key, b int) 
+as select a, a as b from t1 where a in (1,2);
+create table t3 (a int primary key, b int) 
+as select a, a as b from t1 where a in (1,3);
+# This will be  eliminated:
+explain select t1.a from t1 left join t2 on t2.a=t1.a;
+id	select_type	table	type	possible_keys	key	key_len	ref	rows	Extra
+1	SIMPLE	t1	ALL	NULL	NULL	NULL	NULL	4	
+select t1.a from t1 left join t2 on t2.a=t1.a;
+a
+0
+1
+2
+3
+# This will not be eliminated as t2.b is in in select list:
+explain select * from t1 left join t2 on t2.a=t1.a;
+id	select_type	table	type	possible_keys	key	key_len	ref	rows	Extra
+1	SIMPLE	t1	ALL	NULL	NULL	NULL	NULL	4	
+1	SIMPLE	t2	eq_ref	PRIMARY	PRIMARY	4	test.t1.a	1	
+# This will not be eliminated as t2.b is in in order list:
+explain select t1.a from t1 left join t2 on t2.a=t1.a order by t2.b;
+id	select_type	table	type	possible_keys	key	key_len	ref	rows	Extra
+1	SIMPLE	t1	ALL	NULL	NULL	NULL	NULL	4	Using temporary; Using filesort
+1	SIMPLE	t2	eq_ref	PRIMARY	PRIMARY	4	test.t1.a	1	
+# This will not be eliminated as t2.b is in group list:
+explain select t1.a from t1 left join t2 on t2.a=t1.a group by t2.b;
+id	select_type	table	type	possible_keys	key	key_len	ref	rows	Extra
+1	SIMPLE	t1	ALL	NULL	NULL	NULL	NULL	4	Using temporary; Using filesort
+1	SIMPLE	t2	eq_ref	PRIMARY	PRIMARY	4	test.t1.a	1	
+# This will not be eliminated as t2.b is in the WHERE
+explain select t1.a from t1 left join t2 on t2.a=t1.a where t2.b < 3 or t2.b is null;
+id	select_type	table	type	possible_keys	key	key_len	ref	rows	Extra
+1	SIMPLE	t1	ALL	NULL	NULL	NULL	NULL	4	
+1	SIMPLE	t2	eq_ref	PRIMARY	PRIMARY	4	test.t1.a	1	Using where
+# Elimination of multiple tables:
+explain select t1.a from t1 left join (t2 join t3) on t2.a=t1.a and t3.a=t1.a;
+id	select_type	table	type	possible_keys	key	key_len	ref	rows	Extra
+1	SIMPLE	t1	ALL	NULL	NULL	NULL	NULL	4	
+# Elimination of multiple tables (2):
+explain select t1.a from t1 left join (t2 join t3 on t2.b=t3.b) on t2.a=t1.a and t3.a=t1.a;
+id	select_type	table	type	possible_keys	key	key_len	ref	rows	Extra
+1	SIMPLE	t1	ALL	NULL	NULL	NULL	NULL	4	
+# Elimination when done within an outer join nest:
+explain
+select t0.*
+from
+t0 left join (t1 left join (t2 join t3 on t2.b=t3.b) on t2.a=t1.a and
+t3.a=t1.a) on t0.a=t1.a;
+id	select_type	table	type	possible_keys	key	key_len	ref	rows	Extra
+1	SIMPLE	t0	ALL	NULL	NULL	NULL	NULL	4	
+1	SIMPLE	t1	ALL	NULL	NULL	NULL	NULL	4	
+drop table t0, t1, t2, t3;

=== added file 'mysql-test/t/table_elim.test'
--- a/mysql-test/t/table_elim.test	1970-01-01 00:00:00 +0000
+++ b/mysql-test/t/table_elim.test	2009-06-03 13:10:45 +0000
@@ -0,0 +1,52 @@
+#
+# Table elimination (MWL#17) tests
+#
+--disable_warnings
+drop table if exists t0, t1, t2, t3;
+--enable_warnings
+
+create table t1 (a int);
+insert into t1 values (0),(1),(2),(3);
+create table t0 as select * from t1;
+
+create table t2 (a int primary key, b int) 
+  as select a, a as b from t1 where a in (1,2);
+
+create table t3 (a int primary key, b int) 
+  as select a, a as b from t1 where a in (1,3);
+
+--echo # This will be  eliminated:
+explain select t1.a from t1 left join t2 on t2.a=t1.a;
+
+select t1.a from t1 left join t2 on t2.a=t1.a;
+
+--echo # This will not be eliminated as t2.b is in in select list:
+explain select * from t1 left join t2 on t2.a=t1.a;
+
+--echo # This will not be eliminated as t2.b is in in order list:
+explain select t1.a from t1 left join t2 on t2.a=t1.a order by t2.b;
+
+--echo # This will not be eliminated as t2.b is in group list:
+explain select t1.a from t1 left join t2 on t2.a=t1.a group by t2.b;
+
+## TODO: Aggregate functions prevent table elimination ATM.
+
+--echo # This will not be eliminated as t2.b is in the WHERE
+explain select t1.a from t1 left join t2 on t2.a=t1.a where t2.b < 3 or t2.b is null;
+
+--echo # Elimination of multiple tables:
+explain select t1.a from t1 left join (t2 join t3) on t2.a=t1.a and t3.a=t1.a;
+
+--echo # Elimination of multiple tables (2):
+explain select t1.a from t1 left join (t2 join t3 on t2.b=t3.b) on t2.a=t1.a and t3.a=t1.a;
+
+--echo # Elimination when done within an outer join nest:
+explain
+select t0.*
+from
+  t0 left join (t1 left join (t2 join t3 on t2.b=t3.b) on t2.a=t1.a and
+  t3.a=t1.a) on t0.a=t1.a;
+
+
+drop table t0, t1, t2, t3;
+

=== modified file 'sql/sql_select.cc'
--- a/sql/sql_select.cc	2009-05-19 09:28:05 +0000
+++ b/sql/sql_select.cc	2009-06-03 13:10:45 +0000
@@ -42,6 +42,11 @@
 #define TMP_ENGINE_HTON myisam_hton
 #endif
 
+#define FT_KEYPART   (MAX_REF_PARTS+10)
+/* Values in optimize */
+#define KEY_OPTIMIZE_EXISTS		1
+#define KEY_OPTIMIZE_REF_OR_NULL	2
+
 const char *join_type_str[]={ "UNKNOWN","system","const","eq_ref","ref",
 			      "MAYBE_REF","ALL","range","index","fulltext",
 			      "ref_or_null","unique_subquery","index_subquery",
@@ -2468,6 +2473,304 @@ static ha_rows get_quick_record_count(TH
   DBUG_RETURN(HA_POS_ERROR);			/* This shouldn't happend */
 }
 
+
+bool has_eq_ref_access_candidate(TABLE *table, table_map can_refer_to_these)
+{
+  KEYUSE *keyuse= table->reginfo.join_tab->keyuse;
+  if (keyuse)
+  {
+    /*
+      walk through all of the KEYUSE elements and
+       - locate unique keys
+       - check if we have eq_ref access for them
+          TODO any other reqs?
+      loops are constructed like in best_access_path
+    */
+    while (keyuse->table == table)
+    {
+      uint key= keyuse->key;
+      key_part_map bound_parts=0;
+      bool ft_key= test(keyuse->keypart == FT_KEYPART); 
+
+      do  /* For each keypart and each way to read it */
+      {
+        if (!(keyuse->used_tables & ~can_refer_to_these) &&
+            !(keyuse->optimize & KEY_OPTIMIZE_REF_OR_NULL))
+        {
+          bound_parts |= keyuse->keypart_map;
+        }
+        keyuse++;
+      } while (keyuse->table && keyuse->key == key);
+
+      KEY *keyinfo= table->key_info + key;
+      if (!ft_key &&
+          ((keyinfo->flags & (HA_NOSAME | HA_NULL_PART_KEY)) == HA_NOSAME) &&
+          bound_parts == PREV_BITS(key_part_map, keyinfo->key_parts))
+      {
+        return TRUE;
+      }
+    }
+  }
+  return FALSE;
+}
+
+
+static void mark_table_as_eliminated(JOIN *join, TABLE *table, uint *const_tbl_count, 
+                                      table_map *const_tables)
+{
+  JOIN_TAB *tab= table->reginfo.join_tab;
+  if (!(*const_tables & tab->table->map))
+  {
+    DBUG_PRINT("info", ("Eliminated table %s", table->alias));
+    tab->type= JT_CONST;
+    tab->eliminated= TRUE;
+    *const_tables |= table->map;
+    join->const_table_map|= table->map;
+    set_position(join, (*const_tbl_count)++, tab, (KEYUSE*)0);
+  }
+}
+
+
+/*
+  Now on to traversal. There can be a situation like this:
+
+    FROM t1 
+         LEFT JOIN t2 ON cond(t1,t2)
+         LEFT JOIN t3 ON cond(..., possibly-t2)  // <--(*)
+         LEFT JOIN t4 ON cond(..., possibly-t2)
+
+  Besides that, simplify_joins() may have created back references, so when
+  we're e.g. looking at outer join (*) we need to look both forward and
+  backward to check if there are any references in preceding/following
+  outer joins'
+
+  TODO would it create only following-sibling references or
+  preceding-sibling as well?
+    And if not, should we rely on that?
+    
+*/
+
+int
+eliminate_tables_for_join_list(JOIN *join, List<TABLE_LIST> *join_list, 
+                               table_map used_tables_elsewhere, 
+                               uint *const_tbl_count, table_map *const_tables)
+{
+  List_iterator<TABLE_LIST> it(*join_list);
+  table_map used_tables_on_right[MAX_TABLES]; // todo change to alloca
+  table_map used_tables_on_left;
+  TABLE_LIST *tbl;
+  int i, n_tables;
+  int eliminated=0;
+
+  /* Collect the reverse-bitmap-array */
+  for (i=0; (tbl= it++); i++)
+  {
+    used_tables_on_right[i]= 0;
+    if (tbl->on_expr)
+      used_tables_on_right[i]= tbl->on_expr->used_tables();
+    if (tbl->nested_join)
+      used_tables_on_right[i]= tbl->nested_join->used_tables;
+  }
+  n_tables= i;
+
+  for (i= n_tables - 2; i > 0; i--)
+    used_tables_on_right[i] |= used_tables_on_right[i+1];
+  
+  it.rewind();
+  
+  /* Walk through tables and join nests and see if we can eliminate them */
+  used_tables_on_left= 0;
+  i= 1;
+  while ((tbl= it++))
+  {
+    table_map tables_used_outside= used_tables_on_left |
+                                   used_tables_on_right[i] |
+                                   used_tables_elsewhere;
+    table_map cur_tables;
+
+    if (tbl->nested_join)
+    {
+      DBUG_ASSERT(tbl->on_expr); 
+      /*
+        There can be cases where table removal is applicable for tables
+        within the outer join but not for the outer join itself. Ask to
+        remove the children first.
+
+        TODO: NoHopelessEliminationAttempts: the below call can return 
+        information about whether it would make any sense to try removing 
+        this entire outer join nest.
+      */
+      int eliminated_in_children= 
+        eliminate_tables_for_join_list(join, &tbl->nested_join->join_list,
+                                       tables_used_outside,
+                                       const_tbl_count, const_tables);
+      tbl->nested_join->n_tables -=eliminated_in_children; 
+      cur_tables= tbl->nested_join->used_tables;
+      if (!(cur_tables & tables_used_outside))
+      {
+        /* 
+          Check if all embedded tables together can produce at most one
+          record combination. This is true when
+           - each of them has one_match(outer-tables) property
+             (this is a stronger condition than all of them together having
+              this property but that's irrelevant here)
+           - there are no outer joins among them
+             (except for the case  of outer join which has all inner tables
+              to be constant and is guaranteed to produce only one record.
+              that record will be null-complemented)
+        */
+        bool one_match= TRUE;
+        List_iterator<TABLE_LIST> it2(tbl->nested_join->join_list);
+        TABLE_LIST *inner;
+        while ((inner= it2++))
+        {
+          /*
+            Bail out if we see an outer join (TODO: handle the above
+            null-complemntated-rows-only case)
+          */
+          if (inner->on_expr)
+          {
+            one_match= FALSE;
+            break;
+          }
+
+          if (inner->table && // <-- to be removed after NoHopelessEliminationAttempts
+              !has_eq_ref_access_candidate(inner->table,
+                                           ~tbl->nested_join->used_tables))
+          {
+            one_match= FALSE;
+            break;
+          }
+        }
+        if (one_match)
+        {
+          it2.rewind();
+          while ((inner= it2++))
+          {
+            mark_table_as_eliminated(join, inner->table, const_tbl_count, 
+                                     const_tables);
+          }
+          eliminated += tbl->nested_join->join_list.elements;
+          //psergey-todo: do we need to do anything about removing the join
+          //nest?
+        }
+        else
+        {
+          eliminated += eliminated_in_children;
+        }
+      }
+    }
+    else if (tbl->on_expr)
+    {
+      cur_tables= tbl->on_expr->used_tables();
+      /* Check and remove */
+      if (!(tbl->table->map & tables_used_outside) && 
+          has_eq_ref_access_candidate(tbl->table, (table_map)-1))
+      {
+        mark_table_as_eliminated(join, tbl->table, const_tbl_count, 
+                                 const_tables);
+        eliminated += 1;
+      }
+    }
+
+    /* Update bitmap of tables we've seen on the left */
+    i++;
+    used_tables_on_left |= cur_tables;
+  }
+  return eliminated;
+}
+
+
+/*
+  Perform table elimination based on outer join
+
+  SELECT * FROM t1 LEFT JOIN 
+    (t2 JOIN t3) ON t3.primary_key=t1.col AND 
+                    t4.primary_key= t2.col
+
+  CRITERIA FOR REMOVING ONE OJ NEST
+    we can't rely on sole presense of eq_refs. Because if we do, we'll miss
+    things like this:
+
+    SELECT * FROM flights LEFT JOIN 
+      (pax as S1 JOIN pax as S2 ON S2.id=S1.spouse AND s1.id=s2.spouse)
+  
+   (no-polygamy schema/query but there can be many couples on the flight)
+   ..
+  
+  REMOVAL PROCESS
+    We can remove an inner side of an outer join if it there is a warranty
+    that it will produce not more than one record:
+
+     ... t1 LEFT JOIN t2 ON (t2.unique_key = expr) ...
+
+    For nested outer joins:
+    - The process naturally occurs bottom-up (in order to remove an
+      outer-join we need to analyze its contents)
+    - If we failed to remove an outer join nest, it makes no sense to 
+      try removing its ancestors, as the 
+         ot LEFT JOIN it ON cond
+      pair may possibly produce two records (one record via match and
+      another one as access-method record).
+
+  Q: If we haven't removed an OUTER JOIN, does it make sense to attempt
+  removing its ancestors? 
+  A: No as the innermost outer join will produce two records => no ancestor 
+  outer join nest will be able to provide the max_fanout==1 guarantee.
+
+  psergey-todo: .
+*/
+
+static void eliminate_tables(JOIN *join, uint *const_tbl_count, table_map *const_tables)
+{
+  Item *item;
+  table_map used_tables;
+  DBUG_ENTER("eliminate_tables");
+  if (!join->outer_join)
+    DBUG_VOID_RETURN;
+
+  /* Find the tables that are referred to from WHERE/HAVING */
+  used_tables= (join->conds?  join->conds->used_tables() : 0) | 
+               (join->having? join->having->used_tables() : 0);
+  
+  /* Add tables referred to from the select list */
+  List_iterator<Item> it(join->fields_list);
+  while ((item= it++))
+    used_tables |= item->used_tables();
+ 
+  /* Add tables referred to from ORDER BY and GROUP BY lists */
+  ORDER *all_lists[]= { join->order, join->group_list};
+  for (int i=0; i < 2; i++)
+  {
+    for (ORDER *cur_list= all_lists[i]; cur_list; cur_list= cur_list->next)
+      used_tables |= (*(cur_list->item))->used_tables();
+  }
+  
+  THD* thd= join->thd;
+  if (join->select_lex == &thd->lex->select_lex)
+  {
+    /* Multi-table UPDATE and DELETE: don't eliminate the tables we modify: */
+    used_tables |= thd->table_map_for_update;
+
+    /* Multi-table UPDATE: don't eliminate tables referred from SET statement */
+    if (thd->lex->sql_command == SQLCOM_UPDATE_MULTI)
+    {
+      List_iterator<Item> it2(thd->lex->value_list);
+      while ((item= it2++))
+        used_tables |= item->used_tables();
+    }
+  }
+
+  if (((1 << join->tables) - 1) & ~used_tables)
+  {
+    /* There are some time tables that we probably could eliminate */
+    eliminate_tables_for_join_list(join, join->join_list, used_tables,
+                                   const_tbl_count, const_tables);
+  }
+  DBUG_VOID_RETURN;
+}
+
+
 /*
    This structure is used to collect info on potentially sargable
    predicates in order to check whether they become sargable after
@@ -2823,6 +3126,10 @@ make_join_statistics(JOIN *join, TABLE_L
     }
   }
 
+  //psergey-todo: table elimination
+  eliminate_tables(join, &const_count, &found_const_table_map);
+  //:psergey-todo
+
   /* Calc how many (possible) matched records in each table */
 
   for (s=stat ; s < stat_end ; s++)
@@ -2956,9 +3263,6 @@ typedef struct key_field_t {
   bool         *cond_guard; /* See KEYUSE::cond_guard */
 } KEY_FIELD;
 
-/* Values in optimize */
-#define KEY_OPTIMIZE_EXISTS		1
-#define KEY_OPTIMIZE_REF_OR_NULL	2
 
 /**
   Merge new key definitions to old ones, remove those not used in both.
@@ -3563,7 +3867,6 @@ add_key_part(DYNAMIC_ARRAY *keyuse_array
 }
 
 
-#define FT_KEYPART   (MAX_REF_PARTS+10)
 
 static void
 add_ft_keys(DYNAMIC_ARRAY *keyuse_array,
@@ -6021,7 +6324,7 @@ make_outerjoin_info(JOIN *join)
       }
       if (!tab->first_inner)  
         tab->first_inner= nested_join->first_nested;
-      if (++nested_join->counter < nested_join->join_list.elements)
+      if (++nested_join->counter < nested_join->n_tables)
         break;
       /* Table tab is the last inner table for nested join. */
       nested_join->first_nested->last_inner= tab;
@@ -8575,6 +8878,8 @@ simplify_joins(JOIN *join, List<TABLE_LI
       conds= simplify_joins(join, &nested_join->join_list, conds, top);
       used_tables= nested_join->used_tables;
       not_null_tables= nested_join->not_null_tables;  
+      /* The following two might become unequal after table elimination: */
+      nested_join->n_tables= nested_join->join_list.elements;
     }
     else
     {
@@ -8733,7 +9038,7 @@ static uint build_bitmap_for_nested_join
             with anything)
         2. we could run out bits in nested_join_map otherwise.
       */
-      if (nested_join->join_list.elements != 1)
+      if (nested_join->n_tables != 1)
       {
         nested_join->nj_map= (nested_join_map) 1 << first_unused++;
         first_unused= build_bitmap_for_nested_joins(&nested_join->join_list,
@@ -8894,7 +9199,7 @@ static bool check_interleaving_with_nj(J
       join->cur_embedding_map |= next_emb->nested_join->nj_map;
     }
     
-    if (next_emb->nested_join->join_list.elements !=
+    if (next_emb->nested_join->n_tables !=
         next_emb->nested_join->counter)
       break;
 
@@ -8928,7 +9233,7 @@ static void restore_prev_nj_state(JOIN_T
   {
     if (!(--last_emb->nested_join->counter))
       join->cur_embedding_map&= ~last_emb->nested_join->nj_map;
-    else if (last_emb->nested_join->join_list.elements-1 ==
+    else if (last_emb->nested_join->n_tables-1 ==
              last_emb->nested_join->counter) 
       join->cur_embedding_map|= last_emb->nested_join->nj_map;
     else
@@ -16202,6 +16507,14 @@ static void select_describe(JOIN *join, 
       tmp3.length(0);
 
       quick_type= -1;
+
+      //psergey-todo:
+      if (tab->eliminated)
+      {
+        used_tables|=table->map;
+        continue;
+      }
+
       item_list.empty();
       /* id */
       item_list.push_back(new Item_uint((uint32)

=== modified file 'sql/sql_select.h'
--- a/sql/sql_select.h	2009-04-25 10:05:32 +0000
+++ b/sql/sql_select.h	2009-06-03 13:10:45 +0000
@@ -210,6 +210,9 @@ typedef struct st_join_table {
   JOIN		*join;
   /** Bitmap of nested joins this table is part of */
   nested_join_map embedding_map;
+  
+  //psergey-todo: more justified place
+  bool eliminated;
 
   void cleanup();
   inline bool is_using_loose_index_scan()

=== modified file 'sql/table.h'
--- a/sql/table.h	2009-02-19 09:01:25 +0000
+++ b/sql/table.h	2009-06-03 13:10:45 +0000
@@ -1626,6 +1626,8 @@ typedef struct st_nested_join
     Before each use the counters are zeroed by reset_nj_counters.
   */
   uint              counter;
+  /* Tables left after elimination */
+  uint              n_tables;
   nested_join_map   nj_map;          /* Bit used to identify this nested join*/
 } NESTED_JOIN;