← Back to team overview

maria-developers team mailing list archive

Rev 2739: MWL#17: Table elimination in file:///home/psergey/dev/maria-5.1-table-elim-r11/

 

At file:///home/psergey/dev/maria-5.1-table-elim-r11/

------------------------------------------------------------
revno: 2739
revision-id: psergey@xxxxxxxxxxxx-20090820155102-5zvm1m6idmie9mmv
parent: psergey@xxxxxxxxxxxx-20090819120659-0ndd6mqxeetgee2n
committer: Sergey Petrunya <psergey@xxxxxxxxxxxx>
branch nick: maria-5.1-table-elim-r11
timestamp: Thu 2009-08-20 17:51:02 +0200
message:
  MWL#17: Table elimination
  - Multiple-equality handling
=== modified file 'sql/item_cmpfunc.h'
--- a/sql/item_cmpfunc.h	2008-12-12 11:13:11 +0000
+++ b/sql/item_cmpfunc.h	2009-08-20 15:51:02 +0000
@@ -1578,6 +1578,7 @@
   uint members();
   bool contains(Field *field);
   Item_field* get_first() { return fields.head(); }
+  uint n_fields() { return fields.elements; }
   void merge(Item_equal *item);
   void update_const();
   enum Functype functype() const { return MULT_EQUAL_FUNC; }

=== modified file 'sql/opt_table_elimination.cc'
--- a/sql/opt_table_elimination.cc	2009-08-19 12:06:59 +0000
+++ b/sql/opt_table_elimination.cc	2009-08-20 15:51:02 +0000
@@ -175,7 +175,10 @@
     field_index 
   */
   Field_value *next_table_field;
-  uint bitmap_offset; /* Offset of our part of the bitmap */
+  /* 
+    Offset of our part of the bitmap psergey-todo: better comment!
+  */
+  uint bitmap_offset;
   
   /*
     Field became known. Check out
@@ -214,10 +217,6 @@
 class Module_dep : public Sql_alloc
 {
 public:
-  enum {
-    MODULE_OUTER_JOIN
-  } type; /* Type of the object */
-  
   virtual bool now_bound(Func_dep_analyzer *fda, Value_dep **bound_modules)=0;
   virtual ~Module_dep(){}
   /* 
@@ -232,8 +231,10 @@
 
 
 /*
-  A "tbl.column= expr" equality dependency.  tbl.column depends on fields 
-  used in expr.
+  This represents either
+   - "tbl.column= expr" equality dependency, i.e. tbl.column depends on fields 
+     used in the expression, or
+   - tbl1.col1=tbl2.col2=... multi-equality.
 */
 class Equality_module : public Module_dep
 {
@@ -241,6 +242,8 @@
   Field_value *field;
   Item  *expression;
   
+  List<Field_value> *mult_equal_fields;
+  
   /* Used during condition analysis only, similar to KEYUSE::level */
   uint level;
   bool now_bound(Func_dep_analyzer *fda, Value_dep **bound_values);
@@ -331,7 +334,7 @@
                            Item* cond);
 
 static 
-void build_eq_mods_for_cond(Func_dep_analyzer *fda, Equality_module **fdeps, 
+void build_eq_mods_for_cond(Func_dep_analyzer *fda, Equality_module **eq_mod, 
                             uint *and_level, Item *cond);
 static 
 void add_eq_mod(Func_dep_analyzer *fda, Equality_module **eq_mod, 
@@ -346,6 +349,9 @@
 static Field_value *get_field_value(Func_dep_analyzer *fda, Field *field);
 static void mark_as_eliminated(JOIN *join, TABLE_LIST *tbl);
 
+static void add_eq_mod2(Func_dep_analyzer *fda, Equality_module **eq_mod,
+                        uint and_level, Field_value *field_val, Item *right,
+                        List<Field_value>* mult_equal_fields);
 
 
 #ifndef DBUG_OFF
@@ -360,7 +366,7 @@
   SYNOPSIS
     build_eq_mods_for_cond()
       fda                    Table elimination context 
-      fdeps          INOUT  Put produced equality conditions here
+      eq_mod          INOUT  Put produced equality conditions here
       and_level      INOUT  AND-level (like in add_key_fields)
       cond                  Condition to process
 
@@ -369,34 +375,34 @@
 */
 
 static 
-void build_eq_mods_for_cond(Func_dep_analyzer *fda, Equality_module **fdeps, 
+void build_eq_mods_for_cond(Func_dep_analyzer *fda, Equality_module **eq_mod, 
                             uint *and_level, Item *cond)
 {
   if (cond->type() == Item_func::COND_ITEM)
   {
     List_iterator_fast<Item> li(*((Item_cond*) cond)->argument_list());
-    Equality_module *org_key_fields= *fdeps;
+    Equality_module *org_key_fields= *eq_mod;
     
     /* AND/OR */
     if (((Item_cond*) cond)->functype() == Item_func::COND_AND_FUNC)
     {
       Item *item;
       while ((item=li++))
-        build_eq_mods_for_cond(fda, fdeps, and_level, item);
-      for (; org_key_fields != *fdeps ; org_key_fields++)
+        build_eq_mods_for_cond(fda, eq_mod, and_level, item);
+      for (; org_key_fields != *eq_mod ; org_key_fields++)
         org_key_fields->level= *and_level;
     }
     else
     {
       Item *item;
       (*and_level)++;
-      build_eq_mods_for_cond(fda, fdeps, and_level, li++);
+      build_eq_mods_for_cond(fda, eq_mod, and_level, li++);
       while ((item=li++))
       {
-        Equality_module *start_key_fields= *fdeps;
+        Equality_module *start_key_fields= *eq_mod;
         (*and_level)++;
-        build_eq_mods_for_cond(fda, fdeps, and_level, item);
-        *fdeps= merge_func_deps(org_key_fields, start_key_fields, *fdeps,
+        build_eq_mods_for_cond(fda, eq_mod, and_level, item);
+        *eq_mod= merge_func_deps(org_key_fields, start_key_fields, *eq_mod,
                                 ++(*and_level));
       }
     }
@@ -414,8 +420,8 @@
   {
     if (cond_func->argument_count() == 2)
     {
-      add_eq_mod(fda, fdeps, *and_level, cond_func, args[0], args[1]);
-      add_eq_mod(fda, fdeps, *and_level, cond_func, args[1], args[0]);
+      add_eq_mod(fda, eq_mod, *and_level, cond_func, args[0], args[1]);
+      add_eq_mod(fda, eq_mod, *and_level, cond_func, args[1], args[0]);
     }
     break;
   }
@@ -426,62 +432,72 @@
         (fld= args[0]->real_item())->type() == Item::FIELD_ITEM &&
         args[1]->eq(args[2], ((Item_field*)fld)->field->binary()))
     {
-      add_eq_mod(fda, fdeps, *and_level, cond_func, args[0], args[1]);
-      add_eq_mod(fda, fdeps, *and_level, cond_func, args[1], args[0]);
+      add_eq_mod(fda, eq_mod, *and_level, cond_func, args[0], args[1]);
+      add_eq_mod(fda, eq_mod, *and_level, cond_func, args[1], args[0]);
     }
     break;
   }
   case Item_func::EQ_FUNC:
   case Item_func::EQUAL_FUNC:
   {
-    add_eq_mod(fda, fdeps, *and_level, cond_func, args[0], args[1]);
-    add_eq_mod(fda, fdeps, *and_level, cond_func, args[1], args[0]);
+    add_eq_mod(fda, eq_mod, *and_level, cond_func, args[0], args[1]);
+    add_eq_mod(fda, eq_mod, *and_level, cond_func, args[1], args[0]);
     break;
   }
   case Item_func::ISNULL_FUNC:
   {
     Item *tmp=new Item_null;
     if (tmp)
-      add_eq_mod(fda, fdeps, *and_level, cond_func, args[0], args[1]);
+      add_eq_mod(fda, eq_mod, *and_level, cond_func, args[0], args[1]);
     break;
   }
   case Item_func::MULT_EQUAL_FUNC:
   {
-    Item_equal *item_equal= (Item_equal *) cond;
-    Item *const_item= item_equal->get_const();
+    Item_equal *item_equal= (Item_equal*)cond;
+    // const item is 'item', field -> NULL. mult_equal_fields <-- an ordered
+    // list of 
+    List<Field_value> *fvl;
+    if (!(fvl= new List<Field_value>))
+      break;
+    
     Item_equal_iterator it(*item_equal);
     Item_field *item;
-    if (const_item)
-    {
-      /*
-        For each field field1 from item_equal consider the equality 
-        field1=const_item as a condition allowing an index access of the table
-        with field1 by the keys value of field1.
-      */
-      while ((item= it++))
-        add_eq_mod(fda, fdeps, *and_level, cond_func, item, const_item);
-    }
-    else 
-    {
-      /*
-        Consider all pairs of different fields included into item_equal.
-        For each of them (field1, field1) consider the equality 
-        field1=field2 as a condition allowing an index access of the table
-        with field1 by the keys value of field2.
-      */   
-      Item_equal_iterator fi(*item_equal);
-      while ((item= fi++))
+    Item *bound_item= item_equal->get_const();
+    while ((item= it++))
+    {
+      if ((item->used_tables() & fda->usable_tables))
       {
-        Field *field= item->field;
-        Item_field *item2;
-        while ((item2= it++))
+        Field_value *field_val;
+        if ((field_val= get_field_value(fda, item->field)))
         {
-          if (!field->eq(item2->field))
-            add_eq_mod(fda, fdeps, *and_level, cond_func, item, item2);
+          List_iterator<Field_value> it2(*fvl);
+          Field_value *other_f;
+          uint field_val_ratio= field_val->field->table->tablenr*MAX_FIELDS +
+                                field_val->field->field_index;
+          bool added= FALSE;
+          while ((other_f= it2++))
+          {
+            uint other_f_ratio= other_f->field->table->tablenr*MAX_FIELDS + 
+                                other_f->field->field_index;
+            if (other_f_ratio > field_val_ratio)
+            {
+              *(it2.ref())= field_val;
+              it2.after(other_f);
+              added= TRUE;
+              break;
+            }
+          }
+          if (!added)
+            fvl->push_back(field_val);
         }
-        it.rewind();
+      }
+      else
+      {
+        if (!bound_item)
+          bound_item= item;
       }
     }
+    add_eq_mod2(fda, eq_mod, *and_level, NULL, bound_item, fvl);
     break;
   }
   default:
@@ -491,6 +507,39 @@
 
 
 /*
+   The only requirement of this function is to order fields in some
+   deterministic way.
+*/
+
+int cmp_equal_fields(Item_field *field1, Item_field *field2, void *arg)
+{
+  int cmp= 0;
+  bool outer_ref= 0;
+  if (field2->used_tables() & OUTER_REF_TABLE_BIT)
+  {  
+    outer_ref= 1;
+    cmp= -1;
+  }
+  if (field2->used_tables() & OUTER_REF_TABLE_BIT)
+  {
+    outer_ref= 1;
+    cmp++;
+  }
+  if (outer_ref)
+    return cmp;
+  cmp= (int)field2->field->table->tablenr - 
+       (int)field1->field->table->tablenr;
+  if (cmp)
+    return cmp < 0 ? -1 : 1;
+  cmp= (int)field2->field->field_index - 
+       (int)field1->field->field_index;
+
+    return cmp < 0 ? -1 : (cmp ? 1 : 0);
+  
+}
+
+
+/*
   Perform an OR operation on two (adjacent) Equality_module arrays.
 
   SYNOPSIS
@@ -540,9 +589,9 @@
                                  Equality_module *end, uint and_level)
 {
   if (start == new_fields)
-    return start;				// Impossible or
+    return start;                                // Impossible or
   if (new_fields == end)
-    return start;				// No new fields, skip all
+    return start;                                // No new fields, skip all
 
   Equality_module *first_free=new_fields;
 
@@ -551,40 +600,119 @@
     for (Equality_module *old=start ; old != first_free ; old++)
     {
       /* 
-         TODO: does it make sense to attempt to merging multiple-equalities? 
-         A: YES.  
-           (a=b=c) OR (a=b=d)  produce "a=b". 
-         QQ: 
-           What to use for merging? Trivial N*M algorithm or pre-sort and then
-           merge ordered sequences?
+        Merge multiple-equalities:
+        A: YES.  
+          (a=b=c) OR (a=b=d)  produce "a=b".
+
+        TODO:
+          sort by (table_ptr, column_index)
+          then run along the two and produce an intersection
+
+        Q: What about constants?
+          a=b=3  OR a=b=5 -> a=b= (either 3 or 5)
+
+          a=b  OR a=b=5 -> a=b= (any constant)
+        A: keep the constant iff it is present in both sides and is the same.
+        
+        class Multi_equality
+        {
+          Item *const_item;
+          List<...) list;
+        };
+
       */
       if (old->field == new_fields->field)
       {
-	if (!new_fields->expression->const_item())
-	{
-	  /*
-	    If the value matches, we can use the key reference.
-	    If not, we keep it until we have examined all new values
-	  */
-	  if (old->expression->eq(new_fields->expression, old->field->field->binary()))
-	  {
-	    old->level= and_level;
-	  }
-	}
-	else if (old->expression->eq_by_collation(new_fields->expression, 
-                                           old->field->field->binary(),
-                                           old->field->field->charset()))
-	{
-	  old->level= and_level;
-	}
-	else
-	{
+        if (!old->field)
+        {
+          /*
+            OR-ing two multiple equalities. We must compute an intersection of
+            used fields, and check the constants according to these rules:
+
+              a=b=c=d  OR  a=c=e=f  ->  a=c  (compute intersection)
+              a=const1 OR a=b       ->  (nothing)
+              a=const1 OR a=const1  ->  a=const1 
+              a=const1 OR a=const2  ->  (nothing)
+            
+            If we're performing an OR operation over multiple equalities, e.g.
+
+              (a=b=c AND p=q) OR (a=b AND v=z)
+            
+            then we'll need to try combining each equality with each. ANDed
+            equalities are guaranteed to be disjoint, so we'll only get one
+            hit.
+          */
+          if (old->expression && new_fields->expression &&
+              old->expression->eq_by_collation(new_fields->expression, 
+                                               old->mult_equal_fields->head()->field->binary(),
+                                               old->mult_equal_fields->head()->field->charset()))
+          {
+            /* Ok, keep */
+          }
+          else
+          {
+            // no single constant/bound item.
+            old->expression= NULL;
+          }
+           
+          List <Field_value> *fv;
+          if (!(fv= new List<Field_value>))
+            break;
+
+          List_iterator<Field_value> it1(*old->mult_equal_fields);
+          List_iterator<Field_value> it2(*new_fields->mult_equal_fields);
+          Field_value *lfield= it1++;
+          Field_value *rfield= it2++;
+          // Merge
+          while (lfield && rfield)
+          {
+            if (lfield == rfield)
+              fv->push_back(lfield);
+            else
+            {
+              uint left_ratio= lfield->field->table->tablenr*MAX_FIELDS +
+                               lfield->field->field_index;
+              uint right_ratio= rfield->field->table->tablenr*MAX_FIELDS +
+                                rfield->field->field_index;
+              if (left_ratio < right_ratio)
+                lfield=it1++;
+              else
+                rfield=it2++;
+            }
+          }
+
+          if (fv->elements + test(old->expression) > 1)
+          {
+            old->mult_equal_fields= fv;
+            old->level= and_level;
+          }
+        }
+        else if (!new_fields->expression->const_item())
+        {
+          /*
+            If the value matches, we can use the key reference.
+            If not, we keep it until we have examined all new values
+          */
+          if (old->expression->eq(new_fields->expression, 
+                                  old->field->field->binary()))
+          {
+            old->level= and_level;
+          }
+        }
+        else if (old->expression->eq_by_collation(new_fields->expression, 
+                                                  old->field->field->binary(),
+                                                  old->field->field->charset()))
+        {
+          old->level= and_level;
+        }
+        else
+        {
           /* The expressions are different. */
-	  if (old == --first_free)		// If last item
-	    break;
-	  *old= *first_free;			// Remove old value
-	  old--;				// Retry this value
-	}
+          if (old == --first_free)                // If last item
+            break;
+          *old= *first_free;                        // Remove old value
+          old--;                                // Retry this value
+        }
       }
     }
   }
@@ -596,10 +724,10 @@
   for (Equality_module *old=start ; old != first_free ;)
   {
     if (old->level != and_level)
-    {						// Not used in all levels
+    {                                                // Not used in all levels
       if (old == --first_free)
-	break;
-      *old= *first_free;			// Remove old value
+        break;
+      *old= *first_free;                        // Remove old value
       continue;
     }
     old++;
@@ -659,33 +787,42 @@
           return;
       }
     }
-    
-    if (*eq_mod == fda->equality_mods + fda->n_equality_mods_alloced)
-    {
-      /* 
-        We've filled the entire equality_mods array. Replace it with a bigger
-        one. We do it somewhat inefficiently but it doesn't matter.
-      */
-      Equality_module *new_arr;
-      if (!(new_arr= new Equality_module[fda->n_equality_mods_alloced *2]))
-        return;
-      fda->n_equality_mods_alloced *= 2;
-      for (int i= 0; i < *eq_mod - fda->equality_mods; i++)
-        new_arr[i]= fda->equality_mods[i];
-
-      fda->equality_mods= new_arr;
-      *eq_mod= new_arr + (*eq_mod - fda->equality_mods);
-    }
-
-    if (!((*eq_mod)->field= get_field_value(fda, field)))
+    Field_value *field_val;
+    if ((field_val= get_field_value(fda, field)))
+      add_eq_mod2(fda, eq_mod, and_level, field_val, right, NULL);
+  }
+}
+
+
+/* Just add eq_mod w/o any checks */
+static void add_eq_mod2(Func_dep_analyzer *fda, Equality_module **eq_mod,
+                        uint and_level, Field_value *field_val, Item *right,
+                        List<Field_value>* mult_equal_fields)
+{
+  if (*eq_mod == fda->equality_mods + fda->n_equality_mods_alloced)
+  {
+    /* 
+      We've filled the entire equality_mods array. Replace it with a bigger
+      one. We do it somewhat inefficiently but it doesn't matter.
+    */
+    Equality_module *new_arr;
+    if (!(new_arr= new Equality_module[fda->n_equality_mods_alloced *2]))
       return;
-    (*eq_mod)->expression= right;
-    (*eq_mod)->level= and_level;
-    (*eq_mod)++;
+    fda->n_equality_mods_alloced *= 2;
+    for (int i= 0; i < *eq_mod - fda->equality_mods; i++)
+      new_arr[i]= fda->equality_mods[i];
+
+    fda->equality_mods= new_arr;
+    *eq_mod= new_arr + (*eq_mod - fda->equality_mods);
   }
+
+  (*eq_mod)->field= field_val;
+  (*eq_mod)->expression= right;
+  (*eq_mod)->level= and_level;
+  (*eq_mod)->mult_equal_fields= mult_equal_fields;
+  (*eq_mod)++;
 }
 
-
 /*
   Get a Table_value object for the given table, creating it if necessary.
 */
@@ -782,11 +919,15 @@
       */
       fda->equality_mods[expr_offset].unknown_args++;
     }
+    else
+      saw_other_tbl= TRUE;
   }
 
   Func_dep_analyzer *fda;
   /* Offset of the expression we're processing in the dependency bitmap */
-  uint expr_offset; 
+  uint expr_offset;
+
+  bool saw_other_tbl;
 };
 
 
@@ -858,9 +999,21 @@
        eq_mod++)
   {
     deps_recorder.expr_offset= eq_mod - fda->equality_mods;
+    deps_recorder.saw_other_tbl= FALSE;
     eq_mod->unknown_args= 0;
+
+    /* Regular tbl.col=expr(tblX1.col1, tblY1.col2, ...) */
     eq_mod->expression->walk(&Item::check_column_usage_processor, FALSE, 
                              (uchar*)&deps_recorder);
+
+    if (!eq_mod->field)
+    {
+      if (eq_mod->unknown_args)
+        eq_mod->unknown_args= 1;
+      if (deps_recorder.saw_other_tbl)
+        eq_mod->unknown_args= 0;
+    }
+
     if (!eq_mod->unknown_args)
     {
       eq_mod->next= bound_dep;
@@ -1235,12 +1388,30 @@
                                 Value_dep **bound_values)
 {
   /* For field=expr and we got to know the expr, so we know the field */
-  if (!field->bound)
-  {
-    /* Mark as bound and add to the list */
-    field->bound= TRUE;
-    field->next= *bound_values;
-    *bound_values= field;
+  if (mult_equal_fields)
+  {
+    List_iterator<Field_value> it(*mult_equal_fields);
+    Field_value *fv;
+    while ((fv= it++))
+    {
+      if (!fv->bound)
+      {
+        /* Mark as bound and add to the list */
+        fv->bound= TRUE;
+        fv->next= *bound_values;
+        *bound_values= fv;
+      }
+    }
+  }
+  else
+  {
+    if (!field->bound)
+    {
+      /* Mark as bound and add to the list */
+      field->bound= TRUE;
+      field->next= *bound_values;
+      *bound_values= field;
+    }
   }
   return FALSE;
 }
@@ -1313,11 +1484,19 @@
     String str(buf, sizeof(buf), &my_charset_bin);
     str.length(0);
     eq_mod->expression->print(&str, QT_ORDINARY);
-    fprintf(DBUG_FILE, "  equality%d: %s -> %s.%s\n", 
-            eq_mod - fda->equality_mods,
-            str.c_ptr(),
-            eq_mod->field->table->table->alias,
-            eq_mod->field->field->field_name);
+    if (eq_mod->field)
+    {
+      fprintf(DBUG_FILE, "  equality%d: %s -> %s.%s\n", 
+              eq_mod - fda->equality_mods,
+              str.c_ptr(),
+              eq_mod->field->table->table->alias,
+              eq_mod->field->field->field_name);
+    }
+    else
+    {
+      fprintf(DBUG_FILE, "  equality%d: multi-equality", 
+              eq_mod - fda->equality_mods);
+    }
   }
   fprintf(DBUG_FILE,"\n");