← Back to team overview

maria-developers team mailing list archive

Rev 2764: MWL#68 Subquery optimization: Efficient NOT IN execution with NULLs in file:///home/tsk/mprog/src/5.3-mwl68-unmerged/

 

At file:///home/tsk/mprog/src/5.3-mwl68-unmerged/

------------------------------------------------------------
revno: 2764
revision-id: timour@xxxxxxxxxxxx-20100222151655-ltjv0rlv6z2sdiiu
parent: timour@xxxxxxxxxxxx-20100222135709-3568ya6z76hkwfzs
committer: timour@xxxxxxxxxxxx
branch nick: 5.3-mwl68-unmerged
timestamp: Mon 2010-02-22 17:16:55 +0200
message:
  MWL#68 Subquery optimization: Efficient NOT IN execution with NULLs
  
  This patch mainly adds sorting of all indexes for partial matching
  according to their NULL selectivity. The patch also fixes a related bug
  in subselect_rowid_merge_engine::test_null_row() where the wrong matched
  indexes were skipped.
  
  In addition the patch:
  - adds few ::print() methods,
  - renames few variables that had similar names but different purpose.
=== modified file 'sql/item_subselect.cc'
--- a/sql/item_subselect.cc	2010-02-22 13:57:09 +0000
+++ b/sql/item_subselect.cc	2010-02-22 15:16:55 +0000
@@ -3424,7 +3424,10 @@
   DBUG_ENTER("subselect_hash_sj_engine::set_strategy_using_schema");
 
   if (item_in->is_top_level_item())
+  {
     strategy= COMPLETE_MATCH;
+    DBUG_VOID_RETURN;
+  }
   else
   {
     List_iterator<Item> inner_col_it(*item_in->unit->get_unit_column_types());
@@ -3846,6 +3849,7 @@
   thd->lex->current_select= materialize_engine->select_lex;
   if ((res= materialize_join->optimize()))
     goto err; /* purecov: inspected */
+  DBUG_ASSERT(!is_materialized); /* We should materialize only once. */
   materialize_join->exec();
   if ((res= test(materialize_join->error || thd->is_fatal_error)))
     goto err;
@@ -3952,7 +3956,7 @@
     lookup_engine->print(str, query_type);
   else
     str->append(STRING_WITH_LEN(
-           "<the access method for lookups is not yet created>"
+           "<engine selected at execution time>"
          ));
 }
 
@@ -3980,11 +3984,10 @@
 }
 
 
-Ordered_key::Ordered_key(uint key_idx_arg, TABLE *tbl_arg,
-                         Item *search_key_arg, ha_rows null_count_arg,
-                         ha_rows min_null_row_arg, ha_rows max_null_row_arg,
-                         uchar *row_num_to_rowid_arg)
-  : key_idx(key_idx_arg), tbl(tbl_arg), search_key(search_key_arg),
+Ordered_key::Ordered_key(uint keyid_arg, TABLE *tbl_arg, Item *search_key_arg,
+                         ha_rows null_count_arg, ha_rows min_null_row_arg,
+                         ha_rows max_null_row_arg, uchar *row_num_to_rowid_arg)
+  : keyid(keyid_arg), tbl(tbl_arg), search_key(search_key_arg),
     row_num_to_rowid(row_num_to_rowid_arg), null_count(null_count_arg)
 {
   DBUG_ASSERT(tbl->file->stats.records > null_count);
@@ -4190,6 +4193,21 @@
 
 
 /*
+  The probability that a certain row does not contain a NULL in some row in
+  a NULL-indexed column.
+  @retval  1  if there are no NULLs
+  @retval  0  if only NULLs
+*/
+
+double Ordered_key::null_selectivity()
+{
+  /* We should not be processing empty tables. */
+  DBUG_ASSERT(tbl->file->stats.records);
+  return (1 - (double) null_count / (double) tbl->file->stats.records);
+}
+
+
+/*
   Compare the value(s) of the current key in 'search_key' with the
   data of the current table record.
 
@@ -4307,6 +4325,34 @@
 }
 
 
+void Ordered_key::print(String *str)
+{
+  uint i;
+  str->append("{idx=");
+  str->qs_append(keyid);
+  str->append(", (");
+  for (i= 0; i < key_column_count - 1; i++)
+  {
+    str->append(key_columns[i]->field->field_name);
+    str->append(", ");
+  }
+  str->append(key_columns[i]->field->field_name);
+  str->append("), ");
+
+  str->append("null_bitmap: (bits=");
+  str->qs_append(null_key.n_bits);
+  str->append(", nulls= ");
+  str->qs_append((double)null_count);
+  str->append(", min_null= ");
+  str->qs_append((double)min_null_row);
+  str->append(", max_null= ");
+  str->qs_append((double)max_null_row);
+  str->append("), ");
+
+  str->append('}');
+}
+
+
 /*
   @param non_null_key_parts  
   @param partial_match_key_parts  A union of all single-column NULL key parts.
@@ -4323,7 +4369,7 @@
   rownum_t cur_rownum= 0;
   select_materialize_with_stats *result_sink=
     (select_materialize_with_stats *) result;
-  uint cur_key= 0;
+  uint cur_keyid= 0;
   Item_in_subselect *item_in= (Item_in_subselect*) item;
   int error;
 
@@ -4346,16 +4392,16 @@
   /* Create the only non-NULL key if there is any. */
   if (non_null_key_parts)
   {
-    non_null_key= new Ordered_key(cur_key, tmp_table, item_in->left_expr,
+    non_null_key= new Ordered_key(cur_keyid, tmp_table, item_in->left_expr,
                                   0, 0, 0, row_num_to_rowid);
     if (non_null_key->init(non_null_key_parts))
     {
       // TIMOUR: revert to partial matching via scanning
       return TRUE;
     }
-    merge_keys[cur_key]= non_null_key;
-    merge_keys[cur_key]->first();
-    ++cur_key;
+    merge_keys[cur_keyid]= non_null_key;
+    merge_keys[cur_keyid]->first();
+    ++cur_keyid;
   }
 
   /*
@@ -4379,23 +4425,24 @@
         continue;
 
       if (result_sink->get_null_count_of_col(i) == row_count)
-        bitmap_set_bit(&null_only_columns, cur_key);
+        bitmap_set_bit(&null_only_columns, cur_keyid);
       else
       {
-        merge_keys[cur_key]= new Ordered_key(cur_key, tmp_table,
-                                             item_in->left_expr->element_index(i),
-                                             result_sink->get_null_count_of_col(i),
-                                             result_sink->get_min_null_of_col(i),
-                                             result_sink->get_max_null_of_col(i),
-                                             row_num_to_rowid);
-        if (merge_keys[cur_key]->init(i))
+        merge_keys[cur_keyid]= new Ordered_key(
+                                     cur_keyid, tmp_table,
+                                     item_in->left_expr->element_index(i),
+                                     result_sink->get_null_count_of_col(i),
+                                     result_sink->get_min_null_of_col(i),
+                                     result_sink->get_max_null_of_col(i),
+                                     row_num_to_rowid);
+        if (merge_keys[cur_keyid]->init(i))
         {
           // TIMOUR: revert to partial matching via scanning
           return TRUE;
         }
-        merge_keys[cur_key]->first();
+        merge_keys[cur_keyid]->first();
       }
-      ++cur_key;
+      ++cur_keyid;
     }
   }
 
@@ -4453,12 +4500,14 @@
 
   tmp_table->file->ha_rnd_end();
 
+  /* Sort all the keys by their NULL selectivity. */
+  my_qsort(merge_keys, keys_count, sizeof(Ordered_key*),
+           (qsort_cmp) cmp_keys_by_null_selectivity);
+
   /* Sort the keys in each of the indexes. */
   for (uint i= 0; i < keys_count; i++)
     merge_keys[i]->sort_keys();
 
-  // TIMOUR: sort all the keys by NULL selectivity
-
   if (init_queue(&pq, keys_count, 0, FALSE,
                  subselect_rowid_merge_engine::cmp_keys_by_cur_rownum, NULL))
   {
@@ -4486,20 +4535,38 @@
 }
 
 
+void subselect_rowid_merge_engine::print(String *str, enum_query_type query_type)
+{
+  str->append(STRING_WITH_LEN("<rowid_merge>("));
+  for (uint i= 0; i < keys_count; i++)
+    merge_keys[i]->print(str);
+  str->append(')');
+}
+
+
 /*
+  Quick sort comparison function to compare keys in order of decreasing bitmap
+  selectivity, so that the most selective keys come first.
+
+  @param  k1 first key to compare
+  @param  k2 second key to compare
+
+  @retval  1  if k1 is less selective than k2
+  @retval  0  if k1 is equally selective as k2
+  @retval -1  if k1 is more selective than k2
 */
 
 int
-subselect_rowid_merge_engine::cmp_keys_by_null_selectivity(Ordered_key *a,
-                                                           Ordered_key *b)
+subselect_rowid_merge_engine::cmp_keys_by_null_selectivity(Ordered_key **k1,
+                                                           Ordered_key **k2)
 {
-  double a_sel= a->null_selectivity();
-  double b_sel= b->null_selectivity();
-  if (a_sel == b_sel)
-    return 0;
-  if (a_sel > b_sel)
+  double k1_sel= (*k1)->null_selectivity();
+  double k2_sel= (*k2)->null_selectivity();
+  if (k1_sel < k2_sel)
     return 1;
-  return -1;
+  if (k1_sel > k2_sel)
+    return -1;
+  return 0;
 }
 
 
@@ -4527,17 +4594,21 @@
 
 bool subselect_rowid_merge_engine::test_null_row(rownum_t row_num)
 {
+  Ordered_key *cur_key;
+  uint cur_id;
   for (uint i = 0; i < keys_count; i++)
   {
-    if (bitmap_is_set(&matching_keys, i))
+    cur_key= merge_keys[i];
+    cur_id= cur_key->get_keyid();
+    if (bitmap_is_set(&matching_keys, cur_id))
     {
       /*
-        The key 'i' already matches a value in row 'row_num', thus we
-        skip it as it can't possibly match a NULL.
+        The key 'i' (with id 'cur_keyid') already matches a value in row 'row_num',
+        thus we skip it as it can't possibly match a NULL.
       */
       continue;
     }
-    if (!merge_keys[i]->is_null(row_num))
+    if (!cur_key->is_null(row_num))
       return FALSE;
   }
   return TRUE;
@@ -4583,7 +4654,7 @@
     if (merge_keys[i]->get_search_key(0)->is_null())
     {
       ++count_nulls_in_search_key;
-      bitmap_set_bit(&matching_outer_cols, merge_keys[i]->get_key_idx());
+      bitmap_set_bit(&matching_outer_cols, merge_keys[i]->get_keyid());
     }
     else if (merge_keys[i]->lookup())
       queue_insert(&pq, (uchar *) merge_keys[i]);
@@ -4610,7 +4681,7 @@
   min_key= (Ordered_key*) queue_remove(&pq, 0);
   min_row_num= min_key->current();
   bitmap_copy(&matching_keys, &null_only_columns);
-  bitmap_set_bit(&matching_keys, min_key->get_key_idx());
+  bitmap_set_bit(&matching_keys, min_key->get_keyid());
   bitmap_union(&matching_keys, &matching_outer_cols);
   if (min_key->next_same())
     queue_insert(&pq, (uchar *) min_key);
@@ -4633,7 +4704,7 @@
     cur_row_num= cur_key->current();
 
     if (cur_row_num == min_row_num)
-      bitmap_set_bit(&matching_keys, cur_key->get_key_idx());
+      bitmap_set_bit(&matching_keys, cur_key->get_keyid());
     else
     {
       /* Follows from the correct use of priority queue. */
@@ -4645,7 +4716,7 @@
         min_key= cur_key;
         min_row_num= cur_row_num;
         bitmap_copy(&matching_keys, &null_only_columns);
-        bitmap_set_bit(&matching_keys, min_key->get_key_idx());
+        bitmap_set_bit(&matching_keys, min_key->get_keyid());
         bitmap_union(&matching_keys, &matching_outer_cols);
       }
     }

=== modified file 'sql/item_subselect.h'
--- a/sql/item_subselect.h	2010-02-19 21:55:57 +0000
+++ b/sql/item_subselect.h	2010-02-22 15:16:55 +0000
@@ -752,7 +752,7 @@
     Index of the key in an array of keys. This index allows to
     construct (sub)sets of keys represented by bitmaps.
   */
-  uint key_idx;
+  uint keyid;
   /* The table being indexed. */
   TABLE *tbl;
   /* The columns being indexed. */
@@ -810,7 +810,7 @@
 public:
   static void *operator new(size_t size) throw ()
   { return sql_alloc(size); }
-  Ordered_key(uint key_idx_arg, TABLE *tbl_arg,
+  Ordered_key(uint keyid_arg, TABLE *tbl_arg,
               Item *search_key_arg, ha_rows null_count_arg,
               ha_rows min_null_row_arg, ha_rows max_null_row_arg,
               uchar *row_num_to_rowid_arg);
@@ -822,7 +822,7 @@
   bool init(int col_idx);
 
   uint get_column_count() { return key_column_count; }
-  uint get_key_idx() { return key_idx; }
+  uint get_keyid() { return keyid; }
   uint get_field_idx(uint i)
   {
     DBUG_ASSERT(i < key_column_count);
@@ -841,8 +841,7 @@
   }
 
   void sort_keys();
-
-  double null_selectivity() { return (1 - null_count / null_key.n_bits); }
+  double null_selectivity();
 
   /*
     Position the current element at the first row that matches the key.
@@ -896,6 +895,7 @@
       return FALSE;
     return bitmap_is_set(&null_key, row_num);
   }
+  void print(String *str);
 };
 
 
@@ -957,10 +957,10 @@
   bool has_covering_null_row;
 protected:
   /*
-    Comparison function to compare keys in order of increasing bitmap
+    Comparison function to compare keys in order of decreasing bitmap
     selectivity.
   */
-  static int cmp_keys_by_null_selectivity(Ordered_key *a, Ordered_key *b);
+  static int cmp_keys_by_null_selectivity(Ordered_key **k1, Ordered_key **k2);
   /*
     Comparison function used by the priority queue pq, the 'smaller' key
     is the one with the smaller current row number.
@@ -992,7 +992,7 @@
   uint8 uncacheable() { return UNCACHEABLE_DEPENDENT; }
   void exclude() {}
   table_map upper_select_const_tables() { return 0; }
-  void print(String*, enum_query_type) {}
+  void print(String*, enum_query_type);
   bool change_result(Item_subselect*, select_result_interceptor*)
   { DBUG_ASSERT(FALSE); return false; }
   bool no_tables() { return false; }