maria-developers team mailing list archive
-
maria-developers team
-
Mailing list archive
-
Message #02241
Rev 2764: MWL#68 Subquery optimization: Efficient NOT IN execution with NULLs in file:///home/tsk/mprog/src/5.3-mwl68-unmerged/
At file:///home/tsk/mprog/src/5.3-mwl68-unmerged/
------------------------------------------------------------
revno: 2764
revision-id: timour@xxxxxxxxxxxx-20100222151655-ltjv0rlv6z2sdiiu
parent: timour@xxxxxxxxxxxx-20100222135709-3568ya6z76hkwfzs
committer: timour@xxxxxxxxxxxx
branch nick: 5.3-mwl68-unmerged
timestamp: Mon 2010-02-22 17:16:55 +0200
message:
MWL#68 Subquery optimization: Efficient NOT IN execution with NULLs
This patch mainly adds sorting of all indexes for partial matching
according to their NULL selectivity. The patch also fixes a related bug
in subselect_rowid_merge_engine::test_null_row() where the wrong matched
indexes were skipped.
In addition the patch:
- adds few ::print() methods,
- renames few variables that had similar names but different purpose.
=== modified file 'sql/item_subselect.cc'
--- a/sql/item_subselect.cc 2010-02-22 13:57:09 +0000
+++ b/sql/item_subselect.cc 2010-02-22 15:16:55 +0000
@@ -3424,7 +3424,10 @@
DBUG_ENTER("subselect_hash_sj_engine::set_strategy_using_schema");
if (item_in->is_top_level_item())
+ {
strategy= COMPLETE_MATCH;
+ DBUG_VOID_RETURN;
+ }
else
{
List_iterator<Item> inner_col_it(*item_in->unit->get_unit_column_types());
@@ -3846,6 +3849,7 @@
thd->lex->current_select= materialize_engine->select_lex;
if ((res= materialize_join->optimize()))
goto err; /* purecov: inspected */
+ DBUG_ASSERT(!is_materialized); /* We should materialize only once. */
materialize_join->exec();
if ((res= test(materialize_join->error || thd->is_fatal_error)))
goto err;
@@ -3952,7 +3956,7 @@
lookup_engine->print(str, query_type);
else
str->append(STRING_WITH_LEN(
- "<the access method for lookups is not yet created>"
+ "<engine selected at execution time>"
));
}
@@ -3980,11 +3984,10 @@
}
-Ordered_key::Ordered_key(uint key_idx_arg, TABLE *tbl_arg,
- Item *search_key_arg, ha_rows null_count_arg,
- ha_rows min_null_row_arg, ha_rows max_null_row_arg,
- uchar *row_num_to_rowid_arg)
- : key_idx(key_idx_arg), tbl(tbl_arg), search_key(search_key_arg),
+Ordered_key::Ordered_key(uint keyid_arg, TABLE *tbl_arg, Item *search_key_arg,
+ ha_rows null_count_arg, ha_rows min_null_row_arg,
+ ha_rows max_null_row_arg, uchar *row_num_to_rowid_arg)
+ : keyid(keyid_arg), tbl(tbl_arg), search_key(search_key_arg),
row_num_to_rowid(row_num_to_rowid_arg), null_count(null_count_arg)
{
DBUG_ASSERT(tbl->file->stats.records > null_count);
@@ -4190,6 +4193,21 @@
/*
+ The probability that a certain row does not contain a NULL in some row in
+ a NULL-indexed column.
+ @retval 1 if there are no NULLs
+ @retval 0 if only NULLs
+*/
+
+double Ordered_key::null_selectivity()
+{
+ /* We should not be processing empty tables. */
+ DBUG_ASSERT(tbl->file->stats.records);
+ return (1 - (double) null_count / (double) tbl->file->stats.records);
+}
+
+
+/*
Compare the value(s) of the current key in 'search_key' with the
data of the current table record.
@@ -4307,6 +4325,34 @@
}
+void Ordered_key::print(String *str)
+{
+ uint i;
+ str->append("{idx=");
+ str->qs_append(keyid);
+ str->append(", (");
+ for (i= 0; i < key_column_count - 1; i++)
+ {
+ str->append(key_columns[i]->field->field_name);
+ str->append(", ");
+ }
+ str->append(key_columns[i]->field->field_name);
+ str->append("), ");
+
+ str->append("null_bitmap: (bits=");
+ str->qs_append(null_key.n_bits);
+ str->append(", nulls= ");
+ str->qs_append((double)null_count);
+ str->append(", min_null= ");
+ str->qs_append((double)min_null_row);
+ str->append(", max_null= ");
+ str->qs_append((double)max_null_row);
+ str->append("), ");
+
+ str->append('}');
+}
+
+
/*
@param non_null_key_parts
@param partial_match_key_parts A union of all single-column NULL key parts.
@@ -4323,7 +4369,7 @@
rownum_t cur_rownum= 0;
select_materialize_with_stats *result_sink=
(select_materialize_with_stats *) result;
- uint cur_key= 0;
+ uint cur_keyid= 0;
Item_in_subselect *item_in= (Item_in_subselect*) item;
int error;
@@ -4346,16 +4392,16 @@
/* Create the only non-NULL key if there is any. */
if (non_null_key_parts)
{
- non_null_key= new Ordered_key(cur_key, tmp_table, item_in->left_expr,
+ non_null_key= new Ordered_key(cur_keyid, tmp_table, item_in->left_expr,
0, 0, 0, row_num_to_rowid);
if (non_null_key->init(non_null_key_parts))
{
// TIMOUR: revert to partial matching via scanning
return TRUE;
}
- merge_keys[cur_key]= non_null_key;
- merge_keys[cur_key]->first();
- ++cur_key;
+ merge_keys[cur_keyid]= non_null_key;
+ merge_keys[cur_keyid]->first();
+ ++cur_keyid;
}
/*
@@ -4379,23 +4425,24 @@
continue;
if (result_sink->get_null_count_of_col(i) == row_count)
- bitmap_set_bit(&null_only_columns, cur_key);
+ bitmap_set_bit(&null_only_columns, cur_keyid);
else
{
- merge_keys[cur_key]= new Ordered_key(cur_key, tmp_table,
- item_in->left_expr->element_index(i),
- result_sink->get_null_count_of_col(i),
- result_sink->get_min_null_of_col(i),
- result_sink->get_max_null_of_col(i),
- row_num_to_rowid);
- if (merge_keys[cur_key]->init(i))
+ merge_keys[cur_keyid]= new Ordered_key(
+ cur_keyid, tmp_table,
+ item_in->left_expr->element_index(i),
+ result_sink->get_null_count_of_col(i),
+ result_sink->get_min_null_of_col(i),
+ result_sink->get_max_null_of_col(i),
+ row_num_to_rowid);
+ if (merge_keys[cur_keyid]->init(i))
{
// TIMOUR: revert to partial matching via scanning
return TRUE;
}
- merge_keys[cur_key]->first();
+ merge_keys[cur_keyid]->first();
}
- ++cur_key;
+ ++cur_keyid;
}
}
@@ -4453,12 +4500,14 @@
tmp_table->file->ha_rnd_end();
+ /* Sort all the keys by their NULL selectivity. */
+ my_qsort(merge_keys, keys_count, sizeof(Ordered_key*),
+ (qsort_cmp) cmp_keys_by_null_selectivity);
+
/* Sort the keys in each of the indexes. */
for (uint i= 0; i < keys_count; i++)
merge_keys[i]->sort_keys();
- // TIMOUR: sort all the keys by NULL selectivity
-
if (init_queue(&pq, keys_count, 0, FALSE,
subselect_rowid_merge_engine::cmp_keys_by_cur_rownum, NULL))
{
@@ -4486,20 +4535,38 @@
}
+void subselect_rowid_merge_engine::print(String *str, enum_query_type query_type)
+{
+ str->append(STRING_WITH_LEN("<rowid_merge>("));
+ for (uint i= 0; i < keys_count; i++)
+ merge_keys[i]->print(str);
+ str->append(')');
+}
+
+
/*
+ Quick sort comparison function to compare keys in order of decreasing bitmap
+ selectivity, so that the most selective keys come first.
+
+ @param k1 first key to compare
+ @param k2 second key to compare
+
+ @retval 1 if k1 is less selective than k2
+ @retval 0 if k1 is equally selective as k2
+ @retval -1 if k1 is more selective than k2
*/
int
-subselect_rowid_merge_engine::cmp_keys_by_null_selectivity(Ordered_key *a,
- Ordered_key *b)
+subselect_rowid_merge_engine::cmp_keys_by_null_selectivity(Ordered_key **k1,
+ Ordered_key **k2)
{
- double a_sel= a->null_selectivity();
- double b_sel= b->null_selectivity();
- if (a_sel == b_sel)
- return 0;
- if (a_sel > b_sel)
+ double k1_sel= (*k1)->null_selectivity();
+ double k2_sel= (*k2)->null_selectivity();
+ if (k1_sel < k2_sel)
return 1;
- return -1;
+ if (k1_sel > k2_sel)
+ return -1;
+ return 0;
}
@@ -4527,17 +4594,21 @@
bool subselect_rowid_merge_engine::test_null_row(rownum_t row_num)
{
+ Ordered_key *cur_key;
+ uint cur_id;
for (uint i = 0; i < keys_count; i++)
{
- if (bitmap_is_set(&matching_keys, i))
+ cur_key= merge_keys[i];
+ cur_id= cur_key->get_keyid();
+ if (bitmap_is_set(&matching_keys, cur_id))
{
/*
- The key 'i' already matches a value in row 'row_num', thus we
- skip it as it can't possibly match a NULL.
+ The key 'i' (with id 'cur_keyid') already matches a value in row 'row_num',
+ thus we skip it as it can't possibly match a NULL.
*/
continue;
}
- if (!merge_keys[i]->is_null(row_num))
+ if (!cur_key->is_null(row_num))
return FALSE;
}
return TRUE;
@@ -4583,7 +4654,7 @@
if (merge_keys[i]->get_search_key(0)->is_null())
{
++count_nulls_in_search_key;
- bitmap_set_bit(&matching_outer_cols, merge_keys[i]->get_key_idx());
+ bitmap_set_bit(&matching_outer_cols, merge_keys[i]->get_keyid());
}
else if (merge_keys[i]->lookup())
queue_insert(&pq, (uchar *) merge_keys[i]);
@@ -4610,7 +4681,7 @@
min_key= (Ordered_key*) queue_remove(&pq, 0);
min_row_num= min_key->current();
bitmap_copy(&matching_keys, &null_only_columns);
- bitmap_set_bit(&matching_keys, min_key->get_key_idx());
+ bitmap_set_bit(&matching_keys, min_key->get_keyid());
bitmap_union(&matching_keys, &matching_outer_cols);
if (min_key->next_same())
queue_insert(&pq, (uchar *) min_key);
@@ -4633,7 +4704,7 @@
cur_row_num= cur_key->current();
if (cur_row_num == min_row_num)
- bitmap_set_bit(&matching_keys, cur_key->get_key_idx());
+ bitmap_set_bit(&matching_keys, cur_key->get_keyid());
else
{
/* Follows from the correct use of priority queue. */
@@ -4645,7 +4716,7 @@
min_key= cur_key;
min_row_num= cur_row_num;
bitmap_copy(&matching_keys, &null_only_columns);
- bitmap_set_bit(&matching_keys, min_key->get_key_idx());
+ bitmap_set_bit(&matching_keys, min_key->get_keyid());
bitmap_union(&matching_keys, &matching_outer_cols);
}
}
=== modified file 'sql/item_subselect.h'
--- a/sql/item_subselect.h 2010-02-19 21:55:57 +0000
+++ b/sql/item_subselect.h 2010-02-22 15:16:55 +0000
@@ -752,7 +752,7 @@
Index of the key in an array of keys. This index allows to
construct (sub)sets of keys represented by bitmaps.
*/
- uint key_idx;
+ uint keyid;
/* The table being indexed. */
TABLE *tbl;
/* The columns being indexed. */
@@ -810,7 +810,7 @@
public:
static void *operator new(size_t size) throw ()
{ return sql_alloc(size); }
- Ordered_key(uint key_idx_arg, TABLE *tbl_arg,
+ Ordered_key(uint keyid_arg, TABLE *tbl_arg,
Item *search_key_arg, ha_rows null_count_arg,
ha_rows min_null_row_arg, ha_rows max_null_row_arg,
uchar *row_num_to_rowid_arg);
@@ -822,7 +822,7 @@
bool init(int col_idx);
uint get_column_count() { return key_column_count; }
- uint get_key_idx() { return key_idx; }
+ uint get_keyid() { return keyid; }
uint get_field_idx(uint i)
{
DBUG_ASSERT(i < key_column_count);
@@ -841,8 +841,7 @@
}
void sort_keys();
-
- double null_selectivity() { return (1 - null_count / null_key.n_bits); }
+ double null_selectivity();
/*
Position the current element at the first row that matches the key.
@@ -896,6 +895,7 @@
return FALSE;
return bitmap_is_set(&null_key, row_num);
}
+ void print(String *str);
};
@@ -957,10 +957,10 @@
bool has_covering_null_row;
protected:
/*
- Comparison function to compare keys in order of increasing bitmap
+ Comparison function to compare keys in order of decreasing bitmap
selectivity.
*/
- static int cmp_keys_by_null_selectivity(Ordered_key *a, Ordered_key *b);
+ static int cmp_keys_by_null_selectivity(Ordered_key **k1, Ordered_key **k2);
/*
Comparison function used by the priority queue pq, the 'smaller' key
is the one with the smaller current row number.
@@ -992,7 +992,7 @@
uint8 uncacheable() { return UNCACHEABLE_DEPENDENT; }
void exclude() {}
table_map upper_select_const_tables() { return 0; }
- void print(String*, enum_query_type) {}
+ void print(String*, enum_query_type);
bool change_result(Item_subselect*, select_result_interceptor*)
{ DBUG_ASSERT(FALSE); return false; }
bool no_tables() { return false; }