maria-developers team mailing list archive
-
maria-developers team
-
Mailing list archive
-
Message #02239
bzr commit into file:///home/tsk/mprog/src/5.3-mwl68-unmerged/ branch (timour:2764)
#At file:///home/tsk/mprog/src/5.3-mwl68-unmerged/ based on revid:timour@xxxxxxxxxxxx-20100222135709-3568ya6z76hkwfzs
2764 timour@xxxxxxxxxxxx 2010-02-22
MWL#68 Subquery optimization: Efficient NOT IN execution with NULLs
This patch mainly adds sorting of all indexes for partial matching
according to their NULL selectivity. The patch also fixes a related bug
in subselect_rowid_merge_engine::test_null_row() where the wrong matched
indexes were skipped.
In addition the patch:
- adds few ::print() methods,
- renames few variables that had similar names but different purpose.
modified:
sql/item_subselect.cc
sql/item_subselect.h
=== modified file 'sql/item_subselect.cc'
--- a/sql/item_subselect.cc 2010-02-22 13:57:09 +0000
+++ b/sql/item_subselect.cc 2010-02-22 15:16:55 +0000
@@ -3424,7 +3424,10 @@ void subselect_hash_sj_engine::set_strat
DBUG_ENTER("subselect_hash_sj_engine::set_strategy_using_schema");
if (item_in->is_top_level_item())
+ {
strategy= COMPLETE_MATCH;
+ DBUG_VOID_RETURN;
+ }
else
{
List_iterator<Item> inner_col_it(*item_in->unit->get_unit_column_types());
@@ -3846,6 +3849,7 @@ int subselect_hash_sj_engine::exec()
thd->lex->current_select= materialize_engine->select_lex;
if ((res= materialize_join->optimize()))
goto err; /* purecov: inspected */
+ DBUG_ASSERT(!is_materialized); /* We should materialize only once. */
materialize_join->exec();
if ((res= test(materialize_join->error || thd->is_fatal_error)))
goto err;
@@ -3952,7 +3956,7 @@ void subselect_hash_sj_engine::print(Str
lookup_engine->print(str, query_type);
else
str->append(STRING_WITH_LEN(
- "<the access method for lookups is not yet created>"
+ "<engine selected at execution time>"
));
}
@@ -3980,11 +3984,10 @@ bool subselect_hash_sj_engine::change_re
}
-Ordered_key::Ordered_key(uint key_idx_arg, TABLE *tbl_arg,
- Item *search_key_arg, ha_rows null_count_arg,
- ha_rows min_null_row_arg, ha_rows max_null_row_arg,
- uchar *row_num_to_rowid_arg)
- : key_idx(key_idx_arg), tbl(tbl_arg), search_key(search_key_arg),
+Ordered_key::Ordered_key(uint keyid_arg, TABLE *tbl_arg, Item *search_key_arg,
+ ha_rows null_count_arg, ha_rows min_null_row_arg,
+ ha_rows max_null_row_arg, uchar *row_num_to_rowid_arg)
+ : keyid(keyid_arg), tbl(tbl_arg), search_key(search_key_arg),
row_num_to_rowid(row_num_to_rowid_arg), null_count(null_count_arg)
{
DBUG_ASSERT(tbl->file->stats.records > null_count);
@@ -4190,6 +4193,21 @@ void Ordered_key::sort_keys()
/*
+ The probability that a certain row does not contain a NULL in some row in
+ a NULL-indexed column.
+ @retval 1 if there are no NULLs
+ @retval 0 if only NULLs
+*/
+
+double Ordered_key::null_selectivity()
+{
+ /* We should not be processing empty tables. */
+ DBUG_ASSERT(tbl->file->stats.records);
+ return (1 - (double) null_count / (double) tbl->file->stats.records);
+}
+
+
+/*
Compare the value(s) of the current key in 'search_key' with the
data of the current table record.
@@ -4307,6 +4325,34 @@ bool Ordered_key::next_same()
}
+void Ordered_key::print(String *str)
+{
+ uint i;
+ str->append("{idx=");
+ str->qs_append(keyid);
+ str->append(", (");
+ for (i= 0; i < key_column_count - 1; i++)
+ {
+ str->append(key_columns[i]->field->field_name);
+ str->append(", ");
+ }
+ str->append(key_columns[i]->field->field_name);
+ str->append("), ");
+
+ str->append("null_bitmap: (bits=");
+ str->qs_append(null_key.n_bits);
+ str->append(", nulls= ");
+ str->qs_append((double)null_count);
+ str->append(", min_null= ");
+ str->qs_append((double)min_null_row);
+ str->append(", max_null= ");
+ str->qs_append((double)max_null_row);
+ str->append("), ");
+
+ str->append('}');
+}
+
+
/*
@param non_null_key_parts
@param partial_match_key_parts A union of all single-column NULL key parts.
@@ -4323,7 +4369,7 @@ subselect_rowid_merge_engine::init(MY_BI
rownum_t cur_rownum= 0;
select_materialize_with_stats *result_sink=
(select_materialize_with_stats *) result;
- uint cur_key= 0;
+ uint cur_keyid= 0;
Item_in_subselect *item_in= (Item_in_subselect*) item;
int error;
@@ -4346,16 +4392,16 @@ subselect_rowid_merge_engine::init(MY_BI
/* Create the only non-NULL key if there is any. */
if (non_null_key_parts)
{
- non_null_key= new Ordered_key(cur_key, tmp_table, item_in->left_expr,
+ non_null_key= new Ordered_key(cur_keyid, tmp_table, item_in->left_expr,
0, 0, 0, row_num_to_rowid);
if (non_null_key->init(non_null_key_parts))
{
// TIMOUR: revert to partial matching via scanning
return TRUE;
}
- merge_keys[cur_key]= non_null_key;
- merge_keys[cur_key]->first();
- ++cur_key;
+ merge_keys[cur_keyid]= non_null_key;
+ merge_keys[cur_keyid]->first();
+ ++cur_keyid;
}
/*
@@ -4379,23 +4425,24 @@ subselect_rowid_merge_engine::init(MY_BI
continue;
if (result_sink->get_null_count_of_col(i) == row_count)
- bitmap_set_bit(&null_only_columns, cur_key);
+ bitmap_set_bit(&null_only_columns, cur_keyid);
else
{
- merge_keys[cur_key]= new Ordered_key(cur_key, tmp_table,
- item_in->left_expr->element_index(i),
- result_sink->get_null_count_of_col(i),
- result_sink->get_min_null_of_col(i),
- result_sink->get_max_null_of_col(i),
- row_num_to_rowid);
- if (merge_keys[cur_key]->init(i))
+ merge_keys[cur_keyid]= new Ordered_key(
+ cur_keyid, tmp_table,
+ item_in->left_expr->element_index(i),
+ result_sink->get_null_count_of_col(i),
+ result_sink->get_min_null_of_col(i),
+ result_sink->get_max_null_of_col(i),
+ row_num_to_rowid);
+ if (merge_keys[cur_keyid]->init(i))
{
// TIMOUR: revert to partial matching via scanning
return TRUE;
}
- merge_keys[cur_key]->first();
+ merge_keys[cur_keyid]->first();
}
- ++cur_key;
+ ++cur_keyid;
}
}
@@ -4453,12 +4500,14 @@ subselect_rowid_merge_engine::init(MY_BI
tmp_table->file->ha_rnd_end();
+ /* Sort all the keys by their NULL selectivity. */
+ my_qsort(merge_keys, keys_count, sizeof(Ordered_key*),
+ (qsort_cmp) cmp_keys_by_null_selectivity);
+
/* Sort the keys in each of the indexes. */
for (uint i= 0; i < keys_count; i++)
merge_keys[i]->sort_keys();
- // TIMOUR: sort all the keys by NULL selectivity
-
if (init_queue(&pq, keys_count, 0, FALSE,
subselect_rowid_merge_engine::cmp_keys_by_cur_rownum, NULL))
{
@@ -4486,20 +4535,38 @@ void subselect_rowid_merge_engine::clean
}
+void subselect_rowid_merge_engine::print(String *str, enum_query_type query_type)
+{
+ str->append(STRING_WITH_LEN("<rowid_merge>("));
+ for (uint i= 0; i < keys_count; i++)
+ merge_keys[i]->print(str);
+ str->append(')');
+}
+
+
/*
+ Quick sort comparison function to compare keys in order of decreasing bitmap
+ selectivity, so that the most selective keys come first.
+
+ @param k1 first key to compare
+ @param k2 second key to compare
+
+ @retval 1 if k1 is less selective than k2
+ @retval 0 if k1 is equally selective as k2
+ @retval -1 if k1 is more selective than k2
*/
int
-subselect_rowid_merge_engine::cmp_keys_by_null_selectivity(Ordered_key *a,
- Ordered_key *b)
+subselect_rowid_merge_engine::cmp_keys_by_null_selectivity(Ordered_key **k1,
+ Ordered_key **k2)
{
- double a_sel= a->null_selectivity();
- double b_sel= b->null_selectivity();
- if (a_sel == b_sel)
- return 0;
- if (a_sel > b_sel)
+ double k1_sel= (*k1)->null_selectivity();
+ double k2_sel= (*k2)->null_selectivity();
+ if (k1_sel < k2_sel)
return 1;
- return -1;
+ if (k1_sel > k2_sel)
+ return -1;
+ return 0;
}
@@ -4527,17 +4594,21 @@ subselect_rowid_merge_engine::cmp_keys_b
bool subselect_rowid_merge_engine::test_null_row(rownum_t row_num)
{
+ Ordered_key *cur_key;
+ uint cur_id;
for (uint i = 0; i < keys_count; i++)
{
- if (bitmap_is_set(&matching_keys, i))
+ cur_key= merge_keys[i];
+ cur_id= cur_key->get_keyid();
+ if (bitmap_is_set(&matching_keys, cur_id))
{
/*
- The key 'i' already matches a value in row 'row_num', thus we
- skip it as it can't possibly match a NULL.
+ The key 'i' (with id 'cur_keyid') already matches a value in row 'row_num',
+ thus we skip it as it can't possibly match a NULL.
*/
continue;
}
- if (!merge_keys[i]->is_null(row_num))
+ if (!cur_key->is_null(row_num))
return FALSE;
}
return TRUE;
@@ -4583,7 +4654,7 @@ bool subselect_rowid_merge_engine::parti
if (merge_keys[i]->get_search_key(0)->is_null())
{
++count_nulls_in_search_key;
- bitmap_set_bit(&matching_outer_cols, merge_keys[i]->get_key_idx());
+ bitmap_set_bit(&matching_outer_cols, merge_keys[i]->get_keyid());
}
else if (merge_keys[i]->lookup())
queue_insert(&pq, (uchar *) merge_keys[i]);
@@ -4610,7 +4681,7 @@ bool subselect_rowid_merge_engine::parti
min_key= (Ordered_key*) queue_remove(&pq, 0);
min_row_num= min_key->current();
bitmap_copy(&matching_keys, &null_only_columns);
- bitmap_set_bit(&matching_keys, min_key->get_key_idx());
+ bitmap_set_bit(&matching_keys, min_key->get_keyid());
bitmap_union(&matching_keys, &matching_outer_cols);
if (min_key->next_same())
queue_insert(&pq, (uchar *) min_key);
@@ -4633,7 +4704,7 @@ bool subselect_rowid_merge_engine::parti
cur_row_num= cur_key->current();
if (cur_row_num == min_row_num)
- bitmap_set_bit(&matching_keys, cur_key->get_key_idx());
+ bitmap_set_bit(&matching_keys, cur_key->get_keyid());
else
{
/* Follows from the correct use of priority queue. */
@@ -4645,7 +4716,7 @@ bool subselect_rowid_merge_engine::parti
min_key= cur_key;
min_row_num= cur_row_num;
bitmap_copy(&matching_keys, &null_only_columns);
- bitmap_set_bit(&matching_keys, min_key->get_key_idx());
+ bitmap_set_bit(&matching_keys, min_key->get_keyid());
bitmap_union(&matching_keys, &matching_outer_cols);
}
}
=== modified file 'sql/item_subselect.h'
--- a/sql/item_subselect.h 2010-02-19 21:55:57 +0000
+++ b/sql/item_subselect.h 2010-02-22 15:16:55 +0000
@@ -752,7 +752,7 @@ protected:
Index of the key in an array of keys. This index allows to
construct (sub)sets of keys represented by bitmaps.
*/
- uint key_idx;
+ uint keyid;
/* The table being indexed. */
TABLE *tbl;
/* The columns being indexed. */
@@ -810,7 +810,7 @@ protected:
public:
static void *operator new(size_t size) throw ()
{ return sql_alloc(size); }
- Ordered_key(uint key_idx_arg, TABLE *tbl_arg,
+ Ordered_key(uint keyid_arg, TABLE *tbl_arg,
Item *search_key_arg, ha_rows null_count_arg,
ha_rows min_null_row_arg, ha_rows max_null_row_arg,
uchar *row_num_to_rowid_arg);
@@ -822,7 +822,7 @@ public:
bool init(int col_idx);
uint get_column_count() { return key_column_count; }
- uint get_key_idx() { return key_idx; }
+ uint get_keyid() { return keyid; }
uint get_field_idx(uint i)
{
DBUG_ASSERT(i < key_column_count);
@@ -841,8 +841,7 @@ public:
}
void sort_keys();
-
- double null_selectivity() { return (1 - null_count / null_key.n_bits); }
+ double null_selectivity();
/*
Position the current element at the first row that matches the key.
@@ -896,6 +895,7 @@ public:
return FALSE;
return bitmap_is_set(&null_key, row_num);
}
+ void print(String *str);
};
@@ -957,10 +957,10 @@ protected:
bool has_covering_null_row;
protected:
/*
- Comparison function to compare keys in order of increasing bitmap
+ Comparison function to compare keys in order of decreasing bitmap
selectivity.
*/
- static int cmp_keys_by_null_selectivity(Ordered_key *a, Ordered_key *b);
+ static int cmp_keys_by_null_selectivity(Ordered_key **k1, Ordered_key **k2);
/*
Comparison function used by the priority queue pq, the 'smaller' key
is the one with the smaller current row number.
@@ -992,7 +992,7 @@ public:
uint8 uncacheable() { return UNCACHEABLE_DEPENDENT; }
void exclude() {}
table_map upper_select_const_tables() { return 0; }
- void print(String*, enum_query_type) {}
+ void print(String*, enum_query_type);
bool change_result(Item_subselect*, select_result_interceptor*)
{ DBUG_ASSERT(FALSE); return false; }
bool no_tables() { return false; }
# Bazaar merge directive format 2 (Bazaar 0.90)
# revision_id: timour@xxxxxxxxxxxx-20100222151655-ltjv0rlv6z2sdiiu
# target_branch: file:///home/tsk/mprog/src/5.3-mwl68-unmerged/
# testament_sha1: c045b4f1754b68ac22c0d31daf0b3d45f9556077
# timestamp: 2010-02-22 17:17:02 +0200
# base_revision_id: timour@xxxxxxxxxxxx-20100222135709-\
# 3568ya6z76hkwfzs
#
# Begin bundle
IyBCYXphYXIgcmV2aXNpb24gYnVuZGxlIHY0CiMKQlpoOTFBWSZTWY40eAQABdz/gFSQAgB5////
fue/ir////pgDOn1lzYbkss2hQC6tdwDUhQdBShQ6LYD4SmoJNGpvVTyniTaYJ6mp5TNT1PJog09
RoGgZMmnoaglCAE00mQpp6p5J7VPU8p4oynoaR6jaIMIyeSGaQSmpk0SJo01MCZNMT0mmQNDQ0AA
AaA0HGRpkxNBkyYTTIGQ0BoDTJoYATQGOMjTJiaDJkwmmQMhoDQGmTQwAmgMJEiAEMgI0ZNGqT2U
0wT00NTRMQZNAGhpraJhMzCG2twXPm4hih27+DJtrYLd+9Xci8iWF48UkwWWNWPap7ef/WXPuhrd
pOYbJyu3OSCjl33ZU5qJvlPXjXi7e6uGbHx+527cyo0mN72EqoufBwjxyhhxn35V7VodbUn6MbYl
PU/zaAhpCpKsbIemJU2S8I3VQ+EWdSboznJnSgOHM1fxnaJRrpuTzCIIXRAFQZHYDnMzHBXEu0ew
BAa0aGUvHw/kImF93kxbJRZ3JQHCyfYutzk/fck61urMyO03hJDMyQE1tI7hmOfXl186l7Q+HnOk
KVU5MDn8xzpfEToPgg683jK6zYLGosTWDEmwUIoiIvV4YdF6UB5xPZPSzgsSmbtS5I6yYrvCSlQI
hQq9XUkTsG58d3Nz7Yju0Ypqu4LPkdptTGOEDA6iWYhgr5Y8dST1J2QofQQ8qnhDLUIGirzVri7W
TKBSH3saYOpcqEP/dpgrZe4FN80TGtTL7+r5L6J6ZZ24JXQg/2fwgZ7n82mpAKwprx6F1tKK+Xky
OK4nsdcyC0lRX0tIprBtB9hXzXSwwy9PVg9NDJSxLPwbOzXryHGc2Fp1SkIEX0jrkFB8/NwvSZrp
RcxFRzUfIx0upQyGjbQgdbi76SY2+G/YycUZo39zvPTT4nTjHXoWgbXIcIhL4xR9XGhE2kPsYsLo
9+73Xrpo4S9e+wk0elncJopjjJEScQUJndKYTJssZ850KAkIpTWi1dB5gWlrxQ9FN6nJk2uPl7kQ
IfZ09iA48fJIOOcmXNBhfTBaM5HFWkzTirsytaMLjZR5svIL9gZ38GsMk3CWw0nx+UjTjbolBmgN
eMGRRNj0ld0nxVFJbbm2JaGopHwPEggniHSYRE/4VHDJkHejN2mY6SG47PR6/BwecD0a+cNDu4NP
GXl3E7D5imkqDpDd0MXkj2F1VhdSl1/LfEktLSCh7GIpTO4Upl4aJiwzJhndgJ8BZ6sEk7FkYH7E
a6cMCBNbGHX6AMHGYSjGsHY5kcU0nLaIjeZlxogkoHSpJS3mJB2rHhNd9FOBkE/uSxWCZSkgRMDY
aJT+dHbRS9tME6A112rUtJ7EvRKgYy0YanS7scd05qkbiA4KRDRwqglN74FdwPIgkASESI0i0+8e
jxVu9C6QC5+4HdNC82pS+bCrLhCBgaVqI6Y9DIcaA8GhaMT3kOhR5LMVHDhRSI4cSJm44jiJxMj9
ylQpw3br1dCwPSaTS4SHAoaNpbExzabE5Qgh9DRVXBNqwGiMHmxPQc7kmEu+szGWqpBzOc42Hgfy
nAJh9u54GnnBY66DHaMxtmzjQCjsxUiC1KEYQQhNajGx5MQXXJbPq6CzrVWIkZKyMjhrFxgiuJWA
dIqlU+7ELTQ6wiOFGHHYZFj1Zg47TtA3hPE2ZNmVsnZ3GBw6vR7D1eQf0nQkeXYHaXCHkNCYpvLG
/jxJT4HDku/Xi/i9icl+ChF8+x5FRzLApmT0GK+Uym4UaopteZsOO65mEJyojM/OFZab3hetH6MY
KJGKsxmLcImTshbCwIjDh52/F4SpHw2V7g3kyzSEtoNTzrUimdoe5U6IHDgG0e53tKKGpQz9bmKh
qmnsNf0j1wobiQ7KakRkKXkS1zXd4zjztjK0cn0jMPiX2xKqRTlInOPWhIsEqEvM2kbQhIX6Opun
ROLs1lRbDZN3tTLZCPjVfVZ6NJZrAOTnfFWoge/P7T5Xod7sOS+7PI1OBOSgu2BjEZf7gblprAFe
R4TF4YylIkhHeeF9xFiGPjcud7nnogmH4JZ+Zs9wrJIjO1iWRRqt8x+PGzcqatki7pwOZFImkfMs
YwK00W0Vg2R6rZtRDgd6zBfQy2eBVXhSj7itfYvYeQ3QlVoRTcxXCkMktovI+JgLSfSZioPkeZSq
j7vtzHB89FWdZioxN6KzL7kXBWfeyVJ0JVXH4zixB4pH0Jpmon0bcjJMMzAqBZoUiZ+oldZNkKhM
CxmbWyA8MDeMZuOH4F+OKIH7v0l6oc58jMLpQVhv03maj5tBgrlxQfY0DaEWda5mBkczUM3HZf+9
aZZ55Wqb5LN9tUwJVtZedpANg0uTygkkwVwxKZqZ/z+kv0nC/1j9rE55gUhBR5x5YV1QGZDIEkEU
RN8lvppDtLCYS9JSci1ENIGhMLS5YHSQ0KsNR+zG02cCblfJNcY6iBEpJV3kl6nHp4XGchEcn5Xt
psz26nSczqwp889Aqo5my7riLmoz8WExEGsirNzJnDgvpksExLkY5Z0xs3lIC95RWRpzau4vtQvy
f9S4uMBT4BAC8syCnupvSD+3sVk1s61AqjH0Ea1K8z8XNC3WiVBjs0B2husqI56oeRuigsujJXpU
oJSEGHUPPkNayxG5JIK8EZ0w6nHgjlytKSnIcpjEVWRYCwbgfK8QpCkRmKgOEdZTvPGrm8xNMy/x
7Qr7Wg60NoRPaSRsm2CaHFecDuJBpM5zPRx7Rv3p9w0axfrg3STGvvY23slC1wlPsCdZ2YnzJ9Yk
FxB5e5EiWsiT8UbqGrojb1V91eYMAPB6WiGmgaIaYfrYGq8+JYebR2BzYFh6bEHvsWlpKs7mm92g
rF6kR9FFHYD9vQrlbDnod0EvTkBETp8bQlZBVVEweBsEEv689e9AzKTWjjFXr2DZuNlSRu9twjkB
qEsaKNQ2+Z8CwqA7FisQ81vIRCauHrQxoQ5kJQ4YQMUQKABjReAYm0OtHQaIfNPPMxIKw0llJSUK
EkTlwGE5DENKAFvHIwncikEmXVxJRuAlNG8DVqLXU6S6wYxiZdrQWKA/6hdlX3NcohisQ4dnxszT
Vc5nt7KwUVKZNyf1GeqHpO4SqKRLbg1rKxepbDX1nR03XUjb/KxXk5ikiUGMke2IUkxp6isL/Gde
kiaQVCPJqVRAEMCMAzM3hBVJyOSnHxNDCRLUQFURgZFoyOUUCWdd9vQ5cGHFo2z5ElDLBlqz3cek
O4ctBBiHN996vvWnvDiIvaP/PRIaFnHZP6r5Fs84l7ihHVTpWSIKlV2CVC/PMHwrTKc5s1oNhclQ
JaM6EdhpneltJybTQzH+BAQeHh1BMmGiF9ayBGQd4rYli6IHU8of0MiSr8KmMC5SfkTMGO4PQYN5
rPSWGdG3nu4yzvlkX7h1DlwVzSTaJnQ2JNgiaKC1qD9EJqChqIPZwO0khYZiKgPrUvSzkjh4yOLU
sKwTQrBKGrcQyFM+0ClcDUH4FPIdfC2QYKEfLfq8pmLFf3nUGgNOlkDO/tJSY252TcBa7jlyMwop
TPMY1vvqowNhbqJqOqITsTGvBDRaUO06QM9y3bdkaxdyCEQk13xnaNpJQplz1kSWCBk1MSzBlIoH
iP2ri2+8wWT+mGoKe8zoyNLmfQ0DYsRc4K1V0Glu7HBzlKDarRI1U28qPhdiIEFL0aI4RnkUMV3R
2dgmkU8ToRTQxCkgdsq7HIuCUs3qwB8GrzfYNIjAsfh6zZlYylejqubXWQDKLsNA7IEswE0FqW02
I2ppI9YRRWOC1hGSCWeSRMSlWpAXnHFBbcg54ZqQ5f/mHU2WGZIXXaoFYiR20JffTjIS1gjeGrhn
ooPI8wMAMLEaDYGhUfHoQTEc6R3zTG169sCoPEHAqyNuoq8ydKs6eAQsGG0aNYxaC51ZRJwbKFAm
DbaGNiU4ShqbJrCgPxD9xQgVS3QJD5gs2bAYvVRT0IyNyfOFCVnF3JFOFCQjjR4BAA==