maria-developers team mailing list archive
-
maria-developers team
-
Mailing list archive
-
Message #03637
MWL#89 "Cost-based choice between Materialization and IN->EXISTS" rev1 for review
Igor,
Please find attached a combined patch for the first implementation of:
MWL#89 Cost-based choice between Materialization and IN->EXISTS
The tree is in Lanunchpad under buildbot:
https://code.launchpad.net/~maria-captains/maria/5.3-mwl89
Timour
=== modified file 'mysql-test/r/subselect3.result'
--- mysql-test/r/subselect3.result 2010-07-10 10:37:30 +0000
+++ mysql-test/r/subselect3.result 2010-10-04 15:26:55 +0000
@@ -840,11 +840,16 @@
1 0 0
2 0 0
11 0 0
-# 2nd and 3rd columns should be same for x == 11 only
+# 2nd and 3rd columns should be same
+EXPLAIN SELECT a AS x, ROW(11, 12) = (SELECT MAX(x), 12), ROW(11, 12) IN (SELECT MAX(x), 12) FROM t1;
+id select_type table type possible_keys key key_len ref rows Extra
+1 PRIMARY t1 ALL NULL NULL NULL NULL 3
+3 DEPENDENT SUBQUERY NULL NULL NULL NULL NULL NULL NULL No tables used
+2 DEPENDENT SUBQUERY NULL NULL NULL NULL NULL NULL NULL No tables used
SELECT a AS x, ROW(11, 12) = (SELECT MAX(x), 12), ROW(11, 12) IN (SELECT MAX(x), 12) FROM t1;
x ROW(11, 12) = (SELECT MAX(x), 12) ROW(11, 12) IN (SELECT MAX(x), 12)
-1 0 1
-2 0 1
+1 0 0
+2 0 0
11 1 1
DROP TABLE t1;
# both columns should be same
=== modified file 'mysql-test/r/subselect3_jcl6.result'
--- mysql-test/r/subselect3_jcl6.result 2010-07-10 10:37:30 +0000
+++ mysql-test/r/subselect3_jcl6.result 2010-10-04 15:26:55 +0000
@@ -844,11 +844,16 @@
1 0 0
2 0 0
11 0 0
-# 2nd and 3rd columns should be same for x == 11 only
+# 2nd and 3rd columns should be same
+EXPLAIN SELECT a AS x, ROW(11, 12) = (SELECT MAX(x), 12), ROW(11, 12) IN (SELECT MAX(x), 12) FROM t1;
+id select_type table type possible_keys key key_len ref rows Extra
+1 PRIMARY t1 ALL NULL NULL NULL NULL 3
+3 DEPENDENT SUBQUERY NULL NULL NULL NULL NULL NULL NULL No tables used
+2 DEPENDENT SUBQUERY NULL NULL NULL NULL NULL NULL NULL No tables used
SELECT a AS x, ROW(11, 12) = (SELECT MAX(x), 12), ROW(11, 12) IN (SELECT MAX(x), 12) FROM t1;
x ROW(11, 12) = (SELECT MAX(x), 12) ROW(11, 12) IN (SELECT MAX(x), 12)
-1 0 1
-2 0 1
+1 0 0
+2 0 0
11 1 1
DROP TABLE t1;
# both columns should be same
=== modified file 'mysql-test/r/subselect_mat.result'
--- mysql-test/r/subselect_mat.result 2010-06-26 10:05:41 +0000
+++ mysql-test/r/subselect_mat.result 2010-10-05 13:06:06 +0000
@@ -1246,3 +1246,29 @@
4
set session optimizer_switch=@save_optimizer_switch;
drop table t1, t2, t3;
+create table t0 (a int);
+insert into t0 values (0),(1),(2);
+create table t1 (a int);
+insert into t1 values (0),(1),(2);
+explain select a, a in (select a from t1) from t0;
+id select_type table type possible_keys key key_len ref rows Extra
+1 PRIMARY t0 ALL NULL NULL NULL NULL 3
+2 SUBQUERY t1 ALL NULL NULL NULL NULL 3
+select a, a in (select a from t1) from t0;
+a a in (select a from t1)
+0 1
+1 1
+2 1
+prepare s from 'select a, a in (select a from t1) from t0';
+execute s;
+a a in (select a from t1)
+0 1
+1 1
+2 1
+update t1 set a=123;
+execute s;
+a a in (select a from t1)
+0 0
+1 0
+2 0
+drop table t0, t1;
=== modified file 'mysql-test/t/subselect3.test'
--- mysql-test/t/subselect3.test 2010-07-10 10:37:30 +0000
+++ mysql-test/t/subselect3.test 2010-10-04 15:26:55 +0000
@@ -681,7 +681,8 @@
# The x alias is used below to workaround bug #40674.
# Regression tests for sum function on outer column in subselect from dual:
SELECT a AS x, ROW(11, 12) = (SELECT MAX(x), 22), ROW(11, 12) IN (SELECT MAX(x), 22) FROM t1;
---echo # 2nd and 3rd columns should be same for x == 11 only
+--echo # 2nd and 3rd columns should be same
+EXPLAIN SELECT a AS x, ROW(11, 12) = (SELECT MAX(x), 12), ROW(11, 12) IN (SELECT MAX(x), 12) FROM t1;
SELECT a AS x, ROW(11, 12) = (SELECT MAX(x), 12), ROW(11, 12) IN (SELECT MAX(x), 12) FROM t1;
DROP TABLE t1;
=== modified file 'mysql-test/t/subselect_mat.test'
--- mysql-test/t/subselect_mat.test 2010-03-13 20:04:52 +0000
+++ mysql-test/t/subselect_mat.test 2010-10-04 15:26:55 +0000
@@ -905,3 +905,19 @@
set session optimizer_switch=@save_optimizer_switch;
drop table t1, t2, t3;
+#
+# Test that the contents of the temp table of a materialized subquery is
+# cleaned up between PS re-executions.
+#
+
+create table t0 (a int);
+insert into t0 values (0),(1),(2);
+create table t1 (a int);
+insert into t1 values (0),(1),(2);
+explain select a, a in (select a from t1) from t0;
+select a, a in (select a from t1) from t0;
+prepare s from 'select a, a in (select a from t1) from t0';
+execute s;
+update t1 set a=123;
+execute s;
+drop table t0, t1;
=== modified file 'sql/item.h'
--- sql/item.h 2010-07-10 10:37:30 +0000
+++ sql/item.h 2010-10-04 15:26:55 +0000
@@ -947,6 +947,8 @@
virtual bool register_field_in_read_map(uchar *arg) { return 0; }
virtual bool enumerate_field_refs_processor(uchar *arg) { return 0; }
virtual bool mark_as_eliminated_processor(uchar *arg) { return 0; }
+ virtual bool eliminate_subselect_processor(uchar *arg) { return 0; }
+ virtual bool set_fake_select_as_master_processor(uchar *arg) { return 0; }
/*
The next function differs from the previous one that a bitmap to be updated
is passed as uchar *arg.
=== modified file 'sql/item_cmpfunc.cc'
--- sql/item_cmpfunc.cc 2010-07-16 07:33:01 +0000
+++ sql/item_cmpfunc.cc 2010-10-04 15:26:55 +0000
@@ -1964,6 +1964,18 @@
}
+bool Item_in_optimizer::is_expensive_processor(uchar *arg)
+{
+ return args[1]->is_expensive_processor(arg);
+}
+
+
+bool Item_in_optimizer::is_expensive()
+{
+ return args[1]->is_expensive();
+}
+
+
longlong Item_func_eq::val_int()
{
DBUG_ASSERT(fixed == 1);
@@ -4619,12 +4631,6 @@
longlong Item_func_isnull::val_int()
{
DBUG_ASSERT(fixed == 1);
- /*
- Handle optimization if the argument can't be null
- This has to be here because of the test in update_used_tables().
- */
- if (!used_tables_cache && !with_subselect)
- return cached_value;
return args[0]->is_null() ? 1: 0;
}
@@ -4632,12 +4638,6 @@
{
DBUG_ASSERT(fixed == 1);
DBUG_ENTER("Item_is_not_null_test::val_int");
- if (!used_tables_cache && !with_subselect)
- {
- owner->was_null|= (!cached_value);
- DBUG_PRINT("info", ("cached: %ld", (long) cached_value));
- DBUG_RETURN(cached_value);
- }
if (args[0]->is_null())
{
DBUG_PRINT("info", ("null"));
@@ -4654,19 +4654,9 @@
void Item_is_not_null_test::update_used_tables()
{
if (!args[0]->maybe_null)
- {
used_tables_cache= 0; /* is always true */
- cached_value= (longlong) 1;
- }
else
- {
args[0]->update_used_tables();
- if (!(used_tables_cache=args[0]->used_tables()) && !with_subselect)
- {
- /* Remember if the value is always NULL or never NULL */
- cached_value= (longlong) !args[0]->is_null();
- }
- }
}
@@ -5342,7 +5332,7 @@
/* "NOT (e $cmp$ ANY (SELECT ...)) -> e $rev_cmp$" ALL (SELECT ...) */
Item_func_not_all *new_item= new Item_func_not_all(args[0]);
Item_allany_subselect *allany= (Item_allany_subselect*)args[0];
- allany->func= allany->func_creator(FALSE);
+ allany->create_comp_func(FALSE);
allany->all= !allany->all;
allany->upper_item= new_item;
return new_item;
@@ -5354,7 +5344,7 @@
Item_func_nop_all *new_item= new Item_func_nop_all(args[0]);
Item_allany_subselect *allany= (Item_allany_subselect*)args[0];
allany->all= !allany->all;
- allany->func= allany->func_creator(TRUE);
+ allany->create_comp_func(TRUE);
allany->upper_item= new_item;
return new_item;
}
@@ -5637,6 +5627,9 @@
Item_field *item_field;
if (cond_false)
return 0;
+ /* If there is a single constant and no fields, the equality is TRUE. */
+ if (const_item && !fields.elements)
+ return 1;
List_iterator_fast<Item_field> it(fields);
Item *item= const_item ? const_item : it++;
if ((null_value= item->null_value))
@@ -5657,6 +5650,15 @@
void Item_equal::fix_length_and_dec()
{
Item *item= get_first(NULL);
+ if (!item)
+ {
+ /*
+ If there are no fields, there must be at least a constant, in which
+ case Item_equal::val_int evaluates to TRUE.
+ */
+ DBUG_ASSERT(const_item);
+ return;
+ }
eval_item= cmp_item::get_comparator(item->result_type(),
item->collation.collation);
}
=== modified file 'sql/item_cmpfunc.h'
--- sql/item_cmpfunc.h 2010-07-10 10:37:30 +0000
+++ sql/item_cmpfunc.h 2010-10-04 15:26:55 +0000
@@ -265,6 +265,8 @@
void keep_top_level_cache();
Item *transform(Item_transformer transformer, uchar *arg);
virtual Item *expr_cache_insert_transformer(uchar *thd_arg);
+ bool is_expensive_processor(uchar *arg);
+ bool is_expensive();
};
class Comp_creator
@@ -1302,8 +1304,6 @@
class Item_func_isnull :public Item_bool_func
{
-protected:
- longlong cached_value;
public:
Item_func_isnull(Item *a) :Item_bool_func(a) {}
longlong val_int();
@@ -1321,18 +1321,9 @@
{
used_tables_cache= 0; /* is always false */
const_item_cache= 1;
- cached_value= (longlong) 0;
}
else
- {
args[0]->update_used_tables();
- if ((const_item_cache= !(used_tables_cache= args[0]->used_tables())) &&
- !with_subselect)
- {
- /* Remember if the value is always NULL or never NULL */
- cached_value= (longlong) args[0]->is_null();
- }
- }
}
table_map not_null_tables() const { return 0; }
optimize_type select_optimize() const { return OPTIMIZE_NULL; }
=== modified file 'sql/item_subselect.cc'
--- sql/item_subselect.cc 2010-07-16 08:02:05 +0000
+++ sql/item_subselect.cc 2010-10-05 13:06:06 +0000
@@ -61,7 +61,7 @@
DBUG_ENTER("Item_subselect::init");
DBUG_PRINT("enter", ("select_lex: 0x%lx", (long) select_lex));
unit= select_lex->master_unit();
-
+ thd= unit->thd;
if (unit->item)
{
/*
@@ -159,6 +159,9 @@
}
first_execution= TRUE;
is_constant= FALSE;
+ if (in_strategy & SUBS_MATERIALIZATION)
+ in_strategy= 0;
+ pushed_cond_guards= NULL;
Item_subselect::cleanup();
DBUG_VOID_RETURN;
}
@@ -166,6 +169,7 @@
Item_subselect::~Item_subselect()
{
delete engine;
+ engine= NULL;
}
Item_subselect::trans_res
@@ -183,7 +187,8 @@
bool res;
DBUG_ASSERT(fixed == 0);
- engine->set_thd((thd= thd_param));
+ DBUG_ASSERT(thd == thd_param); /* thd can't change during execution. */
+ engine->set_thd(thd);
if (!done_first_fix_fields)
{
done_first_fix_fields= TRUE;
@@ -206,11 +211,7 @@
{
// all transformation is done (used by prepared statements)
changed= 1;
- inside_first_fix_fields= FALSE;
-
-
- // all transformation is done (used by prepared statements)
- changed= 1;
+ inside_first_fix_fields= FALSE;
/*
Substitute the current item with an Item_in_optimizer that was
@@ -220,11 +221,14 @@
*/
if (substitution)
{
- // did we changed top item of WHERE condition
+ /*
+ If the top item of the WHERE/HAVING condition changed,
+ set correct WHERE/HAVING for PS.
+ */
if (unit->outer_select()->where == (*ref))
- unit->outer_select()->where= substitution; // correct WHERE for PS
+ thd->change_item_tree(&(unit->outer_select()->where), substitution);
else if (unit->outer_select()->having == (*ref))
- unit->outer_select()->having= substitution; // correct HAVING for PS
+ thd->change_item_tree(&(unit->outer_select()->having), substitution);
(*ref)= substitution;
substitution->name= name;
@@ -284,6 +288,73 @@
}
+/**
+ Remove a subselect item from its unit so that the unit no longer
+ represents a subquery.
+
+ @param arg unused parameter
+
+ @return
+ FALSE to force the evaluation of the processor for the subsequent items.
+*/
+
+bool Item_subselect::eliminate_subselect_processor(uchar *arg)
+{
+ unit->item= NULL;
+ unit->exclude_from_tree();
+ eliminated= TRUE;
+ return FALSE;
+}
+
+
+/**
+ Adjust the master select of the subquery to be the fake_select which
+ represents the whole UNION right above the subquery, instead of the
+ last query of the UNION.
+
+ @param arg pointer to the fake select
+
+ @return
+ FALSE to force the evaluation of the processor for the subsequent items.
+*/
+
+bool Item_subselect::set_fake_select_as_master_processor(uchar *arg)
+{
+ SELECT_LEX *fake_select= (SELECT_LEX*) arg;
+ /*
+ Move the st_select_lex_unit of a subquery from a global ORDER BY clause to
+ become a direct child of the fake_select of a UNION. In this way the
+ ORDER BY is applied to the temporary table that contains the result of the
+ whole UNION, and all columns in the subquery are resolved against this table.
+
+ Apply the transformation only for immediate child subqueries of a
+ UNION query.
+ */
+ if (unit->outer_select()->master_unit()->fake_select_lex == fake_select)
+ {
+ /*
+ Set the master of the subquery to be the fake select (i.e. the whole UNION),
+ instead of the last query in the UNION.
+ TODO:
+ This is a hack, instead we should call: unit->include_down(fake_select);
+ However, this call results in an infinite loop where
+ some_select_lex->master == some_select_lex.
+ */
+ unit->set_master(fake_select);
+ /* Adjust the name resolution context hierarchy accordingly. */
+ for (SELECT_LEX *sl= unit->first_select(); sl; sl= sl->next_select())
+ sl->context.outer_context= &(fake_select->context);
+ /*
+ Undo Item_subselect::eliminate_subselect_processor because at that phase
+ we don't know yet that the ORDER clause will be moved to the fake select.
+ */
+ unit->item= this;
+ eliminated= FALSE;
+ }
+ return FALSE;
+}
+
+
bool Item_subselect::mark_as_dependent(THD *thd, st_select_lex *select,
Item *item)
{
@@ -558,7 +629,7 @@
- on a cost-based basis, that takes into account the cost of a cache
lookup, the cache hit rate, and the savings per cache hit.
*/
- if (!left_expr_cache && exec_method == MATERIALIZATION)
+ if (!left_expr_cache && (in_strategy & SUBS_MATERIALIZATION))
init_left_expr_cache();
/*
@@ -1012,10 +1083,11 @@
st_select_lex *select_lex):
Item_exists_subselect(), left_expr_cache(0), first_execution(TRUE),
is_constant(FALSE), optimizer(0), pushed_cond_guards(NULL),
- exec_method(NOT_TRANSFORMED), upper_item(0)
+ in_strategy(0), upper_item(0)
{
DBUG_ENTER("Item_in_subselect::Item_in_subselect");
left_expr= left_exp;
+ func= &eq_creator;
init(select_lex, new select_exists_subselect(this));
max_columns= UINT_MAX;
maybe_null= 1;
@@ -1281,59 +1353,37 @@
}
-/*
- Rewrite a single-column IN/ALL/ANY subselect
-
- SYNOPSIS
- Item_in_subselect::single_value_transformer()
- join Join object of the subquery (i.e. 'child' join).
- func Subquery comparison creator
-
- DESCRIPTION
- Rewrite a single-column subquery using rule-based approach. The subquery
-
- oe $cmp$ (SELECT ie FROM ... WHERE subq_where ... HAVING subq_having)
-
- First, try to convert the subquery to scalar-result subquery in one of
- the forms:
-
- - oe $cmp$ (SELECT MAX(...) ) // handled by Item_singlerow_subselect
- - oe $cmp$ <max>(SELECT ...) // handled by Item_maxmin_subselect
+/**
+ Rewrite a single-column IN/ALL/ANY subselect.
+
+ @param join Join object of the subquery (i.e. 'child' join).
+
+ @details
+ Rewrite a single-column subquery using rule-based approach. The subquery
+
+ oe $cmp$ (SELECT ie FROM ... WHERE subq_where ... HAVING subq_having)
+
+ First, try to convert the subquery to scalar-result subquery in one of
+ the forms:
+
+ - oe $cmp$ (SELECT MAX(...) ) // handled by Item_singlerow_subselect
+ - oe $cmp$ <max>(SELECT ...) // handled by Item_maxmin_subselect
+
+ If that fails, check if the subquery is a single select without tables,
+ and substitute the subquery predicate with "oe $cmp$ ie".
- If that fails, the subquery will be handled with class Item_in_optimizer,
- Inject the predicates into subquery, i.e. convert it to:
-
- - If the subquery has aggregates, GROUP BY, or HAVING, convert to
-
- SELECT ie FROM ... HAVING subq_having AND
- trigcond(oe $cmp$ ref_or_null_helper<ie>)
-
- the addition is wrapped into trigger only when we want to distinguish
- between NULL and FALSE results.
-
- - Otherwise (no aggregates/GROUP BY/HAVING) convert it to one of the
- following:
-
- = If we don't need to distinguish between NULL and FALSE subquery:
-
- SELECT 1 FROM ... WHERE (oe $cmp$ ie) AND subq_where
-
- = If we need to distinguish between those:
-
- SELECT 1 FROM ...
- WHERE subq_where AND trigcond((oe $cmp$ ie) OR (ie IS NULL))
- HAVING trigcond(<is_not_null_test>(ie))
-
- RETURN
- RES_OK Either subquery was transformed, or appopriate
- predicates where injected into it.
- RES_REDUCE The subquery was reduced to non-subquery
- RES_ERROR Error
+ If that fails, the subquery predicate is wrapped into an Item_in_optimizer.
+ Later the query optimization phase chooses whether the subquery under the
+ Item_in_optimizer will be further transformed into an equivalent correlated
+ EXISTS by injecting additional predicates, or will be executed via subquery
+ materialization in its unmodified form.
+
+ @retval RES_OK The subquery was transformed
+ @retval RES_ERROR Error
*/
Item_subselect::trans_res
-Item_in_subselect::single_value_transformer(JOIN *join,
- Comp_creator *func)
+Item_in_subselect::single_value_transformer(JOIN *join)
{
SELECT_LEX *select_lex= join->select_lex;
DBUG_ENTER("Item_in_subselect::single_value_transformer");
@@ -1365,7 +1415,7 @@
{
if (substitution)
{
- // It is second (third, ...) SELECT of UNION => All is done
+ /* It is second (third, ...) SELECT of UNION => All is done */
DBUG_RETURN(RES_OK);
}
@@ -1431,6 +1481,36 @@
DBUG_RETURN(RES_OK);
}
+ Item* join_having= join->having ? join->having : join->tmp_having;
+ if (!(join_having || select_lex->with_sum_func ||
+ select_lex->group_list.elements) &&
+ select_lex->table_list.elements == 0 &&
+ !select_lex->master_unit()->is_union())
+ {
+ Item *where_item= (Item*) select_lex->item_list.head();
+ /*
+ it is single select without tables => possible optimization
+ remove the dependence mark since the item is moved to upper
+ select and is not outer anymore.
+ */
+ where_item->walk(&Item::remove_dependence_processor, 0,
+ (uchar *) select_lex->outer_select());
+ substitution= func->create(left_expr, where_item);
+ have_to_be_excluded= 1;
+ if (thd->lex->describe)
+ {
+ char warn_buff[MYSQL_ERRMSG_SIZE];
+ sprintf(warn_buff, ER(ER_SELECT_REDUCED), select_lex->select_number);
+ push_warning(thd, MYSQL_ERROR::WARN_LEVEL_NOTE,
+ ER_SELECT_REDUCED, warn_buff);
+ }
+ DBUG_RETURN(RES_OK);
+ }
+
+ /*
+ Wrap the current IN predicate in an Item_in_optimizer. The actual
+ substitution in the Item tree takes place in Item_subselect::fix_fields.
+ */
if (!substitution)
{
/* We're invoked for the 1st (or the only) SELECT in the subquery UNION */
@@ -1461,33 +1541,37 @@
(char *)in_left_expr_name);
master_unit->uncacheable|= UNCACHEABLE_DEPENDENT;
- //psergey: placed then removed: select_lex->uncacheable|= UNCACHEABLE_DEPENDENT;
+ // TODO: do we need to set both?
+ // select_lex->uncacheable|= UNCACHEABLE_DEPENDENT;
}
- if (!abort_on_null && left_expr->maybe_null && !pushed_cond_guards)
+ DBUG_RETURN(RES_OK);
+}
+
+
+bool Item_in_subselect::fix_having(Item *having, SELECT_LEX *select_lex)
+{
+ bool fix_res= 0;
+ if (!having->fixed)
{
- if (!(pushed_cond_guards= (bool*)join->thd->alloc(sizeof(bool))))
- DBUG_RETURN(RES_ERROR);
- pushed_cond_guards[0]= TRUE;
+ select_lex->having_fix_field= 1;
+ fix_res= having->fix_fields(thd, 0);
+ select_lex->having_fix_field= 0;
}
-
- /*
- If this IN predicate can be computed via materialization, do not
- perform the IN -> EXISTS transformation.
- */
- if (exec_method == MATERIALIZATION)
- DBUG_RETURN(RES_OK);
-
- /* Perform the IN=>EXISTS transformation. */
- DBUG_RETURN(single_value_in_to_exists_transformer(join, func));
+ return fix_res;
}
/**
- Transofrm an IN predicate into EXISTS via predicate injection.
-
- @details The transformation injects additional predicates into the subquery
- (and makes the subquery correlated) as follows.
+ Create the predicates needed to transform a single-column IN/ALL/ANY
+ subselect into a correlated EXISTS via predicate injection.
+
+ @param join[in] Join object of the subquery (i.e. 'child' join).
+ @param where_item[out] the in-to-exists addition to the where clause
+ @param having_item[out] the in-to-exists addition to the having clause
+
+ @details
+ The correlated predicates are created as follows:
- If the subquery has aggregates, GROUP BY, or HAVING, convert to
@@ -1502,34 +1586,38 @@
= If we don't need to distinguish between NULL and FALSE subquery:
- SELECT 1 FROM ... WHERE (oe $cmp$ ie) AND subq_where
+ SELECT ie FROM ... WHERE subq_where AND (oe $cmp$ ie)
= If we need to distinguish between those:
- SELECT 1 FROM ...
+ SELECT ie FROM ...
WHERE subq_where AND trigcond((oe $cmp$ ie) OR (ie IS NULL))
HAVING trigcond(<is_not_null_test>(ie))
- @param join Join object of the subquery (i.e. 'child' join).
- @param func Subquery comparison creator
-
- @retval RES_OK Either subquery was transformed, or appopriate
- predicates where injected into it.
- @retval RES_REDUCE The subquery was reduced to non-subquery
- @retval RES_ERROR Error
+ @retval RES_OK If the new conditions were created successfully
+ @retval RES_ERROR Error
*/
Item_subselect::trans_res
-Item_in_subselect::single_value_in_to_exists_transformer(JOIN * join, Comp_creator *func)
+Item_in_subselect::create_single_in_to_exists_cond(JOIN * join,
+ Item **where_item,
+ Item **having_item)
{
SELECT_LEX *select_lex= join->select_lex;
- DBUG_ENTER("Item_in_subselect::single_value_in_to_exists_transformer");
-
- select_lex->uncacheable|= UNCACHEABLE_DEPENDENT;
- if (join->having || select_lex->with_sum_func ||
+ /*
+ The non-transformed HAVING clause of 'join' may be stored in two ways
+ during JOIN::optimize: this->tmp_having= this->having; this->having= 0;
+ */
+ Item* join_having= join->having ? join->having : join->tmp_having;
+
+ DBUG_ENTER("Item_in_subselect::create_single_in_to_exists_cond");
+
+ *where_item= NULL;
+ *having_item= NULL;
+
+ if (join_having || select_lex->with_sum_func ||
select_lex->group_list.elements)
{
- bool tmp;
Item *item= func->create(expr,
new Item_ref_null_helper(&select_lex->context,
this,
@@ -1545,24 +1633,12 @@
*/
item= new Item_func_trig_cond(item, get_cond_guard(0));
}
-
- /*
- AND and comparison functions can't be changed during fix_fields()
- we can assign select_lex->having here, and pass 0 as last
- argument (reference) to fix_fields()
- */
- select_lex->having= join->having= and_items(join->having, item);
- if (join->having == item)
- item->name= (char*)in_having_cond;
- select_lex->having_fix_field= 1;
- /*
- we do not check join->having->fixed, because Item_and (from and_items)
- or comparison function (from func->create) can't be fixed after creation
- */
- tmp= join->having->fix_fields(thd, 0);
- select_lex->having_fix_field= 0;
- if (tmp)
+
+ if (!join_having)
+ item->name= (char*) in_having_cond;
+ if (fix_having(item, select_lex))
DBUG_RETURN(RES_ERROR);
+ *having_item= item;
}
else
{
@@ -1570,13 +1646,8 @@
if (select_lex->table_list.elements)
{
- bool tmp;
- Item *having= item, *orig_item= item;
- select_lex->item_list.empty();
- select_lex->item_list.push_back(new Item_int("Not_used",
- (longlong) 1,
- MY_INT64_NUM_DECIMAL_DIGITS));
- select_lex->ref_pointer_array[0]= select_lex->item_list.head();
+ Item *having= item;
+ Item *orig_item= item;
item= func->create(expr, item);
if (!abort_on_null && orig_item->maybe_null)
@@ -1588,23 +1659,11 @@
get_cond_guard(0))))
DBUG_RETURN(RES_ERROR);
}
- /*
- Item_is_not_null_test can't be changed during fix_fields()
- we can assign select_lex->having here, and pass 0 as last
- argument (reference) to fix_fields()
- */
- having->name= (char*)in_having_cond;
- select_lex->having= join->having= having;
- select_lex->having_fix_field= 1;
- /*
- we do not check join->having->fixed, because Item_and (from
- and_items) or comparison function (from func->create) can't be
- fixed after creation
- */
- tmp= join->having->fix_fields(thd, 0);
- select_lex->having_fix_field= 0;
- if (tmp)
- DBUG_RETURN(RES_ERROR);
+ having->name= (char*) in_having_cond;
+ if (fix_having(having, select_lex))
+ DBUG_RETURN(RES_ERROR);
+ *having_item= having;
+
item= new Item_cond_or(item,
new Item_func_isnull(orig_item));
}
@@ -1617,37 +1676,21 @@
if (!(item= new Item_func_trig_cond(item, get_cond_guard(0))))
DBUG_RETURN(RES_ERROR);
}
+
/*
TODO: figure out why the following is done here in
single_value_transformer but there is no corresponding action in
row_value_transformer?
*/
- item->name= (char *)in_additional_cond;
-
- /*
- AND can't be changed during fix_fields()
- we can assign select_lex->having here, and pass 0 as last
- argument (reference) to fix_fields()
- */
- select_lex->where= join->conds= and_items(join->conds, item);
- select_lex->where->top_level_item();
- /*
- we do not check join->conds->fixed, because Item_and can't be fixed
- after creation
- */
- if (join->conds->fix_fields(thd, 0))
- DBUG_RETURN(RES_ERROR);
+ item->name= (char *) in_additional_cond;
+ if (!item->fixed && item->fix_fields(thd, 0))
+ DBUG_RETURN(RES_ERROR);
+ *where_item= item;
}
else
{
- bool tmp;
if (select_lex->master_unit()->is_union())
{
- /*
- comparison functions can't be changed during fix_fields()
- we can assign select_lex->having here, and pass 0 as last
- argument (reference) to fix_fields()
- */
Item *new_having=
func->create(expr,
new Item_ref_null_helper(&select_lex->context, this,
@@ -1660,39 +1703,14 @@
get_cond_guard(0))))
DBUG_RETURN(RES_ERROR);
}
- new_having->name= (char*)in_having_cond;
- select_lex->having= join->having= new_having;
- select_lex->having_fix_field= 1;
-
- /*
- we do not check join->having->fixed, because comparison function
- (from func->create) can't be fixed after creation
- */
- tmp= join->having->fix_fields(thd, 0);
- select_lex->having_fix_field= 0;
- if (tmp)
- DBUG_RETURN(RES_ERROR);
+
+ new_having->name= (char*) in_having_cond;
+ if (fix_having(new_having, select_lex))
+ DBUG_RETURN(RES_ERROR);
+ *having_item= new_having;
}
else
- {
- // it is single select without tables => possible optimization
- // remove the dependence mark since the item is moved to upper
- // select and is not outer anymore.
- item->walk(&Item::remove_dependence_processor, 0,
- (uchar *) select_lex->outer_select());
- item= func->create(left_expr, item);
- // fix_field of item will be done in time of substituting
- substitution= item;
- have_to_be_excluded= 1;
- if (thd->lex->describe)
- {
- char warn_buff[MYSQL_ERRMSG_SIZE];
- sprintf(warn_buff, ER(ER_SELECT_REDUCED), select_lex->select_number);
- push_warning(thd, MYSQL_ERROR::WARN_LEVEL_NOTE,
- ER_SELECT_REDUCED, warn_buff);
- }
- DBUG_RETURN(RES_REDUCE);
- }
+ DBUG_ASSERT(FALSE);
}
}
@@ -1700,6 +1718,22 @@
}
+/**
+ Wrap a multi-column IN/ALL/ANY subselect into an Item_in_optimizer.
+
+ @param join Join object of the subquery (i.e. 'child' join).
+
+ @details
+ The subquery predicate is wrapped into an Item_in_optimizer. Later the query
+ optimization phase chooses whether the subquery under the Item_in_optimizer
+ will be further transformed into an equivalent correlated EXISTS by injecting
+ additional predicates, or will be executed via subquery materialization in its
+ unmodified form.
+
+ @retval RES_OK The subquery was transformed
+ @retval RES_ERROR Error
+*/
+
Item_subselect::trans_res
Item_in_subselect::row_value_transformer(JOIN *join)
{
@@ -1739,60 +1773,51 @@
thd->lex->current_select= current;
master_unit->uncacheable|= UNCACHEABLE_DEPENDENT;
-
- if (!abort_on_null && left_expr->maybe_null && !pushed_cond_guards)
- {
- if (!(pushed_cond_guards= (bool*)join->thd->alloc(sizeof(bool) *
- left_expr->cols())))
- DBUG_RETURN(RES_ERROR);
- for (uint i= 0; i < cols_num; i++)
- pushed_cond_guards[i]= TRUE;
- }
+ // TODO: do we need to set both?
+ //select_lex->uncacheable|= UNCACHEABLE_DEPENDENT;
}
- /*
- If this IN predicate can be computed via materialization, do not
- perform the IN -> EXISTS transformation.
- */
- if (exec_method == MATERIALIZATION)
- DBUG_RETURN(RES_OK);
-
- /* Perform the IN=>EXISTS transformation. */
- DBUG_RETURN(row_value_in_to_exists_transformer(join));
+ DBUG_RETURN(RES_OK);
}
/**
- Tranform a (possibly non-correlated) IN subquery into a correlated EXISTS.
-
- @todo
- The IF-ELSE below can be refactored so that there is no duplication of the
- statements that create the new conditions. For this we have to invert the IF
- and the FOR statements as this:
- for (each left operand)
- create the equi-join condition
- if (is_having_used || !abort_on_null)
- create the "is null" and is_not_null_test items
- if (is_having_used)
- add the equi-join and the null tests to HAVING
- else
- add the equi-join and the "is null" to WHERE
- add the is_not_null_test to HAVING
+ Create the predicates needed to transform a multi-column IN/ALL/ANY
+ subselect into a correlated EXISTS via predicate injection.
+
+ @details
+ There are two cases - either the subquery has aggregates, GROUP BY,
+ or HAVING, or not. Both cases are described inline in the code.
+
+ @param join[in] Join object of the subquery (i.e. 'child' join).
+ @param where_item[out] the in-to-exists addition to the where clause
+ @param having_item[out] the in-to-exists addition to the having clause
+
+ @retval RES_OK If the new conditions were created successfully
+ @retval RES_ERROR Error
*/
Item_subselect::trans_res
-Item_in_subselect::row_value_in_to_exists_transformer(JOIN * join)
+Item_in_subselect::create_row_in_to_exists_cond(JOIN * join,
+ Item **where_item,
+ Item **having_item)
{
SELECT_LEX *select_lex= join->select_lex;
- Item *having_item= 0;
uint cols_num= left_expr->cols();
- bool is_having_used= (join->having || select_lex->with_sum_func ||
+ /*
+ The non-transformed HAVING clause of 'join' may be stored in two ways
+ during JOIN::optimize: this->tmp_having= this->having; this->having= 0;
+ */
+ Item* join_having= join->having ? join->having : join->tmp_having;
+ bool is_having_used= (join_having || select_lex->with_sum_func ||
select_lex->group_list.first ||
!select_lex->table_list.elements);
- DBUG_ENTER("Item_in_subselect::row_value_in_to_exists_transformer");
-
- select_lex->uncacheable|= UNCACHEABLE_DEPENDENT;
+ DBUG_ENTER("Item_in_subselect::create_row_in_to_exists_cond");
+
+ *where_item= NULL;
+ *having_item= NULL;
+
if (is_having_used)
{
/*
@@ -1812,6 +1837,7 @@
for (uint i= 0; i < cols_num; i++)
{
DBUG_ASSERT((left_expr->fixed &&
+
select_lex->ref_pointer_array[i]->fixed) ||
(select_lex->ref_pointer_array[i]->type() == REF_ITEM &&
((Item_ref*)(select_lex->ref_pointer_array[i]))->ref_type() ==
@@ -1845,8 +1871,8 @@
if (!(col_item= new Item_func_trig_cond(col_item, get_cond_guard(i))))
DBUG_RETURN(RES_ERROR);
}
- having_item= and_items(having_item, col_item);
-
+ *having_item= and_items(*having_item, col_item);
+
Item *item_nnull_test=
new Item_is_not_null_test(this,
new Item_ref(&select_lex->context,
@@ -1863,8 +1889,7 @@
item_having_part2= and_items(item_having_part2, item_nnull_test);
item_having_part2->top_level_item();
}
- having_item= and_items(having_item, item_having_part2);
- having_item->top_level_item();
+ *having_item= and_items(*having_item, item_having_part2);
}
else
{
@@ -1885,7 +1910,6 @@
(l2 = v2) and
(l3 = v3)
*/
- Item *where_item= 0;
for (uint i= 0; i < cols_num; i++)
{
Item *item, *item_isnull;
@@ -1943,49 +1967,118 @@
new Item_func_trig_cond(having_col_item, get_cond_guard(i))))
DBUG_RETURN(RES_ERROR);
}
- having_item= and_items(having_item, having_col_item);
+ *having_item= and_items(*having_item, having_col_item);
}
- where_item= and_items(where_item, item);
+ *where_item= and_items(*where_item, item);
}
- /*
- AND can't be changed during fix_fields()
- we can assign select_lex->where here, and pass 0 as last
- argument (reference) to fix_fields()
- */
- select_lex->where= join->conds= and_items(join->conds, where_item);
+ }
+
+ if (*where_item)
+ {
+ if (!(*where_item)->fixed && (*where_item)->fix_fields(thd, 0))
+ DBUG_RETURN(RES_ERROR);
+ (*where_item)->top_level_item();
+ }
+
+ if (*having_item)
+ {
+ if (!join_having)
+ (*having_item)->name= (char*) in_having_cond;
+ if (fix_having(*having_item, select_lex))
+ DBUG_RETURN(RES_ERROR);
+ (*having_item)->top_level_item();
+ }
+
+ DBUG_RETURN(RES_OK);
+}
+
+
+Item_subselect::trans_res
+Item_in_subselect::select_transformer(JOIN *join)
+{
+ return select_in_like_transformer(join);
+}
+
+
+/**
+ Create the predicates needed to transform an IN/ALL/ANY subselect into a
+ correlated EXISTS via predicate injection.
+
+ @param join_arg Join object of the subquery.
+
+ @retval FALSE ok
+ @retval TRUE error
+*/
+
+bool Item_in_subselect::create_in_to_exists_cond(JOIN *join_arg)
+{
+ Item_subselect::trans_res res;
+
+ DBUG_ASSERT(engine->engine_type() == subselect_engine::SINGLE_SELECT_ENGINE ||
+ engine->engine_type() == subselect_engine::UNION_ENGINE);
+ /*
+ TODO: the call to init_cond_guards allocates and initializes an
+ array of booleans that may not be used later because we may choose
+ materialization.
+ The two calls below to create_XYZ_cond depend on this boolean array.
+ If the dependency is removed, the call can be moved to a later phase.
+ */
+ init_cond_guards();
+ join_arg->select_lex->uncacheable|= UNCACHEABLE_DEPENDENT;
+ if (left_expr->cols() == 1)
+ res= create_single_in_to_exists_cond(join_arg,
+ &(join_arg->in_to_exists_where),
+ &(join_arg->in_to_exists_having));
+ else
+ res= create_row_in_to_exists_cond(join_arg,
+ &(join_arg->in_to_exists_where),
+ &(join_arg->in_to_exists_having));
+ return (res != RES_OK);
+}
+
+
+/**
+ Transform an IN/ALL/ANY subselect into a correlated EXISTS via injecting
+ correlated in-to-exists predicates.
+
+ @param join_arg Join object of the subquery.
+
+ @retval FALSE ok
+ @retval TRUE error
+*/
+
+bool Item_in_subselect::inject_in_to_exists_cond(JOIN *join_arg)
+{
+ SELECT_LEX *select_lex= join_arg->select_lex;
+ Item *where_item= join_arg->in_to_exists_where;
+ Item *having_item= join_arg->in_to_exists_having;
+
+ DBUG_ENTER("Item_in_subselect::inject_in_to_exists_cond");
+
+ if (where_item)
+ {
+ where_item= and_items(join_arg->conds, where_item);
+ if (!where_item->fixed && where_item->fix_fields(thd, 0))
+ DBUG_RETURN(true);
+ // TIMOUR TODO: call optimize_cond() for the new where clause
+ thd->change_item_tree(&select_lex->where, where_item);
select_lex->where->top_level_item();
- if (join->conds->fix_fields(thd, 0))
- DBUG_RETURN(RES_ERROR);
+ join_arg->conds= select_lex->where;
}
+
if (having_item)
{
- bool res;
- select_lex->having= join->having= and_items(join->having, having_item);
- if (having_item == select_lex->having)
- having_item->name= (char*)in_having_cond;
+ Item* join_having= join_arg->having ? join_arg->having:join_arg->tmp_having;
+ having_item= and_items(join_having, having_item);
+ if (fix_having(having_item, select_lex))
+ DBUG_RETURN(true);
+ // TIMOUR TODO: call optimize_cond() for the new having clause
+ thd->change_item_tree(&select_lex->having, having_item);
select_lex->having->top_level_item();
- /*
- AND can't be changed during fix_fields()
- we can assign select_lex->having here, and pass 0 as last
- argument (reference) to fix_fields()
- */
- select_lex->having_fix_field= 1;
- res= join->having->fix_fields(thd, 0);
- select_lex->having_fix_field= 0;
- if (res)
- {
- DBUG_RETURN(RES_ERROR);
- }
+ join_arg->having= select_lex->having;
}
- DBUG_RETURN(RES_OK);
-}
-
-
-Item_subselect::trans_res
-Item_in_subselect::select_transformer(JOIN *join)
-{
- return select_in_like_transformer(join, &eq_creator);
+ DBUG_RETURN(false);
}
@@ -1993,25 +2086,20 @@
Prepare IN/ALL/ANY/SOME subquery transformation and call appropriate
transformation function.
- To decide which transformation procedure (scalar or row) applicable here
- we have to call fix_fields() for left expression to be able to call
- cols() method on it. Also this method make arena management for
- underlying transformation methods.
-
@param join JOIN object of transforming subquery
- @param func creator of condition function of subquery
-
- @retval
- RES_OK OK
- @retval
- RES_REDUCE OK, and current subquery was reduced during
- transformation
- @retval
- RES_ERROR Error
+
+ @notes
+ To decide which transformation procedure (scalar or row) applicable here
+ we have to call fix_fields() for left expression to be able to call
+ cols() method on it. Also this method make arena management for
+ underlying transformation methods.
+
+ @retval RES_OK OK
+ @retval RES_ERROR Error
*/
Item_subselect::trans_res
-Item_in_subselect::select_in_like_transformer(JOIN *join, Comp_creator *func)
+Item_in_subselect::select_in_like_transformer(JOIN *join)
{
Query_arena *arena, backup;
SELECT_LEX *current= thd->lex->current_select;
@@ -2067,22 +2155,15 @@
goto err;
/*
- If we didn't choose an execution method up to this point, we choose
- the IN=>EXISTS transformation.
- */
- if (exec_method == NOT_TRANSFORMED)
- exec_method= IN_TO_EXISTS;
- arena= thd->activate_stmt_arena_if_needed(&backup);
-
- /*
Both transformers call fix_fields() only for Items created inside them,
and all that items do not make permanent changes in current item arena
which allow to us call them with changed arena (if we do not know nature
of Item, we have to call fix_fields() for it only with original arena to
avoid memory leack)
*/
+ arena= thd->activate_stmt_arena_if_needed(&backup);
if (left_expr->cols() == 1)
- res= single_value_transformer(join, func);
+ res= single_value_transformer(join);
else
{
/* we do not support row operation for ALL/ANY/SOME */
@@ -2105,7 +2186,7 @@
void Item_in_subselect::print(String *str, enum_query_type query_type)
{
- if (exec_method == IN_TO_EXISTS)
+ if (in_strategy & SUBS_IN_TO_EXISTS)
str->append(STRING_WITH_LEN("<exists>"));
else
{
@@ -2121,7 +2202,7 @@
uint outer_cols_num;
List<Item> *inner_cols;
- if (exec_method == SEMI_JOIN)
+ if (in_strategy & SUBS_SEMI_JOIN)
return !( (*ref)= new Item_int(1));
/*
@@ -2194,99 +2275,47 @@
used_tables_cache |= left_expr->used_tables();
}
+
/**
- Try to create an engine to compute the subselect via materialization,
- and if this fails, revert to execution via the IN=>EXISTS transformation.
+ Try to create and initialize an engine to compute a subselect via
+ materialization.
@details
- The purpose of this method is to hide the implementation details
- of this Item's execution. The method creates a new engine for
- materialized execution, and initializes the engine.
-
- If this initialization fails
- - either because it wasn't possible to create the needed temporary table
- and its index,
- - or because of a memory allocation error,
- then we revert back to execution via the IN=>EXISTS tranformation.
-
- The initialization of the new engine is divided in two parts - a permanent
- one that lives across prepared statements, and one that is repeated for each
- execution.
+ The method creates a new engine for materialized execution, and initializes
+ the engine. The initialization may fail
+ - either because it wasn't possible to create the needed temporary table
+ and its index,
+ - or because of a memory allocation error,
@returns
@retval TRUE memory allocation error occurred
@retval FALSE an execution method was chosen successfully
*/
-bool Item_in_subselect::setup_engine()
+bool Item_in_subselect::setup_mat_engine()
{
- subselect_hash_sj_engine *new_engine= NULL;
- bool res= FALSE;
-
- DBUG_ENTER("Item_in_subselect::setup_engine");
-
- if (engine->engine_type() == subselect_engine::SINGLE_SELECT_ENGINE)
- {
- /* Create/initialize objects in permanent memory. */
- subselect_single_select_engine *old_engine;
- Query_arena *arena= thd->stmt_arena, backup;
-
- old_engine= (subselect_single_select_engine*) engine;
-
- if (arena->is_conventional())
- arena= 0;
- else
- thd->set_n_backup_active_arena(arena, &backup);
-
- if (!(new_engine= new subselect_hash_sj_engine(thd, this,
- old_engine)) ||
- new_engine->init_permanent(unit->get_unit_column_types()))
- {
- Item_subselect::trans_res trans_res;
- /*
- If for some reason we cannot use materialization for this IN predicate,
- delete all materialization-related objects, and apply the IN=>EXISTS
- transformation.
- */
- delete new_engine;
- new_engine= NULL;
- exec_method= NOT_TRANSFORMED;
- if (left_expr->cols() == 1)
- trans_res= single_value_in_to_exists_transformer(old_engine->join,
- &eq_creator);
- else
- trans_res= row_value_in_to_exists_transformer(old_engine->join);
- res= (trans_res != Item_subselect::RES_OK);
- }
- if (new_engine)
- engine= new_engine;
-
- if (arena)
- thd->restore_active_arena(arena, &backup);
- }
- else
- {
- DBUG_ASSERT(engine->engine_type() == subselect_engine::HASH_SJ_ENGINE);
- new_engine= (subselect_hash_sj_engine*) engine;
- }
-
- /* Initilizations done in runtime memory, repeated for each execution. */
- if (new_engine)
- {
- /*
- Reset the LIMIT 1 set in Item_exists_subselect::fix_length_and_dec.
- TODO:
- Currently we set the subquery LIMIT to infinity, and this is correct
- because we forbid at parse time LIMIT inside IN subqueries (see
- Item_in_subselect::test_limit). However, once we allow this, here
- we should set the correct limit if given in the query.
- */
- unit->global_parameters->select_limit= NULL;
- if ((res= new_engine->init_runtime()))
- DBUG_RETURN(res);
- }
-
- DBUG_RETURN(res);
+ subselect_hash_sj_engine *mat_engine= NULL;
+ subselect_single_select_engine *select_engine;
+
+ DBUG_ENTER("Item_in_subselect::setup_mat_engine");
+
+ /*
+ The select_engine (that executes transformed IN=>EXISTS subselects) is
+ pre-created at parse time, and is stored in statment memory (preserved
+ across PS executions).
+ */
+ DBUG_ASSERT(engine->engine_type() == subselect_engine::SINGLE_SELECT_ENGINE);
+ select_engine= (subselect_single_select_engine*) engine;
+
+ /* Create/initialize execution objects. */
+ if (!(mat_engine= new subselect_hash_sj_engine(thd, this, select_engine)))
+ DBUG_RETURN(TRUE);
+
+ if (mat_engine->init(&select_engine->join->fields_list))
+ DBUG_RETURN(TRUE);
+
+ engine= mat_engine;
+ DBUG_RETURN(FALSE);
}
@@ -2329,14 +2358,25 @@
}
+bool Item_in_subselect::init_cond_guards()
+{
+ uint cols_num= left_expr->cols();
+ if (!abort_on_null && left_expr->maybe_null && !pushed_cond_guards)
+ {
+ if (!(pushed_cond_guards= (bool*)thd->alloc(sizeof(bool) * cols_num)))
+ return TRUE;
+ for (uint i= 0; i < cols_num; i++)
+ pushed_cond_guards[i]= TRUE;
+ }
+ return FALSE;
+}
+
+
/*
Callback to test if an IN predicate is expensive.
@details
- IN predicates are considered expensive only if they will be executed via
- materialization. The return value affects the behavior of
- make_cond_for_table() in such a way that it is unchanged when we use
- the IN=>EXISTS transformation to compute IN.
+ The return value affects the behavior of make_cond_for_table().
@retval TRUE if the predicate is expensive
@retval FALSE otherwise
@@ -2344,7 +2384,8 @@
bool Item_in_subselect::is_expensive_processor(uchar *arg)
{
- return exec_method == MATERIALIZATION;
+ /* TIMOUR: TODO: decide on a cost basis whether it is expensive or not. */
+ return TRUE;
}
@@ -2352,16 +2393,16 @@
Item_allany_subselect::select_transformer(JOIN *join)
{
DBUG_ENTER("Item_allany_subselect::select_transformer");
- exec_method= IN_TO_EXISTS;
+ in_strategy= SUBS_IN_TO_EXISTS;
if (upper_item)
upper_item->show= 1;
- DBUG_RETURN(select_in_like_transformer(join, func));
+ DBUG_RETURN(select_in_like_transformer(join));
}
void Item_allany_subselect::print(String *str, enum_query_type query_type)
{
- if (exec_method == IN_TO_EXISTS)
+ if (in_strategy & SUBS_IN_TO_EXISTS)
str->append(STRING_WITH_LEN("<exists>"));
else
{
@@ -3787,13 +3828,14 @@
@retval FALSE otherwise
*/
-bool subselect_hash_sj_engine::init_permanent(List<Item> *tmp_columns)
+bool subselect_hash_sj_engine::init(List<Item> *tmp_columns)
{
+ select_union *result_sink;
/* Options to create_tmp_table. */
ulonglong tmp_create_options= thd->options | TMP_TABLE_ALL_COLUMNS;
/* | TMP_TABLE_FORCE_MYISAM; TIMOUR: force MYISAM */
- DBUG_ENTER("subselect_hash_sj_engine::init_permanent");
+ DBUG_ENTER("subselect_hash_sj_engine::init");
if (bitmap_init_memroot(&non_null_key_parts, tmp_columns->elements,
thd->mem_root) ||
@@ -3822,15 +3864,16 @@
DBUG_RETURN(TRUE);
}
*/
- if (!(result= new select_materialize_with_stats))
- DBUG_RETURN(TRUE);
-
- if (((select_union*) result)->create_result_table(
- thd, tmp_columns, TRUE, tmp_create_options,
- "materialized subselect", TRUE))
- DBUG_RETURN(TRUE);
-
- tmp_table= ((select_union*) result)->table;
+ if (!(result_sink= new select_materialize_with_stats))
+ DBUG_RETURN(TRUE);
+ result_sink->get_tmp_table_param()->materialized_subquery= true;
+ if (result_sink->create_result_table(thd, tmp_columns, TRUE,
+ tmp_create_options,
+ "materialized subselect", TRUE))
+ DBUG_RETURN(TRUE);
+
+ tmp_table= result_sink->table;
+ result= result_sink;
/*
If the subquery has blobs, or the total key lenght is bigger than
@@ -3867,6 +3910,17 @@
!(lookup_engine= make_unique_engine()))
DBUG_RETURN(TRUE);
+ /*
+ Repeat name resolution for 'cond' since cond is not part of any
+ clause of the query, and it is not 'fixed' during JOIN::prepare.
+ */
+ if (semi_join_conds && !semi_join_conds->fixed &&
+ semi_join_conds->fix_fields(thd, (Item**)&semi_join_conds))
+ DBUG_RETURN(TRUE);
+ /* Let our engine reuse this query plan for materialization. */
+ materialize_join= materialize_engine->join;
+ materialize_join->change_result(result);
+
DBUG_RETURN(FALSE);
}
@@ -3979,35 +4033,6 @@
}
-/**
- Initialize members of the engine that need to be re-initilized at each
- execution.
-
- @retval TRUE if a memory allocation error occurred
- @retval FALSE if success
-*/
-
-bool subselect_hash_sj_engine::init_runtime()
-{
- /*
- Create and optimize the JOIN that will be used to materialize
- the subquery if not yet created.
- */
- materialize_engine->prepare();
- /*
- Repeat name resolution for 'cond' since cond is not part of any
- clause of the query, and it is not 'fixed' during JOIN::prepare.
- */
- if (semi_join_conds && !semi_join_conds->fixed &&
- semi_join_conds->fix_fields(thd, (Item**)&semi_join_conds))
- return TRUE;
- /* Let our engine reuse this query plan for materialization. */
- materialize_join= materialize_engine->join;
- materialize_join->change_result(result);
- return FALSE;
-}
-
-
subselect_hash_sj_engine::~subselect_hash_sj_engine()
{
delete lookup_engine;
@@ -4017,6 +4042,16 @@
}
+int subselect_hash_sj_engine::prepare()
+{
+ /*
+ Create and optimize the JOIN that will be used to materialize
+ the subquery if not yet created.
+ */
+ return materialize_engine->prepare();
+}
+
+
/**
Cleanup performed after each PS execution.
@@ -4034,6 +4069,12 @@
count_null_only_columns= 0;
strategy= UNDEFINED;
materialize_engine->cleanup();
+ /*
+ Restore the original Item_in_subselect engine. This engine is created once
+ at parse time and stored across executions, while all other materialization
+ related engines are created and chosen for each execution.
+ */
+ ((Item_in_subselect *) item)->engine= materialize_engine;
if (lookup_engine_type == TABLE_SCAN_ENGINE ||
lookup_engine_type == ROWID_MERGE_ENGINE)
{
@@ -4050,6 +4091,9 @@
DBUG_ASSERT(lookup_engine->engine_type() == UNIQUESUBQUERY_ENGINE);
lookup_engine->cleanup();
result->cleanup(); /* Resets the temp table as well. */
+ DBUG_ASSERT(tmp_table);
+ free_tmp_table(thd, tmp_table);
+ tmp_table= NULL;
}
@@ -4078,9 +4122,8 @@
the subquery predicate.
*/
thd->lex->current_select= materialize_engine->select_lex;
- if ((res= materialize_join->optimize()))
- goto err; /* purecov: inspected */
- DBUG_ASSERT(!is_materialized); /* We should materialize only once. */
+ /* The subquery should be optimized, and materialized only once. */
+ DBUG_ASSERT(materialize_join->optimized && !is_materialized);
materialize_join->exec();
if ((res= test(materialize_join->error || thd->is_fatal_error)))
goto err;
=== modified file 'sql/item_subselect.h'
--- sql/item_subselect.h 2010-07-10 10:37:30 +0000
+++ sql/item_subselect.h 2010-10-05 13:06:06 +0000
@@ -118,6 +118,12 @@
Item_subselect();
virtual subs_type substype() { return UNKNOWN_SUBS; }
+ bool is_in_predicate()
+ {
+ return (substype() == Item_subselect::IN_SUBS ||
+ substype() == Item_subselect::ALL_SUBS ||
+ substype() == Item_subselect::ANY_SUBS);
+ }
/*
We need this method, because some compilers do not allow 'this'
@@ -180,6 +186,8 @@
enum_parsing_place place() { return parsing_place; }
bool walk(Item_processor processor, bool walk_subquery, uchar *arg);
bool mark_as_eliminated_processor(uchar *arg);
+ bool eliminate_subselect_processor(uchar *arg);
+ bool set_fake_select_as_master_processor(uchar *arg);
bool enumerate_field_refs_processor(uchar *arg);
bool check_vcol_func_processor(uchar *int_arg)
{
@@ -309,6 +317,18 @@
};
+/*
+ Possible methods to execute an IN predicate. These are set by the optimizer
+ based on user-set optimizer switches, semantic analysis and cost comparison.
+*/
+#define SUBS_NOT_TRANSFORMED 0 /* No execution method was chosen for this IN. */
+#define SUBS_SEMI_JOIN 1 /* IN was converted to semi-join. */
+#define SUBS_IN_TO_EXISTS 2 /* IN was converted to correlated EXISTS. */
+#define SUBS_MATERIALIZATION 4 /* Execute IN via subquery materialization. */
+/* Partial matching substrategies of MATERIALIZATION. */
+#define SUBS_PARTIAL_MATCH_ROWID_MERGE 8
+#define SUBS_PARTIAL_MATCH_TABLE_SCAN 16
+
/**
Representation of IN subquery predicates of the form
"left_expr IN (SELECT ...)".
@@ -326,8 +346,6 @@
class Item_in_subselect :public Item_exists_subselect
{
-public:
- Item *left_expr;
protected:
/*
Cache of the left operand of the subquery predicate. Allocated in the
@@ -350,10 +368,24 @@
Item_in_optimizer *optimizer;
bool was_null;
bool abort_on_null;
-public:
/* Used to trigger on/off conditions that were pushed down to subselect */
bool *pushed_cond_guards;
-
+ Comp_creator *func;
+
+protected:
+ bool init_cond_guards();
+ trans_res select_in_like_transformer(JOIN *join);
+ trans_res single_value_transformer(JOIN *join);
+ trans_res row_value_transformer(JOIN * join);
+ bool fix_having(Item *having, st_select_lex *select_lex);
+ trans_res create_single_in_to_exists_cond(JOIN * join,
+ Item **where_item,
+ Item **having_item);
+ trans_res create_row_in_to_exists_cond(JOIN * join,
+ Item **where_item,
+ Item **having_item);
+public:
+ Item *left_expr;
/* Priority of this predicate in the convert-to-semi-join-nest process. */
int sj_convert_priority;
/*
@@ -384,14 +416,8 @@
*/
bool sjm_scan_allowed;
- /* The method chosen to execute the IN predicate. */
- enum enum_exec_method {
- NOT_TRANSFORMED, /* No execution method was chosen for this IN. */
- SEMI_JOIN, /* IN was converted to semi-join nest and should be removed. */
- IN_TO_EXISTS, /* IN was converted to correlated EXISTS. */
- MATERIALIZATION /* IN will be executed via subquery materialization. */
- };
- enum_exec_method exec_method;
+ /* A bitmap of possible execution strategies for an IN predicate. */
+ uchar in_strategy;
bool *get_cond_guard(int i)
{
@@ -410,8 +436,9 @@
Item_in_subselect()
:Item_exists_subselect(), left_expr_cache(0), first_execution(TRUE),
is_constant(FALSE), optimizer(0), abort_on_null(0),
- pushed_cond_guards(NULL), exec_method(NOT_TRANSFORMED), upper_item(0)
- {}
+ pushed_cond_guards(NULL), func(NULL), in_strategy(0),
+ upper_item(0)
+ {}
void cleanup();
subs_type substype() { return IN_SUBS; }
void reset()
@@ -422,12 +449,9 @@
was_null= 0;
}
trans_res select_transformer(JOIN *join);
- trans_res select_in_like_transformer(JOIN *join, Comp_creator *func);
- trans_res single_value_transformer(JOIN *join, Comp_creator *func);
- trans_res row_value_transformer(JOIN * join);
- trans_res single_value_in_to_exists_transformer(JOIN * join,
- Comp_creator *func);
- trans_res row_value_in_to_exists_transformer(JOIN * join);
+ bool create_in_to_exists_cond(JOIN *join_arg);
+ bool inject_in_to_exists_cond(JOIN *join_arg);
+
virtual bool exec();
longlong val_int();
double val_real();
@@ -442,11 +466,12 @@
bool fix_fields(THD *thd, Item **ref);
void fix_after_pullout(st_select_lex *new_parent, Item **ref);
void update_used_tables();
- bool setup_engine();
+ bool setup_mat_engine();
bool init_left_expr_cache();
/* Inform 'this' that it was computed, and contains a valid result. */
void set_first_execution() { if (first_execution) first_execution= FALSE; }
bool is_expensive_processor(uchar *arg);
+ bool is_expensive() { return TRUE; }
bool expr_cache_is_needed(THD *thd);
/*
@@ -468,7 +493,6 @@
{
public:
chooser_compare_func_creator func_creator;
- Comp_creator *func;
bool all;
Item_allany_subselect(Item * left_expr, chooser_compare_func_creator fc,
@@ -477,6 +501,7 @@
// only ALL subquery has upper not
subs_type substype() { return all?ALL_SUBS:ANY_SUBS; }
trans_res select_transformer(JOIN *join);
+ void create_comp_func(bool invert) { func= func_creator(invert); }
virtual void print(String *str, enum_query_type query_type);
};
@@ -817,10 +842,9 @@
}
~subselect_hash_sj_engine();
- bool init_permanent(List<Item> *tmp_columns);
- bool init_runtime();
+ bool init(List<Item> *tmp_columns);
void cleanup();
- int prepare() { return 0; } /* Override virtual function in base class. */
+ int prepare();
int exec();
virtual void print(String *str, enum_query_type query_type);
uint cols()
=== modified file 'sql/mysql_priv.h'
--- sql/mysql_priv.h 2010-07-10 10:37:30 +0000
+++ sql/mysql_priv.h 2010-10-04 15:26:55 +0000
@@ -563,20 +563,20 @@
#define OPTIMIZER_SWITCH_INDEX_MERGE_SORT_UNION 4
#define OPTIMIZER_SWITCH_INDEX_MERGE_INTERSECT 8
#define OPTIMIZER_SWITCH_INDEX_COND_PUSHDOWN 16
-
#define OPTIMIZER_SWITCH_FIRSTMATCH 32
#define OPTIMIZER_SWITCH_LOOSE_SCAN 64
#define OPTIMIZER_SWITCH_MATERIALIZATION 128
-#define OPTIMIZER_SWITCH_SEMIJOIN 256
-#define OPTIMIZER_SWITCH_PARTIAL_MATCH_ROWID_MERGE 512
-#define OPTIMIZER_SWITCH_PARTIAL_MATCH_TABLE_SCAN 1024
-#define OPTIMIZER_SWITCH_SUBQUERY_CACHE (1<<11)
+#define OPTIMIZER_SWITCH_IN_TO_EXISTS 256
+#define OPTIMIZER_SWITCH_SEMIJOIN 512
+#define OPTIMIZER_SWITCH_PARTIAL_MATCH_ROWID_MERGE 1024
+#define OPTIMIZER_SWITCH_PARTIAL_MATCH_TABLE_SCAN (1<<11)
+#define OPTIMIZER_SWITCH_SUBQUERY_CACHE (1<<12)
#ifdef DBUG_OFF
-# define OPTIMIZER_SWITCH_LAST (1<<12)
-#else
-# define OPTIMIZER_SWITCH_TABLE_ELIMINATION (1<<12)
# define OPTIMIZER_SWITCH_LAST (1<<13)
+#else
+# define OPTIMIZER_SWITCH_TABLE_ELIMINATION (1<<13)
+# define OPTIMIZER_SWITCH_LAST (1<<14)
#endif
#ifdef DBUG_OFF
@@ -589,6 +589,7 @@
OPTIMIZER_SWITCH_FIRSTMATCH | \
OPTIMIZER_SWITCH_LOOSE_SCAN | \
OPTIMIZER_SWITCH_MATERIALIZATION | \
+ OPTIMIZER_SWITCH_IN_TO_EXISTS | \
OPTIMIZER_SWITCH_SEMIJOIN | \
OPTIMIZER_SWITCH_PARTIAL_MATCH_ROWID_MERGE|\
OPTIMIZER_SWITCH_PARTIAL_MATCH_TABLE_SCAN|\
@@ -603,6 +604,7 @@
OPTIMIZER_SWITCH_FIRSTMATCH | \
OPTIMIZER_SWITCH_LOOSE_SCAN | \
OPTIMIZER_SWITCH_MATERIALIZATION | \
+ OPTIMIZER_SWITCH_IN_TO_EXISTS | \
OPTIMIZER_SWITCH_SEMIJOIN | \
OPTIMIZER_SWITCH_PARTIAL_MATCH_ROWID_MERGE|\
OPTIMIZER_SWITCH_PARTIAL_MATCH_TABLE_SCAN|\
=== modified file 'sql/mysqld.cc'
--- sql/mysqld.cc 2010-07-16 08:58:24 +0000
+++ sql/mysqld.cc 2010-10-04 15:26:55 +0000
@@ -304,7 +304,7 @@
"index_merge","index_merge_union","index_merge_sort_union",
"index_merge_intersection",
"index_condition_pushdown",
- "firstmatch","loosescan","materialization", "semijoin",
+ "firstmatch","loosescan","materialization","in_to_exists","semijoin",
"partial_match_rowid_merge",
"partial_match_table_scan",
"subquery_cache",
@@ -325,6 +325,7 @@
sizeof("firstmatch") - 1,
sizeof("loosescan") - 1,
sizeof("materialization") - 1,
+ sizeof("in_to_exists") - 1,
sizeof("semijoin") - 1,
sizeof("partial_match_rowid_merge") - 1,
sizeof("partial_match_table_scan") - 1,
@@ -412,9 +413,10 @@
"firstmatch=on,"
"loosescan=on,"
"materialization=on,"
+ "in_to_exists=on,"
"semijoin=on,"
"partial_match_rowid_merge=on,"
- "partial_match_table_scan=on,"
+ "partial_match_table_scan=on,"
"subquery_cache=on"
#ifndef DBUG_OFF
",table_elimination=on";
@@ -7233,7 +7235,7 @@
{"optimizer_switch", OPT_OPTIMIZER_SWITCH,
"optimizer_switch=option=val[,option=val...], where option={index_merge, "
"index_merge_union, index_merge_sort_union, index_merge_intersection, "
- "index_condition_pushdown, firstmatch, loosescan, materialization, "
+ "index_condition_pushdown, firstmatch, loosescan, materialization, in_to_exists, "
"semijoin, partial_match_rowid_merge, partial_match_table_scan, "
"subquery_cache"
#ifndef DBUG_OFF
=== modified file 'sql/opt_subselect.cc'
--- sql/opt_subselect.cc 2010-07-10 10:37:30 +0000
+++ sql/opt_subselect.cc 2010-10-05 13:06:06 +0000
@@ -67,6 +67,7 @@
{
THD *thd=join->thd;
st_select_lex *select_lex= join->select_lex;
+ st_select_lex_unit* parent_unit= select_lex->master_unit();
DBUG_ENTER("check_and_do_in_subquery_rewrites");
/*
If
@@ -84,8 +85,8 @@
TODO: for PS, make the whole block execute only on the first execution
*/
Item_subselect *subselect;
- if (!thd->lex->view_prepare_mode && // (1)
- (subselect= select_lex->master_unit()->item)) // (2)
+ if (!thd->lex->view_prepare_mode && // (1)
+ (subselect= parent_unit->item)) // (2)
{
Item_in_subselect *in_subs= NULL;
if (subselect->substype() == Item_subselect::IN_SUBS)
@@ -129,6 +130,15 @@
if (failure)
DBUG_RETURN(-1); /* purecov: deadcode */
}
+ if (select_lex == parent_unit->fake_select_lex)
+ {
+ /*
+ The join and its select_lex object represent the 'fake' select used
+ to compute the result of a UNION.
+ */
+ DBUG_RETURN(0);
+ }
+
DBUG_PRINT("info", ("Checking if subq can be converted to semi-join"));
/*
Check if we're in subquery that is a candidate for flattening into a
@@ -154,8 +164,8 @@
!join->having && !select_lex->with_sum_func && // 4
thd->thd_marker.emb_on_expr_nest && // 5
select_lex->outer_select()->join && // 6
- select_lex->master_unit()->first_select()->leaf_tables && // 7
- in_subs->exec_method == Item_in_subselect::NOT_TRANSFORMED && // 8
+ parent_unit->first_select()->leaf_tables && // 7
+ !in_subs->in_strategy && // 8
select_lex->outer_select()->leaf_tables && // 9
!((join->select_options | // 10
select_lex->outer_select()->join->select_options) // 10
@@ -175,63 +185,82 @@
else
{
DBUG_PRINT("info", ("Subquery can't be converted to semi-join"));
- /*
- Check if the subquery predicate can be executed via materialization.
- The required conditions are:
- 1. Subquery predicate is an IN/=ANY subq predicate
- 2. Subquery is a single SELECT (not a UNION)
- 3. Subquery is not a table-less query. In this case there is no
- point in materializing.
- 3A The upper query is not a table-less SELECT ... FROM DUAL. We
+ /* Test if the user has set a legal combination of optimizer switches. */
+ if (!optimizer_flag(thd, OPTIMIZER_SWITCH_IN_TO_EXISTS) &&
+ !optimizer_flag(thd, OPTIMIZER_SWITCH_MATERIALIZATION))
+ my_error(ER_ILLEGAL_SUBQUERY_OPTIMIZER_SWITCHES, MYF(0));
+
+ if (in_subs)
+ {
+ /* Subquery predicate is an IN/=ANY predicate. */
+ if (optimizer_flag(thd, OPTIMIZER_SWITCH_IN_TO_EXISTS))
+ in_subs->in_strategy|= SUBS_IN_TO_EXISTS;
+ if (optimizer_flag(thd, OPTIMIZER_SWITCH_MATERIALIZATION))
+ in_subs->in_strategy|= SUBS_MATERIALIZATION;
+
+ /*
+ Check if the subquery predicate can be executed via materialization.
+ The required conditions are:
+ 1. Subquery is a single SELECT (not a UNION)
+ 2. Subquery is not a table-less query. In this case there is no
+ point in materializing.
+ 2A The upper query is not a table-less SELECT ... FROM DUAL. We
can't do materialization for SELECT .. FROM DUAL because it
does not call setup_subquery_materialization(). We could make
SELECT ... FROM DUAL call that function but that doesn't seem
to be the case that is worth handling.
- 4. Either the subquery predicate is a top-level predicate, or at
- least one partial match strategy is enabled. If no partial match
- strategy is enabled, then materialization cannot be used for
- non-top-level queries because it cannot handle NULLs correctly.
- 5. Subquery is non-correlated
- TODO:
- This is an overly restrictive condition. It can be extended to:
- (Subquery is non-correlated ||
- Subquery is correlated to any query outer to IN predicate ||
- (Subquery is correlated to the immediate outer query &&
- Subquery !contains {GROUP BY, ORDER BY [LIMIT],
- aggregate functions}) && subquery predicate is not under "NOT IN"))
- 6. No execution method was already chosen (by a prepared statement).
-
- (*) The subquery must be part of a SELECT statement. The current
- condition also excludes multi-table update statements.
-
- Determine whether we will perform subquery materialization before
- calling the IN=>EXISTS transformation, so that we know whether to
- perform the whole transformation or only that part of it which wraps
- Item_in_subselect in an Item_in_optimizer.
+ 3. Either the subquery predicate is a top-level predicate, or at
+ least one partial match strategy is enabled. If no partial match
+ strategy is enabled, then materialization cannot be used for
+ non-top-level queries because it cannot handle NULLs correctly.
+ 4. Subquery is non-correlated
+ TODO:
+ This is an overly restrictive condition. It can be extended to:
+ (Subquery is non-correlated ||
+ Subquery is correlated to any query outer to IN predicate ||
+ (Subquery is correlated to the immediate outer query &&
+ Subquery !contains {GROUP BY, ORDER BY [LIMIT],
+ aggregate functions}) && subquery predicate is not under "NOT IN"))
+
+ (*) The subquery must be part of a SELECT statement. The current
+ condition also excludes multi-table update statements.
+ */
+ if (!(in_subs->in_strategy & SUBS_MATERIALIZATION &&
+ !select_lex->is_part_of_union() && // 1
+ parent_unit->first_select()->leaf_tables && // 2
+ thd->lex->sql_command == SQLCOM_SELECT && // *
+ select_lex->outer_select()->leaf_tables && // 2A
+ subquery_types_allow_materialization(in_subs) &&
+ // psergey-todo: duplicated_subselect_card_check: where it's done?
+ (in_subs->is_top_level_item() || //3
+ optimizer_flag(thd,
+ OPTIMIZER_SWITCH_PARTIAL_MATCH_ROWID_MERGE) || //3
+ optimizer_flag(thd,
+ OPTIMIZER_SWITCH_PARTIAL_MATCH_TABLE_SCAN)) && //3
+ !in_subs->is_correlated)) //4
+ {
+ /* Materialization is not possible based on syntactic properties. */
+ in_subs->in_strategy&= ~SUBS_MATERIALIZATION;
+ }
+
+ if (!in_subs->in_strategy)
+ {
+ /*
+ If neither materialization is possible, nor the user chose
+ IN-TO-EXISTS, choose IN-TO-EXISTS as the only universal strategy.
+ */
+ in_subs->in_strategy|= SUBS_IN_TO_EXISTS;
+ }
+ }
+
+ /*
+ Transform each subquery predicate according to its overloaded
+ transformer.
*/
- if (optimizer_flag(thd, OPTIMIZER_SWITCH_MATERIALIZATION) &&
- in_subs && // 1
- !select_lex->is_part_of_union() && // 2
- select_lex->master_unit()->first_select()->leaf_tables && // 3
- thd->lex->sql_command == SQLCOM_SELECT && // *
- select_lex->outer_select()->leaf_tables && // 3A
- subquery_types_allow_materialization(in_subs) &&
- // psergey-todo: duplicated_subselect_card_check: where it's done?
- (in_subs->is_top_level_item() ||
- optimizer_flag(thd, OPTIMIZER_SWITCH_PARTIAL_MATCH_ROWID_MERGE) ||
- optimizer_flag(thd, OPTIMIZER_SWITCH_PARTIAL_MATCH_TABLE_SCAN)) &&//4
- !in_subs->is_correlated && // 5
- in_subs->exec_method == Item_in_subselect::NOT_TRANSFORMED) // 6
- {
- in_subs->exec_method= Item_in_subselect::MATERIALIZATION;
- }
-
Item_subselect::trans_res trans_res;
if ((trans_res= subselect->select_transformer(join)) !=
Item_subselect::RES_OK)
- {
DBUG_RETURN((trans_res == Item_subselect::RES_ERROR));
- }
}
}
DBUG_RETURN(0);
@@ -509,6 +538,15 @@
FALSE))
DBUG_RETURN(TRUE);
}
+ /*
+ Revert to the IN->EXISTS strategy in the rare case when the subquery could
+ not be flattened.
+ TODO: This is a limitation done for simplicity. Such subqueries could also
+ be executed via materialization. In order to determine this, we should
+ re-run the test for materialization that was done in
+ check_and_do_in_subquery_rewrites.
+ */
+ (*in_subq)->in_strategy= SUBS_IN_TO_EXISTS;
}
if (arena)
@@ -769,8 +807,7 @@
/* 3. Remove the original subquery predicate from the WHERE/ON */
// The subqueries were replaced for Item_int(1) earlier
- subq_pred->exec_method=
- Item_in_subselect::SEMI_JOIN; // for subsequent executions
+ subq_pred->in_strategy= SUBS_SEMI_JOIN; // for subsequent executions
/*TODO: also reset the 'with_subselect' there. */
/* n. Adjust the parent_join->tables counter */
@@ -3362,6 +3399,7 @@
JOIN_TAB* join_tab=join->join_tab;
SELECT_LEX_UNIT *unit= join->unit;
DBUG_ENTER("rewrite_to_index_subquery_engine");
+
/*
is this simple IN subquery?
*/
@@ -3505,3 +3543,269 @@
}
+/**
+ Optimize all subqueries of a query that have were flattened into a semijoin.
+
+ @details
+ Optimize all immediate children subqueries of a query.
+
+ This phase must be called after substitute_for_best_equal_field() because
+ that function may replace items with other items from a multiple equality,
+ and we need to reference the correct items in the index access method of the
+ IN predicate.
+
+ @return Operation status
+ @retval FALSE success.
+ @retval TRUE error occurred.
+*/
+
+bool JOIN::optimize_unflattened_subqueries()
+{
+ return select_lex->optimize_unflattened_subqueries();
+}
+
+
+/**
+ Choose an optimal strategy to execute an IN/ALL/ANY subquery predicate
+ based on cost.
+
+ @param join_tables the set of tables joined in the subquery
+
+ @notes
+ The method chooses between the materialization and IN=>EXISTS rewrite
+ strategies for the execution of a non-flattened subquery IN predicate.
+ The cost-based decision is made as follows:
+
+ 1. compute materialize_strategy_cost based on the unmodified subquery
+ 2. reoptimize the subquery taking into account the IN-EXISTS predicates
+ 3. compute in_exists_strategy_cost based on the reoptimized plan
+ 4. compare and set the cheaper strategy
+ if (materialize_strategy_cost >= in_exists_strategy_cost)
+ in_strategy = MATERIALIZATION
+ else
+ in_strategy = IN_TO_EXISTS
+ 5. if in_strategy = MATERIALIZATION and it is not possible to initialize it
+ revert to IN_TO_EXISTS
+ 6. if (in_strategy == MATERIALIZATION)
+ revert the subquery plan to the original one before reoptimizing
+ else
+ inject the IN=>EXISTS predicates into the new EXISTS subquery plan
+
+ The implementation itself is a bit more complicated because it takes into
+ account two more factors:
+ - whether the user allowed both strategies through an optimizer_switch, and
+ - if materialization was the cheaper strategy, whether it can be executed
+ or not.
+
+ @retval FALSE success.
+ @retval TRUE error occurred.
+*/
+
+bool JOIN::choose_subquery_plan(table_map join_tables)
+{
+ /* The original QEP of the subquery. */
+ DYNAMIC_ARRAY save_keyuse; /* Copy of the JOIN::keyuse array. */
+ POSITION save_best_positions[MAX_TABLES+1]; /* Copy of JOIN::best_positions */
+ /* Copies of the JOIN_TAB::keyuse pointers for each JOIN_TAB. */
+ KEYUSE *save_join_tab_keyuse[MAX_TABLES];
+ /* Copies of JOIN_TAB::checked_keys for each JOIN_TAB. */
+ key_map save_join_tab_checked_keys[MAX_TABLES];
+
+ bool in_exists_reoptimized= false;
+ Item_in_subselect *in_subs;
+
+ if (select_lex->master_unit()->item &&
+ select_lex->master_unit()->item->is_in_predicate())
+ {
+ in_subs= (Item_in_subselect*) select_lex->master_unit()->item;
+ if (in_subs->create_in_to_exists_cond(this))
+ return true;
+ }
+ else
+ return false;
+
+ DBUG_ASSERT(in_subs->in_strategy); /* A strategy must be chosen earlier. */
+ DBUG_ASSERT(in_to_exists_where || in_to_exists_having);
+ DBUG_ASSERT(!in_to_exists_where || in_to_exists_where->fixed);
+ DBUG_ASSERT(!in_to_exists_having || in_to_exists_having->fixed);
+
+ save_keyuse.elements= 0;
+ save_keyuse.buffer= NULL;
+
+ /*
+ Compute and compare the costs of materialization and in-exists if both
+ strategies are possible and allowed by the user (checked during the prepare
+ phase.
+ */
+ if (in_subs->in_strategy & SUBS_MATERIALIZATION &&
+ in_subs->in_strategy & SUBS_IN_TO_EXISTS)
+ {
+ JOIN *outer_join= unit->outer_select() ? unit->outer_select()->join : NULL;
+ JOIN *inner_join= this;
+ /* Cost of the outer JOIN. */
+ double outer_read_time= 0, outer_record_count= 0;
+ /* Cost of the unmodified subquery. */
+ double inner_read_time_1= 0, inner_record_count_1= 0;
+ /* Cost of the subquery with injected IN-EXISTS predicates. */
+ double inner_read_time_2= 0, inner_record_count_2= 0;
+ /* The cost to compute IN via materialization. */
+ double materialize_strategy_cost= 0;
+ /* The cost of the IN->EXISTS strategy. */
+ double in_exists_strategy_cost= 1;
+
+ if (outer_join)
+ get_partial_join_cost(outer_join, outer_join->tables,
+ &outer_read_time, &outer_record_count);
+ else
+ {
+ /*
+ TODO: outer_join can be NULL for DELETE statements.
+ How to compute its cost?
+ */
+ outer_read_time= 1; /* TODO */
+ outer_record_count= 1; /* TODO */
+ }
+
+ get_partial_join_cost(inner_join, inner_join->tables,
+ &inner_read_time_1, &inner_record_count_1);
+
+ if (in_to_exists_where && const_tables != tables)
+ {
+ /*
+ Re-optimize and cost the subquery taking into account the IN-EXISTS
+ conditions.
+ */
+ if (save_query_plan(&save_keyuse, save_best_positions,
+ save_join_tab_keyuse, save_join_tab_checked_keys) ||
+ reoptimize(in_to_exists_where, join_tables, save_best_positions))
+ return TRUE;
+ in_exists_reoptimized= true;
+
+ get_partial_join_cost(inner_join, inner_join->tables,
+ &inner_read_time_2, &inner_record_count_2);
+ }
+ else
+ {
+ /* Reoptimization would not produce any better plan. */
+ inner_read_time_2= inner_read_time_1;
+ }
+
+ /* Compute execution costs. */
+ /*
+ 1. Compute the cost of the materialization strategy.
+ */
+ double materialization_cost; /* The cost of executing the subquery and */
+ /* storing its result in an indexed temp table.*/
+ /* The cost of a lookup into the unique index of the materialized table. */
+ double lookup_cost;
+ double write_row_cost= 1; /* TODO: what is the real cost to write a row? */
+ materialization_cost= inner_read_time_1 +
+ inner_record_count_1 * write_row_cost;
+ /*
+ The cost of a hash/btree lookup into a unique index of a materialized
+ subquery.
+ TIMOUR: TODO: the block of code below is exact copy/paste from
+ opt_subselect.cc:optimize_semi_join_nests() - refactor it.
+ */
+ uint rowlen= get_tmp_table_rec_length(unit->first_select()->item_list);
+ if (rowlen * inner_record_count_1 < thd->variables.max_heap_table_size)
+ lookup_cost= HEAP_TEMPTABLE_LOOKUP_COST;
+ else
+ lookup_cost= DISK_TEMPTABLE_LOOKUP_COST;
+ materialize_strategy_cost= materialization_cost +
+ outer_record_count * lookup_cost;
+
+ /*
+ 2. Compute the cost of the IN=>EXISTS strategy.
+ */
+ in_exists_strategy_cost= outer_record_count * inner_read_time_2;
+
+ /* Compare the costs and choose the cheaper strategy. */
+ if (materialize_strategy_cost >= in_exists_strategy_cost)
+ in_subs->in_strategy&= ~SUBS_MATERIALIZATION;
+ else
+ in_subs->in_strategy&= ~SUBS_IN_TO_EXISTS;
+ }
+
+ /*
+ If (1) materialization is a possible strategy based on semantic analysis
+ during the prepare phase, then if
+ (2) it is more expensive than the IN->EXISTS transformation, and
+ (3) it is not possible to create usable indexes for the materialization
+ strategy,
+ fall back to IN->EXISTS.
+ otherwise
+ use materialization.
+ */
+ if (in_subs->in_strategy & SUBS_MATERIALIZATION &&
+ in_subs->setup_mat_engine())
+ {
+ /*
+ If materialization was the cheaper or the only user-selected strategy,
+ but it is not possible to execute it due to limitations in the
+ implementation, fall back to IN-TO-EXISTS.
+ */
+ in_subs->in_strategy&= ~SUBS_MATERIALIZATION;
+ in_subs->in_strategy|= SUBS_IN_TO_EXISTS;
+ }
+
+ if (in_subs->in_strategy & SUBS_MATERIALIZATION)
+ {
+ /* Restore orginal query plan used for materialization. */
+ if (in_exists_reoptimized)
+ restore_query_plan(&save_keyuse, save_best_positions,
+ save_join_tab_keyuse, save_join_tab_checked_keys);
+
+ /* TODO: should we set/unset this flag for both select_lex and its unit? */
+ in_subs->unit->uncacheable&= ~UNCACHEABLE_DEPENDENT;
+ select_lex->uncacheable&= ~UNCACHEABLE_DEPENDENT;
+
+ /*
+ Reset the "LIMIT 1" set in Item_exists_subselect::fix_length_and_dec.
+ TODO:
+ Currently we set the subquery LIMIT to infinity, and this is correct
+ because we forbid at parse time LIMIT inside IN subqueries (see
+ Item_in_subselect::test_limit). However, once we allow this, here
+ we should set the correct limit if given in the query.
+ */
+ in_subs->unit->global_parameters->select_limit= NULL;
+ in_subs->unit->set_limit(unit->global_parameters);
+ /*
+ Set the limit of this JOIN object as well, because normally its being
+ set in the beginning of JOIN::optimize, which was already done.
+ */
+ select_limit= in_subs->unit->select_limit_cnt;
+ }
+ else if (in_subs->in_strategy & SUBS_IN_TO_EXISTS)
+ {
+ /* Keep the new query plan with injected conditions, delete the old one. */
+ if (save_keyuse.elements)
+ {
+ DBUG_ASSERT(in_exists_reoptimized);
+ delete_dynamic(&save_keyuse);
+ }
+
+ if (!in_exists_reoptimized && in_to_exists_where && const_tables != tables)
+ {
+ /*
+ The subquery was not reoptimized either because the user allowed only the
+ IN-EXISTS strategy, or because materialization was not possible based on
+ semantic analysis. Clenup the original plan and reoptimize.
+ */
+ for (uint i= 0; i < tables; i++)
+ {
+ join_tab[i].keyuse= NULL;
+ join_tab[i].checked_keys.clear_all();
+ }
+ if (reoptimize(in_to_exists_where, join_tables, NULL))
+ return TRUE;
+ }
+
+ if (in_subs->inject_in_to_exists_cond(this))
+ return TRUE;
+ }
+ else
+ DBUG_ASSERT(FALSE);
+
+ return FALSE;
+}
=== modified file 'sql/share/errmsg.txt'
--- sql/share/errmsg.txt 2010-06-01 19:52:20 +0000
+++ sql/share/errmsg.txt 2010-10-04 15:26:55 +0000
@@ -6245,3 +6245,5 @@
eng "Unknown option '%-.64s'"
ER_BAD_OPTION_VALUE
eng "Incorrect value '%-.64s' for option '%-.64s'"
+ER_ILLEGAL_SUBQUERY_OPTIMIZER_SWITCHES
+ eng "At least one of the 'in_to_exists' or 'materialization' optimizer_switch flags must be 'on'."
=== modified file 'sql/sql_class.cc'
--- sql/sql_class.cc 2010-07-10 10:37:30 +0000
+++ sql/sql_class.cc 2010-10-04 15:26:55 +0000
@@ -2630,6 +2630,7 @@
for (; free_list; free_list= next)
{
next= free_list->next;
+ DBUG_ASSERT(free_list != next);
free_list->delete_self();
}
/* Postcondition: free_list is 0 */
@@ -2994,14 +2995,28 @@
if (!stat)
return TRUE;
- cleanup();
-
+ reset();
table->file->extra(HA_EXTRA_WRITE_CACHE);
table->file->extra(HA_EXTRA_IGNORE_DUP_KEY);
return FALSE;
}
+void select_materialize_with_stats::reset()
+{
+ memset(col_stat, 0, table->s->fields * sizeof(Column_statistics));
+ max_nulls_in_row= 0;
+ count_rows= 0;
+}
+
+
+void select_materialize_with_stats::cleanup()
+{
+ reset();
+ select_union::cleanup();
+}
+
+
/**
Override select_union::send_data to analyze each row for NULLs and to
update null_statistics before sending data to the client.
@@ -3052,6 +3067,7 @@
table_charset= 0;
precomputed_group_by= 0;
bit_fields_as_long= 0;
+ materialized_subquery= 0;
skip_create_table= 0;
DBUG_VOID_RETURN;
}
=== modified file 'sql/sql_class.h'
--- sql/sql_class.h 2010-07-10 10:37:30 +0000
+++ sql/sql_class.h 2010-10-04 15:26:55 +0000
@@ -2852,6 +2852,8 @@
uint convert_blob_length;
CHARSET_INFO *table_charset;
bool schema_table;
+ /* TRUE if the temp table is created for subquery materialization. */
+ bool materialized_subquery;
/*
True if GROUP BY and its aggregate functions are already computed
by a table access method (e.g. by loose index scan). In this case
@@ -2875,8 +2877,8 @@
TMP_TABLE_PARAM()
:copy_field(0), group_parts(0),
group_length(0), group_null_parts(0), convert_blob_length(0),
- schema_table(0), precomputed_group_by(0), force_copy_fields(0),
- bit_fields_as_long(0), skip_create_table(0)
+ schema_table(0), materialized_subquery(0), precomputed_group_by(0),
+ force_copy_fields(0), bit_fields_as_long(0), skip_create_table(0)
{}
~TMP_TABLE_PARAM()
{
@@ -2905,10 +2907,11 @@
bool send_data(List<Item> &items);
bool send_eof();
bool flush();
-
+ void cleanup();
virtual bool create_result_table(THD *thd, List<Item> *column_types,
bool is_distinct, ulonglong options,
const char *alias, bool bit_fields_as_long);
+ TMP_TABLE_PARAM *get_tmp_table_param() { return &tmp_table_param; }
};
/* Base subselect interface class */
@@ -2968,19 +2971,17 @@
*/
ha_rows count_rows;
+protected:
+ void reset();
+
public:
- select_materialize_with_stats() {}
+ select_materialize_with_stats() { tmp_table_param.init(); }
virtual bool create_result_table(THD *thd, List<Item> *column_types,
bool is_distinct, ulonglong options,
const char *alias, bool bit_fields_as_long);
bool init_result_table(ulonglong select_options);
bool send_data(List<Item> &items);
- void cleanup()
- {
- memset(col_stat, 0, table->s->fields * sizeof(Column_statistics));
- max_nulls_in_row= 0;
- count_rows= 0;
- }
+ void cleanup();
ha_rows get_null_count_of_col(uint idx)
{
DBUG_ASSERT(idx < table->s->fields);
=== modified file 'sql/sql_delete.cc'
--- sql/sql_delete.cc 2010-05-26 06:14:18 +0000
+++ sql/sql_delete.cc 2010-10-04 15:26:55 +0000
@@ -92,6 +92,10 @@
}
}
+ /* Apply the IN=>EXISTS transformation to all subqueries and optimize them. */
+ if (select_lex->optimize_unflattened_subqueries())
+ DBUG_RETURN(TRUE);
+
const_cond= (!conds || conds->const_item());
safe_update=test(thd->options & OPTION_SAFE_UPDATES);
if (safe_update && const_cond)
=== modified file 'sql/sql_lex.cc'
--- sql/sql_lex.cc 2010-07-10 10:37:30 +0000
+++ sql/sql_lex.cc 2010-10-04 15:26:55 +0000
@@ -1734,17 +1734,29 @@
}
-/*
- excluding select_lex structure (except first (first select can't be
+
+/*
+ Exclude a node from the tree lex structure, but leave it in the global
+ list of nodes.
+*/
+
+void st_select_lex_node::exclude_from_tree()
+{
+ if ((*prev= next))
+ next->prev= prev;
+}
+
+
+/*
+ Exclude select_lex structure (except first (first select can't be
deleted, because it is most upper select))
*/
void st_select_lex_node::exclude()
{
- //exclude from global list
+ /* exclude from global list */
fast_exclude();
- //exclude from other structures
- if ((*prev= next))
- next->prev= prev;
+ /* exclude from other structures */
+ exclude_from_tree();
/*
We do not need following statements, because prev pointer of first
list element point to master->slave
@@ -2145,8 +2157,8 @@
select_limit == 1, and there should be no offset_limit.
*/
(((subs_type == Item_subselect::IN_SUBS) &&
- ((Item_in_subselect*)item)->exec_method ==
- Item_in_subselect::MATERIALIZATION) ?
+ ((Item_in_subselect*)item)->in_strategy &
+ SUBS_MATERIALIZATION) ?
TRUE :
(select_limit->val_int() == 1LL) &&
offset_limit == 0));
@@ -3076,6 +3088,70 @@
str, length));
}
+
+bool st_select_lex::optimize_unflattened_subqueries()
+{
+ for (SELECT_LEX_UNIT *un= first_inner_unit(); un; un= un->next_unit())
+ {
+ Item_subselect *subquery_predicate= un->item;
+ if (subquery_predicate)
+ {
+ for (SELECT_LEX *sl= un->first_select(); sl; sl= sl->next_select())
+ {
+ JOIN *inner_join= sl->join;
+ SELECT_LEX *save_select= un->thd->lex->current_select;
+ ulonglong save_options;
+ int res;
+ /* We need only 1 row to determine existence */
+ un->set_limit(un->global_parameters);
+ un->thd->lex->current_select= sl;
+ save_options= inner_join->select_options;
+ if (un->outer_select()->options & SELECT_DESCRIBE)
+ {
+ /* Optimize the subquery in the context of EXPLAIN. */
+ set_explain_type();
+ inner_join->select_options= options;
+ }
+ res= inner_join->optimize();
+ inner_join->select_options= save_options;
+ un->thd->lex->current_select= save_select;
+ if (res)
+ return TRUE;
+ }
+ }
+ }
+ return FALSE;
+}
+
+
+/**
+ Set the EXPLAIN type for this subquery.
+*/
+
+void st_select_lex::set_explain_type()
+{
+ SELECT_LEX *first= master_unit()->first_select();
+ /* drop UNCACHEABLE_EXPLAIN, because it is for internal usage only */
+ uint8 is_uncacheable= (uncacheable & ~UNCACHEABLE_EXPLAIN);
+
+ type= ((&master_unit()->thd->lex->select_lex == this) ?
+ (first_inner_unit() || next_select() ?
+ "PRIMARY" : "SIMPLE") :
+ ((this == first) ?
+ ((linkage == DERIVED_TABLE_TYPE) ?
+ "DERIVED" :
+ ((is_uncacheable & UNCACHEABLE_DEPENDENT) ?
+ "DEPENDENT SUBQUERY" :
+ (is_uncacheable ? "UNCACHEABLE SUBQUERY" :
+ "SUBQUERY"))) :
+ ((is_uncacheable & UNCACHEABLE_DEPENDENT) ?
+ "DEPENDENT UNION":
+ is_uncacheable ? "UNCACHEABLE UNION":
+ "UNION")));
+ options|= SELECT_DESCRIBE;
+}
+
+
/**
A routine used by the parser to decide whether we are specifying a full
partitioning or if only partitions to add or to split.
@@ -3093,4 +3169,3 @@
(alter_info.flags == ALTER_ADD_PARTITION ||
alter_info.flags == ALTER_REORGANIZE_PARTITION));
}
-
=== modified file 'sql/sql_lex.h'
--- sql/sql_lex.h 2010-07-10 10:37:30 +0000
+++ sql/sql_lex.h 2010-10-04 15:26:55 +0000
@@ -439,6 +439,7 @@
st_select_lex_node(): linkage(UNSPECIFIED_TYPE) {}
virtual ~st_select_lex_node() {}
inline st_select_lex_node* get_master() { return master; }
+ inline void set_master(st_select_lex_node* master_arg) { master= master_arg; }
virtual void init_query();
virtual void init_select();
void include_down(st_select_lex_node *upper);
@@ -446,6 +447,7 @@
void include_standalone(st_select_lex_node *sel, st_select_lex_node **ref);
void include_global(st_select_lex_node **plink);
void exclude();
+ void exclude_from_tree();
virtual st_select_lex_unit* master_unit()= 0;
virtual st_select_lex* outer_select()= 0;
@@ -839,6 +841,15 @@
void clear_index_hints(void) { index_hints= NULL; }
bool is_part_of_union() { return master_unit()->is_union(); }
+ /*
+ Optimize all subqueries that have not been flattened into semi-joins.
+ This functionality is a method of SELECT_LEX instead of JOIN because
+ some SQL statements as DELETE do not have a corresponding JOIN object.
+ */
+ bool optimize_unflattened_subqueries();
+ /* Set the EXPLAIN type for this subquery. */
+ void set_explain_type();
+
private:
/* current index hint kind. used in filling up index_hints */
enum index_hint_type current_index_hint_type;
=== modified file 'sql/sql_select.cc'
--- sql/sql_select.cc 2010-07-15 13:59:10 +0000
+++ sql/sql_select.cc 2010-10-05 13:06:06 +0000
@@ -53,9 +53,9 @@
static bool update_ref_and_keys(THD *thd, DYNAMIC_ARRAY *keyuse,
JOIN_TAB *join_tab,
uint tables, COND *conds,
- COND_EQUAL *cond_equal,
table_map table_map, SELECT_LEX *select_lex,
st_sargable_param **sargables);
+static bool sort_and_filter_keyuse(DYNAMIC_ARRAY *keyuse);
static int sort_keyuse(KEYUSE *a,KEYUSE *b);
static bool create_ref_for_key(JOIN *join, JOIN_TAB *j, KEYUSE *org_keyuse,
table_map used_tables);
@@ -926,9 +926,32 @@
{
DBUG_PRINT("info",("No tables"));
error= 0;
- /* Create all structures needed for materialized subquery execution. */
- if (setup_subquery_materialization())
- DBUG_RETURN(1);
+ if (optimize_unflattened_subqueries())
+ DBUG_RETURN(1);
+ /*
+ TIMOUR: TODO: consider do we need to optimize here at all and refactor
+ this block and JOIN::choose_subquery_plan.
+
+ if (choose_subquery_plan())
+ DBUG_RETURN(1);
+ */
+ Item_in_subselect *in_subs;
+ if (select_lex->master_unit()->item &&
+ select_lex->master_unit()->item->is_in_predicate())
+ {
+ in_subs= (Item_in_subselect*) select_lex->master_unit()->item;
+ if (in_subs->in_strategy & SUBS_MATERIALIZATION &&
+ in_subs->setup_mat_engine())
+ in_subs->in_strategy= SUBS_IN_TO_EXISTS;
+ if (in_subs->in_strategy & SUBS_IN_TO_EXISTS)
+ {
+ if (in_subs->create_in_to_exists_cond(this))
+ DBUG_RETURN(1);
+ if (in_subs->inject_in_to_exists_cond(this))
+ DBUG_RETURN(1);
+ tmp_having= having;
+ }
+ }
DBUG_RETURN(0);
}
error= -1; // Error is sent to client
@@ -1286,7 +1309,7 @@
init_ftfuncs(thd, select_lex, test(order));
/* Create all structures needed for materialized subquery execution. */
- if (setup_subquery_materialization())
+ if (optimize_unflattened_subqueries())
DBUG_RETURN(1);
int res;
@@ -1381,6 +1404,34 @@
if (join_tab->is_using_loose_index_scan())
tmp_table_param.precomputed_group_by= TRUE;
+ error= 0;
+ DBUG_RETURN(0);
+
+setup_subq_exit:
+ /*
+ Even with zero matching rows, subqueries in the HAVING clause may
+ need to be evaluated if there are aggregate functions in the query.
+ If we planned to materialize the subquery, we need to set it up
+ properly before prematurely leaving optimize().
+ */
+ if (optimize_unflattened_subqueries())
+ DBUG_RETURN(1);
+ error= 0;
+ DBUG_RETURN(0);
+}
+
+
+/**
+ Create and initialize objects neeed for the execution of a query plan.
+*/
+
+int JOIN::init_execution()
+{
+ DBUG_ENTER("JOIN::init_execution");
+
+ DBUG_ASSERT(optimized);
+ initialized= true;
+
/* Create a tmp table if distinct or if the sort is too complicated */
if (need_tmp)
{
@@ -1413,7 +1464,7 @@
select_options,
tmp_rows_limit,
(char *) "")))
- {
+ {
DBUG_RETURN(1);
}
@@ -1499,19 +1550,6 @@
DBUG_RETURN(-1); /* purecov: inspected */
}
- error= 0;
- DBUG_RETURN(0);
-
-setup_subq_exit:
- /*
- Even with zero matching rows, subqueries in the HAVING clause may
- need to be evaluated if there are aggregate functions in the
- query. If we have planned to materialize the subquery, we need to
- set it up properly before prematurely leaving optimize().
- */
- if (setup_subquery_materialization())
- DBUG_RETURN(1);
- error= 0;
DBUG_RETURN(0);
}
@@ -1775,6 +1813,9 @@
int tmp_error;
DBUG_ENTER("JOIN::exec");
+ if (!initialized && init_execution())
+ DBUG_VOID_RETURN;
+
thd_proc_info(thd, "executing");
error= 0;
if (procedure)
@@ -2582,51 +2623,6 @@
}
-/**
- Setup for execution all subqueries of a query, for which the optimizer
- chose hash semi-join.
-
- @details Iterate over all subqueries of the query, and if they are under an
- IN predicate, and the optimizer chose to compute it via hash semi-join:
- - try to initialize all data structures needed for the materialized execution
- of the IN predicate,
- - if this fails, then perform the IN=>EXISTS transformation which was
- previously blocked during JOIN::prepare.
-
- This method is part of the "code generation" query processing phase.
-
- This phase must be called after substitute_for_best_equal_field() because
- that function may replace items with other items from a multiple equality,
- and we need to reference the correct items in the index access method of the
- IN predicate.
-
- @return Operation status
- @retval FALSE success.
- @retval TRUE error occurred.
-*/
-
-bool JOIN::setup_subquery_materialization()
-{
- for (SELECT_LEX_UNIT *un= select_lex->first_inner_unit(); un;
- un= un->next_unit())
- {
- for (SELECT_LEX *sl= un->first_select(); sl; sl= sl->next_select())
- {
- Item_subselect *subquery_predicate= sl->master_unit()->item;
- if (subquery_predicate &&
- subquery_predicate->substype() == Item_subselect::IN_SUBS)
- {
- Item_in_subselect *in_subs= (Item_in_subselect*) subquery_predicate;
- if (in_subs->exec_method == Item_in_subselect::MATERIALIZATION &&
- in_subs->setup_engine())
- return TRUE;
- }
- }
- }
- return FALSE;
-}
-
-
/*****************************************************************************
Create JOIN_TABS, make a guess about the table types,
Approximate how many records will be used in each table
@@ -2837,10 +2833,14 @@
}
if (conds || outer_join)
+ {
if (update_ref_and_keys(join->thd, keyuse_array, stat, join->tables,
- conds, join->cond_equal,
- ~outer_join, join->select_lex, &sargables))
- goto error;
+ conds, ~outer_join, join->select_lex, &sargables))
+ goto error;
+ if (keyuse_array->elements && sort_and_filter_keyuse(keyuse_array))
+ goto error;
+ DBUG_EXECUTE("opt", print_keyuse_array(keyuse_array););
+ }
join->const_table_map= no_rows_const_tables;
join->const_tables= const_count;
@@ -3144,6 +3144,9 @@
sizeof(POSITION)*join->const_tables);
join->best_read=1.0;
}
+ if (join->choose_subquery_plan(all_table_map & ~join->const_table_map))
+ goto error;
+
/* Generate an execution plan from the found optimal join order. */
DBUG_RETURN(join->thd->killed || get_best_combination(join));
@@ -4094,11 +4097,10 @@
static bool
update_ref_and_keys(THD *thd, DYNAMIC_ARRAY *keyuse,JOIN_TAB *join_tab,
- uint tables, COND *cond, COND_EQUAL *cond_equal,
- table_map normal_tables, SELECT_LEX *select_lex,
- SARGABLE_PARAM **sargables)
+ uint tables, COND *cond, table_map normal_tables,
+ SELECT_LEX *select_lex, SARGABLE_PARAM **sargables)
{
- uint and_level,i,found_eq_constant;
+ uint and_level,i;
KEY_FIELD *key_fields, *end, *field;
uint sz;
uint m= max(select_lex->max_equal_elems,1);
@@ -4194,67 +4196,76 @@
return TRUE;
}
- /*
- Sort the array of possible keys and remove the following key parts:
- - ref if there is a keypart which is a ref and a const.
- (e.g. if there is a key(a,b) and the clause is a=3 and b=7 and b=t2.d,
- then we skip the key part corresponding to b=t2.d)
- - keyparts without previous keyparts
- (e.g. if there is a key(a,b,c) but only b < 5 (or a=2 and c < 3) is
- used in the query, we drop the partial key parts from consideration).
- Special treatment for ft-keys.
- */
- if (keyuse->elements)
+ return FALSE;
+}
+
+
+/**
+ Sort the array of possible keys and remove the following key parts:
+ - ref if there is a keypart which is a ref and a const.
+ (e.g. if there is a key(a,b) and the clause is a=3 and b=7 and b=t2.d,
+ then we skip the key part corresponding to b=t2.d)
+ - keyparts without previous keyparts
+ (e.g. if there is a key(a,b,c) but only b < 5 (or a=2 and c < 3) is
+ used in the query, we drop the partial key parts from consideration).
+ Special treatment for ft-keys.
+*/
+
+static bool sort_and_filter_keyuse(DYNAMIC_ARRAY *keyuse)
+{
+ KEYUSE key_end, *prev, *save_pos, *use;
+ uint found_eq_constant, i;
+
+ DBUG_ASSERT(keyuse->elements);
+
+ my_qsort(keyuse->buffer, keyuse->elements, sizeof(KEYUSE),
+ (qsort_cmp) sort_keyuse);
+
+ bzero((char*) &key_end, sizeof(key_end)); /* Add for easy testing */
+ if (insert_dynamic(keyuse, (uchar*) &key_end))
+ return TRUE;
+
+ use= save_pos= dynamic_element(keyuse,0,KEYUSE*);
+ prev= &key_end;
+ found_eq_constant= 0;
+
+ for (i=0 ; i < keyuse->elements-1 ; i++,use++)
{
- KEYUSE key_end,*prev,*save_pos,*use;
-
- my_qsort(keyuse->buffer,keyuse->elements,sizeof(KEYUSE),
- (qsort_cmp) sort_keyuse);
-
- bzero((char*) &key_end,sizeof(key_end)); /* Add for easy testing */
- if (insert_dynamic(keyuse,(uchar*) &key_end))
- return TRUE;
-
- use=save_pos=dynamic_element(keyuse,0,KEYUSE*);
- prev= &key_end;
- found_eq_constant=0;
- for (i=0 ; i < keyuse->elements-1 ; i++,use++)
+ if (!use->used_tables && use->optimize != KEY_OPTIMIZE_REF_OR_NULL)
+ use->table->const_key_parts[use->key]|= use->keypart_map;
+ if (use->keypart != FT_KEYPART)
{
- if (!use->used_tables && use->optimize != KEY_OPTIMIZE_REF_OR_NULL)
- use->table->const_key_parts[use->key]|= use->keypart_map;
- if (use->keypart != FT_KEYPART)
+ if (use->key == prev->key && use->table == prev->table)
{
- if (use->key == prev->key && use->table == prev->table)
- {
- if (prev->keypart+1 < use->keypart ||
- (prev->keypart == use->keypart && found_eq_constant))
- continue; /* remove */
- }
- else if (use->keypart != 0) // First found must be 0
- continue;
+ if (prev->keypart+1 < use->keypart ||
+ (prev->keypart == use->keypart && found_eq_constant))
+ continue; /* remove */
}
+ else if (use->keypart != 0) // First found must be 0
+ continue;
+ }
#ifdef HAVE_valgrind
- /* Valgrind complains about overlapped memcpy when save_pos==use. */
- if (save_pos != use)
+ /* Valgrind complains about overlapped memcpy when save_pos==use. */
+ if (save_pos != use)
#endif
- *save_pos= *use;
- prev=use;
- found_eq_constant= !use->used_tables;
- /* Save ptr to first use */
- if (!use->table->reginfo.join_tab->keyuse)
- use->table->reginfo.join_tab->keyuse=save_pos;
- use->table->reginfo.join_tab->checked_keys.set_bit(use->key);
- save_pos++;
- }
- i=(uint) (save_pos-(KEYUSE*) keyuse->buffer);
- VOID(set_dynamic(keyuse,(uchar*) &key_end,i));
- keyuse->elements=i;
+ *save_pos= *use;
+ prev= use;
+ found_eq_constant= !use->used_tables;
+ /* Save ptr to first use */
+ if (!use->table->reginfo.join_tab->keyuse)
+ use->table->reginfo.join_tab->keyuse=save_pos;
+ use->table->reginfo.join_tab->checked_keys.set_bit(use->key);
+ save_pos++;
}
- DBUG_EXECUTE("opt", print_keyuse_array(keyuse););
+ i= (uint) (save_pos-(KEYUSE*) keyuse->buffer);
+ VOID(set_dynamic(keyuse,(uchar*) &key_end,i));
+ keyuse->elements= i;
+
return FALSE;
}
+
/**
Update some values in keyuse for faster choose_plan() loop.
*/
@@ -11408,10 +11419,30 @@
{
if (thd->is_fatal_error)
goto err; // Got OOM
- continue; // Some kindf of const item
+ continue; // Some kind of const item
}
if (type == Item::SUM_FUNC_ITEM)
- ((Item_sum *) item)->result_field= new_field;
+ {
+ Item_sum *agg_item= (Item_sum *) item;
+ /*
+ Update the result field only if it has never been set, or if the
+ created temporary table is not to be used for subquery
+ materialization.
+
+ The reason is that for subqueries that require materialization as part
+ of their plan, we create the 'external' temporary table needed for IN
+ execution, after the 'internal' temporary table needed for grouping.
+ Since both the external and the internal temporary tables are created
+ for the same list of SELECT fields of the subquery, setting
+ 'result_field' for each invocation of create_tmp_table overrides the
+ previous value of 'result_field'.
+
+ The condition below prevents the creation of the external temp table
+ to override the 'result_field' that was set for the internal temp table.
+ */
+ if (!agg_item->result_field || !param->materialized_subquery)
+ agg_item->result_field= new_field;
+ }
tmp_from_field++;
reclength+=new_field->pack_length();
if (!(new_field->flags & NOT_NULL_FLAG))
@@ -18801,28 +18832,9 @@
bool res= 0;
SELECT_LEX *first= unit->first_select();
- for (SELECT_LEX *sl= first;
- sl;
- sl= sl->next_select())
- {
- // drop UNCACHEABLE_EXPLAIN, because it is for internal usage only
- uint8 uncacheable= (sl->uncacheable & ~UNCACHEABLE_EXPLAIN);
- sl->type= (((&thd->lex->select_lex)==sl)?
- (sl->first_inner_unit() || sl->next_select() ?
- "PRIMARY" : "SIMPLE"):
- ((sl == first)?
- ((sl->linkage == DERIVED_TABLE_TYPE) ?
- "DERIVED":
- ((uncacheable & UNCACHEABLE_DEPENDENT) ?
- "DEPENDENT SUBQUERY":
- (uncacheable?"UNCACHEABLE SUBQUERY":
- "SUBQUERY"))):
- ((uncacheable & UNCACHEABLE_DEPENDENT) ?
- "DEPENDENT UNION":
- uncacheable?"UNCACHEABLE UNION":
- "UNION")));
- sl->options|= SELECT_DESCRIBE;
- }
+ for (SELECT_LEX *sl= first; sl; sl= sl->next_select())
+ sl->set_explain_type();
+
if (unit->is_union())
{
unit->fake_select_lex->select_number= UINT_MAX; // jost for initialization
@@ -19240,6 +19252,8 @@
{
DBUG_ENTER("JOIN::change_result");
result= res;
+ if (tmp_join)
+ tmp_join->result= res;
if (!procedure && (result->prepare(fields_list, select_lex->master_unit()) ||
result->prepare2()))
{
@@ -19248,6 +19262,168 @@
DBUG_RETURN(FALSE);
}
+
+/**
+ Save a query execution plan so that the caller can revert to it if needed,
+ and reset the current query plan so that it can be reoptimized.
+
+ @param save_keyuse[out] a KEYUSE array to save JOIN::keyuse
+ @param save_best_positions[out] array to save JOIN::best_positions
+ @param save_join_tab_keyuse[out] array of KEYUSE pointers to save each
+ JOIN_TAB::keyuse pointer
+ @param save_join_tab_checked_keys[out] an array of bitmaps to save
+ each JOIN_TAB::checked_keys
+
+ @retval 0 OK
+ @retval 1 memory allocation error
+*/
+int JOIN::save_query_plan(DYNAMIC_ARRAY *save_keyuse,
+ POSITION *save_best_positions,
+ KEYUSE **save_join_tab_keyuse,
+ key_map *save_join_tab_checked_keys)
+{
+ if (keyuse.elements)
+ {
+ DYNAMIC_ARRAY tmp_keyuse;
+ if (my_init_dynamic_array(save_keyuse, sizeof(KEYUSE), 20, 64))
+ return 1;
+ /* Swap the current and the backup keyuse arrays. */
+ tmp_keyuse= keyuse;
+ keyuse= (*save_keyuse);
+ (*save_keyuse)= tmp_keyuse;
+
+ for (uint i= 0; i < tables; i++)
+ {
+ save_join_tab_keyuse[i]= join_tab[i].keyuse;
+ join_tab[i].keyuse= NULL;
+ save_join_tab_checked_keys[i]= join_tab[i].checked_keys;
+ join_tab[i].checked_keys.clear_all();
+ }
+ }
+ memcpy((uchar*) save_best_positions, (uchar*) best_positions,
+ sizeof(POSITION) * (tables + 1));
+ memset(best_positions, 0, sizeof(POSITION) * (tables + 1));
+ return 0;
+}
+
+
+/**
+ Restore a query plan previously saved by the caller.
+
+ @param save_keyuse a KEYUSE array to restore into JOIN::keyuse
+ @param save_best_positions array to restore into JOIN::best_positions
+ @param save_join_tab_keyuse array of KEYUSE pointers to restore each
+ JOIN_TAB::keyuse pointer
+ @param save_join_tab_checked_keys an array of bitmaps to restore
+ each JOIN_TAB::checked_keys
+*/
+
+void JOIN::restore_query_plan(DYNAMIC_ARRAY *save_keyuse,
+ POSITION *save_best_positions,
+ KEYUSE **save_join_tab_keyuse,
+ key_map *save_join_tab_checked_keys)
+{
+ if (save_keyuse->elements)
+ {
+ DYNAMIC_ARRAY tmp_keyuse;
+ tmp_keyuse= keyuse;
+ keyuse= (*save_keyuse);
+ (*save_keyuse)= tmp_keyuse;
+ delete_dynamic(save_keyuse);
+
+ for (uint i= 0; i < tables; i++)
+ {
+ join_tab[i].keyuse= save_join_tab_keyuse[i];
+ join_tab[i].checked_keys= save_join_tab_checked_keys[i];
+ }
+
+ }
+ memcpy((uchar*) best_positions, (uchar*) save_best_positions,
+ sizeof(POSITION) * (tables + 1));
+}
+
+
+/**
+ Reoptimize a query plan taking into account an additional conjunct to the
+ WHERE clause.
+
+ @param added_where An extra conjunct to the WHERE clause to reoptimize with
+ @param join_tables The set of tables to reoptimize
+ @param save_best_positions The join order of the original plan to restore to
+ if needed.
+
+ @notes
+ Given a query plan that already optimized taking into account some WHERE clause
+ 'C', reoptimize this plan with a new WHERE clause 'C AND added_where'. The
+ reoptimization works as follows:
+
+ 1. Call update_ref_and_keys *only* for the new conditions 'added_where'
+ that are about to be injected into the query.
+ 2. Expand if necessary the original KEYUSE array JOIN::keyuse to
+ accommodate the new REF accesses computed for the 'added_where' condition.
+ 3. Add the new KEYUSEs into JOIN::keyuse.
+ 4. Re-sort and re-filter the JOIN::keyuse array with the newly added
+ KEYUSE elements.
+
+ @retval 0 OK
+ @retval 1 memory allocation error
+*/
+
+int JOIN::reoptimize(Item *added_where, table_map join_tables,
+ POSITION *save_best_positions)
+{
+ DYNAMIC_ARRAY added_keyuse;
+ SARGABLE_PARAM *sargables= 0; /* Used only as a dummy parameter. */
+
+ if (my_init_dynamic_array(&added_keyuse, sizeof(KEYUSE), 20, 64))
+ {
+ delete_dynamic(&added_keyuse);
+ return 1;
+ }
+
+ /* Re-run the REF optimizer to take into account the new conditions. */
+ if (update_ref_and_keys(thd, &added_keyuse, join_tab, tables, added_where,
+ ~outer_join, select_lex, &sargables))
+ {
+ delete_dynamic(&added_keyuse);
+ return 1;
+ }
+
+ if (!added_keyuse.elements)
+ {
+ /* No need to optimize if no new access methods were discovered. */
+ if (save_best_positions)
+ memcpy((uchar*) best_positions, (uchar*) save_best_positions,
+ sizeof(POSITION) * (tables + 1));
+ delete_dynamic(&added_keyuse);
+ return 0;
+ }
+
+ /* Add the new access methods to the keyuse array. */
+ if (!keyuse.buffer &&
+ my_init_dynamic_array(&keyuse, sizeof(KEYUSE), 20, 64))
+ {
+ delete_dynamic(&added_keyuse);
+ return 1;
+ }
+ allocate_dynamic(&keyuse, keyuse.elements + added_keyuse.elements);
+ memcpy(keyuse.buffer + keyuse.elements * keyuse.size_of_element,
+ added_keyuse.buffer,
+ (size_t) added_keyuse.elements * added_keyuse.size_of_element);
+ keyuse.elements+= added_keyuse.elements;
+ delete_dynamic(&added_keyuse);
+
+ if (sort_and_filter_keyuse(&keyuse))
+ return 1;
+ optimize_keyuse(this, &keyuse);
+
+ /* Re-run the join optimizer to compute a new query plan. */
+ if (choose_plan(this, join_tables))
+ return 1;
+
+ return 0;
+}
+
/**
@} (end of group Query_Optimizer)
*/
=== modified file 'sql/sql_select.h'
--- sql/sql_select.h 2010-07-10 10:37:30 +0000
+++ sql/sql_select.h 2010-10-04 15:26:55 +0000
@@ -1369,8 +1369,21 @@
class JOIN :public Sql_alloc
{
+private:
JOIN(const JOIN &rhs); /**< not implemented */
JOIN& operator=(const JOIN &rhs); /**< not implemented */
+
+protected:
+ /* Support for plan reoptimization with rewritten conditions. */
+ int reoptimize(Item *added_where, table_map join_tables,
+ POSITION *save_best_positions);
+ int save_query_plan(DYNAMIC_ARRAY *save_keyuse, POSITION *save_positions,
+ KEYUSE **save_join_tab_keyuse,
+ key_map *save_join_tab_checked_keys);
+ void restore_query_plan(DYNAMIC_ARRAY *save_keyuse, POSITION *save_positions,
+ KEYUSE **save_join_tab_keyuse,
+ key_map *save_join_tab_checked_keys);
+
public:
JOIN_TAB *join_tab,**best_ref;
JOIN_TAB **map2table; ///< mapping between table indexes and JOIN_TABs
@@ -1573,8 +1586,15 @@
bool union_part; ///< this subselect is part of union
bool optimized; ///< flag to avoid double optimization in EXPLAIN
+ bool initialized; ///< flag to avoid double init_execution calls
Array<Item_in_subselect> sj_subselects;
+ /*
+ Additional WHERE and HAVING predicates to be considered for IN=>EXISTS
+ subquery transformation of a JOIN object.
+ */
+ Item *in_to_exists_where;
+ Item *in_to_exists_having;
/* Temporary tables used to weed-out semi-join duplicates */
List<TABLE> sj_tmp_tables;
@@ -1649,6 +1669,7 @@
ref_pointer_array_size= 0;
zero_result_cause= 0;
optimized= 0;
+ initialized= 0;
cond_equal= 0;
group_optimized_away= 0;
@@ -1662,6 +1683,8 @@
no_const_tables= FALSE;
first_select= sub_select;
+ in_to_exists_where= NULL;
+ in_to_exists_having= NULL;
}
int prepare(Item ***rref_pointer_array, TABLE_LIST *tables, uint wind_num,
@@ -1670,12 +1693,13 @@
SELECT_LEX_UNIT *unit);
int optimize();
int reinit();
+ int init_execution();
void exec();
int destroy();
void restore_tmp();
bool alloc_func_list();
bool flatten_subqueries();
- bool setup_subquery_materialization();
+ bool optimize_unflattened_subqueries();
bool make_sum_func_list(List<Item> &all_fields, List<Item> &send_fields,
bool before_group_by, bool recompute= FALSE);
@@ -1735,6 +1759,7 @@
NULL : join_tab+const_tables;
}
bool setup_subquery_caches();
+ bool choose_subquery_plan(table_map join_tables);
private:
/**
TRUE if the query contains an aggregate function but has no GROUP
=== modified file 'sql/sql_union.cc'
--- sql/sql_union.cc 2010-07-10 10:37:30 +0000
+++ sql/sql_union.cc 2010-10-04 15:26:55 +0000
@@ -136,6 +136,22 @@
}
+/**
+ Reset and empty the temporary table that stores the materialized query result.
+
+ @note The cleanup performed here is exactly the same as for the two temp
+ tables of JOIN - exec_tmp_table_[1 | 2].
+*/
+
+void select_union::cleanup()
+{
+ table->file->extra(HA_EXTRA_RESET_STATE);
+ table->file->ha_delete_all_rows();
+ free_io_cache(table);
+ filesort_free_buffers(table,0);
+}
+
+
/*
initialization procedures before fake_select_lex preparation()
@@ -170,6 +186,8 @@
{
(*order->item)->walk(&Item::change_context_processor, 0,
(uchar*) &fake_select_lex->context);
+ (*order->item)->walk(&Item::set_fake_select_as_master_processor, 0,
+ (uchar*) fake_select_lex);
}
}
@@ -255,6 +273,18 @@
can_skip_order_by= is_union_select && !(sl->braces && sl->explicit_limit);
+ /*
+ Remove all references from the select_lex_units to the subqueries that
+ are inside the ORDER BY clause.
+ */
+ if (can_skip_order_by)
+ {
+ for (ORDER *ord= (ORDER *)sl->order_list.first; ord; ord= ord->next)
+ {
+ (*ord->item)->walk(&Item::eliminate_subselect_processor, FALSE, NULL);
+ }
+ }
+
saved_error= join->prepare(&sl->ref_pointer_array,
(TABLE_LIST*) sl->table_list.first,
sl->with_wild,
=== modified file 'sql/sql_update.cc'
--- sql/sql_update.cc 2010-07-10 10:37:30 +0000
+++ sql/sql_update.cc 2010-10-04 15:26:55 +0000
@@ -290,6 +290,10 @@
DBUG_RETURN(1); /* purecov: inspected */
}
+ /* Apply the IN=>EXISTS transformation to all subqueries and optimize them. */
+ if (select_lex->optimize_unflattened_subqueries())
+ DBUG_RETURN(TRUE);
+
if (select_lex->inner_refs_list.elements &&
fix_inner_refs(thd, all_fields, select_lex, select_lex->ref_pointer_array))
DBUG_RETURN(1);