maria-developers team mailing list archive
-
maria-developers team
-
Mailing list archive
-
Message #06635
FW: Rev 3758: MDEV-5470: Cost-based subquery item pushdown must take into account semi-joins
Hi Timour,
Please find below a patch that should implement execution-time protection
against malfunction between condition moving and semi-joins.
There needs also to be an optimization-time counterpart, which should have the
same logic. Will you code it yourself or want me to?
----- Forwarded message from Sergei Petrunia <psergey@xxxxxxxxxxxx> -----
Date: Thu, 19 Dec 2013 02:49:22 +0400
From: Sergei Petrunia <psergey@xxxxxxxxxxxx>
To: commits@xxxxxxxxxxx
Subject: Rev 3758: MDEV-5470: Cost-based subquery item pushdown must take into account semi-joins
At http://bazaar.launchpad.net/~maria-captains/maria/10.0-mdev83
------------------------------------------------------------
revno: 3758
committer: Sergey Petrunya <psergey@xxxxxxxxxxxx>
branch nick: 10.0-mdev83
timestamp: Thu 2013-12-19 06:50:28 +0400
message:
MDEV-5470: Cost-based subquery item pushdown must take into account semi-joins
- When moving expensive conditions, take into account that semi-joins limit
where they can be moved.
- This is only execution part in JOIN::setup_dynamic_conditions(). Optimization
part is not yet present.
- No testcases yet.
- Moving inside SJ-Materialization nests doesn't seem to be supported (either
before this patch or after. This patch itself does take SJ-Materialization
into account)
=== modified file 'sql/item_cmpfunc.cc'
--- sql/item_cmpfunc.cc 2013-11-01 11:10:38 +0000
+++ sql/item_cmpfunc.cc 2013-12-19 02:50:28 +0000
@@ -6591,6 +6591,8 @@ void Item_func_collect_stats::dbug_print
}
#endif /*DBUG_OFF*/
+JOIN_TAB *find_last_tab_for_dynamic_cond(table_map cond_tables,
+ JOIN *join, JOIN_TAB *join_tab);
Item_func_dynamic_cond::Item_func_dynamic_cond(Item *cond, JOIN_TAB *atab, JOIN_TAB **ctab)
: Item_func_collect_stats(cond)
@@ -6624,13 +6626,15 @@ Item_func_dynamic_cond::Item_func_dynami
if (active_tab->bush_root_tab)
{
first_tab= active_tab->bush_root_tab->bush_children->start;
- last_tab= active_tab->bush_root_tab->bush_children->end;
+ //last_tab= active_tab->bush_root_tab->bush_children->end;
}
else
{
first_tab= join->join_tab + join->const_tables;
- last_tab= join->join_tab + join->top_join_tab_count;
+ //last_tab= join->join_tab + join->top_join_tab_count;
}
+ last_tab= find_last_tab_for_dynamic_cond(cond->used_tables(),
+ active_tab->join, active_tab);
DBUG_ASSERT(active_tab >= first_tab && active_tab < last_tab);
=== modified file 'sql/item_cmpfunc.h'
--- sql/item_cmpfunc.h 2013-10-18 19:56:55 +0000
+++ sql/item_cmpfunc.h 2013-12-19 02:50:28 +0000
@@ -561,14 +561,20 @@ class Item_func_collect_stats : public I
class Item_func_dynamic_cond : public Item_func_collect_stats
{
-protected:
+protected: // as if there were any ancestor classes?
List<Item_subselect> subqueries;
/* The join_tab where the condition is currently 'pushed'.*/
st_join_table *active_tab;
/* The join_tab currently being executed. */
st_join_table **exec_tab;
/* The boundaries of the sub-plan where this predicate can be pushed. */
- st_join_table *first_tab, *last_tab;
+ st_join_table *first_tab;
+ st_join_table *last_tab; //psergey-todo: this should point to the last tab.
+ // conditions depending on semi-join tables have
+ // limits re. how far they can be moved.
+public:
+ inline st_join_table * get_last_tab() { return last_tab; }
+protected:
/*
The relative access cost based on optimizer estimates. This is the cost
of all access methods per one record from the first table in the plan.
=== modified file 'sql/opt_subselect.cc'
--- sql/opt_subselect.cc 2013-11-04 15:56:09 +0000
+++ sql/opt_subselect.cc 2013-12-19 02:50:28 +0000
@@ -1480,6 +1480,7 @@ static bool convert_subq_to_sj(JOIN *par
sj_nest->sj_subq_pred= subq_pred;
sj_nest->original_subq_pred_used_tables= subq_pred->used_tables() |
subq_pred->left_expr->used_tables();
+ sj_nest->sj_strategys_join_tab= uint(-1);
/* Nests do not participate in those 'chains', so: */
/* sj_nest->next_leaf= sj_nest->next_local= sj_nest->next_global == NULL*/
emb_join_list->push_back(sj_nest);
@@ -4321,6 +4322,21 @@ int init_dups_weedout(JOIN *join, uint f
/*
+ For each SJ-inner table in [start_tab; end_tab], set emb_sj_nest to idx
+*/
+static void update_sj_strategys_join_tab(JOIN_TAB *start_tab,
+ JOIN_TAB *end_tab,
+ uint idx)
+{
+ for (JOIN_TAB *j= start_tab; j < end_tab; j++)
+ {
+ TABLE_LIST *emb_nest;
+ if ((emb_nest= j->emb_sj_nest))
+ emb_nest->sj_strategys_join_tab= idx;
+ }
+}
+
+/*
Setup the strategies to eliminate semi-join duplicates.
SYNOPSIS
@@ -4448,6 +4464,10 @@ int setup_semijoin_dups_elimination(JOIN
for (uint j= i; j < i + pos->n_sj_tables; j++)
join->join_tab[j].inside_loosescan_range= TRUE;
+
+ update_sj_strategys_join_tab(join->join_tab + i,
+ join->join_tab + i + pos->n_sj_tables,
+ i + pos->n_sj_tables - 1);
/* Calculate key length */
keylen= 0;
@@ -4461,6 +4481,7 @@ int setup_semijoin_dups_elimination(JOIN
tab[pos->n_sj_tables - 1].do_firstmatch= tab;
i+= pos->n_sj_tables;
pos+= pos->n_sj_tables;
+
break;
}
case SJ_OPT_DUPS_WEEDOUT:
@@ -4503,6 +4524,9 @@ int setup_semijoin_dups_elimination(JOIN
break;
}
}
+ update_sj_strategys_join_tab(join->join_tab + i,
+ join->join_tab + i + pos->n_sj_tables,
+ i + pos->n_sj_tables - 1);
init_dups_weedout(join, first_table, i, i + pos->n_sj_tables - first_table);
i+= pos->n_sj_tables;
@@ -4558,6 +4582,9 @@ int setup_semijoin_dups_elimination(JOIN
}
}
j[-1].do_firstmatch= jump_to;
+ update_sj_strategys_join_tab(join->join_tab + i,
+ join->join_tab + i + pos->n_sj_tables,
+ i + pos->n_sj_tables - 1);
i+= pos->n_sj_tables;
pos+= pos->n_sj_tables;
=== modified file 'sql/sql_select.cc'
--- sql/sql_select.cc 2013-11-08 12:36:02 +0000
+++ sql/sql_select.cc 2013-12-19 02:50:28 +0000
@@ -24608,8 +24608,8 @@ double JOIN::static_pushdown_cost(uint i
@param pred the predicate to be wrapped in Item_func_dynamic_cond
@param init_tab the initial 'active' JOIN_TAB where the dynamic predicate
is pushed to
- @param cur_tab pointer to the JOIN_TAB* that stores the current JOIN_TAB
- during execution
+ @param active_tab_ptr pointer to the JOIN_TAB* that stores the current JOIN_TAB
+ during execution
*/
static Item_func_collect_stats *
wrap_with_dynamic_conds(Item *pred, JOIN_TAB *init_tab, JOIN_TAB **active_tab_ptr,
@@ -24674,6 +24674,81 @@ wrap_with_dynamic_conds(Item *pred, JOIN
}
+/*
+ @brief Find the last JOIN_TAB that a condition can be attached.
+
+
+ @param cond_tables used_tables() of the condition that we intend to move.
+ @param join_tab The left-most join tab where the condition can be
+ attached (in other words, default attachment point)
+
+ @detail
+ In a regular inner join, one can take a condition that is attached to a
+ table $T, and move it to any table that comes after $T.
+
+ When semi-joins are present, this is no longer true. Detailed explanation
+ can be found in maria-developers@, email "Movable conditions and semi-joins".
+ In short:
+
+ - Inside an SJ-Materialization nest, a condition can be moved to any table to
+ the right, but it must stay within the nest. (We don't support nested
+ semi-joins, so one will not find semi-joins inside an SJ-M nest)
+
+ - All other Semi-Join strategies have a "range" where they apply. If a
+ condition depends on a table that is within a semi-join, it must not be
+ moved over the right edge of the range.
+*/
+
+JOIN_TAB *find_last_tab_for_dynamic_cond(table_map cond_tables,
+ JOIN *join, JOIN_TAB *join_tab)
+{
+ if (join_tab->bush_root_tab)
+ {
+ /*
+ We are inside SJ-Materialization nest. There are no semi-joins here,
+ return the last tab in the nest.
+ */
+ return join_tab->bush_root_tab->bush_children->end;
+ }
+
+ /* Start from the right-most bound */
+ uint last_tab = join->top_join_tab_count;
+
+ cond_tables= cond_tables & ~(PSEUDO_TABLE_BITS | join->const_table_map);
+ /*
+ Walk through semi-joins that this table depends on, and check their last
+ join tabs. The smallest last join tab sets the limit about how far right
+ the condition can be moved.
+ */
+ Table_map_iterator tm_it(cond_tables);
+ int tableno;
+ while ((tableno = tm_it.next_bit()) != Table_map_iterator::BITMAP_END)
+ {
+ TABLE_LIST *sj_nest;
+ if ((sj_nest= join->map2table[tableno]->emb_sj_nest))
+ {
+ if (sj_nest->is_active_sjm())
+ {
+ /*
+ Whoa this condition also depends on a table from within
+ SJ-Materialization nest. (Can happen possible because of equality
+ propagation/substitution). If we didn't return earlier in this
+ function, we're not inside a semi-join nest. A value that depends
+ on a table within a SJ-Materialization nest means that it depends on
+ the materialized table columns.
+ */
+ continue;
+ }
+
+ if (sj_nest->sj_strategys_join_tab != uint(-1) &&
+ sj_nest->sj_strategys_join_tab < last_tab)
+ last_tab= sj_nest->sj_strategys_join_tab;
+ }
+ }
+ return join->join_tab + last_tab;
+}
+
+
/**
Make expensive subqueries dynamically pushdownable.
*/
@@ -24703,7 +24778,12 @@ bool JOIN::setup_dynamic_conditions()
Update the least cardinality join_tab for each tab for the chosen join order.
Check if there are any conditions at all that need to be made dynamic.
*/
- min_tab= NULL;
+ min_tab= NULL;
+ /*
+ psergey-todo: the following loop should walk inside SJ-Materialization
+ nests, too. Maybe, this whole function should be invoked for each
+ SJ-Materilization nest.
+ */
for (tab= last_tab - 1; tab >= first_tab; tab--)
{
if (optimizer_flag(thd, OPTIMIZER_SWITCH_STATIC_COND_PUSHDOWN))
@@ -24746,7 +24826,8 @@ bool JOIN::setup_dynamic_conditions()
*/
for (tab= first_tab; tab < last_tab; tab++)
{
- Item_func_collect_stats *wrapped_select_cond= NULL, *cur_wrapped_cond;
+ Item_func_collect_stats *wrapped_select_cond= NULL;
+ Item_func_dynamic_cond *cur_wrapped_cond;
if (tab->select_cond &&
!(wrapped_select_cond=
@@ -24800,14 +24881,29 @@ bool JOIN::setup_dynamic_conditions()
Add all dynamic conditions pushed to previous JOIN_TABs also to the
current JOIN_TAB. This allows to "move" a dynamic condition
from one tab to another by enabling it for the corrseponding tab.
+
+ psergey-todo: conditions have limits on how far to the right they can be
+ moved (Item_func_dynamic_cond::last_tab). Don't attach conditions to
+ where they can't be moved.
*/
if (!prev_dynamic_conds.is_empty())
{
- List_iterator_fast<Item_func_dynamic_cond> dyn_cond_it(prev_dynamic_conds);
- if (!wrapped_select_cond)
- wrapped_select_cond= dyn_cond_it++;
+ List_iterator<Item_func_dynamic_cond> dyn_cond_it(prev_dynamic_conds);
while ((cur_wrapped_cond= dyn_cond_it++))
- wrapped_select_cond->add_pred(cur_wrapped_cond);
+ {
+ if (!wrapped_select_cond)
+ wrapped_select_cond= cur_wrapped_cond;
+ else
+ wrapped_select_cond->add_pred(cur_wrapped_cond);
+
+ /*
+ psergey-todo: conditions have limits on how far to the right they can be
+ moved (Item_func_dynamic_cond::last_tab). Don't attach conditions to
+ where they can't be moved.
+ */
+ if (cur_wrapped_cond->get_last_tab() == tab)
+ dyn_cond_it.remove();
+ }
}
prev_dynamic_conds.concat(&cur_dynamic_conds);
cur_dynamic_conds.empty();
=== modified file 'sql/table.h'
--- sql/table.h 2013-09-25 18:07:06 +0000
+++ sql/table.h 2013-12-19 02:50:28 +0000
@@ -1721,6 +1721,9 @@ struct TABLE_LIST
table_map sj_inner_tables;
/* Number of IN-compared expressions */
uint sj_in_exprs;
+
+ //psergey-todo: add the index of last table in the join order
+ uint sj_strategys_join_tab;
/* If this is a non-jtbm semi-join nest: corresponding subselect predicate */
Item_in_subselect *sj_subq_pred;
----- End forwarded message -----
--
BR
Sergei
--
Sergei Petrunia, Software Developer
MariaDB | Skype: sergefp | Blog: http://s.petrunia.net/blog