maria-developers team mailing list archive
-
maria-developers team
-
Mailing list archive
-
Message #11974
Re: [Commits] d48294d15c4: MDEV-13694: Wrong result upon GROUP BY with orderby_uses_equalities=on
Hi Varun,
This patch is a step in the right direction, but I think we're not quite there
yet.
The patch adds logic about semi-join materialization into the filesort code
This makes things really messy.
I think, some isolation would be beneficial. The first suggestion:
In Sort_param (as this structure is visible in find_all_keys), add members that control
the behavior inside find_all_keys:
- unpack_function (default is NULL)
- bool set_all_read_bits (default is FALSE)
Now, Sort_param is allocated in filesort(), so we will have to add the same
members in "class Filesort" and have filesort() copy them to Sort_param
(necessary because find_all_keys() doesn't see the "Filesort" object).
Then, these members can be set in the SQL layer e.g. in JOIN::add_sorting_to_table
which creates the Filesort structure. (It would be ideal if semi-join code
set them, but this is probably not possible to achieve as Filesort is set up
after the semi-join code is set up).
If after all these changes "JOIN_TAB::unpacking_to_base_table_fields()" is
still there, please rename it to "unpack_..." so that it follows the
established convention.
That's all the input.
On Sat, Sep 28, 2019 at 02:12:01PM +0530, Varun wrote:
> revision-id: d48294d15c4895215d5facef97fc80c03cd6b4b0 (mariadb-10.4.4-341-gd48294d15c4)
> parent(s): a340af922361e3958e5d6653c8b840771db282f2
> author: Varun Gupta
> committer: Varun Gupta
> timestamp: 2019-09-28 13:06:44 +0530
> message:
>
> MDEV-13694: Wrong result upon GROUP BY with orderby_uses_equalities=on
>
> For the case when the SJM scan table is the first table in the join order,
> then if we want to do the sorting on the SJM scan table, then we need to
> make sure that we unpack the values to base table fields in two cases:
> 1) Reading the SJM table and writing the sort-keys inside the sort-buffer
> 2) Reading the sorted data from the sort file
>
> ---
> mysql-test/main/order_by.result | 138 +++++++++++++++++++++++++++++++++++++++-
> mysql-test/main/order_by.test | 34 ++++++++++
> sql/filesort.cc | 17 +++++
> sql/opt_subselect.cc | 10 ++-
> sql/records.cc | 13 ++++
> sql/records.h | 1 +
> sql/sql_select.cc | 89 ++++++++++----------------
> sql/sql_select.h | 4 +-
> sql/table.h | 1 +
> 9 files changed, 246 insertions(+), 61 deletions(-)
>
> diff --git a/mysql-test/main/order_by.result b/mysql-test/main/order_by.result
> index b059cc686cd..e74583670fc 100644
> --- a/mysql-test/main/order_by.result
> +++ b/mysql-test/main/order_by.result
> @@ -3322,7 +3322,7 @@ WHERE books.library_id = 8663 AND
> books.scheduled_for_removal=0 )
> ORDER BY wings.id;
> id select_type table type possible_keys key key_len ref rows filtered Extra
> -1 PRIMARY <subquery2> ALL distinct_key NULL NULL NULL 2 100.00 Using temporary; Using filesort
> +1 PRIMARY <subquery2> ALL distinct_key NULL NULL NULL 2 100.00 Using filesort
> 1 PRIMARY wings eq_ref PRIMARY PRIMARY 4 test.books.wings_id 1 100.00
> 2 MATERIALIZED books ref library_idx library_idx 4 const 2 100.00 Using where
> Warnings:
> @@ -3436,3 +3436,139 @@ Note 1003 select `test`.`t4`.`a` AS `a`,`test`.`t4`.`b` AS `b`,`test`.`t4`.`c` A
> set histogram_size=@tmp_h, histogram_type=@tmp_ht, use_stat_tables=@tmp_u,
> optimizer_use_condition_selectivity=@tmp_o;
> drop table t1,t2,t3,t4;
> +#
> +# MDEV-13694: Wrong result upon GROUP BY with orderby_uses_equalities=on
> +#
> +CREATE TABLE t1 (a INT, b int, primary key(a));
> +CREATE TABLE t2 (a INT, b INT);
> +INSERT INTO t1 (a,b) VALUES (58,1),(96,2),(273,3),(23,4),(231,5),(525,6),
> +(2354,7),(321421,3),(535,2),(4535,3);
> +INSERT INTO t2 (a,b) VALUES (58,3),(96,3),(273,3);
> +# Join order should have the SJM scan table as the first table for both
> +# the queries with GROUP BY and ORDER BY clause.
> +EXPLAIN SELECT t1.a
> +FROM t1
> +WHERE t1.a IN (SELECT a FROM t2 WHERE b=3)
> +ORDER BY t1.a DESC;
> +id select_type table type possible_keys key key_len ref rows Extra
> +1 PRIMARY <subquery2> ALL distinct_key NULL NULL NULL 3 Using filesort
> +1 PRIMARY t1 eq_ref PRIMARY PRIMARY 4 test.t2.a 1 Using index
> +2 MATERIALIZED t2 ALL NULL NULL NULL NULL 3 Using where
> +EXPLAIN FORMAT=JSON SELECT t1.a
> +FROM t1
> +WHERE t1.a IN (SELECT a FROM t2 WHERE b=3)
> +ORDER BY t1.a DESC;
> +EXPLAIN
> +{
> + "query_block": {
> + "select_id": 1,
> + "read_sorted_file": {
> + "filesort": {
> + "sort_key": "t1.a desc",
> + "table": {
> + "table_name": "<subquery2>",
> + "access_type": "ALL",
> + "possible_keys": ["distinct_key"],
> + "rows": 3,
> + "filtered": 100,
> + "materialized": {
> + "unique": 1,
> + "query_block": {
> + "select_id": 2,
> + "table": {
> + "table_name": "t2",
> + "access_type": "ALL",
> + "rows": 3,
> + "filtered": 100,
> + "attached_condition": "t2.b = 3 and t2.a is not null"
> + }
> + }
> + }
> + }
> + }
> + },
> + "table": {
> + "table_name": "t1",
> + "access_type": "eq_ref",
> + "possible_keys": ["PRIMARY"],
> + "key": "PRIMARY",
> + "key_length": "4",
> + "used_key_parts": ["a"],
> + "ref": ["test.t2.a"],
> + "rows": 1,
> + "filtered": 100,
> + "using_index": true
> + }
> + }
> +}
> +SELECT t1.a
> +FROM t1
> +WHERE t1.a IN (SELECT a FROM t2 WHERE b=3)
> +ORDER BY t1.a DESC;
> +a
> +273
> +96
> +58
> +EXPLAIN SELECT t1.a, group_concat(t1.b)
> +FROM t1
> +WHERE t1.a IN (SELECT a FROM t2 WHERE b=3)
> +GROUP BY t1.a DESC;
> +id select_type table type possible_keys key key_len ref rows Extra
> +1 PRIMARY <subquery2> ALL distinct_key NULL NULL NULL 3 Using filesort
> +1 PRIMARY t1 eq_ref PRIMARY PRIMARY 4 test.t2.a 1
> +2 MATERIALIZED t2 ALL NULL NULL NULL NULL 3 Using where
> +EXPLAIN FORMAT=JSON SELECT t1.a, group_concat(t1.b)
> +FROM t1
> +WHERE t1.a IN (SELECT a FROM t2 WHERE b=3)
> +GROUP BY t1.a DESC;
> +EXPLAIN
> +{
> + "query_block": {
> + "select_id": 1,
> + "read_sorted_file": {
> + "filesort": {
> + "sort_key": "t1.a desc",
> + "table": {
> + "table_name": "<subquery2>",
> + "access_type": "ALL",
> + "possible_keys": ["distinct_key"],
> + "rows": 3,
> + "filtered": 100,
> + "materialized": {
> + "unique": 1,
> + "query_block": {
> + "select_id": 2,
> + "table": {
> + "table_name": "t2",
> + "access_type": "ALL",
> + "rows": 3,
> + "filtered": 100,
> + "attached_condition": "t2.b = 3 and t2.a is not null"
> + }
> + }
> + }
> + }
> + }
> + },
> + "table": {
> + "table_name": "t1",
> + "access_type": "eq_ref",
> + "possible_keys": ["PRIMARY"],
> + "key": "PRIMARY",
> + "key_length": "4",
> + "used_key_parts": ["a"],
> + "ref": ["test.t2.a"],
> + "rows": 1,
> + "filtered": 100
> + }
> + }
> +}
> +SELECT t1.a, group_concat(t1.b)
> +FROM t1
> +WHERE t1.a IN (SELECT a FROM t2 WHERE b=3)
> +GROUP BY t1.a DESC;
> +a group_concat(t1.b)
> +273 3
> +96 2
> +58 1
> +DROP TABLE t1, t2;
> diff --git a/mysql-test/main/order_by.test b/mysql-test/main/order_by.test
> index 934c503302f..b3e43d27e2f 100644
> --- a/mysql-test/main/order_by.test
> +++ b/mysql-test/main/order_by.test
> @@ -2276,3 +2276,37 @@ set histogram_size=@tmp_h, histogram_type=@tmp_ht, use_stat_tables=@tmp_u,
> optimizer_use_condition_selectivity=@tmp_o;
>
> drop table t1,t2,t3,t4;
> +
> +
> +--echo #
> +--echo # MDEV-13694: Wrong result upon GROUP BY with orderby_uses_equalities=on
> +--echo #
> +
> +CREATE TABLE t1 (a INT, b int, primary key(a));
> +CREATE TABLE t2 (a INT, b INT);
> +
> +INSERT INTO t1 (a,b) VALUES (58,1),(96,2),(273,3),(23,4),(231,5),(525,6),
> + (2354,7),(321421,3),(535,2),(4535,3);
> +INSERT INTO t2 (a,b) VALUES (58,3),(96,3),(273,3);
> +
> +--echo # Join order should have the SJM scan table as the first table for both
> +--echo # the queries with GROUP BY and ORDER BY clause.
> +
> +let $query= SELECT t1.a
> + FROM t1
> + WHERE t1.a IN (SELECT a FROM t2 WHERE b=3)
> + ORDER BY t1.a DESC;
> +
> +eval EXPLAIN $query;
> +eval EXPLAIN FORMAT=JSON $query;
> +eval $query;
> +
> +let $query= SELECT t1.a, group_concat(t1.b)
> + FROM t1
> + WHERE t1.a IN (SELECT a FROM t2 WHERE b=3)
> + GROUP BY t1.a DESC;
> +
> +eval EXPLAIN $query;
> +eval EXPLAIN FORMAT=JSON $query;
> +eval $query;
> +DROP TABLE t1, t2;
> diff --git a/sql/filesort.cc b/sql/filesort.cc
> index 3f4291cfb1f..e5c83293e9f 100644
> --- a/sql/filesort.cc
> +++ b/sql/filesort.cc
> @@ -716,11 +716,21 @@ static ha_rows find_all_keys(THD *thd, Sort_param *param, SQL_SELECT *select,
> *found_rows= 0;
> ref_pos= &file->ref[0];
> next_pos=ref_pos;
> + JOIN_TAB *tab= sort_form->reginfo.join_tab;
> + JOIN *join= tab ? tab->join : NULL;
> + bool first_is_in_sjm_nest= FALSE;
>
> DBUG_EXECUTE_IF("show_explain_in_find_all_keys",
> dbug_serve_apcs(thd, 1);
> );
>
> + if (join && join->table_count != join->const_tables &&
> + (join->join_tab + join->const_tables == tab))
> + {
> + TABLE_LIST *tbl_for_first= sort_form->pos_in_table_list;
> + first_is_in_sjm_nest= tbl_for_first && tbl_for_first->is_sjm_scan_table();
> + }
> +
> if (!quick_select)
> {
> next_pos=(uchar*) 0; /* Find records in sequence */
> @@ -756,13 +766,20 @@ static ha_rows find_all_keys(THD *thd, Sort_param *param, SQL_SELECT *select,
> goto err;
> }
>
> + if (first_is_in_sjm_nest)
> + sort_form->column_bitmaps_set(save_read_set, save_write_set);
> +
> DEBUG_SYNC(thd, "after_index_merge_phase1");
> for (;;)
> {
> if (quick_select)
> error= select->quick->get_next();
> else /* Not quick-select */
> + {
> error= file->ha_rnd_next(sort_form->record[0]);
> + if (first_is_in_sjm_nest)
> + tab->unpacking_to_base_table_fields();
> + }
> if (unlikely(error))
> break;
> file->position(sort_form->record[0]);
> diff --git a/sql/opt_subselect.cc b/sql/opt_subselect.cc
> index 87458357865..f837a6394af 100644
> --- a/sql/opt_subselect.cc
> +++ b/sql/opt_subselect.cc
> @@ -4252,11 +4252,11 @@ bool setup_sj_materialization_part2(JOIN_TAB *sjm_tab)
> sjm_tab->type= JT_ALL;
>
> /* Initialize full scan */
> - sjm_tab->read_first_record= join_read_record_no_init;
> + sjm_tab->read_first_record= join_init_read_record;
> sjm_tab->read_record.copy_field= sjm->copy_field;
> sjm_tab->read_record.copy_field_end= sjm->copy_field +
> sjm->sjm_table_cols.elements;
> - sjm_tab->read_record.read_record_func= rr_sequential_and_unpack;
> + sjm_tab->read_record.read_record_func= read_record_func_for_rr_and_unpack;
> }
>
> sjm_tab->bush_children->end[-1].next_select= end_sj_materialize;
> @@ -7105,3 +7105,9 @@ bool Item_in_subselect::pushdown_cond_for_in_subquery(THD *thd, Item *cond)
> thd->lex->current_select= save_curr_select;
> DBUG_RETURN(FALSE);
> }
> +
> +
> +bool TABLE_LIST::is_sjm_scan_table()
> +{
> + return is_active_sjm() && sj_mat_info->is_sj_scan;
> +}
> diff --git a/sql/records.cc b/sql/records.cc
> index 3d709182a4e..f6885f773d5 100644
> --- a/sql/records.cc
> +++ b/sql/records.cc
> @@ -709,3 +709,16 @@ static int rr_cmp(uchar *a,uchar *b)
> return (int) a[7] - (int) b[7];
> #endif
> }
> +
> +
> +int read_record_func_for_rr_and_unpack(READ_RECORD *info)
> +{
> + int error;
> + if ((error= info->read_record_func_and_unpack_calls(info)))
> + return error;
> +
> + for (Copy_field *cp= info->copy_field; cp != info->copy_field_end; cp++)
> + (*cp->do_copy)(cp);
> +
> + return error;
> +}
> diff --git a/sql/records.h b/sql/records.h
> index faf0d13c9a9..037a06b9d34 100644
> --- a/sql/records.h
> +++ b/sql/records.h
> @@ -55,6 +55,7 @@ struct READ_RECORD
> TABLE *table; /* Head-form */
> Unlock_row_func unlock_row;
> Read_func read_record_func;
> + Read_func read_record_func_and_unpack_calls;
> THD *thd;
> SQL_SELECT *select;
> uint ref_length, reclength, rec_cache_size, error_offset;
> diff --git a/sql/sql_select.cc b/sql/sql_select.cc
> index 36d9eda3383..28bc57c692f 100644
> --- a/sql/sql_select.cc
> +++ b/sql/sql_select.cc
> @@ -14015,37 +14015,8 @@ remove_const(JOIN *join,ORDER *first_order, COND *cond,
> can be used without tmp. table.
> */
> bool can_subst_to_first_table= false;
> - bool first_is_in_sjm_nest= false;
> - if (first_is_base_table)
> - {
> - TABLE_LIST *tbl_for_first=
> - join->join_tab[join->const_tables].table->pos_in_table_list;
> - first_is_in_sjm_nest= tbl_for_first->sj_mat_info &&
> - tbl_for_first->sj_mat_info->is_used;
> - }
> - /*
> - Currently we do not employ the optimization that uses multiple
> - equalities for ORDER BY to remove tmp table in the case when
> - the first table happens to be the result of materialization of
> - a semi-join nest ( <=> first_is_in_sjm_nest == true).
> -
> - When a semi-join nest is materialized and scanned to look for
> - possible matches in the remaining tables for every its row
> - the fields from the result of materialization are copied
> - into the record buffers of tables from the semi-join nest.
> - So these copies are used to access the remaining tables rather
> - than the fields from the result of materialization.
> -
> - Unfortunately now this so-called 'copy back' technique is
> - supported only if the rows are scanned with the rr_sequential
> - function, but not with other rr_* functions that are employed
> - when the result of materialization is required to be sorted.
> -
> - TODO: either to support 'copy back' technique for the above case,
> - or to get rid of this technique altogether.
> - */
> if (optimizer_flag(join->thd, OPTIMIZER_SWITCH_ORDERBY_EQ_PROP) &&
> - first_is_base_table && !first_is_in_sjm_nest &&
> + first_is_base_table &&
> order->item[0]->real_item()->type() == Item::FIELD_ITEM &&
> join->cond_equal)
> {
> @@ -19922,19 +19893,6 @@ do_select(JOIN *join, Procedure *procedure)
> }
>
>
> -int rr_sequential_and_unpack(READ_RECORD *info)
> -{
> - int error;
> - if (unlikely((error= rr_sequential(info))))
> - return error;
> -
> - for (Copy_field *cp= info->copy_field; cp != info->copy_field_end; cp++)
> - (*cp->do_copy)(cp);
> -
> - return error;
> -}
> -
> -
> /**
> @brief
> Instantiates temporary table
> @@ -21223,6 +21181,8 @@ bool test_if_use_dynamic_range_scan(JOIN_TAB *join_tab)
>
> int join_init_read_record(JOIN_TAB *tab)
> {
> + bool need_unpacking= FALSE;
> + JOIN *join= tab->join;
> /*
> Note: the query plan tree for the below operations is constructed in
> save_agg_explain_data.
> @@ -21232,6 +21192,12 @@ int join_init_read_record(JOIN_TAB *tab)
> if (tab->filesort && tab->sort_table()) // Sort table.
> return 1;
>
> + if (join->top_join_tab_count != join->const_tables)
> + {
> + TABLE_LIST *tbl= tab->table->pos_in_table_list;
> + need_unpacking= tbl ? tbl->is_sjm_scan_table() : FALSE;
> + }
> +
> tab->build_range_rowid_filter_if_needed();
>
> DBUG_EXECUTE_IF("kill_join_init_read_record",
> @@ -21249,16 +21215,6 @@ int join_init_read_record(JOIN_TAB *tab)
> if (!tab->preread_init_done && tab->preread_init())
> return 1;
>
> -
> - if (init_read_record(&tab->read_record, tab->join->thd, tab->table,
> - tab->select, tab->filesort_result, 1,1, FALSE))
> - return 1;
> - return tab->read_record.read_record();
> -}
> -
> -int
> -join_read_record_no_init(JOIN_TAB *tab)
> -{
> Copy_field *save_copy, *save_copy_end;
>
> /*
> @@ -21268,12 +21224,20 @@ join_read_record_no_init(JOIN_TAB *tab)
> save_copy= tab->read_record.copy_field;
> save_copy_end= tab->read_record.copy_field_end;
>
> - init_read_record(&tab->read_record, tab->join->thd, tab->table,
> - tab->select, tab->filesort_result, 1, 1, FALSE);
> +
> + if (init_read_record(&tab->read_record, tab->join->thd, tab->table,
> + tab->select, tab->filesort_result, 1, 1, FALSE))
> + return 1;
>
> tab->read_record.copy_field= save_copy;
> tab->read_record.copy_field_end= save_copy_end;
> - tab->read_record.read_record_func= rr_sequential_and_unpack;
> +
> + if (need_unpacking)
> + {
> + tab->read_record.read_record_func_and_unpack_calls=
> + tab->read_record.read_record_func;
> + tab->read_record.read_record_func = read_record_func_for_rr_and_unpack;
> + }
>
> return tab->read_record.read_record();
> }
> @@ -28981,6 +28945,19 @@ void build_notnull_conds_for_inner_nest_of_outer_join(JOIN *join,
> }
>
>
> +/*
> + @brief
> + Unpacking temp table fields to base table fields.
> +*/
> +
> +void JOIN_TAB::unpacking_to_base_table_fields()
> +{
> + for (Copy_field *cp= read_record.copy_field;
> + cp != read_record.copy_field_end; cp++)
> + (*cp->do_copy)(cp);
> +}
> +
> +
> /**
> @} (end of group Query_Optimizer)
> */
> diff --git a/sql/sql_select.h b/sql/sql_select.h
> index 4f7bf49f635..545d4a788cc 100644
> --- a/sql/sql_select.h
> +++ b/sql/sql_select.h
> @@ -223,7 +223,7 @@ typedef enum_nested_loop_state
> (*Next_select_func)(JOIN *, struct st_join_table *, bool);
> Next_select_func setup_end_select_func(JOIN *join, JOIN_TAB *tab);
> int rr_sequential(READ_RECORD *info);
> -int rr_sequential_and_unpack(READ_RECORD *info);
> +int read_record_func_for_rr_and_unpack(READ_RECORD *info);
> Item *remove_pushed_top_conjuncts(THD *thd, Item *cond);
> Item *and_new_conditions_to_optimized_cond(THD *thd, Item *cond,
> COND_EQUAL **cond_eq,
> @@ -676,6 +676,7 @@ typedef struct st_join_table {
> table_map remaining_tables);
> bool fix_splitting(SplM_plan_info *spl_plan, table_map remaining_tables,
> bool is_const_table);
> + void unpacking_to_base_table_fields();
> } JOIN_TAB;
>
>
> @@ -2352,7 +2353,6 @@ create_virtual_tmp_table(THD *thd, Field *field)
>
> int test_if_item_cache_changed(List<Cached_item> &list);
> int join_init_read_record(JOIN_TAB *tab);
> -int join_read_record_no_init(JOIN_TAB *tab);
> void set_position(JOIN *join,uint idx,JOIN_TAB *table,KEYUSE *key);
> inline Item * and_items(THD *thd, Item* cond, Item *item)
> {
> diff --git a/sql/table.h b/sql/table.h
> index 1a7e5fbd4dc..35ba9bbb95d 100644
> --- a/sql/table.h
> +++ b/sql/table.h
> @@ -2622,6 +2622,7 @@ struct TABLE_LIST
> */
> const char *get_table_name() const { return view != NULL ? view_name.str : table_name.str; }
> bool is_active_sjm();
> + bool is_sjm_scan_table();
> bool is_jtbm() { return MY_TEST(jtbm_subselect != NULL); }
> st_select_lex_unit *get_unit();
> st_select_lex *get_single_select();
> _______________________________________________
> commits mailing list
> commits@xxxxxxxxxxx
> https://lists.askmonty.org/cgi-bin/mailman/listinfo/commits
--
BR
Sergei
--
Sergei Petrunia, Software Developer
MariaDB Corporation | Skype: sergefp | Blog: http://s.petrunia.net/blog