← Back to team overview

maria-developers team mailing list archive

Re: [Commits] d48294d15c4: MDEV-13694: Wrong result upon GROUP BY with orderby_uses_equalities=on

 

Hi Varun,

This patch is a step in the right direction, but I think we're not quite there
yet.

The patch adds logic about semi-join materialization into the filesort code
This makes things really messy.

I think, some isolation would be beneficial. The first suggestion:

In Sort_param (as this structure is visible in find_all_keys), add members that control 
the behavior inside find_all_keys:

- unpack_function (default is NULL)
- bool set_all_read_bits (default is FALSE)

Now, Sort_param is allocated in filesort(), so we will have to add the same
members in "class Filesort" and have filesort() copy them to Sort_param
(necessary because find_all_keys() doesn't see the "Filesort" object).

Then, these members can be set in the SQL layer e.g. in JOIN::add_sorting_to_table 
which creates the Filesort structure.  (It would be ideal if semi-join code 
set them, but this is probably not possible to achieve as Filesort is set up
after the semi-join code is set up).

If after all these changes  "JOIN_TAB::unpacking_to_base_table_fields()" is
still there, please rename it to "unpack_..." so that it follows the
established convention.

That's all the input.

On Sat, Sep 28, 2019 at 02:12:01PM +0530, Varun wrote:
> revision-id: d48294d15c4895215d5facef97fc80c03cd6b4b0 (mariadb-10.4.4-341-gd48294d15c4)
> parent(s): a340af922361e3958e5d6653c8b840771db282f2
> author: Varun Gupta
> committer: Varun Gupta
> timestamp: 2019-09-28 13:06:44 +0530
> message:
> 
> MDEV-13694: Wrong result upon GROUP BY with orderby_uses_equalities=on
> 
> For the case when  the SJM scan table is the first table in the join order,
> then if we want to do the sorting on the SJM scan table, then we need to
> make sure that we unpack the values to base table fields in two cases:
>   1) Reading the SJM table and writing the sort-keys inside the sort-buffer
>   2) Reading the sorted data from the sort file
> 
> ---
>  mysql-test/main/order_by.result | 138 +++++++++++++++++++++++++++++++++++++++-
>  mysql-test/main/order_by.test   |  34 ++++++++++
>  sql/filesort.cc                 |  17 +++++
>  sql/opt_subselect.cc            |  10 ++-
>  sql/records.cc                  |  13 ++++
>  sql/records.h                   |   1 +
>  sql/sql_select.cc               |  89 ++++++++++----------------
>  sql/sql_select.h                |   4 +-
>  sql/table.h                     |   1 +
>  9 files changed, 246 insertions(+), 61 deletions(-)
> 
> diff --git a/mysql-test/main/order_by.result b/mysql-test/main/order_by.result
> index b059cc686cd..e74583670fc 100644
> --- a/mysql-test/main/order_by.result
> +++ b/mysql-test/main/order_by.result
> @@ -3322,7 +3322,7 @@ WHERE books.library_id = 8663 AND
>  books.scheduled_for_removal=0 )
>  ORDER BY wings.id;
>  id	select_type	table	type	possible_keys	key	key_len	ref	rows	filtered	Extra
> -1	PRIMARY	<subquery2>	ALL	distinct_key	NULL	NULL	NULL	2	100.00	Using temporary; Using filesort
> +1	PRIMARY	<subquery2>	ALL	distinct_key	NULL	NULL	NULL	2	100.00	Using filesort
>  1	PRIMARY	wings	eq_ref	PRIMARY	PRIMARY	4	test.books.wings_id	1	100.00	
>  2	MATERIALIZED	books	ref	library_idx	library_idx	4	const	2	100.00	Using where
>  Warnings:
> @@ -3436,3 +3436,139 @@ Note	1003	select `test`.`t4`.`a` AS `a`,`test`.`t4`.`b` AS `b`,`test`.`t4`.`c` A
>  set histogram_size=@tmp_h, histogram_type=@tmp_ht, use_stat_tables=@tmp_u,
>  optimizer_use_condition_selectivity=@tmp_o;
>  drop table t1,t2,t3,t4;
> +#
> +# MDEV-13694: Wrong result upon GROUP BY with orderby_uses_equalities=on
> +#
> +CREATE TABLE t1 (a INT, b int, primary key(a));
> +CREATE TABLE t2 (a INT, b INT);
> +INSERT INTO t1 (a,b) VALUES (58,1),(96,2),(273,3),(23,4),(231,5),(525,6),
> +(2354,7),(321421,3),(535,2),(4535,3);
> +INSERT INTO t2 (a,b) VALUES (58,3),(96,3),(273,3);
> +# Join order should have the SJM scan table as the first table for both
> +# the queries with GROUP BY and ORDER BY clause.
> +EXPLAIN SELECT t1.a
> +FROM t1
> +WHERE t1.a IN (SELECT a FROM t2 WHERE b=3)
> +ORDER BY t1.a DESC;
> +id	select_type	table	type	possible_keys	key	key_len	ref	rows	Extra
> +1	PRIMARY	<subquery2>	ALL	distinct_key	NULL	NULL	NULL	3	Using filesort
> +1	PRIMARY	t1	eq_ref	PRIMARY	PRIMARY	4	test.t2.a	1	Using index
> +2	MATERIALIZED	t2	ALL	NULL	NULL	NULL	NULL	3	Using where
> +EXPLAIN FORMAT=JSON SELECT t1.a
> +FROM t1
> +WHERE t1.a IN (SELECT a FROM t2 WHERE b=3)
> +ORDER BY t1.a DESC;
> +EXPLAIN
> +{
> +  "query_block": {
> +    "select_id": 1,
> +    "read_sorted_file": {
> +      "filesort": {
> +        "sort_key": "t1.a desc",
> +        "table": {
> +          "table_name": "<subquery2>",
> +          "access_type": "ALL",
> +          "possible_keys": ["distinct_key"],
> +          "rows": 3,
> +          "filtered": 100,
> +          "materialized": {
> +            "unique": 1,
> +            "query_block": {
> +              "select_id": 2,
> +              "table": {
> +                "table_name": "t2",
> +                "access_type": "ALL",
> +                "rows": 3,
> +                "filtered": 100,
> +                "attached_condition": "t2.b = 3 and t2.a is not null"
> +              }
> +            }
> +          }
> +        }
> +      }
> +    },
> +    "table": {
> +      "table_name": "t1",
> +      "access_type": "eq_ref",
> +      "possible_keys": ["PRIMARY"],
> +      "key": "PRIMARY",
> +      "key_length": "4",
> +      "used_key_parts": ["a"],
> +      "ref": ["test.t2.a"],
> +      "rows": 1,
> +      "filtered": 100,
> +      "using_index": true
> +    }
> +  }
> +}
> +SELECT t1.a
> +FROM t1
> +WHERE t1.a IN (SELECT a FROM t2 WHERE b=3)
> +ORDER BY t1.a DESC;
> +a
> +273
> +96
> +58
> +EXPLAIN SELECT t1.a, group_concat(t1.b)
> +FROM t1
> +WHERE t1.a IN (SELECT a FROM t2 WHERE b=3)
> +GROUP BY t1.a DESC;
> +id	select_type	table	type	possible_keys	key	key_len	ref	rows	Extra
> +1	PRIMARY	<subquery2>	ALL	distinct_key	NULL	NULL	NULL	3	Using filesort
> +1	PRIMARY	t1	eq_ref	PRIMARY	PRIMARY	4	test.t2.a	1	
> +2	MATERIALIZED	t2	ALL	NULL	NULL	NULL	NULL	3	Using where
> +EXPLAIN FORMAT=JSON SELECT t1.a, group_concat(t1.b)
> +FROM t1
> +WHERE t1.a IN (SELECT a FROM t2 WHERE b=3)
> +GROUP BY t1.a DESC;
> +EXPLAIN
> +{
> +  "query_block": {
> +    "select_id": 1,
> +    "read_sorted_file": {
> +      "filesort": {
> +        "sort_key": "t1.a desc",
> +        "table": {
> +          "table_name": "<subquery2>",
> +          "access_type": "ALL",
> +          "possible_keys": ["distinct_key"],
> +          "rows": 3,
> +          "filtered": 100,
> +          "materialized": {
> +            "unique": 1,
> +            "query_block": {
> +              "select_id": 2,
> +              "table": {
> +                "table_name": "t2",
> +                "access_type": "ALL",
> +                "rows": 3,
> +                "filtered": 100,
> +                "attached_condition": "t2.b = 3 and t2.a is not null"
> +              }
> +            }
> +          }
> +        }
> +      }
> +    },
> +    "table": {
> +      "table_name": "t1",
> +      "access_type": "eq_ref",
> +      "possible_keys": ["PRIMARY"],
> +      "key": "PRIMARY",
> +      "key_length": "4",
> +      "used_key_parts": ["a"],
> +      "ref": ["test.t2.a"],
> +      "rows": 1,
> +      "filtered": 100
> +    }
> +  }
> +}
> +SELECT t1.a, group_concat(t1.b)
> +FROM t1
> +WHERE t1.a IN (SELECT a FROM t2 WHERE b=3)
> +GROUP BY t1.a DESC;
> +a	group_concat(t1.b)
> +273	3
> +96	2
> +58	1
> +DROP TABLE t1, t2;
> diff --git a/mysql-test/main/order_by.test b/mysql-test/main/order_by.test
> index 934c503302f..b3e43d27e2f 100644
> --- a/mysql-test/main/order_by.test
> +++ b/mysql-test/main/order_by.test
> @@ -2276,3 +2276,37 @@ set histogram_size=@tmp_h, histogram_type=@tmp_ht, use_stat_tables=@tmp_u,
>      optimizer_use_condition_selectivity=@tmp_o; 
>  
>  drop table t1,t2,t3,t4;
> +
> +
> +--echo #
> +--echo # MDEV-13694: Wrong result upon GROUP BY with orderby_uses_equalities=on
> +--echo #
> +
> +CREATE TABLE t1 (a INT, b int, primary key(a));
> +CREATE TABLE t2 (a INT, b INT);
> +
> +INSERT INTO t1 (a,b) VALUES (58,1),(96,2),(273,3),(23,4),(231,5),(525,6),
> +                            (2354,7),(321421,3),(535,2),(4535,3);
> +INSERT INTO t2 (a,b) VALUES (58,3),(96,3),(273,3);
> +
> +--echo # Join order should have the SJM scan table as the first table for both
> +--echo # the queries with GROUP BY and ORDER BY clause.
> +
> +let $query= SELECT t1.a
> +            FROM t1
> +            WHERE t1.a IN (SELECT a FROM t2 WHERE b=3)
> +            ORDER BY t1.a DESC;
> +
> +eval EXPLAIN $query;
> +eval EXPLAIN FORMAT=JSON $query;
> +eval $query;
> +
> +let $query= SELECT t1.a, group_concat(t1.b)
> +            FROM t1
> +            WHERE t1.a IN (SELECT a FROM t2 WHERE b=3)
> +            GROUP BY t1.a DESC;
> +
> +eval EXPLAIN $query;
> +eval EXPLAIN FORMAT=JSON $query;
> +eval $query;
> +DROP TABLE t1, t2;
> diff --git a/sql/filesort.cc b/sql/filesort.cc
> index 3f4291cfb1f..e5c83293e9f 100644
> --- a/sql/filesort.cc
> +++ b/sql/filesort.cc
> @@ -716,11 +716,21 @@ static ha_rows find_all_keys(THD *thd, Sort_param *param, SQL_SELECT *select,
>    *found_rows= 0;
>    ref_pos= &file->ref[0];
>    next_pos=ref_pos;
> +  JOIN_TAB *tab= sort_form->reginfo.join_tab;
> +  JOIN *join= tab ? tab->join : NULL;
> +  bool first_is_in_sjm_nest= FALSE;
>  
>    DBUG_EXECUTE_IF("show_explain_in_find_all_keys", 
>                    dbug_serve_apcs(thd, 1);
>                   );
>  
> +  if (join && join->table_count != join->const_tables &&
> +      (join->join_tab + join->const_tables == tab))
> +  {
> +    TABLE_LIST *tbl_for_first= sort_form->pos_in_table_list;
> +    first_is_in_sjm_nest= tbl_for_first && tbl_for_first->is_sjm_scan_table();
> +  }
> +
>    if (!quick_select)
>    {
>      next_pos=(uchar*) 0;			/* Find records in sequence */
> @@ -756,13 +766,20 @@ static ha_rows find_all_keys(THD *thd, Sort_param *param, SQL_SELECT *select,
>        goto err;
>    }
>  
> +  if (first_is_in_sjm_nest)
> +    sort_form->column_bitmaps_set(save_read_set, save_write_set);
> +
>    DEBUG_SYNC(thd, "after_index_merge_phase1");
>    for (;;)
>    {
>      if (quick_select)
>        error= select->quick->get_next();
>      else					/* Not quick-select */
> +    {
>        error= file->ha_rnd_next(sort_form->record[0]);
> +      if (first_is_in_sjm_nest)
> +        tab->unpacking_to_base_table_fields();
> +    }
>      if (unlikely(error))
>        break;
>      file->position(sort_form->record[0]);
> diff --git a/sql/opt_subselect.cc b/sql/opt_subselect.cc
> index 87458357865..f837a6394af 100644
> --- a/sql/opt_subselect.cc
> +++ b/sql/opt_subselect.cc
> @@ -4252,11 +4252,11 @@ bool setup_sj_materialization_part2(JOIN_TAB *sjm_tab)
>      sjm_tab->type= JT_ALL;
>  
>      /* Initialize full scan */
> -    sjm_tab->read_first_record= join_read_record_no_init;
> +    sjm_tab->read_first_record= join_init_read_record;
>      sjm_tab->read_record.copy_field= sjm->copy_field;
>      sjm_tab->read_record.copy_field_end= sjm->copy_field +
>                                           sjm->sjm_table_cols.elements;
> -    sjm_tab->read_record.read_record_func= rr_sequential_and_unpack;
> +    sjm_tab->read_record.read_record_func= read_record_func_for_rr_and_unpack;
>    }
>  
>    sjm_tab->bush_children->end[-1].next_select= end_sj_materialize;
> @@ -7105,3 +7105,9 @@ bool Item_in_subselect::pushdown_cond_for_in_subquery(THD *thd, Item *cond)
>    thd->lex->current_select= save_curr_select;
>    DBUG_RETURN(FALSE);
>  }
> +
> +
> +bool TABLE_LIST::is_sjm_scan_table()
> +{
> +  return is_active_sjm() && sj_mat_info->is_sj_scan;
> +}
> diff --git a/sql/records.cc b/sql/records.cc
> index 3d709182a4e..f6885f773d5 100644
> --- a/sql/records.cc
> +++ b/sql/records.cc
> @@ -709,3 +709,16 @@ static int rr_cmp(uchar *a,uchar *b)
>    return (int) a[7] - (int) b[7];
>  #endif
>  }
> +
> +
> +int read_record_func_for_rr_and_unpack(READ_RECORD *info)
> +{
> +  int error;
> +  if ((error= info->read_record_func_and_unpack_calls(info)))
> +    return error;
> +
> +  for (Copy_field *cp= info->copy_field; cp != info->copy_field_end; cp++)
> +    (*cp->do_copy)(cp);
> +
> +  return error;
> +}
> diff --git a/sql/records.h b/sql/records.h
> index faf0d13c9a9..037a06b9d34 100644
> --- a/sql/records.h
> +++ b/sql/records.h
> @@ -55,6 +55,7 @@ struct READ_RECORD
>    TABLE *table;                                 /* Head-form */
>    Unlock_row_func unlock_row;
>    Read_func read_record_func;
> +  Read_func read_record_func_and_unpack_calls;
>    THD *thd;
>    SQL_SELECT *select;
>    uint ref_length, reclength, rec_cache_size, error_offset;
> diff --git a/sql/sql_select.cc b/sql/sql_select.cc
> index 36d9eda3383..28bc57c692f 100644
> --- a/sql/sql_select.cc
> +++ b/sql/sql_select.cc
> @@ -14015,37 +14015,8 @@ remove_const(JOIN *join,ORDER *first_order, COND *cond,
>              can be used without tmp. table.
>            */
>            bool can_subst_to_first_table= false;
> -          bool first_is_in_sjm_nest= false;
> -          if (first_is_base_table)
> -          {
> -            TABLE_LIST *tbl_for_first=
> -              join->join_tab[join->const_tables].table->pos_in_table_list;
> -            first_is_in_sjm_nest= tbl_for_first->sj_mat_info &&
> -                                  tbl_for_first->sj_mat_info->is_used;
> -          }
> -          /*
> -            Currently we do not employ the optimization that uses multiple
> -            equalities for ORDER BY to remove tmp table in the case when
> -            the first table happens to be the result of materialization of
> -            a semi-join nest ( <=> first_is_in_sjm_nest == true).
> -
> -            When a semi-join nest is materialized and scanned to look for
> -            possible matches in the remaining tables for every its row
> -            the fields from the result of materialization are copied
> -            into the record buffers of tables from the semi-join nest.
> -            So these copies are used to access the remaining tables rather
> -            than the fields from the result of materialization.
> -
> -            Unfortunately now this so-called 'copy back' technique is
> -            supported only if the rows  are scanned with the rr_sequential
> -            function, but not with other rr_* functions that are employed
> -            when the result of materialization is required to be sorted.
> -
> -            TODO: either to support 'copy back' technique for the above case,
> -                  or to get rid of this technique altogether.
> -          */
>            if (optimizer_flag(join->thd, OPTIMIZER_SWITCH_ORDERBY_EQ_PROP) &&
> -              first_is_base_table && !first_is_in_sjm_nest &&
> +              first_is_base_table &&
>                order->item[0]->real_item()->type() == Item::FIELD_ITEM &&
>                join->cond_equal)
>            {
> @@ -19922,19 +19893,6 @@ do_select(JOIN *join, Procedure *procedure)
>  }
>  
>  
> -int rr_sequential_and_unpack(READ_RECORD *info)
> -{
> -  int error;
> -  if (unlikely((error= rr_sequential(info))))
> -    return error;
> -  
> -  for (Copy_field *cp= info->copy_field; cp != info->copy_field_end; cp++)
> -    (*cp->do_copy)(cp);
> -
> -  return error;
> -}
> -
> -
>  /**
>    @brief
>    Instantiates temporary table
> @@ -21223,6 +21181,8 @@ bool test_if_use_dynamic_range_scan(JOIN_TAB *join_tab)
>  
>  int join_init_read_record(JOIN_TAB *tab)
>  {
> +  bool need_unpacking= FALSE;
> +  JOIN *join= tab->join;
>    /* 
>      Note: the query plan tree for the below operations is constructed in
>      save_agg_explain_data.
> @@ -21232,6 +21192,12 @@ int join_init_read_record(JOIN_TAB *tab)
>    if (tab->filesort && tab->sort_table())     // Sort table.
>      return 1;
>  
> +  if (join->top_join_tab_count != join->const_tables)
> +  {
> +    TABLE_LIST *tbl= tab->table->pos_in_table_list;
> +    need_unpacking= tbl ? tbl->is_sjm_scan_table() : FALSE;
> +  }
> +
>    tab->build_range_rowid_filter_if_needed();
>  
>    DBUG_EXECUTE_IF("kill_join_init_read_record",
> @@ -21249,16 +21215,6 @@ int join_init_read_record(JOIN_TAB *tab)
>    if (!tab->preread_init_done  && tab->preread_init())
>      return 1;
>  
> -
> -  if (init_read_record(&tab->read_record, tab->join->thd, tab->table,
> -                       tab->select, tab->filesort_result, 1,1, FALSE))
> -    return 1;
> -  return tab->read_record.read_record();
> -}
> -
> -int
> -join_read_record_no_init(JOIN_TAB *tab)
> -{
>    Copy_field *save_copy, *save_copy_end;
>    
>    /*
> @@ -21268,12 +21224,20 @@ join_read_record_no_init(JOIN_TAB *tab)
>    save_copy=     tab->read_record.copy_field;
>    save_copy_end= tab->read_record.copy_field_end;
>    
> -  init_read_record(&tab->read_record, tab->join->thd, tab->table,
> -		   tab->select, tab->filesort_result, 1, 1, FALSE);
> +
> +  if (init_read_record(&tab->read_record, tab->join->thd, tab->table,
> +                       tab->select, tab->filesort_result, 1, 1, FALSE))
> +    return 1;
>  
>    tab->read_record.copy_field=     save_copy;
>    tab->read_record.copy_field_end= save_copy_end;
> -  tab->read_record.read_record_func= rr_sequential_and_unpack;
> +
> +  if (need_unpacking)
> +  {
> +    tab->read_record.read_record_func_and_unpack_calls=
> +                                             tab->read_record.read_record_func;
> +    tab->read_record.read_record_func = read_record_func_for_rr_and_unpack;
> +  }
>  
>    return tab->read_record.read_record();
>  }
> @@ -28981,6 +28945,19 @@ void build_notnull_conds_for_inner_nest_of_outer_join(JOIN *join,
>  }
>  
>  
> +/*
> +  @brief
> +    Unpacking temp table fields to base table fields.
> +*/
> +
> +void JOIN_TAB::unpacking_to_base_table_fields()
> +{
> +  for (Copy_field *cp= read_record.copy_field;
> +       cp != read_record.copy_field_end; cp++)
> +    (*cp->do_copy)(cp);
> +}
> +
> +
>  /**
>    @} (end of group Query_Optimizer)
>  */
> diff --git a/sql/sql_select.h b/sql/sql_select.h
> index 4f7bf49f635..545d4a788cc 100644
> --- a/sql/sql_select.h
> +++ b/sql/sql_select.h
> @@ -223,7 +223,7 @@ typedef enum_nested_loop_state
>  (*Next_select_func)(JOIN *, struct st_join_table *, bool);
>  Next_select_func setup_end_select_func(JOIN *join, JOIN_TAB *tab);
>  int rr_sequential(READ_RECORD *info);
> -int rr_sequential_and_unpack(READ_RECORD *info);
> +int read_record_func_for_rr_and_unpack(READ_RECORD *info);
>  Item *remove_pushed_top_conjuncts(THD *thd, Item *cond);
>  Item *and_new_conditions_to_optimized_cond(THD *thd, Item *cond,
>                                             COND_EQUAL **cond_eq,
> @@ -676,6 +676,7 @@ typedef struct st_join_table {
>                                          table_map remaining_tables);
>    bool fix_splitting(SplM_plan_info *spl_plan, table_map remaining_tables,
>                       bool is_const_table);
> +  void unpacking_to_base_table_fields();
>  } JOIN_TAB;
>  
>  
> @@ -2352,7 +2353,6 @@ create_virtual_tmp_table(THD *thd, Field *field)
>  
>  int test_if_item_cache_changed(List<Cached_item> &list);
>  int join_init_read_record(JOIN_TAB *tab);
> -int join_read_record_no_init(JOIN_TAB *tab);
>  void set_position(JOIN *join,uint idx,JOIN_TAB *table,KEYUSE *key);
>  inline Item * and_items(THD *thd, Item* cond, Item *item)
>  {
> diff --git a/sql/table.h b/sql/table.h
> index 1a7e5fbd4dc..35ba9bbb95d 100644
> --- a/sql/table.h
> +++ b/sql/table.h
> @@ -2622,6 +2622,7 @@ struct TABLE_LIST
>     */
>    const char *get_table_name() const { return view != NULL ? view_name.str : table_name.str; }
>    bool is_active_sjm();
> +  bool is_sjm_scan_table();
>    bool is_jtbm() { return MY_TEST(jtbm_subselect != NULL); }
>    st_select_lex_unit *get_unit();
>    st_select_lex *get_single_select();
> _______________________________________________
> commits mailing list
> commits@xxxxxxxxxxx
> https://lists.askmonty.org/cgi-bin/mailman/listinfo/commits

-- 
BR
 Sergei
-- 
Sergei Petrunia, Software Developer
MariaDB Corporation | Skype: sergefp | Blog: http://s.petrunia.net/blog