← Back to team overview

maria-developers team mailing list archive

Re: [Commits] 5e23f561984: MDEV-18741: Optimizer trace: multi-part key ranges are printed incorrectly

 

Hi Varun,

Please find my review comments below.

On Fri, Apr 05, 2019 at 02:59:17PM +0530, Varun wrote:
> revision-id: 5e23f56198493ed19ac270759963f615324b2c49 (mariadb-10.4.3-162-g5e23f561984)
> parent(s): 02d9b048a2ab549a3227a81e15ff2f8c45562a65
> author: Varun Gupta
> committer: Varun Gupta
> timestamp: 2019-04-05 14:58:39 +0530
> message:
> 
> MDEV-18741: Optimizer trace: multi-part key ranges are printed incorrectly
> 
> Changed the function append_range_all_keyparts to use sel_arg_range_seq_init / sel_arg_range_seq_next to produce ranges.
> Also adjusted to print format for the ranges, now the ranges are printed as:
>     (keypart1_min, keypart2_min,..)  OP (keypart1_name,keypart2_name, ..) OP (keypart1_max,keypart2_max, ..)
> 
> Also added more tests for range and index merge access for optimizer trace
> 
> ---
>  mysql-test/main/opt_trace.result                   | 276 ++++++++++-
>  mysql-test/main/opt_trace.test                     |  57 +++
>  mysql-test/main/opt_trace_index_merge.result       | 509 ++++++++++++++++++++-
>  mysql-test/main/opt_trace_index_merge.test         | 112 +++++
>  .../main/opt_trace_index_merge_innodb.result       |   6 +-
>  sql/opt_range.cc                                   | 281 ++++++------
>  sql/opt_range.h                                    |   7 +-
>  7 files changed, 1079 insertions(+), 169 deletions(-)
...

> +explain select * from t1 force index(a_b_c) where a between 1 and 4 and b < 50;
> +id	select_type	table	type	possible_keys	key	key_len	ref	rows	Extra
> +1	SIMPLE	t1	range	a_b_c	a_b_c	8	NULL	4	Using index condition
> +select JSON_DETAILED(JSON_EXTRACT(trace, '$**.analyzing_range_alternatives')) from INFORMATION_SCHEMA.OPTIMIZER_TRACE;
> +JSON_DETAILED(JSON_EXTRACT(trace, '$**.analyzing_range_alternatives'))
> +[
> +    
> +    {
> +        "range_scan_alternatives": 
> +        [
> +            
> +            {
> +                "index": "a_b_c",
> +                "ranges": 
> +                [
> +                    "(1) <= (a) < (4,50)"

In the example above, only the name of the first key part is printed, even if 
the second endpoint uses two. As far as I understand the code, just using

 range->start_key.keypart_map | range->end_key.keypart_map

in print_keyparts_name will be sufficient to fix this.

> +                ],
> +                "rowid_ordered": false,
> +                "using_mrr": false,
> +                "index_only": false,
> +                "rows": 4,
> +                "cost": 6.2648,
> +                "chosen": true
> +            }
> +        ],
> +        "analyzing_roworder_intersect": 
> +        {
> +            "cause": "too few roworder scans"
> +        },
> +        "analyzing_index_merge_union": 
> +        [
> +        ]
> +    }
> +]

> diff --git a/sql/opt_range.cc b/sql/opt_range.cc
> index 5ab3d70214d..1827e2fcaf2 100644
> --- a/sql/opt_range.cc
> +++ b/sql/opt_range.cc
> @@ -431,16 +431,18 @@ static int and_range_trees(RANGE_OPT_PARAM *param,
>  static bool remove_nonrange_trees(RANGE_OPT_PARAM *param, SEL_TREE *tree);
>  
>  static void print_key_value(String *out, const KEY_PART_INFO *key_part,
> -                            const uchar *key);
> +                            const uchar* key, uint length);
> +static void print_keyparts_name(String *out, const KEY_PART_INFO *key_part,
> +                                uint n_keypart, key_part_map keypart_map);
>  
>  static void append_range_all_keyparts(Json_writer_array *range_trace,
> -                                      String *range_string,
> -                                      String *range_so_far, const SEL_ARG *keypart,
> +                                      PARAM param, uint idx,
> +                                      SEL_ARG *keypart,
>                                        const KEY_PART_INFO *key_parts);
>  
>  static
> -void append_range(String *out, const KEY_PART_INFO *key_parts,
> -                  const uchar *min_key, const uchar *max_key, const uint flag);
> +void append_range(String *out, const KEY_PART_INFO *key_part,
> +                  KEY_MULTI_RANGE *range, uint n_key_parts);

Why is this function called append_() while other functions that print stuff
into strings are called print_key_value, print_keyparts_name ?

The name "append_range_all_keyparts" is also confusing. The function prints
a sequence of ranges. One can say it "appends" them to the trace, but that
verb is not used to refer to this action anywhere else.
Functions that print something into trace are called trace_XXX(), e.g. 
TRP_RANGE::trace_basic_info(), trace_plan_prefix(), etc.

I think trace_ranges() would be a better name here. What do you think?

>  
>  
>  /*
> @@ -2273,10 +2275,7 @@ void TRP_RANGE::trace_basic_info(const PARAM *param,
>    // TRP_RANGE should not be created if there are no range intervals
>    DBUG_ASSERT(key);
>  
> -  String range_info;
> -  range_info.length(0);
> -  range_info.set_charset(system_charset_info);
> -  append_range_all_keyparts(&trace_range, NULL, &range_info, key, key_part);
> +  append_range_all_keyparts(&trace_range, *param, key_idx, key, key_part);
>  }
>  
>  
> @@ -2489,10 +2488,8 @@ void TRP_GROUP_MIN_MAX::trace_basic_info(const PARAM *param,
>    // can have group quick without ranges
>    if (index_tree)
>    {
> -    String range_info;
> -    range_info.set_charset(system_charset_info);
> -    append_range_all_keyparts(&trace_range, NULL, &range_info, index_tree,
> -                              key_part);
> +    append_range_all_keyparts(&trace_range, *param, param_idx,
> +                              index_tree, key_part);
>    }
>  }
>  
> @@ -6398,20 +6395,9 @@ void TRP_ROR_INTERSECT::trace_basic_info(const PARAM *param,
>      trace_isect_idx.add("rows", (*cur_scan)->records);
>  
>      Json_writer_array trace_range(thd, "ranges");
> -    for (const SEL_ARG *current= (*cur_scan)->sel_arg->first(); current;
> -                                                 current= current->next)
> -    {
> -      String range_info;
> -      range_info.set_charset(system_charset_info);
> -      for (const SEL_ARG *part= current; part;
> -           part= part->next_key_part ? part->next_key_part : nullptr)
> -      {
> -        const KEY_PART_INFO *cur_key_part= key_part + part->part;
> -        append_range(&range_info, cur_key_part, part->min_value,
> -                     part->max_value, part->min_flag | part->max_flag);
> -      }
> -      trace_range.add(range_info.ptr(), range_info.length());
> -    }
> +
> +    append_range_all_keyparts(&trace_range, *param, (*cur_scan)->idx,
> +                              (*cur_scan)->sel_arg->first(), key_part);

What is this for? I think this is not needed (at best). 
Range enumeration should start from the tree root, not from the leftmost
element.

>    }
>  }
>  
...

> @@ -15775,60 +15763,39 @@ void append_range(String *out, const KEY_PART_INFO *key_part,
>  
>    Add ranges to the trace
>    For ex:
> -    query: select * from t1 where a=2 ;
> -    and we have an index on a , so we create a range
> -      2 <= a <= 2
> +    lets say we have an index a_b(a,b)
> +    query: select * from t1 where a=2 and b=4 ;
> +    so we create a range:
> +      (2,4) <= (a,b) <= (2,4)
>      this is added to the trace
>  */
>  
>  static void append_range_all_keyparts(Json_writer_array *range_trace,
> -                                      String *range_string,
> -                                      String *range_so_far, const SEL_ARG *keypart,
> +                                      PARAM param, uint idx,

Is it intentional that PARAM is passed by value? What for?

> +                                      SEL_ARG *keypart,
>                                        const KEY_PART_INFO *key_parts)
>  {
> -
> -  DBUG_ASSERT(keypart);
> -  DBUG_ASSERT(keypart && keypart != &null_element);
> -
> -  // Navigate to first interval in red-black tree
> +  SEL_ARG_RANGE_SEQ seq;
> +  KEY_MULTI_RANGE range;
> +  range_seq_t seq_it;
> +  uint flags= 0;
> +  RANGE_SEQ_IF seq_if = {NULL, sel_arg_range_seq_init,
> +                         sel_arg_range_seq_next, 0, 0};
> +  KEY *keyinfo= param.table->key_info + param.real_keynr[idx];
> +  uint n_key_parts= param.table->actual_n_key_parts(keyinfo);
> +  seq.keyno= idx;
> +  seq.real_keyno= param.real_keynr[idx];
> +  seq.param= &param;
> +  seq.start= keypart;
>    const KEY_PART_INFO *cur_key_part= key_parts + keypart->part;
> -  const SEL_ARG *keypart_range= keypart->first();
> -  const size_t save_range_so_far_length= range_so_far->length();
> -
> +  seq_it= seq_if.init((void *) &seq, 0, flags);
>  
> -  while (keypart_range)
> +  while (!seq_if.next(seq_it, &range))
>    {
> -    // Append the current range predicate to the range String
> -    switch (keypart->type)
> -    {
> -      case SEL_ARG::Type::KEY_RANGE:
> -        append_range(range_so_far, cur_key_part, keypart_range->min_value,
> -                     keypart_range->max_value,
> -                     keypart_range->min_flag | keypart_range->max_flag);
> -        break;
> -      case SEL_ARG::Type::MAYBE_KEY:
> -        range_so_far->append("MAYBE_KEY");
> -        break;
> -      case SEL_ARG::Type::IMPOSSIBLE:
> -        range_so_far->append("IMPOSSIBLE");
> -        break;
> -      default:
> -        DBUG_ASSERT(false);
> -        break;
> -    }
> -
> -    if (keypart_range->next_key_part &&
> -        keypart_range->next_key_part->part ==
> -              keypart_range->part + 1 &&
> -        keypart_range->is_singlepoint())
> -    {
> -      append_range_all_keyparts(range_trace, range_string, range_so_far,
> -                                keypart_range->next_key_part, key_parts);
> -    }
> -    else
> -      range_trace->add(range_so_far->c_ptr_safe(), range_so_far->length());
> -    keypart_range= keypart_range->next;
> -    range_so_far->length(save_range_so_far_length);
> +    String range_info;
> +    range_info.set_charset(system_charset_info);
Can you use "StringBuffer<128> tmp(system_charset_info);" like you're already
doing in other places of this patch?

> +    append_range(&range_info, cur_key_part, &range, n_key_parts);
> +    range_trace->add(range_info.c_ptr_safe(), range_info.length());
>    }
>  }
>  

BR
 Sergei
-- 
Sergei Petrunia, Software Developer
MariaDB Corporation | Skype: sergefp | Blog: http://s.petrunia.net/blog