maria-developers team mailing list archive

Thread
Date
Re: [Commits] fc31f6d: MDEV-7833:ANALYZE FORMAT=JSON and Range checked for each record

To: sanja@xxxxxxxxxxx
From: Sergey Petrunia <sergey@xxxxxxxxxxx>
Date: Fri, 3 Apr 2015 00:08:03 +0300
Cc: maria-developers@xxxxxxxxxxxxxxxxxxx
In-reply-to: <1427991581.9331.25554@bell-ThinkPad>
User-agent: Mutt/1.5.21 (2010-09-15)
Hi Sanja,

Please find review comments below.

On Thu, Apr 02, 2015 at 06:19:41PM +0200, sanja@xxxxxxxxxxx wrote:
> revision-id: fc31f6d95720b4b946b8b68c816026d65831f347
> parent(s): 01d7da6785284383b2c04f2d4474feccebb0bb6f
> committer: Oleksandr Byelkin
> branch nick: server
> timestamp: 2015-04-02 18:19:33 +0200
> message:
> 
> MDEV-7833:ANALYZE FORMAT=JSON and Range checked for each record
> 
> ---
>  mysql-test/r/analyze_format_json.result | 62 +++++++++++++++++++++++++++++
>  mysql-test/t/analyze_format_json.test   | 30 ++++++++++++++
>  sql/sql_explain.cc                      | 26 +++++++++++--
>  sql/sql_explain.h                       | 12 +++++-
>  sql/sql_select.cc                       | 69 ++++++++++++++++++++++++++++++---
>  sql/sql_select.h                        |  7 +++-
>  6 files changed, 195 insertions(+), 11 deletions(-)
> 
> diff --git a/mysql-test/r/analyze_format_json.result b/mysql-test/r/analyze_format_json.result
> index 9022d8c..b3ef075 100644
> --- a/mysql-test/r/analyze_format_json.result
> +++ b/mysql-test/r/analyze_format_json.result
> @@ -264,3 +264,65 @@ ANALYZE
>    }
>  }
>  drop table t1;
> +#
> +# MDEV-7833:ANALYZE FORMAT=JSON and Range checked for each record
> +#
> +create table t3(a int);
> +insert into t3 values (0),(1),(2),(3),(4),(5),(6),(7),(8),(9);
> +create table t4(a int);
> +insert into t4 select A.a + B.a* 10 + C.a * 100 from t3 A, t3 B, t3 C;
> +create table t1 (lb1 int, rb1 int, lb2 int, rb2 int, c1 int, c2 int);
> +insert into t1 values (1,2,10,20,15,15);
> +insert into t1 values (3,5,10,20,15,15);
> +insert into t1 values (10,20,10,20,15,15);
> +insert into t1 values (10,20,1,2,15,15);
> +insert into t1 values (10,20,10,20,1,3);
> +create table t2 (key1 int, key2 int, key3 int, key4 int, col1 int,
> +key(key1), key(key2), key(key3), key(key4));
> +insert into t2 select a,a,a,a,a from t3;
> +insert into t2 select 15,15,15,15,15 from t4;
> +analyze format=json
> +select * from t1, t2 where (t2.key1 between t1.lb1 and t1.rb1) and
> +(t2.key2 between t1.lb2 and t1.rb2) and
> +(t2.key3=t1.c1  OR t2.key4=t1.c2);
> +ANALYZE
> +{
> +  "query_block": {
> +    "select_id": 1,
> +    "r_loops": 1,
> +    "r_total_time_ms": "REPLACED",
> +    "table": {
> +      "table_name": "t1",
> +      "access_type": "ALL",
> +      "r_loops": 1,
> +      "rows": 5,
> +      "r_rows": 5,
> +      "r_total_time_ms": "REPLACED",
> +      "filtered": 100,
> +      "r_filtered": 100
> +    },
> +    "range-checked-for-each-record": {
> +      "keys": ["key1", "key2", "key3", "key4"],
> +      "r_keys_none": 1,
> +      "r_keys_index_merge": 1,
> +      "r_keys": {
> +        "key1": 2,
> +        "key2": 1,
> +        "key3": 0,
> +        "key4": 0
> +      },
It seems wrong to have multiple r_XXX elements. I think, having one will be
better.
How about:
 r_keys: {
    "none": 1,         ## Maybe, change to "full_table_scan" ? (btw can it be a full index scan instead I wonder?)
    "index_merge": nnn,
    "range" : {
      "key1" : nnn,
      "key2" : nnn,
      "key3" : nnn
    }
 }

Let's discuss it.
> +      "table": {
> +        "table_name": "t2",
> +        "access_type": "ALL",
> +        "possible_keys": ["key1", "key2", "key3", "key4"],
> +        "r_loops": 5,
> +        "rows": 1010,
> +        "r_rows": 203.8,
> +        "r_total_time_ms": "REPLACED",
> +        "filtered": 100,
> +        "r_filtered": 98.135
> +      }
> +    }
> +  }
> +}
> +drop table t1,t2,t3,t4;
> diff --git a/mysql-test/t/analyze_format_json.test b/mysql-test/t/analyze_format_json.test
> index 64bafd7..0ae9d4f 100644
> --- a/mysql-test/t/analyze_format_json.test
> +++ b/mysql-test/t/analyze_format_json.test
> @@ -75,3 +75,33 @@ disconnect con1;
>  connection default;
>  drop table t1;
>  
> +
> +--echo #
> +--echo # MDEV-7833:ANALYZE FORMAT=JSON and Range checked for each record
> +--echo #
> +create table t3(a int);
> +insert into t3 values (0),(1),(2),(3),(4),(5),(6),(7),(8),(9);
> +
> +create table t4(a int);
> +insert into t4 select A.a + B.a* 10 + C.a * 100 from t3 A, t3 B, t3 C;
> +
> +create table t1 (lb1 int, rb1 int, lb2 int, rb2 int, c1 int, c2 int);
> +
> +insert into t1 values (1,2,10,20,15,15);
> +insert into t1 values (3,5,10,20,15,15);
> +insert into t1 values (10,20,10,20,15,15);
> +insert into t1 values (10,20,1,2,15,15);
> +insert into t1 values (10,20,10,20,1,3);
> +
> +create table t2 (key1 int, key2 int, key3 int, key4 int, col1 int,
> +                 key(key1), key(key2), key(key3), key(key4));
> +insert into t2 select a,a,a,a,a from t3;
> +insert into t2 select 15,15,15,15,15 from t4;
> +
> +--replace_regex /"r_total_time_ms": [0-9]*[.]?[0-9]*/"r_total_time_ms": "REPLACED"/
> +analyze format=json
> +select * from t1, t2 where (t2.key1 between t1.lb1 and t1.rb1) and
> +                           (t2.key2 between t1.lb2 and t1.rb2) and
> +                           (t2.key3=t1.c1  OR t2.key4=t1.c2);
> +
> +drop table t1,t2,t3,t4;
> diff --git a/sql/sql_explain.cc b/sql/sql_explain.cc
> index 9d82f4f..edbc2bf 100644
> --- a/sql/sql_explain.cc
> +++ b/sql/sql_explain.cc
> @@ -1076,15 +1076,15 @@ int Explain_table_access::print_explain(select_result_sink *output, uint8 explai
>  }
>  
>  

Please add a comment:
  @return 
     ptr  - pointer to the added string
     NULL - out of memory error

Does it make sense to change the return type to 'const char*' ? 

> -bool String_list::append_str(MEM_ROOT *mem_root, const char *str)
> +char *String_list::append_str(MEM_ROOT *mem_root, const char *str)
>  {
>    size_t len= strlen(str);
>    char *cp;
>    if (!(cp = (char*)alloc_root(mem_root, len+1)))
> -    return 1;
> +    return NULL;
>    memcpy(cp, str, len+1);
>    push_back(cp);
> -  return 0;
> +  return cp;
>  }
>  
>  
> @@ -1209,6 +1209,26 @@ void Explain_table_access::print_explain_json(Explain_query *query,
>    {
>      writer->add_member("range-checked-for-each-record").start_object();
>      add_json_keyset(writer, "keys", &possible_keys);
> +    if (is_analyze)
> +    {
> +      writer->add_member("r_keys_none").add_ll(range_checked_fer->none);
> +      writer->add_member("r_keys_index_merge").add_ll(range_checked_fer->
> +                                                      index_merge);
> +      if (range_checked_fer->keys_stat)
> +      {
> +        writer->add_member("r_keys").start_object();
> +        for (uint i= 0; i < range_checked_fer->keys; i++)
> +        {
> +          if (range_checked_fer->keys_stat_names[i])
> +          {
> +            writer->add_member(range_checked_fer->
> +                               keys_stat_names[i]).add_ll(range_checked_fer->
> +                                                          keys_stat[i]);
> +          }
> +        }
> +        writer->end_object();
> +      }
> +    }
If we move this code into Range_checked_fer::print_json(), will it be possible
to make its members private?
>    }
>  
>    if (full_scan_on_null_key)
> diff --git a/sql/sql_explain.h b/sql/sql_explain.h
> index e3b41ee..f86886d 100644
> --- a/sql/sql_explain.h
> +++ b/sql/sql_explain.h
> @@ -54,7 +54,7 @@ it into the slow query log.
>  class String_list: public List<char>
>  {
>  public:
> -  bool append_str(MEM_ROOT *mem_root, const char *str);
> +  char *append_str(MEM_ROOT *mem_root, const char *str);
>  };
>  
>  class Json_writer;
> @@ -627,6 +627,16 @@ class Explain_range_checked_fer : public Sql_alloc
>  public:
>    String_list key_set;
>    key_map keys_map;
> +
> +  ha_rows none, index_merge;
'none' looks weird. Would a 'full_scan' be better.


> +  ha_rows *keys_stat;
> +  char **keys_stat_names;
> +  uint keys;
This needs to be documented.
> +
> +  Explain_range_checked_fer()
> +    :Sql_alloc(), none(0), index_merge(0),
> +    keys_stat(0), keys_stat_names(0), keys(0)
> +  {}
>  };
>  
>  /*
> diff --git a/sql/sql_select.cc b/sql/sql_select.cc
> index 5c3e116..4eb3d81 100644
> --- a/sql/sql_select.cc
> +++ b/sql/sql_select.cc
> @@ -11398,6 +11398,7 @@ void JOIN_TAB::cleanup()
>      table->reginfo.join_tab= 0;
>    }
>    end_read_record(&read_record);
> +  explain_plan= 0;
It's a pointer, right?  Please change s/0/NULL/

>    DBUG_VOID_RETURN;
>  }
>  
> @@ -18762,9 +18763,30 @@ test_if_quick_select(JOIN_TAB *tab)
>  
>    delete tab->select->quick;
>    tab->select->quick=0;
> -  return tab->select->test_quick_select(tab->join->thd, tab->keys,
> -					(table_map) 0, HA_POS_ERROR, 0,
> -                                        FALSE, /*remove where parts*/FALSE);
> +  int res= tab->select->test_quick_select(tab->join->thd, tab->keys,
> +                                          (table_map) 0, HA_POS_ERROR, 0,
> +                                          FALSE, /*remove where parts*/FALSE);
> +  if (tab->explain_plan && tab->explain_plan->range_checked_fer)
> +  {
> +    if (tab->select->quick)
> +    {
> +      QUICK_SELECT_I *quick= tab->select->quick;
> +      if (quick->index == MAX_KEY)
> +        tab->explain_plan->range_checked_fer->index_merge++;
> +      else
> +      {
> +        DBUG_ASSERT(quick->index < tab->explain_plan->range_checked_fer->keys);
> +        DBUG_ASSERT(tab->explain_plan->range_checked_fer->keys_stat);
> +        DBUG_ASSERT(tab->explain_plan->range_checked_fer->keys_stat_names);
> +        DBUG_ASSERT(tab->explain_plan->range_checked_fer->keys_stat_names[
> +                    quick->index]);
> +        tab->explain_plan->range_checked_fer->keys_stat[quick->index]++;
> +      }
> +    }
> +    else
> +      tab->explain_plan->range_checked_fer->none++;
> +  }
> +  return res;
I think it's bad that join execution code becomes littered with details about
how things are stored in the Explain data structures.

Can you add a set of methods to range_checked_fer: 
 count_full_scan();
 count_index_merge();
 count_range_scan();

and then only call these methods from here?

>  }
>  
>  
> @@ -23360,6 +23382,32 @@ int append_possible_keys(MEM_ROOT *alloc, String_list &list, TABLE *table,
>    return 0;
>  }
>  
What does this function do?  AFAIU it does two things:
- copies indexes from possible_keys into a String_list.
- allocates an empty array of ha_rows(). 

The set of data structures used by Range_checked_fer is quite peculiar. This
function is only useful for Range_checked_fer.  Do you think it would make
sense to made this a method of Range_checked_fer?

> +int append_possible_keys_stat(MEM_ROOT *alloc, String_list &list,
> +                              ha_rows **stat, char ***names,
> +                              TABLE *table, key_map possible_keys)
> +{
> +  uint j;
> +  multi_alloc_root(alloc, stat, sizeof(ha_rows) * table->s->keys,
> +                   names, sizeof(char *) * table->s->keys, NULL);
> +  if ((!(*stat)) || (!(*names)))
> +  {
> +    (*stat)= NULL; (*names)= NULL;
> +    return 1;
> +  }
> +  bzero(*stat, sizeof(ha_rows) * table->s->keys);
> +  for (j= 0; j < table->s->keys; j++)
> +  {
> +    if (possible_keys.is_set(j))
> +    {
> +      char *nm= list.append_str(alloc, table->key_info[j].name);
> +      (*names)[j]= nm;
> +    }
> +    else
> +      (*names)[j]= NULL;
> +  }
> +  return 0;
> +}
> +
>  /*
>    TODO: this function is only applicable for the first non-const optimization
>    join tab. 
> @@ -23400,6 +23448,8 @@ void JOIN_TAB::save_explain_data(Explain_table_access *eta, table_map prefix_tab
>    quick_type= -1;
>    QUICK_SELECT_I *quick= NULL;
>  
> +
> +  explain_plan= eta;
>    eta->key.clear();
>    eta->quick_info= NULL;
>    
> @@ -23663,9 +23713,16 @@ void JOIN_TAB::save_explain_data(Explain_table_access *eta, table_map prefix_tab
>        {
>          eta->push_extra(ET_RANGE_CHECKED_FOR_EACH_RECORD);
>          eta->range_checked_fer= new (thd->mem_root) Explain_range_checked_fer;
> -        eta->range_checked_fer->keys_map= tab->keys;
> -        append_possible_keys(thd->mem_root, eta->range_checked_fer->key_set,
> -                             table, tab->keys);
> +        if (eta->range_checked_fer)
> +        {
> +          eta->range_checked_fer->keys_map= tab->keys;
> +          eta->range_checked_fer->keys= table->s->keys;
> +          append_possible_keys_stat(thd->mem_root,
> +                                    eta->range_checked_fer->key_set,
> +                                    &eta->range_checked_fer->keys_stat,
> +                                    &eta->range_checked_fer->keys_stat_names,
> +                                    table, tab->keys);
> +        }
>        }
>        else if (tab->select->cond ||
>                 (tab->cache_select && tab->cache_select->cond))
> diff --git a/sql/sql_select.h b/sql/sql_select.h
> index 0557e32..ef92690 100644
> --- a/sql/sql_select.h
> +++ b/sql/sql_select.h
> @@ -362,7 +362,12 @@ typedef struct st_join_table {
>    SJ_TMP_TABLE  *check_weed_out_table;
>    /* for EXPLAIN only: */
>    SJ_TMP_TABLE  *first_weedout_table;
> -  
> +
> +  /**
> +    reference to saved plan and execution statistics
> +  */
> +  Explain_table_access *explain_plan;
> +
>    /*
>      If set, means we should stop join enumeration after we've got the first
>      match and return to the specified join tab. May point to

BR
 Sergei
-- 
Sergei Petrunia, Software Developer
MariaDB Corporation | Skype: sergefp | Blog: http://s.petrunia.net/blog