maria-developers team mailing list archive
-
maria-developers team
-
Mailing list archive
-
Message #08409
Re: [Commits] fc31f6d: MDEV-7833:ANALYZE FORMAT=JSON and Range checked for each record
Hi Sanja,
Please find review comments below.
On Thu, Apr 02, 2015 at 06:19:41PM +0200, sanja@xxxxxxxxxxx wrote:
> revision-id: fc31f6d95720b4b946b8b68c816026d65831f347
> parent(s): 01d7da6785284383b2c04f2d4474feccebb0bb6f
> committer: Oleksandr Byelkin
> branch nick: server
> timestamp: 2015-04-02 18:19:33 +0200
> message:
>
> MDEV-7833:ANALYZE FORMAT=JSON and Range checked for each record
>
> ---
> mysql-test/r/analyze_format_json.result | 62 +++++++++++++++++++++++++++++
> mysql-test/t/analyze_format_json.test | 30 ++++++++++++++
> sql/sql_explain.cc | 26 +++++++++++--
> sql/sql_explain.h | 12 +++++-
> sql/sql_select.cc | 69 ++++++++++++++++++++++++++++++---
> sql/sql_select.h | 7 +++-
> 6 files changed, 195 insertions(+), 11 deletions(-)
>
> diff --git a/mysql-test/r/analyze_format_json.result b/mysql-test/r/analyze_format_json.result
> index 9022d8c..b3ef075 100644
> --- a/mysql-test/r/analyze_format_json.result
> +++ b/mysql-test/r/analyze_format_json.result
> @@ -264,3 +264,65 @@ ANALYZE
> }
> }
> drop table t1;
> +#
> +# MDEV-7833:ANALYZE FORMAT=JSON and Range checked for each record
> +#
> +create table t3(a int);
> +insert into t3 values (0),(1),(2),(3),(4),(5),(6),(7),(8),(9);
> +create table t4(a int);
> +insert into t4 select A.a + B.a* 10 + C.a * 100 from t3 A, t3 B, t3 C;
> +create table t1 (lb1 int, rb1 int, lb2 int, rb2 int, c1 int, c2 int);
> +insert into t1 values (1,2,10,20,15,15);
> +insert into t1 values (3,5,10,20,15,15);
> +insert into t1 values (10,20,10,20,15,15);
> +insert into t1 values (10,20,1,2,15,15);
> +insert into t1 values (10,20,10,20,1,3);
> +create table t2 (key1 int, key2 int, key3 int, key4 int, col1 int,
> +key(key1), key(key2), key(key3), key(key4));
> +insert into t2 select a,a,a,a,a from t3;
> +insert into t2 select 15,15,15,15,15 from t4;
> +analyze format=json
> +select * from t1, t2 where (t2.key1 between t1.lb1 and t1.rb1) and
> +(t2.key2 between t1.lb2 and t1.rb2) and
> +(t2.key3=t1.c1 OR t2.key4=t1.c2);
> +ANALYZE
> +{
> + "query_block": {
> + "select_id": 1,
> + "r_loops": 1,
> + "r_total_time_ms": "REPLACED",
> + "table": {
> + "table_name": "t1",
> + "access_type": "ALL",
> + "r_loops": 1,
> + "rows": 5,
> + "r_rows": 5,
> + "r_total_time_ms": "REPLACED",
> + "filtered": 100,
> + "r_filtered": 100
> + },
> + "range-checked-for-each-record": {
> + "keys": ["key1", "key2", "key3", "key4"],
> + "r_keys_none": 1,
> + "r_keys_index_merge": 1,
> + "r_keys": {
> + "key1": 2,
> + "key2": 1,
> + "key3": 0,
> + "key4": 0
> + },
It seems wrong to have multiple r_XXX elements. I think, having one will be
better.
How about:
r_keys: {
"none": 1, ## Maybe, change to "full_table_scan" ? (btw can it be a full index scan instead I wonder?)
"index_merge": nnn,
"range" : {
"key1" : nnn,
"key2" : nnn,
"key3" : nnn
}
}
Let's discuss it.
> + "table": {
> + "table_name": "t2",
> + "access_type": "ALL",
> + "possible_keys": ["key1", "key2", "key3", "key4"],
> + "r_loops": 5,
> + "rows": 1010,
> + "r_rows": 203.8,
> + "r_total_time_ms": "REPLACED",
> + "filtered": 100,
> + "r_filtered": 98.135
> + }
> + }
> + }
> +}
> +drop table t1,t2,t3,t4;
> diff --git a/mysql-test/t/analyze_format_json.test b/mysql-test/t/analyze_format_json.test
> index 64bafd7..0ae9d4f 100644
> --- a/mysql-test/t/analyze_format_json.test
> +++ b/mysql-test/t/analyze_format_json.test
> @@ -75,3 +75,33 @@ disconnect con1;
> connection default;
> drop table t1;
>
> +
> +--echo #
> +--echo # MDEV-7833:ANALYZE FORMAT=JSON and Range checked for each record
> +--echo #
> +create table t3(a int);
> +insert into t3 values (0),(1),(2),(3),(4),(5),(6),(7),(8),(9);
> +
> +create table t4(a int);
> +insert into t4 select A.a + B.a* 10 + C.a * 100 from t3 A, t3 B, t3 C;
> +
> +create table t1 (lb1 int, rb1 int, lb2 int, rb2 int, c1 int, c2 int);
> +
> +insert into t1 values (1,2,10,20,15,15);
> +insert into t1 values (3,5,10,20,15,15);
> +insert into t1 values (10,20,10,20,15,15);
> +insert into t1 values (10,20,1,2,15,15);
> +insert into t1 values (10,20,10,20,1,3);
> +
> +create table t2 (key1 int, key2 int, key3 int, key4 int, col1 int,
> + key(key1), key(key2), key(key3), key(key4));
> +insert into t2 select a,a,a,a,a from t3;
> +insert into t2 select 15,15,15,15,15 from t4;
> +
> +--replace_regex /"r_total_time_ms": [0-9]*[.]?[0-9]*/"r_total_time_ms": "REPLACED"/
> +analyze format=json
> +select * from t1, t2 where (t2.key1 between t1.lb1 and t1.rb1) and
> + (t2.key2 between t1.lb2 and t1.rb2) and
> + (t2.key3=t1.c1 OR t2.key4=t1.c2);
> +
> +drop table t1,t2,t3,t4;
> diff --git a/sql/sql_explain.cc b/sql/sql_explain.cc
> index 9d82f4f..edbc2bf 100644
> --- a/sql/sql_explain.cc
> +++ b/sql/sql_explain.cc
> @@ -1076,15 +1076,15 @@ int Explain_table_access::print_explain(select_result_sink *output, uint8 explai
> }
>
>
Please add a comment:
@return
ptr - pointer to the added string
NULL - out of memory error
Does it make sense to change the return type to 'const char*' ?
> -bool String_list::append_str(MEM_ROOT *mem_root, const char *str)
> +char *String_list::append_str(MEM_ROOT *mem_root, const char *str)
> {
> size_t len= strlen(str);
> char *cp;
> if (!(cp = (char*)alloc_root(mem_root, len+1)))
> - return 1;
> + return NULL;
> memcpy(cp, str, len+1);
> push_back(cp);
> - return 0;
> + return cp;
> }
>
>
> @@ -1209,6 +1209,26 @@ void Explain_table_access::print_explain_json(Explain_query *query,
> {
> writer->add_member("range-checked-for-each-record").start_object();
> add_json_keyset(writer, "keys", &possible_keys);
> + if (is_analyze)
> + {
> + writer->add_member("r_keys_none").add_ll(range_checked_fer->none);
> + writer->add_member("r_keys_index_merge").add_ll(range_checked_fer->
> + index_merge);
> + if (range_checked_fer->keys_stat)
> + {
> + writer->add_member("r_keys").start_object();
> + for (uint i= 0; i < range_checked_fer->keys; i++)
> + {
> + if (range_checked_fer->keys_stat_names[i])
> + {
> + writer->add_member(range_checked_fer->
> + keys_stat_names[i]).add_ll(range_checked_fer->
> + keys_stat[i]);
> + }
> + }
> + writer->end_object();
> + }
> + }
If we move this code into Range_checked_fer::print_json(), will it be possible
to make its members private?
> }
>
> if (full_scan_on_null_key)
> diff --git a/sql/sql_explain.h b/sql/sql_explain.h
> index e3b41ee..f86886d 100644
> --- a/sql/sql_explain.h
> +++ b/sql/sql_explain.h
> @@ -54,7 +54,7 @@ it into the slow query log.
> class String_list: public List<char>
> {
> public:
> - bool append_str(MEM_ROOT *mem_root, const char *str);
> + char *append_str(MEM_ROOT *mem_root, const char *str);
> };
>
> class Json_writer;
> @@ -627,6 +627,16 @@ class Explain_range_checked_fer : public Sql_alloc
> public:
> String_list key_set;
> key_map keys_map;
> +
> + ha_rows none, index_merge;
'none' looks weird. Would a 'full_scan' be better.
> + ha_rows *keys_stat;
> + char **keys_stat_names;
> + uint keys;
This needs to be documented.
> +
> + Explain_range_checked_fer()
> + :Sql_alloc(), none(0), index_merge(0),
> + keys_stat(0), keys_stat_names(0), keys(0)
> + {}
> };
>
> /*
> diff --git a/sql/sql_select.cc b/sql/sql_select.cc
> index 5c3e116..4eb3d81 100644
> --- a/sql/sql_select.cc
> +++ b/sql/sql_select.cc
> @@ -11398,6 +11398,7 @@ void JOIN_TAB::cleanup()
> table->reginfo.join_tab= 0;
> }
> end_read_record(&read_record);
> + explain_plan= 0;
It's a pointer, right? Please change s/0/NULL/
> DBUG_VOID_RETURN;
> }
>
> @@ -18762,9 +18763,30 @@ test_if_quick_select(JOIN_TAB *tab)
>
> delete tab->select->quick;
> tab->select->quick=0;
> - return tab->select->test_quick_select(tab->join->thd, tab->keys,
> - (table_map) 0, HA_POS_ERROR, 0,
> - FALSE, /*remove where parts*/FALSE);
> + int res= tab->select->test_quick_select(tab->join->thd, tab->keys,
> + (table_map) 0, HA_POS_ERROR, 0,
> + FALSE, /*remove where parts*/FALSE);
> + if (tab->explain_plan && tab->explain_plan->range_checked_fer)
> + {
> + if (tab->select->quick)
> + {
> + QUICK_SELECT_I *quick= tab->select->quick;
> + if (quick->index == MAX_KEY)
> + tab->explain_plan->range_checked_fer->index_merge++;
> + else
> + {
> + DBUG_ASSERT(quick->index < tab->explain_plan->range_checked_fer->keys);
> + DBUG_ASSERT(tab->explain_plan->range_checked_fer->keys_stat);
> + DBUG_ASSERT(tab->explain_plan->range_checked_fer->keys_stat_names);
> + DBUG_ASSERT(tab->explain_plan->range_checked_fer->keys_stat_names[
> + quick->index]);
> + tab->explain_plan->range_checked_fer->keys_stat[quick->index]++;
> + }
> + }
> + else
> + tab->explain_plan->range_checked_fer->none++;
> + }
> + return res;
I think it's bad that join execution code becomes littered with details about
how things are stored in the Explain data structures.
Can you add a set of methods to range_checked_fer:
count_full_scan();
count_index_merge();
count_range_scan();
and then only call these methods from here?
> }
>
>
> @@ -23360,6 +23382,32 @@ int append_possible_keys(MEM_ROOT *alloc, String_list &list, TABLE *table,
> return 0;
> }
>
What does this function do? AFAIU it does two things:
- copies indexes from possible_keys into a String_list.
- allocates an empty array of ha_rows().
The set of data structures used by Range_checked_fer is quite peculiar. This
function is only useful for Range_checked_fer. Do you think it would make
sense to made this a method of Range_checked_fer?
> +int append_possible_keys_stat(MEM_ROOT *alloc, String_list &list,
> + ha_rows **stat, char ***names,
> + TABLE *table, key_map possible_keys)
> +{
> + uint j;
> + multi_alloc_root(alloc, stat, sizeof(ha_rows) * table->s->keys,
> + names, sizeof(char *) * table->s->keys, NULL);
> + if ((!(*stat)) || (!(*names)))
> + {
> + (*stat)= NULL; (*names)= NULL;
> + return 1;
> + }
> + bzero(*stat, sizeof(ha_rows) * table->s->keys);
> + for (j= 0; j < table->s->keys; j++)
> + {
> + if (possible_keys.is_set(j))
> + {
> + char *nm= list.append_str(alloc, table->key_info[j].name);
> + (*names)[j]= nm;
> + }
> + else
> + (*names)[j]= NULL;
> + }
> + return 0;
> +}
> +
> /*
> TODO: this function is only applicable for the first non-const optimization
> join tab.
> @@ -23400,6 +23448,8 @@ void JOIN_TAB::save_explain_data(Explain_table_access *eta, table_map prefix_tab
> quick_type= -1;
> QUICK_SELECT_I *quick= NULL;
>
> +
> + explain_plan= eta;
> eta->key.clear();
> eta->quick_info= NULL;
>
> @@ -23663,9 +23713,16 @@ void JOIN_TAB::save_explain_data(Explain_table_access *eta, table_map prefix_tab
> {
> eta->push_extra(ET_RANGE_CHECKED_FOR_EACH_RECORD);
> eta->range_checked_fer= new (thd->mem_root) Explain_range_checked_fer;
> - eta->range_checked_fer->keys_map= tab->keys;
> - append_possible_keys(thd->mem_root, eta->range_checked_fer->key_set,
> - table, tab->keys);
> + if (eta->range_checked_fer)
> + {
> + eta->range_checked_fer->keys_map= tab->keys;
> + eta->range_checked_fer->keys= table->s->keys;
> + append_possible_keys_stat(thd->mem_root,
> + eta->range_checked_fer->key_set,
> + &eta->range_checked_fer->keys_stat,
> + &eta->range_checked_fer->keys_stat_names,
> + table, tab->keys);
> + }
> }
> else if (tab->select->cond ||
> (tab->cache_select && tab->cache_select->cond))
> diff --git a/sql/sql_select.h b/sql/sql_select.h
> index 0557e32..ef92690 100644
> --- a/sql/sql_select.h
> +++ b/sql/sql_select.h
> @@ -362,7 +362,12 @@ typedef struct st_join_table {
> SJ_TMP_TABLE *check_weed_out_table;
> /* for EXPLAIN only: */
> SJ_TMP_TABLE *first_weedout_table;
> -
> +
> + /**
> + reference to saved plan and execution statistics
> + */
> + Explain_table_access *explain_plan;
> +
> /*
> If set, means we should stop join enumeration after we've got the first
> match and return to the specified join tab. May point to
BR
Sergei
--
Sergei Petrunia, Software Developer
MariaDB Corporation | Skype: sergefp | Blog: http://s.petrunia.net/blog