← Back to team overview

maria-developers team mailing list archive

Re: [Commits] 2da93752430: MDEV-17255: New optimizer defaults and ANALYZE TABLE

 

Hi Varun,

Nice to see that most of the input has been addressed.

However, the issue of missing test coverage is still not resolved:

On Sat, Nov 24, 2018 at 09:47:57PM +0300, Sergey Petrunia wrote:
>> Please also add test coverage for this particular MDEV. This should be a
>> testcase that shows that 
>> 
>> - with PREFERABLY, ANALYZE will collect EITS stats, SELECT will use them.
>> 
>> - with PREFERABLY_FOR_READS :
>> --  ANALYZE will not collect EITS stats
>> --  ANALYZE .. PERSISTENT FOR ... will update  the EITS stats.
>> -- SELECT will use them.

and now it is still not addressed. I made this change:

@@ -104,6 +104,8 @@ Use_stat_tables_mode get_use_stat_tables_mode(THD *thd)
 inline
 bool check_eits_collection_allowed(THD *thd)
 {
+  if (get_use_stat_tables_mode(thd) == PREFERABLY_FOR_QUERIES)
+    DBUG_ASSERT(0);
   return (get_use_stat_tables_mode(thd) == COMPLEMENTARY ||
           get_use_stat_tables_mode(thd) == PREFERABLY);
 }

and the testsuite still passes!


On Wed, Dec 05, 2018 at 07:57:06PM +0530, Varun wrote:
> revision-id: 2da93752430d0dd07344c9d62f2468bba8962b77 (mariadb-10.3.6-266-g2da93752430)
> parent(s): e0739064450f2c2be6d5de1d799582121747dd39
> author: Varun Gupta
> committer: Varun Gupta
> timestamp: 2018-12-05 19:56:23 +0530
> message:
> 
> MDEV-17255: New optimizer defaults and ANALYZE TABLE
> 
> Added to new values to the server variable use_stat_tables.
> The values are COMPLEMENTARY_FOR_QUERIES and PREFERABLY_FOR_QUERIES.
> Both these values don't allow to collect EITS for queries like
>     analyze table t1;
> To collect EITS we would need to use the syntax with persistent like
>    analyze table t1 persistent for columns (col1,col2...) index (idx1, idx2...) / ALL
> 
> Changing the default value from NEVER to PREFERABLY_FOR_QUERIES.
> 
> ---
>  mysql-test/include/default_mysqld.cnf     |  1 +
>  mysql-test/main/stat_tables.result        | 59 +++++++++++++++++++++++++++++++
>  mysql-test/main/stat_tables.test          | 43 ++++++++++++++++++++++
>  mysql-test/main/stat_tables_innodb.result | 59 +++++++++++++++++++++++++++++++
>  sql/sql_admin.cc                          |  2 +-
>  sql/sql_statistics.cc                     |  5 ++-
>  sql/sql_statistics.h                      | 27 ++++++++++++++
>  sql/sys_vars.cc                           |  5 +--
>  8 files changed, 195 insertions(+), 6 deletions(-)
> 
> diff --git a/mysql-test/include/default_mysqld.cnf b/mysql-test/include/default_mysqld.cnf
> index 69a2b58288b..5ba3bdeb92c 100644
> --- a/mysql-test/include/default_mysqld.cnf
> +++ b/mysql-test/include/default_mysqld.cnf
> @@ -107,6 +107,7 @@ loose-performance-schema-consumer-thread-instrumentation=ON
>  binlog-direct-non-transactional-updates
>  
>  default-storage-engine=myisam
> +use-stat-tables=preferably_for_queries
>  
>  loose-ssl-ca=@ENV.MYSQL_TEST_DIR/std_data/cacert.pem
>  loose-ssl-cert=@ENV.MYSQL_TEST_DIR/std_data/server-cert.pem
> diff --git a/mysql-test/main/stat_tables.result b/mysql-test/main/stat_tables.result
> index 308529ece47..67bf3f9237d 100644
> --- a/mysql-test/main/stat_tables.result
> +++ b/mysql-test/main/stat_tables.result
> @@ -605,4 +605,63 @@ SELECT MAX(pk) FROM t1;
>  MAX(pk)
>  NULL
>  DROP TABLE t1;
> +#
> +# MDEV-17255: New optimizer defaults and ANALYZE TABLE
> +#
> +set @save_use_stat_tables= @@use_stat_tables;
> +create table t1 (a int, b int);
> +insert into t1(a,b) values (1,2),(1,3),(1,4),(1,5),(2,6),(2,7),(3,8),(3,9),(3,9),(4,10);
> +#
> +# with use_stat_tables= PREFERABLY_FOR_QUERIES
> +# analyze table t1 will not collect statistics
> +#
> +analyze table t1;
> +Table	Op	Msg_type	Msg_text
> +test.t1	analyze	status	Engine-independent statistics collected
> +test.t1	analyze	status	OK
> +select * from mysql.column_stats;
> +db_name	table_name	column_name	min_value	max_value	nulls_ratio	avg_length	avg_frequency	hist_size	hist_type	histogram
> +test	t1	a	1	4	0.0000	4.0000	2.5000	0	NULL	NULL
> +test	t1	b	2	10	0.0000	4.0000	1.1111	0	NULL	NULL
> +analyze
> +select * from t1 where a = 1 and b=3;
> +id	select_type	table	type	possible_keys	key	key_len	ref	rows	r_rows	filtered	r_filtered	Extra
> +1	SIMPLE	t1	ALL	NULL	NULL	NULL	NULL	10	10.00	2.78	10.00	Using where
> +#
> +# with use_stat_tables= PREFERABLY_FOR_QUERIES
> +# analyze table t1 will  collect statistics if we use PERSISTENT
> +# for columns, indexes or everything
> +#
> +analyze table t1 persistent for columns (a) indexes ();
> +Table	Op	Msg_type	Msg_text
> +test.t1	analyze	status	Engine-independent statistics collected
> +test.t1	analyze	status	Table is already up to date
> +select * from mysql.column_stats;
> +db_name	table_name	column_name	min_value	max_value	nulls_ratio	avg_length	avg_frequency	hist_size	hist_type	histogram
> +test	t1	a	1	4	0.0000	4.0000	2.5000	0	NULL	NULL
> +test	t1	b	2	10	0.0000	4.0000	1.1111	0	NULL	NULL
> +# filtered shows that we used the data from stat tables
> +analyze
> +select * from t1 where a = 1 and b=3;
> +id	select_type	table	type	possible_keys	key	key_len	ref	rows	r_rows	filtered	r_filtered	Extra
> +1	SIMPLE	t1	ALL	NULL	NULL	NULL	NULL	10	10.00	2.78	10.00	Using where
> +#
> +# with use_stat_tables= PREFERABLY
> +# analyze table t1 will collect statistics
> +#
> +set @@use_stat_tables=PREFERABLY;
> +analyze table t1;
> +Table	Op	Msg_type	Msg_text
> +test.t1	analyze	status	Engine-independent statistics collected
> +test.t1	analyze	status	Table is already up to date
> +select * from mysql.column_stats;
> +db_name	table_name	column_name	min_value	max_value	nulls_ratio	avg_length	avg_frequency	hist_size	hist_type	histogram
> +test	t1	a	1	4	0.0000	4.0000	2.5000	0	NULL	NULL
> +test	t1	b	2	10	0.0000	4.0000	1.1111	0	NULL	NULL
> +# filtered shows that we used the data from stat tables
> +analyze
> +select * from t1 where a=1 and b=3;
> +id	select_type	table	type	possible_keys	key	key_len	ref	rows	r_rows	filtered	r_filtered	Extra
> +1	SIMPLE	t1	ALL	NULL	NULL	NULL	NULL	10	10.00	2.78	10.00	Using where
> +drop table t1;
>  set use_stat_tables=@save_use_stat_tables;
> diff --git a/mysql-test/main/stat_tables.test b/mysql-test/main/stat_tables.test
> index 19bc0fa2f46..8616f6386e1 100644
> --- a/mysql-test/main/stat_tables.test
> +++ b/mysql-test/main/stat_tables.test
> @@ -384,4 +384,47 @@ SELECT MAX(pk) FROM t1;
>  
>  DROP TABLE t1;
>  
> +--echo #
> +--echo # MDEV-17255: New optimizer defaults and ANALYZE TABLE
> +--echo #
> +
> +set @save_use_stat_tables= @@use_stat_tables;
> +create table t1 (a int, b int);
> +insert into t1(a,b) values (1,2),(1,3),(1,4),(1,5),(2,6),(2,7),(3,8),(3,9),(3,9),(4,10);
> +
> +--echo #
> +--echo # with use_stat_tables= PREFERABLY_FOR_QUERIES
> +--echo # analyze table t1 will not collect statistics
> +--echo #
> +
> +analyze table t1;
> +select * from mysql.column_stats;
> +analyze
> +select * from t1 where a = 1 and b=3;
> +
> +--echo #
> +--echo # with use_stat_tables= PREFERABLY_FOR_QUERIES
> +--echo # analyze table t1 will  collect statistics if we use PERSISTENT
> +--echo # for columns, indexes or everything
> +--echo #
> +
> +analyze table t1 persistent for columns (a) indexes ();
> +select * from mysql.column_stats;
> +--echo # filtered shows that we used the data from stat tables
> +analyze
> +select * from t1 where a = 1 and b=3;
> +
> +--echo #
> +--echo # with use_stat_tables= PREFERABLY
> +--echo # analyze table t1 will collect statistics
> +--echo #
> +
> +set @@use_stat_tables=PREFERABLY;
> +analyze table t1;
> +select * from mysql.column_stats;
> +--echo # filtered shows that we used the data from stat tables
> +analyze
> +select * from t1 where a=1 and b=3;
> +drop table t1;
> +
>  set use_stat_tables=@save_use_stat_tables;
> diff --git a/mysql-test/main/stat_tables_innodb.result b/mysql-test/main/stat_tables_innodb.result
> index 8198e94dc10..4bb8f34712c 100644
> --- a/mysql-test/main/stat_tables_innodb.result
> +++ b/mysql-test/main/stat_tables_innodb.result
> @@ -632,6 +632,65 @@ SELECT MAX(pk) FROM t1;
>  MAX(pk)
>  NULL
>  DROP TABLE t1;
> +#
> +# MDEV-17255: New optimizer defaults and ANALYZE TABLE
> +#
> +set @save_use_stat_tables= @@use_stat_tables;
> +create table t1 (a int, b int);
> +insert into t1(a,b) values (1,2),(1,3),(1,4),(1,5),(2,6),(2,7),(3,8),(3,9),(3,9),(4,10);
> +#
> +# with use_stat_tables= PREFERABLY_FOR_QUERIES
> +# analyze table t1 will not collect statistics
> +#
> +analyze table t1;
> +Table	Op	Msg_type	Msg_text
> +test.t1	analyze	status	Engine-independent statistics collected
> +test.t1	analyze	status	OK
> +select * from mysql.column_stats;
> +db_name	table_name	column_name	min_value	max_value	nulls_ratio	avg_length	avg_frequency	hist_size	hist_type	histogram
> +test	t1	a	1	4	0.0000	4.0000	2.5000	0	NULL	NULL
> +test	t1	b	2	10	0.0000	4.0000	1.1111	0	NULL	NULL
> +analyze
> +select * from t1 where a = 1 and b=3;
> +id	select_type	table	type	possible_keys	key	key_len	ref	rows	r_rows	filtered	r_filtered	Extra
> +1	SIMPLE	t1	ALL	NULL	NULL	NULL	NULL	10	10.00	2.78	10.00	Using where
> +#
> +# with use_stat_tables= PREFERABLY_FOR_QUERIES
> +# analyze table t1 will  collect statistics if we use PERSISTENT
> +# for columns, indexes or everything
> +#
> +analyze table t1 persistent for columns (a) indexes ();
> +Table	Op	Msg_type	Msg_text
> +test.t1	analyze	status	Engine-independent statistics collected
> +test.t1	analyze	status	OK
> +select * from mysql.column_stats;
> +db_name	table_name	column_name	min_value	max_value	nulls_ratio	avg_length	avg_frequency	hist_size	hist_type	histogram
> +test	t1	a	1	4	0.0000	4.0000	2.5000	0	NULL	NULL
> +test	t1	b	2	10	0.0000	4.0000	1.1111	0	NULL	NULL
> +# filtered shows that we used the data from stat tables
> +analyze
> +select * from t1 where a = 1 and b=3;
> +id	select_type	table	type	possible_keys	key	key_len	ref	rows	r_rows	filtered	r_filtered	Extra
> +1	SIMPLE	t1	ALL	NULL	NULL	NULL	NULL	10	10.00	2.78	10.00	Using where
> +#
> +# with use_stat_tables= PREFERABLY
> +# analyze table t1 will collect statistics
> +#
> +set @@use_stat_tables=PREFERABLY;
> +analyze table t1;
> +Table	Op	Msg_type	Msg_text
> +test.t1	analyze	status	Engine-independent statistics collected
> +test.t1	analyze	status	OK
> +select * from mysql.column_stats;
> +db_name	table_name	column_name	min_value	max_value	nulls_ratio	avg_length	avg_frequency	hist_size	hist_type	histogram
> +test	t1	a	1	4	0.0000	4.0000	2.5000	0	NULL	NULL
> +test	t1	b	2	10	0.0000	4.0000	1.1111	0	NULL	NULL
> +# filtered shows that we used the data from stat tables
> +analyze
> +select * from t1 where a=1 and b=3;
> +id	select_type	table	type	possible_keys	key	key_len	ref	rows	r_rows	filtered	r_filtered	Extra
> +1	SIMPLE	t1	ALL	NULL	NULL	NULL	NULL	10	10.00	2.78	10.00	Using where
> +drop table t1;
>  set use_stat_tables=@save_use_stat_tables;
>  set optimizer_switch=@save_optimizer_switch_for_stat_tables_test;
>  SET SESSION STORAGE_ENGINE=DEFAULT;
> diff --git a/sql/sql_admin.cc b/sql/sql_admin.cc
> index d0d959de8f9..b39103e382a 100644
> --- a/sql/sql_admin.cc
> +++ b/sql/sql_admin.cc
> @@ -767,7 +767,7 @@ static bool mysql_admin_table(THD* thd, TABLE_LIST* tables,
>        }
>        collect_eis=
>          (table->table->s->table_category == TABLE_CATEGORY_USER &&
> -         (get_use_stat_tables_mode(thd) > NEVER ||
> +         (check_eits_collection_allowed(thd) ||
>            lex->with_persistent_for_clause));
>  
>  
> diff --git a/sql/sql_statistics.cc b/sql/sql_statistics.cc
> index 04806f07b3b..8c88f7f927f 100644
> --- a/sql/sql_statistics.cc
> +++ b/sql/sql_statistics.cc
> @@ -3720,9 +3720,8 @@ void set_statistics_for_table(THD *thd, TABLE *table)
>  {
>    TABLE_STATISTICS_CB *stats_cb= &table->s->stats_cb;
>    Table_statistics *read_stats= stats_cb->table_stats;
> -  Use_stat_tables_mode use_stat_table_mode= get_use_stat_tables_mode(thd);
>    table->used_stat_records= 
> -    (use_stat_table_mode <= COMPLEMENTARY ||
> +    (!check_eits_preferred(thd) ||
>       !table->stats_is_read || read_stats->cardinality_is_null) ?
>      table->file->stats.records : read_stats->cardinality;
>    KEY *key_info, *key_info_end;
> @@ -3730,7 +3729,7 @@ void set_statistics_for_table(THD *thd, TABLE *table)
>         key_info < key_info_end; key_info++)
>    {
>      key_info->is_statistics_from_stat_tables=
> -      (use_stat_table_mode > COMPLEMENTARY &&
> +      (check_eits_preferred(thd) &&
>         table->stats_is_read &&
>         key_info->read_stats->avg_frequency_is_inited() &&
>         key_info->read_stats->get_avg_frequency(0) > 0.5);
> diff --git a/sql/sql_statistics.h b/sql/sql_statistics.h
> index 39cddf95188..8439ac8db53 100644
> --- a/sql/sql_statistics.h
> +++ b/sql/sql_statistics.h
> @@ -16,12 +16,26 @@
>  #ifndef SQL_STATISTICS_H
>  #define SQL_STATISTICS_H
>  
> +/*
> +  For COMPLEMENTARY_FOR_QUERIES and PREFERABLY_FOR_QUERIES they are
> +  similar to the COMPLEMENTARY and PREFERABLY respectively except that
> +  with these values we would not be collecting EITS for queries like
> +    ANALYZE TABLE t1;
> +  To collect EITS with these values, we have to use PERSISITENT FOR
> +  analyze table t1 persistent for
> +     columns (col1,col2...) index (idx1, idx2...)
> +     or
> +  analyze table t1 persistent for all
> +*/
> +
>  typedef
>  enum enum_use_stat_tables_mode
>  {
>    NEVER,
>    COMPLEMENTARY,
>    PREFERABLY,
> +  COMPLEMENTARY_FOR_QUERIES,
> +  PREFERABLY_FOR_QUERIES
>  } Use_stat_tables_mode;
>  
>  typedef
> @@ -87,6 +101,19 @@ Use_stat_tables_mode get_use_stat_tables_mode(THD *thd)
>  { 
>    return (Use_stat_tables_mode) (thd->variables.use_stat_tables);
>  }
> +inline
> +bool check_eits_collection_allowed(THD *thd)
> +{
> +  return (get_use_stat_tables_mode(thd) == COMPLEMENTARY ||
> +          get_use_stat_tables_mode(thd) == PREFERABLY);
> +}
> +
> +inline
> +bool check_eits_preferred(THD *thd)
> +{
> +  return (get_use_stat_tables_mode(thd) == PREFERABLY ||
> +          get_use_stat_tables_mode(thd) == PREFERABLY_FOR_QUERIES);
> +}
>  
>  int read_statistics_for_tables_if_needed(THD *thd, TABLE_LIST *tables);
>  int collect_statistics_for_table(THD *thd, TABLE *table);
> diff --git a/sql/sys_vars.cc b/sql/sys_vars.cc
> index 420e7feabde..b925de34db4 100644
> --- a/sql/sys_vars.cc
> +++ b/sql/sys_vars.cc
> @@ -5842,12 +5842,13 @@ static Sys_var_ulong Sys_progress_report_time(
>         VALID_RANGE(0, UINT_MAX), DEFAULT(5), BLOCK_SIZE(1));
>  
>  const char *use_stat_tables_modes[] =
> -           {"NEVER", "COMPLEMENTARY", "PREFERABLY", 0};
> +           {"NEVER", "COMPLEMENTARY", "PREFERABLY",
> +           "COMPLEMENTARY_FOR_QUERIES", "PREFERABLY_FOR_QUERIES", 0};
>  static Sys_var_enum Sys_optimizer_use_stat_tables(
>         "use_stat_tables",
>         "Specifies how to use system statistics tables",
>         SESSION_VAR(use_stat_tables), CMD_LINE(REQUIRED_ARG),
> -       use_stat_tables_modes, DEFAULT(2));
> +       use_stat_tables_modes, DEFAULT(4));
>  
>  static Sys_var_ulong Sys_histogram_size(
>         "histogram_size",
> _______________________________________________
> commits mailing list
> commits@xxxxxxxxxxx
> https://lists.askmonty.org/cgi-bin/mailman/listinfo/commits

-- 
BR
 Sergei
-- 
Sergei Petrunia, Software Developer
MariaDB Corporation | Skype: sergefp | Blog: http://s.petrunia.net/blog