maria-developers team mailing list archive
-
maria-developers team
-
Mailing list archive
-
Message #11556
Re: [Commits] 2da93752430: MDEV-17255: New optimizer defaults and ANALYZE TABLE
Hi Varun,
Nice to see that most of the input has been addressed.
However, the issue of missing test coverage is still not resolved:
On Sat, Nov 24, 2018 at 09:47:57PM +0300, Sergey Petrunia wrote:
>> Please also add test coverage for this particular MDEV. This should be a
>> testcase that shows that
>>
>> - with PREFERABLY, ANALYZE will collect EITS stats, SELECT will use them.
>>
>> - with PREFERABLY_FOR_READS :
>> -- ANALYZE will not collect EITS stats
>> -- ANALYZE .. PERSISTENT FOR ... will update the EITS stats.
>> -- SELECT will use them.
and now it is still not addressed. I made this change:
@@ -104,6 +104,8 @@ Use_stat_tables_mode get_use_stat_tables_mode(THD *thd)
inline
bool check_eits_collection_allowed(THD *thd)
{
+ if (get_use_stat_tables_mode(thd) == PREFERABLY_FOR_QUERIES)
+ DBUG_ASSERT(0);
return (get_use_stat_tables_mode(thd) == COMPLEMENTARY ||
get_use_stat_tables_mode(thd) == PREFERABLY);
}
and the testsuite still passes!
On Wed, Dec 05, 2018 at 07:57:06PM +0530, Varun wrote:
> revision-id: 2da93752430d0dd07344c9d62f2468bba8962b77 (mariadb-10.3.6-266-g2da93752430)
> parent(s): e0739064450f2c2be6d5de1d799582121747dd39
> author: Varun Gupta
> committer: Varun Gupta
> timestamp: 2018-12-05 19:56:23 +0530
> message:
>
> MDEV-17255: New optimizer defaults and ANALYZE TABLE
>
> Added to new values to the server variable use_stat_tables.
> The values are COMPLEMENTARY_FOR_QUERIES and PREFERABLY_FOR_QUERIES.
> Both these values don't allow to collect EITS for queries like
> analyze table t1;
> To collect EITS we would need to use the syntax with persistent like
> analyze table t1 persistent for columns (col1,col2...) index (idx1, idx2...) / ALL
>
> Changing the default value from NEVER to PREFERABLY_FOR_QUERIES.
>
> ---
> mysql-test/include/default_mysqld.cnf | 1 +
> mysql-test/main/stat_tables.result | 59 +++++++++++++++++++++++++++++++
> mysql-test/main/stat_tables.test | 43 ++++++++++++++++++++++
> mysql-test/main/stat_tables_innodb.result | 59 +++++++++++++++++++++++++++++++
> sql/sql_admin.cc | 2 +-
> sql/sql_statistics.cc | 5 ++-
> sql/sql_statistics.h | 27 ++++++++++++++
> sql/sys_vars.cc | 5 +--
> 8 files changed, 195 insertions(+), 6 deletions(-)
>
> diff --git a/mysql-test/include/default_mysqld.cnf b/mysql-test/include/default_mysqld.cnf
> index 69a2b58288b..5ba3bdeb92c 100644
> --- a/mysql-test/include/default_mysqld.cnf
> +++ b/mysql-test/include/default_mysqld.cnf
> @@ -107,6 +107,7 @@ loose-performance-schema-consumer-thread-instrumentation=ON
> binlog-direct-non-transactional-updates
>
> default-storage-engine=myisam
> +use-stat-tables=preferably_for_queries
>
> loose-ssl-ca=@ENV.MYSQL_TEST_DIR/std_data/cacert.pem
> loose-ssl-cert=@ENV.MYSQL_TEST_DIR/std_data/server-cert.pem
> diff --git a/mysql-test/main/stat_tables.result b/mysql-test/main/stat_tables.result
> index 308529ece47..67bf3f9237d 100644
> --- a/mysql-test/main/stat_tables.result
> +++ b/mysql-test/main/stat_tables.result
> @@ -605,4 +605,63 @@ SELECT MAX(pk) FROM t1;
> MAX(pk)
> NULL
> DROP TABLE t1;
> +#
> +# MDEV-17255: New optimizer defaults and ANALYZE TABLE
> +#
> +set @save_use_stat_tables= @@use_stat_tables;
> +create table t1 (a int, b int);
> +insert into t1(a,b) values (1,2),(1,3),(1,4),(1,5),(2,6),(2,7),(3,8),(3,9),(3,9),(4,10);
> +#
> +# with use_stat_tables= PREFERABLY_FOR_QUERIES
> +# analyze table t1 will not collect statistics
> +#
> +analyze table t1;
> +Table Op Msg_type Msg_text
> +test.t1 analyze status Engine-independent statistics collected
> +test.t1 analyze status OK
> +select * from mysql.column_stats;
> +db_name table_name column_name min_value max_value nulls_ratio avg_length avg_frequency hist_size hist_type histogram
> +test t1 a 1 4 0.0000 4.0000 2.5000 0 NULL NULL
> +test t1 b 2 10 0.0000 4.0000 1.1111 0 NULL NULL
> +analyze
> +select * from t1 where a = 1 and b=3;
> +id select_type table type possible_keys key key_len ref rows r_rows filtered r_filtered Extra
> +1 SIMPLE t1 ALL NULL NULL NULL NULL 10 10.00 2.78 10.00 Using where
> +#
> +# with use_stat_tables= PREFERABLY_FOR_QUERIES
> +# analyze table t1 will collect statistics if we use PERSISTENT
> +# for columns, indexes or everything
> +#
> +analyze table t1 persistent for columns (a) indexes ();
> +Table Op Msg_type Msg_text
> +test.t1 analyze status Engine-independent statistics collected
> +test.t1 analyze status Table is already up to date
> +select * from mysql.column_stats;
> +db_name table_name column_name min_value max_value nulls_ratio avg_length avg_frequency hist_size hist_type histogram
> +test t1 a 1 4 0.0000 4.0000 2.5000 0 NULL NULL
> +test t1 b 2 10 0.0000 4.0000 1.1111 0 NULL NULL
> +# filtered shows that we used the data from stat tables
> +analyze
> +select * from t1 where a = 1 and b=3;
> +id select_type table type possible_keys key key_len ref rows r_rows filtered r_filtered Extra
> +1 SIMPLE t1 ALL NULL NULL NULL NULL 10 10.00 2.78 10.00 Using where
> +#
> +# with use_stat_tables= PREFERABLY
> +# analyze table t1 will collect statistics
> +#
> +set @@use_stat_tables=PREFERABLY;
> +analyze table t1;
> +Table Op Msg_type Msg_text
> +test.t1 analyze status Engine-independent statistics collected
> +test.t1 analyze status Table is already up to date
> +select * from mysql.column_stats;
> +db_name table_name column_name min_value max_value nulls_ratio avg_length avg_frequency hist_size hist_type histogram
> +test t1 a 1 4 0.0000 4.0000 2.5000 0 NULL NULL
> +test t1 b 2 10 0.0000 4.0000 1.1111 0 NULL NULL
> +# filtered shows that we used the data from stat tables
> +analyze
> +select * from t1 where a=1 and b=3;
> +id select_type table type possible_keys key key_len ref rows r_rows filtered r_filtered Extra
> +1 SIMPLE t1 ALL NULL NULL NULL NULL 10 10.00 2.78 10.00 Using where
> +drop table t1;
> set use_stat_tables=@save_use_stat_tables;
> diff --git a/mysql-test/main/stat_tables.test b/mysql-test/main/stat_tables.test
> index 19bc0fa2f46..8616f6386e1 100644
> --- a/mysql-test/main/stat_tables.test
> +++ b/mysql-test/main/stat_tables.test
> @@ -384,4 +384,47 @@ SELECT MAX(pk) FROM t1;
>
> DROP TABLE t1;
>
> +--echo #
> +--echo # MDEV-17255: New optimizer defaults and ANALYZE TABLE
> +--echo #
> +
> +set @save_use_stat_tables= @@use_stat_tables;
> +create table t1 (a int, b int);
> +insert into t1(a,b) values (1,2),(1,3),(1,4),(1,5),(2,6),(2,7),(3,8),(3,9),(3,9),(4,10);
> +
> +--echo #
> +--echo # with use_stat_tables= PREFERABLY_FOR_QUERIES
> +--echo # analyze table t1 will not collect statistics
> +--echo #
> +
> +analyze table t1;
> +select * from mysql.column_stats;
> +analyze
> +select * from t1 where a = 1 and b=3;
> +
> +--echo #
> +--echo # with use_stat_tables= PREFERABLY_FOR_QUERIES
> +--echo # analyze table t1 will collect statistics if we use PERSISTENT
> +--echo # for columns, indexes or everything
> +--echo #
> +
> +analyze table t1 persistent for columns (a) indexes ();
> +select * from mysql.column_stats;
> +--echo # filtered shows that we used the data from stat tables
> +analyze
> +select * from t1 where a = 1 and b=3;
> +
> +--echo #
> +--echo # with use_stat_tables= PREFERABLY
> +--echo # analyze table t1 will collect statistics
> +--echo #
> +
> +set @@use_stat_tables=PREFERABLY;
> +analyze table t1;
> +select * from mysql.column_stats;
> +--echo # filtered shows that we used the data from stat tables
> +analyze
> +select * from t1 where a=1 and b=3;
> +drop table t1;
> +
> set use_stat_tables=@save_use_stat_tables;
> diff --git a/mysql-test/main/stat_tables_innodb.result b/mysql-test/main/stat_tables_innodb.result
> index 8198e94dc10..4bb8f34712c 100644
> --- a/mysql-test/main/stat_tables_innodb.result
> +++ b/mysql-test/main/stat_tables_innodb.result
> @@ -632,6 +632,65 @@ SELECT MAX(pk) FROM t1;
> MAX(pk)
> NULL
> DROP TABLE t1;
> +#
> +# MDEV-17255: New optimizer defaults and ANALYZE TABLE
> +#
> +set @save_use_stat_tables= @@use_stat_tables;
> +create table t1 (a int, b int);
> +insert into t1(a,b) values (1,2),(1,3),(1,4),(1,5),(2,6),(2,7),(3,8),(3,9),(3,9),(4,10);
> +#
> +# with use_stat_tables= PREFERABLY_FOR_QUERIES
> +# analyze table t1 will not collect statistics
> +#
> +analyze table t1;
> +Table Op Msg_type Msg_text
> +test.t1 analyze status Engine-independent statistics collected
> +test.t1 analyze status OK
> +select * from mysql.column_stats;
> +db_name table_name column_name min_value max_value nulls_ratio avg_length avg_frequency hist_size hist_type histogram
> +test t1 a 1 4 0.0000 4.0000 2.5000 0 NULL NULL
> +test t1 b 2 10 0.0000 4.0000 1.1111 0 NULL NULL
> +analyze
> +select * from t1 where a = 1 and b=3;
> +id select_type table type possible_keys key key_len ref rows r_rows filtered r_filtered Extra
> +1 SIMPLE t1 ALL NULL NULL NULL NULL 10 10.00 2.78 10.00 Using where
> +#
> +# with use_stat_tables= PREFERABLY_FOR_QUERIES
> +# analyze table t1 will collect statistics if we use PERSISTENT
> +# for columns, indexes or everything
> +#
> +analyze table t1 persistent for columns (a) indexes ();
> +Table Op Msg_type Msg_text
> +test.t1 analyze status Engine-independent statistics collected
> +test.t1 analyze status OK
> +select * from mysql.column_stats;
> +db_name table_name column_name min_value max_value nulls_ratio avg_length avg_frequency hist_size hist_type histogram
> +test t1 a 1 4 0.0000 4.0000 2.5000 0 NULL NULL
> +test t1 b 2 10 0.0000 4.0000 1.1111 0 NULL NULL
> +# filtered shows that we used the data from stat tables
> +analyze
> +select * from t1 where a = 1 and b=3;
> +id select_type table type possible_keys key key_len ref rows r_rows filtered r_filtered Extra
> +1 SIMPLE t1 ALL NULL NULL NULL NULL 10 10.00 2.78 10.00 Using where
> +#
> +# with use_stat_tables= PREFERABLY
> +# analyze table t1 will collect statistics
> +#
> +set @@use_stat_tables=PREFERABLY;
> +analyze table t1;
> +Table Op Msg_type Msg_text
> +test.t1 analyze status Engine-independent statistics collected
> +test.t1 analyze status OK
> +select * from mysql.column_stats;
> +db_name table_name column_name min_value max_value nulls_ratio avg_length avg_frequency hist_size hist_type histogram
> +test t1 a 1 4 0.0000 4.0000 2.5000 0 NULL NULL
> +test t1 b 2 10 0.0000 4.0000 1.1111 0 NULL NULL
> +# filtered shows that we used the data from stat tables
> +analyze
> +select * from t1 where a=1 and b=3;
> +id select_type table type possible_keys key key_len ref rows r_rows filtered r_filtered Extra
> +1 SIMPLE t1 ALL NULL NULL NULL NULL 10 10.00 2.78 10.00 Using where
> +drop table t1;
> set use_stat_tables=@save_use_stat_tables;
> set optimizer_switch=@save_optimizer_switch_for_stat_tables_test;
> SET SESSION STORAGE_ENGINE=DEFAULT;
> diff --git a/sql/sql_admin.cc b/sql/sql_admin.cc
> index d0d959de8f9..b39103e382a 100644
> --- a/sql/sql_admin.cc
> +++ b/sql/sql_admin.cc
> @@ -767,7 +767,7 @@ static bool mysql_admin_table(THD* thd, TABLE_LIST* tables,
> }
> collect_eis=
> (table->table->s->table_category == TABLE_CATEGORY_USER &&
> - (get_use_stat_tables_mode(thd) > NEVER ||
> + (check_eits_collection_allowed(thd) ||
> lex->with_persistent_for_clause));
>
>
> diff --git a/sql/sql_statistics.cc b/sql/sql_statistics.cc
> index 04806f07b3b..8c88f7f927f 100644
> --- a/sql/sql_statistics.cc
> +++ b/sql/sql_statistics.cc
> @@ -3720,9 +3720,8 @@ void set_statistics_for_table(THD *thd, TABLE *table)
> {
> TABLE_STATISTICS_CB *stats_cb= &table->s->stats_cb;
> Table_statistics *read_stats= stats_cb->table_stats;
> - Use_stat_tables_mode use_stat_table_mode= get_use_stat_tables_mode(thd);
> table->used_stat_records=
> - (use_stat_table_mode <= COMPLEMENTARY ||
> + (!check_eits_preferred(thd) ||
> !table->stats_is_read || read_stats->cardinality_is_null) ?
> table->file->stats.records : read_stats->cardinality;
> KEY *key_info, *key_info_end;
> @@ -3730,7 +3729,7 @@ void set_statistics_for_table(THD *thd, TABLE *table)
> key_info < key_info_end; key_info++)
> {
> key_info->is_statistics_from_stat_tables=
> - (use_stat_table_mode > COMPLEMENTARY &&
> + (check_eits_preferred(thd) &&
> table->stats_is_read &&
> key_info->read_stats->avg_frequency_is_inited() &&
> key_info->read_stats->get_avg_frequency(0) > 0.5);
> diff --git a/sql/sql_statistics.h b/sql/sql_statistics.h
> index 39cddf95188..8439ac8db53 100644
> --- a/sql/sql_statistics.h
> +++ b/sql/sql_statistics.h
> @@ -16,12 +16,26 @@
> #ifndef SQL_STATISTICS_H
> #define SQL_STATISTICS_H
>
> +/*
> + For COMPLEMENTARY_FOR_QUERIES and PREFERABLY_FOR_QUERIES they are
> + similar to the COMPLEMENTARY and PREFERABLY respectively except that
> + with these values we would not be collecting EITS for queries like
> + ANALYZE TABLE t1;
> + To collect EITS with these values, we have to use PERSISITENT FOR
> + analyze table t1 persistent for
> + columns (col1,col2...) index (idx1, idx2...)
> + or
> + analyze table t1 persistent for all
> +*/
> +
> typedef
> enum enum_use_stat_tables_mode
> {
> NEVER,
> COMPLEMENTARY,
> PREFERABLY,
> + COMPLEMENTARY_FOR_QUERIES,
> + PREFERABLY_FOR_QUERIES
> } Use_stat_tables_mode;
>
> typedef
> @@ -87,6 +101,19 @@ Use_stat_tables_mode get_use_stat_tables_mode(THD *thd)
> {
> return (Use_stat_tables_mode) (thd->variables.use_stat_tables);
> }
> +inline
> +bool check_eits_collection_allowed(THD *thd)
> +{
> + return (get_use_stat_tables_mode(thd) == COMPLEMENTARY ||
> + get_use_stat_tables_mode(thd) == PREFERABLY);
> +}
> +
> +inline
> +bool check_eits_preferred(THD *thd)
> +{
> + return (get_use_stat_tables_mode(thd) == PREFERABLY ||
> + get_use_stat_tables_mode(thd) == PREFERABLY_FOR_QUERIES);
> +}
>
> int read_statistics_for_tables_if_needed(THD *thd, TABLE_LIST *tables);
> int collect_statistics_for_table(THD *thd, TABLE *table);
> diff --git a/sql/sys_vars.cc b/sql/sys_vars.cc
> index 420e7feabde..b925de34db4 100644
> --- a/sql/sys_vars.cc
> +++ b/sql/sys_vars.cc
> @@ -5842,12 +5842,13 @@ static Sys_var_ulong Sys_progress_report_time(
> VALID_RANGE(0, UINT_MAX), DEFAULT(5), BLOCK_SIZE(1));
>
> const char *use_stat_tables_modes[] =
> - {"NEVER", "COMPLEMENTARY", "PREFERABLY", 0};
> + {"NEVER", "COMPLEMENTARY", "PREFERABLY",
> + "COMPLEMENTARY_FOR_QUERIES", "PREFERABLY_FOR_QUERIES", 0};
> static Sys_var_enum Sys_optimizer_use_stat_tables(
> "use_stat_tables",
> "Specifies how to use system statistics tables",
> SESSION_VAR(use_stat_tables), CMD_LINE(REQUIRED_ARG),
> - use_stat_tables_modes, DEFAULT(2));
> + use_stat_tables_modes, DEFAULT(4));
>
> static Sys_var_ulong Sys_histogram_size(
> "histogram_size",
> _______________________________________________
> commits mailing list
> commits@xxxxxxxxxxx
> https://lists.askmonty.org/cgi-bin/mailman/listinfo/commits
--
BR
Sergei
--
Sergei Petrunia, Software Developer
MariaDB Corporation | Skype: sergefp | Blog: http://s.petrunia.net/blog