maria-developers team mailing list archive
-
maria-developers team
-
Mailing list archive
-
Message #07349
Re: [GSoC] self-tuning optimizer
Hi all,
As per serg comments on the previous commit, I have modified the code to
add an init function which will initialize all the cost factors at the
start of the server. The latest code is at
https://github.com/igniting/server/commits/selfTuningOptimizer. I have also
attached the diff here.
However, I am still not able to come up with an example for my test case
for READ_TIME_FACTOR and SCAN_TIME_FACTOR. I will go through the code again
to figure it out. Currently the test case which I have written just gives
the workflow: EXPLAIN SELECT ...; Update constant tables directly;
Reconnect; EXPLAIN SELECT.
Once I have this figured out, I will move to measuring the query time, and
updating the stats for the corresponding cost factors.
I will also write a blog which will have more details.
Regards
Anshu Avinash
On Wed, May 28, 2014 at 1:05 AM, Roberto Spadim <roberto@xxxxxxxxxxxxx>
wrote:
> nice :) maybe part of msql? :) very old times
>
> nice, i will stay updated with this gson project, very interesting and a
> very good work :)
>
>
> 2014-05-27 15:35 GMT-03:00 Sergei Golubchik <serg@xxxxxxxxxxx>:
>
> Hi, Roberto!
>>
>> On May 27, Roberto Spadim wrote:
>> > Hi Sergei!
>> > nice :) i'm reading about it
>> > just one doubt i didn't understand everything yet
>> > there's a read time and a scan time, what the difference? read =
>> sequencial
>> > read and scan = non sequencial read? or something like table read cost,
>> > index read cost?
>>
>> Yes, the handler methods are:
>>
>> virtual double handler::scan_time()
>> { return stats.data_file_length / IO_SIZE + 2; }
>> virtual double handler::read_time(uint index, uint ranges, ha_rows rows)
>> { return ranges+rows; }
>>
>> these are the default implementations, in the base handler class. Method
>> names are historical and *very* old.
>>
>> The return values are kind of "number of disk seeks". You can see that
>> scan_time() assumes a sequential disk read of blocks of IO_SIZE bytes.
>>
>> While read_time() assumes random reads of 'rows' rows from the data
>> file and 'ranges' ranges from the index.
>>
>> These default implementations, I suspect, predate even MyISAM.
>>
>> Regards,
>> Sergei
>>
>>
>
>
> --
> Roberto Spadim
> SPAEmpresarial
> Eng. Automação e Controle
>
diff --git a/mysql-test/r/costmodel.result b/mysql-test/r/costmodel.result
new file mode 100644
index 0000000..d37e2b8
--- /dev/null
+++ b/mysql-test/r/costmodel.result
@@ -0,0 +1,46 @@
+DROP DATABASE IF EXISTS world;
+CREATE DATABASE world;
+use world;
+CREATE TABLE Country (
+Code char(3) NOT NULL default '',
+Name char(52) NOT NULL default '',
+SurfaceArea float(10,2) NOT NULL default '0.00',
+Population int(11) NOT NULL default '0',
+Capital int(11) default NULL,
+PRIMARY KEY (Code),
+UNIQUE INDEX (Name)
+);
+CREATE TABLE City (
+ID int(11) NOT NULL auto_increment,
+Name char(35) NOT NULL default '',
+Country char(3) NOT NULL default '',
+Population int(11) NOT NULL default '0',
+PRIMARY KEY (ID),
+INDEX (Population),
+INDEX (Country)
+);
+CREATE TABLE CountryLanguage (
+Country char(3) NOT NULL default '',
+Language char(30) NOT NULL default '',
+Percentage float(3,1) NOT NULL default '0.0',
+PRIMARY KEY (Country, Language),
+INDEX (Percentage)
+);
+EXPLAIN
+SELECT c.name, ci.name FROM Country c, City ci
+WHERE c.capital=ci.id;
+id select_type table type possible_keys key key_len ref rows Extra
+1 SIMPLE c ALL NULL NULL NULL NULL 239 Using where
+1 SIMPLE ci eq_ref PRIMARY PRIMARY 4 world.c.Capital 1
+use mysql;
+UPDATE optimizer_cost_factors
+SET const_value=1000.0
+WHERE const_name='READ_TIME_FACTOR';
+use world;
+EXPLAIN
+SELECT c.name, ci.name FROM Country c, City ci
+WHERE c.capital=ci.id;
+id select_type table type possible_keys key key_len ref rows Extra
+1 SIMPLE c ALL NULL NULL NULL NULL 239 Using where
+1 SIMPLE ci eq_ref PRIMARY PRIMARY 4 world.c.Capital 1
+DROP DATABASE world;
diff --git a/mysql-test/t/costmodel.test b/mysql-test/t/costmodel.test
new file mode 100644
index 0000000..6baf691
--- /dev/null
+++ b/mysql-test/t/costmodel.test
@@ -0,0 +1,37 @@
+--disable_warnings
+DROP DATABASE IF EXISTS world;
+--enable_warnings
+
+CREATE DATABASE world;
+use world;
+
+--source include/world_schema.inc
+--disable_query_log
+--disable_result_log
+--disable_warnings
+--source include/world.inc
+--enable_query_log
+--enable_result_log
+--enable_warnings
+
+EXPLAIN
+ SELECT c.name, ci.name FROM Country c, City ci
+ WHERE c.capital=ci.id;
+
+use mysql;
+
+UPDATE optimizer_cost_factors
+SET const_value=1000.0
+WHERE const_name='READ_TIME_FACTOR';
+
+--enable_reconnect
+--exec echo "restart" > $MYSQLTEST_VARDIR/tmp/mysqld.1.expect
+--source include/wait_until_connected_again.inc
+
+use world;
+
+EXPLAIN
+ SELECT c.name, ci.name FROM Country c, City ci
+ WHERE c.capital=ci.id;
+
+DROP DATABASE world;
diff --git a/scripts/mysql_system_tables.sql b/scripts/mysql_system_tables.sql
index 34742aa..04a2ece 100644
--- a/scripts/mysql_system_tables.sql
+++ b/scripts/mysql_system_tables.sql
@@ -229,3 +229,10 @@ CREATE TABLE IF NOT EXISTS index_stats (db_name varchar(64) NOT NULL, table_name
-- we avoid mixed-engine transactions.
set storage_engine=@orig_storage_engine;
CREATE TABLE IF NOT EXISTS gtid_slave_pos (domain_id INT UNSIGNED NOT NULL, sub_id BIGINT UNSIGNED NOT NULL, server_id INT UNSIGNED NOT NULL, seq_no BIGINT UNSIGNED NOT NULL, PRIMARY KEY (domain_id, sub_id)) comment='Replication slave GTID position';
+
+-- Tables for Self Tuning Cost Optimizer
+
+CREATE TABLE IF NOT EXISTS optimizer_cost_factors (const_name varchar(64) NOT NULL, const_value double NOT NULL, PRIMARY KEY (const_name) ) ENGINE=MyISAM CHARACTER SET utf8 COLLATE utf8_bin comment='Constants for optimizer';
+
+-- Remember for later if all_constants table already existed
+set @had_optimizer_cost_factors_table= @@warning_count != 0;
diff --git a/scripts/mysql_system_tables_data.sql b/scripts/mysql_system_tables_data.sql
index 38e6a54..0760ae7 100644
--- a/scripts/mysql_system_tables_data.sql
+++ b/scripts/mysql_system_tables_data.sql
@@ -53,3 +53,9 @@ INSERT INTO tmp_proxies_priv VALUES ('localhost', 'root', '', '', TRUE, '', now(
REPLACE INTO tmp_proxies_priv SELECT @current_hostname, 'root', '', '', TRUE, '', now() FROM DUAL WHERE @current_hostname != 'localhost';
INSERT INTO proxies_priv SELECT * FROM tmp_proxies_priv WHERE @had_proxies_priv_table=0;
DROP TABLE tmp_proxies_priv;
+
+CREATE TEMPORARY TABLE tmp_optimizer_cost_factors LIKE optimizer_cost_factors;
+INSERT INTO tmp_optimizer_cost_factors VALUES ('READ_TIME_FACTOR', 1.0);
+INSERT INTO tmp_optimizer_cost_factors VALUES ('SCAN_TIME_FACTOR', 1.0);
+INSERT INTO optimizer_cost_factors SELECT * FROM tmp_optimizer_cost_factors WHERE @had_optimizer_cost_factors_table=0;
+DROP TABLE tmp_optimizer_cost_factors;
diff --git a/sql/CMakeLists.txt b/sql/CMakeLists.txt
index 41e2701..380442b 100644
--- a/sql/CMakeLists.txt
+++ b/sql/CMakeLists.txt
@@ -86,7 +86,7 @@ SET (SQL_SOURCE
create_options.cc multi_range_read.cc
opt_index_cond_pushdown.cc opt_subselect.cc
opt_table_elimination.cc sql_expression_cache.cc
- gcalc_slicescan.cc gcalc_tools.cc
+ gcalc_slicescan.cc gcalc_tools.cc opt_costmodel.h opt_costmodel.cc
threadpool_common.cc
../sql-common/mysql_async.c
my_apc.cc my_apc.h
diff --git a/sql/handler.h b/sql/handler.h
index 5bba570..fba41ba 100644
--- a/sql/handler.h
+++ b/sql/handler.h
@@ -33,6 +33,7 @@
#include "structs.h" /* SHOW_COMP_OPTION */
#include "sql_array.h" /* Dynamic_array<> */
#include "mdl.h"
+#include "opt_costmodel.h"
#include <my_compare.h>
#include <ft_global.h>
@@ -2741,7 +2742,8 @@ public:
reset_statistics();
}
virtual double scan_time()
- { return ulonglong2double(stats.data_file_length) / IO_SIZE + 2; }
+ { return Cost_factors::get_scan_time_factor() *
+ (ulonglong2double(stats.data_file_length) / IO_SIZE + 2); }
/**
The cost of reading a set of ranges from the table using an index
@@ -2755,7 +2757,7 @@ public:
using an index by calling it using read_time(index, 1, table_size).
*/
virtual double read_time(uint index, uint ranges, ha_rows rows)
- { return rows2double(ranges+rows); }
+ { return Cost_factors::get_read_time_factor() * rows2double(ranges+rows); }
/**
Calculate cost of 'keyread' scan for given index and number of records.
diff --git a/sql/mysqld.cc b/sql/mysqld.cc
index 90d6659..e05f30b 100644
--- a/sql/mysqld.cc
+++ b/sql/mysqld.cc
@@ -51,6 +51,7 @@
#include "sql_manager.h" // stop_handle_manager, start_handle_manager
#include "sql_expression_cache.h" // subquery_cache_miss, subquery_cache_hit
#include "sys_vars_shared.h"
+#include "opt_costmodel.h"
#include <m_ctype.h>
#include <my_dir.h>
@@ -5412,6 +5413,8 @@ int mysqld_main(int argc, char **argv)
#endif
}
+ Cost_factors::init();
+
init_status_vars();
if (opt_bootstrap) /* If running with bootstrap, do not start replication. */
opt_skip_slave_start= 1;
diff --git a/sql/opt_costmodel.cc b/sql/opt_costmodel.cc
new file mode 100644
index 0000000..72f02fb
--- /dev/null
+++ b/sql/opt_costmodel.cc
@@ -0,0 +1,103 @@
+#include "sql_base.h"
+#include "key.h"
+#include "records.h"
+#include "opt_costmodel.h"
+
+/* Name of database to which the optimizer_cost_factors table belongs */
+static const LEX_STRING db_name= { C_STRING_WITH_LEN("mysql") };
+
+/* Name of all_constants table */
+static const LEX_STRING table_name = { C_STRING_WITH_LEN("optimizer_cost_factors") };
+
+/* Columns in the optimizer_cost_factors_table */
+enum cost_factors_col
+{
+ COST_FACTORS_CONST_NAME,
+ COST_FACTORS_CONST_VALUE
+};
+
+/* Name of the constants present in table */
+static const LEX_STRING read_time_factor_name = { C_STRING_WITH_LEN("READ_TIME_FACTOR") };
+static const LEX_STRING scan_time_factor_name = { C_STRING_WITH_LEN("SCAN_TIME_FACTOR") };
+
+/* Helper functions for Cost_factors::init() */
+
+static
+inline int open_table(THD *thd, TABLE_LIST *table,
+ Open_tables_backup *backup,
+ bool for_write)
+{
+ enum thr_lock_type lock_type_arg= for_write? TL_WRITE: TL_READ;
+ table->init_one_table(db_name.str, db_name.length, table_name.str,
+ table_name.length, table_name.str, lock_type_arg);
+ return open_system_tables_for_read(thd, table, backup);
+}
+
+static inline void clean_up(THD *thd)
+{
+ close_mysql_tables(thd);
+ delete thd;
+}
+
+/* Initialize static class members */
+bool Cost_factors::isInitialized= false;
+double Cost_factors::read_time_factor= 1.0;
+double Cost_factors::scan_time_factor= 1.0;
+
+/* Interface functions */
+
+void Cost_factors::init()
+{
+ TABLE_LIST table_list;
+ Open_tables_backup open_tables_backup;
+ READ_RECORD read_record_info;
+ TABLE *table;
+ MEM_ROOT mem;
+ init_sql_alloc(&mem, 1024, 0, MYF(0));
+ THD *new_thd = new THD;
+
+ if(!new_thd)
+ {
+ free_root(&mem, MYF(0));
+ DBUG_VOID_RETURN;
+ }
+
+ new_thd->thread_stack= (char *) &new_thd;
+ new_thd->store_globals();
+ new_thd->set_db(db_name.str, db_name.length);
+
+ if(open_table(new_thd, &table_list, &open_tables_backup, FALSE))
+ {
+ clean_up(new_thd);
+ DBUG_VOID_RETURN;
+ }
+
+ table= table_list.table;
+ if(init_read_record(&read_record_info, new_thd, table, NULL, 1, 0, FALSE))
+ {
+ clean_up(new_thd);
+ DBUG_VOID_RETURN;
+ }
+
+ table->use_all_columns();
+ while (!read_record_info.read_record(&read_record_info))
+ {
+ LEX_STRING const_name;
+ const_name.str= get_field(&mem, table->field[COST_FACTORS_CONST_NAME]);
+ const_name.length= strlen(const_name.str);
+
+ double const_value;
+ const_value= table->field[COST_FACTORS_CONST_VALUE]->val_real();
+ if(!strcmp(const_name.str, read_time_factor_name.str))
+ {
+ Cost_factors::read_time_factor= const_value;
+ }
+ else if(!strcmp(const_name.str, scan_time_factor_name.str))
+ {
+ Cost_factors::scan_time_factor= const_value;
+ }
+ }
+
+ clean_up(new_thd);
+ DBUG_VOID_RETURN;
+}
diff --git a/sql/opt_costmodel.h b/sql/opt_costmodel.h
new file mode 100644
index 0000000..3928691
--- /dev/null
+++ b/sql/opt_costmodel.h
@@ -0,0 +1,25 @@
+/* Interface to get constants */
+
+#ifndef SQL_OPT_COSTMODEL_INCLUDED
+#define SQL_OPT_COSTMODEL_INCLUDED
+
+class Cost_factors
+{
+private:
+ static bool isInitialized;
+ static double read_time_factor;
+ static double scan_time_factor;
+
+public:
+ static void init();
+ static inline double get_read_time_factor()
+ {
+ return read_time_factor;
+ }
+ static inline double get_scan_time_factor()
+ {
+ return scan_time_factor;
+ }
+};
+
+#endif /* SQL_OPT_COSTMODEL_INCLUDED */
Follow ups
References
-
Re: [GSoC] Introduction Mail
From: Colin Charles, 2014-05-08
-
Re: [GSoC] Introduction Mail
From: Anshu Avinash, 2014-05-08
-
Re: [GSoC] Introduction Mail
From: Anshu Avinash, 2014-05-12
-
Re: [GSoC] Introduction Mail
From: Anshu Avinash, 2014-05-19
-
Re: [GSoC] Introduction Mail
From: Roberto Spadim, 2014-05-19
-
Re: [GSoC] Introduction Mail
From: Anshu Avinash, 2014-05-25
-
Re: [GSoC] self-tuning optimizer
From: Sergei Golubchik, 2014-05-26
-
Re: [GSoC] self-tuning optimizer
From: Roberto Spadim, 2014-05-27
-
Re: [GSoC] self-tuning optimizer
From: Sergei Golubchik, 2014-05-27
-
Re: [GSoC] self-tuning optimizer
From: Roberto Spadim, 2014-05-27
-
Re: [GSoC] self-tuning optimizer
From: Sergei Golubchik, 2014-05-27
-
Re: [GSoC] self-tuning optimizer
From: Roberto Spadim, 2014-05-27