← Back to team overview

maria-developers team mailing list archive

Re: [GSoC] self-tuning optimizer

 

Hi all,

As per serg comments on the previous commit, I have modified the code to
add an init function which will initialize all the cost factors at the
start of the server. The latest code is at
https://github.com/igniting/server/commits/selfTuningOptimizer. I have also
attached the diff here.

However, I am still not able to come up with an example for my test case
for READ_TIME_FACTOR and SCAN_TIME_FACTOR. I will go through the code again
to figure it out. Currently the test case which I have written just gives
the workflow: EXPLAIN SELECT ...; Update constant tables directly;
Reconnect; EXPLAIN SELECT.

Once I have this figured out, I will move to measuring the query time, and
updating the stats for the corresponding cost factors.

I will also write a blog which will have more details.


Regards
Anshu Avinash


On Wed, May 28, 2014 at 1:05 AM, Roberto Spadim <roberto@xxxxxxxxxxxxx>
wrote:

> nice :) maybe part of msql? :) very old times
>
> nice, i will stay updated with this gson project, very interesting and a
> very good work :)
>
>
> 2014-05-27 15:35 GMT-03:00 Sergei Golubchik <serg@xxxxxxxxxxx>:
>
> Hi, Roberto!
>>
>> On May 27, Roberto Spadim wrote:
>> > Hi Sergei!
>> > nice :) i'm reading about it
>> > just one doubt i didn't understand everything yet
>> > there's a read time and a scan time, what the difference? read =
>> sequencial
>> > read and scan = non sequencial read? or something like table read cost,
>> > index read cost?
>>
>> Yes, the handler methods are:
>>
>>   virtual double handler::scan_time()
>>   { return stats.data_file_length / IO_SIZE + 2; }
>>   virtual double handler::read_time(uint index, uint ranges, ha_rows rows)
>>   { return ranges+rows; }
>>
>> these are the default implementations, in the base handler class. Method
>> names are historical and *very* old.
>>
>> The return values are kind of "number of disk seeks". You can see that
>> scan_time() assumes a sequential disk read of blocks of IO_SIZE bytes.
>>
>> While read_time() assumes random reads of 'rows' rows from the data
>> file and 'ranges' ranges from the index.
>>
>> These default implementations, I suspect, predate even MyISAM.
>>
>> Regards,
>> Sergei
>>
>>
>
>
> --
> Roberto Spadim
> SPAEmpresarial
> Eng. Automação e Controle
>
diff --git a/mysql-test/r/costmodel.result b/mysql-test/r/costmodel.result
new file mode 100644
index 0000000..d37e2b8
--- /dev/null
+++ b/mysql-test/r/costmodel.result
@@ -0,0 +1,46 @@
+DROP DATABASE IF EXISTS world;
+CREATE DATABASE world;
+use world;
+CREATE TABLE Country (
+Code char(3) NOT NULL default '',
+Name char(52) NOT NULL default '',
+SurfaceArea float(10,2) NOT NULL default '0.00',
+Population int(11) NOT NULL default '0',
+Capital int(11) default NULL,
+PRIMARY KEY  (Code),
+UNIQUE INDEX (Name)
+);
+CREATE TABLE City (
+ID int(11) NOT NULL auto_increment,
+Name char(35) NOT NULL default '',
+Country char(3) NOT NULL default '',
+Population int(11) NOT NULL default '0',
+PRIMARY KEY  (ID),
+INDEX (Population),
+INDEX (Country) 
+);
+CREATE TABLE CountryLanguage (
+Country char(3) NOT NULL default '',
+Language char(30) NOT NULL default '',
+Percentage float(3,1) NOT NULL default '0.0',
+PRIMARY KEY  (Country, Language),
+INDEX (Percentage)
+);
+EXPLAIN
+SELECT c.name, ci.name FROM Country c, City ci
+WHERE c.capital=ci.id;
+id	select_type	table	type	possible_keys	key	key_len	ref	rows	Extra
+1	SIMPLE	c	ALL	NULL	NULL	NULL	NULL	239	Using where
+1	SIMPLE	ci	eq_ref	PRIMARY	PRIMARY	4	world.c.Capital	1	
+use mysql;
+UPDATE optimizer_cost_factors
+SET const_value=1000.0
+WHERE const_name='READ_TIME_FACTOR';
+use world;
+EXPLAIN
+SELECT c.name, ci.name FROM Country c, City ci
+WHERE c.capital=ci.id;
+id	select_type	table	type	possible_keys	key	key_len	ref	rows	Extra
+1	SIMPLE	c	ALL	NULL	NULL	NULL	NULL	239	Using where
+1	SIMPLE	ci	eq_ref	PRIMARY	PRIMARY	4	world.c.Capital	1	
+DROP DATABASE world;
diff --git a/mysql-test/t/costmodel.test b/mysql-test/t/costmodel.test
new file mode 100644
index 0000000..6baf691
--- /dev/null
+++ b/mysql-test/t/costmodel.test
@@ -0,0 +1,37 @@
+--disable_warnings
+DROP DATABASE IF EXISTS world;
+--enable_warnings
+
+CREATE DATABASE world;
+use world;
+
+--source include/world_schema.inc
+--disable_query_log
+--disable_result_log
+--disable_warnings
+--source include/world.inc
+--enable_query_log
+--enable_result_log
+--enable_warnings
+
+EXPLAIN
+  SELECT c.name, ci.name FROM Country c, City ci
+    WHERE c.capital=ci.id;
+
+use mysql;
+
+UPDATE optimizer_cost_factors
+SET const_value=1000.0
+WHERE const_name='READ_TIME_FACTOR';
+
+--enable_reconnect
+--exec echo "restart" > $MYSQLTEST_VARDIR/tmp/mysqld.1.expect
+--source include/wait_until_connected_again.inc
+
+use world;
+
+EXPLAIN
+  SELECT c.name, ci.name FROM Country c, City ci
+    WHERE c.capital=ci.id;
+
+DROP DATABASE world;
diff --git a/scripts/mysql_system_tables.sql b/scripts/mysql_system_tables.sql
index 34742aa..04a2ece 100644
--- a/scripts/mysql_system_tables.sql
+++ b/scripts/mysql_system_tables.sql
@@ -229,3 +229,10 @@ CREATE TABLE IF NOT EXISTS index_stats (db_name varchar(64) NOT NULL, table_name
 -- we avoid mixed-engine transactions.
 set storage_engine=@orig_storage_engine;
 CREATE TABLE IF NOT EXISTS gtid_slave_pos (domain_id INT UNSIGNED NOT NULL, sub_id BIGINT UNSIGNED NOT NULL, server_id INT UNSIGNED NOT NULL, seq_no BIGINT UNSIGNED NOT NULL, PRIMARY KEY (domain_id, sub_id)) comment='Replication slave GTID position';
+
+-- Tables for Self Tuning Cost Optimizer
+
+CREATE TABLE IF NOT EXISTS optimizer_cost_factors (const_name varchar(64) NOT NULL, const_value double NOT NULL, PRIMARY KEY (const_name) ) ENGINE=MyISAM CHARACTER SET utf8 COLLATE utf8_bin comment='Constants for optimizer';
+
+-- Remember for later if all_constants table already existed
+set @had_optimizer_cost_factors_table= @@warning_count != 0;
diff --git a/scripts/mysql_system_tables_data.sql b/scripts/mysql_system_tables_data.sql
index 38e6a54..0760ae7 100644
--- a/scripts/mysql_system_tables_data.sql
+++ b/scripts/mysql_system_tables_data.sql
@@ -53,3 +53,9 @@ INSERT INTO tmp_proxies_priv VALUES ('localhost', 'root', '', '', TRUE, '', now(
 REPLACE INTO tmp_proxies_priv SELECT @current_hostname, 'root', '', '', TRUE, '', now() FROM DUAL WHERE @current_hostname != 'localhost';
 INSERT INTO  proxies_priv SELECT * FROM tmp_proxies_priv WHERE @had_proxies_priv_table=0;
 DROP TABLE tmp_proxies_priv;
+
+CREATE TEMPORARY TABLE tmp_optimizer_cost_factors LIKE optimizer_cost_factors;
+INSERT INTO tmp_optimizer_cost_factors VALUES ('READ_TIME_FACTOR', 1.0);
+INSERT INTO tmp_optimizer_cost_factors VALUES ('SCAN_TIME_FACTOR', 1.0);
+INSERT INTO optimizer_cost_factors SELECT * FROM tmp_optimizer_cost_factors WHERE @had_optimizer_cost_factors_table=0;
+DROP TABLE tmp_optimizer_cost_factors;
diff --git a/sql/CMakeLists.txt b/sql/CMakeLists.txt
index 41e2701..380442b 100644
--- a/sql/CMakeLists.txt
+++ b/sql/CMakeLists.txt
@@ -86,7 +86,7 @@ SET (SQL_SOURCE
                create_options.cc multi_range_read.cc
                opt_index_cond_pushdown.cc opt_subselect.cc
                opt_table_elimination.cc sql_expression_cache.cc
-               gcalc_slicescan.cc gcalc_tools.cc
+               gcalc_slicescan.cc gcalc_tools.cc opt_costmodel.h opt_costmodel.cc
 			   threadpool_common.cc 
 			   ../sql-common/mysql_async.c
                my_apc.cc my_apc.h
diff --git a/sql/handler.h b/sql/handler.h
index 5bba570..fba41ba 100644
--- a/sql/handler.h
+++ b/sql/handler.h
@@ -33,6 +33,7 @@
 #include "structs.h"                            /* SHOW_COMP_OPTION */
 #include "sql_array.h"          /* Dynamic_array<> */
 #include "mdl.h"
+#include "opt_costmodel.h"
 
 #include <my_compare.h>
 #include <ft_global.h>
@@ -2741,7 +2742,8 @@ public:
     reset_statistics();
   }
   virtual double scan_time()
-  { return ulonglong2double(stats.data_file_length) / IO_SIZE + 2; }
+  { return Cost_factors::get_scan_time_factor() *
+            (ulonglong2double(stats.data_file_length) / IO_SIZE + 2); }
 
   /**
      The cost of reading a set of ranges from the table using an index
@@ -2755,7 +2757,7 @@ public:
      using an index by calling it using read_time(index, 1, table_size).
   */
   virtual double read_time(uint index, uint ranges, ha_rows rows)
-  { return rows2double(ranges+rows); }
+  { return Cost_factors::get_read_time_factor() * rows2double(ranges+rows); }
 
   /**
     Calculate cost of 'keyread' scan for given index and number of records.
diff --git a/sql/mysqld.cc b/sql/mysqld.cc
index 90d6659..e05f30b 100644
--- a/sql/mysqld.cc
+++ b/sql/mysqld.cc
@@ -51,6 +51,7 @@
 #include "sql_manager.h"  // stop_handle_manager, start_handle_manager
 #include "sql_expression_cache.h" // subquery_cache_miss, subquery_cache_hit
 #include "sys_vars_shared.h"
+#include "opt_costmodel.h"
 
 #include <m_ctype.h>
 #include <my_dir.h>
@@ -5412,6 +5413,8 @@ int mysqld_main(int argc, char **argv)
 #endif
   }
 
+  Cost_factors::init();
+
   init_status_vars();
   if (opt_bootstrap) /* If running with bootstrap, do not start replication. */
     opt_skip_slave_start= 1;
diff --git a/sql/opt_costmodel.cc b/sql/opt_costmodel.cc
new file mode 100644
index 0000000..72f02fb
--- /dev/null
+++ b/sql/opt_costmodel.cc
@@ -0,0 +1,103 @@
+#include "sql_base.h"
+#include "key.h"
+#include "records.h"
+#include "opt_costmodel.h"
+
+/* Name of database to which the optimizer_cost_factors table belongs */
+static const LEX_STRING db_name= { C_STRING_WITH_LEN("mysql") };
+
+/* Name of all_constants table */
+static const LEX_STRING table_name = { C_STRING_WITH_LEN("optimizer_cost_factors") };
+
+/* Columns in the optimizer_cost_factors_table */
+enum cost_factors_col
+{
+  COST_FACTORS_CONST_NAME,
+  COST_FACTORS_CONST_VALUE
+};
+
+/* Name of the constants present in table */
+static const LEX_STRING read_time_factor_name = { C_STRING_WITH_LEN("READ_TIME_FACTOR") };
+static const LEX_STRING scan_time_factor_name = { C_STRING_WITH_LEN("SCAN_TIME_FACTOR") };
+
+/* Helper functions for Cost_factors::init() */
+
+static
+inline int open_table(THD *thd, TABLE_LIST *table,
+                      Open_tables_backup *backup,
+                      bool for_write)
+{
+  enum thr_lock_type lock_type_arg= for_write? TL_WRITE: TL_READ;
+  table->init_one_table(db_name.str, db_name.length, table_name.str,
+                        table_name.length, table_name.str, lock_type_arg);
+  return open_system_tables_for_read(thd, table, backup);
+}
+
+static inline void clean_up(THD *thd)
+{
+  close_mysql_tables(thd);
+  delete thd;
+}
+
+/* Initialize static class members */
+bool Cost_factors::isInitialized= false;
+double Cost_factors::read_time_factor= 1.0;
+double Cost_factors::scan_time_factor= 1.0;
+
+/* Interface functions */
+
+void Cost_factors::init()
+{
+  TABLE_LIST table_list;
+  Open_tables_backup open_tables_backup;
+  READ_RECORD read_record_info;
+  TABLE *table;
+  MEM_ROOT mem;
+  init_sql_alloc(&mem, 1024, 0, MYF(0));
+  THD *new_thd = new THD;
+  
+  if(!new_thd)
+  {
+    free_root(&mem, MYF(0));
+    DBUG_VOID_RETURN;
+  }
+
+  new_thd->thread_stack= (char *) &new_thd;
+  new_thd->store_globals();
+  new_thd->set_db(db_name.str, db_name.length);
+
+  if(open_table(new_thd, &table_list, &open_tables_backup, FALSE))
+  {
+    clean_up(new_thd);
+    DBUG_VOID_RETURN;
+  }
+
+  table= table_list.table;
+  if(init_read_record(&read_record_info, new_thd, table, NULL, 1, 0, FALSE))
+  {
+    clean_up(new_thd);
+    DBUG_VOID_RETURN;
+  }
+
+  table->use_all_columns();
+  while (!read_record_info.read_record(&read_record_info))
+  {
+    LEX_STRING const_name;
+    const_name.str= get_field(&mem, table->field[COST_FACTORS_CONST_NAME]);
+    const_name.length= strlen(const_name.str);
+
+    double const_value;
+    const_value= table->field[COST_FACTORS_CONST_VALUE]->val_real();
+    if(!strcmp(const_name.str, read_time_factor_name.str))
+    {
+      Cost_factors::read_time_factor= const_value;
+    }
+    else if(!strcmp(const_name.str, scan_time_factor_name.str))
+    {
+      Cost_factors::scan_time_factor= const_value;
+    }
+  }
+
+  clean_up(new_thd);
+  DBUG_VOID_RETURN;
+}
diff --git a/sql/opt_costmodel.h b/sql/opt_costmodel.h
new file mode 100644
index 0000000..3928691
--- /dev/null
+++ b/sql/opt_costmodel.h
@@ -0,0 +1,25 @@
+/* Interface to get constants */
+
+#ifndef SQL_OPT_COSTMODEL_INCLUDED
+#define SQL_OPT_COSTMODEL_INCLUDED
+
+class Cost_factors
+{
+private:
+  static bool isInitialized;
+  static double read_time_factor;
+  static double scan_time_factor;
+
+public:
+  static void init();
+  static inline double get_read_time_factor()
+  {
+    return read_time_factor;
+  }
+  static inline double get_scan_time_factor()
+  {
+    return scan_time_factor;
+  }
+};
+
+#endif /* SQL_OPT_COSTMODEL_INCLUDED */

Follow ups

References