maria-developers team mailing list archive

Thread
Date

Re: [GSoC] self-tuning optimizer

To: Sergei Golubchik <serg@xxxxxxxxxxx>
From: Anshu Avinash <anshu.avinash35@xxxxxxxxx>
Date: Wed, 11 Jun 2014 11:31:29 +0530
Cc: "maria-developers@xxxxxxxxxxxxxxxxxxx" <maria-developers@xxxxxxxxxxxxxxxxxxx>
In-reply-to: <20140610074903.GA22489@meddwl.fritz.box>

Hi serg,

I have attached the updated diff. I'm still unable to observe the effect
introduced by the cost factors. Maybe I just need to study the usage of
scan_time() and read_time() in greater detail.

Regards
Anshu


On Tue, Jun 10, 2014 at 1:19 PM, Sergei Golubchik <serg@xxxxxxxxxxx> wrote:

> Hi, Anshu!
>
> On Jun 10, Anshu Avinash wrote:
> > Hi serg,
> >
> > I have implemented your suggestions. In the test case I created a table
> > with 25 rows. Explain for 'select * from t1 where a > 19' gave 'ALL'
> while
> > explain for 'select * from t1 where a > 20' gives range.
> > I have also written public methods ha_scan_time() and ha_read_time(). I
> > should replace every occurrence of handler::scan_time() with
> > ha_scan_time(), right? How do I make sure that I don't miss any place?
>
> See other ha_xxx vs xxx methods, e.g. ha_rnd_init and rnd_init.
>
> You make scan_time() protected - and the compiler won't allow it to be
> called outside of the handler class.
>
> > Also regarding making everything in the class Cost_factors static vs
> > creating an object and using it everywhere: cann't we use a namespace
> > Cost_factors? How is it done usually? I don't see much usage of
> > namespaces in the code.
>
> Yes, namespaces aren't used much in MariaDB, but feel free to use them,
> if you'd like.
>
> Regards,
> Sergei
>

diff --git a/mysql-test/r/costmodel.result b/mysql-test/r/costmodel.result
new file mode 100644
index 0000000..6668758
--- /dev/null
+++ b/mysql-test/r/costmodel.result
@@ -0,0 +1,20 @@
+DROP TABLE IF EXISTS t1;
+CREATE TABLE t1 (a int auto_increment primary key, b int);
+INSERT INTO t1(b) values (1),(2),(3),(4),(5),(6),(7),(8),(9),(10),(11),(12),
+(13),(14),(15),(16),(17),(18),(19),(20),(21),(22),(23),(24),(25);
+EXPLAIN
+SELECT * FROM t1
+WHERE a > 21;
+id	select_type	table	type	possible_keys	key	key_len	ref	rows	Extra
+1	SIMPLE	t1	range	PRIMARY	PRIMARY	4	NULL	5	Using index condition
+use mysql;
+UPDATE optimizer_cost_factors
+SET const_value=2.0
+WHERE const_name='SCAN_TIME_FACTOR';
+use test;
+EXPLAIN
+SELECT * FROM t1
+WHERE a > 21;
+id	select_type	table	type	possible_keys	key	key_len	ref	rows	Extra
+1	SIMPLE	t1	range	PRIMARY	PRIMARY	4	NULL	5	Using index condition
+DROP TABLE t1;
diff --git a/mysql-test/t/costmodel.test b/mysql-test/t/costmodel.test
new file mode 100644
index 0000000..69b6976
--- /dev/null
+++ b/mysql-test/t/costmodel.test
@@ -0,0 +1,29 @@
+--disable_warnings
+DROP TABLE IF EXISTS t1;
+--enable_warnings
+
+CREATE TABLE t1 (a int auto_increment primary key, b int);
+INSERT INTO t1(b) values (1),(2),(3),(4),(5),(6),(7),(8),(9),(10),(11),(12),
+(13),(14),(15),(16),(17),(18),(19),(20),(21),(22),(23),(24),(25);
+
+EXPLAIN
+  SELECT * FROM t1
+    WHERE a > 21;
+
+use mysql;
+
+UPDATE optimizer_cost_factors
+SET const_value=2.0
+WHERE const_name='SCAN_TIME_FACTOR';
+
+--enable_reconnect
+--exec echo "restart" > $MYSQLTEST_VARDIR/tmp/mysqld.1.expect
+--source include/wait_until_connected_again.inc
+
+use test;
+
+EXPLAIN
+  SELECT * FROM t1
+    WHERE a > 21;
+
+DROP TABLE t1;
diff --git a/scripts/mysql_system_tables.sql b/scripts/mysql_system_tables.sql
index 34742aa..44bd2a4 100644
--- a/scripts/mysql_system_tables.sql
+++ b/scripts/mysql_system_tables.sql
@@ -229,3 +229,7 @@ CREATE TABLE IF NOT EXISTS index_stats (db_name varchar(64) NOT NULL, table_name
 -- we avoid mixed-engine transactions.
 set storage_engine=@orig_storage_engine;
 CREATE TABLE IF NOT EXISTS gtid_slave_pos (domain_id INT UNSIGNED NOT NULL, sub_id BIGINT UNSIGNED NOT NULL, server_id INT UNSIGNED NOT NULL, seq_no BIGINT UNSIGNED NOT NULL, PRIMARY KEY (domain_id, sub_id)) comment='Replication slave GTID position';
+
+-- Tables for Self Tuning Cost Optimizer
+
+CREATE TABLE IF NOT EXISTS optimizer_cost_factors (const_name varchar(64) NOT NULL, const_value double NOT NULL, PRIMARY KEY (const_name) ) ENGINE=MyISAM CHARACTER SET latin1 COLLATE latin1_general_ci comment='Constants for optimizer';
diff --git a/scripts/mysql_system_tables_data.sql b/scripts/mysql_system_tables_data.sql
index 38e6a54..2bc41a8 100644
--- a/scripts/mysql_system_tables_data.sql
+++ b/scripts/mysql_system_tables_data.sql
@@ -53,3 +53,6 @@ INSERT INTO tmp_proxies_priv VALUES ('localhost', 'root', '', '', TRUE, '', now(
 REPLACE INTO tmp_proxies_priv SELECT @current_hostname, 'root', '', '', TRUE, '', now() FROM DUAL WHERE @current_hostname != 'localhost';
 INSERT INTO  proxies_priv SELECT * FROM tmp_proxies_priv WHERE @had_proxies_priv_table=0;
 DROP TABLE tmp_proxies_priv;
+
+INSERT INTO optimizer_cost_factors VALUES ('READ_TIME_FACTOR', 1.0);
+INSERT INTO optimizer_cost_factors VALUES ('SCAN_TIME_FACTOR', 1.0);
diff --git a/sql/CMakeLists.txt b/sql/CMakeLists.txt
index 41e2701..380442b 100644
--- a/sql/CMakeLists.txt
+++ b/sql/CMakeLists.txt
@@ -86,7 +86,7 @@ SET (SQL_SOURCE
                create_options.cc multi_range_read.cc
                opt_index_cond_pushdown.cc opt_subselect.cc
                opt_table_elimination.cc sql_expression_cache.cc
-               gcalc_slicescan.cc gcalc_tools.cc
+               gcalc_slicescan.cc gcalc_tools.cc opt_costmodel.h opt_costmodel.cc
 			   threadpool_common.cc 
 			   ../sql-common/mysql_async.c
                my_apc.cc my_apc.h
diff --git a/sql/filesort.cc b/sql/filesort.cc
index 0d554df..9dc9eba 100644
--- a/sql/filesort.cc
+++ b/sql/filesort.cc
@@ -1372,7 +1372,7 @@ bool check_if_pq_applicable(Sort_param *param,
         (PQ_slowness * num_rows + param->max_keys_per_buffer) *
         log((double) param->max_keys_per_buffer) / TIME_FOR_COMPARE_ROWID;
       const double pq_io_cost=
-        param->max_rows * table->file->scan_time() / 2.0;
+        param->max_rows * table->file->ha_scan_time() / 2.0;
       const double pq_cost= pq_cpu_cost + pq_io_cost;
 
       if (sort_merge_cost < pq_cost)
diff --git a/sql/handler.h b/sql/handler.h
index 5bba570..c9ca0b3 100644
--- a/sql/handler.h
+++ b/sql/handler.h
@@ -33,6 +33,7 @@
 #include "structs.h"                            /* SHOW_COMP_OPTION */
 #include "sql_array.h"          /* Dynamic_array<> */
 #include "mdl.h"
+#include "opt_costmodel.h"
 
 #include <my_compare.h>
 #include <ft_global.h>
@@ -2740,6 +2741,8 @@ public:
     table_share= share;
     reset_statistics();
   }
+
+protected:
   virtual double scan_time()
   { return ulonglong2double(stats.data_file_length) / IO_SIZE + 2; }
 
@@ -2757,6 +2760,13 @@ public:
   virtual double read_time(uint index, uint ranges, ha_rows rows)
   { return rows2double(ranges+rows); }
 
+public:
+  double ha_scan_time()
+  { return Cost_factors::scan_factor() * scan_time(); }
+
+  double ha_read_time(uint index, uint ranges, ha_rows rows)
+  { return Cost_factors::read_factor() * read_time(index, ranges, rows); }
+
   /**
     Calculate cost of 'keyread' scan for given index and number of records.
 
diff --git a/sql/multi_range_read.cc b/sql/multi_range_read.cc
index 0c35ac5..41cc9e7 100644
--- a/sql/multi_range_read.cc
+++ b/sql/multi_range_read.cc
@@ -1812,7 +1812,7 @@ void get_sweep_read_cost(TABLE *table, ha_rows nrows, bool interrupted,
   cost->reset();
   if (table->file->primary_key_is_clustered())
   {
-    cost->io_count= table->file->read_time(table->s->primary_key,
+    cost->io_count= table->file->ha_read_time(table->s->primary_key,
                                            (uint) nrows, nrows);
   }
   else
diff --git a/sql/mysqld.cc b/sql/mysqld.cc
index 90d6659..e05f30b 100644
--- a/sql/mysqld.cc
+++ b/sql/mysqld.cc
@@ -51,6 +51,7 @@
 #include "sql_manager.h"  // stop_handle_manager, start_handle_manager
 #include "sql_expression_cache.h" // subquery_cache_miss, subquery_cache_hit
 #include "sys_vars_shared.h"
+#include "opt_costmodel.h"
 
 #include <m_ctype.h>
 #include <my_dir.h>
@@ -5412,6 +5413,8 @@ int mysqld_main(int argc, char **argv)
 #endif
   }
 
+  Cost_factors::init();
+
   init_status_vars();
   if (opt_bootstrap) /* If running with bootstrap, do not start replication. */
     opt_skip_slave_start= 1;
diff --git a/sql/opt_costmodel.cc b/sql/opt_costmodel.cc
new file mode 100644
index 0000000..2e76dda
--- /dev/null
+++ b/sql/opt_costmodel.cc
@@ -0,0 +1,100 @@
+#include "sql_base.h"
+#include "key.h"
+#include "records.h"
+#include "opt_costmodel.h"
+
+/* Name of database to which the optimizer_cost_factors table belongs */
+static const LEX_STRING db_name= { C_STRING_WITH_LEN("mysql") };
+
+/* Name of all_constants table */
+static const LEX_STRING table_name = { C_STRING_WITH_LEN("optimizer_cost_factors") };
+
+/* Columns in the optimizer_cost_factors_table */
+enum cost_factors_col
+{
+  COST_FACTORS_CONST_NAME,
+  COST_FACTORS_CONST_VALUE
+};
+
+/* Name of the constants present in table */
+struct st_factor {
+  const char *name;
+  double *value;
+};
+
+st_factor factors[] = {{"READ_TIME_FACTOR", &Cost_factors::read_time_factor},
+                       {"SCAN_TIME_FACTOR", &Cost_factors::scan_time_factor},
+                       {0, 0}};
+
+/* Helper functions for Cost_factors::init() */
+
+static
+inline int open_table(THD *thd, TABLE_LIST *table,
+                      Open_tables_backup *backup,
+                      bool for_write)
+{
+  enum thr_lock_type lock_type_arg= for_write? TL_WRITE: TL_READ;
+  table->init_one_table(db_name.str, db_name.length, table_name.str,
+                        table_name.length, table_name.str, lock_type_arg);
+  return open_system_tables_for_read(thd, table, backup);
+}
+
+/* Interface functions */
+
+void Cost_factors::init()
+{
+  TABLE_LIST table_list;
+  Open_tables_backup open_tables_backup;
+  READ_RECORD read_record_info;
+  TABLE *table;
+  MEM_ROOT mem;
+  init_sql_alloc(&mem, 1024, 0, MYF(0));
+  THD *new_thd = new THD;
+  
+  if(!new_thd)
+  {
+    free_root(&mem, MYF(0));
+    DBUG_VOID_RETURN;
+  }
+
+  new_thd->thread_stack= (char *) &new_thd;
+  new_thd->store_globals();
+  new_thd->set_db(db_name.str, db_name.length);
+
+  if(open_table(new_thd, &table_list, &open_tables_backup, FALSE))
+  {
+    goto end;
+  }
+
+  table= table_list.table;
+  if(init_read_record(&read_record_info, new_thd, table, NULL, 1, 0, FALSE))
+  {
+    goto end;
+  }
+
+  table->use_all_columns();
+  while (!read_record_info.read_record(&read_record_info))
+  {
+    char *const_name= get_field(&mem, table->field[COST_FACTORS_CONST_NAME]);
+    double const_value;
+    const_value= table->field[COST_FACTORS_CONST_VALUE]->val_real();
+    st_factor *f;
+    for(f=factors; f->name; f++)
+    {
+      if(strcasecmp(f->name, const_name) == 0)
+      {
+        *(f->value)= const_value;
+        break;
+      }
+    }
+
+    if(f->name == 0)
+      sql_print_warning("Invalid row in the optimizer_cost_factors_table: %s",
+          const_name);
+  }
+
+end:
+  close_mysql_tables(new_thd);
+  delete new_thd;
+  DBUG_VOID_RETURN;
+}
diff --git a/sql/opt_costmodel.h b/sql/opt_costmodel.h
new file mode 100644
index 0000000..147471b
--- /dev/null
+++ b/sql/opt_costmodel.h
@@ -0,0 +1,22 @@
+/* Interface to get constants */
+
+#ifndef SQL_OPT_COSTMODEL_INCLUDED
+#define SQL_OPT_COSTMODEL_INCLUDED
+
+namespace Cost_factors
+{
+  static double read_time_factor;
+  static double scan_time_factor;
+
+  void init();
+  inline double read_factor()
+  {
+    return read_time_factor;
+  }
+  inline double scan_factor()
+  {
+    return scan_time_factor;
+  }
+};
+
+#endif /* SQL_OPT_COSTMODEL_INCLUDED */
diff --git a/sql/opt_range.cc b/sql/opt_range.cc
index bfba74c..de78d85 100644
--- a/sql/opt_range.cc
+++ b/sql/opt_range.cc
@@ -2961,7 +2961,7 @@ int SQL_SELECT::test_quick_select(THD *thd, key_map keys_to_use,
   if (!records)
     records++;					/* purecov: inspected */
   scan_time= (double) records / TIME_FOR_COMPARE + 1;
-  read_time= (double) head->file->scan_time() + scan_time + 1.1;
+  read_time= (double) head->file->ha_scan_time() + scan_time + 1.1;
   if (head->force_index)
     scan_time= read_time= DBL_MAX;
   if (limit < records)
@@ -4980,7 +4980,7 @@ double get_sweep_read_cost(const PARAM *param, ha_rows records)
       We are using the primary key to find the rows.
       Calculate the cost for this.
     */
-    result= param->table->file->read_time(param->table->s->primary_key,
+    result= param->table->file->ha_read_time(param->table->s->primary_key,
                                           (uint)records, records);
   }
   else
@@ -5298,7 +5298,7 @@ skip_to_ror_scan:
     {
       /* Ok, we have index_only cost, now get full rows scan cost */
       cost= param->table->file->
-              read_time(param->real_keynr[(*cur_child)->key_idx], 1,
+              ha_read_time(param->real_keynr[(*cur_child)->key_idx], 1,
                         (*cur_child)->records) +
               rows2double((*cur_child)->records) / TIME_FOR_COMPARE;
     }
diff --git a/sql/sql_select.cc b/sql/sql_select.cc
index b88aed1..e439fc9 100644
--- a/sql/sql_select.cc
+++ b/sql/sql_select.cc
@@ -5888,7 +5888,7 @@ best_access_path(JOIN      *join,
             if (table->covering_keys.is_set(key))
               tmp= table->file->keyread_time(key, 1, (ha_rows) tmp);
             else
-              tmp= table->file->read_time(key, 1,
+              tmp= table->file->ha_read_time(key, 1,
                                           (ha_rows) MY_MIN(tmp,s->worst_seeks));
             tmp*= record_count;
           }
@@ -6053,7 +6053,7 @@ best_access_path(JOIN      *join,
             if (table->covering_keys.is_set(key))
               tmp= table->file->keyread_time(key, 1, (ha_rows) tmp);
             else
-              tmp= table->file->read_time(key, 1,
+              tmp= table->file->ha_read_time(key, 1,
                                           (ha_rows) MY_MIN(tmp,s->worst_seeks));
             tmp*= record_count;
           }
@@ -6186,7 +6186,7 @@ best_access_path(JOIN      *join,
     {
       /* Estimate cost of reading table. */
       if (s->table->force_index && !best_key) // index scan
-        tmp= s->table->file->read_time(s->ref.key, 1, s->records);
+        tmp= s->table->file->ha_read_time(s->ref.key, 1, s->records);
       else // table scan
         tmp= s->scan_time();
 
@@ -11236,7 +11236,7 @@ double JOIN_TAB::scan_time()
     else
     {
       found_records= records= table->stat_records();
-      read_time= table->file->scan_time();
+      read_time= table->file->ha_scan_time();
       /*
         table->quick_condition_rows has already been set to
         table->file->stats.records
@@ -24643,7 +24643,7 @@ test_if_cheaper_ordering(const JOIN_TAB *tab, ORDER *order, TABLE *table,
       fanout*= join->best_positions[i].records_read; // fanout is always >= 1
   }
   else
-    read_time= table->file->scan_time();
+    read_time= table->file->ha_scan_time();
 
   /*
     Calculate the selectivity of the ref_key for REF_ACCESS. For
@@ -24802,7 +24802,7 @@ test_if_cheaper_ordering(const JOIN_TAB *tab, ORDER *order, TABLE *table,
           index entry.
         */
         index_scan_time= select_limit/rec_per_key *
-                         MY_MIN(rec_per_key, table->file->scan_time());
+                         MY_MIN(rec_per_key, table->file->ha_scan_time());
         if ((ref_key < 0 && (group || table->force_index || is_covering)) ||
             index_scan_time < read_time)
         {

Follow ups

Re: [GSoC] self-tuning optimizer
From: Sergei Golubchik, 2014-06-11

References

Re: [GSoC] Introduction Mail
From: Anshu Avinash, 2014-05-25
Re: [GSoC] self-tuning optimizer
From: Sergei Golubchik, 2014-05-26
Re: [GSoC] self-tuning optimizer
From: Roberto Spadim, 2014-05-27
Re: [GSoC] self-tuning optimizer
From: Sergei Golubchik, 2014-05-27
Re: [GSoC] self-tuning optimizer
From: Roberto Spadim, 2014-05-27
Re: [GSoC] self-tuning optimizer
From: Sergei Golubchik, 2014-05-27
Re: [GSoC] self-tuning optimizer
From: Roberto Spadim, 2014-05-27
Re: [GSoC] self-tuning optimizer
From: Anshu Avinash, 2014-06-08
Re: [GSoC] self-tuning optimizer
From: Sergei Golubchik, 2014-06-09
Re: [GSoC] self-tuning optimizer
From: Anshu Avinash, 2014-06-10
Re: [GSoC] self-tuning optimizer
From: Sergei Golubchik, 2014-06-10