Re: [GSoC] self-tuning optimizer
Hi serg,
I have attached the updated diff. I'm still unable to observe any effect
from the cost factors. Maybe I just need to study the usage of
scan_time() and read_time() in greater detail.
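To get a feel for how much SCAN_TIME_FACTOR has to move the full-scan
estimate before a plan can flip, here is a small standalone model of the
comparison done in SQL_SELECT::test_quick_select(). The file length is only
a rough guess for a 25-row table, and TIME_FOR_COMPARE/IO_SIZE are taken as
the usual server defines (5 and 4096):

// Standalone model of the full-scan estimate in test_quick_select():
//   read_time= ha_scan_time() + (records / TIME_FOR_COMPARE + 1) + 1.1
// where ha_scan_time()= SCAN_TIME_FACTOR * (data_file_length / IO_SIZE + 2).
#include <cstdio>

int main()
{
  const double TIME_FOR_COMPARE= 5.0;        // sql_const.h: 5 compares == one read
  const double IO_SIZE= 4096.0;
  const double records= 25.0;
  const double data_file_length= 25 * 20.0;  // illustrative guess, not a real value

  for (double factor= 1.0; factor <= 16.0; factor*= 2.0)
  {
    double ha_scan_time= factor * (data_file_length / IO_SIZE + 2);
    double read_time= ha_scan_time + (records / TIME_FOR_COMPARE + 1) + 1.1;
    printf("SCAN_TIME_FACTOR=%4.1f  full scan estimate=%.2f\n", factor, read_time);
  }
  return 0;
}

Roughly speaking, a range plan is only chosen when its estimated cost comes
in below that read_time value, so this shows how far a given factor actually
moves the threshold for such a tiny table.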
Regards
Anshu
On Tue, Jun 10, 2014 at 1:19 PM, Sergei Golubchik <serg@xxxxxxxxxxx> wrote:
> Hi, Anshu!
>
> On Jun 10, Anshu Avinash wrote:
> > Hi serg,
> >
> > I have implemented your suggestions. In the test case I created a table
> > with 25 rows. EXPLAIN for 'select * from t1 where a > 19' gave 'ALL',
> > while EXPLAIN for 'select * from t1 where a > 20' gave 'range'.
> > I have also written public methods ha_scan_time() and ha_read_time(). I
> > should replace every occurrence of handler::scan_time() with
> > ha_scan_time(), right? How do I make sure that I don't miss any place?
>
> See other ha_xxx vs xxx methods, e.g. ha_rnd_init and rnd_init.
>
> You make scan_time() protected - and the compiler won't allow it to be
> called outside of the handler class.
>
> > Also, regarding making everything in the class Cost_factors static vs
> > creating an object and using it everywhere: can't we use a namespace
> > Cost_factors? How is it usually done? I don't see much usage of
> > namespaces in the code.
>
> Yes, namespaces aren't used much in MariaDB, but feel free to use them,
> if you'd like.
>
> Regards,
> Sergei
>
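Here is a minimal, self-contained sketch of the pattern described above: the
virtual cost method becomes protected and a public ha_*() wrapper applies the
tunable factor, with the factors living in a namespace. The class below is a
simplified stand-in for handler, and the cost of 100 is just an illustrative
number:

// Sketch only; the real code is in handler.h and opt_costmodel.{h,cc}.
#include <cstdio>

namespace Cost_factors
{
  // In the patch this is 'extern' in opt_costmodel.h and defined once in
  // opt_costmodel.cc, so every translation unit sees the value set by init().
  double scan_time_factor= 1.0;
  inline double scan_factor() { return scan_time_factor; }
}

class handler_sketch                          // stand-in for class handler
{
protected:
  // Engines still override this, but it can no longer be called from outside.
  virtual double scan_time() { return 100.0; }
public:
  virtual ~handler_sketch() {}
  // The single public entry point, which applies the tunable factor.
  double ha_scan_time() { return Cost_factors::scan_factor() * scan_time(); }
};

int main()
{
  handler_sketch h;
  printf("cost with factor 1.0: %g\n", h.ha_scan_time());  // 100
  Cost_factors::scan_time_factor= 2.0;   // what the UPDATE on SCAN_TIME_FACTOR does
  printf("cost with factor 2.0: %g\n", h.ha_scan_time());  // 200
  return 0;
}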
diff --git a/mysql-test/r/costmodel.result b/mysql-test/r/costmodel.result
new file mode 100644
index 0000000..6668758
--- /dev/null
+++ b/mysql-test/r/costmodel.result
@@ -0,0 +1,20 @@
+DROP TABLE IF EXISTS t1;
+CREATE TABLE t1 (a int auto_increment primary key, b int);
+INSERT INTO t1(b) values (1),(2),(3),(4),(5),(6),(7),(8),(9),(10),(11),(12),
+(13),(14),(15),(16),(17),(18),(19),(20),(21),(22),(23),(24),(25);
+EXPLAIN
+SELECT * FROM t1
+WHERE a > 21;
+id select_type table type possible_keys key key_len ref rows Extra
+1 SIMPLE t1 range PRIMARY PRIMARY 4 NULL 5 Using index condition
+use mysql;
+UPDATE optimizer_cost_factors
+SET const_value=2.0
+WHERE const_name='SCAN_TIME_FACTOR';
+use test;
+EXPLAIN
+SELECT * FROM t1
+WHERE a > 21;
+id select_type table type possible_keys key key_len ref rows Extra
+1 SIMPLE t1 range PRIMARY PRIMARY 4 NULL 5 Using index condition
+DROP TABLE t1;
diff --git a/mysql-test/t/costmodel.test b/mysql-test/t/costmodel.test
new file mode 100644
index 0000000..69b6976
--- /dev/null
+++ b/mysql-test/t/costmodel.test
@@ -0,0 +1,29 @@
+--disable_warnings
+DROP TABLE IF EXISTS t1;
+--enable_warnings
+
+CREATE TABLE t1 (a int auto_increment primary key, b int);
+INSERT INTO t1(b) values (1),(2),(3),(4),(5),(6),(7),(8),(9),(10),(11),(12),
+(13),(14),(15),(16),(17),(18),(19),(20),(21),(22),(23),(24),(25);
+
+EXPLAIN
+ SELECT * FROM t1
+ WHERE a > 21;
+
+use mysql;
+
+UPDATE optimizer_cost_factors
+SET const_value=2.0
+WHERE const_name='SCAN_TIME_FACTOR';
+
+--enable_reconnect
+--exec echo "restart" > $MYSQLTEST_VARDIR/tmp/mysqld.1.expect
+--source include/wait_until_connected_again.inc
+
+use test;
+
+EXPLAIN
+ SELECT * FROM t1
+ WHERE a > 21;
+
+DROP TABLE t1;
diff --git a/scripts/mysql_system_tables.sql b/scripts/mysql_system_tables.sql
index 34742aa..44bd2a4 100644
--- a/scripts/mysql_system_tables.sql
+++ b/scripts/mysql_system_tables.sql
@@ -229,3 +229,7 @@ CREATE TABLE IF NOT EXISTS index_stats (db_name varchar(64) NOT NULL, table_name
-- we avoid mixed-engine transactions.
set storage_engine=@orig_storage_engine;
CREATE TABLE IF NOT EXISTS gtid_slave_pos (domain_id INT UNSIGNED NOT NULL, sub_id BIGINT UNSIGNED NOT NULL, server_id INT UNSIGNED NOT NULL, seq_no BIGINT UNSIGNED NOT NULL, PRIMARY KEY (domain_id, sub_id)) comment='Replication slave GTID position';
+
+-- Tables for Self Tuning Cost Optimizer
+
+CREATE TABLE IF NOT EXISTS optimizer_cost_factors (const_name varchar(64) NOT NULL, const_value double NOT NULL, PRIMARY KEY (const_name) ) ENGINE=MyISAM CHARACTER SET latin1 COLLATE latin1_general_ci comment='Constants for optimizer';
diff --git a/scripts/mysql_system_tables_data.sql b/scripts/mysql_system_tables_data.sql
index 38e6a54..2bc41a8 100644
--- a/scripts/mysql_system_tables_data.sql
+++ b/scripts/mysql_system_tables_data.sql
@@ -53,3 +53,6 @@ INSERT INTO tmp_proxies_priv VALUES ('localhost', 'root', '', '', TRUE, '', now(
REPLACE INTO tmp_proxies_priv SELECT @current_hostname, 'root', '', '', TRUE, '', now() FROM DUAL WHERE @current_hostname != 'localhost';
INSERT INTO proxies_priv SELECT * FROM tmp_proxies_priv WHERE @had_proxies_priv_table=0;
DROP TABLE tmp_proxies_priv;
+
+INSERT INTO optimizer_cost_factors VALUES ('READ_TIME_FACTOR', 1.0);
+INSERT INTO optimizer_cost_factors VALUES ('SCAN_TIME_FACTOR', 1.0);
diff --git a/sql/CMakeLists.txt b/sql/CMakeLists.txt
index 41e2701..380442b 100644
--- a/sql/CMakeLists.txt
+++ b/sql/CMakeLists.txt
@@ -86,7 +86,7 @@ SET (SQL_SOURCE
create_options.cc multi_range_read.cc
opt_index_cond_pushdown.cc opt_subselect.cc
opt_table_elimination.cc sql_expression_cache.cc
- gcalc_slicescan.cc gcalc_tools.cc
+ gcalc_slicescan.cc gcalc_tools.cc opt_costmodel.h opt_costmodel.cc
threadpool_common.cc
../sql-common/mysql_async.c
my_apc.cc my_apc.h
diff --git a/sql/filesort.cc b/sql/filesort.cc
index 0d554df..9dc9eba 100644
--- a/sql/filesort.cc
+++ b/sql/filesort.cc
@@ -1372,7 +1372,7 @@ bool check_if_pq_applicable(Sort_param *param,
(PQ_slowness * num_rows + param->max_keys_per_buffer) *
log((double) param->max_keys_per_buffer) / TIME_FOR_COMPARE_ROWID;
const double pq_io_cost=
- param->max_rows * table->file->scan_time() / 2.0;
+ param->max_rows * table->file->ha_scan_time() / 2.0;
const double pq_cost= pq_cpu_cost + pq_io_cost;
if (sort_merge_cost < pq_cost)
diff --git a/sql/handler.h b/sql/handler.h
index 5bba570..c9ca0b3 100644
--- a/sql/handler.h
+++ b/sql/handler.h
@@ -33,6 +33,7 @@
#include "structs.h" /* SHOW_COMP_OPTION */
#include "sql_array.h" /* Dynamic_array<> */
#include "mdl.h"
+#include "opt_costmodel.h"
#include <my_compare.h>
#include <ft_global.h>
@@ -2740,6 +2741,8 @@ public:
table_share= share;
reset_statistics();
}
+
+protected:
virtual double scan_time()
{ return ulonglong2double(stats.data_file_length) / IO_SIZE + 2; }
@@ -2757,6 +2760,13 @@ public:
virtual double read_time(uint index, uint ranges, ha_rows rows)
{ return rows2double(ranges+rows); }
+public:
+ double ha_scan_time()
+ { return Cost_factors::scan_factor() * scan_time(); }
+
+ double ha_read_time(uint index, uint ranges, ha_rows rows)
+ { return Cost_factors::read_factor() * read_time(index, ranges, rows); }
+
/**
Calculate cost of 'keyread' scan for given index and number of records.
diff --git a/sql/multi_range_read.cc b/sql/multi_range_read.cc
index 0c35ac5..41cc9e7 100644
--- a/sql/multi_range_read.cc
+++ b/sql/multi_range_read.cc
@@ -1812,7 +1812,7 @@ void get_sweep_read_cost(TABLE *table, ha_rows nrows, bool interrupted,
cost->reset();
if (table->file->primary_key_is_clustered())
{
- cost->io_count= table->file->read_time(table->s->primary_key,
+ cost->io_count= table->file->ha_read_time(table->s->primary_key,
(uint) nrows, nrows);
}
else
diff --git a/sql/mysqld.cc b/sql/mysqld.cc
index 90d6659..e05f30b 100644
--- a/sql/mysqld.cc
+++ b/sql/mysqld.cc
@@ -51,6 +51,7 @@
#include "sql_manager.h" // stop_handle_manager, start_handle_manager
#include "sql_expression_cache.h" // subquery_cache_miss, subquery_cache_hit
#include "sys_vars_shared.h"
+#include "opt_costmodel.h"
#include <m_ctype.h>
#include <my_dir.h>
@@ -5412,6 +5413,8 @@ int mysqld_main(int argc, char **argv)
#endif
}
+ Cost_factors::init();
+
init_status_vars();
if (opt_bootstrap) /* If running with bootstrap, do not start replication. */
opt_skip_slave_start= 1;
diff --git a/sql/opt_costmodel.cc b/sql/opt_costmodel.cc
new file mode 100644
index 0000000..2e76dda
--- /dev/null
+++ b/sql/opt_costmodel.cc
@@ -0,0 +1,111 @@
+#include "sql_base.h"
+#include "key.h"
+#include "records.h"
+#include "opt_costmodel.h"
+
+/* Name of database to which the optimizer_cost_factors table belongs */
+static const LEX_STRING db_name= { C_STRING_WITH_LEN("mysql") };
+
+/* Name of the optimizer_cost_factors table */
+static const LEX_STRING table_name = { C_STRING_WITH_LEN("optimizer_cost_factors") };
+
+/* Columns in the optimizer_cost_factors table */
+enum cost_factors_col
+{
+ COST_FACTORS_CONST_NAME,
+ COST_FACTORS_CONST_VALUE
+};
+
+/* Name of the constants present in table */
+struct st_factor {
+ const char *name;
+ double *value;
+};
+
+st_factor factors[] = {{"READ_TIME_FACTOR", &Cost_factors::read_time_factor},
+ {"SCAN_TIME_FACTOR", &Cost_factors::scan_time_factor},
+ {0, 0}};
+
+/* Helper functions for Cost_factors::init() */
+
+static
+inline int open_table(THD *thd, TABLE_LIST *table,
+ Open_tables_backup *backup,
+ bool for_write)
+{
+ enum thr_lock_type lock_type_arg= for_write? TL_WRITE: TL_READ;
+ table->init_one_table(db_name.str, db_name.length, table_name.str,
+ table_name.length, table_name.str, lock_type_arg);
+ return open_system_tables_for_read(thd, table, backup);
+}
+
+/* Interface functions */
+
+void Cost_factors::init()
+{
+  DBUG_ENTER("Cost_factors::init");
+ TABLE_LIST table_list;
+ Open_tables_backup open_tables_backup;
+ READ_RECORD read_record_info;
+ TABLE *table;
+ MEM_ROOT mem;
+ init_sql_alloc(&mem, 1024, 0, MYF(0));
+ THD *new_thd = new THD;
+
+ if(!new_thd)
+ {
+ free_root(&mem, MYF(0));
+ DBUG_VOID_RETURN;
+ }
+
+ new_thd->thread_stack= (char *) &new_thd;
+ new_thd->store_globals();
+ new_thd->set_db(db_name.str, db_name.length);
+
+ if(open_table(new_thd, &table_list, &open_tables_backup, FALSE))
+ {
+ goto end;
+ }
+
+ table= table_list.table;
+ if(init_read_record(&read_record_info, new_thd, table, NULL, 1, 0, FALSE))
+ {
+ goto end;
+ }
+
+ table->use_all_columns();
+ while (!read_record_info.read_record(&read_record_info))
+ {
+ char *const_name= get_field(&mem, table->field[COST_FACTORS_CONST_NAME]);
+ double const_value;
+ const_value= table->field[COST_FACTORS_CONST_VALUE]->val_real();
+ st_factor *f;
+ for(f=factors; f->name; f++)
+ {
+ if(strcasecmp(f->name, const_name) == 0)
+ {
+ *(f->value)= const_value;
+ break;
+ }
+ }
+
+ if(f->name == 0)
+ sql_print_warning("Invalid row in the optimizer_cost_factors_table: %s",
+ const_name);
+  }
+  end_read_record(&read_record_info);
+
+end:
+  close_mysql_tables(new_thd);
+  free_root(&mem, MYF(0));
+ delete new_thd;
+ DBUG_VOID_RETURN;
+}
diff --git a/sql/opt_costmodel.h b/sql/opt_costmodel.h
new file mode 100644
index 0000000..147471b
--- /dev/null
+++ b/sql/opt_costmodel.h
@@ -0,0 +1,22 @@
+/* Interface to get constants */
+
+#ifndef SQL_OPT_COSTMODEL_INCLUDED
+#define SQL_OPT_COSTMODEL_INCLUDED
+
+namespace Cost_factors
+{
+  extern double read_time_factor;
+  extern double scan_time_factor;
+
+ void init();
+ inline double read_factor()
+ {
+ return read_time_factor;
+ }
+ inline double scan_factor()
+ {
+ return scan_time_factor;
+ }
+};
+
+#endif /* SQL_OPT_COSTMODEL_INCLUDED */
diff --git a/sql/opt_range.cc b/sql/opt_range.cc
index bfba74c..de78d85 100644
--- a/sql/opt_range.cc
+++ b/sql/opt_range.cc
@@ -2961,7 +2961,7 @@ int SQL_SELECT::test_quick_select(THD *thd, key_map keys_to_use,
if (!records)
records++; /* purecov: inspected */
scan_time= (double) records / TIME_FOR_COMPARE + 1;
- read_time= (double) head->file->scan_time() + scan_time + 1.1;
+ read_time= (double) head->file->ha_scan_time() + scan_time + 1.1;
if (head->force_index)
scan_time= read_time= DBL_MAX;
if (limit < records)
@@ -4980,7 +4980,7 @@ double get_sweep_read_cost(const PARAM *param, ha_rows records)
We are using the primary key to find the rows.
Calculate the cost for this.
*/
- result= param->table->file->read_time(param->table->s->primary_key,
+ result= param->table->file->ha_read_time(param->table->s->primary_key,
(uint)records, records);
}
else
@@ -5298,7 +5298,7 @@ skip_to_ror_scan:
{
/* Ok, we have index_only cost, now get full rows scan cost */
cost= param->table->file->
- read_time(param->real_keynr[(*cur_child)->key_idx], 1,
+ ha_read_time(param->real_keynr[(*cur_child)->key_idx], 1,
(*cur_child)->records) +
rows2double((*cur_child)->records) / TIME_FOR_COMPARE;
}
diff --git a/sql/sql_select.cc b/sql/sql_select.cc
index b88aed1..e439fc9 100644
--- a/sql/sql_select.cc
+++ b/sql/sql_select.cc
@@ -5888,7 +5888,7 @@ best_access_path(JOIN *join,
if (table->covering_keys.is_set(key))
tmp= table->file->keyread_time(key, 1, (ha_rows) tmp);
else
- tmp= table->file->read_time(key, 1,
+ tmp= table->file->ha_read_time(key, 1,
(ha_rows) MY_MIN(tmp,s->worst_seeks));
tmp*= record_count;
}
@@ -6053,7 +6053,7 @@ best_access_path(JOIN *join,
if (table->covering_keys.is_set(key))
tmp= table->file->keyread_time(key, 1, (ha_rows) tmp);
else
- tmp= table->file->read_time(key, 1,
+ tmp= table->file->ha_read_time(key, 1,
(ha_rows) MY_MIN(tmp,s->worst_seeks));
tmp*= record_count;
}
@@ -6186,7 +6186,7 @@ best_access_path(JOIN *join,
{
/* Estimate cost of reading table. */
if (s->table->force_index && !best_key) // index scan
- tmp= s->table->file->read_time(s->ref.key, 1, s->records);
+ tmp= s->table->file->ha_read_time(s->ref.key, 1, s->records);
else // table scan
tmp= s->scan_time();
@@ -11236,7 +11236,7 @@ double JOIN_TAB::scan_time()
else
{
found_records= records= table->stat_records();
- read_time= table->file->scan_time();
+ read_time= table->file->ha_scan_time();
/*
table->quick_condition_rows has already been set to
table->file->stats.records
@@ -24643,7 +24643,7 @@ test_if_cheaper_ordering(const JOIN_TAB *tab, ORDER *order, TABLE *table,
fanout*= join->best_positions[i].records_read; // fanout is always >= 1
}
else
- read_time= table->file->scan_time();
+ read_time= table->file->ha_scan_time();
/*
Calculate the selectivity of the ref_key for REF_ACCESS. For
@@ -24802,7 +24802,7 @@ test_if_cheaper_ordering(const JOIN_TAB *tab, ORDER *order, TABLE *table,
index entry.
*/
index_scan_time= select_limit/rec_per_key *
- MY_MIN(rec_per_key, table->file->scan_time());
+ MY_MIN(rec_per_key, table->file->ha_scan_time());
if ((ref_key < 0 && (group || table->force_index || is_covering)) ||
index_scan_time < read_time)
{
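For anyone trying the patch: the factors are only read by Cost_factors::init()
during server startup, so after changing a row in mysql.optimizer_cost_factors
the server has to be restarted (as the test above does) before the optimizer
sees the new value. A quick way to inspect and adjust them, and to put the
default back afterwards:

-- Values installed by mysql_system_tables_data.sql
SELECT const_name, const_value FROM mysql.optimizer_cost_factors;

-- Make full table scans look twice as expensive, then restart the server
UPDATE mysql.optimizer_cost_factors
   SET const_value=2.0
 WHERE const_name='SCAN_TIME_FACTOR';

-- Restore the default afterwards (the test above does not restore it)
UPDATE mysql.optimizer_cost_factors
   SET const_value=1.0
 WHERE const_name='SCAN_TIME_FACTOR';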
References
- Re: [GSoC] Introduction Mail (Anshu Avinash, 2014-05-25)
- Re: [GSoC] self-tuning optimizer (Sergei Golubchik, 2014-05-26)
- Re: [GSoC] self-tuning optimizer (Roberto Spadim, 2014-05-27)
- Re: [GSoC] self-tuning optimizer (Sergei Golubchik, 2014-05-27)
- Re: [GSoC] self-tuning optimizer (Roberto Spadim, 2014-05-27)
- Re: [GSoC] self-tuning optimizer (Sergei Golubchik, 2014-05-27)
- Re: [GSoC] self-tuning optimizer (Roberto Spadim, 2014-05-27)
- Re: [GSoC] self-tuning optimizer (Anshu Avinash, 2014-06-08)
- Re: [GSoC] self-tuning optimizer (Sergei Golubchik, 2014-06-09)
- Re: [GSoC] self-tuning optimizer (Anshu Avinash, 2014-06-10)
- Re: [GSoC] self-tuning optimizer (Sergei Golubchik, 2014-06-10)