maria-developers team mailing list archive
-
maria-developers team
-
Mailing list archive
-
Message #11522
7c088b624f4: MDEV-14500 filesort to support engines with slow rnd_pos
revision-id: 7c088b624f4f50e1e9f11891c371eba912228a2d (mariadb-10.4.0-14-g7c088b624f4)
parent(s): da8a589a982c45ed161b9a25d61282a6bc1fa037
author: Sergei Golubchik <serg@xxxxxxxxxxx>
committer: Sergei Golubchik <serg@xxxxxxxxxxx>
timestamp: 2018-11-17 14:13:37 +0100
message:
MDEV-14500 filesort to support engines with slow rnd_pos
If the engine wants to avoid rnd_pos() - force a temporary table
before a filesort. But don't do it if addon_fields are used.
---
mysql-test/suite/archive/rnd_pos.result | 56 +++++++++++++++++++++++++++++++++
mysql-test/suite/archive/rnd_pos.test | 27 ++++++++++++++++
sql/filesort.cc | 55 ++++++++++++++++++--------------
sql/filesort.h | 3 ++
sql/handler.h | 7 +++++
sql/sql_select.cc | 15 +++++++++
storage/archive/ha_archive.h | 2 +-
storage/csv/ha_tina.h | 2 +-
8 files changed, 142 insertions(+), 25 deletions(-)
diff --git a/mysql-test/suite/archive/rnd_pos.result b/mysql-test/suite/archive/rnd_pos.result
new file mode 100644
index 00000000000..b6b6748d53f
--- /dev/null
+++ b/mysql-test/suite/archive/rnd_pos.result
@@ -0,0 +1,56 @@
+create table t1(c1 int not null, c2 double not null, c3 char(255) not null) engine=archive;
+insert t1 select seq, seq+0.7, concat('row with c1 = ', seq) from seq_1_to_10;
+explain partitions select c1,c3 from t1 order by c2;
+id select_type table partitions type possible_keys key key_len ref rows Extra
+1 SIMPLE t1 NULL ALL NULL NULL NULL NULL 10 Using filesort
+set max_length_for_sort_data = 4;
+explain partitions select c1,c3 from t1 order by c2;
+id select_type table partitions type possible_keys key key_len ref rows Extra
+1 SIMPLE t1 NULL ALL NULL NULL NULL NULL 10 Using temporary; Using filesort
+flush status;
+select c1,c3 from t1 order by c2;
+c1 c3
+1 row with c1 = 1
+2 row with c1 = 2
+3 row with c1 = 3
+4 row with c1 = 4
+5 row with c1 = 5
+6 row with c1 = 6
+7 row with c1 = 7
+8 row with c1 = 8
+9 row with c1 = 9
+10 row with c1 = 10
+set max_length_for_sort_data = default;
+show status where variable_name like '%tmp%' and value != 0;
+Variable_name Value
+Created_tmp_tables 1
+Handler_tmp_write 10
+Rows_tmp_read 20
+alter table t1 partition by hash (c1) partitions 3;
+explain partitions select c1,c3 from t1 order by c2;
+id select_type table partitions type possible_keys key key_len ref rows Extra
+1 SIMPLE t1 p0,p1,p2 ALL NULL NULL NULL NULL 10 Using filesort
+set max_length_for_sort_data = 4;
+explain partitions select c1,c3 from t1 order by c2;
+id select_type table partitions type possible_keys key key_len ref rows Extra
+1 SIMPLE t1 p0,p1,p2 ALL NULL NULL NULL NULL 10 Using temporary; Using filesort
+flush status;
+select c1,c3 from t1 order by c2;
+c1 c3
+1 row with c1 = 1
+2 row with c1 = 2
+3 row with c1 = 3
+4 row with c1 = 4
+5 row with c1 = 5
+6 row with c1 = 6
+7 row with c1 = 7
+8 row with c1 = 8
+9 row with c1 = 9
+10 row with c1 = 10
+set max_length_for_sort_data = default;
+show status where variable_name like '%tmp%' and value != 0;
+Variable_name Value
+Created_tmp_tables 1
+Handler_tmp_write 10
+Rows_tmp_read 20
+drop table t1;
diff --git a/mysql-test/suite/archive/rnd_pos.test b/mysql-test/suite/archive/rnd_pos.test
new file mode 100644
index 00000000000..7a1d78ea003
--- /dev/null
+++ b/mysql-test/suite/archive/rnd_pos.test
@@ -0,0 +1,27 @@
+#
+# MDEV-14500 Support engines without rnd_pos
+#
+source include/have_archive.inc;
+source include/have_sequence.inc;
+source include/have_partition.inc;
+
+create table t1(c1 int not null, c2 double not null, c3 char(255) not null) engine=archive;
+insert t1 select seq, seq+0.7, concat('row with c1 = ', seq) from seq_1_to_10;
+explain partitions select c1,c3 from t1 order by c2;
+set max_length_for_sort_data = 4;
+explain partitions select c1,c3 from t1 order by c2;
+flush status;
+select c1,c3 from t1 order by c2;
+set max_length_for_sort_data = default;
+show status where variable_name like '%tmp%' and value != 0;
+
+alter table t1 partition by hash (c1) partitions 3;
+explain partitions select c1,c3 from t1 order by c2;
+set max_length_for_sort_data = 4;
+explain partitions select c1,c3 from t1 order by c2;
+flush status;
+select c1,c3 from t1 order by c2;
+set max_length_for_sort_data = default;
+show status where variable_name like '%tmp%' and value != 0;
+
+drop table t1;
diff --git a/sql/filesort.cc b/sql/filesort.cc
index e682f3389da..cbe79967647 100644
--- a/sql/filesort.cc
+++ b/sql/filesort.cc
@@ -1957,6 +1957,30 @@ sortlength(THD *thd, SORT_FIELD *sortorder, uint s_length,
return length;
}
+bool filesort_use_addons(TABLE *table, uint sortlength,
+ uint *length, uint *fields, uint *null_fields)
+{
+ Field **pfield, *field;
+ *length= *fields= *null_fields= 0;
+
+ for (pfield= table->field; (field= *pfield) ; pfield++)
+ {
+ if (!bitmap_is_set(table->read_set, field->field_index))
+ continue;
+ if (field->flags & BLOB_FLAG)
+ return false;
+ (*length)+= field->max_packed_col_length(field->pack_length());
+ if (field->maybe_null())
+ (*null_fields)++;
+ (*fields)++;
+ }
+ if (!*fields)
+ return false;
+ (*length)+= (*null_fields+7)/8;
+
+ return *length + sortlength <
+ table->in_use->variables.max_length_for_sort_data;
+}
/**
Get descriptors of fields appended to sorted fields and
@@ -1991,11 +2015,8 @@ get_addon_fields(TABLE *table, uint sortlength, LEX_STRING *addon_buf)
Field **pfield;
Field *field;
SORT_ADDON_FIELD *addonf;
- uint length= 0;
- uint fields= 0;
- uint null_fields= 0;
+ uint length, fields, null_fields;
MY_BITMAP *read_set= table->read_set;
- ulong max_sort_len= table->in_use->variables.max_length_for_sort_data;
DBUG_ENTER("get_addon_fields");
/*
@@ -2011,26 +2032,14 @@ get_addon_fields(TABLE *table, uint sortlength, LEX_STRING *addon_buf)
addon_buf->str= 0;
addon_buf->length= 0;
- for (pfield= table->field; (field= *pfield) ; pfield++)
- {
- if (!bitmap_is_set(read_set, field->field_index))
- continue;
- if (field->flags & BLOB_FLAG)
- DBUG_RETURN(0);
- length+= field->max_packed_col_length(field->pack_length());
- if (field->maybe_null())
- null_fields++;
- fields++;
- }
- if (!fields)
- DBUG_RETURN(0);
- length+= (null_fields+7)/8;
+ // see remove_const() for HA_SLOW_RND_POS explanation
+ if (table->file->ha_table_flags() & HA_SLOW_RND_POS)
+ sortlength= 0;
- if (length+sortlength > max_sort_len ||
- !my_multi_malloc(MYF(MY_WME | MY_THREAD_SPECIFIC),
- &addonf, sizeof(SORT_ADDON_FIELD) * (fields+1),
- &addon_buf->str, length,
- NullS))
+ if (!filesort_use_addons(table, sortlength, &length, &fields, &null_fields) ||
+ !my_multi_malloc(MYF(MY_WME | MY_THREAD_SPECIFIC), &addonf,
+ sizeof(SORT_ADDON_FIELD) * (fields+1),
+ &addon_buf->str, length, NullS))
DBUG_RETURN(0);
diff --git a/sql/filesort.h b/sql/filesort.h
index bd1d81f91ef..359f44a3907 100644
--- a/sql/filesort.h
+++ b/sql/filesort.h
@@ -161,6 +161,9 @@ SORT_INFO *filesort(THD *thd, TABLE *table, Filesort *filesort,
Filesort_tracker* tracker, JOIN *join=NULL,
table_map first_table_bit=0);
+bool filesort_use_addons(TABLE *table, uint sortlength,
+ uint *length, uint *fields, uint *null_fields);
+
void change_double_for_sort(double nr,uchar *to);
#endif /* FILESORT_INCLUDED */
diff --git a/sql/handler.h b/sql/handler.h
index b98244fe47e..ea514634b86 100644
--- a/sql/handler.h
+++ b/sql/handler.h
@@ -308,6 +308,13 @@ enum enum_alter_inplace_result {
#define HA_SLOW_CMP_REF (1ULL << 54)
#define HA_CMP_REF_IS_EXPENSIVE HA_SLOW_CMP_REF
+/**
+ Some engines are unable to provide an efficient implementation for rnd_pos().
+ Server will try to avoid it, if possible
+
+ TODO better to do it with cost estimates, not with an explicit flag
+*/
+#define HA_SLOW_RND_POS (1ULL << 55)
/* bits in index_flags(index_number) for what you can do with index */
#define HA_READ_NEXT 1 /* TODO really use this flag */
diff --git a/sql/sql_select.cc b/sql/sql_select.cc
index 3f5def0838e..c16521068c6 100644
--- a/sql/sql_select.cc
+++ b/sql/sql_select.cc
@@ -13181,6 +13181,21 @@ remove_const(JOIN *join,ORDER *first_order, COND *cond,
JOIN_TAB *head= join->join_tab + join->const_tables;
*simple_order= head->on_expr_ref[0] == NULL;
+ if (*simple_order && head->table->file->ha_table_flags() & HA_SLOW_RND_POS)
+ {
+ uint u1, u2, u3;
+ /*
+ normally the condition is (see filesort_use_addons())
+
+ length + sortlength <= max_length_for_sort_data
+
+ but for HA_SLOW_RND_POS tables we relax it a bit, as the alternative
+ is to use a temporary table, which is rather expensive.
+
+ TODO proper cost estimations
+ */
+ *simple_order= filesort_use_addons(head->table, 0, &u1, &u2, &u3);
+ }
}
else
{
diff --git a/storage/archive/ha_archive.h b/storage/archive/ha_archive.h
index 56ff566db8c..1f25fba4eed 100644
--- a/storage/archive/ha_archive.h
+++ b/storage/archive/ha_archive.h
@@ -108,7 +108,7 @@ class ha_archive: public handler
return (HA_NO_TRANSACTIONS | HA_REC_NOT_IN_SEQ | HA_CAN_BIT_FIELD |
HA_BINLOG_ROW_CAPABLE | HA_BINLOG_STMT_CAPABLE |
HA_STATS_RECORDS_IS_EXACT | HA_CAN_EXPORT |
- HA_HAS_RECORDS | HA_CAN_REPAIR |
+ HA_HAS_RECORDS | HA_CAN_REPAIR | HA_SLOW_RND_POS |
HA_FILE_BASED | HA_CAN_INSERT_DELAYED | HA_CAN_GEOMETRY);
}
ulong index_flags(uint idx, uint part, bool all_parts) const
diff --git a/storage/csv/ha_tina.h b/storage/csv/ha_tina.h
index c75a64faa52..5b389d984d6 100644
--- a/storage/csv/ha_tina.h
+++ b/storage/csv/ha_tina.h
@@ -107,7 +107,7 @@ class ha_tina: public handler
{
return (HA_NO_TRANSACTIONS | HA_REC_NOT_IN_SEQ | HA_NO_AUTO_INCREMENT |
HA_BINLOG_ROW_CAPABLE | HA_BINLOG_STMT_CAPABLE | HA_CAN_EXPORT |
- HA_CAN_REPAIR);
+ HA_CAN_REPAIR | HA_SLOW_RND_POS);
}
ulong index_flags(uint idx, uint part, bool all_parts) const
{