← Back to team overview

maria-developers team mailing list archive

7c088b624f4: MDEV-14500 filesort to support engines with slow rnd_pos

 

revision-id: 7c088b624f4f50e1e9f11891c371eba912228a2d (mariadb-10.4.0-14-g7c088b624f4)
parent(s): da8a589a982c45ed161b9a25d61282a6bc1fa037
author: Sergei Golubchik <serg@xxxxxxxxxxx>
committer: Sergei Golubchik <serg@xxxxxxxxxxx>
timestamp: 2018-11-17 14:13:37 +0100
message:

MDEV-14500 filesort to support engines with slow rnd_pos

If the engine wants to avoid rnd_pos() - force a temporary table
before a filesort. But don't do it if addon_fields are used.

---
 mysql-test/suite/archive/rnd_pos.result | 56 +++++++++++++++++++++++++++++++++
 mysql-test/suite/archive/rnd_pos.test   | 27 ++++++++++++++++
 sql/filesort.cc                         | 55 ++++++++++++++++++--------------
 sql/filesort.h                          |  3 ++
 sql/handler.h                           |  7 +++++
 sql/sql_select.cc                       | 15 +++++++++
 storage/archive/ha_archive.h            |  2 +-
 storage/csv/ha_tina.h                   |  2 +-
 8 files changed, 142 insertions(+), 25 deletions(-)

diff --git a/mysql-test/suite/archive/rnd_pos.result b/mysql-test/suite/archive/rnd_pos.result
new file mode 100644
index 00000000000..b6b6748d53f
--- /dev/null
+++ b/mysql-test/suite/archive/rnd_pos.result
@@ -0,0 +1,56 @@
+create table t1(c1 int not null, c2 double not null, c3 char(255) not null) engine=archive;
+insert t1 select seq, seq+0.7, concat('row with c1 = ', seq) from seq_1_to_10;
+explain partitions select c1,c3 from t1 order by c2;
+id	select_type	table	partitions	type	possible_keys	key	key_len	ref	rows	Extra
+1	SIMPLE	t1	NULL	ALL	NULL	NULL	NULL	NULL	10	Using filesort
+set max_length_for_sort_data = 4;
+explain partitions select c1,c3 from t1 order by c2;
+id	select_type	table	partitions	type	possible_keys	key	key_len	ref	rows	Extra
+1	SIMPLE	t1	NULL	ALL	NULL	NULL	NULL	NULL	10	Using temporary; Using filesort
+flush status;
+select c1,c3 from t1 order by c2;
+c1	c3
+1	row with c1 = 1
+2	row with c1 = 2
+3	row with c1 = 3
+4	row with c1 = 4
+5	row with c1 = 5
+6	row with c1 = 6
+7	row with c1 = 7
+8	row with c1 = 8
+9	row with c1 = 9
+10	row with c1 = 10
+set max_length_for_sort_data = default;
+show status where variable_name like '%tmp%' and value != 0;
+Variable_name	Value
+Created_tmp_tables	1
+Handler_tmp_write	10
+Rows_tmp_read	20
+alter table t1 partition by hash (c1) partitions 3;
+explain partitions select c1,c3 from t1 order by c2;
+id	select_type	table	partitions	type	possible_keys	key	key_len	ref	rows	Extra
+1	SIMPLE	t1	p0,p1,p2	ALL	NULL	NULL	NULL	NULL	10	Using filesort
+set max_length_for_sort_data = 4;
+explain partitions select c1,c3 from t1 order by c2;
+id	select_type	table	partitions	type	possible_keys	key	key_len	ref	rows	Extra
+1	SIMPLE	t1	p0,p1,p2	ALL	NULL	NULL	NULL	NULL	10	Using temporary; Using filesort
+flush status;
+select c1,c3 from t1 order by c2;
+c1	c3
+1	row with c1 = 1
+2	row with c1 = 2
+3	row with c1 = 3
+4	row with c1 = 4
+5	row with c1 = 5
+6	row with c1 = 6
+7	row with c1 = 7
+8	row with c1 = 8
+9	row with c1 = 9
+10	row with c1 = 10
+set max_length_for_sort_data = default;
+show status where variable_name like '%tmp%' and value != 0;
+Variable_name	Value
+Created_tmp_tables	1
+Handler_tmp_write	10
+Rows_tmp_read	20
+drop table t1;
diff --git a/mysql-test/suite/archive/rnd_pos.test b/mysql-test/suite/archive/rnd_pos.test
new file mode 100644
index 00000000000..7a1d78ea003
--- /dev/null
+++ b/mysql-test/suite/archive/rnd_pos.test
@@ -0,0 +1,27 @@
+#
+# MDEV-14500 Support engines without rnd_pos
+#
+source include/have_archive.inc;
+source include/have_sequence.inc;
+source include/have_partition.inc;
+
+create table t1(c1 int not null, c2 double not null, c3 char(255) not null) engine=archive;
+insert t1 select seq, seq+0.7, concat('row with c1 = ', seq) from seq_1_to_10;
+explain partitions select c1,c3 from t1 order by c2;
+set max_length_for_sort_data = 4;
+explain partitions select c1,c3 from t1 order by c2;
+flush status;
+select c1,c3 from t1 order by c2;
+set max_length_for_sort_data = default;
+show status where variable_name like '%tmp%' and value != 0;
+
+alter table t1 partition by hash (c1) partitions 3;
+explain partitions select c1,c3 from t1 order by c2;
+set max_length_for_sort_data = 4;
+explain partitions select c1,c3 from t1 order by c2;
+flush status;
+select c1,c3 from t1 order by c2;
+set max_length_for_sort_data = default;
+show status where variable_name like '%tmp%' and value != 0;
+
+drop table t1;
diff --git a/sql/filesort.cc b/sql/filesort.cc
index e682f3389da..cbe79967647 100644
--- a/sql/filesort.cc
+++ b/sql/filesort.cc
@@ -1957,6 +1957,30 @@ sortlength(THD *thd, SORT_FIELD *sortorder, uint s_length,
   return length;
 }
 
+bool filesort_use_addons(TABLE *table, uint sortlength,
+                         uint *length, uint *fields, uint *null_fields)
+{
+  Field **pfield, *field;
+  *length= *fields= *null_fields= 0;
+
+  for (pfield= table->field; (field= *pfield) ; pfield++)
+  {
+    if (!bitmap_is_set(table->read_set, field->field_index))
+      continue;
+    if (field->flags & BLOB_FLAG)
+      return false;
+    (*length)+= field->max_packed_col_length(field->pack_length());
+    if (field->maybe_null())
+      (*null_fields)++;
+    (*fields)++;
+  }
+  if (!*fields)
+    return false;
+  (*length)+= (*null_fields+7)/8;
+
+  return *length + sortlength <
+         table->in_use->variables.max_length_for_sort_data;
+}
 
 /**
   Get descriptors of fields appended to sorted fields and
@@ -1991,11 +2015,8 @@ get_addon_fields(TABLE *table, uint sortlength, LEX_STRING *addon_buf)
   Field **pfield;
   Field *field;
   SORT_ADDON_FIELD *addonf;
-  uint length= 0;
-  uint fields= 0;
-  uint null_fields= 0;
+  uint length, fields, null_fields;
   MY_BITMAP *read_set= table->read_set;
-  ulong max_sort_len= table->in_use->variables.max_length_for_sort_data;
   DBUG_ENTER("get_addon_fields");
 
   /*
@@ -2011,26 +2032,14 @@ get_addon_fields(TABLE *table, uint sortlength, LEX_STRING *addon_buf)
   addon_buf->str= 0;
   addon_buf->length= 0;
 
-  for (pfield= table->field; (field= *pfield) ; pfield++)
-  {
-    if (!bitmap_is_set(read_set, field->field_index))
-      continue;
-    if (field->flags & BLOB_FLAG)
-      DBUG_RETURN(0);
-    length+= field->max_packed_col_length(field->pack_length());
-    if (field->maybe_null())
-      null_fields++;
-    fields++;
-  }
-  if (!fields)
-    DBUG_RETURN(0);
-  length+= (null_fields+7)/8;
+  // see remove_const() for HA_SLOW_RND_POS explanation
+  if (table->file->ha_table_flags() & HA_SLOW_RND_POS)
+    sortlength= 0;
 
-  if (length+sortlength > max_sort_len ||
-      !my_multi_malloc(MYF(MY_WME | MY_THREAD_SPECIFIC),
-                       &addonf, sizeof(SORT_ADDON_FIELD) * (fields+1),
-                       &addon_buf->str, length,
-                       NullS))
+  if (!filesort_use_addons(table, sortlength, &length, &fields, &null_fields) ||
+      !my_multi_malloc(MYF(MY_WME | MY_THREAD_SPECIFIC), &addonf,
+                       sizeof(SORT_ADDON_FIELD) * (fields+1),
+                       &addon_buf->str, length, NullS))
 
     DBUG_RETURN(0);
 
diff --git a/sql/filesort.h b/sql/filesort.h
index bd1d81f91ef..359f44a3907 100644
--- a/sql/filesort.h
+++ b/sql/filesort.h
@@ -161,6 +161,9 @@ SORT_INFO *filesort(THD *thd, TABLE *table, Filesort *filesort,
                     Filesort_tracker* tracker, JOIN *join=NULL,
                     table_map first_table_bit=0);
 
+bool filesort_use_addons(TABLE *table, uint sortlength,
+                         uint *length, uint *fields, uint *null_fields);
+
 void change_double_for_sort(double nr,uchar *to);
 
 #endif /* FILESORT_INCLUDED */
diff --git a/sql/handler.h b/sql/handler.h
index b98244fe47e..ea514634b86 100644
--- a/sql/handler.h
+++ b/sql/handler.h
@@ -308,6 +308,13 @@ enum enum_alter_inplace_result {
 #define HA_SLOW_CMP_REF         (1ULL << 54)
 #define HA_CMP_REF_IS_EXPENSIVE HA_SLOW_CMP_REF
 
+/**
+  Some engines are unable to provide an efficient implementation for rnd_pos().
+  Server will try to avoid it, if possible
+
+  TODO better to do it with cost estimates, not with an explicit flag
+*/
+#define HA_SLOW_RND_POS  (1ULL << 55)
 
 /* bits in index_flags(index_number) for what you can do with index */
 #define HA_READ_NEXT            1       /* TODO really use this flag */
diff --git a/sql/sql_select.cc b/sql/sql_select.cc
index 3f5def0838e..c16521068c6 100644
--- a/sql/sql_select.cc
+++ b/sql/sql_select.cc
@@ -13181,6 +13181,21 @@ remove_const(JOIN *join,ORDER *first_order, COND *cond,
 
     JOIN_TAB *head= join->join_tab + join->const_tables;
     *simple_order= head->on_expr_ref[0] == NULL;
+    if (*simple_order && head->table->file->ha_table_flags() & HA_SLOW_RND_POS)
+    {
+      uint u1, u2, u3;
+      /*
+        normally the condition is (see filesort_use_addons())
+
+          length + sortlength <= max_length_for_sort_data
+
+        but for HA_SLOW_RND_POS tables we relax it a bit, as the alternative
+        is to use a temporary table, which is rather expensive.
+
+        TODO proper cost estimations
+      */
+      *simple_order= filesort_use_addons(head->table, 0, &u1, &u2, &u3);
+    }
   }
   else
   {
diff --git a/storage/archive/ha_archive.h b/storage/archive/ha_archive.h
index 56ff566db8c..1f25fba4eed 100644
--- a/storage/archive/ha_archive.h
+++ b/storage/archive/ha_archive.h
@@ -108,7 +108,7 @@ class ha_archive: public handler
     return (HA_NO_TRANSACTIONS | HA_REC_NOT_IN_SEQ | HA_CAN_BIT_FIELD |
             HA_BINLOG_ROW_CAPABLE | HA_BINLOG_STMT_CAPABLE |
             HA_STATS_RECORDS_IS_EXACT | HA_CAN_EXPORT |
-            HA_HAS_RECORDS | HA_CAN_REPAIR |
+            HA_HAS_RECORDS | HA_CAN_REPAIR | HA_SLOW_RND_POS |
             HA_FILE_BASED | HA_CAN_INSERT_DELAYED | HA_CAN_GEOMETRY);
   }
   ulong index_flags(uint idx, uint part, bool all_parts) const
diff --git a/storage/csv/ha_tina.h b/storage/csv/ha_tina.h
index c75a64faa52..5b389d984d6 100644
--- a/storage/csv/ha_tina.h
+++ b/storage/csv/ha_tina.h
@@ -107,7 +107,7 @@ class ha_tina: public handler
   {
     return (HA_NO_TRANSACTIONS | HA_REC_NOT_IN_SEQ | HA_NO_AUTO_INCREMENT |
             HA_BINLOG_ROW_CAPABLE | HA_BINLOG_STMT_CAPABLE | HA_CAN_EXPORT |
-            HA_CAN_REPAIR);
+            HA_CAN_REPAIR | HA_SLOW_RND_POS);
   }
   ulong index_flags(uint idx, uint part, bool all_parts) const
   {