← Back to team overview

maria-developers team mailing list archive

MDEV-9332 Bug after upgrade to 10.1.10

 

Hi Sergei,

Please review a patch form MDEV-9332.

It does the following:

- Changes the behaviour of copy_if_not_alloced in case
from->Alloced_length==0 and from_length==0 to return "to" instead of "from",
  because "from" can point to a constant string.
- Rewrites the code to make it more readable (I hope), adding comments.

Thanks.
diff --git a/mysql-test/r/ctype_utf8.result b/mysql-test/r/ctype_utf8.result
index d95b506..90bc6b5 100644
--- a/mysql-test/r/ctype_utf8.result
+++ b/mysql-test/r/ctype_utf8.result
@@ -5365,14 +5365,17 @@ DROP TABLE t1;
 SET sql_mode=default;
 #
 # Bug#57687 crash when reporting duplicate group_key error and utf8
-# Make sure to modify this when Bug#58081 is fixed.
+# Bug#58081 Duplicate entry error when doing GROUP BY
+# MDEV-9332 Bug after upgrade to 10.1.10
 #
 SET NAMES utf8;
 CREATE TABLE t1 (a INT);
 INSERT INTO t1 VALUES (0), (0), (1), (0), (0);
 SELECT COUNT(*) FROM t1, t1 t2 
 GROUP BY INSERT('', t2.a, t1.a, (@@global.max_binlog_size));
-ERROR 23000: Duplicate entry '107374182410737418241' for key 'group_key'
+COUNT(*)
+20
+5
 DROP TABLE t1;
 #
 # Bug#11764503 (Bug#57341) Query in EXPLAIN EXTENDED shows wrong characters
diff --git a/mysql-test/r/func_str.result b/mysql-test/r/func_str.result
index 9bc8061..678b6d2 100644
--- a/mysql-test/r/func_str.result
+++ b/mysql-test/r/func_str.result
@@ -4571,5 +4571,17 @@ Warnings:
 Note	1003	select `test`.`t1`.`a` AS `a` from `test`.`t1` where ((`test`.`t1`.`a` = 18446744073709551615) and (format(`test`.`t1`.`a`,0) = '18,446,744,073,709,551,615'))
 DROP TABLE t1;
 #
+# Bug#58081 Duplicate entry error when doing GROUP BY
+# MDEV-9332 Bug after upgrade to 10.1.10
+#
+SET NAMES latin1;
+CREATE TABLE t1 (a INT);
+INSERT INTO t1 VALUES (0),(0),(1),(0),(0);
+SELECT COUNT(*) FROM t1, t1 t2 GROUP BY INSERT('', t2.a, t1.a, @@global.max_binlog_size);
+COUNT(*)
+20
+5
+DROP TABLE t1;
+#
 # End of 10.1 tests
 #
diff --git a/mysql-test/t/ctype_utf8.test b/mysql-test/t/ctype_utf8.test
index 426985a..014194d 100644
--- a/mysql-test/t/ctype_utf8.test
+++ b/mysql-test/t/ctype_utf8.test
@@ -1577,12 +1577,12 @@ SET NAMES utf8;
 
 --echo #
 --echo # Bug#57687 crash when reporting duplicate group_key error and utf8
---echo # Make sure to modify this when Bug#58081 is fixed.
+--echo # Bug#58081 Duplicate entry error when doing GROUP BY
+--echo # MDEV-9332 Bug after upgrade to 10.1.10
 --echo #
 SET NAMES utf8;
 CREATE TABLE t1 (a INT);
 INSERT INTO t1 VALUES (0), (0), (1), (0), (0);
---error ER_DUP_ENTRY
 SELECT COUNT(*) FROM t1, t1 t2 
 GROUP BY INSERT('', t2.a, t1.a, (@@global.max_binlog_size));
 DROP TABLE t1;
diff --git a/mysql-test/t/func_str.test b/mysql-test/t/func_str.test
index 511f1f3..2645417 100644
--- a/mysql-test/t/func_str.test
+++ b/mysql-test/t/func_str.test
@@ -1774,5 +1774,15 @@ SELECT * FROM t1 WHERE a=18446744073709551615 AND FORMAT(a,0)='18,446,744,073,70
 DROP TABLE t1;
 
 --echo #
+--echo # Bug#58081 Duplicate entry error when doing GROUP BY
+--echo # MDEV-9332 Bug after upgrade to 10.1.10
+--echo #
+SET NAMES latin1;
+CREATE TABLE t1 (a INT);
+INSERT INTO t1 VALUES (0),(0),(1),(0),(0);
+SELECT COUNT(*) FROM t1, t1 t2 GROUP BY INSERT('', t2.a, t1.a, @@global.max_binlog_size);
+DROP TABLE t1;
+
+--echo #
 --echo # End of 10.1 tests
 --echo #
diff --git a/sql/sql_string.cc b/sql/sql_string.cc
index b14c3af..606e71d 100644
--- a/sql/sql_string.cc
+++ b/sql/sql_string.cc
@@ -789,21 +789,117 @@ int stringcmp(const String *s,const String *t)
 }
 
 
+/**
+  Return a string which has the same value with "from" and
+  which is safe to modify, trying to avoid unnecessary allocation
+  and copying when possible.
+
+  @param to           Buffer. Must not be a constant string.
+  @param from         Some existing value. We'll try to reuse it.
+                      Can be a constant or a variable string.
+  @param from_length  The total size that will be possibly needed.
+                      Note, can be 0.
+
+  Note, in some cases "from" and "to" can point to the same object.
+
+  If "from" is a variable string and its allocated memory is enough
+  to store "from_length" bytes, then "from" is returned as is.
+
+  If "from" is a variable string and its allocated memory is not enough
+  to store "from_length" bytes, then then "from" is reallocated and returned.
+
+  Otherwise (if "from" is a constant string, or looks like a constant string),
+  then "to" is reallocated to fit "from_length" bytes, the value is copied
+  from "from" to "to", then "to" is returned.
+*/
 String *copy_if_not_alloced(String *to,String *from,uint32 from_length)
 {
-  if (from->Alloced_length >= from_length)
-    return from;
-  if ((from->alloced && (from->Alloced_length != 0)) || !to || from == to)
+  DBUG_ASSERT(to);
+  /*
+    If "from" is a constant string, e.g.:
+       SELECT INSERT('', <pos>, <length>, <replacement>);
+    we should not return it. See MDEV-9332.
+
+    The code below detects different string types:
+
+    a. All constant strings have Alloced_length==0 and alloced==false.
+       They point to a static memory array, or a mem_root memory,
+       and should stay untouched until the end of their life cycle.
+       Not safe to reuse.
+
+    b. Some variable string have Alloced_length==0 and alloced==false initially,
+       they are not bound to any char array and allocate space of the first use
+       (and become #d). A typical example of such String is Item::str_value.
+       This type of string could be reused, but there is no a way to distinguish
+       them from the true constant strings (#a).
+
+    c. Some variable strings have Alloced_length>0 and alloced==false.
+       They point to a fixed length writtable char array initially but can
+       later allocate more space on the heap when the array is too small
+       (these strings become #d after allocation).
+       Safe to reuse.
+
+    d. Some variable strings have Alloced_length>0 and alloced==true.
+       They already store data on the heap.
+       Safe to reuse.
+
+    e. Some strings can have Alloced_length==0 and alloced==true.
+       This type of strings allocate space on the heap, but then are marked
+       as constant strings using String::mark_as_const().
+       A typical example - the result of a character set conversion
+       of a constant string.
+       Not safe to reuse.
+  */
+  if (from->Alloced_length > 0) // "from" is  #c or #d (not a constant)
   {
-    (void) from->realloc(from_length);
-    return from;
+    if (from->Alloced_length >= from_length)
+      return from; // #c or #d (large enough to store from_length bytes)
+
+    if (from->alloced)
+    {
+      (void) from->realloc(from_length);
+      return from; // #d (reallocated to fit from_length bytes)
+    }
+    /*
+      "from" is of type #c. It currently points to a writtable char array
+      (typically on stack), but is too small for "from_length" bytes.
+      We need to reallocate either "from" or "to".
+
+      "from" typically points to a temporary buffer inside Item_xxx::val_str(),
+      or to Item::str_value, and thus is "less permanent" than "to".
+
+      Reallocating "to" may give more benifits:
+      - "to" can point to a more permanent storage and can be reused
+        for multiple rows, e.g. str_buffer in Protocol::send_result_set_row(),
+        which is passed to val_str() for all string type rows.
+      - "from" can stay pointing to its original fixed length stack char array,
+        and thus reduce the total amount of my_alloc/my_free.
+    */
+  }
+
+  if (from == to)
+  {
+    /*
+      Possible string types:
+      #a  not possible (constants should not be passed as "to")
+      #b  possible     (a fresh variable with no associated char buffer)
+      #c  possible     (a variable with a char buffer,
+                        in case it's smaller than fixed_length)
+      #d  not possible (handled earlier)
+      #e  not possible (constants should not be passed as "to")
+
+      If a string of types #a or #e appears here, that means the caller made
+      something wrong. Otherwise, it's safe to reallocate and return "to".
+    */
+    (void) to->realloc(from_length);
+    return to;
   }
   if (to->realloc(from_length))
     return from;				// Actually an error
   if ((to->str_length=MY_MIN(from->str_length,from_length)))
     memcpy(to->Ptr,from->Ptr,to->str_length);
   to->str_charset=from->str_charset;
-  return to;
+  return to; // "from" was of types #a, #b, #e, or small #c.
 }
 
 

Follow ups