← Back to team overview

maria-developers team mailing list archive

Re: MDEV-13790 UNHEX() of a somewhat complicated CONCAT() returns NULL

 

Hi Sergei,

Thanks for review. Comments follow inline:

On 01/28/2018 07:22 PM, Sergei Golubchik wrote:
Hi, Alexander!

On Nov 03, Alexander Barkov wrote:
Hello Sergei,

Please review a patch for MDEV-13790.

It removes the attempt to search the best suitable buffer
and just returns the result in "str" passed to val_str().

I'm quite sure this optimization gives nothing.
In can be useful only under very rare conditions,
but generates so many bugs.

There're bunch of tricks. What do they all do, could you list them?

One handles the case when an argument is a substring of the already
concatenated string - this is very weird and rare, I agree.

Other accumulates the result in the first argument's result - this
avoids one memcpy per row. Doesn't sound like much, but it's not rare,
so should be very easy to benchmark.

In my patch the first argument goes directly to "str"
parameter of Item_func_concat::val_str().
There should not be an extra memcpy here.


Yet another doubles the buffer size every time - this might be useful to
keep, it easily fits in your new code.

As discussed on IRC, I kept this code.
Please find a new version attached.


What are other tricks that concat was using?

Btw, one comment about your patch — please don't call current_thd in a
loop.

Fixed.


Regards,
Sergei
Chief Architect MariaDB
and security@xxxxxxxxxxx

diff --git a/mysql-test/r/func_concat.result b/mysql-test/r/func_concat.result
index b87ee7bfc52..9ab6f74653e 100644
--- a/mysql-test/r/func_concat.result
+++ b/mysql-test/r/func_concat.result
@@ -262,3 +262,9 @@ c2
 abcdefghi-abcdefghi
 DROP TABLE t1;
 SET optimizer_switch=@save_optimizer_switch;
+#
+# MDEV-13790 UNHEX() of a somewhat complicated CONCAT() returns NULL
+#
+SELECT UNHEX(CONCAT('414C2', HEX(8 + ROUND(RAND()*7)), SUBSTR(SHA(UUID()),6,33),HEX(2+ROUND(RAND()*8)))) IS NULL AS c1;
+c1
+0
diff --git a/mysql-test/t/func_concat.test b/mysql-test/t/func_concat.test
index be573f494a2..69dd2c4063e 100644
--- a/mysql-test/t/func_concat.test
+++ b/mysql-test/t/func_concat.test
@@ -236,3 +236,9 @@ SELECT CONCAT(t2,'-',t2) c2 FROM (SELECT TRIM(t) t2 FROM t1) sub;
 DROP TABLE t1;
 
 SET optimizer_switch=@save_optimizer_switch;
+
+--echo #
+--echo # MDEV-13790 UNHEX() of a somewhat complicated CONCAT() returns NULL
+--echo #
+
+SELECT UNHEX(CONCAT('414C2', HEX(8 + ROUND(RAND()*7)), SUBSTR(SHA(UUID()),6,33),HEX(2+ROUND(RAND()*8)))) IS NULL AS c1;
diff --git a/sql/item_strfunc.cc b/sql/item_strfunc.cc
index c0d60d7ef8b..378f23f3109 100644
--- a/sql/item_strfunc.cc
+++ b/sql/item_strfunc.cc
@@ -630,138 +630,84 @@ String *Item_func_decode_histogram::val_str(String *str)
 
 ///////////////////////////////////////////////////////////////////////////////
 
+/*
+  Realloc the result buffer.
+  NOTE: We should be prudent in the initial allocation unit -- the
+  size of the arguments is a function of data distribution, which
+  can be any. Instead of overcommitting at the first row, we grow
+  the allocated amount by the factor of 2. This ensures that no
+  more than 25% of memory will be overcommitted on average.
+
+  @param IN/OUT str    - the result string
+  @param IN     length - new total space required in "str"
+  @retval       false  - on success
+  @retval       true   - on error
+*/
+
+bool Item_func_concat::realloc_result(String *str, uint length) const
+{
+  if (str->alloced_length() >= length)
+    return false; // Alloced space is big enough, nothing to do.
+
+  if (str->alloced_length() == 0)
+    return str->alloc(length);
+
+  /*
+    Item_func_concat::val_str() makes sure the result length does not grow
+    higher than max_allowed_packet. So "length" is limited to 1G here.
+    We can't say anything about the current value of str->alloced_length(),
+    as str was initially set by args[0]->val_str(str).
+    So multiplication by 2 can overflow, if args[0] for some reasons
+    did not limit the result to max_alloced_packet. But it's not harmful,
+    "str" will be realloced exactly to "length" bytes in case of overflow.
+  */
+  uint new_length= MY_MAX(str->alloced_length() * 2, length);
+  return str->realloc(new_length);
+}
+
+
 /**
   Concatenate args with the following premises:
   If only one arg (which is ok), return value of arg;
-  Don't reallocate val_str() if not absolute necessary.
 */
 
 String *Item_func_concat::val_str(String *str)
 {
   DBUG_ASSERT(fixed == 1);
-  String *res,*res2,*use_as_buff;
-  uint i;
-  bool is_const= 0;
+  THD *thd= current_thd;
+  String *res;
 
   null_value=0;
-  if (!(res=args[0]->val_str(str)))
+  if (!(res= args[0]->val_str(str)))
     goto null;
-  use_as_buff= &tmp_value;
-  /* Item_subselect in --ps-protocol mode will state it as a non-const */
-  is_const= args[0]->const_item() || !args[0]->used_tables();
-  for (i=1 ; i < arg_count ; i++)
-  {
-    if (res->length() == 0)
-    {
-      if (!(res=args[i]->val_str(str)))
-	goto null;
-      /*
-       CONCAT accumulates its result in the result of its the first
-       non-empty argument. Because of this we need is_const to be 
-       evaluated only for it.
-      */
-      is_const= args[i]->const_item() || !args[i]->used_tables();
-    }
-    else
-    {
-      if (!(res2=args[i]->val_str(use_as_buff)))
-	goto null;
-      if (res2->length() == 0)
-	continue;
-      if (res->length()+res2->length() >
-	  current_thd->variables.max_allowed_packet)
-      {
-	push_warning_printf(current_thd, Sql_condition::WARN_LEVEL_WARN,
-			    ER_WARN_ALLOWED_PACKET_OVERFLOWED,
-			    ER(ER_WARN_ALLOWED_PACKET_OVERFLOWED), func_name(),
-			    current_thd->variables.max_allowed_packet);
-	goto null;
-      }
-      if (!is_const && res->alloced_length() >= res->length()+res2->length())
-      {						// Use old buffer
-	res->append(*res2);
-      }
-      else if (str->alloced_length() >= res->length()+res2->length())
-      {
-	if (str->ptr() == res2->ptr())
-	  str->replace(0,0,*res);
-	else
-	{
-	  str->copy(*res);
-	  str->append(*res2);
-	}
-        res= str;
-        use_as_buff= &tmp_value;
-      }
-      else if (res == &tmp_value)
-      {
-	if (res->append(*res2))			// Must be a blob
-	  goto null;
-      }
-      else if (res2 == &tmp_value)
-      {						// This can happend only 1 time
-	if (tmp_value.replace(0,0,*res))
-	  goto null;
-	res= &tmp_value;
-	use_as_buff=str;			// Put next arg here
-      }
-      else if (tmp_value.is_alloced() && res2->ptr() >= tmp_value.ptr() &&
-	       res2->ptr() <= tmp_value.ptr() + tmp_value.alloced_length())
-      {
-	/*
-	  This happens really seldom:
-	  In this case res2 is sub string of tmp_value.  We will
-	  now work in place in tmp_value to set it to res | res2
-	*/
-	/* Chop the last characters in tmp_value that isn't in res2 */
-	tmp_value.length((uint32) (res2->ptr() - tmp_value.ptr()) +
-			 res2->length());
-	/* Place res2 at start of tmp_value, remove chars before res2 */
-	if (tmp_value.replace(0,(uint32) (res2->ptr() - tmp_value.ptr()),
-			      *res))
-	  goto null;
-	res= &tmp_value;
-	use_as_buff=str;			// Put next arg here
-      }
-      else
-      {						// Two big const strings
-        /*
-          NOTE: We should be prudent in the initial allocation unit -- the
-          size of the arguments is a function of data distribution, which
-          can be any. Instead of overcommitting at the first row, we grow
-          the allocated amount by the factor of 2. This ensures that no
-          more than 25% of memory will be overcommitted on average.
-        */
-
-        uint concat_len= res->length() + res2->length();
-
-        if (tmp_value.alloced_length() < concat_len)
-        {
-          if (tmp_value.alloced_length() == 0)
-          {
-            if (tmp_value.alloc(concat_len))
-              goto null;
-          }
-          else
-          {
-            uint new_len = MY_MAX(tmp_value.alloced_length() * 2, concat_len);
 
-            if (tmp_value.realloc(new_len))
-              goto null;
-          }
-        }
+  if (res != str)
+    str->copy(res->ptr(), res->length(), res->charset());
 
-	if (tmp_value.copy(*res) || tmp_value.append(*res2))
-	  goto null;
-
-	res= &tmp_value;
-	use_as_buff=str;
-      }
-      is_const= 0;
+  for (uint i= 1 ; i < arg_count ; i++)
+  {
+    uint concat_len;
+    if (!(res= args[i]->val_str(&tmp_value)))
+      goto null;
+   if (res->length() == 0)
+     continue;
+    if ((concat_len= str->length() + res->length()) >
+        thd->variables.max_allowed_packet)
+    {
+      push_warning_printf(thd, Sql_condition::WARN_LEVEL_WARN,
+                          ER_WARN_ALLOWED_PACKET_OVERFLOWED,
+                          ER(ER_WARN_ALLOWED_PACKET_OVERFLOWED), func_name(),
+                          thd->variables.max_allowed_packet);
+      goto null;
     }
+    DBUG_ASSERT(!res->uses_buffer_owned_by(str));
+    DBUG_ASSERT(!str->uses_buffer_owned_by(res));
+    if (realloc_result(str, concat_len) || str->append(*res))
+      goto null;
   }
-  res->set_charset(collation.collation);
-  return res;
+
+  str->set_charset(collation.collation);
+  return str;
 
 null:
   null_value=1;
diff --git a/sql/item_strfunc.h b/sql/item_strfunc.h
index aa3486d4e73..12c348aa9fa 100644
--- a/sql/item_strfunc.h
+++ b/sql/item_strfunc.h
@@ -156,6 +156,7 @@ class Item_func_aes_decrypt :public Item_str_func
 class Item_func_concat :public Item_str_func
 {
   String tmp_value;
+  bool realloc_result(String *str, uint length) const;
 public:
   Item_func_concat(List<Item> &list) :Item_str_func(list) {}
   Item_func_concat(Item *a,Item *b) :Item_str_func(a,b) {}

Follow ups

References