← Back to team overview

maria-developers team mailing list archive

Please review MDEV-7055 MySQL#74664 - InnoDB: Failing assertion ...

 

Hi Sergei,

Please review a patch for MDEV-7055.

Thanks.
diff --git a/mysql-test/include/ctype_date_format.inc b/mysql-test/include/ctype_date_format.inc
new file mode 100644
index 0000000..d836090
--- /dev/null
+++ b/mysql-test/include/ctype_date_format.inc
@@ -0,0 +1,14 @@
+--echo #
+--echo # MDEV-7055 MySQL#74664 - InnoDB: Failing assertion: len <= col->len || col->mtype == 5 || (col->len == 0 && col->mtype == 1) in file rem0rec.cc line 845
+--echo #
+SELECT HEX(date_format('0001-01-01', '%Y'));
+SELECT HEX(date_format('0001-01-01 10:20:30.000009', '%f'));
+SELECT HEX(date_format('0001-01-01 10:20:30.000009', '%Y %f'));
+SELECT date_format('2001-01-01','%W rubb ish %w');
+SELECT date_format('2001-01-01','%W rubb ish %');
+
+CREATE TABLE t1 AS SELECT IF(0=0,'Y','N') AS a LIMIT 0;
+SHOW CREATE TABLE t1;
+INSERT INTO t1 VALUES (date_format('2001-01-01','%W'));
+SELECT * FROM t1;
+DROP TABLE IF EXISTS t1;
diff --git a/mysql-test/r/ctype_gbk.result b/mysql-test/r/ctype_gbk.result
index 9da3cf9..0c593bc 100644
--- a/mysql-test/r/ctype_gbk.result
+++ b/mysql-test/r/ctype_gbk.result
@@ -621,5 +621,14 @@ A8BD	ń
 A8BE	ň
 DROP TABLE t1;
 #
+# MDEV-7055 MySQL#74664 - InnoDB: Failing assertion: len <= col->len || col->mtype == 5 || (col->len == 0 && col->mtype == 1) in file rem0rec.cc line 845
+#
+SET NAMES gbk;
+CREATE TABLE t1 AS SELECT DATE_FORMAT('2001-01-01',_gbk 0xA1402557) AS a;
+SELECT HEX(a), CONVERT(a USING utf8) FROM t1;
+HEX(a)	CONVERT(a USING utf8)
+A1404D6F6E646179	?Monday
+DROP TABLE t1;
+#
 # End of 5.5 tests
 #
diff --git a/mysql-test/r/ctype_ucs.result b/mysql-test/r/ctype_ucs.result
index f9e9a69..6d70b79 100644
--- a/mysql-test/r/ctype_ucs.result
+++ b/mysql-test/r/ctype_ucs.result
@@ -124,6 +124,37 @@ select 'a  a' > 'a', 'a  \0' < 'a';
 select binary 'a  a' > 'a', binary 'a  \0' > 'a', binary 'a\0' > 'a';
 binary 'a  a' > 'a'	binary 'a  \0' > 'a'	binary 'a\0' > 'a'
 1	1	1
+#
+# MDEV-7055 MySQL#74664 - InnoDB: Failing assertion: len <= col->len || col->mtype == 5 || (col->len == 0 && col->mtype == 1) in file rem0rec.cc line 845
+#
+SELECT HEX(date_format('0001-01-01', '%Y'));
+HEX(date_format('0001-01-01', '%Y'))
+0030003000300031
+SELECT HEX(date_format('0001-01-01 10:20:30.000009', '%f'));
+HEX(date_format('0001-01-01 10:20:30.000009', '%f'))
+003000300030003000300039
+SELECT HEX(date_format('0001-01-01 10:20:30.000009', '%Y %f'));
+HEX(date_format('0001-01-01 10:20:30.000009', '%Y %f'))
+00300030003000310020003000300030003000300039
+SELECT date_format('2001-01-01','%W rubb ish %w');
+date_format('2001-01-01','%W rubb ish %w')
+Monday rubb ish 1
+SELECT date_format('2001-01-01','%W rubb ish %');
+date_format('2001-01-01','%W rubb ish %')
+Monday rubb ish %
+CREATE TABLE t1 AS SELECT IF(0=0,'Y','N') AS a LIMIT 0;
+SHOW CREATE TABLE t1;
+Table	Create Table
+t1	CREATE TABLE `t1` (
+  `a` varchar(1) CHARACTER SET ucs2 NOT NULL DEFAULT ''
+) ENGINE=MyISAM DEFAULT CHARSET=latin1
+INSERT INTO t1 VALUES (date_format('2001-01-01','%W'));
+Warnings:
+Warning	1265	Data truncated for column 'a' at row 1
+SELECT * FROM t1;
+a
+M
+DROP TABLE IF EXISTS t1;
 SET CHARACTER SET koi8r;
 create table t1 (a varchar(2) character set ucs2 collate ucs2_bin, key(a));
 insert into t1 values ('A'),('A'),('B'),('C'),('D'),('A\t');
diff --git a/mysql-test/r/ctype_ucs2_innodb.result b/mysql-test/r/ctype_ucs2_innodb.result
new file mode 100644
index 0000000..ecd0315
--- /dev/null
+++ b/mysql-test/r/ctype_ucs2_innodb.result
@@ -0,0 +1,45 @@
+SET default_storage_engine=InnoDB;
+SET TIME_ZONE = '+03:00';
+DROP TABLE IF EXISTS t1;
+#
+# Start of 5.5 tests
+#
+SET NAMES latin1;
+SET character_set_connection=ucs2;
+SELECT HEX('a'), HEX('a ');
+HEX('a')	HEX('a ')
+0061	00610020
+#
+# MDEV-7055 MySQL#74664 - InnoDB: Failing assertion: len <= col->len || col->mtype == 5 || (col->len == 0 && col->mtype == 1) in file rem0rec.cc line 845
+#
+SELECT HEX(date_format('0001-01-01', '%Y'));
+HEX(date_format('0001-01-01', '%Y'))
+0030003000300031
+SELECT HEX(date_format('0001-01-01 10:20:30.000009', '%f'));
+HEX(date_format('0001-01-01 10:20:30.000009', '%f'))
+003000300030003000300039
+SELECT HEX(date_format('0001-01-01 10:20:30.000009', '%Y %f'));
+HEX(date_format('0001-01-01 10:20:30.000009', '%Y %f'))
+00300030003000310020003000300030003000300039
+SELECT date_format('2001-01-01','%W rubb ish %w');
+date_format('2001-01-01','%W rubb ish %w')
+Monday rubb ish 1
+SELECT date_format('2001-01-01','%W rubb ish %');
+date_format('2001-01-01','%W rubb ish %')
+Monday rubb ish %
+CREATE TABLE t1 AS SELECT IF(0=0,'Y','N') AS a LIMIT 0;
+SHOW CREATE TABLE t1;
+Table	Create Table
+t1	CREATE TABLE `t1` (
+  `a` varchar(1) CHARACTER SET ucs2 NOT NULL DEFAULT ''
+) ENGINE=InnoDB DEFAULT CHARSET=latin1
+INSERT INTO t1 VALUES (date_format('2001-01-01','%W'));
+Warnings:
+Warning	1265	Data truncated for column 'a' at row 1
+SELECT * FROM t1;
+a
+M
+DROP TABLE IF EXISTS t1;
+#
+# End of 5.5 tests
+#
diff --git a/mysql-test/r/ctype_utf32.result b/mysql-test/r/ctype_utf32.result
index 1f316b7..c0249cc 100644
--- a/mysql-test/r/ctype_utf32.result
+++ b/mysql-test/r/ctype_utf32.result
@@ -29,6 +29,37 @@ select 'a  a' > 'a', 'a  \0' < 'a';
 select binary 'a  a' > 'a', binary 'a  \0' > 'a', binary 'a\0' > 'a';
 binary 'a  a' > 'a'	binary 'a  \0' > 'a'	binary 'a\0' > 'a'
 1	1	1
+#
+# MDEV-7055 MySQL#74664 - InnoDB: Failing assertion: len <= col->len || col->mtype == 5 || (col->len == 0 && col->mtype == 1) in file rem0rec.cc line 845
+#
+SELECT HEX(date_format('0001-01-01', '%Y'));
+HEX(date_format('0001-01-01', '%Y'))
+00000030000000300000003000000031
+SELECT HEX(date_format('0001-01-01 10:20:30.000009', '%f'));
+HEX(date_format('0001-01-01 10:20:30.000009', '%f'))
+000000300000003000000030000000300000003000000039
+SELECT HEX(date_format('0001-01-01 10:20:30.000009', '%Y %f'));
+HEX(date_format('0001-01-01 10:20:30.000009', '%Y %f'))
+0000003000000030000000300000003100000020000000300000003000000030000000300000003000000039
+SELECT date_format('2001-01-01','%W rubb ish %w');
+date_format('2001-01-01','%W rubb ish %w')
+Monday rubb ish 1
+SELECT date_format('2001-01-01','%W rubb ish %');
+date_format('2001-01-01','%W rubb ish %')
+Monday rubb ish %
+CREATE TABLE t1 AS SELECT IF(0=0,'Y','N') AS a LIMIT 0;
+SHOW CREATE TABLE t1;
+Table	Create Table
+t1	CREATE TABLE `t1` (
+  `a` varchar(1) CHARACTER SET utf32 NOT NULL DEFAULT ''
+) ENGINE=MyISAM DEFAULT CHARSET=latin1
+INSERT INTO t1 VALUES (date_format('2001-01-01','%W'));
+Warnings:
+Warning	1265	Data truncated for column 'a' at row 1
+SELECT * FROM t1;
+a
+M
+DROP TABLE IF EXISTS t1;
 select hex(_utf32 0x44);
 hex(_utf32 0x44)
 00000044
diff --git a/mysql-test/r/ctype_utf32_innodb.result b/mysql-test/r/ctype_utf32_innodb.result
new file mode 100644
index 0000000..8362724
--- /dev/null
+++ b/mysql-test/r/ctype_utf32_innodb.result
@@ -0,0 +1,45 @@
+SET default_storage_engine=InnoDB;
+SET TIME_ZONE = '+03:00';
+DROP TABLE IF EXISTS t1;
+#
+# Start of 5.5 tests
+#
+SET NAMES latin1;
+SET character_set_connection=utf32;
+SELECT HEX('a'), HEX('a ');
+HEX('a')	HEX('a ')
+00000061	0000006100000020
+#
+# MDEV-7055 MySQL#74664 - InnoDB: Failing assertion: len <= col->len || col->mtype == 5 || (col->len == 0 && col->mtype == 1) in file rem0rec.cc line 845
+#
+SELECT HEX(date_format('0001-01-01', '%Y'));
+HEX(date_format('0001-01-01', '%Y'))
+00000030000000300000003000000031
+SELECT HEX(date_format('0001-01-01 10:20:30.000009', '%f'));
+HEX(date_format('0001-01-01 10:20:30.000009', '%f'))
+000000300000003000000030000000300000003000000039
+SELECT HEX(date_format('0001-01-01 10:20:30.000009', '%Y %f'));
+HEX(date_format('0001-01-01 10:20:30.000009', '%Y %f'))
+0000003000000030000000300000003100000020000000300000003000000030000000300000003000000039
+SELECT date_format('2001-01-01','%W rubb ish %w');
+date_format('2001-01-01','%W rubb ish %w')
+Monday rubb ish 1
+SELECT date_format('2001-01-01','%W rubb ish %');
+date_format('2001-01-01','%W rubb ish %')
+Monday rubb ish %
+CREATE TABLE t1 AS SELECT IF(0=0,'Y','N') AS a LIMIT 0;
+SHOW CREATE TABLE t1;
+Table	Create Table
+t1	CREATE TABLE `t1` (
+  `a` varchar(1) CHARACTER SET utf32 NOT NULL DEFAULT ''
+) ENGINE=InnoDB DEFAULT CHARSET=latin1
+INSERT INTO t1 VALUES (date_format('2001-01-01','%W'));
+Warnings:
+Warning	1265	Data truncated for column 'a' at row 1
+SELECT * FROM t1;
+a
+M
+DROP TABLE IF EXISTS t1;
+#
+# End of 5.5 tests
+#
diff --git a/mysql-test/t/ctype_gbk.test b/mysql-test/t/ctype_gbk.test
index b9e25e9..3493cec 100644
--- a/mysql-test/t/ctype_gbk.test
+++ b/mysql-test/t/ctype_gbk.test
@@ -154,7 +154,19 @@ WHERE HEX(CAST(UPPER(a) AS CHAR CHARACTER SET utf8)) <>
 DROP TABLE t1;
 
 
+--echo #
+--echo # MDEV-7055 MySQL#74664 - InnoDB: Failing assertion: len <= col->len || col->mtype == 5 || (col->len == 0 && col->mtype == 1) in file rem0rec.cc line 845
+--echo #
+
+# Testing format string 0xA140 + '%' + 'W'
+# 0xA140 is an unassigned character in gbk.
+# It should be preserved in the DATE_FORMAT output
+# (should not be replaced to question mark)
 
+SET NAMES gbk;
+CREATE TABLE t1 AS SELECT DATE_FORMAT('2001-01-01',_gbk 0xA1402557) AS a;
+SELECT HEX(a), CONVERT(a USING utf8) FROM t1;
+DROP TABLE t1;
 
 --echo #
 --echo # End of 5.5 tests
diff --git a/mysql-test/t/ctype_ucs.test b/mysql-test/t/ctype_ucs.test
index 7fd3768..7211c59 100644
--- a/mysql-test/t/ctype_ucs.test
+++ b/mysql-test/t/ctype_ucs.test
@@ -11,6 +11,7 @@ SET @test_collation= 'ucs2_general_ci';
 SET NAMES latin1;
 SET character_set_connection=ucs2;
 -- source include/endspace.inc
+-- source include/ctype_date_format.inc
 
 SET CHARACTER SET koi8r;
 
diff --git a/mysql-test/t/ctype_ucs2_innodb.test b/mysql-test/t/ctype_ucs2_innodb.test
new file mode 100644
index 0000000..144e08f
--- /dev/null
+++ b/mysql-test/t/ctype_ucs2_innodb.test
@@ -0,0 +1,23 @@
+-- source include/have_innodb.inc
+-- source include/have_ucs2.inc
+
+SET default_storage_engine=InnoDB;
+
+SET TIME_ZONE = '+03:00';
+
+--disable_warnings
+DROP TABLE IF EXISTS t1;
+--enable_warnings
+
+--echo #
+--echo # Start of 5.5 tests
+--echo #
+
+SET NAMES latin1;
+SET character_set_connection=ucs2;
+SELECT HEX('a'), HEX('a ');
+-- source include/ctype_date_format.inc
+
+--echo #
+--echo # End of 5.5 tests
+--echo #
diff --git a/mysql-test/t/ctype_utf32.test b/mysql-test/t/ctype_utf32.test
index 1be8925..1694f43 100644
--- a/mysql-test/t/ctype_utf32.test
+++ b/mysql-test/t/ctype_utf32.test
@@ -14,6 +14,7 @@ SET NAMES latin1;
 SET character_set_connection=utf32;
 select hex('a'), hex('a ');
 -- source include/endspace.inc
+-- source include/ctype_date_format.inc
 
 #
 # Check that incomplete utf32 characters in HEX notation
diff --git a/mysql-test/t/ctype_utf32_innodb.test b/mysql-test/t/ctype_utf32_innodb.test
new file mode 100644
index 0000000..ceb90ff
--- /dev/null
+++ b/mysql-test/t/ctype_utf32_innodb.test
@@ -0,0 +1,23 @@
+-- source include/have_innodb.inc
+-- source include/have_utf32.inc
+
+SET default_storage_engine=InnoDB;
+
+SET TIME_ZONE = '+03:00';
+
+--disable_warnings
+DROP TABLE IF EXISTS t1;
+--enable_warnings
+
+--echo #
+--echo # Start of 5.5 tests
+--echo #
+
+SET NAMES latin1;
+SET character_set_connection=utf32;
+SELECT HEX('a'), HEX('a ');
+-- source include/ctype_date_format.inc
+
+--echo #
+--echo # End of 5.5 tests
+--echo #
diff --git a/sql/item_timefunc.cc b/sql/item_timefunc.cc
index 522004e..a2a6fff 100644
--- a/sql/item_timefunc.cc
+++ b/sql/item_timefunc.cc
@@ -447,6 +447,70 @@ static bool extract_date_time(DATE_TIME_FORMAT *format,
 
 
 /**
+  A multi-byte safe helper class to read characters from a string.
+
+  QQ: Serg: which file to put this new class in?
+      It can be helpful for some other purposes
+      (not only here in item_timefunc.cc)
+      I remember you don't like such things in sql_string.h :)
+*/
+class Wchar_reader
+{
+  CHARSET_INFO *m_cs;
+  const char *m_ptr;
+  const char *m_end;
+public:
+  Wchar_reader(CHARSET_INFO *cs, const char *str, size_t length)
+    :m_cs(cs), m_ptr(str), m_end(str + length)
+  { }
+  bool eol() const { return m_ptr >= m_end; }
+  /**
+    Read a character, return its Unicode code point and octet length.
+
+    @param [OUT] wc     - a pointer to a Unicode code point variable
+    @param [OUT] chlen  - a pointer to a character length variable
+    @return false       - on success
+    @return true        - on error (end of line, or a bad byte sequence)
+
+    Converts negative lengths in the range -6..-1
+    (which mb_wc() returns for valid but unassigned characters)
+    to positive lengths 1..6, so the caller does not have
+    to care about unassigned characters. The caller will just see
+    such characters as "U+003F QUESTION MARK", but with length
+    not necessarily equal to 1.
+  */
+  bool read(my_wc_t *wc, int *chlen)
+  {
+    *chlen= m_cs->cset->mb_wc(m_cs, wc, (uchar *) m_ptr, (uchar *) m_end);
+    if (*chlen <= 0)
+    {
+      if (*chlen < -6 || *chlen == 0)
+        return true;     // End of line, or a bad byte sequence
+      *chlen= -(*chlen); // An unassigned (but a valid) character found
+      *wc= '?';          // Initialize *wc to QUESTION MARK
+    }
+    m_ptr+= *chlen;      // Shift the pointer to the next character.
+    return false;
+  }
+  /**
+    Read a character when its length is not important for the caller
+    @param [OUT] wc - a pointer to a Unicode code point variable
+    @return false   - on success
+    @return true    - on error (end of line, or an invalid byte sequence)
+  */
+  bool read(my_wc_t *wc)
+  {
+    int chlen;
+    return read(wc, &chlen);
+  }
+  /**
+    Return a pointer to the next character in the queue.
+  */
+  const char *ptr() const { return m_ptr; }
+};
+
+
+/**
   Create a formated date/time value in a string.
 */
 
@@ -457,21 +521,29 @@ static bool make_date_time(DATE_TIME_FORMAT *format, MYSQL_TIME *l_time,
   uint hours_i;
   uint weekday;
   ulong length;
-  const char *ptr, *end;
+  my_wc_t wc;
+  int chlen;
+  Wchar_reader reader(str->charset(), format->format.str,
+                                      format->format.length);
 
   str->length(0);
 
   if (l_time->neg)
     str->append('-');
   
-  end= (ptr= format->format.str) + format->format.length;
-  for (; ptr != end ; ptr++)
+  for ( ; !reader.read(&wc, &chlen) ; )
   {
-    if (*ptr != '%' || ptr+1 == end)
-      str->append(*ptr);
+    if (wc != '%' || reader.eol())
+    {
+      DBUG_ASSERT(chlen > 0); // A regular character, or a trailing '%'
+      str->append(reader.ptr() - chlen, chlen, &my_charset_bin);
+    }
     else
     {
-      switch (*++ptr) {
+      // A '%' sequence found, scan the next character after '%'
+      if (reader.read(&wc, &chlen))
+        break;
+      switch (wc) {
       case 'M':
         if (!l_time->month)
           return 1;
@@ -617,7 +689,7 @@ static bool make_date_time(DATE_TIME_FORMAT *format, MYSQL_TIME *l_time,
 	if (type == MYSQL_TIMESTAMP_TIME)
 	  return 1;
 	length= (uint) (int10_to_str(calc_week(l_time,
-				       (*ptr) == 'U' ?
+				       wc == 'U' ?
 				       WEEK_FIRST_WEEKDAY : WEEK_MONDAY_FIRST,
 				       &year),
 			     intbuff, 10) - intbuff);
@@ -631,7 +703,7 @@ static bool make_date_time(DATE_TIME_FORMAT *format, MYSQL_TIME *l_time,
 	if (type == MYSQL_TIMESTAMP_TIME)
 	  return 1;
 	length= (uint) (int10_to_str(calc_week(l_time,
-				       ((*ptr) == 'V' ?
+				       (wc == 'V' ?
 					(WEEK_YEAR | WEEK_FIRST_WEEKDAY) :
 					(WEEK_YEAR | WEEK_MONDAY_FIRST)),
 				       &year),
@@ -646,7 +718,7 @@ static bool make_date_time(DATE_TIME_FORMAT *format, MYSQL_TIME *l_time,
 	if (type == MYSQL_TIMESTAMP_TIME)
 	  return 1;
 	(void) calc_week(l_time,
-			 ((*ptr) == 'X' ?
+			 (wc == 'X' ?
 			  WEEK_YEAR | WEEK_FIRST_WEEKDAY :
 			  WEEK_YEAR | WEEK_MONDAY_FIRST),
 			 &year);
@@ -664,7 +736,8 @@ static bool make_date_time(DATE_TIME_FORMAT *format, MYSQL_TIME *l_time,
 	break;
 
       default:
-	str->append(*ptr);
+        DBUG_ASSERT(chlen > 0);
+	str->append(reader.ptr() - chlen, (size_t) chlen, &my_charset_bin);
 	break;
       }
     }
@@ -1778,20 +1851,22 @@ bool Item_func_date_format::eq(const Item *item, bool binary_cmp) const
 }
 
 
-
 uint Item_func_date_format::format_length(const String *format)
 {
   uint size=0;
-  const char *ptr=format->ptr();
-  const char *end=ptr+format->length();
+  my_wc_t wc;
+  Wchar_reader reader(format->charset(), format->ptr(), format->length());
 
-  for (; ptr != end ; ptr++)
+  for ( ; !reader.read(&wc) ; )
   {
-    if (*ptr != '%' || ptr == end-1)
-      size++;
+    if (wc != '%' || reader.eol())
+      size++; // A regular character, or a trailing '%'
     else
     {
-      switch(*++ptr) {
+      // A '%' sequence found, scan the next character after '%'
+      if (reader.read(&wc))
+        break;
+      switch (wc) {
       case 'M': /* month, textual */
       case 'W': /* day (of the week), textual */
 	size += 64; /* large for UTF8 locale data */
diff --git a/sql/sql_string.cc b/sql/sql_string.cc
index 885f53a..21fba79 100644
--- a/sql/sql_string.cc
+++ b/sql/sql_string.cc
@@ -537,6 +537,15 @@ bool String::append(IO_CACHE* file, uint32 arg_length)
   return FALSE;
 }
 
+/**
+  Append an ASCII string, optionally fill a prefix.
+  @param s           - a pointer to an ASCII string
+  @param arg_length  - length of the ASCII string
+  @param full_length - the desired character length of the piece
+                       to be added
+  @param fill_char   - make a prefix consisting of this character,
+                       if the desired full_length is bigger than arg_length.
+*/
 bool String::append_with_prefill(const char *s,uint32 arg_length,
 		 uint32 full_length, char fill_char)
 {
@@ -547,8 +556,28 @@ bool String::append_with_prefill(const char *s,uint32 arg_length,
   t_length= full_length - arg_length;
   if (t_length > 0)
   {
-    bfill(Ptr+str_length, t_length, fill_char);
-    str_length=str_length + t_length;
+    if (charset()->mbminlen == 1)
+    {
+      /*
+        An ASCII string can be appended directly
+        to an ASCII-compatible string. This includes
+        multi-byte character sets, like utf8, sjis, etc.
+      */
+      bfill(Ptr+str_length, t_length, fill_char);
+      str_length=str_length + t_length;
+    }
+    else
+    {
+      /*
+        Needs conversion to append an ASCII string to ASCII-incompatible
+        character sets, such as ucs2, utf16, utf16le, utf32.
+      */
+      for (int i= 0; i < t_length; i++)
+      {
+        if (append(&fill_char, 1, &my_charset_latin1))
+          return true;
+      }
+    }
   }
   append(s, arg_length);
   return FALSE;

Follow ups