← Back to team overview

maria-developers team mailing list archive

Re: MDEV-7649 wrong result when comparing utf8 column with an invalid literal

 

Hi Sergei,

Please review a patch for MDEV-7649 for 5.5.
It fixes the case #1 to return empty set.

(by treating bad bytes as greater than any valid character).

Thanks.


On 04/07/2015 02:07 PM, Sergei Golubchik wrote:
Hi, Alexander!

On Apr 07, Alexander Barkov wrote:
Hi Sergei,

I tested comparison behaviour for various different situations.

Please find the comment with the summary table in the end of:
https://mariadb.atlassian.net/browse/MDEV-7649

Currently there are 5 possible reactions on bad bytes on comparison,
depending on the collation, presence of an index, and
character_set_connection value:

- truncate on bad byte, compare only the well-formed prefix (#1)
- treat bad bytes as '?' (#7 and #8)
- empty set with no warning (#3)
- empty set with a warning (#2)
- error (#5 and #6)

I'm in doubts what to do for 10.1 and for 5.5. Please suggest.

1. For 10.1 or 10.2 I think it would be nice to make all these cases
work  exactly the same way. I am not sure which way would be the best.
Any ideas?

I like your last idea from MDEV-7649 -
treating invalid symbols as "larger than any valid symbol".

If that's doable without major changes.

2. For 5.5 we need to fix the bug with minimal changes. Do you agree?
I suggest we fix only #1 and maybe #2.

I'd suggest to fix only #1 - and make the comparison to return an empty
set.

Regards,
Sergei

diff --git a/mysql-test/include/ctype_utf8_ilseq.inc b/mysql-test/include/ctype_utf8_ilseq.inc
new file mode 100644
index 0000000..700fcbf
--- /dev/null
+++ b/mysql-test/include/ctype_utf8_ilseq.inc
@@ -0,0 +1,112 @@
+#
+# Compare a field to an utf8 string literal with illegal byte sequences
+#
+
+--echo #
+--echo # Start of ctype_utf8_ilseq.inc
+--echo #
+
+--eval CREATE TABLE t1 ENGINE=$ENGINE AS SELECT REPEAT(' ', 60) AS ch LIMIT 0;
+ALTER TABLE t1
+  ADD id BIGINT UNSIGNED NOT NULL AUTO_INCREMENT PRIMARY KEY,
+  ADD KEY(ch);
+SHOW CREATE TABLE t1;
+
+INSERT INTO t1 (ch) VALUES ('admin');
+SELECT ch FROM t1 WHERE ch='admin𝌆';
+DELETE FROM t1;
+INSERT INTO t1 (ch) VALUES ('a'), ('a?'), ('a??'), ('a???'), ('a????');
+INSERT INTO t1 (ch) VALUES ('ab'),('a?b'),('a??b'),('a???b'),('a????b');
+INSERT INTO t1 (ch) VALUES ('az'),('a?z'),('a??z'),('a???z'),('a????z');
+INSERT INTO t1 (ch) VALUES ('z');
+# LATIN SMALL LETTER A + LATIN CAPITAL LETTER E WITH GRAVE
+INSERT INTO t1 (ch) VALUES (_utf8 0x61D080);
+# LATIN SMALL LETTER A + ARMENIAN SMALL LETTER REH
+INSERT INTO t1 (ch) VALUES (_utf8 0x61D680);
+
+SELECT ch FROM t1 IGNORE KEY (ch) WHERE ch='a𝌆' ORDER BY ch;
+SELECT ch FROM t1 IGNORE KEY (ch) WHERE ch='a𝌆b' ORDER BY ch;
+SELECT ch FROM t1 WHERE ch='a𝌆' ORDER BY ch;
+SELECT ch FROM t1 WHERE ch='a𝌆b' ORDER BY ch;
+
+SELECT ch FROM t1 IGNORE KEY (ch) WHERE ch<'a𝌆' ORDER BY ch;
+SELECT ch FROM t1 IGNORE KEY (ch) WHERE ch<'a𝌆b' ORDER BY ch;
+SELECT ch FROM t1 WHERE ch<'a𝌆' ORDER BY ch;
+SELECT ch FROM t1 WHERE ch<'a𝌆b' ORDER BY ch;
+
+SELECT ch FROM t1 IGNORE KEY (ch) WHERE ch>'a𝌆' ORDER BY ch;
+SELECT ch FROM t1 IGNORE KEY (ch) WHERE ch>'a𝌆b' ORDER BY ch;
+SELECT ch FROM t1 WHERE ch>'a𝌆' ORDER BY ch;
+SELECT ch FROM t1 WHERE ch>'a𝌆b' ORDER BY ch;
+
+ALTER TABLE t1 DROP KEY ch;
+
+--echo # 0xD18F would be a good 2-byte character, 0xD1 is an incomplete sequence
+SET @query=CONCAT('SELECT ch FROM t1 WHERE ch=''a', 0xD1,'''');
+PREPARE stmt FROM @query;
+EXECUTE stmt;
+SET @query=CONCAT('SELECT ch FROM t1 WHERE ch=''a', 0xD1,'b''');
+PREPARE stmt FROM @query;
+EXECUTE stmt;
+
+#
+# Non-equality comparison currently work differently depending on collation:
+#
+# - utf8_general_ci falls back to memcmp() on bad byte
+# - utf8_unicode_ci treats bad bytes greater than any valid character
+#
+#  For example, these two characters:
+#    _utf8 0xD080 (U+00C8 LATIN CAPITAL LETTER E WITH GRAVE)
+#    _utf8 0xD680 (U+0580 ARMENIAN SMALL LETTER REH)
+#
+#  will give different results (depending on collation) when compared 
+#  to an incomplete byte sequence 0xD1 (mb2head not followed by mb2tail).
+#
+# For utf8_general_ci the result depends on the valid side:
+# - 0xD080 is smaller than 0xD1, because 0xD0 < 0xD1
+# - 0xD680 is greater than 0xD1, because 0xD6 > 0xD1
+#
+# For utf8_unicode_ci the result does not depend on the valid side:
+# - 0xD080 is smaller than 0xD1, because 0xD1 is greater than any valid character
+# - 0xD680 is smaller than 0xD1, because 0xD1 is greater than any valid character
+#
+# utf8_general_ci should be eventually fixed to treat bad bytes greater
+# than any valid character, similar to utf8_unicode_ci.
+#
+
+SET @query=CONCAT('SELECT ch FROM t1 WHERE ch<''a', 0xD1,'''');
+PREPARE stmt FROM @query;
+EXECUTE stmt;
+SET @query=CONCAT('SELECT ch FROM t1 WHERE ch>''a', 0xD1,'''');
+PREPARE stmt FROM @query;
+EXECUTE stmt;
+
+--echo # 0xEA9A96 would be a good 3-byte character, 0xEA9A is an incomplete sequence
+SET @query=CONCAT('SELECT ch FROM t1 WHERE ch=''a', 0xEA9A,'''');
+PREPARE stmt FROM @query;
+EXECUTE stmt;
+SET @query=CONCAT('SELECT ch FROM t1 WHERE ch=''a', 0xEA9A,'b''');
+PREPARE stmt FROM @query;
+EXECUTE stmt;
+
+--echo # 0x8F is a bad byte sequence (an mb2tail without mb2head)
+SET @query=CONCAT('SELECT ch FROM t1 WHERE ch=''a', 0x8F,'''');
+PREPARE stmt FROM @query;
+EXECUTE stmt;
+SET @query=CONCAT('SELECT ch FROM t1 WHERE ch=''a', 0x8F,'b''');
+PREPARE stmt FROM @query;
+EXECUTE stmt;
+
+--echo # 0x8F8F is a bad byte sequence (an mb2tail without mb2head, two times)
+SET @query=CONCAT('SELECT ch FROM t1 WHERE ch=''a', 0x8F8F,'''');
+PREPARE stmt FROM @query;
+EXECUTE stmt;
+SET @query=CONCAT('SELECT ch FROM t1 WHERE ch=''a', 0x8F8F,'b''');
+PREPARE stmt FROM @query;
+EXECUTE stmt;
+
+DROP TABLE t1;
+
+--echo #
+--echo # End of ctype_utf8_ilseq.inc
+--echo #
diff --git a/mysql-test/r/ctype_uca.result b/mysql-test/r/ctype_uca.result
index 1d5dd4e..974c097 100644
--- a/mysql-test/r/ctype_uca.result
+++ b/mysql-test/r/ctype_uca.result
@@ -3216,5 +3216,604 @@ a	c1
 10	=> Ç„
 drop table t1;
 #
+# MDEV-7649 wrong result when comparing utf8 column with an invalid literal
+#
+SET NAMES utf8 COLLATE utf8_unicode_ci;
+#
+# Start of ctype_utf8_ilseq.inc
+#
+CREATE TABLE t1 ENGINE=InnoDB AS SELECT REPEAT(' ', 60) AS ch LIMIT 0;;
+Warnings:
+Warning	1286	Unknown storage engine 'InnoDB'
+Warning	1266	Using storage engine MyISAM for table 't1'
+ALTER TABLE t1
+ADD id BIGINT UNSIGNED NOT NULL AUTO_INCREMENT PRIMARY KEY,
+ADD KEY(ch);
+SHOW CREATE TABLE t1;
+Table	Create Table
+t1	CREATE TABLE `t1` (
+  `ch` varchar(60) CHARACTER SET utf8 COLLATE utf8_unicode_ci NOT NULL DEFAULT '',
+  `id` bigint(20) unsigned NOT NULL AUTO_INCREMENT,
+  PRIMARY KEY (`id`),
+  KEY `ch` (`ch`)
+) ENGINE=MyISAM DEFAULT CHARSET=latin1
+INSERT INTO t1 (ch) VALUES ('admin');
+SELECT ch FROM t1 WHERE ch='admin𝌆';
+ch
+DELETE FROM t1;
+INSERT INTO t1 (ch) VALUES ('a'), ('a?'), ('a??'), ('a???'), ('a????');
+INSERT INTO t1 (ch) VALUES ('ab'),('a?b'),('a??b'),('a???b'),('a????b');
+INSERT INTO t1 (ch) VALUES ('az'),('a?z'),('a??z'),('a???z'),('a????z');
+INSERT INTO t1 (ch) VALUES ('z');
+INSERT INTO t1 (ch) VALUES (_utf8 0x61D080);
+INSERT INTO t1 (ch) VALUES (_utf8 0x61D680);
+SELECT ch FROM t1 IGNORE KEY (ch) WHERE ch='a𝌆' ORDER BY ch;
+ch
+SELECT ch FROM t1 IGNORE KEY (ch) WHERE ch='a𝌆b' ORDER BY ch;
+ch
+SELECT ch FROM t1 WHERE ch='a𝌆' ORDER BY ch;
+ch
+Warnings:
+Warning	1366	Incorrect string value: '\xF0\x9D\x8C\x86' for column 'ch' at row 1
+SELECT ch FROM t1 WHERE ch='a𝌆b' ORDER BY ch;
+ch
+Warnings:
+Warning	1366	Incorrect string value: '\xF0\x9D\x8C\x86b' for column 'ch' at row 1
+SELECT ch FROM t1 IGNORE KEY (ch) WHERE ch<'a𝌆' ORDER BY ch;
+ch
+a
+a?
+a??
+a???
+a????
+a????b
+a????z
+a???b
+a???z
+a??b
+a??z
+a?b
+a?z
+ab
+az
+aЀ
+aÖ€
+SELECT ch FROM t1 IGNORE KEY (ch) WHERE ch<'a𝌆b' ORDER BY ch;
+ch
+a
+a?
+a??
+a???
+a????
+a????b
+a????z
+a???b
+a???z
+a??b
+a??z
+a?b
+a?z
+ab
+az
+aЀ
+aÖ€
+SELECT ch FROM t1 WHERE ch<'a𝌆' ORDER BY ch;
+ch
+a
+a?
+a??
+a???
+a????
+a????b
+a????z
+a???b
+a???z
+a??b
+a??z
+a?b
+a?z
+ab
+az
+aЀ
+aÖ€
+Warnings:
+Warning	1366	Incorrect string value: '\xF0\x9D\x8C\x86' for column 'ch' at row 1
+SELECT ch FROM t1 WHERE ch<'a𝌆b' ORDER BY ch;
+ch
+a
+a?
+a??
+a???
+a????
+a????b
+a????z
+a???b
+a???z
+a??b
+a??z
+a?b
+a?z
+ab
+az
+aЀ
+aÖ€
+Warnings:
+Warning	1366	Incorrect string value: '\xF0\x9D\x8C\x86b' for column 'ch' at row 1
+SELECT ch FROM t1 IGNORE KEY (ch) WHERE ch>'a𝌆' ORDER BY ch;
+ch
+z
+SELECT ch FROM t1 IGNORE KEY (ch) WHERE ch>'a𝌆b' ORDER BY ch;
+ch
+z
+SELECT ch FROM t1 WHERE ch>'a𝌆' ORDER BY ch;
+ch
+z
+Warnings:
+Warning	1366	Incorrect string value: '\xF0\x9D\x8C\x86' for column 'ch' at row 1
+SELECT ch FROM t1 WHERE ch>'a𝌆b' ORDER BY ch;
+ch
+z
+Warnings:
+Warning	1366	Incorrect string value: '\xF0\x9D\x8C\x86b' for column 'ch' at row 1
+ALTER TABLE t1 DROP KEY ch;
+# 0xD18F would be a good 2-byte character, 0xD1 is an incomplete sequence
+SET @query=CONCAT('SELECT ch FROM t1 WHERE ch=''a', 0xD1,'''');
+PREPARE stmt FROM @query;
+EXECUTE stmt;
+ch
+SET @query=CONCAT('SELECT ch FROM t1 WHERE ch=''a', 0xD1,'b''');
+PREPARE stmt FROM @query;
+EXECUTE stmt;
+ch
+SET @query=CONCAT('SELECT ch FROM t1 WHERE ch<''a', 0xD1,'''');
+PREPARE stmt FROM @query;
+EXECUTE stmt;
+ch
+a
+a?
+a??
+a???
+a????
+ab
+a?b
+a??b
+a???b
+a????b
+az
+a?z
+a??z
+a???z
+a????z
+aЀ
+aÖ€
+SET @query=CONCAT('SELECT ch FROM t1 WHERE ch>''a', 0xD1,'''');
+PREPARE stmt FROM @query;
+EXECUTE stmt;
+ch
+z
+# 0xEA9A96 would be a good 3-byte character, 0xEA9A is an incomplete sequence
+SET @query=CONCAT('SELECT ch FROM t1 WHERE ch=''a', 0xEA9A,'''');
+PREPARE stmt FROM @query;
+EXECUTE stmt;
+ch
+SET @query=CONCAT('SELECT ch FROM t1 WHERE ch=''a', 0xEA9A,'b''');
+PREPARE stmt FROM @query;
+EXECUTE stmt;
+ch
+# 0x8F is a bad byte sequence (an mb2tail without mb2head)
+SET @query=CONCAT('SELECT ch FROM t1 WHERE ch=''a', 0x8F,'''');
+PREPARE stmt FROM @query;
+EXECUTE stmt;
+ch
+SET @query=CONCAT('SELECT ch FROM t1 WHERE ch=''a', 0x8F,'b''');
+PREPARE stmt FROM @query;
+EXECUTE stmt;
+ch
+# 0x8F8F is a bad byte sequence (an mb2tail without mb2head, two times)
+SET @query=CONCAT('SELECT ch FROM t1 WHERE ch=''a', 0x8F8F,'''');
+PREPARE stmt FROM @query;
+EXECUTE stmt;
+ch
+SET @query=CONCAT('SELECT ch FROM t1 WHERE ch=''a', 0x8F8F,'b''');
+PREPARE stmt FROM @query;
+EXECUTE stmt;
+ch
+DROP TABLE t1;
+#
+# End of ctype_utf8_ilseq.inc
+#
+#
+# Start of ctype_utf8_ilseq.inc
+#
+CREATE TABLE t1 ENGINE=MyISAM AS SELECT REPEAT(' ', 60) AS ch LIMIT 0;;
+ALTER TABLE t1
+ADD id BIGINT UNSIGNED NOT NULL AUTO_INCREMENT PRIMARY KEY,
+ADD KEY(ch);
+SHOW CREATE TABLE t1;
+Table	Create Table
+t1	CREATE TABLE `t1` (
+  `ch` varchar(60) CHARACTER SET utf8 COLLATE utf8_unicode_ci NOT NULL DEFAULT '',
+  `id` bigint(20) unsigned NOT NULL AUTO_INCREMENT,
+  PRIMARY KEY (`id`),
+  KEY `ch` (`ch`)
+) ENGINE=MyISAM DEFAULT CHARSET=latin1
+INSERT INTO t1 (ch) VALUES ('admin');
+SELECT ch FROM t1 WHERE ch='admin𝌆';
+ch
+DELETE FROM t1;
+INSERT INTO t1 (ch) VALUES ('a'), ('a?'), ('a??'), ('a???'), ('a????');
+INSERT INTO t1 (ch) VALUES ('ab'),('a?b'),('a??b'),('a???b'),('a????b');
+INSERT INTO t1 (ch) VALUES ('az'),('a?z'),('a??z'),('a???z'),('a????z');
+INSERT INTO t1 (ch) VALUES ('z');
+INSERT INTO t1 (ch) VALUES (_utf8 0x61D080);
+INSERT INTO t1 (ch) VALUES (_utf8 0x61D680);
+SELECT ch FROM t1 IGNORE KEY (ch) WHERE ch='a𝌆' ORDER BY ch;
+ch
+SELECT ch FROM t1 IGNORE KEY (ch) WHERE ch='a𝌆b' ORDER BY ch;
+ch
+SELECT ch FROM t1 WHERE ch='a𝌆' ORDER BY ch;
+ch
+Warnings:
+Warning	1366	Incorrect string value: '\xF0\x9D\x8C\x86' for column 'ch' at row 1
+SELECT ch FROM t1 WHERE ch='a𝌆b' ORDER BY ch;
+ch
+Warnings:
+Warning	1366	Incorrect string value: '\xF0\x9D\x8C\x86b' for column 'ch' at row 1
+SELECT ch FROM t1 IGNORE KEY (ch) WHERE ch<'a𝌆' ORDER BY ch;
+ch
+a
+a?
+a??
+a???
+a????
+a????b
+a????z
+a???b
+a???z
+a??b
+a??z
+a?b
+a?z
+ab
+az
+aЀ
+aÖ€
+SELECT ch FROM t1 IGNORE KEY (ch) WHERE ch<'a𝌆b' ORDER BY ch;
+ch
+a
+a?
+a??
+a???
+a????
+a????b
+a????z
+a???b
+a???z
+a??b
+a??z
+a?b
+a?z
+ab
+az
+aЀ
+aÖ€
+SELECT ch FROM t1 WHERE ch<'a𝌆' ORDER BY ch;
+ch
+a
+a?
+a??
+a???
+a????
+a????b
+a????z
+a???b
+a???z
+a??b
+a??z
+a?b
+a?z
+ab
+az
+aЀ
+aÖ€
+Warnings:
+Warning	1366	Incorrect string value: '\xF0\x9D\x8C\x86' for column 'ch' at row 1
+SELECT ch FROM t1 WHERE ch<'a𝌆b' ORDER BY ch;
+ch
+a
+a?
+a??
+a???
+a????
+a????b
+a????z
+a???b
+a???z
+a??b
+a??z
+a?b
+a?z
+ab
+az
+aЀ
+aÖ€
+Warnings:
+Warning	1366	Incorrect string value: '\xF0\x9D\x8C\x86b' for column 'ch' at row 1
+SELECT ch FROM t1 IGNORE KEY (ch) WHERE ch>'a𝌆' ORDER BY ch;
+ch
+z
+SELECT ch FROM t1 IGNORE KEY (ch) WHERE ch>'a𝌆b' ORDER BY ch;
+ch
+z
+SELECT ch FROM t1 WHERE ch>'a𝌆' ORDER BY ch;
+ch
+z
+Warnings:
+Warning	1366	Incorrect string value: '\xF0\x9D\x8C\x86' for column 'ch' at row 1
+SELECT ch FROM t1 WHERE ch>'a𝌆b' ORDER BY ch;
+ch
+z
+Warnings:
+Warning	1366	Incorrect string value: '\xF0\x9D\x8C\x86b' for column 'ch' at row 1
+ALTER TABLE t1 DROP KEY ch;
+# 0xD18F would be a good 2-byte character, 0xD1 is an incomplete sequence
+SET @query=CONCAT('SELECT ch FROM t1 WHERE ch=''a', 0xD1,'''');
+PREPARE stmt FROM @query;
+EXECUTE stmt;
+ch
+SET @query=CONCAT('SELECT ch FROM t1 WHERE ch=''a', 0xD1,'b''');
+PREPARE stmt FROM @query;
+EXECUTE stmt;
+ch
+SET @query=CONCAT('SELECT ch FROM t1 WHERE ch<''a', 0xD1,'''');
+PREPARE stmt FROM @query;
+EXECUTE stmt;
+ch
+a
+a?
+a??
+a???
+a????
+ab
+a?b
+a??b
+a???b
+a????b
+az
+a?z
+a??z
+a???z
+a????z
+aЀ
+aÖ€
+SET @query=CONCAT('SELECT ch FROM t1 WHERE ch>''a', 0xD1,'''');
+PREPARE stmt FROM @query;
+EXECUTE stmt;
+ch
+z
+# 0xEA9A96 would be a good 3-byte character, 0xEA9A is an incomplete sequence
+SET @query=CONCAT('SELECT ch FROM t1 WHERE ch=''a', 0xEA9A,'''');
+PREPARE stmt FROM @query;
+EXECUTE stmt;
+ch
+SET @query=CONCAT('SELECT ch FROM t1 WHERE ch=''a', 0xEA9A,'b''');
+PREPARE stmt FROM @query;
+EXECUTE stmt;
+ch
+# 0x8F is a bad byte sequence (an mb2tail without mb2head)
+SET @query=CONCAT('SELECT ch FROM t1 WHERE ch=''a', 0x8F,'''');
+PREPARE stmt FROM @query;
+EXECUTE stmt;
+ch
+SET @query=CONCAT('SELECT ch FROM t1 WHERE ch=''a', 0x8F,'b''');
+PREPARE stmt FROM @query;
+EXECUTE stmt;
+ch
+# 0x8F8F is a bad byte sequence (an mb2tail without mb2head, two times)
+SET @query=CONCAT('SELECT ch FROM t1 WHERE ch=''a', 0x8F8F,'''');
+PREPARE stmt FROM @query;
+EXECUTE stmt;
+ch
+SET @query=CONCAT('SELECT ch FROM t1 WHERE ch=''a', 0x8F8F,'b''');
+PREPARE stmt FROM @query;
+EXECUTE stmt;
+ch
+DROP TABLE t1;
+#
+# End of ctype_utf8_ilseq.inc
+#
+#
+# Start of ctype_utf8_ilseq.inc
+#
+CREATE TABLE t1 ENGINE=HEAP AS SELECT REPEAT(' ', 60) AS ch LIMIT 0;;
+ALTER TABLE t1
+ADD id BIGINT UNSIGNED NOT NULL AUTO_INCREMENT PRIMARY KEY,
+ADD KEY(ch);
+SHOW CREATE TABLE t1;
+Table	Create Table
+t1	CREATE TABLE `t1` (
+  `ch` varchar(60) CHARACTER SET utf8 COLLATE utf8_unicode_ci NOT NULL DEFAULT '',
+  `id` bigint(20) unsigned NOT NULL AUTO_INCREMENT,
+  PRIMARY KEY (`id`),
+  KEY `ch` (`ch`)
+) ENGINE=MEMORY DEFAULT CHARSET=latin1
+INSERT INTO t1 (ch) VALUES ('admin');
+SELECT ch FROM t1 WHERE ch='admin𝌆';
+ch
+DELETE FROM t1;
+INSERT INTO t1 (ch) VALUES ('a'), ('a?'), ('a??'), ('a???'), ('a????');
+INSERT INTO t1 (ch) VALUES ('ab'),('a?b'),('a??b'),('a???b'),('a????b');
+INSERT INTO t1 (ch) VALUES ('az'),('a?z'),('a??z'),('a???z'),('a????z');
+INSERT INTO t1 (ch) VALUES ('z');
+INSERT INTO t1 (ch) VALUES (_utf8 0x61D080);
+INSERT INTO t1 (ch) VALUES (_utf8 0x61D680);
+SELECT ch FROM t1 IGNORE KEY (ch) WHERE ch='a𝌆' ORDER BY ch;
+ch
+SELECT ch FROM t1 IGNORE KEY (ch) WHERE ch='a𝌆b' ORDER BY ch;
+ch
+SELECT ch FROM t1 WHERE ch='a𝌆' ORDER BY ch;
+ch
+Warnings:
+Warning	1366	Incorrect string value: '\xF0\x9D\x8C\x86' for column 'ch' at row 1
+SELECT ch FROM t1 WHERE ch='a𝌆b' ORDER BY ch;
+ch
+Warnings:
+Warning	1366	Incorrect string value: '\xF0\x9D\x8C\x86b' for column 'ch' at row 1
+SELECT ch FROM t1 IGNORE KEY (ch) WHERE ch<'a𝌆' ORDER BY ch;
+ch
+a
+a?
+a??
+a???
+a????
+a????b
+a????z
+a???b
+a???z
+a??b
+a??z
+a?b
+a?z
+ab
+az
+aЀ
+aÖ€
+SELECT ch FROM t1 IGNORE KEY (ch) WHERE ch<'a𝌆b' ORDER BY ch;
+ch
+a
+a?
+a??
+a???
+a????
+a????b
+a????z
+a???b
+a???z
+a??b
+a??z
+a?b
+a?z
+ab
+az
+aЀ
+aÖ€
+SELECT ch FROM t1 WHERE ch<'a𝌆' ORDER BY ch;
+ch
+a
+a?
+a??
+a???
+a????
+a????b
+a????z
+a???b
+a???z
+a??b
+a??z
+a?b
+a?z
+ab
+az
+aЀ
+aÖ€
+SELECT ch FROM t1 WHERE ch<'a𝌆b' ORDER BY ch;
+ch
+a
+a?
+a??
+a???
+a????
+a????b
+a????z
+a???b
+a???z
+a??b
+a??z
+a?b
+a?z
+ab
+az
+aЀ
+aÖ€
+SELECT ch FROM t1 IGNORE KEY (ch) WHERE ch>'a𝌆' ORDER BY ch;
+ch
+z
+SELECT ch FROM t1 IGNORE KEY (ch) WHERE ch>'a𝌆b' ORDER BY ch;
+ch
+z
+SELECT ch FROM t1 WHERE ch>'a𝌆' ORDER BY ch;
+ch
+z
+SELECT ch FROM t1 WHERE ch>'a𝌆b' ORDER BY ch;
+ch
+z
+ALTER TABLE t1 DROP KEY ch;
+# 0xD18F would be a good 2-byte character, 0xD1 is an incomplete sequence
+SET @query=CONCAT('SELECT ch FROM t1 WHERE ch=''a', 0xD1,'''');
+PREPARE stmt FROM @query;
+EXECUTE stmt;
+ch
+SET @query=CONCAT('SELECT ch FROM t1 WHERE ch=''a', 0xD1,'b''');
+PREPARE stmt FROM @query;
+EXECUTE stmt;
+ch
+SET @query=CONCAT('SELECT ch FROM t1 WHERE ch<''a', 0xD1,'''');
+PREPARE stmt FROM @query;
+EXECUTE stmt;
+ch
+a
+a?
+a??
+a???
+a????
+ab
+a?b
+a??b
+a???b
+a????b
+az
+a?z
+a??z
+a???z
+a????z
+aЀ
+aÖ€
+SET @query=CONCAT('SELECT ch FROM t1 WHERE ch>''a', 0xD1,'''');
+PREPARE stmt FROM @query;
+EXECUTE stmt;
+ch
+z
+# 0xEA9A96 would be a good 3-byte character, 0xEA9A is an incomplete sequence
+SET @query=CONCAT('SELECT ch FROM t1 WHERE ch=''a', 0xEA9A,'''');
+PREPARE stmt FROM @query;
+EXECUTE stmt;
+ch
+SET @query=CONCAT('SELECT ch FROM t1 WHERE ch=''a', 0xEA9A,'b''');
+PREPARE stmt FROM @query;
+EXECUTE stmt;
+ch
+# 0x8F is a bad byte sequence (an mb2tail without mb2head)
+SET @query=CONCAT('SELECT ch FROM t1 WHERE ch=''a', 0x8F,'''');
+PREPARE stmt FROM @query;
+EXECUTE stmt;
+ch
+SET @query=CONCAT('SELECT ch FROM t1 WHERE ch=''a', 0x8F,'b''');
+PREPARE stmt FROM @query;
+EXECUTE stmt;
+ch
+# 0x8F8F is a bad byte sequence (an mb2tail without mb2head, two times)
+SET @query=CONCAT('SELECT ch FROM t1 WHERE ch=''a', 0x8F8F,'''');
+PREPARE stmt FROM @query;
+EXECUTE stmt;
+ch
+SET @query=CONCAT('SELECT ch FROM t1 WHERE ch=''a', 0x8F8F,'b''');
+PREPARE stmt FROM @query;
+EXECUTE stmt;
+ch
+DROP TABLE t1;
+#
+# End of ctype_utf8_ilseq.inc
+#
+#
 # End of 5.5 tests
 #
diff --git a/mysql-test/r/ctype_utf8.result b/mysql-test/r/ctype_utf8.result
index 9a6dadb..d979592 100644
--- a/mysql-test/r/ctype_utf8.result
+++ b/mysql-test/r/ctype_utf8.result
@@ -5134,5 +5134,603 @@ a	1024
 Warnings:
 Warning	1260	Row 2 was cut by GROUP_CONCAT()
 #
+# MDEV-7649 wrong result when comparing utf8 column with an invalid literal
+#
+SET NAMES utf8 COLLATE utf8_general_ci;
+#
+# Start of ctype_utf8_ilseq.inc
+#
+CREATE TABLE t1 ENGINE=InnoDB AS SELECT REPEAT(' ', 60) AS ch LIMIT 0;;
+ALTER TABLE t1
+ADD id BIGINT UNSIGNED NOT NULL AUTO_INCREMENT PRIMARY KEY,
+ADD KEY(ch);
+SHOW CREATE TABLE t1;
+Table	Create Table
+t1	CREATE TABLE `t1` (
+  `ch` varchar(60) CHARACTER SET utf8 NOT NULL DEFAULT '',
+  `id` bigint(20) unsigned NOT NULL AUTO_INCREMENT,
+  PRIMARY KEY (`id`),
+  KEY `ch` (`ch`)
+) ENGINE=InnoDB DEFAULT CHARSET=latin1
+INSERT INTO t1 (ch) VALUES ('admin');
+SELECT ch FROM t1 WHERE ch='admin𝌆';
+ch
+Warnings:
+Warning	1366	Incorrect string value: '\xF0\x9D\x8C\x86' for column 'ch' at row 1
+DELETE FROM t1;
+INSERT INTO t1 (ch) VALUES ('a'), ('a?'), ('a??'), ('a???'), ('a????');
+INSERT INTO t1 (ch) VALUES ('ab'),('a?b'),('a??b'),('a???b'),('a????b');
+INSERT INTO t1 (ch) VALUES ('az'),('a?z'),('a??z'),('a???z'),('a????z');
+INSERT INTO t1 (ch) VALUES ('z');
+INSERT INTO t1 (ch) VALUES (_utf8 0x61D080);
+INSERT INTO t1 (ch) VALUES (_utf8 0x61D680);
+SELECT ch FROM t1 IGNORE KEY (ch) WHERE ch='a𝌆' ORDER BY ch;
+ch
+SELECT ch FROM t1 IGNORE KEY (ch) WHERE ch='a𝌆b' ORDER BY ch;
+ch
+SELECT ch FROM t1 WHERE ch='a𝌆' ORDER BY ch;
+ch
+Warnings:
+Warning	1366	Incorrect string value: '\xF0\x9D\x8C\x86' for column 'ch' at row 1
+SELECT ch FROM t1 WHERE ch='a𝌆b' ORDER BY ch;
+ch
+Warnings:
+Warning	1366	Incorrect string value: '\xF0\x9D\x8C\x86b' for column 'ch' at row 1
+SELECT ch FROM t1 IGNORE KEY (ch) WHERE ch<'a𝌆' ORDER BY ch;
+ch
+a
+a?
+a??
+a???
+a????
+a????b
+a????z
+a???b
+a???z
+a??b
+a??z
+a?b
+a?z
+ab
+az
+aЀ
+aÖ€
+SELECT ch FROM t1 IGNORE KEY (ch) WHERE ch<'a𝌆b' ORDER BY ch;
+ch
+a
+a?
+a??
+a???
+a????
+a????b
+a????z
+a???b
+a???z
+a??b
+a??z
+a?b
+a?z
+ab
+az
+aЀ
+aÖ€
+SELECT ch FROM t1 WHERE ch<'a𝌆' ORDER BY ch;
+ch
+a
+a?
+a??
+a???
+a????
+a????b
+a????z
+a???b
+a???z
+a??b
+a??z
+a?b
+a?z
+ab
+az
+aЀ
+aÖ€
+Warnings:
+Warning	1366	Incorrect string value: '\xF0\x9D\x8C\x86' for column 'ch' at row 1
+SELECT ch FROM t1 WHERE ch<'a𝌆b' ORDER BY ch;
+ch
+a
+a?
+a??
+a???
+a????
+a????b
+a????z
+a???b
+a???z
+a??b
+a??z
+a?b
+a?z
+ab
+az
+aЀ
+aÖ€
+Warnings:
+Warning	1366	Incorrect string value: '\xF0\x9D\x8C\x86b' for column 'ch' at row 1
+SELECT ch FROM t1 IGNORE KEY (ch) WHERE ch>'a𝌆' ORDER BY ch;
+ch
+z
+SELECT ch FROM t1 IGNORE KEY (ch) WHERE ch>'a𝌆b' ORDER BY ch;
+ch
+z
+SELECT ch FROM t1 WHERE ch>'a𝌆' ORDER BY ch;
+ch
+z
+Warnings:
+Warning	1366	Incorrect string value: '\xF0\x9D\x8C\x86' for column 'ch' at row 1
+SELECT ch FROM t1 WHERE ch>'a𝌆b' ORDER BY ch;
+ch
+z
+Warnings:
+Warning	1366	Incorrect string value: '\xF0\x9D\x8C\x86b' for column 'ch' at row 1
+ALTER TABLE t1 DROP KEY ch;
+# 0xD18F would be a good 2-byte character, 0xD1 is an incomplete sequence
+SET @query=CONCAT('SELECT ch FROM t1 WHERE ch=''a', 0xD1,'''');
+PREPARE stmt FROM @query;
+EXECUTE stmt;
+ch
+SET @query=CONCAT('SELECT ch FROM t1 WHERE ch=''a', 0xD1,'b''');
+PREPARE stmt FROM @query;
+EXECUTE stmt;
+ch
+SET @query=CONCAT('SELECT ch FROM t1 WHERE ch<''a', 0xD1,'''');
+PREPARE stmt FROM @query;
+EXECUTE stmt;
+ch
+a
+a?
+a??
+a???
+a????
+ab
+a?b
+a??b
+a???b
+a????b
+az
+a?z
+a??z
+a???z
+a????z
+aЀ
+SET @query=CONCAT('SELECT ch FROM t1 WHERE ch>''a', 0xD1,'''');
+PREPARE stmt FROM @query;
+EXECUTE stmt;
+ch
+z
+aÖ€
+# 0xEA9A96 would be a good 3-byte character, 0xEA9A is an incomplete sequence
+SET @query=CONCAT('SELECT ch FROM t1 WHERE ch=''a', 0xEA9A,'''');
+PREPARE stmt FROM @query;
+EXECUTE stmt;
+ch
+SET @query=CONCAT('SELECT ch FROM t1 WHERE ch=''a', 0xEA9A,'b''');
+PREPARE stmt FROM @query;
+EXECUTE stmt;
+ch
+# 0x8F is a bad byte sequence (an mb2tail without mb2head)
+SET @query=CONCAT('SELECT ch FROM t1 WHERE ch=''a', 0x8F,'''');
+PREPARE stmt FROM @query;
+EXECUTE stmt;
+ch
+SET @query=CONCAT('SELECT ch FROM t1 WHERE ch=''a', 0x8F,'b''');
+PREPARE stmt FROM @query;
+EXECUTE stmt;
+ch
+# 0x8F8F is a bad byte sequence (an mb2tail without mb2head, two times)
+SET @query=CONCAT('SELECT ch FROM t1 WHERE ch=''a', 0x8F8F,'''');
+PREPARE stmt FROM @query;
+EXECUTE stmt;
+ch
+SET @query=CONCAT('SELECT ch FROM t1 WHERE ch=''a', 0x8F8F,'b''');
+PREPARE stmt FROM @query;
+EXECUTE stmt;
+ch
+DROP TABLE t1;
+#
+# End of ctype_utf8_ilseq.inc
+#
+#
+# Start of ctype_utf8_ilseq.inc
+#
+CREATE TABLE t1 ENGINE=MyISAM AS SELECT REPEAT(' ', 60) AS ch LIMIT 0;;
+ALTER TABLE t1
+ADD id BIGINT UNSIGNED NOT NULL AUTO_INCREMENT PRIMARY KEY,
+ADD KEY(ch);
+SHOW CREATE TABLE t1;
+Table	Create Table
+t1	CREATE TABLE `t1` (
+  `ch` varchar(60) CHARACTER SET utf8 NOT NULL DEFAULT '',
+  `id` bigint(20) unsigned NOT NULL AUTO_INCREMENT,
+  PRIMARY KEY (`id`),
+  KEY `ch` (`ch`)
+) ENGINE=MyISAM DEFAULT CHARSET=latin1
+INSERT INTO t1 (ch) VALUES ('admin');
+SELECT ch FROM t1 WHERE ch='admin𝌆';
+ch
+DELETE FROM t1;
+INSERT INTO t1 (ch) VALUES ('a'), ('a?'), ('a??'), ('a???'), ('a????');
+INSERT INTO t1 (ch) VALUES ('ab'),('a?b'),('a??b'),('a???b'),('a????b');
+INSERT INTO t1 (ch) VALUES ('az'),('a?z'),('a??z'),('a???z'),('a????z');
+INSERT INTO t1 (ch) VALUES ('z');
+INSERT INTO t1 (ch) VALUES (_utf8 0x61D080);
+INSERT INTO t1 (ch) VALUES (_utf8 0x61D680);
+SELECT ch FROM t1 IGNORE KEY (ch) WHERE ch='a𝌆' ORDER BY ch;
+ch
+SELECT ch FROM t1 IGNORE KEY (ch) WHERE ch='a𝌆b' ORDER BY ch;
+ch
+SELECT ch FROM t1 WHERE ch='a𝌆' ORDER BY ch;
+ch
+Warnings:
+Warning	1366	Incorrect string value: '\xF0\x9D\x8C\x86' for column 'ch' at row 1
+SELECT ch FROM t1 WHERE ch='a𝌆b' ORDER BY ch;
+ch
+Warnings:
+Warning	1366	Incorrect string value: '\xF0\x9D\x8C\x86b' for column 'ch' at row 1
+SELECT ch FROM t1 IGNORE KEY (ch) WHERE ch<'a𝌆' ORDER BY ch;
+ch
+a
+a?
+a??
+a???
+a????
+a????b
+a????z
+a???b
+a???z
+a??b
+a??z
+a?b
+a?z
+ab
+az
+aЀ
+aÖ€
+SELECT ch FROM t1 IGNORE KEY (ch) WHERE ch<'a𝌆b' ORDER BY ch;
+ch
+a
+a?
+a??
+a???
+a????
+a????b
+a????z
+a???b
+a???z
+a??b
+a??z
+a?b
+a?z
+ab
+az
+aЀ
+aÖ€
+SELECT ch FROM t1 WHERE ch<'a𝌆' ORDER BY ch;
+ch
+a
+a?
+a??
+a???
+a????
+a????b
+a????z
+a???b
+a???z
+a??b
+a??z
+a?b
+a?z
+ab
+az
+aЀ
+aÖ€
+Warnings:
+Warning	1366	Incorrect string value: '\xF0\x9D\x8C\x86' for column 'ch' at row 1
+SELECT ch FROM t1 WHERE ch<'a𝌆b' ORDER BY ch;
+ch
+a
+a?
+a??
+a???
+a????
+a????b
+a????z
+a???b
+a???z
+a??b
+a??z
+a?b
+a?z
+ab
+az
+aЀ
+aÖ€
+Warnings:
+Warning	1366	Incorrect string value: '\xF0\x9D\x8C\x86b' for column 'ch' at row 1
+SELECT ch FROM t1 IGNORE KEY (ch) WHERE ch>'a𝌆' ORDER BY ch;
+ch
+z
+SELECT ch FROM t1 IGNORE KEY (ch) WHERE ch>'a𝌆b' ORDER BY ch;
+ch
+z
+SELECT ch FROM t1 WHERE ch>'a𝌆' ORDER BY ch;
+ch
+z
+Warnings:
+Warning	1366	Incorrect string value: '\xF0\x9D\x8C\x86' for column 'ch' at row 1
+SELECT ch FROM t1 WHERE ch>'a𝌆b' ORDER BY ch;
+ch
+z
+Warnings:
+Warning	1366	Incorrect string value: '\xF0\x9D\x8C\x86b' for column 'ch' at row 1
+ALTER TABLE t1 DROP KEY ch;
+# 0xD18F would be a good 2-byte character, 0xD1 is an incomplete sequence
+SET @query=CONCAT('SELECT ch FROM t1 WHERE ch=''a', 0xD1,'''');
+PREPARE stmt FROM @query;
+EXECUTE stmt;
+ch
+SET @query=CONCAT('SELECT ch FROM t1 WHERE ch=''a', 0xD1,'b''');
+PREPARE stmt FROM @query;
+EXECUTE stmt;
+ch
+SET @query=CONCAT('SELECT ch FROM t1 WHERE ch<''a', 0xD1,'''');
+PREPARE stmt FROM @query;
+EXECUTE stmt;
+ch
+a
+a?
+a??
+a???
+a????
+ab
+a?b
+a??b
+a???b
+a????b
+az
+a?z
+a??z
+a???z
+a????z
+aЀ
+SET @query=CONCAT('SELECT ch FROM t1 WHERE ch>''a', 0xD1,'''');
+PREPARE stmt FROM @query;
+EXECUTE stmt;
+ch
+z
+aÖ€
+# 0xEA9A96 would be a good 3-byte character, 0xEA9A is an incomplete sequence
+SET @query=CONCAT('SELECT ch FROM t1 WHERE ch=''a', 0xEA9A,'''');
+PREPARE stmt FROM @query;
+EXECUTE stmt;
+ch
+SET @query=CONCAT('SELECT ch FROM t1 WHERE ch=''a', 0xEA9A,'b''');
+PREPARE stmt FROM @query;
+EXECUTE stmt;
+ch
+# 0x8F is a bad byte sequence (an mb2tail without mb2head)
+SET @query=CONCAT('SELECT ch FROM t1 WHERE ch=''a', 0x8F,'''');
+PREPARE stmt FROM @query;
+EXECUTE stmt;
+ch
+SET @query=CONCAT('SELECT ch FROM t1 WHERE ch=''a', 0x8F,'b''');
+PREPARE stmt FROM @query;
+EXECUTE stmt;
+ch
+# 0x8F8F is a bad byte sequence (an mb2tail without mb2head, two times)
+SET @query=CONCAT('SELECT ch FROM t1 WHERE ch=''a', 0x8F8F,'''');
+PREPARE stmt FROM @query;
+EXECUTE stmt;
+ch
+SET @query=CONCAT('SELECT ch FROM t1 WHERE ch=''a', 0x8F8F,'b''');
+PREPARE stmt FROM @query;
+EXECUTE stmt;
+ch
+DROP TABLE t1;
+#
+# End of ctype_utf8_ilseq.inc
+#
+#
+# Start of ctype_utf8_ilseq.inc
+#
+CREATE TABLE t1 ENGINE=HEAP AS SELECT REPEAT(' ', 60) AS ch LIMIT 0;;
+ALTER TABLE t1
+ADD id BIGINT UNSIGNED NOT NULL AUTO_INCREMENT PRIMARY KEY,
+ADD KEY(ch);
+SHOW CREATE TABLE t1;
+Table	Create Table
+t1	CREATE TABLE `t1` (
+  `ch` varchar(60) CHARACTER SET utf8 NOT NULL DEFAULT '',
+  `id` bigint(20) unsigned NOT NULL AUTO_INCREMENT,
+  PRIMARY KEY (`id`),
+  KEY `ch` (`ch`)
+) ENGINE=MEMORY DEFAULT CHARSET=latin1
+INSERT INTO t1 (ch) VALUES ('admin');
+SELECT ch FROM t1 WHERE ch='admin𝌆';
+ch
+DELETE FROM t1;
+INSERT INTO t1 (ch) VALUES ('a'), ('a?'), ('a??'), ('a???'), ('a????');
+INSERT INTO t1 (ch) VALUES ('ab'),('a?b'),('a??b'),('a???b'),('a????b');
+INSERT INTO t1 (ch) VALUES ('az'),('a?z'),('a??z'),('a???z'),('a????z');
+INSERT INTO t1 (ch) VALUES ('z');
+INSERT INTO t1 (ch) VALUES (_utf8 0x61D080);
+INSERT INTO t1 (ch) VALUES (_utf8 0x61D680);
+SELECT ch FROM t1 IGNORE KEY (ch) WHERE ch='a𝌆' ORDER BY ch;
+ch
+SELECT ch FROM t1 IGNORE KEY (ch) WHERE ch='a𝌆b' ORDER BY ch;
+ch
+SELECT ch FROM t1 WHERE ch='a𝌆' ORDER BY ch;
+ch
+Warnings:
+Warning	1366	Incorrect string value: '\xF0\x9D\x8C\x86' for column 'ch' at row 1
+SELECT ch FROM t1 WHERE ch='a𝌆b' ORDER BY ch;
+ch
+Warnings:
+Warning	1366	Incorrect string value: '\xF0\x9D\x8C\x86b' for column 'ch' at row 1
+SELECT ch FROM t1 IGNORE KEY (ch) WHERE ch<'a𝌆' ORDER BY ch;
+ch
+a
+a?
+a??
+a???
+a????
+a????b
+a????z
+a???b
+a???z
+a??b
+a??z
+a?b
+a?z
+ab
+az
+aЀ
+aÖ€
+SELECT ch FROM t1 IGNORE KEY (ch) WHERE ch<'a𝌆b' ORDER BY ch;
+ch
+a
+a?
+a??
+a???
+a????
+a????b
+a????z
+a???b
+a???z
+a??b
+a??z
+a?b
+a?z
+ab
+az
+aЀ
+aÖ€
+SELECT ch FROM t1 WHERE ch<'a𝌆' ORDER BY ch;
+ch
+a
+a?
+a??
+a???
+a????
+a????b
+a????z
+a???b
+a???z
+a??b
+a??z
+a?b
+a?z
+ab
+az
+aЀ
+aÖ€
+SELECT ch FROM t1 WHERE ch<'a𝌆b' ORDER BY ch;
+ch
+a
+a?
+a??
+a???
+a????
+a????b
+a????z
+a???b
+a???z
+a??b
+a??z
+a?b
+a?z
+ab
+az
+aЀ
+aÖ€
+SELECT ch FROM t1 IGNORE KEY (ch) WHERE ch>'a𝌆' ORDER BY ch;
+ch
+z
+SELECT ch FROM t1 IGNORE KEY (ch) WHERE ch>'a𝌆b' ORDER BY ch;
+ch
+z
+SELECT ch FROM t1 WHERE ch>'a𝌆' ORDER BY ch;
+ch
+z
+SELECT ch FROM t1 WHERE ch>'a𝌆b' ORDER BY ch;
+ch
+z
+ALTER TABLE t1 DROP KEY ch;
+# 0xD18F would be a good 2-byte character, 0xD1 is an incomplete sequence
+SET @query=CONCAT('SELECT ch FROM t1 WHERE ch=''a', 0xD1,'''');
+PREPARE stmt FROM @query;
+EXECUTE stmt;
+ch
+SET @query=CONCAT('SELECT ch FROM t1 WHERE ch=''a', 0xD1,'b''');
+PREPARE stmt FROM @query;
+EXECUTE stmt;
+ch
+SET @query=CONCAT('SELECT ch FROM t1 WHERE ch<''a', 0xD1,'''');
+PREPARE stmt FROM @query;
+EXECUTE stmt;
+ch
+a
+a?
+a??
+a???
+a????
+ab
+a?b
+a??b
+a???b
+a????b
+az
+a?z
+a??z
+a???z
+a????z
+aЀ
+SET @query=CONCAT('SELECT ch FROM t1 WHERE ch>''a', 0xD1,'''');
+PREPARE stmt FROM @query;
+EXECUTE stmt;
+ch
+z
+aÖ€
+# 0xEA9A96 would be a good 3-byte character, 0xEA9A is an incomplete sequence
+SET @query=CONCAT('SELECT ch FROM t1 WHERE ch=''a', 0xEA9A,'''');
+PREPARE stmt FROM @query;
+EXECUTE stmt;
+ch
+SET @query=CONCAT('SELECT ch FROM t1 WHERE ch=''a', 0xEA9A,'b''');
+PREPARE stmt FROM @query;
+EXECUTE stmt;
+ch
+# 0x8F is a bad byte sequence (an mb2tail without mb2head)
+SET @query=CONCAT('SELECT ch FROM t1 WHERE ch=''a', 0x8F,'''');
+PREPARE stmt FROM @query;
+EXECUTE stmt;
+ch
+SET @query=CONCAT('SELECT ch FROM t1 WHERE ch=''a', 0x8F,'b''');
+PREPARE stmt FROM @query;
+EXECUTE stmt;
+ch
+# 0x8F8F is a bad byte sequence (an mb2tail without mb2head, two times)
+SET @query=CONCAT('SELECT ch FROM t1 WHERE ch=''a', 0x8F8F,'''');
+PREPARE stmt FROM @query;
+EXECUTE stmt;
+ch
+SET @query=CONCAT('SELECT ch FROM t1 WHERE ch=''a', 0x8F8F,'b''');
+PREPARE stmt FROM @query;
+EXECUTE stmt;
+ch
+DROP TABLE t1;
+#
+# End of ctype_utf8_ilseq.inc
+#
+#
 # End of 5.5 tests
 #
diff --git a/mysql-test/t/ctype_uca.test b/mysql-test/t/ctype_uca.test
index 8007b30..b11cf24 100644
--- a/mysql-test/t/ctype_uca.test
+++ b/mysql-test/t/ctype_uca.test
@@ -577,6 +577,19 @@ select * from t1 where c1 = '=> dž';
 select * from t1 where concat(c1) = '=> dž';
 drop table t1;
 
+
+--echo #
+--echo # MDEV-7649 wrong result when comparing utf8 column with an invalid literal
+--echo #
+
+SET NAMES utf8 COLLATE utf8_unicode_ci;
+--let ENGINE=InnoDB
+--source include/ctype_utf8_ilseq.inc
+--let ENGINE=MyISAM
+--source include/ctype_utf8_ilseq.inc
+--let ENGINE=HEAP
+--source include/ctype_utf8_ilseq.inc
+
 --echo #
 --echo # End of 5.5 tests
 --echo #
diff --git a/mysql-test/t/ctype_utf8.test b/mysql-test/t/ctype_utf8.test
index 0d104ca..5a7d709 100644
--- a/mysql-test/t/ctype_utf8.test
+++ b/mysql-test/t/ctype_utf8.test
@@ -1597,5 +1597,17 @@ UNION ALL
 SELECT 'a' AS id, REPEAT('bla bla', 100) AS body) t1;
 
 --echo #
+--echo # MDEV-7649 wrong result when comparing utf8 column with an invalid literal
+--echo #
+
+SET NAMES utf8 COLLATE utf8_general_ci;
+--let ENGINE=InnoDB
+--source include/ctype_utf8_ilseq.inc
+--let ENGINE=MyISAM
+--source include/ctype_utf8_ilseq.inc
+--let ENGINE=HEAP
+--source include/ctype_utf8_ilseq.inc
+
+--echo #
 --echo # End of 5.5 tests
 --echo #
diff --git a/strings/ctype-uca.c b/strings/ctype-uca.c
index ad484ac..6bc71bc 100644
--- a/strings/ctype-uca.c
+++ b/strings/ctype-uca.c
@@ -7075,7 +7075,25 @@ static int my_uca_scanner_next_any(my_uca_scanner *scanner)
     if (((mb_len= scanner->cs->cset->mb_wc(scanner->cs, &wc, 
                                           scanner->sbeg,
                                           scanner->send)) <= 0))
-      return -1;
+    {
+      if (scanner->sbeg >= scanner->send)
+        return -1; /* No more bytes, end of line reached */
+      /*
+        There are some more bytes left. Non-positive mb_len means that
+        we got an incomplete or a bad byte sequence. Consume mbminlen bytes.
+      */
+      if ((scanner->sbeg+= scanner->cs->mbminlen) > scanner->send)
+      {
+        /* For safety purposes don't go beyond the string range. */
+        scanner->sbeg= scanner->send;
+      }
+      /*
+        Treat every complete or incomplete mbminlen unit as a weight which is
+        greater than weight for any possible normal character.
+        0xFFFF is greater than any possible weight in the UCA weight table.
+      */
+      return 0xFFFF;
+    }
     
     scanner->sbeg+= mb_len;
     if (wc > MAX_UCA_CHAR_WITH_EXPLICIT_WEIGHT)

Follow ups

References