← Back to team overview

maria-developers team mailing list archive

MDEV-6566 Different INSERT behaviour on bad bytes with and without character set conversion

 

Hi Sergei,

Please review a patch for MDEV-6566.

Thanks.
diff --git a/include/m_ctype.h b/include/m_ctype.h
index f08efb4..4fa8779 100644
--- a/include/m_ctype.h
+++ b/include/m_ctype.h
@@ -444,7 +444,55 @@ struct my_charset_handler_st
   size_t        (*scan)(CHARSET_INFO *, const char *b, const char *e,
                         int sq);
 
-  /* Copying routines */
+  /* String copying routines and helpers for them */
+  /*
+    charlen() - calculate length of the left-most character in bytes.
+    @param  cs    Character set
+    @param  str   The beginning of the string
+    @param  end   The end of the string
+    
+    @return       MY_CS_ILSEQ if a bad byte sequence was found.
+    @return       MY_CS_TOOSMALLN(x) if the string ended unexpectedly.
+    @return       a positive number in the range 1..mbmaxlen,
+                  if a valid character was found.
+  */
+  int (*charlen)(CHARSET_INFO *cs, const uchar *str, const uchar *end);
+  /*
+    well_formed_char_length() - returns character length of a string.
+    
+    @param cs          Character set
+    @param str         The beginning of the string
+    @param end         The end of the string
+    @param nchars      Not more than "nchars" left-most characters are checked.
+    @param status[OUT] Additional statistics are returned here.
+                       "status" can be uninitialized before the call,
+                       and it is fully initialized after the call.
+    
+    status->m_source_end_pos is set to the position where reading stopped.
+    
+    If a bad byte sequence is found, the function returns immediately and
+    status->m_well_formed_error_pos is set to the position where a bad byte
+    sequence was found.
+    
+    status->m_well_formed_error_pos is set to NULL if no bad bytes were found.
+    If status->m_well_formed_error_pos is NULL after the call, that means:
+    - either the function reached the end of the string,
+    - or all "nchars" characters were read.
+    The caller can check status->m_source_end_pos to detect which of these two
+    happened.
+  */
+  size_t (*well_formed_char_length)(CHARSET_INFO *cs,
+                                    const char *str, const char *end,
+                                    size_t nchars,
+                                    MY_STRCOPY_STATUS *status);
+
+  /*
+    copy_fix() - copy a string like copy_abort(), but replace bad bytes with '?'.
+  */
+  size_t  (*copy_fix)(CHARSET_INFO *,
+                      char *dst, size_t dst_length,
+                      const char *src, size_t src_length,
+                      size_t nchars, MY_STRCOPY_STATUS *status);
   /*
     copy_abort() - copy a string, abort if a bad byte sequence was found.
     Not more than "nchars" characters are copied.
@@ -600,6 +648,10 @@ size_t my_copy_abort_mb(CHARSET_INFO *cs,
                         char *dst, size_t dst_length,
                         const char *src, size_t src_length,
                         size_t nchars, MY_STRCOPY_STATUS *);
+size_t my_copy_fix_mb(CHARSET_INFO *cs,
+                      char *dst, size_t dst_length,
+                      const char *src, size_t src_length,
+                      size_t nchars, MY_STRCOPY_STATUS *);
 
 /* Functions for 8bit */
 extern size_t my_caseup_str_8bit(CHARSET_INFO *, char *);
@@ -691,6 +743,11 @@ int my_wildcmp_bin(CHARSET_INFO *,
 size_t my_charpos_8bit(CHARSET_INFO *, const char *b, const char *e, size_t pos);
 size_t my_well_formed_len_8bit(CHARSET_INFO *, const char *b, const char *e,
                              size_t pos, int *error);
+size_t my_well_formed_char_length_8bit(CHARSET_INFO *cs,
+                                       const char *b, const char *e,
+                                       size_t nchars,
+                                       MY_STRCOPY_STATUS *status);
+int my_charlen_8bit(CHARSET_INFO *, const uchar *str, const uchar *end);
 uint my_mbcharlen_8bit(CHARSET_INFO *, uint c);
 
 
diff --git a/mysql-test/r/ctype_big5.result b/mysql-test/r/ctype_big5.result
index 175bbf0..d18c2a0 100644
--- a/mysql-test/r/ctype_big5.result
+++ b/mysql-test/r/ctype_big5.result
@@ -597,7 +597,7 @@ Warning	1366	Incorrect string value: '\x80\' for column 'a' at row 61
 Warning	1366	Incorrect string value: '\x80]' for column 'a' at row 62
 Warning	1366	Incorrect string value: '\x80^' for column 'a' at row 63
 Warning	1366	Incorrect string value: '\x80_' for column 'a' at row 64
-SELECT COUNT(*) FROM t1 WHERE a<>'';
+SELECT COUNT(*) FROM t1 WHERE a<>'?';
 COUNT(*)
 13973
 SELECT code, hex(upper(a)), hex(lower(a)),a, upper(a), lower(a) FROM t1 WHERE hex(a)<>hex(upper(a)) OR hex(a)<>hex(lower(a));
diff --git a/mysql-test/r/ctype_cp932_binlog_stm.result b/mysql-test/r/ctype_cp932_binlog_stm.result
index 0e6ae25..fd92022 100644
--- a/mysql-test/r/ctype_cp932_binlog_stm.result
+++ b/mysql-test/r/ctype_cp932_binlog_stm.result
@@ -165,7 +165,7 @@ Warning	1366	Incorrect string value: '\x80_' for column 'a' at row 64
 SELECT COUNT(*) FROM t1;
 COUNT(*)
 14623
-SELECT COUNT(*) FROM t1 WHERE a<>'' AND OCTET_LENGTH(a)=1;
+SELECT COUNT(*) FROM t1 WHERE a<>'?' AND OCTET_LENGTH(a)=1;
 COUNT(*)
 63
 SELECT COUNT(*) FROM t1 WHERE a<>'' AND OCTET_LENGTH(a)=2;
diff --git a/mysql-test/r/ctype_eucjpms.result b/mysql-test/r/ctype_eucjpms.result
index a1232c1..49d86c1 100644
--- a/mysql-test/r/ctype_eucjpms.result
+++ b/mysql-test/r/ctype_eucjpms.result
@@ -10101,6 +10101,9 @@ COUNT(*)
 56959
 SELECT COUNT(*) FROM t1 WHERE a<>'';
 COUNT(*)
+56959
+SELECT COUNT(*) FROM t1 WHERE a<>'' AND a<>'?';
+COUNT(*)
 17735
 SELECT COUNT(*) FROM t1 WHERE a<>'' AND OCTET_LENGTH(a)=2;
 COUNT(*)
@@ -33632,7 +33635,7 @@ CREATE TABLE t1 (a VARCHAR(10) CHARACTER SET eucjpms);
 INSERT INTO t1 VALUES (0x8EA0);
 SELECT HEX(a), CHAR_LENGTH(a) FROM t1;
 HEX(a)	CHAR_LENGTH(a)
-	0
+3F3F	2
 DROP TABLE t1;
 SELECT _eucjpms 0x8EA0;
 ERROR HY000: Invalid eucjpms character string: '8EA0'
diff --git a/mysql-test/r/ctype_euckr.result b/mysql-test/r/ctype_euckr.result
index dcb68cf..0ee63bb 100644
--- a/mysql-test/r/ctype_euckr.result
+++ b/mysql-test/r/ctype_euckr.result
@@ -407,12 +407,12 @@ Warnings:
 Warning	1366	Incorrect string value: '\xA1\xFF' for column 's1' at row 1
 select hex(s1), hex(convert(s1 using utf8)) from t1 order by binary s1;
 hex(s1)	hex(convert(s1 using utf8))
-	
-	
-	
-	
-	
-	
+3F3F	3F3F
+3F3F	3F3F
+3F40	3F40
+3F5B	3F5B
+3F60	3F60
+3F7B	3F7B
 A141	ECA2A5
 A15A	ECA381
 A161	ECA382
@@ -445,7 +445,7 @@ FROM t1 t11, t1 t12
 WHERE t11.a >= 0x81 AND t11.a <= 0xFE
 AND   t12.a >= 0x41 AND t12.a <= 0xFE
 ORDER BY t11.a, t12.a;
-SELECT s as bad_code FROM t2 WHERE a='' ORDER BY s;
+SELECT s as bad_code FROM t2 WHERE a='?' ORDER BY s;
 bad_code
 815B
 815C
@@ -1959,7 +1959,7 @@ FE7D
 FE7E
 FE7F
 FE80
-DELETE FROM t2 WHERE a='';
+DELETE FROM t2 WHERE a='?';
 ALTER TABLE t2 ADD u VARCHAR(1) CHARACTER SET utf8, ADD a2 VARCHAR(1) CHARACTER SET euckr;
 UPDATE t2 SET u=a, a2=u;
 SELECT s as unassigned_code FROM t2 WHERE u='?';
@@ -24492,7 +24492,7 @@ Warning	1366	Incorrect string value: '\x80\' for column 'a' at row 61
 Warning	1366	Incorrect string value: '\x80]' for column 'a' at row 62
 Warning	1366	Incorrect string value: '\x80^' for column 'a' at row 63
 Warning	1366	Incorrect string value: '\x80_' for column 'a' at row 64
-SELECT COUNT(*) FROM t1 WHERE a<>'';
+SELECT COUNT(*) FROM t1 WHERE a<>'?';
 COUNT(*)
 22428
 SELECT code, hex(upper(a)), hex(lower(a)),a, upper(a), lower(a) FROM t1 WHERE hex(a)<>hex(upper(a)) OR hex(a)<>hex(lower(a));
diff --git a/mysql-test/r/ctype_gb2312.result b/mysql-test/r/ctype_gb2312.result
index 5db6e2d..ceecb77 100644
--- a/mysql-test/r/ctype_gb2312.result
+++ b/mysql-test/r/ctype_gb2312.result
@@ -553,7 +553,7 @@ Warning	1366	Incorrect string value: '\x80\' for column 'a' at row 61
 Warning	1366	Incorrect string value: '\x80]' for column 'a' at row 62
 Warning	1366	Incorrect string value: '\x80^' for column 'a' at row 63
 Warning	1366	Incorrect string value: '\x80_' for column 'a' at row 64
-SELECT COUNT(*) FROM t1 WHERE a<>'';
+SELECT COUNT(*) FROM t1 WHERE a<>'?';
 COUNT(*)
 8178
 SELECT code, hex(upper(a)), hex(lower(a)),a, upper(a), lower(a) FROM t1 WHERE hex(a)<>hex(upper(a)) OR hex(a)<>hex(lower(a));
diff --git a/mysql-test/r/ctype_gbk.result b/mysql-test/r/ctype_gbk.result
index c5d997b..55561cf 100644
--- a/mysql-test/r/ctype_gbk.result
+++ b/mysql-test/r/ctype_gbk.result
@@ -573,7 +573,7 @@ Warning	1366	Incorrect string value: '\x80\' for column 'a' at row 61
 Warning	1366	Incorrect string value: '\x80]' for column 'a' at row 62
 Warning	1366	Incorrect string value: '\x80^' for column 'a' at row 63
 Warning	1366	Incorrect string value: '\x80_' for column 'a' at row 64
-SELECT COUNT(*) FROM t1 WHERE a<>'';
+SELECT COUNT(*) FROM t1 WHERE a<>'?';
 COUNT(*)
 23940
 SELECT code, hex(upper(a)), hex(lower(a)),a, upper(a), lower(a) FROM t1 WHERE hex(a)<>hex(upper(a)) OR hex(a)<>hex(lower(a));
@@ -4946,3 +4946,814 @@ DROP TABLE t1;
 #
 # End of 10.0 tests
 #
+#
+# Start of 10.1 tests
+#
+#
+# MDEV-6566 Different INSERT behaviour on bad bytes with and without character set conversion
+#
+CREATE TABLE t1 (
+id INT NOT NULL AUTO_INCREMENT PRIMARY KEY,
+b VARBINARY(16),
+type SET('ascii','bad','head','tail','mb2','unassigned')
+);
+INSERT INTO t1 (b, type) VALUES (0x40,   'ascii,tail');
+INSERT INTO t1 (b, type) VALUES (0x80,   'tail');
+INSERT INTO t1 (b, type) VALUES (0x81,   'head,tail');
+INSERT INTO t1 (b, type) VALUES (0xFF,   'bad');
+INSERT INTO t1 (b, type) VALUES (0xA140, 'mb2,unassigned');
+INSERT INTO t1 (b, type) VALUES (0xA1A3, 'mb2');
+INSERT INTO t1 (b, type) VALUES (0xFE40, 'mb2');
+CREATE TABLE t2 AS SELECT
+CONCAT(t1.b,t2.b) AS b,
+t1.type AS type1,
+t2.type AS type2,
+CONCAT('[',t1.type,'][',t2.type,']') AS comment
+FROM t1, t1 t2;
+CREATE TABLE t3
+(
+b VARBINARY(16),
+c VARCHAR(16) CHARACTER SET gbk,
+comment VARCHAR(128)
+);
+#
+# A combination of two valid characters, should give no warnings
+# 
+INSERT INTO t3 (b,c,comment) SELECT b,b,comment FROM t2
+WHERE
+(FIND_IN_SET('ascii',type1) OR FIND_IN_SET('mb2',type1)) AND
+(FIND_IN_SET('ascii',type2) OR FIND_IN_SET('mb2',type2))
+ORDER BY b;
+SELECT COUNT(*) FROM t3;
+COUNT(*)
+16
+SELECT HEX(c),comment FROM t3 WHERE b=c ORDER BY b;
+HEX(c)	comment
+4040	[ascii,tail][ascii,tail]
+40A140	[ascii,tail][mb2,unassigned]
+40A1A3	[ascii,tail][mb2]
+40FE40	[ascii,tail][mb2]
+A14040	[mb2,unassigned][ascii,tail]
+A140A140	[mb2,unassigned][mb2,unassigned]
+A140A1A3	[mb2,unassigned][mb2]
+A140FE40	[mb2,unassigned][mb2]
+A1A340	[mb2][ascii,tail]
+A1A3A140	[mb2][mb2,unassigned]
+A1A3A1A3	[mb2][mb2]
+A1A3FE40	[mb2][mb2]
+FE4040	[mb2][ascii,tail]
+FE40A140	[mb2][mb2,unassigned]
+FE40A1A3	[mb2][mb2]
+FE40FE40	[mb2][mb2]
+SELECT HEX(c),HEX(b),comment FROM t3 WHERE b<>c ORDER BY b;
+HEX(c)	HEX(b)	comment
+DELETE FROM t2 WHERE b IN (SELECT b FROM t3);
+DELETE FROM t3;
+#
+# Sequences that start with a tail or a bad byte,
+# or end with a bad byte, all should be fixed.
+#
+INSERT INTO t3 (b,c,comment) SELECT b,b,comment FROM t2
+WHERE type1='tail' OR type1='bad' OR type2='bad'
+ORDER BY b;
+Warnings:
+Warning	1366	Incorrect string value: '\xFF' for column 'c' at row 1
+Warning	1366	Incorrect string value: '\x80@' for column 'c' at row 2
+Warning	1366	Incorrect string value: '\x80\x80' for column 'c' at row 3
+Warning	1366	Incorrect string value: '\x80\x81' for column 'c' at row 4
+Warning	1366	Incorrect string value: '\x80\xA1@' for column 'c' at row 5
+Warning	1366	Incorrect string value: '\x80\xA1\xA3' for column 'c' at row 6
+Warning	1366	Incorrect string value: '\x80\xFE@' for column 'c' at row 7
+Warning	1366	Incorrect string value: '\x80\xFF' for column 'c' at row 8
+Warning	1366	Incorrect string value: '\x81\xFF' for column 'c' at row 9
+Warning	1366	Incorrect string value: '\xFF' for column 'c' at row 10
+Warning	1366	Incorrect string value: '\xFF' for column 'c' at row 11
+Warning	1366	Incorrect string value: '\xFF' for column 'c' at row 12
+Warning	1366	Incorrect string value: '\xFF@' for column 'c' at row 13
+Warning	1366	Incorrect string value: '\xFF\x80' for column 'c' at row 14
+Warning	1366	Incorrect string value: '\xFF\x81' for column 'c' at row 15
+Warning	1366	Incorrect string value: '\xFF\xA1@' for column 'c' at row 16
+Warning	1366	Incorrect string value: '\xFF\xA1\xA3' for column 'c' at row 17
+Warning	1366	Incorrect string value: '\xFF\xFE@' for column 'c' at row 18
+Warning	1366	Incorrect string value: '\xFF\xFF' for column 'c' at row 19
+SELECT COUNT(*) FROM t3;
+COUNT(*)
+19
+SELECT HEX(c),comment FROM t3 WHERE b=c ORDER BY b;
+HEX(c)	comment
+SELECT HEX(c),HEX(b),comment FROM t3 WHERE b<>c ORDER BY b;
+HEX(c)	HEX(b)	comment
+403F	40FF	[ascii,tail][bad]
+3F40	8040	[tail][ascii,tail]
+3F3F	8080	[tail][tail]
+3F3F	8081	[tail][head,tail]
+3FA140	80A140	[tail][mb2,unassigned]
+3FA1A3	80A1A3	[tail][mb2]
+3FFE40	80FE40	[tail][mb2]
+3F3F	80FF	[tail][bad]
+3F3F	81FF	[head,tail][bad]
+A1403F	A140FF	[mb2,unassigned][bad]
+A1A33F	A1A3FF	[mb2][bad]
+FE403F	FE40FF	[mb2][bad]
+3F40	FF40	[bad][ascii,tail]
+3F3F	FF80	[bad][tail]
+3F3F	FF81	[bad][head,tail]
+3FA140	FFA140	[bad][mb2,unassigned]
+3FA1A3	FFA1A3	[bad][mb2]
+3FFE40	FFFE40	[bad][mb2]
+3F3F	FFFF	[bad][bad]
+DELETE FROM t2 WHERE b IN (SELECT b FROM t3);
+DELETE FROM t3;
+#
+# Sequences that start with an ASCII or an MB2 character,
+# followed by a non-ASCII tail, all should be fixed.
+#
+INSERT INTO t3 (b,c,comment) SELECT b,b,comment FROM t2
+WHERE (FIND_IN_SET('mb2',type1) OR FIND_IN_SET('ascii',type1))
+AND (FIND_IN_SET('tail',type2) AND NOT FIND_IN_SET('ascii',type2))
+ORDER BY b;
+Warnings:
+Warning	1366	Incorrect string value: '\x80' for column 'c' at row 1
+Warning	1366	Incorrect string value: '\x81' for column 'c' at row 2
+Warning	1366	Incorrect string value: '\x80' for column 'c' at row 3
+Warning	1366	Incorrect string value: '\x81' for column 'c' at row 4
+Warning	1366	Incorrect string value: '\x80' for column 'c' at row 5
+Warning	1366	Incorrect string value: '\x81' for column 'c' at row 6
+Warning	1366	Incorrect string value: '\x80' for column 'c' at row 7
+Warning	1366	Incorrect string value: '\x81' for column 'c' at row 8
+SELECT COUNT(*) FROM t3;
+COUNT(*)
+8
+SELECT HEX(c),comment FROM t3 WHERE b=c ORDER BY b;
+HEX(c)	comment
+SELECT HEX(c),HEX(b),comment FROM t3 WHERE b<>c ORDER BY b;
+HEX(c)	HEX(b)	comment
+403F	4080	[ascii,tail][tail]
+403F	4081	[ascii,tail][head,tail]
+A1403F	A14080	[mb2,unassigned][tail]
+A1403F	A14081	[mb2,unassigned][head,tail]
+A1A33F	A1A380	[mb2][tail]
+A1A33F	A1A381	[mb2][head,tail]
+FE403F	FE4080	[mb2][tail]
+FE403F	FE4081	[mb2][head,tail]
+DELETE FROM t2 WHERE b IN (SELECT b FROM t3);
+DELETE FROM t3;
+#
+# Other sequences
+#
+INSERT INTO t3 (b,c,comment) SELECT b,b,comment FROM t2 ORDER BY b;
+Warnings:
+Warning	1366	Incorrect string value: '\xA3' for column 'c' at row 5
+SELECT COUNT(*) FROM t3;
+COUNT(*)
+6
+SELECT HEX(c),comment FROM t3 WHERE b=c ORDER BY b;
+HEX(c)	comment
+8140	[head,tail][ascii,tail]
+8180	[head,tail][tail]
+8181	[head,tail][head,tail]
+81A140	[head,tail][mb2,unassigned]
+81FE40	[head,tail][mb2]
+SELECT HEX(c),HEX(b),comment FROM t3 WHERE b<>c ORDER BY b;
+HEX(c)	HEX(b)	comment
+81A13F	81A1A3	[head,tail][mb2]
+DELETE FROM t3;
+DROP TABLE t3;
+DROP TABLE t2;
+CREATE TABLE t2 AS SELECT
+CONCAT(t1.b,t2.b,t3.b) AS b,
+t1.type AS type1,
+t2.type AS type2,
+t3.type AS type3,
+CONCAT('[',t1.type,'][',t2.type,'][',t3.type,']') AS comment
+FROM t1, t1 t2,t1 t3;
+SELECT COUNT(*) FROM t2;
+COUNT(*)
+343
+CREATE TABLE t3
+(
+b VARBINARY(16),
+c VARCHAR(16) CHARACTER SET gbk,
+comment VARCHAR(128)
+);
+#
+# A combination of three valid characters, should give no warnings
+# 
+INSERT INTO t3 (b,c,comment) SELECT b,b,comment FROM t2
+WHERE
+(FIND_IN_SET('ascii',type1) OR FIND_IN_SET('mb2',type1)) AND
+(FIND_IN_SET('ascii',type2) OR FIND_IN_SET('mb2',type2)) AND
+(FIND_IN_SET('ascii',type3) OR FIND_IN_SET('mb2',type3)) 
+ORDER BY b;
+SELECT COUNT(*) FROM t3;
+COUNT(*)
+64
+SELECT HEX(c),comment FROM t3 WHERE b=c ORDER BY b;
+HEX(c)	comment
+404040	[ascii,tail][ascii,tail][ascii,tail]
+4040A140	[ascii,tail][ascii,tail][mb2,unassigned]
+4040A1A3	[ascii,tail][ascii,tail][mb2]
+4040FE40	[ascii,tail][ascii,tail][mb2]
+40A14040	[ascii,tail][mb2,unassigned][ascii,tail]
+40A140A140	[ascii,tail][mb2,unassigned][mb2,unassigned]
+40A140A1A3	[ascii,tail][mb2,unassigned][mb2]
+40A140FE40	[ascii,tail][mb2,unassigned][mb2]
+40A1A340	[ascii,tail][mb2][ascii,tail]
+40A1A3A140	[ascii,tail][mb2][mb2,unassigned]
+40A1A3A1A3	[ascii,tail][mb2][mb2]
+40A1A3FE40	[ascii,tail][mb2][mb2]
+40FE4040	[ascii,tail][mb2][ascii,tail]
+40FE40A140	[ascii,tail][mb2][mb2,unassigned]
+40FE40A1A3	[ascii,tail][mb2][mb2]
+40FE40FE40	[ascii,tail][mb2][mb2]
+A1404040	[mb2,unassigned][ascii,tail][ascii,tail]
+A14040A140	[mb2,unassigned][ascii,tail][mb2,unassigned]
+A14040A1A3	[mb2,unassigned][ascii,tail][mb2]
+A14040FE40	[mb2,unassigned][ascii,tail][mb2]
+A140A14040	[mb2,unassigned][mb2,unassigned][ascii,tail]
+A140A140A140	[mb2,unassigned][mb2,unassigned][mb2,unassigned]
+A140A140A1A3	[mb2,unassigned][mb2,unassigned][mb2]
+A140A140FE40	[mb2,unassigned][mb2,unassigned][mb2]
+A140A1A340	[mb2,unassigned][mb2][ascii,tail]
+A140A1A3A140	[mb2,unassigned][mb2][mb2,unassigned]
+A140A1A3A1A3	[mb2,unassigned][mb2][mb2]
+A140A1A3FE40	[mb2,unassigned][mb2][mb2]
+A140FE4040	[mb2,unassigned][mb2][ascii,tail]
+A140FE40A140	[mb2,unassigned][mb2][mb2,unassigned]
+A140FE40A1A3	[mb2,unassigned][mb2][mb2]
+A140FE40FE40	[mb2,unassigned][mb2][mb2]
+A1A34040	[mb2][ascii,tail][ascii,tail]
+A1A340A140	[mb2][ascii,tail][mb2,unassigned]
+A1A340A1A3	[mb2][ascii,tail][mb2]
+A1A340FE40	[mb2][ascii,tail][mb2]
+A1A3A14040	[mb2][mb2,unassigned][ascii,tail]
+A1A3A140A140	[mb2][mb2,unassigned][mb2,unassigned]
+A1A3A140A1A3	[mb2][mb2,unassigned][mb2]
+A1A3A140FE40	[mb2][mb2,unassigned][mb2]
+A1A3A1A340	[mb2][mb2][ascii,tail]
+A1A3A1A3A140	[mb2][mb2][mb2,unassigned]
+A1A3A1A3A1A3	[mb2][mb2][mb2]
+A1A3A1A3FE40	[mb2][mb2][mb2]
+A1A3FE4040	[mb2][mb2][ascii,tail]
+A1A3FE40A140	[mb2][mb2][mb2,unassigned]
+A1A3FE40A1A3	[mb2][mb2][mb2]
+A1A3FE40FE40	[mb2][mb2][mb2]
+FE404040	[mb2][ascii,tail][ascii,tail]
+FE4040A140	[mb2][ascii,tail][mb2,unassigned]
+FE4040A1A3	[mb2][ascii,tail][mb2]
+FE4040FE40	[mb2][ascii,tail][mb2]
+FE40A14040	[mb2][mb2,unassigned][ascii,tail]
+FE40A140A140	[mb2][mb2,unassigned][mb2,unassigned]
+FE40A140A1A3	[mb2][mb2,unassigned][mb2]
+FE40A140FE40	[mb2][mb2,unassigned][mb2]
+FE40A1A340	[mb2][mb2][ascii,tail]
+FE40A1A3A140	[mb2][mb2][mb2,unassigned]
+FE40A1A3A1A3	[mb2][mb2][mb2]
+FE40A1A3FE40	[mb2][mb2][mb2]
+FE40FE4040	[mb2][mb2][ascii,tail]
+FE40FE40A140	[mb2][mb2][mb2,unassigned]
+FE40FE40A1A3	[mb2][mb2][mb2]
+FE40FE40FE40	[mb2][mb2][mb2]
+SELECT HEX(c),HEX(b),comment FROM t3 WHERE b<>c ORDER BY b;
+HEX(c)	HEX(b)	comment
+DELETE FROM t2 WHERE b IN (SELECT b FROM t3);
+DELETE FROM t3;
+#
+# Sequences that start with a tail or a bad byte,
+# or have a bad byte, all should be fixed.
+#
+INSERT INTO t3 (b,c,comment) SELECT b,b,comment FROM t2
+WHERE type1='tail' OR type1='bad' OR type2='bad' OR type3='bad'
+ORDER BY b;
+Warnings:
+Warning	1366	Incorrect string value: '\xFF' for column 'c' at row 1
+Warning	1366	Incorrect string value: '\x80\xFF' for column 'c' at row 2
+Warning	1366	Incorrect string value: '\x81\xFF' for column 'c' at row 3
+Warning	1366	Incorrect string value: '\xFF' for column 'c' at row 4
+Warning	1366	Incorrect string value: '\xFF' for column 'c' at row 5
+Warning	1366	Incorrect string value: '\xFF' for column 'c' at row 6
+Warning	1366	Incorrect string value: '\xFF@' for column 'c' at row 7
+Warning	1366	Incorrect string value: '\xFF\x80' for column 'c' at row 8
+Warning	1366	Incorrect string value: '\xFF\x81' for column 'c' at row 9
+Warning	1366	Incorrect string value: '\xFF\xA1@' for column 'c' at row 10
+Warning	1366	Incorrect string value: '\xFF\xA1\xA3' for column 'c' at row 11
+Warning	1366	Incorrect string value: '\xFF\xFE@' for column 'c' at row 12
+Warning	1366	Incorrect string value: '\xFF\xFF' for column 'c' at row 13
+Warning	1366	Incorrect string value: '\x80@@' for column 'c' at row 14
+Warning	1366	Incorrect string value: '\x80@\x80' for column 'c' at row 15
+Warning	1366	Incorrect string value: '\x80@\x81' for column 'c' at row 16
+Warning	1366	Incorrect string value: '\x80@\xA1@' for column 'c' at row 17
+Warning	1366	Incorrect string value: '\x80@\xA1\xA3' for column 'c' at row 18
+Warning	1366	Incorrect string value: '\x80@\xFE@' for column 'c' at row 19
+Warning	1366	Incorrect string value: '\x80@\xFF' for column 'c' at row 20
+Warning	1366	Incorrect string value: '\x80\x80@' for column 'c' at row 21
+Warning	1366	Incorrect string value: '\x80\x80\x80' for column 'c' at row 22
+Warning	1366	Incorrect string value: '\x80\x80\x81' for column 'c' at row 23
+Warning	1366	Incorrect string value: '\x80\x80\xA1@' for column 'c' at row 24
+Warning	1366	Incorrect string value: '\x80\x80\xA1\xA3' for column 'c' at row 25
+Warning	1366	Incorrect string value: '\x80\x80\xFE@' for column 'c' at row 26
+Warning	1366	Incorrect string value: '\x80\x80\xFF' for column 'c' at row 27
+Warning	1366	Incorrect string value: '\x80\x81@' for column 'c' at row 28
+Warning	1366	Incorrect string value: '\x80\x81\x80' for column 'c' at row 29
+Warning	1366	Incorrect string value: '\x80\x81\x81' for column 'c' at row 30
+Warning	1366	Incorrect string value: '\x80\x81\xA1@' for column 'c' at row 31
+Warning	1366	Incorrect string value: '\x80\x81\xA1\xA3' for column 'c' at row 32
+Warning	1366	Incorrect string value: '\x80\x81\xFE@' for column 'c' at row 33
+Warning	1366	Incorrect string value: '\x80\x81\xFF' for column 'c' at row 34
+Warning	1366	Incorrect string value: '\x80\xA1@@' for column 'c' at row 35
+Warning	1366	Incorrect string value: '\x80\xA1@\x80' for column 'c' at row 36
+Warning	1366	Incorrect string value: '\x80\xA1@\x81' for column 'c' at row 37
+Warning	1366	Incorrect string value: '\x80\xA1@\xA1@' for column 'c' at row 38
+Warning	1366	Incorrect string value: '\x80\xA1@\xA1\xA3' for column 'c' at row 39
+Warning	1366	Incorrect string value: '\x80\xA1@\xFE@' for column 'c' at row 40
+Warning	1366	Incorrect string value: '\x80\xA1@\xFF' for column 'c' at row 41
+Warning	1366	Incorrect string value: '\x80\xA1\xA3@' for column 'c' at row 42
+Warning	1366	Incorrect string value: '\x80\xA1\xA3\x80' for column 'c' at row 43
+Warning	1366	Incorrect string value: '\x80\xA1\xA3\x81' for column 'c' at row 44
+Warning	1366	Incorrect string value: '\x80\xA1\xA3\xA1@' for column 'c' at row 45
+Warning	1366	Incorrect string value: '\x80\xA1\xA3\xA1\xA3' for column 'c' at row 46
+Warning	1366	Incorrect string value: '\x80\xA1\xA3\xFE@' for column 'c' at row 47
+Warning	1366	Incorrect string value: '\x80\xA1\xA3\xFF' for column 'c' at row 48
+Warning	1366	Incorrect string value: '\x80\xFE@@' for column 'c' at row 49
+Warning	1366	Incorrect string value: '\x80\xFE@\x80' for column 'c' at row 50
+Warning	1366	Incorrect string value: '\x80\xFE@\x81' for column 'c' at row 51
+Warning	1366	Incorrect string value: '\x80\xFE@\xA1@' for column 'c' at row 52
+Warning	1366	Incorrect string value: '\x80\xFE@\xA1\xA3' for column 'c' at row 53
+Warning	1366	Incorrect string value: '\x80\xFE@\xFE@' for column 'c' at row 54
+Warning	1366	Incorrect string value: '\x80\xFE@\xFF' for column 'c' at row 55
+Warning	1366	Incorrect string value: '\x80\xFF@' for column 'c' at row 56
+Warning	1366	Incorrect string value: '\x80\xFF\x80' for column 'c' at row 57
+Warning	1366	Incorrect string value: '\x80\xFF\x81' for column 'c' at row 58
+Warning	1366	Incorrect string value: '\x80\xFF\xA1@' for column 'c' at row 59
+Warning	1366	Incorrect string value: '\x80\xFF\xA1\xA3' for column 'c' at row 60
+Warning	1366	Incorrect string value: '\x80\xFF\xFE@' for column 'c' at row 61
+Warning	1366	Incorrect string value: '\x80\xFF\xFF' for column 'c' at row 62
+Warning	1366	Incorrect string value: '\xFF' for column 'c' at row 63
+Warning	1366	Incorrect string value: '\xFF' for column 'c' at row 64
+SELECT COUNT(*) FROM t3;
+COUNT(*)
+163
+SELECT HEX(c),comment FROM t3 WHERE b=c ORDER BY b;
+HEX(c)	comment
+SELECT HEX(c),HEX(b),comment FROM t3 WHERE b<>c ORDER BY b;
+HEX(c)	HEX(b)	comment
+40403F	4040FF	[ascii,tail][ascii,tail][bad]
+403F3F	4080FF	[ascii,tail][tail][bad]
+403F3F	4081FF	[ascii,tail][head,tail][bad]
+40A1403F	40A140FF	[ascii,tail][mb2,unassigned][bad]
+40A1A33F	40A1A3FF	[ascii,tail][mb2][bad]
+40FE403F	40FE40FF	[ascii,tail][mb2][bad]
+403F40	40FF40	[ascii,tail][bad][ascii,tail]
+403F3F	40FF80	[ascii,tail][bad][tail]
+403F3F	40FF81	[ascii,tail][bad][head,tail]
+403FA140	40FFA140	[ascii,tail][bad][mb2,unassigned]
+403FA1A3	40FFA1A3	[ascii,tail][bad][mb2]
+403FFE40	40FFFE40	[ascii,tail][bad][mb2]
+403F3F	40FFFF	[ascii,tail][bad][bad]
+3F4040	804040	[tail][ascii,tail][ascii,tail]
+3F403F	804080	[tail][ascii,tail][tail]
+3F403F	804081	[tail][ascii,tail][head,tail]
+3F40A140	8040A140	[tail][ascii,tail][mb2,unassigned]
+3F40A1A3	8040A1A3	[tail][ascii,tail][mb2]
+3F40FE40	8040FE40	[tail][ascii,tail][mb2]
+3F403F	8040FF	[tail][ascii,tail][bad]
+3F3F40	808040	[tail][tail][ascii,tail]
+3F3F3F	808080	[tail][tail][tail]
+3F3F3F	808081	[tail][tail][head,tail]
+3F3FA140	8080A140	[tail][tail][mb2,unassigned]
+3F3FA1A3	8080A1A3	[tail][tail][mb2]
+3F3FFE40	8080FE40	[tail][tail][mb2]
+3F3F3F	8080FF	[tail][tail][bad]
+3F8140	808140	[tail][head,tail][ascii,tail]
+3F8180	808180	[tail][head,tail][tail]
+3F8181	808181	[tail][head,tail][head,tail]
+3F81A140	8081A140	[tail][head,tail][mb2,unassigned]
+3F81A13F	8081A1A3	[tail][head,tail][mb2]
+3F81FE40	8081FE40	[tail][head,tail][mb2]
+3F3F3F	8081FF	[tail][head,tail][bad]
+3FA14040	80A14040	[tail][mb2,unassigned][ascii,tail]
+3FA1403F	80A14080	[tail][mb2,unassigned][tail]
+3FA1403F	80A14081	[tail][mb2,unassigned][head,tail]
+3FA140A140	80A140A140	[tail][mb2,unassigned][mb2,unassigned]
+3FA140A1A3	80A140A1A3	[tail][mb2,unassigned][mb2]
+3FA140FE40	80A140FE40	[tail][mb2,unassigned][mb2]
+3FA1403F	80A140FF	[tail][mb2,unassigned][bad]
+3FA1A340	80A1A340	[tail][mb2][ascii,tail]
+3FA1A33F	80A1A380	[tail][mb2][tail]
+3FA1A33F	80A1A381	[tail][mb2][head,tail]
+3FA1A3A140	80A1A3A140	[tail][mb2][mb2,unassigned]
+3FA1A3A1A3	80A1A3A1A3	[tail][mb2][mb2]
+3FA1A3FE40	80A1A3FE40	[tail][mb2][mb2]
+3FA1A33F	80A1A3FF	[tail][mb2][bad]
+3FFE4040	80FE4040	[tail][mb2][ascii,tail]
+3FFE403F	80FE4080	[tail][mb2][tail]
+3FFE403F	80FE4081	[tail][mb2][head,tail]
+3FFE40A140	80FE40A140	[tail][mb2][mb2,unassigned]
+3FFE40A1A3	80FE40A1A3	[tail][mb2][mb2]
+3FFE40FE40	80FE40FE40	[tail][mb2][mb2]
+3FFE403F	80FE40FF	[tail][mb2][bad]
+3F3F40	80FF40	[tail][bad][ascii,tail]
+3F3F3F	80FF80	[tail][bad][tail]
+3F3F3F	80FF81	[tail][bad][head,tail]
+3F3FA140	80FFA140	[tail][bad][mb2,unassigned]
+3F3FA1A3	80FFA1A3	[tail][bad][mb2]
+3F3FFE40	80FFFE40	[tail][bad][mb2]
+3F3F3F	80FFFF	[tail][bad][bad]
+81403F	8140FF	[head,tail][ascii,tail][bad]
+81803F	8180FF	[head,tail][tail][bad]
+81813F	8181FF	[head,tail][head,tail][bad]
+81A1403F	81A140FF	[head,tail][mb2,unassigned][bad]
+81A13F3F	81A1A3FF	[head,tail][mb2][bad]
+81FE403F	81FE40FF	[head,tail][mb2][bad]
+3F3F40	81FF40	[head,tail][bad][ascii,tail]
+3F3F3F	81FF80	[head,tail][bad][tail]
+3F3F3F	81FF81	[head,tail][bad][head,tail]
+3F3FA140	81FFA140	[head,tail][bad][mb2,unassigned]
+3F3FA1A3	81FFA1A3	[head,tail][bad][mb2]
+3F3FFE40	81FFFE40	[head,tail][bad][mb2]
+3F3F3F	81FFFF	[head,tail][bad][bad]
+A140403F	A14040FF	[mb2,unassigned][ascii,tail][bad]
+A1403F3F	A14080FF	[mb2,unassigned][tail][bad]
+A1403F3F	A14081FF	[mb2,unassigned][head,tail][bad]
+A140A1403F	A140A140FF	[mb2,unassigned][mb2,unassigned][bad]
+A140A1A33F	A140A1A3FF	[mb2,unassigned][mb2][bad]
+A140FE403F	A140FE40FF	[mb2,unassigned][mb2][bad]
+A1403F40	A140FF40	[mb2,unassigned][bad][ascii,tail]
+A1403F3F	A140FF80	[mb2,unassigned][bad][tail]
+A1403F3F	A140FF81	[mb2,unassigned][bad][head,tail]
+A1403FA140	A140FFA140	[mb2,unassigned][bad][mb2,unassigned]
+A1403FA1A3	A140FFA1A3	[mb2,unassigned][bad][mb2]
+A1403FFE40	A140FFFE40	[mb2,unassigned][bad][mb2]
+A1403F3F	A140FFFF	[mb2,unassigned][bad][bad]
+A1A3403F	A1A340FF	[mb2][ascii,tail][bad]
+A1A33F3F	A1A380FF	[mb2][tail][bad]
+A1A33F3F	A1A381FF	[mb2][head,tail][bad]
+A1A3A1403F	A1A3A140FF	[mb2][mb2,unassigned][bad]
+A1A3A1A33F	A1A3A1A3FF	[mb2][mb2][bad]
+A1A3FE403F	A1A3FE40FF	[mb2][mb2][bad]
+A1A33F40	A1A3FF40	[mb2][bad][ascii,tail]
+A1A33F3F	A1A3FF80	[mb2][bad][tail]
+A1A33F3F	A1A3FF81	[mb2][bad][head,tail]
+A1A33FA140	A1A3FFA140	[mb2][bad][mb2,unassigned]
+A1A33FA1A3	A1A3FFA1A3	[mb2][bad][mb2]
+A1A33FFE40	A1A3FFFE40	[mb2][bad][mb2]
+A1A33F3F	A1A3FFFF	[mb2][bad][bad]
+FE40403F	FE4040FF	[mb2][ascii,tail][bad]
+FE403F3F	FE4080FF	[mb2][tail][bad]
+FE403F3F	FE4081FF	[mb2][head,tail][bad]
+FE40A1403F	FE40A140FF	[mb2][mb2,unassigned][bad]
+FE40A1A33F	FE40A1A3FF	[mb2][mb2][bad]
+FE40FE403F	FE40FE40FF	[mb2][mb2][bad]
+FE403F40	FE40FF40	[mb2][bad][ascii,tail]
+FE403F3F	FE40FF80	[mb2][bad][tail]
+FE403F3F	FE40FF81	[mb2][bad][head,tail]
+FE403FA140	FE40FFA140	[mb2][bad][mb2,unassigned]
+FE403FA1A3	FE40FFA1A3	[mb2][bad][mb2]
+FE403FFE40	FE40FFFE40	[mb2][bad][mb2]
+FE403F3F	FE40FFFF	[mb2][bad][bad]
+3F4040	FF4040	[bad][ascii,tail][ascii,tail]
+3F403F	FF4080	[bad][ascii,tail][tail]
+3F403F	FF4081	[bad][ascii,tail][head,tail]
+3F40A140	FF40A140	[bad][ascii,tail][mb2,unassigned]
+3F40A1A3	FF40A1A3	[bad][ascii,tail][mb2]
+3F40FE40	FF40FE40	[bad][ascii,tail][mb2]
+3F403F	FF40FF	[bad][ascii,tail][bad]
+3F3F40	FF8040	[bad][tail][ascii,tail]
+3F3F3F	FF8080	[bad][tail][tail]
+3F3F3F	FF8081	[bad][tail][head,tail]
+3F3FA140	FF80A140	[bad][tail][mb2,unassigned]
+3F3FA1A3	FF80A1A3	[bad][tail][mb2]
+3F3FFE40	FF80FE40	[bad][tail][mb2]
+3F3F3F	FF80FF	[bad][tail][bad]
+3F8140	FF8140	[bad][head,tail][ascii,tail]
+3F8180	FF8180	[bad][head,tail][tail]
+3F8181	FF8181	[bad][head,tail][head,tail]
+3F81A140	FF81A140	[bad][head,tail][mb2,unassigned]
+3F81A13F	FF81A1A3	[bad][head,tail][mb2]
+3F81FE40	FF81FE40	[bad][head,tail][mb2]
+3F3F3F	FF81FF	[bad][head,tail][bad]
+3FA14040	FFA14040	[bad][mb2,unassigned][ascii,tail]
+3FA1403F	FFA14080	[bad][mb2,unassigned][tail]
+3FA1403F	FFA14081	[bad][mb2,unassigned][head,tail]
+3FA140A140	FFA140A140	[bad][mb2,unassigned][mb2,unassigned]
+3FA140A1A3	FFA140A1A3	[bad][mb2,unassigned][mb2]
+3FA140FE40	FFA140FE40	[bad][mb2,unassigned][mb2]
+3FA1403F	FFA140FF	[bad][mb2,unassigned][bad]
+3FA1A340	FFA1A340	[bad][mb2][ascii,tail]
+3FA1A33F	FFA1A380	[bad][mb2][tail]
+3FA1A33F	FFA1A381	[bad][mb2][head,tail]
+3FA1A3A140	FFA1A3A140	[bad][mb2][mb2,unassigned]
+3FA1A3A1A3	FFA1A3A1A3	[bad][mb2][mb2]
+3FA1A3FE40	FFA1A3FE40	[bad][mb2][mb2]
+3FA1A33F	FFA1A3FF	[bad][mb2][bad]
+3FFE4040	FFFE4040	[bad][mb2][ascii,tail]
+3FFE403F	FFFE4080	[bad][mb2][tail]
+3FFE403F	FFFE4081	[bad][mb2][head,tail]
+3FFE40A140	FFFE40A140	[bad][mb2][mb2,unassigned]
+3FFE40A1A3	FFFE40A1A3	[bad][mb2][mb2]
+3FFE40FE40	FFFE40FE40	[bad][mb2][mb2]
+3FFE403F	FFFE40FF	[bad][mb2][bad]
+3F3F40	FFFF40	[bad][bad][ascii,tail]
+3F3F3F	FFFF80	[bad][bad][tail]
+3F3F3F	FFFF81	[bad][bad][head,tail]
+3F3FA140	FFFFA140	[bad][bad][mb2,unassigned]
+3F3FA1A3	FFFFA1A3	[bad][bad][mb2]
+3F3FFE40	FFFFFE40	[bad][bad][mb2]
+3F3F3F	FFFFFF	[bad][bad][bad]
+DELETE FROM t2 WHERE b IN (SELECT b FROM t3);
+DELETE FROM t3;
+#
+# Sequences that start with an ASCII or an MB2 character,
+# followed by a pure non-ASCII tail, all should be fixed.
+#
+INSERT INTO t3 (b,c,comment) SELECT b,b,comment FROM t2
+WHERE (FIND_IN_SET('mb2',type1) OR FIND_IN_SET('ascii',type1))
+AND type2='tail'
+ORDER BY b;
+Warnings:
+Warning	1366	Incorrect string value: '\x80@' for column 'c' at row 1
+Warning	1366	Incorrect string value: '\x80\x80' for column 'c' at row 2
+Warning	1366	Incorrect string value: '\x80\x81' for column 'c' at row 3
+Warning	1366	Incorrect string value: '\x80\xA1@' for column 'c' at row 4
+Warning	1366	Incorrect string value: '\x80\xA1\xA3' for column 'c' at row 5
+Warning	1366	Incorrect string value: '\x80\xFE@' for column 'c' at row 6
+Warning	1366	Incorrect string value: '\x80@' for column 'c' at row 7
+Warning	1366	Incorrect string value: '\x80\x80' for column 'c' at row 8
+Warning	1366	Incorrect string value: '\x80\x81' for column 'c' at row 9
+Warning	1366	Incorrect string value: '\x80\xA1@' for column 'c' at row 10
+Warning	1366	Incorrect string value: '\x80\xA1\xA3' for column 'c' at row 11
+Warning	1366	Incorrect string value: '\x80\xFE@' for column 'c' at row 12
+Warning	1366	Incorrect string value: '\x80@' for column 'c' at row 13
+Warning	1366	Incorrect string value: '\x80\x80' for column 'c' at row 14
+Warning	1366	Incorrect string value: '\x80\x81' for column 'c' at row 15
+Warning	1366	Incorrect string value: '\x80\xA1@' for column 'c' at row 16
+Warning	1366	Incorrect string value: '\x80\xA1\xA3' for column 'c' at row 17
+Warning	1366	Incorrect string value: '\x80\xFE@' for column 'c' at row 18
+Warning	1366	Incorrect string value: '\x80@' for column 'c' at row 19
+Warning	1366	Incorrect string value: '\x80\x80' for column 'c' at row 20
+Warning	1366	Incorrect string value: '\x80\x81' for column 'c' at row 21
+Warning	1366	Incorrect string value: '\x80\xA1@' for column 'c' at row 22
+Warning	1366	Incorrect string value: '\x80\xA1\xA3' for column 'c' at row 23
+Warning	1366	Incorrect string value: '\x80\xFE@' for column 'c' at row 24
+SELECT COUNT(*) FROM t3;
+COUNT(*)
+24
+SELECT HEX(c),comment FROM t3 WHERE b=c ORDER BY b;
+HEX(c)	comment
+SELECT HEX(c),HEX(b),comment FROM t3 WHERE b<>c ORDER BY b;
+HEX(c)	HEX(b)	comment
+403F40	408040	[ascii,tail][tail][ascii,tail]
+403F3F	408080	[ascii,tail][tail][tail]
+403F3F	408081	[ascii,tail][tail][head,tail]
+403FA140	4080A140	[ascii,tail][tail][mb2,unassigned]
+403FA1A3	4080A1A3	[ascii,tail][tail][mb2]
+403FFE40	4080FE40	[ascii,tail][tail][mb2]
+A1403F40	A1408040	[mb2,unassigned][tail][ascii,tail]
+A1403F3F	A1408080	[mb2,unassigned][tail][tail]
+A1403F3F	A1408081	[mb2,unassigned][tail][head,tail]
+A1403FA140	A14080A140	[mb2,unassigned][tail][mb2,unassigned]
+A1403FA1A3	A14080A1A3	[mb2,unassigned][tail][mb2]
+A1403FFE40	A14080FE40	[mb2,unassigned][tail][mb2]
+A1A33F40	A1A38040	[mb2][tail][ascii,tail]
+A1A33F3F	A1A38080	[mb2][tail][tail]
+A1A33F3F	A1A38081	[mb2][tail][head,tail]
+A1A33FA140	A1A380A140	[mb2][tail][mb2,unassigned]
+A1A33FA1A3	A1A380A1A3	[mb2][tail][mb2]
+A1A33FFE40	A1A380FE40	[mb2][tail][mb2]
+FE403F40	FE408040	[mb2][tail][ascii,tail]
+FE403F3F	FE408080	[mb2][tail][tail]
+FE403F3F	FE408081	[mb2][tail][head,tail]
+FE403FA140	FE4080A140	[mb2][tail][mb2,unassigned]
+FE403FA1A3	FE4080A1A3	[mb2][tail][mb2]
+FE403FFE40	FE4080FE40	[mb2][tail][mb2]
+DELETE FROM t2 WHERE b IN (SELECT b FROM t3);
+DELETE FROM t3;
+#
+# Sequences that consist of two ASCII or MB2 characters,
+# followed by a pure non-ASCII tail, all should be fixed.
+#
+INSERT INTO t3 (b,c,comment) SELECT b,b,comment FROM t2
+WHERE (FIND_IN_SET('mb2',type1) OR FIND_IN_SET('ascii',type1)) AND
+(FIND_IN_SET('mb2',type2) OR FIND_IN_SET('ascii',type2)) AND
+type3='tail'
+ORDER BY b;
+Warnings:
+Warning	1366	Incorrect string value: '\x80' for column 'c' at row 1
+Warning	1366	Incorrect string value: '\x80' for column 'c' at row 2
+Warning	1366	Incorrect string value: '\x80' for column 'c' at row 3
+Warning	1366	Incorrect string value: '\x80' for column 'c' at row 4
+Warning	1366	Incorrect string value: '\x80' for column 'c' at row 5
+Warning	1366	Incorrect string value: '\x80' for column 'c' at row 6
+Warning	1366	Incorrect string value: '\x80' for column 'c' at row 7
+Warning	1366	Incorrect string value: '\x80' for column 'c' at row 8
+Warning	1366	Incorrect string value: '\x80' for column 'c' at row 9
+Warning	1366	Incorrect string value: '\x80' for column 'c' at row 10
+Warning	1366	Incorrect string value: '\x80' for column 'c' at row 11
+Warning	1366	Incorrect string value: '\x80' for column 'c' at row 12
+Warning	1366	Incorrect string value: '\x80' for column 'c' at row 13
+Warning	1366	Incorrect string value: '\x80' for column 'c' at row 14
+Warning	1366	Incorrect string value: '\x80' for column 'c' at row 15
+Warning	1366	Incorrect string value: '\x80' for column 'c' at row 16
+SELECT COUNT(*) FROM t3;
+COUNT(*)
+16
+SELECT HEX(c),comment FROM t3 WHERE b=c ORDER BY b;
+HEX(c)	comment
+SELECT HEX(c),HEX(b),comment FROM t3 WHERE b<>c ORDER BY b;
+HEX(c)	HEX(b)	comment
+40403F	404080	[ascii,tail][ascii,tail][tail]
+40A1403F	40A14080	[ascii,tail][mb2,unassigned][tail]
+40A1A33F	40A1A380	[ascii,tail][mb2][tail]
+40FE403F	40FE4080	[ascii,tail][mb2][tail]
+A140403F	A1404080	[mb2,unassigned][ascii,tail][tail]
+A140A1403F	A140A14080	[mb2,unassigned][mb2,unassigned][tail]
+A140A1A33F	A140A1A380	[mb2,unassigned][mb2][tail]
+A140FE403F	A140FE4080	[mb2,unassigned][mb2][tail]
+A1A3403F	A1A34080	[mb2][ascii,tail][tail]
+A1A3A1403F	A1A3A14080	[mb2][mb2,unassigned][tail]
+A1A3A1A33F	A1A3A1A380	[mb2][mb2][tail]
+A1A3FE403F	A1A3FE4080	[mb2][mb2][tail]
+FE40403F	FE404080	[mb2][ascii,tail][tail]
+FE40A1403F	FE40A14080	[mb2][mb2,unassigned][tail]
+FE40A1A33F	FE40A1A380	[mb2][mb2][tail]
+FE40FE403F	FE40FE4080	[mb2][mb2][tail]
+DELETE FROM t2 WHERE b IN (SELECT b FROM t3);
+DELETE FROM t3;
+#
+# Sequences that consist of two MB2 characters,
+# followed by a non-ASCII head or tail, all should be fixed.
+#
+INSERT INTO t3 (b,c,comment) SELECT b,b,comment FROM t2
+WHERE FIND_IN_SET('mb2',type1) AND FIND_IN_SET('mb2',type2)
+AND NOT FIND_IN_SET('ascii',type3)
+AND NOT FIND_IN_SET('mb2',type3)
+ORDER BY b;
+Warnings:
+Warning	1366	Incorrect string value: '\x81' for column 'c' at row 1
+Warning	1366	Incorrect string value: '\x81' for column 'c' at row 2
+Warning	1366	Incorrect string value: '\x81' for column 'c' at row 3
+Warning	1366	Incorrect string value: '\x81' for column 'c' at row 4
+Warning	1366	Incorrect string value: '\x81' for column 'c' at row 5
+Warning	1366	Incorrect string value: '\x81' for column 'c' at row 6
+Warning	1366	Incorrect string value: '\x81' for column 'c' at row 7
+Warning	1366	Incorrect string value: '\x81' for column 'c' at row 8
+Warning	1366	Incorrect string value: '\x81' for column 'c' at row 9
+SELECT COUNT(*) FROM t3;
+COUNT(*)
+9
+SELECT HEX(c),comment FROM t3 WHERE b=c ORDER BY b;
+HEX(c)	comment
+SELECT HEX(c),HEX(b),comment FROM t3 WHERE b<>c ORDER BY b;
+HEX(c)	HEX(b)	comment
+A140A1403F	A140A14081	[mb2,unassigned][mb2,unassigned][head,tail]
+A140A1A33F	A140A1A381	[mb2,unassigned][mb2][head,tail]
+A140FE403F	A140FE4081	[mb2,unassigned][mb2][head,tail]
+A1A3A1403F	A1A3A14081	[mb2][mb2,unassigned][head,tail]
+A1A3A1A33F	A1A3A1A381	[mb2][mb2][head,tail]
+A1A3FE403F	A1A3FE4081	[mb2][mb2][head,tail]
+FE40A1403F	FE40A14081	[mb2][mb2,unassigned][head,tail]
+FE40A1A33F	FE40A1A381	[mb2][mb2][head,tail]
+FE40FE403F	FE40FE4081	[mb2][mb2][head,tail]
+DELETE FROM t2 WHERE b IN (SELECT b FROM t3);
+DELETE FROM t3;
+#
+# Sequences that consist of head + tail + MB2 should go without warnings
+#
+INSERT INTO t3 (b,c,comment) SELECT b,b,comment FROM t2
+WHERE FIND_IN_SET('head',type1)
+AND FIND_IN_SET('tail',type2)
+AND FIND_IN_SET('mb2',type3)
+ORDER BY b;
+SELECT COUNT(*) FROM t3;
+COUNT(*)
+9
+SELECT HEX(c),comment FROM t3 WHERE b=c ORDER BY b;
+HEX(c)	comment
+8140A140	[head,tail][ascii,tail][mb2,unassigned]
+8140A1A3	[head,tail][ascii,tail][mb2]
+8140FE40	[head,tail][ascii,tail][mb2]
+8180A140	[head,tail][tail][mb2,unassigned]
+8180A1A3	[head,tail][tail][mb2]
+8180FE40	[head,tail][tail][mb2]
+8181A140	[head,tail][head,tail][mb2,unassigned]
+8181A1A3	[head,tail][head,tail][mb2]
+8181FE40	[head,tail][head,tail][mb2]
+SELECT HEX(c),HEX(b),comment FROM t3 WHERE b<>c ORDER BY b;
+HEX(c)	HEX(b)	comment
+DELETE FROM t2 WHERE b IN (SELECT b FROM t3);
+DELETE FROM t3;
+#
+# Sequences that consist of (ascii or mb2) + head + tail should go without warnings
+#
+INSERT INTO t3 (b,c,comment) SELECT b,b,comment FROM t2
+WHERE (FIND_IN_SET('ascii',type1) OR FIND_IN_SET('mb2',type1))
+AND FIND_IN_SET('head',type2)
+AND FIND_IN_SET('tail',type3)
+ORDER BY b;
+SELECT COUNT(*) FROM t3;
+COUNT(*)
+12
+SELECT HEX(c),comment FROM t3 WHERE b=c ORDER BY b;
+HEX(c)	comment
+408140	[ascii,tail][head,tail][ascii,tail]
+408180	[ascii,tail][head,tail][tail]
+408181	[ascii,tail][head,tail][head,tail]
+A1408140	[mb2,unassigned][head,tail][ascii,tail]
+A1408180	[mb2,unassigned][head,tail][tail]
+A1408181	[mb2,unassigned][head,tail][head,tail]
+A1A38140	[mb2][head,tail][ascii,tail]
+A1A38180	[mb2][head,tail][tail]
+A1A38181	[mb2][head,tail][head,tail]
+FE408140	[mb2][head,tail][ascii,tail]
+FE408180	[mb2][head,tail][tail]
+FE408181	[mb2][head,tail][head,tail]
+SELECT HEX(c),HEX(b),comment FROM t3 WHERE b<>c ORDER BY b;
+HEX(c)	HEX(b)	comment
+DELETE FROM t2 WHERE b IN (SELECT b FROM t3);
+DELETE FROM t3;
+INSERT INTO t3 (b,c,comment) SELECT b,b,comment FROM t2 ORDER BY b;
+Warnings:
+Warning	1366	Incorrect string value: '\x81' for column 'c' at row 1
+Warning	1366	Incorrect string value: '\xA3' for column 'c' at row 3
+Warning	1366	Incorrect string value: '\x81' for column 'c' at row 5
+Warning	1366	Incorrect string value: '\x81' for column 'c' at row 6
+Warning	1366	Incorrect string value: '\x81' for column 'c' at row 7
+Warning	1366	Incorrect string value: '\x80' for column 'c' at row 9
+Warning	1366	Incorrect string value: '\x81' for column 'c' at row 10
+Warning	1366	Incorrect string value: '\x80' for column 'c' at row 12
+Warning	1366	Incorrect string value: '\x81' for column 'c' at row 13
+Warning	1366	Incorrect string value: '\x80' for column 'c' at row 15
+Warning	1366	Incorrect string value: '\x81' for column 'c' at row 16
+Warning	1366	Incorrect string value: '\x80' for column 'c' at row 18
+Warning	1366	Incorrect string value: '\x81' for column 'c' at row 19
+Warning	1366	Incorrect string value: '\xA3' for column 'c' at row 27
+Warning	1366	Incorrect string value: '\x80' for column 'c' at row 30
+Warning	1366	Incorrect string value: '\x81' for column 'c' at row 31
+Warning	1366	Incorrect string value: '\x81' for column 'c' at row 35
+Warning	1366	Incorrect string value: '\xA3' for column 'c' at row 37
+Warning	1366	Incorrect string value: '\x81' for column 'c' at row 39
+Warning	1366	Incorrect string value: '\xA3' for column 'c' at row 41
+Warning	1366	Incorrect string value: '\x81' for column 'c' at row 43
+Warning	1366	Incorrect string value: '\xA3' for column 'c' at row 45
+SELECT COUNT(*) FROM t3;
+COUNT(*)
+46
+SELECT HEX(c),comment FROM t3 WHERE b=c ORDER BY b;
+HEX(c)	comment
+4081A140	[ascii,tail][head,tail][mb2,unassigned]
+4081FE40	[ascii,tail][head,tail][mb2]
+814040	[head,tail][ascii,tail][ascii,tail]
+818040	[head,tail][tail][ascii,tail]
+818140	[head,tail][head,tail][ascii,tail]
+81A14040	[head,tail][mb2,unassigned][ascii,tail]
+81A140A140	[head,tail][mb2,unassigned][mb2,unassigned]
+81A140A1A3	[head,tail][mb2,unassigned][mb2]
+81A140FE40	[head,tail][mb2,unassigned][mb2]
+81A1A340	[head,tail][mb2][ascii,tail]
+81A1A380	[head,tail][mb2][tail]
+81A1A381	[head,tail][mb2][head,tail]
+81A1A3A140	[head,tail][mb2][mb2,unassigned]
+81A1A3FE40	[head,tail][mb2][mb2]
+81FE4040	[head,tail][mb2][ascii,tail]
+81FE40A140	[head,tail][mb2][mb2,unassigned]
+81FE40A1A3	[head,tail][mb2][mb2]
+81FE40FE40	[head,tail][mb2][mb2]
+A14081A140	[mb2,unassigned][head,tail][mb2,unassigned]
+A14081FE40	[mb2,unassigned][head,tail][mb2]
+A1A381A140	[mb2][head,tail][mb2,unassigned]
+A1A381FE40	[mb2][head,tail][mb2]
+FE4081A140	[mb2][head,tail][mb2,unassigned]
+FE4081FE40	[mb2][head,tail][mb2]
+SELECT HEX(c),HEX(b),comment FROM t3 WHERE b<>c ORDER BY b;
+HEX(c)	HEX(b)	comment
+40403F	404081	[ascii,tail][ascii,tail][head,tail]
+4081A13F	4081A1A3	[ascii,tail][head,tail][mb2]
+40A1403F	40A14081	[ascii,tail][mb2,unassigned][head,tail]
+40A1A33F	40A1A381	[ascii,tail][mb2][head,tail]
+40FE403F	40FE4081	[ascii,tail][mb2][head,tail]
+81403F	814080	[head,tail][ascii,tail][tail]
+81403F	814081	[head,tail][ascii,tail][head,tail]
+81803F	818080	[head,tail][tail][tail]
+81803F	818081	[head,tail][tail][head,tail]
+81813F	818180	[head,tail][head,tail][tail]
+81813F	818181	[head,tail][head,tail][head,tail]
+81A1403F	81A14080	[head,tail][mb2,unassigned][tail]
+81A1403F	81A14081	[head,tail][mb2,unassigned][head,tail]
+81A1A3A13F	81A1A3A1A3	[head,tail][mb2][mb2]
+81FE403F	81FE4080	[head,tail][mb2][tail]
+81FE403F	81FE4081	[head,tail][mb2][head,tail]
+A140403F	A1404081	[mb2,unassigned][ascii,tail][head,tail]
+A14081A13F	A14081A1A3	[mb2,unassigned][head,tail][mb2]
+A1A3403F	A1A34081	[mb2][ascii,tail][head,tail]
+A1A381A13F	A1A381A1A3	[mb2][head,tail][mb2]
+FE40403F	FE404081	[mb2][ascii,tail][head,tail]
+FE4081A13F	FE4081A1A3	[mb2][head,tail][mb2]
+DROP TABLE t3;
+DROP TABLE t2;
+DROP TABLE t1;
+#
+# END OF MDEV-6566 Different INSERT behaviour on bad bytes with and without character set conversion
+#
+#
+# End of 10.1 tests
+#
diff --git a/mysql-test/r/ctype_sjis.result b/mysql-test/r/ctype_sjis.result
index 48456c1..b4ef6f8 100644
--- a/mysql-test/r/ctype_sjis.result
+++ b/mysql-test/r/ctype_sjis.result
@@ -477,7 +477,7 @@ Warning	1366	Incorrect string value: '\x80_' for column 'a' at row 64
 SELECT COUNT(*) FROM t1;
 COUNT(*)
 14623
-SELECT COUNT(*) FROM t1 WHERE a<>'' AND OCTET_LENGTH(a)=1;
+SELECT COUNT(*) FROM t1 WHERE a<>'?' AND OCTET_LENGTH(a)=1;
 COUNT(*)
 63
 SELECT COUNT(*) FROM t1 WHERE a<>'' AND OCTET_LENGTH(a)=2;
diff --git a/mysql-test/r/ctype_ujis.result b/mysql-test/r/ctype_ujis.result
index 413ab4e..4074d98 100644
--- a/mysql-test/r/ctype_ujis.result
+++ b/mysql-test/r/ctype_ujis.result
@@ -2626,7 +2626,7 @@ Warning	1366	Incorrect string value: '\x80_' for column 'a' at row 64
 SELECT COUNT(*) FROM t1;
 COUNT(*)
 44671
-SELECT COUNT(*) FROM t1 WHERE a<>'';
+SELECT COUNT(*) FROM t1 WHERE a<>'?';
 COUNT(*)
 17735
 SELECT COUNT(*) FROM t1 WHERE a<>'' AND OCTET_LENGTH(a)=2;
@@ -25938,7 +25938,7 @@ CREATE TABLE t1 (a VARCHAR(10) CHARACTER SET ujis);
 INSERT INTO t1 VALUES (0x8EA0);
 SELECT HEX(a), CHAR_LENGTH(a) FROM t1;
 HEX(a)	CHAR_LENGTH(a)
-	0
+3F3F	2
 DROP TABLE t1;
 SELECT _ujis 0x8EA0;
 ERROR HY000: Invalid ujis character string: '8EA0'
diff --git a/mysql-test/r/ctype_utf8.result b/mysql-test/r/ctype_utf8.result
index db0c45e..c09ac36 100644
--- a/mysql-test/r/ctype_utf8.result
+++ b/mysql-test/r/ctype_utf8.result
@@ -225,7 +225,7 @@ Warnings:
 Warning	1366	Incorrect string value: '\xFF' for column 's1' at row 1
 select hex(s1) from t1;
 hex(s1)
-41
+413F
 drop table t1;
 create table t1 (s1 varchar(10) character set utf8);
 insert into t1 values (0x41FF);
@@ -233,7 +233,7 @@ Warnings:
 Warning	1366	Incorrect string value: '\xFF' for column 's1' at row 1
 select hex(s1) from t1;
 hex(s1)
-41
+413F
 drop table t1;
 create table t1 (s1 text character set utf8);
 insert into t1 values (0x41FF);
@@ -241,7 +241,7 @@ Warnings:
 Warning	1366	Incorrect string value: '\xFF' for column 's1' at row 1
 select hex(s1) from t1;
 hex(s1)
-41
+413F
 drop table t1;
 create table t1 (a text character set utf8, primary key(a(371)));
 ERROR 42000: Specified key was too long; max key length is 1000 bytes
diff --git a/mysql-test/r/ctype_utf8mb4.result b/mysql-test/r/ctype_utf8mb4.result
index 0dc94e9..d8f4eb3 100644
--- a/mysql-test/r/ctype_utf8mb4.result
+++ b/mysql-test/r/ctype_utf8mb4.result
@@ -225,7 +225,7 @@ Warnings:
 Warning	1366	Incorrect string value: '\xFF' for column 's1' at row 1
 select hex(s1) from t1;
 hex(s1)
-41
+413F
 drop table t1;
 create table t1 (s1 varchar(10) character set utf8mb4);
 insert into t1 values (0x41FF);
@@ -233,7 +233,7 @@ Warnings:
 Warning	1366	Incorrect string value: '\xFF' for column 's1' at row 1
 select hex(s1) from t1;
 hex(s1)
-41
+413F
 drop table t1;
 create table t1 (s1 text character set utf8mb4);
 insert into t1 values (0x41FF);
@@ -241,7 +241,7 @@ Warnings:
 Warning	1366	Incorrect string value: '\xFF' for column 's1' at row 1
 select hex(s1) from t1;
 hex(s1)
-41
+413F
 drop table t1;
 create table t1 (a text character set utf8mb4, primary key(a(371)));
 ERROR 42000: Specified key was too long; max key length is 1000 bytes
@@ -2327,7 +2327,7 @@ select hex(utf8mb4) from t1;
 hex(utf8mb4)
 F0908080
 F0BFBFBF
-
+3F
 delete from t1;
 Testing [F2..F3][80..BF][80..BF][80..BF]
 insert into t1 values (0xF2808080);
@@ -2347,7 +2347,7 @@ select hex(utf8mb4) from t1;
 hex(utf8mb4)
 F4808080
 F48F8080
-
+3F
 drop table t1;
 #
 # Check strnxfrm() with odd length
@@ -2472,45 +2472,45 @@ F3A087AFEA9DA8
 F09D849EF09D859EF09D859EF09D8480F09D859FF09D859FF09D859FF09D85A0F09D85A0F09D8480EA9DA8
 EFB9ABF09D849EF09D859EF09D859EF09D8480F09D859FEFB9ABEFB9ABF09D85A0EFB9ABEA9DA8
 F09D849EF09D859EF09D859EF09D8480F09D859FF09D859FF09D859FF09D85A0F09D85A0F09D8480EA9DA8
-EA9DA8
+3F3F3F3FEA9DA8
 SELECT HEX(CONCAT(utf8mb4_encoding, utf8mb3_encoding)) FROM t1,t2;
 HEX(CONCAT(utf8mb4_encoding, utf8mb3_encoding))
 F09D8480EA9DA8
 F09D8480EFB9AB
-F09D8480
+F09D84803F3F3F3F
 F09D849EEA9DA8
 F09D849EEFB9AB
-F09D849E
+F09D849E3F3F3F3F
 F09D859EEA9DA8
 F09D859EEFB9AB
-F09D859E
+F09D859E3F3F3F3F
 F09D878FEA9DA8
 F09D878FEFB9AB
-F09D878F
+F09D878F3F3F3F3F
 F09D9C9FEA9DA8
 F09D9C9FEFB9AB
-F09D9C9F
+F09D9C9F3F3F3F3F
 F09D9E9FEA9DA8
 F09D9E9FEFB9AB
-F09D9E9F
+F09D9E9F3F3F3F3F
 F48FBFBFEA9DA8
 F48FBFBFEFB9AB
-F48FBFBF
+F48FBFBF3F3F3F3F
 F3A087AFEA9DA8
 F3A087AFEFB9AB
-F3A087AF
+F3A087AF3F3F3F3F
 F09D849EF09D859EF09D859EF09D8480F09D859FF09D859FF09D859FF09D85A0F09D85A0F09D8480EA9DA8
 F09D849EF09D859EF09D859EF09D8480F09D859FF09D859FF09D859FF09D85A0F09D85A0F09D8480EFB9AB
-F09D849EF09D859EF09D859EF09D8480F09D859FF09D859FF09D859FF09D85A0F09D85A0F09D8480
+F09D849EF09D859EF09D859EF09D8480F09D859FF09D859FF09D859FF09D85A0F09D85A0F09D84803F3F3F3F
 EFB9ABF09D849EF09D859EF09D859EF09D8480F09D859FEFB9ABEFB9ABF09D85A0EFB9ABEA9DA8
 EFB9ABF09D849EF09D859EF09D859EF09D8480F09D859FEFB9ABEFB9ABF09D85A0EFB9ABEFB9AB
-EFB9ABF09D849EF09D859EF09D859EF09D8480F09D859FEFB9ABEFB9ABF09D85A0EFB9AB
+EFB9ABF09D849EF09D859EF09D859EF09D8480F09D859FEFB9ABEFB9ABF09D85A0EFB9AB3F3F3F3F
 F09D849EF09D859EF09D859EF09D8480F09D859FF09D859FF09D859FF09D85A0F09D85A0F09D8480EA9DA8
 F09D849EF09D859EF09D859EF09D8480F09D859FF09D859FF09D859FF09D85A0F09D85A0F09D8480EFB9AB
-F09D849EF09D859EF09D859EF09D8480F09D859FF09D859FF09D859FF09D85A0F09D85A0F09D8480
-EA9DA8
-EFB9AB
-
+F09D849EF09D859EF09D859EF09D8480F09D859FF09D859FF09D859FF09D85A0F09D85A0F09D84803F3F3F3F
+3F3F3F3FEA9DA8
+3F3F3F3FEFB9AB
+3F3F3F3F3F3F3F3F
 SELECT count(*) FROM t1, t2
 WHERE t1.utf8mb4_encoding > t2.utf8mb3_encoding;
 count(*)
@@ -2547,7 +2547,7 @@ u_decimal	hex(utf8mb4_encoding)	utf8mb4_encoding
 119070	3F3F3F3F3F3F3F3F3F3F	??????????
 65131	EFB9AB3F3F3F3F3FEFB9ABEFB9AB3FEFB9AB	﹫?????﹫﹫?﹫
 119070	3F3F3F3F3F3F3F3F3F3F	??????????
-1114111		
+1114111	3F3F3F3F	????
 ALTER TABLE t2 CONVERT TO CHARACTER SET utf8mb4;
 SHOW CREATE TABLE t2;
 Table	Create Table
@@ -2559,7 +2559,7 @@ SELECT u_decimal,hex(utf8mb3_encoding) FROM t2;
 u_decimal	hex(utf8mb3_encoding)
 42856	EA9DA8
 65131	EFB9AB
-1114111	
+1114111	3F3F3F3F
 ALTER TABLE t2 CONVERT TO CHARACTER SET utf8mb3;
 SHOW CREATE TABLE t2;
 Table	Create Table
@@ -2571,7 +2571,7 @@ SELECT u_decimal,hex(utf8mb3_encoding) FROM t2;
 u_decimal	hex(utf8mb3_encoding)
 42856	EA9DA8
 65131	EFB9AB
-1114111	
+1114111	3F3F3F3F
 ALTER TABLE t1 MODIFY utf8mb4_encoding VARCHAR(10) CHARACTER SET utf8mb3;
 SHOW CREATE TABLE t1;
 Table	Create Table
@@ -2592,7 +2592,7 @@ u_decimal	hex(utf8mb4_encoding)
 119070	3F3F3F3F3F3F3F3F3F3F
 65131	EFB9AB3F3F3F3F3FEFB9ABEFB9AB3FEFB9AB
 119070	3F3F3F3F3F3F3F3F3F3F
-1114111	
+1114111	3F3F3F3F
 ALTER TABLE t1 MODIFY utf8mb4_encoding VARCHAR(10) CHARACTER SET utf8mb4;
 SHOW CREATE TABLE t1;
 Table	Create Table
@@ -2613,7 +2613,7 @@ u_decimal	hex(utf8mb4_encoding)
 119070	3F3F3F3F3F3F3F3F3F3F
 65131	EFB9AB3F3F3F3F3FEFB9ABEFB9AB3FEFB9AB
 119070	3F3F3F3F3F3F3F3F3F3F
-1114111	
+1114111	3F3F3F3F
 ALTER TABLE t2 MODIFY utf8mb3_encoding VARCHAR(10) CHARACTER SET utf8mb4;
 SHOW CREATE TABLE t2;
 Table	Create Table
@@ -2625,7 +2625,7 @@ SELECT u_decimal,hex(utf8mb3_encoding) FROM t2;
 u_decimal	hex(utf8mb3_encoding)
 42856	EA9DA8
 65131	EFB9AB
-1114111	
+1114111	3F3F3F3F
 DROP TABLE IF EXISTS t3;
 CREATE TABLE t3 (
 u_decimal int NOT NULL,
@@ -3306,5 +3306,53 @@ DFFFFFDFFFFF9CFFFF9DFFFF9EFFFF
 # End of 5.6 tests
 #
 #
+# Start of 10.0 tests
+#
+#
+# MDEV-6566 Different INSERT behaviour on bad bytes with and without character set conversion
+#
+#
+# This test sets session character set to 3-byte utf8,
+# but then sends a 4-byte sequence (which is wrong for 3-byte utf8).
+# It should be replaced to four question marks: '????' in both columns
+# (i.e. four unknown bytes are replaced to four question marks),
+# then the rest of the string should be stored, so we get 'a ???? b'.
+#
+SET NAMES utf8;
+CREATE TABLE t1 (
+a VARCHAR(32) CHARACTER SET utf8mb4,
+b VARCHAR(32) CHARACTER SET utf8
+);
+INSERT INTO t1 SELECT 'a 😁 b', 'a 😁 b';
+Warnings:
+Warning	1366	Incorrect string value: '\xF0\x9F\x98\x81 b' for column 'a' at row 1
+Warning	1366	Incorrect string value: '\xF0\x9F\x98\x81 b' for column 'b' at row 1
+SELECT * FROM t1;
+a	b
+a ???? b	a ???? b
+DROP TABLE t1;
+#
+# This test sets session character set to 4-byte utf8,
+# then normally sends a 4-byte sequence.
+# It should be stored AS IS into the utf8mb4 column (a),
+# and should be replaced to a single question mark in the utf8 column (b)
+# (i.e. one character that cannot be converted is replaced to one question mark).
+#
+SET NAMES utf8mb4;
+CREATE TABLE t1 (
+a VARCHAR(32) CHARACTER SET utf8mb4,
+b VARCHAR(32) CHARACTER SET utf8
+);
+INSERT INTO t1 SELECT 'a 😁 b', 'a 😁 b';
+Warnings:
+Warning	1366	Incorrect string value: '\xF0\x9F\x98\x81 b' for column 'b' at row 1
+SELECT * FROM t1;
+a	b
+a 😁 b	a ? b
+DROP TABLE t1;
+#
+# End of 10.0 tests
+#
+#
 # End of tests
 #
diff --git a/mysql-test/r/ctype_utf8mb4_heap.result b/mysql-test/r/ctype_utf8mb4_heap.result
index 57d29a2..7f5125a 100644
--- a/mysql-test/r/ctype_utf8mb4_heap.result
+++ b/mysql-test/r/ctype_utf8mb4_heap.result
@@ -225,7 +225,7 @@ Warnings:
 Warning	1366	Incorrect string value: '\xFF' for column 's1' at row 1
 select hex(s1) from t1;
 hex(s1)
-41
+413F
 drop table t1;
 create table t1 (s1 varchar(10) character set utf8mb4) engine heap;
 insert into t1 values (0x41FF);
@@ -233,7 +233,7 @@ Warnings:
 Warning	1366	Incorrect string value: '\xFF' for column 's1' at row 1
 select hex(s1) from t1;
 hex(s1)
-41
+413F
 drop table t1;
 CREATE TABLE t1 ( a varchar(10) ) CHARACTER SET utf8mb4 ENGINE heap;
 INSERT INTO t1 VALUES ( 'test' );
@@ -2157,7 +2157,7 @@ Warnings:
 Warning	1366	Incorrect string value: '\xF0\x8F\x80\x80' for column 'utf8mb4' at row 1
 select hex(utf8mb4) from t1;
 hex(utf8mb4)
-
+3F
 F0908080
 F0BFBFBF
 delete from t1;
@@ -2177,7 +2177,7 @@ Warnings:
 Warning	1366	Incorrect string value: '\xF4\x90\x80\x80' for column 'utf8mb4' at row 1
 select hex(utf8mb4) from t1;
 hex(utf8mb4)
-
+3F
 F4808080
 F48F8080
 drop table t1;
@@ -2274,7 +2274,7 @@ Warning	1366	Incorrect string value: '\xF4\x8F\xBF\xBD' for column 'utf8mb3_enco
 UPDATE t2 SET utf8mb3_encoding= _utf8mb4 x'ea9da8' where u_decimal= 42856;
 SELECT HEX(CONCAT(utf8mb4_encoding, _utf8 x'ea9da8')) FROM t1;
 HEX(CONCAT(utf8mb4_encoding, _utf8 x'ea9da8'))
-EA9DA8
+3F3F3F3FEA9DA8
 EFB9ABF09D849EF09D859EF09D859EF09D8480F09D859FEFB9ABEFB9ABF09D85A0EFB9ABEA9DA8
 F09D8480EA9DA8
 F09D849EEA9DA8
@@ -2288,40 +2288,40 @@ F3A087AFEA9DA8
 F48FBFBFEA9DA8
 SELECT HEX(CONCAT(utf8mb4_encoding, utf8mb3_encoding)) FROM t1,t2;
 HEX(CONCAT(utf8mb4_encoding, utf8mb3_encoding))
-
-EA9DA8
-EFB9AB
-EFB9ABF09D849EF09D859EF09D859EF09D8480F09D859FEFB9ABEFB9ABF09D85A0EFB9AB
+3F3F3F3F3F3F3F3F
+3F3F3F3FEA9DA8
+3F3F3F3FEFB9AB
+EFB9ABF09D849EF09D859EF09D859EF09D8480F09D859FEFB9ABEFB9ABF09D85A0EFB9AB3F3F3F3F
 EFB9ABF09D849EF09D859EF09D859EF09D8480F09D859FEFB9ABEFB9ABF09D85A0EFB9ABEA9DA8
 EFB9ABF09D849EF09D859EF09D859EF09D8480F09D859FEFB9ABEFB9ABF09D85A0EFB9ABEFB9AB
-F09D8480
+F09D84803F3F3F3F
 F09D8480EA9DA8
 F09D8480EFB9AB
-F09D849E
+F09D849E3F3F3F3F
 F09D849EEA9DA8
 F09D849EEFB9AB
-F09D849EF09D859EF09D859EF09D8480F09D859FF09D859FF09D859FF09D85A0F09D85A0F09D8480
-F09D849EF09D859EF09D859EF09D8480F09D859FF09D859FF09D859FF09D85A0F09D85A0F09D8480
+F09D849EF09D859EF09D859EF09D8480F09D859FF09D859FF09D859FF09D85A0F09D85A0F09D84803F3F3F3F
+F09D849EF09D859EF09D859EF09D8480F09D859FF09D859FF09D859FF09D85A0F09D85A0F09D84803F3F3F3F
 F09D849EF09D859EF09D859EF09D8480F09D859FF09D859FF09D859FF09D85A0F09D85A0F09D8480EA9DA8
 F09D849EF09D859EF09D859EF09D8480F09D859FF09D859FF09D859FF09D85A0F09D85A0F09D8480EA9DA8
 F09D849EF09D859EF09D859EF09D8480F09D859FF09D859FF09D859FF09D85A0F09D85A0F09D8480EFB9AB
 F09D849EF09D859EF09D859EF09D8480F09D859FF09D859FF09D859FF09D85A0F09D85A0F09D8480EFB9AB
-F09D859E
+F09D859E3F3F3F3F
 F09D859EEA9DA8
 F09D859EEFB9AB
-F09D878F
+F09D878F3F3F3F3F
 F09D878FEA9DA8
 F09D878FEFB9AB
-F09D9C9F
+F09D9C9F3F3F3F3F
 F09D9C9FEA9DA8
 F09D9C9FEFB9AB
-F09D9E9F
+F09D9E9F3F3F3F3F
 F09D9E9FEA9DA8
 F09D9E9FEFB9AB
-F3A087AF
+F3A087AF3F3F3F3F
 F3A087AFEA9DA8
 F3A087AFEFB9AB
-F48FBFBF
+F48FBFBF3F3F3F3F
 F48FBFBFEA9DA8
 F48FBFBFEFB9AB
 SELECT count(*) FROM t1, t2
@@ -2337,8 +2337,8 @@ t1	CREATE TABLE `t1` (
 ) ENGINE=MEMORY DEFAULT CHARSET=utf8
 SELECT u_decimal,hex(utf8mb4_encoding),utf8mb4_encoding FROM t1;
 u_decimal	hex(utf8mb4_encoding)	utf8mb4_encoding
-1114111		
 1114111	3F	?
+1114111	3F3F3F3F	????
 119040	3F	?
 119070	3F	?
 119070	3F3F3F3F3F3F3F3F3F3F	??????????
@@ -2358,7 +2358,7 @@ t2	CREATE TABLE `t2` (
 ) ENGINE=MEMORY DEFAULT CHARSET=utf8mb4
 SELECT u_decimal,hex(utf8mb3_encoding) FROM t2;
 u_decimal	hex(utf8mb3_encoding)
-1114111	
+1114111	3F3F3F3F
 42856	EA9DA8
 65131	EFB9AB
 ALTER TABLE t2 CONVERT TO CHARACTER SET utf8mb3;
@@ -2370,7 +2370,7 @@ t2	CREATE TABLE `t2` (
 ) ENGINE=MEMORY DEFAULT CHARSET=utf8
 SELECT u_decimal,hex(utf8mb3_encoding) FROM t2;
 u_decimal	hex(utf8mb3_encoding)
-1114111	
+1114111	3F3F3F3F
 42856	EA9DA8
 65131	EFB9AB
 ALTER TABLE t1 MODIFY utf8mb4_encoding VARCHAR(10) CHARACTER SET utf8mb3;
@@ -2382,8 +2382,8 @@ t1	CREATE TABLE `t1` (
 ) ENGINE=MEMORY DEFAULT CHARSET=utf8
 SELECT u_decimal,hex(utf8mb4_encoding) FROM t1;
 u_decimal	hex(utf8mb4_encoding)
-1114111	
 1114111	3F
+1114111	3F3F3F3F
 119040	3F
 119070	3F
 119070	3F3F3F3F3F3F3F3F3F3F
@@ -2403,8 +2403,8 @@ t1	CREATE TABLE `t1` (
 ) ENGINE=MEMORY DEFAULT CHARSET=utf8
 SELECT u_decimal,hex(utf8mb4_encoding) FROM t1;
 u_decimal	hex(utf8mb4_encoding)
-1114111	
 1114111	3F
+1114111	3F3F3F3F
 119040	3F
 119070	3F
 119070	3F3F3F3F3F3F3F3F3F3F
@@ -2424,7 +2424,7 @@ t2	CREATE TABLE `t2` (
 ) ENGINE=MEMORY DEFAULT CHARSET=utf8
 SELECT u_decimal,hex(utf8mb3_encoding) FROM t2;
 u_decimal	hex(utf8mb3_encoding)
-1114111	
+1114111	3F3F3F3F
 42856	EA9DA8
 65131	EFB9AB
 DROP TABLE IF EXISTS t3;
diff --git a/mysql-test/r/ctype_utf8mb4_innodb.result b/mysql-test/r/ctype_utf8mb4_innodb.result
index ba03a3f..053e6de 100644
--- a/mysql-test/r/ctype_utf8mb4_innodb.result
+++ b/mysql-test/r/ctype_utf8mb4_innodb.result
@@ -225,7 +225,7 @@ Warnings:
 Warning	1366	Incorrect string value: '\xFF' for column 's1' at row 1
 select hex(s1) from t1;
 hex(s1)
-41
+413F
 drop table t1;
 create table t1 (s1 varchar(10) character set utf8mb4) engine InnoDB;
 insert into t1 values (0x41FF);
@@ -233,7 +233,7 @@ Warnings:
 Warning	1366	Incorrect string value: '\xFF' for column 's1' at row 1
 select hex(s1) from t1;
 hex(s1)
-41
+413F
 drop table t1;
 create table t1 (s1 text character set utf8mb4) engine InnoDB;
 insert into t1 values (0x41FF);
@@ -241,7 +241,7 @@ Warnings:
 Warning	1366	Incorrect string value: '\xFF' for column 's1' at row 1
 select hex(s1) from t1;
 hex(s1)
-41
+413F
 drop table t1;
 create table t1 (a text character set utf8mb4, primary key(a(371))) engine InnoDB;
 ERROR 42000: Specified key was too long; max key length is 767 bytes
@@ -2285,7 +2285,7 @@ Warnings:
 Warning	1366	Incorrect string value: '\xF0\x8F\x80\x80' for column 'utf8mb4' at row 1
 select hex(utf8mb4) from t1;
 hex(utf8mb4)
-
+3F
 F0908080
 F0BFBFBF
 delete from t1;
@@ -2305,7 +2305,7 @@ Warnings:
 Warning	1366	Incorrect string value: '\xF4\x90\x80\x80' for column 'utf8mb4' at row 1
 select hex(utf8mb4) from t1;
 hex(utf8mb4)
-
+3F
 F4808080
 F48F8080
 drop table t1;
@@ -2421,7 +2421,7 @@ Warning	1366	Incorrect string value: '\xF4\x8F\xBF\xBD' for column 'utf8mb3_enco
 UPDATE t2 SET utf8mb3_encoding= _utf8mb4 x'ea9da8' where u_decimal= 42856;
 SELECT HEX(CONCAT(utf8mb4_encoding, _utf8 x'ea9da8')) FROM t1;
 HEX(CONCAT(utf8mb4_encoding, _utf8 x'ea9da8'))
-EA9DA8
+3F3F3F3FEA9DA8
 EFB9ABF09D849EF09D859EF09D859EF09D8480F09D859FEFB9ABEFB9ABF09D85A0EFB9ABEA9DA8
 F09D8480EA9DA8
 F09D849EEA9DA8
@@ -2435,40 +2435,40 @@ F3A087AFEA9DA8
 F48FBFBFEA9DA8
 SELECT HEX(CONCAT(utf8mb4_encoding, utf8mb3_encoding)) FROM t1,t2;
 HEX(CONCAT(utf8mb4_encoding, utf8mb3_encoding))
-
-EA9DA8
-EFB9AB
-EFB9ABF09D849EF09D859EF09D859EF09D8480F09D859FEFB9ABEFB9ABF09D85A0EFB9AB
+3F3F3F3F3F3F3F3F
+3F3F3F3FEA9DA8
+3F3F3F3FEFB9AB
+EFB9ABF09D849EF09D859EF09D859EF09D8480F09D859FEFB9ABEFB9ABF09D85A0EFB9AB3F3F3F3F
 EFB9ABF09D849EF09D859EF09D859EF09D8480F09D859FEFB9ABEFB9ABF09D85A0EFB9ABEA9DA8
 EFB9ABF09D849EF09D859EF09D859EF09D8480F09D859FEFB9ABEFB9ABF09D85A0EFB9ABEFB9AB
-F09D8480
+F09D84803F3F3F3F
 F09D8480EA9DA8
 F09D8480EFB9AB
-F09D849E
+F09D849E3F3F3F3F
 F09D849EEA9DA8
 F09D849EEFB9AB
-F09D849EF09D859EF09D859EF09D8480F09D859FF09D859FF09D859FF09D85A0F09D85A0F09D8480
-F09D849EF09D859EF09D859EF09D8480F09D859FF09D859FF09D859FF09D85A0F09D85A0F09D8480
+F09D849EF09D859EF09D859EF09D8480F09D859FF09D859FF09D859FF09D85A0F09D85A0F09D84803F3F3F3F
+F09D849EF09D859EF09D859EF09D8480F09D859FF09D859FF09D859FF09D85A0F09D85A0F09D84803F3F3F3F
 F09D849EF09D859EF09D859EF09D8480F09D859FF09D859FF09D859FF09D85A0F09D85A0F09D8480EA9DA8
 F09D849EF09D859EF09D859EF09D8480F09D859FF09D859FF09D859FF09D85A0F09D85A0F09D8480EA9DA8
 F09D849EF09D859EF09D859EF09D8480F09D859FF09D859FF09D859FF09D85A0F09D85A0F09D8480EFB9AB
 F09D849EF09D859EF09D859EF09D8480F09D859FF09D859FF09D859FF09D85A0F09D85A0F09D8480EFB9AB
-F09D859E
+F09D859E3F3F3F3F
 F09D859EEA9DA8
 F09D859EEFB9AB
-F09D878F
+F09D878F3F3F3F3F
 F09D878FEA9DA8
 F09D878FEFB9AB
-F09D9C9F
+F09D9C9F3F3F3F3F
 F09D9C9FEA9DA8
 F09D9C9FEFB9AB
-F09D9E9F
+F09D9E9F3F3F3F3F
 F09D9E9FEA9DA8
 F09D9E9FEFB9AB
-F3A087AF
+F3A087AF3F3F3F3F
 F3A087AFEA9DA8
 F3A087AFEFB9AB
-F48FBFBF
+F48FBFBF3F3F3F3F
 F48FBFBFEA9DA8
 F48FBFBFEFB9AB
 SELECT count(*) FROM t1, t2
@@ -2484,8 +2484,8 @@ t1	CREATE TABLE `t1` (
 ) ENGINE=InnoDB DEFAULT CHARSET=utf8
 SELECT u_decimal,hex(utf8mb4_encoding),utf8mb4_encoding FROM t1;
 u_decimal	hex(utf8mb4_encoding)	utf8mb4_encoding
-1114111		
 1114111	3F	?
+1114111	3F3F3F3F	????
 119040	3F	?
 119070	3F	?
 119070	3F3F3F3F3F3F3F3F3F3F	??????????
@@ -2505,7 +2505,7 @@ t2	CREATE TABLE `t2` (
 ) ENGINE=InnoDB DEFAULT CHARSET=utf8mb4
 SELECT u_decimal,hex(utf8mb3_encoding) FROM t2;
 u_decimal	hex(utf8mb3_encoding)
-1114111	
+1114111	3F3F3F3F
 42856	EA9DA8
 65131	EFB9AB
 ALTER TABLE t2 CONVERT TO CHARACTER SET utf8mb3;
@@ -2517,7 +2517,7 @@ t2	CREATE TABLE `t2` (
 ) ENGINE=InnoDB DEFAULT CHARSET=utf8
 SELECT u_decimal,hex(utf8mb3_encoding) FROM t2;
 u_decimal	hex(utf8mb3_encoding)
-1114111	
+1114111	3F3F3F3F
 42856	EA9DA8
 65131	EFB9AB
 ALTER TABLE t1 MODIFY utf8mb4_encoding VARCHAR(10) CHARACTER SET utf8mb3;
@@ -2529,8 +2529,8 @@ t1	CREATE TABLE `t1` (
 ) ENGINE=InnoDB DEFAULT CHARSET=utf8
 SELECT u_decimal,hex(utf8mb4_encoding) FROM t1;
 u_decimal	hex(utf8mb4_encoding)
-1114111	
 1114111	3F
+1114111	3F3F3F3F
 119040	3F
 119070	3F
 119070	3F3F3F3F3F3F3F3F3F3F
@@ -2550,8 +2550,8 @@ t1	CREATE TABLE `t1` (
 ) ENGINE=InnoDB DEFAULT CHARSET=utf8
 SELECT u_decimal,hex(utf8mb4_encoding) FROM t1;
 u_decimal	hex(utf8mb4_encoding)
-1114111	
 1114111	3F
+1114111	3F3F3F3F
 119040	3F
 119070	3F
 119070	3F3F3F3F3F3F3F3F3F3F
@@ -2571,7 +2571,7 @@ t2	CREATE TABLE `t2` (
 ) ENGINE=InnoDB DEFAULT CHARSET=utf8
 SELECT u_decimal,hex(utf8mb3_encoding) FROM t2;
 u_decimal	hex(utf8mb3_encoding)
-1114111	
+1114111	3F3F3F3F
 42856	EA9DA8
 65131	EFB9AB
 DROP TABLE IF EXISTS t3;
diff --git a/mysql-test/r/ctype_utf8mb4_myisam.result b/mysql-test/r/ctype_utf8mb4_myisam.result
index c4ff8e0..5bfdfe8 100644
--- a/mysql-test/r/ctype_utf8mb4_myisam.result
+++ b/mysql-test/r/ctype_utf8mb4_myisam.result
@@ -225,7 +225,7 @@ Warnings:
 Warning	1366	Incorrect string value: '\xFF' for column 's1' at row 1
 select hex(s1) from t1;
 hex(s1)
-41
+413F
 drop table t1;
 create table t1 (s1 varchar(10) character set utf8mb4) engine MyISAM;
 insert into t1 values (0x41FF);
@@ -233,7 +233,7 @@ Warnings:
 Warning	1366	Incorrect string value: '\xFF' for column 's1' at row 1
 select hex(s1) from t1;
 hex(s1)
-41
+413F
 drop table t1;
 create table t1 (s1 text character set utf8mb4) engine MyISAM;
 insert into t1 values (0x41FF);
@@ -241,7 +241,7 @@ Warnings:
 Warning	1366	Incorrect string value: '\xFF' for column 's1' at row 1
 select hex(s1) from t1;
 hex(s1)
-41
+413F
 drop table t1;
 create table t1 (a text character set utf8mb4, primary key(a(371))) engine MyISAM;
 ERROR 42000: Specified key was too long; max key length is 1000 bytes
@@ -2285,7 +2285,7 @@ Warnings:
 Warning	1366	Incorrect string value: '\xF0\x8F\x80\x80' for column 'utf8mb4' at row 1
 select hex(utf8mb4) from t1;
 hex(utf8mb4)
-
+3F
 F0908080
 F0BFBFBF
 delete from t1;
@@ -2305,7 +2305,7 @@ Warnings:
 Warning	1366	Incorrect string value: '\xF4\x90\x80\x80' for column 'utf8mb4' at row 1
 select hex(utf8mb4) from t1;
 hex(utf8mb4)
-
+3F
 F4808080
 F48F8080
 drop table t1;
@@ -2421,7 +2421,7 @@ Warning	1366	Incorrect string value: '\xF4\x8F\xBF\xBD' for column 'utf8mb3_enco
 UPDATE t2 SET utf8mb3_encoding= _utf8mb4 x'ea9da8' where u_decimal= 42856;
 SELECT HEX(CONCAT(utf8mb4_encoding, _utf8 x'ea9da8')) FROM t1;
 HEX(CONCAT(utf8mb4_encoding, _utf8 x'ea9da8'))
-EA9DA8
+3F3F3F3FEA9DA8
 EFB9ABF09D849EF09D859EF09D859EF09D8480F09D859FEFB9ABEFB9ABF09D85A0EFB9ABEA9DA8
 F09D8480EA9DA8
 F09D849EEA9DA8
@@ -2435,40 +2435,40 @@ F3A087AFEA9DA8
 F48FBFBFEA9DA8
 SELECT HEX(CONCAT(utf8mb4_encoding, utf8mb3_encoding)) FROM t1,t2;
 HEX(CONCAT(utf8mb4_encoding, utf8mb3_encoding))
-
-EA9DA8
-EFB9AB
-EFB9ABF09D849EF09D859EF09D859EF09D8480F09D859FEFB9ABEFB9ABF09D85A0EFB9AB
+3F3F3F3F3F3F3F3F
+3F3F3F3FEA9DA8
+3F3F3F3FEFB9AB
+EFB9ABF09D849EF09D859EF09D859EF09D8480F09D859FEFB9ABEFB9ABF09D85A0EFB9AB3F3F3F3F
 EFB9ABF09D849EF09D859EF09D859EF09D8480F09D859FEFB9ABEFB9ABF09D85A0EFB9ABEA9DA8
 EFB9ABF09D849EF09D859EF09D859EF09D8480F09D859FEFB9ABEFB9ABF09D85A0EFB9ABEFB9AB
-F09D8480
+F09D84803F3F3F3F
 F09D8480EA9DA8
 F09D8480EFB9AB
-F09D849E
+F09D849E3F3F3F3F
 F09D849EEA9DA8
 F09D849EEFB9AB
-F09D849EF09D859EF09D859EF09D8480F09D859FF09D859FF09D859FF09D85A0F09D85A0F09D8480
-F09D849EF09D859EF09D859EF09D8480F09D859FF09D859FF09D859FF09D85A0F09D85A0F09D8480
+F09D849EF09D859EF09D859EF09D8480F09D859FF09D859FF09D859FF09D85A0F09D85A0F09D84803F3F3F3F
+F09D849EF09D859EF09D859EF09D8480F09D859FF09D859FF09D859FF09D85A0F09D85A0F09D84803F3F3F3F
 F09D849EF09D859EF09D859EF09D8480F09D859FF09D859FF09D859FF09D85A0F09D85A0F09D8480EA9DA8
 F09D849EF09D859EF09D859EF09D8480F09D859FF09D859FF09D859FF09D85A0F09D85A0F09D8480EA9DA8
 F09D849EF09D859EF09D859EF09D8480F09D859FF09D859FF09D859FF09D85A0F09D85A0F09D8480EFB9AB
 F09D849EF09D859EF09D859EF09D8480F09D859FF09D859FF09D859FF09D85A0F09D85A0F09D8480EFB9AB
-F09D859E
+F09D859E3F3F3F3F
 F09D859EEA9DA8
 F09D859EEFB9AB
-F09D878F
+F09D878F3F3F3F3F
 F09D878FEA9DA8
 F09D878FEFB9AB
-F09D9C9F
+F09D9C9F3F3F3F3F
 F09D9C9FEA9DA8
 F09D9C9FEFB9AB
-F09D9E9F
+F09D9E9F3F3F3F3F
 F09D9E9FEA9DA8
 F09D9E9FEFB9AB
-F3A087AF
+F3A087AF3F3F3F3F
 F3A087AFEA9DA8
 F3A087AFEFB9AB
-F48FBFBF
+F48FBFBF3F3F3F3F
 F48FBFBFEA9DA8
 F48FBFBFEFB9AB
 SELECT count(*) FROM t1, t2
@@ -2484,8 +2484,8 @@ t1	CREATE TABLE `t1` (
 ) ENGINE=MyISAM DEFAULT CHARSET=utf8
 SELECT u_decimal,hex(utf8mb4_encoding),utf8mb4_encoding FROM t1;
 u_decimal	hex(utf8mb4_encoding)	utf8mb4_encoding
-1114111		
 1114111	3F	?
+1114111	3F3F3F3F	????
 119040	3F	?
 119070	3F	?
 119070	3F3F3F3F3F3F3F3F3F3F	??????????
@@ -2505,7 +2505,7 @@ t2	CREATE TABLE `t2` (
 ) ENGINE=MyISAM DEFAULT CHARSET=utf8mb4
 SELECT u_decimal,hex(utf8mb3_encoding) FROM t2;
 u_decimal	hex(utf8mb3_encoding)
-1114111	
+1114111	3F3F3F3F
 42856	EA9DA8
 65131	EFB9AB
 ALTER TABLE t2 CONVERT TO CHARACTER SET utf8mb3;
@@ -2517,7 +2517,7 @@ t2	CREATE TABLE `t2` (
 ) ENGINE=MyISAM DEFAULT CHARSET=utf8
 SELECT u_decimal,hex(utf8mb3_encoding) FROM t2;
 u_decimal	hex(utf8mb3_encoding)
-1114111	
+1114111	3F3F3F3F
 42856	EA9DA8
 65131	EFB9AB
 ALTER TABLE t1 MODIFY utf8mb4_encoding VARCHAR(10) CHARACTER SET utf8mb3;
@@ -2529,8 +2529,8 @@ t1	CREATE TABLE `t1` (
 ) ENGINE=MyISAM DEFAULT CHARSET=utf8
 SELECT u_decimal,hex(utf8mb4_encoding) FROM t1;
 u_decimal	hex(utf8mb4_encoding)
-1114111	
 1114111	3F
+1114111	3F3F3F3F
 119040	3F
 119070	3F
 119070	3F3F3F3F3F3F3F3F3F3F
@@ -2550,8 +2550,8 @@ t1	CREATE TABLE `t1` (
 ) ENGINE=MyISAM DEFAULT CHARSET=utf8
 SELECT u_decimal,hex(utf8mb4_encoding) FROM t1;
 u_decimal	hex(utf8mb4_encoding)
-1114111	
 1114111	3F
+1114111	3F3F3F3F
 119040	3F
 119070	3F
 119070	3F3F3F3F3F3F3F3F3F3F
@@ -2571,7 +2571,7 @@ t2	CREATE TABLE `t2` (
 ) ENGINE=MyISAM DEFAULT CHARSET=utf8
 SELECT u_decimal,hex(utf8mb3_encoding) FROM t2;
 u_decimal	hex(utf8mb3_encoding)
-1114111	
+1114111	3F3F3F3F
 42856	EA9DA8
 65131	EFB9AB
 DROP TABLE IF EXISTS t3;
diff --git a/mysql-test/suite/funcs_2/include/check_charset.inc b/mysql-test/suite/funcs_2/include/check_charset.inc
index df4a58d..0242d43 100644
--- a/mysql-test/suite/funcs_2/include/check_charset.inc
+++ b/mysql-test/suite/funcs_2/include/check_charset.inc
@@ -22,13 +22,15 @@ SHOW TABLE STATUS LIKE 't1';
 
 --disable_warnings
 --disable_query_log
+ALTER TABLE test.t1 ADD code VARCHAR(16) NOT NULL;
 let $1= 221;
 while ($1)
 {
- eval INSERT INTO test.t1 VALUES(CHAR(254-$1));
+ eval INSERT INTO test.t1 VALUES(CHAR(254-$1), HEX(254-$1));
  dec $1;
 }
 DELETE FROM test.t1 WHERE CHAR_LENGTH(a) <> 1;
+DELETE FROM test.t1 WHERE a='?' AND code<>'3F';
 --enable_query_log
 --enable_warnings
 
diff --git a/mysql-test/suite/innodb/r/innodb-update-insert.result b/mysql-test/suite/innodb/r/innodb-update-insert.result
index cd0fed1..034a63b 100644
--- a/mysql-test/suite/innodb/r/innodb-update-insert.result
+++ b/mysql-test/suite/innodb/r/innodb-update-insert.result
@@ -30,7 +30,7 @@ Warnings:
 Warning	1366	Incorrect string value: '\xA3' for column 'f1' at row 1
 select f1 from t1;
 f1
-
+?
 update t1 set f1=0x6a;
 update t1 set f3=repeat(0xb1,8103);
 update t1 set f1=0x4a;
@@ -39,5 +39,5 @@ Warnings:
 Warning	1366	Incorrect string value: '\x82' for column 'f1' at row 1
 select f1 from t1;
 f1
-
+?
 drop table t1;
diff --git a/mysql-test/t/ctype_big5.test b/mysql-test/t/ctype_big5.test
index 5c0bdff..46bb295 100644
--- a/mysql-test/t/ctype_big5.test
+++ b/mysql-test/t/ctype_big5.test
@@ -121,7 +121,7 @@ DROP TEMPORARY TABLE head, tail;
 SHOW CREATE TABLE t1;
 SELECT COUNT(*) FROM t1;
 UPDATE t1 SET a=unhex(code) ORDER BY code;
-SELECT COUNT(*) FROM t1 WHERE a<>'';
+SELECT COUNT(*) FROM t1 WHERE a<>'?';
 #
 # Display all characters that have upper or lower case mapping.
 #
diff --git a/mysql-test/t/ctype_cp932_binlog_stm.test b/mysql-test/t/ctype_cp932_binlog_stm.test
index 304c9f5..1b92006 100644
--- a/mysql-test/t/ctype_cp932_binlog_stm.test
+++ b/mysql-test/t/ctype_cp932_binlog_stm.test
@@ -99,7 +99,7 @@ DROP TEMPORARY TABLE head, tail;
 SHOW CREATE TABLE t1;
 UPDATE t1 SET a=unhex(code) ORDER BY code;
 SELECT COUNT(*) FROM t1;
-SELECT COUNT(*) FROM t1 WHERE a<>'' AND OCTET_LENGTH(a)=1;
+SELECT COUNT(*) FROM t1 WHERE a<>'?' AND OCTET_LENGTH(a)=1;
 SELECT COUNT(*) FROM t1 WHERE a<>'' AND OCTET_LENGTH(a)=2;
 #
 # Display all characters that have upper or lower case mapping.
diff --git a/mysql-test/t/ctype_eucjpms.test b/mysql-test/t/ctype_eucjpms.test
index 49ca818..2dd806e 100644
--- a/mysql-test/t/ctype_eucjpms.test
+++ b/mysql-test/t/ctype_eucjpms.test
@@ -446,6 +446,7 @@ SHOW CREATE TABLE t1;
 UPDATE t1 SET a=unhex(code) ORDER BY code;
 SELECT COUNT(*) FROM t1;
 SELECT COUNT(*) FROM t1 WHERE a<>'';
+SELECT COUNT(*) FROM t1 WHERE a<>'' AND a<>'?';
 SELECT COUNT(*) FROM t1 WHERE a<>'' AND OCTET_LENGTH(a)=2;
 SELECT * FROM t1 WHERE CHAR_LENGTH(a)=2;
 SELECT COUNT(*) FROM t1 WHERE a<>'' AND OCTET_LENGTH(a)=3;
diff --git a/mysql-test/t/ctype_euckr.test b/mysql-test/t/ctype_euckr.test
index 155b8eb..33b3e96 100644
--- a/mysql-test/t/ctype_euckr.test
+++ b/mysql-test/t/ctype_euckr.test
@@ -95,8 +95,8 @@ WHERE t11.a >= 0x81 AND t11.a <= 0xFE
 AND   t12.a >= 0x41 AND t12.a <= 0xFE
 ORDER BY t11.a, t12.a;
 --enable_warnings
-SELECT s as bad_code FROM t2 WHERE a='' ORDER BY s;
-DELETE FROM t2 WHERE a='';
+SELECT s as bad_code FROM t2 WHERE a='?' ORDER BY s;
+DELETE FROM t2 WHERE a='?';
 ALTER TABLE t2 ADD u VARCHAR(1) CHARACTER SET utf8, ADD a2 VARCHAR(1) CHARACTER SET euckr;
 --disable_warnings
 UPDATE t2 SET u=a, a2=u;
@@ -145,7 +145,7 @@ ORDER BY head, tail;
 DROP TEMPORARY TABLE head, tail;
 SHOW CREATE TABLE t1;
 UPDATE t1 SET a=unhex(code) ORDER BY code;
-SELECT COUNT(*) FROM t1 WHERE a<>'';
+SELECT COUNT(*) FROM t1 WHERE a<>'?';
 #
 # Display all characters that have upper or lower case mapping.
 #
diff --git a/mysql-test/t/ctype_gb2312.test b/mysql-test/t/ctype_gb2312.test
index e3dd448..3ca6941 100644
--- a/mysql-test/t/ctype_gb2312.test
+++ b/mysql-test/t/ctype_gb2312.test
@@ -69,7 +69,7 @@ ORDER BY head, tail;
 DROP TEMPORARY TABLE head, tail;
 SHOW CREATE TABLE t1;
 UPDATE t1 SET a=unhex(code) ORDER BY code;
-SELECT COUNT(*) FROM t1 WHERE a<>'';
+SELECT COUNT(*) FROM t1 WHERE a<>'?';
 #
 # Display all characters that have upper or lower case mapping.
 #
diff --git a/mysql-test/t/ctype_gbk.test b/mysql-test/t/ctype_gbk.test
index d44009b..d98be88 100644
--- a/mysql-test/t/ctype_gbk.test
+++ b/mysql-test/t/ctype_gbk.test
@@ -104,7 +104,7 @@ ORDER BY head, tail;
 DROP TEMPORARY TABLE head, tail;
 SHOW CREATE TABLE t1;
 UPDATE t1 SET a=unhex(code) ORDER BY code;
-SELECT COUNT(*) FROM t1 WHERE a<>'';
+SELECT COUNT(*) FROM t1 WHERE a<>'?';
 #
 # Display all characters that have upper or lower case mapping.
 #
@@ -203,3 +203,228 @@ SET NAMES gbk;
 --echo #
 --echo # End of 10.0 tests
 --echo #
+
+
+--echo #
+--echo # Start of 10.1 tests
+--echo #
+
+--echo #
+--echo # MDEV-6566 Different INSERT behaviour on bad bytes with and without character set conversion
+--echo #
+
+CREATE TABLE t1 (
+  id INT NOT NULL AUTO_INCREMENT PRIMARY KEY,
+  b VARBINARY(16),
+  type SET('ascii','bad','head','tail','mb2','unassigned')
+);
+INSERT INTO t1 (b, type) VALUES (0x40,   'ascii,tail');
+INSERT INTO t1 (b, type) VALUES (0x80,   'tail');
+INSERT INTO t1 (b, type) VALUES (0x81,   'head,tail');
+INSERT INTO t1 (b, type) VALUES (0xFF,   'bad');
+INSERT INTO t1 (b, type) VALUES (0xA140, 'mb2,unassigned');
+INSERT INTO t1 (b, type) VALUES (0xA1A3, 'mb2');
+INSERT INTO t1 (b, type) VALUES (0xFE40, 'mb2');
+CREATE TABLE t2 AS SELECT
+  CONCAT(t1.b,t2.b) AS b,
+  t1.type AS type1,
+  t2.type AS type2,
+  CONCAT('[',t1.type,'][',t2.type,']') AS comment
+FROM t1, t1 t2;
+
+CREATE TABLE t3
+(
+  b VARBINARY(16),
+  c VARCHAR(16) CHARACTER SET gbk,
+  comment VARCHAR(128)
+);
+--echo #
+--echo # A combination of two valid characters, should give no warnings
+--echo # 
+INSERT INTO t3 (b,c,comment) SELECT b,b,comment FROM t2
+WHERE
+  (FIND_IN_SET('ascii',type1) OR FIND_IN_SET('mb2',type1)) AND
+  (FIND_IN_SET('ascii',type2) OR FIND_IN_SET('mb2',type2))
+ORDER BY b;
+SELECT COUNT(*) FROM t3;
+SELECT HEX(c),comment FROM t3 WHERE b=c ORDER BY b;
+SELECT HEX(c),HEX(b),comment FROM t3 WHERE b<>c ORDER BY b;
+DELETE FROM t2 WHERE b IN (SELECT b FROM t3);
+DELETE FROM t3;
+
+--echo #
+--echo # Sequences that start with a tail or a bad byte,
+--echo # or end with a bad byte, all should be fixed.
+--echo #
+INSERT INTO t3 (b,c,comment) SELECT b,b,comment FROM t2
+WHERE type1='tail' OR type1='bad' OR type2='bad'
+ORDER BY b;
+SELECT COUNT(*) FROM t3;
+SELECT HEX(c),comment FROM t3 WHERE b=c ORDER BY b;
+SELECT HEX(c),HEX(b),comment FROM t3 WHERE b<>c ORDER BY b;
+DELETE FROM t2 WHERE b IN (SELECT b FROM t3);
+DELETE FROM t3;
+
+--echo #
+--echo # Sequences that start with an ASCII or an MB2 character,
+--echo # followed by a non-ASCII tail, all should be fixed.
+--echo #
+INSERT INTO t3 (b,c,comment) SELECT b,b,comment FROM t2
+WHERE (FIND_IN_SET('mb2',type1) OR FIND_IN_SET('ascii',type1))
+      AND (FIND_IN_SET('tail',type2) AND NOT FIND_IN_SET('ascii',type2))
+ORDER BY b;
+SELECT COUNT(*) FROM t3;
+SELECT HEX(c),comment FROM t3 WHERE b=c ORDER BY b;
+SELECT HEX(c),HEX(b),comment FROM t3 WHERE b<>c ORDER BY b;
+DELETE FROM t2 WHERE b IN (SELECT b FROM t3);
+DELETE FROM t3;
+
+--echo #
+--echo # Other sequences
+--echo #
+INSERT INTO t3 (b,c,comment) SELECT b,b,comment FROM t2 ORDER BY b;
+SELECT COUNT(*) FROM t3;
+SELECT HEX(c),comment FROM t3 WHERE b=c ORDER BY b;
+SELECT HEX(c),HEX(b),comment FROM t3 WHERE b<>c ORDER BY b;
+DELETE FROM t3;
+DROP TABLE t3;
+DROP TABLE t2;
+
+CREATE TABLE t2 AS SELECT
+  CONCAT(t1.b,t2.b,t3.b) AS b,
+  t1.type AS type1,
+  t2.type AS type2,
+  t3.type AS type3,
+  CONCAT('[',t1.type,'][',t2.type,'][',t3.type,']') AS comment
+FROM t1, t1 t2,t1 t3;
+SELECT COUNT(*) FROM t2;
+
+CREATE TABLE t3
+(
+  b VARBINARY(16),
+  c VARCHAR(16) CHARACTER SET gbk,
+  comment VARCHAR(128)
+);
+
+--echo #
+--echo # A combination of three valid characters, should give no warnings
+--echo # 
+INSERT INTO t3 (b,c,comment) SELECT b,b,comment FROM t2
+WHERE
+  (FIND_IN_SET('ascii',type1) OR FIND_IN_SET('mb2',type1)) AND
+  (FIND_IN_SET('ascii',type2) OR FIND_IN_SET('mb2',type2)) AND
+  (FIND_IN_SET('ascii',type3) OR FIND_IN_SET('mb2',type3)) 
+ORDER BY b;
+SELECT COUNT(*) FROM t3;
+SELECT HEX(c),comment FROM t3 WHERE b=c ORDER BY b;
+SELECT HEX(c),HEX(b),comment FROM t3 WHERE b<>c ORDER BY b;
+DELETE FROM t2 WHERE b IN (SELECT b FROM t3);
+DELETE FROM t3;
+
+--echo #
+--echo # Sequences that start with a tail or a bad byte,
+--echo # or have a bad byte, all should be fixed.
+--echo #
+INSERT INTO t3 (b,c,comment) SELECT b,b,comment FROM t2
+WHERE type1='tail' OR type1='bad' OR type2='bad' OR type3='bad'
+ORDER BY b;
+SELECT COUNT(*) FROM t3;
+SELECT HEX(c),comment FROM t3 WHERE b=c ORDER BY b;
+SELECT HEX(c),HEX(b),comment FROM t3 WHERE b<>c ORDER BY b;
+DELETE FROM t2 WHERE b IN (SELECT b FROM t3);
+DELETE FROM t3;
+
+--echo #
+--echo # Sequences that start with an ASCII or an MB2 character,
+--echo # followed by a pure non-ASCII tail, all should be fixed.
+--echo #
+INSERT INTO t3 (b,c,comment) SELECT b,b,comment FROM t2
+WHERE (FIND_IN_SET('mb2',type1) OR FIND_IN_SET('ascii',type1))
+      AND type2='tail'
+ORDER BY b;
+SELECT COUNT(*) FROM t3;
+SELECT HEX(c),comment FROM t3 WHERE b=c ORDER BY b;
+SELECT HEX(c),HEX(b),comment FROM t3 WHERE b<>c ORDER BY b;
+DELETE FROM t2 WHERE b IN (SELECT b FROM t3);
+DELETE FROM t3;
+
+--echo #
+--echo # Sequences that consist of two ASCII or MB2 characters,
+--echo # followed by a pure non-ASCII tail, all should be fixed.
+--echo #
+INSERT INTO t3 (b,c,comment) SELECT b,b,comment FROM t2
+WHERE (FIND_IN_SET('mb2',type1) OR FIND_IN_SET('ascii',type1)) AND
+      (FIND_IN_SET('mb2',type2) OR FIND_IN_SET('ascii',type2)) AND
+      type3='tail'
+ORDER BY b;
+SELECT COUNT(*) FROM t3;
+SELECT HEX(c),comment FROM t3 WHERE b=c ORDER BY b;
+SELECT HEX(c),HEX(b),comment FROM t3 WHERE b<>c ORDER BY b;
+DELETE FROM t2 WHERE b IN (SELECT b FROM t3);
+DELETE FROM t3;
+
+
+--echo #
+--echo # Sequences that consist of two MB2 characters,
+--echo # followed by a non-ASCII head or tail, all should be fixed.
+--echo #
+INSERT INTO t3 (b,c,comment) SELECT b,b,comment FROM t2
+WHERE FIND_IN_SET('mb2',type1) AND FIND_IN_SET('mb2',type2)
+      AND NOT FIND_IN_SET('ascii',type3)
+      AND NOT FIND_IN_SET('mb2',type3)
+ORDER BY b;
+SELECT COUNT(*) FROM t3;
+SELECT HEX(c),comment FROM t3 WHERE b=c ORDER BY b;
+SELECT HEX(c),HEX(b),comment FROM t3 WHERE b<>c ORDER BY b;
+DELETE FROM t2 WHERE b IN (SELECT b FROM t3);
+DELETE FROM t3;
+
+
+--echo #
+--echo # Sequences that consist of head + tail + MB2 should go without warnings
+--echo #
+INSERT INTO t3 (b,c,comment) SELECT b,b,comment FROM t2
+WHERE FIND_IN_SET('head',type1)
+  AND FIND_IN_SET('tail',type2)
+  AND FIND_IN_SET('mb2',type3)
+ORDER BY b;
+SELECT COUNT(*) FROM t3;
+SELECT HEX(c),comment FROM t3 WHERE b=c ORDER BY b;
+SELECT HEX(c),HEX(b),comment FROM t3 WHERE b<>c ORDER BY b;
+DELETE FROM t2 WHERE b IN (SELECT b FROM t3);
+DELETE FROM t3;
+
+--echo #
+--echo # Sequences that consist of (ascii or mb2) + head + tail should go without warnings
+--echo #
+INSERT INTO t3 (b,c,comment) SELECT b,b,comment FROM t2
+WHERE (FIND_IN_SET('ascii',type1) OR FIND_IN_SET('mb2',type1))
+  AND FIND_IN_SET('head',type2)
+  AND FIND_IN_SET('tail',type3)
+ORDER BY b;
+SELECT COUNT(*) FROM t3;
+SELECT HEX(c),comment FROM t3 WHERE b=c ORDER BY b;
+SELECT HEX(c),HEX(b),comment FROM t3 WHERE b<>c ORDER BY b;
+DELETE FROM t2 WHERE b IN (SELECT b FROM t3);
+DELETE FROM t3;
+
+
+#--echo #
+#--echo # Other sequences
+#--echo #
+INSERT INTO t3 (b,c,comment) SELECT b,b,comment FROM t2 ORDER BY b;
+SELECT COUNT(*) FROM t3;
+SELECT HEX(c),comment FROM t3 WHERE b=c ORDER BY b;
+SELECT HEX(c),HEX(b),comment FROM t3 WHERE b<>c ORDER BY b;
+
+DROP TABLE t3;
+DROP TABLE t2;
+DROP TABLE t1;
+
+--echo #
+--echo # END OF MDEV-6566 Different INSERT behaviour on bad bytes with and without character set conversion
+--echo #
+
+--echo #
+--echo # End of 10.1 tests
+--echo #
diff --git a/mysql-test/t/ctype_sjis.test b/mysql-test/t/ctype_sjis.test
index ae110b2..2777cf6 100644
--- a/mysql-test/t/ctype_sjis.test
+++ b/mysql-test/t/ctype_sjis.test
@@ -145,7 +145,7 @@ DROP TEMPORARY TABLE head, tail;
 SHOW CREATE TABLE t1;
 UPDATE t1 SET a=unhex(code) ORDER BY code;
 SELECT COUNT(*) FROM t1;
-SELECT COUNT(*) FROM t1 WHERE a<>'' AND OCTET_LENGTH(a)=1;
+SELECT COUNT(*) FROM t1 WHERE a<>'?' AND OCTET_LENGTH(a)=1;
 SELECT COUNT(*) FROM t1 WHERE a<>'' AND OCTET_LENGTH(a)=2;
 #
 # Display all characters that have upper or lower case mapping.
diff --git a/mysql-test/t/ctype_ujis.test b/mysql-test/t/ctype_ujis.test
index 48dc0e6..94fc7ff 100644
--- a/mysql-test/t/ctype_ujis.test
+++ b/mysql-test/t/ctype_ujis.test
@@ -1276,7 +1276,7 @@ SHOW CREATE TABLE t1;
 
 UPDATE t1 SET a=unhex(code) ORDER BY code;
 SELECT COUNT(*) FROM t1;
-SELECT COUNT(*) FROM t1 WHERE a<>'';
+SELECT COUNT(*) FROM t1 WHERE a<>'?';
 SELECT COUNT(*) FROM t1 WHERE a<>'' AND OCTET_LENGTH(a)=2;
 SELECT COUNT(*) FROM t1 WHERE a<>'' AND OCTET_LENGTH(a)=3;
 #
diff --git a/mysql-test/t/ctype_utf8mb4.test b/mysql-test/t/ctype_utf8mb4.test
index 7a3c67b..232dd8f 100644
--- a/mysql-test/t/ctype_utf8mb4.test
+++ b/mysql-test/t/ctype_utf8mb4.test
@@ -1832,6 +1832,50 @@ set @@collation_connection=utf8mb4_bin;
 --echo # End of 5.6 tests
 --echo #
 
+--echo #
+--echo # Start of 10.0 tests
+--echo #
+
+--echo #
+--echo # MDEV-6566 Different INSERT behaviour on bad bytes with and without character set conversion
+--echo #
+
+--echo #
+--echo # This test sets session character set to 3-byte utf8,
+--echo # but then sends a 4-byte sequence (which is wrong for 3-byte utf8).
+--echo # It should be replaced to four question marks: '????' in both columns
+--echo # (i.e. four unknown bytes are replaced to four question marks),
+--echo # then the rest of the string should be stored, so we get 'a ???? b'.
+--echo #
+SET NAMES utf8;
+CREATE TABLE t1 (
+  a VARCHAR(32) CHARACTER SET utf8mb4,
+  b VARCHAR(32) CHARACTER SET utf8
+);
+INSERT INTO t1 SELECT 'a 😁 b', 'a 😁 b';
+SELECT * FROM t1;
+DROP TABLE t1;
+
+--echo #
+--echo # This test sets session character set to 4-byte utf8,
+--echo # then normally sends a 4-byte sequence.
+--echo # It should be stored AS IS into the utf8mb4 column (a),
+--echo # and should be replaced to a single question mark in the utf8 column (b)
+--echo # (i.e. one character that cannot be converted is replaced to one question mark).
+--echo #
+
+SET NAMES utf8mb4;
+CREATE TABLE t1 (
+  a VARCHAR(32) CHARACTER SET utf8mb4,
+  b VARCHAR(32) CHARACTER SET utf8
+);
+INSERT INTO t1 SELECT 'a 😁 b', 'a 😁 b';
+SELECT * FROM t1;
+DROP TABLE t1;
+
+--echo #
+--echo # End of 10.0 tests
+--echo #
 
 --echo #
 --echo # End of tests
diff --git a/sql/sql_string.cc b/sql/sql_string.cc
index 9fb462e..a0b6395 100644
--- a/sql/sql_string.cc
+++ b/sql/sql_string.cc
@@ -922,8 +922,8 @@ String *copy_if_not_alloced(String *to,String *from,uint32 from_length)
       my_charset_same(from_cs, to_cs))
   {
     m_cannot_convert_error_pos= NULL;
-    return to_cs->cset->copy_abort(to_cs, to, to_length, from, from_length,
-                                   nchars, this);
+    return to_cs->cset->copy_fix(to_cs, to, to_length, from, from_length,
+                                 nchars, this);
   }
   else
   {
diff --git a/strings/ctype-big5.c b/strings/ctype-big5.c
index d631bd0..569abfa 100644
--- a/strings/ctype-big5.c
+++ b/strings/ctype-big5.c
@@ -50,7 +50,7 @@
 
 #define MY_FUNCTION_NAME(x)   my_ ## x ## _big5
 #define IS_MB2_CHAR(x,y)      (isbig5head(x) && isbig5tail(y))
-#define WELL_FORMED_LEN
+#define DEFINE_ASIAN_ROUTINES
 #include "ctype-mb.ic"
 
 
@@ -6843,6 +6843,9 @@ static int func_uni_big5_onechar(int code){
   if (s+2>e)
     return MY_CS_TOOSMALL2;
 
+  if (!IS_MB2_CHAR(hi, s[1]))
+    return MY_CS_ILSEQ;
+
   if (!(pwc[0]=func_big5_uni_onechar((hi<<8)+s[1])))
     return -2;
   
@@ -6894,6 +6897,9 @@ static int func_uni_big5_onechar(int code){
   my_strtoll10_8bit,
   my_strntoull10rnd_8bit,
   my_scan_8bit,
+  my_charlen_big5,
+  my_well_formed_char_length_big5,
+  my_copy_fix_mb,
   my_copy_abort_mb,
 };
 
diff --git a/strings/ctype-bin.c b/strings/ctype-bin.c
index 6b53b34..dd9e1f8 100644
--- a/strings/ctype-bin.c
+++ b/strings/ctype-bin.c
@@ -549,6 +549,9 @@ uint my_instr_bin(CHARSET_INFO *cs __attribute__((unused)),
   my_strtoll10_8bit,
   my_strntoull10rnd_8bit,
   my_scan_8bit,
+  my_charlen_8bit,
+  my_well_formed_char_length_8bit,
+  my_copy_8bit,
   my_copy_8bit,
 };
 
diff --git a/strings/ctype-cp932.c b/strings/ctype-cp932.c
index 13129a6..f678d63 100644
--- a/strings/ctype-cp932.c
+++ b/strings/ctype-cp932.c
@@ -186,7 +186,7 @@
 #define MY_FUNCTION_NAME(x)   my_ ## x ## _cp932
 #define IS_8BIT_CHAR(x)       iscp932kata(x)
 #define IS_MB2_CHAR(x,y)      (iscp932head(x) && iscp932tail(y))
-#define WELL_FORMED_LEN
+#define DEFINE_ASIAN_ROUTINES
 #include "ctype-mb.ic"
 
 
@@ -34765,6 +34765,9 @@ size_t my_numcells_cp932(CHARSET_INFO *cs __attribute__((unused)),
   my_strtoll10_8bit,
   my_strntoull10rnd_8bit,
   my_scan_8bit,
+  my_charlen_cp932,
+  my_well_formed_char_length_cp932,
+  my_copy_fix_mb,
   my_copy_abort_mb,
 };
 
diff --git a/strings/ctype-euc_kr.c b/strings/ctype-euc_kr.c
index eab9539..0a94aec 100644
--- a/strings/ctype-euc_kr.c
+++ b/strings/ctype-euc_kr.c
@@ -204,7 +204,7 @@
 
 #define MY_FUNCTION_NAME(x)   my_ ## x ## _euckr
 #define IS_MB2_CHAR(x,y)      (iseuc_kr_head(x) && iseuc_kr_tail(y))
-#define WELL_FORMED_LEN
+#define DEFINE_ASIAN_ROUTINES
 #include "ctype-mb.ic"
 
 
@@ -9928,6 +9928,9 @@ static int func_uni_ksc5601_onechar(int code){
   if (s+2>e)
     return MY_CS_TOOSMALL2;
   
+  if (!IS_MB2_CHAR(hi, s[1]))
+    return MY_CS_ILSEQ;
+  
   if (!(pwc[0]=func_ksc5601_uni_onechar((hi<<8)+s[1])))
     return -2;
   
@@ -9979,6 +9982,9 @@ static int func_uni_ksc5601_onechar(int code){
   my_strtoll10_8bit,
   my_strntoull10rnd_8bit,
   my_scan_8bit,
+  my_charlen_euckr,
+  my_well_formed_char_length_euckr,
+  my_copy_fix_mb,
   my_copy_abort_mb,
 };
 
diff --git a/strings/ctype-eucjpms.c b/strings/ctype-eucjpms.c
index 52873c2..3832b44 100644
--- a/strings/ctype-eucjpms.c
+++ b/strings/ctype-eucjpms.c
@@ -198,7 +198,7 @@
 #define IS_MB2_KATA(x,y)      (iseucjpms_ss2(x) && iskata(y))
 #define IS_MB2_CHAR(x,y)      (IS_MB2_KATA(x,y) || IS_MB2_JIS(x,y))
 #define IS_MB3_CHAR(x,y,z)    (iseucjpms_ss3(x) && IS_MB2_JIS(y,z))
-#define WELL_FORMED_LEN
+#define DEFINE_ASIAN_ROUTINES
 #include "ctype-mb.ic"
 
 
@@ -67511,6 +67511,9 @@ size_t my_numcells_eucjpms(CHARSET_INFO *cs __attribute__((unused)),
     my_strtoll10_8bit,
     my_strntoull10rnd_8bit,
     my_scan_8bit,
+    my_charlen_eucjpms,
+    my_well_formed_char_length_eucjpms,
+    my_copy_fix_mb,
     my_copy_abort_mb,
 };
 
diff --git a/strings/ctype-gb2312.c b/strings/ctype-gb2312.c
index a4268b8..0478b94 100644
--- a/strings/ctype-gb2312.c
+++ b/strings/ctype-gb2312.c
@@ -167,7 +167,7 @@
 
 #define MY_FUNCTION_NAME(x)   my_ ## x ## _gb2312
 #define IS_MB2_CHAR(x,y)      (isgb2312head(x) && isgb2312tail(y))
-#define WELL_FORMED_LEN
+#define DEFINE_ASIAN_ROUTINES
 #include "ctype-mb.ic"
 
 
@@ -6330,7 +6330,10 @@ static int func_uni_gb2312_onechar(int code){
   
   if (s+2>e)
     return MY_CS_TOOSMALL2;
-  
+
+  if (!IS_MB2_CHAR(hi, s[1]))  
+    return MY_CS_ILSEQ;
+
   if (!(pwc[0]=func_gb2312_uni_onechar(((hi<<8)+s[1])&0x7F7F)))
     return -2;
   
@@ -6382,6 +6385,9 @@ static int func_uni_gb2312_onechar(int code){
   my_strtoll10_8bit,
   my_strntoull10rnd_8bit,
   my_scan_8bit,
+  my_charlen_gb2312,
+  my_well_formed_char_length_gb2312,
+  my_copy_fix_mb,
   my_copy_abort_mb,
 };
 
diff --git a/strings/ctype-gbk.c b/strings/ctype-gbk.c
index 392fdb4..9016093 100644
--- a/strings/ctype-gbk.c
+++ b/strings/ctype-gbk.c
@@ -45,7 +45,7 @@
 
 #define MY_FUNCTION_NAME(x)   my_ ## x ## _gbk
 #define IS_MB2_CHAR(x,y)      (isgbkhead(x) && isgbktail(y))
-#define WELL_FORMED_LEN
+#define DEFINE_ASIAN_ROUTINES
 #include "ctype-mb.ic"
 
 
@@ -10724,6 +10724,9 @@ static int func_uni_gbk_onechar(int code){
   if (s+2>e)
     return MY_CS_TOOSMALL2;
     
+  if (!IS_MB2_CHAR(hi, s[1]))
+    return MY_CS_ILSEQ;
+  
   if (!(pwc[0]=func_gbk_uni_onechar( (hi<<8) + s[1])))
     return -2;
   
@@ -10776,6 +10779,9 @@ static int func_uni_gbk_onechar(int code){
   my_strtoll10_8bit,
   my_strntoull10rnd_8bit,
   my_scan_8bit,
+  my_charlen_gbk,
+  my_well_formed_char_length_gbk,
+  my_copy_fix_mb,
   my_copy_abort_mb,
 };
 
diff --git a/strings/ctype-latin1.c b/strings/ctype-latin1.c
index 099f034..80852cc 100644
--- a/strings/ctype-latin1.c
+++ b/strings/ctype-latin1.c
@@ -422,6 +422,9 @@ int my_wc_mb_latin1(CHARSET_INFO *cs  __attribute__((unused)),
     my_strtoll10_8bit,
     my_strntoull10rnd_8bit,
     my_scan_8bit,
+    my_charlen_8bit,
+    my_well_formed_char_length_8bit,
+    my_copy_8bit,
     my_copy_8bit,
 };
 
diff --git a/strings/ctype-mb.c b/strings/ctype-mb.c
index fc41563..b380788 100644
--- a/strings/ctype-mb.c
+++ b/strings/ctype-mb.c
@@ -446,6 +446,99 @@ size_t my_well_formed_len_mb(CHARSET_INFO *cs, const char *b, const char *e,
 }
 
 
+/*
+  Append a badly formed piece of string; bad bytes are fixed to '?'.
+
+  @param cs        Character set; its charlen() and wc_mb() handlers are used
+  @param to        The destination string
+  @param to_end    The end of the destination string
+  @param from      The source string
+  @param from_end  The end of the source string
+  @param nchars    Write not more than "nchars" characters.
+  @param status    Copying status, must be previously initialized, e.g. using
+                   well_formed_char_length() on the original full source string.
+  @return          Number of bytes written to "to".
+*/
+static size_t
+my_append_fix_badly_formed_tail(CHARSET_INFO *cs,
+                                char *to, char *to_end,
+                                const char *from, const char *from_end,
+                                size_t nchars,
+                                MY_STRCOPY_STATUS *status)
+{
+  char *to0= to;                            /* Start of output, for the result */
+
+  for ( ; nchars; nchars--)
+  {
+    int chlen;
+    if ((chlen= cs->cset->charlen(cs, (const uchar*) from,
+                                      (const uchar *) from_end)) > 0)
+    {
+      /* Found a valid character */         /* chlen == 1..MBMAXLEN  */
+      DBUG_ASSERT(chlen <= (int) cs->mbmaxlen);
+      if (to + chlen > to_end)
+        goto end;                           /* Does not fit to "to" */
+      memcpy(to, from, (size_t) chlen);
+      from+= chlen;
+      to+= chlen;
+      continue;
+    }
+    if (chlen == MY_CS_ILSEQ)              /* chlen == 0 */
+    {
+      DBUG_ASSERT(from < from_end);  /* Shouldn't get MY_CS_ILSEQ if empty */
+      goto bad;
+    }
+    /* Got an incomplete character */       /* chlen == MY_CS_TOOSMALLXXX  */
+    DBUG_ASSERT(chlen >= MY_CS_TOOSMALL6); 
+    DBUG_ASSERT(chlen <= MY_CS_TOOSMALL);
+    if (from >= from_end)                   
+      break;                                /* End of the source string    */
+bad:
+    /* Bad or incomplete sequence: record only the first error position */
+    if (!status->m_well_formed_error_pos)
+      status->m_well_formed_error_pos= from;
+
+    if ((chlen= cs->cset->wc_mb(cs, '?', (uchar*) to, (uchar *) to_end)) <= 0)
+      break; /* Question mark does not fit into the destination */
+    to+= chlen;
+    from++;                                 /* One '?' replaces one source byte */
+  }
+end:
+  status->m_source_end_pos= from;           /* Where reading of the source stopped */
+  return to - to0;
+}
+
+/* Copy src to dst, fixing bad bytes to '?'. Returns bytes written to dst. */
+size_t
+my_copy_fix_mb(CHARSET_INFO *cs,
+               char *dst, size_t dst_length,
+               const char *src, size_t src_length,
+               size_t nchars, MY_STRCOPY_STATUS *status)
+{
+  size_t well_formed_nchars;  /* Characters in the well formed prefix */
+  size_t well_formed_length;  /* Bytes in the well formed prefix */
+  size_t fixed_length;        /* Bytes produced by the fixing pass */
+
+  set_if_smaller(src_length, dst_length); /* NOTE(review): presumably clamps src_length to dst_length - confirm macro */
+  well_formed_nchars= cs->cset->well_formed_char_length(cs,
+                                                        src, src + src_length,
+                                                        nchars, status);
+  DBUG_ASSERT(well_formed_nchars <= nchars);
+  memmove(dst, src, (well_formed_length= status->m_source_end_pos - src)); /* Copy the prefix as is */
+  if (!status->m_well_formed_error_pos)
+    return well_formed_length;             /* No error position recorded: done */
+
+  fixed_length= my_append_fix_badly_formed_tail(cs,
+                                                dst + well_formed_length,
+                                                dst + dst_length,
+                                                src + well_formed_length,
+                                                src + src_length,
+                                                nchars - well_formed_nchars,
+                                                status); /* Continue after the prefix */
+  return well_formed_length + fixed_length;
+}
+
+
 uint my_instr_mb(CHARSET_INFO *cs,
                  const char *b, size_t b_length, 
                  const char *s, size_t s_length,
diff --git a/strings/ctype-mb.ic b/strings/ctype-mb.ic
index 70cc89c..aad75f4 100644
--- a/strings/ctype-mb.ic
+++ b/strings/ctype-mb.ic
@@ -29,7 +29,70 @@
 #endif
 
 
-#ifdef WELL_FORMED_LEN
+#ifdef DEFINE_ASIAN_ROUTINES
+#define DEFINE_WELL_FORMED_LEN
+#define DEFINE_WELL_FORMED_CHAR_LENGTH
+#define DEFINE_CHARLEN
+#endif
+
+
+#ifdef DEFINE_CHARLEN
+/**
+  Returns length in bytes of the left-most character of a string.
+  @param cs - charset with mbminlen==1 and mbmaxlen<=4
+  @param b  - the beginning of the string
+  @param e  - the end of the string
+
+  @return   MY_CS_ILSEQ         if a bad byte sequence was found
+  @return   MY_CS_TOOSMALL[N(x)] if the string is empty or ended unexpectedly
+  @return   1..4                if a valid character was found
+*/
+static int
+MY_FUNCTION_NAME(charlen)(CHARSET_INFO *cs __attribute__((unused)),
+                          const uchar *b, const uchar *e)
+{
+  DBUG_ASSERT(cs->mbminlen == 1);
+  DBUG_ASSERT(cs->mbmaxlen <= 4);
+
+  if (b >= e)
+    return MY_CS_TOOSMALL; /* Empty input */
+  if ((uchar) b[0] < 128)
+    return 1; /* Single byte ASCII character */
+
+#ifdef IS_8BIT_CHAR
+  if (IS_8BIT_CHAR(b[0]))
+  {      
+    /* Single byte non-ASCII character, e.g. half width kana in sjis */
+    return 1;
+  }
+#endif
+
+  if (b + 2 > e)
+    return MY_CS_TOOSMALLN(2); /* Lone non-ASCII byte at the end of input */
+  if (IS_MB2_CHAR(b[0], b[1]))
+    return 2; /* Double byte character */
+
+#ifdef IS_MB3_CHAR
+  if (b + 3 > e)
+    return MY_CS_TOOSMALLN(3);
+  if (IS_MB3_CHAR(b[0], b[1], b[2]))
+    return 3; /* Three-byte character */
+#endif
+
+#ifdef IS_MB4_CHAR
+  if (b + 4 > e)
+    return MY_CS_TOOSMALLN(4);
+  if (IS_MB4_CHAR(b[0], b[1], b[2], b[3]))
+    return 4; /* Four-byte character */
+#endif
+
+  /* Wrong byte sequence */
+  return MY_CS_ILSEQ;
+}
+#endif /* DEFINE_CHARLEN */
+
+
+#ifdef DEFINE_WELL_FORMED_LEN
 /**
   Returns well formed length of a character string with
   variable character length for character sets with:
@@ -91,4 +154,105 @@
   return b - b0;
 }
 
-#endif /* WELL_FORMED_LEN */
+#endif /* DEFINE_WELL_FORMED_LEN */
+
+
+
+#ifdef DEFINE_WELL_FORMED_CHAR_LENGTH
+/**
+  Returns well formed length of a string in characters (rather than in
+  bytes), scanning at most "nchars" characters and stopping at bad bytes.
+  Version for character sets that define IS_MB?_CHAR(), e.g. big5.
+*/
+static size_t
+MY_FUNCTION_NAME(well_formed_char_length)(CHARSET_INFO *cs __attribute__((unused)),
+                                          const char *b, const char *e,
+                                          size_t nchars,
+                                          MY_STRCOPY_STATUS *status)
+{
+  size_t nchars0= nchars;
+  for ( ; b < e && nchars ; nchars--)
+  {
+    if ((uchar) b[0] < 128)
+    {
+      b++; /* Single byte ASCII character */
+      continue;
+    }
+
+    if (b + 2 <= e && IS_MB2_CHAR(b[0], b[1]))
+    {
+      b+= 2; /* Double byte character */
+      continue;
+    }
+
+#ifdef IS_MB3_CHAR
+    if (b + 3 <= e && IS_MB3_CHAR(b[0], b[1], b[2]))
+    {
+      b+= 3; /* Three-byte character */
+      continue;
+    }
+#endif
+
+#ifdef IS_MB4_CHAR
+    if (b + 4 <= e && IS_MB4_CHAR(b[0], b[1], b[2], b[3]))
+    {
+      b+= 4; /* Four-byte character */
+      continue;
+    }
+#endif
+
+#ifdef IS_8BIT_CHAR
+    if (IS_8BIT_CHAR(b[0]))
+    {      
+      b++; /* Single byte non-ASCII character, e.g. half width kana in sjis */
+      continue;
+    }
+#endif
+
+    /* Wrong byte sequence: stop here and report its position */
+    status->m_source_end_pos= status->m_well_formed_error_pos= b;
+    return nchars0 - nchars;
+  }
+  status->m_source_end_pos= b;
+  status->m_well_formed_error_pos= NULL;
+  return nchars0 - nchars;
+}
+#endif /* DEFINE_WELL_FORMED_CHAR_LENGTH */
+
+
+#ifdef DEFINE_WELL_FORMED_CHAR_LENGTH_USING_CHARLEN
+#ifndef CHARLEN
+#error CHARLEN is not defined
+#endif
+/**
+  Returns well formed length of a string 
+  measured in characters (rather than in bytes).
+  Version for character sets that define CHARLEN(), e.g. utf8.
+  CHARLEN(cs,b,e) must use the same return code convention that mb_wc() does:
+  - a positive number in the range [1-mbmaxlen] if a valid
+    single-byte or multi-byte character was found
+  - MY_CS_ILSEQ (0) on a bad byte sequence
+  - MY_CS_TOOSMALLxx if the incoming sequence is incomplete
+*/
+static size_t
+MY_FUNCTION_NAME(well_formed_char_length)(CHARSET_INFO *cs __attribute__((unused)),
+                                          const char *b, const char *e,
+                                          size_t nchars,
+                                          MY_STRCOPY_STATUS *status)
+{
+  size_t nchars0= nchars;
+  int chlen;
+  for ( ; nchars ; nchars--, b+= chlen)
+  {
+    if ((chlen= CHARLEN(cs, (uchar*) b, (uchar*) e)) <= 0)
+    {
+      /* Stopping with bytes left is an error; stopping at "e" is not */
+      status->m_well_formed_error_pos= b < e ? b : NULL;
+      status->m_source_end_pos= b;
+      return nchars0 - nchars;
+    }
+  }
+  status->m_well_formed_error_pos= NULL;
+  status->m_source_end_pos= b;
+  return nchars0 - nchars;
+}
+#endif /* DEFINE_WELL_FORMED_CHAR_LENGTH_USING_CHARLEN */
diff --git a/strings/ctype-simple.c b/strings/ctype-simple.c
index b010c52..35ae191 100644
--- a/strings/ctype-simple.c
+++ b/strings/ctype-simple.c
@@ -248,6 +248,13 @@ int my_strcasecmp_8bit(CHARSET_INFO * cs,const char *s, const char *t)
 }
 
 
+int my_charlen_8bit(CHARSET_INFO *cs __attribute__((unused)),
+                    const uchar *str, const uchar *end)
+{
+  return str >= end ? MY_CS_TOOSMALL : 1; /* any available byte is 1 char */
+}
+
+
 int my_mb_wc_8bit(CHARSET_INFO *cs,my_wc_t *wc,
 		  const uchar *str,
 		  const uchar *end __attribute__((unused)))
@@ -1108,6 +1115,19 @@ size_t my_well_formed_len_8bit(CHARSET_INFO *cs __attribute__((unused)),
 }
 
 
+size_t
+my_well_formed_char_length_8bit(CHARSET_INFO *cs __attribute__((unused)),
+                                const char *start, const char *end,
+                                size_t nchars, MY_STRCOPY_STATUS *status)
+{
+  size_t nbytes= (size_t) (end - start);
+  size_t res= MY_MIN(nbytes, nchars); /* one byte per character */
+  status->m_well_formed_error_pos= NULL; /* no byte is ever reported as bad */
+  status->m_source_end_pos= start + res;
+  return res;
+}
+
+
 /*
   Copy a 8-bit string. Not more than "nchars" character are copied.
 */
@@ -1906,6 +1926,9 @@ uint my_strxfrm_flag_normalize(uint flags, uint maximum)
     my_strtoll10_8bit,
     my_strntoull10rnd_8bit,
     my_scan_8bit,
+    my_charlen_8bit,
+    my_well_formed_char_length_8bit,
+    my_copy_8bit,
     my_copy_8bit,
 };
 
diff --git a/strings/ctype-sjis.c b/strings/ctype-sjis.c
index 432e2e5..cba4fdb 100644
--- a/strings/ctype-sjis.c
+++ b/strings/ctype-sjis.c
@@ -187,7 +187,7 @@
 #define MY_FUNCTION_NAME(x)   my_ ## x ## _sjis
 #define IS_8BIT_CHAR(x)       issjiskata(x)
 #define IS_MB2_CHAR(x,y)      (issjishead(x) && issjistail(y))
-#define WELL_FORMED_LEN
+#define DEFINE_ASIAN_ROUTINES
 #include "ctype-mb.ic"
 
 
@@ -34144,6 +34144,9 @@ size_t my_numcells_sjis(CHARSET_INFO *cs __attribute__((unused)),
   my_strtoll10_8bit,
   my_strntoull10rnd_8bit,
   my_scan_8bit,
+  my_charlen_sjis,
+  my_well_formed_char_length_sjis,
+  my_copy_fix_mb,
   my_copy_abort_mb,
 };
 
diff --git a/strings/ctype-tis620.c b/strings/ctype-tis620.c
index 343fb81..965c4fe 100644
--- a/strings/ctype-tis620.c
+++ b/strings/ctype-tis620.c
@@ -886,6 +886,9 @@ int my_wc_mb_tis620(CHARSET_INFO *cs  __attribute__((unused)),
     my_strtoll10_8bit,
     my_strntoull10rnd_8bit,
     my_scan_8bit,
+    my_charlen_8bit,
+    my_well_formed_char_length_8bit,
+    my_copy_8bit,
     my_copy_8bit,
 };
 
diff --git a/strings/ctype-ucs2.c b/strings/ctype-ucs2.c
index 8f234e9..ce0585e 100644
--- a/strings/ctype-ucs2.c
+++ b/strings/ctype-ucs2.c
@@ -92,8 +92,81 @@
 }
 
 
+typedef enum
+{
+  MY_CHAR_COPY_OK=       0, /* The character was OK */
+  MY_CHAR_COPY_ERROR=    1, /* The character was not OK, could not fix it */
+  MY_CHAR_COPY_FIXED=    2  /* The character was not OK, was fixed to '?' */
+} my_char_copy_status_t;
+
+
+/*
+  Copies an incomplete character, left-padding it with 0x00 bytes.
+  
+  @param cs           Character set
+  @param dst          The destination string
+  @param dst_length   Space available in dst
+  @param src          The source string
+  @param src_length   Length of src
+  @param nchars       Copy not more than nchars characters.
+                      The "nchars" parameter of the caller.
+                      Only 0 and non-0 are important here.
+  @param fix          What to do if after zero-padding didn't get a valid 
+                      character:
+                      - FALSE - exit with error.
+                      - TRUE  - try to put '?' instead.
+  
+  @return  MY_CHAR_COPY_OK     if after zero-padding got a valid character.
+                               cs->mbminlen bytes were written to "dst".
+  @return  MY_CHAR_COPY_FIXED  if after zero-padding did not get a valid
+                               character, but wrote '?' to the destination
+                               string instead.
+                               cs->mbminlen bytes were written to "dst".
+  @return  MY_CHAR_COPY_ERROR  If failed and no valid character is in "dst".
+                               Possible reasons:
+                               - dst_length was too short
+                               - nchars was 0
+                               - the character after padding appeared not
+                                 to be valid, and could not fix it to '?'.
+*/
+static my_char_copy_status_t
+my_copy_incomplete_char(CHARSET_INFO *cs,
+                        char *dst, size_t dst_length,
+                        const char *src, size_t src_length,
+                        size_t nchars, my_bool fix)
+{
+  size_t pad_length;
+  size_t src_offset= src_length % cs->mbminlen;
+  if (dst_length < cs->mbminlen || !nchars)
+    return MY_CHAR_COPY_ERROR;
+
+  pad_length= cs->mbminlen - src_offset;
+  bzero(dst, pad_length);
+  memmove(dst + pad_length, src, src_offset);
+  /*
+    In some cases left zero-padding can create an incorrect character.
+    For example:
+      INSERT INTO t1 (utf32_column) VALUES (0x110000);
+    We'll pad the value to 0x00110000, which is a wrong UTF32 sequence!
+    The valid characters range is limited to 0x00000000..0x0010FFFF.
+    
+    Make sure we didn't pad to an incorrect character.
+  */
+  if (cs->cset->charlen(cs, (uchar *) dst, (uchar *) dst + cs->mbminlen) ==
+      (int) cs->mbminlen)
+    return MY_CHAR_COPY_OK;
+
+  if (fix &&
+      cs->cset->wc_mb(cs, '?', (uchar *) dst, (uchar *) dst + cs->mbminlen) ==
+      (int) cs->mbminlen)
+    return MY_CHAR_COPY_FIXED;
+
+  return MY_CHAR_COPY_ERROR;
+}
+
+
 /*
-  Copy an UCS2/UTF16/UTF32 string.
+  Copy an UCS2/UTF16/UTF32 string, abort on a bad byte sequence.
   Not more that "nchars" characters are copied.
 
   UCS2/UTF16/UTF32 may need to prepend zero some bytes,
@@ -107,47 +180,52 @@
                          const char *src, size_t src_length,
                          size_t nchars, MY_STRCOPY_STATUS *status)
 {
-  size_t src_offset;
-
-  if ((src_offset= (src_length % cs->mbminlen)))
+  size_t src_offset= src_length % cs->mbminlen;
+  if (!src_offset)
+    return  my_copy_abort_mb(cs, dst, dst_length,
+                                 src, src_length, nchars, status);
+  if (my_copy_incomplete_char(cs, dst, dst_length,
+                                  src, src_length, nchars, FALSE))
   {
-    int well_formed_error;
-    size_t pad_length;
-    if (dst_length < cs->mbminlen || !nchars)
-    {
-      status->m_source_end_pos= status->m_well_formed_error_pos= src;
-      return 0;
-    }
+    status->m_source_end_pos= status->m_well_formed_error_pos= src;
+    return 0;
+  }
+  return
+    cs->mbminlen /* The left-padded character */ +
+    my_copy_abort_mb(cs, dst + cs->mbminlen, dst_length - cs->mbminlen,
+                         src + src_offset, src_length - src_offset,
+                         nchars - 1, status);
+}
 
-    pad_length= cs->mbminlen - src_offset;
-    bzero(dst, pad_length);
-    memmove(dst + pad_length, src, src_offset);
-    /*
-      In some cases left zero-padding can create an incorrect character.
-      For example:
-        INSERT INTO t1 (utf32_column) VALUES (0x110000);
-      We'll pad the value to 0x00110000, which is a wrong UTF32 sequence!
-      The valid characters range is limited to 0x00000000..0x0010FFFF.
-      
-      Make sure we didn't pad to an incorrect character.
-    */
-    if (cs->cset->well_formed_len(cs,
-                                  dst, dst + cs->mbminlen, 1,
-                                  &well_formed_error) != cs->mbminlen)
-    {
-      status->m_source_end_pos= status->m_well_formed_error_pos= src;
-      return 0;
-    }
-    nchars--;
-    src+= src_offset;
-    src_length-= src_offset;
-    dst+= cs->mbminlen;
-    dst_length-= cs->mbminlen;
-    return
-      cs->mbminlen /* The left-padded character */ +
-      my_copy_abort_mb(cs, dst, dst_length, src, src_length, nchars, status);
+
+/*
+  Copy an UCS2/UTF16/UTF32 string, fix bad characters.
+*/
+static size_t
+my_copy_fix_mb2_or_mb4(CHARSET_INFO *cs,
+                       char *dst, size_t dst_length,
+                       const char *src, size_t src_length,
+                       size_t nchars, MY_STRCOPY_STATUS *status)
+{
+  size_t length2, src_offset= src_length % cs->mbminlen;
+  my_char_copy_status_t padstatus;
+  
+  if (!src_offset)
+    return  my_copy_fix_mb(cs, dst, dst_length,
+                               src, src_length, nchars, status);
+  if ((padstatus= my_copy_incomplete_char(cs, dst, dst_length,
+                                          src, src_length, nchars, TRUE)) ==
+      MY_CHAR_COPY_ERROR)
+  {
+    status->m_source_end_pos= status->m_well_formed_error_pos= src;
+    return 0;
   }
-  return  my_copy_abort_mb(cs, dst, dst_length, src, src_length, nchars, status);
+  length2= my_copy_fix_mb(cs, dst + cs->mbminlen, dst_length - cs->mbminlen,
+                          src + src_offset, src_length - src_offset,
+                          nchars - 1, status);
+  if (padstatus == MY_CHAR_COPY_FIXED)
+    status->m_well_formed_error_pos= src;
+  return cs->mbminlen /* The left-padded character */ + length2;
 }
 
 
@@ -1475,6 +1553,24 @@
 }
 
 
+static int
+my_charlen_utf16(CHARSET_INFO *cs, const uchar *str, const uchar *end)
+{
+  my_wc_t wc; /* receives the decoded character; only the length is used */
+  return cs->cset->mb_wc(cs, &wc, str, end);
+}
+
+
+#define MY_FUNCTION_NAME(x)       my_ ## x ## _utf16
+#define CHARLEN(cs,str,end)       my_charlen_utf16(cs,str,end)
+#define DEFINE_WELL_FORMED_CHAR_LENGTH_USING_CHARLEN
+#include "ctype-mb.ic"
+#undef MY_FUNCTION_NAME
+#undef CHARLEN
+#undef DEFINE_WELL_FORMED_CHAR_LENGTH_USING_CHARLEN
+/* Defines my_well_formed_char_length_utf16 */
+
+
 static uint
 my_mbcharlen_utf16(CHARSET_INFO *cs  __attribute__((unused)),
                    uint c __attribute__((unused)))
@@ -1742,6 +1838,9 @@
   my_strtoll10_mb2,
   my_strntoull10rnd_mb2_or_mb4,
   my_scan_mb2,
+  my_charlen_utf16,
+  my_well_formed_char_length_utf16,
+  my_copy_fix_mb2_or_mb4,
   my_copy_abort_mb2_or_mb4,
 };
 
@@ -1912,6 +2011,9 @@ struct charset_info_st my_charset_utf16_bin=
   my_strtoll10_mb2,
   my_strntoull10rnd_mb2_or_mb4,
   my_scan_mb2,
+  my_charlen_utf16,
+  my_well_formed_char_length_utf16,
+  my_copy_fix_mb2_or_mb4,
   my_copy_abort_mb2_or_mb4,
 };
 
@@ -1987,6 +2089,13 @@ struct charset_info_st my_charset_utf16le_bin=
 
 #ifdef HAVE_CHARSET_utf32
 
+/*
+  Check if b0 and b1 start a valid UTF32 four-byte sequence.
+  Don't accept characters greater than U+10FFFF.
+*/
+#define IS_UTF32_MBHEAD4(b0,b1) (!(b0) && ((uchar) (b1) <= 0x10))
+
+
 static int
 my_utf32_uni(CHARSET_INFO *cs __attribute__((unused)),
              my_wc_t *pwc, const uchar *s, const uchar *e)
@@ -1994,7 +2103,7 @@ struct charset_info_st my_charset_utf16le_bin=
   if (s + 4 > e)
     return MY_CS_TOOSMALL4;
   *pwc= (s[0] << 24) + (s[1] << 16) + (s[2] << 8) + (s[3]);
-  return 4;
+  return *pwc > 0x10FFFF ? MY_CS_ILSEQ : 4;
 }
 
 
@@ -2004,7 +2113,10 @@ struct charset_info_st my_charset_utf16le_bin=
 {
   if (s + 4 > e) 
     return MY_CS_TOOSMALL4;
-  
+
+  if (wc > 0x10FFFF)  
+    return MY_CS_ILUNI;
+
   s[0]= (uchar) (wc >> 24);
   s[1]= (uchar) (wc >> 16) & 0xFF;
   s[2]= (uchar) (wc >> 8)  & 0xFF;
@@ -2263,10 +2375,29 @@ struct charset_info_st my_charset_utf16le_bin=
                   const char *b,
                   const char *e)
 {
-  return b + 4 > e ? 0 : 4;
+  return b + 4 > e || !IS_UTF32_MBHEAD4(b[0], b[1]) ? 0 : 4;
+}
+
+
+static int
+my_charlen_utf32(CHARSET_INFO *cs __attribute__((unused)),
+                 const uchar *b, const uchar *e)
+{
+  return b + 4 > e ? MY_CS_TOOSMALL4 : /* fewer than 4 bytes available */
+         IS_UTF32_MBHEAD4(b[0], b[1]) ? 4 : MY_CS_ILSEQ;
 }
 
 
+#define MY_FUNCTION_NAME(x)       my_ ## x ## _utf32
+#define CHARLEN(cs,str,end)       my_charlen_utf32(cs,str,end)
+#define DEFINE_WELL_FORMED_CHAR_LENGTH_USING_CHARLEN
+#include "ctype-mb.ic"
+#undef MY_FUNCTION_NAME
+#undef CHARLEN
+#undef DEFINE_WELL_FORMED_CHAR_LENGTH_USING_CHARLEN
+/* Defines my_well_formed_char_length_utf32 */
+
+
 static uint
 my_mbcharlen_utf32(CHARSET_INFO *cs  __attribute__((unused)) , 
                    uint c __attribute__((unused)))
@@ -2579,8 +2710,7 @@ struct charset_info_st my_charset_utf16le_bin=
   }
   for (; b < e; b+= 4)
   {
-    /* Don't accept characters greater than U+10FFFF */
-    if (b[0] || (uchar) b[1] > 0x10)
+    if (!IS_UTF32_MBHEAD4(b[0], b[1]))
     {
       *error= 1;
       return b - b0;
@@ -2827,6 +2957,9 @@ void my_fill_utf32(CHARSET_INFO *cs,
   my_strtoll10_utf32,
   my_strntoull10rnd_mb2_or_mb4,
   my_scan_utf32,
+  my_charlen_utf32,
+  my_well_formed_char_length_utf32,
+  my_copy_fix_mb2_or_mb4,
   my_copy_abort_mb2_or_mb4,
 };
 
@@ -2961,6 +3094,14 @@ struct charset_info_st my_charset_utf32_bin=
 };
 
 
+static int
+my_charlen_ucs2(CHARSET_INFO *cs __attribute__((unused)),
+		const uchar *s, const uchar *e)
+{
+  return s + 2 > e ? MY_CS_TOOSMALLN(2) : 2; /* any two bytes are one char */
+}
+
+
 static int my_ucs2_uni(CHARSET_INFO *cs __attribute__((unused)),
 		       my_wc_t * pwc, const uchar *s, const uchar *e)
 {
@@ -3264,6 +3405,31 @@ size_t my_well_formed_len_ucs2(CHARSET_INFO *cs __attribute__((unused)),
 }
 
 
+/* Count well-formed UCS2 characters (2 bytes each) in [b,e), at most nchars */
+static size_t
+my_well_formed_char_length_ucs2(CHARSET_INFO *cs __attribute__((unused)),
+                                const char *b, const char *e,
+                                size_t nchars, MY_STRCOPY_STATUS *status)
+{
+  size_t length= e - b;
+  if (nchars <= length / 2) /* not "nchars * 2 <= length": that can wrap */
+  {
+    status->m_well_formed_error_pos= NULL;
+    status->m_source_end_pos= b + (nchars * 2);
+    return nchars;
+  }
+  if (length % 2)
+  {
+    /* A trailing odd byte is an incomplete character: report its position */
+    status->m_well_formed_error_pos= status->m_source_end_pos= e - 1;
+    return length / 2;
+  }
+  status->m_well_formed_error_pos= NULL;
+  status->m_source_end_pos= e;
+  return length / 2;
+}
+
+
 static
 int my_wildcmp_ucs2_ci(CHARSET_INFO *cs,
 		    const char *str,const char *str_end,
@@ -3446,6 +3612,9 @@ void my_hash_sort_ucs2_bin(CHARSET_INFO *cs __attribute__((unused)),
     my_strtoll10_mb2,
     my_strntoull10rnd_mb2_or_mb4,
     my_scan_mb2,
+    my_charlen_ucs2,
+    my_well_formed_char_length_ucs2,
+    my_copy_fix_mb2_or_mb4,
     my_copy_abort_mb2_or_mb4,
 };
 
diff --git a/strings/ctype-ujis.c b/strings/ctype-ujis.c
index 99f5be3..104e317 100644
--- a/strings/ctype-ujis.c
+++ b/strings/ctype-ujis.c
@@ -197,7 +197,7 @@
 #define IS_MB2_KATA(x,y)      (isujis_ss2(x)    && iskata(y))
 #define IS_MB2_CHAR(x, y)     (IS_MB2_KATA(x,y) || IS_MB2_JIS(x,y))
 #define IS_MB3_CHAR(x, y, z)  (isujis_ss3(x)    && IS_MB2_JIS(y,z))
-#define WELL_FORMED_LEN
+#define DEFINE_ASIAN_ROUTINES
 #include "ctype-mb.ic"
 
 
@@ -67255,6 +67255,9 @@ size_t my_numcells_eucjp(CHARSET_INFO *cs __attribute__((unused)),
     my_strtoll10_8bit,
     my_strntoull10rnd_8bit,
     my_scan_8bit,
+    my_charlen_ujis,
+    my_well_formed_char_length_ujis,
+    my_copy_fix_mb,
     my_copy_abort_mb,
 };
 
diff --git a/strings/ctype-utf8.c b/strings/ctype-utf8.c
index 1116228..df70590 100644
--- a/strings/ctype-utf8.c
+++ b/strings/ctype-utf8.c
@@ -5446,8 +5446,8 @@ int my_wildcmp_utf8(CHARSET_INFO *cs,
 
 
 static
-int my_valid_mbcharlen_utf8(CHARSET_INFO *cs __attribute__((unused)),
-                            const uchar *s, const uchar *e)
+int my_charlen_utf8(CHARSET_INFO *cs __attribute__((unused)),
+                    const uchar *s, const uchar *e)
 {
   uchar c;
 
@@ -5515,7 +5515,7 @@ int my_valid_mbcharlen_utf8(CHARSET_INFO *cs __attribute__((unused)),
   {
     int mb_len;
 
-    if ((mb_len= my_valid_mbcharlen_utf8(cs, (uchar*) b, (uchar*) e)) <= 0)
+    if ((mb_len= my_charlen_utf8(cs, (uchar*) b, (uchar*) e)) <= 0)
     {
       *error= b < e ? 1 : 0;
       break;
@@ -5526,9 +5526,20 @@ int my_valid_mbcharlen_utf8(CHARSET_INFO *cs __attribute__((unused)),
   return (size_t) (b - b_start);
 }
 
+
+#define MY_FUNCTION_NAME(x)       my_ ## x ## _utf8
+#define CHARLEN(cs,str,end)       my_charlen_utf8(cs,str,end)
+#define DEFINE_WELL_FORMED_CHAR_LENGTH_USING_CHARLEN
+#include "ctype-mb.ic"
+#undef MY_FUNCTION_NAME
+#undef CHARLEN
+#undef DEFINE_WELL_FORMED_CHAR_LENGTH_USING_CHARLEN
+/* Defines my_well_formed_char_length_utf8 */
+
+
 static uint my_ismbchar_utf8(CHARSET_INFO *cs,const char *b, const char *e)
 {
-  int  res= my_valid_mbcharlen_utf8(cs, (const uchar*)b, (const uchar*)e);
+  int  res= my_charlen_utf8(cs, (const uchar*) b, (const uchar*) e);
   return (res>1) ? res : 0;
 }
 
@@ -5615,6 +5626,9 @@ static uint my_mbcharlen_utf8(CHARSET_INFO *cs  __attribute__((unused)),
     my_strtoll10_8bit,
     my_strntoull10rnd_8bit,
     my_scan_8bit,
+    my_charlen_utf8,
+    my_well_formed_char_length_utf8,
+    my_copy_fix_mb,
     my_copy_abort_mb,
 };
 
@@ -7125,6 +7139,24 @@ static int hexlo(int x)
 }
 
 
+static int
+my_charlen_filename(CHARSET_INFO *cs, const uchar *str, const uchar *end)
+{
+  my_wc_t wc; /* receives the decoded character; only the length is used */
+  return cs->cset->mb_wc(cs, &wc, str, end);
+}
+
+
+#define MY_FUNCTION_NAME(x)       my_ ## x ## _filename
+#define CHARLEN(cs,str,end)       my_charlen_filename(cs,str,end)
+#define DEFINE_WELL_FORMED_CHAR_LENGTH_USING_CHARLEN
+#include "ctype-mb.ic"
+#undef MY_FUNCTION_NAME
+#undef CHARLEN
+#undef DEFINE_WELL_FORMED_CHAR_LENGTH_USING_CHARLEN
+/* Defines my_well_formed_char_length_filename */
+
+
 static MY_COLLATION_HANDLER my_collation_filename_handler =
 {
     NULL,               /* init */
@@ -7169,6 +7201,9 @@ static int hexlo(int x)
     my_strtoll10_8bit,
     my_strntoull10rnd_8bit,
     my_scan_8bit,
+    my_charlen_filename,
+    my_well_formed_char_length_filename,
+    my_copy_fix_mb,
     my_copy_abort_mb,
 };
 
@@ -7954,8 +7989,8 @@ int main()
 
 
 static int
-my_valid_mbcharlen_utf8mb4(CHARSET_INFO *cs __attribute__((unused)),
-                           const uchar *s, const uchar *e)
+my_charlen_utf8mb4(CHARSET_INFO *cs __attribute__((unused)),
+                   const uchar *s, const uchar *e)
 {
   uchar c;
 
@@ -8015,7 +8050,7 @@ size_t my_well_formed_len_utf8mb4(CHARSET_INFO *cs,
   {
     int mb_len;
 
-    if ((mb_len= my_valid_mbcharlen_utf8mb4(cs, (uchar*) b, (uchar*) e)) <= 0)
+    if ((mb_len= my_charlen_utf8mb4(cs, (uchar*) b, (uchar*) e)) <= 0)
     {
       *error= b < e ? 1 : 0;
       break;
@@ -8027,10 +8062,19 @@ size_t my_well_formed_len_utf8mb4(CHARSET_INFO *cs,
 }
 
 
+#define MY_FUNCTION_NAME(x)       my_ ## x ## _utf8mb4
+#define CHARLEN(cs,str,end)       my_charlen_utf8mb4(cs,str,end)
+#define DEFINE_WELL_FORMED_CHAR_LENGTH_USING_CHARLEN
+#include "ctype-mb.ic"
+#undef MY_FUNCTION_NAME
+#undef CHARLEN
+#undef DEFINE_WELL_FORMED_CHAR_LENGTH_USING_CHARLEN
+/* Defines my_well_formed_char_length_utf8mb4 */
+
 static uint
 my_ismbchar_utf8mb4(CHARSET_INFO *cs, const char *b, const char *e)
 {
-  int res= my_valid_mbcharlen_utf8mb4(cs, (const uchar*)b, (const uchar*)e);
+  int res= my_charlen_utf8mb4(cs, (const uchar*) b, (const uchar*) e);
   return (res > 1) ? res : 0;
 }
 
@@ -8113,6 +8157,9 @@ size_t my_well_formed_len_utf8mb4(CHARSET_INFO *cs,
   my_strtoll10_8bit,
   my_strntoull10rnd_8bit,
   my_scan_8bit,
+  my_charlen_utf8mb4,
+  my_well_formed_char_length_utf8mb4,
+  my_copy_fix_mb,
   my_copy_abort_mb,
 };
 

Follow ups