maria-developers team mailing list archive
-
maria-developers team
-
Mailing list archive
-
Message #09478
Re: Please review MDEV-6353 my_ismbchar() and my_mbcharlen() refactoring
Hello Sergei,
On 03/30/2016 08:41 PM, Sergei Golubchik wrote:
> Hi, Alexander!
>
> On Mar 30, Alexander Barkov wrote:
>> commit 4ab28aca964fa646aa55676db813dbed66b83093
>> Author: Alexander Barkov <bar@xxxxxxxxxxx>
>> Date: Mon Mar 28 11:05:51 2016 +0400
>>
>> MDEV-6353 my_ismbchar() and my_mbcharlen() refactoring, part 1.
>> Fixing the debug_sync and create_options related code not to use my_mbcharlen():
>> - debug_sync_token() now uses cs->cset->scan().
>> Passing the end of the string pointer to debug_sync_update() in order to be
>> able to use scan(). Adding support for a new pattern scan(MY_SEQ_NONSPACES).
>> It scans everything that scan(MY_SEQ_SPACES) does not.
>> - Fixing set_one_value() to iterate bytes one by one. This is safe, because
>> ',' cannot be a part of a multi-byte character in UTF8.
>
> Looks ok, thanks!
> But don't push this commit alone, please.
> Wait until all parts of MDEV-6353 are ready and push them together.
Please review the final version, now removing all my_mbcharlen().
Thanks.
>
> Regards,
> Sergei
> Chief Architect MariaDB
> and security@xxxxxxxxxxx
>
diff --git a/client/mysqltest.cc b/client/mysqltest.cc
index f09ad31..afb746c 100644
--- a/client/mysqltest.cc
+++ b/client/mysqltest.cc
@@ -6571,37 +6571,35 @@ int read_line(char *buf, int size)
if (!skip_char)
{
- /* Could be a multibyte character */
- /* This code is based on the code in "sql_load.cc" */
-#ifdef USE_MB
- int charlen = my_mbcharlen(charset_info, (unsigned char) c);
- /* We give up if multibyte character is started but not */
- /* completed before we pass buf_end */
- if ((charlen > 1) && (p + charlen) <= buf_end)
+ *p++= c;
+ if (use_mb(charset_info))
{
- int i;
- char* mb_start = p;
-
- *p++ = c;
-
- for (i= 1; i < charlen; i++)
- {
- c= my_getc(cur_file->file);
- if (feof(cur_file->file))
- goto found_eof;
- *p++ = c;
- }
- if (! my_ismbchar(charset_info, mb_start, p))
- {
- /* It was not a multiline char, push back the characters */
- /* We leave first 'c', i.e. pretend it was a normal char */
- while (p-1 > mb_start)
- my_ungetc(*--p);
- }
+ const char *mb_start= p - 1;
+ /* Could be a multibyte character */
+ /* See a similar code in "sql_load.cc" */
+ for ( ; p < buf_end; )
+ {
+ int charlen= my_charlen(charset_info, mb_start, p);
+ if (charlen > 0)
+ break; /* Full character */
+ if (MY_CS_IS_TOOSMALL(charlen))
+ {
+ /* We give up if multibyte character is started but not */
+ /* completed before we pass buf_end */
+ c= my_getc(cur_file->file);
+ if (feof(cur_file->file))
+ goto found_eof;
+ *p++ = c;
+ continue;
+ }
+ DBUG_ASSERT(charlen == MY_CS_ILSEQ);
+ /* It was not a multiline char, push back the characters */
+ /* We leave first 'c', i.e. pretend it was a normal char */
+ while (p - 1 > mb_start)
+ my_ungetc(*--p);
+ break;
+ }
}
- else
-#endif
- *p++= c;
}
}
die("The input buffer is too small for this query.x\n" \
diff --git a/include/m_ctype.h b/include/m_ctype.h
index c892d576..bb633f8 100644
--- a/include/m_ctype.h
+++ b/include/m_ctype.h
@@ -186,6 +186,7 @@ extern MY_UNI_CTYPE my_uni_ctype[256];
#define MY_SEQ_INTTAIL 1
#define MY_SEQ_SPACES 2
+#define MY_SEQ_NONSPACES 3 /* Skip non-space characters, including bad bytes */
/* My charsets_list flags */
#define MY_CS_COMPILED 1 /* compiled-in sets */
@@ -403,7 +404,6 @@ struct my_charset_handler_st
{
my_bool (*init)(struct charset_info_st *, MY_CHARSET_LOADER *loader);
/* Multibyte routines */
- uint (*mbcharlen)(CHARSET_INFO *, uint c);
size_t (*numchars)(CHARSET_INFO *, const char *b, const char *e);
size_t (*charpos)(CHARSET_INFO *, const char *b, const char *e,
size_t pos);
@@ -779,7 +779,6 @@ size_t my_well_formed_char_length_8bit(CHARSET_INFO *cs,
size_t nchars,
MY_STRCOPY_STATUS *status);
int my_charlen_8bit(CHARSET_INFO *, const uchar *str, const uchar *end);
-uint my_mbcharlen_8bit(CHARSET_INFO *, uint c);
/* Functions for multibyte charsets */
@@ -1010,11 +1009,19 @@ int my_charlen(CHARSET_INFO *cs, const char *str, const char *end)
return (cs->cset->charlen)(cs, (const uchar *) str,
(const uchar *) end);
}
-#ifdef USE_MB
-#define my_mbcharlen(s, a) ((s)->cset->mbcharlen((s),(a)))
-#else
-#define my_mbcharlen(s, a) 1
-#endif
+
+
+/**
+  Return the byte length of the character starting at str, treating
+  broken and incomplete byte sequences as a single byte.
+
+  @param cs   Character set whose length routine is used via my_charlen()
+  @param str  Start of the character; must be below end
+  @param end  End of the buffer
+  @return     The my_charlen() result when it is positive, otherwise 1,
+              so that loops advancing by the returned length always
+              make progress
+*/
+static inline
+int my_charlen_fix(CHARSET_INFO *cs, const char *str, const char *end)
+{
+  int char_length;
+  DBUG_ASSERT(str < end);
+  char_length= my_charlen(cs, str, end);
+  /* A non-positive result is a bad or truncated sequence: count it as 1 byte */
+  return char_length > 0 ? char_length : 1;
+}
+
#define my_caseup_str(s, a) ((s)->cset->caseup_str((s), (a)))
#define my_casedn_str(s, a) ((s)->cset->casedn_str((s), (a)))
diff --git a/mysys/charset.c b/mysys/charset.c
index 3c134dc..253dc72 100644
--- a/mysys/charset.c
+++ b/mysys/charset.c
@@ -54,6 +54,12 @@ get_collation_number_internal(const char *name)
}
+/*
+  Check whether my_charlen() reports the single byte ch as
+  MY_CS_IS_TOOSMALL, i.e. a one-byte buffer is too short for the
+  character that ch starts (presumably a multi-byte head byte).
+  Returns TRUE in that case and FALSE otherwise.
+*/
+static my_bool is_multi_byte_ident(CHARSET_INFO *cs, uchar ch)
+{
+ int chlen= my_charlen(cs, (const char *) &ch, (const char *) &ch + 1);
+ return MY_CS_IS_TOOSMALL(chlen) ? TRUE : FALSE;
+}
+
static my_bool init_state_maps(struct charset_info_st *cs)
{
uint i;
@@ -73,10 +79,8 @@ static my_bool init_state_maps(struct charset_info_st *cs)
state_map[i]=(uchar) MY_LEX_IDENT;
else if (my_isdigit(cs,i))
state_map[i]=(uchar) MY_LEX_NUMBER_IDENT;
-#if defined(USE_MB) && defined(USE_MB_IDENT)
- else if (my_mbcharlen(cs, i)>1)
+ else if (is_multi_byte_ident(cs, i))
state_map[i]=(uchar) MY_LEX_IDENT;
-#endif
else if (my_isspace(cs,i))
state_map[i]=(uchar) MY_LEX_SKIP;
else
@@ -909,8 +913,8 @@ size_t escape_string_for_mysql(CHARSET_INFO *charset_info,
{
char escape= 0;
#ifdef USE_MB
- int tmp_length;
- if (use_mb_flag && (tmp_length= my_ismbchar(charset_info, from, end)))
+ int tmp_length= my_charlen(charset_info, from, end);
+ if (use_mb_flag && tmp_length > 1)
{
if (to + tmp_length > to_end)
{
@@ -933,7 +937,7 @@ size_t escape_string_for_mysql(CHARSET_INFO *charset_info,
multi-byte character into a valid one. For example, 0xbf27 is not
a valid GBK character, but 0xbf5c is. (0x27 = ', 0x5c = \)
*/
- if (use_mb_flag && (tmp_length= my_mbcharlen(charset_info, *from)) > 1)
+ if (use_mb_flag && tmp_length != 1)
escape= *from;
else
#endif
diff --git a/sql/create_options.cc b/sql/create_options.cc
index 66515be..3011c4b 100644
--- a/sql/create_options.cc
+++ b/sql/create_options.cc
@@ -184,7 +184,7 @@ static bool set_one_value(ha_create_table_option *opt,
{
for (end=start;
*end && *end != ',';
- end+= my_mbcharlen(system_charset_info, *end)) /* no-op */;
+ end++) /* no-op */;
if (!my_strnncoll(system_charset_info,
(uchar*)start, end-start,
(uchar*)value->str, value->length))
diff --git a/sql/debug_sync.cc b/sql/debug_sync.cc
index 8b3412e..e84f1e8 100644
--- a/sql/debug_sync.cc
+++ b/sql/debug_sync.cc
@@ -847,16 +847,16 @@ static bool debug_sync_set_action(THD *thd, st_debug_sync_action *action)
to the string terminator ASCII NUL ('\0').
*/
-static char *debug_sync_token(char **token_p, uint *token_length_p, char *ptr)
+static char *debug_sync_token(char **token_p, uint *token_length_p,
+ char *ptr, char *ptrend)
{
DBUG_ASSERT(token_p);
DBUG_ASSERT(token_length_p);
DBUG_ASSERT(ptr);
/* Skip leading space */
- while (my_isspace(system_charset_info, *ptr))
- ptr+= my_mbcharlen(system_charset_info, (uchar) *ptr);
-
+ ptr+= system_charset_info->cset->scan(system_charset_info,
+ ptr, ptrend, MY_SEQ_SPACES);
if (!*ptr)
{
ptr= NULL;
@@ -867,8 +867,8 @@ static char *debug_sync_token(char **token_p, uint *token_length_p, char *ptr)
*token_p= ptr;
/* Find token end. */
- while (*ptr && !my_isspace(system_charset_info, *ptr))
- ptr+= my_mbcharlen(system_charset_info, (uchar) *ptr);
+ ptr+= system_charset_info->cset->scan(system_charset_info,
+ ptr, ptrend, MY_SEQ_NONSPACES);
/* Get token length. */
*token_length_p= ptr - *token_p;
@@ -876,18 +876,19 @@ static char *debug_sync_token(char **token_p, uint *token_length_p, char *ptr)
/* If necessary, terminate token. */
if (*ptr)
{
+ DBUG_ASSERT(ptr < ptrend);
/* Get terminator character length. */
- uint mbspacelen= my_mbcharlen(system_charset_info, (uchar) *ptr);
+ int mbspacelen= my_charlen(system_charset_info, ptr, ptrend);
/* Terminate token. */
*ptr= '\0';
/* Skip the terminator. */
- ptr+= mbspacelen;
+ ptr+= mbspacelen < 1 ? 1 : mbspacelen;
/* Skip trailing space */
- while (my_isspace(system_charset_info, *ptr))
- ptr+= my_mbcharlen(system_charset_info, (uchar) *ptr);
+ ptr+= system_charset_info->cset->scan(system_charset_info,
+ ptr, ptrend, MY_SEQ_SPACES);
}
end:
@@ -917,7 +918,8 @@ static char *debug_sync_token(char **token_p, uint *token_length_p, char *ptr)
undefined in this case.
*/
-static char *debug_sync_number(ulong *number_p, char *actstrptr)
+static char *debug_sync_number(ulong *number_p, char *actstrptr,
+ char *actstrend)
{
char *ptr;
char *ept;
@@ -927,7 +929,7 @@ static char *debug_sync_number(ulong *number_p, char *actstrptr)
DBUG_ASSERT(actstrptr);
/* Get token from string. */
- if (!(ptr= debug_sync_token(&token, &token_length, actstrptr)))
+ if (!(ptr= debug_sync_token(&token, &token_length, actstrptr, actstrend)))
goto end;
*number_p= strtoul(token, &ept, 10);
@@ -971,7 +973,7 @@ static char *debug_sync_number(ulong *number_p, char *actstrptr)
for the string.
*/
-static bool debug_sync_eval_action(THD *thd, char *action_str)
+static bool debug_sync_eval_action(THD *thd, char *action_str, char *action_end)
{
st_debug_sync_action *action= NULL;
const char *errmsg;
@@ -986,7 +988,7 @@ static bool debug_sync_eval_action(THD *thd, char *action_str)
/*
Get debug sync point name. Or a special command.
*/
- if (!(ptr= debug_sync_token(&token, &token_length, action_str)))
+ if (!(ptr= debug_sync_token(&token, &token_length, action_str, action_end)))
{
errmsg= "Missing synchronization point name";
goto err;
@@ -1009,7 +1011,7 @@ static bool debug_sync_eval_action(THD *thd, char *action_str)
/*
Get kind of action to be taken at sync point.
*/
- if (!(ptr= debug_sync_token(&token, &token_length, ptr)))
+ if (!(ptr= debug_sync_token(&token, &token_length, ptr, action_end)))
{
/* No action present. Try special commands. Token unchanged. */
@@ -1090,7 +1092,7 @@ static bool debug_sync_eval_action(THD *thd, char *action_str)
if (!my_strcasecmp(system_charset_info, token, "SIGNAL"))
{
/* It is SIGNAL. Signal name must follow. */
- if (!(ptr= debug_sync_token(&token, &token_length, ptr)))
+ if (!(ptr= debug_sync_token(&token, &token_length, ptr, action_end)))
{
errmsg= "Missing signal name after action SIGNAL";
goto err;
@@ -1108,7 +1110,7 @@ static bool debug_sync_eval_action(THD *thd, char *action_str)
action->execute= 1;
/* Get next token. If none follows, set action. */
- if (!(ptr= debug_sync_token(&token, &token_length, ptr)))
+ if (!(ptr= debug_sync_token(&token, &token_length, ptr, action_end)))
goto set_action;
}
@@ -1118,7 +1120,7 @@ static bool debug_sync_eval_action(THD *thd, char *action_str)
if (!my_strcasecmp(system_charset_info, token, "WAIT_FOR"))
{
/* It is WAIT_FOR. Wait_for signal name must follow. */
- if (!(ptr= debug_sync_token(&token, &token_length, ptr)))
+ if (!(ptr= debug_sync_token(&token, &token_length, ptr, action_end)))
{
errmsg= "Missing signal name after action WAIT_FOR";
goto err;
@@ -1137,7 +1139,7 @@ static bool debug_sync_eval_action(THD *thd, char *action_str)
action->timeout= opt_debug_sync_timeout;
/* Get next token. If none follows, set action. */
- if (!(ptr= debug_sync_token(&token, &token_length, ptr)))
+ if (!(ptr= debug_sync_token(&token, &token_length, ptr, action_end)))
goto set_action;
/*
@@ -1146,14 +1148,14 @@ static bool debug_sync_eval_action(THD *thd, char *action_str)
if (!my_strcasecmp(system_charset_info, token, "TIMEOUT"))
{
/* It is TIMEOUT. Number must follow. */
- if (!(ptr= debug_sync_number(&action->timeout, ptr)))
+ if (!(ptr= debug_sync_number(&action->timeout, ptr, action_end)))
{
errmsg= "Missing valid number after TIMEOUT";
goto err;
}
/* Get next token. If none follows, set action. */
- if (!(ptr= debug_sync_token(&token, &token_length, ptr)))
+ if (!(ptr= debug_sync_token(&token, &token_length, ptr, action_end)))
goto set_action;
}
}
@@ -1174,14 +1176,14 @@ static bool debug_sync_eval_action(THD *thd, char *action_str)
}
/* Number must follow. */
- if (!(ptr= debug_sync_number(&action->execute, ptr)))
+ if (!(ptr= debug_sync_number(&action->execute, ptr, action_end)))
{
errmsg= "Missing valid number after EXECUTE";
goto err;
}
/* Get next token. If none follows, set action. */
- if (!(ptr= debug_sync_token(&token, &token_length, ptr)))
+ if (!(ptr= debug_sync_token(&token, &token_length, ptr, action_end)))
goto set_action;
}
@@ -1191,14 +1193,14 @@ static bool debug_sync_eval_action(THD *thd, char *action_str)
if (!my_strcasecmp(system_charset_info, token, "HIT_LIMIT"))
{
/* Number must follow. */
- if (!(ptr= debug_sync_number(&action->hit_limit, ptr)))
+ if (!(ptr= debug_sync_number(&action->hit_limit, ptr, action_end)))
{
errmsg= "Missing valid number after HIT_LIMIT";
goto err;
}
/* Get next token. If none follows, set action. */
- if (!(ptr= debug_sync_token(&token, &token_length, ptr)))
+ if (!(ptr= debug_sync_token(&token, &token_length, ptr, action_end)))
goto set_action;
}
@@ -1246,7 +1248,7 @@ static bool debug_sync_eval_action(THD *thd, char *action_str)
terminators in the string. So we need to take a copy here.
*/
-bool debug_sync_update(THD *thd, char *val_str)
+bool debug_sync_update(THD *thd, char *val_str, size_t len)
{
DBUG_ENTER("debug_sync_update");
DBUG_PRINT("debug_sync", ("set action: '%s'", val_str));
@@ -1255,8 +1257,9 @@ bool debug_sync_update(THD *thd, char *val_str)
debug_sync_eval_action() places '\0' in the string, which itself
must be '\0' terminated.
*/
+ DBUG_ASSERT(val_str[len] == '\0');
DBUG_RETURN(opt_debug_sync_timeout ?
- debug_sync_eval_action(thd, val_str) :
+ debug_sync_eval_action(thd, val_str, val_str + len) :
FALSE);
}
@@ -1592,7 +1595,7 @@ bool debug_sync_set_action(THD *thd, const char *action_str, size_t len)
DBUG_ASSERT(action_str);
value= strmake_root(thd->mem_root, action_str, len);
- rc= debug_sync_eval_action(thd, value);
+ rc= debug_sync_eval_action(thd, value, value + len);
DBUG_RETURN(rc);
}
diff --git a/sql/debug_sync.h b/sql/debug_sync.h
index bf1b316..339a211 100644
--- a/sql/debug_sync.h
+++ b/sql/debug_sync.h
@@ -45,6 +45,9 @@ extern void debug_sync_init_thread(THD *thd);
extern void debug_sync_end_thread(THD *thd);
extern bool debug_sync_set_action(THD *thd, const char *action_str, size_t len);
+extern bool debug_sync_update(THD *thd, char *val_str, size_t len);
+extern uchar *debug_sync_value_ptr(THD *thd);
+
#endif /* defined(ENABLED_DEBUG_SYNC) */
#endif /* DEBUG_SYNC_INCLUDED */
diff --git a/sql/sql_class.cc b/sql/sql_class.cc
index e3b7056..e29608b 100644
--- a/sql/sql_class.cc
+++ b/sql/sql_class.cc
@@ -3226,7 +3226,7 @@ int select_export::send_data(List<Item> &items)
if ((NEED_ESCAPING(*pos) ||
(check_second_byte &&
- my_mbcharlen(character_set_client, (uchar) *pos) == 2 &&
+ ((uchar) *pos) > 0x7F /* a potential MB2HEAD */ &&
pos + 1 < end &&
NEED_ESCAPING(pos[1]))) &&
/*
diff --git a/sql/sys_vars.ic b/sql/sys_vars.ic
index 373f583..bb290b6 100644
--- a/sql/sys_vars.ic
+++ b/sql/sys_vars.ic
@@ -1434,6 +1434,9 @@ class Sys_var_plugin: public sys_var
};
#if defined(ENABLED_DEBUG_SYNC)
+
+#include "debug_sync.h"
+
/**
The class for @@debug_sync session-only variable
*/
@@ -1462,15 +1465,21 @@ class Sys_var_debug_sync :public sys_var
String str(buff, sizeof(buff), system_charset_info), *res;
if (!(res=var->value->val_str(&str)))
+ {
var->save_result.string_value.str= const_cast<char*>("");
+ var->save_result.string_value.length= 0;
+ }
else
+ {
var->save_result.string_value.str= thd->strmake(res->ptr(), res->length());
+ var->save_result.string_value.length= res->length();
+ }
return false;
}
bool session_update(THD *thd, set_var *var)
{
- extern bool debug_sync_update(THD *thd, char *val_str);
- return debug_sync_update(thd, var->save_result.string_value.str);
+ return debug_sync_update(thd, var->save_result.string_value.str,
+ var->save_result.string_value.length);
}
bool global_update(THD *thd, set_var *var)
{
@@ -1488,7 +1497,6 @@ class Sys_var_debug_sync :public sys_var
}
uchar *session_value_ptr(THD *thd, const LEX_STRING *base)
{
- extern uchar *debug_sync_value_ptr(THD *thd);
return debug_sync_value_ptr(thd);
}
uchar *global_value_ptr(THD *thd, const LEX_STRING *base)
diff --git a/storage/federated/ha_federated.cc b/storage/federated/ha_federated.cc
index 478a8f1..2334848 100644
--- a/storage/federated/ha_federated.cc
+++ b/storage/federated/ha_federated.cc
@@ -561,8 +561,7 @@ static bool append_ident(String *string, const char *name, size_t length,
for (name_end= name+length; name < name_end; name+= clen)
{
uchar c= *(uchar *) name;
- if (!(clen= my_mbcharlen(system_charset_info, c)))
- clen= 1;
+ clen= my_charlen_fix(system_charset_info, name, name_end);
if (clen == 1 && c == (uchar) quote_char &&
(result= string->append(&quote_char, 1, system_charset_info)))
goto err;
diff --git a/storage/federatedx/ha_federatedx.cc b/storage/federatedx/ha_federatedx.cc
index 890d1bf..56d900c 100644
--- a/storage/federatedx/ha_federatedx.cc
+++ b/storage/federatedx/ha_federatedx.cc
@@ -500,8 +500,7 @@ bool append_ident(String *string, const char *name, uint length,
for (name_end= name+length; name < name_end; name+= clen)
{
uchar c= *(uchar *) name;
- if (!(clen= my_mbcharlen(system_charset_info, c)))
- clen= 1;
+ clen= my_charlen_fix(system_charset_info, name, name_end);
if (clen == 1 && c == (uchar) quote_char &&
(result= string->append(&quote_char, 1, system_charset_info)))
goto err;
diff --git a/storage/spider/spd_db_conn.cc b/storage/spider/spd_db_conn.cc
index 69a05dc..a65338c 100644
--- a/storage/spider/spd_db_conn.cc
+++ b/storage/spider/spd_db_conn.cc
@@ -1370,7 +1370,7 @@ int spider_db_append_name_with_quote_str(
for (name_end = name + length; name < name_end; name += length)
{
head_code = *name;
- if (!(length = my_mbcharlen(system_charset_info, (uchar) head_code)))
+ if ((length= my_charlen(system_charset_info, name, name_end)) < 1)
{
my_message(ER_SPIDER_WRONG_CHARACTER_IN_NAME_NUM,
ER_SPIDER_WRONG_CHARACTER_IN_NAME_STR, MYF(0));
diff --git a/strings/ctype-big5.c b/strings/ctype-big5.c
index 9629319..9ae394e 100644
--- a/strings/ctype-big5.c
+++ b/strings/ctype-big5.c
@@ -848,12 +848,6 @@ static uint16 big5strokexfrm(uint16 i)
}
-static uint mbcharlen_big5(CHARSET_INFO *cs __attribute__((unused)), uint c)
-{
- return (isbig5head(c)? 2 : 1);
-}
-
-
/* page 0 0xA140-0xC7FC */
static const uint16 tab_big5_uni0[]={
0x3000,0xFF0C,0x3001,0x3002,0xFF0E,0x2022,0xFF1B,0xFF1A,
@@ -6731,7 +6725,6 @@ static MY_COLLATION_HANDLER my_collation_handler_big5_bin=
static MY_CHARSET_HANDLER my_charset_big5_handler=
{
NULL, /* init */
- mbcharlen_big5,
my_numchars_mb,
my_charpos_mb,
my_well_formed_len_big5,
diff --git a/strings/ctype-bin.c b/strings/ctype-bin.c
index 8331de3..aab7f2b 100644
--- a/strings/ctype-bin.c
+++ b/strings/ctype-bin.c
@@ -225,13 +225,6 @@ static int my_strcasecmp_bin(CHARSET_INFO * cs __attribute__((unused)),
}
-uint my_mbcharlen_8bit(CHARSET_INFO *cs __attribute__((unused)),
- uint c __attribute__((unused)))
-{
- return 1;
-}
-
-
static int my_mb_wc_bin(CHARSET_INFO *cs __attribute__((unused)),
my_wc_t *wc,
const uchar *str,
@@ -510,7 +503,6 @@ static MY_COLLATION_HANDLER my_collation_binary_handler =
static MY_CHARSET_HANDLER my_charset_handler=
{
NULL, /* init */
- my_mbcharlen_8bit, /* mbcharlen */
my_numchars_8bit,
my_charpos_8bit,
my_well_formed_len_8bit,
diff --git a/strings/ctype-cp932.c b/strings/ctype-cp932.c
index 2163662..151fac8 100644
--- a/strings/ctype-cp932.c
+++ b/strings/ctype-cp932.c
@@ -191,12 +191,6 @@ static const uchar sort_order_cp932[]=
#include "ctype-mb.ic"
-static uint mbcharlen_cp932(CHARSET_INFO *cs __attribute__((unused)),uint c)
-{
- return (iscp932head((uchar) c) ? 2 : 1);
-}
-
-
#define cp932code(c,d) ((((uint) (uchar)(c)) << 8) | (uint) (uchar) (d))
@@ -34687,7 +34681,6 @@ static MY_COLLATION_HANDLER my_collation_handler_cp932_bin=
static MY_CHARSET_HANDLER my_charset_handler=
{
NULL, /* init */
- mbcharlen_cp932,
my_numchars_mb,
my_charpos_mb,
my_well_formed_len_cp932,
diff --git a/strings/ctype-euc_kr.c b/strings/ctype-euc_kr.c
index 19ed586..d238913 100644
--- a/strings/ctype-euc_kr.c
+++ b/strings/ctype-euc_kr.c
@@ -210,12 +210,6 @@ static const uchar sort_order_euc_kr[]=
#include "ctype-mb.ic"
-static uint mbcharlen_euc_kr(CHARSET_INFO *cs __attribute__((unused)),uint c)
-{
- return (iseuc_kr_head(c) ? 2 : 1);
-}
-
-
static MY_UNICASE_CHARACTER cA3[256]=
{
{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0}, /* xx00 */
@@ -9979,7 +9973,6 @@ static MY_COLLATION_HANDLER my_collation_handler_euckr_bin=
static MY_CHARSET_HANDLER my_charset_handler=
{
NULL, /* init */
- mbcharlen_euc_kr,
my_numchars_mb,
my_charpos_mb,
my_well_formed_len_euckr,
diff --git a/strings/ctype-eucjpms.c b/strings/ctype-eucjpms.c
index 469d3a5..caafd1d 100644
--- a/strings/ctype-eucjpms.c
+++ b/strings/ctype-eucjpms.c
@@ -221,12 +221,6 @@ static const uchar sort_order_eucjpms[]=
#include "strcoll.ic"
-static uint mbcharlen_eucjpms(CHARSET_INFO *cs __attribute__((unused)),uint c)
-{
- return (iseucjpms(c)? 2: iseucjpms_ss2(c)? 2: iseucjpms_ss3(c)? 3: 1);
-}
-
-
/* Case info pages for JIS-X-0208 range */
static MY_UNICASE_CHARACTER cA2[256]=
@@ -67511,7 +67505,6 @@ static MY_COLLATION_HANDLER my_collation_eucjpms_bin_handler =
static MY_CHARSET_HANDLER my_charset_handler=
{
NULL, /* init */
- mbcharlen_eucjpms,
my_numchars_mb,
my_charpos_mb,
my_well_formed_len_eucjpms,
diff --git a/strings/ctype-gb2312.c b/strings/ctype-gb2312.c
index a77237c..dbb92fa 100644
--- a/strings/ctype-gb2312.c
+++ b/strings/ctype-gb2312.c
@@ -173,12 +173,6 @@ static const uchar sort_order_gb2312[]=
#include "ctype-mb.ic"
-static uint mbcharlen_gb2312(CHARSET_INFO *cs __attribute__((unused)),uint c)
-{
- return (isgb2312head(c)? 2 : 1);
-}
-
-
static MY_UNICASE_CHARACTER cA2[256]=
{
{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0}, /* xx00 */
@@ -6385,7 +6379,6 @@ static MY_COLLATION_HANDLER my_collation_handler_gb2312_bin=
static MY_CHARSET_HANDLER my_charset_handler=
{
NULL, /* init */
- mbcharlen_gb2312,
my_numchars_mb,
my_charpos_mb,
my_well_formed_len_gb2312,
diff --git a/strings/ctype-gbk.c b/strings/ctype-gbk.c
index e4e015a..617d72d 100644
--- a/strings/ctype-gbk.c
+++ b/strings/ctype-gbk.c
@@ -3451,11 +3451,6 @@ static uint16 gbksortorder(uint16 i)
}
-static uint mbcharlen_gbk(CHARSET_INFO *cs __attribute__((unused)),uint c)
-{
- return (isgbkhead(c)? 2 : 1);
-}
-
/* page 0 0x8140-0xFE4F */
static const uint16 tab_gbk_uni0[]={
0x4E02,0x4E04,0x4E05,0x4E06,0x4E0F,0x4E12,0x4E17,0x4E1F,
@@ -10666,7 +10661,6 @@ static MY_COLLATION_HANDLER my_collation_handler_gbk_bin=
static MY_CHARSET_HANDLER my_charset_handler=
{
NULL, /* init */
- mbcharlen_gbk,
my_numchars_mb,
my_charpos_mb,
my_well_formed_len_gbk,
diff --git a/strings/ctype-latin1.c b/strings/ctype-latin1.c
index aba63d9..fe25534 100644
--- a/strings/ctype-latin1.c
+++ b/strings/ctype-latin1.c
@@ -396,7 +396,6 @@ int my_wc_mb_latin1(CHARSET_INFO *cs __attribute__((unused)),
static MY_CHARSET_HANDLER my_charset_handler=
{
NULL, /* init */
- my_mbcharlen_8bit,
my_numchars_8bit,
my_charpos_8bit,
my_well_formed_len_8bit,
diff --git a/strings/ctype-mb.c b/strings/ctype-mb.c
index 3fa66cb..56b3309 100644
--- a/strings/ctype-mb.c
+++ b/strings/ctype-mb.c
@@ -230,7 +230,7 @@ int my_strcasecmp_mb(CHARSET_INFO * cs,const char *s, const char *t)
if (*s++ != *t++)
return 1;
}
- else if (my_mbcharlen(cs, *t) > 1)
+ else if (my_charlen(cs, t, t + cs->mbmaxlen) > 1)
return 1;
else if (map[(uchar) *s++] != map[(uchar) *t++])
return 1;
diff --git a/strings/ctype-simple.c b/strings/ctype-simple.c
index 5e5a345..f405c4f 100644
--- a/strings/ctype-simple.c
+++ b/strings/ctype-simple.c
@@ -1059,6 +1059,13 @@ size_t my_scan_8bit(CHARSET_INFO *cs, const char *str, const char *end, int sq)
break;
}
return (size_t) (str - str0);
+ case MY_SEQ_NONSPACES:
+ for ( ; str < end ; str++)
+ {
+ if (my_isspace(cs, *str))
+ break;
+ }
+ return (size_t) (str - str0);
default:
return 0;
}
@@ -1916,7 +1923,6 @@ my_strxfrm_pad_desc_and_reverse(CHARSET_INFO *cs,
MY_CHARSET_HANDLER my_charset_8bit_handler=
{
my_cset_init_8bit,
- my_mbcharlen_8bit, /* mbcharlen */
my_numchars_8bit,
my_charpos_8bit,
my_well_formed_len_8bit,
diff --git a/strings/ctype-sjis.c b/strings/ctype-sjis.c
index ebcea22..e054614 100644
--- a/strings/ctype-sjis.c
+++ b/strings/ctype-sjis.c
@@ -192,12 +192,6 @@ static const uchar sort_order_sjis[]=
#include "ctype-mb.ic"
-static uint mbcharlen_sjis(CHARSET_INFO *cs __attribute__((unused)),uint c)
-{
- return (issjishead((uchar) c) ? 2 : 1);
-}
-
-
#define sjiscode(c,d) ((((uint) (uchar)(c)) << 8) | (uint) (uchar) (d))
@@ -34066,7 +34060,6 @@ static MY_COLLATION_HANDLER my_collation_handler_sjis_bin=
static MY_CHARSET_HANDLER my_charset_handler=
{
NULL, /* init */
- mbcharlen_sjis,
my_numchars_mb,
my_charpos_mb,
my_well_formed_len_sjis,
diff --git a/strings/ctype-tis620.c b/strings/ctype-tis620.c
index 711bb21..82fd864 100644
--- a/strings/ctype-tis620.c
+++ b/strings/ctype-tis620.c
@@ -834,7 +834,6 @@ static MY_COLLATION_HANDLER my_collation_ci_handler =
static MY_CHARSET_HANDLER my_charset_handler=
{
NULL, /* init */
- my_mbcharlen_8bit, /* mbcharlen */
my_numchars_8bit,
my_charpos_8bit,
my_well_formed_len_8bit,
diff --git a/strings/ctype-ucs2.c b/strings/ctype-ucs2.c
index 74e474c..b5fab16 100644
--- a/strings/ctype-ucs2.c
+++ b/strings/ctype-ucs2.c
@@ -1049,6 +1049,9 @@ my_scan_mb2(CHARSET_INFO *cs __attribute__((unused)),
{
}
return (size_t) (str - str0);
+ case MY_SEQ_NONSPACES:
+ DBUG_ASSERT(0);
+ /* pass through */
default:
return 0;
}
@@ -1431,15 +1434,6 @@ my_charlen_utf16(CHARSET_INFO *cs, const uchar *str, const uchar *end)
/* Defines my_well_formed_char_length_utf16 */
-static uint
-my_mbcharlen_utf16(CHARSET_INFO *cs __attribute__((unused)),
- uint c __attribute__((unused)))
-{
- DBUG_ASSERT(0);
- return MY_UTF16_HIGH_HEAD(c) ? 4 : 2;
-}
-
-
static size_t
my_numchars_utf16(CHARSET_INFO *cs,
const char *b, const char *e)
@@ -1567,7 +1561,6 @@ static MY_COLLATION_HANDLER my_collation_utf16_bin_handler =
MY_CHARSET_HANDLER my_charset_utf16_handler=
{
NULL, /* init */
- my_mbcharlen_utf16, /* mbcharlen */
my_numchars_utf16,
my_charpos_utf16,
my_well_formed_len_utf16,
@@ -1789,7 +1782,6 @@ static MY_COLLATION_HANDLER my_collation_utf16le_bin_handler =
static MY_CHARSET_HANDLER my_charset_utf16le_handler=
{
NULL, /* init */
- my_mbcharlen_utf16,
my_numchars_utf16,
my_charpos_utf16,
my_well_formed_len_utf16,
@@ -2083,14 +2075,6 @@ my_charlen_utf32(CHARSET_INFO *cs __attribute__((unused)),
/* Defines my_well_formed_char_length_utf32 */
-static uint
-my_mbcharlen_utf32(CHARSET_INFO *cs __attribute__((unused)) ,
- uint c __attribute__((unused)))
-{
- return 4;
-}
-
-
static int
my_vsnprintf_utf32(char *dst, size_t n, const char* fmt, va_list ap)
{
@@ -2484,6 +2468,9 @@ my_scan_utf32(CHARSET_INFO *cs,
str+= res;
}
return (size_t) (str - str0);
+ case MY_SEQ_NONSPACES:
+ DBUG_ASSERT(0);
+ /* pass through */
default:
return 0;
}
@@ -2525,7 +2512,6 @@ static MY_COLLATION_HANDLER my_collation_utf32_bin_handler =
MY_CHARSET_HANDLER my_charset_utf32_handler=
{
NULL, /* init */
- my_mbcharlen_utf32,
my_numchars_utf32,
my_charpos_utf32,
my_well_formed_len_utf32,
@@ -2862,13 +2848,6 @@ my_fill_ucs2(CHARSET_INFO *cs __attribute__((unused)),
}
-static uint my_mbcharlen_ucs2(CHARSET_INFO *cs __attribute__((unused)) ,
- uint c __attribute__((unused)))
-{
- return 2;
-}
-
-
static
size_t my_numchars_ucs2(CHARSET_INFO *cs __attribute__((unused)),
const char *b, const char *e)
@@ -3003,7 +2982,6 @@ static MY_COLLATION_HANDLER my_collation_ucs2_bin_handler =
MY_CHARSET_HANDLER my_charset_ucs2_handler=
{
NULL, /* init */
- my_mbcharlen_ucs2, /* mbcharlen */
my_numchars_ucs2,
my_charpos_ucs2,
my_well_formed_len_ucs2,
diff --git a/strings/ctype-ujis.c b/strings/ctype-ujis.c
index b24fdb3..786ae99 100644
--- a/strings/ctype-ujis.c
+++ b/strings/ctype-ujis.c
@@ -220,12 +220,6 @@ static const uchar sort_order_ujis[]=
#include "strcoll.ic"
-static uint mbcharlen_ujis(CHARSET_INFO *cs __attribute__((unused)),uint c)
-{
- return (isujis(c)? 2: isujis_ss2(c)? 2: isujis_ss3(c)? 3: 1);
-}
-
-
static
size_t my_numcells_eucjp(CHARSET_INFO *cs __attribute__((unused)),
const char *str, const char *str_end)
@@ -67255,7 +67249,6 @@ static MY_COLLATION_HANDLER my_collation_ujis_bin_handler =
static MY_CHARSET_HANDLER my_charset_handler=
{
NULL, /* init */
- mbcharlen_ujis,
my_numchars_mb,
my_charpos_mb,
my_well_formed_len_ujis,
diff --git a/strings/ctype-utf8.c b/strings/ctype-utf8.c
index 3a5616b..b6a7a0d 100644
--- a/strings/ctype-utf8.c
+++ b/strings/ctype-utf8.c
@@ -5426,21 +5426,6 @@ my_weight_mb3_utf8_general_mysql500_ci(uchar b0, uchar b1, uchar b2)
#include "strcoll.ic"
-static uint my_mbcharlen_utf8(CHARSET_INFO *cs __attribute__((unused)),
- uint c)
-{
- if (c < 0x80)
- return 1;
- else if (c < 0xc2)
- return 0; /* Illegal mb head */
- else if (c < 0xe0)
- return 2;
- else if (c < 0xf0)
- return 3;
- return 0; /* Illegal mb head */;
-}
-
-
static MY_COLLATION_HANDLER my_collation_utf8_general_ci_handler =
{
NULL, /* init */
@@ -5491,7 +5476,6 @@ static MY_COLLATION_HANDLER my_collation_utf8_bin_handler =
MY_CHARSET_HANDLER my_charset_utf8_handler=
{
NULL, /* init */
- my_mbcharlen_utf8,
my_numchars_mb,
my_charpos_mb,
my_well_formed_len_utf8,
@@ -7045,7 +7029,6 @@ static MY_COLLATION_HANDLER my_collation_filename_handler =
static MY_CHARSET_HANDLER my_charset_filename_handler=
{
NULL, /* init */
- my_mbcharlen_utf8,
my_numchars_mb,
my_charpos_mb,
my_well_formed_len_mb,
@@ -7111,57 +7094,6 @@ struct charset_info_st my_charset_filename=
};
-#ifdef MY_TEST_UTF8
-#include <stdio.h>
-
-static void test_mb(CHARSET_INFO *cs, uchar *s)
-{
- while(*s)
- {
- if (my_ismbhead_utf8(cs,*s))
- {
- uint len=my_mbcharlen_utf8(cs,*s);
- while(len--)
- {
- printf("%c",*s);
- s++;
- }
- printf("\n");
- }
- else
- {
- printf("%c\n",*s);
- s++;
- }
- }
-}
-
-int main()
-{
- char str[1024]=" utf8 test пÑоба ÐÐРРпо-РУССÐÐ";
- CHARSET_INFO *cs;
-
- test_mb(cs,(uchar*)str);
-
- printf("orig :'%s'\n",str);
-
- my_caseup_utf8(cs,str,15);
- printf("caseup :'%s'\n",str);
-
- my_caseup_str_utf8(cs,str);
- printf("caseup_str:'%s'\n",str);
-
- my_casedn_utf8(cs,str,15);
- printf("casedn :'%s'\n",str);
-
- my_casedn_str_utf8(cs,str);
- printf("casedn_str:'%s'\n",str);
-
- return 0;
-}
-
-#endif
-
#endif /* HAVE_CHARSET_UTF8 */
@@ -7755,23 +7687,6 @@ size_t my_well_formed_len_utf8mb4(CHARSET_INFO *cs,
#include "strcoll.ic"
-static uint
-my_mbcharlen_utf8mb4(CHARSET_INFO *cs __attribute__((unused)), uint c)
-{
- if (c < 0x80)
- return 1;
- if (c < 0xc2)
- return 0; /* Illegal mb head */
- if (c < 0xe0)
- return 2;
- if (c < 0xf0)
- return 3;
- if (c < 0xf8)
- return 4;
- return 0; /* Illegal mb head */;
-}
-
-
static MY_COLLATION_HANDLER my_collation_utf8mb4_general_ci_handler=
{
NULL, /* init */
@@ -7807,7 +7722,6 @@ static MY_COLLATION_HANDLER my_collation_utf8mb4_bin_handler =
MY_CHARSET_HANDLER my_charset_utf8mb4_handler=
{
NULL, /* init */
- my_mbcharlen_utf8mb4,
my_numchars_mb,
my_charpos_mb,
my_well_formed_len_utf8mb4,
diff --git a/strings/my_strchr.c b/strings/my_strchr.c
index 0305ef8..2365731 100644
--- a/strings/my_strchr.c
+++ b/strings/my_strchr.c
@@ -38,7 +38,7 @@
const char *acc_end= (ACC) + (LEN); \
for (ptr_str= (STR) ; ptr_str < (END) ; ptr_str+= mbl) \
{ \
- mbl= my_mbcharlen((CS), *(uchar*)ptr_str); \
+ mbl= my_charlen_fix((CS), ptr_str, (END)); \
if (mbl < 2) \
{ \
DBUG_ASSERT(mbl == 1); \
@@ -63,10 +63,9 @@ end: \
char *my_strchr(CHARSET_INFO *cs, const char *str, const char *end,
pchar c)
{
- uint mbl;
while (str < end)
{
- mbl= my_mbcharlen(cs, *(uchar *)str);
+ uint mbl= my_ismbchar(cs, str, end);
if (mbl < 2)
{
if (*str == c)
diff --git a/strings/my_vsnprintf.c b/strings/my_vsnprintf.c
index 4178b20..75514a9 100644
--- a/strings/my_vsnprintf.c
+++ b/strings/my_vsnprintf.c
@@ -168,8 +168,7 @@ static char *backtick_string(CHARSET_INFO *cs, char *to, const char *end,
for ( ; par < par_end; par+= char_len)
{
uchar c= *(uchar *) par;
- if (!(char_len= my_mbcharlen(cs, c)))
- char_len= 1;
+ char_len= my_charlen_fix(cs, par, par_end);
if (char_len == 1 && c == (uchar) quote_char )
{
if (start + 1 >= end)
Follow ups
References