maria-developers team mailing list archive
-
maria-developers team
-
Mailing list archive
-
Message #09620
Re: Please review MDEV-6353 my_ismbchar() and my_mbcharlen() refactoring
Hi, Alexander!
TL;DR: looks fine. Few minor comments, see below.
On Apr 08, Alexander Barkov wrote:
> diff --git a/include/m_ctype.h b/include/m_ctype.h
> index c892d576..bb633f8 100644
> --- a/include/m_ctype.h
> +++ b/include/m_ctype.h
> @@ -1010,11 +1009,19 @@ int my_charlen(CHARSET_INFO *cs, const char *str, const char *end)
> return (cs->cset->charlen)(cs, (const uchar *) str,
> (const uchar *) end);
> }
> -#ifdef USE_MB
> -#define my_mbcharlen(s, a) ((s)->cset->mbcharlen((s),(a)))
> -#else
> -#define my_mbcharlen(s, a) 1
> -#endif
> +
> +
> +/**
> + Convert broken and incomplete byte sequences to 1 byte.
> +*/
> +static inline
> +int my_charlen_fix(CHARSET_INFO *cs, const char *str, const char *end)
> +{
> + int char_length= my_charlen(cs, str, end);
> + DBUG_ASSERT(str < end);
> + return char_length > 0 ? (uint) char_length : 0U;
0U? or 1U? you convert to 1 byte, and below you change like
- if (!(clen= my_mbcharlen(system_charset_info, c)))
- clen= 1;
+ clen= my_charlen_fix(system_charset_info, name, name_end);
> +}
> +
>
> #define my_caseup_str(s, a) ((s)->cset->caseup_str((s), (a)))
> #define my_casedn_str(s, a) ((s)->cset->casedn_str((s), (a)))
> diff --git a/mysys/charset.c b/mysys/charset.c
> index 3c134dc..253dc72 100644
> --- a/mysys/charset.c
> +++ b/mysys/charset.c
> @@ -909,8 +913,8 @@ size_t escape_string_for_mysql(CHARSET_INFO *charset_info,
> {
> char escape= 0;
> #ifdef USE_MB
> - int tmp_length;
> - if (use_mb_flag && (tmp_length= my_ismbchar(charset_info, from, end)))
> + int tmp_length= my_charlen(charset_info, from, end);
> + if (use_mb_flag && tmp_length > 1)
Old code didn't call my_ismbchar if use_mb_flag is false.
Now you do. Was it intentional? Is it an issue?
> {
> if (to + tmp_length > to_end)
> {
> diff --git a/sql/debug_sync.cc b/sql/debug_sync.cc
> index 8b3412e..e84f1e8 100644
> --- a/sql/debug_sync.cc
> +++ b/sql/debug_sync.cc
> @@ -876,18 +876,19 @@ static char *debug_sync_token(char **token_p, uint *token_length_p, char *ptr)
> /* If necessary, terminate token. */
> if (*ptr)
> {
> + DBUG_ASSERT(ptr < ptrend);
> /* Get terminator character length. */
> - uint mbspacelen= my_mbcharlen(system_charset_info, (uchar) *ptr);
> + int mbspacelen= my_charlen(system_charset_info, ptr, ptrend);
why not my_charlen_fix() ?
>
> /* Terminate token. */
> *ptr= '\0';
>
> /* Skip the terminator. */
> - ptr+= mbspacelen;
> + ptr+= mbspacelen < 1 ? 1 : mbspacelen;
>
> /* Skip trailing space */
> - while (my_isspace(system_charset_info, *ptr))
> - ptr+= my_mbcharlen(system_charset_info, (uchar) *ptr);
> + ptr+= system_charset_info->cset->scan(system_charset_info,
> + ptr, ptrend, MY_SEQ_SPACES);
> }
>
> end:
> diff --git a/sql/debug_sync.h b/sql/debug_sync.h
> index bf1b316..339a211 100644
> --- a/sql/debug_sync.h
> +++ b/sql/debug_sync.h
> @@ -45,6 +45,9 @@ extern void debug_sync_init_thread(THD *thd);
> extern void debug_sync_end_thread(THD *thd);
> extern bool debug_sync_set_action(THD *thd, const char *action_str, size_t len);
>
> +extern bool debug_sync_update(THD *thd, char *val_str, size_t len);
> +extern uchar *debug_sync_value_ptr(THD *thd);
I clearly remember that monty has done it recently somewhere
(I mean, this and including it into sys_vars.h or ic or cc)
> +
> #endif /* defined(ENABLED_DEBUG_SYNC) */
>
> #endif /* DEBUG_SYNC_INCLUDED */
> diff --git a/sql/sql_class.cc b/sql/sql_class.cc
> index e3b7056..e29608b 100644
> --- a/sql/sql_class.cc
> +++ b/sql/sql_class.cc
> @@ -3226,7 +3226,7 @@ int select_export::send_data(List<Item> &items)
>
> if ((NEED_ESCAPING(*pos) ||
> (check_second_byte &&
> - my_mbcharlen(character_set_client, (uchar) *pos) == 2 &&
> + ((uchar) *pos) > 0x7F /* a potential MB2HEAD */ &&
Hmm, why do you do this very low-level charset stuff
in sql_class.cc? one is supposed to use charset methods instead.
> pos + 1 < end &&
> NEED_ESCAPING(pos[1]))) &&
> /*
> diff --git a/sql/sys_vars.ic b/sql/sys_vars.ic
> index 373f583..bb290b6 100644
> --- a/sql/sys_vars.ic
> +++ b/sql/sys_vars.ic
> @@ -1462,15 +1465,21 @@ class Sys_var_debug_sync :public sys_var
> String str(buff, sizeof(buff), system_charset_info), *res;
>
> if (!(res=var->value->val_str(&str)))
> + {
> var->save_result.string_value.str= const_cast<char*>("");
> + var->save_result.string_value.length= 0;
use var->save_result.string_value= empty_lex_string;
> + }
> else
> + {
> var->save_result.string_value.str= thd->strmake(res->ptr(), res->length());
> + var->save_result.string_value.length= res->length();
use thd->make_lex_string(&var->save_result.string_value, ...);
> + }
> return false;
> }
> bool session_update(THD *thd, set_var *var)
> {
> - extern bool debug_sync_update(THD *thd, char *val_str);
> - return debug_sync_update(thd, var->save_result.string_value.str);
> + return debug_sync_update(thd, var->save_result.string_value.str,
> + var->save_result.string_value.length);
> }
> bool global_update(THD *thd, set_var *var)
> {
> diff --git a/strings/ctype-ucs2.c b/strings/ctype-ucs2.c
> index 74e474c..b5fab16 100644
> --- a/strings/ctype-ucs2.c
> +++ b/strings/ctype-ucs2.c
> @@ -1049,6 +1049,9 @@ my_scan_mb2(CHARSET_INFO *cs __attribute__((unused)),
> {
> }
> return (size_t) (str - str0);
> + case MY_SEQ_NONSPACES:
> + DBUG_ASSERT(0);
why? impossible? or not implemented?
please add a comment /* not implemented */ in that's the case.
> + /* pass through */
> default:
> return 0;
> }
> @@ -2484,6 +2468,9 @@ my_scan_utf32(CHARSET_INFO *cs,
> str+= res;
> }
> return (size_t) (str - str0);
> + case MY_SEQ_NONSPACES:
> + DBUG_ASSERT(0);
same as above
> + /* pass through */
> default:
> return 0;
> }
Regards,
Sergei
Chief Architect MariaDB
and security@xxxxxxxxxxx
Follow ups
References