← Back to team overview

maria-developers team mailing list archive

Please review MDEV-6353 my_ismbchar() and my_mbcharlen() refactoring

 

Hi Sergei,

Please review a partial patch for MDEV-6353.

It removes my_mbcharlen() in all remaining pieces of the code,
except LOAD DATA and SELECT INTO OUTFILE.


I'll do LOAD DATA and SELECT INTO OUTFILE in a separate patch.


Thanks.

commit 4ab28aca964fa646aa55676db813dbed66b83093
Author: Alexander Barkov <bar@xxxxxxxxxxx>
Date:   Mon Mar 28 11:05:51 2016 +0400

    MDEV-6353 my_ismbchar() and my_mbcharlen() refactoring, part 1.
    Fixing the debug_sync and create_options related code not to use my_mbcharlen():
    - debug_sync_token() now uses cs->cset->scan().
      Passing the end of the string pointer to debug_sync_update() in order to be
      able to use scan(). Adding support for a new pattern scan(MY_SEQ_NONSPACES).
      It does scans everything that scan(MY_SEQ_SPACES) does not.
    - Fixing set_one_value() to iterate bytes one by one. This is safe, because
      ',' cannot be a part of a multi-byte character in UTF8.

diff --git a/include/m_ctype.h b/include/m_ctype.h
index 615ee6a..a2605dd 100644
--- a/include/m_ctype.h
+++ b/include/m_ctype.h
@@ -182,6 +182,7 @@ extern MY_UNI_CTYPE my_uni_ctype[256];
 
 #define MY_SEQ_INTTAIL	1
 #define MY_SEQ_SPACES	2
+#define MY_SEQ_NONSPACES 3 /* Skip non-space characters, including bad bytes */
 
         /* My charsets_list flags */
 #define MY_CS_COMPILED  1      /* compiled-in sets               */
diff --git a/sql/create_options.cc b/sql/create_options.cc
index 66515be..3011c4b 100644
--- a/sql/create_options.cc
+++ b/sql/create_options.cc
@@ -184,7 +184,7 @@ static bool set_one_value(ha_create_table_option *opt,
       {
         for (end=start;
              *end && *end != ',';
-             end+= my_mbcharlen(system_charset_info, *end)) /* no-op */;
+             end++) /* no-op */;
         if (!my_strnncoll(system_charset_info,
                           (uchar*)start, end-start,
                           (uchar*)value->str, value->length))
diff --git a/sql/debug_sync.cc b/sql/debug_sync.cc
index 8b3412e..e84f1e8 100644
--- a/sql/debug_sync.cc
+++ b/sql/debug_sync.cc
@@ -847,16 +847,16 @@ static bool debug_sync_set_action(THD *thd, st_debug_sync_action *action)
     to the string terminator ASCII NUL ('\0').
 */
 
-static char *debug_sync_token(char **token_p, uint *token_length_p, char *ptr)
+static char *debug_sync_token(char **token_p, uint *token_length_p,
+                              char *ptr, char *ptrend)
 {
   DBUG_ASSERT(token_p);
   DBUG_ASSERT(token_length_p);
   DBUG_ASSERT(ptr);
 
   /* Skip leading space */
-  while (my_isspace(system_charset_info, *ptr))
-    ptr+= my_mbcharlen(system_charset_info, (uchar) *ptr);
-
+  ptr+= system_charset_info->cset->scan(system_charset_info,
+                                        ptr, ptrend, MY_SEQ_SPACES);
   if (!*ptr)
   {
     ptr= NULL;
@@ -867,8 +867,8 @@ static char *debug_sync_token(char **token_p, uint *token_length_p, char *ptr)
   *token_p= ptr;
 
   /* Find token end. */
-  while (*ptr && !my_isspace(system_charset_info, *ptr))
-    ptr+= my_mbcharlen(system_charset_info, (uchar) *ptr);
+  ptr+= system_charset_info->cset->scan(system_charset_info,
+                                        ptr, ptrend, MY_SEQ_NONSPACES);
 
   /* Get token length. */
   *token_length_p= ptr - *token_p;
@@ -876,18 +876,19 @@ static char *debug_sync_token(char **token_p, uint *token_length_p, char *ptr)
   /* If necessary, terminate token. */
   if (*ptr)
   {
+    DBUG_ASSERT(ptr < ptrend);
     /* Get terminator character length. */
-    uint mbspacelen= my_mbcharlen(system_charset_info, (uchar) *ptr);
+    int mbspacelen= my_charlen(system_charset_info, ptr, ptrend);
 
     /* Terminate token. */
     *ptr= '\0';
 
     /* Skip the terminator. */
-    ptr+= mbspacelen;
+    ptr+= mbspacelen < 1 ? 1 : mbspacelen;
 
     /* Skip trailing space */
-    while (my_isspace(system_charset_info, *ptr))
-      ptr+= my_mbcharlen(system_charset_info, (uchar) *ptr);
+    ptr+= system_charset_info->cset->scan(system_charset_info,
+                                          ptr, ptrend, MY_SEQ_SPACES);
   }
 
  end:
@@ -917,7 +918,8 @@ static char *debug_sync_token(char **token_p, uint *token_length_p, char *ptr)
     undefined in this case.
 */
 
-static char *debug_sync_number(ulong *number_p, char *actstrptr)
+static char *debug_sync_number(ulong *number_p, char *actstrptr,
+                                                char *actstrend)
 {
   char                  *ptr;
   char                  *ept;
@@ -927,7 +929,7 @@ static char *debug_sync_number(ulong *number_p, char *actstrptr)
   DBUG_ASSERT(actstrptr);
 
   /* Get token from string. */
-  if (!(ptr= debug_sync_token(&token, &token_length, actstrptr)))
+  if (!(ptr= debug_sync_token(&token, &token_length, actstrptr, actstrend)))
     goto end;
 
   *number_p= strtoul(token, &ept, 10);
@@ -971,7 +973,7 @@ static char *debug_sync_number(ulong *number_p, char *actstrptr)
     for the string.
 */
 
-static bool debug_sync_eval_action(THD *thd, char *action_str)
+static bool debug_sync_eval_action(THD *thd, char *action_str, char *action_end)
 {
   st_debug_sync_action  *action= NULL;
   const char            *errmsg;
@@ -986,7 +988,7 @@ static bool debug_sync_eval_action(THD *thd, char *action_str)
   /*
     Get debug sync point name. Or a special command.
   */
-  if (!(ptr= debug_sync_token(&token, &token_length, action_str)))
+  if (!(ptr= debug_sync_token(&token, &token_length, action_str, action_end)))
   {
     errmsg= "Missing synchronization point name";
     goto err;
@@ -1009,7 +1011,7 @@ static bool debug_sync_eval_action(THD *thd, char *action_str)
   /*
     Get kind of action to be taken at sync point.
   */
-  if (!(ptr= debug_sync_token(&token, &token_length, ptr)))
+  if (!(ptr= debug_sync_token(&token, &token_length, ptr, action_end)))
   {
     /* No action present. Try special commands. Token unchanged. */
 
@@ -1090,7 +1092,7 @@ static bool debug_sync_eval_action(THD *thd, char *action_str)
   if (!my_strcasecmp(system_charset_info, token, "SIGNAL"))
   {
     /* It is SIGNAL. Signal name must follow. */
-    if (!(ptr= debug_sync_token(&token, &token_length, ptr)))
+    if (!(ptr= debug_sync_token(&token, &token_length, ptr, action_end)))
     {
       errmsg= "Missing signal name after action SIGNAL";
       goto err;
@@ -1108,7 +1110,7 @@ static bool debug_sync_eval_action(THD *thd, char *action_str)
     action->execute= 1;
 
     /* Get next token. If none follows, set action. */
-    if (!(ptr= debug_sync_token(&token, &token_length, ptr)))
+    if (!(ptr= debug_sync_token(&token, &token_length, ptr, action_end)))
       goto set_action;
   }
 
@@ -1118,7 +1120,7 @@ static bool debug_sync_eval_action(THD *thd, char *action_str)
   if (!my_strcasecmp(system_charset_info, token, "WAIT_FOR"))
   {
     /* It is WAIT_FOR. Wait_for signal name must follow. */
-    if (!(ptr= debug_sync_token(&token, &token_length, ptr)))
+    if (!(ptr= debug_sync_token(&token, &token_length, ptr, action_end)))
     {
       errmsg= "Missing signal name after action WAIT_FOR";
       goto err;
@@ -1137,7 +1139,7 @@ static bool debug_sync_eval_action(THD *thd, char *action_str)
     action->timeout= opt_debug_sync_timeout;
 
     /* Get next token. If none follows, set action. */
-    if (!(ptr= debug_sync_token(&token, &token_length, ptr)))
+    if (!(ptr= debug_sync_token(&token, &token_length, ptr, action_end)))
       goto set_action;
 
     /*
@@ -1146,14 +1148,14 @@ static bool debug_sync_eval_action(THD *thd, char *action_str)
     if (!my_strcasecmp(system_charset_info, token, "TIMEOUT"))
     {
       /* It is TIMEOUT. Number must follow. */
-      if (!(ptr= debug_sync_number(&action->timeout, ptr)))
+      if (!(ptr= debug_sync_number(&action->timeout, ptr, action_end)))
       {
         errmsg= "Missing valid number after TIMEOUT";
         goto err;
       }
 
       /* Get next token. If none follows, set action. */
-      if (!(ptr= debug_sync_token(&token, &token_length, ptr)))
+      if (!(ptr= debug_sync_token(&token, &token_length, ptr, action_end)))
         goto set_action;
     }
   }
@@ -1174,14 +1176,14 @@ static bool debug_sync_eval_action(THD *thd, char *action_str)
     }
 
     /* Number must follow. */
-    if (!(ptr= debug_sync_number(&action->execute, ptr)))
+    if (!(ptr= debug_sync_number(&action->execute, ptr, action_end)))
     {
       errmsg= "Missing valid number after EXECUTE";
       goto err;
     }
 
     /* Get next token. If none follows, set action. */
-    if (!(ptr= debug_sync_token(&token, &token_length, ptr)))
+    if (!(ptr= debug_sync_token(&token, &token_length, ptr, action_end)))
       goto set_action;
   }
 
@@ -1191,14 +1193,14 @@ static bool debug_sync_eval_action(THD *thd, char *action_str)
   if (!my_strcasecmp(system_charset_info, token, "HIT_LIMIT"))
   {
     /* Number must follow. */
-    if (!(ptr= debug_sync_number(&action->hit_limit, ptr)))
+    if (!(ptr= debug_sync_number(&action->hit_limit, ptr, action_end)))
     {
       errmsg= "Missing valid number after HIT_LIMIT";
       goto err;
     }
 
     /* Get next token. If none follows, set action. */
-    if (!(ptr= debug_sync_token(&token, &token_length, ptr)))
+    if (!(ptr= debug_sync_token(&token, &token_length, ptr, action_end)))
       goto set_action;
   }
 
@@ -1246,7 +1248,7 @@ static bool debug_sync_eval_action(THD *thd, char *action_str)
     terminators in the string. So we need to take a copy here.
 */
 
-bool debug_sync_update(THD *thd, char *val_str)
+bool debug_sync_update(THD *thd, char *val_str, size_t len)
 {
   DBUG_ENTER("debug_sync_update");
   DBUG_PRINT("debug_sync", ("set action: '%s'", val_str));
@@ -1255,8 +1257,9 @@ bool debug_sync_update(THD *thd, char *val_str)
     debug_sync_eval_action() places '\0' in the string, which itself
     must be '\0' terminated.
   */
+  DBUG_ASSERT(val_str[len] == '\0');
   DBUG_RETURN(opt_debug_sync_timeout ?
-              debug_sync_eval_action(thd, val_str) :
+              debug_sync_eval_action(thd, val_str, val_str + len) :
               FALSE);
 }
 
@@ -1592,7 +1595,7 @@ bool debug_sync_set_action(THD *thd, const char *action_str, size_t len)
   DBUG_ASSERT(action_str);
   
   value= strmake_root(thd->mem_root, action_str, len);
-  rc= debug_sync_eval_action(thd, value);
+  rc= debug_sync_eval_action(thd, value, value + len);
   DBUG_RETURN(rc);
 }
 
diff --git a/sql/debug_sync.h b/sql/debug_sync.h
index bf1b316..339a211 100644
--- a/sql/debug_sync.h
+++ b/sql/debug_sync.h
@@ -45,6 +45,9 @@ extern void debug_sync_init_thread(THD *thd);
 extern void debug_sync_end_thread(THD *thd);
 extern bool debug_sync_set_action(THD *thd, const char *action_str, size_t len);
 
+extern bool debug_sync_update(THD *thd, char *val_str, size_t len);
+extern uchar *debug_sync_value_ptr(THD *thd);
+
 #endif /* defined(ENABLED_DEBUG_SYNC) */
 
 #endif /* DEBUG_SYNC_INCLUDED */
diff --git a/sql/sys_vars.ic b/sql/sys_vars.ic
index 373f583..bb290b6 100644
--- a/sql/sys_vars.ic
+++ b/sql/sys_vars.ic
@@ -1434,6 +1434,9 @@ class Sys_var_plugin: public sys_var
 };
 
 #if defined(ENABLED_DEBUG_SYNC)
+
+#include "debug_sync.h"
+
 /**
   The class for @@debug_sync session-only variable
 */
@@ -1462,15 +1465,21 @@ class Sys_var_debug_sync :public sys_var
     String str(buff, sizeof(buff), system_charset_info), *res;
 
     if (!(res=var->value->val_str(&str)))
+    {
       var->save_result.string_value.str= const_cast<char*>("");
+      var->save_result.string_value.length= 0;
+    }
     else
+    {
       var->save_result.string_value.str= thd->strmake(res->ptr(), res->length());
+      var->save_result.string_value.length= res->length();
+    }
     return false;
   }
   bool session_update(THD *thd, set_var *var)
   {
-    extern bool debug_sync_update(THD *thd, char *val_str);
-    return debug_sync_update(thd, var->save_result.string_value.str);
+    return debug_sync_update(thd, var->save_result.string_value.str,
+                                  var->save_result.string_value.length);
   }
   bool global_update(THD *thd, set_var *var)
   {
@@ -1488,7 +1497,6 @@ class Sys_var_debug_sync :public sys_var
   }
   uchar *session_value_ptr(THD *thd, const LEX_STRING *base)
   {
-    extern uchar *debug_sync_value_ptr(THD *thd);
     return debug_sync_value_ptr(thd);
   }
   uchar *global_value_ptr(THD *thd, const LEX_STRING *base)
diff --git a/strings/ctype-simple.c b/strings/ctype-simple.c
index b205b1a..5fbb1fe 100644
--- a/strings/ctype-simple.c
+++ b/strings/ctype-simple.c
@@ -1069,6 +1069,13 @@ size_t my_scan_8bit(CHARSET_INFO *cs, const char *str, const char *end, int sq)
         break;
     }
     return (size_t) (str - str0);
+  case MY_SEQ_NONSPACES:
+    for ( ; str < end ; str++)
+    {
+      if (my_isspace(cs, *str))
+        break;
+    }
+    return (size_t) (str - str0);
   default:
     return 0;
   }
diff --git a/strings/ctype-ucs2.c b/strings/ctype-ucs2.c
index 74e474c..058afb7 100644
--- a/strings/ctype-ucs2.c
+++ b/strings/ctype-ucs2.c
@@ -1049,6 +1049,9 @@ my_scan_mb2(CHARSET_INFO *cs __attribute__((unused)),
     {
     }
     return (size_t) (str - str0);
+  case MY_SEQ_NONSPACES:
+    DBUG_ASSERT(0);
+    /* pass through */
   default:
     return 0;
   }
@@ -2484,6 +2487,9 @@ my_scan_utf32(CHARSET_INFO *cs,
       str+= res;
     }
     return (size_t) (str - str0);
+  case MY_SEQ_NONSPACES:
+    DBUG_ASSERT(0);
+    /* pass through */
   default:
     return 0;
   }

Follow ups