← Back to team overview

maria-developers team mailing list archive

Please review a patch removing LEX::text_string_is_7bit

 

Hello Sergei,

Can you please review a patch for bb-10.2-ext? It fixes the following
problem: if the parser's look-ahead changes in rules that use
TEXT_STRING or NCHAR_STRING, Lex->text_string_is_7bit can get out of
sync with the token values $1/$2/$3, etc.


Thanks!
diff --git a/sql/sql_lex.cc b/sql/sql_lex.cc
index 6a0c215..a3b87d5 100644
--- a/sql/sql_lex.cc
+++ b/sql/sql_lex.cc
@@ -1056,18 +1056,19 @@ Lex_input_stream::unescape(CHARSET_INFO *cs, char *to,
   Fix sometimes to do only one scan of the string
 */
 
-bool Lex_input_stream::get_text(LEX_STRING *dst, uint sep,
+bool Lex_input_stream::get_text(Lex_string_with_metadata_st *dst, uint sep,
                                 int pre_skip, int post_skip)
 {
   reg1 uchar c;
   uint found_escape=0;
   CHARSET_INFO *cs= m_thd->charset();
 
-  tok_bitmap= 0;
+  dst->set_8bit(false);
   while (! eof())
   {
     c= yyGet();
-    tok_bitmap|= c;
+    if (c & 0x80)
+      dst->set_8bit(true);
 #ifdef USE_MB
     {
       int l;
@@ -1433,18 +1434,17 @@ static int lex_one_token(YYSTYPE *yylval, THD *thd)
       }
       /* Found N'string' */
       lip->yySkip();                         // Skip '
-      if (lip->get_text(&yylval->lex_str, (sep= lip->yyGetLast()), 2, 1))
+      if (lip->get_text(&yylval->lex_string_with_metadata,
+                        (sep= lip->yyGetLast()), 2, 1))
       {
 	state= MY_LEX_CHAR;             // Read char by char
 	break;
       }
 
       lip->body_utf8_append(lip->m_cpp_text_start);
-      lip->body_utf8_append_escape(thd, &yylval->lex_str,
+      lip->body_utf8_append_escape(thd, &yylval->lex_string_with_metadata,
                                    national_charset_info,
                                    lip->m_cpp_text_end, sep);
-
-      lex->text_string_is_7bit= (lip->tok_bitmap & 0x80) ? 0 : 1;
       return(NCHAR_STRING);
     }
     case MY_LEX_IDENT_OR_HEX:
@@ -1798,7 +1798,8 @@ static int lex_one_token(YYSTYPE *yylval, THD *thd)
     case MY_LEX_STRING:			// Incomplete text string
     {
       uint sep;
-      if (lip->get_text(&yylval->lex_str, (sep= lip->yyGetLast()), 1, 1))
+      if (lip->get_text(&yylval->lex_string_with_metadata,
+                        (sep= lip->yyGetLast()), 1, 1))
       {
 	state= MY_LEX_CHAR;		// Read char by char
 	break;
@@ -1806,11 +1807,9 @@ static int lex_one_token(YYSTYPE *yylval, THD *thd)
       CHARSET_INFO *strcs= lip->m_underscore_cs ? lip->m_underscore_cs : cs;
       lip->body_utf8_append(lip->m_cpp_text_start);
 
-      lip->body_utf8_append_escape(thd, &yylval->lex_str, strcs,
-                                   lip->m_cpp_text_end, sep);
+      lip->body_utf8_append_escape(thd, &yylval->lex_string_with_metadata,
+                                   strcs, lip->m_cpp_text_end, sep);
       lip->m_underscore_cs= NULL;
-
-      lex->text_string_is_7bit= (lip->tok_bitmap & 0x80) ? 0 : 1;
       return(TEXT_STRING);
     }
     case MY_LEX_COMMENT:			//  Comment
diff --git a/sql/sql_lex.h b/sql/sql_lex.h
index e9d9660..49fcc05 100644
--- a/sql/sql_lex.h
+++ b/sql/sql_lex.h
@@ -2246,7 +2246,8 @@ class Lex_input_stream
   /** LALR(2) resolution, value of the look ahead token.*/
   LEX_YYSTYPE lookahead_yylval;
 
-  bool get_text(LEX_STRING *to, uint sep, int pre_skip, int post_skip);
+  bool get_text(Lex_string_with_metadata_st *to,
+                uint sep, int pre_skip, int post_skip);
 
   void add_digest_token(uint token, LEX_YYSTYPE yylval);
 
@@ -2325,9 +2326,6 @@ class Lex_input_stream
   */
   const char *found_semicolon;
 
-  /** Token character bitmaps, to detect 7bit strings. */
-  uchar tok_bitmap;
-
   /** SQL_MODE = IGNORE_SPACE. */
   bool ignore_space;
 
@@ -2565,8 +2563,6 @@ struct LEX: public Query_tables_list
   DYNAMIC_ARRAY plugins;
   plugin_ref plugins_static_buffer[INITIAL_LEX_PLUGIN_LIST_SIZE];
 
-  bool text_string_is_7bit;
-
   /** SELECT of CREATE VIEW statement */
   LEX_STRING create_view_select;
 
diff --git a/sql/sql_yacc.yy b/sql/sql_yacc.yy
index 3f04aa9b..e6996b5 100644
--- a/sql/sql_yacc.yy
+++ b/sql/sql_yacc.yy
@@ -906,6 +906,7 @@ Virtual_column_info *add_virtual_expression(THD *thd, Item *expr)
   /* structs */
   LEX_STRING lex_str;
   LEX_SYMBOL symbol;
+  Lex_string_with_metadata_st lex_string_with_metadata;
   struct sys_var_with_base variable;
   struct { int vars, conds, hndlrs, curs; } spblock;
   Lex_length_and_dec_st Lex_length_and_dec;
@@ -1709,14 +1710,18 @@ bool my_yyoverflow(short **a, YYSTYPE **b, ulong *yystacksize);
 %left  INTERVAL_SYM
 
 %type <lex_str>
-        IDENT IDENT_QUOTED TEXT_STRING DECIMAL_NUM FLOAT_NUM NUM LONG_NUM
+        IDENT IDENT_QUOTED DECIMAL_NUM FLOAT_NUM NUM LONG_NUM
         HEX_NUM HEX_STRING
         LEX_HOSTNAME ULONGLONG_NUM field_ident select_alias ident ident_or_text
         IDENT_sys TEXT_STRING_sys TEXT_STRING_literal
-        NCHAR_STRING opt_component key_cache_name
+        opt_component key_cache_name
         sp_opt_label BIN_NUM label_ident TEXT_STRING_filesystem ident_or_empty
         opt_constraint constraint opt_ident
 
+%type <lex_string_with_metadata>
+        TEXT_STRING
+        NCHAR_STRING
+
 %type <lex_str_ptr>
         opt_table_alias
 
@@ -13697,9 +13702,7 @@ text_literal:
             LEX_STRING tmp;
             CHARSET_INFO *cs_con= thd->variables.collation_connection;
             CHARSET_INFO *cs_cli= thd->variables.character_set_client;
-            uint repertoire= thd->lex->text_string_is_7bit &&
-                             my_charset_is_ascii_based(cs_cli) ?
-                             MY_REPERTOIRE_ASCII : MY_REPERTOIRE_UNICODE30;
+            uint repertoire= $1.repertoire(cs_cli);
             if (thd->charset_is_collation_connection ||
                 (repertoire == MY_REPERTOIRE_ASCII &&
                  my_charset_is_ascii_based(cs_con)))
@@ -13718,13 +13721,11 @@ text_literal:
           }
         | NCHAR_STRING
           {
-            uint repertoire= Lex->text_string_is_7bit ?
-                             MY_REPERTOIRE_ASCII : MY_REPERTOIRE_UNICODE30;
             DBUG_ASSERT(my_charset_is_ascii_based(national_charset_info));
             $$= new (thd->mem_root) Item_string(thd, $1.str, $1.length,
                                                   national_charset_info,
                                                   DERIVATION_COERCIBLE,
-                                                  repertoire);
+                                                  $1.repertoire());
             if ($$ == NULL)
               MYSQL_YYABORT;
           }
diff --git a/sql/structs.h b/sql/structs.h
index 98eb0f2..c3aa0a8 100644
--- a/sql/structs.h
+++ b/sql/structs.h
@@ -629,4 +629,23 @@ struct Lex_dyncol_type_st: public Lex_length_and_dec_st
 };
 
 
+struct Lex_string_with_metadata_st: public LEX_STRING
+{
+  bool m_is_8bit; // true if a byte with the 0x80 bit set was seen
+public:
+  void set_8bit(bool is_8bit) { m_is_8bit= is_8bit; }
+  // Repertoire: ASCII only if the 8-bit flag is unset and cs is ASCII-based
+  uint repertoire(CHARSET_INFO *cs) const
+  {
+    return !m_is_8bit && my_charset_is_ascii_based(cs) ?
+           MY_REPERTOIRE_ASCII : MY_REPERTOIRE_UNICODE30;
+  }
+  // Repertoire from the 8-bit flag alone; for ASCII-based character sets only
+  uint repertoire() const
+  {
+    return !m_is_8bit ? MY_REPERTOIRE_ASCII : MY_REPERTOIRE_UNICODE30;
+  }
+};
+
+
 #endif /* STRUCTS_INCLUDED */

Follow ups