maria-developers team mailing list archive

Thread
Date
[Branch ~maria-captains/maria/5.1] Rev 2715: Added MY_CS_NONASCII marker for character sets that are not compatible with latin1 for characters...

To: "askmonty.org Worklog" <worklog-db@xxxxxxxxxxxx>
From: noreply@xxxxxxxxxxxxx
Date: Thu, 02 Jul 2009 10:18:15 -0000
Delivered-to: worklog-db@xxxxxxxxxxxx
Reply-to: noreply@xxxxxxxxxxxxx
Sender: bounces@xxxxxxxxxxxxx
------------------------------------------------------------
revno: 2715
committer: Michael Widenius <monty@xxxxxxxxxxxx>
branch nick: mysql-maria
timestamp: Thu 2009-07-02 13:15:33 +0300
message:
  Added MY_CS_NONASCII marker for character sets that are not compatible with latin1 for characters 0x00-0x7f.
  This allows us to skip, and thereby speed up, some very common character set conversions that MySQL does when sending data to the client,
  which gives a nice speed increase for most queries that use only characters in the range 0x00-0x7f.
  
  This code is based on code that Alexander Barkov wrote for MySQL 6.0.
modified:
  include/m_ctype.h
  libmysqld/lib_sql.cc
  mysys/charset.c
  scripts/mysql_install_db.sh
  sql/protocol.cc
  sql/protocol.h
  sql/sql_string.cc
  strings/conf_to_src.c
  strings/ctype-extra.c
  strings/ctype-sjis.c
  strings/ctype-uca.c
  strings/ctype-ucs2.c
  strings/ctype-utf8.c
  strings/ctype.c

=== modified file 'include/m_ctype.h'
--- include/m_ctype.h	2008-12-23 14:21:01 +0000
+++ include/m_ctype.h	2009-07-02 10:15:33 +0000
@@ -87,6 +87,7 @@
 #define MY_CS_CSSORT	1024   /* if case sensitive sort order   */	
 #define MY_CS_HIDDEN	2048   /* don't display in SHOW          */	
 #define MY_CS_PUREASCII 4096   /* if a charset is pure ascii     */
+#define MY_CS_NONASCII  8192   /* if not ASCII-compatible        */
 #define MY_CHARSET_UNDEFINED 0
 
 /* Character repertoire flags */
@@ -517,6 +518,7 @@
 #define my_strcasecmp(s, a, b)        ((s)->coll->strcasecmp((s), (a), (b)))
 #define my_charpos(cs, b, e, num)     (cs)->cset->charpos((cs), (const char*) (b), (const char *)(e), (num))
 
+my_bool my_charset_is_ascii_compatible(CHARSET_INFO *cs);
 
 #define use_mb(s)                     ((s)->cset->ismbchar != NULL)
 #define my_ismbchar(s, a, b)          ((s)->cset->ismbchar((s), (a), (b)))

=== modified file 'libmysqld/lib_sql.cc'
--- libmysqld/lib_sql.cc	2009-02-24 11:29:49 +0000
+++ libmysqld/lib_sql.cc	2009-07-02 10:15:33 +0000
@@ -1124,6 +1124,7 @@
   return false;
 }
 
+
 bool Protocol::net_store_data(const uchar *from, size_t length)
 {
   char *field_buf;
@@ -1143,6 +1144,30 @@
   return FALSE;
 }
 
+
+bool Protocol::net_store_data(const uchar *from, size_t length,
+                              CHARSET_INFO *from_cs, CHARSET_INFO *to_cs)
+{
+  uint conv_length= to_cs->mbmaxlen * length / from_cs->mbminlen;
+  uint dummy_error;
+  char *field_buf;
+  if (!thd->mysql)            // bootstrap file handling
+    return false;
+
+  if (!(field_buf= (char*) alloc_root(alloc, conv_length + sizeof(uint) + 1)))
+    return true;
+  *next_field= field_buf + sizeof(uint);
+  length= copy_and_convert(*next_field, conv_length, to_cs,
+                           (const char*) from, length, from_cs, &dummy_error);
+  *(uint *) field_buf= length;
+  (*next_field)[length]= 0;
+  if (next_mysql_field->max_length < length)
+    next_mysql_field->max_length= length;
+  ++next_field;
+  ++next_mysql_field;
+  return false;
+}
+
 #if defined(_MSC_VER) && _MSC_VER < 1400
 #define vsnprintf _vsnprintf
 #endif

=== modified file 'mysys/charset.c'
--- mysys/charset.c	2009-02-13 16:41:47 +0000
+++ mysys/charset.c	2009-07-02 10:15:33 +0000
@@ -248,6 +248,7 @@
       {
 #if defined(HAVE_CHARSET_ucs2) && defined(HAVE_UCA_COLLATIONS)
         copy_uca_collation(newcs, &my_charset_ucs2_unicode_ci);
+	newcs->state|= MY_CS_AVAILABLE | MY_CS_LOADED | MY_CS_NONASCII;
 #endif        
       }
       else if (!strcmp(cs->csname, "utf8"))
@@ -280,6 +281,8 @@
 
         if (my_charset_is_8bit_pure_ascii(all_charsets[cs->number]))
           all_charsets[cs->number]->state|= MY_CS_PUREASCII;
+        if (!my_charset_is_ascii_compatible(cs))
+	  all_charsets[cs->number]->state|= MY_CS_NONASCII;
       }
     }
     else

=== modified file 'scripts/mysql_install_db.sh'
--- scripts/mysql_install_db.sh	2009-01-06 15:08:15 +0000
+++ scripts/mysql_install_db.sh	2009-07-02 10:15:33 +0000
@@ -1,5 +1,5 @@
 #!/bin/sh
-# Copyright (C) 2002-2003 MySQL AB
+# Copyright (C) 2002-2003 MySQL AB & Monty Program Ab
 # 
 # This program is free software; you can redistribute it and/or modify
 # it under the terms of the GNU General Public License as published by
@@ -14,7 +14,7 @@
 # along with this program; if not, write to the Free Software
 # Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
 
-# This scripts creates the MySQL Server system tables
+# This scripts creates the MariaDB Server system tables
 #
 # All unrecognized arguments to this script are passed to mysqld.
 
@@ -38,26 +38,27 @@
 {
   cat <<EOF
 Usage: $0 [OPTIONS]
-  --basedir=path       The path to the MySQL installation directory.
+  --basedir=path       The path to the MariaDB installation directory.
   --builddir=path      If using --srcdir with out-of-directory builds, you
                        will need to set this to the location of the build
                        directory where built files reside.
-  --cross-bootstrap    For internal use.  Used when building the MySQL system
+  --cross-bootstrap    For internal use.  Used when building the MariaDB system
                        tables on a different host than the target.
-  --datadir=path       The path to the MySQL data directory.
+  --datadir=path       The path to the MariaDB data directory.
   --force              Causes mysql_install_db to run even if DNS does not
                        work.  In that case, grant table entries that normally
                        use hostnames will use IP addresses.
-  --ldata=path         The path to the MySQL data directory. Same as --datadir.
+  --ldata=path         The path to the MariaDB data directory. Same as
+                       --datadir.
   --rpm                For internal use.  This option is used by RPM files
-                       during the MySQL installation process.
+                       during the MariaDB installation process.
   --skip-name-resolve  Use IP addresses rather than hostnames when creating
                        grant table entries.  This option can be useful if
                        your DNS does not work.
-  --srcdir=path        The path to the MySQL source directory.  This option
+  --srcdir=path        The path to the MariaDB source directory.  This option
                        uses the compiled binaries and support files within the
                        source tree, useful for if you don't want to install
-                       MySQL yet and just want to create the system tables.
+                       MariaDB yet and just want to create the system tables.
   --user=user_name     The login username to use for running mysqld.  Files
                        and directories created by mysqld will be owned by this
                        user.  You must be root to use this option.  By default
@@ -116,7 +117,7 @@
         defaults="$arg" ;;
 
       --cross-bootstrap|--windows)
-        # Used when building the MySQL system tables on a different host than
+        # Used when building the MariaDB system tables on a different host than
         # the target. The platform-independent files that are created in
         # --datadir on the host can be copied to the target system.
         #
@@ -338,10 +339,10 @@
     fi
     echo "WARNING: The host '$hostname' could not be looked up with resolveip."
     echo "This probably means that your libc libraries are not 100 % compatible"
-    echo "with this binary MySQL version. The MySQL daemon, mysqld, should work"
+    echo "with this binary MariaDB version. The MariaDB daemon, mysqld, should work"
     echo "normally with the exception that host name resolving will not work."
     echo "This means that you should use IP addresses instead of hostnames"
-    echo "when specifying MySQL privileges !"
+    echo "when specifying MariaDB privileges !"
   fi
 fi
 
@@ -388,7 +389,7 @@
   --net_buffer_length=16K"
 
 # Create the system and help tables by passing them to "mysqld --bootstrap"
-s_echo "Installing MySQL system tables..."
+s_echo "Installing MariaDB/MySQL system tables..."
 if { echo "use mysql;"; cat $create_system_tables $fill_system_tables; } | eval "$filter_cmd_line" | $mysqld_install_cmd_line > /dev/null
 then
   s_echo "OK"
@@ -410,14 +411,16 @@
   echo "Try 'mysqld --help' if you have problems with paths.  Using --log"
   echo "gives you a log in $ldata that may be helpful."
   echo
-  echo "The latest information about MySQL is available on the web at"
-  echo "http://www.mysql.com/.  Please consult the MySQL manual section"
+  echo "The latest information about MariaDB is available on the web at"
+  echo "http://askmonty.org/wiki/index.php/MariaDB";.
+  echo "If you have a problem, you can consult the MySQL manual section"
   echo "'Problems running mysql_install_db', and the manual section that"
-  echo "describes problems on your OS.  Another information source are the"
-  echo "MySQL email archives available at http://lists.mysql.com/.";
+  echo "describes problems on your OS at http://dev.mysql.com/doc/";
+  echo "MariaDB is hosted on launchpad; You can find the latest source and"
+  echo "email lists at http://launchpad.net/maria";
   echo
   echo "Please check all of the above before mailing us!  And remember, if"
-  echo "you do mail us, you MUST use the $scriptdir/mysqlbug script!"
+  echo "you do mail us, you should use the $scriptdir/mysqlbug script!"
   echo
   exit 1
 fi
@@ -442,7 +445,7 @@
   s_echo "support-files/mysql.server to the right place for your system"
 
   echo
-  echo "PLEASE REMEMBER TO SET A PASSWORD FOR THE MySQL root USER !"
+  echo "PLEASE REMEMBER TO SET A PASSWORD FOR THE MariaDB root USER !"
   echo "To do so, start the server, then issue the following commands:"
   echo
   echo "$bindir/mysqladmin -u root password 'new-password'"
@@ -455,23 +458,28 @@
   echo "databases and anonymous user created by default.  This is"
   echo "strongly recommended for production servers."
   echo
-  echo "See the manual for more instructions."
+  echo "See the MySQL manual for more instructions."
 
   if test "$in_rpm" -eq 0
   then
     echo
-    echo "You can start the MySQL daemon with:"
+    echo "You can start the MariaDB daemon with:"
     echo "cd $basedir ; $bindir/mysqld_safe &"
     echo
-    echo "You can test the MySQL daemon with mysql-test-run.pl"
+    echo "You can test the MariaDB daemon with mysql-test-run.pl"
     echo "cd $basedir/mysql-test ; perl mysql-test-run.pl"
   fi
 
   echo
   echo "Please report any problems with the $scriptdir/mysqlbug script!"
   echo
-  echo "The latest information about MySQL is available at http://www.mysql.com/";
-  echo "Support MySQL by buying support/licenses from http://shop.mysql.com/";
+  echo "The latest information about MariaDB is available at http://www.askmonty.org/.";
+  echo "You can find additional information about the MySQL part at:"
+  echo "http://dev.mysql.com";
+  echo "Support MariaDB development by buying support/new features from"
+  echo "Monty Program Ab. You can contact us about this at sales@xxxxxxxxxxxx".
+  echo "Alternatively consider joining our community based development effort:"
+  echo "http://askmonty.org/wiki/index.php/MariaDB#How_can_I_participate_in_the_development_of_MariaDB.3F";
   echo
 fi
 

=== modified file 'sql/protocol.cc'
--- sql/protocol.cc	2009-04-25 10:05:32 +0000
+++ sql/protocol.cc	2009-07-02 10:15:33 +0000
@@ -58,6 +58,65 @@
 }
 
 
+/*
+  net_store_data() - extended version with character set conversion.
+  
+  It is optimized for short strings whose length after
+  conversion is guaranteed to be less than 251, so that the length
+  prefix occupies exactly one byte. This avoids using
+  the "convert" member as a temporary buffer; the conversion
+  is done directly into the "packet" member.
+  The limit of 251 is good enough to optimize send_fields()
+  because column, table and database names fit within this limit.
+*/
+
+#ifndef EMBEDDED_LIBRARY
+bool Protocol::net_store_data(const uchar *from, size_t length,
+                              CHARSET_INFO *from_cs, CHARSET_INFO *to_cs)
+{
+  uint dummy_errors;
+  /* Calculate maximum possible result length */
+  size_t conv_length= to_cs->mbmaxlen * length / from_cs->mbminlen;
+  ulong packet_length, new_length;
+  char *length_pos, *to;
+
+  if (conv_length > 250)
+  {
+    /*
+      For strings with conv_length greater than 250 bytes
+      we don't know how many bytes we will need to store the length: one or
+      two, because we don't know the result length until conversion is done.
+      For example, when converting from utf8 (mbmaxlen=3) to latin1,
+      conv_length=300 means that the result length can vary between 100 and 300.
+      length=100 needs one byte, length=300 needs two bytes.
+      
+      Thus conversion directly to "packet" is not worthwhile.
+      Let's use "convert" as a temporary buffer.
+    */
+    return (convert->copy((const char*) from, length, from_cs, to_cs,
+                          &dummy_errors) ||
+            net_store_data((const uchar*) convert->ptr(), convert->length()));
+  }
+
+  packet_length= packet->length();
+  new_length= packet_length + conv_length + 1;
+
+  if (new_length > packet->alloced_length() && packet->realloc(new_length))
+    return 1;
+
+  length_pos= (char*) packet->ptr() + packet_length;
+  to= length_pos + 1;
+
+  to+= copy_and_convert(to, conv_length, to_cs,
+                        (const char*) from, length, from_cs, &dummy_errors);
+
+  net_store_length((uchar*) length_pos, to - length_pos - 1);
+  packet->length((uint) (to - packet->ptr()));
+  return 0;
+}
+#endif
+
+
 /**
   Send a error string to client.
 
@@ -773,10 +832,10 @@
       fromcs != &my_charset_bin &&
       tocs != &my_charset_bin)
   {
-    uint dummy_errors;
-    return (convert->copy(from, length, fromcs, tocs, &dummy_errors) ||
-            net_store_data((uchar*) convert->ptr(), convert->length()));
+    /* Store with conversion */
+    return net_store_data((uchar*) from, length, fromcs, tocs);
   }
+  /* Store without conversion */
   return net_store_data((uchar*) from, length);
 }
 
@@ -802,7 +861,7 @@
 {
   CHARSET_INFO *tocs= this->thd->variables.character_set_results;
 #ifndef DBUG_OFF
-  DBUG_PRINT("info", ("Protocol_text::store field %u (%u): %*s", field_pos,
+  DBUG_PRINT("info", ("Protocol_text::store field %u (%u): %.*s", field_pos,
                       field_count, (int) length, from));
   DBUG_ASSERT(field_pos < field_count);
   DBUG_ASSERT(field_types == 0 ||

=== modified file 'sql/protocol.h'
--- sql/protocol.h	2007-12-20 21:11:37 +0000
+++ sql/protocol.h	2009-07-02 10:15:33 +0000
@@ -42,6 +42,8 @@
   MYSQL_FIELD *next_mysql_field;
   MEM_ROOT *alloc;
 #endif
+  bool net_store_data(const uchar *from, size_t length,
+                      CHARSET_INFO *fromcs, CHARSET_INFO *tocs);
   bool store_string_aux(const char *from, size_t length,
                         CHARSET_INFO *fromcs, CHARSET_INFO *tocs);
 public:

=== modified file 'sql/sql_string.cc'
--- sql/sql_string.cc	2009-04-25 10:05:32 +0000
+++ sql/sql_string.cc	2009-07-02 10:15:33 +0000
@@ -782,10 +782,11 @@
 */
 
 
-uint32
-copy_and_convert(char *to, uint32 to_length, CHARSET_INFO *to_cs, 
-                 const char *from, uint32 from_length, CHARSET_INFO *from_cs,
-                 uint *errors)
+static uint32
+copy_and_convert_extended(char *to, uint32 to_length, CHARSET_INFO *to_cs, 
+                          const char *from, uint32 from_length,
+                          CHARSET_INFO *from_cs,
+                          uint *errors)
 {
   int         cnvres;
   my_wc_t     wc;
@@ -900,6 +901,65 @@
 }
 
 /*
+  Optimized for quick copying of ASCII characters in the range 0x00..0x7F.
+*/
+uint32
+copy_and_convert(char *to, uint32 to_length, CHARSET_INFO *to_cs, 
+                 const char *from, uint32 from_length, CHARSET_INFO *from_cs,
+                 uint *errors)
+{
+  /*
+    If any of the character sets is not ASCII compatible,
+    immediately switch to slow mb_wc->wc_mb method.
+  */
+  if ((to_cs->state | from_cs->state) & MY_CS_NONASCII)
+    return copy_and_convert_extended(to, to_length, to_cs,
+                                     from, from_length, from_cs, errors);
+
+  uint32 length= min(to_length, from_length), length2= length;
+
+#if defined(__i386__)
+  /*
+    Special loop for i386: it allows us to refer to a
+    non-aligned memory block as UINT32, which makes
+    it possible to copy four bytes at once. This
+    gives about a 10% performance improvement compared
+    to a byte-by-byte loop.
+  */
+  for ( ; length >= 4; length-= 4, from+= 4, to+= 4)
+  {
+    if ((*(uint32*)from) & 0x80808080)
+      break;
+    *((uint32*) to)= *((const uint32*) from);
+  }
+#endif
+
+  for (; ; *to++= *from++, length--)
+  {
+    if (!length)
+    {
+      *errors= 0;
+      return length2;
+    }
+    if (*((unsigned char*) from) > 0x7F) /* A non-ASCII character */
+    {
+      uint32 copied_length= length2 - length;
+      to_length-= copied_length;
+      from_length-= copied_length;
+      return copied_length + copy_and_convert_extended(to, to_length,
+                                                       to_cs,
+                                                       from, from_length,
+                                                       from_cs,
+                                                       errors);
+    }
+  }
+
+  DBUG_ASSERT(FALSE); // Should never get to here
+  return 0;           // Make compiler happy
+}
+
+
+/*
   copy a string,
   with optional character set conversion,
   with optional left padding (for binary -> UCS2 conversion)

=== modified file 'strings/conf_to_src.c'
--- strings/conf_to_src.c	2008-11-14 16:29:38 +0000
+++ strings/conf_to_src.c	2009-07-02 10:15:33 +0000
@@ -184,11 +184,12 @@
 {
   fprintf(f,"{\n");
   fprintf(f,"  %d,%d,%d,\n",cs->number,0,0);
-  fprintf(f,"  MY_CS_COMPILED%s%s%s%s,\n",
+  fprintf(f,"  MY_CS_COMPILED%s%s%s%s%s,\n",
           cs->state & MY_CS_BINSORT         ? "|MY_CS_BINSORT"   : "",
           cs->state & MY_CS_PRIMARY         ? "|MY_CS_PRIMARY"   : "",
           is_case_sensitive(cs)             ? "|MY_CS_CSSORT"    : "",
-          my_charset_is_8bit_pure_ascii(cs) ? "|MY_CS_PUREASCII" : "");
+          my_charset_is_8bit_pure_ascii(cs) ? "|MY_CS_PUREASCII" : "",
+	  !my_charset_is_ascii_compatible(cs) ? "|MY_CS_NONASCII": "");
   
   if (cs->name)
   {

=== modified file 'strings/ctype-extra.c'
--- strings/ctype-extra.c	2007-08-20 11:47:31 +0000
+++ strings/ctype-extra.c	2009-07-02 10:15:33 +0000
@@ -6804,7 +6804,7 @@
 #ifdef HAVE_CHARSET_swe7
 {
   10,0,0,
-  MY_CS_COMPILED|MY_CS_PRIMARY,
+  MY_CS_COMPILED|MY_CS_PRIMARY|MY_CS_NONASCII,
   "swe7",                     /* cset name     */
   "swe7_swedish_ci",                     /* coll name     */
   "",                       /* comment       */
@@ -8454,7 +8454,7 @@
 #ifdef HAVE_CHARSET_swe7
 {
   82,0,0,
-  MY_CS_COMPILED|MY_CS_BINSORT,
+  MY_CS_COMPILED|MY_CS_BINSORT|MY_CS_NONASCII,
   "swe7",                     /* cset name     */
   "swe7_bin",                     /* coll name     */
   "",                       /* comment       */
@@ -8550,72 +8550,6 @@
 }
 ,
 #endif
-#ifdef HAVE_CHARSET_geostd8
-{
-  92,0,0,
-  MY_CS_COMPILED|MY_CS_PRIMARY,
-  "geostd8",                     /* cset name     */
-  "geostd8_general_ci",                     /* coll name     */
-  "",                       /* comment       */
-  NULL,                       /* tailoring     */
-  ctype_geostd8_general_ci,                   /* ctype         */
-  to_lower_geostd8_general_ci,                /* lower         */
-  to_upper_geostd8_general_ci,                /* upper         */
-  sort_order_geostd8_general_ci,            /* sort_order    */
-  NULL,                       /* contractions  */
-  NULL,                       /* sort_order_big*/
-  to_uni_geostd8_general_ci,                  /* to_uni        */
-  NULL,                       /* from_uni      */
-  my_unicase_default,         /* caseinfo      */
-  NULL,                       /* state map     */
-  NULL,                       /* ident map     */
-  1,                          /* strxfrm_multiply*/
-  1,                          /* caseup_multiply*/
-  1,                          /* casedn_multiply*/
-  1,                          /* mbminlen      */
-  1,                          /* mbmaxlen      */
-  0,                          /* min_sort_char */
-  255,                        /* max_sort_char */
-  ' ',                        /* pad_char      */
-  0,                          /* escape_with_backslash_is_dangerous */
-  &my_charset_8bit_handler,
-  &my_collation_8bit_simple_ci_handler,
-}
-,
-#endif
-#ifdef HAVE_CHARSET_geostd8
-{
-  93,0,0,
-  MY_CS_COMPILED|MY_CS_BINSORT,
-  "geostd8",                     /* cset name     */
-  "geostd8_bin",                     /* coll name     */
-  "",                       /* comment       */
-  NULL,                       /* tailoring     */
-  ctype_geostd8_bin,                   /* ctype         */
-  to_lower_geostd8_bin,                /* lower         */
-  to_upper_geostd8_bin,                /* upper         */
-  NULL,                     /* sort_order    */
-  NULL,                       /* contractions  */
-  NULL,                       /* sort_order_big*/
-  to_uni_geostd8_bin,                  /* to_uni        */
-  NULL,                       /* from_uni      */
-  my_unicase_default,         /* caseinfo      */
-  NULL,                       /* state map     */
-  NULL,                       /* ident map     */
-  1,                          /* strxfrm_multiply*/
-  1,                          /* caseup_multiply*/
-  1,                          /* casedn_multiply*/
-  1,                          /* mbminlen      */
-  1,                          /* mbmaxlen      */
-  0,                          /* min_sort_char */
-  255,                        /* max_sort_char */
-  ' ',                        /* pad_char      */
-  0,                          /* escape_with_backslash_is_dangerous */
-  &my_charset_8bit_handler,
-  &my_collation_8bit_bin_handler,
-}
-,
-#endif
 #ifdef HAVE_CHARSET_latin1
 {
   94,0,0,

=== modified file 'strings/ctype-sjis.c'
--- strings/ctype-sjis.c	2007-10-04 07:10:15 +0000
+++ strings/ctype-sjis.c	2009-07-02 10:15:33 +0000
@@ -4672,7 +4672,7 @@
 CHARSET_INFO my_charset_sjis_japanese_ci=
 {
     13,0,0,		/* number */
-    MY_CS_COMPILED|MY_CS_PRIMARY|MY_CS_STRNXFRM,	/* state      */
+    MY_CS_COMPILED|MY_CS_PRIMARY|MY_CS_STRNXFRM|MY_CS_NONASCII,	/* state      */
     "sjis",		/* cs name    */
     "sjis_japanese_ci",	/* name */
     "",			/* comment    */
@@ -4704,7 +4704,7 @@
 CHARSET_INFO my_charset_sjis_bin=
 {
     88,0,0,		/* number */
-    MY_CS_COMPILED|MY_CS_BINSORT,	/* state      */
+    MY_CS_COMPILED|MY_CS_BINSORT|MY_CS_NONASCII, /* state      */
     "sjis",		/* cs name    */
     "sjis_bin",		/* name */
     "",			/* comment    */

=== modified file 'strings/ctype-uca.c'
--- strings/ctype-uca.c	2007-07-03 09:06:57 +0000
+++ strings/ctype-uca.c	2009-07-02 10:15:33 +0000
@@ -8086,7 +8086,7 @@
 CHARSET_INFO my_charset_ucs2_unicode_ci=
 {
     128,0,0,		/* number       */
-    MY_CS_COMPILED|MY_CS_STRNXFRM|MY_CS_UNICODE,
+    MY_CS_COMPILED|MY_CS_STRNXFRM|MY_CS_UNICODE|MY_CS_NONASCII,
     "ucs2",		/* cs name    */
     "ucs2_unicode_ci",	/* name         */
     "",			/* comment      */
@@ -8118,7 +8118,7 @@
 CHARSET_INFO my_charset_ucs2_icelandic_uca_ci=
 {
     129,0,0,		/* number       */
-    MY_CS_COMPILED|MY_CS_STRNXFRM|MY_CS_UNICODE,
+    MY_CS_COMPILED|MY_CS_STRNXFRM|MY_CS_UNICODE|MY_CS_NONASCII,
     "ucs2",		/* cs name    */
     "ucs2_icelandic_ci",/* name         */
     "",			/* comment      */
@@ -8150,7 +8150,7 @@
 CHARSET_INFO my_charset_ucs2_latvian_uca_ci=
 {
     130,0,0,		/* number       */
-    MY_CS_COMPILED|MY_CS_STRNXFRM|MY_CS_UNICODE,
+    MY_CS_COMPILED|MY_CS_STRNXFRM|MY_CS_UNICODE|MY_CS_NONASCII,
     "ucs2",		/* cs name    */
     "ucs2_latvian_ci",	/* name         */
     "",			/* comment      */
@@ -8182,7 +8182,7 @@
 CHARSET_INFO my_charset_ucs2_romanian_uca_ci=
 {
     131,0,0,		/* number       */
-    MY_CS_COMPILED|MY_CS_STRNXFRM|MY_CS_UNICODE,
+    MY_CS_COMPILED|MY_CS_STRNXFRM|MY_CS_UNICODE|MY_CS_NONASCII,
     "ucs2",		/* cs name    */
     "ucs2_romanian_ci",	/* name         */
     "",			/* comment      */
@@ -8214,7 +8214,7 @@
 CHARSET_INFO my_charset_ucs2_slovenian_uca_ci=
 {
     132,0,0,		/* number       */
-    MY_CS_COMPILED|MY_CS_STRNXFRM|MY_CS_UNICODE,
+    MY_CS_COMPILED|MY_CS_STRNXFRM|MY_CS_UNICODE|MY_CS_NONASCII,
     "ucs2",		/* cs name    */
     "ucs2_slovenian_ci",/* name         */
     "",			/* comment      */
@@ -8246,7 +8246,7 @@
 CHARSET_INFO my_charset_ucs2_polish_uca_ci=
 {
     133,0,0,		/* number       */
-    MY_CS_COMPILED|MY_CS_STRNXFRM|MY_CS_UNICODE,
+    MY_CS_COMPILED|MY_CS_STRNXFRM|MY_CS_UNICODE|MY_CS_NONASCII,
     "ucs2",		/* cs name    */
     "ucs2_polish_ci",	/* name         */
     "",			/* comment      */
@@ -8278,7 +8278,7 @@
 CHARSET_INFO my_charset_ucs2_estonian_uca_ci=
 {
     134,0,0,		/* number       */
-    MY_CS_COMPILED|MY_CS_STRNXFRM|MY_CS_UNICODE,
+    MY_CS_COMPILED|MY_CS_STRNXFRM|MY_CS_UNICODE|MY_CS_NONASCII,
     "ucs2",		/* cs name    */
     "ucs2_estonian_ci",	/* name         */
     "",			/* comment      */
@@ -8310,7 +8310,7 @@
 CHARSET_INFO my_charset_ucs2_spanish_uca_ci=
 {
     135,0,0,		/* number       */
-    MY_CS_COMPILED|MY_CS_STRNXFRM|MY_CS_UNICODE,
+    MY_CS_COMPILED|MY_CS_STRNXFRM|MY_CS_UNICODE|MY_CS_NONASCII,
     "ucs2",		/* cs name    */
     "ucs2_spanish_ci",	/* name         */
     "",			/* comment      */
@@ -8342,7 +8342,7 @@
 CHARSET_INFO my_charset_ucs2_swedish_uca_ci=
 {
     136,0,0,		/* number       */
-    MY_CS_COMPILED|MY_CS_STRNXFRM|MY_CS_UNICODE,
+    MY_CS_COMPILED|MY_CS_STRNXFRM|MY_CS_UNICODE|MY_CS_NONASCII,
     "ucs2",		/* cs name    */
     "ucs2_swedish_ci",	/* name         */
     "",			/* comment      */
@@ -8374,7 +8374,7 @@
 CHARSET_INFO my_charset_ucs2_turkish_uca_ci=
 {
     137,0,0,		/* number       */
-    MY_CS_COMPILED|MY_CS_STRNXFRM|MY_CS_UNICODE,
+    MY_CS_COMPILED|MY_CS_STRNXFRM|MY_CS_UNICODE|MY_CS_NONASCII,
     "ucs2",		/* cs name    */
     "ucs2_turkish_ci",	/* name         */
     "",			/* comment      */
@@ -8406,7 +8406,7 @@
 CHARSET_INFO my_charset_ucs2_czech_uca_ci=
 {
     138,0,0,		/* number       */
-    MY_CS_COMPILED|MY_CS_STRNXFRM|MY_CS_UNICODE,
+    MY_CS_COMPILED|MY_CS_STRNXFRM|MY_CS_UNICODE|MY_CS_NONASCII,
     "ucs2",		/* cs name    */
     "ucs2_czech_ci",	/* name         */
     "",			/* comment      */
@@ -8439,7 +8439,7 @@
 CHARSET_INFO my_charset_ucs2_danish_uca_ci=
 {
     139,0,0,		/* number       */
-    MY_CS_COMPILED|MY_CS_STRNXFRM|MY_CS_UNICODE,
+    MY_CS_COMPILED|MY_CS_STRNXFRM|MY_CS_UNICODE|MY_CS_NONASCII,
     "ucs2",		/* cs name    */
     "ucs2_danish_ci",	/* name         */
     "",			/* comment      */
@@ -8471,7 +8471,7 @@
 CHARSET_INFO my_charset_ucs2_lithuanian_uca_ci=
 {
     140,0,0,		/* number       */
-    MY_CS_COMPILED|MY_CS_STRNXFRM|MY_CS_UNICODE,
+    MY_CS_COMPILED|MY_CS_STRNXFRM|MY_CS_UNICODE|MY_CS_NONASCII,
     "ucs2",		/* cs name    */
     "ucs2_lithuanian_ci",/* name         */
     "",			/* comment      */
@@ -8503,7 +8503,7 @@
 CHARSET_INFO my_charset_ucs2_slovak_uca_ci=
 {
     141,0,0,		/* number       */
-    MY_CS_COMPILED|MY_CS_STRNXFRM|MY_CS_UNICODE,
+    MY_CS_COMPILED|MY_CS_STRNXFRM|MY_CS_UNICODE|MY_CS_NONASCII,
     "ucs2",		/* cs name    */
     "ucs2_slovak_ci",	/* name         */
     "",			/* comment      */
@@ -8535,7 +8535,7 @@
 CHARSET_INFO my_charset_ucs2_spanish2_uca_ci=
 {
     142,0,0,		/* number       */
-    MY_CS_COMPILED|MY_CS_STRNXFRM|MY_CS_UNICODE,
+    MY_CS_COMPILED|MY_CS_STRNXFRM|MY_CS_UNICODE|MY_CS_NONASCII,
     "ucs2",		/* cs name    */
     "ucs2_spanish2_ci",	/* name         */
     "",			/* comment      */
@@ -8568,7 +8568,7 @@
 CHARSET_INFO my_charset_ucs2_roman_uca_ci=
 {
     143,0,0,		/* number       */
-    MY_CS_COMPILED|MY_CS_STRNXFRM|MY_CS_UNICODE,
+    MY_CS_COMPILED|MY_CS_STRNXFRM|MY_CS_UNICODE|MY_CS_NONASCII,
     "ucs2",		/* cs name    */
     "ucs2_roman_ci",	/* name         */
     "",			/* comment      */
@@ -8601,7 +8601,7 @@
 CHARSET_INFO my_charset_ucs2_persian_uca_ci=
 {
     144,0,0,		/* number       */
-    MY_CS_COMPILED|MY_CS_STRNXFRM|MY_CS_UNICODE,
+    MY_CS_COMPILED|MY_CS_STRNXFRM|MY_CS_UNICODE|MY_CS_NONASCII,
     "ucs2",		/* cs name    */
     "ucs2_persian_ci",	/* name         */
     "",			/* comment      */
@@ -8634,7 +8634,7 @@
 CHARSET_INFO my_charset_ucs2_esperanto_uca_ci=
 {
     145,0,0,		/* number       */
-    MY_CS_COMPILED|MY_CS_STRNXFRM|MY_CS_UNICODE,
+    MY_CS_COMPILED|MY_CS_STRNXFRM|MY_CS_UNICODE|MY_CS_NONASCII,
     "ucs2",		/* cs name    */
     "ucs2_esperanto_ci",/* name         */
     "",			/* comment      */
@@ -8667,7 +8667,7 @@
 CHARSET_INFO my_charset_ucs2_hungarian_uca_ci=
 {
     146,0,0,		/* number       */
-    MY_CS_COMPILED|MY_CS_STRNXFRM|MY_CS_UNICODE,
+    MY_CS_COMPILED|MY_CS_STRNXFRM|MY_CS_UNICODE|MY_CS_NONASCII,
     "ucs2",		/* cs name    */
     "ucs2_hungarian_ci",/* name         */
     "",			/* comment      */

=== modified file 'strings/ctype-ucs2.c'
--- strings/ctype-ucs2.c	2009-02-13 16:41:47 +0000
+++ strings/ctype-ucs2.c	2009-07-02 10:15:33 +0000
@@ -1717,7 +1717,7 @@
 CHARSET_INFO my_charset_ucs2_general_ci=
 {
     35,0,0,		/* number       */
-    MY_CS_COMPILED|MY_CS_PRIMARY|MY_CS_STRNXFRM|MY_CS_UNICODE,
+    MY_CS_COMPILED|MY_CS_PRIMARY|MY_CS_STRNXFRM|MY_CS_UNICODE|MY_CS_NONASCII,
     "ucs2",		/* cs name    */
     "ucs2_general_ci",	/* name         */
     "",			/* comment      */
@@ -1749,7 +1749,7 @@
 CHARSET_INFO my_charset_ucs2_bin=
 {
     90,0,0,		/* number       */
-    MY_CS_COMPILED|MY_CS_BINSORT|MY_CS_UNICODE,
+    MY_CS_COMPILED|MY_CS_BINSORT|MY_CS_UNICODE|MY_CS_NONASCII,
     "ucs2",		/* cs name    */
     "ucs2_bin",		/* name         */
     "",			/* comment      */

=== modified file 'strings/ctype-utf8.c'
--- strings/ctype-utf8.c	2008-02-11 12:28:33 +0000
+++ strings/ctype-utf8.c	2009-07-02 10:15:33 +0000
@@ -4204,7 +4204,7 @@
 CHARSET_INFO my_charset_filename=
 {
     17,0,0,             /* number       */
-    MY_CS_COMPILED|MY_CS_PRIMARY|MY_CS_STRNXFRM|MY_CS_UNICODE|MY_CS_HIDDEN,
+    MY_CS_COMPILED|MY_CS_PRIMARY|MY_CS_STRNXFRM|MY_CS_UNICODE|MY_CS_HIDDEN|MY_CS_NONASCII,
     "filename",         /* cs name      */
     "filename",         /* name         */
     "",                 /* comment      */

=== modified file 'strings/ctype.c'
--- strings/ctype.c	2009-04-25 10:05:32 +0000
+++ strings/ctype.c	2009-07-02 10:15:33 +0000
@@ -405,3 +405,23 @@
   }
   return 1;
 }
+
+
+/*
+  Shared function between conf_to_src and mysys.
+  Check if an 8-bit character set is compatible with
+  ASCII in the range 0x00..0x7F.
+*/
+my_bool
+my_charset_is_ascii_compatible(CHARSET_INFO *cs)
+{
+  uint i;
+  if (!cs->tab_to_uni)
+    return 1;
+  for (i= 0; i < 128; i++)
+  {
+    if (cs->tab_to_uni[i] != i)
+      return 0;
+  }
+  return 1;
+}



--
lp:maria
https://code.launchpad.net/~maria-captains/maria/5.1

Your team Maria developers is subscribed to branch lp:maria.
To unsubscribe from this branch go to https://code.launchpad.net/~maria-captains/maria/5.1/+edit-subscription.