maria-developers team mailing list archive

Thread
Date
bzr commit into MariaDB 5.1, with Maria 1.5:maria branch (monty:2715)

To: maria-developers@xxxxxxxxxxxxxxxxxxx
From: Michael Widenius <monty@xxxxxxxxxxxx>
Date: Thu, 02 Jul 2009 10:15:38 -0000
User-agent: Bazaar (1.13)
#At lp:maria based on revid:monty@xxxxxxxxxxxx-20090630120129-6gan4k9dyjxj83e4

 2715 Michael Widenius	2009-07-02
      Added MY_CS_NONASCII marker for character sets that are not compatible with latin1 for characters 0x00-0x7f
      This allows us to skip and speed up some very common character converts that MySQL is doing when sending data to the client
      and this gives us a nice speed increase for most queries that uses only characters in the range 0x00-0x7f.
      
      This code is based on Alexander Barkov's code that he has done in MySQL 6.0
      modified:
        include/m_ctype.h
        libmysqld/lib_sql.cc
        mysys/charset.c
        scripts/mysql_install_db.sh
        sql/protocol.cc
        sql/protocol.h
        sql/sql_string.cc
        strings/conf_to_src.c
        strings/ctype-extra.c
        strings/ctype-sjis.c
        strings/ctype-uca.c
        strings/ctype-ucs2.c
        strings/ctype-utf8.c
        strings/ctype.c

per-file messages:
  include/m_ctype.h
    Added MY_CS_NONASCII marker
  libmysqld/lib_sql.cc
    Added function net_store_data(...) that takes to and from CHARSET_INFO * as arguments
  mysys/charset.c
    Mark character sets with MY_CS_NONASCII
  scripts/mysql_install_db.sh
    Fixed messages to refer to MariaDB instead of MySQL
  sql/protocol.cc
    Added function net_store_data(...) that takes to and from CHARSET_INFO * as arguments
  sql/protocol.h
    Added function net_store_data(...) that takes to and from CHARSET_INFO * as arguments
  sql/sql_string.cc
    Quicker copy of strings with no characters above 0x7f
  strings/conf_to_src.c
    Added printing of MY_CS_NONASCII
  strings/ctype-extra.c
    Mark incompatible character sets with MY_CS_NONASCII
    Removed duplicated character set geostd
  strings/ctype-sjis.c
    Mark incompatible character sets with MY_CS_NONASCII
  strings/ctype-uca.c
    Mark incompatible character sets with MY_CS_NONASCII
  strings/ctype-ucs2.c
    Mark incompatible character sets with MY_CS_NONASCII
  strings/ctype-utf8.c
    Mark incompatible character sets with MY_CS_NONASCII
  strings/ctype.c
    Added function to check if character set is compatible with latin1 in ranges 0x00-0x7f
=== modified file 'include/m_ctype.h'
--- a/include/m_ctype.h	2008-12-23 14:21:01 +0000
+++ b/include/m_ctype.h	2009-07-02 10:15:33 +0000
@@ -87,6 +87,7 @@ extern MY_UNI_CTYPE my_uni_ctype[256];
 #define MY_CS_CSSORT	1024   /* if case sensitive sort order   */	
 #define MY_CS_HIDDEN	2048   /* don't display in SHOW          */	
 #define MY_CS_PUREASCII 4096   /* if a charset is pure ascii     */
+#define MY_CS_NONASCII  8192   /* if not ASCII-compatible        */
 #define MY_CHARSET_UNDEFINED 0
 
 /* Character repertoire flags */
@@ -517,6 +518,7 @@ uint my_charset_repertoire(CHARSET_INFO 
 #define my_strcasecmp(s, a, b)        ((s)->coll->strcasecmp((s), (a), (b)))
 #define my_charpos(cs, b, e, num)     (cs)->cset->charpos((cs), (const char*) (b), (const char *)(e), (num))
 
+my_bool my_charset_is_ascii_compatible(CHARSET_INFO *cs);
 
 #define use_mb(s)                     ((s)->cset->ismbchar != NULL)
 #define my_ismbchar(s, a, b)          ((s)->cset->ismbchar((s), (a), (b)))

=== modified file 'libmysqld/lib_sql.cc'
--- a/libmysqld/lib_sql.cc	2009-02-24 11:29:49 +0000
+++ b/libmysqld/lib_sql.cc	2009-07-02 10:15:33 +0000
@@ -1124,6 +1124,7 @@ bool Protocol_text::store_null()
   return false;
 }
 
+
 bool Protocol::net_store_data(const uchar *from, size_t length)
 {
   char *field_buf;
@@ -1143,6 +1144,30 @@ bool Protocol::net_store_data(const ucha
   return FALSE;
 }
 
+
+bool Protocol::net_store_data(const uchar *from, size_t length,
+                              CHARSET_INFO *from_cs, CHARSET_INFO *to_cs)
+{
+  uint conv_length= to_cs->mbmaxlen * length / from_cs->mbminlen;
+  uint dummy_error;
+  char *field_buf;
+  if (!thd->mysql)            // bootstrap file handling
+    return false;
+
+  if (!(field_buf= (char*) alloc_root(alloc, conv_length + sizeof(uint) + 1)))
+    return true;
+  *next_field= field_buf + sizeof(uint);
+  length= copy_and_convert(*next_field, conv_length, to_cs,
+                           (const char*) from, length, from_cs, &dummy_error);
+  *(uint *) field_buf= length;
+  (*next_field)[length]= 0;
+  if (next_mysql_field->max_length < length)
+    next_mysql_field->max_length= length;
+  ++next_field;
+  ++next_mysql_field;
+  return false;
+}
+
 #if defined(_MSC_VER) && _MSC_VER < 1400
 #define vsnprintf _vsnprintf
 #endif

=== modified file 'mysys/charset.c'
--- a/mysys/charset.c	2009-02-13 16:41:47 +0000
+++ b/mysys/charset.c	2009-07-02 10:15:33 +0000
@@ -248,6 +248,7 @@ static int add_collation(CHARSET_INFO *c
       {
 #if defined(HAVE_CHARSET_ucs2) && defined(HAVE_UCA_COLLATIONS)
         copy_uca_collation(newcs, &my_charset_ucs2_unicode_ci);
+	newcs->state|= MY_CS_AVAILABLE | MY_CS_LOADED | MY_CS_NONASCII;
 #endif        
       }
       else if (!strcmp(cs->csname, "utf8"))
@@ -280,6 +281,8 @@ static int add_collation(CHARSET_INFO *c
 
         if (my_charset_is_8bit_pure_ascii(all_charsets[cs->number]))
           all_charsets[cs->number]->state|= MY_CS_PUREASCII;
+        if (!my_charset_is_ascii_compatible(cs))
+	  all_charsets[cs->number]->state|= MY_CS_NONASCII;
       }
     }
     else

=== modified file 'scripts/mysql_install_db.sh'
--- a/scripts/mysql_install_db.sh	2009-01-06 15:08:15 +0000
+++ b/scripts/mysql_install_db.sh	2009-07-02 10:15:33 +0000
@@ -1,5 +1,5 @@
 #!/bin/sh
-# Copyright (C) 2002-2003 MySQL AB
+# Copyright (C) 2002-2003 MySQL AB & Monty Program Ab
 # 
 # This program is free software; you can redistribute it and/or modify
 # it under the terms of the GNU General Public License as published by
@@ -14,7 +14,7 @@
 # along with this program; if not, write to the Free Software
 # Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
 
-# This scripts creates the MySQL Server system tables
+# This scripts creates the MariaDB Server system tables
 #
 # All unrecognized arguments to this script are passed to mysqld.
 
@@ -38,26 +38,27 @@ usage()
 {
   cat <<EOF
 Usage: $0 [OPTIONS]
-  --basedir=path       The path to the MySQL installation directory.
+  --basedir=path       The path to the MariaDB installation directory.
   --builddir=path      If using --srcdir with out-of-directory builds, you
                        will need to set this to the location of the build
                        directory where built files reside.
-  --cross-bootstrap    For internal use.  Used when building the MySQL system
+  --cross-bootstrap    For internal use.  Used when building the MariaDB system
                        tables on a different host than the target.
-  --datadir=path       The path to the MySQL data directory.
+  --datadir=path       The path to the MariaDB data directory.
   --force              Causes mysql_install_db to run even if DNS does not
                        work.  In that case, grant table entries that normally
                        use hostnames will use IP addresses.
-  --ldata=path         The path to the MySQL data directory. Same as --datadir.
+  --ldata=path         The path to the MariaDB data directory. Same as
+                       --datadir.
   --rpm                For internal use.  This option is used by RPM files
-                       during the MySQL installation process.
+                       during the MariaDB installation process.
   --skip-name-resolve  Use IP addresses rather than hostnames when creating
                        grant table entries.  This option can be useful if
                        your DNS does not work.
-  --srcdir=path        The path to the MySQL source directory.  This option
+  --srcdir=path        The path to the MariaDB source directory.  This option
                        uses the compiled binaries and support files within the
                        source tree, useful for if you don't want to install
-                       MySQL yet and just want to create the system tables.
+                       MariaDB yet and just want to create the system tables.
   --user=user_name     The login username to use for running mysqld.  Files
                        and directories created by mysqld will be owned by this
                        user.  You must be root to use this option.  By default
@@ -116,7 +117,7 @@ parse_arguments()
         defaults="$arg" ;;
 
       --cross-bootstrap|--windows)
-        # Used when building the MySQL system tables on a different host than
+        # Used when building the MariaDB system tables on a different host than
         # the target. The platform-independent files that are created in
         # --datadir on the host can be copied to the target system.
         #
@@ -338,10 +339,10 @@ then
     fi
     echo "WARNING: The host '$hostname' could not be looked up with resolveip."
     echo "This probably means that your libc libraries are not 100 % compatible"
-    echo "with this binary MySQL version. The MySQL daemon, mysqld, should work"
+    echo "with this binary MariaDB version. The MariaDB daemon, mysqld, should work"
     echo "normally with the exception that host name resolving will not work."
     echo "This means that you should use IP addresses instead of hostnames"
-    echo "when specifying MySQL privileges !"
+    echo "when specifying MariaDB privileges !"
   fi
 fi
 
@@ -388,7 +389,7 @@ mysqld_install_cmd_line="$mysqld_bootstr
   --net_buffer_length=16K"
 
 # Create the system and help tables by passing them to "mysqld --bootstrap"
-s_echo "Installing MySQL system tables..."
+s_echo "Installing MariaDB/MySQL system tables..."
 if { echo "use mysql;"; cat $create_system_tables $fill_system_tables; } | eval "$filter_cmd_line" | $mysqld_install_cmd_line > /dev/null
 then
   s_echo "OK"
@@ -410,14 +411,16 @@ else
   echo "Try 'mysqld --help' if you have problems with paths.  Using --log"
   echo "gives you a log in $ldata that may be helpful."
   echo
-  echo "The latest information about MySQL is available on the web at"
-  echo "http://www.mysql.com/.  Please consult the MySQL manual section"
+  echo "The latest information about MariaDB is available on the web at"
+  echo "http://askmonty.org/wiki/index.php/MariaDB";.
+  echo "If you have a problem, you can consult the MySQL manual section"
   echo "'Problems running mysql_install_db', and the manual section that"
-  echo "describes problems on your OS.  Another information source are the"
-  echo "MySQL email archives available at http://lists.mysql.com/.";
+  echo "describes problems on your OS at http://dev.mysql.com/doc/";
+  echo "MariaDB is hosted on launchpad; You can find the latest source and"
+  echo "email lists at http://launchpad.net/maria";
   echo
   echo "Please check all of the above before mailing us!  And remember, if"
-  echo "you do mail us, you MUST use the $scriptdir/mysqlbug script!"
+  echo "you do mail us, you should use the $scriptdir/mysqlbug script!"
   echo
   exit 1
 fi
@@ -442,7 +445,7 @@ then
   s_echo "support-files/mysql.server to the right place for your system"
 
   echo
-  echo "PLEASE REMEMBER TO SET A PASSWORD FOR THE MySQL root USER !"
+  echo "PLEASE REMEMBER TO SET A PASSWORD FOR THE MariaDB root USER !"
   echo "To do so, start the server, then issue the following commands:"
   echo
   echo "$bindir/mysqladmin -u root password 'new-password'"
@@ -455,23 +458,28 @@ then
   echo "databases and anonymous user created by default.  This is"
   echo "strongly recommended for production servers."
   echo
-  echo "See the manual for more instructions."
+  echo "See the MySQL manual for more instructions."
 
   if test "$in_rpm" -eq 0
   then
     echo
-    echo "You can start the MySQL daemon with:"
+    echo "You can start the MariaDB daemon with:"
     echo "cd $basedir ; $bindir/mysqld_safe &"
     echo
-    echo "You can test the MySQL daemon with mysql-test-run.pl"
+    echo "You can test the MariaDB daemon with mysql-test-run.pl"
     echo "cd $basedir/mysql-test ; perl mysql-test-run.pl"
   fi
 
   echo
   echo "Please report any problems with the $scriptdir/mysqlbug script!"
   echo
-  echo "The latest information about MySQL is available at http://www.mysql.com/";
-  echo "Support MySQL by buying support/licenses from http://shop.mysql.com/";
+  echo "The latest information about MariaDB is available at http://www.askmonty.org/.";
+  echo "You can find additional information about the MySQL part at:"
+  echo "http://dev.mysql.com";
+  echo "Support MariaDB development by buying support/new features from"
+  echo "Monty Program Ab. You can contact us about this at sales@xxxxxxxxxxxx".
+  echo "Alternatively consider joining our community based development effort:"
+  echo "http://askmonty.org/wiki/index.php/MariaDB#How_can_I_participate_in_the_development_of_MariaDB.3F";
   echo
 fi
 

=== modified file 'sql/protocol.cc'
--- a/sql/protocol.cc	2009-04-25 10:05:32 +0000
+++ b/sql/protocol.cc	2009-07-02 10:15:33 +0000
@@ -58,6 +58,65 @@ bool Protocol_binary::net_store_data(con
 }
 
 
+/*
+  net_store_data() - extended version with character set conversion.
+  
+  It is optimized for short strings whose length after
+  conversion is garanteed to be less than 251, which accupies
+  exactly one byte to store length. It allows not to use
+  the "convert" member as a temporary buffer, conversion
+  is done directly to the "packet" member.
+  The limit 251 is good enough to optimize send_fields()
+  because column, table, database names fit into this limit.
+*/
+
+#ifndef EMBEDDED_LIBRARY
+bool Protocol::net_store_data(const uchar *from, size_t length,
+                              CHARSET_INFO *from_cs, CHARSET_INFO *to_cs)
+{
+  uint dummy_errors;
+  /* Calculate maxumum possible result length */
+  size_t conv_length= to_cs->mbmaxlen * length / from_cs->mbminlen;
+  ulong packet_length, new_length;
+  char *length_pos, *to;
+
+  if (conv_length > 250)
+  {
+    /*
+      For strings with conv_length greater than 250 bytes
+      we don't know how many bytes we will need to store length: one or two,
+      because we don't know result length until conversion is done.
+      For example, when converting from utf8 (mbmaxlen=3) to latin1,
+      conv_length=300 means that the result length can vary between 100 to 300.
+      length=100 needs one byte, length=300 needs to bytes.
+      
+      Thus conversion directly to "packet" is not worthy.
+      Let's use "convert" as a temporary buffer.
+    */
+    return (convert->copy((const char*) from, length, from_cs, to_cs,
+                          &dummy_errors) ||
+            net_store_data((const uchar*) convert->ptr(), convert->length()));
+  }
+
+  packet_length= packet->length();
+  new_length= packet_length + conv_length + 1;
+
+  if (new_length > packet->alloced_length() && packet->realloc(new_length))
+    return 1;
+
+  length_pos= (char*) packet->ptr() + packet_length;
+  to= length_pos + 1;
+
+  to+= copy_and_convert(to, conv_length, to_cs,
+                        (const char*) from, length, from_cs, &dummy_errors);
+
+  net_store_length((uchar*) length_pos, to - length_pos - 1);
+  packet->length((uint) (to - packet->ptr()));
+  return 0;
+}
+#endif
+
+
 /**
   Send a error string to client.
 
@@ -773,10 +832,10 @@ bool Protocol::store_string_aux(const ch
       fromcs != &my_charset_bin &&
       tocs != &my_charset_bin)
   {
-    uint dummy_errors;
-    return (convert->copy(from, length, fromcs, tocs, &dummy_errors) ||
-            net_store_data((uchar*) convert->ptr(), convert->length()));
+    /* Store with conversion */
+    return net_store_data((uchar*) from, length, fromcs, tocs);
   }
+  /* Store without conversion */
   return net_store_data((uchar*) from, length);
 }
 
@@ -802,7 +861,7 @@ bool Protocol_text::store(const char *fr
 {
   CHARSET_INFO *tocs= this->thd->variables.character_set_results;
 #ifndef DBUG_OFF
-  DBUG_PRINT("info", ("Protocol_text::store field %u (%u): %*s", field_pos,
+  DBUG_PRINT("info", ("Protocol_text::store field %u (%u): %.*s", field_pos,
                       field_count, (int) length, from));
   DBUG_ASSERT(field_pos < field_count);
   DBUG_ASSERT(field_types == 0 ||

=== modified file 'sql/protocol.h'
--- a/sql/protocol.h	2007-12-20 21:11:37 +0000
+++ b/sql/protocol.h	2009-07-02 10:15:33 +0000
@@ -42,6 +42,8 @@ protected:
   MYSQL_FIELD *next_mysql_field;
   MEM_ROOT *alloc;
 #endif
+  bool net_store_data(const uchar *from, size_t length,
+                      CHARSET_INFO *fromcs, CHARSET_INFO *tocs);
   bool store_string_aux(const char *from, size_t length,
                         CHARSET_INFO *fromcs, CHARSET_INFO *tocs);
 public:

=== modified file 'sql/sql_string.cc'
--- a/sql/sql_string.cc	2009-04-25 10:05:32 +0000
+++ b/sql/sql_string.cc	2009-07-02 10:15:33 +0000
@@ -782,10 +782,11 @@ String *copy_if_not_alloced(String *to,S
 */
 
 
-uint32
-copy_and_convert(char *to, uint32 to_length, CHARSET_INFO *to_cs, 
-                 const char *from, uint32 from_length, CHARSET_INFO *from_cs,
-                 uint *errors)
+static uint32
+copy_and_convert_extended(char *to, uint32 to_length, CHARSET_INFO *to_cs, 
+                          const char *from, uint32 from_length,
+                          CHARSET_INFO *from_cs,
+                          uint *errors)
 {
   int         cnvres;
   my_wc_t     wc;
@@ -900,6 +901,65 @@ my_copy_with_hex_escaping(CHARSET_INFO *
 }
 
 /*
+  Optimized for quick copying of ASCII characters in the range 0x00..0x7F.
+*/
+uint32
+copy_and_convert(char *to, uint32 to_length, CHARSET_INFO *to_cs, 
+                 const char *from, uint32 from_length, CHARSET_INFO *from_cs,
+                 uint *errors)
+{
+  /*
+    If any of the character sets is not ASCII compatible,
+    immediately switch to slow mb_wc->wc_mb method.
+  */
+  if ((to_cs->state | from_cs->state) & MY_CS_NONASCII)
+    return copy_and_convert_extended(to, to_length, to_cs,
+                                     from, from_length, from_cs, errors);
+
+  uint32 length= min(to_length, from_length), length2= length;
+
+#if defined(__i386__)
+  /*
+    Special loop for i386, it allows to refer to a
+    non-aligned memory block as UINT32, which makes
+    it possible to copy four bytes at once. This
+    gives about 10% performance improvement comparing
+    to byte-by-byte loop.
+  */
+  for ( ; length >= 4; length-= 4, from+= 4, to+= 4)
+  {
+    if ((*(uint32*)from) & 0x80808080)
+      break;
+    *((uint32*) to)= *((const uint32*) from);
+  }
+#endif
+
+  for (; ; *to++= *from++, length--)
+  {
+    if (!length)
+    {
+      *errors= 0;
+      return length2;
+    }
+    if (*((unsigned char*) from) > 0x7F) /* A non-ASCII character */
+    {
+      uint32 copied_length= length2 - length;
+      to_length-= copied_length;
+      from_length-= copied_length;
+      return copied_length + copy_and_convert_extended(to, to_length,
+                                                       to_cs,
+                                                       from, from_length,
+                                                       from_cs,
+                                                       errors);
+    }
+  }
+
+  DBUG_ASSERT(FALSE); // Should never get to here
+  return 0;           // Make compiler happy
+}
+
+
+/*
   copy a string,
   with optional character set conversion,
   with optional left padding (for binary -> UCS2 conversion)

=== modified file 'strings/conf_to_src.c'
--- a/strings/conf_to_src.c	2008-11-14 16:29:38 +0000
+++ b/strings/conf_to_src.c	2009-07-02 10:15:33 +0000
@@ -184,11 +184,12 @@ void dispcset(FILE *f,CHARSET_INFO *cs)
 {
   fprintf(f,"{\n");
   fprintf(f,"  %d,%d,%d,\n",cs->number,0,0);
-  fprintf(f,"  MY_CS_COMPILED%s%s%s%s,\n",
+  fprintf(f,"  MY_CS_COMPILED%s%s%s%s%s,\n",
           cs->state & MY_CS_BINSORT         ? "|MY_CS_BINSORT"   : "",
           cs->state & MY_CS_PRIMARY         ? "|MY_CS_PRIMARY"   : "",
           is_case_sensitive(cs)             ? "|MY_CS_CSSORT"    : "",
-          my_charset_is_8bit_pure_ascii(cs) ? "|MY_CS_PUREASCII" : "");
+          my_charset_is_8bit_pure_ascii(cs) ? "|MY_CS_PUREASCII" : "",
+	  !my_charset_is_ascii_compatible(cs) ? "|MY_CS_NONASCII": "");
   
   if (cs->name)
   {

=== modified file 'strings/ctype-extra.c'
--- a/strings/ctype-extra.c	2007-08-20 11:47:31 +0000
+++ b/strings/ctype-extra.c	2009-07-02 10:15:33 +0000
@@ -6804,7 +6804,7 @@ CHARSET_INFO compiled_charsets[] = {
 #ifdef HAVE_CHARSET_swe7
 {
   10,0,0,
-  MY_CS_COMPILED|MY_CS_PRIMARY,
+  MY_CS_COMPILED|MY_CS_PRIMARY|MY_CS_NONASCII,
   "swe7",                     /* cset name     */
   "swe7_swedish_ci",                     /* coll name     */
   "",                       /* comment       */
@@ -8454,7 +8454,7 @@ CHARSET_INFO compiled_charsets[] = {
 #ifdef HAVE_CHARSET_swe7
 {
   82,0,0,
-  MY_CS_COMPILED|MY_CS_BINSORT,
+  MY_CS_COMPILED|MY_CS_BINSORT|MY_CS_NONASCII,
   "swe7",                     /* cset name     */
   "swe7_bin",                     /* coll name     */
   "",                       /* comment       */
@@ -8550,72 +8550,6 @@ CHARSET_INFO compiled_charsets[] = {
 }
 ,
 #endif
-#ifdef HAVE_CHARSET_geostd8
-{
-  92,0,0,
-  MY_CS_COMPILED|MY_CS_PRIMARY,
-  "geostd8",                     /* cset name     */
-  "geostd8_general_ci",                     /* coll name     */
-  "",                       /* comment       */
-  NULL,                       /* tailoring     */
-  ctype_geostd8_general_ci,                   /* ctype         */
-  to_lower_geostd8_general_ci,                /* lower         */
-  to_upper_geostd8_general_ci,                /* upper         */
-  sort_order_geostd8_general_ci,            /* sort_order    */
-  NULL,                       /* contractions  */
-  NULL,                       /* sort_order_big*/
-  to_uni_geostd8_general_ci,                  /* to_uni        */
-  NULL,                       /* from_uni      */
-  my_unicase_default,         /* caseinfo      */
-  NULL,                       /* state map     */
-  NULL,                       /* ident map     */
-  1,                          /* strxfrm_multiply*/
-  1,                          /* caseup_multiply*/
-  1,                          /* casedn_multiply*/
-  1,                          /* mbminlen      */
-  1,                          /* mbmaxlen      */
-  0,                          /* min_sort_char */
-  255,                        /* max_sort_char */
-  ' ',                        /* pad_char      */
-  0,                          /* escape_with_backslash_is_dangerous */
-  &my_charset_8bit_handler,
-  &my_collation_8bit_simple_ci_handler,
-}
-,
-#endif
-#ifdef HAVE_CHARSET_geostd8
-{
-  93,0,0,
-  MY_CS_COMPILED|MY_CS_BINSORT,
-  "geostd8",                     /* cset name     */
-  "geostd8_bin",                     /* coll name     */
-  "",                       /* comment       */
-  NULL,                       /* tailoring     */
-  ctype_geostd8_bin,                   /* ctype         */
-  to_lower_geostd8_bin,                /* lower         */
-  to_upper_geostd8_bin,                /* upper         */
-  NULL,                     /* sort_order    */
-  NULL,                       /* contractions  */
-  NULL,                       /* sort_order_big*/
-  to_uni_geostd8_bin,                  /* to_uni        */
-  NULL,                       /* from_uni      */
-  my_unicase_default,         /* caseinfo      */
-  NULL,                       /* state map     */
-  NULL,                       /* ident map     */
-  1,                          /* strxfrm_multiply*/
-  1,                          /* caseup_multiply*/
-  1,                          /* casedn_multiply*/
-  1,                          /* mbminlen      */
-  1,                          /* mbmaxlen      */
-  0,                          /* min_sort_char */
-  255,                        /* max_sort_char */
-  ' ',                        /* pad_char      */
-  0,                          /* escape_with_backslash_is_dangerous */
-  &my_charset_8bit_handler,
-  &my_collation_8bit_bin_handler,
-}
-,
-#endif
 #ifdef HAVE_CHARSET_latin1
 {
   94,0,0,

=== modified file 'strings/ctype-sjis.c'
--- a/strings/ctype-sjis.c	2007-10-04 07:10:15 +0000
+++ b/strings/ctype-sjis.c	2009-07-02 10:15:33 +0000
@@ -4672,7 +4672,7 @@ static MY_CHARSET_HANDLER my_charset_han
 CHARSET_INFO my_charset_sjis_japanese_ci=
 {
     13,0,0,		/* number */
-    MY_CS_COMPILED|MY_CS_PRIMARY|MY_CS_STRNXFRM,	/* state      */
+    MY_CS_COMPILED|MY_CS_PRIMARY|MY_CS_STRNXFRM|MY_CS_NONASCII,	/* state      */
     "sjis",		/* cs name    */
     "sjis_japanese_ci",	/* name */
     "",			/* comment    */
@@ -4704,7 +4704,7 @@ CHARSET_INFO my_charset_sjis_japanese_ci
 CHARSET_INFO my_charset_sjis_bin=
 {
     88,0,0,		/* number */
-    MY_CS_COMPILED|MY_CS_BINSORT,	/* state      */
+    MY_CS_COMPILED|MY_CS_BINSORT|MY_CS_NONASCII, /* state      */
     "sjis",		/* cs name    */
     "sjis_bin",		/* name */
     "",			/* comment    */

=== modified file 'strings/ctype-uca.c'
--- a/strings/ctype-uca.c	2007-07-03 09:06:57 +0000
+++ b/strings/ctype-uca.c	2009-07-02 10:15:33 +0000
@@ -8086,7 +8086,7 @@ MY_COLLATION_HANDLER my_collation_ucs2_u
 CHARSET_INFO my_charset_ucs2_unicode_ci=
 {
     128,0,0,		/* number       */
-    MY_CS_COMPILED|MY_CS_STRNXFRM|MY_CS_UNICODE,
+    MY_CS_COMPILED|MY_CS_STRNXFRM|MY_CS_UNICODE|MY_CS_NONASCII,
     "ucs2",		/* cs name    */
     "ucs2_unicode_ci",	/* name         */
     "",			/* comment      */
@@ -8118,7 +8118,7 @@ CHARSET_INFO my_charset_ucs2_unicode_ci=
 CHARSET_INFO my_charset_ucs2_icelandic_uca_ci=
 {
     129,0,0,		/* number       */
-    MY_CS_COMPILED|MY_CS_STRNXFRM|MY_CS_UNICODE,
+    MY_CS_COMPILED|MY_CS_STRNXFRM|MY_CS_UNICODE|MY_CS_NONASCII,
     "ucs2",		/* cs name    */
     "ucs2_icelandic_ci",/* name         */
     "",			/* comment      */
@@ -8150,7 +8150,7 @@ CHARSET_INFO my_charset_ucs2_icelandic_u
 CHARSET_INFO my_charset_ucs2_latvian_uca_ci=
 {
     130,0,0,		/* number       */
-    MY_CS_COMPILED|MY_CS_STRNXFRM|MY_CS_UNICODE,
+    MY_CS_COMPILED|MY_CS_STRNXFRM|MY_CS_UNICODE|MY_CS_NONASCII,
     "ucs2",		/* cs name    */
     "ucs2_latvian_ci",	/* name         */
     "",			/* comment      */
@@ -8182,7 +8182,7 @@ CHARSET_INFO my_charset_ucs2_latvian_uca
 CHARSET_INFO my_charset_ucs2_romanian_uca_ci=
 {
     131,0,0,		/* number       */
-    MY_CS_COMPILED|MY_CS_STRNXFRM|MY_CS_UNICODE,
+    MY_CS_COMPILED|MY_CS_STRNXFRM|MY_CS_UNICODE|MY_CS_NONASCII,
     "ucs2",		/* cs name    */
     "ucs2_romanian_ci",	/* name         */
     "",			/* comment      */
@@ -8214,7 +8214,7 @@ CHARSET_INFO my_charset_ucs2_romanian_uc
 CHARSET_INFO my_charset_ucs2_slovenian_uca_ci=
 {
     132,0,0,		/* number       */
-    MY_CS_COMPILED|MY_CS_STRNXFRM|MY_CS_UNICODE,
+    MY_CS_COMPILED|MY_CS_STRNXFRM|MY_CS_UNICODE|MY_CS_NONASCII,
     "ucs2",		/* cs name    */
     "ucs2_slovenian_ci",/* name         */
     "",			/* comment      */
@@ -8246,7 +8246,7 @@ CHARSET_INFO my_charset_ucs2_slovenian_u
 CHARSET_INFO my_charset_ucs2_polish_uca_ci=
 {
     133,0,0,		/* number       */
-    MY_CS_COMPILED|MY_CS_STRNXFRM|MY_CS_UNICODE,
+    MY_CS_COMPILED|MY_CS_STRNXFRM|MY_CS_UNICODE|MY_CS_NONASCII,
     "ucs2",		/* cs name    */
     "ucs2_polish_ci",	/* name         */
     "",			/* comment      */
@@ -8278,7 +8278,7 @@ CHARSET_INFO my_charset_ucs2_polish_uca_
 CHARSET_INFO my_charset_ucs2_estonian_uca_ci=
 {
     134,0,0,		/* number       */
-    MY_CS_COMPILED|MY_CS_STRNXFRM|MY_CS_UNICODE,
+    MY_CS_COMPILED|MY_CS_STRNXFRM|MY_CS_UNICODE|MY_CS_NONASCII,
     "ucs2",		/* cs name    */
     "ucs2_estonian_ci",	/* name         */
     "",			/* comment      */
@@ -8310,7 +8310,7 @@ CHARSET_INFO my_charset_ucs2_estonian_uc
 CHARSET_INFO my_charset_ucs2_spanish_uca_ci=
 {
     135,0,0,		/* number       */
-    MY_CS_COMPILED|MY_CS_STRNXFRM|MY_CS_UNICODE,
+    MY_CS_COMPILED|MY_CS_STRNXFRM|MY_CS_UNICODE|MY_CS_NONASCII,
     "ucs2",		/* cs name    */
     "ucs2_spanish_ci",	/* name         */
     "",			/* comment      */
@@ -8342,7 +8342,7 @@ CHARSET_INFO my_charset_ucs2_spanish_uca
 CHARSET_INFO my_charset_ucs2_swedish_uca_ci=
 {
     136,0,0,		/* number       */
-    MY_CS_COMPILED|MY_CS_STRNXFRM|MY_CS_UNICODE,
+    MY_CS_COMPILED|MY_CS_STRNXFRM|MY_CS_UNICODE|MY_CS_NONASCII,
     "ucs2",		/* cs name    */
     "ucs2_swedish_ci",	/* name         */
     "",			/* comment      */
@@ -8374,7 +8374,7 @@ CHARSET_INFO my_charset_ucs2_swedish_uca
 CHARSET_INFO my_charset_ucs2_turkish_uca_ci=
 {
     137,0,0,		/* number       */
-    MY_CS_COMPILED|MY_CS_STRNXFRM|MY_CS_UNICODE,
+    MY_CS_COMPILED|MY_CS_STRNXFRM|MY_CS_UNICODE|MY_CS_NONASCII,
     "ucs2",		/* cs name    */
     "ucs2_turkish_ci",	/* name         */
     "",			/* comment      */
@@ -8406,7 +8406,7 @@ CHARSET_INFO my_charset_ucs2_turkish_uca
 CHARSET_INFO my_charset_ucs2_czech_uca_ci=
 {
     138,0,0,		/* number       */
-    MY_CS_COMPILED|MY_CS_STRNXFRM|MY_CS_UNICODE,
+    MY_CS_COMPILED|MY_CS_STRNXFRM|MY_CS_UNICODE|MY_CS_NONASCII,
     "ucs2",		/* cs name    */
     "ucs2_czech_ci",	/* name         */
     "",			/* comment      */
@@ -8439,7 +8439,7 @@ CHARSET_INFO my_charset_ucs2_czech_uca_c
 CHARSET_INFO my_charset_ucs2_danish_uca_ci=
 {
     139,0,0,		/* number       */
-    MY_CS_COMPILED|MY_CS_STRNXFRM|MY_CS_UNICODE,
+    MY_CS_COMPILED|MY_CS_STRNXFRM|MY_CS_UNICODE|MY_CS_NONASCII,
     "ucs2",		/* cs name    */
     "ucs2_danish_ci",	/* name         */
     "",			/* comment      */
@@ -8471,7 +8471,7 @@ CHARSET_INFO my_charset_ucs2_danish_uca_
 CHARSET_INFO my_charset_ucs2_lithuanian_uca_ci=
 {
     140,0,0,		/* number       */
-    MY_CS_COMPILED|MY_CS_STRNXFRM|MY_CS_UNICODE,
+    MY_CS_COMPILED|MY_CS_STRNXFRM|MY_CS_UNICODE|MY_CS_NONASCII,
     "ucs2",		/* cs name    */
     "ucs2_lithuanian_ci",/* name         */
     "",			/* comment      */
@@ -8503,7 +8503,7 @@ CHARSET_INFO my_charset_ucs2_lithuanian_
 CHARSET_INFO my_charset_ucs2_slovak_uca_ci=
 {
     141,0,0,		/* number       */
-    MY_CS_COMPILED|MY_CS_STRNXFRM|MY_CS_UNICODE,
+    MY_CS_COMPILED|MY_CS_STRNXFRM|MY_CS_UNICODE|MY_CS_NONASCII,
     "ucs2",		/* cs name    */
     "ucs2_slovak_ci",	/* name         */
     "",			/* comment      */
@@ -8535,7 +8535,7 @@ CHARSET_INFO my_charset_ucs2_slovak_uca_
 CHARSET_INFO my_charset_ucs2_spanish2_uca_ci=
 {
     142,0,0,		/* number       */
-    MY_CS_COMPILED|MY_CS_STRNXFRM|MY_CS_UNICODE,
+    MY_CS_COMPILED|MY_CS_STRNXFRM|MY_CS_UNICODE|MY_CS_NONASCII,
     "ucs2",		/* cs name    */
     "ucs2_spanish2_ci",	/* name         */
     "",			/* comment      */
@@ -8568,7 +8568,7 @@ CHARSET_INFO my_charset_ucs2_spanish2_uc
 CHARSET_INFO my_charset_ucs2_roman_uca_ci=
 {
     143,0,0,		/* number       */
-    MY_CS_COMPILED|MY_CS_STRNXFRM|MY_CS_UNICODE,
+    MY_CS_COMPILED|MY_CS_STRNXFRM|MY_CS_UNICODE|MY_CS_NONASCII,
     "ucs2",		/* cs name    */
     "ucs2_roman_ci",	/* name         */
     "",			/* comment      */
@@ -8601,7 +8601,7 @@ CHARSET_INFO my_charset_ucs2_roman_uca_c
 CHARSET_INFO my_charset_ucs2_persian_uca_ci=
 {
     144,0,0,		/* number       */
-    MY_CS_COMPILED|MY_CS_STRNXFRM|MY_CS_UNICODE,
+    MY_CS_COMPILED|MY_CS_STRNXFRM|MY_CS_UNICODE|MY_CS_NONASCII,
     "ucs2",		/* cs name    */
     "ucs2_persian_ci",	/* name         */
     "",			/* comment      */
@@ -8634,7 +8634,7 @@ CHARSET_INFO my_charset_ucs2_persian_uca
 CHARSET_INFO my_charset_ucs2_esperanto_uca_ci=
 {
     145,0,0,		/* number       */
-    MY_CS_COMPILED|MY_CS_STRNXFRM|MY_CS_UNICODE,
+    MY_CS_COMPILED|MY_CS_STRNXFRM|MY_CS_UNICODE|MY_CS_NONASCII,
     "ucs2",		/* cs name    */
     "ucs2_esperanto_ci",/* name         */
     "",			/* comment      */
@@ -8667,7 +8667,7 @@ CHARSET_INFO my_charset_ucs2_esperanto_u
 CHARSET_INFO my_charset_ucs2_hungarian_uca_ci=
 {
     146,0,0,		/* number       */
-    MY_CS_COMPILED|MY_CS_STRNXFRM|MY_CS_UNICODE,
+    MY_CS_COMPILED|MY_CS_STRNXFRM|MY_CS_UNICODE|MY_CS_NONASCII,
     "ucs2",		/* cs name    */
     "ucs2_hungarian_ci",/* name         */
     "",			/* comment      */

=== modified file 'strings/ctype-ucs2.c'
--- a/strings/ctype-ucs2.c	2009-02-13 16:41:47 +0000
+++ b/strings/ctype-ucs2.c	2009-07-02 10:15:33 +0000
@@ -1717,7 +1717,7 @@ MY_CHARSET_HANDLER my_charset_ucs2_handl
 CHARSET_INFO my_charset_ucs2_general_ci=
 {
     35,0,0,		/* number       */
-    MY_CS_COMPILED|MY_CS_PRIMARY|MY_CS_STRNXFRM|MY_CS_UNICODE,
+    MY_CS_COMPILED|MY_CS_PRIMARY|MY_CS_STRNXFRM|MY_CS_UNICODE|MY_CS_NONASCII,
     "ucs2",		/* cs name    */
     "ucs2_general_ci",	/* name         */
     "",			/* comment      */
@@ -1749,7 +1749,7 @@ CHARSET_INFO my_charset_ucs2_general_ci=
 CHARSET_INFO my_charset_ucs2_bin=
 {
     90,0,0,		/* number       */
-    MY_CS_COMPILED|MY_CS_BINSORT|MY_CS_UNICODE,
+    MY_CS_COMPILED|MY_CS_BINSORT|MY_CS_UNICODE|MY_CS_NONASCII,
     "ucs2",		/* cs name    */
     "ucs2_bin",		/* name         */
     "",			/* comment      */

=== modified file 'strings/ctype-utf8.c'
--- a/strings/ctype-utf8.c	2008-02-11 12:28:33 +0000
+++ b/strings/ctype-utf8.c	2009-07-02 10:15:33 +0000
@@ -4204,7 +4204,7 @@ static MY_CHARSET_HANDLER my_charset_fil
 CHARSET_INFO my_charset_filename=
 {
     17,0,0,             /* number       */
-    MY_CS_COMPILED|MY_CS_PRIMARY|MY_CS_STRNXFRM|MY_CS_UNICODE|MY_CS_HIDDEN,
+    MY_CS_COMPILED|MY_CS_PRIMARY|MY_CS_STRNXFRM|MY_CS_UNICODE|MY_CS_HIDDEN|MY_CS_NONASCII,
     "filename",         /* cs name      */
     "filename",         /* name         */
     "",                 /* comment      */

=== modified file 'strings/ctype.c'
--- a/strings/ctype.c	2009-04-25 10:05:32 +0000
+++ b/strings/ctype.c	2009-07-02 10:15:33 +0000
@@ -405,3 +405,23 @@ my_charset_is_8bit_pure_ascii(CHARSET_IN
   }
   return 1;
 }
+
+
+/*
+  Shared function between conf_to_src and mysys.
+  Check if a 8bit character set is compatible with
+  ascii on the range 0x00..0x7F.
+*/
+my_bool
+my_charset_is_ascii_compatible(CHARSET_INFO *cs)
+{
+  uint i;
+  if (!cs->tab_to_uni)
+    return 1;
+  for (i= 0; i < 128; i++)
+  {
+    if (cs->tab_to_uni[i] != i)
+      return 0;
+  }
+  return 1;
+}