maria-developers team mailing list archive

Thread
Date
bzr commit into MariaDB 5.1, with Maria 1.5:maria branch (monty:2776)

To: maria-developers@xxxxxxxxxxxxxxxxxxx
From: Michael Widenius <monty@xxxxxxxxxxxx>
Date: Thu, 03 Dec 2009 12:02:40 -0000
User-agent: Bazaar (1.17)
#At lp:maria based on revid:monty@xxxxxxxxxxxx-20091203113411-cmr8g2lcp45n0prv

 2776 Michael Widenius	2009-12-03
      Applied patch from to fix some problems with Croatian character set and LIKE queries
      Author: Alexander Barkov
      License: GPL
      modified:
        mysql-test/r/ctype_ucs.result
        mysql-test/t/ctype_ucs.test
        strings/ctype-ucs2.c

per-file messages:
  mysql-test/t/ctype_ucs.test
    Added test case for Croatina character set
=== modified file 'mysql-test/r/ctype_ucs.result'
--- a/mysql-test/r/ctype_ucs.result	2008-12-23 14:21:01 +0000
+++ b/mysql-test/r/ctype_ucs.result	2009-12-03 12:02:37 +0000
@@ -1211,3 +1211,47 @@ HEX(DAYNAME(19700101))
 0427043504420432043504400433
 SET character_set_connection=latin1;
 End of 5.0 tests
+Start of 5.1 tests
+SET NAMES utf8;
+CREATE TABLE t1 (
+a varchar(10) CHARACTER SET ucs2 COLLATE ucs2_czech_ci,
+key(a)
+);
+INSERT INTO t1 VALUES
+('aa'),('bb'),('cc'),('dd'),('ee'),('ff'),('gg'),('hh'),('ii'),
+('jj'),('kk'),('ll'),('mm'),('nn'),('oo'),('pp'),('rr'),('ss'),
+('tt'),('uu'),('vv'),('ww'),('xx'),('yy'),('zz');
+INSERT INTO t1 VALUES ('ca'),('cz'),('ch');
+INSERT INTO t1 VALUES ('da'),('dz'), (X'0064017E');
+EXPLAIN SELECT * FROM t1 WHERE a LIKE 'b%';
+id	select_type	table	type	possible_keys	key	key_len	ref	rows	Extra
+1	SIMPLE	t1	range	a	a	23	NULL	1	Using where; Using index
+EXPLAIN SELECT * FROM t1 WHERE a LIKE 'c%';
+id	select_type	table	type	possible_keys	key	key_len	ref	rows	Extra
+1	SIMPLE	t1	range	a	a	23	NULL	30	Using where; Using index
+SELECT * FROM t1 WHERE a LIKE 'c%';
+a
+ca
+cc
+cz
+ch
+EXPLAIN SELECT * FROM t1 WHERE a LIKE 'ch%';
+id	select_type	table	type	possible_keys	key	key_len	ref	rows	Extra
+1	SIMPLE	t1	range	a	a	23	NULL	1	Using where; Using index
+SELECT * FROM t1 WHERE a LIKE 'ch%';
+a
+ch
+ALTER TABLE t1 MODIFY a VARCHAR(10) CHARACTER SET ucs2 COLLATE ucs2_croatian_ci;
+EXPLAIN SELECT * FROM t1 WHERE a LIKE 'd%';
+id	select_type	table	type	possible_keys	key	key_len	ref	rows	Extra
+1	SIMPLE	t1	range	a	a	23	NULL	1	Using where; Using index
+SELECT hex(concat('d',_ucs2 0x017E,'%'));
+hex(concat('d',_ucs2 0x017E,'%'))
+0064017E0025
+EXPLAIN SELECT * FROM t1 WHERE a LIKE concat('d',_ucs2 0x017E,'%');
+id	select_type	table	type	possible_keys	key	key_len	ref	rows	Extra
+1	SIMPLE	t1	range	a	a	23	NULL	1	Using where; Using index
+SELECT hex(a) FROM t1 WHERE a LIKE concat('D',_ucs2 0x017E,'%');
+hex(a)
+0064017E
+DROP TABLE t1;

=== modified file 'mysql-test/t/ctype_ucs.test'
--- a/mysql-test/t/ctype_ucs.test	2008-12-23 14:21:01 +0000
+++ b/mysql-test/t/ctype_ucs.test	2009-12-03 12:02:37 +0000
@@ -723,3 +723,34 @@ SELECT HEX(DAYNAME(19700101));
 SET character_set_connection=latin1;
 
 --echo End of 5.0 tests
+
+
+--echo Start of 5.1 tests
+#
+# Checking my_like_range_ucs2
+#
+SET NAMES utf8;
+CREATE TABLE t1 (
+  a varchar(10) CHARACTER SET ucs2 COLLATE ucs2_czech_ci,
+  key(a)
+);
+INSERT INTO t1 VALUES
+('aa'),('bb'),('cc'),('dd'),('ee'),('ff'),('gg'),('hh'),('ii'),
+('jj'),('kk'),('ll'),('mm'),('nn'),('oo'),('pp'),('rr'),('ss'),
+('tt'),('uu'),('vv'),('ww'),('xx'),('yy'),('zz');
+INSERT INTO t1 VALUES ('ca'),('cz'),('ch');
+INSERT INTO t1 VALUES ('da'),('dz'), (X'0064017E');
+# This one should scan only one row
+EXPLAIN SELECT * FROM t1 WHERE a LIKE 'b%';
+# This one should scan many rows: 'c' is a contraction head
+EXPLAIN SELECT * FROM t1 WHERE a LIKE 'c%';
+SELECT * FROM t1 WHERE a LIKE 'c%';
+EXPLAIN SELECT * FROM t1 WHERE a LIKE 'ch%';
+SELECT * FROM t1 WHERE a LIKE 'ch%';
+ALTER TABLE t1 MODIFY a VARCHAR(10) CHARACTER SET ucs2 COLLATE ucs2_croatian_ci;
+EXPLAIN SELECT * FROM t1 WHERE a LIKE 'd%';
+SELECT hex(concat('d',_ucs2 0x017E,'%'));
+EXPLAIN SELECT * FROM t1 WHERE a LIKE concat('d',_ucs2 0x017E,'%');
+SELECT hex(a) FROM t1 WHERE a LIKE concat('D',_ucs2 0x017E,'%');
+
+DROP TABLE t1;

=== modified file 'strings/ctype-ucs2.c'
--- a/strings/ctype-ucs2.c	2009-11-30 12:42:24 +0000
+++ b/strings/ctype-ucs2.c	2009-12-03 12:02:37 +0000
@@ -1498,6 +1498,14 @@ void my_hash_sort_ucs2_bin(CHARSET_INFO 
   }
 }
 
+
+static inline my_wc_t
+ucs2_to_wc(const uchar *ptr)
+{
+  return (((uint) ptr[0]) << 8) + ptr[1];
+}
+
+
 /*
 ** Calculate min_str and max_str that ranges a LIKE string.
 ** Arguments:
@@ -1531,6 +1539,7 @@ my_bool my_like_range_ucs2(CHARSET_INFO 
   for ( ; ptr + 1 < end && min_str + 1 < min_end && charlen > 0
         ; ptr+=2, charlen--)
   {
+    my_wc_t wc;
     if (ptr[0] == '\0' && ptr[1] == escape && ptr + 1 < end)
     {
       ptr+=2;					/* Skip escape */
@@ -1567,9 +1576,9 @@ fill_max_and_min:
     }
 
     if (have_contractions && ptr + 3 < end &&
-        ptr[0] == '\0' &&
-        my_uca_can_be_contraction_head(cs, (uchar) ptr[1]))
+        my_uca_can_be_contraction_head(cs, (wc= ucs2_to_wc((uchar*) ptr))))
     {
+      my_wc_t wc2;
       /* Contraction head found */
       if (ptr[2] == '\0' && (ptr[3] == w_one || ptr[3] == w_many))
       {
@@ -1581,9 +1590,8 @@ fill_max_and_min:
         Check if the second letter can be contraction part,
         and if two letters really produce a contraction.
       */
-      if (ptr[2] == '\0' &&
-          my_uca_can_be_contraction_tail(cs, (uchar) ptr[3]) &&
-          my_uca_contraction2_weight(cs,(uchar) ptr[1], (uchar) ptr[3]))
+      if (my_uca_can_be_contraction_tail(cs, (wc2= ucs2_to_wc((uchar*) ptr + 2))) &&
+          my_uca_contraction2_weight(cs, wc , wc2))
       {
         /* Contraction found */
         if (charlen == 1 || min_str + 2 >= min_end)