maria-developers team mailing list archive

Thread
Date

[Branch ~maria-captains/maria/5.1] Rev 2776: Applied patch from to fix some problems with Croatian character set and LIKE queries

To: "askmonty.org Worklog" <worklog-db@xxxxxxxxxxxx>
From: noreply@xxxxxxxxxxxxx
Date: Thu, 03 Dec 2009 12:25:42 -0000
Delivered-to: worklog-db@xxxxxxxxxxxx
Reply-to: noreply@xxxxxxxxxxxxx
Sender: bounces@xxxxxxxxxxxxx

------------------------------------------------------------
revno: 2776
committer: Michael Widenius <monty@xxxxxxxxxxxx>
branch nick: maria-5.1
timestamp: Thu 2009-12-03 14:02:37 +0200
message:
  Applied patch from to fix some problems with Croatian character set and LIKE queries
  Author: Alexander Barkov
  License: GPL
modified:
  mysql-test/r/ctype_ucs.result
  mysql-test/t/ctype_ucs.test
  strings/ctype-ucs2.c


--
lp:maria
https://code.launchpad.net/~maria-captains/maria/5.1

Your team Maria developers is subscribed to branch lp:maria.
To unsubscribe from this branch go to https://code.launchpad.net/~maria-captains/maria/5.1/+edit-subscription.

=== modified file 'mysql-test/r/ctype_ucs.result'
--- mysql-test/r/ctype_ucs.result	2008-12-23 14:21:01 +0000
+++ mysql-test/r/ctype_ucs.result	2009-12-03 12:02:37 +0000
@@ -1211,3 +1211,47 @@
 0427043504420432043504400433
 SET character_set_connection=latin1;
 End of 5.0 tests
+Start of 5.1 tests
+SET NAMES utf8;
+CREATE TABLE t1 (
+a varchar(10) CHARACTER SET ucs2 COLLATE ucs2_czech_ci,
+key(a)
+);
+INSERT INTO t1 VALUES
+('aa'),('bb'),('cc'),('dd'),('ee'),('ff'),('gg'),('hh'),('ii'),
+('jj'),('kk'),('ll'),('mm'),('nn'),('oo'),('pp'),('rr'),('ss'),
+('tt'),('uu'),('vv'),('ww'),('xx'),('yy'),('zz');
+INSERT INTO t1 VALUES ('ca'),('cz'),('ch');
+INSERT INTO t1 VALUES ('da'),('dz'), (X'0064017E');
+EXPLAIN SELECT * FROM t1 WHERE a LIKE 'b%';
+id	select_type	table	type	possible_keys	key	key_len	ref	rows	Extra
+1	SIMPLE	t1	range	a	a	23	NULL	1	Using where; Using index
+EXPLAIN SELECT * FROM t1 WHERE a LIKE 'c%';
+id	select_type	table	type	possible_keys	key	key_len	ref	rows	Extra
+1	SIMPLE	t1	range	a	a	23	NULL	30	Using where; Using index
+SELECT * FROM t1 WHERE a LIKE 'c%';
+a
+ca
+cc
+cz
+ch
+EXPLAIN SELECT * FROM t1 WHERE a LIKE 'ch%';
+id	select_type	table	type	possible_keys	key	key_len	ref	rows	Extra
+1	SIMPLE	t1	range	a	a	23	NULL	1	Using where; Using index
+SELECT * FROM t1 WHERE a LIKE 'ch%';
+a
+ch
+ALTER TABLE t1 MODIFY a VARCHAR(10) CHARACTER SET ucs2 COLLATE ucs2_croatian_ci;
+EXPLAIN SELECT * FROM t1 WHERE a LIKE 'd%';
+id	select_type	table	type	possible_keys	key	key_len	ref	rows	Extra
+1	SIMPLE	t1	range	a	a	23	NULL	1	Using where; Using index
+SELECT hex(concat('d',_ucs2 0x017E,'%'));
+hex(concat('d',_ucs2 0x017E,'%'))
+0064017E0025
+EXPLAIN SELECT * FROM t1 WHERE a LIKE concat('d',_ucs2 0x017E,'%');
+id	select_type	table	type	possible_keys	key	key_len	ref	rows	Extra
+1	SIMPLE	t1	range	a	a	23	NULL	1	Using where; Using index
+SELECT hex(a) FROM t1 WHERE a LIKE concat('D',_ucs2 0x017E,'%');
+hex(a)
+0064017E
+DROP TABLE t1;

=== modified file 'mysql-test/t/ctype_ucs.test'
--- mysql-test/t/ctype_ucs.test	2008-12-23 14:21:01 +0000
+++ mysql-test/t/ctype_ucs.test	2009-12-03 12:02:37 +0000
@@ -723,3 +723,34 @@
 SET character_set_connection=latin1;
 
 --echo End of 5.0 tests
+
+
+--echo Start of 5.1 tests
+#
+# Checking my_like_range_ucs2
+#
+SET NAMES utf8;
+CREATE TABLE t1 (
+  a varchar(10) CHARACTER SET ucs2 COLLATE ucs2_czech_ci,
+  key(a)
+);
+INSERT INTO t1 VALUES
+('aa'),('bb'),('cc'),('dd'),('ee'),('ff'),('gg'),('hh'),('ii'),
+('jj'),('kk'),('ll'),('mm'),('nn'),('oo'),('pp'),('rr'),('ss'),
+('tt'),('uu'),('vv'),('ww'),('xx'),('yy'),('zz');
+INSERT INTO t1 VALUES ('ca'),('cz'),('ch');
+INSERT INTO t1 VALUES ('da'),('dz'), (X'0064017E');
+# This one should scan only one row
+EXPLAIN SELECT * FROM t1 WHERE a LIKE 'b%';
+# This one should scan many rows: 'c' is a contraction head
+EXPLAIN SELECT * FROM t1 WHERE a LIKE 'c%';
+SELECT * FROM t1 WHERE a LIKE 'c%';
+EXPLAIN SELECT * FROM t1 WHERE a LIKE 'ch%';
+SELECT * FROM t1 WHERE a LIKE 'ch%';
+ALTER TABLE t1 MODIFY a VARCHAR(10) CHARACTER SET ucs2 COLLATE ucs2_croatian_ci;
+EXPLAIN SELECT * FROM t1 WHERE a LIKE 'd%';
+SELECT hex(concat('d',_ucs2 0x017E,'%'));
+EXPLAIN SELECT * FROM t1 WHERE a LIKE concat('d',_ucs2 0x017E,'%');
+SELECT hex(a) FROM t1 WHERE a LIKE concat('D',_ucs2 0x017E,'%');
+
+DROP TABLE t1;

=== modified file 'strings/ctype-ucs2.c'
--- strings/ctype-ucs2.c	2009-11-30 12:42:24 +0000
+++ strings/ctype-ucs2.c	2009-12-03 12:02:37 +0000
@@ -1498,6 +1498,14 @@
   }
 }
 
+
+static inline my_wc_t
+ucs2_to_wc(const uchar *ptr)
+{
+  return (((uint) ptr[0]) << 8) + ptr[1];
+}
+
+
 /*
 ** Calculate min_str and max_str that ranges a LIKE string.
 ** Arguments:
@@ -1531,6 +1539,7 @@
   for ( ; ptr + 1 < end && min_str + 1 < min_end && charlen > 0
         ; ptr+=2, charlen--)
   {
+    my_wc_t wc;
     if (ptr[0] == '\0' && ptr[1] == escape && ptr + 1 < end)
     {
       ptr+=2;					/* Skip escape */
@@ -1567,9 +1576,9 @@
     }
 
     if (have_contractions && ptr + 3 < end &&
-        ptr[0] == '\0' &&
-        my_uca_can_be_contraction_head(cs, (uchar) ptr[1]))
+        my_uca_can_be_contraction_head(cs, (wc= ucs2_to_wc((uchar*) ptr))))
     {
+      my_wc_t wc2;
       /* Contraction head found */
       if (ptr[2] == '\0' && (ptr[3] == w_one || ptr[3] == w_many))
       {
@@ -1581,9 +1590,8 @@
         Check if the second letter can be contraction part,
         and if two letters really produce a contraction.
       */
-      if (ptr[2] == '\0' &&
-          my_uca_can_be_contraction_tail(cs, (uchar) ptr[3]) &&
-          my_uca_contraction2_weight(cs,(uchar) ptr[1], (uchar) ptr[3]))
+      if (my_uca_can_be_contraction_tail(cs, (wc2= ucs2_to_wc((uchar*) ptr + 2))) &&
+          my_uca_contraction2_weight(cs, wc , wc2))
       {
         /* Contraction found */
         if (charlen == 1 || min_str + 2 >= min_end)