maria-developers team mailing list archive
-
maria-developers team
-
Mailing list archive
-
Message #01668
[Branch ~maria-captains/maria/5.1] Rev 2776: Applied patch from to fix some problems with Croatian character set and LIKE queries
------------------------------------------------------------
revno: 2776
committer: Michael Widenius <monty@xxxxxxxxxxxx>
branch nick: maria-5.1
timestamp: Thu 2009-12-03 14:02:37 +0200
message:
Applied patch from to fix some problems with Croatian character set and LIKE queries
Author: Alexander Barkov
License: GPL
modified:
mysql-test/r/ctype_ucs.result
mysql-test/t/ctype_ucs.test
strings/ctype-ucs2.c
--
lp:maria
https://code.launchpad.net/~maria-captains/maria/5.1
Your team Maria developers is subscribed to branch lp:maria.
To unsubscribe from this branch go to https://code.launchpad.net/~maria-captains/maria/5.1/+edit-subscription.
=== modified file 'mysql-test/r/ctype_ucs.result'
--- mysql-test/r/ctype_ucs.result 2008-12-23 14:21:01 +0000
+++ mysql-test/r/ctype_ucs.result 2009-12-03 12:02:37 +0000
@@ -1211,3 +1211,47 @@
0427043504420432043504400433
SET character_set_connection=latin1;
End of 5.0 tests
+Start of 5.1 tests
+SET NAMES utf8;
+CREATE TABLE t1 (
+a varchar(10) CHARACTER SET ucs2 COLLATE ucs2_czech_ci,
+key(a)
+);
+INSERT INTO t1 VALUES
+('aa'),('bb'),('cc'),('dd'),('ee'),('ff'),('gg'),('hh'),('ii'),
+('jj'),('kk'),('ll'),('mm'),('nn'),('oo'),('pp'),('rr'),('ss'),
+('tt'),('uu'),('vv'),('ww'),('xx'),('yy'),('zz');
+INSERT INTO t1 VALUES ('ca'),('cz'),('ch');
+INSERT INTO t1 VALUES ('da'),('dz'), (X'0064017E');
+EXPLAIN SELECT * FROM t1 WHERE a LIKE 'b%';
+id select_type table type possible_keys key key_len ref rows Extra
+1 SIMPLE t1 range a a 23 NULL 1 Using where; Using index
+EXPLAIN SELECT * FROM t1 WHERE a LIKE 'c%';
+id select_type table type possible_keys key key_len ref rows Extra
+1 SIMPLE t1 range a a 23 NULL 30 Using where; Using index
+SELECT * FROM t1 WHERE a LIKE 'c%';
+a
+ca
+cc
+cz
+ch
+EXPLAIN SELECT * FROM t1 WHERE a LIKE 'ch%';
+id select_type table type possible_keys key key_len ref rows Extra
+1 SIMPLE t1 range a a 23 NULL 1 Using where; Using index
+SELECT * FROM t1 WHERE a LIKE 'ch%';
+a
+ch
+ALTER TABLE t1 MODIFY a VARCHAR(10) CHARACTER SET ucs2 COLLATE ucs2_croatian_ci;
+EXPLAIN SELECT * FROM t1 WHERE a LIKE 'd%';
+id select_type table type possible_keys key key_len ref rows Extra
+1 SIMPLE t1 range a a 23 NULL 1 Using where; Using index
+SELECT hex(concat('d',_ucs2 0x017E,'%'));
+hex(concat('d',_ucs2 0x017E,'%'))
+0064017E0025
+EXPLAIN SELECT * FROM t1 WHERE a LIKE concat('d',_ucs2 0x017E,'%');
+id select_type table type possible_keys key key_len ref rows Extra
+1 SIMPLE t1 range a a 23 NULL 1 Using where; Using index
+SELECT hex(a) FROM t1 WHERE a LIKE concat('D',_ucs2 0x017E,'%');
+hex(a)
+0064017E
+DROP TABLE t1;
=== modified file 'mysql-test/t/ctype_ucs.test'
--- mysql-test/t/ctype_ucs.test 2008-12-23 14:21:01 +0000
+++ mysql-test/t/ctype_ucs.test 2009-12-03 12:02:37 +0000
@@ -723,3 +723,34 @@
SET character_set_connection=latin1;
--echo End of 5.0 tests
+
+
+--echo Start of 5.1 tests
+#
+# Checking my_like_range_ucs2
+#
+SET NAMES utf8;
+CREATE TABLE t1 (
+ a varchar(10) CHARACTER SET ucs2 COLLATE ucs2_czech_ci,
+ key(a)
+);
+INSERT INTO t1 VALUES
+('aa'),('bb'),('cc'),('dd'),('ee'),('ff'),('gg'),('hh'),('ii'),
+('jj'),('kk'),('ll'),('mm'),('nn'),('oo'),('pp'),('rr'),('ss'),
+('tt'),('uu'),('vv'),('ww'),('xx'),('yy'),('zz');
+INSERT INTO t1 VALUES ('ca'),('cz'),('ch');
+INSERT INTO t1 VALUES ('da'),('dz'), (X'0064017E');
+# This one should scan only one row
+EXPLAIN SELECT * FROM t1 WHERE a LIKE 'b%';
+# This one should scan many rows: 'c' is a contraction head
+EXPLAIN SELECT * FROM t1 WHERE a LIKE 'c%';
+SELECT * FROM t1 WHERE a LIKE 'c%';
+EXPLAIN SELECT * FROM t1 WHERE a LIKE 'ch%';
+SELECT * FROM t1 WHERE a LIKE 'ch%';
+ALTER TABLE t1 MODIFY a VARCHAR(10) CHARACTER SET ucs2 COLLATE ucs2_croatian_ci;
+EXPLAIN SELECT * FROM t1 WHERE a LIKE 'd%';
+SELECT hex(concat('d',_ucs2 0x017E,'%'));
+EXPLAIN SELECT * FROM t1 WHERE a LIKE concat('d',_ucs2 0x017E,'%');
+SELECT hex(a) FROM t1 WHERE a LIKE concat('D',_ucs2 0x017E,'%');
+
+DROP TABLE t1;
=== modified file 'strings/ctype-ucs2.c'
--- strings/ctype-ucs2.c 2009-11-30 12:42:24 +0000
+++ strings/ctype-ucs2.c 2009-12-03 12:02:37 +0000
@@ -1498,6 +1498,14 @@
}
}
+
+static inline my_wc_t
+ucs2_to_wc(const uchar *ptr)
+{
+ return (((uint) ptr[0]) << 8) + ptr[1];
+}
+
+
/*
** Calculate min_str and max_str that ranges a LIKE string.
** Arguments:
@@ -1531,6 +1539,7 @@
for ( ; ptr + 1 < end && min_str + 1 < min_end && charlen > 0
; ptr+=2, charlen--)
{
+ my_wc_t wc;
if (ptr[0] == '\0' && ptr[1] == escape && ptr + 1 < end)
{
ptr+=2; /* Skip escape */
@@ -1567,9 +1576,9 @@
}
if (have_contractions && ptr + 3 < end &&
- ptr[0] == '\0' &&
- my_uca_can_be_contraction_head(cs, (uchar) ptr[1]))
+ my_uca_can_be_contraction_head(cs, (wc= ucs2_to_wc((uchar*) ptr))))
{
+ my_wc_t wc2;
/* Contraction head found */
if (ptr[2] == '\0' && (ptr[3] == w_one || ptr[3] == w_many))
{
@@ -1581,9 +1590,8 @@
Check if the second letter can be contraction part,
and if two letters really produce a contraction.
*/
- if (ptr[2] == '\0' &&
- my_uca_can_be_contraction_tail(cs, (uchar) ptr[3]) &&
- my_uca_contraction2_weight(cs,(uchar) ptr[1], (uchar) ptr[3]))
+ if (my_uca_can_be_contraction_tail(cs, (wc2= ucs2_to_wc((uchar*) ptr + 2))) &&
+ my_uca_contraction2_weight(cs, wc , wc2))
{
/* Contraction found */
if (charlen == 1 || min_str + 2 >= min_end)