maria-developers team mailing list archive
-
maria-developers team
-
Mailing list archive
-
Message #07168
MDEV-6027 RLIKE: "." no longer matching new line (default_regex_flags)
Hello Serg,
Please review a patch implementing a new system variable
default_regex_flags, to address the remaining incompatibilities
between PCRE and the old regex library.
Greetings.
=== modified file 'mysql-test/r/func_regexp_pcre.result'
--- mysql-test/r/func_regexp_pcre.result 2013-10-08 14:25:17 +0000
+++ mysql-test/r/func_regexp_pcre.result 2014-04-17 12:19:16 +0000
@@ -754,3 +754,88 @@ DROP TABLE t1;
SELECT REGEXP_SUBSTR('See https://mariadb.org/en/foundation/ for details', 'https?://[^/]*');
REGEXP_SUBSTR('See https://mariadb.org/en/foundation/ for details', 'https?://[^/]*')
https://mariadb.org
+#
+# MDEV-6027 RLIKE: "." no longer matching new line
+#
+SELECT 'cat and\ndog' RLIKE 'cat.*dog';
+'cat and\ndog' RLIKE 'cat.*dog'
+0
+SELECT 'cat and\r\ndog' RLIKE 'cat.*dog';
+'cat and\r\ndog' RLIKE 'cat.*dog'
+0
+SELECT 'a\nb' RLIKE 'a.b';
+'a\nb' RLIKE 'a.b'
+0
+SELECT 'a\nb' RLIKE '(?-s)a.b';
+'a\nb' RLIKE '(?-s)a.b'
+0
+SET default_regex_flags='DOTALL';
+SELECT @@default_regex_flags;
+@@default_regex_flags
+DOTALL
+SELECT 'cat and\ndog' RLIKE 'cat.*dog';
+'cat and\ndog' RLIKE 'cat.*dog'
+1
+SELECT 'cat and\r\ndog' RLIKE 'cat.*dog';
+'cat and\r\ndog' RLIKE 'cat.*dog'
+1
+SELECT 'a\nb' RLIKE 'a.b';
+'a\nb' RLIKE 'a.b'
+1
+SELECT 'a\nb' RLIKE '(?-s)a.b';
+'a\nb' RLIKE '(?-s)a.b'
+0
+SET default_regex_flags=DEFAULT;
+SELECT REGEXP_SUBSTR('Monday Mon','^((?<DN>Mon|Fri|Sun)day|(?<DN>Tue)sday).*(?P=DN)$');
+ERROR 42000: Got error 'two named subpatterns have the same name at offset 29' from regexp
+SET default_regex_flags='DUPNAMES';
+SELECT REGEXP_SUBSTR('Monday Mon','^((?<DN>Mon|Fri|Sun)day|(?<DN>Tue)sday).*(?P=DN)$');
+REGEXP_SUBSTR('Monday Mon','^((?<DN>Mon|Fri|Sun)day|(?<DN>Tue)sday).*(?P=DN)$')
+Monday Mon
+SELECT REGEXP_SUBSTR('Tuesday Tue','^((?<DN>Mon|Fri|Sun)day|(?<DN>Tue)sday).*(?P=DN)$');
+REGEXP_SUBSTR('Tuesday Tue','^((?<DN>Mon|Fri|Sun)day|(?<DN>Tue)sday).*(?P=DN)$')
+Tuesday Tue
+SET default_regex_flags=DEFAULT;
+SELECT 'AB' RLIKE 'A B';
+'AB' RLIKE 'A B'
+0
+SELECT 'AB' RLIKE 'A# this is a comment\nB';
+'AB' RLIKE 'A# this is a comment\nB'
+0
+SET default_regex_flags='EXTENDED';
+SELECT 'AB' RLIKE 'A B';
+'AB' RLIKE 'A B'
+1
+SELECT 'AB' RLIKE 'A# this is a comment\nB';
+'AB' RLIKE 'A# this is a comment\nB'
+1
+SET default_regex_flags=DEFAULT;
+SELECT 'Aq' RLIKE 'A\\q';
+'Aq' RLIKE 'A\\q'
+1
+SET default_regex_flags='EXTRA';
+SELECT 'Aq' RLIKE 'A\\q';
+ERROR 42000: Got error 'unrecognized character follows \ at offset 2' from regexp
+SET default_regex_flags=DEFAULT;
+SELECT 'a\nb\nc' RLIKE '^b$';
+'a\nb\nc' RLIKE '^b$'
+0
+SET default_regex_flags='MULTILINE';
+SELECT 'a\nb\nc' RLIKE '^b$';
+'a\nb\nc' RLIKE '^b$'
+1
+SET default_regex_flags=DEFAULT;
+SELECT REGEXP_SUBSTR('abc','.+');
+REGEXP_SUBSTR('abc','.+')
+abc
+SELECT REGEXP_REPLACE('abc','^(.*)(.*)$','\\1/\\2');
+REGEXP_REPLACE('abc','^(.*)(.*)$','\\1/\\2')
+abc/
+SET default_regex_flags='UNGREEDY';
+SELECT REGEXP_SUBSTR('abc','.+');
+REGEXP_SUBSTR('abc','.+')
+a
+SELECT REGEXP_REPLACE('abc','^(.*)(.*)$','\\1/\\2');
+REGEXP_REPLACE('abc','^(.*)(.*)$','\\1/\\2')
+/abc
+SET default_regex_flags=DEFAULT;
=== modified file 'mysql-test/r/mysqld--help.result'
--- mysql-test/r/mysqld--help.result 2014-03-28 07:31:24 +0000
+++ mysql-test/r/mysqld--help.result 2014-04-17 12:17:07 +0000
@@ -136,6 +136,10 @@
--deadlock-timeout-short=#
Short timeout for the two-step deadlock detection (in
microseconds)
+ --default-regex-flags=name
+ Default flags for the regex library. Syntax:
+ default-regex-flags='[flag[,flag[,flag...]]]'. See the
+ manual for the complete list of valid flags
--default-storage-engine=name
The default storage engine for new tables
--default-time-zone=name
@@ -1079,6 +1083,7 @@ deadlock-search-depth-long 15
deadlock-search-depth-short 4
deadlock-timeout-long 50000000
deadlock-timeout-short 10000
+default-regex-flags
default-storage-engine myisam
default-time-zone (No default value)
default-week-format 0
=== added file 'mysql-test/suite/sys_vars/r/default_regex_flags_basic.result'
--- mysql-test/suite/sys_vars/r/default_regex_flags_basic.result 1970-01-01 00:00:00 +0000
+++ mysql-test/suite/sys_vars/r/default_regex_flags_basic.result 2014-04-17 12:19:56 +0000
@@ -0,0 +1,57 @@
+SET default_regex_flags='';
+SELECT @@default_regex_flags;
+@@default_regex_flags
+
+SET default_regex_flags=DEFAULT;
+SELECT @@default_regex_flags;
+@@default_regex_flags
+
+SET default_regex_flags=NULL;
+ERROR 42000: Variable 'default_regex_flags' can't be set to the value of 'NULL'
+SELECT @@default_regex_flags;
+@@default_regex_flags
+
+SET default_regex_flags='UNKNOWN';
+ERROR 42000: Variable 'default_regex_flags' can't be set to the value of 'UNKNOWN'
+SET default_regex_flags=123;
+ERROR 42000: Variable 'default_regex_flags' can't be set to the value of '123'
+SET default_regex_flags=123.0;
+ERROR 42000: Incorrect argument type to variable 'default_regex_flags'
+SET default_regex_flags=123e0;
+ERROR 42000: Incorrect argument type to variable 'default_regex_flags'
+SET default_regex_flags='DOTALL';
+SELECT @@default_regex_flags;
+@@default_regex_flags
+DOTALL
+SET default_regex_flags=NULL;
+ERROR 42000: Variable 'default_regex_flags' can't be set to the value of 'NULL'
+SELECT @@default_regex_flags;
+@@default_regex_flags
+DOTALL
+SET @@default_regex_flags=63;
+SELECT @@default_regex_flags;
+@@default_regex_flags
+DOTALL,DUPNAMES,EXTENDED,EXTRA,MULTILINE,UNGREEDY
+SET @@default_regex_flags='DOTALL';
+SELECT @@default_regex_flags;
+@@default_regex_flags
+DOTALL
+SET @@default_regex_flags='DOTALL,DUPNAMES,EXTENDED,EXTRA,MULTILINE,UNGREEDY';
+SELECT @@default_regex_flags;
+@@default_regex_flags
+DOTALL,DUPNAMES,EXTENDED,EXTRA,MULTILINE,UNGREEDY
+SET @@default_regex_flags=DEFAULT;
+SET @@global.default_regex_flags='MULTILINE';
+SELECT @@session.default_regex_flags;
+@@session.default_regex_flags
+
+# connection con1
+SELECT @@session.default_regex_flags;
+@@session.default_regex_flags
+MULTILINE
+# connection default
+SELECT @@session.default_regex_flags;
+@@session.default_regex_flags
+
+SET @@global.default_regex_flags=DEFAULT;
+SET default_regex_flags=DEFAULT;
=== added file 'mysql-test/suite/sys_vars/t/default_regex_flags_basic.test'
--- mysql-test/suite/sys_vars/t/default_regex_flags_basic.test 1970-01-01 00:00:00 +0000
+++ mysql-test/suite/sys_vars/t/default_regex_flags_basic.test 2014-04-17 12:17:43 +0000
@@ -0,0 +1,43 @@
+
+SET default_regex_flags='';
+SELECT @@default_regex_flags;
+SET default_regex_flags=DEFAULT;
+SELECT @@default_regex_flags;
+--error ER_WRONG_VALUE_FOR_VAR
+SET default_regex_flags=NULL;
+SELECT @@default_regex_flags;
+--error ER_WRONG_VALUE_FOR_VAR
+SET default_regex_flags='UNKNOWN';
+--error ER_WRONG_VALUE_FOR_VAR
+SET default_regex_flags=123;
+--error ER_WRONG_TYPE_FOR_VAR
+SET default_regex_flags=123.0;
+--error ER_WRONG_TYPE_FOR_VAR
+SET default_regex_flags=123e0;
+SET default_regex_flags='DOTALL';
+SELECT @@default_regex_flags;
+--error ER_WRONG_VALUE_FOR_VAR
+SET default_regex_flags=NULL;
+SELECT @@default_regex_flags;
+
+SET @@default_regex_flags=63;
+SELECT @@default_regex_flags;
+SET @@default_regex_flags='DOTALL';
+SELECT @@default_regex_flags;
+SET @@default_regex_flags='DOTALL,DUPNAMES,EXTENDED,EXTRA,MULTILINE,UNGREEDY';
+SELECT @@default_regex_flags;
+SET @@default_regex_flags=DEFAULT;
+
+SET @@global.default_regex_flags='MULTILINE';
+SELECT @@session.default_regex_flags;
+connect (con1,localhost,root,,);
+--echo # connection con1
+connection con1;
+SELECT @@session.default_regex_flags;
+connection default;
+--echo # connection default
+disconnect con1;
+SELECT @@session.default_regex_flags;
+SET @@global.default_regex_flags=DEFAULT;
+
+SET default_regex_flags=DEFAULT;
=== modified file 'mysql-test/t/func_regexp_pcre.test'
--- mysql-test/t/func_regexp_pcre.test 2013-10-08 14:25:17 +0000
+++ mysql-test/t/func_regexp_pcre.test 2014-04-17 12:17:49 +0000
@@ -349,3 +349,51 @@ DROP TABLE t1;
SELECT REGEXP_SUBSTR('See https://mariadb.org/en/foundation/ for details', 'https?://[^/]*');
+
+
+--echo #
+--echo # MDEV-6027 RLIKE: "." no longer matching new line
+--echo #
+SELECT 'cat and\ndog' RLIKE 'cat.*dog';
+SELECT 'cat and\r\ndog' RLIKE 'cat.*dog';
+SELECT 'a\nb' RLIKE 'a.b';
+SELECT 'a\nb' RLIKE '(?-s)a.b';
+SET default_regex_flags='DOTALL';
+SELECT @@default_regex_flags;
+SELECT 'cat and\ndog' RLIKE 'cat.*dog';
+SELECT 'cat and\r\ndog' RLIKE 'cat.*dog';
+SELECT 'a\nb' RLIKE 'a.b';
+SELECT 'a\nb' RLIKE '(?-s)a.b';
+SET default_regex_flags=DEFAULT;
+
+--error ER_REGEXP_ERROR
+SELECT REGEXP_SUBSTR('Monday Mon','^((?<DN>Mon|Fri|Sun)day|(?<DN>Tue)sday).*(?P=DN)$');
+SET default_regex_flags='DUPNAMES';
+SELECT REGEXP_SUBSTR('Monday Mon','^((?<DN>Mon|Fri|Sun)day|(?<DN>Tue)sday).*(?P=DN)$');
+SELECT REGEXP_SUBSTR('Tuesday Tue','^((?<DN>Mon|Fri|Sun)day|(?<DN>Tue)sday).*(?P=DN)$');
+SET default_regex_flags=DEFAULT;
+
+SELECT 'AB' RLIKE 'A B';
+SELECT 'AB' RLIKE 'A# this is a comment\nB';
+SET default_regex_flags='EXTENDED';
+SELECT 'AB' RLIKE 'A B';
+SELECT 'AB' RLIKE 'A# this is a comment\nB';
+SET default_regex_flags=DEFAULT;
+
+SELECT 'Aq' RLIKE 'A\\q';
+SET default_regex_flags='EXTRA';
+--error ER_REGEXP_ERROR
+SELECT 'Aq' RLIKE 'A\\q';
+SET default_regex_flags=DEFAULT;
+
+SELECT 'a\nb\nc' RLIKE '^b$';
+SET default_regex_flags='MULTILINE';
+SELECT 'a\nb\nc' RLIKE '^b$';
+SET default_regex_flags=DEFAULT;
+
+SELECT REGEXP_SUBSTR('abc','.+');
+SELECT REGEXP_REPLACE('abc','^(.*)(.*)$','\\1/\\2');
+SET default_regex_flags='UNGREEDY';
+SELECT REGEXP_SUBSTR('abc','.+');
+SELECT REGEXP_REPLACE('abc','^(.*)(.*)$','\\1/\\2');
+SET default_regex_flags=DEFAULT;
=== modified file 'sql/item_cmpfunc.cc'
--- sql/item_cmpfunc.cc 2014-03-26 21:25:38 +0000
+++ sql/item_cmpfunc.cc 2014-04-16 12:53:20 +0000
@@ -32,6 +32,7 @@
#include "sql_parse.h" // check_stack_overrun
#include "sql_time.h" // make_truncated_value_warning
#include "sql_base.h" // dynamic_column_error_message
+#include "sys_vars_shared.h" // default_regex_flags_pcre
static Item_result item_store_type(Item_result a, Item *item,
my_bool unsigned_flag)
@@ -5055,6 +5056,11 @@ bool Item_func_like::find_selective_pred
}
+/**
+  PCRE options implied by the default_regex_flags variable of the
+  current thread.
+
+  @return  Whatever default_regex_flags_pcre() computes for current_thd
+*/
+int Regexp_processor_pcre::default_regex_flags()
+{
+  const THD *thd= current_thd;
+  return default_regex_flags_pcre(thd);
+}
+
/**
Convert string to lib_charset, if needed.
=== modified file 'sql/item_cmpfunc.h'
--- sql/item_cmpfunc.h 2014-03-26 21:25:38 +0000
+++ sql/item_cmpfunc.h 2014-04-16 12:24:41 +0000
@@ -1513,9 +1513,10 @@ class Regexp_processor_pcre
m_library_charset(&my_charset_utf8_general_ci),
m_subpatterns_needed(0)
{}
+ int default_regex_flags();
void init(CHARSET_INFO *data_charset, int extra_flags, uint nsubpatterns)
{
- m_library_flags= extra_flags |
+ m_library_flags= default_regex_flags() | extra_flags |
(data_charset != &my_charset_bin ?
(PCRE_UTF8 | PCRE_UCP) : 0) |
((data_charset->state &
=== modified file 'sql/sql_class.h'
--- sql/sql_class.h 2014-04-15 07:29:57 +0000
+++ sql/sql_class.h 2014-04-16 12:17:43 +0000
@@ -516,6 +516,7 @@ typedef struct system_variables
ulonglong join_buff_size;
ulonglong sortbuff_size;
ulonglong group_concat_max_len;
+ ulonglong default_regex_flags;
ha_rows select_limit;
ha_rows max_join_size;
ha_rows expensive_subquery_limit;
=== modified file 'sql/sys_vars.cc'
--- sql/sys_vars.cc 2014-03-28 07:31:24 +0000
+++ sql/sys_vars.cc 2014-04-17 12:23:12 +0000
@@ -4568,6 +4568,46 @@ static Sys_var_set Sys_log_slow_filter(
log_slow_filter_names,
DEFAULT(MAX_SET(array_elements(log_slow_filter_names)-1)));
+static const char *default_regex_flags_names[]= // Names accepted by default_regex_flags; listed in the same order as default_regex_flags_to_pcre[]
+{
+ "DOTALL", // (?s) . matches anything including NL
+ "DUPNAMES", // (?J) Allow duplicate names for subpatterns
+ "EXTENDED", // (?x) Ignore white space and # comments
+ "EXTRA", // (?X) extra features (e.g. error on unknown escape character)
+ "MULTILINE", // (?m) ^ and $ match newlines within data
+ "UNGREEDY", // (?U) Invert greediness of quantifiers
+ 0 // End of list
+};
+static const int default_regex_flags_to_pcre[]= // PCRE option for each entry of default_regex_flags_names[], same order
+{
+ PCRE_DOTALL,
+ PCRE_DUPNAMES,
+ PCRE_EXTENDED,
+ PCRE_EXTRA,
+ PCRE_MULTILINE,
+ PCRE_UNGREEDY,
+ 0 // Sentinel: default_regex_flags_pcre() stops scanning at the first 0 entry
+};
+/**
+  Translate the default_regex_flags value of a thread into PCRE options.
+
+  Bit i of thd->variables.default_regex_flags selects
+  default_regex_flags_to_pcre[i]. The table is scanned until its
+  terminating 0 entry.
+
+  @param thd  Thread whose variables.default_regex_flags is read
+  @return     Bitwise OR of the table entries whose bit is set,
+              0 if no bit is set
+*/
+int default_regex_flags_pcre(const THD *thd)
+{
+  const ulonglong src= thd->variables.default_regex_flags;
+  int res= 0;
+  for (uint i= 0; default_regex_flags_to_pcre[i]; i++)
+  {
+    /*
+      Build the mask as ulonglong, the same width as src: a plain
+      "1 << i" is an int shift and stops being valid once the bit
+      index reaches the width of int.
+    */
+    if (src & (((ulonglong) 1) << i))
+      res|= default_regex_flags_to_pcre[i];
+  }
+  return res;
+}
+static Sys_var_set Sys_default_regex_flags( // Registers the default_regex_flags system variable
+ "default_regex_flags",
+ "Default flags for the regex library. "
+ "Syntax: default-regex-flags='[flag[,flag[,flag...]]]'. "
+ "See the manual for the complete list of valid flags",
+ SESSION_VAR(default_regex_flags), CMD_LINE(REQUIRED_ARG),
+ default_regex_flags_names, // Valid flag names, see the array above
+ DEFAULT(0)); // No flags set by default
+
static Sys_var_ulong Sys_log_slow_rate_limit(
"log_slow_rate_limit",
"Write to slow log every #th slow query. Set to 1 to log everything. "
=== modified file 'sql/sys_vars_shared.h'
--- sql/sys_vars_shared.h 2011-06-30 15:46:53 +0000
+++ sql/sys_vars_shared.h 2014-04-16 12:51:45 +0000
@@ -36,6 +36,8 @@ extern sys_var *intern_find_sys_var(cons
extern sys_var_chain all_sys_vars;
+extern int default_regex_flags_pcre(const THD *thd); // PCRE options for thd's default_regex_flags; defined in sys_vars.cc
+
/** wrapper to hide a mutex and an rwlock under a common interface */
class PolyLock
{
Follow ups