← Back to team overview

maria-developers team mailing list archive

MDEV-6027 RLIKE: "." no longer matching new line (default_regex_flags)

 

Hello Serg,

Please review a patch implementing a new system variable
default_regex_flags, to address the remaining incompatibilities
between PCRE and the old regex library.

Greetings.
=== modified file 'mysql-test/r/func_regexp_pcre.result'
--- mysql-test/r/func_regexp_pcre.result	2013-10-08 14:25:17 +0000
+++ mysql-test/r/func_regexp_pcre.result	2014-04-17 12:19:16 +0000
@@ -754,3 +754,88 @@ DROP TABLE t1;
 SELECT REGEXP_SUBSTR('See https://mariadb.org/en/foundation/ for details', 'https?://[^/]*');
 REGEXP_SUBSTR('See https://mariadb.org/en/foundation/ for details', 'https?://[^/]*')
 https://mariadb.org
+#
+# MDEV-6027 RLIKE: "." no longer matching new line
+#
+SELECT  'cat and\ndog'  RLIKE 'cat.*dog';
+'cat and\ndog'  RLIKE 'cat.*dog'
+0
+SELECT  'cat and\r\ndog'  RLIKE 'cat.*dog';
+'cat and\r\ndog'  RLIKE 'cat.*dog'
+0
+SELECT 'a\nb' RLIKE 'a.b';
+'a\nb' RLIKE 'a.b'
+0
+SELECT 'a\nb' RLIKE '(?-s)a.b';
+'a\nb' RLIKE '(?-s)a.b'
+0
+SET default_regex_flags='DOTALL';
+SELECT @@default_regex_flags;
+@@default_regex_flags
+DOTALL
+SELECT  'cat and\ndog'  RLIKE 'cat.*dog';
+'cat and\ndog'  RLIKE 'cat.*dog'
+1
+SELECT  'cat and\r\ndog'  RLIKE 'cat.*dog';
+'cat and\r\ndog'  RLIKE 'cat.*dog'
+1
+SELECT 'a\nb' RLIKE 'a.b';
+'a\nb' RLIKE 'a.b'
+1
+SELECT 'a\nb' RLIKE '(?-s)a.b';
+'a\nb' RLIKE '(?-s)a.b'
+0
+SET default_regex_flags=DEFAULT;
+SELECT REGEXP_SUBSTR('Monday Mon','^((?<DN>Mon|Fri|Sun)day|(?<DN>Tue)sday).*(?P=DN)$');
+ERROR 42000: Got error 'two named subpatterns have the same name at offset 29' from regexp
+SET default_regex_flags='DUPNAMES';
+SELECT REGEXP_SUBSTR('Monday Mon','^((?<DN>Mon|Fri|Sun)day|(?<DN>Tue)sday).*(?P=DN)$');
+REGEXP_SUBSTR('Monday Mon','^((?<DN>Mon|Fri|Sun)day|(?<DN>Tue)sday).*(?P=DN)$')
+Monday Mon
+SELECT REGEXP_SUBSTR('Tuesday Tue','^((?<DN>Mon|Fri|Sun)day|(?<DN>Tue)sday).*(?P=DN)$');
+REGEXP_SUBSTR('Tuesday Tue','^((?<DN>Mon|Fri|Sun)day|(?<DN>Tue)sday).*(?P=DN)$')
+Tuesday Tue
+SET default_regex_flags=DEFAULT;
+SELECT 'AB' RLIKE 'A B';
+'AB' RLIKE 'A B'
+0
+SELECT 'AB' RLIKE 'A# this is a comment\nB';
+'AB' RLIKE 'A# this is a comment\nB'
+0
+SET default_regex_flags='EXTENDED';
+SELECT 'AB' RLIKE 'A B';
+'AB' RLIKE 'A B'
+1
+SELECT 'AB' RLIKE 'A# this is a comment\nB';
+'AB' RLIKE 'A# this is a comment\nB'
+1
+SET default_regex_flags=DEFAULT;
+SELECT 'Aq' RLIKE 'A\\q';
+'Aq' RLIKE 'A\\q'
+1
+SET default_regex_flags='EXTRA';
+SELECT 'Aq' RLIKE 'A\\q';
+ERROR 42000: Got error 'unrecognized character follows \ at offset 2' from regexp
+SET default_regex_flags=DEFAULT;
+SELECT 'a\nb\nc' RLIKE '^b$';
+'a\nb\nc' RLIKE '^b$'
+0
+SET default_regex_flags='MULTILINE';
+SELECT 'a\nb\nc' RLIKE '^b$';
+'a\nb\nc' RLIKE '^b$'
+1
+SET default_regex_flags=DEFAULT;
+SELECT REGEXP_SUBSTR('abc','.+');
+REGEXP_SUBSTR('abc','.+')
+abc
+SELECT REGEXP_REPLACE('abc','^(.*)(.*)$','\\1/\\2');
+REGEXP_REPLACE('abc','^(.*)(.*)$','\\1/\\2')
+abc/
+SET default_regex_flags='UNGREEDY';
+SELECT REGEXP_SUBSTR('abc','.+');
+REGEXP_SUBSTR('abc','.+')
+a
+SELECT REGEXP_REPLACE('abc','^(.*)(.*)$','\\1/\\2');
+REGEXP_REPLACE('abc','^(.*)(.*)$','\\1/\\2')
+/abc
+SET default_regex_flags=DEFAULT;

=== modified file 'mysql-test/r/mysqld--help.result'
--- mysql-test/r/mysqld--help.result	2014-03-28 07:31:24 +0000
+++ mysql-test/r/mysqld--help.result	2014-04-17 12:17:07 +0000
@@ -136,6 +136,10 @@
  --deadlock-timeout-short=# 
  Short timeout for the two-step deadlock detection (in
  microseconds)
+ --default-regex-flags=name 
+ Default flags for the regex library. Syntax:
+ default-regex-flags='[flag[,flag[,flag...]]]'. See the
+ manual for the complete list of valid flags
  --default-storage-engine=name 
  The default storage engine for new tables
  --default-time-zone=name 
@@ -1079,6 +1083,7 @@ deadlock-search-depth-long 15
 deadlock-search-depth-short 4
 deadlock-timeout-long 50000000
 deadlock-timeout-short 10000
+default-regex-flags 
 default-storage-engine myisam
 default-time-zone (No default value)
 default-week-format 0

=== added file 'mysql-test/suite/sys_vars/r/default_regex_flags_basic.result'
--- mysql-test/suite/sys_vars/r/default_regex_flags_basic.result	1970-01-01 00:00:00 +0000
+++ mysql-test/suite/sys_vars/r/default_regex_flags_basic.result	2014-04-17 12:19:56 +0000
@@ -0,0 +1,57 @@
+SET default_regex_flags='';
+SELECT @@default_regex_flags;
+@@default_regex_flags
+
+SET default_regex_flags=DEFAULT;
+SELECT @@default_regex_flags;
+@@default_regex_flags
+
+SET default_regex_flags=NULL;
+ERROR 42000: Variable 'default_regex_flags' can't be set to the value of 'NULL'
+SELECT @@default_regex_flags;
+@@default_regex_flags
+
+SET default_regex_flags='UNKNOWN';
+ERROR 42000: Variable 'default_regex_flags' can't be set to the value of 'UNKNOWN'
+SET default_regex_flags=123;
+ERROR 42000: Variable 'default_regex_flags' can't be set to the value of '123'
+SET default_regex_flags=123.0;
+ERROR 42000: Incorrect argument type to variable 'default_regex_flags'
+SET default_regex_flags=123e0;
+ERROR 42000: Incorrect argument type to variable 'default_regex_flags'
+SET default_regex_flags='DOTALL';
+SELECT @@default_regex_flags;
+@@default_regex_flags
+DOTALL
+SET default_regex_flags=NULL;
+ERROR 42000: Variable 'default_regex_flags' can't be set to the value of 'NULL'
+SELECT @@default_regex_flags;
+@@default_regex_flags
+DOTALL
+SET @@default_regex_flags=63;
+SELECT @@default_regex_flags;
+@@default_regex_flags
+DOTALL,DUPNAMES,EXTENDED,EXTRA,MULTILINE,UNGREEDY
+SET @@default_regex_flags='DOTALL';
+SELECT @@default_regex_flags;
+@@default_regex_flags
+DOTALL
+SET @@default_regex_flags='DOTALL,DUPNAMES,EXTENDED,EXTRA,MULTILINE,UNGREEDY';
+SELECT @@default_regex_flags;
+@@default_regex_flags
+DOTALL,DUPNAMES,EXTENDED,EXTRA,MULTILINE,UNGREEDY
+SET @@default_regex_flags=DEFAULT;
+SET @@global.default_regex_flags='MULTILINE';
+SELECT @@session.default_regex_flags;
+@@session.default_regex_flags
+
+# connection con1
+SELECT @@session.default_regex_flags;
+@@session.default_regex_flags
+MULTILINE
+# connection default
+SELECT @@session.default_regex_flags;
+@@session.default_regex_flags
+
+SET @@global.default_regex_flags=DEFAULT;
+SET default_regex_flags=DEFAULT;

=== added file 'mysql-test/suite/sys_vars/t/default_regex_flags_basic.test'
--- mysql-test/suite/sys_vars/t/default_regex_flags_basic.test	1970-01-01 00:00:00 +0000
+++ mysql-test/suite/sys_vars/t/default_regex_flags_basic.test	2014-04-17 12:17:43 +0000
@@ -0,0 +1,43 @@
+
+SET default_regex_flags='';
+SELECT @@default_regex_flags;
+SET default_regex_flags=DEFAULT;
+SELECT @@default_regex_flags;
+--error ER_WRONG_VALUE_FOR_VAR
+SET default_regex_flags=NULL;
+SELECT @@default_regex_flags;
+--error ER_WRONG_VALUE_FOR_VAR
+SET default_regex_flags='UNKNOWN';
+--error ER_WRONG_VALUE_FOR_VAR
+SET default_regex_flags=123;
+--error ER_WRONG_TYPE_FOR_VAR
+SET default_regex_flags=123.0;
+--error ER_WRONG_TYPE_FOR_VAR
+SET default_regex_flags=123e0;
+SET default_regex_flags='DOTALL';
+SELECT @@default_regex_flags;
+--error ER_WRONG_VALUE_FOR_VAR
+SET default_regex_flags=NULL;
+SELECT @@default_regex_flags;
+
+SET @@default_regex_flags=63;
+SELECT @@default_regex_flags;
+SET @@default_regex_flags='DOTALL';
+SELECT @@default_regex_flags;
+SET @@default_regex_flags='DOTALL,DUPNAMES,EXTENDED,EXTRA,MULTILINE,UNGREEDY';
+SELECT @@default_regex_flags;
+SET @@default_regex_flags=DEFAULT;
+
+SET @@global.default_regex_flags='MULTILINE';
+SELECT @@session.default_regex_flags;
+connect (con1,localhost,root,,);
+--echo # connection con1
+connection con1;
+SELECT @@session.default_regex_flags;
+connection default;
+--echo # connection default
+disconnect con1;
+SELECT @@session.default_regex_flags;
+SET @@global.default_regex_flags=DEFAULT;
+
+SET default_regex_flags=DEFAULT;

=== modified file 'mysql-test/t/func_regexp_pcre.test'
--- mysql-test/t/func_regexp_pcre.test	2013-10-08 14:25:17 +0000
+++ mysql-test/t/func_regexp_pcre.test	2014-04-17 12:17:49 +0000
@@ -349,3 +349,51 @@ DROP TABLE t1;
 
 
 SELECT REGEXP_SUBSTR('See https://mariadb.org/en/foundation/ for details', 'https?://[^/]*');
+
+
+--echo #
+--echo # MDEV-6027 RLIKE: "." no longer matching new line
+--echo #
+SELECT  'cat and\ndog'  RLIKE 'cat.*dog';
+SELECT  'cat and\r\ndog'  RLIKE 'cat.*dog';
+SELECT 'a\nb' RLIKE 'a.b';
+SELECT 'a\nb' RLIKE '(?-s)a.b';
+SET default_regex_flags='DOTALL';
+SELECT @@default_regex_flags;
+SELECT  'cat and\ndog'  RLIKE 'cat.*dog';
+SELECT  'cat and\r\ndog'  RLIKE 'cat.*dog';
+SELECT 'a\nb' RLIKE 'a.b';
+SELECT 'a\nb' RLIKE '(?-s)a.b';
+SET default_regex_flags=DEFAULT;
+
+--error ER_REGEXP_ERROR
+SELECT REGEXP_SUBSTR('Monday Mon','^((?<DN>Mon|Fri|Sun)day|(?<DN>Tue)sday).*(?P=DN)$');
+SET default_regex_flags='DUPNAMES';
+SELECT REGEXP_SUBSTR('Monday Mon','^((?<DN>Mon|Fri|Sun)day|(?<DN>Tue)sday).*(?P=DN)$');
+SELECT REGEXP_SUBSTR('Tuesday Tue','^((?<DN>Mon|Fri|Sun)day|(?<DN>Tue)sday).*(?P=DN)$');
+SET default_regex_flags=DEFAULT;
+
+SELECT 'AB' RLIKE 'A B';
+SELECT 'AB' RLIKE 'A# this is a comment\nB';
+SET default_regex_flags='EXTENDED';
+SELECT 'AB' RLIKE 'A B';
+SELECT 'AB' RLIKE 'A# this is a comment\nB';
+SET default_regex_flags=DEFAULT;
+
+SELECT 'Aq' RLIKE 'A\\q';
+SET default_regex_flags='EXTRA';
+--error ER_REGEXP_ERROR
+SELECT 'Aq' RLIKE 'A\\q';
+SET default_regex_flags=DEFAULT;
+
+SELECT 'a\nb\nc' RLIKE '^b$';
+SET default_regex_flags='MULTILINE';
+SELECT 'a\nb\nc' RLIKE '^b$';
+SET default_regex_flags=DEFAULT;
+
+SELECT REGEXP_SUBSTR('abc','.+');
+SELECT REGEXP_REPLACE('abc','^(.*)(.*)$','\\1/\\2');
+SET default_regex_flags='UNGREEDY';
+SELECT REGEXP_SUBSTR('abc','.+');
+SELECT REGEXP_REPLACE('abc','^(.*)(.*)$','\\1/\\2');
+SET default_regex_flags=DEFAULT;

=== modified file 'sql/item_cmpfunc.cc'
--- sql/item_cmpfunc.cc	2014-03-26 21:25:38 +0000
+++ sql/item_cmpfunc.cc	2014-04-16 12:53:20 +0000
@@ -32,6 +32,7 @@
 #include "sql_parse.h"                          // check_stack_overrun
 #include "sql_time.h"                  // make_truncated_value_warning
 #include "sql_base.h"                  // dynamic_column_error_message
+#include "sys_vars_shared.h"           // default_regex_flags_pcre
 
 static Item_result item_store_type(Item_result a, Item *item,
                                    my_bool unsigned_flag)
@@ -5055,6 +5056,11 @@ bool Item_func_like::find_selective_pred
 }
 
 
+int Regexp_processor_pcre::default_regex_flags()  // PCRE option bits for current_thd's default_regex_flags
+{
+  return default_regex_flags_pcre(current_thd);  // conversion itself lives in default_regex_flags_pcre()
+}
+
 
 /**
   Convert string to lib_charset, if needed.

=== modified file 'sql/item_cmpfunc.h'
--- sql/item_cmpfunc.h	2014-03-26 21:25:38 +0000
+++ sql/item_cmpfunc.h	2014-04-16 12:24:41 +0000
@@ -1513,9 +1513,10 @@ class Regexp_processor_pcre
     m_library_charset(&my_charset_utf8_general_ci),
     m_subpatterns_needed(0)
   {}
+  int default_regex_flags();
   void init(CHARSET_INFO *data_charset, int extra_flags, uint nsubpatterns)
   {
-    m_library_flags= extra_flags |
+    m_library_flags= default_regex_flags() | extra_flags |
                     (data_charset != &my_charset_bin ?
                      (PCRE_UTF8 | PCRE_UCP) : 0) |
                     ((data_charset->state &

=== modified file 'sql/sql_class.h'
--- sql/sql_class.h	2014-04-15 07:29:57 +0000
+++ sql/sql_class.h	2014-04-16 12:17:43 +0000
@@ -516,6 +516,7 @@ typedef struct system_variables
   ulonglong join_buff_size;
   ulonglong sortbuff_size;
   ulonglong group_concat_max_len;
+  ulonglong default_regex_flags;
   ha_rows select_limit;
   ha_rows max_join_size;
   ha_rows expensive_subquery_limit;

=== modified file 'sql/sys_vars.cc'
--- sql/sys_vars.cc	2014-03-28 07:31:24 +0000
+++ sql/sys_vars.cc	2014-04-17 12:23:12 +0000
@@ -4568,6 +4568,46 @@ static Sys_var_set Sys_log_slow_filter(
        log_slow_filter_names,
        DEFAULT(MAX_SET(array_elements(log_slow_filter_names)-1)));
 
+static const char *default_regex_flags_names[]=  // value names for default_regex_flags; keep in step with default_regex_flags_to_pcre[]
+{
+  "DOTALL",    // (?s)  . matches anything including NL
+  "DUPNAMES",  // (?J)  Allow duplicate names for subpatterns
+  "EXTENDED",  // (?x)  Ignore white space and # comments
+  "EXTRA",     // (?X)  extra features (e.g. error on unknown escape character)
+  "MULTILINE", // (?m)  ^ and $ match newlines within data
+  "UNGREEDY",  // (?U)  Invert greediness of quantifiers
+  0            // presumably the end-of-list marker expected by Sys_var_set -- TODO confirm
+};
+static const int default_regex_flags_to_pcre[]=  // PCRE option for each entry of default_regex_flags_names[], same order
+{
+  PCRE_DOTALL,
+  PCRE_DUPNAMES,
+  PCRE_EXTENDED,
+  PCRE_EXTRA,
+  PCRE_MULTILINE,
+  PCRE_UNGREEDY,
+  0  // sentinel: the loop in default_regex_flags_pcre() stops here
+};
+int default_regex_flags_pcre(const THD *thd)  // maps thd->variables.default_regex_flags to an OR of PCRE_* options
+{
+  ulonglong src= thd->variables.default_regex_flags;  // bit i set <=> table entry i requested
+  int i, res= 0;
+  for (i= res= 0; default_regex_flags_to_pcre[i]; i++)  // runs until the 0 sentinel; res is zeroed a second time here (redundant)
+  {
+    if (src & (1 << i))  // NOTE(review): int-width shift; fine for the 6 entries today, needs a wider constant if the table nears 31 entries
+      res|= default_regex_flags_to_pcre[i];
+  }
+  return res;  // 0 when no flag bit is set
+}
+static Sys_var_set Sys_default_regex_flags(  // registers the default_regex_flags system variable
+       "default_regex_flags",
+       "Default flags for the regex library. "
+       "Syntax: default-regex-flags='[flag[,flag[,flag...]]]'. "
+       "See the manual for the complete list of valid flags",
+       SESSION_VAR(default_regex_flags), CMD_LINE(REQUIRED_ARG),
+       default_regex_flags_names,  // accepted flag names
+       DEFAULT(0));  // default value 0: no flag set
+
 static Sys_var_ulong Sys_log_slow_rate_limit(
        "log_slow_rate_limit",
        "Write to slow log every #th slow query. Set to 1 to log everything. "

=== modified file 'sql/sys_vars_shared.h'
--- sql/sys_vars_shared.h	2011-06-30 15:46:53 +0000
+++ sql/sys_vars_shared.h	2014-04-16 12:51:45 +0000
@@ -36,6 +36,8 @@ extern sys_var *intern_find_sys_var(cons
 
 extern sys_var_chain all_sys_vars;
 
+extern int default_regex_flags_pcre(const THD *thd);  // PCRE option bits for thd's default_regex_flags; defined in sql/sys_vars.cc
+
 /** wrapper to hide a mutex and an rwlock under a common interface */
 class PolyLock
 {


Follow ups