← Back to team overview

maria-developers team mailing list archive

Re: afedc29f773: MDEV-14024 PCRE2.

 

Hi, Alexey!

Thanks!
Few minor comments, see below

On Nov 23, Alexey Botchkov wrote:
> revision-id: afedc29f773362f342756e9cdd9bbd9ed0f1abe2 (versioning-1.0.3-68-gafedc29f773)
> parent(s): 4ec8598c1dcb63499bce998142b8e5b8b09b2d30
> author: Alexey Botchkov <holyfoot@xxxxxxxxxxxx>
> committer: Alexey Botchkov <holyfoot@xxxxxxxxxxxx>
> timestamp: 2018-06-26 14:10:46 +0400
> message:
> 
> MDEV-14024 PCRE2.
> 
> Related changes in the server code.
> 
> diff --git a/cmake/pcre.cmake b/cmake/pcre.cmake
> index 4c113929866..264f93a60a2 100644
> --- a/cmake/pcre.cmake
> +++ b/cmake/pcre.cmake
> @@ -5,24 +5,17 @@ SET(WITH_PCRE "auto" CACHE STRING
>  
>  MACRO (CHECK_PCRE)
>    IF(WITH_PCRE STREQUAL "system" OR WITH_PCRE STREQUAL "auto")
> -    CHECK_LIBRARY_EXISTS(pcre pcre_stack_guard "" HAVE_PCRE_STACK_GUARD)
> +    CHECK_LIBRARY_EXISTS(pcre2-8 pcre2_match "" HAVE_PCRE2)
> -    IF(NOT CMAKE_CROSSCOMPILING)
> -      SET(CMAKE_REQUIRED_LIBRARIES "pcre")
> -      CHECK_C_SOURCE_RUNS("
> -        #include <pcre.h>
> -        int main() {
> -        return -pcre_exec(NULL, NULL, NULL, -999, -999, 0, NULL, 0) < 256;
> -        }"  PCRE_STACK_SIZE_OK)
> -      SET(CMAKE_REQUIRED_LIBRARIES)
> -    ENDIF()
>    ENDIF()
> -  IF(NOT HAVE_PCRE_STACK_GUARD OR NOT PCRE_STACK_SIZE_OK OR
> -     WITH_PCRE STREQUAL "bundled")
> +  IF(NOT HAVE_PCRE2 OR WITH_PCRE STREQUAL "bundled")
>      IF (WITH_PCRE STREQUAL "system")
> -      MESSAGE(FATAL_ERROR "system pcre is not found or unusable")
> +      MESSAGE(FATAL_ERROR "system pcre2-8 library is not found or unusable")
>      ENDIF()
> -    SET(PCRE_INCLUDES ${CMAKE_BINARY_DIR}/pcre ${CMAKE_SOURCE_DIR}/pcre)
> -    ADD_SUBDIRECTORY(pcre)
> +    SET(PCRE_INCLUDES ${CMAKE_BINARY_DIR}/pcre2 ${CMAKE_SOURCE_DIR}/pcre2
> +                      ${CMAKE_BINARY_DIR}/pcre2/src ${CMAKE_SOURCE_DIR}/pcre2/src)

That's a bit strange. I'd expect only .../pcre2/src paths, not .../pcre2.
you need pcre2.h and pcre2posix.h, they're both in src/ right?

> +    SET(PCRE_BUILD_TESTS OFF CACHE BOOL "Disable tests.")
> +    SET(PCRE2_BUILD_PCRE2GREP OFF CACHE BOOL "Disable pcre2grep")
> +    ADD_SUBDIRECTORY(pcre2)
>    ENDIF()
>  ENDMACRO()
>  
> diff --git a/extra/mariabackup/CMakeLists.txt b/extra/mariabackup/CMakeLists.txt
> index 7df5fa17903..a9d7153b893 100644
> --- a/extra/mariabackup/CMakeLists.txt
> +++ b/extra/mariabackup/CMakeLists.txt
> @@ -37,7 +37,7 @@ INCLUDE_DIRECTORIES(
>    )
>  
>  IF(NOT HAVE_SYSTEM_REGEX)
> -  INCLUDE_DIRECTORIES(${CMAKE_SOURCE_DIR}/pcre)
> +  INCLUDE_DIRECTORIES(${CMAKE_SOURCE_DIR}/pcre2 ${CMAKE_SOURCE_DIR}/pcre2/src)

Hmm, not INCLUDE_DIRECTORIES(${PCRE_INCLUDES}) ?
pcre2.h is in the ${CMAKE_BINARY_DIR}, it's generated.

>  ENDIF()
>  
>  IF(WITH_WSREP)
> diff --git a/mysql-test/main/func_regexp_pcre.test b/mysql-test/main/func_regexp_pcre.test
> index 21600390bb2..30969b3e9ae 100644
> --- a/mysql-test/main/func_regexp_pcre.test
> +++ b/mysql-test/main/func_regexp_pcre.test
> @@ -382,6 +382,7 @@ SELECT 'AB' RLIKE 'A B';
>  SELECT 'AB' RLIKE 'A# this is a comment\nB';
>  SET default_regex_flags=DEFAULT;
>  
> +--error ER_REGEXP_ERROR
>  SELECT 'Aq' RLIKE 'A\\q';

you forgot to update func_regexp_pcre.result 

>  SET default_regex_flags='EXTRA';
>  --error ER_REGEXP_ERROR
> diff --git a/sql/item_cmpfunc.cc b/sql/item_cmpfunc.cc
> index 76f4788c1cf..450a9c54176 100644
> --- a/sql/item_cmpfunc.cc
> +++ b/sql/item_cmpfunc.cc
> @@ -5485,124 +5486,44 @@ bool Regexp_processor_pcre::compile(Item *item, bool send_error)
>  */
>  void Regexp_processor_pcre::pcre_exec_warn(int rc) const
>  {
> -  char buf[64];
> -  const char *errmsg= NULL;
> +  PCRE2_UCHAR8 buf[128];
>    THD *thd= current_thd;
>  
> -  /*
> -    Make a descriptive message only for those pcre_exec() error codes
> -    that can actually happen in MariaDB.
> -  */
> -  switch (rc)
> +  int errlen= pcre2_get_error_message(rc, buf, sizeof(buf));
> +  if (errlen <= 0)
>    {
> -  case PCRE_ERROR_NULL:
> -    errmsg= "pcre_exec: null argument passed";
> -    break;
> -  case PCRE_ERROR_BADOPTION:
> -    errmsg= "pcre_exec: bad option";
> -    break;
> -  case PCRE_ERROR_BADMAGIC:
> -    errmsg= "pcre_exec: bad magic - not a compiled regex";
> -    break;
> -  case PCRE_ERROR_UNKNOWN_OPCODE:
> -    errmsg= "pcre_exec: error in compiled regex";
> -    break;
> -  case PCRE_ERROR_NOMEMORY:
> -    errmsg= "pcre_exec: Out of memory";
> -    break;
> -  case PCRE_ERROR_NOSUBSTRING:
> -    errmsg= "pcre_exec: no substring";
> -    break;
> -  case PCRE_ERROR_MATCHLIMIT:
> -    errmsg= "pcre_exec: match limit exceeded";
> -    break;
> -  case PCRE_ERROR_CALLOUT:
> -    errmsg= "pcre_exec: callout error";
> -    break;
> -  case PCRE_ERROR_BADUTF8:
> -    errmsg= "pcre_exec: Invalid utf8 byte sequence in the subject string";
> -    break;
> -  case PCRE_ERROR_BADUTF8_OFFSET:
> -    errmsg= "pcre_exec: Started at invalid location within utf8 byte sequence";
> -    break;
> -  case PCRE_ERROR_PARTIAL:
> -    errmsg= "pcre_exec: partial match";
> -    break;
> -  case PCRE_ERROR_INTERNAL:
> -    errmsg= "pcre_exec: internal error";
> -    break;
> -  case PCRE_ERROR_BADCOUNT:
> -    errmsg= "pcre_exec: ovesize is negative";
> -    break;
> -  case PCRE_ERROR_RECURSIONLIMIT:
> -    my_snprintf(buf, sizeof(buf), "pcre_exec: recursion limit of %ld exceeded",
> -                m_pcre_extra.match_limit_recursion);
> -    errmsg= buf;
> -    break;
> -  case PCRE_ERROR_BADNEWLINE:
> -    errmsg= "pcre_exec: bad newline options";
> -    break;
> -  case PCRE_ERROR_BADOFFSET:
> -    errmsg= "pcre_exec: start offset negative or greater than string length";
> -    break;
> -  case PCRE_ERROR_SHORTUTF8:
> -    errmsg= "pcre_exec: ended in middle of utf8 sequence";
> -    break;
> -  case PCRE_ERROR_JIT_STACKLIMIT:
> -    errmsg= "pcre_exec: insufficient stack memory for JIT compile";
> -    break;
> -  case PCRE_ERROR_RECURSELOOP:
> -    errmsg= "pcre_exec: Recursion loop detected";
> -    break;
> -  case PCRE_ERROR_BADMODE:
> -    errmsg= "pcre_exec: compiled pattern passed to wrong bit library function";
> -    break;
> -  case PCRE_ERROR_BADENDIANNESS:
> -    errmsg= "pcre_exec: compiled pattern passed to wrong endianness processor";
> -    break;
> -  case PCRE_ERROR_JIT_BADOPTION:
> -    errmsg= "pcre_exec: bad jit option";
> -    break;
> -  case PCRE_ERROR_BADLENGTH:
> -    errmsg= "pcre_exec: negative length";
> -    break;
> -  default:
> -    /*
> -      As other error codes should normally not happen,
> -      we just report the error code without textual description
> -      of the code.
> -    */
> -    my_snprintf(buf, sizeof(buf), "pcre_exec: Internal error (%d)", rc);
> -    errmsg= buf;
> +    my_snprintf((char *)buf, sizeof(buf), "pcre_exec: Internal error (%d)", rc);
>    }
>    push_warning_printf(thd, Sql_condition::WARN_LEVEL_WARN,
> -                      ER_REGEXP_ERROR, ER_THD(thd, ER_REGEXP_ERROR), errmsg);
> +                      ER_REGEXP_ERROR, ER_THD(thd, ER_REGEXP_ERROR), buf);
>  }
>  
>  
>  /**
>    Call pcre_exec() and send a warning if pcre_exec() returned with an error.
>  */
> -int Regexp_processor_pcre::pcre_exec_with_warn(const pcre *code,
> -                                               const pcre_extra *extra,
> +int Regexp_processor_pcre::pcre_exec_with_warn(const pcre2_code *code,
> +                                               pcre2_match_data *data,
>                                                 const char *subject,
>                                                 int length, int startoffset,
> -                                               int options, int *ovector,
> -                                               int ovecsize)
> +                                               uint options)
>  {
> -  int rc= pcre_exec(code, extra, subject, length,
> -                    startoffset, options, ovector, ovecsize);
> +  int rc= pcre2_match(code, (PCRE2_SPTR8) subject, (PCRE2_SIZE) length,
> +                      (PCRE2_SIZE) startoffset, options, data, NULL);
>    DBUG_EXECUTE_IF("pcre_exec_error_123", rc= -123;);
> -  if (unlikely(rc < PCRE_ERROR_NOMATCH))
> -    pcre_exec_warn(rc);
> +  if (unlikely(rc < PCRE2_ERROR_NOMATCH))
> +    m_SubStrVec= NULL;
> +  else
> +    m_SubStrVec= pcre2_get_ovector_pointer(data);
> +

The function name is still pcre_exec_with_warn, the comment still says
"Call pcre_exec() and send a warning". But neither seems to be
happening here.

where do you do warnings now?

>    return rc;
>  }
>  
>  
>  bool Regexp_processor_pcre::exec(const char *str, size_t length, size_t offset)
>  {
> -  m_pcre_exec_rc= pcre_exec_with_warn(m_pcre, &m_pcre_extra, str, (int)length, (int)offset, 0,
> -                                      m_SubStrVec, array_elements(m_SubStrVec));
> +  m_pcre_exec_rc= pcre_exec_with_warn(m_pcre, m_pcre_match_data,
> +                                      str, (int)length, (int)offset, 0);
>    return false;
>  }
>  
> diff --git a/sql/sys_vars.cc b/sql/sys_vars.cc
> index ccb7d3b29ed..e8abae3487d 100644
> --- a/sql/sys_vars.cc
> +++ b/sql/sys_vars.cc
> @@ -5692,19 +5692,21 @@ static const char *default_regex_flags_names[]=
>    "DOTALL",    // (?s)  . matches anything including NL
>    "DUPNAMES",  // (?J)  Allow duplicate names for subpatterns
>    "EXTENDED",  // (?x)  Ignore white space and # comments
> -  "EXTRA",     // (?X)  extra features (e.g. error on unknown escape character)
> +  "EXTENDED_MORE",//(?xx)  Ignore white space and # comments inside cheracter

perhaps, update all flag descriptions from man pcre2syntax? Like

  "EXTENDED",     // (?x) extended: ignore white space except in classes
  "EXTENDED_MORE",//(?xx) as (?x) but also ignore space and tab in classes

also, I don't quite like EXTENDED_MORE name. But I don't have a good
suggestion. May be just re-purpose EXTENDED to do (?xx) without
introducing a new flag?

> +  "EXTRA",     // means nothing since PCRE2

may be a warning when it's used?

>    "MULTILINE", // (?m)  ^ and $ match newlines within data
>    "UNGREEDY",  // (?U)  Invert greediness of quantifiers
>    0
>  };
>  static const int default_regex_flags_to_pcre[]=
>  {
> -  PCRE_DOTALL,
> -  PCRE_DUPNAMES,
> -  PCRE_EXTENDED,
> -  PCRE_EXTRA,
> -  PCRE_MULTILINE,
> -  PCRE_UNGREEDY,
> +  PCRE2_DOTALL,
> +  PCRE2_DUPNAMES,
> +  PCRE2_EXTENDED,
> +  PCRE2_EXTENDED_MORE,
> +  0, /* EXTRA flag not available since PCRE2 */
> +  PCRE2_MULTILINE,
> +  PCRE2_UNGREEDY,
>    0
>  };
>  int default_regex_flags_pcre(const THD *thd)
> diff --git a/storage/tokudb/PerconaFT/cmake_modules/TokuFeatureDetection.cmake b/storage/tokudb/PerconaFT/cmake_modules/TokuFeatureDetection.cmake
> index 2f04a33558a..51257febc0d 100644
> --- a/storage/tokudb/PerconaFT/cmake_modules/TokuFeatureDetection.cmake
> +++ b/storage/tokudb/PerconaFT/cmake_modules/TokuFeatureDetection.cmake
> @@ -1,6 +1,8 @@
>  ## feature detection
>  find_package(Threads)
> -find_package(ZLIB REQUIRED)
> +IF(WITH_EXTERNAL_ZLIB)
> +  find_package(ZLIB REQUIRED)
> +ENDIF()

push it in separate commit, please.

>  
>  option(USE_VALGRIND "Build to run safely under valgrind (often slower)." ON)
>  if(USE_VALGRIND)
> 

Regards,
Sergei
Chief Architect MariaDB
and security@xxxxxxxxxxx