maria-developers team mailing list archive
-
maria-developers team
-
Mailing list archive
-
Message #11532
Re: afedc29f773: MDEV-14024 PCRE2.
Hi, Alexey!
Thanks!
Few minor comments, see below
On Nov 23, Alexey Botchkov wrote:
> revision-id: afedc29f773362f342756e9cdd9bbd9ed0f1abe2 (versioning-1.0.3-68-gafedc29f773)
> parent(s): 4ec8598c1dcb63499bce998142b8e5b8b09b2d30
> author: Alexey Botchkov <holyfoot@xxxxxxxxxxxx>
> committer: Alexey Botchkov <holyfoot@xxxxxxxxxxxx>
> timestamp: 2018-06-26 14:10:46 +0400
> message:
>
> MDEV-14024 PCRE2.
>
> Related changes in the server code.
>
> diff --git a/cmake/pcre.cmake b/cmake/pcre.cmake
> index 4c113929866..264f93a60a2 100644
> --- a/cmake/pcre.cmake
> +++ b/cmake/pcre.cmake
> @@ -5,24 +5,17 @@ SET(WITH_PCRE "auto" CACHE STRING
>
> MACRO (CHECK_PCRE)
> IF(WITH_PCRE STREQUAL "system" OR WITH_PCRE STREQUAL "auto")
> - CHECK_LIBRARY_EXISTS(pcre pcre_stack_guard "" HAVE_PCRE_STACK_GUARD)
> + CHECK_LIBRARY_EXISTS(pcre2-8 pcre2_match "" HAVE_PCRE2)
> - IF(NOT CMAKE_CROSSCOMPILING)
> - SET(CMAKE_REQUIRED_LIBRARIES "pcre")
> - CHECK_C_SOURCE_RUNS("
> - #include <pcre.h>
> - int main() {
> - return -pcre_exec(NULL, NULL, NULL, -999, -999, 0, NULL, 0) < 256;
> - }" PCRE_STACK_SIZE_OK)
> - SET(CMAKE_REQUIRED_LIBRARIES)
> - ENDIF()
> ENDIF()
> - IF(NOT HAVE_PCRE_STACK_GUARD OR NOT PCRE_STACK_SIZE_OK OR
> - WITH_PCRE STREQUAL "bundled")
> + IF(NOT HAVE_PCRE2 OR WITH_PCRE STREQUAL "bundled")
> IF (WITH_PCRE STREQUAL "system")
> - MESSAGE(FATAL_ERROR "system pcre is not found or unusable")
> + MESSAGE(FATAL_ERROR "system pcre2-8 library is not found or unusable")
> ENDIF()
> - SET(PCRE_INCLUDES ${CMAKE_BINARY_DIR}/pcre ${CMAKE_SOURCE_DIR}/pcre)
> - ADD_SUBDIRECTORY(pcre)
> + SET(PCRE_INCLUDES ${CMAKE_BINARY_DIR}/pcre2 ${CMAKE_SOURCE_DIR}/pcre2
> + ${CMAKE_BINARY_DIR}/pcre2/src ${CMAKE_SOURCE_DIR}/pcre2/src)
That's a bit strange. I'd expect only .../pcre2/src paths, not .../pcre2.
you need pcre2.h and pcre2posix.h, they're both in src/ right?
> + SET(PCRE_BUILD_TESTS OFF CACHE BOOL "Disable tests.")
> + SET(PCRE2_BUILD_PCRE2GREP OFF CACHE BOOL "Disable pcre2grep")
> + ADD_SUBDIRECTORY(pcre2)
> ENDIF()
> ENDMACRO()
>
> diff --git a/extra/mariabackup/CMakeLists.txt b/extra/mariabackup/CMakeLists.txt
> index 7df5fa17903..a9d7153b893 100644
> --- a/extra/mariabackup/CMakeLists.txt
> +++ b/extra/mariabackup/CMakeLists.txt
> @@ -37,7 +37,7 @@ INCLUDE_DIRECTORIES(
> )
>
> IF(NOT HAVE_SYSTEM_REGEX)
> - INCLUDE_DIRECTORIES(${CMAKE_SOURCE_DIR}/pcre)
> + INCLUDE_DIRECTORIES(${CMAKE_SOURCE_DIR}/pcre2 ${CMAKE_SOURCE_DIR}/pcre2/src)
Hmm, not INCLUDE_DIRECTORIES(${PCRE_INCLUDES}) ?
pcre2.h is in the ${CMAKE_BINARY_DIR}, it's generated.
> ENDIF()
>
> IF(WITH_WSREP)
> diff --git a/mysql-test/main/func_regexp_pcre.test b/mysql-test/main/func_regexp_pcre.test
> index 21600390bb2..30969b3e9ae 100644
> --- a/mysql-test/main/func_regexp_pcre.test
> +++ b/mysql-test/main/func_regexp_pcre.test
> @@ -382,6 +382,7 @@ SELECT 'AB' RLIKE 'A B';
> SELECT 'AB' RLIKE 'A# this is a comment\nB';
> SET default_regex_flags=DEFAULT;
>
> +--error ER_REGEXP_ERROR
> SELECT 'Aq' RLIKE 'A\\q';
you forgot to update func_regexp_pcre.result
> SET default_regex_flags='EXTRA';
> --error ER_REGEXP_ERROR
> diff --git a/sql/item_cmpfunc.cc b/sql/item_cmpfunc.cc
> index 76f4788c1cf..450a9c54176 100644
> --- a/sql/item_cmpfunc.cc
> +++ b/sql/item_cmpfunc.cc
> @@ -5485,124 +5486,44 @@ bool Regexp_processor_pcre::compile(Item *item, bool send_error)
> */
> void Regexp_processor_pcre::pcre_exec_warn(int rc) const
> {
> - char buf[64];
> - const char *errmsg= NULL;
> + PCRE2_UCHAR8 buf[128];
> THD *thd= current_thd;
>
> - /*
> - Make a descriptive message only for those pcre_exec() error codes
> - that can actually happen in MariaDB.
> - */
> - switch (rc)
> + int errlen= pcre2_get_error_message(rc, buf, sizeof(buf));
> + if (errlen <= 0)
> {
> - case PCRE_ERROR_NULL:
> - errmsg= "pcre_exec: null argument passed";
> - break;
> - case PCRE_ERROR_BADOPTION:
> - errmsg= "pcre_exec: bad option";
> - break;
> - case PCRE_ERROR_BADMAGIC:
> - errmsg= "pcre_exec: bad magic - not a compiled regex";
> - break;
> - case PCRE_ERROR_UNKNOWN_OPCODE:
> - errmsg= "pcre_exec: error in compiled regex";
> - break;
> - case PCRE_ERROR_NOMEMORY:
> - errmsg= "pcre_exec: Out of memory";
> - break;
> - case PCRE_ERROR_NOSUBSTRING:
> - errmsg= "pcre_exec: no substring";
> - break;
> - case PCRE_ERROR_MATCHLIMIT:
> - errmsg= "pcre_exec: match limit exceeded";
> - break;
> - case PCRE_ERROR_CALLOUT:
> - errmsg= "pcre_exec: callout error";
> - break;
> - case PCRE_ERROR_BADUTF8:
> - errmsg= "pcre_exec: Invalid utf8 byte sequence in the subject string";
> - break;
> - case PCRE_ERROR_BADUTF8_OFFSET:
> - errmsg= "pcre_exec: Started at invalid location within utf8 byte sequence";
> - break;
> - case PCRE_ERROR_PARTIAL:
> - errmsg= "pcre_exec: partial match";
> - break;
> - case PCRE_ERROR_INTERNAL:
> - errmsg= "pcre_exec: internal error";
> - break;
> - case PCRE_ERROR_BADCOUNT:
> - errmsg= "pcre_exec: ovesize is negative";
> - break;
> - case PCRE_ERROR_RECURSIONLIMIT:
> - my_snprintf(buf, sizeof(buf), "pcre_exec: recursion limit of %ld exceeded",
> - m_pcre_extra.match_limit_recursion);
> - errmsg= buf;
> - break;
> - case PCRE_ERROR_BADNEWLINE:
> - errmsg= "pcre_exec: bad newline options";
> - break;
> - case PCRE_ERROR_BADOFFSET:
> - errmsg= "pcre_exec: start offset negative or greater than string length";
> - break;
> - case PCRE_ERROR_SHORTUTF8:
> - errmsg= "pcre_exec: ended in middle of utf8 sequence";
> - break;
> - case PCRE_ERROR_JIT_STACKLIMIT:
> - errmsg= "pcre_exec: insufficient stack memory for JIT compile";
> - break;
> - case PCRE_ERROR_RECURSELOOP:
> - errmsg= "pcre_exec: Recursion loop detected";
> - break;
> - case PCRE_ERROR_BADMODE:
> - errmsg= "pcre_exec: compiled pattern passed to wrong bit library function";
> - break;
> - case PCRE_ERROR_BADENDIANNESS:
> - errmsg= "pcre_exec: compiled pattern passed to wrong endianness processor";
> - break;
> - case PCRE_ERROR_JIT_BADOPTION:
> - errmsg= "pcre_exec: bad jit option";
> - break;
> - case PCRE_ERROR_BADLENGTH:
> - errmsg= "pcre_exec: negative length";
> - break;
> - default:
> - /*
> - As other error codes should normally not happen,
> - we just report the error code without textual description
> - of the code.
> - */
> - my_snprintf(buf, sizeof(buf), "pcre_exec: Internal error (%d)", rc);
> - errmsg= buf;
> + my_snprintf((char *)buf, sizeof(buf), "pcre_exec: Internal error (%d)", rc);
> }
> push_warning_printf(thd, Sql_condition::WARN_LEVEL_WARN,
> - ER_REGEXP_ERROR, ER_THD(thd, ER_REGEXP_ERROR), errmsg);
> + ER_REGEXP_ERROR, ER_THD(thd, ER_REGEXP_ERROR), buf);
> }
>
>
> /**
> Call pcre_exec() and send a warning if pcre_exec() returned with an error.
> */
> -int Regexp_processor_pcre::pcre_exec_with_warn(const pcre *code,
> - const pcre_extra *extra,
> +int Regexp_processor_pcre::pcre_exec_with_warn(const pcre2_code *code,
> + pcre2_match_data *data,
> const char *subject,
> int length, int startoffset,
> - int options, int *ovector,
> - int ovecsize)
> + uint options)
> {
> - int rc= pcre_exec(code, extra, subject, length,
> - startoffset, options, ovector, ovecsize);
> + int rc= pcre2_match(code, (PCRE2_SPTR8) subject, (PCRE2_SIZE) length,
> + (PCRE2_SIZE) startoffset, options, data, NULL);
> DBUG_EXECUTE_IF("pcre_exec_error_123", rc= -123;);
> - if (unlikely(rc < PCRE_ERROR_NOMATCH))
> - pcre_exec_warn(rc);
> + if (unlikely(rc < PCRE2_ERROR_NOMATCH))
> + m_SubStrVec= NULL;
> + else
> + m_SubStrVec= pcre2_get_ovector_pointer(data);
> +
The function name is still pcre_exec_with_warn, the comment still says
"Call pcre_exec() and send a warning". But neither seems to be
happening here.
where do you do warnings now?
> return rc;
> }
>
>
> bool Regexp_processor_pcre::exec(const char *str, size_t length, size_t offset)
> {
> - m_pcre_exec_rc= pcre_exec_with_warn(m_pcre, &m_pcre_extra, str, (int)length, (int)offset, 0,
> - m_SubStrVec, array_elements(m_SubStrVec));
> + m_pcre_exec_rc= pcre_exec_with_warn(m_pcre, m_pcre_match_data,
> + str, (int)length, (int)offset, 0);
> return false;
> }
>
> diff --git a/sql/sys_vars.cc b/sql/sys_vars.cc
> index ccb7d3b29ed..e8abae3487d 100644
> --- a/sql/sys_vars.cc
> +++ b/sql/sys_vars.cc
> @@ -5692,19 +5692,21 @@ static const char *default_regex_flags_names[]=
> "DOTALL", // (?s) . matches anything including NL
> "DUPNAMES", // (?J) Allow duplicate names for subpatterns
> "EXTENDED", // (?x) Ignore white space and # comments
> - "EXTRA", // (?X) extra features (e.g. error on unknown escape character)
> + "EXTENDED_MORE",//(?xx) Ignore white space and # comments inside cheracter
perhaps, update all flag descriptions from man pcre2syntax? Like
"EXTENDED", // (?x) extended: ignore white space except in classes
"EXTENDED_MORE",//(?xx) as (?x) but also ignore space and tab in classes
also, I don't quite like EXTENDED_MORE name. But I don't have a good
suggestion. May be just re-purpose EXTENDED to do (?xx) without
introducing a new flag?
> + "EXTRA", // means nothing since PCRE2
may be a warning when it's used?
> "MULTILINE", // (?m) ^ and $ match newlines within data
> "UNGREEDY", // (?U) Invert greediness of quantifiers
> 0
> };
> static const int default_regex_flags_to_pcre[]=
> {
> - PCRE_DOTALL,
> - PCRE_DUPNAMES,
> - PCRE_EXTENDED,
> - PCRE_EXTRA,
> - PCRE_MULTILINE,
> - PCRE_UNGREEDY,
> + PCRE2_DOTALL,
> + PCRE2_DUPNAMES,
> + PCRE2_EXTENDED,
> + PCRE2_EXTENDED_MORE,
> + 0, /* EXTRA flag not available since PCRE2 */
> + PCRE2_MULTILINE,
> + PCRE2_UNGREEDY,
> 0
> };
> int default_regex_flags_pcre(const THD *thd)
> diff --git a/storage/tokudb/PerconaFT/cmake_modules/TokuFeatureDetection.cmake b/storage/tokudb/PerconaFT/cmake_modules/TokuFeatureDetection.cmake
> index 2f04a33558a..51257febc0d 100644
> --- a/storage/tokudb/PerconaFT/cmake_modules/TokuFeatureDetection.cmake
> +++ b/storage/tokudb/PerconaFT/cmake_modules/TokuFeatureDetection.cmake
> @@ -1,6 +1,8 @@
> ## feature detection
> find_package(Threads)
> -find_package(ZLIB REQUIRED)
> +IF(WITH_EXTERNAL_ZLIB)
> + find_package(ZLIB REQUIRED)
> +ENDIF()
push it in separate commit, please.
>
> option(USE_VALGRIND "Build to run safely under valgrind (often slower)." ON)
> if(USE_VALGRIND)
>
Regards,
Sergei
Chief Architect MariaDB
and security@xxxxxxxxxxx