maria-developers team mailing list archive
-
maria-developers team
-
Mailing list archive
-
Message #11199
Re: 57673bc: MDEV-14024 PCRE2
Hi, Alexey!
On Apr 03, Alexey Botchkov wrote:
> revision-id: 57673bcfeecf51d1dd8f4907adc8ca11d84796d3 (mariadb-10.3.5-76-g57673bc)
> parent(s): d614d4b2e61552a5b74259f91b3e14b23707743f
> committer: Alexey Botchkov
> timestamp: 2018-04-03 02:59:51 +0400
> message:
>
> MDEV-14024 PCRE2.
>
> server ocde switched to the PCRE2 features.
^^^ s/ocde/code/
> diff --git a/cmake/pcre.cmake b/cmake/pcre.cmake
> index 4c11392..271dd06 100644
> --- a/cmake/pcre.cmake
> +++ b/cmake/pcre.cmake
> @@ -5,24 +5,16 @@ SET(WITH_PCRE "auto" CACHE STRING
>
> + IF(NOT HAVE_PCRE2 OR WITH_PCRE STREQUAL "bundled")
> IF (WITH_PCRE STREQUAL "system")
> - MESSAGE(FATAL_ERROR "system pcre is not found or unusable")
> + MESSAGE(FATAL_ERROR "system pcre2-8 is not found or unusable")
better "pcre2-8 library" or simply "pcre2"
> diff --git a/sql/item_cmpfunc.cc b/sql/item_cmpfunc.cc
> index 89aa307..08aa293 100644
> --- a/sql/item_cmpfunc.cc
> +++ b/sql/item_cmpfunc.cc
> @@ -5481,15 +5481,6 @@ int Regexp_processor_pcre::default_regex_flags()
> return default_regex_flags_pcre(current_thd);
> }
>
> -void Regexp_processor_pcre::set_recursion_limit(THD *thd)
> -{
> - long stack_used;
> - DBUG_ASSERT(thd == current_thd);
> - stack_used= available_stack_size(thd->thread_stack, &stack_used);
> - m_pcre_extra.match_limit_recursion=
> - (ulong)((my_thread_stack_size - STACK_MIN_SIZE - stack_used)/my_pcre_frame_size);
> -}
> -
>
> /**
> Convert string to lib_charset, if needed.
> @@ -5537,19 +5528,32 @@ bool Regexp_processor_pcre::compile(String *pattern, bool send_error)
> if (!(pattern= convert_if_needed(pattern, &pattern_converter)))
> return true;
>
> - m_pcre= pcre_compile(pattern->c_ptr_safe(), m_library_flags,
> - &pcreErrorStr, &pcreErrorOffset, NULL);
> + m_pcre= pcre2_compile((PCRE2_SPTR8) pattern->ptr(), pattern->length(),
> + m_library_flags,
> + &pcreErrorNumber, &pcreErrorOffset, NULL);
>
> if (m_pcre == NULL)
> {
> if (send_error)
> {
> char buff[MAX_FIELD_WIDTH];
> - my_snprintf(buff, sizeof(buff), "%s at offset %d", pcreErrorStr, pcreErrorOffset);
> + int lmsg= pcre2_get_error_message(pcreErrorNumber,
> + (PCRE2_UCHAR8 *)buff, sizeof(buff));
> + if (lmsg >= 0)
> + my_snprintf(buff+lmsg, sizeof(buff)-lmsg,
> + " at offset %d", pcreErrorOffset);
if lmsg < 0, does it guarantee that the buffer is 0-terminated?
> my_error(ER_REGEXP_ERROR, MYF(0), buff);
> }
> return true;
> }
> +
> + m_pcre_match_data= pcre2_match_data_create_from_pattern(m_pcre, NULL);
> + if (m_pcre_match_data == NULL)
> + {
> + my_error(ER_OUT_OF_RESOURCES, MYF(0));
> + return true;
> + }
> +
> return false;
> }
>
> @@ -5570,124 +5574,43 @@ bool Regexp_processor_pcre::compile(Item *item, bool send_error)
> */
> void Regexp_processor_pcre::pcre_exec_warn(int rc) const
> {
...
> - push_warning_printf(thd, Sql_condition::WARN_LEVEL_WARN,
> - ER_REGEXP_ERROR, ER_THD(thd, ER_REGEXP_ERROR), errmsg);
> + int errlen= pcre2_get_error_message(rc, buf, sizeof(buf));
> + if (errlen >= 0)
> + push_warning_printf(thd, Sql_condition::WARN_LEVEL_WARN,
> + ER_REGEXP_ERROR, ER_THD(thd, ER_REGEXP_ERROR), buf);
perhaps, issue some generic message if pcre2_get_error_message fails?
> }
> @@ -5697,14 +5620,12 @@ bool Regexp_processor_pcre::exec(String *str, int offset,
> {
> if (!(str= convert_if_needed(str, &subject_converter)))
> return true;
> - m_pcre_exec_rc= pcre_exec_with_warn(m_pcre, &m_pcre_extra,
> - str->c_ptr_safe(), str->length(),
> - offset, 0,
> - m_SubStrVec, array_elements(m_SubStrVec));
> + m_pcre_exec_rc= pcre_exec_with_warn(m_pcre, m_pcre_match_data,
> + str->ptr(), str->length(), offset, 0);
> if (m_pcre_exec_rc > 0)
> {
> uint i;
> - for (i= 0; i < n_result_offsets_to_convert; i++)
> + for (i= 0; i < n_result_offsets_to_convert && i < m_pcre_exec_rc-1; i++)
hmm, what does that mean?
that both
n_result_offsets_to_convert < m_pcre_exec_rc-1
and
m_pcre_exec_rc-1 < n_result_offsets_to_convert
are possible?
> {
> /*
> Convert byte offset into character offset.
> diff --git a/sql/item_cmpfunc.h b/sql/item_cmpfunc.h
> index 3d11a22..d3f52ed 100644
> --- a/sql/item_cmpfunc.h
> +++ b/sql/item_cmpfunc.h
> @@ -25,7 +25,8 @@
>
> #include "item_func.h" /* Item_int_func, Item_bool_func */
> #define PCRE_STATIC 1 /* Important on Windows */
> -#include "pcre.h" /* pcre header file */
> +#define PCRE2_CODE_UNIT_WIDTH 8
you sure, you want it here and not, say, in one universally-included
headers like my_config.h? You don't define it in other places now, so
they include different function names and macros.
> +#include "pcre2.h" /* pcre header file */
> #include "item.h"
>
> extern Item_result item_cmp_type(Item_result a,Item_result b);
> diff --git a/sql/mysqld.cc b/sql/mysqld.cc
> index fe2b9c8..dcf0996 100644
> --- a/sql/mysqld.cc
> +++ b/sql/mysqld.cc
> @@ -111,7 +111,7 @@
> #include "sp_rcontext.h"
> #include "sp_cache.h"
> #include "sql_reload.h" // reload_acl_and_cache
> -#include "pcre.h"
> +#include "pcre2.h"
do you still need to include pcre header here?
>
> #ifdef HAVE_POLL_H
> #include <poll.h>
> diff --git a/storage/mroonga/CMakeLists.txt b/storage/mroonga/CMakeLists.txt
> index 5d8e8c1..57d12a1 100644
> --- a/storage/mroonga/CMakeLists.txt
> +++ b/storage/mroonga/CMakeLists.txt
> @@ -189,8 +189,8 @@ else()
> set(MYSQL_VARIANT "MySQL")
> endif()
>
> -if(EXISTS "${MYSQL_SOURCE_DIR}/pcre")
> - set(MYSQL_REGEX_INCLUDE_DIR "${MYSQL_SOURCE_DIR}/pcre")
> +if(EXISTS "${MYSQL_SOURCE_DIR}/pcre2")
> + set(MYSQL_REGEX_INCLUDE_DIR "${MYSQL_SOURCE_DIR}/pcre2/src")
eh, is that enough?
there were a lot more changes in the server, than just correcting the
path.
> else()
> set(MYSQL_REGEX_INCLUDE_DIR "${MYSQL_SOURCE_DIR}/regex")
> endif()
Regards,
Sergei
Chief Architect MariaDB
and security@xxxxxxxxxxx