← Back to team overview

maria-developers team mailing list archive

Re: 57673bc: MDEV-14024 PCRE2

 

Hi, Alexey!

On Apr 03, Alexey Botchkov wrote:
> revision-id: 57673bcfeecf51d1dd8f4907adc8ca11d84796d3 (mariadb-10.3.5-76-g57673bc)
> parent(s): d614d4b2e61552a5b74259f91b3e14b23707743f
> committer: Alexey Botchkov
> timestamp: 2018-04-03 02:59:51 +0400
> message:
> 
> MDEV-14024 PCRE2.
> 
> server ocde switched to the PCRE2 features.

^^^ s/ocde/code/

> diff --git a/cmake/pcre.cmake b/cmake/pcre.cmake
> index 4c11392..271dd06 100644
> --- a/cmake/pcre.cmake
> +++ b/cmake/pcre.cmake
> @@ -5,24 +5,16 @@ SET(WITH_PCRE "auto" CACHE STRING
>  
> +  IF(NOT HAVE_PCRE2 OR WITH_PCRE STREQUAL "bundled")
>      IF (WITH_PCRE STREQUAL "system")
> -      MESSAGE(FATAL_ERROR "system pcre is not found or unusable")
> +      MESSAGE(FATAL_ERROR "system pcre2-8 is not found or unusable")

better "pcre2-8 library" or simply "pcre2"

> diff --git a/sql/item_cmpfunc.cc b/sql/item_cmpfunc.cc
> index 89aa307..08aa293 100644
> --- a/sql/item_cmpfunc.cc
> +++ b/sql/item_cmpfunc.cc
> @@ -5481,15 +5481,6 @@ int Regexp_processor_pcre::default_regex_flags()
>    return default_regex_flags_pcre(current_thd);
>  }
>  
> -void Regexp_processor_pcre::set_recursion_limit(THD *thd)
> -{
> -  long stack_used;
> -  DBUG_ASSERT(thd == current_thd);
> -  stack_used= available_stack_size(thd->thread_stack, &stack_used);
> -  m_pcre_extra.match_limit_recursion=
> -    (ulong)((my_thread_stack_size - STACK_MIN_SIZE - stack_used)/my_pcre_frame_size);
> -}
> -
>  
>  /**
>    Convert string to lib_charset, if needed.
> @@ -5537,19 +5528,32 @@ bool Regexp_processor_pcre::compile(String *pattern, bool send_error)
>    if (!(pattern= convert_if_needed(pattern, &pattern_converter)))
>      return true;
>  
> -  m_pcre= pcre_compile(pattern->c_ptr_safe(), m_library_flags,
> -                       &pcreErrorStr, &pcreErrorOffset, NULL);
> +  m_pcre= pcre2_compile((PCRE2_SPTR8) pattern->ptr(), pattern->length(),
> +                        m_library_flags,
> +                        &pcreErrorNumber, &pcreErrorOffset, NULL);
>  
>    if (m_pcre == NULL)
>    {
>      if (send_error)
>      {
>        char buff[MAX_FIELD_WIDTH];
> -      my_snprintf(buff, sizeof(buff), "%s at offset %d", pcreErrorStr, pcreErrorOffset);
> +      int lmsg= pcre2_get_error_message(pcreErrorNumber,
> +                                        (PCRE2_UCHAR8 *)buff, sizeof(buff));
> +      if (lmsg >= 0)
> +        my_snprintf(buff+lmsg, sizeof(buff)-lmsg,
> +                    " at offset %d", pcreErrorOffset);

if lmsg < 0, does it guarantee that the buffer is 0-terminated?

>        my_error(ER_REGEXP_ERROR, MYF(0), buff);
>      }
>      return true;
>    }
> +
> +  m_pcre_match_data= pcre2_match_data_create_from_pattern(m_pcre, NULL);
> +  if (m_pcre_match_data == NULL)
> +  {
> +    my_error(ER_OUT_OF_RESOURCES, MYF(0));
> +    return true;
> +  }
> +
>    return false;
>  }
>  
> @@ -5570,124 +5574,43 @@ bool Regexp_processor_pcre::compile(Item *item, bool send_error)
>  */
>  void Regexp_processor_pcre::pcre_exec_warn(int rc) const
>  {
...
> -  push_warning_printf(thd, Sql_condition::WARN_LEVEL_WARN,
> -                      ER_REGEXP_ERROR, ER_THD(thd, ER_REGEXP_ERROR), errmsg);
> +  int errlen= pcre2_get_error_message(rc, buf, sizeof(buf));
> +  if (errlen >= 0)
> +    push_warning_printf(thd, Sql_condition::WARN_LEVEL_WARN,
> +                        ER_REGEXP_ERROR, ER_THD(thd, ER_REGEXP_ERROR), buf);

perhaps, issue some generic message if pcre2_get_error_message fails?

>  }
> @@ -5697,14 +5620,12 @@ bool Regexp_processor_pcre::exec(String *str, int offset,
>  {
>    if (!(str= convert_if_needed(str, &subject_converter)))
>      return true;
> -  m_pcre_exec_rc= pcre_exec_with_warn(m_pcre, &m_pcre_extra,
> -                                      str->c_ptr_safe(), str->length(),
> -                                      offset, 0,
> -                                      m_SubStrVec, array_elements(m_SubStrVec));
> +  m_pcre_exec_rc= pcre_exec_with_warn(m_pcre, m_pcre_match_data,
> +                                      str->ptr(), str->length(), offset, 0);
>    if (m_pcre_exec_rc > 0)
>    {
>      uint i;
> -    for (i= 0; i < n_result_offsets_to_convert; i++)
> +    for (i= 0; i < n_result_offsets_to_convert && i < m_pcre_exec_rc-1; i++)

hmm, what does that mean?
that both

  n_result_offsets_to_convert < m_pcre_exec_rc-1

and

  m_pcre_exec_rc-1 < n_result_offsets_to_convert

are possible?

>      {
>        /*
>          Convert byte offset into character offset.
> diff --git a/sql/item_cmpfunc.h b/sql/item_cmpfunc.h
> index 3d11a22..d3f52ed 100644
> --- a/sql/item_cmpfunc.h
> +++ b/sql/item_cmpfunc.h
> @@ -25,7 +25,8 @@
>  
>  #include "item_func.h"             /* Item_int_func, Item_bool_func */
>  #define PCRE_STATIC 1             /* Important on Windows */
> -#include "pcre.h"                 /* pcre header file */
> +#define PCRE2_CODE_UNIT_WIDTH 8

you sure, you want it here and not, say, in one universally-included
headers like my_config.h? You don't define it in other places now, so
they include different function names and macros.

> +#include "pcre2.h"                 /* pcre header file */
>  #include "item.h"
>  
>  extern Item_result item_cmp_type(Item_result a,Item_result b);
> diff --git a/sql/mysqld.cc b/sql/mysqld.cc
> index fe2b9c8..dcf0996 100644
> --- a/sql/mysqld.cc
> +++ b/sql/mysqld.cc
> @@ -111,7 +111,7 @@
>  #include "sp_rcontext.h"
>  #include "sp_cache.h"
>  #include "sql_reload.h"  // reload_acl_and_cache
> -#include "pcre.h"
> +#include "pcre2.h"

do you still need to include pcre header here?

>  
>  #ifdef HAVE_POLL_H
>  #include <poll.h>
> diff --git a/storage/mroonga/CMakeLists.txt b/storage/mroonga/CMakeLists.txt
> index 5d8e8c1..57d12a1 100644
> --- a/storage/mroonga/CMakeLists.txt
> +++ b/storage/mroonga/CMakeLists.txt
> @@ -189,8 +189,8 @@ else()
>    set(MYSQL_VARIANT "MySQL")
>  endif()
>  
> -if(EXISTS "${MYSQL_SOURCE_DIR}/pcre")
> -  set(MYSQL_REGEX_INCLUDE_DIR "${MYSQL_SOURCE_DIR}/pcre")
> +if(EXISTS "${MYSQL_SOURCE_DIR}/pcre2")
> +  set(MYSQL_REGEX_INCLUDE_DIR "${MYSQL_SOURCE_DIR}/pcre2/src")

eh, is that enough?
there were a lot more changes in the server, than just correcting the
path.

>  else()
>    set(MYSQL_REGEX_INCLUDE_DIR "${MYSQL_SOURCE_DIR}/regex")
>  endif()

Regards,
Sergei
Chief Architect MariaDB
and security@xxxxxxxxxxx