← Back to team overview

maria-developers team mailing list archive

Re: [Commits] 13b5098: MDEV-9531: GROUP_CONCAT with ORDER BY inside takes a lot of memory while it's executed

 

Hi, Oleksandr!

On Jun 28, Oleksandr Byelkin wrote:
> revision-id: 13b5098fcaa888173472d255e29aff22bcc5baae (mariadb-10.1.13-18-g13b5098)
> parent(s): 732adec0a4c75d99389230feeb0deca0ad668de7
> committer: Oleksandr Byelkin
> timestamp: 2016-06-28 10:59:59 +0200
> message:
> 
> MDEV-9531: GROUP_CONCAT with ORDER BY inside takes a lot of memory while it's executed
> 
> Limitation added to Red-Black tree.
> 
> ---
>  include/my_tree.h                    |  14 +++-
>  mysql-test/r/group_concat_big.result |   6 ++
>  mysql-test/t/group_concat_big.result |   6 ++
>  mysql-test/t/group_concat_big.test   |   6 ++
>  mysys/tree.c                         | 156 ++++++++++++++++++++++++-----------
>  sql/item_sum.cc                      |  45 ++++++++--
>  6 files changed, 177 insertions(+), 56 deletions(-)
> 
> diff --git a/include/my_tree.h b/include/my_tree.h
> index f8be55f..f1916b9 100644
> --- a/include/my_tree.h
> +++ b/include/my_tree.h
> @@ -57,11 +57,14 @@ typedef struct st_tree_element {
>  } TREE_ELEMENT;
>  
>  #define ELEMENT_CHILD(element, offs) (*(TREE_ELEMENT**)((char*)element + offs))
> +#define R_ELEMENT_CHILD(element, offs) ((TREE_ELEMENT**)((char*)element + offs))

Please,

  #define ELEMENT_CHILD_PTR(element, offs) ((TREE_ELEMENT**)((char*)element + offs))
  #define ELEMENT_CHILD(element, offs) (*ELEMENT_CHILD_PTR(element, offs))

>  typedef struct st_tree {
>    TREE_ELEMENT *root,null_element;
>    TREE_ELEMENT **parents[MAX_TREE_HEIGHT];
> +  TREE_ELEMENT *free_element;
>    uint offset_to_key,elements_in_tree,size_of_element;
> +  uint elements_limit, del_direction;
>    size_t memory_limit, allocated;
>    qsort_cmp2 compare;
>    void *custom_arg;
> diff --git a/mysql-test/r/group_concat_big.result b/mysql-test/r/group_concat_big.result
> new file mode 100644
> index 0000000..4de0ebb
> --- /dev/null
> +++ b/mysql-test/r/group_concat_big.result
> @@ -0,0 +1,6 @@
> +SELECT GROUP_CONCAT( seq, seq, seq, seq, seq, seq, seq, seq ORDER BY
> +2,1,3,4,6,5,8,7 ) AS cfield1 FROM seq_1_to_50000000;
> +cfield1
> +11111111,22222222,33333333,44444444,55555555,66666666,77777777,88888888,99999999,1010101010101010,1111111111111111,1212121212121212,1313131313131313,1414141414141414,1515151515151515,1616161616161616,1717171717171717,1818181818181818,1919191919191919,2020202020202020,2121212121212121,2222222222222222,2323232323232323,2424242424242424,2525252525252525,2626262626262626,2727272727272727,2828282828282828,2929292929292929,3030303030303030,3131313131313131,3232323232323232,3333333333333333,3434343434343434,3535353535353535,3636363636363636,3737373737373737,3838383838383838,3939393939393939,4040404040404040,4141414141414141,4242424242424242,4343434343434343,4444444444444444,4545454545454545,4646464646464646,4747474747474747,4848484848484848,4949494949494949,5050505050505050,5151515151515151,5252525252525252,5353535353535353,5454545454545454,5555555555555555,5656565656565656,5757575757575757,5858585858585858,5959595959595959,6060606060606060,6161616161616161,6262626262626262,6363636
>  363636363,6464646464646464,65656565
> +Warnings:
> +Warning	1260	Row 65 was cut by GROUP_CONCAT()

1. Do you get the same result without your patch?
2. I suppose all rows after 65 are skipped completely, aren't they?

> diff --git a/mysql-test/t/group_concat_big.result b/mysql-test/t/group_concat_big.result
> new file mode 100644
> index 0000000..4de0ebb
> --- /dev/null
> +++ b/mysql-test/t/group_concat_big.result
> @@ -0,0 +1,6 @@
> +SELECT GROUP_CONCAT( seq, seq, seq, seq, seq, seq, seq, seq ORDER BY
> +2,1,3,4,6,5,8,7 ) AS cfield1 FROM seq_1_to_50000000;
> +cfield1
> +11111111,22222222,33333333,44444444,55555555,66666666,77777777,88888888,99999999,1010101010101010,1111111111111111,1212121212121212,1313131313131313,1414141414141414,1515151515151515,1616161616161616,1717171717171717,1818181818181818,1919191919191919,2020202020202020,2121212121212121,2222222222222222,2323232323232323,2424242424242424,2525252525252525,2626262626262626,2727272727272727,2828282828282828,2929292929292929,3030303030303030,3131313131313131,3232323232323232,3333333333333333,3434343434343434,3535353535353535,3636363636363636,3737373737373737,3838383838383838,3939393939393939,4040404040404040,4141414141414141,4242424242424242,4343434343434343,4444444444444444,4545454545454545,4646464646464646,4747474747474747,4848484848484848,4949494949494949,5050505050505050,5151515151515151,5252525252525252,5353535353535353,5454545454545454,5555555555555555,5656565656565656,5757575757575757,5858585858585858,5959595959595959,6060606060606060,6161616161616161,6262626262626262,6363636
>  363636363,6464646464646464,65656565
> +Warnings:
> +Warning	1260	Row 65 was cut by GROUP_CONCAT()

Interesting. How did you manage to include the same file twice in a
commit diff?

> diff --git a/mysys/tree.c b/mysys/tree.c
> index a9fc542..6c094d9 100644
> --- a/mysys/tree.c
> +++ b/mysys/tree.c
> @@ -157,6 +172,10 @@ static void free_tree(TREE *tree, myf free_flags)
>        free_root(&tree->mem_root, free_flags);
>      }
>    }
> +  if (tree->free_element && tree->with_delete && tree->free)
> +      (*tree->free)(tree->free_element, free_free,
> +                    tree->custom_arg);

"&& tree->with_delete" is wrong here

> +  tree->free_element= 0;
>    tree->root= &tree->null_element;
>    tree->elements_in_tree=0;
>    tree->allocated=0;
> @@ -190,6 +209,42 @@ static void delete_tree_element(TREE *tree, TREE_ELEMENT *element)
>  }
>  

A function comment would be nice here.

> +void tree_exclude(TREE *tree, TREE_ELEMENT ***parent)
> +{
> +  int remove_colour;
> +  TREE_ELEMENT ***org_parent, *nod;
> +  TREE_ELEMENT *element= **parent;
> @@ -202,7 +257,32 @@ TREE_ELEMENT *tree_insert(TREE *tree, void *key, uint key_size,
>                            void* custom_arg)
>  {
>    int cmp;
> -  TREE_ELEMENT *element,***parent;
> +  TREE_ELEMENT *element, ***parent;
> +
> +  if (tree->elements_limit && tree->elements_in_tree &&

no need for "&& tree->elements_in_tree"

> +      tree->elements_in_tree >= tree->elements_limit)
> +  {
> +    /*
> +      The limit reached so we should remove one.
> +      It is done on the very beginning because:
> +      1) "parents" will be used
> +      2) removing make incorrect search pass

what does that mean?

> +      If we will not insert now, we leave freed element for future use
> +    */
> +    DBUG_ASSERT(key_size == 0);

why?

> +
> +    parent= tree->parents;
> +    *parent = &tree->root; element= tree->root;
> +    while (element != &tree->null_element)
> +    {
> +      *++parent= R_ELEMENT_CHILD(element, tree->del_direction);
> +      element= ELEMENT_CHILD(element, tree->del_direction);
> +    }
> +    parent--;
> +    tree->free_element= **parent;
> +    tree_exclude(tree, parent);
> +    tree->elements_in_tree--;
> +  }
>  
>    parent= tree->parents;
>    *parent = &tree->root; element= tree->root;
> diff --git a/sql/item_sum.cc b/sql/item_sum.cc
> index 1cfee1a..8bcc2ce 100644
> --- a/sql/item_sum.cc
> +++ b/sql/item_sum.cc
> @@ -3390,7 +3390,7 @@ Item_func_group_concat::fix_fields(THD *thd, Item **ref)
>    fixed= 1;
>    return FALSE;
>  }
> -
> +struct tree_node_size {void *left,*right; uint32 f;};

why do you need that?

>  
>  bool Item_func_group_concat::setup(THD *thd)
>  {
> @@ -3499,17 +3499,50 @@ bool Item_func_group_concat::setup(THD *thd)
>  
>    if (arg_count_order)
>    {
> +    uint min_rec_length= 1; //at least 1 byte per rec (coma)
> +    {
> +      List_iterator_fast<Item> li(all_fields);
> +      Item *item;
> +      while ((item= li++))
> +      {
> +        switch (item->result_type())
> +        {
> +          case STRING_RESULT:
> +            break;  // could be empty string
> +          case DECIMAL_RESULT:
> +          case REAL_RESULT:
> +            min_rec_length+=2;
> +            break;
> +          case INT_RESULT:
> +            min_rec_length++;
> +            break;
> +          case TIME_RESULT:
> +            min_rec_length+=6;
> +            break;
> +          default:
> +          case ROW_RESULT:
> +            DBUG_ASSERT(0);
> +            break;
> +        }
> +      }
> +    }

yeah... that'll work, of course, but for strings (the most common case)
you'll allocate tens or hundreds (if not thousands of) times more memory
than needed :(

A hackish workaround could be to adjust tree->elements_limit (in
Item_func_group_concat::add) after each insertion. But in this case it
would be simpler to limit the tree by size (in bytes) and adjust tree
size after each insertion. What do you think about it?

Regards,
Sergei
Chief Architect MariaDB
and security@xxxxxxxxxxx


Follow ups