maria-developers team mailing list archive
-
maria-developers team
-
Mailing list archive
-
Message #12492
Re: [Commits] cdc305c8dd8: MDEV-19620: Changing join_buffer_size causes different results
On Tue, Dec 29, 2020 at 7:59 PM Sergey Petrunia <sergey@xxxxxxxxxxx> wrote:
> Hi Varun,
>
> Please find input below.
>
> On Mon, Dec 28, 2020 at 02:34:40PM +0530, varun wrote:
> > revision-id: cdc305c8dd89a726e09e5fe70ff890d06609cbfb
> (mariadb-10.3.21-309-gcdc305c8dd8)
> > parent(s): 043bd85a574a88856ab9c6d497e682ed06fe45e9
> > author: Varun Gupta
> > committer: Varun Gupta
> > timestamp: 2020-12-28 14:12:14 +0530
> > message:
> >
> > MDEV-19620: Changing join_buffer_size causes different results
> >
> > The scenario here is that query refinement phase decides to use a hash
> join.
> > When the join buffers are allocated in the JOIN::init_join_caches, for a
> table
> > the size exceeds the value for join_buffer_space_limit (which is the
> limit of the
> > space available for all join buffers). When this happens then we
> disallow join
> > buffering for the table, this is done in the revise_cache_usage and
> set_join_cache_denial.
> >
> > In this issue the hash key is created on an index for which ref access
> is possible, so
> > when we disallow hash join then instead of switching to REF access we
> switch to a table
> > scan. This is a problem because the equijoin conditions for which a
> lookup can be made
> > are not attached to the table(or are not evaluated for the table). This
> leads to incorrect
> > results.
> >
> > The fix here would be to switch to using a lookup because it was picked
> by the join planner
> > to be more efficient than the table scan.
> >
> > ---
> > mysql-test/main/join_cache.result | 138
> ++++++++++++++++++++++++++++++++++++++
> > mysql-test/main/join_cache.test | 105 +++++++++++++++++++++++++++++
> > sql/sql_select.cc | 69 +++++++++++++++----
> > sql/sql_select.h | 6 ++
> > 4 files changed, 304 insertions(+), 14 deletions(-)
> >
> > diff --git a/mysql-test/main/join_cache.result
> b/mysql-test/main/join_cache.result
> > index 3d1d91df997..e58503f422f 100644
> > --- a/mysql-test/main/join_cache.result
> > +++ b/mysql-test/main/join_cache.result
> > @@ -6128,4 +6128,142 @@ EXPLAIN
> > }
> > }
> > drop table t1,t2,t3;
> > +#
> > +# MDEV-19620: Changing join_buffer_size causes different results
> > +#
> > +SET @save_join_cache_level= @@join_cache_level;
> > +SET @save_join_buffer_size= @@join_buffer_size;
> > +SET @save_join_buffer_space_limit= @@join_buffer_space_limit;
> > +SET join_cache_level = 3;
> > +CREATE TABLE t1 (i2 VARCHAR(500), c2 VARCHAR(500)) ENGINE=MyISAM;
> > +INSERT INTO t1 VALUES ('1','v'),('7','s');
> > +CREATE TABLE t2 (pk3 INT PRIMARY KEY, i3 VARCHAR(300), c3 VARCHAR(500))
> ENGINE=MyISAM;
> > +INSERT INTO t2 VALUES (1,'7','g'),(2,'4','p'),(3,'1','q');
> > +INSERT INTO t2 VALUES (4,'7','g'),(5,'4','p'),(6,'1','q');
> > +INSERT INTO t2 VALUES (16,'7','g'),(17,'4','p'),(28,'1','q');
> > +#
> > +# Hash join + table Scan on t2
> > +#
> > +set join_buffer_size=1024;
> > +set join_buffer_space_limit=2048;
> > +EXPLAIN SELECT * FROM t1 LEFT JOIN t2 ON (c3=c2);
> > +id select_type table type possible_keys key key_len
> ref rows Extra
> > +1 SIMPLE t1 ALL NULL NULL NULL NULL 2
> > +1 SIMPLE t2 ALL NULL NULL NULL NULL 9
> Using where
> > +SELECT * FROM t1 LEFT JOIN t2 ON (c3=c2);
> > +i2 c2 pk3 i3 c3
> > +1 v NULL NULL NULL
> > +7 s NULL NULL NULL
> > +set join_buffer_space_limit=262144;
> > +EXPLAIN SELECT * FROM t1 LEFT JOIN t2 ON (c3=c2);
> > +id select_type table type possible_keys key key_len
> ref rows Extra
> > +1 SIMPLE t1 ALL NULL NULL NULL NULL 2
> > +1 SIMPLE t2 hash_ALL NULL #hash#$hj 503
> test.t1.c2 9 Using where; Using join buffer (flat, BNLH join)
> > +SELECT * FROM t1 LEFT JOIN t2 ON (c3=c2);
> > +i2 c2 pk3 i3 c3
> > +1 v NULL NULL NULL
> > +7 s NULL NULL NULL
> > +#
> > +# HASH join + ref access on t2
> > +#
> > +ALTER TABLE t2 ADD KEY k1(c3);
> > +set join_buffer_size=1024;
> > +set join_buffer_space_limit=2048;
> > +EXPLAIN SELECT * FROM t1 LEFT JOIN t2 ON (c3=c2);
> > +id select_type table type possible_keys key key_len
> ref rows Extra
> > +1 SIMPLE t1 ALL NULL NULL NULL NULL 2
> > +1 SIMPLE t2 ref k1 k1 503 test.t1.c2 2
> Using where
> > +SELECT * FROM t1 LEFT JOIN t2 ON (c3=c2);
> > +i2 c2 pk3 i3 c3
> > +1 v NULL NULL NULL
> > +7 s NULL NULL NULL
> > +set join_buffer_space_limit=262144;
> > +EXPLAIN SELECT * FROM t1 LEFT JOIN t2 ON (c3=c2);
> > +id select_type table type possible_keys key key_len
> ref rows Extra
> > +1 SIMPLE t1 ALL NULL NULL NULL NULL 2
> > +1 SIMPLE t2 hash_ALL k1 #hash#k1 503
> test.t1.c2 9 Using where; Using join buffer (flat, BNLH join)
> > +SELECT * FROM t1 LEFT JOIN t2 ON (c3=c2);
> > +i2 c2 pk3 i3 c3
> > +1 v NULL NULL NULL
> > +7 s NULL NULL NULL
> > +#
> > +# Hash join + index scan on t2
> > +#
> > +ALTER TABLE t2 DROP KEY k1;
> > +ALTER TABLE t2 ADD KEY k1(i3,c3);
> > +set join_buffer_size=1024;
> > +set join_buffer_space_limit=2048;
> > +EXPLAIN SELECT c3,i3, i2,c2 FROM t1 LEFT JOIN t2 ON (c3 = c2);
> > +id select_type table type possible_keys key key_len
> ref rows Extra
> > +1 SIMPLE t1 ALL NULL NULL NULL NULL 2
> > +1 SIMPLE t2 index NULL k1 806 NULL 9
> Using where; Using index
> > +SELECT c3,i3, i2,c2 FROM t1 LEFT JOIN t2 ON (c3 = c2);
> > +c3 i3 i2 c2
> > +NULL NULL 1 v
> > +NULL NULL 7 s
> > +set join_buffer_space_limit=262144;
> > +EXPLAIN SELECT c3,i3, i2,c2 FROM t1 LEFT JOIN t2 ON (c3 = c2);
> > +id select_type table type possible_keys key key_len
> ref rows Extra
> > +1 SIMPLE t1 ALL NULL NULL NULL NULL 2
> > +1 SIMPLE t2 hash_index NULL #hash#$hj:k1 503:806
> test.t1.c2 9 Using where; Using index; Using join buffer (flat,
> BNLH join)
> > +SELECT c3,i3, i2,c2 FROM t1 LEFT JOIN t2 ON (c3 = c2);
> > +c3 i3 i2 c2
> > +NULL NULL 1 v
> > +NULL NULL 7 s
> > +DROP TABLE t1,t2;
> > +#
> > +# Hash join + range scan on t2
> > +#
> > +CREATE TABLE t1 (i2 VARCHAR(500), c2 VARCHAR(500));
> > +INSERT INTO t1 VALUES ('1','v'),('7','s');
> > +CREATE TABLE t2 (pk3 INT PRIMARY KEY, i3 VARCHAR(300), c3 VARCHAR(500),
> INDEX(i3,c3));
> > +INSERT INTO t2 VALUES (1,'7','g'),(2,'4','s'),(3,'1','q');
> > +set join_buffer_size=1024;
> > +set join_buffer_space_limit=2048;
> > +EXPLAIN SELECT * FROM t1 LEFT JOIN t2 ON (c3 = c2) WHERE i3 >= '4';
> > +id select_type table type possible_keys key key_len
> ref rows Extra
> > +1 SIMPLE t1 ALL NULL NULL NULL NULL 2
> Using where
> > +1 SIMPLE t2 range i3 i3 303 NULL 2
> Using index condition; Using where
> > +SELECT * FROM t1 LEFT JOIN t2 ON (c3 = c2) WHERE i3 >= '4';
> > +i2 c2 pk3 i3 c3
> > +7 s 2 4 s
> > +set join_buffer_space_limit=262144;
> > +EXPLAIN SELECT * FROM t1 LEFT JOIN t2 ON (c3 = c2) WHERE i3 >= '4';
> > +id select_type table type possible_keys key key_len
> ref rows Extra
> > +1 SIMPLE t1 ALL NULL NULL NULL NULL 2
> Using where
> > +1 SIMPLE t2 hash_range i3 #hash#$hj:i3 503:303
> test.t1.c2 2 Using where; Using join buffer (flat, BNLH join)
> > +SELECT * FROM t1 LEFT JOIN t2 ON (c3 = c2) WHERE i3 >= '4';
> > +i2 c2 pk3 i3 c3
> > +7 s 2 4 s
> > +DROP TABLE t1,t2;
> > +#
> > +# Hash join + eq ref access on t2
> > +#
> > +CREATE TABLE t1 (i2 VARCHAR(500), c2 VARCHAR(500));
> > +INSERT INTO t1 VALUES ('1','v'),('7','s');
> > +CREATE TABLE t2 (pk3 INT, i3 VARCHAR(300), c3 VARCHAR(500) PRIMARY KEY);
> > +INSERT INTO t2 VALUES (1,'7','g'),(2,'4','s'),(3,'1','q');
> > +set join_buffer_size=1024;
> > +set join_buffer_space_limit=2048;
> > +EXPLAIN SELECT c3,i3, i2,c2 FROM t1 LEFT JOIN t2 ON (c3 = c2);
> > +id select_type table type possible_keys key key_len
> ref rows Extra
> > +1 SIMPLE t1 ALL NULL NULL NULL NULL 2
> > +1 SIMPLE t2 eq_ref PRIMARY PRIMARY 502 test.t1.c2 1
> Using where
> > +SELECT c3,i3, i2,c2 FROM t1 LEFT JOIN t2 ON (c3 = c2);
> > +c3 i3 i2 c2
> > +NULL NULL 1 v
> > +s 4 7 s
> > +set join_buffer_space_limit=262144;
> > +EXPLAIN SELECT c3,i3, i2,c2 FROM t1 LEFT JOIN t2 ON (c3 = c2);
> > +id select_type table type possible_keys key key_len
> ref rows Extra
> > +1 SIMPLE t1 ALL NULL NULL NULL NULL 2
> > +1 SIMPLE t2 hash_ALL PRIMARY #hash#PRIMARY 502
> test.t1.c2 3 Using where; Using join buffer (flat, BNLH join)
> > +SELECT c3,i3, i2,c2 FROM t1 LEFT JOIN t2 ON (c3 = c2);
> > +c3 i3 i2 c2
> > +NULL NULL 1 v
> > +s 4 7 s
> > +DROP TABLE t1,t2;
> > +SET @save_join_cache_level= @@join_cache_level;
> > +SET @save_join_buffer_size= @@join_buffer_size;
> > +SET @save_join_buffer_space_limit= @@join_buffer_space_limit;
> > set @@optimizer_switch=@save_optimizer_switch;
> > diff --git a/mysql-test/main/join_cache.test
> b/mysql-test/main/join_cache.test
> > index 91339c2cb21..6670c62516b 100644
> > --- a/mysql-test/main/join_cache.test
> > +++ b/mysql-test/main/join_cache.test
> > @@ -4054,5 +4054,110 @@ where
> >
> > drop table t1,t2,t3;
> >
> > +--echo #
> > +--echo # MDEV-19620: Changing join_buffer_size causes different results
> > +--echo #
> > +
> > +SET @save_join_cache_level= @@join_cache_level;
> > +SET @save_join_buffer_size= @@join_buffer_size;
> > +SET @save_join_buffer_space_limit= @@join_buffer_space_limit;
> > +SET join_cache_level = 3;
> > +
> > +CREATE TABLE t1 (i2 VARCHAR(500), c2 VARCHAR(500)) ENGINE=MyISAM;
> > +INSERT INTO t1 VALUES ('1','v'),('7','s');
> > +CREATE TABLE t2 (pk3 INT PRIMARY KEY, i3 VARCHAR(300), c3 VARCHAR(500))
> ENGINE=MyISAM;
> > +INSERT INTO t2 VALUES (1,'7','g'),(2,'4','p'),(3,'1','q');
> > +INSERT INTO t2 VALUES (4,'7','g'),(5,'4','p'),(6,'1','q');
> > +INSERT INTO t2 VALUES (16,'7','g'),(17,'4','p'),(28,'1','q');
> > +
> > +--echo #
> > +--echo # Hash join + table Scan on t2
> > +--echo #
> > +
> > +set join_buffer_size=1024;
> > +set join_buffer_space_limit=2048;
> > +EXPLAIN SELECT * FROM t1 LEFT JOIN t2 ON (c3=c2);
> > +SELECT * FROM t1 LEFT JOIN t2 ON (c3=c2);
> > +
> > +set join_buffer_space_limit=262144;
> > +EXPLAIN SELECT * FROM t1 LEFT JOIN t2 ON (c3=c2);
> > +SELECT * FROM t1 LEFT JOIN t2 ON (c3=c2);
> > +
> > +--echo #
> > +--echo # HASH join + ref access on t2
> > +--echo #
> > +
> > +ALTER TABLE t2 ADD KEY k1(c3);
> > +set join_buffer_size=1024;
> > +set join_buffer_space_limit=2048;
> > +EXPLAIN SELECT * FROM t1 LEFT JOIN t2 ON (c3=c2);
> > +SELECT * FROM t1 LEFT JOIN t2 ON (c3=c2);
> > +
> > +set join_buffer_space_limit=262144;
> > +EXPLAIN SELECT * FROM t1 LEFT JOIN t2 ON (c3=c2);
> > +SELECT * FROM t1 LEFT JOIN t2 ON (c3=c2);
> > +
> > +--echo #
> > +--echo # Hash join + index scan on t2
> > +--echo #
> > +ALTER TABLE t2 DROP KEY k1;
> > +ALTER TABLE t2 ADD KEY k1(i3,c3);
> > +
> > +set join_buffer_size=1024;
> > +set join_buffer_space_limit=2048;
> > +
> > +EXPLAIN SELECT c3,i3, i2,c2 FROM t1 LEFT JOIN t2 ON (c3 = c2);
> > +SELECT c3,i3, i2,c2 FROM t1 LEFT JOIN t2 ON (c3 = c2);
> > +
> > +set join_buffer_space_limit=262144;
> > +EXPLAIN SELECT c3,i3, i2,c2 FROM t1 LEFT JOIN t2 ON (c3 = c2);
> > +SELECT c3,i3, i2,c2 FROM t1 LEFT JOIN t2 ON (c3 = c2);
> > +DROP TABLE t1,t2;
> > +
> > +--echo #
> > +--echo # Hash join + range scan on t2
> > +--echo #
> > +
> > +CREATE TABLE t1 (i2 VARCHAR(500), c2 VARCHAR(500));
> > +INSERT INTO t1 VALUES ('1','v'),('7','s');
> > +CREATE TABLE t2 (pk3 INT PRIMARY KEY, i3 VARCHAR(300), c3 VARCHAR(500),
> INDEX(i3,c3));
> > +INSERT INTO t2 VALUES (1,'7','g'),(2,'4','s'),(3,'1','q');
> > +
> > +set join_buffer_size=1024;
> > +set join_buffer_space_limit=2048;
> > +
> > +EXPLAIN SELECT * FROM t1 LEFT JOIN t2 ON (c3 = c2) WHERE i3 >= '4';
> > +SELECT * FROM t1 LEFT JOIN t2 ON (c3 = c2) WHERE i3 >= '4';
> > +
> > +set join_buffer_space_limit=262144;
> > +EXPLAIN SELECT * FROM t1 LEFT JOIN t2 ON (c3 = c2) WHERE i3 >= '4';
> > +SELECT * FROM t1 LEFT JOIN t2 ON (c3 = c2) WHERE i3 >= '4';
> > +
> > +DROP TABLE t1,t2;
> > +
> > +--echo #
> > +--echo # Hash join + eq ref access on t2
> > +--echo #
> > +
> > +CREATE TABLE t1 (i2 VARCHAR(500), c2 VARCHAR(500));
> > +INSERT INTO t1 VALUES ('1','v'),('7','s');
> > +CREATE TABLE t2 (pk3 INT, i3 VARCHAR(300), c3 VARCHAR(500) PRIMARY KEY);
> > +INSERT INTO t2 VALUES (1,'7','g'),(2,'4','s'),(3,'1','q');
> > +
> > +set join_buffer_size=1024;
> > +set join_buffer_space_limit=2048;
> > +
> > +EXPLAIN SELECT c3,i3, i2,c2 FROM t1 LEFT JOIN t2 ON (c3 = c2);
> > +SELECT c3,i3, i2,c2 FROM t1 LEFT JOIN t2 ON (c3 = c2);
> > +
> > +set join_buffer_space_limit=262144;
> > +EXPLAIN SELECT c3,i3, i2,c2 FROM t1 LEFT JOIN t2 ON (c3 = c2);
> > +SELECT c3,i3, i2,c2 FROM t1 LEFT JOIN t2 ON (c3 = c2);
> > +DROP TABLE t1,t2;
> > +
> > +SET @save_join_cache_level= @@join_cache_level;
> > +SET @save_join_buffer_size= @@join_buffer_size;
> > +SET @save_join_buffer_space_limit= @@join_buffer_space_limit;
> > +
> > # The following command must be the last one in the file
> > set @@optimizer_switch=@save_optimizer_switch;
> > diff --git a/sql/sql_select.cc b/sql/sql_select.cc
> > index 203422f0b43..8b7deb98dc6 100644
> > --- a/sql/sql_select.cc
> > +++ b/sql/sql_select.cc
> > @@ -111,6 +111,7 @@ static bool best_extension_by_limited_search(JOIN
> *join,
> > uint prune_level,
> > uint use_cond_selectivity);
> > static uint determine_search_depth(JOIN* join);
> > +static void pick_table_access_method(JOIN_TAB *tab);
> > C_MODE_START
> > static int join_tab_cmp(const void *dummy, const void* ptr1, const
> void* ptr2);
> > static int join_tab_cmp_straight(const void *dummy, const void* ptr1,
> const void* ptr2);
> > @@ -10081,6 +10082,7 @@ static bool create_ref_for_key(JOIN *join,
> JOIN_TAB *j,
> > j->ref.disable_cache= FALSE;
> > j->ref.null_ref_part= NO_REF_PART;
> > j->ref.const_ref_part_map= 0;
> > + j->ref.not_null_keyparts= 0;
> > keyuse=org_keyuse;
> >
> > store_key **ref_key= j->ref.key_copy;
> > @@ -10173,23 +10175,12 @@ static bool create_ref_for_key(JOIN *join,
> JOIN_TAB *j,
> > }
> > } /* not ftkey */
> > *ref_key=0; // end_marker
> > + j->ref.not_null_keyparts= not_null_keyparts;
> > if (j->type == JT_FT)
> > DBUG_RETURN(0);
> > - ulong key_flags= j->table->actual_key_flags(keyinfo);
> > if (j->type == JT_CONST)
> > j->table->const_table= 1;
> > - else if (!((keyparts == keyinfo->user_defined_key_parts &&
> > - (
> > - (key_flags & (HA_NOSAME | HA_NULL_PART_KEY)) ==
> HA_NOSAME ||
> > - /* Unique key and all keyparts are NULL rejecting */
> > - ((key_flags & HA_NOSAME) && keyparts ==
> not_null_keyparts)
> > - )) ||
> > - /* true only for extended keys */
> > - (keyparts > keyinfo->user_defined_key_parts &&
> > - MY_TEST(key_flags & HA_EXT_NOSAME) &&
> > - keyparts == keyinfo->ext_key_parts)
> > - ) ||
> > - null_ref_key)
> > + else if (!j->is_eq_ref_access()|| null_ref_key)
> > {
> > /* Must read with repeat */
> > j->type= null_ref_key ? JT_REF_OR_NULL : JT_REF;
> > @@ -11582,11 +11573,25 @@ void set_join_cache_denial(JOIN_TAB *join_tab)
> > don't do join buffering for the first table in sjm nest.
> > */
> > join_tab[-1].next_select= sub_select;
> > - if (join_tab->type == JT_REF && join_tab->is_ref_for_hash_join())
> > + if ((join_tab->type == JT_REF || join_tab->type == JT_HASH) &&
> > + join_tab->is_ref_for_hash_join())
> > {
> > join_tab->type= JT_ALL;
> > join_tab->ref.key_parts= 0;
> Do we ever get here if join_tab->type == JT_REF? I think we don't.
> If this is so, please remove it.
>
Yes we do reach here for a case with BKA join. I had added a DBUG_ASSERT(0)
to check that.
>
> > }
> > +
> > + if (join_tab->type == JT_HASH && !join_tab->is_ref_for_hash_join())
> > + {
> > + join_tab->type= join_tab->is_eq_ref_access() ? JT_EQ_REF : JT_REF;
> > + pick_table_access_method(join_tab);
> > + }
> Also please add a comment explaining that the join optimizer's choice was
> ref access and we fall back to that.
>
> Got it
> > +
> > + if (join_tab->type == JT_HASH_NEXT)
> > + {
> > + join_tab->type = JT_NEXT;
> > + DBUG_ASSERT(join_tab->ref.key_parts == 0);
> > + }
> > +
> > join_tab->join->return_tab= join_tab;
> > }
> > }
> > @@ -27954,6 +27959,42 @@ void JOIN_TAB::partial_cleanup()
> > }
> >
> >
> > +/*
> > + @brief
> > + Check if the access method for the table is EQ_REF access or not
> > +
> > + @retval
> > + TRUE EQ_REF access
> > + FALSE Otherwise
> > +*/
> > +bool JOIN_TAB::is_eq_ref_access()
> > +{
> > +
> > + KEY *keyinfo;
> > + if (!is_hash_join_key_no(ref.key))
> > + keyinfo= table->key_info + ref.key;
> > + else
> > + keyinfo= hj_key;
>
> This code now looks as if it was possible that hash join is used and also
> eq_ref is used.
>
> Is this really possible? I've tried catching an example of this with MTR
> and I
> wasn't able to do so.
>
> Maybe we should just return false if hash join is used and also a comment
> about
> this?
>
> Yes sure, this looks confusing, will think and change this.
> > +
> > + uint keyparts= ref.key_parts;
> > + ulong key_flags= table->actual_key_flags(keyinfo);
> > + if ( (keyparts == keyinfo->user_defined_key_parts &&
> > + (
> > + (key_flags & (HA_NOSAME | HA_NULL_PART_KEY)) == HA_NOSAME ||
> > + /* Unique key and all keyparts are NULL rejecting */
> > + ((key_flags & HA_NOSAME) && keyparts ==
> ref.not_null_keyparts)
> > + )
> > + ) ||
> > + /* true only for extended keys */
> > + (keyparts > keyinfo->user_defined_key_parts &&
> > + MY_TEST(key_flags & HA_EXT_NOSAME) &&
> > + keyparts == keyinfo->ext_key_parts)
> > + )
> > + return true;
> > + return false;
> > +}
> > +
> > +
> > /**
> > @} (end of group Query_Optimizer)
> > */
>
> BR
> Sergei
> --
> Sergei Petrunia, Software Developer
> MariaDB Corporation | Skype: sergefp | Blog: http://petrunia.net
>
>
>
References