← Back to team overview

maria-developers team mailing list archive

Rev 2773: BUG#49129: Wrong result with IN-subquery with join_cache_level=6 and firstmatch=off in file:///home/psergey/dev/maria-5.3-subqueries-r7/

 

At file:///home/psergey/dev/maria-5.3-subqueries-r7/

------------------------------------------------------------
revno: 2773
revision-id: psergey@xxxxxxxxxxxx-20100307154145-ksby2b1l0sqm1xne
parent: igor@xxxxxxxxxxxx-20100306191455-g2srime08w9u6l49
committer: Sergey Petrunya <psergey@xxxxxxxxxxxx>
branch nick: maria-5.3-subqueries-r7
timestamp: Sun 2010-03-07 18:41:45 +0300
message:
  BUG#49129: Wrong result with IN-subquery with join_cache_level=6 and firstmatch=off
  - The problem was that DuplicateWeedout strategy setup code wasn't aware of the 
    fact that join buffering will be used and applied optimization that doesn't work
    together with join buffering. Fixed by making DuplicateWeedout setup code to have 
    a pessimistic check about whether there is a chance that join buffering will be 
    used.
  - Make JOIN_CACHE_BKA::init() correctly process Copy_field elements that denote saving
    current rowids in the join buffer.
=== modified file 'mysql-test/r/subselect_sj2.result'
--- a/mysql-test/r/subselect_sj2.result	2010-02-17 10:47:55 +0000
+++ b/mysql-test/r/subselect_sj2.result	2010-03-07 15:41:45 +0000
@@ -264,8 +264,8 @@
 from t0 where a in
 (select t2.a+t3.a from t1 left join (t2 join t3) on t2.a=t1.a and t3.a=t1.a);
 id	select_type	table	type	possible_keys	key	key_len	ref	rows	Extra
-1	PRIMARY	t0	ALL	NULL	NULL	NULL	NULL	10	
-1	PRIMARY	t1	index	NULL	a	5	NULL	10	Using index; Start temporary; Using join buffer
+1	PRIMARY	t0	ALL	NULL	NULL	NULL	NULL	10	Start temporary
+1	PRIMARY	t1	index	NULL	a	5	NULL	10	Using index; Using join buffer
 1	PRIMARY	t2	ref	a	a	5	test.t1.a	1	Using index
 1	PRIMARY	t3	ref	a	a	5	test.t1.a	1	Using where; Using index; End temporary
 drop table t0, t1,t2,t3;

=== modified file 'mysql-test/r/subselect_sj2_jcl6.result'
--- a/mysql-test/r/subselect_sj2_jcl6.result	2010-03-05 18:54:48 +0000
+++ b/mysql-test/r/subselect_sj2_jcl6.result	2010-03-07 15:41:45 +0000
@@ -268,8 +268,8 @@
 from t0 where a in
 (select t2.a+t3.a from t1 left join (t2 join t3) on t2.a=t1.a and t3.a=t1.a);
 id	select_type	table	type	possible_keys	key	key_len	ref	rows	Extra
-1	PRIMARY	t0	ALL	NULL	NULL	NULL	NULL	10	
-1	PRIMARY	t1	index	NULL	a	5	NULL	10	Using index; Start temporary; Using join buffer
+1	PRIMARY	t0	ALL	NULL	NULL	NULL	NULL	10	Start temporary
+1	PRIMARY	t1	index	NULL	a	5	NULL	10	Using index; Using join buffer
 1	PRIMARY	t2	ref	a	a	5	test.t1.a	1	Using index
 1	PRIMARY	t3	ref	a	a	5	test.t1.a	1	Using where; Using index; End temporary
 drop table t0, t1,t2,t3;
@@ -421,20 +421,23 @@
 where t0.a in ( select t1.a from t1,t2 where t2.a=t0.a and
 t1.b=t2.b);
 id	select_type	table	type	possible_keys	key	key_len	ref	rows	filtered	Extra
-1	PRIMARY	t0	ALL	NULL	NULL	NULL	NULL	5	100.00	
-1	PRIMARY	t1	ref	a	a	5	test.t0.a	1	100.00	Start temporary; Using join buffer
+1	PRIMARY	t0	ALL	NULL	NULL	NULL	NULL	5	100.00	Start temporary
+1	PRIMARY	t1	ref	a	a	5	test.t0.a	1	100.00	Using join buffer
 1	PRIMARY	t2	eq_ref	PRIMARY	PRIMARY	4	test.t0.a	1	100.00	Using where; End temporary; Using join buffer
 Warnings:
 Note	1276	Field or reference 'test.t0.a' of SELECT #2 was resolved in SELECT #1
 Note	1003	select `test`.`t0`.`a` AS `a` from `test`.`t2` semi join (`test`.`t1`) join `test`.`t0` where ((`test`.`t2`.`b` = `test`.`t1`.`b`) and (`test`.`t1`.`a` = `test`.`t0`.`a`) and (`test`.`t2`.`a` = `test`.`t0`.`a`))
 update t1 set a=3, b=11 where a=4;
 update t2 set b=11 where a=3;
-
+# Not anymore:
 # The following query gives wrong result due to Bug#49129
 select * from t0 where t0.a in 
 (select t1.a from t1, t2 where t2.a=t0.a and t1.b=t2.b);
 a
 0
+1
+2
+3
 drop table t0, t1, t2;
 CREATE TABLE t1 (
 id int(11) NOT NULL,

=== modified file 'mysql-test/r/subselect_sj_jcl6.result'
--- a/mysql-test/r/subselect_sj_jcl6.result	2010-03-05 18:54:48 +0000
+++ b/mysql-test/r/subselect_sj_jcl6.result	2010-03-07 15:41:45 +0000
@@ -875,6 +875,37 @@
 DROP TABLE t1, t2, t3;
 DROP VIEW v2, v3;
 # End of Bug#49198
+#
+# BUG#49129: Wrong result with IN-subquery with join_cache_level=6 and firstmatch=off
+#
+CREATE TABLE t0 (a INT);
+INSERT INTO t0 VALUES (0),(1),(2),(3),(4);
+CREATE TABLE t1 (a INT, b INT, KEY(a));
+INSERT INTO t1 SELECT a, a from t0;
+CREATE TABLE t2 (a INT, b INT, PRIMARY KEY(a));
+INSERT INTO t2 SELECT * FROM t1;
+UPDATE t1 SET a=3, b=11 WHERE a=4;
+UPDATE t2 SET b=11 WHERE a=3;
+set @save_optimizer_switch=@@optimizer_switch;
+set optimizer_switch='firstmatch=off';
+The following should use a join order of t0,t1,t2, with DuplicateElimination:
+explain
+SELECT * FROM t0 WHERE t0.a IN 
+(SELECT t1.a FROM t1, t2 WHERE t2.a=t0.a AND t1.b=t2.b);
+id	select_type	table	type	possible_keys	key	key_len	ref	rows	Extra
+1	PRIMARY	t0	ALL	NULL	NULL	NULL	NULL	5	Start temporary
+1	PRIMARY	t1	ref	a	a	5	test.t0.a	1	Using join buffer
+1	PRIMARY	t2	eq_ref	PRIMARY	PRIMARY	4	test.t0.a	1	Using where; End temporary; Using join buffer
+SELECT * FROM t0 WHERE t0.a IN 
+(SELECT t1.a FROM t1, t2 WHERE t2.a=t0.a AND t1.b=t2.b);
+a
+0
+1
+2
+3
+set optimizer_switch=@save_optimizer_switch;
+drop table t0, t1, t2;
+# End
 set join_cache_level=default;
 show variables like 'join_cache_level';
 Variable_name	Value

=== modified file 'mysql-test/t/subselect_sj2.test'
--- a/mysql-test/t/subselect_sj2.test	2010-01-17 14:51:10 +0000
+++ b/mysql-test/t/subselect_sj2.test	2010-03-07 15:41:45 +0000
@@ -583,7 +583,7 @@
 
 if (`select @@join_cache_level=6`)
 {
-  --echo 
+  --echo # Not anymore:
   --echo # The following query gives wrong result due to Bug#49129
 }
 select * from t0 where t0.a in 

=== modified file 'mysql-test/t/subselect_sj_jcl6.test'
--- a/mysql-test/t/subselect_sj_jcl6.test	2010-01-17 14:51:10 +0000
+++ b/mysql-test/t/subselect_sj_jcl6.test	2010-03-07 15:41:45 +0000
@@ -7,5 +7,33 @@
 
 --source t/subselect_sj.test
 
+--echo #
+--echo # BUG#49129: Wrong result with IN-subquery with join_cache_level=6 and firstmatch=off
+--echo #
+CREATE TABLE t0 (a INT);
+INSERT INTO t0 VALUES (0),(1),(2),(3),(4);
+CREATE TABLE t1 (a INT, b INT, KEY(a));
+INSERT INTO t1 SELECT a, a from t0;
+CREATE TABLE t2 (a INT, b INT, PRIMARY KEY(a));
+INSERT INTO t2 SELECT * FROM t1;
+UPDATE t1 SET a=3, b=11 WHERE a=4;
+UPDATE t2 SET b=11 WHERE a=3;
+
+set @save_optimizer_switch=@@optimizer_switch;
+set optimizer_switch='firstmatch=off';
+
+--echo The following should use a join order of t0,t1,t2, with DuplicateElimination:
+explain
+SELECT * FROM t0 WHERE t0.a IN 
+  (SELECT t1.a FROM t1, t2 WHERE t2.a=t0.a AND t1.b=t2.b);
+
+SELECT * FROM t0 WHERE t0.a IN 
+  (SELECT t1.a FROM t1, t2 WHERE t2.a=t0.a AND t1.b=t2.b);
+
+set optimizer_switch=@save_optimizer_switch;
+drop table t0, t1, t2;
+
+--echo # End
+
 set join_cache_level=default;
 show variables like 'join_cache_level';

=== modified file 'sql/opt_subselect.cc'
--- a/sql/opt_subselect.cc	2010-02-25 08:09:10 +0000
+++ b/sql/opt_subselect.cc	2010-03-07 15:41:45 +0000
@@ -3035,10 +3035,24 @@
           forwards, but do not destroy other duplicate elimination methods.
         */
         uint first_table= i;
+        uint join_cache_level= join->thd->variables.join_cache_level;
         for (uint j= i; j < i + pos->n_sj_tables; j++)
         {
-          if (join->best_positions[j].use_join_buffer && j <= no_jbuf_after)
+          /*
+            When we'll properly take join buffering into account during
+            join optimization, the below check should be changed to 
+            "if (join->best_positions[j].use_join_buffer && 
+                 j <= no_jbuf_after)".
+            For now, use a rough criteria:
+          */
+          JOIN_TAB *js_tab=join->join_tab + j; 
+          if (j != join->const_tables && js_tab->use_quick != 2 &&
+              j <= no_jbuf_after &&
+              ((js_tab->type == JT_ALL && join_cache_level != 0) ||
+               (join_cache_level > 4 && (tab->type == JT_REF || 
+                                         tab->type == JT_EQ_REF))))
           {
+            /* Looks like we'll be using join buffer */
             first_table= join->const_tables;
             break;
           }
@@ -3116,7 +3130,12 @@
         JOIN_TAB *j, *jump_to= tab-1;
         for (j= tab; j != tab + pos->n_sj_tables; j++)
         {
-          if (!tab->emb_sj_nest)
+          /*
+            NOTE: this loop probably doesn't do the right thing for the case 
+            where FirstMatch's duplicate-generating range is interleaved with
+            "unrelated" tables (as specified in WL#3750, section 2.2).
+          */
+          if (!j->emb_sj_nest)
             jump_to= tab;
           else
           {

=== modified file 'sql/sql_join_cache.cc'
--- a/sql/sql_join_cache.cc	2010-03-06 19:14:55 +0000
+++ b/sql/sql_join_cache.cc	2010-03-07 15:41:45 +0000
@@ -608,7 +608,12 @@
       copy_end= cache->field_descr+cache->fields;
       for (copy= cache->field_descr+cache->flag_fields; copy < copy_end; copy++)
       {
-        if (copy->field->table == tab->table &&
+        /*
+          (1) - when we store rowids for DuplicateWeedout, they have
+                copy->field==NULL
+        */
+        if (copy->field &&  // (1)
+            copy->field->table == tab->table &&
             bitmap_is_set(key_read_set, copy->field->field_index))
         {
           *copy_ptr++= copy; 

=== modified file 'sql/sql_select.cc'
--- a/sql/sql_select.cc	2010-03-05 18:54:48 +0000
+++ b/sql/sql_select.cc	2010-03-07 15:41:45 +0000
@@ -5632,7 +5632,11 @@
     uint blob_length=(uint) (join_tab->table->file->stats.mean_rec_length-
 			     (join_tab->table->s->reclength-rec_length));
     rec_length+=(uint) max(4,blob_length);
-  }
+  }  
+  /*
+    psergey-todo: why we don't count here rowid that we might need to store
+    when using DuplicateElimination?
+  */
   join_tab->used_fields=fields;
   join_tab->used_fieldlength=rec_length;
   join_tab->used_blobs=blobs;