← Back to team overview

maria-developers team mailing list archive

Igor please review: MDEV-22377: Subquery in an [UPDATE] query uses full scan instead of range

 

Hello Igor,

Could you please review the below:

commit 793b100cc01228f01ead841d6c45f9e2b07786f9
Author: Sergei Petrunia <psergey@xxxxxxxxxxxx>
Date:   Thu Apr 30 01:25:11 2020 +0300

    MDEV-22377: Subquery in an [UPDATE] query uses full scan instead of range
    
    When doing IN->EXISTS rewrite, Item_in_subselect::inject_in_to_exists_cond
    injects equalities into subquery's WHERE condition.
    
    The problem is that build_equal_items() has already been called for the
    subquery's WHERE, and tampering with the WHERE condition can prevent
    equality propagation from working.
    
    If the subquery's WHERE is an Item_cond_and with multiple equalities:
    - Item_equal objects form a suffix sub-list of the list in WHERE's
      Item_cond_and::list. The suffix is stored in
      JOIN::cond_equal->current_level.
    - Item_cond_and::m_cond_equal must also be preserved.
    
    This patch makes inject_in_to_exists_cond() not to break these properties

diff --git a/mysql-test/main/subselect2.result b/mysql-test/main/subselect2.result
index a3d7fda7abc..c54d635230f 100644
--- a/mysql-test/main/subselect2.result
+++ b/mysql-test/main/subselect2.result
@@ -262,7 +262,7 @@ id	select_type	table	type	possible_keys	key	key_len	ref	rows	filtered	Extra
 2	DEPENDENT SUBQUERY	t2c	ALL	NULL	NULL	NULL	NULL	2	100.00	Using where; Using join buffer (incremental, BNL join)
 Warnings:
 Note	1276	Field or reference 'test.t1.a' of SELECT #2 was resolved in SELECT #1
-Note	1003	/* select#1 */ select `test`.`t1`.`a` AS `a`,`test`.`t1`.`c1` AS `c1` from `test`.`t1` where !<expr_cache><`test`.`t1`.`c1`,`test`.`t1`.`a`>(<in_optimizer>(`test`.`t1`.`c1`,<exists>(/* select#2 */ select `test`.`t2a`.`c2` from `test`.`t2` `t2a` join `test`.`t2` `t2b` join `test`.`t2` `t2c` where (`test`.`t2b`.`m` <> `test`.`t1`.`a` or `test`.`t2b`.`m` = `test`.`t2a`.`m`) and trigcond(<cache>(`test`.`t1`.`c1`) = `test`.`t2a`.`c2` or `test`.`t2a`.`c2` is null) and `test`.`t2c`.`c2` = `test`.`t2b`.`c2` and `test`.`t2b`.`n` = `test`.`t2a`.`m` having trigcond(`test`.`t2a`.`c2` is null))))
+Note	1003	/* select#1 */ select `test`.`t1`.`a` AS `a`,`test`.`t1`.`c1` AS `c1` from `test`.`t1` where !<expr_cache><`test`.`t1`.`c1`,`test`.`t1`.`a`>(<in_optimizer>(`test`.`t1`.`c1`,<exists>(/* select#2 */ select `test`.`t2a`.`c2` from `test`.`t2` `t2a` join `test`.`t2` `t2b` join `test`.`t2` `t2c` where `test`.`t2c`.`c2` = `test`.`t2b`.`c2` and `test`.`t2b`.`n` = `test`.`t2a`.`m` and (`test`.`t2b`.`m` <> `test`.`t1`.`a` or `test`.`t2b`.`m` = `test`.`t2a`.`m`) and trigcond(<cache>(`test`.`t1`.`c1`) = `test`.`t2a`.`c2` or `test`.`t2a`.`c2` is null) having trigcond(`test`.`t2a`.`c2` is null))))
 DROP TABLE t1,t2;
 #
 # MDEV-614, also MDEV-536, also LP:1050806:
diff --git a/mysql-test/main/subselect3.result b/mysql-test/main/subselect3.result
index 5c4544a1b05..0034f61ac23 100644
--- a/mysql-test/main/subselect3.result
+++ b/mysql-test/main/subselect3.result
@@ -169,7 +169,7 @@ id	select_type	table	type	possible_keys	key	key_len	ref	rows	filtered	Extra
 2	DEPENDENT SUBQUERY	t2	ref	a	a	5	test.t1.b	1	100.00	Using where
 Warnings:
 Note	1276	Field or reference 'test.t3.oref' of SELECT #2 was resolved in SELECT #1
-Note	1003	/* select#1 */ select `test`.`t3`.`a` AS `a`,`test`.`t3`.`oref` AS `oref`,<expr_cache><`test`.`t3`.`a`,`test`.`t3`.`oref`>(<in_optimizer>(`test`.`t3`.`a`,<exists>(/* select#2 */ select `test`.`t1`.`a` from `test`.`t1` join `test`.`t2` where `test`.`t2`.`b` = `test`.`t3`.`oref` and trigcond(<cache>(`test`.`t3`.`a`) = `test`.`t1`.`a` or `test`.`t1`.`a` is null) and `test`.`t2`.`a` = `test`.`t1`.`b` having trigcond(`test`.`t1`.`a` is null)))) AS `Z` from `test`.`t3`
+Note	1003	/* select#1 */ select `test`.`t3`.`a` AS `a`,`test`.`t3`.`oref` AS `oref`,<expr_cache><`test`.`t3`.`a`,`test`.`t3`.`oref`>(<in_optimizer>(`test`.`t3`.`a`,<exists>(/* select#2 */ select `test`.`t1`.`a` from `test`.`t1` join `test`.`t2` where `test`.`t2`.`a` = `test`.`t1`.`b` and `test`.`t2`.`b` = `test`.`t3`.`oref` and trigcond(<cache>(`test`.`t3`.`a`) = `test`.`t1`.`a` or `test`.`t1`.`a` is null) having trigcond(`test`.`t1`.`a` is null)))) AS `Z` from `test`.`t3`
 drop table t1, t2, t3;
 create table t1 (a int NOT NULL, b int NOT NULL, key(a));
 insert into t1 values 
@@ -197,7 +197,7 @@ id	select_type	table	type	possible_keys	key	key_len	ref	rows	filtered	Extra
 2	DEPENDENT SUBQUERY	t2	ref	a	a	4	test.t1.b	1	100.00	Using where
 Warnings:
 Note	1276	Field or reference 'test.t3.oref' of SELECT #2 was resolved in SELECT #1
-Note	1003	/* select#1 */ select `test`.`t3`.`a` AS `a`,`test`.`t3`.`oref` AS `oref`,<expr_cache><`test`.`t3`.`a`,`test`.`t3`.`oref`>(<in_optimizer>(`test`.`t3`.`a`,<exists>(/* select#2 */ select `test`.`t1`.`a` from `test`.`t1` join `test`.`t2` where `test`.`t2`.`b` = `test`.`t3`.`oref` and trigcond(<cache>(`test`.`t3`.`a`) = `test`.`t1`.`a`) and `test`.`t2`.`a` = `test`.`t1`.`b`))) AS `Z` from `test`.`t3`
+Note	1003	/* select#1 */ select `test`.`t3`.`a` AS `a`,`test`.`t3`.`oref` AS `oref`,<expr_cache><`test`.`t3`.`a`,`test`.`t3`.`oref`>(<in_optimizer>(`test`.`t3`.`a`,<exists>(/* select#2 */ select `test`.`t1`.`a` from `test`.`t1` join `test`.`t2` where `test`.`t2`.`a` = `test`.`t1`.`b` and `test`.`t2`.`b` = `test`.`t3`.`oref` and trigcond(<cache>(`test`.`t3`.`a`) = `test`.`t1`.`a`)))) AS `Z` from `test`.`t3`
 drop table t1,t2,t3;
 create table t1 (oref int, grp int);
 insert into t1 (oref, grp) values
diff --git a/mysql-test/main/subselect3_jcl6.result b/mysql-test/main/subselect3_jcl6.result
index 4260676cc37..b7b18bf80e0 100644
--- a/mysql-test/main/subselect3_jcl6.result
+++ b/mysql-test/main/subselect3_jcl6.result
@@ -172,7 +172,7 @@ id	select_type	table	type	possible_keys	key	key_len	ref	rows	filtered	Extra
 2	DEPENDENT SUBQUERY	t2	ref	a	a	5	test.t1.b	1	100.00	Using where; Using join buffer (flat, BKA join); Key-ordered Rowid-ordered scan
 Warnings:
 Note	1276	Field or reference 'test.t3.oref' of SELECT #2 was resolved in SELECT #1
-Note	1003	/* select#1 */ select `test`.`t3`.`a` AS `a`,`test`.`t3`.`oref` AS `oref`,<expr_cache><`test`.`t3`.`a`,`test`.`t3`.`oref`>(<in_optimizer>(`test`.`t3`.`a`,<exists>(/* select#2 */ select `test`.`t1`.`a` from `test`.`t1` join `test`.`t2` where `test`.`t2`.`b` = `test`.`t3`.`oref` and trigcond(<cache>(`test`.`t3`.`a`) = `test`.`t1`.`a` or `test`.`t1`.`a` is null) and `test`.`t2`.`a` = `test`.`t1`.`b` having trigcond(`test`.`t1`.`a` is null)))) AS `Z` from `test`.`t3`
+Note	1003	/* select#1 */ select `test`.`t3`.`a` AS `a`,`test`.`t3`.`oref` AS `oref`,<expr_cache><`test`.`t3`.`a`,`test`.`t3`.`oref`>(<in_optimizer>(`test`.`t3`.`a`,<exists>(/* select#2 */ select `test`.`t1`.`a` from `test`.`t1` join `test`.`t2` where `test`.`t2`.`a` = `test`.`t1`.`b` and `test`.`t2`.`b` = `test`.`t3`.`oref` and trigcond(<cache>(`test`.`t3`.`a`) = `test`.`t1`.`a` or `test`.`t1`.`a` is null) having trigcond(`test`.`t1`.`a` is null)))) AS `Z` from `test`.`t3`
 drop table t1, t2, t3;
 create table t1 (a int NOT NULL, b int NOT NULL, key(a));
 insert into t1 values 
@@ -200,7 +200,7 @@ id	select_type	table	type	possible_keys	key	key_len	ref	rows	filtered	Extra
 2	DEPENDENT SUBQUERY	t2	ref	a	a	4	test.t1.b	1	100.00	Using where; Using join buffer (flat, BKA join); Key-ordered Rowid-ordered scan
 Warnings:
 Note	1276	Field or reference 'test.t3.oref' of SELECT #2 was resolved in SELECT #1
-Note	1003	/* select#1 */ select `test`.`t3`.`a` AS `a`,`test`.`t3`.`oref` AS `oref`,<expr_cache><`test`.`t3`.`a`,`test`.`t3`.`oref`>(<in_optimizer>(`test`.`t3`.`a`,<exists>(/* select#2 */ select `test`.`t1`.`a` from `test`.`t1` join `test`.`t2` where `test`.`t2`.`b` = `test`.`t3`.`oref` and trigcond(<cache>(`test`.`t3`.`a`) = `test`.`t1`.`a`) and `test`.`t2`.`a` = `test`.`t1`.`b`))) AS `Z` from `test`.`t3`
+Note	1003	/* select#1 */ select `test`.`t3`.`a` AS `a`,`test`.`t3`.`oref` AS `oref`,<expr_cache><`test`.`t3`.`a`,`test`.`t3`.`oref`>(<in_optimizer>(`test`.`t3`.`a`,<exists>(/* select#2 */ select `test`.`t1`.`a` from `test`.`t1` join `test`.`t2` where `test`.`t2`.`a` = `test`.`t1`.`b` and `test`.`t2`.`b` = `test`.`t3`.`oref` and trigcond(<cache>(`test`.`t3`.`a`) = `test`.`t1`.`a`)))) AS `Z` from `test`.`t3`
 drop table t1,t2,t3;
 create table t1 (oref int, grp int);
 insert into t1 (oref, grp) values
diff --git a/mysql-test/main/subselect4.result b/mysql-test/main/subselect4.result
index 3dc019cc6c1..e355cfc60f0 100644
--- a/mysql-test/main/subselect4.result
+++ b/mysql-test/main/subselect4.result
@@ -2349,7 +2349,7 @@ id	select_type	table	type	possible_keys	key	key_len	ref	rows	filtered	Extra
 1	PRIMARY	t2	system	NULL	NULL	NULL	NULL	1	100.00	
 2	DEPENDENT SUBQUERY	t1	ALL	NULL	NULL	NULL	NULL	2	100.00	Using where
 Warnings:
-Note	1003	/* select#1 */ select 3 AS `f` from dual where !<expr_cache><3>(<in_optimizer>(3,<exists>(/* select#2 */ select `test`.`t1`.`b` from `test`.`t1` where (`test`.`t1`.`c` = 'USA' or `test`.`t1`.`c` <> 'USA') and trigcond(<cache>(3) = `test`.`t1`.`b` or `test`.`t1`.`b` is null) and `test`.`t1`.`b` = `test`.`t1`.`a` having trigcond(`test`.`t1`.`b` is null))))
+Note	1003	/* select#1 */ select 3 AS `f` from dual where !<expr_cache><3>(<in_optimizer>(3,<exists>(/* select#2 */ select `test`.`t1`.`b` from `test`.`t1` where `test`.`t1`.`b` = `test`.`t1`.`a` and (`test`.`t1`.`c` = 'USA' or `test`.`t1`.`c` <> 'USA') and trigcond(<cache>(3) = `test`.`t1`.`b` or `test`.`t1`.`b` is null) having trigcond(`test`.`t1`.`b` is null))))
 SELECT * FROM t2 
 WHERE f NOT IN (SELECT b FROM t1
 WHERE 0 OR (c IN ('USA') OR c NOT IN ('USA')) AND a = b);
@@ -2582,3 +2582,29 @@ id	select_type	table	type	possible_keys	key	key_len	ref	rows	Extra
 1	PRIMARY	<subquery2>	eq_ref	distinct_key	distinct_key	4	func	1	
 2	MATERIALIZED	t2	ALL	NULL	NULL	NULL	NULL	100	
 drop table t0, t1, t2;
+#
+# MDEV-22377: Subquery in an UPDATE query uses full scan instead of range
+#
+create table t0(a int);
+insert into t0 values (0),(1),(2),(3),(4),(5),(6),(7),(8),(9);
+create table t1(nkey1 int, col1 int);
+insert into t1 select A.a+10*B.a+100*C.a, A.a+10*B.a+100*C.a from t0 A, t0 B, t0 C;
+create table t2(key1 int, col1 int, key(key1));
+insert into t2 select A.a+10*B.a, A.a+10*B.a from t0 A, t0 B;
+alter table t0 add b int;
+#  This must use the join order of t0,t2,t1
+#  And table t2 must use range access, not full scan:
+explain
+select *
+from t0 
+where 
+a not in (SELECT t1.col1 
+FROM t1 JOIN t2 ON t2.key1 = t1.nkey1 
+WHERE 
+t1.nkey1 IN (1,2,3,4,5) and t2.col1>t0.b
+);
+id	select_type	table	type	possible_keys	key	key_len	ref	rows	Extra
+1	PRIMARY	t0	ALL	NULL	NULL	NULL	NULL	10	Using where
+2	DEPENDENT SUBQUERY	t2	range	key1	key1	5	NULL	5	Using index condition; Using where
+2	DEPENDENT SUBQUERY	t1	ALL	NULL	NULL	NULL	NULL	1000	Using where; Using join buffer (flat, BNL join)
+drop table t0, t1, t2;
diff --git a/mysql-test/main/subselect4.test b/mysql-test/main/subselect4.test
index bc8db41c505..e3d8b5be832 100644
--- a/mysql-test/main/subselect4.test
+++ b/mysql-test/main/subselect4.test
@@ -2115,6 +2115,32 @@ where exists (select * from t2 where t2.a=t1.a order by t2.b limit 2,3);
 explain
 select * from t1 where t1.a in (select t2.a from t2 order by t2.b);
 
+drop table t0, t1, t2;
+
+--echo #
+--echo # MDEV-22377: Subquery in an UPDATE query uses full scan instead of range
+--echo #
+create table t0(a int);
+insert into t0 values (0),(1),(2),(3),(4),(5),(6),(7),(8),(9);
 
+create table t1(nkey1 int, col1 int);
+insert into t1 select A.a+10*B.a+100*C.a, A.a+10*B.a+100*C.a from t0 A, t0 B, t0 C;
+
+create table t2(key1 int, col1 int, key(key1));
+insert into t2 select A.a+10*B.a, A.a+10*B.a from t0 A, t0 B;
+
+alter table t0 add b int;
+--echo #  This must use the join order of t0,t2,t1
+--echo #  And table t2 must use range access, not full scan:
+explain
+select *
+from t0 
+where 
+  a not in (SELECT t1.col1 
+            FROM t1 JOIN t2 ON t2.key1 = t1.nkey1 
+            WHERE 
+              t1.nkey1 IN (1,2,3,4,5) and t2.col1>t0.b
+            );
 drop table t0, t1, t2;
 
+
diff --git a/mysql-test/main/subselect_mat_cost_bugs.result b/mysql-test/main/subselect_mat_cost_bugs.result
index a18c5e608f1..755f1e2ad13 100644
--- a/mysql-test/main/subselect_mat_cost_bugs.result
+++ b/mysql-test/main/subselect_mat_cost_bugs.result
@@ -100,7 +100,7 @@ id	select_type	table	type	possible_keys	key	key_len	ref	rows	filtered	Extra
 2	DEPENDENT SUBQUERY	t2	index	c3	c3	9	NULL	2	100.00	Using where; Using index; Using join buffer (flat, BNL join)
 Warnings:
 Note	1276	Field or reference 'test.t1.pk' of SELECT #2 was resolved in SELECT #1
-Note	1003	/* select#1 */ select `test`.`t1`.`pk` AS `pk` from `test`.`t1` where <expr_cache><`test`.`t1`.`c1`,`test`.`t1`.`pk`>(<in_optimizer>(`test`.`t1`.`c1`,<exists>(/* select#2 */ select `test`.`t1a`.`c1` from `test`.`t1b` join `test`.`t2` left join `test`.`t1a` on(`test`.`t1a`.`c2` = `test`.`t1b`.`pk` and 2) where `test`.`t1`.`pk` <> 0 and <cache>(`test`.`t1`.`c1`) = `test`.`t1a`.`c1` and `test`.`t2`.`c3` = `test`.`t1b`.`c4`)))
+Note	1003	/* select#1 */ select `test`.`t1`.`pk` AS `pk` from `test`.`t1` where <expr_cache><`test`.`t1`.`c1`,`test`.`t1`.`pk`>(<in_optimizer>(`test`.`t1`.`c1`,<exists>(/* select#2 */ select `test`.`t1a`.`c1` from `test`.`t1b` join `test`.`t2` left join `test`.`t1a` on(`test`.`t1a`.`c2` = `test`.`t1b`.`pk` and 2) where `test`.`t2`.`c3` = `test`.`t1b`.`c4` and `test`.`t1`.`pk` <> 0 and <cache>(`test`.`t1`.`c1`) = `test`.`t1a`.`c1`)))
 SELECT pk
 FROM t1
 WHERE c1 IN
@@ -364,7 +364,7 @@ AND a = SOME (SELECT b FROM t5));
 id	select_type	table	type	possible_keys	key	key_len	ref	rows	Extra
 1	PRIMARY	t3	ALL	NULL	NULL	NULL	NULL	2	Using where
 2	DEPENDENT SUBQUERY	t5	index	c	c	10	NULL	2	Using where; Using index; Start temporary
-2	DEPENDENT SUBQUERY	t4	eq_ref	PRIMARY	PRIMARY	4	test.t5.b	1	Using index condition; Using where; End temporary
+2	DEPENDENT SUBQUERY	t4	eq_ref	PRIMARY	PRIMARY	4	test.t5.b	1	Using where; End temporary
 SELECT *
 FROM t3
 WHERE t3.b > ALL (
diff --git a/sql/item_subselect.cc b/sql/item_subselect.cc
index 736dfbd33b4..726c58e49a2 100644
--- a/sql/item_subselect.cc
+++ b/sql/item_subselect.cc
@@ -2704,9 +2704,28 @@ bool Item_in_subselect::inject_in_to_exists_cond(JOIN *join_arg)
       and_args= ((Item_cond*) join_arg->conds)->argument_list();
       if (join_arg->cond_equal)
         and_args->disjoin((List<Item> *) &join_arg->cond_equal->current_level);
+
+      /*
+        If subquery's WHERE is an Item_cond_and, add the injected conditions
+        into it manually. If we just called and_items(...) that could
+        potentially replace the top-level Item_cond_and and current top-level
+        Item_cond_and::m_cond_equal will be lost.
+      */
+      if (where_item->type() == Item::COND_ITEM &&
+          ((Item_cond*)where_item)->functype() == Item_func::COND_AND_FUNC)
+      {
+        List_iterator<Item> it(*((Item_cond*)where_item)->argument_list());
+        while (Item *item=it++)
+          and_args->push_back(item);
+      }
+      else
+        and_args->push_back(where_item);
+
+      where_item= join_arg->conds;
     }
+    else
+      where_item= and_items(thd, join_arg->conds, where_item);
 
-    where_item= and_items(thd, join_arg->conds, where_item);
     if (where_item->fix_fields_if_needed(thd, 0))
       DBUG_RETURN(true);
     // TIMOUR TODO: call optimize_cond() for the new where clause
@@ -2719,12 +2738,15 @@ bool Item_in_subselect::inject_in_to_exists_cond(JOIN *join_arg)
     {
       /* The argument list of the top-level AND may change after fix fields. */
       and_args= ((Item_cond*) join_arg->conds)->argument_list();
-      List_iterator<Item_equal> li(join_arg->cond_equal->current_level);
-      Item_equal *elem;
-      while ((elem= li++))
-      {
-        and_args->push_back(elem, thd->mem_root);
-      }
+
+      /*
+        Note that Item_equal objects must be present in this form:
+        Item_cond::conds list must have the end of the list to be the
+        same linked list as join_arg->cond_equal->current_level.
+        The code in substitute_for_best_equal_field depends on it (see the
+        disjoin() call).
+      */
+      and_args->append((List<Item> *)&join_arg->cond_equal->current_level);
     }
   }
 

BR
 Sergei
-- 
Sergei Petrunia, Software Developer
MariaDB Corporation | Skype: sergefp | Blog: http://s.petrunia.net/blog