maria-developers team mailing list archive
-
maria-developers team
-
Mailing list archive
-
Message #11820
Re: [Commits] 3032cd8e91f: step #1: if a derived table has SELECT DISTINCT, provide index statistics for it so that the join optimizer in the
Hi Varun,
Please mention the MDEV# in the commit comment.
Otherwise, the patch is ok to push.
On Mon, Apr 22, 2019 at 07:09:45PM +0530, Varun wrote:
> revision-id: 3032cd8e91f1e1ead8b6f941e75cd29e473e7eaa (mariadb-10.3.10-283-g3032cd8e91f)
> parent(s): f4019f5b3544a18f3ddf32df2c5214c3f8dabdce
> author: Varun Gupta
> committer: Varun Gupta
> timestamp: 2019-04-22 18:19:25 +0530
> message:
>
> step #1: if a derived table has SELECT DISTINCT, provide index statistics for it so that the join optimizer in the
> upper select knows that ref access to the table will produce one row.
>
> ---
> mysql-test/main/cte_nonrecursive.result | 8 ++--
> mysql-test/main/derived.result | 54 ++++++++++++++++++++++
> mysql-test/main/derived.test | 30 ++++++++++++
> mysql-test/main/derived_view.result | 2 +-
> mysql-test/main/subselect_extra.result | 2 +-
> mysql-test/main/subselect_extra_no_semijoin.result | 2 +-
> sql/sql_lex.h | 1 +
> sql/sql_union.cc | 40 ++++++++++++++++
> sql/table.cc | 20 ++++++++
> 9 files changed, 152 insertions(+), 7 deletions(-)
>
> diff --git a/mysql-test/main/cte_nonrecursive.result b/mysql-test/main/cte_nonrecursive.result
> index b846ec2d8ac..d80d34ecc7f 100644
> --- a/mysql-test/main/cte_nonrecursive.result
> +++ b/mysql-test/main/cte_nonrecursive.result
> @@ -244,7 +244,7 @@ with t as (select distinct a from t1 where b >= 'c')
> select * from t as r1, t as r2 where r1.a=r2.a;
> id select_type table type possible_keys key key_len ref rows Extra
> 1 PRIMARY <derived2> ALL NULL NULL NULL NULL 8 Using where
> -1 PRIMARY <derived3> ref key0 key0 5 r1.a 2
> +1 PRIMARY <derived3> ref key0 key0 5 r1.a 1
> 3 DERIVED t1 ALL NULL NULL NULL NULL 8 Using where; Using temporary
> 2 DERIVED t1 ALL NULL NULL NULL NULL 8 Using where; Using temporary
> explain
> @@ -253,7 +253,7 @@ select * from (select distinct a from t1 where b >= 'c') as r1,
> where r1.a=r2.a;
> id select_type table type possible_keys key key_len ref rows Extra
> 1 PRIMARY <derived2> ALL NULL NULL NULL NULL 8 Using where
> -1 PRIMARY <derived3> ref key0 key0 5 r1.a 2
> +1 PRIMARY <derived3> ref key0 key0 5 r1.a 1
> 3 DERIVED t1 ALL NULL NULL NULL NULL 8 Using where; Using temporary
> 2 DERIVED t1 ALL NULL NULL NULL NULL 8 Using where; Using temporary
> # two references to t specified by a query
> @@ -369,7 +369,7 @@ select c as a from t2 where c < 4)
> select * from t2,t where t2.c=t.a;
> id select_type table type possible_keys key key_len ref rows Extra
> 1 PRIMARY t2 ALL NULL NULL NULL NULL 4 Using where
> -1 PRIMARY <derived2> ref key0 key0 5 test.t2.c 2
> +1 PRIMARY <derived2> ref key0 key0 5 test.t2.c 1
> 2 DERIVED t1 ALL NULL NULL NULL NULL 8 Using where
> 3 UNION t2 ALL NULL NULL NULL NULL 4 Using where
> NULL UNION RESULT <union2,3> ALL NULL NULL NULL NULL NULL
> @@ -381,7 +381,7 @@ select c as a from t2 where c < 4) as t
> where t2.c=t.a;
> id select_type table type possible_keys key key_len ref rows Extra
> 1 PRIMARY t2 ALL NULL NULL NULL NULL 4 Using where
> -1 PRIMARY <derived2> ref key0 key0 5 test.t2.c 2
> +1 PRIMARY <derived2> ref key0 key0 5 test.t2.c 1
> 2 DERIVED t1 ALL NULL NULL NULL NULL 8 Using where
> 3 UNION t2 ALL NULL NULL NULL NULL 4 Using where
> NULL UNION RESULT <union2,3> ALL NULL NULL NULL NULL NULL
> diff --git a/mysql-test/main/derived.result b/mysql-test/main/derived.result
> index f0d0289c1ce..857246d68b4 100644
> --- a/mysql-test/main/derived.result
> +++ b/mysql-test/main/derived.result
> @@ -1195,3 +1195,57 @@ drop table t1,t2,t3;
> #
> # End of 10.2 tests
> #
> +#
> +# MDEV-9959: A serious MariaDB server performance bug
> +#
> +create table t1(a int);
> +insert into t1 values (1),(2),(3),(4),(5),(6);
> +create table t2(a int, b int,c int);
> +insert into t2(a,b,c) values (1,1,2),(2,2,3),(3,1,4),(4,2,2),(5,1,1),(6,2,5);
> +create table t3(a int, b int);
> +insert into t3(a,b) values (1,1),(2,2),(2,1),(1,2),(5,1),(9,2);
> +table "<derived2>" should have type=ref and rows=1
> +one select in derived table
> +with distinct
> +analyze select * from t1 , ((select distinct t2.a from t2 order by c))q where t1.a=q.a;
> +id select_type table type possible_keys key key_len ref rows r_rows filtered r_filtered Extra
> +1 PRIMARY t1 ALL NULL NULL NULL NULL 6 6.00 100.00 100.00 Using where
> +1 PRIMARY <derived2> ref key0 key0 5 test.t1.a 1 1.00 100.00 100.00
> +2 DERIVED t2 ALL NULL NULL NULL NULL 6 6.00 100.00 100.00 Using temporary; Using filesort
> +analyze select * from t1 , ((select distinct t2.a, t2.b from t2 order by c))q where t1.a=q.a;
> +id select_type table type possible_keys key key_len ref rows r_rows filtered r_filtered Extra
> +1 PRIMARY t1 ALL NULL NULL NULL NULL 6 6.00 100.00 100.00 Using where
> +1 PRIMARY <derived2> ref key0 key0 5 test.t1.a 2 1.00 100.00 100.00
> +2 DERIVED t2 ALL NULL NULL NULL NULL 6 6.00 100.00 100.00 Using temporary; Using filesort
> +# multiple selects in derived table
> +# NO UNION ALL
> +analyze select * from t1 , ( (select t2.a from t2 order by c) union (select t2.a from t2 order by c))q where t1.a=q.a;
> +id select_type table type possible_keys key key_len ref rows r_rows filtered r_filtered Extra
> +1 PRIMARY t1 ALL NULL NULL NULL NULL 6 6.00 100.00 100.00 Using where
> +1 PRIMARY <derived2> ref key0 key0 5 test.t1.a 1 1.00 100.00 100.00
> +2 DERIVED t2 ALL NULL NULL NULL NULL 6 6.00 100.00 100.00
> +3 UNION t2 ALL NULL NULL NULL NULL 6 6.00 100.00 100.00
> +NULL UNION RESULT <union2,3> ALL NULL NULL NULL NULL NULL 6.00 NULL NULL
> +select * from t1 , ( (select t2.a from t2 order by c) union (select t2.a from t2 order by c))q where t1.a=q.a;
> +a a
> +1 1
> +2 2
> +3 3
> +4 4
> +5 5
> +6 6
> +# UNION ALL and EXCEPT
> +analyze select * from t1 , ( (select t2.a from t2 order by c) union all (select t2.a from t2 order by c) except(select t3.a from t3 order by b))q where t1.a=q.a;
> +id select_type table type possible_keys key key_len ref rows r_rows filtered r_filtered Extra
> +1 PRIMARY t1 ALL NULL NULL NULL NULL 6 6.00 100.00 100.00 Using where
> +1 PRIMARY <derived2> ref key0 key0 5 test.t1.a 1 0.50 100.00 100.00
> +2 DERIVED t2 ALL NULL NULL NULL NULL 6 6.00 100.00 100.00
> +3 UNION t2 ALL NULL NULL NULL NULL 6 6.00 100.00 100.00
> +4 EXCEPT t3 ALL NULL NULL NULL NULL 6 6.00 100.00 100.00
> +NULL UNIT RESULT <unit2,3,4> ALL NULL NULL NULL NULL NULL 3.00 NULL NULL
> +select * from t1 , ( (select t2.a from t2 order by c) union all (select t2.a from t2 order by c) except(select t3.a from t3 order by b))q where t1.a=q.a;
> +a a
> +3 3
> +4 4
> +6 6
> +drop table t1,t2,t3;
> diff --git a/mysql-test/main/derived.test b/mysql-test/main/derived.test
> index 6c51f23c51e..990f955450a 100644
> --- a/mysql-test/main/derived.test
> +++ b/mysql-test/main/derived.test
> @@ -1032,3 +1032,33 @@ drop table t1,t2,t3;
> --echo #
> --echo # End of 10.2 tests
> --echo #
> +
> +--echo #
> +--echo # MDEV-9959: A serious MariaDB server performance bug
> +--echo #
> +
> +create table t1(a int);
> +insert into t1 values (1),(2),(3),(4),(5),(6);
> +create table t2(a int, b int,c int);
> +insert into t2(a,b,c) values (1,1,2),(2,2,3),(3,1,4),(4,2,2),(5,1,1),(6,2,5);
> +create table t3(a int, b int);
> +insert into t3(a,b) values (1,1),(2,2),(2,1),(1,2),(5,1),(9,2);
> +
> +--echo table "<derived2>" should have type=ref and rows=1
> +--echo one select in derived table
> +
> +--echo with distinct
> +analyze select * from t1 , ((select distinct t2.a from t2 order by c))q where t1.a=q.a;
> +analyze select * from t1 , ((select distinct t2.a, t2.b from t2 order by c))q where t1.a=q.a;
> +
> +--echo # multiple selects in derived table
> +--echo # NO UNION ALL
> +analyze select * from t1 , ( (select t2.a from t2 order by c) union (select t2.a from t2 order by c))q where t1.a=q.a;
> +select * from t1 , ( (select t2.a from t2 order by c) union (select t2.a from t2 order by c))q where t1.a=q.a;
> +
> +--echo # UNION ALL and EXCEPT
> +analyze select * from t1 , ( (select t2.a from t2 order by c) union all (select t2.a from t2 order by c) except(select t3.a from t3 order by b))q where t1.a=q.a;
> +
> +select * from t1 , ( (select t2.a from t2 order by c) union all (select t2.a from t2 order by c) except(select t3.a from t3 order by b))q where t1.a=q.a;
> +
> +drop table t1,t2,t3;
> diff --git a/mysql-test/main/derived_view.result b/mysql-test/main/derived_view.result
> index 86dd73f5733..30831e75341 100644
> --- a/mysql-test/main/derived_view.result
> +++ b/mysql-test/main/derived_view.result
> @@ -1525,7 +1525,7 @@ EXPLAIN
> SELECT a FROM t1 WHERE (a,b) IN (SELECT * FROM v2);
> id select_type table type possible_keys key key_len ref rows Extra
> 1 PRIMARY t1 ALL NULL NULL NULL NULL 2 Using where
> -1 PRIMARY <derived3> ref key0 key0 10 test.t1.a,test.t1.b 2 FirstMatch(t1)
> +1 PRIMARY <derived3> ref key0 key0 10 test.t1.a,test.t1.b 1 FirstMatch(t1)
> 3 DERIVED t2 ALL NULL NULL NULL NULL 6
> 4 UNION t3 ALL NULL NULL NULL NULL 4
> NULL UNION RESULT <union3,4> ALL NULL NULL NULL NULL NULL
> diff --git a/mysql-test/main/subselect_extra.result b/mysql-test/main/subselect_extra.result
> index a3a0f1f9a15..dbcf00268c2 100644
> --- a/mysql-test/main/subselect_extra.result
> +++ b/mysql-test/main/subselect_extra.result
> @@ -409,7 +409,7 @@ EXPLAIN
> SELECT a FROM t1 WHERE (a,b) IN (SELECT * FROM v2);
> id select_type table type possible_keys key key_len ref rows Extra
> 1 PRIMARY t1 ALL NULL NULL NULL NULL 2 Using where
> -1 PRIMARY <derived3> ref key0 key0 10 test.t1.a,test.t1.b 2 FirstMatch(t1)
> +1 PRIMARY <derived3> ref key0 key0 10 test.t1.a,test.t1.b 1 FirstMatch(t1)
> 3 DERIVED t2 ALL NULL NULL NULL NULL 6
> 4 UNION t3 ALL NULL NULL NULL NULL 4
> NULL UNION RESULT <union3,4> ALL NULL NULL NULL NULL NULL
> diff --git a/mysql-test/main/subselect_extra_no_semijoin.result b/mysql-test/main/subselect_extra_no_semijoin.result
> index ec9ddb0452e..49a1431eb9b 100644
> --- a/mysql-test/main/subselect_extra_no_semijoin.result
> +++ b/mysql-test/main/subselect_extra_no_semijoin.result
> @@ -411,7 +411,7 @@ EXPLAIN
> SELECT a FROM t1 WHERE (a,b) IN (SELECT * FROM v2);
> id select_type table type possible_keys key key_len ref rows Extra
> 1 PRIMARY t1 ALL NULL NULL NULL NULL 2 Using where
> -2 DEPENDENT SUBQUERY <derived3> index_subquery key0 key0 10 func,func 2 Using where
> +2 DEPENDENT SUBQUERY <derived3> index_subquery key0 key0 10 func,func 1 Using where
> 3 DERIVED t2 ALL NULL NULL NULL NULL 6
> 4 UNION t3 ALL NULL NULL NULL NULL 4
> NULL UNION RESULT <union3,4> ALL NULL NULL NULL NULL NULL
> diff --git a/sql/sql_lex.h b/sql/sql_lex.h
> index 72ca4ac0b43..4eaec7d062b 100644
> --- a/sql/sql_lex.h
> +++ b/sql/sql_lex.h
> @@ -896,6 +896,7 @@ class st_select_lex_unit: public st_select_lex_node {
> bool union_needs_tmp_table();
>
> void set_unique_exclude();
> + bool check_distinct_in_union();
>
> friend struct LEX;
> friend int subselect_union_engine::exec();
> diff --git a/sql/sql_union.cc b/sql/sql_union.cc
> index 87fbbebe4ba..3fb5552c77a 100644
> --- a/sql/sql_union.cc
> +++ b/sql/sql_union.cc
> @@ -2049,3 +2049,43 @@ void st_select_lex_unit::set_unique_exclude()
> }
> }
> }
> +
> +/**
> + @brief
> + Check if the derived table is guaranteed to have distinct rows because of
> + UNION operations used to populate it.
> +
> + @detail
> + UNION operation removes duplicate rows from its output. That is, a query like
> +
> + select * from t1 UNION select * from t2
> +
> + will not produce duplicate rows in its output, even if table t1 (and/or t2)
> + contain duplicate rows. EXCEPT and INTERSECT operations also have this
> + property.
> +
> + On the other hand, UNION ALL operation doesn't remove duplicates. (The SQL
> + standard also defines EXCEPT ALL and INTERSECT ALL, but we don't support
> + them).
> +
> + st_select_lex_unit computes its value left to right. That is, if there is
> + a st_select_lex_unit object describing
> +
> + (select #1) OP1 (select #2) OP2 (select #3)
> +
> + then ((select #1) OP1 (select #2)) is computed first, and OP2 is computed
> + second.
> +
> + How can one tell if st_select_lex_unit is guaranteed to have distinct
> + output rows? This depends on whether the last operation was duplicate-
> + removing or not:
> + - UNION ALL is not duplicate-removing
> + - all other operations are duplicate-removing
> +*/
> +
> +bool st_select_lex_unit::check_distinct_in_union()
> +{
> + if (union_distinct && !union_distinct->next_select())
> + return true;
> + return false;
> +}
> diff --git a/sql/table.cc b/sql/table.cc
> index 80995abc1f9..c4494c9ae4b 100644
> --- a/sql/table.cc
> +++ b/sql/table.cc
> @@ -7269,6 +7269,26 @@ bool TABLE::add_tmp_key(uint key, uint key_parts,
> key_part_info++;
> }
>
> + /*
> + For the case when there is a derived table that would give distinct rows,
> + the index statistics are passed to the join optimizer to tell that a ref
> + access to all the fields of the derived table will produce only one row.
> + */
> +
> + st_select_lex_unit* derived= pos_in_table_list ?
> + pos_in_table_list->derived: NULL;
> + if (derived)
> + {
> + st_select_lex* first= derived->first_select();
> + uint select_list_items= first->get_item_list()->elements;
> + if (key_parts == select_list_items)
> + {
> + if ((!first->is_part_of_union() && (first->options & SELECT_DISTINCT)) ||
> + derived->check_distinct_in_union())
> + keyinfo->rec_per_key[key_parts - 1]= 1;
> + }
> + }
> +
> set_if_bigger(s->max_key_length, keyinfo->key_length);
> s->keys++;
> return FALSE;
> _______________________________________________
> commits mailing list
> commits@xxxxxxxxxxx
> https://lists.askmonty.org/cgi-bin/mailman/listinfo/commits
--
BR
Sergei
--
Sergei Petrunia, Software Developer
MariaDB Corporation | Skype: sergefp | Blog: http://s.petrunia.net/blog