← Back to team overview

maria-developers team mailing list archive

Re: [Commits] 167a96b5157: MDEV-9959: A serious MariaDB server performance bug

 

On Tue, Mar 27, 2018 at 04:48:10PM +0530, Varun wrote:
> revision-id: 167a96b5157049408a6ad4bca7abcd376af93fb5 (mariadb-10.3.0-644-g167a96b5157)
> parent(s): 4359c6b4806605c78987e50cab3a6b42016b7603
> author: Varun Gupta
> committer: Varun Gupta
> timestamp: 2018-03-27 16:45:46 +0530
> message:
> 
> MDEV-9959: A serious MariaDB server performance bug
> 
> step#1: if a derived table has SELECT DISTINCT, provide index statistics for it so that the join optimizer in the
> upper select knows that ref access to the table will produce one row.
> 
> Added handling for multiple selects in the derived table
> 
> ---
>  mysql-test/r/cte_nonrecursive.result            |  8 +--
>  mysql-test/r/cte_recursive.result               |  6 +-
>  mysql-test/r/derived_cond_pushdown.result       | 82 ++++++++++++-------------
>  mysql-test/r/derived_view.result                |  2 +-
>  mysql-test/r/join_cache.result                  |  6 +-
>  mysql-test/r/mdev9959.result                    | 46 ++++++++++++++
>  mysql-test/r/subselect_extra.result             |  2 +-
>  mysql-test/r/subselect_extra_no_semijoin.result |  2 +-
>  mysql-test/t/mdev9959.test                      | 25 ++++++++
>  sql/sql_lex.h                                   |  2 +
>  sql/sql_union.cc                                | 60 ++++++++++++++++++
>  sql/sql_yacc.yy                                 |  3 +
>  sql/table.cc                                    | 19 ++++++
>  13 files changed, 209 insertions(+), 54 deletions(-)
> 
> diff --git a/mysql-test/r/derived_cond_pushdown.result b/mysql-test/r/derived_cond_pushdown.result
> index 32d3c88cc8d..3723e25a494 100644
> --- a/mysql-test/r/derived_cond_pushdown.result
> +++ b/mysql-test/r/derived_cond_pushdown.result
> @@ -5187,7 +5187,7 @@ explain select * from v2_union as v,t2 where
>  ((v.a=6) or (v.a=8)) and (v.c>200) and (v.a=t2.a);
>  id	select_type	table	type	possible_keys	key	key_len	ref	rows	Extra
>  1	PRIMARY	t2	ALL	NULL	NULL	NULL	NULL	9	Using where
> -1	PRIMARY	<derived2>	ref	key0	key0	5	test.t2.a	6	Using where
> +1	PRIMARY	<derived2>	ref	key0	key0	5	test.t2.a	1	Using where

As agreed on the call: need to check what is the cause of this. Here, the temp
table has distinct rows, that is, {a,b,c} are distinct. But ref access only
uses the first component, where does rows=1 come from? (if this is how
best_access_path computes an estimate for prefix when it only has the estimate
for the full key ... fine)


>  2	DERIVED	t1	ALL	NULL	NULL	NULL	NULL	20	Using where; Using temporary; Using filesort
>  3	UNION	t1	ALL	NULL	NULL	NULL	NULL	20	Using where; Using temporary; Using filesort
>  4	UNION	t1	ALL	NULL	NULL	NULL	NULL	20	Using where; Using temporary; Using filesort
> @@ -5213,7 +5213,7 @@ EXPLAIN
>        "key_length": "5",
>        "used_key_parts": ["a"],
>        "ref": ["test.t2.a"],
> -      "rows": 6,
> +      "rows": 1,
>        "filtered": 100,
>        "attached_condition": "v.c > 200",
>        "materialized": {
> @@ -5358,7 +5358,7 @@ a	b	c	a	b	c	d
>  explain select * from v3_union as v,t2 where (v.a=t2.a) and (v.c>6);
>  id	select_type	table	type	possible_keys	key	key_len	ref	rows	Extra
>  1	PRIMARY	t2	ALL	NULL	NULL	NULL	NULL	9	Using where
> -1	PRIMARY	<derived2>	ref	key0	key0	5	test.t2.a	4	Using where
> +1	PRIMARY	<derived2>	ref	key0	key0	5	test.t2.a	1	Using where
>  2	DERIVED	t1	ALL	NULL	NULL	NULL	NULL	20	Using where
>  3	UNION	t1	ALL	NULL	NULL	NULL	NULL	20	Using where
>  NULL	UNION RESULT	<union2,3>	ALL	NULL	NULL	NULL	NULL	NULL	
> @@ -5382,7 +5382,7 @@ EXPLAIN
>        "key_length": "5",
>        "used_key_parts": ["a"],
>        "ref": ["test.t2.a"],
> -      "rows": 4,
> +      "rows": 1,
>        "filtered": 100,
>        "attached_condition": "v.c > 6",
>        "materialized": {
> @@ -5476,7 +5476,7 @@ a	b	c	a	b	c	d
>  explain select * from v3_union as v,t2 where (v.a=t2.a) and ((t2.a>1) or (v.b<20));
>  id	select_type	table	type	possible_keys	key	key_len	ref	rows	Extra
>  1	PRIMARY	t2	ALL	NULL	NULL	NULL	NULL	9	Using where
> -1	PRIMARY	<derived2>	ref	key0	key0	5	test.t2.a	4	Using where
> +1	PRIMARY	<derived2>	ref	key0	key0	5	test.t2.a	1	Using where
>  2	DERIVED	t1	ALL	NULL	NULL	NULL	NULL	20	Using where
>  3	UNION	t1	ALL	NULL	NULL	NULL	NULL	20	Using where
>  NULL	UNION RESULT	<union2,3>	ALL	NULL	NULL	NULL	NULL	NULL	
> @@ -5500,7 +5500,7 @@ EXPLAIN
>        "key_length": "5",
>        "used_key_parts": ["a"],
>        "ref": ["test.t2.a"],
> -      "rows": 4,
> +      "rows": 1,
>        "filtered": 100,
>        "attached_condition": "t2.a > 1 or v.b < 20",
>        "materialized": {
> @@ -5561,7 +5561,7 @@ explain select * from v3_union as v,t2 where
>  (v.a=t2.a) and ((v.b=19) or (v.b=21)) and ((v.c<3) or (v.c>600));
>  id	select_type	table	type	possible_keys	key	key_len	ref	rows	Extra
>  1	PRIMARY	t2	ALL	NULL	NULL	NULL	NULL	9	Using where
> -1	PRIMARY	<derived2>	ref	key0	key0	5	test.t2.a	4	Using where
> +1	PRIMARY	<derived2>	ref	key0	key0	5	test.t2.a	1	Using where
>  2	DERIVED	t1	ALL	NULL	NULL	NULL	NULL	20	Using where
>  3	UNION	t1	ALL	NULL	NULL	NULL	NULL	20	Using where
>  NULL	UNION RESULT	<union2,3>	ALL	NULL	NULL	NULL	NULL	NULL	
> @@ -5586,7 +5586,7 @@ EXPLAIN
>        "key_length": "5",
>        "used_key_parts": ["a"],
>        "ref": ["test.t2.a"],
> -      "rows": 4,
> +      "rows": 1,
>        "filtered": 100,
>        "attached_condition": "(v.b = 19 or v.b = 21) and (v.c < 3 or v.c > 600)",
>        "materialized": {
> @@ -5645,7 +5645,7 @@ a	b	c	a	b	c	d
>  explain select * from v4_union as v,t2 where (v.a=t2.a) and (v.b<20);
>  id	select_type	table	type	possible_keys	key	key_len	ref	rows	Extra
>  1	PRIMARY	t2	ALL	NULL	NULL	NULL	NULL	9	Using where
> -1	PRIMARY	<derived2>	ref	key0	key0	5	test.t2.a	4	Using where
> +1	PRIMARY	<derived2>	ref	key0	key0	5	test.t2.a	1	Using where
>  2	DERIVED	t1	ALL	NULL	NULL	NULL	NULL	20	Using where; Using temporary; Using filesort
>  3	UNION	t1	ALL	NULL	NULL	NULL	NULL	20	Using where
>  NULL	UNION RESULT	<union2,3>	ALL	NULL	NULL	NULL	NULL	NULL	
> @@ -5669,7 +5669,7 @@ EXPLAIN
>        "key_length": "5",
>        "used_key_parts": ["a"],
>        "ref": ["test.t2.a"],
> -      "rows": 4,
> +      "rows": 1,
>        "filtered": 100,
>        "attached_condition": "v.b < 20",
>        "materialized": {
> @@ -5752,7 +5752,7 @@ explain select * from v4_union as v,t2 where
>  (v.a=t2.a) and ((t2.a<3) or (v.b<40)) and (v.c>500);
>  id	select_type	table	type	possible_keys	key	key_len	ref	rows	Extra
>  1	PRIMARY	t2	ALL	NULL	NULL	NULL	NULL	9	Using where
> -1	PRIMARY	<derived2>	ref	key0	key0	5	test.t2.a	4	Using where
> +1	PRIMARY	<derived2>	ref	key0	key0	5	test.t2.a	1	Using where
>  2	DERIVED	t1	ALL	NULL	NULL	NULL	NULL	20	Using where; Using temporary; Using filesort
>  3	UNION	t1	ALL	NULL	NULL	NULL	NULL	20	Using where
>  NULL	UNION RESULT	<union2,3>	ALL	NULL	NULL	NULL	NULL	NULL	
> @@ -5777,7 +5777,7 @@ EXPLAIN
>        "key_length": "5",
>        "used_key_parts": ["a"],
>        "ref": ["test.t2.a"],
> -      "rows": 4,
> +      "rows": 1,
>        "filtered": 100,
>        "attached_condition": "(t2.a < 3 or v.b < 40) and v.c > 500",
>        "materialized": {
> @@ -7952,7 +7952,7 @@ a	b	c	a	b	c
>  explain select * from v1,t2 where (v1.a=t2.a) and (v1.a<5);
>  id	select_type	table	type	possible_keys	key	key_len	ref	rows	Extra
>  1	PRIMARY	t2	ALL	NULL	NULL	NULL	NULL	9	Using where
> -1	PRIMARY	<derived2>	ref	key0	key0	5	test.t2.a	2	
> +1	PRIMARY	<derived2>	ref	key0	key0	5	test.t2.a	1	
>  2	DERIVED	t1	ALL	NULL	NULL	NULL	NULL	18	Using where; Using temporary; Using filesort
>  3	EXCEPT	t1	ALL	NULL	NULL	NULL	NULL	18	Using where; Using temporary; Using filesort
>  NULL	EXCEPT RESULT	<except2,3>	ALL	NULL	NULL	NULL	NULL	NULL	
> @@ -7976,7 +7976,7 @@ EXPLAIN
>        "key_length": "5",
>        "used_key_parts": ["a"],
>        "ref": ["test.t2.a"],
> -      "rows": 2,
> +      "rows": 1,
>        "filtered": 100,
>        "materialized": {
>          "query_block": {
> @@ -8226,7 +8226,7 @@ a	b	c	a	b	c
>  explain select * from v1,t2 where (v1.a=t2.a) and (v1.c>500);
>  id	select_type	table	type	possible_keys	key	key_len	ref	rows	Extra
>  1	PRIMARY	t2	ALL	NULL	NULL	NULL	NULL	9	Using where
> -1	PRIMARY	<derived2>	ref	key0	key0	5	test.t2.a	2	Using where
> +1	PRIMARY	<derived2>	ref	key0	key0	5	test.t2.a	1	Using where
>  2	DERIVED	t1	ALL	NULL	NULL	NULL	NULL	18	Using where; Using temporary; Using filesort
>  3	EXCEPT	t1	ALL	NULL	NULL	NULL	NULL	18	Using where; Using temporary; Using filesort
>  NULL	EXCEPT RESULT	<except2,3>	ALL	NULL	NULL	NULL	NULL	NULL	
> @@ -8250,7 +8250,7 @@ EXPLAIN
>        "key_length": "5",
>        "used_key_parts": ["a"],
>        "ref": ["test.t2.a"],
> -      "rows": 2,
> +      "rows": 1,
>        "filtered": 100,
>        "attached_condition": "v1.c > 500",
>        "materialized": {
> @@ -8317,7 +8317,7 @@ a	b	c	a	b	c
>  explain select * from v1,t2 where (v1.a=t2.a) and (v1.a<5) and (v1.c>500);
>  id	select_type	table	type	possible_keys	key	key_len	ref	rows	Extra
>  1	PRIMARY	t2	ALL	NULL	NULL	NULL	NULL	9	Using where
> -1	PRIMARY	<derived2>	ref	key0	key0	5	test.t2.a	2	Using where
> +1	PRIMARY	<derived2>	ref	key0	key0	5	test.t2.a	1	Using where
>  2	DERIVED	t1	ALL	NULL	NULL	NULL	NULL	18	Using where; Using temporary; Using filesort
>  3	EXCEPT	t1	ALL	NULL	NULL	NULL	NULL	18	Using where; Using temporary; Using filesort
>  NULL	EXCEPT RESULT	<except2,3>	ALL	NULL	NULL	NULL	NULL	NULL	
> @@ -8341,7 +8341,7 @@ EXPLAIN
>        "key_length": "5",
>        "used_key_parts": ["a"],
>        "ref": ["test.t2.a"],
> -      "rows": 2,
> +      "rows": 1,
>        "filtered": 100,
>        "attached_condition": "v1.c > 500",
>        "materialized": {
> @@ -8411,7 +8411,7 @@ a	b	c	a	b	c
>  explain select * from v1,t2 where (v1.a=t2.a) and ((v1.b>27) or (v1.b<19));
>  id	select_type	table	type	possible_keys	key	key_len	ref	rows	Extra
>  1	PRIMARY	t2	ALL	NULL	NULL	NULL	NULL	9	Using where
> -1	PRIMARY	<derived2>	ref	key0	key0	5	test.t2.a	2	Using where
> +1	PRIMARY	<derived2>	ref	key0	key0	5	test.t2.a	1	Using where
>  2	DERIVED	t1	ALL	NULL	NULL	NULL	NULL	18	Using where; Using temporary; Using filesort
>  3	EXCEPT	t1	ALL	NULL	NULL	NULL	NULL	18	Using where; Using temporary; Using filesort
>  NULL	EXCEPT RESULT	<except2,3>	ALL	NULL	NULL	NULL	NULL	NULL	
> @@ -8435,7 +8435,7 @@ EXPLAIN
>        "key_length": "5",
>        "used_key_parts": ["a"],
>        "ref": ["test.t2.a"],
> -      "rows": 2,
> +      "rows": 1,
>        "filtered": 100,
>        "attached_condition": "v1.b > 27 or v1.b < 19",
>        "materialized": {
> @@ -8508,7 +8508,7 @@ explain select * from v1,t2 where
>  (v1.a=t2.a) and ((v1.c<400) or (v1.c>800));
>  id	select_type	table	type	possible_keys	key	key_len	ref	rows	Extra
>  1	PRIMARY	t2	ALL	NULL	NULL	NULL	NULL	9	Using where
> -1	PRIMARY	<derived2>	ref	key0	key0	5	test.t2.a	2	Using where
> +1	PRIMARY	<derived2>	ref	key0	key0	5	test.t2.a	1	Using where
>  2	DERIVED	t1	ALL	NULL	NULL	NULL	NULL	18	Using where; Using temporary; Using filesort
>  3	EXCEPT	t1	ALL	NULL	NULL	NULL	NULL	18	Using where; Using temporary; Using filesort
>  NULL	EXCEPT RESULT	<except2,3>	ALL	NULL	NULL	NULL	NULL	NULL	
> @@ -8533,7 +8533,7 @@ EXPLAIN
>        "key_length": "5",
>        "used_key_parts": ["a"],
>        "ref": ["test.t2.a"],
> -      "rows": 2,
> +      "rows": 1,
>        "filtered": 100,
>        "attached_condition": "v1.c < 400 or v1.c > 800",
>        "materialized": {
> @@ -8762,7 +8762,7 @@ where
>  ((d1.a>4) and (d1.c>500)));
>  id	select_type	table	type	possible_keys	key	key_len	ref	rows	Extra
>  1	PRIMARY	t2	ALL	NULL	NULL	NULL	NULL	9	Using where
> -1	PRIMARY	<derived2>	ref	key0	key0	5	test.t2.b	2	Using where
> +1	PRIMARY	<derived2>	ref	key0	key0	5	test.t2.b	1	Using where
>  2	DERIVED	t1	ALL	NULL	NULL	NULL	NULL	18	Using where; Using temporary; Using filesort
>  3	EXCEPT	t1	ALL	NULL	NULL	NULL	NULL	18	Using where; Using temporary; Using filesort
>  NULL	EXCEPT RESULT	<except2,3>	ALL	NULL	NULL	NULL	NULL	NULL	
> @@ -8796,7 +8796,7 @@ EXPLAIN
>        "key_length": "5",
>        "used_key_parts": ["b"],
>        "ref": ["test.t2.b"],
> -      "rows": 2,
> +      "rows": 1,
>        "filtered": 100,
>        "attached_condition": "t2.c = 988 and t2.b > 13 or d1.a > 4 and d1.c > 500",
>        "materialized": {
> @@ -8872,7 +8872,7 @@ a	b	c	a	b	c
>  explain select * from v1,t2 where (v1.a=t2.a) and (v1.a>5) and (v1.c>200);
>  id	select_type	table	type	possible_keys	key	key_len	ref	rows	Extra
>  1	PRIMARY	t2	ALL	NULL	NULL	NULL	NULL	9	Using where
> -1	PRIMARY	<derived2>	ref	key0	key0	5	test.t2.a	3	Using where
> +1	PRIMARY	<derived2>	ref	key0	key0	5	test.t2.a	1	Using where
>  2	DERIVED	t1	ALL	NULL	NULL	NULL	NULL	18	Using where; Using temporary; Using filesort
>  3	UNION	<derived4>	ALL	NULL	NULL	NULL	NULL	18	Using where
>  4	DERIVED	t1	ALL	NULL	NULL	NULL	NULL	18	Using where; Using temporary; Using filesort
> @@ -8899,7 +8899,7 @@ EXPLAIN
>        "key_length": "5",
>        "used_key_parts": ["a"],
>        "ref": ["test.t2.a"],
> -      "rows": 3,
> +      "rows": 1,
>        "filtered": 100,
>        "attached_condition": "v1.c > 200",
>        "materialized": {
> @@ -9138,7 +9138,7 @@ a	b	c	a	b	c
>  explain select * from v1,t2 where (v1.a=t2.a) and (v1.a>5) and (v1.c>200);
>  id	select_type	table	type	possible_keys	key	key_len	ref	rows	Extra
>  1	PRIMARY	t2	ALL	NULL	NULL	NULL	NULL	9	Using where
> -1	PRIMARY	<derived2>	ref	key0	key0	5	test.t2.a	3	Using where
> +1	PRIMARY	<derived2>	ref	key0	key0	5	test.t2.a	1	Using where
>  2	DERIVED	t1	ALL	NULL	NULL	NULL	NULL	18	Using where; Using temporary; Using filesort
>  3	UNION	t1	ALL	NULL	NULL	NULL	NULL	18	Using where; Using temporary; Using filesort
>  4	EXCEPT	t1	ALL	NULL	NULL	NULL	NULL	18	Using where; Using temporary; Using filesort
> @@ -9163,7 +9163,7 @@ EXPLAIN
>        "key_length": "5",
>        "used_key_parts": ["a"],
>        "ref": ["test.t2.a"],
> -      "rows": 3,
> +      "rows": 1,
>        "filtered": 100,
>        "attached_condition": "v1.c > 200",
>        "materialized": {
> @@ -9260,7 +9260,7 @@ a	b	c	a	b	c
>  explain select * from v1,t2 where (v1.a=t2.a) and (v1.a>4) and (v1.c<200);
>  id	select_type	table	type	possible_keys	key	key_len	ref	rows	Extra
>  1	PRIMARY	t2	ALL	NULL	NULL	NULL	NULL	9	Using where
> -1	PRIMARY	<derived2>	ref	key0	key0	5	test.t2.a	3	Using where
> +1	PRIMARY	<derived2>	ref	key0	key0	5	test.t2.a	1	Using where
>  2	DERIVED	t1	ALL	NULL	NULL	NULL	NULL	18	Using where; Using temporary; Using filesort
>  3	EXCEPT	t1	ALL	NULL	NULL	NULL	NULL	18	Using where; Using temporary; Using filesort
>  4	UNION	t1	ALL	NULL	NULL	NULL	NULL	18	Using where; Using temporary; Using filesort
> @@ -9285,7 +9285,7 @@ EXPLAIN
>        "key_length": "5",
>        "used_key_parts": ["a"],
>        "ref": ["test.t2.a"],
> -      "rows": 3,
> +      "rows": 1,
>        "filtered": 100,
>        "attached_condition": "v1.c < 200",
>        "materialized": {
> @@ -9498,7 +9498,7 @@ a	b	c	a	b	c
>  explain select * from v1,t2 where (v1.a=t2.a) and (v1.a>4) and (v1.c<130);
>  id	select_type	table	type	possible_keys	key	key_len	ref	rows	Extra
>  1	PRIMARY	t2	ALL	NULL	NULL	NULL	NULL	9	Using where
> -1	PRIMARY	<derived2>	ref	key0	key0	5	test.t2.a	2	Using where
> +1	PRIMARY	<derived2>	ref	key0	key0	5	test.t2.a	1	Using where
>  2	DERIVED	t1	ALL	NULL	NULL	NULL	NULL	18	Using where; Using temporary; Using filesort
>  3	EXCEPT	<derived4>	ALL	NULL	NULL	NULL	NULL	18	Using where
>  4	DERIVED	t1	ALL	NULL	NULL	NULL	NULL	18	Using where; Using temporary; Using filesort
> @@ -9525,7 +9525,7 @@ EXPLAIN
>        "key_length": "5",
>        "used_key_parts": ["a"],
>        "ref": ["test.t2.a"],
> -      "rows": 2,
> +      "rows": 1,
>        "filtered": 100,
>        "attached_condition": "v1.c < 130",
>        "materialized": {
> @@ -9643,7 +9643,7 @@ a	b	c	a	b	c
>  explain select * from v1,t2 where (v1.a=t2.a) and (v1.a>4) and (v1.c<130);
>  id	select_type	table	type	possible_keys	key	key_len	ref	rows	Extra
>  1	PRIMARY	t2	ALL	NULL	NULL	NULL	NULL	9	Using where
> -1	PRIMARY	<derived2>	ref	key0	key0	5	test.t2.a	3	Using where
> +1	PRIMARY	<derived2>	ref	key0	key0	5	test.t2.a	1	Using where
>  2	DERIVED	t1	ALL	NULL	NULL	NULL	NULL	18	Using where; Using temporary; Using filesort
>  3	EXCEPT	<derived4>	ALL	NULL	NULL	NULL	NULL	18	Using where
>  4	DERIVED	t1	ALL	NULL	NULL	NULL	NULL	18	Using where; Using temporary; Using filesort
> @@ -9671,7 +9671,7 @@ EXPLAIN
>        "key_length": "5",
>        "used_key_parts": ["a"],
>        "ref": ["test.t2.a"],
> -      "rows": 3,
> +      "rows": 1,
>        "filtered": 100,
>        "attached_condition": "v1.c < 130",
>        "materialized": {
> @@ -10143,7 +10143,7 @@ a	b	c	a	b	c
>  explain select * from v1,t2 where (v1.a=t2.a) and (v1.a<2) and (v1.b<30) and (v1.c>450);
>  id	select_type	table	type	possible_keys	key	key_len	ref	rows	Extra
>  1	PRIMARY	t2	ALL	NULL	NULL	NULL	NULL	9	Using where
> -1	PRIMARY	<derived2>	ref	key0	key0	5	test.t2.a	2	Using where
> +1	PRIMARY	<derived2>	ref	key0	key0	5	test.t2.a	1	Using where
>  2	DERIVED	t1	ALL	NULL	NULL	NULL	NULL	18	Using where; Using temporary; Using filesort
>  3	EXCEPT	t1	ALL	NULL	NULL	NULL	NULL	18	Using where; Using temporary; Using filesort
>  NULL	EXCEPT RESULT	<except2,3>	ALL	NULL	NULL	NULL	NULL	NULL	
> @@ -10167,7 +10167,7 @@ EXPLAIN
>        "key_length": "5",
>        "used_key_parts": ["a"],
>        "ref": ["test.t2.a"],
> -      "rows": 2,
> +      "rows": 1,
>        "filtered": 100,
>        "attached_condition": "v1.b < 30 and v1.c > 450",
>        "materialized": {
> @@ -10243,7 +10243,7 @@ a	b	c	a	b	c
>  explain select * from v1,t2 where (v1.a=t2.a) and ((v1.a<2) or (v1.a<5)) and (v1.c>450);
>  id	select_type	table	type	possible_keys	key	key_len	ref	rows	Extra
>  1	PRIMARY	t2	ALL	NULL	NULL	NULL	NULL	9	Using where
> -1	PRIMARY	<derived2>	ref	key0	key0	5	test.t2.a	2	Using where
> +1	PRIMARY	<derived2>	ref	key0	key0	5	test.t2.a	1	Using where
>  2	DERIVED	t1	ALL	NULL	NULL	NULL	NULL	18	Using where; Using temporary; Using filesort
>  3	EXCEPT	t1	ALL	NULL	NULL	NULL	NULL	18	Using where; Using temporary; Using filesort
>  NULL	EXCEPT RESULT	<except2,3>	ALL	NULL	NULL	NULL	NULL	NULL	
> @@ -10267,7 +10267,7 @@ EXPLAIN
>        "key_length": "5",
>        "used_key_parts": ["a"],
>        "ref": ["test.t2.a"],
> -      "rows": 2,
> +      "rows": 1,
>        "filtered": 100,
>        "attached_condition": "v1.c > 450",
>        "materialized": {
> @@ -10636,7 +10636,7 @@ a	b	c	a	b	c
>  explain select * from v1,t2 where (v1.b=t2.b) and (v1.a<3);
>  id	select_type	table	type	possible_keys	key	key_len	ref	rows	Extra
>  1	PRIMARY	t2	ALL	NULL	NULL	NULL	NULL	9	Using where
> -1	PRIMARY	<derived2>	ref	key0	key0	5	test.t2.b	2	Using where
> +1	PRIMARY	<derived2>	ref	key0	key0	5	test.t2.b	1	Using where
>  2	DERIVED	t3	range	i1	i1	5	NULL	1	Using index condition
>  3	UNION	t3	ALL	NULL	NULL	NULL	NULL	20	Using where; Using temporary; Using filesort
>  NULL	UNION RESULT	<union2,3>	ALL	NULL	NULL	NULL	NULL	NULL	
> @@ -10660,7 +10660,7 @@ EXPLAIN
>        "key_length": "5",
>        "used_key_parts": ["b"],
>        "ref": ["test.t2.b"],
> -      "rows": 2,
> +      "rows": 1,
>        "filtered": 100,
>        "attached_condition": "v1.a < 3",
>        "materialized": {
> @@ -12333,7 +12333,7 @@ EXPLAIN
>        "key_length": "5",
>        "used_key_parts": ["b"],
>        "ref": ["test.t1.a"],
> -      "rows": 2,
> +      "rows": 1,
>        "filtered": 100,
>        "materialized": {
>          "query_block": {
> diff --git a/mysql-test/r/derived_view.result b/mysql-test/r/derived_view.result
> index 85e56ff176e..3a0fde7b053 100644
> --- a/mysql-test/r/derived_view.result
> +++ b/mysql-test/r/derived_view.result
> @@ -1525,7 +1525,7 @@ EXPLAIN
>  SELECT a FROM t1 WHERE (a,b) IN (SELECT * FROM v2);
>  id	select_type	table	type	possible_keys	key	key_len	ref	rows	Extra
>  1	PRIMARY	t1	ALL	NULL	NULL	NULL	NULL	2	Using where
> -1	PRIMARY	<derived3>	ref	key0	key0	10	test.t1.a,test.t1.b	2	FirstMatch(t1)
> +1	PRIMARY	<derived3>	ref	key0	key0	10	test.t1.a,test.t1.b	1	FirstMatch(t1)

Interesting.. here, we potentially could infer that this semi-join can be
converted to an inner join (as the subquery may only have one match).

But I think this is outside of the scope of this MDEV.

>  3	DERIVED	t2	ALL	NULL	NULL	NULL	NULL	6	
>  4	UNION	t3	ALL	NULL	NULL	NULL	NULL	4	
>  NULL	UNION RESULT	<union3,4>	ALL	NULL	NULL	NULL	NULL	NULL	
> diff --git a/mysql-test/r/join_cache.result b/mysql-test/r/join_cache.result
> index eea397402ad..cc185b640bc 100644
> --- a/mysql-test/r/join_cache.result
> +++ b/mysql-test/r/join_cache.result
> @@ -5197,7 +5197,7 @@ SELECT * FROM (SELECT DISTINCT * FROM t1) t
>  WHERE t.a IN (SELECT t2.a FROM t2);
>  id	select_type	table	type	possible_keys	key	key_len	ref	rows	Extra
>  1	PRIMARY	t2	ALL	NULL	NULL	NULL	NULL	4	Using where; Start temporary
> -1	PRIMARY	<derived2>	ref	key0	key0	5	test.t2.a	2	End temporary
> +1	PRIMARY	<derived2>	ref	key0	key0	5	test.t2.a	1	End temporary
>  2	DERIVED	t1	ALL	NULL	NULL	NULL	NULL	3	Using temporary
>  SELECT * FROM (SELECT DISTINCT * FROM t1) t
>  WHERE t.a IN (SELECT t2.a FROM t2);
> @@ -5208,8 +5208,8 @@ EXPLAIN
>  SELECT * FROM (SELECT DISTINCT * FROM t1) t
>  WHERE t.a  IN (SELECT t2.a FROM t2);
>  id	select_type	table	type	possible_keys	key	key_len	ref	rows	Extra
> -1	PRIMARY	<derived2>	ALL	NULL	NULL	NULL	NULL	3	
> -1	PRIMARY	t2	ALL	NULL	NULL	NULL	NULL	4	Using where; Start temporary; End temporary; Using join buffer (flat, BNL join)
> +1	PRIMARY	t2	ALL	NULL	NULL	NULL	NULL	4	Using where; Start temporary
> +1	PRIMARY	<derived2>	ref	key0	key0	5	test.t2.a	1	End temporary
>  2	DERIVED	t1	ALL	NULL	NULL	NULL	NULL	3	Using temporary
>  SELECT * FROM (SELECT DISTINCT * FROM t1) t
>  WHERE t.a  IN (SELECT t2.a FROM t2);
> diff --git a/mysql-test/r/mdev9959.result b/mysql-test/r/mdev9959.result
> new file mode 100644
> index 00000000000..049e0350cca
> --- /dev/null
> +++ b/mysql-test/r/mdev9959.result
> @@ -0,0 +1,46 @@
> +create table t1(a int);
> +insert into t1 values (1),(2),(3),(4),(5),(6);
> +create table t2(a int, b int,c int);
> +insert into t2(a,b,c) values (1,1,2),(2,2,3),(3,1,4),(4,2,2),(5,1,1),(6,2,5);
> +create table t3(a int, b int);
> +insert into t3(a,b) values (1,1),(2,2),(2,1),(1,2),(5,1),(9,2);
> +table "<derived2>" should have type=ref and rows=1
> +one select in derived table
> +with distinct
> +analyze select * from t1 , ((select distinct t2.a from t2 order by c))q  where t1.a=q.a;
> +id	select_type	table	type	possible_keys	key	key_len	ref	rows	r_rows	filtered	r_filtered	Extra
> +1	PRIMARY	t1	ALL	NULL	NULL	NULL	NULL	6	6.00	100.00	100.00	Using where
> +1	PRIMARY	<derived2>	ref	key0	key0	5	test.t1.a	1	1.00	100.00	100.00	
> +2	DERIVED	t2	ALL	NULL	NULL	NULL	NULL	6	6.00	100.00	100.00	Using temporary; Using filesort
> +# multiple selects in derived table
> +# NO UNION ALL
> +analyze select * from t1 , ( (select t2.a,t2.b from t2 order by c) union  (select t2.a,t2.b from t2 order by c))q  where t1.a=q.a;
> +id	select_type	table	type	possible_keys	key	key_len	ref	rows	r_rows	filtered	r_filtered	Extra
> +1	PRIMARY	t1	ALL	NULL	NULL	NULL	NULL	6	6.00	100.00	100.00	Using where
> +1	PRIMARY	<derived2>	ref	key0	key0	5	test.t1.a	1	1.00	100.00	100.00	
> +2	DERIVED	t2	ALL	NULL	NULL	NULL	NULL	6	6.00	100.00	100.00	
> +3	UNION	t2	ALL	NULL	NULL	NULL	NULL	6	6.00	100.00	100.00	
> +NULL	UNION RESULT	<union2,3>	ALL	NULL	NULL	NULL	NULL	NULL	6.00	NULL	NULL	
> +select * from t1 , ( (select t2.a from t2 order by c) union  (select t2.a from t2 order by c))q  where t1.a=q.a;
> +a	a
> +1	1
> +2	2
> +3	3
> +4	4
> +5	5
> +6	6
> +# UNION ALL and EXCEPT
> +analyze select * from t1 , ( (select t2.a,t2.b from t2 order by c) union all (select  t2.a,t2.b from t2 order by c) except(select t3.a, t3.b from t3 order by b))q  where t1.a=q.a;
> +id	select_type	table	type	possible_keys	key	key_len	ref	rows	r_rows	filtered	r_filtered	Extra
> +1	PRIMARY	t1	ALL	NULL	NULL	NULL	NULL	6	6.00	100.00	100.00	Using where
> +1	PRIMARY	<derived2>	ref	key0	key0	5	test.t1.a	1	0.50	100.00	100.00	
> +2	DERIVED	t2	ALL	NULL	NULL	NULL	NULL	6	6.00	100.00	100.00	
> +3	UNION	t2	ALL	NULL	NULL	NULL	NULL	6	6.00	100.00	100.00	
> +4	EXCEPT	t3	ALL	NULL	NULL	NULL	NULL	6	6.00	100.00	100.00	
> +NULL	UNIT RESULT	<unit2,3,4>	ALL	NULL	NULL	NULL	NULL	NULL	3.00	NULL	NULL	
> +select * from t1 , ( (select  t2.a,t2.b from t2 order by c) union all (select t2.a,t2.b from t2 order by c) except(select t3.a, t3.b from t3 order by b))q  where t1.a=q.a;
> +a	a	b
> +3	3	1
> +4	4	2
> +6	6	2
> +drop table t1,t2,t3;
> diff --git a/mysql-test/t/mdev9959.test b/mysql-test/t/mdev9959.test
> new file mode 100644
> index 00000000000..36f364261c9
> --- /dev/null
> +++ b/mysql-test/t/mdev9959.test
> @@ -0,0 +1,25 @@
> +create table t1(a int);
> +insert into t1 values (1),(2),(3),(4),(5),(6);
> +create table t2(a int, b int,c int);
> +insert into t2(a,b,c) values (1,1,2),(2,2,3),(3,1,4),(4,2,2),(5,1,1),(6,2,5);
> +create table t3(a int, b int);
> +insert into t3(a,b) values (1,1),(2,2),(2,1),(1,2),(5,1),(9,2);
> +
> +--echo table "<derived2>" should have type=ref and rows=1
> +--echo one select in derived table
> +
> +--echo with distinct
> +analyze select * from t1 , ((select distinct t2.a from t2 order by c))q  where t1.a=q.a; 
> +
> +--echo # multiple selects in derived table
> +--echo # NO UNION ALL
> +analyze select * from t1 , ( (select t2.a,t2.b from t2 order by c) union  (select t2.a,t2.b from t2 order by c))q  where t1.a=q.a;
> +select * from t1 , ( (select t2.a from t2 order by c) union  (select t2.a from t2 order by c))q  where t1.a=q.a;
> +
> +--echo # UNION ALL and EXCEPT
> +analyze select * from t1 , ( (select t2.a,t2.b from t2 order by c) union all (select  t2.a,t2.b from t2 order by c) except(select t3.a, t3.b from t3 order by b))q  where t1.a=q.a;
> +
> +select * from t1 , ( (select  t2.a,t2.b from t2 order by c) union all (select t2.a,t2.b from t2 order by c) except(select t3.a, t3.b from t3 order by b))q  where t1.a=q.a;
> +
> +drop table t1,t2,t3;
> +
> diff --git a/sql/sql_lex.h b/sql/sql_lex.h
> index f0241a32acf..16d5e55d251 100644
> --- a/sql/sql_lex.h
> +++ b/sql/sql_lex.h
> @@ -761,6 +761,7 @@ class st_select_lex_unit: public st_select_lex_node {
>    Procedure *last_procedure;	 /* Pointer to procedure, if such exists */
>  
>    bool columns_are_renamed;
> +  bool union_all;           /* TRUE if there is a UNION ALL operation */

This is not initialized properly. The testcase:

create table t10 (a int , b int);
insert into t10 values (1,1),(1,1);
create table t11 (a int , b int);
create table t12 (a int , b int);
insert into t11 values (2,2),(2,2);
create table t13 (a int , b int);
insert into t12 values (3,3),(3,3);
create table ten(a int);
insert into ten values (0),(1),(2),(3),(4),(5),(6),(7),(8),(9);

explain select * from ten A, (select * from t10 union select * from t11 union select * from t12) T where T.a=A.a;
 +------+--------------+--------------+------+---------------+------+---------+----------+------+-------------+
| id   | select_type  | table        | type | possible_keys | key  | key_len | ref      | rows | Extra       |
+------+--------------+--------------+------+---------------+------+---------+----------+------+-------------+
|    1 | PRIMARY      | A            | ALL  | NULL          | NULL | NULL    | NULL     |   10 | Using where |
|    1 | PRIMARY      | <derived2>   | ref  | key0          | key0 | 5       | test.A.a |    1 |             |
|    2 | DERIVED      | t10          | ALL  | NULL          | NULL | NULL    | NULL     |    2 |             |
|    3 | UNION        | t11          | ALL  | NULL          | NULL | NULL    | NULL     |    2 |             |
|    4 | UNION        | t12          | ALL  | NULL          | NULL | NULL    | NULL     |    2 |             |
| NULL | UNION RESULT | <union2,3,4> | ALL  | NULL          | NULL | NULL    | NULL     | NULL |             |
+------+--------------+--------------+------+---------------+------+---------+----------+------+-------------+

(gdb) wher
  #0  st_select_lex_unit::check_distinct_in_union (this=0x7fff700149e0) at /home/psergey/dev-git/10.3-cp/sql/sql_union.cc:2100
  #1  0x0000555555d2a693 in TABLE::add_tmp_key (this=0x7fff70062908, key=0, key_parts=1, next_field_no=0x555555c6bf47 <get_next_field_for_derived_key(uchar*)>, arg=0x7fffd98e9230 "X\207\006p\377\177", unique=false) at /home/psergey/dev-git/10.3-cp/sql/table.cc:7270
  #2  0x0000555555c6c214 in generate_derived_keys_for_table (keyuse=0x7fff700687b0, count=1, keys=1) at /home/psergey/dev-git/10.3-cp/sql/sql_select.cc:11091
  #3  0x0000555555c6c42a in generate_derived_keys (keyuse_array=0x7fff70018968) at /home/psergey/dev-git/10.3-cp/sql/sql_select.cc:11166
  #4  0x0000555555c60267 in sort_and_filter_keyuse (thd=0x7fff70000d60, keyuse=0x7fff70018968, skip_unprefixed_keyparts=true) at /home/psergey/dev-git/10.3-cp/sql/sql_select.cc:6403
  #5  0x0000555555c5a910 in make_join_statistics (join=0x7fff70018678, tables_list=..., keyuse_array=0x7fff70018968) at /home/psergey/dev-git/10.3-cp/sql/sql_select.cc:4622
  #6  0x0000555555c513d4 in JOIN::optimize_inner (this=0x7fff70018678) at /home/psergey/dev-git/10.3-cp/sql/sql_select.cc:1888
  #7  0x0000555555c4fa73 in JOIN::optimize (this=0x7fff70018678) at /home/psergey/dev-git/10.3-cp/sql/sql_select.cc:1451
  #8  0x0000555555c59638 in mysql_select (thd=0x7fff70000d60, tables=0x7fff70013f68, wild_num=1, fields=..., conds=0x7fff70017d98, og_num=0, order=0x0, group=0x0, having=0x0, proc_param=0x0, select_options=2147748612, result=0x7fff70017ff0, unit=0x7fff70004c28, select_lex=0x7fff70005398) at /home/psergey/dev-git/10.3-cp/sql/sql_select.cc:4225
  #9  0x0000555555c91d6f in mysql_explain_union (thd=0x7fff70000d60, unit=0x7fff70004c28, result=0x7fff70017ff0) at /home/psergey/dev-git/10.3-cp/sql/sql_select.cc:25866
  #10 0x0000555555c15da6 in execute_sqlcom_select (thd=0x7fff70000d60, all_tables=0x7fff70013f68) at /home/psergey/dev-git/10.3-cp/sql/sql_parse.cc:6487
  #11 0x0000555555c0c7a5 in mysql_execute_command (thd=0x7fff70000d60) at /home/psergey/dev-git/10.3-cp/sql/sql_parse.cc:3821
  #12 0x0000555555c1a0fd in mysql_parse (thd=0x7fff70000d60, rawbuf=0x7fff70013ca8 "explain select * from ten A, (select * from t10 union select * from t11 union select * from t12) T where T.a=A.a", length=112, parser_state=0x7fffd98ea5d0, is_com_multi=false, is_next_command=false) at /home/psergey/dev-git/10.3-cp/sql/sql_parse.cc:8091

(gdb) p union_all
  $36 = 165


>  
>    void init_query();
>    st_select_lex* outer_select();
> @@ -800,6 +801,7 @@ class st_select_lex_unit: public st_select_lex_node {
>    bool union_needs_tmp_table();
>  
>    void set_unique_exclude();
> +  bool check_distinct_in_union();
can this be made 'const'? if yes, please make it
>  
>    friend struct LEX;
>    friend int subselect_union_engine::exec();
> diff --git a/sql/sql_union.cc b/sql/sql_union.cc
> index 857c9a117f5..6dac0cb0458 100644
> --- a/sql/sql_union.cc
> +++ b/sql/sql_union.cc
> @@ -1960,3 +1960,63 @@ void st_select_lex_unit::set_unique_exclude()
>      }
>    }
>  }
> +
> +/*
> +  Check if the selects in the derived table can give distinct rows irrespective
> +  of the data given for the tables.
> +
> +  for example:
> +  select * from
> +  ((select t1.a from t1) op (select t2.a from t2) op (select t3.a from t3));
> +  the op here being UNION/INTERSECT/EXCEPT
> +
> +  so this function would check if the derived table like the case above
> +  can give distinct rows or not irrespective of the data in the tables.

I think the above is difficult to read. Readability is a matter of opinion, but
I would suggest another variant:

/*
  @brief
  Check if the derived table is guaranteed to have distinct rows because of
  UNION operations used to populate it.

  @detail
    UNION operation removes duplicate rows from its output. That is, a query like

      select * from t1 UNION select * from t2
    
    will not produce duplicate rows in its output, even if table t1 (and/or t2)
    contain duplicate rows. EXCEPT and INTERSECT operations also have this
    property.

    On the other hand, UNION ALL operation doesn't remove duplicates. (The SQL
    standard also defines EXCEPT ALL and INTERSECT ALL, but we don't support
    them).

    st_select_lex_unit computes its value left to right. That is, if there is 
     a st_select_lex_unit object describing

      (select #1) OP1 (select #2) OP2 (select #3)
   
    then ((select #1) OP1 (select #2)) is computed first, and OP2 is computed
    second.

    How can one tell if st_select_lex_unit is guaranteed to have distinct
    output rows? This depends on whether the last operation was duplicate-
    removing or not:
    - UNION ALL is not duplicate-removing
    - all other operations are duplicate-removing
*/

Is the above correct?  

> +
> +  So what the function is handling:
> +    - If there is no UNION ALL, we are guarenteed distinct rows.
> +
> +    Example
> +    ((select t1.a from t1) UNION (select t2.a from t2);
> +
> +    - If there is UNION ALL, we can still guarantee distinct rows if
> +      the last operation of the selects does not involve UNION ALL or
> +      INTERSECT is not there
.

> +
> +      Example:
> +      ((select t1.a from t1) UNION ALL (select t2.a from t2) UNION/EXCEPT
> +      (select t3.a from t3)): guarantees distinct rows, UNION and EXCEPT
> +      have the same precedence.
> +
> +      Example:
> +      ((select t1.a from t1) UNION ALL (select t2.a from t2) INTERSECT
> +      (select t3.a from t3)): does not guarantee distinct rows because
> +      INTERSECT has higher precedence than UNION so we would evaluate
> +      the INTERSECT part first and then do UNION ALL, so there we can
> +      end up with duplicates, so distinct rows are not guaranteed.
> +
> +  @retval false Distinct rows are not guaranteed
> +  @retval true  Distinct rows are guanranteed irrespective of the data
> +                in the tables
> +
> +*/
> +
> +bool st_select_lex_unit::check_distinct_in_union()
> +{
> +  bool is_intersect_present=FALSE;
> +  st_select_lex* first= first_select();
> +  for(st_select_lex *sl=first; sl ; sl=sl->next_select())
> +    is_intersect_present|= (sl->linkage == INTERSECT_TYPE);
> +
> +  if (!union_all)
> +    return true;
> +  else
> +  {
> +    if (union_distinct)
> +    {
> +      if (!is_intersect_present && !union_distinct->next_select())
> +        return true;
> +    }
I'm unable to undestand what kind of check we are making here.
According to my variant of the comment above, we should just check 
whether the last operation (operation made with the output of the last select)
was duplicate-removing or not.

We seem to be doing something more complex?

> +  }
> +  return false;
> +}
> diff --git a/sql/sql_yacc.yy b/sql/sql_yacc.yy
> index 35ec2d29d21..112873e8362 100644
> --- a/sql/sql_yacc.yy
> +++ b/sql/sql_yacc.yy
> @@ -571,7 +571,10 @@ bool LEX::add_select_to_union_list(bool is_union_distinct,
>        current_select;
>    }
>    else
> +  {
>      DBUG_ASSERT(type == UNION_TYPE);
> +    current_select->master_unit()->union_all= true;
> +  }
>    return FALSE;
>  }
>  
> diff --git a/sql/table.cc b/sql/table.cc
> index 4f90d429ce5..d2938a29ea6 100644
> --- a/sql/table.cc
> +++ b/sql/table.cc
> @@ -7113,6 +7113,25 @@ bool TABLE::add_tmp_key(uint key, uint key_parts,
>    if (!keyinfo->rec_per_key)
>      return TRUE;
>    bzero(keyinfo->rec_per_key, sizeof(ulong)*key_parts);
> +  /*
> +    For the case when there is a derived table that would give distinct rows,
> +    the index statistics are passed to the join optimizer to tell that
> +    a ref access to the derived table will produce only one row.
> +  */
> +
> +  st_select_lex_unit* derived= pos_in_table_list ? pos_in_table_list->derived: NULL;
> +  if (derived)
> +  {
> +    /*
> +      This handles the case when we have a single select in the derived table
> +    */

Does it anymore?  Now it is handling the UNION case, too.

> +    st_select_lex* first= derived->first_select();
> +    if ((first && !first->is_part_of_union() && 
> +        first->options & SELECT_DISTINCT) ||
please put brackets around the condition on the line above as some too-smart
compilers now complain about a possible typo.

> +        derived->check_distinct_in_union())
> +        keyinfo->rec_per_key[key_parts-1]=1;
> +  }
> +
>    keyinfo->read_stats= NULL;
>    keyinfo->collected_stats= NULL;
>  

BR
 Sergei
-- 
Sergei Petrunia, Software Developer
MariaDB Corporation | Skype: sergefp | Blog: http://s.petrunia.net/blog