← Back to team overview

maria-developers team mailing list archive

Fwd: 41f885f64fe: MDEV-10859: Wrong result of aggregate window function in query with HAVING and no ORDER BY

 

Hi Igor, Sergey!

I need a review on this patch that fixes a problem with Window Functions
being computed over too many rows. The details are in the commit message.

Thank you!
Vicențiu

---------- Forwarded message ---------
From: vicentiu <vicentiu@xxxxxxxxxxx>
Date: Tue, 14 Feb 2017 at 14:08
Subject: 41f885f64fe: MDEV-10859: Wrong result of aggregate window function
in query with HAVING and no ORDER BY
To: <commits@xxxxxxxxxxx>


revision-id: 41f885f64fe4c02e03ec655f225b1dc01e9d5376
(mariadb-10.2.3-252-g41f885f64fe)
parent(s): d731ce21a7bc25a49958789d24b3f0eec5845aea
author: Vicențiu Ciorbaru
committer: Vicențiu Ciorbaru
timestamp: 2017-02-14 14:02:29 +0200
message:

MDEV-10859: Wrong result of aggregate window function in query with HAVING
and no ORDER BY

Window functions need to be computed after applying the HAVING clause.
An optimization that we have for regular, non-window function, cases is
to apply having only during sending of the rows to the client. This
allows rows that should be filtered from the temporary table used to
store aggregation results to be stored there.

This behaviour is undesireable for window functions, as we have to
compute window functions on the result-set after HAVING is applied.
Storing extra rows in the table leads to wrong values as the frame
bounds might capture those -to be filtered afterwards- rows.

---
 mysql-test/r/win.result | 33 +++++++++++++++++++++++++++++++++
 mysql-test/t/win.test   | 23 +++++++++++++++++++++++
 sql/sql_select.cc       | 11 ++++++++---
 sql/sql_window.cc       |  6 ++++++
 4 files changed, 70 insertions(+), 3 deletions(-)

diff --git a/mysql-test/r/win.result b/mysql-test/r/win.result
index fd3aea80083..b3adf358be6 100644
--- a/mysql-test/r/win.result
+++ b/mysql-test/r/win.result
@@ -2925,3 +2925,36 @@ WHERE EXISTS ( SELECT * FROM t2 WHERE c IN ( SELECT
MAX(d) FROM t3 ) );
 a      MAX(a)  AVG(a) OVER (PARTITION BY b)
 NULL   NULL    NULL
 DROP TABLE t1,t2,t3;
+#
+# MDEV-MDEV-10859: Wrong result of aggregate window function in query
+#                  with HAVING and no ORDER BY
+#
+create table empsalary (depname varchar(32), empno smallint primary key,
salary int);
+insert into empsalary values
+('develop', 1, 5000), ('develop', 2, 4000),('sales', 3, '6000'),('sales',
4, 5000);
+SELECT depname, empno, salary, avg(salary) OVER (PARTITION BY depname)
FROM empsalary;
+depname        empno   salary  avg(salary) OVER (PARTITION BY depname)
+develop        1       5000    4500.0000
+develop        2       4000    4500.0000
+sales  3       6000    5500.0000
+sales  4       5000    5500.0000
+SELECT depname, empno, salary, avg(salary) OVER (PARTITION BY depname)
FROM empsalary ORDER BY depname;
+depname        empno   salary  avg(salary) OVER (PARTITION BY depname)
+develop        1       5000    4500.0000
+develop        2       4000    4500.0000
+sales  3       6000    5500.0000
+sales  4       5000    5500.0000
+#
+# These last 2 should have the same row results, ignoring order.
+#
+SELECT depname, empno, salary, avg(salary) OVER (PARTITION BY depname)
FROM empsalary HAVING empno > 1;
+depname        empno   salary  avg(salary) OVER (PARTITION BY depname)
+develop        2       4000    4000.0000
+sales  3       6000    5500.0000
+sales  4       5000    5500.0000
+SELECT depname, empno, salary, avg(salary) OVER (PARTITION BY depname)
FROM empsalary HAVING empno > 1 ORDER BY depname;
+depname        empno   salary  avg(salary) OVER (PARTITION BY depname)
+develop        2       4000    4000.0000
+sales  3       6000    5500.0000
+sales  4       5000    5500.0000
+drop table empsalary;
diff --git a/mysql-test/t/win.test b/mysql-test/t/win.test
index aa636f7a294..90bbf41688f 100644
--- a/mysql-test/t/win.test
+++ b/mysql-test/t/win.test
@@ -1711,3 +1711,26 @@ SELECT a, MAX(a), AVG(a) OVER (PARTITION BY b) FROM
t1
 WHERE EXISTS ( SELECT * FROM t2 WHERE c IN ( SELECT MAX(d) FROM t3 ) );

 DROP TABLE t1,t2,t3;
+
+--echo #
+--echo # MDEV-MDEV-10859: Wrong result of aggregate window function in
query
+--echo #                  with HAVING and no ORDER BY
+--echo #
+
+create table empsalary (depname varchar(32), empno smallint primary key,
salary int);
+insert into empsalary values
+  ('develop', 1, 5000), ('develop', 2, 4000),('sales', 3,
'6000'),('sales', 4, 5000);
+
+--sorted_result
+SELECT depname, empno, salary, avg(salary) OVER (PARTITION BY depname)
FROM empsalary;
+--sorted_result
+SELECT depname, empno, salary, avg(salary) OVER (PARTITION BY depname)
FROM empsalary ORDER BY depname;
+--echo #
+--echo # These last 2 should have the same row results, ignoring order.
+--echo #
+--sorted_result
+SELECT depname, empno, salary, avg(salary) OVER (PARTITION BY depname)
FROM empsalary HAVING empno > 1;
+--sorted_result
+SELECT depname, empno, salary, avg(salary) OVER (PARTITION BY depname)
FROM empsalary HAVING empno > 1 ORDER BY depname;
+
+drop table empsalary;
diff --git a/sql/sql_select.cc b/sql/sql_select.cc
index 5c7ae1e88c1..473270f8a69 100644
--- a/sql/sql_select.cc
+++ b/sql/sql_select.cc
@@ -2411,13 +2411,18 @@ bool JOIN::make_aggr_tables_info()

       If having is not handled here, it will be checked before the row
       is sent to the client.
+
+      In the case of window functions however, we *must* make sure to not
+      store any rows which don't match HAVING within the temp table,
+      as rows will end up being used during their computation.
     */
     if (having &&
-        (sort_and_group || (exec_tmp_table->distinct && !group_list)))
+        (sort_and_group || (exec_tmp_table->distinct && !group_list) ||
+         select_lex->have_window_funcs()))
     {
-      // Attach HAVING to tmp table's condition
+      /* Attach HAVING to tmp table's condition */
       curr_tab->having= having;
-      having= NULL; // Already done
+      having= NULL; /* Already done */
     }

    /* Change sum_fields reference to calculated fields in tmp_table */
diff --git a/sql/sql_window.cc b/sql/sql_window.cc
index 37095ad78ca..2bdac89f293 100644
--- a/sql/sql_window.cc
+++ b/sql/sql_window.cc
@@ -2840,6 +2840,12 @@ bool Window_funcs_computation::setup(THD *thd,
   order_window_funcs_by_window_specs(window_funcs);

   SQL_SELECT *sel= NULL;
+  /*
+     If the tmp table is filtered during sorting
+     (ex: SELECT with HAVING && ORDER BY), we must make sure to keep the
+     filtering conditions when we perform sorting for window function
+     computation.
+  */
   if (tab->filesort && tab->filesort->select)
   {
     sel= tab->filesort->select;