← Back to team overview

maria-developers team mailing list archive

27b046d2e95: MDEV-14203: rpl.rpl_extra_col_master_myisam, rpl.rpl_slave_load_tmpdir_not_exist failed in buildbot with a warning

 

revision-id: 27b046d2e95add5ba87e231279d2a7774db5b0c7 (mariadb-10.2.31-317-g27b046d2e95)
parent(s): a1e52e7f32cee7c204078db4c4beb88250d7e1f6
author: Sujatha
committer: Sujatha
timestamp: 2020-07-21 11:16:40 +0530
message:

MDEV-14203: rpl.rpl_extra_col_master_myisam, rpl.rpl_slave_load_tmpdir_not_exist failed in buildbot with a warning

Problem:
=======
rpl.rpl_slave_load_tmpdir_not_exist 'stmt' w3 [ fail ]  Found warnings/errors
in server log file!

Test ended at 2017-09-27 20:34:55
[Warning] Master is configured to log replication events with checksum, but
will not send such events to slaves that cannot process them
^ Found warnings in /mnt/buildbot/build/mariadb-10.2.10/mysql-test/var/3/log/mysqld.1.err
ok
Analysis:
========
When slave tries to connect to master 'get_master_version_and_clock' function
is invoked to perform elaborated slave-master handshake. During this process
slave server queries master server, to know if it is checksum aware and at the
same time master is notified about its CRC-awareness. The master's side
instant value of @@global.binlog_checksum is stored in the dump thread's
uservar area as well as cached locally to become known in consensus by master
and slave.

Post hand-shake slave requests master for binlog dump. It sends
'COM_BINLOG_DUMP'. This command is sent to master by 'cli_advanced_command'
call. If there is some temporary network failure during this request_dump
call, 'end_server' is invoked to close the current connection between master
and slave. Upon connection close the dump thread on the master gets terminated
and it clears the 'uservar' data it got through master-slave handshake.

The 'COM_BINLOG_DUMP' command is sent once again without master-slave
handshake. Since the checksum data is not available with new dump thread a
warning gets reported.

Fix:
===
Upon network write error donot attempt reconnect, proceed to master-slave
handshake. This ensures that master is aware of slave's capability to use
checksums.

---
 .../rpl/r/rpl_dump_request_retry_warning.result    | 22 ++++++++
 .../rpl/t/rpl_dump_request_retry_warning.test      | 60 ++++++++++++++++++++++
 sql-common/client.c                                |  2 +
 sql/net_serv.cc                                    | 13 +++++
 sql/slave.cc                                       |  3 +-
 5 files changed, 99 insertions(+), 1 deletion(-)

diff --git a/mysql-test/suite/rpl/r/rpl_dump_request_retry_warning.result b/mysql-test/suite/rpl/r/rpl_dump_request_retry_warning.result
new file mode 100644
index 00000000000..3a80d5b5f31
--- /dev/null
+++ b/mysql-test/suite/rpl/r/rpl_dump_request_retry_warning.result
@@ -0,0 +1,22 @@
+include/master-slave.inc
+[connection master]
+CREATE TABLE t1(a int);
+INSERT INTO t1 VALUES(1);
+connection slave;
+SET @saved_dbug = @@GLOBAL.debug_dbug;
+SET @@global.debug_dbug= 'd,simulate_error_on_packet_write';
+START SLAVE;
+SET DEBUG_SYNC= 'now WAIT_FOR parked';
+SET @@GLOBAL.debug_dbug = @saved_dbug;
+SET DEBUG_SYNC= 'now SIGNAL continue';
+SET DEBUG_SYNC= 'RESET';
+include/wait_for_slave_io_to_start.inc
+include/wait_for_slave_sql_to_start.inc
+connection master;
+include/sync_slave_sql_with_master.inc
+SELECT * FROM t1;
+a
+1
+connection master;
+DROP TABLE t1;
+include/rpl_end.inc
diff --git a/mysql-test/suite/rpl/t/rpl_dump_request_retry_warning.test b/mysql-test/suite/rpl/t/rpl_dump_request_retry_warning.test
new file mode 100644
index 00000000000..d9fb8258765
--- /dev/null
+++ b/mysql-test/suite/rpl/t/rpl_dump_request_retry_warning.test
@@ -0,0 +1,60 @@
+# ==== Purpose ====
+#
+# Test verifies that, due to a temporary network error, if request dump
+# command specific packet write operation fails then the write error gets
+# handled appropriately. Further retry will be initiated with appropriate
+# slave registration on master. This will ensure that master has all the
+# details of slave and no warnings are reported on the master side.
+#
+# ==== Implementation ====
+#
+# Steps:
+#    0 - Skip the slave start.
+#    1 - Enable debug simulation which will simulate packet write error during
+#        dump request command execution.
+#    2 - Start the slave. Observe that slave is able to reconnect post
+#        temporary network write error.
+#
+# ==== References ====
+#
+# MDEV-14203: rpl.rpl_extra_col_master_myisam,
+#             rpl.rpl_slave_load_tmpdir_not_exist failed in buildbot with a
+#             warning
+#
+# MDEV-13258: rpl.rpl_skip_replication, rpl.rpl_set_statement_default_master
+#             failed in buildbot
+# 
+
+--source include/have_debug.inc
+--source include/have_debug_sync.inc
+--let $rpl_skip_start_slave=1
+--source include/master-slave.inc
+
+# Do an insert on master
+CREATE TABLE t1(a int);
+INSERT INTO t1 VALUES(1);
+
+# Add a debug point and start the slave so that dump request fails.
+connection slave;
+SET @saved_dbug = @@GLOBAL.debug_dbug;
+SET @@global.debug_dbug= 'd,simulate_error_on_packet_write';
+
+START SLAVE;
+SET DEBUG_SYNC= 'now WAIT_FOR parked';
+SET @@GLOBAL.debug_dbug = @saved_dbug;
+SET DEBUG_SYNC= 'now SIGNAL continue';
+SET DEBUG_SYNC= 'RESET';
+
+--source include/wait_for_slave_io_to_start.inc
+--source include/wait_for_slave_sql_to_start.inc
+
+# Sync the slave and verify that slave has caught up with the master.
+connection master;
+--source include/sync_slave_sql_with_master.inc
+SELECT * FROM t1;
+
+# Cleanup
+connection master;
+DROP TABLE t1;
+
+--source include/rpl_end.inc
diff --git a/sql-common/client.c b/sql-common/client.c
index 227759f4163..b66eee2a508 100644
--- a/sql-common/client.c
+++ b/sql-common/client.c
@@ -721,6 +721,8 @@ cli_advanced_command(MYSQL *mysql, enum enum_server_command command,
       set_mysql_error(mysql, CR_NET_PACKET_TOO_LARGE, unknown_sqlstate);
       goto end;
     }
+    if (net->last_errno == ER_NET_ERROR_ON_WRITE && command == COM_BINLOG_DUMP)
+      goto end;
     end_server(mysql);
     if (mysql_reconnect(mysql) || stmt_skip)
       goto end;
diff --git a/sql/net_serv.cc b/sql/net_serv.cc
index f39eaaadf46..e625b307343 100644
--- a/sql/net_serv.cc
+++ b/sql/net_serv.cc
@@ -45,6 +45,7 @@
 #include <violite.h>
 #include <signal.h>
 #include "probes_mysql.h"
+#include <debug_sync.h>
 
 #ifdef EMBEDDED_LIBRARY
 #undef MYSQL_SERVER
@@ -486,6 +487,18 @@ net_write_command(NET *net,uchar command,
   DBUG_ENTER("net_write_command");
   DBUG_PRINT("enter",("length: %lu", (ulong) len));
 
+  DBUG_EXECUTE_IF("simulate_error_on_packet_write",
+                  {
+                    if (command == COM_BINLOG_DUMP)
+                    {
+                      net->last_errno = ER_NET_ERROR_ON_WRITE;
+                      socket_errno = 2;
+                      DBUG_ASSERT(!debug_sync_set_action(
+                      (THD *)net->thd,
+                      STRING_WITH_LEN("now SIGNAL parked WAIT_FOR continue")));
+                      DBUG_RETURN(true);
+                    }
+                  };);
   MYSQL_NET_WRITE_START(length);
 
   buff[4]=command;				/* For first packet */
diff --git a/sql/slave.cc b/sql/slave.cc
index 1bf83aa9652..87eacfcfd0a 100644
--- a/sql/slave.cc
+++ b/sql/slave.cc
@@ -3297,7 +3297,8 @@ static int request_dump(THD *thd, MYSQL* mysql, Master_info* mi,
       in the future, we should do a better error analysis, but for
       now we just fill up the error log :-)
     */
-    if (mysql_errno(mysql) == ER_NET_READ_INTERRUPTED)
+    if (mysql_errno(mysql) == ER_NET_READ_INTERRUPTED ||
+        mysql_errno(mysql) == ER_NET_ERROR_ON_WRITE)
       *suppress_warnings= TRUE;                 // Suppress reconnect warning
     else
       sql_print_error("Error on COM_BINLOG_DUMP: %d  %s, will retry in %d secs",