← Back to team overview

maria-developers team mailing list archive

0038c24f2d3: MDEV-16146: MariaDB slave stops with following errors.

 

revision-id: 0038c24f2d388b9b0f3bff9219a53c8900153d02 (mariadb-10.5.4-539-g0038c24f2d3)
parent(s): 190a8312f598fc4892331225104297f6288f23ac
author: Sujatha
committer: Sujatha
timestamp: 2021-03-18 18:41:30 +0530
message:

MDEV-16146: MariaDB slave stops with following errors.

Problem:
========
180511 11:07:58 [ERROR] Slave I/O: Unexpected master's heartbeat data:
heartbeat is not compatible with local info;the event's data: log_file_name
mysql-bin.000009 log_pos 1054262041, Error_code: 1623

Analysis:
=========
In replication setup when master server doesn't have any events to send to
slave server it sends an 'Heartbeat_log_event'. This event carries the
current binary log filename and offset details. The offset values is stored
within 4 bytes of event header. When the size of binary log is higher
than UINT32_MAX the log_pos values will not fit in 4 bytes memory.  It
overflows and hence slave stops with an error.

Fix:
===
Since we cannot extend the fixed header of Log_event class an additional
header named 'extra_header' is introduced. 'extra_header' contains

HB_FLAGS               - 2 bytes
HB_LONG_LOG_POS_OFFSET - 8 bytes

This 'extra_header' is added only in a case where log_pos > UINT32_MAX and
'HB_LONG_LOG_POS_OFFSET_F' will be enabled. On slave while reading the
'Heartbeat_log_event' if 'HB_LONG_LOG_POS_OFFSET_F' is found to be set then
value of 'log_pos' is extracted from 'extra_header'.

---
 .../suite/rpl/r/rpl_incompatible_heartbeat.result  | 20 +++++++++
 .../suite/rpl/t/rpl_incompatible_heartbeat.test    | 47 ++++++++++++++++++++++
 sql/log_event.h                                    | 19 ++++++++-
 sql/log_event_server.cc                            |  8 +++-
 sql/slave.cc                                       | 14 +++++++
 sql/sql_repl.cc                                    | 26 ++++++++++--
 6 files changed, 128 insertions(+), 6 deletions(-)

diff --git a/mysql-test/suite/rpl/r/rpl_incompatible_heartbeat.result b/mysql-test/suite/rpl/r/rpl_incompatible_heartbeat.result
new file mode 100644
index 00000000000..fe2b0436f2d
--- /dev/null
+++ b/mysql-test/suite/rpl/r/rpl_incompatible_heartbeat.result
@@ -0,0 +1,20 @@
+include/master-slave.inc
+[connection master]
+connection master;
+SET @saved_dbug = @@GLOBAL.debug_dbug;
+SET @@global.debug_dbug= '+d,simulate_pos_4G';
+connection slave;
+include/stop_slave.inc
+SET @saved_dbug = @@GLOBAL.debug_dbug;
+SET @@global.debug_dbug= '+d,simulate_pos_4G';
+CHANGE MASTER TO MASTER_HEARTBEAT_PERIOD=0.001;
+include/start_slave.inc
+connection master;
+SET @@GLOBAL.debug_dbug = @saved_dbug;
+connection slave;
+SET @@GLOBAL.debug_dbug = @saved_dbug;
+connection master;
+CREATE TABLE t (f INT) ENGINE=INNODB;
+INSERT INTO t VALUES (10);
+DROP TABLE t;
+include/rpl_end.inc
diff --git a/mysql-test/suite/rpl/t/rpl_incompatible_heartbeat.test b/mysql-test/suite/rpl/t/rpl_incompatible_heartbeat.test
new file mode 100644
index 00000000000..b1aa5435ff4
--- /dev/null
+++ b/mysql-test/suite/rpl/t/rpl_incompatible_heartbeat.test
@@ -0,0 +1,47 @@
+# ==== Purpose ====
+#
+# Test verifies that slave IO thread can process heartbeat events with log_pos
+# values higher than UINT32_MAX.
+#
+# ==== Implementation ====
+#
+# Steps:
+#    0 - Stop slave threads. Configure a small master_heartbeat_period.
+#    1 - Using debug points, simulate a huge binlog offset higher than
+#        UINT32_MAX on master.
+#    2 - Start the slave and observe that slave IO thread is able to process
+#        the offset received through heartbeat event.
+#
+# ==== References ====
+#
+# MDEV-16146: MariaDB slave stops with incompatible heartbeat
+#
+--source include/have_debug.inc
+--source include/have_innodb.inc
+--source include/have_binlog_format_mixed.inc
+# Test simulates binarylog offsets higher than UINT32_MAX
+--source include/have_64bit.inc
+--source include/master-slave.inc
+
+--connection master
+SET @saved_dbug = @@GLOBAL.debug_dbug;
+SET @@global.debug_dbug= '+d,simulate_pos_4G';
+
+--connection slave
+--source include/stop_slave.inc
+SET @saved_dbug = @@GLOBAL.debug_dbug;
+SET @@global.debug_dbug= '+d,simulate_pos_4G';
+CHANGE MASTER TO MASTER_HEARTBEAT_PERIOD=0.001;
+--source include/start_slave.inc
+
+--connection master
+sleep 1;
+SET @@GLOBAL.debug_dbug = @saved_dbug;
+--sync_slave_with_master
+SET @@GLOBAL.debug_dbug = @saved_dbug;
+
+--connection master
+CREATE TABLE t (f INT) ENGINE=INNODB;
+INSERT INTO t VALUES (10);
+DROP TABLE t;
+--source include/rpl_end.inc
diff --git a/sql/log_event.h b/sql/log_event.h
index 4e193232f4b..72d8766cea5 100644
--- a/sql/log_event.h
+++ b/sql/log_event.h
@@ -576,6 +576,22 @@ class String;
 #define MARIA_SLAVE_CAPABILITY_MINE MARIA_SLAVE_CAPABILITY_GTID
 
 
+#define HB_EXTRA_HEADER_LEN 10
+/*
+  Extra header contains flags specific to Heartbeat_log_event and
+  respective data. The length needs to be updated when new memebers are
+  added to this header
+*/
+#define HB_FLAGS_OFFSET 0
+/*
+  When the size of 'log_pos' within Heartbeat_log_event exceeds UINT_MAX32 it
+  cannot be stored in standard header as 'log_pos' is of 4 bytes size. Hence
+  extra_header is introduced. First 2 bytes represent flags. In case of long
+  'log_pos' value 'HB_LONG_LOG_POS_OFFSET_F' bit within the flag will be set.
+  The log_pos is stored witin 'long_log_pos' variables.
+*/
+#define HB_LONG_LOG_POS_OFFSET_F       0x1
+#define HB_LONG_LOG_POS_OFFSET 2
 /**
   @enum Log_event_type
 
@@ -5718,7 +5734,8 @@ bool copy_cache_to_file_wrapped(IO_CACHE *body,
 class Heartbeat_log_event: public Log_event
 {
 public:
-  Heartbeat_log_event(const char* buf, uint event_len,
+  uint16 hb_flags;
+  Heartbeat_log_event(const char* buf, ulong event_len,
                       const Format_description_log_event* description_event);
   Log_event_type get_type_code() { return HEARTBEAT_LOG_EVENT; }
   bool is_valid() const
diff --git a/sql/log_event_server.cc b/sql/log_event_server.cc
index 6dee9b9adf6..2701f4f2f9a 100644
--- a/sql/log_event_server.cc
+++ b/sql/log_event_server.cc
@@ -8493,14 +8493,18 @@ void Ignorable_log_event::pack_info(Protocol *protocol)
 
 
 #if defined(HAVE_REPLICATION)
-Heartbeat_log_event::Heartbeat_log_event(const char* buf, uint event_len,
+Heartbeat_log_event::Heartbeat_log_event(const char* buf, ulong event_len,
                     const Format_description_log_event* description_event)
   :Log_event(buf, description_event)
 {
+  uint16 hb_flags;
   uint8 header_size= description_event->common_header_len;
-  ident_len = event_len - header_size;
+  ident_len = event_len - (header_size + HB_EXTRA_HEADER_LEN);
   set_if_smaller(ident_len,FN_REFLEN-1);
   log_ident= buf + header_size;
+  hb_flags= uint2korr(buf + header_size + ident_len);
+  if (hb_flags & HB_LONG_LOG_POS_OFFSET_F)
+    log_pos= uint8korr(buf + header_size + ident_len + HB_LONG_LOG_POS_OFFSET);
 }
 #endif
 
diff --git a/sql/slave.cc b/sql/slave.cc
index 1da030084ef..f365a1e7b5c 100644
--- a/sql/slave.cc
+++ b/sql/slave.cc
@@ -6493,6 +6493,12 @@ static int queue_event(Master_info* mi,const char* buf, ulong event_len)
 
        TODO: handling `when' for SHOW SLAVE STATUS' snds behind
     */
+    DBUG_EXECUTE_IF("simulate_pos_4G",
+    {
+       inc_pos= mi->master_log_pos; // temp_save
+       mi->master_log_pos= ((ulong)2394967295);
+    };);
+
     if (memcmp(mi->master_log_name, hb.get_log_ident(), hb.get_ident_len()) ||
         mi->master_log_pos > hb.log_pos) {
       /* missed events of heartbeat from the past */
@@ -6504,6 +6510,14 @@ static int queue_event(Master_info* mi,const char* buf, ulong event_len)
       error_msg.append_ulonglong(hb.log_pos);
       goto err;
     }
+    DBUG_EXECUTE_IF("simulate_pos_4G",
+    {
+      if (hb.log_pos > UINT32_MAX)
+      {
+        mi->master_log_pos= inc_pos; // restore
+        DBUG_SET("-d, simulate_pos_4G");
+      }
+    };);
 
     /*
       Heartbeat events doesn't count in the binlog size, so we don't have to
diff --git a/sql/sql_repl.cc b/sql/sql_repl.cc
index 010ea794254..d68ae05b60e 100644
--- a/sql/sql_repl.cc
+++ b/sql/sql_repl.cc
@@ -824,12 +824,14 @@ get_slave_until_gtid(THD *thd, String *out_str)
 */
 static int send_heartbeat_event(binlog_send_info *info,
                                 NET* net, String* packet,
-                                const struct event_coordinates *coord,
+                                struct event_coordinates *coord,
                                 enum enum_binlog_checksum_alg checksum_alg_arg)
 {
   DBUG_ENTER("send_heartbeat_event");
 
   ulong ev_offset;
+  char extra_buf[HB_EXTRA_HEADER_LEN];
+  uint16 hb_flags= 0;
   if (reset_transmit_packet(info, info->flags, &ev_offset, &info->errmsg))
     DBUG_RETURN(1);
 
@@ -850,19 +852,30 @@ static int send_heartbeat_event(binlog_send_info *info,
   size_t event_len = ident_len + LOG_EVENT_HEADER_LEN +
     (do_checksum ? BINLOG_CHECKSUM_LEN : 0);
   int4store(header + SERVER_ID_OFFSET, global_system_variables.server_id);
+  if (coord->pos <= UINT32_MAX)
+  {
+    int4store(header + LOG_POS_OFFSET, coord->pos);  // log_pos
+  }
+  else
+  {
+    hb_flags|= HB_LONG_LOG_POS_OFFSET_F;
+    int2store(extra_buf + HB_FLAGS_OFFSET, hb_flags);
+    int8store(extra_buf + HB_LONG_LOG_POS_OFFSET, coord->pos);
+    event_len+= HB_EXTRA_HEADER_LEN;
+  }
   int4store(header + EVENT_LEN_OFFSET, event_len);
   int2store(header + FLAGS_OFFSET, 0);
 
-  int4store(header + LOG_POS_OFFSET, coord->pos);  // log_pos
-
   packet->append(header, sizeof(header));
   packet->append(p, ident_len);             // log_file_name
+  packet->append(extra_buf, sizeof(extra_buf));
 
   if (do_checksum)
   {
     char b[BINLOG_CHECKSUM_LEN];
     ha_checksum crc= my_checksum(0, (uchar*) header, sizeof(header));
     crc= my_checksum(crc, (uchar*) p, ident_len);
+    crc= my_checksum(crc, (uchar*) extra_buf, sizeof(extra_buf));
     int4store(b, crc);
     packet->append(b, sizeof(b));
   }
@@ -2525,6 +2538,13 @@ static int wait_new_events(binlog_send_info *info,         /* in */
         }
 #endif
         mysql_bin_log.unlock_binlog_end_pos();
+
+	DBUG_EXECUTE_IF("simulate_pos_4G",
+	{
+	  coord.pos= ((ulong)5394967295);
+	  DBUG_SET("-d, simulate_pos_4G");
+	};);
+
         ret= send_heartbeat_event(info,
                                   info->net, info->packet, &coord,
                                   info->current_checksum_alg);