← Back to team overview

maria-developers team mailing list archive

eb75e8705d9: MDEV-8134: The relay-log is not flushed after the slave-relay-log.999999 showed

 

revision-id: eb75e8705d9a444e10057967eaebf947b1115ff8 (mariadb-10.2.31-695-geb75e8705d9)
parent(s): 53acd1c1d88be82190c56af3e4cc11fb2770a169
author: Sujatha
committer: Sujatha
timestamp: 2021-01-21 13:00:02 +0530
message:

MDEV-8134: The relay-log is not flushed after the slave-relay-log.999999 showed

Problem:
========
Automatic purging of relay logs stops once the relay log file name reaches
'slave-relay-log.999999' while slave_parallel_threads is enabled.

Analysis:
=========
In the Relay_log_info::inc_group_relay_log_pos() function, two log names are
compared with strcmp(). This gives the correct result as long as both
sequence numbers have the same number of digits (6 digits). Once the number
grows to 7 digits, strcmp() reports 999999 as greater than 1000000, which is
wrong; hence the bug.

Fix:
====
Extract the numeric extension part of each file name, convert it to an
unsigned long, and compare the resulting numbers.

Thanks to David Zhao for the contribution.

---
 .../suite/rpl/r/rpl_relay_max_extension.result     |  37 +++++++
 .../suite/rpl/t/rpl_relay_max_extension.test       | 109 +++++++++++++++++++++
 sql/rpl_parallel.cc                                |   5 +-
 sql/rpl_rli.cc                                     |   4 +-
 sql/sql_repl.cc                                    |  17 ++++
 sql/sql_repl.h                                     |   1 +
 6 files changed, 169 insertions(+), 4 deletions(-)

diff --git a/mysql-test/suite/rpl/r/rpl_relay_max_extension.result b/mysql-test/suite/rpl/r/rpl_relay_max_extension.result
new file mode 100644
index 00000000000..4444398203e
--- /dev/null
+++ b/mysql-test/suite/rpl/r/rpl_relay_max_extension.result
@@ -0,0 +1,37 @@
+include/rpl_init.inc [topology=1->2]
+connection server_2;
+include/stop_slave.inc
+RESET SLAVE;
+include/start_slave.inc
+include/stop_slave.inc
+#
+# Stop slave server
+#
+#
+# Simulate file number get close to 999997
+# by renaming relay logs and modifying index/info files
+#
+# Restart slave server
+#
+SET @save_slave_parallel_threads= @@GLOBAL.slave_parallel_threads;
+SET @save_max_relay_log_size= @@GLOBAL.max_relay_log_size;
+SET GLOBAL slave_parallel_threads=1;
+SET GLOBAL max_relay_log_size=100 * 1024;
+include/start_slave.inc
+connection server_1;
+create table t1 (i int, c varchar(1024));
+#
+# Insert some data to generate enough amount of binary logs
+#
+connection server_2;
+#
+# Assert that 'slave-relay-bin.999999' is purged.
+#
+NOT FOUND /slave-relay-bin.999999/ in slave-relay-bin.index
+include/stop_slave.inc
+SET GLOBAL slave_parallel_threads= @save_slave_parallel_threads;
+SET GLOBAL max_relay_log_size= @save_max_relay_log_size;
+include/start_slave.inc
+connection server_1;
+DROP TABLE t1;
+include/rpl_end.inc
diff --git a/mysql-test/suite/rpl/t/rpl_relay_max_extension.test b/mysql-test/suite/rpl/t/rpl_relay_max_extension.test
new file mode 100644
index 00000000000..e1e087f2e0e
--- /dev/null
+++ b/mysql-test/suite/rpl/t/rpl_relay_max_extension.test
@@ -0,0 +1,109 @@
+# ==== Purpose ====
+#
+# Test verifies that auto purging mechanism of relay logs works fine when the
+# file extension grows beyond 999999.
+#
+# ==== Implementation ====
+#
+# Steps:
+#    0 - In master-slave setup clear all the relay logs on the slave server.
+#    1 - Start the slave so that new relay logs starting from
+#        'slave-relay-bin.000001' are created.
+#    2 - Get the active relay-log file name by using SHOW SLAVE STATUS.
+#        Shutdown the slave server.
+#    3 - Rename active relay log to '999997' in both 'relay-log.info' and
+#        'slave-relay-bin.index' files.
+#    4 - Restart the slave server by configuring 'slave_parallel_threads=1'
+#        and 'max_relay_log_size=100K'.
+#    5 - Generate load on master such that a few relay logs are generated on
+#        slave. The relay log sequence number will change to 7 digits.
+#    6 - Sync slave with master to ensure that relay logs are applied on
+#        slave. They should have been automatically purged.
+#    7 - Assert that there is no 'slave-relay-bin.999999' entry in
+#        'slave-relay-bin.index'.
+#
+# ==== References ====
+#
+# MDEV-8134: The relay-log is not flushed after the slave-relay-log.999999
+#            showed
+#
+
+--source include/have_innodb.inc
+--source include/have_binlog_format_row.inc
+--let $rpl_topology=1->2
+--source include/rpl_init.inc
+
+--connection server_2
+--source include/stop_slave.inc
+RESET SLAVE;
+--source include/start_slave.inc
+--source include/stop_slave.inc
+--let $relay_log=query_get_value(SHOW SLAVE STATUS, Relay_Log_File, 1)
+
+--echo #
+--echo # Stop slave server
+--echo #
+
+--let $datadir = `select @@datadir`
+--exec echo "wait" > $MYSQLTEST_VARDIR/tmp/mysqld.2.expect
+--shutdown_server 10
+--source include/wait_until_disconnected.inc
+
+--exec sed -i "s/$relay_log/slave-relay-bin.999997/g" $datadir/relay-log.info
+--exec sed -i "s/$relay_log/slave-relay-bin.999997/g" $datadir/slave-relay-bin.index
+
+--echo #
+--echo # Simulate file number get close to 999997
+--echo # by renaming relay logs and modifying index/info files
+
+--move_file $datadir/$relay_log $datadir/slave-relay-bin.999997
+
+--echo #
+--echo # Restart slave server
+--echo #
+
+--exec echo "restart" > $MYSQLTEST_VARDIR/tmp/mysqld.2.expect
+--enable_reconnect
+--source include/wait_until_connected_again.inc
+SET @save_slave_parallel_threads= @@GLOBAL.slave_parallel_threads;
+SET @save_max_relay_log_size= @@GLOBAL.max_relay_log_size;
+
+SET GLOBAL slave_parallel_threads=1;
+SET GLOBAL max_relay_log_size=100 * 1024;
+--source include/start_slave.inc
+
+--connection server_1
+create table t1 (i int, c varchar(1024));
+--echo #
+--echo # Insert some data to generate enough amount of binary logs
+--echo #
+--let $count = 1000
+--disable_query_log
+while ($count)
+{
+  eval insert into t1 values (1001 - $count, repeat('a',1000));
+  dec $count;
+}
+--enable_query_log
+--save_master_pos
+
+--connection server_2
+--sync_with_master
+
+--let $relay_log=query_get_value(SHOW SLAVE STATUS, Relay_Log_File, 1)
+
+--echo #
+--echo # Assert that 'slave-relay-bin.999999' is purged.
+--echo #
+let SEARCH_FILE=$datadir/slave-relay-bin.index;
+let SEARCH_PATTERN=slave-relay-bin.999999;
+source include/search_pattern_in_file.inc;
+
+--source include/stop_slave.inc
+SET GLOBAL slave_parallel_threads= @save_slave_parallel_threads;
+SET GLOBAL max_relay_log_size= @save_max_relay_log_size;
+--source include/start_slave.inc
+
+--connection server_1
+DROP TABLE t1;
+--source include/rpl_end.inc
diff --git a/sql/rpl_parallel.cc b/sql/rpl_parallel.cc
index 4cf87ba73b7..869640fd46f 100644
--- a/sql/rpl_parallel.cc
+++ b/sql/rpl_parallel.cc
@@ -4,6 +4,7 @@
 #include "rpl_mi.h"
 #include "sql_parse.h"
 #include "debug_sync.h"
+#include "sql_repl.h"
 
 /*
   Code for optional parallel execution of replicated events on the slave.
@@ -82,7 +83,7 @@ handle_queued_pos_update(THD *thd, rpl_parallel_thread::queued_event *qev)
     return;
 
   mysql_mutex_lock(&rli->data_lock);
-  cmp= strcmp(rli->group_relay_log_name, qev->event_relay_log_name);
+  cmp= compare_log_name(rli->group_relay_log_name, qev->event_relay_log_name);
   if (cmp < 0)
   {
     rli->group_relay_log_pos= qev->future_event_relay_log_pos;
@@ -91,7 +92,7 @@ handle_queued_pos_update(THD *thd, rpl_parallel_thread::queued_event *qev)
              rli->group_relay_log_pos < qev->future_event_relay_log_pos)
     rli->group_relay_log_pos= qev->future_event_relay_log_pos;
 
-  cmp= strcmp(rli->group_master_log_name, qev->future_event_master_log_name);
+  cmp= compare_log_name(rli->group_master_log_name, qev->future_event_master_log_name);
   if (cmp < 0)
   {
     strcpy(rli->group_master_log_name, qev->future_event_master_log_name);
diff --git a/sql/rpl_rli.cc b/sql/rpl_rli.cc
index 40ab375571a..5273b33c728 100644
--- a/sql/rpl_rli.cc
+++ b/sql/rpl_rli.cc
@@ -989,7 +989,7 @@ void Relay_log_info::inc_group_relay_log_pos(ulonglong log_pos,
   if (rgi->is_parallel_exec)
   {
     /* In case of parallel replication, do not update the position backwards. */
-    int cmp= strcmp(group_relay_log_name, rgi->event_relay_log_name);
+    int cmp= compare_log_name(group_relay_log_name, rgi->event_relay_log_name);
     if (cmp < 0)
     {
       group_relay_log_pos= rgi->future_event_relay_log_pos;
@@ -1001,7 +1001,7 @@ void Relay_log_info::inc_group_relay_log_pos(ulonglong log_pos,
       In the parallel case we need to update the master_log_name here, rather
       than in Rotate_log_event::do_update_pos().
     */
-    cmp= strcmp(group_master_log_name, rgi->future_event_master_log_name);
+    cmp= compare_log_name(group_master_log_name, rgi->future_event_master_log_name);
     if (cmp <= 0)
     {
       if (cmp < 0)
diff --git a/sql/sql_repl.cc b/sql/sql_repl.cc
index 4af8ebc2dd8..59a3f686e45 100644
--- a/sql/sql_repl.cc
+++ b/sql/sql_repl.cc
@@ -4541,5 +4541,22 @@ rpl_gtid_pos_update(THD *thd, char *str, size_t len)
     return false;
 }
 
+/* Order two log names by base name, then numerically by the extension after
+   the last '.', so "x.999999" sorts before "x.1000000". Returns <0, 0 or >0
+   like strcmp(); a name without any '.' falls back to plain strcmp(). */
+int compare_log_name(const char *log_1, const char *log_2) {
+  const char *ext1_str= strrchr(log_1, '.');
+  const char *ext2_str= strrchr(log_2, '.');
+  if (!ext1_str || !ext2_str)   /* strrchr() found no '.': nothing to parse */
+    return strcmp(log_1, log_2);
+  size_t len1= ext1_str - log_1, len2= ext2_str - log_2;
+  /* Compare the base names in place; no fixed-size copy that could overflow. */
+  int res= strncmp(log_1, log_2, len1 < len2 ? len1 : len2);
+  if (res || len1 != len2)      /* on an equal prefix the shorter base is less */
+    return res ? res : (len1 < len2 ? -1 : 1);
+  ulong ext1= strtoul(ext1_str + 1, NULL, 10);
+  ulong ext2= strtoul(ext2_str + 1, NULL, 10);
+  return ext1 > ext2 ? 1 : (ext1 == ext2 ? 0 : -1);
+}
 
 #endif /* HAVE_REPLICATION */
diff --git a/sql/sql_repl.h b/sql/sql_repl.h
index 8ddfa9239f6..9129aaeed5e 100644
--- a/sql/sql_repl.h
+++ b/sql/sql_repl.h
@@ -56,6 +56,7 @@ bool show_binlogs(THD* thd);
 extern int init_master_info(Master_info* mi);
 void kill_zombie_dump_threads(uint32 slave_server_id);
 int check_binlog_magic(IO_CACHE* log, const char** errmsg);
+int compare_log_name(const char *log_1, const char *log_2);
 
 struct LOAD_FILE_IO_CACHE : public IO_CACHE
 {