← Back to team overview

maria-developers team mailing list archive

5d9386892b5: MDEV-4633: multi_source.simple test fails sporadically

 

revision-id: 5d9386892b58477c4e28de3e40d930b44f5b42d4 (mariadb-10.1.43-274-g5d9386892b5)
parent(s): 3ee2422624ffb3d7ffefff8db7ef9398816299bc
author: Sujatha
committer: Sujatha
timestamp: 2020-09-08 13:10:36 +0530
message:

MDEV-4633: multi_source.simple test fails sporadically

Analysis:
========
Writes to 'rli->log_space_total' need to be synchronized; otherwise the
SQL_THREAD and the IO_THREAD can modify the variable simultaneously,
leaving 'rli->log_space_total' with an incorrect value. In the current test
scenario the SQL_THREAD decrements 'rli->log_space_total' in
'purge_first_log' while the IO_THREAD increments it in 'queue_event' at the
same time. Hence the test occasionally fails with a result mismatch.

Fix:
===
Access 'rli->log_space_total' through atomic operations
(my_atomic_add64_explicit / my_atomic_store64_explicit /
my_atomic_load64_explicit).

---
 sql/log.cc     |  4 +++-
 sql/log.h      |  4 +++-
 sql/rpl_rli.cc | 14 ++++++++------
 sql/slave.cc   | 19 +++++++++++++++----
 4 files changed, 29 insertions(+), 12 deletions(-)

diff --git a/sql/log.cc b/sql/log.cc
index 8049b94bab1..90d5c46a15f 100644
--- a/sql/log.cc
+++ b/sql/log.cc
@@ -4400,7 +4400,9 @@ int MYSQL_BIN_LOG::purge_first_log(Relay_log_info* rli, bool included)
                             0, 0, &log_space_reclaimed);
 
   mysql_mutex_lock(&rli->log_space_lock);
-  rli->log_space_total-= log_space_reclaimed;
+  my_atomic_add64_explicit((volatile int64*)(&rli->log_space_total),
+                           (-(ulonglong)log_space_reclaimed),
+                           MY_MEMORY_ORDER_RELAXED);
   mysql_cond_broadcast(&rli->log_space_cond);
   mysql_mutex_unlock(&rli->log_space_lock);
 
diff --git a/sql/log.h b/sql/log.h
index 277e5c6f69c..9e3807e9939 100644
--- a/sql/log.h
+++ b/sql/log.h
@@ -712,7 +712,9 @@ class MYSQL_BIN_LOG: public TC_LOG, private MYSQL_LOG
     char buf1[22],buf2[22];
 #endif
     DBUG_ENTER("harvest_bytes_written");
-    (*counter)+=bytes_written;
+
+    my_atomic_add64_explicit((volatile int64*)(counter), bytes_written,
+                             MY_MEMORY_ORDER_RELAXED);
     DBUG_PRINT("info",("counter: %s  bytes_written: %s", llstr(*counter,buf1),
 		       llstr(bytes_written,buf2)));
     bytes_written=0;
diff --git a/sql/rpl_rli.cc b/sql/rpl_rli.cc
index c196a65809a..8815af4ea4a 100644
--- a/sql/rpl_rli.cc
+++ b/sql/rpl_rli.cc
@@ -140,8 +140,8 @@ int init_relay_log_info(Relay_log_info* rli,
   rli->slave_skip_counter=0;
   rli->abort_pos_wait=0;
   rli->log_space_limit= relay_log_space_limit;
-  rli->log_space_total= 0;
-
+  my_atomic_store64_explicit((volatile int64*)(&rli->log_space_total), 0,
+                             MY_MEMORY_ORDER_RELAXED);
   char pattern[FN_REFLEN];
   (void) my_realpath(pattern, slave_load_tmpdir, 0);
   if (fn_format(pattern, PREFIX_SQL_LOAD, pattern, "",
@@ -467,7 +467,8 @@ static inline int add_relay_log(Relay_log_info* rli,LOG_INFO* linfo)
                     linfo->log_file_name);
     DBUG_RETURN(1);
   }
-  rli->log_space_total += s.st_size;
+  my_atomic_add64_explicit((volatile int64*)(&rli->log_space_total),
+                           s.st_size, MY_MEMORY_ORDER_RELAXED);
   DBUG_PRINT("info",("log_space_total: %llu", rli->log_space_total));
   DBUG_RETURN(0);
 }
@@ -477,7 +478,8 @@ static int count_relay_log_space(Relay_log_info* rli)
 {
   LOG_INFO linfo;
   DBUG_ENTER("count_relay_log_space");
-  rli->log_space_total= 0;
+  my_atomic_store64_explicit((volatile int64*)(&rli->log_space_total), 0,
+                             MY_MEMORY_ORDER_RELAXED);
   if (rli->relay_log.find_log_pos(&linfo, NullS, 1))
   {
     sql_print_error("Could not find first log while counting relay log space");
@@ -1202,8 +1204,8 @@ int purge_relay_logs(Relay_log_info* rli, THD *thd, bool just_reset,
     strmake_buf(rli->group_relay_log_name, rli->relay_log.get_log_fname());
     strmake_buf(rli->event_relay_log_name, rli->relay_log.get_log_fname());
     rli->group_relay_log_pos= rli->event_relay_log_pos= BIN_LOG_HEADER_SIZE;
-    rli->log_space_total= 0;
-
+    my_atomic_store64_explicit((volatile int64*)(&rli->log_space_total), 0,
+                               MY_MEMORY_ORDER_RELAXED);
     if (count_relay_log_space(rli))
     {
       *errmsg= "Error counting relay log space";
diff --git a/sql/slave.cc b/sql/slave.cc
index 06f2b0d955a..9f4b0f53fb4 100644
--- a/sql/slave.cc
+++ b/sql/slave.cc
@@ -2371,7 +2371,10 @@ static bool wait_for_relay_log_space(Relay_log_info* rli)
                   &rli->log_space_lock,
                   &stage_waiting_for_relay_log_space,
                   &old_stage);
-  while (rli->log_space_limit < rli->log_space_total &&
+  while (rli->log_space_limit <
+         (ulonglong)my_atomic_load64_explicit((volatile int64*)
+                                              (&rli->log_space_total),
+                                              MY_MEMORY_ORDER_RELAXED) &&
          !(slave_killed=io_slave_killed(mi)) &&
          !rli->ignore_log_space_limit)
     mysql_cond_wait(&rli->log_space_cond, &rli->log_space_lock);
@@ -2912,7 +2915,10 @@ static bool send_show_master_info_data(THD *thd, Master_info *mi, bool full,
     protocol->store(mi->rli.last_error().message, &my_charset_bin);
     protocol->store((uint32) mi->rli.slave_skip_counter);
     protocol->store((ulonglong) mi->rli.group_master_log_pos);
-    protocol->store((ulonglong) mi->rli.log_space_total);
+    protocol->store((ulonglong)
+                     my_atomic_load64_explicit((volatile int64*)
+                                               (&mi->rli.log_space_total),
+                                               MY_MEMORY_ORDER_RELAXED));
 
     protocol->store(
       mi->rli.until_condition==Relay_log_info::UNTIL_NONE ? "None":
@@ -4428,7 +4434,9 @@ Stopping slave I/O thread due to out-of-memory error from master");
 #endif
 
       if (rli->log_space_limit && rli->log_space_limit <
-          rli->log_space_total &&
+          (ulonglong) my_atomic_load64_explicit((volatile int64*)
+                                                (&rli->log_space_total),
+                                                MY_MEMORY_ORDER_RELAXED) &&
           !rli->ignore_log_space_limit)
         if (wait_for_relay_log_space(rli))
         {
@@ -7031,7 +7039,10 @@ static Log_event* next_event(rpl_group_info *rgi, ulonglong *event_size)
              is are able to rotate and purge sometime soon.
          */
         if (rli->log_space_limit && 
-            rli->log_space_limit < rli->log_space_total)
+            rli->log_space_limit <
+            (ulonglong) my_atomic_load64_explicit((volatile int64*)
+                                                  (&rli->log_space_total),
+                                                  MY_MEMORY_ORDER_RELAXED))
         {
           /* force rotation if not in an unfinished group */
           rli->sql_force_rotate_relay= !rli->is_in_group();