← Back to team overview

maria-developers team mailing list archive

e4fa0c805ad: MDEV-4633: multi_source.simple test fails sporadically

 

revision-id: e4fa0c805adf06c637cc3867427ca529b40e19ce (mariadb-10.1.43-267-ge4fa0c805ad)
parent(s): de49e603770463e64cf4a79a7414bac93c02254f
author: Sujatha
committer: Sujatha
timestamp: 2020-08-28 12:03:09 +0530
message:

MDEV-4633: multi_source.simple test fails sporadically

Analysis:
========
Writes to 'rli->log_space_total' needs to be synchronized. All modifications
to 'rli->log_space_total' should be performed by holding
'rli->log_space_lock'. But IO thread doesn't acquire this mutex while writing
events to relay log. Hence there is a race condition between SQL THREAD and
IO THREAD, when both try to modify the same variable. SQL_THREAD is trying to
decrement 'rli->log_space_total' in 'purge_first_log'  and IO_THREAD is trying
to increment the 'rli->log_space_total' in 'queue_event' simultaneously .
Hence test occasionally fails with  result mismatch.

Fix:
===
Acquire 'rli->log_space_lock' while modifying 'rli->log_space_total'.

---
 sql/log.cc     | 15 +++++++++++++++
 sql/log.h      | 13 +------------
 sql/rpl_rli.cc |  4 ++++
 sql/slave.cc   | 28 ++++++++++++++--------------
 4 files changed, 34 insertions(+), 26 deletions(-)

diff --git a/sql/log.cc b/sql/log.cc
index 8049b94bab1..28a3d58d9e4 100644
--- a/sql/log.cc
+++ b/sql/log.cc
@@ -10265,6 +10265,21 @@ MYSQL_BIN_LOG::do_binlog_recovery(const char *opt_name, bool do_xa_recovery)
   return error;
 }
 
+void MYSQL_BIN_LOG::harvest_bytes_written(Relay_log_info *rli)
+{
+#ifndef DBUG_OFF
+  char buf1[22],buf2[22];
+#endif
+  DBUG_ENTER("harvest_bytes_written");
+  mysql_mutex_lock(&rli->log_space_lock);
+  rli->log_space_total+=bytes_written;
+  DBUG_PRINT("info",("relay_log_space: %s  bytes_written: %s",
+                     llstr(rli->log_space_total,buf1),
+                     llstr(bytes_written,buf2)));
+  bytes_written=0;
+  mysql_mutex_unlock(&rli->log_space_lock);
+  DBUG_VOID_RETURN;
+}
 
 #ifdef INNODB_COMPATIBILITY_HOOKS
 /**
diff --git a/sql/log.h b/sql/log.h
index 277e5c6f69c..3b136c47bcf 100644
--- a/sql/log.h
+++ b/sql/log.h
@@ -706,18 +706,7 @@ class MYSQL_BIN_LOG: public TC_LOG, private MYSQL_LOG
   {
     bytes_written = 0;
   }
-  void harvest_bytes_written(ulonglong* counter)
-  {
-#ifndef DBUG_OFF
-    char buf1[22],buf2[22];
-#endif
-    DBUG_ENTER("harvest_bytes_written");
-    (*counter)+=bytes_written;
-    DBUG_PRINT("info",("counter: %s  bytes_written: %s", llstr(*counter,buf1),
-		       llstr(bytes_written,buf2)));
-    bytes_written=0;
-    DBUG_VOID_RETURN;
-  }
+  void harvest_bytes_written(Relay_log_info *rli);
   void set_max_size(ulong max_size_arg);
   void signal_update();
   void wait_for_sufficient_commits();
diff --git a/sql/rpl_rli.cc b/sql/rpl_rli.cc
index c196a65809a..18d7db234fe 100644
--- a/sql/rpl_rli.cc
+++ b/sql/rpl_rli.cc
@@ -467,7 +467,9 @@ static inline int add_relay_log(Relay_log_info* rli,LOG_INFO* linfo)
                     linfo->log_file_name);
     DBUG_RETURN(1);
   }
+  mysql_mutex_lock(&rli->log_space_lock);
   rli->log_space_total += s.st_size;
+  mysql_mutex_unlock(&rli->log_space_lock);
   DBUG_PRINT("info",("log_space_total: %llu", rli->log_space_total));
   DBUG_RETURN(0);
 }
@@ -477,7 +479,9 @@ static int count_relay_log_space(Relay_log_info* rli)
 {
   LOG_INFO linfo;
   DBUG_ENTER("count_relay_log_space");
+  mysql_mutex_lock(&rli->log_space_lock);
   rli->log_space_total= 0;
+  mysql_mutex_unlock(&rli->log_space_lock);
   if (rli->relay_log.find_log_pos(&linfo, NullS, 1))
   {
     sql_print_error("Could not find first log while counting relay log space");
diff --git a/sql/slave.cc b/sql/slave.cc
index 06f2b0d955a..3d9e23441bc 100644
--- a/sql/slave.cc
+++ b/sql/slave.cc
@@ -2508,7 +2508,7 @@ static void write_ignored_events_info_to_relay_log(THD *thd, Master_info *mi)
     }
     if (likely (rev || glev))
     {
-      rli->relay_log.harvest_bytes_written(&rli->log_space_total);
+      rli->relay_log.harvest_bytes_written(rli);
       if (flush_master_info(mi, TRUE, TRUE))
         sql_print_error("Failed to flush master info file");
     }
@@ -5270,7 +5270,7 @@ static int process_io_create_file(Master_info* mi, Create_file_log_event* cev)
                      "error writing Exec_load event to relay log");
           goto err;
         }
-        mi->rli.relay_log.harvest_bytes_written(&mi->rli.log_space_total);
+        mi->rli.relay_log.harvest_bytes_written(&mi->rli);
         break;
       }
       if (unlikely(cev_not_written))
@@ -5285,7 +5285,7 @@ static int process_io_create_file(Master_info* mi, Create_file_log_event* cev)
           goto err;
         }
         cev_not_written=0;
-        mi->rli.relay_log.harvest_bytes_written(&mi->rli.log_space_total);
+        mi->rli.relay_log.harvest_bytes_written(&mi->rli);
       }
       else
       {
@@ -5299,7 +5299,7 @@ static int process_io_create_file(Master_info* mi, Create_file_log_event* cev)
                      "error writing Append_block event to relay log");
           goto err;
         }
-        mi->rli.relay_log.harvest_bytes_written(&mi->rli.log_space_total) ;
+        mi->rli.relay_log.harvest_bytes_written(&mi->rli) ;
       }
     }
   }
@@ -5489,7 +5489,7 @@ static int queue_binlog_ver_1_event(Master_info *mi, const char *buf,
       mysql_mutex_unlock(&mi->data_lock);
       DBUG_RETURN(1);
     }
-    rli->relay_log.harvest_bytes_written(&rli->log_space_total);
+    rli->relay_log.harvest_bytes_written(rli);
   }
   delete ev;
   mi->master_log_pos+= inc_pos;
@@ -5547,7 +5547,7 @@ static int queue_binlog_ver_3_event(Master_info *mi, const char *buf,
     mysql_mutex_unlock(&mi->data_lock);
     DBUG_RETURN(1);
   }
-  rli->relay_log.harvest_bytes_written(&rli->log_space_total);
+  rli->relay_log.harvest_bytes_written(rli);
   delete ev;
   mi->master_log_pos+= inc_pos;
 err:
@@ -5784,9 +5784,7 @@ static int queue_event(Master_info* mi,const char* buf, ulong event_len)
       mysql_mutex_lock(log_lock);
       if (likely(!rli->relay_log.write_event(&fdle) &&
                  !rli->relay_log.flush_and_sync(NULL)))
-      {
-        rli->relay_log.harvest_bytes_written(&rli->log_space_total);
-      }
+        rli->relay_log.harvest_bytes_written(rli);
       else
       {
         error= ER_SLAVE_RELAY_LOG_WRITE_FAILURE;
@@ -6221,7 +6219,7 @@ static int queue_event(Master_info* mi,const char* buf, ulong event_len)
           (rli->relay_log.description_event_for_queue))
         error= ER_SLAVE_RELAY_LOG_WRITE_FAILURE;
       else
-        rli->relay_log.harvest_bytes_written(&rli->log_space_total);
+        rli->relay_log.harvest_bytes_written(rli);
     }
     else if (mi->gtid_reconnect_event_skip_count == 0)
     {
@@ -6234,7 +6232,7 @@ static int queue_event(Master_info* mi,const char* buf, ulong event_len)
       if (rli->relay_log.append_no_lock(&fake_rev))
         error= ER_SLAVE_RELAY_LOG_WRITE_FAILURE;
       else
-        rli->relay_log.harvest_bytes_written(&rli->log_space_total);
+        rli->relay_log.harvest_bytes_written(rli);
     }
   }
   else
@@ -6302,7 +6300,7 @@ static int queue_event(Master_info* mi,const char* buf, ulong event_len)
     {
       mi->master_log_pos+= inc_pos;
       DBUG_PRINT("info", ("master_log_pos: %lu", (ulong) mi->master_log_pos));
-      rli->relay_log.harvest_bytes_written(&rli->log_space_total);
+      rli->relay_log.harvest_bytes_written(rli);
     }
     else
     {
@@ -6382,7 +6380,7 @@ void end_relay_log_info(Relay_log_info* rli)
   log_lock= rli->relay_log.get_log_lock();
   mysql_mutex_lock(log_lock);
   rli->relay_log.close(LOG_CLOSE_INDEX | LOG_CLOSE_STOP_EVENT);
-  rli->relay_log.harvest_bytes_written(&rli->log_space_total);
+  rli->relay_log.harvest_bytes_written(rli);
   mysql_mutex_unlock(log_lock);
   /*
     Delete the slave's temporary tables from memory.
@@ -7318,7 +7316,9 @@ int rotate_relay_log(Master_info* mi)
     Note that it needs to be protected by mi->data_lock.
   */
   mysql_mutex_assert_owner(&mi->data_lock);
-  rli->relay_log.harvest_bytes_written(&rli->log_space_total);
+
+  rli->relay_log.harvest_bytes_written(rli);
+
 end:
   DBUG_RETURN(error);
 }