← Back to team overview

maria-developers team mailing list archive

Re: 4b164f176e6: MDEV-25114 Crash: WSREP: invalid state ROLLED_BACK (FATAL)

 

Hi, Jan!

Here's an idea of the fix:

Let's always use the KILL mutex locking order, that is

  victim_thread->LOCK_thd_data -> lock_sys->mutex -> victim_trx->mutex

For this we need to fix wsrep_abort_transaction(), which is called from the
server, and wsrep_innobase_kill_one_trx(), which is called from BF
thread.

wsrep_abort_transaction() can be fixed by not invoking
wsrep_innobase_kill_one_trx() and always using KILL code path (that is
wsrep_thd_awake) and forcing rollback after the kill.

wsrep_innobase_kill_one_trx() can be fixed by not locking LOCK_thd_data
at all, just don't lock it. We know that the victim waits on a lock
inside InnoDB and we've locked trx mutex and lock_sys mutex. The victim
cannot go away, cannot modify its data, it cannot do anything. So,
LOCK_thd_data doesn't seem to be necessary at that point.

I've attached a demo patch. It compiles, but I didn't try to run it,
it's only to show the idea, not a working fix (I already suspect I
removed too much from wsrep_abort_transaction()). Note it's the patch
for 10.2 at the commit 29bbcac0ee8^ - that is one commit before my fix.

On Oct 12, Jan Lindström wrote:
> Hi Sergei,
> 
> Update on wsrep_close_connections problem. My suggestion to fix this issue
> is on
> https://github.com/MariaDB/server/commit/99cbe03a44cc95e6f548550df51e7201ebea3b9d
> 
> If you have a better solution, please advise.
> 
> R: Jan
 
Regards,
Sergei
VP of MariaDB Server Engineering
and security@xxxxxxxxxxx
diff --git a/include/mysql/service_wsrep.h b/include/mysql/service_wsrep.h
index 54b9eb9250c..3f397276498 100644
--- a/include/mysql/service_wsrep.h
+++ b/include/mysql/service_wsrep.h
@@ -91,7 +91,7 @@ extern struct wsrep_service_st {
   enum wsrep_trx_status       (*wsrep_run_wsrep_commit_func)(THD *thd, bool all);
   void                        (*wsrep_thd_LOCK_func)(THD *thd);
   void                        (*wsrep_thd_UNLOCK_func)(THD *thd);
-  void                        (*wsrep_thd_awake_func)(THD *thd, my_bool signal);
+  void                        (*wsrep_thd_awake_func)(THD *thd, my_bool signal, my_bool sync);
   enum wsrep_conflict_state   (*wsrep_thd_conflict_state_func)(MYSQL_THD, my_bool);
   const char *                (*wsrep_thd_conflict_state_str_func)(THD *thd);
   enum wsrep_exec_mode        (*wsrep_thd_exec_mode_func)(THD *thd);
@@ -139,7 +139,7 @@ extern struct wsrep_service_st {
 #define wsrep_run_wsrep_commit(T,A) wsrep_service->wsrep_run_wsrep_commit_func(T,A)
 #define wsrep_thd_LOCK(T) wsrep_service->wsrep_thd_LOCK_func(T)
 #define wsrep_thd_UNLOCK(T) wsrep_service->wsrep_thd_UNLOCK_func(T)
-#define wsrep_thd_awake(T,S) wsrep_service->wsrep_thd_awake_func(T,S)
+#define wsrep_thd_awake(T,S,L) wsrep_service->wsrep_thd_awake_func(T,S,L)
 #define wsrep_thd_conflict_state(T,S) wsrep_service->wsrep_thd_conflict_state_func(T,S)
 #define wsrep_thd_conflict_state_str(T) wsrep_service->wsrep_thd_conflict_state_str_func(T)
 #define wsrep_thd_exec_mode(T) wsrep_service->wsrep_thd_exec_mode_func(T)
@@ -220,7 +220,7 @@ void wsrep_lock_rollback();
 void wsrep_post_commit(THD* thd, bool all);
 void wsrep_thd_LOCK(THD *thd);
 void wsrep_thd_UNLOCK(THD *thd);
-void wsrep_thd_awake(THD *thd, my_bool signal);
+void wsrep_thd_awake(THD *thd, my_bool signal, my_bool lock);
 void wsrep_thd_set_conflict_state(THD *thd, enum wsrep_conflict_state state);
 bool wsrep_thd_ignore_table(THD *thd);
 void wsrep_unlock_rollback();
diff --git a/sql/sql_class.cc b/sql/sql_class.cc
index 92736eacee2..e248f644a0a 100644
--- a/sql/sql_class.cc
+++ b/sql/sql_class.cc
@@ -1693,7 +1693,9 @@ void THD::awake(killed_state state_to_set)
   DBUG_PRINT("enter", ("this: %p current_thd: %p  state: %d",
                        this, current_thd, (int) state_to_set));
   THD_CHECK_SENTRY(this);
-  mysql_mutex_assert_owner(&LOCK_thd_data);
+
+  if (!WSREP_ON || !wsrep_thd_is_BF(current_thd, FALSE))
+    mysql_mutex_assert_owner(&LOCK_thd_data);
 
   print_aborted_warning(3, "KILLED");
 
diff --git a/sql/wsrep_dummy.cc b/sql/wsrep_dummy.cc
index 53941c06892..7973b325b68 100644
--- a/sql/wsrep_dummy.cc
+++ b/sql/wsrep_dummy.cc
@@ -83,7 +83,7 @@ void wsrep_thd_LOCK(THD *)
 void wsrep_thd_UNLOCK(THD *)
 { }
 
-void wsrep_thd_awake(THD *, my_bool)
+void wsrep_thd_awake(THD *, my_bool, my_bool)
 { }
 
 const char *wsrep_thd_conflict_state_str(THD *)
diff --git a/sql/wsrep_mysqld.cc b/sql/wsrep_mysqld.cc
index d392d1c2a61..ff503ddf82b 100644
--- a/sql/wsrep_mysqld.cc
+++ b/sql/wsrep_mysqld.cc
@@ -2758,13 +2758,15 @@ extern "C" void wsrep_thd_set_wsrep_last_query_id(THD *thd, query_id_t id)
 }
 
 
-extern "C" void wsrep_thd_awake(THD *thd, my_bool signal)
+extern "C" void wsrep_thd_awake(THD *thd, my_bool signal, my_bool sync)
 {
   if (signal)
   {
-    mysql_mutex_lock(&thd->LOCK_thd_data);
+    if (sync)
+      mysql_mutex_lock(&thd->LOCK_thd_data);
     thd->awake(KILL_QUERY);
-    mysql_mutex_unlock(&thd->LOCK_thd_data);
+    if (sync)
+      mysql_mutex_unlock(&thd->LOCK_thd_data);
   }
   else
   {
diff --git a/storage/innobase/handler/ha_innodb.cc b/storage/innobase/handler/ha_innodb.cc
index e475852cb90..fcc2949f9ce 100644
--- a/storage/innobase/handler/ha_innodb.cc
+++ b/storage/innobase/handler/ha_innodb.cc
@@ -19572,7 +19572,7 @@ wsrep_innobase_kill_one_trx(
 		WSREP_DEBUG("victim " TRX_ID_FMT " in MUST ABORT state",
 			    victim_trx->id);
 		wsrep_thd_UNLOCK(thd);
-		wsrep_thd_awake(thd, signal);
+		wsrep_thd_awake(thd, signal, 0);
 		DBUG_VOID_RETURN;
 		break;
 	case ABORTED:
@@ -19610,7 +19610,7 @@ wsrep_innobase_kill_one_trx(
 					    TRX_ID_FMT,
 					    victim_trx->id);
 				wsrep_thd_UNLOCK(thd);
-				wsrep_thd_awake(thd, signal);
+				wsrep_thd_awake(thd, signal, 0);
 				DBUG_VOID_RETURN;
 				break;
 			case WSREP_OK:
@@ -19629,7 +19629,7 @@ wsrep_innobase_kill_one_trx(
 			}
 		}
 		wsrep_thd_UNLOCK(thd);
-		wsrep_thd_awake(thd, signal);
+		wsrep_thd_awake(thd, signal, 0);
 		break;
 	case QUERY_EXEC:
 		/* it is possible that victim trx is itself waiting for some
@@ -19652,7 +19652,7 @@ wsrep_innobase_kill_one_trx(
 			}
 
 			wsrep_thd_UNLOCK(thd);
-			wsrep_thd_awake(thd, signal);
+			wsrep_thd_awake(thd, signal, 0);
 		} else {
 			/* abort currently executing query */
 			DBUG_PRINT("wsrep",("sending KILL_QUERY to: %lu",
@@ -19662,7 +19662,7 @@ wsrep_innobase_kill_one_trx(
 			/* Note that innobase_kill_query will take lock_mutex
 			and trx_mutex */
 			wsrep_thd_UNLOCK(thd);
-			wsrep_thd_awake(thd, signal);
+			wsrep_thd_awake(thd, signal, 0);
 
 			/* for BF thd, we need to prevent him from committing */
 			if (wsrep_thd_exec_mode(thd) == REPL_RECV) {
@@ -19727,29 +19727,13 @@ wsrep_abort_transaction(
 {
 	DBUG_ENTER("wsrep_abort_transaction");
 
-	trx_t* victim_trx	= thd_to_trx(victim_thd);
-	trx_t* bf_trx		= (bf_thd) ? thd_to_trx(bf_thd) : NULL;
-
 	WSREP_DEBUG("abort transaction: BF: %s victim: %s victim conf: %d",
 			wsrep_thd_query(bf_thd),
 			wsrep_thd_query(victim_thd),
 			wsrep_thd_conflict_state(victim_thd, FALSE));
 
-	if (victim_trx) {
-		lock_mutex_enter();
-		trx_mutex_enter(victim_trx);
-		wsrep_innobase_kill_one_trx(bf_thd, bf_trx, victim_trx, signal);
-		lock_mutex_exit();
-		trx_mutex_exit(victim_trx);
-		wsrep_srv_conc_cancel_wait(victim_trx);
-		DBUG_VOID_RETURN;
-	} else {
-		WSREP_DEBUG("victim does not have transaction");
-		wsrep_thd_LOCK(victim_thd);
-		wsrep_thd_set_conflict_state(victim_thd, MUST_ABORT);
-		wsrep_thd_UNLOCK(victim_thd);
-		wsrep_thd_awake(victim_thd, signal);
-	}
+	thd_mark_transaction_to_rollback(victim_thd, 1);
+	wsrep_thd_awake(victim_thd, signal, 1);
 
 	DBUG_VOID_RETURN;
 }

Follow ups

References