← Back to team overview

maria-developers team mailing list archive

Re: [Commits] 842ef80b029: MDEV-18464: Port kill_one_trx fixes from 10.4 to 10.1

 

Hi Jan,

On Fri, Mar 22, 2019 at 1:59 PM jan <jan.lindstrom@xxxxxxxxxxx> wrote:
>
> revision-id: 842ef80b029c975b7f7f7ec25fe55817ec626f02 (mariadb-10.1.38-68-g842ef80b029)

Was this pushed to buildbot as well? I did not find the commit ID.

First, some general comments:

* The functions trx_start_if_not_started_low() and
trx_start_for_ddl_low() differ between XtraDB and InnoDB with regard
to #ifdef WITH_WSREP. Why? Are the debug assignments truly necessary?
I see that the difference was introduced 5 years ago in MDEV-6247.
* The files storage/{innobase,xtradb}/trx/trx0roll.cc differ from each
other after the change. It looks like the innobase version is better.
Please remove the unnecessary lines from xtradb:

--- storage/xtradb/trx/trx0roll.cc    2019-03-26 10:20:31.767971537 +0200
+++ storage/innobase/trx/trx0roll.cc    2019-03-26 10:20:31.759971465 +0200
@@ -33,8 +33,6 @@
 #include "trx0roll.ic"
 #endif

-#include <mysql/service_wsrep.h>
-
 #include "fsp0fsp.h"
 #include "mach0data.h"
 #include "trx0rseg.h"
@@ -51,9 +49,6 @@
 #include "pars0pars.h"
 #include "srv0mon.h"
 #include "trx0sys.h"
-#ifdef WITH_WSREP
-#include "ha_prototypes.h"
-#endif /* WITH_WSREP */

 /** This many pages must be undone before a truncate is tried within
 rollback */
@@ -1075,12 +1070,6 @@
     if (trx->update_undo) {
         trx_undo_truncate_end(trx, trx->update_undo, limit);
     }
-
-#ifdef WITH_WSREP_OUT
-    if (wsrep_on(trx->mysql_thd)) {
-        trx->lock.was_chosen_as_deadlock_victim = FALSE;
-    }
-#endif /* WITH_WSREP */
 }

 /***********************************************************************//**

On to the detailed review:

> diff --git a/storage/innobase/include/trx0trx.h b/storage/innobase/include/trx0trx.h
> index fe16b8272b8..b83734b154a 100644
> --- a/storage/innobase/include/trx0trx.h
> +++ b/storage/innobase/include/trx0trx.h
> @@ -1,7 +1,7 @@
>  /*****************************************************************************
>
>  Copyright (c) 1996, 2016, Oracle and/or its affiliates. All Rights Reserved.
> -Copyright (c) 2015, 2018, MariaDB Corporation.
> +Copyright (c) 2015, 2019, MariaDB Corporation.
>
>  This program is free software; you can redistribute it and/or modify it under
>  the terms of the GNU General Public License as published by the Free Software
> @@ -613,6 +613,9 @@ struct trx_lock_t {
>                                         transaction as a victim in deadlock
>                                         resolution, it sets this to TRUE.
>                                         Protected by trx->mutex. */
> +       bool            was_chosen_as_replication_victim;
> +                                       /* replication has marked this
> +                                       trx to abort */
>         time_t          wait_started;   /*!< lock wait started at this time,
>                                         protected only by lock_sys->mutex */
>
> @@ -624,6 +627,12 @@ struct trx_lock_t {
>                                         only be modified by the thread that is
>                                         serving the running transaction. */
>
> +#ifdef WITH_WSREP
> +       bool            was_chosen_as_wsrep_victim;
> +                                       /*!< high priority wsrep thread has
> +                                       marked this trx to abort */
> +#endif /* WITH_WSREP */
> +
>         mem_heap_t*     lock_heap;      /*!< memory heap for trx_locks;
>                                         protected by lock_sys->mutex */
>
> @@ -695,14 +704,6 @@ lock_rec_convert_impl_to_expl()) will access transactions associated
>  to other connections. The locks of transactions are protected by
>  lock_sys->mutex and sometimes by trx->mutex. */
>
> -enum trx_abort_t {
> -       TRX_SERVER_ABORT = 0,
> -#ifdef WITH_WSREP
> -       TRX_WSREP_ABORT,
> -#endif
> -       TRX_REPLICATION_ABORT
> -};
> -
>  struct trx_t{
>         ulint           magic_n;
>
> @@ -880,8 +881,6 @@ struct trx_t{
>         /*------------------------------*/
>         THD*            mysql_thd;      /*!< MySQL thread handle corresponding
>                                         to this trx, or NULL */
> -       trx_abort_t     abort_type;     /*!< Transaction abort type*/
> -
>         const char*     mysql_log_file_name;
>                                         /*!< if MySQL binlog is used, this field
>                                         contains a pointer to the latest file

Why are you replacing trx_t::abort_type with two fields in trx_lock_t
that are located far apart from each other?
To avoid changing the performance too much due to caching, I would
suggest to introduce the two fields in trx_t, replacing the former
location of abort_type.
Do we really need such long names? Would trx_t::replication_victim and
trx_t::wsrep_victim suffice?

Please document how concurrent access to the fields is being
protected. Apparently for the WSREP field it is both trx->mutex and
lock_sys->mutex.

> diff --git a/storage/innobase/lock/lock0lock.cc b/storage/innobase/lock/lock0lock.cc
> index 3970a559a4c..984ac6cacbc 100644
> --- a/storage/innobase/lock/lock0lock.cc
> +++ b/storage/innobase/lock/lock0lock.cc
> @@ -4785,9 +4783,8 @@ lock_report_waiters_to_mysql(
>                                 innobase_kill_query. We mark this by setting
>                                 current_lock_mutex_owner, so we can avoid trying
>                                 to recursively take lock_sys->mutex. */
> -                               w_trx->abort_type = TRX_REPLICATION_ABORT;
> +                               w_trx->lock.was_chosen_as_replication_victim = true;
>                                 thd_report_wait_for(mysql_thd, w_trx->mysql_thd);
> -                               w_trx->abort_type = TRX_SERVER_ABORT;
>                         }
>                         ++i;
>                 }

Here, we seem to be only holding lock_sys->mutex, not w_trx->mutex.

> @@ -7993,6 +7981,37 @@ lock_trx_handle_wait(
>         return DB_LOCK_WAIT;
>  }
>
> +/*********************************************************************//**
> +Check whether the transaction has already been rolled back because it
> +was selected as a deadlock victim, or if it has to wait then cancel
> +the wait lock.
> +@return DB_DEADLOCK, DB_LOCK_WAIT or DB_SUCCESS */
> +UNIV_INTERN
> +dberr_t
> +lock_trx_handle_wait(
> +/*=================*/
> +       trx_t*  trx)    /*!< in/out: trx lock state */
> +{
> +       bool lock_mutex_taken = false;

Do we really need this variable?

> +#ifdef WITH_WSREP
> +       /* We already own mutexes */
> +       if (trx->lock.was_chosen_as_wsrep_victim) {
> +               return lock_trx_handle_wait_low(trx);
> +       }
> +#endif /* WITH_WSREP */
> +       if (!trx->lock.was_chosen_as_replication_victim) {
> +               lock_mutex_enter();
> +               lock_mutex_taken = true;
> +       }
> +       trx_mutex_enter(trx);
> +       dberr_t err = lock_trx_handle_wait_low(trx);
> +       if (lock_mutex_taken) {
> +               lock_mutex_exit();
> +       }
> +       trx_mutex_exit(trx);
> +       return err;
> +}

I would expand the second part of the code and remove the condition
before releasing the 2 mutexes:

if (!trx->lock.was_chosen_as_replication_victim) {
 lock_mutex_enter();
 trx_mutex_enter(trx);
 dberr_t err = lock_trx_handle_wait_low(trx);
 lock_mutex_exit();
 trx_mutex_exit(trx);
 return err;
} else {
 trx_mutex_enter(trx);
 dberr_t err = lock_trx_handle_wait_low(trx);
 trx_mutex_exit(trx);
 return err;
}

Also, I think that it would be better to declare
lock_trx_handle_wait_low() only static but not inline, to reduce
unnecessary code expansion.

Best regards,

Marko