← Back to team overview

maria-developers team mailing list archive

Re: [Commits] a6fb98f: MDEV-6368: assertion xid_seqno > trx_sys_cur_xid_seqno

 

Hi Nirbhay,

I'm not expert on this code area but some questions, comments below.

On Tue, May 3, 2016 at 1:26 AM, Nirbhay Choubey <nirbhay@xxxxxxxxxxx> wrote:

> revision-id: a6fb98fd74ab6f14fab0c1ef4630b010ff28880f
> (mariadb-10.1.13-6-ga6fb98f)
> parent(s): f8cdb9e7e8fb692f8eb3b9bb4792cd60653e0eb8
> author: Nirbhay Choubey
> committer: Nirbhay Choubey
> timestamp: 2016-05-02 18:26:58 -0400
> message:
>
> MDEV-6368: assertion xid_seqno > trx_sys_cur_xid_seqno
>
> - Validate the specified wsrep_start_position value by also
> checking the return status of wsrep->sst_received. This also
> ensures that changes in wsrep_start_position is not allowed
> when the node is not in JOINING state.
> - Do not allow decrease in seqno within same UUID.
> - The initial checkpoint in SEs should be [0...:-1].
>
>
> diff --git a/sql/wsrep_sst.cc b/sql/wsrep_sst.cc
> index 4f6cc51..44fdd03 100644
> --- a/sql/wsrep_sst.cc
> +++ b/sql/wsrep_sst.cc
> @@ -264,42 +264,91 @@ void wsrep_sst_complete (const wsrep_uuid_t*
> sst_uuid,
>    mysql_mutex_unlock (&LOCK_wsrep_sst);
>  }
>
> -void wsrep_sst_received (wsrep_t*            const wsrep,
> +/*
> +  If wsrep provider is loaded, inform that the new state snapshot
> +  has been received. Also update the local checkpoint.
> +
> +  @param wsrep     [IN]               wsrep handle
> +  @param uuid      [IN]               Initial state UUID
> +  @param seqno     [IN]               Initial state sequence number
> +  @param state     [IN]               Always NULL, also ignored by wsrep
> provider (?)
> +  @param state_len [IN]               Always 0, also ignored by wsrep
> provider (?)
> +  @param implicit  [IN]               Whether invoked implicitly due to
> SST
> +                                      (true) or explicitly because if
> change
> +                                      in wsrep_start_position by user
> (false).
> +  @return false                       Success
> +          true                        Error
> +
> +*/
> +bool wsrep_sst_received (wsrep_t*            const wsrep,
>                           const wsrep_uuid_t&       uuid,
>                           wsrep_seqno_t       const seqno,
>                           const void*         const state,
> -                         size_t              const state_len)
>

Why not const size_t state_len ? Use of const here is confusing.


> +                         size_t              const state_len,
> +                         bool                const implicit)
>  {
> -    wsrep_get_SE_checkpoint(local_uuid, local_seqno);
> +  /*
> +    To keep track of whether the local uuid:seqno should be updated.
> Also, note
> +    that local state (uuid:seqno) is updated/checkpointed only after we
> get an
> +    OK from wsrep provider. By doing so, the values remain consistent
> across
> +    the server & wsrep provider.
> +  */
> +  bool do_update= false;
> +
> +  // Get the locally stored uuid:seqno.
> +  wsrep_get_SE_checkpoint(local_uuid, local_seqno);
>

What if this fails, no error handling ?


>
> -    if (memcmp(&local_uuid, &uuid, sizeof(wsrep_uuid_t)) ||
> -        local_seqno < seqno || seqno < 0)
> +  if (memcmp(&local_uuid, &uuid, sizeof(wsrep_uuid_t)) ||
> +      local_seqno < seqno)
> +  {
> +    do_update= true;
> +  }
> +  else if (local_seqno > seqno)
> +  {
> +    WSREP_WARN("SST position can't be set in past. Requested: %lld,
> Current: "
> +               " %lld.", (long long)seqno, (long long)local_seqno);
> +    /*
> +      If we are here because of SET command, simply return true (error)
> instead of
> +      aborting.
> +    */
> +    if (implicit)
>      {
> -        wsrep_set_SE_checkpoint(uuid, seqno);
> -        local_uuid = uuid;
> -        local_seqno = seqno;
> +      WSREP_WARN("Can't continue.");
> +      unireg_abort(1);
>      }
> -    else if (local_seqno > seqno)
> +    else
>      {
> -        WSREP_WARN("SST postion is in the past: %lld, current: %lld. "
> -                   "Can't continue.",
> -                   (long long)seqno, (long long)local_seqno);
> -        unireg_abort(1);
> +      return true;
>      }
> +  }
>
>  #ifdef GTID_SUPPORT
> -    wsrep_init_sidno(uuid);
> +  wsrep_init_sidno(uuid);
>  #endif /* GTID_SUPPORT */
>
> -    if (wsrep)
> -    {
> -        int const rcode(seqno < 0 ? seqno : 0);
> -        wsrep_gtid_t const state_id = {
> -            uuid, (rcode ? WSREP_SEQNO_UNDEFINED : seqno)
> -        };
> +  if (wsrep)
> +  {
> +    int const rcode(seqno < 0 ? seqno : 0);
>

When seqno can be < 0 ?


> +    wsrep_gtid_t const state_id= {uuid,
> +      (rcode ? WSREP_SEQNO_UNDEFINED : seqno)};
> +
> +    wsrep_status_t ret= wsrep->sst_received(wsrep, &state_id, state,
> +                                            state_len, rcode);
>
> -        wsrep->sst_received(wsrep, &state_id, state, state_len, rcode);
> +    if (ret != WSREP_OK)
> +    {
> +      return true;
>      }
> +  }
> +
> +  // Now is the good time to update the local state and checkpoint.
> +  if (do_update) {
> +    wsrep_set_SE_checkpoint(uuid, seqno);
>

Again, I would add error handling.


> +    local_uuid= uuid;
> +    local_seqno= seqno;
> +  }
> +
> +  return false;
>  }
>
>  // Let applier threads to continue
> @@ -308,7 +357,7 @@ void wsrep_sst_continue ()
>    if (sst_needed)
>    {
>      WSREP_INFO("Signalling provider to continue.");
> -    wsrep_sst_received (wsrep, local_uuid, local_seqno, NULL, 0);
> +    wsrep_sst_received (wsrep, local_uuid, local_seqno, NULL, 0, true);
>

You should check the return code.


>    }
>  }
>
> diff --git a/sql/wsrep_var.cc b/sql/wsrep_var.cc
> index 9c01e54..bd94190 100644
>
>
>  bool wsrep_start_position_update (sys_var *self, THD* thd, enum_var_type
> type)
>  {
> -  WSREP_INFO ("wsrep_start_position var submitted: '%s'",
> -              wsrep_start_position);
> -  // since this value passed wsrep_start_position_check, don't check
> anything
> -  // here
> -  wsrep_set_local_position (wsrep_start_position, true);
> -  return 0;
> +  // Print a confirmation that wsrep_start_position has been updated.
> +  WSREP_INFO ("wsrep_start_position set to '%s'", wsrep_start_position);
> +  return false;
>

Hmm, normally I thought that system variable set would go like if
(variable_check()) variable_update()
but now update does not do nothing ?


>  }


>  void wsrep_start_position_init (const char* val)
> @@ -164,7 +194,7 @@ void wsrep_start_position_init (const char* val)
>      return;
>    }
>
> -  wsrep_set_local_position (val, false);
> +  wsrep_set_local_position (val, strlen(val), false);
>

Add error handling.


>  }
>
>  static bool refresh_provider_options()
> diff --git a/storage/innobase/trx/trx0sys.cc
> b/storage/innobase/trx/trx0sys.cc
> index 76c8613..1625e8d 100644
> --- a/storage/innobase/trx/trx0sys.cc
> +++ b/storage/innobase/trx/trx0sys.cc
> @@ -349,10 +349,10 @@ void read_wsrep_xid_uuid(const XID* xid, unsigned
> char* buf)
>              unsigned char xid_uuid[16];
>              long long xid_seqno = read_wsrep_xid_seqno(xid);
>              read_wsrep_xid_uuid(xid, xid_uuid);
> -            if (!memcmp(xid_uuid, trx_sys_cur_xid_uuid, 8))
> +            if (!memcmp(xid_uuid, trx_sys_cur_xid_uuid, 16) &&
> +                (xid_seqno > -1))
>

Does this mean that you ignore uuid if it is exactly the same, or is there
error handling on else?

R: Jan

Follow ups