← Back to team overview

maria-developers team mailing list archive

Re: Patch for parallel replication DBUG_ASSERT check on DROP TABLE et al

 

Monty writes:

> Can you please check if this looks correct?

> diff --git a/sql/mdl.cc b/sql/mdl.cc
> index 28d2006..3b1f9f2 100644
> --- a/sql/mdl.cc
> +++ b/sql/mdl.cc
> @@ -17,6 +17,7 @@
>  #include "sql_class.h"
>  #include "debug_sync.h"
>  #include "sql_array.h"
> +#include "rpl_rli.h"
>  #include <hash.h>
>  #include <mysqld_error.h>
>  #include <mysql/plugin.h>
> @@ -442,6 +443,7 @@ class MDL_lock
>    virtual void notify_conflicting_locks(MDL_context *ctx) = 0;
>  
>    virtual bitmap_t hog_lock_types_bitmap() const = 0;
> +  bool check_if_conflicting_replication_locks(MDL_context *ctx);

This should probably be #ifdef NDEBUG, since you're only using it in debug
build?

>  
>    /** List of granted tickets for this lock. */
>    Ticket_list m_granted;
> @@ -2290,6 +2292,44 @@ void MDL_scoped_lock::notify_conflicting_locks(MDL_context *ctx)
>    }
>  }
>  
> +/**
> +  Check if there is any conflicting lock that could cause this thread
> +  to wait for another thread which is not ready to commit.
> +  This is always an error, as the upper level of parallel replication
> +  should not allow a scheduling of a conflicting DDL until all earlier
> +  transactions has commited.
> +
> +  This function is only called for a slave using parallel replication
> +  and trying to get an exclusive lock for the table.
> +*/
> +
> +bool MDL_lock::check_if_conflicting_replication_locks(MDL_context *ctx)
> +{
> +  Ticket_iterator it(m_granted);
> +  MDL_ticket *conflicting_ticket;
> +
> +  while ((conflicting_ticket= it++))
> +  {
> +    if (conflicting_ticket->get_ctx() != ctx)
> +    {
> +      MDL_context *conflicting_ctx= conflicting_ticket->get_ctx();
> +
> +      /*
> +        If the conflicting thread is another parallel replication
> +        thread for the same master and it's not in commit stage, then
> +        the current transaction has started too early and something is
> +        seriously wrong.
> +      */
> +      if (conflicting_ctx->get_thd()->rgi_slave &&
> +          conflicting_ctx->get_thd()->rgi_slave->rli ==
> +          ctx->get_thd()->rgi_slave->rli &&
> +          !conflicting_ctx->get_thd()->rgi_slave->did_mark_start_commit)
> +        return 1;                               // Fatal error

No, this is not correct. For example, the threads might be in different
domains, or using different multi-source connections. You need a check like
in thd_report_wait_for(). _If_ T1 and T2 are in the same parallel
replication domain, _and_ T1 goes before T2, _and_ there is a DDL lock
conflict but T1 has not started to commit, then it indicates a potential
problem.

Also, the comments are too vague, "Something is seriously wrong" is not a
good description. What is wrong? Please describe the issue being checked
more precisely. What is the role of each of the two threads involved? What
is the situation being checked? Why is it wrong?

Also, use some temporaries for conflicting_ctx->get_thd()->rgi_slave etc, so
the condition becomes more readable.

> +    }
> +  }
> +  return 0;
> +}
> +
>  
>  /**
>    Acquire one lock with waiting for conflicting locks to go away if needed.
> @@ -2355,6 +2395,11 @@ MDL_context::acquire_lock(MDL_request *mdl_request, ulong lock_wait_timeout)
>    if (lock->needs_notification(ticket) && lock_wait_timeout)
>      lock->notify_conflicting_locks(this);
>  
> +  DBUG_ASSERT(mdl_request->type != MDL_INTENTION_EXCLUSIVE ||
> +              !(get_thd()->rgi_slave &&
> +                get_thd()->rgi_slave->is_parallel_exec &&
> +                lock->check_if_conflicting_replication_locks(this)));
> +
>    mysql_prlock_unlock(&lock->m_rwlock);
>  
>    will_wait_for(ticket);
> diff --git a/sql/mdl.h b/sql/mdl.h
> index c4d792a..13de602 100644
> --- a/sql/mdl.h
> +++ b/sql/mdl.h
> @@ -910,7 +910,6 @@ class MDL_context
>     */
>    MDL_wait_for_subgraph *m_waiting_for;
>  private:
> -  THD *get_thd() const { return m_owner->get_thd(); }
>    MDL_ticket *find_ticket(MDL_request *mdl_req,
>                            enum_mdl_duration *duration);
>    void release_locks_stored_before(enum_mdl_duration duration, MDL_ticket *sentinel);
> @@ -919,6 +918,7 @@ class MDL_context
>                               MDL_ticket **out_ticket);
>  
>  public:
> +  THD *get_thd() const { return m_owner->get_thd(); }
>    void find_deadlock();
>  
>    ulong get_thread_id() const { return thd_get_thread_id(get_thd()); }

 - Kristian.


Follow ups