maria-developers team mailing list archive
-
maria-developers team
-
Mailing list archive
-
Message #04888
Re: [Commits] Rev 3435: MDEV-532: Async InnoDB commit checkpoint. in http://bazaar.launchpad.net/~maria-captains/maria/10.0
Hi Serg,
As we discussed during the review of MDEV-232, here is a separate patch that makes
InnoDB/XtraDB commit checkpointing more asynchronous.
See MDEV-532 for further description of this task.
I hope you will review this, at your convenience.
- Kristian.
knielsen@xxxxxxxxxxxxxxx writes:
> At http://bazaar.launchpad.net/~maria-captains/maria/10.0
>
> ------------------------------------------------------------
> revno: 3435
> revision-id: knielsen@xxxxxxxxxxxxxxx-20120914124453-zsap6hjclq3vrb6n
> parent: knielsen@xxxxxxxxxxxxxxx-20120913123129-kaujy4cw0jc9o08k
> committer: knielsen@xxxxxxxxxxxxxxx
> branch nick: work-10.0-mdev225-181-232
> timestamp: Fri 2012-09-14 14:44:53 +0200
> message:
> MDEV-532: Async InnoDB commit checkpoint.
>
> Make the commit checkpoint inside InnoDB be asynchronous.
> Implement a background thread in binlog to do the writing and flushing of
> binlog checkpoint events to disk.
> === modified file 'mysql-test/suite/binlog/r/binlog_checkpoint.result'
> --- a/mysql-test/suite/binlog/r/binlog_checkpoint.result 2012-09-13 12:31:29 +0000
> +++ b/mysql-test/suite/binlog/r/binlog_checkpoint.result 2012-09-14 12:44:53 +0000
> @@ -70,8 +70,14 @@ show binlog events in 'master-bin.000003
> Log_name Pos Event_type Server_id End_log_pos Info
> master-bin.000003 # Format_desc # # SERVER_VERSION, BINLOG_VERSION
> master-bin.000003 # Binlog_checkpoint # # master-bin.000001
> +SET DEBUG_SYNC= "RESET";
> +SET @old_dbug= @@global.DEBUG_DBUG;
> +SET GLOBAL debug_dbug="+d,binlog_background_checkpoint_processed";
> SET DEBUG_SYNC= "now SIGNAL con2_continue";
> con1 is still pending, no new binlog checkpoint should have been logged.
> +SET DEBUG_SYNC= "now WAIT_FOR binlog_background_checkpoint_processed";
> +SET GLOBAL debug_dbug= @old_dbug;
> +SET DEBUG_SYNC= "RESET";
> show binlog events in 'master-bin.000003' from <binlog_start>;
> Log_name Pos Event_type Server_id End_log_pos Info
> master-bin.000003 # Format_desc # # SERVER_VERSION, BINLOG_VERSION
>
> === modified file 'mysql-test/suite/binlog/r/binlog_xa_recover.result'
> --- a/mysql-test/suite/binlog/r/binlog_xa_recover.result 2012-09-13 12:31:29 +0000
> +++ b/mysql-test/suite/binlog/r/binlog_xa_recover.result 2012-09-14 12:44:53 +0000
> @@ -118,7 +118,11 @@ master-bin.00000<binlog_start> # Table_m
> master-bin.00000<binlog_start> # Write_rows # # table_id: # flags: STMT_END_F
> master-bin.00000<binlog_start> # Xid # # COMMIT /* XID */
> SET DEBUG_SYNC= "now SIGNAL con10_cont";
> +SET @old_dbug= @@global.DEBUG_DBUG;
> +SET GLOBAL debug_dbug="+d,binlog_background_checkpoint_processed";
> SET DEBUG_SYNC= "now SIGNAL con12_cont";
> +SET DEBUG_SYNC= "now WAIT_FOR binlog_background_checkpoint_processed";
> +SET GLOBAL debug_dbug= @old_dbug;
> SET DEBUG_SYNC= "now SIGNAL con11_cont";
> Checking that master-bin.000004 is the last binlog checkpoint
> show binlog events in 'master-bin.00000<binlog_start>' from <binlog_start>;
>
> === modified file 'mysql-test/suite/binlog/t/binlog_checkpoint.test'
> --- a/mysql-test/suite/binlog/t/binlog_checkpoint.test 2012-09-13 12:31:29 +0000
> +++ b/mysql-test/suite/binlog/t/binlog_checkpoint.test 2012-09-14 12:44:53 +0000
> @@ -71,6 +71,12 @@ SET DEBUG_SYNC= "now WAIT_FOR con2_ready
> --let $binlog_file= master-bin.000003
> --source include/show_binlog_events.inc
>
> +# We need to sync the test case with the background processing of the
> +# commit checkpoint, otherwise we get nondeterministic results.
> +SET DEBUG_SYNC= "RESET";
> +SET @old_dbug= @@global.DEBUG_DBUG;
> +SET GLOBAL debug_dbug="+d,binlog_background_checkpoint_processed";
> +
> SET DEBUG_SYNC= "now SIGNAL con2_continue";
>
> connection con2;
> @@ -78,6 +84,12 @@ reap;
>
> connection default;
> --echo con1 is still pending, no new binlog checkpoint should have been logged.
> +# Make sure commit checkpoint is processed before we check that no checkpoint
> +# event has been binlogged.
> +SET DEBUG_SYNC= "now WAIT_FOR binlog_background_checkpoint_processed";
> +SET GLOBAL debug_dbug= @old_dbug;
> +SET DEBUG_SYNC= "RESET";
> +
> --let $binlog_file= master-bin.000003
> --source include/show_binlog_events.inc
>
>
> === modified file 'mysql-test/suite/binlog/t/binlog_xa_recover.test'
> --- a/mysql-test/suite/binlog/t/binlog_xa_recover.test 2012-09-13 12:31:29 +0000
> +++ b/mysql-test/suite/binlog/t/binlog_xa_recover.test 2012-09-14 12:44:53 +0000
> @@ -14,8 +14,24 @@ CREATE TABLE t1 (a INT PRIMARY KEY, b ME
> # Insert some data to force a couple binlog rotations (3), so we get some
> # normal binlog checkpoints before starting the test.
> INSERT INTO t1 VALUES (100, REPEAT("x", 4100));
> +# Wait for the master-bin.000002 binlog checkpoint to appear.
> +--let $wait_for_all= 0
> +--let $show_statement= SHOW BINLOG EVENTS IN "master-bin.000002"
> +--let $field= Info
> +--let $condition= = "master-bin.000002"
> +--source include/wait_show_condition.inc
> INSERT INTO t1 VALUES (101, REPEAT("x", 4100));
> +--let $wait_for_all= 0
> +--let $show_statement= SHOW BINLOG EVENTS IN "master-bin.000003"
> +--let $field= Info
> +--let $condition= = "master-bin.000003"
> +--source include/wait_show_condition.inc
> INSERT INTO t1 VALUES (102, REPEAT("x", 4100));
> +--let $wait_for_all= 0
> +--let $show_statement= SHOW BINLOG EVENTS IN "master-bin.000004"
> +--let $field= Info
> +--let $condition= = "master-bin.000004"
> +--source include/wait_show_condition.inc
>
> # Now start a bunch of transactions that span multiple binlog
> # files. Leave then in the state prepared-but-not-committed in the engine
> @@ -153,10 +169,19 @@ SET DEBUG_SYNC= "now SIGNAL con10_cont";
> connection con10;
> reap;
> connection default;
> +
> +# We need to sync the test case with the background processing of the
> +# commit checkpoint, otherwise we get nondeterministic results.
> +SET @old_dbug= @@global.DEBUG_DBUG;
> +SET GLOBAL debug_dbug="+d,binlog_background_checkpoint_processed";
> +
> SET DEBUG_SYNC= "now SIGNAL con12_cont";
> connection con12;
> reap;
> connection default;
> +SET DEBUG_SYNC= "now WAIT_FOR binlog_background_checkpoint_processed";
> +SET GLOBAL debug_dbug= @old_dbug;
> +
> SET DEBUG_SYNC= "now SIGNAL con11_cont";
> connection con11;
> reap;
> @@ -210,7 +235,20 @@ RESET MASTER;
> # crash recovery fails due to the error insert used for previous test.
> INSERT INTO t1 VALUES (21, REPEAT("x", 4100));
> INSERT INTO t1 VALUES (22, REPEAT("x", 4100));
> +# Wait for the master-bin.000003 binlog checkpoint to appear.
> +--let $wait_for_all= 0
> +--let $show_statement= SHOW BINLOG EVENTS IN "master-bin.000003"
> +--let $field= Info
> +--let $condition= = "master-bin.000003"
> +--source include/wait_show_condition.inc
> INSERT INTO t1 VALUES (23, REPEAT("x", 4100));
> +# Wait for the last (master-bin.000004) binlog checkpoint to appear.
> +--let $wait_for_all= 0
> +--let $show_statement= SHOW BINLOG EVENTS IN "master-bin.000004"
> +--let $field= Info
> +--let $condition= = "master-bin.000004"
> +--source include/wait_show_condition.inc
> +
> --write_file $MYSQLTEST_VARDIR/tmp/mysqld.1.expect
> wait-binlog_xa_recover.test
> EOF
>
> === modified file 'mysql-test/suite/perfschema/r/all_instances.result'
> --- a/mysql-test/suite/perfschema/r/all_instances.result 2012-06-22 09:46:28 +0000
> +++ b/mysql-test/suite/perfschema/r/all_instances.result 2012-09-14 12:44:53 +0000
> @@ -76,6 +76,7 @@ wait/synch/mutex/sql/Master_info::run_lo
> wait/synch/mutex/sql/Master_info::sleep_lock
> wait/synch/mutex/sql/MDL_map::mutex
> wait/synch/mutex/sql/MDL_wait::LOCK_wait_status
> +wait/synch/mutex/sql/MYSQL_BIN_LOG::LOCK_binlog_thread
> wait/synch/mutex/sql/MYSQL_BIN_LOG::LOCK_index
> wait/synch/mutex/sql/MYSQL_BIN_LOG::LOCK_xid_list
> wait/synch/mutex/sql/MYSQL_RELAY_LOG::LOCK_index
> @@ -129,6 +130,8 @@ wait/synch/cond/sql/Master_info::sleep_c
> wait/synch/cond/sql/Master_info::start_cond
> wait/synch/cond/sql/Master_info::stop_cond
> wait/synch/cond/sql/MDL_context::COND_wait_status
> +wait/synch/cond/sql/MYSQL_BIN_LOG::COND_binlog_thread
> +wait/synch/cond/sql/MYSQL_BIN_LOG::COND_binlog_thread_end
> wait/synch/cond/sql/MYSQL_BIN_LOG::COND_queue_busy
> wait/synch/cond/sql/MYSQL_BIN_LOG::COND_xid_list
> wait/synch/cond/sql/MYSQL_BIN_LOG::update_cond
>
> === modified file 'mysql-test/suite/perfschema/r/relaylog.result'
> --- a/mysql-test/suite/perfschema/r/relaylog.result 2012-06-22 09:46:28 +0000
> +++ b/mysql-test/suite/perfschema/r/relaylog.result 2012-09-14 12:44:53 +0000
> @@ -56,8 +56,11 @@ where event_name like "%MYSQL_BIN_LOG%"
> and event_name not like "%MYSQL_BIN_LOG::update_cond"
> order by event_name;
> EVENT_NAME COUNT_STAR
> +wait/synch/cond/sql/MYSQL_BIN_LOG::COND_binlog_thread NONE
> +wait/synch/cond/sql/MYSQL_BIN_LOG::COND_binlog_thread_end NONE
> wait/synch/cond/sql/MYSQL_BIN_LOG::COND_queue_busy NONE
> wait/synch/cond/sql/MYSQL_BIN_LOG::COND_xid_list NONE
> +wait/synch/mutex/sql/MYSQL_BIN_LOG::LOCK_binlog_thread MANY
> wait/synch/mutex/sql/MYSQL_BIN_LOG::LOCK_index MANY
> wait/synch/mutex/sql/MYSQL_BIN_LOG::LOCK_xid_list MANY
> "Expect no slave relay log"
> @@ -131,8 +134,11 @@ where event_name like "%MYSQL_BIN_LOG%"
> and event_name not like "%MYSQL_BIN_LOG::update_cond"
> order by event_name;
> EVENT_NAME COUNT_STAR
> +wait/synch/cond/sql/MYSQL_BIN_LOG::COND_binlog_thread MANY
> +wait/synch/cond/sql/MYSQL_BIN_LOG::COND_binlog_thread_end NONE
> wait/synch/cond/sql/MYSQL_BIN_LOG::COND_queue_busy NONE
> -wait/synch/cond/sql/MYSQL_BIN_LOG::COND_xid_list NONE
> +wait/synch/cond/sql/MYSQL_BIN_LOG::COND_xid_list MANY
> +wait/synch/mutex/sql/MYSQL_BIN_LOG::LOCK_binlog_thread MANY
> wait/synch/mutex/sql/MYSQL_BIN_LOG::LOCK_index MANY
> wait/synch/mutex/sql/MYSQL_BIN_LOG::LOCK_xid_list MANY
> "Expect a slave relay log"
>
> === modified file 'sql/debug_sync.cc'
> --- a/sql/debug_sync.cc 2012-03-28 17:26:00 +0000
> +++ b/sql/debug_sync.cc 2012-09-14 12:44:53 +0000
> @@ -984,6 +984,7 @@ static bool debug_sync_eval_action(THD *
> DBUG_ENTER("debug_sync_eval_action");
> DBUG_ASSERT(thd);
> DBUG_ASSERT(action_str);
> + DBUG_PRINT("debug_sync", ("action_str='%s'", action_str));
>
> /*
> Get debug sync point name. Or a special command.
>
> === modified file 'sql/log.cc'
> --- a/sql/log.cc 2012-09-13 12:31:29 +0000
> +++ b/sql/log.cc 2012-09-14 12:44:53 +0000
> @@ -53,6 +53,7 @@
> #include "rpl_handler.h"
> #include "debug_sync.h"
> #include "sql_show.h"
> +#include "my_pthread.h"
>
> /* max size of the log message */
> #define MAX_LOG_BUFFER_SIZE 1024
> @@ -106,6 +107,14 @@ static SHOW_VAR binlog_status_vars_detai
> {NullS, NullS, SHOW_LONG}
> };
>
> +/* Variables for the binlog background thread. */
> +static bool binlog_thread_started= false;
> +static bool binlog_background_thread_stop= false;
> +static MYSQL_BIN_LOG::xid_count_per_binlog *
> + binlog_background_thread_queue= NULL;
> +
> +static bool start_binlog_background_thread();
> +
>
> /**
> purge logs, master and slave sides both, related error code
> @@ -2957,12 +2966,27 @@ void MYSQL_BIN_LOG::cleanup()
> my_free(b);
> }
>
> + /* Wait for the binlog thread to stop. */
> + if (!is_relay_log && binlog_thread_started)
> + {
> + mysql_mutex_lock(&LOCK_binlog_thread);
> + binlog_background_thread_stop= true;
> + mysql_cond_signal(&COND_binlog_thread);
> + while (binlog_background_thread_stop)
> + mysql_cond_wait(&COND_binlog_thread_end, &LOCK_binlog_thread);
> + mysql_mutex_unlock(&LOCK_binlog_thread);
> + binlog_thread_started= false;
> + }
> +
> mysql_mutex_destroy(&LOCK_log);
> mysql_mutex_destroy(&LOCK_index);
> mysql_mutex_destroy(&LOCK_xid_list);
> + mysql_mutex_destroy(&LOCK_binlog_thread);
> mysql_cond_destroy(&update_cond);
> mysql_cond_destroy(&COND_queue_busy);
> mysql_cond_destroy(&COND_xid_list);
> + mysql_cond_destroy(&COND_binlog_thread);
> + mysql_cond_destroy(&COND_binlog_thread_end);
> }
> DBUG_VOID_RETURN;
> }
> @@ -2988,6 +3012,11 @@ void MYSQL_BIN_LOG::init_pthread_objects
> mysql_cond_init(m_key_update_cond, &update_cond, 0);
> mysql_cond_init(m_key_COND_queue_busy, &COND_queue_busy, 0);
> mysql_cond_init(key_BINLOG_COND_xid_list, &COND_xid_list, 0);
> +
> + mysql_mutex_init(key_BINLOG_LOCK_binlog_thread,
> + &LOCK_binlog_thread, MY_MUTEX_INIT_FAST);
> + mysql_cond_init(key_BINLOG_COND_binlog_thread, &COND_binlog_thread, 0);
> + mysql_cond_init(key_BINLOG_COND_binlog_thread_end, &COND_binlog_thread_end, 0);
> }
>
>
> @@ -3085,6 +3114,10 @@ bool MYSQL_BIN_LOG::open(const char *log
> DBUG_ENTER("MYSQL_BIN_LOG::open");
> DBUG_PRINT("enter",("log_type: %d",(int) log_type_arg));
>
> + if (!is_relay_log && !binlog_thread_started &&
> + start_binlog_background_thread())
> + DBUG_RETURN(1);
> +
> if (init_and_set_log_file_name(log_name, new_name, log_type_arg,
> io_cache_type_arg))
> {
> @@ -5540,11 +5573,7 @@ bool general_log_write(THD *thd, enum en
> }
>
>
> -/*
> - I would like to make this function static, but this causes compiler warnings
> - when it is declared as friend function in log.h.
> -*/
> -void
> +static void
> binlog_checkpoint_callback(void *cookie)
> {
> MYSQL_BIN_LOG::xid_count_per_binlog *entry=
> @@ -8116,9 +8145,128 @@ int TC_LOG_BINLOG::unlog(ulong cookie, m
> void
> TC_LOG_BINLOG::commit_checkpoint_notify(void *cookie)
> {
> - mark_xid_done(((xid_count_per_binlog *)cookie)->binlog_id, true);
> + xid_count_per_binlog *entry= static_cast<xid_count_per_binlog *>(cookie);
> + mysql_mutex_lock(&LOCK_binlog_thread);
> + entry->next_in_queue= binlog_background_thread_queue;
> + binlog_background_thread_queue= entry;
> + mysql_cond_signal(&COND_binlog_thread);
> + mysql_mutex_unlock(&LOCK_binlog_thread);
> }
>
> +/*
> + Binlog service thread.
> +
> + This thread is used to log binlog checkpoints in the background, rather than
> + in the context of random storage engine threads that happen to call
> + commit_checkpoint_notify_ha() and may not like the delays while syncing
> + binlog to disk or may not be setup with all my_thread_init() and other
> + necessary stuff.
> +
> + In the future, this thread could also be used to do log rotation in the
> + background, which could eliminate all stalls around binlog rotations.
> +*/
> +pthread_handler_t
> +binlog_background_thread(void *arg __attribute__((unused)))
> +{
> + bool stop;
> + MYSQL_BIN_LOG::xid_count_per_binlog *queue, *next;
> + THD *thd;
> +
> + my_thread_init();
> + thd= new THD;
> + thd->system_thread= SYSTEM_THREAD_BINLOG_BACKGROUND;
> + my_pthread_setspecific_ptr(THR_THD, thd);
> + mysql_mutex_lock(&LOCK_thread_count);
> + thd->thread_id= thread_id++;
> + mysql_mutex_unlock(&LOCK_thread_count);
> +
> + for (;;)
> + {
> + /*
> + Wait until there is something in the queue to process, or we are asked
> + to shut down.
> + */
> + thd_proc_info(thd, "Waiting for background binlog tasks");
> + mysql_mutex_lock(&mysql_bin_log.LOCK_binlog_thread);
> + for (;;)
> + {
> + stop= binlog_background_thread_stop;
> + queue= binlog_background_thread_queue;
> + if (stop || queue)
> + break;
> + mysql_cond_wait(&mysql_bin_log.COND_binlog_thread,
> + &mysql_bin_log.LOCK_binlog_thread);
> + }
> + /* Grab the queue, if any. */
> + binlog_background_thread_queue= NULL;
> + mysql_mutex_unlock(&mysql_bin_log.LOCK_binlog_thread);
> +
> + /* Process any incoming commit_checkpoint_notify() calls. */
> + while (queue)
> + {
> + thd_proc_info(thd, "Processing binlog checkpoint notification");
> + /* Grab next pointer first, as mark_xid_done() may free the element. */
> + next= queue->next_in_queue;
> + mysql_bin_log.mark_xid_done(queue->binlog_id, true);
> + queue= next;
> +
> + DBUG_EXECUTE_IF("binlog_background_checkpoint_processed",
> + DBUG_ASSERT(!debug_sync_set_action(
> + thd,
> + STRING_WITH_LEN("now SIGNAL binlog_background_checkpoint_processed")));
> + );
> + }
> +
> + if (stop)
> + break;
> + }
> +
> + thd_proc_info(thd, "Stopping binlog background thread");
> +
> + mysql_mutex_lock(&LOCK_thread_count);
> + delete thd;
> + mysql_mutex_unlock(&LOCK_thread_count);
> +
> + my_thread_end();
> +
> + /* Signal that we are (almost) stopped. */
> + mysql_mutex_lock(&mysql_bin_log.LOCK_binlog_thread);
> + binlog_background_thread_stop= false;
> + mysql_cond_signal(&mysql_bin_log.COND_binlog_thread_end);
> + mysql_mutex_unlock(&mysql_bin_log.LOCK_binlog_thread);
> +
> + return 0;
> +}
> +
> +#ifdef HAVE_PSI_INTERFACE
> +static PSI_thread_key key_thread_binlog;
> +
> +static PSI_thread_info all_binlog_threads[]=
> +{
> + { &key_thread_binlog, "binlog_background", PSI_FLAG_GLOBAL},
> +};
> +#endif /* HAVE_PSI_INTERFACE */
> +
> +static bool
> +start_binlog_background_thread()
> +{
> + pthread_t th;
> +
> +#ifdef HAVE_PSI_INTERFACE
> + if (PSI_server)
> + PSI_server->register_thread("sql", all_binlog_threads,
> + array_elements(all_binlog_threads));
> +#endif
> +
> + if (mysql_thread_create(key_thread_binlog, &th, NULL,
> + binlog_background_thread, NULL))
> + return 1;
> +
> + binlog_thread_started= true;
> + return 0;
> +}
> +
> +
> int TC_LOG_BINLOG::recover(LOG_INFO *linfo, const char *last_log_name,
> IO_CACHE *first_log,
> Format_description_log_event *fdle)
>
> === modified file 'sql/log.h'
> --- a/sql/log.h 2012-09-13 12:31:29 +0000
> +++ b/sql/log.h 2012-09-14 12:44:53 +0000
> @@ -395,8 +395,6 @@ class MYSQL_QUERY_LOG: public MYSQL_LOG
> #define BINLOG_COOKIE_IS_DUMMY(c) \
> ( ((ulong)(c)>>1) == BINLOG_COOKIE_DUMMY_ID )
>
> -void binlog_checkpoint_callback(void *cookie);
> -
> class binlog_cache_mngr;
> class MYSQL_BIN_LOG: public TC_LOG, private MYSQL_LOG
> {
> @@ -451,27 +449,6 @@ class MYSQL_BIN_LOG: public TC_LOG, priv
> };
>
> /*
> - A list of struct xid_count_per_binlog is used to keep track of how many
> - XIDs are in prepared, but not committed, state in each binlog. And how
> - many commit_checkpoint_request()'s are pending.
> -
> - When count drops to zero in a binlog after rotation, it means that there
> - are no more XIDs in prepared state, so that binlog is no longer needed
> - for XA crash recovery, and we can log a new binlog checkpoint event.
> -
> - The list is protected against simultaneous access from multiple
> - threads by LOCK_xid_list.
> - */
> - struct xid_count_per_binlog : public ilink {
> - char *binlog_name;
> - uint binlog_name_len;
> - ulong binlog_id;
> - /* Total prepared XIDs and pending checkpoint requests in this binlog. */
> - long xid_count;
> - xid_count_per_binlog(); /* Give link error if constructor used. */
> - };
> - I_List<xid_count_per_binlog> binlog_xid_count_list;
> - /*
> When this is set, a RESET MASTER is in progress.
>
> Then we should not write any binlog checkpoints into the binlog (that
> @@ -480,7 +457,6 @@ class MYSQL_BIN_LOG: public TC_LOG, priv
> checkpoint arrives - when all have arrived, RESET MASTER will complete.
> */
> bool reset_master_pending;
> - friend void binlog_checkpoint_callback(void *cookie);
>
> /* LOCK_log and LOCK_index are inited by init_pthread_objects() */
> mysql_mutex_t LOCK_index;
> @@ -553,10 +529,35 @@ class MYSQL_BIN_LOG: public TC_LOG, priv
> int write_transaction_or_stmt(group_commit_entry *entry);
> bool write_transaction_to_binlog_events(group_commit_entry *entry);
> void trx_group_commit_leader(group_commit_entry *leader);
> - void mark_xid_done(ulong cookie, bool write_checkpoint);
> - void mark_xids_active(ulong cookie, uint xid_count);
>
> public:
> + /*
> + A list of struct xid_count_per_binlog is used to keep track of how many
> + XIDs are in prepared, but not committed, state in each binlog. And how
> + many commit_checkpoint_request()'s are pending.
> +
> + When count drops to zero in a binlog after rotation, it means that there
> + are no more XIDs in prepared state, so that binlog is no longer needed
> + for XA crash recovery, and we can log a new binlog checkpoint event.
> +
> + The list is protected against simultaneous access from multiple
> + threads by LOCK_xid_list.
> + */
> + struct xid_count_per_binlog : public ilink {
> + char *binlog_name;
> + uint binlog_name_len;
> + ulong binlog_id;
> + /* Total prepared XIDs and pending checkpoint requests in this binlog. */
> + long xid_count;
> + /* For linking in requests to the binlog background thread. */
> + xid_count_per_binlog *next_in_queue;
> + xid_count_per_binlog(); /* Give link error if constructor used. */
> + };
> + I_List<xid_count_per_binlog> binlog_xid_count_list;
> + mysql_mutex_t LOCK_binlog_thread;
> + mysql_cond_t COND_binlog_thread;
> + mysql_cond_t COND_binlog_thread_end;
> +
> using MYSQL_LOG::generate_name;
> using MYSQL_LOG::is_open;
>
> @@ -712,6 +713,8 @@ class MYSQL_BIN_LOG: public TC_LOG, priv
> bool appendv(const char* buf,uint len,...);
> bool append(Log_event* ev);
>
> + void mark_xids_active(ulong cookie, uint xid_count);
> + void mark_xid_done(ulong cookie, bool write_checkpoint);
> void make_log_name(char* buf, const char* log_ident);
> bool is_active(const char* log_file_name);
> bool can_purge_log(const char *log_file_name);
>
> === modified file 'sql/mysqld.cc'
> --- a/sql/mysqld.cc 2012-09-13 12:31:29 +0000
> +++ b/sql/mysqld.cc 2012-09-14 12:44:53 +0000
> @@ -724,6 +724,7 @@ PSI_mutex_key key_LOCK_des_key_file;
> #endif /* HAVE_OPENSSL */
>
> PSI_mutex_key key_BINLOG_LOCK_index, key_BINLOG_LOCK_xid_list,
> + key_BINLOG_LOCK_binlog_thread,
> key_delayed_insert_mutex, key_hash_filo_lock, key_LOCK_active_mi,
> key_LOCK_connection_count, key_LOCK_crypt, key_LOCK_delayed_create,
> key_LOCK_delayed_insert, key_LOCK_delayed_status, key_LOCK_error_log,
> @@ -766,6 +767,7 @@ static PSI_mutex_info all_server_mutexes
>
> { &key_BINLOG_LOCK_index, "MYSQL_BIN_LOG::LOCK_index", 0},
> { &key_BINLOG_LOCK_xid_list, "MYSQL_BIN_LOG::LOCK_xid_list", 0},
> + { &key_BINLOG_LOCK_binlog_thread, "MYSQL_BIN_LOG::LOCK_binlog_thread", 0},
> { &key_RELAYLOG_LOCK_index, "MYSQL_RELAY_LOG::LOCK_index", 0},
> { &key_delayed_insert_mutex, "Delayed_insert::mutex", 0},
> { &key_hash_filo_lock, "hash_filo::lock", 0},
> @@ -834,6 +836,7 @@ PSI_cond_key key_PAGE_cond, key_COND_act
> #endif /* HAVE_MMAP */
>
> PSI_cond_key key_BINLOG_COND_xid_list, key_BINLOG_update_cond,
> + key_BINLOG_COND_binlog_thread, key_BINLOG_COND_binlog_thread_end,
> key_COND_cache_status_changed, key_COND_manager,
> key_COND_rpl_status, key_COND_server_started,
> key_delayed_insert_cond, key_delayed_insert_cond_client,
> @@ -863,6 +866,8 @@ static PSI_cond_info all_server_conds[]=
> #endif /* HAVE_MMAP */
> { &key_BINLOG_COND_xid_list, "MYSQL_BIN_LOG::COND_xid_list", 0},
> { &key_BINLOG_update_cond, "MYSQL_BIN_LOG::update_cond", 0},
> + { &key_BINLOG_COND_binlog_thread, "MYSQL_BIN_LOG::COND_binlog_thread", 0},
> + { &key_BINLOG_COND_binlog_thread_end, "MYSQL_BIN_LOG::COND_binlog_thread_end", 0},
> { &key_BINLOG_COND_queue_busy, "MYSQL_BIN_LOG::COND_queue_busy", 0},
> { &key_RELAYLOG_update_cond, "MYSQL_RELAY_LOG::update_cond", 0},
> { &key_RELAYLOG_COND_queue_busy, "MYSQL_RELAY_LOG::COND_queue_busy", 0},
>
> === modified file 'sql/mysqld.h'
> --- a/sql/mysqld.h 2012-09-13 12:31:29 +0000
> +++ b/sql/mysqld.h 2012-09-14 12:44:53 +0000
> @@ -226,6 +226,7 @@ extern PSI_mutex_key key_LOCK_des_key_fi
> #endif
>
> extern PSI_mutex_key key_BINLOG_LOCK_index, key_BINLOG_LOCK_xid_list,
> + key_BINLOG_LOCK_binlog_thread,
> key_delayed_insert_mutex, key_hash_filo_lock, key_LOCK_active_mi,
> key_LOCK_connection_count, key_LOCK_crypt, key_LOCK_delayed_create,
> key_LOCK_delayed_insert, key_LOCK_delayed_status, key_LOCK_error_log,
> @@ -257,6 +258,7 @@ extern PSI_cond_key key_PAGE_cond, key_C
> #endif /* HAVE_MMAP */
>
> extern PSI_cond_key key_BINLOG_COND_xid_list, key_BINLOG_update_cond,
> + key_BINLOG_COND_binlog_thread, key_BINLOG_COND_binlog_thread_end,
> key_COND_cache_status_changed, key_COND_manager,
> key_COND_rpl_status, key_COND_server_started,
> key_delayed_insert_cond, key_delayed_insert_cond_client,
>
> === modified file 'sql/rpl_rli.cc'
> --- a/sql/rpl_rli.cc 2012-09-13 12:31:29 +0000
> +++ b/sql/rpl_rli.cc 2012-09-14 12:44:53 +0000
> @@ -58,6 +58,7 @@ Relay_log_info::Relay_log_info(bool is_s
> {
> DBUG_ENTER("Relay_log_info::Relay_log_info");
>
> + relay_log.is_relay_log= TRUE;
> #ifdef HAVE_PSI_INTERFACE
> relay_log.set_psi_keys(key_RELAYLOG_LOCK_index,
> key_RELAYLOG_update_cond,
> @@ -206,8 +207,6 @@ a file name for --relay-log-index option
> name_warning_sent= 1;
> }
>
> - rli->relay_log.is_relay_log= TRUE;
> -
> /*
> note, that if open() fails, we'll still have index file open
> but a destructor will take care of that
>
> === modified file 'sql/sql_class.h'
> --- a/sql/sql_class.h 2012-09-13 12:31:29 +0000
> +++ b/sql/sql_class.h 2012-09-14 12:44:53 +0000
> @@ -1244,7 +1244,8 @@ enum enum_thread_type
> SYSTEM_THREAD_SLAVE_SQL= 4,
> SYSTEM_THREAD_NDBCLUSTER_BINLOG= 8,
> SYSTEM_THREAD_EVENT_SCHEDULER= 16,
> - SYSTEM_THREAD_EVENT_WORKER= 32
> + SYSTEM_THREAD_EVENT_WORKER= 32,
> + SYSTEM_THREAD_BINLOG_BACKGROUND= 64
> };
>
> inline char const *
>
> === modified file 'storage/innobase/handler/ha_innodb.cc'
> --- a/storage/innobase/handler/ha_innodb.cc 2012-09-13 12:31:29 +0000
> +++ b/storage/innobase/handler/ha_innodb.cc 2012-09-14 12:44:53 +0000
> @@ -106,6 +106,7 @@ static ulong commit_threads = 0;
> static mysql_mutex_t commit_threads_m;
> static mysql_cond_t commit_cond;
> static mysql_mutex_t commit_cond_m;
> +static mysql_mutex_t pending_checkpoint_mutex;
> static bool innodb_inited = 0;
>
> #define INSIDE_HA_INNOBASE_CC
> @@ -222,11 +223,13 @@ static mysql_pfs_key_t innobase_share_mu
> static mysql_pfs_key_t commit_threads_m_key;
> static mysql_pfs_key_t commit_cond_mutex_key;
> static mysql_pfs_key_t commit_cond_key;
> +static mysql_pfs_key_t pending_checkpoint_mutex_key;
>
> static PSI_mutex_info all_pthread_mutexes[] = {
> {&commit_threads_m_key, "commit_threads_m", 0},
> {&commit_cond_mutex_key, "commit_cond_mutex", 0},
> - {&innobase_share_mutex_key, "innobase_share_mutex", 0}
> + {&innobase_share_mutex_key, "innobase_share_mutex", 0},
> + {&pending_checkpoint_mutex_key, "pending_checkpoint_mutex", 0}
> };
>
> static PSI_cond_info all_innodb_conds[] = {
> @@ -2592,6 +2595,9 @@ innobase_init(
> mysql_mutex_init(commit_cond_mutex_key,
> &commit_cond_m, MY_MUTEX_INIT_FAST);
> mysql_cond_init(commit_cond_key, &commit_cond, NULL);
> + mysql_mutex_init(pending_checkpoint_mutex_key,
> + &pending_checkpoint_mutex,
> + MY_MUTEX_INIT_FAST);
> innodb_inited= 1;
> #ifdef MYSQL_DYNAMIC_PLUGIN
> if (innobase_hton != p) {
> @@ -2639,6 +2645,7 @@ innobase_end(
> mysql_mutex_destroy(&commit_threads_m);
> mysql_mutex_destroy(&commit_cond_m);
> mysql_cond_destroy(&commit_cond);
> + mysql_mutex_destroy(&pending_checkpoint_mutex);
> }
>
> DBUG_RETURN(err);
> @@ -3008,6 +3015,16 @@ innobase_rollback_trx(
> DBUG_RETURN(convert_error_code_to_mysql(error, 0, NULL));
> }
>
> +
> +struct pending_checkpoint {
> + struct pending_checkpoint *next;
> + handlerton *hton;
> + void *cookie;
> + ib_uint64_t lsn;
> +};
> +static struct pending_checkpoint *pending_checkpoint_list;
> +static struct pending_checkpoint *pending_checkpoint_list_end;
> +
> /*****************************************************************//**
> Handle a commit checkpoint request from server layer.
> We simply flush the redo log immediately and do the notify call.*/
> @@ -3017,8 +3034,113 @@ innobase_checkpoint_request(
> handlerton *hton,
> void *cookie)
> {
> - log_buffer_flush_to_disk();
> - commit_checkpoint_notify_ha(hton, cookie);
> + ib_uint64_t lsn;
> + ib_uint64_t flush_lsn;
> + struct pending_checkpoint * entry;
> +
> + /* Do the allocation outside of lock to reduce contention. The normal
> + case is that not everything is flushed, so we will need to enqueue. */
> + entry = static_cast<struct pending_checkpoint *>
> + (my_malloc(sizeof(*entry), MYF(MY_WME)));
> + if (!entry) {
> + sql_print_error("Failed to allocate %u bytes."
> + " Commit checkpoint will be skipped.",
> + static_cast<unsigned>(sizeof(*entry)));
> + return;
> + }
> +
> + entry->next = NULL;
> + entry->hton = hton;
> + entry->cookie = cookie;
> +
> + mysql_mutex_lock(&pending_checkpoint_mutex);
> + lsn = log_get_lsn();
> + flush_lsn = log_get_flush_lsn();
> + if (lsn > flush_lsn) {
> + /* Put the request in queue.
> + When the log gets flushed past the lsn, we will remove the
> + entry from the queue and notify the upper layer. */
> + entry->lsn = lsn;
> + if (pending_checkpoint_list_end) {
> + pending_checkpoint_list_end->next = entry;
> + } else {
> + pending_checkpoint_list = entry;
> + }
> + pending_checkpoint_list_end = entry;
> + entry = NULL;
> + }
> + mysql_mutex_unlock(&pending_checkpoint_mutex);
> +
> + if (entry) {
> + /* We are already flushed. Notify the checkpoint immediately. */
> + commit_checkpoint_notify_ha(entry->hton, entry->cookie);
> + my_free(entry);
> + }
> +}
> +
> +/*****************************************************************//**
> +Log code calls this whenever log has been written and/or flushed up
> +to a new position. We use this to notify upper layer of a new commit
> +checkpoint when necessary.*/
> +extern "C" UNIV_INTERN
> +void
> +innobase_mysql_log_notify(
> +/*===============*/
> + ib_uint64_t write_lsn, /*!< in: LSN written to log file */
> + ib_uint64_t flush_lsn) /*!< in: LSN flushed to disk */
> +{
> + struct pending_checkpoint * pending;
> + struct pending_checkpoint * entry;
> + struct pending_checkpoint * last_ready;
> +
> + /* It is safe to do a quick check for NULL first without lock.
> + Even if we should race, we will at most skip one checkpoint and
> + take the next one, which is harmless. */
> + if (!pending_checkpoint_list)
> + return;
> +
> + mysql_mutex_lock(&pending_checkpoint_mutex);
> + pending = pending_checkpoint_list;
> + if (!pending)
> + {
> + mysql_mutex_unlock(&pending_checkpoint_mutex);
> + return;
> + }
> +
> + last_ready = NULL;
> + for (entry = pending; entry != NULL; entry = entry -> next)
> + {
> + if (entry->lsn > flush_lsn)
> + break;
> + last_ready = entry;
> + }
> +
> + if (last_ready)
> + {
> + /* We found some pending checkpoints that are now flushed to
> + disk. So remove them from the list. */
> + pending_checkpoint_list = entry;
> + if (!entry)
> + pending_checkpoint_list_end = NULL;
> + }
> +
> + mysql_mutex_unlock(&pending_checkpoint_mutex);
> +
> + if (!last_ready)
> + return;
> +
> + /* Now that we have released the lock, notify upper layer about all
> + commit checkpoints that have now completed. */
> + for (;;) {
> + entry = pending;
> + pending = pending->next;
> +
> + commit_checkpoint_notify_ha(entry->hton, entry->cookie);
> +
> + my_free(entry);
> + if (entry == last_ready)
> + break;
> + }
> }
>
> /*****************************************************************//**
>
> === modified file 'storage/innobase/include/ha_prototypes.h'
> --- a/storage/innobase/include/ha_prototypes.h 2011-04-26 17:55:52 +0000
> +++ b/storage/innobase/include/ha_prototypes.h 2012-09-14 12:44:53 +0000
> @@ -136,6 +136,17 @@ innobase_mysql_print_thd(
> uint max_query_len); /*!< in: max query length to print, or 0 to
> use the default max length */
>
> +/*****************************************************************//**
> +Log code calls this whenever log has been written and/or flushed up
> +to a new position. We use this to notify upper layer of a new commit
> +checkpoint when necessary.*/
> +UNIV_INTERN
> +void
> +innobase_mysql_log_notify(
> +/*===============*/
> + ib_uint64_t write_lsn, /*!< in: LSN written to log file */
> + ib_uint64_t flush_lsn); /*!< in: LSN flushed to disk */
> +
> /**************************************************************//**
> Converts a MySQL type to an InnoDB type. Note that this function returns
> the 'mtype' of InnoDB. InnoDB differentiates between MySQL's old <= 4.1
>
> === modified file 'storage/innobase/include/log0log.h'
> --- a/storage/innobase/include/log0log.h 2012-06-07 13:44:26 +0000
> +++ b/storage/innobase/include/log0log.h 2012-09-14 12:44:53 +0000
> @@ -151,6 +151,13 @@ UNIV_INLINE
> ib_uint64_t
> log_get_lsn(void);
> /*=============*/
> +/************************************************************//**
> +Gets the last lsn that is fully flushed to disk.
> +@return last flushed lsn */
> +UNIV_INLINE
> +ib_uint64_t
> +log_get_flush_lsn(void);
> +/*=============*/
> /****************************************************************
> Gets the log group capacity. It is OK to read the value without
> holding log_sys->mutex because it is constant.
>
> === modified file 'storage/innobase/include/log0log.ic'
> --- a/storage/innobase/include/log0log.ic 2011-04-05 07:18:43 +0000
> +++ b/storage/innobase/include/log0log.ic 2012-09-14 12:44:53 +0000
> @@ -411,6 +411,25 @@ log_get_lsn(void)
> return(lsn);
> }
>
> +/************************************************************//**
> +Gets the last lsn that is fully flushed to disk.
> +@return last flushed lsn */
> +UNIV_INLINE
> +ib_uint64_t
> +log_get_flush_lsn(void)
> +/*=============*/
> +{
> + ib_uint64_t lsn;
> +
> + mutex_enter(&(log_sys->mutex));
> +
> + lsn = log_sys->flushed_to_disk_lsn;
> +
> + mutex_exit(&(log_sys->mutex));
> +
> + return(lsn);
> +}
> +
> /****************************************************************
> Gets the log group capacity. It is OK to read the value without
> holding log_sys->mutex because it is constant.
>
> === modified file 'storage/innobase/log/log0log.c'
> --- a/storage/innobase/log/log0log.c 2012-03-21 03:48:12 +0000
> +++ b/storage/innobase/log/log0log.c 2012-09-14 12:44:53 +0000
> @@ -1353,6 +1353,8 @@ log_write_up_to(
> ulint loop_count = 0;
> #endif /* UNIV_DEBUG */
> ulint unlock;
> + ib_uint64_t write_lsn;
> + ib_uint64_t flush_lsn;
>
> if (recv_no_ibuf_operations) {
> /* Recovery is running and no operations on the log files are
> @@ -1530,8 +1532,13 @@ log_write_up_to(
>
> log_flush_do_unlocks(unlock);
>
> + write_lsn = log_sys->write_lsn;
> + flush_lsn = log_sys->flushed_to_disk_lsn;
> +
> mutex_exit(&(log_sys->mutex));
>
> + innobase_mysql_log_notify(write_lsn, flush_lsn);
> +
> return;
>
> do_waits:
>
> === modified file 'storage/xtradb/handler/ha_innodb.cc'
> --- a/storage/xtradb/handler/ha_innodb.cc 2012-09-13 12:31:29 +0000
> +++ b/storage/xtradb/handler/ha_innodb.cc 2012-09-14 12:44:53 +0000
> @@ -120,6 +120,7 @@ static ulong commit_threads = 0;
> static mysql_mutex_t commit_threads_m;
> static mysql_cond_t commit_cond;
> static mysql_mutex_t commit_cond_m;
> +static mysql_mutex_t pending_checkpoint_mutex;
> static bool innodb_inited = 0;
>
>
> @@ -253,11 +254,13 @@ static mysql_pfs_key_t innobase_share_mu
> static mysql_pfs_key_t commit_threads_m_key;
> static mysql_pfs_key_t commit_cond_mutex_key;
> static mysql_pfs_key_t commit_cond_key;
> +static mysql_pfs_key_t pending_checkpoint_mutex_key;
>
> static PSI_mutex_info all_pthread_mutexes[] = {
> {&commit_threads_m_key, "commit_threads_m", 0},
> {&commit_cond_mutex_key, "commit_cond_mutex", 0},
> - {&innobase_share_mutex_key, "innobase_share_mutex", 0}
> + {&innobase_share_mutex_key, "innobase_share_mutex", 0},
> + {&pending_checkpoint_mutex_key, "pending_checkpoint_mutex", 0}
> };
>
> static PSI_cond_info all_innodb_conds[] = {
> @@ -3060,6 +3063,9 @@ innobase_init(
> mysql_mutex_init(commit_cond_mutex_key,
> &commit_cond_m, MY_MUTEX_INIT_FAST);
> mysql_cond_init(commit_cond_key, &commit_cond, NULL);
> + mysql_mutex_init(pending_checkpoint_mutex_key,
> + &pending_checkpoint_mutex,
> + MY_MUTEX_INIT_FAST);
> innodb_inited= 1;
> #ifdef MYSQL_DYNAMIC_PLUGIN
> if (innobase_hton != p) {
> @@ -3107,6 +3113,7 @@ innobase_end(
> mysql_mutex_destroy(&commit_threads_m);
> mysql_mutex_destroy(&commit_cond_m);
> mysql_cond_destroy(&commit_cond);
> + mysql_mutex_destroy(&pending_checkpoint_mutex);
> }
>
> DBUG_RETURN(err);
> @@ -3500,6 +3507,16 @@ innobase_rollback_trx(
> DBUG_RETURN(convert_error_code_to_mysql(error, 0, NULL));
> }
>
> +
> +struct pending_checkpoint {
> + struct pending_checkpoint *next;
> + handlerton *hton;
> + void *cookie;
> + ib_uint64_t lsn;
> +};
> +static struct pending_checkpoint *pending_checkpoint_list;
> +static struct pending_checkpoint *pending_checkpoint_list_end;
> +
> /*****************************************************************//**
> Handle a commit checkpoint request from server layer.
> We simply flush the redo log immediately and do the notify call.*/
> @@ -3509,8 +3526,113 @@ innobase_checkpoint_request(
> handlerton *hton,
> void *cookie)
> {
> - log_buffer_flush_to_disk();
> - commit_checkpoint_notify_ha(hton, cookie);
> + ib_uint64_t lsn;
> + ib_uint64_t flush_lsn;
> + struct pending_checkpoint * entry;
> +
> + /* Do the allocation outside of lock to reduce contention. The normal
> + case is that not everything is flushed, so we will need to enqueue. */
> + entry = static_cast<struct pending_checkpoint *>
> + (my_malloc(sizeof(*entry), MYF(MY_WME)));
> + if (!entry) {
> + sql_print_error("Failed to allocate %u bytes."
> + " Commit checkpoint will be skipped.",
> + static_cast<unsigned>(sizeof(*entry)));
> + return;
> + }
> +
> + entry->next = NULL;
> + entry->hton = hton;
> + entry->cookie = cookie;
> +
> + mysql_mutex_lock(&pending_checkpoint_mutex);
> + lsn = log_get_lsn();
> + flush_lsn = log_get_flush_lsn();
> + if (lsn > flush_lsn) {
> + /* Put the request in queue.
> + When the log gets flushed past the lsn, we will remove the
> + entry from the queue and notify the upper layer. */
> + entry->lsn = lsn;
> + if (pending_checkpoint_list_end) {
> + pending_checkpoint_list_end->next = entry;
> + } else {
> + pending_checkpoint_list = entry;
> + }
> + pending_checkpoint_list_end = entry;
> + entry = NULL;
> + }
> + mysql_mutex_unlock(&pending_checkpoint_mutex);
> +
> + if (entry) {
> + /* We are already flushed. Notify the checkpoint immediately. */
> + commit_checkpoint_notify_ha(entry->hton, entry->cookie);
> + my_free(entry);
> + }
> +}
> +
> +/*****************************************************************//**
> +Log code calls this whenever log has been written and/or flushed up
> +to a new position. We use this to notify upper layer of a new commit
> +checkpoint when necessary.*/
> +extern "C" UNIV_INTERN
> +void
> +innobase_mysql_log_notify(
> +/*===============*/
> + ib_uint64_t write_lsn, /*!< in: LSN written to log file */
> + ib_uint64_t flush_lsn) /*!< in: LSN flushed to disk */
> +{
> + struct pending_checkpoint * pending;
> + struct pending_checkpoint * entry;
> + struct pending_checkpoint * last_ready;
> +
> + /* It is safe to do a quick check for NULL first without lock.
> + Even if we should race, we will at most skip one checkpoint and
> + take the next one, which is harmless. */
> + if (!pending_checkpoint_list)
> + return;
> +
> + mysql_mutex_lock(&pending_checkpoint_mutex);
> + pending = pending_checkpoint_list;
> + if (!pending)
> + {
> + mysql_mutex_unlock(&pending_checkpoint_mutex);
> + return;
> + }
> +
> + last_ready = NULL;
> + for (entry = pending; entry != NULL; entry = entry -> next)
> + {
> + if (entry->lsn > flush_lsn)
> + break;
> + last_ready = entry;
> + }
> +
> + if (last_ready)
> + {
> + /* We found some pending checkpoints that are now flushed to
> + disk. So remove them from the list. */
> + pending_checkpoint_list = entry;
> + if (!entry)
> + pending_checkpoint_list_end = NULL;
> + }
> +
> + mysql_mutex_unlock(&pending_checkpoint_mutex);
> +
> + if (!last_ready)
> + return;
> +
> + /* Now that we have released the lock, notify upper layer about all
> + commit checkpoints that have now completed. */
> + for (;;) {
> + entry = pending;
> + pending = pending->next;
> +
> + commit_checkpoint_notify_ha(entry->hton, entry->cookie);
> +
> + my_free(entry);
> + if (entry == last_ready)
> + break;
> + }
> }
>
> /*****************************************************************//**
>
> === modified file 'storage/xtradb/include/ha_prototypes.h'
> --- a/storage/xtradb/include/ha_prototypes.h 2012-02-21 19:51:56 +0000
> +++ b/storage/xtradb/include/ha_prototypes.h 2012-09-14 12:44:53 +0000
> @@ -136,6 +136,17 @@ innobase_mysql_print_thd(
> uint max_query_len); /*!< in: max query length to print, or 0 to
> use the default max length */
>
> +/*****************************************************************//**
> +Log code calls this whenever log has been written and/or flushed up
> +to a new position. We use this to notify upper layer of a new commit
> +checkpoint when necessary.*/
> +UNIV_INTERN
> +void
> +innobase_mysql_log_notify(
> +/*===============*/
> + ib_uint64_t write_lsn, /*!< in: LSN written to log file */
> + ib_uint64_t flush_lsn); /*!< in: LSN flushed to disk */
> +
> /**************************************************************//**
> Converts a MySQL type to an InnoDB type. Note that this function returns
> the 'mtype' of InnoDB. InnoDB differentiates between MySQL's old <= 4.1
>
> === modified file 'storage/xtradb/include/log0log.h'
> --- a/storage/xtradb/include/log0log.h 2012-08-27 16:13:17 +0000
> +++ b/storage/xtradb/include/log0log.h 2012-09-14 12:44:53 +0000
> @@ -151,6 +151,13 @@ UNIV_INLINE
> ib_uint64_t
> log_get_lsn(void);
> /*=============*/
> +/************************************************************//**
> +Gets the last lsn that is fully flushed to disk.
> +@return last flushed lsn */
> +UNIV_INLINE
> +ib_uint64_t
> +log_get_flush_lsn(void);
> +/*=============*/
> /****************************************************************
> Gets the log group capacity. It is OK to read the value without
> holding log_sys->mutex because it is constant.
>
> === modified file 'storage/xtradb/include/log0log.ic'
> --- a/storage/xtradb/include/log0log.ic 2011-07-14 19:22:41 +0000
> +++ b/storage/xtradb/include/log0log.ic 2012-09-14 12:44:53 +0000
> @@ -411,6 +411,25 @@ log_get_lsn(void)
> return(lsn);
> }
>
> +/************************************************************//**
> +Gets the last lsn that is fully flushed to disk.
> +@return last flushed lsn */
> +UNIV_INLINE
> +ib_uint64_t
> +log_get_flush_lsn(void)
> +/*=============*/
> +{
> + ib_uint64_t lsn;
> +
> + mutex_enter(&(log_sys->mutex));
> +
> + lsn = log_sys->flushed_to_disk_lsn;
> +
> + mutex_exit(&(log_sys->mutex));
> +
> + return(lsn);
> +}
> +
> /****************************************************************
> Gets the log group capacity. It is OK to read the value without
> holding log_sys->mutex because it is constant.
>
> === modified file 'storage/xtradb/log/log0log.c'
> --- a/storage/xtradb/log/log0log.c 2012-08-27 16:13:17 +0000
> +++ b/storage/xtradb/log/log0log.c 2012-09-14 12:44:53 +0000
> @@ -1390,6 +1390,8 @@ log_write_up_to(
> ulint loop_count = 0;
> #endif /* UNIV_DEBUG */
> ulint unlock;
> + ib_uint64_t write_lsn;
> + ib_uint64_t flush_lsn;
>
> if (recv_no_ibuf_operations) {
> /* Recovery is running and no operations on the log files are
> @@ -1568,8 +1570,13 @@ log_write_up_to(
>
> log_flush_do_unlocks(unlock);
>
> + write_lsn = log_sys->write_lsn;
> + flush_lsn = log_sys->flushed_to_disk_lsn;
> +
> mutex_exit(&(log_sys->mutex));
>
> + innobase_mysql_log_notify(write_lsn, flush_lsn);
> +
> return;
>
> do_waits:
>
> _______________________________________________
> commits mailing list
> commits@xxxxxxxxxxx
> https://lists.askmonty.org/cgi-bin/mailman/listinfo/commits
Follow ups