maria-developers team mailing list archive
-
maria-developers team
-
Mailing list archive
-
Message #11552
Re: [Commits] 8bfb140d5dc: Move deletion of old GTID rows to slave background thread
-
To:
Kristian Nielsen <knielsen@xxxxxxxxxxxxxxx>
-
From:
andrei.elkin@xxxxxxxxxx
-
Date:
Mon, 03 Dec 2018 20:07:53 +0200
-
Cc:
commits@xxxxxxxxxxx, maria-developers@xxxxxxxxxxxxxxxxxxx
-
In-reply-to:
<E1gQzKw-0002o5-4I@urd> (Kristian Nielsen's message of "Sun, 25 Nov 2018 19:40:54 +0100")
-
Organization:
Home sweet home
-
Razorgate-kas:
Status: not_detected
-
Razorgate-kas:
Rate: 0
-
Razorgate-kas:
Envelope from:
-
Razorgate-kas:
Version: 5.5.3
-
Razorgate-kas:
LuaCore: 80 2014-11-10_18-01-23 260f8afb9361da3c7edfd3a8e3a4ca908191ad29
-
Razorgate-kas:
Lua profiles 69136 [Nov 12 2014]
-
Razorgate-kas:
Method: none
-
User-agent:
Gnus/5.13 (Gnus v5.13) Emacs/26.0.50 (gnu/linux)
Kristian, hello.
The patch is great and instructive in many ways.
Thanks!
There is something to improve in the test organization, like
to base two tests of
> storage/rocksdb/mysql-test/rocksdb_rpl/t/mdev12179.test
> storage/tokudb /mysql-test/tokudb_rpl /t/mdev12179.test
on a common parent.
I thought for a second of placing it in mysql-test/include/,
but the parent file would be so specific that I dropped the idea.
This can apparently wait until a third engine shows up and requires the
same coverage.
Cheers,
Andrei
> revision-id: 8bfb140d5dc247c183787b8a0a1799cf375845bd (mariadb-10.3.10-25-g8bfb140d5dc)
> parent(s): 74387028a06c557f36a0fd1bbde347f1551c8fb7
> author: Kristian Nielsen
> committer: Kristian Nielsen
> timestamp: 2018-11-25 19:38:33 +0100
> message:
>
> Move deletion of old GTID rows to slave background thread
>
> This patch changes how old rows in mysql.gtid_slave_pos* tables are deleted.
> Instead of doing it as part of every replicated transaction in
> record_gtid(), it is done periodically (every @@gtid_cleanup_batch_size
> transaction) in the slave background thread.
>
> This removes the deletion step from the replication process in SQL or worker
> threads, which could speed up replication with many small transactions. It
> also decreases contention on the global mutex LOCK_slave_state. And it
> simplifies the logic, eg. when a replicated transaction fails after having
> deleted old rows.
>
> With this patch, the deletion of old GTID rows happens asynchronously and
> slightly non-deterministic. Thus the number of old rows in
> mysql.gtid_slave_pos can temporarily exceed @@gtid_cleanup_batch_size. But
> all old rows will be deleted eventually after sufficiently many new GTIDs
> have been replicated.
>
> ---
> mysql-test/main/mysqld--help.result | 10 +
> mysql-test/suite/rpl/r/rpl_gtid_mdev4484.result | 40 +-
> mysql-test/suite/rpl/r/rpl_gtid_stop_start.result | 8 +-
> .../suite/rpl/r/rpl_parallel_optimistic.result | 14 +-
> mysql-test/suite/rpl/t/rpl_gtid_mdev4484.test | 68 +++-
> .../suite/rpl/t/rpl_parallel_optimistic.test | 42 ++-
> .../sys_vars/r/sysvars_server_notembedded.result | 14 +
> sql/log_event.cc | 6 +-
> sql/mysqld.cc | 1 +
> sql/mysqld.h | 1 +
> sql/rpl_gtid.cc | 413 +++++++++++++--------
> sql/rpl_gtid.h | 12 +-
> sql/rpl_rli.cc | 87 +----
> sql/rpl_rli.h | 11 -
> sql/slave.cc | 35 +-
> sql/slave.h | 1 +
> sql/sys_vars.cc | 13 +
> .../mysql-test/rocksdb_rpl/r/mdev12179.result | 18 +
> .../mysql-test/rocksdb_rpl/t/mdev12179.test | 85 +++++
> .../mysql-test/tokudb_rpl/r/mdev12179.result | 18 +
> .../tokudb/mysql-test/tokudb_rpl/t/mdev12179.test | 85 +++++
> 21 files changed, 675 insertions(+), 307 deletions(-)
>
> diff --git a/mysql-test/main/mysqld--help.result b/mysql-test/main/mysqld--help.result
> index 5a7153f32d3..4f801ec5275 100644
> --- a/mysql-test/main/mysqld--help.result
> +++ b/mysql-test/main/mysqld--help.result
> @@ -294,6 +294,15 @@ The following specify which files/extra groups are read (specified before remain
> --group-concat-max-len=#
> The maximum length of the result of function
> GROUP_CONCAT()
> + --gtid-cleanup-batch-size=#
> + Normally does not need tuning. How many old rows must
> + accumulate in the mysql.gtid_slave_pos table before a
> + background job will be run to delete them. Can be
> + increased to reduce number of commits if using many
> + different engines with --gtid_pos_auto_engines, or to
> + reduce CPU overhead if using a huge number of different
> + gtid_domain_ids. Can be decreased to reduce number of old
> + rows in the table.
> --gtid-domain-id=# Used with global transaction ID to identify logically
> independent replication streams. When events can
> propagate through multiple parallel paths (for example
> @@ -1425,6 +1434,7 @@ gdb FALSE
> general-log FALSE
> getopt-prefix-matching FALSE
> group-concat-max-len 1048576
> +gtid-cleanup-batch-size 64
> gtid-domain-id 0
> gtid-ignore-duplicates FALSE
> gtid-pos-auto-engines
> diff --git a/mysql-test/suite/rpl/r/rpl_gtid_mdev4484.result b/mysql-test/suite/rpl/r/rpl_gtid_mdev4484.result
> index aaeb0c8f119..55d2831dcf4 100644
> --- a/mysql-test/suite/rpl/r/rpl_gtid_mdev4484.result
> +++ b/mysql-test/suite/rpl/r/rpl_gtid_mdev4484.result
> @@ -16,36 +16,32 @@ INSERT INTO t1 VALUES (1);
> connection slave;
> connection slave;
> include/stop_slave.inc
> +SET @old_gtid_cleanup_batch_size= @@GLOBAL.gtid_cleanup_batch_size;
> +SET GLOBAL gtid_cleanup_batch_size= 2;
> SET @old_dbug= @@GLOBAL.debug_dbug;
> SET GLOBAL debug_dbug="+d,gtid_slave_pos_simulate_failed_delete";
> SET sql_log_bin= 0;
> -CALL mtr.add_suppression("Can't find file");
> +CALL mtr.add_suppression("<DEBUG> Error deleting old GTID row");
> SET sql_log_bin= 1;
> include/start_slave.inc
> connection master;
> -INSERT INTO t1 VALUES (2);
> -connection slave;
> -include/wait_for_slave_sql_error.inc [errno=1942]
> -STOP SLAVE IO_THREAD;
> -SELECT domain_id, server_id, seq_no FROM mysql.gtid_slave_pos
> -ORDER BY domain_id, sub_id DESC LIMIT 1;
> -domain_id server_id seq_no
> -0 1 3
> +connection slave;
> +SELECT COUNT(*), MAX(seq_no) INTO @pre_count, @pre_max_seq_no
> +FROM mysql.gtid_slave_pos;
> +SELECT IF(@pre_count >= 20, "OK", CONCAT("Error: too few rows seen while errors injected: ", @pre_count));
> +IF(@pre_count >= 20, "OK", CONCAT("Error: too few rows seen while errors injected: ", @pre_count))
> +OK
> SET GLOBAL debug_dbug= @old_dbug;
> -include/start_slave.inc
> connection master;
> -INSERT INTO t1 VALUES (3);
> -connection slave;
> -connection slave;
> -SELECT domain_id, server_id, seq_no FROM mysql.gtid_slave_pos
> -ORDER BY domain_id, sub_id DESC LIMIT 1;
> -domain_id server_id seq_no
> -0 1 4
> -SELECT * FROM t1 ORDER BY i;
> -i
> -1
> -2
> -3
> +connection slave;
> +connection slave;
> +SELECT IF(COUNT(*) >= 1, "OK", CONCAT("Error: too few rows seen after errors no longer injected: ", COUNT(*)))
> +FROM mysql.gtid_slave_pos
> +WHERE seq_no <= @pre_max_seq_no;
> +IF(COUNT(*) >= 1, "OK", CONCAT("Error: too few rows seen after errors no longer injected: ", COUNT(*)))
> +OK
> connection master;
> DROP TABLE t1;
> +connection slave;
> +SET GLOBAL gtid_cleanup_batch_size= @old_gtid_cleanup_batch_size;
> include/rpl_end.inc
> diff --git a/mysql-test/suite/rpl/r/rpl_gtid_stop_start.result b/mysql-test/suite/rpl/r/rpl_gtid_stop_start.result
> index ff845794c22..b27ffed9f94 100644
> --- a/mysql-test/suite/rpl/r/rpl_gtid_stop_start.result
> +++ b/mysql-test/suite/rpl/r/rpl_gtid_stop_start.result
> @@ -171,7 +171,7 @@ include/start_slave.inc
> *** MDEV-4692: mysql.gtid_slave_pos accumulates values for a domain ***
> SELECT domain_id, COUNT(*) FROM mysql.gtid_slave_pos GROUP BY domain_id;
> domain_id COUNT(*)
> -0 2
> +0 3
> 1 2
> connection server_1;
> INSERT INTO t1 VALUES (11);
> @@ -179,7 +179,7 @@ connection server_2;
> FLUSH NO_WRITE_TO_BINLOG TABLES;
> SELECT domain_id, COUNT(*) FROM mysql.gtid_slave_pos GROUP BY domain_id;
> domain_id COUNT(*)
> -0 2
> +0 4
> 1 2
> include/start_slave.inc
> connection server_1;
> @@ -189,8 +189,8 @@ connection server_2;
> FLUSH NO_WRITE_TO_BINLOG TABLES;
> SELECT domain_id, COUNT(*) FROM mysql.gtid_slave_pos GROUP BY domain_id;
> domain_id COUNT(*)
> -0 2
> -1 2
> +0 3
> +1 1
> *** MDEV-4650: show variables; ERROR 1946 (HY000): Failed to load replication slave GTID position ***
> connection server_2;
> SET sql_log_bin=0;
> diff --git a/mysql-test/suite/rpl/r/rpl_parallel_optimistic.result b/mysql-test/suite/rpl/r/rpl_parallel_optimistic.result
> index ca202a66b0e..83343e52cab 100644
> --- a/mysql-test/suite/rpl/r/rpl_parallel_optimistic.result
> +++ b/mysql-test/suite/rpl/r/rpl_parallel_optimistic.result
> @@ -12,6 +12,8 @@ SET GLOBAL slave_parallel_threads=10;
> CHANGE MASTER TO master_use_gtid=slave_pos;
> SET @old_parallel_mode=@@GLOBAL.slave_parallel_mode;
> SET GLOBAL slave_parallel_mode='optimistic';
> +SET @old_gtid_cleanup_batch_size= @@GLOBAL.gtid_cleanup_batch_size;
> +SET GLOBAL gtid_cleanup_batch_size= 1000000;
> connection server_1;
> INSERT INTO t1 VALUES(1,1);
> BEGIN;
> @@ -131,6 +133,11 @@ c
> 204
> 205
> 206
> +SELECT IF(COUNT(*) >= 30, "OK", CONCAT("Error: too few old rows found: ", COUNT(*)))
> +FROM mysql.gtid_slave_pos;
> +IF(COUNT(*) >= 30, "OK", CONCAT("Error: too few old rows found: ", COUNT(*)))
> +OK
> +SET GLOBAL gtid_cleanup_batch_size=1;
> *** Test @@skip_parallel_replication. ***
> connection server_2;
> include/stop_slave.inc
> @@ -651,9 +658,10 @@ DROP TABLE t1, t2, t3;
> include/save_master_gtid.inc
> connection server_2;
> include/sync_with_master_gtid.inc
> -Check that no more than the expected last four GTIDs are in mysql.gtid_slave_pos
> -select count(4) <= 4 from mysql.gtid_slave_pos order by domain_id, sub_id;
> -count(4) <= 4
> +SELECT COUNT(*) <= 5*@@GLOBAL.gtid_cleanup_batch_size
> +FROM mysql.gtid_slave_pos;
> +COUNT(*) <= 5*@@GLOBAL.gtid_cleanup_batch_size
> 1
> +SET GLOBAL gtid_cleanup_batch_size= @old_gtid_cleanup_batch_size;
> connection server_1;
> include/rpl_end.inc
> diff --git a/mysql-test/suite/rpl/t/rpl_gtid_mdev4484.test b/mysql-test/suite/rpl/t/rpl_gtid_mdev4484.test
> index e1f5696f5a1..a28bff3d27a 100644
> --- a/mysql-test/suite/rpl/t/rpl_gtid_mdev4484.test
> +++ b/mysql-test/suite/rpl/t/rpl_gtid_mdev4484.test
> @@ -28,37 +28,79 @@ INSERT INTO t1 VALUES (1);
> # Inject an artificial error deleting entries, and check that the error handling code works.
> --connection slave
> --source include/stop_slave.inc
> +SET @old_gtid_cleanup_batch_size= @@GLOBAL.gtid_cleanup_batch_size;
> +SET GLOBAL gtid_cleanup_batch_size= 2;
> SET @old_dbug= @@GLOBAL.debug_dbug;
> SET GLOBAL debug_dbug="+d,gtid_slave_pos_simulate_failed_delete";
> SET sql_log_bin= 0;
> -CALL mtr.add_suppression("Can't find file");
> +CALL mtr.add_suppression("<DEBUG> Error deleting old GTID row");
> SET sql_log_bin= 1;
> --source include/start_slave.inc
>
> --connection master
> -INSERT INTO t1 VALUES (2);
> +--disable_query_log
> +let $i = 20;
> +while ($i) {
> + eval INSERT INTO t1 VALUES ($i+10);
> + dec $i;
> +}
> +--enable_query_log
> +--save_master_pos
>
> --connection slave
> ---let $slave_sql_errno= 1942
> ---source include/wait_for_slave_sql_error.inc
> -STOP SLAVE IO_THREAD;
> -SELECT domain_id, server_id, seq_no FROM mysql.gtid_slave_pos
> - ORDER BY domain_id, sub_id DESC LIMIT 1;
> +--sync_with_master
> +
> +# Now wait for the slave background thread to try to delete old rows and
> +# hit the error injection.
> +--let _TEST_MYSQLD_ERROR_LOG=$MYSQLTEST_VARDIR/log/mysqld.2.err
> +--perl
> + open F, '<', $ENV{'_TEST_MYSQLD_ERROR_LOG'} or die;
> + outer: while (1) {
> + inner: while (<F>) {
> + last outer if /<DEBUG> Error deleting old GTID row/;
> + }
> + # Easy way to do sub-second sleep without extra modules.
> + select(undef, undef, undef, 0.1);
> + }
> +EOF
> +
> +# Since we injected error in the cleanup code, the rows should remain in
> +# mysql.gtid_slave_pos. Check that we have at least 20 (more robust against
> +# non-deterministic cleanup and future changes than checking for exact number).
> +SELECT COUNT(*), MAX(seq_no) INTO @pre_count, @pre_max_seq_no
> + FROM mysql.gtid_slave_pos;
> +SELECT IF(@pre_count >= 20, "OK", CONCAT("Error: too few rows seen while errors injected: ", @pre_count));
> SET GLOBAL debug_dbug= @old_dbug;
> ---source include/start_slave.inc
>
> --connection master
> -INSERT INTO t1 VALUES (3);
> +--disable_query_log
> +let $i = 20;
> +while ($i) {
> + eval INSERT INTO t1 VALUES ($i+40);
> + dec $i;
> +}
> +--enable_query_log
> --sync_slave_with_master
>
> --connection slave
> -SELECT domain_id, server_id, seq_no FROM mysql.gtid_slave_pos
> - ORDER BY domain_id, sub_id DESC LIMIT 1;
> -SELECT * FROM t1 ORDER BY i;
> -
> +# Now check that 1) rows are being deleted again after removing error
> +# injection, and 2) old rows are left that failed their delete while errors
> +# where injected (again compensating for non-deterministic deletion).
> +# Deletion is async and slightly non-deterministic, so we wait for at
> +# least 10 of the 20 new rows to be deleted.
> +let $wait_condition=
> + SELECT COUNT(*) <= 20-10
> + FROM mysql.gtid_slave_pos
> + WHERE seq_no > @pre_max_seq_no;
> +--source include/wait_condition.inc
> +SELECT IF(COUNT(*) >= 1, "OK", CONCAT("Error: too few rows seen after errors no longer injected: ", COUNT(*)))
> + FROM mysql.gtid_slave_pos
> + WHERE seq_no <= @pre_max_seq_no;
>
> # Clean up
> --connection master
> DROP TABLE t1;
> +--connection slave
> +SET GLOBAL gtid_cleanup_batch_size= @old_gtid_cleanup_batch_size;
>
> --source include/rpl_end.inc
> diff --git a/mysql-test/suite/rpl/t/rpl_parallel_optimistic.test b/mysql-test/suite/rpl/t/rpl_parallel_optimistic.test
> index e08472d5f51..0060cf4416c 100644
> --- a/mysql-test/suite/rpl/t/rpl_parallel_optimistic.test
> +++ b/mysql-test/suite/rpl/t/rpl_parallel_optimistic.test
> @@ -21,6 +21,10 @@ SET GLOBAL slave_parallel_threads=10;
> CHANGE MASTER TO master_use_gtid=slave_pos;
> SET @old_parallel_mode=@@GLOBAL.slave_parallel_mode;
> SET GLOBAL slave_parallel_mode='optimistic';
> +# Run the first part of the test with high batch size and see that
> +# old rows remain in the table.
> +SET @old_gtid_cleanup_batch_size= @@GLOBAL.gtid_cleanup_batch_size;
> +SET GLOBAL gtid_cleanup_batch_size= 1000000;
>
>
> --connection server_1
> @@ -108,7 +112,12 @@ SELECT * FROM t3 ORDER BY c;
> SELECT * FROM t1 ORDER BY a;
> SELECT * FROM t2 ORDER BY a;
> SELECT * FROM t3 ORDER BY c;
> -#SHOW STATUS LIKE 'Slave_retried_transactions';
> +# Check that we have a bunch of old rows left-over - they were not deleted
> +# due to high @@gtid_cleanup_batch_size. Then set a low
> +# @@gtid_cleanup_batch_size so we can test that rows start being deleted.
> +SELECT IF(COUNT(*) >= 30, "OK", CONCAT("Error: too few old rows found: ", COUNT(*)))
> + FROM mysql.gtid_slave_pos;
> +SET GLOBAL gtid_cleanup_batch_size=1;
>
>
> --echo *** Test @@skip_parallel_replication. ***
> @@ -557,25 +566,18 @@ DROP TABLE t1, t2, t3;
>
> --connection server_2
> --source include/sync_with_master_gtid.inc
> -# Check for left-over rows in table mysql.gtid_slave_pos (MDEV-12147).
> -#
> -# There was a bug when a transaction got a conflict and was rolled back. It
> -# might have also handled deletion of some old rows, and these deletions would
> -# then also be rolled back. And since the deletes were never re-tried, old no
> -# longer needed rows would accumulate in the table without limit.
> -#
> -# The earlier part of this test file have plenty of transactions being rolled
> -# back. But the last DROP TABLE statement runs on its own and should never
> -# conflict, thus at this point the mysql.gtid_slave_pos table should be clean.
> -#
> -# To support @@gtid_pos_auto_engines, when a row is inserted in the table, it
> -# is associated with the engine of the table at insertion time, and it will
> -# only be deleted during record_gtid from a table of the same engine. Since we
> -# alter the table from MyISAM to InnoDB at the start of this test, we should
> -# end up with 4 rows: two left-over from when the table was MyISAM, and two
> -# left-over from the InnoDB part.
> ---echo Check that no more than the expected last four GTIDs are in mysql.gtid_slave_pos
> -select count(4) <= 4 from mysql.gtid_slave_pos order by domain_id, sub_id;
> +# Check that old rows are deleted from mysql.gtid_slave_pos.
> +# Deletion is asynchronous, so use wait_condition.inc.
> +# Also, there is a small amount of non-determinism in the deletion of old
> +# rows, so it is not guaranteed that there can never be more than
> +# @@gtid_cleanup_batch_size rows in the table; so allow a bit of slack
> +# here.
> +let $wait_condition=
> + SELECT COUNT(*) <= 5*@@GLOBAL.gtid_cleanup_batch_size
> + FROM mysql.gtid_slave_pos;
> +--source include/wait_condition.inc
> +eval $wait_condition;
> +SET GLOBAL gtid_cleanup_batch_size= @old_gtid_cleanup_batch_size;
>
> --connection server_1
> --source include/rpl_end.inc
> diff --git a/mysql-test/suite/sys_vars/r/sysvars_server_notembedded.result b/mysql-test/suite/sys_vars/r/sysvars_server_notembedded.result
> index e8e4d671eb9..5c5ca8b66b2 100644
> --- a/mysql-test/suite/sys_vars/r/sysvars_server_notembedded.result
> +++ b/mysql-test/suite/sys_vars/r/sysvars_server_notembedded.result
> @@ -1202,6 +1202,20 @@ NUMERIC_BLOCK_SIZE NULL
> ENUM_VALUE_LIST NULL
> READ_ONLY NO
> COMMAND_LINE_ARGUMENT NULL
> +VARIABLE_NAME GTID_CLEANUP_BATCH_SIZE
> +SESSION_VALUE NULL
> +GLOBAL_VALUE 64
> +GLOBAL_VALUE_ORIGIN COMPILE-TIME
> +DEFAULT_VALUE 64
> +VARIABLE_SCOPE GLOBAL
> +VARIABLE_TYPE INT UNSIGNED
> +VARIABLE_COMMENT Normally does not need tuning. How many old rows must accumulate in the mysql.gtid_slave_pos table before a background job will be run to delete them. Can be increased to reduce number of commits if using many different engines with --gtid_pos_auto_engines, or to reduce CPU overhead if using a huge number of different gtid_domain_ids. Can be decreased to reduce number of old rows in the table.
> +NUMERIC_MIN_VALUE 0
> +NUMERIC_MAX_VALUE 2147483647
> +NUMERIC_BLOCK_SIZE 1
> +ENUM_VALUE_LIST NULL
> +READ_ONLY NO
> +COMMAND_LINE_ARGUMENT REQUIRED
> VARIABLE_NAME GTID_CURRENT_POS
> SESSION_VALUE NULL
> GLOBAL_VALUE
> diff --git a/sql/log_event.cc b/sql/log_event.cc
> index 8813d20578e..e10480fb015 100644
> --- a/sql/log_event.cc
> +++ b/sql/log_event.cc
> @@ -5565,7 +5565,7 @@ int Query_log_event::do_apply_event(rpl_group_info *rgi,
> gtid= rgi->current_gtid;
> if (unlikely(rpl_global_gtid_slave_state->record_gtid(thd, &gtid,
> sub_id,
> - rgi, false,
> + true, false,
> &hton)))
> {
> int errcode= thd->get_stmt_da()->sql_errno();
> @@ -8362,7 +8362,7 @@ Gtid_list_log_event::do_apply_event(rpl_group_info *rgi)
> {
> if ((ret= rpl_global_gtid_slave_state->record_gtid(thd, &list[i],
> sub_id_list[i],
> - NULL, false, &hton)))
> + false, false, &hton)))
> return ret;
> rpl_global_gtid_slave_state->update_state_hash(sub_id_list[i], &list[i],
> hton, NULL);
> @@ -8899,7 +8899,7 @@ int Xid_log_event::do_apply_event(rpl_group_info *rgi)
> rgi->gtid_pending= false;
>
> gtid= rgi->current_gtid;
> - err= rpl_global_gtid_slave_state->record_gtid(thd, &gtid, sub_id, rgi,
> + err= rpl_global_gtid_slave_state->record_gtid(thd, &gtid, sub_id, true,
> false, &hton);
> if (unlikely(err))
> {
> diff --git a/sql/mysqld.cc b/sql/mysqld.cc
> index afef4a5f52c..07bdd66f74c 100644
> --- a/sql/mysqld.cc
> +++ b/sql/mysqld.cc
> @@ -580,6 +580,7 @@ ulong opt_binlog_commit_wait_count= 0;
> ulong opt_binlog_commit_wait_usec= 0;
> ulong opt_slave_parallel_max_queued= 131072;
> my_bool opt_gtid_ignore_duplicates= FALSE;
> +uint opt_gtid_cleanup_batch_size= 64;
>
> const double log_10[] = {
> 1e000, 1e001, 1e002, 1e003, 1e004, 1e005, 1e006, 1e007, 1e008, 1e009,
> diff --git a/sql/mysqld.h b/sql/mysqld.h
> index d5cabd790b2..261748372f9 100644
> --- a/sql/mysqld.h
> +++ b/sql/mysqld.h
> @@ -258,6 +258,7 @@ extern ulong opt_slave_parallel_mode;
> extern ulong opt_binlog_commit_wait_count;
> extern ulong opt_binlog_commit_wait_usec;
> extern my_bool opt_gtid_ignore_duplicates;
> +extern uint opt_gtid_cleanup_batch_size;
> extern ulong back_log;
> extern ulong executed_events;
> extern char language[FN_REFLEN];
> diff --git a/sql/rpl_gtid.cc b/sql/rpl_gtid.cc
> index fabd09adaa7..196c2fe3d16 100644
> --- a/sql/rpl_gtid.cc
> +++ b/sql/rpl_gtid.cc
> @@ -79,7 +79,7 @@ rpl_slave_state::record_and_update_gtid(THD *thd, rpl_group_info *rgi)
> rgi->gtid_pending= false;
> if (rgi->gtid_ignore_duplicate_state!=rpl_group_info::GTID_DUPLICATE_IGNORE)
> {
> - if (record_gtid(thd, &rgi->current_gtid, sub_id, NULL, false, &hton))
> + if (record_gtid(thd, &rgi->current_gtid, sub_id, false, false, &hton))
> DBUG_RETURN(1);
> update_state_hash(sub_id, &rgi->current_gtid, hton, rgi);
> }
> @@ -244,7 +244,7 @@ rpl_slave_state_free_element(void *arg)
>
>
> rpl_slave_state::rpl_slave_state()
> - : last_sub_id(0), gtid_pos_tables(0), loaded(false)
> + : pending_gtid_count(0), last_sub_id(0), gtid_pos_tables(0), loaded(false)
> {
> mysql_mutex_init(key_LOCK_slave_state, &LOCK_slave_state,
> MY_MUTEX_INIT_SLOW);
> @@ -331,14 +331,11 @@ rpl_slave_state::update(uint32 domain_id, uint32 server_id, uint64 sub_id,
> }
> }
> rgi->gtid_ignore_duplicate_state= rpl_group_info::GTID_DUPLICATE_NULL;
> -
> -#ifdef HAVE_REPLICATION
> - rgi->pending_gtid_deletes_clear();
> -#endif
> }
>
> if (!(list_elem= (list_element *)my_malloc(sizeof(*list_elem), MYF(MY_WME))))
> return 1;
> + list_elem->domain_id= domain_id;
> list_elem->server_id= server_id;
> list_elem->sub_id= sub_id;
> list_elem->seq_no= seq_no;
> @@ -348,6 +345,15 @@ rpl_slave_state::update(uint32 domain_id, uint32 server_id, uint64 sub_id,
> if (last_sub_id < sub_id)
> last_sub_id= sub_id;
>
> +#ifdef HAVE_REPLICATION
> + ++pending_gtid_count;
> + if (pending_gtid_count >= opt_gtid_cleanup_batch_size)
> + {
> + pending_gtid_count = 0;
> + slave_background_gtid_pending_delete_request();
> + }
> +#endif
> +
> return 0;
> }
>
> @@ -382,20 +388,22 @@ rpl_slave_state::get_element(uint32 domain_id)
>
>
> int
> -rpl_slave_state::put_back_list(uint32 domain_id, list_element *list)
> +rpl_slave_state::put_back_list(list_element *list)
> {
> - element *e;
> + element *e= NULL;
> int err= 0;
>
> mysql_mutex_lock(&LOCK_slave_state);
> - if (!(e= (element *)my_hash_search(&hash, (const uchar *)&domain_id, 0)))
> - {
> - err= 1;
> - goto end;
> - }
> while (list)
> {
> list_element *next= list->next;
> +
> + if ((!e || e->domain_id != list->domain_id) &&
> + !(e= (element *)my_hash_search(&hash, (const uchar *)&list->domain_id, 0)))
> + {
> + err= 1;
> + goto end;
> + }
> e->add(list);
> list= next;
> }
> @@ -572,12 +580,12 @@ rpl_slave_state::select_gtid_pos_table(THD *thd, LEX_CSTRING *out_tablename)
> /*
> Write a gtid to the replication slave state table.
>
> + Do it as part of the transaction, to get slave crash safety, or as a separate
> + transaction if !in_transaction (eg. MyISAM or DDL).
> +
> gtid The global transaction id for this event group.
> sub_id Value allocated within the sub_id when the event group was
> read (sub_id must be consistent with commit order in master binlog).
> - rgi rpl_group_info context, if we are recording the gtid transactionally
> - as part of replicating a transactional event. NULL if called from
> - outside of a replicated transaction.
>
> Note that caller must later ensure that the new gtid and sub_id is inserted
> into the appropriate HASH element with rpl_slave_state.add(), so that it can
> @@ -585,16 +593,13 @@ rpl_slave_state::select_gtid_pos_table(THD *thd, LEX_CSTRING *out_tablename)
> */
> int
> rpl_slave_state::record_gtid(THD *thd, const rpl_gtid *gtid, uint64 sub_id,
> - rpl_group_info *rgi, bool in_statement,
> + bool in_transaction, bool in_statement,
> void **out_hton)
> {
> TABLE_LIST tlist;
> int err= 0, not_sql_thread;
> bool table_opened= false;
> TABLE *table;
> - list_element *delete_list= 0, *next, *cur, **next_ptr_ptr, **best_ptr_ptr;
> - uint64 best_sub_id;
> - element *elem;
> ulonglong thd_saved_option= thd->variables.option_bits;
> Query_tables_list lex_backup;
> wait_for_commit* suspended_wfc;
> @@ -684,7 +689,7 @@ rpl_slave_state::record_gtid(THD *thd, const rpl_gtid *gtid, uint64 sub_id,
> thd->wsrep_ignore_table= true;
> #endif
>
> - if (!rgi)
> + if (!in_transaction)
> {
> DBUG_PRINT("info", ("resetting OPTION_BEGIN"));
> thd->variables.option_bits&=
> @@ -716,168 +721,280 @@ rpl_slave_state::record_gtid(THD *thd, const rpl_gtid *gtid, uint64 sub_id,
> my_error(ER_OUT_OF_RESOURCES, MYF(0));
> goto end;
> }
> +end:
>
> - mysql_mutex_lock(&LOCK_slave_state);
> - if ((elem= get_element(gtid->domain_id)) == NULL)
> +#ifdef WITH_WSREP
> + thd->wsrep_ignore_table= false;
> +#endif
> +
> + if (table_opened)
> {
> - mysql_mutex_unlock(&LOCK_slave_state);
> - my_error(ER_OUT_OF_RESOURCES, MYF(0));
> - err= 1;
> - goto end;
> + if (err || (err= ha_commit_trans(thd, FALSE)))
> + ha_rollback_trans(thd, FALSE);
> +
> + close_thread_tables(thd);
> + if (in_transaction)
> + thd->mdl_context.release_statement_locks();
> + else
> + thd->mdl_context.release_transactional_locks();
> }
> + thd->lex->restore_backup_query_tables_list(&lex_backup);
> + thd->variables.option_bits= thd_saved_option;
> + thd->resume_subsequent_commits(suspended_wfc);
> + DBUG_EXECUTE_IF("inject_record_gtid_serverid_100_sleep",
> + {
> + if (gtid->server_id == 100)
> + my_sleep(500000);
> + });
> + DBUG_RETURN(err);
> +}
>
> - /* Now pull out all GTIDs that were recorded in this engine. */
> - delete_list = NULL;
> - next_ptr_ptr= &elem->list;
> - cur= elem->list;
> - best_sub_id= 0;
> - best_ptr_ptr= NULL;
> - while (cur)
> +
> +/*
> + Return a list of all old GTIDs in any mysql.gtid_slave_pos* table that are
> + no longer needed and can be deleted from the table.
> +
> + Within each domain, we need to keep around the latest GTID (the one with the
> + highest sub_id), but any others in that domain can be deleted.
> +*/
> +rpl_slave_state::list_element *
> +rpl_slave_state::gtid_grab_pending_delete_list()
> +{
> + uint32 i;
> + list_element *full_list;
> +
> + mysql_mutex_lock(&LOCK_slave_state);
> + full_list= NULL;
> + for (i= 0; i < hash.records; ++i)
> {
> - list_element *next= cur->next;
> - if (cur->hton == hton)
> - {
> - /* Belongs to same engine, so move it to the delete list. */
> - cur->next= delete_list;
> - delete_list= cur;
> - if (cur->sub_id > best_sub_id)
> + element *elem= (element *)my_hash_element(&hash, i);
> + list_element *elist= elem->list;
> + list_element *last_elem, **best_ptr_ptr, *cur, *next;
> + uint64 best_sub_id;
> +
> + if (!elist)
> + continue; /* Nothing here */
> +
> + /* Delete any old stuff, but keep around the most recent one. */
> + cur= elist;
> + best_sub_id= cur->sub_id;
> + best_ptr_ptr= &elist;
> + last_elem= cur;
> + while ((next= cur->next)) {
> + last_elem= next;
> + if (next->sub_id > best_sub_id)
> {
> - best_sub_id= cur->sub_id;
> - best_ptr_ptr= &delete_list;
> - }
> - else if (best_ptr_ptr == &delete_list)
> + best_sub_id= next->sub_id;
> best_ptr_ptr= &cur->next;
> - }
> - else
> - {
> - /* Another engine, leave it in the list. */
> - if (cur->sub_id > best_sub_id)
> - {
> - best_sub_id= cur->sub_id;
> - /* Current best is not on the delete list. */
> - best_ptr_ptr= NULL;
> }
> - *next_ptr_ptr= cur;
> - next_ptr_ptr= &cur->next;
> + cur= next;
> }
> - cur= next;
> - }
> - *next_ptr_ptr= NULL;
> - /*
> - If the highest sub_id element is on the delete list, put it back on the
> - original list, to preserve the highest sub_id element in the table for
> - GTID position recovery.
> - */
> - if (best_ptr_ptr)
> - {
> + /*
> + Append the new elements to the full list. Note the order is important;
> + we do it here so that we do not break the list if best_sub_id is the
> + last of the new elements.
> + */
> + last_elem->next= full_list;
> + /*
> + Delete the highest sub_id element from the old list, and put it back as
> + the single-element new list.
> + */
> cur= *best_ptr_ptr;
> *best_ptr_ptr= cur->next;
> - cur->next= elem->list;
> + cur->next= NULL;
> elem->list= cur;
> +
> + /*
> + Collect the full list so far here. Note that elist may have moved if we
> + deleted the first element, so order is again important.
> + */
> + full_list= elist;
> }
> mysql_mutex_unlock(&LOCK_slave_state);
>
> - if (!delete_list)
> - goto end;
> + return full_list;
> +}
> +
>
> - /* Now delete any already committed GTIDs. */
> - bitmap_set_bit(table->read_set, table->field[0]->field_index);
> - bitmap_set_bit(table->read_set, table->field[1]->field_index);
> +/* Find the mysql.gtid_slave_posXXX table associated with a given hton. */
> +LEX_CSTRING *
> +rpl_slave_state::select_gtid_pos_table(void *hton)
> +{
> + struct gtid_pos_table *table_entry;
>
> - if ((err= table->file->ha_index_init(0, 0)))
> + /*
> + See comments on rpl_slave_state::gtid_pos_tables for rules around proper
> + access to the list.
> + */
> + table_entry= (struct gtid_pos_table *)
> + my_atomic_loadptr_explicit(&gtid_pos_tables, MY_MEMORY_ORDER_ACQUIRE);
> +
> + while (table_entry)
> {
> - table->file->print_error(err, MYF(0));
> - goto end;
> + if (table_entry->table_hton == hton)
> + {
> + if (likely(table_entry->state == GTID_POS_AVAILABLE))
> + return &table_entry->table_name;
> + }
> + table_entry= table_entry->next;
> }
> - cur = delete_list;
> - while (cur)
> - {
> - uchar key_buffer[4+8];
>
> - DBUG_EXECUTE_IF("gtid_slave_pos_simulate_failed_delete",
> - { err= ENOENT;
> - table->file->print_error(err, MYF(0));
> - /* `break' does not work inside DBUG_EXECUTE_IF */
> - goto dbug_break; });
> + table_entry= (struct gtid_pos_table *)
> + my_atomic_loadptr_explicit(&default_gtid_pos_table, MY_MEMORY_ORDER_ACQUIRE);
> + return &table_entry->table_name;
> +}
>
> - next= cur->next;
>
> - table->field[1]->store(cur->sub_id, true);
> - /* domain_id is already set in table->record[0] from write_row() above. */
> - key_copy(key_buffer, table->record[0], &table->key_info[0], 0, false);
> - if (table->file->ha_index_read_map(table->record[1], key_buffer,
> - HA_WHOLE_KEY, HA_READ_KEY_EXACT))
> - /* We cannot find the row, assume it is already deleted. */
> - ;
> - else if ((err= table->file->ha_delete_row(table->record[1])))
> - table->file->print_error(err, MYF(0));
> - /*
> - In case of error, we still discard the element from the list. We do
> - not want to endlessly error on the same element in case of table
> - corruption or such.
> - */
> - cur= next;
> - if (err)
> - break;
> - }
> -IF_DBUG(dbug_break:, )
> - table->file->ha_index_end();
> +void
> +rpl_slave_state::gtid_delete_pending(THD *thd,
> + rpl_slave_state::list_element **list_ptr)
> +{
> + int err= 0;
> + ulonglong thd_saved_option;
>
> -end:
> + if (unlikely(!loaded))
> + return;
>
> #ifdef WITH_WSREP
> - thd->wsrep_ignore_table= false;
> + /*
> + Updates in slave state table should not be appended to galera transaction
> + writeset.
> + */
> + thd->wsrep_ignore_table= true;
> #endif
>
> - if (table_opened)
> + thd_saved_option= thd->variables.option_bits;
> + thd->variables.option_bits&=
> + ~(ulonglong)(OPTION_NOT_AUTOCOMMIT |OPTION_BEGIN |OPTION_BIN_LOG |
> + OPTION_GTID_BEGIN);
> +
> + while (*list_ptr)
> {
> - if (err || (err= ha_commit_trans(thd, FALSE)))
> - {
> - /*
> - If error, we need to put any remaining delete_list back into the HASH
> - so we can do another delete attempt later.
> - */
> - if (delete_list)
> - {
> - put_back_list(gtid->domain_id, delete_list);
> - delete_list = 0;
> - }
> + LEX_CSTRING *gtid_pos_table_name, *tmp_table_name;
> + Query_tables_list lex_backup;
> + TABLE_LIST tlist;
> + TABLE *table;
> + handler::Table_flags direct_pos;
> + list_element *cur, **cur_ptr_ptr;
> + bool table_opened= false;
> + void *hton= (*list_ptr)->hton;
>
> - ha_rollback_trans(thd, FALSE);
> + thd->reset_for_next_command();
> +
> + /*
> + Only the SQL thread can call select_gtid_pos_table without a mutex
> + Other threads needs to use a mutex and take into account that the
> + result may change during execution, so we have to make a copy.
> + */
> + mysql_mutex_lock(&LOCK_slave_state);
> + tmp_table_name= select_gtid_pos_table(hton);
> + gtid_pos_table_name= thd->make_clex_string(tmp_table_name->str,
> + tmp_table_name->length);
> + mysql_mutex_unlock(&LOCK_slave_state);
> + if (!gtid_pos_table_name)
> + {
> + /* Out of memory - we can try again later. */
> + break;
> }
> - close_thread_tables(thd);
> - if (rgi)
> +
> + thd->lex->reset_n_backup_query_tables_list(&lex_backup);
> + tlist.init_one_table(&MYSQL_SCHEMA_NAME, gtid_pos_table_name, NULL, TL_WRITE);
> + if ((err= open_and_lock_tables(thd, &tlist, FALSE, 0)))
> + goto end;
> + table_opened= true;
> + table= tlist.table;
> +
> + if ((err= gtid_check_rpl_slave_state_table(table)))
> + goto end;
> +
> + direct_pos= table->file->ha_table_flags() & HA_PRIMARY_KEY_REQUIRED_FOR_POSITION;
> + bitmap_set_all(table->write_set);
> + table->rpl_write_set= table->write_set;
> +
> + /* Now delete any already committed GTIDs. */
> + bitmap_set_bit(table->read_set, table->field[0]->field_index);
> + bitmap_set_bit(table->read_set, table->field[1]->field_index);
> +
> + if (!direct_pos && (err= table->file->ha_index_init(0, 0)))
> {
> - thd->mdl_context.release_statement_locks();
> - /*
> - Save the list of old gtid entries we deleted. If this transaction
> - fails later for some reason and is rolled back, the deletion of those
> - entries will be rolled back as well, and we will need to put them back
> - on the to-be-deleted list so we can re-do the deletion. Otherwise
> - redundant rows in mysql.gtid_slave_pos may accumulate if transactions
> - are rolled back and retried after record_gtid().
> - */
> -#ifdef HAVE_REPLICATION
> - rgi->pending_gtid_deletes_save(gtid->domain_id, delete_list);
> -#endif
> + table->file->print_error(err, MYF(0));
> + goto end;
> }
> - else
> +
> + cur = *list_ptr;
> + cur_ptr_ptr = list_ptr;
> + do
> {
> - thd->mdl_context.release_transactional_locks();
> -#ifdef HAVE_REPLICATION
> - rpl_group_info::pending_gtid_deletes_free(delete_list);
> -#endif
> + uchar key_buffer[4+8];
> + list_element *next= cur->next;
> +
> + if (cur->hton == hton)
> + {
> + int res;
> +
> + table->field[0]->store((ulonglong)cur->domain_id, true);
> + table->field[1]->store(cur->sub_id, true);
> + if (direct_pos)
> + {
> + res= table->file->ha_rnd_pos_by_record(table->record[0]);
> + }
> + else
> + {
> + key_copy(key_buffer, table->record[0], &table->key_info[0], 0, false);
> + res= table->file->ha_index_read_map(table->record[0], key_buffer,
> + HA_WHOLE_KEY, HA_READ_KEY_EXACT);
> + }
> + DBUG_EXECUTE_IF("gtid_slave_pos_simulate_failed_delete",
> + { res= 1;
> + err= ENOENT;
> + sql_print_error("<DEBUG> Error deleting old GTID row");
> + });
> + if (res)
> + /* We cannot find the row, assume it is already deleted. */
> + ;
> + else if ((err= table->file->ha_delete_row(table->record[0])))
> + {
> + sql_print_error("Error deleting old GTID row: %s",
> + thd->get_stmt_da()->message());
> + /*
> + In case of error, we still discard the element from the list. We do
> + not want to endlessly error on the same element in case of table
> + corruption or such.
> + */
> + }
> + *cur_ptr_ptr= next;
> + my_free(cur);
> + }
> + else
> + {
> + /* Leave this one in the list until we get to the table for its hton. */
> + cur_ptr_ptr= &cur->next;
> + }
> + cur= next;
> + if (err)
> + break;
> + } while (cur);
> +end:
> + if (table_opened)
> + {
> + if (!direct_pos)
> + table->file->ha_index_end();
> +
> + if (err || (err= ha_commit_trans(thd, FALSE)))
> + ha_rollback_trans(thd, FALSE);
> }
> + close_thread_tables(thd);
> + thd->mdl_context.release_transactional_locks();
> + thd->lex->restore_backup_query_tables_list(&lex_backup);
> +
> + if (err)
> + break;
> }
> - thd->lex->restore_backup_query_tables_list(&lex_backup);
> thd->variables.option_bits= thd_saved_option;
> - thd->resume_subsequent_commits(suspended_wfc);
> - DBUG_EXECUTE_IF("inject_record_gtid_serverid_100_sleep",
> - {
> - if (gtid->server_id == 100)
> - my_sleep(500000);
> - });
> - DBUG_RETURN(err);
> +
> +#ifdef WITH_WSREP
> + thd->wsrep_ignore_table= false;
> +#endif
> }
>
>
> @@ -1251,7 +1368,7 @@ rpl_slave_state::load(THD *thd, const char *state_from_master, size_t len,
>
> if (gtid_parser_helper(&state_from_master, end, &gtid) ||
> !(sub_id= next_sub_id(gtid.domain_id)) ||
> - record_gtid(thd, &gtid, sub_id, NULL, in_statement, &hton) ||
> + record_gtid(thd, &gtid, sub_id, false, in_statement, &hton) ||
> update(gtid.domain_id, gtid.server_id, sub_id, gtid.seq_no, hton, NULL))
> return 1;
> if (state_from_master == end)
> diff --git a/sql/rpl_gtid.h b/sql/rpl_gtid.h
> index 0fc92d5e33c..60d822f7b0d 100644
> --- a/sql/rpl_gtid.h
> +++ b/sql/rpl_gtid.h
> @@ -118,8 +118,9 @@ struct rpl_slave_state
> {
> struct list_element *next;
> uint64 sub_id;
> - uint64 seq_no;
> + uint32 domain_id;
> uint32 server_id;
> + uint64 seq_no;
> /*
> hton of mysql.gtid_slave_pos* table used to record this GTID.
> Can be NULL if the gtid table failed to load (eg. missing
> @@ -191,6 +192,8 @@ struct rpl_slave_state
>
> /* Mapping from domain_id to its element. */
> HASH hash;
> + /* GTIDs added since last purge of old mysql.gtid_slave_pos rows. */
> + uint32 pending_gtid_count;
> /* Mutex protecting access to the state. */
> mysql_mutex_t LOCK_slave_state;
> /* Auxiliary buffer to sort gtid list. */
> @@ -233,7 +236,10 @@ struct rpl_slave_state
> int truncate_state_table(THD *thd);
> void select_gtid_pos_table(THD *thd, LEX_CSTRING *out_tablename);
> int record_gtid(THD *thd, const rpl_gtid *gtid, uint64 sub_id,
> - rpl_group_info *rgi, bool in_statement, void **out_hton);
> + bool in_transaction, bool in_statement, void **out_hton);
> + list_element *gtid_grab_pending_delete_list();
> + LEX_CSTRING *select_gtid_pos_table(void *hton);
> + void gtid_delete_pending(THD *thd, rpl_slave_state::list_element **list_ptr);
> uint64 next_sub_id(uint32 domain_id);
> int iterate(int (*cb)(rpl_gtid *, void *), void *data,
> rpl_gtid *extra_gtids, uint32 num_extra,
> @@ -245,7 +251,7 @@ struct rpl_slave_state
> bool is_empty();
>
> element *get_element(uint32 domain_id);
> - int put_back_list(uint32 domain_id, list_element *list);
> + int put_back_list(list_element *list);
>
> void update_state_hash(uint64 sub_id, rpl_gtid *gtid, void *hton,
> rpl_group_info *rgi);
> diff --git a/sql/rpl_rli.cc b/sql/rpl_rli.cc
> index b275ad884bd..2d91620c898 100644
> --- a/sql/rpl_rli.cc
> +++ b/sql/rpl_rli.cc
> @@ -1820,6 +1820,7 @@ rpl_load_gtid_slave_state(THD *thd)
> int err= 0;
> uint32 i;
> load_gtid_state_cb_data cb_data;
> + rpl_slave_state::list_element *old_gtids_list;
> DBUG_ENTER("rpl_load_gtid_slave_state");
>
> mysql_mutex_lock(&rpl_global_gtid_slave_state->LOCK_slave_state);
> @@ -1905,6 +1906,13 @@ rpl_load_gtid_slave_state(THD *thd)
> rpl_global_gtid_slave_state->loaded= true;
> mysql_mutex_unlock(&rpl_global_gtid_slave_state->LOCK_slave_state);
>
> + /* Clear out no longer needed elements now. */
> + old_gtids_list=
> + rpl_global_gtid_slave_state->gtid_grab_pending_delete_list();
> + rpl_global_gtid_slave_state->gtid_delete_pending(thd, &old_gtids_list);
> + if (old_gtids_list)
> + rpl_global_gtid_slave_state->put_back_list(old_gtids_list);
> +
> end:
> if (array_inited)
> delete_dynamic(&array);
> @@ -2086,7 +2094,6 @@ rpl_group_info::reinit(Relay_log_info *rli)
> long_find_row_note_printed= false;
> did_mark_start_commit= false;
> gtid_ev_flags2= 0;
> - pending_gtid_delete_list= NULL;
> last_master_timestamp = 0;
> gtid_ignore_duplicate_state= GTID_DUPLICATE_NULL;
> speculation= SPECULATE_NO;
> @@ -2217,12 +2224,6 @@ void rpl_group_info::cleanup_context(THD *thd, bool error)
> erroneously update the GTID position.
> */
> gtid_pending= false;
> -
> - /*
> - Rollback will have undone any deletions of old rows we might have made
> - in mysql.gtid_slave_pos. Put those rows back on the list to be deleted.
> - */
> - pending_gtid_deletes_put_back();
> }
> m_table_map.clear_tables();
> slave_close_thread_tables(thd);
> @@ -2448,78 +2449,6 @@ rpl_group_info::unmark_start_commit()
> }
>
>
> -/*
> - When record_gtid() has deleted any old rows from the table
> - mysql.gtid_slave_pos as part of a replicated transaction, save the list of
> - rows deleted here.
> -
> - If later the transaction fails (eg. optimistic parallel replication), the
> - deletes will be undone when the transaction is rolled back. Then we can
> - put back the list of rows into the rpl_global_gtid_slave_state, so that
> - we can re-do the deletes and avoid accumulating old rows in the table.
> -*/
> -void
> -rpl_group_info::pending_gtid_deletes_save(uint32 domain_id,
> - rpl_slave_state::list_element *list)
> -{
> - /*
> - We should never get to a state where we try to save a new pending list of
> - gtid deletes while we still have an old one. But make sure we handle it
> - anyway just in case, so we avoid leaving stray entries in the
> - mysql.gtid_slave_pos table.
> - */
> - DBUG_ASSERT(!pending_gtid_delete_list);
> - if (unlikely(pending_gtid_delete_list))
> - pending_gtid_deletes_put_back();
> -
> - pending_gtid_delete_list= list;
> - pending_gtid_delete_list_domain= domain_id;
> -}
> -
> -
> -/*
> - Take the list recorded by pending_gtid_deletes_save() and put it back into
> - rpl_global_gtid_slave_state. This is needed if deletion of the rows was
> - rolled back due to transaction failure.
> -*/
> -void
> -rpl_group_info::pending_gtid_deletes_put_back()
> -{
> - if (pending_gtid_delete_list)
> - {
> - rpl_global_gtid_slave_state->put_back_list(pending_gtid_delete_list_domain,
> - pending_gtid_delete_list);
> - pending_gtid_delete_list= NULL;
> - }
> -}
> -
> -
> -/*
> - Free the list recorded by pending_gtid_deletes_save(). Done when the deletes
> - in the list have been permanently committed.
> -*/
> -void
> -rpl_group_info::pending_gtid_deletes_clear()
> -{
> - pending_gtid_deletes_free(pending_gtid_delete_list);
> - pending_gtid_delete_list= NULL;
> -}
> -
> -
> -void
> -rpl_group_info::pending_gtid_deletes_free(rpl_slave_state::list_element *list)
> -{
> - rpl_slave_state::list_element *next;
> -
> - while (list)
> - {
> - next= list->next;
> - my_free(list);
> - list= next;
> - }
> -}
> -
> -
> rpl_sql_thread_info::rpl_sql_thread_info(Rpl_filter *filter)
> : rpl_filter(filter)
> {
> diff --git a/sql/rpl_rli.h b/sql/rpl_rli.h
> index d9f0e0e5d3b..b8b153c34be 100644
> --- a/sql/rpl_rli.h
> +++ b/sql/rpl_rli.h
> @@ -757,11 +757,6 @@ struct rpl_group_info
> /* Needs room for "Gtid D-S-N\x00". */
> char gtid_info_buf[5+10+1+10+1+20+1];
>
> - /* List of not yet committed deletions in mysql.gtid_slave_pos. */
> - rpl_slave_state::list_element *pending_gtid_delete_list;
> - /* Domain associated with pending_gtid_delete_list. */
> - uint32 pending_gtid_delete_list_domain;
> -
> /*
> The timestamp, from the master, of the commit event.
> Used to do delayed update of rli->last_master_timestamp, for getting
> @@ -903,12 +898,6 @@ struct rpl_group_info
> char *gtid_info();
> void unmark_start_commit();
>
> - static void pending_gtid_deletes_free(rpl_slave_state::list_element *list);
> - void pending_gtid_deletes_save(uint32 domain_id,
> - rpl_slave_state::list_element *list);
> - void pending_gtid_deletes_put_back();
> - void pending_gtid_deletes_clear();
> -
> longlong get_row_stmt_start_timestamp()
> {
> return row_stmt_start_timestamp;
> diff --git a/sql/slave.cc b/sql/slave.cc
> index bb1300d36e6..f8499513dd6 100644
> --- a/sql/slave.cc
> +++ b/sql/slave.cc
> @@ -465,6 +465,8 @@ static struct slave_background_gtid_pos_create_t {
> void *hton;
> } *slave_background_gtid_pos_create_list;
>
> +static volatile bool slave_background_gtid_pending_delete_flag;
> +
>
> pthread_handler_t
> handle_slave_background(void *arg __attribute__((unused)))
> @@ -499,6 +501,7 @@ handle_slave_background(void *arg __attribute__((unused)))
> {
> slave_background_kill_t *kill_list;
> slave_background_gtid_pos_create_t *create_list;
> + bool pending_deletes;
>
> thd->ENTER_COND(&COND_slave_background, &LOCK_slave_background,
> &stage_slave_background_wait_request,
> @@ -508,13 +511,15 @@ handle_slave_background(void *arg __attribute__((unused)))
> stop= abort_loop || thd->killed || slave_background_thread_stop;
> kill_list= slave_background_kill_list;
> create_list= slave_background_gtid_pos_create_list;
> - if (stop || kill_list || create_list)
> + pending_deletes= slave_background_gtid_pending_delete_flag;
> + if (stop || kill_list || create_list || pending_deletes)
> break;
> mysql_cond_wait(&COND_slave_background, &LOCK_slave_background);
> }
>
> slave_background_kill_list= NULL;
> slave_background_gtid_pos_create_list= NULL;
> + slave_background_gtid_pending_delete_flag= false;
> thd->EXIT_COND(&old_stage);
>
> while (kill_list)
> @@ -541,6 +546,17 @@ handle_slave_background(void *arg __attribute__((unused)))
> create_list= next;
> }
>
> + if (pending_deletes)
> + {
> + rpl_slave_state::list_element *list;
> +
> + slave_background_gtid_pending_delete_flag= false;
> + list= rpl_global_gtid_slave_state->gtid_grab_pending_delete_list();
> + rpl_global_gtid_slave_state->gtid_delete_pending(thd, &list);
> + if (list)
> + rpl_global_gtid_slave_state->put_back_list(list);
> + }
> +
> mysql_mutex_lock(&LOCK_slave_background);
> } while (!stop);
>
> @@ -615,6 +631,23 @@ slave_background_gtid_pos_create_request(
>
>
> /*
> + Request the slave background thread to delete no longer used rows from the
> + mysql.gtid_slave_pos* tables.
> +
> + This is called from time-critical rpl_slave_state::update(), so we avoid
> + taking any locks here. This means we may race with the background thread
> + to occasionally lose a signal. This is not a problem; any pending rows to
> + be deleted will just be deleted a bit later as part of the next batch.
> +*/
> +void
> +slave_background_gtid_pending_delete_request(void)
> +{
> + slave_background_gtid_pending_delete_flag= true;
> + mysql_cond_signal(&COND_slave_background);
> +}
> +
> +
> +/*
> Start the slave background thread.
>
> This thread is currently used for two purposes:
> diff --git a/sql/slave.h b/sql/slave.h
> index 649d55b45b9..12d569b0333 100644
> --- a/sql/slave.h
> +++ b/sql/slave.h
> @@ -276,6 +276,7 @@ bool net_request_file(NET* net, const char* fname);
> void slave_background_kill_request(THD *to_kill);
> void slave_background_gtid_pos_create_request
> (rpl_slave_state::gtid_pos_table *table_entry);
> +void slave_background_gtid_pending_delete_request(void);
>
> extern bool volatile abort_loop;
> extern Master_info *active_mi; /* active_mi for multi-master */
> diff --git a/sql/sys_vars.cc b/sql/sys_vars.cc
> index 6d4c135683a..9348f4e5c98 100644
> --- a/sql/sys_vars.cc
> +++ b/sql/sys_vars.cc
> @@ -1942,6 +1942,19 @@ Sys_var_last_gtid::session_value_ptr(THD *thd, const LEX_CSTRING *base)
> }
>
>
> +static Sys_var_uint Sys_gtid_cleanup_batch_size(
> + "gtid_cleanup_batch_size",
> + "Normally does not need tuning. How many old rows must accumulate in "
> + "the mysql.gtid_slave_pos table before a background job will be run to "
> + "delete them. Can be increased to reduce number of commits if "
> + "using many different engines with --gtid_pos_auto_engines, or to "
> + "reduce CPU overhead if using a huge number of different "
> + "gtid_domain_ids. Can be decreased to reduce number of old rows in the "
> + "table.",
> + GLOBAL_VAR(opt_gtid_cleanup_batch_size), CMD_LINE(REQUIRED_ARG),
> + VALID_RANGE(0,2147483647), DEFAULT(64), BLOCK_SIZE(1));
> +
> +
> static bool
> check_slave_parallel_threads(sys_var *self, THD *thd, set_var *var)
> {
> diff --git a/storage/rocksdb/mysql-test/rocksdb_rpl/r/mdev12179.result b/storage/rocksdb/mysql-test/rocksdb_rpl/r/mdev12179.result
> index 9c20fea97ae..a1e501f78f4 100644
> --- a/storage/rocksdb/mysql-test/rocksdb_rpl/r/mdev12179.result
> +++ b/storage/rocksdb/mysql-test/rocksdb_rpl/r/mdev12179.result
> @@ -2,6 +2,7 @@ include/master-slave.inc
> [connection master]
> connection server_2;
> include/stop_slave.inc
> +SET GLOBAL gtid_cleanup_batch_size = 999999999;
> CHANGE MASTER TO master_use_gtid=slave_pos;
> SET sql_log_bin=0;
> CREATE TABLE mysql.gtid_slave_pos_innodb LIKE mysql.gtid_slave_pos;
> @@ -41,6 +42,8 @@ a
> 1
> SELECT * FROM mysql.gtid_slave_pos ORDER BY sub_id;
> domain_id sub_id server_id seq_no
> +0 1 1 1
> +0 2 1 2
> 0 3 1 3
> 0 4 1 4
> SELECT * FROM ( SELECT * FROM mysql.gtid_slave_pos_innodb
> @@ -121,6 +124,21 @@ Transactions_multi_engine 6
> DELETE FROM t1 WHERE a >= 100;
> DELETE FROM t2 WHERE a >= 100;
> DELETE FROM t3 WHERE a >= 100;
> +connection server_1;
> +include/save_master_gtid.inc
> +connection server_2;
> +include/sync_with_master_gtid.inc
> +SELECT COUNT(*)>=10 FROM mysql.gtid_slave_pos;
> +COUNT(*)>=10
> +1
> +SELECT COUNT(*)>=10 FROM ( SELECT * FROM mysql.gtid_slave_pos_innodb
> +UNION ALL SELECT * FROM mysql.gtid_slave_pos_innodb_redundant) inner_select;
> +COUNT(*)>=10
> +1
> +SELECT COUNT(*)>=10 FROM mysql.gtid_slave_pos_rocksdb;
> +COUNT(*)>=10
> +1
> +SET GLOBAL gtid_cleanup_batch_size = 3;
> connection server_2;
> include/stop_slave.inc
> SET sql_log_bin=0;
> diff --git a/storage/rocksdb/mysql-test/rocksdb_rpl/t/mdev12179.test b/storage/rocksdb/mysql-test/rocksdb_rpl/t/mdev12179.test
> index e0d16e7f242..631d9ca533f 100644
> --- a/storage/rocksdb/mysql-test/rocksdb_rpl/t/mdev12179.test
> +++ b/storage/rocksdb/mysql-test/rocksdb_rpl/t/mdev12179.test
> @@ -4,6 +4,12 @@
>
> --connection server_2
> --source include/stop_slave.inc
> +
> +# Set GTID cleanup limit high enough that cleanup will not run and we
> +# can rely on consistent table output in .result.
> +--let $old_gtid_cleanup_batch_size=`SELECT @@GLOBAL.gtid_cleanup_batch_size`
> +SET GLOBAL gtid_cleanup_batch_size = 999999999;
> +
> CHANGE MASTER TO master_use_gtid=slave_pos;
> SET sql_log_bin=0;
> CREATE TABLE mysql.gtid_slave_pos_innodb LIKE mysql.gtid_slave_pos;
> @@ -89,6 +95,82 @@ DELETE FROM t2 WHERE a >= 100;
> DELETE FROM t3 WHERE a >= 100;
>
>
> +# Create a bunch more GTIDs in mysql.gtid_slave_pos* tables to test with.
> +--connection server_1
> +--disable_query_log
> +let $i=10;
> +while ($i) {
> + eval INSERT INTO t1 VALUES (300+$i);
> + eval INSERT INTO t2 VALUES (300+$i);
> + eval INSERT INTO t3 VALUES (300+$i);
> + dec $i;
> +}
> +--enable_query_log
> +--source include/save_master_gtid.inc
> +
> +--connection server_2
> +--source include/sync_with_master_gtid.inc
> +
> +# Check that we have many rows in mysql.gtid_slave_pos now (since
> +# @@gtid_cleanup_batch_size was set to a huge value). No need to check
> +# for an exact number, since that will require changing .result if
> +# anything changes prior to this point, and we just need to know that
> +# we still have some data in the tables to make the following
> +# test effective.
> +SELECT COUNT(*)>=10 FROM mysql.gtid_slave_pos;
> +SELECT COUNT(*)>=10 FROM ( SELECT * FROM mysql.gtid_slave_pos_innodb
> + UNION ALL SELECT * FROM mysql.gtid_slave_pos_innodb_redundant) inner_select;
> +SELECT COUNT(*)>=10 FROM mysql.gtid_slave_pos_rocksdb;
> +
> +# Check that old GTID rows will be deleted when batch delete size is
> +# set reasonably. Old row deletion is not 100% deterministic (by design), so
> +# we must wait for it to occur, but it should occur eventually.
> +SET GLOBAL gtid_cleanup_batch_size = 3;
> +let $i=40;
> +--disable_query_log
> +--let $keep_include_silent=1
> +while ($i) {
> + let N=`SELECT 1+($i MOD 3)`;
> + --connection server_1
> + eval UPDATE t$N SET a=a+1 WHERE a=(SELECT MAX(a) FROM t$N);
> + --source include/save_master_gtid.inc
> + --connection server_2
> + --source include/sync_with_master_gtid.inc
> + let $j=50;
> + while ($j) {
> + let $is_done=`SELECT SUM(a)=1 FROM (
> + SELECT COUNT(*) AS a FROM mysql.gtid_slave_pos
> + UNION ALL
> + SELECT COUNT(*) AS a FROM ( SELECT * FROM mysql.gtid_slave_pos_innodb
> + UNION ALL SELECT * FROM mysql.gtid_slave_pos_innodb_redundant) inner_select
> + UNION ALL
> + SELECT COUNT(*) AS a FROM mysql.gtid_slave_pos_rocksdb) outer_select`;
> + if ($is_done) {
> + let $j=0;
> + }
> + if (!$is_done) {
> + real_sleep 0.1;
> + dec $j;
> + }
> + }
> + dec $i;
> + if ($is_done) {
> + let $i=0;
> + }
> +}
> +--enable_query_log
> +--let $keep_include_silent=0
> +if (!$is_done) {
> + --echo Timed out waiting for mysql.gtid_slave_pos* tables to be cleaned up
> +}
> +
> +--disable_query_log
> +DELETE FROM t1 WHERE a >= 100;
> +DELETE FROM t2 WHERE a >= 100;
> +DELETE FROM t3 WHERE a >= 100;
> +--enable_query_log
> +
> +
> # Test status variables Rpl_transactions_multi_engine and Transactions_gtid_foreign_engine.
> # Have mysql.gtid_slave_pos* for myisam and innodb but not rocksdb.
> --connection server_2
> @@ -223,6 +305,9 @@ SHOW STATUS LIKE "%transactions%engine";
> SET sql_log_bin=0;
> DROP TABLE mysql.gtid_slave_pos_innodb;
> SET sql_log_bin=1;
> +--disable_query_log
> +eval SET GLOBAL gtid_cleanup_batch_size = $old_gtid_cleanup_batch_size;
> +--enable_query_log
>
> --connection server_1
> DROP TABLE t1;
> diff --git a/storage/tokudb/mysql-test/tokudb_rpl/r/mdev12179.result b/storage/tokudb/mysql-test/tokudb_rpl/r/mdev12179.result
> index d4532eec4e2..d79e7e59aa4 100644
> --- a/storage/tokudb/mysql-test/tokudb_rpl/r/mdev12179.result
> +++ b/storage/tokudb/mysql-test/tokudb_rpl/r/mdev12179.result
> @@ -2,6 +2,7 @@ include/master-slave.inc
> [connection master]
> connection server_2;
> include/stop_slave.inc
> +SET GLOBAL gtid_cleanup_batch_size = 999999999;
> CHANGE MASTER TO master_use_gtid=slave_pos;
> SET sql_log_bin=0;
> CREATE TABLE mysql.gtid_slave_pos_innodb LIKE mysql.gtid_slave_pos;
> @@ -41,6 +42,8 @@ a
> 1
> SELECT * FROM mysql.gtid_slave_pos ORDER BY sub_id;
> domain_id sub_id server_id seq_no
> +0 1 1 1
> +0 2 1 2
> 0 3 1 3
> 0 4 1 4
> SELECT * FROM ( SELECT * FROM mysql.gtid_slave_pos_innodb
> @@ -121,6 +124,21 @@ Transactions_multi_engine 6
> DELETE FROM t1 WHERE a >= 100;
> DELETE FROM t2 WHERE a >= 100;
> DELETE FROM t3 WHERE a >= 100;
> +connection server_1;
> +include/save_master_gtid.inc
> +connection server_2;
> +include/sync_with_master_gtid.inc
> +SELECT COUNT(*)>=10 FROM mysql.gtid_slave_pos;
> +COUNT(*)>=10
> +1
> +SELECT COUNT(*)>=10 FROM ( SELECT * FROM mysql.gtid_slave_pos_innodb
> +UNION ALL SELECT * FROM mysql.gtid_slave_pos_innodb_redundant) inner_select;
> +COUNT(*)>=10
> +1
> +SELECT COUNT(*)>=10 FROM mysql.gtid_slave_pos_tokudb;
> +COUNT(*)>=10
> +1
> +SET GLOBAL gtid_cleanup_batch_size = 3;
> connection server_2;
> include/stop_slave.inc
> SET sql_log_bin=0;
> diff --git a/storage/tokudb/mysql-test/tokudb_rpl/t/mdev12179.test b/storage/tokudb/mysql-test/tokudb_rpl/t/mdev12179.test
> index ceb119cd0dc..1d19a25889e 100644
> --- a/storage/tokudb/mysql-test/tokudb_rpl/t/mdev12179.test
> +++ b/storage/tokudb/mysql-test/tokudb_rpl/t/mdev12179.test
> @@ -4,6 +4,12 @@
>
> --connection server_2
> --source include/stop_slave.inc
> +
> +# Set GTID cleanup limit high enough that cleanup will not run and we
> +# can rely on consistent table output in .result.
> +--let $old_gtid_cleanup_batch_size=`SELECT @@GLOBAL.gtid_cleanup_batch_size`
> +SET GLOBAL gtid_cleanup_batch_size = 999999999;
> +
> CHANGE MASTER TO master_use_gtid=slave_pos;
> SET sql_log_bin=0;
> CREATE TABLE mysql.gtid_slave_pos_innodb LIKE mysql.gtid_slave_pos;
> @@ -89,6 +95,82 @@ DELETE FROM t2 WHERE a >= 100;
> DELETE FROM t3 WHERE a >= 100;
>
>
> +# Create a bunch more GTIDs in mysql.gtid_slave_pos* tables to test with.
> +--connection server_1
> +--disable_query_log
> +let $i=10;
> +while ($i) {
> + eval INSERT INTO t1 VALUES (300+$i);
> + eval INSERT INTO t2 VALUES (300+$i);
> + eval INSERT INTO t3 VALUES (300+$i);
> + dec $i;
> +}
> +--enable_query_log
> +--source include/save_master_gtid.inc
> +
> +--connection server_2
> +--source include/sync_with_master_gtid.inc
> +
> +# Check that we have many rows in mysql.gtid_slave_pos now (since
> +# @@gtid_cleanup_batch_size was set to a huge value). No need to check
> +# for an exact number, since that will require changing .result if
> +# anything changes prior to this point, and we just need to know that
> +# we still have some data in the tables to make the following
> +# test effective.
> +SELECT COUNT(*)>=10 FROM mysql.gtid_slave_pos;
> +SELECT COUNT(*)>=10 FROM ( SELECT * FROM mysql.gtid_slave_pos_innodb
> + UNION ALL SELECT * FROM mysql.gtid_slave_pos_innodb_redundant) inner_select;
> +SELECT COUNT(*)>=10 FROM mysql.gtid_slave_pos_tokudb;
> +
> +# Check that old GTID rows will be deleted when batch delete size is
> +# set reasonably. Old row deletion is not 100% deterministic (by design), so
> +# we must wait for it to occur, but it should occur eventually.
> +SET GLOBAL gtid_cleanup_batch_size = 3;
> +let $i=40;
> +--disable_query_log
> +--let $keep_include_silent=1
> +while ($i) {
> + let N=`SELECT 1+($i MOD 3)`;
> + --connection server_1
> + eval UPDATE t$N SET a=a+1 WHERE a=(SELECT MAX(a) FROM t$N);
> + --source include/save_master_gtid.inc
> + --connection server_2
> + --source include/sync_with_master_gtid.inc
> + let $j=50;
> + while ($j) {
> + let $is_done=`SELECT SUM(a)=1 FROM (
> + SELECT COUNT(*) AS a FROM mysql.gtid_slave_pos
> + UNION ALL
> + SELECT COUNT(*) AS a FROM ( SELECT * FROM mysql.gtid_slave_pos_innodb
> + UNION ALL SELECT * FROM mysql.gtid_slave_pos_innodb_redundant) inner_select
> + UNION ALL
> + SELECT COUNT(*) AS a FROM mysql.gtid_slave_pos_tokudb) outer_select`;
> + if ($is_done) {
> + let $j=0;
> + }
> + if (!$is_done) {
> + real_sleep 0.1;
> + dec $j;
> + }
> + }
> + dec $i;
> + if ($is_done) {
> + let $i=0;
> + }
> +}
> +--enable_query_log
> +--let $keep_include_silent=0
> +if (!$is_done) {
> + --echo Timed out waiting for mysql.gtid_slave_pos* tables to be cleaned up
> +}
> +
> +--disable_query_log
> +DELETE FROM t1 WHERE a >= 100;
> +DELETE FROM t2 WHERE a >= 100;
> +DELETE FROM t3 WHERE a >= 100;
> +--enable_query_log
> +
> +
> # Test status variables Rpl_transactions_multi_engine and Transactions_gtid_foreign_engine.
> # Have mysql.gtid_slave_pos* for myisam and innodb but not tokudb.
> --connection server_2
> @@ -223,6 +305,9 @@ SHOW STATUS LIKE "%transactions%engine";
> SET sql_log_bin=0;
> DROP TABLE mysql.gtid_slave_pos_innodb;
> SET sql_log_bin=1;
> +--disable_query_log
> +eval SET GLOBAL gtid_cleanup_batch_size = $old_gtid_cleanup_batch_size;
> +--enable_query_log
>
> --connection server_1
> DROP TABLE t1;
> _______________________________________________
> commits mailing list
> commits@xxxxxxxxxxx
> https://lists.askmonty.org/cgi-bin/mailman/listinfo/commits
Follow ups