maria-developers team mailing list archive
-
maria-developers team
-
Mailing list archive
-
Message #13086
Re: MDEV-16329 ALTER ONLINE TABLE
Hi, Nikita,
this is a review of the combined diff of your branch.
Of `git diff github/10.8...github/bb-10.8-online-alter`, that is of
cf7cc376bac fix savepoints in myisam
4a128ca5458 fix main.delayed
7300151cc84 fix skipping rocksdb
19504dfdbcc add binlog/standalone combinations
55e993595aa add rocksdb combination
06645b17450 savepoints
9ffcd13b9fe rename tests
730926ae962 MDEV-16329 [5/5] ALTER ONLINE TABLE
cf52ea33a70 MDEV-16329 [4/5] Refactor MYSQL_BIN_LOG: extract Event_log ancestor
0445c8f1ee9 MDEV-16329 [3/5] use binlog_cache_data directly in most places
3f3da0f65a3 MDEV-16329 [2/5] refactor binlog and cache_mngr
dc79667ca73 MDEV-16329 [1/5] add THD::binlog_get_cache_mngr
6f9dd00ce2d rpl: repack table_def
faf8680df8d Copy_field: add const to arguments
It was mostly fine the last time already, so only few comments below:
> diff --git a/mysql-test/include/have_log_bin_off.require b/mysql-test/include/have_log_bin_off.require
> new file mode 100644
> index 00000000000..979dbe75f80
> --- /dev/null
> +++ b/mysql-test/include/have_log_bin_off.require
> @@ -0,0 +1,2 @@
> +Variable_name Value
> +log_bin OFF
better use `if (...) { skip }` pattern
*.require is an obsolete 20-year old technique from before if- and skip-times
> diff --git a/mysql-test/main/alter_table.test b/mysql-test/main/alter_table.test
> index 31c69783248..91423ee8c1f 100644
> --- a/mysql-test/main/alter_table.test
> +++ b/mysql-test/main/alter_table.test
> @@ -1532,8 +1532,12 @@ ALTER TABLE t1 DROP INDEX i1, DROP INDEX i2, DROP INDEX i3, DROP INDEX i4;
> ALTER TABLE t1 ADD INDEX i1(b), ALGORITHM= INPLACE, LOCK= NONE;
> ALTER TABLE t1 ADD INDEX i2(b), ALGORITHM= INPLACE, LOCK= SHARED;
> ALTER TABLE t1 ADD INDEX i3(b), ALGORITHM= INPLACE, LOCK= EXCLUSIVE;
> ---error ER_ALTER_OPERATION_NOT_SUPPORTED_REASON
> +--disable_info
> +--disable_warnings
> +--error 0,ER_ALTER_OPERATION_NOT_SUPPORTED_REASON
why, because of embedded?
> ALTER TABLE t1 ADD INDEX i4(b), ALGORITHM= COPY, LOCK= NONE;
> +--enable_warnings
> +--enable_info
> ALTER TABLE t1 ADD INDEX i5(b), ALGORITHM= COPY, LOCK= SHARED;
> ALTER TABLE t1 ADD INDEX i6(b), ALGORITHM= COPY, LOCK= EXCLUSIVE;
>
> diff --git a/mysql-test/main/alter_table_locknone.result b/mysql-test/main/alter_table_locknone.result
> new file mode 100644
> index 00000000000..ea040925efe
> --- /dev/null
> +++ b/mysql-test/main/alter_table_locknone.result
> @@ -0,0 +1,270 @@
> +create table t1 (a int not null primary key, b int, c varchar(80), e enum('a','b')) engine=myisam;
> +insert into t1 (a) values (1),(2),(3);
> +alter online table t1 modify b int default 5, alter c set default 'X';
> +alter online table t1 change b new_name int;
> +alter online table t1 modify e enum('a','b','c');
> +alter online table t1 comment "new comment";
> +alter table t1 add constraint q check (a > 0);
> +alter online table t1 drop constraint q;
> +alter online table t1 algorithm=INPLACE, lock=NONE;
> +alter online table t1;
> +alter table t1 algorithm=INPLACE;
> +alter table t1 lock=NONE;
> +show create table t1;
> +Table Create Table
> +t1 CREATE TABLE `t1` (
> + `a` int(11) NOT NULL,
> + `new_name` int(11) DEFAULT NULL,
> + `c` varchar(80) DEFAULT 'X',
> + `e` enum('a','b','c') DEFAULT NULL,
> + PRIMARY KEY (`a`)
> +) ENGINE=MyISAM DEFAULT CHARSET=latin1 COMMENT='new comment'
> +drop table t1;
> +create temporary table t1 (a int not null primary key, b int, c varchar(80), e enum('a','b'));
> +insert into t1 (a) values (1),(2),(3);
> +alter online table t1 modify b int default 5, alter c set default 'X';
> +alter online table t1 change b new_name int;
> +alter online table t1 modify e enum('a','b','c');
> +alter online table t1 comment "new comment";
> +alter online table t1 rename to t2;
> +show create table t2;
> +Table Create Table
> +t2 CREATE TEMPORARY TABLE `t2` (
> + `a` int(11) NOT NULL,
> + `new_name` int(11) DEFAULT NULL,
> + `c` varchar(80) DEFAULT 'X',
> + `e` enum('a','b','c') DEFAULT NULL,
> + PRIMARY KEY (`a`)
> +) ENGINE=MyISAM DEFAULT CHARSET=latin1 COMMENT='new comment'
> +drop table t2;
> +create table t1 (a int not null primary key, b int, c varchar(80), e enum('a','b')) engine=aria;
> +insert into t1 (a) values (1),(2),(3);
> +alter online table t1 modify b int default 5;
> +alter online table t1 change b new_name int;
> +alter online table t1 modify e enum('a','b','c');
> +alter online table t1 comment "new comment";
> +show create table t1;
> +Table Create Table
> +t1 CREATE TABLE `t1` (
> + `a` int(11) NOT NULL,
> + `new_name` int(11) DEFAULT NULL,
> + `c` varchar(80) DEFAULT NULL,
> + `e` enum('a','b','c') DEFAULT NULL,
> + PRIMARY KEY (`a`)
> +) ENGINE=Aria DEFAULT CHARSET=latin1 PAGE_CHECKSUM=1 COMMENT='new comment'
> +alter online table t1 page_checksum=1;
> +alter online table t1 page_checksum=0;
> +ERROR 0A000: LOCK=NONE is not supported for this operation. Try LOCK=SHARED
Why LOCK=NONE would be not supported now?
Shouldn't your new feature allow just everything with LOCK=NONE?
> +drop table t1;
> +create table t1 (a int not null primary key, b int, c varchar(80), e enum('a','b'));
> +insert into t1 (a) values (1),(2),(3);
> +alter online table t1 drop column b, add b int;
> +ERROR 0A000: LOCK=NONE is not supported for this operation. Try LOCK=SHARED
...
> diff --git a/mysql-test/main/alter_table_online.combinations b/mysql-test/main/alter_table_online.combinations
> new file mode 100644
> index 00000000000..ae144432c68
> --- /dev/null
> +++ b/mysql-test/main/alter_table_online.combinations
> @@ -0,0 +1,2 @@
> +[innodb]
> +[rocksdb]
what about non-trans tables?
> diff --git a/mysql-test/main/alter_table_online.test b/mysql-test/main/alter_table_online.test
> index 6ef9661c43b..815483d252d 100644
> --- a/mysql-test/main/alter_table_online.test
> +++ b/mysql-test/main/alter_table_online.test
> @@ -1,335 +1,596 @@
> -#
> -# Test of ALTER ONLINE TABLE syntax
> -#
> -
> ---source include/have_innodb.inc
> ---source include/have_partition.inc
> -#
> -# Test of things that can be done online
> -#
> -
> -create table t1 (a int not null primary key, b int, c varchar(80), e enum('a','b')) engine=myisam;
> -insert into t1 (a) values (1),(2),(3);
> -
> -alter online table t1 modify b int default 5, alter c set default 'X';
> -alter online table t1 change b new_name int;
> -alter online table t1 modify e enum('a','b','c');
> -alter online table t1 comment "new comment";
> -alter table t1 add constraint q check (a > 0);
> -alter online table t1 drop constraint q;
> -
> -# No OPs
> -
> -alter online table t1 algorithm=INPLACE, lock=NONE;
> -alter online table t1;
> -alter table t1 algorithm=INPLACE;
> -alter table t1 lock=NONE;
> -show create table t1;
> -drop table t1;
> +-- source include/have_debug_sync.inc
> +-- source include/not_embedded.inc
> +-- source alter_table_online_binlog.inc
>
> -#
> -# everything with temporary tables is "online", i.e. without locks
> -#
> -create temporary table t1 (a int not null primary key, b int, c varchar(80), e enum('a','b'));
> -insert into t1 (a) values (1),(2),(3);
> -
> -alter online table t1 modify b int default 5, alter c set default 'X';
> -alter online table t1 change b new_name int;
> -alter online table t1 modify e enum('a','b','c');
> -alter online table t1 comment "new comment";
> -alter online table t1 rename to t2;
> -show create table t2;
> -drop table t2;
> +-- disable_query_log
> +-- if ($MTR_COMBINATION_INNODB) {
> +-- source include/have_innodb.inc
> +set default_storage_engine= innodb;
> +-- }
> +-- if ($MTR_COMBINATION_ROCKSDB) {
> +-- source include/have_rocksdb.inc
> +set default_storage_engine= rocksdb;
> +-- }
> +-- enable_query_log
> +
> +-- let $default_engine= `select @@default_storage_engine`
> +
> +--connect (con2, localhost, root,,)
> +--connection default
> +
> +
> +--echo #
> +--echo # Test insert
> +--echo #
> +
> +--echo # Insert and add column
> +create or replace table t1 (a int) engine=innodb;
why do you specify the engine explicitly if you run it in two engine
combinations?
> +insert t1 values (5);
> +
> +--connection con2
> +--send
> +set debug_sync= 'now WAIT_FOR ended';
> +
> +--connection default
> +set debug_sync= 'alter_table_copy_end SIGNAL ended WAIT_FOR end';
> +
...
> +
> +--connection default
> +--reap
> +--sorted_result
> +select * from t1;
> +
> +--echo #
> +--echo # MYISAM. Only Inserts can be tested.
why?
> +--echo #
> +
> +create or replace table t1 (a int) engine=myisam;
> +insert t1 values (5);
> +
> +--connection con2
> +--send
> +set debug_sync= 'now WAIT_FOR ended';
> +
> +--connection default
> +set debug_sync= 'alter_table_copy_end SIGNAL ended WAIT_FOR end';
> +
> +--send
> +alter table t1 add b int NULL, algorithm= copy, lock= none;
> +
...
> diff --git a/mysql-test/main/mdl_sync.test b/mysql-test/main/mdl_sync.test
> index 3df19aca806..8c6bc76f5ca 100644
> --- a/mysql-test/main/mdl_sync.test
> +++ b/mysql-test/main/mdl_sync.test
> @@ -42,7 +42,7 @@ connection con1;
> set debug_sync='mdl_upgrade_lock SIGNAL parked WAIT_FOR go';
> --send alter table t1 rename t3
>
> -connection default;
> + connection default;
eh? (here and many other changes in this file)
> --echo connection: default
> set debug_sync= 'now WAIT_FOR parked';
>
> diff --git a/mysql-test/suite/gcol/inc/gcol_select.inc b/mysql-test/suite/gcol/inc/gcol_select.inc
> index 2386c55fdbc..8f6314233bd 100644
> --- a/mysql-test/suite/gcol/inc/gcol_select.inc
> +++ b/mysql-test/suite/gcol/inc/gcol_select.inc
> @@ -872,7 +872,9 @@ CREATE TABLE t1(a INT);
> INSERT INTO t1 VALUES(2147483647);
> ALTER TABLE t1 ADD COLUMN h INT AS (a) VIRTUAL;
> ALTER TABLE t1 CHANGE h i INT AS (a) VIRTUAL, ALGORITHM=COPY;
> +--error ER_WARN_DATA_OUT_OF_RANGE,ER_ALTER_OPERATION_NOT_SUPPORTED_REASON
why two? embedded, again?
> ALTER TABLE t1 ADD COLUMN b SMALLINT AS (a) VIRTUAL, ALGORITHM=COPY, LOCK=NONE;
> +--error ER_WARN_DATA_OUT_OF_RANGE,ER_ALTER_OPERATION_NOT_SUPPORTED_REASON
> ALTER TABLE t1 ADD COLUMN e SMALLINT AS (a) VIRTUAL, ALGORITHM=COPY, LOCK=NONE;
> ALTER TABLE t1 ADD COLUMN f SMALLINT AS (a) VIRTUAL, ALGORITHM=COPY, LOCK=SHARED;
> ALTER TABLE t1 ADD COLUMN g SMALLINT AS (a) VIRTUAL, ALGORITHM=COPY, LOCK=EXCLUSIVE;
> diff --git a/sql/handler.h b/sql/handler.h
> index fe61666bf20..5e46fa59d7e 100644
> --- a/sql/handler.h
> +++ b/sql/handler.h
> @@ -3538,6 +3543,7 @@ class handler :public Sql_alloc
> /** to be actually called to get 'check()' functionality*/
> int ha_check(THD *thd, HA_CHECK_OPT *check_opt);
> int ha_repair(THD* thd, HA_CHECK_OPT* check_opt);
> + virtual void open_read_view(){}
why did you change to that from start_consistent_snapshot()?
> void ha_start_bulk_insert(ha_rows rows, uint flags= 0)
> {
> DBUG_ENTER("handler::ha_start_bulk_insert");
> diff --git a/sql/log.cc b/sql/log.cc
> index 70ceecc66f8..9beab80773c 100644
> --- a/sql/log.cc
> +++ b/sql/log.cc
> @@ -101,6 +102,9 @@ static int binlog_flush_cache(THD *thd, binlog_cache_mngr *cache_mngr,
> Log_event *end_ev, bool all, bool using_stmt,
> bool using_trx, bool is_ro_1pc);
>
> +int binlog_online_alter_commit(THD *thd, bool all);
> +void binlog_online_alter_rollback(THD *thd, bool all);
static?
> +
> static const LEX_CSTRING write_error_msg=
> { STRING_WITH_LEN("error writing to the binary log") };
>
> @@ -3565,6 +3613,91 @@ bool MYSQL_BIN_LOG::open_index_file(const char *index_file_name_arg,
> }
>
>
> +bool Event_log::open(const char *log_name,
> + const char *new_name, ulong next_file_number,
> + enum cache_type io_cache_type_arg)
> +{
> + bool error= false;
> + if (log_name || new_name)
> + {
> + error= MYSQL_LOG::open(
> +#ifdef HAVE_PSI_INTERFACE
> + 0,
> +#endif
> + log_name, LOG_NORMAL, new_name, next_file_number, io_cache_type_arg);
> + }
> + else
> + {
> +#ifdef HAVE_PSI_INTERFACE
> + /* Keep the key for reopen */
> + m_log_file_key= 0;
> +#endif
> + error= init_io_cache(&log_file, -1, LOG_BIN_IO_SIZE,
> + io_cache_type_arg, 0, 0,
> + MYF(MY_WME | MY_NABP | MY_WAIT_IF_FULL));
> +
> + log_state= LOG_OPENED;
> + inited= true;
> + }
> + if (error)
> + return error;
> +
> + longlong bytes_written= write_description_event(
> + (enum_binlog_checksum_alg)binlog_checksum_options,
> + encrypt_binlog, false);
> + error= bytes_written < 0;
> + return error;
> +}
> +
> +longlong
> +Event_log::write_description_event(enum_binlog_checksum_alg checksum_alg,
> + bool encrypt, bool dont_set_created)
> +{
> + Format_description_log_event s(BINLOG_VERSION);
> + /*
> + don't set LOG_EVENT_BINLOG_IN_USE_F for SEQ_READ_APPEND io_cache
> + as we won't be able to reset it later
> + */
> + if (io_cache_type == WRITE_CACHE)
this was incorrectly rebased. in the previous patch you *moved*
this block of code from MYSQL_BIN_LOG::open into a separate method.
here you copied it. you forgot to delete it from MYSQL_BIN_LOG::open().
> + s.flags |= LOG_EVENT_BINLOG_IN_USE_F;
> + s.checksum_alg= checksum_alg;
> +
> + crypto.scheme = 0;
> + DBUG_ASSERT(s.checksum_alg != BINLOG_CHECKSUM_ALG_UNDEF);
> + if (!s.is_valid())
> + return -1;
> + s.dont_set_created= dont_set_created;
> + if (write_event(&s, 0, &log_file))
> + return -1;
> +
> + if (encrypt)
> + {
> + uint key_version= encryption_key_get_latest_version(ENCRYPTION_KEY_SYSTEM_DATA);
> + if (key_version == ENCRYPTION_KEY_VERSION_INVALID)
> + {
> + sql_print_error("Failed to enable encryption of binary logs");
> + return -1;
> + }
> +
> + if (key_version != ENCRYPTION_KEY_NOT_ENCRYPTED)
> + {
> + if (my_random_bytes(crypto.nonce, sizeof(crypto.nonce)))
> + return -1;
> +
> + Start_encryption_log_event sele(1, key_version, crypto.nonce);
> + sele.checksum_alg= s.checksum_alg;
> + if (write_event(&sele, 0, &log_file))
> + return -1;
> +
> + // Start_encryption_log_event is written, enable the encryption
> + if (crypto.init(sele.crypto_scheme, key_version))
> + return -1;
> + }
> + }
> + return (longlong)s.data_written;
> +}
> +
> +
> /**
> Open a (new) binlog file.
>
> @@ -7252,6 +7477,160 @@ class CacheWriter: public Log_event_writer
> bool first;
> };
>
> +int cache_copy(IO_CACHE *to, IO_CACHE *from)
> +{
> + DBUG_ENTER("cache_copy");
> + if (reinit_io_cache(from, READ_CACHE, 0, 0, 0))
> + DBUG_RETURN(ER_ERROR_ON_WRITE);
> + size_t bytes_in_cache= my_b_bytes_in_cache(from);
> +
> + do
> + {
> + my_b_write(to, from->read_pos, bytes_in_cache);
> +
> + from->read_pos += bytes_in_cache;
> + bytes_in_cache= my_b_fill(from);
> + if (from->error || to->error)
> + DBUG_RETURN(ER_ERROR_ON_WRITE);
> + } while (bytes_in_cache);
> +
> + DBUG_RETURN(0);
> +}
> +
> +int binlog_online_alter_commit(THD *thd, bool all)
> +{
> + DBUG_ENTER("online_alter_commit");
> + int error= 0;
> +#ifdef HAVE_REPLICATION
> +
> + if (thd->online_alter_cache_list.empty())
> + DBUG_RETURN(0);
> +
> + bool is_ending_transaction= ending_trans(thd, all);
> +
> + for (auto &cache_mngr: thd->online_alter_cache_list)
> + {
> + auto *binlog= cache_mngr.share->online_alter_binlog;
> + DBUG_ASSERT(binlog);
> +
> + error= binlog_flush_pending_rows_event(thd,
> + /*
> + do not set STMT_END for last event
> + to leave table open in altering thd
> + */
> + false,
> + true,
> + binlog,
> + is_ending_transaction
> + ? &cache_mngr.trx_cache
> + : &cache_mngr.stmt_cache);
> + if (is_ending_transaction)
> + {
> + mysql_mutex_lock(binlog->get_log_lock());
> + error= binlog->write_cache(thd, &cache_mngr.trx_cache.cache_log);
> +
> + mysql_mutex_unlock(binlog->get_log_lock());
> + }
> + else
> + {
> + error= cache_copy(&cache_mngr.trx_cache.cache_log,
> + &cache_mngr.stmt_cache.cache_log);
Why do you need two caches here? It seems you could have just one trx_cache
and if the statement fails you simply truncate it to the beginning of
the statement.
> + }
> +
> + if (error)
> + {
> + my_error(ER_ERROR_ON_WRITE, MYF(ME_ERROR_LOG),
> + binlog->get_name(), errno);
> + binlog_online_alter_cleanup(thd->online_alter_cache_list,
> + is_ending_transaction);
> + DBUG_RETURN(error);
> + }
> + }
> +
> + binlog_online_alter_cleanup(thd->online_alter_cache_list,
> + is_ending_transaction);
> +
> + for (TABLE *table= thd->open_tables; table; table= table->next)
> + {
> + table->online_alter_cache= NULL;
Why?
> + }
> +#endif // HAVE_REPLICATION
> + DBUG_RETURN(error);
> +}
> +
> +void binlog_online_alter_rollback(THD *thd, bool all)
> +{
> +#ifdef HAVE_REPLICATION
> + bool is_ending_trans= ending_trans(thd, all);
> +
> + /*
> + This is a crucial moment that we are running through
> + thd->online_alter_cache_list, and not through thd->open_tables to cleanup
> + stmt cache, though both have it. The reason is that the tables can be closed
> + to that moment in case of an error.
> + The same reason applies to the fact we don't store cache_mngr in the table
> + itself -- because it can happen to be not existing.
> + Still in case if tables are left opened
> + */
> + binlog_online_alter_cleanup(thd->online_alter_cache_list, is_ending_trans);
> +#endif // HAVE_REPLICATION
> +}
> +
> +SAVEPOINT** find_savepoint_in_list(THD *thd, LEX_CSTRING name,
> + SAVEPOINT ** const list);
> +
> +SAVEPOINT* savepoint_add(THD *thd, LEX_CSTRING name, SAVEPOINT **list,
> + int (*release_old)(THD*, SAVEPOINT*));
may be in transaction.h?
> +
> +int online_alter_savepoint_set(THD *thd, LEX_CSTRING name)
> +{
> +
> + DBUG_ENTER("binlog_online_alter_savepoint");
> +#ifdef HAVE_REPLICATION
> + if (thd->online_alter_cache_list.empty())
> + DBUG_RETURN(0);
> +
> + if (savepoint_alloc_size < sizeof (SAVEPOINT) + sizeof(my_off_t))
> + savepoint_alloc_size= sizeof (SAVEPOINT) + sizeof(my_off_t);
> +
> + for (auto &cache: thd->online_alter_cache_list)
> + {
> + if (cache.share->db_type()->savepoint_set == NULL)
> + continue;
> +
> + SAVEPOINT *sv= savepoint_add(thd, name, &cache.sv_list, NULL);
wouldn't it be simpler to log SAVEPOINT and ROLLBACK/RELEASE
as Query_log_event to the cache? Then you won't need to do
any maintenance.
> + if(unlikely(sv == NULL))
> + DBUG_RETURN(1);
> + my_off_t *pos= (my_off_t*)(sv+1);
> + *pos= cache.trx_cache.get_byte_position();
> +
> + sv->prev= cache.sv_list;
> + cache.sv_list= sv;
> + }
> +#endif
> + DBUG_RETURN(0);
> +}
> +
> +int online_alter_savepoint_rollback(THD *thd, LEX_CSTRING name)
> +{
> + DBUG_ENTER("online_alter_savepoint_rollback");
> +#ifdef HAVE_REPLICATION
> + for (auto &cache: thd->online_alter_cache_list)
> + {
> + if (cache.share->db_type()->savepoint_set == NULL)
> + continue;
> +
> + SAVEPOINT **sv= find_savepoint_in_list(thd, name, &cache.sv_list);
> + // sv is null if savepoint was set up before online table was modified
> + my_off_t pos= *sv ? *(my_off_t*)(*sv+1) : 0;
> +
> + cache.trx_cache.restore_savepoint(pos);
> + }
> +
> +#endif
> + DBUG_RETURN(0);
> +}
> +
> /*
> Write the contents of a cache to the binary log.
>
> diff --git a/sql/rpl_rli.h b/sql/rpl_rli.h
> index cc807852bf2..57757ecf3dd 100644
> --- a/sql/rpl_rli.h
> +++ b/sql/rpl_rli.h
> @@ -900,14 +900,20 @@ struct rpl_group_info
> }
> }
>
> - bool get_table_data(TABLE *table_arg, table_def **tabledef_var, TABLE **conv_table_var) const
> + bool get_table_data(TABLE *table_arg, table_def **tabledef_var,
> + TABLE **conv_table_var,
> + const Copy_field *copy[], const Copy_field **copy_end) const
> {
> DBUG_ASSERT(tabledef_var && conv_table_var);
> for (TABLE_LIST *ptr= tables_to_lock ; ptr != NULL ; ptr= ptr->next_global)
> if (ptr->table == table_arg)
> {
> - *tabledef_var= &static_cast<RPL_TABLE_LIST*>(ptr)->m_tabledef;
> - *conv_table_var= static_cast<RPL_TABLE_LIST*>(ptr)->m_conv_table;
> + auto *rpl_table_list= static_cast<RPL_TABLE_LIST*>(ptr);
> + if (rpl_table_list->m_tabledef_valid)
when can it be false?
> + *tabledef_var= &rpl_table_list->m_tabledef;
> + *conv_table_var= rpl_table_list->m_conv_table;
> + *copy= rpl_table_list->m_online_alter_copy_fields;
> + *copy_end= rpl_table_list->m_online_alter_copy_fields_end;
> DBUG_PRINT("debug", ("Fetching table data for table %s.%s:"
> " tabledef: %p, conv_table: %p",
> table_arg->s->db.str, table_arg->s->table_name.str,
> diff --git a/sql/sql_table.cc b/sql/sql_table.cc
> index b07efb29bba..9dfe2178667 100644
> --- a/sql/sql_table.cc
> +++ b/sql/sql_table.cc
> @@ -9533,6 +9539,19 @@ bool mysql_alter_table(THD *thd, const LEX_CSTRING *new_db,
> has been already processed.
> */
> table_list->required_type= TABLE_TYPE_NORMAL;
> +
> +
> + if (alter_info->requested_lock == Alter_info::ALTER_TABLE_LOCK_SHARED
> + || alter_info->requested_lock == Alter_info::ALTER_TABLE_LOCK_EXCLUSIVE
> + || thd->locked_tables_mode == LTM_LOCK_TABLES
> + || thd->lex->sql_command == SQLCOM_OPTIMIZE
> + || alter_info->algorithm(thd) == Alter_info::ALTER_TABLE_ALGORITHM_NOCOPY)
why only NOCOPY? What about INPLACE and INSTANT?
the rest of the condition looks fine.
> + online= false;
> +
> + if (online)
> + {
> + table_list->lock_type= TL_READ;
> + }
>
> DEBUG_SYNC(thd, "alter_table_before_open_tables");
>
> @@ -10961,6 +10983,58 @@ bool mysql_trans_commit_alter_copy_data(THD *thd)
> DBUG_RETURN(error);
> }
>
> +#ifdef HAVE_REPLICATION
> +static int online_alter_read_from_binlog(THD *thd, rpl_group_info *rgi,
> + Cache_flip_event_log *log)
> +{
> + MEM_ROOT event_mem_root;
> + Query_arena backup_arena;
> + Query_arena event_arena(&event_mem_root, Query_arena::STMT_INITIALIZED);
> + init_sql_alloc(key_memory_gdl, &event_mem_root,
> + MEM_ROOT_BLOCK_SIZE, 0, MYF(0));
> +
> + int error= 0;
> +
> + IO_CACHE *log_file= log->flip();
> +
> + thd_progress_report(thd, 0, my_b_write_tell(log_file));
> +
> + Abort_on_warning_instant_set old_abort_on_warning(thd, 0);
> + do
> + {
> + const auto *descr_event= rgi->rli->relay_log.description_event_for_exec;
> + auto *ev= Log_event::read_log_event(log_file, descr_event, false);
> + if (!ev)
> + break;
> +
> + ev->thd= thd;
> + thd->set_n_backup_active_arena(&event_arena, &backup_arena);
> + error= ev->apply_event(rgi);
> + thd->restore_active_arena(&event_arena, &backup_arena);
> +
> + event_arena.free_items();
> + free_root(&event_mem_root, MYF(MY_KEEP_PREALLOC));
> + if (ev != rgi->rli->relay_log.description_event_for_exec)
> + delete ev;
> + thd_progress_report(thd, my_b_tell(log_file), thd->progress.max_counter);
> + DEBUG_SYNC(thd, "alter_table_online_progress");
> + } while(!error);
> +
> + return error;
> +}
> +#endif // HAVE_REPLICATION
> +
> +static void online_alter_cleanup_binlog(THD *thd, TABLE_SHARE *s)
> +{
> +#ifdef HAVE_REPLICATION
> + if (!s->online_alter_binlog)
> + return;
> + // s->online_alter_binlog->reset_logs(thd, false, NULL, 0, 0);
forgot to delete?
> + s->online_alter_binlog->cleanup();
> + s->online_alter_binlog->~Cache_flip_event_log();
> + s->online_alter_binlog= NULL;
> +#endif
> +}
>
> static int
> copy_data_between_tables(THD *thd, TABLE *from, TABLE *to,
> @@ -11306,6 +11431,76 @@ copy_data_between_tables(THD *thd, TABLE *from, TABLE *to,
> cleanup_done= 1;
> to->file->extra(HA_EXTRA_NO_IGNORE_DUP_KEY);
>
> +#ifdef HAVE_REPLICATION
> + if (likely(online && error < 0))
> + {
> + Ha_trx_info *trx_info_save= thd->transaction->all.ha_list;
> + thd->transaction->all.ha_list = NULL;
why?
> + thd_progress_next_stage(thd);
> + Table_map_log_event table_event(thd, from, from->s->table_map_id,
> + from->file->has_transactions());
> + Relay_log_info rli(false);
> + rpl_group_info rgi(&rli);
> + RPL_TABLE_LIST rpl_table(to, TL_WRITE, from, table_event.get_table_def(),
> + copy, copy_end);
> + Cache_flip_event_log *binlog= from->s->online_alter_binlog;
> + rgi.thd= thd;
> + rgi.tables_to_lock= &rpl_table;
> +
> + rgi.m_table_map.set_table(from->s->table_map_id, to);
> +
> + DBUG_ASSERT(binlog->is_open());
> +
> + rli.relay_log.description_event_for_exec=
> + new Format_description_log_event(4);
> +
> + // We restore bitmaps, because update event is going to mess up with them.
> + to->default_column_bitmaps();
> +
> + error= online_alter_read_from_binlog(thd, &rgi, binlog);
> +
> + DEBUG_SYNC(thd, "alter_table_online_before_lock");
> +
> + int lock_error=
> + thd->mdl_context.upgrade_shared_lock(from->mdl_ticket, MDL_EXCLUSIVE,
> + (double)thd->variables.lock_wait_timeout);
> + if (!error)
> + error= lock_error;
> +
> + if (!error)
> + {
> + thd_progress_next_stage(thd);
> + error= online_alter_read_from_binlog(thd, &rgi, binlog);
> + }
> +
> + thd->transaction->all.ha_list = trx_info_save;
> + }
> + else if (unlikely(online)) // error was on copy stage
> + {
> + /*
> + We need to issue a barrier to clean up gracefully.
> + Without this, following possible:
> + T1: ALTER TABLE starts
> + T2: INSERT starts
> + T1: ALTER TABLE fails with error (i.e. ER_DUP_KEY)
> + T1: from->s->online_alter_binlog sets to NULL
> + T2: INSERT committs
> + T2: thd->online_alter_cache_list is not empty
> + T2: binlog_commit: DBUG_ASSERT(binlog); is issued.
1. do you have a test for that?
2. can it be fixed, like, on iterating thd->online_alter_cache_list
thd notices that from->s->online_alter_cache_list is NULL, so
it simply discards the cache? Then you won't need MDL_SHARED_NO_WRITE
> + */
> + // Ignore the return result. We already have an error.
> + thd->mdl_context.upgrade_shared_lock(from->mdl_ticket,
> + MDL_SHARED_NO_WRITE,
> + thd->variables.lock_wait_timeout);
> + }
> +#endif
> +
> + if (error > 0 && !from->s->tmp_table)
> + {
> + /* We are going to drop the temporary table */
> + to->file->extra(HA_EXTRA_PREPARE_FOR_DROP);
> + }
> +
> DEBUG_SYNC(thd, "copy_data_between_tables_before_reset_backup_lock");
> if (backup_reset_alter_copy_lock(thd))
> error= 1;
Regards,
Sergei
VP of MariaDB Server Engineering
and security@xxxxxxxxxxx