maria-developers team mailing list archive
-
maria-developers team
-
Mailing list archive
-
Message #12701
Re: c80f867b0df: MDEV-23844 Atomic DROP TABLE
Hi, Michael!
On May 12, Michael Widenius wrote:
> revision-id: c80f867b0df (mariadb-10.6.0-78-gc80f867b0df)
> parent(s): eebebe8ef75
> author: Michael Widenius <michael.widenius@xxxxxxxxx>
> committer: Michael Widenius <michael.widenius@xxxxxxxxx>
> timestamp: 2021-05-04 21:09:11 +0300
> message:
>
> MDEV-23844 Atomic DROP TABLE
...
> diff --git a/sql/sql_view.cc b/sql/sql_view.cc
> index e6d726b30d7..ff8aa55adef 100644
> --- a/sql/sql_view.cc
> +++ b/sql/sql_view.cc
> @@ -1861,8 +1868,18 @@ bool mysql_drop_view(THD *thd, TABLE_LIST *views, enum_drop_mode drop_mode)
> not_exists_count++;
> continue;
> }
> + if (!first_table++)
yuck. so your "first_table" actually means "not_first_view",
because when it's FALSE, it's the first view.
why not to rename it to "not_first_view" ?
> + {
> + if (ddl_log_drop_view_init(thd, &ddl_log_state))
> + DBUG_RETURN(TRUE);
what about a crash here? the first entry is written, the second is not.
> + }
> + if (ddl_log_drop_view(thd, &ddl_log_state, &cpath, &view->db,
> + &view->table_name))
> + DBUG_RETURN(TRUE);
> + debug_crash_here("ddl_log_drop_before_delete_view");
> if (unlikely(mysql_file_delete(key_file_frm, path, MYF(MY_WME))))
> delete_error= TRUE;
> + debug_crash_here("ddl_log_drop_after_delete_view");
>
> some_views_deleted= TRUE;
>
> diff --git a/sql/log.cc b/sql/log.cc
> index 6e0a9706ec8..dba999054e4 100644
> --- a/sql/log.cc
> +++ b/sql/log.cc
> @@ -10471,16 +10483,16 @@ int TC_LOG_BINLOG::recover(LOG_INFO *linfo, const char *last_log_name,
> bool last_gtid_standalone= false;
> bool last_gtid_valid= false;
> #endif
> + DBUG_ENTER("TC_LOG_BINLOG::recover");
>
> if (! fdle->is_valid() ||
> - (do_xa && my_hash_init(key_memory_binlog_recover_exec, &xids,
> - &my_charset_bin, TC_LOG_PAGE_SIZE/3, 0,
> - sizeof(my_xid), 0, 0, MYF(0))))
> + (my_hash_init(key_memory_binlog_recover_exec, &xids,
> + &my_charset_bin, TC_LOG_PAGE_SIZE/3, 0,
> + sizeof(my_xid), 0, 0, MYF(0))))
> goto err1;
>
> - if (do_xa)
> - init_alloc_root(key_memory_binlog_recover_exec, &mem_root,
> - TC_LOG_PAGE_SIZE, TC_LOG_PAGE_SIZE, MYF(0));
> + init_alloc_root(key_memory_binlog_recover_exec, &mem_root,
> + TC_LOG_PAGE_SIZE, TC_LOG_PAGE_SIZE, MYF(0));
I don't think these two changes are correct. do_xa is TRUE on crash recovery
and FALSE on gtid state recovery on 5.5->10.0 upgade or if gtid state
file was deleted. There is no point to initialize the xid hash or mem_root,
as it's not a crash recovery and there can be no incomplete transactions
or ddl statements.
Yes, I've seen the commit comment:
> - TC_LOG_BINLOG::recover() was changed to always collect Xid's from the
> binary log and always call ddl_log_close_binlogged_events(). This was
> needed to be able to collect DROP TABLE events with embedded Xid's
> (used by ddl log).
but I cannot understand how it applies here.
> fdle->flags&= ~LOG_EVENT_BINLOG_IN_USE_F; // abort on the first error
>
> diff --git a/sql/sql_table.cc b/sql/sql_table.cc
> index 7a88ffdfd1b..0a83396ae14 100644
> --- a/sql/sql_table.cc
> +++ b/sql/sql_table.cc
> @@ -1363,6 +1373,17 @@ int mysql_rm_table_no_locks(THD *thd, TABLE_LIST *tables, bool if_exists,
>
> thd->replication_flags= 0;
> was_view= table_type == TABLE_TYPE_VIEW;
> +
> + if (!first_table++)
first_table -> not_first_table
> + {
> + LEX_CSTRING comment= {comment_start, (size_t) comment_len};
> + if (ddl_log_drop_table_init(thd, &ddl_log_state, &comment))
> + {
> + error= 1;
> + goto err;
> + }
> + }
> +
> if ((table_type == TABLE_TYPE_UNKNOWN) || (was_view && !drop_view) ||
> (table_type != TABLE_TYPE_SEQUENCE && drop_sequence))
> {
> @@ -1565,6 +1611,7 @@ int mysql_rm_table_no_locks(THD *thd, TABLE_LIST *tables, bool if_exists,
> table_name.str, (uint)table_name.length);
> mysql_audit_drop_table(thd, table);
> }
> + ddl_log_update_phase(&ddl_log_state, DDL_DROP_PHASE_COLLECT);
better call it DDL_DROP_PHASE_BINLOG. It would've saved me ~10 minutes
reading the code back and forth, as ""DDL_DROP_PHASE_BINLOG" immediately
explains what this phase is for.
>
> if (!dont_log_query &&
> (!error || table_dropped || non_existing_table_error(error)))
> diff --git a/sql/ddl_log.h b/sql/ddl_log.h
> index 316e6708f22..b843404fc3c 100644
> --- a/sql/ddl_log.h
> +++ b/sql/ddl_log.h
> @@ -97,6 +101,14 @@ enum enum_ddl_log_rename_table_phase {
> DDL_RENAME_PHASE_TABLE,
> };
>
> +enum enum_ddl_log_drop_table_phase {
> + DDL_DROP_PHASE_TABLE=0,
> + DDL_DROP_PHASE_TRIGGER,
no phase for deleting data from stat tables?
> + DDL_DROP_PHASE_COLLECT,
> + DDL_DROP_PHASE_RESET, /* Reset found list of dropped tables */
this one is unused
(also unused in further patches, I've checked)
> + DDL_DROP_PHASE_END
> +};
> +
> /*
> Setting ddl_log_entry.phase to this has the same effect as setting
> action_type to DDL_IGNORE_LOG_ENTRY_CODE
> diff --git a/sql/ddl_log.cc b/sql/ddl_log.cc
> index 777dfdddbc7..6145b2f7318 100644
> --- a/sql/ddl_log.cc
> +++ b/sql/ddl_log.cc
> @@ -109,6 +112,7 @@ struct st_global_ddl_log
> };
>
> st_global_ddl_log global_ddl_log;
> +String ddl_drop_query; // Used during startup recovery
static
>
> mysql_mutex_t LOCK_gdl;
>
> @@ -1132,6 +1149,122 @@ static int ddl_log_execute_action(THD *thd, MEM_ROOT *mem_root,
> (void) update_phase(entry_pos, DDL_LOG_FINAL_PHASE);
> }
> break;
> + case DDL_LOG_DROP_TABLE_INIT_ACTION:
> + {
> + LEX_CSTRING *comment= &ddl_log_entry->tmp_name;
> + ddl_drop_query.length(0);
> + ddl_drop_query.set_charset(system_charset_info);
> + ddl_drop_query.append(STRING_WITH_LEN("DROP TABLE IF EXISTS "));
> + if (comment->length)
> + {
> + ddl_drop_query.append(comment);
> + ddl_drop_query.append(' ');
> + }
> + /* We don't increment phase as we want to retry this in case of crash */
> + break;
> + }
> + case DDL_LOG_DROP_TABLE_ACTION:
> + {
> + LEX_CSTRING db, table, path;
> + db= ddl_log_entry->db;
> + table= ddl_log_entry->name;
> + /* Note that path is without .frm extension */
> + path= ddl_log_entry->tmp_name;
> +
> + switch (ddl_log_entry->phase) {
> + case DDL_DROP_PHASE_TABLE:
> + if (hton)
> + {
> + if ((error= hton->drop_table(hton, path.str)))
> + {
> + if (!non_existing_table_error(error))
> + break;
> + error= -1;
> + }
> + }
> + else
> + error= ha_delete_table_force(thd, path.str, &db, &table);
> + if (error <= 0)
> + {
> + /* Not found or already deleted. Delete .frm if it exists */
> + strxnmov(to_path, sizeof(to_path)-1, path.str, reg_ext, NullS);
> + mysql_file_delete(key_file_frm, to_path, MYF(MY_WME|MY_IGNORE_ENOENT));
this should rather be inside the previous if().
no need to do it for ha_delete_table_force(), as it doesn't
leave any traces and removes all remnants of a table.
> + }
> + if (ddl_log_increment_phase_no_lock(entry_pos))
> + break;
> + (void) ddl_log_sync_no_lock();
No need to, ddl_log_increment_phase_no_lock() syncs internally
> + /* Fall through */
> + case DDL_DROP_PHASE_TRIGGER:
> + Table_triggers_list::drop_all_triggers(thd, &db, &table,
> + MYF(MY_WME | MY_IGNORE_ENOENT));
> + if (ddl_log_increment_phase_no_lock(entry_pos))
> + break;
> + (void) ddl_log_sync_no_lock();
same. and in other places too, I'd expect.
> + /* Fall through */
> +
> + case DDL_DROP_PHASE_COLLECT:
indentation?
> + append_identifier(thd, &ddl_drop_query, &db);
> + ddl_drop_query.append('.');
> + append_identifier(thd, &ddl_drop_query, &table);
> + ddl_drop_query.append(',');
> + /* We don't increment phase as we want to retry this in case of crash */
> +
> + if (!ddl_log_entry->next_entry && mysql_bin_log.is_open())
> + {
> + /* Last drop table. Write query to binlog */
> + LEX_CSTRING end_comment=
> + { STRING_WITH_LEN(" /* generated by ddl log */")};
"generated by ddl log" ? a "log" cannot "generate" a statement,
may be "generated during ddl recovery" ?
> + ddl_drop_query.length(ddl_drop_query.length()-1);
> + ddl_drop_query.append(&end_comment);
> +
> + mysql_mutex_unlock(&LOCK_gdl);
> + (void) thd->binlog_query(THD::STMT_QUERY_TYPE, ddl_drop_query.ptr(),
> + ddl_drop_query.length(), TRUE, FALSE,
> + FALSE, 0);
> + mysql_mutex_lock(&LOCK_gdl);
> + }
> + break;
> + }
> + break;
> + }
> + case DDL_LOG_DROP_VIEW_INIT_ACTION:
> + {
> + ddl_drop_query.length(0);
> + ddl_drop_query.set_charset(system_charset_info);
> + ddl_drop_query.append(STRING_WITH_LEN("DROP VIEW IF EXISTS "));
> + /* We don't increment phase as we want to retry this in case of crash */
> + break;
> + }
> + case DDL_LOG_DROP_VIEW_ACTION:
> + {
> + LEX_CSTRING db, table, path;
> + db= ddl_log_entry->db;
> + table= ddl_log_entry->name;
> + /* Note that for views path is WITH .frm extension */
> + path= ddl_log_entry->tmp_name;
> +
> + mysql_file_delete(key_file_frm, path.str, MYF(MY_WME|MY_IGNORE_ENOENT));
> + append_identifier(thd, &ddl_drop_query, &db);
> + ddl_drop_query.append('.');
> + append_identifier(thd, &ddl_drop_query, &table);
> + ddl_drop_query.append(',');
> +
> + if (!ddl_log_entry->next_entry)
> + {
> + /* Last drop view. Write query to binlog */
> + LEX_CSTRING end_comment=
> + { STRING_WITH_LEN(" /* generated by ddl log */")};
> + ddl_drop_query.length(ddl_drop_query.length()-1);
> + ddl_drop_query.append(&end_comment);
> +
> + mysql_mutex_unlock(&LOCK_gdl);
> + (void) thd->binlog_query(THD::STMT_QUERY_TYPE, ddl_drop_query.ptr(),
> + ddl_drop_query.length(), TRUE, FALSE,
> + FALSE, 0);
> + mysql_mutex_lock(&LOCK_gdl);
> + }
> + break;
> + }
> default:
> DBUG_ASSERT(0);
> break;
> @@ -1682,6 +1823,7 @@ void ddl_log_release_entries(DDL_LOG_STATE *ddl_log_state)
> next= log_entry->next_active_log_entry;
> ddl_log_release_memory_entry(log_entry);
> }
> + ddl_log_state->list= 0;
may be you should also clear ddl_log_state->execute_entry below?
>
> if (ddl_log_state->execute_entry)
> ddl_log_release_memory_entry(ddl_log_state->execute_entry);
> @@ -1776,6 +1918,33 @@ bool ddl_log_update_xid(DDL_LOG_STATE *state, ulonglong xid)
> }
>
>
> +/*
> + Write ddl_log_entry and write or update ddl_execute_entry
> +*/
> +
> +static bool ddl_log_write(DDL_LOG_STATE *ddl_state,
> + DDL_LOG_ENTRY *ddl_log_entry)
could you use a more descriptive name, please?
> +{
> + int error;
> + DDL_LOG_MEMORY_ENTRY *log_entry;
> + DBUG_ENTER("ddl_log_write");
> +
> + mysql_mutex_lock(&LOCK_gdl);
> + error= ((ddl_log_write_entry(ddl_log_entry, &log_entry)) ||
> + ddl_log_write_execute_entry(log_entry->entry_pos,
> + &ddl_state->execute_entry));
> + mysql_mutex_unlock(&LOCK_gdl);
> + if (error)
> + {
> + if (log_entry)
> + ddl_log_release_memory_entry(log_entry);
> + DBUG_RETURN(1);
> + }
> + add_log_entry(ddl_state, log_entry);
> + DBUG_RETURN(0);
> +}
> +
> +
> /**
> Logging of rename table
> */
Regards,
Sergei
VP of MariaDB Server Engineering
and security@xxxxxxxxxxx
Follow ups