← Back to team overview

maria-developers team mailing list archive

MWL#132: TC plugin, patch for (possible) code review

 

Hi Serg,

And here is the patch for MWL#132, TC plugin.

The patch is fairly short, as most of the real code changes are included in
MWL#116. This patch is mainly about introducing a new plugin interface for
transaction coordinators, and making the three existing TCs in log.cc into
built-in plugins.

My main motivation for this work was Galera, but when I discussed it with them at
the Istanbul meeting, I learned that they are not so interested in internal
2-phase commit (instead they download a complete database snapshot at node
recovery after a node crash).

So now I do not have an example of an external TC plugin implementation. My
idea is to implement the MWL#120: stacked event generators. I could then use
this to implement a simple TC example that does (very) basic synchronous
replication (e.g. maybe insert-only with little error handling or
something). This could be a nice example of how to use the facilities that TC
plugins provide for controlling the commit process etc.

So I would like your opinion as to whether this should be pushed to 5.3 now or
wait for MWL#120 (assuming you think it can be made suitable for pushing at
all, of course :-).

 - Kristian.

=== modified file 'include/mysql/plugin.h'
--- include/mysql/plugin.h	2010-05-26 18:55:40 +0000
+++ include/mysql/plugin.h	2010-10-04 10:48:08 +0000
@@ -75,7 +75,8 @@ typedef struct st_mysql_xid MYSQL_XID;
 #define MYSQL_FTPARSER_PLUGIN        2  /* Full-text parser plugin      */
 #define MYSQL_DAEMON_PLUGIN          3  /* The daemon/raw plugin type */
 #define MYSQL_INFORMATION_SCHEMA_PLUGIN  4  /* The I_S plugin type */
-#define MYSQL_MAX_PLUGIN_TYPE_NUM    5  /* The number of plugin types   */
+#define MYSQL_TC_PLUGIN              5  /* Transaction coordinator      */
+#define MYSQL_MAX_PLUGIN_TYPE_NUM    6  /* The number of plugin types   */
 
 /* We use the following strings to define licenses for plugins */
 #define PLUGIN_LICENSE_PROPRIETARY 0
@@ -600,10 +601,10 @@ struct st_mysql_ftparser
 };
 
 /*************************************************************************
-  API for Storage Engine plugin. (MYSQL_DAEMON_PLUGIN)
+  API for Daemon/raw plugin. (MYSQL_DAEMON_PLUGIN)
 */
 
-/* handlertons of different MySQL releases are incompatible */
+/* Internals of different MySQL releases are incompatible */
 #define MYSQL_DAEMON_INTERFACE_VERSION (MYSQL_VERSION_ID << 8)
 
 /*************************************************************************
@@ -653,6 +654,41 @@ struct st_mysql_information_schema
   int interface_version;
 };
 
+/*************************************************************************
+  API for Transaction Coordinator plugin. (MYSQL_TC_PLUGIN)
+*/
+
+/* TCs of different MySQL releases are incompatible */
+#define MYSQL_TC_VERSION (MYSQL_VERSION_ID << 8)
+
+/*
+  The real API is in sql/log.h
+  Here we define only the descriptor structure, that is referred from
+  st_mysql_plugin.
+*/
+
+struct st_mysql_tc
+{
+  int interface_version;
+  /*
+    This method is called to register a plugin as the transaction coordinator.
+    The method returns the TC_LOG * for the class object.
+
+    A plugin can also return NULL to abstain from being a transaction
+    coordinator.
+
+    The plugin can be asked twice to register. First time yield is passed as
+    1; the intention is that in this phase only the plugin explicitly
+    requested by the user should ask to be the primary (and it is a fatal
+    error if two plugins compete to be TC in this phase).
+
+    Second time yield is passed as 0; in this case it means there was no
+    explicit request for a particular TC, and whoever returns non-NULL in this
+    phase first becomes the TC.
+  */
+  struct TC_LOG *(*register_as_primary_tc)(void *arg, int yield);
+};
+
 
 /*
   st_mysql_value struct for reading values from mysqld.

=== modified file 'include/mysql/plugin.h.pp'
--- include/mysql/plugin.h.pp	2010-05-26 18:55:40 +0000
+++ include/mysql/plugin.h.pp	2010-10-04 10:48:08 +0000
@@ -106,6 +106,11 @@ struct st_mysql_information_schema
 {
   int interface_version;
 };
+struct st_mysql_tc
+{
+  int interface_version;
+  struct TC_LOG *(*register_as_primary_tc)(void *arg, int yield);
+};
 struct st_mysql_value
 {
   int (*value_type)(struct st_mysql_value *);

=== modified file 'sql/log.cc'
--- sql/log.cc	2010-10-01 08:49:57 +0000
+++ sql/log.cc	2010-10-05 07:24:25 +0000
@@ -166,6 +166,8 @@ public:
   ~binlog_trx_data()
   {
     DBUG_ASSERT(pending() == NULL);
+    pthread_cond_destroy(&COND_group_commit);
+    pthread_mutex_destroy(&LOCK_group_commit);
     close_cached_file(&trans_log);
   }
 
@@ -6531,6 +6533,49 @@ err1:
                   "--tc-heuristic-recover={commit|rollback}");
   return 1;
 }
+
+static TC_LOG *
+tc_log_mmap_register(void *arg __attribute__((unused)), int yield)
+{
+  /*
+    In the first round, we let any other plugin that wants to be the TC
+    get preference (e.g. the binary log).
+
+    But if no-one else wants the TC role, TC_LOG_MMAP serves as a fallback
+    that only does crash recovery (of multi-engine transactions).
+
+    If we do not have multiple engines that are capable of transactions,
+    we do not even need recovery, and TC_LOG_DUMMY will be selected instead.
+  */
+  if (yield || total_ha_2pc <= 1)
+    return NULL;
+
+  return &tc_log_mmap;
+}
+
+static struct st_mysql_tc tc_log_mmap_plugin=
+{
+  MYSQL_TC_VERSION,
+  tc_log_mmap_register
+};
+
+mysql_declare_plugin(tc_log_mmap)
+{
+  MYSQL_TC_PLUGIN,
+  &tc_log_mmap_plugin,
+  "tc_log_mmap",
+  "MariaDB",
+  "Default non-binlog TC, uses mmap()'ed scoreboard for 2pc recovery",
+  PLUGIN_LICENSE_GPL,
+  NULL,
+  NULL,
+  0x0100 /* 1.0 */,
+  NULL,
+  NULL,
+  NULL
+}
+mysql_declare_plugin_end;
+
 #endif
 
 TC_LOG *tc_log;
@@ -6815,6 +6860,79 @@ ulonglong mysql_bin_log_file_pos(void)
 }
 #endif /* INNODB_COMPATIBILITY_HOOKS */
 
+struct select_tc_plugin_data
+{
+  TC_LOG *selected;
+  const char *selected_name;
+  int yield;
+};
+
+static my_bool
+select_tc_plugin_cb(THD *thd __attribute__((unused)), plugin_ref plugin,
+                    void *arg)
+{
+  select_tc_plugin_data *data= static_cast<select_tc_plugin_data *>(arg);
+  /* In the second phase, skip everything after the first TC that registers. */
+  if (!data->yield && data->selected)
+    return FALSE;
+  st_mysql_tc *tcton= static_cast<st_mysql_tc *>(plugin_decl(plugin)->info);
+  TC_LOG *candidate=
+    (*tcton->register_as_primary_tc)(plugin_int_to_ref(plugin), data->yield);
+  if (candidate)
+  {
+    const char *candidate_name= plugin_name(plugin)->str;
+    if (data->selected)
+    {
+      sql_print_error("Fatal: plugins %s and %s both registered as "
+                      "transaction\ncoordinator, but only one is allowed.\n"
+                      "Please disable one of them.", data->selected_name,
+                      candidate_name);
+      return TRUE;
+    }
+    data->selected= candidate;
+    data->selected_name= candidate_name;
+  }
+  return FALSE;
+}
+
+/*
+  Select which plugin to use for the transaction coordinator
+  (there can be only one).
+
+  First, ask all plugins if they insist on being the preferred TC. If more than
+  one does, it is an error.
+
+  If no one insists, try again, this time picking the first one that is
+  willing to act as TC (there will always be one, as either tc_log_dummy or
+  tc_log_mmap will step up).
+*/
+TC_LOG *
+select_tc_plugin()
+{
+  my_bool error;
+  select_tc_plugin_data data;
+  DBUG_ENTER("select_tc_plugin");
+
+  data.selected= NULL;
+  data.yield= 1;
+  for (;;)
+  {
+    error= plugin_foreach(NULL, select_tc_plugin_cb, MYSQL_TC_PLUGIN, &data);
+    if (error)
+      DBUG_RETURN(NULL);
+    if (data.selected)
+      DBUG_RETURN(data.selected);
+    if (!data.yield)
+      break;
+    else
+      data.yield= 0;
+  }
+
+  sql_print_error("Fatal: No transaction coordinator plugin available");
+
+  DBUG_RETURN(NULL);
+}
+
 
 static ulonglong binlog_status_var_num_commits;
 static ulonglong binlog_status_var_num_group_commits;
@@ -6885,6 +7003,24 @@ TC_LOG_BINLOG::set_status_variables()
 struct st_mysql_storage_engine binlog_storage_engine=
 { MYSQL_HANDLERTON_INTERFACE_VERSION };
 
+static TC_LOG *
+tc_log_binlog_register(void *arg __attribute__((unused)),
+                       int yield __attribute__((unused)))
+{
+  DBUG_ENTER("tc_log_binlog_create");
+
+  if (opt_bin_log && total_ha_2pc > 1)
+    DBUG_RETURN(&mysql_bin_log);
+
+  DBUG_RETURN(NULL);
+}
+
+static struct st_mysql_tc tc_log_binlog_plugin=
+{
+  MYSQL_TC_VERSION,
+  tc_log_binlog_register
+};
+
 mysql_declare_plugin(binlog)
 {
   MYSQL_STORAGE_ENGINE_PLUGIN,
@@ -6903,5 +7039,57 @@ mysql_declare_plugin(binlog)
   NULL,                       /* system variables                */
 #endif
   NULL                        /* config options                  */
+},
+{
+  MYSQL_TC_PLUGIN,
+  &tc_log_binlog_plugin,
+  "tc_log_binlog",
+  "MariaDB",
+  "TC for the binary log",
+  PLUGIN_LICENSE_GPL,
+  NULL,
+  NULL,
+  0x0100 /* 1.0 */,
+  NULL,
+  NULL,
+  NULL
+}
+mysql_declare_plugin_end;
+
+
+/*
+  Dummy TC plugin with empty implementation.
+  This is used when there is at most one XA-capable engine, in which case
+  the transaction coordinator is not used.
+*/
+
+static TC_LOG *
+tc_log_dummy_register(void *arg __attribute__((unused)), int yield)
+{
+  if (yield || total_ha_2pc > 1)
+    return NULL;
+  return &tc_log_dummy;
+}
+
+static struct st_mysql_tc tc_log_dummy_plugin=
+{
+  MYSQL_TC_VERSION,
+  tc_log_dummy_register
+};
+
+mysql_declare_plugin(tc_log_dummy)
+{
+  MYSQL_TC_PLUGIN,
+  &tc_log_dummy_plugin,
+  "tc_log_dummy",
+  "MariaDB",
+  "Dummy TC plugin for server without 2-phase commit",
+  PLUGIN_LICENSE_GPL,
+  NULL,
+  NULL,
+  0x0100 /* 1.0 */,
+  NULL,
+  NULL,
+  NULL
 }
 mysql_declare_plugin_end;

=== modified file 'sql/log.h'
--- sql/log.h	2010-10-01 08:49:57 +0000
+++ sql/log.h	2010-10-04 10:48:08 +0000
@@ -781,4 +781,6 @@ extern TYPELIB binlog_format_typelib;
 
 int query_error_code(THD *thd, bool not_killed);
 
+TC_LOG *select_tc_plugin();
+
 #endif /* LOG_H */

=== modified file 'sql/mysqld.cc'
--- sql/mysqld.cc	2010-10-01 08:49:57 +0000
+++ sql/mysqld.cc	2010-10-04 10:48:08 +0000
@@ -4283,10 +4283,11 @@ a file name for --log-bin-index option",
   }
 #endif
 
-  tc_log= (total_ha_2pc > 1 ? (opt_bin_log  ?
-                               (TC_LOG *) &mysql_bin_log :
-                               (TC_LOG *) &tc_log_mmap) :
-           (TC_LOG *) &tc_log_dummy);
+  tc_log= select_tc_plugin();
+  if (!tc_log)
+  {
+    unireg_abort(1);
+  }
 
   if (tc_log->open(opt_bin_log ? opt_bin_logname : opt_tc_log_file))
   {

=== modified file 'sql/sql_builtin.cc.in'
--- sql/sql_builtin.cc.in	2009-11-29 23:08:56 +0000
+++ sql/sql_builtin.cc.in	2010-10-04 10:48:08 +0000
@@ -19,10 +19,10 @@
 typedef struct st_mysql_plugin builtin_plugin[];
 
 extern builtin_plugin 
-  builtin_binlog_plugin@mysql_plugin_defs@;
+  builtin_tc_log_dummy_plugin, builtin_tc_log_mmap_plugin, builtin_binlog_plugin@mysql_plugin_defs@;
 
 struct st_mysql_plugin *mysqld_builtins[]=
 {
-  builtin_binlog_plugin@mysql_plugin_defs@,(struct st_mysql_plugin *)0
+  builtin_tc_log_dummy_plugin, builtin_tc_log_mmap_plugin, builtin_binlog_plugin@mysql_plugin_defs@,(struct st_mysql_plugin *)0
 };
 

=== modified file 'sql/sql_plugin.cc'
--- sql/sql_plugin.cc	2010-09-21 14:21:30 +0000
+++ sql/sql_plugin.cc	2010-10-04 10:48:08 +0000
@@ -59,12 +59,12 @@ extern int finalize_schema_table(st_plug
 */
 plugin_type_init plugin_type_initialize[MYSQL_MAX_PLUGIN_TYPE_NUM]=
 {
-  0,ha_initialize_handlerton,0,0,initialize_schema_table
+  0,ha_initialize_handlerton,0,0,initialize_schema_table,0
 };
 
 plugin_type_init plugin_type_deinitialize[MYSQL_MAX_PLUGIN_TYPE_NUM]=
 {
-  0,ha_finalize_handlerton,0,0,finalize_schema_table
+  0,ha_finalize_handlerton,0,0,finalize_schema_table,0
 };
 
 #ifdef HAVE_DLOPEN