maria-developers team mailing list archive
-
maria-developers team
-
Mailing list archive
-
Message #09839
Re: 48d1ba6: MDEV-10296 - Multi-instance table cache
Hi, Sergey!
Ok, see comments below.
But why is this supposed to improve scalability?
Are you talking about the use case of one table being access by multiple
threads? Or this helps in other cases too?
And what is the effect of the more expensive tc_remove_all_unused_tables() ?
On Jun 29, Sergey Vojtovich wrote:
> revision-id: 48d1ba6097939efe5efd28dd3ed9d281cc2bc2f4 (mariadb-10.2.0-93-g48d1ba6)
> parent(s): 8bec9746f0d5b363f385713035ca3f2daff34e1c
> committer: Sergey Vojtovich
> timestamp: 2016-06-29 16:33:08 +0400
> message:
>
> MDEV-10296 - Multi-instance table cache
>
> Improve scalability by implementing multi-instance table cache.
>
> diff --git a/mysql-test/suite/sys_vars/r/sysvars_server_notembedded.result b/mysql-test/suite/sys_vars/r/sysvars_server_notembedded.result
> index e075c64..22b12a9 100644
> --- a/mysql-test/suite/sys_vars/r/sysvars_server_notembedded.result
> +++ b/mysql-test/suite/sys_vars/r/sysvars_server_notembedded.result
> @@ -4559,6 +4559,20 @@ NUMERIC_BLOCK_SIZE 1
> ENUM_VALUE_LIST NULL
> READ_ONLY NO
> COMMAND_LINE_ARGUMENT REQUIRED
> +VARIABLE_NAME TABLE_OPEN_CACHE_INSTANCES
> +SESSION_VALUE NULL
> +GLOBAL_VALUE 1
> +GLOBAL_VALUE_ORIGIN COMPILE-TIME
Default value 1 and it's not auto-tuned on startup?
You think multi-instance table cache is better stay unused? :)
If not, either use a reasonable default or, may be, choose it on
startup based on my_gencpus()
> +DEFAULT_VALUE 1
> +VARIABLE_SCOPE GLOBAL
> +VARIABLE_TYPE BIGINT UNSIGNED
> +VARIABLE_COMMENT The number of table cache instances
> +NUMERIC_MIN_VALUE 1
> +NUMERIC_MAX_VALUE 64
> +NUMERIC_BLOCK_SIZE 1
> +ENUM_VALUE_LIST NULL
> +READ_ONLY YES
> +COMMAND_LINE_ARGUMENT REQUIRED
> VARIABLE_NAME THREAD_CACHE_SIZE
> SESSION_VALUE NULL
> GLOBAL_VALUE 151
> diff --git a/sql/table_cache.cc b/sql/table_cache.cc
> index bfe51df..f1eb006 100644
> --- a/sql/table_cache.cc
> +++ b/sql/table_cache.cc
> @@ -125,67 +114,101 @@ static int fix_thd_pins(THD *thd)
> part of table definition cache.
> */
>
> +struct Share_free_tables
> {
> + typedef I_P_List <TABLE, TABLE_share> List;
> + List list;
> + char pad[CPU_LEVEL1_DCACHE_LINESIZE];
a comment, please, explaining the padding.
Or see a suggestion at the end.
> +};
>
> +
> +struct Table_cache_instance
> +{
> + /**
> + Protects free_tables (TABLE::global_free_next and TABLE::global_free_prev),
> + records, Share_free_tables::List (TABLE::prev and TABLE::next),
> + TABLE::in_use.
> + */
> + mysql_mutex_t LOCK_table_cache;
> + I_P_List <TABLE, I_P_List_adapter<TABLE, &TABLE::global_free_next,
> + &TABLE::global_free_prev>,
> + I_P_List_null_counter, I_P_List_fast_push_back<TABLE> >
> + free_tables;
> + ulong records;
> + char pad[CPU_LEVEL1_DCACHE_LINESIZE];
why is it padded?
> +
> + Table_cache_instance(): records(0)
> {
> + mysql_mutex_init(key_LOCK_table_cache, &LOCK_table_cache,
> + MY_MUTEX_INIT_FAST);
> }
> +
> + ~Table_cache_instance()
> + {
> + mysql_mutex_destroy(&LOCK_table_cache);
> + DBUG_ASSERT(free_tables.is_empty());
> + DBUG_ASSERT(records == 0);
> + }
> +};
>
>
> +static Table_cache_instance *tc;
>
> +
> +static inline Share_free_tables *SHARE_FREE_TABLES(TDC_element *element)
fairly unconventional letter case choice :)
Besides, instead of pointer arithmetics, you could've simply put
Share_free_tables free_tables[1];
at the end of TDC_element.
> {
> + return (Share_free_tables*) (element + 1);
> }
>
>
> +static void intern_close_table(TABLE *table)
> +{
> + delete table->triggers;
> + DBUG_ASSERT(table->file);
old intern_close_table had
if (table->file) // Not true if placeholder
do you mean, these placeholder TABLEs can no longer be in the table
cache?
> + closefrm(table);
> + tdc_release_share(table->s);
old intern_close_table() also did table->alias.free();
> + my_free(table);
> +}
>
> +
> +/**
> + Get number of TABLE objects (used and unused) in table cache.
> */
>
> +uint tc_records(void)
> {
> + ulong total= 0;
> + for (ulong i= 0; i < tc_instances; i++)
> + {
> + mysql_mutex_lock(&tc[i].LOCK_table_cache);
> + total+= tc[i].records;
> + mysql_mutex_unlock(&tc[i].LOCK_table_cache);
> + }
> + return total;
> }
>
>
> /**
> Remove TABLE object from table cache.
> */
>
> static void tc_remove_table(TABLE *table)
> {
> + TDC_element *element= table->s->tdc;
> +
> + mysql_mutex_lock(&element->LOCK_table_share);
> + /* Wait for MDL deadlock detector to complete traversing tdc.all_tables. */
> + while (element->all_tables_refs)
> + mysql_cond_wait(&element->COND_release, &element->LOCK_table_share);
> + element->all_tables.remove(table);
> + mysql_mutex_unlock(&element->LOCK_table_share);
> +
> + intern_close_table(table);
> }
>
>
> static void tc_remove_all_unused_tables(TDC_element *element,
> + Share_free_tables::List *purge_tables,
> bool mark_flushed)
> {
> TABLE *table;
> @@ -200,10 +223,18 @@ static void tc_remove_all_unused_tables(TDC_element *element,
> */
> if (mark_flushed)
> element->flushed= true;
> + for (ulong i= 0; i < tc_instances; i++)
> {
> + mysql_mutex_lock(&tc[i].LOCK_table_cache);
> + while ((table= SHARE_FREE_TABLES(element)[i].list.pop_front()))
> + {
> + tc[i].records--;
> + tc[i].free_tables.remove(table);
> + DBUG_ASSERT(element->all_tables_refs == 0);
> + element->all_tables.remove(table);
> + purge_tables->push_front(table);
> + }
> + mysql_mutex_unlock(&tc[i].LOCK_table_cache);
> }
> }
>
> @@ -225,7 +256,7 @@ static void tc_remove_all_unused_tables(TDC_element *element,
>
> struct tc_purge_arg
> {
> + Share_free_tables::List purge_tables;
> bool mark_flushed;
> };
>
> @@ -281,79 +298,33 @@ static TABLE *tc_free_tables_back(TDC_element *element)
> - free evicted object
> */
>
> +void tc_add_table(THD *thd, TABLE *table)
> {
> + ulong i= thd->thread_id % tc_instances;
> + TABLE *LRU_table= 0;
> + TDC_element *element= table->s->tdc;
>
> + DBUG_ASSERT(table->in_use == thd);
> mysql_mutex_lock(&element->LOCK_table_share);
> + /* Wait for MDL deadlock detector to complete traversing tdc.all_tables. */
> + while (element->all_tables_refs)
> + mysql_cond_wait(&element->COND_release, &element->LOCK_table_share);
> + element->all_tables.push_front(table);
> mysql_mutex_unlock(&element->LOCK_table_share);
>
> + mysql_mutex_lock(&tc[i].LOCK_table_cache);
> + if (tc[i].records == tc_size && (LRU_table= tc[i].free_tables.pop_front()))
> {
> + SHARE_FREE_TABLES(LRU_table->s->tdc)[i].list.remove(LRU_table);
> + /* Needed if MDL deadlock detector chimes in before tc_remove_table() */
> + LRU_table->in_use= thd;
How can MDL deadlock detector get to the unused table? It traverses the
lock dependency graph, and an unused table cannot be locked, can it?
> }
> + else
> + tc[i].records++;
> + mysql_mutex_unlock(&tc[i].LOCK_table_cache);
> +
> + if (LRU_table)
> + tc_remove_table(LRU_table);
> }
>
>
> @@ -369,10 +340,11 @@ void tc_add_table(THD *thd, TABLE *table)
>
> static TABLE *tc_acquire_table(THD *thd, TDC_element *element)
> {
> + ulong i= thd->thread_id % tc_instances;
> TABLE *table;
>
> - mysql_mutex_lock(&element->LOCK_table_share);
> - table= element->free_tables.pop_front();
> + mysql_mutex_lock(&tc[i].LOCK_table_cache);
> + table= SHARE_FREE_TABLES(element)[i].list.pop_front();
> if (table)
> {
> DBUG_ASSERT(!table->in_use);
> @@ -381,8 +353,9 @@ static TABLE *tc_acquire_table(THD *thd, TDC_element *element)
> DBUG_ASSERT(table->db_stat && table->file);
> /* The children must be detached from the table. */
> DBUG_ASSERT(!table->file->extra(HA_EXTRA_IS_ATTACHED_CHILDREN));
> + tc[i].free_tables.remove(table);
> }
> - mysql_mutex_unlock(&element->LOCK_table_share);
> + mysql_mutex_unlock(&tc[i].LOCK_table_cache);
> return table;
> }
>
> @@ -413,40 +386,27 @@ static TABLE *tc_acquire_table(THD *thd, TDC_element *element)
> @retval false object released
> */
>
> -bool tc_release_table(TABLE *table)
> +void tc_release_table(TABLE *table)
> {
> + ulong i= table->in_use->thread_id % tc_instances;
> DBUG_ASSERT(table->in_use);
> DBUG_ASSERT(table->file);
>
> - if (table->needs_reopen() || tc_records() > tc_size)
> + mysql_mutex_lock(&tc[i].LOCK_table_cache);
> + if (table->needs_reopen() || table->s->tdc->flushed ||
> + tc[i].records > tc_size)
> {
> - mysql_mutex_lock(&table->s->tdc->LOCK_table_share);
> - goto purge;
> + tc[i].records--;
> + mysql_mutex_unlock(&tc[i].LOCK_table_cache);
> + tc_remove_table(table);
Does it mean that if the table cache is full, you free the table,
instead of adding it to the cache? That's not very LRU-ish
> + }
> + else
> + {
> + table->in_use= 0;
> + SHARE_FREE_TABLES(table->s->tdc)[i].list.push_front(table);
> + tc[i].free_tables.push_back(table);
> + mysql_mutex_unlock(&tc[i].LOCK_table_cache);
> }
> }
>
>
> @@ -573,23 +535,29 @@ static uchar *tdc_hash_key(const TDC_element *element, size_t *length,
> Initialize table definition cache.
> */
>
> -void tdc_init(void)
> +bool tdc_init(void)
> {
> DBUG_ENTER("tdc_init");
> #ifdef HAVE_PSI_INTERFACE
> - init_tc_psi_keys();
> + mysql_mutex_register("sql", all_tc_mutexes, array_elements(all_tc_mutexes));
> + mysql_cond_register("sql", all_tc_conds, array_elements(all_tc_conds));
> #endif
> + /* Extra instance is allocated to avoid false sharing */
> + if (!(tc= new Table_cache_instance[tc_instances + 1]))
> + DBUG_RETURN(true);
> tdc_inited= true;
> mysql_mutex_init(key_LOCK_unused_shares, &LOCK_unused_shares,
> MY_MUTEX_INIT_FAST);
> tdc_version= 1L; /* Increments on each reload */
> - lf_hash_init(&tdc_hash, sizeof(TDC_element), LF_HASH_UNIQUE, 0, 0,
> + lf_hash_init(&tdc_hash, sizeof(TDC_element) +
> + sizeof(Share_free_tables) * tc_instances,
> + LF_HASH_UNIQUE, 0, 0,
> (my_hash_get_key) tdc_hash_key,
> &my_charset_bin);
> tdc_hash.alloc.constructor= lf_alloc_constructor;
> tdc_hash.alloc.destructor= lf_alloc_destructor;
> tdc_hash.initializer= (lf_hash_initializer) tdc_hash_initializer;
> - DBUG_VOID_RETURN;
> + DBUG_RETURN(false);
> }
>
>
> @@ -631,6 +599,7 @@ void tdc_deinit(void)
> tdc_inited= false;
> lf_hash_destroy(&tdc_hash);
> mysql_mutex_destroy(&LOCK_unused_shares);
> + delete [] tc;
> }
> DBUG_VOID_RETURN;
> }
> @@ -1038,7 +1007,7 @@ bool tdc_remove_table(THD *thd, enum_tdc_remove_table_type remove_type,
> const char *db, const char *table_name,
> bool kill_delayed_threads)
> {
> - TDC_element::TABLE_list purge_tables;
> + Share_free_tables::List purge_tables;
> TABLE *table;
> TDC_element *element;
> uint my_refs= 1;
> diff --git a/sql/table_cache.h b/sql/table_cache.h
> index c05baae..4f449cb 100644
> --- a/sql/table_cache.h
> +++ b/sql/table_cache.h
> @@ -45,7 +43,7 @@ struct TDC_element
> for this share.
> */
> All_share_tables_list all_tables;
> - TABLE_list free_tables;
> + char pad[CPU_LEVEL1_DCACHE_LINESIZE]; // free_tables follows this immediately
hmm. may be you'd rather put padding in one place - at the beginning of
Share_free_tables?
> };
Regards,
Sergei
Chief Architect MariaDB
and security@xxxxxxxxxxx
Follow ups