percona-discussion team mailing list archive
-
percona-discussion team
-
Mailing list archive
-
Message #00414
[Merge] lp:~percona-dev/percona-xtradb/porting-1.0.3 into lp:~percona-dev/percona-xtradb/extensions-1.0
Percona has proposed merging lp:~percona-dev/percona-xtradb/porting-1.0.3 into lp:~percona-dev/percona-xtradb/extensions-1.0.
Requested reviews:
Percona developers (percona-dev)
rw_locks for 1.0.3 plugin
--
https://code.launchpad.net/~percona-dev/percona-xtradb/porting-1.0.3/+merge/4919
Your team Percona developers is subscribed to branch lp:~percona-dev/percona-xtradb/porting-1.0.3.
=== added file 'i_s_innodb_buffer_pool_pages.patch'
--- i_s_innodb_buffer_pool_pages.patch 1970-01-01 00:00:00 +0000
+++ i_s_innodb_buffer_pool_pages.patch 2009-03-20 05:35:54 +0000
@@ -0,0 +1,818 @@
+diff -ruN innodb_plugin-1.0.3_orig/handler/ha_innodb.cc innodb_plugin-1.0.3_tmp/handler/ha_innodb.cc
+--- innodb_plugin-1.0.3_orig/handler/ha_innodb.cc 2009-03-18 18:08:37.000000000 +0900
++++ innodb_plugin-1.0.3_tmp/handler/ha_innodb.cc 2009-03-18 18:15:09.000000000 +0900
+@@ -10043,6 +10043,9 @@
+ innobase_system_variables, /* system variables */
+ NULL /* reserved */
+ },
++i_s_innodb_buffer_pool_pages,
++i_s_innodb_buffer_pool_pages_index,
++i_s_innodb_buffer_pool_pages_blob,
+ i_s_innodb_trx,
+ i_s_innodb_locks,
+ i_s_innodb_lock_waits,
+diff -ruN innodb_plugin-1.0.3_orig/handler/i_s.cc innodb_plugin-1.0.3_tmp/handler/i_s.cc
+--- innodb_plugin-1.0.3_orig/handler/i_s.cc 2009-03-18 18:08:22.000000000 +0900
++++ innodb_plugin-1.0.3_tmp/handler/i_s.cc 2009-03-18 18:15:09.000000000 +0900
+@@ -41,6 +41,16 @@
+ #include "buf0buf.h" /* for buf_pool and PAGE_ZIP_MIN_SIZE */
+ #include "ha_prototypes.h" /* for innobase_convert_name() */
+ #include "srv0start.h" /* for srv_was_started */
++#include "btr0btr.h" /* for btr_page_get_index_id */
++#include "dict0dict.h" /* for dict_index_get_if_in_cache */
++/* Local copy of the chunk descriptor from buf0buf.c; this file needs the layout to walk chunk->blocks and chunk->size. NOTE(review): presumably must be kept identical to the definition in buf0buf.c -- confirm. */
++struct buf_chunk_struct{
++ ulint mem_size; /* allocated size of the chunk */
++ ulint size; /* size of frames[] and blocks[] */
++ void* mem; /* pointer to the memory area which
++ was allocated for the frames */
++ buf_block_t* blocks; /* array of buffer control blocks */
++};
+ }
+ #include "handler0vars.h"
+
+@@ -378,6 +388,751 @@
+ };
+
+
++static ST_FIELD_INFO i_s_innodb_buffer_pool_pages_fields_info[] = /* columns of information_schema.innodb_buffer_pool_pages; the order must match the table->field[N] indexes used in i_s_innodb_buffer_pool_pages_fill() */
++{
++ {STRUCT_FLD(field_name, "page_type"), /* field[0]: page type name derived from fil_page_get_type() */
++ STRUCT_FLD(field_length, 64),
++ STRUCT_FLD(field_type, MYSQL_TYPE_STRING),
++ STRUCT_FLD(value, 0),
++ STRUCT_FLD(field_flags, MY_I_S_MAYBE_NULL),
++ STRUCT_FLD(old_name, ""),
++ STRUCT_FLD(open_method, SKIP_OPEN_TABLE)},
++
++ {STRUCT_FLD(field_name, "space_id"), /* field[1]: block->page.space */
++ STRUCT_FLD(field_length, MY_INT64_NUM_DECIMAL_DIGITS),
++ STRUCT_FLD(field_type, MYSQL_TYPE_LONGLONG),
++ STRUCT_FLD(value, 0),
++ STRUCT_FLD(field_flags, MY_I_S_UNSIGNED),
++ STRUCT_FLD(old_name, ""),
++ STRUCT_FLD(open_method, SKIP_OPEN_TABLE)},
++
++ {STRUCT_FLD(field_name, "page_no"), /* field[2]: block->page.offset */
++ STRUCT_FLD(field_length, MY_INT64_NUM_DECIMAL_DIGITS),
++ STRUCT_FLD(field_type, MYSQL_TYPE_LONGLONG),
++ STRUCT_FLD(value, 0),
++ STRUCT_FLD(field_flags, MY_I_S_UNSIGNED),
++ STRUCT_FLD(old_name, ""),
++ STRUCT_FLD(open_method, SKIP_OPEN_TABLE)},
++
++ {STRUCT_FLD(field_name, "lru_position"), /* field[3]: block->page.LRU_position */
++ STRUCT_FLD(field_length, MY_INT64_NUM_DECIMAL_DIGITS),
++ STRUCT_FLD(field_type, MYSQL_TYPE_LONGLONG),
++ STRUCT_FLD(value, 0),
++ STRUCT_FLD(field_flags, MY_I_S_UNSIGNED),
++ STRUCT_FLD(old_name, ""),
++ STRUCT_FLD(open_method, SKIP_OPEN_TABLE)},
++
++ {STRUCT_FLD(field_name, "fix_count"), /* field[4]: block->page.buf_fix_count */
++ STRUCT_FLD(field_length, MY_INT64_NUM_DECIMAL_DIGITS),
++ STRUCT_FLD(field_type, MYSQL_TYPE_LONGLONG),
++ STRUCT_FLD(value, 0),
++ STRUCT_FLD(field_flags, MY_I_S_UNSIGNED),
++ STRUCT_FLD(old_name, ""),
++ STRUCT_FLD(open_method, SKIP_OPEN_TABLE)},
++
++ {STRUCT_FLD(field_name, "flush_type"), /* field[5]: block->page.flush_type */
++ STRUCT_FLD(field_length, MY_INT64_NUM_DECIMAL_DIGITS),
++ STRUCT_FLD(field_type, MYSQL_TYPE_LONGLONG),
++ STRUCT_FLD(value, 0),
++ STRUCT_FLD(field_flags, MY_I_S_UNSIGNED),
++ STRUCT_FLD(old_name, ""),
++ STRUCT_FLD(open_method, SKIP_OPEN_TABLE)},
++
++ END_OF_ST_FIELD_INFO
++};
++
++static ST_FIELD_INFO i_s_innodb_buffer_pool_pages_index_fields_info[] = /* columns of information_schema.innodb_buffer_pool_pages_index; the order must match the table->field[N] indexes used in i_s_innodb_buffer_pool_pages_index_fill() */
++{
++ {STRUCT_FLD(field_name, "schema_name"), /* field[0]: part of index->table_name before '/'; NULL when the name contains no '/' */
++ STRUCT_FLD(field_length, 64),
++ STRUCT_FLD(field_type, MYSQL_TYPE_STRING),
++ STRUCT_FLD(value, 0),
++ STRUCT_FLD(field_flags, MY_I_S_MAYBE_NULL),
++ STRUCT_FLD(old_name, ""),
++ STRUCT_FLD(open_method, SKIP_OPEN_TABLE)},
++
++ {STRUCT_FLD(field_name, "table_name"), /* field[1]: part of index->table_name after '/' (the whole name when there is no '/') */
++ STRUCT_FLD(field_length, 64),
++ STRUCT_FLD(field_type, MYSQL_TYPE_STRING),
++ STRUCT_FLD(value, 0),
++ STRUCT_FLD(field_flags, 0),
++ STRUCT_FLD(old_name, ""),
++ STRUCT_FLD(open_method, SKIP_OPEN_TABLE)},
++
++ {STRUCT_FLD(field_name, "index_name"), /* field[2]: index->name */
++ STRUCT_FLD(field_length, 64),
++ STRUCT_FLD(field_type, MYSQL_TYPE_STRING),
++ STRUCT_FLD(value, 0),
++ STRUCT_FLD(field_flags, 0),
++ STRUCT_FLD(old_name, ""),
++ STRUCT_FLD(open_method, SKIP_OPEN_TABLE)},
++
++ {STRUCT_FLD(field_name, "space_id"), /* field[3]: block->page.space */
++ STRUCT_FLD(field_length, MY_INT64_NUM_DECIMAL_DIGITS),
++ STRUCT_FLD(field_type, MYSQL_TYPE_LONGLONG),
++ STRUCT_FLD(value, 0),
++ STRUCT_FLD(field_flags, MY_I_S_UNSIGNED),
++ STRUCT_FLD(old_name, ""),
++ STRUCT_FLD(open_method, SKIP_OPEN_TABLE)},
++
++ {STRUCT_FLD(field_name, "page_no"), /* field[4]: block->page.offset */
++ STRUCT_FLD(field_length, MY_INT64_NUM_DECIMAL_DIGITS),
++ STRUCT_FLD(field_type, MYSQL_TYPE_LONGLONG),
++ STRUCT_FLD(value, 0),
++ STRUCT_FLD(field_flags, MY_I_S_UNSIGNED),
++ STRUCT_FLD(old_name, ""),
++ STRUCT_FLD(open_method, SKIP_OPEN_TABLE)},
++
++ {STRUCT_FLD(field_name, "n_recs"), /* field[5]: page_get_n_recs(frame) */
++ STRUCT_FLD(field_length, MY_INT64_NUM_DECIMAL_DIGITS),
++ STRUCT_FLD(field_type, MYSQL_TYPE_LONGLONG),
++ STRUCT_FLD(value, 0),
++ STRUCT_FLD(field_flags, MY_I_S_UNSIGNED),
++ STRUCT_FLD(old_name, ""),
++ STRUCT_FLD(open_method, SKIP_OPEN_TABLE)},
++
++ {STRUCT_FLD(field_name, "data_size"), /* field[6]: page_get_data_size(frame) */
++ STRUCT_FLD(field_length, MY_INT64_NUM_DECIMAL_DIGITS),
++ STRUCT_FLD(field_type, MYSQL_TYPE_LONGLONG),
++ STRUCT_FLD(value, 0),
++ STRUCT_FLD(field_flags, MY_I_S_UNSIGNED),
++ STRUCT_FLD(old_name, ""),
++ STRUCT_FLD(open_method, SKIP_OPEN_TABLE)},
++
++ {STRUCT_FLD(field_name, "hashed"), /* field[7]: block->is_hashed */
++ STRUCT_FLD(field_length, MY_INT64_NUM_DECIMAL_DIGITS),
++ STRUCT_FLD(field_type, MYSQL_TYPE_LONGLONG),
++ STRUCT_FLD(value, 0),
++ STRUCT_FLD(field_flags, MY_I_S_UNSIGNED),
++ STRUCT_FLD(old_name, ""),
++ STRUCT_FLD(open_method, SKIP_OPEN_TABLE)},
++
++ {STRUCT_FLD(field_name, "accessed"), /* field[8]: block->page.accessed */
++ STRUCT_FLD(field_length, MY_INT64_NUM_DECIMAL_DIGITS),
++ STRUCT_FLD(field_type, MYSQL_TYPE_LONGLONG),
++ STRUCT_FLD(value, 0),
++ STRUCT_FLD(field_flags, MY_I_S_UNSIGNED),
++ STRUCT_FLD(old_name, ""),
++ STRUCT_FLD(open_method, SKIP_OPEN_TABLE)},
++
++ {STRUCT_FLD(field_name, "modified"), /* field[9]: 1 when block->page.newest_modification != 0, else 0 */
++ STRUCT_FLD(field_length, MY_INT64_NUM_DECIMAL_DIGITS),
++ STRUCT_FLD(field_type, MYSQL_TYPE_LONGLONG),
++ STRUCT_FLD(value, 0),
++ STRUCT_FLD(field_flags, MY_I_S_UNSIGNED),
++ STRUCT_FLD(old_name, ""),
++ STRUCT_FLD(open_method, SKIP_OPEN_TABLE)},
++
++ {STRUCT_FLD(field_name, "dirty"), /* field[10]: 1 when block->page.oldest_modification != 0, else 0 */
++ STRUCT_FLD(field_length, MY_INT64_NUM_DECIMAL_DIGITS),
++ STRUCT_FLD(field_type, MYSQL_TYPE_LONGLONG),
++ STRUCT_FLD(value, 0),
++ STRUCT_FLD(field_flags, MY_I_S_UNSIGNED),
++ STRUCT_FLD(old_name, ""),
++ STRUCT_FLD(open_method, SKIP_OPEN_TABLE)},
++
++ {STRUCT_FLD(field_name, "old"), /* field[11]: block->page.old */
++ STRUCT_FLD(field_length, MY_INT64_NUM_DECIMAL_DIGITS),
++ STRUCT_FLD(field_type, MYSQL_TYPE_LONGLONG),
++ STRUCT_FLD(value, 0),
++ STRUCT_FLD(field_flags, MY_I_S_UNSIGNED),
++ STRUCT_FLD(old_name, ""),
++ STRUCT_FLD(open_method, SKIP_OPEN_TABLE)},
++
++ {STRUCT_FLD(field_name, "lru_position"), /* field[12]: block->page.LRU_position */
++ STRUCT_FLD(field_length, MY_INT64_NUM_DECIMAL_DIGITS),
++ STRUCT_FLD(field_type, MYSQL_TYPE_LONGLONG),
++ STRUCT_FLD(value, 0),
++ STRUCT_FLD(field_flags, MY_I_S_UNSIGNED),
++ STRUCT_FLD(old_name, ""),
++ STRUCT_FLD(open_method, SKIP_OPEN_TABLE)},
++
++ {STRUCT_FLD(field_name, "fix_count"), /* field[13]: block->page.buf_fix_count */
++ STRUCT_FLD(field_length, MY_INT64_NUM_DECIMAL_DIGITS),
++ STRUCT_FLD(field_type, MYSQL_TYPE_LONGLONG),
++ STRUCT_FLD(value, 0),
++ STRUCT_FLD(field_flags, MY_I_S_UNSIGNED),
++ STRUCT_FLD(old_name, ""),
++ STRUCT_FLD(open_method, SKIP_OPEN_TABLE)},
++
++ {STRUCT_FLD(field_name, "flush_type"), /* field[14]: block->page.flush_type */
++ STRUCT_FLD(field_length, MY_INT64_NUM_DECIMAL_DIGITS),
++ STRUCT_FLD(field_type, MYSQL_TYPE_LONGLONG),
++ STRUCT_FLD(value, 0),
++ STRUCT_FLD(field_flags, MY_I_S_UNSIGNED),
++ STRUCT_FLD(old_name, ""),
++ STRUCT_FLD(open_method, SKIP_OPEN_TABLE)},
++
++ END_OF_ST_FIELD_INFO
++};
++
++static ST_FIELD_INFO i_s_innodb_buffer_pool_pages_blob_fields_info[] = /* columns of information_schema.innodb_buffer_pool_pages_blob; the order must match the table->field[N] indexes used in i_s_innodb_buffer_pool_pages_blob_fill() */
++{
++ {STRUCT_FLD(field_name, "space_id"), /* field[0]: block->page.space */
++ STRUCT_FLD(field_length, MY_INT64_NUM_DECIMAL_DIGITS),
++ STRUCT_FLD(field_type, MYSQL_TYPE_LONGLONG),
++ STRUCT_FLD(value, 0),
++ STRUCT_FLD(field_flags, MY_I_S_UNSIGNED),
++ STRUCT_FLD(old_name, ""),
++ STRUCT_FLD(open_method, SKIP_OPEN_TABLE)},
++
++ {STRUCT_FLD(field_name, "page_no"), /* field[1]: block->page.offset */
++ STRUCT_FLD(field_length, MY_INT64_NUM_DECIMAL_DIGITS),
++ STRUCT_FLD(field_type, MYSQL_TYPE_LONGLONG),
++ STRUCT_FLD(value, 0),
++ STRUCT_FLD(field_flags, MY_I_S_UNSIGNED),
++ STRUCT_FLD(old_name, ""),
++ STRUCT_FLD(open_method, SKIP_OPEN_TABLE)},
++
++ {STRUCT_FLD(field_name, "compressed"), /* field[2]: 1 when buf_block_get_page_zip() returned a non-NULL descriptor, else 0 */
++ STRUCT_FLD(field_length, MY_INT64_NUM_DECIMAL_DIGITS),
++ STRUCT_FLD(field_type, MYSQL_TYPE_LONGLONG),
++ STRUCT_FLD(value, 0),
++ STRUCT_FLD(field_flags, MY_I_S_UNSIGNED),
++ STRUCT_FLD(old_name, ""),
++ STRUCT_FLD(open_method, SKIP_OPEN_TABLE)},
++
++ {STRUCT_FLD(field_name, "part_len"), /* field[3]: 4-byte value read at FIL_PAGE_DATA + 0 of the frame; 0 when a page_zip descriptor is present */
++ STRUCT_FLD(field_length, MY_INT64_NUM_DECIMAL_DIGITS),
++ STRUCT_FLD(field_type, MYSQL_TYPE_LONGLONG),
++ STRUCT_FLD(value, 0),
++ STRUCT_FLD(field_flags, MY_I_S_UNSIGNED),
++ STRUCT_FLD(old_name, ""),
++ STRUCT_FLD(open_method, SKIP_OPEN_TABLE)},
++
++ {STRUCT_FLD(field_name, "next_page_no"), /* field[4]: 0 when the next-page pointer read from the page is FIL_NULL; going by the column name, otherwise the next page of the BLOB chain */
++ STRUCT_FLD(field_length, MY_INT64_NUM_DECIMAL_DIGITS),
++ STRUCT_FLD(field_type, MYSQL_TYPE_LONGLONG),
++ STRUCT_FLD(value, 0),
++ STRUCT_FLD(field_flags, MY_I_S_UNSIGNED),
++ STRUCT_FLD(old_name, ""),
++ STRUCT_FLD(open_method, SKIP_OPEN_TABLE)},
++
++ {STRUCT_FLD(field_name, "lru_position"), /* field[5]: block->page.LRU_position */
++ STRUCT_FLD(field_length, MY_INT64_NUM_DECIMAL_DIGITS),
++ STRUCT_FLD(field_type, MYSQL_TYPE_LONGLONG),
++ STRUCT_FLD(value, 0),
++ STRUCT_FLD(field_flags, MY_I_S_UNSIGNED),
++ STRUCT_FLD(old_name, ""),
++ STRUCT_FLD(open_method, SKIP_OPEN_TABLE)},
++
++ {STRUCT_FLD(field_name, "fix_count"), /* field[6]: block->page.buf_fix_count */
++ STRUCT_FLD(field_length, MY_INT64_NUM_DECIMAL_DIGITS),
++ STRUCT_FLD(field_type, MYSQL_TYPE_LONGLONG),
++ STRUCT_FLD(value, 0),
++ STRUCT_FLD(field_flags, MY_I_S_UNSIGNED),
++ STRUCT_FLD(old_name, ""),
++ STRUCT_FLD(open_method, SKIP_OPEN_TABLE)},
++
++ {STRUCT_FLD(field_name, "flush_type"), /* field[7]: block->page.flush_type */
++ STRUCT_FLD(field_length, MY_INT64_NUM_DECIMAL_DIGITS),
++ STRUCT_FLD(field_type, MYSQL_TYPE_LONGLONG),
++ STRUCT_FLD(value, 0),
++ STRUCT_FLD(field_flags, MY_I_S_UNSIGNED),
++ STRUCT_FLD(old_name, ""),
++ STRUCT_FLD(open_method, SKIP_OPEN_TABLE)},
++
++ END_OF_ST_FIELD_INFO
++};
++
++/***********************************************************************
++Fill the dynamic table information_schema.innodb_buffer_pool_pages with one row per buffer block of every buffer pool chunk. */
++static
++int
++i_s_innodb_buffer_pool_pages_fill(
++/*================*/
++ /* out: 0 on success (also when access is denied), 1 if a row could not be stored */
++ THD* thd, /* in: thread */
++ TABLE_LIST* tables, /* in/out: tables to fill */
++ COND* cond) /* in: condition (ignored) */
++{
++ TABLE* table = (TABLE *) tables->table;
++ int status = 0;
++
++ ulint n_chunks, n_blocks;
++
++ buf_chunk_t* chunk;
++
++ DBUG_ENTER("i_s_innodb_buffer_pool_pages_fill");
++
++ /* users without the PROCESS privilege get an empty table, not an error */
++ if (check_global_access(thd, PROCESS_ACL)) {
++
++ DBUG_RETURN(0);
++ }
++
++ RETURN_IF_INNODB_NOT_STARTED(tables->schema_table_name);
++
++ buf_pool_mutex_enter();
++ mutex_enter(&(dict_sys->mutex)); /* NOTE(review): nothing visible in this function reads the dictionary; confirm this mutex is needed */
++
++ chunk = buf_pool->chunks;
++
++ for (n_chunks = buf_pool->n_chunks; !status && n_chunks--; chunk++) { /* !status: stop scanning chunks once a row failed to store */
++ buf_block_t* block = chunk->blocks;
++
++ for (n_blocks = chunk->size; n_blocks--; block++) {
++ const buf_frame_t* frame = block->frame;
++
++ char page_type[64];
++
++ switch(fil_page_get_type(frame))
++ {
++ case FIL_PAGE_INDEX:
++ strcpy(page_type, "index");
++ break;
++ case FIL_PAGE_UNDO_LOG:
++ strcpy(page_type, "undo_log");
++ break;
++ case FIL_PAGE_INODE:
++ strcpy(page_type, "inode");
++ break;
++ case FIL_PAGE_IBUF_FREE_LIST:
++ strcpy(page_type, "ibuf_free_list");
++ break;
++ case FIL_PAGE_TYPE_ALLOCATED:
++ strcpy(page_type, "allocated");
++ break;
++ case FIL_PAGE_IBUF_BITMAP:
++ strcpy(page_type, "bitmap");
++ break;
++ case FIL_PAGE_TYPE_SYS:
++ strcpy(page_type, "sys");
++ break;
++ case FIL_PAGE_TYPE_TRX_SYS:
++ strcpy(page_type, "trx_sys");
++ break;
++ case FIL_PAGE_TYPE_FSP_HDR:
++ strcpy(page_type, "fsp_hdr");
++ break;
++ case FIL_PAGE_TYPE_XDES:
++ strcpy(page_type, "xdes");
++ break;
++ case FIL_PAGE_TYPE_BLOB:
++ strcpy(page_type, "blob");
++ break;
++ case FIL_PAGE_TYPE_ZBLOB:
++ strcpy(page_type, "zblob");
++ break;
++ case FIL_PAGE_TYPE_ZBLOB2:
++ strcpy(page_type, "zblob2");
++ break;
++ default:
++ sprintf(page_type, "unknown (type=%lu)", (unsigned long) fil_page_get_type(frame)); /* unsigned value; the 64-byte buffer easily holds the longest result */
++ }
++
++ field_store_string(table->field[0], page_type);
++ table->field[1]->store(block->page.space);
++ table->field[2]->store(block->page.offset);
++ table->field[3]->store(block->page.LRU_position);
++ table->field[4]->store(block->page.buf_fix_count);
++ table->field[5]->store(block->page.flush_type);
++
++ if (schema_table_store_record(thd, table)) {
++ status = 1;
++ break; /* leaves the block loop; the chunk loop then ends via its !status test */
++ }
++
++ }
++ }
++
++ mutex_exit(&(dict_sys->mutex));
++ buf_pool_mutex_exit();
++
++ DBUG_RETURN(status);
++}
++
++/***********************************************************************
++Fill the dynamic table information_schema.innodb_buffer_pool_pages_index with one row per FIL_PAGE_INDEX block whose index is found in the dictionary cache. */
++static
++int
++i_s_innodb_buffer_pool_pages_index_fill(
++/*================*/
++ /* out: 0 on success (also when access is denied), 1 if a row could not be stored */
++ THD* thd, /* in: thread */
++ TABLE_LIST* tables, /* in/out: tables to fill */
++ COND* cond) /* in: condition (ignored) */
++{
++ TABLE* table = (TABLE *) tables->table;
++ int status = 0;
++
++ ulint n_chunks, n_blocks;
++ dict_index_t* index;
++ dulint index_id;
++
++ char *p;
++ char db_name_raw[NAME_LEN*5+1], db_name[NAME_LEN+1];
++ char table_name_raw[NAME_LEN*5+1], table_name[NAME_LEN+1];
++
++ buf_chunk_t* chunk;
++
++ DBUG_ENTER("i_s_innodb_buffer_pool_pages_index_fill");
++
++ /* users without the PROCESS privilege get an empty table, not an error */
++ if (check_global_access(thd, PROCESS_ACL)) {
++
++ DBUG_RETURN(0);
++ }
++
++ RETURN_IF_INNODB_NOT_STARTED(tables->schema_table_name);
++
++ buf_pool_mutex_enter();
++ mutex_enter(&(dict_sys->mutex));
++
++ chunk = buf_pool->chunks;
++
++ for (n_chunks = buf_pool->n_chunks; !status && n_chunks--; chunk++) { /* !status: stop scanning chunks once a row failed to store */
++ buf_block_t* block = chunk->blocks;
++
++ for (n_blocks = chunk->size; n_blocks--; block++) {
++ const buf_frame_t* frame = block->frame;
++
++ if (fil_page_get_type(frame) == FIL_PAGE_INDEX) {
++ index_id = btr_page_get_index_id(frame);
++ index = dict_index_get_if_in_cache_low(index_id); /* NOTE(review): presumably the reason dict_sys->mutex is held -- confirm */
++ if(index) /* pages whose index is not in the cache produce no row */
++ {
++ if((p = strchr(index->table_name, '/'))) /* split "db/table" at the first '/' */
++ {
++ strncpy(db_name_raw, index->table_name, p-index->table_name);
++ db_name_raw[p-index->table_name] = 0;
++ filename_to_tablename(db_name_raw, db_name, sizeof(db_name));
++ field_store_string(table->field[0], db_name);
++ p++;
++ } else {
++ field_store_string(table->field[0], NULL);
++ p = (char *)index->table_name;
++ }
++ strcpy(table_name_raw, p);
++ filename_to_tablename(table_name_raw, table_name, sizeof(table_name));
++ field_store_string(table->field[1], table_name);
++ field_store_string(table->field[2], index->name);
++
++ table->field[3]->store(block->page.space);
++ table->field[4]->store(block->page.offset);
++ table->field[5]->store(page_get_n_recs(frame));
++ table->field[6]->store(page_get_data_size(frame));
++ table->field[7]->store(block->is_hashed);
++ table->field[8]->store(block->page.accessed);
++ table->field[9]->store(block->page.newest_modification != 0);
++ table->field[10]->store(block->page.oldest_modification != 0);
++ table->field[11]->store(block->page.old);
++ table->field[12]->store(block->page.LRU_position);
++ table->field[13]->store(block->page.buf_fix_count);
++ table->field[14]->store(block->page.flush_type);
++
++ if (schema_table_store_record(thd, table)) {
++ status = 1;
++ break; /* leaves the block loop; the chunk loop then ends via its !status test */
++ }
++ }
++ }
++ }
++ }
++
++ mutex_exit(&(dict_sys->mutex));
++ buf_pool_mutex_exit();
++
++ DBUG_RETURN(status);
++}
++
++/***********************************************************************
++Fill the dynamic table information_schema.innodb_buffer_pool_pages_blob with one row per FIL_PAGE_TYPE_BLOB block in the buffer pool. */
++static
++int
++i_s_innodb_buffer_pool_pages_blob_fill(
++/*================*/
++ /* out: 0 on success (also when access is denied), 1 if a row could not be stored */
++ THD* thd, /* in: thread */
++ TABLE_LIST* tables, /* in/out: tables to fill */
++ COND* cond) /* in: condition (ignored) */
++{
++ TABLE* table = (TABLE *) tables->table;
++ int status = 0;
++
++ ulint n_chunks, n_blocks;
++ buf_chunk_t* chunk;
++ page_zip_des_t* block_page_zip;
++
++ ulint part_len;
++ ulint next_page_no;
++
++ DBUG_ENTER("i_s_innodb_buffer_pool_pages_blob_fill");
++
++ /* users without the PROCESS privilege get an empty table, not an error */
++ if (check_global_access(thd, PROCESS_ACL)) {
++
++ DBUG_RETURN(0);
++ }
++
++ RETURN_IF_INNODB_NOT_STARTED(tables->schema_table_name);
++
++ buf_pool_mutex_enter();
++ mutex_enter(&(dict_sys->mutex)); /* NOTE(review): nothing visible in this function reads the dictionary; confirm this mutex is needed */
++
++ chunk = buf_pool->chunks;
++
++ for (n_chunks = buf_pool->n_chunks; !status && n_chunks--; chunk++) { /* !status: stop scanning chunks once a row failed to store */
++ buf_block_t* block = chunk->blocks;
++
++ for (n_blocks = chunk->size; n_blocks--; block++) {
++ const buf_frame_t* frame = block->frame;
++ block_page_zip = buf_block_get_page_zip(block); /* looked up for every block, not once per chunk */
++
++ if (fil_page_get_type(frame) == FIL_PAGE_TYPE_BLOB) {
++
++ if (UNIV_LIKELY_NULL(block_page_zip)) {
++ part_len = 0; /* not determined when a page_zip descriptor is present */
++
++ next_page_no = mach_read_from_4(
++ buf_block_get_frame(block)
++ + FIL_PAGE_NEXT);
++ } else {
++ part_len = mach_read_from_4(
++ buf_block_get_frame(block)
++ + FIL_PAGE_DATA
++ + 0 /*BTR_BLOB_HDR_PART_LEN*/);
++
++ next_page_no = mach_read_from_4(
++ buf_block_get_frame(block)
++ + FIL_PAGE_DATA
++ + 4 /*BTR_BLOB_HDR_NEXT_PAGE_NO*/);
++ }
++
++ table->field[0]->store(block->page.space);
++ table->field[1]->store(block->page.offset);
++ table->field[2]->store(block_page_zip != NULL);
++ table->field[3]->store(part_len);
++
++ if(next_page_no == FIL_NULL) /* no next page: report 0 */
++ {
++ table->field[4]->store(0);
++ } else {
++ table->field[4]->store(next_page_no); /* the next page, not this page's own number */
++ }
++
++ table->field[5]->store(block->page.LRU_position);
++ table->field[6]->store(block->page.buf_fix_count);
++ table->field[7]->store(block->page.flush_type);
++
++ if (schema_table_store_record(thd, table)) {
++ status = 1;
++ break; /* leaves the block loop; the chunk loop then ends via its !status test */
++ }
++
++ }
++ }
++ }
++
++ mutex_exit(&(dict_sys->mutex));
++ buf_pool_mutex_exit();
++
++ DBUG_RETURN(status);
++}
++
++/***********************************************************************
++Plugin init hook for information_schema.innodb_buffer_pool_pages: installs its column list and its fill callback. */
++static
++int
++i_s_innodb_buffer_pool_pages_init(
++/*=========*/
++ /* out: always 0 */
++ void* p) /* in/out: ST_SCHEMA_TABLE object to set up */
++{
++ ST_SCHEMA_TABLE* i_s_table = static_cast<ST_SCHEMA_TABLE*>(p);
++
++ DBUG_ENTER("i_s_innodb_buffer_pool_pages_init");
++
++ i_s_table->fill_table = i_s_innodb_buffer_pool_pages_fill;
++ i_s_table->fields_info = i_s_innodb_buffer_pool_pages_fields_info;
++ DBUG_RETURN(0);
++}
++
++/***********************************************************************
++Plugin init hook for information_schema.innodb_buffer_pool_pages_index: installs its column list and its fill callback. */
++static
++int
++i_s_innodb_buffer_pool_pages_index_init(
++/*=========*/
++ /* out: always 0 */
++ void* p) /* in/out: ST_SCHEMA_TABLE object to set up */
++{
++ ST_SCHEMA_TABLE* i_s_table = static_cast<ST_SCHEMA_TABLE*>(p);
++
++ DBUG_ENTER("i_s_innodb_buffer_pool_pages_index_init");
++
++ i_s_table->fill_table = i_s_innodb_buffer_pool_pages_index_fill;
++ i_s_table->fields_info = i_s_innodb_buffer_pool_pages_index_fields_info;
++ DBUG_RETURN(0);
++}
++
++/***********************************************************************
++Plugin init hook for information_schema.innodb_buffer_pool_pages_blob: installs its column list and its fill callback. */
++static
++int
++i_s_innodb_buffer_pool_pages_blob_init(
++/*=========*/
++ /* out: always 0 */
++ void* p) /* in/out: ST_SCHEMA_TABLE object to set up */
++{
++ ST_SCHEMA_TABLE* i_s_table = static_cast<ST_SCHEMA_TABLE*>(p);
++
++ DBUG_ENTER("i_s_innodb_buffer_pool_pages_blob_init");
++
++ i_s_table->fill_table = i_s_innodb_buffer_pool_pages_blob_fill;
++ i_s_table->fields_info = i_s_innodb_buffer_pool_pages_blob_fields_info;
++ DBUG_RETURN(0);
++}
++
++
++UNIV_INTERN struct st_mysql_plugin i_s_innodb_buffer_pool_pages = /* plugin descriptor registering the INFORMATION_SCHEMA table INNODB_BUFFER_POOL_PAGES */
++{
++ /* the plugin type (a MYSQL_XXX_PLUGIN value) */
++ /* int */
++ STRUCT_FLD(type, MYSQL_INFORMATION_SCHEMA_PLUGIN),
++
++ /* pointer to type-specific plugin descriptor */
++ /* void* */
++ STRUCT_FLD(info, &i_s_info),
++
++ /* plugin name */
++ /* const char* */
++ STRUCT_FLD(name, "INNODB_BUFFER_POOL_PAGES"),
++
++ /* plugin author (for SHOW PLUGINS) */
++ /* const char* */
++ STRUCT_FLD(author, plugin_author),
++
++ /* general descriptive text (for SHOW PLUGINS) */
++ /* const char* */
++ STRUCT_FLD(descr, "InnoDB buffer pool pages"),
++
++ /* the plugin license (PLUGIN_LICENSE_XXX) */
++ /* int */
++ STRUCT_FLD(license, PLUGIN_LICENSE_GPL),
++
++ /* the function to invoke when plugin is loaded */
++ /* int (*)(void*); */
++ STRUCT_FLD(init, i_s_innodb_buffer_pool_pages_init),
++
++ /* the function to invoke when plugin is unloaded */
++ /* int (*)(void*); */
++ STRUCT_FLD(deinit, i_s_common_deinit),
++
++ /* plugin version (for SHOW PLUGINS) */
++ /* unsigned int */
++ STRUCT_FLD(version, 0x0100 /* 1.0 */),
++
++ /* struct st_mysql_show_var* */
++ STRUCT_FLD(status_vars, NULL),
++
++ /* struct st_mysql_sys_var** */
++ STRUCT_FLD(system_vars, NULL),
++
++ /* reserved for dependency checking */
++ /* void* */
++ STRUCT_FLD(__reserved1, NULL)
++};
++
++UNIV_INTERN struct st_mysql_plugin i_s_innodb_buffer_pool_pages_index = /* plugin descriptor registering the INFORMATION_SCHEMA table INNODB_BUFFER_POOL_PAGES_INDEX */
++{
++ /* the plugin type (a MYSQL_XXX_PLUGIN value) */
++ /* int */
++ STRUCT_FLD(type, MYSQL_INFORMATION_SCHEMA_PLUGIN),
++
++ /* pointer to type-specific plugin descriptor */
++ /* void* */
++ STRUCT_FLD(info, &i_s_info),
++
++ /* plugin name */
++ /* const char* */
++ STRUCT_FLD(name, "INNODB_BUFFER_POOL_PAGES_INDEX"),
++
++ /* plugin author (for SHOW PLUGINS) */
++ /* const char* */
++ STRUCT_FLD(author, plugin_author),
++
++ /* general descriptive text (for SHOW PLUGINS) */
++ /* const char* */
++ STRUCT_FLD(descr, "InnoDB buffer pool index pages"),
++
++ /* the plugin license (PLUGIN_LICENSE_XXX) */
++ /* int */
++ STRUCT_FLD(license, PLUGIN_LICENSE_GPL),
++
++ /* the function to invoke when plugin is loaded */
++ /* int (*)(void*); */
++ STRUCT_FLD(init, i_s_innodb_buffer_pool_pages_index_init),
++
++ /* the function to invoke when plugin is unloaded */
++ /* int (*)(void*); */
++ STRUCT_FLD(deinit, i_s_common_deinit),
++
++ /* plugin version (for SHOW PLUGINS) */
++ /* unsigned int */
++ STRUCT_FLD(version, 0x0100 /* 1.0 */),
++
++ /* struct st_mysql_show_var* */
++ STRUCT_FLD(status_vars, NULL),
++
++ /* struct st_mysql_sys_var** */
++ STRUCT_FLD(system_vars, NULL),
++
++ /* reserved for dependency checking */
++ /* void* */
++ STRUCT_FLD(__reserved1, NULL)
++};
++
++UNIV_INTERN struct st_mysql_plugin i_s_innodb_buffer_pool_pages_blob = /* plugin descriptor registering the INFORMATION_SCHEMA table INNODB_BUFFER_POOL_PAGES_BLOB */
++{
++ /* the plugin type (a MYSQL_XXX_PLUGIN value) */
++ /* int */
++ STRUCT_FLD(type, MYSQL_INFORMATION_SCHEMA_PLUGIN),
++
++ /* pointer to type-specific plugin descriptor */
++ /* void* */
++ STRUCT_FLD(info, &i_s_info),
++
++ /* plugin name */
++ /* const char* */
++ STRUCT_FLD(name, "INNODB_BUFFER_POOL_PAGES_BLOB"),
++
++ /* plugin author (for SHOW PLUGINS) */
++ /* const char* */
++ STRUCT_FLD(author, plugin_author),
++
++ /* general descriptive text (for SHOW PLUGINS) */
++ /* const char* */
++ STRUCT_FLD(descr, "InnoDB buffer pool blob pages"),
++
++ /* the plugin license (PLUGIN_LICENSE_XXX) */
++ /* int */
++ STRUCT_FLD(license, PLUGIN_LICENSE_GPL),
++
++ /* the function to invoke when plugin is loaded */
++ /* int (*)(void*); */
++ STRUCT_FLD(init, i_s_innodb_buffer_pool_pages_blob_init),
++
++ /* the function to invoke when plugin is unloaded */
++ /* int (*)(void*); */
++ STRUCT_FLD(deinit, i_s_common_deinit),
++
++ /* plugin version (for SHOW PLUGINS) */
++ /* unsigned int */
++ STRUCT_FLD(version, 0x0100 /* 1.0 */),
++
++ /* struct st_mysql_show_var* */
++ STRUCT_FLD(status_vars, NULL),
++
++ /* struct st_mysql_sys_var** */
++ STRUCT_FLD(system_vars, NULL),
++
++ /* reserved for dependency checking */
++ /* void* */
++ STRUCT_FLD(__reserved1, NULL)
++};
++
++
+ /* Fields of the dynamic table INFORMATION_SCHEMA.innodb_trx */
+ static ST_FIELD_INFO innodb_trx_fields_info[] =
+ {
+diff -ruN innodb_plugin-1.0.3_orig/handler/i_s.h innodb_plugin-1.0.3_tmp/handler/i_s.h
+--- innodb_plugin-1.0.3_orig/handler/i_s.h 2009-03-18 18:08:22.000000000 +0900
++++ innodb_plugin-1.0.3_tmp/handler/i_s.h 2009-03-18 18:15:09.000000000 +0900
+@@ -25,6 +25,9 @@
+ #ifndef i_s_h
+ #define i_s_h
+
++extern struct st_mysql_plugin i_s_innodb_buffer_pool_pages;
++extern struct st_mysql_plugin i_s_innodb_buffer_pool_pages_index;
++extern struct st_mysql_plugin i_s_innodb_buffer_pool_pages_blob;
+ extern struct st_mysql_plugin i_s_innodb_trx;
+ extern struct st_mysql_plugin i_s_innodb_locks;
+ extern struct st_mysql_plugin i_s_innodb_lock_waits;
+diff -ruN innodb_plugin-1.0.3_orig/handler/innodb_patch_info.h innodb_plugin-1.0.3_tmp/handler/innodb_patch_info.h
+--- innodb_plugin-1.0.3_orig/handler/innodb_patch_info.h 2009-03-18 18:12:58.000000000 +0900
++++ innodb_plugin-1.0.3_tmp/handler/innodb_patch_info.h 2009-03-18 18:15:09.000000000 +0900
+@@ -28,5 +28,6 @@
+ {"innodb_io","Improvements to InnoDB IO","","http://www.percona.com/docs/wiki/percona-xtradb"},
+ {"innodb_rw_lock","InnoDB RW-lock fixes","Useful for 8+ cores SMP systems","http://www.percona.com/docs/wiki/percona-xtradb"},
+ {"innodb_opt_lru_count","Fix of buffer_pool mutex","Decreases contention on buffer_pool mutex on LRU operations","http://www.percona.com/docs/wiki/percona-xtradb"},
++{"innodb_buffer_pool_pages","Information of buffer pool content","","http://www.percona.com/docs/wiki/percona-xtradb"},
+ {NULL, NULL, NULL, NULL}
+ };
+diff -ruN innodb_plugin-1.0.3_orig/scripts/install_innodb_plugins.sql innodb_plugin-1.0.3_tmp/scripts/install_innodb_plugins.sql
+--- innodb_plugin-1.0.3_orig/scripts/install_innodb_plugins.sql 2009-03-18 18:08:22.000000000 +0900
++++ innodb_plugin-1.0.3_tmp/scripts/install_innodb_plugins.sql 2009-03-18 18:15:09.000000000 +0900
+@@ -8,3 +8,6 @@
+ INSTALL PLUGIN innodb_cmpmem SONAME 'ha_innodb.so';
+ INSTALL PLUGIN innodb_cmpmem_reset SONAME 'ha_innodb.so';
+ INSTALL PLUGIN XTRADB_ENHANCEMENTS SONAME 'ha_innodb.so';
++INSTALL PLUGIN INNODB_BUFFER_POOL_PAGES SONAME 'ha_innodb.so';
++INSTALL PLUGIN INNODB_BUFFER_POOL_PAGES_BLOB SONAME 'ha_innodb.so';
++INSTALL PLUGIN INNODB_BUFFER_POOL_PAGES_INDEX SONAME 'ha_innodb.so';
=== added file 'innodb_expand_undo_slots.patch'
--- innodb_expand_undo_slots.patch 1970-01-01 00:00:00 +0000
+++ innodb_expand_undo_slots.patch 2009-03-20 05:35:54 +0000
@@ -0,0 +1,143 @@
+diff -ruN innodb_plugin-1.0.3_orig/handler/ha_innodb.cc innodb_plugin-1.0.3_tmp/handler/ha_innodb.cc
+--- innodb_plugin-1.0.3_orig/handler/ha_innodb.cc 2009-03-18 18:19:52.000000000 +0900
++++ innodb_plugin-1.0.3_tmp/handler/ha_innodb.cc 2009-03-19 16:14:28.000000000 +0900
+@@ -174,6 +174,7 @@
+ #endif /* UNIV_LOG_ARCHIVE */
+ static my_bool innobase_use_doublewrite = TRUE;
+ static my_bool innobase_use_checksums = TRUE;
++static my_bool innobase_extra_undoslots = FALSE;
+ static my_bool innobase_locks_unsafe_for_binlog = FALSE;
+ static my_bool innobase_rollback_on_timeout = FALSE;
+ static my_bool innobase_create_status_file = FALSE;
+@@ -2002,6 +2003,8 @@
+ goto error;
+ }
+
++ srv_extra_undoslots = (ibool) innobase_extra_undoslots;
++
+ /* -------------- Log files ---------------------------*/
+
+ /* The default dir for log files is the datadir of MySQL */
+@@ -9499,6 +9502,13 @@
+ "The common part for InnoDB table spaces.",
+ NULL, NULL, NULL);
+
++static MYSQL_SYSVAR_BOOL(extra_undoslots, innobase_extra_undoslots,
++ PLUGIN_VAR_NOCMDARG | PLUGIN_VAR_READONLY,
++ "Enable to use about 4000 undo slots instead of default 1024. "
++ "#### Attention: Once you enable this parameter, "
++ "don't use the datafile for normal mysqld or ibbackup! ####",
++ NULL, NULL, FALSE);
++
+ static MYSQL_SYSVAR_BOOL(doublewrite, innobase_use_doublewrite,
+ PLUGIN_VAR_NOCMDARG | PLUGIN_VAR_READONLY,
+ "Enable InnoDB doublewrite buffer (enabled by default). "
+@@ -9813,6 +9823,7 @@
+ MYSQL_SYSVAR(data_file_path),
+ MYSQL_SYSVAR(data_home_dir),
+ MYSQL_SYSVAR(doublewrite),
++ MYSQL_SYSVAR(extra_undoslots),
+ MYSQL_SYSVAR(fast_shutdown),
+ MYSQL_SYSVAR(file_io_threads),
+ MYSQL_SYSVAR(file_per_table),
+diff -ruN innodb_plugin-1.0.3_orig/handler/innodb_patch_info.h innodb_plugin-1.0.3_tmp/handler/innodb_patch_info.h
+--- innodb_plugin-1.0.3_orig/handler/innodb_patch_info.h 2009-03-19 16:13:38.000000000 +0900
++++ innodb_plugin-1.0.3_tmp/handler/innodb_patch_info.h 2009-03-19 16:14:28.000000000 +0900
+@@ -30,5 +30,6 @@
+ {"innodb_opt_lru_count","Fix of buffer_pool mutex","Decreases contention on buffer_pool mutex on LRU operations","http://www.percona.com/docs/wiki/percona-xtradb"},
+ {"innodb_buffer_pool_pages","Information of buffer pool content","","http://www.percona.com/docs/wiki/percona-xtradb"},
+ {"innodb_split_buf_pool_mutex","More fix of buffer_pool mutex","Spliting buf_pool_mutex and optimizing based on innodb_opt_lru_count","http://www.percona.com/docs/wiki/percona-xtradb"},
++{"innodb_expand_undo_slots","expandable maximum number of undo slots","from 1024 (default) to about 4000","http://www.percona.com/docs/wiki/percona-xtradb"},
+ {NULL, NULL, NULL, NULL}
+ };
+diff -ruN innodb_plugin-1.0.3_orig/include/srv0srv.h innodb_plugin-1.0.3_tmp/include/srv0srv.h
+--- innodb_plugin-1.0.3_orig/include/srv0srv.h 2009-03-18 18:08:37.000000000 +0900
++++ innodb_plugin-1.0.3_tmp/include/srv0srv.h 2009-03-19 16:14:28.000000000 +0900
+@@ -95,6 +95,8 @@
+ extern ulint* srv_data_file_sizes;
+ extern ulint* srv_data_file_is_raw_partition;
+
++extern ibool srv_extra_undoslots;
++
+ extern ibool srv_auto_extend_last_data_file;
+ extern ulint srv_last_file_size_max;
+ extern ulong srv_auto_extend_increment;
+diff -ruN innodb_plugin-1.0.3_orig/include/trx0rseg.h innodb_plugin-1.0.3_tmp/include/trx0rseg.h
+--- innodb_plugin-1.0.3_orig/include/trx0rseg.h 2009-02-17 18:41:24.000000000 +0900
++++ innodb_plugin-1.0.3_tmp/include/trx0rseg.h 2009-03-19 16:14:28.000000000 +0900
+@@ -131,8 +131,11 @@
+ mtr_t* mtr); /* in: mtr */
+
+
++/* Number of undo log slots used when srv_extra_undoslots is set. The page normally has room for 4076 slots; 4 of them are held back as a safety margin. */
++#define TRX_RSEG_N_EXTRA_SLOTS (((UNIV_PAGE_SIZE - (FIL_PAGE_DATA + FIL_PAGE_DATA_END + TRX_RSEG_UNDO_SLOTS)) / TRX_RSEG_SLOT_SIZE) - 4)
++
+ /* Number of undo log slots in a rollback segment file copy */
+-#define TRX_RSEG_N_SLOTS (UNIV_PAGE_SIZE / 16)
++#define TRX_RSEG_N_SLOTS (srv_extra_undoslots ? TRX_RSEG_N_EXTRA_SLOTS : (UNIV_PAGE_SIZE / 16))
+
+ /* Maximum number of transactions supported by a single rollback segment */
+ #define TRX_RSEG_MAX_N_TRXS (TRX_RSEG_N_SLOTS / 2)
+diff -ruN innodb_plugin-1.0.3_orig/srv/srv0srv.c innodb_plugin-1.0.3_tmp/srv/srv0srv.c
+--- innodb_plugin-1.0.3_orig/srv/srv0srv.c 2009-03-18 18:08:37.000000000 +0900
++++ innodb_plugin-1.0.3_tmp/srv/srv0srv.c 2009-03-19 16:14:28.000000000 +0900
+@@ -131,6 +131,8 @@
+ /* size in database pages */
+ UNIV_INTERN ulint* srv_data_file_sizes = NULL;
+
++UNIV_INTERN ibool srv_extra_undoslots = FALSE;
++
+ /* if TRUE, then we auto-extend the last data file */
+ UNIV_INTERN ibool srv_auto_extend_last_data_file = FALSE;
+ /* if != 0, this tells the max size auto-extending may increase the
+diff -ruN innodb_plugin-1.0.3_orig/trx/trx0undo.c innodb_plugin-1.0.3_tmp/trx/trx0undo.c
+--- innodb_plugin-1.0.3_orig/trx/trx0undo.c 2009-02-17 19:12:56.000000000 +0900
++++ innodb_plugin-1.0.3_tmp/trx/trx0undo.c 2009-03-19 16:14:28.000000000 +0900
+@@ -1382,9 +1382,47 @@
+ rseg_header = trx_rsegf_get_new(rseg->space, rseg->zip_size,
+ rseg->page_no, &mtr);
+
++ if (!srv_extra_undoslots) {
++ /* uses direct call for avoid "Assertion failure" */
++ //page_no = trx_rsegf_get_nth_undo(rseg_header, TRX_RSEG_N_EXTRA_SLOTS - 1, &mtr);
++ page_no = mtr_read_ulint(rseg_header + TRX_RSEG_UNDO_SLOTS
++ + (TRX_RSEG_N_EXTRA_SLOTS - 1) * TRX_RSEG_SLOT_SIZE,
++ MLOG_4BYTES, &mtr);
++ if (page_no != 0) {
++ /* check extended slots are not used */
++ for (i = TRX_RSEG_N_SLOTS; i < TRX_RSEG_N_EXTRA_SLOTS; i++) {
++ /* uses direct call for avoid "Assertion failure" */
++ page_no = mtr_read_ulint(rseg_header + TRX_RSEG_UNDO_SLOTS
++ + i * TRX_RSEG_SLOT_SIZE,
++ MLOG_4BYTES, &mtr);
++ if (page_no != FIL_NULL) {
++ srv_extra_undoslots = TRUE;
++ fprintf(stderr,
++"InnoDB: Error: innodb_extra_undoslots option is disabled, but it was enabled before.\n"
++"InnoDB: The datafile is not normal for mysqld and disabled innodb_extra_undoslots.\n"
++"InnoDB: Enable innodb_extra_undoslots if it was enabled before, and\n"
++"InnoDB: ### don't use this datafile with other mysqld or ibbackup! ###\n"
++"InnoDB: Cannot continue operation for the safety. Calling exit(1).\n");
++ exit(1);
++ }
++ }
++ fprintf(stderr,
++"InnoDB: Warning: innodb_extra_undoslots option is disabled, but it was enabled before.\n"
++"InnoDB: But extended undo slots seem not used, so continue operation.\n");
++ }
++ }
++
+ for (i = 0; i < TRX_RSEG_N_SLOTS; i++) {
+ page_no = trx_rsegf_get_nth_undo(rseg_header, i, &mtr);
+
++ /* If it was not initialized when the datafile created,
++ page_no will be 0 for the extended slots after that */
++
++ if (page_no == 0) {
++ page_no = FIL_NULL;
++ trx_rsegf_set_nth_undo(rseg_header, i, page_no, &mtr);
++ }
++
+ /* In forced recovery: try to avoid operations which look
+ at database pages; undo logs are rapidly changing data, and
+ the probability that they are in an inconsistent state is
=== added file 'innodb_extra_rseg.patch'
--- innodb_extra_rseg.patch 1970-01-01 00:00:00 +0000
+++ innodb_extra_rseg.patch 2009-03-20 05:35:54 +0000
@@ -0,0 +1,326 @@
+diff -ruN innodb_plugin-1.0.3_orig/handler/ha_innodb.cc innodb_plugin-1.0.3_tmp/handler/ha_innodb.cc
+--- innodb_plugin-1.0.3_orig/handler/ha_innodb.cc 2009-03-19 16:17:48.000000000 +0900
++++ innodb_plugin-1.0.3_tmp/handler/ha_innodb.cc 2009-03-19 16:20:33.000000000 +0900
+@@ -9813,6 +9813,11 @@
+ "Number of background write I/O threads in InnoDB.",
+ NULL, NULL, 1, 1, 64, 0);
+
++static MYSQL_SYSVAR_ULONG(extra_rsegments, srv_extra_rsegments,
++ PLUGIN_VAR_RQCMDARG | PLUGIN_VAR_READONLY,
++ "Number of extra user rollback segments when create new database.",
++ NULL, NULL, 0, 0, 127, 0);
++
+ static struct st_mysql_sys_var* innobase_system_variables[]= {
+ MYSQL_SYSVAR(additional_mem_pool_size),
+ MYSQL_SYSVAR(autoextend_increment),
+@@ -9872,6 +9877,7 @@
+ MYSQL_SYSVAR(enable_unsafe_group_commit),
+ MYSQL_SYSVAR(read_io_threads),
+ MYSQL_SYSVAR(write_io_threads),
++ MYSQL_SYSVAR(extra_rsegments),
+ MYSQL_SYSVAR(use_sys_malloc),
+ MYSQL_SYSVAR(change_buffering),
+ NULL
+@@ -10054,6 +10060,7 @@
+ innobase_system_variables, /* system variables */
+ NULL /* reserved */
+ },
++i_s_innodb_rseg,
+ i_s_innodb_buffer_pool_pages,
+ i_s_innodb_buffer_pool_pages_index,
+ i_s_innodb_buffer_pool_pages_blob,
+diff -ruN innodb_plugin-1.0.3_orig/handler/i_s.cc innodb_plugin-1.0.3_tmp/handler/i_s.cc
+--- innodb_plugin-1.0.3_orig/handler/i_s.cc 2009-03-19 16:13:38.000000000 +0900
++++ innodb_plugin-1.0.3_tmp/handler/i_s.cc 2009-03-19 16:18:06.000000000 +0900
+@@ -43,6 +43,8 @@
+ #include "srv0start.h" /* for srv_was_started */
+ #include "btr0btr.h" /* for btr_page_get_index_id */
+ #include "dict0dict.h" /* for dict_index_get_if_in_cache */
++#include "trx0rseg.h" /* for trx_rseg_struct */
++#include "trx0sys.h" /* for trx_sys */
+ /* from buf0buf.c */
+ struct buf_chunk_struct{
+ ulint mem_size; /* allocated size of the chunk */
+@@ -2490,3 +2492,166 @@
+
+ DBUG_RETURN(0);
+ }
++
++/***********************************************************************
++Column definitions of the INNODB_RSEG information schema table. */
++static ST_FIELD_INFO i_s_innodb_rseg_fields_info[] =
++{
++ {STRUCT_FLD(field_name, "rseg_id"),
++ STRUCT_FLD(field_length, MY_INT64_NUM_DECIMAL_DIGITS),
++ STRUCT_FLD(field_type, MYSQL_TYPE_LONGLONG),
++ STRUCT_FLD(value, 0),
++ STRUCT_FLD(field_flags, MY_I_S_UNSIGNED),
++ STRUCT_FLD(old_name, ""),
++ STRUCT_FLD(open_method, SKIP_OPEN_TABLE)},
++
++ {STRUCT_FLD(field_name, "space_id"),
++ STRUCT_FLD(field_length, MY_INT64_NUM_DECIMAL_DIGITS),
++ STRUCT_FLD(field_type, MYSQL_TYPE_LONGLONG),
++ STRUCT_FLD(value, 0),
++ STRUCT_FLD(field_flags, MY_I_S_UNSIGNED),
++ STRUCT_FLD(old_name, ""),
++ STRUCT_FLD(open_method, SKIP_OPEN_TABLE)},
++
++ {STRUCT_FLD(field_name, "zip_size"),
++ STRUCT_FLD(field_length, MY_INT64_NUM_DECIMAL_DIGITS),
++ STRUCT_FLD(field_type, MYSQL_TYPE_LONGLONG),
++ STRUCT_FLD(value, 0),
++ STRUCT_FLD(field_flags, MY_I_S_UNSIGNED),
++ STRUCT_FLD(old_name, ""),
++ STRUCT_FLD(open_method, SKIP_OPEN_TABLE)},
++
++ {STRUCT_FLD(field_name, "page_no"),
++ STRUCT_FLD(field_length, MY_INT64_NUM_DECIMAL_DIGITS),
++ STRUCT_FLD(field_type, MYSQL_TYPE_LONGLONG),
++ STRUCT_FLD(value, 0),
++ STRUCT_FLD(field_flags, MY_I_S_UNSIGNED),
++ STRUCT_FLD(old_name, ""),
++ STRUCT_FLD(open_method, SKIP_OPEN_TABLE)},
++
++ {STRUCT_FLD(field_name, "max_size"),
++ STRUCT_FLD(field_length, MY_INT64_NUM_DECIMAL_DIGITS),
++ STRUCT_FLD(field_type, MYSQL_TYPE_LONGLONG),
++ STRUCT_FLD(value, 0),
++ STRUCT_FLD(field_flags, MY_I_S_UNSIGNED),
++ STRUCT_FLD(old_name, ""),
++ STRUCT_FLD(open_method, SKIP_OPEN_TABLE)},
++
++ {STRUCT_FLD(field_name, "curr_size"),
++ STRUCT_FLD(field_length, MY_INT64_NUM_DECIMAL_DIGITS),
++ STRUCT_FLD(field_type, MYSQL_TYPE_LONGLONG),
++ STRUCT_FLD(value, 0),
++ STRUCT_FLD(field_flags, MY_I_S_UNSIGNED),
++ STRUCT_FLD(old_name, ""),
++ STRUCT_FLD(open_method, SKIP_OPEN_TABLE)},
++
++ END_OF_ST_FIELD_INFO
++};
++
++static
++int
++i_s_innodb_rseg_fill(
++/*=================*/
++ THD* thd, /* in: thread */
++ TABLE_LIST* tables, /* in/out: tables to fill */
++ COND* cond) /* in: condition (ignored) */
++{
++ TABLE* table = (TABLE *) tables->table;
++ int status = 0;
++ trx_rseg_t* rseg;
++
++ DBUG_ENTER("i_s_innodb_rseg_fill");
++
++ /* users lacking the PROCESS privilege get an empty result, not an error */
++ if (check_global_access(thd, PROCESS_ACL)) {
++
++ DBUG_RETURN(0);
++ }
++
++ RETURN_IF_INNODB_NOT_STARTED(tables->schema_table_name);
++
++ rseg = UT_LIST_GET_FIRST(trx_sys->rseg_list);
++
++ while (rseg) {
++ table->field[0]->store(rseg->id);
++ table->field[1]->store(rseg->space);
++ table->field[2]->store(rseg->zip_size);
++ table->field[3]->store(rseg->page_no);
++ table->field[4]->store(rseg->max_size);
++ table->field[5]->store(rseg->curr_size);
++
++ if (schema_table_store_record(thd, table)) {
++ status = 1;
++ break;
++ }
++
++ rseg = UT_LIST_GET_NEXT(rseg_list, rseg);
++ }
++
++ DBUG_RETURN(status);
++}
++
++static
++int
++i_s_innodb_rseg_init(
++/*=================*/
++ /* out: 0 on success */
++ void* p) /* in/out: table schema object */
++{
++ DBUG_ENTER("i_s_innodb_rseg_init");
++ ST_SCHEMA_TABLE* schema = (ST_SCHEMA_TABLE*) p;
++
++ schema->fields_info = i_s_innodb_rseg_fields_info;
++ schema->fill_table = i_s_innodb_rseg_fill;
++
++ DBUG_RETURN(0);
++}
++
++UNIV_INTERN struct st_mysql_plugin i_s_innodb_rseg =
++{
++ /* the plugin type (a MYSQL_XXX_PLUGIN value) */
++ /* int */
++ STRUCT_FLD(type, MYSQL_INFORMATION_SCHEMA_PLUGIN),
++
++ /* pointer to type-specific plugin descriptor */
++ /* void* */
++ STRUCT_FLD(info, &i_s_info),
++
++ /* plugin name */
++ /* const char* */
++ STRUCT_FLD(name, "INNODB_RSEG"),
++
++ /* plugin author (for SHOW PLUGINS) */
++ /* const char* */
++ STRUCT_FLD(author, plugin_author),
++
++ /* general descriptive text (for SHOW PLUGINS) */
++ /* const char* */
++ STRUCT_FLD(descr, "InnoDB rollback segment information"),
++
++ /* the plugin license (PLUGIN_LICENSE_XXX) */
++ /* int */
++ STRUCT_FLD(license, PLUGIN_LICENSE_GPL),
++
++ /* the function to invoke when plugin is loaded */
++ /* int (*)(void*); */
++ STRUCT_FLD(init, i_s_innodb_rseg_init),
++
++ /* the function to invoke when plugin is unloaded */
++ /* int (*)(void*); */
++ STRUCT_FLD(deinit, i_s_common_deinit),
++
++ /* plugin version (for SHOW PLUGINS) */
++ /* unsigned int */
++ STRUCT_FLD(version, 0x0100 /* 1.0 */),
++
++ /* struct st_mysql_show_var* */
++ STRUCT_FLD(status_vars, NULL),
++
++ /* struct st_mysql_sys_var** */
++ STRUCT_FLD(system_vars, NULL),
++
++ /* reserved for dependency checking */
++ /* void* */
++ STRUCT_FLD(__reserved1, NULL)
++};
+diff -ruN innodb_plugin-1.0.3_orig/handler/i_s.h innodb_plugin-1.0.3_tmp/handler/i_s.h
+--- innodb_plugin-1.0.3_orig/handler/i_s.h 2009-03-18 18:19:52.000000000 +0900
++++ innodb_plugin-1.0.3_tmp/handler/i_s.h 2009-03-19 16:18:06.000000000 +0900
+@@ -36,5 +36,6 @@
+ extern struct st_mysql_plugin i_s_innodb_cmpmem;
+ extern struct st_mysql_plugin i_s_innodb_cmpmem_reset;
+ extern struct st_mysql_plugin i_s_innodb_patches;
++extern struct st_mysql_plugin i_s_innodb_rseg;
+
+ #endif /* i_s_h */
+diff -ruN innodb_plugin-1.0.3_orig/handler/innodb_patch_info.h innodb_plugin-1.0.3_tmp/handler/innodb_patch_info.h
+--- innodb_plugin-1.0.3_orig/handler/innodb_patch_info.h 2009-03-19 16:17:48.000000000 +0900
++++ innodb_plugin-1.0.3_tmp/handler/innodb_patch_info.h 2009-03-19 16:18:06.000000000 +0900
+@@ -31,5 +31,6 @@
+ {"innodb_buffer_pool_pages","Information of buffer pool content","","http://www.percona.com/docs/wiki/percona-xtradb"},
+ {"innodb_split_buf_pool_mutex","More fix of buffer_pool mutex","Spliting buf_pool_mutex and optimizing based on innodb_opt_lru_count","http://www.percona.com/docs/wiki/percona-xtradb"},
+ {"innodb_expand_undo_slots","expandable maximum number of undo slots","from 1024 (default) to about 4000","http://www.percona.com/docs/wiki/percona-xtradb"},
++{"innodb_extra_rseg","allow to create extra rollback segments","When create new db, the new parameter allows to create more rollback segments","http://www.percona.com/docs/wiki/percona-xtradb"},
+ {NULL, NULL, NULL, NULL}
+ };
+diff -ruN innodb_plugin-1.0.3_orig/include/srv0srv.h innodb_plugin-1.0.3_tmp/include/srv0srv.h
+--- innodb_plugin-1.0.3_orig/include/srv0srv.h 2009-03-19 16:17:48.000000000 +0900
++++ innodb_plugin-1.0.3_tmp/include/srv0srv.h 2009-03-19 16:18:06.000000000 +0900
+@@ -184,6 +184,8 @@
+ extern ulint srv_read_ahead;
+ extern ulint srv_adaptive_checkpoint;
+
++extern ulint srv_extra_rsegments;
++
+ /*-------------------------------------------*/
+
+ extern ulint srv_n_rows_inserted;
+diff -ruN innodb_plugin-1.0.3_orig/include/trx0sys.h innodb_plugin-1.0.3_tmp/include/trx0sys.h
+--- innodb_plugin-1.0.3_orig/include/trx0sys.h 2009-02-17 18:41:24.000000000 +0900
++++ innodb_plugin-1.0.3_tmp/include/trx0sys.h 2009-03-19 16:18:06.000000000 +0900
+@@ -122,6 +122,13 @@
+ void
+ trx_sys_create(void);
+ /*================*/
++/*********************************************************************
++Create extra rollback segments when create_new_db */
++UNIV_INTERN
++void
++trx_sys_create_extra_rseg(
++/*======================*/
++ ulint num); /* in: number of extra user rollback segments */
+ /********************************************************************
+ Looks for a free slot for a rollback segment in the trx system file copy. */
+ UNIV_INTERN
+diff -ruN innodb_plugin-1.0.3_orig/scripts/install_innodb_plugins.sql innodb_plugin-1.0.3_tmp/scripts/install_innodb_plugins.sql
+--- innodb_plugin-1.0.3_orig/scripts/install_innodb_plugins.sql 2009-03-18 18:19:52.000000000 +0900
++++ innodb_plugin-1.0.3_tmp/scripts/install_innodb_plugins.sql 2009-03-19 16:18:06.000000000 +0900
+@@ -11,3 +11,4 @@
+ INSTALL PLUGIN INNODB_BUFFER_POOL_PAGES SONAME 'ha_innodb.so';
+ INSTALL PLUGIN INNODB_BUFFER_POOL_PAGES_BLOB SONAME 'ha_innodb.so';
+ INSTALL PLUGIN INNODB_BUFFER_POOL_PAGES_INDEX SONAME 'ha_innodb.so';
++INSTALL PLUGIN innodb_rseg SONAME 'ha_innodb.so';
+diff -ruN innodb_plugin-1.0.3_orig/srv/srv0srv.c innodb_plugin-1.0.3_tmp/srv/srv0srv.c
+--- innodb_plugin-1.0.3_orig/srv/srv0srv.c 2009-03-19 16:17:48.000000000 +0900
++++ innodb_plugin-1.0.3_tmp/srv/srv0srv.c 2009-03-19 16:18:06.000000000 +0900
+@@ -362,6 +362,8 @@
+ UNIV_INTERN ulint srv_enable_unsafe_group_commit = 0; /* 0:disable 1:enable */
+ UNIV_INTERN ulint srv_read_ahead = 3; /* 1: random 2: linear 3: Both */
+ UNIV_INTERN ulint srv_adaptive_checkpoint = 0; /* 0:disable 1:enable */
++
++UNIV_INTERN ulint srv_extra_rsegments = 0; /* extra rseg for users */
+ /*-------------------------------------------*/
+ UNIV_INTERN ulong srv_n_spin_wait_rounds = 20;
+ UNIV_INTERN ulong srv_n_free_tickets_to_enter = 500;
+diff -ruN innodb_plugin-1.0.3_orig/srv/srv0start.c innodb_plugin-1.0.3_tmp/srv/srv0start.c
+--- innodb_plugin-1.0.3_orig/srv/srv0start.c 2009-03-18 18:08:37.000000000 +0900
++++ innodb_plugin-1.0.3_tmp/srv/srv0start.c 2009-03-19 16:18:06.000000000 +0900
+@@ -1486,6 +1486,8 @@
+ dict_create();
+ srv_startup_is_before_trx_rollback_phase = FALSE;
+
++ if (srv_extra_rsegments)
++ trx_sys_create_extra_rseg(srv_extra_rsegments);
+ #ifdef UNIV_LOG_ARCHIVE
+ } else if (srv_archive_recovery) {
+ fprintf(stderr,
+diff -ruN innodb_plugin-1.0.3_orig/trx/trx0sys.c innodb_plugin-1.0.3_tmp/trx/trx0sys.c
+--- innodb_plugin-1.0.3_orig/trx/trx0sys.c 2009-02-17 19:12:56.000000000 +0900
++++ innodb_plugin-1.0.3_tmp/trx/trx0sys.c 2009-03-19 16:18:06.000000000 +0900
+@@ -1066,6 +1066,31 @@
+ }
+
+ /*********************************************************************
++Creates extra user rollback segments when a new database is created. */
++UNIV_INTERN
++void
++trx_sys_create_extra_rseg(
++/*======================*/
++ ulint num) /* in: number of extra user rollback segments */
++{
++ mtr_t mtr;
++ ulint slot_no;
++ ulint i;
++
++ /* Create the extra segments in one mini-transaction; slot ids must come back as 1..num */
++ mtr_start(&mtr);
++ for (i = 1; i < num + 1; i++) {
++ if(!trx_rseg_create(TRX_SYS_SPACE, ULINT_MAX, &slot_no, &mtr)) {
++ fprintf(stderr,
++"InnoDB: Warning: Failed to create extra rollback segments.\n");
++ break;
++ }
++ ut_a(slot_no == i);
++ }
++ mtr_commit(&mtr);
++}
++
++/*********************************************************************
+ Update the file format tag. */
+ static
+ ibool
=== added file 'innodb_io_patches.patch'
--- innodb_io_patches.patch 1970-01-01 00:00:00 +0000
+++ innodb_io_patches.patch 2009-03-20 05:35:54 +0000
@@ -0,0 +1,586 @@
+diff -ruN innodb_plugin-1.0.3_orig/buf/buf0flu.c innodb_plugin-1.0.3_tmp/buf/buf0flu.c
+--- innodb_plugin-1.0.3_orig/buf/buf0flu.c 2009-02-17 17:12:02.000000000 +0900
++++ innodb_plugin-1.0.3_tmp/buf/buf0flu.c 2009-03-18 17:48:18.000000000 +0900
+@@ -1048,9 +1048,23 @@
+
+ old_page_count = page_count;
+
++ if (srv_flush_neighbor_pages) {
+ /* Try to flush also all the neighbors */
+ page_count += buf_flush_try_neighbors(
+ space, offset, flush_type);
++ } else {
++ /* Try to flush the page only */
++ buf_pool_mutex_enter();
++
++ mutex_t* block_mutex = buf_page_get_mutex(bpage);
++ mutex_enter(block_mutex);
++
++ buf_page_t* bpage_tmp = buf_page_hash_get(space, offset);
++ if (bpage_tmp) {
++ buf_flush_page(bpage_tmp, flush_type);
++ page_count++;
++ }
++ }
+ /* fprintf(stderr,
+ "Flush type %lu, page no %lu, neighb %lu\n",
+ flush_type, offset,
+diff -ruN innodb_plugin-1.0.3_orig/buf/buf0rea.c innodb_plugin-1.0.3_tmp/buf/buf0rea.c
+--- innodb_plugin-1.0.3_orig/buf/buf0rea.c 2009-02-17 17:12:02.000000000 +0900
++++ innodb_plugin-1.0.3_tmp/buf/buf0rea.c 2009-03-18 17:48:18.000000000 +0900
+@@ -36,6 +36,7 @@
+ #include "os0file.h"
+ #include "srv0start.h"
+
++extern ulint srv_read_ahead;
+ extern ulint srv_read_ahead_rnd;
+ extern ulint srv_read_ahead_seq;
+ extern ulint srv_buf_pool_reads;
+@@ -203,6 +204,10 @@
+ ulint i;
+ ulint buf_read_ahead_random_area;
+
++ if (!(srv_read_ahead & 1)) {
++ return(0);
++ }
++
+ if (srv_startup_is_before_trx_rollback_phase) {
+ /* No read-ahead to avoid thread deadlocks */
+ return(0);
+@@ -428,6 +433,10 @@
+ const ulint buf_read_ahead_linear_area
+ = BUF_READ_AHEAD_LINEAR_AREA;
+
++ if (!(srv_read_ahead & 2)) {
++ return(0);
++ }
++
+ if (UNIV_UNLIKELY(srv_startup_is_before_trx_rollback_phase)) {
+ /* No read-ahead to avoid thread deadlocks */
+ return(0);
+diff -ruN innodb_plugin-1.0.3_orig/handler/ha_innodb.cc innodb_plugin-1.0.3_tmp/handler/ha_innodb.cc
+--- innodb_plugin-1.0.3_orig/handler/ha_innodb.cc 2009-03-18 17:44:46.000000000 +0900
++++ innodb_plugin-1.0.3_tmp/handler/ha_innodb.cc 2009-03-18 17:52:19.000000000 +0900
+@@ -143,6 +143,7 @@
+ innobase_force_recovery, innobase_open_files,
+ innobase_autoinc_lock_mode;
+
++static unsigned long innobase_read_io_threads, innobase_write_io_threads;
+ static long long innobase_buffer_pool_size, innobase_log_file_size;
+
+ /* The default values for the following char* start-up parameters
+@@ -2104,6 +2105,10 @@
+ srv_mem_pool_size = (ulint) innobase_additional_mem_pool_size;
+
+ srv_n_file_io_threads = (ulint) innobase_file_io_threads;
++ srv_n_read_io_threads = (ulint) innobase_read_io_threads;
++ srv_n_write_io_threads = (ulint) innobase_write_io_threads;
++
++ srv_read_ahead &= 3;
+
+ srv_force_recovery = (ulint) innobase_force_recovery;
+
+@@ -8879,6 +8884,10 @@
+ if (thd_sql_command(thd) != SQLCOM_XA_PREPARE &&
+ (all || !thd_test_options(thd, OPTION_NOT_AUTOCOMMIT | OPTION_BEGIN)))
+ {
++ if (srv_enable_unsafe_group_commit && !THDVAR(thd, support_xa)) {
++ /* choose group commit rather than binlog order */
++ return(0);
++ }
+
+ /* For ibbackup to work the order of transactions in binlog
+ and InnoDB must be the same. Consider the situation
+@@ -9716,6 +9725,84 @@
+ innodb_change_buffering_validate,
+ innodb_change_buffering_update, NULL);
+
++static MYSQL_SYSVAR_ULONG(io_capacity, srv_io_capacity,
++ PLUGIN_VAR_RQCMDARG,
++ "Number of IO operations per second the server can do. Tunes background IO rate.",
++ NULL, NULL, 100, 100, 999999999, 0);
++
++static MYSQL_SYSVAR_LONGLONG(ibuf_max_size, srv_ibuf_max_size,
++ PLUGIN_VAR_RQCMDARG | PLUGIN_VAR_READONLY,
++ "The maximum size of the insert buffer. (in bytes)",
++ NULL, NULL, LONGLONG_MAX, 0, LONGLONG_MAX, 0);
++
++static MYSQL_SYSVAR_ULONG(ibuf_active_contract, srv_ibuf_active_contract,
++ PLUGIN_VAR_RQCMDARG,
++ "Enable/Disable active_contract of insert buffer. 0:disable 1:enable",
++ NULL, NULL, 0, 0, 1, 0);
++
++static MYSQL_SYSVAR_ULONG(ibuf_accel_rate, srv_ibuf_accel_rate,
++ PLUGIN_VAR_RQCMDARG,
++ "Tunes amount of insert buffer processing of background, in addition to innodb_io_capacity. (in percentage)",
++ NULL, NULL, 100, 100, 999999999, 0);
++
++static MYSQL_SYSVAR_ULONG(flush_neighbor_pages, srv_flush_neighbor_pages,
++ PLUGIN_VAR_RQCMDARG,
++ "Enable/Disable flushing also neighbor pages. 0:disable 1:enable",
++ NULL, NULL, 1, 0, 1, 0);
++
++static
++void
++innodb_read_ahead_update(
++ THD* thd, /* in: thread handle (unused) */
++ struct st_mysql_sys_var* var, /* in: system variable descriptor (unused) */
++ void* var_ptr, /* out: where the new value is stored */
++ const void* save) /* in: value being assigned */
++{
++ *(long *)var_ptr= (*(long *)save) & 3; /* fold legacy aliases "0".."3" (indexes 4..7) onto 0..3 */
++}
++const char *read_ahead_names[]=
++{
++ "none", /* 0 */
++ "random",
++ "linear",
++ "both", /* 3 */
++ /* For compatibility of the older patch */
++ "0", /* 4 ("none" + 4) */
++ "1",
++ "2",
++ "3", /* 7 ("both" + 4) */
++ NullS
++};
++TYPELIB read_ahead_typelib=
++{
++ array_elements(read_ahead_names) - 1, "read_ahead_typelib",
++ read_ahead_names, NULL
++};
++static MYSQL_SYSVAR_ENUM(read_ahead, srv_read_ahead,
++ PLUGIN_VAR_RQCMDARG,
++ "Control read ahead activity. (none, random, linear, [both])",
++ NULL, innodb_read_ahead_update, 3, &read_ahead_typelib);
++
++static MYSQL_SYSVAR_ULONG(adaptive_checkpoint, srv_adaptive_checkpoint,
++ PLUGIN_VAR_RQCMDARG,
++ "Enable/Disable flushing along modified age. 0:disable 1:enable",
++ NULL, NULL, 0, 0, 1, 0);
++
++static MYSQL_SYSVAR_ULONG(enable_unsafe_group_commit, srv_enable_unsafe_group_commit,
++ PLUGIN_VAR_RQCMDARG,
++ "Enable/Disable unsafe group commit when support_xa=OFF and use with binlog or other XA storage engine.",
++ NULL, NULL, 0, 0, 1, 0);
++
++static MYSQL_SYSVAR_ULONG(read_io_threads, innobase_read_io_threads,
++ PLUGIN_VAR_RQCMDARG | PLUGIN_VAR_READONLY,
++ "Number of background read I/O threads in InnoDB.",
++ NULL, NULL, 1, 1, 64, 0);
++
++static MYSQL_SYSVAR_ULONG(write_io_threads, innobase_write_io_threads,
++ PLUGIN_VAR_RQCMDARG | PLUGIN_VAR_READONLY,
++ "Number of background write I/O threads in InnoDB.",
++ NULL, NULL, 1, 1, 64, 0);
++
+ static struct st_mysql_sys_var* innobase_system_variables[]= {
+ MYSQL_SYSVAR(additional_mem_pool_size),
+ MYSQL_SYSVAR(autoextend_increment),
+@@ -9764,6 +9851,16 @@
+ MYSQL_SYSVAR(show_verbose_locks),
+ MYSQL_SYSVAR(show_locks_held),
+ MYSQL_SYSVAR(version),
++ MYSQL_SYSVAR(io_capacity),
++ MYSQL_SYSVAR(ibuf_max_size),
++ MYSQL_SYSVAR(ibuf_active_contract),
++ MYSQL_SYSVAR(ibuf_accel_rate),
++ MYSQL_SYSVAR(flush_neighbor_pages),
++ MYSQL_SYSVAR(read_ahead),
++ MYSQL_SYSVAR(adaptive_checkpoint),
++ MYSQL_SYSVAR(enable_unsafe_group_commit),
++ MYSQL_SYSVAR(read_io_threads),
++ MYSQL_SYSVAR(write_io_threads),
+ MYSQL_SYSVAR(use_sys_malloc),
+ MYSQL_SYSVAR(change_buffering),
+ NULL
+diff -ruN innodb_plugin-1.0.3_orig/handler/innodb_patch_info.h innodb_plugin-1.0.3_tmp/handler/innodb_patch_info.h
+--- innodb_plugin-1.0.3_orig/handler/innodb_patch_info.h 2009-03-18 17:44:46.000000000 +0900
++++ innodb_plugin-1.0.3_tmp/handler/innodb_patch_info.h 2009-03-18 17:48:18.000000000 +0900
+@@ -25,5 +25,6 @@
+ }innodb_enhancements[] = {
+ {"xtradb_show_enhancements","I_S.XTRADB_ENHANCEMENTS","","http://www.percona.com/docs/wiki/percona-xtradb"},
+ {"innodb_show_status","Improvements to SHOW INNODB STATUS","Memory information and lock info fixes","http://www.percona.com/docs/wiki/percona-xtradb"},
++{"innodb_io","Improvements to InnoDB IO","","http://www.percona.com/docs/wiki/percona-xtradb"},
+ {NULL, NULL, NULL, NULL}
+ };
+diff -ruN innodb_plugin-1.0.3_orig/ibuf/ibuf0ibuf.c innodb_plugin-1.0.3_tmp/ibuf/ibuf0ibuf.c
+--- innodb_plugin-1.0.3_orig/ibuf/ibuf0ibuf.c 2009-02-17 17:55:41.000000000 +0900
++++ innodb_plugin-1.0.3_tmp/ibuf/ibuf0ibuf.c 2009-03-18 17:48:18.000000000 +0900
+@@ -422,8 +422,10 @@
+ grow in size, as the references on the upper levels of the tree can
+ change */
+
+- ibuf->max_size = buf_pool_get_curr_size() / UNIV_PAGE_SIZE
+- / IBUF_POOL_SIZE_PER_MAX_SIZE;
++ ibuf->max_size = ut_min( buf_pool_get_curr_size() / UNIV_PAGE_SIZE
++ / IBUF_POOL_SIZE_PER_MAX_SIZE, (ulint) srv_ibuf_max_size / UNIV_PAGE_SIZE);
++
++ srv_ibuf_max_size = (long long) ibuf->max_size * UNIV_PAGE_SIZE;
+
+ mutex_create(&ibuf_pessimistic_insert_mutex,
+ SYNC_IBUF_PESS_INSERT_MUTEX);
+@@ -2247,11 +2249,13 @@
+
+ mutex_enter(&ibuf_mutex);
+
++ if (!srv_ibuf_active_contract) {
+ if (ibuf->size < ibuf->max_size + IBUF_CONTRACT_ON_INSERT_NON_SYNC) {
+ mutex_exit(&ibuf_mutex);
+
+ return;
+ }
++ }
+
+ sync = FALSE;
+
+diff -ruN innodb_plugin-1.0.3_orig/include/os0file.h innodb_plugin-1.0.3_tmp/include/os0file.h
+--- innodb_plugin-1.0.3_orig/include/os0file.h 2009-02-17 18:18:35.000000000 +0900
++++ innodb_plugin-1.0.3_tmp/include/os0file.h 2009-03-18 17:48:18.000000000 +0900
+@@ -577,8 +577,10 @@
+ /*========*/
+ ulint n, /* in: maximum number of pending aio operations
+ allowed; n must be divisible by n_segments */
+- ulint n_segments, /* in: combined number of segments in the four
+- first aio arrays; must be >= 4 */
++// ulint n_segments, /* in: combined number of segments in the four
++// first aio arrays; must be >= 4 */
++ ulint n_read_threads, /* n_segments == 2 + n_read_threads + n_write_threads */
++ ulint n_write_threads, /**/
+ ulint n_slots_sync); /* in: number of slots in the sync aio array */
+ /***********************************************************************
+ Requests an asynchronous i/o operation. */
+diff -ruN innodb_plugin-1.0.3_orig/include/srv0srv.h innodb_plugin-1.0.3_tmp/include/srv0srv.h
+--- innodb_plugin-1.0.3_orig/include/srv0srv.h 2009-03-18 17:44:46.000000000 +0900
++++ innodb_plugin-1.0.3_tmp/include/srv0srv.h 2009-03-18 17:48:18.000000000 +0900
+@@ -126,6 +126,8 @@
+ extern ulint srv_lock_table_size;
+
+ extern ulint srv_n_file_io_threads;
++extern ulint srv_n_read_io_threads;
++extern ulint srv_n_write_io_threads;
+
+ #ifdef UNIV_LOG_ARCHIVE
+ extern ibool srv_log_archive_on;
+@@ -170,6 +172,16 @@
+ extern ulong srv_max_purge_lag;
+
+ extern ulong srv_replication_delay;
++
++extern ulint srv_io_capacity;
++extern long long srv_ibuf_max_size;
++extern ulint srv_ibuf_active_contract;
++extern ulint srv_ibuf_accel_rate;
++extern ulint srv_flush_neighbor_pages;
++extern ulint srv_enable_unsafe_group_commit;
++extern ulint srv_read_ahead;
++extern ulint srv_adaptive_checkpoint;
++
+ /*-------------------------------------------*/
+
+ extern ulint srv_n_rows_inserted;
+diff -ruN innodb_plugin-1.0.3_orig/log/log0log.c innodb_plugin-1.0.3_tmp/log/log0log.c
+--- innodb_plugin-1.0.3_orig/log/log0log.c 2009-02-17 18:50:12.000000000 +0900
++++ innodb_plugin-1.0.3_tmp/log/log0log.c 2009-03-18 17:48:18.000000000 +0900
+@@ -3276,6 +3276,15 @@
+ log_sys->flushed_to_disk_lsn,
+ log_sys->last_checkpoint_lsn);
+
++ fprintf(file,
++ "Max checkpoint age %lu\n"
++ "Modified age %lu\n"
++ "Checkpoint age %lu\n",
++ (ulong) log_sys->max_checkpoint_age,
++ (ulong) (log_sys->lsn -
++ log_buf_pool_get_oldest_modification()),
++ (ulong) (log_sys->lsn - log_sys->last_checkpoint_lsn));
++
+ current_time = time(NULL);
+
+ time_elapsed = 0.001 + difftime(current_time,
+diff -ruN innodb_plugin-1.0.3_orig/os/os0file.c innodb_plugin-1.0.3_tmp/os/os0file.c
+--- innodb_plugin-1.0.3_orig/os/os0file.c 2009-02-17 18:53:58.000000000 +0900
++++ innodb_plugin-1.0.3_tmp/os/os0file.c 2009-03-18 17:48:18.000000000 +0900
+@@ -2936,8 +2936,10 @@
+ /*========*/
+ ulint n, /* in: maximum number of pending aio operations
+ allowed; n must be divisible by n_segments */
+- ulint n_segments, /* in: combined number of segments in the four
+- first aio arrays; must be >= 4 */
++// ulint n_segments, /* in: combined number of segments in the four
++// first aio arrays; must be >= 4 */
++ ulint n_read_threads, /* n_segments == 2 + n_read_threads + n_write_threads*/
++ ulint n_write_threads, /**/
+ ulint n_slots_sync) /* in: number of slots in the sync aio array */
+ {
+ ulint n_read_segs;
+@@ -2945,6 +2947,8 @@
+ ulint n_per_seg;
+ ulint i;
+
++ ulint n_segments = 2 + n_read_threads + n_write_threads;
++
+ ut_ad(n % n_segments == 0);
+ ut_ad(n_segments >= 4);
+
+@@ -2955,8 +2959,8 @@
+ }
+
+ n_per_seg = n / n_segments;
+- n_write_segs = (n_segments - 2) / 2;
+- n_read_segs = n_segments - 2 - n_write_segs;
++ n_write_segs = n_write_threads;
++ n_read_segs = n_read_threads;
+
+ /* fprintf(stderr, "Array n per seg %lu\n", n_per_seg); */
+
+@@ -3156,6 +3160,13 @@
+ OVERLAPPED* control;
+ #endif
+ ulint i;
++ ulint prim_segment;
++ ulint n;
++
++ n = array->n_slots / array->n_segments;
++ /* 64 blocks' striping ( aligning max(BUF_READ_AHEAD_AREA) ) */
++ prim_segment = ( offset >> (UNIV_PAGE_SIZE_SHIFT + 6) ) % (array->n_segments);
++
+ loop:
+ os_mutex_enter(array->mutex);
+
+@@ -3174,6 +3185,16 @@
+ goto loop;
+ }
+
++ for (i = prim_segment * n; i < array->n_slots; i++) {
++ slot = os_aio_array_get_nth_slot(array, i);
++
++ if (slot->reserved == FALSE) {
++ break;
++ }
++ }
++
++ if (slot->reserved == TRUE){
++ /* Not found after the intended segment. So we should search before. */
+ for (i = 0;; i++) {
+ slot = os_aio_array_get_nth_slot(array, i);
+
+@@ -3181,6 +3202,7 @@
+ break;
+ }
+ }
++ }
+
+ array->n_reserved++;
+
+diff -ruN innodb_plugin-1.0.3_orig/srv/srv0srv.c innodb_plugin-1.0.3_tmp/srv/srv0srv.c
+--- innodb_plugin-1.0.3_orig/srv/srv0srv.c 2009-03-18 17:44:46.000000000 +0900
++++ innodb_plugin-1.0.3_tmp/srv/srv0srv.c 2009-03-18 17:48:18.000000000 +0900
+@@ -177,6 +177,8 @@
+ UNIV_INTERN ulint srv_lock_table_size = ULINT_MAX;
+
+ UNIV_INTERN ulint srv_n_file_io_threads = ULINT_MAX;
++UNIV_INTERN ulint srv_n_read_io_threads = 1;
++UNIV_INTERN ulint srv_n_write_io_threads = 1;
+
+ #ifdef UNIV_LOG_ARCHIVE
+ UNIV_INTERN ibool srv_log_archive_on = FALSE;
+@@ -341,6 +343,23 @@
+
+ UNIV_INTERN ulong srv_replication_delay = 0;
+
++UNIV_INTERN ulint srv_io_capacity = 100;
++
++/* Returns the number of IO operations that is X percent of the capacity.
++PCT_IO(5) -> returns the number of IO operations that is 5% of the max
++where max is srv_io_capacity. The argument is parenthesized in the expansion. */
++#define PCT_IO(pct) ((ulint) (srv_io_capacity * ((double) (pct) / 100.0)))
++
++UNIV_INTERN long long srv_ibuf_max_size = 0;
++UNIV_INTERN ulint srv_ibuf_active_contract = 0; /* 0:disable 1:enable */
++UNIV_INTERN ulint srv_ibuf_accel_rate = 100;
++#define PCT_IBUF_IO(pct) ((ulint) (srv_io_capacity * srv_ibuf_accel_rate * ((double) (pct) / 10000.0))) /* pct% of srv_io_capacity, scaled by srv_ibuf_accel_rate% */
++
++UNIV_INTERN ulint srv_flush_neighbor_pages = 1; /* 0:disable 1:enable */
++
++UNIV_INTERN ulint srv_enable_unsafe_group_commit = 0; /* 0:disable 1:enable */
++UNIV_INTERN ulint srv_read_ahead = 3; /* 1: random 2: linear 3: Both */
++UNIV_INTERN ulint srv_adaptive_checkpoint = 0; /* 0:disable 1:enable */
+ /*-------------------------------------------*/
+ UNIV_INTERN ulong srv_n_spin_wait_rounds = 20;
+ UNIV_INTERN ulong srv_n_free_tickets_to_enter = 500;
+@@ -2325,6 +2344,8 @@
+ ibool skip_sleep = FALSE;
+ ulint i;
+
++ ib_uint64_t oldest_lsn;
++
+ #ifdef UNIV_DEBUG_THREAD_CREATION
+ fprintf(stderr, "Master thread starts, id %lu\n",
+ os_thread_pf(os_thread_get_curr_id()));
+@@ -2412,10 +2433,10 @@
+ + log_sys->n_pending_writes;
+ n_ios = log_sys->n_log_ios + buf_pool->n_pages_read
+ + buf_pool->n_pages_written;
+- if (n_pend_ios < 3 && (n_ios - n_ios_old < 5)) {
++ if (n_pend_ios < 3 && (n_ios - n_ios_old < PCT_IO(5))) {
+ srv_main_thread_op_info = "doing insert buffer merge";
+ ibuf_contract_for_n_pages(
+- TRUE, srv_insert_buffer_batch_size / 4);
++ TRUE, PCT_IBUF_IO((srv_insert_buffer_batch_size / 4)));
+
+ srv_main_thread_op_info = "flushing log";
+
+@@ -2428,7 +2449,7 @@
+ /* Try to keep the number of modified pages in the
+ buffer pool under the limit wished by the user */
+
+- n_pages_flushed = buf_flush_batch(BUF_FLUSH_LIST, 100,
++ n_pages_flushed = buf_flush_batch(BUF_FLUSH_LIST, PCT_IO(100),
+ IB_ULONGLONG_MAX);
+
+ /* If we had to do the flush, it may have taken
+@@ -2437,6 +2458,49 @@
+ iteration of this loop. */
+
+ skip_sleep = TRUE;
++ } else if (srv_adaptive_checkpoint) {
++
++ /* Try to keep modified age not to exceed
++ max_checkpoint_age * 7/8 line */
++
++ mutex_enter(&(log_sys->mutex));
++
++ oldest_lsn = buf_pool_get_oldest_modification();
++ if (oldest_lsn == 0) {
++
++ mutex_exit(&(log_sys->mutex));
++
++ } else {
++ if ((log_sys->lsn - oldest_lsn)
++ > (log_sys->max_checkpoint_age) - ((log_sys->max_checkpoint_age) / 8)) {
++ /* LOG_POOL_PREFLUSH_RATIO_ASYNC is exceeded. */
++ /* We should not flush from here. */
++ mutex_exit(&(log_sys->mutex));
++ } else if ((log_sys->lsn - oldest_lsn)
++ > (log_sys->max_checkpoint_age) - ((log_sys->max_checkpoint_age) / 4)) {
++
++ /* 2nd defence line (max_checkpoint_age * 3/4) */
++
++ mutex_exit(&(log_sys->mutex));
++
++ n_pages_flushed = buf_flush_batch(BUF_FLUSH_LIST, PCT_IO(100),
++ IB_ULONGLONG_MAX);
++ skip_sleep = TRUE;
++ } else if ((log_sys->lsn - oldest_lsn)
++ > (log_sys->max_checkpoint_age)/2 ) {
++
++ /* 1st defence line (max_checkpoint_age * 1/2) */
++
++ mutex_exit(&(log_sys->mutex));
++
++ n_pages_flushed = buf_flush_batch(BUF_FLUSH_LIST, PCT_IO(10),
++ IB_ULONGLONG_MAX);
++ skip_sleep = TRUE;
++ } else {
++ mutex_exit(&(log_sys->mutex));
++ }
++ }
++
+ }
+
+ if (srv_activity_count == old_activity_count) {
+@@ -2463,10 +2527,10 @@
+ n_pend_ios = buf_get_n_pending_ios() + log_sys->n_pending_writes;
+ n_ios = log_sys->n_log_ios + buf_pool->n_pages_read
+ + buf_pool->n_pages_written;
+- if (n_pend_ios < 3 && (n_ios - n_ios_very_old < 200)) {
+-
+- srv_main_thread_op_info = "flushing buffer pool pages";
+- buf_flush_batch(BUF_FLUSH_LIST, 100, IB_ULONGLONG_MAX);
++ if (n_pend_ios < 3 && (n_ios - n_ios_very_old < PCT_IO(200))) {
++
++ srv_main_thread_op_info = "flushing buffer pool pages";
++ buf_flush_batch(BUF_FLUSH_LIST, PCT_IO(100), IB_ULONGLONG_MAX);
+
+ srv_main_thread_op_info = "flushing log";
+ log_buffer_flush_to_disk();
+@@ -2476,7 +2540,7 @@
+ even if the server were active */
+
+ srv_main_thread_op_info = "doing insert buffer merge";
+- ibuf_contract_for_n_pages(TRUE, srv_insert_buffer_batch_size / 4);
++ ibuf_contract_for_n_pages(TRUE, PCT_IBUF_IO((srv_insert_buffer_batch_size / 4)));
+
+ srv_main_thread_op_info = "flushing log";
+ log_buffer_flush_to_disk();
+@@ -2516,14 +2580,14 @@
+ (> 70 %), we assume we can afford reserving the disk(s) for
+ the time it requires to flush 100 pages */
+
+- n_pages_flushed = buf_flush_batch(BUF_FLUSH_LIST, 100,
++ n_pages_flushed = buf_flush_batch(BUF_FLUSH_LIST, PCT_IO(100),
+ IB_ULONGLONG_MAX);
+ } else {
+ /* Otherwise, we only flush a small number of pages so that
+ we do not unnecessarily use much disk i/o capacity from
+ other work */
+
+- n_pages_flushed = buf_flush_batch(BUF_FLUSH_LIST, 10,
++ n_pages_flushed = buf_flush_batch(BUF_FLUSH_LIST, PCT_IO(10),
+ IB_ULONGLONG_MAX);
+ }
+
+@@ -2611,7 +2675,7 @@
+ n_bytes_merged = 0;
+ } else {
+ n_bytes_merged = ibuf_contract_for_n_pages(
+- TRUE, srv_insert_buffer_batch_size);
++ TRUE, PCT_IBUF_IO((srv_insert_buffer_batch_size * 5)));
+ }
+
+ srv_main_thread_op_info = "reserving kernel mutex";
+@@ -2627,7 +2691,7 @@
+ srv_main_thread_op_info = "flushing buffer pool pages";
+
+ if (srv_fast_shutdown < 2) {
+- n_pages_flushed = buf_flush_batch(BUF_FLUSH_LIST, 100,
++ n_pages_flushed = buf_flush_batch(BUF_FLUSH_LIST, PCT_IO(100),
+ IB_ULONGLONG_MAX);
+ } else {
+ /* In the fastest shutdown we do not flush the buffer pool
+diff -ruN innodb_plugin-1.0.3_orig/srv/srv0start.c innodb_plugin-1.0.3_tmp/srv/srv0start.c
+--- innodb_plugin-1.0.3_orig/srv/srv0start.c 2009-03-05 20:49:51.000000000 +0900
++++ innodb_plugin-1.0.3_tmp/srv/srv0start.c 2009-03-18 17:48:18.000000000 +0900
+@@ -1252,24 +1252,28 @@
+ return(DB_ERROR);
+ }
+
++ /* overwrite innodb_file_io_threads: derived from the read and write thread counts */
++ srv_n_file_io_threads = 2 + srv_n_read_io_threads + srv_n_write_io_threads;
++
+ /* Restrict the maximum number of file i/o threads */
+ if (srv_n_file_io_threads > SRV_MAX_N_IO_THREADS) {
+
+ srv_n_file_io_threads = SRV_MAX_N_IO_THREADS;
++ srv_n_read_io_threads = srv_n_write_io_threads = (SRV_MAX_N_IO_THREADS - 2) / 2;
+ }
+
+ if (!os_aio_use_native_aio) {
+ /* In simulated aio we currently have use only for 4 threads */
+- srv_n_file_io_threads = 4;
++ /*srv_n_file_io_threads = 4;*/
+
+ os_aio_init(8 * SRV_N_PENDING_IOS_PER_THREAD
+ * srv_n_file_io_threads,
+- srv_n_file_io_threads,
+- SRV_MAX_N_PENDING_SYNC_IOS);
++ srv_n_read_io_threads, srv_n_write_io_threads,
++ SRV_MAX_N_PENDING_SYNC_IOS * 8);
+ } else {
+ os_aio_init(SRV_N_PENDING_IOS_PER_THREAD
+ * srv_n_file_io_threads,
+- srv_n_file_io_threads,
++ srv_n_read_io_threads, srv_n_write_io_threads,
+ SRV_MAX_N_PENDING_SYNC_IOS);
+ }
+
=== added file 'innodb_opt_lru_count.patch'
--- innodb_opt_lru_count.patch 1970-01-01 00:00:00 +0000
+++ innodb_opt_lru_count.patch 2009-03-20 05:35:54 +0000
@@ -0,0 +1,157 @@
+diff -ruN innodb_plugin-1.0.3_orig/buf/buf0buf.c innodb_plugin-1.0.3_tmp/buf/buf0buf.c
+--- innodb_plugin-1.0.3_orig/buf/buf0buf.c 2009-03-18 18:08:28.000000000 +0900
++++ innodb_plugin-1.0.3_tmp/buf/buf0buf.c 2009-03-18 18:09:28.000000000 +0900
+@@ -3015,7 +3015,7 @@
+ ibuf_merge_or_delete_for_page(NULL, space, offset, zip_size, TRUE);
+
+ /* Flush pages from the end of the LRU list if necessary */
+- buf_flush_free_margin();
++ buf_flush_free_margin(FALSE);
+
+ frame = block->frame;
+
+diff -ruN innodb_plugin-1.0.3_orig/buf/buf0flu.c innodb_plugin-1.0.3_tmp/buf/buf0flu.c
+--- innodb_plugin-1.0.3_orig/buf/buf0flu.c 2009-03-18 18:08:37.000000000 +0900
++++ innodb_plugin-1.0.3_tmp/buf/buf0flu.c 2009-03-18 18:09:28.000000000 +0900
+@@ -133,9 +133,9 @@
+ buf_page_t* bpage) /* in: buffer control block, must be
+ buf_page_in_file(bpage) and in the LRU list */
+ {
+- ut_ad(buf_pool_mutex_own());
++ //ut_ad(buf_pool_mutex_own());
+ ut_ad(mutex_own(buf_page_get_mutex(bpage)));
+- ut_ad(bpage->in_LRU_list);
++ //ut_ad(bpage->in_LRU_list); /* optimistic use */
+
+ if (UNIV_LIKELY(buf_page_in_file(bpage))) {
+
+@@ -144,6 +144,8 @@
+ && bpage->buf_fix_count == 0);
+ }
+
++ /* It is permitted not to own LRU_mutex here. */
++/*
+ ut_print_timestamp(stderr);
+ fprintf(stderr,
+ " InnoDB: Error: buffer block state %lu"
+@@ -151,6 +153,7 @@
+ (ulong) buf_page_get_state(bpage));
+ ut_print_buf(stderr, bpage, sizeof(buf_page_t));
+ putc('\n', stderr);
++*/
+
+ return(FALSE);
+ }
+@@ -1137,7 +1140,7 @@
+ ulint n_replaceable;
+ ulint distance = 0;
+
+- buf_pool_mutex_enter();
++ //buf_pool_mutex_enter();
+
+ n_replaceable = UT_LIST_GET_LEN(buf_pool->free);
+
+@@ -1163,7 +1166,7 @@
+ bpage = UT_LIST_GET_PREV(LRU, bpage);
+ }
+
+- buf_pool_mutex_exit();
++ //buf_pool_mutex_exit();
+
+ if (n_replaceable >= BUF_FLUSH_FREE_BLOCK_MARGIN) {
+
+@@ -1182,8 +1185,9 @@
+ immediately, without waiting. */
+ UNIV_INTERN
+ void
+-buf_flush_free_margin(void)
++buf_flush_free_margin(
+ /*=======================*/
++ ibool wait)
+ {
+ ulint n_to_flush;
+ ulint n_flushed;
+@@ -1192,7 +1196,7 @@
+
+ if (n_to_flush > 0) {
+ n_flushed = buf_flush_batch(BUF_FLUSH_LRU, n_to_flush, 0);
+- if (n_flushed == ULINT_UNDEFINED) {
++ if (wait && n_flushed == ULINT_UNDEFINED) {
+ /* There was an LRU type flush batch already running;
+ let us wait for it to end */
+
+diff -ruN innodb_plugin-1.0.3_orig/buf/buf0lru.c innodb_plugin-1.0.3_tmp/buf/buf0lru.c
+--- innodb_plugin-1.0.3_orig/buf/buf0lru.c 2009-02-17 17:12:02.000000000 +0900
++++ innodb_plugin-1.0.3_tmp/buf/buf0lru.c 2009-03-18 18:09:28.000000000 +0900
+@@ -910,7 +910,7 @@
+
+ /* No free block was found: try to flush the LRU list */
+
+- buf_flush_free_margin();
++ buf_flush_free_margin(TRUE);
+ ++srv_buf_pool_wait_free;
+
+ os_aio_simulated_wake_handler_threads();
+diff -ruN innodb_plugin-1.0.3_orig/buf/buf0rea.c innodb_plugin-1.0.3_tmp/buf/buf0rea.c
+--- innodb_plugin-1.0.3_orig/buf/buf0rea.c 2009-03-18 18:08:37.000000000 +0900
++++ innodb_plugin-1.0.3_tmp/buf/buf0rea.c 2009-03-18 18:09:28.000000000 +0900
+@@ -375,7 +375,7 @@
+ }
+
+ /* Flush pages from the end of the LRU list if necessary */
+- buf_flush_free_margin();
++ buf_flush_free_margin(FALSE);
+
+ /* Increment number of I/O operations used for LRU policy. */
+ buf_LRU_stat_inc_io();
+@@ -636,7 +636,7 @@
+ os_aio_simulated_wake_handler_threads();
+
+ /* Flush pages from the end of the LRU list if necessary */
+- buf_flush_free_margin();
++ buf_flush_free_margin(FALSE);
+
+ #ifdef UNIV_DEBUG
+ if (buf_debug_prints && (count > 0)) {
+@@ -721,7 +721,7 @@
+ os_aio_simulated_wake_handler_threads();
+
+ /* Flush pages from the end of the LRU list if necessary */
+- buf_flush_free_margin();
++ buf_flush_free_margin(FALSE);
+
+ #ifdef UNIV_DEBUG
+ if (buf_debug_prints) {
+@@ -806,7 +806,7 @@
+ os_aio_simulated_wake_handler_threads();
+
+ /* Flush pages from the end of the LRU list if necessary */
+- buf_flush_free_margin();
++ buf_flush_free_margin(FALSE);
+
+ #ifdef UNIV_DEBUG
+ if (buf_debug_prints) {
+diff -ruN innodb_plugin-1.0.3_orig/handler/innodb_patch_info.h innodb_plugin-1.0.3_tmp/handler/innodb_patch_info.h
+--- innodb_plugin-1.0.3_orig/handler/innodb_patch_info.h 2009-03-18 18:08:42.000000000 +0900
++++ innodb_plugin-1.0.3_tmp/handler/innodb_patch_info.h 2009-03-18 18:09:28.000000000 +0900
+@@ -27,5 +27,6 @@
+ {"innodb_show_status","Improvements to SHOW INNODB STATUS","Memory information and lock info fixes","http://www.percona.com/docs/wiki/percona-xtradb"},
+ {"innodb_io","Improvements to InnoDB IO","","http://www.percona.com/docs/wiki/percona-xtradb"},
+ {"innodb_rw_lock","InnoDB RW-lock fixes","Useful for 8+ cores SMP systems","http://www.percona.com/docs/wiki/percona-xtradb"},
++{"innodb_opt_lru_count","Fix of buffer_pool mutex","Decreases contention on buffer_pool mutex on LRU operations","http://www.percona.com/docs/wiki/percona-xtradb"},
+ {NULL, NULL, NULL, NULL}
+ };
+diff -ruN innodb_plugin-1.0.3_orig/include/buf0flu.h innodb_plugin-1.0.3_tmp/include/buf0flu.h
+--- innodb_plugin-1.0.3_orig/include/buf0flu.h 2009-02-17 17:59:22.000000000 +0900
++++ innodb_plugin-1.0.3_tmp/include/buf0flu.h 2009-03-18 18:09:28.000000000 +0900
+@@ -49,8 +49,9 @@
+ a margin of replaceable pages there. */
+ UNIV_INTERN
+ void
+-buf_flush_free_margin(void);
++buf_flush_free_margin(
+ /*=======================*/
++ ibool wait);
+ /************************************************************************
+ Initializes a page for writing to the tablespace. */
+ UNIV_INTERN
=== added file 'innodb_rw_lock.patch'
--- innodb_rw_lock.patch 1970-01-01 00:00:00 +0000
+++ innodb_rw_lock.patch 2009-03-23 09:54:36 +0000
@@ -0,0 +1,1060 @@
+diff -ruN innodb_plugin-1.0.3_orig/Makefile.in innodb_plugin-1.0.3_tmp/Makefile.in
+--- innodb_plugin-1.0.3_orig/Makefile.in 2009-03-23 17:06:47.000000000 +0900
++++ innodb_plugin-1.0.3_tmp/Makefile.in 2009-03-23 17:07:17.000000000 +0900
+@@ -713,7 +713,8 @@
+ echo '#define HAVE_ATOMIC_PTHREAD_T' > include/ut0auxconf.h ; \
+ fi
+
+-all: check_have_atomic_pthread_t all-am
++# This is a temporary fix for http://bugs.mysql.com/43740
++all: all-am
+
+ .SUFFIXES:
+ .SUFFIXES: .c .cc .lo .o .obj
+diff -ruN innodb_plugin-1.0.3_orig/handler/innodb_patch_info.h innodb_plugin-1.0.3_tmp/handler/innodb_patch_info.h
+--- innodb_plugin-1.0.3_orig/handler/innodb_patch_info.h 2009-03-23 17:06:47.000000000 +0900
++++ innodb_plugin-1.0.3_tmp/handler/innodb_patch_info.h 2009-03-23 17:07:17.000000000 +0900
+@@ -26,5 +26,6 @@
+ {"xtradb_show_enhancements","I_S.XTRADB_ENHANCEMENTS","","http://www.percona.com/docs/wiki/percona-xtradb"},
+ {"innodb_show_status","Improvements to SHOW INNODB STATUS","Memory information and lock info fixes","http://www.percona.com/docs/wiki/percona-xtradb"},
+ {"innodb_io","Improvements to InnoDB IO","","http://www.percona.com/docs/wiki/percona-xtradb"},
++{"innodb_rw_lock","InnoDB RW-lock fixes","Useful for 8+ cores SMP systems","http://www.percona.com/docs/wiki/percona-xtradb"},
+ {NULL, NULL, NULL, NULL}
+ };
+diff -ruN innodb_plugin-1.0.3_orig/include/sync0rw.h innodb_plugin-1.0.3_tmp/include/sync0rw.h
+--- innodb_plugin-1.0.3_orig/include/sync0rw.h 2009-02-17 16:56:33.000000000 +0900
++++ innodb_plugin-1.0.3_tmp/include/sync0rw.h 2009-03-23 17:07:17.000000000 +0900
+@@ -359,7 +359,17 @@
+ Accessor functions for rw lock. */
+ UNIV_INLINE
+ ulint
+-rw_lock_get_waiters(
++rw_lock_get_s_waiters(
++/*==================*/
++ rw_lock_t* lock);
++UNIV_INLINE
++ulint
++rw_lock_get_x_waiters(
++/*==================*/
++ rw_lock_t* lock);
++UNIV_INLINE
++ulint
++rw_lock_get_wx_waiters(
+ /*================*/
+ rw_lock_t* lock);
+ UNIV_INLINE
+@@ -478,6 +488,14 @@
+ rw_lock_debug_t* info); /* in: debug struct */
+ #endif /* UNIV_SYNC_DEBUG */
+
++#ifndef INNODB_RW_LOCKS_USE_ATOMICS
++#error INNODB_RW_LOCKS_USE_ATOMICS is not defined. Do you use enough new GCC or compatibles?
++#error Or do you use exact options for CFLAGS?
++#error e.g. (for x86_32): "-m32 -march=i586 -mtune=i686"
++#error e.g. (for Sparc_64): "-m64 -mcpu=v9"
++#error Otherwise, this build may be slower than normal version.
++#endif
++
+ /* NOTE! The structure appears here only for the compiler to know its size.
+ Do not use its fields directly! The structure used in the spin lock
+ implementation of a read-write lock. Several threads may have a shared lock
+@@ -489,7 +507,16 @@
+ struct rw_lock_struct {
+ volatile lint lock_word;
+ /* Holds the state of the lock. */
++#ifdef INNODB_RW_LOCKS_USE_ATOMICS
++ volatile ulint s_waiters; /* 1: there are waiters (s_lock) */
++ volatile ulint x_waiters; /* 1: there are waiters (x_lock) */
++ volatile ulint wait_ex_waiters; /* 1: there are waiters (wait_ex) */
++ volatile ulint reader_count; /* Number of readers who have locked this
++ lock in the shared mode */
++ volatile ulint writer;
++#else
+ volatile ulint waiters;/* 1: there are waiters */
++#endif
+ volatile ibool recursive;/* Default value FALSE which means the lock
+ is non-recursive. The value is typically set
+ to TRUE making normal rw_locks recursive. In
+@@ -506,7 +533,16 @@
+ /* Thread id of writer thread. Is only
+ guaranteed to have sane and non-stale
+ value iff recursive flag is set. */
++#ifdef INNODB_RW_LOCKS_USE_ATOMICS
++ volatile ulint writer_count; /* Number of times the same thread has
++ recursively locked the lock in the exclusive
++ mode */
++ /* Used by sync0arr.c for thread queueing */
++ os_event_t s_event; /* Used for s_lock */
++ os_event_t x_event; /* Used for x_lock */
++#else
+ os_event_t event; /* Used by sync0arr.c for thread queueing */
++#endif
+ os_event_t wait_ex_event;
+ /* Event for next-writer to wait on. A thread
+ must decrement lock_word before waiting. */
+@@ -528,7 +564,7 @@
+ /* last s-lock file/line is not guaranteed to be correct */
+ const char* last_s_file_name;/* File name where last s-locked */
+ const char* last_x_file_name;/* File name where last x-locked */
+- ibool writer_is_wait_ex;
++ volatile ibool writer_is_wait_ex;
+ /* This is TRUE if the writer field is
+ RW_LOCK_WAIT_EX; this field is located far
+ from the memory update hotspot fields which
+diff -ruN innodb_plugin-1.0.3_orig/include/sync0rw.ic innodb_plugin-1.0.3_tmp/include/sync0rw.ic
+--- innodb_plugin-1.0.3_orig/include/sync0rw.ic 2009-02-17 21:59:54.000000000 +0900
++++ innodb_plugin-1.0.3_tmp/include/sync0rw.ic 2009-03-23 17:13:13.000000000 +0900
+@@ -70,12 +70,28 @@
+ Accessor functions for rw lock. */
+ UNIV_INLINE
+ ulint
+-rw_lock_get_waiters(
++rw_lock_get_s_waiters(
+ /*================*/
+ /* out: 1 if waiters, 0 otherwise */
+ rw_lock_t* lock) /* in: rw-lock */
+ {
+- return(lock->waiters);
++ return(lock->s_waiters);
++}
++UNIV_INLINE
++ulint
++rw_lock_get_x_waiters(
++/*================*/
++ rw_lock_t* lock)
++{
++ return(lock->x_waiters);
++}
++UNIV_INLINE
++ulint
++rw_lock_get_wx_waiters(
++/*================*/
++ rw_lock_t* lock)
++{
++ return(lock->wait_ex_waiters);
+ }
+
+ /************************************************************************
+@@ -84,14 +100,41 @@
+ memory barrier. */
+ UNIV_INLINE
+ void
+-rw_lock_set_waiter_flag(
++rw_lock_set_s_waiter_flag(
++/*====================*/
++ rw_lock_t* lock) /* in: rw-lock */
++{
++#ifdef INNODB_RW_LOCKS_USE_ATOMICS
++ // os_compare_and_swap(&lock->s_waiters, 0, 1);
++ __sync_lock_test_and_set(&lock->s_waiters, 1);
++#else /* INNODB_RW_LOCKS_USE_ATOMICS */
++ lock->s_waiters = 1;
++#endif /* INNODB_RW_LOCKS_USE_ATOMICS */
++}
++UNIV_INLINE
++void
++rw_lock_set_x_waiter_flag(
++/*====================*/
++ rw_lock_t* lock) /* in: rw-lock */
++{
++#ifdef INNODB_RW_LOCKS_USE_ATOMICS
++ // os_compare_and_swap(&lock->x_waiters, 0, 1);
++ __sync_lock_test_and_set(&lock->x_waiters, 1);
++#else /* INNODB_RW_LOCKS_USE_ATOMICS */
++ lock->x_waiters = 1;
++#endif /* INNODB_RW_LOCKS_USE_ATOMICS */
++}
++UNIV_INLINE
++void
++rw_lock_set_wx_waiter_flag(
+ /*====================*/
+ rw_lock_t* lock) /* in: rw-lock */
+ {
+ #ifdef INNODB_RW_LOCKS_USE_ATOMICS
+- os_compare_and_swap(&lock->waiters, 0, 1);
++ // os_compare_and_swap(&lock->wait_ex_waiters, 0, 1);
++ __sync_lock_test_and_set(&lock->wait_ex_waiters, 1);
+ #else /* INNODB_RW_LOCKS_USE_ATOMICS */
+- lock->waiters = 1;
++ lock->wait_ex_waiters = 1;
+ #endif /* INNODB_RW_LOCKS_USE_ATOMICS */
+ }
+
+@@ -101,14 +144,41 @@
+ memory barrier. */
+ UNIV_INLINE
+ void
+-rw_lock_reset_waiter_flag(
++rw_lock_reset_s_waiter_flag(
++/*======================*/
++ rw_lock_t* lock) /* in: rw-lock */
++{
++#ifdef INNODB_RW_LOCKS_USE_ATOMICS
++ // os_compare_and_swap(&lock->s_waiters, 1, 0);
++ __sync_lock_test_and_set(&lock->s_waiters, 0);
++#else /* INNODB_RW_LOCKS_USE_ATOMICS */
++ lock->s_waiters = 0;
++#endif /* INNODB_RW_LOCKS_USE_ATOMICS */
++}
++UNIV_INLINE
++void
++rw_lock_reset_x_waiter_flag(
++/*======================*/
++ rw_lock_t* lock) /* in: rw-lock */
++{
++#ifdef INNODB_RW_LOCKS_USE_ATOMICS
++ // os_compare_and_swap(&lock->x_waiters, 1, 0);
++ __sync_lock_test_and_set(&lock->x_waiters, 0);
++#else /* INNODB_RW_LOCKS_USE_ATOMICS */
++ lock->x_waiters = 0;
++#endif /* INNODB_RW_LOCKS_USE_ATOMICS */
++}
++UNIV_INLINE
++void
++rw_lock_reset_wx_waiter_flag(
+ /*======================*/
+ rw_lock_t* lock) /* in: rw-lock */
+ {
+ #ifdef INNODB_RW_LOCKS_USE_ATOMICS
+- os_compare_and_swap(&lock->waiters, 1, 0);
++ // os_compare_and_swap(&lock->wait_ex_waiters, 1, 0);
++ __sync_lock_test_and_set(&lock->wait_ex_waiters, 0);
+ #else /* INNODB_RW_LOCKS_USE_ATOMICS */
+- lock->waiters = 0;
++ lock->wait_ex_waiters = 0;
+ #endif /* INNODB_RW_LOCKS_USE_ATOMICS */
+ }
+
+@@ -121,6 +191,17 @@
+ /*===============*/
+ rw_lock_t* lock)
+ {
++#ifdef INNODB_RW_LOCKS_USE_ATOMICS
++ if (lock->writer == RW_LOCK_NOT_LOCKED) {
++ return(RW_LOCK_NOT_LOCKED);
++ }
++
++ if (lock->writer_is_wait_ex) {
++ return(RW_LOCK_WAIT_EX);
++ } else {
++ return(RW_LOCK_EX);
++ }
++#else
+ lint lock_word = lock->lock_word;
+ if(lock_word > 0) {
+ /* return NOT_LOCKED in s-lock state, like the writer
+@@ -132,6 +213,7 @@
+ ut_ad(lock_word > -X_LOCK_DECR);
+ return(RW_LOCK_WAIT_EX);
+ }
++#endif
+ }
+
+ /**********************************************************************
+@@ -142,6 +224,9 @@
+ /*=====================*/
+ rw_lock_t* lock)
+ {
++#ifdef INNODB_RW_LOCKS_USE_ATOMICS
++ return(lock->reader_count);
++#else
+ lint lock_word = lock->lock_word;
+ if(lock_word > 0) {
+ /* s-locked, no x-waiters */
+@@ -151,6 +236,7 @@
+ return((ulint)(-lock_word));
+ }
+ return(0);
++#endif
+ }
+
+ #ifndef INNODB_RW_LOCKS_USE_ATOMICS
+@@ -174,12 +260,16 @@
+ /* out: value of writer_count */
+ rw_lock_t* lock) /* in: rw-lock */
+ {
++#ifdef INNODB_RW_LOCKS_USE_ATOMICS
++ return(lock->writer_count);
++#else
+ lint lock_copy = lock->lock_word;
+ /* If there is a reader, lock_word is not divisible by X_LOCK_DECR */
+ if(lock_copy > 0 || (-lock_copy) % X_LOCK_DECR != 0) {
+ return(0);
+ }
+ return(((-lock_copy) / X_LOCK_DECR) + 1);
++#endif
+ }
+
+ /**********************************************************************
+@@ -317,11 +407,26 @@
+ const char* file_name, /* in: file name where lock requested */
+ ulint line) /* in: line where requested */
+ {
++#ifdef INNODB_RW_LOCKS_USE_ATOMICS
++ if (UNIV_LIKELY(rw_lock_get_writer(lock) == RW_LOCK_NOT_LOCKED)) {
++ /* try s-lock */
++ if(__sync_sub_and_fetch(&(lock->lock_word),1) <= 0) {
++ /* fail */
++ __sync_fetch_and_add(&(lock->lock_word),1);
++ return(FALSE); /* locking did not succeed */
++ }
++ /* success */
++ __sync_fetch_and_add(&(lock->reader_count),1);
++ } else {
++ return(FALSE); /* locking did not succeed */
++ }
++#else
+ /* TODO: study performance of UNIV_LIKELY branch prediction hints. */
+ if (!rw_lock_lock_word_decr(lock, 1)) {
+ /* Locking did not succeed */
+ return(FALSE);
+ }
++#endif
+
+ #ifdef UNIV_SYNC_DEBUG
+ rw_lock_add_debug_info(lock, pass, RW_LOCK_SHARED, file_name, line);
+@@ -346,10 +451,17 @@
+ const char* file_name, /* in: file name where requested */
+ ulint line) /* in: line where lock requested */
+ {
++#ifdef INNODB_RW_LOCKS_USE_ATOMICS
++ ut_ad(rw_lock_get_writer(lock) == RW_LOCK_NOT_LOCKED);
++ ut_ad(rw_lock_get_reader_count(lock) == 0);
++
++ __sync_fetch_and_add(&(lock->reader_count),1);
++#else
+ ut_ad(lock->lock_word == X_LOCK_DECR);
+
+ /* Indicate there is a new reader by decrementing lock_word */
+ lock->lock_word--;
++#endif
+
+ lock->last_s_file_name = file_name;
+ lock->last_s_line = line;
+@@ -372,9 +484,17 @@
+ ulint line) /* in: line where lock requested */
+ {
+ ut_ad(rw_lock_validate(lock));
++#ifdef INNODB_RW_LOCKS_USE_ATOMICS
++ ut_ad(rw_lock_get_reader_count(lock) == 0);
++ ut_ad(rw_lock_get_writer(lock) == RW_LOCK_NOT_LOCKED);
++
++ lock->writer = RW_LOCK_EX;
++ __sync_fetch_and_add(&(lock->writer_count),1);
++#else
+ ut_ad(lock->lock_word == X_LOCK_DECR);
+
+ lock->lock_word -= X_LOCK_DECR;
++#endif
+ lock->writer_thread = os_thread_get_curr_id();
+ lock->recursive = TRUE;
+
+@@ -448,7 +568,56 @@
+ ibool success;
+
+ #ifdef INNODB_RW_LOCKS_USE_ATOMICS
+- success = os_compare_and_swap(&(lock->lock_word), X_LOCK_DECR, 0);
++ success = FALSE;
++ if ((lock->reader_count == 0)
++ && rw_lock_get_writer(lock) == RW_LOCK_NOT_LOCKED) {
++retry_x_lock:
++ /* try x-lock */
++ if(__sync_sub_and_fetch(&(lock->lock_word),
++ X_LOCK_DECR) == 0) {
++ /* success */
++ /* try to lock writer */
++ if(__sync_lock_test_and_set(&(lock->writer),RW_LOCK_EX)
++ == RW_LOCK_NOT_LOCKED) {
++ /* success */
++ lock->writer_thread = curr_thread;
++ lock->recursive = TRUE;
++ lock->writer_is_wait_ex = FALSE;
++ /* next function may work as memory barrier */
++ relock:
++ __sync_fetch_and_add(&(lock->writer_count),1);
++
++#ifdef UNIV_SYNC_DEBUG
++ rw_lock_add_debug_info(lock, 0, RW_LOCK_EX, file_name, line);
++#endif
++
++ lock->last_x_file_name = file_name;
++ lock->last_x_line = line;
++
++ ut_ad(rw_lock_validate(lock));
++
++ return(TRUE);
++ } else {
++ /* x-unlock */
++ __sync_fetch_and_add(&(lock->lock_word),
++ X_LOCK_DECR);
++ }
++ } else {
++ /* fail (x-lock) */
++ if (__sync_fetch_and_add(&(lock->lock_word),X_LOCK_DECR)
++ == 0)
++ goto retry_x_lock;
++ }
++ }
++
++ if (lock->recursive
++ && os_thread_eq(lock->writer_thread, curr_thread)) {
++ goto relock;
++ }
++
++ //ut_ad(rw_lock_validate(lock));
++
++ return(FALSE);
+ #else
+
+ success = FALSE;
+@@ -459,7 +628,6 @@
+ }
+ mutex_exit(&(lock->mutex));
+
+-#endif
+ if (success) {
+ rw_lock_set_writer_id_and_recursion_flag(lock, TRUE);
+
+@@ -486,6 +654,7 @@
+ ut_ad(rw_lock_validate(lock));
+
+ return(TRUE);
++#endif
+ }
+
+ /**********************************************************************
+@@ -501,6 +670,31 @@
+ #endif
+ )
+ {
++#ifdef INNODB_RW_LOCKS_USE_ATOMICS
++ ibool last = FALSE;
++
++ ut_a(lock->reader_count > 0);
++
++ /* unlock lock_word */
++ __sync_fetch_and_add(&(lock->lock_word),1);
++
++ if(__sync_sub_and_fetch(&(lock->reader_count),1) == 0) {
++ last = TRUE;
++ }
++
++#ifdef UNIV_SYNC_DEBUG
++ rw_lock_remove_debug_info(lock, pass, RW_LOCK_SHARED);
++#endif
++
++ if (UNIV_UNLIKELY(last && __sync_lock_test_and_set(&lock->wait_ex_waiters, 0))) {
++ os_event_set(lock->wait_ex_event);
++ sync_array_object_signalled(sync_primary_wait_array);
++ }
++ else if (UNIV_UNLIKELY(last && __sync_lock_test_and_set(&lock->x_waiters, 0))) {
++ os_event_set(lock->x_event);
++ sync_array_object_signalled(sync_primary_wait_array);
++ }
++#else
+ ut_ad((lock->lock_word % X_LOCK_DECR) != 0);
+
+ #ifdef UNIV_SYNC_DEBUG
+@@ -517,6 +711,7 @@
+ sync_array_object_signalled(sync_primary_wait_array);
+
+ }
++#endif
+
+ ut_ad(rw_lock_validate(lock));
+
+@@ -534,6 +729,19 @@
+ /*====================*/
+ rw_lock_t* lock) /* in: rw-lock */
+ {
++#ifdef INNODB_RW_LOCKS_USE_ATOMICS
++ ut_ad(lock->reader_count > 0);
++
++ __sync_sub_and_fetch(&(lock->reader_count),1);
++
++#ifdef UNIV_SYNC_DEBUG
++ rw_lock_remove_debug_info(lock, 0, RW_LOCK_SHARED);
++#endif
++
++ ut_ad(!lock->s_waiters);
++ ut_ad(!lock->x_waiters);
++ ut_ad(!lock->wait_ex_waiters);
++#else
+ ut_ad(lock->lock_word < X_LOCK_DECR);
+
+ #ifdef UNIV_SYNC_DEBUG
+@@ -544,6 +752,7 @@
+ lock->lock_word++;
+
+ ut_ad(!lock->waiters);
++#endif
+ ut_ad(rw_lock_validate(lock));
+ #ifdef UNIV_SYNC_PERF_STAT
+ rw_s_exit_count++;
+@@ -563,6 +772,49 @@
+ #endif
+ )
+ {
++#ifdef INNODB_RW_LOCKS_USE_ATOMICS
++ ibool last = FALSE;
++ ibool s_sg = FALSE;
++ ibool x_sg = FALSE;
++
++ ut_ad(lock->writer_count > 0);
++
++ if(__sync_sub_and_fetch(&(lock->writer_count),1) == 0) {
++ last = TRUE;
++ }
++
++ if (last) {
++ /* unlock lock_word */
++ __sync_fetch_and_add(&(lock->lock_word),X_LOCK_DECR);
++
++ lock->recursive = FALSE;
++ /* FIXME: storing -1 in a thread id is bad manners for pthread,
++ but we should not keep the ID of a thread that is no longer the owner. */
++ lock->writer_thread = -1;
++ __sync_lock_test_and_set(&(lock->writer),RW_LOCK_NOT_LOCKED);
++ }
++
++#ifdef UNIV_SYNC_DEBUG
++ rw_lock_remove_debug_info(lock, pass, RW_LOCK_EX);
++#endif
++ if (last) {
++ if(__sync_lock_test_and_set(&lock->s_waiters, 0)){
++ s_sg = TRUE;
++ }
++ if(__sync_lock_test_and_set(&lock->x_waiters, 0)){
++ x_sg = TRUE;
++ }
++ }
++
++ if (UNIV_UNLIKELY(s_sg)) {
++ os_event_set(lock->s_event);
++ sync_array_object_signalled(sync_primary_wait_array);
++ }
++ if (UNIV_UNLIKELY(x_sg)) {
++ os_event_set(lock->x_event);
++ sync_array_object_signalled(sync_primary_wait_array);
++ }
++#else
+ ut_ad((lock->lock_word % X_LOCK_DECR) == 0);
+
+ /* lock->recursive flag also indicates if lock->writer_thread is
+@@ -593,6 +845,7 @@
+ }
+ }
+
++#endif
+ ut_ad(rw_lock_validate(lock));
+
+ #ifdef UNIV_SYNC_PERF_STAT
+@@ -612,6 +865,19 @@
+ /* Reset the exclusive lock if this thread no longer has an x-mode
+ lock */
+
++#ifdef INNODB_RW_LOCKS_USE_ATOMICS
++ if(__sync_sub_and_fetch(&(lock->writer_count),1) == 0) {
++ lock->writer = RW_LOCK_NOT_LOCKED;
++ }
++
++#ifdef UNIV_SYNC_DEBUG
++ rw_lock_remove_debug_info(lock, 0, RW_LOCK_EX);
++#endif
++
++ ut_ad(!lock->s_waiters);
++ ut_ad(!lock->x_waiters);
++ ut_ad(!lock->wait_ex_waiters);
++#else
+ ut_ad((lock->lock_word % X_LOCK_DECR) == 0);
+
+ #ifdef UNIV_SYNC_DEBUG
+@@ -627,6 +893,7 @@
+ lock->lock_word += X_LOCK_DECR;
+
+ ut_ad(!lock->waiters);
++#endif
+ ut_ad(rw_lock_validate(lock));
+
+ #ifdef UNIV_SYNC_PERF_STAT
+diff -ruN innodb_plugin-1.0.3_orig/include/ut0auxconf.h innodb_plugin-1.0.3_tmp/include/ut0auxconf.h
+--- innodb_plugin-1.0.3_orig/include/ut0auxconf.h 2009-03-05 23:38:59.000000000 +0900
++++ innodb_plugin-1.0.3_tmp/include/ut0auxconf.h 2009-03-23 17:07:17.000000000 +0900
+@@ -12,3 +12,8 @@
+ the hack from Makefile.in wiped away then the "real" check from plug.in
+ will take over.
+ */
++/* This is a temporary fix for http://bugs.mysql.com/43740 */
++/* force to enable */
++#ifdef HAVE_GCC_ATOMIC_BUILTINS
++#define HAVE_ATOMIC_PTHREAD_T
++#endif
+diff -ruN innodb_plugin-1.0.3_orig/sync/sync0arr.c innodb_plugin-1.0.3_tmp/sync/sync0arr.c
+--- innodb_plugin-1.0.3_orig/sync/sync0arr.c 2009-02-17 21:26:53.000000000 +0900
++++ innodb_plugin-1.0.3_tmp/sync/sync0arr.c 2009-03-23 17:07:17.000000000 +0900
+@@ -331,8 +331,15 @@
+ return(((mutex_t *) cell->wait_object)->event);
+ } else if (type == RW_LOCK_WAIT_EX) {
+ return(((rw_lock_t *) cell->wait_object)->wait_ex_event);
++#ifdef INNODB_RW_LOCKS_USE_ATOMICS
++ } else if (type == RW_LOCK_SHARED) {
++ return(((rw_lock_t *) cell->wait_object)->s_event);
++ } else { /* RW_LOCK_EX */
++ return(((rw_lock_t *) cell->wait_object)->x_event);
++#else
+ } else { /* RW_LOCK_SHARED and RW_LOCK_EX wait on the same event */
+ return(((rw_lock_t *) cell->wait_object)->event);
++#endif
+ }
+ }
+
+@@ -503,7 +510,7 @@
+ || type == RW_LOCK_WAIT_EX
+ || type == RW_LOCK_SHARED) {
+
+- fputs(type == RW_LOCK_EX ? "X-lock on" : "S-lock on", file);
++ fputs(type == RW_LOCK_SHARED ? "S-lock on" : "X-lock on", file);
+
+ rwlock = cell->old_wait_rw_lock;
+
+@@ -523,12 +530,21 @@
+ }
+
+ fprintf(file,
++#ifdef INNODB_RW_LOCKS_USE_ATOMICS
++ "number of readers %lu, s_waiters flag %lu, x_waiters flag %lu, "
++#else
+ "number of readers %lu, waiters flag %lu, "
++#endif
+ "lock_word: %lx\n"
+ "Last time read locked in file %s line %lu\n"
+ "Last time write locked in file %s line %lu\n",
+ (ulong) rw_lock_get_reader_count(rwlock),
++#ifdef INNODB_RW_LOCKS_USE_ATOMICS
++ (ulong) rwlock->s_waiters,
++ (ulong) (rwlock->x_waiters || rwlock->wait_ex_waiters),
++#else
+ (ulong) rwlock->waiters,
++#endif
+ rwlock->lock_word,
+ rwlock->last_s_file_name,
+ (ulong) rwlock->last_s_line,
+diff -ruN innodb_plugin-1.0.3_orig/sync/sync0rw.c innodb_plugin-1.0.3_tmp/sync/sync0rw.c
+--- innodb_plugin-1.0.3_orig/sync/sync0rw.c 2009-02-17 21:26:53.000000000 +0900
++++ innodb_plugin-1.0.3_tmp/sync/sync0rw.c 2009-03-23 17:15:44.000000000 +0900
+@@ -250,7 +250,17 @@
+ #endif /* INNODB_RW_LOCKS_USE_ATOMICS */
+
+ lock->lock_word = X_LOCK_DECR;
++#ifdef INNODB_RW_LOCKS_USE_ATOMICS
++ lock->s_waiters = 0;
++ lock->x_waiters = 0;
++ lock->wait_ex_waiters = 0;
++ lock->writer = RW_LOCK_NOT_LOCKED;
++ lock->writer_count = 0;
++ lock->reader_count = 0;
++ lock->writer_is_wait_ex = FALSE;
++#else
+ lock->waiters = 0;
++#endif
+
+ /* We set this value to signify that lock->writer_thread
+ contains garbage at initialization and cannot be used for
+@@ -273,7 +283,12 @@
+ lock->last_x_file_name = "not yet reserved";
+ lock->last_s_line = 0;
+ lock->last_x_line = 0;
++#ifdef INNODB_RW_LOCKS_USE_ATOMICS
++ lock->s_event = os_event_create(NULL);
++ lock->x_event = os_event_create(NULL);
++#else
+ lock->event = os_event_create(NULL);
++#endif
+ lock->wait_ex_event = os_event_create(NULL);
+
+ mutex_enter(&rw_lock_list_mutex);
+@@ -299,7 +314,15 @@
+ rw_lock_t* lock) /* in: rw-lock */
+ {
+ ut_ad(rw_lock_validate(lock));
++#ifdef INNODB_RW_LOCKS_USE_ATOMICS
++ ut_a(rw_lock_get_writer(lock) == RW_LOCK_NOT_LOCKED);
++ ut_a(rw_lock_get_s_waiters(lock) == 0);
++ ut_a(rw_lock_get_x_waiters(lock) == 0);
++ ut_a(rw_lock_get_wx_waiters(lock) == 0);
++ ut_a(rw_lock_get_reader_count(lock) == 0);
++#else
+ ut_a(lock->lock_word == X_LOCK_DECR);
++#endif
+
+ lock->magic_n = 0;
+
+@@ -308,7 +331,12 @@
+ #endif /* INNODB_RW_LOCKS_USE_ATOMICS */
+
+ mutex_enter(&rw_lock_list_mutex);
++#ifdef INNODB_RW_LOCKS_USE_ATOMICS
++ os_event_free(lock->s_event);
++ os_event_free(lock->x_event);
++#else
+ os_event_free(lock->event);
++#endif
+
+ os_event_free(lock->wait_ex_event);
+
+@@ -336,12 +364,23 @@
+ {
+ ut_a(lock);
+
++#ifdef INNODB_RW_LOCKS_USE_ATOMICS
++ ut_a(lock->magic_n == RW_LOCK_MAGIC_N);
++
++ ulint waiters = rw_lock_get_s_waiters(lock);
++ ut_a(waiters == 0 || waiters == 1);
++ waiters = rw_lock_get_x_waiters(lock);
++ ut_a(waiters == 0 || waiters == 1);
++ waiters = rw_lock_get_wx_waiters(lock);
++ ut_a(waiters == 0 || waiters == 1);
++#else
+ ulint waiters = rw_lock_get_waiters(lock);
+ lint lock_word = lock->lock_word;
+
+ ut_a(lock->magic_n == RW_LOCK_MAGIC_N);
+ ut_a(waiters == 0 || waiters == 1);
+ ut_a(lock_word > -X_LOCK_DECR ||(-lock_word) % X_LOCK_DECR == 0);
++#endif
+
+ return(TRUE);
+ }
+@@ -371,7 +410,12 @@
+ lock_loop:
+
+ /* Spin waiting for the writer field to become free */
++#ifdef INNODB_RW_LOCKS_USE_ATOMICS
++ while (i < SYNC_SPIN_ROUNDS
++ && rw_lock_get_writer(lock) != RW_LOCK_NOT_LOCKED) {
++#else
+ while (i < SYNC_SPIN_ROUNDS && lock->lock_word <= 0) {
++#endif
+ if (srv_spin_wait_delay) {
+ ut_delay(ut_rnd_interval(0, srv_spin_wait_delay));
+ }
+@@ -412,12 +456,29 @@
+
+ /* Set waiters before checking lock_word to ensure wake-up
+ signal is sent. This may lead to some unnecessary signals. */
++#ifdef INNODB_RW_LOCKS_USE_ATOMICS
++ rw_lock_set_s_waiter_flag(lock);
++#else
+ rw_lock_set_waiter_flag(lock);
++#endif
+
++#ifdef INNODB_RW_LOCKS_USE_ATOMICS
++ for (i = 0; i < 4; i++) {
++#endif
+ if (TRUE == rw_lock_s_lock_low(lock, pass, file_name, line)) {
+ sync_array_free_cell(sync_primary_wait_array, index);
+ return; /* Success */
+ }
++#ifdef INNODB_RW_LOCKS_USE_ATOMICS
++ }
++
++ /* If a wait_ex waiter has stalled, wake it up. */
++ if (lock->reader_count == 0
++ && __sync_lock_test_and_set(&lock->wait_ex_waiters, 0)) {
++ os_event_set(lock->wait_ex_event);
++ sync_array_object_signalled(sync_primary_wait_array);
++ }
++#endif
+
+ if (srv_print_latch_waits) {
+ fprintf(stderr,
+@@ -456,7 +517,12 @@
+ {
+ ut_ad(rw_lock_is_locked(lock, RW_LOCK_EX));
+
++#ifdef INNODB_RW_LOCKS_USE_ATOMICS
++ lock->writer_thread = os_thread_get_curr_id();
++ lock->recursive = TRUE;
++#else
+ rw_lock_set_writer_id_and_recursion_flag(lock, TRUE);
++#endif
+ }
+
+ /**********************************************************************
+@@ -530,7 +596,11 @@
+ /**********************************************************************
+ Low-level function for acquiring an exclusive lock. */
+ UNIV_INLINE
++#ifdef INNODB_RW_LOCKS_USE_ATOMICS
++ulint
++#else
+ ibool
++#endif
+ rw_lock_x_lock_low(
+ /*===============*/
+ /* out: RW_LOCK_NOT_LOCKED if did
+@@ -543,6 +613,90 @@
+ {
+ os_thread_id_t curr_thread = os_thread_get_curr_id();
+
++#ifdef INNODB_RW_LOCKS_USE_ATOMICS
++retry_writer:
++ /* try to lock writer */
++ if(__sync_lock_test_and_set(&(lock->writer),RW_LOCK_EX)
++ == RW_LOCK_NOT_LOCKED) {
++ /* success */
++ /* obtain RW_LOCK_WAIT_EX right */
++ lock->writer_thread = curr_thread;
++ lock->recursive = pass ? FALSE : TRUE;
++ lock->writer_is_wait_ex = TRUE;
++ /* atomic operation may be safer about memory order. */
++ __sync_synchronize();
++#ifdef UNIV_SYNC_DEBUG
++ rw_lock_add_debug_info(lock, pass, RW_LOCK_WAIT_EX,
++ file_name, line);
++#endif
++ }
++
++ if (!os_thread_eq(lock->writer_thread, curr_thread)) {
++ return(RW_LOCK_NOT_LOCKED);
++ }
++
++ switch(rw_lock_get_writer(lock)) {
++ case RW_LOCK_WAIT_EX:
++ /* have right to try x-lock */
++retry_x_lock:
++ /* try x-lock */
++ if(__sync_sub_and_fetch(&(lock->lock_word),
++ X_LOCK_DECR) == 0) {
++ /* success */
++ lock->recursive = pass ? FALSE : TRUE;
++ lock->writer_is_wait_ex = FALSE;
++ __sync_fetch_and_add(&(lock->writer_count),1);
++
++#ifdef UNIV_SYNC_DEBUG
++ rw_lock_remove_debug_info(lock, pass, RW_LOCK_WAIT_EX);
++ rw_lock_add_debug_info(lock, pass, RW_LOCK_EX,
++ file_name, line);
++#endif
++
++ lock->last_x_file_name = file_name;
++ lock->last_x_line = line;
++
++ /* Locking succeeded, we may return */
++ return(RW_LOCK_EX);
++ } else if(__sync_fetch_and_add(&(lock->lock_word),
++ X_LOCK_DECR) == 0) {
++ /* retry x-lock */
++ goto retry_x_lock;
++ }
++
++ /* There are readers, we have to wait */
++ return(RW_LOCK_WAIT_EX);
++
++ break;
++
++ case RW_LOCK_EX:
++ /* already have x-lock */
++ if (lock->recursive && (pass == 0)) {
++ __sync_fetch_and_add(&(lock->writer_count),1);
++
++#ifdef UNIV_SYNC_DEBUG
++ rw_lock_add_debug_info(lock, pass, RW_LOCK_EX, file_name,
++ line);
++#endif
++
++ lock->last_x_file_name = file_name;
++ lock->last_x_line = line;
++
++ /* Locking succeeded, we may return */
++ return(RW_LOCK_EX);
++ }
++
++ return(RW_LOCK_NOT_LOCKED);
++
++ break;
++
++ default: /* RW_LOCK_NOT_LOCKED? maybe impossible */
++ goto retry_writer;
++ }
++
++ /* Locking did not succeed */
++ return(RW_LOCK_NOT_LOCKED);
++#else
+ if (rw_lock_lock_word_decr(lock, X_LOCK_DECR)) {
+
+ /* lock->recursive also tells us if the writer_thread
+@@ -580,6 +734,7 @@
+ lock->last_x_line = (unsigned int) line;
+
+ return(TRUE);
++#endif
+ }
+
+ /**********************************************************************
+@@ -604,18 +759,55 @@
+ ulint index; /* index of the reserved wait cell */
+ ulint i; /* spin round count */
+ ibool spinning = FALSE;
++#ifdef INNODB_RW_LOCKS_USE_ATOMICS
++ ulint state = RW_LOCK_NOT_LOCKED; /* lock state acquired */
++ ulint prev_state = RW_LOCK_NOT_LOCKED;
++#endif
+
+ ut_ad(rw_lock_validate(lock));
+
+ i = 0;
+
+ lock_loop:
++#ifdef INNODB_RW_LOCKS_USE_ATOMICS
++ prev_state = state;
++ state = rw_lock_x_lock_low(lock, pass, file_name, line);
++
++lock_loop_2:
++ if (state != prev_state) i=0; /* if progress, reset counter. */
+
++ if (state == RW_LOCK_EX) {
++#else
+ if (rw_lock_x_lock_low(lock, pass, file_name, line)) {
++#endif
+ rw_x_spin_round_count += i;
+
+ return; /* Locking succeeded */
+
++#ifdef INNODB_RW_LOCKS_USE_ATOMICS
++ } else if (state == RW_LOCK_WAIT_EX) {
++
++ if (!spinning) {
++ spinning = TRUE;
++ rw_x_spin_wait_count++;
++ }
++
++ /* Spin waiting for the reader count field to become zero */
++ while (i < SYNC_SPIN_ROUNDS
++ && lock->lock_word != X_LOCK_DECR) {
++ if (srv_spin_wait_delay) {
++ ut_delay(ut_rnd_interval(0,
++ srv_spin_wait_delay));
++ }
++
++ i++;
++ }
++ if (i == SYNC_SPIN_ROUNDS) {
++ os_thread_yield();
++ } else {
++ goto lock_loop;
++ }
++#endif
+ } else {
+
+ if (!spinning) {
+@@ -625,7 +817,11 @@
+
+ /* Spin waiting for the lock_word to become free */
+ while (i < SYNC_SPIN_ROUNDS
++#ifdef INNODB_RW_LOCKS_USE_ATOMICS
++ && rw_lock_get_writer(lock) != RW_LOCK_NOT_LOCKED) {
++#else
+ && lock->lock_word <= 0) {
++#endif
+ if (srv_spin_wait_delay) {
+ ut_delay(ut_rnd_interval(0,
+ srv_spin_wait_delay));
+@@ -652,18 +848,46 @@
+
+ sync_array_reserve_cell(sync_primary_wait_array,
+ lock,
++#ifdef INNODB_RW_LOCKS_USE_ATOMICS
++ (state == RW_LOCK_WAIT_EX)
++ ? RW_LOCK_WAIT_EX : RW_LOCK_EX,
++#else
+ RW_LOCK_EX,
++#endif
+ file_name, line,
+ &index);
+
+ /* Waiters must be set before checking lock_word, to ensure signal
+ is sent. This could lead to a few unnecessary wake-up signals. */
++#ifdef INNODB_RW_LOCKS_USE_ATOMICS
++ if (state == RW_LOCK_WAIT_EX) {
++ rw_lock_set_wx_waiter_flag(lock);
++ } else {
++ rw_lock_set_x_waiter_flag(lock);
++ }
++#else
+ rw_lock_set_waiter_flag(lock);
++#endif
+
++#ifdef INNODB_RW_LOCKS_USE_ATOMICS
++ for (i = 0; i < 4; i++) {
++ prev_state = state;
++ state = rw_lock_x_lock_low(lock, pass, file_name, line);
++ if (state == RW_LOCK_EX) {
++ sync_array_free_cell(sync_primary_wait_array, index);
++ return; /* Locking succeeded */
++ } else if (state != prev_state) {
++ /* retry! */
++ sync_array_free_cell(sync_primary_wait_array, index);
++ goto lock_loop_2;
++ }
++ }
++#else
+ if (rw_lock_x_lock_low(lock, pass, file_name, line)) {
+ sync_array_free_cell(sync_primary_wait_array, index);
+ return; /* Locking succeeded */
+ }
++#endif
+
+ if (srv_print_latch_waits) {
+ fprintf(stderr,
+@@ -914,11 +1138,24 @@
+
+ fprintf(file, "RW-LOCK: %p ", (void*) lock);
+
++#ifdef INNODB_RW_LOCKS_USE_ATOMICS
++ if (rw_lock_get_s_waiters(lock)) {
++ fputs(" s_waiters for the lock exist", file);
++ }
++ if (rw_lock_get_x_waiters(lock)) {
++ fputs(" x_waiters for the lock exist", file);
++ }
++ if (rw_lock_get_wx_waiters(lock)) {
++ fputs(" wait_ex_waiters for the lock exist", file);
++ }
++ putc('\n', file);
++#else
+ if (rw_lock_get_waiters(lock)) {
+ fputs(" Waiters for the lock exist\n", file);
+ } else {
+ putc('\n', file);
+ }
++#endif
+
+ info = UT_LIST_GET_FIRST(lock->debug_list);
+ while (info != NULL) {
+@@ -957,11 +1194,24 @@
+ #endif
+ if (lock->lock_word != X_LOCK_DECR) {
+
++#ifdef INNODB_RW_LOCKS_USE_ATOMICS
++ if (rw_lock_get_s_waiters(lock)) {
++ fputs(" s_waiters for the lock exist", stderr);
++ }
++ if (rw_lock_get_x_waiters(lock)) {
++ fputs(" x_waiters for the lock exist", stderr);
++ }
++ if (rw_lock_get_wx_waiters(lock)) {
++ fputs(" wait_ex_waiters for the lock exist", stderr);
++ }
++ putc('\n', stderr);
++#else
+ if (rw_lock_get_waiters(lock)) {
+ fputs(" Waiters for the lock exist\n", stderr);
+ } else {
+ putc('\n', stderr);
+ }
++#endif
+
+ info = UT_LIST_GET_FIRST(lock->debug_list);
+ while (info != NULL) {
=== added file 'innodb_show_enhancements.patch'
--- innodb_show_enhancements.patch 1970-01-01 00:00:00 +0000
+++ innodb_show_enhancements.patch 2009-03-20 05:35:54 +0000
@@ -0,0 +1,278 @@
+diff -ruN innodb_plugin-1.0.3_orig/Makefile.am innodb_plugin-1.0.3_tmp/Makefile.am
+--- innodb_plugin-1.0.3_orig/Makefile.am 2008-05-01 02:59:16.000000000 +0900
++++ innodb_plugin-1.0.3_tmp/Makefile.am 2009-03-18 17:14:19.000000000 +0900
+@@ -131,7 +131,7 @@
+ include/ut0list.ic include/ut0wqueue.h \
+ include/ha_prototypes.h handler/ha_innodb.h \
+ include/handler0alter.h \
+- handler/i_s.h
++ handler/i_s.h handler/innodb_patch_info.h
+
+ EXTRA_LIBRARIES= libinnobase.a
+ noinst_LIBRARIES= @plugin_innobase_static_target@
+diff -ruN innodb_plugin-1.0.3_orig/Makefile.in innodb_plugin-1.0.3_tmp/Makefile.in
+--- innodb_plugin-1.0.3_orig/Makefile.in 2009-03-06 19:22:06.000000000 +0900
++++ innodb_plugin-1.0.3_tmp/Makefile.in 2009-03-18 17:14:19.000000000 +0900
+@@ -651,7 +651,7 @@
+ include/ut0list.ic include/ut0wqueue.h \
+ include/ha_prototypes.h handler/ha_innodb.h \
+ include/handler0alter.h \
+- handler/i_s.h
++ handler/i_s.h handler/innodb_patch_info.h
+
+ EXTRA_LIBRARIES = libinnobase.a
+ noinst_LIBRARIES = @plugin_innobase_static_target@
+diff -ruN innodb_plugin-1.0.3_orig/handler/ha_innodb.cc innodb_plugin-1.0.3_tmp/handler/ha_innodb.cc
+--- innodb_plugin-1.0.3_orig/handler/ha_innodb.cc 2009-03-06 05:29:07.000000000 +0900
++++ innodb_plugin-1.0.3_tmp/handler/ha_innodb.cc 2009-03-18 17:14:19.000000000 +0900
+@@ -9940,7 +9940,8 @@
+ i_s_innodb_cmp,
+ i_s_innodb_cmp_reset,
+ i_s_innodb_cmpmem,
+-i_s_innodb_cmpmem_reset
++i_s_innodb_cmpmem_reset,
++i_s_innodb_patches
+ mysql_declare_plugin_end;
+
+ #ifdef UNIV_COMPILE_TEST_FUNCS
+diff -ruN innodb_plugin-1.0.3_orig/handler/i_s.cc innodb_plugin-1.0.3_tmp/handler/i_s.cc
+--- innodb_plugin-1.0.3_orig/handler/i_s.cc 2009-02-17 17:25:45.000000000 +0900
++++ innodb_plugin-1.0.3_tmp/handler/i_s.cc 2009-03-18 17:14:19.000000000 +0900
+@@ -31,6 +31,7 @@
+ #include <mysys_err.h>
+ #include <my_sys.h>
+ #include "i_s.h"
++#include "innodb_patch_info.h"
+ #include <mysql/plugin.h>
+
+ extern "C" {
+@@ -215,6 +216,168 @@
+ return(ret);
+ }
+
++/* Fields of the dynamic table INFORMATION_SCHEMA.XTRADB_ENHANCEMENTS */
++static ST_FIELD_INFO innodb_patches_fields_info[] =
++{
++#define IDX_PATCH_NAME 0
++ {STRUCT_FLD(field_name, "name"),
++ STRUCT_FLD(field_length, 255),
++ STRUCT_FLD(field_type, MYSQL_TYPE_STRING),
++ STRUCT_FLD(value, 0),
++ STRUCT_FLD(field_flags, 0),
++ STRUCT_FLD(old_name, ""),
++ STRUCT_FLD(open_method, SKIP_OPEN_TABLE)},
++
++#define IDX_PATCH_DESCR 1
++ {STRUCT_FLD(field_name, "description"),
++ STRUCT_FLD(field_length, 255),
++ STRUCT_FLD(field_type, MYSQL_TYPE_STRING),
++ STRUCT_FLD(value, 0),
++ STRUCT_FLD(field_flags, 0),
++ STRUCT_FLD(old_name, ""),
++ STRUCT_FLD(open_method, SKIP_OPEN_TABLE)},
++
++#define IDX_PATCH_COMMENT 2
++ {STRUCT_FLD(field_name, "comment"),
++ STRUCT_FLD(field_length, 100),
++ STRUCT_FLD(field_type, MYSQL_TYPE_STRING),
++ STRUCT_FLD(value, 0),
++ STRUCT_FLD(field_flags, 0),
++ STRUCT_FLD(old_name, ""),
++ STRUCT_FLD(open_method, SKIP_OPEN_TABLE)},
++
++#define IDX_PATCH_LINK 3
++ {STRUCT_FLD(field_name, "link"),
++ STRUCT_FLD(field_length, 255),
++ STRUCT_FLD(field_type, MYSQL_TYPE_STRING),
++ STRUCT_FLD(value, 0),
++ STRUCT_FLD(field_flags, 0),
++ STRUCT_FLD(old_name, ""),
++ STRUCT_FLD(open_method, SKIP_OPEN_TABLE)},
++
++ END_OF_ST_FIELD_INFO
++};
++
++static struct st_mysql_information_schema i_s_info =
++{
++ MYSQL_INFORMATION_SCHEMA_INTERFACE_VERSION
++};
++
++/***********************************************************************
++Fill the dynamic table information_schema.XTRADB_ENHANCEMENTS. */
++static
++int
++innodb_patches_fill(
++/*=============*/
++ /* out: 0 on success, 1 on failure */
++ THD* thd, /* in: thread */
++ TABLE_LIST* tables, /* in/out: tables to fill */
++ COND* cond) /* in: condition (ignored) */
++{
++ TABLE* table = (TABLE *) tables->table;
++ int status = 0;
++ int i;
++ Field** fields;
++
++
++ DBUG_ENTER("innodb_patches_fill");
++ fields = table->field;
++
++ /* return an empty result to users lacking the PROCESS privilege */
++ if (check_global_access(thd, PROCESS_ACL)) {
++
++ DBUG_RETURN(0);
++ }
++
++ RETURN_IF_INNODB_NOT_STARTED(tables->schema_table_name);
++
++ for (i = 0; innodb_enhancements[i].file; i++) {
++
++ field_store_string(fields[0],innodb_enhancements[i].file);
++ field_store_string(fields[1],innodb_enhancements[i].name);
++ field_store_string(fields[2],innodb_enhancements[i].comment);
++ field_store_string(fields[3],innodb_enhancements[i].link);
++
++ if (schema_table_store_record(thd, table)) {
++ status = 1;
++ break;
++ }
++
++ }
++
++
++ DBUG_RETURN(status);
++}
++
++/***********************************************************************
++Bind the dynamic table information_schema.XTRADB_ENHANCEMENTS. */
++static
++int
++innodb_patches_init(
++/*=========*/
++ /* out: 0 on success */
++ void* p) /* in/out: table schema object */
++{
++ DBUG_ENTER("innodb_patches_init");
++ ST_SCHEMA_TABLE* schema = (ST_SCHEMA_TABLE*) p;
++
++ schema->fields_info = innodb_patches_fields_info;
++ schema->fill_table = innodb_patches_fill;
++
++ DBUG_RETURN(0);
++}
++
++
++UNIV_INTERN struct st_mysql_plugin i_s_innodb_patches =
++{
++ /* the plugin type (a MYSQL_XXX_PLUGIN value) */
++ /* int */
++ STRUCT_FLD(type, MYSQL_INFORMATION_SCHEMA_PLUGIN),
++
++ /* pointer to type-specific plugin descriptor */
++ /* void* */
++ STRUCT_FLD(info, &i_s_info),
++
++ /* plugin name */
++ /* const char* */
++ STRUCT_FLD(name, "XTRADB_ENHANCEMENTS"),
++
++ /* plugin author (for SHOW PLUGINS) */
++ /* const char* */
++ STRUCT_FLD(author, "Percona"),
++
++ /* general descriptive text (for SHOW PLUGINS) */
++ /* const char* */
++ STRUCT_FLD(descr, "Enhancements applied to InnoDB plugin"),
++
++ /* the plugin license (PLUGIN_LICENSE_XXX) */
++ /* int */
++ STRUCT_FLD(license, PLUGIN_LICENSE_GPL),
++
++ /* the function to invoke when plugin is loaded */
++ /* int (*)(void*); */
++ STRUCT_FLD(init, innodb_patches_init),
++
++ /* the function to invoke when plugin is unloaded */
++ /* int (*)(void*); */
++ STRUCT_FLD(deinit, i_s_common_deinit),
++
++ /* plugin version (for SHOW PLUGINS) */
++ /* unsigned int */
++ STRUCT_FLD(version, INNODB_VERSION_SHORT),
++
++ /* struct st_mysql_show_var* */
++ STRUCT_FLD(status_vars, NULL),
++
++ /* struct st_mysql_sys_var** */
++ STRUCT_FLD(system_vars, NULL),
++
++ /* reserved for dependency checking */
++ /* void* */
++ STRUCT_FLD(__reserved1, NULL)
++};
++
++
+ /* Fields of the dynamic table INFORMATION_SCHEMA.innodb_trx */
+ static ST_FIELD_INFO innodb_trx_fields_info[] =
+ {
+@@ -399,10 +562,6 @@
+ DBUG_RETURN(0);
+ }
+
+-static struct st_mysql_information_schema i_s_info =
+-{
+- MYSQL_INFORMATION_SCHEMA_INTERFACE_VERSION
+-};
+
+ UNIV_INTERN struct st_mysql_plugin i_s_innodb_trx =
+ {
+diff -ruN innodb_plugin-1.0.3_orig/handler/i_s.h innodb_plugin-1.0.3_tmp/handler/i_s.h
+--- innodb_plugin-1.0.3_orig/handler/i_s.h 2009-02-17 17:25:45.000000000 +0900
++++ innodb_plugin-1.0.3_tmp/handler/i_s.h 2009-03-18 17:14:19.000000000 +0900
+@@ -32,5 +32,6 @@
+ extern struct st_mysql_plugin i_s_innodb_cmp_reset;
+ extern struct st_mysql_plugin i_s_innodb_cmpmem;
+ extern struct st_mysql_plugin i_s_innodb_cmpmem_reset;
++extern struct st_mysql_plugin i_s_innodb_patches;
+
+ #endif /* i_s_h */
+diff -ruN innodb_plugin-1.0.3_orig/handler/innodb_patch_info.h innodb_plugin-1.0.3_tmp/handler/innodb_patch_info.h
+--- innodb_plugin-1.0.3_orig/handler/innodb_patch_info.h 1970-01-01 09:00:00.000000000 +0900
++++ innodb_plugin-1.0.3_tmp/handler/innodb_patch_info.h 2009-03-18 17:14:19.000000000 +0900
+@@ -0,0 +1,28 @@
++/* Copyright (C) 2002-2006 MySQL AB
++
++ This program is free software; you can redistribute it and/or modify
++ it under the terms of the GNU General Public License as published by
++ the Free Software Foundation; version 2 of the License.
++
++ This program is distributed in the hope that it will be useful,
++ but WITHOUT ANY WARRANTY; without even the implied warranty of
++ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
++ GNU General Public License for more details.
++
++ You should have received a copy of the GNU General Public License
++ along with this program; if not, write to the Free Software
++ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */
++
++#ifdef USE_PRAGMA_INTERFACE
++#pragma interface /* gcc class implementation */
++#endif
++
++struct innodb_enhancement {
++ const char *file;
++ const char *name;
++ const char *comment;
++ const char *link;
++}innodb_enhancements[] = {
++{"xtradb_show_enhancements","I_S.XTRADB_ENHANCEMENTS","","http://www.percona.com/docs/wiki/percona-xtradb"},
++{NULL, NULL, NULL, NULL}
++};
+diff -ruN innodb_plugin-1.0.3_orig/scripts/install_innodb_plugins.sql innodb_plugin-1.0.3_tmp/scripts/install_innodb_plugins.sql
+--- innodb_plugin-1.0.3_orig/scripts/install_innodb_plugins.sql 2008-10-30 19:38:18.000000000 +0900
++++ innodb_plugin-1.0.3_tmp/scripts/install_innodb_plugins.sql 2009-03-18 17:14:19.000000000 +0900
+@@ -7,3 +7,4 @@
+ INSTALL PLUGIN innodb_cmp_reset SONAME 'ha_innodb.so';
+ INSTALL PLUGIN innodb_cmpmem SONAME 'ha_innodb.so';
+ INSTALL PLUGIN innodb_cmpmem_reset SONAME 'ha_innodb.so';
++INSTALL PLUGIN XTRADB_ENHANCEMENTS SONAME 'ha_innodb.so';
=== added file 'innodb_show_status.patch'
--- innodb_show_status.patch 1970-01-01 00:00:00 +0000
+++ innodb_show_status.patch 2009-03-20 05:35:54 +0000
@@ -0,0 +1,493 @@
+diff -ruN innodb_plugin-1.0.3_orig/buf/buf0buf.c innodb_plugin-1.0.3_tmp/buf/buf0buf.c
+--- innodb_plugin-1.0.3_orig/buf/buf0buf.c 2009-02-17 16:56:33.000000000 +0900
++++ innodb_plugin-1.0.3_tmp/buf/buf0buf.c 2009-03-18 17:19:36.000000000 +0900
+@@ -3760,13 +3760,15 @@
+ buf_pool_mutex_enter();
+
+ fprintf(file,
+- "Buffer pool size %lu\n"
+- "Free buffers %lu\n"
+- "Database pages %lu\n"
+- "Modified db pages %lu\n"
++ "Buffer pool size %lu\n"
++ "Buffer pool size, bytes %lu\n"
++ "Free buffers %lu\n"
++ "Database pages %lu\n"
++ "Modified db pages %lu\n"
+ "Pending reads %lu\n"
+ "Pending writes: LRU %lu, flush list %lu, single page %lu\n",
+ (ulong) size,
++ (ulong) size * UNIV_PAGE_SIZE,
+ (ulong) UT_LIST_GET_LEN(buf_pool->free),
+ (ulong) UT_LIST_GET_LEN(buf_pool->LRU),
+ (ulong) UT_LIST_GET_LEN(buf_pool->flush_list),
+diff -ruN innodb_plugin-1.0.3_orig/fil/fil0fil.c innodb_plugin-1.0.3_tmp/fil/fil0fil.c
+--- innodb_plugin-1.0.3_orig/fil/fil0fil.c 2009-02-17 17:15:06.000000000 +0900
++++ innodb_plugin-1.0.3_tmp/fil/fil0fil.c 2009-03-18 17:19:36.000000000 +0900
+@@ -4788,3 +4788,30 @@
+
+ return(mach_read_from_2(page + FIL_PAGE_TYPE));
+ }
++
++/*************************************************************************
++Return size information about the fil_system hash tables. */
++
++ulint
++fil_system_hash_cells(void)
++/*=======================*/
++{
++ if (fil_system) {
++ return (fil_system->spaces->n_cells
++ + fil_system->name_hash->n_cells);
++ } else {
++ return 0;
++ }
++}
++
++ulint
++fil_system_hash_nodes(void)
++/*=======================*/
++{
++ if (fil_system) {
++ return (UT_LIST_GET_LEN(fil_system->space_list)
++ * (sizeof(fil_space_t) + MEM_BLOCK_HEADER_SIZE));
++ } else {
++ return 0;
++ }
++}
+diff -ruN innodb_plugin-1.0.3_orig/handler/ha_innodb.cc innodb_plugin-1.0.3_tmp/handler/ha_innodb.cc
+--- innodb_plugin-1.0.3_orig/handler/ha_innodb.cc 2009-03-18 17:18:40.000000000 +0900
++++ innodb_plugin-1.0.3_tmp/handler/ha_innodb.cc 2009-03-18 17:19:36.000000000 +0900
+@@ -9542,6 +9542,16 @@
+ "Force InnoDB to not use next-key locking, to use only row-level locking.",
+ NULL, NULL, FALSE);
+
++static MYSQL_SYSVAR_ULONG(show_verbose_locks, srv_show_verbose_locks,
++ PLUGIN_VAR_OPCMDARG,
++ "Whether to show records locked in SHOW INNODB STATUS.",
++ NULL, NULL, 0, 0, 1, 0);
++
++static MYSQL_SYSVAR_ULONG(show_locks_held, srv_show_locks_held,
++ PLUGIN_VAR_RQCMDARG,
++ "Number of locks held to print for each InnoDB transaction in SHOW INNODB STATUS.",
++ NULL, NULL, 10, 0, 1000, 0);
++
+ #ifdef UNIV_LOG_ARCHIVE
+ static MYSQL_SYSVAR_STR(log_arch_dir, innobase_log_arch_dir,
+ PLUGIN_VAR_RQCMDARG | PLUGIN_VAR_READONLY,
+@@ -9692,7 +9702,7 @@
+
+ static MYSQL_SYSVAR_STR(version, innodb_version_str,
+ PLUGIN_VAR_NOCMDOPT | PLUGIN_VAR_READONLY,
+- "InnoDB version", NULL, NULL, INNODB_VERSION_STR);
++ "Percona-InnoDB-plugin version", NULL, NULL, INNODB_VERSION_STR);
+
+ static MYSQL_SYSVAR_BOOL(use_sys_malloc, srv_use_sys_malloc,
+ PLUGIN_VAR_NOCMDARG | PLUGIN_VAR_READONLY,
+@@ -9751,6 +9761,8 @@
+ MYSQL_SYSVAR(thread_concurrency),
+ MYSQL_SYSVAR(thread_sleep_delay),
+ MYSQL_SYSVAR(autoinc_lock_mode),
++ MYSQL_SYSVAR(show_verbose_locks),
++ MYSQL_SYSVAR(show_locks_held),
+ MYSQL_SYSVAR(version),
+ MYSQL_SYSVAR(use_sys_malloc),
+ MYSQL_SYSVAR(change_buffering),
+diff -ruN innodb_plugin-1.0.3_orig/handler/innodb_patch_info.h innodb_plugin-1.0.3_tmp/handler/innodb_patch_info.h
+--- innodb_plugin-1.0.3_orig/handler/innodb_patch_info.h 2009-03-18 17:18:40.000000000 +0900
++++ innodb_plugin-1.0.3_tmp/handler/innodb_patch_info.h 2009-03-18 17:19:36.000000000 +0900
+@@ -24,5 +24,6 @@
+ const char *link;
+ }innodb_enhancements[] = {
+ {"xtradb_show_enhancements","I_S.XTRADB_ENHANCEMENTS","","http://www.percona.com/docs/wiki/percona-xtradb"},
++{"innodb_show_status","Improvements to SHOW INNODB STATUS","Memory information and lock info fixes","http://www.percona.com/docs/wiki/percona-xtradb"},
+ {NULL, NULL, NULL, NULL}
+ };
+diff -ruN innodb_plugin-1.0.3_orig/include/fil0fil.h innodb_plugin-1.0.3_tmp/include/fil0fil.h
+--- innodb_plugin-1.0.3_orig/include/fil0fil.h 2009-02-17 18:06:49.000000000 +0900
++++ innodb_plugin-1.0.3_tmp/include/fil0fil.h 2009-03-18 17:19:36.000000000 +0900
+@@ -695,6 +695,16 @@
+ return value not defined */
+ const byte* page); /* in: file page */
+
++/*************************************************************************
++Return size information about the fil_system hash tables. */
++
++ulint
++fil_system_hash_cells(void);
++/*========================*/
++
++ulint
++fil_system_hash_nodes(void);
++/*========================*/
+
+ typedef struct fil_space_struct fil_space_t;
+
+diff -ruN innodb_plugin-1.0.3_orig/include/srv0srv.h innodb_plugin-1.0.3_tmp/include/srv0srv.h
+--- innodb_plugin-1.0.3_orig/include/srv0srv.h 2009-02-25 19:09:15.000000000 +0900
++++ innodb_plugin-1.0.3_tmp/include/srv0srv.h 2009-03-18 17:19:36.000000000 +0900
+@@ -112,6 +112,9 @@
+ extern ulint srv_log_buffer_size;
+ extern ulong srv_flush_log_at_trx_commit;
+
++extern ulint srv_show_locks_held;
++extern ulint srv_show_verbose_locks;
++
+ /* The sort order table of the MySQL latin1_swedish_ci character set
+ collation */
+ extern const byte* srv_latin1_ordering;
+diff -ruN innodb_plugin-1.0.3_orig/include/thr0loc.h innodb_plugin-1.0.3_tmp/include/thr0loc.h
+--- innodb_plugin-1.0.3_orig/include/thr0loc.h 2009-02-17 18:39:11.000000000 +0900
++++ innodb_plugin-1.0.3_tmp/include/thr0loc.h 2009-03-18 17:19:36.000000000 +0900
+@@ -76,6 +76,17 @@
+ /*=============================*/
+ /* out: pointer to the in_ibuf field */
+
++/*************************************************************************
++Return size information about the thread-local hash table. */
++
++ulint
++thr_local_hash_cells(void);
++/*=======================*/
++
++ulint
++thr_local_hash_nodes(void);
++/*=======================*/
++
+ #ifndef UNIV_NONINL
+ #include "thr0loc.ic"
+ #endif
+diff -ruN innodb_plugin-1.0.3_orig/include/univ.i innodb_plugin-1.0.3_tmp/include/univ.i
+--- innodb_plugin-1.0.3_orig/include/univ.i 2009-03-05 23:38:59.000000000 +0900
++++ innodb_plugin-1.0.3_tmp/include/univ.i 2009-03-18 17:37:39.000000000 +0900
+@@ -35,6 +35,7 @@
+ #define INNODB_VERSION_MAJOR 1
+ #define INNODB_VERSION_MINOR 0
+ #define INNODB_VERSION_BUGFIX 3
++#define PERCONA_INNODB_VERSION 3
+
+ /* The following is the InnoDB version as shown in
+ SELECT plugin_version FROM information_schema.plugins;
+@@ -46,13 +47,14 @@
+ (INNODB_VERSION_MAJOR << 8 | INNODB_VERSION_MINOR)
+
+ /* auxiliary macros to help creating the version as string */
+-#define __INNODB_VERSION(a, b, c) (#a "." #b "." #c)
+-#define _INNODB_VERSION(a, b, c) __INNODB_VERSION(a, b, c)
++#define __INNODB_VERSION(a, b, c, d) (#a "." #b "." #c "-" #d)
++#define _INNODB_VERSION(a, b, c, d) __INNODB_VERSION(a, b, c, d)
+
+ #define INNODB_VERSION_STR \
+ _INNODB_VERSION(INNODB_VERSION_MAJOR, \
+ INNODB_VERSION_MINOR, \
+- INNODB_VERSION_BUGFIX)
++ INNODB_VERSION_BUGFIX, \
++ PERCONA_INNODB_VERSION)
+
+ #ifdef MYSQL_DYNAMIC_PLUGIN
+ /* In the dynamic plugin, redefine some externally visible symbols
+diff -ruN innodb_plugin-1.0.3_orig/lock/lock0lock.c innodb_plugin-1.0.3_tmp/lock/lock0lock.c
+--- innodb_plugin-1.0.3_orig/lock/lock0lock.c 2009-02-17 18:50:12.000000000 +0900
++++ innodb_plugin-1.0.3_tmp/lock/lock0lock.c 2009-03-18 17:19:36.000000000 +0900
+@@ -4319,32 +4319,32 @@
+
+ putc('\n', file);
+
+- block = buf_page_try_get(space, page_no, &mtr);
+-
+- if (block) {
+- for (i = 0; i < lock_rec_get_n_bits(lock); i++) {
+-
+- if (lock_rec_get_nth_bit(lock, i)) {
+-
+- const rec_t* rec
+- = page_find_rec_with_heap_no(
+- buf_block_get_frame(block), i);
+- offsets = rec_get_offsets(
+- rec, lock->index, offsets,
+- ULINT_UNDEFINED, &heap);
+-
+- fprintf(file, "Record lock, heap no %lu ",
+- (ulong) i);
+- rec_print_new(file, rec, offsets);
+- putc('\n', file);
++ if ( srv_show_verbose_locks ) {
++ block = buf_page_try_get(space, page_no, &mtr);
++ if (block) {
++ for (i = 0; i < lock_rec_get_n_bits(lock); i++) {
++
++ if (lock_rec_get_nth_bit(lock, i)) {
++
++ const rec_t* rec
++ = page_find_rec_with_heap_no(
++ buf_block_get_frame(block), i);
++ offsets = rec_get_offsets(
++ rec, lock->index, offsets,
++ ULINT_UNDEFINED, &heap);
++
++ fprintf(file, "Record lock, heap no %lu ",
++ (ulong) i);
++ rec_print_new(file, rec, offsets);
++ putc('\n', file);
++ }
++ }
++ } else {
++ for (i = 0; i < lock_rec_get_n_bits(lock); i++) {
++ fprintf(file, "Record lock, heap no %lu\n", (ulong) i);
+ }
+- }
+- } else {
+- for (i = 0; i < lock_rec_get_n_bits(lock); i++) {
+- fprintf(file, "Record lock, heap no %lu\n", (ulong) i);
+ }
+ }
+-
+ mtr_commit(&mtr);
+ if (UNIV_LIKELY_NULL(heap)) {
+ mem_heap_free(heap);
+@@ -4523,7 +4523,7 @@
+ }
+ }
+
+- if (!srv_print_innodb_lock_monitor) {
++ if (!srv_print_innodb_lock_monitor && !srv_show_locks_held) {
+ nth_trx++;
+ goto loop;
+ }
+@@ -4582,8 +4582,8 @@
+
+ nth_lock++;
+
+- if (nth_lock >= 10) {
+- fputs("10 LOCKS PRINTED FOR THIS TRX:"
++ if (nth_lock >= srv_show_locks_held) {
++ fputs("TOO LOCKS PRINTED FOR THIS TRX:"
+ " SUPPRESSING FURTHER PRINTS\n",
+ file);
+
+diff -ruN innodb_plugin-1.0.3_orig/srv/srv0srv.c innodb_plugin-1.0.3_tmp/srv/srv0srv.c
+--- innodb_plugin-1.0.3_orig/srv/srv0srv.c 2009-02-25 19:09:15.000000000 +0900
++++ innodb_plugin-1.0.3_tmp/srv/srv0srv.c 2009-03-18 17:19:36.000000000 +0900
+@@ -156,6 +156,10 @@
+ UNIV_INTERN ulint srv_log_buffer_size = ULINT_MAX;
+ UNIV_INTERN ulong srv_flush_log_at_trx_commit = 1;
+
++UNIV_INTERN ulint srv_show_locks_held = 10;
++UNIV_INTERN ulint srv_show_verbose_locks = 0;
++
++
+ /* The sort order table of the MySQL latin1_swedish_ci character set
+ collation */
+ UNIV_INTERN const byte* srv_latin1_ordering;
+@@ -1644,6 +1648,14 @@
+ time_t current_time;
+ ulint n_reserved;
+
++ ulint btr_search_sys_subtotal;
++ ulint lock_sys_subtotal;
++ ulint recv_sys_subtotal;
++ ulint io_counter_subtotal;
++
++ ulint i;
++ trx_t* trx;
++
+ mutex_enter(&srv_innodb_monitor_mutex);
+
+ current_time = time(NULL);
+@@ -1687,24 +1699,6 @@
+
+ mutex_exit(&dict_foreign_err_mutex);
+
+- lock_print_info_summary(file);
+- if (trx_start) {
+- long t = ftell(file);
+- if (t < 0) {
+- *trx_start = ULINT_UNDEFINED;
+- } else {
+- *trx_start = (ulint) t;
+- }
+- }
+- lock_print_info_all_transactions(file);
+- if (trx_end) {
+- long t = ftell(file);
+- if (t < 0) {
+- *trx_end = ULINT_UNDEFINED;
+- } else {
+- *trx_end = (ulint) t;
+- }
+- }
+ fputs("--------\n"
+ "FILE I/O\n"
+ "--------\n", file);
+@@ -1735,10 +1729,84 @@
+ "BUFFER POOL AND MEMORY\n"
+ "----------------------\n", file);
+ fprintf(file,
+- "Total memory allocated " ULINTPF
+- "; in additional pool allocated " ULINTPF "\n",
+- ut_total_allocated_memory,
+- mem_pool_get_reserved(mem_comm_pool));
++ "Total memory allocated " ULINTPF
++ "; in additional pool allocated " ULINTPF "\n",
++ ut_total_allocated_memory,
++ mem_pool_get_reserved(mem_comm_pool));
++ /* Calculate reserved memory */
++ if (btr_search_sys && btr_search_sys->hash_index->heap) {
++ btr_search_sys_subtotal = mem_heap_get_size(btr_search_sys->hash_index->heap);
++ } else {
++ btr_search_sys_subtotal = 0;
++ for (i=0; i < btr_search_sys->hash_index->n_mutexes; i++) {
++ btr_search_sys_subtotal += mem_heap_get_size(btr_search_sys->hash_index->heaps[i]);
++ }
++ }
++
++ lock_sys_subtotal = 0;
++ if (trx_sys) {
++ mutex_enter(&kernel_mutex);
++ trx = UT_LIST_GET_FIRST(trx_sys->mysql_trx_list);
++ while (trx) {
++ lock_sys_subtotal += ((trx->lock_heap) ? mem_heap_get_size(trx->lock_heap) : 0);
++ trx = UT_LIST_GET_NEXT(mysql_trx_list, trx);
++ }
++ mutex_exit(&kernel_mutex);
++ }
++
++ recv_sys_subtotal = ((recv_sys && recv_sys->addr_hash)
++ ? mem_heap_get_size(recv_sys->heap) : 0);
++
++ fprintf(file,
++ "Internal hash tables (constant factor + variable factor)\n"
++ " Adaptive hash index %lu \t(%lu + %lu)\n"
++ " Page hash %lu\n"
++ " Dictionary cache %lu \t(%lu + %lu)\n"
++ " File system %lu \t(%lu + %lu)\n"
++ " Lock system %lu \t(%lu + %lu)\n"
++ " Recovery system %lu \t(%lu + %lu)\n"
++ " Threads %lu \t(%lu + %lu)\n",
++
++ (ulong) (btr_search_sys
++ ? (btr_search_sys->hash_index->n_cells * sizeof(hash_cell_t)) : 0)
++ + btr_search_sys_subtotal,
++ (ulong) (btr_search_sys
++ ? (btr_search_sys->hash_index->n_cells * sizeof(hash_cell_t)) : 0),
++ (ulong) btr_search_sys_subtotal,
++
++ (ulong) (buf_pool->page_hash->n_cells * sizeof(hash_cell_t)),
++
++ (ulong) (dict_sys ? ((dict_sys->table_hash->n_cells
++ + dict_sys->table_id_hash->n_cells
++ ) * sizeof(hash_cell_t)
++ + dict_sys->size) : 0),
++ (ulong) (dict_sys ? ((dict_sys->table_hash->n_cells
++ + dict_sys->table_id_hash->n_cells
++ ) * sizeof(hash_cell_t)) : 0),
++ (ulong) (dict_sys ? (dict_sys->size) : 0),
++
++ (ulong) (fil_system_hash_cells() * sizeof(hash_cell_t)
++ + fil_system_hash_nodes()),
++ (ulong) (fil_system_hash_cells() * sizeof(hash_cell_t)),
++ (ulong) fil_system_hash_nodes(),
++
++ (ulong) ((lock_sys ? (lock_sys->rec_hash->n_cells * sizeof(hash_cell_t)) : 0)
++ + lock_sys_subtotal),
++ (ulong) (lock_sys ? (lock_sys->rec_hash->n_cells * sizeof(hash_cell_t)) : 0),
++ (ulong) lock_sys_subtotal,
++
++ (ulong) (((recv_sys && recv_sys->addr_hash)
++ ? (recv_sys->addr_hash->n_cells * sizeof(hash_cell_t)) : 0)
++ + recv_sys_subtotal),
++ (ulong) ((recv_sys && recv_sys->addr_hash)
++ ? (recv_sys->addr_hash->n_cells * sizeof(hash_cell_t)) : 0),
++ (ulong) recv_sys_subtotal,
++
++ (ulong) (thr_local_hash_cells() * sizeof(hash_cell_t)
++ + thr_local_hash_nodes()),
++ (ulong) (thr_local_hash_cells() * sizeof(hash_cell_t)),
++ (ulong) thr_local_hash_nodes());
++
+ fprintf(file, "Dictionary memory allocated " ULINTPF "\n",
+ dict_sys->size);
+
+@@ -1797,6 +1865,25 @@
+ srv_n_rows_deleted_old = srv_n_rows_deleted;
+ srv_n_rows_read_old = srv_n_rows_read;
+
++ lock_print_info_summary(file);
++ if (trx_start) {
++ long t = ftell(file);
++ if (t < 0) {
++ *trx_start = ULINT_UNDEFINED;
++ } else {
++ *trx_start = (ulint) t;
++ }
++ }
++ lock_print_info_all_transactions(file);
++ if (trx_end) {
++ long t = ftell(file);
++ if (t < 0) {
++ *trx_end = ULINT_UNDEFINED;
++ } else {
++ *trx_end = (ulint) t;
++ }
++ }
++
+ fputs("----------------------------\n"
+ "END OF INNODB MONITOR OUTPUT\n"
+ "============================\n", file);
+diff -ruN innodb_plugin-1.0.3_orig/thr/thr0loc.c innodb_plugin-1.0.3_tmp/thr/thr0loc.c
+--- innodb_plugin-1.0.3_orig/thr/thr0loc.c 2009-02-17 19:09:16.000000000 +0900
++++ innodb_plugin-1.0.3_tmp/thr/thr0loc.c 2009-03-18 17:19:36.000000000 +0900
+@@ -48,6 +48,7 @@
+
+ /* The hash table. The module is not yet initialized when it is NULL. */
+ static hash_table_t* thr_local_hash = NULL;
++ulint thr_local_hash_n_nodes = 0;
+
+ /* The private data for each thread should be put to
+ the structure below and the accessor functions written
+@@ -193,6 +194,7 @@
+ os_thread_pf(os_thread_get_curr_id()),
+ local);
+
++ thr_local_hash_n_nodes++;
+ mutex_exit(&thr_local_mutex);
+ }
+
+@@ -220,6 +222,7 @@
+
+ HASH_DELETE(thr_local_t, hash, thr_local_hash,
+ os_thread_pf(id), local);
++ thr_local_hash_n_nodes--;
+
+ mutex_exit(&thr_local_mutex);
+
+@@ -242,3 +245,29 @@
+
+ mutex_create(&thr_local_mutex, SYNC_THR_LOCAL);
+ }
++
++/*************************************************************************
++Return size information about the thread-local hash table. */
++
++ulint
++thr_local_hash_cells(void)
++/*======================*/
++{
++ if (thr_local_hash) {
++ return (thr_local_hash->n_cells);
++ } else {
++ return 0;
++ }
++}
++
++ulint
++thr_local_hash_nodes(void)
++/*======================*/
++{
++ if (thr_local_hash) {
++ return (thr_local_hash_n_nodes
++ * (sizeof(thr_local_t) + MEM_BLOCK_HEADER_SIZE));
++ } else {
++ return 0;
++ }
++}
=== added file 'innodb_split_buf_pool_mutex.patch'
--- innodb_split_buf_pool_mutex.patch 1970-01-01 00:00:00 +0000
+++ innodb_split_buf_pool_mutex.patch 2009-03-20 05:35:54 +0000
@@ -0,0 +1,3356 @@
+diff -ruN innodb_plugin-1.0.3_orig/btr/btr0cur.c innodb_plugin-1.0.3_tmp/btr/btr0cur.c
+--- innodb_plugin-1.0.3_orig/btr/btr0cur.c 2009-02-27 06:27:51.000000000 +0900
++++ innodb_plugin-1.0.3_tmp/btr/btr0cur.c 2009-03-19 18:51:20.000000000 +0900
+@@ -3733,7 +3733,8 @@
+
+ mtr_commit(mtr);
+
+- buf_pool_mutex_enter();
++ //buf_pool_mutex_enter();
++ mutex_enter(&LRU_list_mutex);
+ mutex_enter(&block->mutex);
+
+ /* Only free the block if it is still allocated to
+@@ -3744,17 +3745,22 @@
+ && buf_block_get_space(block) == space
+ && buf_block_get_page_no(block) == page_no) {
+
+- if (buf_LRU_free_block(&block->page, all, NULL)
++ if (buf_LRU_free_block(&block->page, all, NULL, TRUE)
+ != BUF_LRU_FREED
+- && all && block->page.zip.data) {
++ && all && block->page.zip.data
++ /* Now, buf_LRU_free_block() may release mutex temporarily */
++ && buf_block_get_state(block) == BUF_BLOCK_FILE_PAGE
++ && buf_block_get_space(block) == space
++ && buf_block_get_page_no(block) == page_no) {
+ /* Attempt to deallocate the uncompressed page
+ if the whole block cannot be deallocted. */
+
+- buf_LRU_free_block(&block->page, FALSE, NULL);
++ buf_LRU_free_block(&block->page, FALSE, NULL, TRUE);
+ }
+ }
+
+- buf_pool_mutex_exit();
++ //buf_pool_mutex_exit();
++ mutex_exit(&LRU_list_mutex);
+ mutex_exit(&block->mutex);
+ }
+
+diff -ruN innodb_plugin-1.0.3_orig/btr/btr0sea.c innodb_plugin-1.0.3_tmp/btr/btr0sea.c
+--- innodb_plugin-1.0.3_orig/btr/btr0sea.c 2009-02-17 16:56:33.000000000 +0900
++++ innodb_plugin-1.0.3_tmp/btr/btr0sea.c 2009-03-19 18:51:20.000000000 +0900
+@@ -1731,7 +1731,8 @@
+ rec_offs_init(offsets_);
+
+ rw_lock_x_lock(&btr_search_latch);
+- buf_pool_mutex_enter();
++ //buf_pool_mutex_enter();
++ rw_lock_x_lock(&page_hash_latch);
+
+ cell_count = hash_get_n_cells(btr_search_sys->hash_index);
+
+@@ -1739,11 +1740,13 @@
+ /* We release btr_search_latch every once in a while to
+ give other queries a chance to run. */
+ if ((i != 0) && ((i % chunk_size) == 0)) {
+- buf_pool_mutex_exit();
++ //buf_pool_mutex_exit();
++ rw_lock_x_unlock(&page_hash_latch);
+ rw_lock_x_unlock(&btr_search_latch);
+ os_thread_yield();
+ rw_lock_x_lock(&btr_search_latch);
+- buf_pool_mutex_enter();
++ //buf_pool_mutex_enter();
++ rw_lock_x_lock(&page_hash_latch);
+ }
+
+ node = hash_get_nth_cell(btr_search_sys->hash_index, i)->node;
+@@ -1850,11 +1853,13 @@
+ /* We release btr_search_latch every once in a while to
+ give other queries a chance to run. */
+ if (i != 0) {
+- buf_pool_mutex_exit();
++ //buf_pool_mutex_exit();
++ rw_lock_x_unlock(&page_hash_latch);
+ rw_lock_x_unlock(&btr_search_latch);
+ os_thread_yield();
+ rw_lock_x_lock(&btr_search_latch);
+- buf_pool_mutex_enter();
++ //buf_pool_mutex_enter();
++ rw_lock_x_lock(&page_hash_latch);
+ }
+
+ if (!ha_validate(btr_search_sys->hash_index, i, end_index)) {
+@@ -1862,7 +1867,8 @@
+ }
+ }
+
+- buf_pool_mutex_exit();
++ //buf_pool_mutex_exit();
++ rw_lock_x_unlock(&page_hash_latch);
+ rw_lock_x_unlock(&btr_search_latch);
+ if (UNIV_LIKELY_NULL(heap)) {
+ mem_heap_free(heap);
+diff -ruN innodb_plugin-1.0.3_orig/buf/buf0buddy.c innodb_plugin-1.0.3_tmp/buf/buf0buddy.c
+--- innodb_plugin-1.0.3_orig/buf/buf0buddy.c 2009-02-17 17:12:02.000000000 +0900
++++ innodb_plugin-1.0.3_tmp/buf/buf0buddy.c 2009-03-19 18:51:20.000000000 +0900
+@@ -131,14 +131,15 @@
+ {
+ buf_page_t* bpage;
+
+- ut_ad(buf_pool_mutex_own());
++ //ut_ad(buf_pool_mutex_own());
++ ut_ad(mutex_own(&zip_free_mutex));
+ ut_a(i < BUF_BUDDY_SIZES);
+
+ #if defined UNIV_DEBUG && !defined UNIV_DEBUG_VALGRIND
+ /* Valgrind would complain about accessing free memory. */
+ UT_LIST_VALIDATE(list, buf_page_t, buf_pool->zip_free[i]);
+ #endif /* UNIV_DEBUG && !UNIV_DEBUG_VALGRIND */
+- bpage = UT_LIST_GET_FIRST(buf_pool->zip_free[i]);
++ bpage = UT_LIST_GET_LAST(buf_pool->zip_free[i]);
+
+ if (bpage) {
+ UNIV_MEM_VALID(bpage, BUF_BUDDY_LOW << i);
+@@ -177,16 +178,19 @@
+ void
+ buf_buddy_block_free(
+ /*=================*/
+- void* buf) /* in: buffer frame to deallocate */
++ void* buf, /* in: buffer frame to deallocate */
++ ibool have_page_hash_mutex)
+ {
+ const ulint fold = BUF_POOL_ZIP_FOLD_PTR(buf);
+ buf_page_t* bpage;
+ buf_block_t* block;
+
+- ut_ad(buf_pool_mutex_own());
++ //ut_ad(buf_pool_mutex_own());
+ ut_ad(!mutex_own(&buf_pool_zip_mutex));
+ ut_a(!ut_align_offset(buf, UNIV_PAGE_SIZE));
+
++ mutex_enter(&zip_hash_mutex);
++
+ HASH_SEARCH(hash, buf_pool->zip_hash, fold, buf_page_t*, bpage,
+ ut_ad(buf_page_get_state(bpage) == BUF_BLOCK_MEMORY
+ && bpage->in_zip_hash && !bpage->in_page_hash),
+@@ -198,12 +202,14 @@
+ ut_d(bpage->in_zip_hash = FALSE);
+ HASH_DELETE(buf_page_t, hash, buf_pool->zip_hash, fold, bpage);
+
++ mutex_exit(&zip_hash_mutex);
++
+ ut_d(memset(buf, 0, UNIV_PAGE_SIZE));
+ UNIV_MEM_INVALID(buf, UNIV_PAGE_SIZE);
+
+ block = (buf_block_t*) bpage;
+ mutex_enter(&block->mutex);
+- buf_LRU_block_free_non_file_page(block);
++ buf_LRU_block_free_non_file_page(block, have_page_hash_mutex);
+ mutex_exit(&block->mutex);
+
+ ut_ad(buf_buddy_n_frames > 0);
+@@ -219,7 +225,7 @@
+ buf_block_t* block) /* in: buffer frame to allocate */
+ {
+ const ulint fold = BUF_POOL_ZIP_FOLD(block);
+- ut_ad(buf_pool_mutex_own());
++ //ut_ad(buf_pool_mutex_own());
+ ut_ad(!mutex_own(&buf_pool_zip_mutex));
+
+ buf_block_set_state(block, BUF_BLOCK_MEMORY);
+@@ -230,7 +236,10 @@
+ ut_ad(!block->page.in_page_hash);
+ ut_ad(!block->page.in_zip_hash);
+ ut_d(block->page.in_zip_hash = TRUE);
++
++ mutex_enter(&zip_hash_mutex);
+ HASH_INSERT(buf_page_t, hash, buf_pool->zip_hash, fold, &block->page);
++ mutex_exit(&zip_hash_mutex);
+
+ ut_d(buf_buddy_n_frames++);
+ }
+@@ -284,24 +293,28 @@
+ possibly NULL if lru==NULL */
+ ulint i, /* in: index of buf_pool->zip_free[],
+ or BUF_BUDDY_SIZES */
+- ibool* lru) /* in: pointer to a variable that will be assigned
++ ibool* lru, /* in: pointer to a variable that will be assigned
+ TRUE if storage was allocated from the LRU list
+ and buf_pool_mutex was temporarily released,
+ or NULL if the LRU list should not be used */
++ ibool have_page_hash_mutex)
+ {
+ buf_block_t* block;
+
+- ut_ad(buf_pool_mutex_own());
++ //ut_ad(buf_pool_mutex_own());
+ ut_ad(!mutex_own(&buf_pool_zip_mutex));
+
+ if (i < BUF_BUDDY_SIZES) {
+ /* Try to allocate from the buddy system. */
++ mutex_enter(&zip_free_mutex);
+ block = buf_buddy_alloc_zip(i);
+
+ if (block) {
+
+ goto func_exit;
+ }
++
++ mutex_exit(&zip_free_mutex);
+ }
+
+ /* Try allocating from the buf_pool->free list. */
+@@ -318,18 +331,31 @@
+ }
+
+ /* Try replacing an uncompressed page in the buffer pool. */
+- buf_pool_mutex_exit();
++ //buf_pool_mutex_exit();
++ mutex_exit(&LRU_list_mutex);
++ if (have_page_hash_mutex) {
++ mutex_exit(&flush_list_mutex);
++ rw_lock_x_unlock(&page_hash_latch);
++ }
+ block = buf_LRU_get_free_block(0);
+ *lru = TRUE;
+- buf_pool_mutex_enter();
++ //buf_pool_mutex_enter();
++ mutex_enter(&LRU_list_mutex);
++ if (have_page_hash_mutex) {
++ mutex_enter(&flush_list_mutex);
++ rw_lock_x_lock(&page_hash_latch);
++ }
+
+ alloc_big:
+ buf_buddy_block_register(block);
+
++ mutex_enter(&zip_free_mutex);
+ block = buf_buddy_alloc_from(block->frame, i, BUF_BUDDY_SIZES);
+
+ func_exit:
+ buf_buddy_stat[i].used++;
++ mutex_exit(&zip_free_mutex);
++
+ return(block);
+ }
+
+@@ -345,7 +371,8 @@
+ {
+ buf_page_t* b;
+
+- ut_ad(buf_pool_mutex_own());
++ //ut_ad(buf_pool_mutex_own());
++ ut_ad(mutex_own(&flush_list_mutex));
+
+ switch (buf_page_get_state(bpage)) {
+ case BUF_BLOCK_ZIP_FREE:
+@@ -354,7 +381,7 @@
+ case BUF_BLOCK_FILE_PAGE:
+ case BUF_BLOCK_MEMORY:
+ case BUF_BLOCK_REMOVE_HASH:
+- ut_error;
++ /* ut_error; */ /* optimistic */
+ case BUF_BLOCK_ZIP_DIRTY:
+ /* Cannot relocate dirty pages. */
+ return(FALSE);
+@@ -364,9 +391,17 @@
+ }
+
+ mutex_enter(&buf_pool_zip_mutex);
++ mutex_enter(&zip_free_mutex);
+
+ if (!buf_page_can_relocate(bpage)) {
+ mutex_exit(&buf_pool_zip_mutex);
++ mutex_exit(&zip_free_mutex);
++ return(FALSE);
++ }
++
++ if (bpage != buf_page_hash_get(bpage->space, bpage->offset)) {
++ mutex_exit(&buf_pool_zip_mutex);
++ mutex_exit(&zip_free_mutex);
+ return(FALSE);
+ }
+
+@@ -384,6 +419,7 @@
+ }
+
+ mutex_exit(&buf_pool_zip_mutex);
++ mutex_exit(&zip_free_mutex);
+ return(TRUE);
+ }
+
+@@ -396,13 +432,15 @@
+ /* out: TRUE if relocated */
+ void* src, /* in: block to relocate */
+ void* dst, /* in: free block to relocate to */
+- ulint i) /* in: index of buf_pool->zip_free[] */
++ ulint i, /* in: index of buf_pool->zip_free[] */
++ ibool have_page_hash_mutex)
+ {
+ buf_page_t* bpage;
+ const ulint size = BUF_BUDDY_LOW << i;
+ ullint usec = ut_time_us(NULL);
+
+- ut_ad(buf_pool_mutex_own());
++ //ut_ad(buf_pool_mutex_own());
++ ut_ad(mutex_own(&zip_free_mutex));
+ ut_ad(!mutex_own(&buf_pool_zip_mutex));
+ ut_ad(!ut_align_offset(src, size));
+ ut_ad(!ut_align_offset(dst, size));
+@@ -421,9 +459,17 @@
+ actually is a properly initialized buf_page_t object. */
+
+ if (size >= PAGE_ZIP_MIN_SIZE) {
++ if (!have_page_hash_mutex)
++ mutex_exit(&zip_free_mutex);
++
+ /* This is a compressed page. */
+ mutex_t* mutex;
+
++ if (!have_page_hash_mutex) {
++ mutex_enter(&LRU_list_mutex);
++ mutex_enter(&flush_list_mutex);
++ rw_lock_x_lock(&page_hash_latch);
++ }
+ /* The src block may be split into smaller blocks,
+ some of which may be free. Thus, the
+ mach_read_from_4() calls below may attempt to read
+@@ -444,6 +490,12 @@
+ added to buf_pool->page_hash yet. Obviously,
+ it cannot be relocated. */
+
++ if (!have_page_hash_mutex) {
++ mutex_enter(&zip_free_mutex);
++ mutex_exit(&LRU_list_mutex);
++ mutex_exit(&flush_list_mutex);
++ rw_lock_x_unlock(&page_hash_latch);
++ }
+ return(FALSE);
+ }
+
+@@ -453,9 +505,19 @@
+ For the sake of simplicity, give up. */
+ ut_ad(page_zip_get_size(&bpage->zip) < size);
+
++ if (!have_page_hash_mutex) {
++ mutex_enter(&zip_free_mutex);
++ mutex_exit(&LRU_list_mutex);
++ mutex_exit(&flush_list_mutex);
++ rw_lock_x_unlock(&page_hash_latch);
++ }
+ return(FALSE);
+ }
+
++ /* To keep latch order */
++ if (have_page_hash_mutex)
++ mutex_exit(&zip_free_mutex);
++
+ /* The block must have been allocated, but it may
+ contain uninitialized data. */
+ UNIV_MEM_ASSERT_W(src, size);
+@@ -463,6 +525,7 @@
+ mutex = buf_page_get_mutex(bpage);
+
+ mutex_enter(mutex);
++ mutex_enter(&zip_free_mutex);
+
+ if (buf_page_can_relocate(bpage)) {
+ /* Relocate the compressed page. */
+@@ -479,17 +542,53 @@
+ buddy_stat->relocated_usec
+ += ut_time_us(NULL) - usec;
+ }
++
++ if (!have_page_hash_mutex) {
++ mutex_exit(&LRU_list_mutex);
++ mutex_exit(&flush_list_mutex);
++ rw_lock_x_unlock(&page_hash_latch);
++ }
+ return(TRUE);
+ }
+
++ if (!have_page_hash_mutex) {
++ mutex_exit(&LRU_list_mutex);
++ mutex_exit(&flush_list_mutex);
++ rw_lock_x_unlock(&page_hash_latch);
++ }
++
+ mutex_exit(mutex);
+ } else if (i == buf_buddy_get_slot(sizeof(buf_page_t))) {
+ /* This must be a buf_page_t object. */
+ UNIV_MEM_ASSERT_RW(src, size);
++
++ mutex_exit(&zip_free_mutex);
++
++ if (!have_page_hash_mutex) {
++ mutex_enter(&LRU_list_mutex);
++ mutex_enter(&flush_list_mutex);
++ rw_lock_x_lock(&page_hash_latch);
++ }
++
+ if (buf_buddy_relocate_block(src, dst)) {
++ mutex_enter(&zip_free_mutex);
++
++ if (!have_page_hash_mutex) {
++ mutex_exit(&LRU_list_mutex);
++ mutex_exit(&flush_list_mutex);
++ rw_lock_x_unlock(&page_hash_latch);
++ }
+
+ goto success;
+ }
++
++ mutex_enter(&zip_free_mutex);
++
++ if (!have_page_hash_mutex) {
++ mutex_exit(&LRU_list_mutex);
++ mutex_exit(&flush_list_mutex);
++ rw_lock_x_unlock(&page_hash_latch);
++ }
+ }
+
+ return(FALSE);
+@@ -503,12 +602,14 @@
+ /*===============*/
+ void* buf, /* in: block to be freed, must not be
+ pointed to by the buffer pool */
+- ulint i) /* in: index of buf_pool->zip_free[] */
++ ulint i, /* in: index of buf_pool->zip_free[] */
++ ibool have_page_hash_mutex)
+ {
+ buf_page_t* bpage;
+ buf_page_t* buddy;
+
+- ut_ad(buf_pool_mutex_own());
++ //ut_ad(buf_pool_mutex_own());
++ ut_ad(mutex_own(&zip_free_mutex));
+ ut_ad(!mutex_own(&buf_pool_zip_mutex));
+ ut_ad(i <= BUF_BUDDY_SIZES);
+ ut_ad(buf_buddy_stat[i].used > 0);
+@@ -519,7 +620,7 @@
+ ut_d(((buf_page_t*) buf)->state = BUF_BLOCK_ZIP_FREE);
+
+ if (i == BUF_BUDDY_SIZES) {
+- buf_buddy_block_free(buf);
++ buf_buddy_block_free(buf, have_page_hash_mutex);
+ return;
+ }
+
+@@ -577,7 +678,7 @@
+ #endif /* UNIV_DEBUG_VALGRIND */
+
+ /* The buddy is not free. Is there a free block of this size? */
+- bpage = UT_LIST_GET_FIRST(buf_pool->zip_free[i]);
++ bpage = UT_LIST_GET_LAST(buf_pool->zip_free[i]);
+
+ if (bpage) {
+ /* Remove the block from the free list, because a successful
+@@ -587,7 +688,7 @@
+ buf_buddy_remove_from_free(bpage, i);
+
+ /* Try to relocate the buddy of buf to the free block. */
+- if (buf_buddy_relocate(buddy, bpage, i)) {
++ if (buf_buddy_relocate(buddy, bpage, i, have_page_hash_mutex)) {
+
+ ut_d(buddy->state = BUF_BLOCK_ZIP_FREE);
+ goto buddy_free2;
+@@ -615,7 +716,7 @@
+ }
+ #endif /* UNIV_DEBUG && !UNIV_DEBUG_VALGRIND */
+
+- if (buf_buddy_relocate(buddy, buf, i)) {
++ if (buf_buddy_relocate(buddy, buf, i, have_page_hash_mutex)) {
+
+ buf = bpage;
+ UNIV_MEM_VALID(bpage, BUF_BUDDY_LOW << i);
+diff -ruN innodb_plugin-1.0.3_orig/buf/buf0buf.c innodb_plugin-1.0.3_tmp/buf/buf0buf.c
+--- innodb_plugin-1.0.3_orig/buf/buf0buf.c 2009-03-19 18:47:48.000000000 +0900
++++ innodb_plugin-1.0.3_tmp/buf/buf0buf.c 2009-03-19 18:51:20.000000000 +0900
+@@ -244,6 +244,12 @@
+ /* mutex protecting the buffer pool struct and control blocks, except the
+ read-write lock in them */
+ UNIV_INTERN mutex_t buf_pool_mutex;
++UNIV_INTERN mutex_t LRU_list_mutex;
++UNIV_INTERN mutex_t flush_list_mutex;
++UNIV_INTERN rw_lock_t page_hash_latch;
++UNIV_INTERN mutex_t free_list_mutex;
++UNIV_INTERN mutex_t zip_free_mutex;
++UNIV_INTERN mutex_t zip_hash_mutex;
+ /* mutex protecting the control blocks of compressed-only pages
+ (of type buf_page_t, not buf_block_t) */
+ UNIV_INTERN mutex_t buf_pool_zip_mutex;
+@@ -664,9 +670,9 @@
+ block->page.in_zip_hash = FALSE;
+ block->page.in_flush_list = FALSE;
+ block->page.in_free_list = FALSE;
++#endif /* UNIV_DEBUG */
+ block->page.in_LRU_list = FALSE;
+ block->in_unzip_LRU_list = FALSE;
+-#endif /* UNIV_DEBUG */
+ #if defined UNIV_AHI_DEBUG || defined UNIV_DEBUG
+ block->n_pointers = 0;
+ #endif /* UNIV_AHI_DEBUG || UNIV_DEBUG */
+@@ -751,8 +757,10 @@
+ memset(block->frame, '\0', UNIV_PAGE_SIZE);
+ #endif
+ /* Add the block to the free list */
++ mutex_enter(&free_list_mutex);
+ UT_LIST_ADD_LAST(list, buf_pool->free, (&block->page));
+ ut_d(block->page.in_free_list = TRUE);
++ mutex_exit(&free_list_mutex);
+
+ block++;
+ frame += UNIV_PAGE_SIZE;
+@@ -778,7 +786,7 @@
+ ulint i;
+
+ ut_ad(buf_pool);
+- ut_ad(buf_pool_mutex_own());
++ //ut_ad(buf_pool_mutex_own());
+
+ block = chunk->blocks;
+
+@@ -832,7 +840,7 @@
+ ulint i;
+
+ ut_ad(buf_pool);
+- ut_ad(buf_pool_mutex_own());
++ //ut_ad(buf_pool_mutex_own()); /*optimistic...*/
+
+ block = chunk->blocks;
+
+@@ -865,7 +873,7 @@
+ ulint i;
+
+ ut_ad(buf_pool);
+- ut_ad(buf_pool_mutex_own());
++ ut_ad(buf_pool_mutex_own()); /* but we need all mutex here */
+
+ block = chunk->blocks;
+
+@@ -891,7 +899,7 @@
+ buf_block_t* block;
+ const buf_block_t* block_end;
+
+- ut_ad(buf_pool_mutex_own());
++ ut_ad(buf_pool_mutex_own()); /* but we need all mutex here */
+
+ block_end = chunk->blocks + chunk->size;
+
+@@ -903,8 +911,10 @@
+ ut_ad(!block->in_unzip_LRU_list);
+ ut_ad(!block->page.in_flush_list);
+ /* Remove the block from the free list. */
++ mutex_enter(&free_list_mutex);
+ ut_ad(block->page.in_free_list);
+ UT_LIST_REMOVE(list, buf_pool->free, (&block->page));
++ mutex_exit(&free_list_mutex);
+
+ /* Free the latches. */
+ mutex_free(&block->mutex);
+@@ -935,8 +945,18 @@
+ /* 1. Initialize general fields
+ ------------------------------- */
+ mutex_create(&buf_pool_mutex, SYNC_BUF_POOL);
++ mutex_create(&LRU_list_mutex, SYNC_NO_ORDER_CHECK);
++ mutex_create(&flush_list_mutex, SYNC_NO_ORDER_CHECK);
++ rw_lock_create(&page_hash_latch, SYNC_NO_ORDER_CHECK);
++ mutex_create(&free_list_mutex, SYNC_NO_ORDER_CHECK);
++ mutex_create(&zip_free_mutex, SYNC_NO_ORDER_CHECK);
++ mutex_create(&zip_hash_mutex, SYNC_NO_ORDER_CHECK);
++
+ mutex_create(&buf_pool_zip_mutex, SYNC_BUF_BLOCK);
+
++ mutex_enter(&LRU_list_mutex);
++ mutex_enter(&flush_list_mutex);
++ rw_lock_x_lock(&page_hash_latch);
+ buf_pool_mutex_enter();
+
+ buf_pool->n_chunks = 1;
+@@ -973,6 +993,9 @@
+ --------------------------- */
+ /* All fields are initialized by mem_zalloc(). */
+
++ mutex_exit(&LRU_list_mutex);
++ mutex_exit(&flush_list_mutex);
++ rw_lock_x_unlock(&page_hash_latch);
+ buf_pool_mutex_exit();
+
+ btr_search_sys_create(buf_pool->curr_size
+@@ -1105,7 +1128,11 @@
+ buf_page_t* b;
+ ulint fold;
+
+- ut_ad(buf_pool_mutex_own());
++ //ut_ad(buf_pool_mutex_own());
++ ut_ad(mutex_own(&LRU_list_mutex));
++#ifdef UNIV_SYNC_DEBUG
++ ut_ad(rw_lock_own(&page_hash_latch, RW_LOCK_EX));
++#endif
+ ut_ad(mutex_own(buf_page_get_mutex(bpage)));
+ ut_a(buf_page_get_io_fix(bpage) == BUF_IO_NONE);
+ ut_a(bpage->buf_fix_count == 0);
+@@ -1130,7 +1157,7 @@
+
+ memcpy(dpage, bpage, sizeof *dpage);
+
+- ut_d(bpage->in_LRU_list = FALSE);
++ bpage->in_LRU_list = FALSE;
+ ut_d(bpage->in_page_hash = FALSE);
+
+ /* relocate buf_pool->LRU */
+@@ -1186,7 +1213,8 @@
+
+ try_again:
+ btr_search_disable(); /* Empty the adaptive hash index again */
+- buf_pool_mutex_enter();
++ //buf_pool_mutex_enter();
++ mutex_enter(&LRU_list_mutex);
+
+ shrink_again:
+ if (buf_pool->n_chunks <= 1) {
+@@ -1257,7 +1285,7 @@
+
+ buf_LRU_make_block_old(&block->page);
+ dirty++;
+- } else if (buf_LRU_free_block(&block->page, TRUE, NULL)
++ } else if (buf_LRU_free_block(&block->page, TRUE, NULL, FALSE)
+ != BUF_LRU_FREED) {
+ nonfree++;
+ }
+@@ -1265,7 +1293,8 @@
+ mutex_exit(&block->mutex);
+ }
+
+- buf_pool_mutex_exit();
++ //buf_pool_mutex_exit();
++ mutex_exit(&LRU_list_mutex);
+
+ /* Request for a flush of the chunk if it helps.
+ Do not flush if there are non-free blocks, since
+@@ -1314,7 +1343,8 @@
+ func_done:
+ srv_buf_pool_old_size = srv_buf_pool_size;
+ func_exit:
+- buf_pool_mutex_exit();
++ //buf_pool_mutex_exit();
++ mutex_exit(&LRU_list_mutex);
+ btr_search_enable();
+ }
+
+@@ -1332,7 +1362,11 @@
+ hash_table_t* zip_hash;
+ buf_page_t* b;
+
+- buf_pool_mutex_enter();
++ //buf_pool_mutex_enter();
++ mutex_enter(&LRU_list_mutex);
++ mutex_enter(&flush_list_mutex);
++ rw_lock_x_lock(&page_hash_latch);
++
+
+ /* Free, create, and populate the hash table. */
+ hash_table_free(buf_pool->page_hash);
+@@ -1412,7 +1446,10 @@
+ }
+ }
+
+- buf_pool_mutex_exit();
++ //buf_pool_mutex_exit();
++ mutex_exit(&LRU_list_mutex);
++ mutex_exit(&flush_list_mutex);
++ rw_lock_x_unlock(&page_hash_latch);
+ }
+
+ /************************************************************************
+@@ -1422,17 +1459,20 @@
+ buf_pool_resize(void)
+ /*=================*/
+ {
+- buf_pool_mutex_enter();
++ //buf_pool_mutex_enter();
++ mutex_enter(&LRU_list_mutex);
+
+ if (srv_buf_pool_old_size == srv_buf_pool_size) {
+
+- buf_pool_mutex_exit();
++ //buf_pool_mutex_exit();
++ mutex_exit(&LRU_list_mutex);
+ return;
+ }
+
+ if (srv_buf_pool_curr_size + 1048576 > srv_buf_pool_size) {
+
+- buf_pool_mutex_exit();
++ //buf_pool_mutex_exit();
++ mutex_exit(&LRU_list_mutex);
+
+ /* Disable adaptive hash indexes and empty the index
+ in order to free up memory in the buffer pool chunks. */
+@@ -1466,7 +1506,8 @@
+ }
+
+ srv_buf_pool_old_size = srv_buf_pool_size;
+- buf_pool_mutex_exit();
++ //buf_pool_mutex_exit();
++ mutex_exit(&LRU_list_mutex);
+ }
+
+ buf_pool_page_hash_rebuild();
+@@ -1488,12 +1529,14 @@
+
+ if (buf_page_peek_if_too_old(bpage)) {
+
+- buf_pool_mutex_enter();
++ //buf_pool_mutex_enter();
++ mutex_enter(&LRU_list_mutex);
+ /* There has been freeing activity in the LRU list:
+ best to move to the head of the LRU list */
+
+ buf_LRU_make_block_young(bpage);
+- buf_pool_mutex_exit();
++ //buf_pool_mutex_exit();
++ mutex_exit(&LRU_list_mutex);
+ }
+ }
+
+@@ -1507,13 +1550,15 @@
+ /*================*/
+ buf_page_t* bpage) /* in: buffer block of a file page */
+ {
+- buf_pool_mutex_enter();
++ //buf_pool_mutex_enter();
++ mutex_enter(&LRU_list_mutex);
+
+ ut_a(buf_page_in_file(bpage));
+
+ buf_LRU_make_block_young(bpage);
+
+- buf_pool_mutex_exit();
++ //buf_pool_mutex_exit();
++ mutex_exit(&LRU_list_mutex);
+ }
+
+ /************************************************************************
+@@ -1528,7 +1573,8 @@
+ {
+ buf_block_t* block;
+
+- buf_pool_mutex_enter();
++ //buf_pool_mutex_enter();
++ rw_lock_s_lock(&page_hash_latch);
+
+ block = (buf_block_t*) buf_page_hash_get(space, offset);
+
+@@ -1536,7 +1582,8 @@
+ block->check_index_page_at_flush = FALSE;
+ }
+
+- buf_pool_mutex_exit();
++ //buf_pool_mutex_exit();
++ rw_lock_s_unlock(&page_hash_latch);
+ }
+
+ /************************************************************************
+@@ -1555,7 +1602,8 @@
+ buf_block_t* block;
+ ibool is_hashed;
+
+- buf_pool_mutex_enter();
++ //buf_pool_mutex_enter();
++ rw_lock_s_lock(&page_hash_latch);
+
+ block = (buf_block_t*) buf_page_hash_get(space, offset);
+
+@@ -1565,7 +1613,8 @@
+ is_hashed = block->is_hashed;
+ }
+
+- buf_pool_mutex_exit();
++ //buf_pool_mutex_exit();
++ rw_lock_s_unlock(&page_hash_latch);
+
+ return(is_hashed);
+ }
+@@ -1587,7 +1636,8 @@
+ {
+ buf_page_t* bpage;
+
+- buf_pool_mutex_enter();
++ //buf_pool_mutex_enter();
++ rw_lock_s_lock(&page_hash_latch);
+
+ bpage = buf_page_hash_get(space, offset);
+
+@@ -1595,7 +1645,8 @@
+ bpage->file_page_was_freed = TRUE;
+ }
+
+- buf_pool_mutex_exit();
++ //buf_pool_mutex_exit();
++ rw_lock_s_unlock(&page_hash_latch);
+
+ return(bpage);
+ }
+@@ -1616,7 +1667,8 @@
+ {
+ buf_page_t* bpage;
+
+- buf_pool_mutex_enter();
++ //buf_pool_mutex_enter();
++ rw_lock_s_lock(&page_hash_latch);
+
+ bpage = buf_page_hash_get(space, offset);
+
+@@ -1624,7 +1676,8 @@
+ bpage->file_page_was_freed = FALSE;
+ }
+
+- buf_pool_mutex_exit();
++ //buf_pool_mutex_exit();
++ rw_lock_s_unlock(&page_hash_latch);
+
+ return(bpage);
+ }
+@@ -1657,8 +1710,9 @@
+ buf_pool->n_page_gets++;
+
+ for (;;) {
+- buf_pool_mutex_enter();
++ //buf_pool_mutex_enter();
+ lookup:
++ rw_lock_s_lock(&page_hash_latch);
+ bpage = buf_page_hash_get(space, offset);
+ if (bpage) {
+ break;
+@@ -1666,7 +1720,8 @@
+
+ /* Page not in buf_pool: needs to be read from file */
+
+- buf_pool_mutex_exit();
++ //buf_pool_mutex_exit();
++ rw_lock_s_unlock(&page_hash_latch);
+
+ buf_read_page(space, zip_size, offset);
+
+@@ -1677,13 +1732,16 @@
+
+ if (UNIV_UNLIKELY(!bpage->zip.data)) {
+ /* There is no compressed page. */
+- buf_pool_mutex_exit();
++ //buf_pool_mutex_exit();
++ rw_lock_s_unlock(&page_hash_latch);
+ return(NULL);
+ }
+
+ block_mutex = buf_page_get_mutex(bpage);
+ mutex_enter(block_mutex);
+
++ rw_lock_s_unlock(&page_hash_latch);
++
+ switch (buf_page_get_state(bpage)) {
+ case BUF_BLOCK_NOT_USED:
+ case BUF_BLOCK_READY_FOR_USE:
+@@ -1698,7 +1756,7 @@
+ break;
+ case BUF_BLOCK_FILE_PAGE:
+ /* Discard the uncompressed page frame if possible. */
+- if (buf_LRU_free_block(bpage, FALSE, NULL)
++ if (buf_LRU_free_block(bpage, FALSE, NULL, FALSE)
+ == BUF_LRU_FREED) {
+
+ mutex_exit(block_mutex);
+@@ -1712,7 +1770,7 @@
+
+ must_read = buf_page_get_io_fix(bpage) == BUF_IO_READ;
+
+- buf_pool_mutex_exit();
++ //buf_pool_mutex_exit();
+
+ buf_page_set_accessed(bpage, TRUE);
+
+@@ -1943,7 +2001,7 @@
+ const buf_chunk_t* chunk = buf_pool->chunks;
+ const buf_chunk_t* const echunk = chunk + buf_pool->n_chunks;
+
+- ut_ad(buf_pool_mutex_own());
++ //ut_ad(buf_pool_mutex_own());
+
+ if (UNIV_UNLIKELY((((ulint) block) % sizeof *block) != 0)) {
+ /* The pointer should be aligned. */
+@@ -1986,6 +2044,7 @@
+ ibool accessed;
+ ulint fix_type;
+ ibool must_read;
++ mutex_t* block_mutex;
+
+ ut_ad(mtr);
+ ut_ad((rw_latch == RW_S_LATCH)
+@@ -2001,9 +2060,12 @@
+ buf_pool->n_page_gets++;
+ loop:
+ block = guess;
+- buf_pool_mutex_enter();
++ //buf_pool_mutex_enter();
+
+ if (block) {
++ block_mutex = buf_page_get_mutex((buf_page_t*)block);
++ mutex_enter(block_mutex);
++
+ /* If the guess is a compressed page descriptor that
+ has been allocated by buf_buddy_alloc(), it may have
+ been invalidated by buf_buddy_relocate(). In that
+@@ -2017,6 +2079,8 @@
+ || space != block->page.space
+ || buf_block_get_state(block) != BUF_BLOCK_FILE_PAGE) {
+
++ mutex_exit(block_mutex);
++
+ block = guess = NULL;
+ } else {
+ ut_ad(!block->page.in_zip_hash);
+@@ -2025,14 +2089,20 @@
+ }
+
+ if (block == NULL) {
++ rw_lock_s_lock(&page_hash_latch);
+ block = (buf_block_t*) buf_page_hash_get(space, offset);
++ if (block) {
++ block_mutex = buf_page_get_mutex((buf_page_t*)block);
++ mutex_enter(block_mutex);
++ }
++ rw_lock_s_unlock(&page_hash_latch);
+ }
+
+ loop2:
+ if (block == NULL) {
+ /* Page not in buf_pool: needs to be read from file */
+
+- buf_pool_mutex_exit();
++ //buf_pool_mutex_exit();
+
+ if (mode == BUF_GET_IF_IN_POOL) {
+
+@@ -2053,7 +2123,8 @@
+
+ if (must_read && mode == BUF_GET_IF_IN_POOL) {
+ /* The page is only being read to buffer */
+- buf_pool_mutex_exit();
++ //buf_pool_mutex_exit();
++ mutex_exit(block_mutex);
+
+ return(NULL);
+ }
+@@ -2063,10 +2134,16 @@
+ ibool success;
+
+ case BUF_BLOCK_FILE_PAGE:
++ if (block_mutex == &buf_pool_zip_mutex) {
++ /* We hold buf_pool_zip_mutex, but the block is now a file page: wrong mutex, so release it and retry. */
++ mutex_exit(block_mutex);
++ goto loop;
++ }
+ break;
+
+ case BUF_BLOCK_ZIP_PAGE:
+ case BUF_BLOCK_ZIP_DIRTY:
++ ut_ad(block_mutex == &buf_pool_zip_mutex);
+ bpage = &block->page;
+
+ if (bpage->buf_fix_count
+@@ -2077,20 +2154,26 @@
+ wait_until_unfixed:
+ /* The block is buffer-fixed or I/O-fixed.
+ Try again later. */
+- buf_pool_mutex_exit();
++ //buf_pool_mutex_exit();
++ mutex_exit(block_mutex);
+ os_thread_sleep(WAIT_FOR_READ);
+
+ goto loop;
+ }
+
+ /* Allocate an uncompressed page. */
+- buf_pool_mutex_exit();
++ //buf_pool_mutex_exit();
++ mutex_exit(block_mutex);
+
+ block = buf_LRU_get_free_block(0);
+ ut_a(block);
++ block_mutex = &block->mutex;
+
+- buf_pool_mutex_enter();
+- mutex_enter(&block->mutex);
++ //buf_pool_mutex_enter();
++ mutex_enter(&LRU_list_mutex);
++ mutex_enter(&flush_list_mutex);
++ rw_lock_x_lock(&page_hash_latch);
++ mutex_enter(block_mutex);
+
+ {
+ buf_page_t* hash_bpage
+@@ -2101,10 +2184,17 @@
+ while buf_pool_mutex was released.
+ Free the block that was allocated. */
+
+- buf_LRU_block_free_non_file_page(block);
+- mutex_exit(&block->mutex);
++ buf_LRU_block_free_non_file_page(block, TRUE);
++ mutex_exit(block_mutex);
+
+ block = (buf_block_t*) hash_bpage;
++ if (block) {
++ block_mutex = buf_page_get_mutex((buf_page_t*)block);
++ mutex_enter(block_mutex);
++ }
++ rw_lock_x_unlock(&page_hash_latch);
++ mutex_exit(&LRU_list_mutex);
++ mutex_exit(&flush_list_mutex);
+ goto loop2;
+ }
+ }
+@@ -2118,9 +2208,12 @@
+ Free the block that was allocated and try again.
+ This should be extremely unlikely. */
+
+- buf_LRU_block_free_non_file_page(block);
+- mutex_exit(&block->mutex);
++ buf_LRU_block_free_non_file_page(block, TRUE);
++ //mutex_exit(&block->mutex);
+
++ rw_lock_x_unlock(&page_hash_latch);
++ mutex_exit(&LRU_list_mutex);
++ mutex_exit(&flush_list_mutex);
+ goto wait_until_unfixed;
+ }
+
+@@ -2130,6 +2223,9 @@
+ mutex_enter(&buf_pool_zip_mutex);
+
+ buf_relocate(bpage, &block->page);
++
++ rw_lock_x_unlock(&page_hash_latch);
++
+ buf_block_init_low(block);
+ block->lock_hash_val = lock_rec_hash(space, offset);
+
+@@ -2161,6 +2257,8 @@
+ }
+ }
+
++ mutex_exit(&flush_list_mutex);
++
+ /* Buffer-fix, I/O-fix, and X-latch the block
+ for the duration of the decompression.
+ Also add the block to the unzip_LRU list. */
+@@ -2169,16 +2267,22 @@
+ /* Insert at the front of unzip_LRU list */
+ buf_unzip_LRU_add_block(block, FALSE);
+
++ mutex_exit(&LRU_list_mutex);
++
+ block->page.buf_fix_count = 1;
+ buf_block_set_io_fix(block, BUF_IO_READ);
++
++ mutex_enter(&buf_pool_mutex);
+ buf_pool->n_pend_unzip++;
++ mutex_exit(&buf_pool_mutex);
++
+ rw_lock_x_lock(&block->lock);
+- mutex_exit(&block->mutex);
++ mutex_exit(block_mutex);
+ mutex_exit(&buf_pool_zip_mutex);
+
+- buf_buddy_free(bpage, sizeof *bpage);
++ buf_buddy_free(bpage, sizeof *bpage, FALSE);
+
+- buf_pool_mutex_exit();
++ //buf_pool_mutex_exit();
+
+ /* Decompress the page and apply buffered operations
+ while not holding buf_pool_mutex or block->mutex. */
+@@ -2190,17 +2294,21 @@
+ }
+
+ /* Unfix and unlatch the block. */
+- buf_pool_mutex_enter();
+- mutex_enter(&block->mutex);
++ //buf_pool_mutex_enter();
++ block_mutex = &block->mutex;
++ mutex_enter(block_mutex);
++ mutex_enter(&buf_pool_mutex);
+ buf_pool->n_pend_unzip--;
++ mutex_exit(&buf_pool_mutex);
+ block->page.buf_fix_count--;
+ buf_block_set_io_fix(block, BUF_IO_NONE);
+- mutex_exit(&block->mutex);
++ //mutex_exit(&block->mutex);
+ rw_lock_x_unlock(&block->lock);
+
+ if (UNIV_UNLIKELY(!success)) {
+
+- buf_pool_mutex_exit();
++ //buf_pool_mutex_exit();
++ mutex_exit(block_mutex);
+ return(NULL);
+ }
+
+@@ -2217,11 +2325,11 @@
+
+ ut_ad(buf_block_get_state(block) == BUF_BLOCK_FILE_PAGE);
+
+- mutex_enter(&block->mutex);
++ //mutex_enter(&block->mutex);
+ UNIV_MEM_ASSERT_RW(&block->page, sizeof block->page);
+
+ buf_block_buf_fix_inc(block, file, line);
+- buf_pool_mutex_exit();
++ //buf_pool_mutex_exit();
+
+ /* Check if this is the first access to the page */
+
+@@ -2229,7 +2337,7 @@
+
+ buf_page_set_accessed(&block->page, TRUE);
+
+- mutex_exit(&block->mutex);
++ mutex_exit(block_mutex);
+
+ buf_block_make_young(&block->page);
+
+@@ -2515,16 +2623,19 @@
+ ibool success;
+ ulint fix_type;
+
+- buf_pool_mutex_enter();
++ //buf_pool_mutex_enter();
++ rw_lock_s_lock(&page_hash_latch);
+ block = buf_block_hash_get(space_id, page_no);
+
+ if (!block) {
+- buf_pool_mutex_exit();
++ //buf_pool_mutex_exit();
++ rw_lock_s_unlock(&page_hash_latch);
+ return(NULL);
+ }
+
+ mutex_enter(&block->mutex);
+- buf_pool_mutex_exit();
++ //buf_pool_mutex_exit();
++ rw_lock_s_unlock(&page_hash_latch);
+
+ #if defined UNIV_DEBUG || defined UNIV_BUF_DEBUG
+ ut_a(buf_block_get_state(block) == BUF_BLOCK_FILE_PAGE);
+@@ -2644,7 +2755,10 @@
+ {
+ buf_page_t* hash_page;
+
+- ut_ad(buf_pool_mutex_own());
++ //ut_ad(buf_pool_mutex_own());
++#ifdef UNIV_SYNC_DEBUG
++ ut_ad(rw_lock_own(&page_hash_latch, RW_LOCK_EX));
++#endif
+ ut_ad(mutex_own(&(block->mutex)));
+ ut_a(buf_block_get_state(block) != BUF_BLOCK_FILE_PAGE);
+
+@@ -2677,7 +2791,8 @@
+ (const void*) hash_page, (const void*) block);
+ #if defined UNIV_DEBUG || defined UNIV_BUF_DEBUG
+ mutex_exit(&block->mutex);
+- buf_pool_mutex_exit();
++ //buf_pool_mutex_exit();
++ rw_lock_x_unlock(&page_hash_latch);
+ buf_print();
+ buf_LRU_print();
+ buf_validate();
+@@ -2756,16 +2871,28 @@
+ ut_ad(block);
+ }
+
+- buf_pool_mutex_enter();
++ //buf_pool_mutex_enter();
++ mutex_enter(&LRU_list_mutex);
++ if(!block) {
++ mutex_enter(&flush_list_mutex);
++ }
++ rw_lock_x_lock(&page_hash_latch);
+
+ if (buf_page_hash_get(space, offset)) {
+ /* The page is already in the buffer pool. */
+ err_exit:
+ if (block) {
+ mutex_enter(&block->mutex);
+- buf_LRU_block_free_non_file_page(block);
++ mutex_exit(&LRU_list_mutex);
++ rw_lock_x_unlock(&page_hash_latch);
++ buf_LRU_block_free_non_file_page(block, FALSE);
+ mutex_exit(&block->mutex);
+ }
++ else {
++ mutex_exit(&LRU_list_mutex);
++ mutex_exit(&flush_list_mutex);
++ rw_lock_x_unlock(&page_hash_latch);
++ }
+
+ bpage = NULL;
+ goto func_exit;
+@@ -2785,6 +2912,8 @@
+ mutex_enter(&block->mutex);
+ buf_page_init(space, offset, block);
+
++ rw_lock_x_unlock(&page_hash_latch);
++
+ /* The block must be put to the LRU list, to the old blocks */
+ buf_LRU_add_block(bpage, TRUE/* to old blocks */);
+
+@@ -2812,7 +2941,7 @@
+ been added to buf_pool->LRU and
+ buf_pool->page_hash. */
+ mutex_exit(&block->mutex);
+- data = buf_buddy_alloc(zip_size, &lru);
++ data = buf_buddy_alloc(zip_size, &lru, FALSE);
+ mutex_enter(&block->mutex);
+ block->page.zip.data = data;
+
+@@ -2825,6 +2954,7 @@
+ buf_unzip_LRU_add_block(block, TRUE);
+ }
+
++ mutex_exit(&LRU_list_mutex);
+ mutex_exit(&block->mutex);
+ } else {
+ /* Defer buf_buddy_alloc() until after the block has
+@@ -2836,8 +2966,8 @@
+ control block (bpage), in order to avoid the
+ invocation of buf_buddy_relocate_block() on
+ uninitialized data. */
+- data = buf_buddy_alloc(zip_size, &lru);
+- bpage = buf_buddy_alloc(sizeof *bpage, &lru);
++ data = buf_buddy_alloc(zip_size, &lru, TRUE);
++ bpage = buf_buddy_alloc(sizeof *bpage, &lru, TRUE);
+
+ /* If buf_buddy_alloc() allocated storage from the LRU list,
+ it released and reacquired buf_pool_mutex. Thus, we must
+@@ -2846,8 +2976,12 @@
+ && UNIV_LIKELY_NULL(buf_page_hash_get(space, offset))) {
+
+ /* The block was added by some other thread. */
+- buf_buddy_free(bpage, sizeof *bpage);
+- buf_buddy_free(data, zip_size);
++ buf_buddy_free(bpage, sizeof *bpage, TRUE);
++ buf_buddy_free(data, zip_size, TRUE);
++
++ mutex_exit(&LRU_list_mutex);
++ mutex_exit(&flush_list_mutex);
++ rw_lock_x_unlock(&page_hash_latch);
+
+ bpage = NULL;
+ goto func_exit;
+@@ -2870,25 +3004,32 @@
+ bpage->in_zip_hash = FALSE;
+ bpage->in_flush_list = FALSE;
+ bpage->in_free_list = FALSE;
+- bpage->in_LRU_list = FALSE;
+ #endif /* UNIV_DEBUG */
++ bpage->in_LRU_list = FALSE;
+
+ ut_d(bpage->in_page_hash = TRUE);
+ HASH_INSERT(buf_page_t, hash, buf_pool->page_hash,
+ buf_page_address_fold(space, offset), bpage);
+
++ rw_lock_x_unlock(&page_hash_latch);
++
+ /* The block must be put to the LRU list, to the old blocks */
+ buf_LRU_add_block(bpage, TRUE/* to old blocks */);
+ buf_LRU_insert_zip_clean(bpage);
+
++ mutex_exit(&LRU_list_mutex);
++ mutex_exit(&flush_list_mutex);
++
+ buf_page_set_io_fix(bpage, BUF_IO_READ);
+
+ mutex_exit(&buf_pool_zip_mutex);
+ }
+
++ mutex_enter(&buf_pool_mutex);
+ buf_pool->n_pend_reads++;
++ mutex_exit(&buf_pool_mutex);
+ func_exit:
+- buf_pool_mutex_exit();
++ //buf_pool_mutex_exit();
+
+ if (mode == BUF_READ_IBUF_PAGES_ONLY) {
+
+@@ -2924,7 +3065,9 @@
+
+ free_block = buf_LRU_get_free_block(0);
+
+- buf_pool_mutex_enter();
++ //buf_pool_mutex_enter();
++ mutex_enter(&LRU_list_mutex);
++ rw_lock_x_lock(&page_hash_latch);
+
+ block = (buf_block_t*) buf_page_hash_get(space, offset);
+
+@@ -2937,7 +3080,9 @@
+ #endif /* UNIV_DEBUG_FILE_ACCESSES */
+
+ /* Page can be found in buf_pool */
+- buf_pool_mutex_exit();
++ //buf_pool_mutex_exit();
++ mutex_exit(&LRU_list_mutex);
++ rw_lock_x_unlock(&page_hash_latch);
+
+ buf_block_free(free_block);
+
+@@ -2959,6 +3104,7 @@
+ mutex_enter(&block->mutex);
+
+ buf_page_init(space, offset, block);
++ rw_lock_x_unlock(&page_hash_latch);
+
+ /* The block must be put to the LRU list */
+ buf_LRU_add_block(&block->page, FALSE);
+@@ -2985,7 +3131,7 @@
+ the reacquisition of buf_pool_mutex. We also must
+ defer this operation until after the block descriptor
+ has been added to buf_pool->LRU and buf_pool->page_hash. */
+- data = buf_buddy_alloc(zip_size, &lru);
++ data = buf_buddy_alloc(zip_size, &lru, FALSE);
+ mutex_enter(&block->mutex);
+ block->page.zip.data = data;
+
+@@ -3001,7 +3147,8 @@
+ rw_lock_x_unlock(&block->lock);
+ }
+
+- buf_pool_mutex_exit();
++ //buf_pool_mutex_exit();
++ mutex_exit(&LRU_list_mutex);
+
+ mtr_memo_push(mtr, block, MTR_MEMO_BUF_FIX);
+
+@@ -3053,6 +3200,7 @@
+ enum buf_io_fix io_type;
+ const ibool uncompressed = (buf_page_get_state(bpage)
+ == BUF_BLOCK_FILE_PAGE);
++ enum buf_flush flush_type;
+
+ ut_a(buf_page_in_file(bpage));
+
+@@ -3187,8 +3335,17 @@
+ }
+ }
+
+- buf_pool_mutex_enter();
++ //buf_pool_mutex_enter();
++ if (io_type == BUF_IO_WRITE) {
++ flush_type = buf_page_get_flush_type(bpage);
++ /* to keep consistency at buf_LRU_insert_zip_clean() */
++ //if (flush_type == BUF_FLUSH_LRU) { /* optimistic! */
++ mutex_enter(&LRU_list_mutex);
++ //}
++ mutex_enter(&flush_list_mutex);
++ }
+ mutex_enter(buf_page_get_mutex(bpage));
++ mutex_enter(&buf_pool_mutex);
+
+ #ifdef UNIV_IBUF_COUNT_DEBUG
+ if (io_type == BUF_IO_WRITE || uncompressed) {
+@@ -3228,6 +3385,12 @@
+
+ buf_flush_write_complete(bpage);
+
++ mutex_exit(&flush_list_mutex);
++ /* to keep consistency at buf_LRU_insert_zip_clean() */
++ //if (flush_type == BUF_FLUSH_LRU) { /* optimistic! */
++ mutex_exit(&LRU_list_mutex);
++ //}
++
+ if (uncompressed) {
+ rw_lock_s_unlock_gen(&((buf_block_t*) bpage)->lock,
+ BUF_IO_WRITE);
+@@ -3250,8 +3413,9 @@
+ }
+ #endif /* UNIV_DEBUG */
+
++ mutex_exit(&buf_pool_mutex);
+ mutex_exit(buf_page_get_mutex(bpage));
+- buf_pool_mutex_exit();
++ //buf_pool_mutex_exit();
+ }
+
+ /*************************************************************************
+@@ -3273,12 +3437,14 @@
+ freed = buf_LRU_search_and_free_block(100);
+ }
+
+- buf_pool_mutex_enter();
++ //buf_pool_mutex_enter();
++ mutex_enter(&LRU_list_mutex);
+
+ ut_ad(UT_LIST_GET_LEN(buf_pool->LRU) == 0);
+ ut_ad(UT_LIST_GET_LEN(buf_pool->unzip_LRU) == 0);
+
+- buf_pool_mutex_exit();
++ //buf_pool_mutex_exit();
++ mutex_exit(&LRU_list_mutex);
+ }
+
+ #if defined UNIV_DEBUG || defined UNIV_BUF_DEBUG
+@@ -3302,7 +3468,11 @@
+
+ ut_ad(buf_pool);
+
+- buf_pool_mutex_enter();
++ //buf_pool_mutex_enter();
++ mutex_enter(&LRU_list_mutex);
++ mutex_enter(&flush_list_mutex);
++ rw_lock_x_lock(&page_hash_latch);
++ /* in order to keep the new latch order, it cannot validate correctly... */
+
+ chunk = buf_pool->chunks;
+
+@@ -3483,19 +3653,25 @@
+ }
+
+ ut_a(UT_LIST_GET_LEN(buf_pool->LRU) == n_lru);
++ /* because of latching order, we cannot get free_list_mutex here. */
++/*
+ if (UT_LIST_GET_LEN(buf_pool->free) != n_free) {
+ fprintf(stderr, "Free list len %lu, free blocks %lu\n",
+ (ulong) UT_LIST_GET_LEN(buf_pool->free),
+ (ulong) n_free);
+ ut_error;
+ }
++*/
+ ut_a(UT_LIST_GET_LEN(buf_pool->flush_list) == n_flush);
+
+ ut_a(buf_pool->n_flush[BUF_FLUSH_SINGLE_PAGE] == n_single_flush);
+ ut_a(buf_pool->n_flush[BUF_FLUSH_LIST] == n_list_flush);
+ ut_a(buf_pool->n_flush[BUF_FLUSH_LRU] == n_lru_flush);
+
+- buf_pool_mutex_exit();
++ //buf_pool_mutex_exit();
++ mutex_exit(&LRU_list_mutex);
++ mutex_exit(&flush_list_mutex);
++ rw_lock_x_unlock(&page_hash_latch);
+
+ ut_a(buf_LRU_validate());
+ ut_a(buf_flush_validate());
+@@ -3529,7 +3705,10 @@
+ index_ids = mem_alloc(sizeof(dulint) * size);
+ counts = mem_alloc(sizeof(ulint) * size);
+
+- buf_pool_mutex_enter();
++ //buf_pool_mutex_enter();
++ mutex_enter(&LRU_list_mutex);
++ mutex_enter(&flush_list_mutex);
++ mutex_enter(&free_list_mutex);
+
+ fprintf(stderr,
+ "buf_pool size %lu\n"
+@@ -3592,7 +3771,10 @@
+ }
+ }
+
+- buf_pool_mutex_exit();
++ //buf_pool_mutex_exit();
++ mutex_exit(&LRU_list_mutex);
++ mutex_exit(&flush_list_mutex);
++ mutex_exit(&free_list_mutex);
+
+ for (i = 0; i < n_found; i++) {
+ index = dict_index_get_if_in_cache(index_ids[i]);
+@@ -3630,7 +3812,7 @@
+ ulint i;
+ ulint fixed_pages_number = 0;
+
+- buf_pool_mutex_enter();
++ //buf_pool_mutex_enter();
+
+ chunk = buf_pool->chunks;
+
+@@ -3700,7 +3882,7 @@
+ }
+
+ mutex_exit(&buf_pool_zip_mutex);
+- buf_pool_mutex_exit();
++ //buf_pool_mutex_exit();
+
+ return(fixed_pages_number);
+ }
+@@ -3729,7 +3911,7 @@
+ {
+ ulint ratio;
+
+- buf_pool_mutex_enter();
++ //buf_pool_mutex_enter(); /* optimistic */
+
+ ratio = (100 * UT_LIST_GET_LEN(buf_pool->flush_list))
+ / (1 + UT_LIST_GET_LEN(buf_pool->LRU)
+@@ -3737,7 +3919,7 @@
+
+ /* 1 + is there to avoid division by zero */
+
+- buf_pool_mutex_exit();
++ //buf_pool_mutex_exit(); /* optimistic */
+
+ return(ratio);
+ }
+@@ -3757,7 +3939,11 @@
+ ut_ad(buf_pool);
+ size = buf_pool->curr_size;
+
+- buf_pool_mutex_enter();
++ //buf_pool_mutex_enter();
++ mutex_enter(&LRU_list_mutex);
++ mutex_enter(&flush_list_mutex);
++ mutex_enter(&free_list_mutex);
++ mutex_enter(&buf_pool_mutex);
+
+ fprintf(file,
+ "Buffer pool size %lu\n"
+@@ -3824,7 +4010,11 @@
+ buf_LRU_stat_sum.io, buf_LRU_stat_cur.io,
+ buf_LRU_stat_sum.unzip, buf_LRU_stat_cur.unzip);
+
+- buf_pool_mutex_exit();
++ //buf_pool_mutex_exit();
++ mutex_exit(&LRU_list_mutex);
++ mutex_exit(&flush_list_mutex);
++ mutex_exit(&free_list_mutex);
++ mutex_exit(&buf_pool_mutex);
+ }
+
+ /**************************************************************************
+@@ -3853,7 +4043,7 @@
+
+ ut_ad(buf_pool);
+
+- buf_pool_mutex_enter();
++ //buf_pool_mutex_enter(); /* optimistic */
+
+ chunk = buf_pool->chunks;
+
+@@ -3870,7 +4060,7 @@
+ }
+ }
+
+- buf_pool_mutex_exit();
++ //buf_pool_mutex_exit(); /* optimistic */
+
+ return(TRUE);
+ }
+@@ -3886,7 +4076,8 @@
+ {
+ ibool ret;
+
+- buf_pool_mutex_enter();
++ //buf_pool_mutex_enter();
++ mutex_enter(&buf_pool_mutex);
+
+ if (buf_pool->n_pend_reads + buf_pool->n_flush[BUF_FLUSH_LRU]
+ + buf_pool->n_flush[BUF_FLUSH_LIST]
+@@ -3896,7 +4087,8 @@
+ ret = TRUE;
+ }
+
+- buf_pool_mutex_exit();
++ //buf_pool_mutex_exit();
++ mutex_exit(&buf_pool_mutex);
+
+ return(ret);
+ }
+@@ -3910,11 +4102,13 @@
+ {
+ ulint len;
+
+- buf_pool_mutex_enter();
++ //buf_pool_mutex_enter();
++ mutex_enter(&free_list_mutex);
+
+ len = UT_LIST_GET_LEN(buf_pool->free);
+
+- buf_pool_mutex_exit();
++ //buf_pool_mutex_exit();
++ mutex_exit(&free_list_mutex);
+
+ return(len);
+ }
+diff -ruN innodb_plugin-1.0.3_orig/buf/buf0flu.c innodb_plugin-1.0.3_tmp/buf/buf0flu.c
+--- innodb_plugin-1.0.3_orig/buf/buf0flu.c 2009-03-19 18:47:48.000000000 +0900
++++ innodb_plugin-1.0.3_tmp/buf/buf0flu.c 2009-03-19 18:53:43.000000000 +0900
+@@ -61,7 +61,8 @@
+ /*=============================*/
+ buf_block_t* block) /* in/out: block which is modified */
+ {
+- ut_ad(buf_pool_mutex_own());
++ //ut_ad(buf_pool_mutex_own());
++ ut_ad(mutex_own(&flush_list_mutex));
+ ut_ad((UT_LIST_GET_FIRST(buf_pool->flush_list) == NULL)
+ || (UT_LIST_GET_FIRST(buf_pool->flush_list)->oldest_modification
+ <= block->page.oldest_modification));
+@@ -92,7 +93,8 @@
+ buf_page_t* prev_b;
+ buf_page_t* b;
+
+- ut_ad(buf_pool_mutex_own());
++ //ut_ad(buf_pool_mutex_own());
++ ut_ad(mutex_own(&flush_list_mutex));
+ ut_ad(buf_block_get_state(block) == BUF_BLOCK_FILE_PAGE);
+
+ ut_ad(block->page.in_LRU_list);
+@@ -134,10 +136,10 @@
+ buf_page_in_file(bpage) and in the LRU list */
+ {
+ //ut_ad(buf_pool_mutex_own());
+- ut_ad(mutex_own(buf_page_get_mutex(bpage)));
+- //ut_ad(bpage->in_LRU_list); /* optimistic use */
++ //ut_ad(mutex_own(buf_page_get_mutex(bpage)));
++ //ut_ad(bpage->in_LRU_list);
+
+- if (UNIV_LIKELY(buf_page_in_file(bpage))) {
++ if (UNIV_LIKELY(bpage->in_LRU_list && buf_page_in_file(bpage))) {
+
+ return(bpage->oldest_modification == 0
+ && buf_page_get_io_fix(bpage) == BUF_IO_NONE
+@@ -170,7 +172,7 @@
+ enum buf_flush flush_type)/* in: BUF_FLUSH_LRU or BUF_FLUSH_LIST */
+ {
+ ut_a(buf_page_in_file(bpage));
+- ut_ad(buf_pool_mutex_own());
++ //ut_ad(buf_pool_mutex_own()); /*optimistic...*/
+ ut_ad(mutex_own(buf_page_get_mutex(bpage)));
+ ut_ad(flush_type == BUF_FLUSH_LRU || BUF_FLUSH_LIST);
+
+@@ -203,7 +205,8 @@
+ /*=============*/
+ buf_page_t* bpage) /* in: pointer to the block in question */
+ {
+- ut_ad(buf_pool_mutex_own());
++ //ut_ad(buf_pool_mutex_own());
++ ut_ad(mutex_own(&flush_list_mutex));
+ ut_ad(mutex_own(buf_page_get_mutex(bpage)));
+ ut_ad(bpage->in_flush_list);
+ ut_d(bpage->in_flush_list = FALSE);
+@@ -762,12 +765,19 @@
+ ibool is_uncompressed;
+
+ ut_ad(flush_type == BUF_FLUSH_LRU || flush_type == BUF_FLUSH_LIST);
+- ut_ad(buf_pool_mutex_own());
++ //ut_ad(buf_pool_mutex_own());
++#ifdef UNIV_SYNC_DEBUG
++ ut_ad(rw_lock_own(&page_hash_latch, RW_LOCK_EX)
++ || rw_lock_own(&page_hash_latch, RW_LOCK_SHARED));
++#endif
+ ut_ad(buf_page_in_file(bpage));
+
+ block_mutex = buf_page_get_mutex(bpage);
+ ut_ad(mutex_own(block_mutex));
+
++ mutex_enter(&buf_pool_mutex);
++ rw_lock_s_unlock(&page_hash_latch);
++
+ ut_ad(buf_flush_ready_for_flush(bpage, flush_type));
+
+ buf_page_set_io_fix(bpage, BUF_IO_WRITE);
+@@ -798,7 +808,8 @@
+ }
+
+ mutex_exit(block_mutex);
+- buf_pool_mutex_exit();
++ //buf_pool_mutex_exit();
++ mutex_exit(&buf_pool_mutex);
+
+ /* Even though bpage is not protected by any mutex at
+ this point, it is safe to access bpage, because it is
+@@ -835,7 +846,8 @@
+ immediately. */
+
+ mutex_exit(block_mutex);
+- buf_pool_mutex_exit();
++ //buf_pool_mutex_exit();
++ mutex_exit(&buf_pool_mutex);
+ break;
+
+ default:
+@@ -899,7 +911,8 @@
+ high = fil_space_get_size(space);
+ }
+
+- buf_pool_mutex_enter();
++ //buf_pool_mutex_enter();
++ rw_lock_s_lock(&page_hash_latch);
+
+ for (i = low; i < high; i++) {
+
+@@ -936,14 +949,16 @@
+ ut_ad(!mutex_own(block_mutex));
+ count++;
+
+- buf_pool_mutex_enter();
++ //buf_pool_mutex_enter();
++ rw_lock_s_lock(&page_hash_latch);
+ } else {
+ mutex_exit(block_mutex);
+ }
+ }
+ }
+
+- buf_pool_mutex_exit();
++ //buf_pool_mutex_exit();
++ rw_lock_s_unlock(&page_hash_latch);
+
+ return(count);
+ }
+@@ -987,20 +1002,29 @@
+ ut_ad((flush_type != BUF_FLUSH_LIST)
+ || sync_thread_levels_empty_gen(TRUE));
+ #endif /* UNIV_SYNC_DEBUG */
+- buf_pool_mutex_enter();
++ //buf_pool_mutex_enter();
++ mutex_enter(&buf_pool_mutex);
+
+ if ((buf_pool->n_flush[flush_type] > 0)
+ || (buf_pool->init_flush[flush_type] == TRUE)) {
+
+ /* There is already a flush batch of the same type running */
+
+- buf_pool_mutex_exit();
++ //buf_pool_mutex_exit();
++ mutex_exit(&buf_pool_mutex);
+
+ return(ULINT_UNDEFINED);
+ }
+
+ buf_pool->init_flush[flush_type] = TRUE;
+
++ mutex_exit(&buf_pool_mutex);
++
++ if (flush_type == BUF_FLUSH_LRU) {
++ mutex_enter(&LRU_list_mutex);
++ }
++ mutex_enter(&flush_list_mutex);
++
+ for (;;) {
+ flush_next:
+ /* If we have flushed enough, leave the loop */
+@@ -1047,7 +1071,11 @@
+ space = buf_page_get_space(bpage);
+ offset = buf_page_get_page_no(bpage);
+
+- buf_pool_mutex_exit();
++ //buf_pool_mutex_exit();
++ if (flush_type == BUF_FLUSH_LRU) {
++ mutex_exit(&LRU_list_mutex);
++ }
++ mutex_exit(&flush_list_mutex);
+
+ old_page_count = page_count;
+
+@@ -1057,7 +1085,8 @@
+ space, offset, flush_type);
+ } else {
+ /* Try to flush the page only */
+- buf_pool_mutex_enter();
++ //buf_pool_mutex_enter();
++ rw_lock_s_lock(&page_hash_latch);
+
+ mutex_t* block_mutex = buf_page_get_mutex(bpage);
+ mutex_enter(block_mutex);
+@@ -1073,7 +1102,11 @@
+ flush_type, offset,
+ page_count - old_page_count); */
+
+- buf_pool_mutex_enter();
++ //buf_pool_mutex_enter();
++ if (flush_type == BUF_FLUSH_LRU) {
++ mutex_enter(&LRU_list_mutex);
++ }
++ mutex_enter(&flush_list_mutex);
+ goto flush_next;
+
+ } else if (flush_type == BUF_FLUSH_LRU) {
+@@ -1091,6 +1124,13 @@
+ break;
+ }
+
++ if (flush_type == BUF_FLUSH_LRU) {
++ mutex_exit(&LRU_list_mutex);
++ }
++ mutex_exit(&flush_list_mutex);
++
++ mutex_enter(&buf_pool_mutex);
++
+ buf_pool->init_flush[flush_type] = FALSE;
+
+ if (buf_pool->n_flush[flush_type] == 0) {
+@@ -1100,7 +1140,8 @@
+ os_event_set(buf_pool->no_flush[flush_type]);
+ }
+
+- buf_pool_mutex_exit();
++ //buf_pool_mutex_exit();
++ mutex_exit(&buf_pool_mutex);
+
+ buf_flush_buffered_writes();
+
+@@ -1147,8 +1188,14 @@
+ buf_page_t* bpage;
+ ulint n_replaceable;
+ ulint distance = 0;
++ ibool have_LRU_mutex = FALSE;
++
++ if(UT_LIST_GET_LEN(buf_pool->unzip_LRU))
++ have_LRU_mutex = TRUE;
+
+ //buf_pool_mutex_enter();
++ if (have_LRU_mutex)
++ mutex_enter(&LRU_list_mutex);
+
+ n_replaceable = UT_LIST_GET_LEN(buf_pool->free);
+
+@@ -1159,6 +1206,12 @@
+ + BUF_FLUSH_EXTRA_MARGIN)
+ && (distance < BUF_LRU_FREE_SEARCH_LEN)) {
+
++ if (!bpage->in_LRU_list) {
++ /* restart. but it is very optimistic */
++ bpage = UT_LIST_GET_LAST(buf_pool->LRU);
++ continue;
++ }
++
+ mutex_t* block_mutex = buf_page_get_mutex(bpage);
+
+ mutex_enter(block_mutex);
+@@ -1175,6 +1228,8 @@
+ }
+
+ //buf_pool_mutex_exit();
++ if (have_LRU_mutex)
++ mutex_exit(&LRU_list_mutex);
+
+ if (n_replaceable >= BUF_FLUSH_FREE_BLOCK_MARGIN) {
+
+@@ -1252,11 +1307,13 @@
+ {
+ ibool ret;
+
+- buf_pool_mutex_enter();
++ //buf_pool_mutex_enter();
++ mutex_enter(&flush_list_mutex);
+
+ ret = buf_flush_validate_low();
+
+- buf_pool_mutex_exit();
++ //buf_pool_mutex_exit();
++ mutex_exit(&flush_list_mutex);
+
+ return(ret);
+ }
+diff -ruN innodb_plugin-1.0.3_orig/buf/buf0lru.c innodb_plugin-1.0.3_tmp/buf/buf0lru.c
+--- innodb_plugin-1.0.3_orig/buf/buf0lru.c 2009-03-19 18:47:48.000000000 +0900
++++ innodb_plugin-1.0.3_tmp/buf/buf0lru.c 2009-03-19 18:51:20.000000000 +0900
+@@ -129,25 +129,31 @@
+ void
+ buf_LRU_block_free_hashed_page(
+ /*===========================*/
+- buf_block_t* block); /* in: block, must contain a file page and
++ buf_block_t* block, /* in: block, must contain a file page and
+ be in a state where it can be freed */
++ ibool have_page_hash_mutex);
+
+ /**********************************************************************
+ Determines if the unzip_LRU list should be used for evicting a victim
+ instead of the general LRU list. */
+ UNIV_INLINE
+ ibool
+-buf_LRU_evict_from_unzip_LRU(void)
++buf_LRU_evict_from_unzip_LRU(
++ ibool have_LRU_mutex)
+ /*==============================*/
+ /* out: TRUE if should use unzip_LRU */
+ {
+ ulint io_avg;
+ ulint unzip_avg;
+
+- ut_ad(buf_pool_mutex_own());
++ //ut_ad(buf_pool_mutex_own());
+
++ if (!have_LRU_mutex)
++ mutex_enter(&LRU_list_mutex);
+ /* If the unzip_LRU list is empty, we can only use the LRU. */
+ if (UT_LIST_GET_LEN(buf_pool->unzip_LRU) == 0) {
++ if (!have_LRU_mutex)
++ mutex_exit(&LRU_list_mutex);
+ return(FALSE);
+ }
+
+@@ -156,14 +162,20 @@
+ decompressed pages in the buffer pool. */
+ if (UT_LIST_GET_LEN(buf_pool->unzip_LRU)
+ <= UT_LIST_GET_LEN(buf_pool->LRU) / 10) {
++ if (!have_LRU_mutex)
++ mutex_exit(&LRU_list_mutex);
+ return(FALSE);
+ }
+
+ /* If eviction hasn't started yet, we assume by default
+ that a workload is disk bound. */
+ if (buf_pool->freed_page_clock == 0) {
++ if (!have_LRU_mutex)
++ mutex_exit(&LRU_list_mutex);
+ return(TRUE);
+ }
++ if (!have_LRU_mutex)
++ mutex_exit(&LRU_list_mutex);
+
+ /* Calculate the average over past intervals, and add the values
+ of the current interval. */
+@@ -229,7 +241,8 @@
+
+ page_arr = ut_malloc(sizeof(ulint)
+ * BUF_LRU_DROP_SEARCH_HASH_SIZE);
+- buf_pool_mutex_enter();
++ //buf_pool_mutex_enter();
++ mutex_enter(&LRU_list_mutex);
+
+ scan_again:
+ num_entries = 0;
+@@ -269,12 +282,14 @@
+ }
+ /* Array full. We release the buf_pool_mutex to
+ obey the latching order. */
+- buf_pool_mutex_exit();
++ //buf_pool_mutex_exit();
++ mutex_exit(&LRU_list_mutex);
+
+ buf_LRU_drop_page_hash_batch(id, zip_size, page_arr,
+ num_entries);
+ num_entries = 0;
+- buf_pool_mutex_enter();
++ //buf_pool_mutex_enter();
++ mutex_enter(&LRU_list_mutex);
+ } else {
+ mutex_exit(block_mutex);
+ }
+@@ -299,7 +314,8 @@
+ }
+ }
+
+- buf_pool_mutex_exit();
++ //buf_pool_mutex_exit();
++ mutex_exit(&LRU_list_mutex);
+
+ /* Drop any remaining batch of search hashed pages. */
+ buf_LRU_drop_page_hash_batch(id, zip_size, page_arr, num_entries);
+@@ -327,7 +343,10 @@
+ buf_LRU_drop_page_hash_for_tablespace(id);
+
+ scan_again:
+- buf_pool_mutex_enter();
++ //buf_pool_mutex_enter();
++ mutex_enter(&LRU_list_mutex);
++ mutex_enter(&flush_list_mutex);
++ rw_lock_x_lock(&page_hash_latch);
+
+ all_freed = TRUE;
+
+@@ -369,7 +388,10 @@
+ ulint page_no;
+ ulint zip_size;
+
+- buf_pool_mutex_exit();
++ //buf_pool_mutex_exit();
++ mutex_exit(&LRU_list_mutex);
++ mutex_exit(&flush_list_mutex);
++ rw_lock_x_unlock(&page_hash_latch);
+
+ zip_size = buf_page_get_zip_size(bpage);
+ page_no = buf_page_get_page_no(bpage);
+@@ -393,7 +415,7 @@
+ if (buf_LRU_block_remove_hashed_page(bpage, TRUE)
+ != BUF_BLOCK_ZIP_FREE) {
+ buf_LRU_block_free_hashed_page((buf_block_t*)
+- bpage);
++ bpage, TRUE);
+ } else {
+ /* The block_mutex should have been
+ released by buf_LRU_block_remove_hashed_page()
+@@ -416,7 +438,10 @@
+ bpage = prev_bpage;
+ }
+
+- buf_pool_mutex_exit();
++ //buf_pool_mutex_exit();
++ mutex_exit(&LRU_list_mutex);
++ mutex_exit(&flush_list_mutex);
++ rw_lock_x_unlock(&page_hash_latch);
+
+ if (!all_freed) {
+ os_thread_sleep(20000);
+@@ -439,14 +464,16 @@
+ ulint len;
+ ulint limit;
+
+- buf_pool_mutex_enter();
++ //buf_pool_mutex_enter();
++ mutex_enter(&LRU_list_mutex);
+
+ len = UT_LIST_GET_LEN(buf_pool->LRU);
+
+ if (len < BUF_LRU_OLD_MIN_LEN) {
+ /* The LRU list is too short to do read-ahead */
+
+- buf_pool_mutex_exit();
++ //buf_pool_mutex_exit();
++ mutex_exit(&LRU_list_mutex);
+
+ return(0);
+ }
+@@ -455,7 +482,8 @@
+
+ limit = buf_page_get_LRU_position(bpage) - len / BUF_LRU_INITIAL_RATIO;
+
+- buf_pool_mutex_exit();
++ //buf_pool_mutex_exit();
++ mutex_exit(&LRU_list_mutex);
+
+ return(limit);
+ }
+@@ -470,7 +498,8 @@
+ {
+ buf_page_t* b;
+
+- ut_ad(buf_pool_mutex_own());
++ //ut_ad(buf_pool_mutex_own());
++ ut_ad(mutex_own(&flush_list_mutex));
+ ut_ad(buf_page_get_state(bpage) == BUF_BLOCK_ZIP_PAGE);
+
+ /* Find the first successor of bpage in the LRU list
+@@ -478,7 +507,7 @@
+ b = bpage;
+ do {
+ b = UT_LIST_GET_NEXT(LRU, b);
+- } while (b && buf_page_get_state(b) != BUF_BLOCK_ZIP_PAGE);
++ } while (b && (buf_page_get_state(b) != BUF_BLOCK_ZIP_PAGE || !b->in_LRU_list));
+
+ /* Insert bpage before b, i.e., after the predecessor of b. */
+ if (b) {
+@@ -500,16 +529,17 @@
+ buf_LRU_free_from_unzip_LRU_list(
+ /*=============================*/
+ /* out: TRUE if freed */
+- ulint n_iterations) /* in: how many times this has been called
++ ulint n_iterations, /* in: how many times this has been called
+ repeatedly without result: a high value means
+ that we should search farther; we will search
+ n_iterations / 5 of the unzip_LRU list,
+ or nothing if n_iterations >= 5 */
++ ibool have_LRU_mutex)
+ {
+ buf_block_t* block;
+ ulint distance;
+
+- ut_ad(buf_pool_mutex_own());
++ //ut_ad(buf_pool_mutex_own()); /* optimistic */
+
+ /* Theoratically it should be much easier to find a victim
+ from unzip_LRU as we can choose even a dirty block (as we'll
+@@ -519,7 +549,7 @@
+ if we have done five iterations so far. */
+
+ if (UNIV_UNLIKELY(n_iterations >= 5)
+- || !buf_LRU_evict_from_unzip_LRU()) {
++ || !buf_LRU_evict_from_unzip_LRU(have_LRU_mutex)) {
+
+ return(FALSE);
+ }
+@@ -527,18 +557,23 @@
+ distance = 100 + (n_iterations
+ * UT_LIST_GET_LEN(buf_pool->unzip_LRU)) / 5;
+
++restart:
+ for (block = UT_LIST_GET_LAST(buf_pool->unzip_LRU);
+ UNIV_LIKELY(block != NULL) && UNIV_LIKELY(distance > 0);
+ block = UT_LIST_GET_PREV(unzip_LRU, block), distance--) {
++ if (!block->in_unzip_LRU_list || !block->page.in_LRU_list
++ || buf_block_get_state(block) != BUF_BLOCK_FILE_PAGE)
++ goto restart;
+
+ enum buf_lru_free_block_status freed;
+
+- ut_ad(buf_block_get_state(block) == BUF_BLOCK_FILE_PAGE);
+- ut_ad(block->in_unzip_LRU_list);
+- ut_ad(block->page.in_LRU_list);
++ /* optimistic */
++ //ut_ad(buf_block_get_state(block) == BUF_BLOCK_FILE_PAGE);
++ //ut_ad(block->in_unzip_LRU_list);
++ //ut_ad(block->page.in_LRU_list);
+
+ mutex_enter(&block->mutex);
+- freed = buf_LRU_free_block(&block->page, FALSE, NULL);
++ freed = buf_LRU_free_block(&block->page, FALSE, NULL, have_LRU_mutex);
+ mutex_exit(&block->mutex);
+
+ switch (freed) {
+@@ -571,33 +606,39 @@
+ buf_LRU_free_from_common_LRU_list(
+ /*==============================*/
+ /* out: TRUE if freed */
+- ulint n_iterations) /* in: how many times this has been called
++ ulint n_iterations, /* in: how many times this has been called
+ repeatedly without result: a high value means
+ that we should search farther; if
+ n_iterations < 10, then we search
+ n_iterations / 10 * buf_pool->curr_size
+ pages from the end of the LRU list */
++ ibool have_LRU_mutex)
+ {
+ buf_page_t* bpage;
+ ulint distance;
+
+- ut_ad(buf_pool_mutex_own());
++ //ut_ad(buf_pool_mutex_own()); /* optimistic */
+
+ distance = 100 + (n_iterations * buf_pool->curr_size) / 10;
+
++restart:
+ for (bpage = UT_LIST_GET_LAST(buf_pool->LRU);
+ UNIV_LIKELY(bpage != NULL) && UNIV_LIKELY(distance > 0);
+ bpage = UT_LIST_GET_PREV(LRU, bpage), distance--) {
++ if (!bpage->in_LRU_list
++ || buf_page_get_state(bpage) == BUF_BLOCK_ZIP_FREE)
++ goto restart;
+
+ enum buf_lru_free_block_status freed;
+ mutex_t* block_mutex
+ = buf_page_get_mutex(bpage);
+
+- ut_ad(buf_page_in_file(bpage));
+- ut_ad(bpage->in_LRU_list);
++ /* optimistic */
++ //ut_ad(buf_page_in_file(bpage));
++ //ut_ad(bpage->in_LRU_list);
+
+ mutex_enter(block_mutex);
+- freed = buf_LRU_free_block(bpage, TRUE, NULL);
++ freed = buf_LRU_free_block(bpage, TRUE, NULL, have_LRU_mutex);
+ mutex_exit(block_mutex);
+
+ switch (freed) {
+@@ -640,22 +681,33 @@
+ n_iterations / 5 of the unzip_LRU list. */
+ {
+ ibool freed = FALSE;
++ ibool have_LRU_mutex = FALSE;
++
++ if (UT_LIST_GET_LEN(buf_pool->unzip_LRU))
++ have_LRU_mutex = TRUE;
+
+- buf_pool_mutex_enter();
++ /* optimistic search... */
++ //buf_pool_mutex_enter();
++ if (have_LRU_mutex)
++ mutex_enter(&LRU_list_mutex);
+
+- freed = buf_LRU_free_from_unzip_LRU_list(n_iterations);
++ freed = buf_LRU_free_from_unzip_LRU_list(n_iterations, have_LRU_mutex);
+
+ if (!freed) {
+- freed = buf_LRU_free_from_common_LRU_list(n_iterations);
++ freed = buf_LRU_free_from_common_LRU_list(n_iterations, have_LRU_mutex);
+ }
+
++ mutex_enter(&buf_pool_mutex);
+ if (!freed) {
+ buf_pool->LRU_flush_ended = 0;
+ } else if (buf_pool->LRU_flush_ended > 0) {
+ buf_pool->LRU_flush_ended--;
+ }
++ mutex_exit(&buf_pool_mutex);
+
+- buf_pool_mutex_exit();
++ //buf_pool_mutex_exit();
++ if (have_LRU_mutex)
++ mutex_exit(&LRU_list_mutex);
+
+ return(freed);
+ }
+@@ -673,18 +725,22 @@
+ buf_LRU_try_free_flushed_blocks(void)
+ /*=================================*/
+ {
+- buf_pool_mutex_enter();
++ //buf_pool_mutex_enter();
++ mutex_enter(&buf_pool_mutex);
+
+ while (buf_pool->LRU_flush_ended > 0) {
+
+- buf_pool_mutex_exit();
++ //buf_pool_mutex_exit();
++ mutex_exit(&buf_pool_mutex);
+
+ buf_LRU_search_and_free_block(1);
+
+- buf_pool_mutex_enter();
++ //buf_pool_mutex_enter();
++ mutex_enter(&buf_pool_mutex);
+ }
+
+- buf_pool_mutex_exit();
++ //buf_pool_mutex_exit();
++ mutex_exit(&buf_pool_mutex);
+ }
+
+ /**********************************************************************
+@@ -700,7 +756,9 @@
+ {
+ ibool ret = FALSE;
+
+- buf_pool_mutex_enter();
++ //buf_pool_mutex_enter();
++ mutex_enter(&LRU_list_mutex);
++ mutex_enter(&free_list_mutex);
+
+ if (!recv_recovery_on && UT_LIST_GET_LEN(buf_pool->free)
+ + UT_LIST_GET_LEN(buf_pool->LRU) < buf_pool->curr_size / 4) {
+@@ -708,7 +766,9 @@
+ ret = TRUE;
+ }
+
+- buf_pool_mutex_exit();
++ //buf_pool_mutex_exit();
++ mutex_exit(&LRU_list_mutex);
++ mutex_exit(&free_list_mutex);
+
+ return(ret);
+ }
+@@ -725,9 +785,10 @@
+ {
+ buf_block_t* block;
+
+- ut_ad(buf_pool_mutex_own());
++ //ut_ad(buf_pool_mutex_own());
+
+- block = (buf_block_t*) UT_LIST_GET_FIRST(buf_pool->free);
++ mutex_enter(&free_list_mutex);
++ block = (buf_block_t*) UT_LIST_GET_LAST(buf_pool->free);
+
+ if (block) {
+ ut_ad(block->page.in_free_list);
+@@ -737,12 +798,16 @@
+ ut_a(!buf_page_in_file(&block->page));
+ UT_LIST_REMOVE(list, buf_pool->free, (&block->page));
+
++ mutex_exit(&free_list_mutex);
++
+ mutex_enter(&block->mutex);
+
+ buf_block_set_state(block, BUF_BLOCK_READY_FOR_USE);
+ UNIV_MEM_ALLOC(block->frame, UNIV_PAGE_SIZE);
+
+ mutex_exit(&block->mutex);
++ } else {
++ mutex_exit(&free_list_mutex);
+ }
+
+ return(block);
+@@ -767,7 +832,7 @@
+ ibool mon_value_was = FALSE;
+ ibool started_monitor = FALSE;
+ loop:
+- buf_pool_mutex_enter();
++ //buf_pool_mutex_enter();
+
+ if (!recv_recovery_on && UT_LIST_GET_LEN(buf_pool->free)
+ + UT_LIST_GET_LEN(buf_pool->LRU) < buf_pool->curr_size / 20) {
+@@ -847,14 +912,16 @@
+ if (UNIV_UNLIKELY(zip_size)) {
+ ibool lru;
+ page_zip_set_size(&block->page.zip, zip_size);
+- block->page.zip.data = buf_buddy_alloc(zip_size, &lru);
++ mutex_enter(&LRU_list_mutex);
++ block->page.zip.data = buf_buddy_alloc(zip_size, &lru, FALSE);
++ mutex_exit(&LRU_list_mutex);
+ UNIV_MEM_DESC(block->page.zip.data, zip_size, block);
+ } else {
+ page_zip_set_size(&block->page.zip, 0);
+ block->page.zip.data = NULL;
+ }
+
+- buf_pool_mutex_exit();
++ //buf_pool_mutex_exit();
+
+ if (started_monitor) {
+ srv_print_innodb_monitor = mon_value_was;
+@@ -866,7 +933,7 @@
+ /* If no block was in the free list, search from the end of the LRU
+ list and try to free a block there */
+
+- buf_pool_mutex_exit();
++ //buf_pool_mutex_exit();
+
+ freed = buf_LRU_search_and_free_block(n_iterations);
+
+@@ -915,18 +982,21 @@
+
+ os_aio_simulated_wake_handler_threads();
+
+- buf_pool_mutex_enter();
++ //buf_pool_mutex_enter();
++ mutex_enter(&buf_pool_mutex);
+
+ if (buf_pool->LRU_flush_ended > 0) {
+ /* We have written pages in an LRU flush. To make the insert
+ buffer more efficient, we try to move these pages to the free
+ list. */
+
+- buf_pool_mutex_exit();
++ //buf_pool_mutex_exit();
++ mutex_exit(&buf_pool_mutex);
+
+ buf_LRU_try_free_flushed_blocks();
+ } else {
+- buf_pool_mutex_exit();
++ //buf_pool_mutex_exit();
++ mutex_exit(&buf_pool_mutex);
+ }
+
+ if (n_iterations > 10) {
+@@ -951,7 +1021,8 @@
+ ulint new_len;
+
+ ut_a(buf_pool->LRU_old);
+- ut_ad(buf_pool_mutex_own());
++ //ut_ad(buf_pool_mutex_own());
++ ut_ad(mutex_own(&LRU_list_mutex));
+ #if 3 * (BUF_LRU_OLD_MIN_LEN / 8) <= BUF_LRU_OLD_TOLERANCE + 5
+ # error "3 * (BUF_LRU_OLD_MIN_LEN / 8) <= BUF_LRU_OLD_TOLERANCE + 5"
+ #endif
+@@ -1009,7 +1080,8 @@
+ {
+ buf_page_t* bpage;
+
+- ut_ad(buf_pool_mutex_own());
++ //ut_ad(buf_pool_mutex_own());
++ ut_ad(mutex_own(&LRU_list_mutex));
+ ut_a(UT_LIST_GET_LEN(buf_pool->LRU) == BUF_LRU_OLD_MIN_LEN);
+
+ /* We first initialize all blocks in the LRU list as old and then use
+@@ -1041,13 +1113,14 @@
+ ut_ad(buf_pool);
+ ut_ad(bpage);
+ ut_ad(buf_page_in_file(bpage));
+- ut_ad(buf_pool_mutex_own());
++ //ut_ad(buf_pool_mutex_own());
++ ut_ad(mutex_own(&LRU_list_mutex));
+
+ if (buf_page_belongs_to_unzip_LRU(bpage)) {
+ buf_block_t* block = (buf_block_t*) bpage;
+
+ ut_ad(block->in_unzip_LRU_list);
+- ut_d(block->in_unzip_LRU_list = FALSE);
++ block->in_unzip_LRU_list = FALSE;
+
+ UT_LIST_REMOVE(unzip_LRU, buf_pool->unzip_LRU, block);
+ }
+@@ -1063,7 +1136,8 @@
+ {
+ ut_ad(buf_pool);
+ ut_ad(bpage);
+- ut_ad(buf_pool_mutex_own());
++ //ut_ad(buf_pool_mutex_own());
++ ut_ad(mutex_own(&LRU_list_mutex));
+
+ ut_a(buf_page_in_file(bpage));
+
+@@ -1090,7 +1164,7 @@
+
+ /* Remove the block from the LRU list */
+ UT_LIST_REMOVE(LRU, buf_pool->LRU, bpage);
+- ut_d(bpage->in_LRU_list = FALSE);
++ bpage->in_LRU_list = FALSE;
+
+ buf_unzip_LRU_remove_block_if_needed(bpage);
+
+@@ -1126,12 +1200,13 @@
+ {
+ ut_ad(buf_pool);
+ ut_ad(block);
+- ut_ad(buf_pool_mutex_own());
++ //ut_ad(buf_pool_mutex_own());
++ ut_ad(mutex_own(&LRU_list_mutex));
+
+ ut_a(buf_page_belongs_to_unzip_LRU(&block->page));
+
+ ut_ad(!block->in_unzip_LRU_list);
+- ut_d(block->in_unzip_LRU_list = TRUE);
++ block->in_unzip_LRU_list = TRUE;
+
+ if (old) {
+ UT_LIST_ADD_LAST(unzip_LRU, buf_pool->unzip_LRU, block);
+@@ -1152,7 +1227,8 @@
+
+ ut_ad(buf_pool);
+ ut_ad(bpage);
+- ut_ad(buf_pool_mutex_own());
++ //ut_ad(buf_pool_mutex_own());
++ ut_ad(mutex_own(&LRU_list_mutex));
+
+ ut_a(buf_page_in_file(bpage));
+
+@@ -1166,7 +1242,7 @@
+
+ ut_ad(!bpage->in_LRU_list);
+ UT_LIST_ADD_LAST(LRU, buf_pool->LRU, bpage);
+- ut_d(bpage->in_LRU_list = TRUE);
++ bpage->in_LRU_list = TRUE;
+
+ buf_page_set_old(bpage, TRUE);
+
+@@ -1212,7 +1288,8 @@
+ {
+ ut_ad(buf_pool);
+ ut_ad(bpage);
+- ut_ad(buf_pool_mutex_own());
++ //ut_ad(buf_pool_mutex_own());
++ ut_ad(mutex_own(&LRU_list_mutex));
+
+ ut_a(buf_page_in_file(bpage));
+ ut_ad(!bpage->in_LRU_list);
+@@ -1243,7 +1320,7 @@
+ bpage->LRU_position = (buf_pool->LRU_old)->LRU_position;
+ }
+
+- ut_d(bpage->in_LRU_list = TRUE);
++ bpage->in_LRU_list = TRUE;
+
+ buf_page_set_old(bpage, old);
+
+@@ -1331,22 +1408,24 @@
+ buf_page_t* bpage, /* in: block to be freed */
+ ibool zip, /* in: TRUE if should remove also the
+ compressed page of an uncompressed page */
+- ibool* buf_pool_mutex_released)
++ ibool* buf_pool_mutex_released,
+ /* in: pointer to a variable that will
+ be assigned TRUE if buf_pool_mutex
+ was temporarily released, or NULL */
++ ibool have_LRU_mutex)
+ {
+ buf_page_t* b = NULL;
+ mutex_t* block_mutex = buf_page_get_mutex(bpage);
+
+- ut_ad(buf_pool_mutex_own());
+- ut_ad(mutex_own(block_mutex));
+- ut_ad(buf_page_in_file(bpage));
+- ut_ad(bpage->in_LRU_list);
+- ut_ad(!bpage->in_flush_list == !bpage->oldest_modification);
++ //ut_ad(buf_pool_mutex_own());
++ /* optimistic */
++ //ut_ad(mutex_own(block_mutex));
++ //ut_ad(buf_page_in_file(bpage));
++ //ut_ad(bpage->in_LRU_list);
++ //ut_ad(!bpage->in_flush_list == !bpage->oldest_modification);
+ UNIV_MEM_ASSERT_RW(bpage, sizeof *bpage);
+
+- if (!buf_page_can_relocate(bpage)) {
++ if (!bpage->in_LRU_list || !block_mutex || !buf_page_can_relocate(bpage)) {
+
+ /* Do not free buffer-fixed or I/O-fixed blocks. */
+ return(BUF_LRU_NOT_FREED);
+@@ -1378,15 +1457,15 @@
+ If it cannot be allocated (without freeing a block
+ from the LRU list), refuse to free bpage. */
+ alloc:
+- buf_pool_mutex_exit_forbid();
+- b = buf_buddy_alloc(sizeof *b, NULL);
+- buf_pool_mutex_exit_allow();
++ //buf_pool_mutex_exit_forbid();
++ b = buf_buddy_alloc(sizeof *b, NULL, FALSE);
++ //buf_pool_mutex_exit_allow();
+
+ if (UNIV_UNLIKELY(!b)) {
+ return(BUF_LRU_CANNOT_RELOCATE);
+ }
+
+- memcpy(b, bpage, sizeof *b);
++ //memcpy(b, bpage, sizeof *b);
+ }
+
+ #ifdef UNIV_DEBUG
+@@ -1397,6 +1476,41 @@
+ }
+ #endif /* UNIV_DEBUG */
+
++ /* to keep the latch order, release block_mutex and re-acquire it after the list mutexes and page_hash_latch */
++ mutex_exit(block_mutex);
++
++ if (!have_LRU_mutex)
++ mutex_enter(&LRU_list_mutex); /* optimistic */
++ mutex_enter(&flush_list_mutex);
++ rw_lock_x_lock(&page_hash_latch);
++ mutex_enter(block_mutex);
++
++ /* re-check the state of the block */
++ if (!bpage->in_LRU_list || block_mutex != buf_page_get_mutex(bpage)
++ || !buf_page_can_relocate(bpage)) {
++not_freed:
++ if (b) {
++ buf_buddy_free(b, sizeof *b, TRUE);
++ }
++ if (!have_LRU_mutex)
++ mutex_exit(&LRU_list_mutex);
++ mutex_exit(&flush_list_mutex);
++ rw_lock_x_unlock(&page_hash_latch);
++ return(BUF_LRU_NOT_FREED);
++ } else if (zip || !bpage->zip.data) {
++ if (bpage->oldest_modification)
++ goto not_freed;
++ } else if (bpage->oldest_modification) {
++ if (buf_page_get_state(bpage) != BUF_BLOCK_FILE_PAGE) {
++ ut_ad(buf_page_get_state(bpage) == BUF_BLOCK_ZIP_DIRTY);
++ goto not_freed;
++ }
++ }
++
++ if (b) {
++ memcpy(b, bpage, sizeof *b);
++ }
++
+ if (buf_LRU_block_remove_hashed_page(bpage, zip)
+ != BUF_BLOCK_ZIP_FREE) {
+ ut_a(bpage->buf_fix_count == 0);
+@@ -1408,6 +1522,10 @@
+
+ ut_a(!buf_page_hash_get(bpage->space, bpage->offset));
+
++ while (prev_b && !prev_b->in_LRU_list) {
++ prev_b = UT_LIST_GET_PREV(LRU, prev_b);
++ }
++
+ b->state = b->oldest_modification
+ ? BUF_BLOCK_ZIP_DIRTY
+ : BUF_BLOCK_ZIP_PAGE;
+@@ -1478,7 +1596,7 @@
+ buf_LRU_old_init();
+ }
+ } else {
+- ut_d(b->in_LRU_list = FALSE);
++ b->in_LRU_list = FALSE;
+ buf_LRU_add_block_low(b, buf_page_is_old(b));
+ }
+
+@@ -1521,7 +1639,10 @@
+ *buf_pool_mutex_released = TRUE;
+ }
+
+- buf_pool_mutex_exit();
++ //buf_pool_mutex_exit();
++ mutex_exit(&LRU_list_mutex);
++ mutex_exit(&flush_list_mutex);
++ rw_lock_x_unlock(&page_hash_latch);
+ mutex_exit(block_mutex);
+
+ /* Remove possible adaptive hash index on the page.
+@@ -1553,7 +1674,9 @@
+ : BUF_NO_CHECKSUM_MAGIC);
+ }
+
+- buf_pool_mutex_enter();
++ //buf_pool_mutex_enter();
++ if (have_LRU_mutex)
++ mutex_enter(&LRU_list_mutex);
+ mutex_enter(block_mutex);
+
+ if (b) {
+@@ -1563,13 +1686,18 @@
+ mutex_exit(&buf_pool_zip_mutex);
+ }
+
+- buf_LRU_block_free_hashed_page((buf_block_t*) bpage);
++ buf_LRU_block_free_hashed_page((buf_block_t*) bpage, FALSE);
+ } else {
+ /* The block_mutex should have been released by
+ buf_LRU_block_remove_hashed_page() when it returns
+ BUF_BLOCK_ZIP_FREE. */
+ ut_ad(block_mutex == &buf_pool_zip_mutex);
+ mutex_enter(block_mutex);
++
++ if (!have_LRU_mutex)
++ mutex_exit(&LRU_list_mutex);
++ mutex_exit(&flush_list_mutex);
++ rw_lock_x_unlock(&page_hash_latch);
+ }
+
+ return(BUF_LRU_FREED);
+@@ -1581,12 +1709,13 @@
+ void
+ buf_LRU_block_free_non_file_page(
+ /*=============================*/
+- buf_block_t* block) /* in: block, must not contain a file page */
++ buf_block_t* block, /* in: block, must not contain a file page */
++ ibool have_page_hash_mutex)
+ {
+ void* data;
+
+ ut_ad(block);
+- ut_ad(buf_pool_mutex_own());
++ //ut_ad(buf_pool_mutex_own());
+ ut_ad(mutex_own(&block->mutex));
+
+ switch (buf_block_get_state(block)) {
+@@ -1620,15 +1749,17 @@
+ if (data) {
+ block->page.zip.data = NULL;
+ mutex_exit(&block->mutex);
+- buf_pool_mutex_exit_forbid();
+- buf_buddy_free(data, page_zip_get_size(&block->page.zip));
+- buf_pool_mutex_exit_allow();
++ //buf_pool_mutex_exit_forbid();
++ buf_buddy_free(data, page_zip_get_size(&block->page.zip), have_page_hash_mutex);
++ //buf_pool_mutex_exit_allow();
+ mutex_enter(&block->mutex);
+ page_zip_set_size(&block->page.zip, 0);
+ }
+
++ mutex_enter(&free_list_mutex);
+ UT_LIST_ADD_FIRST(list, buf_pool->free, (&block->page));
+ ut_d(block->page.in_free_list = TRUE);
++ mutex_exit(&free_list_mutex);
+
+ UNIV_MEM_ASSERT_AND_FREE(block->frame, UNIV_PAGE_SIZE);
+ }
+@@ -1657,7 +1788,11 @@
+ {
+ const buf_page_t* hashed_bpage;
+ ut_ad(bpage);
+- ut_ad(buf_pool_mutex_own());
++ //ut_ad(buf_pool_mutex_own());
++ ut_ad(mutex_own(&LRU_list_mutex));
++#ifdef UNIV_SYNC_DEBUG
++ ut_ad(rw_lock_own(&page_hash_latch, RW_LOCK_EX));
++#endif
+ ut_ad(mutex_own(buf_page_get_mutex(bpage)));
+
+ ut_a(buf_page_get_io_fix(bpage) == BUF_IO_NONE);
+@@ -1758,7 +1893,9 @@
+
+ #if defined UNIV_DEBUG || defined UNIV_BUF_DEBUG
+ mutex_exit(buf_page_get_mutex(bpage));
+- buf_pool_mutex_exit();
++ //buf_pool_mutex_exit();
++ mutex_exit(&LRU_list_mutex);
++ rw_lock_x_unlock(&page_hash_latch);
+ buf_print();
+ buf_LRU_print();
+ buf_validate();
+@@ -1784,11 +1921,11 @@
+ UT_LIST_REMOVE(list, buf_pool->zip_clean, bpage);
+
+ mutex_exit(&buf_pool_zip_mutex);
+- buf_pool_mutex_exit_forbid();
++ //buf_pool_mutex_exit_forbid();
+ buf_buddy_free(bpage->zip.data,
+- page_zip_get_size(&bpage->zip));
+- buf_buddy_free(bpage, sizeof(*bpage));
+- buf_pool_mutex_exit_allow();
++ page_zip_get_size(&bpage->zip), TRUE);
++ buf_buddy_free(bpage, sizeof(*bpage), TRUE);
++ //buf_pool_mutex_exit_allow();
+ UNIV_MEM_UNDESC(bpage);
+ return(BUF_BLOCK_ZIP_FREE);
+
+@@ -1807,9 +1944,9 @@
+ bpage->zip.data = NULL;
+
+ mutex_exit(&((buf_block_t*) bpage)->mutex);
+- buf_pool_mutex_exit_forbid();
+- buf_buddy_free(data, page_zip_get_size(&bpage->zip));
+- buf_pool_mutex_exit_allow();
++ //buf_pool_mutex_exit_forbid();
++ buf_buddy_free(data, page_zip_get_size(&bpage->zip), TRUE);
++ //buf_pool_mutex_exit_allow();
+ mutex_enter(&((buf_block_t*) bpage)->mutex);
+ page_zip_set_size(&bpage->zip, 0);
+ }
+@@ -1835,15 +1972,16 @@
+ void
+ buf_LRU_block_free_hashed_page(
+ /*===========================*/
+- buf_block_t* block) /* in: block, must contain a file page and
++ buf_block_t* block, /* in: block, must contain a file page and
+ be in a state where it can be freed */
++ ibool have_page_hash_mutex)
+ {
+- ut_ad(buf_pool_mutex_own());
++ //ut_ad(buf_pool_mutex_own());
+ ut_ad(mutex_own(&block->mutex));
+
+ buf_block_set_state(block, BUF_BLOCK_MEMORY);
+
+- buf_LRU_block_free_non_file_page(block);
++ buf_LRU_block_free_non_file_page(block, have_page_hash_mutex);
+ }
+
+ /************************************************************************
+@@ -1861,7 +1999,8 @@
+ goto func_exit;
+ }
+
+- buf_pool_mutex_enter();
++ //buf_pool_mutex_enter();
++ mutex_enter(&buf_pool_mutex);
+
+ /* Update the index. */
+ item = &buf_LRU_stat_arr[buf_LRU_stat_arr_ind];
+@@ -1875,7 +2014,8 @@
+ /* Put current entry in the array. */
+ memcpy(item, &buf_LRU_stat_cur, sizeof *item);
+
+- buf_pool_mutex_exit();
++ //buf_pool_mutex_exit();
++ mutex_exit(&buf_pool_mutex);
+
+ func_exit:
+ /* Clear the current entry. */
+@@ -1897,7 +2037,8 @@
+ ulint LRU_pos;
+
+ ut_ad(buf_pool);
+- buf_pool_mutex_enter();
++ //buf_pool_mutex_enter();
++ mutex_enter(&LRU_list_mutex);
+
+ if (UT_LIST_GET_LEN(buf_pool->LRU) >= BUF_LRU_OLD_MIN_LEN) {
+
+@@ -1956,6 +2097,9 @@
+ ut_a(buf_pool->LRU_old_len == old_len);
+ }
+
++ mutex_exit(&LRU_list_mutex);
++ mutex_enter(&free_list_mutex);
++
+ UT_LIST_VALIDATE(list, buf_page_t, buf_pool->free);
+
+ for (bpage = UT_LIST_GET_FIRST(buf_pool->free);
+@@ -1965,6 +2109,9 @@
+ ut_a(buf_page_get_state(bpage) == BUF_BLOCK_NOT_USED);
+ }
+
++ mutex_exit(&free_list_mutex);
++ mutex_enter(&LRU_list_mutex);
++
+ UT_LIST_VALIDATE(unzip_LRU, buf_block_t, buf_pool->unzip_LRU);
+
+ for (block = UT_LIST_GET_FIRST(buf_pool->unzip_LRU);
+@@ -1976,7 +2123,8 @@
+ ut_a(buf_page_belongs_to_unzip_LRU(&block->page));
+ }
+
+- buf_pool_mutex_exit();
++ //buf_pool_mutex_exit();
++ mutex_exit(&LRU_list_mutex);
+ return(TRUE);
+ }
+ #endif /* UNIV_DEBUG || UNIV_BUF_DEBUG */
+@@ -1992,7 +2140,8 @@
+ const buf_page_t* bpage;
+
+ ut_ad(buf_pool);
+- buf_pool_mutex_enter();
++ //buf_pool_mutex_enter();
++ mutex_enter(&LRU_list_mutex);
+
+ fprintf(stderr, "Pool ulint clock %lu\n",
+ (ulong) buf_pool->ulint_clock);
+@@ -2055,6 +2204,7 @@
+ bpage = UT_LIST_GET_NEXT(LRU, bpage);
+ }
+
+- buf_pool_mutex_exit();
++ //buf_pool_mutex_exit();
++ mutex_exit(&LRU_list_mutex);
+ }
+ #endif /* UNIV_DEBUG_PRINT || UNIV_DEBUG || UNIV_BUF_DEBUG */
+diff -ruN innodb_plugin-1.0.3_orig/buf/buf0rea.c innodb_plugin-1.0.3_tmp/buf/buf0rea.c
+--- innodb_plugin-1.0.3_orig/buf/buf0rea.c 2009-03-19 18:47:48.000000000 +0900
++++ innodb_plugin-1.0.3_tmp/buf/buf0rea.c 2009-03-19 18:51:20.000000000 +0900
+@@ -246,18 +246,22 @@
+
+ LRU_recent_limit = buf_LRU_get_recent_limit();
+
+- buf_pool_mutex_enter();
++ //buf_pool_mutex_enter();
++ mutex_enter(&buf_pool_mutex);
+
+ if (buf_pool->n_pend_reads
+ > buf_pool->curr_size / BUF_READ_AHEAD_PEND_LIMIT) {
+- buf_pool_mutex_exit();
++ //buf_pool_mutex_exit();
++ mutex_exit(&buf_pool_mutex);
+
+ return(0);
+ }
++ mutex_exit(&buf_pool_mutex);
+
+ /* Count how many blocks in the area have been recently accessed,
+ that is, reside near the start of the LRU list. */
+
++ rw_lock_s_lock(&page_hash_latch);
+ for (i = low; i < high; i++) {
+ const buf_page_t* bpage = buf_page_hash_get(space, i);
+
+@@ -269,13 +273,15 @@
+
+ if (recent_blocks >= BUF_READ_AHEAD_RANDOM_THRESHOLD) {
+
+- buf_pool_mutex_exit();
++ //buf_pool_mutex_exit();
++ rw_lock_s_unlock(&page_hash_latch);
+ goto read_ahead;
+ }
+ }
+ }
+
+- buf_pool_mutex_exit();
++ //buf_pool_mutex_exit();
++ rw_lock_s_unlock(&page_hash_latch);
+ /* Do nothing */
+ return(0);
+
+@@ -469,10 +475,12 @@
+
+ tablespace_version = fil_space_get_version(space);
+
+- buf_pool_mutex_enter();
++ //buf_pool_mutex_enter();
++ mutex_enter(&buf_pool_mutex);
+
+ if (high > fil_space_get_size(space)) {
+- buf_pool_mutex_exit();
++ //buf_pool_mutex_exit();
++ mutex_exit(&buf_pool_mutex);
+ /* The area is not whole, return */
+
+ return(0);
+@@ -480,10 +488,12 @@
+
+ if (buf_pool->n_pend_reads
+ > buf_pool->curr_size / BUF_READ_AHEAD_PEND_LIMIT) {
+- buf_pool_mutex_exit();
++ //buf_pool_mutex_exit();
++ mutex_exit(&buf_pool_mutex);
+
+ return(0);
+ }
++ mutex_exit(&buf_pool_mutex);
+
+ /* Check that almost all pages in the area have been accessed; if
+ offset == low, the accesses must be in a descending order, otherwise,
+@@ -497,6 +507,7 @@
+
+ fail_count = 0;
+
++ rw_lock_s_lock(&page_hash_latch);
+ for (i = low; i < high; i++) {
+ bpage = buf_page_hash_get(space, i);
+
+@@ -520,7 +531,8 @@
+ * LINEAR_AREA_THRESHOLD_COEF) {
+ /* Too many failures: return */
+
+- buf_pool_mutex_exit();
++ //buf_pool_mutex_exit();
++ rw_lock_s_unlock(&page_hash_latch);
+
+ return(0);
+ }
+@@ -531,7 +543,8 @@
+ bpage = buf_page_hash_get(space, offset);
+
+ if (bpage == NULL) {
+- buf_pool_mutex_exit();
++ //buf_pool_mutex_exit();
++ rw_lock_s_unlock(&page_hash_latch);
+
+ return(0);
+ }
+@@ -557,7 +570,8 @@
+ pred_offset = fil_page_get_prev(frame);
+ succ_offset = fil_page_get_next(frame);
+
+- buf_pool_mutex_exit();
++ //buf_pool_mutex_exit();
++ rw_lock_s_unlock(&page_hash_latch);
+
+ if ((offset == low) && (succ_offset == offset + 1)) {
+
+diff -ruN innodb_plugin-1.0.3_orig/handler/i_s.cc innodb_plugin-1.0.3_tmp/handler/i_s.cc
+--- innodb_plugin-1.0.3_orig/handler/i_s.cc 2009-03-19 18:47:48.000000000 +0900
++++ innodb_plugin-1.0.3_tmp/handler/i_s.cc 2009-03-19 18:51:20.000000000 +0900
+@@ -2280,7 +2280,8 @@
+
+ RETURN_IF_INNODB_NOT_STARTED(tables->schema_table_name);
+
+- buf_pool_mutex_enter();
++ //buf_pool_mutex_enter();
++ mutex_enter(&zip_free_mutex);
+
+ for (uint x = 0; x <= BUF_BUDDY_SIZES; x++) {
+ buf_buddy_stat_t* buddy_stat = &buf_buddy_stat[x];
+@@ -2306,7 +2307,8 @@
+ }
+ }
+
+- buf_pool_mutex_exit();
++ //buf_pool_mutex_exit();
++ mutex_exit(&zip_free_mutex);
+ DBUG_RETURN(status);
+ }
+
+diff -ruN innodb_plugin-1.0.3_orig/handler/innodb_patch_info.h innodb_plugin-1.0.3_tmp/handler/innodb_patch_info.h
+--- innodb_plugin-1.0.3_orig/handler/innodb_patch_info.h 2009-03-19 18:47:48.000000000 +0900
++++ innodb_plugin-1.0.3_tmp/handler/innodb_patch_info.h 2009-03-19 18:51:20.000000000 +0900
+@@ -29,5 +29,6 @@
+ {"innodb_rw_lock","InnoDB RW-lock fixes","Useful for 8+ cores SMP systems","http://www.percona.com/docs/wiki/percona-xtradb"},
+ {"innodb_opt_lru_count","Fix of buffer_pool mutex","Decreases contention on buffer_pool mutex on LRU operations","http://www.percona.com/docs/wiki/percona-xtradb"},
+ {"innodb_buffer_pool_pages","Information of buffer pool content","","http://www.percona.com/docs/wiki/percona-xtradb"},
++{"innodb_split_buf_pool_mutex","More fix of buffer_pool mutex","Spliting buf_pool_mutex and optimizing based on innodb_opt_lru_count","http://www.percona.com/docs/wiki/percona-xtradb"},
+ {NULL, NULL, NULL, NULL}
+ };
+diff -ruN innodb_plugin-1.0.3_orig/include/buf0buddy.h innodb_plugin-1.0.3_tmp/include/buf0buddy.h
+--- innodb_plugin-1.0.3_orig/include/buf0buddy.h 2009-02-17 17:59:22.000000000 +0900
++++ innodb_plugin-1.0.3_tmp/include/buf0buddy.h 2009-03-19 18:51:20.000000000 +0900
+@@ -49,10 +49,11 @@
+ /* out: allocated block,
+ possibly NULL if lru == NULL */
+ ulint size, /* in: block size, up to UNIV_PAGE_SIZE */
+- ibool* lru) /* in: pointer to a variable that will be assigned
++ ibool* lru, /* in: pointer to a variable that will be assigned
+ TRUE if storage was allocated from the LRU list
+ and buf_pool_mutex was temporarily released,
+ or NULL if the LRU list should not be used */
++ ibool have_page_hash_mutex)
+ __attribute__((malloc));
+
+ /**************************************************************************
+@@ -63,7 +64,8 @@
+ /*===========*/
+ void* buf, /* in: block to be freed, must not be
+ pointed to by the buffer pool */
+- ulint size) /* in: block size, up to UNIV_PAGE_SIZE */
++ ulint size, /* in: block size, up to UNIV_PAGE_SIZE */
++ ibool have_page_hash_mutex)
+ __attribute__((nonnull));
+
+ /** Statistics of buddy blocks of a given size. */
+diff -ruN innodb_plugin-1.0.3_orig/include/buf0buddy.ic innodb_plugin-1.0.3_tmp/include/buf0buddy.ic
+--- innodb_plugin-1.0.3_orig/include/buf0buddy.ic 2009-02-19 00:04:03.000000000 +0900
++++ innodb_plugin-1.0.3_tmp/include/buf0buddy.ic 2009-03-19 18:51:20.000000000 +0900
+@@ -44,10 +44,11 @@
+ possibly NULL if lru==NULL */
+ ulint i, /* in: index of buf_pool->zip_free[],
+ or BUF_BUDDY_SIZES */
+- ibool* lru) /* in: pointer to a variable that will be assigned
++ ibool* lru, /* in: pointer to a variable that will be assigned
+ TRUE if storage was allocated from the LRU list
+ and buf_pool_mutex was temporarily released,
+ or NULL if the LRU list should not be used */
++ ibool have_page_hash_mutex)
+ __attribute__((malloc));
+
+ /**************************************************************************
+@@ -58,8 +59,9 @@
+ /*===============*/
+ void* buf, /* in: block to be freed, must not be
+ pointed to by the buffer pool */
+- ulint i) /* in: index of buf_pool->zip_free[],
++ ulint i, /* in: index of buf_pool->zip_free[],
+ or BUF_BUDDY_SIZES */
++ ibool have_page_hash_mutex)
+ __attribute__((nonnull));
+
+ /**************************************************************************
+@@ -98,14 +100,15 @@
+ /* out: allocated block,
+ possibly NULL if lru == NULL */
+ ulint size, /* in: block size, up to UNIV_PAGE_SIZE */
+- ibool* lru) /* in: pointer to a variable that will be assigned
++ ibool* lru, /* in: pointer to a variable that will be assigned
+ TRUE if storage was allocated from the LRU list
+ and buf_pool_mutex was temporarily released,
+ or NULL if the LRU list should not be used */
++ ibool have_page_hash_mutex)
+ {
+- ut_ad(buf_pool_mutex_own());
++ //ut_ad(buf_pool_mutex_own());
+
+- return(buf_buddy_alloc_low(buf_buddy_get_slot(size), lru));
++ return(buf_buddy_alloc_low(buf_buddy_get_slot(size), lru, have_page_hash_mutex));
+ }
+
+ /**************************************************************************
+@@ -116,11 +119,26 @@
+ /*===========*/
+ void* buf, /* in: block to be freed, must not be
+ pointed to by the buffer pool */
+- ulint size) /* in: block size, up to UNIV_PAGE_SIZE */
++ ulint size, /* in: block size, up to UNIV_PAGE_SIZE */
++ ibool have_page_hash_mutex)
+ {
+- ut_ad(buf_pool_mutex_own());
++ //ut_ad(buf_pool_mutex_own());
+
+- buf_buddy_free_low(buf, buf_buddy_get_slot(size));
++ if (!have_page_hash_mutex) {
++ mutex_enter(&LRU_list_mutex);
++ mutex_enter(&flush_list_mutex);
++ rw_lock_x_lock(&page_hash_latch);
++ }
++
++ mutex_enter(&zip_free_mutex);
++ buf_buddy_free_low(buf, buf_buddy_get_slot(size), TRUE);
++ mutex_exit(&zip_free_mutex);
++
++ if (!have_page_hash_mutex) {
++ mutex_exit(&LRU_list_mutex);
++ mutex_exit(&flush_list_mutex);
++ rw_lock_x_unlock(&page_hash_latch);
++ }
+ }
+
+ #ifdef UNIV_MATERIALIZE
+diff -ruN innodb_plugin-1.0.3_orig/include/buf0buf.h innodb_plugin-1.0.3_tmp/include/buf0buf.h
+--- innodb_plugin-1.0.3_orig/include/buf0buf.h 2009-02-17 17:59:22.000000000 +0900
++++ innodb_plugin-1.0.3_tmp/include/buf0buf.h 2009-03-19 18:51:20.000000000 +0900
+@@ -1061,10 +1061,10 @@
+
+ UT_LIST_NODE_T(buf_page_t) LRU;
+ /* node of the LRU list */
+-#ifdef UNIV_DEBUG
++//#ifdef UNIV_DEBUG
+ ibool in_LRU_list; /* TRUE if the page is in the LRU list;
+ used in debugging */
+-#endif /* UNIV_DEBUG */
++//#endif /* UNIV_DEBUG */
+ unsigned old:1; /* TRUE if the block is in the old
+ blocks in the LRU list */
+ unsigned LRU_position:31;/* value which monotonically decreases
+@@ -1104,11 +1104,11 @@
+ a block is in the unzip_LRU list
+ if page.state == BUF_BLOCK_FILE_PAGE
+ and page.zip.data != NULL */
+-#ifdef UNIV_DEBUG
++//#ifdef UNIV_DEBUG
+ ibool in_unzip_LRU_list;/* TRUE if the page is in the
+ decompressed LRU list;
+ used in debugging */
+-#endif /* UNIV_DEBUG */
++//#endif /* UNIV_DEBUG */
+ byte* frame; /* pointer to buffer frame which
+ is of size UNIV_PAGE_SIZE, and
+ aligned to an address divisible by
+@@ -1316,6 +1316,12 @@
+ /* mutex protecting the buffer pool struct and control blocks, except the
+ read-write lock in them */
+ extern mutex_t buf_pool_mutex;
++extern mutex_t LRU_list_mutex;
++extern mutex_t flush_list_mutex;
++extern rw_lock_t page_hash_latch;
++extern mutex_t free_list_mutex;
++extern mutex_t zip_free_mutex;
++extern mutex_t zip_hash_mutex;
+ /* mutex protecting the control blocks of compressed-only pages
+ (of type buf_page_t, not buf_block_t) */
+ extern mutex_t buf_pool_zip_mutex;
+diff -ruN innodb_plugin-1.0.3_orig/include/buf0buf.ic innodb_plugin-1.0.3_tmp/include/buf0buf.ic
+--- innodb_plugin-1.0.3_orig/include/buf0buf.ic 2009-02-17 16:56:33.000000000 +0900
++++ innodb_plugin-1.0.3_tmp/include/buf0buf.ic 2009-03-19 18:51:20.000000000 +0900
+@@ -100,7 +100,8 @@
+ buf_page_t* bpage;
+ ib_uint64_t lsn;
+
+- buf_pool_mutex_enter();
++ //buf_pool_mutex_enter();
++ mutex_enter(&flush_list_mutex);
+
+ bpage = UT_LIST_GET_LAST(buf_pool->flush_list);
+
+@@ -111,7 +112,8 @@
+ lsn = bpage->oldest_modification;
+ }
+
+- buf_pool_mutex_exit();
++ //buf_pool_mutex_exit();
++ mutex_exit(&flush_list_mutex);
+
+ /* The returned answer may be out of date: the flush_list can
+ change after the mutex has been released. */
+@@ -128,7 +130,8 @@
+ /*====================*/
+ /* out: new clock value */
+ {
+- ut_ad(buf_pool_mutex_own());
++ //ut_ad(buf_pool_mutex_own());
++ ut_ad(mutex_own(&LRU_list_mutex));
+
+ buf_pool->ulint_clock++;
+
+@@ -246,7 +249,7 @@
+ case BUF_BLOCK_ZIP_FREE:
+ /* This is a free page in buf_pool->zip_free[].
+ Such pages should only be accessed by the buddy allocator. */
+- ut_error;
++ /* ut_error; */ /* optimistic */
+ break;
+ case BUF_BLOCK_ZIP_PAGE:
+ case BUF_BLOCK_ZIP_DIRTY:
+@@ -305,7 +308,7 @@
+ {
+ switch (buf_page_get_state(bpage)) {
+ case BUF_BLOCK_ZIP_FREE:
+- ut_error;
++ /* ut_error; */ /* optimistic */
+ return(NULL);
+ case BUF_BLOCK_ZIP_PAGE:
+ case BUF_BLOCK_ZIP_DIRTY:
+@@ -410,7 +413,7 @@
+ buf_page_t* bpage, /* in/out: control block */
+ enum buf_io_fix io_fix) /* in: io_fix state */
+ {
+- ut_ad(buf_pool_mutex_own());
++ //ut_ad(buf_pool_mutex_own());
+ ut_ad(mutex_own(buf_page_get_mutex(bpage)));
+
+ bpage->io_fix = io_fix;
+@@ -438,12 +441,13 @@
+ /*==================*/
+ const buf_page_t* bpage) /* control block being relocated */
+ {
+- ut_ad(buf_pool_mutex_own());
+- ut_ad(mutex_own(buf_page_get_mutex(bpage)));
+- ut_ad(buf_page_in_file(bpage));
+- ut_ad(bpage->in_LRU_list);
++ //ut_ad(buf_pool_mutex_own());
++ /* optimistic */
++ //ut_ad(mutex_own(buf_page_get_mutex(bpage)));
++ //ut_ad(buf_page_in_file(bpage));
++ //ut_ad(bpage->in_LRU_list);
+
+- return(buf_page_get_io_fix(bpage) == BUF_IO_NONE
++ return(bpage->in_LRU_list && bpage->io_fix == BUF_IO_NONE
+ && bpage->buf_fix_count == 0);
+ }
+
+@@ -472,7 +476,8 @@
+ ibool old) /* in: old */
+ {
+ ut_a(buf_page_in_file(bpage));
+- ut_ad(buf_pool_mutex_own());
++ //ut_ad(buf_pool_mutex_own());
++ ut_ad(mutex_own(&LRU_list_mutex));
+ ut_ad(bpage->in_LRU_list);
+
+ #ifdef UNIV_LRU_DEBUG
+@@ -728,17 +733,17 @@
+ /*===========*/
+ buf_block_t* block) /* in, own: block to be freed */
+ {
+- buf_pool_mutex_enter();
++ //buf_pool_mutex_enter();
+
+ mutex_enter(&block->mutex);
+
+ ut_a(buf_block_get_state(block) != BUF_BLOCK_FILE_PAGE);
+
+- buf_LRU_block_free_non_file_page(block);
++ buf_LRU_block_free_non_file_page(block, FALSE);
+
+ mutex_exit(&block->mutex);
+
+- buf_pool_mutex_exit();
++ //buf_pool_mutex_exit();
+ }
+
+ /*************************************************************************
+@@ -783,14 +788,17 @@
+ buf_page_t* bpage) /* in: buf_pool block, must be bufferfixed */
+ {
+ ibool io_fixed;
++ mutex_t* block_mutex = buf_page_get_mutex(bpage);
+
+- buf_pool_mutex_enter();
++ //buf_pool_mutex_enter();
++ mutex_enter(block_mutex);
+
+ ut_ad(buf_page_in_file(bpage));
+ ut_ad(bpage->buf_fix_count > 0);
+
+ io_fixed = buf_page_get_io_fix(bpage) != BUF_IO_NONE;
+- buf_pool_mutex_exit();
++ //buf_pool_mutex_exit();
++ mutex_exit(block_mutex);
+
+ return(io_fixed);
+ }
+@@ -917,7 +925,11 @@
+ ulint fold;
+
+ ut_ad(buf_pool);
+- ut_ad(buf_pool_mutex_own());
++ //ut_ad(buf_pool_mutex_own());
++#ifdef UNIV_SYNC_DEBUG
++ ut_ad(rw_lock_own(&page_hash_latch, RW_LOCK_EX)
++ || rw_lock_own(&page_hash_latch, RW_LOCK_SHARED));
++#endif
+
+ /* Look for the page in the hash table */
+
+@@ -966,11 +978,13 @@
+ {
+ const buf_page_t* bpage;
+
+- buf_pool_mutex_enter();
++ //buf_pool_mutex_enter();
++ rw_lock_s_lock(&page_hash_latch);
+
+ bpage = buf_page_hash_get(space, offset);
+
+- buf_pool_mutex_exit();
++ //buf_pool_mutex_exit();
++ rw_lock_s_unlock(&page_hash_latch);
+
+ return(bpage != NULL);
+ }
+@@ -1033,12 +1047,17 @@
+ ut_a(block->page.buf_fix_count > 0);
+
+ if (rw_latch == RW_X_LATCH && mtr->modifications) {
+- buf_pool_mutex_enter();
++ //buf_pool_mutex_enter();
++ mutex_enter(&flush_list_mutex);
++ mutex_enter(&block->mutex);
++ ut_ad(buf_block_get_state(block) == BUF_BLOCK_FILE_PAGE);
+ buf_flush_note_modification(block, mtr);
+- buf_pool_mutex_exit();
++ //buf_pool_mutex_exit();
++ mutex_exit(&flush_list_mutex);
+ }
+-
++ else {
+ mutex_enter(&block->mutex);
++ }
+
+ #ifdef UNIV_SYNC_DEBUG
+ rw_lock_s_unlock(&(block->debug_latch));
+diff -ruN innodb_plugin-1.0.3_orig/include/buf0flu.ic innodb_plugin-1.0.3_tmp/include/buf0flu.ic
+--- innodb_plugin-1.0.3_orig/include/buf0flu.ic 2009-02-17 17:59:22.000000000 +0900
++++ innodb_plugin-1.0.3_tmp/include/buf0flu.ic 2009-03-19 18:51:20.000000000 +0900
+@@ -59,7 +59,8 @@
+ #ifdef UNIV_SYNC_DEBUG
+ ut_ad(rw_lock_own(&(block->lock), RW_LOCK_EX));
+ #endif /* UNIV_SYNC_DEBUG */
+- ut_ad(buf_pool_mutex_own());
++ //ut_ad(buf_pool_mutex_own());
++ ut_ad(mutex_own(&flush_list_mutex));
+
+ ut_ad(mtr->start_lsn != 0);
+ ut_ad(mtr->modifications);
+@@ -99,7 +100,8 @@
+ ut_ad(rw_lock_own(&(block->lock), RW_LOCK_EX));
+ #endif /* UNIV_SYNC_DEBUG */
+
+- buf_pool_mutex_enter();
++ //buf_pool_mutex_enter();
++ mutex_enter(&flush_list_mutex);
+
+ ut_ad(block->page.newest_modification <= end_lsn);
+
+@@ -116,5 +118,6 @@
+ ut_ad(block->page.oldest_modification <= start_lsn);
+ }
+
+- buf_pool_mutex_exit();
++ //buf_pool_mutex_exit();
++ mutex_exit(&flush_list_mutex);
+ }
+diff -ruN innodb_plugin-1.0.3_orig/include/buf0lru.h innodb_plugin-1.0.3_tmp/include/buf0lru.h
+--- innodb_plugin-1.0.3_orig/include/buf0lru.h 2009-02-17 17:59:22.000000000 +0900
++++ innodb_plugin-1.0.3_tmp/include/buf0lru.h 2009-03-19 18:51:20.000000000 +0900
+@@ -122,10 +122,11 @@
+ buf_page_t* bpage, /* in: block to be freed */
+ ibool zip, /* in: TRUE if should remove also the
+ compressed page of an uncompressed page */
+- ibool* buf_pool_mutex_released);
++ ibool* buf_pool_mutex_released,
+ /* in: pointer to a variable that will
+ be assigned TRUE if buf_pool_mutex
+ was temporarily released, or NULL */
++ ibool have_LRU_mutex);
+ /**********************************************************************
+ Try to free a replaceable block. */
+ UNIV_INTERN
+@@ -169,7 +170,8 @@
+ void
+ buf_LRU_block_free_non_file_page(
+ /*=============================*/
+- buf_block_t* block); /* in: block, must not contain a file page */
++ buf_block_t* block, /* in: block, must not contain a file page */
++ ibool have_page_hash_mutex);
+ /**********************************************************************
+ Adds a block to the LRU list. */
+ UNIV_INTERN
+diff -ruN innodb_plugin-1.0.3_orig/mysql-test/innodb_xtradb_bug317074.result innodb_plugin-1.0.3_tmp/mysql-test/innodb_xtradb_bug317074.result
+--- innodb_plugin-1.0.3_orig/mysql-test/innodb_xtradb_bug317074.result 1970-01-01 09:00:00.000000000 +0900
++++ innodb_plugin-1.0.3_tmp/mysql-test/innodb_xtradb_bug317074.result 2009-03-19 18:51:20.000000000 +0900
+@@ -0,0 +1,2 @@
++SET GLOBAL innodb_file_format='Barracuda';
++SET GLOBAL innodb_file_per_table=ON;
+diff -ruN innodb_plugin-1.0.3_orig/mysql-test/innodb_xtradb_bug317074.test innodb_plugin-1.0.3_tmp/mysql-test/innodb_xtradb_bug317074.test
+--- innodb_plugin-1.0.3_orig/mysql-test/innodb_xtradb_bug317074.test 1970-01-01 09:00:00.000000000 +0900
++++ innodb_plugin-1.0.3_tmp/mysql-test/innodb_xtradb_bug317074.test 2009-03-19 18:51:20.000000000 +0900
+@@ -0,0 +1,38 @@
++-- source include/have_innodb.inc
++
++SET GLOBAL innodb_file_format='Barracuda';
++SET GLOBAL innodb_file_per_table=ON;
++
++-- disable_query_log
++-- disable_result_log
++
++DROP TABLE IF EXISTS `test1`;
++CREATE TABLE IF NOT EXISTS `test1` (
++ `a` int primary key auto_increment,
++ `b` int default 0,
++ `c` char(100) default 'testtest'
++) ENGINE=InnoDB ROW_FORMAT=COMPRESSED KEY_BLOCK_SIZE=8;
++
++delimiter |;
++CREATE PROCEDURE insert_many(p1 int)
++BEGIN
++SET @x = 0;
++SET @y = 0;
++REPEAT
++ insert into test1 set b=1;
++ SET @x = @x + 1;
++ SET @y = @y + 1;
++ IF @y >= 100 THEN
++ commit;
++ SET @y = 0;
++ END IF;
++UNTIL @x >= p1 END REPEAT;
++END|
++delimiter ;|
++call insert_many(100000);
++DROP PROCEDURE insert_many;
++
++# The bug is a hang at the following statement
++ALTER TABLE test1 ENGINE=MyISAM;
++
++DROP TABLE test1;
=== added file 'series'
--- series 1970-01-01 00:00:00 +0000
+++ series 2009-03-19 07:29:08 +0000
@@ -0,0 +1,9 @@
+innodb_show_enhancements.patch
+innodb_show_status.patch
+innodb_io_patches.patch
+innodb_rw_lock.patch
+innodb_opt_lru_count.patch
+i_s_innodb_buffer_pool_pages.patch
+innodb_split_buf_pool_mutex.patch
+innodb_expand_undo_slots.patch
+innodb_extra_rseg.patch