← Back to team overview

percona-discussion team mailing list archive

[Merge] lp:~percona-dev/percona-xtradb/porting-1.0.3 into lp:~percona-dev/percona-xtradb/extensions-1.0

 

Percona has proposed merging lp:~percona-dev/percona-xtradb/porting-1.0.3 into lp:~percona-dev/percona-xtradb/extensions-1.0.

Requested reviews:
    Percona developers (percona-dev)

rw_locks for 1.0.3 plugin
-- 
https://code.launchpad.net/~percona-dev/percona-xtradb/porting-1.0.3/+merge/4919
Your team Percona developers is subscribed to branch lp:~percona-dev/percona-xtradb/porting-1.0.3.
=== added file 'i_s_innodb_buffer_pool_pages.patch'
--- i_s_innodb_buffer_pool_pages.patch	1970-01-01 00:00:00 +0000
+++ i_s_innodb_buffer_pool_pages.patch	2009-03-20 05:35:54 +0000
@@ -0,0 +1,818 @@
+diff -ruN innodb_plugin-1.0.3_orig/handler/ha_innodb.cc innodb_plugin-1.0.3_tmp/handler/ha_innodb.cc
+--- innodb_plugin-1.0.3_orig/handler/ha_innodb.cc	2009-03-18 18:08:37.000000000 +0900
++++ innodb_plugin-1.0.3_tmp/handler/ha_innodb.cc	2009-03-18 18:15:09.000000000 +0900
+@@ -10043,6 +10043,9 @@
+   innobase_system_variables, /* system variables */
+   NULL /* reserved */
+ },
++i_s_innodb_buffer_pool_pages,
++i_s_innodb_buffer_pool_pages_index,
++i_s_innodb_buffer_pool_pages_blob,
+ i_s_innodb_trx,
+ i_s_innodb_locks,
+ i_s_innodb_lock_waits,
+diff -ruN innodb_plugin-1.0.3_orig/handler/i_s.cc innodb_plugin-1.0.3_tmp/handler/i_s.cc
+--- innodb_plugin-1.0.3_orig/handler/i_s.cc	2009-03-18 18:08:22.000000000 +0900
++++ innodb_plugin-1.0.3_tmp/handler/i_s.cc	2009-03-18 18:15:09.000000000 +0900
+@@ -41,6 +41,16 @@
+ #include "buf0buf.h" /* for buf_pool and PAGE_ZIP_MIN_SIZE */
+ #include "ha_prototypes.h" /* for innobase_convert_name() */
+ #include "srv0start.h" /* for srv_was_started */
++#include "btr0btr.h" /* for btr_page_get_index_id */
++#include "dict0dict.h" /* for dict_index_get_if_in_cache */
++/* duplicated from buf0buf.c so this file can walk chunk->blocks; keep both definitions in sync */
++struct buf_chunk_struct{
++	ulint		mem_size;	/* allocated size of the chunk */
++	ulint		size;		/* size of frames[] and blocks[] */
++	void*		mem;		/* pointer to the memory area which
++					was allocated for the frames */
++	buf_block_t*	blocks;		/* array of buffer control blocks */
++};
+ }
+ #include "handler0vars.h"
+ 
+@@ -378,6 +388,751 @@
+ };
+ 
+ 
++static ST_FIELD_INFO	i_s_innodb_buffer_pool_pages_fields_info[] =
++{	/* [0] name of the page type reported by fil_page_get_type() */
++	{STRUCT_FLD(field_name,		"page_type"),
++	 STRUCT_FLD(field_length,	64),
++	 STRUCT_FLD(field_type,		MYSQL_TYPE_STRING),
++	 STRUCT_FLD(value,		0),
++	 STRUCT_FLD(field_flags,	MY_I_S_MAYBE_NULL),
++	 STRUCT_FLD(old_name,		""),
++	 STRUCT_FLD(open_method,	SKIP_OPEN_TABLE)},
++	/* [1] block->page.space */
++	{STRUCT_FLD(field_name,		"space_id"),
++	 STRUCT_FLD(field_length,	MY_INT64_NUM_DECIMAL_DIGITS),
++	 STRUCT_FLD(field_type,		MYSQL_TYPE_LONGLONG),
++	 STRUCT_FLD(value,		0),
++	 STRUCT_FLD(field_flags,	MY_I_S_UNSIGNED),
++	 STRUCT_FLD(old_name,		""),
++	 STRUCT_FLD(open_method,	SKIP_OPEN_TABLE)},
++	/* [2] block->page.offset */
++	{STRUCT_FLD(field_name,		"page_no"),
++	 STRUCT_FLD(field_length,	MY_INT64_NUM_DECIMAL_DIGITS),
++	 STRUCT_FLD(field_type,		MYSQL_TYPE_LONGLONG),
++	 STRUCT_FLD(value,		0),
++	 STRUCT_FLD(field_flags,	MY_I_S_UNSIGNED),
++	 STRUCT_FLD(old_name,		""),
++	 STRUCT_FLD(open_method,	SKIP_OPEN_TABLE)},
++	/* [3] block->page.LRU_position */
++	{STRUCT_FLD(field_name,		"lru_position"),
++	 STRUCT_FLD(field_length,	MY_INT64_NUM_DECIMAL_DIGITS),
++	 STRUCT_FLD(field_type,		MYSQL_TYPE_LONGLONG),
++	 STRUCT_FLD(value,		0),
++	 STRUCT_FLD(field_flags,	MY_I_S_UNSIGNED),
++	 STRUCT_FLD(old_name,		""),
++	 STRUCT_FLD(open_method,	SKIP_OPEN_TABLE)},
++	/* [4] block->page.buf_fix_count */
++	{STRUCT_FLD(field_name,		"fix_count"),
++	 STRUCT_FLD(field_length,	MY_INT64_NUM_DECIMAL_DIGITS),
++	 STRUCT_FLD(field_type,		MYSQL_TYPE_LONGLONG),
++	 STRUCT_FLD(value,		0),
++	 STRUCT_FLD(field_flags,	MY_I_S_UNSIGNED),
++	 STRUCT_FLD(old_name,		""),
++	 STRUCT_FLD(open_method,	SKIP_OPEN_TABLE)},
++	/* [5] block->page.flush_type */
++	{STRUCT_FLD(field_name,		"flush_type"),
++	 STRUCT_FLD(field_length,	MY_INT64_NUM_DECIMAL_DIGITS),
++	 STRUCT_FLD(field_type,		MYSQL_TYPE_LONGLONG),
++	 STRUCT_FLD(value,		0),
++	 STRUCT_FLD(field_flags,	MY_I_S_UNSIGNED),
++	 STRUCT_FLD(old_name,		""),
++	 STRUCT_FLD(open_method,	SKIP_OPEN_TABLE)},
++
++	END_OF_ST_FIELD_INFO
++};
++
++static ST_FIELD_INFO	i_s_innodb_buffer_pool_pages_index_fields_info[] =
++{	/* [0] text before '/' in index->table_name; NULL when there is no '/' */
++	{STRUCT_FLD(field_name,		"schema_name"),
++	 STRUCT_FLD(field_length,	64),
++	 STRUCT_FLD(field_type,		MYSQL_TYPE_STRING),
++	 STRUCT_FLD(value,		0),
++	 STRUCT_FLD(field_flags,	MY_I_S_MAYBE_NULL),
++	 STRUCT_FLD(old_name,		""),
++	 STRUCT_FLD(open_method,	SKIP_OPEN_TABLE)},
++	/* [1] rest of index->table_name, passed through filename_to_tablename() */
++	{STRUCT_FLD(field_name,		"table_name"),
++	 STRUCT_FLD(field_length,	64),
++	 STRUCT_FLD(field_type,		MYSQL_TYPE_STRING),
++	 STRUCT_FLD(value,		0),
++	 STRUCT_FLD(field_flags,	0),
++	 STRUCT_FLD(old_name,		""),
++	 STRUCT_FLD(open_method,	SKIP_OPEN_TABLE)},
++	/* [2] index->name */
++	{STRUCT_FLD(field_name,		"index_name"),
++	 STRUCT_FLD(field_length,	64),
++	 STRUCT_FLD(field_type,		MYSQL_TYPE_STRING),
++	 STRUCT_FLD(value,		0),
++	 STRUCT_FLD(field_flags,	0),
++	 STRUCT_FLD(old_name,		""),
++	 STRUCT_FLD(open_method,	SKIP_OPEN_TABLE)},
++	/* [3] block->page.space */
++	{STRUCT_FLD(field_name,		"space_id"),
++	 STRUCT_FLD(field_length,	MY_INT64_NUM_DECIMAL_DIGITS),
++	 STRUCT_FLD(field_type,		MYSQL_TYPE_LONGLONG),
++	 STRUCT_FLD(value,		0),
++	 STRUCT_FLD(field_flags,	MY_I_S_UNSIGNED),
++	 STRUCT_FLD(old_name,		""),
++	 STRUCT_FLD(open_method,	SKIP_OPEN_TABLE)},
++	/* [4] block->page.offset */
++	{STRUCT_FLD(field_name,		"page_no"),
++	 STRUCT_FLD(field_length,	MY_INT64_NUM_DECIMAL_DIGITS),
++	 STRUCT_FLD(field_type,		MYSQL_TYPE_LONGLONG),
++	 STRUCT_FLD(value,		0),
++	 STRUCT_FLD(field_flags,	MY_I_S_UNSIGNED),
++	 STRUCT_FLD(old_name,		""),
++	 STRUCT_FLD(open_method,	SKIP_OPEN_TABLE)},
++	/* [5] page_get_n_recs(frame) */
++	{STRUCT_FLD(field_name,		"n_recs"),
++	 STRUCT_FLD(field_length,	MY_INT64_NUM_DECIMAL_DIGITS),
++	 STRUCT_FLD(field_type,		MYSQL_TYPE_LONGLONG),
++	 STRUCT_FLD(value,		0),
++	 STRUCT_FLD(field_flags,	MY_I_S_UNSIGNED),
++	 STRUCT_FLD(old_name,		""),
++	 STRUCT_FLD(open_method,	SKIP_OPEN_TABLE)},
++	/* [6] page_get_data_size(frame) */
++	{STRUCT_FLD(field_name,		"data_size"),
++	 STRUCT_FLD(field_length,	MY_INT64_NUM_DECIMAL_DIGITS),
++	 STRUCT_FLD(field_type,		MYSQL_TYPE_LONGLONG),
++	 STRUCT_FLD(value,		0),
++	 STRUCT_FLD(field_flags,	MY_I_S_UNSIGNED),
++	 STRUCT_FLD(old_name,		""),
++	 STRUCT_FLD(open_method,	SKIP_OPEN_TABLE)},
++	/* [7] block->is_hashed */
++	{STRUCT_FLD(field_name,		"hashed"),
++	 STRUCT_FLD(field_length,	MY_INT64_NUM_DECIMAL_DIGITS),
++	 STRUCT_FLD(field_type,		MYSQL_TYPE_LONGLONG),
++	 STRUCT_FLD(value,		0),
++	 STRUCT_FLD(field_flags,	MY_I_S_UNSIGNED),
++	 STRUCT_FLD(old_name,		""),
++	 STRUCT_FLD(open_method,	SKIP_OPEN_TABLE)},
++	/* [8] block->page.accessed */
++	{STRUCT_FLD(field_name,		"accessed"),
++	 STRUCT_FLD(field_length,	MY_INT64_NUM_DECIMAL_DIGITS),
++	 STRUCT_FLD(field_type,		MYSQL_TYPE_LONGLONG),
++	 STRUCT_FLD(value,		0),
++	 STRUCT_FLD(field_flags,	MY_I_S_UNSIGNED),
++	 STRUCT_FLD(old_name,		""),
++	 STRUCT_FLD(open_method,	SKIP_OPEN_TABLE)},
++	/* [9] 1 when block->page.newest_modification is non-zero, else 0 */
++	{STRUCT_FLD(field_name,		"modified"),
++	 STRUCT_FLD(field_length,	MY_INT64_NUM_DECIMAL_DIGITS),
++	 STRUCT_FLD(field_type,		MYSQL_TYPE_LONGLONG),
++	 STRUCT_FLD(value,		0),
++	 STRUCT_FLD(field_flags,	MY_I_S_UNSIGNED),
++	 STRUCT_FLD(old_name,		""),
++	 STRUCT_FLD(open_method,	SKIP_OPEN_TABLE)},
++	/* [10] 1 when block->page.oldest_modification is non-zero, else 0 */
++	{STRUCT_FLD(field_name,		"dirty"),
++	 STRUCT_FLD(field_length,	MY_INT64_NUM_DECIMAL_DIGITS),
++	 STRUCT_FLD(field_type,		MYSQL_TYPE_LONGLONG),
++	 STRUCT_FLD(value,		0),
++	 STRUCT_FLD(field_flags,	MY_I_S_UNSIGNED),
++	 STRUCT_FLD(old_name,		""),
++	 STRUCT_FLD(open_method,	SKIP_OPEN_TABLE)},
++	/* [11] block->page.old */
++	{STRUCT_FLD(field_name,		"old"),
++	 STRUCT_FLD(field_length,	MY_INT64_NUM_DECIMAL_DIGITS),
++	 STRUCT_FLD(field_type,		MYSQL_TYPE_LONGLONG),
++	 STRUCT_FLD(value,		0),
++	 STRUCT_FLD(field_flags,	MY_I_S_UNSIGNED),
++	 STRUCT_FLD(old_name,		""),
++	 STRUCT_FLD(open_method,	SKIP_OPEN_TABLE)},
++	/* [12] block->page.LRU_position */
++	{STRUCT_FLD(field_name,		"lru_position"),
++	 STRUCT_FLD(field_length,	MY_INT64_NUM_DECIMAL_DIGITS),
++	 STRUCT_FLD(field_type,		MYSQL_TYPE_LONGLONG),
++	 STRUCT_FLD(value,		0),
++	 STRUCT_FLD(field_flags,	MY_I_S_UNSIGNED),
++	 STRUCT_FLD(old_name,		""),
++	 STRUCT_FLD(open_method,	SKIP_OPEN_TABLE)},
++	/* [13] block->page.buf_fix_count */
++	{STRUCT_FLD(field_name,		"fix_count"),
++	 STRUCT_FLD(field_length,	MY_INT64_NUM_DECIMAL_DIGITS),
++	 STRUCT_FLD(field_type,		MYSQL_TYPE_LONGLONG),
++	 STRUCT_FLD(value,		0),
++	 STRUCT_FLD(field_flags,	MY_I_S_UNSIGNED),
++	 STRUCT_FLD(old_name,		""),
++	 STRUCT_FLD(open_method,	SKIP_OPEN_TABLE)},
++	/* [14] block->page.flush_type */
++	{STRUCT_FLD(field_name,		"flush_type"),
++	 STRUCT_FLD(field_length,	MY_INT64_NUM_DECIMAL_DIGITS),
++	 STRUCT_FLD(field_type,		MYSQL_TYPE_LONGLONG),
++	 STRUCT_FLD(value,		0),
++	 STRUCT_FLD(field_flags,	MY_I_S_UNSIGNED),
++	 STRUCT_FLD(old_name,		""),
++	 STRUCT_FLD(open_method,	SKIP_OPEN_TABLE)},
++
++	END_OF_ST_FIELD_INFO
++};
++
++static ST_FIELD_INFO	i_s_innodb_buffer_pool_pages_blob_fields_info[] =
++{	/* [0] block->page.space */
++	{STRUCT_FLD(field_name,		"space_id"),
++	 STRUCT_FLD(field_length,	MY_INT64_NUM_DECIMAL_DIGITS),
++	 STRUCT_FLD(field_type,		MYSQL_TYPE_LONGLONG),
++	 STRUCT_FLD(value,		0),
++	 STRUCT_FLD(field_flags,	MY_I_S_UNSIGNED),
++	 STRUCT_FLD(old_name,		""),
++	 STRUCT_FLD(open_method,	SKIP_OPEN_TABLE)},
++	/* [1] block->page.offset */
++	{STRUCT_FLD(field_name,		"page_no"),
++	 STRUCT_FLD(field_length,	MY_INT64_NUM_DECIMAL_DIGITS),
++	 STRUCT_FLD(field_type,		MYSQL_TYPE_LONGLONG),
++	 STRUCT_FLD(value,		0),
++	 STRUCT_FLD(field_flags,	MY_I_S_UNSIGNED),
++	 STRUCT_FLD(old_name,		""),
++	 STRUCT_FLD(open_method,	SKIP_OPEN_TABLE)},
++	/* [2] 1 when the fill function found a compressed-page descriptor, else 0 */
++	{STRUCT_FLD(field_name,		"compressed"),
++	 STRUCT_FLD(field_length,	MY_INT64_NUM_DECIMAL_DIGITS),
++	 STRUCT_FLD(field_type,		MYSQL_TYPE_LONGLONG),
++	 STRUCT_FLD(value,		0),
++	 STRUCT_FLD(field_flags,	MY_I_S_UNSIGNED),
++	 STRUCT_FLD(old_name,		""),
++	 STRUCT_FLD(open_method,	SKIP_OPEN_TABLE)},
++	/* [3] 4-byte value read at FIL_PAGE_DATA + 0; stored as 0 for compressed pages */
++	{STRUCT_FLD(field_name,		"part_len"),
++	 STRUCT_FLD(field_length,	MY_INT64_NUM_DECIMAL_DIGITS),
++	 STRUCT_FLD(field_type,		MYSQL_TYPE_LONGLONG),
++	 STRUCT_FLD(value,		0),
++	 STRUCT_FLD(field_flags,	MY_I_S_UNSIGNED),
++	 STRUCT_FLD(old_name,		""),
++	 STRUCT_FLD(open_method,	SKIP_OPEN_TABLE)},
++	/* [4] meant to hold the next page of the BLOB chain; 0 when the page stores FIL_NULL */
++	{STRUCT_FLD(field_name,		"next_page_no"),
++	 STRUCT_FLD(field_length,	MY_INT64_NUM_DECIMAL_DIGITS),
++	 STRUCT_FLD(field_type,		MYSQL_TYPE_LONGLONG),
++	 STRUCT_FLD(value,		0),
++	 STRUCT_FLD(field_flags,	MY_I_S_UNSIGNED),
++	 STRUCT_FLD(old_name,		""),
++	 STRUCT_FLD(open_method,	SKIP_OPEN_TABLE)},
++	/* [5] block->page.LRU_position */
++	{STRUCT_FLD(field_name,		"lru_position"),
++	 STRUCT_FLD(field_length,	MY_INT64_NUM_DECIMAL_DIGITS),
++	 STRUCT_FLD(field_type,		MYSQL_TYPE_LONGLONG),
++	 STRUCT_FLD(value,		0),
++	 STRUCT_FLD(field_flags,	MY_I_S_UNSIGNED),
++	 STRUCT_FLD(old_name,		""),
++	 STRUCT_FLD(open_method,	SKIP_OPEN_TABLE)},
++	/* [6] block->page.buf_fix_count */
++	{STRUCT_FLD(field_name,		"fix_count"),
++	 STRUCT_FLD(field_length,	MY_INT64_NUM_DECIMAL_DIGITS),
++	 STRUCT_FLD(field_type,		MYSQL_TYPE_LONGLONG),
++	 STRUCT_FLD(value,		0),
++	 STRUCT_FLD(field_flags,	MY_I_S_UNSIGNED),
++	 STRUCT_FLD(old_name,		""),
++	 STRUCT_FLD(open_method,	SKIP_OPEN_TABLE)},
++	/* [7] block->page.flush_type */
++	{STRUCT_FLD(field_name,		"flush_type"),
++	 STRUCT_FLD(field_length,	MY_INT64_NUM_DECIMAL_DIGITS),
++	 STRUCT_FLD(field_type,		MYSQL_TYPE_LONGLONG),
++	 STRUCT_FLD(value,		0),
++	 STRUCT_FLD(field_flags,	MY_I_S_UNSIGNED),
++	 STRUCT_FLD(old_name,		""),
++	 STRUCT_FLD(open_method,	SKIP_OPEN_TABLE)},
++
++	END_OF_ST_FIELD_INFO
++};
++
++/***********************************************************************
++Fill information_schema.innodb_buffer_pool_pages, one row per control block. */
++static
++int
++i_s_innodb_buffer_pool_pages_fill(
++/*================*/
++				/* out: 0 on success, 1 on failure */
++	THD*		thd,	/* in: thread */
++	TABLE_LIST*	tables,	/* in/out: tables to fill */
++	COND*		cond)	/* in: condition (ignored) */
++{
++	TABLE*	table	= (TABLE *) tables->table;
++	int	status	= 0;
++
++	ulint		n_chunks, n_blocks;
++
++	buf_chunk_t*	chunk;
++
++	DBUG_ENTER("i_s_innodb_buffer_pool_pages_fill");
++
++	/* deny access to non-superusers: they get an empty table, not an error */
++	if (check_global_access(thd, PROCESS_ACL)) {
++
++		DBUG_RETURN(0);
++	}
++
++	RETURN_IF_INNODB_NOT_STARTED(tables->schema_table_name);
++
++	/* Nothing below reads the data dictionary, so dict_sys->mutex is not taken. */
++	buf_pool_mutex_enter();
++
++	chunk = buf_pool->chunks;
++	/* "!status" stops the scan of the remaining chunks after a failed row store. */
++	for (n_chunks = buf_pool->n_chunks; !status && n_chunks--; chunk++) {
++		buf_block_t*	block		= chunk->blocks;
++
++		for (n_blocks = chunk->size; n_blocks--; block++) {
++			const buf_frame_t* frame = block->frame;
++
++			char page_type[64];
++
++			switch(fil_page_get_type(frame))
++			{
++			case FIL_PAGE_INDEX:
++				strcpy(page_type, "index");
++				break;
++			case FIL_PAGE_UNDO_LOG:
++				strcpy(page_type, "undo_log");
++				break;
++			case FIL_PAGE_INODE:
++				strcpy(page_type, "inode");
++				break;
++			case FIL_PAGE_IBUF_FREE_LIST:
++				strcpy(page_type, "ibuf_free_list");
++				break;
++			case FIL_PAGE_TYPE_ALLOCATED:
++				strcpy(page_type, "allocated");
++				break;
++			case FIL_PAGE_IBUF_BITMAP:
++				strcpy(page_type, "bitmap");
++				break;
++			case FIL_PAGE_TYPE_SYS:
++				strcpy(page_type, "sys");
++				break;
++			case FIL_PAGE_TYPE_TRX_SYS:
++				strcpy(page_type, "trx_sys");
++				break;
++			case FIL_PAGE_TYPE_FSP_HDR:
++				strcpy(page_type, "fsp_hdr");
++				break;
++			case FIL_PAGE_TYPE_XDES:
++				strcpy(page_type, "xdes");
++				break;
++			case FIL_PAGE_TYPE_BLOB:
++				strcpy(page_type, "blob");
++				break;
++			case FIL_PAGE_TYPE_ZBLOB:
++				strcpy(page_type, "zblob");
++				break;
++			case FIL_PAGE_TYPE_ZBLOB2:
++				strcpy(page_type, "zblob2");
++				break;
++			default:
++				sprintf(page_type, "unknown (type=%lu)",
++					(unsigned long) fil_page_get_type(frame));
++			}
++			field_store_string(table->field[0], page_type);
++			table->field[1]->store(block->page.space);
++			table->field[2]->store(block->page.offset);
++			table->field[3]->store(block->page.LRU_position);
++			table->field[4]->store(block->page.buf_fix_count);
++			table->field[5]->store(block->page.flush_type);
++
++			if (schema_table_store_record(thd, table)) {
++				status = 1;
++				break;
++			}
++
++		}
++	}
++
++	buf_pool_mutex_exit();
++
++	/* 0 when every row was stored, 1 after the first failed store */
++	DBUG_RETURN(status);
++}
++
++/***********************************************************************
++Fill the dynamic table information_schema.innodb_buffer_pool_pages_index. */
++static
++int
++i_s_innodb_buffer_pool_pages_index_fill(
++/*================*/
++				/* out: 0 on success, 1 on failure */
++	THD*		thd,	/* in: thread */
++	TABLE_LIST*	tables,	/* in/out: tables to fill */
++	COND*		cond)	/* in: condition (ignored) */
++{
++	TABLE*	table	= (TABLE *) tables->table;
++	int	status	= 0;
++
++	ulint		n_chunks, n_blocks;
++	dict_index_t*	index;
++	dulint		index_id;
++
++	char *p;
++	char db_name_raw[NAME_LEN*5+1], db_name[NAME_LEN+1];
++	char table_name_raw[NAME_LEN*5+1], table_name[NAME_LEN+1];
++
++	buf_chunk_t*	chunk;
++
++	DBUG_ENTER("i_s_innodb_buffer_pool_pages_index_fill");
++
++	/* deny access to non-superusers: they get an empty table, not an error */
++	if (check_global_access(thd, PROCESS_ACL)) {
++
++		DBUG_RETURN(0);
++	}
++
++	RETURN_IF_INNODB_NOT_STARTED(tables->schema_table_name);
++
++	/* Latching order: dict_sys->mutex must be taken before the buffer pool mutex. */
++	mutex_enter(&(dict_sys->mutex));
++	buf_pool_mutex_enter();
++	chunk = buf_pool->chunks;
++	/* "!status" stops the scan of the remaining chunks after a failed row store. */
++	for (n_chunks = buf_pool->n_chunks; !status && n_chunks--; chunk++) {
++		buf_block_t*	block		= chunk->blocks;
++
++		for (n_blocks = chunk->size; n_blocks--; block++) {
++			const buf_frame_t* frame = block->frame;
++			/* A row is produced only for index pages whose index is found in the cache. */
++			if (fil_page_get_type(frame) == FIL_PAGE_INDEX) {
++				index_id = btr_page_get_index_id(frame);
++				index = dict_index_get_if_in_cache_low(index_id);
++				if(index)
++				{
++					if((p = strchr(index->table_name, '/')))
++					{
++						strncpy(db_name_raw, index->table_name, p-index->table_name);
++						db_name_raw[p-index->table_name] = 0;
++						filename_to_tablename(db_name_raw, db_name, sizeof(db_name));
++						field_store_string(table->field[0], db_name);
++						p++;
++					} else {
++						field_store_string(table->field[0], NULL);
++						p = (char *)index->table_name;
++					}
++					strcpy(table_name_raw, p);
++					filename_to_tablename(table_name_raw, table_name, sizeof(table_name));
++					field_store_string(table->field[1], table_name);
++					field_store_string(table->field[2], index->name);
++					/* fields 9 and 10 are flags: non-zero newest/oldest modification */
++					table->field[3]->store(block->page.space);
++					table->field[4]->store(block->page.offset);
++					table->field[5]->store(page_get_n_recs(frame));
++					table->field[6]->store(page_get_data_size(frame));
++					table->field[7]->store(block->is_hashed);
++					table->field[8]->store(block->page.accessed);
++					table->field[9]->store(block->page.newest_modification != 0);
++					table->field[10]->store(block->page.oldest_modification != 0);
++					table->field[11]->store(block->page.old);
++					table->field[12]->store(block->page.LRU_position);
++					table->field[13]->store(block->page.buf_fix_count);
++					table->field[14]->store(block->page.flush_type);
++
++					if (schema_table_store_record(thd, table)) {
++						status = 1;
++						break;
++					}
++				}
++			}
++		}
++	}
++
++	buf_pool_mutex_exit();
++	mutex_exit(&(dict_sys->mutex));
++
++	DBUG_RETURN(status);
++}
++
++/***********************************************************************
++Fill the dynamic table information_schema.innodb_buffer_pool_pages_blob. */
++static
++int
++i_s_innodb_buffer_pool_pages_blob_fill(
++/*================*/
++				/* out: 0 on success, 1 on failure */
++	THD*		thd,	/* in: thread */
++	TABLE_LIST*	tables,	/* in/out: tables to fill */
++	COND*		cond)	/* in: condition (ignored) */
++{
++	TABLE*	table	= (TABLE *) tables->table;
++	int	status	= 0;
++
++	ulint		n_chunks, n_blocks;
++	buf_chunk_t*	chunk;
++	page_zip_des_t*	block_page_zip;
++
++	ulint		part_len;
++	ulint		next_page_no;
++
++	DBUG_ENTER("i_s_innodb_buffer_pool_pages_blob_fill");
++
++	/* deny access to non-superusers: they get an empty table, not an error */
++	if (check_global_access(thd, PROCESS_ACL)) {
++
++		DBUG_RETURN(0);
++	}
++
++	RETURN_IF_INNODB_NOT_STARTED(tables->schema_table_name);
++
++	/* Nothing below reads the data dictionary, so dict_sys->mutex is not taken. */
++	buf_pool_mutex_enter();
++
++	chunk = buf_pool->chunks;
++	/* "!status" stops the scan of the remaining chunks after a failed row store. */
++	for (n_chunks = buf_pool->n_chunks; !status && n_chunks--; chunk++) {
++		buf_block_t*	block		= chunk->blocks;
++
++		for (n_blocks = chunk->size; n_blocks--; block++) {
++			const buf_frame_t* frame = block->frame;
++			/* look the descriptor up per block, not once per chunk */
++			block_page_zip = buf_block_get_page_zip(block);
++			if (fil_page_get_type(frame) == FIL_PAGE_TYPE_BLOB) {
++
++				if (UNIV_LIKELY_NULL(block_page_zip)) {
++					part_len = 0; /* hmm, can't figure it out */
++
++					next_page_no = mach_read_from_4(
++						buf_block_get_frame(block)
++						+ FIL_PAGE_NEXT);
++				} else {
++					part_len = mach_read_from_4(
++						buf_block_get_frame(block)
++						+ FIL_PAGE_DATA
++						+ 0 /*BTR_BLOB_HDR_PART_LEN*/);
++
++					next_page_no = mach_read_from_4(
++						buf_block_get_frame(block)
++						+ FIL_PAGE_DATA
++						+ 4 /*BTR_BLOB_HDR_NEXT_PAGE_NO*/);
++				}
++
++				table->field[0]->store(block->page.space);
++				table->field[1]->store(block->page.offset);
++				table->field[2]->store(block_page_zip != NULL);
++				table->field[3]->store(part_len);
++				/* FIL_NULL (no next page) is reported as 0 */
++				if(next_page_no == FIL_NULL)
++				{
++					table->field[4]->store(0);
++				} else {
++					table->field[4]->store(next_page_no);
++				}
++
++				table->field[5]->store(block->page.LRU_position);
++				table->field[6]->store(block->page.buf_fix_count);
++				table->field[7]->store(block->page.flush_type);
++
++				if (schema_table_store_record(thd, table)) {
++					status = 1;
++					break;
++				}
++
++			}
++		}
++	}
++
++	buf_pool_mutex_exit();
++
++	/* 0 when every row was stored, 1 after the first failed store */
++	DBUG_RETURN(status);
++}
++
++/***********************************************************************
++Bind information_schema.innodb_buffer_pool_pages: set its column list and fill callback. */
++static
++int
++i_s_innodb_buffer_pool_pages_init(
++/*=========*/
++			/* out: 0 on success */
++	void*	p)	/* in/out: table schema object */
++{
++	DBUG_ENTER("i_s_innodb_buffer_pool_pages_init");
++	ST_SCHEMA_TABLE* schema = (ST_SCHEMA_TABLE*) p;
++
++	schema->fields_info = i_s_innodb_buffer_pool_pages_fields_info;
++	schema->fill_table = i_s_innodb_buffer_pool_pages_fill;
++
++	DBUG_RETURN(0);
++}
++
++/***********************************************************************
++Bind information_schema.innodb_buffer_pool_pages_index: set its column list and fill callback. */
++static
++int
++i_s_innodb_buffer_pool_pages_index_init(
++/*=========*/
++			/* out: 0 on success */
++	void*	p)	/* in/out: table schema object */
++{
++	DBUG_ENTER("i_s_innodb_buffer_pool_pages_index_init");
++	ST_SCHEMA_TABLE* schema = (ST_SCHEMA_TABLE*) p;
++
++	schema->fields_info = i_s_innodb_buffer_pool_pages_index_fields_info;
++	schema->fill_table = i_s_innodb_buffer_pool_pages_index_fill;
++
++	DBUG_RETURN(0);
++}
++
++/***********************************************************************
++Bind information_schema.innodb_buffer_pool_pages_blob: set its column list and fill callback. */
++static
++int
++i_s_innodb_buffer_pool_pages_blob_init(
++/*=========*/
++			/* out: 0 on success */
++	void*	p)	/* in/out: table schema object */
++{
++	DBUG_ENTER("i_s_innodb_buffer_pool_pages_blob_init");
++	ST_SCHEMA_TABLE* schema = (ST_SCHEMA_TABLE*) p;
++
++	schema->fields_info = i_s_innodb_buffer_pool_pages_blob_fields_info;
++	schema->fill_table = i_s_innodb_buffer_pool_pages_blob_fill;
++
++	DBUG_RETURN(0);
++}
++
++/* INFORMATION_SCHEMA plugin descriptor for the INNODB_BUFFER_POOL_PAGES table. */
++UNIV_INTERN struct st_mysql_plugin	i_s_innodb_buffer_pool_pages =
++{
++	/* the plugin type (a MYSQL_XXX_PLUGIN value) */
++	/* int */
++	STRUCT_FLD(type, MYSQL_INFORMATION_SCHEMA_PLUGIN),
++
++	/* pointer to type-specific plugin descriptor */
++	/* void* */
++	STRUCT_FLD(info, &i_s_info),
++
++	/* plugin name */
++	/* const char* */
++	STRUCT_FLD(name, "INNODB_BUFFER_POOL_PAGES"),
++
++	/* plugin author (for SHOW PLUGINS) */
++	/* const char* */
++	STRUCT_FLD(author, plugin_author),
++
++	/* general descriptive text (for SHOW PLUGINS) */
++	/* const char* */
++	STRUCT_FLD(descr, "InnoDB buffer pool pages"),
++
++	/* the plugin license (PLUGIN_LICENSE_XXX) */
++	/* int */
++	STRUCT_FLD(license, PLUGIN_LICENSE_GPL),
++
++	/* the function to invoke when plugin is loaded */
++	/* int (*)(void*); */
++	STRUCT_FLD(init, i_s_innodb_buffer_pool_pages_init),
++
++	/* the function to invoke when plugin is unloaded */
++	/* int (*)(void*); */
++	STRUCT_FLD(deinit, i_s_common_deinit),
++
++	/* plugin version (for SHOW PLUGINS) */
++	/* unsigned int */
++	STRUCT_FLD(version, 0x0100 /* 1.0 */),
++
++	/* struct st_mysql_show_var* */
++	STRUCT_FLD(status_vars, NULL),
++
++	/* struct st_mysql_sys_var** */
++	STRUCT_FLD(system_vars, NULL),
++
++	/* reserved for dependency checking */
++	/* void* */
++	STRUCT_FLD(__reserved1, NULL)
++};
++/* INFORMATION_SCHEMA plugin descriptor for the INNODB_BUFFER_POOL_PAGES_INDEX table. */
++UNIV_INTERN struct st_mysql_plugin	i_s_innodb_buffer_pool_pages_index =
++{
++	/* the plugin type (a MYSQL_XXX_PLUGIN value) */
++	/* int */
++	STRUCT_FLD(type, MYSQL_INFORMATION_SCHEMA_PLUGIN),
++
++	/* pointer to type-specific plugin descriptor */
++	/* void* */
++	STRUCT_FLD(info, &i_s_info),
++
++	/* plugin name */
++	/* const char* */
++	STRUCT_FLD(name, "INNODB_BUFFER_POOL_PAGES_INDEX"),
++
++	/* plugin author (for SHOW PLUGINS) */
++	/* const char* */
++	STRUCT_FLD(author, plugin_author),
++
++	/* general descriptive text (for SHOW PLUGINS) */
++	/* const char* */
++	STRUCT_FLD(descr, "InnoDB buffer pool index pages"),
++
++	/* the plugin license (PLUGIN_LICENSE_XXX) */
++	/* int */
++	STRUCT_FLD(license, PLUGIN_LICENSE_GPL),
++
++	/* the function to invoke when plugin is loaded */
++	/* int (*)(void*); */
++	STRUCT_FLD(init, i_s_innodb_buffer_pool_pages_index_init),
++
++	/* the function to invoke when plugin is unloaded */
++	/* int (*)(void*); */
++	STRUCT_FLD(deinit, i_s_common_deinit),
++
++	/* plugin version (for SHOW PLUGINS) */
++	/* unsigned int */
++	STRUCT_FLD(version, 0x0100 /* 1.0 */),
++
++	/* struct st_mysql_show_var* */
++	STRUCT_FLD(status_vars, NULL),
++
++	/* struct st_mysql_sys_var** */
++	STRUCT_FLD(system_vars, NULL),
++
++	/* reserved for dependency checking */
++	/* void* */
++	STRUCT_FLD(__reserved1, NULL)
++};
++/* INFORMATION_SCHEMA plugin descriptor for the INNODB_BUFFER_POOL_PAGES_BLOB table. */
++UNIV_INTERN struct st_mysql_plugin	i_s_innodb_buffer_pool_pages_blob =
++{
++	/* the plugin type (a MYSQL_XXX_PLUGIN value) */
++	/* int */
++	STRUCT_FLD(type, MYSQL_INFORMATION_SCHEMA_PLUGIN),
++
++	/* pointer to type-specific plugin descriptor */
++	/* void* */
++	STRUCT_FLD(info, &i_s_info),
++
++	/* plugin name */
++	/* const char* */
++	STRUCT_FLD(name, "INNODB_BUFFER_POOL_PAGES_BLOB"),
++
++	/* plugin author (for SHOW PLUGINS) */
++	/* const char* */
++	STRUCT_FLD(author, plugin_author),
++
++	/* general descriptive text (for SHOW PLUGINS) */
++	/* const char* */
++	STRUCT_FLD(descr, "InnoDB buffer pool blob pages"),
++
++	/* the plugin license (PLUGIN_LICENSE_XXX) */
++	/* int */
++	STRUCT_FLD(license, PLUGIN_LICENSE_GPL),
++
++	/* the function to invoke when plugin is loaded */
++	/* int (*)(void*); */
++	STRUCT_FLD(init, i_s_innodb_buffer_pool_pages_blob_init),
++
++	/* the function to invoke when plugin is unloaded */
++	/* int (*)(void*); */
++	STRUCT_FLD(deinit, i_s_common_deinit),
++
++	/* plugin version (for SHOW PLUGINS) */
++	/* unsigned int */
++	STRUCT_FLD(version, 0x0100 /* 1.0 */),
++
++	/* struct st_mysql_show_var* */
++	STRUCT_FLD(status_vars, NULL),
++
++	/* struct st_mysql_sys_var** */
++	STRUCT_FLD(system_vars, NULL),
++
++	/* reserved for dependency checking */
++	/* void* */
++	STRUCT_FLD(__reserved1, NULL)
++};
++
++
+ /* Fields of the dynamic table INFORMATION_SCHEMA.innodb_trx */
+ static ST_FIELD_INFO	innodb_trx_fields_info[] =
+ {
+diff -ruN innodb_plugin-1.0.3_orig/handler/i_s.h innodb_plugin-1.0.3_tmp/handler/i_s.h
+--- innodb_plugin-1.0.3_orig/handler/i_s.h	2009-03-18 18:08:22.000000000 +0900
++++ innodb_plugin-1.0.3_tmp/handler/i_s.h	2009-03-18 18:15:09.000000000 +0900
+@@ -25,6 +25,9 @@
+ #ifndef i_s_h
+ #define i_s_h
+ 
++extern struct st_mysql_plugin	i_s_innodb_buffer_pool_pages;
++extern struct st_mysql_plugin	i_s_innodb_buffer_pool_pages_index;
++extern struct st_mysql_plugin	i_s_innodb_buffer_pool_pages_blob;
+ extern struct st_mysql_plugin	i_s_innodb_trx;
+ extern struct st_mysql_plugin	i_s_innodb_locks;
+ extern struct st_mysql_plugin	i_s_innodb_lock_waits;
+diff -ruN innodb_plugin-1.0.3_orig/handler/innodb_patch_info.h innodb_plugin-1.0.3_tmp/handler/innodb_patch_info.h
+--- innodb_plugin-1.0.3_orig/handler/innodb_patch_info.h	2009-03-18 18:12:58.000000000 +0900
++++ innodb_plugin-1.0.3_tmp/handler/innodb_patch_info.h	2009-03-18 18:15:09.000000000 +0900
+@@ -28,5 +28,6 @@
+ {"innodb_io","Improvements to InnoDB IO","","http://www.percona.com/docs/wiki/percona-xtradb"},
+ {"innodb_rw_lock","InnoDB RW-lock fixes","Useful for 8+ cores SMP systems","http://www.percona.com/docs/wiki/percona-xtradb"},
+ {"innodb_opt_lru_count","Fix of buffer_pool mutex","Decreases contention on buffer_pool mutex on LRU operations","http://www.percona.com/docs/wiki/percona-xtradb"},
++{"innodb_buffer_pool_pages","Information of buffer pool content","","http://www.percona.com/docs/wiki/percona-xtradb"},
+ {NULL, NULL, NULL, NULL}
+ };
+diff -ruN innodb_plugin-1.0.3_orig/scripts/install_innodb_plugins.sql innodb_plugin-1.0.3_tmp/scripts/install_innodb_plugins.sql
+--- innodb_plugin-1.0.3_orig/scripts/install_innodb_plugins.sql	2009-03-18 18:08:22.000000000 +0900
++++ innodb_plugin-1.0.3_tmp/scripts/install_innodb_plugins.sql	2009-03-18 18:15:09.000000000 +0900
+@@ -8,3 +8,6 @@
+ INSTALL PLUGIN innodb_cmpmem SONAME 'ha_innodb.so';
+ INSTALL PLUGIN innodb_cmpmem_reset SONAME 'ha_innodb.so';
+ INSTALL PLUGIN XTRADB_ENHANCEMENTS SONAME 'ha_innodb.so';
++INSTALL PLUGIN INNODB_BUFFER_POOL_PAGES SONAME 'ha_innodb.so';
++INSTALL PLUGIN INNODB_BUFFER_POOL_PAGES_BLOB SONAME 'ha_innodb.so';
++INSTALL PLUGIN INNODB_BUFFER_POOL_PAGES_INDEX SONAME 'ha_innodb.so';

=== added file 'innodb_expand_undo_slots.patch'
--- innodb_expand_undo_slots.patch	1970-01-01 00:00:00 +0000
+++ innodb_expand_undo_slots.patch	2009-03-20 05:35:54 +0000
@@ -0,0 +1,143 @@
+diff -ruN innodb_plugin-1.0.3_orig/handler/ha_innodb.cc innodb_plugin-1.0.3_tmp/handler/ha_innodb.cc
+--- innodb_plugin-1.0.3_orig/handler/ha_innodb.cc	2009-03-18 18:19:52.000000000 +0900
++++ innodb_plugin-1.0.3_tmp/handler/ha_innodb.cc	2009-03-19 16:14:28.000000000 +0900
+@@ -174,6 +174,7 @@
+ #endif /* UNIV_LOG_ARCHIVE */
+ static my_bool	innobase_use_doublewrite		= TRUE;
+ static my_bool	innobase_use_checksums			= TRUE;
++static my_bool	innobase_extra_undoslots		= FALSE;
+ static my_bool	innobase_locks_unsafe_for_binlog	= FALSE;
+ static my_bool	innobase_rollback_on_timeout		= FALSE;
+ static my_bool	innobase_create_status_file		= FALSE;
+@@ -2002,6 +2003,8 @@
+ 		goto error;
+ 	}
+ 
++	srv_extra_undoslots = (ibool) innobase_extra_undoslots;
++
+ 	/* -------------- Log files ---------------------------*/
+ 
+ 	/* The default dir for log files is the datadir of MySQL */
+@@ -9499,6 +9502,13 @@
+   "The common part for InnoDB table spaces.",
+   NULL, NULL, NULL);
+ 
++static MYSQL_SYSVAR_BOOL(extra_undoslots, innobase_extra_undoslots,
++  PLUGIN_VAR_NOCMDARG | PLUGIN_VAR_READONLY,
++  "Enable to use about 4000 undo slots instead of default 1024. "
++  "#### Attention: Once you enable this parameter, "
++  "don't use the datafile for normal mysqld or ibbackup! ####",
++  NULL, NULL, FALSE);
++
+ static MYSQL_SYSVAR_BOOL(doublewrite, innobase_use_doublewrite,
+   PLUGIN_VAR_NOCMDARG | PLUGIN_VAR_READONLY,
+   "Enable InnoDB doublewrite buffer (enabled by default). "
+@@ -9813,6 +9823,7 @@
+   MYSQL_SYSVAR(data_file_path),
+   MYSQL_SYSVAR(data_home_dir),
+   MYSQL_SYSVAR(doublewrite),
++  MYSQL_SYSVAR(extra_undoslots),
+   MYSQL_SYSVAR(fast_shutdown),
+   MYSQL_SYSVAR(file_io_threads),
+   MYSQL_SYSVAR(file_per_table),
+diff -ruN innodb_plugin-1.0.3_orig/handler/innodb_patch_info.h innodb_plugin-1.0.3_tmp/handler/innodb_patch_info.h
+--- innodb_plugin-1.0.3_orig/handler/innodb_patch_info.h	2009-03-19 16:13:38.000000000 +0900
++++ innodb_plugin-1.0.3_tmp/handler/innodb_patch_info.h	2009-03-19 16:14:28.000000000 +0900
+@@ -30,5 +30,6 @@
+ {"innodb_opt_lru_count","Fix of buffer_pool mutex","Decreases contention on buffer_pool mutex on LRU operations","http://www.percona.com/docs/wiki/percona-xtradb"},
+ {"innodb_buffer_pool_pages","Information of buffer pool content","","http://www.percona.com/docs/wiki/percona-xtradb"},
+ {"innodb_split_buf_pool_mutex","More fix of buffer_pool mutex","Spliting buf_pool_mutex and optimizing based on innodb_opt_lru_count","http://www.percona.com/docs/wiki/percona-xtradb"},
++{"innodb_expand_undo_slots","expandable maximum number of undo slots","from 1024 (default) to about 4000","http://www.percona.com/docs/wiki/percona-xtradb"},
+ {NULL, NULL, NULL, NULL}
+ };
+diff -ruN innodb_plugin-1.0.3_orig/include/srv0srv.h innodb_plugin-1.0.3_tmp/include/srv0srv.h
+--- innodb_plugin-1.0.3_orig/include/srv0srv.h	2009-03-18 18:08:37.000000000 +0900
++++ innodb_plugin-1.0.3_tmp/include/srv0srv.h	2009-03-19 16:14:28.000000000 +0900
+@@ -95,6 +95,8 @@
+ extern ulint*	srv_data_file_sizes;
+ extern ulint*	srv_data_file_is_raw_partition;
+ 
++extern ibool	srv_extra_undoslots;
++
+ extern ibool	srv_auto_extend_last_data_file;
+ extern ulint	srv_last_file_size_max;
+ extern ulong	srv_auto_extend_increment;
+diff -ruN innodb_plugin-1.0.3_orig/include/trx0rseg.h innodb_plugin-1.0.3_tmp/include/trx0rseg.h
+--- innodb_plugin-1.0.3_orig/include/trx0rseg.h	2009-02-17 18:41:24.000000000 +0900
++++ innodb_plugin-1.0.3_tmp/include/trx0rseg.h	2009-03-19 16:14:28.000000000 +0900
+@@ -131,8 +131,11 @@
+ 	mtr_t*	mtr);		/* in: mtr */
+ 
+ 
++/* The real maximum is usually 4076, but 4 slots are kept in reserve for safety. */
++#define TRX_RSEG_N_EXTRA_SLOTS	(((UNIV_PAGE_SIZE - (FIL_PAGE_DATA + FIL_PAGE_DATA_END + TRX_RSEG_UNDO_SLOTS)) / TRX_RSEG_SLOT_SIZE) - 4)
++
+ /* Number of undo log slots in a rollback segment file copy */
+-#define TRX_RSEG_N_SLOTS	(UNIV_PAGE_SIZE / 16)
++#define TRX_RSEG_N_SLOTS	(srv_extra_undoslots ? TRX_RSEG_N_EXTRA_SLOTS : (UNIV_PAGE_SIZE / 16))
+ 
+ /* Maximum number of transactions supported by a single rollback segment */
+ #define TRX_RSEG_MAX_N_TRXS	(TRX_RSEG_N_SLOTS / 2)
+diff -ruN innodb_plugin-1.0.3_orig/srv/srv0srv.c innodb_plugin-1.0.3_tmp/srv/srv0srv.c
+--- innodb_plugin-1.0.3_orig/srv/srv0srv.c	2009-03-18 18:08:37.000000000 +0900
++++ innodb_plugin-1.0.3_tmp/srv/srv0srv.c	2009-03-19 16:14:28.000000000 +0900
+@@ -131,6 +131,8 @@
+ /* size in database pages */
+ UNIV_INTERN ulint*	srv_data_file_sizes = NULL;
+ 
++UNIV_INTERN ibool	srv_extra_undoslots = FALSE;
++
+ /* if TRUE, then we auto-extend the last data file */
+ UNIV_INTERN ibool	srv_auto_extend_last_data_file	= FALSE;
+ /* if != 0, this tells the max size auto-extending may increase the
+diff -ruN innodb_plugin-1.0.3_orig/trx/trx0undo.c innodb_plugin-1.0.3_tmp/trx/trx0undo.c
+--- innodb_plugin-1.0.3_orig/trx/trx0undo.c	2009-02-17 19:12:56.000000000 +0900
++++ innodb_plugin-1.0.3_tmp/trx/trx0undo.c	2009-03-19 16:14:28.000000000 +0900
+@@ -1382,9 +1382,47 @@
+ 	rseg_header = trx_rsegf_get_new(rseg->space, rseg->zip_size,
+ 					rseg->page_no, &mtr);
+ 
++	if (!srv_extra_undoslots) {
++		/* call mtr_read_ulint() directly to avoid an "Assertion failure" */
++		//page_no = trx_rsegf_get_nth_undo(rseg_header, TRX_RSEG_N_EXTRA_SLOTS - 1, &mtr);
++		page_no = mtr_read_ulint(rseg_header + TRX_RSEG_UNDO_SLOTS
++					 + (TRX_RSEG_N_EXTRA_SLOTS - 1) * TRX_RSEG_SLOT_SIZE,
++					 MLOG_4BYTES, &mtr);
++		if (page_no != 0) {
++			/* check extended slots are not used */
++			for (i = TRX_RSEG_N_SLOTS; i < TRX_RSEG_N_EXTRA_SLOTS; i++) {
++				/* call mtr_read_ulint() directly to avoid an "Assertion failure" */
++				page_no = mtr_read_ulint(rseg_header + TRX_RSEG_UNDO_SLOTS
++							 + i * TRX_RSEG_SLOT_SIZE,
++							 MLOG_4BYTES, &mtr);
++				if (page_no != FIL_NULL) {
++					srv_extra_undoslots = TRUE;
++					fprintf(stderr,
++"InnoDB: Error: innodb_extra_undoslots option is disabled, but it was enabled before.\n"
++"InnoDB: The datafile is not normal for mysqld and disabled innodb_extra_undoslots.\n"
++"InnoDB: Enable innodb_extra_undoslots if it was enabled before, and\n"
++"InnoDB: ### don't use this datafile with other mysqld or ibbackup! ###\n"
++"InnoDB: Cannot continue operation for the safety. Calling exit(1).\n");
++					exit(1);
++				}
++			}
++			fprintf(stderr,
++"InnoDB: Warning: innodb_extra_undoslots option is disabled, but it was  enabled before.\n"
++"InnoDB: But extended undo slots seem not used, so continue operation.\n");
++		}
++	}
++
+ 	for (i = 0; i < TRX_RSEG_N_SLOTS; i++) {
+ 		page_no = trx_rsegf_get_nth_undo(rseg_header, i, &mtr);
+ 
++		/* If a slot was not initialized when the datafile was created,
++		page_no will read as 0 for the extended slots */
++
++		if (page_no == 0) {
++			page_no = FIL_NULL;
++			trx_rsegf_set_nth_undo(rseg_header, i, page_no, &mtr);
++		}
++
+ 		/* In forced recovery: try to avoid operations which look
+ 		at database pages; undo logs are rapidly changing data, and
+ 		the probability that they are in an inconsistent state is

=== added file 'innodb_extra_rseg.patch'
--- innodb_extra_rseg.patch	1970-01-01 00:00:00 +0000
+++ innodb_extra_rseg.patch	2009-03-20 05:35:54 +0000
@@ -0,0 +1,326 @@
+diff -ruN innodb_plugin-1.0.3_orig/handler/ha_innodb.cc innodb_plugin-1.0.3_tmp/handler/ha_innodb.cc
+--- innodb_plugin-1.0.3_orig/handler/ha_innodb.cc	2009-03-19 16:17:48.000000000 +0900
++++ innodb_plugin-1.0.3_tmp/handler/ha_innodb.cc	2009-03-19 16:20:33.000000000 +0900
+@@ -9813,6 +9813,11 @@
+   "Number of background write I/O threads in InnoDB.",
+   NULL, NULL, 1, 1, 64, 0);
+ 
++static MYSQL_SYSVAR_ULONG(extra_rsegments, srv_extra_rsegments,
++  PLUGIN_VAR_RQCMDARG | PLUGIN_VAR_READONLY,
++  "Number of extra user rollback segments when create new database.",
++  NULL, NULL, 0, 0, 127, 0);
++
+ static struct st_mysql_sys_var* innobase_system_variables[]= {
+   MYSQL_SYSVAR(additional_mem_pool_size),
+   MYSQL_SYSVAR(autoextend_increment),
+@@ -9872,6 +9877,7 @@
+   MYSQL_SYSVAR(enable_unsafe_group_commit),
+   MYSQL_SYSVAR(read_io_threads),
+   MYSQL_SYSVAR(write_io_threads),
++  MYSQL_SYSVAR(extra_rsegments),
+   MYSQL_SYSVAR(use_sys_malloc),
+   MYSQL_SYSVAR(change_buffering),
+   NULL
+@@ -10054,6 +10060,7 @@
+   innobase_system_variables, /* system variables */
+   NULL /* reserved */
+ },
++i_s_innodb_rseg,
+ i_s_innodb_buffer_pool_pages,
+ i_s_innodb_buffer_pool_pages_index,
+ i_s_innodb_buffer_pool_pages_blob,
+diff -ruN innodb_plugin-1.0.3_orig/handler/i_s.cc innodb_plugin-1.0.3_tmp/handler/i_s.cc
+--- innodb_plugin-1.0.3_orig/handler/i_s.cc	2009-03-19 16:13:38.000000000 +0900
++++ innodb_plugin-1.0.3_tmp/handler/i_s.cc	2009-03-19 16:18:06.000000000 +0900
+@@ -43,6 +43,8 @@
+ #include "srv0start.h" /* for srv_was_started */
+ #include "btr0btr.h" /* for btr_page_get_index_id */
+ #include "dict0dict.h" /* for dict_index_get_if_in_cache */
++#include "trx0rseg.h" /* for trx_rseg_struct */
++#include "trx0sys.h" /* for trx_sys */
+ /* from buf0buf.c */
+ struct buf_chunk_struct{
+ 	ulint		mem_size;	/* allocated size of the chunk */
+@@ -2490,3 +2492,166 @@
+ 
+ 	DBUG_RETURN(0);
+ }
++
++/***********************************************************************
++*/
++static ST_FIELD_INFO	i_s_innodb_rseg_fields_info[] =
++{
++	{STRUCT_FLD(field_name,		"rseg_id"),
++	 STRUCT_FLD(field_length,	MY_INT64_NUM_DECIMAL_DIGITS),
++	 STRUCT_FLD(field_type,		MYSQL_TYPE_LONGLONG),
++	 STRUCT_FLD(value,		0),
++	 STRUCT_FLD(field_flags,	MY_I_S_UNSIGNED),
++	 STRUCT_FLD(old_name,		""),
++	 STRUCT_FLD(open_method,	SKIP_OPEN_TABLE)},
++
++	{STRUCT_FLD(field_name,		"space_id"),
++	 STRUCT_FLD(field_length,	MY_INT64_NUM_DECIMAL_DIGITS),
++	 STRUCT_FLD(field_type,		MYSQL_TYPE_LONGLONG),
++	 STRUCT_FLD(value,		0),
++	 STRUCT_FLD(field_flags,	MY_I_S_UNSIGNED),
++	 STRUCT_FLD(old_name,		""),
++	 STRUCT_FLD(open_method,	SKIP_OPEN_TABLE)},
++
++	{STRUCT_FLD(field_name,		"zip_size"),
++	 STRUCT_FLD(field_length,	MY_INT64_NUM_DECIMAL_DIGITS),
++	 STRUCT_FLD(field_type,		MYSQL_TYPE_LONGLONG),
++	 STRUCT_FLD(value,		0),
++	 STRUCT_FLD(field_flags,	MY_I_S_UNSIGNED),
++	 STRUCT_FLD(old_name,		""),
++	 STRUCT_FLD(open_method,	SKIP_OPEN_TABLE)},
++
++	{STRUCT_FLD(field_name,		"page_no"),
++	 STRUCT_FLD(field_length,	MY_INT64_NUM_DECIMAL_DIGITS),
++	 STRUCT_FLD(field_type,		MYSQL_TYPE_LONGLONG),
++	 STRUCT_FLD(value,		0),
++	 STRUCT_FLD(field_flags,	MY_I_S_UNSIGNED),
++	 STRUCT_FLD(old_name,		""),
++	 STRUCT_FLD(open_method,	SKIP_OPEN_TABLE)},
++
++	{STRUCT_FLD(field_name,		"max_size"),
++	 STRUCT_FLD(field_length,	MY_INT64_NUM_DECIMAL_DIGITS),
++	 STRUCT_FLD(field_type,		MYSQL_TYPE_LONGLONG),
++	 STRUCT_FLD(value,		0),
++	 STRUCT_FLD(field_flags,	MY_I_S_UNSIGNED),
++	 STRUCT_FLD(old_name,		""),
++	 STRUCT_FLD(open_method,	SKIP_OPEN_TABLE)},
++
++	{STRUCT_FLD(field_name,		"curr_size"),
++	 STRUCT_FLD(field_length,	MY_INT64_NUM_DECIMAL_DIGITS),
++	 STRUCT_FLD(field_type,		MYSQL_TYPE_LONGLONG),
++	 STRUCT_FLD(value,		0),
++	 STRUCT_FLD(field_flags,	MY_I_S_UNSIGNED),
++	 STRUCT_FLD(old_name,		""),
++	 STRUCT_FLD(open_method,	SKIP_OPEN_TABLE)},
++
++	END_OF_ST_FIELD_INFO
++};
++
++static
++int
++i_s_innodb_rseg_fill(
++/*=================*/
++	THD*		thd,	/* in: thread */
++	TABLE_LIST*	tables,	/* in/out: tables to fill */
++	COND*		cond)	/* in: condition (ignored) */
++{
++	TABLE*	table	= (TABLE *) tables->table;
++	int	status	= 0;
++	trx_rseg_t*	rseg;
++
++	DBUG_ENTER("i_s_innodb_rseg_fill");
++
++	/* deny access to non-superusers */
++	if (check_global_access(thd, PROCESS_ACL)) {
++
++		DBUG_RETURN(0);
++	}
++
++	RETURN_IF_INNODB_NOT_STARTED(tables->schema_table_name);
++
++	rseg = UT_LIST_GET_FIRST(trx_sys->rseg_list);
++
++	while (rseg) {
++		table->field[0]->store(rseg->id);
++		table->field[1]->store(rseg->space);
++		table->field[2]->store(rseg->zip_size);
++		table->field[3]->store(rseg->page_no);
++		table->field[4]->store(rseg->max_size);
++		table->field[5]->store(rseg->curr_size);
++
++		if (schema_table_store_record(thd, table)) {
++			status = 1;
++			break;
++		}
++
++		rseg = UT_LIST_GET_NEXT(rseg_list, rseg);
++	}
++
++	DBUG_RETURN(status);
++}
++
++static
++int
++i_s_innodb_rseg_init(
++/*=================*/
++			/* out: 0 on success */
++	void*	p)	/* in/out: table schema object */
++{
++	DBUG_ENTER("i_s_innodb_rseg_init");
++	ST_SCHEMA_TABLE* schema = (ST_SCHEMA_TABLE*) p;
++
++	schema->fields_info = i_s_innodb_rseg_fields_info;
++	schema->fill_table = i_s_innodb_rseg_fill;
++
++	DBUG_RETURN(0);
++}
++
++UNIV_INTERN struct st_mysql_plugin	i_s_innodb_rseg =
++{
++	/* the plugin type (a MYSQL_XXX_PLUGIN value) */
++	/* int */
++	STRUCT_FLD(type, MYSQL_INFORMATION_SCHEMA_PLUGIN),
++
++	/* pointer to type-specific plugin descriptor */
++	/* void* */
++	STRUCT_FLD(info, &i_s_info),
++
++	/* plugin name */
++	/* const char* */
++	STRUCT_FLD(name, "INNODB_RSEG"),
++
++	/* plugin author (for SHOW PLUGINS) */
++	/* const char* */
++	STRUCT_FLD(author, plugin_author),
++
++	/* general descriptive text (for SHOW PLUGINS) */
++	/* const char* */
++	STRUCT_FLD(descr, "InnoDB rollback segment information"),
++
++	/* the plugin license (PLUGIN_LICENSE_XXX) */
++	/* int */
++	STRUCT_FLD(license, PLUGIN_LICENSE_GPL),
++
++	/* the function to invoke when plugin is loaded */
++	/* int (*)(void*); */
++	STRUCT_FLD(init, i_s_innodb_rseg_init),
++
++	/* the function to invoke when plugin is unloaded */
++	/* int (*)(void*); */
++	STRUCT_FLD(deinit, i_s_common_deinit),
++
++	/* plugin version (for SHOW PLUGINS) */
++	/* unsigned int */
++	STRUCT_FLD(version, 0x0100 /* 1.0 */),
++
++	/* struct st_mysql_show_var* */
++	STRUCT_FLD(status_vars, NULL),
++
++	/* struct st_mysql_sys_var** */
++	STRUCT_FLD(system_vars, NULL),
++
++	/* reserved for dependency checking */
++	/* void* */
++	STRUCT_FLD(__reserved1, NULL)
++};
+diff -ruN innodb_plugin-1.0.3_orig/handler/i_s.h innodb_plugin-1.0.3_tmp/handler/i_s.h
+--- innodb_plugin-1.0.3_orig/handler/i_s.h	2009-03-18 18:19:52.000000000 +0900
++++ innodb_plugin-1.0.3_tmp/handler/i_s.h	2009-03-19 16:18:06.000000000 +0900
+@@ -36,5 +36,6 @@
+ extern struct st_mysql_plugin	i_s_innodb_cmpmem;
+ extern struct st_mysql_plugin	i_s_innodb_cmpmem_reset;
+ extern struct st_mysql_plugin	i_s_innodb_patches;
++extern struct st_mysql_plugin	i_s_innodb_rseg;
+ 
+ #endif /* i_s_h */
+diff -ruN innodb_plugin-1.0.3_orig/handler/innodb_patch_info.h innodb_plugin-1.0.3_tmp/handler/innodb_patch_info.h
+--- innodb_plugin-1.0.3_orig/handler/innodb_patch_info.h	2009-03-19 16:17:48.000000000 +0900
++++ innodb_plugin-1.0.3_tmp/handler/innodb_patch_info.h	2009-03-19 16:18:06.000000000 +0900
+@@ -31,5 +31,6 @@
+ {"innodb_buffer_pool_pages","Information of buffer pool content","","http://www.percona.com/docs/wiki/percona-xtradb"},
+ {"innodb_split_buf_pool_mutex","More fix of buffer_pool mutex","Spliting buf_pool_mutex and optimizing based on innodb_opt_lru_count","http://www.percona.com/docs/wiki/percona-xtradb"},
+ {"innodb_expand_undo_slots","expandable maximum number of undo slots","from 1024 (default) to about 4000","http://www.percona.com/docs/wiki/percona-xtradb"},
++{"innodb_extra_rseg","allow to create extra rollback segments","When create new db, the new parameter allows to create more rollback segments","http://www.percona.com/docs/wiki/percona-xtradb"},
+ {NULL, NULL, NULL, NULL}
+ };
+diff -ruN innodb_plugin-1.0.3_orig/include/srv0srv.h innodb_plugin-1.0.3_tmp/include/srv0srv.h
+--- innodb_plugin-1.0.3_orig/include/srv0srv.h	2009-03-19 16:17:48.000000000 +0900
++++ innodb_plugin-1.0.3_tmp/include/srv0srv.h	2009-03-19 16:18:06.000000000 +0900
+@@ -184,6 +184,8 @@
+ extern ulint	srv_read_ahead;
+ extern ulint	srv_adaptive_checkpoint;
+ 
++extern ulint	srv_extra_rsegments;
++
+ /*-------------------------------------------*/
+ 
+ extern ulint	srv_n_rows_inserted;
+diff -ruN innodb_plugin-1.0.3_orig/include/trx0sys.h innodb_plugin-1.0.3_tmp/include/trx0sys.h
+--- innodb_plugin-1.0.3_orig/include/trx0sys.h	2009-02-17 18:41:24.000000000 +0900
++++ innodb_plugin-1.0.3_tmp/include/trx0sys.h	2009-03-19 16:18:06.000000000 +0900
+@@ -122,6 +122,13 @@
+ void
+ trx_sys_create(void);
+ /*================*/
++/*********************************************************************
++Create extra rollback segments when create_new_db */
++UNIV_INTERN
++void
++trx_sys_create_extra_rseg(
++/*======================*/
++	ulint	num);	/* in: number of extra user rollback segments */
+ /********************************************************************
+ Looks for a free slot for a rollback segment in the trx system file copy. */
+ UNIV_INTERN
+diff -ruN innodb_plugin-1.0.3_orig/scripts/install_innodb_plugins.sql innodb_plugin-1.0.3_tmp/scripts/install_innodb_plugins.sql
+--- innodb_plugin-1.0.3_orig/scripts/install_innodb_plugins.sql	2009-03-18 18:19:52.000000000 +0900
++++ innodb_plugin-1.0.3_tmp/scripts/install_innodb_plugins.sql	2009-03-19 16:18:06.000000000 +0900
+@@ -11,3 +11,4 @@
+ INSTALL PLUGIN INNODB_BUFFER_POOL_PAGES SONAME 'ha_innodb.so';
+ INSTALL PLUGIN INNODB_BUFFER_POOL_PAGES_BLOB SONAME 'ha_innodb.so';
+ INSTALL PLUGIN INNODB_BUFFER_POOL_PAGES_INDEX SONAME 'ha_innodb.so';
++INSTALL PLUGIN innodb_rseg SONAME 'ha_innodb.so';
+diff -ruN innodb_plugin-1.0.3_orig/srv/srv0srv.c innodb_plugin-1.0.3_tmp/srv/srv0srv.c
+--- innodb_plugin-1.0.3_orig/srv/srv0srv.c	2009-03-19 16:17:48.000000000 +0900
++++ innodb_plugin-1.0.3_tmp/srv/srv0srv.c	2009-03-19 16:18:06.000000000 +0900
+@@ -362,6 +362,8 @@
+ UNIV_INTERN ulint	srv_enable_unsafe_group_commit = 0; /* 0:disable 1:enable */
+ UNIV_INTERN ulint	srv_read_ahead = 3; /* 1: random  2: linear  3: Both */
+ UNIV_INTERN ulint	srv_adaptive_checkpoint = 0; /* 0:disable 1:enable */
++
++UNIV_INTERN ulint	srv_extra_rsegments = 0; /* extra rseg for users */
+ /*-------------------------------------------*/
+ UNIV_INTERN ulong	srv_n_spin_wait_rounds	= 20;
+ UNIV_INTERN ulong	srv_n_free_tickets_to_enter = 500;
+diff -ruN innodb_plugin-1.0.3_orig/srv/srv0start.c innodb_plugin-1.0.3_tmp/srv/srv0start.c
+--- innodb_plugin-1.0.3_orig/srv/srv0start.c	2009-03-18 18:08:37.000000000 +0900
++++ innodb_plugin-1.0.3_tmp/srv/srv0start.c	2009-03-19 16:18:06.000000000 +0900
+@@ -1486,6 +1486,8 @@
+ 		dict_create();
+ 		srv_startup_is_before_trx_rollback_phase = FALSE;
+ 
++		if (srv_extra_rsegments)
++			trx_sys_create_extra_rseg(srv_extra_rsegments);
+ #ifdef UNIV_LOG_ARCHIVE
+ 	} else if (srv_archive_recovery) {
+ 		fprintf(stderr,
+diff -ruN innodb_plugin-1.0.3_orig/trx/trx0sys.c innodb_plugin-1.0.3_tmp/trx/trx0sys.c
+--- innodb_plugin-1.0.3_orig/trx/trx0sys.c	2009-02-17 19:12:56.000000000 +0900
++++ innodb_plugin-1.0.3_tmp/trx/trx0sys.c	2009-03-19 16:18:06.000000000 +0900
+@@ -1066,6 +1066,31 @@
+ }
+ 
+ /*********************************************************************
++Create extra rollback segments when create_new_db */
++UNIV_INTERN
++void
++trx_sys_create_extra_rseg(
++/*======================*/
++	ulint	num)	/* in: number of extra user rollback segments */
++{
++	mtr_t	mtr;
++	ulint	slot_no;
++	ulint	i;
++
++	/* Create extra rollback segments */
++	mtr_start(&mtr);
++	for (i = 1; i < num + 1; i++) {
++		if(!trx_rseg_create(TRX_SYS_SPACE, ULINT_MAX, &slot_no, &mtr)) {
++			fprintf(stderr,
++"InnoDB: Warning: Failed to create extra rollback segments.\n");
++			break;
++		}
++		ut_a(slot_no == i);
++	}
++	mtr_commit(&mtr);
++}
++
++/*********************************************************************
+ Update the file format tag. */
+ static
+ ibool

=== added file 'innodb_io_patches.patch'
--- innodb_io_patches.patch	1970-01-01 00:00:00 +0000
+++ innodb_io_patches.patch	2009-03-20 05:35:54 +0000
@@ -0,0 +1,586 @@
+diff -ruN innodb_plugin-1.0.3_orig/buf/buf0flu.c innodb_plugin-1.0.3_tmp/buf/buf0flu.c
+--- innodb_plugin-1.0.3_orig/buf/buf0flu.c	2009-02-17 17:12:02.000000000 +0900
++++ innodb_plugin-1.0.3_tmp/buf/buf0flu.c	2009-03-18 17:48:18.000000000 +0900
+@@ -1048,9 +1048,23 @@
+ 
+ 				old_page_count = page_count;
+ 
++				if (srv_flush_neighbor_pages) {
+ 				/* Try to flush also all the neighbors */
+ 				page_count += buf_flush_try_neighbors(
+ 					space, offset, flush_type);
++				} else {
++					/* Flush only this page. NOTE(review): assumes buf_flush_page() releases both mutexes -- confirm */
++					buf_pool_mutex_enter();
++					buf_page_t* bpage_tmp = buf_page_hash_get(space, offset);
++					if (bpage_tmp) {
++						mutex_t* block_mutex = buf_page_get_mutex(bpage_tmp); /* mutex of the page found now, not of the earlier bpage */
++						mutex_enter(block_mutex);
++						buf_flush_page(bpage_tmp, flush_type);
++						page_count++;
++					} else {
++						buf_pool_mutex_exit(); /* page no longer in the hash: do not leave the mutex held */
++					}
++				}
+ 				/* fprintf(stderr,
+ 				"Flush type %lu, page no %lu, neighb %lu\n",
+ 				flush_type, offset,
+diff -ruN innodb_plugin-1.0.3_orig/buf/buf0rea.c innodb_plugin-1.0.3_tmp/buf/buf0rea.c
+--- innodb_plugin-1.0.3_orig/buf/buf0rea.c	2009-02-17 17:12:02.000000000 +0900
++++ innodb_plugin-1.0.3_tmp/buf/buf0rea.c	2009-03-18 17:48:18.000000000 +0900
+@@ -36,6 +36,7 @@
+ #include "os0file.h"
+ #include "srv0start.h"
+ 
++extern ulint srv_read_ahead;
+ extern ulint srv_read_ahead_rnd;
+ extern ulint srv_read_ahead_seq;
+ extern ulint srv_buf_pool_reads;
+@@ -203,6 +204,10 @@
+ 	ulint		i;
+ 	ulint		buf_read_ahead_random_area;
+ 
++	if (!(srv_read_ahead & 1)) {
++		return(0);
++	}
++
+ 	if (srv_startup_is_before_trx_rollback_phase) {
+ 		/* No read-ahead to avoid thread deadlocks */
+ 		return(0);
+@@ -428,6 +433,10 @@
+ 	const ulint	buf_read_ahead_linear_area
+ 		= BUF_READ_AHEAD_LINEAR_AREA;
+ 
++ 	if (!(srv_read_ahead & 2)) {
++ 		return(0);
++ 	}
++ 
+ 	if (UNIV_UNLIKELY(srv_startup_is_before_trx_rollback_phase)) {
+ 		/* No read-ahead to avoid thread deadlocks */
+ 		return(0);
+diff -ruN innodb_plugin-1.0.3_orig/handler/ha_innodb.cc innodb_plugin-1.0.3_tmp/handler/ha_innodb.cc
+--- innodb_plugin-1.0.3_orig/handler/ha_innodb.cc	2009-03-18 17:44:46.000000000 +0900
++++ innodb_plugin-1.0.3_tmp/handler/ha_innodb.cc	2009-03-18 17:52:19.000000000 +0900
+@@ -143,6 +143,7 @@
+ 	innobase_force_recovery, innobase_open_files,
+ 	innobase_autoinc_lock_mode;
+ 
++static unsigned long innobase_read_io_threads, innobase_write_io_threads;
+ static long long innobase_buffer_pool_size, innobase_log_file_size;
+ 
+ /* The default values for the following char* start-up parameters
+@@ -2104,6 +2105,10 @@
+ 	srv_mem_pool_size = (ulint) innobase_additional_mem_pool_size;
+ 
+ 	srv_n_file_io_threads = (ulint) innobase_file_io_threads;
++	srv_n_read_io_threads = (ulint) innobase_read_io_threads;
++	srv_n_write_io_threads = (ulint) innobase_write_io_threads;
++
++	srv_read_ahead &= 3;
+ 
+ 	srv_force_recovery = (ulint) innobase_force_recovery;
+ 
+@@ -8879,6 +8884,10 @@
+ 	if (thd_sql_command(thd) != SQLCOM_XA_PREPARE &&
+ 	    (all || !thd_test_options(thd, OPTION_NOT_AUTOCOMMIT | OPTION_BEGIN)))
+ 	{
++		if (srv_enable_unsafe_group_commit && !THDVAR(thd, support_xa)) {
++			/* choose group commit rather than binlog order */
++			return(0);
++		}
+ 
+ 		/* For ibbackup to work the order of transactions in binlog
+ 		and InnoDB must be the same. Consider the situation
+@@ -9716,6 +9725,84 @@
+   innodb_change_buffering_validate,
+   innodb_change_buffering_update, NULL);
+ 
++static MYSQL_SYSVAR_ULONG(io_capacity, srv_io_capacity,
++  PLUGIN_VAR_RQCMDARG,
++  "Number of IO operations per second the server can do. Tunes background IO rate.",
++  NULL, NULL, 100, 100, 999999999, 0);
++
++static MYSQL_SYSVAR_LONGLONG(ibuf_max_size, srv_ibuf_max_size,
++  PLUGIN_VAR_RQCMDARG | PLUGIN_VAR_READONLY,
++  "The maximum size of the insert buffer. (in bytes)",
++  NULL, NULL, LONGLONG_MAX, 0, LONGLONG_MAX, 0);
++
++static MYSQL_SYSVAR_ULONG(ibuf_active_contract, srv_ibuf_active_contract,
++  PLUGIN_VAR_RQCMDARG,
++  "Enable/Disable active_contract of insert buffer. 0:disable 1:enable",
++  NULL, NULL, 0, 0, 1, 0);
++
++static MYSQL_SYSVAR_ULONG(ibuf_accel_rate, srv_ibuf_accel_rate,
++  PLUGIN_VAR_RQCMDARG,
++  "Tunes amount of insert buffer processing of background, in addition to innodb_io_capacity. (in percentage)",
++  NULL, NULL, 100, 100, 999999999, 0);
++
++static MYSQL_SYSVAR_ULONG(flush_neighbor_pages, srv_flush_neighbor_pages,
++  PLUGIN_VAR_RQCMDARG,
++  "Enable/Disable flushing also neighbor pages. 0:disable 1:enable",
++  NULL, NULL, 1, 0, 1, 0);
++
++static
++void
++innodb_read_ahead_update(
++  THD* thd,
++  struct st_mysql_sys_var*     var,
++  void*        var_ptr,
++  const void*  save)
++{
++  *(long *)var_ptr= (*(long *)save) & 3;
++}
++const char *read_ahead_names[]=
++{
++  "none", /* 0 */
++  "random",
++  "linear",
++  "both", /* 3 */
++  /* For compatibility of the older patch */
++  "0", /* 4 ("none" + 4) */
++  "1",
++  "2",
++  "3", /* 7 ("both" + 4) */
++  NullS
++};
++TYPELIB read_ahead_typelib=
++{
++  array_elements(read_ahead_names) - 1, "read_ahead_typelib",
++  read_ahead_names, NULL
++};
++static MYSQL_SYSVAR_ENUM(read_ahead, srv_read_ahead,
++  PLUGIN_VAR_RQCMDARG,
++  "Control read ahead activity. (none, random, linear, [both])",
++  NULL, innodb_read_ahead_update, 3, &read_ahead_typelib);
++
++static MYSQL_SYSVAR_ULONG(adaptive_checkpoint, srv_adaptive_checkpoint,
++  PLUGIN_VAR_RQCMDARG,
++  "Enable/Disable flushing along modified age. 0:disable 1:enable",
++  NULL, NULL, 0, 0, 1, 0);
++
++static MYSQL_SYSVAR_ULONG(enable_unsafe_group_commit, srv_enable_unsafe_group_commit,
++  PLUGIN_VAR_RQCMDARG,
++  "Enable/Disable unsafe group commit when support_xa=OFF and use with binlog or other XA storage engine.",
++  NULL, NULL, 0, 0, 1, 0);
++
++static MYSQL_SYSVAR_ULONG(read_io_threads, innobase_read_io_threads,
++  PLUGIN_VAR_RQCMDARG | PLUGIN_VAR_READONLY,
++  "Number of background read I/O threads in InnoDB.",
++  NULL, NULL, 1, 1, 64, 0);
++
++static MYSQL_SYSVAR_ULONG(write_io_threads, innobase_write_io_threads,
++  PLUGIN_VAR_RQCMDARG | PLUGIN_VAR_READONLY,
++  "Number of background write I/O threads in InnoDB.",
++  NULL, NULL, 1, 1, 64, 0);
++
+ static struct st_mysql_sys_var* innobase_system_variables[]= {
+   MYSQL_SYSVAR(additional_mem_pool_size),
+   MYSQL_SYSVAR(autoextend_increment),
+@@ -9764,6 +9851,16 @@
+   MYSQL_SYSVAR(show_verbose_locks),
+   MYSQL_SYSVAR(show_locks_held),
+   MYSQL_SYSVAR(version),
++  MYSQL_SYSVAR(io_capacity),
++  MYSQL_SYSVAR(ibuf_max_size),
++  MYSQL_SYSVAR(ibuf_active_contract),
++  MYSQL_SYSVAR(ibuf_accel_rate),
++  MYSQL_SYSVAR(flush_neighbor_pages),
++  MYSQL_SYSVAR(read_ahead),
++  MYSQL_SYSVAR(adaptive_checkpoint),
++  MYSQL_SYSVAR(enable_unsafe_group_commit),
++  MYSQL_SYSVAR(read_io_threads),
++  MYSQL_SYSVAR(write_io_threads),
+   MYSQL_SYSVAR(use_sys_malloc),
+   MYSQL_SYSVAR(change_buffering),
+   NULL
+diff -ruN innodb_plugin-1.0.3_orig/handler/innodb_patch_info.h innodb_plugin-1.0.3_tmp/handler/innodb_patch_info.h
+--- innodb_plugin-1.0.3_orig/handler/innodb_patch_info.h	2009-03-18 17:44:46.000000000 +0900
++++ innodb_plugin-1.0.3_tmp/handler/innodb_patch_info.h	2009-03-18 17:48:18.000000000 +0900
+@@ -25,5 +25,6 @@
+ }innodb_enhancements[] = {
+ {"xtradb_show_enhancements","I_S.XTRADB_ENHANCEMENTS","","http://www.percona.com/docs/wiki/percona-xtradb"},
+ {"innodb_show_status","Improvements to SHOW INNODB STATUS","Memory information and lock info fixes","http://www.percona.com/docs/wiki/percona-xtradb"},
++{"innodb_io","Improvements to InnoDB IO","","http://www.percona.com/docs/wiki/percona-xtradb"},
+ {NULL, NULL, NULL, NULL}
+ };
+diff -ruN innodb_plugin-1.0.3_orig/ibuf/ibuf0ibuf.c innodb_plugin-1.0.3_tmp/ibuf/ibuf0ibuf.c
+--- innodb_plugin-1.0.3_orig/ibuf/ibuf0ibuf.c	2009-02-17 17:55:41.000000000 +0900
++++ innodb_plugin-1.0.3_tmp/ibuf/ibuf0ibuf.c	2009-03-18 17:48:18.000000000 +0900
+@@ -422,8 +422,10 @@
+ 	grow in size, as the references on the upper levels of the tree can
+ 	change */
+ 
+-	ibuf->max_size = buf_pool_get_curr_size() / UNIV_PAGE_SIZE
+-		/ IBUF_POOL_SIZE_PER_MAX_SIZE;
++	ibuf->max_size = ut_min( buf_pool_get_curr_size() / UNIV_PAGE_SIZE
++		/ IBUF_POOL_SIZE_PER_MAX_SIZE, (ulint) srv_ibuf_max_size / UNIV_PAGE_SIZE);
++
++	srv_ibuf_max_size = (long long) ibuf->max_size * UNIV_PAGE_SIZE;
+ 
+ 	mutex_create(&ibuf_pessimistic_insert_mutex,
+ 		     SYNC_IBUF_PESS_INSERT_MUTEX);
+@@ -2247,11 +2249,13 @@
+ 
+ 	mutex_enter(&ibuf_mutex);
+ 
++	if (!srv_ibuf_active_contract) {
+ 	if (ibuf->size < ibuf->max_size + IBUF_CONTRACT_ON_INSERT_NON_SYNC) {
+ 		mutex_exit(&ibuf_mutex);
+ 
+ 		return;
+ 	}
++	}
+ 
+ 	sync = FALSE;
+ 
+diff -ruN innodb_plugin-1.0.3_orig/include/os0file.h innodb_plugin-1.0.3_tmp/include/os0file.h
+--- innodb_plugin-1.0.3_orig/include/os0file.h	2009-02-17 18:18:35.000000000 +0900
++++ innodb_plugin-1.0.3_tmp/include/os0file.h	2009-03-18 17:48:18.000000000 +0900
+@@ -577,8 +577,10 @@
+ /*========*/
+ 	ulint	n,		/* in: maximum number of pending aio operations
+ 				allowed; n must be divisible by n_segments */
+-	ulint	n_segments,	/* in: combined number of segments in the four
+-				first aio arrays; must be >= 4 */
++//	ulint	n_segments,	/* in: combined number of segments in the four
++//				first aio arrays; must be >= 4 */
++	ulint	n_read_threads,  /* n_segments == 2 + n_read_threads + n_write_threads */
++	ulint	n_write_threads, /**/
+ 	ulint	n_slots_sync);	/* in: number of slots in the sync aio array */
+ /***********************************************************************
+ Requests an asynchronous i/o operation. */
+diff -ruN innodb_plugin-1.0.3_orig/include/srv0srv.h innodb_plugin-1.0.3_tmp/include/srv0srv.h
+--- innodb_plugin-1.0.3_orig/include/srv0srv.h	2009-03-18 17:44:46.000000000 +0900
++++ innodb_plugin-1.0.3_tmp/include/srv0srv.h	2009-03-18 17:48:18.000000000 +0900
+@@ -126,6 +126,8 @@
+ extern ulint	srv_lock_table_size;
+ 
+ extern ulint	srv_n_file_io_threads;
++extern ulint	srv_n_read_io_threads;
++extern ulint	srv_n_write_io_threads;
+ 
+ #ifdef UNIV_LOG_ARCHIVE
+ extern ibool	srv_log_archive_on;
+@@ -170,6 +172,16 @@
+ extern ulong	srv_max_purge_lag;
+ 
+ extern ulong	srv_replication_delay;
++
++extern ulint	srv_io_capacity;
++extern long long	srv_ibuf_max_size;
++extern ulint	srv_ibuf_active_contract;
++extern ulint	srv_ibuf_accel_rate;
++extern ulint	srv_flush_neighbor_pages;
++extern ulint	srv_enable_unsafe_group_commit;
++extern ulint	srv_read_ahead;
++extern ulint	srv_adaptive_checkpoint;
++
+ /*-------------------------------------------*/
+ 
+ extern ulint	srv_n_rows_inserted;
+diff -ruN innodb_plugin-1.0.3_orig/log/log0log.c innodb_plugin-1.0.3_tmp/log/log0log.c
+--- innodb_plugin-1.0.3_orig/log/log0log.c	2009-02-17 18:50:12.000000000 +0900
++++ innodb_plugin-1.0.3_tmp/log/log0log.c	2009-03-18 17:48:18.000000000 +0900
+@@ -3276,6 +3276,15 @@
+ 		log_sys->flushed_to_disk_lsn,
+ 		log_sys->last_checkpoint_lsn);
+ 
++	fprintf(file,
++		"Max checkpoint age  %lu\n"
++		"Modified age        %lu\n"
++		"Checkpoint age      %lu\n",
++			(ulong) log_sys->max_checkpoint_age,
++			(ulong) (log_sys->lsn -
++					log_buf_pool_get_oldest_modification()),
++			(ulong) (log_sys->lsn - log_sys->last_checkpoint_lsn));
++
+ 	current_time = time(NULL);
+ 
+ 	time_elapsed = 0.001 + difftime(current_time,
+diff -ruN innodb_plugin-1.0.3_orig/os/os0file.c innodb_plugin-1.0.3_tmp/os/os0file.c
+--- innodb_plugin-1.0.3_orig/os/os0file.c	2009-02-17 18:53:58.000000000 +0900
++++ innodb_plugin-1.0.3_tmp/os/os0file.c	2009-03-18 17:48:18.000000000 +0900
+@@ -2936,8 +2936,10 @@
+ /*========*/
+ 	ulint	n,		/* in: maximum number of pending aio operations
+ 				allowed; n must be divisible by n_segments */
+-	ulint	n_segments,	/* in: combined number of segments in the four
+-				first aio arrays; must be >= 4 */
++//	ulint	n_segments,	/* in: combined number of segments in the four
++//				first aio arrays; must be >= 4 */
++	ulint	n_read_threads,  /* n_segments == 2 + n_read_threads + n_write_threads*/
++	ulint	n_write_threads, /**/
+ 	ulint	n_slots_sync)	/* in: number of slots in the sync aio array */
+ {
+ 	ulint	n_read_segs;
+@@ -2945,6 +2947,8 @@
+ 	ulint	n_per_seg;
+ 	ulint	i;
+ 
++ 	ulint	n_segments = 2 + n_read_threads + n_write_threads;
++ 
+ 	ut_ad(n % n_segments == 0);
+ 	ut_ad(n_segments >= 4);
+ 
+@@ -2955,8 +2959,8 @@
+ 	}
+ 
+ 	n_per_seg = n / n_segments;
+-	n_write_segs = (n_segments - 2) / 2;
+-	n_read_segs = n_segments - 2 - n_write_segs;
++	n_write_segs = n_write_threads;
++	n_read_segs = n_read_threads;
+ 
+ 	/* fprintf(stderr, "Array n per seg %lu\n", n_per_seg); */
+ 
+@@ -3156,6 +3160,13 @@
+ 	OVERLAPPED*	control;
+ #endif
+ 	ulint		i;
++	ulint		prim_segment;
++	ulint		n;
++
++	n = array->n_slots / array->n_segments;
++	/* 64 blocks' striping ( aligning max(BUF_READ_AHEAD_AREA) ) */
++	prim_segment = ( offset >> (UNIV_PAGE_SIZE_SHIFT + 6) ) % (array->n_segments);
++
+ loop:
+ 	os_mutex_enter(array->mutex);
+ 
+@@ -3174,6 +3185,16 @@
+ 		goto loop;
+ 	}
+ 
++	for (i = prim_segment * n; i < array->n_slots; i++) {
++		slot = os_aio_array_get_nth_slot(array, i);
++
++		if (slot->reserved == FALSE) {
++			break;
++		}
++	}
++
++	if (slot->reserved == TRUE){
++		/* No free slot found from the intended segment onward, so search from the beginning. */
+ 	for (i = 0;; i++) {
+ 		slot = os_aio_array_get_nth_slot(array, i);
+ 
+@@ -3181,6 +3202,7 @@
+ 			break;
+ 		}
+ 	}
++	}
+ 
+ 	array->n_reserved++;
+ 
+diff -ruN innodb_plugin-1.0.3_orig/srv/srv0srv.c innodb_plugin-1.0.3_tmp/srv/srv0srv.c
+--- innodb_plugin-1.0.3_orig/srv/srv0srv.c	2009-03-18 17:44:46.000000000 +0900
++++ innodb_plugin-1.0.3_tmp/srv/srv0srv.c	2009-03-18 17:48:18.000000000 +0900
+@@ -177,6 +177,8 @@
+ UNIV_INTERN ulint	srv_lock_table_size	= ULINT_MAX;
+ 
+ UNIV_INTERN ulint	srv_n_file_io_threads	= ULINT_MAX;
++UNIV_INTERN ulint	srv_n_read_io_threads	= 1;
++UNIV_INTERN ulint	srv_n_write_io_threads	= 1;
+ 
+ #ifdef UNIV_LOG_ARCHIVE
+ UNIV_INTERN ibool		srv_log_archive_on	= FALSE;
+@@ -341,6 +343,23 @@
+ 
+ UNIV_INTERN ulong	srv_replication_delay		= 0;
+ 
++UNIV_INTERN ulint	srv_io_capacity = 100;
++
++/* Returns the number of IO operations that is X percent of the capacity.
++PCT_IO(5) -> returns the number of IO operations that is 5% of the max
++where max is srv_io_capacity. pct may be any arithmetic expression. */
++#define PCT_IO(pct) ((ulint) (srv_io_capacity * ((double) (pct) / 100.0)))
++
++UNIV_INTERN long long	srv_ibuf_max_size = 0;
++UNIV_INTERN ulint	srv_ibuf_active_contract = 0; /* 0:disable 1:enable */
++UNIV_INTERN ulint	srv_ibuf_accel_rate = 100; /* percent multiplier applied by PCT_IBUF_IO (100 == x1) */
++#define PCT_IBUF_IO(pct) ((ulint) (srv_io_capacity * srv_ibuf_accel_rate * ((double) (pct) / 10000.0))) /* pct% of srv_io_capacity, scaled by srv_ibuf_accel_rate% */
++
++UNIV_INTERN ulint	srv_flush_neighbor_pages = 1; /* 0:disable 1:enable */
++
++UNIV_INTERN ulint	srv_enable_unsafe_group_commit = 0; /* 0:disable 1:enable */
++UNIV_INTERN ulint	srv_read_ahead = 3; /* 1: random  2: linear  3: Both */
++UNIV_INTERN ulint	srv_adaptive_checkpoint = 0; /* 0:disable 1:enable */
+ /*-------------------------------------------*/
+ UNIV_INTERN ulong	srv_n_spin_wait_rounds	= 20;
+ UNIV_INTERN ulong	srv_n_free_tickets_to_enter = 500;
+@@ -2325,6 +2344,8 @@
+ 	ibool		skip_sleep	= FALSE;
+ 	ulint		i;
+ 
++	ib_uint64_t	oldest_lsn;
++	
+ #ifdef UNIV_DEBUG_THREAD_CREATION
+ 	fprintf(stderr, "Master thread starts, id %lu\n",
+ 		os_thread_pf(os_thread_get_curr_id()));
+@@ -2412,10 +2433,10 @@
+ 			+ log_sys->n_pending_writes;
+ 		n_ios = log_sys->n_log_ios + buf_pool->n_pages_read
+ 			+ buf_pool->n_pages_written;
+-		if (n_pend_ios < 3 && (n_ios - n_ios_old < 5)) {
++		if (n_pend_ios < 3 && (n_ios - n_ios_old < PCT_IO(5))) {
+ 			srv_main_thread_op_info = "doing insert buffer merge";
+ 			ibuf_contract_for_n_pages(
+-				TRUE, srv_insert_buffer_batch_size / 4);
++				TRUE, PCT_IBUF_IO((srv_insert_buffer_batch_size / 4)));
+ 
+ 			srv_main_thread_op_info = "flushing log";
+ 
+@@ -2428,7 +2449,7 @@
+ 			/* Try to keep the number of modified pages in the
+ 			buffer pool under the limit wished by the user */
+ 
+-			n_pages_flushed = buf_flush_batch(BUF_FLUSH_LIST, 100,
++ 			n_pages_flushed = buf_flush_batch(BUF_FLUSH_LIST, PCT_IO(100),
+ 							  IB_ULONGLONG_MAX);
+ 
+ 			/* If we had to do the flush, it may have taken
+@@ -2437,6 +2458,49 @@
+ 			iteration of this loop. */
+ 
+ 			skip_sleep = TRUE;
++		} else if (srv_adaptive_checkpoint) {
++
++			/* Try to keep the modified age from exceeding the
++			max_checkpoint_age * 7/8 line */
++
++			mutex_enter(&(log_sys->mutex));
++
++			oldest_lsn = buf_pool_get_oldest_modification();
++			if (oldest_lsn == 0) {
++
++				mutex_exit(&(log_sys->mutex));
++
++			} else {
++				if ((log_sys->lsn - oldest_lsn)
++				    > (log_sys->max_checkpoint_age) - ((log_sys->max_checkpoint_age) / 8)) {
++					/* LOG_POOL_PREFLUSH_RATIO_ASYNC is exceeded. */
++					/* We should not flush from here. */
++					mutex_exit(&(log_sys->mutex));
++				} else if ((log_sys->lsn - oldest_lsn)
++				    > (log_sys->max_checkpoint_age) - ((log_sys->max_checkpoint_age) / 4)) {
++
++					/* 2nd defence line (max_checkpoint_age * 3/4) */
++
++					mutex_exit(&(log_sys->mutex));
++
++					n_pages_flushed = buf_flush_batch(BUF_FLUSH_LIST, PCT_IO(100),
++									  IB_ULONGLONG_MAX);
++					skip_sleep = TRUE;
++				} else if ((log_sys->lsn - oldest_lsn)
++					   > (log_sys->max_checkpoint_age)/2 ) {
++
++					/* 1st defence line (max_checkpoint_age * 1/2) */
++
++					mutex_exit(&(log_sys->mutex));
++
++					n_pages_flushed = buf_flush_batch(BUF_FLUSH_LIST, PCT_IO(10),
++									  IB_ULONGLONG_MAX);
++					skip_sleep = TRUE;
++				} else {
++					mutex_exit(&(log_sys->mutex));
++				}
++			}
++
+ 		}
+ 
+ 		if (srv_activity_count == old_activity_count) {
+@@ -2463,10 +2527,10 @@
+ 	n_pend_ios = buf_get_n_pending_ios() + log_sys->n_pending_writes;
+ 	n_ios = log_sys->n_log_ios + buf_pool->n_pages_read
+ 		+ buf_pool->n_pages_written;
+-	if (n_pend_ios < 3 && (n_ios - n_ios_very_old < 200)) {
+-
+-		srv_main_thread_op_info = "flushing buffer pool pages";
+-		buf_flush_batch(BUF_FLUSH_LIST, 100, IB_ULONGLONG_MAX);
++ 	if (n_pend_ios < 3 && (n_ios - n_ios_very_old < PCT_IO(200))) {
++  
++  		srv_main_thread_op_info = "flushing buffer pool pages";
++ 		buf_flush_batch(BUF_FLUSH_LIST, PCT_IO(100), IB_ULONGLONG_MAX);
+ 
+ 		srv_main_thread_op_info = "flushing log";
+ 		log_buffer_flush_to_disk();
+@@ -2476,7 +2540,7 @@
+ 	even if the server were active */
+ 
+ 	srv_main_thread_op_info = "doing insert buffer merge";
+-	ibuf_contract_for_n_pages(TRUE, srv_insert_buffer_batch_size / 4);
++	ibuf_contract_for_n_pages(TRUE, PCT_IBUF_IO((srv_insert_buffer_batch_size / 4)));
+ 
+ 	srv_main_thread_op_info = "flushing log";
+ 	log_buffer_flush_to_disk();
+@@ -2516,14 +2580,14 @@
+ 		(> 70 %), we assume we can afford reserving the disk(s) for
+ 		the time it requires to flush 100 pages */
+ 
+-		n_pages_flushed = buf_flush_batch(BUF_FLUSH_LIST, 100,
++ 	        n_pages_flushed = buf_flush_batch(BUF_FLUSH_LIST, PCT_IO(100),
+ 						  IB_ULONGLONG_MAX);
+ 	} else {
+ 		/* Otherwise, we only flush a small number of pages so that
+ 		we do not unnecessarily use much disk i/o capacity from
+ 		other work */
+ 
+-		n_pages_flushed = buf_flush_batch(BUF_FLUSH_LIST, 10,
++ 	        n_pages_flushed = buf_flush_batch(BUF_FLUSH_LIST, PCT_IO(10),
+ 						  IB_ULONGLONG_MAX);
+ 	}
+ 
+@@ -2611,7 +2675,7 @@
+ 		n_bytes_merged = 0;
+ 	} else {
+ 		n_bytes_merged = ibuf_contract_for_n_pages(
+-			TRUE, srv_insert_buffer_batch_size);
++			TRUE, PCT_IBUF_IO((srv_insert_buffer_batch_size * 5)));
+ 	}
+ 
+ 	srv_main_thread_op_info = "reserving kernel mutex";
+@@ -2627,7 +2691,7 @@
+ 	srv_main_thread_op_info = "flushing buffer pool pages";
+ 
+ 	if (srv_fast_shutdown < 2) {
+-		n_pages_flushed = buf_flush_batch(BUF_FLUSH_LIST, 100,
++ 	        n_pages_flushed = buf_flush_batch(BUF_FLUSH_LIST, PCT_IO(100),
+ 						  IB_ULONGLONG_MAX);
+ 	} else {
+ 		/* In the fastest shutdown we do not flush the buffer pool
+diff -ruN innodb_plugin-1.0.3_orig/srv/srv0start.c innodb_plugin-1.0.3_tmp/srv/srv0start.c
+--- innodb_plugin-1.0.3_orig/srv/srv0start.c	2009-03-05 20:49:51.000000000 +0900
++++ innodb_plugin-1.0.3_tmp/srv/srv0start.c	2009-03-18 17:48:18.000000000 +0900
+@@ -1252,24 +1252,28 @@
+ 		return(DB_ERROR);
+ 	}
+ 
++	/* overwrite innodb_file_io_threads */
++	srv_n_file_io_threads = 2 + srv_n_read_io_threads + srv_n_write_io_threads;
++
+ 	/* Restrict the maximum number of file i/o threads */
+ 	if (srv_n_file_io_threads > SRV_MAX_N_IO_THREADS) {
+ 
+ 		srv_n_file_io_threads = SRV_MAX_N_IO_THREADS;
++		srv_n_read_io_threads = srv_n_write_io_threads = (SRV_MAX_N_IO_THREADS - 2) / 2;
+ 	}
+ 
+ 	if (!os_aio_use_native_aio) {
+ 		/* In simulated aio we currently have use only for 4 threads */
+-		srv_n_file_io_threads = 4;
++		/*srv_n_file_io_threads = 4;*/
+ 
+ 		os_aio_init(8 * SRV_N_PENDING_IOS_PER_THREAD
+ 			    * srv_n_file_io_threads,
+-			    srv_n_file_io_threads,
+-			    SRV_MAX_N_PENDING_SYNC_IOS);
++			    srv_n_read_io_threads, srv_n_write_io_threads,
++			    SRV_MAX_N_PENDING_SYNC_IOS * 8);
+ 	} else {
+ 		os_aio_init(SRV_N_PENDING_IOS_PER_THREAD
+ 			    * srv_n_file_io_threads,
+-			    srv_n_file_io_threads,
++			    srv_n_read_io_threads, srv_n_write_io_threads,
+ 			    SRV_MAX_N_PENDING_SYNC_IOS);
+ 	}
+ 

=== added file 'innodb_opt_lru_count.patch'
--- innodb_opt_lru_count.patch	1970-01-01 00:00:00 +0000
+++ innodb_opt_lru_count.patch	2009-03-20 05:35:54 +0000
@@ -0,0 +1,157 @@
+diff -ruN innodb_plugin-1.0.3_orig/buf/buf0buf.c innodb_plugin-1.0.3_tmp/buf/buf0buf.c
+--- innodb_plugin-1.0.3_orig/buf/buf0buf.c	2009-03-18 18:08:28.000000000 +0900
++++ innodb_plugin-1.0.3_tmp/buf/buf0buf.c	2009-03-18 18:09:28.000000000 +0900
+@@ -3015,7 +3015,7 @@
+ 	ibuf_merge_or_delete_for_page(NULL, space, offset, zip_size, TRUE);
+ 
+ 	/* Flush pages from the end of the LRU list if necessary */
+-	buf_flush_free_margin();
++	buf_flush_free_margin(FALSE);
+ 
+ 	frame = block->frame;
+ 
+diff -ruN innodb_plugin-1.0.3_orig/buf/buf0flu.c innodb_plugin-1.0.3_tmp/buf/buf0flu.c
+--- innodb_plugin-1.0.3_orig/buf/buf0flu.c	2009-03-18 18:08:37.000000000 +0900
++++ innodb_plugin-1.0.3_tmp/buf/buf0flu.c	2009-03-18 18:09:28.000000000 +0900
+@@ -133,9 +133,9 @@
+ 	buf_page_t*	bpage)	/* in: buffer control block, must be
+ 				buf_page_in_file(bpage) and in the LRU list */
+ {
+-	ut_ad(buf_pool_mutex_own());
++	//ut_ad(buf_pool_mutex_own());
+ 	ut_ad(mutex_own(buf_page_get_mutex(bpage)));
+-	ut_ad(bpage->in_LRU_list);
++	//ut_ad(bpage->in_LRU_list); /* optimistic use */
+ 
+ 	if (UNIV_LIKELY(buf_page_in_file(bpage))) {
+ 
+@@ -144,6 +144,8 @@
+ 		       && bpage->buf_fix_count == 0);
+ 	}
+ 
++	/* permitted not to own LRU_mutex..  */
++/*
+ 	ut_print_timestamp(stderr);
+ 	fprintf(stderr,
+ 		"  InnoDB: Error: buffer block state %lu"
+@@ -151,6 +153,7 @@
+ 		(ulong) buf_page_get_state(bpage));
+ 	ut_print_buf(stderr, bpage, sizeof(buf_page_t));
+ 	putc('\n', stderr);
++*/
+ 
+ 	return(FALSE);
+ }
+@@ -1137,7 +1140,7 @@
+ 	ulint		n_replaceable;
+ 	ulint		distance	= 0;
+ 
+-	buf_pool_mutex_enter();
++	//buf_pool_mutex_enter();
+ 
+ 	n_replaceable = UT_LIST_GET_LEN(buf_pool->free);
+ 
+@@ -1163,7 +1166,7 @@
+ 		bpage = UT_LIST_GET_PREV(LRU, bpage);
+ 	}
+ 
+-	buf_pool_mutex_exit();
++	//buf_pool_mutex_exit();
+ 
+ 	if (n_replaceable >= BUF_FLUSH_FREE_BLOCK_MARGIN) {
+ 
+@@ -1182,8 +1185,9 @@
+ immediately, without waiting. */
+ UNIV_INTERN
+ void
+-buf_flush_free_margin(void)
++buf_flush_free_margin(
+ /*=======================*/
++	ibool	wait)
+ {
+ 	ulint	n_to_flush;
+ 	ulint	n_flushed;
+@@ -1192,7 +1196,7 @@
+ 
+ 	if (n_to_flush > 0) {
+ 		n_flushed = buf_flush_batch(BUF_FLUSH_LRU, n_to_flush, 0);
+-		if (n_flushed == ULINT_UNDEFINED) {
++		if (wait && n_flushed == ULINT_UNDEFINED) {
+ 			/* There was an LRU type flush batch already running;
+ 			let us wait for it to end */
+ 
+diff -ruN innodb_plugin-1.0.3_orig/buf/buf0lru.c innodb_plugin-1.0.3_tmp/buf/buf0lru.c
+--- innodb_plugin-1.0.3_orig/buf/buf0lru.c	2009-02-17 17:12:02.000000000 +0900
++++ innodb_plugin-1.0.3_tmp/buf/buf0lru.c	2009-03-18 18:09:28.000000000 +0900
+@@ -910,7 +910,7 @@
+ 
+ 	/* No free block was found: try to flush the LRU list */
+ 
+-	buf_flush_free_margin();
++	buf_flush_free_margin(TRUE);
+ 	++srv_buf_pool_wait_free;
+ 
+ 	os_aio_simulated_wake_handler_threads();
+diff -ruN innodb_plugin-1.0.3_orig/buf/buf0rea.c innodb_plugin-1.0.3_tmp/buf/buf0rea.c
+--- innodb_plugin-1.0.3_orig/buf/buf0rea.c	2009-03-18 18:08:37.000000000 +0900
++++ innodb_plugin-1.0.3_tmp/buf/buf0rea.c	2009-03-18 18:09:28.000000000 +0900
+@@ -375,7 +375,7 @@
+ 	}
+ 
+ 	/* Flush pages from the end of the LRU list if necessary */
+-	buf_flush_free_margin();
++	buf_flush_free_margin(FALSE);
+ 
+ 	/* Increment number of I/O operations used for LRU policy. */
+ 	buf_LRU_stat_inc_io();
+@@ -636,7 +636,7 @@
+ 	os_aio_simulated_wake_handler_threads();
+ 
+ 	/* Flush pages from the end of the LRU list if necessary */
+-	buf_flush_free_margin();
++	buf_flush_free_margin(FALSE);
+ 
+ #ifdef UNIV_DEBUG
+ 	if (buf_debug_prints && (count > 0)) {
+@@ -721,7 +721,7 @@
+ 	os_aio_simulated_wake_handler_threads();
+ 
+ 	/* Flush pages from the end of the LRU list if necessary */
+-	buf_flush_free_margin();
++	buf_flush_free_margin(FALSE);
+ 
+ #ifdef UNIV_DEBUG
+ 	if (buf_debug_prints) {
+@@ -806,7 +806,7 @@
+ 	os_aio_simulated_wake_handler_threads();
+ 
+ 	/* Flush pages from the end of the LRU list if necessary */
+-	buf_flush_free_margin();
++	buf_flush_free_margin(FALSE);
+ 
+ #ifdef UNIV_DEBUG
+ 	if (buf_debug_prints) {
+diff -ruN innodb_plugin-1.0.3_orig/handler/innodb_patch_info.h innodb_plugin-1.0.3_tmp/handler/innodb_patch_info.h
+--- innodb_plugin-1.0.3_orig/handler/innodb_patch_info.h	2009-03-18 18:08:42.000000000 +0900
++++ innodb_plugin-1.0.3_tmp/handler/innodb_patch_info.h	2009-03-18 18:09:28.000000000 +0900
+@@ -27,5 +27,6 @@
+ {"innodb_show_status","Improvements to SHOW INNODB STATUS","Memory information and lock info fixes","http://www.percona.com/docs/wiki/percona-xtradb"},
+ {"innodb_io","Improvements to InnoDB IO","","http://www.percona.com/docs/wiki/percona-xtradb"},
+ {"innodb_rw_lock","InnoDB RW-lock fixes","Useful for 8+ cores SMP systems","http://www.percona.com/docs/wiki/percona-xtradb"},
++{"innodb_opt_lru_count","Fix of buffer_pool mutex","Decreases contention on buffer_pool mutex on LRU operations","http://www.percona.com/docs/wiki/percona-xtradb"},
+ {NULL, NULL, NULL, NULL}
+ };
+diff -ruN innodb_plugin-1.0.3_orig/include/buf0flu.h innodb_plugin-1.0.3_tmp/include/buf0flu.h
+--- innodb_plugin-1.0.3_orig/include/buf0flu.h	2009-02-17 17:59:22.000000000 +0900
++++ innodb_plugin-1.0.3_tmp/include/buf0flu.h	2009-03-18 18:09:28.000000000 +0900
+@@ -49,8 +49,9 @@
+ a margin of replaceable pages there. */
+ UNIV_INTERN
+ void
+-buf_flush_free_margin(void);
++buf_flush_free_margin(
+ /*=======================*/
++	ibool	wait);
+ /************************************************************************
+ Initializes a page for writing to the tablespace. */
+ UNIV_INTERN

=== added file 'innodb_rw_lock.patch'
--- innodb_rw_lock.patch	1970-01-01 00:00:00 +0000
+++ innodb_rw_lock.patch	2009-03-23 09:54:36 +0000
@@ -0,0 +1,1060 @@
+diff -ruN innodb_plugin-1.0.3_orig/Makefile.in innodb_plugin-1.0.3_tmp/Makefile.in
+--- innodb_plugin-1.0.3_orig/Makefile.in	2009-03-23 17:06:47.000000000 +0900
++++ innodb_plugin-1.0.3_tmp/Makefile.in	2009-03-23 17:07:17.000000000 +0900
+@@ -713,7 +713,8 @@
+ 		echo '#define HAVE_ATOMIC_PTHREAD_T' > include/ut0auxconf.h ; \
+ 	fi
+ 
+-all: check_have_atomic_pthread_t all-am
++# This is a temporary fix for http://bugs.mysql.com/43740
++all: all-am
+ 
+ .SUFFIXES:
+ .SUFFIXES: .c .cc .lo .o .obj
+diff -ruN innodb_plugin-1.0.3_orig/handler/innodb_patch_info.h innodb_plugin-1.0.3_tmp/handler/innodb_patch_info.h
+--- innodb_plugin-1.0.3_orig/handler/innodb_patch_info.h	2009-03-23 17:06:47.000000000 +0900
++++ innodb_plugin-1.0.3_tmp/handler/innodb_patch_info.h	2009-03-23 17:07:17.000000000 +0900
+@@ -26,5 +26,6 @@
+ {"xtradb_show_enhancements","I_S.XTRADB_ENHANCEMENTS","","http://www.percona.com/docs/wiki/percona-xtradb"},
+ {"innodb_show_status","Improvements to SHOW INNODB STATUS","Memory information and lock info fixes","http://www.percona.com/docs/wiki/percona-xtradb"},
+ {"innodb_io","Improvements to InnoDB IO","","http://www.percona.com/docs/wiki/percona-xtradb"},
++{"innodb_rw_lock","InnoDB RW-lock fixes","Useful for 8+ cores SMP systems","http://www.percona.com/docs/wiki/percona-xtradb"},
+ {NULL, NULL, NULL, NULL}
+ };
+diff -ruN innodb_plugin-1.0.3_orig/include/sync0rw.h innodb_plugin-1.0.3_tmp/include/sync0rw.h
+--- innodb_plugin-1.0.3_orig/include/sync0rw.h	2009-02-17 16:56:33.000000000 +0900
++++ innodb_plugin-1.0.3_tmp/include/sync0rw.h	2009-03-23 17:07:17.000000000 +0900
+@@ -359,7 +359,17 @@
+ Accessor functions for rw lock. */
+ UNIV_INLINE
+ ulint
+-rw_lock_get_waiters(
++rw_lock_get_s_waiters(
++/*==================*/
++	rw_lock_t*	lock);
++UNIV_INLINE
++ulint
++rw_lock_get_x_waiters(
++/*==================*/
++	rw_lock_t*	lock);
++UNIV_INLINE
++ulint
++rw_lock_get_wx_waiters(
+ /*================*/
+ 	rw_lock_t*	lock);
+ UNIV_INLINE
+@@ -478,6 +488,14 @@
+ 	rw_lock_debug_t*	info);	/* in: debug struct */
+ #endif /* UNIV_SYNC_DEBUG */
+ 
++#ifndef INNODB_RW_LOCKS_USE_ATOMICS
++#error INNODB_RW_LOCKS_USE_ATOMICS is not defined. Do you use enough new GCC or compatibles?
++#error Or do you use exact options for CFLAGS?
++#error e.g. (for x86_32): "-m32 -march=i586 -mtune=i686"
++#error e.g. (for Sparc_64): "-m64 -mcpu=v9"
++#error Otherwise, this build may be slower than normal version.
++#endif
++
+ /* NOTE! The structure appears here only for the compiler to know its size.
+ Do not use its fields directly! The structure used in the spin lock
+ implementation of a read-write lock. Several threads may have a shared lock
+@@ -489,7 +507,16 @@
+ struct rw_lock_struct {
+ 	volatile lint	lock_word;
+ 				/* Holds the state of the lock. */
++#ifdef INNODB_RW_LOCKS_USE_ATOMICS
++	volatile ulint	s_waiters;	/* 1: there are waiters (s_lock) */
++	volatile ulint	x_waiters;	/* 1: there are waiters (x_lock) */
++	volatile ulint	wait_ex_waiters;	/* 1: there are waiters (wait_ex) */
++	volatile ulint	reader_count;	/* Number of readers who have locked this
++ 				lock in the shared mode */
++	volatile ulint	writer;
++#else
+ 	volatile ulint	waiters;/* 1: there are waiters */
++#endif
+ 	volatile ibool	recursive;/* Default value FALSE which means the lock
+ 				is non-recursive. The value is typically set
+ 				to TRUE making normal rw_locks recursive. In
+@@ -506,7 +533,16 @@
+ 				/* Thread id of writer thread. Is only
+ 				guaranteed to have sane and non-stale
+ 				value iff recursive flag is set. */
++#ifdef INNODB_RW_LOCKS_USE_ATOMICS
++	volatile ulint	writer_count;	/* Number of times the same thread has
++ 				recursively locked the lock in the exclusive
++ 				mode */
++			/* Used by sync0arr.c for thread queueing */
++	os_event_t	s_event;	/* Used for s_lock */
++	os_event_t	x_event;	/* Used for x_lock */
++#else
+ 	os_event_t	event;	/* Used by sync0arr.c for thread queueing */
++#endif
+ 	os_event_t	wait_ex_event;
+ 				/* Event for next-writer to wait on. A thread
+ 				must decrement lock_word before waiting. */
+@@ -528,7 +564,7 @@
+         /* last s-lock file/line is not guaranteed to be correct */
+ 	const char*	last_s_file_name;/* File name where last s-locked */
+ 	const char*	last_x_file_name;/* File name where last x-locked */
+-	ibool		writer_is_wait_ex;
++	volatile ibool		writer_is_wait_ex;
+ 				/* This is TRUE if the writer field is
+ 				RW_LOCK_WAIT_EX; this field is located far
+ 				from the memory update hotspot fields which
+diff -ruN innodb_plugin-1.0.3_orig/include/sync0rw.ic innodb_plugin-1.0.3_tmp/include/sync0rw.ic
+--- innodb_plugin-1.0.3_orig/include/sync0rw.ic	2009-02-17 21:59:54.000000000 +0900
++++ innodb_plugin-1.0.3_tmp/include/sync0rw.ic	2009-03-23 17:13:13.000000000 +0900
+@@ -70,12 +70,28 @@
+ Accessor functions for rw lock. */
+ UNIV_INLINE
+ ulint
+-rw_lock_get_waiters(
++rw_lock_get_s_waiters(
+ /*================*/
+ 				/* out: 1 if waiters, 0 otherwise */
+ 	rw_lock_t*	lock)	/* in: rw-lock */
+ {
+-	return(lock->waiters);
++	return(lock->s_waiters);
++}
++UNIV_INLINE
++ulint
++rw_lock_get_x_waiters(
++/*================*/
++	rw_lock_t*	lock)
++{
++	return(lock->x_waiters);
++}
++UNIV_INLINE
++ulint
++rw_lock_get_wx_waiters(
++/*================*/
++	rw_lock_t*      lock)
++{
++	return(lock->wait_ex_waiters);
+ }
+ 
+ /************************************************************************
+@@ -84,14 +100,41 @@
+ memory barrier. */
+ UNIV_INLINE
+ void
+-rw_lock_set_waiter_flag(
++rw_lock_set_s_waiter_flag(
++/*====================*/
++	rw_lock_t*	lock)	/* in: rw-lock */
++{
++#ifdef INNODB_RW_LOCKS_USE_ATOMICS
++	// os_compare_and_swap(&lock->s_waiters, 0, 1);
++	__sync_lock_test_and_set(&lock->s_waiters, 1);
++#else /* INNODB_RW_LOCKS_USE_ATOMICS */
++	lock->s_waiters = 1;
++#endif /* INNODB_RW_LOCKS_USE_ATOMICS */
++}
++UNIV_INLINE
++void
++rw_lock_set_x_waiter_flag(
++/*====================*/
++	rw_lock_t*	lock)	/* in: rw-lock */
++{
++#ifdef INNODB_RW_LOCKS_USE_ATOMICS
++	// os_compare_and_swap(&lock->x_waiters, 0, 1);
++	__sync_lock_test_and_set(&lock->x_waiters, 1);
++#else /* INNODB_RW_LOCKS_USE_ATOMICS */
++	lock->x_waiters = 1;
++#endif /* INNODB_RW_LOCKS_USE_ATOMICS */
++}
++UNIV_INLINE
++void
++rw_lock_set_wx_waiter_flag(
+ /*====================*/
+ 	rw_lock_t*	lock)	/* in: rw-lock */
+ {
+ #ifdef INNODB_RW_LOCKS_USE_ATOMICS
+-	os_compare_and_swap(&lock->waiters, 0, 1);
++	// os_compare_and_swap(&lock->wait_ex_waiters, 0, 1);
++	__sync_lock_test_and_set(&lock->wait_ex_waiters, 1);
+ #else /* INNODB_RW_LOCKS_USE_ATOMICS */
+-	lock->waiters = 1;
++	lock->wait_ex_waiters = 1;
+ #endif /* INNODB_RW_LOCKS_USE_ATOMICS */
+ }
+ 
+@@ -101,14 +144,41 @@
+ memory barrier. */
+ UNIV_INLINE
+ void
+-rw_lock_reset_waiter_flag(
++rw_lock_reset_s_waiter_flag(
++/*======================*/
++	rw_lock_t*	lock)	/* in: rw-lock */
++{
++#ifdef INNODB_RW_LOCKS_USE_ATOMICS
++	// os_compare_and_swap(&lock->s_waiters, 1, 0);
++	__sync_lock_test_and_set(&lock->s_waiters, 0);
++#else /* INNODB_RW_LOCKS_USE_ATOMICS */
++	lock->s_waiters = 0;
++#endif /* INNODB_RW_LOCKS_USE_ATOMICS */
++}
++UNIV_INLINE
++void
++rw_lock_reset_x_waiter_flag(
++/*======================*/
++	rw_lock_t*	lock)	/* in: rw-lock */
++{
++#ifdef INNODB_RW_LOCKS_USE_ATOMICS
++	// os_compare_and_swap(&lock->x_waiters, 1, 0);
++	__sync_lock_test_and_set(&lock->x_waiters, 0);
++#else /* INNODB_RW_LOCKS_USE_ATOMICS */
++	lock->x_waiters = 0;
++#endif /* INNODB_RW_LOCKS_USE_ATOMICS */
++}
++UNIV_INLINE
++void
++rw_lock_reset_wx_waiter_flag(
+ /*======================*/
+ 	rw_lock_t*	lock)	/* in: rw-lock */
+ {
+ #ifdef INNODB_RW_LOCKS_USE_ATOMICS
+-	os_compare_and_swap(&lock->waiters, 1, 0);
++	// os_compare_and_swap(&lock->wait_ex_waiters, 1, 0);
++	__sync_lock_test_and_set(&lock->wait_ex_waiters, 0);
+ #else /* INNODB_RW_LOCKS_USE_ATOMICS */
+-	lock->waiters = 0;
++	lock->wait_ex_waiters = 0;
+ #endif /* INNODB_RW_LOCKS_USE_ATOMICS */
+ }
+ 
+@@ -121,6 +191,17 @@
+ /*===============*/
+ 	rw_lock_t*	lock)
+ {
++#ifdef INNODB_RW_LOCKS_USE_ATOMICS
++	if (lock->writer == RW_LOCK_NOT_LOCKED) {
++		return(RW_LOCK_NOT_LOCKED);
++	}
++
++	if (lock->writer_is_wait_ex) {
++		return(RW_LOCK_WAIT_EX);
++	} else {
++		return(RW_LOCK_EX);
++	}
++#else
+ 	lint lock_word = lock->lock_word;
+ 	if(lock_word > 0) {
+ 		/* return NOT_LOCKED in s-lock state, like the writer
+@@ -132,6 +213,7 @@
+                 ut_ad(lock_word > -X_LOCK_DECR);
+ 		return(RW_LOCK_WAIT_EX);
+ 	}
++#endif
+ }
+ 
+ /**********************************************************************
+@@ -142,6 +224,9 @@
+ /*=====================*/
+ 	rw_lock_t*	lock)
+ {
++#ifdef INNODB_RW_LOCKS_USE_ATOMICS
++	return(lock->reader_count);
++#else
+ 	lint lock_word = lock->lock_word;
+ 	if(lock_word > 0) {
+ 		/* s-locked, no x-waiters */
+@@ -151,6 +236,7 @@
+ 		return((ulint)(-lock_word));
+ 	}
+ 	return(0);
++#endif
+ }
+ 
+ #ifndef INNODB_RW_LOCKS_USE_ATOMICS
+@@ -174,12 +260,16 @@
+ 				/* out: value of writer_count */
+ 	rw_lock_t*	lock)	/* in: rw-lock */
+ {
++#ifdef INNODB_RW_LOCKS_USE_ATOMICS
++	return(lock->writer_count);
++#else
+ 	lint lock_copy = lock->lock_word;
+ 	/* If there is a reader, lock_word is not divisible by X_LOCK_DECR */
+ 	if(lock_copy > 0 || (-lock_copy) % X_LOCK_DECR != 0) {
+ 		return(0);
+ 	}
+ 	return(((-lock_copy) / X_LOCK_DECR) + 1);
++#endif
+ }
+ 
+ /**********************************************************************
+@@ -317,11 +407,26 @@
+ 	const char*	file_name, /* in: file name where lock requested */
+ 	ulint		line)	/* in: line where requested */
+ {
++#ifdef INNODB_RW_LOCKS_USE_ATOMICS
++	if (UNIV_LIKELY(rw_lock_get_writer(lock) == RW_LOCK_NOT_LOCKED)) {
++		/* try s-lock */
++		if(__sync_sub_and_fetch(&(lock->lock_word),1) <= 0) {
++			/* fail */
++			__sync_fetch_and_add(&(lock->lock_word),1);
++			return(FALSE);  /* locking did not succeed */
++		}
++		/* success */
++		__sync_fetch_and_add(&(lock->reader_count),1);
++	} else {
++		return(FALSE);  /* locking did not succeed */
++	}
++#else
+ 	/* TODO: study performance of UNIV_LIKELY branch prediction hints. */
+ 	if (!rw_lock_lock_word_decr(lock, 1)) {
+ 		/* Locking did not succeed */
+ 		return(FALSE);
+ 	}
++#endif
+ 
+ #ifdef UNIV_SYNC_DEBUG
+ 	rw_lock_add_debug_info(lock, pass, RW_LOCK_SHARED, file_name, line);
+@@ -346,10 +451,17 @@
+ 	const char*	file_name,	/* in: file name where requested */
+ 	ulint		line)		/* in: line where lock requested */
+ {
++#ifdef INNODB_RW_LOCKS_USE_ATOMICS
++	ut_ad(rw_lock_get_writer(lock) == RW_LOCK_NOT_LOCKED);
++	ut_ad(rw_lock_get_reader_count(lock) == 0);
++
++	__sync_fetch_and_add(&(lock->reader_count),1);
++#else
+ 	ut_ad(lock->lock_word == X_LOCK_DECR);
+ 
+ 	/* Indicate there is a new reader by decrementing lock_word */
+ 	lock->lock_word--;
++#endif
+ 
+ 	lock->last_s_file_name = file_name;
+ 	lock->last_s_line = line;
+@@ -372,9 +484,17 @@
+ 	ulint		line)		/* in: line where lock requested */
+ {
+ 	ut_ad(rw_lock_validate(lock));
++#ifdef INNODB_RW_LOCKS_USE_ATOMICS
++	ut_ad(rw_lock_get_reader_count(lock) == 0);
++	ut_ad(rw_lock_get_writer(lock) == RW_LOCK_NOT_LOCKED);
++
++	lock->writer = RW_LOCK_EX;
++	__sync_fetch_and_add(&(lock->writer_count),1);
++#else
+ 	ut_ad(lock->lock_word == X_LOCK_DECR);
+ 
+ 	lock->lock_word -= X_LOCK_DECR;
++#endif
+ 	lock->writer_thread = os_thread_get_curr_id();
+ 	lock->recursive = TRUE;
+ 
+@@ -448,7 +568,56 @@
+ 	ibool success;
+ 
+ #ifdef INNODB_RW_LOCKS_USE_ATOMICS
+-	success = os_compare_and_swap(&(lock->lock_word), X_LOCK_DECR, 0);
++	success = FALSE;
++	if ((lock->reader_count == 0)
++			&& rw_lock_get_writer(lock) == RW_LOCK_NOT_LOCKED) {
++retry_x_lock:
++		/* try x-lock */
++		if(__sync_sub_and_fetch(&(lock->lock_word),
++				X_LOCK_DECR) == 0) {
++			/* success */
++			/* try to lock writer */
++			if(__sync_lock_test_and_set(&(lock->writer),RW_LOCK_EX)
++					== RW_LOCK_NOT_LOCKED) {
++				/* success */
++				lock->writer_thread = curr_thread;
++				lock->recursive = TRUE;
++				lock->writer_is_wait_ex = FALSE;
++				/* next function may work as memory barrier */
++			relock:
++				__sync_fetch_and_add(&(lock->writer_count),1);
++
++#ifdef UNIV_SYNC_DEBUG
++				rw_lock_add_debug_info(lock, 0, RW_LOCK_EX, file_name, line);
++#endif
++
++				lock->last_x_file_name = file_name;
++				lock->last_x_line = line;
++
++				ut_ad(rw_lock_validate(lock));
++
++				return(TRUE);
++			} else {
++				/* x-unlock */
++				__sync_fetch_and_add(&(lock->lock_word),
++					X_LOCK_DECR);
++			}
++		} else {
++			/* fail (x-lock) */
++			if (__sync_fetch_and_add(&(lock->lock_word),X_LOCK_DECR)
++					== 0)
++				goto retry_x_lock;
++		}
++	}
++
++	if (lock->recursive
++			&& os_thread_eq(lock->writer_thread, curr_thread)) {
++		goto relock;
++	}
++
++	//ut_ad(rw_lock_validate(lock));
++
++	return(FALSE);
+ #else
+ 
+ 	success = FALSE;
+@@ -459,7 +628,6 @@
+ 	}
+ 	mutex_exit(&(lock->mutex));
+ 
+-#endif
+ 	if (success) {
+ 		rw_lock_set_writer_id_and_recursion_flag(lock, TRUE);
+ 
+@@ -486,6 +654,7 @@
+ 	ut_ad(rw_lock_validate(lock));
+ 
+ 	return(TRUE);
++#endif
+ }
+ 
+ /**********************************************************************
+@@ -501,6 +670,31 @@
+ #endif
+ 	)
+ {
++#ifdef INNODB_RW_LOCKS_USE_ATOMICS
++	ibool	last	= FALSE;
++
++	ut_a(lock->reader_count > 0);
++
++	/* unlock lock_word */
++	__sync_fetch_and_add(&(lock->lock_word),1);
++
++	if(__sync_sub_and_fetch(&(lock->reader_count),1) == 0) {
++		last = TRUE;
++	}
++
++#ifdef UNIV_SYNC_DEBUG
++	rw_lock_remove_debug_info(lock, pass, RW_LOCK_SHARED);
++#endif
++
++	if (UNIV_UNLIKELY(last && __sync_lock_test_and_set(&lock->wait_ex_waiters, 0))) {
++		os_event_set(lock->wait_ex_event);
++		sync_array_object_signalled(sync_primary_wait_array);
++	}
++	else if (UNIV_UNLIKELY(last && __sync_lock_test_and_set(&lock->x_waiters, 0))) {
++		os_event_set(lock->x_event);
++		sync_array_object_signalled(sync_primary_wait_array);
++	}
++#else
+ 	ut_ad((lock->lock_word % X_LOCK_DECR) != 0);
+ 
+ #ifdef UNIV_SYNC_DEBUG
+@@ -517,6 +711,7 @@
+ 		sync_array_object_signalled(sync_primary_wait_array);
+ 
+ 	}
++#endif
+ 
+ 	ut_ad(rw_lock_validate(lock));
+ 
+@@ -534,6 +729,19 @@
+ /*====================*/
+ 	rw_lock_t*	lock)	/* in: rw-lock */
+ {
++#ifdef INNODB_RW_LOCKS_USE_ATOMICS
++	ut_ad(lock->reader_count > 0);
++
++	__sync_sub_and_fetch(&(lock->reader_count),1);
++
++#ifdef UNIV_SYNC_DEBUG
++	rw_lock_remove_debug_info(lock, 0, RW_LOCK_SHARED);
++#endif
++
++	ut_ad(!lock->s_waiters);
++	ut_ad(!lock->x_waiters);
++	ut_ad(!lock->wait_ex_waiters);
++#else
+ 	ut_ad(lock->lock_word < X_LOCK_DECR);
+ 
+ #ifdef UNIV_SYNC_DEBUG
+@@ -544,6 +752,7 @@
+ 	lock->lock_word++;
+ 
+ 	ut_ad(!lock->waiters);
++#endif
+ 	ut_ad(rw_lock_validate(lock));
+ #ifdef UNIV_SYNC_PERF_STAT
+ 	rw_s_exit_count++;
+@@ -563,6 +772,49 @@
+ #endif
+ 	)
+ {
++#ifdef INNODB_RW_LOCKS_USE_ATOMICS
++	ibool	last	= FALSE;
++	ibool	s_sg	= FALSE;
++	ibool	x_sg	= FALSE;
++
++	ut_ad(lock->writer_count > 0);
++
++	if(__sync_sub_and_fetch(&(lock->writer_count),1) == 0) {
++		last = TRUE;
++	}
++
++	if (last) {
++		/* unlock lock_word */
++		__sync_fetch_and_add(&(lock->lock_word),X_LOCK_DECR);
++
++		lock->recursive = FALSE;
++		/* FIXME: -1 is not a proper value for a pthread id,
++		          but we shouldn't keep the ID of a non-owner. */
++		lock->writer_thread = -1;
++		__sync_lock_test_and_set(&(lock->writer),RW_LOCK_NOT_LOCKED);
++	}
++
++#ifdef UNIV_SYNC_DEBUG
++	rw_lock_remove_debug_info(lock, pass, RW_LOCK_EX);
++#endif
++	if (last) {
++		if(__sync_lock_test_and_set(&lock->s_waiters, 0)){
++			s_sg = TRUE;
++		}
++		if(__sync_lock_test_and_set(&lock->x_waiters, 0)){
++			x_sg = TRUE;
++		}
++	}
++
++	if (UNIV_UNLIKELY(s_sg)) {
++		os_event_set(lock->s_event);
++		sync_array_object_signalled(sync_primary_wait_array);
++	}
++	if (UNIV_UNLIKELY(x_sg)) {
++		os_event_set(lock->x_event);
++		sync_array_object_signalled(sync_primary_wait_array);
++	}
++#else
+ 	ut_ad((lock->lock_word % X_LOCK_DECR) == 0);
+ 
+ 	/* lock->recursive flag also indicates if lock->writer_thread is
+@@ -593,6 +845,7 @@
+ 		}
+ 	}
+ 
++#endif
+ 	ut_ad(rw_lock_validate(lock));
+ 
+ #ifdef UNIV_SYNC_PERF_STAT
+@@ -612,6 +865,19 @@
+ 	/* Reset the exclusive lock if this thread no longer has an x-mode
+ 	lock */
+ 
++#ifdef INNODB_RW_LOCKS_USE_ATOMICS
++	if(__sync_sub_and_fetch(&(lock->writer_count),1) == 0) {
++		lock->writer = RW_LOCK_NOT_LOCKED;
++	}
++
++#ifdef UNIV_SYNC_DEBUG
++	rw_lock_remove_debug_info(lock, 0, RW_LOCK_EX);
++#endif
++
++	ut_ad(!lock->s_waiters);
++	ut_ad(!lock->x_waiters);
++	ut_ad(!lock->wait_ex_waiters);
++#else
+ 	ut_ad((lock->lock_word % X_LOCK_DECR) == 0);
+ 
+ #ifdef UNIV_SYNC_DEBUG
+@@ -627,6 +893,7 @@
+ 	lock->lock_word += X_LOCK_DECR;
+ 
+ 	ut_ad(!lock->waiters);
++#endif
+ 	ut_ad(rw_lock_validate(lock));
+ 
+ #ifdef UNIV_SYNC_PERF_STAT
+diff -ruN innodb_plugin-1.0.3_orig/include/ut0auxconf.h innodb_plugin-1.0.3_tmp/include/ut0auxconf.h
+--- innodb_plugin-1.0.3_orig/include/ut0auxconf.h	2009-03-05 23:38:59.000000000 +0900
++++ innodb_plugin-1.0.3_tmp/include/ut0auxconf.h	2009-03-23 17:07:17.000000000 +0900
+@@ -12,3 +12,8 @@
+ the hack from Makefile.in wiped away then the "real" check from plug.in
+ will take over.
+ */
++/* This is a temporary fix for http://bugs.mysql.com/43740 */
++/* force to enable */
++#ifdef HAVE_GCC_ATOMIC_BUILTINS
++#define HAVE_ATOMIC_PTHREAD_T
++#endif
+diff -ruN innodb_plugin-1.0.3_orig/sync/sync0arr.c innodb_plugin-1.0.3_tmp/sync/sync0arr.c
+--- innodb_plugin-1.0.3_orig/sync/sync0arr.c	2009-02-17 21:26:53.000000000 +0900
++++ innodb_plugin-1.0.3_tmp/sync/sync0arr.c	2009-03-23 17:07:17.000000000 +0900
+@@ -331,8 +331,15 @@
+ 		return(((mutex_t *) cell->wait_object)->event);
+ 	} else if (type == RW_LOCK_WAIT_EX) {
+ 		return(((rw_lock_t *) cell->wait_object)->wait_ex_event);
++#ifdef INNODB_RW_LOCKS_USE_ATOMICS
++	} else if (type == RW_LOCK_SHARED) {
++		return(((rw_lock_t *) cell->wait_object)->s_event);
++	} else { /* RW_LOCK_EX */
++		return(((rw_lock_t *) cell->wait_object)->x_event);
++#else
+ 	} else { /* RW_LOCK_SHARED and RW_LOCK_EX wait on the same event */
+ 		return(((rw_lock_t *) cell->wait_object)->event);
++#endif
+ 	}
+ }
+ 
+@@ -503,7 +510,7 @@
+ 		   || type == RW_LOCK_WAIT_EX
+ 		   || type == RW_LOCK_SHARED) {
+ 
+-		fputs(type == RW_LOCK_EX ? "X-lock on" : "S-lock on", file);
++		fputs(type == RW_LOCK_SHARED ? "S-lock on" : "X-lock on", file);
+ 
+ 		rwlock = cell->old_wait_rw_lock;
+ 
+@@ -523,12 +530,21 @@
+ 		}
+ 
+ 		fprintf(file,
++#ifdef INNODB_RW_LOCKS_USE_ATOMICS
++			"number of readers %lu, s_waiters flag %lu, x_waiters flag %lu, "
++#else
+ 			"number of readers %lu, waiters flag %lu, "
++#endif
+                         "lock_word: %lx\n"
+ 			"Last time read locked in file %s line %lu\n"
+ 			"Last time write locked in file %s line %lu\n",
+ 			(ulong) rw_lock_get_reader_count(rwlock),
++#ifdef INNODB_RW_LOCKS_USE_ATOMICS
++			(ulong) rwlock->s_waiters,
++			(ulong) (rwlock->x_waiters || rwlock->wait_ex_waiters),
++#else
+ 			(ulong) rwlock->waiters,
++#endif
+ 			rwlock->lock_word,
+ 			rwlock->last_s_file_name,
+ 			(ulong) rwlock->last_s_line,
+diff -ruN innodb_plugin-1.0.3_orig/sync/sync0rw.c innodb_plugin-1.0.3_tmp/sync/sync0rw.c
+--- innodb_plugin-1.0.3_orig/sync/sync0rw.c	2009-02-17 21:26:53.000000000 +0900
++++ innodb_plugin-1.0.3_tmp/sync/sync0rw.c	2009-03-23 17:15:44.000000000 +0900
+@@ -250,7 +250,17 @@
+ #endif /* INNODB_RW_LOCKS_USE_ATOMICS */
+ 
+ 	lock->lock_word = X_LOCK_DECR;
++#ifdef INNODB_RW_LOCKS_USE_ATOMICS
++	lock->s_waiters = 0;
++	lock->x_waiters = 0;
++	lock->wait_ex_waiters = 0;
++	lock->writer = RW_LOCK_NOT_LOCKED;
++	lock->writer_count = 0;
++	lock->reader_count = 0;
++	lock->writer_is_wait_ex = FALSE;
++#else
+ 	lock->waiters = 0;
++#endif
+ 
+ 	/* We set this value to signify that lock->writer_thread
+ 	contains garbage at initialization and cannot be used for
+@@ -273,7 +283,12 @@
+ 	lock->last_x_file_name = "not yet reserved";
+ 	lock->last_s_line = 0;
+ 	lock->last_x_line = 0;
++#ifdef INNODB_RW_LOCKS_USE_ATOMICS
++	lock->s_event = os_event_create(NULL);
++	lock->x_event = os_event_create(NULL);
++#else
+ 	lock->event = os_event_create(NULL);
++#endif
+ 	lock->wait_ex_event = os_event_create(NULL);
+ 
+ 	mutex_enter(&rw_lock_list_mutex);
+@@ -299,7 +314,15 @@
+ 	rw_lock_t*	lock)	/* in: rw-lock */
+ {
+ 	ut_ad(rw_lock_validate(lock));
++#ifdef INNODB_RW_LOCKS_USE_ATOMICS
++	ut_a(rw_lock_get_writer(lock) == RW_LOCK_NOT_LOCKED);
++	ut_a(rw_lock_get_s_waiters(lock) == 0);
++	ut_a(rw_lock_get_x_waiters(lock) == 0);
++	ut_a(rw_lock_get_wx_waiters(lock) == 0);
++	ut_a(rw_lock_get_reader_count(lock) == 0);
++#else
+ 	ut_a(lock->lock_word == X_LOCK_DECR);
++#endif
+ 
+ 	lock->magic_n = 0;
+ 
+@@ -308,7 +331,12 @@
+ #endif /* INNODB_RW_LOCKS_USE_ATOMICS */
+ 
+ 	mutex_enter(&rw_lock_list_mutex);
++#ifdef INNODB_RW_LOCKS_USE_ATOMICS
++	os_event_free(lock->s_event);
++	os_event_free(lock->x_event);
++#else
+ 	os_event_free(lock->event);
++#endif
+ 
+ 	os_event_free(lock->wait_ex_event);
+ 
+@@ -336,12 +364,23 @@
+ {
+ 	ut_a(lock);
+ 
++#ifdef INNODB_RW_LOCKS_USE_ATOMICS
++	ut_a(lock->magic_n == RW_LOCK_MAGIC_N);
++
++	ulint waiters = rw_lock_get_s_waiters(lock);
++	ut_a(waiters == 0 || waiters == 1);
++	waiters = rw_lock_get_x_waiters(lock);
++	ut_a(waiters == 0 || waiters == 1);
++	waiters = rw_lock_get_wx_waiters(lock);
++	ut_a(waiters == 0 || waiters == 1);
++#else
+ 	ulint waiters = rw_lock_get_waiters(lock);
+ 	lint lock_word = lock->lock_word;
+ 
+ 	ut_a(lock->magic_n == RW_LOCK_MAGIC_N);
+ 	ut_a(waiters == 0 || waiters == 1);
+ 	ut_a(lock_word > -X_LOCK_DECR ||(-lock_word) % X_LOCK_DECR == 0);
++#endif
+ 
+ 	return(TRUE);
+ }
+@@ -371,7 +410,12 @@
+ lock_loop:
+ 
+ 	/* Spin waiting for the writer field to become free */
++#ifdef INNODB_RW_LOCKS_USE_ATOMICS
++	while (i < SYNC_SPIN_ROUNDS
++	       && rw_lock_get_writer(lock) != RW_LOCK_NOT_LOCKED) {
++#else
+ 	while (i < SYNC_SPIN_ROUNDS && lock->lock_word <= 0) {
++#endif
+ 		if (srv_spin_wait_delay) {
+ 			ut_delay(ut_rnd_interval(0, srv_spin_wait_delay));
+ 		}
+@@ -412,12 +456,29 @@
+ 
+ 		/* Set waiters before checking lock_word to ensure wake-up
+                 signal is sent. This may lead to some unnecessary signals. */
++#ifdef INNODB_RW_LOCKS_USE_ATOMICS
++		rw_lock_set_s_waiter_flag(lock);
++#else
+ 		rw_lock_set_waiter_flag(lock);
++#endif
+ 
++#ifdef INNODB_RW_LOCKS_USE_ATOMICS
++		for (i = 0; i < 4; i++) {
++#endif
+ 		if (TRUE == rw_lock_s_lock_low(lock, pass, file_name, line)) {
+ 			sync_array_free_cell(sync_primary_wait_array, index);
+ 			return; /* Success */
+ 		}
++#ifdef INNODB_RW_LOCKS_USE_ATOMICS
++		}
++
++		/* If a wait_ex waiter has stalled, wake it up. */
++		if (lock->reader_count == 0
++		    && __sync_lock_test_and_set(&lock->wait_ex_waiters, 0)) {
++			os_event_set(lock->wait_ex_event);
++			sync_array_object_signalled(sync_primary_wait_array);
++		}
++#endif
+ 
+ 		if (srv_print_latch_waits) {
+ 			fprintf(stderr,
+@@ -456,7 +517,12 @@
+ {
+ 	ut_ad(rw_lock_is_locked(lock, RW_LOCK_EX));
+ 
++#ifdef INNODB_RW_LOCKS_USE_ATOMICS
++	lock->writer_thread = os_thread_get_curr_id();
++	lock->recursive = TRUE;
++#else
+ 	rw_lock_set_writer_id_and_recursion_flag(lock, TRUE);
++#endif
+ }
+ 
+ /**********************************************************************
+@@ -530,7 +596,11 @@
+ /**********************************************************************
+ Low-level function for acquiring an exclusive lock. */
+ UNIV_INLINE
++#ifdef INNODB_RW_LOCKS_USE_ATOMICS
++ulint
++#else
+ ibool
++#endif
+ rw_lock_x_lock_low(
+ /*===============*/
+ 				/* out: RW_LOCK_NOT_LOCKED if did
+@@ -543,6 +613,90 @@
+ {
+ 	os_thread_id_t	curr_thread	= os_thread_get_curr_id();
+ 
++#ifdef INNODB_RW_LOCKS_USE_ATOMICS
++retry_writer:
++	/* try to lock writer */
++	if(__sync_lock_test_and_set(&(lock->writer),RW_LOCK_EX)
++			== RW_LOCK_NOT_LOCKED) {
++		/* success */
++		/* obtain RW_LOCK_WAIT_EX right */
++		lock->writer_thread = curr_thread;
++		lock->recursive = pass ? FALSE : TRUE;
++		lock->writer_is_wait_ex = TRUE;
++		/* atomic operation may be safer about memory order. */
++		__sync_synchronize();
++#ifdef UNIV_SYNC_DEBUG
++		rw_lock_add_debug_info(lock, pass, RW_LOCK_WAIT_EX,
++					file_name, line);
++#endif
++	}
++
++	if (!os_thread_eq(lock->writer_thread, curr_thread)) {
++		return(RW_LOCK_NOT_LOCKED);
++	}
++
++	switch(rw_lock_get_writer(lock)) {
++	    case RW_LOCK_WAIT_EX:
++		/* have right to try x-lock */
++retry_x_lock:
++		/* try x-lock */
++		if(__sync_sub_and_fetch(&(lock->lock_word),
++				X_LOCK_DECR) == 0) {
++			/* success */
++			lock->recursive = pass ? FALSE : TRUE;
++			lock->writer_is_wait_ex = FALSE;
++			__sync_fetch_and_add(&(lock->writer_count),1);
++
++#ifdef UNIV_SYNC_DEBUG
++			rw_lock_remove_debug_info(lock, pass, RW_LOCK_WAIT_EX);
++			rw_lock_add_debug_info(lock, pass, RW_LOCK_EX,
++						file_name, line);
++#endif
++
++			lock->last_x_file_name = file_name;
++			lock->last_x_line = line;
++
++			/* Locking succeeded, we may return */
++			return(RW_LOCK_EX);
++		} else if(__sync_fetch_and_add(&(lock->lock_word),
++				X_LOCK_DECR) == 0) {
++			/* retry x-lock */
++			goto retry_x_lock;
++		}
++
++		/* There are readers, we have to wait */
++		return(RW_LOCK_WAIT_EX);
++
++		break;
++
++	    case RW_LOCK_EX:
++		/* already have x-lock */
++		if (lock->recursive && (pass == 0)) {
++			__sync_fetch_and_add(&(lock->writer_count),1);
++
++#ifdef UNIV_SYNC_DEBUG
++			rw_lock_add_debug_info(lock, pass, RW_LOCK_EX, file_name,
++						line);
++#endif
++
++			lock->last_x_file_name = file_name;
++			lock->last_x_line = line;
++
++			/* Locking succeeded, we may return */
++			return(RW_LOCK_EX);
++		}
++
++		return(RW_LOCK_NOT_LOCKED);
++
++		break;
++
++	    default: /* RW_LOCK_NOT_LOCKED? maybe impossible */
++		goto retry_writer;
++	}
++
++	/* Locking did not succeed */
++	return(RW_LOCK_NOT_LOCKED);
++#else
+ 	if (rw_lock_lock_word_decr(lock, X_LOCK_DECR)) {
+ 
+ 		/* lock->recursive also tells us if the writer_thread
+@@ -580,6 +734,7 @@
+ 	lock->last_x_line = (unsigned int) line;
+ 
+ 	return(TRUE);
++#endif
+ }
+ 
+ /**********************************************************************
+@@ -604,18 +759,55 @@
+ 	ulint	index;	/* index of the reserved wait cell */
+ 	ulint	i;	/* spin round count */
+ 	ibool   spinning = FALSE;
++#ifdef INNODB_RW_LOCKS_USE_ATOMICS
++	ulint	state = RW_LOCK_NOT_LOCKED;	/* lock state acquired */
++	ulint	prev_state = RW_LOCK_NOT_LOCKED;
++#endif
+ 
+ 	ut_ad(rw_lock_validate(lock));
+ 
+ 	i = 0;
+ 
+ lock_loop:
++#ifdef INNODB_RW_LOCKS_USE_ATOMICS
++	prev_state = state;
++	state = rw_lock_x_lock_low(lock, pass, file_name, line);
++
++lock_loop_2:
++	if (state != prev_state) i=0; /* if progress, reset counter. */
+ 
++	if (state == RW_LOCK_EX) {
++#else
+ 	if (rw_lock_x_lock_low(lock, pass, file_name, line)) {
++#endif
+ 		rw_x_spin_round_count += i;
+ 
+ 		return;	/* Locking succeeded */
+ 
++#ifdef INNODB_RW_LOCKS_USE_ATOMICS
++	} else if (state == RW_LOCK_WAIT_EX) {
++
++		if (!spinning) {
++			spinning = TRUE;
++			rw_x_spin_wait_count++;
++		}
++
++		/* Spin waiting for the reader count field to become zero */
++		while (i < SYNC_SPIN_ROUNDS
++		       && lock->lock_word != X_LOCK_DECR) {
++			if (srv_spin_wait_delay) {
++				ut_delay(ut_rnd_interval(0,
++							 srv_spin_wait_delay));
++			}
++
++			i++;
++		}
++		if (i == SYNC_SPIN_ROUNDS) {
++			os_thread_yield();
++		} else {
++			goto lock_loop;
++		}
++#endif
+ 	} else {
+ 
+                 if (!spinning) {
+@@ -625,7 +817,11 @@
+ 
+ 		/* Spin waiting for the lock_word to become free */
+ 		while (i < SYNC_SPIN_ROUNDS
++#ifdef INNODB_RW_LOCKS_USE_ATOMICS
++		       && rw_lock_get_writer(lock) != RW_LOCK_NOT_LOCKED) {
++#else
+ 		       && lock->lock_word <= 0) {
++#endif
+ 			if (srv_spin_wait_delay) {
+ 				ut_delay(ut_rnd_interval(0,
+ 							 srv_spin_wait_delay));
+@@ -652,18 +848,46 @@
+ 
+ 	sync_array_reserve_cell(sync_primary_wait_array,
+ 				lock,
++#ifdef INNODB_RW_LOCKS_USE_ATOMICS
++				(state == RW_LOCK_WAIT_EX)
++				 ? RW_LOCK_WAIT_EX : RW_LOCK_EX,
++#else
+ 				RW_LOCK_EX,
++#endif
+ 				file_name, line,
+ 				&index);
+ 
+ 	/* Waiters must be set before checking lock_word, to ensure signal
+ 	is sent. This could lead to a few unnecessary wake-up signals. */
++#ifdef INNODB_RW_LOCKS_USE_ATOMICS
++	if (state == RW_LOCK_WAIT_EX) {
++		rw_lock_set_wx_waiter_flag(lock);
++	} else {
++		rw_lock_set_x_waiter_flag(lock);
++	}
++#else
+ 	rw_lock_set_waiter_flag(lock);
++#endif
+ 
++#ifdef INNODB_RW_LOCKS_USE_ATOMICS
++	for (i = 0; i < 4; i++) {
++		prev_state = state;
++		state = rw_lock_x_lock_low(lock, pass, file_name, line);
++		if (state == RW_LOCK_EX) {
++			sync_array_free_cell(sync_primary_wait_array, index);
++			return; /* Locking succeeded */
++		} else if (state != prev_state) {
++			/* retry! */
++			sync_array_free_cell(sync_primary_wait_array, index);
++			goto lock_loop_2;
++		}
++	}
++#else
+ 	if (rw_lock_x_lock_low(lock, pass, file_name, line)) {
+ 		sync_array_free_cell(sync_primary_wait_array, index);
+ 		return; /* Locking succeeded */
+ 	}
++#endif
+ 
+ 	if (srv_print_latch_waits) {
+ 		fprintf(stderr,
+@@ -914,11 +1138,24 @@
+ 
+ 			fprintf(file, "RW-LOCK: %p ", (void*) lock);
+ 
++#ifdef INNODB_RW_LOCKS_USE_ATOMICS
++			if (rw_lock_get_s_waiters(lock)) {
++				fputs(" s_waiters for the lock exist", file);
++			}
++			if (rw_lock_get_x_waiters(lock)) {
++				fputs(" x_waiters for the lock exist", file);
++			}
++			if (rw_lock_get_wx_waiters(lock)) {
++				fputs(" wait_ex_waiters for the lock exist", file);
++			}
++			putc('\n', file);
++#else
+ 			if (rw_lock_get_waiters(lock)) {
+ 				fputs(" Waiters for the lock exist\n", file);
+ 			} else {
+ 				putc('\n', file);
+ 			}
++#endif
+ 
+ 			info = UT_LIST_GET_FIRST(lock->debug_list);
+ 			while (info != NULL) {
+@@ -957,11 +1194,24 @@
+ #endif
+ 	if (lock->lock_word != X_LOCK_DECR) {
+ 
++#ifdef INNODB_RW_LOCKS_USE_ATOMICS
++		if (rw_lock_get_s_waiters(lock)) {
++			fputs(" s_waiters for the lock exist", stderr);
++		}
++		if (rw_lock_get_x_waiters(lock)) {
++			fputs(" x_waiters for the lock exist", stderr);
++		}
++		if (rw_lock_get_wx_waiters(lock)) {
++			fputs(" wait_ex_waiters for the lock exist", stderr);
++		}
++		putc('\n', stderr);
++#else
+ 		if (rw_lock_get_waiters(lock)) {
+ 			fputs(" Waiters for the lock exist\n", stderr);
+ 		} else {
+ 			putc('\n', stderr);
+ 		}
++#endif
+ 
+ 		info = UT_LIST_GET_FIRST(lock->debug_list);
+ 		while (info != NULL) {

=== added file 'innodb_show_enhancements.patch'
--- innodb_show_enhancements.patch	1970-01-01 00:00:00 +0000
+++ innodb_show_enhancements.patch	2009-03-20 05:35:54 +0000
@@ -0,0 +1,278 @@
+diff -ruN innodb_plugin-1.0.3_orig/Makefile.am innodb_plugin-1.0.3_tmp/Makefile.am
+--- innodb_plugin-1.0.3_orig/Makefile.am	2008-05-01 02:59:16.000000000 +0900
++++ innodb_plugin-1.0.3_tmp/Makefile.am	2009-03-18 17:14:19.000000000 +0900
+@@ -131,7 +131,7 @@
+ 			include/ut0list.ic include/ut0wqueue.h		\
+ 			include/ha_prototypes.h handler/ha_innodb.h	\
+ 			include/handler0alter.h				\
+-			handler/i_s.h
++			handler/i_s.h handler/innodb_patch_info.h
+ 
+ EXTRA_LIBRARIES=	libinnobase.a
+ noinst_LIBRARIES=	@plugin_innobase_static_target@
+diff -ruN innodb_plugin-1.0.3_orig/Makefile.in innodb_plugin-1.0.3_tmp/Makefile.in
+--- innodb_plugin-1.0.3_orig/Makefile.in	2009-03-06 19:22:06.000000000 +0900
++++ innodb_plugin-1.0.3_tmp/Makefile.in	2009-03-18 17:14:19.000000000 +0900
+@@ -651,7 +651,7 @@
+ 			include/ut0list.ic include/ut0wqueue.h		\
+ 			include/ha_prototypes.h handler/ha_innodb.h	\
+ 			include/handler0alter.h				\
+-			handler/i_s.h
++			handler/i_s.h handler/innodb_patch_info.h
+ 
+ EXTRA_LIBRARIES = libinnobase.a
+ noinst_LIBRARIES = @plugin_innobase_static_target@
+diff -ruN innodb_plugin-1.0.3_orig/handler/ha_innodb.cc innodb_plugin-1.0.3_tmp/handler/ha_innodb.cc
+--- innodb_plugin-1.0.3_orig/handler/ha_innodb.cc	2009-03-06 05:29:07.000000000 +0900
++++ innodb_plugin-1.0.3_tmp/handler/ha_innodb.cc	2009-03-18 17:14:19.000000000 +0900
+@@ -9940,7 +9940,8 @@
+ i_s_innodb_cmp,
+ i_s_innodb_cmp_reset,
+ i_s_innodb_cmpmem,
+-i_s_innodb_cmpmem_reset
++i_s_innodb_cmpmem_reset,
++i_s_innodb_patches
+ mysql_declare_plugin_end;
+ 
+ #ifdef UNIV_COMPILE_TEST_FUNCS
+diff -ruN innodb_plugin-1.0.3_orig/handler/i_s.cc innodb_plugin-1.0.3_tmp/handler/i_s.cc
+--- innodb_plugin-1.0.3_orig/handler/i_s.cc	2009-02-17 17:25:45.000000000 +0900
++++ innodb_plugin-1.0.3_tmp/handler/i_s.cc	2009-03-18 17:14:19.000000000 +0900
+@@ -31,6 +31,7 @@
+ #include <mysys_err.h>
+ #include <my_sys.h>
+ #include "i_s.h"
++#include "innodb_patch_info.h"
+ #include <mysql/plugin.h>
+ 
+ extern "C" {
+@@ -215,6 +216,168 @@
+ 	return(ret);
+ }
+ 
++/* Fields of the dynamic table INFORMATION_SCHEMA.innodb_patches */
++static ST_FIELD_INFO	innodb_patches_fields_info[] =
++{
++#define IDX_PATCH_NAME		0
++	{STRUCT_FLD(field_name,		"name"),
++	 STRUCT_FLD(field_length,	255),
++	 STRUCT_FLD(field_type,		MYSQL_TYPE_STRING),
++	 STRUCT_FLD(value,		0),
++	 STRUCT_FLD(field_flags,	0),
++	 STRUCT_FLD(old_name,		""),
++	 STRUCT_FLD(open_method,	SKIP_OPEN_TABLE)},
++
++#define IDX_PATCH_DESCR		1
++	{STRUCT_FLD(field_name,		"description"),
++	 STRUCT_FLD(field_length,	255),
++	 STRUCT_FLD(field_type,		MYSQL_TYPE_STRING),
++	 STRUCT_FLD(value,		0),
++	 STRUCT_FLD(field_flags,	0),
++	 STRUCT_FLD(old_name,		""),
++	 STRUCT_FLD(open_method,	SKIP_OPEN_TABLE)},
++
++#define IDX_PATCH_COMMENT		2
++	{STRUCT_FLD(field_name,		"comment"),
++	 STRUCT_FLD(field_length,	100),
++	 STRUCT_FLD(field_type,		MYSQL_TYPE_STRING),
++	 STRUCT_FLD(value,		0),
++	 STRUCT_FLD(field_flags,	0),
++	 STRUCT_FLD(old_name,		""),
++	 STRUCT_FLD(open_method,	SKIP_OPEN_TABLE)},
++
++#define IDX_PATCH_LINK			3
++	{STRUCT_FLD(field_name,		"link"),
++	 STRUCT_FLD(field_length,	255),
++	 STRUCT_FLD(field_type,		MYSQL_TYPE_STRING),
++	 STRUCT_FLD(value,		0),
++	 STRUCT_FLD(field_flags,	0),
++	 STRUCT_FLD(old_name,		""),
++	 STRUCT_FLD(open_method,	SKIP_OPEN_TABLE)},
++
++	END_OF_ST_FIELD_INFO
++};
++
++static struct st_mysql_information_schema	i_s_info =
++{
++	MYSQL_INFORMATION_SCHEMA_INTERFACE_VERSION
++};
++
++/***********************************************************************
++Fill the dynamic table information_schema.innodb_patches with one row
++per innodb_enhancements[] entry (the list ends at a NULL file name). */
++static
++int
++innodb_patches_fill(
++/*=============*/
++				/* out: 0 on success, 1 on failure */
++	THD*		thd,	/* in: thread */
++	TABLE_LIST*	tables,	/* in/out: tables to fill */
++	COND*		cond)	/* in: condition (ignored) */
++{
++	TABLE*	table	= (TABLE *) tables->table;
++	int	status	= 0;
++	int	i;
++	Field**	fields;
++
++	DBUG_ENTER("innodb_patches_fill");
++	fields = table->field;
++
++	/* deny access when the PROCESS_ACL check fails: report success
++	but store no rows */
++	if (check_global_access(thd, PROCESS_ACL)) {
++		DBUG_RETURN(0);
++	}
++
++	RETURN_IF_INNODB_NOT_STARTED(tables->schema_table_name);
++
++	for (i = 0; innodb_enhancements[i].file; i++) {
++		/* column positions are named by the IDX_PATCH_* defines
++		that accompany innodb_patches_fields_info */
++		field_store_string(fields[IDX_PATCH_NAME], innodb_enhancements[i].file);
++		field_store_string(fields[IDX_PATCH_DESCR], innodb_enhancements[i].name);
++		field_store_string(fields[IDX_PATCH_COMMENT], innodb_enhancements[i].comment);
++		field_store_string(fields[IDX_PATCH_LINK], innodb_enhancements[i].link);
++
++		/* stop at the first row that cannot be stored */
++		if (schema_table_store_record(thd, table)) {
++			status = 1;
++			break;
++		}
++	}
++
++	DBUG_RETURN(status);
++}
++
++/***********************************************************************
++Bind information_schema.innodb_patches: set its columns and fill callback. */
++static
++int
++innodb_patches_init(
++/*=========*/
++			/* out: always 0 */
++	void*	p)	/* in/out: ST_SCHEMA_TABLE to set up */
++{
++	DBUG_ENTER("innodb_patches_init");
++	ST_SCHEMA_TABLE* schema = (ST_SCHEMA_TABLE*) p;
++
++	schema->fields_info = innodb_patches_fields_info;
++	schema->fill_table = innodb_patches_fill;
++
++	DBUG_RETURN(0);
++}
++
++
++UNIV_INTERN struct st_mysql_plugin      i_s_innodb_patches =
++{
++        /* the plugin type (a MYSQL_XXX_PLUGIN value) */
++        /* int */
++        STRUCT_FLD(type, MYSQL_INFORMATION_SCHEMA_PLUGIN),
++
++        /* pointer to type-specific plugin descriptor */
++        /* void* */
++        STRUCT_FLD(info, &i_s_info),
++
++        /* plugin name */
++        /* const char* */
++        STRUCT_FLD(name, "XTRADB_ENHANCEMENTS"),
++
++        /* plugin author (for SHOW PLUGINS) */
++        /* const char* */
++        STRUCT_FLD(author, "Percona"),
++
++        /* general descriptive text (for SHOW PLUGINS) */
++        /* const char* */
++        STRUCT_FLD(descr, "Enhancements applied to InnoDB plugin"),
++
++        /* the plugin license (PLUGIN_LICENSE_XXX) */
++        /* int */
++        STRUCT_FLD(license, PLUGIN_LICENSE_GPL),
++
++        /* the function to invoke when plugin is loaded */
++        /* int (*)(void*); */
++        STRUCT_FLD(init, innodb_patches_init),
++
++        /* the function to invoke when plugin is unloaded */
++        /* int (*)(void*); */
++        STRUCT_FLD(deinit, i_s_common_deinit),
++
++        /* plugin version (for SHOW PLUGINS) */
++        /* unsigned int */
++        STRUCT_FLD(version, INNODB_VERSION_SHORT),
++
++        /* struct st_mysql_show_var* */
++        STRUCT_FLD(status_vars, NULL),
++
++        /* struct st_mysql_sys_var** */
++        STRUCT_FLD(system_vars, NULL),
++
++        /* reserved for dependency checking */
++        /* void* */
++        STRUCT_FLD(__reserved1, NULL)
++};
++
++
+ /* Fields of the dynamic table INFORMATION_SCHEMA.innodb_trx */
+ static ST_FIELD_INFO	innodb_trx_fields_info[] =
+ {
+@@ -399,10 +562,6 @@
+ 	DBUG_RETURN(0);
+ }
+ 
+-static struct st_mysql_information_schema	i_s_info =
+-{
+-	MYSQL_INFORMATION_SCHEMA_INTERFACE_VERSION
+-};
+ 
+ UNIV_INTERN struct st_mysql_plugin	i_s_innodb_trx =
+ {
+diff -ruN innodb_plugin-1.0.3_orig/handler/i_s.h innodb_plugin-1.0.3_tmp/handler/i_s.h
+--- innodb_plugin-1.0.3_orig/handler/i_s.h	2009-02-17 17:25:45.000000000 +0900
++++ innodb_plugin-1.0.3_tmp/handler/i_s.h	2009-03-18 17:14:19.000000000 +0900
+@@ -32,5 +32,6 @@
+ extern struct st_mysql_plugin	i_s_innodb_cmp_reset;
+ extern struct st_mysql_plugin	i_s_innodb_cmpmem;
+ extern struct st_mysql_plugin	i_s_innodb_cmpmem_reset;
++extern struct st_mysql_plugin	i_s_innodb_patches;
+ 
+ #endif /* i_s_h */
+diff -ruN innodb_plugin-1.0.3_orig/handler/innodb_patch_info.h innodb_plugin-1.0.3_tmp/handler/innodb_patch_info.h
+--- innodb_plugin-1.0.3_orig/handler/innodb_patch_info.h	1970-01-01 09:00:00.000000000 +0900
++++ innodb_plugin-1.0.3_tmp/handler/innodb_patch_info.h	2009-03-18 17:14:19.000000000 +0900
+@@ -0,0 +1,28 @@
++/* Copyright (C) 2002-2006 MySQL AB
++  
++   This program is free software; you can redistribute it and/or modify
++   it under the terms of the GNU General Public License as published by
++   the Free Software Foundation; version 2 of the License.
++  
++   This program is distributed in the hope that it will be useful,
++   but WITHOUT ANY WARRANTY; without even the implied warranty of
++   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
++   GNU General Public License for more details.
++
++   You should have received a copy of the GNU General Public License
++   along with this program; if not, write to the Free Software
++   Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA */
++
++#ifdef USE_PRAGMA_INTERFACE
++#pragma interface                      /* gcc class implementation */
++#endif
++
++struct innodb_enhancement {
++       const char *file;
++       const char *name;
++       const char *comment;
++       const char *link;
++}innodb_enhancements[] = {
++{"xtradb_show_enhancements","I_S.XTRADB_ENHANCEMENTS","","http://www.percona.com/docs/wiki/percona-xtradb"},
++{NULL, NULL, NULL, NULL}
++};
+diff -ruN innodb_plugin-1.0.3_orig/scripts/install_innodb_plugins.sql innodb_plugin-1.0.3_tmp/scripts/install_innodb_plugins.sql
+--- innodb_plugin-1.0.3_orig/scripts/install_innodb_plugins.sql	2008-10-30 19:38:18.000000000 +0900
++++ innodb_plugin-1.0.3_tmp/scripts/install_innodb_plugins.sql	2009-03-18 17:14:19.000000000 +0900
+@@ -7,3 +7,4 @@
+ INSTALL PLUGIN innodb_cmp_reset SONAME 'ha_innodb.so';
+ INSTALL PLUGIN innodb_cmpmem SONAME 'ha_innodb.so';
+ INSTALL PLUGIN innodb_cmpmem_reset SONAME 'ha_innodb.so';
++INSTALL PLUGIN XTRADB_ENHANCEMENTS SONAME 'ha_innodb.so';

=== added file 'innodb_show_status.patch'
--- innodb_show_status.patch	1970-01-01 00:00:00 +0000
+++ innodb_show_status.patch	2009-03-20 05:35:54 +0000
@@ -0,0 +1,493 @@
+diff -ruN innodb_plugin-1.0.3_orig/buf/buf0buf.c innodb_plugin-1.0.3_tmp/buf/buf0buf.c
+--- innodb_plugin-1.0.3_orig/buf/buf0buf.c	2009-02-17 16:56:33.000000000 +0900
++++ innodb_plugin-1.0.3_tmp/buf/buf0buf.c	2009-03-18 17:19:36.000000000 +0900
+@@ -3760,13 +3760,15 @@
+ 	buf_pool_mutex_enter();
+ 
+ 	fprintf(file,
+-		"Buffer pool size   %lu\n"
+-		"Free buffers       %lu\n"
+-		"Database pages     %lu\n"
+-		"Modified db pages  %lu\n"
++		"Buffer pool size        %lu\n"
++		"Buffer pool size, bytes %lu\n"
++		"Free buffers            %lu\n"
++		"Database pages          %lu\n"
++		"Modified db pages       %lu\n"
+ 		"Pending reads %lu\n"
+ 		"Pending writes: LRU %lu, flush list %lu, single page %lu\n",
+ 		(ulong) size,
++		(ulong) size * UNIV_PAGE_SIZE,
+ 		(ulong) UT_LIST_GET_LEN(buf_pool->free),
+ 		(ulong) UT_LIST_GET_LEN(buf_pool->LRU),
+ 		(ulong) UT_LIST_GET_LEN(buf_pool->flush_list),
+diff -ruN innodb_plugin-1.0.3_orig/fil/fil0fil.c innodb_plugin-1.0.3_tmp/fil/fil0fil.c
+--- innodb_plugin-1.0.3_orig/fil/fil0fil.c	2009-02-17 17:15:06.000000000 +0900
++++ innodb_plugin-1.0.3_tmp/fil/fil0fil.c	2009-03-18 17:19:36.000000000 +0900
+@@ -4788,3 +4788,30 @@
+ 
+ 	return(mach_read_from_2(page + FIL_PAGE_TYPE));
+ }
++
++/*************************************************************************
++Return local hash table information. */
++
++ulint
++fil_system_hash_cells(void)
++/*=======================*/
++{
++       if (fil_system) {
++               return (fil_system->spaces->n_cells
++                       + fil_system->name_hash->n_cells);
++       } else {
++               return 0;
++       }
++}
++
++ulint
++fil_system_hash_nodes(void)
++/*=======================*/
++{
++       if (fil_system) {
++               return (UT_LIST_GET_LEN(fil_system->space_list)
++                       * (sizeof(fil_space_t) + MEM_BLOCK_HEADER_SIZE));
++       } else {
++               return 0;
++       }
++}
+diff -ruN innodb_plugin-1.0.3_orig/handler/ha_innodb.cc innodb_plugin-1.0.3_tmp/handler/ha_innodb.cc
+--- innodb_plugin-1.0.3_orig/handler/ha_innodb.cc	2009-03-18 17:18:40.000000000 +0900
++++ innodb_plugin-1.0.3_tmp/handler/ha_innodb.cc	2009-03-18 17:19:36.000000000 +0900
+@@ -9542,6 +9542,16 @@
+   "Force InnoDB to not use next-key locking, to use only row-level locking.",
+   NULL, NULL, FALSE);
+ 
++static MYSQL_SYSVAR_ULONG(show_verbose_locks, srv_show_verbose_locks,
++  PLUGIN_VAR_OPCMDARG,
++  "Whether to show records locked in SHOW INNODB STATUS.",
++  NULL, NULL, 0, 0, 1, 0);
++
++static MYSQL_SYSVAR_ULONG(show_locks_held, srv_show_locks_held,
++  PLUGIN_VAR_RQCMDARG,
++  "Number of locks held to print for each InnoDB transaction in SHOW INNODB STATUS.",
++  NULL, NULL, 10, 0, 1000, 0);
++
+ #ifdef UNIV_LOG_ARCHIVE
+ static MYSQL_SYSVAR_STR(log_arch_dir, innobase_log_arch_dir,
+   PLUGIN_VAR_RQCMDARG | PLUGIN_VAR_READONLY,
+@@ -9692,7 +9702,7 @@
+ 
+ static MYSQL_SYSVAR_STR(version, innodb_version_str,
+   PLUGIN_VAR_NOCMDOPT | PLUGIN_VAR_READONLY,
+-  "InnoDB version", NULL, NULL, INNODB_VERSION_STR);
++  "Percona-InnoDB-plugin version", NULL, NULL, INNODB_VERSION_STR);
+ 
+ static MYSQL_SYSVAR_BOOL(use_sys_malloc, srv_use_sys_malloc,
+   PLUGIN_VAR_NOCMDARG | PLUGIN_VAR_READONLY,
+@@ -9751,6 +9761,8 @@
+   MYSQL_SYSVAR(thread_concurrency),
+   MYSQL_SYSVAR(thread_sleep_delay),
+   MYSQL_SYSVAR(autoinc_lock_mode),
++  MYSQL_SYSVAR(show_verbose_locks),
++  MYSQL_SYSVAR(show_locks_held),
+   MYSQL_SYSVAR(version),
+   MYSQL_SYSVAR(use_sys_malloc),
+   MYSQL_SYSVAR(change_buffering),
+diff -ruN innodb_plugin-1.0.3_orig/handler/innodb_patch_info.h innodb_plugin-1.0.3_tmp/handler/innodb_patch_info.h
+--- innodb_plugin-1.0.3_orig/handler/innodb_patch_info.h	2009-03-18 17:18:40.000000000 +0900
++++ innodb_plugin-1.0.3_tmp/handler/innodb_patch_info.h	2009-03-18 17:19:36.000000000 +0900
+@@ -24,5 +24,6 @@
+        const char *link;
+ }innodb_enhancements[] = {
+ {"xtradb_show_enhancements","I_S.XTRADB_ENHANCEMENTS","","http://www.percona.com/docs/wiki/percona-xtradb"},
++{"innodb_show_status","Improvements to SHOW INNODB STATUS","Memory information and lock info fixes","http://www.percona.com/docs/wiki/percona-xtradb"},
+ {NULL, NULL, NULL, NULL}
+ };
+diff -ruN innodb_plugin-1.0.3_orig/include/fil0fil.h innodb_plugin-1.0.3_tmp/include/fil0fil.h
+--- innodb_plugin-1.0.3_orig/include/fil0fil.h	2009-02-17 18:06:49.000000000 +0900
++++ innodb_plugin-1.0.3_tmp/include/fil0fil.h	2009-03-18 17:19:36.000000000 +0900
+@@ -695,6 +695,16 @@
+ 				return value not defined */
+ 	const byte*	page);	/* in: file page */
+ 
++/*************************************************************************
++Return local hash table information. */
++
++ulint
++fil_system_hash_cells(void);
++/*========================*/
++
++ulint
++fil_system_hash_nodes(void);
++/*========================*/
+ 
+ typedef	struct fil_space_struct	fil_space_t;
+ 
+diff -ruN innodb_plugin-1.0.3_orig/include/srv0srv.h innodb_plugin-1.0.3_tmp/include/srv0srv.h
+--- innodb_plugin-1.0.3_orig/include/srv0srv.h	2009-02-25 19:09:15.000000000 +0900
++++ innodb_plugin-1.0.3_tmp/include/srv0srv.h	2009-03-18 17:19:36.000000000 +0900
+@@ -112,6 +112,9 @@
+ extern ulint	srv_log_buffer_size;
+ extern ulong	srv_flush_log_at_trx_commit;
+ 
++extern ulint    srv_show_locks_held;
++extern ulint    srv_show_verbose_locks;
++
+ /* The sort order table of the MySQL latin1_swedish_ci character set
+ collation */
+ extern const byte*	srv_latin1_ordering;
+diff -ruN innodb_plugin-1.0.3_orig/include/thr0loc.h innodb_plugin-1.0.3_tmp/include/thr0loc.h
+--- innodb_plugin-1.0.3_orig/include/thr0loc.h	2009-02-17 18:39:11.000000000 +0900
++++ innodb_plugin-1.0.3_tmp/include/thr0loc.h	2009-03-18 17:19:36.000000000 +0900
+@@ -76,6 +76,17 @@
+ /*=============================*/
+ 			/* out: pointer to the in_ibuf field */
+ 
++/*************************************************************************
++Return local hash table information. */
++
++ulint
++thr_local_hash_cells(void);
++/*=======================*/
++
++ulint
++thr_local_hash_nodes(void);
++/*=======================*/
++
+ #ifndef UNIV_NONINL
+ #include "thr0loc.ic"
+ #endif
+diff -ruN innodb_plugin-1.0.3_orig/include/univ.i innodb_plugin-1.0.3_tmp/include/univ.i
+--- innodb_plugin-1.0.3_orig/include/univ.i	2009-03-05 23:38:59.000000000 +0900
++++ innodb_plugin-1.0.3_tmp/include/univ.i	2009-03-18 17:37:39.000000000 +0900
+@@ -35,6 +35,7 @@
+ #define INNODB_VERSION_MAJOR	1
+ #define INNODB_VERSION_MINOR	0
+ #define INNODB_VERSION_BUGFIX	3
++#define PERCONA_INNODB_VERSION	3
+ 
+ /* The following is the InnoDB version as shown in
+ SELECT plugin_version FROM information_schema.plugins;
+@@ -46,13 +47,14 @@
+ 	(INNODB_VERSION_MAJOR << 8 | INNODB_VERSION_MINOR)
+ 
+ /* auxiliary macros to help creating the version as string */
+-#define __INNODB_VERSION(a, b, c)	(#a "." #b "." #c)
+-#define _INNODB_VERSION(a, b, c)	__INNODB_VERSION(a, b, c)
++#define __INNODB_VERSION(a, b, c, d)	(#a "." #b "." #c "-" #d)
++#define _INNODB_VERSION(a, b, c, d)	__INNODB_VERSION(a, b, c, d)
+ 
+ #define INNODB_VERSION_STR			\
+ 	_INNODB_VERSION(INNODB_VERSION_MAJOR,	\
+ 			INNODB_VERSION_MINOR,	\
+-			INNODB_VERSION_BUGFIX)
++			INNODB_VERSION_BUGFIX,	\
++			PERCONA_INNODB_VERSION)
+ 
+ #ifdef MYSQL_DYNAMIC_PLUGIN
+ /* In the dynamic plugin, redefine some externally visible symbols
+diff -ruN innodb_plugin-1.0.3_orig/lock/lock0lock.c innodb_plugin-1.0.3_tmp/lock/lock0lock.c
+--- innodb_plugin-1.0.3_orig/lock/lock0lock.c	2009-02-17 18:50:12.000000000 +0900
++++ innodb_plugin-1.0.3_tmp/lock/lock0lock.c	2009-03-18 17:19:36.000000000 +0900
+@@ -4319,32 +4319,32 @@
+ 
+ 	putc('\n', file);
+ 
+-	block = buf_page_try_get(space, page_no, &mtr);
+-
+-	if (block) {
+-		for (i = 0; i < lock_rec_get_n_bits(lock); i++) {
+-
+-			if (lock_rec_get_nth_bit(lock, i)) {
+-
+-				const rec_t*	rec
+-					= page_find_rec_with_heap_no(
+-						buf_block_get_frame(block), i);
+-				offsets = rec_get_offsets(
+-					rec, lock->index, offsets,
+-					ULINT_UNDEFINED, &heap);
+-
+-				fprintf(file, "Record lock, heap no %lu ",
+-					(ulong) i);
+-				rec_print_new(file, rec, offsets);
+-				putc('\n', file);
++	if ( srv_show_verbose_locks ) {
++		block = buf_page_try_get(space, page_no, &mtr);
++		if (block) {
++			for (i = 0; i < lock_rec_get_n_bits(lock); i++) {
++
++				if (lock_rec_get_nth_bit(lock, i)) {
++
++					const rec_t*	rec
++						= page_find_rec_with_heap_no(
++								buf_block_get_frame(block), i);
++					offsets = rec_get_offsets(
++							rec, lock->index, offsets,
++							ULINT_UNDEFINED, &heap);
++
++					fprintf(file, "Record lock, heap no %lu ",
++							(ulong) i);
++					rec_print_new(file, rec, offsets);
++					putc('\n', file);
++				}
++			}
++		} else {
++			for (i = 0; i < lock_rec_get_n_bits(lock); i++) {
++				fprintf(file, "Record lock, heap no %lu\n", (ulong) i);
+ 			}
+-		}
+-	} else {
+-		for (i = 0; i < lock_rec_get_n_bits(lock); i++) {
+-			fprintf(file, "Record lock, heap no %lu\n", (ulong) i);
+ 		}
+ 	}
+-
+ 	mtr_commit(&mtr);
+ 	if (UNIV_LIKELY_NULL(heap)) {
+ 		mem_heap_free(heap);
+@@ -4523,7 +4523,7 @@
+ 		}
+ 	}
+ 
+-	if (!srv_print_innodb_lock_monitor) {
++        if (!srv_print_innodb_lock_monitor && !srv_show_locks_held) {
+ 		nth_trx++;
+ 		goto loop;
+ 	}
+@@ -4582,8 +4582,8 @@
+ 
+ 	nth_lock++;
+ 
+-	if (nth_lock >= 10) {
+-		fputs("10 LOCKS PRINTED FOR THIS TRX:"
++	if (nth_lock >= srv_show_locks_held) {
++		fputs("TOO LOCKS PRINTED FOR THIS TRX:"
+ 		      " SUPPRESSING FURTHER PRINTS\n",
+ 		      file);
+ 
+diff -ruN innodb_plugin-1.0.3_orig/srv/srv0srv.c innodb_plugin-1.0.3_tmp/srv/srv0srv.c
+--- innodb_plugin-1.0.3_orig/srv/srv0srv.c	2009-02-25 19:09:15.000000000 +0900
++++ innodb_plugin-1.0.3_tmp/srv/srv0srv.c	2009-03-18 17:19:36.000000000 +0900
+@@ -156,6 +156,10 @@
+ UNIV_INTERN ulint	srv_log_buffer_size	= ULINT_MAX;
+ UNIV_INTERN ulong	srv_flush_log_at_trx_commit = 1;
+ 
++UNIV_INTERN ulint  srv_show_locks_held     = 10;
++UNIV_INTERN ulint  srv_show_verbose_locks  = 0;
++
++
+ /* The sort order table of the MySQL latin1_swedish_ci character set
+ collation */
+ UNIV_INTERN const byte*	srv_latin1_ordering;
+@@ -1644,6 +1648,14 @@
+ 	time_t	current_time;
+ 	ulint	n_reserved;
+ 
++	ulint	btr_search_sys_subtotal;
++	ulint	lock_sys_subtotal;
++	ulint	recv_sys_subtotal;
++	ulint	io_counter_subtotal;
++
++	ulint	i;
++	trx_t*	trx;
++
+ 	mutex_enter(&srv_innodb_monitor_mutex);
+ 
+ 	current_time = time(NULL);
+@@ -1687,24 +1699,6 @@
+ 
+ 	mutex_exit(&dict_foreign_err_mutex);
+ 
+-	lock_print_info_summary(file);
+-	if (trx_start) {
+-		long	t = ftell(file);
+-		if (t < 0) {
+-			*trx_start = ULINT_UNDEFINED;
+-		} else {
+-			*trx_start = (ulint) t;
+-		}
+-	}
+-	lock_print_info_all_transactions(file);
+-	if (trx_end) {
+-		long	t = ftell(file);
+-		if (t < 0) {
+-			*trx_end = ULINT_UNDEFINED;
+-		} else {
+-			*trx_end = (ulint) t;
+-		}
+-	}
+ 	fputs("--------\n"
+ 	      "FILE I/O\n"
+ 	      "--------\n", file);
+@@ -1735,10 +1729,84 @@
+ 	      "BUFFER POOL AND MEMORY\n"
+ 	      "----------------------\n", file);
+ 	fprintf(file,
+-		"Total memory allocated " ULINTPF
+-		"; in additional pool allocated " ULINTPF "\n",
+-		ut_total_allocated_memory,
+-		mem_pool_get_reserved(mem_comm_pool));
++			"Total memory allocated " ULINTPF
++			"; in additional pool allocated " ULINTPF "\n",
++			ut_total_allocated_memory,
++			mem_pool_get_reserved(mem_comm_pool));
++	/* Calculate reserved memory */
++	if (btr_search_sys && btr_search_sys->hash_index->heap) {
++		btr_search_sys_subtotal = mem_heap_get_size(btr_search_sys->hash_index->heap);
++	} else {
++		btr_search_sys_subtotal = 0;
++		for (i=0; i < btr_search_sys->hash_index->n_mutexes; i++) {
++			btr_search_sys_subtotal += mem_heap_get_size(btr_search_sys->hash_index->heaps[i]);
++		}
++	}
++
++	lock_sys_subtotal = 0;
++	if (trx_sys) {
++		mutex_enter(&kernel_mutex);
++		trx = UT_LIST_GET_FIRST(trx_sys->mysql_trx_list);
++		while (trx) {
++			lock_sys_subtotal += ((trx->lock_heap) ? mem_heap_get_size(trx->lock_heap) : 0);
++			trx = UT_LIST_GET_NEXT(mysql_trx_list, trx);
++		}
++		mutex_exit(&kernel_mutex);
++	}
++
++	recv_sys_subtotal = ((recv_sys && recv_sys->addr_hash)
++			? mem_heap_get_size(recv_sys->heap) : 0);
++
++	fprintf(file,
++			"Internal hash tables (constant factor + variable factor)\n"
++			"    Adaptive hash index %lu \t(%lu + %lu)\n"
++			"    Page hash           %lu\n"
++			"    Dictionary cache    %lu \t(%lu + %lu)\n"
++			"    File system         %lu \t(%lu + %lu)\n"
++			"    Lock system         %lu \t(%lu + %lu)\n"
++			"    Recovery system     %lu \t(%lu + %lu)\n"
++			"    Threads             %lu \t(%lu + %lu)\n",
++
++			(ulong) (btr_search_sys
++				? (btr_search_sys->hash_index->n_cells * sizeof(hash_cell_t)) : 0)
++			+ btr_search_sys_subtotal,
++			(ulong) (btr_search_sys
++				? (btr_search_sys->hash_index->n_cells * sizeof(hash_cell_t)) : 0),
++			(ulong) btr_search_sys_subtotal,
++
++			(ulong) (buf_pool->page_hash->n_cells * sizeof(hash_cell_t)),
++
++			(ulong) (dict_sys ? ((dict_sys->table_hash->n_cells
++						+ dict_sys->table_id_hash->n_cells
++						) * sizeof(hash_cell_t)
++					+ dict_sys->size) : 0),
++			(ulong) (dict_sys ? ((dict_sys->table_hash->n_cells
++							+ dict_sys->table_id_hash->n_cells
++							) * sizeof(hash_cell_t)) : 0),
++			(ulong) (dict_sys ? (dict_sys->size) : 0),
++
++			(ulong) (fil_system_hash_cells() * sizeof(hash_cell_t)
++					+ fil_system_hash_nodes()),
++			(ulong) (fil_system_hash_cells() * sizeof(hash_cell_t)),
++			(ulong) fil_system_hash_nodes(),
++
++			(ulong) ((lock_sys ? (lock_sys->rec_hash->n_cells * sizeof(hash_cell_t)) : 0)
++					+ lock_sys_subtotal),
++			(ulong) (lock_sys ? (lock_sys->rec_hash->n_cells * sizeof(hash_cell_t)) : 0),
++			(ulong) lock_sys_subtotal,
++
++			(ulong) (((recv_sys && recv_sys->addr_hash)
++						? (recv_sys->addr_hash->n_cells * sizeof(hash_cell_t)) : 0)
++					+ recv_sys_subtotal),
++			(ulong) ((recv_sys && recv_sys->addr_hash)
++					? (recv_sys->addr_hash->n_cells * sizeof(hash_cell_t)) : 0),
++			(ulong) recv_sys_subtotal,
++
++			(ulong) (thr_local_hash_cells() * sizeof(hash_cell_t)
++					+ thr_local_hash_nodes()),
++			(ulong) (thr_local_hash_cells() * sizeof(hash_cell_t)),
++			(ulong) thr_local_hash_nodes());
++
+ 	fprintf(file, "Dictionary memory allocated " ULINTPF "\n",
+ 		dict_sys->size);
+ 
+@@ -1797,6 +1865,25 @@
+ 	srv_n_rows_deleted_old = srv_n_rows_deleted;
+ 	srv_n_rows_read_old = srv_n_rows_read;
+ 
++	lock_print_info_summary(file);
++	if (trx_start) {
++		long	t = ftell(file);
++		if (t < 0) {
++			*trx_start = ULINT_UNDEFINED;
++		} else {
++			*trx_start = (ulint) t;
++		}
++	}
++	lock_print_info_all_transactions(file);
++	if (trx_end) {
++		long	t = ftell(file);
++		if (t < 0) {
++			*trx_end = ULINT_UNDEFINED;
++		} else {
++			*trx_end = (ulint) t;
++		}
++	}
++
+ 	fputs("----------------------------\n"
+ 	      "END OF INNODB MONITOR OUTPUT\n"
+ 	      "============================\n", file);
+diff -ruN innodb_plugin-1.0.3_orig/thr/thr0loc.c innodb_plugin-1.0.3_tmp/thr/thr0loc.c
+--- innodb_plugin-1.0.3_orig/thr/thr0loc.c	2009-02-17 19:09:16.000000000 +0900
++++ innodb_plugin-1.0.3_tmp/thr/thr0loc.c	2009-03-18 17:19:36.000000000 +0900
+@@ -48,6 +48,7 @@
+ 
+ /* The hash table. The module is not yet initialized when it is NULL. */
+ static hash_table_t*	thr_local_hash	= NULL;
++ulint		thr_local_hash_n_nodes = 0;
+ 
+ /* The private data for each thread should be put to
+ the structure below and the accessor functions written
+@@ -193,6 +194,7 @@
+ 		    os_thread_pf(os_thread_get_curr_id()),
+ 		    local);
+ 
++	thr_local_hash_n_nodes++;
+ 	mutex_exit(&thr_local_mutex);
+ }
+ 
+@@ -220,6 +222,7 @@
+ 
+ 	HASH_DELETE(thr_local_t, hash, thr_local_hash,
+ 		    os_thread_pf(id), local);
++	thr_local_hash_n_nodes--;
+ 
+ 	mutex_exit(&thr_local_mutex);
+ 
+@@ -242,3 +245,29 @@
+ 
+ 	mutex_create(&thr_local_mutex, SYNC_THR_LOCAL);
+ }
++
++/*************************************************************************
++Return local hash table information. */
++
++ulint
++thr_local_hash_cells(void)
++/*======================*/
++{
++	if (thr_local_hash) {
++		return (thr_local_hash->n_cells);
++	} else {
++		return 0;
++	}
++}
++
++ulint
++thr_local_hash_nodes(void)
++/*======================*/
++{
++	if (thr_local_hash) {
++		return (thr_local_hash_n_nodes
++			* (sizeof(thr_local_t) + MEM_BLOCK_HEADER_SIZE));
++	} else {
++		return 0;
++	}
++}

=== added file 'innodb_split_buf_pool_mutex.patch'
--- innodb_split_buf_pool_mutex.patch	1970-01-01 00:00:00 +0000
+++ innodb_split_buf_pool_mutex.patch	2009-03-20 05:35:54 +0000
@@ -0,0 +1,3356 @@
+diff -ruN innodb_plugin-1.0.3_orig/btr/btr0cur.c innodb_plugin-1.0.3_tmp/btr/btr0cur.c
+--- innodb_plugin-1.0.3_orig/btr/btr0cur.c	2009-02-27 06:27:51.000000000 +0900
++++ innodb_plugin-1.0.3_tmp/btr/btr0cur.c	2009-03-19 18:51:20.000000000 +0900
+@@ -3733,7 +3733,8 @@
+ 
+ 	mtr_commit(mtr);
+ 
+-	buf_pool_mutex_enter();
++	//buf_pool_mutex_enter();
++	mutex_enter(&LRU_list_mutex);
+ 	mutex_enter(&block->mutex);
+ 
+ 	/* Only free the block if it is still allocated to
+@@ -3744,17 +3745,22 @@
+ 	    && buf_block_get_space(block) == space
+ 	    && buf_block_get_page_no(block) == page_no) {
+ 
+-		if (buf_LRU_free_block(&block->page, all, NULL)
++		if (buf_LRU_free_block(&block->page, all, NULL, TRUE)
+ 		    != BUF_LRU_FREED
+-		    && all && block->page.zip.data) {
++		    && all && block->page.zip.data
++		    /* buf_LRU_free_block() may now release the mutex temporarily: re-check */
++		    && buf_block_get_state(block) == BUF_BLOCK_FILE_PAGE
++		    && buf_block_get_space(block) == space
++		    && buf_block_get_page_no(block) == page_no) {
+ 			/* Attempt to deallocate the uncompressed page
+ 			if the whole block cannot be deallocted. */
+ 
+-			buf_LRU_free_block(&block->page, FALSE, NULL);
++			buf_LRU_free_block(&block->page, FALSE, NULL, TRUE);
+ 		}
+ 	}
+ 
+-	buf_pool_mutex_exit();
++	//buf_pool_mutex_exit();
++	mutex_exit(&LRU_list_mutex);
+ 	mutex_exit(&block->mutex);
+ }
+ 
+diff -ruN innodb_plugin-1.0.3_orig/btr/btr0sea.c innodb_plugin-1.0.3_tmp/btr/btr0sea.c
+--- innodb_plugin-1.0.3_orig/btr/btr0sea.c	2009-02-17 16:56:33.000000000 +0900
++++ innodb_plugin-1.0.3_tmp/btr/btr0sea.c	2009-03-19 18:51:20.000000000 +0900
+@@ -1731,7 +1731,8 @@
+ 	rec_offs_init(offsets_);
+ 
+ 	rw_lock_x_lock(&btr_search_latch);
+-	buf_pool_mutex_enter();
++	//buf_pool_mutex_enter();
++	rw_lock_x_lock(&page_hash_latch);
+ 
+ 	cell_count = hash_get_n_cells(btr_search_sys->hash_index);
+ 
+@@ -1739,11 +1740,13 @@
+ 		/* We release btr_search_latch every once in a while to
+ 		give other queries a chance to run. */
+ 		if ((i != 0) && ((i % chunk_size) == 0)) {
+-			buf_pool_mutex_exit();
++			//buf_pool_mutex_exit();
++			rw_lock_x_unlock(&page_hash_latch);
+ 			rw_lock_x_unlock(&btr_search_latch);
+ 			os_thread_yield();
+ 			rw_lock_x_lock(&btr_search_latch);
+-			buf_pool_mutex_enter();
++			//buf_pool_mutex_enter();
++			rw_lock_x_lock(&page_hash_latch);
+ 		}
+ 
+ 		node = hash_get_nth_cell(btr_search_sys->hash_index, i)->node;
+@@ -1850,11 +1853,13 @@
+ 		/* We release btr_search_latch every once in a while to
+ 		give other queries a chance to run. */
+ 		if (i != 0) {
+-			buf_pool_mutex_exit();
++			//buf_pool_mutex_exit();
++			rw_lock_x_unlock(&page_hash_latch);
+ 			rw_lock_x_unlock(&btr_search_latch);
+ 			os_thread_yield();
+ 			rw_lock_x_lock(&btr_search_latch);
+-			buf_pool_mutex_enter();
++			//buf_pool_mutex_enter();
++			rw_lock_x_lock(&page_hash_latch);
+ 		}
+ 
+ 		if (!ha_validate(btr_search_sys->hash_index, i, end_index)) {
+@@ -1862,7 +1867,8 @@
+ 		}
+ 	}
+ 
+-	buf_pool_mutex_exit();
++	//buf_pool_mutex_exit();
++	rw_lock_x_unlock(&page_hash_latch);
+ 	rw_lock_x_unlock(&btr_search_latch);
+ 	if (UNIV_LIKELY_NULL(heap)) {
+ 		mem_heap_free(heap);
+diff -ruN innodb_plugin-1.0.3_orig/buf/buf0buddy.c innodb_plugin-1.0.3_tmp/buf/buf0buddy.c
+--- innodb_plugin-1.0.3_orig/buf/buf0buddy.c	2009-02-17 17:12:02.000000000 +0900
++++ innodb_plugin-1.0.3_tmp/buf/buf0buddy.c	2009-03-19 18:51:20.000000000 +0900
+@@ -131,14 +131,15 @@
+ {
+ 	buf_page_t*	bpage;
+ 
+-	ut_ad(buf_pool_mutex_own());
++	//ut_ad(buf_pool_mutex_own());
++	ut_ad(mutex_own(&zip_free_mutex));
+ 	ut_a(i < BUF_BUDDY_SIZES);
+ 
+ #if defined UNIV_DEBUG && !defined UNIV_DEBUG_VALGRIND
+ 	/* Valgrind would complain about accessing free memory. */
+ 	UT_LIST_VALIDATE(list, buf_page_t, buf_pool->zip_free[i]);
+ #endif /* UNIV_DEBUG && !UNIV_DEBUG_VALGRIND */
+-	bpage = UT_LIST_GET_FIRST(buf_pool->zip_free[i]);
++	bpage = UT_LIST_GET_LAST(buf_pool->zip_free[i]);
+ 
+ 	if (bpage) {
+ 		UNIV_MEM_VALID(bpage, BUF_BUDDY_LOW << i);
+@@ -177,16 +178,19 @@
+ void
+ buf_buddy_block_free(
+ /*=================*/
+-	void*	buf)	/* in: buffer frame to deallocate */
++	void*	buf,	/* in: buffer frame to deallocate */
++	ibool	have_page_hash_mutex)
+ {
+ 	const ulint	fold	= BUF_POOL_ZIP_FOLD_PTR(buf);
+ 	buf_page_t*	bpage;
+ 	buf_block_t*	block;
+ 
+-	ut_ad(buf_pool_mutex_own());
++	//ut_ad(buf_pool_mutex_own());
+ 	ut_ad(!mutex_own(&buf_pool_zip_mutex));
+ 	ut_a(!ut_align_offset(buf, UNIV_PAGE_SIZE));
+ 
++	mutex_enter(&zip_hash_mutex);
++
+ 	HASH_SEARCH(hash, buf_pool->zip_hash, fold, buf_page_t*, bpage,
+ 		    ut_ad(buf_page_get_state(bpage) == BUF_BLOCK_MEMORY
+ 			  && bpage->in_zip_hash && !bpage->in_page_hash),
+@@ -198,12 +202,14 @@
+ 	ut_d(bpage->in_zip_hash = FALSE);
+ 	HASH_DELETE(buf_page_t, hash, buf_pool->zip_hash, fold, bpage);
+ 
++	mutex_exit(&zip_hash_mutex);
++
+ 	ut_d(memset(buf, 0, UNIV_PAGE_SIZE));
+ 	UNIV_MEM_INVALID(buf, UNIV_PAGE_SIZE);
+ 
+ 	block = (buf_block_t*) bpage;
+ 	mutex_enter(&block->mutex);
+-	buf_LRU_block_free_non_file_page(block);
++	buf_LRU_block_free_non_file_page(block, have_page_hash_mutex);
+ 	mutex_exit(&block->mutex);
+ 
+ 	ut_ad(buf_buddy_n_frames > 0);
+@@ -219,7 +225,7 @@
+ 	buf_block_t*	block)	/* in: buffer frame to allocate */
+ {
+ 	const ulint	fold = BUF_POOL_ZIP_FOLD(block);
+-	ut_ad(buf_pool_mutex_own());
++	//ut_ad(buf_pool_mutex_own());
+ 	ut_ad(!mutex_own(&buf_pool_zip_mutex));
+ 
+ 	buf_block_set_state(block, BUF_BLOCK_MEMORY);
+@@ -230,7 +236,10 @@
+ 	ut_ad(!block->page.in_page_hash);
+ 	ut_ad(!block->page.in_zip_hash);
+ 	ut_d(block->page.in_zip_hash = TRUE);
++
++	mutex_enter(&zip_hash_mutex);
+ 	HASH_INSERT(buf_page_t, hash, buf_pool->zip_hash, fold, &block->page);
++	mutex_exit(&zip_hash_mutex);
+ 
+ 	ut_d(buf_buddy_n_frames++);
+ }
+@@ -284,24 +293,28 @@
+ 			possibly NULL if lru==NULL */
+ 	ulint	i,	/* in: index of buf_pool->zip_free[],
+ 			or BUF_BUDDY_SIZES */
+-	ibool*	lru)	/* in: pointer to a variable that will be assigned
++	ibool*	lru,	/* in: pointer to a variable that will be assigned
+ 			TRUE if storage was allocated from the LRU list
+ 			and buf_pool_mutex was temporarily released,
+ 			or NULL if the LRU list should not be used */
++	ibool	have_page_hash_mutex)
+ {
+ 	buf_block_t*	block;
+ 
+-	ut_ad(buf_pool_mutex_own());
++	//ut_ad(buf_pool_mutex_own());
+ 	ut_ad(!mutex_own(&buf_pool_zip_mutex));
+ 
+ 	if (i < BUF_BUDDY_SIZES) {
+ 		/* Try to allocate from the buddy system. */
++		mutex_enter(&zip_free_mutex);
+ 		block = buf_buddy_alloc_zip(i);
+ 
+ 		if (block) {
+ 
+ 			goto func_exit;
+ 		}
++
++		mutex_exit(&zip_free_mutex);
+ 	}
+ 
+ 	/* Try allocating from the buf_pool->free list. */
+@@ -318,18 +331,31 @@
+ 	}
+ 
+ 	/* Try replacing an uncompressed page in the buffer pool. */
+-	buf_pool_mutex_exit();
++	//buf_pool_mutex_exit();
++	mutex_exit(&LRU_list_mutex);
++	if (have_page_hash_mutex) {
++		mutex_exit(&flush_list_mutex);
++		rw_lock_x_unlock(&page_hash_latch);
++	}
+ 	block = buf_LRU_get_free_block(0);
+ 	*lru = TRUE;
+-	buf_pool_mutex_enter();
++	//buf_pool_mutex_enter();
++	mutex_enter(&LRU_list_mutex);
++	if (have_page_hash_mutex) {
++		mutex_enter(&flush_list_mutex);
++		rw_lock_x_lock(&page_hash_latch);
++	}
+ 
+ alloc_big:
+ 	buf_buddy_block_register(block);
+ 
++	mutex_enter(&zip_free_mutex);
+ 	block = buf_buddy_alloc_from(block->frame, i, BUF_BUDDY_SIZES);
+ 
+ func_exit:
+ 	buf_buddy_stat[i].used++;
++	mutex_exit(&zip_free_mutex);
++
+ 	return(block);
+ }
+ 
+@@ -345,7 +371,8 @@
+ {
+ 	buf_page_t*	b;
+ 
+-	ut_ad(buf_pool_mutex_own());
++	//ut_ad(buf_pool_mutex_own());
++	ut_ad(mutex_own(&flush_list_mutex));
+ 
+ 	switch (buf_page_get_state(bpage)) {
+ 	case BUF_BLOCK_ZIP_FREE:
+@@ -354,7 +381,7 @@
+ 	case BUF_BLOCK_FILE_PAGE:
+ 	case BUF_BLOCK_MEMORY:
+ 	case BUF_BLOCK_REMOVE_HASH:
+-		ut_error;
++		/* ut_error; */ /* optimistic: fall through to return(FALSE) */
+ 	case BUF_BLOCK_ZIP_DIRTY:
+ 		/* Cannot relocate dirty pages. */
+ 		return(FALSE);
+@@ -364,9 +391,17 @@
+ 	}
+ 
+ 	mutex_enter(&buf_pool_zip_mutex);
++	mutex_enter(&zip_free_mutex);
+ 
+ 	if (!buf_page_can_relocate(bpage)) {
+ 		mutex_exit(&buf_pool_zip_mutex);
++		mutex_exit(&zip_free_mutex);
++		return(FALSE);
++	}
++
++	if (bpage != buf_page_hash_get(bpage->space, bpage->offset)) {
++		mutex_exit(&buf_pool_zip_mutex);
++		mutex_exit(&zip_free_mutex);
+ 		return(FALSE);
+ 	}
+ 
+@@ -384,6 +419,7 @@
+ 	}
+ 
+ 	mutex_exit(&buf_pool_zip_mutex);
++	mutex_exit(&zip_free_mutex);
+ 	return(TRUE);
+ }
+ 
+@@ -396,13 +432,15 @@
+ 			/* out: TRUE if relocated */
+ 	void*	src,	/* in: block to relocate */
+ 	void*	dst,	/* in: free block to relocate to */
+-	ulint	i)	/* in: index of buf_pool->zip_free[] */
++	ulint	i,	/* in: index of buf_pool->zip_free[] */
++	ibool	have_page_hash_mutex)
+ {
+ 	buf_page_t*	bpage;
+ 	const ulint	size	= BUF_BUDDY_LOW << i;
+ 	ullint		usec	= ut_time_us(NULL);
+ 
+-	ut_ad(buf_pool_mutex_own());
++	//ut_ad(buf_pool_mutex_own());
++	ut_ad(mutex_own(&zip_free_mutex));
+ 	ut_ad(!mutex_own(&buf_pool_zip_mutex));
+ 	ut_ad(!ut_align_offset(src, size));
+ 	ut_ad(!ut_align_offset(dst, size));
+@@ -421,9 +459,17 @@
+ 	actually is a properly initialized buf_page_t object. */
+ 
+ 	if (size >= PAGE_ZIP_MIN_SIZE) {
++		if (!have_page_hash_mutex)
++			mutex_exit(&zip_free_mutex);
++
+ 		/* This is a compressed page. */
+ 		mutex_t*	mutex;
+ 
++		if (!have_page_hash_mutex) {
++			mutex_enter(&LRU_list_mutex);
++			mutex_enter(&flush_list_mutex);
++			rw_lock_x_lock(&page_hash_latch);
++		}
+ 		/* The src block may be split into smaller blocks,
+ 		some of which may be free.  Thus, the
+ 		mach_read_from_4() calls below may attempt to read
+@@ -444,6 +490,12 @@
+ 			added to buf_pool->page_hash yet.  Obviously,
+ 			it cannot be relocated. */
+ 
++			if (!have_page_hash_mutex) {
++				mutex_enter(&zip_free_mutex);
++				mutex_exit(&LRU_list_mutex);
++				mutex_exit(&flush_list_mutex);
++				rw_lock_x_unlock(&page_hash_latch);
++			}
+ 			return(FALSE);
+ 		}
+ 
+@@ -453,9 +505,19 @@
+ 			For the sake of simplicity, give up. */
+ 			ut_ad(page_zip_get_size(&bpage->zip) < size);
+ 
++			if (!have_page_hash_mutex) {
++				mutex_enter(&zip_free_mutex);
++				mutex_exit(&LRU_list_mutex);
++				mutex_exit(&flush_list_mutex);
++				rw_lock_x_unlock(&page_hash_latch);
++			}
+ 			return(FALSE);
+ 		}
+ 
++		/* To keep latch order */
++		if (have_page_hash_mutex)
++			mutex_exit(&zip_free_mutex);
++
+ 		/* The block must have been allocated, but it may
+ 		contain uninitialized data. */
+ 		UNIV_MEM_ASSERT_W(src, size);
+@@ -463,6 +525,7 @@
+ 		mutex = buf_page_get_mutex(bpage);
+ 
+ 		mutex_enter(mutex);
++		mutex_enter(&zip_free_mutex);
+ 
+ 		if (buf_page_can_relocate(bpage)) {
+ 			/* Relocate the compressed page. */
+@@ -479,17 +542,53 @@
+ 				buddy_stat->relocated_usec
+ 					+= ut_time_us(NULL) - usec;
+ 			}
++
++			if (!have_page_hash_mutex) {
++				mutex_exit(&LRU_list_mutex);
++				mutex_exit(&flush_list_mutex);
++				rw_lock_x_unlock(&page_hash_latch);
++			}
+ 			return(TRUE);
+ 		}
+ 
++		if (!have_page_hash_mutex) {
++			mutex_exit(&LRU_list_mutex);
++			mutex_exit(&flush_list_mutex);
++			rw_lock_x_unlock(&page_hash_latch);
++		}
++
+ 		mutex_exit(mutex);
+ 	} else if (i == buf_buddy_get_slot(sizeof(buf_page_t))) {
+ 		/* This must be a buf_page_t object. */
+ 		UNIV_MEM_ASSERT_RW(src, size);
++
++		mutex_exit(&zip_free_mutex);
++
++		if (!have_page_hash_mutex) {
++			mutex_enter(&LRU_list_mutex);
++			mutex_enter(&flush_list_mutex);
++			rw_lock_x_lock(&page_hash_latch);
++		}
++
+ 		if (buf_buddy_relocate_block(src, dst)) {
++			mutex_enter(&zip_free_mutex);
++
++			if (!have_page_hash_mutex) {
++				mutex_exit(&LRU_list_mutex);
++				mutex_exit(&flush_list_mutex);
++				rw_lock_x_unlock(&page_hash_latch);
++			}
+ 
+ 			goto success;
+ 		}
++
++		mutex_enter(&zip_free_mutex);
++
++		if (!have_page_hash_mutex) {
++			mutex_exit(&LRU_list_mutex);
++			mutex_exit(&flush_list_mutex);
++			rw_lock_x_unlock(&page_hash_latch);
++		}
+ 	}
+ 
+ 	return(FALSE);
+@@ -503,12 +602,14 @@
+ /*===============*/
+ 	void*	buf,	/* in: block to be freed, must not be
+ 			pointed to by the buffer pool */
+-	ulint	i)	/* in: index of buf_pool->zip_free[] */
++	ulint	i,	/* in: index of buf_pool->zip_free[] */
++	ibool	have_page_hash_mutex)
+ {
+ 	buf_page_t*	bpage;
+ 	buf_page_t*	buddy;
+ 
+-	ut_ad(buf_pool_mutex_own());
++	//ut_ad(buf_pool_mutex_own());
++	ut_ad(mutex_own(&zip_free_mutex));
+ 	ut_ad(!mutex_own(&buf_pool_zip_mutex));
+ 	ut_ad(i <= BUF_BUDDY_SIZES);
+ 	ut_ad(buf_buddy_stat[i].used > 0);
+@@ -519,7 +620,7 @@
+ 	ut_d(((buf_page_t*) buf)->state = BUF_BLOCK_ZIP_FREE);
+ 
+ 	if (i == BUF_BUDDY_SIZES) {
+-		buf_buddy_block_free(buf);
++		buf_buddy_block_free(buf, have_page_hash_mutex);
+ 		return;
+ 	}
+ 
+@@ -577,7 +678,7 @@
+ #endif /* UNIV_DEBUG_VALGRIND */
+ 
+ 	/* The buddy is not free. Is there a free block of this size? */
+-	bpage = UT_LIST_GET_FIRST(buf_pool->zip_free[i]);
++	bpage = UT_LIST_GET_LAST(buf_pool->zip_free[i]);
+ 
+ 	if (bpage) {
+ 		/* Remove the block from the free list, because a successful
+@@ -587,7 +688,7 @@
+ 		buf_buddy_remove_from_free(bpage, i);
+ 
+ 		/* Try to relocate the buddy of buf to the free block. */
+-		if (buf_buddy_relocate(buddy, bpage, i)) {
++		if (buf_buddy_relocate(buddy, bpage, i, have_page_hash_mutex)) {
+ 
+ 			ut_d(buddy->state = BUF_BLOCK_ZIP_FREE);
+ 			goto buddy_free2;
+@@ -615,7 +716,7 @@
+ 		}
+ #endif /* UNIV_DEBUG && !UNIV_DEBUG_VALGRIND */
+ 
+-		if (buf_buddy_relocate(buddy, buf, i)) {
++		if (buf_buddy_relocate(buddy, buf, i, have_page_hash_mutex)) {
+ 
+ 			buf = bpage;
+ 			UNIV_MEM_VALID(bpage, BUF_BUDDY_LOW << i);
+diff -ruN innodb_plugin-1.0.3_orig/buf/buf0buf.c innodb_plugin-1.0.3_tmp/buf/buf0buf.c
+--- innodb_plugin-1.0.3_orig/buf/buf0buf.c	2009-03-19 18:47:48.000000000 +0900
++++ innodb_plugin-1.0.3_tmp/buf/buf0buf.c	2009-03-19 18:51:20.000000000 +0900
+@@ -244,6 +244,12 @@
+ /* mutex protecting the buffer pool struct and control blocks, except the
+ read-write lock in them */
+ UNIV_INTERN mutex_t		buf_pool_mutex;
++UNIV_INTERN mutex_t		LRU_list_mutex;
++UNIV_INTERN mutex_t		flush_list_mutex;
++UNIV_INTERN rw_lock_t		page_hash_latch;
++UNIV_INTERN mutex_t		free_list_mutex;
++UNIV_INTERN mutex_t		zip_free_mutex;
++UNIV_INTERN mutex_t		zip_hash_mutex;
+ /* mutex protecting the control blocks of compressed-only pages
+ (of type buf_page_t, not buf_block_t) */
+ UNIV_INTERN mutex_t		buf_pool_zip_mutex;
+@@ -664,9 +670,9 @@
+ 	block->page.in_zip_hash = FALSE;
+ 	block->page.in_flush_list = FALSE;
+ 	block->page.in_free_list = FALSE;
++#endif /* UNIV_DEBUG */
+ 	block->page.in_LRU_list = FALSE;
+ 	block->in_unzip_LRU_list = FALSE;
+-#endif /* UNIV_DEBUG */
+ #if defined UNIV_AHI_DEBUG || defined UNIV_DEBUG
+ 	block->n_pointers = 0;
+ #endif /* UNIV_AHI_DEBUG || UNIV_DEBUG */
+@@ -751,8 +757,10 @@
+ 		memset(block->frame, '\0', UNIV_PAGE_SIZE);
+ #endif
+ 		/* Add the block to the free list */
++		mutex_enter(&free_list_mutex);
+ 		UT_LIST_ADD_LAST(list, buf_pool->free, (&block->page));
+ 		ut_d(block->page.in_free_list = TRUE);
++		mutex_exit(&free_list_mutex);
+ 
+ 		block++;
+ 		frame += UNIV_PAGE_SIZE;
+@@ -778,7 +786,7 @@
+ 	ulint		i;
+ 
+ 	ut_ad(buf_pool);
+-	ut_ad(buf_pool_mutex_own());
++	//ut_ad(buf_pool_mutex_own());
+ 
+ 	block = chunk->blocks;
+ 
+@@ -832,7 +840,7 @@
+ 	ulint		i;
+ 
+ 	ut_ad(buf_pool);
+-	ut_ad(buf_pool_mutex_own());
++	//ut_ad(buf_pool_mutex_own()); /*optimistic...*/
+ 
+ 	block = chunk->blocks;
+ 
+@@ -865,7 +873,7 @@
+ 	ulint			i;
+ 
+ 	ut_ad(buf_pool);
+-	ut_ad(buf_pool_mutex_own());
++	ut_ad(buf_pool_mutex_own()); /* but we need all mutexes here */
+ 
+ 	block = chunk->blocks;
+ 
+@@ -891,7 +899,7 @@
+ 	buf_block_t*		block;
+ 	const buf_block_t*	block_end;
+ 
+-	ut_ad(buf_pool_mutex_own());
++	ut_ad(buf_pool_mutex_own()); /* but we need all mutexes here */
+ 
+ 	block_end = chunk->blocks + chunk->size;
+ 
+@@ -903,8 +911,10 @@
+ 		ut_ad(!block->in_unzip_LRU_list);
+ 		ut_ad(!block->page.in_flush_list);
+ 		/* Remove the block from the free list. */
++		mutex_enter(&free_list_mutex);
+ 		ut_ad(block->page.in_free_list);
+ 		UT_LIST_REMOVE(list, buf_pool->free, (&block->page));
++		mutex_exit(&free_list_mutex);
+ 
+ 		/* Free the latches. */
+ 		mutex_free(&block->mutex);
+@@ -935,8 +945,18 @@
+ 	/* 1. Initialize general fields
+ 	------------------------------- */
+ 	mutex_create(&buf_pool_mutex, SYNC_BUF_POOL);
++	mutex_create(&LRU_list_mutex, SYNC_NO_ORDER_CHECK);
++	mutex_create(&flush_list_mutex, SYNC_NO_ORDER_CHECK);
++	rw_lock_create(&page_hash_latch, SYNC_NO_ORDER_CHECK);
++	mutex_create(&free_list_mutex, SYNC_NO_ORDER_CHECK);
++	mutex_create(&zip_free_mutex, SYNC_NO_ORDER_CHECK);
++	mutex_create(&zip_hash_mutex, SYNC_NO_ORDER_CHECK);
++
+ 	mutex_create(&buf_pool_zip_mutex, SYNC_BUF_BLOCK);
+ 
++	mutex_enter(&LRU_list_mutex);
++	mutex_enter(&flush_list_mutex);
++	rw_lock_x_lock(&page_hash_latch);
+ 	buf_pool_mutex_enter();
+ 
+ 	buf_pool->n_chunks = 1;
+@@ -973,6 +993,9 @@
+ 	--------------------------- */
+ 	/* All fields are initialized by mem_zalloc(). */
+ 
++	mutex_exit(&LRU_list_mutex);
++	mutex_exit(&flush_list_mutex);
++	rw_lock_x_unlock(&page_hash_latch);
+ 	buf_pool_mutex_exit();
+ 
+ 	btr_search_sys_create(buf_pool->curr_size
+@@ -1105,7 +1128,11 @@
+ 	buf_page_t*	b;
+ 	ulint		fold;
+ 
+-	ut_ad(buf_pool_mutex_own());
++	//ut_ad(buf_pool_mutex_own());
++	ut_ad(mutex_own(&LRU_list_mutex));
++#ifdef UNIV_SYNC_DEBUG
++	ut_ad(rw_lock_own(&page_hash_latch, RW_LOCK_EX));
++#endif
+ 	ut_ad(mutex_own(buf_page_get_mutex(bpage)));
+ 	ut_a(buf_page_get_io_fix(bpage) == BUF_IO_NONE);
+ 	ut_a(bpage->buf_fix_count == 0);
+@@ -1130,7 +1157,7 @@
+ 
+ 	memcpy(dpage, bpage, sizeof *dpage);
+ 
+-	ut_d(bpage->in_LRU_list = FALSE);
++	bpage->in_LRU_list = FALSE;
+ 	ut_d(bpage->in_page_hash = FALSE);
+ 
+ 	/* relocate buf_pool->LRU */
+@@ -1186,7 +1213,8 @@
+ 
+ try_again:
+ 	btr_search_disable(); /* Empty the adaptive hash index again */
+-	buf_pool_mutex_enter();
++	//buf_pool_mutex_enter();
++	mutex_enter(&LRU_list_mutex);
+ 
+ shrink_again:
+ 	if (buf_pool->n_chunks <= 1) {
+@@ -1257,7 +1285,7 @@
+ 
+ 				buf_LRU_make_block_old(&block->page);
+ 				dirty++;
+-			} else if (buf_LRU_free_block(&block->page, TRUE, NULL)
++			} else if (buf_LRU_free_block(&block->page, TRUE, NULL, FALSE)
+ 				   != BUF_LRU_FREED) {
+ 				nonfree++;
+ 			}
+@@ -1265,7 +1293,8 @@
+ 			mutex_exit(&block->mutex);
+ 		}
+ 
+-		buf_pool_mutex_exit();
++		//buf_pool_mutex_exit();
++		mutex_exit(&LRU_list_mutex);
+ 
+ 		/* Request for a flush of the chunk if it helps.
+ 		Do not flush if there are non-free blocks, since
+@@ -1314,7 +1343,8 @@
+ func_done:
+ 	srv_buf_pool_old_size = srv_buf_pool_size;
+ func_exit:
+-	buf_pool_mutex_exit();
++	//buf_pool_mutex_exit();
++	mutex_exit(&LRU_list_mutex);
+ 	btr_search_enable();
+ }
+ 
+@@ -1332,7 +1362,11 @@
+ 	hash_table_t*	zip_hash;
+ 	buf_page_t*	b;
+ 
+-	buf_pool_mutex_enter();
++	//buf_pool_mutex_enter();
++	mutex_enter(&LRU_list_mutex);
++	mutex_enter(&flush_list_mutex);
++	rw_lock_x_lock(&page_hash_latch);
++	
+ 
+ 	/* Free, create, and populate the hash table. */
+ 	hash_table_free(buf_pool->page_hash);
+@@ -1412,7 +1446,10 @@
+ 		}
+ 	}
+ 
+-	buf_pool_mutex_exit();
++	//buf_pool_mutex_exit();
++	mutex_exit(&LRU_list_mutex);
++	mutex_exit(&flush_list_mutex);
++	rw_lock_x_unlock(&page_hash_latch);
+ }
+ 
+ /************************************************************************
+@@ -1422,17 +1459,20 @@
+ buf_pool_resize(void)
+ /*=================*/
+ {
+-	buf_pool_mutex_enter();
++	//buf_pool_mutex_enter();
++	mutex_enter(&LRU_list_mutex);
+ 
+ 	if (srv_buf_pool_old_size == srv_buf_pool_size) {
+ 
+-		buf_pool_mutex_exit();
++		//buf_pool_mutex_exit();
++		mutex_exit(&LRU_list_mutex);
+ 		return;
+ 	}
+ 
+ 	if (srv_buf_pool_curr_size + 1048576 > srv_buf_pool_size) {
+ 
+-		buf_pool_mutex_exit();
++		//buf_pool_mutex_exit();
++		mutex_exit(&LRU_list_mutex);
+ 
+ 		/* Disable adaptive hash indexes and empty the index
+ 		in order to free up memory in the buffer pool chunks. */
+@@ -1466,7 +1506,8 @@
+ 		}
+ 
+ 		srv_buf_pool_old_size = srv_buf_pool_size;
+-		buf_pool_mutex_exit();
++		//buf_pool_mutex_exit();
++		mutex_exit(&LRU_list_mutex);
+ 	}
+ 
+ 	buf_pool_page_hash_rebuild();
+@@ -1488,12 +1529,14 @@
+ 
+ 	if (buf_page_peek_if_too_old(bpage)) {
+ 
+-		buf_pool_mutex_enter();
++		//buf_pool_mutex_enter();
++		mutex_enter(&LRU_list_mutex);
+ 		/* There has been freeing activity in the LRU list:
+ 		best to move to the head of the LRU list */
+ 
+ 		buf_LRU_make_block_young(bpage);
+-		buf_pool_mutex_exit();
++		//buf_pool_mutex_exit();
++		mutex_exit(&LRU_list_mutex);
+ 	}
+ }
+ 
+@@ -1507,13 +1550,15 @@
+ /*================*/
+ 	buf_page_t*	bpage)	/* in: buffer block of a file page */
+ {
+-	buf_pool_mutex_enter();
++	//buf_pool_mutex_enter();
++	mutex_enter(&LRU_list_mutex);
+ 
+ 	ut_a(buf_page_in_file(bpage));
+ 
+ 	buf_LRU_make_block_young(bpage);
+ 
+-	buf_pool_mutex_exit();
++	//buf_pool_mutex_exit();
++	mutex_exit(&LRU_list_mutex);
+ }
+ 
+ /************************************************************************
+@@ -1528,7 +1573,8 @@
+ {
+ 	buf_block_t*	block;
+ 
+-	buf_pool_mutex_enter();
++	//buf_pool_mutex_enter();
++	rw_lock_s_lock(&page_hash_latch);
+ 
+ 	block = (buf_block_t*) buf_page_hash_get(space, offset);
+ 
+@@ -1536,7 +1582,8 @@
+ 		block->check_index_page_at_flush = FALSE;
+ 	}
+ 
+-	buf_pool_mutex_exit();
++	//buf_pool_mutex_exit();
++	rw_lock_s_unlock(&page_hash_latch);
+ }
+ 
+ /************************************************************************
+@@ -1555,7 +1602,8 @@
+ 	buf_block_t*	block;
+ 	ibool		is_hashed;
+ 
+-	buf_pool_mutex_enter();
++	//buf_pool_mutex_enter();
++	rw_lock_s_lock(&page_hash_latch);
+ 
+ 	block = (buf_block_t*) buf_page_hash_get(space, offset);
+ 
+@@ -1565,7 +1613,8 @@
+ 		is_hashed = block->is_hashed;
+ 	}
+ 
+-	buf_pool_mutex_exit();
++	//buf_pool_mutex_exit();
++	rw_lock_s_unlock(&page_hash_latch);
+ 
+ 	return(is_hashed);
+ }
+@@ -1587,7 +1636,8 @@
+ {
+ 	buf_page_t*	bpage;
+ 
+-	buf_pool_mutex_enter();
++	//buf_pool_mutex_enter();
++	rw_lock_s_lock(&page_hash_latch);
+ 
+ 	bpage = buf_page_hash_get(space, offset);
+ 
+@@ -1595,7 +1645,8 @@
+ 		bpage->file_page_was_freed = TRUE;
+ 	}
+ 
+-	buf_pool_mutex_exit();
++	//buf_pool_mutex_exit();
++	rw_lock_s_unlock(&page_hash_latch);
+ 
+ 	return(bpage);
+ }
+@@ -1616,7 +1667,8 @@
+ {
+ 	buf_page_t*	bpage;
+ 
+-	buf_pool_mutex_enter();
++	//buf_pool_mutex_enter();
++	rw_lock_s_lock(&page_hash_latch);
+ 
+ 	bpage = buf_page_hash_get(space, offset);
+ 
+@@ -1624,7 +1676,8 @@
+ 		bpage->file_page_was_freed = FALSE;
+ 	}
+ 
+-	buf_pool_mutex_exit();
++	//buf_pool_mutex_exit();
++	rw_lock_s_unlock(&page_hash_latch);
+ 
+ 	return(bpage);
+ }
+@@ -1657,8 +1710,9 @@
+ 	buf_pool->n_page_gets++;
+ 
+ 	for (;;) {
+-		buf_pool_mutex_enter();
++		//buf_pool_mutex_enter();
+ lookup:
++		rw_lock_s_lock(&page_hash_latch);
+ 		bpage = buf_page_hash_get(space, offset);
+ 		if (bpage) {
+ 			break;
+@@ -1666,7 +1720,8 @@
+ 
+ 		/* Page not in buf_pool: needs to be read from file */
+ 
+-		buf_pool_mutex_exit();
++		//buf_pool_mutex_exit();
++		rw_lock_s_unlock(&page_hash_latch);
+ 
+ 		buf_read_page(space, zip_size, offset);
+ 
+@@ -1677,13 +1732,16 @@
+ 
+ 	if (UNIV_UNLIKELY(!bpage->zip.data)) {
+ 		/* There is no compressed page. */
+-		buf_pool_mutex_exit();
++		//buf_pool_mutex_exit();
++		rw_lock_s_unlock(&page_hash_latch);
+ 		return(NULL);
+ 	}
+ 
+ 	block_mutex = buf_page_get_mutex(bpage);
+ 	mutex_enter(block_mutex);
+ 
++	rw_lock_s_unlock(&page_hash_latch);
++
+ 	switch (buf_page_get_state(bpage)) {
+ 	case BUF_BLOCK_NOT_USED:
+ 	case BUF_BLOCK_READY_FOR_USE:
+@@ -1698,7 +1756,7 @@
+ 		break;
+ 	case BUF_BLOCK_FILE_PAGE:
+ 		/* Discard the uncompressed page frame if possible. */
+-		if (buf_LRU_free_block(bpage, FALSE, NULL)
++		if (buf_LRU_free_block(bpage, FALSE, NULL, FALSE)
+ 		    == BUF_LRU_FREED) {
+ 
+ 			mutex_exit(block_mutex);
+@@ -1712,7 +1770,7 @@
+ 
+ 	must_read = buf_page_get_io_fix(bpage) == BUF_IO_READ;
+ 
+-	buf_pool_mutex_exit();
++	//buf_pool_mutex_exit();
+ 
+ 	buf_page_set_accessed(bpage, TRUE);
+ 
+@@ -1943,7 +2001,7 @@
+ 	const buf_chunk_t*		chunk	= buf_pool->chunks;
+ 	const buf_chunk_t* const	echunk	= chunk + buf_pool->n_chunks;
+ 
+-	ut_ad(buf_pool_mutex_own());
++	//ut_ad(buf_pool_mutex_own());
+ 
+ 	if (UNIV_UNLIKELY((((ulint) block) % sizeof *block) != 0)) {
+ 		/* The pointer should be aligned. */
+@@ -1986,6 +2044,7 @@
+ 	ibool		accessed;
+ 	ulint		fix_type;
+ 	ibool		must_read;
++	mutex_t*	block_mutex;
+ 
+ 	ut_ad(mtr);
+ 	ut_ad((rw_latch == RW_S_LATCH)
+@@ -2001,9 +2060,12 @@
+ 	buf_pool->n_page_gets++;
+ loop:
+ 	block = guess;
+-	buf_pool_mutex_enter();
++	//buf_pool_mutex_enter();
+ 
+ 	if (block) {
++		block_mutex = buf_page_get_mutex((buf_page_t*)block);
++		mutex_enter(block_mutex);
++
+ 		/* If the guess is a compressed page descriptor that
+ 		has been allocated by buf_buddy_alloc(), it may have
+ 		been invalidated by buf_buddy_relocate().  In that
+@@ -2017,6 +2079,8 @@
+ 		    || space != block->page.space
+ 		    || buf_block_get_state(block) != BUF_BLOCK_FILE_PAGE) {
+ 
++			mutex_exit(block_mutex);
++
+ 			block = guess = NULL;
+ 		} else {
+ 			ut_ad(!block->page.in_zip_hash);
+@@ -2025,14 +2089,20 @@
+ 	}
+ 
+ 	if (block == NULL) {
++		rw_lock_s_lock(&page_hash_latch);
+ 		block = (buf_block_t*) buf_page_hash_get(space, offset);
++		if (block) {
++			block_mutex = buf_page_get_mutex((buf_page_t*)block);
++			mutex_enter(block_mutex);
++		}
++		rw_lock_s_unlock(&page_hash_latch);
+ 	}
+ 
+ loop2:
+ 	if (block == NULL) {
+ 		/* Page not in buf_pool: needs to be read from file */
+ 
+-		buf_pool_mutex_exit();
++		//buf_pool_mutex_exit();
+ 
+ 		if (mode == BUF_GET_IF_IN_POOL) {
+ 
+@@ -2053,7 +2123,8 @@
+ 
+ 	if (must_read && mode == BUF_GET_IF_IN_POOL) {
+ 		/* The page is only being read to buffer */
+-		buf_pool_mutex_exit();
++		//buf_pool_mutex_exit();
++		mutex_exit(block_mutex);
+ 
+ 		return(NULL);
+ 	}
+@@ -2063,10 +2134,16 @@
+ 		ibool		success;
+ 
+ 	case BUF_BLOCK_FILE_PAGE:
++		if (block_mutex == &buf_pool_zip_mutex) {
++			/* we hold buf_pool_zip_mutex, the wrong mutex for a file page; retry */
++			mutex_exit(block_mutex);
++			goto loop;
++		}
+ 		break;
+ 
+ 	case BUF_BLOCK_ZIP_PAGE:
+ 	case BUF_BLOCK_ZIP_DIRTY:
++		ut_ad(block_mutex == &buf_pool_zip_mutex);
+ 		bpage = &block->page;
+ 
+ 		if (bpage->buf_fix_count
+@@ -2077,20 +2154,26 @@
+ wait_until_unfixed:
+ 			/* The block is buffer-fixed or I/O-fixed.
+ 			Try again later. */
+-			buf_pool_mutex_exit();
++			//buf_pool_mutex_exit();
++			mutex_exit(block_mutex);
+ 			os_thread_sleep(WAIT_FOR_READ);
+ 
+ 			goto loop;
+ 		}
+ 
+ 		/* Allocate an uncompressed page. */
+-		buf_pool_mutex_exit();
++		//buf_pool_mutex_exit();
++		mutex_exit(block_mutex);
+ 
+ 		block = buf_LRU_get_free_block(0);
+ 		ut_a(block);
++		block_mutex = &block->mutex;
+ 
+-		buf_pool_mutex_enter();
+-		mutex_enter(&block->mutex);
++		//buf_pool_mutex_enter();
++		mutex_enter(&LRU_list_mutex);
++		mutex_enter(&flush_list_mutex);
++		rw_lock_x_lock(&page_hash_latch);
++		mutex_enter(block_mutex);
+ 
+ 		{
+ 			buf_page_t*	hash_bpage
+@@ -2101,10 +2184,17 @@
+ 				while buf_pool_mutex was released.
+ 				Free the block that was allocated. */
+ 
+-				buf_LRU_block_free_non_file_page(block);
+-				mutex_exit(&block->mutex);
++				buf_LRU_block_free_non_file_page(block, TRUE);
++				mutex_exit(block_mutex);
+ 
+ 				block = (buf_block_t*) hash_bpage;
++				if (block) {
++					block_mutex = buf_page_get_mutex((buf_page_t*)block);
++					mutex_enter(block_mutex);
++				}
++				rw_lock_x_unlock(&page_hash_latch);
++				mutex_exit(&LRU_list_mutex);
++				mutex_exit(&flush_list_mutex);
+ 				goto loop2;
+ 			}
+ 		}
+@@ -2118,9 +2208,12 @@
+ 			Free the block that was allocated and try again.
+ 			This should be extremely unlikely. */
+ 
+-			buf_LRU_block_free_non_file_page(block);
+-			mutex_exit(&block->mutex);
++			buf_LRU_block_free_non_file_page(block, TRUE);
++			//mutex_exit(&block->mutex);
+ 
++			rw_lock_x_unlock(&page_hash_latch);
++			mutex_exit(&LRU_list_mutex);
++			mutex_exit(&flush_list_mutex);
+ 			goto wait_until_unfixed;
+ 		}
+ 
+@@ -2130,6 +2223,9 @@
+ 		mutex_enter(&buf_pool_zip_mutex);
+ 
+ 		buf_relocate(bpage, &block->page);
++
++		rw_lock_x_unlock(&page_hash_latch);
++
+ 		buf_block_init_low(block);
+ 		block->lock_hash_val = lock_rec_hash(space, offset);
+ 
+@@ -2161,6 +2257,8 @@
+ 			}
+ 		}
+ 
++		mutex_exit(&flush_list_mutex);
++
+ 		/* Buffer-fix, I/O-fix, and X-latch the block
+ 		for the duration of the decompression.
+ 		Also add the block to the unzip_LRU list. */
+@@ -2169,16 +2267,22 @@
+ 		/* Insert at the front of unzip_LRU list */
+ 		buf_unzip_LRU_add_block(block, FALSE);
+ 
++		mutex_exit(&LRU_list_mutex);
++
+ 		block->page.buf_fix_count = 1;
+ 		buf_block_set_io_fix(block, BUF_IO_READ);
++
++		mutex_enter(&buf_pool_mutex);
+ 		buf_pool->n_pend_unzip++;
++		mutex_exit(&buf_pool_mutex);
++
+ 		rw_lock_x_lock(&block->lock);
+-		mutex_exit(&block->mutex);
++		mutex_exit(block_mutex);
+ 		mutex_exit(&buf_pool_zip_mutex);
+ 
+-		buf_buddy_free(bpage, sizeof *bpage);
++		buf_buddy_free(bpage, sizeof *bpage, FALSE);
+ 
+-		buf_pool_mutex_exit();
++		//buf_pool_mutex_exit();
+ 
+ 		/* Decompress the page and apply buffered operations
+ 		while not holding buf_pool_mutex or block->mutex. */
+@@ -2190,17 +2294,21 @@
+ 		}
+ 
+ 		/* Unfix and unlatch the block. */
+-		buf_pool_mutex_enter();
+-		mutex_enter(&block->mutex);
++		//buf_pool_mutex_enter();
++		block_mutex = &block->mutex;
++		mutex_enter(block_mutex);
++		mutex_enter(&buf_pool_mutex);
+ 		buf_pool->n_pend_unzip--;
++		mutex_exit(&buf_pool_mutex);
+ 		block->page.buf_fix_count--;
+ 		buf_block_set_io_fix(block, BUF_IO_NONE);
+-		mutex_exit(&block->mutex);
++		//mutex_exit(&block->mutex);
+ 		rw_lock_x_unlock(&block->lock);
+ 
+ 		if (UNIV_UNLIKELY(!success)) {
+ 
+-			buf_pool_mutex_exit();
++			//buf_pool_mutex_exit();
++			mutex_exit(block_mutex);
+ 			return(NULL);
+ 		}
+ 
+@@ -2217,11 +2325,11 @@
+ 
+ 	ut_ad(buf_block_get_state(block) == BUF_BLOCK_FILE_PAGE);
+ 
+-	mutex_enter(&block->mutex);
++	//mutex_enter(&block->mutex);
+ 	UNIV_MEM_ASSERT_RW(&block->page, sizeof block->page);
+ 
+ 	buf_block_buf_fix_inc(block, file, line);
+-	buf_pool_mutex_exit();
++	//buf_pool_mutex_exit();
+ 
+ 	/* Check if this is the first access to the page */
+ 
+@@ -2229,7 +2337,7 @@
+ 
+ 	buf_page_set_accessed(&block->page, TRUE);
+ 
+-	mutex_exit(&block->mutex);
++	mutex_exit(block_mutex);
+ 
+ 	buf_block_make_young(&block->page);
+ 
+@@ -2515,16 +2623,19 @@
+ 	ibool		success;
+ 	ulint		fix_type;
+ 
+-	buf_pool_mutex_enter();
++	//buf_pool_mutex_enter();
++	rw_lock_s_lock(&page_hash_latch);
+ 	block = buf_block_hash_get(space_id, page_no);
+ 
+ 	if (!block) {
+-		buf_pool_mutex_exit();
++		//buf_pool_mutex_exit();
++		rw_lock_s_unlock(&page_hash_latch);
+ 		return(NULL);
+ 	}
+ 
+ 	mutex_enter(&block->mutex);
+-	buf_pool_mutex_exit();
++	//buf_pool_mutex_exit();
++	rw_lock_s_unlock(&page_hash_latch);
+ 
+ #if defined UNIV_DEBUG || defined UNIV_BUF_DEBUG
+ 	ut_a(buf_block_get_state(block) == BUF_BLOCK_FILE_PAGE);
+@@ -2644,7 +2755,10 @@
+ {
+ 	buf_page_t*	hash_page;
+ 
+-	ut_ad(buf_pool_mutex_own());
++	//ut_ad(buf_pool_mutex_own());
++#ifdef UNIV_SYNC_DEBUG
++	ut_ad(rw_lock_own(&page_hash_latch, RW_LOCK_EX));
++#endif
+ 	ut_ad(mutex_own(&(block->mutex)));
+ 	ut_a(buf_block_get_state(block) != BUF_BLOCK_FILE_PAGE);
+ 
+@@ -2677,7 +2791,8 @@
+ 			(const void*) hash_page, (const void*) block);
+ #if defined UNIV_DEBUG || defined UNIV_BUF_DEBUG
+ 		mutex_exit(&block->mutex);
+-		buf_pool_mutex_exit();
++		//buf_pool_mutex_exit();
++		rw_lock_x_unlock(&page_hash_latch);
+ 		buf_print();
+ 		buf_LRU_print();
+ 		buf_validate();
+@@ -2756,16 +2871,28 @@
+ 		ut_ad(block);
+ 	}
+ 
+-	buf_pool_mutex_enter();
++	//buf_pool_mutex_enter();
++	mutex_enter(&LRU_list_mutex);
++	if(!block) {
++		mutex_enter(&flush_list_mutex);
++	}
++	rw_lock_x_lock(&page_hash_latch);
+ 
+ 	if (buf_page_hash_get(space, offset)) {
+ 		/* The page is already in the buffer pool. */
+ err_exit:
+ 		if (block) {
+ 			mutex_enter(&block->mutex);
+-			buf_LRU_block_free_non_file_page(block);
++			mutex_exit(&LRU_list_mutex);
++			rw_lock_x_unlock(&page_hash_latch);
++			buf_LRU_block_free_non_file_page(block, FALSE);
+ 			mutex_exit(&block->mutex);
+ 		}
++		else {
++			mutex_exit(&LRU_list_mutex);
++			mutex_exit(&flush_list_mutex);
++			rw_lock_x_unlock(&page_hash_latch);
++		}
+ 
+ 		bpage = NULL;
+ 		goto func_exit;
+@@ -2785,6 +2912,8 @@
+ 		mutex_enter(&block->mutex);
+ 		buf_page_init(space, offset, block);
+ 
++		rw_lock_x_unlock(&page_hash_latch);
++
+ 		/* The block must be put to the LRU list, to the old blocks */
+ 		buf_LRU_add_block(bpage, TRUE/* to old blocks */);
+ 
+@@ -2812,7 +2941,7 @@
+ 			been added to buf_pool->LRU and
+ 			buf_pool->page_hash. */
+ 			mutex_exit(&block->mutex);
+-			data = buf_buddy_alloc(zip_size, &lru);
++			data = buf_buddy_alloc(zip_size, &lru, FALSE);
+ 			mutex_enter(&block->mutex);
+ 			block->page.zip.data = data;
+ 
+@@ -2825,6 +2954,7 @@
+ 			buf_unzip_LRU_add_block(block, TRUE);
+ 		}
+ 
++		mutex_exit(&LRU_list_mutex);
+ 		mutex_exit(&block->mutex);
+ 	} else {
+ 		/* Defer buf_buddy_alloc() until after the block has
+@@ -2836,8 +2966,8 @@
+ 		control block (bpage), in order to avoid the
+ 		invocation of buf_buddy_relocate_block() on
+ 		uninitialized data. */
+-		data = buf_buddy_alloc(zip_size, &lru);
+-		bpage = buf_buddy_alloc(sizeof *bpage, &lru);
++		data = buf_buddy_alloc(zip_size, &lru, TRUE);
++		bpage = buf_buddy_alloc(sizeof *bpage, &lru, TRUE);
+ 
+ 		/* If buf_buddy_alloc() allocated storage from the LRU list,
+ 		it released and reacquired buf_pool_mutex.  Thus, we must
+@@ -2846,8 +2976,12 @@
+ 		    && UNIV_LIKELY_NULL(buf_page_hash_get(space, offset))) {
+ 
+ 			/* The block was added by some other thread. */
+-			buf_buddy_free(bpage, sizeof *bpage);
+-			buf_buddy_free(data, zip_size);
++			buf_buddy_free(bpage, sizeof *bpage, TRUE);
++			buf_buddy_free(data, zip_size, TRUE);
++
++			mutex_exit(&LRU_list_mutex);
++			mutex_exit(&flush_list_mutex);
++			rw_lock_x_unlock(&page_hash_latch);
+ 
+ 			bpage = NULL;
+ 			goto func_exit;
+@@ -2870,25 +3004,32 @@
+ 		bpage->in_zip_hash = FALSE;
+ 		bpage->in_flush_list = FALSE;
+ 		bpage->in_free_list = FALSE;
+-		bpage->in_LRU_list = FALSE;
+ #endif /* UNIV_DEBUG */
++		bpage->in_LRU_list = FALSE;
+ 
+ 		ut_d(bpage->in_page_hash = TRUE);
+ 		HASH_INSERT(buf_page_t, hash, buf_pool->page_hash,
+ 			    buf_page_address_fold(space, offset), bpage);
+ 
++		rw_lock_x_unlock(&page_hash_latch);
++
+ 		/* The block must be put to the LRU list, to the old blocks */
+ 		buf_LRU_add_block(bpage, TRUE/* to old blocks */);
+ 		buf_LRU_insert_zip_clean(bpage);
+ 
++		mutex_exit(&LRU_list_mutex);
++		mutex_exit(&flush_list_mutex);
++
+ 		buf_page_set_io_fix(bpage, BUF_IO_READ);
+ 
+ 		mutex_exit(&buf_pool_zip_mutex);
+ 	}
+ 
++	mutex_enter(&buf_pool_mutex);
+ 	buf_pool->n_pend_reads++;
++	mutex_exit(&buf_pool_mutex);
+ func_exit:
+-	buf_pool_mutex_exit();
++	//buf_pool_mutex_exit();
+ 
+ 	if (mode == BUF_READ_IBUF_PAGES_ONLY) {
+ 
+@@ -2924,7 +3065,9 @@
+ 
+ 	free_block = buf_LRU_get_free_block(0);
+ 
+-	buf_pool_mutex_enter();
++	//buf_pool_mutex_enter();
++	mutex_enter(&LRU_list_mutex);
++	rw_lock_x_lock(&page_hash_latch);
+ 
+ 	block = (buf_block_t*) buf_page_hash_get(space, offset);
+ 
+@@ -2937,7 +3080,9 @@
+ #endif /* UNIV_DEBUG_FILE_ACCESSES */
+ 
+ 		/* Page can be found in buf_pool */
+-		buf_pool_mutex_exit();
++		//buf_pool_mutex_exit();
++		mutex_exit(&LRU_list_mutex);
++		rw_lock_x_unlock(&page_hash_latch);
+ 
+ 		buf_block_free(free_block);
+ 
+@@ -2959,6 +3104,7 @@
+ 	mutex_enter(&block->mutex);
+ 
+ 	buf_page_init(space, offset, block);
++	rw_lock_x_unlock(&page_hash_latch);
+ 
+ 	/* The block must be put to the LRU list */
+ 	buf_LRU_add_block(&block->page, FALSE);
+@@ -2985,7 +3131,7 @@
+ 		the reacquisition of buf_pool_mutex.  We also must
+ 		defer this operation until after the block descriptor
+ 		has been added to buf_pool->LRU and buf_pool->page_hash. */
+-		data = buf_buddy_alloc(zip_size, &lru);
++		data = buf_buddy_alloc(zip_size, &lru, FALSE);
+ 		mutex_enter(&block->mutex);
+ 		block->page.zip.data = data;
+ 
+@@ -3001,7 +3147,8 @@
+ 		rw_lock_x_unlock(&block->lock);
+ 	}
+ 
+-	buf_pool_mutex_exit();
++	//buf_pool_mutex_exit();
++	mutex_exit(&LRU_list_mutex);
+ 
+ 	mtr_memo_push(mtr, block, MTR_MEMO_BUF_FIX);
+ 
+@@ -3053,6 +3200,7 @@
+ 	enum buf_io_fix	io_type;
+ 	const ibool	uncompressed = (buf_page_get_state(bpage)
+ 					== BUF_BLOCK_FILE_PAGE);
++	enum buf_flush	flush_type;
+ 
+ 	ut_a(buf_page_in_file(bpage));
+ 
+@@ -3187,8 +3335,17 @@
+ 		}
+ 	}
+ 
+-	buf_pool_mutex_enter();
++	//buf_pool_mutex_enter();
++	if (io_type == BUF_IO_WRITE) {
++		flush_type = buf_page_get_flush_type(bpage);
++		/* to keep consistency at buf_LRU_insert_zip_clean() */
++		//if (flush_type == BUF_FLUSH_LRU) { /* optimistic! */
++			mutex_enter(&LRU_list_mutex);
++		//}
++		mutex_enter(&flush_list_mutex);
++	}
+ 	mutex_enter(buf_page_get_mutex(bpage));
++	mutex_enter(&buf_pool_mutex);
+ 
+ #ifdef UNIV_IBUF_COUNT_DEBUG
+ 	if (io_type == BUF_IO_WRITE || uncompressed) {
+@@ -3228,6 +3385,12 @@
+ 
+ 		buf_flush_write_complete(bpage);
+ 
++		mutex_exit(&flush_list_mutex);
++		/* to keep consistency at buf_LRU_insert_zip_clean() */
++		//if (flush_type == BUF_FLUSH_LRU) { /* optimistic! */
++			mutex_exit(&LRU_list_mutex);
++		//}
++
+ 		if (uncompressed) {
+ 			rw_lock_s_unlock_gen(&((buf_block_t*) bpage)->lock,
+ 					     BUF_IO_WRITE);
+@@ -3250,8 +3413,9 @@
+ 	}
+ #endif /* UNIV_DEBUG */
+ 
++	mutex_exit(&buf_pool_mutex);
+ 	mutex_exit(buf_page_get_mutex(bpage));
+-	buf_pool_mutex_exit();
++	//buf_pool_mutex_exit();
+ }
+ 
+ /*************************************************************************
+@@ -3273,12 +3437,14 @@
+ 		freed = buf_LRU_search_and_free_block(100);
+ 	}
+ 
+-	buf_pool_mutex_enter();
++	//buf_pool_mutex_enter();
++	mutex_enter(&LRU_list_mutex);
+ 
+ 	ut_ad(UT_LIST_GET_LEN(buf_pool->LRU) == 0);
+ 	ut_ad(UT_LIST_GET_LEN(buf_pool->unzip_LRU) == 0);
+ 
+-	buf_pool_mutex_exit();
++	//buf_pool_mutex_exit();
++	mutex_exit(&LRU_list_mutex);
+ }
+ 
+ #if defined UNIV_DEBUG || defined UNIV_BUF_DEBUG
+@@ -3302,7 +3468,11 @@
+ 
+ 	ut_ad(buf_pool);
+ 
+-	buf_pool_mutex_enter();
++	//buf_pool_mutex_enter();
++	mutex_enter(&LRU_list_mutex);
++	mutex_enter(&flush_list_mutex);
++	rw_lock_x_lock(&page_hash_latch);
++	/* to keep the new latch order, this cannot validate correctly... */
+ 
+ 	chunk = buf_pool->chunks;
+ 
+@@ -3483,19 +3653,25 @@
+ 	}
+ 
+ 	ut_a(UT_LIST_GET_LEN(buf_pool->LRU) == n_lru);
++	/* because of latching order, we cannot get free_list_mutex here. */
++/*
+ 	if (UT_LIST_GET_LEN(buf_pool->free) != n_free) {
+ 		fprintf(stderr, "Free list len %lu, free blocks %lu\n",
+ 			(ulong) UT_LIST_GET_LEN(buf_pool->free),
+ 			(ulong) n_free);
+ 		ut_error;
+ 	}
++*/
+ 	ut_a(UT_LIST_GET_LEN(buf_pool->flush_list) == n_flush);
+ 
+ 	ut_a(buf_pool->n_flush[BUF_FLUSH_SINGLE_PAGE] == n_single_flush);
+ 	ut_a(buf_pool->n_flush[BUF_FLUSH_LIST] == n_list_flush);
+ 	ut_a(buf_pool->n_flush[BUF_FLUSH_LRU] == n_lru_flush);
+ 
+-	buf_pool_mutex_exit();
++	//buf_pool_mutex_exit();
++	mutex_exit(&LRU_list_mutex);
++	mutex_exit(&flush_list_mutex);
++	rw_lock_x_unlock(&page_hash_latch);
+ 
+ 	ut_a(buf_LRU_validate());
+ 	ut_a(buf_flush_validate());
+@@ -3529,7 +3705,10 @@
+ 	index_ids = mem_alloc(sizeof(dulint) * size);
+ 	counts = mem_alloc(sizeof(ulint) * size);
+ 
+-	buf_pool_mutex_enter();
++	//buf_pool_mutex_enter();
++	mutex_enter(&LRU_list_mutex);
++	mutex_enter(&flush_list_mutex);
++	mutex_enter(&free_list_mutex);
+ 
+ 	fprintf(stderr,
+ 		"buf_pool size %lu\n"
+@@ -3592,7 +3771,10 @@
+ 		}
+ 	}
+ 
+-	buf_pool_mutex_exit();
++	//buf_pool_mutex_exit();
++	mutex_exit(&LRU_list_mutex);
++	mutex_exit(&flush_list_mutex);
++	mutex_exit(&free_list_mutex);
+ 
+ 	for (i = 0; i < n_found; i++) {
+ 		index = dict_index_get_if_in_cache(index_ids[i]);
+@@ -3630,7 +3812,7 @@
+ 	ulint		i;
+ 	ulint		fixed_pages_number = 0;
+ 
+-	buf_pool_mutex_enter();
++	//buf_pool_mutex_enter();
+ 
+ 	chunk = buf_pool->chunks;
+ 
+@@ -3700,7 +3882,7 @@
+ 	}
+ 
+ 	mutex_exit(&buf_pool_zip_mutex);
+-	buf_pool_mutex_exit();
++	//buf_pool_mutex_exit();
+ 
+ 	return(fixed_pages_number);
+ }
+@@ -3729,7 +3911,7 @@
+ {
+ 	ulint	ratio;
+ 
+-	buf_pool_mutex_enter();
++	//buf_pool_mutex_enter(); /* optimistic */
+ 
+ 	ratio = (100 * UT_LIST_GET_LEN(buf_pool->flush_list))
+ 		/ (1 + UT_LIST_GET_LEN(buf_pool->LRU)
+@@ -3737,7 +3919,7 @@
+ 
+ 	/* 1 + is there to avoid division by zero */
+ 
+-	buf_pool_mutex_exit();
++	//buf_pool_mutex_exit(); /* optimistic */
+ 
+ 	return(ratio);
+ }
+@@ -3757,7 +3939,11 @@
+ 	ut_ad(buf_pool);
+ 	size = buf_pool->curr_size;
+ 
+-	buf_pool_mutex_enter();
++	//buf_pool_mutex_enter();
++	mutex_enter(&LRU_list_mutex);
++	mutex_enter(&flush_list_mutex);
++	mutex_enter(&free_list_mutex);
++	mutex_enter(&buf_pool_mutex);
+ 
+ 	fprintf(file,
+ 		"Buffer pool size        %lu\n"
+@@ -3824,7 +4010,11 @@
+ 		buf_LRU_stat_sum.io, buf_LRU_stat_cur.io,
+ 		buf_LRU_stat_sum.unzip, buf_LRU_stat_cur.unzip);
+ 
+-	buf_pool_mutex_exit();
++	//buf_pool_mutex_exit();
++	mutex_exit(&LRU_list_mutex);
++	mutex_exit(&flush_list_mutex);
++	mutex_exit(&free_list_mutex);
++	mutex_exit(&buf_pool_mutex);
+ }
+ 
+ /**************************************************************************
+@@ -3853,7 +4043,7 @@
+ 
+ 	ut_ad(buf_pool);
+ 
+-	buf_pool_mutex_enter();
++	//buf_pool_mutex_enter(); /* optimistic */
+ 
+ 	chunk = buf_pool->chunks;
+ 
+@@ -3870,7 +4060,7 @@
+ 		}
+ 	}
+ 
+-	buf_pool_mutex_exit();
++	//buf_pool_mutex_exit(); /* optimistic */
+ 
+ 	return(TRUE);
+ }
+@@ -3886,7 +4076,8 @@
+ {
+ 	ibool	ret;
+ 
+-	buf_pool_mutex_enter();
++	//buf_pool_mutex_enter();
++	mutex_enter(&buf_pool_mutex);
+ 
+ 	if (buf_pool->n_pend_reads + buf_pool->n_flush[BUF_FLUSH_LRU]
+ 	    + buf_pool->n_flush[BUF_FLUSH_LIST]
+@@ -3896,7 +4087,8 @@
+ 		ret = TRUE;
+ 	}
+ 
+-	buf_pool_mutex_exit();
++	//buf_pool_mutex_exit();
++	mutex_exit(&buf_pool_mutex);
+ 
+ 	return(ret);
+ }
+@@ -3910,11 +4102,13 @@
+ {
+ 	ulint	len;
+ 
+-	buf_pool_mutex_enter();
++	//buf_pool_mutex_enter();
++	mutex_enter(&free_list_mutex);
+ 
+ 	len = UT_LIST_GET_LEN(buf_pool->free);
+ 
+-	buf_pool_mutex_exit();
++	//buf_pool_mutex_exit();
++	mutex_exit(&free_list_mutex);
+ 
+ 	return(len);
+ }
+diff -ruN innodb_plugin-1.0.3_orig/buf/buf0flu.c innodb_plugin-1.0.3_tmp/buf/buf0flu.c
+--- innodb_plugin-1.0.3_orig/buf/buf0flu.c	2009-03-19 18:47:48.000000000 +0900
++++ innodb_plugin-1.0.3_tmp/buf/buf0flu.c	2009-03-19 18:53:43.000000000 +0900
+@@ -61,7 +61,8 @@
+ /*=============================*/
+ 	buf_block_t*	block)	/* in/out: block which is modified */
+ {
+-	ut_ad(buf_pool_mutex_own());
++	//ut_ad(buf_pool_mutex_own());
++	ut_ad(mutex_own(&flush_list_mutex));
+ 	ut_ad((UT_LIST_GET_FIRST(buf_pool->flush_list) == NULL)
+ 	      || (UT_LIST_GET_FIRST(buf_pool->flush_list)->oldest_modification
+ 		  <= block->page.oldest_modification));
+@@ -92,7 +93,8 @@
+ 	buf_page_t*	prev_b;
+ 	buf_page_t*	b;
+ 
+-	ut_ad(buf_pool_mutex_own());
++	//ut_ad(buf_pool_mutex_own());
++	ut_ad(mutex_own(&flush_list_mutex));
+ 	ut_ad(buf_block_get_state(block) == BUF_BLOCK_FILE_PAGE);
+ 
+ 	ut_ad(block->page.in_LRU_list);
+@@ -134,10 +136,10 @@
+ 				buf_page_in_file(bpage) and in the LRU list */
+ {
+ 	//ut_ad(buf_pool_mutex_own());
+-	ut_ad(mutex_own(buf_page_get_mutex(bpage)));
+-	//ut_ad(bpage->in_LRU_list); /* optimistic use */
++	//ut_ad(mutex_own(buf_page_get_mutex(bpage)));
++	//ut_ad(bpage->in_LRU_list);
+ 
+-	if (UNIV_LIKELY(buf_page_in_file(bpage))) {
++	if (UNIV_LIKELY(bpage->in_LRU_list && buf_page_in_file(bpage))) {
+ 
+ 		return(bpage->oldest_modification == 0
+ 		       && buf_page_get_io_fix(bpage) == BUF_IO_NONE
+@@ -170,7 +172,7 @@
+ 	enum buf_flush	flush_type)/* in: BUF_FLUSH_LRU or BUF_FLUSH_LIST */
+ {
+ 	ut_a(buf_page_in_file(bpage));
+-	ut_ad(buf_pool_mutex_own());
++	//ut_ad(buf_pool_mutex_own()); /*optimistic...*/
+ 	ut_ad(mutex_own(buf_page_get_mutex(bpage)));
+ 	ut_ad(flush_type == BUF_FLUSH_LRU || BUF_FLUSH_LIST);
+ 
+@@ -203,7 +205,8 @@
+ /*=============*/
+ 	buf_page_t*	bpage)	/* in: pointer to the block in question */
+ {
+-	ut_ad(buf_pool_mutex_own());
++	//ut_ad(buf_pool_mutex_own());
++	ut_ad(mutex_own(&flush_list_mutex));
+ 	ut_ad(mutex_own(buf_page_get_mutex(bpage)));
+ 	ut_ad(bpage->in_flush_list);
+ 	ut_d(bpage->in_flush_list = FALSE);
+@@ -762,12 +765,19 @@
+ 	ibool		is_uncompressed;
+ 
+ 	ut_ad(flush_type == BUF_FLUSH_LRU || flush_type == BUF_FLUSH_LIST);
+-	ut_ad(buf_pool_mutex_own());
++	//ut_ad(buf_pool_mutex_own());
++#ifdef UNIV_SYNC_DEBUG
++	ut_ad(rw_lock_own(&page_hash_latch, RW_LOCK_EX)
++	      || rw_lock_own(&page_hash_latch, RW_LOCK_SHARED));
++#endif
+ 	ut_ad(buf_page_in_file(bpage));
+ 
+ 	block_mutex = buf_page_get_mutex(bpage);
+ 	ut_ad(mutex_own(block_mutex));
+ 
++	mutex_enter(&buf_pool_mutex);
++	rw_lock_s_unlock(&page_hash_latch);
++
+ 	ut_ad(buf_flush_ready_for_flush(bpage, flush_type));
+ 
+ 	buf_page_set_io_fix(bpage, BUF_IO_WRITE);
+@@ -798,7 +808,8 @@
+ 		}
+ 
+ 		mutex_exit(block_mutex);
+-		buf_pool_mutex_exit();
++		//buf_pool_mutex_exit();
++		mutex_exit(&buf_pool_mutex);
+ 
+ 		/* Even though bpage is not protected by any mutex at
+ 		this point, it is safe to access bpage, because it is
+@@ -835,7 +846,8 @@
+ 		immediately. */
+ 
+ 		mutex_exit(block_mutex);
+-		buf_pool_mutex_exit();
++		//buf_pool_mutex_exit();
++		mutex_exit(&buf_pool_mutex);
+ 		break;
+ 
+ 	default:
+@@ -899,7 +911,8 @@
+ 		high = fil_space_get_size(space);
+ 	}
+ 
+-	buf_pool_mutex_enter();
++	//buf_pool_mutex_enter();
++	rw_lock_s_lock(&page_hash_latch);
+ 
+ 	for (i = low; i < high; i++) {
+ 
+@@ -936,14 +949,16 @@
+ 				ut_ad(!mutex_own(block_mutex));
+ 				count++;
+ 
+-				buf_pool_mutex_enter();
++				//buf_pool_mutex_enter();
++				rw_lock_s_lock(&page_hash_latch);
+ 			} else {
+ 				mutex_exit(block_mutex);
+ 			}
+ 		}
+ 	}
+ 
+-	buf_pool_mutex_exit();
++	//buf_pool_mutex_exit();
++	rw_lock_s_unlock(&page_hash_latch);
+ 
+ 	return(count);
+ }
+@@ -987,20 +1002,29 @@
+ 	ut_ad((flush_type != BUF_FLUSH_LIST)
+ 	      || sync_thread_levels_empty_gen(TRUE));
+ #endif /* UNIV_SYNC_DEBUG */
+-	buf_pool_mutex_enter();
++	//buf_pool_mutex_enter();
++	mutex_enter(&buf_pool_mutex);
+ 
+ 	if ((buf_pool->n_flush[flush_type] > 0)
+ 	    || (buf_pool->init_flush[flush_type] == TRUE)) {
+ 
+ 		/* There is already a flush batch of the same type running */
+ 
+-		buf_pool_mutex_exit();
++		//buf_pool_mutex_exit();
++		mutex_exit(&buf_pool_mutex);
+ 
+ 		return(ULINT_UNDEFINED);
+ 	}
+ 
+ 	buf_pool->init_flush[flush_type] = TRUE;
+ 
++	mutex_exit(&buf_pool_mutex);
++
++	if (flush_type == BUF_FLUSH_LRU) {
++		mutex_enter(&LRU_list_mutex);
++	}
++	mutex_enter(&flush_list_mutex);
++
+ 	for (;;) {
+ flush_next:
+ 		/* If we have flushed enough, leave the loop */
+@@ -1047,7 +1071,11 @@
+ 				space = buf_page_get_space(bpage);
+ 				offset = buf_page_get_page_no(bpage);
+ 
+-				buf_pool_mutex_exit();
++				//buf_pool_mutex_exit();
++				if (flush_type == BUF_FLUSH_LRU) {
++					mutex_exit(&LRU_list_mutex);
++				}
++				mutex_exit(&flush_list_mutex);
+ 
+ 				old_page_count = page_count;
+ 
+@@ -1057,7 +1085,8 @@
+ 					space, offset, flush_type);
+ 				} else {
+ 					/* Try to flush the page only */
+-					buf_pool_mutex_enter();
++					//buf_pool_mutex_enter();
++					rw_lock_s_lock(&page_hash_latch);
+ 
+ 					mutex_t* block_mutex = buf_page_get_mutex(bpage);
+ 					mutex_enter(block_mutex);
+@@ -1073,7 +1102,11 @@
+ 				flush_type, offset,
+ 				page_count - old_page_count); */
+ 
+-				buf_pool_mutex_enter();
++				//buf_pool_mutex_enter();
++				if (flush_type == BUF_FLUSH_LRU) {
++					mutex_enter(&LRU_list_mutex);
++				}
++				mutex_enter(&flush_list_mutex);
+ 				goto flush_next;
+ 
+ 			} else if (flush_type == BUF_FLUSH_LRU) {
+@@ -1091,6 +1124,13 @@
+ 		break;
+ 	}
+ 
++	if (flush_type == BUF_FLUSH_LRU) {
++		mutex_exit(&LRU_list_mutex);
++	}
++	mutex_exit(&flush_list_mutex);
++
++	mutex_enter(&buf_pool_mutex);
++
+ 	buf_pool->init_flush[flush_type] = FALSE;
+ 
+ 	if (buf_pool->n_flush[flush_type] == 0) {
+@@ -1100,7 +1140,8 @@
+ 		os_event_set(buf_pool->no_flush[flush_type]);
+ 	}
+ 
+-	buf_pool_mutex_exit();
++	//buf_pool_mutex_exit();
++	mutex_exit(&buf_pool_mutex);
+ 
+ 	buf_flush_buffered_writes();
+ 
+@@ -1147,8 +1188,14 @@
+ 	buf_page_t*	bpage;
+ 	ulint		n_replaceable;
+ 	ulint		distance	= 0;
++	ibool		have_LRU_mutex = FALSE;
++
++	if(UT_LIST_GET_LEN(buf_pool->unzip_LRU))
++		have_LRU_mutex = TRUE;
+ 
+ 	//buf_pool_mutex_enter();
++	if (have_LRU_mutex)
++		mutex_enter(&LRU_list_mutex);
+ 
+ 	n_replaceable = UT_LIST_GET_LEN(buf_pool->free);
+ 
+@@ -1159,6 +1206,12 @@
+ 		   + BUF_FLUSH_EXTRA_MARGIN)
+ 	       && (distance < BUF_LRU_FREE_SEARCH_LEN)) {
+ 
++		if (!bpage->in_LRU_list) {
++			/* restart, but this is very optimistic */
++			bpage = UT_LIST_GET_LAST(buf_pool->LRU);
++			continue;
++		}
++
+ 		mutex_t* block_mutex = buf_page_get_mutex(bpage);
+ 
+ 		mutex_enter(block_mutex);
+@@ -1175,6 +1228,8 @@
+ 	}
+ 
+ 	//buf_pool_mutex_exit();
++	if (have_LRU_mutex)
++		mutex_exit(&LRU_list_mutex);
+ 
+ 	if (n_replaceable >= BUF_FLUSH_FREE_BLOCK_MARGIN) {
+ 
+@@ -1252,11 +1307,13 @@
+ {
+ 	ibool	ret;
+ 
+-	buf_pool_mutex_enter();
++	//buf_pool_mutex_enter();
++	mutex_enter(&flush_list_mutex);
+ 
+ 	ret = buf_flush_validate_low();
+ 
+-	buf_pool_mutex_exit();
++	//buf_pool_mutex_exit();
++	mutex_exit(&flush_list_mutex);
+ 
+ 	return(ret);
+ }
+diff -ruN innodb_plugin-1.0.3_orig/buf/buf0lru.c innodb_plugin-1.0.3_tmp/buf/buf0lru.c
+--- innodb_plugin-1.0.3_orig/buf/buf0lru.c	2009-03-19 18:47:48.000000000 +0900
++++ innodb_plugin-1.0.3_tmp/buf/buf0lru.c	2009-03-19 18:51:20.000000000 +0900
+@@ -129,25 +129,31 @@
+ void
+ buf_LRU_block_free_hashed_page(
+ /*===========================*/
+-	buf_block_t*	block);	/* in: block, must contain a file page and
++	buf_block_t*	block,	/* in: block, must contain a file page and
+ 				be in a state where it can be freed */
++	ibool		have_page_hash_mutex);
+ 
+ /**********************************************************************
+ Determines if the unzip_LRU list should be used for evicting a victim
+ instead of the general LRU list. */
+ UNIV_INLINE
+ ibool
+-buf_LRU_evict_from_unzip_LRU(void)
++buf_LRU_evict_from_unzip_LRU(
++	ibool		have_LRU_mutex)
+ /*==============================*/
+ 				/* out: TRUE if should use unzip_LRU */
+ {
+ 	ulint	io_avg;
+ 	ulint	unzip_avg;
+ 
+-	ut_ad(buf_pool_mutex_own());
++	//ut_ad(buf_pool_mutex_own());
+ 
++	if (!have_LRU_mutex)
++		mutex_enter(&LRU_list_mutex);
+ 	/* If the unzip_LRU list is empty, we can only use the LRU. */
+ 	if (UT_LIST_GET_LEN(buf_pool->unzip_LRU) == 0) {
++		if (!have_LRU_mutex)
++			mutex_exit(&LRU_list_mutex);
+ 		return(FALSE);
+ 	}
+ 
+@@ -156,14 +162,20 @@
+ 	decompressed pages in the buffer pool. */
+ 	if (UT_LIST_GET_LEN(buf_pool->unzip_LRU)
+ 	    <= UT_LIST_GET_LEN(buf_pool->LRU) / 10) {
++		if (!have_LRU_mutex)
++			mutex_exit(&LRU_list_mutex);
+ 		return(FALSE);
+ 	}
+ 
+ 	/* If eviction hasn't started yet, we assume by default
+ 	that a workload is disk bound. */
+ 	if (buf_pool->freed_page_clock == 0) {
++		if (!have_LRU_mutex)
++			mutex_exit(&LRU_list_mutex);
+ 		return(TRUE);
+ 	}
++	if (!have_LRU_mutex)
++		mutex_exit(&LRU_list_mutex);
+ 
+ 	/* Calculate the average over past intervals, and add the values
+ 	of the current interval. */
+@@ -229,7 +241,8 @@
+ 
+ 	page_arr = ut_malloc(sizeof(ulint)
+ 			     * BUF_LRU_DROP_SEARCH_HASH_SIZE);
+-	buf_pool_mutex_enter();
++	//buf_pool_mutex_enter();
++	mutex_enter(&LRU_list_mutex);
+ 
+ scan_again:
+ 	num_entries = 0;
+@@ -269,12 +282,14 @@
+ 			}
+ 			/* Array full. We release the buf_pool_mutex to
+ 			obey the latching order. */
+-			buf_pool_mutex_exit();
++			//buf_pool_mutex_exit();
++			mutex_exit(&LRU_list_mutex);
+ 
+ 			buf_LRU_drop_page_hash_batch(id, zip_size, page_arr,
+ 						     num_entries);
+ 			num_entries = 0;
+-			buf_pool_mutex_enter();
++			//buf_pool_mutex_enter();
++			mutex_enter(&LRU_list_mutex);
+ 		} else {
+ 			mutex_exit(block_mutex);
+ 		}
+@@ -299,7 +314,8 @@
+ 		}
+ 	}
+ 
+-	buf_pool_mutex_exit();
++	//buf_pool_mutex_exit();
++	mutex_exit(&LRU_list_mutex);
+ 
+ 	/* Drop any remaining batch of search hashed pages. */
+ 	buf_LRU_drop_page_hash_batch(id, zip_size, page_arr, num_entries);
+@@ -327,7 +343,10 @@
+ 	buf_LRU_drop_page_hash_for_tablespace(id);
+ 
+ scan_again:
+-	buf_pool_mutex_enter();
++	//buf_pool_mutex_enter();
++	mutex_enter(&LRU_list_mutex);
++	mutex_enter(&flush_list_mutex);
++	rw_lock_x_lock(&page_hash_latch);
+ 
+ 	all_freed = TRUE;
+ 
+@@ -369,7 +388,10 @@
+ 				ulint	page_no;
+ 				ulint	zip_size;
+ 
+-				buf_pool_mutex_exit();
++				//buf_pool_mutex_exit();
++				mutex_exit(&LRU_list_mutex);
++				mutex_exit(&flush_list_mutex);
++				rw_lock_x_unlock(&page_hash_latch);
+ 
+ 				zip_size = buf_page_get_zip_size(bpage);
+ 				page_no = buf_page_get_page_no(bpage);
+@@ -393,7 +415,7 @@
+ 			if (buf_LRU_block_remove_hashed_page(bpage, TRUE)
+ 			    != BUF_BLOCK_ZIP_FREE) {
+ 				buf_LRU_block_free_hashed_page((buf_block_t*)
+-							       bpage);
++							       bpage, TRUE);
+ 			} else {
+ 				/* The block_mutex should have been
+ 				released by buf_LRU_block_remove_hashed_page()
+@@ -416,7 +438,10 @@
+ 		bpage = prev_bpage;
+ 	}
+ 
+-	buf_pool_mutex_exit();
++	//buf_pool_mutex_exit();
++	mutex_exit(&LRU_list_mutex);
++	mutex_exit(&flush_list_mutex);
++	rw_lock_x_unlock(&page_hash_latch);
+ 
+ 	if (!all_freed) {
+ 		os_thread_sleep(20000);
+@@ -439,14 +464,16 @@
+ 	ulint			len;
+ 	ulint			limit;
+ 
+-	buf_pool_mutex_enter();
++	//buf_pool_mutex_enter();
++	mutex_enter(&LRU_list_mutex);
+ 
+ 	len = UT_LIST_GET_LEN(buf_pool->LRU);
+ 
+ 	if (len < BUF_LRU_OLD_MIN_LEN) {
+ 		/* The LRU list is too short to do read-ahead */
+ 
+-		buf_pool_mutex_exit();
++		//buf_pool_mutex_exit();
++		mutex_exit(&LRU_list_mutex);
+ 
+ 		return(0);
+ 	}
+@@ -455,7 +482,8 @@
+ 
+ 	limit = buf_page_get_LRU_position(bpage) - len / BUF_LRU_INITIAL_RATIO;
+ 
+-	buf_pool_mutex_exit();
++	//buf_pool_mutex_exit();
++	mutex_exit(&LRU_list_mutex);
+ 
+ 	return(limit);
+ }
+@@ -470,7 +498,8 @@
+ {
+ 	buf_page_t*	b;
+ 
+-	ut_ad(buf_pool_mutex_own());
++	//ut_ad(buf_pool_mutex_own());
++	ut_ad(mutex_own(&flush_list_mutex));
+ 	ut_ad(buf_page_get_state(bpage) == BUF_BLOCK_ZIP_PAGE);
+ 
+ 	/* Find the first successor of bpage in the LRU list
+@@ -478,7 +507,7 @@
+ 	b = bpage;
+ 	do {
+ 		b = UT_LIST_GET_NEXT(LRU, b);
+-	} while (b && buf_page_get_state(b) != BUF_BLOCK_ZIP_PAGE);
++	} while (b && (buf_page_get_state(b) != BUF_BLOCK_ZIP_PAGE || !b->in_LRU_list));
+ 
+ 	/* Insert bpage before b, i.e., after the predecessor of b. */
+ 	if (b) {
+@@ -500,16 +529,17 @@
+ buf_LRU_free_from_unzip_LRU_list(
+ /*=============================*/
+ 				/* out: TRUE if freed */
+-	ulint	n_iterations)	/* in: how many times this has been called
++	ulint	n_iterations,	/* in: how many times this has been called
+ 				repeatedly without result: a high value means
+ 				that we should search farther; we will search
+ 				n_iterations / 5 of the unzip_LRU list,
+ 				or nothing if n_iterations >= 5 */
++	ibool	have_LRU_mutex)
+ {
+ 	buf_block_t*	block;
+ 	ulint		distance;
+ 
+-	ut_ad(buf_pool_mutex_own());
++	//ut_ad(buf_pool_mutex_own()); /* optimistic */
+ 
+ 	/* Theoratically it should be much easier to find a victim
+ 	from unzip_LRU as we can choose even a dirty block (as we'll
+@@ -519,7 +549,7 @@
+ 	if we have done five iterations so far. */
+ 
+ 	if (UNIV_UNLIKELY(n_iterations >= 5)
+-	    || !buf_LRU_evict_from_unzip_LRU()) {
++	    || !buf_LRU_evict_from_unzip_LRU(have_LRU_mutex)) {
+ 
+ 		return(FALSE);
+ 	}
+@@ -527,18 +557,23 @@
+ 	distance = 100 + (n_iterations
+ 			  * UT_LIST_GET_LEN(buf_pool->unzip_LRU)) / 5;
+ 
++restart:
+ 	for (block = UT_LIST_GET_LAST(buf_pool->unzip_LRU);
+ 	     UNIV_LIKELY(block != NULL) && UNIV_LIKELY(distance > 0);
+ 	     block = UT_LIST_GET_PREV(unzip_LRU, block), distance--) {
++		if (!block->in_unzip_LRU_list || !block->page.in_LRU_list
++		    || buf_block_get_state(block) != BUF_BLOCK_FILE_PAGE)
++			goto restart;
+ 
+ 		enum buf_lru_free_block_status	freed;
+ 
+-		ut_ad(buf_block_get_state(block) == BUF_BLOCK_FILE_PAGE);
+-		ut_ad(block->in_unzip_LRU_list);
+-		ut_ad(block->page.in_LRU_list);
++		/* optimistic: no longer asserted; the same three conditions are tested at the top of the loop body and trigger a restart */
++		//ut_ad(buf_block_get_state(block) == BUF_BLOCK_FILE_PAGE);
++		//ut_ad(block->in_unzip_LRU_list);
++		//ut_ad(block->page.in_LRU_list);
+ 
+ 		mutex_enter(&block->mutex);
+-		freed = buf_LRU_free_block(&block->page, FALSE, NULL);
++		freed = buf_LRU_free_block(&block->page, FALSE, NULL, have_LRU_mutex);
+ 		mutex_exit(&block->mutex);
+ 
+ 		switch (freed) {
+@@ -571,33 +606,39 @@
+ buf_LRU_free_from_common_LRU_list(
+ /*==============================*/
+ 				/* out: TRUE if freed */
+-	ulint	n_iterations)	/* in: how many times this has been called
++	ulint	n_iterations,	/* in: how many times this has been called
+ 				repeatedly without result: a high value means
+ 				that we should search farther; if
+ 				n_iterations < 10, then we search
+ 				n_iterations / 10 * buf_pool->curr_size
+ 				pages from the end of the LRU list */
++	ibool	have_LRU_mutex)
+ {
+ 	buf_page_t*	bpage;
+ 	ulint		distance;
+ 
+-	ut_ad(buf_pool_mutex_own());
++	//ut_ad(buf_pool_mutex_own()); /* optimistic */
+ 
+ 	distance = 100 + (n_iterations * buf_pool->curr_size) / 10;
+ 
++restart:
+ 	for (bpage = UT_LIST_GET_LAST(buf_pool->LRU);
+ 	     UNIV_LIKELY(bpage != NULL) && UNIV_LIKELY(distance > 0);
+ 	     bpage = UT_LIST_GET_PREV(LRU, bpage), distance--) {
++		if (!bpage->in_LRU_list
++		    || buf_page_get_state(bpage) == BUF_BLOCK_ZIP_FREE)
++			goto restart;
+ 
+ 		enum buf_lru_free_block_status	freed;
+ 		mutex_t*			block_mutex
+ 			= buf_page_get_mutex(bpage);
+ 
+-		ut_ad(buf_page_in_file(bpage));
+-		ut_ad(bpage->in_LRU_list);
++		/* optimistic: no longer asserted; the top of the loop body tests in_LRU_list and rejects BUF_BLOCK_ZIP_FREE instead */
++		//ut_ad(buf_page_in_file(bpage));
++		//ut_ad(bpage->in_LRU_list);
+ 
+ 		mutex_enter(block_mutex);
+-		freed = buf_LRU_free_block(bpage, TRUE, NULL);
++		freed = buf_LRU_free_block(bpage, TRUE, NULL, have_LRU_mutex);
+ 		mutex_exit(block_mutex);
+ 
+ 		switch (freed) {
+@@ -640,22 +681,33 @@
+ 				n_iterations / 5 of the unzip_LRU list. */
+ {
+ 	ibool	freed = FALSE;
++	ibool	have_LRU_mutex = FALSE;
++
++	if (UT_LIST_GET_LEN(buf_pool->unzip_LRU))
++		have_LRU_mutex = TRUE;
+ 
+-	buf_pool_mutex_enter();
++	/* optimistic search: LRU_list_mutex is taken up front only when unzip_LRU was seen non-empty (have_LRU_mutex) */
++	//buf_pool_mutex_enter();
++	if (have_LRU_mutex)
++		mutex_enter(&LRU_list_mutex);
+ 
+-	freed = buf_LRU_free_from_unzip_LRU_list(n_iterations);
++	freed = buf_LRU_free_from_unzip_LRU_list(n_iterations, have_LRU_mutex);
+ 
+ 	if (!freed) {
+-		freed = buf_LRU_free_from_common_LRU_list(n_iterations);
++		freed = buf_LRU_free_from_common_LRU_list(n_iterations, have_LRU_mutex);
+ 	}
+ 
++	mutex_enter(&buf_pool_mutex);
+ 	if (!freed) {
+ 		buf_pool->LRU_flush_ended = 0;
+ 	} else if (buf_pool->LRU_flush_ended > 0) {
+ 		buf_pool->LRU_flush_ended--;
+ 	}
++	mutex_exit(&buf_pool_mutex);
+ 
+-	buf_pool_mutex_exit();
++	//buf_pool_mutex_exit();
++	if (have_LRU_mutex)
++		mutex_exit(&LRU_list_mutex);
+ 
+ 	return(freed);
+ }
+@@ -673,18 +725,22 @@
+ buf_LRU_try_free_flushed_blocks(void)
+ /*=================================*/
+ {
+-	buf_pool_mutex_enter();
++	//buf_pool_mutex_enter();
++	mutex_enter(&buf_pool_mutex);
+ 
+ 	while (buf_pool->LRU_flush_ended > 0) {
+ 
+-		buf_pool_mutex_exit();
++		//buf_pool_mutex_exit();
++		mutex_exit(&buf_pool_mutex);
+ 
+ 		buf_LRU_search_and_free_block(1);
+ 
+-		buf_pool_mutex_enter();
++		//buf_pool_mutex_enter();
++		mutex_enter(&buf_pool_mutex);
+ 	}
+ 
+-	buf_pool_mutex_exit();
++	//buf_pool_mutex_exit();
++	mutex_exit(&buf_pool_mutex);
+ }
+ 
+ /**********************************************************************
+@@ -700,7 +756,9 @@
+ {
+ 	ibool	ret	= FALSE;
+ 
+-	buf_pool_mutex_enter();
++	//buf_pool_mutex_enter();
++	mutex_enter(&LRU_list_mutex);
++	mutex_enter(&free_list_mutex);
+ 
+ 	if (!recv_recovery_on && UT_LIST_GET_LEN(buf_pool->free)
+ 	    + UT_LIST_GET_LEN(buf_pool->LRU) < buf_pool->curr_size / 4) {
+@@ -708,7 +766,9 @@
+ 		ret = TRUE;
+ 	}
+ 
+-	buf_pool_mutex_exit();
++	//buf_pool_mutex_exit();
++	mutex_exit(&LRU_list_mutex);
++	mutex_exit(&free_list_mutex);
+ 
+ 	return(ret);
+ }
+@@ -725,9 +785,10 @@
+ {
+ 	buf_block_t*	block;
+ 
+-	ut_ad(buf_pool_mutex_own());
++	//ut_ad(buf_pool_mutex_own());
+ 
+-	block = (buf_block_t*) UT_LIST_GET_FIRST(buf_pool->free);
++	mutex_enter(&free_list_mutex);
++	block = (buf_block_t*) UT_LIST_GET_LAST(buf_pool->free);
+ 
+ 	if (block) {
+ 		ut_ad(block->page.in_free_list);
+@@ -737,12 +798,16 @@
+ 		ut_a(!buf_page_in_file(&block->page));
+ 		UT_LIST_REMOVE(list, buf_pool->free, (&block->page));
+ 
++		mutex_exit(&free_list_mutex);
++
+ 		mutex_enter(&block->mutex);
+ 
+ 		buf_block_set_state(block, BUF_BLOCK_READY_FOR_USE);
+ 		UNIV_MEM_ALLOC(block->frame, UNIV_PAGE_SIZE);
+ 
+ 		mutex_exit(&block->mutex);
++	} else {
++		mutex_exit(&free_list_mutex);
+ 	}
+ 
+ 	return(block);
+@@ -767,7 +832,7 @@
+ 	ibool		mon_value_was	= FALSE;
+ 	ibool		started_monitor	= FALSE;
+ loop:
+-	buf_pool_mutex_enter();
++	//buf_pool_mutex_enter();
+ 
+ 	if (!recv_recovery_on && UT_LIST_GET_LEN(buf_pool->free)
+ 	    + UT_LIST_GET_LEN(buf_pool->LRU) < buf_pool->curr_size / 20) {
+@@ -847,14 +912,16 @@
+ 		if (UNIV_UNLIKELY(zip_size)) {
+ 			ibool	lru;
+ 			page_zip_set_size(&block->page.zip, zip_size);
+-			block->page.zip.data = buf_buddy_alloc(zip_size, &lru);
++			mutex_enter(&LRU_list_mutex);
++			block->page.zip.data = buf_buddy_alloc(zip_size, &lru, FALSE);
++			mutex_exit(&LRU_list_mutex);
+ 			UNIV_MEM_DESC(block->page.zip.data, zip_size, block);
+ 		} else {
+ 			page_zip_set_size(&block->page.zip, 0);
+ 			block->page.zip.data = NULL;
+ 		}
+ 
+-		buf_pool_mutex_exit();
++		//buf_pool_mutex_exit();
+ 
+ 		if (started_monitor) {
+ 			srv_print_innodb_monitor = mon_value_was;
+@@ -866,7 +933,7 @@
+ 	/* If no block was in the free list, search from the end of the LRU
+ 	list and try to free a block there */
+ 
+-	buf_pool_mutex_exit();
++	//buf_pool_mutex_exit();
+ 
+ 	freed = buf_LRU_search_and_free_block(n_iterations);
+ 
+@@ -915,18 +982,21 @@
+ 
+ 	os_aio_simulated_wake_handler_threads();
+ 
+-	buf_pool_mutex_enter();
++	//buf_pool_mutex_enter();
++	mutex_enter(&buf_pool_mutex);
+ 
+ 	if (buf_pool->LRU_flush_ended > 0) {
+ 		/* We have written pages in an LRU flush. To make the insert
+ 		buffer more efficient, we try to move these pages to the free
+ 		list. */
+ 
+-		buf_pool_mutex_exit();
++		//buf_pool_mutex_exit();
++		mutex_exit(&buf_pool_mutex);
+ 
+ 		buf_LRU_try_free_flushed_blocks();
+ 	} else {
+-		buf_pool_mutex_exit();
++		//buf_pool_mutex_exit();
++		mutex_exit(&buf_pool_mutex);
+ 	}
+ 
+ 	if (n_iterations > 10) {
+@@ -951,7 +1021,8 @@
+ 	ulint	new_len;
+ 
+ 	ut_a(buf_pool->LRU_old);
+-	ut_ad(buf_pool_mutex_own());
++	//ut_ad(buf_pool_mutex_own());
++	ut_ad(mutex_own(&LRU_list_mutex));
+ #if 3 * (BUF_LRU_OLD_MIN_LEN / 8) <= BUF_LRU_OLD_TOLERANCE + 5
+ # error "3 * (BUF_LRU_OLD_MIN_LEN / 8) <= BUF_LRU_OLD_TOLERANCE + 5"
+ #endif
+@@ -1009,7 +1080,8 @@
+ {
+ 	buf_page_t*	bpage;
+ 
+-	ut_ad(buf_pool_mutex_own());
++	//ut_ad(buf_pool_mutex_own());
++	ut_ad(mutex_own(&LRU_list_mutex));
+ 	ut_a(UT_LIST_GET_LEN(buf_pool->LRU) == BUF_LRU_OLD_MIN_LEN);
+ 
+ 	/* We first initialize all blocks in the LRU list as old and then use
+@@ -1041,13 +1113,14 @@
+ 	ut_ad(buf_pool);
+ 	ut_ad(bpage);
+ 	ut_ad(buf_page_in_file(bpage));
+-	ut_ad(buf_pool_mutex_own());
++	//ut_ad(buf_pool_mutex_own());
++	ut_ad(mutex_own(&LRU_list_mutex));
+ 
+ 	if (buf_page_belongs_to_unzip_LRU(bpage)) {
+ 		buf_block_t*	block = (buf_block_t*) bpage;
+ 
+ 		ut_ad(block->in_unzip_LRU_list);
+-		ut_d(block->in_unzip_LRU_list = FALSE);
++		block->in_unzip_LRU_list = FALSE;
+ 
+ 		UT_LIST_REMOVE(unzip_LRU, buf_pool->unzip_LRU, block);
+ 	}
+@@ -1063,7 +1136,8 @@
+ {
+ 	ut_ad(buf_pool);
+ 	ut_ad(bpage);
+-	ut_ad(buf_pool_mutex_own());
++	//ut_ad(buf_pool_mutex_own());
++	ut_ad(mutex_own(&LRU_list_mutex));
+ 
+ 	ut_a(buf_page_in_file(bpage));
+ 
+@@ -1090,7 +1164,7 @@
+ 
+ 	/* Remove the block from the LRU list */
+ 	UT_LIST_REMOVE(LRU, buf_pool->LRU, bpage);
+-	ut_d(bpage->in_LRU_list = FALSE);
++	bpage->in_LRU_list = FALSE;
+ 
+ 	buf_unzip_LRU_remove_block_if_needed(bpage);
+ 
+@@ -1126,12 +1200,13 @@
+ {
+ 	ut_ad(buf_pool);
+ 	ut_ad(block);
+-	ut_ad(buf_pool_mutex_own());
++	//ut_ad(buf_pool_mutex_own());
++	ut_ad(mutex_own(&LRU_list_mutex));
+ 
+ 	ut_a(buf_page_belongs_to_unzip_LRU(&block->page));
+ 
+ 	ut_ad(!block->in_unzip_LRU_list);
+-	ut_d(block->in_unzip_LRU_list = TRUE);
++	block->in_unzip_LRU_list = TRUE;
+ 
+ 	if (old) {
+ 		UT_LIST_ADD_LAST(unzip_LRU, buf_pool->unzip_LRU, block);
+@@ -1152,7 +1227,8 @@
+ 
+ 	ut_ad(buf_pool);
+ 	ut_ad(bpage);
+-	ut_ad(buf_pool_mutex_own());
++	//ut_ad(buf_pool_mutex_own());
++	ut_ad(mutex_own(&LRU_list_mutex));
+ 
+ 	ut_a(buf_page_in_file(bpage));
+ 
+@@ -1166,7 +1242,7 @@
+ 
+ 	ut_ad(!bpage->in_LRU_list);
+ 	UT_LIST_ADD_LAST(LRU, buf_pool->LRU, bpage);
+-	ut_d(bpage->in_LRU_list = TRUE);
++	bpage->in_LRU_list = TRUE;
+ 
+ 	buf_page_set_old(bpage, TRUE);
+ 
+@@ -1212,7 +1288,8 @@
+ {
+ 	ut_ad(buf_pool);
+ 	ut_ad(bpage);
+-	ut_ad(buf_pool_mutex_own());
++	//ut_ad(buf_pool_mutex_own());
++	ut_ad(mutex_own(&LRU_list_mutex));
+ 
+ 	ut_a(buf_page_in_file(bpage));
+ 	ut_ad(!bpage->in_LRU_list);
+@@ -1243,7 +1320,7 @@
+ 		bpage->LRU_position = (buf_pool->LRU_old)->LRU_position;
+ 	}
+ 
+-	ut_d(bpage->in_LRU_list = TRUE);
++	bpage->in_LRU_list = TRUE;
+ 
+ 	buf_page_set_old(bpage, old);
+ 
+@@ -1331,22 +1408,24 @@
+ 	buf_page_t*	bpage,	/* in: block to be freed */
+ 	ibool		zip,	/* in: TRUE if should remove also the
+ 				compressed page of an uncompressed page */
+-	ibool*		buf_pool_mutex_released)
++	ibool*		buf_pool_mutex_released,
+ 				/* in: pointer to a variable that will
+ 				be assigned TRUE if buf_pool_mutex
+ 				was temporarily released, or NULL */
++	ibool		have_LRU_mutex)
+ {
+ 	buf_page_t*	b = NULL;
+ 	mutex_t*	block_mutex = buf_page_get_mutex(bpage);
+ 
+-	ut_ad(buf_pool_mutex_own());
+-	ut_ad(mutex_own(block_mutex));
+-	ut_ad(buf_page_in_file(bpage));
+-	ut_ad(bpage->in_LRU_list);
+-	ut_ad(!bpage->in_flush_list == !bpage->oldest_modification);
++	//ut_ad(buf_pool_mutex_own());
++	/* optimistic */
++	//ut_ad(mutex_own(block_mutex));
++	//ut_ad(buf_page_in_file(bpage));
++	//ut_ad(bpage->in_LRU_list);
++	//ut_ad(!bpage->in_flush_list == !bpage->oldest_modification);
+ 	UNIV_MEM_ASSERT_RW(bpage, sizeof *bpage);
+ 
+-	if (!buf_page_can_relocate(bpage)) {
++	if (!bpage->in_LRU_list || !block_mutex || !buf_page_can_relocate(bpage)) {
+ 
+ 		/* Do not free buffer-fixed or I/O-fixed blocks. */
+ 		return(BUF_LRU_NOT_FREED);
+@@ -1378,15 +1457,15 @@
+ 		If it cannot be allocated (without freeing a block
+ 		from the LRU list), refuse to free bpage. */
+ alloc:
+-		buf_pool_mutex_exit_forbid();
+-		b = buf_buddy_alloc(sizeof *b, NULL);
+-		buf_pool_mutex_exit_allow();
++		//buf_pool_mutex_exit_forbid();
++		b = buf_buddy_alloc(sizeof *b, NULL, FALSE);
++		//buf_pool_mutex_exit_allow();
+ 
+ 		if (UNIV_UNLIKELY(!b)) {
+ 			return(BUF_LRU_CANNOT_RELOCATE);
+ 		}
+ 
+-		memcpy(b, bpage, sizeof *b);
++		//memcpy(b, bpage, sizeof *b);
+ 	}
+ 
+ #ifdef UNIV_DEBUG
+@@ -1397,6 +1476,41 @@
+ 	}
+ #endif /* UNIV_DEBUG */
+ 
++	/* latch order: drop block_mutex, take LRU_list_mutex (if not held), flush_list_mutex and page_hash_latch, then re-enter block_mutex */
++	mutex_exit(block_mutex);
++
++	if (!have_LRU_mutex)
++		mutex_enter(&LRU_list_mutex); /* optimistic: have_LRU_mutex is FALSE, so take it only now */
++	mutex_enter(&flush_list_mutex);
++	rw_lock_x_lock(&page_hash_latch);
++	mutex_enter(block_mutex);
++
++	/* block_mutex was released above: re-check that the block is still in the LRU list, still maps to the same mutex and can be relocated */
++	if (!bpage->in_LRU_list || block_mutex != buf_page_get_mutex(bpage)
++	    || !buf_page_can_relocate(bpage)) {
++not_freed:
++		if (b) {
++			buf_buddy_free(b, sizeof *b, TRUE);
++		}
++		if (!have_LRU_mutex)
++			mutex_exit(&LRU_list_mutex);
++		mutex_exit(&flush_list_mutex);
++		rw_lock_x_unlock(&page_hash_latch);
++		return(BUF_LRU_NOT_FREED); /* block_mutex is still held on this path */
++	} else if (zip || !bpage->zip.data) {
++		if (bpage->oldest_modification)
++			goto not_freed;
++	} else if (bpage->oldest_modification) {
++		if (buf_page_get_state(bpage) != BUF_BLOCK_FILE_PAGE) {
++			ut_ad(buf_page_get_state(bpage) == BUF_BLOCK_ZIP_DIRTY);
++			goto not_freed;
++		}
++	}
++
++	if (b) { /* copy bpage into b only now that the re-check has passed */
++		memcpy(b, bpage, sizeof *b);
++	}
++
+ 	if (buf_LRU_block_remove_hashed_page(bpage, zip)
+ 	    != BUF_BLOCK_ZIP_FREE) {
+ 		ut_a(bpage->buf_fix_count == 0);
+@@ -1408,6 +1522,10 @@
+ 
+ 			ut_a(!buf_page_hash_get(bpage->space, bpage->offset));
+ 
++			while (prev_b && !prev_b->in_LRU_list) {
++				prev_b = UT_LIST_GET_PREV(LRU, prev_b);
++			}
++
+ 			b->state = b->oldest_modification
+ 				? BUF_BLOCK_ZIP_DIRTY
+ 				: BUF_BLOCK_ZIP_PAGE;
+@@ -1478,7 +1596,7 @@
+ 					buf_LRU_old_init();
+ 				}
+ 			} else {
+-				ut_d(b->in_LRU_list = FALSE);
++				b->in_LRU_list = FALSE;
+ 				buf_LRU_add_block_low(b, buf_page_is_old(b));
+ 			}
+ 
+@@ -1521,7 +1639,10 @@
+ 			*buf_pool_mutex_released = TRUE;
+ 		}
+ 
+-		buf_pool_mutex_exit();
++		//buf_pool_mutex_exit();
++		mutex_exit(&LRU_list_mutex);
++		mutex_exit(&flush_list_mutex);
++		rw_lock_x_unlock(&page_hash_latch);
+ 		mutex_exit(block_mutex);
+ 
+ 		/* Remove possible adaptive hash index on the page.
+@@ -1553,7 +1674,9 @@
+ 				: BUF_NO_CHECKSUM_MAGIC);
+ 		}
+ 
+-		buf_pool_mutex_enter();
++		//buf_pool_mutex_enter();
++		if (have_LRU_mutex)
++			mutex_enter(&LRU_list_mutex);
+ 		mutex_enter(block_mutex);
+ 
+ 		if (b) {
+@@ -1563,13 +1686,18 @@
+ 			mutex_exit(&buf_pool_zip_mutex);
+ 		}
+ 
+-		buf_LRU_block_free_hashed_page((buf_block_t*) bpage);
++		buf_LRU_block_free_hashed_page((buf_block_t*) bpage, FALSE);
+ 	} else {
+ 		/* The block_mutex should have been released by
+ 		buf_LRU_block_remove_hashed_page() when it returns
+ 		BUF_BLOCK_ZIP_FREE. */
+ 		ut_ad(block_mutex == &buf_pool_zip_mutex);
+ 		mutex_enter(block_mutex);
++
++		if (!have_LRU_mutex)
++			mutex_exit(&LRU_list_mutex);
++		mutex_exit(&flush_list_mutex);
++		rw_lock_x_unlock(&page_hash_latch);
+ 	}
+ 
+ 	return(BUF_LRU_FREED);
+@@ -1581,12 +1709,13 @@
+ void
+ buf_LRU_block_free_non_file_page(
+ /*=============================*/
+-	buf_block_t*	block)	/* in: block, must not contain a file page */
++	buf_block_t*	block,	/* in: block, must not contain a file page */
++	ibool		have_page_hash_mutex)
+ {
+ 	void*	data;
+ 
+ 	ut_ad(block);
+-	ut_ad(buf_pool_mutex_own());
++	//ut_ad(buf_pool_mutex_own());
+ 	ut_ad(mutex_own(&block->mutex));
+ 
+ 	switch (buf_block_get_state(block)) {
+@@ -1620,15 +1749,17 @@
+ 	if (data) {
+ 		block->page.zip.data = NULL;
+ 		mutex_exit(&block->mutex);
+-		buf_pool_mutex_exit_forbid();
+-		buf_buddy_free(data, page_zip_get_size(&block->page.zip));
+-		buf_pool_mutex_exit_allow();
++		//buf_pool_mutex_exit_forbid();
++		buf_buddy_free(data, page_zip_get_size(&block->page.zip), have_page_hash_mutex);
++		//buf_pool_mutex_exit_allow();
+ 		mutex_enter(&block->mutex);
+ 		page_zip_set_size(&block->page.zip, 0);
+ 	}
+ 
++	mutex_enter(&free_list_mutex);
+ 	UT_LIST_ADD_FIRST(list, buf_pool->free, (&block->page));
+ 	ut_d(block->page.in_free_list = TRUE);
++	mutex_exit(&free_list_mutex);
+ 
+ 	UNIV_MEM_ASSERT_AND_FREE(block->frame, UNIV_PAGE_SIZE);
+ }
+@@ -1657,7 +1788,11 @@
+ {
+ 	const buf_page_t*	hashed_bpage;
+ 	ut_ad(bpage);
+-	ut_ad(buf_pool_mutex_own());
++	//ut_ad(buf_pool_mutex_own());
++	ut_ad(mutex_own(&LRU_list_mutex));
++#ifdef UNIV_SYNC_DEBUG
++	ut_ad(rw_lock_own(&page_hash_latch, RW_LOCK_EX));
++#endif
+ 	ut_ad(mutex_own(buf_page_get_mutex(bpage)));
+ 
+ 	ut_a(buf_page_get_io_fix(bpage) == BUF_IO_NONE);
+@@ -1758,7 +1893,9 @@
+ 
+ #if defined UNIV_DEBUG || defined UNIV_BUF_DEBUG
+ 		mutex_exit(buf_page_get_mutex(bpage));
+-		buf_pool_mutex_exit();
++		//buf_pool_mutex_exit();
++		mutex_exit(&LRU_list_mutex);
++		rw_lock_x_unlock(&page_hash_latch);
+ 		buf_print();
+ 		buf_LRU_print();
+ 		buf_validate();
+@@ -1784,11 +1921,11 @@
+ 		UT_LIST_REMOVE(list, buf_pool->zip_clean, bpage);
+ 
+ 		mutex_exit(&buf_pool_zip_mutex);
+-		buf_pool_mutex_exit_forbid();
++		//buf_pool_mutex_exit_forbid();
+ 		buf_buddy_free(bpage->zip.data,
+-			       page_zip_get_size(&bpage->zip));
+-		buf_buddy_free(bpage, sizeof(*bpage));
+-		buf_pool_mutex_exit_allow();
++			       page_zip_get_size(&bpage->zip), TRUE);
++		buf_buddy_free(bpage, sizeof(*bpage), TRUE);
++		//buf_pool_mutex_exit_allow();
+ 		UNIV_MEM_UNDESC(bpage);
+ 		return(BUF_BLOCK_ZIP_FREE);
+ 
+@@ -1807,9 +1944,9 @@
+ 			bpage->zip.data = NULL;
+ 
+ 			mutex_exit(&((buf_block_t*) bpage)->mutex);
+-			buf_pool_mutex_exit_forbid();
+-			buf_buddy_free(data, page_zip_get_size(&bpage->zip));
+-			buf_pool_mutex_exit_allow();
++			//buf_pool_mutex_exit_forbid();
++			buf_buddy_free(data, page_zip_get_size(&bpage->zip), TRUE);
++			//buf_pool_mutex_exit_allow();
+ 			mutex_enter(&((buf_block_t*) bpage)->mutex);
+ 			page_zip_set_size(&bpage->zip, 0);
+ 		}
+@@ -1835,15 +1972,16 @@
+ void
+ buf_LRU_block_free_hashed_page(
+ /*===========================*/
+-	buf_block_t*	block)	/* in: block, must contain a file page and
++	buf_block_t*	block,	/* in: block, must contain a file page and
+ 				be in a state where it can be freed */
++	ibool		have_page_hash_mutex)
+ {
+-	ut_ad(buf_pool_mutex_own());
++	//ut_ad(buf_pool_mutex_own());
+ 	ut_ad(mutex_own(&block->mutex));
+ 
+ 	buf_block_set_state(block, BUF_BLOCK_MEMORY);
+ 
+-	buf_LRU_block_free_non_file_page(block);
++	buf_LRU_block_free_non_file_page(block, have_page_hash_mutex);
+ }
+ 
+ /************************************************************************
+@@ -1861,7 +1999,8 @@
+ 		goto func_exit;
+ 	}
+ 
+-	buf_pool_mutex_enter();
++	//buf_pool_mutex_enter();
++	mutex_enter(&buf_pool_mutex);
+ 
+ 	/* Update the index. */
+ 	item = &buf_LRU_stat_arr[buf_LRU_stat_arr_ind];
+@@ -1875,7 +2014,8 @@
+ 	/* Put current entry in the array. */
+ 	memcpy(item, &buf_LRU_stat_cur, sizeof *item);
+ 
+-	buf_pool_mutex_exit();
++	//buf_pool_mutex_exit();
++	mutex_exit(&buf_pool_mutex);
+ 
+ func_exit:
+ 	/* Clear the current entry. */
+@@ -1897,7 +2037,8 @@
+ 	ulint		LRU_pos;
+ 
+ 	ut_ad(buf_pool);
+-	buf_pool_mutex_enter();
++	//buf_pool_mutex_enter();
++	mutex_enter(&LRU_list_mutex);
+ 
+ 	if (UT_LIST_GET_LEN(buf_pool->LRU) >= BUF_LRU_OLD_MIN_LEN) {
+ 
+@@ -1956,6 +2097,9 @@
+ 		ut_a(buf_pool->LRU_old_len == old_len);
+ 	}
+ 
++	mutex_exit(&LRU_list_mutex);
++	mutex_enter(&free_list_mutex);
++
+ 	UT_LIST_VALIDATE(list, buf_page_t, buf_pool->free);
+ 
+ 	for (bpage = UT_LIST_GET_FIRST(buf_pool->free);
+@@ -1965,6 +2109,9 @@
+ 		ut_a(buf_page_get_state(bpage) == BUF_BLOCK_NOT_USED);
+ 	}
+ 
++	mutex_exit(&free_list_mutex);
++	mutex_enter(&LRU_list_mutex);
++
+ 	UT_LIST_VALIDATE(unzip_LRU, buf_block_t, buf_pool->unzip_LRU);
+ 
+ 	for (block = UT_LIST_GET_FIRST(buf_pool->unzip_LRU);
+@@ -1976,7 +2123,8 @@
+ 		ut_a(buf_page_belongs_to_unzip_LRU(&block->page));
+ 	}
+ 
+-	buf_pool_mutex_exit();
++	//buf_pool_mutex_exit();
++	mutex_exit(&LRU_list_mutex);
+ 	return(TRUE);
+ }
+ #endif /* UNIV_DEBUG || UNIV_BUF_DEBUG */
+@@ -1992,7 +2140,8 @@
+ 	const buf_page_t*	bpage;
+ 
+ 	ut_ad(buf_pool);
+-	buf_pool_mutex_enter();
++	//buf_pool_mutex_enter();
++	mutex_enter(&LRU_list_mutex);
+ 
+ 	fprintf(stderr, "Pool ulint clock %lu\n",
+ 		(ulong) buf_pool->ulint_clock);
+@@ -2055,6 +2204,7 @@
+ 		bpage = UT_LIST_GET_NEXT(LRU, bpage);
+ 	}
+ 
+-	buf_pool_mutex_exit();
++	//buf_pool_mutex_exit();
++	mutex_exit(&LRU_list_mutex);
+ }
+ #endif /* UNIV_DEBUG_PRINT || UNIV_DEBUG || UNIV_BUF_DEBUG */
+diff -ruN innodb_plugin-1.0.3_orig/buf/buf0rea.c innodb_plugin-1.0.3_tmp/buf/buf0rea.c
+--- innodb_plugin-1.0.3_orig/buf/buf0rea.c	2009-03-19 18:47:48.000000000 +0900
++++ innodb_plugin-1.0.3_tmp/buf/buf0rea.c	2009-03-19 18:51:20.000000000 +0900
+@@ -246,18 +246,22 @@
+ 
+ 	LRU_recent_limit = buf_LRU_get_recent_limit();
+ 
+-	buf_pool_mutex_enter();
++	//buf_pool_mutex_enter();
++	mutex_enter(&buf_pool_mutex);
+ 
+ 	if (buf_pool->n_pend_reads
+ 	    > buf_pool->curr_size / BUF_READ_AHEAD_PEND_LIMIT) {
+-		buf_pool_mutex_exit();
++		//buf_pool_mutex_exit();
++		mutex_exit(&buf_pool_mutex);
+ 
+ 		return(0);
+ 	}
++	mutex_exit(&buf_pool_mutex);
+ 
+ 	/* Count how many blocks in the area have been recently accessed,
+ 	that is, reside near the start of the LRU list. */
+ 
++	rw_lock_s_lock(&page_hash_latch);
+ 	for (i = low; i < high; i++) {
+ 		const buf_page_t*	bpage = buf_page_hash_get(space, i);
+ 
+@@ -269,13 +273,15 @@
+ 
+ 			if (recent_blocks >= BUF_READ_AHEAD_RANDOM_THRESHOLD) {
+ 
+-				buf_pool_mutex_exit();
++				//buf_pool_mutex_exit();
++				rw_lock_s_unlock(&page_hash_latch);
+ 				goto read_ahead;
+ 			}
+ 		}
+ 	}
+ 
+-	buf_pool_mutex_exit();
++	//buf_pool_mutex_exit();
++	rw_lock_s_unlock(&page_hash_latch);
+ 	/* Do nothing */
+ 	return(0);
+ 
+@@ -469,10 +475,12 @@
+ 
+ 	tablespace_version = fil_space_get_version(space);
+ 
+-	buf_pool_mutex_enter();
++	//buf_pool_mutex_enter();
++	mutex_enter(&buf_pool_mutex);
+ 
+ 	if (high > fil_space_get_size(space)) {
+-		buf_pool_mutex_exit();
++		//buf_pool_mutex_exit();
++		mutex_exit(&buf_pool_mutex);
+ 		/* The area is not whole, return */
+ 
+ 		return(0);
+@@ -480,10 +488,12 @@
+ 
+ 	if (buf_pool->n_pend_reads
+ 	    > buf_pool->curr_size / BUF_READ_AHEAD_PEND_LIMIT) {
+-		buf_pool_mutex_exit();
++		//buf_pool_mutex_exit();
++		mutex_exit(&buf_pool_mutex);
+ 
+ 		return(0);
+ 	}
++	mutex_exit(&buf_pool_mutex);
+ 
+ 	/* Check that almost all pages in the area have been accessed; if
+ 	offset == low, the accesses must be in a descending order, otherwise,
+@@ -497,6 +507,7 @@
+ 
+ 	fail_count = 0;
+ 
++	rw_lock_s_lock(&page_hash_latch);
+ 	for (i = low; i < high; i++) {
+ 		bpage = buf_page_hash_get(space, i);
+ 
+@@ -520,7 +531,8 @@
+ 	    * LINEAR_AREA_THRESHOLD_COEF) {
+ 		/* Too many failures: return */
+ 
+-		buf_pool_mutex_exit();
++		//buf_pool_mutex_exit();
++		rw_lock_s_unlock(&page_hash_latch);
+ 
+ 		return(0);
+ 	}
+@@ -531,7 +543,8 @@
+ 	bpage = buf_page_hash_get(space, offset);
+ 
+ 	if (bpage == NULL) {
+-		buf_pool_mutex_exit();
++		//buf_pool_mutex_exit();
++		rw_lock_s_unlock(&page_hash_latch);
+ 
+ 		return(0);
+ 	}
+@@ -557,7 +570,8 @@
+ 	pred_offset = fil_page_get_prev(frame);
+ 	succ_offset = fil_page_get_next(frame);
+ 
+-	buf_pool_mutex_exit();
++	//buf_pool_mutex_exit();
++	rw_lock_s_unlock(&page_hash_latch);
+ 
+ 	if ((offset == low) && (succ_offset == offset + 1)) {
+ 
+diff -ruN innodb_plugin-1.0.3_orig/handler/i_s.cc innodb_plugin-1.0.3_tmp/handler/i_s.cc
+--- innodb_plugin-1.0.3_orig/handler/i_s.cc	2009-03-19 18:47:48.000000000 +0900
++++ innodb_plugin-1.0.3_tmp/handler/i_s.cc	2009-03-19 18:51:20.000000000 +0900
+@@ -2280,7 +2280,8 @@
+ 
+ 	RETURN_IF_INNODB_NOT_STARTED(tables->schema_table_name);
+ 
+-	buf_pool_mutex_enter();
++	//buf_pool_mutex_enter();
++	mutex_enter(&zip_free_mutex);
+ 
+ 	for (uint x = 0; x <= BUF_BUDDY_SIZES; x++) {
+ 		buf_buddy_stat_t*	buddy_stat = &buf_buddy_stat[x];
+@@ -2306,7 +2307,8 @@
+ 		}
+ 	}
+ 
+-	buf_pool_mutex_exit();
++	//buf_pool_mutex_exit();
++	mutex_exit(&zip_free_mutex);
+ 	DBUG_RETURN(status);
+ }
+ 
+diff -ruN innodb_plugin-1.0.3_orig/handler/innodb_patch_info.h innodb_plugin-1.0.3_tmp/handler/innodb_patch_info.h
+--- innodb_plugin-1.0.3_orig/handler/innodb_patch_info.h	2009-03-19 18:47:48.000000000 +0900
++++ innodb_plugin-1.0.3_tmp/handler/innodb_patch_info.h	2009-03-19 18:51:20.000000000 +0900
+@@ -29,5 +29,6 @@
+ {"innodb_rw_lock","InnoDB RW-lock fixes","Useful for 8+ cores SMP systems","http://www.percona.com/docs/wiki/percona-xtradb"},
+ {"innodb_opt_lru_count","Fix of buffer_pool mutex","Decreases contention on buffer_pool mutex on LRU operations","http://www.percona.com/docs/wiki/percona-xtradb"},
+ {"innodb_buffer_pool_pages","Information of buffer pool content","","http://www.percona.com/docs/wiki/percona-xtradb"},
++{"innodb_split_buf_pool_mutex","More fix of buffer_pool mutex","Splitting buf_pool_mutex and optimizing based on innodb_opt_lru_count","http://www.percona.com/docs/wiki/percona-xtradb"},
+ {NULL, NULL, NULL, NULL}
+ };
+diff -ruN innodb_plugin-1.0.3_orig/include/buf0buddy.h innodb_plugin-1.0.3_tmp/include/buf0buddy.h
+--- innodb_plugin-1.0.3_orig/include/buf0buddy.h	2009-02-17 17:59:22.000000000 +0900
++++ innodb_plugin-1.0.3_tmp/include/buf0buddy.h	2009-03-19 18:51:20.000000000 +0900
+@@ -49,10 +49,11 @@
+ 			/* out: allocated block,
+ 			possibly NULL if lru == NULL */
+ 	ulint	size,	/* in: block size, up to UNIV_PAGE_SIZE */
+-	ibool*	lru)	/* in: pointer to a variable that will be assigned
++	ibool*	lru,	/* in: pointer to a variable that will be assigned
+ 			TRUE if storage was allocated from the LRU list
+ 			and buf_pool_mutex was temporarily released,
+ 			or NULL if the LRU list should not be used */
++	ibool	have_page_hash_mutex)
+ 	__attribute__((malloc));
+ 
+ /**************************************************************************
+@@ -63,7 +64,8 @@
+ /*===========*/
+ 	void*	buf,	/* in: block to be freed, must not be
+ 			pointed to by the buffer pool */
+-	ulint	size)	/* in: block size, up to UNIV_PAGE_SIZE */
++	ulint	size,	/* in: block size, up to UNIV_PAGE_SIZE */
++	ibool	have_page_hash_mutex)
+ 	__attribute__((nonnull));
+ 
+ /** Statistics of buddy blocks of a given size. */
+diff -ruN innodb_plugin-1.0.3_orig/include/buf0buddy.ic innodb_plugin-1.0.3_tmp/include/buf0buddy.ic
+--- innodb_plugin-1.0.3_orig/include/buf0buddy.ic	2009-02-19 00:04:03.000000000 +0900
++++ innodb_plugin-1.0.3_tmp/include/buf0buddy.ic	2009-03-19 18:51:20.000000000 +0900
+@@ -44,10 +44,11 @@
+ 			possibly NULL if lru==NULL */
+ 	ulint	i,	/* in: index of buf_pool->zip_free[],
+ 			or BUF_BUDDY_SIZES */
+-	ibool*	lru)	/* in: pointer to a variable that will be assigned
++	ibool*	lru,	/* in: pointer to a variable that will be assigned
+ 			TRUE if storage was allocated from the LRU list
+ 			and buf_pool_mutex was temporarily released,
+ 			or NULL if the LRU list should not be used */
++	ibool	have_page_hash_mutex)
+ 	__attribute__((malloc));
+ 
+ /**************************************************************************
+@@ -58,8 +59,9 @@
+ /*===============*/
+ 	void*	buf,	/* in: block to be freed, must not be
+ 			pointed to by the buffer pool */
+-	ulint	i)	/* in: index of buf_pool->zip_free[],
++	ulint	i,	/* in: index of buf_pool->zip_free[],
+ 			or BUF_BUDDY_SIZES */
++	ibool	have_page_hash_mutex)
+ 	__attribute__((nonnull));
+ 
+ /**************************************************************************
+@@ -98,14 +100,15 @@
+ 			/* out: allocated block,
+ 			possibly NULL if lru == NULL */
+ 	ulint	size,	/* in: block size, up to UNIV_PAGE_SIZE */
+-	ibool*	lru)	/* in: pointer to a variable that will be assigned
++	ibool*	lru,	/* in: pointer to a variable that will be assigned
+ 			TRUE if storage was allocated from the LRU list
+ 			and buf_pool_mutex was temporarily released,
+ 			or NULL if the LRU list should not be used */
++	ibool	have_page_hash_mutex)
+ {
+-	ut_ad(buf_pool_mutex_own());
++	//ut_ad(buf_pool_mutex_own());
+ 
+-	return(buf_buddy_alloc_low(buf_buddy_get_slot(size), lru));
++	return(buf_buddy_alloc_low(buf_buddy_get_slot(size), lru, have_page_hash_mutex));
+ }
+ 
+ /**************************************************************************
+@@ -116,11 +119,26 @@
+ /*===========*/
+ 	void*	buf,	/* in: block to be freed, must not be
+ 			pointed to by the buffer pool */
+-	ulint	size)	/* in: block size, up to UNIV_PAGE_SIZE */
++	ulint	size,	/* in: block size, up to UNIV_PAGE_SIZE */
++	ibool	have_page_hash_mutex)
+ {
+-	ut_ad(buf_pool_mutex_own());
++	//ut_ad(buf_pool_mutex_own());
+ 
+-	buf_buddy_free_low(buf, buf_buddy_get_slot(size));
++	if (!have_page_hash_mutex) {
++		mutex_enter(&LRU_list_mutex);
++		mutex_enter(&flush_list_mutex);
++		rw_lock_x_lock(&page_hash_latch);
++	}
++
++	mutex_enter(&zip_free_mutex);
++	buf_buddy_free_low(buf, buf_buddy_get_slot(size), TRUE);
++	mutex_exit(&zip_free_mutex);
++
++	if (!have_page_hash_mutex) {
++		mutex_exit(&LRU_list_mutex);
++		mutex_exit(&flush_list_mutex);
++		rw_lock_x_unlock(&page_hash_latch);
++	}
+ }
+ 
+ #ifdef UNIV_MATERIALIZE
+diff -ruN innodb_plugin-1.0.3_orig/include/buf0buf.h innodb_plugin-1.0.3_tmp/include/buf0buf.h
+--- innodb_plugin-1.0.3_orig/include/buf0buf.h	2009-02-17 17:59:22.000000000 +0900
++++ innodb_plugin-1.0.3_tmp/include/buf0buf.h	2009-03-19 18:51:20.000000000 +0900
+@@ -1061,10 +1061,10 @@
+ 
+ 	UT_LIST_NODE_T(buf_page_t) LRU;
+ 					/* node of the LRU list */
+-#ifdef UNIV_DEBUG
++//#ifdef UNIV_DEBUG
+ 	ibool		in_LRU_list;	/* TRUE if the page is in the LRU list;
+ 					used in debugging */
+-#endif /* UNIV_DEBUG */
++//#endif /* UNIV_DEBUG */
+ 	unsigned	old:1;		/* TRUE if the block is in the old
+ 					blocks in the LRU list */
+ 	unsigned	LRU_position:31;/* value which monotonically decreases
+@@ -1104,11 +1104,11 @@
+ 					a block is in the unzip_LRU list
+ 					if page.state == BUF_BLOCK_FILE_PAGE
+ 					and page.zip.data != NULL */
+-#ifdef UNIV_DEBUG
++//#ifdef UNIV_DEBUG
+ 	ibool		in_unzip_LRU_list;/* TRUE if the page is in the
+ 					decompressed LRU list;
+ 					used in debugging */
+-#endif /* UNIV_DEBUG */
++//#endif /* UNIV_DEBUG */
+ 	byte*		frame;		/* pointer to buffer frame which
+ 					is of size UNIV_PAGE_SIZE, and
+ 					aligned to an address divisible by
+@@ -1316,6 +1316,12 @@
+ /* mutex protecting the buffer pool struct and control blocks, except the
+ read-write lock in them */
+ extern mutex_t	buf_pool_mutex;
++extern mutex_t	LRU_list_mutex;
++extern mutex_t	flush_list_mutex;
++extern rw_lock_t	page_hash_latch;
++extern mutex_t	free_list_mutex;
++extern mutex_t	zip_free_mutex;
++extern mutex_t	zip_hash_mutex;
+ /* mutex protecting the control blocks of compressed-only pages
+ (of type buf_page_t, not buf_block_t) */
+ extern mutex_t	buf_pool_zip_mutex;
+diff -ruN innodb_plugin-1.0.3_orig/include/buf0buf.ic innodb_plugin-1.0.3_tmp/include/buf0buf.ic
+--- innodb_plugin-1.0.3_orig/include/buf0buf.ic	2009-02-17 16:56:33.000000000 +0900
++++ innodb_plugin-1.0.3_tmp/include/buf0buf.ic	2009-03-19 18:51:20.000000000 +0900
+@@ -100,7 +100,8 @@
+ 	buf_page_t*	bpage;
+ 	ib_uint64_t	lsn;
+ 
+-	buf_pool_mutex_enter();
++	//buf_pool_mutex_enter();
++	mutex_enter(&flush_list_mutex);
+ 
+ 	bpage = UT_LIST_GET_LAST(buf_pool->flush_list);
+ 
+@@ -111,7 +112,8 @@
+ 		lsn = bpage->oldest_modification;
+ 	}
+ 
+-	buf_pool_mutex_exit();
++	//buf_pool_mutex_exit();
++	mutex_exit(&flush_list_mutex);
+ 
+ 	/* The returned answer may be out of date: the flush_list can
+ 	change after the mutex has been released. */
+@@ -128,7 +130,8 @@
+ /*====================*/
+ 			/* out: new clock value */
+ {
+-	ut_ad(buf_pool_mutex_own());
++	//ut_ad(buf_pool_mutex_own());
++	ut_ad(mutex_own(&LRU_list_mutex));
+ 
+ 	buf_pool->ulint_clock++;
+ 
+@@ -246,7 +249,7 @@
+ 	case BUF_BLOCK_ZIP_FREE:
+ 		/* This is a free page in buf_pool->zip_free[].
+ 		Such pages should only be accessed by the buddy allocator. */
+-		ut_error;
++		/* ut_error; */ /* optimistic */
+ 		break;
+ 	case BUF_BLOCK_ZIP_PAGE:
+ 	case BUF_BLOCK_ZIP_DIRTY:
+@@ -305,7 +308,7 @@
+ {
+ 	switch (buf_page_get_state(bpage)) {
+ 	case BUF_BLOCK_ZIP_FREE:
+-		ut_error;
++		/* ut_error; */ /* optimistic */
+ 		return(NULL);
+ 	case BUF_BLOCK_ZIP_PAGE:
+ 	case BUF_BLOCK_ZIP_DIRTY:
+@@ -410,7 +413,7 @@
+ 	buf_page_t*	bpage,	/* in/out: control block */
+ 	enum buf_io_fix	io_fix)	/* in: io_fix state */
+ {
+-	ut_ad(buf_pool_mutex_own());
++	//ut_ad(buf_pool_mutex_own());
+ 	ut_ad(mutex_own(buf_page_get_mutex(bpage)));
+ 
+ 	bpage->io_fix = io_fix;
+@@ -438,12 +441,13 @@
+ /*==================*/
+ 	const buf_page_t*	bpage)	/* control block being relocated */
+ {
+-	ut_ad(buf_pool_mutex_own());
+-	ut_ad(mutex_own(buf_page_get_mutex(bpage)));
+-	ut_ad(buf_page_in_file(bpage));
+-	ut_ad(bpage->in_LRU_list);
++	//ut_ad(buf_pool_mutex_own());
++	/* optimistic */
++	//ut_ad(mutex_own(buf_page_get_mutex(bpage)));
++	//ut_ad(buf_page_in_file(bpage));
++	//ut_ad(bpage->in_LRU_list);
+ 
+-	return(buf_page_get_io_fix(bpage) == BUF_IO_NONE
++	return(bpage->in_LRU_list && bpage->io_fix == BUF_IO_NONE
+ 	       && bpage->buf_fix_count == 0);
+ }
+ 
+@@ -472,7 +476,8 @@
+ 	ibool		old)	/* in: old */
+ {
+ 	ut_a(buf_page_in_file(bpage));
+-	ut_ad(buf_pool_mutex_own());
++	//ut_ad(buf_pool_mutex_own());
++	ut_ad(mutex_own(&LRU_list_mutex));
+ 	ut_ad(bpage->in_LRU_list);
+ 
+ #ifdef UNIV_LRU_DEBUG
+@@ -728,17 +733,17 @@
+ /*===========*/
+ 	buf_block_t*	block)	/* in, own: block to be freed */
+ {
+-	buf_pool_mutex_enter();
++	//buf_pool_mutex_enter();
+ 
+ 	mutex_enter(&block->mutex);
+ 
+ 	ut_a(buf_block_get_state(block) != BUF_BLOCK_FILE_PAGE);
+ 
+-	buf_LRU_block_free_non_file_page(block);
++	buf_LRU_block_free_non_file_page(block, FALSE);
+ 
+ 	mutex_exit(&block->mutex);
+ 
+-	buf_pool_mutex_exit();
++	//buf_pool_mutex_exit();
+ }
+ 
+ /*************************************************************************
+@@ -783,14 +788,17 @@
+ 	buf_page_t*	bpage)	/* in: buf_pool block, must be bufferfixed */
+ {
+ 	ibool	io_fixed;
++	mutex_t* block_mutex = buf_page_get_mutex(bpage);
+ 
+-	buf_pool_mutex_enter();
++	//buf_pool_mutex_enter();
++	mutex_enter(block_mutex);
+ 
+ 	ut_ad(buf_page_in_file(bpage));
+ 	ut_ad(bpage->buf_fix_count > 0);
+ 
+ 	io_fixed = buf_page_get_io_fix(bpage) != BUF_IO_NONE;
+-	buf_pool_mutex_exit();
++	//buf_pool_mutex_exit();
++	mutex_exit(block_mutex);
+ 
+ 	return(io_fixed);
+ }
+@@ -917,7 +925,11 @@
+ 	ulint		fold;
+ 
+ 	ut_ad(buf_pool);
+-	ut_ad(buf_pool_mutex_own());
++	//ut_ad(buf_pool_mutex_own());
++#ifdef UNIV_SYNC_DEBUG
++	ut_ad(rw_lock_own(&page_hash_latch, RW_LOCK_EX)
++	      || rw_lock_own(&page_hash_latch, RW_LOCK_SHARED));
++#endif
+ 
+ 	/* Look for the page in the hash table */
+ 
+@@ -966,11 +978,13 @@
+ {
+ 	const buf_page_t*	bpage;
+ 
+-	buf_pool_mutex_enter();
++	//buf_pool_mutex_enter();
++	rw_lock_s_lock(&page_hash_latch);
+ 
+ 	bpage = buf_page_hash_get(space, offset);
+ 
+-	buf_pool_mutex_exit();
++	//buf_pool_mutex_exit();
++	rw_lock_s_unlock(&page_hash_latch);
+ 
+ 	return(bpage != NULL);
+ }
+@@ -1033,12 +1047,17 @@
+ 	ut_a(block->page.buf_fix_count > 0);
+ 
+ 	if (rw_latch == RW_X_LATCH && mtr->modifications) {
+-		buf_pool_mutex_enter();
++		//buf_pool_mutex_enter();
++		mutex_enter(&flush_list_mutex);
++		mutex_enter(&block->mutex);
++		ut_ad(buf_block_get_state(block) == BUF_BLOCK_FILE_PAGE);
+ 		buf_flush_note_modification(block, mtr);
+-		buf_pool_mutex_exit();
++		//buf_pool_mutex_exit();
++		mutex_exit(&flush_list_mutex);
+ 	}
+-
++	else {
+ 	mutex_enter(&block->mutex);
++	}
+ 
+ #ifdef UNIV_SYNC_DEBUG
+ 	rw_lock_s_unlock(&(block->debug_latch));
+diff -ruN innodb_plugin-1.0.3_orig/include/buf0flu.ic innodb_plugin-1.0.3_tmp/include/buf0flu.ic
+--- innodb_plugin-1.0.3_orig/include/buf0flu.ic	2009-02-17 17:59:22.000000000 +0900
++++ innodb_plugin-1.0.3_tmp/include/buf0flu.ic	2009-03-19 18:51:20.000000000 +0900
+@@ -59,7 +59,8 @@
+ #ifdef UNIV_SYNC_DEBUG
+ 	ut_ad(rw_lock_own(&(block->lock), RW_LOCK_EX));
+ #endif /* UNIV_SYNC_DEBUG */
+-	ut_ad(buf_pool_mutex_own());
++	//ut_ad(buf_pool_mutex_own());
++	ut_ad(mutex_own(&flush_list_mutex));
+ 
+ 	ut_ad(mtr->start_lsn != 0);
+ 	ut_ad(mtr->modifications);
+@@ -99,7 +100,8 @@
+ 	ut_ad(rw_lock_own(&(block->lock), RW_LOCK_EX));
+ #endif /* UNIV_SYNC_DEBUG */
+ 
+-	buf_pool_mutex_enter();
++	//buf_pool_mutex_enter();
++	mutex_enter(&flush_list_mutex);
+ 
+ 	ut_ad(block->page.newest_modification <= end_lsn);
+ 
+@@ -116,5 +118,6 @@
+ 		ut_ad(block->page.oldest_modification <= start_lsn);
+ 	}
+ 
+-	buf_pool_mutex_exit();
++	//buf_pool_mutex_exit();
++	mutex_exit(&flush_list_mutex);
+ }
+diff -ruN innodb_plugin-1.0.3_orig/include/buf0lru.h innodb_plugin-1.0.3_tmp/include/buf0lru.h
+--- innodb_plugin-1.0.3_orig/include/buf0lru.h	2009-02-17 17:59:22.000000000 +0900
++++ innodb_plugin-1.0.3_tmp/include/buf0lru.h	2009-03-19 18:51:20.000000000 +0900
+@@ -122,10 +122,11 @@
+ 	buf_page_t*	bpage,	/* in: block to be freed */
+ 	ibool		zip,	/* in: TRUE if should remove also the
+ 				compressed page of an uncompressed page */
+-	ibool*		buf_pool_mutex_released);
++	ibool*		buf_pool_mutex_released,
+ 				/* in: pointer to a variable that will
+ 				be assigned TRUE if buf_pool_mutex
+ 				was temporarily released, or NULL */
++	ibool		have_LRU_mutex);
+ /**********************************************************************
+ Try to free a replaceable block. */
+ UNIV_INTERN
+@@ -169,7 +170,8 @@
+ void
+ buf_LRU_block_free_non_file_page(
+ /*=============================*/
+-	buf_block_t*	block);	/* in: block, must not contain a file page */
++	buf_block_t*	block,	/* in: block, must not contain a file page */
++	ibool		have_page_hash_mutex);
+ /**********************************************************************
+ Adds a block to the LRU list. */
+ UNIV_INTERN
+diff -ruN innodb_plugin-1.0.3_orig/mysql-test/innodb_xtradb_bug317074.result innodb_plugin-1.0.3_tmp/mysql-test/innodb_xtradb_bug317074.result
+--- innodb_plugin-1.0.3_orig/mysql-test/innodb_xtradb_bug317074.result	1970-01-01 09:00:00.000000000 +0900
++++ innodb_plugin-1.0.3_tmp/mysql-test/innodb_xtradb_bug317074.result	2009-03-19 18:51:20.000000000 +0900
+@@ -0,0 +1,2 @@
++SET GLOBAL innodb_file_format='Barracuda';
++SET GLOBAL innodb_file_per_table=ON;
+diff -ruN innodb_plugin-1.0.3_orig/mysql-test/innodb_xtradb_bug317074.test innodb_plugin-1.0.3_tmp/mysql-test/innodb_xtradb_bug317074.test
+--- innodb_plugin-1.0.3_orig/mysql-test/innodb_xtradb_bug317074.test	1970-01-01 09:00:00.000000000 +0900
++++ innodb_plugin-1.0.3_tmp/mysql-test/innodb_xtradb_bug317074.test	2009-03-19 18:51:20.000000000 +0900
+@@ -0,0 +1,38 @@
++-- source include/have_innodb.inc
++
++SET GLOBAL innodb_file_format='Barracuda';
++SET GLOBAL innodb_file_per_table=ON;
++
++-- disable_query_log
++-- disable_result_log
++
++DROP TABLE IF EXISTS `test1`;
++CREATE TABLE IF NOT EXISTS `test1` (
++ `a` int primary key auto_increment,
++ `b` int default 0,
++ `c` char(100) default 'testtest'
++) ENGINE=InnoDB ROW_FORMAT=COMPRESSED KEY_BLOCK_SIZE=8;
++
++delimiter |;
++CREATE PROCEDURE insert_many(p1 int)
++BEGIN
++SET @x = 0;
++SET @y = 0;
++REPEAT
++  insert into test1 set b=1;
++  SET @x = @x + 1;
++  SET @y = @y + 1;
++  IF @y >= 100 THEN
++    commit;
++    SET @y = 0;
++  END IF;
++UNTIL @x >= p1 END REPEAT;
++END|
++delimiter ;|
++call insert_many(100000);
++DROP PROCEDURE insert_many;
++
++# The bug: the server hangs at the following statement
++ALTER TABLE test1 ENGINE=MyISAM;
++
++DROP TABLE test1;

=== added file 'series'
--- series	1970-01-01 00:00:00 +0000
+++ series	2009-03-19 07:29:08 +0000
@@ -0,0 +1,9 @@
+innodb_show_enhancements.patch
+innodb_show_status.patch
+innodb_io_patches.patch
+innodb_rw_lock.patch
+innodb_opt_lru_count.patch
+i_s_innodb_buffer_pool_pages.patch
+innodb_split_buf_pool_mutex.patch
+innodb_expand_undo_slots.patch
+innodb_extra_rseg.patch