← Back to team overview

maria-developers team mailing list archive

bzr commit into MariaDB 5.1, with Maria 1.5:maria branch (knielsen:2717)

 

#At lp:maria

 2717 knielsen@xxxxxxxxxxxxxxx	2009-08-03 [merge]
      Merge XtraDB 6 into MariaDB.
      modified:
        mysql-test/r/events_stress.result
        mysql-test/r/information_schema.result
        mysql-test/r/information_schema_all_engines.result
        mysql-test/r/innodb_bug36169.result
        mysql-test/r/innodb_xtradb_bug317074.result
        mysql-test/t/events_stress.test
        mysql-test/t/innodb-analyze.test
        mysql-test/t/innodb_bug36169.test
        mysql-test/t/innodb_bug36172.test
        mysql-test/t/innodb_xtradb_bug317074.test
        storage/xtradb/btr/btr0cur.c
        storage/xtradb/btr/btr0sea.c
        storage/xtradb/buf/buf0buddy.c
        storage/xtradb/buf/buf0buf.c
        storage/xtradb/buf/buf0flu.c
        storage/xtradb/buf/buf0lru.c
        storage/xtradb/buf/buf0rea.c
        storage/xtradb/dict/dict0boot.c
        storage/xtradb/dict/dict0crea.c
        storage/xtradb/dict/dict0dict.c
        storage/xtradb/dict/dict0load.c
        storage/xtradb/fil/fil0fil.c
        storage/xtradb/handler/ha_innodb.cc
        storage/xtradb/handler/i_s.cc
        storage/xtradb/handler/i_s.h
        storage/xtradb/handler/innodb_patch_info.h
        storage/xtradb/ibuf/ibuf0ibuf.c
        storage/xtradb/include/buf0buddy.h
        storage/xtradb/include/buf0buddy.ic
        storage/xtradb/include/buf0buf.h
        storage/xtradb/include/buf0buf.ic
        storage/xtradb/include/buf0flu.ic
        storage/xtradb/include/buf0lru.h
        storage/xtradb/include/dict0dict.h
        storage/xtradb/include/dict0dict.ic
        storage/xtradb/include/log0log.h
        storage/xtradb/include/rem0cmp.h
        storage/xtradb/include/rem0cmp.ic
        storage/xtradb/include/srv0srv.h
        storage/xtradb/include/sync0sync.h
        storage/xtradb/include/univ.i
        storage/xtradb/include/ut0auxconf.h
        storage/xtradb/log/log0log.c
        storage/xtradb/log/log0recv.c
        storage/xtradb/mtr/mtr0mtr.c
        storage/xtradb/os/os0file.c
        storage/xtradb/rem/rem0cmp.c
        storage/xtradb/row/row0mysql.c
        storage/xtradb/scripts/install_innodb_plugins.sql
        storage/xtradb/srv/srv0srv.c
        storage/xtradb/srv/srv0start.c
        storage/xtradb/sync/sync0sync.c
        storage/xtradb/ut/ut0ut.c

=== modified file 'mysql-test/r/events_stress.result'
--- a/mysql-test/r/events_stress.result	2006-09-01 11:08:44 +0000
+++ b/mysql-test/r/events_stress.result	2009-08-03 20:09:53 +0000
@@ -32,6 +32,7 @@ USE events_conn1_test2;
 SELECT COUNT(*) FROM INFORMATION_SCHEMA.EVENTS WHERE EVENT_SCHEMA='events_conn1_test2';
 COUNT(*)
 50
+SET @old_event_scheduler=@@event_scheduler;
 SET GLOBAL event_scheduler=on;
 DROP DATABASE events_conn1_test2;
 SET GLOBAL event_scheduler=off;
@@ -63,3 +64,4 @@ DROP TABLE fill_it1;
 DROP TABLE fill_it2;
 DROP TABLE fill_it3;
 DROP DATABASE events_test;
+SET GLOBAL event_scheduler=@old_event_scheduler;

=== modified file 'mysql-test/r/information_schema.result'
--- a/mysql-test/r/information_schema.result	2009-06-11 17:49:51 +0000
+++ b/mysql-test/r/information_schema.result	2009-08-03 20:09:53 +0000
@@ -61,9 +61,11 @@ INNODB_CMP
 INNODB_CMPMEM
 INNODB_CMPMEM_RESET
 INNODB_CMP_RESET
+INNODB_INDEX_STATS
 INNODB_LOCKS
 INNODB_LOCK_WAITS
 INNODB_RSEG
+INNODB_TABLE_STATS
 INNODB_TRX
 KEY_COLUMN_USAGE
 PARTITIONS
@@ -863,6 +865,8 @@ TABLE_CONSTRAINTS	TABLE_NAME	select
 TABLE_PRIVILEGES	TABLE_NAME	select
 VIEWS	TABLE_NAME	select
 INNODB_BUFFER_POOL_PAGES_INDEX	table_name	select
+INNODB_INDEX_STATS	table_name	select
+INNODB_TABLE_STATS	table_name	select
 delete from mysql.user where user='mysqltest_4';
 delete from mysql.db where user='mysqltest_4';
 flush privileges;

=== modified file 'mysql-test/r/information_schema_all_engines.result'
--- a/mysql-test/r/information_schema_all_engines.result	2009-06-11 12:53:26 +0000
+++ b/mysql-test/r/information_schema_all_engines.result	2009-08-03 20:09:53 +0000
@@ -35,13 +35,15 @@ INNODB_CMP
 INNODB_RSEG
 XTRADB_ENHANCEMENTS
 INNODB_BUFFER_POOL_PAGES_INDEX
-INNODB_BUFFER_POOL_PAGES_BLOB
+INNODB_INDEX_STATS
 INNODB_TRX
 INNODB_CMP_RESET
 INNODB_LOCK_WAITS
 INNODB_CMPMEM_RESET
 INNODB_LOCKS
 INNODB_CMPMEM
+INNODB_TABLE_STATS
+INNODB_BUFFER_POOL_PAGES_BLOB
 SELECT t.table_name, c1.column_name
 FROM information_schema.tables t
 INNER JOIN
@@ -91,13 +93,15 @@ INNODB_CMP	page_size
 INNODB_RSEG	rseg_id
 XTRADB_ENHANCEMENTS	name
 INNODB_BUFFER_POOL_PAGES_INDEX	schema_name
-INNODB_BUFFER_POOL_PAGES_BLOB	space_id
+INNODB_INDEX_STATS	table_name
 INNODB_TRX	trx_id
 INNODB_CMP_RESET	page_size
 INNODB_LOCK_WAITS	requesting_trx_id
 INNODB_CMPMEM_RESET	page_size
 INNODB_LOCKS	lock_id
 INNODB_CMPMEM	page_size
+INNODB_TABLE_STATS	table_name
+INNODB_BUFFER_POOL_PAGES_BLOB	space_id
 SELECT t.table_name, c1.column_name
 FROM information_schema.tables t
 INNER JOIN
@@ -147,13 +151,15 @@ INNODB_CMP	page_size
 INNODB_RSEG	rseg_id
 XTRADB_ENHANCEMENTS	name
 INNODB_BUFFER_POOL_PAGES_INDEX	schema_name
-INNODB_BUFFER_POOL_PAGES_BLOB	space_id
+INNODB_INDEX_STATS	table_name
 INNODB_TRX	trx_id
 INNODB_CMP_RESET	page_size
 INNODB_LOCK_WAITS	requesting_trx_id
 INNODB_CMPMEM_RESET	page_size
 INNODB_LOCKS	lock_id
 INNODB_CMPMEM	page_size
+INNODB_TABLE_STATS	table_name
+INNODB_BUFFER_POOL_PAGES_BLOB	space_id
 select 1 as f1 from information_schema.tables  where "CHARACTER_SETS"=
 (select cast(table_name as char)  from information_schema.tables
 order by table_name limit 1) limit 1;
@@ -192,9 +198,11 @@ INNODB_CMP	information_schema.INNODB_CMP
 INNODB_CMPMEM	information_schema.INNODB_CMPMEM	1
 INNODB_CMPMEM_RESET	information_schema.INNODB_CMPMEM_RESET	1
 INNODB_CMP_RESET	information_schema.INNODB_CMP_RESET	1
+INNODB_INDEX_STATS	information_schema.INNODB_INDEX_STATS	1
 INNODB_LOCKS	information_schema.INNODB_LOCKS	1
 INNODB_LOCK_WAITS	information_schema.INNODB_LOCK_WAITS	1
 INNODB_RSEG	information_schema.INNODB_RSEG	1
+INNODB_TABLE_STATS	information_schema.INNODB_TABLE_STATS	1
 INNODB_TRX	information_schema.INNODB_TRX	1
 KEY_COLUMN_USAGE	information_schema.KEY_COLUMN_USAGE	1
 PARTITIONS	information_schema.PARTITIONS	1
@@ -254,13 +262,15 @@ Database: information_schema
 | INNODB_RSEG                           |
 | XTRADB_ENHANCEMENTS                   |
 | INNODB_BUFFER_POOL_PAGES_INDEX        |
-| INNODB_BUFFER_POOL_PAGES_BLOB         |
+| INNODB_INDEX_STATS                    |
 | INNODB_TRX                            |
 | INNODB_CMP_RESET                      |
 | INNODB_LOCK_WAITS                     |
 | INNODB_CMPMEM_RESET                   |
 | INNODB_LOCKS                          |
 | INNODB_CMPMEM                         |
+| INNODB_TABLE_STATS                    |
+| INNODB_BUFFER_POOL_PAGES_BLOB         |
 +---------------------------------------+
 Database: INFORMATION_SCHEMA
 +---------------------------------------+
@@ -300,13 +310,15 @@ Database: INFORMATION_SCHEMA
 | INNODB_RSEG                           |
 | XTRADB_ENHANCEMENTS                   |
 | INNODB_BUFFER_POOL_PAGES_INDEX        |
-| INNODB_BUFFER_POOL_PAGES_BLOB         |
+| INNODB_INDEX_STATS                    |
 | INNODB_TRX                            |
 | INNODB_CMP_RESET                      |
 | INNODB_LOCK_WAITS                     |
 | INNODB_CMPMEM_RESET                   |
 | INNODB_LOCKS                          |
 | INNODB_CMPMEM                         |
+| INNODB_TABLE_STATS                    |
+| INNODB_BUFFER_POOL_PAGES_BLOB         |
 +---------------------------------------+
 Wildcard: inf_rmation_schema
 +--------------------+
@@ -316,5 +328,5 @@ Wildcard: inf_rmation_schema
 +--------------------+
 SELECT table_schema, count(*) FROM information_schema.TABLES WHERE table_schema IN ('mysql', 'INFORMATION_SCHEMA', 'test', 'mysqltest') AND table_name<>'ndb_binlog_index' AND table_name<>'ndb_apply_status' GROUP BY TABLE_SCHEMA;
 table_schema	count(*)
-information_schema	41
+information_schema	43
 mysql	22

=== modified file 'mysql-test/r/innodb_bug36169.result'
--- a/mysql-test/r/innodb_bug36169.result	2009-06-11 12:53:26 +0000
+++ b/mysql-test/r/innodb_bug36169.result	2009-08-03 20:09:53 +0000
@@ -1,5 +1,5 @@
-SET @save_innodb_file_format=@@global.innodb_file_format;
-SET @save_innodb_file_format_check=@@global.innodb_file_format_check;
-SET @save_innodb_file_per_table=@@global.innodb_file_per_table;
+set @old_innodb_file_per_table=@@innodb_file_per_table;
+set @old_innodb_file_format=@@innodb_file_format;
+set @old_innodb_file_format_check=@@innodb_file_format_check;
 SET GLOBAL innodb_file_format='Barracuda';
 SET GLOBAL innodb_file_per_table=ON;

=== modified file 'mysql-test/r/innodb_xtradb_bug317074.result'
--- a/mysql-test/r/innodb_xtradb_bug317074.result	2009-06-11 12:53:26 +0000
+++ b/mysql-test/r/innodb_xtradb_bug317074.result	2009-08-03 20:09:53 +0000
@@ -1,5 +1,5 @@
-SET @save_innodb_file_format=@@global.innodb_file_format;
-SET @save_innodb_file_format_check=@@global.innodb_file_format_check;
-SET @save_innodb_file_per_table=@@global.innodb_file_per_table;
+SET @old_innodb_file_format=@@innodb_file_format;
+SET @old_innodb_file_per_table=@@innodb_file_per_table;
+SET @old_innodb_file_format_check=@@innodb_file_format_check;
 SET GLOBAL innodb_file_format='Barracuda';
 SET GLOBAL innodb_file_per_table=ON;

=== modified file 'mysql-test/t/events_stress.test'
--- a/mysql-test/t/events_stress.test	2007-05-26 14:36:38 +0000
+++ b/mysql-test/t/events_stress.test	2009-08-03 20:09:53 +0000
@@ -61,6 +61,7 @@ while ($1)
 }
 --enable_query_log
 SELECT COUNT(*) FROM INFORMATION_SCHEMA.EVENTS WHERE EVENT_SCHEMA='events_conn1_test2';
+SET @old_event_scheduler=@@event_scheduler;
 SET GLOBAL event_scheduler=on;
 --sleep 2.5
 DROP DATABASE events_conn1_test2;
@@ -135,3 +136,6 @@ DROP USER event_user3@localhost;
 #
 
 DROP DATABASE events_test;
+
+# Cleanup
+SET GLOBAL event_scheduler=@old_event_scheduler;

=== modified file 'mysql-test/t/innodb-analyze.test'
--- a/mysql-test/t/innodb-analyze.test	2009-06-09 15:08:46 +0000
+++ b/mysql-test/t/innodb-analyze.test	2009-08-03 20:09:53 +0000
@@ -11,7 +11,7 @@
 -- disable_result_log
 -- enable_warnings
 
-SET @save_innodb_stats_sample_pages=@@innodb_stats_sample_pages;
+SET @old_innodb_stats_sample_pages=@@innodb_stats_sample_pages;
 SET GLOBAL innodb_stats_sample_pages=0;
 
 # check that the value has been adjusted to 1
@@ -61,5 +61,5 @@ ANALYZE TABLE innodb_analyze;
 SET GLOBAL innodb_stats_sample_pages=16;
 ANALYZE TABLE innodb_analyze;
 
-SET GLOBAL innodb_stats_sample_pages=@save_innodb_stats_sample_pages;
 DROP TABLE innodb_analyze;
+SET GLOBAL innodb_stats_sample_pages=@old_innodb_stats_sample_pages;

=== modified file 'mysql-test/t/innodb_bug36169.test'
--- a/mysql-test/t/innodb_bug36169.test	2009-06-11 12:53:26 +0000
+++ b/mysql-test/t/innodb_bug36169.test	2009-08-03 20:09:53 +0000
@@ -4,10 +4,10 @@
 #
 
 -- source include/have_innodb.inc
+set @old_innodb_file_per_table=@@innodb_file_per_table;
+set @old_innodb_file_format=@@innodb_file_format;
+set @old_innodb_file_format_check=@@innodb_file_format_check;
 
-SET @save_innodb_file_format=@@global.innodb_file_format;
-SET @save_innodb_file_format_check=@@global.innodb_file_format_check;
-SET @save_innodb_file_per_table=@@global.innodb_file_per_table;
 SET GLOBAL innodb_file_format='Barracuda';
 SET GLOBAL innodb_file_per_table=ON;
 
@@ -1148,10 +1148,6 @@ KEY `idx44` (`col176`(100),`col42`,`col7
 KEY `idx45` (`col2`(27),`col27`(116))
 )engine=innodb ROW_FORMAT=COMPRESSED KEY_BLOCK_SIZE=1;
 
-SET GLOBAL innodb_file_format=@save_innodb_file_format;
-SET GLOBAL innodb_file_format_check=@save_innodb_file_format_check;
-SET GLOBAL innodb_file_per_table=@save_innodb_file_per_table;
-
 DROP TABLE IF EXISTS table0;
 DROP TABLE IF EXISTS table1;
 DROP TABLE IF EXISTS table2;
@@ -1160,3 +1156,7 @@ DROP TABLE IF EXISTS table4;
 DROP TABLE IF EXISTS table5;
 DROP TABLE IF EXISTS table6;
 
+set global innodb_file_per_table=@old_innodb_file_per_table;
+set global innodb_file_format=@old_innodb_file_format;
+set global innodb_file_format_check=@old_innodb_file_format_check;
+

=== modified file 'mysql-test/t/innodb_bug36172.test'
--- a/mysql-test/t/innodb_bug36172.test	2009-06-11 12:53:26 +0000
+++ b/mysql-test/t/innodb_bug36172.test	2009-08-03 20:09:53 +0000
@@ -13,10 +13,10 @@ SET storage_engine=InnoDB;
 
 -- disable_query_log
 -- disable_result_log
+set @old_innodb_file_per_table=@@innodb_file_per_table;
+set @old_innodb_file_format=@@innodb_file_format;
+set @old_innodb_file_format_check=@@innodb_file_format_check;
 
-SET @save_innodb_file_format=@@global.innodb_file_format;
-SET @save_innodb_file_format_check=@@global.innodb_file_format_check;
-SET @save_innodb_file_per_table=@@global.innodb_file_per_table;
 SET GLOBAL innodb_file_format='Barracuda';
 SET GLOBAL innodb_file_per_table=on;
 
@@ -27,7 +27,8 @@ CHECK TABLE table0 EXTENDED;
 INSERT IGNORE INTO `table0` SET `col19` = '19940127002709', `col20` = 2383927.9055146948, `col21` = 4293243420.5621204000, `col22` = '20511211123705', `col23` = 4289899778.6573381000, `col24` = 4293449279.0540481000, `col25` = 'emphysemic', `col26` = 'dentally', `col27` = '2347406', `col28` = 'eruct', `col30` = 1222, `col31` = 4294372994.9941406000, `col32` = 4291385574.1173744000, `col33` = 'borrowing\'s', `col34` = 'septics', `col35` = 'ratter\'s', `col36` = 'Kaye', `col37` = 'Florentia', `col38` = 'allium', `col39` = 'barkeep', `col40` = '19510407003441', `col41` = 4293559200.4215522000, `col42` = 22482, `col43` = 'decussate', `col44` = 'Brom\'s', `col45` = 'violated', `col46` = 4925506.4635456400, `col47` = 930549, `col48` = '51296066', `col49` = 'voluminously', `col50` = '29306676', `col51` = -88, `col52` = -2153690, `col53` = 4290250202.1464887000, `col54` = 'expropriation', `col55` = 'Aberdeen\'s', `col56` = 20343, `col58` = '19640415171532', `col59` = 'extern', `col60` = 'Ubana', `col61` = 4290487961.8539081000, `col62` = '2147', `col63` = -24271, `col64` = '20750801194548', `col65` = 'Cunaxa\'s', `col66` = 'pasticcio', `col67` = 2795817, `col68` = 'Indore\'s', `col70` = 6864127, `col71` = '1817832', `col72` = '20540506114211', `col73` = '20040101012300', `col74` = 'rationalized', `col75` = '45522', `col76` = 'indene', `col77` = -6964559, `col78` = 4247535.5266884370, `col79` = '20720416124357', `col80` = '2143', `col81` = 4292060102.4466386000, `col82` = 'striving', `col83` = 'boneblack\'s', `col84` = 'redolent', `col85` = 6489697.9009369183, `col86` = 4287473465.9731131000, `col87` = 7726015, `col88` = 'perplexed', `col89` = '17153791', `col90` = 5478587.1108127078, `col91` = 4287091404.7004304000, `col92` = 'Boulez\'s', `col93` = '2931278';
 CHECK TABLE table0 EXTENDED;
 
-SET GLOBAL innodb_file_format=@save_innodb_file_format;
-SET GLOBAL innodb_file_format_check=@save_innodb_file_format_check;
-SET GLOBAL innodb_file_per_table=@save_innodb_file_per_table;
 DROP TABLE table0;
+set global innodb_file_per_table=@old_innodb_file_per_table;
+set global innodb_file_format=@old_innodb_file_format;
+set global innodb_file_format_check=@old_innodb_file_format_check;
+

=== modified file 'mysql-test/t/innodb_xtradb_bug317074.test'
--- a/mysql-test/t/innodb_xtradb_bug317074.test	2009-06-11 12:53:26 +0000
+++ b/mysql-test/t/innodb_xtradb_bug317074.test	2009-08-03 20:09:53 +0000
@@ -1,8 +1,8 @@
 -- source include/have_innodb.inc
 
-SET @save_innodb_file_format=@@global.innodb_file_format;
-SET @save_innodb_file_format_check=@@global.innodb_file_format_check;
-SET @save_innodb_file_per_table=@@global.innodb_file_per_table;
+SET @old_innodb_file_format=@@innodb_file_format;
+SET @old_innodb_file_per_table=@@innodb_file_per_table;
+SET @old_innodb_file_format_check=@@innodb_file_format_check;
 SET GLOBAL innodb_file_format='Barracuda';
 SET GLOBAL innodb_file_per_table=ON;
 
@@ -38,8 +38,7 @@ DROP PROCEDURE insert_many;
 # The bug is hangup at the following statement
 ALTER TABLE test1 ENGINE=MyISAM;
 
-SET GLOBAL innodb_file_format=@save_innodb_file_format;
-SET GLOBAL innodb_file_format_check=@save_innodb_file_format_check;
-SET GLOBAL innodb_file_per_table=@save_innodb_file_per_table;
-
 DROP TABLE test1;
+SET GLOBAL innodb_file_format=@old_innodb_file_format;
+SET GLOBAL innodb_file_per_table=@old_innodb_file_per_table;
+SET GLOBAL innodb_file_format_check=@old_innodb_file_format_check;

=== modified file 'storage/xtradb/btr/btr0cur.c'
--- a/storage/xtradb/btr/btr0cur.c	2009-05-04 02:45:47 +0000
+++ b/storage/xtradb/btr/btr0cur.c	2009-06-25 01:43:25 +0000
@@ -3202,7 +3202,9 @@ btr_estimate_number_of_different_key_val
 	ulint		n_cols;
 	ulint		matched_fields;
 	ulint		matched_bytes;
+	ib_int64_t	n_recs	= 0;
 	ib_int64_t*	n_diff;
+	ib_int64_t*	n_not_nulls;
 	ullint		n_sample_pages; /* number of pages to sample */
 	ulint		not_empty_flag	= 0;
 	ulint		total_external_size = 0;
@@ -3215,6 +3217,7 @@ btr_estimate_number_of_different_key_val
 	ulint		offsets_next_rec_[REC_OFFS_NORMAL_SIZE];
 	ulint*		offsets_rec	= offsets_rec_;
 	ulint*		offsets_next_rec= offsets_next_rec_;
+	ulint		stats_method	= srv_stats_method;
 	rec_offs_init(offsets_rec_);
 	rec_offs_init(offsets_next_rec_);
 
@@ -3222,6 +3225,10 @@ btr_estimate_number_of_different_key_val
 
 	n_diff = mem_zalloc((n_cols + 1) * sizeof(ib_int64_t));
 
+	if (stats_method == SRV_STATS_METHOD_IGNORE_NULLS) {
+		n_not_nulls = mem_zalloc((n_cols + 1) * sizeof(ib_int64_t));
+	}
+
 	/* It makes no sense to test more pages than are contained
 	in the index, thus we lower the number if it is too high */
 	if (srv_stats_sample_pages > index->stat_index_size) {
@@ -3260,6 +3267,20 @@ btr_estimate_number_of_different_key_val
 		}
 
 		while (rec != supremum) {
+			/* count recs */
+			if (stats_method == SRV_STATS_METHOD_IGNORE_NULLS) {
+				n_recs++;
+				for (j = 0; j <= n_cols; j++) {
+					ulint	f_len;
+					rec_get_nth_field(rec, offsets_rec,
+							  j, &f_len);
+					if (f_len == UNIV_SQL_NULL)
+						break;
+
+					n_not_nulls[j]++;
+				}
+			}
+
 			rec_t*	next_rec = page_rec_get_next(rec);
 			if (next_rec == supremum) {
 				break;
@@ -3274,7 +3295,7 @@ btr_estimate_number_of_different_key_val
 			cmp_rec_rec_with_match(rec, next_rec,
 					       offsets_rec, offsets_next_rec,
 					       index, &matched_fields,
-					       &matched_bytes);
+					       &matched_bytes, srv_stats_method);
 
 			for (j = matched_fields + 1; j <= n_cols; j++) {
 				/* We add one if this index record has
@@ -3359,9 +3380,21 @@ btr_estimate_number_of_different_key_val
 		}
 
 		index->stat_n_diff_key_vals[j] += add_on;
+
+		/* revision for 'nulls_ignored' */
+		if (stats_method == SRV_STATS_METHOD_IGNORE_NULLS) {
+			if (!n_not_nulls[j])
+				n_not_nulls[j] = 1;
+			index->stat_n_diff_key_vals[j] =
+				index->stat_n_diff_key_vals[j] * n_recs
+					/ n_not_nulls[j];
+		}
 	}
 
 	mem_free(n_diff);
+	if (stats_method == SRV_STATS_METHOD_IGNORE_NULLS) {
+		mem_free(n_not_nulls);
+	}
 	if (UNIV_LIKELY_NULL(heap)) {
 		mem_heap_free(heap);
 	}
@@ -3733,7 +3766,8 @@ btr_blob_free(
 
 	mtr_commit(mtr);
 
-	buf_pool_mutex_enter();
+	//buf_pool_mutex_enter();
+	mutex_enter(&LRU_list_mutex);
 	mutex_enter(&block->mutex);
 
 	/* Only free the block if it is still allocated to
@@ -3744,17 +3778,22 @@ btr_blob_free(
 	    && buf_block_get_space(block) == space
 	    && buf_block_get_page_no(block) == page_no) {
 
-		if (buf_LRU_free_block(&block->page, all, NULL)
+		if (buf_LRU_free_block(&block->page, all, NULL, TRUE)
 		    != BUF_LRU_FREED
-		    && all && block->page.zip.data) {
+		    && all && block->page.zip.data
+		    /* Now, buf_LRU_free_block() may release mutex temporarily */
+		    && buf_block_get_state(block) == BUF_BLOCK_FILE_PAGE
+		    && buf_block_get_space(block) == space
+		    && buf_block_get_page_no(block) == page_no) {
 			/* Attempt to deallocate the uncompressed page
 			if the whole block cannot be deallocted. */
 
-			buf_LRU_free_block(&block->page, FALSE, NULL);
+			buf_LRU_free_block(&block->page, FALSE, NULL, TRUE);
 		}
 	}
 
-	buf_pool_mutex_exit();
+	//buf_pool_mutex_exit();
+	mutex_exit(&LRU_list_mutex);
 	mutex_exit(&block->mutex);
 }
 

=== modified file 'storage/xtradb/btr/btr0sea.c'
--- a/storage/xtradb/btr/btr0sea.c	2009-05-04 02:45:47 +0000
+++ b/storage/xtradb/btr/btr0sea.c	2009-06-25 01:43:25 +0000
@@ -1731,7 +1731,8 @@ btr_search_validate(void)
 	rec_offs_init(offsets_);
 
 	rw_lock_x_lock(&btr_search_latch);
-	buf_pool_mutex_enter();
+	//buf_pool_mutex_enter();
+	rw_lock_x_lock(&page_hash_latch);
 
 	cell_count = hash_get_n_cells(btr_search_sys->hash_index);
 
@@ -1739,11 +1740,13 @@ btr_search_validate(void)
 		/* We release btr_search_latch every once in a while to
 		give other queries a chance to run. */
 		if ((i != 0) && ((i % chunk_size) == 0)) {
-			buf_pool_mutex_exit();
+			//buf_pool_mutex_exit();
+			rw_lock_x_unlock(&page_hash_latch);
 			rw_lock_x_unlock(&btr_search_latch);
 			os_thread_yield();
 			rw_lock_x_lock(&btr_search_latch);
-			buf_pool_mutex_enter();
+			//buf_pool_mutex_enter();
+			rw_lock_x_lock(&page_hash_latch);
 		}
 
 		node = hash_get_nth_cell(btr_search_sys->hash_index, i)->node;
@@ -1850,11 +1853,13 @@ btr_search_validate(void)
 		/* We release btr_search_latch every once in a while to
 		give other queries a chance to run. */
 		if (i != 0) {
-			buf_pool_mutex_exit();
+			//buf_pool_mutex_exit();
+			rw_lock_x_unlock(&page_hash_latch);
 			rw_lock_x_unlock(&btr_search_latch);
 			os_thread_yield();
 			rw_lock_x_lock(&btr_search_latch);
-			buf_pool_mutex_enter();
+			//buf_pool_mutex_enter();
+			rw_lock_x_lock(&page_hash_latch);
 		}
 
 		if (!ha_validate(btr_search_sys->hash_index, i, end_index)) {
@@ -1862,7 +1867,8 @@ btr_search_validate(void)
 		}
 	}
 
-	buf_pool_mutex_exit();
+	//buf_pool_mutex_exit();
+	rw_lock_x_unlock(&page_hash_latch);
 	rw_lock_x_unlock(&btr_search_latch);
 	if (UNIV_LIKELY_NULL(heap)) {
 		mem_heap_free(heap);

=== modified file 'storage/xtradb/buf/buf0buddy.c'
--- a/storage/xtradb/buf/buf0buddy.c	2009-05-04 04:32:30 +0000
+++ b/storage/xtradb/buf/buf0buddy.c	2009-06-25 01:43:25 +0000
@@ -82,7 +82,7 @@ buf_buddy_add_to_free(
 #endif /* UNIV_DEBUG_VALGRIND */
 
 	ut_ad(buf_pool->zip_free[i].start != bpage);
-	UT_LIST_ADD_FIRST(list, buf_pool->zip_free[i], bpage);
+	UT_LIST_ADD_FIRST(zip_list, buf_pool->zip_free[i], bpage);
 
 #ifdef UNIV_DEBUG_VALGRIND
 	if (b) UNIV_MEM_FREE(b, BUF_BUDDY_LOW << i);
@@ -100,8 +100,8 @@ buf_buddy_remove_from_free(
 	ulint		i)	/* in: index of buf_pool->zip_free[] */
 {
 #ifdef UNIV_DEBUG_VALGRIND
-	buf_page_t*	prev = UT_LIST_GET_PREV(list, bpage);
-	buf_page_t*	next = UT_LIST_GET_NEXT(list, bpage);
+	buf_page_t*	prev = UT_LIST_GET_PREV(zip_list, bpage);
+	buf_page_t*	next = UT_LIST_GET_NEXT(zip_list, bpage);
 
 	if (prev) UNIV_MEM_VALID(prev, BUF_BUDDY_LOW << i);
 	if (next) UNIV_MEM_VALID(next, BUF_BUDDY_LOW << i);
@@ -111,7 +111,7 @@ buf_buddy_remove_from_free(
 #endif /* UNIV_DEBUG_VALGRIND */
 
 	ut_ad(buf_page_get_state(bpage) == BUF_BLOCK_ZIP_FREE);
-	UT_LIST_REMOVE(list, buf_pool->zip_free[i], bpage);
+	UT_LIST_REMOVE(zip_list, buf_pool->zip_free[i], bpage);
 
 #ifdef UNIV_DEBUG_VALGRIND
 	if (prev) UNIV_MEM_FREE(prev, BUF_BUDDY_LOW << i);
@@ -131,12 +131,13 @@ buf_buddy_alloc_zip(
 {
 	buf_page_t*	bpage;
 
-	ut_ad(buf_pool_mutex_own());
+	//ut_ad(buf_pool_mutex_own());
+	ut_ad(mutex_own(&zip_free_mutex));
 	ut_a(i < BUF_BUDDY_SIZES);
 
 #if defined UNIV_DEBUG && !defined UNIV_DEBUG_VALGRIND
 	/* Valgrind would complain about accessing free memory. */
-	UT_LIST_VALIDATE(list, buf_page_t, buf_pool->zip_free[i]);
+	UT_LIST_VALIDATE(zip_list, buf_page_t, buf_pool->zip_free[i]);
 #endif /* UNIV_DEBUG && !UNIV_DEBUG_VALGRIND */
 	bpage = UT_LIST_GET_LAST(buf_pool->zip_free[i]);
 
@@ -177,16 +178,19 @@ static
 void
 buf_buddy_block_free(
 /*=================*/
-	void*	buf)	/* in: buffer frame to deallocate */
+	void*	buf,	/* in: buffer frame to deallocate */
+	ibool	have_page_hash_mutex)
 {
 	const ulint	fold	= BUF_POOL_ZIP_FOLD_PTR(buf);
 	buf_page_t*	bpage;
 	buf_block_t*	block;
 
-	ut_ad(buf_pool_mutex_own());
+	//ut_ad(buf_pool_mutex_own());
 	ut_ad(!mutex_own(&buf_pool_zip_mutex));
 	ut_a(!ut_align_offset(buf, UNIV_PAGE_SIZE));
 
+	mutex_enter(&zip_hash_mutex);
+
 	HASH_SEARCH(hash, buf_pool->zip_hash, fold, buf_page_t*, bpage,
 		    ut_ad(buf_page_get_state(bpage) == BUF_BLOCK_MEMORY
 			  && bpage->in_zip_hash && !bpage->in_page_hash),
@@ -198,12 +202,14 @@ buf_buddy_block_free(
 	ut_d(bpage->in_zip_hash = FALSE);
 	HASH_DELETE(buf_page_t, hash, buf_pool->zip_hash, fold, bpage);
 
+	mutex_exit(&zip_hash_mutex);
+
 	ut_d(memset(buf, 0, UNIV_PAGE_SIZE));
 	UNIV_MEM_INVALID(buf, UNIV_PAGE_SIZE);
 
 	block = (buf_block_t*) bpage;
 	mutex_enter(&block->mutex);
-	buf_LRU_block_free_non_file_page(block);
+	buf_LRU_block_free_non_file_page(block, have_page_hash_mutex);
 	mutex_exit(&block->mutex);
 
 	ut_ad(buf_buddy_n_frames > 0);
@@ -219,7 +225,7 @@ buf_buddy_block_register(
 	buf_block_t*	block)	/* in: buffer frame to allocate */
 {
 	const ulint	fold = BUF_POOL_ZIP_FOLD(block);
-	ut_ad(buf_pool_mutex_own());
+	//ut_ad(buf_pool_mutex_own());
 	ut_ad(!mutex_own(&buf_pool_zip_mutex));
 
 	buf_block_set_state(block, BUF_BLOCK_MEMORY);
@@ -230,7 +236,10 @@ buf_buddy_block_register(
 	ut_ad(!block->page.in_page_hash);
 	ut_ad(!block->page.in_zip_hash);
 	ut_d(block->page.in_zip_hash = TRUE);
+
+	mutex_enter(&zip_hash_mutex);
 	HASH_INSERT(buf_page_t, hash, buf_pool->zip_hash, fold, &block->page);
+	mutex_exit(&zip_hash_mutex);
 
 	ut_d(buf_buddy_n_frames++);
 }
@@ -264,7 +273,7 @@ buf_buddy_alloc_from(
 		bpage->state = BUF_BLOCK_ZIP_FREE;
 #if defined UNIV_DEBUG && !defined UNIV_DEBUG_VALGRIND
 		/* Valgrind would complain about accessing free memory. */
-		UT_LIST_VALIDATE(list, buf_page_t, buf_pool->zip_free[j]);
+		UT_LIST_VALIDATE(zip_list, buf_page_t, buf_pool->zip_free[j]);
 #endif /* UNIV_DEBUG && !UNIV_DEBUG_VALGRIND */
 		buf_buddy_add_to_free(bpage, j);
 	}
@@ -284,24 +293,28 @@ buf_buddy_alloc_low(
 			possibly NULL if lru==NULL */
 	ulint	i,	/* in: index of buf_pool->zip_free[],
 			or BUF_BUDDY_SIZES */
-	ibool*	lru)	/* in: pointer to a variable that will be assigned
+	ibool*	lru,	/* in: pointer to a variable that will be assigned
 			TRUE if storage was allocated from the LRU list
 			and buf_pool_mutex was temporarily released,
 			or NULL if the LRU list should not be used */
+	ibool	have_page_hash_mutex)
 {
 	buf_block_t*	block;
 
-	ut_ad(buf_pool_mutex_own());
+	//ut_ad(buf_pool_mutex_own());
 	ut_ad(!mutex_own(&buf_pool_zip_mutex));
 
 	if (i < BUF_BUDDY_SIZES) {
 		/* Try to allocate from the buddy system. */
+		mutex_enter(&zip_free_mutex);
 		block = buf_buddy_alloc_zip(i);
 
 		if (block) {
 
 			goto func_exit;
 		}
+
+		mutex_exit(&zip_free_mutex);
 	}
 
 	/* Try allocating from the buf_pool->free list. */
@@ -318,18 +331,29 @@ buf_buddy_alloc_low(
 	}
 
 	/* Try replacing an uncompressed page in the buffer pool. */
-	buf_pool_mutex_exit();
+	//buf_pool_mutex_exit();
+	mutex_exit(&LRU_list_mutex);
+	if (have_page_hash_mutex) {
+		rw_lock_x_unlock(&page_hash_latch);
+	}
 	block = buf_LRU_get_free_block(0);
 	*lru = TRUE;
-	buf_pool_mutex_enter();
+	//buf_pool_mutex_enter();
+	mutex_enter(&LRU_list_mutex);
+	if (have_page_hash_mutex) {
+		rw_lock_x_lock(&page_hash_latch);
+	}
 
 alloc_big:
 	buf_buddy_block_register(block);
 
+	mutex_enter(&zip_free_mutex);
 	block = buf_buddy_alloc_from(block->frame, i, BUF_BUDDY_SIZES);
 
 func_exit:
 	buf_buddy_stat[i].used++;
+	mutex_exit(&zip_free_mutex);
+
 	return(block);
 }
 
@@ -345,7 +369,10 @@ buf_buddy_relocate_block(
 {
 	buf_page_t*	b;
 
-	ut_ad(buf_pool_mutex_own());
+	//ut_ad(buf_pool_mutex_own());
+#ifdef UNIV_SYNC_DEBUG
+	ut_ad(rw_lock_own(&page_hash_latch, RW_LOCK_EX));
+#endif
 
 	switch (buf_page_get_state(bpage)) {
 	case BUF_BLOCK_ZIP_FREE:
@@ -354,7 +381,7 @@ buf_buddy_relocate_block(
 	case BUF_BLOCK_FILE_PAGE:
 	case BUF_BLOCK_MEMORY:
 	case BUF_BLOCK_REMOVE_HASH:
-		ut_error;
+		/* ut_error; */ /* optimistic */
 	case BUF_BLOCK_ZIP_DIRTY:
 		/* Cannot relocate dirty pages. */
 		return(FALSE);
@@ -364,9 +391,17 @@ buf_buddy_relocate_block(
 	}
 
 	mutex_enter(&buf_pool_zip_mutex);
+	mutex_enter(&zip_free_mutex);
 
 	if (!buf_page_can_relocate(bpage)) {
 		mutex_exit(&buf_pool_zip_mutex);
+		mutex_exit(&zip_free_mutex);
+		return(FALSE);
+	}
+
+	if (bpage != buf_page_hash_get(bpage->space, bpage->offset)) {
+		mutex_exit(&buf_pool_zip_mutex);
+		mutex_exit(&zip_free_mutex);
 		return(FALSE);
 	}
 
@@ -374,16 +409,19 @@ buf_buddy_relocate_block(
 	ut_d(bpage->state = BUF_BLOCK_ZIP_FREE);
 
 	/* relocate buf_pool->zip_clean */
-	b = UT_LIST_GET_PREV(list, dpage);
-	UT_LIST_REMOVE(list, buf_pool->zip_clean, dpage);
+	mutex_enter(&flush_list_mutex);
+	b = UT_LIST_GET_PREV(zip_list, dpage);
+	UT_LIST_REMOVE(zip_list, buf_pool->zip_clean, dpage);
 
 	if (b) {
-		UT_LIST_INSERT_AFTER(list, buf_pool->zip_clean, b, dpage);
+		UT_LIST_INSERT_AFTER(zip_list, buf_pool->zip_clean, b, dpage);
 	} else {
-		UT_LIST_ADD_FIRST(list, buf_pool->zip_clean, dpage);
+		UT_LIST_ADD_FIRST(zip_list, buf_pool->zip_clean, dpage);
 	}
+	mutex_exit(&flush_list_mutex);
 
 	mutex_exit(&buf_pool_zip_mutex);
+	mutex_exit(&zip_free_mutex);
 	return(TRUE);
 }
 
@@ -396,13 +434,15 @@ buf_buddy_relocate(
 			/* out: TRUE if relocated */
 	void*	src,	/* in: block to relocate */
 	void*	dst,	/* in: free block to relocate to */
-	ulint	i)	/* in: index of buf_pool->zip_free[] */
+	ulint	i,	/* in: index of buf_pool->zip_free[] */
+	ibool	have_page_hash_mutex)
 {
 	buf_page_t*	bpage;
 	const ulint	size	= BUF_BUDDY_LOW << i;
 	ullint		usec	= ut_time_us(NULL);
 
-	ut_ad(buf_pool_mutex_own());
+	//ut_ad(buf_pool_mutex_own());
+	ut_ad(mutex_own(&zip_free_mutex));
 	ut_ad(!mutex_own(&buf_pool_zip_mutex));
 	ut_ad(!ut_align_offset(src, size));
 	ut_ad(!ut_align_offset(dst, size));
@@ -421,9 +461,16 @@ buf_buddy_relocate(
 	actually is a properly initialized buf_page_t object. */
 
 	if (size >= PAGE_ZIP_MIN_SIZE) {
+		if (!have_page_hash_mutex)
+			mutex_exit(&zip_free_mutex);
+
 		/* This is a compressed page. */
 		mutex_t*	mutex;
 
+		if (!have_page_hash_mutex) {
+			mutex_enter(&LRU_list_mutex);
+			rw_lock_x_lock(&page_hash_latch);
+		}
 		/* The src block may be split into smaller blocks,
 		some of which may be free.  Thus, the
 		mach_read_from_4() calls below may attempt to read
@@ -444,6 +491,11 @@ buf_buddy_relocate(
 			added to buf_pool->page_hash yet.  Obviously,
 			it cannot be relocated. */
 
+			if (!have_page_hash_mutex) {
+				mutex_enter(&zip_free_mutex);
+				mutex_exit(&LRU_list_mutex);
+				rw_lock_x_unlock(&page_hash_latch);
+			}
 			return(FALSE);
 		}
 
@@ -453,16 +505,32 @@ buf_buddy_relocate(
 			For the sake of simplicity, give up. */
 			ut_ad(page_zip_get_size(&bpage->zip) < size);
 
+			if (!have_page_hash_mutex) {
+				mutex_enter(&zip_free_mutex);
+				mutex_exit(&LRU_list_mutex);
+				rw_lock_x_unlock(&page_hash_latch);
+			}
 			return(FALSE);
 		}
 
+		/* To keep latch order */
+		if (have_page_hash_mutex)
+			mutex_exit(&zip_free_mutex);
+
 		/* The block must have been allocated, but it may
 		contain uninitialized data. */
 		UNIV_MEM_ASSERT_W(src, size);
 
 		mutex = buf_page_get_mutex(bpage);
 
+retry_lock:
 		mutex_enter(mutex);
+		if (mutex != buf_page_get_mutex(bpage)) {
+			mutex_exit(mutex);
+			mutex = buf_page_get_mutex(bpage);
+			goto retry_lock;
+		}
+		mutex_enter(&zip_free_mutex);
 
 		if (buf_page_can_relocate(bpage)) {
 			/* Relocate the compressed page. */
@@ -479,17 +547,48 @@ success:
 				buddy_stat->relocated_usec
 					+= ut_time_us(NULL) - usec;
 			}
+
+			if (!have_page_hash_mutex) {
+				mutex_exit(&LRU_list_mutex);
+				rw_lock_x_unlock(&page_hash_latch);
+			}
 			return(TRUE);
 		}
 
+		if (!have_page_hash_mutex) {
+			mutex_exit(&LRU_list_mutex);
+			rw_lock_x_unlock(&page_hash_latch);
+		}
+
 		mutex_exit(mutex);
 	} else if (i == buf_buddy_get_slot(sizeof(buf_page_t))) {
 		/* This must be a buf_page_t object. */
 		UNIV_MEM_ASSERT_RW(src, size);
+
+		mutex_exit(&zip_free_mutex);
+
+		if (!have_page_hash_mutex) {
+			mutex_enter(&LRU_list_mutex);
+			rw_lock_x_lock(&page_hash_latch);
+		}
+
 		if (buf_buddy_relocate_block(src, dst)) {
+			mutex_enter(&zip_free_mutex);
+
+			if (!have_page_hash_mutex) {
+				mutex_exit(&LRU_list_mutex);
+				rw_lock_x_unlock(&page_hash_latch);
+			}
 
 			goto success;
 		}
+
+		mutex_enter(&zip_free_mutex);
+
+		if (!have_page_hash_mutex) {
+			mutex_exit(&LRU_list_mutex);
+			rw_lock_x_unlock(&page_hash_latch);
+		}
 	}
 
 	return(FALSE);
@@ -503,12 +602,14 @@ buf_buddy_free_low(
 /*===============*/
 	void*	buf,	/* in: block to be freed, must not be
 			pointed to by the buffer pool */
-	ulint	i)	/* in: index of buf_pool->zip_free[] */
+	ulint	i,	/* in: index of buf_pool->zip_free[] */
+	ibool	have_page_hash_mutex)
 {
 	buf_page_t*	bpage;
 	buf_page_t*	buddy;
 
-	ut_ad(buf_pool_mutex_own());
+	//ut_ad(buf_pool_mutex_own());
+	ut_ad(mutex_own(&zip_free_mutex));
 	ut_ad(!mutex_own(&buf_pool_zip_mutex));
 	ut_ad(i <= BUF_BUDDY_SIZES);
 	ut_ad(buf_buddy_stat[i].used > 0);
@@ -519,7 +620,9 @@ recombine:
 	ut_d(((buf_page_t*) buf)->state = BUF_BLOCK_ZIP_FREE);
 
 	if (i == BUF_BUDDY_SIZES) {
-		buf_buddy_block_free(buf);
+		mutex_exit(&zip_free_mutex);
+		buf_buddy_block_free(buf, have_page_hash_mutex);
+		mutex_enter(&zip_free_mutex);
 		return;
 	}
 
@@ -564,7 +667,7 @@ buddy_free2:
 		ut_a(bpage != buf);
 
 		{
-			buf_page_t*	next = UT_LIST_GET_NEXT(list, bpage);
+			buf_page_t*	next = UT_LIST_GET_NEXT(zip_list, bpage);
 			UNIV_MEM_ASSERT_AND_FREE(bpage, BUF_BUDDY_LOW << i);
 			bpage = next;
 		}
@@ -573,11 +676,11 @@ buddy_free2:
 #ifndef UNIV_DEBUG_VALGRIND
 buddy_nonfree:
 	/* Valgrind would complain about accessing free memory. */
-	ut_d(UT_LIST_VALIDATE(list, buf_page_t, buf_pool->zip_free[i]));
+	ut_d(UT_LIST_VALIDATE(zip_list, buf_page_t, buf_pool->zip_free[i]));
 #endif /* UNIV_DEBUG_VALGRIND */
 
 	/* The buddy is not free. Is there a free block of this size? */
-	bpage = UT_LIST_GET_FIRST(buf_pool->zip_free[i]);
+	bpage = UT_LIST_GET_LAST(buf_pool->zip_free[i]);
 
 	if (bpage) {
 		/* Remove the block from the free list, because a successful
@@ -587,7 +690,7 @@ buddy_nonfree:
 		buf_buddy_remove_from_free(bpage, i);
 
 		/* Try to relocate the buddy of buf to the free block. */
-		if (buf_buddy_relocate(buddy, bpage, i)) {
+		if (buf_buddy_relocate(buddy, bpage, i, have_page_hash_mutex)) {
 
 			ut_d(buddy->state = BUF_BLOCK_ZIP_FREE);
 			goto buddy_free2;
@@ -608,14 +711,14 @@ buddy_nonfree:
 			(Parts of the buddy can be free in
 			buf_pool->zip_free[j] with j < i.)*/
 			for (b = UT_LIST_GET_FIRST(buf_pool->zip_free[i]);
-			     b; b = UT_LIST_GET_NEXT(list, b)) {
+			     b; b = UT_LIST_GET_NEXT(zip_list, b)) {
 
 				ut_a(b != buddy);
 			}
 		}
 #endif /* UNIV_DEBUG && !UNIV_DEBUG_VALGRIND */
 
-		if (buf_buddy_relocate(buddy, buf, i)) {
+		if (buf_buddy_relocate(buddy, buf, i, have_page_hash_mutex)) {
 
 			buf = bpage;
 			UNIV_MEM_VALID(bpage, BUF_BUDDY_LOW << i);

=== modified file 'storage/xtradb/buf/buf0buf.c'
--- a/storage/xtradb/buf/buf0buf.c	2009-05-04 04:32:30 +0000
+++ b/storage/xtradb/buf/buf0buf.c	2009-06-25 01:43:25 +0000
@@ -244,6 +244,12 @@ UNIV_INTERN buf_pool_t*	buf_pool = NULL;
 /* mutex protecting the buffer pool struct and control blocks, except the
 read-write lock in them */
 UNIV_INTERN mutex_t		buf_pool_mutex;
+UNIV_INTERN mutex_t		LRU_list_mutex;
+UNIV_INTERN mutex_t		flush_list_mutex;
+UNIV_INTERN rw_lock_t		page_hash_latch;
+UNIV_INTERN mutex_t		free_list_mutex;
+UNIV_INTERN mutex_t		zip_free_mutex;
+UNIV_INTERN mutex_t		zip_hash_mutex;
 /* mutex protecting the control blocks of compressed-only pages
 (of type buf_page_t, not buf_block_t) */
 UNIV_INTERN mutex_t		buf_pool_zip_mutex;
@@ -664,9 +670,9 @@ buf_block_init(
 	block->page.in_zip_hash = FALSE;
 	block->page.in_flush_list = FALSE;
 	block->page.in_free_list = FALSE;
-	block->in_unzip_LRU_list = FALSE;
 #endif /* UNIV_DEBUG */
 	block->page.in_LRU_list = FALSE;
+	block->in_unzip_LRU_list = FALSE;
 #if defined UNIV_AHI_DEBUG || defined UNIV_DEBUG
 	block->n_pointers = 0;
 #endif /* UNIV_AHI_DEBUG || UNIV_DEBUG */
@@ -751,8 +757,10 @@ buf_chunk_init(
 		memset(block->frame, '\0', UNIV_PAGE_SIZE);
 #endif
 		/* Add the block to the free list */
-		UT_LIST_ADD_LAST(list, buf_pool->free, (&block->page));
+		mutex_enter(&free_list_mutex);
+		UT_LIST_ADD_LAST(free, buf_pool->free, (&block->page));
 		ut_d(block->page.in_free_list = TRUE);
+		mutex_exit(&free_list_mutex);
 
 		block++;
 		frame += UNIV_PAGE_SIZE;
@@ -778,7 +786,7 @@ buf_chunk_contains_zip(
 	ulint		i;
 
 	ut_ad(buf_pool);
-	ut_ad(buf_pool_mutex_own());
+	//ut_ad(buf_pool_mutex_own());
 
 	block = chunk->blocks;
 
@@ -832,7 +840,7 @@ buf_chunk_not_freed(
 	ulint		i;
 
 	ut_ad(buf_pool);
-	ut_ad(buf_pool_mutex_own());
+	//ut_ad(buf_pool_mutex_own()); /*optimistic...*/
 
 	block = chunk->blocks;
 
@@ -865,7 +873,7 @@ buf_chunk_all_free(
 	ulint			i;
 
 	ut_ad(buf_pool);
-	ut_ad(buf_pool_mutex_own());
+	ut_ad(buf_pool_mutex_own()); /* but we need all mutex here */
 
 	block = chunk->blocks;
 
@@ -891,7 +899,7 @@ buf_chunk_free(
 	buf_block_t*		block;
 	const buf_block_t*	block_end;
 
-	ut_ad(buf_pool_mutex_own());
+	ut_ad(buf_pool_mutex_own()); /* but we need all mutex here */
 
 	block_end = chunk->blocks + chunk->size;
 
@@ -903,8 +911,10 @@ buf_chunk_free(
 		ut_ad(!block->in_unzip_LRU_list);
 		ut_ad(!block->page.in_flush_list);
 		/* Remove the block from the free list. */
+		mutex_enter(&free_list_mutex);
 		ut_ad(block->page.in_free_list);
-		UT_LIST_REMOVE(list, buf_pool->free, (&block->page));
+		UT_LIST_REMOVE(free, buf_pool->free, (&block->page));
+		mutex_exit(&free_list_mutex);
 
 		/* Free the latches. */
 		mutex_free(&block->mutex);
@@ -935,8 +945,17 @@ buf_pool_init(void)
 	/* 1. Initialize general fields
 	------------------------------- */
 	mutex_create(&buf_pool_mutex, SYNC_BUF_POOL);
+	mutex_create(&LRU_list_mutex, SYNC_BUF_LRU_LIST);
+	mutex_create(&flush_list_mutex, SYNC_BUF_FLUSH_LIST);
+	rw_lock_create(&page_hash_latch, SYNC_BUF_PAGE_HASH);
+	mutex_create(&free_list_mutex, SYNC_BUF_FREE_LIST);
+	mutex_create(&zip_free_mutex, SYNC_BUF_ZIP_FREE);
+	mutex_create(&zip_hash_mutex, SYNC_BUF_ZIP_HASH);
+
 	mutex_create(&buf_pool_zip_mutex, SYNC_BUF_BLOCK);
 
+	mutex_enter(&LRU_list_mutex);
+	rw_lock_x_lock(&page_hash_latch);
 	buf_pool_mutex_enter();
 
 	buf_pool->n_chunks = 1;
@@ -973,6 +992,8 @@ buf_pool_init(void)
 	--------------------------- */
 	/* All fields are initialized by mem_zalloc(). */
 
+	mutex_exit(&LRU_list_mutex);
+	rw_lock_x_unlock(&page_hash_latch);
 	buf_pool_mutex_exit();
 
 	btr_search_sys_create(buf_pool->curr_size
@@ -1105,7 +1126,11 @@ buf_relocate(
 	buf_page_t*	b;
 	ulint		fold;
 
-	ut_ad(buf_pool_mutex_own());
+	//ut_ad(buf_pool_mutex_own());
+	ut_ad(mutex_own(&LRU_list_mutex));
+#ifdef UNIV_SYNC_DEBUG
+	ut_ad(rw_lock_own(&page_hash_latch, RW_LOCK_EX));
+#endif
 	ut_ad(mutex_own(buf_page_get_mutex(bpage)));
 	ut_a(buf_page_get_io_fix(bpage) == BUF_IO_NONE);
 	ut_a(bpage->buf_fix_count == 0);
@@ -1186,7 +1211,8 @@ buf_pool_shrink(
 
 try_again:
 	btr_search_disable(); /* Empty the adaptive hash index again */
-	buf_pool_mutex_enter();
+	//buf_pool_mutex_enter();
+	mutex_enter(&LRU_list_mutex);
 
 shrink_again:
 	if (buf_pool->n_chunks <= 1) {
@@ -1257,7 +1283,7 @@ shrink_again:
 
 				buf_LRU_make_block_old(&block->page);
 				dirty++;
-			} else if (buf_LRU_free_block(&block->page, TRUE, NULL)
+			} else if (buf_LRU_free_block(&block->page, TRUE, NULL, FALSE)
 				   != BUF_LRU_FREED) {
 				nonfree++;
 			}
@@ -1265,7 +1291,8 @@ shrink_again:
 			mutex_exit(&block->mutex);
 		}
 
-		buf_pool_mutex_exit();
+		//buf_pool_mutex_exit();
+		mutex_exit(&LRU_list_mutex);
 
 		/* Request for a flush of the chunk if it helps.
 		Do not flush if there are non-free blocks, since
@@ -1314,7 +1341,8 @@ shrink_again:
 func_done:
 	srv_buf_pool_old_size = srv_buf_pool_size;
 func_exit:
-	buf_pool_mutex_exit();
+	//buf_pool_mutex_exit();
+	mutex_exit(&LRU_list_mutex);
 	btr_search_enable();
 }
 
@@ -1332,7 +1360,11 @@ buf_pool_page_hash_rebuild(void)
 	hash_table_t*	zip_hash;
 	buf_page_t*	b;
 
-	buf_pool_mutex_enter();
+	//buf_pool_mutex_enter();
+	mutex_enter(&LRU_list_mutex);
+	rw_lock_x_lock(&page_hash_latch);
+	mutex_enter(&flush_list_mutex);
+	
 
 	/* Free, create, and populate the hash table. */
 	hash_table_free(buf_pool->page_hash);
@@ -1374,7 +1406,7 @@ buf_pool_page_hash_rebuild(void)
 	in buf_pool->flush_list. */
 
 	for (b = UT_LIST_GET_FIRST(buf_pool->zip_clean); b;
-	     b = UT_LIST_GET_NEXT(list, b)) {
+	     b = UT_LIST_GET_NEXT(zip_list, b)) {
 		ut_a(buf_page_get_state(b) == BUF_BLOCK_ZIP_PAGE);
 		ut_ad(!b->in_flush_list);
 		ut_ad(b->in_LRU_list);
@@ -1386,7 +1418,7 @@ buf_pool_page_hash_rebuild(void)
 	}
 
 	for (b = UT_LIST_GET_FIRST(buf_pool->flush_list); b;
-	     b = UT_LIST_GET_NEXT(list, b)) {
+	     b = UT_LIST_GET_NEXT(flush_list, b)) {
 		ut_ad(b->in_flush_list);
 		ut_ad(b->in_LRU_list);
 		ut_ad(b->in_page_hash);
@@ -1412,7 +1444,10 @@ buf_pool_page_hash_rebuild(void)
 		}
 	}
 
-	buf_pool_mutex_exit();
+	//buf_pool_mutex_exit();
+	mutex_exit(&LRU_list_mutex);
+	rw_lock_x_unlock(&page_hash_latch);
+	mutex_exit(&flush_list_mutex);
 }
 
 /************************************************************************
@@ -1422,17 +1457,20 @@ void
 buf_pool_resize(void)
 /*=================*/
 {
-	buf_pool_mutex_enter();
+	//buf_pool_mutex_enter();
+	mutex_enter(&LRU_list_mutex);
 
 	if (srv_buf_pool_old_size == srv_buf_pool_size) {
 
-		buf_pool_mutex_exit();
+		//buf_pool_mutex_exit();
+		mutex_exit(&LRU_list_mutex);
 		return;
 	}
 
 	if (srv_buf_pool_curr_size + 1048576 > srv_buf_pool_size) {
 
-		buf_pool_mutex_exit();
+		//buf_pool_mutex_exit();
+		mutex_exit(&LRU_list_mutex);
 
 		/* Disable adaptive hash indexes and empty the index
 		in order to free up memory in the buffer pool chunks. */
@@ -1466,7 +1504,8 @@ buf_pool_resize(void)
 		}
 
 		srv_buf_pool_old_size = srv_buf_pool_size;
-		buf_pool_mutex_exit();
+		//buf_pool_mutex_exit();
+		mutex_exit(&LRU_list_mutex);
 	}
 
 	buf_pool_page_hash_rebuild();
@@ -1488,12 +1527,14 @@ buf_block_make_young(
 
 	if (buf_page_peek_if_too_old(bpage)) {
 
-		buf_pool_mutex_enter();
+		//buf_pool_mutex_enter();
+		mutex_enter(&LRU_list_mutex);
 		/* There has been freeing activity in the LRU list:
 		best to move to the head of the LRU list */
 
 		buf_LRU_make_block_young(bpage);
-		buf_pool_mutex_exit();
+		//buf_pool_mutex_exit();
+		mutex_exit(&LRU_list_mutex);
 	}
 }
 
@@ -1507,13 +1548,15 @@ buf_page_make_young(
 /*================*/
 	buf_page_t*	bpage)	/* in: buffer block of a file page */
 {
-	buf_pool_mutex_enter();
+	//buf_pool_mutex_enter();
+	mutex_enter(&LRU_list_mutex);
 
 	ut_a(buf_page_in_file(bpage));
 
 	buf_LRU_make_block_young(bpage);
 
-	buf_pool_mutex_exit();
+	//buf_pool_mutex_exit();
+	mutex_exit(&LRU_list_mutex);
 }
 
 /************************************************************************
@@ -1528,7 +1571,8 @@ buf_reset_check_index_page_at_flush(
 {
 	buf_block_t*	block;
 
-	buf_pool_mutex_enter();
+	//buf_pool_mutex_enter();
+	rw_lock_s_lock(&page_hash_latch);
 
 	block = (buf_block_t*) buf_page_hash_get(space, offset);
 
@@ -1536,7 +1580,8 @@ buf_reset_check_index_page_at_flush(
 		block->check_index_page_at_flush = FALSE;
 	}
 
-	buf_pool_mutex_exit();
+	//buf_pool_mutex_exit();
+	rw_lock_s_unlock(&page_hash_latch);
 }
 
 /************************************************************************
@@ -1555,7 +1600,8 @@ buf_page_peek_if_search_hashed(
 	buf_block_t*	block;
 	ibool		is_hashed;
 
-	buf_pool_mutex_enter();
+	//buf_pool_mutex_enter();
+	rw_lock_s_lock(&page_hash_latch);
 
 	block = (buf_block_t*) buf_page_hash_get(space, offset);
 
@@ -1565,7 +1611,8 @@ buf_page_peek_if_search_hashed(
 		is_hashed = block->is_hashed;
 	}
 
-	buf_pool_mutex_exit();
+	//buf_pool_mutex_exit();
+	rw_lock_s_unlock(&page_hash_latch);
 
 	return(is_hashed);
 }
@@ -1587,7 +1634,8 @@ buf_page_set_file_page_was_freed(
 {
 	buf_page_t*	bpage;
 
-	buf_pool_mutex_enter();
+	//buf_pool_mutex_enter();
+	rw_lock_s_lock(&page_hash_latch);
 
 	bpage = buf_page_hash_get(space, offset);
 
@@ -1595,7 +1643,8 @@ buf_page_set_file_page_was_freed(
 		bpage->file_page_was_freed = TRUE;
 	}
 
-	buf_pool_mutex_exit();
+	//buf_pool_mutex_exit();
+	rw_lock_s_unlock(&page_hash_latch);
 
 	return(bpage);
 }
@@ -1616,7 +1665,8 @@ buf_page_reset_file_page_was_freed(
 {
 	buf_page_t*	bpage;
 
-	buf_pool_mutex_enter();
+	//buf_pool_mutex_enter();
+	rw_lock_s_lock(&page_hash_latch);
 
 	bpage = buf_page_hash_get(space, offset);
 
@@ -1624,7 +1674,8 @@ buf_page_reset_file_page_was_freed(
 		bpage->file_page_was_freed = FALSE;
 	}
 
-	buf_pool_mutex_exit();
+	//buf_pool_mutex_exit();
+	rw_lock_s_unlock(&page_hash_latch);
 
 	return(bpage);
 }
@@ -1657,8 +1708,9 @@ buf_page_get_zip(
 	buf_pool->n_page_gets++;
 
 	for (;;) {
-		buf_pool_mutex_enter();
+		//buf_pool_mutex_enter();
 lookup:
+		rw_lock_s_lock(&page_hash_latch);
 		bpage = buf_page_hash_get(space, offset);
 		if (bpage) {
 			break;
@@ -1666,7 +1718,8 @@ lookup:
 
 		/* Page not in buf_pool: needs to be read from file */
 
-		buf_pool_mutex_exit();
+		//buf_pool_mutex_exit();
+		rw_lock_s_unlock(&page_hash_latch);
 
 		buf_read_page(space, zip_size, offset);
 
@@ -1677,12 +1730,21 @@ lookup:
 
 	if (UNIV_UNLIKELY(!bpage->zip.data)) {
 		/* There is no compressed page. */
-		buf_pool_mutex_exit();
+		//buf_pool_mutex_exit();
+		rw_lock_s_unlock(&page_hash_latch);
 		return(NULL);
 	}
 
 	block_mutex = buf_page_get_mutex(bpage);
+retry_lock:
 	mutex_enter(block_mutex);
+	if (block_mutex != buf_page_get_mutex(bpage)) {
+		mutex_exit(block_mutex);
+		block_mutex = buf_page_get_mutex(bpage);
+		goto retry_lock;
+	}
+
+	rw_lock_s_unlock(&page_hash_latch);
 
 	switch (buf_page_get_state(bpage)) {
 	case BUF_BLOCK_NOT_USED:
@@ -1698,7 +1760,7 @@ lookup:
 		break;
 	case BUF_BLOCK_FILE_PAGE:
 		/* Discard the uncompressed page frame if possible. */
-		if (buf_LRU_free_block(bpage, FALSE, NULL)
+		if (buf_LRU_free_block(bpage, FALSE, NULL, FALSE)
 		    == BUF_LRU_FREED) {
 
 			mutex_exit(block_mutex);
@@ -1712,7 +1774,7 @@ lookup:
 
 	must_read = buf_page_get_io_fix(bpage) == BUF_IO_READ;
 
-	buf_pool_mutex_exit();
+	//buf_pool_mutex_exit();
 
 	buf_page_set_accessed(bpage, TRUE);
 
@@ -1943,7 +2005,7 @@ buf_block_is_uncompressed(
 	const buf_chunk_t*		chunk	= buf_pool->chunks;
 	const buf_chunk_t* const	echunk	= chunk + buf_pool->n_chunks;
 
-	ut_ad(buf_pool_mutex_own());
+	//ut_ad(buf_pool_mutex_own());
 
 	if (UNIV_UNLIKELY((((ulint) block) % sizeof *block) != 0)) {
 		/* The pointer should be aligned. */
@@ -1986,6 +2048,7 @@ buf_page_get_gen(
 	ibool		accessed;
 	ulint		fix_type;
 	ibool		must_read;
+	mutex_t*	block_mutex;
 
 	ut_ad(mtr);
 	ut_ad((rw_latch == RW_S_LATCH)
@@ -2001,9 +2064,18 @@ buf_page_get_gen(
 	buf_pool->n_page_gets++;
 loop:
 	block = guess;
-	buf_pool_mutex_enter();
+	//buf_pool_mutex_enter();
 
 	if (block) {
+		block_mutex = buf_page_get_mutex((buf_page_t*)block);
+retry_lock_1:
+		mutex_enter(block_mutex);
+		if (block_mutex != buf_page_get_mutex((buf_page_t*)block)) {
+			mutex_exit(block_mutex);
+			block_mutex = buf_page_get_mutex((buf_page_t*)block);
+			goto retry_lock_1;
+		}
+
 		/* If the guess is a compressed page descriptor that
 		has been allocated by buf_buddy_alloc(), it may have
 		been invalidated by buf_buddy_relocate().  In that
@@ -2017,6 +2089,8 @@ loop:
 		    || space != block->page.space
 		    || buf_block_get_state(block) != BUF_BLOCK_FILE_PAGE) {
 
+			mutex_exit(block_mutex);
+
 			block = guess = NULL;
 		} else {
 			ut_ad(!block->page.in_zip_hash);
@@ -2025,14 +2099,26 @@ loop:
 	}
 
 	if (block == NULL) {
+		rw_lock_s_lock(&page_hash_latch);
 		block = (buf_block_t*) buf_page_hash_get(space, offset);
+		if (block) {
+			block_mutex = buf_page_get_mutex((buf_page_t*)block);
+retry_lock_2:
+			mutex_enter(block_mutex);
+			if (block_mutex != buf_page_get_mutex((buf_page_t*)block)) {
+				mutex_exit(block_mutex);
+				block_mutex = buf_page_get_mutex((buf_page_t*)block);
+				goto retry_lock_2;
+			}
+		}
+		rw_lock_s_unlock(&page_hash_latch);
 	}
 
 loop2:
 	if (block == NULL) {
 		/* Page not in buf_pool: needs to be read from file */
 
-		buf_pool_mutex_exit();
+		//buf_pool_mutex_exit();
 
 		if (mode == BUF_GET_IF_IN_POOL) {
 
@@ -2053,7 +2139,8 @@ loop2:
 
 	if (must_read && mode == BUF_GET_IF_IN_POOL) {
 		/* The page is only being read to buffer */
-		buf_pool_mutex_exit();
+		//buf_pool_mutex_exit();
+		mutex_exit(block_mutex);
 
 		return(NULL);
 	}
@@ -2063,10 +2150,16 @@ loop2:
 		ibool		success;
 
 	case BUF_BLOCK_FILE_PAGE:
+		if (block_mutex == &buf_pool_zip_mutex) {
+			/* it is wrong mutex... */
+			mutex_exit(block_mutex);
+			goto loop;
+		}
 		break;
 
 	case BUF_BLOCK_ZIP_PAGE:
 	case BUF_BLOCK_ZIP_DIRTY:
+		ut_ad(block_mutex == &buf_pool_zip_mutex);
 		bpage = &block->page;
 
 		if (bpage->buf_fix_count
@@ -2077,20 +2170,25 @@ loop2:
 wait_until_unfixed:
 			/* The block is buffer-fixed or I/O-fixed.
 			Try again later. */
-			buf_pool_mutex_exit();
+			//buf_pool_mutex_exit();
+			mutex_exit(block_mutex);
 			os_thread_sleep(WAIT_FOR_READ);
 
 			goto loop;
 		}
 
 		/* Allocate an uncompressed page. */
-		buf_pool_mutex_exit();
+		//buf_pool_mutex_exit();
+		mutex_exit(block_mutex);
 
 		block = buf_LRU_get_free_block(0);
 		ut_a(block);
+		block_mutex = &block->mutex;
 
-		buf_pool_mutex_enter();
-		mutex_enter(&block->mutex);
+		//buf_pool_mutex_enter();
+		mutex_enter(&LRU_list_mutex);
+		rw_lock_x_lock(&page_hash_latch);
+		mutex_enter(block_mutex);
 
 		{
 			buf_page_t*	hash_bpage
@@ -2101,35 +2199,55 @@ wait_until_unfixed:
 				while buf_pool_mutex was released.
 				Free the block that was allocated. */
 
-				buf_LRU_block_free_non_file_page(block);
-				mutex_exit(&block->mutex);
+				buf_LRU_block_free_non_file_page(block, TRUE);
+				mutex_exit(block_mutex);
 
 				block = (buf_block_t*) hash_bpage;
+				if (block) {
+					block_mutex = buf_page_get_mutex((buf_page_t*)block);
+retry_lock_3:
+					mutex_enter(block_mutex);
+					if (block_mutex != buf_page_get_mutex((buf_page_t*)block)) {
+						mutex_exit(block_mutex);
+						block_mutex = buf_page_get_mutex((buf_page_t*)block);
+						goto retry_lock_3;
+					}
+				}
+				rw_lock_x_unlock(&page_hash_latch);
+				mutex_exit(&LRU_list_mutex);
 				goto loop2;
 			}
 		}
 
+		mutex_enter(&buf_pool_zip_mutex);
+
 		if (UNIV_UNLIKELY
 		    (bpage->buf_fix_count
 		     || buf_page_get_io_fix(bpage) != BUF_IO_NONE)) {
 
+			mutex_exit(&buf_pool_zip_mutex);
 			/* The block was buffer-fixed or I/O-fixed
 			while buf_pool_mutex was not held by this thread.
 			Free the block that was allocated and try again.
 			This should be extremely unlikely. */
 
-			buf_LRU_block_free_non_file_page(block);
-			mutex_exit(&block->mutex);
+			buf_LRU_block_free_non_file_page(block, TRUE);
+			//mutex_exit(&block->mutex);
 
+			rw_lock_x_unlock(&page_hash_latch);
+			mutex_exit(&LRU_list_mutex);
 			goto wait_until_unfixed;
 		}
 
 		/* Move the compressed page from bpage to block,
 		and uncompress it. */
 
-		mutex_enter(&buf_pool_zip_mutex);
+		mutex_enter(&flush_list_mutex);
 
 		buf_relocate(bpage, &block->page);
+
+		rw_lock_x_unlock(&page_hash_latch);
+
 		buf_block_init_low(block);
 		block->lock_hash_val = lock_rec_hash(space, offset);
 
@@ -2138,29 +2256,31 @@ wait_until_unfixed:
 
 		if (buf_page_get_state(&block->page)
 		    == BUF_BLOCK_ZIP_PAGE) {
-			UT_LIST_REMOVE(list, buf_pool->zip_clean,
+			UT_LIST_REMOVE(zip_list, buf_pool->zip_clean,
 				       &block->page);
 			ut_ad(!block->page.in_flush_list);
 		} else {
 			/* Relocate buf_pool->flush_list. */
 			buf_page_t*	b;
 
-			b = UT_LIST_GET_PREV(list, &block->page);
+			b = UT_LIST_GET_PREV(flush_list, &block->page);
 			ut_ad(block->page.in_flush_list);
-			UT_LIST_REMOVE(list, buf_pool->flush_list,
+			UT_LIST_REMOVE(flush_list, buf_pool->flush_list,
 				       &block->page);
 
 			if (b) {
 				UT_LIST_INSERT_AFTER(
-					list, buf_pool->flush_list, b,
+					flush_list, buf_pool->flush_list, b,
 					&block->page);
 			} else {
 				UT_LIST_ADD_FIRST(
-					list, buf_pool->flush_list,
+					flush_list, buf_pool->flush_list,
 					&block->page);
 			}
 		}
 
+		mutex_exit(&flush_list_mutex);
+
 		/* Buffer-fix, I/O-fix, and X-latch the block
 		for the duration of the decompression.
 		Also add the block to the unzip_LRU list. */
@@ -2169,16 +2289,22 @@ wait_until_unfixed:
 		/* Insert at the front of unzip_LRU list */
 		buf_unzip_LRU_add_block(block, FALSE);
 
+		mutex_exit(&LRU_list_mutex);
+
 		block->page.buf_fix_count = 1;
 		buf_block_set_io_fix(block, BUF_IO_READ);
+
+		mutex_enter(&buf_pool_mutex);
 		buf_pool->n_pend_unzip++;
+		mutex_exit(&buf_pool_mutex);
+
 		rw_lock_x_lock(&block->lock);
-		mutex_exit(&block->mutex);
+		mutex_exit(block_mutex);
 		mutex_exit(&buf_pool_zip_mutex);
 
-		buf_buddy_free(bpage, sizeof *bpage);
+		buf_buddy_free(bpage, sizeof *bpage, FALSE);
 
-		buf_pool_mutex_exit();
+		//buf_pool_mutex_exit();
 
 		/* Decompress the page and apply buffered operations
 		while not holding buf_pool_mutex or block->mutex. */
@@ -2190,17 +2316,21 @@ wait_until_unfixed:
 		}
 
 		/* Unfix and unlatch the block. */
-		buf_pool_mutex_enter();
-		mutex_enter(&block->mutex);
+		//buf_pool_mutex_enter();
+		block_mutex = &block->mutex;
+		mutex_enter(block_mutex);
+		mutex_enter(&buf_pool_mutex);
 		buf_pool->n_pend_unzip--;
+		mutex_exit(&buf_pool_mutex);
 		block->page.buf_fix_count--;
 		buf_block_set_io_fix(block, BUF_IO_NONE);
-		mutex_exit(&block->mutex);
+		//mutex_exit(&block->mutex);
 		rw_lock_x_unlock(&block->lock);
 
 		if (UNIV_UNLIKELY(!success)) {
 
-			buf_pool_mutex_exit();
+			//buf_pool_mutex_exit();
+			mutex_exit(block_mutex);
 			return(NULL);
 		}
 
@@ -2217,11 +2347,11 @@ wait_until_unfixed:
 
 	ut_ad(buf_block_get_state(block) == BUF_BLOCK_FILE_PAGE);
 
-	mutex_enter(&block->mutex);
+	//mutex_enter(&block->mutex);
 	UNIV_MEM_ASSERT_RW(&block->page, sizeof block->page);
 
 	buf_block_buf_fix_inc(block, file, line);
-	buf_pool_mutex_exit();
+	//buf_pool_mutex_exit();
 
 	/* Check if this is the first access to the page */
 
@@ -2229,7 +2359,7 @@ wait_until_unfixed:
 
 	buf_page_set_accessed(&block->page, TRUE);
 
-	mutex_exit(&block->mutex);
+	mutex_exit(block_mutex);
 
 	buf_block_make_young(&block->page);
 
@@ -2515,16 +2645,19 @@ buf_page_try_get_func(
 	ibool		success;
 	ulint		fix_type;
 
-	buf_pool_mutex_enter();
+	//buf_pool_mutex_enter();
+	rw_lock_s_lock(&page_hash_latch);
 	block = buf_block_hash_get(space_id, page_no);
 
 	if (!block) {
-		buf_pool_mutex_exit();
+		//buf_pool_mutex_exit();
+		rw_lock_s_unlock(&page_hash_latch);
 		return(NULL);
 	}
 
 	mutex_enter(&block->mutex);
-	buf_pool_mutex_exit();
+	//buf_pool_mutex_exit();
+	rw_lock_s_unlock(&page_hash_latch);
 
 #if defined UNIV_DEBUG || defined UNIV_BUF_DEBUG
 	ut_a(buf_block_get_state(block) == BUF_BLOCK_FILE_PAGE);
@@ -2644,7 +2777,10 @@ buf_page_init(
 {
 	buf_page_t*	hash_page;
 
-	ut_ad(buf_pool_mutex_own());
+	//ut_ad(buf_pool_mutex_own());
+#ifdef UNIV_SYNC_DEBUG
+	ut_ad(rw_lock_own(&page_hash_latch, RW_LOCK_EX));
+#endif
 	ut_ad(mutex_own(&(block->mutex)));
 	ut_a(buf_block_get_state(block) != BUF_BLOCK_FILE_PAGE);
 
@@ -2677,7 +2813,8 @@ buf_page_init(
 			(const void*) hash_page, (const void*) block);
 #if defined UNIV_DEBUG || defined UNIV_BUF_DEBUG
 		mutex_exit(&block->mutex);
-		buf_pool_mutex_exit();
+		//buf_pool_mutex_exit();
+		rw_lock_x_unlock(&page_hash_latch);
 		buf_print();
 		buf_LRU_print();
 		buf_validate();
@@ -2756,16 +2893,24 @@ buf_page_init_for_read(
 		ut_ad(block);
 	}
 
-	buf_pool_mutex_enter();
+	//buf_pool_mutex_enter();
+	mutex_enter(&LRU_list_mutex);
+	rw_lock_x_lock(&page_hash_latch);
 
 	if (buf_page_hash_get(space, offset)) {
 		/* The page is already in the buffer pool. */
 err_exit:
 		if (block) {
 			mutex_enter(&block->mutex);
-			buf_LRU_block_free_non_file_page(block);
+			mutex_exit(&LRU_list_mutex);
+			rw_lock_x_unlock(&page_hash_latch);
+			buf_LRU_block_free_non_file_page(block, FALSE);
 			mutex_exit(&block->mutex);
 		}
+		else {
+			mutex_exit(&LRU_list_mutex);
+			rw_lock_x_unlock(&page_hash_latch);
+		}
 
 		bpage = NULL;
 		goto func_exit;
@@ -2785,6 +2930,8 @@ err_exit:
 		mutex_enter(&block->mutex);
 		buf_page_init(space, offset, block);
 
+		rw_lock_x_unlock(&page_hash_latch);
+
 		/* The block must be put to the LRU list, to the old blocks */
 		buf_LRU_add_block(bpage, TRUE/* to old blocks */);
 
@@ -2812,7 +2959,7 @@ err_exit:
 			been added to buf_pool->LRU and
 			buf_pool->page_hash. */
 			mutex_exit(&block->mutex);
-			data = buf_buddy_alloc(zip_size, &lru);
+			data = buf_buddy_alloc(zip_size, &lru, FALSE);
 			mutex_enter(&block->mutex);
 			block->page.zip.data = data;
 
@@ -2825,6 +2972,7 @@ err_exit:
 			buf_unzip_LRU_add_block(block, TRUE);
 		}
 
+		mutex_exit(&LRU_list_mutex);
 		mutex_exit(&block->mutex);
 	} else {
 		/* Defer buf_buddy_alloc() until after the block has
@@ -2836,8 +2984,8 @@ err_exit:
 		control block (bpage), in order to avoid the
 		invocation of buf_buddy_relocate_block() on
 		uninitialized data. */
-		data = buf_buddy_alloc(zip_size, &lru);
-		bpage = buf_buddy_alloc(sizeof *bpage, &lru);
+		data = buf_buddy_alloc(zip_size, &lru, TRUE);
+		bpage = buf_buddy_alloc(sizeof *bpage, &lru, TRUE);
 
 		/* If buf_buddy_alloc() allocated storage from the LRU list,
 		it released and reacquired buf_pool_mutex.  Thus, we must
@@ -2846,8 +2994,11 @@ err_exit:
 		    && UNIV_LIKELY_NULL(buf_page_hash_get(space, offset))) {
 
 			/* The block was added by some other thread. */
-			buf_buddy_free(bpage, sizeof *bpage);
-			buf_buddy_free(data, zip_size);
+			buf_buddy_free(bpage, sizeof *bpage, TRUE);
+			buf_buddy_free(data, zip_size, TRUE);
+
+			mutex_exit(&LRU_list_mutex);
+			rw_lock_x_unlock(&page_hash_latch);
 
 			bpage = NULL;
 			goto func_exit;
@@ -2877,18 +3028,26 @@ err_exit:
 		HASH_INSERT(buf_page_t, hash, buf_pool->page_hash,
 			    buf_page_address_fold(space, offset), bpage);
 
+		rw_lock_x_unlock(&page_hash_latch);
+
 		/* The block must be put to the LRU list, to the old blocks */
 		buf_LRU_add_block(bpage, TRUE/* to old blocks */);
+		mutex_enter(&flush_list_mutex);
 		buf_LRU_insert_zip_clean(bpage);
+		mutex_exit(&flush_list_mutex);
+
+		mutex_exit(&LRU_list_mutex);
 
 		buf_page_set_io_fix(bpage, BUF_IO_READ);
 
 		mutex_exit(&buf_pool_zip_mutex);
 	}
 
+	mutex_enter(&buf_pool_mutex);
 	buf_pool->n_pend_reads++;
+	mutex_exit(&buf_pool_mutex);
 func_exit:
-	buf_pool_mutex_exit();
+	//buf_pool_mutex_exit();
 
 	if (mode == BUF_READ_IBUF_PAGES_ONLY) {
 
@@ -2924,7 +3083,9 @@ buf_page_create(
 
 	free_block = buf_LRU_get_free_block(0);
 
-	buf_pool_mutex_enter();
+	//buf_pool_mutex_enter();
+	mutex_enter(&LRU_list_mutex);
+	rw_lock_x_lock(&page_hash_latch);
 
 	block = (buf_block_t*) buf_page_hash_get(space, offset);
 
@@ -2937,7 +3098,9 @@ buf_page_create(
 #endif /* UNIV_DEBUG_FILE_ACCESSES */
 
 		/* Page can be found in buf_pool */
-		buf_pool_mutex_exit();
+		//buf_pool_mutex_exit();
+		mutex_exit(&LRU_list_mutex);
+		rw_lock_x_unlock(&page_hash_latch);
 
 		buf_block_free(free_block);
 
@@ -2959,6 +3122,7 @@ buf_page_create(
 	mutex_enter(&block->mutex);
 
 	buf_page_init(space, offset, block);
+	rw_lock_x_unlock(&page_hash_latch);
 
 	/* The block must be put to the LRU list */
 	buf_LRU_add_block(&block->page, FALSE);
@@ -2985,7 +3149,7 @@ buf_page_create(
 		the reacquisition of buf_pool_mutex.  We also must
 		defer this operation until after the block descriptor
 		has been added to buf_pool->LRU and buf_pool->page_hash. */
-		data = buf_buddy_alloc(zip_size, &lru);
+		data = buf_buddy_alloc(zip_size, &lru, FALSE);
 		mutex_enter(&block->mutex);
 		block->page.zip.data = data;
 
@@ -3001,7 +3165,8 @@ buf_page_create(
 		rw_lock_x_unlock(&block->lock);
 	}
 
-	buf_pool_mutex_exit();
+	//buf_pool_mutex_exit();
+	mutex_exit(&LRU_list_mutex);
 
 	mtr_memo_push(mtr, block, MTR_MEMO_BUF_FIX);
 
@@ -3053,6 +3218,8 @@ buf_page_io_complete(
 	enum buf_io_fix	io_type;
 	const ibool	uncompressed = (buf_page_get_state(bpage)
 					== BUF_BLOCK_FILE_PAGE);
+	enum buf_flush	flush_type;
+	mutex_t*	block_mutex;
 
 	ut_a(buf_page_in_file(bpage));
 
@@ -3187,8 +3354,23 @@ corrupt:
 		}
 	}
 
-	buf_pool_mutex_enter();
-	mutex_enter(buf_page_get_mutex(bpage));
+	//buf_pool_mutex_enter();
+	if (io_type == BUF_IO_WRITE) {
+		flush_type = buf_page_get_flush_type(bpage);
+		/* to keep consistency at buf_LRU_insert_zip_clean() */
+		//if (flush_type == BUF_FLUSH_LRU) { /* optimistic! */
+			mutex_enter(&LRU_list_mutex);
+		//}
+	}
+	block_mutex = buf_page_get_mutex(bpage);
+retry_lock:
+	mutex_enter(block_mutex);
+	if (block_mutex != buf_page_get_mutex(bpage)) {
+		mutex_exit(block_mutex);
+		block_mutex = buf_page_get_mutex(bpage);
+		goto retry_lock;
+	}
+	mutex_enter(&buf_pool_mutex);
 
 #ifdef UNIV_IBUF_COUNT_DEBUG
 	if (io_type == BUF_IO_WRITE || uncompressed) {
@@ -3228,6 +3410,11 @@ corrupt:
 
 		buf_flush_write_complete(bpage);
 
+		/* to keep consistency at buf_LRU_insert_zip_clean() */
+		//if (flush_type == BUF_FLUSH_LRU) { /* optimistic! */
+			mutex_exit(&LRU_list_mutex);
+		//}
+
 		if (uncompressed) {
 			rw_lock_s_unlock_gen(&((buf_block_t*) bpage)->lock,
 					     BUF_IO_WRITE);
@@ -3250,8 +3437,9 @@ corrupt:
 	}
 #endif /* UNIV_DEBUG */
 
-	mutex_exit(buf_page_get_mutex(bpage));
-	buf_pool_mutex_exit();
+	mutex_exit(&buf_pool_mutex);
+	mutex_exit(block_mutex);
+	//buf_pool_mutex_exit();
 }
 
 /*************************************************************************
@@ -3273,12 +3461,14 @@ buf_pool_invalidate(void)
 		freed = buf_LRU_search_and_free_block(100);
 	}
 
-	buf_pool_mutex_enter();
+	//buf_pool_mutex_enter();
+	mutex_enter(&LRU_list_mutex);
 
 	ut_ad(UT_LIST_GET_LEN(buf_pool->LRU) == 0);
 	ut_ad(UT_LIST_GET_LEN(buf_pool->unzip_LRU) == 0);
 
-	buf_pool_mutex_exit();
+	//buf_pool_mutex_exit();
+	mutex_exit(&LRU_list_mutex);
 }
 
 #if defined UNIV_DEBUG || defined UNIV_BUF_DEBUG
@@ -3302,7 +3492,10 @@ buf_validate(void)
 
 	ut_ad(buf_pool);
 
-	buf_pool_mutex_enter();
+	//buf_pool_mutex_enter();
+	mutex_enter(&LRU_list_mutex);
+	rw_lock_x_lock(&page_hash_latch);
+	/* for keep the new latch order, it cannot validate correctly... */
 
 	chunk = buf_pool->chunks;
 
@@ -3401,7 +3594,7 @@ buf_validate(void)
 	/* Check clean compressed-only blocks. */
 
 	for (b = UT_LIST_GET_FIRST(buf_pool->zip_clean); b;
-	     b = UT_LIST_GET_NEXT(list, b)) {
+	     b = UT_LIST_GET_NEXT(zip_list, b)) {
 		ut_a(buf_page_get_state(b) == BUF_BLOCK_ZIP_PAGE);
 		switch (buf_page_get_io_fix(b)) {
 		case BUF_IO_NONE:
@@ -3426,8 +3619,9 @@ buf_validate(void)
 
 	/* Check dirty compressed-only blocks. */
 
+	mutex_enter(&flush_list_mutex);
 	for (b = UT_LIST_GET_FIRST(buf_pool->flush_list); b;
-	     b = UT_LIST_GET_NEXT(list, b)) {
+	     b = UT_LIST_GET_NEXT(flush_list, b)) {
 		ut_ad(b->in_flush_list);
 
 		switch (buf_page_get_state(b)) {
@@ -3472,6 +3666,7 @@ buf_validate(void)
 		}
 		ut_a(buf_page_hash_get(b->space, b->offset) == b);
 	}
+	mutex_exit(&flush_list_mutex);
 
 	mutex_exit(&buf_pool_zip_mutex);
 
@@ -3483,19 +3678,27 @@ buf_validate(void)
 	}
 
 	ut_a(UT_LIST_GET_LEN(buf_pool->LRU) == n_lru);
+	/* because of latching order with block->mutex, we cannot get free_list_mutex before that */
+/*
 	if (UT_LIST_GET_LEN(buf_pool->free) != n_free) {
 		fprintf(stderr, "Free list len %lu, free blocks %lu\n",
 			(ulong) UT_LIST_GET_LEN(buf_pool->free),
 			(ulong) n_free);
 		ut_error;
 	}
+*/
+	/* because of latching order with block->mutex, we cannot get flush_list_mutex before that */
+/*
 	ut_a(UT_LIST_GET_LEN(buf_pool->flush_list) == n_flush);
 
 	ut_a(buf_pool->n_flush[BUF_FLUSH_SINGLE_PAGE] == n_single_flush);
 	ut_a(buf_pool->n_flush[BUF_FLUSH_LIST] == n_list_flush);
 	ut_a(buf_pool->n_flush[BUF_FLUSH_LRU] == n_lru_flush);
+*/
 
-	buf_pool_mutex_exit();
+	//buf_pool_mutex_exit();
+	mutex_exit(&LRU_list_mutex);
+	rw_lock_x_unlock(&page_hash_latch);
 
 	ut_a(buf_LRU_validate());
 	ut_a(buf_flush_validate());
@@ -3529,7 +3732,10 @@ buf_print(void)
 	index_ids = mem_alloc(sizeof(dulint) * size);
 	counts = mem_alloc(sizeof(ulint) * size);
 
-	buf_pool_mutex_enter();
+	//buf_pool_mutex_enter();
+	mutex_enter(&LRU_list_mutex);
+	mutex_enter(&free_list_mutex);
+	mutex_enter(&flush_list_mutex);
 
 	fprintf(stderr,
 		"buf_pool size %lu\n"
@@ -3592,7 +3798,10 @@ buf_print(void)
 		}
 	}
 
-	buf_pool_mutex_exit();
+	//buf_pool_mutex_exit();
+	mutex_exit(&LRU_list_mutex);
+	mutex_exit(&free_list_mutex);
+	mutex_exit(&flush_list_mutex);
 
 	for (i = 0; i < n_found; i++) {
 		index = dict_index_get_if_in_cache(index_ids[i]);
@@ -3630,7 +3839,7 @@ buf_get_latched_pages_number(void)
 	ulint		i;
 	ulint		fixed_pages_number = 0;
 
-	buf_pool_mutex_enter();
+	//buf_pool_mutex_enter();
 
 	chunk = buf_pool->chunks;
 
@@ -3664,7 +3873,7 @@ buf_get_latched_pages_number(void)
 	/* Traverse the lists of clean and dirty compressed-only blocks. */
 
 	for (b = UT_LIST_GET_FIRST(buf_pool->zip_clean); b;
-	     b = UT_LIST_GET_NEXT(list, b)) {
+	     b = UT_LIST_GET_NEXT(zip_list, b)) {
 		ut_a(buf_page_get_state(b) == BUF_BLOCK_ZIP_PAGE);
 		ut_a(buf_page_get_io_fix(b) != BUF_IO_WRITE);
 
@@ -3674,8 +3883,9 @@ buf_get_latched_pages_number(void)
 		}
 	}
 
+	mutex_enter(&flush_list_mutex);
 	for (b = UT_LIST_GET_FIRST(buf_pool->flush_list); b;
-	     b = UT_LIST_GET_NEXT(list, b)) {
+	     b = UT_LIST_GET_NEXT(flush_list, b)) {
 		ut_ad(b->in_flush_list);
 
 		switch (buf_page_get_state(b)) {
@@ -3698,9 +3908,10 @@ buf_get_latched_pages_number(void)
 			break;
 		}
 	}
+	mutex_exit(&flush_list_mutex);
 
 	mutex_exit(&buf_pool_zip_mutex);
-	buf_pool_mutex_exit();
+	//buf_pool_mutex_exit();
 
 	return(fixed_pages_number);
 }
@@ -3757,7 +3968,11 @@ buf_print_io(
 	ut_ad(buf_pool);
 	size = buf_pool->curr_size;
 
-	buf_pool_mutex_enter();
+	//buf_pool_mutex_enter();
+	mutex_enter(&LRU_list_mutex);
+	mutex_enter(&free_list_mutex);
+	mutex_enter(&buf_pool_mutex);
+	mutex_enter(&flush_list_mutex);
 
 	fprintf(file,
 		"Buffer pool size        %lu\n"
@@ -3824,7 +4039,11 @@ buf_print_io(
 		buf_LRU_stat_sum.io, buf_LRU_stat_cur.io,
 		buf_LRU_stat_sum.unzip, buf_LRU_stat_cur.unzip);
 
-	buf_pool_mutex_exit();
+	//buf_pool_mutex_exit();
+	mutex_exit(&LRU_list_mutex);
+	mutex_exit(&free_list_mutex);
+	mutex_exit(&buf_pool_mutex);
+	mutex_exit(&flush_list_mutex);
 }
 
 /**************************************************************************
@@ -3853,7 +4072,7 @@ buf_all_freed(void)
 
 	ut_ad(buf_pool);
 
-	buf_pool_mutex_enter();
+	//buf_pool_mutex_enter(); /* optimistic */
 
 	chunk = buf_pool->chunks;
 
@@ -3870,7 +4089,7 @@ buf_all_freed(void)
 		}
 	}
 
-	buf_pool_mutex_exit();
+	//buf_pool_mutex_exit(); /* optimistic */
 
 	return(TRUE);
 }
@@ -3886,7 +4105,8 @@ buf_pool_check_no_pending_io(void)
 {
 	ibool	ret;
 
-	buf_pool_mutex_enter();
+	//buf_pool_mutex_enter();
+	mutex_enter(&buf_pool_mutex);
 
 	if (buf_pool->n_pend_reads + buf_pool->n_flush[BUF_FLUSH_LRU]
 	    + buf_pool->n_flush[BUF_FLUSH_LIST]
@@ -3896,7 +4116,8 @@ buf_pool_check_no_pending_io(void)
 		ret = TRUE;
 	}
 
-	buf_pool_mutex_exit();
+	//buf_pool_mutex_exit();
+	mutex_exit(&buf_pool_mutex);
 
 	return(ret);
 }
@@ -3910,11 +4131,13 @@ buf_get_free_list_len(void)
 {
 	ulint	len;
 
-	buf_pool_mutex_enter();
+	//buf_pool_mutex_enter();
+	mutex_enter(&free_list_mutex);
 
 	len = UT_LIST_GET_LEN(buf_pool->free);
 
-	buf_pool_mutex_exit();
+	//buf_pool_mutex_exit();
+	mutex_exit(&free_list_mutex);
 
 	return(len);
 }

=== modified file 'storage/xtradb/buf/buf0flu.c'
--- a/storage/xtradb/buf/buf0flu.c	2009-05-04 04:32:30 +0000
+++ b/storage/xtradb/buf/buf0flu.c	2009-06-25 01:43:25 +0000
@@ -61,7 +61,9 @@ buf_flush_insert_into_flush_list(
 /*=============================*/
 	buf_block_t*	block)	/* in/out: block which is modified */
 {
-	ut_ad(buf_pool_mutex_own());
+	//ut_ad(buf_pool_mutex_own());
+	ut_ad(mutex_own(&block->mutex));
+	ut_ad(mutex_own(&flush_list_mutex));
 	ut_ad((UT_LIST_GET_FIRST(buf_pool->flush_list) == NULL)
 	      || (UT_LIST_GET_FIRST(buf_pool->flush_list)->oldest_modification
 		  <= block->page.oldest_modification));
@@ -72,7 +74,7 @@ buf_flush_insert_into_flush_list(
 	ut_ad(!block->page.in_zip_hash);
 	ut_ad(!block->page.in_flush_list);
 	ut_d(block->page.in_flush_list = TRUE);
-	UT_LIST_ADD_FIRST(list, buf_pool->flush_list, &block->page);
+	UT_LIST_ADD_FIRST(flush_list, buf_pool->flush_list, &block->page);
 
 #if defined UNIV_DEBUG || defined UNIV_BUF_DEBUG
 	ut_a(buf_flush_validate_low());
@@ -92,7 +94,9 @@ buf_flush_insert_sorted_into_flush_list(
 	buf_page_t*	prev_b;
 	buf_page_t*	b;
 
-	ut_ad(buf_pool_mutex_own());
+	//ut_ad(buf_pool_mutex_own());
+	ut_ad(mutex_own(&block->mutex));
+	ut_ad(mutex_own(&flush_list_mutex));
 	ut_ad(buf_block_get_state(block) == BUF_BLOCK_FILE_PAGE);
 
 	ut_ad(block->page.in_LRU_list);
@@ -107,13 +111,13 @@ buf_flush_insert_sorted_into_flush_list(
 	while (b && b->oldest_modification > block->page.oldest_modification) {
 		ut_ad(b->in_flush_list);
 		prev_b = b;
-		b = UT_LIST_GET_NEXT(list, b);
+		b = UT_LIST_GET_NEXT(flush_list, b);
 	}
 
 	if (prev_b == NULL) {
-		UT_LIST_ADD_FIRST(list, buf_pool->flush_list, &block->page);
+		UT_LIST_ADD_FIRST(flush_list, buf_pool->flush_list, &block->page);
 	} else {
-		UT_LIST_INSERT_AFTER(list, buf_pool->flush_list,
+		UT_LIST_INSERT_AFTER(flush_list, buf_pool->flush_list,
 				     prev_b, &block->page);
 	}
 
@@ -134,7 +138,7 @@ buf_flush_ready_for_replace(
 				buf_page_in_file(bpage) and in the LRU list */
 {
 	//ut_ad(buf_pool_mutex_own());
-	//ut_ad(mutex_own(buf_page_get_mutex(bpage)));
+	ut_ad(mutex_own(buf_page_get_mutex(bpage)));
 	//ut_ad(bpage->in_LRU_list); /* optimistic use */
 
 	if (UNIV_LIKELY(bpage->in_LRU_list && buf_page_in_file(bpage))) {
@@ -169,12 +173,12 @@ buf_flush_ready_for_flush(
 				buf_page_in_file(bpage) */
 	enum buf_flush	flush_type)/* in: BUF_FLUSH_LRU or BUF_FLUSH_LIST */
 {
-	ut_a(buf_page_in_file(bpage));
-	ut_ad(buf_pool_mutex_own());
+	//ut_a(buf_page_in_file(bpage));
+	//ut_ad(buf_pool_mutex_own()); /*optimistic...*/
 	ut_ad(mutex_own(buf_page_get_mutex(bpage)));
 	ut_ad(flush_type == BUF_FLUSH_LRU || BUF_FLUSH_LIST);
 
-	if (bpage->oldest_modification != 0
+	if (buf_page_in_file(bpage) && bpage->oldest_modification != 0
 	    && buf_page_get_io_fix(bpage) == BUF_IO_NONE) {
 		ut_ad(bpage->in_flush_list);
 
@@ -203,8 +207,11 @@ buf_flush_remove(
 /*=============*/
 	buf_page_t*	bpage)	/* in: pointer to the block in question */
 {
-	ut_ad(buf_pool_mutex_own());
+	//ut_ad(buf_pool_mutex_own());
 	ut_ad(mutex_own(buf_page_get_mutex(bpage)));
+
+	mutex_enter(&flush_list_mutex);
+
 	ut_ad(bpage->in_flush_list);
 	ut_d(bpage->in_flush_list = FALSE);
 
@@ -216,21 +223,23 @@ buf_flush_remove(
 	case BUF_BLOCK_READY_FOR_USE:
 	case BUF_BLOCK_MEMORY:
 	case BUF_BLOCK_REMOVE_HASH:
+		mutex_exit(&flush_list_mutex);
 		ut_error;
 		return;
 	case BUF_BLOCK_ZIP_DIRTY:
 		buf_page_set_state(bpage, BUF_BLOCK_ZIP_PAGE);
-		UT_LIST_REMOVE(list, buf_pool->flush_list, bpage);
+		UT_LIST_REMOVE(flush_list, buf_pool->flush_list, bpage);
 		buf_LRU_insert_zip_clean(bpage);
 		break;
 	case BUF_BLOCK_FILE_PAGE:
-		UT_LIST_REMOVE(list, buf_pool->flush_list, bpage);
+		UT_LIST_REMOVE(flush_list, buf_pool->flush_list, bpage);
 		break;
 	}
 
 	bpage->oldest_modification = 0;
 
-	ut_d(UT_LIST_VALIDATE(list, buf_page_t, buf_pool->flush_list));
+	ut_d(UT_LIST_VALIDATE(flush_list, buf_page_t, buf_pool->flush_list));
+	mutex_exit(&flush_list_mutex);
 }
 
 /************************************************************************
@@ -678,7 +687,9 @@ buf_flush_write_block_low(
 	io_fixed and oldest_modification != 0.  Thus, it cannot be
 	relocated in the buffer pool or removed from flush_list or
 	LRU_list. */
-	ut_ad(!buf_pool_mutex_own());
+	//ut_ad(!buf_pool_mutex_own());
+	ut_ad(!mutex_own(&LRU_list_mutex));
+	ut_ad(!mutex_own(&flush_list_mutex));
 	ut_ad(!mutex_own(buf_page_get_mutex(bpage)));
 	ut_ad(buf_page_get_io_fix(bpage) == BUF_IO_WRITE);
 	ut_ad(bpage->oldest_modification != 0);
@@ -762,12 +773,19 @@ buf_flush_page(
 	ibool		is_uncompressed;
 
 	ut_ad(flush_type == BUF_FLUSH_LRU || flush_type == BUF_FLUSH_LIST);
-	ut_ad(buf_pool_mutex_own());
+	//ut_ad(buf_pool_mutex_own());
+#ifdef UNIV_SYNC_DEBUG
+	ut_ad(rw_lock_own(&page_hash_latch, RW_LOCK_EX)
+	      || rw_lock_own(&page_hash_latch, RW_LOCK_SHARED));
+#endif
 	ut_ad(buf_page_in_file(bpage));
 
 	block_mutex = buf_page_get_mutex(bpage);
 	ut_ad(mutex_own(block_mutex));
 
+	mutex_enter(&buf_pool_mutex);
+	rw_lock_s_unlock(&page_hash_latch);
+
 	ut_ad(buf_flush_ready_for_flush(bpage, flush_type));
 
 	buf_page_set_io_fix(bpage, BUF_IO_WRITE);
@@ -798,7 +816,8 @@ buf_flush_page(
 		}
 
 		mutex_exit(block_mutex);
-		buf_pool_mutex_exit();
+		//buf_pool_mutex_exit();
+		mutex_exit(&buf_pool_mutex);
 
 		/* Even though bpage is not protected by any mutex at
 		this point, it is safe to access bpage, because it is
@@ -835,7 +854,8 @@ buf_flush_page(
 		immediately. */
 
 		mutex_exit(block_mutex);
-		buf_pool_mutex_exit();
+		//buf_pool_mutex_exit();
+		mutex_exit(&buf_pool_mutex);
 		break;
 
 	default:
@@ -899,7 +919,8 @@ buf_flush_try_neighbors(
 		high = fil_space_get_size(space);
 	}
 
-	buf_pool_mutex_enter();
+	//buf_pool_mutex_enter();
+	rw_lock_s_lock(&page_hash_latch);
 
 	for (i = low; i < high; i++) {
 
@@ -920,7 +941,13 @@ buf_flush_try_neighbors(
 		    || buf_page_is_old(bpage)) {
 			mutex_t* block_mutex = buf_page_get_mutex(bpage);
 
+retry_lock:
 			mutex_enter(block_mutex);
+			if (block_mutex != buf_page_get_mutex(bpage)) {
+				mutex_exit(block_mutex);
+				block_mutex = buf_page_get_mutex(bpage);
+				goto retry_lock;
+			}
 
 			if (buf_flush_ready_for_flush(bpage, flush_type)
 			    && (i == offset || !bpage->buf_fix_count)) {
@@ -936,14 +963,16 @@ buf_flush_try_neighbors(
 				ut_ad(!mutex_own(block_mutex));
 				count++;
 
-				buf_pool_mutex_enter();
+				//buf_pool_mutex_enter();
+				rw_lock_s_lock(&page_hash_latch);
 			} else {
 				mutex_exit(block_mutex);
 			}
 		}
 	}
 
-	buf_pool_mutex_exit();
+	//buf_pool_mutex_exit();
+	rw_lock_s_unlock(&page_hash_latch);
 
 	return(count);
 }
@@ -980,6 +1009,7 @@ buf_flush_batch(
 	ulint		old_page_count;
 	ulint		space;
 	ulint		offset;
+	ulint		remaining	= 0;
 
 	ut_ad((flush_type == BUF_FLUSH_LRU)
 	      || (flush_type == BUF_FLUSH_LIST));
@@ -987,20 +1017,28 @@ buf_flush_batch(
 	ut_ad((flush_type != BUF_FLUSH_LIST)
 	      || sync_thread_levels_empty_gen(TRUE));
 #endif /* UNIV_SYNC_DEBUG */
-	buf_pool_mutex_enter();
+	//buf_pool_mutex_enter();
+	mutex_enter(&buf_pool_mutex);
 
 	if ((buf_pool->n_flush[flush_type] > 0)
 	    || (buf_pool->init_flush[flush_type] == TRUE)) {
 
 		/* There is already a flush batch of the same type running */
 
-		buf_pool_mutex_exit();
+		//buf_pool_mutex_exit();
+		mutex_exit(&buf_pool_mutex);
 
 		return(ULINT_UNDEFINED);
 	}
 
 	buf_pool->init_flush[flush_type] = TRUE;
 
+	mutex_exit(&buf_pool_mutex);
+
+	if (flush_type == BUF_FLUSH_LRU) {
+		mutex_enter(&LRU_list_mutex);
+	}
+
 	for (;;) {
 flush_next:
 		/* If we have flushed enough, leave the loop */
@@ -1017,7 +1055,10 @@ flush_next:
 		} else {
 			ut_ad(flush_type == BUF_FLUSH_LIST);
 
+			mutex_enter(&flush_list_mutex);
+			remaining = UT_LIST_GET_LEN(buf_pool->flush_list);
 			bpage = UT_LIST_GET_LAST(buf_pool->flush_list);
+			mutex_exit(&flush_list_mutex);
 			if (!bpage
 			    || bpage->oldest_modification >= lsn_limit) {
 				/* We have flushed enough */
@@ -1037,9 +1078,15 @@ flush_next:
 			mutex_t*block_mutex = buf_page_get_mutex(bpage);
 			ibool	ready;
 
+retry_lock_1:
 			ut_a(buf_page_in_file(bpage));
 
 			mutex_enter(block_mutex);
+			if (block_mutex != buf_page_get_mutex(bpage)) {
+				mutex_exit(block_mutex);
+				block_mutex = buf_page_get_mutex(bpage);
+				goto retry_lock_1;
+			}
 			ready = buf_flush_ready_for_flush(bpage, flush_type);
 			mutex_exit(block_mutex);
 
@@ -1047,7 +1094,10 @@ flush_next:
 				space = buf_page_get_space(bpage);
 				offset = buf_page_get_page_no(bpage);
 
-				buf_pool_mutex_exit();
+				//buf_pool_mutex_exit();
+				if (flush_type == BUF_FLUSH_LRU) {
+					mutex_exit(&LRU_list_mutex);
+				}
 
 				old_page_count = page_count;
 
@@ -1057,10 +1107,17 @@ flush_next:
 					space, offset, flush_type);
 				} else {
 					/* Try to flush the page only */
-					buf_pool_mutex_enter();
+					//buf_pool_mutex_enter();
+					rw_lock_s_lock(&page_hash_latch);
 
 					mutex_t* block_mutex = buf_page_get_mutex(bpage);
+retry_lock_2:
 					mutex_enter(block_mutex);
+					if (block_mutex != buf_page_get_mutex(bpage)) {
+						mutex_exit(block_mutex);
+						block_mutex = buf_page_get_mutex(bpage);
+						goto retry_lock_2;
+					}
 
 					buf_page_t* bpage_tmp = buf_page_hash_get(space, offset);
 					if (bpage_tmp) {
@@ -1073,7 +1130,10 @@ flush_next:
 				flush_type, offset,
 				page_count - old_page_count); */
 
-				buf_pool_mutex_enter();
+				//buf_pool_mutex_enter();
+				if (flush_type == BUF_FLUSH_LRU) {
+					mutex_enter(&LRU_list_mutex);
+				}
 				goto flush_next;
 
 			} else if (flush_type == BUF_FLUSH_LRU) {
@@ -1081,16 +1141,28 @@ flush_next:
 			} else {
 				ut_ad(flush_type == BUF_FLUSH_LIST);
 
-				bpage = UT_LIST_GET_PREV(list, bpage);
-				ut_ad(!bpage || bpage->in_flush_list);
+				mutex_enter(&flush_list_mutex);
+				bpage = UT_LIST_GET_PREV(flush_list, bpage);
+				//ut_ad(!bpage || bpage->in_flush_list); /* optimistic */
+				mutex_exit(&flush_list_mutex);
+				remaining--;
 			}
 		} while (bpage != NULL);
 
+		if (remaining)
+			goto flush_next;
+
 		/* If we could not find anything to flush, leave the loop */
 
 		break;
 	}
 
+	if (flush_type == BUF_FLUSH_LRU) {
+		mutex_exit(&LRU_list_mutex);
+	}
+
+	mutex_enter(&buf_pool_mutex);
+
 	buf_pool->init_flush[flush_type] = FALSE;
 
 	if (buf_pool->n_flush[flush_type] == 0) {
@@ -1100,7 +1172,8 @@ flush_next:
 		os_event_set(buf_pool->no_flush[flush_type]);
 	}
 
-	buf_pool_mutex_exit();
+	//buf_pool_mutex_exit();
+	mutex_exit(&buf_pool_mutex);
 
 	buf_flush_buffered_writes();
 
@@ -1154,7 +1227,7 @@ buf_flush_LRU_recommendation(void)
 
 	//buf_pool_mutex_enter();
 	if (have_LRU_mutex)
-		buf_pool_mutex_enter();
+		mutex_enter(&LRU_list_mutex);
 
 	n_replaceable = UT_LIST_GET_LEN(buf_pool->free);
 
@@ -1173,7 +1246,13 @@ buf_flush_LRU_recommendation(void)
 
 		mutex_t* block_mutex = buf_page_get_mutex(bpage);
 
+retry_lock:
 		mutex_enter(block_mutex);
+		if (block_mutex != buf_page_get_mutex(bpage)) {
+			mutex_exit(block_mutex);
+			block_mutex = buf_page_get_mutex(bpage);
+			goto retry_lock;
+		}
 
 		if (buf_flush_ready_for_replace(bpage)) {
 			n_replaceable++;
@@ -1188,7 +1267,7 @@ buf_flush_LRU_recommendation(void)
 
 	//buf_pool_mutex_exit();
 	if (have_LRU_mutex)
-		buf_pool_mutex_exit();
+		mutex_exit(&LRU_list_mutex);
 
 	if (n_replaceable >= BUF_FLUSH_FREE_BLOCK_MARGIN) {
 
@@ -1238,17 +1317,17 @@ buf_flush_validate_low(void)
 {
 	buf_page_t*	bpage;
 
-	UT_LIST_VALIDATE(list, buf_page_t, buf_pool->flush_list);
+	UT_LIST_VALIDATE(flush_list, buf_page_t, buf_pool->flush_list);
 
 	bpage = UT_LIST_GET_FIRST(buf_pool->flush_list);
 
 	while (bpage != NULL) {
 		const ib_uint64_t om = bpage->oldest_modification;
 		ut_ad(bpage->in_flush_list);
-		ut_a(buf_page_in_file(bpage));
+		//ut_a(buf_page_in_file(bpage)); /* optimistic */
 		ut_a(om > 0);
 
-		bpage = UT_LIST_GET_NEXT(list, bpage);
+		bpage = UT_LIST_GET_NEXT(flush_list, bpage);
 
 		ut_a(!bpage || om >= bpage->oldest_modification);
 	}
@@ -1266,11 +1345,13 @@ buf_flush_validate(void)
 {
 	ibool	ret;
 
-	buf_pool_mutex_enter();
+	//buf_pool_mutex_enter();
+	mutex_enter(&flush_list_mutex);
 
 	ret = buf_flush_validate_low();
 
-	buf_pool_mutex_exit();
+	//buf_pool_mutex_exit();
+	mutex_exit(&flush_list_mutex);
 
 	return(ret);
 }

=== modified file 'storage/xtradb/buf/buf0lru.c'
--- a/storage/xtradb/buf/buf0lru.c	2009-05-04 04:32:30 +0000
+++ b/storage/xtradb/buf/buf0lru.c	2009-06-25 01:43:25 +0000
@@ -129,25 +129,31 @@ static
 void
 buf_LRU_block_free_hashed_page(
 /*===========================*/
-	buf_block_t*	block);	/* in: block, must contain a file page and
+	buf_block_t*	block,	/* in: block, must contain a file page and
 				be in a state where it can be freed */
+	ibool		have_page_hash_mutex);
 
 /**********************************************************************
 Determines if the unzip_LRU list should be used for evicting a victim
 instead of the general LRU list. */
 UNIV_INLINE
 ibool
-buf_LRU_evict_from_unzip_LRU(void)
+buf_LRU_evict_from_unzip_LRU(
+	ibool		have_LRU_mutex)
 /*==============================*/
 				/* out: TRUE if should use unzip_LRU */
 {
 	ulint	io_avg;
 	ulint	unzip_avg;
 
-	ut_ad(buf_pool_mutex_own());
+	//ut_ad(buf_pool_mutex_own());
 
+	if (!have_LRU_mutex)
+		mutex_enter(&LRU_list_mutex);
 	/* If the unzip_LRU list is empty, we can only use the LRU. */
 	if (UT_LIST_GET_LEN(buf_pool->unzip_LRU) == 0) {
+		if (!have_LRU_mutex)
+			mutex_exit(&LRU_list_mutex);
 		return(FALSE);
 	}
 
@@ -156,14 +162,20 @@ buf_LRU_evict_from_unzip_LRU(void)
 	decompressed pages in the buffer pool. */
 	if (UT_LIST_GET_LEN(buf_pool->unzip_LRU)
 	    <= UT_LIST_GET_LEN(buf_pool->LRU) / 10) {
+		if (!have_LRU_mutex)
+			mutex_exit(&LRU_list_mutex);
 		return(FALSE);
 	}
 
 	/* If eviction hasn't started yet, we assume by default
 	that a workload is disk bound. */
 	if (buf_pool->freed_page_clock == 0) {
+		if (!have_LRU_mutex)
+			mutex_exit(&LRU_list_mutex);
 		return(TRUE);
 	}
+	if (!have_LRU_mutex)
+		mutex_exit(&LRU_list_mutex);
 
 	/* Calculate the average over past intervals, and add the values
 	of the current interval. */
@@ -229,7 +241,8 @@ buf_LRU_drop_page_hash_for_tablespace(
 
 	page_arr = ut_malloc(sizeof(ulint)
 			     * BUF_LRU_DROP_SEARCH_HASH_SIZE);
-	buf_pool_mutex_enter();
+	//buf_pool_mutex_enter();
+	mutex_enter(&LRU_list_mutex);
 
 scan_again:
 	num_entries = 0;
@@ -239,7 +252,13 @@ scan_again:
 		mutex_t*	block_mutex = buf_page_get_mutex(bpage);
 		buf_page_t*	prev_bpage;
 
+retry_lock:
 		mutex_enter(block_mutex);
+		if (block_mutex != buf_page_get_mutex(bpage)) {
+			mutex_exit(block_mutex);
+			block_mutex = buf_page_get_mutex(bpage);
+			goto retry_lock;
+		}
 		prev_bpage = UT_LIST_GET_PREV(LRU, bpage);
 
 		ut_a(buf_page_in_file(bpage));
@@ -269,12 +288,14 @@ scan_again:
 			}
 			/* Array full. We release the buf_pool_mutex to
 			obey the latching order. */
-			buf_pool_mutex_exit();
+			//buf_pool_mutex_exit();
+			mutex_exit(&LRU_list_mutex);
 
 			buf_LRU_drop_page_hash_batch(id, zip_size, page_arr,
 						     num_entries);
 			num_entries = 0;
-			buf_pool_mutex_enter();
+			//buf_pool_mutex_enter();
+			mutex_enter(&LRU_list_mutex);
 		} else {
 			mutex_exit(block_mutex);
 		}
@@ -299,7 +320,8 @@ next_page:
 		}
 	}
 
-	buf_pool_mutex_exit();
+	//buf_pool_mutex_exit();
+	mutex_exit(&LRU_list_mutex);
 
 	/* Drop any remaining batch of search hashed pages. */
 	buf_LRU_drop_page_hash_batch(id, zip_size, page_arr, num_entries);
@@ -327,7 +349,9 @@ buf_LRU_invalidate_tablespace(
 	buf_LRU_drop_page_hash_for_tablespace(id);
 
 scan_again:
-	buf_pool_mutex_enter();
+	//buf_pool_mutex_enter();
+	mutex_enter(&LRU_list_mutex);
+	rw_lock_x_lock(&page_hash_latch);
 
 	all_freed = TRUE;
 
@@ -339,7 +363,13 @@ scan_again:
 
 		ut_a(buf_page_in_file(bpage));
 
+retry_lock:
 		mutex_enter(block_mutex);
+		if (block_mutex != buf_page_get_mutex(bpage)) {
+			mutex_exit(block_mutex);
+			block_mutex = buf_page_get_mutex(bpage);
+			goto retry_lock;
+		}
 		prev_bpage = UT_LIST_GET_PREV(LRU, bpage);
 
 		if (buf_page_get_space(bpage) == id) {
@@ -369,7 +399,9 @@ scan_again:
 				ulint	page_no;
 				ulint	zip_size;
 
-				buf_pool_mutex_exit();
+				//buf_pool_mutex_exit();
+				mutex_exit(&LRU_list_mutex);
+				rw_lock_x_unlock(&page_hash_latch);
 
 				zip_size = buf_page_get_zip_size(bpage);
 				page_no = buf_page_get_page_no(bpage);
@@ -393,7 +425,7 @@ scan_again:
 			if (buf_LRU_block_remove_hashed_page(bpage, TRUE)
 			    != BUF_BLOCK_ZIP_FREE) {
 				buf_LRU_block_free_hashed_page((buf_block_t*)
-							       bpage);
+							       bpage, TRUE);
 			} else {
 				/* The block_mutex should have been
 				released by buf_LRU_block_remove_hashed_page()
@@ -416,7 +448,9 @@ next_page:
 		bpage = prev_bpage;
 	}
 
-	buf_pool_mutex_exit();
+	//buf_pool_mutex_exit();
+	mutex_exit(&LRU_list_mutex);
+	rw_lock_x_unlock(&page_hash_latch);
 
 	if (!all_freed) {
 		os_thread_sleep(20000);
@@ -439,14 +473,16 @@ buf_LRU_get_recent_limit(void)
 	ulint			len;
 	ulint			limit;
 
-	buf_pool_mutex_enter();
+	//buf_pool_mutex_enter();
+	mutex_enter(&LRU_list_mutex);
 
 	len = UT_LIST_GET_LEN(buf_pool->LRU);
 
 	if (len < BUF_LRU_OLD_MIN_LEN) {
 		/* The LRU list is too short to do read-ahead */
 
-		buf_pool_mutex_exit();
+		//buf_pool_mutex_exit();
+		mutex_exit(&LRU_list_mutex);
 
 		return(0);
 	}
@@ -455,7 +491,8 @@ buf_LRU_get_recent_limit(void)
 
 	limit = buf_page_get_LRU_position(bpage) - len / BUF_LRU_INITIAL_RATIO;
 
-	buf_pool_mutex_exit();
+	//buf_pool_mutex_exit();
+	mutex_exit(&LRU_list_mutex);
 
 	return(limit);
 }
@@ -470,7 +507,9 @@ buf_LRU_insert_zip_clean(
 {
 	buf_page_t*	b;
 
-	ut_ad(buf_pool_mutex_own());
+	//ut_ad(buf_pool_mutex_own());
+	ut_ad(mutex_own(&LRU_list_mutex));
+	ut_ad(mutex_own(&flush_list_mutex));
 	ut_ad(buf_page_get_state(bpage) == BUF_BLOCK_ZIP_PAGE);
 
 	/* Find the first successor of bpage in the LRU list
@@ -478,17 +517,17 @@ buf_LRU_insert_zip_clean(
 	b = bpage;
 	do {
 		b = UT_LIST_GET_NEXT(LRU, b);
-	} while (b && buf_page_get_state(b) != BUF_BLOCK_ZIP_PAGE);
+	} while (b && (buf_page_get_state(b) != BUF_BLOCK_ZIP_PAGE || !b->in_LRU_list));
 
 	/* Insert bpage before b, i.e., after the predecessor of b. */
 	if (b) {
-		b = UT_LIST_GET_PREV(list, b);
+		b = UT_LIST_GET_PREV(zip_list, b);
 	}
 
 	if (b) {
-		UT_LIST_INSERT_AFTER(list, buf_pool->zip_clean, b, bpage);
+		UT_LIST_INSERT_AFTER(zip_list, buf_pool->zip_clean, b, bpage);
 	} else {
-		UT_LIST_ADD_FIRST(list, buf_pool->zip_clean, bpage);
+		UT_LIST_ADD_FIRST(zip_list, buf_pool->zip_clean, bpage);
 	}
 }
 
@@ -500,16 +539,17 @@ ibool
 buf_LRU_free_from_unzip_LRU_list(
 /*=============================*/
 				/* out: TRUE if freed */
-	ulint	n_iterations)	/* in: how many times this has been called
+	ulint	n_iterations,	/* in: how many times this has been called
 				repeatedly without result: a high value means
 				that we should search farther; we will search
 				n_iterations / 5 of the unzip_LRU list,
 				or nothing if n_iterations >= 5 */
+	ibool	have_LRU_mutex)
 {
 	buf_block_t*	block;
 	ulint		distance;
 
-	ut_ad(buf_pool_mutex_own());
+	//ut_ad(buf_pool_mutex_own()); /* optimistic */
 
 	/* Theoratically it should be much easier to find a victim
 	from unzip_LRU as we can choose even a dirty block (as we'll
@@ -519,7 +559,7 @@ buf_LRU_free_from_unzip_LRU_list(
 	if we have done five iterations so far. */
 
 	if (UNIV_UNLIKELY(n_iterations >= 5)
-	    || !buf_LRU_evict_from_unzip_LRU()) {
+	    || !buf_LRU_evict_from_unzip_LRU(have_LRU_mutex)) {
 
 		return(FALSE);
 	}
@@ -527,18 +567,25 @@ buf_LRU_free_from_unzip_LRU_list(
 	distance = 100 + (n_iterations
 			  * UT_LIST_GET_LEN(buf_pool->unzip_LRU)) / 5;
 
+restart:
 	for (block = UT_LIST_GET_LAST(buf_pool->unzip_LRU);
 	     UNIV_LIKELY(block != NULL) && UNIV_LIKELY(distance > 0);
 	     block = UT_LIST_GET_PREV(unzip_LRU, block), distance--) {
 
 		enum buf_lru_free_block_status	freed;
 
+		mutex_enter(&block->mutex);
+		if (!block->in_unzip_LRU_list || !block->page.in_LRU_list
+		    || buf_block_get_state(block) != BUF_BLOCK_FILE_PAGE) {
+			mutex_exit(&block->mutex);
+			goto restart;
+		}
+
 		ut_ad(buf_block_get_state(block) == BUF_BLOCK_FILE_PAGE);
 		ut_ad(block->in_unzip_LRU_list);
 		ut_ad(block->page.in_LRU_list);
 
-		mutex_enter(&block->mutex);
-		freed = buf_LRU_free_block(&block->page, FALSE, NULL);
+		freed = buf_LRU_free_block(&block->page, FALSE, NULL, have_LRU_mutex);
 		mutex_exit(&block->mutex);
 
 		switch (freed) {
@@ -571,20 +618,22 @@ ibool
 buf_LRU_free_from_common_LRU_list(
 /*==============================*/
 				/* out: TRUE if freed */
-	ulint	n_iterations)	/* in: how many times this has been called
+	ulint	n_iterations,	/* in: how many times this has been called
 				repeatedly without result: a high value means
 				that we should search farther; if
 				n_iterations < 10, then we search
 				n_iterations / 10 * buf_pool->curr_size
 				pages from the end of the LRU list */
+	ibool	have_LRU_mutex)
 {
 	buf_page_t*	bpage;
 	ulint		distance;
 
-	ut_ad(buf_pool_mutex_own());
+	//ut_ad(buf_pool_mutex_own()); /* optimistic */
 
 	distance = 100 + (n_iterations * buf_pool->curr_size) / 10;
 
+restart:
 	for (bpage = UT_LIST_GET_LAST(buf_pool->LRU);
 	     UNIV_LIKELY(bpage != NULL) && UNIV_LIKELY(distance > 0);
 	     bpage = UT_LIST_GET_PREV(LRU, bpage), distance--) {
@@ -593,11 +642,25 @@ buf_LRU_free_from_common_LRU_list(
 		mutex_t*			block_mutex
 			= buf_page_get_mutex(bpage);
 
+retry_lock:
+		mutex_enter(block_mutex);
+
+		if (block_mutex != buf_page_get_mutex(bpage)) {
+			mutex_exit(block_mutex);
+			block_mutex = buf_page_get_mutex(bpage);
+			goto retry_lock;
+		}
+
+		if (!bpage->in_LRU_list
+		    || !buf_page_in_file(bpage)) {
+			mutex_exit(block_mutex);
+			goto restart;
+		}
+
 		ut_ad(buf_page_in_file(bpage));
 		ut_ad(bpage->in_LRU_list);
 
-		mutex_enter(block_mutex);
-		freed = buf_LRU_free_block(bpage, TRUE, NULL);
+		freed = buf_LRU_free_block(bpage, TRUE, NULL, have_LRU_mutex);
 		mutex_exit(block_mutex);
 
 		switch (freed) {
@@ -640,22 +703,33 @@ buf_LRU_search_and_free_block(
 				n_iterations / 5 of the unzip_LRU list. */
 {
 	ibool	freed = FALSE;
+	ibool	have_LRU_mutex = FALSE;
+
+	if (UT_LIST_GET_LEN(buf_pool->unzip_LRU))
+		have_LRU_mutex = TRUE;
 
-	buf_pool_mutex_enter();
+	/* optimistic search... */
+	//buf_pool_mutex_enter();
+	if (have_LRU_mutex)
+		mutex_enter(&LRU_list_mutex);
 
-	freed = buf_LRU_free_from_unzip_LRU_list(n_iterations);
+	freed = buf_LRU_free_from_unzip_LRU_list(n_iterations, have_LRU_mutex);
 
 	if (!freed) {
-		freed = buf_LRU_free_from_common_LRU_list(n_iterations);
+		freed = buf_LRU_free_from_common_LRU_list(n_iterations, have_LRU_mutex);
 	}
 
+	mutex_enter(&buf_pool_mutex);
 	if (!freed) {
 		buf_pool->LRU_flush_ended = 0;
 	} else if (buf_pool->LRU_flush_ended > 0) {
 		buf_pool->LRU_flush_ended--;
 	}
+	mutex_exit(&buf_pool_mutex);
 
-	buf_pool_mutex_exit();
+	//buf_pool_mutex_exit();
+	if (have_LRU_mutex)
+		mutex_exit(&LRU_list_mutex);
 
 	return(freed);
 }
@@ -673,18 +747,22 @@ void
 buf_LRU_try_free_flushed_blocks(void)
 /*=================================*/
 {
-	buf_pool_mutex_enter();
+	//buf_pool_mutex_enter();
+	mutex_enter(&buf_pool_mutex);
 
 	while (buf_pool->LRU_flush_ended > 0) {
 
-		buf_pool_mutex_exit();
+		//buf_pool_mutex_exit();
+		mutex_exit(&buf_pool_mutex);
 
 		buf_LRU_search_and_free_block(1);
 
-		buf_pool_mutex_enter();
+		//buf_pool_mutex_enter();
+		mutex_enter(&buf_pool_mutex);
 	}
 
-	buf_pool_mutex_exit();
+	//buf_pool_mutex_exit();
+	mutex_exit(&buf_pool_mutex);
 }
 
 /**********************************************************************
@@ -700,7 +778,9 @@ buf_LRU_buf_pool_running_out(void)
 {
 	ibool	ret	= FALSE;
 
-	buf_pool_mutex_enter();
+	//buf_pool_mutex_enter();
+	mutex_enter(&LRU_list_mutex);
+	mutex_enter(&free_list_mutex);
 
 	if (!recv_recovery_on && UT_LIST_GET_LEN(buf_pool->free)
 	    + UT_LIST_GET_LEN(buf_pool->LRU) < buf_pool->curr_size / 4) {
@@ -708,7 +788,9 @@ buf_LRU_buf_pool_running_out(void)
 		ret = TRUE;
 	}
 
-	buf_pool_mutex_exit();
+	//buf_pool_mutex_exit();
+	mutex_exit(&LRU_list_mutex);
+	mutex_exit(&free_list_mutex);
 
 	return(ret);
 }
@@ -725,9 +807,10 @@ buf_LRU_get_free_only(void)
 {
 	buf_block_t*	block;
 
-	ut_ad(buf_pool_mutex_own());
+	//ut_ad(buf_pool_mutex_own());
 
-	block = (buf_block_t*) UT_LIST_GET_FIRST(buf_pool->free);
+	mutex_enter(&free_list_mutex);
+	block = (buf_block_t*) UT_LIST_GET_LAST(buf_pool->free);
 
 	if (block) {
 		ut_ad(block->page.in_free_list);
@@ -735,7 +818,9 @@ buf_LRU_get_free_only(void)
 		ut_ad(!block->page.in_flush_list);
 		ut_ad(!block->page.in_LRU_list);
 		ut_a(!buf_page_in_file(&block->page));
-		UT_LIST_REMOVE(list, buf_pool->free, (&block->page));
+		UT_LIST_REMOVE(free, buf_pool->free, (&block->page));
+
+		mutex_exit(&free_list_mutex);
 
 		mutex_enter(&block->mutex);
 
@@ -743,6 +828,8 @@ buf_LRU_get_free_only(void)
 		UNIV_MEM_ALLOC(block->frame, UNIV_PAGE_SIZE);
 
 		mutex_exit(&block->mutex);
+	} else {
+		mutex_exit(&free_list_mutex);
 	}
 
 	return(block);
@@ -767,7 +854,7 @@ buf_LRU_get_free_block(
 	ibool		mon_value_was	= FALSE;
 	ibool		started_monitor	= FALSE;
 loop:
-	buf_pool_mutex_enter();
+	//buf_pool_mutex_enter();
 
 	if (!recv_recovery_on && UT_LIST_GET_LEN(buf_pool->free)
 	    + UT_LIST_GET_LEN(buf_pool->LRU) < buf_pool->curr_size / 20) {
@@ -847,14 +934,16 @@ loop:
 		if (UNIV_UNLIKELY(zip_size)) {
 			ibool	lru;
 			page_zip_set_size(&block->page.zip, zip_size);
-			block->page.zip.data = buf_buddy_alloc(zip_size, &lru);
+			mutex_enter(&LRU_list_mutex);
+			block->page.zip.data = buf_buddy_alloc(zip_size, &lru, FALSE);
+			mutex_exit(&LRU_list_mutex);
 			UNIV_MEM_DESC(block->page.zip.data, zip_size, block);
 		} else {
 			page_zip_set_size(&block->page.zip, 0);
 			block->page.zip.data = NULL;
 		}
 
-		buf_pool_mutex_exit();
+		//buf_pool_mutex_exit();
 
 		if (started_monitor) {
 			srv_print_innodb_monitor = mon_value_was;
@@ -866,7 +955,7 @@ loop:
 	/* If no block was in the free list, search from the end of the LRU
 	list and try to free a block there */
 
-	buf_pool_mutex_exit();
+	//buf_pool_mutex_exit();
 
 	freed = buf_LRU_search_and_free_block(n_iterations);
 
@@ -915,18 +1004,21 @@ loop:
 
 	os_aio_simulated_wake_handler_threads();
 
-	buf_pool_mutex_enter();
+	//buf_pool_mutex_enter();
+	mutex_enter(&buf_pool_mutex);
 
 	if (buf_pool->LRU_flush_ended > 0) {
 		/* We have written pages in an LRU flush. To make the insert
 		buffer more efficient, we try to move these pages to the free
 		list. */
 
-		buf_pool_mutex_exit();
+		//buf_pool_mutex_exit();
+		mutex_exit(&buf_pool_mutex);
 
 		buf_LRU_try_free_flushed_blocks();
 	} else {
-		buf_pool_mutex_exit();
+		//buf_pool_mutex_exit();
+		mutex_exit(&buf_pool_mutex);
 	}
 
 	if (n_iterations > 10) {
@@ -951,7 +1043,8 @@ buf_LRU_old_adjust_len(void)
 	ulint	new_len;
 
 	ut_a(buf_pool->LRU_old);
-	ut_ad(buf_pool_mutex_own());
+	//ut_ad(buf_pool_mutex_own());
+	ut_ad(mutex_own(&LRU_list_mutex));
 #if 3 * (BUF_LRU_OLD_MIN_LEN / 8) <= BUF_LRU_OLD_TOLERANCE + 5
 # error "3 * (BUF_LRU_OLD_MIN_LEN / 8) <= BUF_LRU_OLD_TOLERANCE + 5"
 #endif
@@ -1009,7 +1102,8 @@ buf_LRU_old_init(void)
 {
 	buf_page_t*	bpage;
 
-	ut_ad(buf_pool_mutex_own());
+	//ut_ad(buf_pool_mutex_own());
+	ut_ad(mutex_own(&LRU_list_mutex));
 	ut_a(UT_LIST_GET_LEN(buf_pool->LRU) == BUF_LRU_OLD_MIN_LEN);
 
 	/* We first initialize all blocks in the LRU list as old and then use
@@ -1041,13 +1135,14 @@ buf_unzip_LRU_remove_block_if_needed(
 	ut_ad(buf_pool);
 	ut_ad(bpage);
 	ut_ad(buf_page_in_file(bpage));
-	ut_ad(buf_pool_mutex_own());
+	//ut_ad(buf_pool_mutex_own());
+	ut_ad(mutex_own(&LRU_list_mutex));
 
 	if (buf_page_belongs_to_unzip_LRU(bpage)) {
 		buf_block_t*	block = (buf_block_t*) bpage;
 
 		ut_ad(block->in_unzip_LRU_list);
-		ut_d(block->in_unzip_LRU_list = FALSE);
+		block->in_unzip_LRU_list = FALSE;
 
 		UT_LIST_REMOVE(unzip_LRU, buf_pool->unzip_LRU, block);
 	}
@@ -1063,7 +1158,8 @@ buf_LRU_remove_block(
 {
 	ut_ad(buf_pool);
 	ut_ad(bpage);
-	ut_ad(buf_pool_mutex_own());
+	//ut_ad(buf_pool_mutex_own());
+	ut_ad(mutex_own(&LRU_list_mutex));
 
 	ut_a(buf_page_in_file(bpage));
 
@@ -1126,12 +1222,13 @@ buf_unzip_LRU_add_block(
 {
 	ut_ad(buf_pool);
 	ut_ad(block);
-	ut_ad(buf_pool_mutex_own());
+	//ut_ad(buf_pool_mutex_own());
+	ut_ad(mutex_own(&LRU_list_mutex));
 
 	ut_a(buf_page_belongs_to_unzip_LRU(&block->page));
 
 	ut_ad(!block->in_unzip_LRU_list);
-	ut_d(block->in_unzip_LRU_list = TRUE);
+	block->in_unzip_LRU_list = TRUE;
 
 	if (old) {
 		UT_LIST_ADD_LAST(unzip_LRU, buf_pool->unzip_LRU, block);
@@ -1152,7 +1249,8 @@ buf_LRU_add_block_to_end_low(
 
 	ut_ad(buf_pool);
 	ut_ad(bpage);
-	ut_ad(buf_pool_mutex_own());
+	//ut_ad(buf_pool_mutex_own());
+	ut_ad(mutex_own(&LRU_list_mutex));
 
 	ut_a(buf_page_in_file(bpage));
 
@@ -1212,7 +1310,8 @@ buf_LRU_add_block_low(
 {
 	ut_ad(buf_pool);
 	ut_ad(bpage);
-	ut_ad(buf_pool_mutex_own());
+	//ut_ad(buf_pool_mutex_own());
+	ut_ad(mutex_own(&LRU_list_mutex));
 
 	ut_a(buf_page_in_file(bpage));
 	ut_ad(!bpage->in_LRU_list);
@@ -1331,22 +1430,23 @@ buf_LRU_free_block(
 	buf_page_t*	bpage,	/* in: block to be freed */
 	ibool		zip,	/* in: TRUE if should remove also the
 				compressed page of an uncompressed page */
-	ibool*		buf_pool_mutex_released)
+	ibool*		buf_pool_mutex_released,
 				/* in: pointer to a variable that will
 				be assigned TRUE if buf_pool_mutex
 				was temporarily released, or NULL */
+	ibool		have_LRU_mutex)
 {
 	buf_page_t*	b = NULL;
 	mutex_t*	block_mutex = buf_page_get_mutex(bpage);
 
-	ut_ad(buf_pool_mutex_own());
+	//ut_ad(buf_pool_mutex_own());
 	ut_ad(mutex_own(block_mutex));
 	ut_ad(buf_page_in_file(bpage));
-	ut_ad(bpage->in_LRU_list);
+	//ut_ad(bpage->in_LRU_list);
 	ut_ad(!bpage->in_flush_list == !bpage->oldest_modification);
 	UNIV_MEM_ASSERT_RW(bpage, sizeof *bpage);
 
-	if (!buf_page_can_relocate(bpage)) {
+	if (!bpage->in_LRU_list || !block_mutex || !buf_page_can_relocate(bpage)) {
 
 		/* Do not free buffer-fixed or I/O-fixed blocks. */
 		return(BUF_LRU_NOT_FREED);
@@ -1378,15 +1478,15 @@ buf_LRU_free_block(
 		If it cannot be allocated (without freeing a block
 		from the LRU list), refuse to free bpage. */
 alloc:
-		buf_pool_mutex_exit_forbid();
-		b = buf_buddy_alloc(sizeof *b, NULL);
-		buf_pool_mutex_exit_allow();
+		//buf_pool_mutex_exit_forbid();
+		b = buf_buddy_alloc(sizeof *b, NULL, FALSE);
+		//buf_pool_mutex_exit_allow();
 
 		if (UNIV_UNLIKELY(!b)) {
 			return(BUF_LRU_CANNOT_RELOCATE);
 		}
 
-		memcpy(b, bpage, sizeof *b);
+		//memcpy(b, bpage, sizeof *b);
 	}
 
 #ifdef UNIV_DEBUG
@@ -1397,6 +1497,39 @@ alloc:
 	}
 #endif /* UNIV_DEBUG */
 
+	/* not to break latch order, must re-enter block_mutex */
+	mutex_exit(block_mutex);
+
+	if (!have_LRU_mutex)
+		mutex_enter(&LRU_list_mutex); /* optimistic */
+	rw_lock_x_lock(&page_hash_latch);
+	mutex_enter(block_mutex);
+
+	/* recheck states of block */
+	if (!bpage->in_LRU_list || block_mutex != buf_page_get_mutex(bpage)
+	    || !buf_page_can_relocate(bpage)) {
+not_freed:
+		if (b) {
+			buf_buddy_free(b, sizeof *b, TRUE);
+		}
+		if (!have_LRU_mutex)
+			mutex_exit(&LRU_list_mutex);
+		rw_lock_x_unlock(&page_hash_latch);
+		return(BUF_LRU_NOT_FREED);
+	} else if (zip || !bpage->zip.data) {
+		if (bpage->oldest_modification)
+			goto not_freed;
+	} else if (bpage->oldest_modification) {
+		if (buf_page_get_state(bpage) != BUF_BLOCK_FILE_PAGE) {
+			ut_ad(buf_page_get_state(bpage) == BUF_BLOCK_ZIP_DIRTY);
+			goto not_freed;
+		}
+	}
+
+	if (b) {
+		memcpy(b, bpage, sizeof *b);
+	}
+
 	if (buf_LRU_block_remove_hashed_page(bpage, zip)
 	    != BUF_BLOCK_ZIP_FREE) {
 		ut_a(bpage->buf_fix_count == 0);
@@ -1408,6 +1541,10 @@ alloc:
 
 			ut_a(!buf_page_hash_get(bpage->space, bpage->offset));
 
+			while (prev_b && !prev_b->in_LRU_list) {
+				prev_b = UT_LIST_GET_PREV(LRU, prev_b);
+			}
+
 			b->state = b->oldest_modification
 				? BUF_BLOCK_ZIP_DIRTY
 				: BUF_BLOCK_ZIP_PAGE;
@@ -1482,6 +1619,7 @@ alloc:
 				buf_LRU_add_block_low(b, buf_page_is_old(b));
 			}
 
+			mutex_enter(&flush_list_mutex);
 			if (b->state == BUF_BLOCK_ZIP_PAGE) {
 				buf_LRU_insert_zip_clean(b);
 			} else {
@@ -1490,22 +1628,23 @@ alloc:
 				ut_ad(b->in_flush_list);
 				ut_d(bpage->in_flush_list = FALSE);
 
-				prev = UT_LIST_GET_PREV(list, b);
-				UT_LIST_REMOVE(list, buf_pool->flush_list, b);
+				prev = UT_LIST_GET_PREV(flush_list, b);
+				UT_LIST_REMOVE(flush_list, buf_pool->flush_list, b);
 
 				if (prev) {
 					ut_ad(prev->in_flush_list);
 					UT_LIST_INSERT_AFTER(
-						list,
+						flush_list,
 						buf_pool->flush_list,
 						prev, b);
 				} else {
 					UT_LIST_ADD_FIRST(
-						list,
+						flush_list,
 						buf_pool->flush_list,
 						b);
 				}
 			}
+			mutex_exit(&flush_list_mutex);
 
 			bpage->zip.data = NULL;
 			page_zip_set_size(&bpage->zip, 0);
@@ -1521,7 +1660,9 @@ alloc:
 			*buf_pool_mutex_released = TRUE;
 		}
 
-		buf_pool_mutex_exit();
+		//buf_pool_mutex_exit();
+		mutex_exit(&LRU_list_mutex);
+		rw_lock_x_unlock(&page_hash_latch);
 		mutex_exit(block_mutex);
 
 		/* Remove possible adaptive hash index on the page.
@@ -1553,7 +1694,9 @@ alloc:
 				: BUF_NO_CHECKSUM_MAGIC);
 		}
 
-		buf_pool_mutex_enter();
+		//buf_pool_mutex_enter();
+		if (have_LRU_mutex)
+			mutex_enter(&LRU_list_mutex);
 		mutex_enter(block_mutex);
 
 		if (b) {
@@ -1563,13 +1706,17 @@ alloc:
 			mutex_exit(&buf_pool_zip_mutex);
 		}
 
-		buf_LRU_block_free_hashed_page((buf_block_t*) bpage);
+		buf_LRU_block_free_hashed_page((buf_block_t*) bpage, FALSE);
 	} else {
 		/* The block_mutex should have been released by
 		buf_LRU_block_remove_hashed_page() when it returns
 		BUF_BLOCK_ZIP_FREE. */
 		ut_ad(block_mutex == &buf_pool_zip_mutex);
 		mutex_enter(block_mutex);
+
+		if (!have_LRU_mutex)
+			mutex_exit(&LRU_list_mutex);
+		rw_lock_x_unlock(&page_hash_latch);
 	}
 
 	return(BUF_LRU_FREED);
@@ -1581,12 +1728,13 @@ UNIV_INTERN
 void
 buf_LRU_block_free_non_file_page(
 /*=============================*/
-	buf_block_t*	block)	/* in: block, must not contain a file page */
+	buf_block_t*	block,	/* in: block, must not contain a file page */
+	ibool		have_page_hash_mutex)
 {
 	void*	data;
 
 	ut_ad(block);
-	ut_ad(buf_pool_mutex_own());
+	//ut_ad(buf_pool_mutex_own());
 	ut_ad(mutex_own(&block->mutex));
 
 	switch (buf_block_get_state(block)) {
@@ -1620,15 +1768,17 @@ buf_LRU_block_free_non_file_page(
 	if (data) {
 		block->page.zip.data = NULL;
 		mutex_exit(&block->mutex);
-		buf_pool_mutex_exit_forbid();
-		buf_buddy_free(data, page_zip_get_size(&block->page.zip));
-		buf_pool_mutex_exit_allow();
+		//buf_pool_mutex_exit_forbid();
+		buf_buddy_free(data, page_zip_get_size(&block->page.zip), have_page_hash_mutex);
+		//buf_pool_mutex_exit_allow();
 		mutex_enter(&block->mutex);
 		page_zip_set_size(&block->page.zip, 0);
 	}
 
-	UT_LIST_ADD_FIRST(list, buf_pool->free, (&block->page));
+	mutex_enter(&free_list_mutex);
+	UT_LIST_ADD_FIRST(free, buf_pool->free, (&block->page));
 	ut_d(block->page.in_free_list = TRUE);
+	mutex_exit(&free_list_mutex);
 
 	UNIV_MEM_ASSERT_AND_FREE(block->frame, UNIV_PAGE_SIZE);
 }
@@ -1657,7 +1807,11 @@ buf_LRU_block_remove_hashed_page(
 {
 	const buf_page_t*	hashed_bpage;
 	ut_ad(bpage);
-	ut_ad(buf_pool_mutex_own());
+	//ut_ad(buf_pool_mutex_own());
+	ut_ad(mutex_own(&LRU_list_mutex));
+#ifdef UNIV_SYNC_DEBUG
+	ut_ad(rw_lock_own(&page_hash_latch, RW_LOCK_EX));
+#endif
 	ut_ad(mutex_own(buf_page_get_mutex(bpage)));
 
 	ut_a(buf_page_get_io_fix(bpage) == BUF_IO_NONE);
@@ -1758,7 +1912,9 @@ buf_LRU_block_remove_hashed_page(
 
 #if defined UNIV_DEBUG || defined UNIV_BUF_DEBUG
 		mutex_exit(buf_page_get_mutex(bpage));
-		buf_pool_mutex_exit();
+		//buf_pool_mutex_exit();
+		mutex_exit(&LRU_list_mutex);
+		rw_lock_x_unlock(&page_hash_latch);
 		buf_print();
 		buf_LRU_print();
 		buf_validate();
@@ -1781,14 +1937,14 @@ buf_LRU_block_remove_hashed_page(
 		ut_a(bpage->zip.data);
 		ut_a(buf_page_get_zip_size(bpage));
 
-		UT_LIST_REMOVE(list, buf_pool->zip_clean, bpage);
+		UT_LIST_REMOVE(zip_list, buf_pool->zip_clean, bpage);
 
 		mutex_exit(&buf_pool_zip_mutex);
-		buf_pool_mutex_exit_forbid();
+		//buf_pool_mutex_exit_forbid();
 		buf_buddy_free(bpage->zip.data,
-			       page_zip_get_size(&bpage->zip));
-		buf_buddy_free(bpage, sizeof(*bpage));
-		buf_pool_mutex_exit_allow();
+			       page_zip_get_size(&bpage->zip), TRUE);
+		buf_buddy_free(bpage, sizeof(*bpage), TRUE);
+		//buf_pool_mutex_exit_allow();
 		UNIV_MEM_UNDESC(bpage);
 		return(BUF_BLOCK_ZIP_FREE);
 
@@ -1807,9 +1963,9 @@ buf_LRU_block_remove_hashed_page(
 			bpage->zip.data = NULL;
 
 			mutex_exit(&((buf_block_t*) bpage)->mutex);
-			buf_pool_mutex_exit_forbid();
-			buf_buddy_free(data, page_zip_get_size(&bpage->zip));
-			buf_pool_mutex_exit_allow();
+			//buf_pool_mutex_exit_forbid();
+			buf_buddy_free(data, page_zip_get_size(&bpage->zip), TRUE);
+			//buf_pool_mutex_exit_allow();
 			mutex_enter(&((buf_block_t*) bpage)->mutex);
 			page_zip_set_size(&bpage->zip, 0);
 		}
@@ -1835,15 +1991,16 @@ static
 void
 buf_LRU_block_free_hashed_page(
 /*===========================*/
-	buf_block_t*	block)	/* in: block, must contain a file page and
+	buf_block_t*	block,	/* in: block, must contain a file page and
 				be in a state where it can be freed */
+	ibool		have_page_hash_mutex)
 {
-	ut_ad(buf_pool_mutex_own());
+	//ut_ad(buf_pool_mutex_own());
 	ut_ad(mutex_own(&block->mutex));
 
 	buf_block_set_state(block, BUF_BLOCK_MEMORY);
 
-	buf_LRU_block_free_non_file_page(block);
+	buf_LRU_block_free_non_file_page(block, have_page_hash_mutex);
 }
 
 /************************************************************************
@@ -1861,7 +2018,8 @@ buf_LRU_stat_update(void)
 		goto func_exit;
 	}
 
-	buf_pool_mutex_enter();
+	//buf_pool_mutex_enter();
+	mutex_enter(&buf_pool_mutex);
 
 	/* Update the index. */
 	item = &buf_LRU_stat_arr[buf_LRU_stat_arr_ind];
@@ -1875,7 +2033,8 @@ buf_LRU_stat_update(void)
 	/* Put current entry in the array. */
 	memcpy(item, &buf_LRU_stat_cur, sizeof *item);
 
-	buf_pool_mutex_exit();
+	//buf_pool_mutex_exit();
+	mutex_exit(&buf_pool_mutex);
 
 func_exit:
 	/* Clear the current entry. */
@@ -1897,7 +2056,8 @@ buf_LRU_validate(void)
 	ulint		LRU_pos;
 
 	ut_ad(buf_pool);
-	buf_pool_mutex_enter();
+	//buf_pool_mutex_enter();
+	mutex_enter(&LRU_list_mutex);
 
 	if (UT_LIST_GET_LEN(buf_pool->LRU) >= BUF_LRU_OLD_MIN_LEN) {
 
@@ -1956,15 +2116,21 @@ buf_LRU_validate(void)
 		ut_a(buf_pool->LRU_old_len == old_len);
 	}
 
-	UT_LIST_VALIDATE(list, buf_page_t, buf_pool->free);
+	mutex_exit(&LRU_list_mutex);
+	mutex_enter(&free_list_mutex);
+
+	UT_LIST_VALIDATE(free, buf_page_t, buf_pool->free);
 
 	for (bpage = UT_LIST_GET_FIRST(buf_pool->free);
 	     bpage != NULL;
-	     bpage = UT_LIST_GET_NEXT(list, bpage)) {
+	     bpage = UT_LIST_GET_NEXT(free, bpage)) {
 
 		ut_a(buf_page_get_state(bpage) == BUF_BLOCK_NOT_USED);
 	}
 
+	mutex_exit(&free_list_mutex);
+	mutex_enter(&LRU_list_mutex);
+
 	UT_LIST_VALIDATE(unzip_LRU, buf_block_t, buf_pool->unzip_LRU);
 
 	for (block = UT_LIST_GET_FIRST(buf_pool->unzip_LRU);
@@ -1976,7 +2142,8 @@ buf_LRU_validate(void)
 		ut_a(buf_page_belongs_to_unzip_LRU(&block->page));
 	}
 
-	buf_pool_mutex_exit();
+	//buf_pool_mutex_exit();
+	mutex_exit(&LRU_list_mutex);
 	return(TRUE);
 }
 #endif /* UNIV_DEBUG || UNIV_BUF_DEBUG */
@@ -1992,7 +2159,8 @@ buf_LRU_print(void)
 	const buf_page_t*	bpage;
 
 	ut_ad(buf_pool);
-	buf_pool_mutex_enter();
+	//buf_pool_mutex_enter();
+	mutex_enter(&LRU_list_mutex);
 
 	fprintf(stderr, "Pool ulint clock %lu\n",
 		(ulong) buf_pool->ulint_clock);
@@ -2055,6 +2223,7 @@ buf_LRU_print(void)
 		bpage = UT_LIST_GET_NEXT(LRU, bpage);
 	}
 
-	buf_pool_mutex_exit();
+	//buf_pool_mutex_exit();
+	mutex_exit(&LRU_list_mutex);
 }
 #endif /* UNIV_DEBUG_PRINT || UNIV_DEBUG || UNIV_BUF_DEBUG */

=== modified file 'storage/xtradb/buf/buf0rea.c'
--- a/storage/xtradb/buf/buf0rea.c	2009-05-04 02:45:47 +0000
+++ b/storage/xtradb/buf/buf0rea.c	2009-07-06 05:47:15 +0000
@@ -134,6 +134,46 @@ buf_read_page_low(
 	bpage = buf_page_init_for_read(err, mode, space, zip_size, unzip,
 				       tablespace_version, offset);
 	if (bpage == NULL) {
+		/* bugfix: http://bugs.mysql.com/bug.php?id=43948 */
+		if (recv_recovery_is_on() && *err == DB_TABLESPACE_DELETED) {
+			/* hashed log recs must be treated here */
+			recv_addr_t*    recv_addr;
+
+			mutex_enter(&(recv_sys->mutex));
+
+			if (recv_sys->apply_log_recs == FALSE) {
+				mutex_exit(&(recv_sys->mutex));
+				goto not_to_recover;
+			}
+
+			/* recv_get_fil_addr_struct() */
+			recv_addr = HASH_GET_FIRST(recv_sys->addr_hash,
+					hash_calc_hash(ut_fold_ulint_pair(space, offset),
+						recv_sys->addr_hash));
+			while (recv_addr) {
+				if ((recv_addr->space == space)
+					&& (recv_addr->page_no == offset)) {
+					break;
+				}
+				recv_addr = HASH_GET_NEXT(addr_hash, recv_addr);
+			}
+
+			if ((recv_addr == NULL)
+			    || (recv_addr->state == RECV_BEING_PROCESSED)
+			    || (recv_addr->state == RECV_PROCESSED)) {
+				mutex_exit(&(recv_sys->mutex));
+				goto not_to_recover;
+			}
+
+			fprintf(stderr, " (cannot find space: %lu)", space);
+			recv_addr->state = RECV_PROCESSED;
+
+			ut_a(recv_sys->n_addrs);
+			recv_sys->n_addrs--;
+
+			mutex_exit(&(recv_sys->mutex));
+		}
+not_to_recover:
 
 		return(0);
 	}
@@ -246,18 +286,22 @@ buf_read_ahead_random(
 
 	LRU_recent_limit = buf_LRU_get_recent_limit();
 
-	buf_pool_mutex_enter();
+	//buf_pool_mutex_enter();
+	mutex_enter(&buf_pool_mutex);
 
 	if (buf_pool->n_pend_reads
 	    > buf_pool->curr_size / BUF_READ_AHEAD_PEND_LIMIT) {
-		buf_pool_mutex_exit();
+		//buf_pool_mutex_exit();
+		mutex_exit(&buf_pool_mutex);
 
 		return(0);
 	}
+	mutex_exit(&buf_pool_mutex);
 
 	/* Count how many blocks in the area have been recently accessed,
 	that is, reside near the start of the LRU list. */
 
+	rw_lock_s_lock(&page_hash_latch);
 	for (i = low; i < high; i++) {
 		const buf_page_t*	bpage = buf_page_hash_get(space, i);
 
@@ -269,13 +313,15 @@ buf_read_ahead_random(
 
 			if (recent_blocks >= BUF_READ_AHEAD_RANDOM_THRESHOLD) {
 
-				buf_pool_mutex_exit();
+				//buf_pool_mutex_exit();
+				rw_lock_s_unlock(&page_hash_latch);
 				goto read_ahead;
 			}
 		}
 	}
 
-	buf_pool_mutex_exit();
+	//buf_pool_mutex_exit();
+	rw_lock_s_unlock(&page_hash_latch);
 	/* Do nothing */
 	return(0);
 
@@ -469,10 +515,12 @@ buf_read_ahead_linear(
 
 	tablespace_version = fil_space_get_version(space);
 
-	buf_pool_mutex_enter();
+	//buf_pool_mutex_enter();
+	mutex_enter(&buf_pool_mutex);
 
 	if (high > fil_space_get_size(space)) {
-		buf_pool_mutex_exit();
+		//buf_pool_mutex_exit();
+		mutex_exit(&buf_pool_mutex);
 		/* The area is not whole, return */
 
 		return(0);
@@ -480,10 +528,12 @@ buf_read_ahead_linear(
 
 	if (buf_pool->n_pend_reads
 	    > buf_pool->curr_size / BUF_READ_AHEAD_PEND_LIMIT) {
-		buf_pool_mutex_exit();
+		//buf_pool_mutex_exit();
+		mutex_exit(&buf_pool_mutex);
 
 		return(0);
 	}
+	mutex_exit(&buf_pool_mutex);
 
 	/* Check that almost all pages in the area have been accessed; if
 	offset == low, the accesses must be in a descending order, otherwise,
@@ -497,6 +547,7 @@ buf_read_ahead_linear(
 
 	fail_count = 0;
 
+	rw_lock_s_lock(&page_hash_latch);
 	for (i = low; i < high; i++) {
 		bpage = buf_page_hash_get(space, i);
 
@@ -520,7 +571,8 @@ buf_read_ahead_linear(
 	    * LINEAR_AREA_THRESHOLD_COEF) {
 		/* Too many failures: return */
 
-		buf_pool_mutex_exit();
+		//buf_pool_mutex_exit();
+		rw_lock_s_unlock(&page_hash_latch);
 
 		return(0);
 	}
@@ -531,7 +583,8 @@ buf_read_ahead_linear(
 	bpage = buf_page_hash_get(space, offset);
 
 	if (bpage == NULL) {
-		buf_pool_mutex_exit();
+		//buf_pool_mutex_exit();
+		rw_lock_s_unlock(&page_hash_latch);
 
 		return(0);
 	}
@@ -557,7 +610,8 @@ buf_read_ahead_linear(
 	pred_offset = fil_page_get_prev(frame);
 	succ_offset = fil_page_get_next(frame);
 
-	buf_pool_mutex_exit();
+	//buf_pool_mutex_exit();
+	rw_lock_s_unlock(&page_hash_latch);
 
 	if ((offset == low) && (succ_offset == offset + 1)) {
 
@@ -770,11 +824,11 @@ buf_read_recv_pages(
 		while (buf_pool->n_pend_reads >= recv_n_pool_free_frames / 2) {
 
 			os_aio_simulated_wake_handler_threads();
-			os_thread_sleep(500000);
+			os_thread_sleep(10000);
 
 			count++;
 
-			if (count > 100) {
+			if (count > 5000) {
 				fprintf(stderr,
 					"InnoDB: Error: InnoDB has waited for"
 					" 50 seconds for pending\n"

=== modified file 'storage/xtradb/dict/dict0boot.c'
--- a/storage/xtradb/dict/dict0boot.c	2009-03-26 06:11:11 +0000
+++ b/storage/xtradb/dict/dict0boot.c	2009-06-25 01:43:25 +0000
@@ -265,6 +265,7 @@ dict_boot(void)
 	system tables */
 	/*-------------------------*/
 	table = dict_mem_table_create("SYS_TABLES", DICT_HDR_SPACE, 8, 0);
+	table->n_mysql_handles_opened = 1; /* for pin */
 
 	dict_mem_table_add_col(table, heap, "NAME", DATA_BINARY, 0, 0);
 	dict_mem_table_add_col(table, heap, "ID", DATA_BINARY, 0, 0);
@@ -314,6 +315,7 @@ dict_boot(void)
 
 	/*-------------------------*/
 	table = dict_mem_table_create("SYS_COLUMNS", DICT_HDR_SPACE, 7, 0);
+	table->n_mysql_handles_opened = 1; /* for pin */
 
 	dict_mem_table_add_col(table, heap, "TABLE_ID", DATA_BINARY, 0, 0);
 	dict_mem_table_add_col(table, heap, "POS", DATA_INT, 0, 4);
@@ -346,6 +348,7 @@ dict_boot(void)
 
 	/*-------------------------*/
 	table = dict_mem_table_create("SYS_INDEXES", DICT_HDR_SPACE, 7, 0);
+	table->n_mysql_handles_opened = 1; /* for pin */
 
 	dict_mem_table_add_col(table, heap, "TABLE_ID", DATA_BINARY, 0, 0);
 	dict_mem_table_add_col(table, heap, "ID", DATA_BINARY, 0, 0);
@@ -388,6 +391,7 @@ dict_boot(void)
 
 	/*-------------------------*/
 	table = dict_mem_table_create("SYS_FIELDS", DICT_HDR_SPACE, 3, 0);
+	table->n_mysql_handles_opened = 1; /* for pin */
 
 	dict_mem_table_add_col(table, heap, "INDEX_ID", DATA_BINARY, 0, 0);
 	dict_mem_table_add_col(table, heap, "POS", DATA_INT, 0, 4);

=== modified file 'storage/xtradb/dict/dict0crea.c'
--- a/storage/xtradb/dict/dict0crea.c	2009-03-26 06:11:11 +0000
+++ b/storage/xtradb/dict/dict0crea.c	2009-06-25 01:43:25 +0000
@@ -1184,6 +1184,9 @@ dict_create_or_check_foreign_constraint_
 		/* Foreign constraint system tables have already been
 		created, and they are ok */
 
+		table1->n_mysql_handles_opened = 1; /* for pin */
+		table2->n_mysql_handles_opened = 1; /* for pin */
+
 		mutex_exit(&(dict_sys->mutex));
 
 		return(DB_SUCCESS);
@@ -1265,6 +1268,11 @@ dict_create_or_check_foreign_constraint_
 
 	trx_commit_for_mysql(trx);
 
+	table1 = dict_table_get_low("SYS_FOREIGN");
+	table2 = dict_table_get_low("SYS_FOREIGN_COLS");
+	table1->n_mysql_handles_opened = 1; /* for pin */
+	table2->n_mysql_handles_opened = 1; /* for pin */
+
 	row_mysql_unlock_data_dictionary(trx);
 
 	trx_free_for_mysql(trx);

=== modified file 'storage/xtradb/dict/dict0dict.c'
--- a/storage/xtradb/dict/dict0dict.c	2009-03-26 06:11:11 +0000
+++ b/storage/xtradb/dict/dict0dict.c	2009-08-03 07:14:02 +0000
@@ -545,6 +545,8 @@ dict_table_get_on_id(
 
 	table = dict_table_get_on_id_low(table_id);
 
+	dict_table_LRU_trim(table);
+
 	mutex_exit(&(dict_sys->mutex));
 
 	return(table);
@@ -659,6 +661,8 @@ dict_table_get(
 		table->n_mysql_handles_opened++;
 	}
 
+	dict_table_LRU_trim(table);
+
 	mutex_exit(&(dict_sys->mutex));
 
 	if (table != NULL) {
@@ -1153,6 +1157,64 @@ dict_table_remove_from_cache(
 	dict_mem_table_free(table);
 }
 
+/**************************************************************************
+Frees tables from the end of table_LRU if the dictionary cache occupies
+too much space. */
+UNIV_INTERN
+void
+dict_table_LRU_trim(
+/*================*/
+	dict_table_t*	self)
+{
+	dict_table_t*	table;
+	dict_table_t*	prev_table;
+	dict_foreign_t*	foreign;
+	ulint		n_removed;
+	ulint		n_have_parent;
+	ulint		cached_foreign_tables;
+
+#ifdef UNIV_SYNC_DEBUG
+	ut_ad(mutex_own(&(dict_sys->mutex)));
+#endif /* UNIV_SYNC_DEBUG */
+
+retry:
+	n_removed = n_have_parent = 0;
+	table = UT_LIST_GET_LAST(dict_sys->table_LRU);
+
+	while ( srv_dict_size_limit && table
+		&& ((dict_sys->table_hash->n_cells
+		     + dict_sys->table_id_hash->n_cells) * sizeof(hash_cell_t)
+		    + dict_sys->size) > srv_dict_size_limit ) {
+		prev_table = UT_LIST_GET_PREV(table_LRU, table);
+
+		if (table == self || table->n_mysql_handles_opened)
+			goto next_loop;
+
+		cached_foreign_tables = 0;
+		foreign = UT_LIST_GET_FIRST(table->foreign_list);
+		while (foreign != NULL) {
+			if (foreign->referenced_table)
+				cached_foreign_tables++;
+			foreign = UT_LIST_GET_NEXT(foreign_list, foreign);
+		}
+
+		if (cached_foreign_tables == 0) {
+			dict_table_remove_from_cache(table);
+			n_removed++;
+		} else {
+			n_have_parent++;
+		}
+next_loop:
+		table = prev_table;
+	}
+
+	if ( srv_dict_size_limit && n_have_parent && n_removed
+		&& ((dict_sys->table_hash->n_cells
+		     + dict_sys->table_id_hash->n_cells) * sizeof(hash_cell_t)
+		    + dict_sys->size) > srv_dict_size_limit )
+		goto retry;
+}
+
 /********************************************************************
 If the given column name is reserved for InnoDB system columns, return
 TRUE. */
@@ -2987,7 +3049,7 @@ scan_more:
 		} else if (quote) {
 			/* Within quotes: do not look for
 			starting quotes or comments. */
-		} else if (*sptr == '"' || *sptr == '`') {
+		} else if (*sptr == '"' || *sptr == '`' || *sptr == '\'') {
 			/* Starting quote: remember the quote character. */
 			quote = *sptr;
 		} else if (*sptr == '#'
@@ -4276,7 +4338,8 @@ dict_table_print_low(
 
 	ut_ad(mutex_own(&(dict_sys->mutex)));
 
-	dict_update_statistics_low(table, TRUE);
+	if (srv_stats_auto_update)
+		dict_update_statistics_low(table, TRUE);
 
 	fprintf(stderr,
 		"--------------------------------------\n"

=== modified file 'storage/xtradb/dict/dict0load.c'
--- a/storage/xtradb/dict/dict0load.c	2009-03-26 06:11:11 +0000
+++ b/storage/xtradb/dict/dict0load.c	2009-06-25 01:43:25 +0000
@@ -223,7 +223,7 @@ loop:
 			/* The table definition was corrupt if there
 			is no index */
 
-			if (dict_table_get_first_index(table)) {
+			if (srv_stats_auto_update && dict_table_get_first_index(table)) {
 				dict_update_statistics_low(table, TRUE);
 			}
 

=== modified file 'storage/xtradb/fil/fil0fil.c'
--- a/storage/xtradb/fil/fil0fil.c	2009-03-26 06:11:11 +0000
+++ b/storage/xtradb/fil/fil0fil.c	2009-06-25 01:43:25 +0000
@@ -42,6 +42,10 @@ Created 10/25/1995 Heikki Tuuri
 #include "mtr0log.h"
 #include "dict0dict.h"
 #include "page0zip.h"
+#include "trx0trx.h"
+#include "trx0sys.h"
+#include "pars0pars.h"
+#include "row0mysql.h"
 
 
 /*
@@ -2977,7 +2981,7 @@ fil_open_single_table_tablespace(
 	ut_a(flags != DICT_TF_COMPACT);
 
 	file = os_file_create_simple_no_error_handling(
-		filepath, OS_FILE_OPEN, OS_FILE_READ_ONLY, &success);
+		filepath, OS_FILE_OPEN, OS_FILE_READ_WRITE, &success);
 	if (!success) {
 		/* The following call prints an error message */
 		os_file_get_last_error(TRUE);
@@ -3025,6 +3029,275 @@ fil_open_single_table_tablespace(
 	space_id = fsp_header_get_space_id(page);
 	space_flags = fsp_header_get_flags(page);
 
+	if (srv_expand_import && (space_id != id || space_flags != flags)) {
+		dulint		old_id[31];
+		dulint		new_id[31];
+		ulint		root_page[31];
+		ulint		n_index;
+		os_file_t	info_file = -1;
+		char*		info_file_path;
+		ulint	i;
+		int		len;
+		ib_uint64_t	current_lsn;
+
+		current_lsn = log_get_lsn();
+
+		/* overwrite fsp header */
+		fsp_header_init_fields(page, id, flags);
+		mach_write_to_4(page + FIL_PAGE_ARCH_LOG_NO_OR_SPACE_ID, id);
+		space_id = id;
+		space_flags = flags;
+		if (mach_read_ull(page + FIL_PAGE_FILE_FLUSH_LSN) > current_lsn)
+			mach_write_ull(page + FIL_PAGE_FILE_FLUSH_LSN, current_lsn);
+		mach_write_to_4(page + FIL_PAGE_SPACE_OR_CHKSUM,
+				srv_use_checksums
+				? buf_calc_page_new_checksum(page)
+						: BUF_NO_CHECKSUM_MAGIC);
+		mach_write_to_4(page + UNIV_PAGE_SIZE - FIL_PAGE_END_LSN_OLD_CHKSUM,
+				srv_use_checksums
+				? buf_calc_page_old_checksum(page)
+						: BUF_NO_CHECKSUM_MAGIC);
+		success = os_file_write(filepath, file, page, 0, 0, UNIV_PAGE_SIZE);
+
+		/* get file size */
+		ulint		size_low, size_high, size;
+		ib_int64_t	size_bytes;
+		os_file_get_size(file, &size_low, &size_high);
+		size_bytes = (((ib_int64_t)size_high) << 32)
+				+ (ib_int64_t)size_low;
+
+		/* get cruster index information */
+		dict_table_t*   table;
+		dict_index_t*   index;
+		table = dict_table_get_low(name);
+		index = dict_table_get_first_index(table);
+		ut_a(index->page==3);
+
+
+		/* read metadata from .exp file */
+		n_index = 0;
+		bzero(old_id, sizeof(old_id));
+		bzero(new_id, sizeof(new_id));
+		bzero(root_page, sizeof(root_page));
+
+		info_file_path = fil_make_ibd_name(name, FALSE);
+		len = strlen(info_file_path);
+		info_file_path[len - 3] = 'e';
+		info_file_path[len - 2] = 'x';
+		info_file_path[len - 1] = 'p';
+
+		info_file = os_file_create_simple_no_error_handling(
+				info_file_path, OS_FILE_OPEN, OS_FILE_READ_ONLY, &success);
+		if (!success) {
+			fprintf(stderr, "InnoDB: cannot open %s\n", info_file_path);
+			goto skip_info;
+		}
+		success = os_file_read(info_file, page, 0, 0, UNIV_PAGE_SIZE);
+		if (!success) {
+			fprintf(stderr, "InnoDB: cannot read %s\n", info_file_path);
+			goto skip_info;
+		}
+		if (mach_read_from_4(page) != 0x78706f72UL
+		    || mach_read_from_4(page + 4) != 0x74696e66UL) {
+			fprintf(stderr, "InnoDB: %s seems not to be a correct .exp file\n", info_file_path);
+			goto skip_info;
+		}
+
+		fprintf(stderr, "InnoDB: import: extended import of %s is started.\n", name);
+
+		n_index = mach_read_from_4(page + 8);
+		fprintf(stderr, "InnoDB: import: %lu indexes are detected.\n", (ulong)n_index);
+		for (i = 0; i < n_index; i++) {
+			new_id[i] =
+				dict_table_get_index_on_name(table,
+						(page + (i + 1) * 512 + 12))->id;
+			old_id[i] = mach_read_from_8(page + (i + 1) * 512);
+			root_page[i] = mach_read_from_4(page + (i + 1) * 512 + 8);
+		}
+
+skip_info:
+		if (info_file != -1)
+			os_file_close(info_file);
+
+		/*
+		if (size_bytes >= 1024 * 1024) {
+			size_bytes = ut_2pow_round(size_bytes, 1024 * 1024);
+		}
+		*/
+		if (!(flags & DICT_TF_ZSSIZE_MASK)) {
+			mem_heap_t*	heap = NULL;
+			ulint		offsets_[REC_OFFS_NORMAL_SIZE];
+			ulint*		offsets = offsets_;
+			size = (ulint) (size_bytes / UNIV_PAGE_SIZE);
+			/* over write space id of all pages */
+			ib_int64_t     offset;
+
+			rec_offs_init(offsets_);
+
+			fprintf(stderr, "InnoDB: Progress in %:");
+
+			for (offset = 0; offset < size_bytes; offset += UNIV_PAGE_SIZE) {
+				success = os_file_read(file, page,
+							(ulint)(offset & 0xFFFFFFFFUL),
+							(ulint)(offset >> 32), UNIV_PAGE_SIZE);
+				if (mach_read_from_4(page + FIL_PAGE_OFFSET) || !offset) {
+					mach_write_to_4(page + FIL_PAGE_ARCH_LOG_NO_OR_SPACE_ID, id);
+
+					for (i = 0; i < n_index; i++) {
+						if (offset / UNIV_PAGE_SIZE == root_page[i]) {
+							/* this is index root page */
+							mach_write_to_4(page + FIL_PAGE_DATA + PAGE_BTR_SEG_LEAF
+											+ FSEG_HDR_SPACE, id);
+							mach_write_to_4(page + FIL_PAGE_DATA + PAGE_BTR_SEG_TOP
+											+ FSEG_HDR_SPACE, id);
+							break;
+						}
+					}
+
+					if (fil_page_get_type(page) == FIL_PAGE_INDEX) {
+						dulint tmp = mach_read_from_8(page + (PAGE_HEADER + PAGE_INDEX_ID));
+
+						if (mach_read_from_2(page + PAGE_HEADER + PAGE_LEVEL) == 0
+						    && ut_dulint_cmp(old_id[0], tmp) == 0) {
+							/* leaf page of cluster index, reset trx_id of records */
+							rec_t*	rec;
+							rec_t*	supremum;
+							ulint	n_recs;
+
+							supremum = page_get_supremum_rec(page);
+							rec = page_rec_get_next(page_get_infimum_rec(page));
+							n_recs = page_get_n_recs(page);
+
+							while (rec && rec != supremum && n_recs > 0) {
+								ulint	offset = index->trx_id_offset;
+								if (!offset) {
+									offsets = rec_get_offsets(rec, index, offsets,
+											ULINT_UNDEFINED, &heap);
+									offset = row_get_trx_id_offset(rec, index, offsets);
+								}
+								trx_write_trx_id(rec + offset, ut_dulint_create(0, 1));
+								rec = page_rec_get_next(rec);
+								n_recs--;
+							}
+						}
+
+						for (i = 0; i < n_index; i++) {
+							if (ut_dulint_cmp(old_id[i], tmp) == 0) {
+								mach_write_to_8(page + (PAGE_HEADER + PAGE_INDEX_ID), new_id[i]);
+								break;
+							}
+						}
+					}
+
+					if (mach_read_ull(page + FIL_PAGE_LSN) > current_lsn) {
+						mach_write_ull(page + FIL_PAGE_LSN, current_lsn);
+						mach_write_ull(page + UNIV_PAGE_SIZE - FIL_PAGE_END_LSN_OLD_CHKSUM,
+										current_lsn);
+					}
+
+					mach_write_to_4(page + FIL_PAGE_SPACE_OR_CHKSUM,
+							srv_use_checksums
+							? buf_calc_page_new_checksum(page)
+									: BUF_NO_CHECKSUM_MAGIC);
+					mach_write_to_4(page + UNIV_PAGE_SIZE - FIL_PAGE_END_LSN_OLD_CHKSUM,
+							srv_use_checksums
+							? buf_calc_page_old_checksum(page)
+									: BUF_NO_CHECKSUM_MAGIC);
+
+					success = os_file_write(filepath, file, page,
+								(ulint)(offset & 0xFFFFFFFFUL),
+								(ulint)(offset >> 32), UNIV_PAGE_SIZE);
+				}
+
+				if (size_bytes
+				    && ((ib_int64_t)((offset + UNIV_PAGE_SIZE) * 100) / size_bytes)
+					!= ((offset * 100) / size_bytes)) {
+					fprintf(stderr, " %lu",
+						(ulong)((ib_int64_t)((offset + UNIV_PAGE_SIZE) * 100) / size_bytes));
+				}
+			}
+
+			fprintf(stderr, " done.\n");
+
+			/* update SYS_INDEXES set root page */
+			index = dict_table_get_first_index(table);
+			while (index) {
+				for (i = 0; i < n_index; i++) {
+					if (ut_dulint_cmp(new_id[i], index->id) == 0) {
+						break;
+					}
+				}
+
+				if (i != n_index
+				    && root_page[i] != index->page) {
+					/* must update */
+					ulint	error;
+					trx_t*	trx;
+					pars_info_t*	info = NULL;
+
+					trx = trx_allocate_for_mysql();
+					trx->op_info = "extended import";
+
+					info = pars_info_create();
+
+					pars_info_add_dulint_literal(info, "indexid", new_id[i]);
+					pars_info_add_int4_literal(info, "new_page", (lint) root_page[i]);
+
+					error = que_eval_sql(info,
+						"PROCEDURE UPDATE_INDEX_PAGE () IS\n"
+						"BEGIN\n"
+						"UPDATE SYS_INDEXES"
+						" SET PAGE_NO = :new_page"
+						" WHERE ID = :indexid;\n"
+						"COMMIT WORK;\n"
+						"END;\n",
+						FALSE, trx);
+
+					if (error != DB_SUCCESS) {
+						fprintf(stderr, "InnoDB: failed to update SYS_INDEXES\n");
+					}
+
+					trx_commit_for_mysql(trx);
+
+					trx_free_for_mysql(trx);
+
+					index->page = root_page[i];
+				}
+
+				index = dict_table_get_next_index(index);
+			}
+			if (UNIV_LIKELY_NULL(heap)) {
+				mem_heap_free(heap);
+			}
+		} else {
+			/* zip page? */
+			size = (ulint)
+			(size_bytes
+					/ dict_table_flags_to_zip_size(flags));
+			fprintf(stderr, "InnoDB: import: table %s seems to be in newer format."
+					" It may not be able to treated for now.\n", name);
+		}
+		/* .exp file should be removed */
+		success = os_file_delete(info_file_path);
+		if (!success) {
+			success = os_file_delete_if_exists(info_file_path);
+		}
+		mem_free(info_file_path);
+
+		fil_system_t*	system	= fil_system;
+		mutex_enter(&(system->mutex));
+		fil_node_t*	node = NULL;
+		fil_space_t*	space;
+		space = fil_space_get_by_id(id);
+		if (space)
+			node = UT_LIST_GET_FIRST(space->chain);
+		if (node && node->size < size) {
+			space->size += (size - node->size);
+			node->size = size;
+		}
+		mutex_exit(&(system->mutex));
+	}
+
 	ut_free(buf2);
 
 	if (UNIV_UNLIKELY(space_id != id || space_flags != flags)) {

=== modified file 'storage/xtradb/handler/ha_innodb.cc'
--- a/storage/xtradb/handler/ha_innodb.cc	2009-06-18 12:39:21 +0000
+++ b/storage/xtradb/handler/ha_innodb.cc	2009-08-03 20:09:53 +0000
@@ -157,6 +157,7 @@ static long innobase_mirrored_log_groups
 	innobase_autoinc_lock_mode;
 
 static unsigned long innobase_read_io_threads, innobase_write_io_threads;
+static my_bool innobase_thread_concurrency_timer_based;
 static long long innobase_buffer_pool_size, innobase_log_file_size;
 
 /* The default values for the following char* start-up parameters
@@ -488,6 +489,8 @@ static SHOW_VAR innodb_status_variables[
   (char*) &export_vars.innodb_dblwr_pages_written,	  SHOW_LONG},
   {"dblwr_writes",
   (char*) &export_vars.innodb_dblwr_writes,		  SHOW_LONG},
+  {"dict_tables",
+  (char*) &export_vars.innodb_dict_tables,		  SHOW_LONG},
   {"have_atomic_builtins",
   (char*) &export_vars.innodb_have_atomic_builtins,	  SHOW_BOOL},
   {"log_waits",
@@ -2100,77 +2103,6 @@ mem_free_and_error:
 		goto error;
 	}
 
-#ifdef HAVE_REPLICATION
-#ifdef MYSQL_SERVER
-	if(innobase_overwrite_relay_log_info) {
-	/* If InnoDB progressed from relay-log.info, overwrite it */
-	if (fname[0] == '\0') {
-		fprintf(stderr,
-			"InnoDB: something wrong with relay-info.log. InnoDB will not overwrite it.\n");
-	} else if (0 != strcmp(fname, trx_sys_mysql_master_log_name)
-		   || pos != trx_sys_mysql_master_log_pos) {
-		/* Overwrite relay-log.info */
-		bzero((char*) &info_file, sizeof(info_file));
-		fn_format(fname, relay_log_info_file, mysql_data_home, "", 4+32);
-
-		int error = 0;
-
-		if (!access(fname,F_OK)) {
-			/* exist */
-			if ((info_fd = my_open(fname, O_RDWR|O_BINARY, MYF(MY_WME))) < 0) {
-				error = 1;
-			} else if (init_io_cache(&info_file, info_fd, IO_SIZE*2,
-						WRITE_CACHE, 0L, 0, MYF(MY_WME))) {
-				error = 1;
-			}
-
-			if (error) {
-				if (info_fd >= 0)
-					my_close(info_fd, MYF(0));
-				goto skip_overwrite;
-			}
-		} else {
-			error = 1;
-			goto skip_overwrite;
-		}
-
-		char buff[FN_REFLEN*2+22*2+4], *pos;
-
-		my_b_seek(&info_file, 0L);
-		pos=strmov(buff, trx_sys_mysql_relay_log_name);
-		*pos++='\n';
-		pos=longlong2str(trx_sys_mysql_relay_log_pos, pos, 10);
-		*pos++='\n';
-		pos=strmov(pos, trx_sys_mysql_master_log_name);
-		*pos++='\n';
-		pos=longlong2str(trx_sys_mysql_master_log_pos, pos, 10);
-		*pos='\n';
-
-		if (my_b_write(&info_file, (uchar*) buff, (size_t) (pos-buff)+1))
-			error = 1;
-		if (flush_io_cache(&info_file))
-			error = 1;
-
-		end_io_cache(&info_file);
-		if (info_fd >= 0)
-			my_close(info_fd, MYF(0));
-skip_overwrite:
-		if (error) {
-			fprintf(stderr,
-				"InnoDB: ERROR: error occured during overwriting relay-log.info.\n");
-		} else {
-			fprintf(stderr,
-				"InnoDB: relay-log.info was overwritten.\n");
-		}
-	} else {
-		fprintf(stderr,
-			"InnoDB: InnoDB and relay-log.info are synchronized. InnoDB will not overwrite it.\n");
-	}
-	}
-#endif /* MYSQL_SERVER */
-#endif /* HAVE_REPLICATION */
-
-
 	srv_extra_undoslots = (ibool) innobase_extra_undoslots;
 
 	/* -------------- Log files ---------------------------*/
@@ -2266,6 +2198,9 @@ skip_overwrite:
 	srv_n_log_files = (ulint) innobase_log_files_in_group;
 	srv_log_file_size = (ulint) innobase_log_file_size;
 
+	srv_thread_concurrency_timer_based =
+		(ibool) innobase_thread_concurrency_timer_based;
+
 #ifdef UNIV_LOG_ARCHIVE
 	srv_log_archive_on = (ulint) innobase_log_archive;
 #endif /* UNIV_LOG_ARCHIVE */
@@ -2280,6 +2215,7 @@ skip_overwrite:
 	srv_n_write_io_threads = (ulint) innobase_write_io_threads;
 
 	srv_read_ahead &= 3;
+	srv_adaptive_checkpoint %= 3;
 
 	srv_force_recovery = (ulint) innobase_force_recovery;
 
@@ -2329,6 +2265,76 @@ skip_overwrite:
 		goto mem_free_and_error;
 	}
 
+#ifdef HAVE_REPLICATION
+#ifdef MYSQL_SERVER
+	if(innobase_overwrite_relay_log_info) {
+	/* If InnoDB progressed from relay-log.info, overwrite it */
+	if (fname[0] == '\0') {
+		fprintf(stderr,
+			"InnoDB: something wrong with relay-info.log. InnoDB will not overwrite it.\n");
+	} else if (0 != strcmp(fname, trx_sys_mysql_master_log_name)
+		   || pos != trx_sys_mysql_master_log_pos) {
+		/* Overwrite relay-log.info */
+		bzero((char*) &info_file, sizeof(info_file));
+		fn_format(fname, relay_log_info_file, mysql_data_home, "", 4+32);
+
+		int error = 0;
+
+		if (!access(fname,F_OK)) {
+			/* exist */
+			if ((info_fd = my_open(fname, O_RDWR|O_BINARY, MYF(MY_WME))) < 0) {
+				error = 1;
+			} else if (init_io_cache(&info_file, info_fd, IO_SIZE*2,
+						WRITE_CACHE, 0L, 0, MYF(MY_WME))) {
+				error = 1;
+			}
+
+			if (error) {
+				if (info_fd >= 0)
+					my_close(info_fd, MYF(0));
+				goto skip_overwrite;
+			}
+		} else {
+			error = 1;
+			goto skip_overwrite;
+		}
+
+		char buff[FN_REFLEN*2+22*2+4], *pos;
+
+		my_b_seek(&info_file, 0L);
+		pos=strmov(buff, trx_sys_mysql_relay_log_name);
+		*pos++='\n';
+		pos=longlong2str(trx_sys_mysql_relay_log_pos, pos, 10);
+		*pos++='\n';
+		pos=strmov(pos, trx_sys_mysql_master_log_name);
+		*pos++='\n';
+		pos=longlong2str(trx_sys_mysql_master_log_pos, pos, 10);
+		*pos='\n';
+
+		if (my_b_write(&info_file, (uchar*) buff, (size_t) (pos-buff)+1))
+			error = 1;
+		if (flush_io_cache(&info_file))
+			error = 1;
+
+		end_io_cache(&info_file);
+		if (info_fd >= 0)
+			my_close(info_fd, MYF(0));
+skip_overwrite:
+		if (error) {
+			fprintf(stderr,
+				"InnoDB: ERROR: error occured during overwriting relay-log.info.\n");
+		} else {
+			fprintf(stderr,
+				"InnoDB: relay-log.info was overwritten.\n");
+		}
+	} else {
+		fprintf(stderr,
+			"InnoDB: InnoDB and relay-log.info are synchronized. InnoDB will not overwrite it.\n");
+	}
+	}
+#endif /* MYSQL_SERVER */
+#endif /* HAVE_REPLICATION */
+
 	innobase_open_tables = hash_create(200);
 	pthread_mutex_init(&innobase_share_mutex, MY_MUTEX_INIT_FAST);
 	pthread_mutex_init(&prepare_commit_mutex, MY_MUTEX_INIT_FAST);
@@ -7079,7 +7085,9 @@ ha_innobase::info(
 	ib_table = prebuilt->table;
 
 	if (flag & HA_STATUS_TIME) {
-		if (innobase_stats_on_metadata) {
+		if (innobase_stats_on_metadata
+		    && (thd_sql_command(user_thd) == SQLCOM_ANALYZE
+			|| srv_stats_auto_update)) {
 			/* In sql_show we call with this flag: update
 			then statistics so that they are up-to-date */
 
@@ -9321,7 +9329,8 @@ ha_innobase::check_if_incompatible_data(
 	if (info_row_type == ROW_TYPE_DEFAULT)
 		info_row_type = ROW_TYPE_COMPACT;
 	if ((info->used_fields & HA_CREATE_USED_ROW_FORMAT) &&
-	    row_type != info_row_type) {
+	    get_row_type() != ((info->row_type == ROW_TYPE_DEFAULT)
+				? ROW_TYPE_COMPACT : info->row_type)) {
 
 		DBUG_PRINT("info", ("get_row_type()=%d != info->row_type=%d -> "
 				    "COMPATIBLE_DATA_NO",
@@ -9830,6 +9839,31 @@ static MYSQL_SYSVAR_ULONGLONG(stats_samp
   "The number of index pages to sample when calculating statistics (default 8)",
   NULL, NULL, 8, 1, ~0ULL, 0);
 
+const char *innobase_stats_method_names[]=
+{
+  "nulls_equal",
+  "nulls_unequal",
+  "nulls_ignored",
+  NullS
+};
+TYPELIB innobase_stats_method_typelib=
+{
+  array_elements(innobase_stats_method_names) - 1, "innobase_stats_method_typelib",
+  innobase_stats_method_names, NULL
+};
+static MYSQL_SYSVAR_ENUM(stats_method, srv_stats_method,
+  PLUGIN_VAR_RQCMDARG,
+  "Specifies how InnoDB index statistics collection code should threat NULLs. "
+  "Possible values of name are same to for 'myisam_stats_method'. "
+  "This is startup parameter.",
+  NULL, NULL, 0, &innobase_stats_method_typelib);
+
+static MYSQL_SYSVAR_ULONG(stats_auto_update, srv_stats_auto_update,
+  PLUGIN_VAR_RQCMDARG,
+  "Enable/Disable InnoDB's auto update statistics of indexes. "
+  "(except for ANALYZE TABLE command) 0:disable 1:enable",
+  NULL, NULL, 1, 0, 1, 0);
+
 static MYSQL_SYSVAR_BOOL(adaptive_hash_index, btr_search_enabled,
   PLUGIN_VAR_OPCMDARG,
   "Enable InnoDB adaptive hash index (enabled by default).  "
@@ -9907,6 +9941,12 @@ static MYSQL_SYSVAR_ULONG(sync_spin_loop
   "Count of spin-loop rounds in InnoDB mutexes",
   NULL, NULL, 20L, 0L, ~0L, 0);
 
+static MYSQL_SYSVAR_BOOL(thread_concurrency_timer_based,
+  innobase_thread_concurrency_timer_based,
+  PLUGIN_VAR_NOCMDARG | PLUGIN_VAR_READONLY,
+  "Use InnoDB timer based concurrency throttling. ",
+  NULL, NULL, FALSE);
+
 static MYSQL_SYSVAR_ULONG(thread_concurrency, srv_thread_concurrency,
   PLUGIN_VAR_RQCMDARG,
   "Helps in performance tuning in heavily concurrent environments. Sets the maximum number of threads allowed inside InnoDB. Value 0 will disable the thread throttling.",
@@ -9953,7 +9993,7 @@ static MYSQL_SYSVAR_STR(change_buffering
 static MYSQL_SYSVAR_ULONG(io_capacity, srv_io_capacity,
   PLUGIN_VAR_RQCMDARG,
   "Number of IO operations per second the server can do. Tunes background IO rate.",
-  NULL, NULL, 100, 100, 999999999, 0);
+  NULL, NULL, 200, 100, 999999999, 0);
 
 static MYSQL_SYSVAR_LONGLONG(ibuf_max_size, srv_ibuf_max_size,
   PLUGIN_VAR_RQCMDARG | PLUGIN_VAR_READONLY,
@@ -10008,10 +10048,36 @@ static MYSQL_SYSVAR_ENUM(read_ahead, srv
   "Control read ahead activity. (none, random, linear, [both])",
   NULL, innodb_read_ahead_update, 3, &read_ahead_typelib);
 
-static MYSQL_SYSVAR_ULONG(adaptive_checkpoint, srv_adaptive_checkpoint,
+static
+void
+innodb_adaptive_checkpoint_update(
+  THD* thd,
+  struct st_mysql_sys_var*     var,
+  void*        var_ptr,
+  const void*  save)
+{
+  *(long *)var_ptr= (*(long *)save) % 3;
+}
+const char *adaptive_checkpoint_names[]=
+{
+  "none", /* 0 */
+  "reflex", /* 1 */
+  "estimate", /* 2 */
+  /* For compatibility of the older patch */
+  "0", /* 3 ("none" + 3) */
+  "1", /* 4 ("reflex" + 3) */
+  "2", /* 5 ("estimate" + 3) */
+  NullS
+};
+TYPELIB adaptive_checkpoint_typelib=
+{
+  array_elements(adaptive_checkpoint_names) - 1, "adaptive_checkpoint_typelib",
+  adaptive_checkpoint_names, NULL
+};
+static MYSQL_SYSVAR_ENUM(adaptive_checkpoint, srv_adaptive_checkpoint,
   PLUGIN_VAR_RQCMDARG,
-  "Enable/Disable flushing along modified age. 0:disable 1:enable",
-  NULL, NULL, 0, 0, 1, 0);
+  "Enable/Disable flushing along modified age. ([none], reflex, estimate)",
+  NULL, innodb_adaptive_checkpoint_update, 0, &adaptive_checkpoint_typelib);
 
 static MYSQL_SYSVAR_ULONG(enable_unsafe_group_commit, srv_enable_unsafe_group_commit,
   PLUGIN_VAR_RQCMDARG,
@@ -10021,18 +10087,28 @@ static MYSQL_SYSVAR_ULONG(enable_unsafe_
 static MYSQL_SYSVAR_ULONG(read_io_threads, innobase_read_io_threads,
   PLUGIN_VAR_RQCMDARG | PLUGIN_VAR_READONLY,
   "Number of background read I/O threads in InnoDB.",
-  NULL, NULL, 1, 1, 64, 0);
+  NULL, NULL, 8, 1, 64, 0);
 
 static MYSQL_SYSVAR_ULONG(write_io_threads, innobase_write_io_threads,
   PLUGIN_VAR_RQCMDARG | PLUGIN_VAR_READONLY,
   "Number of background write I/O threads in InnoDB.",
-  NULL, NULL, 1, 1, 64, 0);
+  NULL, NULL, 8, 1, 64, 0);
+
+static MYSQL_SYSVAR_ULONG(expand_import, srv_expand_import,
+  PLUGIN_VAR_RQCMDARG,
+  "Enable/Disable converting automatically *.ibd files when import tablespace.",
+  NULL, NULL, 0, 0, 1, 0);
 
 static MYSQL_SYSVAR_ULONG(extra_rsegments, srv_extra_rsegments,
   PLUGIN_VAR_RQCMDARG | PLUGIN_VAR_READONLY,
   "Number of extra user rollback segments when create new database.",
   NULL, NULL, 0, 0, 127, 0);
 
+static MYSQL_SYSVAR_ULONG(dict_size_limit, srv_dict_size_limit,
+  PLUGIN_VAR_RQCMDARG,
+  "Limit the allocated memory for dictionary cache. (0: unlimited)",
+  NULL, NULL, 0, 0, LONG_MAX, 0);
+
 static struct st_mysql_sys_var* innobase_system_variables[]= {
   MYSQL_SYSVAR(additional_mem_pool_size),
   MYSQL_SYSVAR(autoextend_increment),
@@ -10069,6 +10145,8 @@ static struct st_mysql_sys_var* innobase
   MYSQL_SYSVAR(overwrite_relay_log_info),
   MYSQL_SYSVAR(rollback_on_timeout),
   MYSQL_SYSVAR(stats_on_metadata),
+  MYSQL_SYSVAR(stats_method),
+  MYSQL_SYSVAR(stats_auto_update),
   MYSQL_SYSVAR(stats_sample_pages),
   MYSQL_SYSVAR(adaptive_hash_index),
   MYSQL_SYSVAR(replication_delay),
@@ -10078,6 +10156,7 @@ static struct st_mysql_sys_var* innobase
   MYSQL_SYSVAR(sync_spin_loops),
   MYSQL_SYSVAR(table_locks),
   MYSQL_SYSVAR(thread_concurrency),
+  MYSQL_SYSVAR(thread_concurrency_timer_based),
   MYSQL_SYSVAR(thread_sleep_delay),
   MYSQL_SYSVAR(autoinc_lock_mode),
   MYSQL_SYSVAR(show_verbose_locks),
@@ -10093,7 +10172,9 @@ static struct st_mysql_sys_var* innobase
   MYSQL_SYSVAR(enable_unsafe_group_commit),
   MYSQL_SYSVAR(read_io_threads),
   MYSQL_SYSVAR(write_io_threads),
+  MYSQL_SYSVAR(expand_import),
   MYSQL_SYSVAR(extra_rsegments),
+  MYSQL_SYSVAR(dict_size_limit),
   MYSQL_SYSVAR(use_sys_malloc),
   MYSQL_SYSVAR(change_buffering),
   NULL
@@ -10287,6 +10368,8 @@ i_s_innodb_cmp,
 i_s_innodb_cmp_reset,
 i_s_innodb_cmpmem,
 i_s_innodb_cmpmem_reset,
+i_s_innodb_table_stats,
+i_s_innodb_index_stats,
 i_s_innodb_patches
 mysql_declare_plugin_end;
 

=== modified file 'storage/xtradb/handler/i_s.cc'
--- a/storage/xtradb/handler/i_s.cc	2009-05-04 02:45:47 +0000
+++ b/storage/xtradb/handler/i_s.cc	2009-06-25 01:43:25 +0000
@@ -45,6 +45,7 @@ extern "C" {
 #include "dict0dict.h" /* for dict_index_get_if_in_cache */
 #include "trx0rseg.h" /* for trx_rseg_struct */
 #include "trx0sys.h" /* for trx_sys */
+#include "dict0dict.h" /* for dict_sys */
 /* from buf0buf.c */
 struct buf_chunk_struct{
 	ulint		mem_size;	/* allocated size of the chunk */
@@ -2282,7 +2283,8 @@ i_s_cmpmem_fill_low(
 
 	RETURN_IF_INNODB_NOT_STARTED(tables->schema_table_name);
 
-	buf_pool_mutex_enter();
+	//buf_pool_mutex_enter();
+	mutex_enter(&zip_free_mutex);
 
 	for (uint x = 0; x <= BUF_BUDDY_SIZES; x++) {
 		buf_buddy_stat_t*	buddy_stat = &buf_buddy_stat[x];
@@ -2308,7 +2310,8 @@ i_s_cmpmem_fill_low(
 		}
 	}
 
-	buf_pool_mutex_exit();
+	//buf_pool_mutex_exit();
+	mutex_exit(&zip_free_mutex);
 	DBUG_RETURN(status);
 }
 
@@ -2653,3 +2656,299 @@ UNIV_INTERN struct st_mysql_plugin	i_s_i
 	/* void* */
 	STRUCT_FLD(__reserved1, NULL)
 };
+
+/***********************************************************************
+*/
+static ST_FIELD_INFO	i_s_innodb_table_stats_info[] =
+{
+	{STRUCT_FLD(field_name,		"table_name"),
+	 STRUCT_FLD(field_length,	NAME_LEN),
+	 STRUCT_FLD(field_type,		MYSQL_TYPE_STRING),
+	 STRUCT_FLD(value,		0),
+	 STRUCT_FLD(field_flags,	0),
+	 STRUCT_FLD(old_name,		""),
+	 STRUCT_FLD(open_method,	SKIP_OPEN_TABLE)},
+
+	{STRUCT_FLD(field_name,		"rows"),
+	 STRUCT_FLD(field_length,	MY_INT64_NUM_DECIMAL_DIGITS),
+	 STRUCT_FLD(field_type,		MYSQL_TYPE_LONGLONG),
+	 STRUCT_FLD(value,		0),
+	 STRUCT_FLD(field_flags,	MY_I_S_UNSIGNED),
+	 STRUCT_FLD(old_name,		""),
+	 STRUCT_FLD(open_method,	SKIP_OPEN_TABLE)},
+
+	{STRUCT_FLD(field_name,		"clust_size"),
+	 STRUCT_FLD(field_length,	MY_INT64_NUM_DECIMAL_DIGITS),
+	 STRUCT_FLD(field_type,		MYSQL_TYPE_LONGLONG),
+	 STRUCT_FLD(value,		0),
+	 STRUCT_FLD(field_flags,	MY_I_S_UNSIGNED),
+	 STRUCT_FLD(old_name,		""),
+	 STRUCT_FLD(open_method,	SKIP_OPEN_TABLE)},
+
+	{STRUCT_FLD(field_name,		"other_size"),
+	 STRUCT_FLD(field_length,	MY_INT64_NUM_DECIMAL_DIGITS),
+	 STRUCT_FLD(field_type,		MYSQL_TYPE_LONGLONG),
+	 STRUCT_FLD(value,		0),
+	 STRUCT_FLD(field_flags,	MY_I_S_UNSIGNED),
+	 STRUCT_FLD(old_name,		""),
+	 STRUCT_FLD(open_method,	SKIP_OPEN_TABLE)},
+
+	{STRUCT_FLD(field_name,		"modified"),
+	 STRUCT_FLD(field_length,	MY_INT64_NUM_DECIMAL_DIGITS),
+	 STRUCT_FLD(field_type,		MYSQL_TYPE_LONGLONG),
+	 STRUCT_FLD(value,		0),
+	 STRUCT_FLD(field_flags,	MY_I_S_UNSIGNED),
+	 STRUCT_FLD(old_name,		""),
+	 STRUCT_FLD(open_method,	SKIP_OPEN_TABLE)},
+
+	END_OF_ST_FIELD_INFO
+};
+
+static ST_FIELD_INFO	i_s_innodb_index_stats_info[] =
+{
+	{STRUCT_FLD(field_name,		"table_name"),
+	 STRUCT_FLD(field_length,	NAME_LEN),
+	 STRUCT_FLD(field_type,		MYSQL_TYPE_STRING),
+	 STRUCT_FLD(value,		0),
+	 STRUCT_FLD(field_flags,	0),
+	 STRUCT_FLD(old_name,		""),
+	 STRUCT_FLD(open_method,	SKIP_OPEN_TABLE)},
+
+	{STRUCT_FLD(field_name,		"index_name"),
+	 STRUCT_FLD(field_length,	NAME_LEN),
+	 STRUCT_FLD(field_type,		MYSQL_TYPE_STRING),
+	 STRUCT_FLD(value,		0),
+	 STRUCT_FLD(field_flags,	0),
+	 STRUCT_FLD(old_name,		""),
+	 STRUCT_FLD(open_method,	SKIP_OPEN_TABLE)},
+
+	{STRUCT_FLD(field_name,		"fields"),
+	 STRUCT_FLD(field_length,	MY_INT64_NUM_DECIMAL_DIGITS),
+	 STRUCT_FLD(field_type,		MYSQL_TYPE_LONGLONG),
+	 STRUCT_FLD(value,		0),
+	 STRUCT_FLD(field_flags,	MY_I_S_UNSIGNED),
+	 STRUCT_FLD(old_name,		""),
+	 STRUCT_FLD(open_method,	SKIP_OPEN_TABLE)},
+
+	{STRUCT_FLD(field_name,		"row_per_keys"),
+	 STRUCT_FLD(field_length,	256),
+	 STRUCT_FLD(field_type,		MYSQL_TYPE_STRING),
+	 STRUCT_FLD(value,		0),
+	 STRUCT_FLD(field_flags,	0),
+	 STRUCT_FLD(old_name,		""),
+	 STRUCT_FLD(open_method,	SKIP_OPEN_TABLE)},
+
+	{STRUCT_FLD(field_name,		"index_size"),
+	 STRUCT_FLD(field_length,	MY_INT64_NUM_DECIMAL_DIGITS),
+	 STRUCT_FLD(field_type,		MYSQL_TYPE_LONGLONG),
+	 STRUCT_FLD(value,		0),
+	 STRUCT_FLD(field_flags,	MY_I_S_UNSIGNED),
+	 STRUCT_FLD(old_name,		""),
+	 STRUCT_FLD(open_method,	SKIP_OPEN_TABLE)},
+
+	{STRUCT_FLD(field_name,		"leaf_pages"),
+	 STRUCT_FLD(field_length,	MY_INT64_NUM_DECIMAL_DIGITS),
+	 STRUCT_FLD(field_type,		MYSQL_TYPE_LONGLONG),
+	 STRUCT_FLD(value,		0),
+	 STRUCT_FLD(field_flags,	MY_I_S_UNSIGNED),
+	 STRUCT_FLD(old_name,		""),
+	 STRUCT_FLD(open_method,	SKIP_OPEN_TABLE)},
+
+	END_OF_ST_FIELD_INFO
+};
+
+static
+int
+i_s_innodb_table_stats_fill(
+/*========================*/
+	THD*		thd,
+	TABLE_LIST*	tables,
+	COND*		cond)
+{
+	TABLE*	i_s_table	= (TABLE *) tables->table;
+	int	status	= 0;
+	dict_table_t*	table;
+
+	DBUG_ENTER("i_s_innodb_table_stats_fill");
+
+	/* deny access to non-superusers */
+	if (check_global_access(thd, PROCESS_ACL)) {
+		DBUG_RETURN(0);
+	}
+
+	mutex_enter(&(dict_sys->mutex));
+
+	table = UT_LIST_GET_FIRST(dict_sys->table_LRU);
+
+	while (table) {
+		if (table->stat_clustered_index_size == 0) {
+			table = UT_LIST_GET_NEXT(table_LRU, table);
+			continue;
+		}
+
+		field_store_string(i_s_table->field[0], table->name);
+		i_s_table->field[1]->store(table->stat_n_rows);
+		i_s_table->field[2]->store(table->stat_clustered_index_size);
+		i_s_table->field[3]->store(table->stat_sum_of_other_index_sizes);
+		i_s_table->field[4]->store(table->stat_modified_counter);
+
+		if (schema_table_store_record(thd, i_s_table)) {
+			status = 1;
+			break;
+		}
+
+		table = UT_LIST_GET_NEXT(table_LRU, table);
+	}
+
+	mutex_exit(&(dict_sys->mutex));
+
+	DBUG_RETURN(status);
+}
+
+static
+int
+i_s_innodb_index_stats_fill(
+/*========================*/
+	THD*		thd,
+	TABLE_LIST*	tables,
+	COND*		cond)
+{
+	TABLE*	i_s_table	= (TABLE *) tables->table;
+	int	status	= 0;
+	dict_table_t*	table;
+	dict_index_t*	index;
+
+	DBUG_ENTER("i_s_innodb_index_stats_fill");
+
+	/* deny access to non-superusers */
+	if (check_global_access(thd, PROCESS_ACL)) {
+		DBUG_RETURN(0);
+	}
+
+	mutex_enter(&(dict_sys->mutex));
+
+	table = UT_LIST_GET_FIRST(dict_sys->table_LRU);
+
+	while (table) {
+		if (table->stat_clustered_index_size == 0) {
+			table = UT_LIST_GET_NEXT(table_LRU, table);
+			continue;
+		}
+
+		ib_int64_t	n_rows = table->stat_n_rows;
+
+		if (n_rows < 0) {
+			n_rows = 0;
+		}
+
+		index = dict_table_get_first_index(table);
+
+		while (index) {
+			char	buff[256+1];
+			char	row_per_keys[256+1];
+			ulint	i;
+
+			field_store_string(i_s_table->field[0], table->name);
+			field_store_string(i_s_table->field[1], index->name);
+			i_s_table->field[2]->store(index->n_uniq);
+
+			row_per_keys[0] = '\0';
+			if (index->stat_n_diff_key_vals) {
+				for (i = 1; i <= index->n_uniq; i++) {
+					ib_int64_t	rec_per_key;
+					if (index->stat_n_diff_key_vals[i]) {
+						rec_per_key = n_rows / index->stat_n_diff_key_vals[i];
+					} else {
+						rec_per_key = n_rows;
+					}
+					snprintf(buff, 256, (i == index->n_uniq)?"%llu":"%llu, ",
+						 rec_per_key);
+					strncat(row_per_keys, buff, 256 - strlen(row_per_keys));
+				}
+			}
+			field_store_string(i_s_table->field[3], row_per_keys);
+
+			i_s_table->field[4]->store(index->stat_index_size);
+			i_s_table->field[5]->store(index->stat_n_leaf_pages);
+
+			if (schema_table_store_record(thd, i_s_table)) {
+				status = 1;
+				break;
+			}
+
+			index = dict_table_get_next_index(index);
+		}
+
+		if (status == 1) {
+			break;
+		}
+
+		table = UT_LIST_GET_NEXT(table_LRU, table);
+	}
+
+	mutex_exit(&(dict_sys->mutex));
+
+	DBUG_RETURN(status);
+}
+
+static
+int
+i_s_innodb_table_stats_init(
+/*========================*/
+	void*   p)
+{
+	DBUG_ENTER("i_s_innodb_table_stats_init");
+	ST_SCHEMA_TABLE* schema = (ST_SCHEMA_TABLE*) p;
+
+	schema->fields_info = i_s_innodb_table_stats_info;
+	schema->fill_table = i_s_innodb_table_stats_fill;
+
+	DBUG_RETURN(0);
+}
+
+static
+int
+i_s_innodb_index_stats_init(
+/*========================*/
+	void*	p)
+{
+	DBUG_ENTER("i_s_innodb_index_stats_init");
+	ST_SCHEMA_TABLE* schema = (ST_SCHEMA_TABLE*) p;
+
+	schema->fields_info = i_s_innodb_index_stats_info;
+	schema->fill_table = i_s_innodb_index_stats_fill;
+
+	DBUG_RETURN(0);
+}
+
+UNIV_INTERN struct st_mysql_plugin	i_s_innodb_table_stats =
+{
+	STRUCT_FLD(type, MYSQL_INFORMATION_SCHEMA_PLUGIN),
+	STRUCT_FLD(info, &i_s_info),
+	STRUCT_FLD(name, "INNODB_TABLE_STATS"),
+	STRUCT_FLD(author, plugin_author),
+	STRUCT_FLD(descr, "InnoDB table statistics in memory"),
+	STRUCT_FLD(license, PLUGIN_LICENSE_GPL),
+	STRUCT_FLD(init, i_s_innodb_table_stats_init),
+	STRUCT_FLD(deinit, i_s_common_deinit),
+	STRUCT_FLD(version, 0x0100 /* 1.0 */),
+	STRUCT_FLD(status_vars, NULL),
+	STRUCT_FLD(system_vars, NULL),
+	STRUCT_FLD(__reserved1, NULL)
+};
+
+UNIV_INTERN struct st_mysql_plugin	i_s_innodb_index_stats =
+{
+	STRUCT_FLD(type, MYSQL_INFORMATION_SCHEMA_PLUGIN),
+	STRUCT_FLD(info, &i_s_info),
+	STRUCT_FLD(name, "INNODB_INDEX_STATS"),
+	STRUCT_FLD(author, plugin_author),
+	STRUCT_FLD(descr, "InnoDB index statistics in memory"),
+	STRUCT_FLD(license, PLUGIN_LICENSE_GPL),
+	STRUCT_FLD(init, i_s_innodb_index_stats_init),
+	STRUCT_FLD(deinit, i_s_common_deinit),
+	STRUCT_FLD(version, 0x0100 /* 1.0 */),
+	STRUCT_FLD(status_vars, NULL),
+	STRUCT_FLD(system_vars, NULL),
+	STRUCT_FLD(__reserved1, NULL)
+};

=== modified file 'storage/xtradb/handler/i_s.h'
--- a/storage/xtradb/handler/i_s.h	2009-03-26 06:11:11 +0000
+++ b/storage/xtradb/handler/i_s.h	2009-06-25 01:43:25 +0000
@@ -37,5 +37,7 @@ extern struct st_mysql_plugin	i_s_innodb
 extern struct st_mysql_plugin	i_s_innodb_cmpmem_reset;
 extern struct st_mysql_plugin	i_s_innodb_patches;
 extern struct st_mysql_plugin	i_s_innodb_rseg;
+extern struct st_mysql_plugin	i_s_innodb_table_stats;
+extern struct st_mysql_plugin	i_s_innodb_index_stats;
 
 #endif /* i_s_h */

=== modified file 'storage/xtradb/handler/innodb_patch_info.h'
--- a/storage/xtradb/handler/innodb_patch_info.h	2009-05-04 02:45:47 +0000
+++ b/storage/xtradb/handler/innodb_patch_info.h	2009-07-06 05:47:15 +0000
@@ -31,5 +31,12 @@ struct innodb_enhancement {
 {"innodb_expand_undo_slots","expandable maximum number of undo slots","from 1024 (default) to about 4000","http://www.percona.com/docs/wiki/percona-xtradb"},
 {"innodb_extra_rseg","allow to create extra rollback segments","When create new db, the new parameter allows to create more rollback segments","http://www.percona.com/docs/wiki/percona-xtradb"},
 {"innodb_overwrite_relay_log_info","overwrite relay-log.info when slave recovery","Building as plugin, it is not used.","http://www.percona.com/docs/wiki/percona-xtradb:innodb_overwrite_relay_log_info"},
+{"innodb_pause_in_spin","use 'pause' instruction during spin loop for x86 (gcc)","","http://www.percona.com/docs/wiki/percona-xtradb"},
+{"innodb_thread_concurrency_timer_based","use InnoDB timer based concurrency throttling (backport from MySQL 5.4.0)","",""},
+{"innodb_expand_import","convert .ibd file automatically when import tablespace","the files are generated by xtrabackup export mode.","http://www.percona.com/docs/wiki/percona-xtradb"},
+{"innodb_dict_size_limit","Limit dictionary cache size","Variable innodb_dict_size_limit in bytes","http://www.percona.com/docs/wiki/percona-xtradb"},
+{"innodb_split_buf_pool_mutex","More fix of buffer_pool mutex","Spliting buf_pool_mutex and optimizing based on innodb_opt_lru_count","http://www.percona.com/docs/wiki/percona-xtradb"},
+{"innodb_stats","Additional features about InnoDB statistics/optimizer","","http://www.percona.com/docs/wiki/percona-xtradb"},
+{"innodb_recovery_patches","Bugfixes and adjustments about recovery process","","http://www.percona.com/docs/wiki/percona-xtradb"},
 {NULL, NULL, NULL, NULL}
 };

=== modified file 'storage/xtradb/ibuf/ibuf0ibuf.c'
--- a/storage/xtradb/ibuf/ibuf0ibuf.c	2009-06-22 08:06:35 +0000
+++ b/storage/xtradb/ibuf/ibuf0ibuf.c	2009-08-03 20:09:53 +0000
@@ -472,6 +472,7 @@ ibuf_init_at_db_start(void)
 
 	/* Use old-style record format for the insert buffer. */
 	table = dict_mem_table_create(IBUF_TABLE_NAME, IBUF_SPACE_ID, 1, 0);
+	table->n_mysql_handles_opened = 1; /* for pin */
 
 	dict_mem_table_add_col(table, heap, "DUMMY_COLUMN", DATA_BINARY, 0, 0);
 

=== modified file 'storage/xtradb/include/buf0buddy.h'
--- a/storage/xtradb/include/buf0buddy.h	2009-05-04 02:45:47 +0000
+++ b/storage/xtradb/include/buf0buddy.h	2009-06-25 01:43:25 +0000
@@ -49,10 +49,11 @@ buf_buddy_alloc(
 			/* out: allocated block,
 			possibly NULL if lru == NULL */
 	ulint	size,	/* in: block size, up to UNIV_PAGE_SIZE */
-	ibool*	lru)	/* in: pointer to a variable that will be assigned
+	ibool*	lru,	/* in: pointer to a variable that will be assigned
 			TRUE if storage was allocated from the LRU list
 			and buf_pool_mutex was temporarily released,
 			or NULL if the LRU list should not be used */
+	ibool	have_page_hash_mutex)
 	__attribute__((malloc));
 
 /**************************************************************************
@@ -63,7 +64,8 @@ buf_buddy_free(
 /*===========*/
 	void*	buf,	/* in: block to be freed, must not be
 			pointed to by the buffer pool */
-	ulint	size)	/* in: block size, up to UNIV_PAGE_SIZE */
+	ulint	size,	/* in: block size, up to UNIV_PAGE_SIZE */
+	ibool	have_page_hash_mutex)
 	__attribute__((nonnull));
 
 /** Statistics of buddy blocks of a given size. */

=== modified file 'storage/xtradb/include/buf0buddy.ic'
--- a/storage/xtradb/include/buf0buddy.ic	2009-05-04 02:45:47 +0000
+++ b/storage/xtradb/include/buf0buddy.ic	2009-06-25 01:43:25 +0000
@@ -44,10 +44,11 @@ buf_buddy_alloc_low(
 			possibly NULL if lru==NULL */
 	ulint	i,	/* in: index of buf_pool->zip_free[],
 			or BUF_BUDDY_SIZES */
-	ibool*	lru)	/* in: pointer to a variable that will be assigned
+	ibool*	lru,	/* in: pointer to a variable that will be assigned
 			TRUE if storage was allocated from the LRU list
 			and buf_pool_mutex was temporarily released,
 			or NULL if the LRU list should not be used */
+	ibool	have_page_hash_mutex)
 	__attribute__((malloc));
 
 /**************************************************************************
@@ -58,8 +59,9 @@ buf_buddy_free_low(
 /*===============*/
 	void*	buf,	/* in: block to be freed, must not be
 			pointed to by the buffer pool */
-	ulint	i)	/* in: index of buf_pool->zip_free[],
+	ulint	i,	/* in: index of buf_pool->zip_free[],
 			or BUF_BUDDY_SIZES */
+	ibool	have_page_hash_mutex)
 	__attribute__((nonnull));
 
 /**************************************************************************
@@ -98,14 +100,15 @@ buf_buddy_alloc(
 			/* out: allocated block,
 			possibly NULL if lru == NULL */
 	ulint	size,	/* in: block size, up to UNIV_PAGE_SIZE */
-	ibool*	lru)	/* in: pointer to a variable that will be assigned
+	ibool*	lru,	/* in: pointer to a variable that will be assigned
 			TRUE if storage was allocated from the LRU list
 			and buf_pool_mutex was temporarily released,
 			or NULL if the LRU list should not be used */
+	ibool	have_page_hash_mutex)
 {
-	ut_ad(buf_pool_mutex_own());
+	//ut_ad(buf_pool_mutex_own());
 
-	return(buf_buddy_alloc_low(buf_buddy_get_slot(size), lru));
+	return(buf_buddy_alloc_low(buf_buddy_get_slot(size), lru, have_page_hash_mutex));
 }
 
 /**************************************************************************
@@ -116,11 +119,24 @@ buf_buddy_free(
 /*===========*/
 	void*	buf,	/* in: block to be freed, must not be
 			pointed to by the buffer pool */
-	ulint	size)	/* in: block size, up to UNIV_PAGE_SIZE */
+	ulint	size,	/* in: block size, up to UNIV_PAGE_SIZE */
+	ibool	have_page_hash_mutex)
 {
-	ut_ad(buf_pool_mutex_own());
+	//ut_ad(buf_pool_mutex_own());
 
-	buf_buddy_free_low(buf, buf_buddy_get_slot(size));
+	if (!have_page_hash_mutex) {
+		mutex_enter(&LRU_list_mutex);
+		rw_lock_x_lock(&page_hash_latch);
+	}
+
+	mutex_enter(&zip_free_mutex);
+	buf_buddy_free_low(buf, buf_buddy_get_slot(size), TRUE);
+	mutex_exit(&zip_free_mutex);
+
+	if (!have_page_hash_mutex) {
+		mutex_exit(&LRU_list_mutex);
+		rw_lock_x_unlock(&page_hash_latch);
+	}
 }
 
 #ifdef UNIV_MATERIALIZE

=== modified file 'storage/xtradb/include/buf0buf.h'
--- a/storage/xtradb/include/buf0buf.h	2009-05-04 04:32:30 +0000
+++ b/storage/xtradb/include/buf0buf.h	2009-06-25 01:43:25 +0000
@@ -1024,7 +1024,7 @@ struct buf_page_struct{
 
 	/* 2. Page flushing fields; protected by buf_pool_mutex */
 
-	UT_LIST_NODE_T(buf_page_t) list;
+	/* UT_LIST_NODE_T(buf_page_t) list; */
 					/* based on state, this is a list
 					node in one of the following lists
 					in buf_pool:
@@ -1034,6 +1034,10 @@ struct buf_page_struct{
 					BUF_BLOCK_ZIP_DIRTY:	flush_list
 					BUF_BLOCK_ZIP_PAGE:	zip_clean
 					BUF_BLOCK_ZIP_FREE:	zip_free[] */
+	/* resplit for optimistic use */
+	UT_LIST_NODE_T(buf_page_t) free;
+	UT_LIST_NODE_T(buf_page_t) flush_list;
+	UT_LIST_NODE_T(buf_page_t) zip_list; /* zip_clean or zip_free[] */
 #ifdef UNIV_DEBUG
 	ibool		in_flush_list;	/* TRUE if in buf_pool->flush_list;
 					when buf_pool_mutex is free, the
@@ -1104,11 +1108,11 @@ struct buf_block_struct{
 					a block is in the unzip_LRU list
 					if page.state == BUF_BLOCK_FILE_PAGE
 					and page.zip.data != NULL */
-#ifdef UNIV_DEBUG
+//#ifdef UNIV_DEBUG
 	ibool		in_unzip_LRU_list;/* TRUE if the page is in the
 					decompressed LRU list;
 					used in debugging */
-#endif /* UNIV_DEBUG */
+//#endif /* UNIV_DEBUG */
 	byte*		frame;		/* pointer to buffer frame which
 					is of size UNIV_PAGE_SIZE, and
 					aligned to an address divisible by
@@ -1316,6 +1320,12 @@ struct buf_pool_struct{
 /* mutex protecting the buffer pool struct and control blocks, except the
 read-write lock in them */
 extern mutex_t	buf_pool_mutex;
+extern mutex_t	LRU_list_mutex;
+extern mutex_t	flush_list_mutex;
+extern rw_lock_t	page_hash_latch;
+extern mutex_t	free_list_mutex;
+extern mutex_t	zip_free_mutex;
+extern mutex_t	zip_hash_mutex;
 /* mutex protecting the control blocks of compressed-only pages
 (of type buf_page_t, not buf_block_t) */
 extern mutex_t	buf_pool_zip_mutex;

=== modified file 'storage/xtradb/include/buf0buf.ic'
--- a/storage/xtradb/include/buf0buf.ic	2009-05-04 02:45:47 +0000
+++ b/storage/xtradb/include/buf0buf.ic	2009-06-25 01:43:25 +0000
@@ -100,7 +100,9 @@ buf_pool_get_oldest_modification(void)
 	buf_page_t*	bpage;
 	ib_uint64_t	lsn;
 
-	buf_pool_mutex_enter();
+try_again:
+	//buf_pool_mutex_enter();
+	mutex_enter(&flush_list_mutex);
 
 	bpage = UT_LIST_GET_LAST(buf_pool->flush_list);
 
@@ -109,9 +111,14 @@ buf_pool_get_oldest_modification(void)
 	} else {
 		ut_ad(bpage->in_flush_list);
 		lsn = bpage->oldest_modification;
+		if (lsn == 0) {
+			mutex_exit(&flush_list_mutex);
+			goto try_again;
+		}
 	}
 
-	buf_pool_mutex_exit();
+	//buf_pool_mutex_exit();
+	mutex_exit(&flush_list_mutex);
 
 	/* The returned answer may be out of date: the flush_list can
 	change after the mutex has been released. */
@@ -128,7 +135,8 @@ buf_pool_clock_tic(void)
 /*====================*/
 			/* out: new clock value */
 {
-	ut_ad(buf_pool_mutex_own());
+	//ut_ad(buf_pool_mutex_own());
+	ut_ad(mutex_own(&LRU_list_mutex));
 
 	buf_pool->ulint_clock++;
 
@@ -246,7 +254,7 @@ buf_page_in_file(
 	case BUF_BLOCK_ZIP_FREE:
 		/* This is a free page in buf_pool->zip_free[].
 		Such pages should only be accessed by the buddy allocator. */
-		ut_error;
+		/* ut_error; */ /* optimistic */
 		break;
 	case BUF_BLOCK_ZIP_PAGE:
 	case BUF_BLOCK_ZIP_DIRTY:
@@ -288,7 +296,7 @@ buf_page_get_LRU_position(
 	const buf_page_t*	bpage)	/* in: control block */
 {
 	ut_ad(buf_page_in_file(bpage));
-	ut_ad(buf_pool_mutex_own());
+	//ut_ad(buf_pool_mutex_own()); /* This is used in optimistic */
 
 	return(bpage->LRU_position);
 }
@@ -305,7 +313,7 @@ buf_page_get_mutex(
 {
 	switch (buf_page_get_state(bpage)) {
 	case BUF_BLOCK_ZIP_FREE:
-		ut_error;
+		/* ut_error; */ /* optimistic */
 		return(NULL);
 	case BUF_BLOCK_ZIP_PAGE:
 	case BUF_BLOCK_ZIP_DIRTY:
@@ -410,7 +418,7 @@ buf_page_set_io_fix(
 	buf_page_t*	bpage,	/* in/out: control block */
 	enum buf_io_fix	io_fix)	/* in: io_fix state */
 {
-	ut_ad(buf_pool_mutex_own());
+	//ut_ad(buf_pool_mutex_own());
 	ut_ad(mutex_own(buf_page_get_mutex(bpage)));
 
 	bpage->io_fix = io_fix;
@@ -438,12 +446,13 @@ buf_page_can_relocate(
 /*==================*/
 	const buf_page_t*	bpage)	/* control block being relocated */
 {
-	ut_ad(buf_pool_mutex_own());
+	//ut_ad(buf_pool_mutex_own());
 	ut_ad(mutex_own(buf_page_get_mutex(bpage)));
 	ut_ad(buf_page_in_file(bpage));
-	ut_ad(bpage->in_LRU_list);
+	/* optimistic */
+	//ut_ad(bpage->in_LRU_list);
 
-	return(buf_page_get_io_fix(bpage) == BUF_IO_NONE
+	return(bpage->in_LRU_list && bpage->io_fix == BUF_IO_NONE
 	       && bpage->buf_fix_count == 0);
 }
 
@@ -457,7 +466,7 @@ buf_page_is_old(
 	const buf_page_t*	bpage)	/* in: control block */
 {
 	ut_ad(buf_page_in_file(bpage));
-	ut_ad(buf_pool_mutex_own());
+	//ut_ad(buf_pool_mutex_own()); /* This is used in optimistic */
 
 	return(bpage->old);
 }
@@ -472,7 +481,8 @@ buf_page_set_old(
 	ibool		old)	/* in: old */
 {
 	ut_a(buf_page_in_file(bpage));
-	ut_ad(buf_pool_mutex_own());
+	//ut_ad(buf_pool_mutex_own());
+	ut_ad(mutex_own(&LRU_list_mutex));
 	ut_ad(bpage->in_LRU_list);
 
 #ifdef UNIV_LRU_DEBUG
@@ -728,17 +738,17 @@ buf_block_free(
 /*===========*/
 	buf_block_t*	block)	/* in, own: block to be freed */
 {
-	buf_pool_mutex_enter();
+	//buf_pool_mutex_enter();
 
 	mutex_enter(&block->mutex);
 
 	ut_a(buf_block_get_state(block) != BUF_BLOCK_FILE_PAGE);
 
-	buf_LRU_block_free_non_file_page(block);
+	buf_LRU_block_free_non_file_page(block, FALSE);
 
 	mutex_exit(&block->mutex);
 
-	buf_pool_mutex_exit();
+	//buf_pool_mutex_exit();
 }
 
 /*************************************************************************
@@ -783,14 +793,23 @@ buf_page_io_query(
 	buf_page_t*	bpage)	/* in: buf_pool block, must be bufferfixed */
 {
 	ibool	io_fixed;
+	mutex_t* block_mutex = buf_page_get_mutex(bpage);
 
-	buf_pool_mutex_enter();
+	//buf_pool_mutex_enter();
+retry_lock:
+	mutex_enter(block_mutex);
+	if (block_mutex != buf_page_get_mutex(bpage)) {
+		mutex_exit(block_mutex);
+		block_mutex = buf_page_get_mutex(bpage);
+		goto retry_lock;
+	}
 
 	ut_ad(buf_page_in_file(bpage));
 	ut_ad(bpage->buf_fix_count > 0);
 
 	io_fixed = buf_page_get_io_fix(bpage) != BUF_IO_NONE;
-	buf_pool_mutex_exit();
+	//buf_pool_mutex_exit();
+	mutex_exit(block_mutex);
 
 	return(io_fixed);
 }
@@ -809,7 +828,13 @@ buf_page_get_newest_modification(
 	ib_uint64_t	lsn;
 	mutex_t*	block_mutex = buf_page_get_mutex(bpage);
 
+retry_lock:
 	mutex_enter(block_mutex);
+	if (block_mutex != buf_page_get_mutex(bpage)) {
+		mutex_exit(block_mutex);
+		block_mutex = buf_page_get_mutex(bpage);
+		goto retry_lock;
+	}
 
 	if (buf_page_in_file(bpage)) {
 		lsn = bpage->newest_modification;
@@ -833,7 +858,7 @@ buf_block_modify_clock_inc(
 	buf_block_t*	block)	/* in: block */
 {
 #ifdef UNIV_SYNC_DEBUG
-	ut_ad((buf_pool_mutex_own()
+	ut_ad((mutex_own(&LRU_list_mutex)
 	       && (block->page.buf_fix_count == 0))
 	      || rw_lock_own(&(block->lock), RW_LOCK_EXCLUSIVE));
 #endif /* UNIV_SYNC_DEBUG */
@@ -917,7 +942,11 @@ buf_page_hash_get(
 	ulint		fold;
 
 	ut_ad(buf_pool);
-	ut_ad(buf_pool_mutex_own());
+	//ut_ad(buf_pool_mutex_own());
+#ifdef UNIV_SYNC_DEBUG
+	ut_ad(rw_lock_own(&page_hash_latch, RW_LOCK_EX)
+	      || rw_lock_own(&page_hash_latch, RW_LOCK_SHARED));
+#endif
 
 	/* Look for the page in the hash table */
 
@@ -966,11 +995,13 @@ buf_page_peek(
 {
 	const buf_page_t*	bpage;
 
-	buf_pool_mutex_enter();
+	//buf_pool_mutex_enter();
+	rw_lock_s_lock(&page_hash_latch);
 
 	bpage = buf_page_hash_get(space, offset);
 
-	buf_pool_mutex_exit();
+	//buf_pool_mutex_exit();
+	rw_lock_s_unlock(&page_hash_latch);
 
 	return(bpage != NULL);
 }
@@ -1032,11 +1063,14 @@ buf_page_release(
 	ut_a(buf_block_get_state(block) == BUF_BLOCK_FILE_PAGE);
 	ut_a(block->page.buf_fix_count > 0);
 
+	/* buf_flush_note_modification() should be called before this function. */
+/*
 	if (rw_latch == RW_X_LATCH && mtr->modifications) {
 		buf_pool_mutex_enter();
 		buf_flush_note_modification(block, mtr);
 		buf_pool_mutex_exit();
 	}
+*/
 
 	mutex_enter(&block->mutex);
 

=== modified file 'storage/xtradb/include/buf0flu.ic'
--- a/storage/xtradb/include/buf0flu.ic	2009-05-04 02:45:47 +0000
+++ b/storage/xtradb/include/buf0flu.ic	2009-06-25 01:43:25 +0000
@@ -53,13 +53,23 @@ buf_flush_note_modification(
 	buf_block_t*	block,	/* in: block which is modified */
 	mtr_t*		mtr)	/* in: mtr */
 {
+	ibool	use_LRU_mutex = FALSE;
+
+	if (UT_LIST_GET_LEN(buf_pool->unzip_LRU))
+		use_LRU_mutex = TRUE;
+
+	if (use_LRU_mutex)
+		mutex_enter(&LRU_list_mutex);
+
+	mutex_enter(&block->mutex);
+
 	ut_ad(block);
 	ut_ad(buf_block_get_state(block) == BUF_BLOCK_FILE_PAGE);
 	ut_ad(block->page.buf_fix_count > 0);
 #ifdef UNIV_SYNC_DEBUG
 	ut_ad(rw_lock_own(&(block->lock), RW_LOCK_EX));
 #endif /* UNIV_SYNC_DEBUG */
-	ut_ad(buf_pool_mutex_own());
+	//ut_ad(buf_pool_mutex_own());
 
 	ut_ad(mtr->start_lsn != 0);
 	ut_ad(mtr->modifications);
@@ -68,16 +78,23 @@ buf_flush_note_modification(
 	block->page.newest_modification = mtr->end_lsn;
 
 	if (!block->page.oldest_modification) {
+		mutex_enter(&flush_list_mutex);
 
 		block->page.oldest_modification = mtr->start_lsn;
 		ut_ad(block->page.oldest_modification != 0);
 
 		buf_flush_insert_into_flush_list(block);
+		mutex_exit(&flush_list_mutex);
 	} else {
 		ut_ad(block->page.oldest_modification <= mtr->start_lsn);
 	}
 
+	mutex_exit(&block->mutex);
+
 	++srv_buf_pool_write_requests;
+
+	if (use_LRU_mutex)
+		mutex_exit(&LRU_list_mutex);
 }
 
 /************************************************************************
@@ -92,6 +109,16 @@ buf_flush_recv_note_modification(
 	ib_uint64_t	end_lsn)	/* in: end lsn of the last mtr in the
 					set of mtr's */
 {
+	ibool	use_LRU_mutex = FALSE;
+
+	if(UT_LIST_GET_LEN(buf_pool->unzip_LRU))
+		use_LRU_mutex = TRUE;
+
+	if (use_LRU_mutex)
+		mutex_enter(&LRU_list_mutex);
+
+	mutex_enter(&(block->mutex));
+
 	ut_ad(block);
 	ut_ad(buf_block_get_state(block) == BUF_BLOCK_FILE_PAGE);
 	ut_ad(block->page.buf_fix_count > 0);
@@ -99,22 +126,27 @@ buf_flush_recv_note_modification(
 	ut_ad(rw_lock_own(&(block->lock), RW_LOCK_EX));
 #endif /* UNIV_SYNC_DEBUG */
 
-	buf_pool_mutex_enter();
+	//buf_pool_mutex_enter();
 
 	ut_ad(block->page.newest_modification <= end_lsn);
 
 	block->page.newest_modification = end_lsn;
 
 	if (!block->page.oldest_modification) {
+		mutex_enter(&flush_list_mutex);
 
 		block->page.oldest_modification = start_lsn;
 
 		ut_ad(block->page.oldest_modification != 0);
 
 		buf_flush_insert_sorted_into_flush_list(block);
+		mutex_exit(&flush_list_mutex);
 	} else {
 		ut_ad(block->page.oldest_modification <= start_lsn);
 	}
 
-	buf_pool_mutex_exit();
+	//buf_pool_mutex_exit();
+	if (use_LRU_mutex)
+		mutex_exit(&LRU_list_mutex);
+	mutex_exit(&(block->mutex));
 }

=== modified file 'storage/xtradb/include/buf0lru.h'
--- a/storage/xtradb/include/buf0lru.h	2009-05-04 02:45:47 +0000
+++ b/storage/xtradb/include/buf0lru.h	2009-06-25 01:43:25 +0000
@@ -122,10 +122,11 @@ buf_LRU_free_block(
 	buf_page_t*	bpage,	/* in: block to be freed */
 	ibool		zip,	/* in: TRUE if should remove also the
 				compressed page of an uncompressed page */
-	ibool*		buf_pool_mutex_released);
+	ibool*		buf_pool_mutex_released,
 				/* in: pointer to a variable that will
 				be assigned TRUE if buf_pool_mutex
 				was temporarily released, or NULL */
+	ibool		have_LRU_mutex);
 /**********************************************************************
 Try to free a replaceable block. */
 UNIV_INTERN
@@ -169,7 +170,8 @@ UNIV_INTERN
 void
 buf_LRU_block_free_non_file_page(
 /*=============================*/
-	buf_block_t*	block);	/* in: block, must not contain a file page */
+	buf_block_t*	block,	/* in: block, must not contain a file page */
+	ibool		have_page_hash_mutex);
 /**********************************************************************
 Adds a block to the LRU list. */
 UNIV_INTERN

=== modified file 'storage/xtradb/include/dict0dict.h'
--- a/storage/xtradb/include/dict0dict.h	2009-03-26 06:11:11 +0000
+++ b/storage/xtradb/include/dict0dict.h	2009-06-25 01:43:25 +0000
@@ -1102,6 +1102,12 @@ dict_table_get_index_on_name_and_min_id(
 				/* out: index, NULL if does not exist */
 	dict_table_t*	table,	/* in: table */
 	const char*	name);	/* in: name of the index to find */
+
+UNIV_INTERN
+void
+dict_table_LRU_trim(
+/*================*/
+	dict_table_t*	self);
 /* Buffers for storing detailed information about the latest foreign key
 and unique key errors */
 extern FILE*	dict_foreign_err_file;

=== modified file 'storage/xtradb/include/dict0dict.ic'
--- a/storage/xtradb/include/dict0dict.ic	2009-03-26 06:11:11 +0000
+++ b/storage/xtradb/include/dict0dict.ic	2009-06-25 01:43:25 +0000
@@ -723,6 +723,13 @@ dict_table_check_if_in_cache_low(
 	HASH_SEARCH(name_hash, dict_sys->table_hash, table_fold,
 		    dict_table_t*, table, ut_ad(table->cached),
 		    !strcmp(table->name, table_name));
+
+	/* make young in table_LRU */
+	if (table) {
+		UT_LIST_REMOVE(table_LRU, dict_sys->table_LRU, table);
+		UT_LIST_ADD_FIRST(table_LRU, dict_sys->table_LRU, table);
+	}
+
 	return(table);
 }
 
@@ -776,6 +783,12 @@ dict_table_get_on_id_low(
 		table = dict_load_table_on_id(table_id);
 	}
 
+	/* make young in table_LRU */
+	if (table) {
+		UT_LIST_REMOVE(table_LRU, dict_sys->table_LRU, table);
+		UT_LIST_ADD_FIRST(table_LRU, dict_sys->table_LRU, table);
+	}
+
 	ut_ad(!table || table->cached);
 
 	/* TODO: should get the type information from MySQL */

=== modified file 'storage/xtradb/include/log0log.h'
--- a/storage/xtradb/include/log0log.h	2009-03-26 06:11:11 +0000
+++ b/storage/xtradb/include/log0log.h	2009-06-25 01:43:25 +0000
@@ -186,6 +186,13 @@ void
 log_buffer_flush_to_disk(void);
 /*==========================*/
 /********************************************************************
+Flushes the log buffer. Forces it to disk depending on the value of
+the configuration parameter innodb_flush_log_at_trx_commit. */
+UNIV_INTERN
+void
+log_buffer_flush_maybe_sync(void);
+/*=============================*/
+/********************************************************************
 Advances the smallest lsn for which there are unflushed dirty blocks in the
 buffer pool and also may make a new checkpoint. NOTE: this function may only
 be called if the calling thread owns no synchronization objects! */

=== modified file 'storage/xtradb/include/rem0cmp.h'
--- a/storage/xtradb/include/rem0cmp.h	2009-03-26 06:11:11 +0000
+++ b/storage/xtradb/include/rem0cmp.h	2009-06-25 01:43:25 +0000
@@ -177,10 +177,11 @@ cmp_rec_rec_with_match(
 				matched fields; when the function returns,
 				contains the value the for current
 				comparison */
-	ulint*		matched_bytes);/* in/out: number of already matched
+	ulint*		matched_bytes, /* in/out: number of already matched
 				bytes within the first field not completely
 				matched; when the function returns, contains
 				the value for the current comparison */
+	ulint		stats_method);
 /*****************************************************************
 This function is used to compare two physical records. Only the common
 first fields are compared. */

=== modified file 'storage/xtradb/include/rem0cmp.ic'
--- a/storage/xtradb/include/rem0cmp.ic	2009-03-26 06:11:11 +0000
+++ b/storage/xtradb/include/rem0cmp.ic	2009-06-25 01:43:25 +0000
@@ -88,5 +88,5 @@ cmp_rec_rec(
 	ulint	match_b		= 0;
 
 	return(cmp_rec_rec_with_match(rec1, rec2, offsets1, offsets2, index,
-				      &match_f, &match_b));
+				      &match_f, &match_b, 0));
 }

=== modified file 'storage/xtradb/include/srv0srv.h'
--- a/storage/xtradb/include/srv0srv.h	2009-03-26 06:11:11 +0000
+++ b/storage/xtradb/include/srv0srv.h	2009-06-25 01:43:25 +0000
@@ -127,6 +127,8 @@ extern ulint	srv_buf_pool_curr_size;	/* 
 extern ulint	srv_mem_pool_size;
 extern ulint	srv_lock_table_size;
 
+extern ibool	srv_thread_concurrency_timer_based;
+
 extern ulint	srv_n_file_io_threads;
 extern ulint	srv_n_read_io_threads;
 extern ulint	srv_n_write_io_threads;
@@ -163,6 +165,11 @@ extern ulint	srv_fast_shutdown;	 /* If t
 extern ibool	srv_innodb_status;
 
 extern unsigned long long	srv_stats_sample_pages;
+extern ulint	srv_stats_method;
+#define SRV_STATS_METHOD_NULLS_EQUAL     0
+#define SRV_STATS_METHOD_NULLS_NOT_EQUAL 1
+#define SRV_STATS_METHOD_IGNORE_NULLS    2
+extern ulint	srv_stats_auto_update;
 
 extern ibool	srv_use_doublewrite_buf;
 extern ibool	srv_use_checksums;
@@ -184,8 +191,10 @@ extern ulint	srv_enable_unsafe_group_com
 extern ulint	srv_read_ahead;
 extern ulint	srv_adaptive_checkpoint;
 
-extern ulint	srv_extra_rsegments;
+extern ulint	srv_expand_import;
 
+extern ulint	srv_extra_rsegments;
+extern ulint	srv_dict_size_limit;
 /*-------------------------------------------*/
 
 extern ulint	srv_n_rows_inserted;
@@ -552,6 +561,7 @@ struct export_var_struct{
 	ulint innodb_data_writes;
 	ulint innodb_data_written;
 	ulint innodb_data_reads;
+	ulint innodb_dict_tables;
 	ulint innodb_buffer_pool_pages_total;
 	ulint innodb_buffer_pool_pages_data;
 	ulint innodb_buffer_pool_pages_dirty;

=== modified file 'storage/xtradb/include/sync0sync.h'
--- a/storage/xtradb/include/sync0sync.h	2009-03-26 06:11:11 +0000
+++ b/storage/xtradb/include/sync0sync.h	2009-06-25 01:43:25 +0000
@@ -464,8 +464,14 @@ or row lock! */
 					SYNC_SEARCH_SYS, as memory allocation
 					can call routines there! Otherwise
 					the level is SYNC_MEM_HASH. */
+#define SYNC_BUF_LRU_LIST	157
+#define SYNC_BUF_PAGE_HASH	156
+#define	SYNC_BUF_BLOCK		155
+#define SYNC_BUF_FREE_LIST	153
+#define SYNC_BUF_ZIP_FREE	152
+#define SYNC_BUF_ZIP_HASH	151
 #define	SYNC_BUF_POOL		150
-#define	SYNC_BUF_BLOCK		149
+#define SYNC_BUF_FLUSH_LIST	149
 #define SYNC_DOUBLEWRITE	140
 #define	SYNC_ANY_LATCH		135
 #define SYNC_THR_LOCAL		133

=== modified file 'storage/xtradb/include/univ.i'
--- a/storage/xtradb/include/univ.i	2009-06-18 12:39:21 +0000
+++ b/storage/xtradb/include/univ.i	2009-08-03 20:09:53 +0000
@@ -35,7 +35,7 @@ Created 1/20/1994 Heikki Tuuri
 #define INNODB_VERSION_MAJOR	1
 #define INNODB_VERSION_MINOR	0
 #define INNODB_VERSION_BUGFIX	3
-#define PERCONA_INNODB_VERSION	5a
+#define PERCONA_INNODB_VERSION	6a
 
 /* The following is the InnoDB version as shown in
 SELECT plugin_version FROM information_schema.plugins;

=== modified file 'storage/xtradb/include/ut0auxconf.h'
--- a/storage/xtradb/include/ut0auxconf.h	2009-04-27 04:54:14 +0000
+++ b/storage/xtradb/include/ut0auxconf.h	2009-06-25 01:43:25 +0000
@@ -12,3 +12,8 @@ If by any chance Makefile.in and ./confi
 the hack from Makefile.in wiped away then the "real" check from plug.in
 will take over.
 */
+/* This is temprary fix for http://bugs.mysql.com/43740 */
+/* force to enable */
+#ifdef HAVE_GCC_ATOMIC_BUILTINS
+#define HAVE_ATOMIC_PTHREAD_T
+#endif

=== modified file 'storage/xtradb/log/log0log.c'
--- a/storage/xtradb/log/log0log.c	2009-03-26 06:11:11 +0000
+++ b/storage/xtradb/log/log0log.c	2009-06-25 01:43:25 +0000
@@ -1526,6 +1526,26 @@ log_buffer_flush_to_disk(void)
 }
 
 /********************************************************************
+Flush the log buffer. Force it to disk depending on the value of
+innodb_flush_log_at_trx_commit. */
+UNIV_INTERN
+void
+log_buffer_flush_maybe_sync(void)
+/*=============================*/
+{
+	ib_uint64_t	lsn;
+
+	mutex_enter(&(log_sys->mutex));
+
+	lsn = log_sys->lsn;
+
+	mutex_exit(&(log_sys->mutex));
+
+	/* Force log buffer to disk when innodb_flush_log_at_trx_commit = 1. */
+	log_write_up_to(lsn, LOG_WAIT_ALL_GROUPS,
+			srv_flush_log_at_trx_commit == 1 ? TRUE : FALSE);
+}
+/********************************************************************
 Tries to establish a big enough margin of free space in the log buffer, such
 that a new log entry can be catenated without an immediate need for a flush. */
 static

=== modified file 'storage/xtradb/log/log0recv.c'
--- a/storage/xtradb/log/log0recv.c	2009-03-26 06:11:11 +0000
+++ b/storage/xtradb/log/log0recv.c	2009-07-06 05:47:15 +0000
@@ -110,7 +110,7 @@ the log and store the scanned log record
 use these free frames to read in pages when we start applying the
 log records to the database. */
 
-UNIV_INTERN ulint	recv_n_pool_free_frames		= 256;
+UNIV_INTERN ulint	recv_n_pool_free_frames		= 1024;
 
 /* The maximum lsn we see for a page during the recovery process. If this
 is bigger than the lsn we are able to scan up to, that is an indication that
@@ -1225,6 +1225,8 @@ recv_recover_page(
 					     buf_block_get_page_no(block));
 
 	if ((recv_addr == NULL)
+		/* bugfix: http://bugs.mysql.com/bug.php?id=44140 */
+	    || (recv_addr->state == RECV_BEING_READ && !just_read_in)
 	    || (recv_addr->state == RECV_BEING_PROCESSED)
 	    || (recv_addr->state == RECV_PROCESSED)) {
 

=== modified file 'storage/xtradb/mtr/mtr0mtr.c'
--- a/storage/xtradb/mtr/mtr0mtr.c	2009-03-26 06:11:11 +0000
+++ b/storage/xtradb/mtr/mtr0mtr.c	2009-06-25 01:43:25 +0000
@@ -102,6 +102,38 @@ mtr_memo_pop_all(
 	}
 }
 
+UNIV_INLINE
+void
+mtr_memo_note_modification_all(
+/*===========================*/
+	mtr_t*	mtr)	/* in: mtr */
+{
+	mtr_memo_slot_t* slot;
+	dyn_array_t*	memo;
+	ulint		offset;
+
+	ut_ad(mtr);
+	ut_ad(mtr->magic_n == MTR_MAGIC_N);
+	ut_ad(mtr->state == MTR_COMMITTING); /* Currently only used in
+					     commit */
+	ut_ad(mtr->modifications);
+
+	memo = &(mtr->memo);
+
+	offset = dyn_array_get_data_size(memo);
+
+	while (offset > 0) {
+		offset -= sizeof(mtr_memo_slot_t);
+		slot = dyn_array_get_element(memo, offset);
+
+		if (UNIV_LIKELY(slot->object != NULL) &&
+		    slot->type == MTR_MEMO_PAGE_X_FIX) {
+			buf_flush_note_modification(
+				(buf_block_t*)slot->object, mtr);
+		}
+	}
+}
+
 /****************************************************************
 Writes the contents of a mini-transaction log, if any, to the database log. */
 static
@@ -180,6 +212,8 @@ mtr_commit(
 
 	if (write_log) {
 		mtr_log_reserve_and_write(mtr);
+
+		mtr_memo_note_modification_all(mtr);
 	}
 
 	/* We first update the modification info to buffer pages, and only
@@ -190,12 +224,13 @@ mtr_commit(
 	required when we insert modified buffer pages in to the flush list
 	which must be sorted on oldest_modification. */
 
-	mtr_memo_pop_all(mtr);
-
 	if (write_log) {
 		log_release();
 	}
 
+	/* All unlocking has been moved here, after log_sys mutex release. */
+	mtr_memo_pop_all(mtr);
+
 	ut_d(mtr->state = MTR_COMMITTED);
 	dyn_array_free(&(mtr->memo));
 	dyn_array_free(&(mtr->log));
@@ -263,6 +298,12 @@ mtr_memo_release(
 		slot = dyn_array_get_element(memo, offset);
 
 		if ((object == slot->object) && (type == slot->type)) {
+			if (mtr->modifications &&
+			    UNIV_LIKELY(slot->object != NULL) &&
+			    slot->type == MTR_MEMO_PAGE_X_FIX) {
+				buf_flush_note_modification(
+					(buf_block_t*)slot->object, mtr);
+			}
 
 			mtr_memo_slot_release(mtr, slot);
 

=== modified file 'storage/xtradb/os/os0file.c'
--- a/storage/xtradb/os/os0file.c	2009-03-26 06:11:11 +0000
+++ b/storage/xtradb/os/os0file.c	2009-06-25 01:43:25 +0000
@@ -73,6 +73,28 @@ UNIV_INTERN ibool	os_aio_use_native_aio	
 
 UNIV_INTERN ibool	os_aio_print_debug	= FALSE;
 
+/* State for the state of an IO request in simulated AIO.
+   Protocol for simulated aio:
+     client requests IO: find slot with reserved = FALSE. Add entry with
+                         status = OS_AIO_NOT_ISSUED.
+     IO thread wakes: find adjacent slots with reserved = TRUE and status =
+                      OS_AIO_NOT_ISSUED. Change status for slots to
+                      OS_AIO_ISSUED.
+     IO operation completes: set status for slots to OS_AIO_DONE. set status
+                             for the first slot to OS_AIO_CLAIMED and return
+                             result for that slot.
+   When there are multiple read and write threads, they all compete to execute
+   the requests in the array (os_aio_array_t). This avoids the need to load
+   balance requests at the time the request is made at the cost of waking all
+   threads when a request is available.
+*/
+typedef enum {
+	OS_AIO_NOT_ISSUED, /* Available to be processed by an IO thread. */
+	OS_AIO_ISSUED,     /* Being processed by an IO thread. */
+	OS_AIO_DONE,       /* Request processed. */
+	OS_AIO_CLAIMED     /* Result being returned to client. */
+} os_aio_status;
+
 /* The aio array slot structure */
 typedef struct os_aio_slot_struct	os_aio_slot_t;
 
@@ -81,6 +103,8 @@ struct os_aio_slot_struct{
 	ulint		pos;		/* index of the slot in the aio
 					array */
 	ibool		reserved;	/* TRUE if this slot is reserved */
+	os_aio_status   status;		/* Status for current request. Valid when reserved
+					is TRUE. Used only in simulated aio. */
 	time_t		reservation_time;/* time when reserved */
 	ulint		len;		/* length of the block to read or
 					write */
@@ -91,11 +115,11 @@ struct os_aio_slot_struct{
 	ulint		offset_high;	/* 32 high bits of file offset */
 	os_file_t	file;		/* file where to read or write */
 	const char*	name;		/* file name or path */
-	ibool		io_already_done;/* used only in simulated aio:
-					TRUE if the physical i/o already
-					made and only the slot message
-					needs to be passed to the caller
-					of os_aio_simulated_handle */
+//	ibool		io_already_done;/* used only in simulated aio:
+//					TRUE if the physical i/o already
+//					made and only the slot message
+//					needs to be passed to the caller
+//					of os_aio_simulated_handle */
 	fil_node_t*	message1;	/* message which is given by the */
 	void*		message2;	/* the requester of an aio operation
 					and which can be used to identify
@@ -141,6 +165,13 @@ struct os_aio_array_struct{
 /* Array of events used in simulated aio */
 static os_event_t*	os_aio_segment_wait_events	= NULL;
 
+/* Number for the first global segment for reading. */
+const ulint os_aio_first_read_segment = 2;
+
+/* Number for the first global segment for writing. Set to
+2 + os_aio_read_write_threads. */
+ulint os_aio_first_write_segment = 0;
+
 /* The aio arrays for non-ibuf i/o and ibuf i/o, as well as sync aio. These
 are NULL when the module has not yet been initialized. */
 static os_aio_array_t*	os_aio_read_array	= NULL;
@@ -149,11 +180,17 @@ static os_aio_array_t*	os_aio_ibuf_array
 static os_aio_array_t*	os_aio_log_array	= NULL;
 static os_aio_array_t*	os_aio_sync_array	= NULL;
 
+/* Per thread buffer used for merged IO requests. Used by
+os_aio_simulated_handle so that a buffer doesn't have to be allocated
+for each request. */
+static char* os_aio_thread_buffer[SRV_MAX_N_IO_THREADS];
+static ulint os_aio_thread_buffer_size[SRV_MAX_N_IO_THREADS];
+
 static ulint	os_aio_n_segments	= ULINT_UNDEFINED;
 
 /* If the following is TRUE, read i/o handler threads try to
 wait until a batch of new read requests have been posted */
-static ibool	os_aio_recommend_sleep_for_read_threads	= FALSE;
+static volatile ibool	os_aio_recommend_sleep_for_read_threads	= FALSE;
 
 UNIV_INTERN ulint	os_n_file_reads		= 0;
 UNIV_INTERN ulint	os_bytes_read_since_printout = 0;
@@ -2956,6 +2993,8 @@ os_aio_init(
 
 	for (i = 0; i < n_segments; i++) {
 		srv_set_io_thread_op_info(i, "not started yet");
+		os_aio_thread_buffer[i] = 0;
+		os_aio_thread_buffer_size[i] = 0;
 	}
 
 	n_per_seg = n / n_segments;
@@ -2964,6 +3003,7 @@ os_aio_init(
 
 	/* fprintf(stderr, "Array n per seg %lu\n", n_per_seg); */
 
+	os_aio_first_write_segment = os_aio_first_read_segment + n_read_threads;
 	os_aio_ibuf_array = os_aio_array_create(n_per_seg, 1);
 
 	srv_io_thread_function[0] = "insert buffer thread";
@@ -2972,14 +3012,14 @@ os_aio_init(
 
 	srv_io_thread_function[1] = "log thread";
 
-	os_aio_read_array = os_aio_array_create(n_read_segs * n_per_seg,
+	os_aio_read_array = os_aio_array_create(n_per_seg,
 						n_read_segs);
 	for (i = 2; i < 2 + n_read_segs; i++) {
 		ut_a(i < SRV_MAX_N_IO_THREADS);
 		srv_io_thread_function[i] = "read thread";
 	}
 
-	os_aio_write_array = os_aio_array_create(n_write_segs * n_per_seg,
+	os_aio_write_array = os_aio_array_create(n_per_seg,
 						 n_write_segs);
 	for (i = 2 + n_read_segs; i < n_segments; i++) {
 		ut_a(i < SRV_MAX_N_IO_THREADS);
@@ -3225,7 +3265,8 @@ loop:
 	slot->buf      = buf;
 	slot->offset   = offset;
 	slot->offset_high = offset_high;
-	slot->io_already_done = FALSE;
+//	slot->io_already_done = FALSE;
+	slot->status = OS_AIO_NOT_ISSUED;
 
 #ifdef WIN_ASYNC_IO
 	control = &(slot->control);
@@ -3256,6 +3297,7 @@ os_aio_array_free_slot(
 	ut_ad(slot->reserved);
 
 	slot->reserved = FALSE;
+	slot->status = OS_AIO_NOT_ISSUED;
 
 	array->n_reserved--;
 
@@ -3292,16 +3334,18 @@ os_aio_simulated_wake_handler_thread(
 
 	segment = os_aio_get_array_and_local_segment(&array, global_segment);
 
-	n = array->n_slots / array->n_segments;
+	n = array->n_slots;
 
 	/* Look through n slots after the segment * n'th slot */
 
 	os_mutex_enter(array->mutex);
 
 	for (i = 0; i < n; i++) {
-		slot = os_aio_array_get_nth_slot(array, i + segment * n);
+		slot = os_aio_array_get_nth_slot(array, i);
 
-		if (slot->reserved) {
+		if (slot->reserved &&
+		    (slot->status == OS_AIO_NOT_ISSUED ||
+		     slot->status == OS_AIO_DONE)) {
 			/* Found an i/o request */
 
 			break;
@@ -3311,7 +3355,25 @@ os_aio_simulated_wake_handler_thread(
 	os_mutex_exit(array->mutex);
 
 	if (i < n) {
-		os_event_set(os_aio_segment_wait_events[global_segment]);
+		if (array == os_aio_ibuf_array) {
+			os_event_set(os_aio_segment_wait_events[0]);
+
+		} else if (array == os_aio_log_array) {
+			os_event_set(os_aio_segment_wait_events[1]);
+
+		} else if (array == os_aio_read_array) {
+			ulint	x;
+			for (x = os_aio_first_read_segment; x < os_aio_first_write_segment; x++)
+				os_event_set(os_aio_segment_wait_events[x]);
+
+		} else if (array == os_aio_write_array) {
+			ulint	x;
+			for (x = os_aio_first_write_segment; x < os_aio_n_segments; x++)
+				os_event_set(os_aio_segment_wait_events[x]);
+
+		} else {
+			ut_a(0);
+		}
 	}
 }
 
@@ -3322,8 +3384,6 @@ void
 os_aio_simulated_wake_handler_threads(void)
 /*=======================================*/
 {
-	ulint	i;
-
 	if (os_aio_use_native_aio) {
 		/* We do not use simulated aio: do nothing */
 
@@ -3332,9 +3392,10 @@ os_aio_simulated_wake_handler_threads(vo
 
 	os_aio_recommend_sleep_for_read_threads	= FALSE;
 
-	for (i = 0; i < os_aio_n_segments; i++) {
-		os_aio_simulated_wake_handler_thread(i);
-	}
+	os_aio_simulated_wake_handler_thread(0);
+	os_aio_simulated_wake_handler_thread(1);
+	os_aio_simulated_wake_handler_thread(os_aio_first_read_segment);
+	os_aio_simulated_wake_handler_thread(os_aio_first_write_segment);
 }
 
 /**************************************************************************
@@ -3606,7 +3667,7 @@ os_aio_windows_handle(
 	ut_ad(os_aio_validate());
 	ut_ad(segment < array->n_segments);
 
-	n = array->n_slots / array->n_segments;
+	n = array->n_slots;
 
 	if (array == os_aio_sync_array) {
 		os_event_wait(os_aio_array_get_nth_slot(array, pos)->event);
@@ -3615,12 +3676,12 @@ os_aio_windows_handle(
 		srv_set_io_thread_op_info(orig_seg, "wait Windows aio");
 		i = os_event_wait_multiple(n,
 					   (array->native_events)
-					   + segment * n);
+					   );
 	}
 
 	os_mutex_enter(array->mutex);
 
-	slot = os_aio_array_get_nth_slot(array, i + segment * n);
+	slot = os_aio_array_get_nth_slot(array, i);
 
 	ut_a(slot->reserved);
 
@@ -3685,10 +3746,13 @@ os_aio_simulated_handle(
 	os_aio_slot_t*	slot;
 	os_aio_slot_t*	slot2;
 	os_aio_slot_t*	consecutive_ios[OS_AIO_MERGE_N_CONSECUTIVE];
+	os_aio_slot_t*  lowest_request;
+	os_aio_slot_t*	oldest_request;
 	ulint		n_consecutive;
 	ulint		total_len;
 	ulint		offs;
 	ulint		lowest_offset;
+	ulint		oldest_offset;
 	ulint		biggest_age;
 	ulint		age;
 	byte*		combined_buf;
@@ -3696,6 +3760,7 @@ os_aio_simulated_handle(
 	ibool		ret;
 	ulint		n;
 	ulint		i;
+ 	time_t          now;
 
 	segment = os_aio_get_array_and_local_segment(&array, global_segment);
 
@@ -3708,7 +3773,7 @@ restart:
 	ut_ad(os_aio_validate());
 	ut_ad(segment < array->n_segments);
 
-	n = array->n_slots / array->n_segments;
+	n = array->n_slots;
 
 	/* Look through n slots after the segment * n'th slot */
 
@@ -3730,9 +3795,9 @@ restart:
 	done */
 
 	for (i = 0; i < n; i++) {
-		slot = os_aio_array_get_nth_slot(array, i + segment * n);
+		slot = os_aio_array_get_nth_slot(array, i);
 
-		if (slot->reserved && slot->io_already_done) {
+		if (slot->reserved && slot->status == OS_AIO_DONE) {
 
 			if (os_aio_print_debug) {
 				fprintf(stderr,
@@ -3754,67 +3819,57 @@ restart:
 	then pick the one at the lowest offset. */
 
 	biggest_age = 0;
-	lowest_offset = ULINT_MAX;
+	now = time(NULL);
+	oldest_request = lowest_request = NULL;
+	oldest_offset = lowest_offset = ULINT_MAX;
 
+	/* Find the oldest request and the request with the smallest offset */
 	for (i = 0; i < n; i++) {
-		slot = os_aio_array_get_nth_slot(array, i + segment * n);
+		slot = os_aio_array_get_nth_slot(array, i);
 
-		if (slot->reserved) {
-			age = (ulint)difftime(time(NULL),
-					      slot->reservation_time);
+		if (slot->reserved && slot->status == OS_AIO_NOT_ISSUED) {
+			age = (ulint)difftime(now, slot->reservation_time);
 
 			if ((age >= 2 && age > biggest_age)
 			    || (age >= 2 && age == biggest_age
-				&& slot->offset < lowest_offset)) {
+				&& slot->offset < oldest_offset)) {
 
 				/* Found an i/o request */
-				consecutive_ios[0] = slot;
-
-				n_consecutive = 1;
-
 				biggest_age = age;
-				lowest_offset = slot->offset;
+				oldest_request = slot;
+				oldest_offset = slot->offset;
 			}
-		}
-	}
-
-	if (n_consecutive == 0) {
-		/* There were no old requests. Look for an i/o request at the
-		lowest offset in the array (we ignore the high 32 bits of the
-		offset in these heuristics) */
-
-		lowest_offset = ULINT_MAX;
-
-		for (i = 0; i < n; i++) {
-			slot = os_aio_array_get_nth_slot(array,
-							 i + segment * n);
-
-			if (slot->reserved && slot->offset < lowest_offset) {
 
+			/* Look for an i/o request at the lowest offset in the array
+			 * (we ignore the high 32 bits of the offset) */
+			if (slot->offset < lowest_offset) {
 				/* Found an i/o request */
-				consecutive_ios[0] = slot;
-
-				n_consecutive = 1;
-
+				lowest_request = slot;
 				lowest_offset = slot->offset;
 			}
 		}
 	}
 
-	if (n_consecutive == 0) {
+	if (!lowest_request && !oldest_request) {
 
 		/* No i/o requested at the moment */
 
 		goto wait_for_io;
 	}
 
-	slot = consecutive_ios[0];
+	if (oldest_request) {
+		slot = oldest_request;
+	} else {
+		slot = lowest_request;
+	}
+	consecutive_ios[0] = slot;
+	n_consecutive = 1;
 
 	/* Check if there are several consecutive blocks to read or write */
 
 consecutive_loop:
 	for (i = 0; i < n; i++) {
-		slot2 = os_aio_array_get_nth_slot(array, i + segment * n);
+		slot2 = os_aio_array_get_nth_slot(array, i);
 
 		if (slot2->reserved && slot2 != slot
 		    && slot2->offset == slot->offset + slot->len
@@ -3822,7 +3877,8 @@ consecutive_loop:
 		    && slot->offset + slot->len > slot->offset
 		    && slot2->offset_high == slot->offset_high
 		    && slot2->type == slot->type
-		    && slot2->file == slot->file) {
+		    && slot2->file == slot->file
+		    && slot2->status == OS_AIO_NOT_ISSUED) {
 
 			/* Found a consecutive i/o request */
 
@@ -3851,6 +3907,8 @@ consecutive_loop:
 
 	for (i = 0; i < n_consecutive; i++) {
 		total_len += consecutive_ios[i]->len;
+		ut_a(consecutive_ios[i]->status == OS_AIO_NOT_ISSUED);
+		consecutive_ios[i]->status = OS_AIO_ISSUED;
 	}
 
 	if (n_consecutive == 1) {
@@ -3858,7 +3916,14 @@ consecutive_loop:
 		combined_buf = slot->buf;
 		combined_buf2 = NULL;
 	} else {
-		combined_buf2 = ut_malloc(total_len + UNIV_PAGE_SIZE);
+		if ((total_len + UNIV_PAGE_SIZE) > os_aio_thread_buffer_size[global_segment]) {
+			if (os_aio_thread_buffer[global_segment])
+				ut_free(os_aio_thread_buffer[global_segment]);
+
+			os_aio_thread_buffer[global_segment] = ut_malloc(total_len + UNIV_PAGE_SIZE);
+			os_aio_thread_buffer_size[global_segment] = total_len + UNIV_PAGE_SIZE;
+		}
+		combined_buf2 = os_aio_thread_buffer[global_segment];
 
 		ut_a(combined_buf2);
 
@@ -3869,6 +3934,9 @@ consecutive_loop:
 	this assumes that there is just one i/o-handler thread serving
 	a single segment of slots! */
 
+	ut_a(slot->reserved);
+	ut_a(slot->status == OS_AIO_ISSUED);
+
 	os_mutex_exit(array->mutex);
 
 	if (slot->type == OS_FILE_WRITE && n_consecutive > 1) {
@@ -3924,16 +3992,13 @@ consecutive_loop:
 		}
 	}
 
-	if (combined_buf2) {
-		ut_free(combined_buf2);
-	}
-
 	os_mutex_enter(array->mutex);
 
 	/* Mark the i/os done in slots */
 
 	for (i = 0; i < n_consecutive; i++) {
-		consecutive_ios[i]->io_already_done = TRUE;
+		ut_a(consecutive_ios[i]->status == OS_AIO_ISSUED);
+		consecutive_ios[i]->status = OS_AIO_DONE;
 	}
 
 	/* We return the messages for the first slot now, and if there were
@@ -3943,6 +4008,8 @@ consecutive_loop:
 slot_io_done:
 
 	ut_a(slot->reserved);
+	ut_a(slot->status == OS_AIO_DONE);
+	slot->status = OS_AIO_CLAIMED;
 
 	*message1 = slot->message1;
 	*message2 = slot->message2;

=== modified file 'storage/xtradb/rem/rem0cmp.c'
--- a/storage/xtradb/rem/rem0cmp.c	2009-03-26 06:11:11 +0000
+++ b/storage/xtradb/rem/rem0cmp.c	2009-06-25 01:43:25 +0000
@@ -892,10 +892,11 @@ cmp_rec_rec_with_match(
 				matched fields; when the function returns,
 				contains the value the for current
 				comparison */
-	ulint*		matched_bytes) /* in/out: number of already matched
+	ulint*		matched_bytes, /* in/out: number of already matched
 				bytes within the first field not completely
 				matched; when the function returns, contains
 				the value for the current comparison */
+	ulint		stats_method)
 {
 #ifndef UNIV_HOTBACKUP
 	ulint		rec1_n_fields;	/* the number of fields in rec */
@@ -989,7 +990,11 @@ cmp_rec_rec_with_match(
 
 				if (rec1_f_len == rec2_f_len) {
 
-					goto next_field;
+					if (stats_method == SRV_STATS_METHOD_NULLS_EQUAL) {
+						goto next_field;
+					} else {
+						ret = -1;
+					}
 
 				} else if (rec2_f_len == UNIV_SQL_NULL) {
 

=== modified file 'storage/xtradb/row/row0mysql.c'
--- a/storage/xtradb/row/row0mysql.c	2009-03-26 06:11:11 +0000
+++ b/storage/xtradb/row/row0mysql.c	2009-06-25 01:43:25 +0000
@@ -854,6 +854,9 @@ row_update_statistics_if_needed(
 
 	table->stat_modified_counter = counter + 1;
 
+	if (!srv_stats_auto_update)
+		return;
+
 	/* Calculate new statistics if 1 / 16 of table has been modified
 	since the last time a statistics batch was run, or if
 	stat_modified_counter > 2 000 000 000 (to avoid wrap-around).

=== modified file 'storage/xtradb/scripts/install_innodb_plugins.sql'
--- a/storage/xtradb/scripts/install_innodb_plugins.sql	2009-01-29 16:54:13 +0000
+++ b/storage/xtradb/scripts/install_innodb_plugins.sql	2009-06-25 01:43:25 +0000
@@ -12,3 +12,5 @@ INSTALL PLUGIN INNODB_BUFFER_POOL_PAGES 
 INSTALL PLUGIN INNODB_BUFFER_POOL_PAGES_BLOB SONAME 'ha_innodb.so';
 INSTALL PLUGIN INNODB_BUFFER_POOL_PAGES_INDEX SONAME 'ha_innodb.so';
 INSTALL PLUGIN innodb_rseg SONAME 'ha_innodb.so';
+INSTALL PLUGIN innodb_table_stats SONAME 'ha_innodb.so';
+INSTALL PLUGIN innodb_index_stats SONAME 'ha_innodb.so';

=== modified file 'storage/xtradb/srv/srv0srv.c'
--- a/storage/xtradb/srv/srv0srv.c	2009-03-26 06:11:11 +0000
+++ b/storage/xtradb/srv/srv0srv.c	2009-07-06 05:47:15 +0000
@@ -285,6 +285,7 @@ Value 10 should be good if there are les
 computer. Bigger computers need bigger values. Value 0 will disable the
 concurrency check. */
 
+UNIV_INTERN ibool	srv_thread_concurrency_timer_based = FALSE;
 UNIV_INTERN ulong	srv_thread_concurrency	= 0;
 UNIV_INTERN ulong	srv_commit_concurrency	= 0;
 
@@ -336,6 +337,8 @@ UNIV_INTERN ibool	srv_innodb_status	= FA
 /* When estimating number of different key values in an index, sample
 this many index pages */
 UNIV_INTERN unsigned long long	srv_stats_sample_pages = 8;
+UNIV_INTERN ulint	srv_stats_method = 0;
+UNIV_INTERN ulint	srv_stats_auto_update = 1;
 
 UNIV_INTERN ibool	srv_use_doublewrite_buf	= TRUE;
 UNIV_INTERN ibool	srv_use_checksums = TRUE;
@@ -361,14 +364,18 @@ UNIV_INTERN ulint	srv_flush_neighbor_pag
 
 UNIV_INTERN ulint	srv_enable_unsafe_group_commit = 0; /* 0:disable 1:enable */
 UNIV_INTERN ulint	srv_read_ahead = 3; /* 1: random  2: linear  3: Both */
-UNIV_INTERN ulint	srv_adaptive_checkpoint = 0; /* 0:disable 1:enable */
+UNIV_INTERN ulint	srv_adaptive_checkpoint = 0; /* 0: none  1: reflex  2: estimate */
+
+UNIV_INTERN ulint	srv_expand_import = 0; /* 0:disable 1:enable */
 
 UNIV_INTERN ulint	srv_extra_rsegments = 0; /* extra rseg for users */
+UNIV_INTERN ulint	srv_dict_size_limit = 0;
 /*-------------------------------------------*/
 UNIV_INTERN ulong	srv_n_spin_wait_rounds	= 20;
 UNIV_INTERN ulong	srv_n_free_tickets_to_enter = 500;
 UNIV_INTERN ulong	srv_thread_sleep_delay = 10000;
 UNIV_INTERN ulint	srv_spin_wait_delay	= 5;
+UNIV_INTERN ulint	srv_spins_microsec	= 50;
 UNIV_INTERN ibool	srv_priority_boost	= TRUE;
 
 #ifdef UNIV_DEBUG
@@ -657,6 +664,47 @@ are indexed by the type of the thread. *
 UNIV_INTERN ulint	srv_n_threads_active[SRV_MASTER + 1];
 UNIV_INTERN ulint	srv_n_threads[SRV_MASTER + 1];
 
+static
+void
+srv_align_spins_microsec(void)
+{
+	ulint	start_sec, end_sec;
+	ulint	start_usec, end_usec;
+	ib_uint64_t	usecs;
+
+	/* change temporary */
+	srv_spins_microsec = 1;
+
+	if (ut_usectime(&start_sec, &start_usec)) {
+		srv_spins_microsec = 50;
+		goto end;
+	}
+
+	ut_delay(100000);
+
+	if (ut_usectime(&end_sec, &end_usec)) {
+		srv_spins_microsec = 50;
+		goto end;
+	}
+
+	usecs = (end_sec - start_sec) * 1000000LL + (end_usec - start_usec);
+
+	if (usecs) {
+		srv_spins_microsec = 100000 / usecs;
+		if (srv_spins_microsec == 0)
+			srv_spins_microsec = 1;
+		if (srv_spins_microsec > 50)
+			srv_spins_microsec = 50;
+	} else {
+		srv_spins_microsec = 50;
+	}
+end:
+	if (srv_spins_microsec != 50)
+		fprintf(stderr,
+			"InnoDB: unit of spin count at ut_delay() is aligned to %lu\n",
+			srv_spins_microsec);
+}
+
 /*************************************************************************
 Sets the info describing an i/o thread current state. */
 UNIV_INTERN
@@ -889,6 +937,8 @@ srv_init(void)
 	dict_table_t*		table;
 	ulint			i;
 
+	srv_align_spins_microsec();
+
 	srv_sys = mem_alloc(sizeof(srv_sys_t));
 
 	kernel_mutex_temp = mem_alloc(sizeof(mutex_t));
@@ -1009,6 +1059,75 @@ UNIV_INTERN ulong	srv_max_purge_lag		= 0
 /*************************************************************************
 Puts an OS thread to wait if there are too many concurrent threads
 (>= srv_thread_concurrency) inside InnoDB. The threads wait in a FIFO queue. */
+
+#ifdef INNODB_RW_LOCKS_USE_ATOMICS
+static void
+enter_innodb_with_tickets(trx_t* trx)
+{
+	trx->declared_to_be_inside_innodb = TRUE;
+	trx->n_tickets_to_enter_innodb = SRV_FREE_TICKETS_TO_ENTER;
+	return;
+}
+
+static void
+srv_conc_enter_innodb_timer_based(trx_t* trx)
+{
+	lint	conc_n_threads;
+	ibool	has_yielded = FALSE;
+	ulint	has_slept = 0;
+
+	if (trx->declared_to_be_inside_innodb) {
+		ut_print_timestamp(stderr);
+		fputs(
+"  InnoDB: Error: trying to declare trx to enter InnoDB, but\n"
+"InnoDB: it already is declared.\n", stderr);
+		trx_print(stderr, trx, 0);
+		putc('\n', stderr);
+	}
+retry:
+	if (srv_conc_n_threads < (lint) srv_thread_concurrency) {
+		conc_n_threads = __sync_add_and_fetch(&srv_conc_n_threads, 1);
+		if (conc_n_threads <= (lint) srv_thread_concurrency) {
+			enter_innodb_with_tickets(trx);
+			return;
+		}
+		__sync_add_and_fetch(&srv_conc_n_threads, -1);
+	}
+	if (!has_yielded)
+	{
+		has_yielded = TRUE;
+		os_thread_yield();
+		goto retry;
+	}
+	if (trx->has_search_latch
+	    || NULL != UT_LIST_GET_FIRST(trx->trx_locks)) {
+
+		conc_n_threads = __sync_add_and_fetch(&srv_conc_n_threads, 1);
+		enter_innodb_with_tickets(trx);
+		return;
+	}
+	if (has_slept < 2)
+	{
+		trx->op_info = "sleeping before entering InnoDB";
+		os_thread_sleep(10000);
+		trx->op_info = "";
+		has_slept++;
+	}
+	conc_n_threads = __sync_add_and_fetch(&srv_conc_n_threads, 1);
+	enter_innodb_with_tickets(trx);
+	return;
+}
+
+static void
+srv_conc_exit_innodb_timer_based(trx_t* trx)
+{
+	__sync_add_and_fetch(&srv_conc_n_threads, -1);
+	trx->declared_to_be_inside_innodb = FALSE;
+	trx->n_tickets_to_enter_innodb = 0;
+	return;
+}
+#endif
+
 UNIV_INTERN
 void
 srv_conc_enter_innodb(
@@ -1039,6 +1158,13 @@ srv_conc_enter_innodb(
 		return;
 	}
 
+#ifdef INNODB_RW_LOCKS_USE_ATOMICS
+	if (srv_thread_concurrency_timer_based) {
+		srv_conc_enter_innodb_timer_based(trx);
+		return;
+	}
+#endif
+
 	os_fast_mutex_lock(&srv_conc_mutex);
 retry:
 	if (trx->declared_to_be_inside_innodb) {
@@ -1182,6 +1308,14 @@ srv_conc_force_enter_innodb(
 	}
 
 	ut_ad(srv_conc_n_threads >= 0);
+#ifdef INNODB_RW_LOCKS_USE_ATOMICS
+	if (srv_thread_concurrency_timer_based) {
+		__sync_add_and_fetch(&srv_conc_n_threads, 1);
+		trx->declared_to_be_inside_innodb = TRUE;
+		trx->n_tickets_to_enter_innodb = 1;
+		return;
+	}
+#endif
 
 	os_fast_mutex_lock(&srv_conc_mutex);
 
@@ -1215,6 +1349,13 @@ srv_conc_force_exit_innodb(
 		return;
 	}
 
+#ifdef INNODB_RW_LOCKS_USE_ATOMICS
+	if (srv_thread_concurrency_timer_based) {
+		srv_conc_exit_innodb_timer_based(trx);
+		return;
+	}
+#endif
+
 	os_fast_mutex_lock(&srv_conc_mutex);
 
 	ut_ad(srv_conc_n_threads > 0);
@@ -1934,6 +2075,7 @@ srv_export_innodb_status(void)
 	export_vars.innodb_data_reads = os_n_file_reads;
 	export_vars.innodb_data_writes = os_n_file_writes;
 	export_vars.innodb_data_written = srv_data_written;
+	export_vars.innodb_dict_tables= (dict_sys ? UT_LIST_GET_LEN(dict_sys->table_LRU) : 0);
 	export_vars.innodb_buffer_pool_read_requests = buf_pool->n_page_gets;
 	export_vars.innodb_buffer_pool_write_requests
 		= srv_buf_pool_write_requests;
@@ -2348,6 +2490,8 @@ srv_master_thread(
 	ibool		skip_sleep	= FALSE;
 	ulint		i;
 
+	ib_uint64_t	lsn_old;
+
 	ib_uint64_t	oldest_lsn;
 	
 #ifdef UNIV_DEBUG_THREAD_CREATION
@@ -2365,6 +2509,9 @@ srv_master_thread(
 
 	mutex_exit(&kernel_mutex);
 
+	mutex_enter(&(log_sys->mutex));
+	lsn_old = log_sys->lsn;
+	mutex_exit(&(log_sys->mutex));
 loop:
 	/*****************************************************************/
 	/* ---- When there is database activity by users, we cycle in this
@@ -2399,6 +2546,19 @@ loop:
 		if (!skip_sleep) {
 
 			os_thread_sleep(1000000);
+
+			/*
+			mutex_enter(&(log_sys->mutex));
+			oldest_lsn = buf_pool_get_oldest_modification();
+			ib_uint64_t	lsn = log_sys->lsn;
+			mutex_exit(&(log_sys->mutex));
+
+			if(oldest_lsn)
+			fprintf(stderr,
+				"InnoDB flush: age pct: %lu, lsn progress: %lu\n",
+				(lsn - oldest_lsn) * 100 / log_sys->max_checkpoint_age,
+				lsn - lsn_old);
+			*/
 		}
 
 		skip_sleep = FALSE;
@@ -2437,14 +2597,15 @@ loop:
 			+ log_sys->n_pending_writes;
 		n_ios = log_sys->n_log_ios + buf_pool->n_pages_read
 			+ buf_pool->n_pages_written;
-		if (n_pend_ios < 3 && (n_ios - n_ios_old < PCT_IO(5))) {
+		if (n_pend_ios < PCT_IO(3) && (n_ios - n_ios_old < PCT_IO(5))) {
 			srv_main_thread_op_info = "doing insert buffer merge";
 			ibuf_contract_for_n_pages(
 				TRUE, PCT_IBUF_IO((srv_insert_buffer_batch_size / 4)));
 
 			srv_main_thread_op_info = "flushing log";
 
-			log_buffer_flush_to_disk();
+			/* No fsync when srv_flush_log_at_trx_commit != 1 */
+			log_buffer_flush_maybe_sync();
 		}
 
 		if (UNIV_UNLIKELY(buf_get_modified_ratio_pct()
@@ -2462,13 +2623,16 @@ loop:
 			iteration of this loop. */
 
 			skip_sleep = TRUE;
-		} else if (srv_adaptive_checkpoint) {
+			mutex_enter(&(log_sys->mutex));
+			lsn_old = log_sys->lsn;
+			mutex_exit(&(log_sys->mutex));
+		} else if (srv_adaptive_checkpoint == 1) {
 
 			/* Try to keep modified age not to exceed
 			max_checkpoint_age * 7/8 line */
 
 			mutex_enter(&(log_sys->mutex));
-
+			lsn_old = log_sys->lsn;
 			oldest_lsn = buf_pool_get_oldest_modification();
 			if (oldest_lsn == 0) {
 
@@ -2504,7 +2668,93 @@ loop:
 					mutex_exit(&(log_sys->mutex));
 				}
 			}
+		} else if (srv_adaptive_checkpoint == 2) {
 
+			/* Try to keep modified age not to exceed
+			max_checkpoint_age * 7/8 line */
+
+			mutex_enter(&(log_sys->mutex));
+
+			oldest_lsn = buf_pool_get_oldest_modification();
+			if (oldest_lsn == 0) {
+				lsn_old = log_sys->lsn;
+				mutex_exit(&(log_sys->mutex));
+
+			} else {
+				if ((log_sys->lsn - oldest_lsn)
+				    > (log_sys->max_checkpoint_age) - ((log_sys->max_checkpoint_age) / 8)) {
+					/* LOG_POOL_PREFLUSH_RATIO_ASYNC is exceeded. */
+					/* We should not flush from here. */
+					lsn_old = log_sys->lsn;
+					mutex_exit(&(log_sys->mutex));
+				} else if ((log_sys->lsn - oldest_lsn)
+					   > (log_sys->max_checkpoint_age)/2 ) {
+
+					/* defence line (max_checkpoint_age * 1/2) */
+					ib_uint64_t	lsn = log_sys->lsn;
+
+					mutex_exit(&(log_sys->mutex));
+
+					ib_uint64_t level, bpl;
+					buf_page_t* bpage;
+
+					mutex_enter(&flush_list_mutex);
+
+					level = 0;
+					bpage = UT_LIST_GET_FIRST(buf_pool->flush_list);
+
+					while (bpage != NULL) {
+						ib_uint64_t	oldest_modification = bpage->oldest_modification;
+						if (oldest_modification != 0) {
+							level += log_sys->max_checkpoint_age
+								 - (lsn - oldest_modification);
+						}
+						bpage = UT_LIST_GET_NEXT(flush_list, bpage);
+					}
+
+					if (level) {
+						bpl = ((ib_uint64_t) UT_LIST_GET_LEN(buf_pool->flush_list)
+							* UT_LIST_GET_LEN(buf_pool->flush_list)
+							* (lsn - lsn_old)) / level;
+					} else {
+						bpl = 0;
+					}
+
+					mutex_exit(&flush_list_mutex);
+
+					if (!srv_use_doublewrite_buf) {
+						/* flush is faster than when doublewrite */
+						bpl = (bpl * 3) / 4;
+					}
+
+					if (bpl) {
+retry_flush_batch:
+						n_pages_flushed = buf_flush_batch(BUF_FLUSH_LIST,
+									bpl,
+									oldest_lsn + (lsn - lsn_old));
+						if (n_pages_flushed == ULINT_UNDEFINED) {
+							os_thread_sleep(5000);
+							goto retry_flush_batch;
+						}
+					}
+
+					lsn_old = lsn;
+					/*
+					fprintf(stderr,
+						"InnoDB flush: age pct: %lu, lsn progress: %lu, blocks to flush:%llu\n",
+						(lsn - oldest_lsn) * 100 / log_sys->max_checkpoint_age,
+						lsn - lsn_old, bpl);
+					*/
+				} else {
+					lsn_old = log_sys->lsn;
+					mutex_exit(&(log_sys->mutex));
+				}
+			}
+
+		} else {
+			mutex_enter(&(log_sys->mutex));
+			lsn_old = log_sys->lsn;
+			mutex_exit(&(log_sys->mutex));
 		}
 
 		if (srv_activity_count == old_activity_count) {
@@ -2537,7 +2787,8 @@ loop:
  		buf_flush_batch(BUF_FLUSH_LIST, PCT_IO(100), IB_ULONGLONG_MAX);
 
 		srv_main_thread_op_info = "flushing log";
-		log_buffer_flush_to_disk();
+		/* No fsync when srv_flush_log_at_trx_commit != 1 */
+		log_buffer_flush_maybe_sync();
 	}
 
 	/* We run a batch of insert buffer merge every 10 seconds,
@@ -2547,7 +2798,8 @@ loop:
 	ibuf_contract_for_n_pages(TRUE, PCT_IBUF_IO((srv_insert_buffer_batch_size / 4)));
 
 	srv_main_thread_op_info = "flushing log";
-	log_buffer_flush_to_disk();
+	/* No fsync when srv_flush_log_at_trx_commit != 1 */
+	log_buffer_flush_maybe_sync();
 
 	/* We run a full purge every 10 seconds, even if the server
 	were active */
@@ -2718,7 +2970,14 @@ flush_loop:
 
 	srv_main_thread_op_info = "flushing log";
 
-	log_buffer_flush_to_disk();
+	current_time = time(NULL);
+	if (difftime(current_time, last_flush_time) > 1) {
+		log_buffer_flush_to_disk();
+		last_flush_time = current_time;
+	} else {
+		/* No fsync when srv_flush_log_at_trx_commit != 1 */
+		log_buffer_flush_maybe_sync();
+	}
 
 	srv_main_thread_op_info = "making checkpoint";
 

=== modified file 'storage/xtradb/srv/srv0start.c'
--- a/storage/xtradb/srv/srv0start.c	2009-06-09 15:08:46 +0000
+++ b/storage/xtradb/srv/srv0start.c	2009-08-03 20:09:53 +0000
@@ -1269,7 +1269,7 @@ innobase_start_or_create_for_mysql(void)
 		os_aio_init(8 * SRV_N_PENDING_IOS_PER_THREAD
 			    * srv_n_file_io_threads,
 			    srv_n_read_io_threads, srv_n_write_io_threads,
-			    SRV_MAX_N_PENDING_SYNC_IOS * 8);
+			    SRV_MAX_N_PENDING_SYNC_IOS);
 	} else {
 		os_aio_init(SRV_N_PENDING_IOS_PER_THREAD
 			    * srv_n_file_io_threads,

=== modified file 'storage/xtradb/sync/sync0sync.c'
--- a/storage/xtradb/sync/sync0sync.c	2009-03-26 06:11:11 +0000
+++ b/storage/xtradb/sync/sync0sync.c	2009-06-25 01:43:25 +0000
@@ -1081,6 +1081,12 @@ sync_thread_add_level(
 	case SYNC_TRX_SYS_HEADER:
 	case SYNC_FILE_FORMAT_TAG:
 	case SYNC_DOUBLEWRITE:
+	case SYNC_BUF_LRU_LIST:
+	case SYNC_BUF_FLUSH_LIST:
+	case SYNC_BUF_PAGE_HASH:
+	case SYNC_BUF_FREE_LIST:
+	case SYNC_BUF_ZIP_FREE:
+	case SYNC_BUF_ZIP_HASH:
 	case SYNC_BUF_POOL:
 	case SYNC_SEARCH_SYS:
 	case SYNC_SEARCH_SYS_CONF:
@@ -1107,7 +1113,7 @@ sync_thread_add_level(
 		/* Either the thread must own the buffer pool mutex
 		(buf_pool_mutex), or it is allowed to latch only ONE
 		buffer block (block->mutex or buf_pool_zip_mutex). */
-		ut_a((sync_thread_levels_contain(array, SYNC_BUF_POOL)
+		ut_a((sync_thread_levels_contain(array, SYNC_BUF_LRU_LIST)
 		      && sync_thread_levels_g(array, SYNC_BUF_BLOCK - 1))
 		     || sync_thread_levels_g(array, SYNC_BUF_BLOCK));
 		break;

=== modified file 'storage/xtradb/ut/ut0ut.c'
--- a/storage/xtradb/ut/ut0ut.c	2009-03-26 06:11:11 +0000
+++ b/storage/xtradb/ut/ut0ut.c	2009-06-25 01:43:25 +0000
@@ -372,6 +372,8 @@ ut_get_year_month_day(
 /*****************************************************************
 Runs an idle loop on CPU. The argument gives the desired delay
 in microseconds on 100 MHz Pentium + Visual C++. */
+extern ulint	srv_spins_microsec;
+
 UNIV_INTERN
 ulint
 ut_delay(
@@ -383,7 +385,11 @@ ut_delay(
 
 	j = 0;
 
-	for (i = 0; i < delay * 50; i++) {
+	for (i = 0; i < delay * srv_spins_microsec; i++) {
+#if (defined (__i386__) || defined (__x86_64__)) && defined (__GNUC__)
+		/* it is equal to the instruction 'pause' */
+		__asm__ __volatile__ ("rep; nop");
+#endif
 		j += i;
 	}