1 : /* Copyright (C) 2000-2008 MySQL AB
2 :
3 : This program is free software; you can redistribute it and/or modify
4 : it under the terms of the GNU General Public License as published by
5 : the Free Software Foundation; version 2 of the License.
6 :
7 : This program is distributed in the hope that it will be useful,
8 : but WITHOUT ANY WARRANTY; without even the implied warranty of
9 : MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
10 : GNU General Public License for more details.
11 :
12 : You should have received a copy of the GNU General Public License
13 : along with this program; if not, write to the Free Software
14 : Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */
15 :
16 : /*
17 : These functions handle page caching for Maria tables.
18 :
19 : One cache can handle many files.
20 : It must contain buffers of the same blocksize.
21 : init_pagecache() should be used to init cache handler.
22 :
  The free list (free_block_list) is a stack-like structure.
  When a block is freed by free_block(), it is pushed onto the stack.
  When a new block is required, we first try to pop one from the stack.
  If the stack is empty, we try to get a never-used block from the pool.
  If the pool is empty too, a block is taken from the LRU ring and flushed
  to disk if necessary. This is handled in find_block().
29 : With the new free list, the blocks can have three temperatures:
30 : hot, warm and cold (which is free). This is remembered in the block header
31 : by the enum PCBLOCK_TEMPERATURE temperature variable. Remembering the
32 : temperature is necessary to correctly count the number of warm blocks,
33 : which is required to decide when blocks are allowed to become hot. Whenever
34 : a block is inserted to another (sub-)chain, we take the old and new
35 : temperature into account to decide if we got one more or less warm block.
36 : blocks_unused is the sum of never used blocks in the pool and of currently
37 : free blocks. blocks_used is the number of blocks fetched from the pool and
38 : as such gives the maximum number of in-use blocks at any time.
39 :
40 : TODO: Write operation locks whole cache till the end of the operation.
41 : Should be fixed.
42 : */
43 :
44 : #include "maria_def.h"
45 : #include <m_string.h>
46 : #include "ma_pagecache.h"
47 : #include "ma_blockrec.h"
48 : #include <my_bit.h>
49 : #include <errno.h>
50 :
51 : /*
52 : Some compilation flags have been added specifically for this module
53 : to control the following:
54 : - not to let a thread to yield the control when reading directly
55 : from page cache, which might improve performance in many cases;
56 : to enable this add:
57 : #define SERIALIZED_READ_FROM_CACHE
58 : - to set an upper bound for number of threads simultaneously
59 : using the page cache; this setting helps to determine an optimal
60 : size for hash table and improve performance when the number of
61 : blocks in the page cache much less than the number of threads
62 : accessing it;
63 : to set this number equal to <N> add
64 : #define MAX_THREADS <N>
65 : - to substitute calls of pthread_cond_wait for calls of
66 : pthread_cond_timedwait (wait with timeout set up);
67 : this setting should be used only when you want to trap a deadlock
68 : situation, which theoretically should not happen;
69 : to set timeout equal to <T> seconds add
70 : #define PAGECACHE_TIMEOUT <T>
71 : - to enable the module traps and to send debug information from
72 : page cache module to a special debug log add:
73 : #define PAGECACHE_DEBUG
74 : the name of this debug log file <LOG NAME> can be set through:
75 : #define PAGECACHE_DEBUG_LOG <LOG NAME>
76 : if the name is not defined, it's set by default;
77 : if the PAGECACHE_DEBUG flag is not set up and we are in a debug
78 : mode, i.e. when ! defined(DBUG_OFF), the debug information from the
79 : module is sent to the regular debug log.
80 :
81 : Example of the settings:
82 : #define SERIALIZED_READ_FROM_CACHE
83 : #define MAX_THREADS 100
84 : #define PAGECACHE_TIMEOUT 1
85 : #define PAGECACHE_DEBUG
86 : #define PAGECACHE_DEBUG_LOG "my_pagecache_debug.log"
87 : */
88 :
89 : /*
  The key cache relies on external raw locking: its functions
  (key_cache_read(), key_cache_insert() and key_cache_write()) rely on the
  external MyISAM lock. The page cache does not have such a lock, so here
  we use SERIALIZED_READ_FROM_CACHE to avoid the problem of reading
  inconsistent data from a page.
95 : */
96 : #define SERIALIZED_READ_FROM_CACHE yes
97 :
/*
  Trace the state of page cache block B through DBUG_PRINT("info", ...).

  Printed fields: block address, file descriptor and page number of the
  attached hash link (0 when the block has no hash link), status bits,
  hash link address, request counters of the block and of its hash link,
  write/read lock counters, queued read locks, pin counter, status and
  the textual page type.

  Every field is read through the macro argument B, so the macro does not
  depend on the caller having a local variable called 'block'.
*/
#define PCBLOCK_INFO(B)                                                 \
  DBUG_PRINT("info",                                                    \
             ("block: 0x%lx fd: %lu page: %lu s: %0x hshL: "            \
              " 0x%lx req: %u/%u wrlocks: %u rdlocks %u "               \
              "rdlocks_q: %u pins: %u status: %u type: %s",             \
              (ulong)(B),                                               \
              (ulong)((B)->hash_link ?                                  \
                      (B)->hash_link->file.file :                       \
                      0),                                               \
              (ulong)((B)->hash_link ?                                  \
                      (B)->hash_link->pageno :                          \
                      0),                                               \
              (B)->status,                                              \
              (ulong)(B)->hash_link,                                    \
              (uint) (B)->requests,                                     \
              (uint)((B)->hash_link ?                                   \
                     (B)->hash_link->requests :                         \
                     0),                                                \
              (B)->wlocks, (B)->rlocks, (B)->rlocks_queue,              \
              (uint)(B)->pins, (uint)(B)->status,                       \
              page_cache_page_type_str[(B)->type]))
119 :
/*
  Global switch, initially off (0).
  NOTE(review): presumably makes the flush routines skip writing blocks when
  set; its readers are outside this part of the file - confirm.
  TODO: put it to my_static.c
*/
my_bool my_disable_flush_pagecache_blocks= 0;
122 :
/*
  Given a pointer 'a' to the member MEMBER embedded in an object of type
  TYPE, yield a pointer to the enclosing object.

  The whole expansion is parenthesized so that the result can be used
  directly as an operand, e.g. STRUCT_PTR(T, m, p)->other_member.
*/
#define STRUCT_PTR(TYPE, MEMBER, a)                                     \
          ((TYPE *) ((char *) (a) - offsetof(TYPE, MEMBER)))
125 :
/*
  Types of condition variables (wait queues).
  COND_SIZE is used as the dimension of the per-block wqueue[] array, so the
  values below are indexes into that array.
*/
#define  COND_FOR_REQUESTED 0  /* queue of threads waiting for read operation */
#define  COND_FOR_SAVED     1  /* queue of threads waiting for flush */
#define  COND_FOR_WRLOCK    2  /* queue of threads waiting for write lock */
#define  COND_SIZE          3  /* number of COND_* queues */

/* Condition variable type used by the page cache (name kept from key cache) */
typedef pthread_cond_t KEYCACHE_CONDVAR;
133 :
/*
  Descriptor of the page in the page cache block buffer:
  a page is identified by the file it belongs to and its number in that file.
*/
struct st_pagecache_page
{
  PAGECACHE_FILE file;    /* file to which the page belongs */
  pgcache_page_no_t pageno; /* number of the page in the file */
};
140 :
/*
  Element in the chain of a hash table bucket.
  Connects a (file, pageno) pair to the cache block holding that page.
*/
struct st_pagecache_hash_link
{
  struct st_pagecache_hash_link
    *next, **prev;                 /* to connect links in the same bucket */
  struct st_pagecache_block_link
    *block;                        /* reference to the block for the page: */
  PAGECACHE_FILE file;             /* from such a file */
  pgcache_page_no_t pageno;        /* this page */
  uint requests;                   /* number of requests for the page */
};
152 :
/*
  Simple states of a block.
  Each value is a distinct bit, so several states can presumably be combined
  in the block's 'status' field (verify against the users of 'status').
*/
#define PCBLOCK_ERROR       1 /* an error occurred when performing disk i/o */
#define PCBLOCK_READ        2 /* the page is in the block buffer            */
#define PCBLOCK_IN_SWITCH   4 /* block is preparing to read new page        */
#define PCBLOCK_REASSIGNED  8 /* block does not accept requests for old page */
#define PCBLOCK_IN_FLUSH   16 /* block is in flush operation                */
#define PCBLOCK_CHANGED    32 /* block buffer contains a dirty page         */
#define PCBLOCK_DIRECT_W   64 /* possible direct write to the block         */
161 :
/* page status, returned by find_block */
#define PAGE_READ               0
#define PAGE_TO_BE_READ         1
#define PAGE_WAIT_TO_BE_READ    2

/*
  Block temperature determines in which (sub-)chain the block currently is.
  PCBLOCK_COLD means the block is free.
*/
enum PCBLOCK_TEMPERATURE { PCBLOCK_COLD /*free*/ , PCBLOCK_WARM , PCBLOCK_HOT };
169 :
170 : /* debug info */
171 : #ifndef DBUG_OFF
/*
  Names of page types for debug traces; indexed by the block's 'type'
  (see PCBLOCK_INFO).
*/
static const char *page_cache_page_type_str[]=
{
  /* used only for control page type changing during debugging */
  "EMPTY",
  "PLAIN",
  "LSN",
  "READ_UNKNOWN"
};

/*
  Names of write modes for debug traces.
  NOTE(review): presumably indexed by the write-mode enum; users are outside
  this part of the file.
*/
static const char *page_cache_page_write_mode_str[]=
{
  "DELAY",
  "DONE"
};

/*
  Names of lock transitions ("state before -> state after") for debug
  traces; indexed by enum pagecache_page_lock.
*/
static const char *page_cache_page_lock_str[]=
{
  "free  -> free",
  "read  -> read",
  "write -> write",
  "free  -> read",
  "free  -> write",
  "read  -> free",
  "write -> free",
  "write -> read"
};

/*
  Names of pin transitions ("state before -> state after") for debug
  traces; indexed by enum pagecache_page_pin.
*/
static const char *page_cache_page_pin_str[]=
{
  "pinned   -> pinned",
  "unpinned -> unpinned",
  "unpinned -> pinned",
  "pinned   -> unpinned"
};
206 :
207 :
/*
  Debug-only list node recording that a thread pinned a block.
  'prev' points to the pointer that references this node (the list head or
  the previous node's 'next'), which lets info_unlink() remove the node
  without knowing the list head.
*/
typedef struct st_pagecache_pin_info
{
  struct st_pagecache_pin_info *next, **prev;
  struct st_my_thread_var *thread;   /* thread which pinned the block */
} PAGECACHE_PIN_INFO;
213 :
/*
  Debug-only list node recording that a thread locked a block.

  The leading members (next, prev, thread) must stay layout-compatible with
  st_pagecache_pin_info: lock lists are passed to the info_*() list
  functions through a cast to PAGECACHE_PIN_INFO.
*/

typedef struct st_pagecache_lock_info
{
  struct st_pagecache_lock_info *next, **prev;
  struct st_my_thread_var *thread;   /* thread which locked the block */
  my_bool write_lock;                /* non-zero for a write lock */
} PAGECACHE_LOCK_INFO;
225 :
226 :
227 : /* service functions maintain debugging info about pin & lock */
228 :
229 :
230 : /*
231 : Links information about thread pinned/locked the block to the list
232 :
233 : SYNOPSIS
234 : info_link()
235 : list the list to link in
236 : node the node which should be linked
237 : */
238 :
239 : static void info_link(PAGECACHE_PIN_INFO **list, PAGECACHE_PIN_INFO *node)
240 32929814 : {
241 32929814 : if ((node->next= *list))
242 12466628 : node->next->prev= &(node->next);
243 32929814 : *list= node;
244 32929814 : node->prev= list;
245 : }
246 :
247 :
248 : /*
249 : Unlinks information about thread pinned/locked the block from the list
250 :
251 : SYNOPSIS
252 : info_unlink()
253 : node the node which should be unlinked
254 : */
255 :
256 : static void info_unlink(PAGECACHE_PIN_INFO *node)
257 32929814 : {
258 32929814 : if ((*node->prev= node->next))
259 12466628 : node->next->prev= node->prev;
260 : }
261 :
262 :
263 : /*
264 : Finds information about given thread in the list of threads which
265 : pinned/locked this block.
266 :
267 : SYNOPSIS
268 : info_find()
269 : list the list where to find the thread
270 : thread thread ID (reference to the st_my_thread_var
271 : of the thread)
272 : any return any thread of the list
273 :
274 : RETURN
275 : 0 - the thread was not found
276 : pointer to the information node of the thread in the list, or, if 'any',
277 : to any thread of the list.
278 : */
279 :
280 : static PAGECACHE_PIN_INFO *info_find(PAGECACHE_PIN_INFO *list,
281 : struct st_my_thread_var *thread,
282 : my_bool any)
283 33390278 : {
284 33390278 : register PAGECACHE_PIN_INFO *i= list;
285 33390278 : if (any)
286 0 : return i;
287 0 : for(; i != 0; i= i->next)
288 33390278 : if (i->thread == thread)
289 33390278 : return i;
290 0 : return 0;
291 : }
292 :
293 : #endif /* !DBUG_OFF */
294 :
/*
  Page cache block: control structure for one page buffer of the cache.
*/
struct st_pagecache_block_link
{
  struct st_pagecache_block_link
    *next_used, **prev_used;   /* to connect links in the LRU chain (ring) */
  struct st_pagecache_block_link
    *next_changed, **prev_changed; /* for lists of file dirty/clean blocks */
  struct st_pagecache_hash_link
    *hash_link;           /* backward ptr to referring hash_link */
#ifndef DBUG_OFF
  /* debug-only lists of threads which pinned / locked this block */
  PAGECACHE_PIN_INFO *pin_list;
  PAGECACHE_LOCK_INFO *lock_list;
#endif
  KEYCACHE_CONDVAR *condvar; /* condition variable for 'no readers' event */
  uchar *buffer;           /* buffer for the block page */
  /* NOTE(review): presumably the thread holding the write lock - confirm */
  pthread_t write_locker;

  ulonglong last_hit_time; /* timestamp of the last hit */
  WQUEUE
    wqueue[COND_SIZE];    /* queues on waiting requests for new/old pages */
  uint32 requests;        /* number of requests for the block */
  uint32 pins;            /* pin counter */
  uint32 wlocks;          /* write locks counter */
  uint32 rlocks;          /* read locks counter */
  uint32 rlocks_queue;    /* rd. locks waiting wr. lock of this thread */
  uint16 status;          /* state of the block (PCBLOCK_* values) */
  int16  error;           /* error code for block in case of error */
  enum PCBLOCK_TEMPERATURE temperature; /* block temperature: cold, warm, hot*/
  enum pagecache_page_type type; /* type of the block */
  uint hits_left;         /* number of hits left until promotion */
  /** @brief LSN when first became dirty; LSN_MAX means "not yet set" */
  LSN rec_lsn;
};
328 :
/**
  @brief information describing a run of flush_pagecache_blocks_int()

  Objects of this type are stored in the pagecache->files_in_flush hash,
  keyed by the 'file' member (see hash_init() in init_pagecache()).
*/
struct st_file_in_flush
{
  File file; /**< @brief file being flushed; hash key */
  /**
     @brief threads waiting for the thread currently flushing this file to be
     done
  */
  WQUEUE flush_queue;
  /**
     @brief if the thread currently flushing the file has a non-empty
     first_in_switch list.
  */
  my_bool first_in_switch;
};
344 :
345 : #ifndef DBUG_OFF
346 : /* debug checks */
347 :
348 : #ifdef NOT_USED
349 : static my_bool info_check_pin(PAGECACHE_BLOCK_LINK *block,
350 : enum pagecache_page_pin mode
351 : __attribute__((unused)))
352 : {
353 : struct st_my_thread_var *thread= my_thread_var;
354 : PAGECACHE_PIN_INFO *info= info_find(block->pin_list, thread);
355 : DBUG_ENTER("info_check_pin");
356 : DBUG_PRINT("enter", ("thread: 0x%lx pin: %s",
357 : (ulong) thread, page_cache_page_pin_str[mode]));
358 : if (info)
359 : {
360 : if (mode == PAGECACHE_PIN_LEFT_UNPINNED)
361 : {
362 : DBUG_PRINT("info",
363 : ("info_check_pin: thread: 0x%lx block: 0x%lx ; LEFT_UNPINNED!!!",
364 : (ulong)thread, (ulong)block));
365 : DBUG_RETURN(1);
366 : }
367 : else if (mode == PAGECACHE_PIN)
368 : {
369 : DBUG_PRINT("info",
370 : ("info_check_pin: thread: 0x%lx block: 0x%lx ; PIN!!!",
371 : (ulong)thread, (ulong)block));
372 : DBUG_RETURN(1);
373 : }
374 : }
375 : else
376 : {
377 : if (mode == PAGECACHE_PIN_LEFT_PINNED)
378 : {
379 : DBUG_PRINT("info",
380 : ("info_check_pin: thread: 0x%lx block: 0x%lx ; LEFT_PINNED!!!",
381 : (ulong)thread, (ulong)block));
382 : DBUG_RETURN(1);
383 : }
384 : else if (mode == PAGECACHE_UNPIN)
385 : {
386 : DBUG_PRINT("info",
387 : ("info_check_pin: thread: 0x%lx block: 0x%lx ; UNPIN!!!",
388 : (ulong)thread, (ulong)block));
389 : DBUG_RETURN(1);
390 : }
391 : }
392 : DBUG_RETURN(0);
393 : }
394 :
395 :
396 : /*
397 : Debug function which checks current lock/pin state and requested changes
398 :
399 : SYNOPSIS
400 : info_check_lock()
401 : lock requested lock changes
402 : pin requested pin changes
403 :
404 : RETURN
405 : 0 - OK
406 : 1 - Error
407 : */
408 :
409 : static my_bool info_check_lock(PAGECACHE_BLOCK_LINK *block,
410 : enum pagecache_page_lock lock,
411 : enum pagecache_page_pin pin)
412 : {
413 : struct st_my_thread_var *thread= my_thread_var;
414 : PAGECACHE_LOCK_INFO *info=
415 : (PAGECACHE_LOCK_INFO *) info_find((PAGECACHE_PIN_INFO *) block->lock_list,
416 : thread);
417 : DBUG_ENTER("info_check_lock");
418 : switch(lock) {
419 : case PAGECACHE_LOCK_LEFT_UNLOCKED:
420 : if (pin != PAGECACHE_PIN_LEFT_UNPINNED ||
421 : info)
422 : goto error;
423 : break;
424 : case PAGECACHE_LOCK_LEFT_READLOCKED:
425 : if ((pin != PAGECACHE_PIN_LEFT_UNPINNED &&
426 : pin != PAGECACHE_PIN_LEFT_PINNED) ||
427 : info == 0 || info->write_lock)
428 : goto error;
429 : break;
430 : case PAGECACHE_LOCK_LEFT_WRITELOCKED:
431 : if (pin != PAGECACHE_PIN_LEFT_PINNED ||
432 : info == 0 || !info->write_lock)
433 : goto error;
434 : break;
435 : case PAGECACHE_LOCK_READ:
436 : if ((pin != PAGECACHE_PIN_LEFT_UNPINNED &&
437 : pin != PAGECACHE_PIN) ||
438 : info != 0)
439 : goto error;
440 : break;
441 : case PAGECACHE_LOCK_WRITE:
442 : if (pin != PAGECACHE_PIN ||
443 : info != 0)
444 : goto error;
445 : break;
446 : case PAGECACHE_LOCK_READ_UNLOCK:
447 : if ((pin != PAGECACHE_PIN_LEFT_UNPINNED &&
448 : pin != PAGECACHE_UNPIN) ||
449 : info == 0 || info->write_lock)
450 : goto error;
451 : break;
452 : case PAGECACHE_LOCK_WRITE_UNLOCK:
453 : if (pin != PAGECACHE_UNPIN ||
454 : info == 0 || !info->write_lock)
455 : goto error;
456 : break;
457 : case PAGECACHE_LOCK_WRITE_TO_READ:
458 : if ((pin != PAGECACHE_PIN_LEFT_PINNED &&
459 : pin != PAGECACHE_UNPIN) ||
460 : info == 0 || !info->write_lock)
461 : goto error;
462 : break;
463 : }
464 : DBUG_RETURN(0);
465 : error:
466 : DBUG_PRINT("info",
467 : ("info_check_lock: thread: 0x%lx block 0x%lx: info: %d wrt: %d,"
468 : "to lock: %s, to pin: %s",
469 : (ulong)thread, (ulong)block, test(info),
470 : (info ? info->write_lock : 0),
471 : page_cache_page_lock_str[lock],
472 : page_cache_page_pin_str[pin]));
473 : DBUG_RETURN(1);
474 : }
475 : #endif /* NOT_USED */
476 : #endif /* !DBUG_OFF */
477 :
#define FLUSH_CACHE         2000            /* sort this many blocks at once */

/* Forward declarations of functions defined later in this file */
static void free_block(PAGECACHE *pagecache, PAGECACHE_BLOCK_LINK *block);
#ifndef DBUG_OFF
/* debug builds only */
static void test_key_cache(PAGECACHE *pagecache,
                           const char *where, my_bool lock);
#endif
485 :
/*
  PAGECACHE_HASH(p, f, pos): hash bucket number for page 'pos' of file 'f'
  in page cache 'p'. The sum of the page number and the file descriptor is
  masked with hash_entries-1 (hash_entries is a power of 2, see
  init_pagecache()).

  FILE_HASH(f): bucket number of file 'f' in the arrays of
  PAGECACHE_CHANGED_BLOCKS_HASH elements (changed_blocks / file_blocks).

  All macro arguments are parenthesized so that any expression can be
  passed, e.g. PAGECACHE_HASH(caches + i, file, pos).
*/
#define PAGECACHE_HASH(p, f, pos) (((ulong) (pos) +                     \
                                    (ulong) (f).file) & ((p)->hash_entries-1))
#define FILE_HASH(f) ((uint) (f).file & (PAGECACHE_CHANGED_BLOCKS_HASH - 1))
489 :
/* Name of the page cache debug log when PAGECACHE_DEBUG_LOG is not given */
#define DEFAULT_PAGECACHE_DEBUG_LOG  "pagecache_debug.log"

#if defined(PAGECACHE_DEBUG) && ! defined(PAGECACHE_DEBUG_LOG)
#define PAGECACHE_DEBUG_LOG  DEFAULT_PAGECACHE_DEBUG_LOG
#endif

#if defined(PAGECACHE_DEBUG_LOG)
static FILE *pagecache_debug_log= NULL;
static void pagecache_debug_print _VARARGS((const char *fmt, ...));
/*
  Open the debug log (once) and make it line buffered.
  If the file cannot be opened the log pointer stays NULL; setvbuf() must
  not be called on a NULL stream, so it is skipped in that case.
*/
#define PAGECACHE_DEBUG_OPEN                                                  \
          if (!pagecache_debug_log)                                           \
          {                                                                   \
            pagecache_debug_log= fopen(PAGECACHE_DEBUG_LOG, "w");             \
            if (pagecache_debug_log)                                          \
              (void) setvbuf(pagecache_debug_log, NULL, _IOLBF, BUFSIZ);      \
          }

/* Close the debug log if it is open and forget the stream */
#define PAGECACHE_DEBUG_CLOSE                                                 \
          if (pagecache_debug_log)                                            \
          {                                                                   \
            fclose(pagecache_debug_log);                                      \
            pagecache_debug_log= 0;                                           \
          }
#else
/* No separate debug log: both macros expand to nothing */
#define PAGECACHE_DEBUG_OPEN
#define PAGECACHE_DEBUG_CLOSE
#endif /* defined(PAGECACHE_DEBUG_LOG) */
516 :
/*
  Debug print / assert used by the page cache.
  With both PAGECACHE_DEBUG_LOG and PAGECACHE_DEBUG defined they go to the
  separate page cache debug log; otherwise they map to the regular
  DBUG_PRINT / DBUG_ASSERT.
*/
#if defined(PAGECACHE_DEBUG_LOG) && defined(PAGECACHE_DEBUG)
/* 'l' is the keyword written as "l: " prefix; 'm' is a parenthesized
   printf-style argument list passed to pagecache_debug_print() */
#define KEYCACHE_DBUG_PRINT(l, m)                                             \
            { if (pagecache_debug_log)                                        \
                fprintf(pagecache_debug_log, "%s: ", l);                      \
              pagecache_debug_print m; }

/* Closes the debug log before the failing assert() is raised */
#define KEYCACHE_DBUG_ASSERT(a)                                               \
            { if (! (a) && pagecache_debug_log)                               \
                fclose(pagecache_debug_log);                                  \
              assert(a); }
#else
#define KEYCACHE_DBUG_PRINT(l, m)  DBUG_PRINT(l, m)
#define KEYCACHE_DBUG_ASSERT(a)    DBUG_ASSERT(a)
#endif /* defined(PAGECACHE_DEBUG_LOG) && defined(PAGECACHE_DEBUG) */
531 :
/*
  Thread tracing macros. They print the id of the current thread under
  keyword 'l': "[thread N" at the beginning, "|thread N" inside and
  "]thread N" at the end of a traced section. They expand to nothing when
  neither PAGECACHE_DEBUG nor a debug (!DBUG_OFF) build is in effect.
*/
#if defined(PAGECACHE_DEBUG) || !defined(DBUG_OFF)
#ifdef THREAD
/* id stored by the most recent KEYCACHE_THREAD_TRACE_BEGIN (file-scope) */
static long pagecache_thread_id;
#define KEYCACHE_THREAD_TRACE(l)                                              \
             KEYCACHE_DBUG_PRINT(l,("|thread %ld",pagecache_thread_id))

#define KEYCACHE_THREAD_TRACE_BEGIN(l)                                        \
            { struct st_my_thread_var *thread_var= my_thread_var;             \
              pagecache_thread_id= thread_var->id;                            \
              KEYCACHE_DBUG_PRINT(l,("[thread %ld",pagecache_thread_id)) }

#define KEYCACHE_THREAD_TRACE_END(l)                                          \
            KEYCACHE_DBUG_PRINT(l,("]thread %ld",pagecache_thread_id))
#else /* THREAD */
#define KEYCACHE_THREAD_TRACE(l)        KEYCACHE_DBUG_PRINT(l,(""))
#define KEYCACHE_THREAD_TRACE_BEGIN(l)  KEYCACHE_DBUG_PRINT(l,(""))
#define KEYCACHE_THREAD_TRACE_END(l)    KEYCACHE_DBUG_PRINT(l,(""))
#endif /* THREAD */
#else
#define KEYCACHE_THREAD_TRACE_BEGIN(l)
#define KEYCACHE_THREAD_TRACE_END(l)
#define KEYCACHE_THREAD_TRACE(l)
#endif /* defined(PAGECACHE_DEBUG) || !defined(DBUG_OFF) */
555 :
/*
  PCBLOCK_NUMBER(p, b): index of block 'b' in the block array of page
  cache 'p' (which starts at p->block_root).

  PAGECACHE_HASH_LINK_NUMBER(p, h): index of hash link 'h' in the hash link
  array of page cache 'p' (which starts at p->hash_link_root).

  'p' is parenthesized so that any pointer expression can be passed.
*/
#define PCBLOCK_NUMBER(p, b)                                                    \
  ((uint) (((char*)(b)-(char *) (p)->block_root)/sizeof(PAGECACHE_BLOCK_LINK)))
#define PAGECACHE_HASH_LINK_NUMBER(p, h)                                      \
  ((uint) (((char*)(h)-(char *) (p)->hash_link_root)/                         \
           sizeof(PAGECACHE_HASH_LINK)))
561 :
/*
  Condition wait used by the page cache: a local wrapper function when
  PAGECACHE_TIMEOUT (non-Windows) or PAGECACHE_DEBUG is defined, otherwise
  plain pthread_cond_wait.
*/
#if (defined(PAGECACHE_TIMEOUT) && !defined(__WIN__)) || defined(PAGECACHE_DEBUG)
static int pagecache_pthread_cond_wait(pthread_cond_t *cond,
                                      pthread_mutex_t *mutex);
#else
#define  pagecache_pthread_cond_wait pthread_cond_wait
#endif

/*
  Mutex lock/unlock and condition signal used by the page cache.
  With PAGECACHE_DEBUG each call is traced (object address and source line)
  before being forwarded to a local wrapper; otherwise the pthread
  functions are used directly.
  Note that the debug variants expand to a braced block, not an expression.
*/
#if defined(PAGECACHE_DEBUG)
static int ___pagecache_pthread_mutex_lock(pthread_mutex_t *mutex);
static void ___pagecache_pthread_mutex_unlock(pthread_mutex_t *mutex);
static int ___pagecache_pthread_cond_signal(pthread_cond_t *cond);
#define pagecache_pthread_mutex_lock(M) \
{ DBUG_PRINT("lock", ("mutex lock 0x%lx %u", (ulong)(M), __LINE__)); \
  ___pagecache_pthread_mutex_lock(M);}
#define pagecache_pthread_mutex_unlock(M) \
{ DBUG_PRINT("lock", ("mutex unlock 0x%lx %u", (ulong)(M), __LINE__)); \
  ___pagecache_pthread_mutex_unlock(M);}
#define pagecache_pthread_cond_signal(M) \
{ DBUG_PRINT("lock", ("signal 0x%lx %u", (ulong)(M), __LINE__)); \
  ___pagecache_pthread_cond_signal(M);}
#else
#define pagecache_pthread_mutex_lock pthread_mutex_lock
#define pagecache_pthread_mutex_unlock pthread_mutex_unlock
#define pagecache_pthread_cond_signal pthread_cond_signal
#endif /* defined(PAGECACHE_DEBUG) */

/* Defined outside this file (transaction log module) */
extern my_bool translog_flush(TRANSLOG_ADDRESS lsn);
589 :
590 : /*
591 : Write page to the disk
592 :
593 : SYNOPSIS
594 : pagecache_fwrite()
595 : pagecache - page cache pointer
596 : filedesc - pagecache file descriptor structure
597 : buffer - buffer which we will write
598 : type - page type (plain or with LSN)
599 : flags - MYF() flags
600 :
601 : RETURN
602 : 0 - OK
603 : 1 - Error
604 : */
605 :
606 : static my_bool pagecache_fwrite(PAGECACHE *pagecache,
607 : PAGECACHE_FILE *filedesc,
608 : uchar *buffer,
609 : pgcache_page_no_t pageno,
610 : enum pagecache_page_type type
611 : __attribute__((unused)),
612 : myf flags)
613 851382 : {
614 851382 : DBUG_ENTER("pagecache_fwrite");
615 851382 : DBUG_ASSERT(type != PAGECACHE_READ_UNKNOWN_PAGE);
616 :
617 : /* Todo: Integrate this with write_callback so we have only one callback */
618 851382 : if ((*filedesc->flush_log_callback)(buffer, pageno, filedesc->callback_data))
619 0 : DBUG_RETURN(1);
620 851382 : DBUG_PRINT("info", ("write_callback: 0x%lx data: 0x%lx",
621 : (ulong) filedesc->write_callback,
622 : (ulong) filedesc->callback_data));
623 851382 : if ((*filedesc->write_callback)(buffer, pageno, filedesc->callback_data))
624 : {
625 0 : DBUG_PRINT("error", ("write callback problem"));
626 0 : DBUG_RETURN(1);
627 : }
628 851382 : if (my_pwrite(filedesc->file, buffer, pagecache->block_size,
629 : ((my_off_t) pageno << pagecache->shift), flags))
630 : {
631 0 : (*filedesc->write_fail)(filedesc->callback_data);
632 0 : DBUG_RETURN(1);
633 : }
634 851382 : DBUG_RETURN(0);
635 : }
636 :
637 :
/*
  Read page from the disk

  SYNOPSIS
    pagecache_fread()
    pagecache - page cache pointer
    filedesc  - pagecache file descriptor structure
    buffer    - buffer in which we will read
    pageno    - page number
    flags     - MYF() flags

  NOTES
    Expands to a my_pread() call reading one block (pagecache->block_size
    bytes) at offset pageno << pagecache->shift; the value of the macro is
    the value returned by my_pread().
    Every argument is parenthesized so that arbitrary expressions can be
    passed; in particular the my_off_t cast applies to the whole 'pageno'
    expression.
*/
#define pagecache_fread(pagecache, filedesc, buffer, pageno, flags) \
  my_pread((filedesc)->file, (buffer), (pagecache)->block_size,     \
           ((my_off_t) (pageno) << (pagecache)->shift), (flags))
652 :
653 :
654 : /**
655 : @brief set rec_lsn of pagecache block (if it is needed)
656 :
657 : @param block block where to set rec_lsn
658 : @param first_REDO_LSN_for_page the LSN to set
659 : */
660 :
661 : static inline void pagecache_set_block_rec_lsn(PAGECACHE_BLOCK_LINK *block,
662 : LSN first_REDO_LSN_for_page)
663 1761848 : {
664 1761848 : if (block->rec_lsn == LSN_MAX)
665 149902 : block->rec_lsn= first_REDO_LSN_for_page;
666 : else
667 1611946 : DBUG_ASSERT(cmp_translog_addr(block->rec_lsn,
668 : first_REDO_LSN_for_page) <= 0);
669 : }
670 :
671 :
672 : /*
673 : next_power(value) is 2 at the power of (1+floor(log2(value)));
674 : e.g. next_power(2)=4, next_power(3)=4.
675 : */
676 : static inline uint next_power(uint value)
677 2559 : {
678 2559 : return (uint) my_round_up_to_next_power((uint32) value) << 1;
679 : }
680 :
681 :
/*
  Initialize a page cache

  SYNOPSIS
    init_pagecache()
    pagecache           pointer to a page cache data structure
    use_mem             total memory to use for the page cache
    division_limit      division limit (may be zero)
    age_threshold       age threshold (may be zero)
    block_size          size of block (should be power of 2, >= 512)
    my_readwrite_flags  Flags used for all pread/pwrite calls
                        Usually MY_WME in case of recovery

  RETURN VALUE
    number of blocks in the page cache, if successful,
    0 - otherwise (my_errno is set; pagecache->can_be_used is cleared
        on the error path).

  NOTES.
    If pagecache->inited != 0 and the cache has blocks, the cache is
    considered to be already in use and 0 is returned without changes.
    This is for now used by myisamchk, but shouldn't be something that
    a program should rely on!

    It's assumed that no two threads call this function simultaneously
    referring to the same page cache handle.

    Memory layout: page buffers are allocated with my_large_malloc()
    (block_mem); the block descriptors, the hash bucket array and the hash
    links share one my_malloc() area (block_root, hash_root,
    hash_link_root, in this order).
*/

ulong init_pagecache(PAGECACHE *pagecache, size_t use_mem,
                     uint division_limit, uint age_threshold,
                     uint block_size, myf my_readwrite_flags)
{
  ulong blocks, hash_links, length;
  int error;
  DBUG_ENTER("init_pagecache");
  DBUG_ASSERT(block_size >= 512);

  PAGECACHE_DEBUG_OPEN;
  if (pagecache->inited && pagecache->disk_blocks > 0)
  {
    DBUG_PRINT("warning",("key cache already in use"));
    DBUG_RETURN(0);
  }

  pagecache->global_cache_w_requests= pagecache->global_cache_r_requests= 0;
  pagecache->global_cache_read= pagecache->global_cache_write= 0;
  pagecache->disk_blocks= -1;
  if (! pagecache->inited)
  {
    /* First initialization: create the mutex and the files_in_flush hash */
    if (pthread_mutex_init(&pagecache->cache_lock, MY_MUTEX_INIT_FAST) ||
        hash_init(&pagecache->files_in_flush, &my_charset_bin, 32,
                  offsetof(struct st_file_in_flush, file),
                  sizeof(((struct st_file_in_flush *)NULL)->file),
                  NULL, NULL, 0))
      goto err;
    pagecache->inited= 1;
    pagecache->in_init= 0;
    pagecache->resize_queue.last_thread= NULL;
  }

  pagecache->mem_size= use_mem;
  pagecache->block_size= block_size;
  pagecache->shift= my_bit_log2(block_size);
  pagecache->readwrite_flags= my_readwrite_flags | MY_NABP | MY_WAIT_IF_FULL;
  pagecache->org_readwrite_flags= pagecache->readwrite_flags;
  DBUG_PRINT("info", ("block_size: %u", block_size));
  /* block_size must be an exact power of 2 */
  DBUG_ASSERT(((uint)(1 << pagecache->shift)) == block_size);

  /*
    First estimate of the number of blocks: memory divided by the cost of
    one block (descriptor + 2 hash links + 5/4 hash bucket pointer + buffer)
  */
  blocks= (ulong) (use_mem / (sizeof(PAGECACHE_BLOCK_LINK) +
                              2 * sizeof(PAGECACHE_HASH_LINK) +
                              sizeof(PAGECACHE_HASH_LINK*) *
                              5/4 + block_size));
  /*
    We need to support page cache with just one block to be able to do
    scanning of rows-in-block files
    NOTE(review): the check below nevertheless refuses fewer than 8 blocks.
  */
  for ( ; ; )
  {
    if (blocks < 8)
    {
      my_errno= ENOMEM;
      goto err;
    }
    /* Set hash_entries to the next bigger 2 power (at least blocks*5/4) */
    if ((pagecache->hash_entries= next_power(blocks)) <
        (blocks) * 5/4)
      pagecache->hash_entries<<= 1;
    hash_links= 2 * blocks;
#if defined(MAX_THREADS)
    if (hash_links < MAX_THREADS + blocks - 1)
      hash_links= MAX_THREADS + blocks - 1;
#endif
    /*
      Decrease the number of blocks until control structures ('length')
      plus page buffers fit in use_mem. hash_links and hash_entries are
      not recomputed while 'blocks' shrinks here.
    */
    while ((length= (ALIGN_SIZE(blocks * sizeof(PAGECACHE_BLOCK_LINK)) +
                     ALIGN_SIZE(hash_links * sizeof(PAGECACHE_HASH_LINK)) +
                     ALIGN_SIZE(sizeof(PAGECACHE_HASH_LINK*) *
                                pagecache->hash_entries))) +
           (blocks << pagecache->shift) > use_mem)
      blocks--;
    /* Allocate memory for cache page buffers */
    if ((pagecache->block_mem=
         my_large_malloc((ulong) blocks * pagecache->block_size,
                         MYF(MY_WME))))
    {
      /*
        Allocate memory for blocks, hash_links and hash entries;
        For each block 2 hash links are allocated
      */
      if ((pagecache->block_root=
           (PAGECACHE_BLOCK_LINK*) my_malloc((size_t) length, MYF(0))))
        break;
      my_large_free(pagecache->block_mem, MYF(0));
      pagecache->block_mem= 0;
    }
    /* Allocation failed: retry with 3/4 of the blocks */
    blocks= blocks / 4*3;
  }
  pagecache->blocks_unused= blocks;
  pagecache->disk_blocks= (long) blocks;
  pagecache->hash_links= hash_links;
  /* Carve the hash bucket array and the hash links out of the shared area */
  pagecache->hash_root=
    (PAGECACHE_HASH_LINK**) ((char*) pagecache->block_root +
                             ALIGN_SIZE(blocks*sizeof(PAGECACHE_BLOCK_LINK)));
  pagecache->hash_link_root=
    (PAGECACHE_HASH_LINK*) ((char*) pagecache->hash_root +
                            ALIGN_SIZE((sizeof(PAGECACHE_HASH_LINK*) *
                                        pagecache->hash_entries)));
  bzero((uchar*) pagecache->block_root,
        pagecache->disk_blocks * sizeof(PAGECACHE_BLOCK_LINK));
  bzero((uchar*) pagecache->hash_root,
        pagecache->hash_entries * sizeof(PAGECACHE_HASH_LINK*));
  bzero((uchar*) pagecache->hash_link_root,
        pagecache->hash_links * sizeof(PAGECACHE_HASH_LINK));
  pagecache->hash_links_used= 0;
  pagecache->free_hash_list= NULL;
  pagecache->blocks_used= pagecache->blocks_changed= 0;

  pagecache->global_blocks_changed= 0;
  pagecache->blocks_available=0;		/* For debugging */

  /* The LRU chain is empty after initialization */
  pagecache->used_last= NULL;
  pagecache->used_ins= NULL;
  pagecache->free_block_list= NULL;
  pagecache->time= 0;
  pagecache->warm_blocks= 0;
  /* A zero division_limit / age_threshold means "use the number of blocks" */
  pagecache->min_warm_blocks= (division_limit ?
                               blocks * division_limit / 100 + 1 :
                               blocks);
  pagecache->age_threshold= (age_threshold ?
                             blocks * age_threshold / 100 :
                             blocks);

  pagecache->cnt_for_resize_op= 0;
  pagecache->resize_in_flush= 0;
  pagecache->can_be_used= 1;

  pagecache->waiting_for_hash_link.last_thread= NULL;
  pagecache->waiting_for_block.last_thread= NULL;
  DBUG_PRINT("exit",
             ("disk_blocks: %ld block_root: 0x%lx hash_entries: %ld\
hash_root: 0x%lx hash_links: %ld hash_link_root: 0x%lx",
              pagecache->disk_blocks, (long) pagecache->block_root,
              pagecache->hash_entries, (long) pagecache->hash_root,
              pagecache->hash_links, (long) pagecache->hash_link_root));
  bzero((uchar*) pagecache->changed_blocks,
        sizeof(pagecache->changed_blocks[0]) *
        PAGECACHE_CHANGED_BLOCKS_HASH);
  bzero((uchar*) pagecache->file_blocks,
        sizeof(pagecache->file_blocks[0]) *
        PAGECACHE_CHANGED_BLOCKS_HASH);

  pagecache->blocks= pagecache->disk_blocks > 0 ? pagecache->disk_blocks : 0;
  DBUG_RETURN((ulong) pagecache->disk_blocks);

err:
  /* Preserve my_errno across the cleanup calls */
  error= my_errno;
  pagecache->disk_blocks= 0;
  pagecache->blocks=  0;
  /*
    NOTE(review): when we get here before any allocation attempt of this
    call, block_mem/block_root hold whatever the caller left in the
    structure - presumably zero; confirm.
  */
  if (pagecache->block_mem)
  {
    my_large_free(pagecache->block_mem, MYF(0));
    pagecache->block_mem= NULL;
  }
  if (pagecache->block_root)
  {
    my_free(pagecache->block_root, MYF(0));
    pagecache->block_root= NULL;
  }
  my_errno= error;
  pagecache->can_be_used= 0;
  DBUG_RETURN(0);
}
873 :
874 :
875 : /*
876 : Flush all blocks in the key cache to disk
877 : */
878 :
879 : #ifdef NOT_USED
880 : static int flush_all_key_blocks(PAGECACHE *pagecache)
881 : {
882 : #if defined(PAGECACHE_DEBUG)
883 : uint cnt=0;
884 : #endif
885 : while (pagecache->blocks_changed > 0)
886 : {
887 : PAGECACHE_BLOCK_LINK *block;
888 : for (block= pagecache->used_last->next_used ; ; block=block->next_used)
889 : {
890 : if (block->hash_link)
891 : {
892 : #if defined(PAGECACHE_DEBUG)
893 : cnt++;
894 : KEYCACHE_DBUG_ASSERT(cnt <= pagecache->blocks_used);
895 : #endif
896 : if (flush_pagecache_blocks_int(pagecache, &block->hash_link->file,
897 : FLUSH_RELEASE, NULL, NULL))
898 : return 1;
899 : break;
900 : }
901 : if (block == pagecache->used_last)
902 : break;
903 : }
904 : }
905 : return 0;
906 : }
907 : #endif /* NOT_USED */
908 :
909 : /*
910 : Resize a key cache
911 :
912 : SYNOPSIS
913 : resize_pagecache()
914 : pagecache pointer to a page cache data structure
915 : use_mem total memory to use for the new key cache
916 : division_limit new division limit (if not zero)
917 : age_threshold new age threshold (if not zero)
918 :
919 : RETURN VALUE
920 : number of blocks in the key cache, if successful,
921 : 0 - otherwise.
922 :
923 : NOTES.
924 : The function first compares the memory size parameter
925 : with the key cache value.
926 :
927 : If they differ the function frees the memory allocated for the
928 : old key cache blocks by calling the end_pagecache function and
929 : then rebuilds the key cache with new blocks by calling
930 : init_pagecache.
931 :
932 : The function starts the operation only when all other threads
933 : performing operations with the key cache let it proceed
934 : (when cnt_for_resize_op=0).
935 :
936 : Before being usable, this function needs:
937 : - to receive fixes for BUG#17332 "changing key_buffer_size on a running
938 : server can crash under load" similar to those done to the key cache
939 : - to have us (Sanja) look at the additional constraints placed on
940 : resizing, due to the page locking specific to this page cache.
941 : So we disable it for now.
942 : */
943 : #if NOT_USED /* keep disabled until code is fixed see above !! */
944 : ulong resize_pagecache(PAGECACHE *pagecache,
945 : size_t use_mem, uint division_limit,
946 : uint age_threshold)
947 : {
948 : ulong blocks;
949 : #ifdef THREAD
950 : struct st_my_thread_var *thread;
951 : WQUEUE *wqueue;
952 :
953 : #endif
954 : DBUG_ENTER("resize_pagecache");
955 :
956 : if (!pagecache->inited)
957 : DBUG_RETURN(pagecache->disk_blocks);
958 :
959 : if(use_mem == pagecache->mem_size)
960 : {
961 : change_pagecache_param(pagecache, division_limit, age_threshold);
962 : DBUG_RETURN(pagecache->disk_blocks);
963 : }
964 :
965 : pagecache_pthread_mutex_lock(&pagecache->cache_lock);
966 :
967 : #ifdef THREAD
968 : wqueue= &pagecache->resize_queue;
969 : thread= my_thread_var;
970 : wqueue_link_into_queue(wqueue, thread);
971 :
972 : while (wqueue->last_thread->next != thread)
973 : {
974 : pagecache_pthread_cond_wait(&thread->suspend, &pagecache->cache_lock);
975 : }
976 : #endif
977 :
978 : pagecache->resize_in_flush= 1;
979 : if (flush_all_key_blocks(pagecache))
980 : {
981 : /* TODO: if this happens, we should write a warning in the log file ! */
982 : pagecache->resize_in_flush= 0;
983 : blocks= 0;
984 : pagecache->can_be_used= 0;
985 : goto finish;
986 : }
987 : pagecache->resize_in_flush= 0;
988 : pagecache->can_be_used= 0;
989 : #ifdef THREAD
990 : while (pagecache->cnt_for_resize_op)
991 : {
992 : KEYCACHE_DBUG_PRINT("resize_pagecache: wait",
993 : ("suspend thread %ld", thread->id));
994 : pagecache_pthread_cond_wait(&thread->suspend, &pagecache->cache_lock);
995 : }
996 : #else
997 : KEYCACHE_DBUG_ASSERT(pagecache->cnt_for_resize_op == 0);
998 : #endif
999 :
1000 : end_pagecache(pagecache, 0); /* Don't free mutex */
1001 : /* The following will work even if use_mem is 0 */
1002 : blocks= init_pagecache(pagecache, pagecache->block_size, use_mem,
1003 : division_limit, age_threshold,
1004 : pagecache->readwrite_flags);
1005 :
1006 : finish:
1007 : #ifdef THREAD
1008 : wqueue_unlink_from_queue(wqueue, thread);
1009 : /* Signal for the next resize request to proceeed if any */
1010 : if (wqueue->last_thread)
1011 : {
1012 : KEYCACHE_DBUG_PRINT("resize_pagecache: signal",
1013 : ("thread %ld", wqueue->last_thread->next->id));
1014 : pagecache_pthread_cond_signal(&wqueue->last_thread->next->suspend);
1015 : }
1016 : #endif
1017 : pagecache_pthread_mutex_unlock(&pagecache->cache_lock);
1018 : DBUG_RETURN(blocks);
1019 : }
1020 : #endif /* NOT_USED */
1021 :
1022 :
1023 : /*
1024 : Increment counter blocking resize key cache operation
1025 : */
1026 : static inline void inc_counter_for_resize_op(PAGECACHE *pagecache)
1027 35162786 : {
1028 35162786 : pagecache->cnt_for_resize_op++;
1029 : }
1030 :
1031 :
1032 : /*
1033 : Decrement counter blocking resize key cache operation;
1034 : Signal the operation to proceed when counter becomes equal zero
1035 : */
1036 : static inline void dec_counter_for_resize_op(PAGECACHE *pagecache)
1037 35328623 : {
1038 : #ifdef THREAD
1039 : struct st_my_thread_var *last_thread;
1040 35328623 : if (!--pagecache->cnt_for_resize_op &&
1041 : (last_thread= pagecache->resize_queue.last_thread))
1042 : {
1043 0 : KEYCACHE_DBUG_PRINT("dec_counter_for_resize_op: signal",
1044 : ("thread %ld", last_thread->next->id));
1045 0 : pagecache_pthread_cond_signal(&last_thread->next->suspend);
1046 : }
1047 : #else
1048 : pagecache->cnt_for_resize_op--;
1049 : #endif
1050 : }
1051 :
1052 : /*
1053 : Change the page cache parameters
1054 :
1055 : SYNOPSIS
1056 : change_pagecache_param()
1057 : pagecache pointer to a page cache data structure
1058 : division_limit new division limit (if not zero)
1059 : age_threshold new age threshold (if not zero)
1060 :
1061 : RETURN VALUE
1062 : none
1063 :
1064 : NOTES.
1065 : Presently the function resets the key cache parameters
1066 : concerning midpoint insertion strategy - division_limit and
1067 : age_threshold.
1068 : */
1069 :
1070 : void change_pagecache_param(PAGECACHE *pagecache, uint division_limit,
1071 : uint age_threshold)
1072 0 : {
1073 0 : DBUG_ENTER("change_pagecache_param");
1074 :
1075 0 : pagecache_pthread_mutex_lock(&pagecache->cache_lock);
1076 0 : if (division_limit)
1077 0 : pagecache->min_warm_blocks= (pagecache->disk_blocks *
1078 : division_limit / 100 + 1);
1079 0 : if (age_threshold)
1080 0 : pagecache->age_threshold= (pagecache->disk_blocks *
1081 : age_threshold / 100);
1082 0 : pagecache_pthread_mutex_unlock(&pagecache->cache_lock);
1083 0 : DBUG_VOID_RETURN;
1084 : }
1085 :
1086 :
1087 : /*
1088 : Removes page cache from memory. Does NOT flush pages to disk.
1089 :
1090 : SYNOPSIS
1091 : end_pagecache()
1092 : pagecache page cache handle
1093 : cleanup Complete free (Free also mutex for key cache)
1094 :
1095 : RETURN VALUE
1096 : none
1097 : */
1098 :
1099 : void end_pagecache(PAGECACHE *pagecache, my_bool cleanup)
1100 6316 : {
1101 6316 : DBUG_ENTER("end_pagecache");
1102 6316 : DBUG_PRINT("enter", ("key_cache: 0x%lx", (long) pagecache));
1103 :
1104 6316 : if (!pagecache->inited)
1105 3959 : DBUG_VOID_RETURN;
1106 :
1107 2357 : if (pagecache->disk_blocks > 0)
1108 : {
1109 2357 : if (pagecache->block_mem)
1110 : {
1111 2357 : my_large_free(pagecache->block_mem, MYF(0));
1112 2357 : pagecache->block_mem= NULL;
1113 2357 : my_free(pagecache->block_root, MYF(0));
1114 2357 : pagecache->block_root= NULL;
1115 : }
1116 2357 : pagecache->disk_blocks= -1;
1117 : /* Reset blocks_changed to be safe if flush_all_key_blocks is called */
1118 2357 : pagecache->blocks_changed= 0;
1119 : }
1120 :
1121 2357 : DBUG_PRINT("status", ("used: %lu changed: %lu w_requests: %lu "
1122 : "writes: %lu r_requests: %lu reads: %lu",
1123 : pagecache->blocks_used,
1124 : pagecache->global_blocks_changed,
1125 : (ulong) pagecache->global_cache_w_requests,
1126 : (ulong) pagecache->global_cache_write,
1127 : (ulong) pagecache->global_cache_r_requests,
1128 : (ulong) pagecache->global_cache_read));
1129 :
1130 2357 : if (cleanup)
1131 : {
1132 2357 : hash_free(&pagecache->files_in_flush);
1133 2357 : pthread_mutex_destroy(&pagecache->cache_lock);
1134 2357 : pagecache->inited= pagecache->can_be_used= 0;
1135 : PAGECACHE_DEBUG_CLOSE;
1136 : }
1137 2357 : DBUG_VOID_RETURN;
1138 : } /* end_pagecache */
1139 :
1140 :
1141 : /*
1142 : Unlink a block from the chain of dirty/clean blocks
1143 : */
1144 :
1145 : static inline void unlink_changed(PAGECACHE_BLOCK_LINK *block)
1146 3043517 : {
1147 3043517 : if (block->next_changed)
1148 1234330 : block->next_changed->prev_changed= block->prev_changed;
1149 3043517 : *block->prev_changed= block->next_changed;
1150 : }
1151 :
1152 :
1153 : /*
1154 : Link a block into the chain of dirty/clean blocks
1155 : */
1156 :
1157 : static inline void link_changed(PAGECACHE_BLOCK_LINK *block,
1158 : PAGECACHE_BLOCK_LINK **phead)
1159 3076669 : {
1160 3076669 : block->prev_changed= phead;
1161 3076669 : if ((block->next_changed= *phead))
1162 2477758 : (*phead)->prev_changed= &block->next_changed;
1163 3076669 : *phead= block;
1164 : }
1165 :
1166 :
1167 : /*
1168 : Unlink a block from the chain of dirty/clean blocks, if it's asked for,
1169 : and link it to the chain of clean blocks for the specified file
1170 : */
1171 :
1172 : static void link_to_file_list(PAGECACHE *pagecache,
1173 : PAGECACHE_BLOCK_LINK *block,
1174 : PAGECACHE_FILE *file, my_bool unlink_flag)
1175 2221356 : {
1176 2221356 : if (unlink_flag)
1177 2076147 : unlink_changed(block);
1178 2221356 : link_changed(block, &pagecache->file_blocks[FILE_HASH(*file)]);
1179 2221356 : if (block->status & PCBLOCK_CHANGED)
1180 : {
1181 836461 : block->status&= ~PCBLOCK_CHANGED;
1182 836461 : block->rec_lsn= LSN_MAX;
1183 836461 : pagecache->blocks_changed--;
1184 836461 : pagecache->global_blocks_changed--;
1185 : }
1186 : }
1187 :
1188 :
1189 : /*
1190 : Unlink a block from the chain of clean blocks for the specified
1191 : file and link it to the chain of dirty blocks for this file
1192 : */
1193 :
1194 : static inline void link_to_changed_list(PAGECACHE *pagecache,
1195 : PAGECACHE_BLOCK_LINK *block)
1196 855313 : {
1197 855313 : unlink_changed(block);
1198 855313 : link_changed(block,
1199 : &pagecache->changed_blocks[FILE_HASH(block->hash_link->file)]);
1200 855313 : block->status|=PCBLOCK_CHANGED;
1201 855313 : pagecache->blocks_changed++;
1202 855313 : pagecache->global_blocks_changed++;
1203 : }
1204 :
1205 :
1206 : /*
1207 : Link a block to the LRU chain at the beginning or at the end of
1208 : one of two parts.
1209 :
1210 : SYNOPSIS
1211 : link_block()
1212 : pagecache pointer to a page cache data structure
1213 : block pointer to the block to link to the LRU chain
1214 : hot <-> to link the block into the hot subchain
1215 : at_end <-> to link the block at the end of the subchain
1216 :
1217 : RETURN VALUE
1218 : none
1219 :
1220 : NOTES.
1221 : The LRU chain is represented by a circular list of block structures.
1222 : The list is double-linked of the type (**prev,*next) type.
1223 : The LRU chain is divided into two parts - hot and warm.
1224 : There are two pointers to access the last blocks of these two
1225 : parts. The beginning of the warm part follows right after the
1226 : end of the hot part.
1227 : Only blocks of the warm part can be used for replacement.
1228 : The first block from the beginning of this subchain is always
1229 : taken for eviction (pagecache->used_last->next_used)
1230 :
1231 : LRU chain: +------+ H O T +------+
1232 : +----| end |----...<----| beg |----+
1233 : | +------+last +------+ |
1234 : v<-link in latest hot (new end) |
1235 : | link in latest warm (new end)->^
1236 : | +------+ W A R M +------+ |
1237 : +----| beg |---->...----| end |----+
1238 : +------+ +------+ins
1239 : first for eviction
1240 : */
1241 :
1242 : static void link_block(PAGECACHE *pagecache, PAGECACHE_BLOCK_LINK *block,
1243 : my_bool hot, my_bool at_end)
1244 15648894 : {
1245 : PAGECACHE_BLOCK_LINK *ins;
1246 : PAGECACHE_BLOCK_LINK **ptr_ins;
1247 :
1248 15648894 : PCBLOCK_INFO(block);
1249 15648894 : KEYCACHE_DBUG_ASSERT(! (block->hash_link && block->hash_link->requests));
1250 : #ifdef THREAD
1251 15648894 : if (!hot && pagecache->waiting_for_block.last_thread)
1252 : {
1253 : /* Signal that in the LRU warm sub-chain an available block has appeared */
1254 : struct st_my_thread_var *last_thread=
1255 0 : pagecache->waiting_for_block.last_thread;
1256 0 : struct st_my_thread_var *first_thread= last_thread->next;
1257 0 : struct st_my_thread_var *next_thread= first_thread;
1258 : PAGECACHE_HASH_LINK *hash_link=
1259 0 : (PAGECACHE_HASH_LINK *) first_thread->opt_info;
1260 : struct st_my_thread_var *thread;
1261 : do
1262 : {
1263 0 : thread= next_thread;
1264 0 : next_thread= thread->next;
1265 : /*
1266 : We notify about the event all threads that ask
1267 : for the same page as the first thread in the queue
1268 : */
1269 0 : if ((PAGECACHE_HASH_LINK *) thread->opt_info == hash_link)
1270 : {
1271 0 : KEYCACHE_DBUG_PRINT("link_block: signal", ("thread: %ld", thread->id));
1272 0 : pagecache_pthread_cond_signal(&thread->suspend);
1273 0 : wqueue_unlink_from_queue(&pagecache->waiting_for_block, thread);
1274 0 : block->requests++;
1275 : }
1276 : }
1277 0 : while (thread != last_thread);
1278 0 : hash_link->block= block;
1279 0 : KEYCACHE_THREAD_TRACE("link_block: after signaling");
1280 : #if defined(PAGECACHE_DEBUG)
1281 : KEYCACHE_DBUG_PRINT("link_block",
1282 : ("linked,unlinked block: %u status: %x #requests: %u #available: %u",
1283 : PCBLOCK_NUMBER(pagecache, block), block->status,
1284 : block->requests, pagecache->blocks_available));
1285 : #endif
1286 0 : return;
1287 : }
1288 : #else /* THREAD */
1289 : KEYCACHE_DBUG_ASSERT(! (!hot && pagecache->waiting_for_block.last_thread));
1290 : /* Condition not transformed using DeMorgan, to keep the text identical */
1291 : #endif /* THREAD */
1292 15648894 : ptr_ins= hot ? &pagecache->used_ins : &pagecache->used_last;
1293 15648894 : ins= *ptr_ins;
1294 15648894 : if (ins)
1295 : {
1296 15633452 : ins->next_used->prev_used= &block->next_used;
1297 15633452 : block->next_used= ins->next_used;
1298 15633452 : block->prev_used= &ins->next_used;
1299 15633452 : ins->next_used= block;
1300 15633452 : if (at_end)
1301 11056206 : *ptr_ins= block;
1302 : }
1303 : else
1304 : {
1305 : /* The LRU chain is empty */
1306 15442 : pagecache->used_last= pagecache->used_ins= block->next_used= block;
1307 15442 : block->prev_used= &block->next_used;
1308 : }
1309 15648894 : KEYCACHE_THREAD_TRACE("link_block");
1310 : #if defined(PAGECACHE_DEBUG)
1311 : pagecache->blocks_available++;
1312 : KEYCACHE_DBUG_PRINT("link_block",
1313 : ("linked block: %u:%1u status: %x #requests: %u #available: %u",
1314 : PCBLOCK_NUMBER(pagecache, block), at_end, block->status,
1315 : block->requests, pagecache->blocks_available));
1316 : KEYCACHE_DBUG_ASSERT((ulong) pagecache->blocks_available <=
1317 : pagecache->blocks_used);
1318 : #endif
1319 : }
1320 :
1321 :
1322 : /*
1323 : Unlink a block from the LRU chain
1324 :
1325 : SYNOPSIS
1326 : unlink_block()
1327 : pagecache pointer to a page cache data structure
1328 : block pointer to the block to unlink from the LRU chain
1329 :
1330 : RETURN VALUE
1331 : none
1332 :
1333 : NOTES.
1334 : See NOTES for link_block
1335 : */
1336 :
1337 : static void unlink_block(PAGECACHE *pagecache, PAGECACHE_BLOCK_LINK *block)
1338 15615742 : {
1339 15615742 : DBUG_ENTER("unlink_block");
1340 15615742 : DBUG_PRINT("unlink_block", ("unlink 0x%lx", (ulong)block));
1341 15615742 : DBUG_ASSERT(block->next_used != NULL);
1342 15615742 : if (block->next_used == block)
1343 : {
1344 : /* The list contains only one member */
1345 14435 : pagecache->used_last= pagecache->used_ins= NULL;
1346 : }
1347 : else
1348 : {
1349 15601307 : block->next_used->prev_used= block->prev_used;
1350 15601307 : *block->prev_used= block->next_used;
1351 15601307 : if (pagecache->used_last == block)
1352 1306075 : pagecache->used_last= STRUCT_PTR(PAGECACHE_BLOCK_LINK,
1353 : next_used, block->prev_used);
1354 15601307 : if (pagecache->used_ins == block)
1355 4637857 : pagecache->used_ins= STRUCT_PTR(PAGECACHE_BLOCK_LINK,
1356 : next_used, block->prev_used);
1357 : }
1358 15615742 : block->next_used= NULL;
1359 :
1360 15615742 : KEYCACHE_THREAD_TRACE("unlink_block");
1361 : #if defined(PAGECACHE_DEBUG)
1362 : KEYCACHE_DBUG_ASSERT(pagecache->blocks_available != 0);
1363 : pagecache->blocks_available--;
1364 : KEYCACHE_DBUG_PRINT("unlink_block",
1365 : ("unlinked block: 0x%lx (%u) status: %x #requests: %u #available: %u",
1366 : (ulong)block, PCBLOCK_NUMBER(pagecache, block),
1367 : block->status,
1368 : block->requests, pagecache->blocks_available));
1369 : PCBLOCK_INFO(block);
1370 : #endif
1371 15615742 : DBUG_VOID_RETURN;
1372 : }
1373 :
1374 :
1375 : /*
1376 : Register requests for a block
1377 :
1378 : SYNOPSIS
1379 : reg_requests()
1380 : pagecache this page cache reference
1381 : block the block we request reference
1382 : count how many requests we register (it is 1 everywhere)
1383 :
1384 : NOTE
1385 : Registration of request means we are going to use this block so we exclude
1386 : it from the LRU if it is first request
1387 : */
1388 : static void reg_requests(PAGECACHE *pagecache, PAGECACHE_BLOCK_LINK *block,
1389 : int count)
1390 17454953 : {
1391 17454953 : DBUG_ENTER("reg_requests");
1392 17454953 : DBUG_PRINT("enter", ("block: 0x%lx (%u) status: %x reqs: %u",
1393 : (ulong)block, PCBLOCK_NUMBER(pagecache, block),
1394 : block->status, block->requests));
1395 17454953 : PCBLOCK_INFO(block);
1396 17454953 : if (! block->requests)
1397 : /* First request for the block unlinks it */
1398 11033076 : unlink_block(pagecache, block);
1399 17454953 : block->requests+= count;
1400 17454953 : DBUG_VOID_RETURN;
1401 : }
1402 :
1403 :
1404 : /*
1405 : Unregister request for a block
1406 : linking it to the LRU chain if it's the last request
1407 :
1408 : SYNOPSIS
1409 : unreg_request()
1410 : pagecache pointer to a page cache data structure
1411 : block pointer to the block to link to the LRU chain
1412 : at_end <-> to link the block at the end of the LRU chain
1413 :
1414 : RETURN VALUE
1415 : none
1416 :
1417 : NOTES.
1418 : Every linking to the LRU chain decrements by one a special block
1419 : counter (if it's positive). If the at_end parameter is TRUE the block is
1420 : added either at the end of warm sub-chain or at the end of hot sub-chain.
1421 : It is added to the hot subchain if its counter is zero and number of
1422 : blocks in warm sub-chain is not less than some low limit (determined by
1423 : the division_limit parameter). Otherwise the block is added to the warm
1424 : sub-chain. If the at_end parameter is FALSE the block is always added
1425 : at beginning of the warm sub-chain.
1426 : Thus a warm block can be promoted to the hot sub-chain when its counter
1427 : becomes zero for the first time.
1428 : At the same time the block at the very beginning of the hot subchain
1429 : might be moved to the beginning of the warm subchain if it stays untouched
1430 : for a too long time (this time is determined by parameter age_threshold).
1431 : */
1432 :
1433 : static void unreg_request(PAGECACHE *pagecache,
1434 : PAGECACHE_BLOCK_LINK *block, int at_end)
1435 17600162 : {
1436 17600162 : DBUG_ENTER("unreg_request");
1437 17600162 : DBUG_PRINT("enter", ("block 0x%lx (%u) status: %x reqs: %u",
1438 : (ulong)block, PCBLOCK_NUMBER(pagecache, block),
1439 : block->status, block->requests));
1440 17600162 : PCBLOCK_INFO(block);
1441 17600162 : DBUG_ASSERT(block->requests > 0);
1442 17600162 : if (! --block->requests)
1443 : {
1444 : my_bool hot;
1445 11178285 : if (block->hits_left)
1446 1551466 : block->hits_left--;
1447 11178285 : hot= !block->hits_left && at_end &&
1448 : pagecache->warm_blocks > pagecache->min_warm_blocks;
1449 11178285 : if (hot)
1450 : {
1451 0 : if (block->temperature == PCBLOCK_WARM)
1452 0 : pagecache->warm_blocks--;
1453 0 : block->temperature= PCBLOCK_HOT;
1454 0 : KEYCACHE_DBUG_PRINT("unreg_request", ("#warm_blocks: %lu",
1455 : pagecache->warm_blocks));
1456 : }
1457 11178285 : link_block(pagecache, block, hot, (my_bool)at_end);
1458 11178285 : block->last_hit_time= pagecache->time;
1459 11178285 : pagecache->time++;
1460 :
1461 11178285 : block= pagecache->used_ins;
1462 : /* Check if we should link a hot block to the warm block */
1463 11178285 : if (block && pagecache->time - block->last_hit_time >
1464 : pagecache->age_threshold)
1465 : {
1466 4470609 : unlink_block(pagecache, block);
1467 4470609 : link_block(pagecache, block, 0, 0);
1468 4470609 : if (block->temperature != PCBLOCK_WARM)
1469 : {
1470 36090 : pagecache->warm_blocks++;
1471 36090 : block->temperature= PCBLOCK_WARM;
1472 : }
1473 4470609 : KEYCACHE_DBUG_PRINT("unreg_request", ("#warm_blocks: %lu",
1474 : pagecache->warm_blocks));
1475 : }
1476 : }
1477 17600162 : DBUG_VOID_RETURN;
1478 : }
1479 :
1480 : /*
1481 : Remove a reader of the page in block
1482 : */
1483 :
1484 : static inline void remove_reader(PAGECACHE_BLOCK_LINK *block)
1485 16688269 : {
1486 16688269 : DBUG_ENTER("remove_reader");
1487 16688269 : PCBLOCK_INFO(block);
1488 16688269 : DBUG_ASSERT(block->hash_link->requests > 0);
1489 : #ifdef THREAD
1490 16688269 : if (! --block->hash_link->requests && block->condvar)
1491 0 : pagecache_pthread_cond_signal(block->condvar);
1492 : #else
1493 : --block->hash_link->requests;
1494 : #endif
1495 16688269 : DBUG_VOID_RETURN;
1496 : }
1497 :
1498 :
1499 : /*
1500 : Wait until the last reader of the page in block
1501 : signals on its termination
1502 : */
1503 :
1504 : static inline void wait_for_readers(PAGECACHE *pagecache
1505 : __attribute__((unused)),
1506 : PAGECACHE_BLOCK_LINK *block)
1507 2162294 : {
1508 : #ifdef THREAD
1509 2162294 : struct st_my_thread_var *thread= my_thread_var;
1510 4324588 : while (block->hash_link->requests)
1511 : {
1512 0 : KEYCACHE_DBUG_PRINT("wait_for_readers: wait",
1513 : ("suspend thread: %ld block: %u",
1514 : thread->id, PCBLOCK_NUMBER(pagecache, block)));
1515 0 : block->condvar= &thread->suspend;
1516 0 : pagecache_pthread_cond_wait(&thread->suspend, &pagecache->cache_lock);
1517 0 : block->condvar= NULL;
1518 : }
1519 : #else
1520 : KEYCACHE_DBUG_ASSERT(block->hash_link->requests == 0);
1521 : #endif
1522 : }
1523 :
1524 :
1525 : /*
1526 : Add a hash link to a bucket in the hash_table
1527 : */
1528 :
1529 : static inline void link_hash(PAGECACHE_HASH_LINK **start,
1530 : PAGECACHE_HASH_LINK *hash_link)
1531 2195446 : {
1532 2195446 : if (*start)
1533 119382 : (*start)->prev= &hash_link->next;
1534 2195446 : hash_link->next= *start;
1535 2195446 : hash_link->prev= start;
1536 2195446 : *start= hash_link;
1537 : }
1538 :
1539 :
1540 : /*
1541 : Remove a hash link from the hash table
1542 : */
1543 :
1544 : static void unlink_hash(PAGECACHE *pagecache, PAGECACHE_HASH_LINK *hash_link)
1545 2162294 : {
1546 2162294 : KEYCACHE_DBUG_PRINT("unlink_hash", ("fd: %u pos_ %lu #requests=%u",
1547 : (uint) hash_link->file.file, (ulong) hash_link->pageno,
1548 : hash_link->requests));
1549 2162294 : KEYCACHE_DBUG_ASSERT(hash_link->requests == 0);
1550 2162294 : if ((*hash_link->prev= hash_link->next))
1551 35745 : hash_link->next->prev= hash_link->prev;
1552 2162294 : hash_link->block= NULL;
1553 : #ifdef THREAD
1554 2162294 : if (pagecache->waiting_for_hash_link.last_thread)
1555 : {
1556 : /* Signal that a free hash link has appeared */
1557 : struct st_my_thread_var *last_thread=
1558 0 : pagecache->waiting_for_hash_link.last_thread;
1559 0 : struct st_my_thread_var *first_thread= last_thread->next;
1560 0 : struct st_my_thread_var *next_thread= first_thread;
1561 0 : PAGECACHE_PAGE *first_page= (PAGECACHE_PAGE *) (first_thread->opt_info);
1562 : struct st_my_thread_var *thread;
1563 :
1564 0 : hash_link->file= first_page->file;
1565 0 : DBUG_ASSERT(first_page->pageno < ((ULL(1)) << 40));
1566 0 : hash_link->pageno= first_page->pageno;
1567 : do
1568 : {
1569 : PAGECACHE_PAGE *page;
1570 0 : thread= next_thread;
1571 0 : page= (PAGECACHE_PAGE *) thread->opt_info;
1572 0 : next_thread= thread->next;
1573 : /*
1574 : We notify about the event all threads that ask
1575 : for the same page as the first thread in the queue
1576 : */
1577 0 : if (page->file.file == hash_link->file.file &&
1578 : page->pageno == hash_link->pageno)
1579 : {
1580 0 : KEYCACHE_DBUG_PRINT("unlink_hash: signal", ("thread %ld", thread->id));
1581 0 : pagecache_pthread_cond_signal(&thread->suspend);
1582 0 : wqueue_unlink_from_queue(&pagecache->waiting_for_hash_link, thread);
1583 : }
1584 : }
1585 0 : while (thread != last_thread);
1586 0 : link_hash(&pagecache->hash_root[PAGECACHE_HASH(pagecache,
1587 : hash_link->file,
1588 : hash_link->pageno)],
1589 : hash_link);
1590 0 : return;
1591 : }
1592 : #else /* THREAD */
1593 : KEYCACHE_DBUG_ASSERT(! (pagecache->waiting_for_hash_link.last_thread));
1594 : #endif /* THREAD */
1595 2162294 : hash_link->next= pagecache->free_hash_list;
1596 2162294 : pagecache->free_hash_list= hash_link;
1597 : }
1598 :
1599 :
1600 : /*
1601 : Get the hash link for the page if it is in the cache (do not put the
1602 : page in the cache if it is absent there)
1603 :
1604 : SYNOPSIS
1605 : get_present_hash_link()
1606 : pagecache Pagecache reference
1607 : file file ID
1608 : pageno page number in the file
1609 : start where to put pointer to found hash bucket (for
1610 : direct referring it)
1611 :
1612 : RETURN
1613 : found hashlink pointer
1614 : */
1615 :
1616 : static PAGECACHE_HASH_LINK *get_present_hash_link(PAGECACHE *pagecache,
1617 : PAGECACHE_FILE *file,
1618 : pgcache_page_no_t pageno,
1619 : PAGECACHE_HASH_LINK ***start)
1620 20717143 : {
1621 : reg1 PAGECACHE_HASH_LINK *hash_link;
1622 : #if defined(PAGECACHE_DEBUG)
1623 : int cnt;
1624 : #endif
1625 20717143 : DBUG_ENTER("get_present_hash_link");
1626 :
1627 20717143 : KEYCACHE_DBUG_PRINT("get_present_hash_link", ("fd: %u pos: %lu",
1628 : (uint) file->file, (ulong) pageno));
1629 :
1630 : /*
1631 : Find the bucket in the hash table for the pair (file, pageno);
1632 : start contains the head of the bucket list,
1633 : hash_link points to the first member of the list
1634 : */
1635 20717143 : hash_link= *(*start= &pagecache->hash_root[PAGECACHE_HASH(pagecache,
1636 : *file, pageno)]);
1637 : #if defined(PAGECACHE_DEBUG)
1638 : cnt= 0;
1639 : #endif
1640 : /* Look for an element for the pair (file, pageno) in the bucket chain */
1641 43592433 : while (hash_link &&
1642 : (hash_link->pageno != pageno ||
1643 : hash_link->file.file != file->file))
1644 : {
1645 2158147 : hash_link= hash_link->next;
1646 : #if defined(PAGECACHE_DEBUG)
1647 : cnt++;
1648 : if (! (cnt <= pagecache->hash_links_used))
1649 : {
1650 : int i;
1651 : for (i=0, hash_link= **start ;
1652 : i < cnt ; i++, hash_link= hash_link->next)
1653 : {
1654 : KEYCACHE_DBUG_PRINT("get_present_hash_link", ("fd: %u pos: %lu",
1655 : (uint) hash_link->file.file, (ulong) hash_link->pageno));
1656 : }
1657 : }
1658 : KEYCACHE_DBUG_ASSERT(cnt <= pagecache->hash_links_used);
1659 : #endif
1660 : }
1661 20717143 : if (hash_link)
1662 : {
1663 : /* Register the request for the page */
1664 18520889 : hash_link->requests++;
1665 : }
1666 : /*
1667 : As soon as the caller will release the page cache's lock, "hash_link"
1668 : will be potentially obsolete (unusable) information.
1669 : */
1670 20717143 : DBUG_RETURN(hash_link);
1671 : }
1672 :
1673 :
1674 : /*
1675 : Get the hash link for a page
1676 : */
1677 :
1678 : static PAGECACHE_HASH_LINK *get_hash_link(PAGECACHE *pagecache,
1679 : PAGECACHE_FILE *file,
1680 : pgcache_page_no_t pageno)
1681 20706424 : {
1682 : reg1 PAGECACHE_HASH_LINK *hash_link;
1683 : PAGECACHE_HASH_LINK **start;
1684 :
1685 20706424 : KEYCACHE_DBUG_PRINT("get_hash_link", ("fd: %u pos: %lu",
1686 : (uint) file->file, (ulong) pageno));
1687 :
1688 20706424 : restart:
1689 : /* try to find the page in the cache */
1690 20706424 : hash_link= get_present_hash_link(pagecache, file, pageno,
1691 : &start);
1692 20706424 : if (!hash_link)
1693 : {
1694 : /* There is no hash link in the hash table for the pair (file, pageno) */
1695 2195446 : if (pagecache->free_hash_list)
1696 : {
1697 2076787 : hash_link= pagecache->free_hash_list;
1698 2076787 : pagecache->free_hash_list= hash_link->next;
1699 : }
1700 118659 : else if (pagecache->hash_links_used < pagecache->hash_links)
1701 : {
1702 118659 : hash_link= &pagecache->hash_link_root[pagecache->hash_links_used++];
1703 : }
1704 : else
1705 : {
1706 : #ifdef THREAD
1707 : /* Wait for a free hash link */
1708 0 : struct st_my_thread_var *thread= my_thread_var;
1709 : PAGECACHE_PAGE page;
1710 0 : KEYCACHE_DBUG_PRINT("get_hash_link", ("waiting"));
1711 0 : page.file= *file;
1712 0 : page.pageno= pageno;
1713 0 : thread->opt_info= (void *) &page;
1714 0 : wqueue_link_into_queue(&pagecache->waiting_for_hash_link, thread);
1715 0 : KEYCACHE_DBUG_PRINT("get_hash_link: wait",
1716 : ("suspend thread %ld", thread->id));
1717 0 : pagecache_pthread_cond_wait(&thread->suspend,
1718 : &pagecache->cache_lock);
1719 0 : thread->opt_info= NULL;
1720 : #else
1721 : KEYCACHE_DBUG_ASSERT(0);
1722 : #endif
1723 0 : DBUG_PRINT("info", ("restarting..."));
1724 0 : goto restart;
1725 : }
1726 2195446 : hash_link->file= *file;
1727 2195446 : DBUG_ASSERT(pageno < ((ULL(1)) << 40));
1728 2195446 : hash_link->pageno= pageno;
1729 2195446 : link_hash(start, hash_link);
1730 : /* Register the request for the page */
1731 2195446 : hash_link->requests++;
1732 : }
1733 :
1734 20706424 : return hash_link;
1735 : }
1736 :
1737 :
1738 : /*
1739 : Get a block for the file page requested by a pagecache read/write operation;
1740 : If the page is not in the cache return a free block, if there is none
1741 : return the lru block after saving its buffer if the page is dirty.
1742 :
1743 : SYNOPSIS
1744 :
1745 : find_block()
1746 : pagecache pointer to a page cache data structure
1747 : file handler for the file to read page from
1748 : pageno number of the page in the file
1749 : init_hits_left how initialize the block counter for the page
1750 : wrmode <-> get for writing
1751 : reg_req Register request to the page
1752 : page_st out {PAGE_READ,PAGE_TO_BE_READ,PAGE_WAIT_TO_BE_READ}
1753 :
1754 : RETURN VALUE
1755 : Pointer to the found block if successful, 0 - otherwise
1756 :
1757 : NOTES.
1758 : For the page from file positioned at pageno the function checks whether
1759 : the page is in the key cache specified by the first parameter.
1760 : If this is the case it immediately returns the block.
1761 : If not, the function first chooses a block for this page. If there is
1762 : no not used blocks in the key cache yet, the function takes the block
1763 : at the very beginning of the warm sub-chain. It saves the page in that
1764 : block if it's dirty before returning the pointer to it.
1765 : The function returns in the page_st parameter the following values:
1766 : PAGE_READ - if page already in the block,
1767 : PAGE_TO_BE_READ - if it is to be read yet by the current thread
1768 : PAGE_WAIT_TO_BE_READ - if it is to be read by another thread
1769 : If an error occurs THE PCBLOCK_ERROR bit is set in the block status.
1770 : It might happen that there are no blocks in LRU chain (in warm part) -
1771 : all blocks are unlinked for some read/write operations. Then the function
1772 : waits until first of this operations links any block back.
1773 : */
1774 :
/*
  Find (or allocate) the cache block for page (file, pageno).

  Returns the block and stores PAGE_READ, PAGE_TO_BE_READ or
  PAGE_WAIT_TO_BE_READ in *page_st. For a write request that arrives while
  pagecache->resize_in_flush is set the function returns NULL and leaves
  *page_st untouched: the caller is expected to write directly to disk.

  The function waits on thread->suspend with pagecache->cache_lock, and
  temporarily releases that lock around pagecache_fwrite() when it has to
  push a dirty page out, so the caller must hold cache_lock on entry.

  Every exit goes through DBUG_RETURN so that the DBUG call stack stays
  balanced with the DBUG_ENTER at the top of the function.
*/
1775 : static PAGECACHE_BLOCK_LINK *find_block(PAGECACHE *pagecache,
1776 : PAGECACHE_FILE *file,
1777 : pgcache_page_no_t pageno,
1778 : int init_hits_left,
1779 : my_bool wrmode,
1780 : my_bool reg_req,
1781 : int *page_st)
1782 20706424 : {
1783 : PAGECACHE_HASH_LINK *hash_link;
1784 : PAGECACHE_BLOCK_LINK *block;
1785 20706424 : int error= 0;
1786 : int page_status;
1787 :
1788 20706424 : DBUG_ENTER("find_block");
1789 20706424 : KEYCACHE_THREAD_TRACE("find_block:begin");
1790 20706424 : DBUG_PRINT("enter", ("fd: %d pos: %lu wrmode: %d",
1791 : file->file, (ulong) pageno, wrmode));
1792 20706424 : KEYCACHE_DBUG_PRINT("find_block", ("fd: %d pos: %lu wrmode: %d",
1793 : file->file, (ulong) pageno,
1794 : wrmode));
1795 : #if !defined(DBUG_OFF) && defined(EXTRA_DEBUG)
1796 20706424 : DBUG_EXECUTE("check_pagecache",
1797 : test_key_cache(pagecache, "start of find_block", 0););
1798 : #endif
1799 :
1800 20706424 : restart:
1801 : /* Find the hash link for the requested page (file, pageno) */
1802 20706424 : hash_link= get_hash_link(pagecache, file, pageno);
1803 :
/* -1 means "not decided yet"; it is asserted to be replaced before return */
1804 20706424 : page_status= -1;
1805 20706424 : if ((block= hash_link->block) &&
1806 : block->hash_link == hash_link && (block->status & PCBLOCK_READ))
1807 18509483 : page_status= PAGE_READ;
1808 :
1809 20706424 : if (wrmode && pagecache->resize_in_flush)
1810 : {
1811 : /* This is a write request during the flush phase of a resize operation */
1812 :
1813 0 : if (page_status != PAGE_READ)
1814 : {
1815 : /* We don't need the page in the cache: we are going to write on disk */
1816 0 : DBUG_ASSERT(hash_link->requests > 0);
1817 0 : hash_link->requests--;
1818 0 : unlink_hash(pagecache, hash_link);
/* DBUG_RETURN, not a bare return: DBUG_ENTER was executed above */
1819 0 : DBUG_RETURN(0);
1820 : }
1821 0 : if (!(block->status & PCBLOCK_IN_FLUSH))
1822 : {
1823 0 : DBUG_ASSERT(hash_link->requests > 0);
1824 0 : hash_link->requests--;
1825 : /*
1826 : Remove block to invalidate the page in the block buffer
1827 : as we are going to write directly on disk.
1828 : Although we have an exclusive lock for the updated key part
1829 : the control can be yielded by the current thread as we might
1830 : have unfinished readers of other key parts in the block
1831 : buffer. Still we are guaranteed not to have any readers
1832 : of the key part we are writing into until the block is
1833 : removed from the cache as we set the PCBLOCK_REASSIGNED
1834 : flag (see the code below that handles reading requests).
1835 : */
1836 0 : free_block(pagecache, block);
1837 0 : DBUG_RETURN(0);
1838 : }
1839 : /* Wait until the page is flushed on disk */
1840 0 : DBUG_ASSERT(hash_link->requests > 0);
1841 0 : hash_link->requests--;
1842 : {
1843 : #ifdef THREAD
1844 0 : struct st_my_thread_var *thread= my_thread_var;
1845 0 : wqueue_add_to_queue(&block->wqueue[COND_FOR_SAVED], thread);
1846 : do
1847 : {
1848 0 : KEYCACHE_DBUG_PRINT("find_block: wait",
1849 : ("suspend thread %ld", thread->id));
1850 0 : pagecache_pthread_cond_wait(&thread->suspend,
1851 : &pagecache->cache_lock);
1852 : }
1853 0 : while(thread->next);
1854 : #else
1855 : KEYCACHE_DBUG_ASSERT(0);
1856 : /*
1857 : Given the use of "resize_in_flush", it seems impossible
1858 : that this whole branch is ever entered in single-threaded case
1859 : because "(wrmode && pagecache->resize_in_flush)" cannot be true.
1860 : TODO: Check this, and then put the whole branch into the
1861 : "#ifdef THREAD" guard.
1862 : */
1863 : #endif
1864 : }
1865 : /* Invalidate page in the block if it has not been done yet */
1866 0 : if (block->status)
1867 0 : free_block(pagecache, block);
1868 0 : DBUG_RETURN(0);
1869 : }
1870 :
1871 20706424 : if (page_status == PAGE_READ &&
1872 : (block->status & (PCBLOCK_IN_SWITCH | PCBLOCK_REASSIGNED)))
1873 : {
1874 : /* This is a request for a page to be removed from cache */
1875 :
1876 0 : KEYCACHE_DBUG_PRINT("find_block",
1877 : ("request for old page in block: %u "
1878 : "wrmode: %d block->status: %d",
1879 : PCBLOCK_NUMBER(pagecache, block), wrmode,
1880 : block->status));
1881 : /*
1882 : Only reading requests can proceed until the old dirty page is flushed,
1883 : all others are to be suspended, then resubmitted
1884 : */
1885 0 : if (!wrmode && !(block->status & PCBLOCK_REASSIGNED))
1886 : {
1887 0 : if (reg_req)
1888 0 : reg_requests(pagecache, block, 1);
1889 : }
1890 : else
1891 : {
1892 0 : DBUG_ASSERT(hash_link->requests > 0);
1893 0 : hash_link->requests--;
1894 0 : KEYCACHE_DBUG_PRINT("find_block",
1895 : ("request waiting for old page to be saved"));
1896 : {
1897 : #ifdef THREAD
1898 0 : struct st_my_thread_var *thread= my_thread_var;
1899 : /* Put the request into the queue of those waiting for the old page */
1900 0 : wqueue_add_to_queue(&block->wqueue[COND_FOR_SAVED], thread);
1901 : /* Wait until the request can be resubmitted */
1902 : do
1903 : {
1904 0 : KEYCACHE_DBUG_PRINT("find_block: wait",
1905 : ("suspend thread %ld", thread->id));
1906 0 : pagecache_pthread_cond_wait(&thread->suspend,
1907 : &pagecache->cache_lock);
1908 : }
1909 0 : while(thread->next);
1910 : #else
1911 : KEYCACHE_DBUG_ASSERT(0);
1912 : /* No parallel requests in single-threaded case */
1913 : #endif
1914 : }
1915 0 : KEYCACHE_DBUG_PRINT("find_block",
1916 : ("request for old page resubmitted"));
1917 0 : DBUG_PRINT("info", ("restarting..."));
1918 : /* Resubmit the request */
1919 0 : goto restart;
1920 : }
1921 : }
1922 : else
1923 : {
1924 : /* This is a request for a new page or for a page not to be removed */
1925 20706424 : if (! block)
1926 : {
1927 : /* No block is assigned for the page yet */
1928 2195446 : if (pagecache->blocks_unused)
1929 : {
1930 145209 : if (pagecache->free_block_list)
1931 : {
1932 : /* There is a block in the free list. */
1933 26776 : block= pagecache->free_block_list;
1934 26776 : pagecache->free_block_list= block->next_used;
1935 26776 : block->next_used= NULL;
1936 : }
1937 : else
1938 : {
1939 : /* There are some never used blocks, take first of them */
1940 118433 : block= &pagecache->block_root[pagecache->blocks_used];
1941 118433 : block->buffer= ADD_TO_PTR(pagecache->block_mem,
1942 : ((ulong) pagecache->blocks_used*
1943 : pagecache->block_size),
1944 : uchar*);
1945 118433 : pagecache->blocks_used++;
1946 : }
1947 145209 : pagecache->blocks_unused--;
1948 145209 : DBUG_ASSERT(block->wlocks == 0);
1949 145209 : DBUG_ASSERT(block->rlocks == 0);
1950 145209 : DBUG_ASSERT(block->rlocks_queue == 0);
1951 145209 : DBUG_ASSERT(block->pins == 0);
/* Initialise the fresh block; the request is registered directly here */
1952 145209 : block->status= 0;
1953 : #ifndef DBUG_OFF
1954 145209 : block->type= PAGECACHE_EMPTY_PAGE;
1955 : #endif
1956 145209 : block->requests= 1;
1957 145209 : block->temperature= PCBLOCK_COLD;
1958 145209 : block->hits_left= init_hits_left;
1959 145209 : block->last_hit_time= 0;
1960 145209 : block->rec_lsn= LSN_MAX;
1961 145209 : link_to_file_list(pagecache, block, file, 0);
1962 145209 : block->hash_link= hash_link;
1963 145209 : hash_link->block= block;
1964 145209 : page_status= PAGE_TO_BE_READ;
1965 145209 : DBUG_PRINT("info", ("page to be read set for page 0x%lx",
1966 : (ulong)block));
1967 145209 : KEYCACHE_DBUG_PRINT("find_block",
1968 : ("got free or never used block %u",
1969 : PCBLOCK_NUMBER(pagecache, block)));
1970 : }
1971 : else
1972 : {
1973 : /* There are no never used blocks, use a block from the LRU chain */
1974 :
1975 : /*
1976 : Wait until a new block is added to the LRU chain;
1977 : several threads might wait here for the same page,
1978 : all of them must get the same block
1979 : */
1980 :
1981 : #ifdef THREAD
1982 2050237 : if (! pagecache->used_last)
1983 : {
1984 0 : struct st_my_thread_var *thread= my_thread_var;
1985 0 : thread->opt_info= (void *) hash_link;
1986 0 : wqueue_link_into_queue(&pagecache->waiting_for_block, thread);
1987 : do
1988 : {
1989 0 : KEYCACHE_DBUG_PRINT("find_block: wait",
1990 : ("suspend thread %ld", thread->id));
1991 0 : pagecache_pthread_cond_wait(&thread->suspend,
1992 : &pagecache->cache_lock);
1993 : }
1994 0 : while (thread->next);
1995 0 : thread->opt_info= NULL;
1996 : }
1997 : #else
1998 : KEYCACHE_DBUG_ASSERT(pagecache->used_last);
1999 : #endif
/* Re-read: hash_link->block may have been set while this thread waited */
2000 2050237 : block= hash_link->block;
2001 2050237 : if (! block)
2002 : {
2003 : /*
2004 : Take the first block from the LRU chain
2005 : unlinking it from the chain
2006 : */
2007 2050237 : block= pagecache->used_last->next_used;
2008 2050237 : block->hits_left= init_hits_left;
2009 2050237 : block->last_hit_time= 0;
2010 2050237 : if (reg_req)
2011 2050237 : reg_requests(pagecache, block, 1);
2012 2050237 : hash_link->block= block;
2013 : }
2014 2050237 : PCBLOCK_INFO(block);
2015 2050237 : DBUG_ASSERT(block->wlocks == 0);
2016 2050237 : DBUG_ASSERT(block->rlocks == 0);
2017 2050237 : DBUG_ASSERT(block->rlocks_queue == 0);
2018 2050237 : DBUG_ASSERT(block->pins == 0);
2019 :
2020 2050237 : if (block->hash_link != hash_link &&
2021 : ! (block->status & PCBLOCK_IN_SWITCH) )
2022 : {
2023 : /* this is a primary request for a new page */
2024 2050237 : DBUG_ASSERT(block->wlocks == 0);
2025 2050237 : DBUG_ASSERT(block->rlocks == 0);
2026 2050237 : DBUG_ASSERT(block->rlocks_queue == 0);
2027 2050237 : DBUG_ASSERT(block->pins == 0);
2028 2050237 : block->status|= PCBLOCK_IN_SWITCH;
2029 :
2030 2050237 : KEYCACHE_DBUG_PRINT("find_block",
2031 : ("got block %u for new page",
2032 : PCBLOCK_NUMBER(pagecache, block)));
2033 :
2034 2050237 : if (block->status & PCBLOCK_CHANGED)
2035 : {
2036 : /* The block contains a dirty page - push it out of the cache */
2037 :
2038 810551 : KEYCACHE_DBUG_PRINT("find_block", ("block is dirty"));
2039 :
/* The cache lock is released for the duration of the disk write */
2040 810551 : pagecache_pthread_mutex_unlock(&pagecache->cache_lock);
2041 : /*
2042 : The call is thread safe because only the current
2043 : thread might change the block->hash_link value
2044 : */
2045 810551 : DBUG_ASSERT(block->pins == 0);
2046 810551 : error= pagecache_fwrite(pagecache,
2047 : &block->hash_link->file,
2048 : block->buffer,
2049 : block->hash_link->pageno,
2050 : block->type,
2051 : pagecache->readwrite_flags);
2052 810551 : pagecache_pthread_mutex_lock(&pagecache->cache_lock);
2053 810551 : pagecache->global_cache_write++;
2054 : }
2055 :
2056 2050237 : block->status|= PCBLOCK_REASSIGNED;
2057 2050237 : if (block->hash_link)
2058 : {
2059 : /*
2060 : Wait until all pending read requests
2061 : for this page are executed
2062 : (we could have avoided this waiting, if we had read
2063 : a page in the cache in a sweep, without yielding control)
2064 : */
2065 2050237 : wait_for_readers(pagecache, block);
2066 :
2067 : /* Remove the hash link for this page from the hash table */
2068 2050237 : unlink_hash(pagecache, block->hash_link);
2069 : /* All pending requests for this page must be resubmitted */
2070 : #ifdef THREAD
2071 2050237 : if (block->wqueue[COND_FOR_SAVED].last_thread)
2072 0 : wqueue_release_queue(&block->wqueue[COND_FOR_SAVED]);
2073 : #endif
2074 : }
2075 2050237 : link_to_file_list(pagecache, block, file,
2076 : (my_bool)(block->hash_link ? 1 : 0));
2077 2050237 : PCBLOCK_INFO(block);
2078 2050237 : block->status= error ? PCBLOCK_ERROR : 0;
/* Record an errno only when the write failed; otherwise clear it */
2079 2050237 : block->error= error ? (int16) my_errno : 0;
2080 : #ifndef DBUG_OFF
2081 2050237 : block->type= PAGECACHE_EMPTY_PAGE;
2082 2050237 : if (error)
2083 0 : my_debug_put_break_here();
2084 : #endif
2085 2050237 : block->hash_link= hash_link;
2086 2050237 : page_status= PAGE_TO_BE_READ;
2087 2050237 : DBUG_PRINT("info", ("page to be read set for page 0x%lx",
2088 : (ulong)block));
2089 :
2090 2050237 : KEYCACHE_DBUG_ASSERT(block->hash_link->block == block);
2091 2050237 : KEYCACHE_DBUG_ASSERT(hash_link->block->hash_link == hash_link);
2092 : }
2093 : else
2094 : {
2095 : /* This is for secondary requests for a new page only */
2096 0 : KEYCACHE_DBUG_PRINT("find_block",
2097 : ("block->hash_link: %p hash_link: %p "
2098 : "block->status: %u", block->hash_link,
2099 : hash_link, block->status ));
2100 0 : page_status= (((block->hash_link == hash_link) &&
2101 : (block->status & PCBLOCK_READ)) ?
2102 : PAGE_READ : PAGE_WAIT_TO_BE_READ);
2103 : }
2104 : }
2105 : }
2106 : else
2107 : {
/* A block is already assigned to this page: just register the request */
2108 18510978 : if (reg_req)
2109 15264803 : reg_requests(pagecache, block, 1);
2110 18510978 : KEYCACHE_DBUG_PRINT("find_block",
2111 : ("block->hash_link: %p hash_link: %p "
2112 : "block->status: %u", block->hash_link,
2113 : hash_link, block->status ));
2114 18510978 : page_status= (((block->hash_link == hash_link) &&
2115 : (block->status & PCBLOCK_READ)) ?
2116 : PAGE_READ : PAGE_WAIT_TO_BE_READ);
2117 : }
2118 : }
2119 :
2120 20706424 : KEYCACHE_DBUG_ASSERT(page_status != -1);
2121 20706424 : *page_st= page_status;
2122 20706424 : DBUG_PRINT("info",
2123 : ("block: 0x%lx fd: %u pos: %lu block->status: %u page_status: %u",
2124 : (ulong) block, (uint) file->file,
2125 : (ulong) pageno, block->status, (uint) page_status));
2126 20706424 : KEYCACHE_DBUG_PRINT("find_block",
2127 : ("block: 0x%lx fd: %d pos: %lu block->status: %u page_status: %d",
2128 : (ulong) block,
2129 : file->file, (ulong) pageno, block->status,
2130 : page_status));
2131 :
2132 : #if !defined(DBUG_OFF) && defined(EXTRA_DEBUG)
2133 20706424 : DBUG_EXECUTE("check_pagecache",
2134 : test_key_cache(pagecache, "end of find_block",0););
2135 : #endif
2136 20706424 : KEYCACHE_THREAD_TRACE("find_block:end");
2137 20706424 : DBUG_RETURN(block);
2138 : }
2139 :
2140 :
/*
  Increment the pin counter of a block.

  In debug builds (DBUG_OFF not defined) a PAGECACHE_PIN_INFO record holding
  the current thread (my_thread_var) is additionally allocated and linked
  into block->pin_list; remove_pin() looks that record up and frees it.

  NOTE(review): the my_malloc() result is dereferenced without a NULL check.
*/
2141 : static void add_pin(PAGECACHE_BLOCK_LINK *block)
2142 15420425 : {
2143 15420425 : DBUG_ENTER("add_pin");
2144 15420425 : DBUG_PRINT("enter", ("block: 0x%lx pins: %u",
2145 : (ulong) block,
2146 : block->pins));
2147 15420425 : PCBLOCK_INFO(block);
2148 15420425 : block->pins++;
2149 : #ifndef DBUG_OFF
2150 : {
/* Debug bookkeeping: remember which thread took this pin */
2151 : PAGECACHE_PIN_INFO *info=
2152 15420425 : (PAGECACHE_PIN_INFO *)my_malloc(sizeof(PAGECACHE_PIN_INFO), MYF(0));
2153 15420425 : info->thread= my_thread_var;
2154 15420425 : info_link(&block->pin_list, info);
2155 : }
2156 : #endif
2157 15420425 : DBUG_VOID_RETURN;
2158 : }
2159 :
/*
  Decrement the pin counter of a block (block->pins > 0 is asserted first).

  In debug builds the matching PAGECACHE_PIN_INFO record is located with
  info_find(block->pin_list, my_thread_var, any), unlinked and freed.
  'any' is used only by this debug bookkeeping, which is why it is marked
  unused when DBUG_OFF is defined. Presumably a true 'any' lets info_find()
  match a record created by another thread - confirm against info_find().
*/
2160 : static void remove_pin(PAGECACHE_BLOCK_LINK *block, my_bool any
2161 : #ifdef DBUG_OFF
2162 : __attribute__((unused))
2163 : #endif
2164 : )
2165 15420425 : {
2166 15420425 : DBUG_ENTER("remove_pin");
2167 15420425 : DBUG_PRINT("enter", ("block: 0x%lx pins: %u any: %d",
2168 : (ulong) block,
2169 : block->pins, (int)any));
2170 15420425 : PCBLOCK_INFO(block);
2171 15420425 : DBUG_ASSERT(block->pins > 0);
2172 15420425 : block->pins--;
2173 : #ifndef DBUG_OFF
2174 : {
/* Debug bookkeeping: drop the record that add_pin() created */
2175 15420425 : PAGECACHE_PIN_INFO *info= info_find(block->pin_list, my_thread_var, any);
2176 15420425 : DBUG_ASSERT(info != 0);
2177 15420425 : info_unlink(info);
2178 15420425 : my_free(info, MYF(0));
2179 : }
2180 : #endif
2181 15420425 : DBUG_VOID_RETURN;
2182 : }
/*
  Debug-only bookkeeping of page locks.

  Every lock taken on a block is recorded as a PAGECACHE_LOCK_INFO (owning
  thread plus a write_lock flag) linked into block->lock_list. The records
  are handled through the PAGECACHE_PIN_INFO list helpers (info_link,
  info_find, info_unlink) by casting. When DBUG_OFF is defined the three
  helpers below expand to nothing.
*/
2183 : #ifndef DBUG_OFF
/* Record that the current thread took a lock; wl is stored as write_lock */
2184 : static void info_add_lock(PAGECACHE_BLOCK_LINK *block, my_bool wl)
2185 17509389 : {
/* NOTE(review): the my_malloc() result is used without a NULL check */
2186 : PAGECACHE_LOCK_INFO *info=
2187 17509389 : (PAGECACHE_LOCK_INFO *)my_malloc(sizeof(PAGECACHE_LOCK_INFO), MYF(0));
2188 17509389 : info->thread= my_thread_var;
2189 17509389 : info->write_lock= wl;
2190 17509389 : info_link((PAGECACHE_PIN_INFO **)&block->lock_list,
2191 : (PAGECACHE_PIN_INFO *)info);
2192 : }
/* Find the current thread's lock record on the block, unlink and free it */
2193 : static void info_remove_lock(PAGECACHE_BLOCK_LINK *block)
2194 17509389 : {
2195 : PAGECACHE_LOCK_INFO *info=
2196 : (PAGECACHE_LOCK_INFO *)info_find((PAGECACHE_PIN_INFO *)block->lock_list,
2197 17509389 : my_thread_var, FALSE);
2198 17509389 : DBUG_ASSERT(info != 0);
2199 17509389 : info_unlink((PAGECACHE_PIN_INFO *)info);
2200 17509389 : my_free(info, MYF(0));
2201 : }
/*
  Change the write_lock flag of the current thread's lock record to wl.
  It is asserted that the record exists and that the flag really changes.
*/
2202 : static void info_change_lock(PAGECACHE_BLOCK_LINK *block, my_bool wl)
2203 460464 : {
2204 : PAGECACHE_LOCK_INFO *info=
2205 : (PAGECACHE_LOCK_INFO *)info_find((PAGECACHE_PIN_INFO *)block->lock_list,
2206 460464 : my_thread_var, FALSE);
2207 460464 : DBUG_ASSERT(info != 0);
2208 460464 : DBUG_ASSERT(info->write_lock != wl);
2209 460464 : info->write_lock= wl;
2210 : }
2211 : #else
/* Non-debug build: lock bookkeeping compiles away */
2212 : #define info_add_lock(B,W)
2213 : #define info_remove_lock(B)
2214 : #define info_change_lock(B,W)
2215 : #endif
2216 :
2217 :
2218 : /**
2219 : @brief Wait for a read or write lock on a block
2220 :
2221 : @param pagecache pointer to a page cache data structure
2222 : @param block the block to work with
2223 : @param file file of the block when it was locked
2224 : @param pageno page number of the block when it was locked
2225 : @param lock_type MY_PTHREAD_LOCK_READ or MY_PTHREAD_LOCK_WRITE
2226 :
@note The thread is queued in block->wqueue[COND_FOR_WRLOCK] with
thread->lock_type set to lock_type, dec_counter_for_resize_op() is
called, and the thread then sleeps on thread->suspend (with
pagecache->cache_lock as the mutex) until thread->next is cleared.
After waking up, the block is re-validated against the file and
pageno that were passed in.

@note NOTE(review): DBUG_ENTER is inside "#ifdef THREAD" while the
DBUG_RETURN calls are outside it; a build without THREAD would use
DBUG_RETURN without a matching DBUG_ENTER. The wait trace label
"get_wrlock: wait" is also used when waiting for a read lock.

2227 : @retval 0 OK
2228 : @retval 1 Can't lock this block, need retry
2229 : */
2230 :
2231 : static my_bool pagecache_wait_lock(PAGECACHE *pagecache,
2232 : PAGECACHE_BLOCK_LINK *block,
2233 : PAGECACHE_FILE file,
2234 : pgcache_page_no_t pageno,
2235 : uint lock_type)
2236 166643 : {
2237 : /* Lock failed we will wait */
2238 : #ifdef THREAD
2239 166643 : struct st_my_thread_var *thread= my_thread_var;
2240 166643 : DBUG_ENTER("pagecache_wait_lock");
2241 166643 : DBUG_PRINT("info", ("fail to lock, waiting... 0x%lx", (ulong)block));
2242 166643 : thread->lock_type= lock_type;
2243 166643 : wqueue_add_to_queue(&block->wqueue[COND_FOR_WRLOCK], thread);
2244 166643 : dec_counter_for_resize_op(pagecache);
2245 : do
2246 : {
2247 166643 : KEYCACHE_DBUG_PRINT("get_wrlock: wait",
2248 : ("suspend thread %ld", thread->id));
2249 166643 : pagecache_pthread_cond_wait(&thread->suspend,
2250 : &pagecache->cache_lock);
2251 : }
2252 166643 : while(thread->next);
2253 : #else
2254 : DBUG_ASSERT(0);
2255 : #endif
2256 166643 : PCBLOCK_INFO(block);
/*
  While we slept the block may have been switched to another page:
  report "retry" if it is being reassigned or no longer maps to the
  (file, pageno) the caller asked for.
*/
2257 166643 : if ((block->status & (PCBLOCK_REASSIGNED | PCBLOCK_IN_SWITCH)) ||
2258 : file.file != block->hash_link->file.file ||
2259 : pageno != block->hash_link->pageno)
2260 : {
2261 0 : DBUG_PRINT("info", ("the block 0x%lx changed => need retry "
2262 : "status: %x files %d != %d or pages %lu != %lu",
2263 : (ulong)block, block->status,
2264 : file.file, block->hash_link->file.file,
2265 : (ulong) pageno, (ulong) block->hash_link->pageno));
2266 0 : DBUG_RETURN(1);
2267 : }
2268 166643 : DBUG_RETURN(0);
2269 : }
2270 :
2271 : /**
2272 : @brief Put a write lock on the block
2273 :
2274 : @param pagecache pointer to a page cache data structure
2275 : @param block the block to work with
2276 :
2277 : @note We have loose scheme for locking by the same thread:
2278 : * Downgrade to read lock if no other locks are taken
2279 : * Our scheme of locking allow for the same thread
2280 : - the same kind of lock
2281 : - taking read lock if write lock present
2282 : - downgrading to read lock if still other place the same
2283 : thread keep write lock
2284 : * But unlock operation number should be the same to lock operation.
2285 : * If we try to get read lock having active write locks we put read
2286 : locks to queue, and as soon as write lock(s) gone the read locks
2287 : from queue came in force.
2288 : * If read lock is unlocked earlier then it came to force it
2289 : just removed from the queue
2290 :
@note The file and page number of the block are captured on entry and
handed to pagecache_wait_lock(), which uses them to detect that the
block was given to another page while this thread was waiting.

2291 : @retval 0 OK
2292 : @retval 1 Can't lock this block, need retry
2293 : */
2294 :
2295 : static my_bool get_wrlock(PAGECACHE *pagecache,
2296 : PAGECACHE_BLOCK_LINK *block)
2297 8231171 : {
2298 8231171 : PAGECACHE_FILE file= block->hash_link->file;
2299 8231171 : pgcache_page_no_t pageno= block->hash_link->pageno;
2300 8231171 : pthread_t locker= pthread_self();
2301 8231171 : DBUG_ENTER("get_wrlock");
2302 8231171 : DBUG_PRINT("info", ("the block 0x%lx "
2303 : "files %d(%d) pages %lu(%lu)",
2304 : (ulong) block,
2305 : file.file, block->hash_link->file.file,
2306 : (ulong) pageno, (ulong) block->hash_link->pageno));
2307 8231171 : PCBLOCK_INFO(block);
2308 : /*
2309 : We assume that the same thread will try write lock on block on which it
2310 : has already read lock.
2311 : */
/*
  Wait while another thread holds the write lock, or while any read lock
  is active (the rlocks test does not look at who owns the read lock).
  A write lock already held by this thread does not block.
*/
2312 16612062 : while ((block->wlocks && !pthread_equal(block->write_locker, locker)) ||
2313 : block->rlocks)
2314 : {
2315 : /* Lock failed we will wait */
2316 149720 : if (pagecache_wait_lock(pagecache, block, file, pageno,
2317 : MY_PTHREAD_LOCK_WRITE))
2318 0 : DBUG_RETURN(1);
2319 : }
2320 : /* we are doing it by global cache mutex protection, so it is OK */
2321 8231171 : block->wlocks++;
2322 8231171 : block->write_locker= locker;
2323 8231171 : DBUG_PRINT("info", ("WR lock set, block 0x%lx", (ulong)block));
2324 8231171 : DBUG_RETURN(0);
2325 : }
2326 :
2327 :
2328 : /*
2329 : @brief Put a read lock on the block
2330 :
2331 : @param pagecache pointer to a page cache data structure
2332 : @param block the block to work with
2333 :
2334 : @note If the calling thread itself holds the write lock, the read lock
2335 : is not activated but counted in block->rlocks_queue.
2336 :
2337 : @note see note for get_wrlock().
2338 :
2339 : @retval 0 OK
2340 : @retval 1 Can't lock this block, need retry
2341 : */
2342 :
2343 : static my_bool get_rdlock(PAGECACHE *pagecache,
2344 : PAGECACHE_BLOCK_LINK *block)
2345 9278218 : {
2346 9278218 : PAGECACHE_FILE file= block->hash_link->file;
2347 9278218 : pgcache_page_no_t pageno= block->hash_link->pageno;
2348 9278218 : pthread_t locker= pthread_self();
2349 9278218 : DBUG_ENTER("get_rdlock");
2350 9278218 : DBUG_PRINT("info", ("the block 0x%lx "
2351 : "files %d(%d) pages %lu(%lu)",
2352 : (ulong) block,
2353 : file.file, block->hash_link->file.file,
2354 : (ulong) pageno, (ulong) block->hash_link->pageno));
2355 9278218 : PCBLOCK_INFO(block);
/* Only a write lock held by a different thread makes a reader wait */
2356 18573359 : while (block->wlocks && !pthread_equal(block->write_locker, locker))
2357 : {
2358 : /* Lock failed we will wait */
2359 16923 : if (pagecache_wait_lock(pagecache, block, file, pageno,
2360 : MY_PTHREAD_LOCK_READ))
2361 0 : DBUG_RETURN(1);
2362 : }
2363 : /* we are doing it by global cache mutex protection, so it is OK */
2364 9278218 : if (block->wlocks)
2365 : {
/* We own the write lock ourselves: queue the read lock for later */
2366 0 : DBUG_ASSERT(pthread_equal(block->write_locker, locker));
2367 0 : block->rlocks_queue++;
2368 0 : DBUG_PRINT("info", ("RD lock put into queue, block 0x%lx", (ulong)block));
2369 : }
2370 : else
2371 : {
2372 9278218 : block->rlocks++;
2373 9278218 : DBUG_PRINT("info", ("RD lock set, block 0x%lx", (ulong)block));
2374 : }
2375 9278218 : DBUG_RETURN(0);
2376 : }
2377 :
2378 :
2379 : /*
2380 : @brief Remove write lock from the block
2381 :
2382 : @param block the block to work with
2383 : @param read_lock downgrade to read lock: one more read lock is
2384 : added to block->rlocks_queue before the write lock is dropped
2385 :
2386 : @note see note for get_wrlock().
2387 : */
2388 :
2389 : static void release_wrlock(PAGECACHE_BLOCK_LINK *block, my_bool read_lock)
2390 8231171 : {
2391 8231171 : DBUG_ENTER("release_wrlock");
2392 8231171 : PCBLOCK_INFO(block);
2393 8231171 : DBUG_ASSERT(block->wlocks > 0);
2394 8231171 : DBUG_ASSERT(block->rlocks == 0);
2395 8231171 : DBUG_ASSERT(block->pins > 0);
2396 8231171 : if (read_lock)
2397 460464 : block->rlocks_queue++;
2398 8231171 : if (block->wlocks == 1)
2399 : {
/* Last write lock is going away: queued read locks become active */
2400 8195435 : block->rlocks= block->rlocks_queue;
2401 8195435 : block->rlocks_queue= 0;
2402 : }
2403 8231171 : block->wlocks--;
2404 8231171 : if (block->wlocks > 0)
2405 35736 : DBUG_VOID_RETURN; /* Multiple write locked */
2406 8195435 : DBUG_PRINT("info", ("WR lock reset, block 0x%lx", (ulong)block));
2407 : #ifdef THREAD
2408 : /* release all threads waiting for read lock or one waiting for write */
2409 8195435 : if (block->wqueue[COND_FOR_WRLOCK].last_thread)
2410 163153 : wqueue_release_one_locktype_from_queue(&block->wqueue[COND_FOR_WRLOCK]);
2411 : #endif
2412 8195435 : PCBLOCK_INFO(block);
2413 8195435 : DBUG_VOID_RETURN;
2414 : }
2415 :
2416 : /*
2417 : @brief Remove read lock from the block
2418 :
2419 : @param block the block to work with
2420 :
2421 : @note If a write lock is still present (asserted to be held by the calling
2422 : thread) the read lock was only queued, so it is removed from
2423 : block->rlocks_queue instead of block->rlocks. See also the note for get_wrlock().
2424 : */
2425 : static void release_rdlock(PAGECACHE_BLOCK_LINK *block)
2426 9738682 : {
2427 9738682 : DBUG_ENTER("release_rdlock");
2428 9738682 : PCBLOCK_INFO(block);
2429 9738682 : if (block->wlocks)
2430 : {
/* The read lock never became active: just take it out of the queue */
2431 59 : DBUG_ASSERT(pthread_equal(block->write_locker, pthread_self()));
2432 59 : DBUG_ASSERT(block->rlocks == 0);
2433 59 : DBUG_ASSERT(block->rlocks_queue > 0);
2434 59 : block->rlocks_queue--;
2435 59 : DBUG_PRINT("info", ("RD lock queue decreased, block 0x%lx", (ulong)block));
2436 59 : DBUG_VOID_RETURN;
2437 : }
2438 9738623 : DBUG_ASSERT(block->rlocks > 0);
2439 9738623 : DBUG_ASSERT(block->rlocks_queue == 0);
2440 9738623 : block->rlocks--;
2441 9738623 : DBUG_PRINT("info", ("RD lock decreased, block 0x%lx", (ulong)block));
2442 9738623 : if (block->rlocks > 0)
2443 6197578 : DBUG_VOID_RETURN; /* Multiple read locked */
2444 3541045 : DBUG_PRINT("info", ("RD lock reset, block 0x%lx", (ulong)block));
2445 : #ifdef THREAD
2446 : /* release all threads waiting for read lock or one waiting for write */
2447 3541045 : if (block->wqueue[COND_FOR_WRLOCK].last_thread)
2448 0 : wqueue_release_one_locktype_from_queue(&block->wqueue[COND_FOR_WRLOCK]);
2449 : #endif
2450 3541045 : PCBLOCK_INFO(block);
2451 3541045 : DBUG_VOID_RETURN;
2452 : }
2453 :
2454 : /**
2455 : @brief Try to lock/unlock and pin/unpin the block
2456 :
2457 : @param pagecache pointer to a page cache data structure
2458 : @param block the block to work with
2459 : @param lock lock change mode
2460 : @param pin pin change mode
2461 : @param any allow unpinning block pinned by any thread; possible
2462 : only if not locked, see pagecache_unlock_by_link()
2463 :
2464 : @note When taking the lock fails, block->hash_link->requests is
2465 : decremented before 1 is returned.
2466 :
2467 : @retval 0 OK
2468 : @retval 1 Try to lock the block failed
2469 : */
static my_bool make_lock_and_pin(PAGECACHE *pagecache,
2470 : PAGECACHE_BLOCK_LINK *block,
2471 : enum pagecache_page_lock lock,
2472 : enum pagecache_page_pin pin,
2473 : my_bool any)
2474 53359379 : {
2475 53359379 : DBUG_ENTER("make_lock_and_pin");
2476 :
2477 53359379 : DBUG_PRINT("enter", ("block: 0x%lx", (ulong)block));
2478 : #ifndef DBUG_OFF
2479 53359379 : if (block)
2480 : {
2481 53359379 : DBUG_PRINT("enter", ("block: 0x%lx (%u) wrlocks: %u rdlocks: %u "
2482 : "rdlocks_q: %u pins: %u lock: %s pin: %s any %d",
2483 : (ulong)block, PCBLOCK_NUMBER(pagecache, block),
2484 : block->wlocks, block->rlocks, block->rlocks_queue,
2485 : block->pins,
2486 : page_cache_page_lock_str[lock],
2487 : page_cache_page_pin_str[pin], (int)any));
2488 53359379 : PCBLOCK_INFO(block);
2489 : }
2490 : #endif
2491 :
/* 'any' is only legal for a pure unpin of a block that stays unlocked */
2492 53359379 : DBUG_ASSERT(!any ||
2493 : ((lock == PAGECACHE_LOCK_LEFT_UNLOCKED) &&
2494 : (pin == PAGECACHE_UNPIN)));
2495 :
2496 53359379 : switch (lock) {
2497 : case PAGECACHE_LOCK_WRITE: /* free -> write */
2498 : /* Writelock and pin the buffer */
2499 8231171 : if (get_wrlock(pagecache, block))
2500 : {
2501 : /* Couldn't lock because block changed status => need retry */
2502 8231171 : goto retry;
2503 : }
2504 :
2505 : /* The cache is locked so nothing afraid of */
/* A write lock always pins the block, whatever 'pin' says */
2506 8231171 : add_pin(block);
2507 8231171 : info_add_lock(block, 1);
2508 8231171 : break;
2509 : case PAGECACHE_LOCK_WRITE_TO_READ: /* write -> read */
2510 : case PAGECACHE_LOCK_WRITE_UNLOCK: /* write -> free */
2511 : /* Removes write lock and puts read lock */
2512 8231171 : release_wrlock(block, lock == PAGECACHE_LOCK_WRITE_TO_READ);
2513 : /* fall through */
2514 : case PAGECACHE_LOCK_READ_UNLOCK: /* read -> free */
/* Re-test 'lock': the write cases above fall through into this one */
2515 17969853 : if (lock == PAGECACHE_LOCK_READ_UNLOCK)
2516 9738682 : release_rdlock(block);
2517 : /* fall through */
2518 : case PAGECACHE_LOCK_LEFT_READLOCKED: /* read -> read */
2519 25159109 : if (pin == PAGECACHE_UNPIN)
2520 : {
2521 15420423 : remove_pin(block, FALSE);
2522 : }
/* Keep the debug lock records in step with the lock change made above */
2523 25159109 : if (lock == PAGECACHE_LOCK_WRITE_TO_READ)
2524 : {
2525 460464 : info_change_lock(block, 0);
2526 : }
2527 24698645 : else if (lock == PAGECACHE_LOCK_WRITE_UNLOCK ||
2528 : lock == PAGECACHE_LOCK_READ_UNLOCK)
2529 : {
2530 17509389 : info_remove_lock(block);
2531 : }
2532 : break;
2533 : case PAGECACHE_LOCK_READ: /* free -> read */
2534 9278218 : if (get_rdlock(pagecache, block))
2535 : {
2536 : /* Couldn't lock because block changed status => need retry */
2537 9278218 : goto retry;
2538 : }
2539 :
2540 9278218 : if (pin == PAGECACHE_PIN)
2541 : {
2542 : /* The cache is locked so nothing afraid off */
2543 7189254 : add_pin(block);
2544 : }
2545 9278218 : info_add_lock(block, 0);
2546 9278218 : break;
2547 : case PAGECACHE_LOCK_LEFT_UNLOCKED: /* free -> free */
2548 2 : if (pin == PAGECACHE_UNPIN)
2549 : {
2550 2 : remove_pin(block, any);
2551 : }
2552 : /* fall through */
2553 : case PAGECACHE_LOCK_LEFT_WRITELOCKED: /* write -> write */
2554 : break; /* do nothing */
2555 : default:
2556 0 : DBUG_ASSERT(0); /* Never should happened */
2557 : }
2558 :
2559 : #ifndef DBUG_OFF
2560 53359379 : if (block)
2561 53359379 : PCBLOCK_INFO(block);
2562 : #endif
2563 53359379 : DBUG_RETURN(0);
2564 0 : retry:
/* Locking failed: give back the request registered on the hash link */
2565 0 : DBUG_PRINT("INFO", ("Retry block 0x%lx", (ulong)block));
2566 0 : PCBLOCK_INFO(block);
2567 0 : DBUG_ASSERT(block->hash_link->requests > 0);
2568 0 : block->hash_link->requests--;
2569 0 : PCBLOCK_INFO(block);
2570 0 : DBUG_RETURN(1);
2571 :
2572 : }
2573 :
2574 :
2575 : /*
2576 : Read into a key cache block buffer from disk.
2577 :
2578 : SYNOPSIS
2579 :
2580 : read_block()
2581 : pagecache pointer to a page cache data structure
2582 : block block to which buffer the data is to be read
2583 : primary <-> the current thread will read the data
2584 :
2585 : RETURN VALUE
2586 : None
2587 :
2588 : NOTES.
2589 : The function either reads a page data from file to the block buffer,
2590 : or waits until another thread reads it. What page to read is determined
2591 : by a block parameter - reference to a hash link for this page.
2592 : If an error occurs THE PCBLOCK_ERROR bit is set in the block status
2593 : and my_errno is stored in block->error.
2594 : On entry cache_lock is locked; a primary reader releases it around the
2595 : disk read and takes it again before updating the block status.
*/
2596 :
2597 : static void read_block(PAGECACHE *pagecache,
2598 : PAGECACHE_BLOCK_LINK *block,
2599 : my_bool primary)
2600 1446199 : {
2601 :
2602 1446199 : DBUG_ENTER("read_block");
2603 1446199 : DBUG_PRINT("enter", ("read block: 0x%lx primary: %d",
2604 : (ulong)block, primary));
2605 1446199 : if (primary)
2606 : {
2607 : size_t error;
2608 : /*
2609 : This code is executed only by threads
2610 : that submitted primary requests
2611 : */
2612 :
2613 1446199 : pagecache->global_cache_read++;
2614 : /* Page is not in buffer yet, is to be read from disk */
2615 1446199 : pagecache_pthread_mutex_unlock(&pagecache->cache_lock);
2616 : /*
2617 : Here other threads may step in and register as secondary readers.
2618 : They will register in block->wqueue[COND_FOR_REQUESTED].
2619 : */
2620 1446199 : error= pagecache_fread(pagecache, &block->hash_link->file,
2621 : block->buffer,
2622 : block->hash_link->pageno,
2623 : pagecache->readwrite_flags);
2624 1446199 : pagecache_pthread_mutex_lock(&pagecache->cache_lock);
2625 1446199 : if (error)
2626 : {
2627 1495 : block->status|= PCBLOCK_ERROR;
2628 1495 : block->error= (int16) my_errno;
2629 1495 : my_debug_put_break_here();
2630 : }
2631 : else
2632 : {
/*
  The read succeeded: mark the page as read, then run the file's
  read_callback on the buffer. A non-zero callback result flags
  the block as erroneous in addition to PCBLOCK_READ.
*/
2633 1444704 : block->status|= PCBLOCK_READ;
2634 1444704 : if ((*block->hash_link->file.read_callback)(block->buffer,
2635 : block->hash_link->pageno,
2636 : block->hash_link->
2637 : file.callback_data))
2638 : {
2639 128 : DBUG_PRINT("error", ("read callback problem"));
2640 128 : block->status|= PCBLOCK_ERROR;
2641 128 : block->error= (int16) my_errno;
2642 128 : my_debug_put_break_here();
2643 : }
2644 : }
2645 1446199 : DBUG_PRINT("read_block",
2646 : ("primary request: new page in cache"));
2647 : /* Signal that all pending requests for this page now can be processed */
2648 : #ifdef THREAD
2649 1446199 : if (block->wqueue[COND_FOR_REQUESTED].last_thread)
2650 0 : wqueue_release_queue(&block->wqueue[COND_FOR_REQUESTED]);
2651 : #endif
2652 : }
2653 : else
2654 : {
2655 : /*
2656 : This code is executed only by threads
2657 : that submitted secondary requests
2658 : */
2659 :
2660 : #ifdef THREAD
2661 0 : struct st_my_thread_var *thread= my_thread_var;
2662 : /* Put the request into a queue and wait until it can be processed */
2663 0 : wqueue_add_to_queue(&block->wqueue[COND_FOR_REQUESTED], thread);
2664 : do
2665 : {
2666 0 : DBUG_PRINT("read_block: wait",
2667 : ("suspend thread %ld", thread->id));
2668 0 : pagecache_pthread_cond_wait(&thread->suspend,
2669 : &pagecache->cache_lock);
2670 : }
2671 0 : while (thread->next);
2672 : #else
2673 : KEYCACHE_DBUG_ASSERT(0);
2674 : /* No parallel requests in single-threaded case */
2675 : #endif
2676 0 : DBUG_PRINT("read_block",
2677 : ("secondary request: new page in cache"));
2678 : }
2679 1446199 : DBUG_VOID_RETURN;
2680 : }
2681 :
2682 :
2683 : /**
2684 : @brief Set LSN on the page to the given one if the given LSN is bigger
2685 :
2686 : @param pagecache pointer to a page cache data structure
2687 : @param lsn LSN to set
2688 : @param block block to check and set
2689 :
@note If the LSN is stored and the block is not yet marked
PCBLOCK_CHANGED, the block is passed to link_to_changed_list().
*/
2690 :
2691 : static void check_and_set_lsn(PAGECACHE *pagecache,
2692 : LSN lsn, PAGECACHE_BLOCK_LINK *block)
2693 2632325 : {
2694 : LSN old;
2695 2632325 : DBUG_ENTER("check_and_set_lsn");
2696 : /*
2697 : In recovery, we can _ma_unpin_all_pages() to put a LSN on page, though
2698 : page would be PAGECACHE_PLAIN_PAGE (transactionality temporarily disabled
2699 : to not log REDOs).
2700 : */
2701 2632325 : DBUG_ASSERT((block->type == PAGECACHE_LSN_PAGE) || maria_in_recovery);
/* The page's current LSN is decoded from the start of the block buffer */
2702 2632325 : old= lsn_korr(block->buffer);
2703 2632325 : DBUG_PRINT("info", ("old lsn: (%lu, 0x%lx) new lsn: (%lu, 0x%lx)",
2704 : LSN_IN_PARTS(old), LSN_IN_PARTS(lsn)));
/* Only ever move the page LSN forward */
2705 2632325 : if (cmp_translog_addr(lsn, old) > 0)
2706 : {
2707 :
2708 2596604 : DBUG_ASSERT(block->type != PAGECACHE_READ_UNKNOWN_PAGE);
2709 2596604 : lsn_store(block->buffer, lsn);
2710 : /* we stored LSN in page so we dirtied it */
2711 2596604 : if (!(block->status & PCBLOCK_CHANGED))
2712 2135 : link_to_changed_list(pagecache, block);
2713 : }
2714 2632325 : DBUG_VOID_RETURN;
2715 : }
2716 :
2717 :
2718 : /**
2719 : @brief Unlock/unpin a page and put an LSN stamp on it if needed
2720 :
2721 : @param pagecache pointer to a page cache data structure
2722 : @param file handler for the file of the page to be unlocked
2723 : @param pageno number of the block of data in the file
2724 : @param lock lock change (PAGECACHE_LOCK_READ and PAGECACHE_LOCK_WRITE are not allowed)
2725 : @param pin pin change (PAGECACHE_PIN is not allowed)
2726 : @param first_REDO_LSN_for_page do not set it if it is zero
2727 : @param lsn if it is not LSN_IMPOSSIBLE (0) and it
2728 : is bigger than the LSN on the page it will be written on
2729 : the page
2730 : @param was_changed should be true if the page was write locked with
2731 : direct link giving and the page was changed
2732 :
2733 : @note
2734 : Pinning uses the request registration mechanism; it works the following way:
2735 : | beginning | ending |
2736 : | of func. | of func. |
2737 : ----------------------------+-------------+---------------+
2738 : PAGECACHE_PIN_LEFT_PINNED | - | - |
2739 : PAGECACHE_PIN_LEFT_UNPINNED | reg request | unreg request |
2740 : PAGECACHE_PIN | reg request | - |
2741 : PAGECACHE_UNPIN | - | unreg request |
2742 :
2743 :
2744 : */
2745 :
2746 : void pagecache_unlock(PAGECACHE *pagecache,
2747 : PAGECACHE_FILE *file,
2748 : pgcache_page_no_t pageno,
2749 : enum pagecache_page_lock lock,
2750 : enum pagecache_page_pin pin,
2751 : LSN first_REDO_LSN_for_page,
2752 : LSN lsn, my_bool was_changed)
2753 4 : {
2754 : PAGECACHE_BLOCK_LINK *block;
2755 : int page_st;
2756 4 : DBUG_ENTER("pagecache_unlock");
2757 4 : DBUG_PRINT("enter", ("fd: %u page: %lu %s %s",
2758 : (uint) file->file, (ulong) pageno,
2759 : page_cache_page_lock_str[lock],
2760 : page_cache_page_pin_str[pin]));
2761 : /* This function only releases or keeps locks/pins; acquiring a lock or a pin is not allowed here */
2762 4 : DBUG_ASSERT(pin != PAGECACHE_PIN);
2763 4 : DBUG_ASSERT(lock != PAGECACHE_LOCK_READ);
2764 4 : DBUG_ASSERT(lock != PAGECACHE_LOCK_WRITE);
2765 :
2766 4 : pagecache_pthread_mutex_lock(&pagecache->cache_lock);
2767 : /*
2768 : As long as we keep a page lock the cache can be used, and we do hold
2769 : a lock because we are here to unlock it.
2770 : */
2771 4 : DBUG_ASSERT(pagecache->can_be_used);
2772 :
2773 4 : inc_counter_for_resize_op(pagecache);
2774 : /* A request is registered only for PAGECACHE_PIN_LEFT_UNPINNED; see NOTE for pagecache_unlock about registering requests */
2775 4 : block= find_block(pagecache, file, pageno, 0, 0,
2776 : pin == PAGECACHE_PIN_LEFT_UNPINNED, &page_st);
2777 4 : PCBLOCK_INFO(block);
2778 4 : DBUG_ASSERT(block != 0 && page_st == PAGE_READ);
2779 4 : if (first_REDO_LSN_for_page)
2780 : {
2781 0 : DBUG_ASSERT(lock == PAGECACHE_LOCK_WRITE_UNLOCK);
2782 0 : DBUG_ASSERT(pin == PAGECACHE_UNPIN);
2783 0 : pagecache_set_block_rec_lsn(block, first_REDO_LSN_for_page);
2784 : }
2785 4 : if (lsn != LSN_IMPOSSIBLE)
2786 0 : check_and_set_lsn(pagecache, lsn, block);
2787 :
2788 : /* A block flagged PCBLOCK_DIRECT_W may only be passed with a write-lock related lock change */
2789 4 : DBUG_ASSERT((block->status & PCBLOCK_DIRECT_W) == 0 ||
2790 : (lock == PAGECACHE_LOCK_WRITE_UNLOCK ||
2791 : lock == PAGECACHE_LOCK_WRITE_TO_READ ||
2792 : lock == PAGECACHE_LOCK_LEFT_WRITELOCKED));
2793 : /*
2794 : If was_changed then status should be PCBLOCK_DIRECT_W or marked
2795 : as dirty
2796 : */
2797 4 : DBUG_ASSERT(!was_changed || (block->status & PCBLOCK_DIRECT_W) ||
2798 : (block->status & PCBLOCK_CHANGED));
2799 4 : if ((block->status & PCBLOCK_DIRECT_W) &&
2800 : (lock == PAGECACHE_LOCK_WRITE_UNLOCK ||
2801 : lock == PAGECACHE_LOCK_WRITE_TO_READ))
2802 : {
2803 0 : if (!(block->status & PCBLOCK_CHANGED) && was_changed)
2804 0 : link_to_changed_list(pagecache, block);
2805 0 : block->status&= ~PCBLOCK_DIRECT_W;
2806 0 : DBUG_PRINT("info", ("Drop PCBLOCK_DIRECT_W for block: 0x%lx",
2807 : (ulong) block));
2808 : }
2809 :
2810 4 : if (make_lock_and_pin(pagecache, block, lock, pin, FALSE))
2811 : {
2812 0 : DBUG_ASSERT(0); /* should not happen */
2813 : }
2814 :
2815 4 : remove_reader(block);
2816 : /*
2817 : Link the block into the LRU chain if it's the last submitted request
2818 : for the block and block will not be pinned.
2819 : See NOTE for pagecache_unlock about registering requests.
2820 : */
2821 4 : if (pin != PAGECACHE_PIN_LEFT_PINNED)
2822 4 : unreg_request(pagecache, block, 1);
2823 :
2824 4 : dec_counter_for_resize_op(pagecache);
2825 :
2826 4 : pagecache_pthread_mutex_unlock(&pagecache->cache_lock);
2827 :
2828 4 : DBUG_VOID_RETURN;
2829 : }
2830 :
2831 :
2832 : /*
2833 : Unpin page (the read lock is kept: PAGECACHE_LOCK_LEFT_READLOCKED is used)
2834 :
2835 : SYNOPSIS
2836 : pagecache_unpin()
2837 : pagecache pointer to a page cache data structure
2838 : file handler for the file of the page to be unpinned
2839 : pageno number of the block of data in the file
2840 : lsn if it is not LSN_IMPOSSIBLE (0) and it
2841 : is bigger than the LSN on the page it will be written on
2842 : the page
2843 : */
2844 :
2845 : void pagecache_unpin(PAGECACHE *pagecache,
2846 : PAGECACHE_FILE *file,
2847 : pgcache_page_no_t pageno,
2848 : LSN lsn)
2849 0 : {
2850 : PAGECACHE_BLOCK_LINK *block;
2851 : int page_st;
2852 0 : DBUG_ENTER("pagecache_unpin");
2853 0 : DBUG_PRINT("enter", ("fd: %u page: %lu",
2854 : (uint) file->file, (ulong) pageno));
2855 0 : pagecache_pthread_mutex_lock(&pagecache->cache_lock);
2856 : /*
2857 : As long as we keep a page lock the cache can be used, and we do hold
2858 : a lock because we are here to unpin a locked page.
2859 : */
2860 0 : DBUG_ASSERT(pagecache->can_be_used);
2861 :
2862 0 : inc_counter_for_resize_op(pagecache);
2863 : /* No request is registered here (PAGECACHE_UNPIN case); see NOTE for pagecache_unlock about registering requests */
2864 0 : block= find_block(pagecache, file, pageno, 0, 0, 0, &page_st);
2865 0 : DBUG_ASSERT(block != 0);
2866 0 : DBUG_ASSERT(page_st == PAGE_READ);
2867 : /* A block flagged PCBLOCK_DIRECT_W can't be unpinned without unlocking it */
2868 0 : DBUG_ASSERT((block->status & PCBLOCK_DIRECT_W) == 0);
2869 :
2870 0 : if (lsn != LSN_IMPOSSIBLE)
2871 0 : check_and_set_lsn(pagecache, lsn, block);
2872 :
2873 : /*
2874 : We can unpin only while keeping a read lock because:
2875 : a) we can't pin without any lock
2876 : b) we can't unpin keeping write lock
2877 : */
2878 0 : if (make_lock_and_pin(pagecache, block,
2879 : PAGECACHE_LOCK_LEFT_READLOCKED,
2880 : PAGECACHE_UNPIN, FALSE))
2881 0 : DBUG_ASSERT(0); /* should not happen */
2882 :
2883 0 : remove_reader(block);
2884 : /*
2885 : Link the block into the LRU chain if it's the last submitted request
2886 : for the block and block will not be pinned.
2887 : See NOTE for pagecache_unlock about registering requests
2888 : */
2889 0 : unreg_request(pagecache, block, 1);
2890 :
2891 0 : dec_counter_for_resize_op(pagecache);
2892 :
2893 0 : pagecache_pthread_mutex_unlock(&pagecache->cache_lock);
2894 :
2895 0 : DBUG_VOID_RETURN;
2896 : }
2897 :
2898 :
2899 : /**
2900 : @brief Unlock/unpin a page and put an LSN stamp on it if needed
2901 : (uses direct block/page pointer)
2902 :
2903 : @param pagecache pointer to a page cache data structure
2904 : @param block direct link to page (returned by read or write)
2905 : @param lock lock change
2906 : @param pin pin page
2907 : @param first_REDO_LSN_for_page do not set it if it is LSN_IMPOSSIBLE (0)
2908 : @param lsn if it is not LSN_IMPOSSIBLE and it is bigger than
2909 : LSN on the page it will be written on the page
2910 : @param was_changed should be true if the page was write locked with
2911 : direct link giving and the page was changed; first_REDO_LSN_for_page and lsn are looked at only when it is true
2912 : @param any allow unpinning block pinned by any thread; possible
2913 : only if not locked
2914 :
2915 : @note 'any' is a hack so that _ma_bitmap_unpin_all() is allowed to unpin
2916 : non-locked bitmap pages pinned by other threads. Because it always uses
2917 : PAGECACHE_LOCK_LEFT_UNLOCKED and PAGECACHE_UNPIN
2918 : (see write_changed_bitmap()), the hack is limited to these conditions.
2919 : */
2920 :
2921 : void pagecache_unlock_by_link(PAGECACHE *pagecache,
2922 : PAGECACHE_BLOCK_LINK *block,
2923 : enum pagecache_page_lock lock,
2924 : enum pagecache_page_pin pin,
2925 : LSN first_REDO_LSN_for_page,
2926 : LSN lsn, my_bool was_changed,
2927 : my_bool any)
2928 14438709 : {
2929 14438709 : DBUG_ENTER("pagecache_unlock_by_link");
2930 14438709 : DBUG_PRINT("enter", ("block: 0x%lx fd: %u page: %lu changed: %d %s %s",
2931 : (ulong) block,
2932 : (uint) block->hash_link->file.file,
2933 : (ulong) block->hash_link->pageno, was_changed,
2934 : page_cache_page_lock_str[lock],
2935 : page_cache_page_pin_str[pin]));
2936 : /*
2937 : We do not allow any lock/pin increasing here and page can't be unpinned because we use direct link.
2938 : NOTE(review): the assertion on PAGECACHE_PIN_LEFT_UNPINNED below contradicts the PAGECACHE_PIN_LEFT_UNPINNED / PAGECACHE_LOCK_READ_UNLOCK branch taken right after the mutex is locked - confirm which one is intended.
2939 : */
2940 14438709 : DBUG_ASSERT(pin != PAGECACHE_PIN);
2941 14438709 : DBUG_ASSERT(pin != PAGECACHE_PIN_LEFT_UNPINNED);
2942 14438709 : DBUG_ASSERT(lock != PAGECACHE_LOCK_READ);
2943 14438709 : DBUG_ASSERT(lock != PAGECACHE_LOCK_WRITE);
2944 14438709 : pagecache_pthread_mutex_lock(&pagecache->cache_lock);
2945 14438709 : if (pin == PAGECACHE_PIN_LEFT_UNPINNED &&
2946 : lock == PAGECACHE_LOCK_READ_UNLOCK)
2947 : {
2948 0 : if (make_lock_and_pin(pagecache, block, lock, pin, FALSE))
2949 0 : DBUG_ASSERT(0); /* should not happen */
2950 0 : pagecache_pthread_mutex_unlock(&pagecache->cache_lock);
2951 0 : DBUG_VOID_RETURN;
2952 : }
2953 :
2954 : /*
2955 : As long as we keep a page lock the cache can be used, and we do hold
2956 : a lock because we are here to unlock it.
2957 : */
2958 14438709 : DBUG_ASSERT(pagecache->can_be_used);
2959 :
2960 14438709 : inc_counter_for_resize_op(pagecache);
2961 14438709 : if (was_changed)
2962 : {
2963 4498860 : if (first_REDO_LSN_for_page != LSN_IMPOSSIBLE)
2964 : {
2965 : /*
2966 : LOCK_READ_UNLOCK is ok here as the page may have first been locked
2967 : with a WRITE lock that was temporarily converted to a READ lock before
2968 : it's unpinned
2969 : */
2970 1752791 : DBUG_ASSERT(lock == PAGECACHE_LOCK_WRITE_UNLOCK ||
2971 : lock == PAGECACHE_LOCK_READ_UNLOCK);
2972 1752791 : DBUG_ASSERT(pin == PAGECACHE_UNPIN);
2973 1752791 : pagecache_set_block_rec_lsn(block, first_REDO_LSN_for_page);
2974 : }
2975 4498860 : if (lsn != LSN_IMPOSSIBLE)
2976 2632325 : check_and_set_lsn(pagecache, lsn, block);
2977 : /*
2978 : Reset error flag. Mark also that page is active; This may not have
2979 : been the case if there was an error reading the page
2980 : */
2981 4498860 : block->status= (block->status & ~PCBLOCK_ERROR) | PCBLOCK_READ;
2982 : }
2983 :
2984 : /* A block flagged PCBLOCK_DIRECT_W may only be passed with a write-lock related lock change */
2985 14438709 : DBUG_ASSERT((block->status & PCBLOCK_DIRECT_W) == 0 ||
2986 : (lock == PAGECACHE_LOCK_WRITE_UNLOCK ||
2987 : lock == PAGECACHE_LOCK_WRITE_TO_READ ||
2988 : lock == PAGECACHE_LOCK_LEFT_WRITELOCKED));
2989 : /*
2990 : If was_changed then status should be PCBLOCK_DIRECT_W or marked
2991 : as dirty
2992 : */
2993 14438709 : DBUG_ASSERT(!was_changed || (block->status & PCBLOCK_DIRECT_W) ||
2994 : (block->status & PCBLOCK_CHANGED));
2995 14438709 : if ((block->status & PCBLOCK_DIRECT_W) &&
2996 : (lock == PAGECACHE_LOCK_WRITE_UNLOCK ||
2997 : lock == PAGECACHE_LOCK_WRITE_TO_READ))
2998 : {
2999 1041095 : if (!(block->status & PCBLOCK_CHANGED) && was_changed)
3000 30561 : link_to_changed_list(pagecache, block);
3001 1041095 : block->status&= ~PCBLOCK_DIRECT_W;
3002 1041095 : DBUG_PRINT("info", ("Drop PCBLOCK_DIRECT_W for block: 0x%lx",
3003 : (ulong) block));
3004 : }
3005 :
3006 14438709 : if (make_lock_and_pin(pagecache, block, lock, pin, any))
3007 0 : DBUG_ASSERT(0); /* should not happen */
3008 :
3009 : /*
3010 : Link the block into the LRU chain if it's the last submitted request
3011 : for the block and block will not be pinned.
3012 : See NOTE for pagecache_unlock about registering requests.
3013 : */
3014 14438709 : if (pin != PAGECACHE_PIN_LEFT_PINNED)
3015 13949073 : unreg_request(pagecache, block, 1);
3016 :
3017 14438709 : dec_counter_for_resize_op(pagecache);
3018 :
3019 14438709 : pagecache_pthread_mutex_unlock(&pagecache->cache_lock);
3020 :
3021 14438709 : DBUG_VOID_RETURN;
3022 : }
3023 :
3024 :
3025 : /*
3026 : Unpin page (the read lock is kept: PAGECACHE_LOCK_LEFT_READLOCKED is used)
3027 : (uses direct block/page pointer)
3028 :
3029 : SYNOPSIS
3030 : pagecache_unpin_by_link()
3031 : pagecache pointer to a page cache data structure
3032 : block direct link to page (returned by read or write)
3033 : lsn if it is not LSN_IMPOSSIBLE (0) and it
3034 : is bigger than the LSN on the page it will be written on
3035 : the page
3036 : */
3037 :
3038 : void pagecache_unpin_by_link(PAGECACHE *pagecache,
3039 : PAGECACHE_BLOCK_LINK *block,
3040 : LSN lsn)
3041 2 : {
3042 2 : DBUG_ENTER("pagecache_unpin_by_link");
3043 2 : DBUG_PRINT("enter", ("block: 0x%lx fd: %u page: %lu",
3044 : (ulong) block,
3045 : (uint) block->hash_link->file.file,
3046 : (ulong) block->hash_link->pageno));
3047 :
3048 2 : pagecache_pthread_mutex_lock(&pagecache->cache_lock);
3049 : /*
3050 : As long as we keep a page lock the cache can be used, and we do hold
3051 : a lock because we are here to unpin a locked page.
3052 : */
3053 2 : DBUG_ASSERT(pagecache->can_be_used);
3054 : /* A block flagged PCBLOCK_DIRECT_W can't be unpinned without unlocking it */
3055 2 : DBUG_ASSERT((block->status & PCBLOCK_DIRECT_W) == 0);
3056 :
3057 2 : inc_counter_for_resize_op(pagecache);
3058 :
3059 2 : if (lsn != LSN_IMPOSSIBLE)
3060 0 : check_and_set_lsn(pagecache, lsn, block);
3061 :
3062 : /*
3063 : We can unpin only while keeping a read lock because:
3064 : a) we can't pin without any lock
3065 : b) we can't unpin keeping write lock
3066 : */
3067 2 : if (make_lock_and_pin(pagecache, block,
3068 : PAGECACHE_LOCK_LEFT_READLOCKED,
3069 : PAGECACHE_UNPIN, FALSE))
3070 0 : DBUG_ASSERT(0); /* should not happen */
3071 :
3072 : /*
3073 : Link the block into the LRU chain if it's the last submitted request
3074 : for the block and block will not be pinned.
3075 : See NOTE for pagecache_unlock about registering requests.
3076 : */
3077 2 : unreg_request(pagecache, block, 1);
3078 :
3079 2 : dec_counter_for_resize_op(pagecache);
3080 :
3081 2 : pagecache_pthread_mutex_unlock(&pagecache->cache_lock);
3082 :
3083 2 : DBUG_VOID_RETURN;
3084 : }
3085 :
3086 : /* Describes which lock change to apply before and which after a read/write; one entry per enum pagecache_page_lock value (see lock_to_read[]) */
3087 : struct rw_lock_change
3088 : {
3089 : my_bool need_lock_change; /* true if unlock_lock has to be applied at the end */
3090 : enum pagecache_page_lock new_lock; /* lock change applied at the beginning */
3091 : enum pagecache_page_lock unlock_lock; /* lock change applied at the end */
3092 : };
3093 :
3094 : /* Describes which pin change to apply before and which after a read/write; one entry per enum pagecache_page_lock value (see lock_to_pin[][]) */
3095 : struct rw_pin_change
3096 : {
3097 : enum pagecache_page_pin new_pin; /* pin change applied at the beginning */
3098 : enum pagecache_page_pin unlock_pin; /* pin change applied at the end */
3099 : };
3100 :
3101 : /**
3102 : Depending on the lock which the user wants in pagecache_read(), we
3103 : need to acquire a first type of lock at start of pagecache_read(), and
3104 : downgrade it to a second type of lock at end. For example, if user
3105 : asked for no lock (PAGECACHE_LOCK_LEFT_UNLOCKED) this translates into
3106 : taking first a read lock PAGECACHE_LOCK_READ (to rightfully block on
3107 : existing write locks) then read then unlock the lock i.e. change lock
3108 : to PAGECACHE_LOCK_READ_UNLOCK (the "1" below tells that a change is
3109 : needed). The table is indexed by the requested enum pagecache_page_lock value.
3110 : */
3111 :
3112 : static struct rw_lock_change lock_to_read[8]=
3113 : {
3114 : { /*PAGECACHE_LOCK_LEFT_UNLOCKED*/
3115 : 1,
3116 : PAGECACHE_LOCK_READ, PAGECACHE_LOCK_READ_UNLOCK
3117 : },
3118 : { /*PAGECACHE_LOCK_LEFT_READLOCKED*/
3119 : 0,
3120 : PAGECACHE_LOCK_LEFT_READLOCKED, PAGECACHE_LOCK_LEFT_READLOCKED
3121 : },
3122 : { /*PAGECACHE_LOCK_LEFT_WRITELOCKED*/
3123 : 0,
3124 : PAGECACHE_LOCK_LEFT_WRITELOCKED, PAGECACHE_LOCK_LEFT_WRITELOCKED
3125 : },
3126 : { /*PAGECACHE_LOCK_READ*/
3127 : 1,
3128 : PAGECACHE_LOCK_READ, PAGECACHE_LOCK_LEFT_READLOCKED
3129 : },
3130 : { /*PAGECACHE_LOCK_WRITE*/
3131 : 1,
3132 : PAGECACHE_LOCK_WRITE, PAGECACHE_LOCK_LEFT_WRITELOCKED
3133 : },
3134 : { /*PAGECACHE_LOCK_READ_UNLOCK*/
3135 : 1,
3136 : PAGECACHE_LOCK_LEFT_READLOCKED, PAGECACHE_LOCK_READ_UNLOCK
3137 : },
3138 : { /*PAGECACHE_LOCK_WRITE_UNLOCK*/
3139 : 1,
3140 : PAGECACHE_LOCK_LEFT_WRITELOCKED, PAGECACHE_LOCK_WRITE_UNLOCK
3141 : },
3142 : { /*PAGECACHE_LOCK_WRITE_TO_READ*/
3143 : 1,
3144 : PAGECACHE_LOCK_LEFT_WRITELOCKED, PAGECACHE_LOCK_WRITE_TO_READ
3145 : }
3146 : };
3147 :
3148 : /**
3149 : Two sets of pin changes (same idea as lock_to_read[] above, but for pinning), indexed as lock_to_pin[buff==0][lock].
3150 : Set [0] is used when the caller supplied a buffer, set [1] when the caller gets a reference to the block itself.
3151 : The sets differ only for PAGECACHE_LOCK_READ and for the final pin change of PAGECACHE_LOCK_WRITE_TO_READ.
3152 : */
3153 :
3154 : static struct rw_pin_change lock_to_pin[2][8]=
3155 : {
3156 : {
3157 : { /*PAGECACHE_LOCK_LEFT_UNLOCKED*/
3158 : PAGECACHE_PIN_LEFT_UNPINNED,
3159 : PAGECACHE_PIN_LEFT_UNPINNED
3160 : },
3161 : { /*PAGECACHE_LOCK_LEFT_READLOCKED*/
3162 : PAGECACHE_PIN_LEFT_UNPINNED,
3163 : PAGECACHE_PIN_LEFT_UNPINNED,
3164 : },
3165 : { /*PAGECACHE_LOCK_LEFT_WRITELOCKED*/
3166 : PAGECACHE_PIN_LEFT_PINNED,
3167 : PAGECACHE_PIN_LEFT_PINNED
3168 : },
3169 : { /*PAGECACHE_LOCK_READ*/
3170 : PAGECACHE_PIN_LEFT_UNPINNED,
3171 : PAGECACHE_PIN_LEFT_UNPINNED
3172 : },
3173 : { /*PAGECACHE_LOCK_WRITE*/
3174 : PAGECACHE_PIN,
3175 : PAGECACHE_PIN_LEFT_PINNED
3176 : },
3177 : { /*PAGECACHE_LOCK_READ_UNLOCK*/
3178 : PAGECACHE_PIN_LEFT_UNPINNED,
3179 : PAGECACHE_PIN_LEFT_UNPINNED
3180 : },
3181 : { /*PAGECACHE_LOCK_WRITE_UNLOCK*/
3182 : PAGECACHE_PIN_LEFT_PINNED,
3183 : PAGECACHE_UNPIN
3184 : },
3185 : { /*PAGECACHE_LOCK_WRITE_TO_READ*/
3186 : PAGECACHE_PIN_LEFT_PINNED,
3187 : PAGECACHE_UNPIN
3188 : }
3189 : },
3190 : {
3191 : { /*PAGECACHE_LOCK_LEFT_UNLOCKED*/
3192 : PAGECACHE_PIN_LEFT_UNPINNED,
3193 : PAGECACHE_PIN_LEFT_UNPINNED
3194 : },
3195 : { /*PAGECACHE_LOCK_LEFT_READLOCKED*/
3196 : PAGECACHE_PIN_LEFT_UNPINNED,
3197 : PAGECACHE_PIN_LEFT_UNPINNED,
3198 : },
3199 : { /*PAGECACHE_LOCK_LEFT_WRITELOCKED*/
3200 : PAGECACHE_PIN_LEFT_PINNED,
3201 : PAGECACHE_PIN_LEFT_PINNED
3202 : },
3203 : { /*PAGECACHE_LOCK_READ*/
3204 : PAGECACHE_PIN,
3205 : PAGECACHE_PIN_LEFT_PINNED
3206 : },
3207 : { /*PAGECACHE_LOCK_WRITE*/
3208 : PAGECACHE_PIN,
3209 : PAGECACHE_PIN_LEFT_PINNED
3210 : },
3211 : { /*PAGECACHE_LOCK_READ_UNLOCK*/
3212 : PAGECACHE_PIN_LEFT_UNPINNED,
3213 : PAGECACHE_PIN_LEFT_UNPINNED
3214 : },
3215 : { /*PAGECACHE_LOCK_WRITE_UNLOCK*/
3216 : PAGECACHE_PIN_LEFT_PINNED,
3217 : PAGECACHE_UNPIN
3218 : },
3219 : { /*PAGECACHE_LOCK_WRITE_TO_READ*/
3220 : PAGECACHE_PIN_LEFT_PINNED,
3221 : PAGECACHE_PIN_LEFT_PINNED,
3222 : }
3223 : }
3224 : };
3225 :
3226 :
3227 : /*
3228 : @brief Read a block of data from a cached file into a buffer
3229 :
3230 : @param pagecache pointer to a page cache data structure
3231 : @param file handler for the file for the block of data to be read
3232 : @param pageno number of the block of data in the file
3233 : @param level determines the weight of the data
3234 : @param buff buffer to where the data must be placed, or 0 to get a pointer to the block's own buffer
3235 : @param type type of the page
3236 : @param lock lock change
3237 : @param page_link out: link to the page if it is left pinned, 0 otherwise; may be NULL if the caller does not need it
3238 :
3239 : @return address where the data is placed (buff, or the block's buffer if buff == 0) if successful, 0 otherwise.
3240 :
3241 : @note Pin will be chosen according to lock parameter (see lock_to_pin)
3242 : @note If the cache cannot be used, the page is read directly into buff with pagecache_fread(), without any locking or pinning.
3243 : @note 'buff', if not NULL, must be long-aligned.
3244 : @note NOTE(review): on the uncached path buff is passed to pagecache_fread() as given, so buff == 0 looks unsupported there - confirm.
3245 : @note If buff==0 then we provide reference on the page so should keep the
3246 : page pinned.
3247 : */
3248 :
3249 : uchar *pagecache_read(PAGECACHE *pagecache,
3250 : PAGECACHE_FILE *file,
3251 : pgcache_page_no_t pageno,
3252 : uint level,
3253 : uchar *buff,
3254 : enum pagecache_page_type type,
3255 : enum pagecache_page_lock lock,
3256 : PAGECACHE_BLOCK_LINK **page_link)
3257 16688265 : {
3258 16688265 : my_bool error= 0;
3259 : enum pagecache_page_pin
3260 16688265 : new_pin= lock_to_pin[buff==0][lock].new_pin,
3261 16688265 : unlock_pin= lock_to_pin[buff==0][lock].unlock_pin;
3262 : PAGECACHE_BLOCK_LINK *fake_link;
3263 : my_bool reg_request;
3264 : #ifndef DBUG_OFF
3265 : char llbuf[22];
3266 16688265 : DBUG_ENTER("pagecache_read");
3267 16688265 : DBUG_PRINT("enter", ("fd: %u page: %s buffer: 0x%lx level: %u "
3268 : "t:%s (%d)%s->%s %s->%s",
3269 : (uint) file->file, ullstr(pageno, llbuf),
3270 : (ulong) buff, level,
3271 : page_cache_page_type_str[type],
3272 : lock_to_read[lock].need_lock_change,
3273 : page_cache_page_lock_str[lock_to_read[lock].new_lock],
3274 : page_cache_page_lock_str[lock_to_read[lock].unlock_lock],
3275 : page_cache_page_pin_str[new_pin],
3276 : page_cache_page_pin_str[unlock_pin]));
3277 16688265 : DBUG_ASSERT(buff != 0 || (buff == 0 && (unlock_pin == PAGECACHE_PIN ||
3278 : unlock_pin == PAGECACHE_PIN_LEFT_PINNED)));
3279 16688265 : DBUG_ASSERT(pageno < ((ULL(1)) << 40));
3280 : #endif
3281 :
3282 16688265 : if (!page_link)
3283 1907051 : page_link= &fake_link;
3284 16688265 : *page_link= 0; /* Catch errors */
3285 :
3286 16688265 : restart:
3287 :
3288 16688265 : if (pagecache->can_be_used)
3289 : {
3290 : /* Page cache is used */
3291 : PAGECACHE_BLOCK_LINK *block;
3292 : uint status;
3293 : int page_st;
3294 :
3295 16688265 : pagecache_pthread_mutex_lock(&pagecache->cache_lock);
3296 16688265 : if (!pagecache->can_be_used)
3297 : {
3298 0 : pagecache_pthread_mutex_unlock(&pagecache->cache_lock);
3299 0 : goto no_key_cache;
3300 : }
3301 :
3302 16688265 : inc_counter_for_resize_op(pagecache);
3303 16688265 : pagecache->global_cache_r_requests++;
3304 : /* See NOTE for pagecache_unlock about registering requests. */
3305 16688265 : reg_request= ((new_pin == PAGECACHE_PIN_LEFT_UNPINNED) ||
3306 : (new_pin == PAGECACHE_PIN));
3307 16688265 : block= find_block(pagecache, file, pageno, level,
3308 : lock == PAGECACHE_LOCK_WRITE,
3309 : reg_request, &page_st);
3310 16688265 : DBUG_PRINT("info", ("Block type: %s current type %s",
3311 : page_cache_page_type_str[block->type],
3312 : page_cache_page_type_str[type]));
3313 16688265 : if (((block->status & PCBLOCK_ERROR) == 0) && (page_st != PAGE_READ))
3314 : {
3315 : /* The requested page is to be read into the block buffer */
3316 1446199 : read_block(pagecache, block,
3317 : (my_bool)(page_st == PAGE_TO_BE_READ));
3318 1446199 : DBUG_PRINT("info", ("read is done"));
3319 : }
3320 : /*
3321 : Assert after block is read. Imagine two concurrent SELECTs on same
3322 : table (thread1 and 2), which want to pagecache_read() the same
3323 : pageno/fileno. Thread1 calls find_block(), decides to evict a dirty
3324 : page from LRU; while it's writing this dirty page to disk, it is
3325 : pre-empted and thread2 runs its find_block(), gets the block (in
3326 : PAGE_TO_BE_READ state). This block still contains the in-eviction
3327 : dirty page, so it still has that page's type, which cannot be tested.
3328 : So thread2 has to wait for read_block() to finish (when it wakes up in
3329 : read_block(), it's woken up by read_block() of thread1, which implies
3330 : that block's type was set to EMPTY by thread1 as part of find_block()).
3331 : */
3332 16688265 : DBUG_ASSERT(block->type == PAGECACHE_EMPTY_PAGE ||
3333 : block->type == type ||
3334 : type == PAGECACHE_LSN_PAGE ||
3335 : type == PAGECACHE_READ_UNKNOWN_PAGE ||
3336 : block->type == PAGECACHE_READ_UNKNOWN_PAGE);
3337 16688265 : if (type != PAGECACHE_READ_UNKNOWN_PAGE ||
3338 : block->type == PAGECACHE_EMPTY_PAGE)
3339 16688265 : block->type= type;
3340 :
3341 16688265 : if (make_lock_and_pin(pagecache, block, lock_to_read[lock].new_lock,
3342 : new_pin, FALSE))
3343 : {
3344 : /*
3345 : We failed to write lock the block, cache is unlocked, we will try to get the block again.
3346 : NOTE(review): inc_counter_for_resize_op() above runs again after the restart and no dec_counter_for_resize_op() is visible on this path - confirm the counter stays balanced.
3347 : */
3348 0 : if (reg_request)
3349 0 : unreg_request(pagecache, block, 1);
3350 0 : pagecache_pthread_mutex_unlock(&pagecache->cache_lock);
3351 0 : DBUG_PRINT("info", ("restarting..."));
3352 0 : goto restart;
3353 : }
3354 :
3355 16688265 : status= block->status;
3356 16688265 : if (!buff)
3357 : {
3358 9614298 : buff= block->buffer;
3359 : /* The caller may write through this pointer; whether it did is resolved on unlock (was_changed) */
3360 9614298 : if ((lock == PAGECACHE_LOCK_WRITE ||
3361 : lock == PAGECACHE_LOCK_LEFT_WRITELOCKED) &&
3362 : !(block->status & PCBLOCK_CHANGED))
3363 : {
3364 1043190 : block->status|= PCBLOCK_DIRECT_W;
3365 1043190 : DBUG_PRINT("info", ("Set PCBLOCK_DIRECT_W for block: 0x%lx",
3366 : (ulong) block));
3367 : }
3368 : }
3369 : else
3370 : {
3371 7073967 : if (!(status & PCBLOCK_ERROR))
3372 : {
3373 : #if !defined(SERIALIZED_READ_FROM_CACHE)
3374 : pagecache_pthread_mutex_unlock(&pagecache->cache_lock);
3375 : #endif
3376 :
3377 7073967 : DBUG_ASSERT((pagecache->block_size & 511) == 0);
3378 : /* Copy data from the cache buffer (with cache_lock released unless SERIALIZED_READ_FROM_CACHE is defined) */
3379 7073967 : bmove512(buff, block->buffer, pagecache->block_size);
3380 :
3381 : #if !defined(SERIALIZED_READ_FROM_CACHE)
3382 : pagecache_pthread_mutex_lock(&pagecache->cache_lock);
3383 : #endif
3384 : }
3385 : else
3386 0 : my_errno= block->error;
3387 : }
3388 :
3389 16688265 : remove_reader(block);
3390 16688265 : if (lock_to_read[lock].need_lock_change)
3391 : {
3392 16688265 : if (make_lock_and_pin(pagecache, block,
3393 : lock_to_read[lock].unlock_lock,
3394 : unlock_pin, FALSE))
3395 0 : DBUG_ASSERT(0);
3396 : }
3397 : /*
3398 : Link the block into the LRU chain if it's the last submitted request
3399 : for the block and block will not be pinned; otherwise hand the block to the caller through page_link.
3400 : See NOTE for pagecache_unlock about registering requests.
3401 : */
3402 18777229 : if (unlock_pin == PAGECACHE_PIN_LEFT_UNPINNED ||
3403 : unlock_pin == PAGECACHE_UNPIN)
3404 2088964 : unreg_request(pagecache, block, 1);
3405 : else
3406 14599301 : *page_link= block;
3407 :
3408 16688265 : dec_counter_for_resize_op(pagecache);
3409 :
3410 16688265 : pagecache_pthread_mutex_unlock(&pagecache->cache_lock);
3411 :
3412 16688265 : if (status & PCBLOCK_ERROR)
3413 : {
3414 1623 : DBUG_ASSERT(my_errno != 0);
3415 1623 : DBUG_PRINT("error", ("Got error %d when doing page read", my_errno));
3416 1623 : DBUG_RETURN((uchar *) 0);
3417 : }
3418 :
3419 16686642 : DBUG_RETURN(buff);
3420 : }
3421 :
3422 0 : no_key_cache: /* Page cache is not used */
3423 :
3424 : /* We can't use mutex here as the page cache may not be initialized */
3425 0 : pagecache->global_cache_r_requests++;
3426 0 : pagecache->global_cache_read++;
3427 0 : if (pagecache_fread(pagecache, file, buff, pageno,
3428 : pagecache->readwrite_flags))
3429 0 : error= 1;
3430 0 : DBUG_RETURN(error ? (uchar*) 0 : buff);
3431 : }
3432 :
3433 :
3434 : /*
3435 : @brief Delete page from the buffer (common part for link and file/page)
3436 :
3437 : @param pagecache pointer to a page cache data structure
3438 : @param block direct link to page (returned by read or write)
3439 : @param page_link hash link of the block; its requests counter is decremented before the block is freed
3440 : @param flush flush page if it is dirty
3441 :
3442 : @retval 0 deleted or was not present at all
3443 : @retval 1 error (writing the dirty page failed; the block is then left locked/pinned and is not freed)
3444 : @note Calls dec_counter_for_resize_op() before returning on both paths; cache_lock is released only around pagecache_fwrite().
3445 : */
3446 :
3447 : static my_bool pagecache_delete_internal(PAGECACHE *pagecache,
3448 : PAGECACHE_BLOCK_LINK *block,
3449 : PAGECACHE_HASH_LINK *page_link,
3450 : my_bool flush)
3451 9913 : {
3452 9913 : my_bool error= 0;
3453 9913 : if (block->status & PCBLOCK_CHANGED)
3454 : {
3455 2009 : if (flush)
3456 : {
3457 : /* The block contains a dirty page - push it out of the cache */
3458 :
3459 1602 : KEYCACHE_DBUG_PRINT("find_block", ("block is dirty"));
3460 :
3461 1602 : pagecache_pthread_mutex_unlock(&pagecache->cache_lock);
3462 : /*
3463 : The call is thread safe because only the current
3464 : thread might change the block->hash_link value
3465 : */
3466 1602 : DBUG_ASSERT(block->pins == 1);
3467 1602 : error= pagecache_fwrite(pagecache,
3468 : &block->hash_link->file,
3469 : block->buffer,
3470 : block->hash_link->pageno,
3471 : block->type,
3472 : pagecache->readwrite_flags);
3473 1602 : pagecache_pthread_mutex_lock(&pagecache->cache_lock);
3474 1602 : pagecache->global_cache_write++;
3475 :
3476 1602 : if (error)
3477 : {
3478 0 : block->status|= PCBLOCK_ERROR;
3479 0 : block->error= (int16) my_errno;
3480 0 : my_debug_put_break_here();
3481 0 : goto err;
3482 : }
3483 : }
3484 2009 : pagecache->blocks_changed--;
3485 2009 : pagecache->global_blocks_changed--;
3486 : /*
3487 : free_block() will change the status and rec_lsn of the block so no
3488 : need to change them here.
3489 : */
3490 : }
3491 : /* Cache is locked, so we can release the page before freeing it */
3492 9913 : if (make_lock_and_pin(pagecache, block,
3493 : PAGECACHE_LOCK_WRITE_UNLOCK,
3494 : PAGECACHE_UNPIN, FALSE))
3495 0 : DBUG_ASSERT(0);
3496 9913 : DBUG_ASSERT(block->hash_link->requests > 0);
3497 9913 : page_link->requests--;
3498 : /* See NOTE for pagecache_unlock about registering requests. */
3499 9913 : free_block(pagecache, block);
3500 :
3501 9913 : err:
3502 9913 : dec_counter_for_resize_op(pagecache);
3503 9913 : return error;
3504 : }
3505 :
3506 :
3507 : /*
3508 : @brief Delete page from the buffer by link
3509 :
3510 : @param pagecache pointer to a page cache data structure
3511 : @param block direct link to page (returned by read or write); must be pinned
3512 : @param lock lock change
3513 : @param flush flush page if it is dirty
3514 :
3515 : @retval 0 deleted, or nothing was done because the cache cannot be used
3516 : @retval 1 error
3517 :
3518 : @note lock can be only PAGECACHE_LOCK_LEFT_WRITELOCKED (page was
3519 : write locked before) or PAGECACHE_LOCK_WRITE (delete will write
3520 : lock page before delete)
3521 : */
3522 :
3523 : my_bool pagecache_delete_by_link(PAGECACHE *pagecache,
3524 : PAGECACHE_BLOCK_LINK *block,
3525 : enum pagecache_page_lock lock,
3526 : my_bool flush)
3527 2 : {
3528 2 : my_bool error= 0;
3529 2 : enum pagecache_page_pin pin= PAGECACHE_PIN_LEFT_PINNED;
3530 2 : DBUG_ENTER("pagecache_delete_by_link");
3531 2 : DBUG_PRINT("enter", ("fd: %d block 0x%lx %s %s",
3532 : block->hash_link->file.file,
3533 : (ulong) block,
3534 : page_cache_page_lock_str[lock],
3535 : page_cache_page_pin_str[pin]));
3536 2 : DBUG_ASSERT(lock == PAGECACHE_LOCK_WRITE ||
3537 : lock == PAGECACHE_LOCK_LEFT_WRITELOCKED);
3538 2 : DBUG_ASSERT(block->pins != 0); /* should be pinned */
3539 :
3540 2 : if (pagecache->can_be_used)
3541 : {
3542 2 : pagecache_pthread_mutex_lock(&pagecache->cache_lock);
3543 2 : if (!pagecache->can_be_used)
3544 2 : goto end;
3545 :
3546 : /*
3547 : This block should be pinned (i.e. has not zero request counter) =>
3548 : Such block can't be chosen for eviction.
3549 : */
3550 2 : DBUG_ASSERT((block->status &
3551 : (PCBLOCK_IN_SWITCH | PCBLOCK_REASSIGNED)) == 0);
3552 : /*
3553 : make_lock_and_pin() can't fail here, because we are keeping pin on the
3554 : block and it can't be evicted (which is cause of lock fail and retry)
3555 : */
3556 2 : if (make_lock_and_pin(pagecache, block, lock, pin, FALSE))
3557 0 : DBUG_ASSERT(0);
3558 :
3559 : /*
3560 : Emulate the side effect of get_present_hash_link() before calling pagecache_delete_internal(), which decrements the counter again.
3561 : NOTE(review): pagecache_delete_internal() also ends with dec_counter_for_resize_op(), but no inc_counter_for_resize_op() is visible in this function - confirm the counter is balanced.
3562 : */
3563 2 : block->hash_link->requests++;
3564 :
3565 2 : error= pagecache_delete_internal(pagecache, block, block->hash_link,
3566 : flush);
3567 2 : end:
3568 2 : pagecache_pthread_mutex_unlock(&pagecache->cache_lock);
3569 : }
3570 :
3571 2 : DBUG_RETURN(error);
3572 : }
3573 :
3574 :
3575 : /**
3576 : @brief Returns "hits" for promotion
3577 : @param block block whose hits_left counter is read (no locking is done here)
3578 : @return "hits" for promotion (the value of block->hits_left)
3579 : */
3580 :
3581 : uint pagecache_pagelevel(PAGECACHE_BLOCK_LINK *block)
3582 0 : {
3583 0 : return block->hits_left;
3584 : }
3585 :
3586 : /*
3587 : @brief Adds "hits" to the page
3588 :
3589 : @param block direct link to page (returned by read or write); must be pinned
3590 : @param level number of "hits" which we add to the page (added to block->hits_left)
3591 : */
3592 :
3593 : void pagecache_add_level_by_link(PAGECACHE_BLOCK_LINK *block,
3594 : uint level)
3595 0 : {
3596 0 : DBUG_ASSERT(block->pins != 0); /* should be pinned */
3597 : /*
3598 : The operation is only for statistics, so it is not really important
3599 : if it interferes with other hit increases => we are doing it without
3600 : locking the pagecache.
3601 : */
3602 0 : block->hits_left+= level;
3603 : }
3604 :
3605 : /*
3606 : @brief Delete page from the buffer
3607 :
3608 : @param pagecache pointer to a page cache data structure
3609 : @param file handler for the file for the block of data to be deleted
3610 : @param pageno number of the block of data in the file
3611 : @param lock lock change
3612 : @param flush flush page if it is dirty
3613 :
3614 : @retval 0 deleted or was not present at all
3615 : @retval 1 error
3616 :
3617 : @note lock can be only PAGECACHE_LOCK_LEFT_WRITELOCKED (page was
3618 : write locked before) or PAGECACHE_LOCK_WRITE (delete will write
3619 : lock page before delete)
3620 : */
/*
  Pin change that goes with a given lock change when the lock and the pin
  are applied in a single step. Indexed by enum pagecache_page_lock; the
  comment on each entry names the lock value it corresponds to.
  pagecache_delete() reads it with its 'lock' argument and asserts that the
  result is PAGECACHE_PIN or PAGECACHE_PIN_LEFT_PINNED.
*/
3621 : static enum pagecache_page_pin lock_to_pin_one_phase[8]=
3622 : {
3623 : PAGECACHE_PIN_LEFT_UNPINNED /*PAGECACHE_LOCK_LEFT_UNLOCKED*/,
3624 : PAGECACHE_PIN_LEFT_UNPINNED /*PAGECACHE_LOCK_LEFT_READLOCKED*/,
3625 : PAGECACHE_PIN_LEFT_PINNED /*PAGECACHE_LOCK_LEFT_WRITELOCKED*/,
3626 : PAGECACHE_PIN_LEFT_UNPINNED /*PAGECACHE_LOCK_READ*/,
3627 : PAGECACHE_PIN /*PAGECACHE_LOCK_WRITE*/,
3628 : PAGECACHE_PIN_LEFT_UNPINNED /*PAGECACHE_LOCK_READ_UNLOCK*/,
3629 : PAGECACHE_UNPIN /*PAGECACHE_LOCK_WRITE_UNLOCK*/,
3630 : PAGECACHE_UNPIN /*PAGECACHE_LOCK_WRITE_TO_READ*/
3631 : };
3632 :
/*
  Delete the page (file, pageno) from the cache.

  Flow, all under pagecache->cache_lock:
    1. The pin mode is derived from 'lock' through lock_to_pin_one_phase[].
    2. The hash link is looked up with get_present_hash_link(); if the page
       is not cached, 0 is returned.
    3. If the block is already PCBLOCK_REASSIGNED or PCBLOCK_IN_SWITCH the
       request on the hash link is dropped and 'error' (still 0) is returned.
    4. The block is locked/pinned with make_lock_and_pin(); if that fails the
       request is undone, the mutex released and the whole lookup restarted.
    5. pagecache_delete_internal() does the actual deletion and supplies the
       return value.

  If pagecache->can_be_used is false, nothing is done and 0 is returned.

  NOTE(review): inc_counter_for_resize_op() is called after taking the mutex,
  but no dec_counter_for_resize_op() is visible on the "page not present"
  return, on the "reassigned / in switch" exit, or before 'goto restart'
  (which then increments again). pagecache_write_part() in this file pairs
  every increment with an explicit decrement. Presumably the decrement for
  the normal path happens inside pagecache_delete_internal() -- confirm, and
  check whether the three early exits leave the counter unbalanced.
*/
3633 : my_bool pagecache_delete(PAGECACHE *pagecache,
3634 : PAGECACHE_FILE *file,
3635 : pgcache_page_no_t pageno,
3636 : enum pagecache_page_lock lock,
3637 : my_bool flush)
3638 10719 : {
3639 10719 : my_bool error= 0;
3640 10719 : enum pagecache_page_pin pin= lock_to_pin_one_phase[lock];
3641 10719 : DBUG_ENTER("pagecache_delete");
3642 10719 : DBUG_PRINT("enter", ("fd: %u page: %lu %s %s",
3643 : (uint) file->file, (ulong) pageno,
3644 : page_cache_page_lock_str[lock],
3645 : page_cache_page_pin_str[pin]));
3646 10719 : DBUG_ASSERT(lock == PAGECACHE_LOCK_WRITE ||
3647 : lock == PAGECACHE_LOCK_LEFT_WRITELOCKED);
3648 10719 : DBUG_ASSERT(pin == PAGECACHE_PIN ||
3649 : pin == PAGECACHE_PIN_LEFT_PINNED);
3650 10719 : restart:
3651 :
3652 10719 : DBUG_ASSERT(pageno < ((ULL(1)) << 40));
       /* Unlocked pre-check; the flag is re-tested below under the mutex. */
3653 10719 : if (pagecache->can_be_used)
3654 : {
3655 : /* Key cache is used */
3656 : reg1 PAGECACHE_BLOCK_LINK *block;
3657 : PAGECACHE_HASH_LINK **unused_start, *page_link;
3658 :
3659 10719 : pagecache_pthread_mutex_lock(&pagecache->cache_lock);
3660 10719 : if (!pagecache->can_be_used)
3661 10719 : goto end;
3662 :
3663 10719 : inc_counter_for_resize_op(pagecache);
3664 10719 : page_link= get_present_hash_link(pagecache, file, pageno, &unused_start);
3665 10719 : if (!page_link)
3666 : {
3667 808 : DBUG_PRINT("info", ("There is no such page in the cache"));
3668 808 : pagecache_pthread_mutex_unlock(&pagecache->cache_lock);
3669 808 : DBUG_RETURN(0);
3670 : }
3671 9911 : block= page_link->block;
3672 9911 : if (block->status & (PCBLOCK_REASSIGNED | PCBLOCK_IN_SWITCH))
3673 : {
3674 0 : DBUG_PRINT("info", ("Block 0x%0lx already is %s",
3675 : (ulong) block,
3676 : ((block->status & PCBLOCK_REASSIGNED) ?
3677 : "reassigned" : "in switch")));
3678 0 : PCBLOCK_INFO(block);
       /*
         Drop the request on the hash link (presumably registered by
         get_present_hash_link() -- confirm) and leave with error == 0.
       */
3679 0 : page_link->requests--;
3680 0 : goto end;
3681 : }
3682 : /* See NOTE for pagecache_unlock about registering requests. */
3683 9911 : if (pin == PAGECACHE_PIN)
3684 9911 : reg_requests(pagecache, block, 1);
       /*
         NOTE(review): 'block' has already been dereferenced above, so this
         assertion cannot catch a NULL block before it is used.
       */
3685 9911 : DBUG_ASSERT(block != 0);
3686 9911 : if (make_lock_and_pin(pagecache, block, lock, pin, FALSE))
3687 : {
3688 : /*
3689 : We failed to writelock the block, cache is unlocked, and last write
3690 : lock is released, we will try to get the block again.
3691 : */
3692 0 : if (pin == PAGECACHE_PIN)
3693 0 : unreg_request(pagecache, block, 1);
3694 0 : pagecache_pthread_mutex_unlock(&pagecache->cache_lock);
3695 0 : DBUG_PRINT("info", ("restarting..."));
3696 0 : goto restart;
3697 : }
3698 :
3699 : /* we can't delete with opened direct link for write */
3700 9911 : DBUG_ASSERT((block->status & PCBLOCK_DIRECT_W) == 0);
3701 :
3702 9911 : error= pagecache_delete_internal(pagecache, block, page_link, flush);
3703 9911 : end:
3704 9911 : pagecache_pthread_mutex_unlock(&pagecache->cache_lock);
3705 : }
3706 :
3707 9911 : DBUG_RETURN(error);
3708 : }
3709 :
3710 :
/*
  Delete 'page_count' consecutive pages, starting at 'pageno', by calling
  pagecache_delete() once per page with the same 'lock' and 'flush'.

  page_count must be > 0: this is only asserted in debug builds, and the
  do/while loop always processes at least one page.

  Returns 1 as soon as one pagecache_delete() call fails (remaining pages
  are not processed), 0 if every call succeeded.
*/
3711 : my_bool pagecache_delete_pages(PAGECACHE *pagecache,
3712 : PAGECACHE_FILE *file,
3713 : pgcache_page_no_t pageno,
3714 : uint page_count,
3715 : enum pagecache_page_lock lock,
3716 : my_bool flush)
3717 3661 : {
3718 : pgcache_page_no_t page_end;
3719 3661 : DBUG_ENTER("pagecache_delete_pages");
3720 3661 : DBUG_ASSERT(page_count > 0);
3721 :
       /* One past the last page to delete. */
3722 3661 : page_end= pageno + page_count;
3723 : do
3724 : {
3725 10717 : if (pagecache_delete(pagecache, file, pageno,
3726 : lock, flush))
3727 0 : DBUG_RETURN(1);
3728 10717 : } while (++pageno != page_end);
3729 3661 : DBUG_RETURN(0);
3730 : }
3731 :
3732 :
3733 : /**
3734 : @brief Writes a buffer into a cached file.
3735 :
3736 : @param pagecache pointer to a page cache data structure
3737 : @param file handler for the file to write data to
3738 : @param pageno number of the block of data in the file
3739 : @param level determines the weight of the data
3740 : @param buff buffer with the data
3741 : @param type type of the page
3742 : @param lock lock change
3743 : @param pin pin page
3744 : @param write_mode how to write page
3745 : @param page_link link to the page if we pin it
3746 : @param first_REDO_LSN_for_page the lsn to set rec_lsn
3747 : @param offset offset in the page
3748 : @param size size of data
3749 : @param validator read page validator
3750 : @param validator_data the validator data
3751 :
3752 : @retval 0 if a success.
3753 : @retval 1 Error.
3754 : */
3755 :
/*
  Lock transitions used by pagecache_write_part(), indexed by its 'lock'
  argument (the comment after each entry names that lock value).

  pagecache_write_part() reads three fields from an entry:
    need_lock_change  non-zero if a second lock/pin change must be made
                      after the data has been copied
    new_lock          lock change applied before the data is copied
    unlock_lock       lock change applied afterwards (only when
                      need_lock_change is set)

  struct rw_lock_change is declared outside this part of the file; the
  initializers below are presumably in the order
  {need_lock_change, new_lock, unlock_lock} -- confirm against the
  declaration.
*/
3756 : static struct rw_lock_change write_lock_change_table[]=
3757 : {
3758 : {1,
3759 : PAGECACHE_LOCK_WRITE,
3760 : PAGECACHE_LOCK_WRITE_UNLOCK} /*PAGECACHE_LOCK_LEFT_UNLOCKED*/,
3761 : {0, /*unsupported (we can't write having the block read locked) */
3762 : PAGECACHE_LOCK_LEFT_UNLOCKED,
3763 : PAGECACHE_LOCK_LEFT_UNLOCKED} /*PAGECACHE_LOCK_LEFT_READLOCKED*/,
3764 : {0, PAGECACHE_LOCK_LEFT_WRITELOCKED, 0} /*PAGECACHE_LOCK_LEFT_WRITELOCKED*/,
3765 : {1,
3766 : PAGECACHE_LOCK_WRITE,
3767 : PAGECACHE_LOCK_WRITE_TO_READ} /*PAGECACHE_LOCK_READ*/,
3768 : {0, PAGECACHE_LOCK_WRITE, 0} /*PAGECACHE_LOCK_WRITE*/,
3769 : {0, /*unsupported (we can't write having the block read locked) */
3770 : PAGECACHE_LOCK_LEFT_UNLOCKED,
3771 : PAGECACHE_LOCK_LEFT_UNLOCKED} /*PAGECACHE_LOCK_READ_UNLOCK*/,
3772 : {1,
3773 : PAGECACHE_LOCK_LEFT_WRITELOCKED,
3774 : PAGECACHE_LOCK_WRITE_UNLOCK } /*PAGECACHE_LOCK_WRITE_UNLOCK*/,
3775 : {1,
3776 : PAGECACHE_LOCK_LEFT_WRITELOCKED,
3777 : PAGECACHE_LOCK_WRITE_TO_READ} /*PAGECACHE_LOCK_WRITE_TO_READ*/
3778 : };
3779 :
3780 :
/*
  Pin transitions used by pagecache_write_part(), indexed by its 'pin'
  argument (the comment after each entry names that pin value).

  The table is consulted only when the matching write_lock_change_table
  entry has need_lock_change set: 'new_pin' is applied before the data is
  copied and 'unlock_pin' afterwards. Otherwise the caller's 'pin' is used
  as is.

  struct rw_pin_change is declared outside this part of the file; the
  initializers below are presumably in the order {new_pin, unlock_pin} --
  confirm against the declaration.
*/
3781 : static struct rw_pin_change write_pin_change_table[]=
3782 : {
3783 : {PAGECACHE_PIN_LEFT_PINNED,
3784 : PAGECACHE_PIN_LEFT_PINNED} /*PAGECACHE_PIN_LEFT_PINNED*/,
3785 : {PAGECACHE_PIN,
3786 : PAGECACHE_UNPIN} /*PAGECACHE_PIN_LEFT_UNPINNED*/,
3787 : {PAGECACHE_PIN,
3788 : PAGECACHE_PIN_LEFT_PINNED} /*PAGECACHE_PIN*/,
3789 : {PAGECACHE_PIN_LEFT_PINNED,
3790 : PAGECACHE_UNPIN} /*PAGECACHE_UNPIN*/
3791 : };
3792 :
3793 :
3794 : /**
       @brief Copies 'size' bytes from 'buff' to offset 'offset' of the cached
       page (file, pageno), applying the requested lock and pin changes.

       @param pagecache   page cache to operate on
       @param file        file the page belongs to
       @param pageno      page number in the file (asserted to be < 2^40)
       @param level       passed on to find_block()
       @param buff        data to copy into the page
       @param type        page type; stored in block->type
       @param lock        lock change; index into write_lock_change_table
       @param pin         pin change; index into write_pin_change_table when
                          the lock entry has need_lock_change set
       @param write_mode  PAGECACHE_WRITE_DONE takes the "page injection"
                          path (the block is not linked to the changed list
                          and the file's read_callback is run on the
                          buffer); any other mode links the block to the
                          changed list if it is not already PCBLOCK_CHANGED
       @param page_link   may be NULL; otherwise it is set to 0 first and to
                          the block at the end, unless 'pin' is
                          PAGECACHE_PIN_LEFT_UNPINNED or PAGECACHE_UNPIN
       @param first_REDO_LSN_for_page  if non-zero, handed to
                          pagecache_set_block_rec_lsn() for the block
       @param offset      offset inside the page
       @param size        number of bytes to copy; offset + size must not
                          exceed pagecache->block_size (debug assertion)

       @retval 0 success
       @retval 1 the block carries PCBLOCK_ERROR at the end of the
                 operation (or an I/O error happened on the no_key_cache
                 path)

3795 : @note 'buff', if not NULL, must be long-aligned.

       @note The parameter list in the comment placed before
       write_lock_change_table also mentions 'validator' and
       'validator_data'; this function has no such parameters.
3796 : */
3797 :
3798 : my_bool pagecache_write_part(PAGECACHE *pagecache,
3799 : PAGECACHE_FILE *file,
3800 : pgcache_page_no_t pageno,
3801 : uint level,
3802 : uchar *buff,
3803 : enum pagecache_page_type type,
3804 : enum pagecache_page_lock lock,
3805 : enum pagecache_page_pin pin,
3806 : enum pagecache_write_mode write_mode,
3807 : PAGECACHE_BLOCK_LINK **page_link,
3808 : LSN first_REDO_LSN_for_page,
3809 : uint offset, uint size)
3810 4018155 : {
3811 4018155 : PAGECACHE_BLOCK_LINK *block= NULL;
3812 : PAGECACHE_BLOCK_LINK *fake_link;
3813 4018155 : my_bool error= 0;
3814 4018155 : int need_lock_change= write_lock_change_table[lock].need_lock_change;
3815 : my_bool reg_request;
3816 : #ifndef DBUG_OFF
3817 : char llbuf[22];
3818 4018155 : DBUG_ENTER("pagecache_write_part");
3819 4018155 : DBUG_PRINT("enter", ("fd: %u page: %s level: %u type: %s lock: %s "
3820 : "pin: %s mode: %s offset: %u size %u",
3821 : (uint) file->file, ullstr(pageno, llbuf), level,
3822 : page_cache_page_type_str[type],
3823 : page_cache_page_lock_str[lock],
3824 : page_cache_page_pin_str[pin],
3825 : page_cache_page_write_mode_str[write_mode],
3826 : offset, size));
3827 4018155 : DBUG_ASSERT(type != PAGECACHE_READ_UNKNOWN_PAGE);
3828 4018155 : DBUG_ASSERT(lock != PAGECACHE_LOCK_LEFT_READLOCKED);
3829 4018155 : DBUG_ASSERT(lock != PAGECACHE_LOCK_READ_UNLOCK);
3830 4018155 : DBUG_ASSERT(offset + size <= pagecache->block_size);
3831 4018155 : DBUG_ASSERT(pageno < ((ULL(1)) << 40));
3832 : #endif
3833 :
       /*
         Callers that do not want the link back pass NULL; use a local dummy
         so that the rest of the function can store through page_link
         unconditionally.
       */
3834 4018155 : if (!page_link)
3835 1422210 : page_link= &fake_link;
3836 4018155 : *page_link= 0;
3837 :
3838 4018155 : restart:
3839 :
3840 : #if !defined(DBUG_OFF) && defined(EXTRA_DEBUG)
3841 4018155 : DBUG_EXECUTE("check_pagecache",
3842 : test_key_cache(pagecache, "start of key_cache_write", 1););
3843 : #endif
3844 :
       /* Unlocked pre-check; the flag is re-tested below under the mutex. */
3845 4018155 : if (pagecache->can_be_used)
3846 : {
3847 : /* Key cache is used */
3848 : int page_st;
3849 4018155 : my_bool need_page_ready_signal= FALSE;
3850 :
3851 4018155 : pagecache_pthread_mutex_lock(&pagecache->cache_lock);
3852 4018155 : if (!pagecache->can_be_used)
3853 : {
3854 0 : pagecache_pthread_mutex_unlock(&pagecache->cache_lock);
3855 0 : goto no_key_cache;
3856 : }
3857 :
3858 4018155 : inc_counter_for_resize_op(pagecache);
3859 4018155 : pagecache->global_cache_w_requests++;
3860 : /* See NOTE for pagecache_unlock about registering requests. */
3861 4018155 : reg_request= ((pin == PAGECACHE_PIN_LEFT_UNPINNED) ||
3862 : (pin == PAGECACHE_PIN));
3863 4018155 : block= find_block(pagecache, file, pageno, level,
3864 : TRUE,
3865 : reg_request, &page_st);
3866 4018155 : if (!block)
3867 : {
3868 0 : DBUG_ASSERT(write_mode != PAGECACHE_WRITE_DONE);
3869 : /* It happens only for requests submitted during resize operation */
3870 0 : dec_counter_for_resize_op(pagecache);
3871 0 : pagecache_pthread_mutex_unlock(&pagecache->cache_lock);
3872 : /* Write to the disk key cache is in resize at the moment*/
3873 0 : goto no_key_cache;
3874 : }
3875 4018155 : DBUG_PRINT("info", ("page status: %d", page_st));
       /*
         The old page content has to be read first when another thread is
         already reading it (PAGE_WAIT_TO_BE_READ) or when we are asked to
         read it (PAGE_TO_BE_READ) and the write covers only part of the
         page. A full-page write on a PAGE_TO_BE_READ block skips the read
         and only remembers to wake up waiters afterwards.
       */
3876 4018155 : if (!(block->status & PCBLOCK_ERROR) &&
3877 : ((page_st == PAGE_TO_BE_READ &&
3878 : (offset || size < pagecache->block_size)) ||
3879 : (page_st == PAGE_WAIT_TO_BE_READ)))
3880 : {
3881 : /* The requested page is to be read into the block buffer */
3882 0 : read_block(pagecache, block,
3883 : (my_bool)(page_st == PAGE_TO_BE_READ));
3884 0 : DBUG_PRINT("info", ("read is done"));
3885 : }
3886 4018155 : else if (page_st == PAGE_TO_BE_READ)
3887 : {
3888 749247 : need_page_ready_signal= TRUE;
3889 : }
3890 :
3891 4018155 : DBUG_ASSERT(block->type == PAGECACHE_EMPTY_PAGE ||
3892 : block->type == PAGECACHE_READ_UNKNOWN_PAGE ||
3893 : block->type == type ||
3894 : /* this is for when going to non-trans to trans */
3895 : (block->type == PAGECACHE_PLAIN_PAGE &&
3896 : type == PAGECACHE_LSN_PAGE));
3897 4018155 : block->type= type;
3898 : /* we write to the page so it has no sense to keep the flag */
3899 4018155 : block->status&= ~PCBLOCK_DIRECT_W;
3900 4018155 : DBUG_PRINT("info", ("Drop PCBLOCK_DIRECT_W for block: 0x%lx",
3901 : (ulong) block));
3902 :
       /*
         First lock/pin step. When a second step will follow
         (need_lock_change), the pin comes from write_pin_change_table;
         otherwise the caller's 'pin' is applied directly.
       */
3903 4018155 : if (make_lock_and_pin(pagecache, block,
3904 : write_lock_change_table[lock].new_lock,
3905 : (need_lock_change ?
3906 : write_pin_change_table[pin].new_pin :
3907 : pin), FALSE))
3908 : {
3909 : /*
3910 : We failed to writelock the block, cache is unlocked, and last write
3911 : lock is released, we will try to get the block again.
3912 : */
       /*
         NOTE(review): this path jumps back to 'restart' without calling
         dec_counter_for_resize_op(), and global_cache_w_requests is then
         incremented a second time for the same write -- confirm whether
         that is intended.
       */
3913 0 : if (reg_request)
3914 0 : unreg_request(pagecache, block, 1);
3915 0 : pagecache_pthread_mutex_unlock(&pagecache->cache_lock);
3916 0 : DBUG_PRINT("info", ("restarting..."));
3917 0 : goto restart;
3918 : }
3919 :
3920 4018155 : if (write_mode == PAGECACHE_WRITE_DONE)
3921 : {
3922 210947 : if (block->status & PCBLOCK_ERROR)
3923 : {
3924 0 : my_debug_put_break_here();
3925 0 : DBUG_PRINT("warning", ("Writing on page with error"));
3926 : }
3927 : else
3928 : {
3929 : /* Copy data from buff */
       /* bmove512() is used when 'size' is a multiple of 512 bytes. */
3930 210947 : if (!(size & 511))
3931 210947 : bmove512(block->buffer + offset, buff, size);
3932 : else
3933 0 : memcpy(block->buffer + offset, buff, size);
       /*
         NOTE(review): plain assignment, so every other status bit is
         cleared here, unlike the '|=' used in the non-WRITE_DONE branch
         below -- presumably intentional for page injection; confirm.
       */
3934 210947 : block->status= PCBLOCK_READ;
3935 : /*
3936 : The read_callback can change the page content (removing page
3937 : protection) so it has to be called
3938 : */
3939 210947 : DBUG_PRINT("info", ("read_callback: 0x%lx data: 0x%lx",
3940 : (ulong) block->hash_link->file.read_callback,
3941 : (ulong) block->hash_link->file.callback_data));
3942 210947 : if ((*block->hash_link->file.read_callback)(block->buffer,
3943 : block->hash_link->pageno,
3944 : block->hash_link->
3945 : file.callback_data))
3946 : {
3947 0 : DBUG_PRINT("error", ("read callback problem"));
3948 0 : block->status|= PCBLOCK_ERROR;
3949 0 : block->error= (int16) my_errno;
3950 0 : my_debug_put_break_here();
3951 : }
3952 210947 : KEYCACHE_DBUG_PRINT("key_cache_insert",
3953 : ("Page injection"));
3954 : #ifdef THREAD
3955 : /* Signal that all pending requests for this now can be processed. */
3956 210947 : if (block->wqueue[COND_FOR_REQUESTED].last_thread)
3957 0 : wqueue_release_queue(&block->wqueue[COND_FOR_REQUESTED]);
3958 : #endif
3959 : }
3960 : }
3961 : else
3962 : {
       /* Normal write: the block becomes dirty if it is not already. */
3963 3807208 : if (! (block->status & PCBLOCK_CHANGED))
3964 822617 : link_to_changed_list(pagecache, block);
3965 :
3966 3807208 : if (!(size & 511))
3967 3803137 : bmove512(block->buffer + offset, buff, size);
3968 : else
3969 4071 : memcpy(block->buffer + offset, buff, size);
3970 3807208 : block->status|= PCBLOCK_READ;
3971 : /* Page is correct again if we made a full write in it */
3972 3807208 : if (size == pagecache->block_size)
3973 3803137 : block->status&= ~PCBLOCK_ERROR;
3974 : }
3975 :
3976 : #ifdef THREAD
       /*
         The read was skipped above for a block that was in PAGE_TO_BE_READ
         state: wake up threads queued on COND_FOR_REQUESTED, if any.
       */
3977 4018155 : if (need_page_ready_signal &&
3978 : block->wqueue[COND_FOR_REQUESTED].last_thread)
3979 0 : wqueue_release_queue(&block->wqueue[COND_FOR_REQUESTED]);
3980 : #endif
3981 :
3982 4018155 : if (first_REDO_LSN_for_page)
3983 : {
3984 : /* single write action of the last write action */
3985 9057 : DBUG_ASSERT(lock == PAGECACHE_LOCK_WRITE_UNLOCK ||
3986 : lock == PAGECACHE_LOCK_LEFT_UNLOCKED);
3987 9057 : DBUG_ASSERT(pin == PAGECACHE_UNPIN ||
3988 : pin == PAGECACHE_PIN_LEFT_UNPINNED);
3989 9057 : pagecache_set_block_rec_lsn(block, first_REDO_LSN_for_page);
3990 : }
3991 :
3992 4018155 : if (need_lock_change)
3993 : {
3994 : /*
3995 : We don't set rec_lsn of the block; this is ok as for the
3996 : Maria-block-record's pages, we always keep pages pinned here.
3997 : */
       /* Second lock/pin step; a failure here is treated as impossible. */
3998 1427695 : if (make_lock_and_pin(pagecache, block,
3999 : write_lock_change_table[lock].unlock_lock,
4000 : write_pin_change_table[pin].unlock_pin, FALSE))
4001 0 : DBUG_ASSERT(0);
4002 : }
4003 :
4004 : /* Unregister the request */
4005 4018155 : DBUG_ASSERT(block->hash_link->requests > 0);
4006 4018155 : block->hash_link->requests--;
4007 : /* See NOTE for pagecache_unlock about registering requests. */
       /*
         The block link is handed back to the caller only when the page
         stays pinned after this call.
       */
4008 5440359 : if (pin == PAGECACHE_PIN_LEFT_UNPINNED || pin == PAGECACHE_UNPIN)
4009 1422204 : unreg_request(pagecache, block, 1);
4010 : else
4011 2595951 : *page_link= block;
4012 :
4013 4018155 : if (block->status & PCBLOCK_ERROR)
4014 : {
4015 0 : error= 1;
4016 0 : my_debug_put_break_here();
4017 : }
4018 :
4019 4018155 : dec_counter_for_resize_op(pagecache);
4020 :
4021 4018155 : pagecache_pthread_mutex_unlock(&pagecache->cache_lock);
4022 :
4023 4018155 : goto end;
4024 : }
4025 :
4026 0 : no_key_cache:
4027 : /*
4028 : We can't by pass the normal page cache operations because need
4029 : whole page for calling callbacks & so on.
4030 : This branch should not be used for now (but it is fixed as it
4031 : should be just to avoid confusing)
4032 : */
4033 0 : DBUG_ASSERT(0);
4034 : /* Key cache is not used */
4035 : if (write_mode == PAGECACHE_WRITE_DELAY)
4036 : {
4037 : /* We can't use mutex here as the key cache may not be initialized */
4038 : pagecache->global_cache_w_requests++;
4039 : pagecache->global_cache_write++;
4040 : if (offset != 0 || size != pagecache->block_size)
4041 : {
       /*
         Partial write without a cache: read the whole page into a
         stack buffer (alloca), patch it, and write the full page back.
       */
4042 : uchar *page_buffer= (uchar *) alloca(pagecache->block_size);
4043 :
4044 : pagecache->global_cache_read++;
4045 : if ((error= (pagecache_fread(pagecache, file,
4046 : page_buffer,
4047 : pageno,
4048 : pagecache->readwrite_flags) != 0)))
4049 : goto end;
4050 : if ((file->read_callback)(page_buffer, pageno, file->callback_data))
4051 : {
4052 : DBUG_PRINT("error", ("read callback problem"));
4053 : error= 1;
4054 : goto end;
4055 : }
4056 : memcpy((char *)page_buffer + offset, buff, size);
4057 : buff= page_buffer;
4058 : }
4059 : if (pagecache_fwrite(pagecache, file, buff, pageno, type,
4060 : pagecache->readwrite_flags))
4061 : error= 1;
4062 : }
4063 :
4064 4018155 : end:
4065 : #if !defined(DBUG_OFF) && defined(EXTRA_DEBUG)
4066 4018155 : DBUG_EXECUTE("exec",
4067 : test_key_cache(pagecache, "end of key_cache_write", 1););
4068 : #endif
4069 4018155 : if (block)
4070 4018155 : PCBLOCK_INFO(block);
4071 : else
4072 0 : DBUG_PRINT("info", ("No block"));
4073 4018155 : DBUG_RETURN(error);
4074 : }
4075 :
4076 :
4077 : /*
4078 : Free block: remove reference to it from hash table,
4079 : remove it from the chain file of dirty/clean blocks
4080 : and add it to the free list.

       The block must have no write locks, read locks, queued read locks or
       pins left (debug assertions). Its status is reset to 0, rec_lsn to
       LSN_MAX, its temperature to PCBLOCK_COLD, and
       pagecache->blocks_unused is incremented. The changed-block counters
       are not touched here; callers in this file decrement
       blocks_changed/global_blocks_changed themselves before calling.
4081 : */
4082 :
4083 : static void free_block(PAGECACHE *pagecache, PAGECACHE_BLOCK_LINK *block)
4084 112057 : {
4085 112057 : KEYCACHE_THREAD_TRACE("free block");
4086 112057 : KEYCACHE_DBUG_PRINT("free_block",
4087 : ("block: %u hash_link 0x%lx",
4088 : PCBLOCK_NUMBER(pagecache, block),
4089 : (long) block->hash_link));
4090 112057 : if (block->hash_link)
4091 : {
4092 : /*
4093 : While waiting for readers to finish, new readers might request the
4094 : block. But since we set block->status|= PCBLOCK_REASSIGNED, they
4095 : will wait on block->wqueue[COND_FOR_SAVED]. They must be signalled
4096 : later.
4097 : */
4098 112057 : block->status|= PCBLOCK_REASSIGNED;
4099 112057 : wait_for_readers(pagecache, block);
4100 112057 : unlink_hash(pagecache, block->hash_link);
4101 : }
4102 :
4103 112057 : unlink_changed(block);
4104 112057 : DBUG_ASSERT(block->wlocks == 0);
4105 112057 : DBUG_ASSERT(block->rlocks == 0);
4106 112057 : DBUG_ASSERT(block->rlocks_queue == 0);
4107 112057 : DBUG_ASSERT(block->pins == 0);
4108 112057 : block->status= 0;
4109 : #ifndef DBUG_OFF
       /* The type is reset only in debug builds. */
4110 112057 : block->type= PAGECACHE_EMPTY_PAGE;
4111 : #endif
4112 112057 : block->rec_lsn= LSN_MAX;
4113 112057 : KEYCACHE_THREAD_TRACE("free block");
4114 112057 : KEYCACHE_DBUG_PRINT("free_block",
4115 : ("block is freed"));
4116 112057 : unreg_request(pagecache, block, 0);
4117 112057 : block->hash_link= NULL;
4118 :
4119 : /* Remove the free block from the LRU ring. */
4120 112057 : unlink_block(pagecache, block);
       /* Keep the warm-block count in step with the temperature change. */
4121 112057 : if (block->temperature == PCBLOCK_WARM)
4122 35071 : pagecache->warm_blocks--;
4123 112057 : block->temperature= PCBLOCK_COLD;
4124 : /* Insert the free block in the free list. */
       /* The free list is a stack chained through next_used: push on top. */
4125 112057 : block->next_used= pagecache->free_block_list;
4126 112057 : pagecache->free_block_list= block;
4127 : /* Keep track of the number of currently unused blocks. */
4128 112057 : pagecache->blocks_unused++;
4129 :
4130 : #ifdef THREAD
4131 : /* All pending requests for this page must be resubmitted. */
4132 112057 : if (block->wqueue[COND_FOR_SAVED].last_thread)
4133 0 : wqueue_release_queue(&block->wqueue[COND_FOR_SAVED]);
4134 : #endif
4135 : }
4136 :
4137 :
/*
  qsort() comparison function for an array of block pointers: orders the
  blocks by ascending hash_link->pageno. Returns -1, 1 or 0.
  Used by flush_cached_blocks() to sort the blocks before writing them.
  Every block in the array must have a non-NULL hash_link.
*/
4138 : static int cmp_sec_link(PAGECACHE_BLOCK_LINK **a, PAGECACHE_BLOCK_LINK **b)
4139 158536 : {
4140 158536 : return (((*a)->hash_link->pageno < (*b)->hash_link->pageno) ? -1 :
4141 : ((*a)->hash_link->pageno > (*b)->hash_link->pageno) ? 1 : 0);
4142 : }
4143 :
4144 :
4145 : /**
4146 : @brief Flush a portion of changed blocks to disk, free used blocks
4147 : if requested
4148 :
4149 : @param pagecache This page cache reference.
4150 : @param file File which should be flushed
4151 : @param cache Beginning of array of the block.
4152 : @param end Reference to the block after last in the array.
4153 : @param type Type of the flush.
4154 : @param first_errno Where to store first errno of the flush.
4155 :
4156 :
       @note
       Must be called with pagecache->cache_lock held: the function releases
       the mutex while sorting the array and around each page write, and
       holds it again when it returns.

       @note
       The array is sorted in place by page number before flushing. Pinned
       blocks are skipped: their PCBLOCK_IN_FLUSH mark is removed and
       *first_errno is set to HA_ERR_INTERNAL_ERROR if it was still 0.

       @note
       'file' is only used when re-linking a kept block into the file list;
       the write itself uses block->hash_link->file.

4157 : @return Operation status
4158 : @retval PCFLUSH_OK OK
4159 : @retval PCFLUSH_ERROR There were errors during the flush process.
4160 : @retval PCFLUSH_PINNED Pinned blocks were met and skipped.
4161 : @retval PCFLUSH_PINNED_AND_ERROR PCFLUSH_ERROR and PCFLUSH_PINNED.
4162 : */
4163 :
4164 : static int flush_cached_blocks(PAGECACHE *pagecache,
4165 : PAGECACHE_FILE *file,
4166 : PAGECACHE_BLOCK_LINK **cache,
4167 : PAGECACHE_BLOCK_LINK **end,
4168 : enum flush_type type,
4169 : int *first_errno)
4170 2807 : {
4171 2807 : int rc= PCFLUSH_OK;
4172 : my_bool error;
4173 2807 : uint count= (uint) (end-cache);
4174 2807 : DBUG_ENTER("flush_cached_blocks");
4175 2807 : *first_errno= 0;
4176 :
4177 : /* Don't lock the cache during the flush */
4178 2807 : pagecache_pthread_mutex_unlock(&pagecache->cache_lock);
4179 : /*
4180 : As all blocks referred in 'cache' are marked by PCBLOCK_IN_FLUSH
4181 : we are guaranteed that no thread will change them
4182 : */
4183 2807 : qsort((uchar*) cache, count, sizeof(*cache), (qsort_cmp) cmp_sec_link);
4184 :
4185 2807 : pagecache_pthread_mutex_lock(&pagecache->cache_lock);
4186 43984 : for (; cache != end; cache++)
4187 : {
4188 41177 : PAGECACHE_BLOCK_LINK *block= *cache;
4189 :
4190 41177 : if (block->pins)
4191 : {
4192 1948 : KEYCACHE_DBUG_PRINT("flush_cached_blocks",
4193 : ("block: %u (0x%lx) pinned",
4194 : PCBLOCK_NUMBER(pagecache, block), (ulong)block));
4195 1948 : DBUG_PRINT("info", ("block: %u (0x%lx) pinned",
4196 : PCBLOCK_NUMBER(pagecache, block), (ulong)block));
4197 1948 : PCBLOCK_INFO(block);
4198 : /* undo the mark put by flush_pagecache_blocks_int(): */
4199 1948 : block->status&= ~PCBLOCK_IN_FLUSH;
4200 1948 : rc|= PCFLUSH_PINNED;
4201 1948 : DBUG_PRINT("warning", ("Page pinned"));
4202 1948 : unreg_request(pagecache, block, 1);
4203 1948 : if (!*first_errno)
4204 583 : *first_errno= HA_ERR_INTERNAL_ERROR;
4205 : continue;
4206 : }
4207 : /* if the block is not pinned then it is not write locked */
4208 39229 : DBUG_ASSERT(block->wlocks == 0);
4209 39229 : DBUG_ASSERT(block->pins == 0);
       /*
         Write-lock and pin the block for the duration of the write, which
         is done with the cache mutex released.
       */
4210 39229 : if (make_lock_and_pin(pagecache, block,
4211 : PAGECACHE_LOCK_WRITE, PAGECACHE_PIN, FALSE))
4212 0 : DBUG_ASSERT(0);
4213 39229 : DBUG_ASSERT(block->pins == 1);
4214 :
4215 39229 : KEYCACHE_DBUG_PRINT("flush_cached_blocks",
4216 : ("block: %u (0x%lx) to be flushed",
4217 : PCBLOCK_NUMBER(pagecache, block), (ulong)block));
4218 39229 : DBUG_PRINT("info", ("block: %u (0x%lx) to be flushed",
4219 : PCBLOCK_NUMBER(pagecache, block), (ulong)block));
4220 39229 : PCBLOCK_INFO(block);
4221 39229 : pagecache_pthread_mutex_unlock(&pagecache->cache_lock);
4222 39229 : DBUG_PRINT("info", ("block: %u (0x%lx) pins: %u",
4223 : PCBLOCK_NUMBER(pagecache, block), (ulong)block,
4224 : block->pins));
4225 39229 : DBUG_ASSERT(block->pins == 1);
4226 : /**
4227 : @todo IO If page is contiguous with next page to flush, group flushes
4228 : in one single my_pwrite().
4229 : */
4230 : /**
4231 : It is important to use block->hash_link->file below and not 'file', as
4232 : the first one is right and the second may have different out-of-date
4233 : content (see StaleFilePointersInFlush in ma_checkpoint.c).
4234 : @todo change argument of functions to be File.
4235 : */
4236 39229 : error= pagecache_fwrite(pagecache, &block->hash_link->file,
4237 : block->buffer,
4238 : block->hash_link->pageno,
4239 : block->type,
4240 : pagecache->readwrite_flags);
4241 39229 : pagecache_pthread_mutex_lock(&pagecache->cache_lock);
4242 :
4243 39229 : if (make_lock_and_pin(pagecache, block,
4244 : PAGECACHE_LOCK_WRITE_UNLOCK,
4245 : PAGECACHE_UNPIN, FALSE))
4246 0 : DBUG_ASSERT(0);
4247 :
4248 39229 : pagecache->global_cache_write++;
4249 39229 : if (error)
4250 : {
       /*
         NOTE(review): my_errno is read only after the mutex has been
         re-acquired and make_lock_and_pin() has run; presumably neither
         changes it -- confirm.
       */
4251 0 : block->status|= PCBLOCK_ERROR;
4252 0 : block->error= (int16) my_errno;
4253 0 : my_debug_put_break_here();
4254 0 : if (!*first_errno)
4255 0 : *first_errno= my_errno ? my_errno : -1;
4256 0 : rc|= PCFLUSH_ERROR;
4257 : }
4258 : #ifdef THREAD
4259 : /*
4260 : Let to proceed for possible waiting requests to write to the block page.
4261 : It might happen only during an operation to resize the key cache.
4262 : */
4263 39229 : if (block->wqueue[COND_FOR_SAVED].last_thread)
4264 0 : wqueue_release_queue(&block->wqueue[COND_FOR_SAVED]);
4265 : #endif
4266 : /* type will never be FLUSH_IGNORE_CHANGED here */
       /*
         Flush types other than FLUSH_KEEP, FLUSH_KEEP_LAZY and
         FLUSH_FORCE_WRITE free the block after the write; those three keep
         it cached and re-link it into the file list.
       */
4267 52548 : if (! (type == FLUSH_KEEP || type == FLUSH_KEEP_LAZY ||
4268 : type == FLUSH_FORCE_WRITE))
4269 : {
4270 13319 : pagecache->blocks_changed--;
4271 13319 : pagecache->global_blocks_changed--;
4272 13319 : free_block(pagecache, block);
4273 : }
4274 : else
4275 : {
4276 25910 : block->status&= ~PCBLOCK_IN_FLUSH;
4277 25910 : link_to_file_list(pagecache, block, file, 1);
4278 25910 : unreg_request(pagecache, block, 1);
4279 : }
4280 : }
4281 2807 : DBUG_RETURN(rc);
4282 : }
4283 :
4284 :
4285 : /**
4286 : @brief flush all blocks for a file to disk but don't do any mutex locks
4287 :
4288 : @param pagecache pointer to a pagecache data structure
4289 : @param file handler for the file to flush to
4290 : @param flush_type type of the flush
4291 : @param filter optional function which tells what blocks to flush;
4292 : can be non-NULL only if FLUSH_KEEP, FLUSH_KEEP_LAZY
4293 : or FLUSH_FORCE_WRITE.
4294 : @param filter_arg an argument to pass to 'filter'. Information about
4295 : the block will be passed too.
4296 :
4297 : @note
4298 : Flushes all blocks having the same OS file descriptor as 'file->file', so
4299 : can flush blocks having '*block->hash_link->file' != '*file'.
4300 :
4301 : @note
4302 : This function doesn't do any mutex locks because it needs to be called
4303 : both from flush_pagecache_blocks and flush_all_key_blocks (the later one
4304 : does the mutex lock in the resize_pagecache() function).
4305 :
4306 : @note
4307 : This function can cause problems if two threads call it
4308 : concurrently on the same file (look for "PageCacheFlushConcurrencyBugs"
4309 : in ma_checkpoint.c); to avoid them, it has internal logic to serialize in
4310 : this situation.
4311 :
4312 : @return Operation status
4313 : @retval PCFLUSH_OK OK
4314 : @retval PCFLUSH_ERROR There was errors during the flush process.
4315 : @retval PCFLUSH_PINNED Pinned blocks was met and skipped.
4316 : @retval PCFLUSH_PINNED_AND_ERROR PCFLUSH_ERROR and PCFLUSH_PINNED.
4317 : */
4318 :
4319 : static int flush_pagecache_blocks_int(PAGECACHE *pagecache,
4320 : PAGECACHE_FILE *file,
4321 : enum flush_type type,
4322 : PAGECACHE_FLUSH_FILTER filter,
4323 : void *filter_arg)
4324 6932 : {
4325 : PAGECACHE_BLOCK_LINK *cache_buff[FLUSH_CACHE],**cache;
4326 6932 : int last_errno= 0;
4327 6932 : int rc= PCFLUSH_OK;
4328 6932 : DBUG_ENTER("flush_pagecache_blocks_int");
4329 6932 : DBUG_PRINT("enter",
4330 : ("fd: %d blocks_used: %lu blocks_changed: %lu type: %d",
4331 : file->file, pagecache->blocks_used, pagecache->blocks_changed,
4332 : type));
4333 :
4334 : #if !defined(DBUG_OFF) && defined(EXTRA_DEBUG)
4335 6932 : DBUG_EXECUTE("check_pagecache",
4336 : test_key_cache(pagecache,
4337 : "start of flush_pagecache_blocks", 0););
4338 : #endif
4339 :
4340 6932 : cache= cache_buff;
4341 6932 : if (pagecache->disk_blocks > 0 &&
4342 : (!my_disable_flush_pagecache_blocks ||
4343 : (type != FLUSH_KEEP && type != FLUSH_KEEP_LAZY)))
4344 : {
4345 : /*
4346 : Key cache exists. If my_disable_flush_pagecache_blocks is true it
4347 : disables the operation but only FLUSH_KEEP[_LAZY]: other flushes still
4348 : need to be allowed: FLUSH_RELEASE has to free blocks, and
4349 : FLUSH_FORCE_WRITE is to overrule my_disable_flush_pagecache_blocks.
4350 : */
4351 6932 : int error= 0;
4352 6932 : uint count= 0;
4353 : PAGECACHE_BLOCK_LINK **pos, **end;
4354 6932 : PAGECACHE_BLOCK_LINK *first_in_switch= NULL;
4355 : PAGECACHE_BLOCK_LINK *block, *next;
4356 : #if defined(PAGECACHE_DEBUG)
4357 : uint cnt= 0;
4358 : #endif
4359 :
4360 : #ifdef THREAD
4361 : struct st_file_in_flush us_flusher, *other_flusher;
4362 6932 : us_flusher.file= file->file;
4363 6932 : us_flusher.flush_queue.last_thread= NULL;
4364 6932 : us_flusher.first_in_switch= FALSE;
4365 14153 : while ((other_flusher= (struct st_file_in_flush *)
4366 : hash_search(&pagecache->files_in_flush, (uchar *)&file->file,
4367 : sizeof(file->file))))
4368 : {
4369 : /*
4370 : File is in flush already: wait, unless FLUSH_KEEP_LAZY. "Flusher"
4371 : means "who can mark PCBLOCK_IN_FLUSH", i.e. caller of
4372 : flush_pagecache_blocks_int().
4373 : */
4374 : struct st_my_thread_var *thread;
4375 289 : if (type == FLUSH_KEEP_LAZY)
4376 : {
4377 0 : DBUG_PRINT("info",("FLUSH_KEEP_LAZY skips"));
4378 0 : DBUG_RETURN(0);
4379 : }
4380 289 : thread= my_thread_var;
4381 289 : wqueue_add_to_queue(&other_flusher->flush_queue, thread);
4382 : do
4383 : {
4384 289 : KEYCACHE_DBUG_PRINT("flush_pagecache_blocks_int: wait1",
4385 : ("suspend thread %ld", thread->id));
4386 289 : pagecache_pthread_cond_wait(&thread->suspend,
4387 : &pagecache->cache_lock);
4388 : }
4389 289 : while (thread->next);
4390 : }
4391 : /* we are the only flusher of this file now */
4392 6932 : while (my_hash_insert(&pagecache->files_in_flush, (uchar *)&us_flusher))
4393 : {
4394 : /*
4395 : Out of memory, wait for flushers to empty the hash and retry; should
4396 : rarely happen. Other threads are flushing the file; when done, they
4397 : are going to remove themselves from the hash, and thus memory will
4398 : appear again. However, this memory may be stolen by yet another thread
4399 : (for a purpose unrelated to page cache), before we retry
4400 : hash_insert(). So the loop may run for long. Only if the thread was
4401 : killed do we abort the loop, returning 1 (error) which can cause the
4402 : table to be marked as corrupted (cf maria_chk_size(), maria_close())
4403 : and thus require a table check.
4404 : */
4405 0 : DBUG_ASSERT(0);
4406 : pagecache_pthread_mutex_unlock(&pagecache->cache_lock);
4407 : if (my_thread_var->abort)
4408 : DBUG_RETURN(1); /* End if aborted by user */
4409 : sleep(10);
4410 : pagecache_pthread_mutex_lock(&pagecache->cache_lock);
4411 : }
4412 : #endif
4413 :
4414 6932 : if (type != FLUSH_IGNORE_CHANGED)
4415 : {
4416 : /*
4417 : Count how many key blocks we have to cache to be able
4418 : to flush all dirty pages with minimum seek moves.
4419 : */
4420 6716 : for (block= pagecache->changed_blocks[FILE_HASH(*file)] ;
4421 55057 : block;
4422 41625 : block= block->next_changed)
4423 : {
4424 41625 : if (block->hash_link->file.file == file->file)
4425 : {
4426 41625 : count++;
4427 41625 : KEYCACHE_DBUG_ASSERT(count<= pagecache->blocks_used);
4428 : }
4429 : }
4430 : /* Allocate a new buffer only if its bigger than the one we have */
4431 6716 : if (count > FLUSH_CACHE &&
4432 : !(cache=
4433 : (PAGECACHE_BLOCK_LINK**)
4434 : my_malloc(sizeof(PAGECACHE_BLOCK_LINK*)*count, MYF(0))))
4435 : {
4436 0 : cache= cache_buff;
4437 0 : count= FLUSH_CACHE;
4438 : }
4439 : }
4440 :
4441 : /* Retrieve the blocks and write them to a buffer to be flushed */
4442 6932 : restart:
4443 6932 : end= (pos= cache)+count;
4444 6932 : for (block= pagecache->changed_blocks[FILE_HASH(*file)] ;
4445 58163 : block;
4446 44299 : block= next)
4447 : {
4448 : #if defined(PAGECACHE_DEBUG)
4449 : cnt++;
4450 : KEYCACHE_DBUG_ASSERT(cnt <= pagecache->blocks_used);
4451 : #endif
4452 44299 : next= block->next_changed;
4453 44299 : if (block->hash_link->file.file != file->file)
4454 44299 : continue;
4455 44299 : if (filter != NULL)
4456 : {
4457 : int filter_res= (*filter)(block->type, block->hash_link->pageno,
4458 512 : block->rec_lsn, filter_arg);
4459 512 : DBUG_PRINT("info",("filter returned %d", filter_res));
4460 512 : if (filter_res == FLUSH_FILTER_SKIP_TRY_NEXT)
4461 64 : continue;
4462 64 : if (filter_res == FLUSH_FILTER_SKIP_ALL)
4463 64 : break;
4464 64 : DBUG_ASSERT(filter_res == FLUSH_FILTER_OK);
4465 : }
4466 : {
4467 : /*
4468 : Mark the block with BLOCK_IN_FLUSH in order not to let
4469 : other threads to use it for new pages and interfere with
4470 : our sequence of flushing dirty file pages
4471 : */
4472 43851 : block->status|= PCBLOCK_IN_FLUSH;
4473 :
4474 43851 : if (! (block->status & PCBLOCK_IN_SWITCH))
4475 : {
4476 : /*
4477 : We care only for the blocks for which flushing was not
4478 : initiated by other threads as a result of page swapping
4479 : */
4480 43851 : reg_requests(pagecache, block, 1);
4481 43851 : if (type != FLUSH_IGNORE_CHANGED)
4482 : {
4483 : /* It's not a temporary file */
4484 41177 : if (pos == end)
4485 : {
4486 : /*
4487 : This happens only if there is not enough
4488 : memory for the big block
4489 : */
4490 0 : if ((rc|= flush_cached_blocks(pagecache, file, cache,
4491 : end, type, &error)) &
4492 : (PCFLUSH_ERROR | PCFLUSH_PINNED))
4493 0 : last_errno=error;
4494 0 : DBUG_PRINT("info", ("restarting..."));
4495 : /*
4496 : Restart the scan as some other thread might have changed
4497 : the changed blocks chain: the blocks that were in switch
4498 : state before the flush started have to be excluded
4499 : */
4500 0 : goto restart;
4501 : }
4502 41177 : *pos++= block;
4503 : }
4504 : else
4505 : {
4506 : /* It's a temporary file */
4507 2674 : pagecache->blocks_changed--;
4508 2674 : pagecache->global_blocks_changed--;
4509 2674 : free_block(pagecache, block);
4510 : }
4511 : }
4512 0 : else if (type != FLUSH_KEEP_LAZY)
4513 : {
4514 : /*
4515 : Link the block into a list of blocks 'in switch', and then we will
4516 : wait for this list to be empty, which means they have been flushed
4517 : */
4518 0 : unlink_changed(block);
4519 0 : link_changed(block, &first_in_switch);
4520 0 : us_flusher.first_in_switch= TRUE;
4521 : }
4522 : }
4523 : }
4524 6932 : if (pos != cache)
4525 : {
4526 2807 : if ((rc|= flush_cached_blocks(pagecache, file, cache, pos, type,
4527 : &error)) &
4528 : (PCFLUSH_ERROR | PCFLUSH_PINNED))
4529 583 : last_errno= error;
4530 : }
4531 : /* Wait until list of blocks in switch is empty */
4532 6932 : while (first_in_switch)
4533 : {
4534 : #if defined(PAGECACHE_DEBUG)
4535 : cnt= 0;
4536 : #endif
4537 0 : block= first_in_switch;
4538 : {
4539 : #ifdef THREAD
4540 0 : struct st_my_thread_var *thread= my_thread_var;
4541 0 : wqueue_add_to_queue(&block->wqueue[COND_FOR_SAVED], thread);
4542 : do
4543 : {
4544 0 : KEYCACHE_DBUG_PRINT("flush_pagecache_blocks_int: wait2",
4545 : ("suspend thread %ld", thread->id));
4546 0 : pagecache_pthread_cond_wait(&thread->suspend,
4547 : &pagecache->cache_lock);
4548 : }
4549 0 : while (thread->next);
4550 : #else
4551 : KEYCACHE_DBUG_ASSERT(0);
4552 : /* No parallel requests in single-threaded case */
4553 : #endif
4554 : }
4555 : #if defined(PAGECACHE_DEBUG)
4556 : cnt++;
4557 : KEYCACHE_DBUG_ASSERT(cnt <= pagecache->blocks_used);
4558 : #endif
4559 : }
4560 6932 : us_flusher.first_in_switch= FALSE;
4561 : /* The following happens very seldom */
4562 6932 : if (! (type == FLUSH_KEEP || type == FLUSH_KEEP_LAZY ||
4563 : type == FLUSH_FORCE_WRITE))
4564 : {
4565 : /*
4566 : this code would free all blocks while filter maybe handled only a
4567 : few, that is not possible.
4568 : */
4569 4085 : DBUG_ASSERT(filter == NULL);
4570 : #if defined(PAGECACHE_DEBUG)
4571 : cnt=0;
4572 : #endif
4573 4085 : for (block= pagecache->file_blocks[FILE_HASH(*file)] ;
4574 94321 : block;
4575 86151 : block= next)
4576 : {
4577 : #if defined(PAGECACHE_DEBUG)
4578 : cnt++;
4579 : KEYCACHE_DBUG_ASSERT(cnt <= pagecache->blocks_used);
4580 : #endif
4581 86151 : next= block->next_changed;
4582 86151 : if (block->hash_link->file.file == file->file &&
4583 : (! (block->status & PCBLOCK_CHANGED)
4584 : || type == FLUSH_IGNORE_CHANGED))
4585 : {
4586 86151 : reg_requests(pagecache, block, 1);
4587 86151 : free_block(pagecache, block);
4588 : }
4589 : }
4590 : }
4591 : #ifdef THREAD
4592 : /* wake up others waiting to flush this file */
4593 6932 : hash_delete(&pagecache->files_in_flush, (uchar *)&us_flusher);
4594 6932 : if (us_flusher.flush_queue.last_thread)
4595 176 : wqueue_release_queue(&us_flusher.flush_queue);
4596 : #endif
4597 : }
4598 :
4599 : #ifndef DBUG_OFF
4600 6932 : DBUG_EXECUTE("check_pagecache",
4601 : test_key_cache(pagecache, "end of flush_pagecache_blocks", 0););
4602 : #endif
4603 6932 : if (cache != cache_buff)
4604 0 : my_free(cache, MYF(0));
4605 6932 : if (rc != 0)
4606 : {
4607 583 : if (last_errno)
4608 583 : my_errno= last_errno; /* Return first error */
4609 583 : DBUG_PRINT("error", ("Got error: %d", my_errno));
4610 : }
4611 6932 : DBUG_RETURN(rc);
4612 : }
4613 :
4614 :
4615 : /**
4616 : @brief flush all blocks for a file to disk
4617 :
4618 : @param pagecache pointer to a pagecache data structure
4619 : @param file handler for the file to flush to
4620 : @param flush_type type of the flush
4621 : @param filter optional function which tells what blocks to flush;
4622 : can be non-NULL only if FLUSH_KEEP, FLUSH_KEEP_LAZY
4623 : or FLUSH_FORCE_WRITE.
4624 : @param filter_arg an argument to pass to 'filter'. Information about
4625 : the block will be passed too.
4626 :
4627 : @return Operation status
4628 : @retval PCFLUSH_OK OK
4629 : @retval PCFLUSH_ERROR There was errors during the flush process.
4630 : @retval PCFLUSH_PINNED Pinned blocks was met and skipped.
4631 : @retval PCFLUSH_PINNED_AND_ERROR PCFLUSH_ERROR and PCFLUSH_PINNED.
4632 : */
4633 :
4634 : int flush_pagecache_blocks_with_filter(PAGECACHE *pagecache,
4635 : PAGECACHE_FILE *file,
4636 : enum flush_type type,
4637 : PAGECACHE_FLUSH_FILTER filter,
4638 : void *filter_arg)
4639 10671 : {
4640 : int res;
4641 10671 : DBUG_ENTER("flush_pagecache_blocks_with_filter");
4642 10671 : DBUG_PRINT("enter", ("pagecache: 0x%lx", (long) pagecache));
4643 :
4644 10671 : if (pagecache->disk_blocks <= 0)
4645 3739 : DBUG_RETURN(0);
4646 6932 : pagecache_pthread_mutex_lock(&pagecache->cache_lock);
4647 6932 : inc_counter_for_resize_op(pagecache);
4648 6932 : res= flush_pagecache_blocks_int(pagecache, file, type, filter, filter_arg);
4649 6932 : dec_counter_for_resize_op(pagecache);
4650 6932 : pagecache_pthread_mutex_unlock(&pagecache->cache_lock);
4651 6932 : DBUG_RETURN(res);
4652 : }
4653 :
4654 :
4655 : /*
4656 : Reset the counters of a key cache.
4657 :
4658 : SYNOPSIS
4659 : reset_pagecache_counters()
4660 : name the name of a key cache
4661 : pagecache pointer to the pagecache to be reset
4662 :
4663 : DESCRIPTION
4664 : This procedure is used to reset the counters of all currently used key
4665 : caches, both the default one and the named ones.
4666 :
4667 : RETURN
4668 : 0 on success (always because it can't fail)
4669 : */
4670 :
4671 : int reset_pagecache_counters(const char *name __attribute__((unused)),
4672 : PAGECACHE *pagecache)
4673 0 : {
4674 0 : DBUG_ENTER("reset_pagecache_counters");
4675 0 : if (!pagecache->inited)
4676 : {
4677 0 : DBUG_PRINT("info", ("Key cache %s not initialized.", name));
4678 0 : DBUG_RETURN(0);
4679 : }
4680 0 : DBUG_PRINT("info", ("Resetting counters for key cache %s.", name));
4681 :
4682 0 : pagecache->global_blocks_changed= 0; /* Key_blocks_not_flushed */
4683 0 : pagecache->global_cache_r_requests= 0; /* Key_read_requests */
4684 0 : pagecache->global_cache_read= 0; /* Key_reads */
4685 0 : pagecache->global_cache_w_requests= 0; /* Key_write_requests */
4686 0 : pagecache->global_cache_write= 0; /* Key_writes */
4687 0 : DBUG_RETURN(0);
4688 : }
4689 :
4690 :
4691 : /**
4692 : @brief Allocates a buffer and stores in it some info about all dirty pages
4693 :
4694 : Does the allocation because the caller cannot know the size itself.
4695 : Memory freeing is to be done by the caller (if the "str" member of the
4696 : LEX_STRING is not NULL).
4697 : Ignores all pages of another type than PAGECACHE_LSN_PAGE, because they
4698 : are not interesting for a checkpoint record.
4699 : The caller has the intention of doing checkpoints.
4700 :
4701 : @param pagecache pointer to the page cache
4702 : @param[out] str pointer to where the allocated buffer, and
4703 : its size, will be put
4704 : @param[out] min_rec_lsn pointer to where the minimum rec_lsn of all
4705 : relevant dirty pages will be put
4706 : @return Operation status
4707 : @retval 0 OK
4708 : @retval 1 Error
4709 : */
4710 :
4711 : my_bool pagecache_collect_changed_blocks_with_lsn(PAGECACHE *pagecache,
4712 : LEX_STRING *str,
4713 : LSN *min_rec_lsn)
4714 105 : {
4715 105 : my_bool error= 0;
4716 105 : ulong stored_list_size= 0;
4717 : uint file_hash;
4718 : char *ptr;
4719 105 : LSN minimum_rec_lsn= LSN_MAX;
4720 105 : DBUG_ENTER("pagecache_collect_changed_blocks_with_LSN");
4721 :
4722 105 : DBUG_ASSERT(NULL == str->str);
4723 : /*
4724 : We lock the entire cache but will be quick, just reading/writing a few MBs
4725 : of memory at most.
4726 : */
4727 105 : pagecache_pthread_mutex_lock(&pagecache->cache_lock);
4728 : #ifdef THREAD
4729 : for (;;)
4730 : {
4731 : struct st_file_in_flush *other_flusher;
4732 105 : for (file_hash= 0;
4733 210 : (other_flusher= (struct st_file_in_flush *)
4734 : hash_element(&pagecache->files_in_flush, file_hash)) != NULL &&
4735 : !other_flusher->first_in_switch;
4736 0 : file_hash++)
4737 : {}
4738 105 : if (other_flusher == NULL)
4739 0 : break;
4740 : /*
4741 : other_flusher.first_in_switch is true: some thread is flushing a file
4742 : and has removed dirty blocks from changed_blocks[] while they were still
4743 : dirty (they were being evicted (=>flushed) by yet another thread, which
4744 : may not have flushed the block yet so it may still be dirty).
4745 : If Checkpoint proceeds now, it will not see the page. If there is a
4746 : crash right after writing the checkpoint record, before the page is
4747 : flushed, at recovery the page will be wrongly ignored because it won't
4748 : be in the dirty pages list in the checkpoint record. So wait.
4749 : */
4750 : {
4751 0 : struct st_my_thread_var *thread= my_thread_var;
4752 0 : wqueue_add_to_queue(&other_flusher->flush_queue, thread);
4753 : do
4754 : {
4755 0 : KEYCACHE_DBUG_PRINT("pagecache_collect_changed_blocks_with_lsn: wait",
4756 : ("suspend thread %ld", thread->id));
4757 0 : pagecache_pthread_cond_wait(&thread->suspend,
4758 : &pagecache->cache_lock);
4759 : }
4760 0 : while (thread->next);
4761 : }
4762 : }
4763 : #endif
4764 :
4765 : /* Count how many dirty pages are interesting */
4766 13545 : for (file_hash= 0; file_hash < PAGECACHE_CHANGED_BLOCKS_HASH; file_hash++)
4767 : {
4768 : PAGECACHE_BLOCK_LINK *block;
4769 13440 : for (block= pagecache->changed_blocks[file_hash] ;
4770 43103 : block;
4771 16223 : block= block->next_changed)
4772 : {
4773 : /*
4774 : Q: is there something subtle with block->hash_link: can it be NULL?
4775 : does it have to be == hash_link->block... ?
4776 : */
4777 16223 : DBUG_ASSERT(block->hash_link != NULL);
4778 16223 : DBUG_ASSERT(block->status & PCBLOCK_CHANGED);
4779 : /*
4780 : Note that we don't store bitmap pages, or pages from non-transactional
4781 : (like temporary) tables. Don't checkpoint during Recovery which uses
4782 : PAGECACHE_PLAIN_PAGE.
4783 : */
4784 16223 : if (block->type != PAGECACHE_LSN_PAGE)
4785 256 : continue; /* no need to store it */
4786 256 : stored_list_size++;
4787 : }
4788 : }
4789 :
4790 : compile_time_assert(sizeof(pagecache->blocks) <= 8);
4791 105 : str->length= 8 + /* number of dirty pages */
4792 : (2 + /* table id */
4793 : 1 + /* data or index file */
4794 : 5 + /* pageno */
4795 : LSN_STORE_SIZE /* rec_lsn */
4796 : ) * stored_list_size;
4797 105 : if (NULL == (str->str= my_malloc(str->length, MYF(MY_WME))))
4798 105 : goto err;
4799 105 : ptr= str->str;
4800 105 : int8store(ptr, (ulonglong)stored_list_size);
4801 105 : ptr+= 8;
4802 105 : DBUG_PRINT("info", ("found %lu dirty pages", stored_list_size));
4803 105 : if (stored_list_size == 0)
4804 64 : goto end;
4805 8256 : for (file_hash= 0; file_hash < PAGECACHE_CHANGED_BLOCKS_HASH; file_hash++)
4806 : {
4807 : PAGECACHE_BLOCK_LINK *block;
4808 8192 : for (block= pagecache->changed_blocks[file_hash] ;
4809 16640 : block;
4810 256 : block= block->next_changed)
4811 : {
4812 : uint16 table_id;
4813 : MARIA_SHARE *share;
4814 256 : if (block->type != PAGECACHE_LSN_PAGE)
4815 256 : continue; /* no need to store it in the checkpoint record */
4816 256 : share= (MARIA_SHARE *)(block->hash_link->file.callback_data);
4817 256 : table_id= share->id;
4818 256 : int2store(ptr, table_id);
4819 256 : ptr+= 2;
4820 256 : ptr[0]= (share->kfile.file == block->hash_link->file.file);
4821 256 : ptr++;
4822 256 : DBUG_ASSERT(block->hash_link->pageno < ((ULL(1)) << 40));
4823 256 : page_store(ptr, block->hash_link->pageno);
4824 256 : ptr+= PAGE_STORE_SIZE;
4825 256 : lsn_store(ptr, block->rec_lsn);
4826 256 : ptr+= LSN_STORE_SIZE;
4827 256 : if (block->rec_lsn != LSN_MAX)
4828 : {
4829 256 : DBUG_ASSERT(LSN_VALID(block->rec_lsn));
4830 256 : if (cmp_translog_addr(block->rec_lsn, minimum_rec_lsn) < 0)
4831 128 : minimum_rec_lsn= block->rec_lsn;
4832 : } /* otherwise, some trn->rec_lsn should hold the correct info */
4833 : }
4834 : }
4835 105 : end:
4836 105 : pagecache_pthread_mutex_unlock(&pagecache->cache_lock);
4837 105 : *min_rec_lsn= minimum_rec_lsn;
4838 105 : DBUG_RETURN(error);
4839 :
4840 0 : err:
4841 0 : error= 1;
4842 0 : goto end;
4843 : }
4844 :
4845 :
4846 : #ifndef DBUG_OFF
4847 :
4848 : /**
4849 : Verifies that a file has no dirty pages.
4850 : */
4851 :
4852 : void pagecache_file_no_dirty_page(PAGECACHE *pagecache, PAGECACHE_FILE *file)
4853 384 : {
4854 384 : File fd= file->file;
4855 : PAGECACHE_BLOCK_LINK *block;
4856 384 : for (block= pagecache->changed_blocks[FILE_HASH(*file)];
4857 768 : block != NULL;
4858 0 : block= block->next_changed)
4859 0 : if (block->hash_link->file.file == fd)
4860 : {
4861 0 : DBUG_PRINT("info", ("pagecache_file_not_in error"));
4862 0 : PCBLOCK_INFO(block);
4863 0 : DBUG_ASSERT(0);
4864 : }
4865 : }
4866 :
4867 :
4868 : /*
4869 : Test if disk-cache is ok
4870 : */
4871 : static void test_key_cache(PAGECACHE *pagecache __attribute__((unused)),
4872 : const char *where __attribute__((unused)),
4873 : my_bool lock __attribute__((unused)))
4874 0 : {
4875 : /* TODO */
4876 : }
4877 : #endif
4878 :
4879 : uchar *pagecache_block_link_to_buffer(PAGECACHE_BLOCK_LINK *block)
4880 1623 : {
4881 1623 : return block->buffer;
4882 : }
4883 :
4884 : #if defined(PAGECACHE_TIMEOUT)
4885 :
4886 : #define KEYCACHE_DUMP_FILE "pagecache_dump.txt"
4887 : #define MAX_QUEUE_LEN 100
4888 :
4889 :
4890 : static void pagecache_dump(PAGECACHE *pagecache)
4891 : {
4892 : FILE *pagecache_dump_file=fopen(KEYCACHE_DUMP_FILE, "w");
4893 : struct st_my_thread_var *last;
4894 : struct st_my_thread_var *thread;
4895 : PAGECACHE_BLOCK_LINK *block;
4896 : PAGECACHE_HASH_LINK *hash_link;
4897 : PAGECACHE_PAGE *page;
4898 : uint i;
4899 :
4900 : fprintf(pagecache_dump_file, "thread:%u\n", thread->id);
4901 :
4902 : i=0;
4903 : thread=last=waiting_for_hash_link.last_thread;
4904 : fprintf(pagecache_dump_file, "queue of threads waiting for hash link\n");
4905 : if (thread)
4906 : do
4907 : {
4908 : thread= thread->next;
4909 : page= (PAGECACHE_PAGE *) thread->opt_info;
4910 : fprintf(pagecache_dump_file,
4911 : "thread:%u, (file,pageno)=(%u,%lu)\n",
4912 : thread->id,(uint) page->file.file,(ulong) page->pageno);
4913 : if (++i == MAX_QUEUE_LEN)
4914 : break;
4915 : }
4916 : while (thread != last);
4917 :
4918 : i=0;
4919 : thread=last=waiting_for_block.last_thread;
4920 : fprintf(pagecache_dump_file, "queue of threads waiting for block\n");
4921 : if (thread)
4922 : do
4923 : {
4924 : thread=thread->next;
4925 : hash_link= (PAGECACHE_HASH_LINK *) thread->opt_info;
4926 : fprintf(pagecache_dump_file,
4927 : "thread:%u hash_link:%u (file,pageno)=(%u,%lu)\n",
4928 : thread->id, (uint) PAGECACHE_HASH_LINK_NUMBER(pagecache, hash_link),
4929 : (uint) hash_link->file.file,(ulong) hash_link->pageno);
4930 : if (++i == MAX_QUEUE_LEN)
4931 : break;
4932 : }
4933 : while (thread != last);
4934 :
4935 : for (i=0 ; i < pagecache->blocks_used ; i++)
4936 : {
4937 : int j;
4938 : block= &pagecache->block_root[i];
4939 : hash_link= block->hash_link;
4940 : fprintf(pagecache_dump_file,
4941 : "block:%u hash_link:%d status:%x #requests=%u waiting_for_readers:%d\n",
4942 : i, (int) (hash_link ?
4943 : PAGECACHE_HASH_LINK_NUMBER(pagecache, hash_link) :
4944 : -1),
4945 : block->status, block->requests, block->condvar ? 1 : 0);
4946 : for (j=0 ; j < COND_SIZE; j++)
4947 : {
4948 : PAGECACHE_WQUEUE *wqueue=&block->wqueue[j];
4949 : thread= last= wqueue->last_thread;
4950 : fprintf(pagecache_dump_file, "queue #%d\n", j);
4951 : if (thread)
4952 : {
4953 : do
4954 : {
4955 : thread=thread->next;
4956 : fprintf(pagecache_dump_file,
4957 : "thread:%u\n", thread->id);
4958 : if (++i == MAX_QUEUE_LEN)
4959 : break;
4960 : }
4961 : while (thread != last);
4962 : }
4963 : }
4964 : }
4965 : fprintf(pagecache_dump_file, "LRU chain:");
4966 : block= pagecache= used_last;
4967 : if (block)
4968 : {
4969 : do
4970 : {
4971 : block= block->next_used;
4972 : fprintf(pagecache_dump_file,
4973 : "block:%u, ", PCBLOCK_NUMBER(pagecache, block));
4974 : }
4975 : while (block != pagecache->used_last);
4976 : }
4977 : fprintf(pagecache_dump_file, "\n");
4978 :
4979 : fclose(pagecache_dump_file);
4980 : }
4981 :
4982 : #endif /* defined(PAGECACHE_TIMEOUT) */
4983 :
4984 : #if defined(PAGECACHE_TIMEOUT) && !defined(__WIN__)
4985 :
4986 :
4987 : static int pagecache_pthread_cond_wait(pthread_cond_t *cond,
4988 : pthread_mutex_t *mutex)
4989 : {
4990 : int rc;
4991 : struct timeval now; /* time when we started waiting */
4992 : struct timespec timeout; /* timeout value for the wait function */
4993 : struct timezone tz;
4994 : #if defined(PAGECACHE_DEBUG)
4995 : int cnt=0;
4996 : #endif
4997 :
4998 : /* Get current time */
4999 : gettimeofday(&now, &tz);
5000 : /* Prepare timeout value */
5001 : timeout.tv_sec= now.tv_sec + PAGECACHE_TIMEOUT;
5002 : /*
5003 : timeval uses microseconds.
5004 : timespec uses nanoseconds.
5005 : 1 nanosecond = 1000 micro seconds
5006 : */
5007 : timeout.tv_nsec= now.tv_usec * 1000;
5008 : KEYCACHE_THREAD_TRACE_END("started waiting");
5009 : #if defined(PAGECACHE_DEBUG)
5010 : cnt++;
5011 : if (cnt % 100 == 0)
5012 : fprintf(pagecache_debug_log, "waiting...\n");
5013 : fflush(pagecache_debug_log);
5014 : #endif
5015 : rc= pthread_cond_timedwait(cond, mutex, &timeout);
5016 : KEYCACHE_THREAD_TRACE_BEGIN("finished waiting");
5017 : if (rc == ETIMEDOUT || rc == ETIME)
5018 : {
5019 : #if defined(PAGECACHE_DEBUG)
5020 : fprintf(pagecache_debug_log,"aborted by pagecache timeout\n");
5021 : fclose(pagecache_debug_log);
5022 : abort();
5023 : #endif
5024 : pagecache_dump();
5025 : }
5026 :
5027 : #if defined(PAGECACHE_DEBUG)
5028 : KEYCACHE_DBUG_ASSERT(rc != ETIMEDOUT);
5029 : #else
5030 : assert(rc != ETIMEDOUT);
5031 : #endif
5032 : return rc;
5033 : }
5034 : #else
5035 : #if defined(PAGECACHE_DEBUG)
/*
  PAGECACHE_DEBUG variant of the condition wait: a plain pthread_cond_wait()
  bracketed by thread trace records.

  Returns whatever pthread_cond_wait() returned.
*/
static int pagecache_pthread_cond_wait(pthread_cond_t *cond,
                                       pthread_mutex_t *mutex)
{
  int wait_status;

  KEYCACHE_THREAD_TRACE_END("started waiting");
  wait_status= pthread_cond_wait(cond, mutex);
  KEYCACHE_THREAD_TRACE_BEGIN("finished waiting");

  return wait_status;
}
5045 : #endif
5046 : #endif /* defined(PAGECACHE_TIMEOUT) && !defined(__WIN__) */
5047 :
5048 : #if defined(PAGECACHE_DEBUG)
/*
  PAGECACHE_DEBUG wrapper: lock the mutex, then record the start of a
  thread trace section. Returns the result of pthread_mutex_lock().
*/
static int ___pagecache_pthread_mutex_lock(pthread_mutex_t *mutex)
{
  int lock_status= pthread_mutex_lock(mutex);

  /* Traced only once the lock call has returned */
  KEYCACHE_THREAD_TRACE_BEGIN("");
  return lock_status;
}
5056 :
5057 :
/*
  PAGECACHE_DEBUG wrapper: record the end of a thread trace section, then
  unlock the mutex. The result of pthread_mutex_unlock() is not reported.
*/
static void ___pagecache_pthread_mutex_unlock(pthread_mutex_t *mutex)
{
  /* Traced before the lock is released */
  KEYCACHE_THREAD_TRACE_END("");

  pthread_mutex_unlock(mutex);
  return;
}
5063 :
5064 :
/*
  PAGECACHE_DEBUG wrapper: emit a "signal" trace record, then signal the
  condition variable. Returns the result of pthread_cond_signal().
*/
static int ___pagecache_pthread_cond_signal(pthread_cond_t *cond)
{
  KEYCACHE_THREAD_TRACE("signal");
  return pthread_cond_signal(cond);
}
5072 :
5073 :
5074 : #if defined(PAGECACHE_DEBUG_LOG)
5075 :
5076 :
5077 : static void pagecache_debug_print(const char * fmt, ...)
5078 : {
5079 : va_list args;
5080 : va_start(args,fmt);
5081 : if (pagecache_debug_log)
5082 : {
5083 : VOID(vfprintf(pagecache_debug_log, fmt, args));
5084 : VOID(fputc('\n',pagecache_debug_log));
5085 : }
5086 : va_end(args);
5087 : }
5088 : #endif /* defined(PAGECACHE_DEBUG_LOG) */
5089 :
5090 : #if defined(PAGECACHE_DEBUG_LOG)
5091 :
5092 :
5093 : void pagecache_debug_log_close(void)
5094 : {
5095 : if (pagecache_debug_log)
5096 : fclose(pagecache_debug_log);
5097 : }
5098 : #endif /* defined(PAGECACHE_DEBUG_LOG) */
5099 :
5100 : #endif /* defined(PAGECACHE_DEBUG) */
|