LTP GCOV extension - code coverage report
Current view: directory - storage/maria - ma_pagecache.c
Test: maria-mtr.html
Date: 2009-03-04 Instrumented lines: 1322
Code covered: 49.1 % Executed lines: 649

       1                 : /* Copyright (C) 2000-2008 MySQL AB
       2                 : 
       3                 :    This program is free software; you can redistribute it and/or modify
       4                 :    it under the terms of the GNU General Public License as published by
       5                 :    the Free Software Foundation; version 2 of the License.
       6                 : 
       7                 :    This program is distributed in the hope that it will be useful,
       8                 :    but WITHOUT ANY WARRANTY; without even the implied warranty of
       9                 :    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
      10                 :    GNU General Public License for more details.
      11                 : 
      12                 :    You should have received a copy of the GNU General Public License
      13                 :    along with this program; if not, write to the Free Software
      14                 :    Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA */
      15                 : 
      16                 : /*
      17                 :   These functions handle page caching for Maria tables.
      18                 : 
      19                 :   One cache can handle many files.
      20                 :   It must contain buffers of the same blocksize.
      21                 :   init_pagecache() should be used to init cache handler.
      22                 : 
      23                 :   The free list (free_block_list) is a stack-like structure.
      24                 :   When a block is freed by free_block(), it is pushed onto the stack.
      25                 :   When a new block is required, we first try to pop one from the stack.
      26                 :   If the stack is empty, we try to get a never-used block from the pool.
      27                 :   If the pool is empty too, a block is taken from the LRU ring, flushing it
      28                 :   to disk if necessary. This is handled in find_block().
      29                 :   With the new free list, the blocks can have three temperatures:
      30                 :   hot, warm and cold (which is free). This is remembered in the block header
      31                 :   by the enum PCBLOCK_TEMPERATURE temperature variable. Remembering the
      32                 :   temperature is necessary to correctly count the number of warm blocks,
      33                 :   which is required to decide when blocks are allowed to become hot. Whenever
      34                 :   a block is inserted to another (sub-)chain, we take the old and new
      35                 :   temperature into account to decide if we got one more or less warm block.
      36                 :   blocks_unused is the sum of never used blocks in the pool and of currently
      37                 :   free blocks. blocks_used is the number of blocks fetched from the pool and
      38                 :   as such gives the maximum number of in-use blocks at any time.
      39                 : 
      40                 :   TODO: Write operation locks whole cache till the end of the operation.
      41                 :     Should be fixed.
      42                 : */
      43                 : 
      44                 : #include "maria_def.h"
      45                 : #include <m_string.h>
      46                 : #include "ma_pagecache.h"
      47                 : #include "ma_blockrec.h"
      48                 : #include <my_bit.h>
      49                 : #include <errno.h>
      50                 : 
      51                 : /*
      52                 :   Some compilation flags have been added specifically for this module
      53                 :   to control the following:
      54                 :   - to prevent a thread from yielding control when reading directly
      55                 :     from page cache, which might improve performance in many cases;
      56                 :     to enable this add:
      57                 :     #define SERIALIZED_READ_FROM_CACHE
      58                 :   - to set an upper bound for number of threads simultaneously
      59                 :     using the page cache; this setting helps to determine an optimal
      60                 :     size for hash table and improve performance when the number of
      61                 :     blocks in the page cache is much less than the number of threads
      62                 :     accessing it;
      63                 :     to set this number equal to <N> add
      64                 :       #define MAX_THREADS <N>
      65                 :   - to substitute calls of pthread_cond_wait for calls of
      66                 :     pthread_cond_timedwait (wait with timeout set up);
      67                 :     this setting should be used only when you want to trap a deadlock
      68                 :     situation, which theoretically should not happen;
      69                 :     to set timeout equal to <T> seconds add
      70                 :       #define PAGECACHE_TIMEOUT <T>
      71                 :   - to enable the module traps and to send debug information from
      72                 :     page cache module to a special debug log add:
      73                 :       #define PAGECACHE_DEBUG
      74                 :     the name of this debug log file <LOG NAME> can be set through:
      75                 :       #define PAGECACHE_DEBUG_LOG  <LOG NAME>
      76                 :     if the name is not defined, it's set by default;
      77                 :     if the PAGECACHE_DEBUG flag is not set up and we are in a debug
      78                 :     mode, i.e. when ! defined(DBUG_OFF), the debug information from the
      79                 :     module is sent to the regular debug log.
      80                 : 
      81                 :   Example of the settings:
      82                 :     #define SERIALIZED_READ_FROM_CACHE
      83                 :     #define MAX_THREADS   100
      84                 :     #define PAGECACHE_TIMEOUT  1
      85                 :     #define PAGECACHE_DEBUG
      86                 :     #define PAGECACHE_DEBUG_LOG  "my_pagecache_debug.log"
      87                 : */
      88                 : 
      89                 : /*
      90                 :   The key cache relies on external locking; the page cache does not,
      91                 :   so SERIALIZED_READ_FROM_CACHE is defined here to avoid reading
      92                 :   inconsistent data from a page.
      93                 :   (The keycache functions key_cache_read(), key_cache_insert() and
      94                 :   key_cache_write() rely on an external MyISAM lock; we don't.)
      95                 : */
      96                 : #define SERIALIZED_READ_FROM_CACHE yes
      97                 : /* Trace block B's state with DBUG_PRINT; B is expanded several times, so pass a side-effect-free expression */
      98                 : #define PCBLOCK_INFO(B) \
      99                 :   DBUG_PRINT("info", \
     100                 :              ("block: 0x%lx  fd: %lu  page: %lu  s: %0x  hshL: " \
     101                 :               " 0x%lx  req: %u/%u wrlocks: %u  rdlocks %u  " \
     102                 :               "rdlocks_q: %u  pins: %u  status: %u  type: %s", \
     103                 :               (ulong)(B), \
     104                 :               (ulong)((B)->hash_link ? \
     105                 :                       (B)->hash_link->file.file : \
     106                 :                       0), \
     107                 :               (ulong)((B)->hash_link ? \
     108                 :                       (B)->hash_link->pageno : \
     109                 :                       0), \
     110                 :               (B)->status, \
     111                 :               (ulong)(B)->hash_link, \
     112                 :               (uint) (B)->requests, \
     113                 :               (uint)((B)->hash_link ? \
     114                 :                      (B)->hash_link->requests : \
     115                 :                        0), \
     116                 :               (B)->wlocks, (B)->rlocks, (B)->rlocks_queue, \
     117                 :               (uint)(B)->pins, (uint)(B)->status, \
     118                 :               page_cache_page_type_str[(B)->type]))
     119                 : 
     120                 : /* Global flag, 0 by default; judging by its name it disables flushing of page cache blocks - confirm at use sites. TODO: put it to my_static.c */
     121                 : my_bool my_disable_flush_pagecache_blocks= 0;
     122                 : /* Pointer to the TYPE object whose member MEMBER is located at address a (fully parenthesized) */
     123                 : #define STRUCT_PTR(TYPE, MEMBER, a)                                           \
     124                 :           ((TYPE *) ((char *) (a) - offsetof(TYPE, MEMBER)))
     125                 : 
     126                 : /* types of condition variables (COND_SIZE sizes the per-block wqueue[] array) */
     127                 : #define  COND_FOR_REQUESTED 0  /* queue of threads waiting for read operation */
     128                 : #define  COND_FOR_SAVED     1  /* queue of threads waiting for flush */
     129                 : #define  COND_FOR_WRLOCK    2  /* queue of threads waiting for write lock */
     130                 : #define  COND_SIZE          3  /* number of COND_* queues */
     131                 : /* condition variable type (alias of pthread_cond_t), used for the 'condvar' member of a block */
     132                 : typedef pthread_cond_t KEYCACHE_CONDVAR;
     133                 : 
     134                 : /* descriptor of a page held in a page cache block buffer: (file, page number) */
     135                 : struct st_pagecache_page
     136                 : {
     137                 :   PAGECACHE_FILE file;    /* file to which the page belongs     */
     138                 :   pgcache_page_no_t pageno; /* number of the page in the file   */
     139                 : };
     140                 : 
     141                 : /* element in the chain of a hash table bucket; ties a (file, pageno) pair to a block */
     142                 : struct st_pagecache_hash_link
     143                 : {
     144                 :   struct st_pagecache_hash_link
     145                 :     *next, **prev;                   /* to connect links in the same bucket  */
     146                 :   struct st_pagecache_block_link
     147                 :     *block;                          /* reference to the block for the page  */
     148                 :   PAGECACHE_FILE file;               /* file the page comes from             */
     149                 :   pgcache_page_no_t pageno;          /* number of the page in that file      */
     150                 :   uint requests;                     /* number of requests for the page      */
     151                 : };
     152                 : 
     153                 : /* simple states of a block (distinct bits, so presumably OR-able in 'status') */
     154                 : #define PCBLOCK_ERROR       1 /* an error occurred when performing disk i/o  */
     155                 : #define PCBLOCK_READ        2 /* the page is in the block buffer             */
     156                 : #define PCBLOCK_IN_SWITCH   4 /* block is preparing to read new page         */
     157                 : #define PCBLOCK_REASSIGNED  8 /* block does not accept requests for old page */
     158                 : #define PCBLOCK_IN_FLUSH   16 /* block is in flush operation                 */
     159                 : #define PCBLOCK_CHANGED    32 /* block buffer contains a dirty page          */
     160                 : #define PCBLOCK_DIRECT_W   64 /* possible direct write to the block          */
     161                 : 
     162                 : /* page status, returned by find_block */
     163                 : #define PAGE_READ               0
     164                 : #define PAGE_TO_BE_READ         1
     165                 : #define PAGE_WAIT_TO_BE_READ    2
     166                 : 
     167                 : /* block temperature determines which (sub-)chain the block is currently in */
     168                 : enum PCBLOCK_TEMPERATURE { PCBLOCK_COLD /*free*/ , PCBLOCK_WARM , PCBLOCK_HOT };
     169                 : 
     170                 : /* debug info: printable names for DBUG_PRINT traces; entry order presumably matches the corresponding enums - confirm */
     171                 : #ifndef DBUG_OFF
     172                 : static const char *page_cache_page_type_str[]=
     173                 : {
     174                 :   /* used only for control page type changing during debugging */
     175                 :   "EMPTY",
     176                 :   "PLAIN",
     177                 :   "LSN",
     178                 :   "READ_UNKNOWN"
     179                 : };
     180                 : /* names of write modes; no use of this table is visible in this part of the file */
     181                 : static const char *page_cache_page_write_mode_str[]=
     182                 : {
     183                 :   "DELAY",
     184                 :   "DONE"
     185                 : };
     186                 : /* names of lock changes, indexed by enum pagecache_page_lock (see info_check_lock()) */
     187                 : static const char *page_cache_page_lock_str[]=
     188                 : {
     189                 :   "free -> free",
     190                 :   "read -> read",
     191                 :   "write -> write",
     192                 :   "free -> read",
     193                 :   "free -> write",
     194                 :   "read -> free",
     195                 :   "write -> free",
     196                 :   "write -> read"
     197                 : };
     198                 : /* names of pin changes, indexed by enum pagecache_page_pin (see info_check_pin()) */
     199                 : static const char *page_cache_page_pin_str[]=
     200                 : {
     201                 :   "pinned -> pinned",
     202                 :   "unpinned -> unpinned",
     203                 :   "unpinned -> pinned",
     204                 :   "pinned -> unpinned"
     205                 : };
     206                 : 
     207                 : /* list node naming a thread; 'prev' holds the address of the pointer that refers to this node (see info_link()) */
     208                 : typedef struct st_pagecache_pin_info
     209                 : {
     210                 :   struct st_pagecache_pin_info *next, **prev;
     211                 :   struct st_my_thread_var *thread;
     212                 : }  PAGECACHE_PIN_INFO;
     213                 : 
     214                 : /*
     215                 :   The leading members (next, prev, thread) of st_pagecache_lock_info must
     216                 :   stay layout-compatible with st_pagecache_pin_info: lock lists are cast to
     217                 :   PAGECACHE_PIN_INFO and handled by the same info_*() functions.
     218                 : */
     219                 : typedef struct st_pagecache_lock_info
     220                 : {
     221                 :   struct st_pagecache_lock_info *next, **prev;
     222                 :   struct st_my_thread_var *thread;
     223                 :   my_bool write_lock;
     224                 : } PAGECACHE_LOCK_INFO;
     225                 : 
     226                 : 
     227                 : /* service functions maintain debugging info about pin & lock */
     228                 : 
     229                 : 
     230                 : /*
     231                 :   Pushes a pin/lock information node onto the front of a list.
     232                 :   node->prev is set to the address of the pointer that refers to the node.
     233                 :   SYNOPSIS
     234                 :     info_link()
     235                 :     list                 address of the list head pointer (updated to node)
     236                 :     node                 the node to insert as the new first element
     237                 : */
     238                 : 
     239                 : static void info_link(PAGECACHE_PIN_INFO **list, PAGECACHE_PIN_INFO *node)
     240            5839 : {
     241            5839 :   if ((node->next= *list))
     242               0 :     node->next->prev= &(node->next);
     243            5839 :   *list= node;
     244            5839 :   node->prev= list;
     245                 : }
     246                 : 
     247                 : 
     248                 : /*
     249                 :   Removes a pin/lock information node from the list it is linked in.
     250                 :   The pointer referring to the node (*node->prev) is redirected to node->next.
     251                 :   SYNOPSIS
     252                 :     info_unlink()
     253                 :     node                 the node to unlink; node->prev must be valid
     254                 : */
     255                 : 
     256                 : static void info_unlink(PAGECACHE_PIN_INFO *node)
     257            5839 : {
     258            5839 :   if ((*node->prev= node->next))
     259               0 :    node->next->prev= node->prev;
     260                 : }
     261                 : 
     262                 : 
     263                 : /*
     264                 :   Finds the information node of a given thread in the list of threads
     265                 :   which pinned/locked this block.
     266                 : 
     267                 :   SYNOPSIS
     268                 :     info_find()
     269                 :     list                 first node of the list to search (0 if empty)
     270                 :     thread               thread to look for (pointer to its
     271                 :                          st_my_thread_var); ignored if 'any' is set
     272                 :     any                  if set, return the first node without searching
     273                 : 
     274                 :   RETURN
     275                 :     0 - the thread was not found (or, with 'any', the list is empty)
     276                 :     otherwise a pointer to the thread's information node, or, with 'any',
     277                 :     to the first node of the list.
     278                 : */
     279                 : 
     280                 : static PAGECACHE_PIN_INFO *info_find(PAGECACHE_PIN_INFO *list,
     281                 :                                      struct st_my_thread_var *thread,
     282                 :                                      my_bool any)
     283            7756 : {
     284            7756 :   register PAGECACHE_PIN_INFO *i= list;
     285            7756 :   if (any)
     286               0 :     return i;
     287               0 :   for(; i != 0; i= i->next)
     288            7756 :     if (i->thread == thread)
     289            7756 :       return i;
     290               0 :   return 0;
     291                 : }
     292                 : 
     293                 : #endif /* !DBUG_OFF */
     294                 : 
     295                 : /* page cache block: bookkeeping for one page buffer (chain links, locks, pins, status) */
     296                 : struct st_pagecache_block_link
     297                 : {
     298                 :   struct st_pagecache_block_link
     299                 :     *next_used, **prev_used;   /* to connect links in the LRU chain (ring)   */
     300                 :   struct st_pagecache_block_link
     301                 :     *next_changed, **prev_changed; /* for lists of file dirty/clean blocks   */
     302                 :   struct st_pagecache_hash_link
     303                 :     *hash_link;           /* backward ptr to referring hash_link             */
     304                 : #ifndef DBUG_OFF
     305                 :   PAGECACHE_PIN_INFO *pin_list;   /* debug only: threads which pinned the block */
     306                 :   PAGECACHE_LOCK_INFO *lock_list; /* debug only: threads which locked the block */
     307                 : #endif
     308                 :   KEYCACHE_CONDVAR *condvar; /* condition variable for 'no readers' event    */
     309                 :   uchar *buffer;           /* buffer for the block page                      */
     310                 :   pthread_t write_locker;  /* presumably the thread holding the write lock - confirm */
     311                 : 
     312                 :   ulonglong last_hit_time; /* timestamp of the last hit                      */
     313                 :   WQUEUE
     314                 :     wqueue[COND_SIZE];    /* queues on waiting requests for new/old pages    */
     315                 :   uint32 requests;        /* number of requests for the block                */
     316                 :   uint32 pins;            /* pin counter                                     */
     317                 :   uint32 wlocks;          /* write locks counter                             */
     318                 :   uint32 rlocks;          /* read locks counter                              */
     319                 :   uint32 rlocks_queue;    /* rd. locks waiting wr. lock of this thread       */
     320                 :   uint16 status;          /* state of the block (see PCBLOCK_* defines)      */
     321                 :   int16  error;           /* error code for block in case of error           */
     322                 :   enum PCBLOCK_TEMPERATURE temperature; /* block temperature: cold, warm, hot*/
     323                 :   enum pagecache_page_type type; /* type of the block                        */
     324                 :   uint hits_left;         /* number of hits left until promotion             */
     325                 :   /** @brief LSN when first became dirty; LSN_MAX means "not yet set"        */
     326                 :   LSN rec_lsn;
     327                 : };
     328                 : 
     329                 : /** @brief information describing a run of flush_pagecache_blocks_int() */
     330                 : struct st_file_in_flush
     331                 : {
     332                 :   File file;  /* the file being flushed by that run */
     333                 :   /**
     334                 :      @brief threads waiting for the thread currently flushing this file to be
     335                 :      done
     336                 :   */
     337                 :   WQUEUE flush_queue;
     338                 :   /**
     339                 :      @brief set if the thread currently flushing the file has a non-empty
     340                 :      first_in_switch list.
     341                 :   */
     342                 :   my_bool first_in_switch;
     343                 : };
     344                 : 
     345                 : #ifndef DBUG_OFF
     346                 : /* debug checks */
     347                 : /* info_check_pin(): returns 1 if pin change 'mode' contradicts whether the current thread is in block->pin_list, else 0 */
     348                 : #ifdef NOT_USED
     349                 : static my_bool info_check_pin(PAGECACHE_BLOCK_LINK *block,
     350                 :                               enum pagecache_page_pin mode
     351                 :                               __attribute__((unused)))
     352                 : {
     353                 :   struct st_my_thread_var *thread= my_thread_var;
     354                 :   PAGECACHE_PIN_INFO *info= info_find(block->pin_list, thread, 0);
     355                 :   DBUG_ENTER("info_check_pin");
     356                 :   DBUG_PRINT("enter", ("thread: 0x%lx  pin: %s",
     357                 :                        (ulong) thread, page_cache_page_pin_str[mode]));
     358                 :   if (info)
     359                 :   {
     360                 :     if (mode == PAGECACHE_PIN_LEFT_UNPINNED)
     361                 :     {
     362                 :       DBUG_PRINT("info",
     363                 :                  ("info_check_pin: thread: 0x%lx  block: 0x%lx  ; LEFT_UNPINNED!!!",
     364                 :                   (ulong)thread, (ulong)block));
     365                 :       DBUG_RETURN(1);
     366                 :     }
     367                 :     else if (mode == PAGECACHE_PIN)
     368                 :     {
     369                 :       DBUG_PRINT("info",
     370                 :                  ("info_check_pin: thread: 0x%lx  block: 0x%lx  ; PIN!!!",
     371                 :                   (ulong)thread, (ulong)block));
     372                 :       DBUG_RETURN(1);
     373                 :     }
     374                 :   }
     375                 :   else
     376                 :   {
     377                 :     if (mode == PAGECACHE_PIN_LEFT_PINNED)
     378                 :     {
     379                 :       DBUG_PRINT("info",
     380                 :                  ("info_check_pin: thread: 0x%lx  block: 0x%lx  ; LEFT_PINNED!!!",
     381                 :                   (ulong)thread, (ulong)block));
     382                 :       DBUG_RETURN(1);
     383                 :     }
     384                 :     else if (mode == PAGECACHE_UNPIN)
     385                 :     {
     386                 :       DBUG_PRINT("info",
     387                 :                  ("info_check_pin: thread: 0x%lx  block: 0x%lx  ; UNPIN!!!",
     388                 :                   (ulong)thread, (ulong)block));
     389                 :       DBUG_RETURN(1);
     390                 :     }
     391                 :   }
     392                 :   DBUG_RETURN(0);
     393                 : }
     394                 : 
     395                 : 
     396                 : /*
     397                 :   Debug function which checks the current thread's lock state on a block
     398                 :   (its entry in block->lock_list, if any) against the requested changes
     399                 :   SYNOPSIS
     400                 :     info_check_lock()
     401                 :     block                block whose lock_list is examined
     402                 :     lock                 requested lock changes
     403                 :     pin                  requested pin changes
     404                 :   RETURN
     405                 :     0 - OK
     406                 :     1 - Error (the offending combination is traced with DBUG_PRINT)
     407                 : */
     408                 : 
     409                 : static my_bool info_check_lock(PAGECACHE_BLOCK_LINK *block,
     410                 :                                enum pagecache_page_lock lock,
     411                 :                                enum pagecache_page_pin pin)
     412                 : {
     413                 :   struct st_my_thread_var *thread= my_thread_var;
     414                 :   PAGECACHE_LOCK_INFO *info=
     415                 :     (PAGECACHE_LOCK_INFO *) info_find((PAGECACHE_PIN_INFO *) block->lock_list,
     416                 :                                       thread, 0);
     417                 :   DBUG_ENTER("info_check_lock");
     418                 :   switch(lock) {
     419                 :   case PAGECACHE_LOCK_LEFT_UNLOCKED:
     420                 :     if (pin != PAGECACHE_PIN_LEFT_UNPINNED ||
     421                 :         info)
     422                 :       goto error;
     423                 :     break;
     424                 :   case PAGECACHE_LOCK_LEFT_READLOCKED:
     425                 :     if ((pin != PAGECACHE_PIN_LEFT_UNPINNED &&
     426                 :          pin != PAGECACHE_PIN_LEFT_PINNED) ||
     427                 :         info == 0 || info->write_lock)
     428                 :       goto error;
     429                 :     break;
     430                 :   case PAGECACHE_LOCK_LEFT_WRITELOCKED:
     431                 :     if (pin != PAGECACHE_PIN_LEFT_PINNED ||
     432                 :         info == 0 || !info->write_lock)
     433                 :       goto error;
     434                 :     break;
     435                 :   case PAGECACHE_LOCK_READ:
     436                 :     if ((pin != PAGECACHE_PIN_LEFT_UNPINNED &&
     437                 :          pin != PAGECACHE_PIN) ||
     438                 :         info != 0)
     439                 :       goto error;
     440                 :     break;
     441                 :   case PAGECACHE_LOCK_WRITE:
     442                 :     if (pin != PAGECACHE_PIN ||
     443                 :         info != 0)
     444                 :       goto error;
     445                 :     break;
     446                 :   case PAGECACHE_LOCK_READ_UNLOCK:
     447                 :     if ((pin != PAGECACHE_PIN_LEFT_UNPINNED &&
     448                 :          pin != PAGECACHE_UNPIN) ||
     449                 :         info == 0 || info->write_lock)
     450                 :       goto error;
     451                 :     break;
     452                 :   case PAGECACHE_LOCK_WRITE_UNLOCK:
     453                 :     if (pin != PAGECACHE_UNPIN ||
     454                 :         info == 0 || !info->write_lock)
     455                 :       goto error;
     456                 :     break;
     457                 :   case PAGECACHE_LOCK_WRITE_TO_READ:
     458                 :     if ((pin != PAGECACHE_PIN_LEFT_PINNED &&
     459                 :          pin != PAGECACHE_UNPIN) ||
     460                 :         info == 0 || !info->write_lock)
     461                 :       goto error;
     462                 :     break;
     463                 :   }
     464                 :   DBUG_RETURN(0);
     465                 : error:
     466                 :   DBUG_PRINT("info",
     467                 :              ("info_check_lock: thread: 0x%lx block 0x%lx: info: %d wrt: %d,"
     468                 :               "to lock: %s, to pin: %s",
     469                 :               (ulong)thread, (ulong)block, test(info),
     470                 :               (info ? info->write_lock : 0),
     471                 :               page_cache_page_lock_str[lock],
     472                 :               page_cache_page_pin_str[pin]));
     473                 :   DBUG_RETURN(1);
     474                 : }
     475                 : #endif /* NOT_USED */
     476                 : #endif /* !DBUG_OFF */
     477                 : 
     478                 : #define FLUSH_CACHE         2000            /* sort this many blocks at once */
     479                 : 
     480                 : static void free_block(PAGECACHE *pagecache, PAGECACHE_BLOCK_LINK *block);
     481                 : #ifndef DBUG_OFF
     482                 : static void test_key_cache(PAGECACHE *pagecache,
     483                 :                            const char *where, my_bool lock);
     484                 : #endif
     485                 : /* Bucket index helpers: masking with (size - 1) presumably relies on power-of-two table sizes - confirm at init */
     486                 : #define PAGECACHE_HASH(p, f, pos) (((ulong) (pos) +                          \
     487                 :                                     (ulong) (f).file) & ((p)->hash_entries-1))
     488                 : #define FILE_HASH(f) ((uint) (f).file & (PAGECACHE_CHANGED_BLOCKS_HASH - 1))
     489                 : /* Debug log file name: the default below is used when PAGECACHE_DEBUG is defined without PAGECACHE_DEBUG_LOG */
     490                 : #define DEFAULT_PAGECACHE_DEBUG_LOG  "pagecache_debug.log"
     491                 : 
     492                 : #if defined(PAGECACHE_DEBUG) && ! defined(PAGECACHE_DEBUG_LOG)
     493                 : #define PAGECACHE_DEBUG_LOG  DEFAULT_PAGECACHE_DEBUG_LOG
     494                 : #endif
     495                 : /* Open/close the debug log; OPEN leaves the log pointer NULL and skips setvbuf() when fopen() fails */
     496                 : #if defined(PAGECACHE_DEBUG_LOG)
     497                 : static FILE *pagecache_debug_log= NULL;
     498                 : static void pagecache_debug_print _VARARGS((const char *fmt, ...));
     499                 : #define PAGECACHE_DEBUG_OPEN                                                  \
     500                 :           if (!pagecache_debug_log &&                                         \
     501                 :               (pagecache_debug_log= fopen(PAGECACHE_DEBUG_LOG, "w")))         \
     502                 :           {                                                                   \
     503                 :             (void) setvbuf(pagecache_debug_log, NULL, _IOLBF, BUFSIZ);        \
     504                 :           }
     505                 : 
     506                 : #define PAGECACHE_DEBUG_CLOSE                                                 \
     507                 :           if (pagecache_debug_log)                                            \
     508                 :           {                                                                   \
     509                 :             fclose(pagecache_debug_log);                                      \
     510                 :             pagecache_debug_log= 0;                                           \
     511                 :           }
     512                 : #else
     513                 : #define PAGECACHE_DEBUG_OPEN
     514                 : #define PAGECACHE_DEBUG_CLOSE
     515                 : #endif /* defined(PAGECACHE_DEBUG_LOG) */
     516                 : /* Debug print/assert: use the page cache debug log if PAGECACHE_DEBUG_LOG and PAGECACHE_DEBUG are both defined (the assert then expands 'a' twice), else DBUG_PRINT/DBUG_ASSERT */
     517                 : #if defined(PAGECACHE_DEBUG_LOG) && defined(PAGECACHE_DEBUG)
     518                 : #define KEYCACHE_DBUG_PRINT(l, m)                                             \
     519                 :             { if (pagecache_debug_log)                                        \
     520                 :                 fprintf(pagecache_debug_log, "%s: ", l);                      \
     521                 :               pagecache_debug_print m; }
     522                 : 
     523                 : #define KEYCACHE_DBUG_ASSERT(a)                                               \
     524                 :             { if (! (a) && pagecache_debug_log)                               \
     525                 :                 fclose(pagecache_debug_log);                                  \
     526                 :               assert(a); }
     527                 : #else
     528                 : #define KEYCACHE_DBUG_PRINT(l, m)  DBUG_PRINT(l, m)
     529                 : #define KEYCACHE_DBUG_ASSERT(a)    DBUG_ASSERT(a)
     530                 : #endif /* defined(PAGECACHE_DEBUG_LOG) && defined(PAGECACHE_DEBUG) */
     531                 : /* Thread tracing; empty unless PAGECACHE_DEBUG is set or DBUG_OFF is unset */
     532                 : #if defined(PAGECACHE_DEBUG) || !defined(DBUG_OFF)
     533                 : #ifdef THREAD
     534                 : static long pagecache_thread_id;
     535                 : #define KEYCACHE_THREAD_TRACE(l)                                              \
     536                 :              KEYCACHE_DBUG_PRINT(l,("|thread %ld",pagecache_thread_id))
     537                 : /* BEGIN stores my_thread_var->id in the file-scope pagecache_thread_id */
     538                 : #define KEYCACHE_THREAD_TRACE_BEGIN(l)                                        \
     539                 :             { struct st_my_thread_var *thread_var= my_thread_var;             \
     540                 :               pagecache_thread_id= thread_var->id;                            \
     541                 :               KEYCACHE_DBUG_PRINT(l,("[thread %ld",pagecache_thread_id)) }
     542                 : 
     543                 : #define KEYCACHE_THREAD_TRACE_END(l)                                          \
     544                 :             KEYCACHE_DBUG_PRINT(l,("]thread %ld",pagecache_thread_id))
     545                 : #else /* THREAD */
     546                 : #define KEYCACHE_THREAD_TRACE(l)        KEYCACHE_DBUG_PRINT(l,(""))
     547                 : #define KEYCACHE_THREAD_TRACE_BEGIN(l)  KEYCACHE_DBUG_PRINT(l,(""))
     548                 : #define KEYCACHE_THREAD_TRACE_END(l)    KEYCACHE_DBUG_PRINT(l,(""))
     549                 : #endif /* THREAD */
     550                 : #else
     551                 : #define KEYCACHE_THREAD_TRACE_BEGIN(l)
     552                 : #define KEYCACHE_THREAD_TRACE_END(l)
     553                 : #define KEYCACHE_THREAD_TRACE(l)
     554                 : #endif /* defined(PAGECACHE_DEBUG) || !defined(DBUG_OFF) */
     555                 : /* Array index of block b / hash link h within pagecache p (p unparenthesized) */
     556                 : #define PCBLOCK_NUMBER(p, b)                                                    \
     557                 :   ((uint) (((char*)(b)-(char *) p->block_root)/sizeof(PAGECACHE_BLOCK_LINK)))
     558                 : #define PAGECACHE_HASH_LINK_NUMBER(p, h)                                      \
     559                 :   ((uint) (((char*)(h)-(char *) p->hash_link_root)/                           \
     560                 :            sizeof(PAGECACHE_HASH_LINK)))
     561                 : /* Own cond-wait wrapper for timeout (non-__WIN__) or debug builds, else pthread's */
     562                 : #if (defined(PAGECACHE_TIMEOUT) && !defined(__WIN__)) || defined(PAGECACHE_DEBUG)
     563                 : static int pagecache_pthread_cond_wait(pthread_cond_t *cond,
     564                 :                                       pthread_mutex_t *mutex);
     565                 : #else
     566                 : #define  pagecache_pthread_cond_wait pthread_cond_wait
     567                 : #endif
     568                 : /* Debug builds route lock/unlock/signal through wrappers and trace __LINE__ */
     569                 : #if defined(PAGECACHE_DEBUG)
     570                 : static int ___pagecache_pthread_mutex_lock(pthread_mutex_t *mutex);
     571                 : static void ___pagecache_pthread_mutex_unlock(pthread_mutex_t *mutex);
     572                 : static int ___pagecache_pthread_cond_signal(pthread_cond_t *cond);
     573                 : #define pagecache_pthread_mutex_lock(M) \
     574                 : { DBUG_PRINT("lock", ("mutex lock 0x%lx %u", (ulong)(M), __LINE__)); \
     575                 :   ___pagecache_pthread_mutex_lock(M);}
     576                 : #define pagecache_pthread_mutex_unlock(M) \
     577                 : { DBUG_PRINT("lock", ("mutex unlock 0x%lx %u", (ulong)(M), __LINE__)); \
     578                 :   ___pagecache_pthread_mutex_unlock(M);}
     579                 : #define pagecache_pthread_cond_signal(M) \
     580                 : { DBUG_PRINT("lock", ("signal 0x%lx %u", (ulong)(M), __LINE__)); \
     581                 :   ___pagecache_pthread_cond_signal(M);}
     582                 : #else
     583                 : #define pagecache_pthread_mutex_lock pthread_mutex_lock
     584                 : #define pagecache_pthread_mutex_unlock pthread_mutex_unlock
     585                 : #define pagecache_pthread_cond_signal pthread_cond_signal
     586                 : #endif /* defined(PAGECACHE_DEBUG) */
     587                 : 
     588                 : extern my_bool translog_flush(TRANSLOG_ADDRESS lsn);
     589                 : 
     590                 : /*
     591                 :   Write page to the disk after running the flush-log and write callbacks
     592                 : 
     593                 :   SYNOPSIS
     594                 :     pagecache_fwrite()
     595                 :     pagecache - page cache pointer
     596                 :     filedesc  - pagecache file descriptor structure
     597                 :     buffer    - buffer which we will write
     598                 :     pageno    - page number; file offset is pageno << pagecache->shift
     599                 :     type      - page type (plain or with LSN); only used by a debug assert
     600                 :     flags     - MYF() flags passed to my_pwrite()
     601                 :   RETURN
     602                 :     0   - OK
     603                 :     1   - Error (a callback or my_pwrite() failed)
     604                 : */
     605                 : 
     606                 : static my_bool pagecache_fwrite(PAGECACHE *pagecache,
     607                 :                                 PAGECACHE_FILE *filedesc,
     608                 :                                 uchar *buffer,
     609                 :                                 pgcache_page_no_t pageno,
     610                 :                                 enum pagecache_page_type type
     611                 :                                 __attribute__((unused)),
     612                 :                                 myf flags)
     613               0 : {
     614               0 :   DBUG_ENTER("pagecache_fwrite");
     615               0 :   DBUG_ASSERT(type != PAGECACHE_READ_UNKNOWN_PAGE);
     616                 : 
     617                 :   /* Todo: Integrate this with write_callback so we have only one callback */
     618               0 :   if ((*filedesc->flush_log_callback)(buffer, pageno, filedesc->callback_data))
     619               0 :     DBUG_RETURN(1);
     620               0 :   DBUG_PRINT("info", ("write_callback: 0x%lx  data: 0x%lx",
     621                 :                       (ulong) filedesc->write_callback,
     622                 :                       (ulong) filedesc->callback_data));
     623               0 :   if ((*filedesc->write_callback)(buffer, pageno, filedesc->callback_data))
     624                 :   {
     625               0 :     DBUG_PRINT("error", ("write callback problem"));
     626               0 :     DBUG_RETURN(1);
     627                 :   }
     628               0 :   if (my_pwrite(filedesc->file, buffer, pagecache->block_size,
     629                 :                 ((my_off_t) pageno << pagecache->shift), flags))
     630                 :   {
     631               0 :     (*filedesc->write_fail)(filedesc->callback_data);
     632               0 :     DBUG_RETURN(1);
     633                 :   }
     634               0 :   DBUG_RETURN(0);
     635                 : }
     636                 : 
     637                 : 
     638                 : /*
     639                 :   Read page from the disk (macro that expands to a my_pread() call)
     640                 : 
     641                 :   SYNOPSIS
     642                 :     pagecache_fread()
     643                 :     pagecache - page cache pointer
     644                 :     filedesc  - pagecache file descriptor structure
     645                 :     buffer    - buffer in which we will read
     646                 :     pageno    - page number; file offset is pageno << pagecache->shift
     647                 :     flags     - MYF() flags
     648                 : */
     649                 : #define pagecache_fread(pagecache, filedesc, buffer, pageno, flags) \
     650                 :   my_pread((filedesc)->file, buffer, pagecache->block_size,         \
     651                 :            ((my_off_t) pageno << pagecache->shift), flags)
     652                 : 
     653                 : 
     654                 : /**
     655                 :   @brief Set rec_lsn of a pagecache block if it is still LSN_MAX
     656                 :   @note  Otherwise rec_lsn is kept; a debug assert compares it to the new LSN
     657                 :   @param block                   block where to set rec_lsn
     658                 :   @param first_REDO_LSN_for_page the LSN to set
     659                 : */
     660                 : 
     661                 : static inline void pagecache_set_block_rec_lsn(PAGECACHE_BLOCK_LINK *block,
     662                 :                                                LSN first_REDO_LSN_for_page)
     663               0 : {
     664               0 :   if (block->rec_lsn == LSN_MAX)
     665               0 :     block->rec_lsn= first_REDO_LSN_for_page;
     666                 :   else
     667               0 :     DBUG_ASSERT(cmp_translog_addr(block->rec_lsn,
     668                 :                                   first_REDO_LSN_for_page) <= 0);
     669                 : }
     670                 : 
     671                 : 
     672                 : /*
     673                 :   next_power(value) is 2 at the power of (1+floor(log2(value)));
     674                 :   e.g. next_power(2)=4, next_power(3)=4.  NOTE(review): the code doubles
     675                 :   my_round_up_to_next_power(value); check the next_power(3) example. */
     676                 : static inline uint next_power(uint value)
     677              10 : {
     678              10 :   return (uint) my_round_up_to_next_power((uint32) value) << 1;
     679                 : }
     680                 : 
     681                 : 
     682                 : /*
     683                 :   Initialize a page cache
     684                 : 
     685                 :   SYNOPSIS
     686                 :     init_pagecache()
     687                 :     pagecache                   pointer to a page cache data structure
     688                 :     use_mem                     total memory to use for the page cache
     689                 :     division_limit              division limit (may be zero)
     690                 :     age_threshold               age threshold (may be zero)
     691                 :     block_size                  size of blocks to keep cached data
     692                 :                                 (power of 2, >= 512; checked by debug asserts)
     693                 :     my_readwrite_flags          Flags used for all pread/pwrite calls
     694                 :                                 Usually MY_WME in case of recovery
     695                 : 
     696                 :   RETURN VALUE
     697                 :     number of blocks in the page cache, if successful,
     698                 :     0 - otherwise.
     699                 : 
     700                 :   NOTES.
     701                 :     if pagecache->inited != 0 we assume that the page cache
     702                 :     is already initialized.  This is for now used by myisamchk, but shouldn't
     703                 :     be something that a program should rely on!
     704                 :     With disk_blocks > 0 too, 0 is returned; else mutex/hash init is skipped.
     705                 :     It's assumed that no two threads call this function simultaneously
     706                 :     referring to the same page cache handle.
     707                 : 
     708                 : */
     709                 : 
     710                 : ulong init_pagecache(PAGECACHE *pagecache, size_t use_mem,
     711                 :                      uint division_limit, uint age_threshold,
     712                 :                      uint block_size, myf my_readwrite_flags)
     713              10 : {
     714                 :   ulong blocks, hash_links, length;
     715                 :   int error;
     716              10 :   DBUG_ENTER("init_pagecache");
     717              10 :   DBUG_ASSERT(block_size >= 512);
     718                 : 
     719                 :   PAGECACHE_DEBUG_OPEN;
     720              10 :   if (pagecache->inited && pagecache->disk_blocks > 0)
     721                 :   {
     722               0 :     DBUG_PRINT("warning",("key cache already in use"));
     723               0 :     DBUG_RETURN(0);
     724                 :   }
     725                 : 
     726              10 :   pagecache->global_cache_w_requests= pagecache->global_cache_r_requests= 0;
     727              10 :   pagecache->global_cache_read= pagecache->global_cache_write= 0;
     728              10 :   pagecache->disk_blocks= -1;
     729              10 :   if (! pagecache->inited)
     730                 :   {
     731              10 :     if (pthread_mutex_init(&pagecache->cache_lock, MY_MUTEX_INIT_FAST) ||
     732                 :         hash_init(&pagecache->files_in_flush, &my_charset_bin, 32,
     733                 :                   offsetof(struct st_file_in_flush, file),
     734                 :                   sizeof(((struct st_file_in_flush *)NULL)->file),
     735                 :                   NULL, NULL, 0))
     736                 :       goto err;
     737              10 :     pagecache->inited= 1;
     738              10 :     pagecache->in_init= 0;
     739              10 :     pagecache->resize_queue.last_thread= NULL;
     740                 :   }
     741                 : 
     742              10 :   pagecache->mem_size= use_mem;
     743              10 :   pagecache->block_size= block_size;
     744              10 :   pagecache->shift= my_bit_log2(block_size);
     745              10 :   pagecache->readwrite_flags= my_readwrite_flags | MY_NABP | MY_WAIT_IF_FULL;
     746              10 :   pagecache->org_readwrite_flags= pagecache->readwrite_flags;
     747              10 :   DBUG_PRINT("info", ("block_size: %u", block_size));
     748              10 :   DBUG_ASSERT(((uint)(1 << pagecache->shift)) == block_size);
     749                 :   /* First guess: each block costs its link, 2 hash links, 5/4 hash slot and a page */
     750              10 :   blocks= (ulong) (use_mem / (sizeof(PAGECACHE_BLOCK_LINK) +
     751                 :                               2 * sizeof(PAGECACHE_HASH_LINK) +
     752                 :                               sizeof(PAGECACHE_HASH_LINK*) *
     753                 :                               5/4 + block_size));
     754                 :   /*
     755                 :     We need to support page cache with just one block to be able to do
     756                 :     scanning of rows-in-block files.  NOTE(review): the loop below still
     757                 :     fails with ENOMEM for fewer than 8 blocks. */
     758                 :   for ( ; ; )
     759                 :   {
     760              10 :     if (blocks < 8)
     761                 :     {
     762               0 :       my_errno= ENOMEM;
     763               0 :       goto err;
     764                 :     }
     765                 :     /* Set hash_entries to the next bigger 2 power; double if below blocks*5/4 */
     766              10 :     if ((pagecache->hash_entries= next_power(blocks)) <
     767                 :         (blocks) * 5/4)
     768               0 :       pagecache->hash_entries<<= 1;
     769              10 :     hash_links= 2 * blocks;
     770                 : #if defined(MAX_THREADS)
     771                 :     if (hash_links < MAX_THREADS + blocks - 1)
     772                 :       hash_links= MAX_THREADS + blocks - 1;
     773                 : #endif
     774              20 :     while ((length= (ALIGN_SIZE(blocks * sizeof(PAGECACHE_BLOCK_LINK)) +
     775                 :                      ALIGN_SIZE(hash_links * sizeof(PAGECACHE_HASH_LINK)) +
     776                 :                      ALIGN_SIZE(sizeof(PAGECACHE_HASH_LINK*) *
     777                 :                                 pagecache->hash_entries))) +
     778                 :            (blocks << pagecache->shift) > use_mem)
     779               0 :       blocks--;
     780                 :     /* Allocate memory for cache page buffers */
     781              10 :     if ((pagecache->block_mem=
     782                 :          my_large_malloc((ulong) blocks * pagecache->block_size,
     783                 :                          MYF(MY_WME))))
     784                 :     {
     785                 :       /*
     786                 :         Allocate memory for blocks, hash_links and hash entries;
     787                 :         For each block 2 hash links are allocated
     788                 :       */
     789              10 :       if ((pagecache->block_root=
     790                 :            (PAGECACHE_BLOCK_LINK*) my_malloc((size_t) length, MYF(0))))
     791               0 :         break;
     792               0 :       my_large_free(pagecache->block_mem, MYF(0));
     793               0 :       pagecache->block_mem= 0;
     794                 :     }
     795               0 :     blocks= blocks / 4*3;               /* allocation failed: retry smaller */
     796               0 :   }
     797              10 :   pagecache->blocks_unused= blocks;
     798              10 :   pagecache->disk_blocks= (long) blocks;
     799              10 :   pagecache->hash_links= hash_links;
     800              10 :   pagecache->hash_root=
     801                 :     (PAGECACHE_HASH_LINK**) ((char*) pagecache->block_root +
     802                 :                              ALIGN_SIZE(blocks*sizeof(PAGECACHE_BLOCK_LINK)));
     803              10 :   pagecache->hash_link_root=
     804                 :     (PAGECACHE_HASH_LINK*) ((char*) pagecache->hash_root +
     805                 :                             ALIGN_SIZE((sizeof(PAGECACHE_HASH_LINK*) *
     806                 :                                         pagecache->hash_entries)));
     807              10 :   bzero((uchar*) pagecache->block_root,
     808                 :         pagecache->disk_blocks * sizeof(PAGECACHE_BLOCK_LINK));
     809              10 :   bzero((uchar*) pagecache->hash_root,
     810                 :         pagecache->hash_entries * sizeof(PAGECACHE_HASH_LINK*));
     811              10 :   bzero((uchar*) pagecache->hash_link_root,
     812                 :         pagecache->hash_links * sizeof(PAGECACHE_HASH_LINK));
     813              10 :   pagecache->hash_links_used= 0;
     814              10 :   pagecache->free_hash_list= NULL;
     815              10 :   pagecache->blocks_used= pagecache->blocks_changed= 0;
     816                 : 
     817              10 :   pagecache->global_blocks_changed= 0;
     818              10 :   pagecache->blocks_available=0;             /* For debugging */
     819                 : 
     820                 :   /* The LRU chain is empty after initialization */
     821              10 :   pagecache->used_last= NULL;
     822              10 :   pagecache->used_ins= NULL;
     823              10 :   pagecache->free_block_list= NULL;
     824              10 :   pagecache->time= 0;
     825              10 :   pagecache->warm_blocks= 0;
     826              10 :   pagecache->min_warm_blocks= (division_limit ?
     827                 :                                blocks * division_limit / 100 + 1 :
     828                 :                                blocks);
     829              10 :   pagecache->age_threshold= (age_threshold ?
     830                 :                              blocks * age_threshold / 100 :
     831                 :                              blocks);
     832                 : 
     833              10 :   pagecache->cnt_for_resize_op= 0;
     834              10 :   pagecache->resize_in_flush= 0;
     835              10 :   pagecache->can_be_used= 1;
     836                 : 
     837              10 :   pagecache->waiting_for_hash_link.last_thread= NULL;
     838              10 :   pagecache->waiting_for_block.last_thread= NULL;
     839              10 :   DBUG_PRINT("exit",
     840                 :              ("disk_blocks: %ld  block_root: 0x%lx  hash_entries: %ld\
     841                 :  hash_root: 0x%lx  hash_links: %ld  hash_link_root: 0x%lx",
     842                 :               pagecache->disk_blocks, (long) pagecache->block_root,
     843                 :               pagecache->hash_entries, (long) pagecache->hash_root,
     844                 :               pagecache->hash_links, (long) pagecache->hash_link_root));
     845              10 :   bzero((uchar*) pagecache->changed_blocks,
     846                 :         sizeof(pagecache->changed_blocks[0]) *
     847                 :         PAGECACHE_CHANGED_BLOCKS_HASH);
     848              10 :   bzero((uchar*) pagecache->file_blocks,
     849                 :         sizeof(pagecache->file_blocks[0]) *
     850                 :         PAGECACHE_CHANGED_BLOCKS_HASH);
     851                 : 
     852              10 :   pagecache->blocks= pagecache->disk_blocks > 0 ? pagecache->disk_blocks : 0;
     853              10 :   DBUG_RETURN((ulong) pagecache->disk_blocks);
     854                 :   /* Error exit. NOTE(review): block_mem/block_root may still hold entry values here */
     855               0 : err:
     856               0 :   error= my_errno;
     857               0 :   pagecache->disk_blocks= 0;
     858               0 :   pagecache->blocks=  0;
     859               0 :   if (pagecache->block_mem)
     860                 :   {
     861               0 :     my_large_free(pagecache->block_mem, MYF(0));
     862               0 :     pagecache->block_mem= NULL;
     863                 :   }
     864               0 :   if (pagecache->block_root)
     865                 :   {
     866               0 :     my_free(pagecache->block_root, MYF(0));
     867               0 :     pagecache->block_root= NULL;
     868                 :   }
     869               0 :   my_errno= error;
     870               0 :   pagecache->can_be_used= 0;
     871               0 :   DBUG_RETURN(0);
     872                 : }
     873                 : 
     874                 : 
     875                 : /*
     876                 :   Flush all changed blocks in the key cache to disk, file by file.
     877                 :   Returns 1 if a flush fails, else 0.  Compiled only with NOT_USED. */
     878                 : 
     879                 : #ifdef NOT_USED
     880                 : static int flush_all_key_blocks(PAGECACHE *pagecache)
     881                 : {
     882                 : #if defined(PAGECACHE_DEBUG)
     883                 :   uint cnt=0;
     884                 : #endif
     885                 :   while (pagecache->blocks_changed > 0)
     886                 :   {
     887                 :     PAGECACHE_BLOCK_LINK *block;
     888                 :     for (block= pagecache->used_last->next_used ; ; block=block->next_used)
     889                 :     {
     890                 :       if (block->hash_link)
     891                 :       {
     892                 : #if defined(PAGECACHE_DEBUG)
     893                 :         cnt++;
     894                 :         KEYCACHE_DBUG_ASSERT(cnt <= pagecache->blocks_used);
     895                 : #endif
     896                 :         if (flush_pagecache_blocks_int(pagecache, &block->hash_link->file,
     897                 :                                        FLUSH_RELEASE, NULL, NULL))
     898                 :           return 1;
     899                 :         break;
     900                 :       }
     901                 :       if (block == pagecache->used_last)
     902                 :         break;
     903                 :     }
     904                 :   }
     905                 :   return 0;
     906                 : }
     907                 : #endif /* NOT_USED */
     908                 : 
     909                 : /*
     910                 :   Resize a key cache
     911                 : 
     912                 :   SYNOPSIS
     913                 :     resize_pagecache()
     914                 :     pagecache                   pointer to a page cache data structure
     915                 :     use_mem                     total memory to use for the new key cache
     916                 :     division_limit              new division limit (if not zero)
     917                 :     age_threshold               new age threshold (if not zero)
     918                 : 
     919                 :   RETURN VALUE
     920                 :     number of blocks in the key cache, if successful,
     921                 :     0 - otherwise.
     922                 : 
     923                 :   NOTES.
     924                 :     The function first compares the memory size parameter
     925                 :     with the key cache value.
     926                 : 
     927                 :     If they differ the function frees the memory allocated for the
     928                 :     old key cache blocks by calling the end_pagecache function and
     929                 :     then rebuilds the key cache with new blocks by calling
     930                 :     init_pagecache.
     931                 : 
     932                 :     The function starts the operation only when all other threads
     933                 :     performing operations with the key cache let it proceed
     934                 :     (when cnt_for_resize_op=0).
     935                 : 
     936                 :      Before being usable, this function needs:
     937                 :      - to receive fixes for BUG#17332 "changing key_buffer_size on a running
     938                 :      server can crash under load" similar to those done to the key cache
     939                 :      - to have us (Sanja) look at the additional constraints placed on
     940                 :      resizing, due to the page locking specific to this page cache.
     941                 :      So we disable it for now.
     942                 : */
     943                 : #if NOT_USED /* keep disabled until code is fixed see above !! */
     944                 : ulong resize_pagecache(PAGECACHE *pagecache,
     945                 :                        size_t use_mem, uint division_limit,
     946                 :                        uint age_threshold)
     947                 : {
     948                 :   ulong blocks;
     949                 : #ifdef THREAD
     950                 :   struct st_my_thread_var *thread;
     951                 :   WQUEUE *wqueue;
     952                 : 
     953                 : #endif
     954                 :   DBUG_ENTER("resize_pagecache");
     955                 : 
     956                 :   if (!pagecache->inited)
     957                 :     DBUG_RETURN(pagecache->disk_blocks);
     958                 : 
     959                 :   if(use_mem == pagecache->mem_size)
     960                 :   {
     961                 :     change_pagecache_param(pagecache, division_limit, age_threshold);
     962                 :     DBUG_RETURN(pagecache->disk_blocks);
     963                 :   }
     964                 : 
     965                 :   pagecache_pthread_mutex_lock(&pagecache->cache_lock);
     966                 : 
     967                 : #ifdef THREAD
     968                 :   wqueue= &pagecache->resize_queue;
     969                 :   thread= my_thread_var;
     970                 :   wqueue_link_into_queue(wqueue, thread);
     971                 : 
     972                 :   while (wqueue->last_thread->next != thread)
     973                 :   {
     974                 :     pagecache_pthread_cond_wait(&thread->suspend, &pagecache->cache_lock);
     975                 :   }
     976                 : #endif
     977                 : 
     978                 :   pagecache->resize_in_flush= 1;
     979                 :   if (flush_all_key_blocks(pagecache))
     980                 :   {
     981                 :     /* TODO: if this happens, we should write a warning in the log file ! */
     982                 :     pagecache->resize_in_flush= 0;
     983                 :     blocks= 0;
     984                 :     pagecache->can_be_used= 0;
     985                 :     goto finish;
     986                 :   }
     987                 :   pagecache->resize_in_flush= 0;
     988                 :   pagecache->can_be_used= 0;
     989                 : #ifdef THREAD
     990                 :   while (pagecache->cnt_for_resize_op)
     991                 :   {
     992                 :     KEYCACHE_DBUG_PRINT("resize_pagecache: wait",
     993                 :                         ("suspend thread %ld", thread->id));
     994                 :     pagecache_pthread_cond_wait(&thread->suspend, &pagecache->cache_lock);
     995                 :   }
     996                 : #else
     997                 :   KEYCACHE_DBUG_ASSERT(pagecache->cnt_for_resize_op == 0);
     998                 : #endif
     999                 : 
    1000                 :   end_pagecache(pagecache, 0);                  /* Don't free mutex */
    1001                 :   /* Works even if use_mem is 0.  FIXME: block_size must be the 5th argument */
    1002                 :   blocks= init_pagecache(pagecache, pagecache->block_size, use_mem,
    1003                 :                          division_limit, age_threshold,
    1004                 :                          pagecache->readwrite_flags);
    1005                 : 
    1006                 : finish:
    1007                 : #ifdef THREAD
    1008                 :   wqueue_unlink_from_queue(wqueue, thread);
    1009                 :   /* Signal for the next resize request to proceed if any */
    1010                 :   if (wqueue->last_thread)
    1011                 :   {
    1012                 :     KEYCACHE_DBUG_PRINT("resize_pagecache: signal",
    1013                 :                         ("thread %ld", wqueue->last_thread->next->id));
    1014                 :     pagecache_pthread_cond_signal(&wqueue->last_thread->next->suspend);
    1015                 :   }
    1016                 : #endif
    1017                 :   pagecache_pthread_mutex_unlock(&pagecache->cache_lock);
    1018                 :   DBUG_RETURN(blocks);
    1019                 : }
    1020                 : #endif /* NOT_USED */
    1021                 : 
    1022                 : 
    1023                 : /*
    1024                 :   Increment the counter that blocks the page cache resize operation
    1025                 : */
    1026                 : static inline void inc_counter_for_resize_op(PAGECACHE *pagecache)
    1027            7743 : {
    1028            7743 :   pagecache->cnt_for_resize_op++;
    1029                 : }
    1030                 : 
    1031                 : 
    1032                 : /*
    1033                 :   Decrement counter blocking resize page cache operation; when it reaches
    1034                 :   zero, signal resize_queue.last_thread->next (if any thread is queued).
    1035                 :   Without THREAD the counter is only decremented. */
    1036                 : static inline void dec_counter_for_resize_op(PAGECACHE *pagecache)
    1037            7743 : {
    1038                 : #ifdef THREAD
    1039                 :   struct st_my_thread_var *last_thread;
    1040            7743 :   if (!--pagecache->cnt_for_resize_op &&
    1041                 :       (last_thread= pagecache->resize_queue.last_thread))
    1042                 :   {
    1043               0 :     KEYCACHE_DBUG_PRINT("dec_counter_for_resize_op: signal",
    1044                 :                         ("thread %ld", last_thread->next->id));
    1045               0 :     pagecache_pthread_cond_signal(&last_thread->next->suspend);
    1046                 :   }
    1047                 : #else
    1048                 :   pagecache->cnt_for_resize_op--;
    1049                 : #endif
    1050                 : }
    1051                 : 
    1052                 : /*
    1053                 :   Change the page cache parameters
    1054                 : 
    1055                 :   SYNOPSIS
    1056                 :     change_pagecache_param()
    1057                 :     pagecache                   pointer to a page cache data structure
    1058                 :     division_limit              new division limit (if not zero)
    1059                 :     age_threshold               new age threshold (if not zero)
    1060                 : 
    1061                 :   RETURN VALUE
    1062                 :     none
    1063                 : 
    1064                 :   NOTES.
    1065                 :     Presently the function resets the page cache parameters
    1066                 :     concerning midpoint insertion strategy - division_limit and
    1067                 :     age_threshold.
    1068                 : */
    1069                 : 
    1070                 : void change_pagecache_param(PAGECACHE *pagecache, uint division_limit,
    1071                 :                             uint age_threshold)
    1072               0 : {
    1073               0 :   DBUG_ENTER("change_pagecache_param");
    1074                 : 
    1075               0 :   pagecache_pthread_mutex_lock(&pagecache->cache_lock);
    1076               0 :   if (division_limit)
    1077               0 :     pagecache->min_warm_blocks= (pagecache->disk_blocks *
    1078                 :                                 division_limit / 100 + 1);
    1079               0 :   if (age_threshold)
    1080               0 :     pagecache->age_threshold=   (pagecache->disk_blocks *
    1081                 :                                 age_threshold / 100);
    1082               0 :   pagecache_pthread_mutex_unlock(&pagecache->cache_lock);
    1083               0 :   DBUG_VOID_RETURN;
    1084                 : }
    1085                 : 
    1086                 : 
    1087                 : /*
    1088                 :   Removes page cache from memory. Does NOT flush pages to disk.
    1089                 : 
    1090                 :   SYNOPSIS
    1091                 :     end_pagecache()
    1092                 :     pagecache           page cache handle
    1093                 :     cleanup             Complete free (also destroys the page cache mutex)
    1094                 : 
    1095                 :   RETURN VALUE
    1096                 :     none
    1097                 : */
    1098                 : 
    1099                 : void end_pagecache(PAGECACHE *pagecache, my_bool cleanup)
    1100             232 : {
    1101             232 :   DBUG_ENTER("end_pagecache");
    1102             232 :   DBUG_PRINT("enter", ("key_cache: 0x%lx", (long) pagecache));
    1103                 : 
    1104             232 :   if (!pagecache->inited)
    1105             224 :     DBUG_VOID_RETURN;
    1106                 : 
    1107               8 :   if (pagecache->disk_blocks > 0)
    1108                 :   {
    1109               8 :     if (pagecache->block_mem)
    1110                 :     {
    1111               8 :       my_large_free(pagecache->block_mem, MYF(0));
    1112               8 :       pagecache->block_mem= NULL;
    1113               8 :       my_free(pagecache->block_root, MYF(0));
    1114               8 :       pagecache->block_root= NULL;
    1115                 :     }
    1116               8 :     pagecache->disk_blocks= -1;
    1117                 :     /* Reset blocks_changed to be safe if flush_all_key_blocks is called */
    1118               8 :     pagecache->blocks_changed= 0;
    1119                 :   }
    1120                 : 
    1121               8 :   DBUG_PRINT("status", ("used: %lu  changed: %lu  w_requests: %lu  "
    1122                 :                         "writes: %lu  r_requests: %lu  reads: %lu",
    1123                 :                         pagecache->blocks_used,
    1124                 :                         pagecache->global_blocks_changed,
    1125                 :                         (ulong) pagecache->global_cache_w_requests,
    1126                 :                         (ulong) pagecache->global_cache_write,
    1127                 :                         (ulong) pagecache->global_cache_r_requests,
    1128                 :                         (ulong) pagecache->global_cache_read));
    1129                 : 
    1130               8 :   if (cleanup)
    1131                 :   {
    1132               8 :     hash_free(&pagecache->files_in_flush);
    1133               8 :     pthread_mutex_destroy(&pagecache->cache_lock);
    1134               8 :     pagecache->inited= pagecache->can_be_used= 0;
    1135                 :     PAGECACHE_DEBUG_CLOSE;
    1136                 :   }
    1137               8 :   DBUG_VOID_RETURN;
    1138                 : } /* end_pagecache */
    1139                 : 
    1140                 : 
    1141                 : /*
    1142                 :   Unlink a block from the chain of dirty/clean blocks
    1143                 : */
    1144                 : 
    1145                 : static inline void unlink_changed(PAGECACHE_BLOCK_LINK *block)
    1146              94 : {
    1147              94 :   if (block->next_changed)
    1148              72 :     block->next_changed->prev_changed= block->prev_changed;
    1149              94 :   *block->prev_changed= block->next_changed;
    1150                 : }
    1151                 : 
    1152                 : 
    1153                 : /*
    1154                 :   Link a block into the chain of dirty/clean blocks
    1155                 : */
    1156                 : 
    1157                 : static inline void link_changed(PAGECACHE_BLOCK_LINK *block,
    1158                 :                                 PAGECACHE_BLOCK_LINK **phead)
    1159              94 : {
    1160              94 :   block->prev_changed= phead;
    1161              94 :   if ((block->next_changed= *phead))
    1162              72 :     (*phead)->prev_changed= &block->next_changed;
    1163              94 :   *phead= block;
    1164                 : }
    1165                 : 
    1166                 : 
    1167                 : /*
    1168                 :   Unlink a block from the chain of dirty/clean blocks, if it's asked for,
    1169                 :   and link it to the chain of clean blocks for the specified file
    1170                 : */
    1171                 : 
    1172                 : static void link_to_file_list(PAGECACHE *pagecache,
    1173                 :                               PAGECACHE_BLOCK_LINK *block,
    1174                 :                               PAGECACHE_FILE *file, my_bool unlink_flag)
    1175              49 : {
    1176              49 :   if (unlink_flag)
    1177               0 :     unlink_changed(block);
    1178              49 :   link_changed(block, &pagecache->file_blocks[FILE_HASH(*file)]);
    1179              49 :   if (block->status & PCBLOCK_CHANGED)
    1180                 :   {
    1181               0 :     block->status&= ~PCBLOCK_CHANGED;
    1182               0 :     block->rec_lsn= LSN_MAX;
    1183               0 :     pagecache->blocks_changed--;
    1184               0 :     pagecache->global_blocks_changed--;
    1185                 :   }
    1186                 : }
    1187                 : 
    1188                 : 
    1189                 : /*
    1190                 :   Unlink a block from the chain of clean blocks for the specified
    1191                 :   file and link it to the chain of dirty blocks for this file
    1192                 : */
    1193                 : 
    1194                 : static inline void link_to_changed_list(PAGECACHE *pagecache,
    1195                 :                                         PAGECACHE_BLOCK_LINK *block)
    1196              45 : {
    1197              45 :   unlink_changed(block);
    1198              45 :   link_changed(block,
    1199                 :                &pagecache->changed_blocks[FILE_HASH(block->hash_link->file)]);
    1200              45 :   block->status|=PCBLOCK_CHANGED;
    1201              45 :   pagecache->blocks_changed++;
    1202              45 :   pagecache->global_blocks_changed++;
    1203                 : }
    1204                 : 
    1205                 : 
    1206                 : /*
    1207                 :   Link a block to the LRU chain at the beginning or at the end of
    1208                 :   one of two parts.
    1209                 : 
    1210                 :   SYNOPSIS
    1211                 :     link_block()
    1212                 :       pagecache            pointer to a page cache data structure
    1213                 :       block               pointer to the block to link to the LRU chain
    1214                 :       hot                 <-> to link the block into the hot subchain
    1215                 :       at_end              <-> to link the block at the end of the subchain
    1216                 : 
    1217                 :   RETURN VALUE
    1218                 :     none
    1219                 : 
    1220                 :   NOTES.
    1221                 :     The LRU chain is represented by a circular list of block structures.
    1222                 :     The list is doubly linked, of the (**prev,*next) type.
    1223                 :     The LRU chain is divided into two parts - hot and warm.
    1224                 :     There are two pointers to access the last blocks of these two
    1225                 :     parts. The beginning of the warm part follows right after the
    1226                 :     end of the hot part.
    1227                 :     Only blocks of the warm part can be used for replacement.
    1228                 :     The first block from the beginning of this subchain is always
    1229                 :     taken for eviction (pagecache->used_last->next_used)
    1230                 : 
    1231                 :     LRU chain:       +------+   H O T    +------+
    1232                 :                 +----| end  |----...<----| beg  |----+
    1233                 :                 |    +------+last        +------+    |
    1234                 :                 v<-link in latest hot (new end)      |
    1235                 :                 |     link in latest warm (new end)->^
    1236                 :                 |    +------+  W A R M   +------+    |
    1237                 :                 +----| beg  |---->...----| end  |----+
    1238                 :                      +------+            +------+ins
    1239                 :                   first for eviction
    1240                 : */
    1241                 : 
    1242                 : static void link_block(PAGECACHE *pagecache, PAGECACHE_BLOCK_LINK *block,
    1243                 :                        my_bool hot, my_bool at_end)
    1244            3954 : {
    1245                 :   PAGECACHE_BLOCK_LINK *ins;
    1246                 :   PAGECACHE_BLOCK_LINK **ptr_ins;
    1247                 : 
    1248            3954 :   PCBLOCK_INFO(block);
    1249            3954 :   KEYCACHE_DBUG_ASSERT(! (block->hash_link && block->hash_link->requests));
    1250                 : #ifdef THREAD
    1251            3954 :   if (!hot && pagecache->waiting_for_block.last_thread)
    1252                 :   {
    1253                 :     /* Signal that in the LRU warm sub-chain an available block has appeared */
    1254                 :     struct st_my_thread_var *last_thread=
    1255               0 :                                pagecache->waiting_for_block.last_thread;
    1256               0 :     struct st_my_thread_var *first_thread= last_thread->next;
    1257               0 :     struct st_my_thread_var *next_thread= first_thread;
    1258                 :     PAGECACHE_HASH_LINK *hash_link=
    1259               0 :       (PAGECACHE_HASH_LINK *) first_thread->opt_info;
    1260                 :     struct st_my_thread_var *thread;
    1261                 :     do
    1262                 :     {
    1263               0 :       thread= next_thread;
    1264               0 :       next_thread= thread->next;
    1265                 :       /*
    1266                 :          We notify about the event all threads that ask
    1267                 :          for the same page as the first thread in the queue
    1268                 :       */
    1269               0 :       if ((PAGECACHE_HASH_LINK *) thread->opt_info == hash_link)
    1270                 :       {
    1271               0 :         KEYCACHE_DBUG_PRINT("link_block: signal", ("thread: %ld", thread->id));
    1272               0 :         pagecache_pthread_cond_signal(&thread->suspend);
    1273               0 :         wqueue_unlink_from_queue(&pagecache->waiting_for_block, thread);
    1274               0 :         block->requests++;
    1275                 :       }
    1276                 :     }
    1277               0 :     while (thread != last_thread);
    1278               0 :     hash_link->block= block;
    1279               0 :     KEYCACHE_THREAD_TRACE("link_block: after signaling");
    1280                 : #if defined(PAGECACHE_DEBUG)
    1281                 :     KEYCACHE_DBUG_PRINT("link_block",
    1282                 :         ("linked,unlinked block: %u  status: %x  #requests: %u  #available: %u",
    1283                 :          PCBLOCK_NUMBER(pagecache, block), block->status,
    1284                 :          block->requests, pagecache->blocks_available));
    1285                 : #endif
    1286               0 :     return;
    1287                 :   }
    1288                 : #else /* THREAD */
    1289                 :   KEYCACHE_DBUG_ASSERT(! (!hot && pagecache->waiting_for_block.last_thread));
    1290                 :   /* Condition not transformed using DeMorgan, to keep the text identical */
    1291                 : #endif /* THREAD */
    1292            3954 :   ptr_ins= hot ? &pagecache->used_ins : &pagecache->used_last;
    1293            3954 :   ins= *ptr_ins;
    1294            3954 :   if (ins)
    1295                 :   {
    1296            3924 :     ins->next_used->prev_used= &block->next_used;
    1297            3924 :     block->next_used= ins->next_used;
    1298            3924 :     block->prev_used= &ins->next_used;
    1299            3924 :     ins->next_used= block;
    1300            3924 :     if (at_end)
    1301            3888 :       *ptr_ins= block;
    1302                 :   }
    1303                 :   else
    1304                 :   {
    1305                 :     /* The LRU chain is empty */
    1306              30 :     pagecache->used_last= pagecache->used_ins= block->next_used= block;
    1307              30 :     block->prev_used= &block->next_used;
    1308                 :   }
    1309            3954 :   KEYCACHE_THREAD_TRACE("link_block");
    1310                 : #if defined(PAGECACHE_DEBUG)
    1311                 :   pagecache->blocks_available++;
    1312                 :   KEYCACHE_DBUG_PRINT("link_block",
    1313                 :                       ("linked block: %u:%1u  status: %x  #requests: %u  #available: %u",
    1314                 :                        PCBLOCK_NUMBER(pagecache, block), at_end, block->status,
    1315                 :                        block->requests, pagecache->blocks_available));
    1316                 :   KEYCACHE_DBUG_ASSERT((ulong) pagecache->blocks_available <=
    1317                 :                        pagecache->blocks_used);
    1318                 : #endif
    1319                 : }
    1320                 : 
    1321                 : 
    1322                 : /*
    1323                 :   Unlink a block from the LRU chain
    1324                 : 
    1325                 :   SYNOPSIS
    1326                 :     unlink_block()
    1327                 :       pagecache            pointer to a page cache data structure
    1328                 :       block               pointer to the block to unlink from the LRU chain
    1329                 : 
    1330                 :   RETURN VALUE
    1331                 :     none
    1332                 : 
    1333                 :   NOTES.
    1334                 :     See NOTES for link_block
    1335                 : */
    1336                 : 
    1337                 : static void unlink_block(PAGECACHE *pagecache, PAGECACHE_BLOCK_LINK *block)
    1338            3954 : {
    1339            3954 :   DBUG_ENTER("unlink_block");
    1340            3954 :   DBUG_PRINT("unlink_block", ("unlink 0x%lx", (ulong)block));
    1341            3954 :   DBUG_ASSERT(block->next_used != NULL);
    1342            3954 :   if (block->next_used == block)
    1343                 :   {
    1344                 :     /* The list contains only one member */
    1345              30 :     pagecache->used_last= pagecache->used_ins= NULL;
    1346                 :   }
    1347                 :   else
    1348                 :   {
    1349            3924 :     block->next_used->prev_used= block->prev_used;
    1350            3924 :     *block->prev_used= block->next_used;
    1351            3924 :     if (pagecache->used_last == block)
    1352            3798 :       pagecache->used_last= STRUCT_PTR(PAGECACHE_BLOCK_LINK,
    1353                 :                                        next_used, block->prev_used);
    1354            3924 :     if (pagecache->used_ins == block)
    1355              63 :       pagecache->used_ins= STRUCT_PTR(PAGECACHE_BLOCK_LINK,
    1356                 :                                       next_used, block->prev_used);
    1357                 :   }
    1358            3954 :   block->next_used= NULL;
    1359                 : 
    1360            3954 :   KEYCACHE_THREAD_TRACE("unlink_block");
    1361                 : #if defined(PAGECACHE_DEBUG)
    1362                 :   KEYCACHE_DBUG_ASSERT(pagecache->blocks_available != 0);
    1363                 :   pagecache->blocks_available--;
    1364                 :   KEYCACHE_DBUG_PRINT("unlink_block",
    1365                 :                       ("unlinked block: 0x%lx (%u)  status: %x   #requests: %u  #available: %u",
    1366                 :                        (ulong)block, PCBLOCK_NUMBER(pagecache, block),
    1367                 :                        block->status,
    1368                 :                        block->requests, pagecache->blocks_available));
    1369                 :   PCBLOCK_INFO(block);
    1370                 : #endif
    1371            3954 :   DBUG_VOID_RETURN;
    1372                 : }
    1373                 : 
    1374                 : 
    1375                 : /*
    1376                 :   Register requests for a block
    1377                 : 
    1378                 :   SYNOPSIS
    1379                 :     reg_requests()
    1380                 :     pagecache            this page cache reference
    1381                 :     block                the block to register the requests for
    1382                 :     count                how many requests we register (it is 1 everywhere)
    1383                 : 
    1384                 :   NOTE
    1385                 :   Registering a request means we are going to use this block, so we exclude
    1386                 :   it from the LRU chain if it is the first request
    1387                 : */
    1388                 : static void reg_requests(PAGECACHE *pagecache, PAGECACHE_BLOCK_LINK *block,
    1389                 :                          int count)
    1390            3905 : {
    1391            3905 :   DBUG_ENTER("reg_requests");
    1392            3905 :   DBUG_PRINT("enter", ("block: 0x%lx (%u)  status: %x  reqs: %u",
    1393                 :                        (ulong)block, PCBLOCK_NUMBER(pagecache, block),
    1394                 :                        block->status, block->requests));
    1395            3905 :   PCBLOCK_INFO(block);
    1396            3905 :   if (! block->requests)
    1397                 :     /* First request for the block unlinks it */
    1398            3905 :     unlink_block(pagecache, block);
    1399            3905 :   block->requests+= count;
    1400            3905 :   DBUG_VOID_RETURN;
    1401                 : }
    1402                 : 
    1403                 : 
    1404                 : /*
    1405                 :   Unregister request for a block
    1406                 :   linking it to the LRU chain if it's the last request
    1407                 : 
    1408                 :   SYNOPSIS
    1409                 :     unreg_request()
    1410                 :     pagecache            pointer to a page cache data structure
    1411                 :     block               pointer to the block to link to the LRU chain
    1412                 :     at_end              <-> to link the block at the end of the LRU chain
    1413                 : 
    1414                 :   RETURN VALUE
    1415                 :     none
    1416                 : 
    1417                 :   NOTES.
    1418                 :     Every linking to the LRU chain decrements by one a special block
    1419                 :     counter (if it's positive). If the at_end parameter is TRUE the block is
    1420                 :     added either at the end of warm sub-chain or at the end of hot sub-chain.
    1421                 :     It is added to the hot sub-chain if its counter is zero and the number of
    1422                 :     blocks in the warm sub-chain is greater than some low limit (determined by
    1423                 :     the division_limit parameter). Otherwise the block is added to the warm
    1424                 :     sub-chain. If the at_end parameter is FALSE the block is always added
    1425                 :     at the beginning of the warm sub-chain.
    1426                 :     Thus a warm block can be promoted to the hot sub-chain when its counter
    1427                 :     becomes zero for the first time.
    1428                 :     At the same time the block at the very beginning of the hot sub-chain
    1429                 :     might be moved to the beginning of the warm sub-chain if it stays untouched
    1430                 :     for too long (this time is determined by the age_threshold parameter).
    1431                 : */
    1432                 : 
    1433                 : static void unreg_request(PAGECACHE *pagecache,
    1434                 :                           PAGECACHE_BLOCK_LINK *block, int at_end)
    1435            3954 : {
    1436            3954 :   DBUG_ENTER("unreg_request");
    1437            3954 :   DBUG_PRINT("enter", ("block 0x%lx (%u)  status: %x  reqs: %u",
    1438                 :                        (ulong)block, PCBLOCK_NUMBER(pagecache, block),
    1439                 :                        block->status, block->requests));
    1440            3954 :   PCBLOCK_INFO(block);
    1441            3954 :   DBUG_ASSERT(block->requests > 0);
    1442            3954 :   if (! --block->requests)
    1443                 :   {
    1444                 :     my_bool hot;
    1445            3954 :     if (block->hits_left)
    1446              12 :       block->hits_left--;
    1447            3954 :     hot= !block->hits_left && at_end &&
    1448                 :       pagecache->warm_blocks > pagecache->min_warm_blocks;
    1449            3954 :     if (hot)
    1450                 :     {
    1451               0 :       if (block->temperature == PCBLOCK_WARM)
    1452               0 :         pagecache->warm_blocks--;
    1453               0 :       block->temperature= PCBLOCK_HOT;
    1454               0 :       KEYCACHE_DBUG_PRINT("unreg_request", ("#warm_blocks: %lu",
    1455                 :                            pagecache->warm_blocks));
    1456                 :     }
    1457            3954 :     link_block(pagecache, block, hot, (my_bool)at_end);
    1458            3954 :     block->last_hit_time= pagecache->time;
    1459            3954 :     pagecache->time++;
    1460                 : 
    1461            3954 :     block= pagecache->used_ins;
    1462                 :     /* Check if we should link a hot block to the warm block */
    1463            3954 :     if (block && pagecache->time - block->last_hit_time >
    1464                 :         pagecache->age_threshold)
    1465                 :     {
    1466               0 :       unlink_block(pagecache, block);
    1467               0 :       link_block(pagecache, block, 0, 0);
    1468               0 :       if (block->temperature != PCBLOCK_WARM)
    1469                 :       {
    1470               0 :         pagecache->warm_blocks++;
    1471               0 :         block->temperature= PCBLOCK_WARM;
    1472                 :       }
    1473               0 :       KEYCACHE_DBUG_PRINT("unreg_request", ("#warm_blocks: %lu",
    1474                 :                            pagecache->warm_blocks));
    1475                 :     }
    1476                 :   }
    1477            3954 :   DBUG_VOID_RETURN;
    1478                 : }
    1479                 : 
    1480                 : /*
    1481                 :   Remove a reader of the page in block
    1482                 : */
    1483                 : 
    1484                 : static inline void remove_reader(PAGECACHE_BLOCK_LINK *block)
    1485            3852 : {
    1486            3852 :   DBUG_ENTER("remove_reader");
    1487            3852 :   PCBLOCK_INFO(block);
    1488            3852 :   DBUG_ASSERT(block->hash_link->requests > 0);
    1489                 : #ifdef THREAD
    1490            3852 :   if (! --block->hash_link->requests && block->condvar)
    1491               0 :     pagecache_pthread_cond_signal(block->condvar);
    1492                 : #else
    1493                 :   --block->hash_link->requests;
    1494                 : #endif
    1495            3852 :   DBUG_VOID_RETURN;
    1496                 : }
    1497                 : 
    1498                 : 
    1499                 : /*
    1500                 :   Wait until the last reader of the page in block
    1501                 :   signals on its termination
    1502                 : */
    1503                 : 
    1504                 : static inline void wait_for_readers(PAGECACHE *pagecache
    1505                 :                                     __attribute__((unused)),
    1506                 :                                     PAGECACHE_BLOCK_LINK *block)
    1507              49 : {
    1508                 : #ifdef THREAD
    1509              49 :   struct st_my_thread_var *thread= my_thread_var;
    1510              98 :   while (block->hash_link->requests)
    1511                 :   {
    1512               0 :     KEYCACHE_DBUG_PRINT("wait_for_readers: wait",
    1513                 :                         ("suspend thread: %ld  block: %u",
    1514                 :                          thread->id, PCBLOCK_NUMBER(pagecache, block)));
    1515               0 :     block->condvar= &thread->suspend;
    1516               0 :     pagecache_pthread_cond_wait(&thread->suspend, &pagecache->cache_lock);
    1517               0 :     block->condvar= NULL;
    1518                 :   }
    1519                 : #else
    1520                 :   KEYCACHE_DBUG_ASSERT(block->hash_link->requests == 0);
    1521                 : #endif
    1522                 : }
    1523                 : 
    1524                 : 
    1525                 : /*
    1526                 :   Add a hash link to a bucket in the hash_table
    1527                 : */
    1528                 : 
    1529                 : static inline void link_hash(PAGECACHE_HASH_LINK **start,
    1530                 :                              PAGECACHE_HASH_LINK *hash_link)
    1531              49 : {
    1532              49 :   if (*start)
    1533               0 :     (*start)->prev= &hash_link->next;
    1534              49 :   hash_link->next= *start;
    1535              49 :   hash_link->prev= start;
    1536              49 :   *start= hash_link;
    1537                 : }
    1538                 : 
    1539                 : 
    1540                 : /*
    1541                 :   Remove a hash link from the hash table
    1542                 : */
    1543                 : 
    1544                 : static void unlink_hash(PAGECACHE *pagecache, PAGECACHE_HASH_LINK *hash_link)
    1545              49 : {
    1546              49 :   KEYCACHE_DBUG_PRINT("unlink_hash", ("fd: %u  pos_ %lu  #requests=%u",
    1547                 :       (uint) hash_link->file.file, (ulong) hash_link->pageno,
    1548                 :       hash_link->requests));
    1549              49 :   KEYCACHE_DBUG_ASSERT(hash_link->requests == 0);
    1550              49 :   if ((*hash_link->prev= hash_link->next))
    1551               0 :     hash_link->next->prev= hash_link->prev;
    1552              49 :   hash_link->block= NULL;
    1553                 : #ifdef THREAD
    1554              49 :   if (pagecache->waiting_for_hash_link.last_thread)
    1555                 :   {
    1556                 :     /* Signal that a free hash link has appeared */
    1557                 :     struct st_my_thread_var *last_thread=
    1558               0 :                                pagecache->waiting_for_hash_link.last_thread;
    1559               0 :     struct st_my_thread_var *first_thread= last_thread->next;
    1560               0 :     struct st_my_thread_var *next_thread= first_thread;
    1561               0 :     PAGECACHE_PAGE *first_page= (PAGECACHE_PAGE *) (first_thread->opt_info);
    1562                 :     struct st_my_thread_var *thread;
    1563                 : 
    1564               0 :     hash_link->file= first_page->file;
    1565               0 :     DBUG_ASSERT(first_page->pageno < ((ULL(1)) << 40));
    1566               0 :     hash_link->pageno= first_page->pageno;
    1567                 :     do
    1568                 :     {
    1569                 :       PAGECACHE_PAGE *page;
    1570               0 :       thread= next_thread;
    1571               0 :       page= (PAGECACHE_PAGE *) thread->opt_info;
    1572               0 :       next_thread= thread->next;
    1573                 :       /*
    1574                 :          We notify about the event all threads that ask
    1575                 :          for the same page as the first thread in the queue
    1576                 :       */
    1577               0 :       if (page->file.file == hash_link->file.file &&
    1578                 :           page->pageno == hash_link->pageno)
    1579                 :       {
    1580               0 :         KEYCACHE_DBUG_PRINT("unlink_hash: signal", ("thread %ld", thread->id));
    1581               0 :         pagecache_pthread_cond_signal(&thread->suspend);
    1582               0 :         wqueue_unlink_from_queue(&pagecache->waiting_for_hash_link, thread);
    1583                 :       }
    1584                 :     }
    1585               0 :     while (thread != last_thread);
    1586               0 :     link_hash(&pagecache->hash_root[PAGECACHE_HASH(pagecache,
    1587                 :                                                    hash_link->file,
    1588                 :                                                    hash_link->pageno)],
    1589                 :               hash_link);
    1590               0 :     return;
    1591                 :   }
    1592                 : #else /* THREAD */
    1593                 :   KEYCACHE_DBUG_ASSERT(! (pagecache->waiting_for_hash_link.last_thread));
    1594                 : #endif /* THREAD */
    1595              49 :   hash_link->next= pagecache->free_hash_list;
    1596              49 :   pagecache->free_hash_list= hash_link;
    1597                 : }
    1598                 : 
    1599                 : 
    1600                 : /*
    1601                 :   Look up the hash link for the page (file, pageno) without creating
    1602                 :   one: nothing is added to the hash table if the page is absent.
    1603                 : 
    1604                 :   SYNOPSIS
    1605                 :     get_present_hash_link()
    1606                 :     pagecache            Pagecache reference
    1607                 :     file                 file ID (only its 'file' member is compared)
    1608                 :     pageno               page number in the file
    1609                 :     start           out  address of the hash bucket head for the pair
    1610                 :                          (file, pageno); always set, even on a miss
    1611                 : 
    1612                 :   RETURN
    1613                 :     found hash link with its 'requests' counter incremented, or NULL
    1614                 : */
    1615                 : 
    1616                 : static PAGECACHE_HASH_LINK *get_present_hash_link(PAGECACHE *pagecache,
    1617                 :                                                   PAGECACHE_FILE *file,
    1618                 :                                                   pgcache_page_no_t pageno,
    1619                 :                                                   PAGECACHE_HASH_LINK ***start)
    1620            3905 : {
    1621                 :   reg1 PAGECACHE_HASH_LINK *hash_link;
    1622                 : #if defined(PAGECACHE_DEBUG)
    1623                 :   int cnt; /* number of steps taken along the bucket chain */
    1624                 : #endif
    1625            3905 :   DBUG_ENTER("get_present_hash_link");
    1626                 : 
    1627            3905 :   KEYCACHE_DBUG_PRINT("get_present_hash_link", ("fd: %u  pos: %lu",
    1628                 :                       (uint) file->file, (ulong) pageno));
    1629                 : 
    1630                 :   /*
    1631                 :      Find the bucket in the hash table for the pair (file, pageno);
    1632                 :      *start receives the address of the head of the bucket list,
    1633                 :      hash_link is set to the first member of the list (NULL if empty)
    1634                 :   */
    1635            3905 :   hash_link= *(*start= &pagecache->hash_root[PAGECACHE_HASH(pagecache,
    1636                 :                                                             *file, pageno)]);
    1637                 : #if defined(PAGECACHE_DEBUG)
    1638                 :   cnt= 0;
    1639                 : #endif
    1640                 :   /* Walk the bucket chain until (file, pageno) matches or the chain ends */
    1641            7810 :   while (hash_link &&
    1642                 :          (hash_link->pageno != pageno ||
    1643                 :           hash_link->file.file != file->file))
    1644                 :   {
    1645               0 :     hash_link= hash_link->next;
    1646                 : #if defined(PAGECACHE_DEBUG)
    1647                 :     cnt++;
    1648                 :     if (! (cnt <= pagecache->hash_links_used)) /* more steps than links in use */
    1649                 :     {
    1650                 :       int i;
    1651                 :       for (i=0, hash_link= **start ;
    1652                 :            i < cnt ; i++, hash_link= hash_link->next)
    1653                 :       {
    1654                 :         KEYCACHE_DBUG_PRINT("get_present_hash_link", ("fd: %u  pos: %lu",
    1655                 :             (uint) hash_link->file.file, (ulong) hash_link->pageno));
    1656                 :       }
    1657                 :     }
    1658                 :     KEYCACHE_DBUG_ASSERT(cnt <= pagecache->hash_links_used);
    1659                 : #endif
    1660                 :   }
    1661            3905 :   if (hash_link)
    1662                 :   {
    1663                 :     /* Page is present: register the request on its hash link */
    1664            3856 :     hash_link->requests++;
    1665                 :   }
    1666                 :   /*
    1667                 :     As soon as the caller releases the page cache's lock, "hash_link"
    1668                 :     is potentially obsolete (unusable) information.
    1669                 :   */
    1670            3905 :   DBUG_RETURN(hash_link);
    1671                 : }
    1672                 : 
    1673                 : 
    1674                 : /* Get the hash link for a page. If the page has none, take a free or
    1675                 :    never-used link, set it up for (file, pageno) and pass it to link_hash();
    1676                 :    if no link is left, wait and retry. Returned link has 'requests' bumped. */
    1677                 : 
    1678                 : static PAGECACHE_HASH_LINK *get_hash_link(PAGECACHE *pagecache,
    1679                 :                                           PAGECACHE_FILE *file,
    1680                 :                                           pgcache_page_no_t pageno)
    1681            3905 : {
    1682                 :   reg1 PAGECACHE_HASH_LINK *hash_link;
    1683                 :   PAGECACHE_HASH_LINK **start; /* bucket head, set by get_present_hash_link() */
    1684                 : 
    1685            3905 :   KEYCACHE_DBUG_PRINT("get_hash_link", ("fd: %u  pos: %lu",
    1686                 :                       (uint) file->file, (ulong) pageno));
    1687                 : 
    1688            3905 : restart:
    1689                 :   /* Try to find the page in the cache (this also registers the request) */
    1690            3905 :   hash_link= get_present_hash_link(pagecache, file, pageno,
    1691                 :                                    &start);
    1692            3905 :   if (!hash_link)
    1693                 :   {
    1694                 :     /* There is no hash link in the hash table for the pair (file, pageno) */
    1695              49 :     if (pagecache->free_hash_list)
    1696                 :     {
    1697              20 :       hash_link= pagecache->free_hash_list; /* reuse the head of the free list */
    1698              20 :       pagecache->free_hash_list= hash_link->next;
    1699                 :     }
    1700              29 :     else if (pagecache->hash_links_used < pagecache->hash_links)
    1701                 :     { /* free list is empty: take the next never-used link of the array */
    1702              29 :       hash_link= &pagecache->hash_link_root[pagecache->hash_links_used++];
    1703                 :     }
    1704                 :     else
    1705                 :     {
    1706                 : #ifdef THREAD
    1707                 :       /* No hash link available: publish the wanted page and wait for one */
    1708               0 :       struct st_my_thread_var *thread= my_thread_var;
    1709                 :       PAGECACHE_PAGE page; /* read by unlink_hash() through thread->opt_info */
    1710               0 :       KEYCACHE_DBUG_PRINT("get_hash_link", ("waiting"));
    1711               0 :       page.file= *file;
    1712               0 :       page.pageno= pageno;
    1713               0 :       thread->opt_info= (void *) &page;
    1714               0 :       wqueue_link_into_queue(&pagecache->waiting_for_hash_link, thread);
    1715               0 :       KEYCACHE_DBUG_PRINT("get_hash_link: wait",
    1716                 :                         ("suspend thread %ld", thread->id));
    1717               0 :       pagecache_pthread_cond_wait(&thread->suspend,
    1718                 :                                  &pagecache->cache_lock);
    1719               0 :       thread->opt_info= NULL; /* 'page' is a local; do not leave a pointer to it */
    1720                 : #else
    1721                 :       KEYCACHE_DBUG_ASSERT(0); /* NOTE(review): if this assert is compiled out, the goto below looks like an endless loop - confirm */
    1722                 : #endif
    1723               0 :       DBUG_PRINT("info", ("restarting..."));
    1724               0 :       goto restart; /* repeat the lookup: unlink_hash() re-links a freed link for the first waiter's page */
    1725                 :     }
    1726              49 :     hash_link->file= *file;
    1727              49 :     DBUG_ASSERT(pageno < ((ULL(1)) << 40)); /* page number must fit in 40 bits */
    1728              49 :     hash_link->pageno= pageno;
    1729              49 :     link_hash(start, hash_link);
    1730                 :     /* Register the request for the page on the newly set up link */
    1731              49 :     hash_link->requests++;
    1732                 :   }
    1733                 : 
    1734            3905 :   return hash_link;
    1735                 : }
    1736                 : 
    1737                 : 
    1738                 : /*
    1739                 :   Get a block for the file page requested by a pagecache read/write operation;
    1740                 :   If the page is not in the cache return a free block, if there is none
    1741                 :   return the lru block after saving its buffer if the page is dirty.
    1742                 : 
    1743                 :   SYNOPSIS
    1744                 : 
    1745                 :     find_block()
    1746                 :       pagecache            pointer to a page cache data structure
    1747                 :       file                handler for the file to read page from
    1748                 :       pageno              number of the page in the file
    1749                 :       init_hits_left      how to initialize the block counter for the page
    1750                 :       wrmode              <-> get for writing
    1751                 :       reg_req             Register request to the page
    1752                 :       page_st        out  {PAGE_READ,PAGE_TO_BE_READ,PAGE_WAIT_TO_BE_READ}
    1753                 : 
    1754                 :   RETURN VALUE
    1755                 :     Pointer to the found block if successful, 0 - otherwise
    1756                 : 
    1757                 :   NOTES.
    1758                 :     For the page from file positioned at pageno the function checks whether
    1759                 :     the page is in the page cache specified by the first parameter.
    1760                 :     If this is the case it immediately returns the block.
    1761                 :     If not, the function first chooses a block for this page. If there are
    1762                 :     no unused blocks in the page cache, the function takes the block
    1763                 :     at the very beginning of the warm sub-chain. It saves the page in that
    1764                 :     block if it's dirty before returning the pointer to it.
    1765                 :     The function returns in the page_st parameter the following values:
    1766                 :       PAGE_READ            - if page already in the block,
    1767                 :       PAGE_TO_BE_READ      - if it is to be read yet by the current thread
    1768                 :       PAGE_WAIT_TO_BE_READ - if it is to be read by another thread
    1769                 :     If an error occurs the PCBLOCK_ERROR bit is set in the block status.
    1770                 :     It might happen that there are no blocks in the LRU chain (in the warm
    1771                 :     part) - all blocks are unlinked for some read/write operations. Then the
    1772                 :     function waits until the first of these operations links a block back.
    1773                 : */
    1774                 : 
    1775                 : static PAGECACHE_BLOCK_LINK *find_block(PAGECACHE *pagecache,
    1776                 :                                         PAGECACHE_FILE *file,
    1777                 :                                         pgcache_page_no_t pageno,
    1778                 :                                         int init_hits_left,
    1779                 :                                         my_bool wrmode,
    1780                 :                                         my_bool reg_req,
    1781                 :                                         int *page_st)
    1782            3905 : {
    1783                 :   PAGECACHE_HASH_LINK *hash_link;
    1784                 :   PAGECACHE_BLOCK_LINK *block;
    1785            3905 :   int error= 0;
    1786                 :   int page_status;
    1787                 : 
    1788            3905 :   DBUG_ENTER("find_block");
    1789            3905 :   KEYCACHE_THREAD_TRACE("find_block:begin");
    1790            3905 :   DBUG_PRINT("enter", ("fd: %d  pos: %lu  wrmode: %d",
    1791                 :                        file->file, (ulong) pageno, wrmode));
    1792            3905 :   KEYCACHE_DBUG_PRINT("find_block", ("fd: %d  pos: %lu  wrmode: %d",
    1793                 :                                      file->file, (ulong) pageno,
    1794                 :                                      wrmode));
    1795                 : #if !defined(DBUG_OFF) && defined(EXTRA_DEBUG)
    1796            3905 :   DBUG_EXECUTE("check_pagecache",
    1797                 :                test_key_cache(pagecache, "start of find_block", 0););
    1798                 : #endif
    1799                 : 
    1800            3905 : restart:
    1801                 :   /* Find the hash link for the requested page (file, pageno) */
    1802            3905 :   hash_link= get_hash_link(pagecache, file, pageno);
    1803                 : 
    1804            3905 :   page_status= -1;
    1805            3905 :   if ((block= hash_link->block) &&
    1806                 :       block->hash_link == hash_link && (block->status & PCBLOCK_READ))
    1807            3856 :     page_status= PAGE_READ;
    1808                 : 
    1809            3905 :   if (wrmode && pagecache->resize_in_flush)
    1810                 :   {
    1811                 :     /* This is a write request during the flush phase of a resize operation */
    1812                 : 
    1813               0 :     if (page_status != PAGE_READ)
    1814                 :     {
    1815                 :       /* We don't need the page in the cache: we are going to write on disk */
    1816               0 :       DBUG_ASSERT(hash_link->requests > 0);
    1817               0 :       hash_link->requests--;
    1818               0 :       unlink_hash(pagecache, hash_link);
    1819               0 :       return 0;
    1820                 :     }
    1821               0 :     if (!(block->status & PCBLOCK_IN_FLUSH))
    1822                 :     {
    1823               0 :       DBUG_ASSERT(hash_link->requests > 0);
    1824               0 :       hash_link->requests--;
    1825                 :       /*
    1826                 :         Remove block to invalidate the page in the block buffer
    1827                 :         as we are going to write directly on disk.
    1828                 :         Although we have an exclusive lock for the updated key part
    1829                 :         the control can be yielded by the current thread as we might
    1830                 :         have unfinished readers of other key parts in the block
    1831                 :         buffer. Still we are guaranteed not to have any readers
    1832                 :         of the key part we are writing into until the block is
    1833                 :         removed from the cache as we set the PCBLOCK_REASSIGNED
    1834                 :         flag (see the code below that handles reading requests).
    1835                 :       */
    1836               0 :       free_block(pagecache, block);
    1837               0 :       return 0;
    1838                 :     }
    1839                 :     /* Wait until the page is flushed on disk */
    1840               0 :     DBUG_ASSERT(hash_link->requests > 0);
    1841               0 :     hash_link->requests--;
    1842                 :     {
    1843                 : #ifdef THREAD
    1844               0 :       struct st_my_thread_var *thread= my_thread_var;
    1845               0 :       wqueue_add_to_queue(&block->wqueue[COND_FOR_SAVED], thread);
    1846                 :       do
    1847                 :       {
    1848               0 :         KEYCACHE_DBUG_PRINT("find_block: wait",
    1849                 :                             ("suspend thread %ld", thread->id));
    1850               0 :         pagecache_pthread_cond_wait(&thread->suspend,
    1851                 :                                    &pagecache->cache_lock);
    1852                 :       }
    1853               0 :       while(thread->next);
    1854                 : #else
    1855                 :       KEYCACHE_DBUG_ASSERT(0);
    1856                 :       /*
    1857                 :         Given the use of "resize_in_flush", it seems impossible
    1858                 :         that this whole branch is ever entered in single-threaded case
    1859                 :         because "(wrmode && pagecache->resize_in_flush)" cannot be true.
    1860                 :         TODO: Check this, and then put the whole branch into the
    1861                 :         "#ifdef THREAD" guard.
    1862                 :       */
    1863                 : #endif
    1864                 :     }
    1865                 :     /* Invalidate page in the block if it has not been done yet */
    1866               0 :     if (block->status)
    1867               0 :       free_block(pagecache, block);
    1868               0 :     return 0;
    1869                 :   }
    1870                 : 
    1871            3905 :   if (page_status == PAGE_READ &&
    1872                 :       (block->status & (PCBLOCK_IN_SWITCH | PCBLOCK_REASSIGNED)))
    1873                 :   {
    1874                 :     /* This is a request for a page to be removed from cache */
    1875                 : 
    1876               0 :     KEYCACHE_DBUG_PRINT("find_block",
    1877                 :                         ("request for old page in block: %u  "
    1878                 :                          "wrmode: %d  block->status: %d",
    1879                 :                          PCBLOCK_NUMBER(pagecache, block), wrmode,
    1880                 :                          block->status));
    1881                 :     /*
    1882                 :        Only reading requests can proceed until the old dirty page is flushed,
    1883                 :        all others are to be suspended, then resubmitted
    1884                 :     */
    1885               0 :     if (!wrmode && !(block->status & PCBLOCK_REASSIGNED))
    1886                 :     {
    1887               0 :       if (reg_req)
    1888               0 :         reg_requests(pagecache, block, 1);
    1889                 :     }
    1890                 :     else
    1891                 :     {
    1892               0 :       DBUG_ASSERT(hash_link->requests > 0);
    1893               0 :       hash_link->requests--;
    1894               0 :       KEYCACHE_DBUG_PRINT("find_block",
    1895                 :                           ("request waiting for old page to be saved"));
    1896                 :       {
    1897                 : #ifdef THREAD
    1898               0 :         struct st_my_thread_var *thread= my_thread_var;
    1899                 :         /* Put the request into the queue of those waiting for the old page */
    1900               0 :         wqueue_add_to_queue(&block->wqueue[COND_FOR_SAVED], thread);
    1901                 :         /* Wait until the request can be resubmitted */
    1902                 :         do
    1903                 :         {
    1904               0 :           KEYCACHE_DBUG_PRINT("find_block: wait",
    1905                 :                               ("suspend thread %ld", thread->id));
    1906               0 :           pagecache_pthread_cond_wait(&thread->suspend,
    1907                 :                                      &pagecache->cache_lock);
    1908                 :         }
    1909               0 :         while(thread->next);
    1910                 : #else
    1911                 :         KEYCACHE_DBUG_ASSERT(0);
    1912                 :           /* No parallel requests in single-threaded case */
    1913                 : #endif
    1914                 :       }
    1915               0 :       KEYCACHE_DBUG_PRINT("find_block",
    1916                 :                           ("request for old page resubmitted"));
    1917               0 :       DBUG_PRINT("info", ("restarting..."));
    1918                 :       /* Resubmit the request */
    1919               0 :       goto restart;
    1920                 :     }
    1921                 :   }
    1922                 :   else
    1923                 :   {
    1924                 :     /* This is a request for a new page or for a page not to be removed */
    1925            3905 :     if (! block)
    1926                 :     {
    1927                 :       /* No block is assigned for the page yet */
    1928              49 :       if (pagecache->blocks_unused)
    1929                 :       {
    1930              49 :         if (pagecache->free_block_list)
    1931                 :         {
    1932                 :           /* There is a block in the free list. */
    1933              20 :           block= pagecache->free_block_list;
    1934              20 :           pagecache->free_block_list= block->next_used;
    1935              20 :           block->next_used= NULL;
    1936                 :         }
    1937                 :         else
    1938                 :         {
    1939                 :           /* There are some never used blocks, take first of them */
    1940              29 :           block= &pagecache->block_root[pagecache->blocks_used];
    1941              29 :           block->buffer= ADD_TO_PTR(pagecache->block_mem,
    1942                 :                                     ((ulong) pagecache->blocks_used*
    1943                 :                                      pagecache->block_size),
    1944                 :                                     uchar*);
    1945              29 :           pagecache->blocks_used++;
    1946                 :         }
    1947              49 :         pagecache->blocks_unused--;
    1948              49 :         DBUG_ASSERT(block->wlocks == 0);
    1949              49 :         DBUG_ASSERT(block->rlocks == 0);
    1950              49 :         DBUG_ASSERT(block->rlocks_queue == 0);
    1951              49 :         DBUG_ASSERT(block->pins == 0);
    1952              49 :         block->status= 0;
    1953                 : #ifndef DBUG_OFF
    1954              49 :         block->type= PAGECACHE_EMPTY_PAGE;
    1955                 : #endif
    1956              49 :         block->requests= 1;
    1957              49 :         block->temperature= PCBLOCK_COLD;
    1958              49 :         block->hits_left= init_hits_left;
    1959              49 :         block->last_hit_time= 0;
    1960              49 :         block->rec_lsn= LSN_MAX;
    1961              49 :         link_to_file_list(pagecache, block, file, 0);
    1962              49 :         block->hash_link= hash_link;
    1963              49 :         hash_link->block= block;
    1964              49 :         page_status= PAGE_TO_BE_READ;
    1965              49 :         DBUG_PRINT("info", ("page to be read set for page 0x%lx",
    1966                 :                             (ulong)block));
    1967              49 :         KEYCACHE_DBUG_PRINT("find_block",
    1968                 :                             ("got free or never used block %u",
    1969                 :                              PCBLOCK_NUMBER(pagecache, block)));
    1970                 :       }
    1971                 :       else
    1972                 :       {
    1973                 :         /* There are no never used blocks, use a block from the LRU chain */
    1974                 : 
    1975                 :         /*
    1976                 :           Wait until a new block is added to the LRU chain;
    1977                 :           several threads might wait here for the same page,
    1978                 :           all of them must get the same block
    1979                 :         */
    1980                 : 
    1981                 : #ifdef THREAD
    1982               0 :         if (! pagecache->used_last)
    1983                 :         {
    1984               0 :           struct st_my_thread_var *thread= my_thread_var;
    1985               0 :           thread->opt_info= (void *) hash_link;
    1986               0 :           wqueue_link_into_queue(&pagecache->waiting_for_block, thread);
    1987                 :           do
    1988                 :           {
    1989               0 :             KEYCACHE_DBUG_PRINT("find_block: wait",
    1990                 :                                 ("suspend thread %ld", thread->id));
    1991               0 :             pagecache_pthread_cond_wait(&thread->suspend,
    1992                 :                                        &pagecache->cache_lock);
    1993                 :           }
    1994               0 :           while (thread->next);
    1995               0 :           thread->opt_info= NULL;
    1996                 :         }
    1997                 : #else
    1998                 :         KEYCACHE_DBUG_ASSERT(pagecache->used_last);
    1999                 : #endif
    2000               0 :         block= hash_link->block;
    2001               0 :         if (! block)
    2002                 :         {
    2003                 :           /*
    2004                 :              Take the first block from the LRU chain
    2005                 :              unlinking it from the chain
    2006                 :           */
    2007               0 :           block= pagecache->used_last->next_used;
    2008               0 :           block->hits_left= init_hits_left;
    2009               0 :           block->last_hit_time= 0;
    2010               0 :           if (reg_req)
    2011               0 :             reg_requests(pagecache, block, 1);
    2012               0 :           hash_link->block= block;
    2013                 :         }
    2014               0 :         PCBLOCK_INFO(block);
    2015               0 :         DBUG_ASSERT(block->wlocks == 0);
    2016               0 :         DBUG_ASSERT(block->rlocks == 0);
    2017               0 :         DBUG_ASSERT(block->rlocks_queue == 0);
    2018               0 :         DBUG_ASSERT(block->pins == 0);
    2019                 : 
    2020               0 :         if (block->hash_link != hash_link &&
    2021                 :             ! (block->status & PCBLOCK_IN_SWITCH) )
    2022                 :         {
    2023                 :           /* this is a primary request for a new page */
    2024               0 :           DBUG_ASSERT(block->wlocks == 0);
    2025               0 :           DBUG_ASSERT(block->rlocks == 0);
    2026               0 :           DBUG_ASSERT(block->rlocks_queue == 0);
    2027               0 :           DBUG_ASSERT(block->pins == 0);
    2028               0 :           block->status|= PCBLOCK_IN_SWITCH;
    2029                 : 
    2030               0 :           KEYCACHE_DBUG_PRINT("find_block",
    2031                 :                               ("got block %u for new page",
    2032                 :                                PCBLOCK_NUMBER(pagecache, block)));
    2033                 : 
    2034               0 :           if (block->status & PCBLOCK_CHANGED)
    2035                 :           {
    2036                 :             /* The block contains a dirty page - push it out of the cache */
    2037                 : 
    2038               0 :             KEYCACHE_DBUG_PRINT("find_block", ("block is dirty"));
    2039                 : 
    2040               0 :             pagecache_pthread_mutex_unlock(&pagecache->cache_lock);
    2041                 :             /*
    2042                 :               The call is thread safe because only the current
    2043                 :               thread might change the block->hash_link value
    2044                 :             */
    2045               0 :             DBUG_ASSERT(block->pins == 0);
    2046               0 :             error= pagecache_fwrite(pagecache,
    2047                 :                                     &block->hash_link->file,
    2048                 :                                     block->buffer,
    2049                 :                                     block->hash_link->pageno,
    2050                 :                                     block->type,
    2051                 :                                     pagecache->readwrite_flags);
    2052               0 :             pagecache_pthread_mutex_lock(&pagecache->cache_lock);
    2053               0 :             pagecache->global_cache_write++;
    2054                 :           }
    2055                 : 
    2056               0 :           block->status|= PCBLOCK_REASSIGNED;
    2057               0 :           if (block->hash_link)
    2058                 :           {
    2059                 :             /*
    2060                 :               Wait until all pending read requests
    2061                 :               for this page are executed
    2062                 :               (we could have avoided this waiting, if we had read
    2063                 :               a page in the cache in a sweep, without yielding control)
    2064                 :             */
    2065               0 :             wait_for_readers(pagecache, block);
    2066                 : 
    2067                 :             /* Remove the hash link for this page from the hash table */
    2068               0 :             unlink_hash(pagecache, block->hash_link);
    2069                 :             /* All pending requests for this page must be resubmitted */
    2070                 : #ifdef THREAD
    2071               0 :             if (block->wqueue[COND_FOR_SAVED].last_thread)
    2072               0 :               wqueue_release_queue(&block->wqueue[COND_FOR_SAVED]);
    2073                 : #endif
    2074                 :           }
    2075               0 :           link_to_file_list(pagecache, block, file,
    2076                 :                             (my_bool)(block->hash_link ? 1 : 0));
    2077               0 :           PCBLOCK_INFO(block);
    2078               0 :           block->status= error ? PCBLOCK_ERROR : 0;
    2079               0 :           block->error=  (int16) my_errno;
    2080                 : #ifndef DBUG_OFF
    2081               0 :           block->type= PAGECACHE_EMPTY_PAGE;
    2082               0 :           if (error)
    2083               0 :             my_debug_put_break_here();
    2084                 : #endif
    2085               0 :           block->hash_link= hash_link;
    2086               0 :           page_status= PAGE_TO_BE_READ;
    2087               0 :           DBUG_PRINT("info", ("page to be read set for page 0x%lx",
    2088                 :                               (ulong)block));
    2089                 : 
    2090               0 :           KEYCACHE_DBUG_ASSERT(block->hash_link->block == block);
    2091               0 :           KEYCACHE_DBUG_ASSERT(hash_link->block->hash_link == hash_link);
    2092                 :         }
    2093                 :         else
    2094                 :         {
    2095                 :           /* This is for secondary requests for a new page only */
    2096               0 :           KEYCACHE_DBUG_PRINT("find_block",
    2097                 :                               ("block->hash_link: %p  hash_link: %p  "
    2098                 :                                "block->status: %u", block->hash_link,
    2099                 :                                hash_link, block->status ));
    2100               0 :           page_status= (((block->hash_link == hash_link) &&
    2101                 :                          (block->status & PCBLOCK_READ)) ?
    2102                 :                         PAGE_READ : PAGE_WAIT_TO_BE_READ);
    2103                 :         }
    2104                 :       }
    2105                 :     }
    2106                 :     else
    2107                 :     {
    2108            3856 :       if (reg_req)
    2109            3856 :         reg_requests(pagecache, block, 1);
    2110            3856 :       KEYCACHE_DBUG_PRINT("find_block",
    2111                 :                           ("block->hash_link: %p  hash_link: %p  "
    2112                 :                            "block->status: %u", block->hash_link,
    2113                 :                            hash_link, block->status ));
    2114            3856 :       page_status= (((block->hash_link == hash_link) &&
    2115                 :                      (block->status & PCBLOCK_READ)) ?
    2116                 :                     PAGE_READ : PAGE_WAIT_TO_BE_READ);
    2117                 :     }
    2118                 :   }
    2119                 : 
    2120            3905 :   KEYCACHE_DBUG_ASSERT(page_status != -1);
    2121            3905 :   *page_st= page_status;
    2122            3905 :   DBUG_PRINT("info",
    2123                 :              ("block: 0x%lx  fd: %u  pos: %lu  block->status: %u  page_status: %u",
    2124                 :               (ulong) block, (uint) file->file,
    2125                 :               (ulong) pageno, block->status, (uint) page_status));
    2126            3905 :   KEYCACHE_DBUG_PRINT("find_block",
    2127                 :                       ("block: 0x%lx  fd: %d  pos: %lu  block->status: %u  page_status: %d",
    2128                 :                        (ulong) block,
    2129                 :                        file->file, (ulong) pageno, block->status,
    2130                 :                        page_status));
    2131                 : 
    2132                 : #if !defined(DBUG_OFF) && defined(EXTRA_DEBUG)
    2133            3905 :   DBUG_EXECUTE("check_pagecache",
    2134                 :                test_key_cache(pagecache, "end of find_block",0););
    2135                 : #endif
    2136            3905 :   KEYCACHE_THREAD_TRACE("find_block:end");
    2137            3905 :   DBUG_RETURN(block);
    2138                 : }
    2139                 : 
    2140                 : /*
                         :   Increment the pin count of a block.
                         : 
                         :   Unless DBUG_OFF is defined, a PAGECACHE_PIN_INFO record holding
                         :   my_thread_var is also allocated and linked into block->pin_list.
                         : 
                         :   NOTE(review): the my_malloc() result is dereferenced without a NULL
                         :   check -- confirm that is acceptable for this debug-only path.
                         : */
    2141                 : static void add_pin(PAGECACHE_BLOCK_LINK *block)
    2142            1934 : {
    2143            1934 :   DBUG_ENTER("add_pin");
    2144            1934 :   DBUG_PRINT("enter", ("block: 0x%lx  pins: %u",
    2145                 :                        (ulong) block,
    2146                 :                        block->pins));
    2147            1934 :   PCBLOCK_INFO(block);
    2148            1934 :   block->pins++;
    2149                 : #ifndef DBUG_OFF
    2150                 :   {
    2151                 :     PAGECACHE_PIN_INFO *info=
    2152            1934 :       (PAGECACHE_PIN_INFO *)my_malloc(sizeof(PAGECACHE_PIN_INFO), MYF(0));
    2153            1934 :     info->thread= my_thread_var;
    2154            1934 :     info_link(&block->pin_list, info);
    2155                 :   }
    2156                 : #endif
    2157            1934 :   DBUG_VOID_RETURN;
    2158                 : }
    2159                 : /*
                         :   Decrement the pin count of a block (asserted to be positive).
                         : 
                         :   Unless DBUG_OFF is defined, the matching PAGECACHE_PIN_INFO record
                         :   is looked up with info_find(block->pin_list, my_thread_var, any),
                         :   asserted to exist, unlinked and freed.  With DBUG_OFF the any
                         :   argument is not used, hence the attribute on the parameter.
                         : */
    2160                 : static void remove_pin(PAGECACHE_BLOCK_LINK *block, my_bool any
    2161                 : #ifdef DBUG_OFF
    2162                 :                        __attribute__((unused))
    2163                 : #endif
    2164                 :                        )
    2165            1934 : {
    2166            1934 :   DBUG_ENTER("remove_pin");
    2167            1934 :   DBUG_PRINT("enter", ("block: 0x%lx  pins: %u  any: %d",
    2168                 :                        (ulong) block,
    2169                 :                        block->pins, (int)any));
    2170            1934 :   PCBLOCK_INFO(block);
    2171            1934 :   DBUG_ASSERT(block->pins > 0);
    2172            1934 :   block->pins--;
    2173                 : #ifndef DBUG_OFF
    2174                 :   {
    2175            1934 :     PAGECACHE_PIN_INFO *info= info_find(block->pin_list, my_thread_var, any);
    2176            1934 :     DBUG_ASSERT(info != 0);
    2177            1934 :     info_unlink(info);
    2178            1934 :     my_free(info, MYF(0));
    2179                 :   }
    2180                 : #endif
    2181            1934 :   DBUG_VOID_RETURN;
    2182                 : }
    2183                 : #ifndef DBUG_OFF /* debug-only lock bookkeeping kept in block->lock_list */
    2184                 : static void info_add_lock(PAGECACHE_BLOCK_LINK *block, my_bool wl)
    2185            3905 : { /*
                         :     Allocate a PAGECACHE_LOCK_INFO record for the current thread
                         :     (my_thread_var), store wl as its write_lock flag and link it into
                         :     block->lock_list.
                         :     NOTE(review): the my_malloc() result is used without a NULL check.
                         :   */
    2186                 :   PAGECACHE_LOCK_INFO *info=
    2187            3905 :     (PAGECACHE_LOCK_INFO *)my_malloc(sizeof(PAGECACHE_LOCK_INFO), MYF(0));
    2188            3905 :   info->thread= my_thread_var;
    2189            3905 :   info->write_lock= wl;
    2190            3905 :   info_link((PAGECACHE_PIN_INFO **)&block->lock_list,
    2191                 :             (PAGECACHE_PIN_INFO *)info);
    2192                 : }
    2193                 : static void info_remove_lock(PAGECACHE_BLOCK_LINK *block)
    2194            3905 : { /*
                         :     Look up the record of the current thread in block->lock_list
                         :     (asserted to exist), unlink it and free it.
                         :   */
    2195                 :   PAGECACHE_LOCK_INFO *info=
    2196                 :     (PAGECACHE_LOCK_INFO *)info_find((PAGECACHE_PIN_INFO *)block->lock_list,
    2197            3905 :                                      my_thread_var, FALSE);
    2198            3905 :   DBUG_ASSERT(info != 0);
    2199            3905 :   info_unlink((PAGECACHE_PIN_INFO *)info);
    2200            3905 :   my_free(info, MYF(0));
    2201                 : }
    2202                 : static void info_change_lock(PAGECACHE_BLOCK_LINK *block, my_bool wl)
    2203            1917 : { /*
                         :     Set the write_lock flag of the current thread's record in
                         :     block->lock_list to wl; the record must exist and the flag must
                         :     actually change (both asserted).
                         :   */
    2204                 :   PAGECACHE_LOCK_INFO *info=
    2205                 :     (PAGECACHE_LOCK_INFO *)info_find((PAGECACHE_PIN_INFO *)block->lock_list,
    2206            1917 :                                      my_thread_var, FALSE);
    2207            1917 :   DBUG_ASSERT(info != 0);
    2208            1917 :   DBUG_ASSERT(info->write_lock != wl);
    2209            1917 :   info->write_lock= wl;
    2210                 : }
    2211                 : #else /* DBUG_OFF: the bookkeeping helpers expand to nothing */
    2212                 : #define info_add_lock(B,W)
    2213                 : #define info_remove_lock(B)
    2214                 : #define info_change_lock(B,W)
    2215                 : #endif
    2216                 : 
    2217                 : 
    2218                 : /**
    2219                 :   @brief Wait until a read or write lock on the block can be retried
    2220                 : 
    2221                 :   @param pagecache       pointer to a page cache data structure
    2222                 :   @param block           the block to work with
    2223                 :   @param file            file of the block when it was locked
    2224                 :   @param pageno          page number of the block when it was locked
    2225                 :   @param lock_type       MY_PTHREAD_LOCK_READ or MY_PTHREAD_LOCK_WRITE
    2226                 : 
                         :   @note The thread stores lock_type in its thread variable, queues
                         :     itself in block->wqueue[COND_FOR_WRLOCK], calls
                         :     dec_counter_for_resize_op() and then waits on thread->suspend
                         :     (with pagecache->cache_lock) until thread->next is cleared.
                         :   @note After waking up, the block is considered changed if it has
                         :     PCBLOCK_REASSIGNED or PCBLOCK_IN_SWITCH set, or if its hash link
                         :     no longer refers to the given file and page number.
                         : 
    2227                 :   @retval 0 OK
    2228                 :   @retval 1 Can't lock this block, need retry
    2229                 : */
    2230                 : 
    2231                 : static my_bool pagecache_wait_lock(PAGECACHE *pagecache,
    2232                 :                                   PAGECACHE_BLOCK_LINK *block,
    2233                 :                                   PAGECACHE_FILE file,
    2234                 :                                   pgcache_page_no_t pageno,
    2235                 :                                   uint lock_type)
    2236               0 : {
    2237                 :   /*
                         :     Lock failed, we will wait.
                         :     NOTE(review): DBUG_ENTER sits inside the THREAD section while the
                         :     DBUG_RETURN calls below are outside it -- confirm that a debug
                         :     build without THREAD still compiles.
                         :   */
    2238                 : #ifdef THREAD
    2239               0 :   struct st_my_thread_var *thread= my_thread_var;
    2240               0 :   DBUG_ENTER("pagecache_wait_lock");
    2241               0 :   DBUG_PRINT("info", ("fail to lock, waiting... 0x%lx", (ulong)block));
    2242               0 :   thread->lock_type= lock_type;
    2243               0 :   wqueue_add_to_queue(&block->wqueue[COND_FOR_WRLOCK], thread);
    2244               0 :   dec_counter_for_resize_op(pagecache);
    2245                 :   do
    2246                 :   {
    2247               0 :     KEYCACHE_DBUG_PRINT("get_wrlock: wait",
    2248                 :                         ("suspend thread %ld", thread->id));
    2249               0 :     pagecache_pthread_cond_wait(&thread->suspend,
    2250                 :                                 &pagecache->cache_lock);
    2251                 :   }
    2252               0 :   while(thread->next);
    2253                 : #else
    2254                 :   DBUG_ASSERT(0);
    2255                 : #endif
    2256               0 :   PCBLOCK_INFO(block);
    2257               0 :   if ((block->status & (PCBLOCK_REASSIGNED | PCBLOCK_IN_SWITCH)) ||
    2258                 :       file.file != block->hash_link->file.file ||
    2259                 :       pageno != block->hash_link->pageno)
    2260                 :   {
    2261               0 :     DBUG_PRINT("info", ("the block 0x%lx changed => need retry "
    2262                 :                         "status: %x  files %d != %d or pages %lu != %lu",
    2263                 :                         (ulong)block, block->status,
    2264                 :                         file.file, block->hash_link->file.file,
    2265                 :                         (ulong) pageno, (ulong) block->hash_link->pageno));
    2266               0 :     DBUG_RETURN(1);
    2267                 :   }
    2268               0 :   DBUG_RETURN(0);
    2269                 : }
    2270                 : 
    2271                 : /**
    2272                 :   @brief Put on the block write lock
    2273                 : 
    2274                 :   @param pagecache       pointer to a page cache data structure
    2275                 :   @param block           the block to work with
    2276                 : 
    2277                 :   @note We have a loose scheme for locking by the same thread:
    2278                 :     * Downgrade to read lock if no other locks are taken
    2279                 :     * Our locking scheme allows the same thread
    2280                 :       - to take the same kind of lock again
    2281                 :       - to take a read lock while it holds a write lock
    2282                 :       - to downgrade to a read lock while the same thread still keeps
    2283                 :         a write lock elsewhere
    2284                 :     * But the number of unlock operations must equal the number of locks.
    2285                 :     * If we try to get a read lock while having active write locks, the
    2286                 :       read locks are queued, and as soon as the write lock(s) are gone
    2287                 :       the queued read locks come into force.
    2288                 :     * If a read lock is unlocked before it came into force, it is
    2289                 :       just removed from the queue
    2290                 : 
                         :   @note The file and page number of the block are remembered on entry
                         :     and handed to pagecache_wait_lock(), which reports a retry when
                         :     the block no longer matches them after waiting.
                         : 
    2291                 :   @retval 0 OK
    2292                 :   @retval 1 Can't lock this block, need retry
    2293                 : */
    2294                 : 
    2295                 : static my_bool get_wrlock(PAGECACHE *pagecache,
    2296                 :                           PAGECACHE_BLOCK_LINK *block)
    2297            1934 : {
    2298            1934 :   PAGECACHE_FILE file= block->hash_link->file;
    2299            1934 :   pgcache_page_no_t pageno= block->hash_link->pageno;
    2300            1934 :   pthread_t locker= pthread_self();
    2301            1934 :   DBUG_ENTER("get_wrlock");
    2302            1934 :   DBUG_PRINT("info", ("the block 0x%lx "
    2303                 :                       "files %d(%d)  pages %lu(%lu)",
    2304                 :                       (ulong) block,
    2305                 :                       file.file, block->hash_link->file.file,
    2306                 :                       (ulong) pageno, (ulong) block->hash_link->pageno));
    2307            1934 :   PCBLOCK_INFO(block);
    2308                 :   /*
    2309                 :     We assume that the same thread will try write lock on block on which it
    2310                 :     has already read lock.
                         :     NOTE(review): the loop below waits while block->rlocks is non-zero
                         :     without checking who holds the read locks -- confirm the assumption.
    2311                 :   */
    2312            3868 :   while ((block->wlocks && !pthread_equal(block->write_locker, locker)) ||
    2313                 :          block->rlocks)
    2314                 :   {
    2315                 :     /* Lock failed we will wait */
    2316               0 :     if (pagecache_wait_lock(pagecache, block, file, pageno,
    2317                 :                            MY_PTHREAD_LOCK_WRITE))
    2318               0 :       DBUG_RETURN(1);
    2319                 :   }
    2320                 :   /* we are doing it by global cache mutex protection, so it is OK */
    2321            1934 :   block->wlocks++;
    2322            1934 :   block->write_locker= locker;
    2323            1934 :   DBUG_PRINT("info", ("WR lock set, block 0x%lx", (ulong)block));
    2324            1934 :   DBUG_RETURN(0);
    2325                 : }
    2326                 : 
    2327                 : 
    2328                 : /**
    2329                 :   @brief Put on the block read lock
    2330                 : 
    2331                 :   @param pagecache       pointer to a page cache data structure
    2332                 :   @param block           the block to work with
    2333                 : 
    2334                 :   @note If the calling thread itself holds the write lock, the read lock
    2335                 :     is only counted in block->rlocks_queue; otherwise in block->rlocks.
    2336                 : 
    2337                 :   @note see note for get_wrlock().
    2338                 : 
    2339                 :   @retval 0 OK
    2340                 :   @retval 1 Can't lock this block, need retry
    2341                 : */
    2342                 : 
    2343                 : static my_bool get_rdlock(PAGECACHE *pagecache,
    2344                 :                           PAGECACHE_BLOCK_LINK *block)
    2345            1971 : {
    2346            1971 :   PAGECACHE_FILE file= block->hash_link->file;
    2347            1971 :   pgcache_page_no_t pageno= block->hash_link->pageno;
    2348            1971 :   pthread_t locker= pthread_self();
    2349            1971 :   DBUG_ENTER("get_rdlock");
    2350            1971 :   DBUG_PRINT("info", ("the block 0x%lx "
    2351                 :                       "files %d(%d)  pages %lu(%lu)",
    2352                 :                       (ulong) block,
    2353                 :                       file.file, block->hash_link->file.file,
    2354                 :                       (ulong) pageno, (ulong) block->hash_link->pageno));
    2355            1971 :   PCBLOCK_INFO(block);
    2356            3942 :   while (block->wlocks && !pthread_equal(block->write_locker, locker))
    2357                 :   {
    2358                 :     /* Lock failed we will wait */
    2359               0 :     if (pagecache_wait_lock(pagecache, block, file, pageno,
    2360                 :                            MY_PTHREAD_LOCK_READ))
    2361               0 :       DBUG_RETURN(1);
    2362                 :   }
    2363                 :   /* we are doing it by global cache mutex protection, so it is OK */
    2364            1971 :   if (block->wlocks)
    2365                 :   {
    2366               0 :     DBUG_ASSERT(pthread_equal(block->write_locker, locker));
    2367               0 :     block->rlocks_queue++;
    2368               0 :     DBUG_PRINT("info", ("RD lock put into queue, block 0x%lx", (ulong)block));
    2369                 :   }
    2370                 :   else
    2371                 :   {
    2372            1971 :     block->rlocks++;
    2373            1971 :     DBUG_PRINT("info", ("RD lock set, block 0x%lx", (ulong)block));
    2374                 :   }
    2375            1971 :   DBUG_RETURN(0);
    2376                 : }
    2377                 : 
    2378                 : 
    2379                 : /**
    2380                 :   @brief Remove write lock from the block
    2381                 : 
    2382                 :   @param block           the block to work with
    2383                 :   @param read_lock       downgrade to read lock: one more read lock is
    2384                 :                          put into the queue before the write lock goes
    2385                 : 
    2386                 :   @note see note for get_wrlock().
                         :   @note When the last write lock is removed, the queued read locks
                         :     (rlocks_queue) become the active ones (rlocks) and waiters in
                         :     wqueue[COND_FOR_WRLOCK] are released through
                         :     wqueue_release_one_locktype_from_queue().
    2387                 : */
    2388                 : 
    2389                 : static void release_wrlock(PAGECACHE_BLOCK_LINK *block, my_bool read_lock)
    2390            1934 : {
    2391            1934 :   DBUG_ENTER("release_wrlock");
    2392            1934 :   PCBLOCK_INFO(block);
    2393            1934 :   DBUG_ASSERT(block->wlocks > 0);
    2394            1934 :   DBUG_ASSERT(block->rlocks == 0);
    2395            1934 :   DBUG_ASSERT(block->pins > 0);
    2396            1934 :   if (read_lock)
    2397            1917 :     block->rlocks_queue++;
    2398            1934 :   if (block->wlocks == 1)
    2399                 :   {
    2400            1934 :     block->rlocks= block->rlocks_queue;
    2401            1934 :     block->rlocks_queue= 0;
    2402                 :   }
    2403            1934 :   block->wlocks--;
    2404            1934 :   if (block->wlocks > 0)
    2405               0 :     DBUG_VOID_RETURN;                      /* Multiple write locked */
    2406            1934 :   DBUG_PRINT("info", ("WR lock reset, block 0x%lx", (ulong)block));
    2407                 : #ifdef THREAD
    2408                 :   /* release all threads waiting for read lock or one waiting for write */
    2409            1934 :   if (block->wqueue[COND_FOR_WRLOCK].last_thread)
    2410               0 :     wqueue_release_one_locktype_from_queue(&block->wqueue[COND_FOR_WRLOCK]);
    2411                 : #endif
    2412            1934 :   PCBLOCK_INFO(block);
    2413            1934 :   DBUG_VOID_RETURN;
    2414                 : }
    2415                 : 
    2416                 : /**
    2417                 :   @brief Remove read lock from the block
    2418                 : 
    2419                 :   @param block           the block to work with
    2420                 : 
    2421                 :   @note see note for get_wrlock().
    2422                 :   @note While the block is write locked (by the calling thread, as
                         :     asserted) only the queued read lock count (rlocks_queue) is
                         :     decremented.  Otherwise rlocks is decremented and, once it drops
                         :     to zero, waiters in wqueue[COND_FOR_WRLOCK] are released through
                         :     wqueue_release_one_locktype_from_queue().
    2423                 : */
    2424                 : 
    2425                 : static void release_rdlock(PAGECACHE_BLOCK_LINK *block)
    2426            3888 : {
    2427            3888 :   DBUG_ENTER("release_rdlock");
    2428            3888 :   PCBLOCK_INFO(block);
    2429            3888 :   if (block->wlocks)
    2430                 :   {
    2431               0 :     DBUG_ASSERT(pthread_equal(block->write_locker, pthread_self()));
    2432               0 :     DBUG_ASSERT(block->rlocks == 0);
    2433               0 :     DBUG_ASSERT(block->rlocks_queue > 0);
    2434               0 :     block->rlocks_queue--;
    2435               0 :     DBUG_PRINT("info", ("RD lock queue decreased, block 0x%lx", (ulong)block));
    2436               0 :     DBUG_VOID_RETURN;
    2437                 :   }
    2438            3888 :   DBUG_ASSERT(block->rlocks > 0);
    2439            3888 :   DBUG_ASSERT(block->rlocks_queue == 0);
    2440            3888 :   block->rlocks--;
    2441            3888 :   DBUG_PRINT("info", ("RD lock decreased, block 0x%lx", (ulong)block));
    2442            3888 :   if (block->rlocks > 0)
    2443               0 :     DBUG_VOID_RETURN;                      /* Multiple read locked */
    2444            3888 :   DBUG_PRINT("info", ("RD lock reset, block 0x%lx", (ulong)block));
    2445                 : #ifdef THREAD
    2446                 :   /* release all threads waiting for read lock or one waiting for write */
    2447            3888 :   if (block->wqueue[COND_FOR_WRLOCK].last_thread)
    2448               0 :     wqueue_release_one_locktype_from_queue(&block->wqueue[COND_FOR_WRLOCK]);
    2449                 : #endif
    2450            3888 :   PCBLOCK_INFO(block);
    2451            3888 :   DBUG_VOID_RETURN;
    2452                 : }
    2453                 : 
    2454                 : /**
    2455                 :   @brief Try to lock/unlock and pin/unpin the block
    2456                 : 
    2457                 :   @param pagecache       pointer to a page cache data structure
    2458                 :   @param block           the block to work with
    2459                 :   @param lock            lock change mode
    2460                 :   @param pin             pin change mode
    2461                 :   @param any             allow unpinning block pinned by any thread; possible
    2462                 :                          only if not locked, see pagecache_unlock_by_link()
    2463                 :                          (asserted: lock is LEFT_UNLOCKED and pin is UNPIN)
    2464                 : 
                         :   @note PAGECACHE_LOCK_WRITE always pins the block; PAGECACHE_LOCK_READ
                         :     pins it only when pin is PAGECACHE_PIN.
                         :   @note When taking the lock fails, the request counter of the hash
                         :     link of the block (block->hash_link->requests) is decremented
                         :     before 1 is returned.
                         :   @note NOTE(review): block is tested for NULL only around the debug
                         :     printing, while the lock and pin helpers dereference it
                         :     unconditionally -- confirm callers never pass NULL.
                         : 
    2465                 :   @retval 0 OK
    2466                 :   @retval 1 Try to lock the block failed
    2467                 : */
    2468                 : 
    2469                 : static my_bool make_lock_and_pin(PAGECACHE *pagecache,
    2470                 :                                  PAGECACHE_BLOCK_LINK *block,
    2471                 :                                  enum pagecache_page_lock lock,
    2472                 :                                  enum pagecache_page_pin pin,
    2473                 :                                  my_bool any)
    2474           11608 : {
    2475           11608 :   DBUG_ENTER("make_lock_and_pin");
    2476                 : 
    2477           11608 :   DBUG_PRINT("enter", ("block: 0x%lx", (ulong)block));
    2478                 : #ifndef DBUG_OFF
    2479           11608 :   if (block)
    2480                 :   {
    2481           11608 :     DBUG_PRINT("enter", ("block: 0x%lx (%u)  wrlocks: %u  rdlocks: %u  "
    2482                 :                          "rdlocks_q: %u  pins: %u  lock: %s  pin: %s any %d",
    2483                 :                          (ulong)block, PCBLOCK_NUMBER(pagecache, block),
    2484                 :                          block->wlocks, block->rlocks, block->rlocks_queue,
    2485                 :                          block->pins,
    2486                 :                          page_cache_page_lock_str[lock],
    2487                 :                          page_cache_page_pin_str[pin], (int)any));
    2488           11608 :     PCBLOCK_INFO(block);
    2489                 :   }
    2490                 : #endif
    2491                 : 
    2492           11608 :   DBUG_ASSERT(!any ||
    2493                 :               ((lock == PAGECACHE_LOCK_LEFT_UNLOCKED) &&
    2494                 :                (pin == PAGECACHE_UNPIN)));
    2495                 : 
    2496           11608 :   switch (lock) {
    2497                 :   case PAGECACHE_LOCK_WRITE:               /* free  -> write */
    2498                 :     /* Writelock and pin the buffer */
    2499            1934 :     if (get_wrlock(pagecache, block))
    2500                 :     {
    2501                 :       /* Couldn't lock because block changed status => need retry */
    2502            1934 :       goto retry;
    2503                 :     }
    2504                 : 
    2505                 :     /* The cache is locked, so there is nothing to be afraid of */
    2506            1934 :     add_pin(block);
    2507            1934 :     info_add_lock(block, 1);
    2508            1934 :     break;
    2509                 :   case PAGECACHE_LOCK_WRITE_TO_READ:       /* write -> read  */
    2510                 :   case PAGECACHE_LOCK_WRITE_UNLOCK:        /* write -> free  */
    2511                 :     /* Removes write lock and, for WRITE_TO_READ, puts read lock */
    2512            1934 :     release_wrlock(block, lock == PAGECACHE_LOCK_WRITE_TO_READ);
    2513                 :     /* fall through */
    2514                 :   case PAGECACHE_LOCK_READ_UNLOCK:         /* read  -> free  */
    2515            5822 :     if (lock == PAGECACHE_LOCK_READ_UNLOCK)
    2516            3888 :       release_rdlock(block);
    2517                 :     /* fall through */
    2518                 :   case PAGECACHE_LOCK_LEFT_READLOCKED:     /* read  -> read  */
    2519            5822 :     if (pin == PAGECACHE_UNPIN)
    2520                 :     {
    2521            1934 :       remove_pin(block, FALSE);
    2522                 :     }
    2523            5822 :     if (lock == PAGECACHE_LOCK_WRITE_TO_READ)
    2524                 :     {
    2525            1917 :       info_change_lock(block, 0);
    2526                 :     }
    2527            3905 :     else if (lock == PAGECACHE_LOCK_WRITE_UNLOCK ||
    2528                 :              lock == PAGECACHE_LOCK_READ_UNLOCK)
    2529                 :     {
    2530            3905 :       info_remove_lock(block);
    2531                 :     }
    2532                 :     break;
    2533                 :   case PAGECACHE_LOCK_READ:                /* free  -> read  */
    2534            1971 :     if (get_rdlock(pagecache, block))
    2535                 :     {
    2536                 :       /* Couldn't lock because block changed status => need retry */
    2537            1971 :       goto retry;
    2538                 :     }
    2539                 : 
    2540            1971 :     if (pin == PAGECACHE_PIN)
    2541                 :     {
    2542                 :       /* The cache is locked, so there is nothing to be afraid of */
    2543               0 :       add_pin(block);
    2544                 :     }
    2545            1971 :     info_add_lock(block, 0);
    2546            1971 :     break;
    2547                 :   case PAGECACHE_LOCK_LEFT_UNLOCKED:       /* free  -> free  */
    2548               0 :     if (pin == PAGECACHE_UNPIN)
    2549                 :     {
    2550               0 :       remove_pin(block, any);
    2551                 :     }
    2552                 :     /* fall through */
    2553                 :   case PAGECACHE_LOCK_LEFT_WRITELOCKED:    /* write -> write */
    2554                 :     break; /* do nothing */
    2555                 :   default:
    2556               0 :     DBUG_ASSERT(0); /* Should never happen */
    2557                 :   }
    2558                 : 
    2559                 : #ifndef DBUG_OFF
    2560           11608 :   if (block)
    2561           11608 :     PCBLOCK_INFO(block);
    2562                 : #endif
    2563           11608 :   DBUG_RETURN(0);
    2564               0 : retry:
    2565               0 :   DBUG_PRINT("INFO", ("Retry block 0x%lx", (ulong)block));
    2566               0 :   PCBLOCK_INFO(block);
    2567               0 :   DBUG_ASSERT(block->hash_link->requests > 0);
    2568               0 :   block->hash_link->requests--;
    2569               0 :   PCBLOCK_INFO(block);
    2570               0 :   DBUG_RETURN(1);
    2571                 : 
    2572                 : }
    2573                 : 
    2574                 : 
    2575                 : /*
    2576                 :   Read into a key cache block buffer from disk.
    2577                 : 
    2578                 :   SYNOPSIS
    2579                 : 
    2580                 :     read_block()
    2581                 :       pagecache           pointer to a page cache data structure
    2582                 :       block               block to which buffer the data is to be read
    2583                 :       primary             <-> the current thread will read the data
    2584                 : 
    2585                 :   RETURN VALUE
    2586                 :     None
    2587                 : 
    2588                 :   NOTES.
    2589                 :     The function either reads a page data from file to the block buffer,
    2590                 :     or waits until another thread reads it. What page to read is determined
    2591                 :     by a block parameter - reference to a hash link for this page.
    2592                 :     If an error occurs, the PCBLOCK_ERROR bit is set in the block status.
    2593                 : 
    2594                 :     On entry cache_lock is locked
    2595                 : */
    2596                 : 
    2597                 : static void read_block(PAGECACHE *pagecache,
    2598                 :                        PAGECACHE_BLOCK_LINK *block,
    2599                 :                        my_bool primary)
    2600               9 : {
    2601                 :   /* Primary: read page into block->buffer. Secondary: wait for it. */
    2602               9 :   DBUG_ENTER("read_block");
    2603               9 :   DBUG_PRINT("enter", ("read block: 0x%lx  primary: %d",
    2604                 :                        (ulong)block, primary));
    2605               9 :   if (primary)
    2606                 :   {
    2607                 :     size_t error;
    2608                 :     /*
    2609                 :       This code is executed only by threads
    2610                 :       that submitted primary requests
    2611                 :     */
    2612                 : 
    2613               9 :     pagecache->global_cache_read++;
    2614                 :     /* Page is not in buffer yet; read it from file without cache_lock */
    2615               9 :     pagecache_pthread_mutex_unlock(&pagecache->cache_lock);
    2616                 :     /*
    2617                 :       Here other threads may step in and register as secondary readers.
    2618                 :       They will register in block->wqueue[COND_FOR_REQUESTED].
    2619                 :     */
    2620               9 :     error= pagecache_fread(pagecache, &block->hash_link->file,
    2621                 :                            block->buffer,
    2622                 :                            block->hash_link->pageno,
    2623                 :                            pagecache->readwrite_flags);
    2624               9 :     pagecache_pthread_mutex_lock(&pagecache->cache_lock);
    2625               9 :     if (error)
    2626                 :     {
    2627               0 :       block->status|= PCBLOCK_ERROR;
    2628               0 :       block->error=   (int16) my_errno;
    2629               0 :       my_debug_put_break_here();
    2630                 :     }
    2631                 :     else
    2632                 :     {
    2633               9 :       block->status|= PCBLOCK_READ;
    2634               9 :       if ((*block->hash_link->file.read_callback)(block->buffer,
    2635                 :                                                   block->hash_link->pageno,
    2636                 :                                                   block->hash_link->
    2637                 :                                                   file.callback_data))
    2638                 :       {
    2639               0 :         DBUG_PRINT("error", ("read callback problem"));
    2640               0 :         block->status|= PCBLOCK_ERROR;
    2641               0 :         block->error=  (int16) my_errno;
    2642               0 :         my_debug_put_break_here();
    2643                 :       }
    2644                 :     }
    2645               9 :     DBUG_PRINT("read_block",
    2646                 :                ("primary request: new page in cache"));
    2647                 :     /* Signal all pending requests for this page, also after an error */
    2648                 : #ifdef THREAD
    2649               9 :     if (block->wqueue[COND_FOR_REQUESTED].last_thread)
    2650               0 :       wqueue_release_queue(&block->wqueue[COND_FOR_REQUESTED]);
    2651                 : #endif
    2652                 :   }
    2653                 :   else
    2654                 :   {
    2655                 :     /*
    2656                 :       This code is executed only by threads
    2657                 :       that submitted secondary requests
    2658                 :     */
    2659                 : 
    2660                 : #ifdef THREAD
    2661               0 :       struct st_my_thread_var *thread= my_thread_var;
    2662                 :       /* Put the request into a queue and wait until it can be processed */
    2663               0 :       wqueue_add_to_queue(&block->wqueue[COND_FOR_REQUESTED], thread);
    2664                 :       do
    2665                 :       {
    2666               0 :         DBUG_PRINT("read_block: wait",
    2667                 :                   ("suspend thread %ld", thread->id));
    2668               0 :         pagecache_pthread_cond_wait(&thread->suspend,
    2669                 :                                    &pagecache->cache_lock);
    2670                 :       }
    2671               0 :       while (thread->next);
    2672                 : #else
    2673                 :       KEYCACHE_DBUG_ASSERT(0);
    2674                 :       /* No parallel requests in single-threaded case */
    2675                 : #endif
    2676               0 :     DBUG_PRINT("read_block",
    2677                 :                ("secondary request: new page in cache"));
    2678                 :   }
    2679               9 :   DBUG_VOID_RETURN;
    2680                 : }
    2681                 : 
    2682                 : 
    2683                 : /**
    2684                 :    @brief Set LSN on the page to the given one if the given LSN is bigger
    2685                 : 
    2686                 :    @param  pagecache        pointer to a page cache data structure
    2687                 :    @param  lsn              LSN to set
    2688                 :    @param  block            block to check and set
    2689                 : */
    2690                 : 
    2691                 : static void check_and_set_lsn(PAGECACHE *pagecache,
    2692                 :                               LSN lsn, PAGECACHE_BLOCK_LINK *block)
    2693               0 : {
    2694                 :   LSN old;
    2695               0 :   DBUG_ENTER("check_and_set_lsn");
    2696                 :   /*
    2697                 :     In recovery, _ma_unpin_all_pages() may put an LSN on a page even though
    2698                 :     the page is PAGECACHE_PLAIN_PAGE (transactionality is temporarily
    2699                 :     disabled so that REDOs are not logged).
    2700                 :   */
    2701               0 :   DBUG_ASSERT((block->type == PAGECACHE_LSN_PAGE) || maria_in_recovery);
    2702               0 :   old= lsn_korr(block->buffer);
    2703               0 :   DBUG_PRINT("info", ("old lsn: (%lu, 0x%lx)  new lsn: (%lu, 0x%lx)",
    2704                 :                       LSN_IN_PARTS(old), LSN_IN_PARTS(lsn)));
    2705               0 :   if (cmp_translog_addr(lsn, old) > 0)
    2706                 :   {
    2707                 :     /* given LSN is newer than the one stored in the page buffer */
    2708               0 :     DBUG_ASSERT(block->type != PAGECACHE_READ_UNKNOWN_PAGE);
    2709               0 :     lsn_store(block->buffer, lsn);
    2710                 :     /* we stored LSN in page so we dirtied it */
    2711               0 :     if (!(block->status & PCBLOCK_CHANGED))
    2712               0 :       link_to_changed_list(pagecache, block);
    2713                 :   }
    2714               0 :   DBUG_VOID_RETURN;
    2715                 : }
    2716                 : 
    2717                 : 
    2718                 : /**
    2719                 :   @brief Unlock/unpin page and put LSN stamp if needed
    2720                 : 
    2721                 :   @param pagecache      pointer to a page cache data structure
    2722                 :   @param file           handler for the file for the block of data to be read
    2723                 :   @param pageno         number of the block of data in the file
    2724                 :   @param lock           lock change
    2725                 :   @param pin            pin page
    2726                 :   @param first_REDO_LSN_for_page do not set it if it is zero
    2727                 :   @param lsn            if it is not LSN_IMPOSSIBLE (0) and it
    2728                 :                         is bigger than LSN on the page it will be written on
    2729                 :                         the page
    2730                 :   @param was_changed    should be true if the page was write locked with
    2731                 :                         direct link giving and the page was changed
    2732                 : 
    2733                 :   @note
    2734                 :     Pinning uses the request registration mechanism; it works as follows:
    2735                 :                                 | beginning   | ending        |
    2736                 :                                 | of func.    | of func.      |
    2737                 :     ----------------------------+-------------+---------------+
    2738                 :     PAGECACHE_PIN_LEFT_PINNED   |      -      |       -       |
    2739                 :     PAGECACHE_PIN_LEFT_UNPINNED | reg request | unreg request |
    2740                 :     PAGECACHE_PIN               | reg request |       -       |
    2741                 :     PAGECACHE_UNPIN             |      -      | unreg request |
    2742                 : 
    2743                 : 
    2744                 : */
    2745                 : 
    2746                 : void pagecache_unlock(PAGECACHE *pagecache,
    2747                 :                       PAGECACHE_FILE *file,
    2748                 :                       pgcache_page_no_t pageno,
    2749                 :                       enum pagecache_page_lock lock,
    2750                 :                       enum pagecache_page_pin pin,
    2751                 :                       LSN first_REDO_LSN_for_page,
    2752                 :                       LSN lsn, my_bool was_changed)
    2753               0 : {
    2754                 :   PAGECACHE_BLOCK_LINK *block;
    2755                 :   int page_st;
    2756               0 :   DBUG_ENTER("pagecache_unlock");
    2757               0 :   DBUG_PRINT("enter", ("fd: %u  page: %lu  %s  %s",
    2758                 :                        (uint) file->file, (ulong) pageno,
    2759                 :                        page_cache_page_lock_str[lock],
    2760                 :                        page_cache_page_pin_str[pin]));
    2761                 :   /* we do not allow any lock/pin increasing here */
    2762               0 :   DBUG_ASSERT(pin != PAGECACHE_PIN);
    2763               0 :   DBUG_ASSERT(lock != PAGECACHE_LOCK_READ);
    2764               0 :   DBUG_ASSERT(lock != PAGECACHE_LOCK_WRITE);
    2765                 : 
    2766               0 :   pagecache_pthread_mutex_lock(&pagecache->cache_lock);
    2767                 :   /*
    2768                 :     As soon as we keep lock cache can be used, and we have lock because we
    2769                 :     want to unlock.
    2770                 :   */
    2771               0 :   DBUG_ASSERT(pagecache->can_be_used);
    2772                 : 
    2773               0 :   inc_counter_for_resize_op(pagecache);
    2774                 :   /* See NOTE for pagecache_unlock about registering requests */
    2775               0 :   block= find_block(pagecache, file, pageno, 0, 0,
    2776                 :                     pin == PAGECACHE_PIN_LEFT_UNPINNED, &page_st);
    2777               0 :   PCBLOCK_INFO(block);
    2778               0 :   DBUG_ASSERT(block != 0 && page_st == PAGE_READ);
    2779               0 :   if (first_REDO_LSN_for_page)
    2780                 :   {
    2781               0 :     DBUG_ASSERT(lock == PAGECACHE_LOCK_WRITE_UNLOCK);
    2782               0 :     DBUG_ASSERT(pin == PAGECACHE_UNPIN);
    2783               0 :     pagecache_set_block_rec_lsn(block, first_REDO_LSN_for_page);
    2784                 :   }
    2785               0 :   if (lsn != LSN_IMPOSSIBLE)
    2786               0 :     check_and_set_lsn(pagecache, lsn, block);
    2787                 : 
    2788                 :   /* if we lock for write we must link the block to changed blocks */
    2789               0 :   DBUG_ASSERT((block->status & PCBLOCK_DIRECT_W) == 0 ||
    2790                 :               (lock == PAGECACHE_LOCK_WRITE_UNLOCK ||
    2791                 :                lock == PAGECACHE_LOCK_WRITE_TO_READ ||
    2792                 :                lock == PAGECACHE_LOCK_LEFT_WRITELOCKED));
    2793                 :   /*
    2794                 :     if was_changed then status should be PCBLOCK_DIRECT_W or marked
    2795                 :     as dirty
    2796                 :   */
    2797               0 :   DBUG_ASSERT(!was_changed || (block->status & PCBLOCK_DIRECT_W) ||
    2798                 :               (block->status & PCBLOCK_CHANGED));
    2799               0 :   if ((block->status & PCBLOCK_DIRECT_W) &&
    2800                 :       (lock == PAGECACHE_LOCK_WRITE_UNLOCK ||
    2801                 :        lock == PAGECACHE_LOCK_WRITE_TO_READ))
    2802                 :   {
    2803               0 :     if (!(block->status & PCBLOCK_CHANGED) && was_changed)
    2804               0 :       link_to_changed_list(pagecache, block);
    2805               0 :     block->status&= ~PCBLOCK_DIRECT_W;
    2806               0 :     DBUG_PRINT("info", ("Drop PCBLOCK_DIRECT_W for block: 0x%lx",
    2807                 :                         (ulong) block));
    2808                 :   }
    2809                 : 
    2810               0 :   if (make_lock_and_pin(pagecache, block, lock, pin, FALSE))
    2811                 :   {
    2812               0 :     DBUG_ASSERT(0); /* should not happen */
    2813                 :   }
    2814                 : 
    2815               0 :   remove_reader(block);
    2816                 :   /*
    2817                 :     Link the block into the LRU chain if it's the last submitted request
    2818                 :     for the block and block will not be pinned.
    2819                 :     See NOTE for pagecache_unlock about registering requests.
    2820                 :   */
    2821               0 :   if (pin != PAGECACHE_PIN_LEFT_PINNED)
    2822               0 :     unreg_request(pagecache, block, 1);
    2823                 : 
    2824               0 :   dec_counter_for_resize_op(pagecache);
    2825                 : 
    2826               0 :   pagecache_pthread_mutex_unlock(&pagecache->cache_lock);
    2827                 : 
    2828               0 :   DBUG_VOID_RETURN;
    2829                 : }
    2830                 : 
    2831                 : 
    2832                 : /*
    2833                 :   Unpin page, keeping its read lock
    2834                 : 
    2835                 :   SYNOPSIS
    2836                 :     pagecache_unpin()
    2837                 :     pagecache           pointer to a page cache data structure
    2838                 :     file                handler for the file for the block of data to be read
    2839                 :     pageno              number of the block of data in the file
    2840                 :     lsn                 if it is not LSN_IMPOSSIBLE (0) and it
    2841                 :                         is bigger than LSN on the page it will be written on
    2842                 :                         the page
    2843                 : */
    2844                 : 
    2845                 : void pagecache_unpin(PAGECACHE *pagecache,
    2846                 :                      PAGECACHE_FILE *file,
    2847                 :                      pgcache_page_no_t pageno,
    2848                 :                      LSN lsn)
    2849               0 : {
    2850                 :   PAGECACHE_BLOCK_LINK *block;
    2851                 :   int page_st;
    2852               0 :   DBUG_ENTER("pagecache_unpin");
    2853               0 :   DBUG_PRINT("enter", ("fd: %u  page: %lu",
    2854                 :                        (uint) file->file, (ulong) pageno));
    2855               0 :   pagecache_pthread_mutex_lock(&pagecache->cache_lock);
    2856                 :   /*
    2857                 :     As soon as we keep lock cache can be used, and we have lock because we
    2858                 :     want to unlock.
    2859                 :   */
    2860               0 :   DBUG_ASSERT(pagecache->can_be_used);
    2861                 : 
    2862               0 :   inc_counter_for_resize_op(pagecache);
    2863                 :   /* See NOTE for pagecache_unlock about registering requests */
    2864               0 :   block= find_block(pagecache, file, pageno, 0, 0, 0, &page_st);
    2865               0 :   DBUG_ASSERT(block != 0);
    2866               0 :   DBUG_ASSERT(page_st == PAGE_READ);
    2867                 :   /* we can't unpin such page without unlock */
    2868               0 :   DBUG_ASSERT((block->status & PCBLOCK_DIRECT_W) == 0);
    2869                 : 
    2870               0 :   if (lsn != LSN_IMPOSSIBLE)
    2871               0 :     check_and_set_lsn(pagecache, lsn, block);
    2872                 : 
    2873                 :   /*
    2874                 :     we can just unpin only with keeping read lock because:
    2875                 :     a) we can't pin without any lock
    2876                 :     b) we can't unpin keeping write lock
    2877                 :   */
    2878               0 :   if (make_lock_and_pin(pagecache, block,
    2879                 :                         PAGECACHE_LOCK_LEFT_READLOCKED,
    2880                 :                         PAGECACHE_UNPIN, FALSE))
    2881               0 :     DBUG_ASSERT(0);                           /* should not happen */
    2882                 : 
    2883               0 :   remove_reader(block);
    2884                 :   /*
    2885                 :     Link the block into the LRU chain if it's the last submitted request
    2886                 :     for the block and block will not be pinned.
    2887                 :     See NOTE for pagecache_unlock about registering requests
    2888                 :   */
    2889               0 :   unreg_request(pagecache, block, 1);
    2890                 : 
    2891               0 :   dec_counter_for_resize_op(pagecache);
    2892                 : 
    2893               0 :   pagecache_pthread_mutex_unlock(&pagecache->cache_lock);
    2894                 : 
    2895               0 :   DBUG_VOID_RETURN;
    2896                 : }
    2897                 : 
    2898                 : 
    2899                 : /**
    2900                 :   @brief Unlock/unpin page and put LSN stamp if needed
    2901                 :   (uses direct block/page pointer)
    2902                 : 
    2903                 :   @param pagecache       pointer to a page cache data structure
    2904                 :   @param block           direct link to page (returned by read or write)
    2905                 :   @param lock            lock change
    2906                 :   @param pin             pin page
    2907                 :   @param first_REDO_LSN_for_page not set if LSN_IMPOSSIBLE (0) or !was_changed
    2908                 :   @param lsn             if not LSN_IMPOSSIBLE, was_changed is set and it is
    2909                 :                          bigger than LSN on the page, it is written on the page
    2910                 :   @param was_changed     should be true if the page was write locked with
    2911                 :                          direct link giving and the page was changed
    2912                 :   @param any             allow unpinning block pinned by any thread; possible
    2913                 :                          only if not locked
    2914                 : 
    2915                 :   @note 'any' is a hack so that _ma_bitmap_unpin_all() is allowed to unpin
    2916                 :   non-locked bitmap pages pinned by other threads. Because it always uses
    2917                 :   PAGECACHE_LOCK_LEFT_UNLOCKED and PAGECACHE_UNPIN
    2918                 :   (see write_changed_bitmap()), the hack is limited to these conditions.
    2919                 : */
    2920                 : 
    2921                 : void pagecache_unlock_by_link(PAGECACHE *pagecache,
    2922                 :                               PAGECACHE_BLOCK_LINK *block,
    2923                 :                               enum pagecache_page_lock lock,
    2924                 :                               enum pagecache_page_pin pin,
    2925                 :                               LSN first_REDO_LSN_for_page,
    2926                 :                               LSN lsn, my_bool was_changed,
    2927                 :                               my_bool any)
    2928            3798 : {
    2929            3798 :   DBUG_ENTER("pagecache_unlock_by_link");
    2930            3798 :   DBUG_PRINT("enter", ("block: 0x%lx  fd: %u  page: %lu  changed: %d  %s  %s",
    2931                 :                        (ulong) block,
    2932                 :                        (uint) block->hash_link->file.file,
    2933                 :                        (ulong) block->hash_link->pageno, was_changed,
    2934                 :                        page_cache_page_lock_str[lock],
    2935                 :                        page_cache_page_pin_str[pin]));
    2936                 :   /* We do not allow any lock/pin increasing here and page can't be
    2937                 :      unpinned because we use direct link. NOTE(review): the assert on
    2938                 :      PAGECACHE_PIN_LEFT_UNPINNED appears to contradict the branch on
    2939                 :      that same value just below - confirm which one is intended. */
    2940            3798 :   DBUG_ASSERT(pin != PAGECACHE_PIN);
    2941            3798 :   DBUG_ASSERT(pin != PAGECACHE_PIN_LEFT_UNPINNED);
    2942            3798 :   DBUG_ASSERT(lock != PAGECACHE_LOCK_READ);
    2943            3798 :   DBUG_ASSERT(lock != PAGECACHE_LOCK_WRITE);
    2944            3798 :   pagecache_pthread_mutex_lock(&pagecache->cache_lock);
    2945            3798 :   if (pin == PAGECACHE_PIN_LEFT_UNPINNED &&
    2946                 :       lock == PAGECACHE_LOCK_READ_UNLOCK)
    2947                 :   {
    2948               0 :     if (make_lock_and_pin(pagecache, block, lock, pin, FALSE))
    2949               0 :       DBUG_ASSERT(0);                         /* should not happen */
    2950               0 :     pagecache_pthread_mutex_unlock(&pagecache->cache_lock);
    2951               0 :     DBUG_VOID_RETURN;
    2952                 :   }
    2953                 : 
    2954                 :   /*
    2955                 :     As soon as we keep lock cache can be used, and we have lock because we
    2956                 :     want to unlock.
    2957                 :   */
    2958            3798 :   DBUG_ASSERT(pagecache->can_be_used);
    2959                 : 
    2960            3798 :   inc_counter_for_resize_op(pagecache);
    2961            3798 :   if (was_changed)
    2962                 :   {
    2963            3798 :     if (first_REDO_LSN_for_page != LSN_IMPOSSIBLE)
    2964                 :     {
    2965                 :       /*
    2966                 :         LOCK_READ_UNLOCK is ok here as the page may have first been locked
    2967                 :         with a WRITE lock that was temporarily converted to a READ lock
    2968                 :         before it's unpinned
    2969                 :       */
    2970               0 :       DBUG_ASSERT(lock == PAGECACHE_LOCK_WRITE_UNLOCK ||
    2971                 :                   lock == PAGECACHE_LOCK_READ_UNLOCK);
    2972               0 :       DBUG_ASSERT(pin == PAGECACHE_UNPIN);
    2973               0 :       pagecache_set_block_rec_lsn(block, first_REDO_LSN_for_page);
    2974                 :     }
    2975            3798 :     if (lsn != LSN_IMPOSSIBLE)
    2976               0 :       check_and_set_lsn(pagecache, lsn, block);
    2977                 :     /*
    2978                 :       Reset error flag. Mark also that page is active; This may not have
    2979                 :       been the case if there was an error reading the page
    2980                 :     */
    2981            3798 :     block->status= (block->status & ~PCBLOCK_ERROR) | PCBLOCK_READ;
    2982                 :   }
    2983                 : 
    2984                 :   /* if we lock for write we must link the block to changed blocks */
    2985            3798 :   DBUG_ASSERT((block->status & PCBLOCK_DIRECT_W) == 0 ||
    2986                 :               (lock == PAGECACHE_LOCK_WRITE_UNLOCK ||
    2987                 :                lock == PAGECACHE_LOCK_WRITE_TO_READ ||
    2988                 :                lock == PAGECACHE_LOCK_LEFT_WRITELOCKED));
    2989                 :   /*
    2990                 :     If was_changed then status should be PCBLOCK_DIRECT_W or marked
    2991                 :     as dirty
    2992                 :   */
    2993            3798 :   DBUG_ASSERT(!was_changed || (block->status & PCBLOCK_DIRECT_W) ||
    2994                 :               (block->status & PCBLOCK_CHANGED));
    2995            3798 :   if ((block->status & PCBLOCK_DIRECT_W) &&
    2996                 :       (lock == PAGECACHE_LOCK_WRITE_UNLOCK ||
    2997                 :        lock == PAGECACHE_LOCK_WRITE_TO_READ))
    2998                 :   {
    2999               0 :     if (!(block->status & PCBLOCK_CHANGED) && was_changed)
    3000               0 :       link_to_changed_list(pagecache, block);
    3001               0 :     block->status&= ~PCBLOCK_DIRECT_W;
    3002               0 :     DBUG_PRINT("info", ("Drop PCBLOCK_DIRECT_W for block: 0x%lx",
    3003                 :                         (ulong) block));
    3004                 :   }
    3005                 : 
    3006            3798 :   if (make_lock_and_pin(pagecache, block, lock, pin, any))
    3007               0 :     DBUG_ASSERT(0);                           /* should not happen */
    3008                 : 
    3009                 :   /*
    3010                 :     Link the block into the LRU chain if it's the last submitted request
    3011                 :     for the block and block will not be pinned.
    3012                 :     See NOTE for pagecache_unlock about registering requests.
    3013                 :   */
    3014            3798 :   if (pin != PAGECACHE_PIN_LEFT_PINNED)
    3015            1917 :     unreg_request(pagecache, block, 1);
    3016                 : 
    3017            3798 :   dec_counter_for_resize_op(pagecache);
    3018                 : 
    3019            3798 :   pagecache_pthread_mutex_unlock(&pagecache->cache_lock);
    3020                 : 
    3021            3798 :   DBUG_VOID_RETURN;
    3022                 : }
    3023                 : 
    3024                 : 
    3025                 : /*
    3026                 :   Unpin page, keeping its read lock
    3027                 :   (uses direct block/page pointer)
    3028                 : 
    3029                 :   SYNOPSIS
    3030                 :     pagecache_unpin_by_link()
    3031                 :     pagecache           pointer to a page cache data structure
    3032                 :     block               direct link to page (returned by read or write)
    3033                 :     lsn                 if it is not LSN_IMPOSSIBLE (0) and it
    3034                 :                         is bigger than LSN on the page it will be written on
    3035                 :                         the page
    3036                 : */
    3037                 : 
    3038                 : void pagecache_unpin_by_link(PAGECACHE *pagecache,
    3039                 :                              PAGECACHE_BLOCK_LINK *block,
    3040                 :                              LSN lsn)
    3041               0 : {
    3042               0 :   DBUG_ENTER("pagecache_unpin_by_link");
    3043               0 :   DBUG_PRINT("enter", ("block: 0x%lx  fd: %u page: %lu",
    3044                 :                        (ulong) block,
    3045                 :                        (uint) block->hash_link->file.file,
    3046                 :                        (ulong) block->hash_link->pageno));
    3047                 : 
    3048               0 :   pagecache_pthread_mutex_lock(&pagecache->cache_lock);
    3049                 :   /*
    3050                 :     As soon as we keep lock cache can be used, and we have lock because we
    3051                 :     want to unlock.
    3052                 :   */
    3053               0 :   DBUG_ASSERT(pagecache->can_be_used);
    3054                 :   /* we can't unpin such page without unlock */
    3055               0 :   DBUG_ASSERT((block->status & PCBLOCK_DIRECT_W) == 0);
    3056                 : 
    3057               0 :   inc_counter_for_resize_op(pagecache);
    3058                 : 
    3059               0 :   if (lsn != LSN_IMPOSSIBLE)
    3060               0 :     check_and_set_lsn(pagecache, lsn, block);
    3061                 : 
    3062                 :   /*
    3063                 :     We can just unpin only with keeping read lock because:
    3064                 :     a) we can't pin without any lock
    3065                 :     b) we can't unpin keeping write lock
    3066                 :   */
    3067               0 :   if (make_lock_and_pin(pagecache, block,
    3068                 :                         PAGECACHE_LOCK_LEFT_READLOCKED,
    3069                 :                         PAGECACHE_UNPIN, FALSE))
    3070               0 :     DBUG_ASSERT(0); /* should not happen */
    3071                 : 
    3072                 :   /*
    3073                 :     Link the block into the LRU chain if it's the last submitted request
    3074                 :     for the block and block will not be pinned.
    3075                 :     See NOTE for pagecache_unlock about registering requests.
    3076                 :   */
    3077               0 :   unreg_request(pagecache, block, 1);
    3078                 : 
    3079               0 :   dec_counter_for_resize_op(pagecache);
    3080                 : 
    3081               0 :   pagecache_pthread_mutex_unlock(&pagecache->cache_lock);
    3082                 : 
    3083               0 :   DBUG_VOID_RETURN;
    3084                 : }
    3085                 : 
    3086                 : /* Description of how to change the lock before and after read/write */
    3087                 : struct rw_lock_change
    3088                 : {
    3089                 :   my_bool need_lock_change; /* nonzero: lock must be changed at the end */
    3090                 :   enum pagecache_page_lock new_lock; /* lock change at the beginning */
    3091                 :   enum pagecache_page_lock unlock_lock; /* lock change at the end */
    3092                 : };
    3093                 : 
    3094                 : /* Description of how to change the pin before and after read/write */
    3095                 : struct rw_pin_change
    3096                 : {
    3097                 :   enum pagecache_page_pin new_pin; /* pin change at the beginning */
    3098                 :   enum pagecache_page_pin unlock_pin; /* pin change at the end */
    3099                 : };
    3100                 : 
    3101                 : /**
    3102                 :   Depending on the lock which the user wants in pagecache_read(), we
    3103                 :   need to acquire a first type of lock at the start of pagecache_read()
    3104                 :   and change it to a second type of lock at the end. For example, if the
    3105                 :   user asked for no lock (PAGECACHE_LOCK_LEFT_UNLOCKED), we first take a
    3106                 :   read lock PAGECACHE_LOCK_READ (to rightfully block on existing write
    3107                 :   locks), then read, then release it via PAGECACHE_LOCK_READ_UNLOCK (the
    3108                 :   "1" below tells that a change is needed). Each entry is labelled with
    3109                 :   the requested lock; presumably the index is that enum value - confirm.
    3110                 : */ 
    3111                 : 
    3112                 : static struct rw_lock_change lock_to_read[8]=
    3113                 : {
    3114                 :   { /*PAGECACHE_LOCK_LEFT_UNLOCKED*/
    3115                 :     1,
    3116                 :     PAGECACHE_LOCK_READ, PAGECACHE_LOCK_READ_UNLOCK
    3117                 :   },
    3118                 :   { /*PAGECACHE_LOCK_LEFT_READLOCKED*/
    3119                 :     0,
    3120                 :     PAGECACHE_LOCK_LEFT_READLOCKED, PAGECACHE_LOCK_LEFT_READLOCKED
    3121                 :   },
    3122                 :   { /*PAGECACHE_LOCK_LEFT_WRITELOCKED*/
    3123                 :     0,
    3124                 :     PAGECACHE_LOCK_LEFT_WRITELOCKED, PAGECACHE_LOCK_LEFT_WRITELOCKED
    3125                 :   },
    3126                 :   { /*PAGECACHE_LOCK_READ*/
    3127                 :     1,
    3128                 :     PAGECACHE_LOCK_READ, PAGECACHE_LOCK_LEFT_READLOCKED
    3129                 :   },
    3130                 :   { /*PAGECACHE_LOCK_WRITE*/
    3131                 :     1,
    3132                 :     PAGECACHE_LOCK_WRITE, PAGECACHE_LOCK_LEFT_WRITELOCKED
    3133                 :   },
    3134                 :   { /*PAGECACHE_LOCK_READ_UNLOCK*/
    3135                 :     1,
    3136                 :     PAGECACHE_LOCK_LEFT_READLOCKED, PAGECACHE_LOCK_READ_UNLOCK
    3137                 :   },
    3138                 :   { /*PAGECACHE_LOCK_WRITE_UNLOCK*/
    3139                 :     1,
    3140                 :     PAGECACHE_LOCK_LEFT_WRITELOCKED, PAGECACHE_LOCK_WRITE_UNLOCK
    3141                 :   },
    3142                 :   { /*PAGECACHE_LOCK_WRITE_TO_READ*/
    3143                 :     1,
    3144                 :     PAGECACHE_LOCK_LEFT_WRITELOCKED, PAGECACHE_LOCK_WRITE_TO_READ
    3145                 :   }
    3146                 : };
    3147                 : 
    3148                 : /**
    3149                 :   Two sets of pin modes (same idea as the lock table above, but for
    3150                 :   pinning). The difference between the sets is whether we are going to
    3151                 :   provide the caller with a reference to the block or not.
    3152                 : */
    3153                 : 
    3154                 : static struct rw_pin_change lock_to_pin[2][8]=
    3155                 : {
    3156                 :   {
    3157                 :     { /*PAGECACHE_LOCK_LEFT_UNLOCKED*/
    3158                 :       PAGECACHE_PIN_LEFT_UNPINNED,
    3159                 :       PAGECACHE_PIN_LEFT_UNPINNED
    3160                 :     },
    3161                 :     { /*PAGECACHE_LOCK_LEFT_READLOCKED*/
    3162                 :       PAGECACHE_PIN_LEFT_UNPINNED,
    3163                 :       PAGECACHE_PIN_LEFT_UNPINNED,
    3164                 :     },
    3165                 :     { /*PAGECACHE_LOCK_LEFT_WRITELOCKED*/
    3166                 :       PAGECACHE_PIN_LEFT_PINNED,
    3167                 :       PAGECACHE_PIN_LEFT_PINNED
    3168                 :     },
    3169                 :     { /*PAGECACHE_LOCK_READ*/
    3170                 :       PAGECACHE_PIN_LEFT_UNPINNED,
    3171                 :       PAGECACHE_PIN_LEFT_UNPINNED
    3172                 :     },
    3173                 :     { /*PAGECACHE_LOCK_WRITE*/
    3174                 :       PAGECACHE_PIN,
    3175                 :       PAGECACHE_PIN_LEFT_PINNED
    3176                 :     },
    3177                 :     { /*PAGECACHE_LOCK_READ_UNLOCK*/
    3178                 :       PAGECACHE_PIN_LEFT_UNPINNED,
    3179                 :       PAGECACHE_PIN_LEFT_UNPINNED
    3180                 :     },
    3181                 :     { /*PAGECACHE_LOCK_WRITE_UNLOCK*/
    3182                 :       PAGECACHE_PIN_LEFT_PINNED,
    3183                 :       PAGECACHE_UNPIN
    3184                 :     },
    3185                 :     { /*PAGECACHE_LOCK_WRITE_TO_READ*/
    3186                 :       PAGECACHE_PIN_LEFT_PINNED,
    3187                 :       PAGECACHE_UNPIN
    3188                 :     }
    3189                 :   },
    3190                 :   {
    3191                 :     { /*PAGECACHE_LOCK_LEFT_UNLOCKED*/
    3192                 :       PAGECACHE_PIN_LEFT_UNPINNED,
    3193                 :       PAGECACHE_PIN_LEFT_UNPINNED
    3194                 :     },
    3195                 :     { /*PAGECACHE_LOCK_LEFT_READLOCKED*/
    3196                 :       PAGECACHE_PIN_LEFT_UNPINNED,
    3197                 :       PAGECACHE_PIN_LEFT_UNPINNED,
    3198                 :     },
    3199                 :     { /*PAGECACHE_LOCK_LEFT_WRITELOCKED*/
    3200                 :       PAGECACHE_PIN_LEFT_PINNED,
    3201                 :       PAGECACHE_PIN_LEFT_PINNED
    3202                 :     },
    3203                 :     { /*PAGECACHE_LOCK_READ*/
    3204                 :       PAGECACHE_PIN,
    3205                 :       PAGECACHE_PIN_LEFT_PINNED
    3206                 :     },
    3207                 :     { /*PAGECACHE_LOCK_WRITE*/
    3208                 :       PAGECACHE_PIN,
    3209                 :       PAGECACHE_PIN_LEFT_PINNED
    3210                 :     },
    3211                 :     { /*PAGECACHE_LOCK_READ_UNLOCK*/
    3212                 :       PAGECACHE_PIN_LEFT_UNPINNED,
    3213                 :       PAGECACHE_PIN_LEFT_UNPINNED
    3214                 :     },
    3215                 :     { /*PAGECACHE_LOCK_WRITE_UNLOCK*/
    3216                 :       PAGECACHE_PIN_LEFT_PINNED,
    3217                 :       PAGECACHE_UNPIN
    3218                 :     },
    3219                 :     { /*PAGECACHE_LOCK_WRITE_TO_READ*/
    3220                 :       PAGECACHE_PIN_LEFT_PINNED,
    3221                 :       PAGECACHE_PIN_LEFT_PINNED,
    3222                 :     }
    3223                 :   }
    3224                 : };
    3225                 : 
    3226                 : 
    3227                 : /*
    3228                 :   @brief Read a block of data from a cached file into a buffer;
    3229                 : 
    3230                 :   @param pagecache      pointer to a page cache data structure
    3231                 :   @param file           handler for the file for the block of data to be read
    3232                 :   @param pageno         number of the block of data in the file
    3233                 :   @param level          determines the weight of the data
    3234                 :   @param buff           buffer to where the data must be placed
    3235                 :   @param type           type of the page
    3236                 :   @param lock           lock change
    3237                 :   @param page_link      out: set to the block if it is left pinned,
                         :                         otherwise to 0; may be NULL if not needed
    3238                 : 
    3239                 :   @return address from where the data is placed if successful, 0 - otherwise.
    3240                 : 
    3241                 :   @note Pin will be chosen according to lock parameter (see lock_to_pin)
    3242                 : 
    3243                 :   @note 'buff', if not NULL, must be long-aligned.
    3244                 : 
    3245                 :   @note  If buff==0 then we provide reference on the page so should keep the
    3246                 :   page pinned.
                         : 
                         :   @note Every inc_counter_for_resize_op() is matched by exactly one
                         :   dec_counter_for_resize_op(), including when the lock attempt fails and
                         :   the function restarts.
    3247                 : */
    3248                 : 
    3249                 : uchar *pagecache_read(PAGECACHE *pagecache,
    3250                 :                       PAGECACHE_FILE *file,
    3251                 :                       pgcache_page_no_t pageno,
    3252                 :                       uint level,
    3253                 :                       uchar *buff,
    3254                 :                       enum pagecache_page_type type,
    3255                 :                       enum pagecache_page_lock lock,
    3256                 :                       PAGECACHE_BLOCK_LINK **page_link)
    3257            3852 : {
    3258            3852 :   my_bool error= 0;
    3259                 :   enum pagecache_page_pin
    3260            3852 :     new_pin= lock_to_pin[buff==0][lock].new_pin,
    3261            3852 :     unlock_pin= lock_to_pin[buff==0][lock].unlock_pin;
    3262                 :   PAGECACHE_BLOCK_LINK *fake_link;
    3263                 :   my_bool reg_request;
    3264                 : #ifndef DBUG_OFF
    3265                 :   char llbuf[22];
    3266            3852 :   DBUG_ENTER("pagecache_read");
    3267            3852 :   DBUG_PRINT("enter", ("fd: %u  page: %s  buffer: 0x%lx level: %u  "
    3268                 :                        "t:%s  (%d)%s->%s  %s->%s",
    3269                 :                        (uint) file->file, ullstr(pageno, llbuf),
    3270                 :                        (ulong) buff, level,
    3271                 :                        page_cache_page_type_str[type],
    3272                 :                        lock_to_read[lock].need_lock_change,
    3273                 :                        page_cache_page_lock_str[lock_to_read[lock].new_lock],
    3274                 :                        page_cache_page_lock_str[lock_to_read[lock].unlock_lock],
    3275                 :                        page_cache_page_pin_str[new_pin],
    3276                 :                        page_cache_page_pin_str[unlock_pin]));
    3277            3852 :   DBUG_ASSERT(buff != 0 || (buff == 0 && (unlock_pin == PAGECACHE_PIN ||
    3278                 :                                           unlock_pin == PAGECACHE_PIN_LEFT_PINNED)));
    3279            3852 :   DBUG_ASSERT(pageno < ((ULL(1)) << 40));
    3280                 : #endif
    3281                 : 
    3282            3852 :   if (!page_link)
    3283            1971 :     page_link= &fake_link;
    3284            3852 :   *page_link= 0;                                 /* Catch errors */
    3285                 : 
    3286            3852 : restart:
    3287                 : 
    3288            3852 :   if (pagecache->can_be_used)
    3289                 :   {
    3290                 :     /* Key cache is used */
    3291                 :     PAGECACHE_BLOCK_LINK *block;
    3292                 :     uint status;
    3293                 :     int page_st;
    3294                 : 
    3295            3852 :     pagecache_pthread_mutex_lock(&pagecache->cache_lock);
    3296            3852 :     if (!pagecache->can_be_used)
    3297                 :     {
    3298               0 :       pagecache_pthread_mutex_unlock(&pagecache->cache_lock);
    3299               0 :       goto no_key_cache;
    3300                 :     }
    3301                 : 
    3302            3852 :     inc_counter_for_resize_op(pagecache);
    3303            3852 :     pagecache->global_cache_r_requests++;
    3304                 :     /* See NOTE for pagecache_unlock about registering requests. */
    3305            3852 :     reg_request= ((new_pin == PAGECACHE_PIN_LEFT_UNPINNED) ||
    3306                 :                   (new_pin == PAGECACHE_PIN));
    3307            3852 :     block= find_block(pagecache, file, pageno, level,
    3308                 :                       lock == PAGECACHE_LOCK_WRITE,
    3309                 :                       reg_request, &page_st);
    3310            3852 :     DBUG_PRINT("info", ("Block type: %s current type %s",
    3311                 :                         page_cache_page_type_str[block->type],
    3312                 :                         page_cache_page_type_str[type]));
    3313            3852 :     if (((block->status & PCBLOCK_ERROR) == 0) && (page_st != PAGE_READ))
    3314                 :     {
    3315                 :       /* The requested page is to be read into the block buffer */
    3316               9 :       read_block(pagecache, block,
    3317                 :                  (my_bool)(page_st == PAGE_TO_BE_READ));
    3318               9 :       DBUG_PRINT("info", ("read is done"));
    3319                 :     }
    3320                 :     /*
    3321                 :       Assert after block is read. Imagine two concurrent SELECTs on same
    3322                 :       table (thread1 and 2), which want to pagecache_read() the same
    3323                 :       pageno/fileno. Thread1 calls find_block(), decides to evict a dirty
    3324                 :       page from LRU; while it's writing this dirty page to disk, it is
    3325                 :       pre-empted and thread2 runs its find_block(), gets the block (in
    3326                 :       PAGE_TO_BE_READ state). This block still contains the in-eviction
    3327                 :       dirty page and so still has that page's type, which cannot be tested.
    3328                 :       So thread2 has to wait for read_block() to finish (when it wakes up in
    3329                 :       read_block(), it's woken up by read_block() of thread1, which implies
    3330                 :       that block's type was set to EMPTY by thread1 as part of find_block()).
    3331                 :     */
    3332            3852 :     DBUG_ASSERT(block->type == PAGECACHE_EMPTY_PAGE ||
    3333                 :                 block->type == type ||
    3334                 :                 type == PAGECACHE_LSN_PAGE ||
    3335                 :                 type == PAGECACHE_READ_UNKNOWN_PAGE ||
    3336                 :                 block->type == PAGECACHE_READ_UNKNOWN_PAGE);
    3337            3852 :     if (type != PAGECACHE_READ_UNKNOWN_PAGE ||
    3338                 :         block->type == PAGECACHE_EMPTY_PAGE)
    3339            3852 :       block->type= type;
    3340                 : 
    3341            3852 :     if (make_lock_and_pin(pagecache, block, lock_to_read[lock].new_lock,
    3342                 :                           new_pin, FALSE))
    3343                 :     {
    3344                 :       /*
    3345                 :         We failed to write lock the block, cache is unlocked,
    3346                 :         we will try to get the block again.
    3347                 :       */
    3348               0 :       if (reg_request)
    3349               0 :         unreg_request(pagecache, block, 1);
                         :       /*
                         :         The restart runs inc_counter_for_resize_op() again, so undo this
                         :         pass's increment first; otherwise the counter never gets back to
                         :         its starting value.
                         :       */
                         :       dec_counter_for_resize_op(pagecache);
    3350               0 :       pagecache_pthread_mutex_unlock(&pagecache->cache_lock);
    3351               0 :       DBUG_PRINT("info", ("restarting..."));
    3352               0 :       goto restart;
    3353                 :     }
    3354                 : 
    3355            3852 :     status= block->status;
    3356            3852 :     if (!buff)
    3357                 :     {
    3358            1881 :       buff=  block->buffer;
    3359                 :       /* possibly we will write here (resolved on unlock) */
    3360            1881 :       if ((lock == PAGECACHE_LOCK_WRITE ||
    3361                 :            lock == PAGECACHE_LOCK_LEFT_WRITELOCKED) &&
    3362                 :           !(block->status & PCBLOCK_CHANGED))
    3363                 :       {
    3364               0 :         block->status|= PCBLOCK_DIRECT_W;
    3365               0 :         DBUG_PRINT("info", ("Set PCBLOCK_DIRECT_W for block: 0x%lx",
    3366                 :                             (ulong) block));
    3367                 :       }
    3368                 :     }
    3369                 :     else
    3370                 :     {
    3371            1971 :       if (!(status & PCBLOCK_ERROR))
    3372                 :       {
    3373                 : #if !defined(SERIALIZED_READ_FROM_CACHE)
    3374                 :         pagecache_pthread_mutex_unlock(&pagecache->cache_lock);
    3375                 : #endif
    3376                 : 
    3377            1971 :         DBUG_ASSERT((pagecache->block_size & 511) == 0);
    3378                 :         /* Copy data from the cache buffer */
    3379            1971 :         bmove512(buff, block->buffer, pagecache->block_size);
    3380                 : 
    3381                 : #if !defined(SERIALIZED_READ_FROM_CACHE)
    3382                 :         pagecache_pthread_mutex_lock(&pagecache->cache_lock);
    3383                 : #endif
    3384                 :       }
    3385                 :       else
    3386               0 :         my_errno= block->error;
    3387                 :     }
    3388                 : 
    3389            3852 :     remove_reader(block);
    3390            3852 :     if (lock_to_read[lock].need_lock_change)
    3391                 :     {
    3392            3852 :       if (make_lock_and_pin(pagecache, block,
    3393                 :                             lock_to_read[lock].unlock_lock,
    3394                 :                             unlock_pin, FALSE))
    3395               0 :         DBUG_ASSERT(0);
    3396                 :     }
    3397                 :     /*
    3398                 :       Link the block into the LRU chain if it's the last submitted request
    3399                 :       for the block and block will not be pinned.
    3400                 :       See NOTE for pagecache_unlock about registering requests.
    3401                 :     */
    3402            5823 :     if (unlock_pin == PAGECACHE_PIN_LEFT_UNPINNED ||
    3403                 :         unlock_pin == PAGECACHE_UNPIN)
    3404            1971 :       unreg_request(pagecache, block, 1);
    3405                 :     else
    3406            1881 :       *page_link= block;
    3407                 : 
    3408            3852 :     dec_counter_for_resize_op(pagecache);
    3409                 : 
    3410            3852 :     pagecache_pthread_mutex_unlock(&pagecache->cache_lock);
    3411                 : 
    3412            3852 :     if (status & PCBLOCK_ERROR)
    3413                 :     {
    3414               0 :       DBUG_ASSERT(my_errno != 0);
    3415               0 :       DBUG_PRINT("error", ("Got error %d when doing page read", my_errno));
    3416               0 :       DBUG_RETURN((uchar *) 0);
    3417                 :     }
    3418                 : 
    3419            3852 :     DBUG_RETURN(buff);
    3420                 :   }
    3421                 : 
    3422               0 : no_key_cache:                                   /* Key cache is not used */
    3423                 : 
    3424                 :   /* We can't use mutex here as the key cache may not be initialized */
    3425               0 :   pagecache->global_cache_r_requests++;
    3426               0 :   pagecache->global_cache_read++;
    3427               0 :   if (pagecache_fread(pagecache, file, buff, pageno,
    3428                 :                       pagecache->readwrite_flags))
    3429               0 :     error= 1;
    3430               0 :   DBUG_RETURN(error ? (uchar*) 0 : buff);
    3431                 : }
    3432                 : 
    3433                 : 
    3434                 : /*
    3435                 :   @brief Delete page from the buffer (common part for link and file/page)
    3436                 : 
    3437                 :   @param pagecache      pointer to a page cache data structure
    3438                 :   @param block          direct link to page (returned by read or write)
    3439                 :   @param page_link      hash link of the block
    3440                 :   @param flush          flush page if it is dirty
    3441                 : 
    3442                 :   @retval 0 deleted or was not present at all
    3443                 :   @retval 1 error
    3444                 : 
                         :   @note Both callers hold pagecache->cache_lock when calling; the lock
                         :   is released only around the pagecache_fwrite() call and is held again
                         :   on return.
                         : 
                         :   @note dec_counter_for_resize_op() is called on every exit path.
                         : 
                         :   @note If the flush fails, the block is flagged PCBLOCK_ERROR and is
                         :   neither unlocked/unpinned nor freed, and page_link->requests is left
                         :   unchanged.
    3445                 : */
    3446                 : 
    3447                 : static my_bool pagecache_delete_internal(PAGECACHE *pagecache,
    3448                 :                                          PAGECACHE_BLOCK_LINK *block,
    3449                 :                                          PAGECACHE_HASH_LINK *page_link,
    3450                 :                                          my_bool flush)
    3451               0 : {
    3452               0 :   my_bool error= 0;
    3453               0 :   if (block->status & PCBLOCK_CHANGED)
    3454                 :   {
    3455               0 :     if (flush)
    3456                 :     {
    3457                 :       /* The block contains a dirty page - push it out of the cache */
    3458                 : 
    3459               0 :       KEYCACHE_DBUG_PRINT("find_block", ("block is dirty"));
    3460                 : 
    3461               0 :       pagecache_pthread_mutex_unlock(&pagecache->cache_lock);
    3462                 :       /*
    3463                 :         The call is thread safe because only the current
    3464                 :         thread might change the block->hash_link value
    3465                 :       */
    3466               0 :       DBUG_ASSERT(block->pins == 1);
    3467               0 :       error= pagecache_fwrite(pagecache,
    3468                 :                               &block->hash_link->file,
    3469                 :                               block->buffer,
    3470                 :                               block->hash_link->pageno,
    3471                 :                               block->type,
    3472                 :                               pagecache->readwrite_flags);
    3473               0 :       pagecache_pthread_mutex_lock(&pagecache->cache_lock);
    3474               0 :       pagecache->global_cache_write++;
    3475                 : 
    3476               0 :       if (error)
    3477                 :       {
    3478               0 :         block->status|= PCBLOCK_ERROR;
    3479               0 :         block->error=   (int16) my_errno;
    3480               0 :         my_debug_put_break_here(); /*
                         :           NOTE(review): the jump below skips the unlock/unpin, the
                         :           requests-- and free_block(); confirm that callers expect the
                         :           block to stay locked and pinned after a failed flush.
                         :         */
    3481               0 :         goto err;
    3482                 :       }
    3483                 :     }
    3484               0 :     pagecache->blocks_changed--;
    3485               0 :     pagecache->global_blocks_changed--;
    3486                 :     /*
    3487                 :       free_block() will change the status and rec_lsn of the block so no
    3488                 :       need to change them here.
    3489                 :     */
    3490                 :   }
    3491                 :   /* Cache is locked, so we can release the page before freeing it */
    3492               0 :   if (make_lock_and_pin(pagecache, block,
    3493                 :                         PAGECACHE_LOCK_WRITE_UNLOCK,
    3494                 :                         PAGECACHE_UNPIN, FALSE))
    3495               0 :     DBUG_ASSERT(0);
    3496               0 :   DBUG_ASSERT(block->hash_link->requests > 0);
    3497               0 :   page_link->requests--;
    3498                 :   /* See NOTE for pagecache_unlock about registering requests. */
    3499               0 :   free_block(pagecache, block);
    3500                 : 
    3501               0 : err:
    3502               0 :   dec_counter_for_resize_op(pagecache);
    3503               0 :   return error;
    3504                 : }
    3505                 : 
    3506                 : 
    3507                 : /*
    3508                 :   @brief Delete page from the buffer by link
    3509                 : 
    3510                 :   @param pagecache      pointer to a page cache data structure
    3511                 :   @param block          direct link to page (returned by read or write)
    3512                 :   @param lock           lock change
    3513                 :   @param flush          flush page if it is dirty
    3514                 : 
    3515                 :   @retval 0 deleted or was not present at all
    3516                 :   @retval 1 error
    3517                 : 
    3518                 :   @note lock  can be only PAGECACHE_LOCK_LEFT_WRITELOCKED (page was
    3519                 :   write locked before) or PAGECACHE_LOCK_WRITE (delete will write
    3520                 :   lock page before delete)
                         : 
                         :   @note The block must already be pinned by the caller. Nothing is done
                         :   and 0 is returned if the page cache cannot be used.
    3521                 : */
    3522                 : 
    3523                 : my_bool pagecache_delete_by_link(PAGECACHE *pagecache,
    3524                 :                                  PAGECACHE_BLOCK_LINK *block,
    3525                 :                                  enum pagecache_page_lock lock,
    3526                 :                                  my_bool flush)
    3527               0 : {
    3528               0 :   my_bool error= 0;
    3529               0 :   enum pagecache_page_pin pin= PAGECACHE_PIN_LEFT_PINNED;
    3530               0 :   DBUG_ENTER("pagecache_delete_by_link");
    3531               0 :   DBUG_PRINT("enter", ("fd: %d block 0x%lx  %s  %s",
    3532                 :                        block->hash_link->file.file,
    3533                 :                        (ulong) block,
    3534                 :                        page_cache_page_lock_str[lock],
    3535                 :                        page_cache_page_pin_str[pin]));
    3536               0 :   DBUG_ASSERT(lock == PAGECACHE_LOCK_WRITE ||
    3537                 :               lock == PAGECACHE_LOCK_LEFT_WRITELOCKED);
    3538               0 :   DBUG_ASSERT(block->pins != 0); /* should be pinned */
    3539                 : 
    3540               0 :   if (pagecache->can_be_used)
    3541                 :   {
    3542               0 :     pagecache_pthread_mutex_lock(&pagecache->cache_lock);
    3543               0 :     if (!pagecache->can_be_used)
    3544               0 :       goto end;
    3545                 : 
    3546                 :     /*
    3547                 :       This block should be pinned (i.e. has not zero request counter) =>
    3548                 :       Such block can't be chosen for eviction.
    3549                 :     */
    3550               0 :     DBUG_ASSERT((block->status &
    3551                 :                  (PCBLOCK_IN_SWITCH | PCBLOCK_REASSIGNED)) == 0);
                         : 
                         :     /*
                         :       pagecache_delete_internal() always finishes with
                         :       dec_counter_for_resize_op(); take the matching count here so the
                         :       counter is not decremented without having been incremented.
                         :     */
                         :     inc_counter_for_resize_op(pagecache);
                         : 
    3552                 :     /*
    3553                 :       make_lock_and_pin() can't fail here, because we are keeping pin on the
    3554                 :       block and it can't be evicted (which is cause of lock fail and retry)
    3555                 :     */
    3556               0 :     if (make_lock_and_pin(pagecache, block, lock, pin, FALSE))
    3557               0 :       DBUG_ASSERT(0);
    3558                 : 
    3559                 :     /*
    3560                 :       get_present_hash_link() side effect emulation before call
    3561                 :       pagecache_delete_internal()
    3562                 :     */
    3563               0 :     block->hash_link->requests++;
    3564                 : 
    3565               0 :     error= pagecache_delete_internal(pagecache, block, block->hash_link,
    3566                 :                                      flush);
    3567               0 : end:
    3568               0 :     pagecache_pthread_mutex_unlock(&pagecache->cache_lock);
    3569                 :   }
    3570                 : 
    3571               0 :   DBUG_RETURN(error);
    3572                 : }
    3573                 : 
    3574                 : 
    3575                 : /**
    3576                 :   @brief Returns "hits" for promotion
    3577                 : 
                         :   @param block          direct link to page
                         : 
    3578                 :   @return "hits" for promotion (the block's current hits_left value,
                         :   read without taking the cache lock)
    3579                 : */
    3580                 : 
    3581                 : uint pagecache_pagelevel(PAGECACHE_BLOCK_LINK *block)
    3582               0 : {
    3583               0 :   return block->hits_left;
    3584                 : }
    3585                 : 
    3586                 : /*
    3587                 :   @brief Adds "hits" to the page
    3588                 : 
    3589                 :   @param block          direct link to page (returned by read or write)
    3590                 :   @param level          number of "hits" which we add to the page
    3591                 : */
    3592                 : 
    3593                 : void pagecache_add_level_by_link(PAGECACHE_BLOCK_LINK *block,
    3594                 :                                  uint level)
    3595               0 : {
    3596               0 :   DBUG_ASSERT(block->pins != 0); /* should be pinned */
    3597                 :   /*
    3598                 :     Operation is just for statistics so it is not really important
    3599                 :     if it interferes with other hit increases => we are doing it without
    3600                 :     locking the pagecache.
    3601                 :   */
    3602               0 :   block->hits_left+= level;
    3603                 : }
    3604                 : 
    3605                 : /*
    3606                 :   @brief Delete page from the buffer
    3607                 : 
    3608                 :   @param pagecache      pointer to a page cache data structure
    3609                 :   @param file           handler for the file of the page to be deleted
    3610                 :   @param pageno         number of the page in the file
    3611                 :   @param lock           lock change
    3612                 :   @param flush          flush page if it is dirty
    3613                 : 
    3614                 :   @retval 0 deleted or was not present at all
    3615                 :   @retval 1 error
    3616                 : 
    3617                 :   @note lock  can be only PAGECACHE_LOCK_LEFT_WRITELOCKED (page was
    3618                 :   write locked before) or PAGECACHE_LOCK_WRITE (delete will write
    3619                 :   lock page before delete)
                         : 
                         :   @note This comment documents pagecache_delete(), which is defined after
                         :   the table below. The table, lock_to_pin_one_phase[], is indexed by
                         :   enum pagecache_page_lock and gives the pin change pagecache_delete()
                         :   passes to make_lock_and_pin() together with the lock change.
    3620                 : */
    3621                 : static enum pagecache_page_pin lock_to_pin_one_phase[8]=
    3622                 : {
    3623                 :   PAGECACHE_PIN_LEFT_UNPINNED /*PAGECACHE_LOCK_LEFT_UNLOCKED*/,
    3624                 :   PAGECACHE_PIN_LEFT_UNPINNED /*PAGECACHE_LOCK_LEFT_READLOCKED*/,
    3625                 :   PAGECACHE_PIN_LEFT_PINNED   /*PAGECACHE_LOCK_LEFT_WRITELOCKED*/,
    3626                 :   PAGECACHE_PIN_LEFT_UNPINNED /*PAGECACHE_LOCK_READ*/,
    3627                 :   PAGECACHE_PIN               /*PAGECACHE_LOCK_WRITE*/,
    3628                 :   PAGECACHE_PIN_LEFT_UNPINNED /*PAGECACHE_LOCK_READ_UNLOCK*/,
    3629                 :   PAGECACHE_UNPIN             /*PAGECACHE_LOCK_WRITE_UNLOCK*/,
    3630                 :   PAGECACHE_UNPIN             /*PAGECACHE_LOCK_WRITE_TO_READ*/
    3631                 : };
    3632                 : 
    3633                 : my_bool pagecache_delete(PAGECACHE *pagecache,
    3634                 :                          PAGECACHE_FILE *file,
    3635                 :                          pgcache_page_no_t pageno,
    3636                 :                          enum pagecache_page_lock lock,
    3637                 :                          my_bool flush)
    3638               0 : {
    3639               0 :   my_bool error= 0;
    3640               0 :   enum pagecache_page_pin pin= lock_to_pin_one_phase[lock];
    3641               0 :   DBUG_ENTER("pagecache_delete");
    3642               0 :   DBUG_PRINT("enter", ("fd: %u  page: %lu  %s  %s",
    3643                 :                        (uint) file->file, (ulong) pageno,
    3644                 :                        page_cache_page_lock_str[lock],
    3645                 :                        page_cache_page_pin_str[pin]));
    3646               0 :   DBUG_ASSERT(lock == PAGECACHE_LOCK_WRITE ||
    3647                 :               lock == PAGECACHE_LOCK_LEFT_WRITELOCKED);
    3648               0 :   DBUG_ASSERT(pin == PAGECACHE_PIN ||
    3649                 :               pin == PAGECACHE_PIN_LEFT_PINNED);
    3650               0 : restart:
    3651                 : 
    3652               0 :   DBUG_ASSERT(pageno < ((ULL(1)) << 40));
    3653               0 :   if (pagecache->can_be_used)
    3654                 :   {
    3655                 :     /* Key cache is used */
    3656                 :     reg1 PAGECACHE_BLOCK_LINK *block;
    3657                 :     PAGECACHE_HASH_LINK **unused_start, *page_link;
    3658                 : 
    3659               0 :     pagecache_pthread_mutex_lock(&pagecache->cache_lock);
    3660               0 :     if (!pagecache->can_be_used)
    3661               0 :       goto end;
    3662                 : 
    3663                 :     /*
                         :       From here on every exit must be paired with one
                         :       dec_counter_for_resize_op(): either directly below, or through
                         :       pagecache_delete_internal(), which calls it on all of its paths.
                         :     */
    3663               0 :     inc_counter_for_resize_op(pagecache);
    3664               0 :     page_link= get_present_hash_link(pagecache, file, pageno, &unused_start);
    3665               0 :     if (!page_link)
    3666                 :     {
    3667               0 :       DBUG_PRINT("info", ("There is no such page in the cache"));
                         :       dec_counter_for_resize_op(pagecache);
    3668               0 :       pagecache_pthread_mutex_unlock(&pagecache->cache_lock);
    3669               0 :       DBUG_RETURN(0);
    3670                 :     }
    3671               0 :     block= page_link->block;
    3672               0 :     if (block->status & (PCBLOCK_REASSIGNED | PCBLOCK_IN_SWITCH))
    3673                 :     {
    3674               0 :       DBUG_PRINT("info", ("Block 0x%0lx already is %s",
    3675                 :                           (ulong) block,
    3676                 :                           ((block->status & PCBLOCK_REASSIGNED) ?
    3677                 :                            "reassigned" : "in switch")));
    3678               0 :       PCBLOCK_INFO(block);
    3679               0 :       page_link->requests--;
                         :       dec_counter_for_resize_op(pagecache);
    3680               0 :       goto end;
    3681                 :     }
    3682                 :     /* See NOTE for pagecache_unlock about registering requests. */
    3683               0 :     if (pin == PAGECACHE_PIN)
    3684               0 :       reg_requests(pagecache, block, 1);
    3685               0 :     DBUG_ASSERT(block != 0);
    3686               0 :     if (make_lock_and_pin(pagecache, block, lock, pin, FALSE))
    3687                 :     {
    3688                 :       /*
    3689                 :         We failed to writelock the block, cache is unlocked, and last write
    3690                 :         lock is released, we will try to get the block again.
    3691                 :       */
    3692               0 :       if (pin == PAGECACHE_PIN)
    3693               0 :         unreg_request(pagecache, block, 1);
                         :       /* The restart increments the counter again, so undo this pass */
                         :       dec_counter_for_resize_op(pagecache);
    3694               0 :       pagecache_pthread_mutex_unlock(&pagecache->cache_lock);
    3695               0 :       DBUG_PRINT("info", ("restarting..."));
    3696               0 :       goto restart;
    3697                 :     }
    3698                 : 
    3699                 :     /* we can't delete with opened direct link for write */
    3700               0 :     DBUG_ASSERT((block->status & PCBLOCK_DIRECT_W) == 0);
    3701                 : 
    3702               0 :     error= pagecache_delete_internal(pagecache, block, page_link, flush);
    3703               0 : end:
    3704               0 :     pagecache_pthread_mutex_unlock(&pagecache->cache_lock);
    3705                 :   }
    3706                 : 
    3707               0 :   DBUG_RETURN(error);
    3708                 : }
    3709                 : 
    3710                 : /**
                         :   @brief Apply pagecache_delete() to a run of consecutive pages.
                         :
                         :   Calls pagecache_delete() once per page, in ascending page order,
                         :   for pages pageno .. pageno + page_count - 1, and stops at the
                         :   first call that returns non-zero.
                         :
                         :   @param pagecache       pointer to a page cache data structure
                         :   @param file            handler for the file the pages belong to
                         :   @param pageno          number of the first page
                         :   @param page_count      number of pages; must be > 0 (asserted)
                         :   @param lock            passed unchanged to pagecache_delete()
                         :   @param flush           passed unchanged to pagecache_delete()
                         :
                         :   @retval 0 every pagecache_delete() call returned 0
                         :   @retval 1 a pagecache_delete() call returned non-zero; the
                         :             remaining pages were not processed
                         : */
    3711                 : my_bool pagecache_delete_pages(PAGECACHE *pagecache,
    3712                 :                                PAGECACHE_FILE *file,
    3713                 :                                pgcache_page_no_t pageno,
    3714                 :                                uint page_count,
    3715                 :                                enum pagecache_page_lock lock,
    3716                 :                                my_bool flush)
    3717               0 : {
    3718                 :   pgcache_page_no_t page_end;
    3719               0 :   DBUG_ENTER("pagecache_delete_pages");
    3720               0 :   DBUG_ASSERT(page_count > 0);
    3721                 :   /* page_end is one past the last page; the do/while runs at least once */
    3722               0 :   page_end= pageno + page_count;
    3723                 :   do
    3724                 :   {
    3725               0 :     if (pagecache_delete(pagecache, file, pageno,
    3726                 :                          lock, flush))
    3727               0 :       DBUG_RETURN(1);
    3728               0 :   } while (++pageno != page_end);
    3729               0 :   DBUG_RETURN(0);
    3730                 : }
    3731                 : 
    3732                 : 
    3733                 : /**
    3734                 :   @brief Writes a buffer into a cached file.
    3735                 : 
    3736                 :   @param pagecache       pointer to a page cache data structure
    3737                 :   @param file            handler for the file to write data to
    3738                 :   @param pageno          number of the block of data in the file
    3739                 :   @param level           determines the weight of the data
    3740                 :   @param buff            buffer with the data
    3741                 :   @param type            type of the page
    3742                 :   @param lock            lock change
    3743                 :   @param pin             pin page
    3744                 :   @param write_mode      how to write page
    3745                 :   @param page_link       receives the block if the page stays pinned, else 0
    3746                 :   @param first_REDO_LSN_for_page the LSN to set as the block's rec_lsn
    3747                 :   @param offset          offset in the page
    3748                 :   @param size            size of data
    3749                 :   @param validator       (stale: not a parameter of pagecache_write_part)
    3750                 :   @param validator_data  (stale: not a parameter of pagecache_write_part)
    3751                 : 
    3752                 :   @retval 0 if a success.
    3753                 :   @retval 1 Error.
    3754                 : */
    3755                 : /*
                         :   Lock transitions used by pagecache_write_part(), indexed by the
                         :   'lock' argument (enum pagecache_page_lock); the comment after each
                         :   entry names the enum value it belongs to.
                         :
                         :   pagecache_write_part() reads three members of each entry:
                         :   need_lock_change, new_lock (requested before the data is copied)
                         :   and unlock_lock (applied after the copy, only when
                         :   need_lock_change is non-zero).
                         :   NOTE(review): the initializers are presumably in that member
                         :   order -- confirm against the definition of struct rw_lock_change.
                         : */
    3756                 : static struct rw_lock_change write_lock_change_table[]=
    3757                 : {
    3758                 :   {1,
    3759                 :    PAGECACHE_LOCK_WRITE,
    3760                 :    PAGECACHE_LOCK_WRITE_UNLOCK} /*PAGECACHE_LOCK_LEFT_UNLOCKED*/,
    3761                 :   {0, /*unsupported (we can't write having the block read locked) */
    3762                 :    PAGECACHE_LOCK_LEFT_UNLOCKED,
    3763                 :    PAGECACHE_LOCK_LEFT_UNLOCKED} /*PAGECACHE_LOCK_LEFT_READLOCKED*/,
    3764                 :   {0, PAGECACHE_LOCK_LEFT_WRITELOCKED, 0} /*PAGECACHE_LOCK_LEFT_WRITELOCKED*/,
    3765                 :   {1,
    3766                 :    PAGECACHE_LOCK_WRITE,
    3767                 :    PAGECACHE_LOCK_WRITE_TO_READ} /*PAGECACHE_LOCK_READ*/,
    3768                 :   {0, PAGECACHE_LOCK_WRITE, 0} /*PAGECACHE_LOCK_WRITE*/,
    3769                 :   {0, /*unsupported (we can't write having the block read locked) */
    3770                 :    PAGECACHE_LOCK_LEFT_UNLOCKED,
    3771                 :    PAGECACHE_LOCK_LEFT_UNLOCKED} /*PAGECACHE_LOCK_READ_UNLOCK*/,
    3772                 :   {1,
    3773                 :    PAGECACHE_LOCK_LEFT_WRITELOCKED,
    3774                 :    PAGECACHE_LOCK_WRITE_UNLOCK } /*PAGECACHE_LOCK_WRITE_UNLOCK*/,
    3775                 :   {1,
    3776                 :    PAGECACHE_LOCK_LEFT_WRITELOCKED,
    3777                 :    PAGECACHE_LOCK_WRITE_TO_READ} /*PAGECACHE_LOCK_WRITE_TO_READ*/
    3778                 : };
    3779                 : 
    3780                 : /*
                         :   Pin transitions used by pagecache_write_part(), indexed by the
                         :   'pin' argument (enum pagecache_page_pin); the comment after each
                         :   entry names the enum value it belongs to.
                         :
                         :   The table is consulted only when the lock table entry has
                         :   need_lock_change set: new_pin is used together with new_lock
                         :   before the copy, unlock_pin together with unlock_lock after it.
                         :   NOTE(review): the initializers are presumably {new_pin,
                         :   unlock_pin} -- confirm against struct rw_pin_change.
                         : */
    3781                 : static struct rw_pin_change write_pin_change_table[]=
    3782                 : {
    3783                 :   {PAGECACHE_PIN_LEFT_PINNED,
    3784                 :    PAGECACHE_PIN_LEFT_PINNED} /*PAGECACHE_PIN_LEFT_PINNED*/,
    3785                 :   {PAGECACHE_PIN,
    3786                 :    PAGECACHE_UNPIN} /*PAGECACHE_PIN_LEFT_UNPINNED*/,
    3787                 :   {PAGECACHE_PIN,
    3788                 :    PAGECACHE_PIN_LEFT_PINNED} /*PAGECACHE_PIN*/,
    3789                 :   {PAGECACHE_PIN_LEFT_PINNED,
    3790                 :    PAGECACHE_UNPIN} /*PAGECACHE_UNPIN*/
    3791                 : };
    3792                 : 
    3793                 : 
    3794                 : /**
                         :   @brief Copy 'size' bytes from 'buff' into a cached page at 'offset'.
                         :
                         :   When pagecache->can_be_used is set, the block for (file, pageno)
                         :   is obtained with find_block() under pagecache->cache_lock.
                         :   read_block() is called first if the block has no PCBLOCK_ERROR and
                         :   either find_block() reported PAGE_WAIT_TO_BE_READ, or it reported
                         :   PAGE_TO_BE_READ and the write does not cover the whole page.
                         :   The block is then locked/pinned according to
                         :   write_lock_change_table[lock] and write_pin_change_table[pin],
                         :   the data is copied, and the closing lock/pin change is applied.
                         :
                         :   For PAGECACHE_WRITE_DONE the block status is set to PCBLOCK_READ
                         :   and the file's read_callback is run on the block buffer; for the
                         :   other write modes the block is linked to the changed list (unless
                         :   already PCBLOCK_CHANGED) and PCBLOCK_READ is added to its status.
                         :
                         :   If page_link is not NULL, *page_link is set to the block when pin
                         :   is PAGECACHE_PIN or PAGECACHE_PIN_LEFT_PINNED, and to 0 otherwise.
                         :
                         :   The branch taken when the cache cannot be used starts with
                         :   DBUG_ASSERT(0); it is not expected to run.
                         :
                         :   @retval 0 success
                         :   @retval 1 the block carries PCBLOCK_ERROR after the write (or, in
                         :             the no-cache branch, reading/writing the file failed)
                         :
    3795                 :   @note 'buff', if not NULL, must be long-aligned.
    3796                 : */
    3797                 : 
    3798                 : my_bool pagecache_write_part(PAGECACHE *pagecache,
    3799                 :                              PAGECACHE_FILE *file,
    3800                 :                              pgcache_page_no_t pageno,
    3801                 :                              uint level,
    3802                 :                              uchar *buff,
    3803                 :                              enum pagecache_page_type type,
    3804                 :                              enum pagecache_page_lock lock,
    3805                 :                              enum pagecache_page_pin pin,
    3806                 :                              enum pagecache_write_mode write_mode,
    3807                 :                              PAGECACHE_BLOCK_LINK **page_link,
    3808                 :                              LSN first_REDO_LSN_for_page,
    3809                 :                              uint offset, uint size)
    3810              53 : {
    3811              53 :   PAGECACHE_BLOCK_LINK *block= NULL;
    3812                 :   PAGECACHE_BLOCK_LINK *fake_link;
    3813              53 :   my_bool error= 0;
    3814              53 :   int need_lock_change= write_lock_change_table[lock].need_lock_change;
    3815                 :   my_bool reg_request;
    3816                 : #ifndef DBUG_OFF
    3817                 :   char llbuf[22];
    3818              53 :   DBUG_ENTER("pagecache_write_part");
    3819              53 :   DBUG_PRINT("enter", ("fd: %u  page: %s  level: %u  type: %s  lock: %s  "
    3820                 :                        "pin: %s   mode: %s  offset: %u  size %u",
    3821                 :                        (uint) file->file, ullstr(pageno, llbuf), level,
    3822                 :                        page_cache_page_type_str[type],
    3823                 :                        page_cache_page_lock_str[lock],
    3824                 :                        page_cache_page_pin_str[pin],
    3825                 :                        page_cache_page_write_mode_str[write_mode],
    3826                 :                        offset, size));
    3827              53 :   DBUG_ASSERT(type != PAGECACHE_READ_UNKNOWN_PAGE);
    3828              53 :   DBUG_ASSERT(lock != PAGECACHE_LOCK_LEFT_READLOCKED);
    3829              53 :   DBUG_ASSERT(lock != PAGECACHE_LOCK_READ_UNLOCK);
    3830              53 :   DBUG_ASSERT(offset + size <= pagecache->block_size);
    3831              53 :   DBUG_ASSERT(pageno < ((ULL(1)) << 40));
    3832                 : #endif
    3833                 :   /* callers may pass NULL: use a dummy so *page_link can always be written */
    3834              53 :   if (!page_link)
    3835              17 :     page_link= &fake_link;
    3836              53 :   *page_link= 0;
    3837                 : 
    3838              53 : restart:
    3839                 : 
    3840                 : #if !defined(DBUG_OFF) && defined(EXTRA_DEBUG)
    3841              53 :   DBUG_EXECUTE("check_pagecache",
    3842                 :                test_key_cache(pagecache, "start of key_cache_write", 1););
    3843                 : #endif
    3844                 : 
    3845              53 :   if (pagecache->can_be_used)
    3846                 :   {
    3847                 :     /* Key cache is used */
    3848                 :     int page_st;
    3849              53 :     my_bool need_page_ready_signal= FALSE;
    3850                 : 
    3851              53 :     pagecache_pthread_mutex_lock(&pagecache->cache_lock);
    3852              53 :     if (!pagecache->can_be_used)
    3853                 :     {
    3854               0 :       pagecache_pthread_mutex_unlock(&pagecache->cache_lock);
    3855               0 :       goto no_key_cache;
    3856                 :     }
    3857                 : 
    3858              53 :     inc_counter_for_resize_op(pagecache);
    3859              53 :     pagecache->global_cache_w_requests++;
    3860                 :     /* See NOTE for pagecache_unlock about registering requests. */
    3861              53 :     reg_request= ((pin == PAGECACHE_PIN_LEFT_UNPINNED) ||
    3862                 :                   (pin == PAGECACHE_PIN));
    3863              53 :     block= find_block(pagecache, file, pageno, level,
    3864                 :                       TRUE,
    3865                 :                       reg_request, &page_st);
    3866              53 :     if (!block)
    3867                 :     {
    3868               0 :       DBUG_ASSERT(write_mode != PAGECACHE_WRITE_DONE);
    3869                 :       /* It happens only for requests submitted during resize operation */
    3870               0 :       dec_counter_for_resize_op(pagecache);
    3871               0 :       pagecache_pthread_mutex_unlock(&pagecache->cache_lock);
    3872                 :       /* Write to the disk: the key cache is being resized at the moment */
    3873               0 :       goto no_key_cache;
    3874                 :     }
    3875              53 :     DBUG_PRINT("info", ("page status: %d", page_st));
    3876              53 :     if (!(block->status & PCBLOCK_ERROR) &&
    3877                 :         ((page_st == PAGE_TO_BE_READ &&
    3878                 :           (offset || size < pagecache->block_size)) ||
    3879                 :          (page_st == PAGE_WAIT_TO_BE_READ)))
    3880                 :     {
    3881                 :       /* The requested page is to be read into the block buffer */
    3882               0 :       read_block(pagecache, block,
    3883                 :                  (my_bool)(page_st == PAGE_TO_BE_READ));
    3884               0 :       DBUG_PRINT("info", ("read is done"));
    3885                 :     }
    3886              53 :     else if (page_st == PAGE_TO_BE_READ)
    3887                 :     {
    3888              40 :       need_page_ready_signal= TRUE; /* no read_block() done: release waiters after the copy */
    3889                 :     }
    3890                 : 
    3891              53 :     DBUG_ASSERT(block->type == PAGECACHE_EMPTY_PAGE ||
    3892                 :                 block->type == PAGECACHE_READ_UNKNOWN_PAGE ||
    3893                 :                 block->type == type ||
    3894                 :                 /* this is for when going to non-trans to trans */
    3895                 :                 (block->type == PAGECACHE_PLAIN_PAGE &&
    3896                 :                  type == PAGECACHE_LSN_PAGE));
    3897              53 :     block->type= type;
    3898                 :     /* we write to the page so it makes no sense to keep the flag */
    3899              53 :     block->status&= ~PCBLOCK_DIRECT_W;
    3900              53 :     DBUG_PRINT("info", ("Drop PCBLOCK_DIRECT_W for block: 0x%lx",
    3901                 :                         (ulong) block));
    3902                 : 
    3903              53 :     if (make_lock_and_pin(pagecache, block,
    3904                 :                           write_lock_change_table[lock].new_lock,
    3905                 :                           (need_lock_change ?
    3906                 :                            write_pin_change_table[pin].new_pin :
    3907                 :                            pin), FALSE))
    3908                 :     {
    3909                 :       /*
    3910                 :         We failed to writelock the block, cache is unlocked, and last write
    3911                 :         lock is released, we will try to get the block again.
                         :
                         :         NOTE(review): this path jumps back to 'restart' without a
                         :         dec_counter_for_resize_op() matching the
                         :         inc_counter_for_resize_op() above, and
                         :         global_cache_w_requests is incremented again after the
                         :         restart -- confirm whether this is intended.
    3912                 :       */
    3913               0 :       if (reg_request)
    3914               0 :         unreg_request(pagecache, block, 1);
    3915               0 :       pagecache_pthread_mutex_unlock(&pagecache->cache_lock);
    3916               0 :       DBUG_PRINT("info", ("restarting..."));
    3917               0 :       goto restart;
    3918                 :     }
    3919                 : 
    3920              53 :     if (write_mode == PAGECACHE_WRITE_DONE)
    3921                 :     {
    3922               8 :       if (block->status & PCBLOCK_ERROR)
    3923                 :       {
    3924               0 :         my_debug_put_break_here();
    3925               0 :         DBUG_PRINT("warning", ("Writing on page with error"));
    3926                 :       }
    3927                 :       else
    3928                 :       {
    3929                 :         /* Copy data from buff */
    3930               8 :         if (!(size & 511))
    3931               8 :           bmove512(block->buffer + offset, buff, size);
    3932                 :         else
    3933               0 :           memcpy(block->buffer + offset, buff, size);
    3934               8 :         block->status= PCBLOCK_READ;
    3935                 :         /*
    3936                 :           The read_callback can change the page content (removing page
    3937                 :           protection), so it has to be called.
    3938                 :         */
    3939               8 :         DBUG_PRINT("info", ("read_callback: 0x%lx  data: 0x%lx",
    3940                 :                             (ulong) block->hash_link->file.read_callback,
    3941                 :                             (ulong) block->hash_link->file.callback_data));
    3942               8 :         if ((*block->hash_link->file.read_callback)(block->buffer,
    3943                 :                                                     block->hash_link->pageno,
    3944                 :                                                     block->hash_link->
    3945                 :                                                     file.callback_data))
    3946                 :         {
    3947               0 :           DBUG_PRINT("error", ("read callback problem"));
    3948               0 :           block->status|= PCBLOCK_ERROR;
    3949               0 :           block->error= (int16) my_errno;
    3950               0 :           my_debug_put_break_here();
    3951                 :         }
    3952               8 :         KEYCACHE_DBUG_PRINT("key_cache_insert",
    3953                 :                             ("Page injection"));
    3954                 : #ifdef THREAD
    3955                 :         /* Signal that all pending requests for this now can be processed. */
    3956               8 :         if (block->wqueue[COND_FOR_REQUESTED].last_thread)
    3957               0 :           wqueue_release_queue(&block->wqueue[COND_FOR_REQUESTED]);
    3958                 : #endif
    3959                 :       }
    3960                 :     }
    3961                 :     else
    3962                 :     {
    3963              45 :       if (! (block->status & PCBLOCK_CHANGED))
    3964              45 :           link_to_changed_list(pagecache, block);
    3965                 : 
    3966              45 :       if (!(size & 511))
    3967              45 :         bmove512(block->buffer + offset, buff, size);
    3968                 :       else
    3969               0 :         memcpy(block->buffer + offset, buff, size);
    3970              45 :       block->status|= PCBLOCK_READ;
    3971                 :       /* Page is correct again if we made a full write in it */
    3972              45 :       if (size == pagecache->block_size)
    3973              45 :         block->status&= ~PCBLOCK_ERROR;
    3974                 :     }
    3975                 : 
    3976                 : #ifdef THREAD
    3977              53 :     if (need_page_ready_signal &&
    3978                 :         block->wqueue[COND_FOR_REQUESTED].last_thread)
    3979               0 :       wqueue_release_queue(&block->wqueue[COND_FOR_REQUESTED]);
    3980                 : #endif
    3981                 : 
    3982              53 :     if (first_REDO_LSN_for_page)
    3983                 :     {
    3984                 :       /* single write action or the last write action */
    3985               0 :       DBUG_ASSERT(lock == PAGECACHE_LOCK_WRITE_UNLOCK ||
    3986                 :                   lock == PAGECACHE_LOCK_LEFT_UNLOCKED);
    3987               0 :       DBUG_ASSERT(pin == PAGECACHE_UNPIN ||
    3988                 :                   pin == PAGECACHE_PIN_LEFT_UNPINNED);
    3989               0 :       pagecache_set_block_rec_lsn(block, first_REDO_LSN_for_page);
    3990                 :     }
    3991                 : 
    3992              53 :     if (need_lock_change)
    3993                 :     {
    3994                 :       /*
    3995                 :         We don't set rec_lsn of the block; this is ok as for the
    3996                 :         Maria-block-record's pages, we always keep pages pinned here.
    3997                 :       */
    3998              53 :       if (make_lock_and_pin(pagecache, block,
    3999                 :                             write_lock_change_table[lock].unlock_lock,
    4000                 :                             write_pin_change_table[pin].unlock_pin, FALSE))
    4001               0 :         DBUG_ASSERT(0);
    4002                 :     }
    4003                 : 
    4004                 :     /* Unregister the request */
    4005              53 :     DBUG_ASSERT(block->hash_link->requests > 0);
    4006              53 :     block->hash_link->requests--;
    4007                 :     /* See NOTE for pagecache_unlock about registering requests. */
    4008              70 :     if (pin == PAGECACHE_PIN_LEFT_UNPINNED || pin == PAGECACHE_UNPIN)
    4009              17 :       unreg_request(pagecache, block, 1);
    4010                 :     else
    4011              36 :       *page_link= block;
    4012                 : 
    4013              53 :     if (block->status & PCBLOCK_ERROR)
    4014                 :     {
    4015               0 :       error= 1;
    4016               0 :       my_debug_put_break_here();
    4017                 :     }
    4018                 : 
    4019              53 :     dec_counter_for_resize_op(pagecache);
    4020                 : 
    4021              53 :     pagecache_pthread_mutex_unlock(&pagecache->cache_lock);
    4022                 : 
    4023              53 :     goto end;
    4024                 :   }
    4025                 : 
    4026               0 : no_key_cache:
    4027                 :   /*
    4028                 :     We can't bypass the normal page cache operations because we need
    4029                 :     the whole page for calling callbacks and so on.
    4030                 :     This branch should not be used for now (but it is kept correct,
    4031                 :     as it should be, just to avoid confusion).
    4032                 :   */
    4033               0 :   DBUG_ASSERT(0);
    4034                 :   /* Key cache is not used */
    4035                 :   if (write_mode == PAGECACHE_WRITE_DELAY)
    4036                 :   {
    4037                 :     /* We can't use mutex here as the key cache may not be initialized */
    4038                 :     pagecache->global_cache_w_requests++;
    4039                 :     pagecache->global_cache_write++;
    4040                 :     if (offset != 0 || size != pagecache->block_size)
    4041                 :     {
    4042                 :       uchar *page_buffer= (uchar *) alloca(pagecache->block_size);
    4043                 : 
    4044                 :       pagecache->global_cache_read++;
    4045                 :       if ((error= (pagecache_fread(pagecache, file,
    4046                 :                                    page_buffer,
    4047                 :                                    pageno,
    4048                 :                                    pagecache->readwrite_flags) != 0)))
    4049                 :         goto end;
    4050                 :       if ((file->read_callback)(page_buffer, pageno, file->callback_data))
    4051                 :       {
    4052                 :         DBUG_PRINT("error", ("read callback problem"));
    4053                 :         error= 1;
    4054                 :         goto end;
    4055                 :       }
    4056                 :       memcpy((char *)page_buffer + offset, buff, size);
    4057                 :       buff= page_buffer;
    4058                 :     }
    4059                 :     if (pagecache_fwrite(pagecache, file, buff, pageno, type,
    4060                 :                          pagecache->readwrite_flags))
    4061                 :       error= 1;
    4062                 :   }
    4063                 : 
    4064              53 : end:
    4065                 : #if !defined(DBUG_OFF) && defined(EXTRA_DEBUG)
    4066              53 :   DBUG_EXECUTE("exec",
    4067                 :                test_key_cache(pagecache, "end of key_cache_write", 1););
    4068                 : #endif
    4069              53 :   if (block)
    4070              53 :     PCBLOCK_INFO(block);
    4071                 :   else
    4072               0 :     DBUG_PRINT("info", ("No block"));
    4073              53 :   DBUG_RETURN(error);
    4074                 : }
    4075                 : 
    4076                 : 
    4077                 : /*
    4078                 :   Free block: remove the reference to it from the hash table,
    4079                 :   remove it from the file's chain of dirty/clean blocks
    4080                 :   and add it to the free list.
                         :
                         :   If the block still has a hash link, the block is first marked
                         :   PCBLOCK_REASSIGNED and wait_for_readers() is called before the
                         :   hash link is unlinked.  The block status is then cleared,
                         :   rec_lsn is reset to LSN_MAX, the block is removed from the LRU
                         :   ring, made PCBLOCK_COLD and pushed onto
                         :   pagecache->free_block_list, and blocks_unused is incremented.
                         :   Threads queued on COND_FOR_SAVED are released last.
                         :
                         :   The block must have no locks and no pins (asserted).
    4081                 : */
    4082                 : 
    4083                 : static void free_block(PAGECACHE *pagecache, PAGECACHE_BLOCK_LINK *block)
    4084              49 : {
    4085              49 :   KEYCACHE_THREAD_TRACE("free block");
    4086              49 :   KEYCACHE_DBUG_PRINT("free_block",
    4087                 :                       ("block: %u  hash_link 0x%lx",
    4088                 :                        PCBLOCK_NUMBER(pagecache, block),
    4089                 :                        (long) block->hash_link));
    4090              49 :   if (block->hash_link)
    4091                 :   {
    4092                 :     /*
    4093                 :       While waiting for readers to finish, new readers might request the
    4094                 :       block. But since we set block->status|= PCBLOCK_REASSIGNED, they
    4095                 :       will wait on block->wqueue[COND_FOR_SAVED]. They must be signalled
    4096                 :       later.
    4097                 :     */
    4098              49 :     block->status|= PCBLOCK_REASSIGNED;
    4099              49 :     wait_for_readers(pagecache, block);
    4100              49 :     unlink_hash(pagecache, block->hash_link);
    4101                 :   }
    4102                 : 
    4103              49 :   unlink_changed(block);
    4104              49 :   DBUG_ASSERT(block->wlocks == 0);
    4105              49 :   DBUG_ASSERT(block->rlocks == 0);
    4106              49 :   DBUG_ASSERT(block->rlocks_queue == 0);
    4107              49 :   DBUG_ASSERT(block->pins == 0);
    4108              49 :   block->status= 0;
    4109                 : #ifndef DBUG_OFF
    4110              49 :   block->type= PAGECACHE_EMPTY_PAGE;
    4111                 : #endif
    4112              49 :   block->rec_lsn= LSN_MAX;
    4113              49 :   KEYCACHE_THREAD_TRACE("free block");
    4114              49 :   KEYCACHE_DBUG_PRINT("free_block",
    4115                 :                       ("block is freed"));
    4116              49 :   unreg_request(pagecache, block, 0);
    4117              49 :   block->hash_link= NULL;
    4118                 : 
    4119                 :   /* Remove the free block from the LRU ring. */
    4120              49 :   unlink_block(pagecache, block);
    4121              49 :   if (block->temperature == PCBLOCK_WARM)
    4122               0 :     pagecache->warm_blocks--;
    4123              49 :   block->temperature= PCBLOCK_COLD;
    4124                 :   /* Insert the free block in the free list. */
    4125              49 :   block->next_used= pagecache->free_block_list;
    4126              49 :   pagecache->free_block_list= block;
    4127                 :   /* Keep track of the number of currently unused blocks. */
    4128              49 :   pagecache->blocks_unused++;
    4129                 : 
    4130                 : #ifdef THREAD
    4131                 :   /* All pending requests for this page must be resubmitted. */
    4132              49 :   if (block->wqueue[COND_FOR_SAVED].last_thread)
    4133               0 :     wqueue_release_queue(&block->wqueue[COND_FOR_SAVED]);
    4134                 : #endif
    4135                 : }
    4136                 : 
    4137                 : /*
                         :   Comparison callback passed to qsort() by flush_cached_blocks():
                         :   orders PAGECACHE_BLOCK_LINK pointers by ascending
                         :   hash_link->pageno.  Returns -1, 0 or 1.
                         :   Both blocks are dereferenced through hash_link without a check,
                         :   so each must have a non-NULL hash_link.
                         : */
    4138                 : static int cmp_sec_link(PAGECACHE_BLOCK_LINK **a, PAGECACHE_BLOCK_LINK **b)
    4139               0 : {
    4140               0 :   return (((*a)->hash_link->pageno < (*b)->hash_link->pageno) ? -1 :
    4141                 :       ((*a)->hash_link->pageno > (*b)->hash_link->pageno) ? 1 : 0);
    4142                 : }
    4143                 : 
    4144                 : 
    4145                 : /**
    4146                 :   @brief Flush a portion of changed blocks to disk, free used blocks
    4147                 :   if requested
    4148                 : 
    4149                 :   @param pagecache       This page cache reference.
    4150                 :   @param file            File which should be flushed
    4151                 :   @param cache           Beginning of array of the block.
    4152                 :   @param end             Reference to the block after last in the array.
    4153                 :   @param type            Type of the flush.
    4154                 :   @param first_errno     Where to store first errno of the flush.
    4155                 : 
    4156                 : 
    4157                 :   @return Operation status
    4158                 :   @retval PCFLUSH_OK OK
    4159                 :   @retval PCFLUSH_ERROR There were errors during the flush process.
    4160                 :   @retval PCFLUSH_PINNED Pinned blocks were met and skipped.
    4161                 :   @retval PCFLUSH_PINNED_AND_ERROR PCFLUSH_ERROR and PCFLUSH_PINNED.
    4162                 : */
    4163                 : 
    4164                 : static int flush_cached_blocks(PAGECACHE *pagecache,
    4165                 :                                PAGECACHE_FILE *file,
    4166                 :                                PAGECACHE_BLOCK_LINK **cache,
    4167                 :                                PAGECACHE_BLOCK_LINK **end,
    4168                 :                                enum flush_type type,
    4169                 :                                int *first_errno)
    4170               0 : {
    4171               0 :   int rc= PCFLUSH_OK;
    4172                 :   my_bool error;
    4173               0 :   uint count= (uint) (end-cache);
    4174               0 :   DBUG_ENTER("flush_cached_blocks");
    4175               0 :   *first_errno= 0;
    4176                 : 
    4177                 :   /* Don't lock the cache during the flush */
    4178               0 :   pagecache_pthread_mutex_unlock(&pagecache->cache_lock);
    4179                 :   /*
    4180                 :      As all blocks referred in 'cache' are marked by PCBLOCK_IN_FLUSH
    4181                 :      we are guaranteed that no thread will change them
    4182                 :   */
    4183               0 :   qsort((uchar*) cache, count, sizeof(*cache), (qsort_cmp) cmp_sec_link);
    4184                 : 
    4185               0 :   pagecache_pthread_mutex_lock(&pagecache->cache_lock);
    4186               0 :   for (; cache != end; cache++)
    4187                 :   {
    4188               0 :     PAGECACHE_BLOCK_LINK *block= *cache;
    4189                 : 
    4190               0 :     if (block->pins)
    4191                 :     {
    4192               0 :       KEYCACHE_DBUG_PRINT("flush_cached_blocks",
    4193                 :                           ("block: %u (0x%lx)  pinned",
    4194                 :                            PCBLOCK_NUMBER(pagecache, block), (ulong)block));
    4195               0 :       DBUG_PRINT("info", ("block: %u (0x%lx)  pinned",
    4196                 :                           PCBLOCK_NUMBER(pagecache, block), (ulong)block));
    4197               0 :       PCBLOCK_INFO(block);
    4198                 :       /* undo the mark put by flush_pagecache_blocks_int(): */
    4199               0 :       block->status&= ~PCBLOCK_IN_FLUSH;
    4200               0 :       rc|= PCFLUSH_PINNED;
    4201               0 :       DBUG_PRINT("warning", ("Page pinned"));
    4202               0 :       unreg_request(pagecache, block, 1);
    4203               0 :       if (!*first_errno)
    4204               0 :         *first_errno= HA_ERR_INTERNAL_ERROR;
    4205                 :       continue;
    4206                 :     }
    4207                 :     /* if the block is not pinned then it is not write locked */
    4208               0 :     DBUG_ASSERT(block->wlocks == 0);
    4209               0 :     DBUG_ASSERT(block->pins == 0);
    4210               0 :     if (make_lock_and_pin(pagecache, block,
    4211                 :                           PAGECACHE_LOCK_WRITE, PAGECACHE_PIN, FALSE))
    4212               0 :       DBUG_ASSERT(0);
    4213               0 :     DBUG_ASSERT(block->pins == 1);
    4214                 : 
    4215               0 :     KEYCACHE_DBUG_PRINT("flush_cached_blocks",
    4216                 :                         ("block: %u (0x%lx)  to be flushed",
    4217                 :                          PCBLOCK_NUMBER(pagecache, block), (ulong)block));
    4218               0 :     DBUG_PRINT("info", ("block: %u (0x%lx)  to be flushed",
    4219                 :                         PCBLOCK_NUMBER(pagecache, block), (ulong)block));
    4220               0 :     PCBLOCK_INFO(block);
    4221               0 :     pagecache_pthread_mutex_unlock(&pagecache->cache_lock);
    4222               0 :     DBUG_PRINT("info", ("block: %u (0x%lx)  pins: %u",
    4223                 :                         PCBLOCK_NUMBER(pagecache, block), (ulong)block,
    4224                 :                         block->pins));
    4225               0 :     DBUG_ASSERT(block->pins == 1);
    4226                 :     /**
    4227                 :        @todo IO If page is contiguous with next page to flush, group flushes
    4228                 :        in one single my_pwrite().
    4229                 :     */
    4230                 :     /**
    4231                 :       It is important to use block->hash_link->file below and not 'file', as
    4232                 :       the first one is right and the second may have different out-of-date
    4233                 :       content (see StaleFilePointersInFlush in ma_checkpoint.c).
    4234                 :       @todo change argument of functions to be File.
    4235                 :     */
    4236               0 :     error= pagecache_fwrite(pagecache, &block->hash_link->file,
    4237                 :                             block->buffer,
    4238                 :                             block->hash_link->pageno,
    4239                 :                             block->type,
    4240                 :                             pagecache->readwrite_flags);
    4241               0 :     pagecache_pthread_mutex_lock(&pagecache->cache_lock);
    4242                 : 
    4243               0 :     if (make_lock_and_pin(pagecache, block,
    4244                 :                           PAGECACHE_LOCK_WRITE_UNLOCK,
    4245                 :                           PAGECACHE_UNPIN, FALSE))
    4246               0 :       DBUG_ASSERT(0);
    4247                 : 
    4248               0 :     pagecache->global_cache_write++;
    4249               0 :     if (error)
    4250                 :     {
    4251               0 :       block->status|= PCBLOCK_ERROR;
    4252               0 :       block->error=   (int16) my_errno;
    4253               0 :       my_debug_put_break_here();
    4254               0 :       if (!*first_errno)
    4255               0 :         *first_errno= my_errno ? my_errno : -1;
    4256               0 :       rc|= PCFLUSH_ERROR;
    4257                 :     }
    4258                 : #ifdef THREAD
    4259                 :     /*
    4260                 :       Let to proceed for possible waiting requests to write to the block page.
    4261                 :       It might happen only during an operation to resize the key cache.
    4262                 :     */
    4263               0 :     if (block->wqueue[COND_FOR_SAVED].last_thread)
    4264               0 :       wqueue_release_queue(&block->wqueue[COND_FOR_SAVED]);
    4265                 : #endif
    4266                 :     /* type will never be FLUSH_IGNORE_CHANGED here */
    4267               0 :     if (! (type == FLUSH_KEEP || type == FLUSH_KEEP_LAZY ||
    4268                 :            type == FLUSH_FORCE_WRITE))
    4269                 :     {
    4270               0 :       pagecache->blocks_changed--;
    4271               0 :       pagecache->global_blocks_changed--;
    4272               0 :       free_block(pagecache, block);
    4273                 :     }
    4274                 :     else
    4275                 :     {
    4276               0 :       block->status&= ~PCBLOCK_IN_FLUSH;
    4277               0 :       link_to_file_list(pagecache, block, file, 1);
    4278               0 :       unreg_request(pagecache, block, 1);
    4279                 :     }
    4280                 :   }
    4281               0 :   DBUG_RETURN(rc);
    4282                 : }
    4283                 : 
    4284                 : 
    4285                 : /**
    4286                 :    @brief flush all blocks for a file to disk but don't do any mutex locks
    4287                 : 
    4288                 :    @param  pagecache       pointer to a pagecache data structure
    4289                 :    @param  file            handler for the file to flush to
    4290                 :    @param  type            type of the flush
    4291                 :    @param  filter          optional function which tells what blocks to flush;
    4292                 :                            can be non-NULL only if FLUSH_KEEP, FLUSH_KEEP_LAZY
    4293                 :                            or FLUSH_FORCE_WRITE.
    4294                 :    @param  filter_arg      an argument to pass to 'filter'. Information about
    4295                 :                            the block will be passed too.
    4296                 : 
    4297                 :    @note
    4298                 :      Flushes all blocks having the same OS file descriptor as 'file->file', so
    4299                 :      can flush blocks having '*block->hash_link->file' != '*file'.
    4300                 : 
    4301                 :    @note
    4302                 :      Doesn't acquire cache_lock itself: callers (flush_pagecache_blocks, and
    4303                 :      flush_all_key_blocks via resize_pagecache()) hold it. It may be released
    4304                 :      temporarily inside, but is always held again when this function returns.
    4305                 : 
    4306                 :    @note
    4307                 :      This function can cause problems if two threads call it
    4308                 :      concurrently on the same file (look for "PageCacheFlushConcurrencyBugs"
    4309                 :      in ma_checkpoint.c); to avoid them, it has internal logic to serialize in
    4310                 :      this situation.
    4311                 : 
    4312                 :    @return Operation status
    4313                 :    @retval PCFLUSH_OK OK
    4314                 :    @retval PCFLUSH_ERROR There were errors during the flush process.
    4315                 :    @retval PCFLUSH_PINNED Pinned blocks were met and skipped.
    4316                 :    @retval PCFLUSH_PINNED_AND_ERROR PCFLUSH_ERROR and PCFLUSH_PINNED.
    4317                 : */
    4318                 : 
    4319                 : static int flush_pagecache_blocks_int(PAGECACHE *pagecache,
    4320                 :                                       PAGECACHE_FILE *file,
    4321                 :                                       enum flush_type type,
    4322                 :                                       PAGECACHE_FLUSH_FILTER filter,
    4323                 :                                       void *filter_arg)
    4324              40 : {
    4325                 :   PAGECACHE_BLOCK_LINK *cache_buff[FLUSH_CACHE],**cache;
    4326              40 :   int last_errno= 0;
    4327              40 :   int rc= PCFLUSH_OK;
    4328              40 :   DBUG_ENTER("flush_pagecache_blocks_int");
    4329              40 :   DBUG_PRINT("enter",
    4330                 :              ("fd: %d  blocks_used: %lu  blocks_changed: %lu  type: %d",
    4331                 :               file->file, pagecache->blocks_used, pagecache->blocks_changed,
    4332                 :               type));
    4333                 : 
    4334                 : #if !defined(DBUG_OFF) && defined(EXTRA_DEBUG)
    4335              40 :     DBUG_EXECUTE("check_pagecache",
    4336                 :                  test_key_cache(pagecache,
    4337                 :                                 "start of flush_pagecache_blocks", 0););
    4338                 : #endif
    4339                 : 
    4340              40 :   cache= cache_buff;
    4341              40 :   if (pagecache->disk_blocks > 0 &&
    4342                 :       (!my_disable_flush_pagecache_blocks ||
    4343                 :        (type != FLUSH_KEEP && type != FLUSH_KEEP_LAZY)))
    4344                 :   {
    4345                 :     /*
    4346                 :       Key cache exists. If my_disable_flush_pagecache_blocks is true it
    4347                 :       disables the operation but only FLUSH_KEEP[_LAZY]: other flushes still
    4348                 :       need to be allowed: FLUSH_RELEASE has to free blocks, and
    4349                 :       FLUSH_FORCE_WRITE is to overrule my_disable_flush_pagecache_blocks.
    4350                 :     */
    4351              40 :     int error= 0;
    4352              40 :     uint count= 0;
    4353                 :     PAGECACHE_BLOCK_LINK **pos, **end;
    4354              40 :     PAGECACHE_BLOCK_LINK *first_in_switch= NULL;
    4355                 :     PAGECACHE_BLOCK_LINK *block, *next;
    4356                 : #if defined(PAGECACHE_DEBUG)
    4357                 :     uint cnt= 0;
    4358                 : #endif
    4359                 : 
    4360                 : #ifdef THREAD
    4361                 :     struct st_file_in_flush us_flusher, *other_flusher;
    4362              40 :     us_flusher.file= file->file;
    4363              40 :     us_flusher.flush_queue.last_thread= NULL;
    4364              40 :     us_flusher.first_in_switch= FALSE;
    4365              80 :     while ((other_flusher= (struct st_file_in_flush *)
    4366                 :             hash_search(&pagecache->files_in_flush, (uchar *)&file->file,
    4367                 :                         sizeof(file->file))))
    4368                 :     {
    4369                 :       /*
    4370                 :         File is in flush already: wait, unless FLUSH_KEEP_LAZY. "Flusher"
    4371                 :         means "who can mark PCBLOCK_IN_FLUSH", i.e. caller of
    4372                 :         flush_pagecache_blocks_int().
    4373                 :       */
    4374                 :       struct st_my_thread_var *thread;
    4375               0 :       if (type == FLUSH_KEEP_LAZY)
    4376                 :       {
    4377               0 :         DBUG_PRINT("info",("FLUSH_KEEP_LAZY skips"));
    4378               0 :         DBUG_RETURN(0);
    4379                 :       }
    4380               0 :       thread= my_thread_var;
    4381               0 :       wqueue_add_to_queue(&other_flusher->flush_queue, thread);
    4382                 :       do
    4383                 :       {
    4384               0 :         KEYCACHE_DBUG_PRINT("flush_pagecache_blocks_int: wait1",
    4385                 :                             ("suspend thread %ld", thread->id));
    4386               0 :         pagecache_pthread_cond_wait(&thread->suspend,
    4387                 :                                     &pagecache->cache_lock);
    4388                 :       }
    4389               0 :       while (thread->next);
    4390                 :     }
    4391                 :     /* we are the only flusher of this file now */
    4392              40 :     while (my_hash_insert(&pagecache->files_in_flush, (uchar *)&us_flusher))
    4393                 :     {
    4394                 :       /*
    4395                 :         Out of memory, wait for flushers to empty the hash and retry; should
    4396                 :         rarely happen. Other threads are flushing the file; when done, they
    4397                 :         remove themselves from the hash, and thus memory will appear again.
    4398                 :         However, this memory may be stolen by yet another thread (for a
    4399                 :         purpose unrelated to page cache) before we retry hash_insert(), so
    4400                 :         the loop may run for long. Only if the thread was killed do we abort
    4401                 :         the loop, returning 1 (error) which can cause the table to be marked
    4402                 :         as corrupted (cf maria_chk_size(), maria_close()) and thus require a
    4403                 :         table check. We return before unlocking: caller expects lock held.
    4404                 :       */
    4405               0 :       DBUG_ASSERT(0);
    4406                 :       if (my_thread_var->abort)
    4407                 :         DBUG_RETURN(1);         /* End if aborted by user */
    4408                 :       pagecache_pthread_mutex_unlock(&pagecache->cache_lock);
    4409                 :       sleep(10);
    4410                 :       pagecache_pthread_mutex_lock(&pagecache->cache_lock);
    4411                 :     }
    4412                 : #endif
    4413                 : 
    4414              40 :     if (type != FLUSH_IGNORE_CHANGED)
    4415                 :     {
    4416                 :       /*
    4417                 :         Count how many key blocks we have to cache to be able
    4418                 :         to flush all dirty pages with minimum seek moves.
    4419                 :       */
    4420              22 :       for (block= pagecache->changed_blocks[FILE_HASH(*file)] ;
    4421              44 :            block;
    4422               0 :            block= block->next_changed)
    4423                 :       {
    4424               0 :         if (block->hash_link->file.file == file->file)
    4425                 :         {
    4426               0 :           count++;
    4427               0 :           KEYCACHE_DBUG_ASSERT(count<= pagecache->blocks_used);
    4428                 :         }
    4429                 :       }
    4430                 :       /* Allocate a new buffer only if it is bigger than the one we have */
    4431              22 :       if (count > FLUSH_CACHE &&
    4432                 :           !(cache=
    4433                 :             (PAGECACHE_BLOCK_LINK**)
    4434                 :             my_malloc(sizeof(PAGECACHE_BLOCK_LINK*)*count, MYF(0))))
    4435                 :       {
    4436               0 :         cache= cache_buff;
    4437               0 :         count= FLUSH_CACHE;
    4438                 :       }
    4439                 :     }
    4440                 : 
    4441                 :     /* Retrieve the blocks and write them to a buffer to be flushed */
    4442              40 : restart:
    4443              40 :     end= (pos= cache)+count;
    4444              40 :     for (block= pagecache->changed_blocks[FILE_HASH(*file)] ;
    4445             125 :          block;
    4446              45 :          block= next)
    4447                 :     {
    4448                 : #if defined(PAGECACHE_DEBUG)
    4449                 :       cnt++;
    4450                 :       KEYCACHE_DBUG_ASSERT(cnt <= pagecache->blocks_used);
    4451                 : #endif
    4452              45 :       next= block->next_changed;
    4453              45 :       if (block->hash_link->file.file != file->file)
    4454              45 :         continue;
    4455              45 :       if (filter != NULL)
    4456                 :       {
    4457                 :         int filter_res= (*filter)(block->type, block->hash_link->pageno,
    4458               0 :                                   block->rec_lsn, filter_arg);
    4459               0 :         DBUG_PRINT("info",("filter returned %d", filter_res));
    4460               0 :         if (filter_res == FLUSH_FILTER_SKIP_TRY_NEXT)
    4461               0 :           continue;
    4462               0 :         if (filter_res == FLUSH_FILTER_SKIP_ALL)
    4463               0 :           break;
    4464               0 :         DBUG_ASSERT(filter_res == FLUSH_FILTER_OK);
    4465                 :       }
    4466                 :       {
    4467                 :         /*
    4468                 :            Mark the block with BLOCK_IN_FLUSH in order not to let
    4469                 :            other threads to use it for new pages and interfere with
    4470                 :            our sequence of flushing dirty file pages
    4471                 :         */
    4472              45 :         block->status|= PCBLOCK_IN_FLUSH;
    4473                 : 
    4474              45 :         if (! (block->status & PCBLOCK_IN_SWITCH))
    4475                 :         {
    4476                 :           /*
    4477                 :             We care only for the blocks for which flushing was not
    4478                 :             initiated by other threads as a result of page swapping
    4479                 :           */
    4480              45 :           reg_requests(pagecache, block, 1);
    4481              45 :           if (type != FLUSH_IGNORE_CHANGED)
    4482                 :           {
    4483                 :             /* It's not a temporary file */
    4484               0 :             if (pos == end)
    4485                 :             {
    4486                 :               /*
    4487                 :                 This happens only if there is not enough
    4488                 :                 memory for the big block
    4489                 :               */
    4490               0 :               if ((rc|= flush_cached_blocks(pagecache, file, cache,
    4491                 :                                             end, type, &error)) &
    4492                 :                   (PCFLUSH_ERROR | PCFLUSH_PINNED))
    4493               0 :                 last_errno=error;
    4494               0 :               DBUG_PRINT("info", ("restarting..."));
    4495                 :               /*
    4496                 :                 Restart the scan as some other thread might have changed
    4497                 :                 the changed blocks chain: the blocks that were in switch
    4498                 :                 state before the flush started have to be excluded
    4499                 :               */
    4500               0 :               goto restart; /* NOTE(review): 'block' got reg_requests() above, not stored in cache - confirm rescan is ok */
    4501                 :             }
    4502               0 :             *pos++= block;
    4503                 :           }
    4504                 :           else
    4505                 :           {
    4506                 :             /* It's a temporary file */
    4507              45 :             pagecache->blocks_changed--;
    4508              45 :             pagecache->global_blocks_changed--;
    4509              45 :             free_block(pagecache, block);
    4510                 :           }
    4511                 :         }
    4512               0 :         else if (type != FLUSH_KEEP_LAZY)
    4513                 :         {
    4514                 :           /*
    4515                 :             Link the block into a list of blocks 'in switch', and then we will
    4516                 :             wait for this list to be empty, which means they have been flushed
    4517                 :           */
    4518               0 :           unlink_changed(block);
    4519               0 :           link_changed(block, &first_in_switch);
    4520               0 :           us_flusher.first_in_switch= TRUE;
    4521                 :         }
    4522                 :       }
    4523                 :     }
    4524              40 :     if (pos != cache)
    4525                 :     {
    4526               0 :       if ((rc|= flush_cached_blocks(pagecache, file, cache, pos, type,
    4527                 :                                     &error)) &
    4528                 :           (PCFLUSH_ERROR | PCFLUSH_PINNED))
    4529               0 :         last_errno= error;
    4530                 :     }
    4531                 :     /* Wait until list of blocks in switch is empty */
    4532              40 :     while (first_in_switch)
    4533                 :     {
    4534                 : #if defined(PAGECACHE_DEBUG)
    4535                 :       cnt= 0;
    4536                 : #endif
    4537               0 :       block= first_in_switch;
    4538                 :       {
    4539                 : #ifdef THREAD
    4540               0 :         struct st_my_thread_var *thread= my_thread_var;
    4541               0 :         wqueue_add_to_queue(&block->wqueue[COND_FOR_SAVED], thread);
    4542                 :         do
    4543                 :         {
    4544               0 :           KEYCACHE_DBUG_PRINT("flush_pagecache_blocks_int: wait2",
    4545                 :                               ("suspend thread %ld", thread->id));
    4546               0 :           pagecache_pthread_cond_wait(&thread->suspend,
    4547                 :                                      &pagecache->cache_lock);
    4548                 :         }
    4549               0 :         while (thread->next);
    4550                 : #else
    4551                 :         KEYCACHE_DBUG_ASSERT(0);
    4552                 :         /* No parallel requests in single-threaded case */
    4553                 : #endif
    4554                 :       }
    4555                 : #if defined(PAGECACHE_DEBUG)
    4556                 :       cnt++;
    4557                 :       KEYCACHE_DBUG_ASSERT(cnt <= pagecache->blocks_used);
    4558                 : #endif
    4559                 :     }
    4560              40 :     us_flusher.first_in_switch= FALSE;
    4561                 :     /* The following happens very seldom */
    4562              40 :     if (! (type == FLUSH_KEEP || type == FLUSH_KEEP_LAZY ||
    4563                 :            type == FLUSH_FORCE_WRITE))
    4564                 :     {
    4565                 :       /*
    4566                 :         this code would free all blocks while filter maybe handled only a
    4567                 :         few, that is not possible.
    4568                 :       */
    4569              40 :       DBUG_ASSERT(filter == NULL);
    4570                 : #if defined(PAGECACHE_DEBUG)
    4571                 :       cnt=0;
    4572                 : #endif
    4573              40 :       for (block= pagecache->file_blocks[FILE_HASH(*file)] ;
    4574              84 :            block;
    4575               4 :            block= next)
    4576                 :       {
    4577                 : #if defined(PAGECACHE_DEBUG)
    4578                 :         cnt++;
    4579                 :         KEYCACHE_DBUG_ASSERT(cnt <= pagecache->blocks_used);
    4580                 : #endif
    4581               4 :         next= block->next_changed;
    4582               4 :         if (block->hash_link->file.file == file->file &&
    4583                 :             (! (block->status & PCBLOCK_CHANGED)
    4584                 :              || type == FLUSH_IGNORE_CHANGED))
    4585                 :         {
    4586               4 :           reg_requests(pagecache, block, 1);
    4587               4 :           free_block(pagecache, block);
    4588                 :         }
    4589                 :       }
    4590                 :     }
    4591                 : #ifdef THREAD
    4592                 :     /* wake up others waiting to flush this file */
    4593              40 :     hash_delete(&pagecache->files_in_flush, (uchar *)&us_flusher);
    4594              40 :     if (us_flusher.flush_queue.last_thread)
    4595               0 :       wqueue_release_queue(&us_flusher.flush_queue);
    4596                 : #endif
    4597                 :   }
    4598                 : 
    4599                 : #ifndef DBUG_OFF
    4600              40 :   DBUG_EXECUTE("check_pagecache",
    4601                 :                test_key_cache(pagecache, "end of flush_pagecache_blocks", 0););
    4602                 : #endif
    4603              40 :   if (cache != cache_buff)
    4604               0 :     my_free(cache, MYF(0));
    4605              40 :   if (rc != 0)
    4606                 :   {
    4607               0 :     if (last_errno)
    4608               0 :       my_errno= last_errno;                /* Return first error */
    4609               0 :     DBUG_PRINT("error", ("Got error: %d", my_errno));
    4610                 :   }
    4611              40 :   DBUG_RETURN(rc);
    4612                 : }
    4613                 : 
    4614                 : 
    4615                 : /**
    4616                 :    @brief flush all blocks for a file to disk, holding the cache lock
    4617                 : 
    4618                 :    @param  pagecache       pointer to a pagecache data structure
    4619                 :    @param  file            handler for the file to flush to
    4620                 :    @param  type            type of the flush
    4621                 :    @param  filter          optional function which tells what blocks to flush;
    4622                 :                            can be non-NULL only if FLUSH_KEEP, FLUSH_KEEP_LAZY
    4623                 :                            or FLUSH_FORCE_WRITE.
    4624                 :    @param  filter_arg      an argument to pass to 'filter'. Information about
    4625                 :                            the block will be passed too.
    4626                 : 
    4627                 :    @return Operation status
    4628                 :    @retval PCFLUSH_OK OK
    4629                 :    @retval PCFLUSH_ERROR There were errors during the flush process.
    4630                 :    @retval PCFLUSH_PINNED Pinned blocks were met and skipped.
    4631                 :    @retval PCFLUSH_PINNED_AND_ERROR PCFLUSH_ERROR and PCFLUSH_PINNED.
    4632                 : */
    4633                 : 
    4634                 : int flush_pagecache_blocks_with_filter(PAGECACHE *pagecache,
    4635                 :                                        PAGECACHE_FILE *file,
    4636                 :                                        enum flush_type type,
    4637                 :                                        PAGECACHE_FLUSH_FILTER filter,
    4638                 :                                        void *filter_arg)
    4639             264 : {
    4640                 :   int res;
    4641             264 :   DBUG_ENTER("flush_pagecache_blocks_with_filter");
    4642             264 :   DBUG_PRINT("enter", ("pagecache: 0x%lx", (long) pagecache));
    4643                 : 
    4644             264 :   if (pagecache->disk_blocks <= 0)
    4645             224 :     DBUG_RETURN(0);                 /* no cache blocks: nothing to flush */
    4646              40 :   pagecache_pthread_mutex_lock(&pagecache->cache_lock);
    4647              40 :   inc_counter_for_resize_op(pagecache); /* presumably holds off a resize - confirm */
    4648              40 :   res= flush_pagecache_blocks_int(pagecache, file, type, filter, filter_arg);
    4649              40 :   dec_counter_for_resize_op(pagecache);
    4650              40 :   pagecache_pthread_mutex_unlock(&pagecache->cache_lock);
    4651              40 :   DBUG_RETURN(res);
    4652                 : }
    4653                 : 
    4654                 : 
    4655                 : /*
    4656                 :   Reset the statistics counters of a page cache.
    4657                 : 
    4658                 :   SYNOPSIS
    4659                 :     reset_pagecache_counters()
    4660                 :     name       the name of the cache (only used in debug trace messages)
    4661                 :     pagecache  pointer to the pagecache to be reset
    4662                 : 
    4663                 :   DESCRIPTION
    4664                 :     Zeroes the global_* statistics counters; does nothing if the cache is
    4665                 :     not inited. Used to reset every cache in use, default and named ones.
    4666                 : 
    4667                 :   RETURN
    4668                 :     0 on success (always because it can't fail)
    4669                 : */
    4670                 : 
    4671                 : int reset_pagecache_counters(const char *name __attribute__((unused)),
    4672                 :                              PAGECACHE *pagecache)
    4673               0 : {
    4674               0 :   DBUG_ENTER("reset_pagecache_counters");
    4675               0 :   if (!pagecache->inited)
    4676                 :   {
    4677               0 :     DBUG_PRINT("info", ("Key cache %s not initialized.", name));
    4678               0 :     DBUG_RETURN(0);
    4679                 :   }
    4680               0 :   DBUG_PRINT("info", ("Resetting counters for key cache %s.", name));
    4681                 : 
    4682               0 :   pagecache->global_blocks_changed= 0;   /* Key_blocks_not_flushed */
    4683               0 :   pagecache->global_cache_r_requests= 0; /* Key_read_requests */
    4684               0 :   pagecache->global_cache_read= 0;       /* Key_reads */
    4685               0 :   pagecache->global_cache_w_requests= 0; /* Key_write_requests */
    4686               0 :   pagecache->global_cache_write= 0;      /* Key_writes */
    4687               0 :   DBUG_RETURN(0);
    4688                 : }
    4689                 : 
    4690                 : 
    4691                 : /**
    4692                 :    @brief Allocates a buffer and stores in it some info about all dirty pages
    4693                 : 
    4694                 :    Does the allocation because the caller cannot know the size itself.
    4695                 :    Memory freeing is to be done by the caller (if the "str" member of the
    4696                 :    LEX_STRING is not NULL).
    4697                 :    Ignores all pages of another type than PAGECACHE_LSN_PAGE, because they
    4698                 :    are not interesting for a checkpoint record.
    4699                 :    The caller has the intention of doing checkpoints.
    4700                 : 
    4701                 :    @param       pagecache   pointer to the page cache
    4702                 :    @param[out]  str         pointer to where the allocated buffer, and
    4703                 :                             its size, will be put
    4704                 :    @param[out]  min_rec_lsn pointer to where the minimum rec_lsn of all
    4705                 :                             relevant dirty pages will be put
    4706                 :    @return Operation status
    4707                 :      @retval 0      OK
    4708                 :      @retval 1      Error
    4709                 : */
    4710                 : 
    4711                 : my_bool pagecache_collect_changed_blocks_with_lsn(PAGECACHE *pagecache,
    4712                 :                                                   LEX_STRING *str,
    4713                 :                                                   LSN *min_rec_lsn)
    4714               4 : {
    4715               4 :   my_bool error= 0;
    4716               4 :   ulong stored_list_size= 0; /* number of dirty PAGECACHE_LSN_PAGE blocks */
    4717                 :   uint file_hash;
    4718                 :   char *ptr;
    4719               4 :   LSN minimum_rec_lsn= LSN_MAX; /* stays LSN_MAX if no block lowers it */
    4720               4 :   DBUG_ENTER("pagecache_collect_changed_blocks_with_LSN");
    4721                 : 
    4722               4 :   DBUG_ASSERT(NULL == str->str);
    4723                 :   /*
    4724                 :     We lock the entire cache but will be quick, just reading/writing a few MBs
    4725                 :     of memory at most.
    4726                 :   */
    4727               4 :   pagecache_pthread_mutex_lock(&pagecache->cache_lock);
    4728                 : #ifdef THREAD
    4729                 :   for (;;) /* repeat until no entry of files_in_flush has first_in_switch set */
    4730                 :   {
    4731                 :     struct st_file_in_flush *other_flusher;
    4732               4 :     for (file_hash= 0;
    4733               8 :          (other_flusher= (struct st_file_in_flush *)
    4734                 :           hash_element(&pagecache->files_in_flush, file_hash)) != NULL &&
    4735                 :            !other_flusher->first_in_switch;
    4736               0 :          file_hash++)
    4737                 :     {} /* empty body: the loop condition itself performs the search */
    4738               4 :     if (other_flusher == NULL)
    4739               0 :       break; /* scanned every element without finding first_in_switch */
    4740                 :     /*
    4741                 :       other_flusher.first_in_switch is true: some thread is flushing a file
    4742                 :       and has removed dirty blocks from changed_blocks[] while they were still
    4743                 :       dirty (they were being evicted (=>flushed) by yet another thread, which
    4744                 :       may not have flushed the block yet so it may still be dirty).
    4745                 :       If Checkpoint proceeds now, it will not see the page. If there is a
    4746                 :       crash right after writing the checkpoint record, before the page is
    4747                 :       flushed, at recovery the page will be wrongly ignored because it won't
    4748                 :       be in the dirty pages list in the checkpoint record. So wait.
    4749                 :     */
    4750                 :     {
    4751               0 :       struct st_my_thread_var *thread= my_thread_var;
    4752               0 :       wqueue_add_to_queue(&other_flusher->flush_queue, thread);
    4753                 :       do
    4754                 :       {
    4755               0 :         KEYCACHE_DBUG_PRINT("pagecache_collect_changed_blocks_with_lsn: wait",
    4756                 :                             ("suspend thread %ld", thread->id));
    4757               0 :         pagecache_pthread_cond_wait(&thread->suspend,
    4758                 :                                     &pagecache->cache_lock);
    4759                 :       }
    4760               0 :       while (thread->next); /* keep waiting while thread->next is still set */
    4761                 :     }
    4762                 :   }
    4763                 : #endif
    4764                 : 
    4765                 :   /* Count how many dirty pages are interesting */
    4766             516 :   for (file_hash= 0; file_hash < PAGECACHE_CHANGED_BLOCKS_HASH; file_hash++)
    4767                 :   {
    4768                 :     PAGECACHE_BLOCK_LINK *block;
    4769             512 :     for (block= pagecache->changed_blocks[file_hash] ;
    4770            1024 :          block;
    4771               0 :          block= block->next_changed)
    4772                 :     {
    4773                 :       /*
    4774                 :         Q: is there something subtle with block->hash_link: can it be NULL?
    4775                 :         does it have to be == hash_link->block... ?
    4776                 :       */
    4777               0 :       DBUG_ASSERT(block->hash_link != NULL);
    4778               0 :       DBUG_ASSERT(block->status & PCBLOCK_CHANGED);
    4779                 :       /*
    4780                 :         Note that we don't store bitmap pages, or pages from non-transactional
    4781                 :         (like temporary) tables. Don't checkpoint during Recovery which uses
    4782                 :         PAGECACHE_PLAIN_PAGE.
    4783                 :       */
    4784               0 :       if (block->type != PAGECACHE_LSN_PAGE)
    4785               0 :         continue; /* no need to store it */
    4786               0 :       stored_list_size++;
    4787                 :     }
    4788                 :   }
    4789                 : 
    4790                 :   compile_time_assert(sizeof(pagecache->blocks) <= 8); /* count is stored in 8 bytes */
    4791               4 :   str->length= 8 + /* number of dirty pages */
    4792                 :     (2 + /* table id */
    4793                 :      1 + /* data or index file */
    4794                 :      5 + /* pageno */
    4795                 :      LSN_STORE_SIZE /* rec_lsn */
    4796                 :      ) * stored_list_size;
    4797               4 :   if (NULL == (str->str= my_malloc(str->length, MYF(MY_WME))))
    4798               4 :     goto err; /* err sets error= 1 and then jumps to end to unlock */
    4799               4 :   ptr= str->str;
    4800               4 :   int8store(ptr, (ulonglong)stored_list_size);
    4801               4 :   ptr+= 8;
    4802               4 :   DBUG_PRINT("info", ("found %lu dirty pages", stored_list_size));
    4803               4 :   if (stored_list_size == 0)
    4804               0 :     goto end; /* buffer then holds only the 8-byte count */
    4805               0 :   for (file_hash= 0; file_hash < PAGECACHE_CHANGED_BLOCKS_HASH; file_hash++)
    4806                 :   {
    4807                 :     PAGECACHE_BLOCK_LINK *block;
    4808               0 :     for (block= pagecache->changed_blocks[file_hash] ;
    4809               0 :          block;
    4810               0 :          block= block->next_changed)
    4811                 :     {
    4812                 :       uint16 table_id;
    4813                 :       MARIA_SHARE *share;
    4814               0 :       if (block->type != PAGECACHE_LSN_PAGE)
    4815               0 :         continue; /* no need to store it in the checkpoint record */
    4816               0 :       share= (MARIA_SHARE *)(block->hash_link->file.callback_data);
    4817               0 :       table_id= share->id;
    4818               0 :       int2store(ptr, table_id);
    4819               0 :       ptr+= 2;
    4820               0 :       ptr[0]= (share->kfile.file == block->hash_link->file.file); /* 1 if page is in share->kfile, else 0 */
    4821               0 :       ptr++;
    4822               0 :       DBUG_ASSERT(block->hash_link->pageno < ((ULL(1)) << 40)); /* pageno must fit in 40 bits */
    4823               0 :       page_store(ptr, block->hash_link->pageno);
    4824               0 :       ptr+= PAGE_STORE_SIZE;
    4825               0 :       lsn_store(ptr, block->rec_lsn);
    4826               0 :       ptr+= LSN_STORE_SIZE;
    4827               0 :       if (block->rec_lsn != LSN_MAX)
    4828                 :       {
    4829               0 :         DBUG_ASSERT(LSN_VALID(block->rec_lsn));
    4830               0 :         if (cmp_translog_addr(block->rec_lsn, minimum_rec_lsn) < 0)
    4831               0 :           minimum_rec_lsn= block->rec_lsn;
    4832                 :       } /* otherwise, some trn->rec_lsn should hold the correct info */
    4833                 :     }
    4834                 :   }
    4835               4 : end:
    4836               4 :   pagecache_pthread_mutex_unlock(&pagecache->cache_lock);
    4837               4 :   *min_rec_lsn= minimum_rec_lsn; /* assigned on the error path too */
    4838               4 :   DBUG_RETURN(error);
    4839                 : 
    4840               0 : err: /* allocation failed: str->str is NULL, str->length was already set */
    4841               0 :   error= 1;
    4842               0 :   goto end;
    4843                 : }
    4844                 : 
    4845                 : 
    4846                 : #ifndef DBUG_OFF
    4847                 : 
    4848                 : /**
    4849                 :   Verifies that a file has no dirty pages; asserts if one is found.
    4850                 : */
    4851                 : 
    4852                 : void pagecache_file_no_dirty_page(PAGECACHE *pagecache, PAGECACHE_FILE *file)
    4853               0 : {
    4854               0 :   File fd= file->file;
    4855                 :   PAGECACHE_BLOCK_LINK *block;
    4856               0 :   for (block= pagecache->changed_blocks[FILE_HASH(*file)]; /* only this file's hash bucket */
    4857               0 :        block != NULL;
    4858               0 :        block= block->next_changed)
    4859               0 :     if (block->hash_link->file.file == fd) /* bucket may hold other files too */
    4860                 :     {
    4861               0 :       DBUG_PRINT("info", ("pagecache_file_not_in error"));
    4862               0 :       PCBLOCK_INFO(block);
    4863               0 :       DBUG_ASSERT(0); /* a changed block of this file is still listed */
    4864                 :     }
    4865                 : }
    4866                 : 
    4867                 : 
    4868                 : /*
    4869                 :   Test if disk-cache is ok (currently a stub: empty body, all arguments unused)
    4870                 : */
    4871                 : static void test_key_cache(PAGECACHE *pagecache __attribute__((unused)),
    4872                 :                            const char *where __attribute__((unused)),
    4873                 :                            my_bool lock __attribute__((unused)))
    4874               0 : {
    4875                 :   /* TODO */
    4876                 : }
    4877                 : #endif
    4878                 : /* Returns the buffer pointer stored in the given block link. */
    4879                 : uchar *pagecache_block_link_to_buffer(PAGECACHE_BLOCK_LINK *block)
    4880               0 : {
    4881               0 :   return block->buffer;
    4882                 : }
    4883                 : 
    4884                 : #if defined(PAGECACHE_TIMEOUT)
    4885                 : 
    4886                 : #define KEYCACHE_DUMP_FILE  "pagecache_dump.txt"
    4887                 : #define MAX_QUEUE_LEN  100
    4888                 : /* Debug helper: writes wait queues, used blocks and the LRU chain to KEYCACHE_DUMP_FILE. */
    4889                 : /* NOTE(review): only built under PAGECACHE_TIMEOUT and looks stale; see inline notes. */
    4890                 : static void pagecache_dump(PAGECACHE *pagecache)
    4891                 : {
    4892                 :   FILE *pagecache_dump_file=fopen(KEYCACHE_DUMP_FILE, "w"); /* NOTE(review): result not checked for NULL */
    4893                 :   struct st_my_thread_var *last;
    4894                 :   struct st_my_thread_var *thread;
    4895                 :   PAGECACHE_BLOCK_LINK *block;
    4896                 :   PAGECACHE_HASH_LINK *hash_link;
    4897                 :   PAGECACHE_PAGE *page;
    4898                 :   uint i;
    4899                 : 
    4900                 :   fprintf(pagecache_dump_file, "thread:%u\n", thread->id); /* NOTE(review): 'thread' is read before it is assigned */
    4901                 : 
    4902                 :   i=0;
    4903                 :   thread=last=waiting_for_hash_link.last_thread; /* NOTE(review): not a local; presumably a stale reference - confirm */
    4904                 :   fprintf(pagecache_dump_file, "queue of threads waiting for hash link\n");
    4905                 :   if (thread)
    4906                 :     do
    4907                 :     {
    4908                 :       thread= thread->next;
    4909                 :       page= (PAGECACHE_PAGE *) thread->opt_info;
    4910                 :       fprintf(pagecache_dump_file,
    4911                 :               "thread:%u, (file,pageno)=(%u,%lu)\n",
    4912                 :               thread->id,(uint) page->file.file,(ulong) page->pageno);
    4913                 :       if (++i == MAX_QUEUE_LEN) /* print at most MAX_QUEUE_LEN entries */
    4914                 :         break;
    4915                 :     }
    4916                 :     while (thread != last); /* stop once the walk is back at the starting element */
    4917                 : 
    4918                 :   i=0;
    4919                 :   thread=last=waiting_for_block.last_thread; /* NOTE(review): not a local; presumably a stale reference - confirm */
    4920                 :   fprintf(pagecache_dump_file, "queue of threads waiting for block\n");
    4921                 :   if (thread)
    4922                 :     do
    4923                 :     {
    4924                 :       thread=thread->next;
    4925                 :       hash_link= (PAGECACHE_HASH_LINK *) thread->opt_info;
    4926                 :       fprintf(pagecache_dump_file,
    4927                 :         "thread:%u hash_link:%u (file,pageno)=(%u,%lu)\n",
    4928                 :         thread->id, (uint) PAGECACHE_HASH_LINK_NUMBER(pagecache, hash_link),
    4929                 :         (uint) hash_link->file.file,(ulong) hash_link->pageno);
    4930                 :       if (++i == MAX_QUEUE_LEN) /* print at most MAX_QUEUE_LEN entries */
    4931                 :         break;
    4932                 :     }
    4933                 :     while (thread != last);
    4934                 : 
    4935                 :   for (i=0 ; i < pagecache->blocks_used ; i++) /* one report entry per used block */
    4936                 :   {
    4937                 :     int j;
    4938                 :     block= &pagecache->block_root[i];
    4939                 :     hash_link= block->hash_link;
    4940                 :     fprintf(pagecache_dump_file,
    4941                 :             "block:%u hash_link:%d status:%x #requests=%u waiting_for_readers:%d\n",
    4942                 :             i, (int) (hash_link ?
    4943                 :                       PAGECACHE_HASH_LINK_NUMBER(pagecache, hash_link) :
    4944                 :                       -1),
    4945                 :             block->status, block->requests, block->condvar ? 1 : 0);
    4946                 :     for (j=0 ; j < COND_SIZE; j++)
    4947                 :     {
    4948                 :       PAGECACHE_WQUEUE *wqueue=&block->wqueue[j];
    4949                 :       thread= last= wqueue->last_thread;
    4950                 :       fprintf(pagecache_dump_file, "queue #%d\n", j);
    4951                 :       if (thread)
    4952                 :       {
    4953                 :         do
    4954                 :         {
    4955                 :           thread=thread->next;
    4956                 :           fprintf(pagecache_dump_file,
    4957                 :                   "thread:%u\n", thread->id);
    4958                 :           if (++i == MAX_QUEUE_LEN) /* NOTE(review): this increments the outer block index i */
    4959                 :             break;
    4960                 :         }
    4961                 :         while (thread != last);
    4962                 :       }
    4963                 :     }
    4964                 :   }
    4965                 :   fprintf(pagecache_dump_file, "LRU chain:");
    4966                 :   block= pagecache= used_last; /* NOTE(review): probably meant pagecache->used_last - confirm */
    4967                 :   if (block)
    4968                 :   {
    4969                 :     do
    4970                 :     {
    4971                 :       block= block->next_used;
    4972                 :       fprintf(pagecache_dump_file,
    4973                 :               "block:%u, ", PCBLOCK_NUMBER(pagecache, block));
    4974                 :     }
    4975                 :     while (block != pagecache->used_last);
    4976                 :   }
    4977                 :   fprintf(pagecache_dump_file, "\n");
    4978                 : 
    4979                 :   fclose(pagecache_dump_file);
    4980                 : }
    4981                 : 
    4982                 : #endif /* defined(PAGECACHE_TIMEOUT) */
    4983                 : 
    4984                 : #if defined(PAGECACHE_TIMEOUT) && !defined(__WIN__)
    4985                 : 
    4986                 : /* Condition wait limited to PAGECACHE_TIMEOUT seconds; calls pagecache_dump on timeout. */
    4987                 : static int pagecache_pthread_cond_wait(pthread_cond_t *cond,
    4988                 :                                       pthread_mutex_t *mutex)
    4989                 : {
    4990                 :   int rc;
    4991                 :   struct timeval  now;            /* time when we started waiting        */
    4992                 :   struct timespec timeout;        /* timeout value for the wait function */
    4993                 :   struct timezone tz;
    4994                 : #if defined(PAGECACHE_DEBUG)
    4995                 :   int cnt=0; /* NOTE(review): non-static local, so it restarts at 0 on every call */
    4996                 : #endif
    4997                 : 
    4998                 :   /* Get current time */
    4999                 :   gettimeofday(&now, &tz);
    5000                 :   /* Prepare timeout value */
    5001                 :   timeout.tv_sec= now.tv_sec + PAGECACHE_TIMEOUT;
    5002                 :  /*
    5003                 :    timeval uses microseconds.
    5004                 :    timespec uses nanoseconds.
    5005                 :    1 microsecond = 1000 nanoseconds
    5006                 :  */
    5007                 :   timeout.tv_nsec= now.tv_usec * 1000;
    5008                 :   KEYCACHE_THREAD_TRACE_END("started waiting");
    5009                 : #if defined(PAGECACHE_DEBUG)
    5010                 :   cnt++;
    5011                 :   if (cnt % 100 == 0) /* NOTE(review): cnt is always 1 here, so this never holds */
    5012                 :     fprintf(pagecache_debug_log, "waiting...\n");
    5013                 :     fflush(pagecache_debug_log); /* NOTE(review): not governed by the if above (no braces) */
    5014                 : #endif
    5015                 :   rc= pthread_cond_timedwait(cond, mutex, &timeout);
    5016                 :   KEYCACHE_THREAD_TRACE_BEGIN("finished waiting");
    5017                 :   if (rc == ETIMEDOUT || rc == ETIME)
    5018                 :   {
    5019                 : #if defined(PAGECACHE_DEBUG)
    5020                 :     fprintf(pagecache_debug_log,"aborted by pagecache timeout\n");
    5021                 :     fclose(pagecache_debug_log);
    5022                 :     abort(); /* with PAGECACHE_DEBUG the dump call below is never reached */
    5023                 : #endif
    5024                 :     pagecache_dump(); /* NOTE(review): pagecache_dump is defined with a PAGECACHE * parameter */
    5025                 :   }
    5026                 : 
    5027                 : #if defined(PAGECACHE_DEBUG)
    5028                 :   KEYCACHE_DBUG_ASSERT(rc != ETIMEDOUT);
    5029                 : #else
    5030                 :   assert(rc != ETIMEDOUT); /* a timeout is treated as fatal */
    5031                 : #endif
    5032                 :   return rc;
    5033                 : }
    5034                 : #else
    5035                 : #if defined(PAGECACHE_DEBUG)
    5036                 : static int pagecache_pthread_cond_wait(pthread_cond_t *cond,
    5037                 :                                       pthread_mutex_t *mutex)
    5038                 : { /* Debug variant without timeout: pthread_cond_wait() between two trace macros */
    5039                 :   int rc;
    5040                 :   KEYCACHE_THREAD_TRACE_END("started waiting");
    5041                 :   rc= pthread_cond_wait(cond, mutex);
    5042                 :   KEYCACHE_THREAD_TRACE_BEGIN("finished waiting");
    5043                 :   return rc; /* result of pthread_cond_wait(), passed through unchanged */
    5044                 : }
    5045                 : #endif
    5046                 : #endif /* defined(PAGECACHE_TIMEOUT) && !defined(__WIN__) */
    5047                 : 
    5048                 : #if defined(PAGECACHE_DEBUG)
    5049                 : static int ___pagecache_pthread_mutex_lock(pthread_mutex_t *mutex)
    5050                 : { /* Debug wrapper: locks the mutex, then emits a thread-trace "begin" marker */
    5051                 :   int rc;
    5052                 :   rc= pthread_mutex_lock(mutex);
    5053                 :   KEYCACHE_THREAD_TRACE_BEGIN("");
    5054                 :   return rc; /* result of pthread_mutex_lock() */
    5055                 : }
    5056                 : 
    5057                 : /* Debug wrapper: emits a thread-trace "end" marker, then unlocks the mutex. */
    5058                 : static void ___pagecache_pthread_mutex_unlock(pthread_mutex_t *mutex)
    5059                 : {
    5060                 :   KEYCACHE_THREAD_TRACE_END("");
    5061                 :   pthread_mutex_unlock(mutex); /* return value is not checked */
    5062                 : }
    5063                 : 
    5064                 : /* Debug wrapper: emits a "signal" trace entry, then signals the condition variable. */
    5065                 : static int ___pagecache_pthread_cond_signal(pthread_cond_t *cond)
    5066                 : {
    5067                 :   int rc;
    5068                 :   KEYCACHE_THREAD_TRACE("signal");
    5069                 :   rc= pthread_cond_signal(cond);
    5070                 :   return rc; /* result of pthread_cond_signal() */
    5071                 : }
    5072                 : 
    5073                 : 
    5074                 : #if defined(PAGECACHE_DEBUG_LOG)
    5075                 : 
    5076                 : /* Writes a printf-style message followed by '\n' to pagecache_debug_log if it is non-NULL. */
    5077                 : static void pagecache_debug_print(const char * fmt, ...)
    5078                 : {
    5079                 :   va_list args;
    5080                 :   va_start(args,fmt);
    5081                 :   if (pagecache_debug_log) /* silently does nothing when no log file is set */
    5082                 :   {
    5083                 :     VOID(vfprintf(pagecache_debug_log, fmt, args));
    5084                 :     VOID(fputc('\n',pagecache_debug_log));
    5085                 :   }
    5086                 :   va_end(args);
    5087                 : }
    5088                 : #endif /* defined(PAGECACHE_DEBUG_LOG) */
    5089                 : 
    5090                 : #if defined(PAGECACHE_DEBUG_LOG)
    5091                 : 
    5092                 : /* Closes pagecache_debug_log if it is non-NULL. */
    5093                 : void pagecache_debug_log_close(void)
    5094                 : {
    5095                 :   if (pagecache_debug_log)
    5096                 :     fclose(pagecache_debug_log); /* NOTE(review): the pointer is not reset after fclose() */
    5097                 : }
    5098                 : #endif /* defined(PAGECACHE_DEBUG_LOG) */
    5099                 : 
    5100                 : #endif /* defined(PAGECACHE_DEBUG) */

Generated by: LTP GCOV extension version 1.4