1 : /* Copyright (C) 2007 Michael Widenius
2 :
3 : This program is free software; you can redistribute it and/or modify
4 : it under the terms of the GNU General Public License as published by
5 : the Free Software Foundation; version 2 of the License.
6 :
7 : This program is distributed in the hope that it will be useful,
8 : but WITHOUT ANY WARRANTY; without even the implied warranty of
9 : MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
10 : GNU General Public License for more details.
11 :
12 : You should have received a copy of the GNU General Public License
13 : along with this program; if not, write to the Free Software
14 : Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */
15 :
16 : /*
17 : Bitmap handling (for records in block)
18 :
19 : The data file starts with a bitmap page, followed by as many data
20 : pages as the bitmap can cover. After this there is a new bitmap page
21 : and more data pages etc.
22 :
23 : The bitmap code assumes there is always an active bitmap page and thus
24 : that there is at least one bitmap page in the file
25 :
26 : Structure of bitmap page:
27 :
28 : Fixed size records (to be implemented later):
29 :
30 : 2 bits are used to indicate:
31 :
32 : 0 Empty
33 : 1 0-75 % full (at least room for 2 records)
34 : 2 75-100 % full (at least room for one record)
35 : 3 100 % full (no more room for records)
36 :
37 : Assuming 8K pages, this will allow us to map:
38 : 8192 (bytes per page) * 4 (pages mapped per byte) * 8192 (page size)= 256M
39 :
40 : (For Maria this will be 7*4 * 8192 = 224K smaller because of LSN)
41 :
42 : Note that for fixed size rows, we can't add more columns without doing
43 : a full reorganization of the table. The user can always force a dynamic
44 : size row format by specifying ROW_FORMAT=dynamic.
45 :
46 :
47 : Dynamic size records:
48 :
49 : 3 bits are used to indicate Bytes free in 8K page
50 :
51 : 0 Empty page 8176 (head or tail)
52 : 1 0-30 % full (at least room for 3 records) 5724
53 : 2 30-60 % full (at least room for 2 records) 3271
54 : 3 60-90 % full (at least room for one record) 818
55 : 4 100 % full (no more room for records) 0
56 : 5 Tail page, 0-40 % full 4906
57 : 6 Tail page, 40-80 % full 1636
58 : 7 Full tail page or full blob page 0
59 :
60 : Assuming 8K pages, this will allow us to map:
61 : 8192 (bytes per page) * 8 bits/byte / 3 bits/page * 8192 (page size)= 170.7M
62 :
63 : Note that values 1-3 may be adjust for each individual table based on
64 : 'min record length'. Tail pages are for overflow data which can be of
65 : any size and thus doesn't have to be adjusted for different tables.
66 : If we add more columns to the table, some of the originally calculated
67 : 'cut off' points may not be optimal, but they shouldn't be 'drasticly
68 : wrong'.
69 :
70 : When allocating data from the bitmap, we are trying to do it in a
71 : 'best fit' manner. Blobs and varchar blocks are given out in large
72 : continuous extents to allow fast access to these. Before allowing a
73 : row to 'flow over' to other blocks, we will compact the page and use
74 : all space on it. If there is many rows in the page, we will ensure
75 : there is *LEFT_TO_GROW_ON_SPLIT* bytes left on the page to allow other
76 : rows to grow.
77 :
78 : The bitmap format allows us to extend the row file in big chunks, if needed.
79 :
80 : When calculating the size for a packed row, we will calculate the following
81 : things separately:
82 : - Row header + null_bits + empty_bits fixed size segments etc.
83 : - Size of all char/varchar fields
84 : - Size of each blob field
85 :
86 : The bitmap handler will get all the above information and return
87 : either one page or a set of pages to put the different parts.
88 :
89 : Bitmaps are read on demand in response to insert/delete/update operations.
90 : The following bitmap pointers will be cached and stored on disk on close:
91 : - Current insert_bitmap; When inserting new data we will first try to
92 : fill this one.
93 : - First bitmap which is not completely full. This is updated when we
94 : free data with an update or delete.
95 :
96 : While flushing out bitmaps, we will cache the status of the bitmap in memory
97 : to avoid having to read a bitmap for insert of new data that will not
98 : be of any use
99 : - Total empty space
100 : - Largest number of continuous pages
101 :
102 : Bitmap ONLY goes to disk in the following scenarios
103 : - The file is closed (and we flush all changes to disk)
104 : - On checkpoint
105 : (Ie: When we do a checkpoint, we have to ensure that all bitmaps are
106 : put on disk even if they are not in the page cache).
107 : - When explicitely requested (for example on backup or after recvoery,
108 : to simplify things)
109 :
110 : The flow of writing a row is that:
111 : - Lock the bitmap
112 : - Decide which data pages we will write to
113 : - Mark them full in the bitmap page so that other threads do not try to
114 : use the same data pages as us
115 : - We unlock the bitmap
116 : - Write the data pages
117 : - Lock the bitmap
118 : - Correct the bitmap page with the true final occupation of the data
119 : pages (that is, we marked pages full but when we are done we realize
120 : we didn't fill them)
121 : - Unlock the bitmap.
122 : */
123 :
124 : #include "maria_def.h"
125 : #include "ma_blockrec.h"
126 :
127 : #define FULL_HEAD_PAGE 4
128 : #define FULL_TAIL_PAGE 7
129 :
130 : /*#define WRONG_BITMAP_FLUSH 1*/ /*define only for provoking bugs*/
131 : #undef WRONG_BITMAP_FLUSH
132 :
133 : static my_bool _ma_read_bitmap_page(MARIA_HA *info,
134 : MARIA_FILE_BITMAP *bitmap,
135 : pgcache_page_no_t page);
136 : static my_bool _ma_bitmap_create_missing(MARIA_HA *info,
137 : MARIA_FILE_BITMAP *bitmap,
138 : pgcache_page_no_t page);
139 :
140 : /* Write bitmap page to key cache */
141 :
142 : static inline my_bool write_changed_bitmap(MARIA_SHARE *share,
143 : MARIA_FILE_BITMAP *bitmap)
144 885 : {
145 885 : DBUG_ENTER("write_changed_bitmap");
146 885 : DBUG_ASSERT(share->pagecache->block_size == bitmap->block_size);
147 885 : DBUG_ASSERT(bitmap->file.write_callback != 0);
148 885 : DBUG_PRINT("info", ("bitmap->non_flushable: %u", bitmap->non_flushable));
149 :
150 885 : if ((bitmap->non_flushable == 0)
151 : #ifdef WRONG_BITMAP_FLUSH
152 : || 1
153 : #endif
154 : )
155 : {
156 885 : my_bool res= pagecache_write(share->pagecache,
157 : &bitmap->file, bitmap->page, 0,
158 : bitmap->map, PAGECACHE_PLAIN_PAGE,
159 : PAGECACHE_LOCK_LEFT_UNLOCKED,
160 : PAGECACHE_PIN_LEFT_UNPINNED,
161 : PAGECACHE_WRITE_DELAY, 0, LSN_IMPOSSIBLE);
162 885 : DBUG_RETURN(res);
163 : }
164 : else
165 : {
166 : MARIA_PINNED_PAGE page_link;
167 0 : int res= pagecache_write(share->pagecache,
168 : &bitmap->file, bitmap->page, 0,
169 : bitmap->map, PAGECACHE_PLAIN_PAGE,
170 : PAGECACHE_LOCK_LEFT_UNLOCKED, PAGECACHE_PIN,
171 : PAGECACHE_WRITE_DELAY, &page_link.link,
172 : LSN_IMPOSSIBLE);
173 0 : page_link.unlock= PAGECACHE_LOCK_LEFT_UNLOCKED;
174 0 : page_link.changed= 1;
175 0 : push_dynamic(&bitmap->pinned_pages, (void*) &page_link);
176 0 : DBUG_RETURN(res);
177 : }
178 : }
179 :
180 : /*
181 : Initialize bitmap variables in share
182 :
183 : SYNOPSIS
184 : _ma_bitmap_init()
185 : share Share handler
186 : file data file handler
187 :
188 : NOTES
189 : This is called the first time a file is opened.
190 :
191 : RETURN
192 : 0 ok
193 : 1 error
194 : */
195 :
196 : my_bool _ma_bitmap_init(MARIA_SHARE *share, File file)
197 3113 : {
198 : uint aligned_bit_blocks;
199 : uint max_page_size;
200 3113 : MARIA_FILE_BITMAP *bitmap= &share->bitmap;
201 3113 : uint size= share->block_size;
202 : #ifndef DBUG_OFF
203 : /* We want to have a copy of the bitmap to be able to print differences */
204 3113 : size*= 2;
205 : #endif
206 :
207 3113 : if (((bitmap->map= (uchar*) my_malloc(size, MYF(MY_WME))) == NULL) ||
208 : my_init_dynamic_array(&bitmap->pinned_pages,
209 : sizeof(MARIA_PINNED_PAGE), 1, 1))
210 0 : return 1;
211 :
212 3113 : bitmap->block_size= share->block_size;
213 3113 : bitmap->file.file= file;
214 3113 : _ma_bitmap_set_pagecache_callbacks(&bitmap->file, share);
215 :
216 : /* Size needs to be aligned on 6 */
217 3113 : aligned_bit_blocks= (share->block_size - PAGE_SUFFIX_SIZE) / 6;
218 3113 : bitmap->total_size= aligned_bit_blocks * 6;
219 : /*
220 : In each 6 bytes, we have 6*8/3 = 16 pages covered
221 : The +1 is to add the bitmap page, as this doesn't have to be covered
222 : */
223 3113 : bitmap->pages_covered= aligned_bit_blocks * 16 + 1;
224 3113 : bitmap->flush_all_requested= bitmap->non_flushable= 0;
225 :
226 : /* Update size for bits */
227 : /* TODO; Make this dependent of the row size */
228 3113 : max_page_size= share->block_size - PAGE_OVERHEAD_SIZE + DIR_ENTRY_SIZE;
229 3113 : bitmap->sizes[0]= max_page_size; /* Empty page */
230 3113 : bitmap->sizes[1]= max_page_size - max_page_size * 30 / 100;
231 3113 : bitmap->sizes[2]= max_page_size - max_page_size * 60 / 100;
232 3113 : bitmap->sizes[3]= max_page_size - max_page_size * 90 / 100;
233 3113 : bitmap->sizes[4]= 0; /* Full page */
234 3113 : bitmap->sizes[5]= max_page_size - max_page_size * 40 / 100;
235 3113 : bitmap->sizes[6]= max_page_size - max_page_size * 80 / 100;
236 3113 : bitmap->sizes[7]= 0;
237 :
238 3113 : pthread_mutex_init(&share->bitmap.bitmap_lock, MY_MUTEX_INIT_SLOW);
239 3113 : pthread_cond_init(&share->bitmap.bitmap_cond, 0);
240 :
241 3113 : _ma_bitmap_reset_cache(share);
242 :
243 3113 : if (share->state.first_bitmap_with_space == ~(pgcache_page_no_t) 0)
244 : {
245 : /* Start scanning for free space from start of file */
246 32 : share->state.first_bitmap_with_space = 0;
247 : }
248 3113 : return 0;
249 : }
250 :
251 :
252 : /*
253 : Free data allocated by _ma_bitmap_init
254 :
255 : SYNOPSIS
256 : _ma_bitmap_end()
257 : share Share handler
258 : */
259 :
260 : my_bool _ma_bitmap_end(MARIA_SHARE *share)
261 3012 : {
262 3012 : my_bool res= _ma_bitmap_flush(share);
263 3012 : safe_mutex_assert_owner(&share->close_lock);
264 3012 : pthread_mutex_destroy(&share->bitmap.bitmap_lock);
265 3012 : pthread_cond_destroy(&share->bitmap.bitmap_cond);
266 3012 : delete_dynamic(&share->bitmap.pinned_pages);
267 3012 : my_free(share->bitmap.map, MYF(MY_ALLOW_ZERO_PTR));
268 3012 : share->bitmap.map= 0;
269 3012 : return res;
270 : }
271 :
272 :
273 : /*
274 : Send updated bitmap to the page cache
275 :
276 : SYNOPSIS
277 : _ma_bitmap_flush()
278 : share Share handler
279 :
280 : NOTES
281 : In the future, _ma_bitmap_flush() will be called to flush changes don't
282 : by this thread (ie, checking the changed flag is ok). The reason we
283 : check it again in the mutex is that if someone else did a flush at the
284 : same time, we don't have to do the write.
285 : This is also ok for _ma_scan_init_block_record() which does not want to
286 : miss rows: it cares only for committed rows, that is, rows for which there
287 : was a commit before our transaction started; as commit and transaction's
288 : start are protected by the same LOCK_trn_list mutex, we see memory at
289 : least as new as at other transaction's commit time, so if the committed
290 : rows caused bitmap->changed to be true, we see it; if we see 0 it really
291 : means a flush happened since then. So, it's ok to read without bitmap's
292 : mutex.
293 :
294 : RETURN
295 : 0 ok
296 : 1 error
297 : */
298 :
299 : my_bool _ma_bitmap_flush(MARIA_SHARE *share)
300 5310 : {
301 5310 : my_bool res= 0;
302 5310 : DBUG_ENTER("_ma_bitmap_flush");
303 5310 : if (share->bitmap.changed)
304 : {
305 806 : pthread_mutex_lock(&share->bitmap.bitmap_lock);
306 806 : if (share->bitmap.changed)
307 : {
308 806 : share->bitmap.changed= 0;
309 806 : res= write_changed_bitmap(share, &share->bitmap);
310 : }
311 806 : pthread_mutex_unlock(&share->bitmap.bitmap_lock);
312 : }
313 5310 : DBUG_RETURN(res);
314 : }
315 :
316 :
317 : /**
318 : Dirty-page filtering criteria for bitmap pages
319 :
320 : @param type Page's type
321 : @param pageno Page's number
322 : @param rec_lsn Page's rec_lsn
323 : @param arg pages_covered of bitmap
324 : */
325 :
326 : static enum pagecache_flush_filter_result
327 : filter_flush_bitmap_pages(enum pagecache_page_type type
328 : __attribute__ ((unused)),
329 : pgcache_page_no_t pageno,
330 : LSN rec_lsn __attribute__ ((unused)),
331 : void *arg)
332 256 : {
333 256 : return ((pageno % (*(ulong*)arg)) == 0);
334 : }
335 :
336 :
337 : /**
338 : Flushes current bitmap page to the pagecache, and then all bitmap pages
339 : from pagecache to the file. Used by Checkpoint.
340 :
341 : @param share Table's share
342 : */
343 :
344 : my_bool _ma_bitmap_flush_all(MARIA_SHARE *share)
345 64 : {
346 64 : my_bool res= 0;
347 64 : MARIA_FILE_BITMAP *bitmap= &share->bitmap;
348 64 : DBUG_ENTER("_ma_bitmap_flush_all");
349 64 : pthread_mutex_lock(&bitmap->bitmap_lock);
350 64 : if (bitmap->changed)
351 : {
352 64 : bitmap->flush_all_requested= TRUE;
353 : #ifndef WRONG_BITMAP_FLUSH
354 128 : while (bitmap->non_flushable > 0)
355 : {
356 0 : DBUG_PRINT("info", ("waiting for bitmap to be flushable"));
357 0 : pthread_cond_wait(&bitmap->bitmap_cond, &bitmap->bitmap_lock);
358 : }
359 : #endif
360 : /*
361 : Bitmap is in a flushable state: its contents in memory are reflected by
362 : log records (complete REDO-UNDO groups) and all bitmap pages are
363 : unpinned. We keep the mutex to preserve this situation, and flush to the
364 : file.
365 : */
366 64 : if (bitmap->changed)
367 : {
368 64 : res= write_changed_bitmap(share, bitmap);
369 64 : bitmap->changed= FALSE;
370 : }
371 : /*
372 : We do NOT use FLUSH_KEEP_LAZY because we must be sure that bitmap
373 : pages have been flushed. That's a condition of correctness of
374 : Recovery: data pages may have been all flushed, if we write the
375 : checkpoint record Recovery will start from after their REDOs. If
376 : bitmap page was not flushed, as the REDOs about it will be skipped, it
377 : will wrongly not be recovered. If bitmap pages had a rec_lsn it would
378 : be different.
379 : There should be no pinned pages as bitmap->non_flushable==0.
380 : */
381 64 : if (flush_pagecache_blocks_with_filter(share->pagecache,
382 : &bitmap->file, FLUSH_KEEP,
383 : filter_flush_bitmap_pages,
384 : &bitmap->pages_covered) &
385 : PCFLUSH_PINNED_AND_ERROR)
386 0 : res= TRUE;
387 64 : bitmap->flush_all_requested= FALSE;
388 : /*
389 : Some well-behaved threads may be waiting for flush_all_requested to
390 : become false, wake them up.
391 : */
392 64 : DBUG_PRINT("info", ("bitmap flusher waking up others"));
393 64 : pthread_cond_broadcast(&bitmap->bitmap_cond);
394 : }
395 64 : pthread_mutex_unlock(&bitmap->bitmap_lock);
396 64 : DBUG_RETURN(res);
397 : }
398 :
399 :
400 : /**
401 : @brief Unpin all pinned bitmap pages
402 :
403 : @param share Table's share
404 :
405 : @return Operation status
406 : @retval 0 ok
407 :
408 : @note This unpins pages pinned by other threads.
409 : */
410 :
411 : static void _ma_bitmap_unpin_all(MARIA_SHARE *share)
412 284190 : {
413 284190 : MARIA_FILE_BITMAP *bitmap= &share->bitmap;
414 : MARIA_PINNED_PAGE *page_link= ((MARIA_PINNED_PAGE*)
415 284190 : dynamic_array_ptr(&bitmap->pinned_pages, 0));
416 284190 : MARIA_PINNED_PAGE *pinned_page= page_link + bitmap->pinned_pages.elements;
417 284190 : DBUG_ENTER("_ma_bitmap_unpin_all");
418 284190 : DBUG_PRINT("info", ("pinned: %u", bitmap->pinned_pages.elements));
419 568380 : while (pinned_page-- != page_link)
420 0 : pagecache_unlock_by_link(share->pagecache, pinned_page->link,
421 : pinned_page->unlock, PAGECACHE_UNPIN,
422 : LSN_IMPOSSIBLE, LSN_IMPOSSIBLE, TRUE, TRUE);
423 284190 : bitmap->pinned_pages.elements= 0;
424 284190 : DBUG_VOID_RETURN;
425 : }
426 :
427 :
428 : /*
429 : Intialize bitmap in memory to a zero bitmap
430 :
431 : SYNOPSIS
432 : _ma_bitmap_delete_all()
433 : share Share handler
434 :
435 : NOTES
436 : This is called on maria_delete_all_rows (truncate data file).
437 : */
438 :
439 : void _ma_bitmap_delete_all(MARIA_SHARE *share)
440 569 : {
441 569 : MARIA_FILE_BITMAP *bitmap= &share->bitmap;
442 569 : DBUG_ENTER("_ma_bitmap_delete_all");
443 569 : if (bitmap->map) /* Not in create */
444 : {
445 144 : bzero(bitmap->map, bitmap->block_size);
446 144 : bitmap->changed= 1;
447 144 : bitmap->page= 0;
448 144 : bitmap->used_size= bitmap->total_size;
449 : }
450 569 : DBUG_VOID_RETURN;
451 : }
452 :
453 :
454 : /**
455 : @brief Reset bitmap caches
456 :
457 : @fn _ma_bitmap_reset_cache()
458 : @param share Maria share
459 :
460 : @notes
461 : This is called after we have swapped file descriptors and we want
462 : bitmap to forget all cached information
463 : */
464 :
465 : void _ma_bitmap_reset_cache(MARIA_SHARE *share)
466 3225 : {
467 3225 : MARIA_FILE_BITMAP *bitmap= &share->bitmap;
468 :
469 3225 : if (bitmap->map) /* If using bitmap */
470 : {
471 : /* Forget changes in current bitmap page */
472 3173 : bitmap->changed= 0;
473 :
474 : /*
475 : We can't read a page yet, as in some case we don't have an active
476 : page cache yet.
477 : Pretend we have a dummy, full and not changed bitmap page in memory.
478 : */
479 3173 : bitmap->page= ~(ulonglong) 0;
480 3173 : bitmap->used_size= bitmap->total_size;
481 3173 : bfill(bitmap->map, share->block_size, 255);
482 : #ifndef DBUG_OFF
483 3173 : memcpy(bitmap->map + bitmap->block_size, bitmap->map, bitmap->block_size);
484 : #endif
485 : }
486 : }
487 :
488 :
489 : /*
490 : Return bitmap pattern for the smallest head block that can hold 'size'
491 :
492 : SYNOPSIS
493 : size_to_head_pattern()
494 : bitmap Bitmap
495 : size Requested size
496 :
497 : RETURN
498 : 0-3 For a description of the bitmap sizes, see the header
499 : */
500 :
501 : static uint size_to_head_pattern(MARIA_FILE_BITMAP *bitmap, uint size)
502 287885 : {
503 287885 : if (size <= bitmap->sizes[3])
504 285269 : return 3;
505 2616 : if (size <= bitmap->sizes[2])
506 2213 : return 2;
507 403 : if (size <= bitmap->sizes[1])
508 197 : return 1;
509 206 : DBUG_ASSERT(size <= bitmap->sizes[0]);
510 206 : return 0;
511 : }
512 :
513 :
514 : /*
515 : Return bitmap pattern for head block where there is size bytes free
516 :
517 : SYNOPSIS
518 : _ma_free_size_to_head_pattern()
519 : bitmap Bitmap
520 : size Requested size
521 :
522 : RETURN
523 : 0-4 (Possible bitmap patterns for head block)
524 : */
525 :
526 : uint _ma_free_size_to_head_pattern(MARIA_FILE_BITMAP *bitmap, uint size)
527 717180 : {
528 717180 : if (size < bitmap->sizes[3])
529 23238 : return 4;
530 693942 : if (size < bitmap->sizes[2])
531 204462 : return 3;
532 489480 : if (size < bitmap->sizes[1])
533 206963 : return 2;
534 282517 : return (size < bitmap->sizes[0]) ? 1 : 0;
535 : }
536 :
537 :
538 : /*
539 : Return bitmap pattern for the smallest tail block that can hold 'size'
540 :
541 : SYNOPSIS
542 : size_to_tail_pattern()
543 : bitmap Bitmap
544 : size Requested size
545 :
546 : RETURN
547 : 0, 5 or 6 For a description of the bitmap sizes, see the header
548 : */
549 :
550 : static uint size_to_tail_pattern(MARIA_FILE_BITMAP *bitmap, uint size)
551 2716 : {
552 2716 : if (size <= bitmap->sizes[6])
553 765 : return 6;
554 1951 : if (size <= bitmap->sizes[5])
555 1331 : return 5;
556 620 : DBUG_ASSERT(size <= bitmap->sizes[0]);
557 620 : return 0;
558 : }
559 :
560 :
561 : /*
562 : Return bitmap pattern for tail block where there is size bytes free
563 :
564 : SYNOPSIS
565 : free_size_to_tail_pattern()
566 : bitmap Bitmap
567 : size Requested size
568 :
569 : RETURN
570 : 0, 5, 6, 7 For a description of the bitmap sizes, see the header
571 : */
572 :
573 : static uint free_size_to_tail_pattern(MARIA_FILE_BITMAP *bitmap, uint size)
574 12477 : {
575 12477 : if (size >= bitmap->sizes[0])
576 1607 : return 0; /* Revert to empty page */
577 10870 : if (size < bitmap->sizes[6])
578 1266 : return 7;
579 9604 : if (size < bitmap->sizes[5])
580 6248 : return 6;
581 3356 : return 5;
582 : }
583 :
584 :
585 : /*
586 : Return size guranteed to be available on a page
587 :
588 : SYNOPSIS
589 : pattern_to_head_size()
590 : bitmap Bitmap
591 : pattern Pattern (0-7)
592 :
593 : RETURN
594 : 0 - block_size
595 : */
596 :
597 : static inline uint pattern_to_size(MARIA_FILE_BITMAP *bitmap, uint pattern)
598 290237 : {
599 290237 : DBUG_ASSERT(pattern <= 7);
600 290237 : return bitmap->sizes[pattern];
601 : }
602 :
603 :
604 : /*
605 : Print bitmap for debugging
606 :
607 : SYNOPSIS
608 : _ma_print_bitmap()
609 : bitmap Bitmap to print
610 :
611 : IMPLEMENTATION
612 : Prints all changed bits since last call to _ma_print_bitmap().
613 : This is done by having a copy of the last bitmap in
614 : bitmap->map+bitmap->block_size.
615 : */
616 :
617 : #ifndef DBUG_OFF
618 :
619 : const char *bits_to_txt[]=
620 : {
621 : "empty", "00-30% full", "30-60% full", "60-90% full", "full",
622 : "tail 00-40 % full", "tail 40-80 % full", "tail/blob full"
623 : };
624 :
625 : static void _ma_print_bitmap_changes(MARIA_FILE_BITMAP *bitmap)
626 0 : {
627 : uchar *pos, *end, *org_pos;
628 : ulong page;
629 :
630 0 : end= bitmap->map + bitmap->used_size;
631 0 : DBUG_LOCK_FILE;
632 0 : fprintf(DBUG_FILE,"\nBitmap page changes at page %lu\n",
633 : (ulong) bitmap->page);
634 :
635 0 : page= (ulong) bitmap->page+1;
636 0 : for (pos= bitmap->map, org_pos= bitmap->map + bitmap->block_size ;
637 0 : pos < end ;
638 0 : pos+= 6, org_pos+= 6)
639 : {
640 0 : ulonglong bits= uint6korr(pos); /* 6 bytes = 6*8/3= 16 patterns */
641 0 : ulonglong org_bits= uint6korr(org_pos);
642 : uint i;
643 :
644 : /*
645 : Test if there is any changes in the next 16 bitmaps (to not have to
646 : loop through all bits if we know they are the same)
647 : */
648 0 : if (bits != org_bits)
649 : {
650 0 : for (i= 0; i < 16 ; i++, bits>>= 3, org_bits>>= 3)
651 : {
652 0 : if ((bits & 7) != (org_bits & 7))
653 0 : fprintf(DBUG_FILE, "Page: %8lu %s -> %s\n", page+i,
654 : bits_to_txt[org_bits & 7], bits_to_txt[bits & 7]);
655 : }
656 : }
657 0 : page+= 16;
658 : }
659 0 : fputc('\n', DBUG_FILE);
660 0 : DBUG_UNLOCK_FILE;
661 0 : memcpy(bitmap->map + bitmap->block_size, bitmap->map, bitmap->block_size);
662 : }
663 :
664 :
665 : /* Print content of bitmap for debugging */
666 :
667 : void _ma_print_bitmap(MARIA_FILE_BITMAP *bitmap, uchar *data,
668 : pgcache_page_no_t page)
669 0 : {
670 : uchar *pos, *end;
671 : char llbuff[22];
672 :
673 0 : end= bitmap->map + bitmap->used_size;
674 0 : DBUG_LOCK_FILE;
675 0 : fprintf(DBUG_FILE,"\nDump of bitmap page at %s\n", llstr(page, llbuff));
676 :
677 0 : page++; /* Skip bitmap page */
678 0 : for (pos= data, end= pos + bitmap->total_size;
679 0 : pos < end ;
680 0 : pos+= 6)
681 : {
682 0 : ulonglong bits= uint6korr(pos); /* 6 bytes = 6*8/3= 16 patterns */
683 :
684 : /*
685 : Test if there is any changes in the next 16 bitmaps (to not have to
686 : loop through all bits if we know they are the same)
687 : */
688 0 : if (bits)
689 : {
690 : uint i;
691 0 : for (i= 0; i < 16 ; i++, bits>>= 3)
692 : {
693 0 : if (bits & 7)
694 0 : fprintf(DBUG_FILE, "Page: %8s %s\n", llstr(page+i, llbuff),
695 : bits_to_txt[bits & 7]);
696 : }
697 : }
698 0 : page+= 16;
699 : }
700 0 : fputc('\n', DBUG_FILE);
701 0 : DBUG_UNLOCK_FILE;
702 : }
703 :
704 : #endif /* DBUG_OFF */
705 :
706 :
707 : /***************************************************************************
708 : Reading & writing bitmap pages
709 : ***************************************************************************/
710 :
711 : /*
712 : Read a given bitmap page
713 :
714 : SYNOPSIS
715 : _ma_read_bitmap_page()
716 : info Maria handler
717 : bitmap Bitmap handler
718 : page Page to read
719 :
720 : TODO
721 : Update 'bitmap->used_size' to real size of used bitmap
722 :
723 : NOTE
724 : We don't always have share->bitmap.bitmap_lock here
725 : (when called from_ma_check_bitmap_data() for example).
726 :
727 : RETURN
728 : 0 ok
729 : 1 error (Error writing old bitmap or reading bitmap page)
730 : */
731 :
732 : static my_bool _ma_read_bitmap_page(MARIA_HA *info,
733 : MARIA_FILE_BITMAP *bitmap,
734 : pgcache_page_no_t page)
735 1430 : {
736 1430 : MARIA_SHARE *share= info->s;
737 : my_bool res;
738 1430 : DBUG_ENTER("_ma_read_bitmap_page");
739 1430 : DBUG_ASSERT(page % bitmap->pages_covered == 0);
740 1430 : DBUG_ASSERT(!bitmap->changed);
741 :
742 1430 : bitmap->page= page;
743 1430 : if (((page + 1) * bitmap->block_size) > share->state.state.data_file_length)
744 : {
745 : /* Inexistent or half-created page */
746 15 : res= _ma_bitmap_create_missing(info, bitmap, page);
747 15 : DBUG_RETURN(res);
748 : }
749 1415 : bitmap->used_size= bitmap->total_size;
750 1415 : DBUG_ASSERT(share->pagecache->block_size == bitmap->block_size);
751 1415 : res= pagecache_read(share->pagecache,
752 : &bitmap->file, page, 0,
753 : bitmap->map, PAGECACHE_PLAIN_PAGE,
754 : PAGECACHE_LOCK_LEFT_UNLOCKED, 0) == NULL;
755 :
756 : /*
757 : We can't check maria_bitmap_marker here as if the bitmap page
758 : previously had a true checksum and the user switched mode to not checksum
759 : this may have any value, except maria_normal_page_marker.
760 :
761 : Using maria_normal_page_marker gives us a protection against bugs
762 : when running without any checksums.
763 : */
764 :
765 : #ifndef DBUG_OFF
766 1415 : if (!res)
767 1415 : memcpy(bitmap->map + bitmap->block_size, bitmap->map, bitmap->block_size);
768 : #endif
769 1415 : DBUG_RETURN(res);
770 : }
771 :
772 :
773 : /*
774 : Change to another bitmap page
775 :
776 : SYNOPSIS
777 : _ma_change_bitmap_page()
778 : info Maria handler
779 : bitmap Bitmap handler
780 : page Bitmap page to read
781 :
782 : NOTES
783 : If old bitmap was changed, write it out before reading new one
784 : We return empty bitmap if page is outside of file size
785 :
786 : RETURN
787 : 0 ok
788 : 1 error (Error writing old bitmap or reading bitmap page)
789 : */
790 :
791 : static my_bool _ma_change_bitmap_page(MARIA_HA *info,
792 : MARIA_FILE_BITMAP *bitmap,
793 : pgcache_page_no_t page)
794 1430 : {
795 1430 : DBUG_ENTER("_ma_change_bitmap_page");
796 :
797 1430 : if (bitmap->changed)
798 : {
799 15 : if (write_changed_bitmap(info->s, bitmap))
800 0 : DBUG_RETURN(1);
801 15 : bitmap->changed= 0;
802 : }
803 1430 : DBUG_RETURN(_ma_read_bitmap_page(info, bitmap, page));
804 : }
805 :
806 :
807 : /*
808 : Read next suitable bitmap
809 :
810 : SYNOPSIS
811 : move_to_next_bitmap()
812 : bitmap Bitmap handle
813 :
814 : NOTES
815 : The found bitmap may be full, so calling function may need to call this
816 : repeatedly until it finds enough space.
817 :
818 : TODO
819 : Add cache of bitmaps to not read something that is not usable
820 :
821 : RETURN
822 : 0 ok
823 : 1 error (either couldn't save old bitmap or read new one)
824 : */
825 :
826 : static my_bool move_to_next_bitmap(MARIA_HA *info, MARIA_FILE_BITMAP *bitmap)
827 418 : {
828 418 : pgcache_page_no_t page= bitmap->page;
829 418 : MARIA_STATE_INFO *state= &info->s->state;
830 418 : DBUG_ENTER("move_to_next_bitmap");
831 :
832 816 : if (state->first_bitmap_with_space != ~(ulonglong) 0 &&
833 : state->first_bitmap_with_space != page)
834 : {
835 398 : page= state->first_bitmap_with_space;
836 398 : state->first_bitmap_with_space= ~(ulonglong) 0;
837 : }
838 : else
839 20 : page+= bitmap->pages_covered;
840 418 : DBUG_RETURN(_ma_change_bitmap_page(info, bitmap, page));
841 : }
842 :
843 :
844 : /****************************************************************************
845 : Allocate data in bitmaps
846 : ****************************************************************************/
847 :
848 : /*
849 : Store data in 'block' and mark the place used in the bitmap
850 :
851 : SYNOPSIS
852 : fill_block()
853 : bitmap Bitmap handle
854 : block Store data about what we found
855 : best_data Pointer to best 6 uchar aligned area in bitmap->map
856 : best_pos Which bit in *best_data the area starts
857 : 0 = first bit pattern, 1 second bit pattern etc
858 : best_bits The original value of the bits at best_pos
859 : fill_pattern Bitmap pattern to store in best_data[best_pos]
860 :
861 : NOTES
862 : We mark all pages to be 'TAIL's, which means that
863 : block->page_count is really a row position inside the page.
864 : */
865 :
866 : static void fill_block(MARIA_FILE_BITMAP *bitmap,
867 : MARIA_BITMAP_BLOCK *block,
868 : uchar *best_data, uint best_pos, uint best_bits,
869 : uint fill_pattern)
870 290237 : {
871 : uint page, offset, tmp;
872 : uchar *data;
873 :
874 : /* For each 6 bytes we have 6*8/3= 16 patterns */
875 290237 : page= ((uint) (best_data - bitmap->map)) / 6 * 16 + best_pos;
876 290237 : DBUG_ASSERT(page + 1 < bitmap->pages_covered);
877 290237 : block->page= bitmap->page + 1 + page;
878 290237 : block->page_count= TAIL_PAGE_COUNT_MARKER;
879 290237 : block->empty_space= pattern_to_size(bitmap, best_bits);
880 290237 : block->sub_blocks= 0;
881 290237 : block->org_bitmap_value= best_bits;
882 290237 : block->used= BLOCKUSED_TAIL; /* See _ma_bitmap_release_unused() */
883 :
884 : /*
885 : Mark place used by reading/writing 2 bytes at a time to handle
886 : bitmaps in overlapping bytes
887 : */
888 290237 : best_pos*= 3;
889 290237 : data= best_data+ best_pos / 8;
890 290237 : offset= best_pos & 7;
891 290237 : tmp= uint2korr(data);
892 :
893 : /* we turn off the 3 bits and replace them with fill_pattern */
894 290237 : tmp= (tmp & ~(7 << offset)) | (fill_pattern << offset);
895 290237 : int2store(data, tmp);
896 290237 : bitmap->changed= 1;
897 290237 : DBUG_EXECUTE("bitmap", _ma_print_bitmap_changes(bitmap););
898 : }
899 :
900 :
901 : /*
902 : Allocate data for head block
903 :
904 : SYNOPSIS
905 : allocate_head()
906 : bitmap bitmap
907 : size Size of data region we need to store
908 : block Store found information here
909 :
910 : IMPLEMENTATION
911 : Find the best-fit page to put a region of 'size'
912 : This is defined as the first page of the set of pages
913 : with the smallest free space that can hold 'size'.
914 :
915 : RETURN
916 : 0 ok (block is updated)
917 : 1 error (no space in bitmap; block is not touched)
918 : */
919 :
920 :
921 : static my_bool allocate_head(MARIA_FILE_BITMAP *bitmap, uint size,
922 : MARIA_BITMAP_BLOCK *block)
923 287885 : {
924 287885 : uint min_bits= size_to_head_pattern(bitmap, size);
925 287885 : uchar *data= bitmap->map, *end= data + bitmap->used_size;
926 287885 : uchar *best_data= 0;
927 287885 : uint best_bits= (uint) -1, best_pos;
928 287885 : DBUG_ENTER("allocate_head");
929 :
930 287885 : LINT_INIT(best_pos);
931 287885 : DBUG_ASSERT(size <= FULL_PAGE_SIZE(bitmap->block_size));
932 :
933 272855106 : for (; data < end; data+= 6)
934 : {
935 272921190 : ulonglong bits= uint6korr(data); /* 6 bytes = 6*8/3= 16 patterns */
936 : uint i;
937 :
938 : /*
939 : Skip common patterns
940 : We can skip empty pages (if we already found a match) or
941 : anything matching the following pattern as this will be either
942 : a full page or a tail page
943 : */
944 272921190 : if ((!bits && best_data) ||
945 : ((bits & LL(04444444444444444)) == LL(04444444444444444)))
946 : continue;
947 5932327 : for (i= 0; i < 16 ; i++, bits >>= 3)
948 : {
949 5610607 : uint pattern= (uint) (bits & 7);
950 5610607 : if (pattern <= min_bits)
951 : {
952 : /* There is enough space here */
953 2155695 : if ((int) pattern > (int) best_bits)
954 : {
955 : /*
956 : There is more than enough space here and it's better than what
957 : we have found so far. Remember it, as we will choose it if we
958 : don't find anything in this bitmap page.
959 : */
960 288507 : best_bits= pattern;
961 288507 : best_data= data;
962 288507 : best_pos= i;
963 288507 : if (pattern == min_bits)
964 5544523 : goto found; /* Best possible match */
965 : }
966 : }
967 : }
968 : }
969 221801 : if (!best_data) /* Found no place */
970 : {
971 364 : if (data >= bitmap->map + bitmap->total_size)
972 364 : DBUG_RETURN(1); /* No space in bitmap */
973 : /* Allocate data at end of bitmap */
974 0 : bitmap->used_size+= 6;
975 0 : set_if_smaller(bitmap->used_size, bitmap->total_size);
976 0 : best_data= data;
977 0 : best_pos= best_bits= 0;
978 : }
979 :
980 287521 : found:
981 287521 : fill_block(bitmap, block, best_data, best_pos, best_bits, FULL_HEAD_PAGE);
982 287521 : DBUG_RETURN(0);
983 : }
984 :
985 :
986 : /*
987 : Allocate data for tail block
988 :
989 : SYNOPSIS
990 : allocate_tail()
991 : bitmap bitmap
992 : size Size of block we need to find
993 : block Store found information here
994 :
995 : RETURN
996 : 0 ok (block is updated)
997 : 1 error (no space in bitmap; block is not touched)
998 : */
999 :
1000 :
1001 : static my_bool allocate_tail(MARIA_FILE_BITMAP *bitmap, uint size,
1002 : MARIA_BITMAP_BLOCK *block)
1003 2716 : {
1004 2716 : uint min_bits= size_to_tail_pattern(bitmap, size);
1005 2716 : uchar *data= bitmap->map, *end= data + bitmap->used_size;
1006 2716 : uchar *best_data= 0;
1007 2716 : uint best_bits= (uint) -1, best_pos;
1008 2716 : DBUG_ENTER("allocate_tail");
1009 2716 : DBUG_PRINT("enter", ("size: %u", size));
1010 :
1011 2716 : LINT_INIT(best_pos);
1012 2716 : DBUG_ASSERT(size <= FULL_PAGE_SIZE(bitmap->block_size));
1013 :
1014 1112305 : for (; data < end; data += 6)
1015 : {
1016 1114212 : ulonglong bits= uint6korr(data); /* 6 bytes = 6*8/3= 16 patterns */
1017 : uint i;
1018 :
1019 : /*
1020 : Skip common patterns
1021 : We can skip empty pages (if we already found a match) or
1022 : the following patterns: 1-4 (head pages, not suitable for tail) or
1023 : 7 (full tail page). See 'Dynamic size records' comment at start of file.
1024 :
1025 : At the moment we only skip full head and tail pages (ie, all bits are
1026 : set) as this is easy to detect with one simple test and is a
1027 : quite common case if we have blobs.
1028 : */
1029 :
1030 1114212 : if ((!bits && best_data) || bits == LL(0xffffffffffff) ||
1031 : bits == LL(04444444444444444))
1032 : continue;
1033 273390 : for (i= 0; i < 16; i++, bits >>= 3)
1034 : {
1035 258325 : uint pattern= (uint) (bits & 7);
1036 258325 : if (pattern <= min_bits && (!pattern || pattern >= 5))
1037 : {
1038 10601 : if ((int) pattern > (int) best_bits)
1039 : {
1040 2876 : best_bits= pattern;
1041 2876 : best_data= data;
1042 2876 : best_pos= i;
1043 2876 : if (pattern == min_bits)
1044 256418 : goto found; /* Can't be better */
1045 : }
1046 : }
1047 : }
1048 : }
1049 809 : if (!best_data)
1050 : {
1051 0 : if (data >= bitmap->map + bitmap->total_size)
1052 0 : DBUG_RETURN(1);
1053 : /* Allocate data at end of bitmap */
1054 0 : best_data= data;
1055 0 : bitmap->used_size+= 6;
1056 0 : set_if_smaller(bitmap->used_size, bitmap->total_size);
1057 0 : best_pos= best_bits= 0;
1058 : }
1059 :
1060 2716 : found:
1061 2716 : fill_block(bitmap, block, best_data, best_pos, best_bits, FULL_TAIL_PAGE);
1062 2716 : DBUG_RETURN(0);
1063 : }
1064 :
1065 :
1066 : /*
1067 : Allocate data for full blocks
1068 :
1069 : SYNOPSIS
1070 : allocate_full_pages()
1071 : bitmap bitmap
1072 : pages_needed Total size in pages (bitmap->total_size) we would like to have
1073 : block Store found information here
1074 : full_page 1 if we are not allowed to split extent
1075 :
1076 : IMPLEMENTATION
1077 : We will return the smallest area >= size. If there is no such
1078 : block, we will return the biggest area that satisfies
1079 : area_size >= min(BLOB_SEGMENT_MIN_SIZE*full_page_size, size)
1080 :
1081 : To speed up searches, we will only consider areas that has at least 16 free
1082 : pages starting on an even boundary. When finding such an area, we will
1083 : extend it with all previous and following free pages. This will ensure
1084 : we don't get holes between areas
1085 :
1086 : RETURN
1087 : # Blocks used
1088 : 0 error (no space in bitmap; block is not touched)
1089 : */
1090 :
1091 : static ulong allocate_full_pages(MARIA_FILE_BITMAP *bitmap,
1092 : ulong pages_needed,
1093 : MARIA_BITMAP_BLOCK *block, my_bool full_page)
1094 101733 : {
1095 101733 : uchar *data= bitmap->map, *data_end= data + bitmap->used_size;
1096 101733 : uchar *page_end= data + bitmap->total_size;
1097 101733 : uchar *best_data= 0;
1098 : uint min_size;
1099 : uint best_area_size, best_prefix_area_size, best_suffix_area_size;
1100 : uint page, size;
1101 : ulonglong best_prefix_bits;
1102 101733 : DBUG_ENTER("allocate_full_pages");
1103 101733 : DBUG_PRINT("enter", ("pages_needed: %lu", pages_needed));
1104 :
1105 : /* Following variables are only used if best_data is set */
1106 101733 : LINT_INIT(best_prefix_bits);
1107 101733 : LINT_INIT(best_prefix_area_size);
1108 101733 : LINT_INIT(best_suffix_area_size);
1109 :
1110 101733 : min_size= pages_needed;
1111 101733 : if (!full_page && min_size > BLOB_SEGMENT_MIN_SIZE)
1112 0 : min_size= BLOB_SEGMENT_MIN_SIZE;
1113 101733 : best_area_size= ~(uint) 0;
1114 :
1115 60717939 : for (; data < page_end; data+= 6)
1116 : {
1117 60616206 : ulonglong bits= uint6korr(data); /* 6 bytes = 6*8/3= 16 patterns */
1118 : uchar *data_start;
1119 60616206 : ulonglong prefix_bits= 0;
1120 : uint area_size, prefix_area_size, suffix_area_size;
1121 :
1122 : /* Find area with at least 16 free pages */
1123 60616206 : if (bits)
1124 101679 : continue;
1125 101679 : data_start= data;
1126 : /* Find size of area */
1127 34841934 : for (data+=6 ; data < data_end ; data+= 6)
1128 : {
1129 34740255 : if ((bits= uint6korr(data)))
1130 34740255 : break;
1131 : }
1132 101679 : area_size= (uint) (data - data_start) / 6 * 16;
1133 101679 : if (area_size >= best_area_size)
1134 101679 : continue;
1135 101679 : prefix_area_size= suffix_area_size= 0;
1136 101679 : if (!bits)
1137 : {
1138 : /*
1139 : End of page; All the rest of the bits on page are part of area
1140 : This is needed because bitmap->used_size only covers the set bits
1141 : in the bitmap.
1142 : */
1143 101679 : area_size+= (uint) (page_end - data) / 6 * 16;
1144 101679 : if (area_size >= best_area_size)
1145 101679 : break;
1146 101679 : data= page_end;
1147 : }
1148 : else
1149 : {
1150 : /* Add bits at end of page */
1151 0 : for (; !(bits & 7); bits >>= 3)
1152 0 : suffix_area_size++;
1153 0 : area_size+= suffix_area_size;
1154 : }
1155 101679 : if (data_start != bitmap->map)
1156 : {
1157 : /* Add bits before page */
1158 101635 : bits= prefix_bits= uint6korr(data_start - 6);
1159 101635 : DBUG_ASSERT(bits != 0);
1160 : /* 111 000 000 000 000 000 000 000 000 000 000 000 000 000 000 000 */
1161 101635 : if (!(bits & LL(07000000000000000)))
1162 : {
1163 95250 : data_start-= 6;
1164 : do
1165 : {
1166 762257 : prefix_area_size++;
1167 762257 : bits<<= 3;
1168 762257 : } while (!(bits & LL(07000000000000000)));
1169 95250 : area_size+= prefix_area_size;
1170 : /* Calculate offset to page from data_start */
1171 95250 : prefix_area_size= 16 - prefix_area_size;
1172 : }
1173 : }
1174 101679 : if (area_size >= min_size && area_size <= best_area_size)
1175 : {
1176 101679 : best_data= data_start;
1177 101679 : best_area_size= area_size;
1178 101679 : best_prefix_bits= prefix_bits;
1179 101679 : best_prefix_area_size= prefix_area_size;
1180 101679 : best_suffix_area_size= suffix_area_size;
1181 :
1182 : /* Prefer to put data in biggest possible area */
1183 101679 : if (area_size <= pages_needed)
1184 0 : min_size= area_size;
1185 : else
1186 101679 : min_size= pages_needed;
1187 : }
1188 : }
1189 101733 : if (!best_data)
1190 54 : DBUG_RETURN(0); /* No room on page */
1191 :
1192 : /*
1193 : Now allocate min(pages_needed, area_size), starting from
1194 : best_start + best_prefix_area_size
1195 : */
1196 101679 : if (best_area_size > pages_needed)
1197 101679 : best_area_size= pages_needed;
1198 :
1199 : /* For each 6 bytes we have 6*8/3= 16 patterns */
1200 101679 : page= ((uint) (best_data - bitmap->map) * 8) / 3 + best_prefix_area_size;
1201 101679 : block->page= bitmap->page + 1 + page;
1202 101679 : block->page_count= best_area_size;
1203 101679 : block->empty_space= 0;
1204 101679 : block->sub_blocks= 0;
1205 101679 : block->org_bitmap_value= 0;
1206 101679 : block->used= 0;
1207 101679 : DBUG_ASSERT(page + best_area_size < bitmap->pages_covered);
1208 101679 : DBUG_PRINT("info", ("page: %lu page_count: %u",
1209 : (ulong) block->page, block->page_count));
1210 :
1211 101679 : if (best_prefix_area_size)
1212 : {
1213 : ulonglong tmp;
1214 : /* Convert offset back to bits */
1215 95250 : best_prefix_area_size= 16 - best_prefix_area_size;
1216 95250 : if (best_area_size < best_prefix_area_size)
1217 : {
1218 63898 : tmp= (LL(1) << best_area_size*3) - 1;
1219 63898 : best_area_size= best_prefix_area_size; /* for easy end test */
1220 : }
1221 : else
1222 31352 : tmp= (LL(1) << best_prefix_area_size*3) - 1;
1223 95250 : tmp<<= (16 - best_prefix_area_size) * 3;
1224 95250 : DBUG_ASSERT((best_prefix_bits & tmp) == 0);
1225 95250 : best_prefix_bits|= tmp;
1226 95250 : int6store(best_data, best_prefix_bits);
1227 95250 : if (!(best_area_size-= best_prefix_area_size))
1228 : {
1229 70311 : DBUG_EXECUTE("bitmap", _ma_print_bitmap_changes(bitmap););
1230 70311 : DBUG_RETURN(block->page_count);
1231 : }
1232 24939 : best_data+= 6;
1233 : }
1234 31368 : best_area_size*= 3; /* Bits to set */
1235 31368 : size= best_area_size/8; /* Bytes to set */
1236 31368 : bfill(best_data, size, 255);
1237 31368 : best_data+= size;
1238 31368 : if ((best_area_size-= size * 8))
1239 : {
1240 : /* fill last uchar */
1241 31363 : *best_data|= (uchar) ((1 << best_area_size) -1);
1242 31363 : best_data++;
1243 : }
1244 31368 : if (data_end < best_data)
1245 : {
1246 18814 : bitmap->used_size= (uint) (best_data - bitmap->map);
1247 18814 : DBUG_ASSERT(bitmap->used_size <= bitmap->total_size);
1248 : }
1249 31368 : bitmap->changed= 1;
1250 31368 : DBUG_EXECUTE("bitmap", _ma_print_bitmap_changes(bitmap););
1251 31368 : DBUG_RETURN(block->page_count);
1252 : }
1253 :
1254 :
1255 : /****************************************************************************
1256 : Find right bitmaps where to store data
1257 : ****************************************************************************/
1258 :
1259 : /*
1260 : Find right bitmap and position for head block
1261 :
1262 : SYNOPSIS
1263 : find_head()
1264 : info Maria handler
1265 : length Size of data region we need store
1266 : position Position in bitmap_blocks where to store the
1267 : information for the head block.
1268 :
1269 : RETURN
1270 : 0 ok
1271 : 1 error
1272 : */
1273 :
1274 : static my_bool find_head(MARIA_HA *info, uint length, uint position)
1275 287521 : {
1276 287521 : MARIA_FILE_BITMAP *bitmap= &info->s->bitmap;
1277 : MARIA_BITMAP_BLOCK *block;
1278 : /*
1279 : There is always place for the head block in bitmap_blocks as these are
1280 : preallocated at _ma_init_block_record().
1281 : */
1282 287521 : block= dynamic_element(&info->bitmap_blocks, position, MARIA_BITMAP_BLOCK *);
1283 :
1284 : /*
1285 : We need to have DIRENTRY_SIZE here to take into account that we may
1286 : need an extra directory entry for the row
1287 : */
1288 575406 : while (allocate_head(bitmap, length + DIR_ENTRY_SIZE, block))
1289 364 : if (move_to_next_bitmap(info, bitmap))
1290 0 : return 1;
1291 287521 : return 0;
1292 : }
1293 :
1294 :
1295 : /*
1296 : Find right bitmap and position for tail
1297 :
1298 : SYNOPSIS
1299 : find_tail()
1300 : info Maria handler
1301 : length Size of data region we need store
1302 : position Position in bitmap_blocks where to store the
1303 : information for the head block.
1304 :
1305 : RETURN
1306 : 0 ok
1307 : 1 error
1308 : */
1309 :
1310 : static my_bool find_tail(MARIA_HA *info, uint length, uint position)
1311 2716 : {
1312 2716 : MARIA_FILE_BITMAP *bitmap= &info->s->bitmap;
1313 : MARIA_BITMAP_BLOCK *block;
1314 2716 : DBUG_ENTER("find_tail");
1315 2716 : DBUG_ASSERT(length <= info->s->block_size - PAGE_OVERHEAD_SIZE);
1316 :
1317 : /* Needed, as there is no error checking in dynamic_element */
1318 2716 : if (allocate_dynamic(&info->bitmap_blocks, position))
1319 0 : DBUG_RETURN(1);
1320 2716 : block= dynamic_element(&info->bitmap_blocks, position, MARIA_BITMAP_BLOCK *);
1321 :
1322 : /*
1323 : We have to add DIR_ENTRY_SIZE to ensure we have space for the tail and
1324 : it's directroy entry on the page
1325 : */
1326 5432 : while (allocate_tail(bitmap, length + DIR_ENTRY_SIZE, block))
1327 0 : if (move_to_next_bitmap(info, bitmap))
1328 0 : DBUG_RETURN(1);
1329 2716 : DBUG_RETURN(0);
1330 : }
1331 :
1332 :
1333 : /*
1334 : Find right bitmap and position for full blocks in one extent
1335 :
1336 : SYNOPSIS
1337 : find_mid()
1338 : info Maria handler.
1339 : pages How many pages to allocate.
1340 : position Position in bitmap_blocks where to store the
1341 : information for the head block.
1342 : NOTES
1343 : This is used to allocate the main extent after the 'head' block
1344 : (Ie, the middle part of the head-middle-tail entry)
1345 :
1346 : RETURN
1347 : 0 ok
1348 : 1 error
1349 : */
1350 :
1351 : static my_bool find_mid(MARIA_HA *info, ulong pages, uint position)
1352 0 : {
1353 0 : MARIA_FILE_BITMAP *bitmap= &info->s->bitmap;
1354 : MARIA_BITMAP_BLOCK *block;
1355 0 : block= dynamic_element(&info->bitmap_blocks, position, MARIA_BITMAP_BLOCK *);
1356 :
1357 0 : while (!allocate_full_pages(bitmap, pages, block, 1))
1358 : {
1359 0 : if (move_to_next_bitmap(info, bitmap))
1360 0 : return 1;
1361 : }
1362 0 : return 0;
1363 : }
1364 :
1365 :
1366 : /*
1367 : Find right bitmap and position for putting a blob
1368 :
1369 : SYNOPSIS
1370 : find_blob()
1371 : info Maria handler.
1372 : length Length of the blob
1373 :
1374 : NOTES
1375 : The extents are stored last in info->bitmap_blocks
1376 :
1377 : IMPLEMENTATION
1378 : Allocate all full pages for the block + optionally one tail
1379 :
1380 : RETURN
1381 : 0 ok
1382 : 1 error
1383 : */
1384 :
1385 : static my_bool find_blob(MARIA_HA *info, ulong length)
1386 101700 : {
1387 101700 : MARIA_FILE_BITMAP *bitmap= &info->s->bitmap;
1388 101700 : uint full_page_size= FULL_PAGE_SIZE(info->s->block_size);
1389 : ulong pages;
1390 : uint rest_length, used;
1391 : uint first_block_pos;
1392 101700 : MARIA_BITMAP_BLOCK *first_block= 0;
1393 101700 : DBUG_ENTER("find_blob");
1394 101700 : DBUG_PRINT("enter", ("length: %lu", length));
1395 101700 : LINT_INIT(first_block_pos);
1396 :
1397 101700 : pages= length / full_page_size;
1398 101700 : rest_length= (uint) (length - pages * full_page_size);
1399 101700 : if (rest_length >= MAX_TAIL_SIZE(info->s->block_size))
1400 : {
1401 98984 : pages++;
1402 98984 : rest_length= 0;
1403 : }
1404 :
1405 101700 : first_block_pos= info->bitmap_blocks.elements;
1406 101700 : if (pages)
1407 : {
1408 : MARIA_BITMAP_BLOCK *block;
1409 101679 : if (allocate_dynamic(&info->bitmap_blocks,
1410 : info->bitmap_blocks.elements +
1411 : pages / BLOB_SEGMENT_MIN_SIZE + 2))
1412 0 : DBUG_RETURN(1);
1413 101679 : block= dynamic_element(&info->bitmap_blocks, info->bitmap_blocks.elements,
1414 : MARIA_BITMAP_BLOCK*);
1415 : do
1416 : {
1417 : /*
1418 : We use 0x3fff here as the two upmost bits are reserved for
1419 : TAIL_BIT and START_EXTENT_BIT
1420 : */
1421 101733 : used= allocate_full_pages(bitmap,
1422 : (pages >= 0x3fff ? 0x3fff : (uint) pages),
1423 : block, 0);
1424 101733 : if (!used)
1425 : {
1426 54 : if (move_to_next_bitmap(info, bitmap))
1427 0 : DBUG_RETURN(1);
1428 : }
1429 : else
1430 : {
1431 101679 : pages-= used;
1432 101679 : info->bitmap_blocks.elements++;
1433 101679 : block++;
1434 : }
1435 101733 : } while (pages != 0);
1436 : }
1437 101700 : if (rest_length && find_tail(info, rest_length,
1438 : info->bitmap_blocks.elements++))
1439 0 : DBUG_RETURN(1);
1440 101700 : first_block= dynamic_element(&info->bitmap_blocks, first_block_pos,
1441 : MARIA_BITMAP_BLOCK*);
1442 101700 : first_block->sub_blocks= info->bitmap_blocks.elements - first_block_pos;
1443 101700 : DBUG_RETURN(0);
1444 : }
1445 :
1446 :
1447 : /*
1448 : Find pages to put ALL blobs
1449 :
1450 : SYNOPSIS
1451 : allocate_blobs()
1452 : info Maria handler
1453 : row Information of what is in the row (from calc_record_size())
1454 :
1455 : RETURN
1456 : 0 ok
1457 : 1 error
1458 : */
1459 :
1460 : static my_bool allocate_blobs(MARIA_HA *info, MARIA_ROW *row)
1461 101700 : {
1462 : ulong *length, *end;
1463 : uint elements;
1464 : /*
1465 : Reserve size for:
1466 : head block
1467 : one extent
1468 : tail block
1469 : */
1470 101700 : elements= info->bitmap_blocks.elements;
1471 101700 : for (length= row->blob_lengths, end= length + info->s->base.blobs;
1472 203400 : length < end; length++)
1473 : {
1474 101700 : if (*length && find_blob(info, *length))
1475 0 : return 1;
1476 : }
1477 101700 : row->extents_count= (info->bitmap_blocks.elements - elements);
1478 101700 : return 0;
1479 : }
1480 :
1481 :
1482 : /*
1483 : Store in the bitmap the new size for a head page
1484 :
1485 : SYNOPSIS
1486 : use_head()
1487 : info Maria handler
1488 : page Page number to update
1489 : (Note that caller guarantees this is in the active
1490 : bitmap)
1491 : size How much free space is left on the page
1492 : block_position In which info->bitmap_block we have the
1493 : information about the head block.
1494 :
1495 : NOTES
1496 : This is used on update where we are updating an existing head page
1497 : */
1498 :
1499 : static void use_head(MARIA_HA *info, pgcache_page_no_t page, uint size,
1500 : uint block_position)
1501 379 : {
1502 379 : MARIA_FILE_BITMAP *bitmap= &info->s->bitmap;
1503 : MARIA_BITMAP_BLOCK *block;
1504 : uchar *data;
1505 : uint offset, tmp, offset_page;
1506 379 : DBUG_ASSERT(page % bitmap->pages_covered);
1507 :
1508 379 : block= dynamic_element(&info->bitmap_blocks, block_position,
1509 : MARIA_BITMAP_BLOCK*);
1510 379 : block->page= page;
1511 379 : block->page_count= 1 + TAIL_BIT;
1512 379 : block->empty_space= size;
1513 379 : block->used= BLOCKUSED_TAIL;
1514 :
1515 : /*
1516 : Mark place used by reading/writing 2 bytes at a time to handle
1517 : bitmaps in overlapping bytes
1518 : */
1519 379 : offset_page= (uint) (page - bitmap->page - 1) * 3;
1520 379 : offset= offset_page & 7;
1521 379 : data= bitmap->map + offset_page / 8;
1522 379 : tmp= uint2korr(data);
1523 379 : block->org_bitmap_value= (tmp >> offset) & 7;
1524 379 : tmp= (tmp & ~(7 << offset)) | (FULL_HEAD_PAGE << offset);
1525 379 : int2store(data, tmp);
1526 379 : bitmap->changed= 1;
1527 379 : DBUG_EXECUTE("bitmap", _ma_print_bitmap_changes(bitmap););
1528 : }
1529 :
1530 :
1531 : /*
1532 : Find out where to split the row (ie, what goes in head, middle, tail etc)
1533 :
1534 : SYNOPSIS
1535 : find_where_to_split_row()
1536 : share Maria share
1537 : row Information of what is in the row (from calc_record_size())
1538 : extents_length Number of bytes needed to store all extents
1539 : split_size Free size on the page (The head length must be less
1540 : than this)
1541 :
1542 : RETURN
1543 : row_length for the head block.
1544 : */
1545 :
1546 : static uint find_where_to_split_row(MARIA_SHARE *share, MARIA_ROW *row,
1547 : uint extents_length, uint split_size)
1548 0 : {
1549 : uint *lengths, *lengths_end;
1550 : /*
1551 : Ensure we have the minimum required space on head page:
1552 : - Header + length of field lengths (row->min_length)
1553 : - Number of extents
1554 : - One extent
1555 : */
1556 : uint row_length= (row->min_length +
1557 : size_to_store_key_length(extents_length) +
1558 0 : ROW_EXTENT_SIZE);
1559 0 : DBUG_ASSERT(row_length < split_size);
1560 : /*
1561 : Store first in all_field_lengths the different parts that are written
1562 : to the row. This needs to be in same order as in
1563 : ma_block_rec.c::write_block_record()
1564 : */
1565 0 : row->null_field_lengths[-3]= extents_length;
1566 0 : row->null_field_lengths[-2]= share->base.fixed_not_null_fields_length;
1567 0 : row->null_field_lengths[-1]= row->field_lengths_length;
1568 : for (lengths= row->null_field_lengths - EXTRA_LENGTH_FIELDS,
1569 : lengths_end= (lengths + share->base.pack_fields - share->base.blobs +
1570 0 : EXTRA_LENGTH_FIELDS); lengths < lengths_end; lengths++)
1571 : {
1572 0 : if (row_length + *lengths > split_size)
1573 0 : break;
1574 0 : row_length+= *lengths;
1575 : }
1576 0 : return row_length;
1577 : }
1578 :
1579 :
1580 : /*
1581 : Find where to write the middle parts of the row and the tail
1582 :
1583 : SYNOPSIS
1584 : write_rest_of_head()
1585 : info Maria handler
1586 : position Position in bitmap_blocks. Is 0 for rows that needs
1587 : full blocks (ie, has a head, middle part and optional tail)
1588 : rest_length How much left of the head block to write.
1589 :
1590 : RETURN
1591 : 0 ok
1592 : 1 error
1593 : */
1594 :
1595 : static my_bool write_rest_of_head(MARIA_HA *info, uint position,
1596 : ulong rest_length)
1597 0 : {
1598 0 : MARIA_SHARE *share= info->s;
1599 0 : uint full_page_size= FULL_PAGE_SIZE(share->block_size);
1600 : MARIA_BITMAP_BLOCK *block;
1601 0 : DBUG_ENTER("write_rest_of_head");
1602 0 : DBUG_PRINT("enter", ("position: %u rest_length: %lu", position,
1603 : rest_length));
1604 :
1605 0 : if (position == 0)
1606 : {
1607 : /* Write out full pages */
1608 0 : uint pages= rest_length / full_page_size;
1609 :
1610 0 : rest_length%= full_page_size;
1611 0 : if (rest_length >= MAX_TAIL_SIZE(share->block_size))
1612 : {
1613 : /* Put tail on a full page */
1614 0 : pages++;
1615 0 : rest_length= 0;
1616 : }
1617 0 : if (find_mid(info, pages, 1))
1618 0 : DBUG_RETURN(1);
1619 : /*
1620 : Insert empty block after full pages, to allow write_block_record() to
1621 : split segment into used + free page
1622 : */
1623 0 : block= dynamic_element(&info->bitmap_blocks, 2, MARIA_BITMAP_BLOCK*);
1624 0 : block->page_count= 0;
1625 0 : block->used= 0;
1626 : }
1627 0 : if (rest_length)
1628 : {
1629 0 : if (find_tail(info, rest_length, ELEMENTS_RESERVED_FOR_MAIN_PART - 1))
1630 0 : DBUG_RETURN(1);
1631 : }
1632 : else
1633 : {
1634 : /* Empty tail block */
1635 0 : block= dynamic_element(&info->bitmap_blocks,
1636 : ELEMENTS_RESERVED_FOR_MAIN_PART - 1,
1637 : MARIA_BITMAP_BLOCK *);
1638 0 : block->page_count= 0;
1639 0 : block->used= 0;
1640 : }
1641 0 : DBUG_RETURN(0);
1642 : }
1643 :
1644 :
1645 : /*
1646 : Find where to store one row
1647 :
1648 : SYNPOSIS
1649 : _ma_bitmap_find_place()
1650 : info Maria handler
1651 : row Information about row to write
1652 : blocks Store data about allocated places here
1653 :
1654 : RETURN
1655 : 0 ok
1656 : row->space_on_head_page contains minimum number of bytes we
1657 : expect to put on the head page.
1658 : 1 error
1659 : my_errno is set to error
1660 : */
1661 :
1662 : my_bool _ma_bitmap_find_place(MARIA_HA *info, MARIA_ROW *row,
1663 : MARIA_BITMAP_BLOCKS *blocks)
1664 287521 : {
1665 287521 : MARIA_SHARE *share= info->s;
1666 287521 : my_bool res= 1;
1667 : uint full_page_size, position, max_page_size;
1668 : uint head_length, row_length, rest_length, extents_length;
1669 287521 : DBUG_ENTER("_ma_bitmap_find_place");
1670 :
1671 287521 : blocks->count= 0;
1672 287521 : blocks->tail_page_skipped= blocks->page_skipped= 0;
1673 287521 : row->extents_count= 0;
1674 :
1675 : /*
1676 : Reserve place for the following blocks:
1677 : - Head block
1678 : - Full page block
1679 : - Marker block to allow write_block_record() to split full page blocks
1680 : into full and free part
1681 : - Tail block
1682 : */
1683 :
1684 287521 : info->bitmap_blocks.elements= ELEMENTS_RESERVED_FOR_MAIN_PART;
1685 287521 : max_page_size= (share->block_size - PAGE_OVERHEAD_SIZE);
1686 :
1687 287521 : pthread_mutex_lock(&share->bitmap.bitmap_lock);
1688 :
1689 287521 : if (row->total_length <= max_page_size)
1690 : {
1691 : /* Row fits in one page */
1692 186200 : position= ELEMENTS_RESERVED_FOR_MAIN_PART - 1;
1693 186200 : if (find_head(info, (uint) row->total_length, position))
1694 186200 : goto abort;
1695 186200 : row->space_on_head_page= row->total_length;
1696 186200 : goto end;
1697 : }
1698 :
1699 : /*
1700 : First allocate all blobs so that we can find out the needed size for
1701 : the main block.
1702 : */
1703 101321 : if (row->blob_length && allocate_blobs(info, row))
1704 101321 : goto abort;
1705 :
1706 101321 : extents_length= row->extents_count * ROW_EXTENT_SIZE;
1707 : /*
1708 : The + 3 is reserved for storing the number of segments in the row header.
1709 : */
1710 101321 : if ((head_length= (row->head_length + extents_length + 3)) <=
1711 : max_page_size)
1712 : {
1713 : /* Main row part fits into one page */
1714 101321 : position= ELEMENTS_RESERVED_FOR_MAIN_PART - 1;
1715 101321 : if (find_head(info, head_length, position))
1716 101321 : goto abort;
1717 101321 : row->space_on_head_page= head_length;
1718 101321 : goto end;
1719 : }
1720 :
1721 : /* Allocate enough space */
1722 0 : head_length+= ELEMENTS_RESERVED_FOR_MAIN_PART * ROW_EXTENT_SIZE;
1723 :
1724 : /* The first segment size is stored in 'row_length' */
1725 0 : row_length= find_where_to_split_row(share, row, extents_length,
1726 : max_page_size);
1727 :
1728 0 : full_page_size= FULL_PAGE_SIZE(share->block_size);
1729 0 : position= 0;
1730 0 : if (head_length - row_length <= full_page_size)
1731 0 : position= ELEMENTS_RESERVED_FOR_MAIN_PART -2; /* Only head and tail */
1732 0 : if (find_head(info, row_length, position))
1733 0 : goto abort;
1734 0 : row->space_on_head_page= row_length;
1735 :
1736 0 : rest_length= head_length - row_length;
1737 0 : if (write_rest_of_head(info, position, rest_length))
1738 287521 : goto abort;
1739 :
1740 287521 : end:
1741 287521 : blocks->block= dynamic_element(&info->bitmap_blocks, position,
1742 : MARIA_BITMAP_BLOCK*);
1743 287521 : blocks->block->sub_blocks= ELEMENTS_RESERVED_FOR_MAIN_PART - position;
1744 : /* First block's page_count is for all blocks */
1745 287521 : blocks->count= info->bitmap_blocks.elements - position;
1746 287521 : res= 0;
1747 :
1748 287521 : abort:
1749 287521 : pthread_mutex_unlock(&share->bitmap.bitmap_lock);
1750 287521 : DBUG_RETURN(res);
1751 : }
1752 :
1753 :
1754 : /*
1755 : Find where to put row on update (when head page is already defined)
1756 :
1757 : SYNPOSIS
1758 : _ma_bitmap_find_new_place()
1759 : info Maria handler
1760 : row Information about row to write
1761 : page On which page original row was stored
1762 : free_size Free size on head page
1763 : blocks Store data about allocated places here
1764 :
1765 : NOTES
1766 : This function is only called when the new row can't fit in the space of
1767 : the old row in the head page.
1768 :
1769 : This is essently same as _ma_bitmap_find_place() except that
1770 : we don't call find_head() to search in bitmaps where to put the page.
1771 :
1772 : RETURN
1773 : 0 ok
1774 : 1 error
1775 : */
1776 :
1777 : my_bool _ma_bitmap_find_new_place(MARIA_HA *info, MARIA_ROW *row,
1778 : pgcache_page_no_t page, uint free_size,
1779 : MARIA_BITMAP_BLOCKS *blocks)
1780 379 : {
1781 379 : MARIA_SHARE *share= info->s;
1782 379 : my_bool res= 1;
1783 : uint position;
1784 : uint head_length, row_length, rest_length, extents_length;
1785 : ulonglong bitmap_page;
1786 379 : DBUG_ENTER("_ma_bitmap_find_new_place");
1787 :
1788 379 : blocks->count= 0;
1789 379 : blocks->tail_page_skipped= blocks->page_skipped= 0;
1790 379 : row->extents_count= 0;
1791 379 : info->bitmap_blocks.elements= ELEMENTS_RESERVED_FOR_MAIN_PART;
1792 :
1793 379 : pthread_mutex_lock(&share->bitmap.bitmap_lock);
1794 :
1795 : /*
1796 : First allocate all blobs (so that we can find out the needed size for
1797 : the main block.
1798 : */
1799 379 : if (row->blob_length && allocate_blobs(info, row))
1800 379 : goto abort;
1801 :
1802 : /* Switch bitmap to current head page */
1803 379 : bitmap_page= page / share->bitmap.pages_covered;
1804 379 : bitmap_page*= share->bitmap.pages_covered;
1805 :
1806 379 : if (share->bitmap.page != bitmap_page &&
1807 : _ma_change_bitmap_page(info, &share->bitmap, bitmap_page))
1808 379 : goto abort;
1809 :
1810 379 : extents_length= row->extents_count * ROW_EXTENT_SIZE;
1811 379 : if ((head_length= (row->head_length + extents_length + 3)) <= free_size)
1812 : {
1813 : /* Main row part fits into one page */
1814 379 : position= ELEMENTS_RESERVED_FOR_MAIN_PART - 1;
1815 379 : use_head(info, page, head_length, position);
1816 379 : row->space_on_head_page= head_length;
1817 379 : goto end;
1818 : }
1819 :
1820 : /* Allocate enough space */
1821 0 : head_length+= ELEMENTS_RESERVED_FOR_MAIN_PART * ROW_EXTENT_SIZE;
1822 :
1823 : /* The first segment size is stored in 'row_length' */
1824 0 : row_length= find_where_to_split_row(share, row, extents_length, free_size);
1825 :
1826 0 : position= 0;
1827 0 : if (head_length - row_length < MAX_TAIL_SIZE(share->block_size))
1828 0 : position= ELEMENTS_RESERVED_FOR_MAIN_PART -2; /* Only head and tail */
1829 0 : use_head(info, page, row_length, position);
1830 0 : row->space_on_head_page= row_length;
1831 :
1832 0 : rest_length= head_length - row_length;
1833 0 : if (write_rest_of_head(info, position, rest_length))
1834 379 : goto abort;
1835 :
1836 379 : end:
1837 379 : blocks->block= dynamic_element(&info->bitmap_blocks, position,
1838 : MARIA_BITMAP_BLOCK*);
1839 379 : blocks->block->sub_blocks= ELEMENTS_RESERVED_FOR_MAIN_PART - position;
1840 : /* First block's page_count is for all blocks */
1841 379 : blocks->count= info->bitmap_blocks.elements - position;
1842 379 : res= 0;
1843 :
1844 379 : abort:
1845 379 : pthread_mutex_unlock(&share->bitmap.bitmap_lock);
1846 379 : DBUG_RETURN(res);
1847 : }
1848 :
1849 :
1850 : /****************************************************************************
1851 : Clear and reset bits
1852 : ****************************************************************************/
1853 :
1854 : /*
1855 : Set fill pattern for a page
1856 :
1857 : set_page_bits()
1858 : info Maria handler
1859 : bitmap Bitmap handler
1860 : page Adress to page
1861 : fill_pattern Pattern (not size) for page
1862 :
1863 : NOTES
1864 : Page may not be part of active bitmap
1865 :
1866 : RETURN
1867 : 0 ok
1868 : 1 error
1869 : */
1870 :
1871 : static my_bool set_page_bits(MARIA_HA *info, MARIA_FILE_BITMAP *bitmap,
1872 : pgcache_page_no_t page, uint fill_pattern)
1873 683961 : {
1874 : pgcache_page_no_t bitmap_page;
1875 : uint offset_page, offset, tmp, org_tmp;
1876 : uchar *data;
1877 683961 : DBUG_ENTER("set_page_bits");
1878 :
1879 683961 : bitmap_page= page - page % bitmap->pages_covered;
1880 683961 : if (bitmap_page != bitmap->page &&
1881 : _ma_change_bitmap_page(info, bitmap, bitmap_page))
1882 0 : DBUG_RETURN(1);
1883 :
1884 : /* Find page number from start of bitmap */
1885 683961 : offset_page= (uint) (page - bitmap->page - 1);
1886 : /*
1887 : Mark place used by reading/writing 2 bytes at a time to handle
1888 : bitmaps in overlapping bytes
1889 : */
1890 683961 : offset_page*= 3;
1891 683961 : offset= offset_page & 7;
1892 683961 : data= bitmap->map + offset_page / 8;
1893 683961 : org_tmp= tmp= uint2korr(data);
1894 683961 : tmp= (tmp & ~(7 << offset)) | (fill_pattern << offset);
1895 683961 : if (tmp == org_tmp)
1896 373333 : DBUG_RETURN(0); /* No changes */
1897 310628 : int2store(data, tmp);
1898 :
1899 310628 : bitmap->changed= 1;
1900 310628 : DBUG_EXECUTE("bitmap", _ma_print_bitmap_changes(bitmap););
1901 310628 : if (fill_pattern != 3 && fill_pattern != 7)
1902 240231 : set_if_smaller(info->s->state.first_bitmap_with_space, bitmap_page);
1903 : /*
1904 : Note that if the condition above is false (page is full), and all pages of
1905 : this bitmap are now full, and that bitmap page was
1906 : first_bitmap_with_space, we don't modify first_bitmap_with_space, indeed
1907 : its value still tells us where to start our search for a bitmap with space
1908 : (which is for sure after this full one).
1909 : That does mean that first_bitmap_with_space is only a lower bound.
1910 : */
1911 310628 : DBUG_RETURN(0);
1912 : }
1913 :
1914 :
1915 : /*
1916 : Get bitmap pattern for a given page
1917 :
1918 : SYNOPSIS
1919 : get_page_bits()
1920 : info Maria handler
1921 : bitmap Bitmap handler
1922 : page Page number
1923 :
1924 : RETURN
1925 : 0-7 Bitmap pattern
1926 : ~0 Error (couldn't read page)
1927 : */
1928 :
1929 : uint _ma_bitmap_get_page_bits(MARIA_HA *info, MARIA_FILE_BITMAP *bitmap,
1930 : pgcache_page_no_t page)
1931 57691 : {
1932 : pgcache_page_no_t bitmap_page;
1933 : uint offset_page, offset, tmp;
1934 : uchar *data;
1935 57691 : DBUG_ENTER("_ma_bitmap_get_page_bits");
1936 :
1937 57691 : bitmap_page= page - page % bitmap->pages_covered;
1938 57691 : if (bitmap_page != bitmap->page &&
1939 : _ma_change_bitmap_page(info, bitmap, bitmap_page))
1940 0 : DBUG_RETURN(~ (uint) 0);
1941 :
1942 : /* Find page number from start of bitmap */
1943 57691 : offset_page= (uint) (page - bitmap->page - 1);
1944 : /*
1945 : Mark place used by reading/writing 2 bytes at a time to handle
1946 : bitmaps in overlapping bytes
1947 : */
1948 57691 : offset_page*= 3;
1949 57691 : offset= offset_page & 7;
1950 57691 : data= bitmap->map + offset_page / 8;
1951 57691 : tmp= uint2korr(data);
1952 57691 : DBUG_RETURN((tmp >> offset) & 7);
1953 : }
1954 :
1955 :
1956 : /*
1957 : Mark all pages in a region as free
1958 :
1959 : SYNOPSIS
1960 : _ma_bitmap_reset_full_page_bits()
1961 : info Maria handler
1962 : bitmap Bitmap handler
1963 : page Start page
1964 : page_count Number of pages
1965 :
1966 : NOTES
1967 : We assume that all pages in region is covered by same bitmap
1968 : One must have a lock on info->s->bitmap.bitmap_lock
1969 :
1970 : RETURN
1971 : 0 ok
1972 : 1 Error (when reading bitmap)
1973 : */
1974 :
1975 : my_bool _ma_bitmap_reset_full_page_bits(MARIA_HA *info,
1976 : MARIA_FILE_BITMAP *bitmap,
1977 : pgcache_page_no_t page,
1978 : uint page_count)
1979 13829 : {
1980 : ulonglong bitmap_page;
1981 : uint offset, bit_start, bit_count, tmp;
1982 : uchar *data;
1983 13829 : DBUG_ENTER("_ma_bitmap_reset_full_page_bits");
1984 13829 : DBUG_PRINT("enter", ("page: %lu page_count: %u", (ulong) page, page_count));
1985 13829 : safe_mutex_assert_owner(&info->s->bitmap.bitmap_lock);
1986 :
1987 13829 : bitmap_page= page - page % bitmap->pages_covered;
1988 13829 : DBUG_ASSERT(page != bitmap_page);
1989 :
1990 13829 : if (bitmap_page != bitmap->page &&
1991 : _ma_change_bitmap_page(info, bitmap, bitmap_page))
1992 0 : DBUG_RETURN(1);
1993 :
1994 : /* Find page number from start of bitmap */
1995 13829 : offset= (uint) (page - bitmap->page - 1);
1996 :
1997 : /* Clear bits from 'page * 3' -> '(page + page_count) * 3' */
1998 13829 : bit_start= offset * 3;
1999 13829 : bit_count= page_count * 3;
2000 :
2001 13829 : data= bitmap->map + bit_start / 8;
2002 13829 : offset= bit_start & 7;
2003 :
2004 13829 : tmp= (255 << offset); /* Bits to keep */
2005 13829 : if (bit_count + offset < 8)
2006 : {
2007 : /* Only clear bits between 'offset' and 'offset+bit_count-1' */
2008 4723 : tmp^= (255 << (offset + bit_count));
2009 : }
2010 13829 : *data&= ~tmp;
2011 :
2012 13829 : if ((int) (bit_count-= (8 - offset)) > 0)
2013 : {
2014 : uint fill;
2015 7729 : data++;
2016 : /*
2017 : -1 is here to avoid one 'if' statement and to let the following code
2018 : handle the last byte
2019 : */
2020 7729 : if ((fill= (bit_count - 1) / 8))
2021 : {
2022 517 : bzero(data, fill);
2023 517 : data+= fill;
2024 : }
2025 7729 : bit_count-= fill * 8; /* Bits left to clear */
2026 7729 : tmp= (1 << bit_count) - 1;
2027 7729 : *data&= ~tmp;
2028 : }
2029 13829 : set_if_smaller(info->s->state.first_bitmap_with_space, bitmap_page);
2030 13829 : bitmap->changed= 1;
2031 13829 : DBUG_EXECUTE("bitmap", _ma_print_bitmap_changes(bitmap););
2032 13829 : DBUG_RETURN(0);
2033 : }
2034 :
2035 : /*
2036 : Set all pages in a region as used
2037 :
2038 : SYNOPSIS
2039 : _ma_bitmap_set_full_page_bits()
2040 : info Maria handler
2041 : bitmap Bitmap handler
2042 : page Start page
2043 : page_count Number of pages
2044 :
2045 : NOTES
2046 : We assume that all pages in region is covered by same bitmap
2047 : One must have a lock on info->s->bitmap.bitmap_lock
2048 :
2049 : RETURN
2050 : 0 ok
2051 : 1 Error (when reading bitmap)
2052 : */
2053 :
2054 : my_bool _ma_bitmap_set_full_page_bits(MARIA_HA *info,
2055 : MARIA_FILE_BITMAP *bitmap,
2056 : pgcache_page_no_t page, uint page_count)
2057 6966 : {
2058 : ulonglong bitmap_page;
2059 : uint offset, bit_start, bit_count, tmp;
2060 : uchar *data;
2061 6966 : DBUG_ENTER("_ma_bitmap_set_full_page_bits");
2062 6966 : DBUG_PRINT("enter", ("page: %lu page_count: %u", (ulong) page, page_count));
2063 6966 : safe_mutex_assert_owner(&info->s->bitmap.bitmap_lock);
2064 :
2065 6966 : bitmap_page= page - page % bitmap->pages_covered;
2066 6966 : if (bitmap_page != bitmap->page &&
2067 : _ma_change_bitmap_page(info, bitmap, bitmap_page))
2068 0 : DBUG_RETURN(1);
2069 :
2070 : /* Find page number from start of bitmap */
2071 6966 : offset= (uint) (page - bitmap->page - 1);
2072 :
2073 : /* Set bits from 'page * 3' -> '(page + page_count) * 3' */
2074 6966 : bit_start= offset * 3;
2075 6966 : bit_count= page_count * 3;
2076 :
2077 6966 : data= bitmap->map + bit_start / 8;
2078 6966 : offset= bit_start & 7;
2079 :
2080 6966 : tmp= (255 << offset); /* Bits to keep */
2081 6966 : if (bit_count + offset < 8)
2082 : {
2083 : /* Only set bits between 'offset' and 'offset+bit_count-1' */
2084 1393 : tmp^= (255 << (offset + bit_count));
2085 : }
2086 6966 : *data|= tmp;
2087 :
2088 6966 : if ((int) (bit_count-= (8 - offset)) > 0)
2089 : {
2090 : uint fill;
2091 5133 : data++;
2092 : /*
2093 : -1 is here to avoid one 'if' statement and to let the following code
2094 : handle the last byte
2095 : */
2096 5133 : if ((fill= (bit_count - 1) / 8))
2097 : {
2098 204 : bfill(data, fill, 255);
2099 204 : data+= fill;
2100 : }
2101 5133 : bit_count-= fill * 8; /* Bits left to set */
2102 5133 : tmp= (1 << bit_count) - 1;
2103 5133 : *data|= tmp;
2104 : }
2105 6966 : bitmap->changed= 1;
2106 6966 : DBUG_EXECUTE("bitmap", _ma_print_bitmap_changes(bitmap););
2107 6966 : DBUG_RETURN(0);
2108 : }
2109 :
2110 :
2111 : /**
2112 : @brief
2113 : Make a transition of MARIA_FILE_BITMAP::non_flushable.
2114 : If the bitmap becomes flushable, which requires that REDO-UNDO has been
2115 : logged and all bitmap pages touched by the thread have a correct
2116 : allocation, it unpins all bitmap pages, and if _ma_bitmap_flush_all() is
2117 : waiting (in practice it is a checkpoint), it wakes it up.
2118 : If the bitmap becomes or stays unflushable, the function merely records it
2119 : unless a concurrent _ma_bitmap_flush_all() is happening, in which case the
2120 : function first waits for the flush to be done.
2121 :
2122 : @note
2123 : this sets info->non_flushable_state to 1 if we have incremented
2124 : bitmap->non_flushable and not yet decremented it.
2125 :
2126 : @param share Table's share
2127 : @param non_flushable_inc Increment of MARIA_FILE_BITMAP::non_flushable
2128 : (-1 or +1).
2129 : */
2130 :
2131 : void _ma_bitmap_flushable(MARIA_HA *info, int non_flushable_inc)
2132 661806 : {
2133 661806 : MARIA_SHARE *share= info->s;
2134 : MARIA_FILE_BITMAP *bitmap;
2135 661806 : DBUG_ENTER("_ma_bitmap_flushable");
2136 :
2137 : /*
2138 : Not transactional tables are never automaticly flushed and needs no
2139 : protection
2140 : */
2141 661806 : if (!share->now_transactional)
2142 246717 : DBUG_VOID_RETURN;
2143 :
2144 415089 : bitmap= &share->bitmap;
2145 415089 : if (non_flushable_inc == -1)
2146 : {
2147 130899 : pthread_mutex_lock(&bitmap->bitmap_lock);
2148 130899 : DBUG_ASSERT((int) bitmap->non_flushable > 0);
2149 130899 : DBUG_ASSERT(info->non_flushable_state == 1);
2150 130899 : info->non_flushable_state= 0;
2151 130899 : if (--bitmap->non_flushable == 0)
2152 : {
2153 : /*
2154 : We unlock and unpin pages locked and pinned by other threads. It does
2155 : not seem to be an issue as all bitmap changes are serialized with
2156 : the bitmap's mutex.
2157 : */
2158 130899 : _ma_bitmap_unpin_all(share);
2159 130899 : if (unlikely(bitmap->flush_all_requested))
2160 : {
2161 0 : DBUG_PRINT("info", ("bitmap flushable waking up flusher"));
2162 0 : pthread_cond_broadcast(&bitmap->bitmap_cond);
2163 : }
2164 : }
2165 130899 : DBUG_PRINT("info", ("bitmap->non_flushable: %u", bitmap->non_flushable));
2166 130899 : pthread_mutex_unlock(&bitmap->bitmap_lock);
2167 130899 : DBUG_VOID_RETURN;
2168 : }
2169 284190 : DBUG_ASSERT(non_flushable_inc == 1);
2170 284190 : DBUG_ASSERT(info->non_flushable_state == 0);
2171 284190 : pthread_mutex_lock(&bitmap->bitmap_lock);
2172 568380 : while (unlikely(bitmap->flush_all_requested))
2173 : {
2174 : /*
2175 : Some other thread is waiting for the bitmap to become
2176 : flushable. Not the moment to make the bitmap unflushable or more
2177 : unflushable; let's rather back off and wait. If we didn't do this, with
2178 : multiple writers, there may always be one thread causing the bitmap to
2179 : be unflushable and _ma_bitmap_flush_all() would wait for long.
2180 : There should not be a deadlock because if our thread increased
2181 : non_flushable (and thus _ma_bitmap_flush_all() is waiting for at least
2182 : our thread), it is not going to increase it more so is not going to come
2183 : here.
2184 : */
2185 0 : DBUG_PRINT("info", ("waiting for bitmap flusher"));
2186 0 : pthread_cond_wait(&bitmap->bitmap_cond, &bitmap->bitmap_lock);
2187 : }
2188 284190 : bitmap->non_flushable++;
2189 284190 : info->non_flushable_state= 1;
2190 284190 : DBUG_PRINT("info", ("bitmap->non_flushable: %u", bitmap->non_flushable));
2191 284190 : pthread_mutex_unlock(&bitmap->bitmap_lock);
2192 284190 : DBUG_VOID_RETURN;
2193 : }
2194 :
2195 :
2196 : /*
2197 : Correct bitmap pages to reflect the true allocation
2198 :
2199 : SYNOPSIS
2200 : _ma_bitmap_release_unused()
2201 : info Maria handle
2202 : blocks Bitmap blocks
2203 :
2204 : IMPLEMENTATION
2205 : If block->used & BLOCKUSED_TAIL is set:
2206 : If block->used & BLOCKUSED_USED is set, then the bits for the
2207 : corresponding page is set according to block->empty_space
2208 : If block->used & BLOCKUSED_USED is not set, then the bits for
2209 : the corresponding page is set to org_bitmap_value;
2210 :
2211 : If block->used & BLOCKUSED_TAIL is not set:
2212 : if block->used is not set, the bits for the corresponding page are
2213 : cleared
2214 :
2215 : For the first block (head block) the logic is same as for a tail block
2216 :
2217 : Note that we may have 'filler blocks' that are used to split a block
2218 : in half; These can be recognized by that they have page_count == 0.
2219 :
2220 : RETURN
2221 : 0 ok
2222 : 1 error (Couldn't write or read bitmap page)
2223 : */
2224 :
2225 : my_bool _ma_bitmap_release_unused(MARIA_HA *info, MARIA_BITMAP_BLOCKS *blocks)
2226 315914 : {
2227 315914 : MARIA_BITMAP_BLOCK *block= blocks->block, *end= block + blocks->count;
2228 315914 : MARIA_FILE_BITMAP *bitmap= &info->s->bitmap;
2229 : uint bits, current_bitmap_value;
2230 315914 : DBUG_ENTER("_ma_bitmap_release_unused");
2231 :
2232 : /*
2233 : We can skip FULL_HEAD_PAGE (4) as the page was marked as 'full'
2234 : when we allocated space in the page
2235 : */
2236 315914 : current_bitmap_value= FULL_HEAD_PAGE;
2237 :
2238 315914 : pthread_mutex_lock(&bitmap->bitmap_lock);
2239 :
2240 : /* First handle head block */
2241 315914 : if (block->used & BLOCKUSED_USED)
2242 : {
2243 315914 : DBUG_PRINT("info", ("head page: %lu empty_space: %u",
2244 : (ulong) block->page, block->empty_space));
2245 315914 : bits= _ma_free_size_to_head_pattern(bitmap, block->empty_space);
2246 315914 : if (block->used & BLOCKUSED_USE_ORG_BITMAP)
2247 28014 : current_bitmap_value= block->org_bitmap_value;
2248 : }
2249 : else
2250 0 : bits= block->org_bitmap_value;
2251 315914 : if (bits != current_bitmap_value)
2252 : {
2253 285907 : if (set_page_bits(info, bitmap, block->page, bits))
2254 : goto err;
2255 : }
2256 : else
2257 : {
2258 30007 : DBUG_ASSERT(current_bitmap_value ==
2259 : _ma_bitmap_get_page_bits(info, bitmap, block->page));
2260 : }
2261 :
2262 : /* Handle all full pages and tail pages (for head page and blob) */
2263 421588 : for (block++; block < end; block++)
2264 : {
2265 : uint page_count;
2266 105674 : if (!block->page_count)
2267 105674 : continue; /* Skip 'filler blocks' */
2268 :
2269 105674 : page_count= block->page_count;
2270 105674 : if (block->used & BLOCKUSED_TAIL)
2271 : {
2272 3298 : current_bitmap_value= FULL_TAIL_PAGE;
2273 : /* The bitmap page is only one page */
2274 3298 : page_count= 1;
2275 3298 : if (block->used & BLOCKUSED_USED)
2276 : {
2277 3298 : DBUG_PRINT("info", ("tail page: %lu empty_space: %u",
2278 : (ulong) block->page, block->empty_space));
2279 3298 : bits= free_size_to_tail_pattern(bitmap, block->empty_space);
2280 3298 : if (block->used & BLOCKUSED_USE_ORG_BITMAP)
2281 0 : current_bitmap_value= block->org_bitmap_value;
2282 : }
2283 : else
2284 0 : bits= block->org_bitmap_value;
2285 :
2286 : /*
2287 : The page has all bits set; The following test is an optimization
2288 : to not set the bits to the same value as before.
2289 : */
2290 3298 : if (bits != current_bitmap_value &&
2291 : set_page_bits(info, bitmap, block->page, bits))
2292 : goto err;
2293 : }
2294 102376 : else if (!(block->used & BLOCKUSED_USED) &&
2295 : _ma_bitmap_reset_full_page_bits(info, bitmap,
2296 : block->page, page_count))
2297 105674 : goto err;
2298 : }
2299 :
2300 : /* This duplicates ma_bitmap_flushable(-1) except it already has mutex */
2301 315914 : if (info->non_flushable_state)
2302 : {
2303 153291 : DBUG_ASSERT(((int) (bitmap->non_flushable)) > 0);
2304 153291 : info->non_flushable_state= 0;
2305 153291 : if (--bitmap->non_flushable == 0)
2306 : {
2307 153291 : _ma_bitmap_unpin_all(info->s);
2308 153291 : if (unlikely(bitmap->flush_all_requested))
2309 : {
2310 0 : DBUG_PRINT("info", ("bitmap flushable waking up flusher"));
2311 0 : pthread_cond_broadcast(&bitmap->bitmap_cond);
2312 : }
2313 : }
2314 : }
2315 315914 : DBUG_PRINT("info", ("bitmap->non_flushable: %u", bitmap->non_flushable));
2316 :
2317 315914 : pthread_mutex_unlock(&bitmap->bitmap_lock);
2318 315914 : DBUG_RETURN(0);
2319 :
2320 0 : err:
2321 0 : pthread_mutex_unlock(&bitmap->bitmap_lock);
2322 0 : DBUG_RETURN(1);
2323 : }
2324 :
2325 :
2326 : /*
2327 : Free full pages from bitmap and pagecache
2328 :
2329 : SYNOPSIS
2330 : _ma_bitmap_free_full_pages()
2331 : info Maria handle
2332 : extents Extents (as stored on disk)
2333 : count Number of extents
2334 :
2335 : IMPLEMENTATION
2336 : Mark all full pages (not tails) from extents as free, both in bitmap
2337 : and page cache.
2338 :
2339 : RETURN
2340 : 0 ok
2341 : 1 error (Couldn't write or read bitmap page)
2342 : */
2343 :
2344 : my_bool _ma_bitmap_free_full_pages(MARIA_HA *info, const uchar *extents,
2345 : uint count)
2346 3484 : {
2347 3484 : MARIA_FILE_BITMAP *bitmap= &info->s->bitmap;
2348 3484 : DBUG_ENTER("_ma_bitmap_free_full_pages");
2349 :
2350 3484 : pthread_mutex_lock(&bitmap->bitmap_lock);
2351 9783 : for (; count--; extents+= ROW_EXTENT_SIZE)
2352 : {
2353 6299 : pgcache_page_no_t page= uint5korr(extents);
2354 : uint page_count= (uint2korr(extents + ROW_EXTENT_PAGE_SIZE) &
2355 6299 : ~START_EXTENT_BIT);
2356 6299 : if (!(page_count & TAIL_BIT))
2357 : {
2358 3480 : if (page == 0 && page_count == 0)
2359 3480 : continue; /* Not used extent */
2360 3480 : if (pagecache_delete_pages(info->s->pagecache, &info->dfile, page,
2361 : page_count, PAGECACHE_LOCK_WRITE, 1) ||
2362 : _ma_bitmap_reset_full_page_bits(info, bitmap, page, page_count))
2363 : {
2364 0 : pthread_mutex_unlock(&bitmap->bitmap_lock);
2365 0 : DBUG_RETURN(1);
2366 : }
2367 : }
2368 : }
2369 3484 : pthread_mutex_unlock(&bitmap->bitmap_lock);
2370 3484 : DBUG_RETURN(0);
2371 : }
2372 :
2373 :
2374 : /*
2375 : Mark in the bitmap how much free space there is on a page
2376 :
2377 : SYNOPSIS
2378 : _ma_bitmap_set()
2379 : info Maria handler
2380 : page Adress to page
2381 : head 1 if page is a head page, 0 if tail page
2382 : empty_space How much empty space there is on page
2383 :
2384 : RETURN
2385 : 0 ok
2386 : 1 error
2387 : */
2388 :
2389 : my_bool _ma_bitmap_set(MARIA_HA *info, pgcache_page_no_t page, my_bool head,
2390 : uint empty_space)
2391 395315 : {
2392 395315 : MARIA_FILE_BITMAP *bitmap= &info->s->bitmap;
2393 : uint bits;
2394 : my_bool res;
2395 395315 : DBUG_ENTER("_ma_bitmap_set");
2396 :
2397 395315 : pthread_mutex_lock(&info->s->bitmap.bitmap_lock);
2398 395315 : bits= (head ?
2399 : _ma_free_size_to_head_pattern(bitmap, empty_space) :
2400 : free_size_to_tail_pattern(bitmap, empty_space));
2401 395315 : res= set_page_bits(info, bitmap, page, bits);
2402 395315 : pthread_mutex_unlock(&info->s->bitmap.bitmap_lock);
2403 395315 : DBUG_RETURN(res);
2404 : }
2405 :
2406 :
2407 : /*
2408 : Check that bitmap pattern is correct for a page
2409 :
2410 : NOTES
2411 : Used in maria_chk
2412 :
2413 : SYNOPSIS
2414 : _ma_check_bitmap_data()
2415 : info Maria handler
2416 : page_type What kind of page this is
2417 : page Adress to page
2418 : empty_space Empty space on page
2419 : bitmap_pattern Store here the pattern that was in the bitmap for the
2420 : page. This is always updated.
2421 :
2422 : RETURN
2423 : 0 ok
2424 : 1 error
2425 : */
2426 :
2427 : my_bool _ma_check_bitmap_data(MARIA_HA *info,
2428 : enum en_page_type page_type, pgcache_page_no_t page,
2429 : uint empty_space, uint *bitmap_pattern)
2430 1416 : {
2431 : uint bits;
2432 1416 : switch (page_type) {
2433 : case UNALLOCATED_PAGE:
2434 : case MAX_PAGE_TYPE:
2435 0 : bits= 0;
2436 0 : break;
2437 : case HEAD_PAGE:
2438 967 : bits= _ma_free_size_to_head_pattern(&info->s->bitmap, empty_space);
2439 967 : break;
2440 : case TAIL_PAGE:
2441 63 : bits= free_size_to_tail_pattern(&info->s->bitmap, empty_space);
2442 63 : break;
2443 : case BLOB_PAGE:
2444 386 : bits= FULL_TAIL_PAGE;
2445 386 : break;
2446 : default:
2447 0 : bits= 0; /* to satisfy compiler */
2448 0 : DBUG_ASSERT(0);
2449 : }
2450 1416 : return ((*bitmap_pattern= _ma_bitmap_get_page_bits(info, &info->s->bitmap,
2451 : page)) != bits);
2452 : }
2453 :
2454 :
2455 : /*
2456 : Check if the page type matches the one that we have in the bitmap
2457 :
2458 : SYNOPSIS
2459 : _ma_check_if_right_bitmap_type()
2460 : info Maria handler
2461 : page_type What kind of page this is
2462 : page Adress to page
2463 : bitmap_pattern Store here the pattern that was in the bitmap for the
2464 : page. This is always updated.
2465 :
2466 : NOTES
2467 : Used in maria_chk
2468 :
2469 : RETURN
2470 : 0 ok
2471 : 1 error
2472 : */
2473 :
2474 : my_bool _ma_check_if_right_bitmap_type(MARIA_HA *info,
2475 : enum en_page_type page_type,
2476 : pgcache_page_no_t page,
2477 : uint *bitmap_pattern)
2478 493 : {
2479 493 : if ((*bitmap_pattern= _ma_bitmap_get_page_bits(info, &info->s->bitmap,
2480 : page)) > 7)
2481 0 : return 1; /* Couldn't read page */
2482 493 : switch (page_type) {
2483 : case HEAD_PAGE:
2484 0 : return *bitmap_pattern < 1 || *bitmap_pattern > 4;
2485 : case TAIL_PAGE:
2486 107 : return *bitmap_pattern < 5;
2487 : case BLOB_PAGE:
2488 386 : return *bitmap_pattern != 7;
2489 : default:
2490 : break;
2491 : }
2492 0 : DBUG_ASSERT(0);
2493 : return 1;
2494 : }
2495 :
2496 :
2497 : /**
2498 : @brief create the first bitmap page of a freshly created data file
2499 :
2500 : @param share table's share
2501 :
2502 : @return Operation status
2503 : @retval 0 OK
2504 : @retval !=0 Error
2505 : */
2506 :
2507 : int _ma_bitmap_create_first(MARIA_SHARE *share)
2508 569 : {
2509 569 : uint block_size= share->bitmap.block_size;
2510 569 : File file= share->bitmap.file.file;
2511 : uchar marker[CRC_SIZE];
2512 :
2513 : /*
2514 : Next write operation of the page will write correct CRC
2515 : if it is needed
2516 : */
2517 569 : int4store(marker, MARIA_NO_CRC_BITMAP_PAGE);
2518 :
2519 569 : if (my_chsize(file, block_size - sizeof(marker),
2520 : 0, MYF(MY_WME)) ||
2521 : my_pwrite(file, marker, sizeof(marker),
2522 : block_size - sizeof(marker),
2523 : MYF(MY_NABP | MY_WME)))
2524 0 : return 1;
2525 569 : share->state.state.data_file_length= block_size;
2526 569 : _ma_bitmap_delete_all(share);
2527 569 : return 0;
2528 : }
2529 :
2530 :
2531 : /**
2532 : @brief Pagecache callback to get the TRANSLOG_ADDRESS to flush up to, when a
2533 : bitmap page needs to be flushed.
2534 :
2535 : @param page Page's content
2536 : @param page_no Page's number (<offset>/<page length>)
2537 : @param data_ptr Callback data pointer (pointer to MARIA_SHARE)
2538 :
2539 : @retval TRANSLOG_ADDRESS to flush up to.
2540 : */
2541 :
2542 : static my_bool
2543 : flush_log_for_bitmap(uchar *page __attribute__((unused)),
2544 : pgcache_page_no_t page_no __attribute__((unused)),
2545 : uchar *data_ptr __attribute__((unused)))
2546 441 : {
2547 : #ifndef DBUG_OFF
2548 441 : const MARIA_SHARE *share= (MARIA_SHARE*)data_ptr;
2549 : #endif
2550 441 : DBUG_ENTER("flush_log_for_bitmap");
2551 441 : DBUG_ASSERT(share->now_transactional);
2552 : /*
2553 : WAL imposes that UNDOs reach disk before bitmap is flushed. We don't know
2554 : the LSN of the last UNDO about this bitmap page, so we flush whole log.
2555 : */
2556 441 : DBUG_RETURN(translog_flush(translog_get_horizon()));
2557 : }
2558 :
2559 :
2560 : /**
2561 : @brief Set callbacks for bitmap pages
2562 :
2563 : @note
2564 : We don't use pagecache_file_init here, as we want to keep the
2565 : code readable
2566 : */
2567 :
2568 : void _ma_bitmap_set_pagecache_callbacks(PAGECACHE_FILE *file,
2569 : MARIA_SHARE *share)
2570 4380 : {
2571 4380 : file->callback_data= (uchar*) share;
2572 4380 : file->flush_log_callback= maria_flush_log_for_page_none;
2573 4380 : file->write_fail= maria_page_write_failure;
2574 :
2575 4380 : if (share->temporary)
2576 : {
2577 55 : file->read_callback= &maria_page_crc_check_none;
2578 55 : file->write_callback= &maria_page_filler_set_none;
2579 : }
2580 : else
2581 : {
2582 4325 : file->read_callback= &maria_page_crc_check_bitmap;
2583 4325 : if (share->options & HA_OPTION_PAGE_CHECKSUM)
2584 3910 : file->write_callback= &maria_page_crc_set_normal;
2585 : else
2586 415 : file->write_callback= &maria_page_filler_set_bitmap;
2587 4325 : if (share->now_transactional)
2588 3323 : file->flush_log_callback= flush_log_for_bitmap;
2589 : }
2590 : }
2591 :
2592 :
2593 : /**
2594 : Extends data file with zeroes and creates new bitmap pages into page cache.
2595 :
2596 : Writes all bitmap pages in [from, to].
2597 :
2598 : Non-bitmap pages of zeroes are correct as they are marked empty in
2599 : bitmaps. Bitmap pages will not be zeroes: they will get their CRC fixed when
2600 : flushed. And if there is a crash before flush (so they are zeroes at
2601 : restart), a REDO will re-create them in page cache.
2602 : */
2603 :
2604 : static my_bool
2605 : _ma_bitmap_create_missing_into_pagecache(MARIA_SHARE *share,
2606 : MARIA_FILE_BITMAP *bitmap,
2607 : pgcache_page_no_t from,
2608 : pgcache_page_no_t to,
2609 : uchar *zeroes)
2610 0 : {
2611 : pgcache_page_no_t i;
2612 : /*
2613 : We do not use my_chsize() because there can be a race between when it
2614 : reads the physical size and when it writes (assume data_file_length is 10,
2615 : physical length is 8 and two data pages are in cache, and here we do a
2616 : my_chsize: my_chsize sees physical length is 8, then the two data pages go
2617 : to disk then my_chsize writes from page 8 and so overwrites the two data
2618 : pages, wrongly).
2619 : We instead rely on the filesystem filling gaps with zeroes.
2620 : */
2621 0 : for (i= from; i <= to; i+= bitmap->pages_covered)
2622 : {
2623 : /**
2624 : No need to keep them pinned, they are new so flushable.
2625 : @todo but we may want to keep them pinned, as an optimization: if they
2626 : are not pinned they may go to disk before the data pages go (so, the
2627 : physical pages would be in non-ascending "sparse" order on disk), or the
2628 : filesystem may fill gaps with zeroes physically which is a waste of
2629 : time.
2630 : */
2631 0 : if (pagecache_write(share->pagecache,
2632 : &bitmap->file, i, 0,
2633 : zeroes, PAGECACHE_PLAIN_PAGE,
2634 : PAGECACHE_LOCK_LEFT_UNLOCKED,
2635 : PAGECACHE_PIN_LEFT_UNPINNED,
2636 : PAGECACHE_WRITE_DELAY, 0, LSN_IMPOSSIBLE))
2637 0 : goto err;
2638 : }
2639 : /*
2640 : Data pages after data_file_length are full of zeroes but that is allowed
2641 : as they are marked empty in the bitmap.
2642 : */
2643 0 : return FALSE;
2644 0 : err:
2645 0 : return TRUE;
2646 : }
2647 :
2648 :
2649 : /**
2650 : Creates missing bitmaps when we extend the data file.
2651 :
2652 : At run-time, when we need a new bitmap page we come here; and only one bitmap
2653 : page at a time is created.
2654 :
2655 : In some recovery cases we insert at a large offset in the data file, way
2656 : beyond state.data_file_length, so can need to create more than one bitmap
2657 : page in one go. Known case is:
2658 : Start a transaction in Maria;
2659 : delete last row of very large table (with delete_row)
2660 : do a bulk insert
2661 : crash
2662 : Then UNDO_BULK_INSERT will truncate table files, and
2663 : UNDO_ROW_DELETE will want to put the row back to its original position,
2664 : extending the data file a lot: bitmap page*s* in the hole must be created,
2665 : or he table would look corrupted.
2666 :
2667 : We need to log REDOs for bitmap creation, consider: we apply a REDO for a
2668 : data page, which creates the first data page covered by a new bitmap
2669 : not yet created. If the data page is flushed but the bitmap page is not and
2670 : there is a crash, re-execution of the REDO will complain about the zeroed
2671 : bitmap page (see it as corruption). Thus a REDO is needed to re-create the
2672 : bitmap.
2673 :
2674 : @param info Maria handler
2675 : @param bitmap Bitmap handler
2676 : @param page Last bitmap page to create
2677 :
2678 : @note When this function is called this must be true:
2679 : ((page + 1) * bitmap->block_size > info->s->state.state.data_file_length)
2680 :
2681 : */
2682 :
2683 : static my_bool _ma_bitmap_create_missing(MARIA_HA *info,
2684 : MARIA_FILE_BITMAP *bitmap,
2685 : pgcache_page_no_t page)
2686 15 : {
2687 15 : MARIA_SHARE *share= info->s;
2688 15 : uint block_size= bitmap->block_size;
2689 : pgcache_page_no_t from, to;
2690 15 : my_off_t data_file_length= share->state.state.data_file_length;
2691 15 : DBUG_ENTER("_ma_bitmap_create_missing");
2692 :
2693 : /* First (in offset order) bitmap page to create */
2694 15 : if (data_file_length < block_size)
2695 15 : goto err; /* corrupted, should have first bitmap page */
2696 :
2697 15 : from= (data_file_length / block_size - 1) / bitmap->pages_covered + 1;
2698 15 : from*= bitmap->pages_covered;
2699 : /*
2700 : page>=from because:
2701 : (page + 1) * bs > dfl, and page == k * pc so:
2702 : (k * pc + 1) * bs > dfl; k * pc + 1 > dfl / bs; k * pc > dfl / bs - 1
2703 : k > (dfl / bs - 1) / pc; k >= (dfl / bs - 1) / pc + 1
2704 : k * pc >= ((dfl / bs - 1) / pc + 1) * pc == from.
2705 : */
2706 15 : DBUG_ASSERT(page >= from);
2707 :
2708 15 : if (share->now_transactional)
2709 : {
2710 : LSN lsn;
2711 : uchar log_data[FILEID_STORE_SIZE + PAGE_STORE_SIZE * 2];
2712 : LEX_CUSTRING log_array[TRANSLOG_INTERNAL_PARTS + 1];
2713 0 : page_store(log_data + FILEID_STORE_SIZE, from);
2714 0 : page_store(log_data + FILEID_STORE_SIZE + PAGE_STORE_SIZE, page);
2715 0 : log_array[TRANSLOG_INTERNAL_PARTS + 0].str= log_data;
2716 0 : log_array[TRANSLOG_INTERNAL_PARTS + 0].length= sizeof(log_data);
2717 : /*
2718 : We don't use info->trn so that this REDO is always executed even though
2719 : the UNDO does not reach disk due to crash. This is also consistent with
2720 : the fact that the new bitmap pages are not pinned.
2721 : */
2722 0 : if (translog_write_record(&lsn, LOGREC_REDO_BITMAP_NEW_PAGE,
2723 : &dummy_transaction_object, info,
2724 : (translog_size_t)sizeof(log_data),
2725 : TRANSLOG_INTERNAL_PARTS + 1, log_array,
2726 : log_data, NULL))
2727 15 : goto err;
2728 : /*
2729 : No need to flush the log: the bitmap pages we are going to create will
2730 : flush it when they go to disk.
2731 : */
2732 : }
2733 :
2734 : /*
2735 : Last bitmap page. It has special creation: will go to the page cache
2736 : only later as we are going to modify it very soon.
2737 : */
2738 15 : bzero(bitmap->map, bitmap->block_size);
2739 15 : bitmap->used_size= 0;
2740 : #ifndef DBUG_OFF
2741 15 : memcpy(bitmap->map + bitmap->block_size, bitmap->map, bitmap->block_size);
2742 : #endif
2743 :
2744 : /* Last bitmap page to create before 'page' */
2745 15 : DBUG_ASSERT(page >= bitmap->pages_covered);
2746 15 : to= page - bitmap->pages_covered;
2747 : /*
2748 : In run-time situations, from>=to is always false, i.e. we always create
2749 : one bitmap at a time ('page').
2750 : */
2751 15 : if ((from <= to) &&
2752 : _ma_bitmap_create_missing_into_pagecache(share, bitmap, from, to,
2753 : bitmap->map))
2754 15 : goto err;
2755 :
2756 15 : share->state.state.data_file_length= (page + 1) * bitmap->block_size;
2757 :
2758 15 : DBUG_RETURN(FALSE);
2759 0 : err:
2760 0 : DBUG_RETURN(TRUE);
2761 : }
2762 :
2763 :
2764 : my_bool _ma_apply_redo_bitmap_new_page(MARIA_HA *info,
2765 : LSN lsn __attribute__ ((unused)),
2766 : const uchar *header)
2767 0 : {
2768 0 : MARIA_SHARE *share= info->s;
2769 0 : MARIA_FILE_BITMAP *bitmap= &share->bitmap;
2770 : my_bool error;
2771 : pgcache_page_no_t from, to, min_from;
2772 0 : DBUG_ENTER("_ma_apply_redo_bitmap_new_page");
2773 :
2774 0 : from= page_korr(header);
2775 0 : to= page_korr(header + PAGE_STORE_SIZE);
2776 0 : DBUG_PRINT("info", ("from: %lu to: %lu", (ulong)from, (ulong)to));
2777 0 : if ((from > to) ||
2778 : (from % bitmap->pages_covered) != 0 ||
2779 : (to % bitmap->pages_covered) != 0)
2780 : {
2781 0 : error= TRUE; /* corrupted log record */
2782 0 : goto err;
2783 : }
2784 :
2785 0 : min_from= (share->state.state.data_file_length / bitmap->block_size - 1) /
2786 : bitmap->pages_covered + 1;
2787 0 : min_from*= bitmap->pages_covered;
2788 0 : if (from < min_from)
2789 : {
2790 0 : DBUG_PRINT("info", ("overwrite bitmap pages from %lu", (ulong)min_from));
2791 : /*
2792 : We have to overwrite. It could be that there was a bitmap page in
2793 : memory, covering a data page which went to disk, then crash: the
2794 : bitmap page is now full of zeros and is ==min_from, we have to overwrite
2795 : it with correct checksum.
2796 : */
2797 : }
2798 0 : share->state.changed|= STATE_CHANGED;
2799 0 : bzero(info->buff, bitmap->block_size);
2800 0 : if (!(error=
2801 : _ma_bitmap_create_missing_into_pagecache(share, bitmap, from, to,
2802 : info->buff)))
2803 0 : share->state.state.data_file_length= (to + 1) * bitmap->block_size;
2804 :
2805 0 : err:
2806 0 : DBUG_RETURN(error);
2807 : }
|