1 : /* Copyright (C) 2007 MySQL AB & Sanja Belkin
2 :
3 : This program is free software; you can redistribute it and/or modify
4 : it under the terms of the GNU General Public License as published by
5 : the Free Software Foundation; version 2 of the License.
6 :
7 : This program is distributed in the hope that it will be useful,
8 : but WITHOUT ANY WARRANTY; without even the implied warranty of
9 : MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
10 : GNU General Public License for more details.
11 :
12 : You should have received a copy of the GNU General Public License
13 : along with this program; if not, write to the Free Software
14 : Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */
15 :
16 : #include "maria_def.h"
17 : #include "trnman.h"
18 : #include "ma_blockrec.h" /* for some constants and in-write hooks */
19 : #include "ma_key_recover.h" /* For some in-write hooks */
20 : #include "ma_checkpoint.h"
21 :
22 : /*
23 : On Windows, neither my_open() nor my_sync() works for directories.
24 : Also, there is no need to flush filesystem changes, i.e. to sync()
25 : directories. So sync_dir() expands to 0 there and to my_sync() elsewhere.
26 : */
27 : #ifdef __WIN__
28 : #define sync_dir(A,B) 0
29 : #else
30 : #define sync_dir(A,B) my_sync(A,B)
31 : #endif
32 :
33 : /**
34 : @file
35 : @brief Module which writes and reads to a transaction log
36 : */
37 :
38 : /* Filler byte: 0xFF can never be a valid first byte of a chunk */
39 : #define TRANSLOG_FILLER 0xFF
40 :
41 : /* Number of opened log files in the pagecache (should be at least 2) */
42 : #define OPENED_FILES_NUM 3
43 : #define CACHED_FILES_NUM 5
44 : #define CACHED_FILES_NUM_DIRECT_SEARCH_LIMIT 7
45 : #if CACHED_FILES_NUM > CACHED_FILES_NUM_DIRECT_SEARCH_LIMIT /* false for 5 vs 7 */
46 : #include <hash.h>
47 : #include <m_ctype.h>
48 : #endif /* CACHED_FILES_NUM > CACHED_FILES_NUM_DIRECT_SEARCH_LIMIT */
49 :
50 : /* Transaction log file descriptor */
51 : typedef struct st_translog_file
52 : {
53 : uint32 number; /* NOTE(review): presumably the log file number - confirm */
54 : PAGECACHE_FILE handler; /* page cache file object for this log file */
55 : my_bool was_recovered; /* NOTE(review): set/used outside this view - confirm */
56 : my_bool is_sync; /* NOTE(review): presumably "file is synced" - confirm */
57 : } TRANSLOG_FILE;
58 :
59 : /* Records buffer size in bytes (should be TRANSLOG_PAGE_SIZE * n) */
60 : #define TRANSLOG_WRITE_BUFFER (1024*1024)
61 : /*
62 : pagecache_read/write/inject() use bmove512() on their buffers, so those must
63 : be long-aligned, which we guarantee by using the union type below:
64 : */
65 : typedef union
66 : {
67 : ulonglong dummy; /* present only to force the alignment described above */
68 : uchar buffer[TRANSLOG_PAGE_SIZE]; /* storage for exactly one log page */
69 : } TRANSLOG_PAGE_SIZE_BUFF;
70 :
71 : /* Minimum chunk length */
72 : #define TRANSLOG_MIN_CHUNK 3
73 : /*
74 : Number of buffers used by the loghandler
75 :
76 : Should be at least 4, because one thread can block up to 2 buffers in
77 : normal circumstances (less than half of one and the full other, or the just
78 : switched one and the other), but if we meet the end of the file in the middle
79 : and have to switch buffer it will be 3. + 1 buffer for flushing/writing.
80 : We have a bigger number here for higher concurrency and to make division
81 : faster.
82 :
83 : The number should be a power of 2 to be fast.
84 : */
85 : #define TRANSLOG_BUFFERS_NO 8
86 : /* Number of bytes (+ header) which can be unused on first page in sequence */
87 : #define TRANSLOG_MINCHUNK_CONTENT 1
88 : /* Version of the log file */
89 : #define TRANSLOG_VERSION_ID 10000 /* 1.00.00 */
90 :
91 : #define TRANSLOG_PAGE_FLAGS 6 /* transaction log page flags offset */
92 :
93 : /* Maximum length of a compressed LSN (the worst case of storing a whole LSN) */
94 : #define COMPRESSED_LSN_MAX_STORE_SIZE (2 + LSN_STORE_SIZE)
95 : #define MAX_NUMBER_OF_LSNS_PER_RECORD 2
96 :
97 :
98 : /* Max LSN of buffer B: last_lsn, or prev_last_lsn if last_lsn is LSN_IMPOSSIBLE */
99 : #define BUFFER_MAX_LSN(B) \
100 : ((B)->last_lsn == LSN_IMPOSSIBLE ? (B)->prev_last_lsn : (B)->last_lsn)
101 :
102 : /* Log write buffer descriptor */
103 : struct st_translog_buffer
104 : {
105 : /*
106 : Cache for the current log. Comes first to be aligned for bmove512() in
107 : pagecache_inject()
108 : */
109 : uchar buffer[TRANSLOG_WRITE_BUFFER];
110 : /*
111 : Maximum LSN of the records which end in this buffer (or LSN_IMPOSSIBLE
112 : if no LSN ends here)
113 : */
114 : LSN last_lsn;
115 : /* last_lsn of the previous buffer or LSN_IMPOSSIBLE if it is the very first one */
116 : LSN prev_last_lsn;
117 : /* Offset of this buffer in the file */
118 : TRANSLOG_ADDRESS offset;
119 : /*
120 : Next buffer offset in the file (it is not always offset + size,
121 : in case of flush by LSN it can be offset + size - TRANSLOG_PAGE_SIZE)
122 : */
123 : TRANSLOG_ADDRESS next_buffer_offset;
124 : /* Previous buffer offset, used to detect when its flush has finished */
125 : TRANSLOG_ADDRESS prev_buffer_offset;
126 : /*
127 : How much is written (or will be written when copy_to_buffer_in_progress
128 : becomes 0) to this buffer
129 : */
130 : translog_size_t size;
131 : /* File handler for this buffer */
132 : TRANSLOG_FILE *file;
133 : /* Threads which are waiting for buffer filling/freeing */
134 : pthread_cond_t waiting_filling_buffer;
135 : /* Number of records whose copying into this buffer is in progress */
136 : uint copy_to_buffer_in_progress;
137 : /* List of threads waiting for this buffer to become ready */
138 : struct st_my_thread_var *waiting_flush;
139 : /*
140 : If true then the previous buffer overlaps with this one (due to a flush
141 : of the loghandler, the last page of that buffer is the same as the first
142 : page of this buffer) and has to be written first (because it contains the
143 : old content of the page which is present in both buffers)
144 : */
145 : my_bool overlay;
146 : uint buffer_no; /* number of this buffer (checked against the cursor's buffer_no) */
147 : /*
148 : Lock for the buffer.
149 :
150 : The current buffer also locks the whole handler (if one wants to lock the
151 : handler one should lock the current buffer).
152 :
153 : Buffers are locked only in one direction (with overflow and beginning
154 : from the first buffer). If we keep a lock on buffer N we can lock only
155 : buffer N+1 (never N-1).
156 :
157 : One thread does not lock more than 2 buffers at a time, so to make a dead
158 : lock it would take N threads (where N equals the number of buffers), each
159 : taking one buffer and trying to lock the next. But it is impossible because
160 : there are only 2 cases when a thread takes 2 buffers: 1) one thread finishes
161 : the current buffer (where horizon is) and starts the next (to which horizon
162 : moves). 2) flush starts from the buffer after the current (oldest) one and
163 : goes till the current, crabbing along the buffer sequence. And there is only
164 : one flush at a time (they are serialised).
165 :
166 : Because of the above we can't get a dead lock: it is impossible to get all
167 : TRANSLOG_BUFFERS_NO buffers locked simultaneously.
168 : */
169 : pthread_mutex_t mutex;
170 : /*
171 : Some thread is going to close the buffer and it should be
172 : done only by that thread
173 : */
174 : my_bool is_closing_buffer;
175 : /*
176 : Version of the buffer; increases every time the buffer is flushed.
177 : Together with file and offset it allows detecting buffer changes
178 : */
179 : uint8 ver;
180 :
181 : /*
182 : When the previous buffer is sent to disk it sets its address here to allow
183 : detecting when that is done
184 : (we have to keep it in this buffer to lock buffers only in one direction).
185 : */
186 : TRANSLOG_ADDRESS prev_sent_to_disk;
187 : pthread_cond_t prev_sent_to_disk_cond;
188 : };
189 :
190 :
191 : struct st_buffer_cursor /* write position (cursor) inside a log write buffer */
192 : {
193 : /* Pointer into the buffer */
194 : uchar *ptr;
195 : /* Current buffer */
196 : struct st_translog_buffer *buffer;
197 : /* How many bytes we have written on the current page */
198 : uint16 current_page_fill;
199 : /*
200 : How many times we wrote the page to disk during the flushing process
201 : (for sector protection).
202 : */
203 : uint16 write_counter;
204 : /* Previous write offset */
205 : uint16 previous_offset;
206 : /* Number of the current buffer (must equal buffer->buffer_no) */
207 : uint8 buffer_no;
208 : /*
209 : True if the cursor is just filling the buffer after the pointer has been
210 : advanced to the horizon.
211 : */
212 : my_bool chaser;
213 : /*
214 : Is the current page of the cursor already finished (sector protection
215 : should be applied if it is needed)
216 : */
217 : my_bool protected;
218 : };
219 :
220 :
221 : typedef uint8 dirty_buffer_mask_t; /* one bit per buffer; TRANSLOG_BUFFERS_NO is 8 */
222 :
223 : struct st_translog_descriptor
224 : {
225 : /* *** Parameters of the log handler *** */
226 :
227 : /* Page cache for the log reads */
228 : PAGECACHE *pagecache;
229 : uint flags; /* NOTE(review): presumably the page flags page_overhead derives from */
230 : /* File open flags */
231 : uint open_flags;
232 : /* Max size of one log file (for creation of new logs) */
233 : uint32 log_file_max_size;
234 : uint32 server_version;
235 : /* Server ID (used for replication) */
236 : uint32 server_id;
237 : /* Loghandler's buffer capacity in case of chunk 2 filling */
238 : uint32 buffer_capacity_chunk_2;
239 : /*
240 : Half of the buffer capacity in case of chunk 2 filling,
241 : used to decide whether we will write a record in one group or many.
242 : It is stored in a variable just to avoid a division every
243 : time we need it.
244 : */
245 : uint32 half_buffer_capacity_chunk_2;
246 : /* Page overhead calculated by flags (whether CRC is enabled, etc) */
247 : uint16 page_overhead;
248 : /*
249 : Page capacity ("useful load") calculated by flags
250 : (TRANSLOG_PAGE_SIZE - page_overhead-1)
251 : */
252 : uint16 page_capacity_chunk_2;
253 : /* Path to the directory where we store log files */
254 : char directory[FN_REFLEN];
255 :
256 : /* *** Current state of the log handler *** */
257 : /* List of opened files */
258 : DYNAMIC_ARRAY open_files;
259 : /* Max/min file number in the array */
260 : uint32 max_file, min_file;
261 : /* Guard of the opened files list */
262 : rw_lock_t open_files_lock;
263 :
264 : /*
265 : File descriptor of the directory where we store log files, kept for
266 : syncing it.
267 : */
268 : File directory_fd;
269 : /* Buffers for log writing */
270 : struct st_translog_buffer buffers[TRANSLOG_BUFFERS_NO];
271 : /* Mask where 1 in position N means that buffer N is not flushed */
272 : dirty_buffer_mask_t dirty_buffer_mask;
273 : /* Protection for the above variable */
274 : pthread_mutex_t dirty_buffer_mask_lock;
275 : /*
276 : horizon - visible end of the log (here it is the absolute end of the log:
277 : the position where the next chunk can start)
278 : */
279 : TRANSLOG_ADDRESS horizon;
280 : /* Horizon buffer cursor */
281 : struct st_buffer_cursor bc;
282 : /* Maximum LSN of the current (not finished) file */
283 : LSN max_lsn;
284 :
285 : /*
286 : Last flushed LSN (protected by log_flush_lock).
287 : Pointers in the log are ordered like this:
288 : last_lsn_checked <= flushed <= sent_to_disk <= in_buffers_only <=
289 : max_lsn <= horizon
290 : */
291 : LSN flushed;
292 : /* Last LSN sent to the disk (but maybe not written yet) */
293 : LSN sent_to_disk;
294 : /* Horizon from which the log started after initialization */
295 : TRANSLOG_ADDRESS log_start;
296 : TRANSLOG_ADDRESS previous_flush_horizon;
297 : /* Everything after this address is not sent to disk yet */
298 : TRANSLOG_ADDRESS in_buffers_only;
299 : /* Protection of sent_to_disk and in_buffers_only */
300 : pthread_mutex_t sent_to_disk_lock;
301 : /*
302 : Protects flushed (see above) and serializes flushes (will
303 : be removed in v1.5)
304 : */
305 : pthread_mutex_t log_flush_lock;
306 : pthread_cond_t log_flush_cond;
307 :
308 : /* Protects changing of headers of finished files (max_lsn) */
309 : pthread_mutex_t file_header_lock;
310 :
311 : /*
312 : Sorted array (with protection) of files where we started the writing
313 : process and so can't give the last LSN yet
314 : */
315 : pthread_mutex_t unfinished_files_lock;
316 : DYNAMIC_ARRAY unfinished_files;
317 :
318 : /*
319 : Minimum number of a still needed file, calculated during the last
320 : translog_purge call
321 : */
322 : uint32 min_need_file;
323 : /* Purger data: minimum file in the log (or 0 if unknown) */
324 : uint32 min_file_number;
325 : /* Protects the purger from concurrent calls, and its data */
326 : pthread_mutex_t purger_lock;
327 : /* Last low water mark checked */
328 : LSN last_lsn_checked;
329 : /**
330 : Must be set to 0 under the loghandler lock every time a new LSN
331 : is generated.
332 : */
333 : my_bool is_everything_flushed;
334 : /* True when a flush pass is in progress */
335 : my_bool flush_in_progress;
336 : /* Variables for the next flush pass */
337 : TRANSLOG_ADDRESS next_pass_max_lsn; /* NOTE(review): named *_lsn, typed TRANSLOG_ADDRESS */
338 : pthread_t max_lsn_requester;
339 : };
340 :
341 : static struct st_translog_descriptor log_descriptor; /* file-local log handler state */
342 :
343 : ulong log_purge_type= TRANSLOG_PURGE_IMMIDIATE;
344 : ulong log_file_size= TRANSLOG_FILE_SIZE;
345 : ulong sync_log_dir= TRANSLOG_SYNC_DIR_NEWFILE;
346 :
347 : /* Marker for end of log: END_OF_LOG is the address of this static byte */
348 : static uchar end_of_log= 0;
349 : #define END_OF_LOG &end_of_log
350 :
351 : enum enum_translog_status translog_status= TRANSLOG_UNINITED;
352 :
353 : /* Chunk types (kept in the two high bits; TRANSLOG_CHUNK_TYPE is the mask) */
354 : #define TRANSLOG_CHUNK_LSN 0x00 /* 0 chunk referred to as LSN (head or tail) */
355 : #define TRANSLOG_CHUNK_FIXED (1 << 6) /* 1 (pseudo)fixed record (also LSN) */
356 : #define TRANSLOG_CHUNK_NOHDR (2 << 6) /* 2 no head chunk (till page end) */
357 : #define TRANSLOG_CHUNK_LNGTH (3 << 6) /* 3 chunk with chunk length */
358 : #define TRANSLOG_CHUNK_TYPE (3 << 6) /* Mask to get chunk type (same value as LNGTH) */
359 : #define TRANSLOG_REC_TYPE 0x3F /* Mask to get record type */
360 : #define TRANSLOG_CHUNK_0_CONT 0x3F /* the type to mark chunk 0 continue */
361 :
362 : /* Compressed (relative) LSN constants */
363 : #define TRANSLOG_CLSN_LEN_BITS 0xC0 /* Mask to get compressed LSN length */
364 :
365 :
366 : #include <my_atomic.h>
367 : /* An array that maps the id of a MARIA_SHARE to that MARIA_SHARE */
368 : static MARIA_SHARE **id_to_share= NULL;
369 : /* Lock for id_to_share */
370 : static my_atomic_rwlock_t LOCK_id_to_share;
371 :
372 : static my_bool translog_dummy_callback(uchar *page,
373 : pgcache_page_no_t page_no,
374 : uchar* data_ptr); /* forward declaration */
375 : static my_bool translog_page_validator(uchar *page,
376 : pgcache_page_no_t page_no,
377 : uchar* data_ptr); /* forward declaration */
378 :
379 : static my_bool translog_get_next_chunk(TRANSLOG_SCANNER_DATA *scanner);
380 : static uint32 translog_first_file(TRANSLOG_ADDRESS horizon, int is_protected);
381 : LSN translog_next_LSN(TRANSLOG_ADDRESS addr, TRANSLOG_ADDRESS horizon);
382 :
383 :
384 : /*
385 : Table of log record type descriptors; filled in by translog_table_init()
386 : */
387 :
388 : LOG_DESC log_record_type_descriptor[LOGREC_NUMBER_OF_TYPES];
389 :
390 :
391 : #ifndef DBUG_OFF
392 :
393 : #define translog_buffer_lock_assert_owner(B) \
394 : safe_mutex_assert_owner(&(B)->mutex) /* debug builds: check on buffer B's mutex */
395 : #define translog_lock_assert_owner() \
396 : safe_mutex_assert_owner(&log_descriptor.bc.buffer->mutex) /* mutex of the horizon cursor's buffer */
397 : void translog_lock_handler_assert_owner() /* function wrapper around the macro above */
398 0 : {
399 0 : translog_lock_assert_owner();
400 : }
401 :
402 : /**
403 : @brief Check the validity of the record type description table (debug builds)
404 :
405 : @param num index of the last filled record type; later slots must be unused
406 : */
407 :
408 : static void check_translog_description_table(int num)
409 785 : {
410 : int i;
411 785 : DBUG_ENTER("check_translog_description_table");
412 785 : DBUG_PRINT("enter", ("last record: %d", num));
413 785 : DBUG_ASSERT(num > 0);
414 : /* the last slot is reserved for extending the table */
415 785 : DBUG_ASSERT(num < LOGREC_NUMBER_OF_TYPES - 1);
416 785 : DBUG_ASSERT(log_record_type_descriptor[0].rclass == LOGRECTYPE_NOT_ALLOWED);
417 :
418 34540 : for (i= 0; i <= num; i++) /* validate every filled slot 0..num */
419 : {
420 33755 : DBUG_PRINT("info",
421 : ("record type: %d class: %d fixed: %u header: %u LSNs: %u "
422 : "name: %s",
423 : i, log_record_type_descriptor[i].rclass,
424 : (uint)log_record_type_descriptor[i].fixed_length,
425 : (uint)log_record_type_descriptor[i].read_header_len,
426 : (uint)log_record_type_descriptor[i].compressed_LSN,
427 : log_record_type_descriptor[i].name));
428 33755 : switch (log_record_type_descriptor[i].rclass) {
429 : case LOGRECTYPE_NOT_ALLOWED: /* among the filled slots only slot 0 may be this */
430 785 : DBUG_ASSERT(i == 0);
431 : break;
432 : case LOGRECTYPE_VARIABLE_LENGTH: /* fixed_length 0; header holds 0, 1 or 2 LSNs */
433 21195 : DBUG_ASSERT(log_record_type_descriptor[i].fixed_length == 0);
434 21195 : DBUG_ASSERT((log_record_type_descriptor[i].compressed_LSN == 0) ||
435 : ((log_record_type_descriptor[i].compressed_LSN == 1) &&
436 : (log_record_type_descriptor[i].read_header_len >=
437 : LSN_STORE_SIZE)) ||
438 : ((log_record_type_descriptor[i].compressed_LSN == 2) &&
439 : (log_record_type_descriptor[i].read_header_len >=
440 : LSN_STORE_SIZE * 2)));
441 : break;
442 : case LOGRECTYPE_PSEUDOFIXEDLENGTH: /* fixed == header length; 1 or 2 LSNs */
443 1570 : DBUG_ASSERT(log_record_type_descriptor[i].fixed_length ==
444 : log_record_type_descriptor[i].read_header_len);
445 1570 : DBUG_ASSERT(log_record_type_descriptor[i].compressed_LSN > 0);
446 1570 : DBUG_ASSERT(log_record_type_descriptor[i].compressed_LSN <= 2);
447 : break;
448 : case LOGRECTYPE_FIXEDLENGTH: /* fixed == header length; no compressed LSNs */
449 10205 : DBUG_ASSERT(log_record_type_descriptor[i].fixed_length ==
450 : log_record_type_descriptor[i].read_header_len);
451 10205 : DBUG_ASSERT(log_record_type_descriptor[i].compressed_LSN == 0);
452 : break;
453 : default: /* unknown record class */
454 0 : DBUG_ASSERT(0);
455 : }
456 : }
457 17270 : for (i= num + 1; i < LOGREC_NUMBER_OF_TYPES; i++) /* the rest must be unused */
458 : {
459 16485 : DBUG_ASSERT(log_record_type_descriptor[i].rclass ==
460 : LOGRECTYPE_NOT_ALLOWED);
461 : }
462 785 : DBUG_VOID_RETURN;
463 : }
464 : #else
465 : #define translog_buffer_lock_assert_owner(B) {} /* no-op when DBUG_OFF is defined */
466 : #define translog_lock_assert_owner() {} /* no-op when DBUG_OFF is defined */
467 : #endif
468 :
469 : static LOG_DESC INIT_LOGREC_RESERVED_FOR_CHUNKS23= /* templates copied by translog_table_init() */
470 : {LOGRECTYPE_NOT_ALLOWED, 0, 0, NULL, NULL, NULL, 0,
471 : "reserved", LOGREC_NOT_LAST_IN_GROUP, NULL, NULL };
472 :
473 : static LOG_DESC INIT_LOGREC_REDO_INSERT_ROW_HEAD=
474 : {LOGRECTYPE_VARIABLE_LENGTH, 0,
475 : FILEID_STORE_SIZE + PAGE_STORE_SIZE + DIRPOS_STORE_SIZE, NULL,
476 : write_hook_for_redo, NULL, 0,
477 : "redo_insert_row_head", LOGREC_NOT_LAST_IN_GROUP, NULL, NULL};
478 :
479 : static LOG_DESC INIT_LOGREC_REDO_INSERT_ROW_TAIL=
480 : {LOGRECTYPE_VARIABLE_LENGTH, 0,
481 : FILEID_STORE_SIZE + PAGE_STORE_SIZE + DIRPOS_STORE_SIZE, NULL,
482 : write_hook_for_redo, NULL, 0,
483 : "redo_insert_row_tail", LOGREC_NOT_LAST_IN_GROUP, NULL, NULL};
484 :
485 : static LOG_DESC INIT_LOGREC_REDO_NEW_ROW_HEAD=
486 : {LOGRECTYPE_VARIABLE_LENGTH, 0,
487 : FILEID_STORE_SIZE + PAGE_STORE_SIZE + DIRPOS_STORE_SIZE, NULL,
488 : write_hook_for_redo, NULL, 0,
489 : "redo_new_row_head", LOGREC_NOT_LAST_IN_GROUP, NULL, NULL};
490 :
491 : static LOG_DESC INIT_LOGREC_REDO_NEW_ROW_TAIL=
492 : {LOGRECTYPE_VARIABLE_LENGTH, 0,
493 : FILEID_STORE_SIZE + PAGE_STORE_SIZE + DIRPOS_STORE_SIZE, NULL,
494 : write_hook_for_redo, NULL, 0,
495 : "redo_new_row_tail", LOGREC_NOT_LAST_IN_GROUP, NULL, NULL};
496 :
497 : static LOG_DESC INIT_LOGREC_REDO_INSERT_ROW_BLOBS=
498 : {LOGRECTYPE_VARIABLE_LENGTH, 0, FILEID_STORE_SIZE, NULL,
499 : write_hook_for_redo, NULL, 0,
500 : "redo_insert_row_blobs", LOGREC_NOT_LAST_IN_GROUP, NULL, NULL};
501 :
502 : static LOG_DESC INIT_LOGREC_REDO_PURGE_ROW_HEAD=
503 : {LOGRECTYPE_FIXEDLENGTH,
504 : FILEID_STORE_SIZE + PAGE_STORE_SIZE + DIRPOS_STORE_SIZE,
505 : FILEID_STORE_SIZE + PAGE_STORE_SIZE + DIRPOS_STORE_SIZE,
506 : NULL, write_hook_for_redo, NULL, 0,
507 : "redo_purge_row_head", LOGREC_NOT_LAST_IN_GROUP, NULL, NULL};
508 :
509 : static LOG_DESC INIT_LOGREC_REDO_PURGE_ROW_TAIL=
510 : {LOGRECTYPE_FIXEDLENGTH,
511 : FILEID_STORE_SIZE + PAGE_STORE_SIZE + DIRPOS_STORE_SIZE,
512 : FILEID_STORE_SIZE + PAGE_STORE_SIZE + DIRPOS_STORE_SIZE,
513 : NULL, write_hook_for_redo, NULL, 0,
514 : "redo_purge_row_tail", LOGREC_NOT_LAST_IN_GROUP, NULL, NULL};
515 :
516 : static LOG_DESC INIT_LOGREC_REDO_FREE_BLOCKS=
517 : {LOGRECTYPE_VARIABLE_LENGTH, 0,
518 : FILEID_STORE_SIZE + PAGERANGE_STORE_SIZE,
519 : NULL, write_hook_for_redo, NULL, 0,
520 : "redo_free_blocks", LOGREC_NOT_LAST_IN_GROUP, NULL, NULL};
521 :
522 : static LOG_DESC INIT_LOGREC_REDO_FREE_HEAD_OR_TAIL=
523 : {LOGRECTYPE_FIXEDLENGTH,
524 : FILEID_STORE_SIZE + PAGE_STORE_SIZE,
525 : FILEID_STORE_SIZE + PAGE_STORE_SIZE,
526 : NULL, write_hook_for_redo, NULL, 0,
527 : "redo_free_head_or_tail", LOGREC_NOT_LAST_IN_GROUP, NULL, NULL};
528 :
529 : /* Not yet used; reserved for when we have versioning */
530 : static LOG_DESC INIT_LOGREC_REDO_DELETE_ROW=
531 : {LOGRECTYPE_FIXEDLENGTH, 16, 16, NULL, write_hook_for_redo, NULL, 0,
532 : "redo_delete_row", LOGREC_NOT_LAST_IN_GROUP, NULL, NULL};
533 :
534 : /** @todo RECOVERY BUG: unused, remove? */
535 : static LOG_DESC INIT_LOGREC_REDO_UPDATE_ROW_HEAD=
536 : {LOGRECTYPE_VARIABLE_LENGTH, 0, 9, NULL, write_hook_for_redo, NULL, 0,
537 : "redo_update_row_head", LOGREC_NOT_LAST_IN_GROUP, NULL, NULL};
538 :
539 : static LOG_DESC INIT_LOGREC_REDO_INDEX=
540 : {LOGRECTYPE_VARIABLE_LENGTH, 0, 9, NULL, write_hook_for_redo, NULL, 0,
541 : "redo_index", LOGREC_NOT_LAST_IN_GROUP, NULL, NULL};
542 :
543 : static LOG_DESC INIT_LOGREC_REDO_INDEX_NEW_PAGE=
544 : {LOGRECTYPE_VARIABLE_LENGTH, 0,
545 : FILEID_STORE_SIZE + PAGE_STORE_SIZE * 2 + KEY_NR_STORE_SIZE + 1,
546 : NULL, write_hook_for_redo, NULL, 0,
547 : "redo_index_new_page", LOGREC_NOT_LAST_IN_GROUP, NULL, NULL};
548 :
549 : static LOG_DESC INIT_LOGREC_REDO_INDEX_FREE_PAGE=
550 : {LOGRECTYPE_FIXEDLENGTH, FILEID_STORE_SIZE + PAGE_STORE_SIZE * 2,
551 : FILEID_STORE_SIZE + PAGE_STORE_SIZE * 2,
552 : NULL, write_hook_for_redo, NULL, 0,
553 : "redo_index_free_page", LOGREC_NOT_LAST_IN_GROUP, NULL, NULL};
554 :
555 : static LOG_DESC INIT_LOGREC_REDO_UNDELETE_ROW=
556 : {LOGRECTYPE_FIXEDLENGTH, 16, 16, NULL, write_hook_for_redo, NULL, 0,
557 : "redo_undelete_row", LOGREC_NOT_LAST_IN_GROUP, NULL, NULL};
558 :
559 : static LOG_DESC INIT_LOGREC_CLR_END=
560 : {LOGRECTYPE_VARIABLE_LENGTH, 0, LSN_STORE_SIZE + FILEID_STORE_SIZE +
561 : CLR_TYPE_STORE_SIZE, NULL, write_hook_for_clr_end, NULL, 1,
562 : "clr_end", LOGREC_LAST_IN_GROUP, NULL, NULL};
563 :
564 : static LOG_DESC INIT_LOGREC_PURGE_END=
565 : {LOGRECTYPE_PSEUDOFIXEDLENGTH, 5, 5, NULL, NULL, NULL, 1,
566 : "purge_end", LOGREC_LAST_IN_GROUP, NULL, NULL};
567 :
568 : static LOG_DESC INIT_LOGREC_UNDO_ROW_INSERT=
569 : {LOGRECTYPE_VARIABLE_LENGTH, 0,
570 : LSN_STORE_SIZE + FILEID_STORE_SIZE + PAGE_STORE_SIZE + DIRPOS_STORE_SIZE,
571 : NULL, write_hook_for_undo_row_insert, NULL, 1,
572 : "undo_row_insert", LOGREC_LAST_IN_GROUP, NULL, NULL};
573 :
574 : static LOG_DESC INIT_LOGREC_UNDO_ROW_DELETE=
575 : {LOGRECTYPE_VARIABLE_LENGTH, 0,
576 : LSN_STORE_SIZE + FILEID_STORE_SIZE + PAGE_STORE_SIZE + DIRPOS_STORE_SIZE,
577 : NULL, write_hook_for_undo_row_delete, NULL, 1,
578 : "undo_row_delete", LOGREC_LAST_IN_GROUP, NULL, NULL};
579 :
580 : static LOG_DESC INIT_LOGREC_UNDO_ROW_UPDATE=
581 : {LOGRECTYPE_VARIABLE_LENGTH, 0,
582 : LSN_STORE_SIZE + FILEID_STORE_SIZE + PAGE_STORE_SIZE + DIRPOS_STORE_SIZE,
583 : NULL, write_hook_for_undo_row_update, NULL, 1,
584 : "undo_row_update", LOGREC_LAST_IN_GROUP, NULL, NULL};
585 :
586 : static LOG_DESC INIT_LOGREC_UNDO_KEY_INSERT=
587 : {LOGRECTYPE_VARIABLE_LENGTH, 0,
588 : LSN_STORE_SIZE + FILEID_STORE_SIZE + KEY_NR_STORE_SIZE,
589 : NULL, write_hook_for_undo_key_insert, NULL, 1,
590 : "undo_key_insert", LOGREC_LAST_IN_GROUP, NULL, NULL};
591 :
592 : /* This will never be in the log by itself, only in the CLR */
593 : static LOG_DESC INIT_LOGREC_UNDO_KEY_INSERT_WITH_ROOT=
594 : {LOGRECTYPE_VARIABLE_LENGTH, 0,
595 : LSN_STORE_SIZE + FILEID_STORE_SIZE + KEY_NR_STORE_SIZE + PAGE_STORE_SIZE,
596 : NULL, write_hook_for_undo_key, NULL, 1,
597 : "undo_key_insert_with_root", LOGREC_LAST_IN_GROUP, NULL, NULL};
598 :
599 : static LOG_DESC INIT_LOGREC_UNDO_KEY_DELETE=
600 : {LOGRECTYPE_VARIABLE_LENGTH, 0,
601 : LSN_STORE_SIZE + FILEID_STORE_SIZE + KEY_NR_STORE_SIZE,
602 : NULL, write_hook_for_undo_key_delete, NULL, 1,
603 : "undo_key_delete", LOGREC_LAST_IN_GROUP, NULL, NULL};
604 :
605 : static LOG_DESC INIT_LOGREC_UNDO_KEY_DELETE_WITH_ROOT=
606 : {LOGRECTYPE_VARIABLE_LENGTH, 0,
607 : LSN_STORE_SIZE + FILEID_STORE_SIZE + KEY_NR_STORE_SIZE + PAGE_STORE_SIZE,
608 : NULL, write_hook_for_undo_key_delete, NULL, 1,
609 : "undo_key_delete_with_root", LOGREC_LAST_IN_GROUP, NULL, NULL};
610 :
611 : static LOG_DESC INIT_LOGREC_PREPARE=
612 : {LOGRECTYPE_VARIABLE_LENGTH, 0, 0, NULL, NULL, NULL, 0,
613 : "prepare", LOGREC_IS_GROUP_ITSELF, NULL, NULL};
614 :
615 : static LOG_DESC INIT_LOGREC_PREPARE_WITH_UNDO_PURGE=
616 : {LOGRECTYPE_VARIABLE_LENGTH, 0, LSN_STORE_SIZE, NULL, NULL, NULL, 1,
617 : "prepare_with_undo_purge", LOGREC_IS_GROUP_ITSELF, NULL, NULL};
618 :
619 : static LOG_DESC INIT_LOGREC_COMMIT=
620 : {LOGRECTYPE_FIXEDLENGTH, 0, 0, NULL,
621 : write_hook_for_commit, NULL, 0, "commit", LOGREC_IS_GROUP_ITSELF, NULL,
622 : NULL};
623 :
624 : static LOG_DESC INIT_LOGREC_COMMIT_WITH_UNDO_PURGE=
625 : {LOGRECTYPE_PSEUDOFIXEDLENGTH, 5, 5, NULL, write_hook_for_commit, NULL, 1,
626 : "commit_with_undo_purge", LOGREC_IS_GROUP_ITSELF, NULL, NULL};
627 :
628 : static LOG_DESC INIT_LOGREC_CHECKPOINT=
629 : {LOGRECTYPE_VARIABLE_LENGTH, 0, 0, NULL, NULL, NULL, 0,
630 : "checkpoint", LOGREC_IS_GROUP_ITSELF, NULL, NULL};
631 :
632 : static LOG_DESC INIT_LOGREC_REDO_CREATE_TABLE=
633 : {LOGRECTYPE_VARIABLE_LENGTH, 0, 1 + 2, NULL, NULL, NULL, 0,
634 : "redo_create_table", LOGREC_IS_GROUP_ITSELF, NULL, NULL};
635 :
636 : static LOG_DESC INIT_LOGREC_REDO_RENAME_TABLE=
637 : {LOGRECTYPE_VARIABLE_LENGTH, 0, 0, NULL, NULL, NULL, 0,
638 : "redo_rename_table", LOGREC_IS_GROUP_ITSELF, NULL, NULL};
639 :
640 : static LOG_DESC INIT_LOGREC_REDO_DROP_TABLE=
641 : {LOGRECTYPE_VARIABLE_LENGTH, 0, 0, NULL, NULL, NULL, 0,
642 : "redo_drop_table", LOGREC_IS_GROUP_ITSELF, NULL, NULL};
643 :
644 : static LOG_DESC INIT_LOGREC_REDO_DELETE_ALL=
645 : {LOGRECTYPE_FIXEDLENGTH, FILEID_STORE_SIZE, FILEID_STORE_SIZE,
646 : NULL, write_hook_for_redo_delete_all, NULL, 0,
647 : "redo_delete_all", LOGREC_IS_GROUP_ITSELF, NULL, NULL};
648 :
649 : static LOG_DESC INIT_LOGREC_REDO_REPAIR_TABLE=
650 : {LOGRECTYPE_FIXEDLENGTH, FILEID_STORE_SIZE + 8 + 8, FILEID_STORE_SIZE + 8 + 8,
651 : NULL, NULL, NULL, 0,
652 : "redo_repair_table", LOGREC_IS_GROUP_ITSELF, NULL, NULL};
653 :
654 : static LOG_DESC INIT_LOGREC_FILE_ID=
655 : {LOGRECTYPE_VARIABLE_LENGTH, 0, 2, NULL, write_hook_for_file_id, NULL, 0,
656 : "file_id", LOGREC_IS_GROUP_ITSELF, NULL, NULL};
657 :
658 : static LOG_DESC INIT_LOGREC_LONG_TRANSACTION_ID=
659 : {LOGRECTYPE_FIXEDLENGTH, 6, 6, NULL, NULL, NULL, 0,
660 : "long_transaction_id", LOGREC_IS_GROUP_ITSELF, NULL, NULL};
661 :
662 : static LOG_DESC INIT_LOGREC_INCOMPLETE_LOG=
663 : {LOGRECTYPE_FIXEDLENGTH, FILEID_STORE_SIZE, FILEID_STORE_SIZE,
664 : NULL, NULL, NULL, 0,
665 : "incomplete_log", LOGREC_IS_GROUP_ITSELF, NULL, NULL};
666 :
667 : static LOG_DESC INIT_LOGREC_INCOMPLETE_GROUP=
668 : {LOGRECTYPE_FIXEDLENGTH, 0, 0,
669 : NULL, NULL, NULL, 0,
670 : "incomplete_group", LOGREC_IS_GROUP_ITSELF, NULL, NULL};
671 :
672 : static LOG_DESC INIT_LOGREC_UNDO_BULK_INSERT=
673 : {LOGRECTYPE_VARIABLE_LENGTH, 0,
674 : LSN_STORE_SIZE + FILEID_STORE_SIZE,
675 : NULL, write_hook_for_undo_bulk_insert, NULL, 1,
676 : "undo_bulk_insert", LOGREC_LAST_IN_GROUP, NULL, NULL};
677 :
678 : static LOG_DESC INIT_LOGREC_REDO_BITMAP_NEW_PAGE=
679 : {LOGRECTYPE_FIXEDLENGTH, FILEID_STORE_SIZE + PAGE_STORE_SIZE * 2,
680 : FILEID_STORE_SIZE + PAGE_STORE_SIZE * 2,
681 : NULL, NULL, NULL, 0,
682 : "redo_create_bitmap", LOGREC_IS_GROUP_ITSELF, NULL, NULL}; /* NOTE(review): name string differs from the identifier (bitmap_new_page) - confirm intended */
683 :
684 : static LOG_DESC INIT_LOGREC_IMPORTED_TABLE=
685 : {LOGRECTYPE_VARIABLE_LENGTH, 0, 0, NULL, NULL, NULL, 0,
686 : "imported_table", LOGREC_IS_GROUP_ITSELF, NULL, NULL};
687 :
688 : static LOG_DESC INIT_LOGREC_DEBUG_INFO=
689 : {LOGRECTYPE_VARIABLE_LENGTH, 0, 0, NULL, NULL, NULL, 0,
690 : "info", LOGREC_IS_GROUP_ITSELF, NULL, NULL};
691 :
692 : const myf log_write_flags= MY_WME | MY_NABP | MY_WAIT_IF_FULL; /* NOTE(review): presumably the my_write()-style flags for log writes - confirm */
693 :
694 : void translog_table_init() /* fills log_record_type_descriptor[] from the INIT_LOGREC_* templates */
695 785 : {
696 : int i;
697 785 : log_record_type_descriptor[LOGREC_RESERVED_FOR_CHUNKS23]=
698 : INIT_LOGREC_RESERVED_FOR_CHUNKS23;
699 785 : log_record_type_descriptor[LOGREC_REDO_INSERT_ROW_HEAD]=
700 : INIT_LOGREC_REDO_INSERT_ROW_HEAD;
701 785 : log_record_type_descriptor[LOGREC_REDO_INSERT_ROW_TAIL]=
702 : INIT_LOGREC_REDO_INSERT_ROW_TAIL;
703 785 : log_record_type_descriptor[LOGREC_REDO_NEW_ROW_HEAD]=
704 : INIT_LOGREC_REDO_NEW_ROW_HEAD;
705 785 : log_record_type_descriptor[LOGREC_REDO_NEW_ROW_TAIL]=
706 : INIT_LOGREC_REDO_NEW_ROW_TAIL;
707 785 : log_record_type_descriptor[LOGREC_REDO_INSERT_ROW_BLOBS]=
708 : INIT_LOGREC_REDO_INSERT_ROW_BLOBS;
709 785 : log_record_type_descriptor[LOGREC_REDO_PURGE_ROW_HEAD]=
710 : INIT_LOGREC_REDO_PURGE_ROW_HEAD;
711 785 : log_record_type_descriptor[LOGREC_REDO_PURGE_ROW_TAIL]=
712 : INIT_LOGREC_REDO_PURGE_ROW_TAIL;
713 785 : log_record_type_descriptor[LOGREC_REDO_FREE_BLOCKS]=
714 : INIT_LOGREC_REDO_FREE_BLOCKS;
715 785 : log_record_type_descriptor[LOGREC_REDO_FREE_HEAD_OR_TAIL]=
716 : INIT_LOGREC_REDO_FREE_HEAD_OR_TAIL;
717 785 : log_record_type_descriptor[LOGREC_REDO_DELETE_ROW]=
718 : INIT_LOGREC_REDO_DELETE_ROW;
719 785 : log_record_type_descriptor[LOGREC_REDO_UPDATE_ROW_HEAD]=
720 : INIT_LOGREC_REDO_UPDATE_ROW_HEAD;
721 785 : log_record_type_descriptor[LOGREC_REDO_INDEX]=
722 : INIT_LOGREC_REDO_INDEX;
723 785 : log_record_type_descriptor[LOGREC_REDO_INDEX_NEW_PAGE]=
724 : INIT_LOGREC_REDO_INDEX_NEW_PAGE;
725 785 : log_record_type_descriptor[LOGREC_REDO_INDEX_FREE_PAGE]=
726 : INIT_LOGREC_REDO_INDEX_FREE_PAGE;
727 785 : log_record_type_descriptor[LOGREC_REDO_UNDELETE_ROW]=
728 : INIT_LOGREC_REDO_UNDELETE_ROW;
729 785 : log_record_type_descriptor[LOGREC_CLR_END]=
730 : INIT_LOGREC_CLR_END;
731 785 : log_record_type_descriptor[LOGREC_PURGE_END]=
732 : INIT_LOGREC_PURGE_END;
733 785 : log_record_type_descriptor[LOGREC_UNDO_ROW_INSERT]=
734 : INIT_LOGREC_UNDO_ROW_INSERT;
735 785 : log_record_type_descriptor[LOGREC_UNDO_ROW_DELETE]=
736 : INIT_LOGREC_UNDO_ROW_DELETE;
737 785 : log_record_type_descriptor[LOGREC_UNDO_ROW_UPDATE]=
738 : INIT_LOGREC_UNDO_ROW_UPDATE;
739 785 : log_record_type_descriptor[LOGREC_UNDO_KEY_INSERT]=
740 : INIT_LOGREC_UNDO_KEY_INSERT;
741 785 : log_record_type_descriptor[LOGREC_UNDO_KEY_INSERT_WITH_ROOT]=
742 : INIT_LOGREC_UNDO_KEY_INSERT_WITH_ROOT;
743 785 : log_record_type_descriptor[LOGREC_UNDO_KEY_DELETE]=
744 : INIT_LOGREC_UNDO_KEY_DELETE;
745 785 : log_record_type_descriptor[LOGREC_UNDO_KEY_DELETE_WITH_ROOT]=
746 : INIT_LOGREC_UNDO_KEY_DELETE_WITH_ROOT;
747 785 : log_record_type_descriptor[LOGREC_PREPARE]=
748 : INIT_LOGREC_PREPARE;
749 785 : log_record_type_descriptor[LOGREC_PREPARE_WITH_UNDO_PURGE]=
750 : INIT_LOGREC_PREPARE_WITH_UNDO_PURGE;
751 785 : log_record_type_descriptor[LOGREC_COMMIT]=
752 : INIT_LOGREC_COMMIT;
753 785 : log_record_type_descriptor[LOGREC_COMMIT_WITH_UNDO_PURGE]=
754 : INIT_LOGREC_COMMIT_WITH_UNDO_PURGE;
755 785 : log_record_type_descriptor[LOGREC_CHECKPOINT]=
756 : INIT_LOGREC_CHECKPOINT;
757 785 : log_record_type_descriptor[LOGREC_REDO_CREATE_TABLE]=
758 : INIT_LOGREC_REDO_CREATE_TABLE;
759 785 : log_record_type_descriptor[LOGREC_REDO_RENAME_TABLE]=
760 : INIT_LOGREC_REDO_RENAME_TABLE;
761 785 : log_record_type_descriptor[LOGREC_REDO_DROP_TABLE]=
762 : INIT_LOGREC_REDO_DROP_TABLE;
763 785 : log_record_type_descriptor[LOGREC_REDO_DELETE_ALL]=
764 : INIT_LOGREC_REDO_DELETE_ALL;
765 785 : log_record_type_descriptor[LOGREC_REDO_REPAIR_TABLE]=
766 : INIT_LOGREC_REDO_REPAIR_TABLE;
767 785 : log_record_type_descriptor[LOGREC_FILE_ID]=
768 : INIT_LOGREC_FILE_ID;
769 785 : log_record_type_descriptor[LOGREC_LONG_TRANSACTION_ID]=
770 : INIT_LOGREC_LONG_TRANSACTION_ID;
771 785 : log_record_type_descriptor[LOGREC_INCOMPLETE_LOG]=
772 : INIT_LOGREC_INCOMPLETE_LOG;
773 785 : log_record_type_descriptor[LOGREC_INCOMPLETE_GROUP]=
774 : INIT_LOGREC_INCOMPLETE_GROUP;
775 785 : log_record_type_descriptor[LOGREC_UNDO_BULK_INSERT]=
776 : INIT_LOGREC_UNDO_BULK_INSERT;
777 785 : log_record_type_descriptor[LOGREC_REDO_BITMAP_NEW_PAGE]=
778 : INIT_LOGREC_REDO_BITMAP_NEW_PAGE;
779 785 : log_record_type_descriptor[LOGREC_IMPORTED_TABLE]=
780 : INIT_LOGREC_IMPORTED_TABLE;
781 785 : log_record_type_descriptor[LOGREC_DEBUG_INFO]=
782 : INIT_LOGREC_DEBUG_INFO;
783 :
784 17270 : for (i= LOGREC_FIRST_FREE; i < LOGREC_NUMBER_OF_TYPES; i++) /* mark all unused slots */
785 16485 : log_record_type_descriptor[i].rclass= LOGRECTYPE_NOT_ALLOWED;
786 : #ifndef DBUG_OFF
787 785 : check_translog_description_table(LOGREC_FIRST_FREE -1); /* debug builds: validate the table just built */
788 : #endif
789 : }
790 :
791 :
/*
  Page overhead values: one slot for every possible page flags value
  (TRANSLOG_FLAGS_NUM slots).
  NOTE(review): presumably indexed by the page flags byte -- confirm where
  the table is filled.
*/
static uint page_overhead[TRANSLOG_FLAGS_NUM];
794 :
/*
  A log address pointer bundled with a "was recovered" flag.
  NOTE(review): judging by the name this is the argument block of the page
  validator callback -- confirm against translog_page_validator().
*/
typedef struct st_translog_validator_data
{
  TRANSLOG_ADDRESS *addr;       /* pointer to a log address */
  my_bool was_recovered;        /* recovery flag (meaning set by the user) */
} TRANSLOG_VALIDATOR_DATA;
800 :
801 :
/*
  Check cursor/buffer consistency

  SYNOPSIS
    translog_check_cursor
    cursor               cursor which will be checked

  NOTES
    The body consists of DBUG_ASSERT()s only; the argument is marked as
    unused for builds where the assertions presumably expand to nothing.
*/

static void translog_check_cursor(struct st_buffer_cursor *cursor
                                  __attribute__((unused)))
{
  /* a non-chaser cursor has to stand exactly at the end of the buffer data */
  DBUG_ASSERT(cursor->chaser ||
              ((ulong) (cursor->ptr - cursor->buffer->buffer) ==
               cursor->buffer->size));
  /* cursor and buffer have to agree about the buffer number */
  DBUG_ASSERT(cursor->buffer->buffer_no == cursor->buffer_no);
  /* position inside the page has to match the registered page fill */
  DBUG_ASSERT((cursor->ptr -cursor->buffer->buffer) %TRANSLOG_PAGE_SIZE ==
              cursor->current_page_fill % TRANSLOG_PAGE_SIZE);
  /* a page can't be filled beyond its size */
  DBUG_ASSERT(cursor->current_page_fill <= TRANSLOG_PAGE_SIZE);
}
821 :
822 :
823 : /**
824 : @brief switch the loghandler in read only mode in case of write error
825 : */
826 :
827 : void translog_stop_writing()
828 0 : {
829 0 : DBUG_ENTER("translog_stop_writing");
830 0 : DBUG_PRINT("error", ("errno: %d my_errno: %d", errno, my_errno));
831 0 : translog_status= (translog_status == TRANSLOG_SHUTDOWN ?
832 : TRANSLOG_UNINITED :
833 : TRANSLOG_READONLY);
834 0 : log_descriptor.is_everything_flushed= 1;
835 0 : log_descriptor.open_flags= O_BINARY | O_RDONLY;
836 0 : DBUG_ASSERT(0);
837 : DBUG_VOID_RETURN;
838 : }
839 :
840 :
841 : /*
842 : @brief Get file name of the log by log number
843 :
844 : @param file_no Number of the log we want to open
845 : @param path Pointer to buffer where file name will be
846 : stored (must be FN_REFLEN bytes at least)
847 :
848 : @return pointer to path
849 : */
850 :
851 : char *translog_filename_by_fileno(uint32 file_no, char *path)
852 1508 : {
853 : char buff[11], *end;
854 : uint length;
855 1508 : DBUG_ENTER("translog_filename_by_fileno");
856 1508 : DBUG_ASSERT(file_no <= 0xfffffff);
857 :
858 : /* log_descriptor.directory is already formated */
859 1508 : end= strxmov(path, log_descriptor.directory, "maria_log.0000000", NullS);
860 1508 : length= (uint) (int10_to_str(file_no, buff, 10) - buff);
861 1508 : strmov(end - length +1, buff);
862 :
863 1508 : DBUG_PRINT("info", ("Path: '%s' path: 0x%lx", path, (ulong) path));
864 1508 : DBUG_RETURN(path);
865 : }
866 :
867 :
868 : /**
869 : @brief Create log file with given number without cache
870 :
871 : @param file_no Number of the log we want to open
872 :
873 : retval -1 error
874 : retval # file descriptor number
875 : */
876 :
877 : static File create_logfile_by_number_no_cache(uint32 file_no)
878 465 : {
879 : File file;
880 : char path[FN_REFLEN];
881 465 : DBUG_ENTER("create_logfile_by_number_no_cache");
882 :
883 465 : if (translog_status != TRANSLOG_OK)
884 0 : DBUG_RETURN(-1);
885 :
886 : /* TODO: add O_DIRECT to open flags (when buffer is aligned) */
887 465 : if ((file= my_create(translog_filename_by_fileno(file_no, path),
888 : 0, O_BINARY | O_RDWR, MYF(MY_WME))) < 0)
889 : {
890 0 : DBUG_PRINT("error", ("Error %d during creating file '%s'", errno, path));
891 0 : translog_stop_writing();
892 0 : DBUG_RETURN(-1);
893 : }
894 465 : if (sync_log_dir >= TRANSLOG_SYNC_DIR_NEWFILE &&
895 : sync_dir(log_descriptor.directory_fd, MYF(MY_WME | MY_IGNORE_BADFD)))
896 : {
897 0 : DBUG_PRINT("error", ("Error %d during syncing directory '%s'",
898 : errno, log_descriptor.directory));
899 0 : translog_stop_writing();
900 0 : DBUG_RETURN(-1);
901 : }
902 465 : DBUG_PRINT("info", ("File: '%s' handler: %d", path, file));
903 465 : DBUG_RETURN(file);
904 : }
905 :
906 : /**
907 : @brief Open (not create) log file with given number without cache
908 :
909 : @param file_no Number of the log we want to open
910 :
911 : retval -1 error
912 : retval # file descriptor number
913 : */
914 :
915 : static File open_logfile_by_number_no_cache(uint32 file_no)
916 361 : {
917 : File file;
918 : char path[FN_REFLEN];
919 361 : DBUG_ENTER("open_logfile_by_number_no_cache");
920 :
921 : /* TODO: add O_DIRECT to open flags (when buffer is aligned) */
922 : /* TODO: use my_create() */
923 361 : if ((file= my_open(translog_filename_by_fileno(file_no, path),
924 : log_descriptor.open_flags,
925 : MYF(MY_WME))) < 0)
926 : {
927 0 : DBUG_PRINT("error", ("Error %d during opening file '%s'", errno, path));
928 0 : DBUG_RETURN(-1);
929 : }
930 361 : DBUG_PRINT("info", ("File: '%s' handler: %d", path, file));
931 361 : DBUG_RETURN(file);
932 : }
933 :
934 :
935 : /**
936 : @brief get file descriptor by given number using cache
937 :
938 : @param file_no Number of the log we want to open
939 :
940 : retval # file descriptor
941 : retval NULL file is not opened
942 : */
943 :
944 : static TRANSLOG_FILE *get_logfile_by_number(uint32 file_no)
945 7552873 : {
946 : TRANSLOG_FILE *file;
947 7552873 : DBUG_ENTER("get_logfile_by_number");
948 7552873 : rw_rdlock(&log_descriptor.open_files_lock);
949 7552873 : if (log_descriptor.max_file - file_no >=
950 : log_descriptor.open_files.elements)
951 : {
952 357 : DBUG_PRINT("info", ("File #%u is not opened", file_no));
953 357 : rw_unlock(&log_descriptor.open_files_lock);
954 357 : DBUG_RETURN(NULL);
955 : }
956 7552516 : DBUG_ASSERT(log_descriptor.max_file - log_descriptor.min_file + 1 ==
957 : log_descriptor.open_files.elements);
958 7552516 : DBUG_ASSERT(log_descriptor.max_file >= file_no);
959 7552516 : DBUG_ASSERT(log_descriptor.min_file <= file_no);
960 :
961 7552516 : file= *dynamic_element(&log_descriptor.open_files,
962 : log_descriptor.max_file - file_no, TRANSLOG_FILE **);
963 7552516 : rw_unlock(&log_descriptor.open_files_lock);
964 7552516 : DBUG_PRINT("info", ("File 0x%lx File no: %lu, File handler: %d",
965 : (ulong)file, (ulong)file_no,
966 : (file ? file->handler.file : -1)));
967 7552516 : DBUG_ASSERT(!file || file->number == file_no);
968 7552516 : DBUG_RETURN(file);
969 : }
970 :
971 :
972 : /**
973 : @brief get current file descriptor
974 :
975 : retval # file descriptor
976 : */
977 :
978 : static TRANSLOG_FILE *get_current_logfile()
979 21742 : {
980 : TRANSLOG_FILE *file;
981 21742 : rw_rdlock(&log_descriptor.open_files_lock);
982 21742 : DBUG_ASSERT(log_descriptor.max_file - log_descriptor.min_file + 1 ==
983 : log_descriptor.open_files.elements);
984 21742 : file= *dynamic_element(&log_descriptor.open_files, 0, TRANSLOG_FILE **);
985 21742 : rw_unlock(&log_descriptor.open_files_lock);
986 21742 : return (file);
987 : }
988 :
/*
  Tag which is put at the very beginning of every log file (see
  translog_write_file_header())
*/
uchar NEAR maria_trans_file_magic[]=
{ (uchar) 254, (uchar) 254, (uchar) 11, '\001', 'M', 'A', 'R', 'I', 'A',
  'L', 'O', 'G' };
/*
  Size of the meaningful part of the file header page:
  tag + timestamp (8) + maria version (4) + mysql version (4) +
  server id (4) + page size (2) + file number (3) + max LSN
*/
#define LOG_HEADER_DATA_SIZE (sizeof(maria_trans_file_magic) + \
                              8 + 4 + 4 + 4 + 2 + 3 + \
                              LSN_STORE_SIZE)
995 :
996 :
997 : /*
998 : Write log file page header in the just opened new log file
999 :
1000 : SYNOPSIS
1001 : translog_write_file_header();
1002 :
1003 : NOTES
1004 : First page is just a marker page; We don't store any real log data in it.
1005 :
1006 : RETURN
1007 : 0 OK
1008 : 1 ERROR
1009 : */
1010 :
1011 : static my_bool translog_write_file_header()
1012 465 : {
1013 : TRANSLOG_FILE *file;
1014 : ulonglong timestamp;
1015 465 : uchar page_buff[TRANSLOG_PAGE_SIZE], *page= page_buff;
1016 : my_bool rc;
1017 465 : DBUG_ENTER("translog_write_file_header");
1018 :
1019 : /* file tag */
1020 465 : memcpy(page, maria_trans_file_magic, sizeof(maria_trans_file_magic));
1021 465 : page+= sizeof(maria_trans_file_magic);
1022 : /* timestamp */
1023 465 : timestamp= my_getsystime();
1024 465 : int8store(page, timestamp);
1025 465 : page+= 8;
1026 : /* maria version */
1027 465 : int4store(page, TRANSLOG_VERSION_ID);
1028 465 : page+= 4;
1029 : /* mysql version (MYSQL_VERSION_ID) */
1030 465 : int4store(page, log_descriptor.server_version);
1031 465 : page+= 4;
1032 : /* server ID */
1033 465 : int4store(page, log_descriptor.server_id);
1034 465 : page+= 4;
1035 : /* loghandler page_size */
1036 465 : int2store(page, TRANSLOG_PAGE_SIZE - 1);
1037 465 : page+= 2;
1038 : /* file number */
1039 465 : int3store(page, LSN_FILE_NO(log_descriptor.horizon));
1040 465 : page+= 3;
1041 465 : lsn_store(page, LSN_IMPOSSIBLE);
1042 465 : page+= LSN_STORE_SIZE;
1043 465 : memset(page, TRANSLOG_FILLER, sizeof(page_buff) - (page- page_buff));
1044 :
1045 465 : file= get_current_logfile();
1046 465 : rc= my_pwrite(file->handler.file, page_buff, sizeof(page_buff), 0,
1047 : log_write_flags) != 0;
1048 : /*
1049 : Dropping the flag in such way can make false alarm: signalling than the
1050 : file in not sync when it is sync, but the situation is quite rare and
1051 : protections with mutexes give much more overhead to the whole engine
1052 : */
1053 465 : file->is_sync= 0;
1054 465 : DBUG_RETURN(rc);
1055 : }
1056 :
1057 : /*
1058 : @brief write the new LSN on the given file header
1059 :
1060 : @param file The file descriptor
1061 : @param lsn That LSN which should be written
1062 :
1063 : @retval 0 OK
1064 : @retval 1 Error
1065 : */
1066 :
1067 : static my_bool translog_max_lsn_to_header(File file, LSN lsn)
1068 24 : {
1069 : uchar lsn_buff[LSN_STORE_SIZE];
1070 24 : DBUG_ENTER("translog_max_lsn_to_header");
1071 24 : DBUG_PRINT("enter", ("File descriptor: %ld "
1072 : "lsn: (%lu,0x%lx)",
1073 : (long) file,
1074 : LSN_IN_PARTS(lsn)));
1075 :
1076 24 : lsn_store(lsn_buff, lsn);
1077 :
1078 24 : DBUG_RETURN(my_pwrite(file, lsn_buff,
1079 : LSN_STORE_SIZE,
1080 : (LOG_HEADER_DATA_SIZE - LSN_STORE_SIZE),
1081 : log_write_flags) != 0 ||
1082 : my_sync(file, MYF(MY_WME)) != 0);
1083 : }
1084 :
1085 :
/*
  Information from the transaction log file header (decoded form of what
  translog_write_file_header() stores on the first page of a file)
*/

typedef struct st_loghandler_file_info
{
  /*
    LSN_IMPOSSIBLE for the current (not finished) file.
    Otherwise the maximum LSN among the records which have parts stored in
    the file.
  */
  LSN max_lsn;
  ulonglong timestamp;   /* Time stamp */
  ulong maria_version;   /* Version of maria loghandler */
  ulong mysql_version;   /* Version of mysql server */
  ulong server_id;       /* Server ID */
  ulong page_size;       /* Loghandler page size */
  ulong file_number;     /* Number of the file (from the file header) */
} LOGHANDLER_FILE_INFO;
1105 :
1106 : /*
1107 : @brief Extract hander file information from loghandler file page
1108 :
1109 : @param desc header information descriptor to be filled with information
1110 : @param page_buff buffer with the page content
1111 : */
1112 :
1113 : static void translog_interpret_file_header(LOGHANDLER_FILE_INFO *desc,
1114 : uchar *page_buff)
1115 361 : {
1116 : uchar *ptr;
1117 :
1118 361 : ptr= page_buff + sizeof(maria_trans_file_magic);
1119 361 : desc->timestamp= uint8korr(ptr);
1120 361 : ptr+= 8;
1121 361 : desc->maria_version= uint4korr(ptr);
1122 361 : ptr+= 4;
1123 361 : desc->mysql_version= uint4korr(ptr);
1124 361 : ptr+= 4;
1125 361 : desc->server_id= uint4korr(ptr + 4);
1126 361 : ptr+= 4;
1127 361 : desc->page_size= uint2korr(ptr) + 1;
1128 361 : ptr+= 2;
1129 361 : desc->file_number= uint3korr(ptr);
1130 361 : ptr+=3;
1131 361 : desc->max_lsn= lsn_korr(ptr);
1132 : }
1133 :
1134 :
1135 : /*
1136 : @brief Read hander file information from loghandler file
1137 :
1138 : @param desc header information descriptor to be filled with information
1139 : @param file file descriptor to read
1140 :
1141 : @retval 0 OK
1142 : @retval 1 Error
1143 : */
1144 :
1145 : my_bool translog_read_file_header(LOGHANDLER_FILE_INFO *desc, File file)
1146 361 : {
1147 : uchar page_buff[LOG_HEADER_DATA_SIZE];
1148 361 : DBUG_ENTER("translog_read_file_header");
1149 :
1150 361 : if (my_pread(file, page_buff,
1151 : sizeof(page_buff), 0, MYF(MY_FNABP | MY_WME)))
1152 : {
1153 0 : DBUG_PRINT("info", ("log read fail error: %d", my_errno));
1154 0 : DBUG_RETURN(1);
1155 : }
1156 361 : translog_interpret_file_header(desc, page_buff);
1157 361 : DBUG_PRINT("info", ("timestamp: %llu maria ver: %lu mysql ver: %lu "
1158 : "server id %lu page size %lu file number %lu "
1159 : "max lsn: (%lu,0x%lx)",
1160 : (ulonglong) desc->timestamp,
1161 : (ulong) desc->maria_version,
1162 : (ulong) desc->mysql_version,
1163 : (ulong) desc->server_id,
1164 : desc->page_size, (ulong) desc->file_number,
1165 : LSN_IN_PARTS(desc->max_lsn)));
1166 361 : DBUG_RETURN(0);
1167 : }
1168 :
1169 :
1170 : /*
1171 : @brief set the lsn to the files from_file - to_file if it is greater
1172 : then written in the file
1173 :
1174 : @param from_file first file number (min)
1175 : @param to_file last file number (max)
1176 : @param lsn the lsn for writing
1177 : @param is_locked true if current thread locked the log handler
1178 :
1179 : @retval 0 OK
1180 : @retval 1 Error
1181 : */
1182 :
1183 : static my_bool translog_set_lsn_for_files(uint32 from_file, uint32 to_file,
1184 : LSN lsn, my_bool is_locked)
1185 6704064 : {
1186 : uint32 file;
1187 6704064 : DBUG_ENTER("translog_set_lsn_for_files");
1188 6704064 : DBUG_PRINT("enter", ("From: %lu to: %lu lsn: (%lu,0x%lx) locked: %d",
1189 : (ulong) from_file, (ulong) to_file,
1190 : LSN_IN_PARTS(lsn),
1191 : is_locked));
1192 6704064 : DBUG_ASSERT(from_file <= to_file);
1193 6704064 : DBUG_ASSERT(from_file > 0); /* we have not file 0 */
1194 :
1195 : /* Checks the current file (not finished yet file) */
1196 6704064 : if (!is_locked)
1197 3 : translog_lock();
1198 6704064 : if (to_file == (uint32) LSN_FILE_NO(log_descriptor.horizon))
1199 : {
1200 6704064 : if (likely(cmp_translog_addr(lsn, log_descriptor.max_lsn) > 0))
1201 6704064 : log_descriptor.max_lsn= lsn;
1202 6704064 : to_file--;
1203 : }
1204 6704064 : if (!is_locked)
1205 3 : translog_unlock();
1206 :
1207 : /* Checks finished files if they are */
1208 6704064 : pthread_mutex_lock(&log_descriptor.file_header_lock);
1209 6704065 : for (file= from_file; file <= to_file; file++)
1210 : {
1211 : LOGHANDLER_FILE_INFO info;
1212 1 : File fd= open_logfile_by_number_no_cache(file);
1213 1 : if ((fd < 0) ||
1214 : ((translog_read_file_header(&info, fd) ||
1215 : (cmp_translog_addr(lsn, info.max_lsn) > 0 &&
1216 : translog_max_lsn_to_header(fd, lsn))) |
1217 : my_close(fd, MYF(MY_WME))))
1218 : {
1219 0 : translog_stop_writing();
1220 0 : DBUG_RETURN(1);
1221 : }
1222 : }
1223 6704064 : pthread_mutex_unlock(&log_descriptor.file_header_lock);
1224 :
1225 6704064 : DBUG_RETURN(0);
1226 : }
1227 :
1228 :
/*
  Descriptor of a file in log_descriptor.unfinished_files: the file number
  plus the number of writes started (and not yet finished) in it
*/
struct st_file_counter
{
  uint32 file;            /* file number */
  uint32 counter;         /* counter for started writes */
};
1235 :
1236 :
1237 : /*
1238 : @brief mark file "in progress" (for multi-group records)
1239 :
1240 : @param file log file number
1241 : */
1242 :
1243 : static void translog_mark_file_unfinished(uint32 file)
1244 3 : {
1245 : int place, i;
1246 : struct st_file_counter fc, *fc_ptr;
1247 :
1248 3 : DBUG_ENTER("translog_mark_file_unfinished");
1249 3 : DBUG_PRINT("enter", ("file: %lu", (ulong) file));
1250 :
1251 3 : fc.file= file; fc.counter= 1;
1252 3 : pthread_mutex_lock(&log_descriptor.unfinished_files_lock);
1253 :
1254 3 : if (log_descriptor.unfinished_files.elements == 0)
1255 : {
1256 3 : insert_dynamic(&log_descriptor.unfinished_files, (uchar*) &fc);
1257 3 : DBUG_PRINT("info", ("The first element inserted"));
1258 3 : goto end;
1259 : }
1260 :
1261 0 : for (place= log_descriptor.unfinished_files.elements - 1;
1262 0 : place >= 0;
1263 0 : place--)
1264 : {
1265 0 : fc_ptr= dynamic_element(&log_descriptor.unfinished_files,
1266 : place, struct st_file_counter *);
1267 0 : if (fc_ptr->file <= file)
1268 0 : break;
1269 : }
1270 :
1271 0 : if (place >= 0 && fc_ptr->file == file)
1272 : {
1273 0 : fc_ptr->counter++;
1274 0 : DBUG_PRINT("info", ("counter increased"));
1275 0 : goto end;
1276 : }
1277 :
1278 0 : if (place == (int)log_descriptor.unfinished_files.elements)
1279 : {
1280 0 : insert_dynamic(&log_descriptor.unfinished_files, (uchar*) &fc);
1281 0 : DBUG_PRINT("info", ("The last element inserted"));
1282 0 : goto end;
1283 : }
1284 : /* shift and assign new element */
1285 0 : insert_dynamic(&log_descriptor.unfinished_files,
1286 : (uchar*)
1287 : dynamic_element(&log_descriptor.unfinished_files,
1288 : log_descriptor.unfinished_files.elements- 1,
1289 : struct st_file_counter *));
1290 0 : for(i= log_descriptor.unfinished_files.elements - 1; i > place; i--)
1291 : {
1292 : /* we do not use set_dynamic() to avoid unneeded checks */
1293 0 : memcpy(dynamic_element(&log_descriptor.unfinished_files,
1294 : i, struct st_file_counter *),
1295 : dynamic_element(&log_descriptor.unfinished_files,
1296 : i + 1, struct st_file_counter *),
1297 : sizeof(struct st_file_counter));
1298 : }
1299 0 : memcpy(dynamic_element(&log_descriptor.unfinished_files,
1300 : place + 1, struct st_file_counter *),
1301 : &fc, sizeof(struct st_file_counter));
1302 3 : end:
1303 3 : pthread_mutex_unlock(&log_descriptor.unfinished_files_lock);
1304 3 : DBUG_VOID_RETURN;
1305 : }
1306 :
1307 :
1308 : /*
1309 : @brief remove file mark "in progress" (for multi-group records)
1310 :
1311 : @param file log file number
1312 : */
1313 :
1314 : static void translog_mark_file_finished(uint32 file)
1315 3 : {
1316 : int i;
1317 : struct st_file_counter *fc_ptr;
1318 3 : DBUG_ENTER("translog_mark_file_finished");
1319 3 : DBUG_PRINT("enter", ("file: %lu", (ulong) file));
1320 :
1321 3 : LINT_INIT(fc_ptr);
1322 :
1323 3 : pthread_mutex_lock(&log_descriptor.unfinished_files_lock);
1324 :
1325 3 : DBUG_ASSERT(log_descriptor.unfinished_files.elements > 0);
1326 3 : for (i= 0;
1327 6 : i < (int) log_descriptor.unfinished_files.elements;
1328 0 : i++)
1329 : {
1330 3 : fc_ptr= dynamic_element(&log_descriptor.unfinished_files,
1331 : i, struct st_file_counter *);
1332 3 : if (fc_ptr->file == file)
1333 : {
1334 0 : break;
1335 : }
1336 : }
1337 3 : DBUG_ASSERT(i < (int) log_descriptor.unfinished_files.elements);
1338 :
1339 3 : if (! --fc_ptr->counter)
1340 3 : delete_dynamic_element(&log_descriptor.unfinished_files, i);
1341 3 : pthread_mutex_unlock(&log_descriptor.unfinished_files_lock);
1342 3 : DBUG_VOID_RETURN;
1343 : }
1344 :
1345 :
1346 : /*
1347 : @brief get max LSN of the record which parts stored in this file
1348 :
1349 : @param file file number
1350 :
1351 : @return requested LSN or LSN_IMPOSSIBLE/LSN_ERROR
1352 : @retval LSN_IMPOSSIBLE File is still not finished
1353 : @retval LSN_ERROR Error opening file
1354 : @retval # LSN of the record which parts stored in this file
1355 : */
1356 :
1357 : LSN translog_get_file_max_lsn_stored(uint32 file)
1358 5 : {
1359 5 : uint32 limit= FILENO_IMPOSSIBLE;
1360 5 : DBUG_ENTER("translog_get_file_max_lsn_stored");
1361 5 : DBUG_PRINT("enter", ("file: %lu", (ulong)file));
1362 5 : DBUG_ASSERT(translog_status == TRANSLOG_OK ||
1363 : translog_status == TRANSLOG_READONLY);
1364 :
1365 5 : pthread_mutex_lock(&log_descriptor.unfinished_files_lock);
1366 :
1367 : /* find file with minimum file number "in progress" */
1368 5 : if (log_descriptor.unfinished_files.elements > 0)
1369 : {
1370 : struct st_file_counter *fc_ptr;
1371 0 : fc_ptr= dynamic_element(&log_descriptor.unfinished_files,
1372 : 0, struct st_file_counter *);
1373 0 : limit= fc_ptr->file; /* minimal file number "in progress" */
1374 : }
1375 5 : pthread_mutex_unlock(&log_descriptor.unfinished_files_lock);
1376 :
1377 : /*
1378 : if there is no "in progress file" then unfinished file is in progress
1379 : for sure
1380 : */
1381 5 : if (limit == FILENO_IMPOSSIBLE)
1382 : {
1383 5 : TRANSLOG_ADDRESS horizon= translog_get_horizon();
1384 5 : limit= LSN_FILE_NO(horizon);
1385 : }
1386 :
1387 5 : if (file >= limit)
1388 : {
1389 1 : DBUG_PRINT("info", ("The file in in progress"));
1390 1 : DBUG_RETURN(LSN_IMPOSSIBLE);
1391 : }
1392 :
1393 : {
1394 : LOGHANDLER_FILE_INFO info;
1395 4 : File fd= open_logfile_by_number_no_cache(file);
1396 4 : if ((fd < 0) ||
1397 : (translog_read_file_header(&info, fd) | my_close(fd, MYF(MY_WME))))
1398 : {
1399 0 : DBUG_PRINT("error", ("Can't read file header"));
1400 0 : DBUG_RETURN(LSN_ERROR);
1401 : }
1402 4 : DBUG_PRINT("info", ("Max lsn: (%lu,0x%lx)",
1403 : LSN_IN_PARTS(info.max_lsn)));
1404 4 : DBUG_RETURN(info.max_lsn);
1405 : }
1406 : }
1407 :
1408 : /*
1409 : Initialize transaction log file buffer
1410 :
1411 : SYNOPSIS
1412 : translog_buffer_init()
1413 : buffer The buffer to initialize
1414 : num Number of this buffer
1415 :
1416 : RETURN
1417 : 0 OK
1418 : 1 Error
1419 : */
1420 :
1421 : static my_bool translog_buffer_init(struct st_translog_buffer *buffer, int num)
1422 6384 : {
1423 6384 : DBUG_ENTER("translog_buffer_init");
1424 6384 : buffer->prev_last_lsn= buffer->last_lsn= LSN_IMPOSSIBLE;
1425 6384 : DBUG_PRINT("info", ("last_lsn and prev_last_lsn set to 0 buffer: 0x%lx",
1426 : (ulong) buffer));
1427 :
1428 6384 : buffer->buffer_no= (uint8) num;
1429 : /* This Buffer File */
1430 6384 : buffer->file= NULL;
1431 6384 : buffer->overlay= 0;
1432 : /* cache for current log */
1433 6384 : memset(buffer->buffer, TRANSLOG_FILLER, TRANSLOG_WRITE_BUFFER);
1434 : /* Buffer size */
1435 6384 : buffer->size= 0;
1436 : /* cond of thread which is waiting for buffer filling */
1437 6384 : if (pthread_cond_init(&buffer->waiting_filling_buffer, 0))
1438 0 : DBUG_RETURN(1);
1439 : /* Number of records which are in copy progress */
1440 6384 : buffer->copy_to_buffer_in_progress= 0;
1441 : /* list of waiting buffer ready threads */
1442 6384 : buffer->waiting_flush= 0;
1443 : /*
1444 : Buffers locked by fallowing mutex. As far as buffers create logical
1445 : circle (after last buffer goes first) it trigger false alarm of deadlock
1446 : detect system, so we remove check of deadlock for this buffers. In deed
1447 : all mutex locks concentrated around current buffer except flushing
1448 : thread (but it is only one thread). One thread can't take more then
1449 : 2 buffer locks at once. So deadlock is impossible here.
1450 :
1451 : To prevent false alarm of dead lock detection we switch dead lock
1452 : detection for one buffer in the middle of the buffers chain. Excluding
1453 : only one of eight buffers from deadlock detection hardly can hide other
1454 : possible problems which include this mutexes.
1455 : */
1456 6384 : if (my_pthread_mutex_init(&buffer->mutex, MY_MUTEX_INIT_FAST,
1457 : "translog_buffer->mutex",
1458 : (num == TRANSLOG_BUFFERS_NO - 2 ?
1459 : MYF_NO_DEADLOCK_DETECTION : 0)) ||
1460 : pthread_cond_init(&buffer->prev_sent_to_disk_cond, 0))
1461 0 : DBUG_RETURN(1);
1462 6384 : buffer->is_closing_buffer= 0;
1463 6384 : buffer->prev_sent_to_disk= LSN_IMPOSSIBLE;
1464 6384 : buffer->prev_buffer_offset= LSN_IMPOSSIBLE;
1465 6384 : buffer->ver= 0;
1466 6384 : DBUG_RETURN(0);
1467 : }
1468 :
1469 :
1470 : /*
1471 : @brief close transaction log file by descriptor
1472 :
1473 : @param file pagegecache file descriptor reference
1474 :
1475 : @return Operation status
1476 : @retval 0 OK
1477 : @retval 1 Error
1478 : */
1479 :
1480 : static my_bool translog_close_log_file(TRANSLOG_FILE *file)
1481 722 : {
1482 722 : int rc= 0;
1483 722 : flush_pagecache_blocks(log_descriptor.pagecache, &file->handler,
1484 : FLUSH_RELEASE);
1485 : /*
1486 : Sync file when we close it
1487 : TODO: sync only we have changed the log
1488 : */
1489 722 : if (!file->is_sync)
1490 702 : rc= my_sync(file->handler.file, MYF(MY_WME));
1491 722 : rc|= my_close(file->handler.file, MYF(MY_WME));
1492 722 : my_free(file, MYF(0));
1493 722 : return test(rc);
1494 : }
1495 :
1496 :
1497 : /**
1498 : @brief Dummy function for write failure (the log to not use
1499 : pagecache writing)
1500 : */
1501 :
1502 : void translog_dummy_write_failure(uchar *data __attribute__((unused)))
1503 0 : {
1504 : return;
1505 : }
1506 :
1507 :
1508 : /**
1509 : @brief Initializes TRANSLOG_FILE structure
1510 :
1511 : @param file reference on the file to initialize
1512 : @param number file number
1513 : @param is_sync is file synced on disk
1514 : */
1515 :
1516 : static void translog_file_init(TRANSLOG_FILE *file, uint32 number,
1517 : my_bool is_sync)
1518 821 : {
1519 821 : pagecache_file_init(file->handler, &translog_page_validator,
1520 : &translog_dummy_callback,
1521 : &translog_dummy_write_failure,
1522 : maria_flush_log_for_page_none, file);
1523 821 : file->number= number;
1524 821 : file->was_recovered= 0;
1525 821 : file->is_sync= is_sync;
1526 : }
1527 :
1528 :
1529 : /**
1530 : @brief Create and fill header of new file.
1531 :
1532 : @note the caller must call it right after it has increased
1533 : log_descriptor.horizon to the new file
1534 : (log_descriptor.horizon+= LSN_ONE_FILE)
1535 :
1536 :
1537 : @retval 0 OK
1538 : @retval 1 Error
1539 : */
1540 :
1541 : static my_bool translog_create_new_file()
1542 23 : {
1543 23 : TRANSLOG_FILE *file= (TRANSLOG_FILE*)my_malloc(sizeof(TRANSLOG_FILE),
1544 : MYF(0));
1545 :
1546 23 : TRANSLOG_FILE *old= get_current_logfile();
1547 23 : uint32 file_no= LSN_FILE_NO(log_descriptor.horizon);
1548 23 : DBUG_ENTER("translog_create_new_file");
1549 :
1550 23 : if (file == NULL)
1551 23 : goto error;
1552 :
1553 : /*
1554 : Writes max_lsn to the file header before finishing it (there is no need
1555 : to lock file header buffer because it is still unfinished file, so only
1556 : one thread can finish the file and nobody interested of LSN of current
1557 : (unfinished) file, because no one can purge it).
1558 : */
1559 23 : if (translog_max_lsn_to_header(old->handler.file, log_descriptor.max_lsn))
1560 23 : goto error;
1561 :
1562 23 : rw_wrlock(&log_descriptor.open_files_lock);
1563 23 : DBUG_ASSERT(log_descriptor.max_file - log_descriptor.min_file + 1 ==
1564 : log_descriptor.open_files.elements);
1565 23 : DBUG_ASSERT(file_no == log_descriptor.max_file + 1);
1566 23 : if (allocate_dynamic(&log_descriptor.open_files,
1567 : log_descriptor.max_file - log_descriptor.min_file + 2))
1568 23 : goto error_lock;
1569 23 : if ((file->handler.file=
1570 : create_logfile_by_number_no_cache(file_no)) == -1)
1571 23 : goto error_lock;
1572 23 : translog_file_init(file, file_no, 0);
1573 :
1574 : /* this call just expand the array */
1575 23 : insert_dynamic(&log_descriptor.open_files, (uchar*)&file);
1576 23 : log_descriptor.max_file++;
1577 : {
1578 23 : char *start= (char*) dynamic_element(&log_descriptor.open_files, 0,
1579 : TRANSLOG_FILE**);
1580 23 : memmove(start + sizeof(TRANSLOG_FILE*), start,
1581 : sizeof(TRANSLOG_FILE*) *
1582 : (log_descriptor.max_file - log_descriptor.min_file + 1 - 1));
1583 : }
1584 : /* can't fail we because we expanded array */
1585 23 : set_dynamic(&log_descriptor.open_files, (uchar*)&file, 0);
1586 23 : DBUG_ASSERT(log_descriptor.max_file - log_descriptor.min_file + 1 ==
1587 : log_descriptor.open_files.elements);
1588 23 : rw_unlock(&log_descriptor.open_files_lock);
1589 :
1590 23 : DBUG_PRINT("info", ("file_no: %lu", (ulong)file_no));
1591 :
1592 23 : if (translog_write_file_header())
1593 0 : DBUG_RETURN(1);
1594 :
1595 23 : if (ma_control_file_write_and_force(last_checkpoint_lsn, file_no,
1596 : max_trid_in_control_file,
1597 : recovery_failures))
1598 : {
1599 0 : translog_stop_writing();
1600 0 : DBUG_RETURN(1);
1601 : }
1602 :
1603 23 : DBUG_RETURN(0);
1604 :
1605 0 : error_lock:
1606 0 : rw_unlock(&log_descriptor.open_files_lock);
1607 0 : error:
1608 0 : translog_stop_writing();
1609 0 : DBUG_RETURN(1);
1610 : }
1611 :
1612 :
1613 : /**
1614 : @brief Locks the loghandler buffer.
1615 :
1616 : @param buffer This buffer which should be locked
1617 :
1618 : @note See comment before buffer 'mutex' variable.
1619 :
1620 : @retval 0 OK
1621 : @retval 1 Error
1622 : */
1623 :
1624 : static void translog_buffer_lock(struct st_translog_buffer *buffer)
1625 11628871 : {
1626 11628871 : DBUG_ENTER("translog_buffer_lock");
1627 11628871 : DBUG_PRINT("enter",
1628 : ("Lock buffer #%u: (0x%lx)", (uint) buffer->buffer_no,
1629 : (ulong) buffer));
1630 11628871 : pthread_mutex_lock(&buffer->mutex);
1631 11628871 : DBUG_VOID_RETURN;
1632 : }
1633 :
1634 :
1635 : /*
1636 : Unlock the loghandler buffer
1637 :
1638 : SYNOPSIS
1639 : translog_buffer_unlock()
1640 : buffer This buffer which should be unlocked
1641 :
1642 : RETURN
1643 : 0 OK
1644 : 1 Error
1645 : */
1646 :
1647 : static void translog_buffer_unlock(struct st_translog_buffer *buffer)
1648 11628871 : {
1649 11628871 : DBUG_ENTER("translog_buffer_unlock");
1650 11628871 : DBUG_PRINT("enter", ("Unlock buffer... #%u (0x%lx)",
1651 : (uint) buffer->buffer_no, (ulong) buffer));
1652 :
1653 11628871 : pthread_mutex_unlock(&buffer->mutex);
1654 11628871 : DBUG_VOID_RETURN;
1655 : }
1656 :
1657 :
1658 : /*
1659 : Write a header on the page
1660 :
1661 : SYNOPSIS
1662 : translog_new_page_header()
1663 : horizon Where to write the page
1664 : cursor Where to write the page
1665 :
1666 : NOTE
1667 : - space for page header should be checked before
1668 : */
1669 :
1670 : static uchar translog_sector_random;
1671 :
1672 : static void translog_new_page_header(TRANSLOG_ADDRESS *horizon,
1673 : struct st_buffer_cursor *cursor)
1674 192282 : {
1675 : uchar *ptr;
1676 :
1677 192282 : DBUG_ENTER("translog_new_page_header");
1678 192282 : DBUG_ASSERT(cursor->ptr);
1679 :
1680 192282 : cursor->protected= 0;
1681 :
1682 192282 : ptr= cursor->ptr;
1683 : /* Page number */
1684 192282 : int3store(ptr, LSN_OFFSET(*horizon) / TRANSLOG_PAGE_SIZE);
1685 192282 : ptr+= 3;
1686 : /* File number */
1687 192282 : int3store(ptr, LSN_FILE_NO(*horizon));
1688 192282 : ptr+= 3;
1689 192282 : DBUG_ASSERT(TRANSLOG_PAGE_FLAGS == (ptr - cursor->ptr));
1690 192282 : cursor->ptr[TRANSLOG_PAGE_FLAGS]= (uchar) log_descriptor.flags;
1691 192282 : ptr++;
1692 192282 : if (log_descriptor.flags & TRANSLOG_PAGE_CRC)
1693 : {
1694 : #ifndef DBUG_OFF
1695 20191 : DBUG_PRINT("info", ("write 0x11223344 CRC to (%lu,0x%lx)",
1696 : LSN_IN_PARTS(*horizon)));
1697 : /* This will be overwritten by real CRC; This is just for debugging */
1698 20191 : int4store(ptr, 0x11223344);
1699 : #endif
1700 : /* CRC will be put when page is finished */
1701 20191 : ptr+= CRC_SIZE;
1702 : }
1703 192282 : if (log_descriptor.flags & TRANSLOG_SECTOR_PROTECTION)
1704 : {
1705 : /*
1706 : translog_sector_randmo works like "random" values producer because
1707 : it is enough to have such "random" for this purpose and it will
1708 : not interfere with higher level pseudo random value generator
1709 : */
1710 20191 : ptr[0]= translog_sector_random++;
1711 20191 : ptr+= TRANSLOG_PAGE_SIZE / DISK_DRIVE_SECTOR_SIZE;
1712 : }
1713 : {
1714 192282 : uint len= (ptr - cursor->ptr);
1715 192282 : (*horizon)+= len; /* increasing the offset part of the address */
1716 192282 : cursor->current_page_fill= len;
1717 192282 : if (!cursor->chaser)
1718 10261 : cursor->buffer->size+= len;
1719 : }
1720 192282 : cursor->ptr= ptr;
1721 192282 : DBUG_PRINT("info", ("NewP buffer #%u: 0x%lx chaser: %d Size: %lu (%lu) "
1722 : "Horizon: (%lu,0x%lx)",
1723 : (uint) cursor->buffer->buffer_no, (ulong) cursor->buffer,
1724 : cursor->chaser, (ulong) cursor->buffer->size,
1725 : (ulong) (cursor->ptr - cursor->buffer->buffer),
1726 : LSN_IN_PARTS(*horizon)));
1727 192282 : translog_check_cursor(cursor);
1728 192282 : DBUG_VOID_RETURN;
1729 : }
1730 :
1731 :
/*
  Put sector protection on the page image

  SYNOPSIS
    translog_put_sector_protection()
    page                 reference on the page content
    cursor               cursor of the buffer

  NOTES
    We put a sector protection on all following sectors on the page,
    except the first sector that is protected by page header.

    For every processed sector its first byte is saved in the protection
    table (which sits at the end of the page header) and is replaced in the
    page with the protection value: first byte of the table plus the write
    counter of the cursor.
*/

static void translog_put_sector_protection(uchar *page,
                                           struct st_buffer_cursor *cursor)
{
  /* the table occupies the last bytes of the page overhead (header) */
  uchar *table= page + log_descriptor.page_overhead -
    TRANSLOG_PAGE_SIZE / DISK_DRIVE_SECTOR_SIZE;
  uint i, offset;
  /* sector which contains the last byte before previous_offset */
  uint16 last_protected_sector= ((cursor->previous_offset - 1) /
                                 DISK_DRIVE_SECTOR_SIZE);
  /* sector which contains previous_offset itself */
  uint16 start_sector= cursor->previous_offset / DISK_DRIVE_SECTOR_SIZE;
  /* the sum is truncated to one byte */
  uint8 value= table[0] + cursor->write_counter;
  DBUG_ENTER("translog_put_sector_protection");

  if (start_sector == 0)
  {
    /* First sector is protected by file & page numbers in the page header. */
    start_sector= 1;
  }

  DBUG_PRINT("enter", ("Write counter:%u value:%u offset:%u, "
                       "last protected:%u start sector:%u",
                       (uint) cursor->write_counter,
                       (uint) value,
                       (uint) cursor->previous_offset,
                       (uint) last_protected_sector, (uint) start_sector));
  if (last_protected_sector == start_sector)
  {
    i= last_protected_sector;
    offset= last_protected_sector * DISK_DRIVE_SECTOR_SIZE;
    /* restore data, because we modified sector which was protected */
    if (offset < cursor->previous_offset)
      page[offset]= table[i];
  }
  /* save the first byte of every sector and put the value on its place */
  for (i= start_sector, offset= start_sector * DISK_DRIVE_SECTOR_SIZE;
       i < TRANSLOG_PAGE_SIZE / DISK_DRIVE_SECTOR_SIZE;
       i++, (offset+= DISK_DRIVE_SECTOR_SIZE))
  {
    DBUG_PRINT("info", ("sector:%u offset:%u data 0x%x",
                        i, offset, (uint) page[offset]));
    table[i]= page[offset];
    page[offset]= value;
    DBUG_PRINT("info", ("sector:%u offset:%u data 0x%x",
                        i, offset, (uint) page[offset]));
  }
  DBUG_VOID_RETURN;
}
1790 :
1791 :
1792 : /*
1793 : Calculate CRC32 of given area
1794 :
1795 : SYNOPSIS
1796 : translog_crc()
1797 : area Pointer of the area beginning
1798 : length The Area length
1799 :
1800 : RETURN
1801 : CRC32
1802 : */
1803 :
1804 : static uint32 translog_crc(uchar *area, uint length)
1805 63770 : {
1806 63770 : DBUG_ENTER("translog_crc");
1807 63770 : DBUG_RETURN(crc32(0L, (unsigned char*) area, length));
1808 : }
1809 :
1810 :
1811 : /*
1812 : Finish current page with zeros
1813 :
1814 : SYNOPSIS
1815 : translog_finish_page()
1816 : horizon \ horizon & buffer pointers
1817 : cursor /
1818 : */
1819 :
1820 : static void translog_finish_page(TRANSLOG_ADDRESS *horizon,
1821 : struct st_buffer_cursor *cursor)
1822 192537 : {
1823 192537 : uint16 left= TRANSLOG_PAGE_SIZE - cursor->current_page_fill;
1824 192537 : uchar *page= cursor->ptr - cursor->current_page_fill;
1825 192537 : DBUG_ENTER("translog_finish_page");
1826 192537 : DBUG_PRINT("enter", ("Buffer: #%u 0x%lx "
1827 : "Buffer addr: (%lu,0x%lx) "
1828 : "Page addr: (%lu,0x%lx) "
1829 : "size:%lu (%lu) Pg:%u left:%u",
1830 : (uint) cursor->buffer_no, (ulong) cursor->buffer,
1831 : LSN_IN_PARTS(cursor->buffer->offset),
1832 : (ulong) LSN_FILE_NO(*horizon),
1833 : (ulong) (LSN_OFFSET(*horizon) -
1834 : cursor->current_page_fill),
1835 : (ulong) cursor->buffer->size,
1836 : (ulong) (cursor->ptr -cursor->buffer->buffer),
1837 : (uint) cursor->current_page_fill, (uint) left));
1838 192537 : DBUG_ASSERT(LSN_FILE_NO(*horizon) == LSN_FILE_NO(cursor->buffer->offset));
1839 192537 : translog_check_cursor(cursor);
1840 192537 : if (cursor->protected)
1841 : {
1842 0 : DBUG_PRINT("info", ("Already protected and finished"));
1843 0 : DBUG_VOID_RETURN;
1844 : }
1845 192537 : cursor->protected= 1;
1846 :
1847 192537 : DBUG_ASSERT(left < TRANSLOG_PAGE_SIZE);
1848 192537 : if (left != 0)
1849 : {
1850 10150 : DBUG_PRINT("info", ("left: %u", (uint) left));
1851 10150 : memset(cursor->ptr, TRANSLOG_FILLER, left);
1852 10150 : cursor->ptr+= left;
1853 10150 : (*horizon)+= left; /* offset increasing */
1854 10150 : if (!cursor->chaser)
1855 10148 : cursor->buffer->size+= left;
1856 : /* We are finishing the page so reset the counter */
1857 10150 : cursor->current_page_fill= 0;
1858 10150 : DBUG_PRINT("info", ("Finish Page buffer #%u: 0x%lx "
1859 : "chaser: %d Size: %lu (%lu)",
1860 : (uint) cursor->buffer->buffer_no,
1861 : (ulong) cursor->buffer, cursor->chaser,
1862 : (ulong) cursor->buffer->size,
1863 : (ulong) (cursor->ptr - cursor->buffer->buffer)));
1864 10150 : translog_check_cursor(cursor);
1865 : }
1866 : /*
1867 : When we are finishing the page other thread might not finish the page
1868 : header yet (in case if we started from the middle of the page) so we
1869 : have to read log_descriptor.flags but not the flags from the page.
1870 : */
1871 192537 : if (log_descriptor.flags & TRANSLOG_SECTOR_PROTECTION)
1872 : {
1873 20191 : translog_put_sector_protection(page, cursor);
1874 20191 : DBUG_PRINT("info", ("drop write_counter"));
1875 20191 : cursor->write_counter= 0;
1876 20191 : cursor->previous_offset= 0;
1877 : }
1878 192537 : if (log_descriptor.flags & TRANSLOG_PAGE_CRC)
1879 : {
1880 : uint32 crc= translog_crc(page + log_descriptor.page_overhead,
1881 : TRANSLOG_PAGE_SIZE -
1882 20191 : log_descriptor.page_overhead);
1883 20191 : DBUG_PRINT("info", ("CRC: %lx", (ulong) crc));
1884 : /* We have page number, file number and flag before crc */
1885 20191 : int4store(page + 3 + 3 + 1, crc);
1886 : }
1887 192537 : DBUG_VOID_RETURN;
1888 : }
1889 :
1890 :
1891 : /*
1892 : @brief Wait until all threads have finished closing this buffer.
1893 :
1894 : @param buffer This buffer should be check
1895 : */
1896 :
1897 : static void translog_wait_for_closing(struct st_translog_buffer *buffer)
1898 0 : {
1899 0 : DBUG_ENTER("translog_wait_for_closing");
1900 0 : DBUG_PRINT("enter", ("Buffer #%u 0x%lx copies in progress: %u "
1901 : "is closing %u File: %d size: %lu",
1902 : (uint) buffer->buffer_no, (ulong) buffer,
1903 : (uint) buffer->copy_to_buffer_in_progress,
1904 : (uint) buffer->is_closing_buffer,
1905 : (buffer->file ? buffer->file->handler.file : -1),
1906 : (ulong) buffer->size));
1907 0 : translog_buffer_lock_assert_owner(buffer);
1908 :
1909 0 : while (buffer->is_closing_buffer)
1910 : {
1911 0 : DBUG_PRINT("info", ("wait for writers... buffer: #%u 0x%lx",
1912 : (uint) buffer->buffer_no, (ulong) buffer));
1913 0 : DBUG_ASSERT(buffer->file != NULL);
1914 0 : pthread_cond_wait(&buffer->waiting_filling_buffer, &buffer->mutex);
1915 0 : DBUG_PRINT("info", ("wait for writers done buffer: #%u 0x%lx",
1916 : (uint) buffer->buffer_no, (ulong) buffer));
1917 : }
1918 :
1919 0 : DBUG_VOID_RETURN;
1920 : }
1921 :
1922 :
1923 : /*
1924 : @brief Wait until all threads have finished filling this buffer.
1925 :
1926 : @param buffer This buffer should be check
1927 : */
1928 :
1929 : static void translog_wait_for_writers(struct st_translog_buffer *buffer)
1930 112719 : {
1931 112719 : DBUG_ENTER("translog_wait_for_writers");
1932 112719 : DBUG_PRINT("enter", ("Buffer #%u 0x%lx copies in progress: %u "
1933 : "is closing %u File: %d size: %lu",
1934 : (uint) buffer->buffer_no, (ulong) buffer,
1935 : (uint) buffer->copy_to_buffer_in_progress,
1936 : (uint) buffer->is_closing_buffer,
1937 : (buffer->file ? buffer->file->handler.file : -1),
1938 : (ulong) buffer->size));
1939 112719 : translog_buffer_lock_assert_owner(buffer);
1940 :
1941 112719 : while (buffer->copy_to_buffer_in_progress)
1942 : {
1943 0 : DBUG_PRINT("info", ("wait for writers... buffer: #%u 0x%lx",
1944 : (uint) buffer->buffer_no, (ulong) buffer));
1945 0 : DBUG_ASSERT(buffer->file != NULL);
1946 0 : pthread_cond_wait(&buffer->waiting_filling_buffer, &buffer->mutex);
1947 0 : DBUG_PRINT("info", ("wait for writers done buffer: #%u 0x%lx",
1948 : (uint) buffer->buffer_no, (ulong) buffer));
1949 : }
1950 :
1951 112719 : DBUG_VOID_RETURN;
1952 : }
1953 :
1954 :
1955 : /*
1956 :
1957 : Wait for buffer to become free
1958 :
1959 : SYNOPSIS
1960 : translog_wait_for_buffer_free()
1961 : buffer The buffer we are waiting for
1962 :
1963 : NOTE
1964 : - this buffer should be locked
1965 : */
1966 :
1967 : static void translog_wait_for_buffer_free(struct st_translog_buffer *buffer)
1968 19658 : {
1969 19658 : TRANSLOG_ADDRESS offset= buffer->offset;
1970 19658 : TRANSLOG_FILE *file= buffer->file;
1971 19658 : uint8 ver= buffer->ver;
1972 19658 : DBUG_ENTER("translog_wait_for_buffer_free");
1973 19658 : DBUG_PRINT("enter", ("Buffer #%u 0x%lx copies in progress: %u "
1974 : "is closing %u File: %d size: %lu",
1975 : (uint) buffer->buffer_no, (ulong) buffer,
1976 : (uint) buffer->copy_to_buffer_in_progress,
1977 : (uint) buffer->is_closing_buffer,
1978 : (buffer->file ? buffer->file->handler.file : -1),
1979 : (ulong) buffer->size));
1980 :
1981 19658 : translog_wait_for_writers(buffer);
1982 :
1983 19658 : if (offset != buffer->offset || file != buffer->file || ver != buffer->ver)
1984 0 : DBUG_VOID_RETURN; /* the buffer if already freed */
1985 :
1986 19658 : while (buffer->file != NULL)
1987 : {
1988 0 : DBUG_PRINT("info", ("wait for writers... buffer: #%u 0x%lx",
1989 : (uint) buffer->buffer_no, (ulong) buffer));
1990 0 : pthread_cond_wait(&buffer->waiting_filling_buffer, &buffer->mutex);
1991 0 : DBUG_PRINT("info", ("wait for writers done. buffer: #%u 0x%lx",
1992 : (uint) buffer->buffer_no, (ulong) buffer));
1993 : }
1994 19658 : DBUG_ASSERT(buffer->copy_to_buffer_in_progress == 0);
1995 19658 : DBUG_VOID_RETURN;
1996 : }
1997 :
1998 :
1999 : /*
2000 : Initialize the cursor for a buffer
2001 :
2002 : SYNOPSIS
2003 : translog_cursor_init()
2004 : buffer The buffer
2005 : cursor It's cursor
2006 : buffer_no Number of buffer
2007 : */
2008 :
2009 : static void translog_cursor_init(struct st_buffer_cursor *cursor,
2010 : struct st_translog_buffer *buffer,
2011 : uint8 buffer_no)
2012 22344 : {
2013 22344 : DBUG_ENTER("translog_cursor_init");
2014 22344 : cursor->ptr= buffer->buffer;
2015 22344 : cursor->buffer= buffer;
2016 22344 : cursor->buffer_no= buffer_no;
2017 22344 : cursor->current_page_fill= 0;
2018 22344 : cursor->chaser= (cursor != &log_descriptor.bc);
2019 22344 : cursor->write_counter= 0;
2020 22344 : cursor->previous_offset= 0;
2021 22344 : cursor->protected= 0;
2022 22344 : DBUG_VOID_RETURN;
2023 : }
2024 :
2025 :
2026 : /*
2027 : @brief Initialize buffer for the current file, and a cursor for this buffer.
2028 :
2029 : @param buffer The buffer
2030 : @param cursor It's cursor
2031 : @param buffer_no Number of buffer
2032 : */
2033 :
2034 : static void translog_start_buffer(struct st_translog_buffer *buffer,
2035 : struct st_buffer_cursor *cursor,
2036 : uint buffer_no)
2037 21254 : {
2038 21254 : DBUG_ENTER("translog_start_buffer");
2039 21254 : DBUG_PRINT("enter",
2040 : ("Assign buffer: #%u (0x%lx) offset: 0x%lx(%lu)",
2041 : (uint) buffer->buffer_no, (ulong) buffer,
2042 : (ulong) LSN_OFFSET(log_descriptor.horizon),
2043 : (ulong) LSN_OFFSET(log_descriptor.horizon)));
2044 21254 : DBUG_ASSERT(buffer_no == buffer->buffer_no);
2045 21254 : buffer->prev_last_lsn= buffer->last_lsn= LSN_IMPOSSIBLE;
2046 21254 : DBUG_PRINT("info", ("last_lsn and prev_last_lsn set to 0 buffer: 0x%lx",
2047 : (ulong) buffer));
2048 21254 : buffer->offset= log_descriptor.horizon;
2049 21254 : buffer->next_buffer_offset= LSN_IMPOSSIBLE;
2050 21254 : buffer->file= get_current_logfile();
2051 21254 : buffer->overlay= 0;
2052 21254 : buffer->size= 0;
2053 21254 : translog_cursor_init(cursor, buffer, buffer_no);
2054 21254 : DBUG_PRINT("info", ("file: #%ld (%d) init cursor #%u: 0x%lx "
2055 : "chaser: %d Size: %lu (%lu)",
2056 : (long) (buffer->file ? buffer->file->number : 0),
2057 : (buffer->file ? buffer->file->handler.file : -1),
2058 : (uint) cursor->buffer->buffer_no, (ulong) cursor->buffer,
2059 : cursor->chaser, (ulong) cursor->buffer->size,
2060 : (ulong) (cursor->ptr - cursor->buffer->buffer)));
2061 21254 : translog_check_cursor(cursor);
2062 21254 : pthread_mutex_lock(&log_descriptor.dirty_buffer_mask_lock);
2063 21254 : log_descriptor.dirty_buffer_mask|= (1 << buffer->buffer_no);
2064 21254 : pthread_mutex_unlock(&log_descriptor.dirty_buffer_mask_lock);
2065 :
2066 21254 : DBUG_VOID_RETURN;
2067 : }
2068 :
2069 :
2070 : /*
2071 : @brief Switch to the next buffer in a chain.
2072 :
2073 : @param horizon \ Pointers on current position in file and buffer
2074 : @param cursor /
2075 : @param new_file Also start new file
2076 :
2077 : @note
2078 : - loghandler should be locked
2079 : - after return new and old buffer still are locked
2080 :
2081 : @retval 0 OK
2082 : @retval 1 Error
2083 : */
2084 :
2085 : static my_bool translog_buffer_next(TRANSLOG_ADDRESS *horizon,
2086 : struct st_buffer_cursor *cursor,
2087 : my_bool new_file)
2088 1120 : {
2089 1120 : uint old_buffer_no= cursor->buffer_no;
2090 1120 : uint new_buffer_no= (old_buffer_no + 1) % TRANSLOG_BUFFERS_NO;
2091 1120 : struct st_translog_buffer *new_buffer= log_descriptor.buffers + new_buffer_no;
2092 1120 : my_bool chasing= cursor->chaser;
2093 1120 : DBUG_ENTER("translog_buffer_next");
2094 :
2095 1120 : DBUG_PRINT("info", ("horizon: (%lu,0x%lx) chasing: %d",
2096 : LSN_IN_PARTS(log_descriptor.horizon), chasing));
2097 :
2098 1120 : DBUG_ASSERT(cmp_translog_addr(log_descriptor.horizon, *horizon) >= 0);
2099 :
2100 1120 : translog_finish_page(horizon, cursor);
2101 :
2102 1120 : if (!chasing)
2103 : {
2104 30 : translog_buffer_lock(new_buffer);
2105 : #ifndef DBUG_OFF
2106 : {
2107 30 : TRANSLOG_ADDRESS offset= new_buffer->offset;
2108 30 : TRANSLOG_FILE *file= new_buffer->file;
2109 30 : uint8 ver= new_buffer->ver;
2110 30 : translog_lock_assert_owner();
2111 : #endif
2112 30 : translog_wait_for_buffer_free(new_buffer);
2113 : #ifndef DBUG_OFF
2114 : /* We keep the handler locked so nobody can start this new buffer */
2115 30 : DBUG_ASSERT(offset == new_buffer->offset && new_buffer->file == NULL &&
2116 : (file == NULL ? ver : (uint8)(ver + 1)) == new_buffer->ver);
2117 : }
2118 : #endif
2119 : }
2120 : else
2121 1090 : DBUG_ASSERT(new_buffer->file != NULL);
2122 :
2123 1120 : if (new_file)
2124 : {
2125 : /* move the horizon to the next file and its header page */
2126 23 : (*horizon)+= LSN_ONE_FILE;
2127 23 : (*horizon)= LSN_REPLACE_OFFSET(*horizon, TRANSLOG_PAGE_SIZE);
2128 23 : if (!chasing && translog_create_new_file())
2129 : {
2130 0 : DBUG_RETURN(1);
2131 : }
2132 : }
2133 :
2134 : /* prepare next page */
2135 1120 : if (chasing)
2136 1090 : translog_cursor_init(cursor, new_buffer, new_buffer_no);
2137 : else
2138 : {
2139 30 : translog_lock_assert_owner();
2140 30 : translog_start_buffer(new_buffer, cursor, new_buffer_no);
2141 30 : new_buffer->prev_buffer_offset=
2142 : log_descriptor.buffers[old_buffer_no].offset;
2143 30 : new_buffer->prev_last_lsn=
2144 : BUFFER_MAX_LSN(log_descriptor.buffers + old_buffer_no);
2145 : }
2146 1120 : log_descriptor.buffers[old_buffer_no].next_buffer_offset= new_buffer->offset;
2147 1120 : DBUG_PRINT("info", ("prev_last_lsn set to (%lu,0x%lx) buffer: 0x%lx",
2148 : LSN_IN_PARTS(new_buffer->prev_last_lsn),
2149 : (ulong) new_buffer));
2150 1120 : translog_new_page_header(horizon, cursor);
2151 1120 : DBUG_RETURN(0);
2152 : }
2153 :
2154 :
2155 : /*
2156 : Sets max LSN sent to file, and address from which data is only in the buffer
2157 :
2158 : SYNOPSIS
2159 : translog_set_sent_to_disk()
2160 : buffer buffer which we have sent to disk
2161 :
2162 : TODO: use atomic operations if possible (64bit architectures?)
2163 : */
2164 :
2165 : static void translog_set_sent_to_disk(struct st_translog_buffer *buffer)
2166 18683 : {
2167 18683 : LSN lsn= buffer->last_lsn;
2168 18683 : TRANSLOG_ADDRESS in_buffers= buffer->next_buffer_offset;
2169 :
2170 18683 : DBUG_ENTER("translog_set_sent_to_disk");
2171 18683 : pthread_mutex_lock(&log_descriptor.sent_to_disk_lock);
2172 18683 : DBUG_PRINT("enter", ("lsn: (%lu,0x%lx) in_buffers: (%lu,0x%lx) "
2173 : "in_buffers_only: (%lu,0x%lx) start: (%lu,0x%lx) "
2174 : "sent_to_disk: (%lu,0x%lx)",
2175 : LSN_IN_PARTS(lsn),
2176 : LSN_IN_PARTS(in_buffers),
2177 : LSN_IN_PARTS(log_descriptor.log_start),
2178 : LSN_IN_PARTS(log_descriptor.in_buffers_only),
2179 : LSN_IN_PARTS(log_descriptor.sent_to_disk)));
2180 : /*
2181 : We write sequentially (first part of following assert) but we rewrite
2182 : the same page in case we started mysql and shut it down immediately
2183 : (second part of the following assert)
2184 : */
2185 18683 : DBUG_ASSERT(cmp_translog_addr(lsn, log_descriptor.sent_to_disk) >= 0 ||
2186 : cmp_translog_addr(lsn, log_descriptor.log_start) < 0);
2187 18683 : log_descriptor.sent_to_disk= lsn;
2188 : /* LSN_IMPOSSIBLE == 0 => it will work for very first time */
2189 18683 : if (cmp_translog_addr(in_buffers, log_descriptor.in_buffers_only) > 0)
2190 : {
2191 8349 : log_descriptor.in_buffers_only= in_buffers;
2192 8349 : DBUG_PRINT("info", ("set new in_buffers_only"));
2193 : }
2194 18683 : pthread_mutex_unlock(&log_descriptor.sent_to_disk_lock);
2195 18683 : DBUG_VOID_RETURN;
2196 : }
2197 :
2198 :
2199 : /*
2200 : Sets address from which data is only in the buffer
2201 :
2202 : SYNOPSIS
2203 : translog_set_only_in_buffers()
2204 : lsn LSN to assign
2205 : in_buffers to assign to in_buffers_only
2206 : */
2207 :
2208 : static void translog_set_only_in_buffers(TRANSLOG_ADDRESS in_buffers)
2209 1673 : {
2210 1673 : DBUG_ENTER("translog_set_only_in_buffers");
2211 1673 : pthread_mutex_lock(&log_descriptor.sent_to_disk_lock);
2212 1673 : DBUG_PRINT("enter", ("in_buffers: (%lu,0x%lx) "
2213 : "in_buffers_only: (%lu,0x%lx)",
2214 : LSN_IN_PARTS(in_buffers),
2215 : LSN_IN_PARTS(log_descriptor.in_buffers_only)));
2216 : /* LSN_IMPOSSIBLE == 0 => it will work for very first time */
2217 1673 : if (cmp_translog_addr(in_buffers, log_descriptor.in_buffers_only) > 0)
2218 : {
2219 1054 : if (translog_status != TRANSLOG_OK)
2220 0 : DBUG_VOID_RETURN;
2221 1054 : log_descriptor.in_buffers_only= in_buffers;
2222 1054 : DBUG_PRINT("info", ("set new in_buffers_only"));
2223 : }
2224 1673 : pthread_mutex_unlock(&log_descriptor.sent_to_disk_lock);
2225 1673 : DBUG_VOID_RETURN;
2226 : }
2227 :
2228 :
2229 : /*
2230 : Gets address from which data is only in the buffer
2231 :
2232 : SYNOPSIS
2233 : translog_only_in_buffers()
2234 :
2235 : RETURN
2236 : address from which data is only in the buffer
2237 : */
2238 :
2239 : static TRANSLOG_ADDRESS translog_only_in_buffers()
2240 7638812 : {
2241 : register TRANSLOG_ADDRESS addr;
2242 7638812 : DBUG_ENTER("translog_only_in_buffers");
2243 7638812 : pthread_mutex_lock(&log_descriptor.sent_to_disk_lock);
2244 7638812 : addr= log_descriptor.in_buffers_only;
2245 7638812 : pthread_mutex_unlock(&log_descriptor.sent_to_disk_lock);
2246 7638812 : DBUG_RETURN(addr);
2247 : }
2248 :
2249 :
2250 : /*
2251 : Get max LSN sent to file
2252 :
2253 : SYNOPSIS
2254 : translog_get_sent_to_disk()
2255 :
2256 : RETURN
2257 : max LSN send to file
2258 : */
2259 :
2260 : static LSN translog_get_sent_to_disk()
2261 37076 : {
2262 : register LSN lsn;
2263 37076 : DBUG_ENTER("translog_get_sent_to_disk");
2264 37076 : pthread_mutex_lock(&log_descriptor.sent_to_disk_lock);
2265 37076 : lsn= log_descriptor.sent_to_disk;
2266 37076 : DBUG_PRINT("info", ("sent to disk up to (%lu,0x%lx)", LSN_IN_PARTS(lsn)));
2267 37076 : pthread_mutex_unlock(&log_descriptor.sent_to_disk_lock);
2268 37076 : DBUG_RETURN(lsn);
2269 : }
2270 :
2271 :
2272 : /*
2273 : Get first chunk address on the given page
2274 :
2275 : SYNOPSIS
2276 : translog_get_first_chunk_offset()
2277 : page The page where to find first chunk
2278 :
2279 : RETURN
2280 : first chunk offset
2281 : */
2282 :
2283 : static my_bool translog_get_first_chunk_offset(uchar *page)
2284 460361 : {
2285 460361 : DBUG_ENTER("translog_get_first_chunk_offset");
2286 460361 : DBUG_ASSERT(page[TRANSLOG_PAGE_FLAGS] < TRANSLOG_FLAGS_NUM);
2287 460361 : DBUG_RETURN(page_overhead[page[TRANSLOG_PAGE_FLAGS]]);
2288 : }
2289 :
2290 :
2291 : /*
2292 : Write coded length of record
2293 :
2294 : SYNOPSIS
2295 : translog_write_variable_record_1group_code_len
2296 : dst Destination buffer pointer
2297 : length Length which should be coded
2298 : header_len Calculated total header length
2299 : */
2300 :
2301 : static void
2302 : translog_write_variable_record_1group_code_len(uchar *dst,
2303 : translog_size_t length,
2304 : uint16 header_len)
2305 3305398 : {
2306 3305398 : switch (header_len) {
2307 : case 6: /* (5 + 1) */
2308 3281499 : DBUG_ASSERT(length <= 250);
2309 3281499 : *dst= (uint8) length;
2310 3281499 : return;
2311 : case 8: /* (5 + 3) */
2312 22722 : DBUG_ASSERT(length <= 0xFFFF);
2313 22722 : *dst= 251;
2314 22722 : int2store(dst + 1, length);
2315 22722 : return;
2316 : case 9: /* (5 + 4) */
2317 1175 : DBUG_ASSERT(length <= (ulong) 0xFFFFFF);
2318 1175 : *dst= 252;
2319 1175 : int3store(dst + 1, length);
2320 1175 : return;
2321 : case 10: /* (5 + 5) */
2322 2 : *dst= 253;
2323 2 : int4store(dst + 1, length);
2324 2 : return;
2325 : default:
2326 0 : DBUG_ASSERT(0);
2327 : }
2328 : return;
2329 : }
2330 :
2331 :
2332 : /*
2333 : Decode record data length and advance given pointer to the next field
2334 :
2335 : SYNOPSIS
2336 : translog_variable_record_1group_decode_len()
2337 : src The pointer to the pointer to the length beginning
2338 :
2339 : RETURN
2340 : decoded length
2341 : */
2342 :
2343 : static translog_size_t translog_variable_record_1group_decode_len(uchar **src)
2344 15927369 : {
2345 15927369 : uint8 first= (uint8) (**src);
2346 15927369 : switch (first) {
2347 : case 251:
2348 154208 : (*src)+= 3;
2349 154208 : return (uint2korr((*src) - 2));
2350 : case 252:
2351 4652 : (*src)+= 4;
2352 4652 : return (uint3korr((*src) - 3));
2353 : case 253:
2354 10 : (*src)+= 5;
2355 10 : return (uint4korr((*src) - 4));
2356 : case 254:
2357 : case 255:
2358 0 : DBUG_ASSERT(0); /* reserved for future use */
2359 : return (0);
2360 : default:
2361 15768499 : (*src)++;
2362 15768499 : return (first);
2363 : }
2364 : }
2365 :
2366 :
2367 : /*
2368 : Get total length of this chunk (not only body)
2369 :
2370 : SYNOPSIS
2371 : translog_get_total_chunk_length()
2372 : page The page where chunk placed
2373 : offset Offset of the chunk on this place
2374 :
2375 : RETURN
2376 : total length of the chunk
2377 : */
2378 :
2379 : static uint16 translog_get_total_chunk_length(uchar *page, uint16 offset)
2380 6382502 : {
2381 6382502 : DBUG_ENTER("translog_get_total_chunk_length");
2382 6382502 : switch (page[offset] & TRANSLOG_CHUNK_TYPE) {
2383 : case TRANSLOG_CHUNK_LSN:
2384 : {
2385 : /* 0 chunk referred as LSN (head or tail) */
2386 : translog_size_t rec_len;
2387 5461177 : uchar *start= page + offset;
2388 5461177 : uchar *ptr= start + 1 + 2; /* chunk type and short trid */
2389 : uint16 chunk_len, header_len, page_rest;
2390 5461177 : DBUG_PRINT("info", ("TRANSLOG_CHUNK_LSN"));
2391 5461177 : rec_len= translog_variable_record_1group_decode_len(&ptr);
2392 5461177 : chunk_len= uint2korr(ptr);
2393 5461177 : header_len= (uint16) (ptr -start) + 2;
2394 5461177 : DBUG_PRINT("info", ("rec len: %lu chunk len: %u header len: %u",
2395 : (ulong) rec_len, (uint) chunk_len, (uint) header_len));
2396 5461177 : if (chunk_len)
2397 : {
2398 4 : DBUG_PRINT("info", ("chunk len: %u + %u = %u",
2399 : (uint) header_len, (uint) chunk_len,
2400 : (uint) (chunk_len + header_len)));
2401 4 : DBUG_RETURN(chunk_len + header_len);
2402 : }
2403 5461173 : page_rest= TRANSLOG_PAGE_SIZE - offset;
2404 5461173 : DBUG_PRINT("info", ("page_rest %u", (uint) page_rest));
2405 5461173 : if (rec_len + header_len < page_rest)
2406 5392152 : DBUG_RETURN(rec_len + header_len);
2407 69021 : DBUG_RETURN(page_rest);
2408 : }
2409 : case TRANSLOG_CHUNK_FIXED:
2410 : {
2411 : uchar *ptr;
2412 263466 : uint type= page[offset] & TRANSLOG_REC_TYPE;
2413 : uint length;
2414 : int i;
2415 : /* 1 (pseudo)fixed record (also LSN) */
2416 263466 : DBUG_PRINT("info", ("TRANSLOG_CHUNK_FIXED"));
2417 263466 : DBUG_ASSERT(log_record_type_descriptor[type].rclass ==
2418 : LOGRECTYPE_FIXEDLENGTH ||
2419 : log_record_type_descriptor[type].rclass ==
2420 : LOGRECTYPE_PSEUDOFIXEDLENGTH);
2421 263466 : if (log_record_type_descriptor[type].rclass == LOGRECTYPE_FIXEDLENGTH)
2422 : {
2423 261865 : DBUG_PRINT("info",
2424 : ("Fixed length: %u",
2425 : (uint) (log_record_type_descriptor[type].fixed_length + 3)));
2426 261865 : DBUG_RETURN(log_record_type_descriptor[type].fixed_length + 3);
2427 : }
2428 :
2429 1601 : ptr= page + offset + 3; /* first compressed LSN */
2430 1601 : length= log_record_type_descriptor[type].fixed_length + 3;
2431 4001 : for (i= 0; i < log_record_type_descriptor[type].compressed_LSN; i++)
2432 : {
2433 : /* first 2 bits is length - 2 */
2434 2400 : uint len= (((uint8) (*ptr)) >> 6) + 2;
2435 2400 : if (ptr[0] == 0 && ((uint8) ptr[1]) == 1)
2436 0 : len+= LSN_STORE_SIZE; /* case of full LSN storing */
2437 2400 : ptr+= len;
2438 : /* subtract saved bytes */
2439 2400 : length-= (LSN_STORE_SIZE - len);
2440 : }
2441 1601 : DBUG_PRINT("info", ("Pseudo-fixed length: %u", length));
2442 1601 : DBUG_RETURN(length);
2443 : }
2444 : case TRANSLOG_CHUNK_NOHDR:
2445 : /* 2 no header chunk (till page end) */
2446 589153 : DBUG_PRINT("info", ("TRANSLOG_CHUNK_NOHDR length: %u",
2447 : (uint) (TRANSLOG_PAGE_SIZE - offset)));
2448 589153 : DBUG_RETURN(TRANSLOG_PAGE_SIZE - offset);
2449 : case TRANSLOG_CHUNK_LNGTH: /* 3 chunk with chunk length */
2450 68706 : DBUG_PRINT("info", ("TRANSLOG_CHUNK_LNGTH"));
2451 68706 : DBUG_ASSERT(TRANSLOG_PAGE_SIZE - offset >= 3);
2452 68706 : DBUG_PRINT("info", ("length: %u", uint2korr(page + offset + 1) + 3));
2453 68706 : DBUG_RETURN(uint2korr(page + offset + 1) + 3);
2454 : default:
2455 0 : DBUG_ASSERT(0);
2456 : DBUG_RETURN(0);
2457 : }
2458 : }
2459 :
2460 : /*
2461 : @brief Waits previous buffer flush finish
2462 :
2463 : @param buffer buffer for check
2464 :
2465 : @retval 0 previous buffer flushed and this thread have to flush this one
2466 : @retval 1 previous buffer flushed and this buffer flushed by other thread too
2467 : */
2468 :
2469 : my_bool translog_prev_buffer_flush_wait(struct st_translog_buffer *buffer)
2470 37120 : {
2471 37120 : TRANSLOG_ADDRESS offset= buffer->offset;
2472 37120 : TRANSLOG_FILE *file= buffer->file;
2473 37120 : uint8 ver= buffer->ver;
2474 37120 : DBUG_ENTER("translog_prev_buffer_flush_wait");
2475 37120 : DBUG_PRINT("enter", ("buffer: 0x%lx #%u offset: (%lu,0x%lx) "
2476 : "prev sent: (%lu,0x%lx) prev offset: (%lu,0x%lx)",
2477 : (ulong) buffer, (uint) buffer->buffer_no,
2478 : LSN_IN_PARTS(buffer->offset),
2479 : LSN_IN_PARTS(buffer->prev_sent_to_disk),
2480 : LSN_IN_PARTS(buffer->prev_buffer_offset)));
2481 37120 : translog_buffer_lock_assert_owner(buffer);
2482 : /*
2483 : if prev_sent_to_disk == LSN_IMPOSSIBLE then
2484 : prev_buffer_offset should be LSN_IMPOSSIBLE
2485 : because it means that this buffer was never used
2486 : */
2487 37120 : DBUG_ASSERT((buffer->prev_sent_to_disk == LSN_IMPOSSIBLE &&
2488 : buffer->prev_buffer_offset == LSN_IMPOSSIBLE) ||
2489 : buffer->prev_sent_to_disk != LSN_IMPOSSIBLE);
2490 37120 : if (buffer->prev_buffer_offset != buffer->prev_sent_to_disk)
2491 : {
2492 : do {
2493 0 : pthread_cond_wait(&buffer->prev_sent_to_disk_cond, &buffer->mutex);
2494 0 : if (buffer->file != file || buffer->offset != offset ||
2495 : buffer->ver != ver)
2496 : {
2497 0 : translog_buffer_unlock(buffer);
2498 0 : DBUG_RETURN(1); /* some the thread flushed the buffer already */
2499 : }
2500 0 : } while(buffer->prev_buffer_offset != buffer->prev_sent_to_disk);
2501 : }
2502 37120 : DBUG_RETURN(0);
2503 : }
2504 :
2505 :
2506 : /*
2507 : Flush given buffer
2508 :
2509 : SYNOPSIS
2510 : translog_buffer_flush()
2511 : buffer This buffer should be flushed
2512 :
2513 : RETURN
2514 : 0 OK
2515 : 1 Error
2516 : */
2517 :
2518 : static my_bool translog_buffer_flush(struct st_translog_buffer *buffer)
2519 20357 : {
2520 : uint32 i, pg;
2521 20357 : TRANSLOG_ADDRESS offset= buffer->offset;
2522 20357 : TRANSLOG_FILE *file= buffer->file;
2523 20357 : uint8 ver= buffer->ver;
2524 20357 : DBUG_ENTER("translog_buffer_flush");
2525 20357 : DBUG_PRINT("enter",
2526 : ("Buffer: #%u 0x%lx file: %d offset: (%lu,0x%lx) size: %lu",
2527 : (uint) buffer->buffer_no, (ulong) buffer,
2528 : buffer->file->handler.file,
2529 : LSN_IN_PARTS(buffer->offset),
2530 : (ulong) buffer->size));
2531 20357 : translog_buffer_lock_assert_owner(buffer);
2532 :
2533 20357 : if (buffer->file == NULL)
2534 0 : DBUG_RETURN(0);
2535 :
2536 20357 : translog_wait_for_writers(buffer);
2537 :
2538 20357 : if (buffer->file != file || buffer->offset != offset || buffer->ver != ver)
2539 0 : DBUG_RETURN(0); /* some the thread flushed the buffer already */
2540 :
2541 20357 : if (buffer->is_closing_buffer)
2542 : {
2543 : /* some other flush in progress */
2544 0 : translog_wait_for_closing(buffer);
2545 : }
2546 :
2547 20357 : if (buffer->file != file || buffer->offset != offset || buffer->ver != ver)
2548 0 : DBUG_RETURN(0); /* some the thread flushed the buffer already */
2549 :
2550 20357 : if (buffer->overlay && translog_prev_buffer_flush_wait(buffer))
2551 0 : DBUG_RETURN(0); /* some the thread flushed the buffer already */
2552 :
2553 : /*
2554 : Send page by page in the pagecache what we are going to write on the
2555 : disk
2556 : */
2557 20357 : file= buffer->file;
2558 20357 : for (i= 0, pg= LSN_OFFSET(buffer->offset) / TRANSLOG_PAGE_SIZE;
2559 251634 : i < buffer->size;
2560 210920 : i+= TRANSLOG_PAGE_SIZE, pg++)
2561 : {
2562 210921 : TRANSLOG_ADDRESS addr= (buffer->offset + i);
2563 : TRANSLOG_VALIDATOR_DATA data;
2564 210921 : DBUG_PRINT("info", ("send log form %lu till %lu address: (%lu,0x%lx) "
2565 : "page #: %lu buffer size: %lu buffer: 0x%lx",
2566 : (ulong) i, (ulong) (i + TRANSLOG_PAGE_SIZE),
2567 : LSN_IN_PARTS(addr), (ulong) pg, (ulong) buffer->size,
2568 : (ulong) buffer));
2569 210921 : data.addr= &addr;
2570 210921 : DBUG_ASSERT(log_descriptor.pagecache->block_size == TRANSLOG_PAGE_SIZE);
2571 210921 : DBUG_ASSERT(i + TRANSLOG_PAGE_SIZE <= buffer->size);
2572 210921 : if (translog_status != TRANSLOG_OK && translog_status != TRANSLOG_SHUTDOWN)
2573 1 : DBUG_RETURN(1);
2574 210920 : if (pagecache_inject(log_descriptor.pagecache,
2575 : &file->handler, pg, 3,
2576 : buffer->buffer + i,
2577 : PAGECACHE_PLAIN_PAGE,
2578 : PAGECACHE_LOCK_LEFT_UNLOCKED,
2579 : PAGECACHE_PIN_LEFT_UNPINNED, 0,
2580 : LSN_IMPOSSIBLE))
2581 : {
2582 0 : DBUG_PRINT("error",
2583 : ("Can't write page (%lu,0x%lx) to pagecache, error: %d",
2584 : (ulong) buffer->file->number,
2585 : (ulong) (LSN_OFFSET(buffer->offset)+ i),
2586 : my_errno));
2587 0 : translog_stop_writing();
2588 0 : DBUG_RETURN(1);
2589 : }
2590 : }
2591 20356 : file->is_sync= 0;
2592 20356 : if (my_pwrite(file->handler.file, buffer->buffer,
2593 : buffer->size, LSN_OFFSET(buffer->offset),
2594 : log_write_flags))
2595 : {
2596 0 : DBUG_PRINT("error", ("Can't write buffer (%lu,0x%lx) size %lu "
2597 : "to the disk (%d)",
2598 : (ulong) file->handler.file,
2599 : (ulong) LSN_OFFSET(buffer->offset),
2600 : (ulong) buffer->size, errno));
2601 0 : translog_stop_writing();
2602 0 : DBUG_RETURN(1);
2603 : }
2604 : /*
2605 : Dropping the flag in such way can make false alarm: signalling than the
2606 : file in not sync when it is sync, but the situation is quite rare and
2607 : protections with mutexes give much more overhead to the whole engine
2608 : */
2609 20356 : file->is_sync= 0;
2610 :
2611 20356 : if (LSN_OFFSET(buffer->last_lsn) != 0) /* if buffer->last_lsn is set */
2612 : {
2613 18683 : if (translog_prev_buffer_flush_wait(buffer))
2614 0 : DBUG_RETURN(0); /* some the thread flushed the buffer already */
2615 18683 : translog_set_sent_to_disk(buffer);
2616 : }
2617 : else
2618 1673 : translog_set_only_in_buffers(buffer->next_buffer_offset);
2619 :
2620 : /* say to next buffer that we are finished */
2621 : {
2622 : struct st_translog_buffer *next_buffer=
2623 20356 : log_descriptor.buffers + ((buffer->buffer_no + 1) % TRANSLOG_BUFFERS_NO);
2624 20356 : if (likely(translog_status == TRANSLOG_OK)){
2625 19658 : translog_buffer_lock(next_buffer);
2626 19658 : next_buffer->prev_sent_to_disk= buffer->offset;
2627 19658 : translog_buffer_unlock(next_buffer);
2628 19658 : pthread_cond_broadcast(&next_buffer->prev_sent_to_disk_cond);
2629 : }
2630 : else
2631 : {
2632 : /*
2633 : It is shutdown =>
2634 : 1) there is only one thread
2635 : 2) mutexes of other buffers can be destroyed => we can't use them
2636 : */
2637 698 : next_buffer->prev_sent_to_disk= buffer->offset;
2638 : }
2639 : }
2640 : /* Free buffer */
2641 20356 : buffer->file= NULL;
2642 20356 : buffer->overlay= 0;
2643 20356 : buffer->ver++;
2644 20356 : pthread_mutex_lock(&log_descriptor.dirty_buffer_mask_lock);
2645 20356 : log_descriptor.dirty_buffer_mask&= ~(1 << buffer->buffer_no);
2646 20356 : pthread_mutex_unlock(&log_descriptor.dirty_buffer_mask_lock);
2647 20356 : pthread_cond_broadcast(&buffer->waiting_filling_buffer);
2648 20356 : DBUG_RETURN(0);
2649 : }
2650 :
2651 :
2652 : /*
2653 : Recover page with sector protection (wipe out failed chunks)
2654 :
2655 : SYNOPSYS
2656 : translog_recover_page_up_to_sector()
2657 : page reference on the page
2658 : offset offset of failed sector
2659 :
2660 : RETURN
2661 : 0 OK
2662 : 1 Error
2663 : */
2664 :
2665 : static my_bool translog_recover_page_up_to_sector(uchar *page, uint16 offset)
2666 0 : {
2667 0 : uint16 chunk_offset= translog_get_first_chunk_offset(page), valid_chunk_end;
2668 0 : DBUG_ENTER("translog_recover_page_up_to_sector");
2669 0 : DBUG_PRINT("enter", ("offset: %u first chunk: %u",
2670 : (uint) offset, (uint) chunk_offset));
2671 :
2672 0 : while (page[chunk_offset] != TRANSLOG_FILLER && chunk_offset < offset)
2673 : {
2674 : uint16 chunk_length;
2675 0 : if ((chunk_length=
2676 : translog_get_total_chunk_length(page, chunk_offset)) == 0)
2677 : {
2678 0 : DBUG_PRINT("error", ("cant get chunk length (offset %u)",
2679 : (uint) chunk_offset));
2680 0 : DBUG_RETURN(1);
2681 : }
2682 0 : DBUG_PRINT("info", ("chunk: offset: %u length %u",
2683 : (uint) chunk_offset, (uint) chunk_length));
2684 0 : if (((ulong) chunk_offset) + ((ulong) chunk_length) > TRANSLOG_PAGE_SIZE)
2685 : {
2686 0 : DBUG_PRINT("error", ("damaged chunk (offset %u) in trusted area",
2687 : (uint) chunk_offset));
2688 0 : DBUG_RETURN(1);
2689 : }
2690 0 : chunk_offset+= chunk_length;
2691 : }
2692 :
2693 0 : valid_chunk_end= chunk_offset;
2694 : /* end of trusted area - sector parsing */
2695 0 : while (page[chunk_offset] != TRANSLOG_FILLER)
2696 : {
2697 : uint16 chunk_length;
2698 0 : if ((chunk_length=
2699 : translog_get_total_chunk_length(page, chunk_offset)) == 0)
2700 0 : break;
2701 :
2702 0 : DBUG_PRINT("info", ("chunk: offset: %u length %u",
2703 : (uint) chunk_offset, (uint) chunk_length));
2704 0 : if (((ulong) chunk_offset) + ((ulong) chunk_length) >
2705 : (uint) (offset + DISK_DRIVE_SECTOR_SIZE))
2706 0 : break;
2707 :
2708 0 : chunk_offset+= chunk_length;
2709 0 : valid_chunk_end= chunk_offset;
2710 : }
2711 0 : DBUG_PRINT("info", ("valid chunk end offset: %u", (uint) valid_chunk_end));
2712 :
2713 0 : memset(page + valid_chunk_end, TRANSLOG_FILLER,
2714 : TRANSLOG_PAGE_SIZE - valid_chunk_end);
2715 :
2716 0 : DBUG_RETURN(0);
2717 : }
2718 :
2719 :
2720 : /**
2721 : @brief Dummy write callback.
2722 : */
2723 :
2724 : static my_bool
2725 : translog_dummy_callback(uchar *page __attribute__((unused)),
2726 : pgcache_page_no_t page_no __attribute__((unused)),
2727 : uchar* data_ptr __attribute__((unused)))
2728 0 : {
2729 0 : return 0;
2730 : }
2731 :
2732 :
2733 : /**
2734 : @brief Checks and removes sector protection.
2735 :
2736 : @param page reference on the page content.
2737 : @param file transaction log descriptor.
2738 :
2739 : @retvat 0 OK
2740 : @retval 1 Error
2741 : */
2742 :
2743 : static my_bool
2744 : translog_check_sector_protection(uchar *page, TRANSLOG_FILE *file)
2745 41980 : {
2746 : uint i, offset;
2747 : uchar *table= page + page_overhead[page[TRANSLOG_PAGE_FLAGS]] -
2748 41980 : TRANSLOG_PAGE_SIZE / DISK_DRIVE_SECTOR_SIZE;
2749 41980 : uint8 current= table[0];
2750 41980 : DBUG_ENTER("translog_check_sector_protection");
2751 :
2752 41980 : for (i= 1, offset= DISK_DRIVE_SECTOR_SIZE;
2753 713660 : i < TRANSLOG_PAGE_SIZE / DISK_DRIVE_SECTOR_SIZE;
2754 629700 : i++, offset+= DISK_DRIVE_SECTOR_SIZE)
2755 : {
2756 : /*
2757 : TODO: add chunk counting for "suspecting" sectors (difference is
2758 : more than 1-2), if difference more then present chunks then it is
2759 : the problem.
2760 : */
2761 629700 : uint8 test= page[offset];
2762 629700 : DBUG_PRINT("info", ("sector: #%u offset: %u current: %lx "
2763 : "read: 0x%x stored: 0x%x%x",
2764 : i, offset, (ulong) current,
2765 : (uint) uint2korr(page + offset), (uint) table[i],
2766 : (uint) table[i + 1]));
2767 : /*
2768 : 3 is minimal possible record length. So we can have "distance"
2769 : between 2 sectors value more then DISK_DRIVE_SECTOR_SIZE / 3
2770 : only if it is old value, i.e. the sector was not written.
2771 : */
2772 629700 : if (((test < current) &&
2773 : ((uint)(0xFFL - current + test) > DISK_DRIVE_SECTOR_SIZE / 3)) ||
2774 : ((test >= current) &&
2775 : ((uint)(test - current) > DISK_DRIVE_SECTOR_SIZE / 3)))
2776 : {
2777 0 : if (translog_recover_page_up_to_sector(page, offset))
2778 0 : DBUG_RETURN(1);
2779 0 : file->was_recovered= 1;
2780 0 : DBUG_RETURN(0);
2781 : }
2782 :
2783 : /* Restore value on the page */
2784 629700 : page[offset]= table[i];
2785 629700 : current= test;
2786 629700 : DBUG_PRINT("info", ("sector: #%u offset: %u current: %lx "
2787 : "read: 0x%x stored: 0x%x",
2788 : i, offset, (ulong) current,
2789 : (uint) page[offset], (uint) table[i]));
2790 : }
2791 41980 : DBUG_RETURN(0);
2792 : }
2793 :
2794 :
2795 : /**
2796 : @brief Log page validator (read callback)
2797 :
2798 : @param page The page data to check
2799 : @param page_no The page number (<offset>/<page length>)
2800 : @param data_ptr Read callback data pointer (pointer to TRANSLOG_FILE)
2801 :
2802 : @todo: add turning loghandler to read-only mode after merging with
2803 : that patch.
2804 :
2805 : @retval 0 OK
2806 : @retval 1 Error
2807 : */
2808 :
2809 : static my_bool translog_page_validator(uchar *page,
2810 : pgcache_page_no_t page_no,
2811 : uchar* data_ptr)
2812 773402 : {
2813 : uint this_page_page_overhead;
2814 : uint flags;
2815 : uchar *page_pos;
2816 773402 : TRANSLOG_FILE *data= (TRANSLOG_FILE *) data_ptr;
2817 : #ifndef DBUG_OFF
2818 773402 : pgcache_page_no_t offset= page_no * TRANSLOG_PAGE_SIZE;
2819 : #endif
2820 773402 : DBUG_ENTER("translog_page_validator");
2821 :
2822 773402 : data->was_recovered= 0;
2823 :
2824 773402 : if (uint3korr(page) != page_no ||
2825 : uint3korr(page + 3) != data->number)
2826 : {
2827 0 : DBUG_PRINT("error", ("Page (%lu,0x%lx): "
2828 : "page address written in the page is incorrect: "
2829 : "File %lu instead of %lu or page %lu instead of %lu",
2830 : (ulong) data->number, (ulong) offset,
2831 : (ulong) uint3korr(page + 3), (ulong) data->number,
2832 : (ulong) uint3korr(page),
2833 : (ulong) page_no));
2834 0 : DBUG_RETURN(1);
2835 : }
2836 773402 : flags= (uint)(page[TRANSLOG_PAGE_FLAGS]);
2837 773402 : this_page_page_overhead= page_overhead[flags];
2838 773402 : if (flags & ~(TRANSLOG_PAGE_CRC | TRANSLOG_SECTOR_PROTECTION |
2839 : TRANSLOG_RECORD_CRC))
2840 : {
2841 0 : DBUG_PRINT("error", ("Page (%lu,0x%lx): "
2842 : "Garbage in the page flags field detected : %x",
2843 : (ulong) data->number, (ulong) offset,
2844 : (uint) flags));
2845 0 : DBUG_RETURN(1);
2846 : }
2847 773402 : page_pos= page + (3 + 3 + 1);
2848 773402 : if (flags & TRANSLOG_PAGE_CRC)
2849 : {
2850 : uint32 crc= translog_crc(page + this_page_page_overhead,
2851 : TRANSLOG_PAGE_SIZE -
2852 41980 : this_page_page_overhead);
2853 41980 : if (crc != uint4korr(page_pos))
2854 : {
2855 0 : DBUG_PRINT("error", ("Page (%lu,0x%lx): "
2856 : "CRC mismatch: calculated: %lx on the page %lx",
2857 : (ulong) data->number, (ulong) offset,
2858 : (ulong) crc, (ulong) uint4korr(page_pos)));
2859 0 : DBUG_RETURN(1);
2860 : }
2861 41980 : page_pos+= CRC_SIZE; /* Skip crc */
2862 : }
2863 773402 : if (flags & TRANSLOG_SECTOR_PROTECTION &&
2864 : translog_check_sector_protection(page, data))
2865 : {
2866 0 : DBUG_RETURN(1);
2867 : }
2868 773402 : DBUG_RETURN(0);
2869 : }
2870 :
2871 :
2872 : /**
2873 : @brief Locks the loghandler.
2874 : */
2875 :
2876 : void translog_lock()
2877 11529295 : {
2878 : uint8 current_buffer;
2879 11529295 : DBUG_ENTER("translog_lock");
2880 :
2881 : /*
2882 : Locking the loghandler mean locking current buffer, but it can change
2883 : during locking, so we should check it
2884 : */
2885 : for (;;)
2886 : {
2887 : /*
2888 : log_descriptor.bc.buffer_no is only one byte so its reading is
2889 : an atomic operation
2890 : */
2891 11529295 : current_buffer= log_descriptor.bc.buffer_no;
2892 11529295 : translog_buffer_lock(log_descriptor.buffers + current_buffer);
2893 11529295 : if (log_descriptor.bc.buffer_no == current_buffer)
2894 11529295 : break;
2895 0 : translog_buffer_unlock(log_descriptor.buffers + current_buffer);
2896 11529295 : }
2897 11529295 : DBUG_VOID_RETURN;
2898 : }
2899 :
2900 :
2901 : /*
2902 : Unlock the loghandler
2903 :
2904 : SYNOPSIS
2905 : translog_unlock()
2906 :
2907 : RETURN
2908 : 0 OK
2909 : 1 Error
2910 : */
2911 :
2912 : void translog_unlock()
2913 11456591 : {
2914 11456591 : translog_buffer_unlock(log_descriptor.bc.buffer);
2915 : }
2916 :
2917 :
2918 : /**
2919 : @brief Get log page by file number and offset of the beginning of the page
2920 : 
2921 : @param data validator data, which contains the page address
2922 : @param buffer buffer for page placing
2923 : (might not be used in some cache implementations)
2924 : @param direct_link if it is not NULL then caller can accept direct
2925 : link to the page cache
2926 : 
2927 : @retval NULL Error
2928 : @retval # pointer to the page cache which should be used to read this page
2929 : */
2930 : 
/*
  Overview of the two paths below:
    - If the address is not below translog_only_in_buffers(), the log is
      locked and the log buffers are walked starting from the current one.
      The page is copied from the matching buffer into 'buffer' and that
      pointer is returned; *direct_link (if given) stays NULL.
    - Otherwise the page is read through pagecache_read(); only this path
      assigns data->was_recovered.
*/
2931 : static uchar *translog_get_page(TRANSLOG_VALIDATOR_DATA *data, uchar *buffer,
2932 : PAGECACHE_BLOCK_LINK **direct_link)
2933 7584646 : {
2934 7584646 : TRANSLOG_ADDRESS addr= *(data->addr), in_buffers;
2935 7584646 : uint32 file_no= LSN_FILE_NO(addr);
2936 : TRANSLOG_FILE *file;
2937 7584646 : DBUG_ENTER("translog_get_page");
2938 7584646 : DBUG_PRINT("enter", ("File: %lu Offset: %lu(0x%lx)",
2939 : (ulong) file_no,
2940 : (ulong) LSN_OFFSET(addr),
2941 : (ulong) LSN_OFFSET(addr)));
2942 : 
2943 : /* it is really page address */
2944 7584646 : DBUG_ASSERT(LSN_OFFSET(addr) % TRANSLOG_PAGE_SIZE == 0);
2945 7584646 : if (direct_link)
2946 7243349 : *direct_link= NULL;
2947 : 
/* the lookup is repeated from here if a buffer changed while we waited */
2948 7584646 : restart:
2949 : 
/* first check is done without the log lock; it is repeated under the lock */
2950 7584646 : in_buffers= translog_only_in_buffers();
2951 7584646 : DBUG_PRINT("info", ("in_buffers: (%lu,0x%lx)",
2952 : LSN_IN_PARTS(in_buffers)));
2953 7584646 : if (in_buffers != LSN_IMPOSSIBLE &&
2954 : cmp_translog_addr(addr, in_buffers) >= 0)
2955 : {
2956 54166 : translog_lock();
2957 54166 : DBUG_ASSERT(cmp_translog_addr(addr, log_descriptor.horizon) < 0);
2958 : /* recheck with locked loghandler */
2959 54166 : in_buffers= translog_only_in_buffers();
2960 54166 : if (cmp_translog_addr(addr, in_buffers) >= 0)
2961 : {
2962 54166 : uint16 buffer_no= log_descriptor.bc.buffer_no;
2963 : #ifndef DBUG_OFF
2964 54166 : uint16 buffer_start= buffer_no;
2965 : #endif
/* buffer_unlock always names the buffer whose lock we hold right now */
2966 54166 : struct st_translog_buffer *buffer_unlock= log_descriptor.bc.buffer;
2967 54166 : struct st_translog_buffer *curr_buffer= log_descriptor.bc.buffer;
2968 : for (;;)
2969 : {
2970 : /*
2971 : if the page is in the buffer and it is the last version of the
2972 : page (in case of division the page by buffer flush)
2973 : */
2974 54166 : if (curr_buffer->file != NULL &&
2975 : cmp_translog_addr(addr, curr_buffer->offset) >= 0 &&
2976 : cmp_translog_addr(addr,
2977 : (curr_buffer->next_buffer_offset ?
2978 : curr_buffer->next_buffer_offset:
2979 : curr_buffer->offset + curr_buffer->size)) < 0)
2980 : {
/*
  Remember the identity of the buffer (offset, file, version) so that a
  change during translog_wait_for_writers() can be detected.
  NOTE(review): presumably the wait can release the buffer lock - confirm.
*/
2981 54166 : TRANSLOG_ADDRESS offset= curr_buffer->offset;
2982 54166 : TRANSLOG_FILE *fl= curr_buffer->file;
2983 54166 : uchar *from, *table= NULL;
2984 : int is_last_unfinished_page;
2985 54166 : uint last_protected_sector= 0;
2986 : TRANSLOG_FILE file_copy;
2987 54166 : uint8 ver= curr_buffer->ver;
2988 54166 : translog_wait_for_writers(curr_buffer);
2989 54166 : if (offset != curr_buffer->offset || fl != curr_buffer->file ||
2990 : ver != curr_buffer->ver)
2991 : {
/* the buffer is not the one we selected any more: start over */
2992 0 : DBUG_ASSERT(buffer_unlock == curr_buffer);
2993 0 : translog_buffer_unlock(buffer_unlock);
2994 0 : goto restart;
2995 : }
2996 54166 : DBUG_ASSERT(LSN_FILE_NO(addr) == LSN_FILE_NO(curr_buffer->offset));
2997 54166 : from= curr_buffer->buffer + (addr - curr_buffer->offset);
2998 54166 : memcpy(buffer, from, TRANSLOG_PAGE_SIZE);
2999 : /*
3000 : We can use copy then in translog_page_validator() because it
3001 : do not put it permanently somewhere.
3002 : We have to use copy because after releasing log lock we can't
3003 : guaranty that the file still be present (in real life it will be
3004 : present but theoretically possible that it will be released
3005 : already from last files cache);
3006 : */
3007 54166 : file_copy= *(curr_buffer->file);
3008 54166 : file_copy.handler.callback_data= (uchar*) &file_copy;
/*
  The page is "last unfinished" when this buffer is the current one and the
  write pointer of the log cursor lies inside the page just copied.
*/
3009 54166 : is_last_unfinished_page= ((log_descriptor.bc.buffer ==
3010 : curr_buffer) &&
3011 : (log_descriptor.bc.ptr >= from) &&
3012 : (log_descriptor.bc.ptr <
3013 : from + TRANSLOG_PAGE_SIZE));
3014 54166 : if (is_last_unfinished_page &&
3015 : (buffer[TRANSLOG_PAGE_FLAGS] & TRANSLOG_SECTOR_PROTECTION))
3016 : {
3017 2 : last_protected_sector= ((log_descriptor.bc.previous_offset - 1) /
3018 : DISK_DRIVE_SECTOR_SIZE);
3019 2 : table= buffer + log_descriptor.page_overhead -
3020 : TRANSLOG_PAGE_SIZE / DISK_DRIVE_SECTOR_SIZE;
3021 : }
3022 : 
/* everything needed was copied; the rest works on the private copy */
3023 54166 : DBUG_ASSERT(buffer_unlock == curr_buffer);
3024 54166 : translog_buffer_unlock(buffer_unlock);
3025 54166 : if (is_last_unfinished_page)
3026 : {
3027 : uint i;
3028 : /*
3029 : This is last unfinished page => we should not check CRC and
3030 : remove only that protection which already installed (no need
3031 : to check it)
3032 : 
3033 : We do not check the flag of sector protection, because if
3034 : (buffer[TRANSLOG_PAGE_FLAGS] & TRANSLOG_SECTOR_PROTECTION) is
3035 : not set then last_protected_sector will be 0 so following loop
3036 : will be never executed
3037 : */
3038 52752 : DBUG_PRINT("info", ("This is last unfinished page, "
3039 : "last protected sector %u",
3040 : last_protected_sector));
3041 52766 : for (i= 1; i <= last_protected_sector; i++)
3042 : {
3043 14 : uint offset= i * DISK_DRIVE_SECTOR_SIZE;
3044 14 : DBUG_PRINT("info", ("Sector %u: 0x%02x <- 0x%02x",
3045 : i, buffer[offset],
3046 : table[i]));
3047 14 : buffer[offset]= table[i];
3048 : }
3049 : }
3050 : else
3051 : {
3052 : /*
3053 : This IF should be true because we use in-memory data which
3054 : supposed to be correct.
3055 : */
3056 1414 : if (translog_page_validator(buffer,
3057 : LSN_OFFSET(addr) / TRANSLOG_PAGE_SIZE,
3058 : (uchar*) &file_copy))
3059 : {
3060 0 : DBUG_ASSERT(0);
3061 : buffer= NULL;
3062 : }
3063 : }
/*
  NOTE(review): data->was_recovered is not assigned on this path (the
  validator above writes to file_copy only) - confirm callers do not rely
  on it here.
*/
3064 54166 : DBUG_RETURN(buffer);
3065 : }
/*
  Not in this buffer: move to the next one. The next buffer is locked
  before the previous one is unlocked.
*/
3066 0 : buffer_no= (buffer_no + 1) % TRANSLOG_BUFFERS_NO;
3067 0 : curr_buffer= log_descriptor.buffers + buffer_no;
3068 0 : translog_buffer_lock(curr_buffer);
3069 0 : translog_buffer_unlock(buffer_unlock);
3070 0 : buffer_unlock= curr_buffer;
3071 : /* we can't make a full circle */
3072 0 : DBUG_ASSERT(buffer_start != buffer_no);
3073 : }
3074 : }
/* recheck under the lock showed the page is not only in buffers any more */
3075 0 : translog_unlock();
3076 : }
/*
  NOTE(review): when DBUG_ASSERT is compiled out, a NULL result of
  get_logfile_by_number() would be dereferenced below - confirm that the
  file is always open at this point.
*/
3077 7530480 : file= get_logfile_by_number(file_no);
3078 7530480 : DBUG_ASSERT(file != NULL);
/*
  With direct_link the page is not copied to 'buffer': NULL is passed as the
  destination and the page is read-locked and handed out through the link.
  NOTE(review): the meaning of the literal 3 is not visible here - confirm
  against the pagecache_read() declaration.
*/
3079 7530480 : buffer= pagecache_read(log_descriptor.pagecache, &file->handler,
3080 : LSN_OFFSET(addr) / TRANSLOG_PAGE_SIZE,
3081 : 3, (direct_link ? NULL : buffer),
3082 : PAGECACHE_PLAIN_PAGE,
3083 : (direct_link ?
3084 : PAGECACHE_LOCK_READ :
3085 : PAGECACHE_LOCK_LEFT_UNLOCKED),
3086 : direct_link);
3087 7530480 : DBUG_PRINT("info", ("Direct link is assigned to : 0x%lx * 0x%lx",
3088 : (ulong) direct_link,
3089 : (ulong)(direct_link ? *direct_link : NULL)));
3090 7530480 : data->was_recovered= file->was_recovered;
3091 7530480 : DBUG_RETURN(buffer);
3092 : }
3093 :
3094 :
3095 : /**
3096 : @brief free direct log page link
3097 :
3098 : @param direct_link the direct log page link to be freed
3099 :
3100 : */
3101 :
3102 : static void translog_free_link(PAGECACHE_BLOCK_LINK *direct_link)
3103 7397663 : {
3104 7397663 : DBUG_ENTER("translog_free_link");
3105 7397663 : DBUG_PRINT("info", ("Direct link: 0x%lx",
3106 : (ulong) direct_link));
3107 7397663 : if (direct_link)
3108 7189254 : pagecache_unlock_by_link(log_descriptor.pagecache, direct_link,
3109 : PAGECACHE_LOCK_READ_UNLOCK, PAGECACHE_UNPIN,
3110 : LSN_IMPOSSIBLE, LSN_IMPOSSIBLE, 0, FALSE);
3111 7397663 : DBUG_VOID_RETURN;
3112 : }
3113 :
3114 :
3115 : /**
3116 : @brief Finds last full page of the given log file.
3117 :
3118 : @param addr address structure to fill with data, which contain
3119 : file number of the log file
3120 : @param last_page_ok Result of the check whether last page OK.
3121 : (for now only we check only that file length
3122 : divisible on page length).
3123 : @param no_errors suppress messages about non-critical errors
3124 :
3125 : @retval 0 OK
3126 : @retval 1 Error
3127 : */
3128 :
3129 : static my_bool translog_get_last_page_addr(TRANSLOG_ADDRESS *addr,
3130 : my_bool *last_page_ok,
3131 : my_bool no_errors)
3132 3836 : {
3133 : char path[FN_REFLEN];
3134 : uint32 rec_offset;
3135 : my_off_t file_size;
3136 3836 : uint32 file_no= LSN_FILE_NO(*addr);
3137 : TRANSLOG_FILE *file;
3138 : #ifndef DBUG_OFF
3139 : char buff[21];
3140 : #endif
3141 3836 : DBUG_ENTER("translog_get_last_page_addr");
3142 :
3143 3836 : if (likely((file= get_logfile_by_number(file_no)) != NULL))
3144 : {
3145 : /*
3146 : This function used only during initialization of loghandler or in
3147 : scanner (which mean we need read that part of the log), so the
3148 : requested log file have to be opened and can't be freed after
3149 : returning pointer on it (file_size).
3150 : */
3151 3479 : file_size= my_seek(file->handler.file, 0, SEEK_END, MYF(0));
3152 : }
3153 : else
3154 : {
3155 : /*
3156 : This branch is used only during very early initialization
3157 : when files are not opened.
3158 : */
3159 : File fd;
3160 357 : if ((fd= my_open(translog_filename_by_fileno(file_no, path),
3161 : O_RDONLY, (no_errors ? MYF(0) : MYF(MY_WME)))) < 0)
3162 : {
3163 1 : my_errno= errno;
3164 1 : DBUG_PRINT("error", ("Error %d during opening file #%d",
3165 : errno, file_no));
3166 1 : DBUG_RETURN(1);
3167 : }
3168 356 : file_size= my_seek(fd, 0, SEEK_END, MYF(0));
3169 356 : my_close(fd, MYF(0));
3170 : }
3171 3835 : DBUG_PRINT("info", ("File size: %s", llstr(file_size, buff)));
3172 3835 : if (file_size == MY_FILEPOS_ERROR)
3173 0 : DBUG_RETURN(1);
3174 3835 : DBUG_ASSERT(file_size < ULL(0xffffffff));
3175 3835 : if (((uint32)file_size) > TRANSLOG_PAGE_SIZE)
3176 : {
3177 3835 : rec_offset= (((((uint32)file_size) / TRANSLOG_PAGE_SIZE) - 1) *
3178 : TRANSLOG_PAGE_SIZE);
3179 3835 : *last_page_ok= (((uint32)file_size) == rec_offset + TRANSLOG_PAGE_SIZE);
3180 : }
3181 : else
3182 : {
3183 0 : *last_page_ok= 0;
3184 0 : rec_offset= 0;
3185 : }
3186 3835 : *addr= MAKE_LSN(file_no, rec_offset);
3187 3835 : DBUG_PRINT("info", ("Last page: 0x%lx ok: %d", (ulong) rec_offset,
3188 : *last_page_ok));
3189 3835 : DBUG_RETURN(0);
3190 : }
3191 :
3192 :
3193 : /**
3194 : @brief Get number bytes for record length storing
3195 :
3196 : @param length Record length which will be encoded
3197 :
3198 : @return 1,3,4,5 - number of bytes to store given length
3199 : */
3200 :
3201 : static uint translog_variable_record_length_bytes(translog_size_t length)
3202 5011961 : {
3203 5011961 : if (length < 250)
3204 4981561 : return 1;
3205 30400 : if (length < 0xFFFF)
3206 28623 : return 3;
3207 1777 : if (length < (ulong) 0xFFFFFF)
3208 1774 : return 4;
3209 3 : return 5;
3210 : }
3211 :
3212 :
3213 : /**
3214 : @brief Gets header of this chunk.
3215 :
3216 : @param chunk The pointer to the chunk beginning
3217 :
3218 : @retval # total length of the chunk
3219 : @retval 0 Error
3220 : */
3221 :
3222 : static uint16 translog_get_chunk_header_length(uchar *chunk)
3223 231598 : {
3224 231598 : DBUG_ENTER("translog_get_chunk_header_length");
3225 231598 : switch (*chunk & TRANSLOG_CHUNK_TYPE) {
3226 : case TRANSLOG_CHUNK_LSN:
3227 : {
3228 : /* 0 chunk referred as LSN (head or tail) */
3229 : translog_size_t rec_len;
3230 0 : uchar *start= chunk;
3231 0 : uchar *ptr= start + 1 + 2;
3232 : uint16 chunk_len, header_len;
3233 0 : DBUG_PRINT("info", ("TRANSLOG_CHUNK_LSN"));
3234 0 : rec_len= translog_variable_record_1group_decode_len(&ptr);
3235 0 : chunk_len= uint2korr(ptr);
3236 0 : header_len= (uint16) (ptr - start) +2;
3237 0 : DBUG_PRINT("info", ("rec len: %lu chunk len: %u header len: %u",
3238 : (ulong) rec_len, (uint) chunk_len, (uint) header_len));
3239 0 : if (chunk_len)
3240 : {
3241 : /* TODO: fine header end */
3242 : /*
3243 : The last chunk of multi-group record can be base for it header
3244 : calculation (we skip to the first group to read the header) so if we
3245 : stuck here something is wrong.
3246 : */
3247 0 : DBUG_ASSERT(0);
3248 : DBUG_RETURN(0); /* Keep compiler happy */
3249 : }
3250 0 : DBUG_RETURN(header_len);
3251 : }
3252 : case TRANSLOG_CHUNK_FIXED:
3253 : {
3254 : /* 1 (pseudo)fixed record (also LSN) */
3255 0 : DBUG_PRINT("info", ("TRANSLOG_CHUNK_FIXED = 3"));
3256 0 : DBUG_RETURN(3);
3257 : }
3258 : case TRANSLOG_CHUNK_NOHDR:
3259 : /* 2 no header chunk (till page end) */
3260 208609 : DBUG_PRINT("info", ("TRANSLOG_CHUNK_NOHDR = 1"));
3261 208609 : DBUG_RETURN(1);
3262 : break;
3263 : case TRANSLOG_CHUNK_LNGTH:
3264 : /* 3 chunk with chunk length */
3265 22989 : DBUG_PRINT("info", ("TRANSLOG_CHUNK_LNGTH = 3"));
3266 22989 : DBUG_RETURN(3);
3267 : break;
3268 : default:
3269 0 : DBUG_ASSERT(0);
3270 : DBUG_RETURN(0); /* Keep compiler happy */
3271 : }
3272 : }
3273 :
3274 :
3275 : /**
3276 : @brief Truncate the log to the given address. Used during the startup if the
3277 : end of log if corrupted.
3278 :
3279 : @param addr new horizon
3280 :
3281 : @retval 0 OK
3282 : @retval 1 Error
3283 : */
3284 :
3285 : static my_bool translog_truncate_log(TRANSLOG_ADDRESS addr)
3286 0 : {
3287 : uchar *page;
3288 : TRANSLOG_ADDRESS current_page;
3289 : uint32 next_page_offset, page_rest;
3290 : uint32 i;
3291 : File fd;
3292 : TRANSLOG_VALIDATOR_DATA data;
3293 : char path[FN_REFLEN];
3294 : uchar page_buff[TRANSLOG_PAGE_SIZE];
3295 0 : DBUG_ENTER("translog_truncate_log");
3296 : /* TODO: write warning to the client */
3297 0 : DBUG_PRINT("warning", ("removing all records from (%lu,0x%lx) "
3298 : "till (%lu,0x%lx)",
3299 : LSN_IN_PARTS(addr),
3300 : LSN_IN_PARTS(log_descriptor.horizon)));
3301 0 : DBUG_ASSERT(cmp_translog_addr(addr, log_descriptor.horizon) < 0);
3302 : /* remove files between the address and horizon */
3303 0 : for (i= LSN_FILE_NO(addr) + 1; i <= LSN_FILE_NO(log_descriptor.horizon); i++)
3304 0 : if (my_delete(translog_filename_by_fileno(i, path), MYF(MY_WME)))
3305 : {
3306 0 : translog_unlock();
3307 0 : DBUG_RETURN(1);
3308 : }
3309 :
3310 : /* truncate the last file up to the last page */
3311 0 : next_page_offset= LSN_OFFSET(addr);
3312 0 : next_page_offset= (next_page_offset -
3313 : ((next_page_offset - 1) % TRANSLOG_PAGE_SIZE + 1) +
3314 : TRANSLOG_PAGE_SIZE);
3315 0 : page_rest= next_page_offset - LSN_OFFSET(addr);
3316 0 : memset(page_buff, TRANSLOG_FILLER, page_rest);
3317 0 : if ((fd= open_logfile_by_number_no_cache(LSN_FILE_NO(addr))) < 0 ||
3318 : ((my_chsize(fd, next_page_offset, TRANSLOG_FILLER, MYF(MY_WME)) ||
3319 : (page_rest && my_pwrite(fd, page_buff, page_rest, LSN_OFFSET(addr),
3320 : log_write_flags)) ||
3321 : my_sync(fd, MYF(MY_WME))) |
3322 : my_close(fd, MYF(MY_WME))) ||
3323 : (sync_log_dir >= TRANSLOG_SYNC_DIR_ALWAYS &&
3324 : sync_dir(log_descriptor.directory_fd, MYF(MY_WME | MY_IGNORE_BADFD))))
3325 0 : DBUG_RETURN(1);
3326 :
3327 : /* fix the horizon */
3328 0 : log_descriptor.horizon= addr;
3329 : /* fix the buffer data */
3330 0 : current_page= MAKE_LSN(LSN_FILE_NO(addr), (next_page_offset -
3331 : TRANSLOG_PAGE_SIZE));
3332 0 : data.addr= ¤t_page;
3333 0 : if ((page= translog_get_page(&data, log_descriptor.buffers->buffer, NULL)) ==
3334 : NULL)
3335 0 : DBUG_RETURN(1);
3336 0 : if (page != log_descriptor.buffers->buffer)
3337 0 : memcpy(log_descriptor.buffers->buffer, page, TRANSLOG_PAGE_SIZE);
3338 0 : log_descriptor.bc.buffer->offset= current_page;
3339 0 : log_descriptor.bc.buffer->size= LSN_OFFSET(addr) - LSN_OFFSET(current_page);
3340 0 : log_descriptor.bc.ptr=
3341 : log_descriptor.buffers->buffer + log_descriptor.bc.buffer->size;
3342 0 : log_descriptor.bc.current_page_fill= log_descriptor.bc.buffer->size;
3343 0 : DBUG_RETURN(0);
3344 : }
3345 :
3346 :
3347 : /**
3348 : Applies function 'callback' to all files (in a directory) which
3349 : name looks like a log's name (maria_log.[0-9]{7}).
3350 : If 'callback' returns TRUE this interrupts the walk and returns
3351 : TRUE. Otherwise FALSE is returned after processing all log files.
3352 : It cannot just use log_descriptor.directory because that may not yet have
3353 : been initialized.
3354 :
3355 : @param directory directory to scan
3356 : @param callback function to apply; is passed directory and base
3357 : name of found file
3358 : */
3359 :
3360 : my_bool translog_walk_filenames(const char *directory,
3361 : my_bool (*callback)(const char *,
3362 : const char *))
3363 1 : {
3364 : MY_DIR *dirp;
3365 : uint i;
3366 1 : my_bool rc= FALSE;
3367 :
3368 : /* Finds and removes transaction log files */
3369 1 : if (!(dirp = my_dir(directory, MYF(MY_DONT_SORT))))
3370 0 : return FALSE;
3371 :
3372 365 : for (i= 0; i < dirp->number_off_files; i++)
3373 : {
3374 364 : char *file= dirp->dir_entry[i].name;
3375 364 : if (strncmp(file, "maria_log.", 10) == 0 &&
3376 : file[10] >= '0' && file[10] <= '9' &&
3377 : file[11] >= '0' && file[11] <= '9' &&
3378 : file[12] >= '0' && file[12] <= '9' &&
3379 : file[13] >= '0' && file[13] <= '9' &&
3380 : file[14] >= '0' && file[14] <= '9' &&
3381 : file[15] >= '0' && file[15] <= '9' &&
3382 : file[16] >= '0' && file[16] <= '9' &&
3383 : file[17] >= '0' && file[17] <= '9' &&
3384 : file[18] == '\0' && (*callback)(directory, file))
3385 : {
3386 0 : rc= TRUE;
3387 0 : break;
3388 : }
3389 : }
3390 1 : my_dirend(dirp);
3391 1 : return rc;
3392 : }
3393 :
3394 :
3395 : /**
3396 : @brief Fills table of dependence length of page header from page flags
3397 : */
3398 :
3399 : static void translog_fill_overhead_table()
3400 798 : {
3401 : uint i;
3402 7182 : for (i= 0; i < TRANSLOG_FLAGS_NUM; i++)
3403 : {
3404 6384 : page_overhead[i]= 7;
3405 6384 : if (i & TRANSLOG_PAGE_CRC)
3406 3192 : page_overhead[i]+= CRC_SIZE;
3407 6384 : if (i & TRANSLOG_SECTOR_PROTECTION)
3408 3192 : page_overhead[i]+= TRANSLOG_PAGE_SIZE /
3409 : DISK_DRIVE_SECTOR_SIZE;
3410 : }
3411 : }
3412 :
3413 :
3414 : /**
3415 : Callback to find first log in directory.
3416 : */
3417 :
3418 : static my_bool translog_callback_search_first(const char *directory
3419 : __attribute__((unused)),
3420 : const char *filename
3421 : __attribute__((unused)))
3422 0 : {
3423 0 : return TRUE;
3424 : }
3425 :
3426 :
3427 : /**
3428 : @brief Checks that chunk is LSN one
3429 :
3430 : @param type type of the chunk
3431 :
3432 : @retval 1 the chunk is LNS
3433 : @retval 0 the chunk is not LSN
3434 : */
3435 :
3436 : static my_bool translog_is_LSN_chunk(uchar type)
3437 16646342 : {
3438 16646342 : DBUG_ENTER("translog_is_LSN_chunk");
3439 16646342 : DBUG_PRINT("info", ("byte: %x chunk type: %u record type: %u",
3440 : type, type >> 6, type & TRANSLOG_REC_TYPE));
3441 16646342 : DBUG_RETURN(((type & TRANSLOG_CHUNK_TYPE) == TRANSLOG_CHUNK_FIXED) ||
3442 : (((type & TRANSLOG_CHUNK_TYPE) == TRANSLOG_CHUNK_LSN) &&
3443 : ((type & TRANSLOG_REC_TYPE)) != TRANSLOG_CHUNK_0_CONT));
3444 : }
3445 :
3446 :
3447 : /**
3448 : @brief Initialize transaction log
3449 :
3450 : @param directory Directory where log files are put
3451 : @param log_file_max_size max size of one log file (for new logs creation)
3452 : @param server_version version of MySQL server (MYSQL_VERSION_ID)
3453 : @param server_id server ID (replication & Co)
3454 : @param pagecache Page cache for the log reads
3455 : @param flags flags (TRANSLOG_PAGE_CRC, TRANSLOG_SECTOR_PROTECTION
3456 : TRANSLOG_RECORD_CRC)
3457 : @param readonly Put transaction log in read-only mode
3458 : @param init_table_func function to initialize record descriptors table
3459 : @param no_errors suppress messages about non-critical errors
3460 :
3461 : @todo
3462 : Free used resources in case of error.
3463 :
3464 : @retval 0 OK
3465 : @retval 1 Error
3466 : */
3467 :
3468 : my_bool translog_init_with_table(const char *directory,
3469 : uint32 log_file_max_size,
3470 : uint32 server_version,
3471 : uint32 server_id, PAGECACHE *pagecache,
3472 : uint flags, my_bool readonly,
3473 : void (*init_table_func)(),
3474 : my_bool no_errors)
3475 798 : {
3476 : int i;
3477 798 : int old_log_was_recovered= 0, logs_found= 0;
3478 798 : uint old_flags= flags;
3479 798 : uint32 start_file_num= 1;
3480 : TRANSLOG_ADDRESS sure_page, last_page, last_valid_page, checkpoint_lsn;
3481 798 : my_bool version_changed= 0;
3482 798 : DBUG_ENTER("translog_init_with_table");
3483 :
3484 798 : id_to_share= NULL;
3485 798 : log_descriptor.directory_fd= -1;
3486 798 : log_descriptor.is_everything_flushed= 1;
3487 798 : log_descriptor.next_pass_max_lsn= LSN_IMPOSSIBLE;
3488 :
3489 798 : (*init_table_func)();
3490 : compile_time_assert(sizeof(log_descriptor.dirty_buffer_mask) * 8 >=
3491 : TRANSLOG_BUFFERS_NO);
3492 798 : log_descriptor.dirty_buffer_mask= 0;
3493 798 : if (readonly)
3494 1 : log_descriptor.open_flags= O_BINARY | O_RDONLY;
3495 : else
3496 797 : log_descriptor.open_flags= O_BINARY | O_RDWR;
3497 798 : if (pthread_mutex_init(&log_descriptor.sent_to_disk_lock,
3498 : MY_MUTEX_INIT_FAST) ||
3499 : pthread_mutex_init(&log_descriptor.file_header_lock,
3500 : MY_MUTEX_INIT_FAST) ||
3501 : pthread_mutex_init(&log_descriptor.unfinished_files_lock,
3502 : MY_MUTEX_INIT_FAST) ||
3503 : pthread_mutex_init(&log_descriptor.purger_lock,
3504 : MY_MUTEX_INIT_FAST) ||
3505 : pthread_mutex_init(&log_descriptor.log_flush_lock,
3506 : MY_MUTEX_INIT_FAST) ||
3507 : pthread_mutex_init(&log_descriptor.dirty_buffer_mask_lock,
3508 : MY_MUTEX_INIT_FAST) ||
3509 : pthread_cond_init(&log_descriptor.log_flush_cond, 0) ||
3510 : my_rwlock_init(&log_descriptor.open_files_lock,
3511 : NULL) ||
3512 : my_init_dynamic_array(&log_descriptor.open_files,
3513 : sizeof(TRANSLOG_FILE*), 10, 10) ||
3514 : my_init_dynamic_array(&log_descriptor.unfinished_files,
3515 : sizeof(struct st_file_counter),
3516 : 10, 10))
3517 : goto err;
3518 798 : log_descriptor.min_need_file= 0;
3519 798 : log_descriptor.min_file_number= 0;
3520 798 : log_descriptor.last_lsn_checked= LSN_IMPOSSIBLE;
3521 :
3522 : /* Directory to store files */
3523 798 : unpack_dirname(log_descriptor.directory, directory);
3524 : #ifndef __WIN__
3525 798 : if ((log_descriptor.directory_fd= my_open(log_descriptor.directory,
3526 : O_RDONLY, MYF(MY_WME))) < 0)
3527 : {
3528 0 : my_errno= errno;
3529 0 : DBUG_PRINT("error", ("Error %d during opening directory '%s'",
3530 : errno, log_descriptor.directory));
3531 0 : goto err;
3532 : }
3533 : #endif
3534 798 : log_descriptor.in_buffers_only= LSN_IMPOSSIBLE;
3535 798 : DBUG_ASSERT(log_file_max_size % TRANSLOG_PAGE_SIZE == 0 &&
3536 : log_file_max_size >= TRANSLOG_MIN_FILE_SIZE);
3537 : /* max size of one log size (for new logs creation) */
3538 798 : log_file_size= log_descriptor.log_file_max_size=
3539 : log_file_max_size;
3540 : /* server version */
3541 798 : log_descriptor.server_version= server_version;
3542 : /* server ID */
3543 798 : log_descriptor.server_id= server_id;
3544 : /* Page cache for the log reads */
3545 798 : log_descriptor.pagecache= pagecache;
3546 : /* Flags */
3547 798 : DBUG_ASSERT((flags &
3548 : ~(TRANSLOG_PAGE_CRC | TRANSLOG_SECTOR_PROTECTION |
3549 : TRANSLOG_RECORD_CRC)) == 0);
3550 798 : log_descriptor.flags= flags;
3551 798 : translog_fill_overhead_table();
3552 798 : log_descriptor.page_overhead= page_overhead[flags];
3553 798 : log_descriptor.page_capacity_chunk_2=
3554 : TRANSLOG_PAGE_SIZE - log_descriptor.page_overhead - 1;
3555 : compile_time_assert(TRANSLOG_WRITE_BUFFER % TRANSLOG_PAGE_SIZE == 0);
3556 798 : log_descriptor.buffer_capacity_chunk_2=
3557 : (TRANSLOG_WRITE_BUFFER / TRANSLOG_PAGE_SIZE) *
3558 : log_descriptor.page_capacity_chunk_2;
3559 798 : log_descriptor.half_buffer_capacity_chunk_2=
3560 : log_descriptor.buffer_capacity_chunk_2 / 2;
3561 798 : DBUG_PRINT("info",
3562 : ("Overhead: %u pc2: %u bc2: %u, bc2/2: %u",
3563 : log_descriptor.page_overhead,
3564 : log_descriptor.page_capacity_chunk_2,
3565 : log_descriptor.buffer_capacity_chunk_2,
3566 : log_descriptor.half_buffer_capacity_chunk_2));
3567 :
3568 : /* Just to init it somehow (hack for bootstrap)*/
3569 : {
3570 798 : TRANSLOG_FILE *file= 0;
3571 798 : log_descriptor.min_file = log_descriptor.max_file= 1;
3572 798 : insert_dynamic(&log_descriptor.open_files, (uchar *)&file);
3573 798 : translog_start_buffer(log_descriptor.buffers, &log_descriptor.bc, 0);
3574 798 : pop_dynamic(&log_descriptor.open_files);
3575 : }
3576 :
3577 : /* Buffers for log writing */
3578 7182 : for (i= 0; i < TRANSLOG_BUFFERS_NO; i++)
3579 : {
3580 6384 : if (translog_buffer_init(log_descriptor.buffers + i, i))
3581 6384 : goto err;
3582 6384 : DBUG_PRINT("info", ("translog_buffer buffer #%u: 0x%lx",
3583 : i, (ulong) log_descriptor.buffers + i));
3584 : }
3585 :
3586 : /*
3587 : last_logno and last_checkpoint_lsn were set in
3588 : ma_control_file_create_or_open()
3589 : */
3590 798 : logs_found= (last_logno != FILENO_IMPOSSIBLE);
3591 :
3592 798 : translog_status= (readonly ? TRANSLOG_READONLY : TRANSLOG_OK);
3593 798 : checkpoint_lsn= last_checkpoint_lsn;
3594 :
3595 798 : if (logs_found)
3596 : {
3597 : my_bool pageok;
3598 357 : DBUG_PRINT("info", ("log found..."));
3599 : /*
3600 : TODO: scan directory for maria_log.XXXXXXXX files and find
3601 : highest XXXXXXXX & set logs_found
3602 : TODO: check that last checkpoint within present log addresses space
3603 :
3604 : find the log end
3605 : */
3606 357 : if (LSN_FILE_NO(last_checkpoint_lsn) == FILENO_IMPOSSIBLE)
3607 : {
3608 213 : DBUG_ASSERT(LSN_OFFSET(last_checkpoint_lsn) == 0);
3609 : /* only last log needs to be checked */
3610 213 : sure_page= MAKE_LSN(last_logno, TRANSLOG_PAGE_SIZE);
3611 : }
3612 : else
3613 : {
3614 144 : sure_page= last_checkpoint_lsn;
3615 144 : DBUG_ASSERT(LSN_OFFSET(sure_page) % TRANSLOG_PAGE_SIZE != 0);
3616 144 : sure_page-= LSN_OFFSET(sure_page) % TRANSLOG_PAGE_SIZE;
3617 : }
3618 : /* Set horizon to the beginning of the last file first */
3619 357 : log_descriptor.horizon= last_page= MAKE_LSN(last_logno, 0);
3620 357 : if (translog_get_last_page_addr(&last_page, &pageok, no_errors))
3621 : {
3622 1 : if (!translog_walk_filenames(log_descriptor.directory,
3623 : &translog_callback_search_first))
3624 : {
3625 : /*
3626 : Files was deleted, just start from the next log number, so that
3627 : existing tables are in the past.
3628 : */
3629 1 : start_file_num= last_logno + 1;
3630 1 : checkpoint_lsn= LSN_IMPOSSIBLE; /* no log so no checkpoint */
3631 1 : logs_found= 0;
3632 : }
3633 : else
3634 1 : goto err;
3635 : }
3636 356 : else if (LSN_OFFSET(last_page) == 0)
3637 : {
3638 0 : if (LSN_FILE_NO(last_page) == 1)
3639 : {
3640 0 : logs_found= 0; /* file #1 has no pages */
3641 0 : DBUG_PRINT("info", ("log found. But is is empty => no log assumed"));
3642 : }
3643 : else
3644 : {
3645 0 : last_page-= LSN_ONE_FILE;
3646 0 : if (translog_get_last_page_addr(&last_page, &pageok, 0))
3647 357 : goto err;
3648 : }
3649 : }
3650 357 : if (logs_found)
3651 : {
3652 : uint32 i;
3653 356 : log_descriptor.min_file= translog_first_file(log_descriptor.horizon, 1);
3654 356 : log_descriptor.max_file= last_logno;
3655 : /* Open all files */
3656 356 : if (allocate_dynamic(&log_descriptor.open_files,
3657 : log_descriptor.max_file -
3658 : log_descriptor.min_file + 1))
3659 356 : goto err;
3660 712 : for (i = log_descriptor.max_file; i >= log_descriptor.min_file; i--)
3661 : {
3662 : /*
3663 : We can't allocate all file together because they will be freed
3664 : one by one
3665 : */
3666 356 : TRANSLOG_FILE *file= (TRANSLOG_FILE *)my_malloc(sizeof(TRANSLOG_FILE),
3667 : MYF(0));
3668 :
3669 : compile_time_assert(MY_FILEPOS_ERROR > ULL(0xffffffff));
3670 356 : if (file == NULL ||
3671 : (file->handler.file=
3672 : open_logfile_by_number_no_cache(i)) < 0 ||
3673 : my_seek(file->handler.file, 0, SEEK_END, MYF(0)) >=
3674 : ULL(0xffffffff))
3675 : {
3676 : int j;
3677 0 : for (j= i - log_descriptor.min_file - 1; j > 0; j--)
3678 : {
3679 : TRANSLOG_FILE *el=
3680 0 : *dynamic_element(&log_descriptor.open_files, j,
3681 : TRANSLOG_FILE **);
3682 0 : my_close(el->handler.file, MYF(MY_WME));
3683 0 : my_free(el, MYF(0));
3684 : }
3685 0 : if (file)
3686 : {
3687 0 : free(file);
3688 0 : goto err;
3689 : }
3690 : else
3691 0 : goto err;
3692 : }
3693 356 : translog_file_init(file, i, 1);
3694 : /* we allocated space so it can't fail */
3695 356 : insert_dynamic(&log_descriptor.open_files, (uchar *)&file);
3696 : }
3697 356 : DBUG_ASSERT(log_descriptor.max_file - log_descriptor.min_file + 1 ==
3698 : log_descriptor.open_files.elements);
3699 : }
3700 : }
3701 441 : else if (readonly)
3702 : {
3703 : /* There is no logs and there is read-only mode => nothing to read */
3704 0 : DBUG_PRINT("error", ("No logs and read-only mode"));
3705 0 : goto err;
3706 : }
3707 :
3708 798 : if (logs_found)
3709 : {
3710 356 : TRANSLOG_ADDRESS current_page= sure_page;
3711 : my_bool pageok;
3712 :
3713 356 : DBUG_PRINT("info", ("The log is really present"));
3714 356 : DBUG_ASSERT(sure_page <= last_page);
3715 :
3716 : /* TODO: check page size */
3717 :
3718 356 : last_valid_page= LSN_IMPOSSIBLE;
3719 : /*
3720 : Scans and validate pages. We need it to show "outside" only for sure
3721 : valid part of the log. If the log was damaged then fixed we have to
3722 : cut off damaged part before some other process start write something
3723 : in the log.
3724 : */
3725 : do
3726 : {
3727 : TRANSLOG_ADDRESS current_file_last_page;
3728 356 : current_file_last_page= current_page;
3729 356 : if (translog_get_last_page_addr(¤t_file_last_page, &pageok, 0))
3730 356 : goto err;
3731 356 : if (!pageok)
3732 : {
3733 0 : DBUG_PRINT("error", ("File %lu have no complete last page",
3734 : (ulong) LSN_FILE_NO(current_file_last_page)));
3735 0 : old_log_was_recovered= 1;
3736 : /* This file is not written till the end so it should be last */
3737 0 : last_page= current_file_last_page;
3738 : /* TODO: issue warning */
3739 : }
3740 : do
3741 : {
3742 : TRANSLOG_VALIDATOR_DATA data;
3743 : TRANSLOG_PAGE_SIZE_BUFF psize_buff;
3744 : uchar *page;
3745 186286 : data.addr= ¤t_page;
3746 186286 : if ((page= translog_get_page(&data, psize_buff.buffer, NULL)) == NULL)
3747 186286 : goto err;
3748 186286 : if (data.was_recovered)
3749 : {
3750 0 : DBUG_PRINT("error", ("file no: %lu (%d) "
3751 : "rec_offset: 0x%lx (%lu) (%d)",
3752 : (ulong) LSN_FILE_NO(current_page),
3753 : (uint3korr(page + 3) !=
3754 : LSN_FILE_NO(current_page)),
3755 : (ulong) LSN_OFFSET(current_page),
3756 : (ulong) (LSN_OFFSET(current_page) /
3757 : TRANSLOG_PAGE_SIZE),
3758 : (uint3korr(page) !=
3759 : LSN_OFFSET(current_page) /
3760 : TRANSLOG_PAGE_SIZE)));
3761 0 : old_log_was_recovered= 1;
3762 0 : break;
3763 : }
3764 186286 : old_flags= page[TRANSLOG_PAGE_FLAGS];
3765 186286 : last_valid_page= current_page;
3766 186286 : current_page+= TRANSLOG_PAGE_SIZE; /* increase offset */
3767 186286 : } while (current_page <= current_file_last_page);
3768 356 : current_page+= LSN_ONE_FILE;
3769 356 : current_page= LSN_REPLACE_OFFSET(current_page, TRANSLOG_PAGE_SIZE);
3770 : } while (LSN_FILE_NO(current_page) <= LSN_FILE_NO(last_page) &&
3771 356 : !old_log_was_recovered);
3772 356 : if (last_valid_page == LSN_IMPOSSIBLE)
3773 : {
3774 : /* Panic!!! Even page which should be valid is invalid */
3775 : /* TODO: issue error */
3776 356 : goto err;
3777 : }
3778 356 : DBUG_PRINT("info", ("Last valid page is in file: %lu "
3779 : "offset: %lu (0x%lx) "
3780 : "Logs found: %d was recovered: %d "
3781 : "flags match: %d",
3782 : (ulong) LSN_FILE_NO(last_valid_page),
3783 : (ulong) LSN_OFFSET(last_valid_page),
3784 : (ulong) LSN_OFFSET(last_valid_page),
3785 : logs_found, old_log_was_recovered,
3786 : (old_flags == flags)));
3787 :
3788 : /* TODO: check server ID */
3789 356 : if (logs_found && !old_log_was_recovered && old_flags == flags)
3790 : {
3791 : TRANSLOG_VALIDATOR_DATA data;
3792 : TRANSLOG_PAGE_SIZE_BUFF psize_buff;
3793 : uchar *page;
3794 : uint16 chunk_offset;
3795 356 : data.addr= &last_valid_page;
3796 : /* continue old log */
3797 356 : DBUG_ASSERT(LSN_FILE_NO(last_valid_page)==
3798 : LSN_FILE_NO(log_descriptor.horizon));
3799 356 : if ((page= translog_get_page(&data, psize_buff.buffer, NULL)) == NULL ||
3800 : (chunk_offset= translog_get_first_chunk_offset(page)) == 0)
3801 : goto err;
3802 :
3803 : /* Puts filled part of old page in the buffer */
3804 356 : log_descriptor.horizon= last_valid_page;
3805 356 : translog_start_buffer(log_descriptor.buffers, &log_descriptor.bc, 0);
3806 : /*
3807 : Free space if filled with TRANSLOG_FILLER and first uchar of
3808 : real chunk can't be TRANSLOG_FILLER
3809 : */
3810 31929 : while (chunk_offset < TRANSLOG_PAGE_SIZE &&
3811 : page[chunk_offset] != TRANSLOG_FILLER)
3812 : {
3813 : uint16 chunk_length;
3814 31217 : if ((chunk_length=
3815 : translog_get_total_chunk_length(page, chunk_offset)) == 0)
3816 31217 : goto err;
3817 31217 : DBUG_PRINT("info", ("chunk: offset: %u length: %u",
3818 : (uint) chunk_offset, (uint) chunk_length));
3819 31217 : chunk_offset+= chunk_length;
3820 :
3821 : /* chunk can't cross the page border */
3822 31217 : DBUG_ASSERT(chunk_offset <= TRANSLOG_PAGE_SIZE);
3823 : }
3824 356 : memcpy(log_descriptor.buffers->buffer, page, chunk_offset);
3825 356 : log_descriptor.bc.buffer->size+= chunk_offset;
3826 356 : log_descriptor.bc.ptr+= chunk_offset;
3827 356 : log_descriptor.bc.current_page_fill= chunk_offset;
3828 356 : log_descriptor.horizon= LSN_REPLACE_OFFSET(log_descriptor.horizon,
3829 : (chunk_offset +
3830 : LSN_OFFSET(last_valid_page)));
3831 356 : DBUG_PRINT("info", ("Move Page #%u: 0x%lx chaser: %d Size: %lu (%lu)",
3832 : (uint) log_descriptor.bc.buffer_no,
3833 : (ulong) log_descriptor.bc.buffer,
3834 : log_descriptor.bc.chaser,
3835 : (ulong) log_descriptor.bc.buffer->size,
3836 : (ulong) (log_descriptor.bc.ptr - log_descriptor.bc.
3837 : buffer->buffer)));
3838 356 : translog_check_cursor(&log_descriptor.bc);
3839 : }
3840 356 : if (!old_log_was_recovered && old_flags == flags)
3841 : {
3842 : LOGHANDLER_FILE_INFO info;
3843 : /*
3844 : Accessing &log_descriptor.open_files without mutex is safe
3845 : because it is initialization
3846 : */
3847 356 : if (translog_read_file_header(&info,
3848 : (*dynamic_element(&log_descriptor.
3849 : open_files,
3850 : 0, TRANSLOG_FILE **))->
3851 : handler.file))
3852 356 : goto err;
3853 356 : version_changed= (info.maria_version != TRANSLOG_VERSION_ID);
3854 : }
3855 : }
3856 798 : DBUG_PRINT("info", ("Logs found: %d was recovered: %d",
3857 : logs_found, old_log_was_recovered));
3858 798 : if (!logs_found)
3859 : {
3860 442 : TRANSLOG_FILE *file= (TRANSLOG_FILE*)my_malloc(sizeof(TRANSLOG_FILE),
3861 : MYF(0));
3862 442 : DBUG_PRINT("info", ("The log is not found => we will create new log"));
3863 442 : if (file == NULL)
3864 442 : goto err;
3865 : /* Start new log system from scratch */
3866 442 : log_descriptor.horizon= MAKE_LSN(start_file_num,
3867 : TRANSLOG_PAGE_SIZE); /* header page */
3868 442 : if ((file->handler.file=
3869 : create_logfile_by_number_no_cache(start_file_num)) == -1)
3870 442 : goto err;
3871 442 : translog_file_init(file, start_file_num, 0);
3872 442 : if (insert_dynamic(&log_descriptor.open_files, (uchar*)&file))
3873 442 : goto err;
3874 442 : log_descriptor.min_file= log_descriptor.max_file= start_file_num;
3875 442 : if (translog_write_file_header())
3876 442 : goto err;
3877 442 : DBUG_ASSERT(log_descriptor.max_file - log_descriptor.min_file + 1 ==
3878 : log_descriptor.open_files.elements);
3879 :
3880 442 : if (ma_control_file_write_and_force(checkpoint_lsn, start_file_num,
3881 : max_trid_in_control_file,
3882 : recovery_failures))
3883 442 : goto err;
3884 : /* assign buffer 0 */
3885 442 : translog_start_buffer(log_descriptor.buffers, &log_descriptor.bc, 0);
3886 442 : translog_new_page_header(&log_descriptor.horizon, &log_descriptor.bc);
3887 : }
3888 356 : else if ((old_log_was_recovered || old_flags != flags || version_changed) &&
3889 : !readonly)
3890 : {
3891 : /* leave the damaged file untouched */
3892 0 : log_descriptor.horizon+= LSN_ONE_FILE;
3893 : /* header page */
3894 0 : log_descriptor.horizon= LSN_REPLACE_OFFSET(log_descriptor.horizon,
3895 : TRANSLOG_PAGE_SIZE);
3896 0 : if (translog_create_new_file())
3897 0 : goto err;
3898 : /*
3899 : Buffer system left untouched after recovery => we should init it
3900 : (starting from buffer 0)
3901 : */
3902 0 : translog_start_buffer(log_descriptor.buffers, &log_descriptor.bc, 0);
3903 0 : translog_new_page_header(&log_descriptor.horizon, &log_descriptor.bc);
3904 : }
3905 :
3906 : /* all LSNs that are on disk are flushed */
3907 798 : log_descriptor.log_start= log_descriptor.sent_to_disk=
3908 : log_descriptor.flushed= log_descriptor.horizon;
3909 798 : log_descriptor.in_buffers_only= log_descriptor.bc.buffer->offset;
3910 798 : log_descriptor.max_lsn= LSN_IMPOSSIBLE; /* set to 0 */
3911 798 : log_descriptor.previous_flush_horizon= log_descriptor.horizon;
3912 : /*
3913 : Now 'flushed' is set to 'horizon' value, but 'horizon' is (potentially)
3914 : address of the next LSN and we want indicate that all LSNs that are
3915 : already on the disk are flushed so we need decrease horizon on 1 (we are
3916 : sure that there is no LSN on the disk which is greater then 'flushed'
3917 : and there will not be LSN created that is equal or less then the value
3918 : of the 'flushed').
3919 : */
3920 798 : log_descriptor.flushed--; /* offset decreased */
3921 798 : log_descriptor.sent_to_disk--; /* offset decreased */
3922 : /*
3923 : Log records will refer to a MARIA_SHARE by a unique 2-byte id; set up
3924 : structures for generating 2-byte ids:
3925 : */
3926 : my_atomic_rwlock_init(&LOCK_id_to_share);
3927 798 : id_to_share= (MARIA_SHARE **) my_malloc(SHARE_ID_MAX * sizeof(MARIA_SHARE*),
3928 : MYF(MY_WME | MY_ZEROFILL));
3929 798 : if (unlikely(!id_to_share))
3930 798 : goto err;
3931 798 : id_to_share--; /* min id is 1 */
3932 :
3933 : /* Check the last LSN record integrity */
3934 798 : if (logs_found)
3935 : {
3936 : TRANSLOG_SCANNER_DATA scanner;
3937 : TRANSLOG_ADDRESS page_addr;
3938 356 : LSN last_lsn= LSN_IMPOSSIBLE;
3939 : /*
3940 : take very last page address and try to find LSN record on it
3941 : if it fail take address of previous page and so on
3942 : */
3943 356 : page_addr= (log_descriptor.horizon -
3944 : ((log_descriptor.horizon - 1) % TRANSLOG_PAGE_SIZE + 1));
3945 356 : if (translog_scanner_init(page_addr, 1, &scanner, 1))
3946 356 : goto err;
3947 356 : scanner.page_offset= page_overhead[scanner.page[TRANSLOG_PAGE_FLAGS]];
3948 : for (;;)
3949 : {
3950 : uint chunk_1byte;
3951 32179 : chunk_1byte= scanner.page[scanner.page_offset];
3952 64586 : while (!translog_is_LSN_chunk(chunk_1byte) &&
3953 : scanner.page != END_OF_LOG &&
3954 : scanner.page[scanner.page_offset] != TRANSLOG_FILLER &&
3955 : scanner.page_addr == page_addr)
3956 : {
3957 228 : if (translog_get_next_chunk(&scanner))
3958 : {
3959 0 : translog_destroy_scanner(&scanner);
3960 0 : goto err;
3961 : }
3962 228 : if (scanner.page != END_OF_LOG)
3963 221 : chunk_1byte= scanner.page[scanner.page_offset];
3964 : }
3965 32179 : if (translog_is_LSN_chunk(chunk_1byte))
3966 : {
3967 32108 : last_lsn= scanner.page_addr + scanner.page_offset;
3968 32108 : if (translog_get_next_chunk(&scanner))
3969 : {
3970 0 : translog_destroy_scanner(&scanner);
3971 0 : goto err;
3972 : }
3973 32108 : if (scanner.page == END_OF_LOG)
3974 31786 : break; /* it was the last record */
3975 31786 : chunk_1byte= scanner.page[scanner.page_offset];
3976 31786 : continue; /* try to find other record on this page */
3977 : }
3978 :
3979 71 : if (last_lsn != LSN_IMPOSSIBLE)
3980 64 : break; /* there is no more records on the page */
3981 :
3982 : /* We have to make step back */
3983 64 : if (unlikely(LSN_OFFSET(page_addr) == TRANSLOG_PAGE_SIZE))
3984 : {
3985 27 : uint32 file_no= LSN_FILE_NO(page_addr);
3986 : my_bool last_page_ok;
3987 : /* it is beginning of the current file */
3988 27 : if (unlikely(file_no == 1))
3989 : {
3990 : /*
3991 : It is beginning of the log => there is no LSNs in the log =>
3992 : There is no harm in leaving it "as-is".
3993 : */
3994 27 : DBUG_RETURN(0);
3995 : }
3996 0 : file_no--;
3997 0 : page_addr= MAKE_LSN(file_no, TRANSLOG_PAGE_SIZE);
3998 0 : translog_get_last_page_addr(&page_addr, &last_page_ok, 0);
3999 : /* page should be OK as it is not the last file */
4000 0 : DBUG_ASSERT(last_page_ok);
4001 : }
4002 : else
4003 : {
4004 37 : page_addr-= TRANSLOG_PAGE_SIZE;
4005 : }
4006 37 : translog_destroy_scanner(&scanner);
4007 37 : if (translog_scanner_init(page_addr, 1, &scanner, 1))
4008 37 : goto err;
4009 37 : scanner.page_offset= page_overhead[scanner.page[TRANSLOG_PAGE_FLAGS]];
4010 : }
4011 329 : translog_destroy_scanner(&scanner);
4012 :
4013 : /* Now scanner points to the last LSN chunk, lets check it */
4014 : {
4015 : TRANSLOG_HEADER_BUFFER rec;
4016 : translog_size_t rec_len;
4017 : int len;
4018 : uchar buffer[1];
4019 329 : DBUG_PRINT("info", ("going to check the last found record (%lu,0x%lx)",
4020 : LSN_IN_PARTS(last_lsn)));
4021 :
4022 329 : len=
4023 : translog_read_record_header(last_lsn, &rec);
4024 329 : if (unlikely (len == RECHEADER_READ_ERROR ||
4025 : len == RECHEADER_READ_EOF))
4026 : {
4027 0 : DBUG_PRINT("error", ("unexpected end of log or record during "
4028 : "reading record header: (%lu,0x%lx) len: %d",
4029 : LSN_IN_PARTS(last_lsn), len));
4030 0 : if (readonly)
4031 0 : log_descriptor.log_start= log_descriptor.horizon= last_lsn;
4032 0 : else if (translog_truncate_log(last_lsn))
4033 : {
4034 0 : translog_free_record_header(&rec);
4035 0 : goto err;
4036 : }
4037 : }
4038 : else
4039 : {
4040 329 : DBUG_ASSERT(last_lsn == rec.lsn);
4041 329 : if (likely(rec.record_length != 0))
4042 : {
4043 : /*
4044 : Reading the last byte of record will trigger scanning all
4045 : record chunks for now
4046 : */
4047 257 : rec_len= translog_read_record(rec.lsn, rec.record_length - 1, 1,
4048 : buffer, NULL);
4049 257 : if (rec_len != 1)
4050 : {
4051 0 : DBUG_PRINT("error", ("unexpected end of log or record during "
4052 : "reading record body: (%lu,0x%lx) len: %d",
4053 : LSN_IN_PARTS(rec.lsn),
4054 : len));
4055 0 : if (readonly)
4056 0 : log_descriptor.log_start= log_descriptor.horizon= last_lsn;
4057 :
4058 0 : else if (translog_truncate_log(last_lsn))
4059 : {
4060 0 : translog_free_record_header(&rec);
4061 0 : goto err;
4062 : }
4063 : }
4064 : }
4065 : }
4066 329 : translog_free_record_header(&rec);
4067 : }
4068 : }
4069 771 : DBUG_RETURN(0);
4070 0 : err:
4071 0 : ma_message_no_user(0, "log initialization failed");
4072 0 : DBUG_RETURN(1);
4073 : }
4074 :
4075 :
4076 : /*
4077 : @brief Free transaction log file buffer.
4078 :
4079 : @param buffer_no The buffer to free
4080 : */
4081 :
4082 : static void translog_buffer_destroy(struct st_translog_buffer *buffer)
4083 5592 : {
4084 5592 : DBUG_ENTER("translog_buffer_destroy");
4085 5592 : DBUG_PRINT("enter",
4086 : ("Buffer #%u: 0x%lx file: %d offset: (%lu,0x%lx) size: %lu",
4087 : (uint) buffer->buffer_no, (ulong) buffer,
4088 : (buffer->file ? buffer->file->handler.file : -1),
4089 : LSN_IN_PARTS(buffer->offset),
4090 : (ulong) buffer->size));
4091 5592 : if (buffer->file != NULL)
4092 : {
4093 : /*
4094 : We ignore errors here, because we can't do something about it
4095 : (it is shutting down)
4096 :
4097 : We also have to take the locks even if there can't be any other
4098 : threads running, because translog_buffer_flush()
4099 : requires that we have the buffer locked.
4100 : */
4101 699 : translog_buffer_lock(buffer);
4102 699 : translog_buffer_flush(buffer);
4103 699 : translog_buffer_unlock(buffer);
4104 : }
4105 5592 : DBUG_PRINT("info", ("Destroy mutex: 0x%lx", (ulong) &buffer->mutex));
4106 5592 : pthread_mutex_destroy(&buffer->mutex);
4107 5592 : pthread_cond_destroy(&buffer->waiting_filling_buffer);
4108 5592 : DBUG_VOID_RETURN;
4109 : }
4110 :
4111 :
4112 : /*
4113 : Free log handler resources
4114 :
4115 : SYNOPSIS
4116 : translog_destroy()
4117 : */
4118 :
4119 : void translog_destroy()
4120 699 : {
4121 : TRANSLOG_FILE **file;
4122 : uint i;
4123 : uint8 current_buffer;
4124 699 : DBUG_ENTER("translog_destroy");
4125 :
4126 699 : DBUG_ASSERT(translog_status == TRANSLOG_OK ||
4127 : translog_status == TRANSLOG_READONLY);
4128 699 : translog_lock();
4129 699 : current_buffer= log_descriptor.bc.buffer_no;
4130 699 : translog_status= (translog_status == TRANSLOG_READONLY ?
4131 : TRANSLOG_UNINITED :
4132 : TRANSLOG_SHUTDOWN);
4133 699 : if (log_descriptor.bc.buffer->file != NULL)
4134 699 : translog_finish_page(&log_descriptor.horizon, &log_descriptor.bc);
4135 699 : translog_unlock();
4136 :
4137 6291 : for (i= 0; i < TRANSLOG_BUFFERS_NO; i++)
4138 : {
4139 : struct st_translog_buffer *buffer= (log_descriptor.buffers +
4140 : ((i + current_buffer + 1) %
4141 5592 : TRANSLOG_BUFFERS_NO));
4142 5592 : translog_buffer_destroy(buffer);
4143 : }
4144 699 : translog_status= TRANSLOG_UNINITED;
4145 :
4146 : /* close files */
4147 2118 : while ((file= (TRANSLOG_FILE **)pop_dynamic(&log_descriptor.open_files)))
4148 720 : translog_close_log_file(*file);
4149 699 : pthread_mutex_destroy(&log_descriptor.sent_to_disk_lock);
4150 699 : pthread_mutex_destroy(&log_descriptor.file_header_lock);
4151 699 : pthread_mutex_destroy(&log_descriptor.unfinished_files_lock);
4152 699 : pthread_mutex_destroy(&log_descriptor.purger_lock);
4153 699 : pthread_mutex_destroy(&log_descriptor.log_flush_lock);
4154 699 : pthread_mutex_destroy(&log_descriptor.dirty_buffer_mask_lock);
4155 699 : pthread_cond_destroy(&log_descriptor.log_flush_cond);
4156 699 : rwlock_destroy(&log_descriptor.open_files_lock);
4157 699 : delete_dynamic(&log_descriptor.open_files);
4158 699 : delete_dynamic(&log_descriptor.unfinished_files);
4159 :
4160 699 : if (log_descriptor.directory_fd >= 0)
4161 699 : my_close(log_descriptor.directory_fd, MYF(MY_WME));
4162 : my_atomic_rwlock_destroy(&LOCK_id_to_share);
4163 699 : if (id_to_share != NULL)
4164 699 : my_free((id_to_share + 1), MYF(MY_WME));
4165 699 : DBUG_VOID_RETURN;
4166 : }
4167 :
4168 :
4169 : /*
4170 : @brief Starts new page.
4171 :
4172 : @param horizon \ Position in file and buffer where we are
4173 : @param cursor /
4174 : @param prev_buffer Buffer which should be flushed will be assigned here.
4175 : This is always set (to NULL if nothing to flush).
4176 :
4177 : @note We do not want to flush the buffer immediately because we want to
4178 : let caller of this function first advance 'horizon' pointer and unlock the
4179 : loghandler and only then flush the log which can take some time.
4180 :
4181 : @retval 0 OK
4182 : @retval 1 Error
4183 : */
4184 :
4185 : static my_bool translog_page_next(TRANSLOG_ADDRESS *horizon,
4186 : struct st_buffer_cursor *cursor,
4187 : struct st_translog_buffer **prev_buffer)
4188 191838 : {
4189 191838 : struct st_translog_buffer *buffer= cursor->buffer;
4190 191838 : DBUG_ENTER("translog_page_next");
4191 :
4192 191838 : *prev_buffer= NULL;
4193 192958 : if ((cursor->ptr + TRANSLOG_PAGE_SIZE >
4194 : cursor->buffer->buffer + TRANSLOG_WRITE_BUFFER) ||
4195 : (LSN_OFFSET(*horizon) >
4196 : log_descriptor.log_file_max_size - TRANSLOG_PAGE_SIZE))
4197 : {
4198 1120 : DBUG_PRINT("info", ("Switch to next buffer Buffer Size: %lu (%lu) => %d "
4199 : "File size: %lu max: %lu => %d",
4200 : (ulong) cursor->buffer->size,
4201 : (ulong) (cursor->ptr - cursor->buffer->buffer),
4202 : (cursor->ptr + TRANSLOG_PAGE_SIZE >
4203 : cursor->buffer->buffer + TRANSLOG_WRITE_BUFFER),
4204 : (ulong) LSN_OFFSET(*horizon),
4205 : (ulong) log_descriptor.log_file_max_size,
4206 : (LSN_OFFSET(*horizon) >
4207 : (log_descriptor.log_file_max_size -
4208 : TRANSLOG_PAGE_SIZE))));
4209 1120 : if (translog_buffer_next(horizon, cursor,
4210 : LSN_OFFSET(*horizon) >
4211 : (log_descriptor.log_file_max_size -
4212 : TRANSLOG_PAGE_SIZE)))
4213 0 : DBUG_RETURN(1);
4214 1120 : *prev_buffer= buffer;
4215 1120 : DBUG_PRINT("info", ("Buffer #%u (0x%lu): have to be flushed",
4216 : (uint) buffer->buffer_no, (ulong) buffer));
4217 : }
4218 : else
4219 : {
4220 190718 : DBUG_PRINT("info", ("Use the same buffer #%u (0x%lu): "
4221 : "Buffer Size: %lu (%lu)",
4222 : (uint) buffer->buffer_no,
4223 : (ulong) buffer,
4224 : (ulong) cursor->buffer->size,
4225 : (ulong) (cursor->ptr - cursor->buffer->buffer)));
4226 190718 : translog_finish_page(horizon, cursor);
4227 190718 : translog_new_page_header(horizon, cursor);
4228 : }
4229 191838 : DBUG_RETURN(0);
4230 : }
4231 :
4232 :
4233 : /*
4234 : Write data of given length to the current page
4235 :
4236 : SYNOPSIS
4237 : translog_write_data_on_page()
4238 : horizon \ Pointers on file and buffer
4239 : cursor /
4240 : length IN length of the chunk
4241 : buffer buffer with data
4242 :
4243 : RETURN
4244 : 0 OK
4245 : 1 Error
4246 : */
4247 :
4248 : static my_bool translog_write_data_on_page(TRANSLOG_ADDRESS *horizon,
4249 : struct st_buffer_cursor *cursor,
4250 : translog_size_t length,
4251 : uchar *buffer)
4252 162747 : {
4253 162747 : DBUG_ENTER("translog_write_data_on_page");
4254 162747 : DBUG_PRINT("enter", ("Chunk length: %lu Page size %u",
4255 : (ulong) length, (uint) cursor->current_page_fill));
4256 162747 : DBUG_ASSERT(length > 0);
4257 162747 : DBUG_ASSERT(length + cursor->current_page_fill <= TRANSLOG_PAGE_SIZE);
4258 162747 : DBUG_ASSERT(length + cursor->ptr <= cursor->buffer->buffer +
4259 : TRANSLOG_WRITE_BUFFER);
4260 :
4261 162747 : memcpy(cursor->ptr, buffer, length);
4262 162747 : cursor->ptr+= length;
4263 162747 : (*horizon)+= length; /* adds offset */
4264 162747 : cursor->current_page_fill+= length;
4265 162747 : if (!cursor->chaser)
4266 0 : cursor->buffer->size+= length;
4267 162747 : DBUG_PRINT("info", ("Write data buffer #%u: 0x%lx "
4268 : "chaser: %d Size: %lu (%lu)",
4269 : (uint) cursor->buffer->buffer_no, (ulong) cursor->buffer,
4270 : cursor->chaser, (ulong) cursor->buffer->size,
4271 : (ulong) (cursor->ptr - cursor->buffer->buffer)));
4272 162747 : translog_check_cursor(cursor);
4273 :
4274 162747 : DBUG_RETURN(0);
4275 : }
4276 :
4277 :
4278 : /*
4279 : Write data from parts of given length to the current page
4280 :
4281 : SYNOPSIS
4282 : translog_write_parts_on_page()
4283 : horizon \ Pointers on file and buffer
4284 : cursor /
4285 : length IN length of the chunk
4286 : parts IN/OUT chunk source
4287 :
4288 : RETURN
4289 : 0 OK
4290 : 1 Error
4291 : */
4292 :
4293 : static my_bool translog_write_parts_on_page(TRANSLOG_ADDRESS *horizon,
4294 : struct st_buffer_cursor *cursor,
4295 : translog_size_t length,
4296 : struct st_translog_parts *parts)
4297 6886083 : {
4298 6886083 : translog_size_t left= length;
4299 6886083 : uint cur= (uint) parts->current;
4300 6886083 : DBUG_ENTER("translog_write_parts_on_page");
4301 6886083 : DBUG_PRINT("enter", ("Chunk length: %lu parts: %u of %u. Page size: %u "
4302 : "Buffer size: %lu (%lu)",
4303 : (ulong) length,
4304 : (uint) (cur + 1), (uint) parts->elements,
4305 : (uint) cursor->current_page_fill,
4306 : (ulong) cursor->buffer->size,
4307 : (ulong) (cursor->ptr - cursor->buffer->buffer)));
4308 6886083 : DBUG_ASSERT(length > 0);
4309 6886083 : DBUG_ASSERT(length + cursor->current_page_fill <= TRANSLOG_PAGE_SIZE);
4310 6886083 : DBUG_ASSERT(length + cursor->ptr <= cursor->buffer->buffer +
4311 : TRANSLOG_WRITE_BUFFER);
4312 :
4313 : do
4314 : {
4315 : translog_size_t len;
4316 : LEX_CUSTRING *part;
4317 : const uchar *buff;
4318 :
4319 19563775 : DBUG_ASSERT(cur < parts->elements);
4320 19563775 : part= parts->parts + cur;
4321 19563775 : buff= part->str;
4322 19563775 : DBUG_PRINT("info", ("Part: %u Length: %lu left: %lu buff: 0x%lx",
4323 : (uint) (cur + 1), (ulong) part->length, (ulong) left,
4324 : (ulong) buff));
4325 :
4326 19563775 : if (part->length > left)
4327 : {
4328 : /* we should write less then the current part */
4329 181690 : len= left;
4330 181690 : part->length-= len;
4331 181690 : part->str+= len;
4332 181690 : DBUG_PRINT("info", ("Set new part: %u Length: %lu",
4333 : (uint) (cur + 1), (ulong) part->length));
4334 : }
4335 : else
4336 : {
4337 19382085 : len= (translog_size_t) part->length;
4338 19382085 : cur++;
4339 19382085 : DBUG_PRINT("info", ("moved to next part (len: %lu)", (ulong) len));
4340 : }
4341 19563775 : DBUG_PRINT("info", ("copy: 0x%lx <- 0x%lx %u",
4342 : (ulong) cursor->ptr, (ulong)buff, (uint)len));
4343 19563775 : if (likely(len))
4344 : {
4345 19436211 : memcpy(cursor->ptr, buff, len);
4346 19436211 : left-= len;
4347 19436211 : cursor->ptr+= len;
4348 : }
4349 19563775 : } while (left);
4350 :
4351 6886083 : DBUG_PRINT("info", ("Horizon: (%lu,0x%lx) Length %lu(0x%lx)",
4352 : LSN_IN_PARTS(*horizon),
4353 : (ulong) length, (ulong) length));
4354 6886083 : parts->current= cur;
4355 6886083 : (*horizon)+= length; /* offset increasing */
4356 6886083 : cursor->current_page_fill+= length;
4357 6886083 : if (!cursor->chaser)
4358 6683726 : cursor->buffer->size+= length;
4359 : /*
4360 : We do not not updating parts->total_record_length here because it is
4361 : need only before writing record to have total length
4362 : */
4363 6886083 : DBUG_PRINT("info", ("Write parts buffer #%u: 0x%lx "
4364 : "chaser: %d Size: %lu (%lu) "
4365 : "Horizon: (%lu,0x%lx) buff offset: 0x%lx",
4366 : (uint) cursor->buffer->buffer_no, (ulong) cursor->buffer,
4367 : cursor->chaser, (ulong) cursor->buffer->size,
4368 : (ulong) (cursor->ptr - cursor->buffer->buffer),
4369 : LSN_IN_PARTS(*horizon),
4370 : (ulong) (LSN_OFFSET(cursor->buffer->offset) +
4371 : cursor->buffer->size)));
4372 6886083 : translog_check_cursor(cursor);
4373 :
4374 6886083 : DBUG_RETURN(0);
4375 : }
4376 :
4377 :
4378 : /*
4379 : Build the chunk type 0 header of a 1 group record and put it into the parts array
4380 :
4381 : SYNOPSIS
4382 : translog_write_variable_record_1group_header()
4383 : parts Descriptor of record source parts (the slot before 'current' receives the header)
4384 : type The log record type
4385 : short_trid Short transaction ID, or 0 if it is not applicable
4386 : header_length Calculated header length of chunk type 0
4387 : chunk0_header Buffer the chunk header is written to; parts keeps a pointer to it
4388 : */
4389 :
4390 : static void
4391 : translog_write_variable_record_1group_header(struct st_translog_parts *parts,
4392 : enum translog_record_type type,
4393 : SHORT_TRANSACTION_ID short_trid,
4394 : uint16 header_length,
4395 : uchar *chunk0_header)
4396 3305395 : {
4397 : LEX_CUSTRING *part;
4398 3305395 : DBUG_ASSERT(parts->current != 0); /* a free part must be left for the header */
4399 3305395 : part= parts->parts + (--parts->current);
4400 3305395 : parts->total_record_length+= (translog_size_t) (part->length= header_length);
4401 3305395 : part->str= chunk0_header;
4402 : /* byte 0: record type OR-ed with TRANSLOG_CHUNK_LSN; bytes 1..2: short_trid */
4403 3305395 : *chunk0_header= (uchar) (type | TRANSLOG_CHUNK_LSN);
4404 3305395 : int2store(chunk0_header + 1, short_trid);
4405 : /* from byte 3: the record length, coded by the helper below */
4406 3305395 : translog_write_variable_record_1group_code_len(chunk0_header + 3,
4407 : parts->record_length,
4408 : header_length);
4409 : /* last 2 bytes: 0 as chunk length, which indicates a 1 group record */
4410 3305395 : int2store(chunk0_header + header_length - 2, 0);
4411 : }
4412 :
4413 :
4414 : /*
4415 : Increase the number of writers (copy_to_buffer_in_progress) for this buffer
4416 :
4417 : SYNOPSIS
4418 : translog_buffer_increase_writers()
4419 : buffer target buffer (the caller must own its lock, which is asserted)
4420 : */
4421 :
4422 : static inline void
4423 : translog_buffer_increase_writers(struct st_translog_buffer *buffer)
4424 41023 : {
4425 41023 : DBUG_ENTER("translog_buffer_increase_writers");
4426 41023 : translog_buffer_lock_assert_owner(buffer);
4427 41023 : buffer->copy_to_buffer_in_progress++;
4428 41023 : DBUG_PRINT("info", ("copy_to_buffer_in_progress. Buffer #%u 0x%lx progress: %d",
4429 : (uint) buffer->buffer_no, (ulong) buffer,
4430 : buffer->copy_to_buffer_in_progress));
4431 41023 : DBUG_VOID_RETURN;
4432 : }
4433 :
4434 :
4435 : /*
4436 : Decrease the number of writers for this buffer; when it drops to 0, wake up all threads waiting on waiting_filling_buffer
4437 :
4438 : SYNOPSIS
4439 : translog_buffer_decrease_writers()
4440 : buffer target buffer (the caller must own its lock, which is asserted)
4441 : */
4442 :
4443 : static void translog_buffer_decrease_writers(struct st_translog_buffer *buffer)
4444 41023 : {
4445 41023 : DBUG_ENTER("translog_buffer_decrease_writers");
4446 41023 : translog_buffer_lock_assert_owner(buffer);
4447 41023 : buffer->copy_to_buffer_in_progress--;
4448 41023 : DBUG_PRINT("info",
4449 : ("copy_to_buffer_in_progress. Buffer #%u 0x%lx progress: %d",
4450 : (uint) buffer->buffer_no, (ulong) buffer,
4451 : buffer->copy_to_buffer_in_progress));
4452 41023 : if (buffer->copy_to_buffer_in_progress == 0)
4453 41023 : pthread_cond_broadcast(&buffer->waiting_filling_buffer);
4454 41023 : DBUG_VOID_RETURN;
4455 : }
4456 :
4457 :
4458 : /**
4459 : @brief Skip to the next page for the chaser (the thread which advanced the horizon
4460 : pointer and is now filling the buffer). If translog_page_next() hands back a buffer to flush, its writer count is decreased and it is flushed here.
4461 :
4462 : @param horizon \ Pointers on file position and buffer
4463 : @param cursor /
4464 :
4465 : @retval 0 OK
4466 : @retval 1 Error
4467 : */
4468 :
4469 : static my_bool translog_chaser_page_next(TRANSLOG_ADDRESS *horizon,
4470 : struct st_buffer_cursor *cursor)
4471 182021 : {
4472 : struct st_translog_buffer *buffer_to_flush;
4473 : my_bool rc;
4474 182021 : DBUG_ENTER("translog_chaser_page_next");
4475 182021 : DBUG_ASSERT(cursor->chaser);
4476 182021 : rc= translog_page_next(horizon, cursor, &buffer_to_flush);
4477 182021 : if (buffer_to_flush != NULL)
4478 : {
4479 1090 : translog_buffer_lock(buffer_to_flush);
4480 1090 : translog_buffer_decrease_writers(buffer_to_flush);
4481 1090 : if (!rc) /* flush only if the page switch itself succeeded */
4482 1090 : rc= translog_buffer_flush(buffer_to_flush);
4483 1090 : translog_buffer_unlock(buffer_to_flush);
4484 : }
4485 182021 : DBUG_RETURN(rc);
4486 : }
4487 :
4488 : /*
4489 : Move to the next page and put a chunk type 2 (one type byte plus a full page of data) from its beginning
4490 :
4491 : SYNOPSIS
4492 : translog_write_variable_record_chunk2_page()
4493 : parts Descriptor of record source parts
4494 : horizon \ Pointers on file position and buffer
4495 : cursor /
4496 :
4497 : RETURN
4498 : 0 OK
4499 : 1 Error (moving to the next page failed)
4500 : */
4501 :
4502 : static my_bool
4503 : translog_write_variable_record_chunk2_page(struct st_translog_parts *parts,
4504 : TRANSLOG_ADDRESS *horizon,
4505 : struct st_buffer_cursor *cursor)
4506 160624 : {
4507 : uchar chunk2_header[1];
4508 160624 : DBUG_ENTER("translog_write_variable_record_chunk2_page");
4509 160624 : chunk2_header[0]= TRANSLOG_CHUNK_NOHDR;
4510 :
4511 160624 : if (translog_chaser_page_next(horizon, cursor))
4512 0 : DBUG_RETURN(1);
4513 :
4514 : /* Puts the chunk type (1 byte) */
4515 160624 : translog_write_data_on_page(horizon, cursor, 1, chunk2_header);
4516 : /* Puts the chunk body: page_capacity_chunk_2 bytes taken from parts */
4517 160624 : translog_write_parts_on_page(horizon, cursor,
4518 : log_descriptor.page_capacity_chunk_2, parts);
4519 160624 : DBUG_RETURN(0);
4520 : }
4521 :
4522 :
4523 : /*
4524 : Move to the next page and put a chunk type 3 of the requested length in the buffer from its beginning
4525 :
4526 : SYNOPSIS
4527 : translog_write_variable_record_chunk3_page()
4528 : parts Descriptor of record source parts
4529 : length Length of this chunk's data; 0 means only move to the next page
4530 : horizon \ Pointers on file position and buffer
4531 : cursor /
4532 :
4533 : RETURN
4534 : 0 OK
4535 : 1 Error (moving to the next page failed)
4536 : */
4537 :
4538 : static my_bool
4539 : translog_write_variable_record_chunk3_page(struct st_translog_parts *parts,
4540 : uint16 length,
4541 : TRANSLOG_ADDRESS *horizon,
4542 : struct st_buffer_cursor *cursor)
4543 20337 : {
4544 : LEX_CUSTRING *part;
4545 : uchar chunk3_header[1 + 2]; /* chunk type byte + 2 bytes of chunk length */
4546 20337 : DBUG_ENTER("translog_write_variable_record_chunk3_page");
4547 :
4548 20337 : if (translog_chaser_page_next(horizon, cursor))
4549 0 : DBUG_RETURN(1);
4550 :
4551 20337 : if (length == 0)
4552 : {
4553 : /* It was a call to write the page header only (no data for chunk 3) */
4554 0 : DBUG_PRINT("info", ("It is a call to make page header only"));
4555 0 : DBUG_RETURN(0);
4556 : }
4557 :
4558 20337 : DBUG_ASSERT(parts->current != 0); /* a free part must be left for the header */
4559 20337 : part= parts->parts + (--parts->current);
4560 20337 : parts->total_record_length+= (translog_size_t) (part->length= 1 + 2);
4561 20337 : part->str= chunk3_header;
4562 : /* Puts the chunk type */
4563 20337 : *chunk3_header= (uchar) (TRANSLOG_CHUNK_LNGTH);
4564 : /* Puts the chunk length */
4565 20337 : int2store(chunk3_header + 1, length);
4566 :
4567 20337 : translog_write_parts_on_page(horizon, cursor, length + 1 + 2, parts);
4568 20337 : DBUG_RETURN(0);
4569 : }
4570 :
4571 : /*
4572 : Move the log pointer (horizon) by the given number of pages starting from the next page,
4573 : plus the given offset on the last page
4574 :
4575 : SYNOPSIS
4576 : translog_advance_pointer()
4577 : pages Number of full pages starting from the next one; -1 means stay on the current page
4578 : last_page_data Plus this amount of data on the last page
4579 :
4580 : RETURN
4581 : 0 OK
4582 : 1 Error (translog_create_new_file() failed)
4583 : */
4584 :
4585 : static my_bool translog_advance_pointer(int pages, uint16 last_page_data)
4586 21395 : {
4587 : translog_size_t last_page_offset= (log_descriptor.page_overhead +
4588 21395 : last_page_data);
4589 : translog_size_t offset= (TRANSLOG_PAGE_SIZE -
4590 : log_descriptor.bc.current_page_fill +
4591 21395 : pages * TRANSLOG_PAGE_SIZE + last_page_offset);
4592 : translog_size_t buffer_end_offset, file_end_offset, min_offset;
4593 21395 : DBUG_ENTER("translog_advance_pointer");
4594 21395 : DBUG_PRINT("enter", ("Pointer: (%lu, 0x%lx) + %u + %u pages + %u + %u",
4595 : LSN_IN_PARTS(log_descriptor.horizon),
4596 : (uint) (TRANSLOG_PAGE_SIZE -
4597 : log_descriptor.bc.current_page_fill),
4598 : pages, (uint) log_descriptor.page_overhead,
4599 : (uint) last_page_data));
4600 21395 : translog_lock_assert_owner();
4601 :
4602 21395 : if (pages == -1)
4603 : {
4604 : /*
4605 : It is a special case when we advance the pointer within the same page.
4606 : It can happen when we write the last part of a multi-group record.
4607 : */
4608 0 : DBUG_ASSERT(last_page_data + log_descriptor.bc.current_page_fill <=
4609 : TRANSLOG_PAGE_SIZE);
4610 0 : offset= last_page_data;
4611 0 : last_page_offset= log_descriptor.bc.current_page_fill + last_page_data;
4612 0 : goto end;
4613 : }
4614 21395 : DBUG_PRINT("info", ("last_page_offset %lu", (ulong) last_page_offset));
4615 21395 : DBUG_ASSERT(last_page_offset <= TRANSLOG_PAGE_SIZE);
4616 :
4617 : /*
4618 : The loop will be executed 1-3 times. Usually we advance the
4619 : pointer to fill only the current buffer (if we have more than 1/2 of
4620 : the buffer free) or 2 buffers (rest of the current and all of the next). In case of
4621 : a really huge record end, where we write the last group with the "table of
4622 : content" of all groups and ignore buffer borders, we can occupy
4623 : 3 buffers.
4624 : */
4625 : for (;;)
4626 : {
4627 : uint8 new_buffer_no;
4628 : struct st_translog_buffer *new_buffer;
4629 : struct st_translog_buffer *old_buffer;
4630 22485 : buffer_end_offset= TRANSLOG_WRITE_BUFFER - log_descriptor.bc.buffer->size;
4631 22485 : if (likely(log_descriptor.log_file_max_size >=
4632 : LSN_OFFSET(log_descriptor.horizon)))
4633 22485 : file_end_offset= (log_descriptor.log_file_max_size -
4634 : LSN_OFFSET(log_descriptor.horizon));
4635 : else
4636 : {
4637 : /*
4638 : We have already written more than the current file limit allows,
4639 : so we will finish this page and start a new file
4640 : */
4641 0 : file_end_offset= (TRANSLOG_PAGE_SIZE -
4642 : log_descriptor.bc.current_page_fill);
4643 : }
4644 22485 : DBUG_PRINT("info", ("offset: %lu buffer_end_offs: %lu, "
4645 : "file_end_offs: %lu",
4646 : (ulong) offset, (ulong) buffer_end_offset,
4647 : (ulong) file_end_offset));
4648 22485 : DBUG_PRINT("info", ("Buff #%u %u (0x%lx) offset 0x%lx + size 0x%lx = "
4649 : "0x%lx (0x%lx)",
4650 : (uint) log_descriptor.bc.buffer->buffer_no,
4651 : (uint) log_descriptor.bc.buffer_no,
4652 : (ulong) log_descriptor.bc.buffer,
4653 : (ulong) LSN_OFFSET(log_descriptor.bc.buffer->offset),
4654 : (ulong) log_descriptor.bc.buffer->size,
4655 : (ulong) (LSN_OFFSET(log_descriptor.bc.buffer->offset) +
4656 : log_descriptor.bc.buffer->size),
4657 : (ulong) LSN_OFFSET(log_descriptor.horizon)));
4658 22485 : DBUG_ASSERT(LSN_OFFSET(log_descriptor.bc.buffer->offset) +
4659 : log_descriptor.bc.buffer->size ==
4660 : LSN_OFFSET(log_descriptor.horizon));
4661 :
4662 22485 : if (offset <= buffer_end_offset && offset <= file_end_offset)
4663 1090 : break;
4664 1090 : old_buffer= log_descriptor.bc.buffer;
4665 1090 : new_buffer_no= (log_descriptor.bc.buffer_no + 1) % TRANSLOG_BUFFERS_NO;
4666 1090 : new_buffer= log_descriptor.buffers + new_buffer_no;
4667 :
4668 1090 : translog_buffer_lock(new_buffer);
4669 : #ifndef DBUG_OFF
4670 : {
4671 1090 : TRANSLOG_ADDRESS offset= new_buffer->offset;
4672 1090 : TRANSLOG_FILE *file= new_buffer->file;
4673 1090 : uint8 ver= new_buffer->ver;
4674 1090 : translog_lock_assert_owner();
4675 : #endif
4676 1090 : translog_wait_for_buffer_free(new_buffer);
4677 : #ifndef DBUG_OFF
4678 : /* We keep the handler locked so nobody can start this new buffer */
4679 1090 : DBUG_ASSERT(offset == new_buffer->offset && new_buffer->file == NULL &&
4680 : (file == NULL ? ver : (uint8)(ver + 1)) == new_buffer->ver);
4681 : }
4682 : #endif
4683 :
4684 1090 : min_offset= min(buffer_end_offset, file_end_offset);
4685 : /* TODO: check whether ptr or size alone is enough */
4686 1090 : log_descriptor.bc.buffer->size+= min_offset;
4687 1090 : log_descriptor.bc.ptr+= min_offset;
4688 1090 : DBUG_PRINT("info", ("NewP buffer #%u: 0x%lx chaser: %d Size: %lu (%lu)",
4689 : (uint) log_descriptor.bc.buffer->buffer_no,
4690 : (ulong) log_descriptor.bc.buffer,
4691 : log_descriptor.bc.chaser,
4692 : (ulong) log_descriptor.bc.buffer->size,
4693 : (ulong) (log_descriptor.bc.ptr -log_descriptor.bc.
4694 : buffer->buffer)));
4695 1090 : DBUG_ASSERT((ulong) (log_descriptor.bc.ptr -
4696 : log_descriptor.bc.buffer->buffer) ==
4697 : log_descriptor.bc.buffer->size);
4698 1090 : DBUG_ASSERT(log_descriptor.bc.buffer->buffer_no ==
4699 : log_descriptor.bc.buffer_no);
4700 1090 : translog_buffer_increase_writers(log_descriptor.bc.buffer);
4701 :
4702 1090 : if (file_end_offset <= buffer_end_offset)
4703 : {
4704 20 : log_descriptor.horizon+= LSN_ONE_FILE;
4705 20 : log_descriptor.horizon= LSN_REPLACE_OFFSET(log_descriptor.horizon,
4706 : TRANSLOG_PAGE_SIZE);
4707 20 : DBUG_PRINT("info", ("New file: %lu",
4708 : (ulong) LSN_FILE_NO(log_descriptor.horizon)));
4709 20 : if (translog_create_new_file())
4710 : {
4711 0 : DBUG_RETURN(1); /* NOTE(review): returns with new_buffer still locked and the writer count raised above - confirm callers cope */
4712 : }
4713 : }
4714 : else
4715 : {
4716 1070 : DBUG_PRINT("info", ("The same file"));
4717 1070 : log_descriptor.horizon+= min_offset; /* offset increasing */
4718 : }
4719 1090 : translog_start_buffer(new_buffer, &log_descriptor.bc, new_buffer_no);
4720 1090 : old_buffer->next_buffer_offset= new_buffer->offset;
4721 1090 : new_buffer->prev_buffer_offset= old_buffer->offset;
4722 1090 : translog_buffer_unlock(old_buffer);
4723 1090 : offset-= min_offset;
4724 1090 : }
4725 21395 : DBUG_PRINT("info", ("drop write_counter"));
4726 21395 : log_descriptor.bc.write_counter= 0;
4727 21395 : log_descriptor.bc.previous_offset= 0;
4728 21395 : end:
4729 21395 : log_descriptor.bc.ptr+= offset;
4730 21395 : log_descriptor.bc.buffer->size+= offset;
4731 21395 : translog_buffer_increase_writers(log_descriptor.bc.buffer);
4732 21395 : log_descriptor.horizon+= offset; /* offset increasing */
4733 21395 : log_descriptor.bc.current_page_fill= last_page_offset;
4734 21395 : DBUG_PRINT("info", ("NewP buffer #%u: 0x%lx chaser: %d Size: %lu (%lu) "
4735 : "offset: %u last page: %u",
4736 : (uint) log_descriptor.bc.buffer->buffer_no,
4737 : (ulong) log_descriptor.bc.buffer,
4738 : log_descriptor.bc.chaser,
4739 : (ulong) log_descriptor.bc.buffer->size,
4740 : (ulong) (log_descriptor.bc.ptr -
4741 : log_descriptor.bc.buffer->
4742 : buffer), (uint) offset,
4743 : (uint) last_page_offset));
4744 21395 : DBUG_PRINT("info",
4745 : ("pointer moved to: (%lu, 0x%lx)",
4746 : LSN_IN_PARTS(log_descriptor.horizon)));
4747 21395 : translog_check_cursor(&log_descriptor.bc);
4748 21395 : log_descriptor.bc.protected= 0;
4749 21395 : DBUG_RETURN(0);
4750 : }
4751 :
4752 :
4753 : /*
4754 : Get the rest of the current page (TRANSLOG_PAGE_SIZE minus the current page fill)
4755 :
4756 : SYNOPSIS
4757 : translog_get_current_page_rest()
4758 :
4759 : NOTE the loghandler should be locked
4760 :
4761 : RETURN
4762 : number of bytes left on the current page
4763 : */
4764 :
4765 : static uint translog_get_current_page_rest()
4766 21395 : {
4767 21395 : return (TRANSLOG_PAGE_SIZE - log_descriptor.bc.current_page_fill);
4768 : }
4769 :
4770 :
4771 : /*
4772 : Get the rest of the current buffer in full pages, counted from the current write pointer
4773 :
4774 : SYNOPSIS
4775 : translog_get_current_buffer_rest()
4776 :
4777 : NOTE the loghandler should be locked
4778 :
4779 : RETURN
4780 : number of full pages left in the current buffer (the division rounds down)
4781 : */
4782 :
4783 : static uint translog_get_current_buffer_rest()
4784 21395 : {
4785 21395 : return ((log_descriptor.bc.buffer->buffer + TRANSLOG_WRITE_BUFFER -
4786 : log_descriptor.bc.ptr) /
4787 : TRANSLOG_PAGE_SIZE);
4788 : }
4789 :
4790 : /*
4791 : Calculate the possible group size without the first (current) page
4792 :
4793 : SYNOPSIS
4794 : translog_get_current_group_size()
4795 :
4796 : NOTE the loghandler should be locked
4797 :
4798 : RETURN
4799 : group size without the first (current) page: full pages left * page_capacity_chunk_2, plus one whole buffer if that is below half a buffer
4800 : */
4801 :
4802 : static translog_size_t translog_get_current_group_size()
4803 21395 : {
4804 : /* the buffer rest in full pages */
4805 21395 : translog_size_t buffer_rest= translog_get_current_buffer_rest();
4806 21395 : DBUG_ENTER("translog_get_current_group_size");
4807 21395 : DBUG_PRINT("info", ("buffer_rest in pages: %u", buffer_rest));
4808 :
4809 21395 : buffer_rest*= log_descriptor.page_capacity_chunk_2;
4810 : /* if less than half of the buffer is free, the group may take this and the next buffer */
4811 21395 : if (buffer_rest < log_descriptor.half_buffer_capacity_chunk_2)
4812 : {
4813 1010 : DBUG_PRINT("info", ("buffer_rest: %lu -> add %lu",
4814 : (ulong) buffer_rest,
4815 : (ulong) log_descriptor.buffer_capacity_chunk_2));
4816 1010 : buffer_rest+= log_descriptor.buffer_capacity_chunk_2;
4817 : }
4818 :
4819 21395 : DBUG_PRINT("info", ("buffer_rest: %lu", (ulong) buffer_rest));
4820 :
4821 21395 : DBUG_RETURN(buffer_rest);
4822 : }
4823 :
4824 :
4825 : static inline void set_lsn(LSN *lsn, LSN value)
4826 6704064 : {
4827 6704064 : DBUG_ENTER("set_lsn");
4828 6704064 : translog_lock_assert_owner();
4829 6704064 : *lsn= value;
4830 : /* a new LSN was generated, so the log now has something which is not flushed */
4831 6704064 : log_descriptor.is_everything_flushed= 0;
4832 6704064 : DBUG_PRINT("info", ("new LSN appeared: (%lu,0x%lx)", LSN_IN_PARTS(value)));
4833 6704064 : DBUG_VOID_RETURN;
4834 : }
4835 :
4836 :
4837 : /**
4838 : @brief Write variable record in 1 group.
4839 :
4840 : @param lsn LSN of the record will be written here
4841 : @param type the log record type
4842 : @param short_trid Short transaction ID, or 0 if it is not applicable
4843 : @param parts Descriptor of record source parts
4844 : @param buffer_to_flush Buffer which has to be flushed if it is not 0
4845 : @param header_length Calculated header length of chunk type 0
4846 : @param trn Transaction structure pointer, passed to the record type's in-write hook
4847 : @param tbl_info MARIA_HA pointer, passed to the record type's in-write hook
4848 : @param hook_arg Argument which will be passed to pre-write and
4849 : in-write hooks of this record.
4850 :
4851 : @note
4852 : We must have a translog_lock() when entering this function; it is released here on all paths
4853 : We must have buffer_to_flush locked (if not null). NOTE(review): it is unlocked on the normal path but not on the early error return - confirm the caller handles that
4854 :
4855 : @return Operation status
4856 : @retval 0 OK
4857 : @retval 1 Error
4858 : */
4859 :
4860 : static my_bool
4861 : translog_write_variable_record_1group(LSN *lsn,
4862 : enum translog_record_type type,
4863 : MARIA_HA *tbl_info,
4864 : SHORT_TRANSACTION_ID short_trid,
4865 : struct st_translog_parts *parts,
4866 : struct st_translog_buffer
4867 : *buffer_to_flush, uint16 header_length,
4868 : TRN *trn, void *hook_arg)
4869 20335 : {
4870 : TRANSLOG_ADDRESS horizon;
4871 : struct st_buffer_cursor cursor;
4872 20335 : int rc= 0;
4873 : uint i;
4874 : translog_size_t record_rest, full_pages, first_page;
4875 20335 : uint additional_chunk3_page= 0;
4876 : uchar chunk0_header[1 + 2 + 5 + 2];
4877 20335 : DBUG_ENTER("translog_write_variable_record_1group");
4878 20335 : translog_lock_assert_owner();
4879 20335 : if (buffer_to_flush)
4880 0 : translog_buffer_lock_assert_owner(buffer_to_flush);
4881 :
4882 20335 : set_lsn(lsn, horizon= log_descriptor.horizon);
4883 20335 : if (translog_set_lsn_for_files(LSN_FILE_NO(*lsn), LSN_FILE_NO(*lsn),
4884 : *lsn, TRUE) ||
4885 : (log_record_type_descriptor[type].inwrite_hook &&
4886 : (*log_record_type_descriptor[type].inwrite_hook)(type, trn, tbl_info,
4887 : lsn, hook_arg)))
4888 : {
4889 0 : translog_unlock();
4890 0 : DBUG_RETURN(1);
4891 : }
4892 20335 : cursor= log_descriptor.bc;
4893 20335 : cursor.chaser= 1;
4894 :
4895 : /* Advance the pointer to be able to unlock the loghandler */
4896 20335 : first_page= translog_get_current_page_rest();
4897 20335 : record_rest= parts->record_length - (first_page - header_length);
4898 20335 : full_pages= record_rest / log_descriptor.page_capacity_chunk_2;
4899 20335 : record_rest= (record_rest % log_descriptor.page_capacity_chunk_2);
4900 :
4901 20335 : if (record_rest + 1 == log_descriptor.page_capacity_chunk_2)
4902 : {
4903 0 : DBUG_PRINT("info", ("2 chunks type 3 is needed"));
4904 : /* We will write 2 chunks of type 3 at the end of this group */
4905 0 : additional_chunk3_page= 1;
4906 0 : record_rest= 1;
4907 : }
4908 :
4909 20335 : DBUG_PRINT("info", ("first_page: %u (%u) full_pages: %u (%lu) "
4910 : "additional: %u (%u) rest %u = %u",
4911 : first_page, first_page - header_length,
4912 : full_pages,
4913 : (ulong) full_pages *
4914 : log_descriptor.page_capacity_chunk_2,
4915 : additional_chunk3_page,
4916 : additional_chunk3_page *
4917 : (log_descriptor.page_capacity_chunk_2 - 1),
4918 : record_rest, parts->record_length));
4919 : /* record_rest + 3 is the chunk type 3 overhead + record_rest */
4920 20335 : rc|= translog_advance_pointer((int)(full_pages + additional_chunk3_page),
4921 : (record_rest ? record_rest + 3 : 0));
4922 20335 : log_descriptor.bc.buffer->last_lsn= *lsn;
4923 20335 : DBUG_PRINT("info", ("last_lsn set to (%lu,0x%lx) buffer: 0x%lx",
4924 : LSN_IN_PARTS(log_descriptor.bc.buffer->last_lsn),
4925 : (ulong) log_descriptor.bc.buffer));
4926 :
4927 20335 : translog_unlock();
4928 :
4929 : /*
4930 : Check if we switched buffer and need to process it (the current buffer is
4931 : unlocked already => we will not delay other threads)
4932 : */
4933 20335 : if (buffer_to_flush != NULL)
4934 : {
4935 0 : if (!rc)
4936 0 : rc= translog_buffer_flush(buffer_to_flush);
4937 0 : translog_buffer_unlock(buffer_to_flush);
4938 : }
4939 20335 : if (rc)
4940 0 : DBUG_RETURN(1);
4941 :
4942 20335 : translog_write_variable_record_1group_header(parts, type, short_trid,
4943 : header_length, chunk0_header);
4944 :
4945 : /* fill the pages: the rest of the first page goes first, written through the local cursor */
4946 20335 : translog_write_parts_on_page(&horizon, &cursor, first_page, parts);
4947 :
4948 20335 : DBUG_PRINT("info", ("absolute horizon: (%lu,0x%lx) local: (%lu,0x%lx)",
4949 : LSN_IN_PARTS(log_descriptor.horizon),
4950 : LSN_IN_PARTS(horizon)));
4951 :
4952 46694 : for (i= 0; i < full_pages; i++)
4953 : {
4954 26359 : if (translog_write_variable_record_chunk2_page(parts, &horizon, &cursor))
4955 0 : DBUG_RETURN(1);
4956 :
4957 26359 : DBUG_PRINT("info", ("absolute horizon: (%lu,0x%lx) local: (%lu,0x%lx)",
4958 : LSN_IN_PARTS(log_descriptor.horizon),
4959 : LSN_IN_PARTS(horizon)));
4960 : }
4961 :
4962 20335 : if (additional_chunk3_page)
4963 : {
4964 0 : if (translog_write_variable_record_chunk3_page(parts,
4965 : log_descriptor.
4966 : page_capacity_chunk_2 - 2,
4967 : &horizon, &cursor))
4968 0 : DBUG_RETURN(1);
4969 0 : DBUG_PRINT("info", ("absolute horizon: (%lu,0x%lx) local: (%lu,0x%lx)",
4970 : LSN_IN_PARTS(log_descriptor.horizon),
4971 : LSN_IN_PARTS(horizon)));
4972 0 : DBUG_ASSERT(cursor.current_page_fill == TRANSLOG_PAGE_SIZE);
4973 : }
4974 :
4975 20335 : if (translog_write_variable_record_chunk3_page(parts,
4976 : record_rest,
4977 : &horizon, &cursor))
4978 0 : DBUG_RETURN(1);
4979 20335 : DBUG_PRINT("info", ("absolute horizon: (%lu,0x%lx) local: (%lu,0x%lx)",
4980 : (ulong) LSN_FILE_NO(log_descriptor.horizon),
4981 : (ulong) LSN_OFFSET(log_descriptor.horizon),
4982 : (ulong) LSN_FILE_NO(horizon),
4983 : (ulong) LSN_OFFSET(horizon)));
4984 :
4985 20335 : translog_buffer_lock(cursor.buffer);
4986 20335 : translog_buffer_decrease_writers(cursor.buffer);
4987 20335 : translog_buffer_unlock(cursor.buffer);
4988 20335 : DBUG_RETURN(rc);
4989 : }
4990 :
4991 :
4992 : /**
4993 : @brief Write variable record in 1 chunk.
4994 :
4995 : @param lsn LSN of the record will be written here
4996 : @param type the log record type
4997 : @param short_trid Short transaction ID, or 0 if it is not applicable
4998 : @param parts Descriptor of record source parts
4999 : @param buffer_to_flush Buffer which has to be flushed if it is not 0
5000 : @param header_length Calculated header length of chunk type 0
5001 : @param trn Transaction structure pointer, passed to the record type's in-write hook
5002 : @param tbl_info MARIA_HA pointer, passed to the record type's in-write hook
5003 : @param hook_arg Argument which will be passed to pre-write and
5004 : in-write hooks of this record.
5005 :
5006 : @note
5007 : We must have a translog_lock() when entering this function; it is released here on all paths
5008 : We must have buffer_to_flush locked (if not null). NOTE(review): it is unlocked on the normal path but not on the early error return - confirm the caller handles that
5009 :
5010 : @return Operation status
5011 : @retval 0 OK
5012 : @retval 1 Error
5013 : */
5014 :
5015 : static my_bool
5016 : translog_write_variable_record_1chunk(LSN *lsn,
5017 : enum translog_record_type type,
5018 : MARIA_HA *tbl_info,
5019 : SHORT_TRANSACTION_ID short_trid,
5020 : struct st_translog_parts *parts,
5021 : struct st_translog_buffer
5022 : *buffer_to_flush, uint16 header_length,
5023 : TRN *trn, void *hook_arg)
5024 3285060 : {
5025 : int rc;
5026 : uchar chunk0_header[1 + 2 + 5 + 2];
5027 3285060 : DBUG_ENTER("translog_write_variable_record_1chunk");
5028 3285060 : translog_lock_assert_owner();
5029 3285060 : if (buffer_to_flush)
5030 2 : translog_buffer_lock_assert_owner(buffer_to_flush);
5031 :
5032 3285060 : translog_write_variable_record_1group_header(parts, type, short_trid,
5033 : header_length, chunk0_header);
5034 3285060 : set_lsn(lsn, log_descriptor.horizon);
5035 3285060 : if (translog_set_lsn_for_files(LSN_FILE_NO(*lsn), LSN_FILE_NO(*lsn),
5036 : *lsn, TRUE) ||
5037 : (log_record_type_descriptor[type].inwrite_hook &&
5038 : (*log_record_type_descriptor[type].inwrite_hook)(type, trn, tbl_info,
5039 : lsn, hook_arg)))
5040 : {
5041 0 : translog_unlock();
5042 0 : DBUG_RETURN(1);
5043 : }
5044 :
5045 3285060 : rc= translog_write_parts_on_page(&log_descriptor.horizon,
5046 : &log_descriptor.bc,
5047 : parts->total_record_length, parts);
5048 3285060 : log_descriptor.bc.buffer->last_lsn= *lsn;
5049 3285060 : DBUG_PRINT("info", ("last_lsn set to (%lu,0x%lx) buffer: 0x%lx",
5050 : LSN_IN_PARTS(log_descriptor.bc.buffer->last_lsn),
5051 : (ulong) log_descriptor.bc.buffer));
5052 3285060 : translog_unlock();
5053 :
5054 : /*
5055 : Check if we switched buffer and need to process it (the current buffer is
5056 : unlocked already => we will not delay other threads)
5057 : */
5058 3285060 : if (buffer_to_flush != NULL)
5059 : {
5060 2 : if (!rc)
5061 2 : rc= translog_buffer_flush(buffer_to_flush);
5062 2 : translog_buffer_unlock(buffer_to_flush);
5063 : }
5064 :
5065 3285060 : DBUG_RETURN(rc);
5066 : }
5067 :
5068 :
5069 : /**
5070 : @brief Calculates and writes the LSN difference (compressed LSN).
5071 :
5072 : @param base_lsn LSN from which we calculate the difference (must be greater than lsn)
5073 : @param lsn LSN for encoding
5074 : @param dst Result will be written to dst[-pack_length] .. dst[-1]
5075 :
5076 : @note To store an LSN in a compact way we will use the following compression:
5077 : If a log record has LSN1, and it contains the LSN2 as a back reference,
5078 : Instead of LSN2 we write LSN1-LSN2, encoded as:
5079 : two bits the number N (see below)
5080 : 14 bits
5081 : N bytes
5082 : That is, LSN is encoded in 2..5 bytes, and the number of bytes minus 2
5083 : is stored in the first two bits. A larger difference is stored as bytes 0, 1 followed by the full LSN (2 + LSN_STORE_SIZE bytes).
5084 :
5085 : @note the function was made to write the result in backward direction with no
5086 : special sense or tricks; both directions are equal in complexity
5087 :
5088 : @retval # pointer to the first byte of the coded LSN
5089 : */
5090 :
5091 : static uchar *translog_put_LSN_diff(LSN base_lsn, LSN lsn, uchar *dst)
5092 1709762 : {
5093 : uint64 diff;
5094 1709762 : DBUG_ENTER("translog_put_LSN_diff");
5095 1709762 : DBUG_PRINT("enter", ("Base: (%lu,0x%lx) val: (%lu,0x%lx) dst: 0x%lx",
5096 : LSN_IN_PARTS(base_lsn), LSN_IN_PARTS(lsn),
5097 : (ulong) dst));
5098 1709762 : DBUG_ASSERT(base_lsn > lsn);
5099 1709762 : diff= base_lsn - lsn;
5100 1709762 : DBUG_PRINT("info", ("Diff: 0x%llx", (ulonglong) diff));
5101 1709762 : if (diff <= 0x3FFF)
5102 : {
5103 1364585 : dst-= 2;
5104 : /*
5105 : Note we store the high uchar first to ensure that the first uchar has
5106 : 0 in the 2 upper bits (diff <= 0x3FFF), which is the length code 0.
5107 : */
5108 1364585 : dst[0]= (uchar)(diff >> 8);
5109 1364585 : dst[1]= (uchar)(diff & 0xFF);
5110 : }
5111 345177 : else if (diff <= 0x3FFFFFL)
5112 : {
5113 276336 : dst-= 3;
5114 276336 : dst[0]= (uchar)(0x40 | (diff >> 16));
5115 276336 : int2store(dst + 1, diff & 0xFFFF);
5116 : }
5117 68841 : else if (diff <= 0x3FFFFFFFL)
5118 : {
5119 66536 : dst-= 4;
5120 66536 : dst[0]= (uchar)(0x80 | (diff >> 24));
5121 66536 : int3store(dst + 1, diff & 0xFFFFFFL);
5122 : }
5123 2305 : else if (diff <= LL(0x3FFFFFFFFF))
5124 :
5125 : {
5126 2305 : dst-= 5;
5127 2305 : dst[0]= (uchar)(0xC0 | (diff >> 32));
5128 2305 : int4store(dst + 1, diff & 0xFFFFFFFFL);
5129 : }
5130 : else
5131 : {
5132 : /*
5133 : It is the full LSN stored after a special diff of 1 (which is impossible
5134 : in real life), i.e. after the bytes 0, 1
5135 : */
5136 0 : dst-= 2 + LSN_STORE_SIZE;
5137 0 : dst[0]= 0;
5138 0 : dst[1]= 1;
5139 0 : lsn_store(dst + 2, lsn);
5140 : }
5141 1709762 : DBUG_PRINT("info", ("new dst: 0x%lx", (ulong) dst));
5142 1709762 : DBUG_RETURN(dst);
5143 : }
5144 :
5145 :
5146 : /*
5147 : Get the LSN from an LSN-difference (compressed LSN)
5148 :
5149 : SYNOPSIS
5150 : translog_get_LSN_from_diff()
5151 : base_lsn LSN from which the difference was calculated
5152 : src pointer to the coded lsn
5153 : dst pointer to the buffer where to write the decoded LSN (LSN_STORE_SIZE bytes)
5154 :
5155 : NOTE:
5156 : To store an LSN in a compact way we will use the following compression:
5157 :
5158 : If a log record has LSN1, and it contains the LSN2 as a back reference,
5159 : Instead of LSN2 we write LSN1-LSN2, encoded as:
5160 :
5161 : two bits the number N (see below)
5162 : 14 bits
5163 : N bytes
5164 :
5165 : That is, LSN is encoded in 2..5 bytes, and the number of bytes minus 2
5166 : is stored in the first two bits.
5167 :
5168 : RETURN
5169 : pointer into src just after the coded LSN
5170 : */
5171 :
5172 : static uchar *translog_get_LSN_from_diff(LSN base_lsn, uchar *src, uchar *dst)
5173 5174978 : {
5174 : LSN lsn;
5175 : uint32 diff;
5176 : uint32 first_byte;
5177 : uint32 file_no, rec_offset;
5178 : uint8 code;
5179 5174978 : DBUG_ENTER("translog_get_LSN_from_diff");
5180 5174978 : DBUG_PRINT("enter", ("Base: (%lu,0x%lx) src: 0x%lx dst 0x%lx",
5181 : LSN_IN_PARTS(base_lsn), (ulong) src, (ulong) dst));
5182 5174978 : first_byte= *((uint8*) src);
5183 5174978 : code= first_byte >> 6; /* Length is in the 2 most significant bits */
5184 5174978 : first_byte&= 0x3F;
5185 5174978 : src++; /* Skip the first byte (length code + high bits of the diff) */
5186 5174978 : file_no= LSN_FILE_NO(base_lsn); /* Assume the same file as base_lsn */
5187 5174978 : DBUG_PRINT("info", ("code: %u first byte: %lu",
5188 : (uint) code, (ulong) first_byte));
5189 5174978 : switch (code) {
5190 : case 0:
5191 3165037 : if (first_byte == 0 && *((uint8*)src) == 1)
5192 : {
5193 : /*
5194 : It is the full LSN stored after a special diff of 1 (which is impossible
5195 : in real life)
5196 : */
5197 0 : memcpy(dst, src + 1, LSN_STORE_SIZE);
5198 0 : DBUG_PRINT("info", ("Special case of full LSN, new src: 0x%lx",
5199 : (ulong) (src + 1 + LSN_STORE_SIZE)));
5200 0 : DBUG_RETURN(src + 1 + LSN_STORE_SIZE);
5201 : }
5202 3165037 : rec_offset= LSN_OFFSET(base_lsn) - ((first_byte << 8) + *((uint8*)src));
5203 3165037 : break;
5204 : case 1:
5205 1605926 : diff= uint2korr(src);
5206 1605926 : rec_offset= LSN_OFFSET(base_lsn) - ((first_byte << 16) + diff);
5207 1605926 : break;
5208 : case 2:
5209 399027 : diff= uint3korr(src);
5210 399027 : rec_offset= LSN_OFFSET(base_lsn) - ((first_byte << 24) + diff);
5211 399027 : break;
5212 : case 3:
5213 : {
5214 4988 : ulonglong base_offset= LSN_OFFSET(base_lsn);
5215 4988 : diff= uint4korr(src);
5216 4988 : if (diff > LSN_OFFSET(base_lsn))
5217 : {
5218 : /* borrow 1 from the file number to keep the offset subtraction non-negative */
5219 36 : first_byte++;
5220 36 : base_offset+= LL(0x100000000);
5221 : }
5222 4988 : file_no= LSN_FILE_NO(base_lsn) - first_byte;
5223 4988 : DBUG_ASSERT(base_offset - diff <= UINT_MAX);
5224 4988 : rec_offset= (uint32)(base_offset - diff);
5225 4988 : break;
5226 : }
5227 : default:
5228 0 : DBUG_ASSERT(0);
5229 : DBUG_RETURN(NULL);
5230 : }
5231 5174978 : lsn= MAKE_LSN(file_no, rec_offset);
5232 5174978 : src+= code + 1;
5233 5174978 : lsn_store(dst, lsn);
5234 5174978 : DBUG_PRINT("info", ("new src: 0x%lx", (ulong) src));
5235 5174978 : DBUG_RETURN(src);
5236 : }
5237 :
5238 :
5239 : /**
5240 : @brief Encodes relative LSNs listed in the parameters.
5241 :
5242 : @param parts Parts list with the LSN(s) to encode; the part holding them and the record lengths are updated
5243 : @param base_lsn LSN which is the base for encoding
5244 : @param lsns number of LSN(s) to encode
5245 : @param compressed_LSNs buffer used for storing the compressed LSN(s); it is filled from its end backwards
5246 : */
5247 :
5248 : static void translog_relative_LSN_encode(struct st_translog_parts *parts,
5249 : LSN base_lsn,
5250 : uint lsns, uchar *compressed_LSNs)
5251 1708164 : {
5252 : LEX_CUSTRING *part;
5253 1708164 : uint lsns_len= lsns * LSN_STORE_SIZE;
5254 : uchar buffer_src[MAX_NUMBER_OF_LSNS_PER_RECORD * LSN_STORE_SIZE];
5255 1708164 : uchar *buffer= buffer_src;
5256 : const uchar *cbuffer;
5257 :
5258 1708164 : DBUG_ENTER("translog_relative_LSN_encode");
5259 :
5260 1708164 : DBUG_ASSERT(parts->current != 0);
5261 1708164 : part= parts->parts + parts->current;
5262 :
5263 : /* collect all LSN(s) in one chunk (the local buffer) if they are divided between parts */
5264 1708164 : if (part->length < lsns_len)
5265 : {
5266 0 : uint copied= part->length;
5267 : LEX_CUSTRING *next_part;
5268 0 : DBUG_PRINT("info", ("Using buffer: 0x%lx", (ulong) compressed_LSNs));
5269 0 : memcpy(buffer, part->str, part->length);
5270 0 : next_part= parts->parts + parts->current + 1;
5271 : do
5272 : {
5273 0 : DBUG_ASSERT(next_part < parts->parts + parts->elements);
5274 0 : if ((next_part->length + copied) < lsns_len)
5275 : {
5276 0 : memcpy(buffer + copied, next_part->str,
5277 : next_part->length);
5278 0 : copied+= next_part->length;
5279 0 : next_part->length= 0; next_part->str= 0;
5280 : /* delete_dynamic_element(&parts->parts, parts->current + 1); */
5281 0 : next_part++;
5282 0 : parts->current++;
5283 0 : part= parts->parts + parts->current;
5284 : }
5285 : else
5286 : {
5287 0 : uint len= lsns_len - copied;
5288 0 : memcpy(buffer + copied, next_part->str, len);
5289 0 : copied= lsns_len;
5290 0 : next_part->str+= len;
5291 0 : next_part->length-= len;
5292 : }
5293 0 : } while (copied < lsns_len);
5294 0 : cbuffer= buffer;
5295 : }
5296 : else
5297 : {
5298 1708164 : cbuffer= part->str;
5299 1708164 : part->str+= lsns_len;
5300 1708164 : part->length-= lsns_len;
5301 1708164 : parts->current--;
5302 1708164 : part= parts->parts + parts->current;
5303 : }
5304 :
5305 : {
5306 : /* Compress the LSN(s) into compressed_LSNs and point the part at the result */
5307 : LSN ref;
5308 : int economy;
5309 : const uchar *src_ptr;
5310 : uchar *dst_ptr= compressed_LSNs + (MAX_NUMBER_OF_LSNS_PER_RECORD *
5311 1708164 : COMPRESSED_LSN_MAX_STORE_SIZE);
5312 : /*
5313 : We write the result in backward direction with no special sense or
5314 : tricks; both directions are equal in complexity
5315 : */
5316 1708164 : for (src_ptr= cbuffer + lsns_len - LSN_STORE_SIZE;
5317 5126090 : src_ptr >= (const uchar*)cbuffer;
5318 1709762 : src_ptr-= LSN_STORE_SIZE)
5319 : {
5320 1709762 : ref= lsn_korr(src_ptr);
5321 1709762 : dst_ptr= translog_put_LSN_diff(base_lsn, ref, dst_ptr);
5322 : }
5323 1708164 : part->length= (uint)((compressed_LSNs +
5324 : (MAX_NUMBER_OF_LSNS_PER_RECORD *
5325 : COMPRESSED_LSN_MAX_STORE_SIZE)) -
5326 : dst_ptr);
5327 1708164 : parts->record_length-= (economy= lsns_len - part->length);
5328 1708164 : DBUG_PRINT("info", ("new length of LSNs: %lu economy: %d",
5329 : (ulong)part->length, economy));
5330 1708164 : parts->total_record_length-= economy;
5331 1708164 : part->str= dst_ptr;
5332 : }
5333 1708164 : DBUG_VOID_RETURN;
5334 : }
5335 :
5336 :
5337 : /**
5338 : @brief Write multi-group variable-size record.
5339 :
5340 : @param lsn LSN of the record will be written here
5341 : @param type the log record type
5342 : @param short_trid Short transaction ID or 0 if it has no sense
5343 : @param parts Descriptor of record source parts
5344 : @param buffer_to_flush Buffer which have to be flushed if it is not 0
5345 : @param header_length Header length calculated for 1 group
5346 : @param buffer_rest Beginning from which we plan to write in full pages
5347 : @param trn Transaction structure pointer for hooks by
5348 : record log type, for short_id
5349 : @param hook_arg Argument which will be passed to pre-write and
5350 : in-write hooks of this record.
5351 :
5352 : @note
5353 : We must have a translog_lock() when entering this function
5354 :
5355 : We must have buffer_to_flush locked (if not null)
5356 : buffer_to_flush should *NOT* be locked when calling this function.
5357 : (This is note is here as this is different from most other
5358 : translog_write...() functions which require the buffer to be locked)
5359 :
5360 : @return Operation status
5361 : @retval 0 OK
5362 : @retval 1 Error
5363 : */
5364 :
5365 : static my_bool
5366 : translog_write_variable_record_mgroup(LSN *lsn,
5367 : enum translog_record_type type,
5368 : MARIA_HA *tbl_info,
5369 : SHORT_TRANSACTION_ID short_trid,
5370 : struct st_translog_parts *parts,
5371 : struct st_translog_buffer
5372 : *buffer_to_flush,
5373 : uint16 header_length,
5374 : translog_size_t buffer_rest,
5375 : TRN *trn, void *hook_arg)
5376 3 : {
5377 : TRANSLOG_ADDRESS horizon;
5378 : struct st_buffer_cursor cursor;
5379 3 : int rc= 0;
5380 : uint i, chunk2_page, full_pages;
5381 3 : uint curr_group= 0;
5382 3 : translog_size_t record_rest, first_page, chunk3_pages, chunk0_pages= 1;
5383 3 : translog_size_t done= 0;
5384 : struct st_translog_group_descriptor group;
5385 : DYNAMIC_ARRAY groups;
5386 : uint16 chunk3_size;
5387 3 : uint16 page_capacity= log_descriptor.page_capacity_chunk_2 + 1;
5388 : uint16 last_page_capacity;
5389 3 : my_bool new_page_before_chunk0= 1, first_chunk0= 1;
5390 : uchar chunk0_header[1 + 2 + 5 + 2 + 2], group_desc[7 + 1];
5391 : uchar chunk2_header[1];
5392 3 : uint header_fixed_part= header_length + 2;
5393 3 : uint groups_per_page= (page_capacity - header_fixed_part) / (7 + 1);
5394 : uint file_of_the_first_group;
5395 : int pages_to_skip;
5396 : struct st_translog_buffer *buffer_of_last_lsn;
5397 3 : DBUG_ENTER("translog_write_variable_record_mgroup");
5398 3 : translog_lock_assert_owner();
5399 :
5400 3 : chunk2_header[0]= TRANSLOG_CHUNK_NOHDR;
5401 :
5402 3 : if (my_init_dynamic_array(&groups,
5403 : sizeof(struct st_translog_group_descriptor),
5404 : 10, 10))
5405 : {
5406 0 : translog_unlock();
5407 0 : DBUG_PRINT("error", ("init array failed"));
5408 0 : DBUG_RETURN(1);
5409 : }
5410 :
5411 3 : first_page= translog_get_current_page_rest();
5412 3 : record_rest= parts->record_length - (first_page - 1);
5413 3 : DBUG_PRINT("info", ("Record Rest: %lu", (ulong) record_rest));
5414 :
5415 3 : if (record_rest < buffer_rest)
5416 : {
5417 : /*
5418 : The record (group 1 type) is larger than the free space on the page
5419 : - we need to split it in two. But when we split it in two, the first
5420 : part is big enough to hold all the data of the record (because the
5421 : header of the first part of the split is smaller than the header of
5422 : the record as a whole when it takes only one chunk)
5423 : */
5424 0 : DBUG_PRINT("info", ("too many free space because changing header"));
5425 0 : buffer_rest-= log_descriptor.page_capacity_chunk_2;
5426 0 : DBUG_ASSERT(record_rest >= buffer_rest);
5427 : }
5428 :
5429 3 : file_of_the_first_group= LSN_FILE_NO(log_descriptor.horizon);
5430 3 : translog_mark_file_unfinished(file_of_the_first_group);
5431 : do
5432 : {
5433 1057 : group.addr= horizon= log_descriptor.horizon;
5434 1057 : cursor= log_descriptor.bc;
5435 1057 : cursor.chaser= 1;
5436 1057 : if ((full_pages= buffer_rest / log_descriptor.page_capacity_chunk_2) > 255)
5437 : {
5438 : /* sizeof(uint8) == 256 is max number of chunk in multi-chunks group */
5439 0 : full_pages= 255;
5440 0 : buffer_rest= full_pages * log_descriptor.page_capacity_chunk_2;
5441 : }
5442 : /*
5443 : group chunks =
5444 : full pages + first page (which actually can be full, too).
5445 : But here we assign number of chunks - 1
5446 : */
5447 1057 : group.num= full_pages;
5448 1057 : if (insert_dynamic(&groups, (uchar*) &group))
5449 : {
5450 0 : DBUG_PRINT("error", ("insert into array failed"));
5451 0 : goto err_unlock;
5452 : }
5453 :
5454 1057 : DBUG_PRINT("info", ("chunk: #%u first_page: %u (%u) "
5455 : "full_pages: %lu (%lu) "
5456 : "Left %lu",
5457 : groups.elements,
5458 : first_page, first_page - 1,
5459 : (ulong) full_pages,
5460 : (ulong) (full_pages *
5461 : log_descriptor.page_capacity_chunk_2),
5462 : (ulong)(parts->record_length - (first_page - 1 +
5463 : buffer_rest) -
5464 : done)));
5465 1057 : rc|= translog_advance_pointer((int)full_pages, 0);
5466 :
5467 1057 : translog_unlock();
5468 :
5469 1057 : if (buffer_to_flush != NULL)
5470 : {
5471 0 : translog_buffer_decrease_writers(buffer_to_flush);
5472 0 : if (!rc)
5473 0 : rc= translog_buffer_flush(buffer_to_flush);
5474 0 : translog_buffer_unlock(buffer_to_flush);
5475 0 : buffer_to_flush= NULL;
5476 : }
5477 1057 : if (rc)
5478 : {
5479 0 : DBUG_PRINT("error", ("flush of unlock buffer failed"));
5480 0 : goto err;
5481 : }
5482 :
5483 1057 : translog_write_data_on_page(&horizon, &cursor, 1, chunk2_header);
5484 1057 : translog_write_parts_on_page(&horizon, &cursor, first_page - 1, parts);
5485 1057 : DBUG_PRINT("info", ("absolute horizon: (%lu,0x%lx) local: (%lu,0x%lx) "
5486 : "Left %lu",
5487 : LSN_IN_PARTS(log_descriptor.horizon),
5488 : LSN_IN_PARTS(horizon),
5489 : (ulong) (parts->record_length - (first_page - 1) -
5490 : done)));
5491 :
5492 135240 : for (i= 0; i < full_pages; i++)
5493 : {
5494 134183 : if (translog_write_variable_record_chunk2_page(parts, &horizon, &cursor))
5495 134183 : goto err;
5496 :
5497 134183 : DBUG_PRINT("info", ("absolute horizon: (%lu,0x%lx) "
5498 : "local: (%lu,0x%lx) "
5499 : "Left: %lu",
5500 : LSN_IN_PARTS(log_descriptor.horizon),
5501 : LSN_IN_PARTS(horizon),
5502 : (ulong) (parts->record_length - (first_page - 1) -
5503 : i * log_descriptor.page_capacity_chunk_2 -
5504 : done)));
5505 : }
5506 :
5507 1057 : done+= (first_page - 1 + buffer_rest);
5508 :
5509 1057 : if (translog_chaser_page_next(&horizon, &cursor))
5510 : {
5511 0 : DBUG_PRINT("error", ("flush of unlock buffer failed"));
5512 0 : goto err;
5513 : }
5514 1057 : translog_buffer_lock(cursor.buffer);
5515 1057 : translog_buffer_decrease_writers(cursor.buffer);
5516 1057 : translog_buffer_unlock(cursor.buffer);
5517 :
5518 1057 : translog_lock();
5519 :
5520 : /* Check that we have place for chunk type 2 */
5521 1057 : first_page= translog_get_current_page_rest();
5522 1057 : if (first_page <= 1)
5523 : {
5524 0 : if (translog_page_next(&log_descriptor.horizon, &log_descriptor.bc,
5525 : &buffer_to_flush))
5526 0 : goto err_unlock;
5527 0 : first_page= translog_get_current_page_rest();
5528 : }
5529 1057 : buffer_rest= translog_get_current_group_size();
5530 1057 : } while ((translog_size_t)(first_page + buffer_rest) <
5531 1057 : (translog_size_t)(parts->record_length - done));
5532 :
5533 3 : group.addr= horizon= log_descriptor.horizon;
5534 3 : cursor= log_descriptor.bc;
5535 3 : cursor.chaser= 1;
5536 3 : group.num= 0; /* 0 because it does not matter */
5537 3 : if (insert_dynamic(&groups, (uchar*) &group))
5538 : {
5539 0 : DBUG_PRINT("error", ("insert into array failed"));
5540 0 : goto err_unlock;
5541 : }
5542 3 : record_rest= parts->record_length - done;
5543 3 : DBUG_PRINT("info", ("Record rest: %lu", (ulong) record_rest));
5544 3 : if (first_page > record_rest + 1)
5545 : {
5546 : /*
5547 : We have not so much data to fill all first page
5548 : (no speaking about full pages)
5549 : so it will be:
5550 : <chunk0 <data>>
5551 : or
5552 : <chunk0>...<chunk0><chunk0 <data>>
5553 : or
5554 : <chunk3 <data>><chunk0>...<chunk0><chunk0 <possible data of 1 byte>>
5555 : */
5556 0 : chunk2_page= full_pages= 0;
5557 0 : last_page_capacity= first_page;
5558 0 : pages_to_skip= -1;
5559 : }
5560 : else
5561 : {
5562 : /*
5563 : We will have:
5564 : <chunk2 <data>>...<chunk2 <data>><chunk0 <data>>
5565 : or
5566 : <chunk2 <data>>...<chunk2 <data>><chunk0>...<chunk0><chunk0 <data>>
5567 : or
5568 : <chunk3 <data>><chunk0>...<chunk0><chunk0 <possible data of 1 byte>>
5569 : */
5570 3 : chunk2_page= 1;
5571 3 : record_rest-= (first_page - 1);
5572 3 : pages_to_skip= full_pages=
5573 : record_rest / log_descriptor.page_capacity_chunk_2;
5574 3 : record_rest= (record_rest % log_descriptor.page_capacity_chunk_2);
5575 3 : last_page_capacity= page_capacity;
5576 : }
5577 3 : chunk3_size= 0;
5578 3 : chunk3_pages= 0;
5579 3 : if (last_page_capacity > record_rest + 1 && record_rest != 0)
5580 : {
5581 3 : if (last_page_capacity >
5582 : record_rest + header_fixed_part + groups.elements * (7 + 1))
5583 : {
5584 : /* 1 record of type 0 */
5585 1 : chunk3_pages= 0;
5586 : }
5587 : else
5588 : {
5589 2 : pages_to_skip++;
5590 2 : chunk3_pages= 1;
5591 2 : if (record_rest + 2 == last_page_capacity)
5592 : {
5593 0 : chunk3_size= record_rest - 1;
5594 0 : record_rest= 1;
5595 : }
5596 : else
5597 : {
5598 2 : chunk3_size= record_rest;
5599 2 : record_rest= 0;
5600 : }
5601 : }
5602 : }
5603 : /*
5604 : A first non-full page will hold type 0 chunk only if it fit in it with
5605 : all its headers
5606 : */
5607 3 : while (page_capacity <
5608 : record_rest + header_fixed_part +
5609 : (groups.elements - groups_per_page * (chunk0_pages - 1)) * (7 + 1))
5610 0 : chunk0_pages++;
5611 3 : DBUG_PRINT("info", ("chunk0_pages: %u groups %u groups per full page: %u "
5612 : "Group on last page: %u",
5613 : chunk0_pages, groups.elements,
5614 : groups_per_page,
5615 : (groups.elements -
5616 : ((page_capacity - header_fixed_part) / (7 + 1)) *
5617 : (chunk0_pages - 1))));
5618 3 : DBUG_PRINT("info", ("first_page: %u chunk2: %u full_pages: %u (%lu) "
5619 : "chunk3: %u (%u) rest: %u",
5620 : first_page,
5621 : chunk2_page, full_pages,
5622 : (ulong) full_pages *
5623 : log_descriptor.page_capacity_chunk_2,
5624 : chunk3_pages, (uint) chunk3_size, (uint) record_rest));
5625 3 : rc= translog_advance_pointer(pages_to_skip + (int)(chunk0_pages - 1),
5626 : record_rest + header_fixed_part +
5627 : (groups.elements -
5628 : ((page_capacity -
5629 : header_fixed_part) / (7 + 1)) *
5630 : (chunk0_pages - 1)) * (7 + 1));
5631 3 : buffer_of_last_lsn= log_descriptor.bc.buffer;
5632 3 : translog_unlock();
5633 :
5634 3 : if (buffer_to_flush != NULL)
5635 : {
5636 0 : translog_buffer_decrease_writers(buffer_to_flush);
5637 0 : if (!rc)
5638 0 : rc= translog_buffer_flush(buffer_to_flush);
5639 0 : translog_buffer_unlock(buffer_to_flush);
5640 0 : buffer_to_flush= NULL;
5641 : }
5642 3 : if (rc)
5643 : {
5644 0 : DBUG_PRINT("error", ("flush of unlock buffer failed"));
5645 0 : goto err;
5646 : }
5647 :
5648 3 : if (rc)
5649 3 : goto err;
5650 :
5651 3 : if (chunk2_page)
5652 : {
5653 3 : DBUG_PRINT("info", ("chunk 2 to finish first page"));
5654 3 : translog_write_data_on_page(&horizon, &cursor, 1, chunk2_header);
5655 3 : translog_write_parts_on_page(&horizon, &cursor, first_page - 1, parts);
5656 3 : DBUG_PRINT("info", ("absolute horizon: (%lu,0x%lx) local: (%lu,0x%lx) "
5657 : "Left: %lu",
5658 : LSN_IN_PARTS(log_descriptor.horizon),
5659 : LSN_IN_PARTS(horizon),
5660 : (ulong) (parts->record_length - (first_page - 1) -
5661 : done)));
5662 : }
5663 0 : else if (chunk3_pages)
5664 : {
5665 : uchar chunk3_header[3];
5666 0 : DBUG_PRINT("info", ("chunk 3"));
5667 0 : DBUG_ASSERT(full_pages == 0);
5668 0 : chunk3_pages= 0;
5669 0 : chunk3_header[0]= TRANSLOG_CHUNK_LNGTH;
5670 0 : int2store(chunk3_header + 1, chunk3_size);
5671 0 : translog_write_data_on_page(&horizon, &cursor, 3, chunk3_header);
5672 0 : translog_write_parts_on_page(&horizon, &cursor, chunk3_size, parts);
5673 0 : DBUG_PRINT("info", ("absolute horizon: (%lu,0x%lx) local: (%lu,0x%lx) "
5674 : "Left: %lu",
5675 : LSN_IN_PARTS(log_descriptor.horizon),
5676 : LSN_IN_PARTS(horizon),
5677 : (ulong) (parts->record_length - chunk3_size - done)));
5678 : }
5679 : else
5680 : {
5681 0 : DBUG_PRINT("info", ("no new_page_before_chunk0"));
5682 0 : new_page_before_chunk0= 0;
5683 : }
5684 :
5685 85 : for (i= 0; i < full_pages; i++)
5686 : {
5687 82 : DBUG_ASSERT(chunk2_page != 0);
5688 82 : if (translog_write_variable_record_chunk2_page(parts, &horizon, &cursor))
5689 82 : goto err;
5690 :
5691 82 : DBUG_PRINT("info", ("absolute horizon: (%lu,0x%lx) local: (%lu,0x%lx) "
5692 : "Left: %lu",
5693 : LSN_IN_PARTS(log_descriptor.horizon),
5694 : LSN_IN_PARTS(horizon),
5695 : (ulong) (parts->record_length - (first_page - 1) -
5696 : i * log_descriptor.page_capacity_chunk_2 -
5697 : done)));
5698 : }
5699 :
5700 3 : if (chunk3_pages &&
5701 : translog_write_variable_record_chunk3_page(parts,
5702 : chunk3_size,
5703 : &horizon, &cursor))
5704 3 : goto err;
5705 3 : DBUG_PRINT("info", ("absolute horizon: (%lu,0x%lx) local: (%lu,0x%lx)",
5706 : LSN_IN_PARTS(log_descriptor.horizon),
5707 : LSN_IN_PARTS(horizon)));
5708 :
5709 3 : *chunk0_header= (uchar) (type | TRANSLOG_CHUNK_LSN);
5710 3 : int2store(chunk0_header + 1, short_trid);
5711 3 : translog_write_variable_record_1group_code_len(chunk0_header + 3,
5712 : parts->record_length,
5713 : header_length);
5714 : do
5715 : {
5716 : int limit;
5717 3 : if (new_page_before_chunk0 &&
5718 : translog_chaser_page_next(&horizon, &cursor))
5719 : {
5720 0 : DBUG_PRINT("error", ("flush of unlock buffer failed"));
5721 0 : goto err;
5722 : }
5723 3 : new_page_before_chunk0= 1;
5724 :
5725 3 : if (first_chunk0)
5726 : {
5727 3 : first_chunk0= 0;
5728 :
5729 : /*
5730 : We can drop "log_descriptor.is_everything_flushed" earlier when have
5731 : lock on loghandler and assign initial value of "horizon" variable or
5732 : before unlocking loghandler (because we will increase writers
5733 : counter on the buffer and every thread which wanted flush the buffer
5734 : will wait till we finish with it). But IMHO better here take short
5735 : lock and do not bother other threads with waiting.
5736 : */
5737 3 : translog_lock();
5738 3 : set_lsn(lsn, horizon);
5739 3 : buffer_of_last_lsn->last_lsn= *lsn;
5740 3 : DBUG_PRINT("info", ("last_lsn set to (%lu,0x%lx) buffer: 0x%lx",
5741 : LSN_IN_PARTS(buffer_of_last_lsn->last_lsn),
5742 : (ulong) buffer_of_last_lsn));
5743 3 : if (log_record_type_descriptor[type].inwrite_hook &&
5744 : (*log_record_type_descriptor[type].inwrite_hook) (type, trn,
5745 : tbl_info,
5746 : lsn, hook_arg))
5747 3 : goto err_unlock;
5748 3 : translog_unlock();
5749 : }
5750 :
5751 : /*
5752 : A first non-full page will hold type 0 chunk only if it fit in it with
5753 : all its headers => the fist page is full or number of groups less then
5754 : possible number of full page.
5755 : */
5756 3 : limit= (groups_per_page < groups.elements - curr_group ?
5757 : groups_per_page : groups.elements - curr_group);
5758 3 : DBUG_PRINT("info", ("Groups: %u curr: %u limit: %u",
5759 : (uint) groups.elements, (uint) curr_group,
5760 : (uint) limit));
5761 :
5762 3 : if (chunk0_pages == 1)
5763 : {
5764 3 : DBUG_PRINT("info", ("chunk_len: 2 + %u * (7+1) + %u = %u",
5765 : (uint) limit, (uint) record_rest,
5766 : (uint) (2 + limit * (7 + 1) + record_rest)));
5767 3 : int2store(chunk0_header + header_length - 2,
5768 : 2 + limit * (7 + 1) + record_rest);
5769 : }
5770 : else
5771 : {
5772 0 : DBUG_PRINT("info", ("chunk_len: 2 + %u * (7+1) = %u",
5773 : (uint) limit, (uint) (2 + limit * (7 + 1))));
5774 0 : int2store(chunk0_header + header_length - 2, 2 + limit * (7 + 1));
5775 : }
5776 3 : int2store(chunk0_header + header_length, groups.elements - curr_group);
5777 3 : translog_write_data_on_page(&horizon, &cursor, header_fixed_part,
5778 : chunk0_header);
5779 1063 : for (i= curr_group; i < limit + curr_group; i++)
5780 : {
5781 : struct st_translog_group_descriptor *grp_ptr;
5782 1060 : grp_ptr= dynamic_element(&groups, i,
5783 : struct st_translog_group_descriptor *);
5784 1060 : lsn_store(group_desc, grp_ptr->addr);
5785 1060 : group_desc[7]= grp_ptr->num;
5786 1060 : translog_write_data_on_page(&horizon, &cursor, (7 + 1), group_desc);
5787 : }
5788 :
5789 3 : if (chunk0_pages == 1 && record_rest != 0)
5790 1 : translog_write_parts_on_page(&horizon, &cursor, record_rest, parts);
5791 :
5792 3 : chunk0_pages--;
5793 3 : curr_group+= limit;
5794 : /* put special type to indicate that it is not LSN chunk */
5795 3 : *chunk0_header= (uchar) (TRANSLOG_CHUNK_LSN | TRANSLOG_CHUNK_0_CONT);
5796 3 : } while (chunk0_pages != 0);
5797 3 : translog_buffer_lock(cursor.buffer);
5798 3 : translog_buffer_decrease_writers(cursor.buffer);
5799 3 : translog_buffer_unlock(cursor.buffer);
5800 3 : rc= 0;
5801 :
5802 3 : if (translog_set_lsn_for_files(file_of_the_first_group, LSN_FILE_NO(*lsn),
5803 : *lsn, FALSE))
5804 3 : goto err;
5805 :
5806 3 : translog_mark_file_finished(file_of_the_first_group);
5807 :
5808 3 : delete_dynamic(&groups);
5809 3 : DBUG_RETURN(rc);
5810 :
5811 0 : err_unlock:
5812 :
5813 0 : translog_unlock();
5814 :
5815 0 : err:
5816 0 : if (buffer_to_flush != NULL)
5817 : {
5818 : /* This is to prevent locking buffer forever in case of error */
5819 0 : translog_buffer_decrease_writers(buffer_to_flush);
5820 0 : if (!rc)
5821 0 : rc= translog_buffer_flush(buffer_to_flush);
5822 0 : translog_buffer_unlock(buffer_to_flush);
5823 0 : buffer_to_flush= NULL;
5824 : }
5825 :
5826 :
5827 0 : translog_mark_file_finished(file_of_the_first_group);
5828 :
5829 0 : delete_dynamic(&groups);
5830 0 : DBUG_RETURN(1);
5831 : }
5832 :
5833 :
5834 : /**
5835 : @brief Write the variable length log record.
5836 :
5837 : @param lsn LSN of the record will be written here
5838 : @param type the log record type
5839 : @param short_trid Short transaction ID or 0 if it has no sense
5840 : @param parts Descriptor of record source parts
5841 : @param trn Transaction structure pointer for hooks by
5842 : record log type, for short_id
5843 : @param hook_arg Argument which will be passed to pre-write and
5844 : in-write hooks of this record.
5845 :
5846 : @return Operation status
5847 : @retval 0 OK
5848 : @retval 1 Error
5849 : */
5850 :
5851 : static my_bool translog_write_variable_record(LSN *lsn,
5852 : enum translog_record_type type,
5853 : MARIA_HA *tbl_info,
5854 : SHORT_TRANSACTION_ID short_trid,
5855 : struct st_translog_parts *parts,
5856 : TRN *trn, void *hook_arg)
5857 3305398 : {
5858 3305398 : struct st_translog_buffer *buffer_to_flush= NULL;
5859 : uint header_length1= 1 + 2 + 2 +
5860 3305398 : translog_variable_record_length_bytes(parts->record_length);
5861 : ulong buffer_rest;
5862 : uint page_rest;
5863 : /* Max number of such LSNs per record is 2 */
5864 : uchar compressed_LSNs[MAX_NUMBER_OF_LSNS_PER_RECORD *
5865 : COMPRESSED_LSN_MAX_STORE_SIZE];
5866 : my_bool res;
5867 3305398 : DBUG_ENTER("translog_write_variable_record");
5868 :
5869 3305398 : translog_lock();
5870 3305398 : DBUG_PRINT("info", ("horizon: (%lu,0x%lx)",
5871 : LSN_IN_PARTS(log_descriptor.horizon)));
5872 3305398 : page_rest= TRANSLOG_PAGE_SIZE - log_descriptor.bc.current_page_fill;
5873 3305398 : DBUG_PRINT("info", ("header length: %u page_rest: %u",
5874 : header_length1, page_rest));
5875 :
5876 : /*
5877 : header and part which we should read have to fit in one chunk
5878 : TODO: allow to divide readable header
5879 : */
5880 3305398 : if (page_rest <
5881 : (header_length1 + log_record_type_descriptor[type].read_header_len))
5882 : {
5883 6074 : DBUG_PRINT("info",
5884 : ("Next page, size: %u header: %u + %u",
5885 : log_descriptor.bc.current_page_fill,
5886 : header_length1,
5887 : log_record_type_descriptor[type].read_header_len));
5888 6074 : translog_page_next(&log_descriptor.horizon, &log_descriptor.bc,
5889 : &buffer_to_flush);
5890 : /* Chunk 2 header is 1 byte, so full page capacity will be one uchar more */
5891 6074 : page_rest= log_descriptor.page_capacity_chunk_2 + 1;
5892 6074 : DBUG_PRINT("info", ("page_rest: %u", page_rest));
5893 : }
5894 :
5895 : /*
5896 : To minimize compressed size we will compress always relative to
5897 : very first chunk address (log_descriptor.horizon for now)
5898 : */
5899 3305398 : if (log_record_type_descriptor[type].compressed_LSN > 0)
5900 : {
5901 1706563 : translog_relative_LSN_encode(parts, log_descriptor.horizon,
5902 : log_record_type_descriptor[type].
5903 : compressed_LSN, compressed_LSNs);
5904 : /* recalculate header length after compression */
5905 1706563 : header_length1= 1 + 2 + 2 +
5906 : translog_variable_record_length_bytes(parts->record_length);
5907 1706563 : DBUG_PRINT("info", ("after compressing LSN(s) header length: %u "
5908 : "record length: %lu",
5909 : header_length1, (ulong)parts->record_length));
5910 : }
5911 :
5912 : /* TODO: check space on current page for header + few bytes */
5913 3305398 : if (page_rest >= parts->record_length + header_length1)
5914 : {
5915 : /* following function makes translog_unlock(); */
5916 3285060 : res= translog_write_variable_record_1chunk(lsn, type, tbl_info,
5917 : short_trid,
5918 : parts, buffer_to_flush,
5919 : header_length1, trn, hook_arg);
5920 3285060 : DBUG_RETURN(res);
5921 : }
5922 :
5923 20338 : buffer_rest= translog_get_current_group_size();
5924 :
5925 20338 : if (buffer_rest >= parts->record_length + header_length1 - page_rest)
5926 : {
5927 : /* following function makes translog_unlock(); */
5928 20335 : res= translog_write_variable_record_1group(lsn, type, tbl_info,
5929 : short_trid,
5930 : parts, buffer_to_flush,
5931 : header_length1, trn, hook_arg);
5932 20335 : DBUG_RETURN(res);
5933 : }
5934 : /* following function makes translog_unlock(); */
5935 3 : res= translog_write_variable_record_mgroup(lsn, type, tbl_info,
5936 : short_trid,
5937 : parts, buffer_to_flush,
5938 : header_length1,
5939 : buffer_rest, trn, hook_arg);
5940 3 : DBUG_RETURN(res);
5941 : }
5942 :
5943 :
5944 : /**
5945 : @brief Write the fixed and pseudo-fixed log record.
5946 :
5947 : @param lsn LSN of the record will be written here
5948 : @param type the log record type
5949 : @param short_trid Short transaction ID or 0 if it has no sense
5950 : @param parts Descriptor of record source parts
5951 : @param trn Transaction structure pointer for hooks by
5952 : record log type, for short_id
5953 : @param hook_arg Argument which will be passed to pre-write and
5954 : in-write hooks of this record.
5955 :
5956 : @return Operation status
5957 : @retval 0 OK
5958 : @retval 1 Error
5959 : */
5960 :
5961 : static my_bool translog_write_fixed_record(LSN *lsn,
5962 : enum translog_record_type type,
5963 : MARIA_HA *tbl_info,
5964 : SHORT_TRANSACTION_ID short_trid,
5965 : struct st_translog_parts *parts,
5966 : TRN *trn, void *hook_arg)
5967 3398666 : {
5968 3398666 : struct st_translog_buffer *buffer_to_flush= NULL;
5969 : uchar chunk1_header[1 + 2];
5970 : /* Max number of such LSNs per record is 2 */
5971 : uchar compressed_LSNs[MAX_NUMBER_OF_LSNS_PER_RECORD *
5972 : COMPRESSED_LSN_MAX_STORE_SIZE];
5973 : LEX_CUSTRING *part;
5974 3398666 : int rc= 1;
5975 3398666 : DBUG_ENTER("translog_write_fixed_record");
5976 3398666 : DBUG_ASSERT((log_record_type_descriptor[type].rclass ==
5977 : LOGRECTYPE_FIXEDLENGTH &&
5978 : parts->record_length ==
5979 : log_record_type_descriptor[type].fixed_length) ||
5980 : (log_record_type_descriptor[type].rclass ==
5981 : LOGRECTYPE_PSEUDOFIXEDLENGTH &&
5982 : parts->record_length ==
5983 : log_record_type_descriptor[type].fixed_length));
5984 :
5985 3398666 : translog_lock();
5986 3398666 : DBUG_PRINT("info", ("horizon: (%lu,0x%lx)",
5987 : LSN_IN_PARTS(log_descriptor.horizon)));
5988 :
5989 3398666 : DBUG_ASSERT(log_descriptor.bc.current_page_fill <= TRANSLOG_PAGE_SIZE);
5990 3398666 : DBUG_PRINT("info",
5991 : ("Page size: %u record: %u next cond: %d",
5992 : log_descriptor.bc.current_page_fill,
5993 : (parts->record_length +
5994 : log_record_type_descriptor[type].compressed_LSN * 2 + 3),
5995 : ((((uint) log_descriptor.bc.current_page_fill) +
5996 : (parts->record_length +
5997 : log_record_type_descriptor[type].compressed_LSN * 2 + 3)) >
5998 : TRANSLOG_PAGE_SIZE)));
5999 : /*
6000 : check that there is enough place on current page.
6001 : NOTE: compressing may increase page LSN size on two bytes for every LSN
6002 : */
6003 3398666 : if ((((uint) log_descriptor.bc.current_page_fill) +
6004 : (parts->record_length +
6005 : log_record_type_descriptor[type].compressed_LSN * 2 + 3)) >
6006 : TRANSLOG_PAGE_SIZE)
6007 : {
6008 3743 : DBUG_PRINT("info", ("Next page"));
6009 3743 : if (translog_page_next(&log_descriptor.horizon, &log_descriptor.bc,
6010 : &buffer_to_flush))
6011 3743 : goto err; /* rc == 1 */
6012 3743 : if (buffer_to_flush)
6013 28 : translog_buffer_lock_assert_owner(buffer_to_flush);
6014 : }
6015 :
6016 3398666 : set_lsn(lsn, log_descriptor.horizon);
6017 3398666 : if (translog_set_lsn_for_files(LSN_FILE_NO(*lsn), LSN_FILE_NO(*lsn),
6018 : *lsn, TRUE) ||
6019 : (log_record_type_descriptor[type].inwrite_hook &&
6020 : (*log_record_type_descriptor[type].inwrite_hook)(type, trn, tbl_info,
6021 : lsn, hook_arg)))
6022 : goto err;
6023 :
6024 : /* compress LSNs */
6025 3398666 : if (log_record_type_descriptor[type].rclass ==
6026 : LOGRECTYPE_PSEUDOFIXEDLENGTH)
6027 : {
6028 1601 : DBUG_ASSERT(log_record_type_descriptor[type].compressed_LSN > 0);
6029 1601 : translog_relative_LSN_encode(parts, *lsn,
6030 : log_record_type_descriptor[type].
6031 : compressed_LSN, compressed_LSNs);
6032 : }
6033 :
6034 : /*
6035 : Write the whole record at once (we know that there is enough place on
6036 : the destination page)
6037 : */
6038 3398666 : DBUG_ASSERT(parts->current != 0); /* first part is left for header */
6039 3398666 : part= parts->parts + (--parts->current);
6040 3398666 : parts->total_record_length+= (translog_size_t) (part->length= 1 + 2);
6041 3398666 : part->str= chunk1_header;
6042 3398666 : *chunk1_header= (uchar) (type | TRANSLOG_CHUNK_FIXED);
6043 3398666 : int2store(chunk1_header + 1, short_trid);
6044 :
6045 3398666 : rc= translog_write_parts_on_page(&log_descriptor.horizon,
6046 : &log_descriptor.bc,
6047 : parts->total_record_length, parts);
6048 :
6049 3398666 : log_descriptor.bc.buffer->last_lsn= *lsn;
6050 3398666 : DBUG_PRINT("info", ("last_lsn set to (%lu,0x%lx) buffer: 0x%lx",
6051 : LSN_IN_PARTS(log_descriptor.bc.buffer->last_lsn),
6052 : (ulong) log_descriptor.bc.buffer));
6053 :
6054 3398666 : err:
6055 3398666 : translog_unlock();
6056 :
6057 : /*
6058 : check if we switched buffer and need process it (current buffer is
6059 : unlocked already => we will not delay other threads
6060 : */
6061 3398666 : if (buffer_to_flush != NULL)
6062 : {
6063 28 : if (!rc)
6064 28 : rc= translog_buffer_flush(buffer_to_flush);
6065 28 : translog_buffer_unlock(buffer_to_flush);
6066 : }
6067 :
6068 3398666 : DBUG_RETURN(rc);
6069 : }
6070 :
6071 :
6072 : /**
6073 : @brief Writes the log record
6074 :
6075 : If share has no 2-byte-id yet, gives an id to the share and logs
6076 : LOGREC_FILE_ID. If transaction has not logged LOGREC_LONG_TRANSACTION_ID
6077 : yet, logs it.
6078 :
6079 : @param lsn LSN of the record will be written here
6080 : @param type the log record type
6081 : @param trn Transaction structure pointer for hooks by
6082 : record log type, for short_id
6083 : @param tbl_info MARIA_HA of table or NULL
6084 : @param rec_len record length or 0 (count it)
6085 : @param part_no number of parts or 0 (count it)
6086 : @param parts_data zero ended (in case of number of parts is 0)
6087 : array of LEX_STRINGs (parts), first
6088 : TRANSLOG_INTERNAL_PARTS positions in the log
6089 : should be unused (need for loghandler)
6090 : @param store_share_id if tbl_info!=NULL then share's id will
6091 : automatically be stored in the two first bytes
6092 : pointed (so pointer is assumed to be !=NULL)
6093 : @param hook_arg argument which will be passed to pre-write and
6094 : in-write hooks of this record.
6095 :
6096 : @return Operation status
6097 : @retval 0 OK
6098 : @retval 1 Error
6099 : */
6100 :
6101 : my_bool translog_write_record(LSN *lsn,
6102 : enum translog_record_type type,
6103 : TRN *trn, MARIA_HA *tbl_info,
6104 : translog_size_t rec_len,
6105 : uint part_no,
6106 : LEX_CUSTRING *parts_data,
6107 : uchar *store_share_id,
6108 : void *hook_arg)
6109 6704064 : {
6110 : struct st_translog_parts parts;
6111 : LEX_CUSTRING *part;
6112 : int rc;
6113 6704064 : uint short_trid= trn->short_id;
6114 6704064 : DBUG_ENTER("translog_write_record");
6115 6704064 : DBUG_PRINT("enter", ("type: %u (%s) ShortTrID: %u rec_len: %lu",
6116 : (uint) type, log_record_type_descriptor[type].name,
6117 : (uint) short_trid, (ulong) rec_len));
6118 6704064 : DBUG_ASSERT(translog_status == TRANSLOG_OK ||
6119 : translog_status == TRANSLOG_READONLY);
6120 6704064 : if (unlikely(translog_status != TRANSLOG_OK))
6121 : {
6122 0 : DBUG_PRINT("error", ("Transaction log is write protected"));
6123 0 : DBUG_RETURN(1);
6124 : }
6125 :
6126 6704064 : if (tbl_info)
6127 : {
6128 3437763 : MARIA_SHARE *share= tbl_info->s;
6129 3437763 : DBUG_ASSERT(share->now_transactional);
6130 3437763 : if (unlikely(share->id == 0))
6131 : {
6132 : /*
6133 : First log write for this MARIA_SHARE; give it a short id.
6134 : When the lock manager is enabled and needs a short id, it should be
6135 : assigned in the lock manager (because row locks will be taken before
6136 : log records are written; for example SELECT FOR UPDATE takes locks but
6137 : writes no log record.
6138 : */
6139 392 : if (unlikely(translog_assign_id_to_share(tbl_info, trn)))
6140 0 : DBUG_RETURN(1);
6141 : }
6142 3437763 : fileid_store(store_share_id, share->id);
6143 : }
6144 6704064 : if (unlikely(!(trn->first_undo_lsn & TRANSACTION_LOGGED_LONG_ID)))
6145 : {
6146 : LSN dummy_lsn;
6147 : LEX_CUSTRING log_array[TRANSLOG_INTERNAL_PARTS + 1];
6148 : uchar log_data[6];
6149 996 : int6store(log_data, trn->trid);
6150 996 : log_array[TRANSLOG_INTERNAL_PARTS + 0].str= log_data;
6151 996 : log_array[TRANSLOG_INTERNAL_PARTS + 0].length= sizeof(log_data);
6152 996 : trn->first_undo_lsn|= TRANSACTION_LOGGED_LONG_ID; /* no recursion */
6153 996 : if (unlikely(translog_write_record(&dummy_lsn, LOGREC_LONG_TRANSACTION_ID,
6154 : trn, NULL, sizeof(log_data),
6155 : sizeof(log_array)/sizeof(log_array[0]),
6156 : log_array, NULL, NULL)))
6157 0 : DBUG_RETURN(1);
6158 : }
6159 :
6160 6704064 : parts.parts= parts_data;
6161 :
6162 : /* count parts if they are not counted by upper level */
6163 6704064 : if (part_no == 0)
6164 : {
6165 800 : for (part_no= TRANSLOG_INTERNAL_PARTS;
6166 2400 : parts_data[part_no].length != 0;
6167 800 : part_no++);
6168 : }
6169 6704064 : parts.elements= part_no;
6170 6704064 : parts.current= TRANSLOG_INTERNAL_PARTS;
6171 :
6172 : /* clear TRANSLOG_INTERNAL_PARTS */
6173 : compile_time_assert(TRANSLOG_INTERNAL_PARTS != 0);
6174 6704064 : parts_data[0].str= 0;
6175 6704064 : parts_data[0].length= 0;
6176 :
6177 : /* count length of the record */
6178 6704064 : if (rec_len == 0)
6179 : {
6180 4366 : for(part= parts_data + TRANSLOG_INTERNAL_PARTS;\
6181 : part < parts_data + part_no;
6182 1600 : part++)
6183 : {
6184 1600 : rec_len+= (translog_size_t) part->length;
6185 : }
6186 : }
6187 6704064 : parts.record_length= rec_len;
6188 :
6189 : #ifndef DBUG_OFF
6190 : {
6191 : uint i;
6192 6704064 : uint len= 0;
6193 : #ifdef HAVE_purify
6194 : ha_checksum checksum= 0;
6195 : #endif
6196 17654389 : for (i= TRANSLOG_INTERNAL_PARTS; i < part_no; i++)
6197 : {
6198 : #ifdef HAVE_purify
6199 : /* Find uninitialized bytes early */
6200 : checksum+= my_checksum(checksum, parts_data[i].str,
6201 : parts_data[i].length);
6202 : #endif
6203 10950325 : len+= parts_data[i].length;
6204 : }
6205 6704064 : DBUG_ASSERT(len == rec_len);
6206 : }
6207 : #endif
6208 : /*
6209 : Start total_record_length from record_length; the overhead will
6210 : be added to it later
6211 : */
6212 6704064 : parts.total_record_length= parts.record_length;
6213 6704064 : DBUG_PRINT("info", ("record length: %lu", (ulong) parts.record_length));
6214 :
6215 : /* process this parts */
6216 6704064 : if (!(rc= (log_record_type_descriptor[type].prewrite_hook &&
6217 : (*log_record_type_descriptor[type].prewrite_hook) (type, trn,
6218 : tbl_info,
6219 : hook_arg))))
6220 : {
6221 6704064 : switch (log_record_type_descriptor[type].rclass) {
6222 : case LOGRECTYPE_VARIABLE_LENGTH:
6223 3305398 : rc= translog_write_variable_record(lsn, type, tbl_info,
6224 : short_trid, &parts, trn, hook_arg);
6225 3305398 : break;
6226 : case LOGRECTYPE_PSEUDOFIXEDLENGTH:
6227 : case LOGRECTYPE_FIXEDLENGTH:
6228 3398666 : rc= translog_write_fixed_record(lsn, type, tbl_info,
6229 : short_trid, &parts, trn, hook_arg);
6230 3398666 : break;
6231 : case LOGRECTYPE_NOT_ALLOWED:
6232 : default:
6233 0 : DBUG_ASSERT(0);
6234 : rc= 1;
6235 : }
6236 : }
6237 :
6238 6704064 : DBUG_PRINT("info", ("LSN: (%lu,0x%lx)", LSN_IN_PARTS(*lsn)));
6239 6704064 : DBUG_RETURN(rc);
6240 : }
6241 :
6242 :
6243 : /*
6244 : Decode compressed (relative) LSN(s)
6245 :
6246 : SYNOPSIS
6247 : translog_relative_lsn_decode()
6248 : base_lsn LSN for encoding
6249 : src Decode LSN(s) from here
6250 : dst Put decoded LSNs here
6251 : lsns number of LSN(s)
6252 :
6253 : RETURN
6254 : position in sources after decoded LSN(s)
6255 : */
6256 :
6257 : static uchar *translog_relative_LSN_decode(LSN base_lsn,
6258 : uchar *src, uchar *dst, uint lsns)
6259 5172581 : {
6260 : uint i;
6261 10347559 : for (i= 0; i < lsns; i++, dst+= LSN_STORE_SIZE)
6262 : {
6263 5174978 : src= translog_get_LSN_from_diff(base_lsn, src, dst);
6264 : }
6265 5172581 : return src;
6266 : }
6267 :
6268 : /**
6269 : @brief Get header of fixed/pseudo length record and call hook for
6270 : it processing
6271 :
6272 : @param page Pointer to the buffer with page where LSN chunk is
6273 : placed
6274 : @param page_offset Offset of the first chunk in the page
6275 : @param buff Buffer to be filled with header data
6276 :
6277 : @return Length of header or operation status
6278 : @retval # number of bytes in TRANSLOG_HEADER_BUFFER::header where
6279 : stored decoded part of the header
6280 : */
6281 :
6282 : static int translog_fixed_length_header(uchar *page,
6283 : translog_size_t page_offset,
6284 : TRANSLOG_HEADER_BUFFER *buff)
6285 258396 : {
6286 : struct st_log_record_type_descriptor *desc=
6287 258396 : log_record_type_descriptor + buff->type;
6288 258396 : uchar *src= page + page_offset + 3;
6289 258396 : uchar *dst= buff->header;
6290 258396 : uchar *start= src;
6291 258396 : int lsns= desc->compressed_LSN;
6292 258396 : uint length= desc->fixed_length;
6293 258396 : DBUG_ENTER("translog_fixed_length_header");
6294 :
6295 258396 : buff->record_length= length;
6296 :
6297 258396 : if (desc->rclass == LOGRECTYPE_PSEUDOFIXEDLENGTH)
6298 : {
6299 1601 : DBUG_ASSERT(lsns > 0);
6300 1601 : src= translog_relative_LSN_decode(buff->lsn, src, dst, lsns);
6301 1601 : lsns*= LSN_STORE_SIZE;
6302 1601 : dst+= lsns;
6303 1601 : length-= lsns;
6304 1601 : buff->compressed_LSN_economy= (lsns - (int) (src - start));
6305 : }
6306 : else
6307 256795 : buff->compressed_LSN_economy= 0;
6308 :
6309 258396 : memcpy(dst, src, length);
6310 258396 : buff->non_header_data_start_offset= (uint16) (page_offset +
6311 : ((src + length) -
6312 : (page + page_offset)));
6313 258396 : buff->non_header_data_len= 0;
6314 258396 : DBUG_RETURN(buff->record_length);
6315 : }
6316 :
6317 :
6318 : /*
6319 : Free resources used by TRANSLOG_HEADER_BUFFER
6320 :
6321 : SYNOPSIS
6322 : translog_free_record_header();
6323 : */
6324 :
6325 : void translog_free_record_header(TRANSLOG_HEADER_BUFFER *buff)
6326 10724894 : {
6327 10724894 : DBUG_ENTER("translog_free_record_header");
6328 10724894 : if (buff->groups_no != 0)
6329 : {
6330 6 : my_free(buff->groups, MYF(0));
6331 6 : buff->groups_no= 0;
6332 : }
6333 10724894 : DBUG_VOID_RETURN;
6334 : }
6335 :
6336 :
6337 : /**
6338 : @brief Returns the current horizon at the end of the current log
6339 :
6340 : @return Horizon
6341 : @retval LSN_ERROR error
6342 : @retvar # Horizon
6343 : */
6344 :
6345 : TRANSLOG_ADDRESS translog_get_horizon()
6346 4750186 : {
6347 : TRANSLOG_ADDRESS res;
6348 4750186 : DBUG_ASSERT(translog_status == TRANSLOG_OK ||
6349 : translog_status == TRANSLOG_READONLY);
6350 4750186 : translog_lock();
6351 4750186 : res= log_descriptor.horizon;
6352 4750186 : translog_unlock();
6353 4750186 : return res;
6354 : }
6355 :
6356 :
6357 : /**
6358 : @brief Returns the current horizon at the end of the current log, caller is
6359 : assumed to already hold the lock
6360 :
6361 : @return Horizon
6362 : @retval LSN_ERROR error
6363 : @retvar # Horizon
6364 : */
6365 :
6366 : TRANSLOG_ADDRESS translog_get_horizon_no_lock()
6367 80 : {
6368 80 : DBUG_ASSERT(translog_status == TRANSLOG_OK ||
6369 : translog_status == TRANSLOG_READONLY);
6370 80 : translog_lock_assert_owner();
6371 80 : return log_descriptor.horizon;
6372 : }
6373 :
6374 :
6375 : /*
6376 : Set last page in the scanner data structure
6377 :
6378 : SYNOPSIS
6379 : translog_scanner_set_last_page()
6380 : scanner Information about current chunk during scanning
6381 :
6382 : RETURN
6383 : 0 OK
6384 : 1 Error
6385 : */
6386 :
6387 : static my_bool translog_scanner_set_last_page(TRANSLOG_SCANNER_DATA *scanner)
6388 4748079 : {
6389 : my_bool page_ok;
6390 4748079 : if (LSN_FILE_NO(scanner->page_addr) == LSN_FILE_NO(scanner->horizon))
6391 : {
6392 : /* It is last file => we can easy find last page address by horizon */
6393 4744956 : uint pagegrest= LSN_OFFSET(scanner->horizon) % TRANSLOG_PAGE_SIZE;
6394 4744956 : scanner->last_file_page= (scanner->horizon -
6395 : (pagegrest ? pagegrest : TRANSLOG_PAGE_SIZE));
6396 4744956 : return (0);
6397 : }
6398 3123 : scanner->last_file_page= scanner->page_addr;
6399 3123 : return (translog_get_last_page_addr(&scanner->last_file_page, &page_ok, 0));
6400 : }
6401 :
6402 :
6403 : /**
6404 : @brief Get page from page cache according to requested method
6405 :
6406 : @param scanner The scanner data
6407 :
6408 : @return operation status
6409 : @retval 0 OK
6410 : @retval 1 Error
6411 : */
6412 :
6413 : static my_bool
6414 : translog_scanner_get_page(TRANSLOG_SCANNER_DATA *scanner)
6415 5207737 : {
6416 : TRANSLOG_VALIDATOR_DATA data;
6417 5207737 : DBUG_ENTER("translog_scanner_get_page");
6418 5207737 : data.addr= &scanner->page_addr;
6419 5207737 : data.was_recovered= 0;
6420 5207737 : DBUG_RETURN((scanner->page=
6421 : translog_get_page(&data, scanner->buffer,
6422 : (scanner->use_direct_link ?
6423 : &scanner->direct_link :
6424 : NULL))) ==
6425 : NULL);
6426 : }
6427 :
6428 :
6429 : /**
6430 : @brief Initialize reader scanner.
6431 :
6432 : @param lsn LSN with which it have to be inited
6433 : @param fixed_horizon true if it is OK do not read records which was written
6434 : after scanning beginning
6435 : @param scanner scanner which have to be inited
6436 : @param use_direct prefer using direct lings from page handler
6437 : where it is possible.
6438 :
6439 : @note If direct link was used translog_destroy_scanner should be
6440 : called after it using
6441 :
6442 : @return status of the operation
6443 : @retval 0 OK
6444 : @retval 1 Error
6445 : */
6446 :
6447 : my_bool translog_scanner_init(LSN lsn,
6448 : my_bool fixed_horizon,
6449 : TRANSLOG_SCANNER_DATA *scanner,
6450 : my_bool use_direct)
6451 4748041 : {
6452 : TRANSLOG_VALIDATOR_DATA data;
6453 4748041 : DBUG_ENTER("translog_scanner_init");
6454 4748041 : DBUG_PRINT("enter", ("Scanner: 0x%lx LSN: (%lu,0x%lx)",
6455 : (ulong) scanner, LSN_IN_PARTS(lsn)));
6456 4748041 : DBUG_ASSERT(translog_status == TRANSLOG_OK ||
6457 : translog_status == TRANSLOG_READONLY);
6458 :
6459 4748041 : data.addr= &scanner->page_addr;
6460 4748041 : data.was_recovered= 0;
6461 :
6462 4748041 : scanner->page_offset= LSN_OFFSET(lsn) % TRANSLOG_PAGE_SIZE;
6463 :
6464 4748041 : scanner->fixed_horizon= fixed_horizon;
6465 4748041 : scanner->use_direct_link= use_direct;
6466 4748041 : scanner->direct_link= NULL;
6467 :
6468 4748041 : scanner->horizon= translog_get_horizon();
6469 4748041 : DBUG_PRINT("info", ("horizon: (%lu,0x%lx)", LSN_IN_PARTS(scanner->horizon)));
6470 :
6471 : /* lsn < horizon */
6472 4748041 : DBUG_ASSERT(lsn <= scanner->horizon);
6473 :
6474 4748041 : scanner->page_addr= lsn;
6475 4748041 : scanner->page_addr-= scanner->page_offset; /*decrease offset */
6476 :
6477 4748041 : if (translog_scanner_set_last_page(scanner))
6478 0 : DBUG_RETURN(1);
6479 :
6480 4748041 : if (translog_scanner_get_page(scanner))
6481 0 : DBUG_RETURN(1);
6482 4748041 : DBUG_RETURN(0);
6483 : }
6484 :
6485 :
6486 : /**
6487 : @brief Destroy scanner object;
6488 :
6489 : @param scanner The scanner object to destroy
6490 : */
6491 :
6492 : void translog_destroy_scanner(TRANSLOG_SCANNER_DATA *scanner)
6493 4748011 : {
6494 4748011 : DBUG_ENTER("translog_destroy_scanner");
6495 4748011 : DBUG_PRINT("enter", ("Scanner: 0x%lx", (ulong)scanner));
6496 4748011 : translog_free_link(scanner->direct_link);
6497 4748011 : DBUG_VOID_RETURN;
6498 : }
6499 :
6500 :
6501 : /*
6502 : Checks End of the Log
6503 :
6504 : SYNOPSIS
6505 : translog_scanner_eol()
6506 : scanner Information about current chunk during scanning
6507 :
6508 : RETURN
6509 : 1 End of the Log
6510 : 0 OK
6511 : */
6512 :
6513 : static my_bool translog_scanner_eol(TRANSLOG_SCANNER_DATA *scanner)
6514 6579383 : {
6515 6579383 : DBUG_ENTER("translog_scanner_eol");
6516 6579383 : DBUG_PRINT("enter",
6517 : ("Horizon: (%lu, 0x%lx) Current: (%lu, 0x%lx+0x%x=0x%lx)",
6518 : LSN_IN_PARTS(scanner->horizon),
6519 : LSN_IN_PARTS(scanner->page_addr),
6520 : (uint) scanner->page_offset,
6521 : (ulong) (LSN_OFFSET(scanner->page_addr) + scanner->page_offset)));
6522 6579383 : if (scanner->horizon > (scanner->page_addr +
6523 : scanner->page_offset))
6524 : {
6525 6578744 : DBUG_PRINT("info", ("Horizon is not reached"));
6526 6578744 : DBUG_RETURN(0);
6527 : }
6528 639 : if (scanner->fixed_horizon)
6529 : {
6530 639 : DBUG_PRINT("info", ("Horizon is fixed and reached"));
6531 639 : DBUG_RETURN(1);
6532 : }
6533 0 : scanner->horizon= translog_get_horizon();
6534 0 : DBUG_PRINT("info",
6535 : ("Horizon is re-read, EOL: %d",
6536 : scanner->horizon <= (scanner->page_addr +
6537 : scanner->page_offset)));
6538 0 : DBUG_RETURN(scanner->horizon <= (scanner->page_addr +
6539 : scanner->page_offset));
6540 : }
6541 :
6542 :
6543 : /**
6544 : @brief Cheks End of the Page
6545 :
6546 : @param scanner Information about current chunk during scanning
6547 :
6548 : @retval 1 End of the Page
6549 : @retval 0 OK
6550 : */
6551 :
6552 : static my_bool translog_scanner_eop(TRANSLOG_SCANNER_DATA *scanner)
6553 12239043 : {
6554 12239043 : DBUG_ENTER("translog_scanner_eop");
6555 12239043 : DBUG_RETURN(scanner->page_offset >= TRANSLOG_PAGE_SIZE ||
6556 : scanner->page[scanner->page_offset] == TRANSLOG_FILLER);
6557 : }
6558 :
6559 :
6560 : /**
6561 : @brief Checks End of the File (i.e. we are scanning last page, which do not
6562 : mean end of this page)
6563 :
6564 : @param scanner Information about current chunk during scanning
6565 :
6566 : @retval 1 End of the File
6567 : @retval 0 OK
6568 : */
6569 :
6570 : static my_bool translog_scanner_eof(TRANSLOG_SCANNER_DATA *scanner)
6571 459696 : {
6572 459696 : DBUG_ENTER("translog_scanner_eof");
6573 459696 : DBUG_ASSERT(LSN_FILE_NO(scanner->page_addr) ==
6574 : LSN_FILE_NO(scanner->last_file_page));
6575 459696 : DBUG_PRINT("enter", ("curr Page: 0x%lx last page: 0x%lx "
6576 : "normal EOF: %d",
6577 : (ulong) LSN_OFFSET(scanner->page_addr),
6578 : (ulong) LSN_OFFSET(scanner->last_file_page),
6579 : LSN_OFFSET(scanner->page_addr) ==
6580 : LSN_OFFSET(scanner->last_file_page)));
6581 : /*
6582 : TODO: detect damaged file EOF,
6583 : TODO: issue warning if damaged file EOF detected
6584 : */
6585 459696 : DBUG_RETURN(scanner->page_addr ==
6586 : scanner->last_file_page);
6587 : }
6588 :
6589 : /*
6590 : Move scanner to the next chunk
6591 :
6592 : SYNOPSIS
6593 : translog_get_next_chunk()
6594 : scanner Information about current chunk during scanning
6595 :
6596 : RETURN
6597 : 0 OK
6598 : 1 Error
6599 : */
6600 :
6601 : static my_bool
6602 : translog_get_next_chunk(TRANSLOG_SCANNER_DATA *scanner)
6603 6119687 : {
6604 : uint16 len;
6605 6119687 : DBUG_ENTER("translog_get_next_chunk");
6606 :
6607 6119687 : if (translog_scanner_eop(scanner))
6608 0 : len= TRANSLOG_PAGE_SIZE - scanner->page_offset;
6609 6119687 : else if ((len= translog_get_total_chunk_length(scanner->page,
6610 : scanner->page_offset)) == 0)
6611 0 : DBUG_RETURN(1);
6612 6119687 : scanner->page_offset+= len;
6613 :
6614 6119687 : if (translog_scanner_eol(scanner))
6615 : {
6616 639 : scanner->page= END_OF_LOG;
6617 639 : scanner->page_offset= 0;
6618 639 : DBUG_RETURN(0);
6619 : }
6620 6119048 : if (translog_scanner_eop(scanner))
6621 : {
6622 : /* before reading next page we should unpin current one if it was pinned */
6623 459696 : translog_free_link(scanner->direct_link);
6624 459696 : if (translog_scanner_eof(scanner))
6625 : {
6626 38 : DBUG_PRINT("info", ("horizon: (%lu,0x%lx) pageaddr: (%lu,0x%lx)",
6627 : LSN_IN_PARTS(scanner->horizon),
6628 : LSN_IN_PARTS(scanner->page_addr)));
6629 : /* if it is log end it have to be caught before */
6630 38 : DBUG_ASSERT(LSN_FILE_NO(scanner->horizon) >
6631 : LSN_FILE_NO(scanner->page_addr));
6632 38 : scanner->page_addr+= LSN_ONE_FILE;
6633 38 : scanner->page_addr= LSN_REPLACE_OFFSET(scanner->page_addr,
6634 : TRANSLOG_PAGE_SIZE);
6635 38 : if (translog_scanner_set_last_page(scanner))
6636 0 : DBUG_RETURN(1);
6637 : }
6638 : else
6639 : {
6640 459658 : scanner->page_addr+= TRANSLOG_PAGE_SIZE; /* offset increased */
6641 : }
6642 :
6643 459696 : if (translog_scanner_get_page(scanner))
6644 0 : DBUG_RETURN(1);
6645 :
6646 459696 : scanner->page_offset= translog_get_first_chunk_offset(scanner->page);
6647 459696 : if (translog_scanner_eol(scanner))
6648 : {
6649 0 : scanner->page= END_OF_LOG;
6650 0 : scanner->page_offset= 0;
6651 0 : DBUG_RETURN(0);
6652 : }
6653 459696 : DBUG_ASSERT(scanner->page[scanner->page_offset] != TRANSLOG_FILLER);
6654 : }
6655 6119048 : DBUG_RETURN(0);
6656 : }
6657 :
6658 :
6659 : /**
6660 : @brief Get header of variable length record and call hook for it processing
6661 :
6662 : @param page Pointer to the buffer with page where LSN chunk is
6663 : placed
6664 : @param page_offset Offset of the first chunk in the page
6665 : @param buff Buffer to be filled with header data
6666 : @param scanner If present should be moved to the header page if
6667 : it differ from LSN page
6668 :
6669 : @return Length of header or operation status
6670 : @retval RECHEADER_READ_ERROR error
6671 : @retval RECHEADER_READ_EOF End of the log reached during the read
6672 : @retval # number of bytes in
6673 : TRANSLOG_HEADER_BUFFER::header where
6674 : stored decoded part of the header
6675 : */
6676 :
6677 : static int
6678 : translog_variable_length_header(uchar *page, translog_size_t page_offset,
6679 : TRANSLOG_HEADER_BUFFER *buff,
6680 : TRANSLOG_SCANNER_DATA *scanner)
6681 10466192 : {
6682 : struct st_log_record_type_descriptor *desc= (log_record_type_descriptor +
6683 10466192 : buff->type);
6684 10466192 : uchar *src= page + page_offset + 1 + 2;
6685 10466192 : uchar *dst= buff->header;
6686 : LSN base_lsn;
6687 10466192 : uint lsns= desc->compressed_LSN;
6688 : uint16 chunk_len;
6689 10466192 : uint16 length= desc->read_header_len;
6690 10466192 : uint16 buffer_length= length;
6691 : uint16 body_len;
6692 : int rc;
6693 : TRANSLOG_SCANNER_DATA internal_scanner;
6694 10466192 : DBUG_ENTER("translog_variable_length_header");
6695 :
6696 10466192 : buff->record_length= translog_variable_record_1group_decode_len(&src);
6697 10466192 : chunk_len= uint2korr(src);
6698 10466192 : DBUG_PRINT("info", ("rec len: %lu chunk len: %u length: %u bufflen: %u",
6699 : (ulong) buff->record_length, (uint) chunk_len,
6700 : (uint) length, (uint) buffer_length));
6701 10466192 : if (chunk_len == 0)
6702 : {
6703 : uint16 page_rest;
6704 10466186 : DBUG_PRINT("info", ("1 group"));
6705 10466186 : src+= 2;
6706 10466186 : page_rest= (uint16) (TRANSLOG_PAGE_SIZE - (src - page));
6707 :
6708 10466186 : base_lsn= buff->lsn;
6709 10466186 : body_len= min(page_rest, buff->record_length);
6710 : }
6711 : else
6712 : {
6713 : uint grp_no, curr;
6714 : uint header_to_skip;
6715 : uint16 page_rest;
6716 :
6717 6 : DBUG_PRINT("info", ("multi-group"));
6718 6 : grp_no= buff->groups_no= uint2korr(src + 2);
6719 6 : if (!(buff->groups=
6720 : (TRANSLOG_GROUP*) my_malloc(sizeof(TRANSLOG_GROUP) * grp_no,
6721 : MYF(0))))
6722 0 : DBUG_RETURN(RECHEADER_READ_ERROR);
6723 6 : DBUG_PRINT("info", ("Groups: %u", (uint) grp_no));
6724 6 : src+= (2 + 2);
6725 6 : page_rest= (uint16) (TRANSLOG_PAGE_SIZE - (src - page));
6726 6 : curr= 0;
6727 6 : header_to_skip= src - (page + page_offset);
6728 6 : buff->chunk0_pages= 0;
6729 :
6730 : for (;;)
6731 : {
6732 6 : uint i, read_length= grp_no;
6733 :
6734 6 : buff->chunk0_pages++;
6735 6 : if (page_rest < grp_no * (7 + 1))
6736 0 : read_length= page_rest / (7 + 1);
6737 6 : DBUG_PRINT("info", ("Read chunk0 page#%u read: %u left: %u "
6738 : "start from: %u",
6739 : buff->chunk0_pages, read_length, grp_no, curr));
6740 2846 : for (i= 0; i < read_length; i++, curr++)
6741 : {
6742 2840 : DBUG_ASSERT(curr < buff->groups_no);
6743 2840 : buff->groups[curr].addr= lsn_korr(src + i * (7 + 1));
6744 2840 : buff->groups[curr].num= src[i * (7 + 1) + 7];
6745 2840 : DBUG_PRINT("info", ("group #%u (%lu,0x%lx) chunks: %u",
6746 : curr,
6747 : LSN_IN_PARTS(buff->groups[curr].addr),
6748 : (uint) buff->groups[curr].num));
6749 : }
6750 6 : grp_no-= read_length;
6751 6 : if (grp_no == 0)
6752 : {
6753 6 : if (scanner)
6754 : {
6755 3 : buff->chunk0_data_addr= scanner->page_addr;
6756 : /* offset increased */
6757 3 : buff->chunk0_data_addr+= (page_offset + header_to_skip +
6758 : read_length * (7 + 1));
6759 : }
6760 : else
6761 : {
6762 3 : buff->chunk0_data_addr= buff->lsn;
6763 : /* offset increased */
6764 3 : buff->chunk0_data_addr+= (header_to_skip + read_length * (7 + 1));
6765 : }
6766 6 : buff->chunk0_data_len= chunk_len - 2 - read_length * (7 + 1);
6767 6 : DBUG_PRINT("info", ("Data address: (%lu,0x%lx) len: %u",
6768 : LSN_IN_PARTS(buff->chunk0_data_addr),
6769 : buff->chunk0_data_len));
6770 : break;
6771 : }
6772 0 : if (scanner == NULL)
6773 : {
6774 0 : DBUG_PRINT("info", ("use internal scanner for header reading"));
6775 0 : scanner= &internal_scanner;
6776 0 : if (translog_scanner_init(buff->lsn, 1, scanner, 0))
6777 : {
6778 0 : rc= RECHEADER_READ_ERROR;
6779 0 : goto exit_and_free;
6780 : }
6781 : }
6782 0 : if (translog_get_next_chunk(scanner))
6783 : {
6784 0 : if (scanner == &internal_scanner)
6785 0 : translog_destroy_scanner(scanner);
6786 0 : rc= RECHEADER_READ_ERROR;
6787 0 : goto exit_and_free;
6788 : }
6789 0 : if (scanner->page == END_OF_LOG)
6790 : {
6791 0 : if (scanner == &internal_scanner)
6792 0 : translog_destroy_scanner(scanner);
6793 0 : rc= RECHEADER_READ_EOF;
6794 0 : goto exit_and_free;
6795 : }
6796 0 : page= scanner->page;
6797 0 : page_offset= scanner->page_offset;
6798 0 : src= page + page_offset + header_to_skip;
6799 0 : chunk_len= uint2korr(src - 2 - 2);
6800 0 : DBUG_PRINT("info", ("Chunk len: %u", (uint) chunk_len));
6801 0 : page_rest= (uint16) (TRANSLOG_PAGE_SIZE - (src - page));
6802 0 : }
6803 :
6804 6 : if (scanner == NULL)
6805 : {
6806 3 : DBUG_PRINT("info", ("use internal scanner"));
6807 3 : scanner= &internal_scanner;
6808 : }
6809 : else
6810 : {
6811 3 : translog_destroy_scanner(scanner);
6812 : }
6813 6 : base_lsn= buff->groups[0].addr;
6814 6 : translog_scanner_init(base_lsn, 1, scanner, scanner == &internal_scanner);
6815 : /* first group chunk is always chunk type 2 */
6816 6 : page= scanner->page;
6817 6 : page_offset= scanner->page_offset;
6818 6 : src= page + page_offset + 1;
6819 6 : page_rest= (uint16) (TRANSLOG_PAGE_SIZE - (src - page));
6820 6 : body_len= page_rest;
6821 6 : if (scanner == &internal_scanner)
6822 3 : translog_destroy_scanner(scanner);
6823 : }
6824 10466192 : if (lsns)
6825 : {
6826 5170980 : uchar *start= src;
6827 5170980 : src= translog_relative_LSN_decode(base_lsn, src, dst, lsns);
6828 5170980 : lsns*= LSN_STORE_SIZE;
6829 5170980 : dst+= lsns;
6830 5170980 : length-= lsns;
6831 5170980 : buff->record_length+= (buff->compressed_LSN_economy=
6832 : (int) (lsns - (src - start)));
6833 5170980 : DBUG_PRINT("info", ("lsns: %u length: %u economy: %d new length: %lu",
6834 : lsns / LSN_STORE_SIZE, (uint) length,
6835 : (int) buff->compressed_LSN_economy,
6836 : (ulong) buff->record_length));
6837 5170980 : body_len-= (uint16) (src - start);
6838 : }
6839 : else
6840 5295212 : buff->compressed_LSN_economy= 0;
6841 :
6842 10466192 : DBUG_ASSERT(body_len >= length);
6843 10466192 : body_len-= length;
6844 10466192 : memcpy(dst, src, length);
6845 10466192 : buff->non_header_data_start_offset= (uint16) (src + length - page);
6846 10466192 : buff->non_header_data_len= body_len;
6847 10466192 : DBUG_PRINT("info", ("non_header_data_start_offset: %u len: %u buffer: %u",
6848 : buff->non_header_data_start_offset,
6849 : buff->non_header_data_len, buffer_length));
6850 10466192 : DBUG_RETURN(buffer_length);
6851 :
6852 0 : exit_and_free:
6853 0 : my_free(buff->groups, MYF(0));
6854 0 : buff->groups_no= 0; /* prevent try to use of buff->groups */
6855 0 : DBUG_RETURN(rc);
6856 : }
6857 :
6858 :
6859 : /**
6860 : @brief Read record header from the given buffer
6861 :
6862 : @param page page content buffer
6863 : @param page_offset offset of the chunk in the page
6864 : @param buff destination buffer
6865 : @param scanner If this is set the scanner will be moved to the
6866 : record header page (differ from LSN page in case of
6867 : multi-group records)
6868 :
6869 : @return Length of header or operation status
6870 : @retval RECHEADER_READ_ERROR error
6871 : @retval # number of bytes in
6872 : TRANSLOG_HEADER_BUFFER::header where
6873 : stored decoded part of the header
6874 : */
6875 :
6876 : int translog_read_record_header_from_buffer(uchar *page,
6877 : uint16 page_offset,
6878 : TRANSLOG_HEADER_BUFFER *buff,
6879 : TRANSLOG_SCANNER_DATA *scanner)
6880 10724588 : {
6881 : translog_size_t res;
6882 10724588 : DBUG_ENTER("translog_read_record_header_from_buffer");
6883 10724588 : DBUG_ASSERT(translog_is_LSN_chunk(page[page_offset]));
6884 10724588 : DBUG_ASSERT(translog_status == TRANSLOG_OK ||
6885 : translog_status == TRANSLOG_READONLY);
6886 10724588 : DBUG_PRINT("info", ("page byte: 0x%x offset: %u",
6887 : (uint) page[page_offset], (uint) page_offset));
6888 10724588 : buff->type= (page[page_offset] & TRANSLOG_REC_TYPE);
6889 10724588 : buff->short_trid= uint2korr(page + page_offset + 1);
6890 10724588 : DBUG_PRINT("info", ("Type %u, Short TrID %u, LSN (%lu,0x%lx)",
6891 : (uint) buff->type, (uint)buff->short_trid,
6892 : LSN_IN_PARTS(buff->lsn)));
6893 : /* Read required bytes from the header and call hook */
6894 10724588 : switch (log_record_type_descriptor[buff->type].rclass) {
6895 : case LOGRECTYPE_VARIABLE_LENGTH:
6896 10466192 : res= translog_variable_length_header(page, page_offset, buff,
6897 : scanner);
6898 10466192 : break;
6899 : case LOGRECTYPE_PSEUDOFIXEDLENGTH:
6900 : case LOGRECTYPE_FIXEDLENGTH:
6901 258396 : res= translog_fixed_length_header(page, page_offset, buff);
6902 258396 : break;
6903 : default:
6904 0 : DBUG_ASSERT(0); /* we read some junk (got no LSN) */
6905 : res= RECHEADER_READ_ERROR;
6906 : }
6907 10724588 : DBUG_RETURN(res);
6908 : }
6909 :
6910 :
6911 : /**
6912 : @brief Read record header and some fixed part of a record (the part depend
6913 : on record type).
6914 :
6915 : @param lsn log record serial number (address of the record)
6916 : @param buff log record header buffer
6917 :
6918 : @note Some type of record can be read completely by this call
6919 : @note "Decoded" header stored in TRANSLOG_HEADER_BUFFER::header (relative
6920 : LSN can be translated to absolute one), some fields can be added (like
6921 : actual header length in the record if the header has variable length)
6922 :
6923 : @return Length of header or operation status
6924 : @retval RECHEADER_READ_ERROR error
6925 : @retval # number of bytes in
6926 : TRANSLOG_HEADER_BUFFER::header where
6927 : stored decoded part of the header
6928 : */
6929 :
6930 : int translog_read_record_header(LSN lsn, TRANSLOG_HEADER_BUFFER *buff)
6931 2189956 : {
6932 : TRANSLOG_PAGE_SIZE_BUFF psize_buff;
6933 : uchar *page;
6934 2189956 : translog_size_t res, page_offset= LSN_OFFSET(lsn) % TRANSLOG_PAGE_SIZE;
6935 : PAGECACHE_BLOCK_LINK *direct_link;
6936 : TRANSLOG_ADDRESS addr;
6937 : TRANSLOG_VALIDATOR_DATA data;
6938 2189956 : DBUG_ENTER("translog_read_record_header");
6939 2189956 : DBUG_PRINT("enter", ("LSN: (%lu,0x%lx)", LSN_IN_PARTS(lsn)));
6940 2189956 : DBUG_ASSERT(LSN_OFFSET(lsn) % TRANSLOG_PAGE_SIZE != 0);
6941 2189956 : DBUG_ASSERT(translog_status == TRANSLOG_OK ||
6942 : translog_status == TRANSLOG_READONLY);
6943 :
6944 2189956 : buff->lsn= lsn;
6945 2189956 : buff->groups_no= 0;
6946 2189956 : data.addr= &addr;
6947 2189956 : data.was_recovered= 0;
6948 2189956 : addr= lsn;
6949 2189956 : addr-= page_offset; /* offset decreasing */
6950 2189956 : res= (!(page= translog_get_page(&data, psize_buff.buffer, &direct_link))) ?
6951 : RECHEADER_READ_ERROR :
6952 : translog_read_record_header_from_buffer(page, page_offset, buff, 0);
6953 2189956 : translog_free_link(direct_link);
6954 2189956 : DBUG_RETURN(res);
6955 : }
6956 :
6957 :
6958 : /**
6959 : @brief Read record header and some fixed part of a record (the part depend
6960 : on record type).
6961 :
6962 : @param scan scanner position to read
6963 : @param buff log record header buffer
6964 : @param move_scanner request to move scanner to the header position
6965 :
6966 : @note Some type of record can be read completely by this call
6967 : @note "Decoded" header stored in TRANSLOG_HEADER_BUFFER::header (relative
6968 : LSN can be translated to absolute one), some fields can be added (like
6969 : actual header length in the record if the header has variable length)
6970 :
6971 : @return Length of header or operation status
6972 : @retval RECHEADER_READ_ERROR error
6973 : @retval # number of bytes in
6974 : TRANSLOG_HEADER_BUFFER::header where stored
6975 : decoded part of the header
6976 : */
6977 :
6978 : int translog_read_record_header_scan(TRANSLOG_SCANNER_DATA *scanner,
6979 : TRANSLOG_HEADER_BUFFER *buff,
6980 : my_bool move_scanner)
6981 8534632 : {
6982 : translog_size_t res;
6983 8534632 : DBUG_ENTER("translog_read_record_header_scan");
6984 8534632 : DBUG_PRINT("enter", ("Scanner: Cur: (%lu,0x%lx) Hrz: (%lu,0x%lx) "
6985 : "Lst: (%lu,0x%lx) Offset: %u(%x) fixed %d",
6986 : LSN_IN_PARTS(scanner->page_addr),
6987 : LSN_IN_PARTS(scanner->horizon),
6988 : LSN_IN_PARTS(scanner->last_file_page),
6989 : (uint) scanner->page_offset,
6990 : (uint) scanner->page_offset, scanner->fixed_horizon));
6991 8534632 : DBUG_ASSERT(translog_status == TRANSLOG_OK ||
6992 : translog_status == TRANSLOG_READONLY);
6993 8534632 : buff->groups_no= 0;
6994 8534632 : buff->lsn= scanner->page_addr;
6995 8534632 : buff->lsn+= scanner->page_offset; /* offset increasing */
6996 8534632 : res= translog_read_record_header_from_buffer(scanner->page,
6997 : scanner->page_offset,
6998 : buff,
6999 : (move_scanner ?
7000 : scanner : 0));
7001 8534632 : DBUG_RETURN(res);
7002 : }
7003 :
7004 :
7005 : /**
7006 : @brief Read record header and some fixed part of the next record (the part
7007 : depend on record type).
7008 :
7009 : @param scanner data for scanning if lsn is NULL scanner data
7010 : will be used for continue scanning.
7011 : The scanner can be NULL.
7012 :
7013 : @param buff log record header buffer
7014 :
7015 : @return Length of header or operation status
7016 : @retval RECHEADER_READ_ERROR error
7017 : @retval RECHEADER_READ_EOF EOF
7018 : @retval # number of bytes in
7019 : TRANSLOG_HEADER_BUFFER::header where
7020 : stored decoded part of the header
7021 : */
7022 :
7023 : int translog_read_next_record_header(TRANSLOG_SCANNER_DATA *scanner,
7024 : TRANSLOG_HEADER_BUFFER *buff)
7025 5638523 : {
7026 : translog_size_t res;
7027 :
7028 5638523 : DBUG_ENTER("translog_read_next_record_header");
7029 5638523 : buff->groups_no= 0; /* to be sure that we will free it right */
7030 5638523 : DBUG_PRINT("enter", ("scanner: 0x%lx", (ulong) scanner));
7031 5638523 : DBUG_PRINT("info", ("Scanner: Cur: (%lu,0x%lx) Hrz: (%lu,0x%lx) "
7032 : "Lst: (%lu,0x%lx) Offset: %u(%x) fixed: %d",
7033 : LSN_IN_PARTS(scanner->page_addr),
7034 : LSN_IN_PARTS(scanner->horizon),
7035 : LSN_IN_PARTS(scanner->last_file_page),
7036 : (uint) scanner->page_offset,
7037 : (uint) scanner->page_offset, scanner->fixed_horizon));
7038 5638523 : DBUG_ASSERT(translog_status == TRANSLOG_OK ||
7039 : translog_status == TRANSLOG_READONLY);
7040 :
7041 : do
7042 : {
7043 5857170 : if (translog_get_next_chunk(scanner))
7044 0 : DBUG_RETURN(RECHEADER_READ_ERROR);
7045 5857170 : if (scanner->page == END_OF_LOG)
7046 : {
7047 310 : DBUG_PRINT("info", ("End of file from the scanner"));
7048 : /* Last record was read */
7049 310 : buff->lsn= LSN_IMPOSSIBLE;
7050 310 : DBUG_RETURN(RECHEADER_READ_EOF);
7051 : }
7052 5856860 : DBUG_PRINT("info", ("Page: (%lu,0x%lx) offset: %lu byte: %x",
7053 : LSN_IN_PARTS(scanner->page_addr),
7054 : (ulong) scanner->page_offset,
7055 : (uint) scanner->page[scanner->page_offset]));
7056 : } while (!translog_is_LSN_chunk(scanner->page[scanner->page_offset]) &&
7057 5856860 : scanner->page[scanner->page_offset] != TRANSLOG_FILLER);
7058 :
7059 5638213 : if (scanner->page[scanner->page_offset] == TRANSLOG_FILLER)
7060 : {
7061 0 : DBUG_PRINT("info", ("End of file"));
7062 : /* Last record was read */
7063 0 : buff->lsn= LSN_IMPOSSIBLE;
7064 : /* Return 'end of log' marker */
7065 0 : res= RECHEADER_READ_EOF;
7066 : }
7067 : else
7068 5638213 : res= translog_read_record_header_scan(scanner, buff, 0);
7069 5638213 : DBUG_RETURN(res);
7070 : }
7071 :
7072 :
7073 : /*
7074 : Moves record data reader to the next chunk and fill the data reader
7075 : information about that chunk.
7076 :
7077 : SYNOPSIS
7078 : translog_record_read_next_chunk()
7079 : data data cursor
7080 :
7081 : RETURN
7082 : 0 OK
7083 : 1 Error
7084 : */
7085 :
7086 : static my_bool translog_record_read_next_chunk(TRANSLOG_READER_DATA *data)
7087 231598 : {
7088 231598 : translog_size_t new_current_offset= data->current_offset + data->chunk_size;
7089 : uint16 chunk_header_len, chunk_len;
7090 : uint8 type;
7091 231598 : DBUG_ENTER("translog_record_read_next_chunk");
7092 :
7093 231598 : if (data->eor)
7094 : {
7095 0 : DBUG_PRINT("info", ("end of the record flag set"));
7096 0 : DBUG_RETURN(1);
7097 : }
7098 :
7099 233015 : if (data->header.groups_no &&
7100 : data->header.groups_no - 1 != data->current_group &&
7101 : data->header.groups[data->current_group].num == data->current_chunk)
7102 : {
7103 : /* Goto next group */
7104 1417 : data->current_group++;
7105 1417 : data->current_chunk= 0;
7106 1417 : DBUG_PRINT("info", ("skip to group: #%u", data->current_group));
7107 1417 : translog_destroy_scanner(&data->scanner);
7108 1417 : translog_scanner_init(data->header.groups[data->current_group].addr,
7109 : 1, &data->scanner, 1);
7110 : }
7111 : else
7112 : {
7113 230181 : data->current_chunk++;
7114 230181 : if (translog_get_next_chunk(&data->scanner))
7115 0 : DBUG_RETURN(1);
7116 230181 : if (data->scanner.page == END_OF_LOG)
7117 : {
7118 : /*
7119 : Actually it should not happened, but we want to quit nicely in case
7120 : of a truncated log
7121 : */
7122 0 : DBUG_RETURN(1);
7123 : }
7124 : }
7125 231598 : type= data->scanner.page[data->scanner.page_offset] & TRANSLOG_CHUNK_TYPE;
7126 :
7127 231598 : if (type == TRANSLOG_CHUNK_LSN && data->header.groups_no)
7128 : {
7129 0 : DBUG_PRINT("info",
7130 : ("Last chunk: data len: %u offset: %u group: %u of %u",
7131 : data->header.chunk0_data_len, data->scanner.page_offset,
7132 : data->current_group, data->header.groups_no - 1));
7133 0 : DBUG_ASSERT(data->header.groups_no - 1 == data->current_group);
7134 0 : DBUG_ASSERT(data->header.lsn ==
7135 : data->scanner.page_addr + data->scanner.page_offset);
7136 0 : translog_destroy_scanner(&data->scanner);
7137 0 : translog_scanner_init(data->header.chunk0_data_addr, 1, &data->scanner, 1);
7138 0 : data->chunk_size= data->header.chunk0_data_len;
7139 0 : data->body_offset= data->scanner.page_offset;
7140 0 : data->current_offset= new_current_offset;
7141 0 : data->eor= 1;
7142 0 : DBUG_RETURN(0);
7143 : }
7144 :
7145 231598 : if (type == TRANSLOG_CHUNK_LSN || type == TRANSLOG_CHUNK_FIXED)
7146 : {
7147 0 : data->eor= 1;
7148 0 : DBUG_RETURN(1); /* End of record */
7149 : }
7150 :
7151 231598 : chunk_header_len=
7152 : translog_get_chunk_header_length(data->scanner.page +
7153 : data->scanner.page_offset);
7154 231598 : chunk_len= translog_get_total_chunk_length(data->scanner.page,
7155 : data->scanner.page_offset);
7156 231598 : data->chunk_size= chunk_len - chunk_header_len;
7157 231598 : data->body_offset= data->scanner.page_offset + chunk_header_len;
7158 231598 : data->current_offset= new_current_offset;
7159 231598 : DBUG_PRINT("info", ("grp: %u chunk: %u body_offset: %u chunk_size: %u "
7160 : "current_offset: %lu",
7161 : (uint) data->current_group,
7162 : (uint) data->current_chunk,
7163 : (uint) data->body_offset,
7164 : (uint) data->chunk_size, (ulong) data->current_offset));
7165 231598 : DBUG_RETURN(0);
7166 : }
7167 :
7168 :
7169 : /*
7170 : Initialize record reader data from LSN
7171 :
7172 : SYNOPSIS
7173 : translog_init_reader_data()
7174 : lsn reference to LSN we should start from
7175 : data reader data to initialize
7176 :
7177 : RETURN
7178 : 0 OK
7179 : 1 Error
7180 : */
7181 :
7182 : static my_bool translog_init_reader_data(LSN lsn,
7183 : TRANSLOG_READER_DATA *data)
7184 2896419 : {
7185 : int read_header;
7186 2896419 : DBUG_ENTER("translog_init_reader_data");
7187 2896419 : if (translog_scanner_init(lsn, 1, &data->scanner, 1) ||
7188 : ((read_header=
7189 : translog_read_record_header_scan(&data->scanner, &data->header, 1))
7190 : == RECHEADER_READ_ERROR))
7191 0 : DBUG_RETURN(1);
7192 2896419 : data->read_header= read_header;
7193 2896419 : data->body_offset= data->header.non_header_data_start_offset;
7194 2896419 : data->chunk_size= data->header.non_header_data_len;
7195 2896419 : data->current_offset= data->read_header;
7196 2896419 : data->current_group= 0;
7197 2896419 : data->current_chunk= 0;
7198 2896419 : data->eor= 0;
7199 2896419 : DBUG_PRINT("info", ("read_header: %u "
7200 : "body_offset: %u chunk_size: %u current_offset: %lu",
7201 : (uint) data->read_header,
7202 : (uint) data->body_offset,
7203 : (uint) data->chunk_size, (ulong) data->current_offset));
7204 2896419 : DBUG_RETURN(0);
7205 : }
7206 :
7207 :
7208 : /**
7209 : @brief Destroy reader data object
7210 : */
7211 :
7212 : static void translog_destroy_reader_data(TRANSLOG_READER_DATA *data)
7213 2896419 : {
7214 2896419 : translog_destroy_scanner(&data->scanner);
7215 2896419 : translog_free_record_header(&data->header);
7216 : }
7217 :
7218 :
7219 : /*
7220 : Read a part of the record.
7221 :
7222 : SYNOPSIS
7223 : translog_read_record_header()
7224 : lsn log record serial number (address of the record)
7225 : offset From the beginning of the record beginning (read
7226 : by translog_read_record_header).
7227 : length Length of record part which have to be read.
7228 : buffer Buffer where to read the record part (have to be at
7229 : least 'length' bytes length)
7230 :
7231 : RETURN
7232 : length of data actually read
7233 : */
7234 :
7235 : translog_size_t translog_read_record(LSN lsn,
7236 : translog_size_t offset,
7237 : translog_size_t length,
7238 : uchar *buffer,
7239 : TRANSLOG_READER_DATA *data)
7240 2896419 : {
7241 2896419 : translog_size_t requested_length= length;
7242 2896419 : translog_size_t end= offset + length;
7243 : TRANSLOG_READER_DATA internal_data;
7244 2896419 : DBUG_ENTER("translog_read_record");
7245 2896419 : DBUG_ASSERT(translog_status == TRANSLOG_OK ||
7246 : translog_status == TRANSLOG_READONLY);
7247 :
7248 2896419 : if (data == NULL)
7249 : {
7250 2896419 : DBUG_ASSERT(lsn != LSN_IMPOSSIBLE);
7251 2896419 : data= &internal_data;
7252 : }
7253 2896419 : if (lsn ||
7254 : (offset < data->current_offset &&
7255 : !(offset < data->read_header && offset + length < data->read_header)))
7256 : {
7257 2896419 : if (translog_init_reader_data(lsn, data))
7258 0 : DBUG_RETURN(0);
7259 : }
7260 2896419 : DBUG_PRINT("info", ("Offset: %lu length: %lu "
7261 : "Scanner: Cur: (%lu,0x%lx) Hrz: (%lu,0x%lx) "
7262 : "Lst: (%lu,0x%lx) Offset: %u(%x) fixed: %d",
7263 : (ulong) offset, (ulong) length,
7264 : LSN_IN_PARTS(data->scanner.page_addr),
7265 : LSN_IN_PARTS(data->scanner.horizon),
7266 : LSN_IN_PARTS(data->scanner.last_file_page),
7267 : (uint) data->scanner.page_offset,
7268 : (uint) data->scanner.page_offset,
7269 : data->scanner.fixed_horizon));
7270 2896419 : if (offset < data->read_header)
7271 : {
7272 2784971 : uint16 len= min(data->read_header, end) - offset;
7273 2784971 : DBUG_PRINT("info",
7274 : ("enter header offset: %lu length: %lu",
7275 : (ulong) offset, (ulong) length));
7276 2784971 : memcpy(buffer, data->header.header + offset, len);
7277 2784971 : length-= len;
7278 2784971 : if (length == 0)
7279 : {
7280 573526 : translog_destroy_reader_data(data);
7281 573526 : DBUG_RETURN(requested_length);
7282 : }
7283 2211445 : offset+= len;
7284 2211445 : buffer+= len;
7285 2211445 : DBUG_PRINT("info",
7286 : ("len: %u offset: %lu curr: %lu length: %lu",
7287 : len, (ulong) offset, (ulong) data->current_offset,
7288 : (ulong) length));
7289 : }
7290 : /* TODO: find first page which we should read by offset */
7291 :
7292 : /* read the record chunk by chunk */
7293 : for(;;)
7294 : {
7295 2554491 : uint page_end= data->current_offset + data->chunk_size;
7296 2554491 : DBUG_PRINT("info",
7297 : ("enter body offset: %lu curr: %lu "
7298 : "length: %lu page_end: %lu",
7299 : (ulong) offset, (ulong) data->current_offset, (ulong) length,
7300 : (ulong) page_end));
7301 2554491 : if (offset < page_end)
7302 : {
7303 2506696 : uint len= page_end - offset;
7304 2506696 : set_if_smaller(len, length); /* in case we read beyond record's end */
7305 2506696 : DBUG_ASSERT(offset >= data->current_offset);
7306 2506696 : memcpy(buffer,
7307 : data->scanner.page + data->body_offset +
7308 : (offset - data->current_offset), len);
7309 2506696 : length-= len;
7310 2506696 : if (length == 0)
7311 : {
7312 2322893 : translog_destroy_reader_data(data);
7313 2322893 : DBUG_RETURN(requested_length);
7314 : }
7315 183803 : offset+= len;
7316 183803 : buffer+= len;
7317 183803 : DBUG_PRINT("info",
7318 : ("len: %u offset: %lu curr: %lu length: %lu",
7319 : len, (ulong) offset, (ulong) data->current_offset,
7320 : (ulong) length));
7321 : }
7322 231598 : if (translog_record_read_next_chunk(data))
7323 : {
7324 0 : translog_destroy_reader_data(data);
7325 0 : DBUG_RETURN(requested_length - length);
7326 : }
7327 : }
7328 : }
7329 :
7330 :
7331 : /*
7332 : @brief Force skipping to the next buffer
7333 :
7334 : @todo Do not copy old page content if all page protections are switched off
7335 : (because we do not need calculate something or change old parts of the page)
7336 : */
7337 :
7338 : static void translog_force_current_buffer_to_finish()
7339 18538 : {
7340 : TRANSLOG_ADDRESS new_buff_beginning;
7341 18538 : uint16 old_buffer_no= log_descriptor.bc.buffer_no;
7342 18538 : uint16 new_buffer_no= (old_buffer_no + 1) % TRANSLOG_BUFFERS_NO;
7343 : struct st_translog_buffer *new_buffer= (log_descriptor.buffers +
7344 18538 : new_buffer_no);
7345 18538 : struct st_translog_buffer *old_buffer= log_descriptor.bc.buffer;
7346 18538 : uchar *data= log_descriptor.bc.ptr - log_descriptor.bc.current_page_fill;
7347 18538 : uint16 left= TRANSLOG_PAGE_SIZE - log_descriptor.bc.current_page_fill;
7348 : uint16 current_page_fill, write_counter, previous_offset;
7349 18538 : DBUG_ENTER("translog_force_current_buffer_to_finish");
7350 18538 : DBUG_PRINT("enter", ("Buffer #%u 0x%lx "
7351 : "Buffer addr: (%lu,0x%lx) "
7352 : "Page addr: (%lu,0x%lx) "
7353 : "size: %lu (%lu) Pg: %u left: %u in progress %u",
7354 : (uint) log_descriptor.bc.buffer_no,
7355 : (ulong) log_descriptor.bc.buffer,
7356 : LSN_IN_PARTS(log_descriptor.bc.buffer->offset),
7357 : (ulong) LSN_FILE_NO(log_descriptor.horizon),
7358 : (ulong) (LSN_OFFSET(log_descriptor.horizon) -
7359 : log_descriptor.bc.current_page_fill),
7360 : (ulong) log_descriptor.bc.buffer->size,
7361 : (ulong) (log_descriptor.bc.ptr -log_descriptor.bc.
7362 : buffer->buffer),
7363 : (uint) log_descriptor.bc.current_page_fill,
7364 : (uint) left,
7365 : (uint) log_descriptor.bc.buffer->
7366 : copy_to_buffer_in_progress));
7367 18538 : translog_lock_assert_owner();
7368 18538 : LINT_INIT(current_page_fill);
7369 18538 : new_buff_beginning= log_descriptor.bc.buffer->offset;
7370 18538 : new_buff_beginning+= log_descriptor.bc.buffer->size; /* increase offset */
7371 :
7372 18538 : DBUG_ASSERT(log_descriptor.bc.ptr !=NULL);
7373 18538 : DBUG_ASSERT(LSN_FILE_NO(log_descriptor.horizon) ==
7374 : LSN_FILE_NO(log_descriptor.bc.buffer->offset));
7375 18538 : translog_check_cursor(&log_descriptor.bc);
7376 18538 : DBUG_ASSERT(left < TRANSLOG_PAGE_SIZE);
7377 18538 : if (left)
7378 : {
7379 : /*
7380 : TODO: if 'left' is so small that can't hold any other record
7381 : then do not move the page
7382 : */
7383 18536 : DBUG_PRINT("info", ("left: %u", (uint) left));
7384 :
7385 : /* decrease offset */
7386 18536 : new_buff_beginning-= log_descriptor.bc.current_page_fill;
7387 18536 : current_page_fill= log_descriptor.bc.current_page_fill;
7388 :
7389 18536 : memset(log_descriptor.bc.ptr, TRANSLOG_FILLER, left);
7390 18536 : log_descriptor.bc.buffer->size+= left;
7391 18536 : DBUG_PRINT("info", ("Finish Page buffer #%u: 0x%lx "
7392 : "Size: %lu",
7393 : (uint) log_descriptor.bc.buffer->buffer_no,
7394 : (ulong) log_descriptor.bc.buffer,
7395 : (ulong) log_descriptor.bc.buffer->size));
7396 18536 : DBUG_ASSERT(log_descriptor.bc.buffer->buffer_no ==
7397 : log_descriptor.bc.buffer_no);
7398 : }
7399 : else
7400 : {
7401 2 : log_descriptor.bc.current_page_fill= 0;
7402 : }
7403 :
7404 18538 : translog_buffer_lock(new_buffer);
7405 : #ifndef DBUG_OFF
7406 : {
7407 18538 : TRANSLOG_ADDRESS offset= new_buffer->offset;
7408 18538 : TRANSLOG_FILE *file= new_buffer->file;
7409 18538 : uint8 ver= new_buffer->ver;
7410 18538 : translog_lock_assert_owner();
7411 : #endif
7412 18538 : translog_wait_for_buffer_free(new_buffer);
7413 : #ifndef DBUG_OFF
7414 : /* We keep the handler locked so nobody can start this new buffer */
7415 18538 : DBUG_ASSERT(offset == new_buffer->offset && new_buffer->file == NULL &&
7416 : (file == NULL ? ver : (uint8)(ver + 1)) == new_buffer->ver);
7417 : }
7418 : #endif
7419 :
7420 18538 : write_counter= log_descriptor.bc.write_counter;
7421 18538 : previous_offset= log_descriptor.bc.previous_offset;
7422 18538 : translog_start_buffer(new_buffer, &log_descriptor.bc, new_buffer_no);
7423 : /* Fix buffer offset (which was incorrectly set to horizon) */
7424 18538 : log_descriptor.bc.buffer->offset= new_buff_beginning;
7425 18538 : log_descriptor.bc.write_counter= write_counter;
7426 18538 : log_descriptor.bc.previous_offset= previous_offset;
7427 18538 : new_buffer->prev_last_lsn= BUFFER_MAX_LSN(old_buffer);
7428 18538 : DBUG_PRINT("info", ("prev_last_lsn set to (%lu,0x%lx) buffer: 0x%lx",
7429 : LSN_IN_PARTS(new_buffer->prev_last_lsn),
7430 : (ulong) new_buffer));
7431 :
7432 : /*
7433 : Advances this log pointer, increases writers and let other threads to
7434 : write to the log while we process old page content
7435 : */
7436 18538 : if (left)
7437 : {
7438 18536 : log_descriptor.bc.ptr+= current_page_fill;
7439 18536 : log_descriptor.bc.buffer->size= log_descriptor.bc.current_page_fill=
7440 : current_page_fill;
7441 18536 : new_buffer->overlay= 1;
7442 : }
7443 : else
7444 2 : translog_new_page_header(&log_descriptor.horizon, &log_descriptor.bc);
7445 18538 : translog_buffer_increase_writers(new_buffer);
7446 18538 : translog_buffer_unlock(new_buffer);
7447 :
7448 : /*
7449 : We have to wait until all writers finish before start changing the
7450 : pages by applying protection and copying the page content in the
7451 : new buffer.
7452 : */
7453 : #ifndef DBUG_OFF
7454 : {
7455 18538 : TRANSLOG_ADDRESS offset= old_buffer->offset;
7456 18538 : TRANSLOG_FILE *file= old_buffer->file;
7457 18538 : uint8 ver= old_buffer->ver;
7458 : #endif
7459 : /*
7460 : Now only one thread can flush log (buffer can flush many threads but
7461 : log flush log flush where this function is used can do only one thread)
7462 : so no other thread can set is_closing_buffer.
7463 : */
7464 18538 : DBUG_ASSERT(!old_buffer->is_closing_buffer);
7465 18538 : old_buffer->is_closing_buffer= 1; /* Other flushes will wait */
7466 18538 : DBUG_PRINT("enter", ("Buffer #%u 0x%lx is_closing_buffer set",
7467 : (uint) old_buffer->buffer_no, (ulong) old_buffer));
7468 18538 : translog_wait_for_writers(old_buffer);
7469 : #ifndef DBUG_OFF
7470 : /* We blocked flushing this buffer so the buffer should not changed */
7471 18538 : DBUG_ASSERT(offset == old_buffer->offset && file == old_buffer->file &&
7472 : ver == old_buffer->ver);
7473 : }
7474 : #endif
7475 :
7476 18538 : if (log_descriptor.flags & TRANSLOG_SECTOR_PROTECTION)
7477 : {
7478 1599 : translog_put_sector_protection(data, &log_descriptor.bc);
7479 1599 : if (left)
7480 : {
7481 1599 : log_descriptor.bc.write_counter++;
7482 1599 : log_descriptor.bc.previous_offset= current_page_fill;
7483 : }
7484 : else
7485 : {
7486 0 : DBUG_PRINT("info", ("drop write_counter"));
7487 0 : log_descriptor.bc.write_counter= 0;
7488 0 : log_descriptor.bc.previous_offset= 0;
7489 : }
7490 : }
7491 :
7492 18538 : if (log_descriptor.flags & TRANSLOG_PAGE_CRC)
7493 : {
7494 : uint32 crc= translog_crc(data + log_descriptor.page_overhead,
7495 : TRANSLOG_PAGE_SIZE -
7496 1599 : log_descriptor.page_overhead);
7497 1599 : DBUG_PRINT("info", ("CRC: 0x%lx", (ulong) crc));
7498 1599 : int4store(data + 3 + 3 + 1, crc);
7499 : }
7500 18538 : old_buffer->is_closing_buffer= 0;
7501 18538 : DBUG_PRINT("enter", ("Buffer #%u 0x%lx is_closing_buffer cleared",
7502 : (uint) old_buffer->buffer_no, (ulong) old_buffer));
7503 18538 : pthread_cond_broadcast(&old_buffer->waiting_filling_buffer);
7504 :
7505 18538 : if (left)
7506 : {
7507 : /*
7508 : TODO: do not copy beginning of the page if we have no CRC or sector
7509 : checks on
7510 : */
7511 18536 : memcpy(new_buffer->buffer, data, current_page_fill);
7512 : }
7513 18538 : old_buffer->next_buffer_offset= new_buffer->offset;
7514 18538 : translog_buffer_lock(new_buffer);
7515 18538 : new_buffer->prev_buffer_offset= old_buffer->offset;
7516 18538 : translog_buffer_decrease_writers(new_buffer);
7517 18538 : translog_buffer_unlock(new_buffer);
7518 :
7519 18538 : DBUG_VOID_RETURN;
7520 : }
7521 :
7522 :
7523 : /**
7524 : @brief Waits while given lsn will be flushed
7525 :
7526 : @param lsn log record serial number up to which (inclusive)
7527 : the log has to be flushed
7528 : */
7529 :
7530 : void translog_flush_wait_for_end(LSN lsn)
7531 0 : {
7532 0 : DBUG_ENTER("translog_flush_wait_for_end");
7533 0 : DBUG_PRINT("enter", ("LSN: (%lu,0x%lx)", LSN_IN_PARTS(lsn)));
7534 0 : safe_mutex_assert_owner(&log_descriptor.log_flush_lock);
7535 0 : while (cmp_translog_addr(log_descriptor.flushed, lsn) < 0)
7536 0 : pthread_cond_wait(&log_descriptor.log_flush_cond,
7537 : &log_descriptor.log_flush_lock);
7538 0 : DBUG_VOID_RETURN;
7539 : }
7540 :
7541 :
7542 : /**
7543 : @brief Sets goal for the next flush pass and waits for this pass end.
7544 :
7545 : @param lsn log record serial number up to which (inclusive)
7546 : the log has to be flushed
7547 : */
7548 :
7549 : void translog_flush_set_new_goal_and_wait(TRANSLOG_ADDRESS lsn)
7550 0 : {
7551 0 : DBUG_ENTER("translog_flush_set_new_goal_and_wait");
7552 0 : DBUG_PRINT("enter", ("LSN: (%lu,0x%lx)", LSN_IN_PARTS(lsn)));
7553 0 : safe_mutex_assert_owner(&log_descriptor.log_flush_lock);
7554 0 : if (cmp_translog_addr(lsn, log_descriptor.next_pass_max_lsn) > 0)
7555 : {
7556 0 : log_descriptor.next_pass_max_lsn= lsn;
7557 0 : log_descriptor.max_lsn_requester= pthread_self();
7558 : }
7559 0 : while (log_descriptor.flush_in_progress)
7560 : {
7561 0 : pthread_cond_wait(&log_descriptor.log_flush_cond,
7562 : &log_descriptor.log_flush_lock);
7563 : }
7564 0 : DBUG_VOID_RETURN;
7565 : }
7566 :
7567 :
7568 : /**
7569 : @brief Flush the log up to given LSN (included)
7570 :
7571 : @param lsn log record serial number up to which (inclusive)
7572 : the log has to be flushed
7573 :
7574 : @return Operation status
7575 : @retval 0 OK
7576 : @retval 1 Error
7577 :
7578 : */
7579 :
7580 : my_bool translog_flush(TRANSLOG_ADDRESS lsn)
7581 148119 : {
7582 148119 : LSN sent_to_disk= LSN_IMPOSSIBLE;
7583 : TRANSLOG_ADDRESS flush_horizon;
7584 : uint fn, i;
7585 : dirty_buffer_mask_t dirty_buffer_mask;
7586 : uint8 last_buffer_no, start_buffer_no;
7587 148119 : my_bool rc= 0;
7588 148119 : DBUG_ENTER("translog_flush");
7589 148119 : DBUG_PRINT("enter", ("Flush up to LSN: (%lu,0x%lx)", LSN_IN_PARTS(lsn)));
7590 148119 : DBUG_ASSERT(translog_status == TRANSLOG_OK ||
7591 : translog_status == TRANSLOG_READONLY);
7592 148119 : LINT_INIT(sent_to_disk);
7593 :
7594 148119 : pthread_mutex_lock(&log_descriptor.log_flush_lock);
7595 148119 : DBUG_PRINT("info", ("Everything is flushed up to (%lu,0x%lx)",
7596 : LSN_IN_PARTS(log_descriptor.flushed)));
7597 148119 : if (cmp_translog_addr(log_descriptor.flushed, lsn) >= 0)
7598 : {
7599 129162 : pthread_mutex_unlock(&log_descriptor.log_flush_lock);
7600 129162 : DBUG_RETURN(0);
7601 : }
7602 18957 : if (log_descriptor.flush_in_progress)
7603 : {
7604 0 : translog_flush_set_new_goal_and_wait(lsn);
7605 0 : if (!pthread_equal(log_descriptor.max_lsn_requester, pthread_self()))
7606 : {
7607 : /* fix lsn if it was horizon */
7608 0 : if (cmp_translog_addr(lsn, log_descriptor.bc.buffer->last_lsn) > 0)
7609 0 : lsn= BUFFER_MAX_LSN(log_descriptor.bc.buffer);
7610 0 : translog_flush_wait_for_end(lsn);
7611 0 : pthread_mutex_unlock(&log_descriptor.log_flush_lock);
7612 0 : DBUG_RETURN(0);
7613 : }
7614 0 : log_descriptor.next_pass_max_lsn= LSN_IMPOSSIBLE;
7615 : }
7616 18957 : log_descriptor.flush_in_progress= 1;
7617 18957 : flush_horizon= log_descriptor.previous_flush_horizon;
7618 18957 : DBUG_PRINT("info", ("flush_in_progress is set"));
7619 18957 : pthread_mutex_unlock(&log_descriptor.log_flush_lock);
7620 :
7621 18957 : translog_lock();
7622 18957 : if (log_descriptor.is_everything_flushed)
7623 : {
7624 419 : DBUG_PRINT("info", ("everything is flushed"));
7625 419 : rc= (translog_status == TRANSLOG_READONLY);
7626 419 : translog_unlock();
7627 419 : goto out;
7628 : }
7629 :
7630 : /*
7631 : We will recheck information when will lock buffers one by
7632 : one so we can use unprotected read here (this is just for
7633 : speed up buffers processing)
7634 : */
7635 18538 : dirty_buffer_mask= log_descriptor.dirty_buffer_mask;
7636 18538 : DBUG_PRINT("info", ("Dirty buffer mask: %lx current buffer: %u",
7637 : (ulong) dirty_buffer_mask,
7638 : (uint) log_descriptor.bc.buffer_no));
7639 18538 : for (i= (log_descriptor.bc.buffer_no + 1) % TRANSLOG_BUFFERS_NO;
7640 166842 : i != log_descriptor.bc.buffer_no && !(dirty_buffer_mask & (1 << i));
7641 129766 : i= (i + 1) % TRANSLOG_BUFFERS_NO) {}
7642 18538 : start_buffer_no= i;
7643 :
7644 18538 : DBUG_PRINT("info",
7645 : ("start from: %u current: %u prev last lsn: (%lu,0x%lx)",
7646 : (uint) start_buffer_no, (uint) log_descriptor.bc.buffer_no,
7647 : LSN_IN_PARTS(log_descriptor.bc.buffer->prev_last_lsn)));
7648 :
7649 :
7650 : /*
7651 : if LSN up to which we have to flush bigger then maximum LSN of previous
7652 : buffer and at least one LSN was saved in the current buffer (last_lsn !=
7653 : LSN_IMPOSSIBLE) then we better finish the current buffer.
7654 : */
7655 37076 : if (cmp_translog_addr(lsn, log_descriptor.bc.buffer->prev_last_lsn) > 0 &&
7656 : log_descriptor.bc.buffer->last_lsn != LSN_IMPOSSIBLE)
7657 : {
7658 18538 : struct st_translog_buffer *buffer= log_descriptor.bc.buffer;
7659 18538 : lsn= log_descriptor.bc.buffer->last_lsn; /* fix lsn if it was horizon */
7660 18538 : DBUG_PRINT("info", ("LSN to flush fixed to last lsn: (%lu,0x%lx)",
7661 : LSN_IN_PARTS(log_descriptor.bc.buffer->last_lsn)));
7662 18538 : last_buffer_no= log_descriptor.bc.buffer_no;
7663 18538 : log_descriptor.is_everything_flushed= 1;
7664 18538 : translog_force_current_buffer_to_finish();
7665 18538 : translog_buffer_unlock(buffer);
7666 : }
7667 : else
7668 : {
7669 0 : last_buffer_no= ((log_descriptor.bc.buffer_no + TRANSLOG_BUFFERS_NO -1) %
7670 : TRANSLOG_BUFFERS_NO);
7671 0 : translog_unlock();
7672 : }
7673 18538 : sent_to_disk= translog_get_sent_to_disk();
7674 18538 : if (cmp_translog_addr(lsn, sent_to_disk) > 0)
7675 : {
7676 :
7677 18538 : DBUG_PRINT("info", ("Start buffer #: %u last buffer #: %u",
7678 : (uint) start_buffer_no, (uint) last_buffer_no));
7679 18538 : last_buffer_no= (last_buffer_no + 1) % TRANSLOG_BUFFERS_NO;
7680 18538 : i= start_buffer_no;
7681 : do
7682 : {
7683 18538 : struct st_translog_buffer *buffer= log_descriptor.buffers + i;
7684 18538 : translog_buffer_lock(buffer);
7685 18538 : DBUG_PRINT("info", ("Check buffer: 0x%lx #: %u "
7686 : "prev last LSN: (%lu,0x%lx) "
7687 : "last LSN: (%lu,0x%lx) status: %s",
7688 : (ulong)(buffer),
7689 : (uint) i,
7690 : LSN_IN_PARTS(buffer->prev_last_lsn),
7691 : LSN_IN_PARTS(buffer->last_lsn),
7692 : (buffer->file ?
7693 : "dirty" : "closed")));
7694 18538 : if (buffer->prev_last_lsn <= lsn &&
7695 : buffer->file != NULL)
7696 : {
7697 18538 : DBUG_ASSERT(flush_horizon <= buffer->offset + buffer->size);
7698 18538 : flush_horizon= buffer->offset + buffer->size;
7699 18538 : translog_buffer_flush(buffer);
7700 : }
7701 18538 : translog_buffer_unlock(buffer);
7702 18538 : i= (i + 1) % TRANSLOG_BUFFERS_NO;
7703 18538 : } while (i != last_buffer_no);
7704 18538 : sent_to_disk= translog_get_sent_to_disk();
7705 : }
7706 :
7707 : /* sync files from previous flush till current one */
7708 37095 : for (fn= LSN_FILE_NO(log_descriptor.flushed); fn <= LSN_FILE_NO(lsn); fn++)
7709 : {
7710 18557 : TRANSLOG_FILE *file= get_logfile_by_number(fn);
7711 18557 : DBUG_ASSERT(file != NULL);
7712 18557 : if (!file->is_sync)
7713 : {
7714 18545 : if (my_sync(file->handler.file, MYF(MY_WME)))
7715 : {
7716 0 : rc= 1;
7717 0 : translog_stop_writing();
7718 0 : sent_to_disk= LSN_IMPOSSIBLE;
7719 0 : goto out;
7720 : }
7721 18545 : file->is_sync= 1;
7722 : }
7723 : }
7724 :
7725 18538 : if (sync_log_dir >= TRANSLOG_SYNC_DIR_ALWAYS &&
7726 : (LSN_FILE_NO(log_descriptor.previous_flush_horizon) !=
7727 : LSN_FILE_NO(flush_horizon) ||
7728 : ((LSN_OFFSET(log_descriptor.previous_flush_horizon) - 1) /
7729 : TRANSLOG_PAGE_SIZE) !=
7730 : ((LSN_OFFSET(flush_horizon) - 1) / TRANSLOG_PAGE_SIZE)))
7731 0 : rc|= sync_dir(log_descriptor.directory_fd, MYF(MY_WME | MY_IGNORE_BADFD));
7732 18538 : log_descriptor.previous_flush_horizon= flush_horizon;
7733 18957 : out:
7734 18957 : pthread_mutex_lock(&log_descriptor.log_flush_lock);
7735 18957 : if (sent_to_disk != LSN_IMPOSSIBLE)
7736 18538 : log_descriptor.flushed= sent_to_disk;
7737 18957 : log_descriptor.flush_in_progress= 0;
7738 18957 : DBUG_PRINT("info", ("flush_in_progress is dropped"));
7739 18957 : pthread_mutex_unlock(&log_descriptor.log_flush_lock);\
7740 : pthread_cond_broadcast(&log_descriptor.log_flush_cond);
7741 18957 : DBUG_RETURN(rc);
7742 : }
7743 :
7744 :
7745 : /**
7746 : @brief Gives a 2-byte-id to MARIA_SHARE and logs this fact
7747 :
7748 : If a MARIA_SHARE does not yet have a 2-byte-id (unique over all currently
7749 : open MARIA_SHAREs), give it one and record this assignment in the log
7750 : (LOGREC_FILE_ID log record).
7751 :
7752 : @param tbl_info table
7753 : @param trn calling transaction
7754 :
7755 : @return Operation status
7756 : @retval 0 OK
7757 : @retval 1 Error
7758 :
7759 : @note Can be called even if share already has an id (then will do nothing)
7760 : */
7761 :
7762 : int translog_assign_id_to_share(MARIA_HA *tbl_info, TRN *trn)
7763 392 : {
7764 392 : MARIA_SHARE *share= tbl_info->s;
7765 : /*
7766 : If you give an id to a non-BLOCK_RECORD table, you also need to release
7767 : this id somewhere. Then you can change the assertion.
7768 : */
7769 392 : DBUG_ASSERT(share->data_file_type == BLOCK_RECORD);
7770 : /* re-check under mutex to avoid having 2 ids for the same share */
7771 392 : pthread_mutex_lock(&share->intern_lock);
7772 392 : if (unlikely(share->id == 0))
7773 : {
7774 : LSN lsn;
7775 : LEX_CUSTRING log_array[TRANSLOG_INTERNAL_PARTS + 2];
7776 : uchar log_data[FILEID_STORE_SIZE];
7777 : /* Inspired by set_short_trid() of trnman.c */
7778 392 : uint i= share->kfile.file % SHARE_ID_MAX + 1;
7779 : do
7780 : {
7781 : my_atomic_rwlock_wrlock(&LOCK_id_to_share);
7782 0 : for ( ; i <= SHARE_ID_MAX ; i++) /* the range is [1..SHARE_ID_MAX] */
7783 : {
7784 392 : void *tmp= NULL;
7785 392 : if (id_to_share[i] == NULL &&
7786 : my_atomic_casptr((void **)&id_to_share[i], &tmp, share))
7787 : {
7788 392 : share->id= (uint16)i;
7789 392 : break;
7790 : }
7791 : }
7792 : my_atomic_rwlock_wrunlock(&LOCK_id_to_share);
7793 392 : i= 1; /* scan the whole array */
7794 392 : } while (share->id == 0);
7795 392 : DBUG_PRINT("info", ("id_to_share: 0x%lx -> %u", (ulong)share, share->id));
7796 392 : log_array[TRANSLOG_INTERNAL_PARTS + 0].str= log_data;
7797 392 : log_array[TRANSLOG_INTERNAL_PARTS + 0].length= sizeof(log_data);
7798 : /*
7799 : open_file_name is an unresolved name (symlinks are not resolved, datadir
7800 : is not realpath-ed, etc) which is good: the log can be moved to another
7801 : directory and continue working.
7802 : */
7803 392 : log_array[TRANSLOG_INTERNAL_PARTS + 1].str=
7804 : (uchar *)share->open_file_name.str;
7805 392 : log_array[TRANSLOG_INTERNAL_PARTS + 1].length=
7806 : share->open_file_name.length + 1;
7807 : /*
7808 : We can't unlock share->intern_lock before the log entry is written to
7809 : ensure no one uses the id before it's logged.
7810 : */
7811 392 : if (unlikely(translog_write_record(&lsn, LOGREC_FILE_ID, trn, tbl_info,
7812 : (translog_size_t)
7813 : (sizeof(log_data) +
7814 : log_array[TRANSLOG_INTERNAL_PARTS +
7815 : 1].length),
7816 : sizeof(log_array)/sizeof(log_array[0]),
7817 : log_array, log_data, NULL)))
7818 : {
7819 0 : pthread_mutex_unlock(&share->intern_lock);
7820 0 : return 1;
7821 : }
7822 : }
7823 392 : pthread_mutex_unlock(&share->intern_lock);
7824 392 : return 0;
7825 : }
7826 :
7827 :
7828 : /**
7829 : @brief Recycles a MARIA_SHARE's short id.
7830 :
7831 : @param share table
7832 :
7833 : @note Must be called only if share has an id (i.e. id != 0)
7834 : */
7835 :
7836 : void translog_deassign_id_from_share(MARIA_SHARE *share)
7837 600 : {
7838 600 : DBUG_PRINT("info", ("id_to_share: 0x%lx id %u -> 0",
7839 : (ulong)share, share->id));
7840 : /*
7841 : We don't need any mutex as we are called only when closing the last
7842 : instance of the table or at the end of REPAIR: no writes can be
7843 : happening. But a Checkpoint may be reading share->id, so we require this
7844 : mutex:
7845 : */
7846 600 : safe_mutex_assert_owner(&share->intern_lock);
7847 : my_atomic_rwlock_rdlock(&LOCK_id_to_share);
7848 600 : my_atomic_storeptr((void **)&id_to_share[share->id], 0);
7849 : my_atomic_rwlock_rdunlock(&LOCK_id_to_share);
7850 600 : share->id= 0;
7851 : /* useless but safety: */
7852 600 : share->lsn_of_file_id= LSN_IMPOSSIBLE;
7853 : }
7854 :
7855 :
7856 : void translog_assign_id_to_share_from_recovery(MARIA_SHARE *share,
7857 : uint16 id)
7858 307 : {
7859 307 : DBUG_ASSERT(maria_in_recovery && !maria_multi_threaded);
7860 307 : DBUG_ASSERT(share->data_file_type == BLOCK_RECORD);
7861 307 : DBUG_ASSERT(share->id == 0);
7862 307 : DBUG_ASSERT(id_to_share[id] == NULL);
7863 307 : id_to_share[share->id= id]= share;
7864 : }
7865 :
7866 :
7867 : /**
7868 : @brief check if such log file exists
7869 :
7870 : @param file_no number of the file to test
7871 :
7872 : @retval 0 no such file
7873 : @retval 1 there is file with such number
7874 : */
7875 :
7876 : my_bool translog_is_file(uint file_no)
7877 321 : {
7878 : MY_STAT stat_buff;
7879 : char path[FN_REFLEN];
7880 321 : return (test(my_stat(translog_filename_by_fileno(file_no, path),
7881 : &stat_buff, MYF(0))));
7882 : }
7883 :
7884 :
7885 : /**
7886 : @brief returns minimum log file number
7887 :
7888 : @param horizon the end of the log
7889 : @param is_protected true if it is under purge_log protection
7890 :
7891 : @retval minimum file number
7892 : @retval 0 no files found
7893 : */
7894 :
7895 : static uint32 translog_first_file(TRANSLOG_ADDRESS horizon, int is_protected)
7896 749 : {
7897 749 : uint min_file= 0, max_file;
7898 749 : DBUG_ENTER("translog_first_file");
7899 749 : if (!is_protected)
7900 309 : pthread_mutex_lock(&log_descriptor.purger_lock);
7901 749 : if (log_descriptor.min_file_number &&
7902 : translog_is_file(log_descriptor.min_file_number))
7903 : {
7904 310 : DBUG_PRINT("info", ("cached %lu",
7905 : (ulong) log_descriptor.min_file_number));
7906 310 : if (!is_protected)
7907 308 : pthread_mutex_unlock(&log_descriptor.purger_lock);
7908 310 : DBUG_RETURN(log_descriptor.min_file_number);
7909 : }
7910 :
7911 439 : max_file= LSN_FILE_NO(horizon);
7912 :
7913 : /* binary search for last file */
7914 880 : while (min_file != max_file && min_file != (max_file - 1))
7915 : {
7916 2 : uint test= (min_file + max_file) / 2;
7917 2 : DBUG_PRINT("info", ("min_file: %u test: %u max_file: %u",
7918 : min_file, test, max_file));
7919 2 : if (test == max_file)
7920 0 : test--;
7921 2 : if (translog_is_file(test))
7922 1 : max_file= test;
7923 : else
7924 1 : min_file= test;
7925 : }
7926 439 : log_descriptor.min_file_number= max_file;
7927 439 : if (!is_protected)
7928 1 : pthread_mutex_unlock(&log_descriptor.purger_lock);
7929 439 : DBUG_PRINT("info", ("first file :%lu", (ulong) max_file));
7930 439 : DBUG_ASSERT(max_file >= 1);
7931 439 : DBUG_RETURN(max_file);
7932 : }
7933 :
7934 :
7935 : /**
7936 : @brief returns the most close LSN higher the given chunk address
7937 :
7938 : @param addr the chunk address to start from
7939 : @param horizon the horizon if it is known or LSN_IMPOSSIBLE
7940 :
7941 : @retval LSN_ERROR Error
7942 : @retval LSN_IMPOSSIBLE no LSNs after the address
7943 : @retval # LSN of the most close LSN higher the given chunk address
7944 : */
7945 :
7946 : LSN translog_next_LSN(TRANSLOG_ADDRESS addr, TRANSLOG_ADDRESS horizon)
7947 309 : {
7948 : TRANSLOG_SCANNER_DATA scanner;
7949 : LSN result;
7950 309 : DBUG_ENTER("translog_next_LSN");
7951 :
7952 309 : if (horizon == LSN_IMPOSSIBLE)
7953 0 : horizon= translog_get_horizon();
7954 :
7955 309 : if (addr == horizon)
7956 1 : DBUG_RETURN(LSN_IMPOSSIBLE);
7957 :
7958 308 : translog_scanner_init(addr, 0, &scanner, 1);
7959 : /*
7960 : addr can point not to a chunk beginning but page end so next
7961 : page beginning.
7962 : */
7963 308 : if (addr % TRANSLOG_PAGE_SIZE == 0)
7964 : {
7965 : /*
7966 : We are emulating the page end which cased such horizon value to
7967 : trigger translog_scanner_eop().
7968 :
7969 : We can't just increase addr on page header overhead because it
7970 : can be file end so we allow translog_get_next_chunk() to skip
7971 : to the next page in correct way
7972 : */
7973 0 : scanner.page_addr-= TRANSLOG_PAGE_SIZE;
7974 0 : scanner.page_offset= TRANSLOG_PAGE_SIZE;
7975 : #ifndef DBUG_OFF
7976 0 : scanner.page= NULL; /* prevent using incorrect page content */
7977 : #endif
7978 : }
7979 : /* addr can point not to a chunk beginning but to a page end */
7980 308 : if (translog_scanner_eop(&scanner))
7981 : {
7982 0 : if (translog_get_next_chunk(&scanner))
7983 : {
7984 0 : result= LSN_ERROR;
7985 0 : goto out;
7986 : }
7987 0 : if (scanner.page == END_OF_LOG)
7988 : {
7989 0 : result= LSN_IMPOSSIBLE;
7990 0 : goto out;
7991 : }
7992 : }
7993 :
7994 308 : while (!translog_is_LSN_chunk(scanner.page[scanner.page_offset]) &&
7995 : scanner.page[scanner.page_offset] != TRANSLOG_FILLER)
7996 : {
7997 0 : if (translog_get_next_chunk(&scanner))
7998 : {
7999 0 : result= LSN_ERROR;
8000 0 : goto out;
8001 : }
8002 0 : if (scanner.page == END_OF_LOG)
8003 : {
8004 0 : result= LSN_IMPOSSIBLE;
8005 0 : goto out;
8006 : }
8007 : }
8008 :
8009 308 : if (scanner.page[scanner.page_offset] == TRANSLOG_FILLER)
8010 0 : result= LSN_IMPOSSIBLE; /* reached page filler */
8011 : else
8012 308 : result= scanner.page_addr + scanner.page_offset;
8013 308 : out:
8014 308 : translog_destroy_scanner(&scanner);
8015 308 : DBUG_RETURN(result);
8016 : }
8017 :
8018 :
8019 : /**
8020 : @brief returns the LSN of the first record starting in this log
8021 :
8022 : @retval LSN_ERROR Error
8023 : @retval LSN_IMPOSSIBLE no log or the log is empty
8024 : @retval # LSN of the first record
8025 : */
8026 :
8027 : LSN translog_first_lsn_in_log()
8028 309 : {
8029 309 : TRANSLOG_ADDRESS addr, horizon= translog_get_horizon();
8030 : TRANSLOG_VALIDATOR_DATA data;
8031 : uint file;
8032 : uint16 chunk_offset;
8033 : uchar *page;
8034 309 : DBUG_ENTER("translog_first_lsn_in_log");
8035 309 : DBUG_PRINT("info", ("Horizon: (%lu,0x%lx)", LSN_IN_PARTS(horizon)));
8036 309 : DBUG_ASSERT(translog_status == TRANSLOG_OK ||
8037 : translog_status == TRANSLOG_READONLY);
8038 :
8039 309 : if (!(file= translog_first_file(horizon, 0)))
8040 : {
8041 : /* log has no records yet */
8042 0 : DBUG_RETURN(LSN_IMPOSSIBLE);
8043 : }
8044 :
8045 309 : addr= MAKE_LSN(file, TRANSLOG_PAGE_SIZE); /* the first page of the file */
8046 309 : data.addr= &addr;
8047 : {
8048 : TRANSLOG_PAGE_SIZE_BUFF psize_buff;
8049 309 : if ((page= translog_get_page(&data, psize_buff.buffer, NULL)) == NULL ||
8050 : (chunk_offset= translog_get_first_chunk_offset(page)) == 0)
8051 0 : DBUG_RETURN(LSN_ERROR);
8052 : }
8053 309 : addr+= chunk_offset;
8054 :
8055 309 : DBUG_RETURN(translog_next_LSN(addr, horizon));
8056 : }
8057 :
8058 :
8059 : /**
8060 : @brief Returns theoretical first LSN if first log is present
8061 :
8062 : @retval LSN_ERROR Error
8063 : @retval LSN_IMPOSSIBLE no log
8064 : @retval # LSN of the first record
8065 : */
8066 :
8067 : LSN translog_first_theoretical_lsn()
8068 2 : {
8069 2 : TRANSLOG_ADDRESS addr= translog_get_horizon();
8070 : TRANSLOG_PAGE_SIZE_BUFF psize_buff;
8071 : uchar *page;
8072 : TRANSLOG_VALIDATOR_DATA data;
8073 2 : DBUG_ENTER("translog_first_theoretical_lsn");
8074 2 : DBUG_PRINT("info", ("Horizon: (%lu,0x%lx)", LSN_IN_PARTS(addr)));
8075 2 : DBUG_ASSERT(translog_status == TRANSLOG_OK ||
8076 : translog_status == TRANSLOG_READONLY);
8077 :
8078 2 : if (!translog_is_file(1))
8079 0 : DBUG_RETURN(LSN_IMPOSSIBLE);
8080 2 : if (addr == MAKE_LSN(1, TRANSLOG_PAGE_SIZE))
8081 : {
8082 : /* log has no records yet */
8083 0 : DBUG_RETURN(MAKE_LSN(1, TRANSLOG_PAGE_SIZE +
8084 : log_descriptor.page_overhead));
8085 : }
8086 :
8087 2 : addr= MAKE_LSN(1, TRANSLOG_PAGE_SIZE); /* the first page of the file */
8088 2 : data.addr= &addr;
8089 2 : if ((page= translog_get_page(&data, psize_buff.buffer, NULL)) == NULL)
8090 0 : DBUG_RETURN(LSN_ERROR);
8091 :
8092 2 : DBUG_RETURN(MAKE_LSN(1, TRANSLOG_PAGE_SIZE +
8093 : page_overhead[page[TRANSLOG_PAGE_FLAGS]]));
8094 : }
8095 :
8096 :
8097 : /**
8098 : @brief Checks given low water mark and purge files if it is need
8099 :
8100 : @param low the last (minimum) address which is need
8101 :
8102 : @retval 0 OK
8103 : @retval 1 Error
8104 : */
8105 :
8106 : my_bool translog_purge(TRANSLOG_ADDRESS low)
8107 84 : {
8108 84 : uint32 last_need_file= LSN_FILE_NO(low);
8109 84 : TRANSLOG_ADDRESS horizon= translog_get_horizon();
8110 84 : int rc= 0;
8111 84 : DBUG_ENTER("translog_purge");
8112 84 : DBUG_PRINT("enter", ("low: (%lu,0x%lx)", LSN_IN_PARTS(low)));
8113 84 : DBUG_ASSERT(translog_status == TRANSLOG_OK ||
8114 : translog_status == TRANSLOG_READONLY);
8115 :
8116 84 : pthread_mutex_lock(&log_descriptor.purger_lock);
8117 84 : if (LSN_FILE_NO(log_descriptor.last_lsn_checked) < last_need_file)
8118 : {
8119 : uint32 i;
8120 84 : uint32 min_file= translog_first_file(horizon, 1);
8121 84 : DBUG_ASSERT(min_file != 0); /* log is already started */
8122 86 : for(i= min_file; i < last_need_file && rc == 0; i++)
8123 : {
8124 3 : LSN lsn= translog_get_file_max_lsn_stored(i);
8125 3 : if (lsn == LSN_IMPOSSIBLE)
8126 3 : break; /* files are still in writing */
8127 3 : if (lsn == LSN_ERROR)
8128 : {
8129 0 : rc= 1;
8130 0 : break;
8131 : }
8132 3 : if (cmp_translog_addr(lsn, low) >= 0)
8133 2 : break;
8134 :
8135 2 : DBUG_PRINT("info", ("purge file %lu", (ulong) i));
8136 :
8137 : /* remove file descriptor from the cache */
8138 : /*
8139 : log_descriptor.min_file can be changed only here during execution
8140 : and the function is serialized, so we can access it without problems
8141 : */
8142 2 : if (i >= log_descriptor.min_file)
8143 : {
8144 : TRANSLOG_FILE *file;
8145 2 : rw_wrlock(&log_descriptor.open_files_lock);
8146 2 : DBUG_ASSERT(log_descriptor.max_file - log_descriptor.min_file + 1 ==
8147 : log_descriptor.open_files.elements);
8148 2 : DBUG_ASSERT(log_descriptor.min_file == i);
8149 2 : file= *((TRANSLOG_FILE **)pop_dynamic(&log_descriptor.open_files));
8150 2 : DBUG_PRINT("info", ("Files : %d", log_descriptor.open_files.elements));
8151 2 : DBUG_ASSERT(i == file->number);
8152 2 : log_descriptor.min_file++;
8153 2 : DBUG_ASSERT(log_descriptor.max_file - log_descriptor.min_file + 1 ==
8154 : log_descriptor.open_files.elements);
8155 2 : rw_unlock(&log_descriptor.open_files_lock);
8156 2 : translog_close_log_file(file);
8157 : }
8158 2 : if (log_purge_type == TRANSLOG_PURGE_IMMIDIATE)
8159 : {
8160 : char path[FN_REFLEN], *file_name;
8161 2 : file_name= translog_filename_by_fileno(i, path);
8162 2 : rc= test(my_delete(file_name, MYF(MY_WME)));
8163 : }
8164 : }
8165 84 : if (unlikely(rc == 1))
8166 0 : log_descriptor.min_need_file= 0; /* impossible value */
8167 : else
8168 84 : log_descriptor.min_need_file= i;
8169 : }
8170 :
8171 84 : pthread_mutex_unlock(&log_descriptor.purger_lock);
8172 84 : DBUG_RETURN(rc);
8173 : }
8174 :
8175 :
8176 : /**
8177 : @brief Purges files by stored min need file in case of
8178 : "ondemend" purge type
8179 :
8180 : @note This function do real work only if it is "ondemend" purge type
8181 : and translog_purge() was called at least once and last time without
8182 : errors
8183 :
8184 : @retval 0 OK
8185 : @retval 1 Error
8186 : */
8187 :
8188 : my_bool translog_purge_at_flush()
8189 0 : {
8190 : uint32 i, min_file;
8191 0 : int rc= 0;
8192 0 : DBUG_ENTER("translog_purge_at_flush");
8193 0 : DBUG_ASSERT(translog_status == TRANSLOG_OK ||
8194 : translog_status == TRANSLOG_READONLY);
8195 :
8196 0 : if (unlikely(translog_status == TRANSLOG_READONLY))
8197 : {
8198 0 : DBUG_PRINT("info", ("The log is read only => exit"));
8199 0 : DBUG_RETURN(0);
8200 : }
8201 :
8202 0 : if (log_purge_type != TRANSLOG_PURGE_ONDEMAND)
8203 : {
8204 0 : DBUG_PRINT("info", ("It is not \"at_flush\" => exit"));
8205 0 : DBUG_RETURN(0);
8206 : }
8207 :
8208 0 : pthread_mutex_lock(&log_descriptor.purger_lock);
8209 :
8210 0 : if (unlikely(log_descriptor.min_need_file == 0))
8211 : {
8212 0 : DBUG_PRINT("info", ("No info about min need file => exit"));
8213 0 : pthread_mutex_unlock(&log_descriptor.purger_lock);
8214 0 : DBUG_RETURN(0);
8215 : }
8216 :
8217 0 : min_file= translog_first_file(translog_get_horizon(), 1);
8218 0 : DBUG_ASSERT(min_file != 0); /* log is already started */
8219 0 : for(i= min_file; i < log_descriptor.min_need_file && rc == 0; i++)
8220 : {
8221 : char path[FN_REFLEN], *file_name;
8222 0 : DBUG_PRINT("info", ("purge file %lu\n", (ulong) i));
8223 0 : file_name= translog_filename_by_fileno(i, path);
8224 0 : rc= test(my_delete(file_name, MYF(MY_WME)));
8225 : }
8226 :
8227 0 : pthread_mutex_unlock(&log_descriptor.purger_lock);
8228 0 : DBUG_RETURN(rc);
8229 : }
8230 :
8231 :
8232 : /**
8233 : @brief Gets min file number
8234 :
8235 : @param horizon the end of the log
8236 :
8237 : @retval minimum file number
8238 : @retval 0 no files found
8239 : */
8240 :
8241 : uint32 translog_get_first_file(TRANSLOG_ADDRESS horizon)
8242 0 : {
8243 0 : return translog_first_file(horizon, 0);
8244 : }
8245 :
8246 :
8247 : /**
8248 : @brief Gets min file number which is needed
8249 :
8250 : @retval minimum file number
8251 : @retval 0 unknown
8252 : */
8253 :
8254 : uint32 translog_get_first_needed_file()
8255 0 : {
8256 : uint32 file_no;
8257 0 : pthread_mutex_lock(&log_descriptor.purger_lock);
8258 0 : file_no= log_descriptor.min_need_file;
8259 0 : pthread_mutex_unlock(&log_descriptor.purger_lock);
8260 0 : return file_no;
8261 : }
8262 :
8263 :
8264 : /**
8265 : @brief Gets transaction log file size
8266 :
8267 : @return transaction log file size
8268 : */
8269 :
8270 : uint32 translog_get_file_size()
8271 0 : {
8272 : uint32 res;
8273 0 : translog_lock();
8274 0 : res= log_descriptor.log_file_max_size;
8275 0 : translog_unlock();
8276 0 : return (res);
8277 : }
8278 :
8279 :
8280 : /**
8281 : @brief Sets transaction log file size
8282 :
8283 : @return Returns actually set transaction log size
8284 : */
8285 :
8286 : void translog_set_file_size(uint32 size)
8287 0 : {
8288 0 : struct st_translog_buffer *old_buffer= NULL;
8289 0 : DBUG_ENTER("translog_set_file_size");
8290 0 : translog_lock();
8291 0 : DBUG_PRINT("enter", ("Size: %lu", (ulong) size));
8292 0 : DBUG_ASSERT(size % TRANSLOG_PAGE_SIZE == 0 &&
8293 : size >= TRANSLOG_MIN_FILE_SIZE);
8294 0 : log_descriptor.log_file_max_size= size;
8295 : /* if current file longer then finish it*/
8296 0 : if (LSN_OFFSET(log_descriptor.horizon) >= log_descriptor.log_file_max_size)
8297 : {
8298 0 : old_buffer= log_descriptor.bc.buffer;
8299 0 : translog_buffer_next(&log_descriptor.horizon, &log_descriptor.bc, 1);
8300 0 : translog_buffer_unlock(old_buffer);
8301 : }
8302 0 : translog_unlock();
8303 0 : if (old_buffer)
8304 : {
8305 0 : translog_buffer_lock(old_buffer);
8306 0 : translog_buffer_flush(old_buffer);
8307 0 : translog_buffer_unlock(old_buffer);
8308 : }
8309 0 : DBUG_VOID_RETURN;
8310 : }
8311 :
8312 :
8313 : /**
8314 : Write debug information to log if we EXTRA_DEBUG is enabled
8315 : */
8316 :
8317 : my_bool translog_log_debug_info(TRN *trn __attribute__((unused)),
8318 : enum translog_debug_info_type type
8319 : __attribute__((unused)),
8320 : uchar *info __attribute__((unused)),
8321 : size_t length __attribute__((unused)))
8322 0 : {
8323 : #ifdef EXTRA_DEBUG
8324 : LEX_CUSTRING log_array[TRANSLOG_INTERNAL_PARTS + 2];
8325 : uchar debug_type;
8326 : LSN lsn;
8327 :
8328 0 : if (!trn)
8329 : {
8330 : /*
8331 : We can't log the current transaction because we don't have
8332 : an active transaction. Use a temporary transaction object instead
8333 : */
8334 0 : trn= &dummy_transaction_object;
8335 : }
8336 0 : debug_type= (uchar) type;
8337 0 : log_array[TRANSLOG_INTERNAL_PARTS + 0].str= &debug_type;
8338 0 : log_array[TRANSLOG_INTERNAL_PARTS + 0].length= 1;
8339 0 : log_array[TRANSLOG_INTERNAL_PARTS + 1].str= info;
8340 0 : log_array[TRANSLOG_INTERNAL_PARTS + 1].length= length;
8341 0 : return translog_write_record(&lsn, LOGREC_DEBUG_INFO,
8342 : trn, NULL,
8343 : (translog_size_t) (1+ length),
8344 : sizeof(log_array)/sizeof(log_array[0]),
8345 : log_array, NULL, NULL);
8346 : #else
8347 : return 0;
8348 : #endif
8349 : }
8350 :
8351 :
8352 : #ifdef MARIA_DUMP_LOG
8353 : #include <my_getopt.h>
8354 : extern void translog_example_table_init();
8355 : static const char *load_default_groups[]= { "maria_dump_log",0 };
8356 : static void get_options(int *argc,char * * *argv);
8357 : #ifndef DBUG_OFF
8358 : #if defined(__WIN__)
8359 : const char *default_dbug_option= "d:t:i:O,\\maria_dump_log.trace";
8360 : #else
8361 : const char *default_dbug_option= "d:t:i:o,/tmp/maria_dump_log.trace";
8362 : #endif
8363 : #endif
8364 : static ulonglong opt_offset;
8365 : static ulong opt_pages;
8366 : static const char *opt_file= NULL;
8367 : static File handler= -1;
8368 : static my_bool opt_unit= 0;
8369 : static struct my_option my_long_options[] =
8370 : {
8371 : #ifdef IMPLTMENTED
8372 : {"body", 'b',
8373 : "Print chunk body dump",
8374 : (uchar **) &opt_body, (uchar **) &opt_body, 0,
8375 : GET_BOOL, NO_ARG, 0, 0, 0, 0, 0, 0},
8376 : #endif
8377 : #ifndef DBUG_OFF
8378 : {"debug", '#', "Output debug log. Often the argument is 'd:t:o,filename'.",
8379 : 0, 0, 0, GET_STR, OPT_ARG, 0, 0, 0, 0, 0, 0},
8380 : #endif
8381 : {"file", 'f', "Path to file which will be read",
8382 : (uchar**) &opt_file, 0, 0, GET_STR, REQUIRED_ARG, 0, 0, 0, 0, 0, 0},
8383 : {"help", '?', "Display this help and exit.",
8384 : 0, 0, 0, GET_NO_ARG, NO_ARG, 0, 0, 0, 0, 0, 0},
8385 : { "offset", 'o', "Start reading log from this offset",
8386 : (uchar**) &opt_offset, (uchar**) &opt_offset,
8387 : 0, GET_ULL, REQUIRED_ARG, 0, 0, ~(longlong) 0, 0, 0, 0 },
8388 : { "pages", 'n', "Number of pages to read",
8389 : (uchar**) &opt_pages, (uchar**) &opt_pages, 0,
8390 : GET_ULONG, REQUIRED_ARG, (long) ~(ulong) 0,
8391 : (long) 1, (long) ~(ulong) 0, (long) 0,
8392 : (long) 1, 0},
8393 : {"unit-test", 'U',
8394 : "Use unit test record table (for logs created by unittests",
8395 : (uchar **) &opt_unit, (uchar **) &opt_unit, 0,
8396 : GET_BOOL, NO_ARG, 0, 0, 0, 0, 0, 0},
8397 : {"version", 'V', "Print version and exit.",
8398 : 0, 0, 0, GET_NO_ARG, NO_ARG, 0, 0, 0, 0, 0, 0},
8399 : { 0, 0, 0, 0, 0, 0, GET_NO_ARG, NO_ARG, 0, 0, 0, 0, 0, 0}
8400 : };
8401 :
8402 :
8403 : static void print_version(void)
8404 : {
8405 : VOID(printf("%s Ver 1.0 for %s on %s\n",
8406 : my_progname_short, SYSTEM_TYPE, MACHINE_TYPE));
8407 : NETWARE_SET_SCREEN_MODE(1);
8408 : }
8409 :
8410 :
8411 : static void usage(void)
8412 : {
8413 : print_version();
8414 : puts("Copyright (C) 2008 MySQL AB");
8415 : puts("This software comes with ABSOLUTELY NO WARRANTY. This is free software,");
8416 : puts("and you are welcome to modify and redistribute it under the GPL license\n");
8417 :
8418 : puts("Dump content of maria log pages.");
8419 : VOID(printf("\nUsage: %s -f file OPTIONS\n", my_progname_short));
8420 : my_print_help(my_long_options);
8421 : print_defaults("my", load_default_groups);
8422 : my_print_variables(my_long_options);
8423 : }
8424 :
8425 :
8426 : static my_bool
8427 : get_one_option(int optid __attribute__((unused)),
8428 : const struct my_option *opt __attribute__((unused)),
8429 : char *argument __attribute__((unused)))
8430 : {
8431 : switch (optid) {
8432 : case '?':
8433 : usage();
8434 : exit(0);
8435 : case 'V':
8436 : print_version();
8437 : exit(0);
8438 : #ifndef DBUG_OFF
8439 : case '#':
8440 : DBUG_SET_INITIAL(argument ? argument : default_dbug_option);
8441 : break;
8442 : #endif
8443 : }
8444 : return 0;
8445 : }
8446 :
8447 :
8448 : static void get_options(int *argc,char ***argv)
8449 : {
8450 : int ho_error;
8451 :
8452 : if ((ho_error=handle_options(argc, argv, my_long_options, get_one_option)))
8453 : exit(ho_error);
8454 :
8455 : if (opt_file == NULL)
8456 : {
8457 : usage();
8458 : exit(1);
8459 : }
8460 : }
8461 :
8462 :
8463 : /**
8464 : @brief Dump information about file header page.
8465 : */
8466 :
8467 : static void dump_header_page(uchar *buff)
8468 : {
8469 : LOGHANDLER_FILE_INFO desc;
8470 : char strbuff[21];
8471 : translog_interpret_file_header(&desc, buff);
8472 : printf(" This can be header page:\n"
8473 : " Timestamp: %s\n"
8474 : " Maria log version: %lu\n"
8475 : " Server version: %lu\n"
8476 : " Server id %lu\n"
8477 : " Page size %lu\n",
8478 : llstr(desc.timestamp, strbuff),
8479 : desc.maria_version,
8480 : desc.mysql_version,
8481 : desc.server_id,
8482 : desc.page_size);
8483 : if (desc.page_size != TRANSLOG_PAGE_SIZE)
8484 : printf(" WARNING: page size is not equal compiled in one %lu!!!\n",
8485 : (ulong) TRANSLOG_PAGE_SIZE);
8486 : printf(" File number %lu\n"
8487 : " Max lsn: (%lu,0x%lx)\n",
8488 : desc.file_number,
8489 : LSN_IN_PARTS(desc.max_lsn));
8490 : }
8491 :
8492 : static const char *record_class_string[]=
8493 : {
8494 : "LOGRECTYPE_NOT_ALLOWED",
8495 : "LOGRECTYPE_VARIABLE_LENGTH",
8496 : "LOGRECTYPE_PSEUDOFIXEDLENGTH",
8497 : "LOGRECTYPE_FIXEDLENGTH"
8498 : };
8499 :
8500 :
8501 : /**
8502 : @brief dump information about transaction log chunk
8503 :
8504 : @param buffer reference to the whole page
8505 : @param ptr pointer to the chunk
8506 :
8507 : @reval # reference to the next chunk
8508 : @retval NULL can't interpret data
8509 : */
8510 :
8511 : static uchar *dump_chunk(uchar *buffer, uchar *ptr)
8512 : {
8513 : uint length;
8514 : if (*ptr == TRANSLOG_FILLER)
8515 : {
8516 : printf(" Filler till the page end\n");
8517 : for (; ptr < buffer + TRANSLOG_PAGE_SIZE; ptr++)
8518 : {
8519 : if (*ptr != TRANSLOG_FILLER)
8520 : {
8521 : printf(" WARNING: non filler character met before page end "
8522 : "(page + 0x%04x: 0x%02x) (stop interpretation)!!!",
8523 : (uint) (ptr - buffer), (uint) ptr[0]);
8524 : return NULL;
8525 : }
8526 : }
8527 : return ptr;
8528 : }
8529 : if (*ptr == 0 || *ptr == 0xFF)
8530 : {
8531 : printf(" WARNING: chunk can't start from 0x0 "
8532 : "(stop interpretation)!!!\n");
8533 : return NULL;
8534 : }
8535 : switch (ptr[0] & TRANSLOG_CHUNK_TYPE) {
8536 : case TRANSLOG_CHUNK_LSN:
8537 : printf(" LSN chunk type 0 (variable length)\n");
8538 : if (likely((ptr[0] & TRANSLOG_REC_TYPE) != TRANSLOG_CHUNK_0_CONT))
8539 : {
8540 : printf(" Record type %u: %s record class %s compressed LSNs: %u\n",
8541 : ptr[0] & TRANSLOG_REC_TYPE,
8542 : (log_record_type_descriptor[ptr[0] & TRANSLOG_REC_TYPE].name ?
8543 : log_record_type_descriptor[ptr[0] & TRANSLOG_REC_TYPE].name :
8544 : "NULL"),
8545 : record_class_string[log_record_type_descriptor[ptr[0] &
8546 : TRANSLOG_REC_TYPE].
8547 : rclass],
8548 : log_record_type_descriptor[ptr[0] & TRANSLOG_REC_TYPE].
8549 : compressed_LSN);
8550 : if (log_record_type_descriptor[ptr[0] & TRANSLOG_REC_TYPE].rclass !=
8551 : LOGRECTYPE_VARIABLE_LENGTH)
8552 : {
8553 : printf(" WARNING: this record class here can't be used "
8554 : "(stop interpretation)!!!\n");
8555 : break;
8556 : }
8557 : }
8558 : else
8559 : printf(" Continuation of previous chunk 0 header \n");
8560 : printf(" Short transaction id: %u\n", (uint) uint2korr(ptr + 1));
8561 : {
8562 : uchar *hdr_ptr= ptr + 1 + 2; /* chunk type and short trid */
8563 : uint16 chunk_len;
8564 : printf (" Record length: %lu\n",
8565 : (ulong) translog_variable_record_1group_decode_len(&hdr_ptr));
8566 : chunk_len= uint2korr(hdr_ptr);
8567 : if (chunk_len == 0)
8568 : printf (" It is 1 group record (chunk length == 0)\n");
8569 : else
8570 : {
8571 : uint16 groups, i;
8572 :
8573 : printf (" Chunk length %u\n", (uint) chunk_len);
8574 : groups= uint2korr(hdr_ptr + 2);
8575 : hdr_ptr+= 4;
8576 : printf (" Number of groups left to the end %u:\n", (uint) groups);
8577 : for(i= 0;
8578 : i < groups && hdr_ptr < buffer + TRANSLOG_PAGE_SIZE;
8579 : i++, hdr_ptr+= LSN_STORE_SIZE + 1)
8580 : {
8581 : TRANSLOG_ADDRESS gpr_addr= lsn_korr(hdr_ptr);
8582 : uint pages= hdr_ptr[LSN_STORE_SIZE];
8583 : printf (" Group +#%u: (%lu,0x%lx) pages: %u\n",
8584 : (uint) i, LSN_IN_PARTS(gpr_addr), pages);
8585 : }
8586 : }
8587 : }
8588 : break;
8589 : case TRANSLOG_CHUNK_FIXED:
8590 : printf(" LSN chunk type 1 (fixed size)\n");
8591 : printf(" Record type %u: %s record class %s compressed LSNs: %u\n",
8592 : ptr[0] & TRANSLOG_REC_TYPE,
8593 : (log_record_type_descriptor[ptr[0] & TRANSLOG_REC_TYPE].name ?
8594 : log_record_type_descriptor[ptr[0] & TRANSLOG_REC_TYPE].name :
8595 : "NULL"),
8596 : record_class_string[log_record_type_descriptor[ptr[0] &
8597 : TRANSLOG_REC_TYPE].
8598 : rclass],
8599 : log_record_type_descriptor[ptr[0] & TRANSLOG_REC_TYPE].
8600 : compressed_LSN);
8601 : if (log_record_type_descriptor[ptr[0] & TRANSLOG_REC_TYPE].rclass !=
8602 : LOGRECTYPE_PSEUDOFIXEDLENGTH &&
8603 : log_record_type_descriptor[ptr[0] & TRANSLOG_REC_TYPE].rclass !=
8604 : LOGRECTYPE_FIXEDLENGTH)
8605 : {
8606 : printf(" WARNING: this record class here can't be used "
8607 : "(stop interpretation)!!!\n");
8608 : }
8609 : printf(" Short transaction id: %u\n", (uint) uint2korr(ptr + 1));
8610 : break;
8611 : case TRANSLOG_CHUNK_NOHDR:
8612 : printf(" No header chunk type 2(till the end of the page)\n");
8613 : if (ptr[0] & TRANSLOG_REC_TYPE)
8614 : {
8615 : printf(" WARNING: chunk header content record type: 0x%02x "
8616 : "(dtop interpretation)!!!",
8617 : (uint) ptr[0]);
8618 : return NULL;
8619 : }
8620 : break;
8621 : case TRANSLOG_CHUNK_LNGTH:
8622 : printf(" Chunk with length type 3\n");
8623 : if (ptr[0] & TRANSLOG_REC_TYPE)
8624 : {
8625 : printf(" WARNING: chunk header content record type: 0x%02x "
8626 : "(dtop interpretation)!!!",
8627 : (uint) ptr[0]);
8628 : return NULL;
8629 : }
8630 : break;
8631 : }
8632 : {
8633 : intptr offset= ptr - buffer;
8634 : DBUG_ASSERT(offset >= 0 && offset <= UINT_MAX16);
8635 : length= translog_get_total_chunk_length(buffer, (uint16)offset);
8636 : }
8637 : printf(" Length %u\n", length);
8638 : ptr+= length;
8639 : return ptr;
8640 : }
8641 :
8642 :
8643 : /**
8644 : @brief Dump information about page with data.
8645 : */
8646 :
8647 : static void dump_datapage(uchar *buffer)
8648 : {
8649 : uchar *ptr;
8650 : ulong offset;
8651 : uint32 page, file;
8652 : uint header_len;
8653 : printf(" Page: %ld File number: %ld\n",
8654 : (ulong) (page= uint3korr(buffer)),
8655 : (ulong) (file= uint3korr(buffer + 3)));
8656 : if (page == 0)
8657 : printf(" WARNING: page == 0!!!\n");
8658 : if (file == 0)
8659 : printf(" WARNING: file == 0!!!\n");
8660 : offset= page * TRANSLOG_PAGE_SIZE;
8661 : printf(" Flags (0x%x):\n", (uint) buffer[TRANSLOG_PAGE_FLAGS]);
8662 : if (buffer[TRANSLOG_PAGE_FLAGS])
8663 : {
8664 : if (buffer[TRANSLOG_PAGE_FLAGS] & TRANSLOG_PAGE_CRC)
8665 : printf(" Page CRC\n");
8666 : if (buffer[TRANSLOG_PAGE_FLAGS] & TRANSLOG_SECTOR_PROTECTION)
8667 : printf(" Sector protection\n");
8668 : if (buffer[TRANSLOG_PAGE_FLAGS] & TRANSLOG_RECORD_CRC)
8669 : printf(" Record CRC (WARNING: not yet implemented!!!)\n");
8670 : if (buffer[TRANSLOG_PAGE_FLAGS] & ~(TRANSLOG_PAGE_CRC |
8671 : TRANSLOG_SECTOR_PROTECTION |
8672 : TRANSLOG_RECORD_CRC))
8673 : {
8674 : printf(" WARNING: unknown flags (stop interpretation)!!!\n");
8675 : return;
8676 : }
8677 : }
8678 : else
8679 : printf(" No flags\n");
8680 : printf(" Page header length: %u\n",
8681 : (header_len= page_overhead[buffer[TRANSLOG_PAGE_FLAGS]]));
8682 : if (buffer[TRANSLOG_PAGE_FLAGS] & TRANSLOG_RECORD_CRC)
8683 : {
8684 : uint32 crc= uint4korr(buffer + TRANSLOG_PAGE_FLAGS + 1);
8685 : uint32 ccrc;
8686 : printf (" Page CRC 0x%04lx\n", (ulong) crc);
8687 : ccrc= translog_crc(buffer + header_len, TRANSLOG_PAGE_SIZE - header_len);
8688 : if (crc != ccrc)
8689 : printf(" WARNING: calculated CRC: 0x%04lx!!!\n", (ulong) ccrc);
8690 : }
8691 : if (buffer[TRANSLOG_PAGE_FLAGS] & TRANSLOG_SECTOR_PROTECTION)
8692 : {
8693 : TRANSLOG_FILE tfile;
8694 : {
8695 : uchar *table= buffer + header_len -
8696 : TRANSLOG_PAGE_SIZE / DISK_DRIVE_SECTOR_SIZE;
8697 : uint i;
8698 : printf(" Sector protection current value: 0x%02x\n", (uint) table[0]);
8699 : for (i= 1; i < TRANSLOG_PAGE_SIZE / DISK_DRIVE_SECTOR_SIZE; i++)
8700 : {
8701 : printf(" Sector protection in sector: 0x%02x saved value 0x%02x\n",
8702 : (uint)buffer[i * DISK_DRIVE_SECTOR_SIZE],
8703 : (uint)table[i]);
8704 : }
8705 : }
8706 : tfile.number= file;
8707 : tfile.handler.file= handler;
8708 : pagecache_file_init(tfile.handler, NULL, NULL, NULL, NULL, NULL);
8709 : tfile.was_recovered= 0;
8710 : tfile.is_sync= 1;
8711 : if (translog_check_sector_protection(buffer, &tfile))
8712 : printf(" WARNING: sector protection found problems!!!\n");
8713 : }
8714 : ptr= buffer + header_len;
8715 : while (ptr && ptr < buffer + TRANSLOG_PAGE_SIZE)
8716 : {
8717 : printf(" Chunk (%lu,0x%lx):\n",
8718 : (ulong)file, (ulong) offset + (ptr - buffer));
8719 : ptr= dump_chunk(buffer, ptr);
8720 : }
8721 : }
8722 :
8723 :
8724 : /**
8725 : @brief Dump information about page.
8726 : */
8727 :
8728 : static void dump_page(uchar *buffer)
8729 : {
8730 : printf("Page by offset %llu (0x%llx)\n", opt_offset, opt_offset);
8731 : if (strncmp((char*)maria_trans_file_magic, (char*)buffer,
8732 : sizeof(maria_trans_file_magic)) == 0)
8733 : {
8734 : dump_header_page(buffer);
8735 : }
8736 : dump_datapage(buffer);
8737 : }
8738 :
8739 :
8740 : /**
8741 : @brief maria_dump_log main function.
8742 : */
8743 :
8744 : int main(int argc, char **argv)
8745 : {
8746 : char **default_argv;
8747 : uchar buffer[TRANSLOG_PAGE_SIZE];
8748 : MY_INIT(argv[0]);
8749 :
8750 : load_defaults("my", load_default_groups, &argc, &argv);
8751 : default_argv= argv;
8752 : get_options(&argc, &argv);
8753 :
8754 : if (opt_unit)
8755 : translog_example_table_init();
8756 : else
8757 : translog_table_init();
8758 : translog_fill_overhead_table();
8759 :
8760 : maria_data_root= (char *)".";
8761 :
8762 : if ((handler= my_open(opt_file, O_RDONLY, MYF(MY_WME))) < 0)
8763 : {
8764 : fprintf(stderr, "Can't open file: '%s' errno: %d\n",
8765 : opt_file, my_errno);
8766 : goto err;
8767 : }
8768 : if (my_seek(handler, opt_offset, SEEK_SET, MYF(MY_WME)) !=
8769 : opt_offset)
8770 : {
8771 : fprintf(stderr, "Can't set position %lld file: '%s' errno: %d\n",
8772 : opt_offset, opt_file, my_errno);
8773 : goto err;
8774 : }
8775 : for (;
8776 : opt_pages;
8777 : opt_offset+= TRANSLOG_PAGE_SIZE, opt_pages--)
8778 : {
8779 : if (my_pread(handler, buffer, TRANSLOG_PAGE_SIZE, opt_offset,
8780 : MYF(MY_NABP)))
8781 : {
8782 : if (my_errno == HA_ERR_FILE_TOO_SHORT)
8783 : goto end;
8784 : fprintf(stderr, "Can't read page at position %lld file: '%s' "
8785 : "errno: %d\n", opt_offset, opt_file, my_errno);
8786 : goto err;
8787 : }
8788 : dump_page(buffer);
8789 : }
8790 :
8791 : end:
8792 : my_close(handler, MYF(0));
8793 : free_defaults(default_argv);
8794 : exit(0);
8795 : return 0; /* No compiler warning */
8796 :
8797 : err:
8798 : my_close(handler, MYF(0));
8799 : fprintf(stderr, "%s: FAILED\n", my_progname_short);
8800 : free_defaults(default_argv);
8801 : exit(1);
8802 : }
8803 : #endif
|