1 : /* Copyright (C) 2006-2008 MySQL AB, 2008-2009 Sun Microsystems, Inc.
2 :
3 : This program is free software; you can redistribute it and/or modify
4 : it under the terms of the GNU General Public License as published by
5 : the Free Software Foundation; version 2 of the License.
6 :
7 : This program is distributed in the hope that it will be useful,
8 : but WITHOUT ANY WARRANTY; without even the implied warranty of
9 : MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
10 : GNU General Public License for more details.
11 :
12 : You should have received a copy of the GNU General Public License
13 : along with this program; if not, write to the Free Software
14 : Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */
15 :
16 :
17 : #include <my_global.h>
18 : #include <my_sys.h>
19 : #include <m_string.h>
20 : #include "trnman.h"
21 : #include "ma_checkpoint.h"
22 : #include "ma_control_file.h"
23 :
24 : /*
25 : status variables:
26 : how many trns in the active list currently,
27 : in the committed list currently, allocated since startup.
28 : */
29 : uint trnman_active_transactions, trnman_committed_transactions,
30 : trnman_allocated_transactions;
31 :
32 : /* list of active transactions in the trid order */
33 : static TRN active_list_min, active_list_max;
34 : /* list of committed transactions in the trid order */
35 : static TRN committed_list_min, committed_list_max;
36 :
37 : /* a counter, used to generate transaction ids */
38 : static TrID global_trid_generator;
39 :
40 : /*
41 : The minimum existing transaction id for trnman_get_min_trid()
42 : The default value is used when transaction manager not initialize;
43 : Probably called from maria_chk
44 : */
45 : static TrID trid_min_read_from= MAX_TRID;
46 :
47 : /* the mutex for everything above */
48 : static pthread_mutex_t LOCK_trn_list;
49 :
50 : /* LIFO pool of unused TRN structured for reuse */
51 : static TRN *pool;
52 :
53 : /* a hash for committed transactions that maps trid to a TRN structure */
54 : static LF_HASH trid_to_trn;
55 :
56 : /* an array that maps short_id of an active transaction to a TRN structure */
57 : static TRN **short_trid_to_active_trn;
58 :
59 : /* locks for short_trid_to_active_trn and pool */
60 : static my_atomic_rwlock_t LOCK_short_trid_to_trn, LOCK_pool;
61 : static my_bool default_trnman_end_trans_hook(TRN *, my_bool, my_bool);
62 : static void trnman_free_trn(TRN *);
63 :
64 : my_bool (*trnman_end_trans_hook)(TRN *, my_bool, my_bool)=
65 : default_trnman_end_trans_hook;
66 :
67 : /*
68 : Simple interface functions
69 : QQ: if they stay so simple, should we make them inline?
70 : */
71 :
72 : uint trnman_increment_locked_tables(TRN *trn)
73 0 : {
74 0 : return trn->locked_tables++;
75 : }
76 :
77 : uint trnman_has_locked_tables(TRN *trn)
78 0 : {
79 0 : return trn->locked_tables;
80 : }
81 :
82 : uint trnman_decrement_locked_tables(TRN *trn)
83 0 : {
84 0 : return --trn->locked_tables;
85 : }
86 :
87 : void trnman_reset_locked_tables(TRN *trn, uint locked_tables)
88 0 : {
89 0 : trn->locked_tables= locked_tables;
90 : }
91 :
#ifdef EXTRA_DEBUG
/* Debug-only accessor: returns the flags stored in the transaction */
uint16 trnman_get_flags(TRN *trn)
{
  uint16 current_flags= trn->flags;
  return current_flags;
}

/* Debug-only accessor: replaces the flags stored in the transaction */
void trnman_set_flags(TRN *trn, uint16 flags)
{
  trn->flags= flags;
}
#endif
103 :
104 : /** Wake up threads waiting for this transaction */
105 : static void wt_thd_release_self(TRN *trn)
106 0 : {
107 0 : if (trn->wt)
108 : {
109 : WT_RESOURCE_ID rc;
110 0 : rc.type= &ma_rc_dup_unique;
111 0 : rc.value= (intptr)trn;
112 0 : wt_thd_release(trn->wt, & rc);
113 0 : trn->wt= 0;
114 : }
115 : }
116 :
117 : static my_bool
118 : default_trnman_end_trans_hook(TRN *trn __attribute__ ((unused)),
119 : my_bool commit __attribute__ ((unused)),
120 : my_bool active_transactions
121 : __attribute__ ((unused)))
122 0 : {
123 0 : return 0;
124 : }
125 :
126 :
127 : static uchar *trn_get_hash_key(const uchar *trn, size_t *len,
128 : my_bool unused __attribute__ ((unused)))
129 0 : {
130 0 : *len= sizeof(TrID);
131 0 : return (uchar *) & ((*((TRN **)trn))->trid);
132 : }
133 :
134 :
135 : /**
136 : @brief Initializes transaction manager.
137 :
138 : @param initial_trid Generated TrIDs will start from initial_trid+1.
139 :
140 : @return Operation status
141 : @retval 0 OK
142 : @retval !=0 Error
143 : */
144 :
145 : int trnman_init(TrID initial_trid)
146 5 : {
147 5 : DBUG_ENTER("trnman_init");
148 :
149 5 : short_trid_to_active_trn= (TRN **)my_malloc(SHORT_TRID_MAX*sizeof(TRN*),
150 : MYF(MY_WME|MY_ZEROFILL));
151 5 : if (unlikely(!short_trid_to_active_trn))
152 0 : DBUG_RETURN(1);
153 5 : short_trid_to_active_trn--; /* min short_id is 1 */
154 :
155 : /*
156 : Initialize lists.
157 : active_list_max.min_read_from must be larger than any trid,
158 : so that when an active list is empty we would could free
159 : all committed list.
160 : And committed_list_max itself can not be freed so
161 : committed_list_max.commit_trid must not be smaller that
162 : active_list_max.min_read_from
163 : */
164 :
165 5 : active_list_max.trid= active_list_min.trid= 0;
166 5 : active_list_max.min_read_from= MAX_TRID;
167 5 : active_list_max.next= active_list_min.prev= 0;
168 5 : active_list_max.prev= &active_list_min;
169 5 : active_list_min.next= &active_list_max;
170 :
171 5 : committed_list_max.commit_trid= MAX_TRID;
172 5 : committed_list_max.next= committed_list_min.prev= 0;
173 5 : committed_list_max.prev= &committed_list_min;
174 5 : committed_list_min.next= &committed_list_max;
175 :
176 5 : trnman_active_transactions= 0;
177 5 : trnman_committed_transactions= 0;
178 5 : trnman_allocated_transactions= 0;
179 :
180 5 : pool= 0;
181 5 : global_trid_generator= initial_trid;
182 5 : trid_min_read_from= initial_trid;
183 5 : lf_hash_init(&trid_to_trn, sizeof(TRN*), LF_HASH_UNIQUE,
184 : 0, 0, trn_get_hash_key, 0);
185 5 : DBUG_PRINT("info", ("pthread_mutex_init LOCK_trn_list"));
186 5 : pthread_mutex_init(&LOCK_trn_list, MY_MUTEX_INIT_FAST);
187 : my_atomic_rwlock_init(&LOCK_short_trid_to_trn);
188 : my_atomic_rwlock_init(&LOCK_pool);
189 :
190 5 : DBUG_RETURN(0);
191 : }
192 :
193 : /*
194 : NOTE
195 : this could only be called in the "idle" state - no transaction can be
196 : running. See asserts below.
197 : */
198 : void trnman_destroy()
199 116 : {
200 116 : DBUG_ENTER("trnman_destroy");
201 :
202 116 : if (short_trid_to_active_trn == NULL) /* trnman already destroyed */
203 112 : DBUG_VOID_RETURN;
204 4 : DBUG_ASSERT(trid_to_trn.count == 0);
205 4 : DBUG_ASSERT(trnman_active_transactions == 0);
206 4 : DBUG_ASSERT(trnman_committed_transactions == 0);
207 4 : DBUG_ASSERT(active_list_max.prev == &active_list_min);
208 4 : DBUG_ASSERT(active_list_min.next == &active_list_max);
209 4 : DBUG_ASSERT(committed_list_max.prev == &committed_list_min);
210 4 : DBUG_ASSERT(committed_list_min.next == &committed_list_max);
211 4 : while (pool)
212 : {
213 0 : TRN *trn= pool;
214 0 : pool= pool->next;
215 0 : DBUG_ASSERT(trn->wt == NULL);
216 0 : pthread_mutex_destroy(&trn->state_lock);
217 0 : my_free((void *)trn, MYF(0));
218 : }
219 4 : lf_hash_destroy(&trid_to_trn);
220 4 : DBUG_PRINT("info", ("pthread_mutex_destroy LOCK_trn_list"));
221 4 : pthread_mutex_destroy(&LOCK_trn_list);
222 : my_atomic_rwlock_destroy(&LOCK_short_trid_to_trn);
223 : my_atomic_rwlock_destroy(&LOCK_pool);
224 4 : my_free((void *)(short_trid_to_active_trn+1), MYF(0));
225 4 : short_trid_to_active_trn= NULL;
226 :
227 4 : DBUG_VOID_RETURN;
228 : }
229 :
230 : /*
231 : NOTE
232 : TrID is limited to 6 bytes. Initial value of the generator
233 : is set by the recovery code - being read from the last checkpoint
234 : (or 1 on a first run).
235 : */
236 : static TrID new_trid()
237 0 : {
238 0 : DBUG_ENTER("new_trid");
239 0 : DBUG_ASSERT(global_trid_generator < 0xffffffffffffLL);
240 0 : DBUG_PRINT("info", ("safe_mutex_assert_owner LOCK_trn_list"));
241 0 : safe_mutex_assert_owner(&LOCK_trn_list);
242 0 : DBUG_RETURN(++global_trid_generator);
243 : }
244 :
245 : static uint get_short_trid(TRN *trn)
246 0 : {
247 : int i= (int) ((global_trid_generator + (intptr)trn) * 312089 %
248 0 : SHORT_TRID_MAX) + 1;
249 0 : uint res=0;
250 :
251 0 : for ( ; !res ; i= 1)
252 : {
253 : my_atomic_rwlock_wrlock(&LOCK_short_trid_to_trn);
254 0 : for ( ; i <= SHORT_TRID_MAX; i++) /* the range is [1..SHORT_TRID_MAX] */
255 : {
256 0 : void *tmp= NULL;
257 0 : if (short_trid_to_active_trn[i] == NULL &&
258 : my_atomic_casptr((void **)&short_trid_to_active_trn[i], &tmp, trn))
259 : {
260 0 : res= i;
261 0 : break;
262 : }
263 : }
264 : my_atomic_rwlock_wrunlock(&LOCK_short_trid_to_trn);
265 : }
266 0 : return res;
267 : }
268 :
269 : /**
270 : Allocates and initialzies a new TRN object
271 :
272 : @note the 'wt' parameter can only be 0 in a single-threaded code (or,
273 : generally, where threads cannot block each other), otherwise the
274 : first call to the deadlock detector will sigsegv.
275 : */
276 :
277 : TRN *trnman_new_trn(WT_THD *wt)
278 0 : {
279 : int res;
280 : TRN *trn;
281 : union { TRN *trn; void *v; } tmp;
282 0 : DBUG_ENTER("trnman_new_trn");
283 :
284 : /*
285 : we have a mutex, to do simple things under it - allocate a TRN,
286 : increment trnman_active_transactions, set trn->min_read_from.
287 :
288 : Note that all the above is fast. generating short_id may be slow,
289 : as it involves scanning a large array - so it's done outside of the
290 : mutex.
291 : */
292 :
293 0 : DBUG_PRINT("info", ("pthread_mutex_lock LOCK_trn_list"));
294 0 : pthread_mutex_lock(&LOCK_trn_list);
295 :
296 : /* Allocating a new TRN structure */
297 0 : tmp.trn= pool;
298 : /*
299 : Popping an unused TRN from the pool
300 : (ABA isn't possible, we're behind a mutex
301 : */
302 : my_atomic_rwlock_wrlock(&LOCK_pool);
303 0 : while (tmp.trn && !my_atomic_casptr((void **)&pool, &tmp.v,
304 : (void *)tmp.trn->next))
305 : /* no-op */;
306 : my_atomic_rwlock_wrunlock(&LOCK_pool);
307 :
308 : /* Nothing in the pool ? Allocate a new one */
309 0 : if (!(trn= tmp.trn))
310 : {
311 : /*
312 : trn should be completely initalized at create time to allow
313 : one to keep a known state on it.
314 : (Like redo_lns, which is assumed to be 0 at start of row handling
315 : and reset to zero before end of row handling)
316 : */
317 0 : trn= (TRN *)my_malloc(sizeof(TRN), MYF(MY_WME | MY_ZEROFILL));
318 0 : if (unlikely(!trn))
319 : {
320 0 : DBUG_PRINT("info", ("pthread_mutex_unlock LOCK_trn_list"));
321 0 : pthread_mutex_unlock(&LOCK_trn_list);
322 0 : return 0;
323 : }
324 0 : trnman_allocated_transactions++;
325 0 : pthread_mutex_init(&trn->state_lock, MY_MUTEX_INIT_FAST);
326 : }
327 0 : trn->wt= wt;
328 0 : trn->pins= lf_hash_get_pins(&trid_to_trn);
329 0 : if (!trn->pins)
330 : {
331 0 : trnman_free_trn(trn);
332 0 : pthread_mutex_unlock(&LOCK_trn_list);
333 0 : return 0;
334 : }
335 :
336 0 : trnman_active_transactions++;
337 :
338 0 : trn->min_read_from= active_list_min.next->trid;
339 :
340 0 : trn->trid= new_trid();
341 :
342 0 : trn->next= &active_list_max;
343 0 : trn->prev= active_list_max.prev;
344 0 : active_list_max.prev= trn->prev->next= trn;
345 0 : trid_min_read_from= active_list_min.next->min_read_from;
346 0 : DBUG_PRINT("info", ("pthread_mutex_unlock LOCK_trn_list"));
347 0 : pthread_mutex_unlock(&LOCK_trn_list);
348 :
349 0 : if (unlikely(!trn->min_read_from))
350 : {
351 : /*
352 : We are the only transaction. Set min_read_from so that we can read
353 : our own rows
354 : */
355 0 : trn->min_read_from= trn->trid + 1;
356 : }
357 :
358 : /* no other transaction can read changes done by this one */
359 0 : trn->commit_trid= MAX_TRID;
360 0 : trn->rec_lsn= trn->undo_lsn= trn->first_undo_lsn= 0;
361 0 : trn->used_tables= 0;
362 :
363 0 : trn->locked_tables= 0;
364 :
365 : /*
366 : only after the following function TRN is considered initialized,
367 : so it must be done the last
368 : */
369 0 : pthread_mutex_lock(&trn->state_lock);
370 0 : trn->short_id= get_short_trid(trn);
371 0 : pthread_mutex_unlock(&trn->state_lock);
372 :
373 0 : res= lf_hash_insert(&trid_to_trn, trn->pins, &trn);
374 0 : DBUG_ASSERT(res <= 0);
375 0 : if (res)
376 : {
377 0 : trnman_end_trn(trn, 0);
378 0 : return 0;
379 : }
380 :
381 0 : DBUG_PRINT("exit", ("trn: 0x%lx trid: 0x%lu",
382 : (ulong) trn, (ulong) trn->trid));
383 :
384 0 : DBUG_RETURN(trn);
385 : }
386 :
387 : /*
388 : remove a trn from the active list.
389 : if necessary - move to committed list and set commit_trid
390 :
391 : NOTE
392 : Locks are released at the end. In particular, after placing the
393 : transaction in commit list, and after setting commit_trid. It's
394 : important, as commit_trid affects visibility. Locks don't affect
395 : anything they simply delay execution of other threads - they could be
396 : released arbitrarily late. In other words, when locks are released it
397 : serves as a start banner for other threads, they start to run. So
398 : everything they may need must be ready at that point.
399 :
400 : RETURN
401 : 0 ok
402 : 1 error
403 : */
404 : my_bool trnman_end_trn(TRN *trn, my_bool commit)
405 0 : {
406 0 : int res= 1;
407 0 : uint16 cached_short_id= trn->short_id; /* we have to cache it, see below */
408 0 : TRN *free_me= 0;
409 0 : LF_PINS *pins= trn->pins;
410 0 : DBUG_ENTER("trnman_end_trn");
411 0 : DBUG_PRINT("enter", ("trn=0x%lx commit=%d", (ulong) trn, commit));
412 :
413 : /* if a rollback, all UNDO records should have been executed */
414 0 : DBUG_ASSERT(commit || trn->undo_lsn == 0);
415 0 : DBUG_ASSERT(trn != &dummy_transaction_object);
416 0 : DBUG_PRINT("info", ("pthread_mutex_lock LOCK_trn_list"));
417 :
418 0 : pthread_mutex_lock(&LOCK_trn_list);
419 :
420 : /* remove from active list */
421 0 : trn->next->prev= trn->prev;
422 0 : trn->prev->next= trn->next;
423 :
424 : /*
425 : if trn was the oldest active transaction, now that it goes away there
426 : may be committed transactions in the list which no active transaction
427 : needs to bother about - clean up the committed list
428 : */
429 0 : if (trn->prev == &active_list_min)
430 : {
431 : uint free_me_count;
432 : TRN *t;
433 0 : for (t= committed_list_min.next, free_me_count= 0;
434 0 : t->commit_trid < active_list_min.next->min_read_from;
435 0 : t= t->next, free_me_count++) /* no-op */;
436 :
437 0 : DBUG_ASSERT((t != committed_list_min.next && free_me_count > 0) ||
438 : (t == committed_list_min.next && free_me_count == 0));
439 : /* found transactions committed before the oldest active one */
440 0 : if (t != committed_list_min.next)
441 : {
442 0 : free_me= committed_list_min.next;
443 0 : committed_list_min.next= t;
444 0 : t->prev->next= 0;
445 0 : t->prev= &committed_list_min;
446 0 : trnman_committed_transactions-= free_me_count;
447 : }
448 : }
449 :
450 0 : pthread_mutex_lock(&trn->state_lock);
451 0 : if (commit)
452 0 : trn->commit_trid= global_trid_generator;
453 0 : wt_thd_release_self(trn);
454 0 : pthread_mutex_unlock(&trn->state_lock);
455 :
456 : /*
457 : if transaction is committed and it was not the only active transaction -
458 : add it to the committed list
459 : */
460 0 : if (commit && active_list_min.next != &active_list_max)
461 : {
462 0 : trn->next= &committed_list_max;
463 0 : trn->prev= committed_list_max.prev;
464 0 : trnman_committed_transactions++;
465 0 : committed_list_max.prev= trn->prev->next= trn;
466 : }
467 : else
468 : {
469 0 : trn->next= free_me;
470 0 : free_me= trn;
471 : }
472 0 : trid_min_read_from= active_list_min.next->min_read_from;
473 :
474 0 : if ((*trnman_end_trans_hook)(trn, commit,
475 : active_list_min.next != &active_list_max))
476 0 : res= -1;
477 0 : trnman_active_transactions--;
478 :
479 0 : DBUG_PRINT("info", ("pthread_mutex_unlock LOCK_trn_list"));
480 0 : pthread_mutex_unlock(&LOCK_trn_list);
481 :
482 : /*
483 : the rest is done outside of a critical section
484 :
485 : note that we don't own trn anymore, it may be in a shared list now.
486 : Thus, we cannot dereference it, and must use cached_short_id below.
487 : */
488 : my_atomic_rwlock_rdlock(&LOCK_short_trid_to_trn);
489 0 : my_atomic_storeptr((void **)&short_trid_to_active_trn[cached_short_id], 0);
490 : my_atomic_rwlock_rdunlock(&LOCK_short_trid_to_trn);
491 :
492 : /*
493 : we, under the mutex, removed going-in-free_me transactions from the
494 : active and committed lists, thus nobody else may see them when it scans
495 : those lists, and thus nobody may want to free them. Now we don't
496 : need a mutex to access free_me list
497 : */
498 : /* QQ: send them to the purge thread */
499 0 : while (free_me)
500 : {
501 0 : TRN *t= free_me;
502 0 : free_me= free_me->next;
503 :
504 : /* ignore OOM. it's harmless, and we can do nothing here anyway */
505 0 : (void)lf_hash_delete(&trid_to_trn, pins, &t->trid, sizeof(TrID));
506 :
507 0 : trnman_free_trn(t);
508 : }
509 :
510 0 : lf_hash_put_pins(pins);
511 :
512 0 : DBUG_RETURN(res < 0);
513 : }
514 :
515 : /*
516 : free a trn (add to the pool, that is)
517 : note - we can never really free() a TRN if there's at least one other
518 : running transaction - see, e.g., how lock waits are implemented in
519 : lockman.c
520 : The same is true for other lock-free data structures too. We may need some
521 : kind of FLUSH command to reset them all - ensuring that no transactions are
522 : running. It may even be called automatically on checkpoints if no
523 : transactions are running.
524 : */
525 : static void trnman_free_trn(TRN *trn)
526 0 : {
527 : /*
528 : union is to solve strict aliasing issue.
529 : without it gcc 3.4.3 doesn't notice that updating *(void **)&tmp
530 : modifies the value of tmp.
531 : */
532 : union { TRN *trn; void *v; } tmp;
533 :
534 0 : pthread_mutex_lock(&trn->state_lock);
535 0 : trn->short_id= 0;
536 0 : pthread_mutex_unlock(&trn->state_lock);
537 :
538 0 : tmp.trn= pool;
539 :
540 : my_atomic_rwlock_wrlock(&LOCK_pool);
541 : do
542 : {
543 : /*
544 : without this volatile cast gcc-3.4.4 moves the assignment
545 : down after the loop at -O2
546 : */
547 0 : *(TRN * volatile *)&(trn->next)= tmp.trn;
548 0 : } while (!my_atomic_casptr((void **)&pool, &tmp.v, trn));
549 : my_atomic_rwlock_wrunlock(&LOCK_pool);
550 : }
551 :
552 : /*
553 : NOTE
554 : here we access the hash in a lock-free manner.
555 : It's safe, a 'found' TRN can never be freed/reused before we access it.
556 : In fact, it cannot be freed before 'trn' ends, because a 'found' TRN
557 : can only be removed from the hash when:
558 : found->commit_trid < ALL (trn->min_read_from)
559 : that is, at least
560 : found->commit_trid < trn->min_read_from
561 : but
562 : found->trid >= trn->min_read_from
563 : and
564 : found->commit_trid > found->trid
565 :
566 : RETURN
567 : 1 can
568 : 0 cannot
569 : -1 error (OOM)
570 : */
571 : int trnman_can_read_from(TRN *trn, TrID trid)
572 0 : {
573 : TRN **found;
574 : my_bool can;
575 : LF_REQUIRE_PINS(3);
576 :
577 0 : if (trid < trn->min_read_from)
578 0 : return 1; /* Row is visible by all transactions in the system */
579 :
580 0 : if (trid >= trn->trid)
581 : {
582 : /*
583 : We have now two cases
584 : trid > trn->trid, in which case the row is from a new transaction
585 : and not visible, in which case we should return 0.
586 : trid == trn->trid in which case the row is from the current transaction
587 : and we should return 1
588 : */
589 0 : return trid == trn->trid;
590 : }
591 :
592 0 : found= lf_hash_search(&trid_to_trn, trn->pins, &trid, sizeof(trid));
593 0 : if (found == NULL)
594 0 : return 0; /* not in the hash of transactions = cannot read */
595 0 : if (found == MY_ERRPTR)
596 0 : return -1;
597 :
598 0 : can= (*found)->commit_trid < trn->trid;
599 0 : lf_hash_search_unpin(trn->pins);
600 0 : return can;
601 : }
602 :
603 : /**
604 : Finds a TRN by its TrID
605 :
606 : @param trn current trn. Needed for pinning pointers (see lf_pin)
607 : @param trid trid to search for
608 :
609 : @return found trn or 0
610 :
611 : @note that trn is returned with its state locked!
612 : */
613 : TRN *trnman_trid_to_trn(TRN *trn, TrID trid)
614 0 : {
615 : TRN **found;
616 : LF_REQUIRE_PINS(3);
617 :
618 0 : if (trid < trn->min_read_from)
619 0 : return 0; /* it's committed eons ago */
620 :
621 0 : found= lf_hash_search(&trid_to_trn, trn->pins, &trid, sizeof(trid));
622 0 : if (found == NULL || found == MY_ERRPTR)
623 0 : return 0; /* no luck */
624 :
625 : /* we've found something */
626 0 : pthread_mutex_lock(&(*found)->state_lock);
627 :
628 0 : if ((*found)->short_id == 0)
629 : {
630 0 : pthread_mutex_unlock(&(*found)->state_lock);
631 0 : lf_hash_search_unpin(trn->pins);
632 0 : return 0; /* but it was a ghost */
633 : }
634 0 : lf_hash_search_unpin(trn->pins);
635 :
636 : /* Gotcha! */
637 0 : return *found;
638 : }
639 :
640 : /* TODO: the stubs below are waiting for savepoints to be implemented */
641 :
642 : void trnman_new_statement(TRN *trn __attribute__ ((unused)))
643 0 : {
644 : }
645 :
646 : void trnman_rollback_statement(TRN *trn __attribute__ ((unused)))
647 0 : {
648 : }
649 :
650 :
651 : /**
652 : @brief Allocates buffers and stores in them some info about transactions
653 :
654 : Does the allocation because the caller cannot know the size itself.
655 : Memory freeing is to be done by the caller (if the "str" member of the
656 : LEX_STRING is not NULL).
657 : The caller has the intention of doing checkpoints.
658 :
659 : @param[out] str_act pointer to where the allocated buffer,
660 : and its size, will be put; buffer will be filled
661 : with info about active transactions
662 : @param[out] str_com pointer to where the allocated buffer,
663 : and its size, will be put; buffer will be filled
664 : with info about committed transactions
665 : @param[out] min_first_undo_lsn pointer to where the minimum
666 : first_undo_lsn of all transactions will be put
667 :
668 : @return Operation status
669 : @retval 0 OK
670 : @retval 1 Error
671 : */
672 :
673 : my_bool trnman_collect_transactions(LEX_STRING *str_act, LEX_STRING *str_com,
674 : LSN *min_rec_lsn, LSN *min_first_undo_lsn)
675 4 : {
676 : my_bool error;
677 : TRN *trn;
678 : char *ptr;
679 4 : uint stored_transactions= 0;
680 4 : LSN minimum_rec_lsn= LSN_MAX, minimum_first_undo_lsn= LSN_MAX;
681 4 : DBUG_ENTER("trnman_collect_transactions");
682 :
683 4 : DBUG_ASSERT((NULL == str_act->str) && (NULL == str_com->str));
684 :
685 : /* validate the use of read_non_atomic() in general: */
686 : compile_time_assert((sizeof(LSN) == 8) && (sizeof(LSN_WITH_FLAGS) == 8));
687 4 : pthread_mutex_lock(&LOCK_trn_list);
688 4 : str_act->length= 2 + /* number of active transactions */
689 : LSN_STORE_SIZE + /* minimum of their rec_lsn */
690 : TRANSID_SIZE + /* current TrID generator value */
691 : (2 + /* short id */
692 : 6 + /* long id */
693 : LSN_STORE_SIZE + /* undo_lsn */
694 : #ifdef MARIA_VERSIONING /* not enabled yet */
695 : LSN_STORE_SIZE + /* undo_purge_lsn */
696 : #endif
697 : LSN_STORE_SIZE /* first_undo_lsn */
698 : ) * trnman_active_transactions;
699 4 : str_com->length= 4 + /* number of committed transactions */
700 : (6 + /* long id */
701 : #ifdef MARIA_VERSIONING /* not enabled yet */
702 : LSN_STORE_SIZE + /* undo_purge_lsn */
703 : #endif
704 : LSN_STORE_SIZE /* first_undo_lsn */
705 : ) * trnman_committed_transactions;
706 4 : if ((NULL == (str_act->str= my_malloc(str_act->length, MYF(MY_WME)))) ||
707 : (NULL == (str_com->str= my_malloc(str_com->length, MYF(MY_WME)))))
708 : goto err;
709 : /* First, the active transactions */
710 4 : ptr= str_act->str + 2 + LSN_STORE_SIZE;
711 4 : transid_store(ptr, global_trid_generator);
712 4 : ptr+= TRANSID_SIZE;
713 4 : for (trn= active_list_min.next; trn != &active_list_max; trn= trn->next)
714 : {
715 : /*
716 : trns with a short trid of 0 are not even initialized, we can ignore
717 : them. trns with undo_lsn==0 have done no writes, we can ignore them
718 : too. XID not needed now.
719 : */
720 : uint sid;
721 : LSN rec_lsn, undo_lsn, first_undo_lsn;
722 0 : pthread_mutex_lock(&trn->state_lock);
723 0 : sid= trn->short_id;
724 0 : pthread_mutex_unlock(&trn->state_lock);
725 0 : if (sid == 0)
726 : {
727 : /*
728 : Not even inited, has done nothing. Or it is the
729 : dummy_transaction_object, which does only non-transactional
730 : immediate-sync operations (CREATE/DROP/RENAME/REPAIR TABLE), and so
731 : can be forgotten for Checkpoint.
732 : */
733 0 : continue;
734 : }
735 : /* needed for low-water mark calculation */
736 0 : if (((rec_lsn= lsn_read_non_atomic(trn->rec_lsn)) > 0) &&
737 : (cmp_translog_addr(rec_lsn, minimum_rec_lsn) < 0))
738 0 : minimum_rec_lsn= rec_lsn;
739 : /*
740 : trn may have logged REDOs but not yet UNDO, that's why we read rec_lsn
741 : before deciding to ignore if undo_lsn==0.
742 : */
743 0 : if ((undo_lsn= trn->undo_lsn) == 0) /* trn can be forgotten */
744 0 : continue;
745 0 : stored_transactions++;
746 0 : int2store(ptr, sid);
747 0 : ptr+= 2;
748 0 : int6store(ptr, trn->trid);
749 0 : ptr+= 6;
750 0 : lsn_store(ptr, undo_lsn); /* needed for rollback */
751 0 : ptr+= LSN_STORE_SIZE;
752 : /* needed for low-water mark calculation */
753 0 : if (((first_undo_lsn= lsn_read_non_atomic(trn->first_undo_lsn)) > 0) &&
754 : (cmp_translog_addr(first_undo_lsn, minimum_first_undo_lsn) < 0))
755 0 : minimum_first_undo_lsn= first_undo_lsn;
756 0 : lsn_store(ptr, first_undo_lsn);
757 0 : ptr+= LSN_STORE_SIZE;
758 : #ifdef MARIA_VERSIONING /* not enabled yet */
759 : /* to know where purging should start (last delete of this trn) */
760 : lsn_store(ptr, trn->undo_purge_lsn);
761 : ptr+= LSN_STORE_SIZE;
762 : #endif
763 : /**
764 : @todo RECOVERY: add a comment explaining why we can dirtily read some
765 : vars, inspired by the text of "assumption 8" in WL#3072
766 : */
767 : }
768 4 : str_act->length= ptr - str_act->str; /* as we maybe over-estimated */
769 4 : ptr= str_act->str;
770 4 : DBUG_PRINT("info",("collected %u active transactions",
771 : (uint)stored_transactions));
772 4 : int2store(ptr, stored_transactions);
773 4 : ptr+= 2;
774 : /* this LSN influences how REDOs for any page can be ignored by Recovery */
775 4 : lsn_store(ptr, minimum_rec_lsn);
776 : /* one day there will also be a list of prepared transactions */
777 : /* do the same for committed ones */
778 4 : ptr= str_com->str;
779 4 : int4store(ptr, trnman_committed_transactions);
780 4 : ptr+= 4;
781 4 : DBUG_PRINT("info",("collected %u committed transactions",
782 : (uint)trnman_committed_transactions));
783 8 : for (trn= committed_list_min.next; trn != &committed_list_max;
784 0 : trn= trn->next)
785 : {
786 : LSN first_undo_lsn;
787 0 : int6store(ptr, trn->trid);
788 0 : ptr+= 6;
789 : #ifdef MARIA_VERSIONING /* not enabled yet */
790 : lsn_store(ptr, trn->undo_purge_lsn);
791 : ptr+= LSN_STORE_SIZE;
792 : #endif
793 0 : first_undo_lsn= LSN_WITH_FLAGS_TO_LSN(trn->first_undo_lsn);
794 0 : if (cmp_translog_addr(first_undo_lsn, minimum_first_undo_lsn) < 0)
795 0 : minimum_first_undo_lsn= first_undo_lsn;
796 0 : lsn_store(ptr, first_undo_lsn);
797 0 : ptr+= LSN_STORE_SIZE;
798 : }
799 : /*
800 : TODO: if we see there exists no transaction (active and committed) we can
801 : tell the lock-free structures to do some freeing (my_free()).
802 : */
803 4 : error= 0;
804 4 : *min_rec_lsn= minimum_rec_lsn;
805 4 : *min_first_undo_lsn= minimum_first_undo_lsn;
806 4 : goto end;
807 0 : err:
808 0 : error= 1;
809 4 : end:
810 4 : pthread_mutex_unlock(&LOCK_trn_list);
811 4 : DBUG_RETURN(error);
812 : }
813 :
814 :
815 : TRN *trnman_recreate_trn_from_recovery(uint16 shortid, TrID longid)
816 0 : {
817 0 : TrID old_trid_generator= global_trid_generator;
818 : TRN *trn;
819 0 : DBUG_ASSERT(maria_in_recovery && !maria_multi_threaded);
820 0 : global_trid_generator= longid-1; /* force a correct trid in the new trn */
821 0 : if (unlikely((trn= trnman_new_trn(NULL)) == NULL))
822 0 : return NULL;
823 : /* deallocate excessive allocations of trnman_new_trn() */
824 0 : global_trid_generator= old_trid_generator;
825 0 : set_if_bigger(global_trid_generator, longid);
826 0 : short_trid_to_active_trn[trn->short_id]= 0;
827 0 : DBUG_ASSERT(short_trid_to_active_trn[shortid] == NULL);
828 0 : short_trid_to_active_trn[shortid]= trn;
829 0 : trn->short_id= shortid;
830 0 : return trn;
831 : }
832 :
833 :
834 : TRN *trnman_get_any_trn()
835 0 : {
836 0 : TRN *trn= active_list_min.next;
837 0 : return (trn != &active_list_max) ? trn : NULL;
838 : }
839 :
840 :
841 : /**
842 : Returns the minimum existing transaction id. May return a too small
843 : number in race conditions, but this is ok as the value is used to
844 : remove not visible transid from index/rows.
845 : */
846 :
847 : TrID trnman_get_min_trid()
848 98 : {
849 98 : return trid_min_read_from;
850 : }
851 :
852 :
853 : /**
854 : Returns the minimum possible transaction id
855 :
856 : @notes
857 : If there is no transactions running, returns number for next running
858 : transaction.
859 : If one has an active transaction, the returned number will be less or
860 : equal to this. If one is not running in a transaction one will ge the
861 : number for the next started transaction. This is used in create table
862 : to get a safe minimum trid to use.
863 : */
864 :
865 : TrID trnman_get_min_safe_trid()
866 0 : {
867 : TrID trid;
868 0 : pthread_mutex_lock(&LOCK_trn_list);
869 0 : trid= min(active_list_min.next->min_read_from,
870 : global_trid_generator);
871 0 : pthread_mutex_unlock(&LOCK_trn_list);
872 0 : return trid;
873 : }
874 :
875 :
876 : /**
877 : Returns maximum transaction id given to a transaction so far.
878 : */
879 :
880 : TrID trnman_get_max_trid()
881 116 : {
882 : TrID id;
883 116 : if (short_trid_to_active_trn == NULL)
884 112 : return 0;
885 4 : pthread_mutex_lock(&LOCK_trn_list);
886 4 : id= global_trid_generator;
887 4 : pthread_mutex_unlock(&LOCK_trn_list);
888 4 : return id;
889 : }
890 :
891 : /**
892 : @brief Check if there exist an active transaction between two commit_id's
893 :
894 : @todo
895 : Improve speed of this.
896 : - Store transactions in tree or skip list
897 : - Have function to copying all active transaction id's to b-tree
898 : and use b-tree for checking states. This could be a big win
899 : for checkpoint that will call this function for a lot of objects.
900 :
901 : @return
902 : 0 No transaction exists
903 : 1 There is at least on active transaction in the given range
904 : */
905 :
906 : my_bool trnman_exists_active_transactions(TrID min_id, TrID max_id,
907 : my_bool trnman_is_locked)
908 0 : {
909 : TRN *trn;
910 0 : my_bool ret= 0;
911 :
912 0 : if (!trnman_is_locked)
913 0 : pthread_mutex_lock(&LOCK_trn_list);
914 0 : safe_mutex_assert_owner(&LOCK_trn_list);
915 0 : for (trn= active_list_min.next; trn != &active_list_max; trn= trn->next)
916 : {
917 : /*
918 : We use <= for max_id as max_id is a commit_trid and trn->trid
919 : is transaction id. When calculating commit_trid we use the
920 : current value of global_trid_generator. global_trid_generator is
921 : incremented for each new transaction.
922 :
923 : For example, assuming we have
924 : min_id = 5
925 : max_id = 10
926 :
927 : A trid of value 5 can't see the history event between 5 & 10
928 : at it vas started before min_id 5 was committed.
929 : A trid of value 10 can't see the next history event (max_id = 10)
930 : as it started before this was committed. In this case it must use
931 : the this event.
932 : */
933 0 : if (trn->trid > min_id && trn->trid <= max_id)
934 : {
935 0 : ret= 1;
936 0 : break;
937 : }
938 : }
939 0 : if (!trnman_is_locked)
940 0 : pthread_mutex_unlock(&LOCK_trn_list);
941 0 : return ret;
942 : }
943 :
944 :
945 : /**
946 : lock transaction list
947 : */
948 :
949 : void trnman_lock()
950 18 : {
951 18 : pthread_mutex_lock(&LOCK_trn_list);
952 : }
953 :
954 :
955 : /**
956 : unlock transaction list
957 : */
958 :
959 : void trnman_unlock()
960 18 : {
961 18 : pthread_mutex_unlock(&LOCK_trn_list);
962 : }
963 :
964 :
965 : /**
966 : Is trman initialized
967 : */
968 :
969 : my_bool trman_is_inited()
970 130 : {
971 130 : return (short_trid_to_active_trn != NULL);
972 : }
|