1 : /* Copyright (C) 2006 MySQL AB & Alexey Botchkov & MySQL Finland AB
2 : & TCX DataKonsult AB
3 :
4 : This program is free software; you can redistribute it and/or modify
5 : it under the terms of the GNU General Public License as published by
6 : the Free Software Foundation; version 2 of the License.
7 :
8 : This program is distributed in the hope that it will be useful,
9 : but WITHOUT ANY WARRANTY; without even the implied warranty of
10 : MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
11 : GNU General Public License for more details.
12 :
13 : You should have received a copy of the GNU General Public License
14 : along with this program; if not, write to the Free Software
15 : Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */
16 :
17 : #include "maria_def.h"
18 : #include "trnman.h"
19 : #include "ma_key_recover.h"
20 :
21 : #ifdef HAVE_RTREE_KEYS
22 :
23 : #include "ma_rt_index.h"
24 : #include "ma_rt_key.h"
25 : #include "ma_rt_mbr.h"
26 :
27 : typedef struct
28 : {
29 : double square;
30 : int n_node;
31 : const uchar *key;
32 : double *coords;
33 : } SplitStruct;
34 :
35 : inline static double *reserve_coords(double **d_buffer, int n_dim)
36 0 : {
37 0 : double *coords= *d_buffer;
38 0 : (*d_buffer)+= n_dim * 2;
39 0 : return coords;
40 : }
41 :
42 : static void mbr_join(double *a, const double *b, int n_dim)
43 0 : {
44 0 : double *end= a + n_dim * 2;
45 : do
46 : {
47 0 : if (a[0] > b[0])
48 0 : a[0]= b[0];
49 :
50 0 : if (a[1] < b[1])
51 0 : a[1]= b[1];
52 :
53 0 : a+= 2;
54 0 : b+= 2;
55 0 : } while (a != end);
56 : }
57 :
58 : /*
59 : Counts the square of mbr which is a join of a and b
60 : */
61 : static double mbr_join_square(const double *a, const double *b, int n_dim)
62 0 : {
63 0 : const double *end= a + n_dim * 2;
64 0 : double square= 1.0;
65 : do
66 : {
67 0 : square *=
68 : ((a[1] < b[1]) ? b[1] : a[1]) - ((a[0] > b[0]) ? b[0] : a[0]);
69 :
70 0 : a+= 2;
71 0 : b+= 2;
72 0 : } while (a != end);
73 :
74 0 : return square;
75 : }
76 :
77 : static double count_square(const double *a, int n_dim)
78 0 : {
79 0 : const double *end= a + n_dim * 2;
80 0 : double square= 1.0;
81 : do
82 : {
83 0 : square *= a[1] - a[0];
84 0 : a+= 2;
85 0 : } while (a != end);
86 0 : return square;
87 : }
88 :
89 : inline static void copy_coords(double *dst, const double *src, int n_dim)
90 0 : {
91 0 : memcpy(dst, src, sizeof(double) * (n_dim * 2));
92 : }
93 :
94 : /**
95 : Select two nodes to collect group upon.
96 :
97 : Note that such function uses 'double' arithmetic so may behave differently
98 : on different platforms/builds. There are others in this file.
99 : */
100 : static void pick_seeds(SplitStruct *node, int n_entries,
101 : SplitStruct **seed_a, SplitStruct **seed_b, int n_dim)
102 0 : {
103 : SplitStruct *cur1;
104 0 : SplitStruct *lim1= node + (n_entries - 1);
105 : SplitStruct *cur2;
106 0 : SplitStruct *lim2= node + n_entries;
107 :
108 0 : double max_d= -DBL_MAX;
109 : double d;
110 :
111 0 : for (cur1= node; cur1 < lim1; cur1++)
112 : {
113 0 : for (cur2=cur1 + 1; cur2 < lim2; cur2++)
114 : {
115 :
116 0 : d= mbr_join_square(cur1->coords, cur2->coords, n_dim) - cur1->square -
117 : cur2->square;
118 0 : if (d > max_d)
119 : {
120 0 : max_d= d;
121 0 : *seed_a= cur1;
122 0 : *seed_b= cur2;
123 : }
124 : }
125 : }
126 : }
127 :
128 : /*
129 : Select next node and group where to add
130 : */
131 : static void pick_next(SplitStruct *node, int n_entries, double *g1, double *g2,
132 : SplitStruct **choice, int *n_group, int n_dim)
133 0 : {
134 0 : SplitStruct *cur= node;
135 0 : SplitStruct *end= node + n_entries;
136 :
137 0 : double max_diff= -DBL_MAX;
138 :
139 0 : for (; cur < end; cur++)
140 : {
141 : double diff;
142 : double abs_diff;
143 :
144 0 : if (cur->n_node)
145 : {
146 0 : continue;
147 : }
148 :
149 0 : diff= mbr_join_square(g1, cur->coords, n_dim) -
150 : mbr_join_square(g2, cur->coords, n_dim);
151 :
152 0 : abs_diff= fabs(diff);
153 0 : if (abs_diff > max_diff)
154 : {
155 0 : max_diff= abs_diff;
156 0 : *n_group= 1 + (diff > 0);
157 0 : *choice= cur;
158 : }
159 : }
160 : }
161 :
162 : /*
163 : Mark not-in-group entries as n_group
164 : */
165 : static void mark_all_entries(SplitStruct *node, int n_entries, int n_group)
166 0 : {
167 0 : SplitStruct *cur= node;
168 0 : SplitStruct *end= node + n_entries;
169 :
170 0 : for (; cur < end; cur++)
171 : {
172 0 : if (cur->n_node)
173 : {
174 0 : continue;
175 : }
176 0 : cur->n_node= n_group;
177 : }
178 : }
179 :
180 : static int split_maria_rtree_node(SplitStruct *node, int n_entries,
181 : int all_size, /* Total key's size */
182 : int key_size,
183 : int min_size, /* Minimal group size */
184 : int size1, int size2 /* initial group sizes */,
185 : double **d_buffer, int n_dim)
186 0 : {
187 : SplitStruct *cur;
188 : SplitStruct *a;
189 : SplitStruct *b;
190 0 : double *g1= reserve_coords(d_buffer, n_dim);
191 0 : double *g2= reserve_coords(d_buffer, n_dim);
192 : SplitStruct *next;
193 : int next_node;
194 : int i;
195 0 : SplitStruct *end= node + n_entries;
196 0 : LINT_INIT(a);
197 0 : LINT_INIT(b);
198 0 : LINT_INIT(next);
199 0 : LINT_INIT(next_node);
200 :
201 0 : if (all_size < min_size * 2)
202 : {
203 0 : return 1;
204 : }
205 :
206 0 : cur= node;
207 0 : for (; cur < end; cur++)
208 : {
209 0 : cur->square= count_square(cur->coords, n_dim);
210 0 : cur->n_node= 0;
211 : }
212 :
213 0 : pick_seeds(node, n_entries, &a, &b, n_dim);
214 0 : a->n_node= 1;
215 0 : b->n_node= 2;
216 :
217 :
218 0 : copy_coords(g1, a->coords, n_dim);
219 0 : size1+= key_size;
220 0 : copy_coords(g2, b->coords, n_dim);
221 0 : size2+= key_size;
222 :
223 :
224 0 : for (i=n_entries - 2; i>0; --i)
225 : {
226 0 : if (all_size - (size2 + key_size) < min_size) /* Can't write into group 2 */
227 : {
228 0 : mark_all_entries(node, n_entries, 1);
229 0 : break;
230 : }
231 :
232 0 : if (all_size - (size1 + key_size) < min_size) /* Can't write into group 1 */
233 : {
234 0 : mark_all_entries(node, n_entries, 2);
235 0 : break;
236 : }
237 :
238 0 : pick_next(node, n_entries, g1, g2, &next, &next_node, n_dim);
239 0 : if (next_node == 1)
240 : {
241 0 : size1+= key_size;
242 0 : mbr_join(g1, next->coords, n_dim);
243 : }
244 : else
245 : {
246 0 : size2+= key_size;
247 0 : mbr_join(g2, next->coords, n_dim);
248 : }
249 0 : next->n_node= next_node;
250 : }
251 :
252 0 : return 0;
253 : }
254 :
255 :
256 : /**
257 : Logs key reorganization done in a split page (new page is logged elsewhere).
258 :
259 : The effect of a split on the split page is three changes:
260 : - some piece of the page move to different places inside this page (we are
261 : not interested here in the pieces which move to the new page)
262 : - the key is inserted into the page or not (could be in the new page)
263 : - page is shrunk
264 : All this is uniquely determined by a few parameters:
265 : - the key (starting at 'key-nod_flag', for 'full_length' bytes
266 : (maria_rtree_split_page() seems to depend on its parameters key&key_length
267 : but in fact it reads more (to the left: nod_flag, and to the right:
268 : full_length)
269 : - the binary content of the page
270 : - some variables in the share
271 : - double arithmetic, which is unpredictable from machine to machine and
272 : from build to build (see pick_seeds() above: it has a comparison between
273 : double-s 'if (d > max_d)' so the comparison can go differently from machine
274 : to machine or build to build, it has happened in real life).
275 : If one day we use precision-math instead of double-math, in GIS, then the
276 : last parameter would become constant accross machines and builds and we
277 : could some cheap logging: just log the few parameters above.
278 : Until then, we log the list of memcpy() operations (fortunately, we often do
279 : not have to log the source bytes, as they can be found in the page before
280 : applying the REDO; the only source bytes to log are the key), the key if it
281 : was inserted into this page, and the shrinking.
282 :
283 : @param info table
284 : @param page page's offset in the file
285 : @param buff content of the page (post-split)
286 : @param key_with_nod_flag pointer to key-nod_flag
287 : @param full_length length of (key + (nod_flag (if node) or rowid (if
288 : leaf)))
289 : @param log_internal_copy encoded list of mempcy() operations done on
290 : split page, having their source in the page
291 : @param log_internal_copy_length length of above list, in bytes
292 : @param log_key_copy operation describing the key's copy, or NULL if the
293 : inserted key was not put into the page (was put in
294 : new page, so does not have to be logged here)
295 : @param length_diff by how much the page has shrunk during split
296 : */
297 :
298 : static my_bool _ma_log_rt_split(MARIA_PAGE *page,
299 : const uchar *key_with_nod_flag,
300 : uint full_length,
301 : const uchar *log_internal_copy,
302 : uint log_internal_copy_length,
303 : const uchar *log_key_copy,
304 : uint length_diff)
305 0 : {
306 0 : MARIA_HA *info= page->info;
307 0 : MARIA_SHARE *share= info->s;
308 : LSN lsn;
309 : uchar log_data[FILEID_STORE_SIZE + PAGE_STORE_SIZE + 1 + 2 + 1 + 2 + 2 + 7],
310 : *log_pos;
311 : LEX_CUSTRING log_array[TRANSLOG_INTERNAL_PARTS + 5];
312 0 : uint translog_parts, extra_length= 0;
313 : my_off_t page_pos;
314 0 : DBUG_ENTER("_ma_log_rt_split");
315 0 : DBUG_PRINT("enter", ("page: %lu", (ulong) page));
316 :
317 0 : DBUG_ASSERT(share->now_transactional);
318 0 : page_pos= page->pos / share->block_size;
319 0 : page_store(log_data + FILEID_STORE_SIZE, page_pos);
320 0 : log_pos= log_data+ FILEID_STORE_SIZE + PAGE_STORE_SIZE;
321 0 : log_pos[0]= KEY_OP_DEL_SUFFIX;
322 0 : log_pos++;
323 0 : DBUG_ASSERT((int)length_diff > 0);
324 0 : int2store(log_pos, length_diff);
325 0 : log_pos+= 2;
326 0 : log_pos[0]= KEY_OP_MULTI_COPY;
327 0 : log_pos++;
328 0 : int2store(log_pos, full_length);
329 0 : log_pos+= 2;
330 0 : int2store(log_pos, log_internal_copy_length);
331 0 : log_pos+= 2;
332 0 : log_array[TRANSLOG_INTERNAL_PARTS + 0].str= log_data;
333 0 : log_array[TRANSLOG_INTERNAL_PARTS + 0].length= sizeof(log_data) - 7;
334 0 : log_array[TRANSLOG_INTERNAL_PARTS + 1].str= log_internal_copy;
335 0 : log_array[TRANSLOG_INTERNAL_PARTS + 1].length= log_internal_copy_length;
336 0 : translog_parts= 2;
337 0 : if (log_key_copy != NULL) /* need to store key into record */
338 : {
339 0 : log_array[TRANSLOG_INTERNAL_PARTS + 2].str= log_key_copy;
340 0 : log_array[TRANSLOG_INTERNAL_PARTS + 2].length= 1 + 2 + 1 + 2;
341 0 : log_array[TRANSLOG_INTERNAL_PARTS + 3].str= key_with_nod_flag;
342 0 : log_array[TRANSLOG_INTERNAL_PARTS + 3].length= full_length;
343 0 : extra_length= 1 + 2 + 1 + 2 + full_length;
344 0 : translog_parts+= 2;
345 : }
346 :
347 : #ifdef EXTRA_DEBUG_KEY_CHANGES
348 : {
349 0 : int page_length= page->size;
350 : ha_checksum crc;
351 0 : uchar *check_start= log_pos;
352 0 : crc= my_checksum(0, page->buff + LSN_STORE_SIZE,
353 : page_length - LSN_STORE_SIZE);
354 0 : log_pos[0]= KEY_OP_CHECK;
355 0 : log_pos++;
356 0 : int2store(log_pos, page_length);
357 0 : log_pos+= 2;
358 0 : int4store(log_pos, crc);
359 0 : log_pos+= 4;
360 0 : log_array[TRANSLOG_INTERNAL_PARTS + translog_parts].str= check_start;
361 0 : log_array[TRANSLOG_INTERNAL_PARTS + translog_parts].length= 7;
362 0 : translog_parts++;
363 : }
364 : #endif
365 :
366 0 : if (translog_write_record(&lsn, LOGREC_REDO_INDEX,
367 : info->trn, info,
368 : (translog_size_t) ((log_pos - log_data) +
369 : log_internal_copy_length +
370 : extra_length),
371 : TRANSLOG_INTERNAL_PARTS + translog_parts,
372 : log_array, log_data, NULL))
373 0 : DBUG_RETURN(1);
374 0 : DBUG_RETURN(0);
375 : }
376 :
377 : /**
378 : 0 ok; the created page is put into page cache; the shortened one is not (up
379 : to the caller to do it)
380 : 1 or -1: error.
381 : If new_page_offs==NULL, won't create new page (for redo phase).
382 : */
383 :
384 : int maria_rtree_split_page(const MARIA_KEY *key, MARIA_PAGE *page,
385 : my_off_t *new_page_offs)
386 0 : {
387 0 : MARIA_HA *info= page->info;
388 0 : MARIA_SHARE *share= info->s;
389 0 : const my_bool transactional= share->now_transactional;
390 : int n1, n2; /* Number of items in groups */
391 : SplitStruct *task;
392 : SplitStruct *cur;
393 : SplitStruct *stop;
394 : double *coord_buf;
395 : double *next_coord;
396 : double *old_coord;
397 : int n_dim;
398 : uchar *source_cur, *cur1, *cur2;
399 : uchar *new_page_buff, *log_internal_copy, *log_internal_copy_ptr,
400 0 : *log_key_copy= NULL;
401 0 : int err_code= 0;
402 : uint new_page_length;
403 0 : uint nod_flag= page->node;
404 0 : uint org_length= page->size;
405 : uint full_length= key->data_length + (nod_flag ? nod_flag :
406 0 : key->ref_length);
407 0 : uint key_data_length= key->data_length;
408 0 : int max_keys= ((org_length - share->keypage_header) / (full_length));
409 0 : MARIA_PINNED_PAGE tmp_page_link, *page_link= &tmp_page_link;
410 0 : MARIA_KEYDEF *keyinfo= key->keyinfo;
411 0 : DBUG_ENTER("maria_rtree_split_page");
412 0 : DBUG_PRINT("rtree", ("splitting block"));
413 :
414 0 : n_dim= keyinfo->keysegs / 2;
415 :
416 0 : if (!(coord_buf= (double*) my_alloca(n_dim * 2 * sizeof(double) *
417 : (max_keys + 1 + 4) +
418 : sizeof(SplitStruct) * (max_keys + 1))))
419 0 : DBUG_RETURN(-1); /* purecov: inspected */
420 :
421 0 : task= (SplitStruct *)(coord_buf + n_dim * 2 * (max_keys + 1 + 4));
422 :
423 0 : next_coord= coord_buf;
424 :
425 0 : stop= task + max_keys;
426 0 : source_cur= rt_PAGE_FIRST_KEY(share, page->buff, nod_flag);
427 :
428 0 : for (cur= task;
429 0 : cur < stop;
430 0 : cur++, source_cur= rt_PAGE_NEXT_KEY(share, source_cur, key_data_length,
431 : nod_flag))
432 : {
433 0 : cur->coords= reserve_coords(&next_coord, n_dim);
434 0 : cur->key= source_cur;
435 0 : maria_rtree_d_mbr(keyinfo->seg, source_cur, key_data_length, cur->coords);
436 : }
437 :
438 0 : cur->coords= reserve_coords(&next_coord, n_dim);
439 0 : maria_rtree_d_mbr(keyinfo->seg, key->data, key_data_length, cur->coords);
440 0 : cur->key= key->data;
441 :
442 0 : old_coord= next_coord;
443 :
444 0 : if (split_maria_rtree_node(task, max_keys + 1,
445 : page->size + full_length + 2,
446 : full_length,
447 : rt_PAGE_MIN_SIZE(keyinfo->block_length),
448 : 2, 2, &next_coord, n_dim))
449 : {
450 0 : err_code= 1;
451 0 : goto split_err;
452 : }
453 :
454 : /* Allocate buffer for new page and piece of log record */
455 0 : if (!(new_page_buff= (uchar*) my_alloca((uint)keyinfo->block_length +
456 : (transactional ?
457 : (max_keys * (2 + 2) +
458 : 1 + 2 + 1 + 2) : 0))))
459 : {
460 0 : err_code= -1;
461 0 : goto split_err;
462 : }
463 0 : log_internal_copy= log_internal_copy_ptr= new_page_buff +
464 : keyinfo->block_length;
465 0 : bzero(new_page_buff, share->block_size);
466 :
467 0 : stop= task + (max_keys + 1);
468 0 : cur1= rt_PAGE_FIRST_KEY(share, page->buff, nod_flag);
469 0 : cur2= rt_PAGE_FIRST_KEY(share, new_page_buff, nod_flag);
470 :
471 0 : n1= n2= 0;
472 0 : for (cur= task; cur < stop; cur++)
473 : {
474 : uchar *to;
475 0 : const uchar *cur_key= cur->key;
476 : my_bool log_this_change;
477 0 : DBUG_ASSERT(log_key_copy == NULL);
478 0 : if (cur->n_node == 1)
479 : {
480 0 : to= cur1;
481 0 : cur1= rt_PAGE_NEXT_KEY(share, cur1, key_data_length, nod_flag);
482 0 : n1++;
483 0 : log_this_change= transactional;
484 : }
485 : else
486 : {
487 0 : to= cur2;
488 0 : cur2= rt_PAGE_NEXT_KEY(share, cur2, key_data_length, nod_flag);
489 0 : n2++;
490 0 : log_this_change= FALSE;
491 : }
492 0 : if (to != cur_key)
493 : {
494 0 : uchar *to_with_nod_flag= to - nod_flag;
495 0 : const uchar *cur_key_with_nod_flag= cur_key - nod_flag;
496 0 : memcpy(to_with_nod_flag, cur_key_with_nod_flag, full_length);
497 0 : if (log_this_change)
498 : {
499 0 : uint to_with_nod_flag_offs= to_with_nod_flag - page->buff;
500 0 : if (likely(cur_key != key->data))
501 : {
502 : /* this memcpy() is internal to the page (source in the page) */
503 0 : uint cur_key_with_nod_flag_offs= cur_key_with_nod_flag - page->buff;
504 0 : int2store(log_internal_copy_ptr, to_with_nod_flag_offs);
505 0 : log_internal_copy_ptr+= 2;
506 0 : int2store(log_internal_copy_ptr, cur_key_with_nod_flag_offs);
507 0 : log_internal_copy_ptr+= 2;
508 : }
509 : else
510 : {
511 : /* last iteration, and this involves *key: source is external */
512 0 : log_key_copy= log_internal_copy_ptr;
513 0 : log_key_copy[0]= KEY_OP_OFFSET;
514 0 : int2store(log_key_copy + 1, to_with_nod_flag_offs);
515 0 : log_key_copy[3]= KEY_OP_CHANGE;
516 0 : int2store(log_key_copy + 4, full_length);
517 : /* _ma_log_rt_split() will store *key, right after */
518 : }
519 : }
520 : }
521 : }
522 : { /* verify that above loop didn't touch header bytes */
523 : uint i;
524 0 : for (i= 0; i < share->keypage_header; i++)
525 0 : DBUG_ASSERT(new_page_buff[i]==0);
526 : }
527 :
528 0 : if (nod_flag)
529 0 : _ma_store_keypage_flag(share, new_page_buff, KEYPAGE_FLAG_ISNOD);
530 0 : _ma_store_keynr(share, new_page_buff, keyinfo->key_nr);
531 0 : new_page_length= share->keypage_header + n2 * full_length;
532 0 : _ma_store_page_used(share, new_page_buff, new_page_length);
533 0 : page->size= share->keypage_header + n1 * full_length;
534 0 : page_store_size(share, page);
535 :
536 0 : if ((*new_page_offs= _ma_new(info, DFLT_INIT_HITS, &page_link)) ==
537 : HA_OFFSET_ERROR)
538 0 : err_code= -1;
539 : else
540 : {
541 : MARIA_PAGE new_page;
542 0 : _ma_page_setup(&new_page, info, keyinfo, *new_page_offs, new_page_buff);
543 :
544 0 : if (transactional &&
545 : ( /* log change to split page */
546 : _ma_log_rt_split(page, key->data - nod_flag,
547 : full_length, log_internal_copy,
548 : log_internal_copy_ptr - log_internal_copy,
549 : log_key_copy, org_length - page->size) ||
550 : /* and to new page */
551 : _ma_log_new(&new_page, 0)))
552 0 : err_code= -1;
553 :
554 0 : if (_ma_write_keypage(&new_page, page_link->write_lock,
555 : DFLT_INIT_HITS))
556 0 : err_code= -1;
557 : }
558 0 : DBUG_PRINT("rtree", ("split new block: %lu", (ulong) *new_page_offs));
559 :
560 : my_afree(new_page);
561 :
562 0 : split_err:
563 : my_afree(coord_buf);
564 0 : DBUG_RETURN(err_code);
565 : }
566 :
567 : #endif /*HAVE_RTREE_KEYS*/
|