1 : /* Copyright (C) 2006 MySQL AB & MySQL Finland AB & TCX DataKonsult AB
2 :
3 : This program is free software; you can redistribute it and/or modify
4 : it under the terms of the GNU General Public License as published by
5 : the Free Software Foundation; version 2 of the License.
6 :
7 : This program is distributed in the hope that it will be useful,
8 : but WITHOUT ANY WARRANTY; without even the implied warranty of
9 : MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
10 : GNU General Public License for more details.
11 :
12 : You should have received a copy of the GNU General Public License
13 : along with this program; if not, write to the Free Software
14 : Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */
15 :
16 : /* Create a MARIA table */
17 :
18 : #include "ma_ftdefs.h"
19 : #include "ma_sp_defs.h"
20 : #include <my_bit.h>
21 : #include "ma_blockrec.h"
22 : #include "trnman_public.h"
23 :
24 : #if defined(MSDOS) || defined(__WIN__)
25 : #ifdef __WIN__
26 : #include <fcntl.h>
27 : #else
28 : #include <process.h> /* Prototype for getpid */
29 : #endif
30 : #endif
31 : #include <m_ctype.h>
32 :
33 : static int compare_columns(MARIA_COLUMNDEF **a, MARIA_COLUMNDEF **b); /* Forward declaration of a file-local helper; NOTE(review): presumably a sort comparator for column definitions - confirm against its definition */
34 :
35 : /*
36 : Old options are used when recreating a table from maria_chk
37 : */
38 :
39 : int maria_create(const char *name, enum data_file_type datafile_type,
40 : uint keys,MARIA_KEYDEF *keydefs,
41 : uint columns, MARIA_COLUMNDEF *columndef,
42 : uint uniques, MARIA_UNIQUEDEF *uniquedefs,
43 : MARIA_CREATE_INFO *ci,uint flags)
44 503 : {
45 : register uint i,j;
46 : File dfile,file;
47 503 : int errpos,save_errno, create_mode= O_RDWR | O_TRUNC, res;
48 : myf create_flag;
49 : uint length,max_key_length,packed,pack_bytes,pointer,real_length_diff,
50 : key_length,info_length,key_segs,options,min_key_length,
51 : base_pos,long_varchar_count,varchar_length,
52 : unique_key_parts,fulltext_keys,offset, not_block_record_extra_length;
53 : uint max_field_lengths, extra_header_size, column_nr;
54 : ulong reclength, real_reclength,min_pack_length;
55 : char filename[FN_REFLEN], linkname[FN_REFLEN], *linkname_ptr;
56 : ulong pack_reclength;
57 : ulonglong tot_length,max_rows, tmp;
58 : enum en_fieldtype type;
59 503 : enum data_file_type org_datafile_type= datafile_type;
60 : MARIA_SHARE share;
61 : MARIA_KEYDEF *keydef,tmp_keydef;
62 : MARIA_UNIQUEDEF *uniquedef;
63 : HA_KEYSEG *keyseg,tmp_keyseg;
64 : MARIA_COLUMNDEF *column, *end_column;
65 : double *rec_per_key_part;
66 : ulong *nulls_per_key_part;
67 : uint16 *column_array;
68 : my_off_t key_root[HA_MAX_POSSIBLE_KEY], kfile_size_before_extension;
69 : MARIA_CREATE_INFO tmp_create_info;
70 503 : my_bool tmp_table= FALSE; /* cache for presence of HA_OPTION_TMP_TABLE */
71 : my_bool forced_packed;
72 503 : myf sync_dir= 0;
73 503 : uchar *log_data= NULL;
74 503 : DBUG_ENTER("maria_create");
75 503 : DBUG_PRINT("enter", ("keys: %u columns: %u uniques: %u flags: %u",
76 : keys, columns, uniques, flags));
77 :
78 503 : DBUG_ASSERT(maria_inited);
79 503 : LINT_INIT(dfile);
80 503 : LINT_INIT(file);
81 :
82 503 : if (!ci)
83 : {
84 0 : bzero((char*) &tmp_create_info,sizeof(tmp_create_info));
85 0 : ci=&tmp_create_info;
86 : }
87 :
88 503 : if (keys + uniques > MARIA_MAX_KEY)
89 : {
90 0 : DBUG_RETURN(my_errno=HA_WRONG_CREATE_OPTION);
91 : }
92 503 : errpos=0;
93 503 : options=0;
94 503 : bzero((uchar*) &share,sizeof(share));
95 :
96 503 : if (flags & HA_DONT_TOUCH_DATA)
97 : {
98 : /* We come here from recreate table */
99 30 : org_datafile_type= ci->org_data_file_type;
100 30 : if (!(ci->old_options & HA_OPTION_TEMP_COMPRESS_RECORD))
101 15 : options= (ci->old_options &
102 : (HA_OPTION_COMPRESS_RECORD | HA_OPTION_PACK_RECORD |
103 : HA_OPTION_READ_ONLY_DATA | HA_OPTION_CHECKSUM |
104 : HA_OPTION_TMP_TABLE | HA_OPTION_DELAY_KEY_WRITE |
105 : HA_OPTION_LONG_BLOB_PTR | HA_OPTION_PAGE_CHECKSUM));
106 : else
107 : {
108 : /* Uncompressing rows */
109 15 : options= (ci->old_options &
110 : (HA_OPTION_CHECKSUM | HA_OPTION_TMP_TABLE |
111 : HA_OPTION_DELAY_KEY_WRITE | HA_OPTION_LONG_BLOB_PTR |
112 : HA_OPTION_PAGE_CHECKSUM));
113 : }
114 : }
115 : else
116 : {
117 : /* Transactional tables must be of type BLOCK_RECORD */
118 473 : if (ci->transactional)
119 312 : datafile_type= BLOCK_RECORD;
120 : }
121 :
122 503 : if (ci->reloc_rows > ci->max_rows)
123 148 : ci->reloc_rows=ci->max_rows; /* Check if wrong parameter */
124 :
125 503 : if (!(rec_per_key_part=
126 : (double*) my_malloc((keys + uniques)*HA_MAX_KEY_SEG*sizeof(double) +
127 : (keys + uniques)*HA_MAX_KEY_SEG*sizeof(ulong) +
128 : sizeof(uint16) * columns,
129 : MYF(MY_WME | MY_ZEROFILL))))
130 0 : DBUG_RETURN(my_errno);
131 503 : nulls_per_key_part= (ulong*) (rec_per_key_part +
132 : (keys + uniques) * HA_MAX_KEY_SEG);
133 503 : column_array= (uint16*) (nulls_per_key_part +
134 : (keys + uniques) * HA_MAX_KEY_SEG);
135 :
136 :
137 : /* Start by checking fields and field-types used */
138 503 : varchar_length=long_varchar_count=packed= not_block_record_extra_length=
139 : pack_reclength= max_field_lengths= 0;
140 503 : reclength= min_pack_length= ci->null_bytes;
141 503 : forced_packed= 0;
142 503 : column_nr= 0;
143 :
144 503 : for (column= columndef, end_column= column + columns ;
145 2731 : column != end_column ;
146 1725 : column++)
147 : {
148 : /* Fill in not used struct parts */
149 1725 : column->column_nr= column_nr++;
150 1725 : column->offset= reclength;
151 1725 : column->empty_pos= 0;
152 1725 : column->empty_bit= 0;
153 1725 : column->fill_length= column->length;
154 1725 : if (column->null_bit)
155 183 : options|= HA_OPTION_NULL_FIELDS;
156 :
157 1725 : reclength+= column->length;
158 1725 : type= column->type;
159 1725 : if (datafile_type == BLOCK_RECORD)
160 : {
161 1295 : if (type == FIELD_SKIP_PRESPACE)
162 513 : type= column->type= FIELD_NORMAL; /* SKIP_PRESPACE not supported */
163 1295 : if (type == FIELD_NORMAL &&
164 : column->length > FULL_PAGE_SIZE(maria_block_size))
165 : {
166 : /* FIELD_NORMAL can't be split over many blocks, convert to a CHAR */
167 0 : type= column->type= FIELD_SKIP_ENDSPACE;
168 : }
169 : }
170 :
171 1725 : if (type != FIELD_NORMAL && type != FIELD_CHECK)
172 : {
173 806 : column->empty_pos= packed/8;
174 806 : column->empty_bit= (1 << (packed & 7));
175 806 : if (type == FIELD_BLOB)
176 : {
177 215 : forced_packed= 1;
178 215 : packed++;
179 215 : share.base.blobs++;
180 215 : if (pack_reclength != INT_MAX32)
181 : {
182 141 : if (column->length == 4+portable_sizeof_char_ptr)
183 141 : pack_reclength= INT_MAX32;
184 : else
185 : {
186 : /* Add max possible blob length */
187 0 : pack_reclength+= (1 << ((column->length-
188 : portable_sizeof_char_ptr)*8));
189 : }
190 : }
191 215 : max_field_lengths+= (column->length - portable_sizeof_char_ptr);
192 : }
193 1006 : else if (type == FIELD_SKIP_PRESPACE ||
194 : type == FIELD_SKIP_ENDSPACE)
195 : {
196 415 : forced_packed= 1;
197 415 : max_field_lengths+= column->length > 255 ? 2 : 1;
198 415 : not_block_record_extra_length++;
199 415 : packed++;
200 : }
201 176 : else if (type == FIELD_VARCHAR)
202 : {
203 40 : varchar_length+= column->length-1; /* Used for min_pack_length */
204 40 : pack_reclength++;
205 40 : not_block_record_extra_length++;
206 40 : max_field_lengths++;
207 40 : packed++;
208 40 : column->fill_length= 1;
209 40 : options|= HA_OPTION_NULL_FIELDS; /* Use ma_checksum() */
210 :
211 : /* We must test for 257 as length includes pack-length */
212 40 : if (test(column->length >= 257))
213 : {
214 10 : long_varchar_count++;
215 10 : max_field_lengths++;
216 10 : column->fill_length= 2;
217 : }
218 : }
219 136 : else if (type == FIELD_SKIP_ZERO)
220 136 : packed++;
221 : else
222 : {
223 0 : if (!column->null_bit)
224 0 : min_pack_length+= column->length;
225 : else
226 : {
227 : /* Only BLOCK_RECORD skips NULL fields for all field values */
228 0 : not_block_record_extra_length+= column->length;
229 : }
230 0 : column->empty_pos= 0;
231 0 : column->empty_bit= 0;
232 : }
233 : }
234 : else /* FIELD_NORMAL */
235 : {
236 919 : if (!column->null_bit)
237 : {
238 906 : min_pack_length+= column->length;
239 906 : share.base.fixed_not_null_fields++;
240 906 : share.base.fixed_not_null_fields_length+= column->length;
241 : }
242 : else
243 13 : not_block_record_extra_length+= column->length;
244 : }
245 : }
246 :
247 503 : if (datafile_type == STATIC_RECORD && forced_packed)
248 : {
249 : /* Can't use fixed length records, revert to block records */
250 5 : datafile_type= BLOCK_RECORD;
251 : }
252 :
253 503 : if (datafile_type == DYNAMIC_RECORD)
254 53 : options|= HA_OPTION_PACK_RECORD; /* Must use packed records */
255 :
256 503 : if (datafile_type == STATIC_RECORD)
257 : {
258 : /* We can't use checksum with static length rows */
259 50 : flags&= ~HA_CREATE_CHECKSUM;
260 50 : options&= ~HA_OPTION_CHECKSUM;
261 50 : min_pack_length= reclength;
262 50 : packed= 0;
263 : }
264 453 : else if (datafile_type != BLOCK_RECORD)
265 83 : min_pack_length+= not_block_record_extra_length;
266 : else
267 370 : min_pack_length+= 5; /* Min row overhead */
268 :
269 503 : if (flags & HA_CREATE_TMP_TABLE)
270 : {
271 0 : options|= HA_OPTION_TMP_TABLE;
272 0 : tmp_table= TRUE;
273 0 : create_mode|= O_NOFOLLOW;
274 : /* "CREATE TEMPORARY" tables are not crash-safe (dropped at restart) */
275 0 : ci->transactional= FALSE;
276 0 : flags&= ~HA_CREATE_PAGE_CHECKSUM;
277 : }
278 503 : share.base.null_bytes= ci->null_bytes;
279 503 : share.base.original_null_bytes= ci->null_bytes;
280 503 : share.base.born_transactional= ci->transactional;
281 503 : share.base.max_field_lengths= max_field_lengths;
282 503 : share.base.field_offsets= 0; /* for future */
283 :
284 503 : if (flags & HA_CREATE_CHECKSUM || (options & HA_OPTION_CHECKSUM))
285 : {
286 305 : options|= HA_OPTION_CHECKSUM;
287 305 : min_pack_length++;
288 305 : pack_reclength++;
289 : }
290 503 : if (pack_reclength < INT_MAX32)
291 362 : pack_reclength+= max_field_lengths + long_varchar_count;
292 : else
293 141 : pack_reclength= INT_MAX32;
294 :
295 503 : if (flags & HA_CREATE_DELAY_KEY_WRITE)
296 5 : options|= HA_OPTION_DELAY_KEY_WRITE;
297 503 : if (flags & HA_CREATE_RELIES_ON_SQL_LAYER)
298 0 : options|= HA_OPTION_RELIES_ON_SQL_LAYER;
299 503 : if (flags & HA_CREATE_PAGE_CHECKSUM)
300 299 : options|= HA_OPTION_PAGE_CHECKSUM;
301 :
302 503 : pack_bytes= (packed + 7) / 8;
303 503 : if (pack_reclength != INT_MAX32)
304 362 : pack_reclength+= reclength+pack_bytes +
305 : test(test_all_bits(options, HA_OPTION_CHECKSUM | HA_OPTION_PACK_RECORD));
306 503 : min_pack_length+= pack_bytes;
307 : /* Calculate min possible row length for rows-in-block */
308 503 : extra_header_size= MAX_FIXED_HEADER_SIZE;
309 503 : if (ci->transactional)
310 : {
311 324 : extra_header_size= TRANS_MAX_FIXED_HEADER_SIZE;
312 324 : DBUG_PRINT("info",("creating a transactional table"));
313 : }
314 503 : share.base.min_block_length= (extra_header_size + share.base.null_bytes +
315 : pack_bytes);
316 503 : if (!ci->data_file_length && ci->max_rows)
317 : {
318 22 : if (pack_reclength == INT_MAX32 ||
319 : (~(ulonglong) 0)/ci->max_rows < (ulonglong) pack_reclength)
320 5 : ci->data_file_length= ~(ulonglong) 0;
321 : else
322 12 : ci->data_file_length=(ulonglong) ci->max_rows*pack_reclength;
323 : }
324 486 : else if (!ci->max_rows)
325 : {
326 456 : if (datafile_type == BLOCK_RECORD)
327 : {
328 : uint rows_per_page= ((maria_block_size - PAGE_OVERHEAD_SIZE) /
329 : (min_pack_length + extra_header_size +
330 360 : DIR_ENTRY_SIZE));
331 360 : ulonglong data_file_length= ci->data_file_length;
332 360 : if (!data_file_length)
333 360 : data_file_length= ((((ulonglong) 1 << ((BLOCK_RECORD_POINTER_SIZE-1) *
334 : 8)) -1) * maria_block_size);
335 360 : if (rows_per_page > 0)
336 : {
337 360 : set_if_smaller(rows_per_page, MAX_ROWS_PER_PAGE);
338 360 : ci->max_rows= data_file_length / maria_block_size * rows_per_page;
339 : }
340 : else
341 0 : ci->max_rows= data_file_length / (min_pack_length +
342 : extra_header_size +
343 : DIR_ENTRY_SIZE);
344 : }
345 : else
346 96 : ci->max_rows=(ha_rows) (ci->data_file_length/(min_pack_length +
347 : ((options &
348 : HA_OPTION_PACK_RECORD) ?
349 : 3 : 0)));
350 : }
351 503 : max_rows= (ulonglong) ci->max_rows;
352 503 : if (datafile_type == BLOCK_RECORD)
353 : {
354 : /*
355 : The + 1 is for record position withing page
356 : The / 2 is because we need one bit for knowing if there is transid's
357 : after the row pointer
358 : */
359 370 : pointer= maria_get_pointer_length((ci->data_file_length /
360 : (maria_block_size * 2)), 3) + 1;
361 370 : set_if_smaller(pointer, BLOCK_RECORD_POINTER_SIZE);
362 :
363 370 : if (!max_rows)
364 0 : max_rows= (((((ulonglong) 1 << ((pointer-1)*8)) -1) * maria_block_size) /
365 : min_pack_length / 2);
366 : }
367 : else
368 : {
369 133 : if (datafile_type != STATIC_RECORD)
370 83 : pointer= maria_get_pointer_length(ci->data_file_length,
371 : maria_data_pointer_size);
372 : else
373 50 : pointer= maria_get_pointer_length(ci->max_rows, maria_data_pointer_size);
374 133 : if (!max_rows)
375 96 : max_rows= ((((ulonglong) 1 << (pointer*8)) -1) / min_pack_length);
376 : }
377 :
378 503 : real_reclength=reclength;
379 503 : if (datafile_type == STATIC_RECORD)
380 : {
381 50 : if (reclength <= pointer)
382 0 : reclength=pointer+1; /* reserve place for delete link */
383 : }
384 : else
385 453 : reclength+= long_varchar_count; /* We need space for varchar! */
386 :
387 503 : max_key_length=0; tot_length=0 ; key_segs=0;
388 503 : fulltext_keys=0;
389 503 : share.state.rec_per_key_part= rec_per_key_part;
390 503 : share.state.nulls_per_key_part= nulls_per_key_part;
391 503 : share.state.key_root=key_root;
392 503 : share.state.key_del= HA_OFFSET_ERROR;
393 503 : if (uniques)
394 40 : max_key_length= MARIA_UNIQUE_HASH_LENGTH + pointer;
395 :
396 1771 : for (i=0, keydef=keydefs ; i < keys ; i++ , keydef++)
397 : {
398 1268 : share.state.key_root[i]= HA_OFFSET_ERROR;
399 1268 : length= real_length_diff= 0;
400 1268 : min_key_length= key_length= pointer;
401 :
402 1268 : if (keydef->key_alg == HA_KEY_ALG_RTREE)
403 0 : keydef->flag|= HA_RTREE_INDEX; /* For easier tests */
404 :
405 1268 : if (keydef->flag & HA_SPATIAL)
406 : {
407 : #ifdef HAVE_SPATIAL
408 : /* BAR TODO to support 3D and more dimensions in the future */
409 0 : uint sp_segs=SPDIMS*2;
410 0 : keydef->flag=HA_SPATIAL;
411 :
412 0 : if (flags & HA_DONT_TOUCH_DATA)
413 : {
414 : /*
415 : Called by maria_chk - i.e. table structure was taken from
416 : MYI file and SPATIAL key *does have* additional sp_segs keysegs.
417 : keydef->seg here points right at the GEOMETRY segment,
418 : so we only need to decrease keydef->keysegs.
419 : (see maria_recreate_table() in _ma_check.c)
420 : */
421 0 : keydef->keysegs-=sp_segs-1;
422 : }
423 :
424 0 : for (j=0, keyseg=keydef->seg ; (int) j < keydef->keysegs ;
425 0 : j++, keyseg++)
426 : {
427 0 : if (keyseg->type != HA_KEYTYPE_BINARY &&
428 : keyseg->type != HA_KEYTYPE_VARBINARY1 &&
429 : keyseg->type != HA_KEYTYPE_VARBINARY2)
430 : {
431 0 : my_errno=HA_WRONG_CREATE_OPTION;
432 0 : goto err_no_lock;
433 : }
434 : }
435 0 : keydef->keysegs+=sp_segs;
436 0 : key_length+=SPLEN*sp_segs;
437 0 : length++; /* At least one length uchar */
438 0 : min_key_length++;
439 : #else
440 : my_errno= HA_ERR_UNSUPPORTED;
441 : goto err_no_lock;
442 : #endif /*HAVE_SPATIAL*/
443 : }
444 1268 : else if (keydef->flag & HA_FULLTEXT)
445 : {
446 0 : keydef->flag=HA_FULLTEXT | HA_PACK_KEY | HA_VAR_LENGTH_KEY;
447 0 : options|=HA_OPTION_PACK_KEYS; /* Using packed keys */
448 :
449 0 : for (j=0, keyseg=keydef->seg ; (int) j < keydef->keysegs ;
450 0 : j++, keyseg++)
451 : {
452 0 : if (keyseg->type != HA_KEYTYPE_TEXT &&
453 : keyseg->type != HA_KEYTYPE_VARTEXT1 &&
454 : keyseg->type != HA_KEYTYPE_VARTEXT2)
455 : {
456 0 : my_errno=HA_WRONG_CREATE_OPTION;
457 0 : goto err_no_lock;
458 : }
459 0 : if (!(keyseg->flag & HA_BLOB_PART) &&
460 : (keyseg->type == HA_KEYTYPE_VARTEXT1 ||
461 : keyseg->type == HA_KEYTYPE_VARTEXT2))
462 : {
463 : /* Make a flag that this is a VARCHAR */
464 0 : keyseg->flag|= HA_VAR_LENGTH_PART;
465 : /* Store in bit_start number of bytes used to pack the length */
466 0 : keyseg->bit_start= ((keyseg->type == HA_KEYTYPE_VARTEXT1)?
467 : 1 : 2);
468 : }
469 : }
470 :
471 0 : fulltext_keys++;
472 0 : key_length+= HA_FT_MAXBYTELEN+HA_FT_WLEN;
473 0 : length++; /* At least one length uchar */
474 0 : min_key_length+= 1 + HA_FT_WLEN;
475 0 : real_length_diff=HA_FT_MAXBYTELEN-FT_MAX_WORD_LEN_FOR_SORT;
476 : }
477 : else
478 : {
479 : /* Test if prefix compression */
480 1268 : if (keydef->flag & HA_PACK_KEY)
481 : {
482 : /* Can't use space_compression on number keys */
483 232 : if ((keydef->seg[0].flag & HA_SPACE_PACK) &&
484 : keydef->seg[0].type == (int) HA_KEYTYPE_NUM)
485 0 : keydef->seg[0].flag&= ~HA_SPACE_PACK;
486 :
487 : /* Only use HA_PACK_KEY when first segment is a variable length key */
488 232 : if (!(keydef->seg[0].flag & (HA_SPACE_PACK | HA_BLOB_PART |
489 : HA_VAR_LENGTH_PART)))
490 : {
491 : /* pack relative to previous key */
492 68 : keydef->flag&= ~HA_PACK_KEY;
493 68 : keydef->flag|= HA_BINARY_PACK_KEY | HA_VAR_LENGTH_KEY;
494 : }
495 : else
496 : {
497 164 : keydef->seg[0].flag|=HA_PACK_KEY; /* for easyer intern test */
498 164 : keydef->flag|=HA_VAR_LENGTH_KEY;
499 164 : options|=HA_OPTION_PACK_KEYS; /* Using packed keys */
500 : }
501 : }
502 1268 : if (keydef->flag & HA_BINARY_PACK_KEY)
503 143 : options|=HA_OPTION_PACK_KEYS; /* Using packed keys */
504 :
505 1268 : if (keydef->flag & HA_AUTO_KEY && ci->with_auto_increment)
506 0 : share.base.auto_key=i+1;
507 2689 : for (j=0, keyseg=keydef->seg ; j < keydef->keysegs ; j++, keyseg++)
508 : {
509 : /* numbers are stored with high by first to make compression easier */
510 1421 : switch (keyseg->type) {
511 : case HA_KEYTYPE_SHORT_INT:
512 : case HA_KEYTYPE_LONG_INT:
513 : case HA_KEYTYPE_FLOAT:
514 : case HA_KEYTYPE_DOUBLE:
515 : case HA_KEYTYPE_USHORT_INT:
516 : case HA_KEYTYPE_ULONG_INT:
517 : case HA_KEYTYPE_LONGLONG:
518 : case HA_KEYTYPE_ULONGLONG:
519 : case HA_KEYTYPE_INT24:
520 : case HA_KEYTYPE_UINT24:
521 : case HA_KEYTYPE_INT8:
522 0 : keyseg->flag|= HA_SWAP_KEY;
523 0 : break;
524 : case HA_KEYTYPE_VARTEXT1:
525 : case HA_KEYTYPE_VARTEXT2:
526 : case HA_KEYTYPE_VARBINARY1:
527 : case HA_KEYTYPE_VARBINARY2:
528 94 : if (!(keyseg->flag & HA_BLOB_PART))
529 : {
530 : /* Make a flag that this is a VARCHAR */
531 20 : keyseg->flag|= HA_VAR_LENGTH_PART;
532 : /* Store in bit_start number of bytes used to pack the length */
533 20 : keyseg->bit_start= ((keyseg->type == HA_KEYTYPE_VARTEXT1 ||
534 : keyseg->type == HA_KEYTYPE_VARBINARY1) ?
535 : 1 : 2);
536 : }
537 : break;
538 : default:
539 : break;
540 : }
541 1421 : if (keyseg->flag & HA_SPACE_PACK)
542 : {
543 199 : DBUG_ASSERT(!(keyseg->flag & (HA_VAR_LENGTH_PART | HA_BLOB_PART)));
544 199 : keydef->flag |= HA_SPACE_PACK_USED | HA_VAR_LENGTH_KEY;
545 199 : options|=HA_OPTION_PACK_KEYS; /* Using packed keys */
546 199 : length++; /* At least one length uchar */
547 199 : if (!keyseg->null_bit)
548 184 : min_key_length++;
549 199 : key_length+= keyseg->length;
550 199 : if (keyseg->length >= 255)
551 : {
552 : /* prefix may be 3 bytes */
553 15 : length+= 2;
554 : }
555 : }
556 1222 : else if (keyseg->flag & (HA_VAR_LENGTH_PART | HA_BLOB_PART))
557 : {
558 94 : DBUG_ASSERT(!test_all_bits(keyseg->flag,
559 : (HA_VAR_LENGTH_PART | HA_BLOB_PART)));
560 94 : keydef->flag|=HA_VAR_LENGTH_KEY;
561 94 : length++; /* At least one length uchar */
562 94 : if (!keyseg->null_bit)
563 20 : min_key_length++;
564 94 : options|=HA_OPTION_PACK_KEYS; /* Using packed keys */
565 94 : key_length+= keyseg->length;
566 94 : if (keyseg->length >= 255)
567 : {
568 : /* prefix may be 3 bytes */
569 15 : length+= 2;
570 : }
571 : }
572 : else
573 : {
574 1128 : key_length+= keyseg->length;
575 1128 : if (!keyseg->null_bit)
576 1034 : min_key_length+= keyseg->length;
577 : }
578 1421 : if (keyseg->null_bit)
579 : {
580 183 : key_length++;
581 : /* min key part is 1 byte */
582 183 : min_key_length++;
583 183 : options|=HA_OPTION_PACK_KEYS;
584 183 : keyseg->flag|=HA_NULL_PART;
585 183 : keydef->flag|=HA_VAR_LENGTH_KEY | HA_NULL_PART_KEY;
586 : }
587 : }
588 : } /* if HA_FULLTEXT */
589 1268 : key_segs+=keydef->keysegs;
590 1268 : if (keydef->keysegs > HA_MAX_KEY_SEG)
591 : {
592 0 : my_errno=HA_WRONG_CREATE_OPTION;
593 0 : goto err_no_lock;
594 : }
595 : /*
596 : key_segs may be 0 in the case when we only want to be able to
597 : add on row into the table. This can happen with some DISTINCT queries
598 : in MySQL
599 : */
600 1268 : if ((keydef->flag & (HA_NOSAME | HA_NULL_PART_KEY)) == HA_NOSAME &&
601 : key_segs)
602 285 : share.state.rec_per_key_part[key_segs-1]=1L;
603 1268 : length+=key_length;
604 : /*
605 : A key can't be longer than than half a index block (as we have
606 : to be able to put at least 2 keys on an index block for the key
607 : algorithms to work).
608 : */
609 1268 : if (length > maria_max_key_length())
610 : {
611 0 : my_errno=HA_WRONG_CREATE_OPTION;
612 0 : goto err_no_lock;
613 : }
614 1268 : keydef->block_length= (uint16) maria_block_size;
615 1268 : keydef->keylength= (uint16) key_length;
616 1268 : keydef->minlength= (uint16) min_key_length;
617 1268 : keydef->maxlength= (uint16) length;
618 :
619 1268 : if (length > max_key_length)
620 809 : max_key_length= length;
621 1268 : tot_length+= ((max_rows/(ulong) (((uint) maria_block_size -
622 : MAX_KEYPAGE_HEADER_SIZE -
623 : KEYPAGE_CHECKSUM_SIZE)/
624 : (length*2))) *
625 : maria_block_size);
626 : }
627 :
628 503 : unique_key_parts=0;
629 543 : for (i=0, uniquedef=uniquedefs ; i < uniques ; i++ , uniquedef++)
630 : {
631 40 : uniquedef->key=keys+i;
632 40 : unique_key_parts+=uniquedef->keysegs;
633 40 : share.state.key_root[keys+i]= HA_OFFSET_ERROR;
634 40 : tot_length+= (max_rows/(ulong) (((uint) maria_block_size -
635 : MAX_KEYPAGE_HEADER_SIZE -
636 : KEYPAGE_CHECKSUM_SIZE) /
637 : ((MARIA_UNIQUE_HASH_LENGTH + pointer)*2)))*
638 : (ulong) maria_block_size;
639 : }
640 503 : keys+=uniques; /* Each unique has 1 key */
641 503 : key_segs+=uniques; /* Each unique has 1 key seg */
642 :
643 503 : base_pos=(MARIA_STATE_INFO_SIZE + keys * MARIA_STATE_KEY_SIZE +
644 : key_segs * MARIA_STATE_KEYSEG_SIZE);
645 503 : info_length= base_pos+(uint) (MARIA_BASE_INFO_SIZE+
646 : keys * MARIA_KEYDEF_SIZE+
647 : uniques * MARIA_UNIQUEDEF_SIZE +
648 : (key_segs + unique_key_parts)*HA_KEYSEG_SIZE+
649 : columns*(MARIA_COLUMNDEF_SIZE + 2));
650 :
651 503 : DBUG_PRINT("info", ("info_length: %u", info_length));
652 : /* There are only 16 bits for the total header length. */
653 503 : if (info_length > 65535)
654 : {
655 0 : my_printf_error(HA_WRONG_CREATE_OPTION,
656 : "Maria table '%s' has too many columns and/or "
657 : "indexes and/or unique constraints.",
658 : MYF(0), name + dirname_length(name));
659 0 : my_errno= HA_WRONG_CREATE_OPTION;
660 0 : goto err_no_lock;
661 : }
662 :
663 503 : bmove(share.state.header.file_version, maria_file_magic, 4);
664 503 : ci->old_options=options | (ci->old_options & HA_OPTION_TEMP_COMPRESS_RECORD ?
665 : HA_OPTION_COMPRESS_RECORD |
666 : HA_OPTION_TEMP_COMPRESS_RECORD: 0);
667 503 : mi_int2store(share.state.header.options,ci->old_options);
668 503 : mi_int2store(share.state.header.header_length,info_length);
669 503 : mi_int2store(share.state.header.state_info_length,MARIA_STATE_INFO_SIZE);
670 503 : mi_int2store(share.state.header.base_info_length,MARIA_BASE_INFO_SIZE);
671 503 : mi_int2store(share.state.header.base_pos,base_pos);
672 503 : share.state.header.data_file_type= share.data_file_type= datafile_type;
673 503 : share.state.header.org_data_file_type= org_datafile_type;
674 503 : share.state.header.language= (ci->language ?
675 : ci->language : default_charset_info->number);
676 :
677 503 : share.state.dellink = HA_OFFSET_ERROR;
678 503 : share.state.first_bitmap_with_space= 0;
679 : #ifdef EXTERNAL_LOCKING
680 : share.state.process= (ulong) getpid();
681 : #endif
682 503 : share.state.version= (ulong) time((time_t*) 0);
683 503 : share.state.sortkey= (ushort) ~0;
684 503 : share.state.auto_increment=ci->auto_increment;
685 503 : share.options=options;
686 503 : share.base.rec_reflength=pointer;
687 503 : share.base.block_size= maria_block_size;
688 :
689 : /*
690 : Get estimate for index file length (this may be wrong for FT keys)
691 : This is used for pointers to other key pages.
692 : */
693 503 : tmp= (tot_length + maria_block_size * keys *
694 : MARIA_INDEX_BLOCK_MARGIN) / maria_block_size;
695 :
696 : /*
697 : use maximum of key_file_length we calculated and key_file_length value we
698 : got from MAI file header (see also mariapack.c:save_state)
699 : */
700 503 : share.base.key_reflength=
701 : maria_get_pointer_length(max(ci->key_file_length,tmp),3);
702 503 : share.base.keys= share.state.header.keys= keys;
703 503 : share.state.header.uniques= uniques;
704 503 : share.state.header.fulltext_keys= fulltext_keys;
705 503 : mi_int2store(share.state.header.key_parts,key_segs);
706 503 : mi_int2store(share.state.header.unique_key_parts,unique_key_parts);
707 :
708 503 : maria_set_all_keys_active(share.state.key_map, keys);
709 :
710 503 : share.base.keystart = share.state.state.key_file_length=
711 : MY_ALIGN(info_length, maria_block_size);
712 503 : share.base.max_key_block_length= maria_block_size;
713 503 : share.base.max_key_length=ALIGN_SIZE(max_key_length+4);
714 503 : share.base.records=ci->max_rows;
715 503 : share.base.reloc= ci->reloc_rows;
716 503 : share.base.reclength=real_reclength;
717 503 : share.base.pack_reclength=reclength+ test(options & HA_OPTION_CHECKSUM);
718 503 : share.base.max_pack_length=pack_reclength;
719 503 : share.base.min_pack_length=min_pack_length;
720 503 : share.base.pack_bytes= pack_bytes;
721 503 : share.base.fields= columns;
722 503 : share.base.pack_fields= packed;
723 :
724 503 : if (share.data_file_type == BLOCK_RECORD)
725 : {
726 : /*
727 : we are going to create a first bitmap page, set data_file_length
728 : to reflect this, before the state goes to disk
729 : */
730 370 : share.state.state.data_file_length= maria_block_size;
731 : /* Add length of packed fields + length */
732 370 : share.base.pack_reclength+= share.base.max_field_lengths+3;
733 :
734 : /* Adjust max_pack_length, to be used if we have short rows */
735 370 : if (share.base.max_pack_length < maria_block_size)
736 : {
737 238 : share.base.max_pack_length+= FLAG_SIZE;
738 238 : if (ci->transactional)
739 192 : share.base.max_pack_length+= TRANSID_SIZE * 2;
740 : }
741 : }
742 :
743 : /* max_data_file_length and max_key_file_length are recalculated on open */
744 503 : if (tmp_table)
745 0 : share.base.max_data_file_length= (my_off_t) ci->data_file_length;
746 503 : else if (ci->transactional && translog_status == TRANSLOG_OK &&
747 : !maria_in_recovery)
748 : {
749 : /*
750 : we have checked translog_inited above, because maria_chk may call us
751 : (via maria_recreate_table()) and it does not have a log.
752 : */
753 312 : sync_dir= MY_SYNC_DIR;
754 : /*
755 : If crash between _ma_state_info_write_sub() and
756 : _ma_update_state__lsns_sub(), table should be ignored by Recovery (or
757 : old REDOs would fail), so we cannot let LSNs be 0:
758 : */
759 312 : share.state.skip_redo_lsn= share.state.is_of_horizon=
760 : share.state.create_rename_lsn= LSN_MAX;
761 : }
762 :
763 503 : if (datafile_type == DYNAMIC_RECORD)
764 : {
765 53 : share.base.min_block_length=
766 : (share.base.pack_reclength+3 < MARIA_EXTEND_BLOCK_LENGTH &&
767 : ! share.base.blobs) ?
768 : max(share.base.pack_reclength,MARIA_MIN_BLOCK_LENGTH) :
769 : MARIA_EXTEND_BLOCK_LENGTH;
770 : }
771 450 : else if (datafile_type == STATIC_RECORD)
772 50 : share.base.min_block_length= share.base.pack_reclength;
773 :
774 503 : if (! (flags & HA_DONT_TOUCH_DATA))
775 473 : share.state.create_time= (long) time((time_t*) 0);
776 :
777 503 : pthread_mutex_lock(&THR_LOCK_maria);
778 :
779 : /*
780 : NOTE: For test_if_reopen() we need a real path name. Hence we need
781 : MY_RETURN_REAL_PATH for every fn_format(filename, ...).
782 : */
783 503 : if (ci->index_file_name)
784 : {
785 0 : char *iext= strrchr(ci->index_file_name, '.');
786 0 : int have_iext= iext && !strcmp(iext, MARIA_NAME_IEXT);
787 0 : if (tmp_table)
788 : {
789 : char *path;
790 : /* chop off the table name, tempory tables use generated name */
791 0 : if ((path= strrchr(ci->index_file_name, FN_LIBCHAR)))
792 0 : *path= '\0';
793 0 : fn_format(filename, name, ci->index_file_name, MARIA_NAME_IEXT,
794 : MY_REPLACE_DIR | MY_UNPACK_FILENAME |
795 : MY_RETURN_REAL_PATH | MY_APPEND_EXT);
796 : }
797 : else
798 : {
799 0 : fn_format(filename, ci->index_file_name, "", MARIA_NAME_IEXT,
800 : MY_UNPACK_FILENAME | MY_RETURN_REAL_PATH |
801 : (have_iext ? MY_REPLACE_EXT : MY_APPEND_EXT));
802 : }
803 0 : fn_format(linkname, name, "", MARIA_NAME_IEXT,
804 : MY_UNPACK_FILENAME|MY_APPEND_EXT);
805 0 : linkname_ptr= linkname;
806 : /*
807 : Don't create the table if the link or file exists to ensure that one
808 : doesn't accidently destroy another table.
809 : Don't sync dir now if the data file has the same path.
810 : */
811 0 : create_flag=
812 : (ci->data_file_name &&
813 : !strcmp(ci->index_file_name, ci->data_file_name)) ? 0 : sync_dir;
814 : }
815 : else
816 : {
817 503 : char *iext= strrchr(name, '.');
818 503 : int have_iext= iext && !strcmp(iext, MARIA_NAME_IEXT);
819 503 : fn_format(filename, name, "", MARIA_NAME_IEXT,
820 : MY_UNPACK_FILENAME | MY_RETURN_REAL_PATH |
821 : (have_iext ? MY_REPLACE_EXT : MY_APPEND_EXT));
822 503 : linkname_ptr= NullS;
823 : /*
824 : Replace the current file.
825 : Don't sync dir now if the data file has the same path.
826 : */
827 503 : create_flag= (flags & HA_CREATE_KEEP_FILES) ? 0 : MY_DELETE_OLD;
828 503 : create_flag|= (!ci->data_file_name ? 0 : sync_dir);
829 : }
830 :
831 : /*
832 : If a MRG_MARIA table is in use, the mapped MARIA tables are open,
833 : but no entry is made in the table cache for them.
834 : A TRUNCATE command checks for the table in the cache only and could
835 : be fooled to believe, the table is not open.
836 : Pull the emergency brake in this situation. (Bug #8306)
837 :
838 :
839 : NOTE: The filename is compared against unique_file_name of every
840 : open table. Hence we need a real path here.
841 : */
842 503 : if (_ma_test_if_reopen(filename))
843 : {
844 0 : my_printf_error(0, "MARIA table '%s' is in use "
845 : "(most likely by a MERGE table). Try FLUSH TABLES.",
846 : MYF(0), name + dirname_length(name));
847 0 : my_errno= HA_ERR_TABLE_EXIST;
848 0 : goto err;
849 : }
850 :
851 503 : if ((file= my_create_with_symlink(linkname_ptr, filename, 0, create_mode,
852 : MYF(MY_WME|create_flag))) < 0)
853 503 : goto err;
854 503 : errpos=1;
855 :
856 503 : DBUG_PRINT("info", ("write state info and base info"));
857 503 : if (_ma_state_info_write_sub(file, &share.state,
858 : MA_STATE_INFO_WRITE_FULL_INFO) ||
859 : _ma_base_info_write(file, &share.base))
860 : goto err;
861 503 : DBUG_PRINT("info", ("base_pos: %d base_info_size: %d",
862 : base_pos, MARIA_BASE_INFO_SIZE));
863 503 : DBUG_ASSERT(my_tell(file,MYF(0)) == base_pos+ MARIA_BASE_INFO_SIZE);
864 :
865 : /* Write key and keyseg definitions */
866 503 : DBUG_PRINT("info", ("write key and keyseg definitions"));
867 1771 : for (i=0 ; i < share.base.keys - uniques; i++)
868 : {
869 1268 : uint sp_segs=(keydefs[i].flag & HA_SPATIAL) ? 2*SPDIMS : 0;
870 :
871 1268 : if (_ma_keydef_write(file, &keydefs[i]))
872 1268 : goto err;
873 2689 : for (j=0 ; j < keydefs[i].keysegs-sp_segs ; j++)
874 1421 : if (_ma_keyseg_write(file, &keydefs[i].seg[j]))
875 1421 : goto err;
876 : #ifdef HAVE_SPATIAL
877 1268 : for (j=0 ; j < sp_segs ; j++)
878 : {
879 : HA_KEYSEG sseg;
880 0 : sseg.type=SPTYPE;
881 0 : sseg.language= 7; /* Binary */
882 0 : sseg.null_bit=0;
883 0 : sseg.bit_start=0;
884 0 : sseg.bit_end=0;
885 0 : sseg.bit_length= 0;
886 0 : sseg.bit_pos= 0;
887 0 : sseg.length=SPLEN;
888 0 : sseg.null_pos=0;
889 0 : sseg.start=j*SPLEN;
890 0 : sseg.flag= HA_SWAP_KEY;
891 0 : if (_ma_keyseg_write(file, &sseg))
892 0 : goto err;
893 : }
894 : #endif
895 : }
896 : /* Create extra keys for unique definitions */
897 503 : offset= real_reclength - uniques*MARIA_UNIQUE_HASH_LENGTH;
898 503 : bzero((char*) &tmp_keydef,sizeof(tmp_keydef));
899 503 : bzero((char*) &tmp_keyseg,sizeof(tmp_keyseg));
900 543 : for (i=0; i < uniques ; i++)
901 : {
902 40 : tmp_keydef.keysegs=1;
903 40 : tmp_keydef.flag= HA_UNIQUE_CHECK;
904 40 : tmp_keydef.block_length= (uint16) maria_block_size;
905 40 : tmp_keydef.keylength= MARIA_UNIQUE_HASH_LENGTH + pointer;
906 40 : tmp_keydef.minlength=tmp_keydef.maxlength=tmp_keydef.keylength;
907 40 : tmp_keyseg.type= MARIA_UNIQUE_HASH_TYPE;
908 40 : tmp_keyseg.length= MARIA_UNIQUE_HASH_LENGTH;
909 40 : tmp_keyseg.start= offset;
910 40 : offset+= MARIA_UNIQUE_HASH_LENGTH;
911 40 : if (_ma_keydef_write(file,&tmp_keydef) ||
912 : _ma_keyseg_write(file,(&tmp_keyseg)))
913 : goto err;
914 : }
915 :
916 : /* Save unique definition */
917 503 : DBUG_PRINT("info", ("write unique definitions"));
918 543 : for (i=0 ; i < share.state.header.uniques ; i++)
919 : {
920 : HA_KEYSEG *keyseg_end;
921 40 : keyseg= uniquedefs[i].seg;
922 40 : if (_ma_uniquedef_write(file, &uniquedefs[i]))
923 40 : goto err;
924 40 : for (keyseg= uniquedefs[i].seg, keyseg_end= keyseg+ uniquedefs[i].keysegs;
925 160 : keyseg < keyseg_end;
926 80 : keyseg++)
927 : {
928 80 : switch (keyseg->type) {
929 : case HA_KEYTYPE_VARTEXT1:
930 : case HA_KEYTYPE_VARTEXT2:
931 : case HA_KEYTYPE_VARBINARY1:
932 : case HA_KEYTYPE_VARBINARY2:
933 10 : if (!(keyseg->flag & HA_BLOB_PART))
934 : {
935 10 : keyseg->flag|= HA_VAR_LENGTH_PART;
936 10 : keyseg->bit_start= ((keyseg->type == HA_KEYTYPE_VARTEXT1 ||
937 : keyseg->type == HA_KEYTYPE_VARBINARY1) ?
938 : 1 : 2);
939 : }
940 : break;
941 : default:
942 70 : DBUG_ASSERT((keyseg->flag & HA_VAR_LENGTH_PART) == 0);
943 : break;
944 : }
945 80 : if (_ma_keyseg_write(file, keyseg))
946 80 : goto err;
947 : }
948 : }
949 503 : DBUG_PRINT("info", ("write field definitions"));
950 503 : if (datafile_type == BLOCK_RECORD)
951 : {
952 : /* Store columns in a more efficient order */
953 : MARIA_COLUMNDEF **col_order, **pos;
954 370 : if (!(col_order= (MARIA_COLUMNDEF**) my_malloc(share.base.fields *
955 : sizeof(MARIA_COLUMNDEF*),
956 : MYF(MY_WME))))
957 370 : goto err;
958 370 : for (column= columndef, pos= col_order ;
959 2070 : column != end_column ;
960 1330 : column++, pos++)
961 1330 : *pos= column;
962 370 : qsort(col_order, share.base.fields, sizeof(*col_order),
963 : (qsort_cmp) compare_columns);
964 1700 : for (i=0 ; i < share.base.fields ; i++)
965 : {
966 1330 : column_array[col_order[i]->column_nr]= i;
967 1330 : if (_ma_columndef_write(file, col_order[i]))
968 : {
969 0 : my_free(col_order, MYF(0));
970 0 : goto err;
971 : }
972 : }
973 370 : my_free(col_order, MYF(0));
974 : }
975 : else
976 : {
977 528 : for (i=0 ; i < share.base.fields ; i++)
978 : {
979 395 : column_array[i]= (uint16) i;
980 395 : if (_ma_columndef_write(file, &columndef[i]))
981 395 : goto err;
982 : }
983 : }
984 503 : if (_ma_column_nr_write(file, column_array, columns))
985 503 : goto err;
986 :
987 503 : if ((kfile_size_before_extension= my_tell(file,MYF(0))) == MY_FILEPOS_ERROR)
988 503 : goto err;
989 : #ifndef DBUG_OFF
990 503 : if (kfile_size_before_extension != info_length)
991 0 : DBUG_PRINT("warning",("info_length: %u != used_length: %u",
992 : info_length, (uint)kfile_size_before_extension));
993 : #endif
994 :
995 503 : if (sync_dir)
996 : {
997 : /*
998 : we log the first bytes and then the size to which we extend; this is
999 : to avoid logging 1 KB of mostly zeroes if this is a small table.
1000 : */
1001 312 : char empty_string[]= "";
1002 : LEX_CUSTRING log_array[TRANSLOG_INTERNAL_PARTS + 4];
1003 312 : translog_size_t total_rec_length= 0;
1004 : uint k;
1005 : LSN lsn;
1006 312 : log_array[TRANSLOG_INTERNAL_PARTS + 1].length= 1 + 2 + 2 +
1007 : (uint) kfile_size_before_extension;
1008 : /* we are needing maybe 64 kB, so don't use the stack */
1009 312 : log_data= my_malloc(log_array[TRANSLOG_INTERNAL_PARTS + 1].length, MYF(0));
1010 312 : if ((log_data == NULL) ||
1011 : my_pread(file, 1 + 2 + 2 + log_data,
1012 : (size_t) kfile_size_before_extension, 0, MYF(MY_NABP)))
1013 : goto err;
1014 : /*
1015 : remember if the data file was created or not, to know if Recovery can
1016 : do it or not, in the future
1017 : */
1018 312 : log_data[0]= test(flags & HA_DONT_TOUCH_DATA);
1019 312 : int2store(log_data + 1, kfile_size_before_extension);
1020 312 : int2store(log_data + 1 + 2, share.base.keystart);
1021 312 : log_array[TRANSLOG_INTERNAL_PARTS + 0].str= (uchar *)name;
1022 : /* we store the end-zero, for Recovery to just pass it to my_create() */
1023 312 : log_array[TRANSLOG_INTERNAL_PARTS + 0].length= strlen(name) + 1;
1024 312 : log_array[TRANSLOG_INTERNAL_PARTS + 1].str= log_data;
1025 : /* symlink description is also needed for re-creation by Recovery: */
1026 : {
1027 312 : const char *s= ci->data_file_name ? ci->data_file_name : empty_string;
1028 312 : log_array[TRANSLOG_INTERNAL_PARTS + 2].str= (uchar*)s;
1029 312 : log_array[TRANSLOG_INTERNAL_PARTS + 2].length= strlen(s) + 1;
1030 312 : s= ci->index_file_name ? ci->index_file_name : empty_string;
1031 312 : log_array[TRANSLOG_INTERNAL_PARTS + 3].str= (uchar*)s;
1032 312 : log_array[TRANSLOG_INTERNAL_PARTS + 3].length= strlen(s) + 1;
1033 : }
1034 312 : for (k= TRANSLOG_INTERNAL_PARTS;
1035 1560 : k < (sizeof(log_array)/sizeof(log_array[0])); k++)
1036 1248 : total_rec_length+= (translog_size_t) log_array[k].length;
1037 : /**
1038 : For this record to be of any use for Recovery, we need the upper
1039 : MySQL layer to be crash-safe, which it is not now (that would require
1040 : work using the ddl_log of sql/sql_table.cc); when it is, we should
1041 : reconsider the moment of writing this log record (before or after op,
1042 : under THR_LOCK_maria or not...), how to use it in Recovery.
1043 : For now this record can serve when we apply logs to a backup,
1044 : so we sync it. This happens before the data file is created. If the
1045 : data file was created before, and we crashed before writing the log
1046 : record, at restart the table may be used, so we would not have a
1047 : trustable history in the log (impossible to apply this log to a
1048 : backup). The way we do it, if we crash before writing the log record
1049 : then there is no data file and the table cannot be used.
1050 : @todo Note that in case of TRUNCATE TABLE we also come here; for
1051 : Recovery to be able to finish TRUNCATE TABLE, instead of leaving a
1052 : half-truncated table, we should log the record at start of
1053 : maria_create(); for that we shouldn't write to the index file but to a
1054 : buffer (DYNAMIC_STRING), put the buffer into the record, then put the
1055 : buffer into the index file (so, change _ma_keydef_write() etc). That
1056 : would also enable Recovery to finish a CREATE TABLE. The final result
1057 : would be that we would be able to finish what the SQL layer has asked
1058 : for: it would be atomic.
1059 : When in CREATE/TRUNCATE (or DROP or RENAME or REPAIR) we have not
1060 : called external_lock(), so have no TRN. It does not matter, as all
1061 : these operations are non-transactional and sync their files.
1062 : */
1063 312 : if (unlikely(translog_write_record(&lsn,
1064 : LOGREC_REDO_CREATE_TABLE,
1065 : &dummy_transaction_object, NULL,
1066 : total_rec_length,
1067 : sizeof(log_array)/sizeof(log_array[0]),
1068 : log_array, NULL, NULL) ||
1069 : translog_flush(lsn)))
1070 312 : goto err;
1071 312 : share.kfile.file= file;
1072 312 : DBUG_EXECUTE_IF("maria_flush_whole_log",
1073 : {
1074 : DBUG_PRINT("maria_flush_whole_log", ("now"));
1075 : translog_flush(translog_get_horizon());
1076 : });
1077 312 : DBUG_EXECUTE_IF("maria_crash_create_table",
1078 : {
1079 : DBUG_PRINT("maria_crash_create_table", ("now"));
1080 : DBUG_ABORT();
1081 : });
1082 : /*
1083 : store LSN into file, needed for Recovery to not be confused if a
1084 : DROP+CREATE happened (applying REDOs to the wrong table).
1085 : */
1086 312 : if (_ma_update_state_lsns_sub(&share, lsn, trnman_get_min_safe_trid(),
1087 : FALSE, TRUE))
1088 312 : goto err;
1089 312 : my_free(log_data, MYF(0));
1090 : }
1091 :
1092 503 : if (!(flags & HA_DONT_TOUCH_DATA))
1093 : {
1094 473 : if (ci->data_file_name)
1095 : {
1096 0 : char *dext= strrchr(ci->data_file_name, '.');
1097 0 : int have_dext= dext && !strcmp(dext, MARIA_NAME_DEXT);
1098 :
1099 0 : if (tmp_table)
1100 : {
1101 : char *path;
1102 : /* chop off the table name, temporary tables use generated name */
1103 0 : if ((path= strrchr(ci->data_file_name, FN_LIBCHAR)))
1104 0 : *path= '\0';
1105 0 : fn_format(filename, name, ci->data_file_name, MARIA_NAME_DEXT,
1106 : MY_REPLACE_DIR | MY_UNPACK_FILENAME | MY_APPEND_EXT);
1107 : }
1108 : else
1109 : {
1110 0 : fn_format(filename, ci->data_file_name, "", MARIA_NAME_DEXT,
1111 : MY_UNPACK_FILENAME |
1112 : (have_dext ? MY_REPLACE_EXT : MY_APPEND_EXT));
1113 : }
1114 0 : fn_format(linkname, name, "",MARIA_NAME_DEXT,
1115 : MY_UNPACK_FILENAME | MY_APPEND_EXT);
1116 0 : linkname_ptr= linkname;
1117 0 : create_flag=0;
1118 : }
1119 : else
1120 : {
1121 473 : fn_format(filename,name,"", MARIA_NAME_DEXT,
1122 : MY_UNPACK_FILENAME | MY_APPEND_EXT);
1123 473 : linkname_ptr= NullS;
1124 473 : create_flag= (flags & HA_CREATE_KEEP_FILES) ? 0 : MY_DELETE_OLD;
1125 : }
1126 473 : if ((dfile=
1127 : my_create_with_symlink(linkname_ptr, filename, 0, create_mode,
1128 : MYF(MY_WME | create_flag | sync_dir))) < 0)
1129 473 : goto err;
1130 473 : errpos=3;
1131 :
1132 473 : if (_ma_initialize_data_file(&share, dfile))
1133 503 : goto err;
1134 : }
1135 :
1136 : /* Enlarge files */
1137 503 : DBUG_PRINT("info", ("enlarge to keystart: %lu",
1138 : (ulong) share.base.keystart));
1139 503 : if (my_chsize(file,(ulong) share.base.keystart,0,MYF(0)))
1140 503 : goto err;
1141 :
1142 503 : if (sync_dir && my_sync(file, MYF(0)))
1143 503 : goto err;
1144 :
1145 503 : if (! (flags & HA_DONT_TOUCH_DATA))
1146 : {
1147 : #ifdef USE_RELOC
1148 : if (my_chsize(dfile,share.base.min_pack_length*ci->reloc_rows,0,MYF(0)))
1149 : goto err;
1150 : #endif
1151 473 : if (sync_dir && my_sync(dfile, MYF(0)))
1152 473 : goto err;
1153 473 : if (my_close(dfile,MYF(0)))
1154 503 : goto err;
1155 : }
1156 503 : pthread_mutex_unlock(&THR_LOCK_maria);
1157 503 : res= 0;
1158 503 : my_free((char*) rec_per_key_part,MYF(0));
1159 503 : errpos=0;
1160 503 : if (my_close(file,MYF(0)))
1161 0 : res= my_errno;
1162 503 : DBUG_RETURN(res);
1163 :
1164 0 : err:
1165 0 : pthread_mutex_unlock(&THR_LOCK_maria);
1166 :
1167 0 : err_no_lock:
1168 0 : save_errno=my_errno;
1169 0 : switch (errpos) {
1170 : case 3:
1171 0 : VOID(my_close(dfile,MYF(0)));
1172 : /* fall through */
1173 : case 2:
1174 0 : if (! (flags & HA_DONT_TOUCH_DATA))
1175 0 : my_delete_with_symlink(fn_format(filename,name,"",MARIA_NAME_DEXT,
1176 : MY_UNPACK_FILENAME | MY_APPEND_EXT),
1177 : sync_dir);
1178 : /* fall through */
1179 : case 1:
1180 0 : VOID(my_close(file,MYF(0)));
1181 0 : if (! (flags & HA_DONT_TOUCH_DATA))
1182 0 : my_delete_with_symlink(fn_format(filename,name,"",MARIA_NAME_IEXT,
1183 : MY_UNPACK_FILENAME | MY_APPEND_EXT),
1184 : sync_dir);
1185 : }
1186 0 : my_free(log_data, MYF(MY_ALLOW_ZERO_PTR));
1187 0 : my_free((char*) rec_per_key_part, MYF(0));
1188 0 : DBUG_RETURN(my_errno=save_errno); /* return the fatal errno */
1189 : }
1190 :
1191 :
1192 : uint maria_get_pointer_length(ulonglong file_length, uint def)
1193 1117 : {
1194 1117 : DBUG_ASSERT(def >= 2 && def <= 7);
1195 1117 : if (file_length) /* If not default */
1196 : {
1197 : #ifdef NOT_YET_READY_FOR_8_BYTE_POINTERS
1198 : if (file_length >= (ULL(1) << 56))
1199 : def=8;
1200 : else
1201 : #endif
1202 658 : if (file_length >= (ULL(1) << 48))
1203 10 : def=7;
1204 648 : else if (file_length >= (ULL(1) << 40))
1205 172 : def=6;
1206 476 : else if (file_length >= (ULL(1) << 32))
1207 252 : def=5;
1208 224 : else if (file_length >= (ULL(1) << 24))
1209 56 : def=4;
1210 168 : else if (file_length >= (ULL(1) << 16))
1211 82 : def=3;
1212 : else
1213 86 : def=2;
1214 : }
1215 1117 : return def;
1216 : }
1217 :
1218 :
1219 : /*
1220 : Sort columns for records-in-block
1221 :
1222 : IMPLEMENTATION
1223 : Sort columns in following order:
1224 :
1225 : Fixed size, not null columns
1226 : Fixed length, null fields
1227 : Numbers (zero fill fields)
1228 : Variable length fields (CHAR, VARCHAR) according to length
1229 : Blobs
1230 :
1231 : For same kind of fields, keep fields in original order
1232 : */
1233 :
/* Reduce a signed value to -1, 0 or 1 according to its sign */
static inline int sign(long a)
{
  return (a > 0) - (a < 0);
}
1238 :
1239 :
1240 : static int compare_columns(MARIA_COLUMNDEF **a_ptr, MARIA_COLUMNDEF **b_ptr)
1241 1604 : {
1242 1604 : MARIA_COLUMNDEF *a= *a_ptr, *b= *b_ptr;
1243 : enum en_fieldtype a_type, b_type;
1244 :
1245 1604 : a_type= (a->type == FIELD_CHECK) ? FIELD_NORMAL : a->type;
1246 1604 : b_type= (b->type == FIELD_CHECK) ? FIELD_NORMAL : b->type;
1247 :
1248 1604 : if (a_type == FIELD_NORMAL && !a->null_bit)
1249 : {
1250 1089 : if (b_type != FIELD_NORMAL || b->null_bit)
1251 282 : return -1;
1252 807 : return sign((long) a->offset - (long) b->offset);
1253 : }
1254 515 : if (b_type == FIELD_NORMAL && !b->null_bit)
1255 257 : return 1;
1256 258 : if (a_type == b_type)
1257 82 : return sign((long) a->offset - (long) b->offset);
1258 176 : if (a_type == FIELD_NORMAL)
1259 0 : return -1;
1260 176 : if (b_type == FIELD_NORMAL)
1261 0 : return 1;
1262 176 : if (a_type == FIELD_SKIP_ZERO)
1263 120 : return -1;
1264 56 : if (b_type == FIELD_SKIP_ZERO)
1265 0 : return 1;
1266 56 : if (a->type != FIELD_BLOB && b->type != FIELD_BLOB)
1267 0 : if (a->length != b->length)
1268 0 : return sign((long) a->length - (long) b->length);
1269 56 : if (a_type == FIELD_BLOB)
1270 0 : return 1;
1271 56 : if (b_type == FIELD_BLOB)
1272 56 : return -1;
1273 0 : return sign((long) a->offset - (long) b->offset);
1274 : }
1275 :
1276 :
1277 : /**
1278 : @brief Initialize data file
1279 :
1280 : @note
1281 : In BLOCK_RECORD, a freshly created datafile is one page long; while in
1282 : other formats it is 0-byte long.
1283 : */
1284 :
1285 : int _ma_initialize_data_file(MARIA_SHARE *share, File dfile)
1286 623 : {
1287 623 : if (share->data_file_type == BLOCK_RECORD)
1288 : {
1289 514 : share->bitmap.block_size= share->base.block_size;
1290 514 : share->bitmap.file.file = dfile;
1291 514 : return _ma_bitmap_create_first(share);
1292 : }
1293 109 : return 0;
1294 : }
1295 :
1296 :
1297 : /**
1298 : @brief Writes create_rename_lsn, skip_redo_lsn and is_of_horizon to disk,
1299 : can force.
1300 :
1301 : This is for special cases where:
1302 : - we don't want to write the full state to disk (so, not call
1303 : _ma_state_info_write()) because some parts of the state may be
1304 : currently inconsistent, or because it would be overkill
1305 : - we must sync these LSNs immediately for correctness.
1306 : It acquires intern_lock to protect the LSNs and state write.
1307 :
1308 : @param share table's share
1309 : @param lsn LSN to write to log files
1310 : @param create_trid Trid to be used as state.create_trid
1311 : @param do_sync if the write should be forced to disk
1312 : @param update_create_rename_lsn if this LSN should be updated or not
1313 :
1314 : @return Operation status
1315 : @retval 0 ok
1316 : @retval 1 error (disk problem)
1317 : */
1318 :
1319 : int _ma_update_state_lsns(MARIA_SHARE *share, LSN lsn, TrID create_trid,
1320 : my_bool do_sync, my_bool update_create_rename_lsn)
1321 2 : {
1322 : int res;
1323 2 : pthread_mutex_lock(&share->intern_lock);
1324 2 : res= _ma_update_state_lsns_sub(share, lsn, create_trid, do_sync,
1325 : update_create_rename_lsn);
1326 2 : pthread_mutex_unlock(&share->intern_lock);
1327 2 : return res;
1328 : }
1329 :
1330 :
1331 : /**
1332 : @brief Writes create_rename_lsn, skip_redo_lsn and is_of_horizon to disk,
1333 : can force.
1334 :
1335 : Shortcut of _ma_update_state_lsns() when we know that intern_lock is not
1336 : needed (when creating a table or opening it for the first time).
1337 :
1338 : @param share table's share
1339 : @param lsn LSN to write to state; if LSN_IMPOSSIBLE, write
1340 : a LOGREC_IMPORTED_TABLE and use its LSN as lsn.
1341 : @param create_trid Trid to be used as state.create_trid
1342 : @param do_sync if the write should be forced to disk
1343 : @param update_create_rename_lsn if this LSN should be updated or not
1344 :
1345 : @return Operation status
1346 : @retval 0 ok
1347 : @retval 1 error (disk problem)
1348 : */
1349 :
1350 : #if (_MSC_VER == 1310)
1351 : /*
1352 : Visual Studio 2003 compiler produces internal compiler error
1353 : in this function. Disable optimizations to workaround.
1354 : */
1355 : #pragma optimize("",off)
1356 : #endif
1357 : int _ma_update_state_lsns_sub(MARIA_SHARE *share, LSN lsn, TrID create_trid,
1358 : my_bool do_sync,
1359 : my_bool update_create_rename_lsn)
1360 314 : {
1361 : uchar buf[LSN_STORE_SIZE * 3], *ptr;
1362 : uchar trid_buff[8];
1363 314 : File file= share->kfile.file;
1364 314 : DBUG_ASSERT(file >= 0);
1365 :
1366 314 : if (lsn == LSN_IMPOSSIBLE)
1367 : {
1368 : int res;
1369 : LEX_CUSTRING log_array[TRANSLOG_INTERNAL_PARTS + 1];
1370 : /* table name is logged only for information */
1371 0 : log_array[TRANSLOG_INTERNAL_PARTS + 0].str=
1372 : (uchar *)(share->open_file_name.str);
1373 0 : log_array[TRANSLOG_INTERNAL_PARTS + 0].length=
1374 : share->open_file_name.length + 1;
1375 0 : if ((res= translog_write_record(&lsn, LOGREC_IMPORTED_TABLE,
1376 : &dummy_transaction_object, NULL,
1377 : (translog_size_t)
1378 : log_array[TRANSLOG_INTERNAL_PARTS +
1379 : 0].length,
1380 : sizeof(log_array)/sizeof(log_array[0]),
1381 : log_array, NULL, NULL)))
1382 0 : return res;
1383 : }
1384 :
1385 1256 : for (ptr= buf; ptr < (buf + sizeof(buf)); ptr+= LSN_STORE_SIZE)
1386 942 : lsn_store(ptr, lsn);
1387 314 : share->state.skip_redo_lsn= share->state.is_of_horizon= lsn;
1388 314 : share->state.create_trid= create_trid;
1389 314 : mi_int8store(trid_buff, create_trid);
1390 314 : if (update_create_rename_lsn)
1391 : {
1392 312 : share->state.create_rename_lsn= lsn;
1393 312 : if (share->id != 0)
1394 : {
1395 : /*
1396 : If OP is the operation which is calling us, if table is later written,
1397 : we could see in the log:
1398 : FILE_ID ... REDO_OP ... REDO_INSERT.
1399 : (that can happen in real life at least with OP=REPAIR).
1400 : As FILE_ID will be ignored by Recovery because it is <
1401 : create_rename_lsn, REDO_INSERT would be ignored too, wrongly.
1402 : To avoid that, we force a LOGREC_FILE_ID to be logged at next write:
1403 : */
1404 0 : translog_deassign_id_from_share(share);
1405 : }
1406 : }
1407 : else
1408 2 : lsn_store(buf, share->state.create_rename_lsn);
1409 314 : return (my_pwrite(file, buf, sizeof(buf),
1410 : sizeof(share->state.header) +
1411 : MARIA_FILE_CREATE_RENAME_LSN_OFFSET, MYF(MY_NABP)) ||
1412 : my_pwrite(file, trid_buff, sizeof(trid_buff),
1413 : sizeof(share->state.header) +
1414 : MARIA_FILE_CREATE_TRID_OFFSET, MYF(MY_NABP)) ||
1415 : (do_sync && my_sync(file, MYF(0))));
1416 : }
1417 : #if (_MSC_VER == 1310)
1418 : #pragma optimize("",on)
1419 : #endif /*VS2003 compiler bug workaround*/
|