← Back to team overview

maria-developers team mailing list archive

Review for maria/10.0-serg/revision/3899 (MDEV-5248 Serious incompatibility and data corruption of DATETIME and DATE types due to get_innobase_type_from_mysql_type refactor combined with InnoDB Online DDL)

 

Serg,

Thanks for looking into this. General comments:

   - The approach overall looks sound. I am very much in favor of reverting
   back to Oracle MySQL's type codes as you have, by reverting the refactor
   of get_innobase_type_from_mysql_type as you did.
   - You've mixed tabs and spaces in a bunch of places. InnoDB code should
   use tabs to indent.
   - It would probably be worth visually comparing to MySQL
   5.6's get_innobase_type_from_mysql_type to ensure there is nothing new
   there aside from my comments.
   - It is almost certainly worth creating test cases with data files from
   older MariaDB and newer MySQL versions to exercise the usage of the code
   paths that only occur during upgrade; that doesn't need to happen here
   necessarily.

Comments on the diff inline below.

Regards,

Jeremy

(I removed the .result file differences as any issues there will be
resolved by fixes to the .test.)

=== renamed file 'mysql-test/suite/innodb/t/1byte_data_int.opt' =>
> 'mysql-test/suite/innodb/t/data_types.opt'
>
=== renamed file 'mysql-test/suite/innodb/t/1byte_data_int.test' =>
> 'mysql-test/suite/innodb/t/data_types.test'
> --- mysql-test/suite/innodb/t/1byte_data_int.test 2013-10-31 22:20:05
> +0000
> +++ mysql-test/suite/innodb/t/data_types.test 2013-11-11 20:54:09 +0000
> @@ -1,25 +1,76 @@
>  --source include/have_innodb.inc
>
>

I would strongly suggest a detailed explanatory comment and warning added
with "--echo #" so that it appears in the result file as well, explaining
the repercussions changing any of the results here.


> ---echo # Create a table with a 1-byte ENUM, 1-byte SET, and TINYINT
> UNSIGNED.
> -
>  CREATE TABLE t1
>  (
> -  t1_enum       ENUM("a", "b", "c"),
> -  t1_set        SET("a", "b", "c"),
> -  t1_tinyint_s  TINYINT,
> -  t1_tinyint_u  TINYINT UNSIGNED
> +  t1_BIGINT BIGINT,
> +  t1_BIGINT_UNSIGNED BIGINT UNSIGNED,
> +  t1_BINARY_100 BINARY(100),
> +  t1_BIT_2 BIT(2),
> +  t1_BIT_20 BIT(20),
> +  t1_BLOB BLOB,
> +  t1_CHAR_100 CHAR(100),
> +  t1_CHAR_100_BINARY CHAR(100) BINARY,
> +  t1_DATE DATE,
> +  t1_DATETIME DATETIME,
> +  t1_DATETIME_6 DATETIME(6),
> +  t1_DECIMAL_10_3 DECIMAL(10,3),
> +  t1_DECIMAL_10_3_UNSIGNED DECIMAL(10,3) UNSIGNED,
> +  t1_DOUBLE DOUBLE,
> +  t1_DOUBLE_UNSIGNED DOUBLE UNSIGNED,
> +  t1_ENUM       ENUM("a", "b", "c"),
> +  t1_ENUM_BINARY ENUM('a','b') BINARY,
>

Why the mix of quote styles? For completeness this should also use an ENUM
with  > 254 elements to create a 2-byte ENUM.


> +  t1_FLOAT FLOAT,
> +  t1_FLOAT_UNSIGNED FLOAT UNSIGNED,
> +  t1_INT INT,
> +  t1_INT_UNSIGNED INT UNSIGNED,
> +  t1_LONGBLOB LONGBLOB,
> +  t1_LONGTEXT LONGTEXT,
> +  t1_MEDIUMBLOB MEDIUMBLOB,
> +  t1_MEDIUMINT MEDIUMINT,
> +  t1_MEDIUMINT_UNSIGNED MEDIUMINT UNSIGNED,
> +  t1_MEDIUMTEXT MEDIUMTEXT,
> +  t1_SET        SET("a", "b", "c"),
> +  t1_SET_BINARY SET('a','b') BINARY,
>

Same mix of quote styles. Also should create a SET with > 8 elements to
create a 2-byte SET.


> +  t1_SMALLINT SMALLINT,
> +  t1_SMALLINT_UNSIGNED SMALLINT UNSIGNED,
> +  t1_TEXT TEXT,
> +  t1_TIME TIME,
> +  t1_TIMESTAMP TIMESTAMP,
> +  t1_TIMESTAMP_5 TIMESTAMP(5),
> +  t1_TIME_4 TIME(4),
>

Maybe move up to TIME above (I guess these are all alphabetically sorted,
but it may be better to sort only by the type [or add e.g. _0 to the base
one]). Why 4 and not 3 or 6 or some other value?


> +  t1_TINYBLOB TINYBLOB,
> +  t1_TINYINT  TINYINT,
> +  t1_TINYINT_UNSIGNED TINYINT UNSIGNED,
> +  t1_TINYTEXT TINYTEXT,
> +  t1_VARBINARY_100 VARBINARY(100),
> +  t1_VARCHAR_10 VARCHAR(10),
> +  t1_VARCHAR_10_BINARY VARCHAR(10) BINARY,
> +  t1_VARCHAR_500 VARCHAR(500),
> +  t1_VARCHAR_500_BINARY VARCHAR(500) BINARY,
> +  t1_YEAR_2 YEAR(2),
> +  t1_YEAR_4 YEAR(4)
>  ) ENGINE=InnoDB;
>
> ---echo # All t1 fields' mtypes should be 6 (DATA_INT).
> -
>  SELECT
> -  name,
> -  mtype,
> -  (prtype & 512) = 512 AS is_unsigned
> +  name, CASE mtype WHEN 1 THEN "DATA_VARCHAR"
>

Put CASE on its own line (distinct from "name") and move the first WHEN to
its own line. Re-indent other WHEN's to match.


> +             WHEN 2 THEN "DATA_CHAR"
> +             WHEN 3 THEN "DATA_FIXBINARY"
> +             WHEN 4 THEN "DATA_BINARY"
> +             WHEN 5 THEN "DATA_BLOB"
> +             WHEN 6 THEN "DATA_INT"
> +             WHEN 7 THEN "DATA_SYS_CHILD"
> +             WHEN 8 THEN "DATA_SYS"
> +             WHEN 9 THEN "DATA_FLOAT"
> +             WHEN 10 THEN "DATA_DOUBLE"
> +             WHEN 11 THEN "DATA_DECIMAL"
> +             WHEN 12 THEN "DATA_VARMYSQL"
> +             WHEN 13 THEN "DATA_MYSQL"
> +             WHEN 63 THEN "DATA_MTYPE_MAX"
> +             ELSE mtype
> +        END,
>

Add an alias e.g. AS base_type so that the result's column name isn't a
truncated version of this CASE.


> +  IF((prtype & 512) = 512,"UNSIGNED","") AS is_unsigned
>

It's a bit weird to have is_unsigned be "UNSIGNED" or empty. Maybe YES/NO
instead? Not a big deal I suppose.


>  FROM information_schema.INNODB_SYS_COLUMNS
>  WHERE name LIKE "t1\_%"
>  ORDER BY name;
>
> ---echo # Cleanup
> -
>  DROP TABLE t1;
> === modified file 'storage/innobase/dict/dict0stats.cc'
> --- storage/innobase/dict/dict0stats.cc 2013-06-22 15:47:12 +0000
> +++ storage/innobase/dict/dict0stats.cc 2013-11-11 20:54:09 +0000
> @@ -176,7 +176,7 @@
>   DATA_NOT_NULL, 192},
>
>   {"last_update", DATA_INT,
> - DATA_NOT_NULL | DATA_UNSIGNED, 4},
> + DATA_NOT_NULL, 4},
>
>   {"n_rows", DATA_INT,
>   DATA_NOT_NULL | DATA_UNSIGNED, 8},
> @@ -207,7 +207,7 @@
>   DATA_NOT_NULL, 192},
>
>   {"last_update", DATA_INT,
> - DATA_NOT_NULL | DATA_UNSIGNED, 4},
> + DATA_NOT_NULL, 4},
>
>   {"stat_name", DATA_VARMYSQL,
>   DATA_NOT_NULL, 64*3},
> === modified file 'storage/innobase/handler/ha_innodb.cc'
> --- storage/innobase/handler/ha_innodb.cc 2013-11-03 23:45:27 +0000
> +++ storage/innobase/handler/ha_innodb.cc 2013-11-11 20:54:09 +0000
> @@ -5745,67 +5745,94 @@
>   8 bits: this is used in ibuf and also when DATA_NOT_NULL is ORed to
>   the type */
>
> - compile_time_assert((ulint)MYSQL_TYPE_STRING < 256);
> - compile_time_assert((ulint)MYSQL_TYPE_VAR_STRING < 256);
> - compile_time_assert((ulint)MYSQL_TYPE_DOUBLE < 256);
> - compile_time_assert((ulint)MYSQL_TYPE_FLOAT < 256);
> - compile_time_assert((ulint)MYSQL_TYPE_DECIMAL < 256);
> -
> - *unsigned_flag = 0;
> -
> - switch (field->key_type()) {
> - case HA_KEYTYPE_USHORT_INT:
> - case HA_KEYTYPE_ULONG_INT:
> - case HA_KEYTYPE_UINT24:
> - case HA_KEYTYPE_ULONGLONG:
> + DBUG_ASSERT((ulint)MYSQL_TYPE_STRING < 256);
> + DBUG_ASSERT((ulint)MYSQL_TYPE_VAR_STRING < 256);
> + DBUG_ASSERT((ulint)MYSQL_TYPE_DOUBLE < 256);
> + DBUG_ASSERT((ulint)MYSQL_TYPE_FLOAT < 256);
> + DBUG_ASSERT((ulint)MYSQL_TYPE_DECIMAL < 256);
> +
> + if (field->flags & UNSIGNED_FLAG) {
> +
>   *unsigned_flag = DATA_UNSIGNED;
> - /* fall through */
> - case HA_KEYTYPE_SHORT_INT:
> - case HA_KEYTYPE_LONG_INT:
> - case HA_KEYTYPE_INT24:
> - case HA_KEYTYPE_INT8:
> - case HA_KEYTYPE_LONGLONG:
> + } else {
> + *unsigned_flag = 0;
> + }
> +
> + if (field->real_type() == MYSQL_TYPE_ENUM
> + || field->real_type() == MYSQL_TYPE_SET) {
> +
> + /* MySQL has field->type() a string type for these, but the
> + data is actually internally stored as an unsigned integer
> + code! */
> +
> + *unsigned_flag = DATA_UNSIGNED; /* MySQL has its own unsigned
> + flag set to zero, even though
> + internally this is an unsigned
> + integer type */
>   return(DATA_INT);
> - case HA_KEYTYPE_FLOAT:
> - return(DATA_FLOAT);
> - case HA_KEYTYPE_DOUBLE:
> - return(DATA_DOUBLE);
> - case HA_KEYTYPE_BINARY:
> -                if (field->type() == MYSQL_TYPE_TINY ||
> -                    field->real_type() == MYSQL_TYPE_ENUM ||
> -                    field->real_type() == MYSQL_TYPE_SET
> -                    )
> -                { // compatibility workaround
> -                 *unsigned_flag= DATA_UNSIGNED;
> -                 return DATA_INT;
> -                }
> - return(DATA_FIXBINARY);
> - case HA_KEYTYPE_VARBINARY2:
> - if (field->type() != MYSQL_TYPE_VARCHAR)
> - return(DATA_BLOB);
> - /* fall through */
> - case HA_KEYTYPE_VARBINARY1:
> - return(DATA_BINARY);
> - case HA_KEYTYPE_VARTEXT2:
> - if (field->type() != MYSQL_TYPE_VARCHAR)
> - return(DATA_BLOB);
> - /* fall through */
> - case HA_KEYTYPE_VARTEXT1:
> - if (field->charset() == &my_charset_latin1) {
> + }
> +
> + switch (field->type()) {
> + /* NOTE that we only allow string types in DATA_MYSQL and
> + DATA_VARMYSQL */
> + case MYSQL_TYPE_VAR_STRING: /* old <= 4.1 VARCHAR */
> + case MYSQL_TYPE_VARCHAR: /* new >= 5.0.3 true VARCHAR */
> + if (field->binary()) {
> + return(DATA_BINARY);
> + } else if (strcmp(field->charset()->name,
> +  "latin1_swedish_ci") == 0) {
>   return(DATA_VARCHAR);
>   } else {
>   return(DATA_VARMYSQL);
>   }
> - case HA_KEYTYPE_TEXT:
> - if (field->charset() == &my_charset_latin1) {
> + case MYSQL_TYPE_BIT:
> + case MYSQL_TYPE_STRING: if (field->binary()) {
> +
> + return(DATA_FIXBINARY);
> + } else if (strcmp(field->charset()->name,
> +  "latin1_swedish_ci") == 0) {
>   return(DATA_CHAR);
>   } else {
>   return(DATA_MYSQL);
>   }
> - case HA_KEYTYPE_NUM:
> + case MYSQL_TYPE_NEWDECIMAL:
> + return(DATA_FIXBINARY);
> + case MYSQL_TYPE_LONG:
> + case MYSQL_TYPE_LONGLONG:
> + case MYSQL_TYPE_TINY:
> + case MYSQL_TYPE_SHORT:
> + case MYSQL_TYPE_INT24:
> + case MYSQL_TYPE_DATE:
> + case MYSQL_TYPE_YEAR:
> + case MYSQL_TYPE_NEWDATE:
> + return(DATA_INT);
> + case MYSQL_TYPE_TIMESTAMP:
> + *unsigned_flag = 0;
> +                /* fall through */
> + case MYSQL_TYPE_TIME:
> + case MYSQL_TYPE_DATETIME:
> + if (field->key_type() == HA_KEYTYPE_BINARY)
> + return(DATA_FIXBINARY);
> +                else
> + return(DATA_INT);
>

You also need to support MYSQL_TYPE_{TIMESTAMP,TIME,DATETIME}2 for MySQL
5.6 here.


> + case MYSQL_TYPE_FLOAT:
> + return(DATA_FLOAT);
> + case MYSQL_TYPE_DOUBLE:
> + return(DATA_DOUBLE);
> + case MYSQL_TYPE_DECIMAL:
>   return(DATA_DECIMAL);
> - case HA_KEYTYPE_BIT:
> - case HA_KEYTYPE_END:
> + case MYSQL_TYPE_GEOMETRY:
> + case MYSQL_TYPE_TINY_BLOB:
> + case MYSQL_TYPE_MEDIUM_BLOB:
> + case MYSQL_TYPE_BLOB:
> + case MYSQL_TYPE_LONG_BLOB:
> + return(DATA_BLOB);
> + case MYSQL_TYPE_NULL:
> + /* MySQL currently accepts "NULL" datatype, but will
> + reject such datatype in the next release. We will cope
> + with it and not trigger assertion failure in 5.1 */
> + break;
> + default:
>   ut_error;
>   }
>
> === modified file 'storage/innobase/handler/handler0alter.cc'
> --- storage/innobase/handler/handler0alter.cc 2013-09-21 08:14:42 +0000
> +++ storage/innobase/handler/handler0alter.cc 2013-11-11 20:54:09 +0000
> @@ -347,6 +347,32 @@
>   }
>   }
>
> +        /*
> +          InnoDB in different MariaDB versions was generating different
> mtype
> +          codes for certain types. Also signed/unsigned bit was generated
> +          differently too.
>

It may be more clear to merge these two sentences into one.


> +
> +          Online ALTER would change the mtype/unsigned_flag (to what the
> +          current code generates) without changing the underlying data
> +          represenation, and it might result in data corruption.
> +
> +          Don't do online ALTER if mtype/unsigned_flag are wrong.
> +        */
> + for (ulint i = 0; i < table->s->fields; i++) {
> + const Field* field = table->field[i];
> + const dict_col_t* col = dict_table_get_nth_col(prebuilt->table, i);
> + ulint unsigned_flag;
> + if (col->mtype != get_innobase_type_from_mysql_type(&unsigned_flag,
> field)) {
> +
> + DBUG_RETURN(HA_ALTER_INPLACE_NOT_SUPPORTED);
> +                }
> +
> + if ((col->prtype & DATA_UNSIGNED) != unsigned_flag) {
> +
> + DBUG_RETURN(HA_ALTER_INPLACE_NOT_SUPPORTED);
> +                }
> +        }
> +
>

This looks reasonable to me.


>   /* We should be able to do the operation in-place.
>   See if we can do it online (LOCK=NONE). */
>   bool online = true;

Follow ups