Re: 46734eb1632: MDEV-371 long unique

Hi, Sachin!

On Feb 14, Sachin Setiya wrote:
> revision-id: 46734eb1632 (mariadb-10.4.1-97-g46734eb1632)
> parent(s): acf4269a5b1
> author: Sachin <sachin.setiya@xxxxxxxxxxx>
> committer: Sachin <sachin.setiya@xxxxxxxxxxx>
> timestamp: 2019-02-12 19:01:17 +0530
> message:


> diff --git a/mysql-test/main/long_unique.result b/mysql-test/main/long_unique.result
> --- /dev/null
> +++ b/mysql-test/main/long_unique.result
> @@ -0,0 +1,1391 @@

Please add a test with 64 indexes. Say, one long unique
and 63 normal indexes on ints or whatever.
The point is to show that a long unique internally creates only _one_
index, not two, so it does not limit the user's ability to
create all MAX_INDEXES indexes.
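
Something along these lines could work (an untested sketch; the table
name and column names are made up):

  --let $i= 1
  --let $cols= a blob unique
  while ($i < 64)
  {
    # append one int column with a normal index per iteration
    --let $cols= $cols, c$i int, key(c$i)
    --inc $i
  }
  --eval create table t64 ($cols)
  show create table t64;
  drop table t64;

That gives one long unique plus 63 normal keys, i.e. all 64 of
MAX_INDEXES, and CREATE TABLE should still succeed.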

> +#Structure of tests
> +#First we will check all option for
> +#table containing single unique column
> +#table containing keys like unique(a,b,c,d) etc
> +#then table containing 2 blob unique etc
> +set @allowed_packet= @@max_allowed_packet;
> +#table with single long blob column;
> +create table t1(a blob  unique );
> +insert into t1 values(1),(2),(3),(56),('sachin'),('maria'),(123456789034567891),(null),(null),(123456789034567890);
> +#blob with primary key not allowed
> +create table t2(a blob,primary key(a(10000)));
> +ERROR 42000: Specified key was too long; max key length is 1000 bytes
> +create table t3(a varchar(10000) primary key);
> +ERROR 42000: Specified key was too long; max key length is 1000 bytes
> +insert into t1 values(2);
> +ERROR 23000: Duplicate entry '2' for key 'a'
> +#table structure;
> +desc t1;
> +Field	Type	Null	Key	Default	Extra
> +a	blob	YES	UNI	NULL	
> +show create table t1;
> +Table	Create Table
> +t1	CREATE TABLE `t1` (
> +  `a` blob DEFAULT NULL,
> +  UNIQUE KEY `a` (`a`)
> +) ENGINE=MyISAM DEFAULT CHARSET=latin1
> +show keys from t1;
> +Table	t1
> +Non_unique	0
> +Key_name	a
> +Seq_in_index	1
> +Column_name	a
> +Collation	A
> +Cardinality	NULL
> +Sub_part	NULL
> +Packed	NULL
> +Null	YES
> +Index_type	HASH
> +Comment	
> +Index_comment	
> +
> +MyISAM file:         DATADIR/test/t1
> +Record format:       Packed
> +Character set:       latin1_swedish_ci (8)
> +Data records:                   10  Deleted blocks:                 0
> +Recordlength:                   12
> +
> +table description:
> +Key Start Len Index   Type
> +1   12    8   multip. ulonglong NULL       
> +select * from information_schema.columns where table_schema = 'test' and table_name = 't1';
> +TABLE_CATALOG	TABLE_SCHEMA	TABLE_NAME	COLUMN_NAME	ORDINAL_POSITION	COLUMN_DEFAULT	IS_NULLABLE	DATA_TYPE	CHARACTER_MAXIMUM_LENGTH	CHARACTER_OCTET_LENGTH	NUMERIC_PRECISION	NUMERIC_SCALE	DATETIME_PRECISION	CHARACTER_SET_NAME	COLLATION_NAME	COLUMN_TYPE	COLUMN_KEY	EXTRA	PRIVILEGES	COLUMN_COMMENT	IS_GENERATED	GENERATION_EXPRESSION
> +def	test	t1	a	1	NULL	YES	blob	65535	65535	NULL	NULL	NULL	NULL	NULL	blob	UNI		select,insert,update,references		NEVER	NULL
> +select * from information_schema.statistics where table_schema = 'test' and table_name = 't1';
> +TABLE_CATALOG	TABLE_SCHEMA	TABLE_NAME	NON_UNIQUE	INDEX_SCHEMA	INDEX_NAME	SEQ_IN_INDEX	COLUMN_NAME	COLLATION	CARDINALITY	SUB_PART	PACKED	NULLABLE	INDEX_TYPE	COMMENT	INDEX_COMMENT
> +def	test	t1	0	test	a	1	a	A	NULL	NULL	NULL	YES	HASH		
> +select   * from information_schema.key_column_usage where table_schema= 'test' and table_name= 't1';
> +CONSTRAINT_CATALOG	def
> +CONSTRAINT_SCHEMA	test
> +CONSTRAINT_NAME	a
> +TABLE_CATALOG	def
> +TABLE_SCHEMA	test
> +TABLE_NAME	t1
> +COLUMN_NAME	a
> +ORDINAL_POSITION	1
> +POSITION_IN_UNIQUE_CONSTRAINT	NULL
> +REFERENCED_TABLE_SCHEMA	NULL
> +REFERENCED_TABLE_NAME	NULL
> +REFERENCED_COLUMN_NAME	NULL
> +# table select we should not be able to see db_row_hash_column;
> +select * from t1 order by a;
> +a
> +NULL
> +NULL
> +1
> +123456789034567890
> +123456789034567891
> +2
> +3
> +56
> +maria
> +sachin
> +select db_row_hash_1 from t1;
> +ERROR 42S22: Unknown column 'db_row_hash_1' in 'field list'
> +#duplicate entry test;
> +insert into t1 values(2);
> +ERROR 23000: Duplicate entry '2' for key 'a'
> +insert into t1 values('sachin');
> +ERROR 23000: Duplicate entry 'sachin' for key 'a'
> +insert into t1 values(123456789034567891);
> +ERROR 23000: Duplicate entry '123456789034567891' for key 'a'
> +select * from t1 order by a;
> +a
> +NULL
> +NULL
> +1
> +123456789034567890
> +123456789034567891
> +2
> +3
> +56
> +maria
> +sachin
> +insert into t1 values(11),(22),(33);
> +insert into t1 values(12),(22);
> +ERROR 23000: Duplicate entry '22' for key 'a'
> +select * from t1 order by a;
> +a
> +NULL
> +NULL
> +1
> +11
> +12
> +123456789034567890
> +123456789034567891
> +2
> +22
> +3
> +33
> +56
> +maria
> +sachin
> +insert into t1 values(repeat('s',4000*10)),(repeat('s',4001*10));
> +insert into t1 values(repeat('m',4000*10)),(repeat('m',4000*10));
> +ERROR 23000: Duplicate entry 'mmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmm' for key 'a'
> +insert into t1 values(repeat('m',4001)),(repeat('m',4002));
> +truncate table t1;
> +insert into t1 values(1),(2),(3),(4),(5),(8),(7);
> +
> +MyISAM file:         DATADIR/test/t1
> +Record format:       Packed
> +Character set:       latin1_swedish_ci (8)
> +Data records:                    7  Deleted blocks:                 0
> +Recordlength:                   12
> +
> +table description:
> +Key Start Len Index   Type
> +1   12    8   multip. ulonglong NULL       
> +#now some alter commands;
> +alter table t1 add column b int;
> +desc t1;
> +Field	Type	Null	Key	Default	Extra
> +a	blob	YES	UNI	NULL	
> +b	int(11)	YES		NULL	
> +show create table t1;
> +Table	Create Table
> +t1	CREATE TABLE `t1` (
> +  `a` blob DEFAULT NULL,
> +  `b` int(11) DEFAULT NULL,
> +  UNIQUE KEY `a` (`a`(65535))

hmm, again, (65535) appeared from nowhere

> +) ENGINE=MyISAM DEFAULT CHARSET=latin1
> +insert into t1 values(1,2);
> +ERROR 23000: Duplicate entry '1' for key 'a'
> +insert into t1 values(2,2);
> +ERROR 23000: Duplicate entry '2' for key 'a'
> +select db_row_hash_1 from t1;
> +ERROR 42S22: Unknown column 'db_row_hash_1' in 'field list'
> +#now try to change db_row_hash_1 column;
> +alter table t1 drop column db_row_hash_1;
> +ERROR 42000: Can't DROP COLUMN `db_row_hash_1`; check that it exists
> +alter table t1 add column d int , add column e int , drop column db_row_hash_1;
...
> +insert into t2 values(1,1);
> +insert into t2 values(2,1);
> +drop table t2;
> +#not null test //todo solve warnings

Is that solved? I don't see any warnings.

> +create table t1(a blob unique not null);
> +desc t1;
> +Field	Type	Null	Key	Default	Extra
> +a	blob	NO	UNI	NULL	
> +show create table t1;
> +Table	Create Table
> +t1	CREATE TABLE `t1` (
> +  `a` blob NOT NULL,
> +  UNIQUE KEY `a` (`a`)
> +) ENGINE=MyISAM DEFAULT CHARSET=latin1
> +insert into t1 values(1);
> +insert into t1 values(3);
> +insert into t1 values(1);
> +ERROR 23000: Duplicate entry '1' for key 'a'
> +drop table t1;
> +create table t1(a int primary key, b blob unique , c blob unique not null);
> +insert into t1 values(1,1,1);
> +insert into t1 values(2,1,2);
> +ERROR 23000: Duplicate entry '1' for key 'b'
> +insert into t1 values(3,3,1);
> +ERROR 23000: Duplicate entry '1' for key 'c'
> +drop table t1;
> +create table t1 (a blob unique not null, b blob not null, c blob not null, unique(b,c));
> +desc t1;
> +Field	Type	Null	Key	Default	Extra
> +a	blob	NO	UNI	NULL	
> +b	blob	NO	MUL	NULL	
> +c	blob	NO		NULL	
> +show create table t1;
> +Table	Create Table
> +t1	CREATE TABLE `t1` (
> +  `a` blob NOT NULL,
> +  `b` blob NOT NULL,
> +  `c` blob NOT NULL,
> +  UNIQUE KEY `a` (`a`),
> +  UNIQUE KEY `b` (`b`,`c`)
> +) ENGINE=MyISAM DEFAULT CHARSET=latin1
> +insert into t1 values (1, 2, 3);
> +insert into t1 values (2, 1, 3);
> +insert into t1 values (2, 1, 3);
> +ERROR 23000: Duplicate entry '2' for key 'a'
> +drop table t1;
> +#partition
> +create table t1(a blob unique) partition by hash(a);
> +ERROR HY000: A BLOB field is not allowed in partition function
> +#key length > 2^15 -1
> +create table t1(a blob, unique(a(100001)));
> +ERROR 42000: Specified key was too long; max key length is 1000 bytes

Better to say "max length is 32767 bytes".

Or, maybe, a new error message, "key segment length is 32767 bytes"?

> +set @@GLOBAL.max_allowed_packet= @allowed_packet;
> diff --git a/mysql-test/main/long_unique_debug.result b/mysql-test/main/long_unique_debug.result
> --- /dev/null
> +++ b/mysql-test/main/long_unique_debug.result
> @@ -0,0 +1,574 @@
> +#In this test case we will check what will happen in the case of hash collision
> +SET debug_dbug="d,same_long_unique_hash";
> +create table t1(a blob unique);
> +insert into t1 values('xyz');
> +insert into t1 values('abc');
> +insert into t1 values('sachin');
> +insert into t1 values('sachin');
> +ERROR 23000: Duplicate entry 'sachin' for key 'a'
> +insert into t1 values('maria');
> +insert into t1 values('maria');
> +ERROR 23000: Duplicate entry 'maria' for key 'a'

I think you can use SHOW STATUS LIKE 'Handler_read_key'
(or some other Handler_% variable)
to verify here that there was indeed a hash collision - that is, this
variable counts row reads and you have an extra row read here, so it
should be visible.

Try: add FLUSH STATUS at the beginning of the test and
SHOW STATUS LIKE 'Handler_read_key' after the errors,
then run the test with and without d,same_long_unique_hash.
If there is a difference, let's keep SHOW STATUS in the test.
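
Roughly (untested; the counter that actually changes may be a
different Handler_% one):

  flush status;
  create table t1 (a blob unique);
  insert into t1 values ('xyz');
  # with d,same_long_unique_hash this collides with 'xyz',
  # forcing an extra row read to compare the actual values
  insert into t1 values ('abc');
  --error ER_DUP_ENTRY
  insert into t1 values ('abc');
  show status like 'Handler_read_key';
  drop table t1;

With the dbug flag the counter should be higher than in a run without
it, which makes the collision path visible in the result file.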

> +drop table t1;
> +create table t1(a blob unique, b blob unique);
> +insert into t1 values('xyz', 11);
> +insert into t1 values('abc', 22);
> +insert into t1 values('sachin', 1);
> +insert into t1 values('sachin', 4);
> +ERROR 23000: Duplicate entry 'sachin' for key 'a'
> +insert into t1 values('maria', 2);
> +insert into t1 values('maria', 3);
> +ERROR 23000: Duplicate entry 'maria' for key 'a'
> +drop table t1;
> +create table t1(a blob , b blob , unique(a,b));
> +insert into t1 values('xyz', 11);
> +insert into t1 values('abc', 22);
> +insert into t1 values('sachin', 1);
> +insert into t1 values('sachin', 1);
> +ERROR 23000: Duplicate entry 'sachin-1' for key 'a'
> +insert into t1 values('maria', 2);
> +insert into t1 values('maria', 2);
> +ERROR 23000: Duplicate entry 'maria-2' for key 'a'
> +drop table t1;
> +##Internal State of long unique tables
> +SET debug_dbug="d,print_long_unique_internal_state";
> +create table t1 ( a blob unique);
> +Warnings:
> +Note	1	Printing Table state, It will print table fields, fields->offset,field->null_bit, field->null_pos and key_info ... 
> +
> +Printing  Table  keyinfo
> +
> +table->s->reclength 19
> +table->s->fields 2

Rather unorthodox, but fine, as you like :)

> diff --git a/mysql-test/main/long_unique_innodb.result b/mysql-test/main/long_unique_innodb.result
> --- /dev/null
> +++ b/mysql-test/main/long_unique_innodb.result
> @@ -0,0 +1,121 @@
> +create table t1(a blob unique) engine= InnoDB;
> +insert into t1 values('RUC');
> +insert into t1 values ('RUC');
> +ERROR 23000: Duplicate entry 'RUC' for key 'a'
> +drop table t1;
> +#test for concurrent insert of long unique in innodb
> +create table t1(a blob unique) engine= InnoDB;
> +show create table t1;
> +Table	Create Table
> +t1	CREATE TABLE `t1` (
> +  `a` blob DEFAULT NULL,
> +  UNIQUE KEY `a` (`a`)
> +) ENGINE=InnoDB DEFAULT CHARSET=latin1
> +connect  'con1', localhost, root,,;
> +connect  'con2', localhost, root,,;
> +connection con1;
> +set  transaction isolation level READ UNCOMMITTED;
> +start transaction;
> +insert into t1 values('RUC');
> +connection con2;
> +set  transaction isolation level READ UNCOMMITTED;
> +start transaction;
> +insert into t1 values ('RUC');
> +ERROR 23000: Duplicate entry 'RUC' for key 'a'
> +connection con1;
> +commit;
> +set  transaction isolation level READ COMMITTED;
> +start transaction;
> +insert into t1 values('RC');
> +connection con2;
> +commit;
> +set  transaction isolation level READ COMMITTED;
> +start transaction;
> +insert into t1 values ('RC');
> +ERROR 23000: Duplicate entry 'RC' for key 'a'

Is this your HA_ERR_LOCK_WAIT_TIMEOUT that you replace with
HA_ERR_FOUND_DUPP_KEY?
I think HA_ERR_LOCK_WAIT_TIMEOUT is correct here.

Compare with normal indexes: there should be HA_ERR_LOCK_WAIT_TIMEOUT
with normal indexes too, because the second INSERT should wait
for the first to commit or roll back. And if the first does neither,
the second times out. Looks reasonable.
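
For comparison, a sketch with a normal unique index (untested; it
reuses the con1/con2 connections from the quoted test, and the
timeout value is made up):

  create table t2 (a varchar(10) unique) engine=InnoDB;
  connection con1;
  start transaction;
  insert into t2 values ('RC');
  connection con2;
  set session innodb_lock_wait_timeout= 2;
  # waits for con1's uncommitted row, then times out
  --error ER_LOCK_WAIT_TIMEOUT
  insert into t2 values ('RC');
  connection con1;
  rollback;
  connection default;
  drop table t2;

The long unique case should behave the same way, not report a
duplicate while the first transaction is still open.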

> +commit;
> +connection con1;
> +commit;
> +set  transaction isolation level REPEATABLE READ;
> +start transaction;
> +insert into t1 values('RR');
> +connection con2;
> +commit;
> +set  transaction isolation level REPEATABLE READ;
> +start transaction;
> +insert into t1 values ('RR');
> +ERROR 23000: Duplicate entry 'RR' for key 'a'

Same here, I guess

> +connection con1;
> +commit;
> +set  transaction isolation level SERIALIZABLE;
> +start transaction;
> +insert into t1 values('S');
> +connection con2;
> +commit;
> +set  transaction isolation level SERIALIZABLE;
> +start transaction;
> +insert into t1 values ('S');
> +ERROR 23000: Duplicate entry 'S' for key 'a'

and here

> +commit;
> +connection con1;
> +commit;
> +select * from t1;
> +a
> +RUC
> +RC
> +RR
> +S
> +drop table t1;
> +create table t1(a blob unique) engine=Innodb;
> +connection con1;
> +set  transaction isolation level READ UNCOMMITTED;
> +start transaction;
> +insert into t1 values('RUC');
> +connection con2;
> +set  transaction isolation level READ UNCOMMITTED;
> +start transaction;
> +insert into t1 values ('RUC');;
> +connection con1;
> +rollback;
> +connection con2;
> +commit;
> +connection con1;
> +set  transaction isolation level READ COMMITTED;
> +start transaction;
> +insert into t1 values('RC');
> +connection con2;
> +set  transaction isolation level READ COMMITTED;
> +start transaction;
> +insert into t1 values ('RC');;
> +connection con1;
> +rollback;
> +connection con2;
> +commit;
> +connection con1;
> +set  transaction isolation level REPEATABLE READ;
> +start transaction;
> +insert into t1 values('RR');
> +connection con2;
> +set  transaction isolation level REPEATABLE READ;
> +start transaction;
> +insert into t1 values ('RR');;
> +connection con1;
> +rollback;
> +connection con2;
> +commit;
> +connection con1;
> +set  transaction isolation level SERIALIZABLE;
> +start transaction;
> +insert into t1 values('S');
> +connection con2;
> +set  transaction isolation level SERIALIZABLE;
> +start transaction;
> +insert into t1 values ('S');;
> +connection con1;
> +rollback;
> +connection con2;
> +commit;
> +connection default;
> +drop table t1;
> +disconnect con1;
> +disconnect con2;
> diff --git a/mysql-test/main/long_unique_innodb.test b/mysql-test/main/long_unique_innodb.test
> --- /dev/null
> +++ b/mysql-test/main/long_unique_innodb.test
> @@ -0,0 +1,131 @@
> +--source include/have_innodb.inc
> +--source include/big_test.inc

Instead of big_test, better to set the InnoDB lock wait timeout
to something small for the first half of your test.

And please make sure all tests pass. This one doesn't even start for me
now in your branch.
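
E.g. at the top of the test (untested; the value is illustrative):

  set @save_timeout= @@global.innodb_lock_wait_timeout;
  set global innodb_lock_wait_timeout= 2;
  # the concurrent-insert part of the test goes here
  set global innodb_lock_wait_timeout= @save_timeout;

The global setting is picked up by con1/con2 because they connect
after it is set, so the waiting inserts fail fast and the test no
longer needs big_test.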

> +
> +create table t1(a blob unique) engine= InnoDB;
> +insert into t1 values('RUC');
> +--error ER_DUP_ENTRY
> +insert into t1 values ('RUC');
> +drop table t1;
> +
> +--echo #test for concurrent insert of long unique in innodb
> +create table t1(a blob unique) engine= InnoDB;
> +show create table t1;
> +connect ('con1', localhost, root,,);
> diff --git a/mysql-test/main/mdev-504.test b/mysql-test/main/mdev-504.test
> --- a/mysql-test/main/mdev-504.test
> +++ b/mysql-test/main/mdev-504.test
> @@ -1,4 +1,3 @@
> ---source include/not_valgrind.inc

And restore this line too, please. Or, if it makes sense to remove it,
make sure you push it in a separate commit, not in MDEV-371.

>  --source include/no_protocol.inc
>  
>  set @save_use_stat_tables=@@global.use_stat_tables;
> diff --git a/sql/field.h b/sql/field.h
> --- a/sql/field.h
> +++ b/sql/field.h
> @@ -553,7 +553,7 @@ class Virtual_column_info: public Sql_alloc
>      name.str= NULL;
>      name.length= 0;
>    };
> -  ~Virtual_column_info() {}
> +  ~Virtual_column_info();

Hmm, I didn't find the ~Virtual_column_info() destructor anywhere.
Where is it now?

>    enum_vcol_info_type get_vcol_type() const
>    {
>      return vcol_type;
> diff --git a/sql/item_func.h b/sql/item_func.h
> --- a/sql/item_func.h
> +++ b/sql/item_func.h
> @@ -1019,6 +1019,19 @@ class Item_long_func: public Item_int_func
>  };
>  
>  
> +class Item_func_hash: public Item_int_func
> +{
> +public:
> +  Item_func_hash(THD *thd, List<Item> &item): Item_int_func(thd, item)
> +  {}
> +  longlong val_int();
> +  bool fix_length_and_dec();
> +  const Type_handler *type_handler() const { return &type_handler_long; }
> +  Item *get_copy(THD *thd)
> +  { return get_item_copy<Item_func_hash>(thd, this); }
> +  const char *func_name() const { return "HASH"; }

Perhaps "<hash>"? Like other internal items, e.g. <in_optimizer> or <rowid>.

> +};
> +
>  class Item_longlong_func: public Item_int_func
>  {
>  public:
> diff --git a/sql/sql_show.cc b/sql/sql_show.cc
> --- a/sql/sql_show.cc
> +++ b/sql/sql_show.cc
> @@ -2341,7 +2341,7 @@ int show_create_table(THD *thd, TABLE_LIST *table_list, String *packet,
>        }
>      }
>      packet->append(')');
> -    store_key_options(thd, packet, table, key_info);
> +    store_key_options(thd, packet, table, &table->key_info[i]);

Why? Doesn't table->s->key_info[i] show the correct options?
I've tried to revert this change and the long_unique test passed.

>      if (key_info->parser)
>      {
>        LEX_CSTRING *parser_name= plugin_name(key_info->parser);
> @@ -6586,15 +6586,20 @@ static int get_schema_stat_record(THD *thd, TABLE_LIST *tables,
>              table->field[8]->set_notnull();
>            }
>            KEY *key=show_table->key_info+i;
> -          if (key->rec_per_key[j])
> +          if (key->rec_per_key[j] && key->algorithm != HA_KEY_ALG_LONG_HASH)
>            {
>              ha_rows records= (ha_rows) ((double) show_table->stat_records() /
>                                          key->actual_rec_per_key(j));
>              table->field[9]->store((longlong) records, TRUE);
>              table->field[9]->set_notnull();
>            }

When you do the optimizer support, I think you can
print rec_per_key[] here for your virtual HASH key.
But it's not important now in this patch, so let's not delay
the push because of it.

> -          const char *tmp= show_table->file->index_type(i);
> -          table->field[13]->store(tmp, strlen(tmp), cs);
> +          if (key->algorithm == HA_KEY_ALG_LONG_HASH)
> +            table->field[13]->store(STRING_WITH_LEN("HASH"), cs);
> +          else
> +          {
> +            const char *tmp= show_table->file->index_type(i);
> +            table->field[13]->store(tmp, strlen(tmp), cs);
> +          }
>          }
>          if (!(key_info->flags & HA_FULLTEXT) &&
>              (key_part->field &&
> diff --git a/sql/sql_table.cc b/sql/sql_table.cc
> --- a/sql/sql_table.cc
> +++ b/sql/sql_table.cc
> @@ -4106,12 +4127,27 @@ mysql_prepare_create_table(THD *thd, HA_CREATE_INFO *create_info,
>      if (key->type == Key::UNIQUE && !(key_info->flags & HA_NULL_PART_KEY))
>        unique_key=1;
>      key_info->key_length=(uint16) key_length;
> -    if (key_length > max_key_length && key->type != Key::FULLTEXT)
> +    if (key_length > max_key_length && key->type != Key::FULLTEXT &&
> +            !is_hash_field_needed)
>      {
>        my_error(ER_TOO_LONG_KEY,MYF(0),max_key_length);
>        DBUG_RETURN(TRUE);
>      }
>  
> +    if (is_hash_field_needed)
> +    {
> +      if (key_info->algorithm != HA_KEY_ALG_UNDEF &&
> +              key_info->algorithm != HA_KEY_ALG_HASH )
> +      {
> +	    my_error(ER_TOO_LONG_KEY, MYF(0), file->max_key_length());
> +	    DBUG_RETURN(TRUE);
> +      }
> +      key_info->algorithm= HA_KEY_ALG_LONG_HASH;
> +    }
> +    // If user forces hash index for storage engine other then memory
> +    else if (key_info->algorithm == HA_KEY_ALG_HASH &&
> +              create_info->db_type->db_type != DB_TYPE_HEAP)

Please, no. No engine type checks. Better to add a new engine capability
flag HA_CAN_HASH_KEYS (see HA_CAN_RTREEKEYS for an example).
Only the MEMORY engine will have it for now.

> +      key_info->algorithm= HA_KEY_ALG_LONG_HASH;
>      if (validate_comment_length(thd, &key->key_create_info.comment,
>                                  INDEX_COMMENT_MAXLEN,
>                                  ER_TOO_LONG_INDEX_COMMENT,
> @@ -8319,6 +8355,11 @@ mysql_prepare_alter_table(THD *thd, TABLE *table,
>        enum Key::Keytype key_type;
>        LEX_CSTRING tmp_name;
>        bzero((char*) &key_create_info, sizeof(key_create_info));
> +      if (key_info->algorithm == HA_KEY_ALG_LONG_HASH)
> +      {
> +        key_info->flags|= HA_NOSAME;
> +        key_info->algorithm= HA_KEY_ALG_UNDEF;

Why HA_KEY_ALG_UNDEF?

> +      }
>        key_create_info.algorithm= key_info->algorithm;
>        /*
>          We copy block size directly as some engines, like Area, sets this
> @@ -10262,6 +10304,8 @@ copy_data_between_tables(THD *thd, TABLE *from, TABLE *to,
>    to->s->default_fields= 0;
>    for (Field **ptr=to->field ; *ptr ; ptr++)
>    {
> +    if ((*ptr)->flags & LONG_UNIQUE_HASH_FIELD)
> +      continue;

Really? I'd think it would skip all virtual fields anyway, so you
should not need any special checks for your HASH field.

>      def=it++;
>      if (def->field)
>      {
> @@ -10485,14 +10529,23 @@ copy_data_between_tables(THD *thd, TABLE *from, TABLE *to,
>            if ((int) key_nr >= 0)
>            {
>              const char *err_msg= ER_THD(thd, ER_DUP_ENTRY_WITH_KEY_NAME);
> +            KEY *long_key= NULL;
>              if (key_nr == 0 && to->s->keys > 0 &&
>                  (to->key_info[0].key_part[0].field->flags &
>                   AUTO_INCREMENT_FLAG))
>                err_msg= ER_THD(thd, ER_DUP_ENTRY_AUTOINCREMENT_CASE);
> +            if (key_nr <= to->s->keys && to->key_info[key_nr].algorithm
> +                    == HA_KEY_ALG_LONG_HASH)
> +            {
> +              long_key= to->key_info + key_nr;
> +                setup_keyinfo_hash(long_key);
> +            }
>              print_keydup_error(to,
>                                 key_nr >= to->s->keys ? NULL :
>                                     &to->key_info[key_nr],
>                                 err_msg, MYF(0));
> +            if (long_key)
> +              re_setup_keyinfo_hash(long_key);

You already have this exact code in handler::print_error.
Maybe it'd be better to move it inside print_keydup_error(), to have
it in only one place?

>            }
>            else
>              to->file->print_error(error, MYF(0));
> diff --git a/sql/table.cc b/sql/table.cc
> --- a/sql/table.cc
> +++ b/sql/table.cc
> @@ -1143,10 +1150,59 @@ bool parse_vcol_defs(THD *thd, MEM_ROOT *mem_root, TABLE *table,
>      pos+= expr_length;
>    }
>  
> -  /* Now, initialize CURRENT_TIMESTAMP fields */
> +  /* Now, initialize CURRENT_TIMESTAMP and UNIQUE_INDEX_HASH_FIELD fields */
>    for (field_ptr= table->field; *field_ptr; field_ptr++)
>    {
>      Field *field= *field_ptr;
> +    if (field->flags & LONG_UNIQUE_HASH_FIELD)
> +    { 
> +      List<Item > *field_list= new (mem_root) List<Item >();

Why do you add a space before a closing angle bracket?
I don't think I've seen this style anywhere in our codebase.

> +      Item *list_item;
> +      KEY *key;
> +      uint key_index;
> +      for (key_index= 0; key_index < table->s->keys; key_index++)
> +      {
> +        key=table->key_info + key_index;
> +        if (key->algorithm == HA_KEY_ALG_LONG_HASH &&
> +                key->key_part[key->user_defined_key_parts].fieldnr == field->field_index+ 1)
> +            break;
> +      }
> +      KEY_PART_INFO *keypart;
> +      for (uint i=0; i < key->user_defined_key_parts; i++)
> +      {
> +        keypart= key->key_part + i;
> +        if (!keypart->length)
> +        {
> +          list_item= new(mem_root)Item_field(thd, keypart->field);
> +        }
> +        else
> +        {
> +          list_item= new(mem_root)Item_func_left(thd,
> +                      new (mem_root)Item_field(thd, keypart->field),
> +                      new (mem_root) Item_int(thd, keypart->length));
> +          list_item->fix_fields(thd, NULL);
> +        }
> +        field_list->push_back(list_item, mem_root);
> +      }
> +      Item_func_hash *hash_item= new(mem_root)Item_func_hash(thd, *field_list);
> +      Virtual_column_info *v= new (mem_root) Virtual_column_info();
> +      field->vcol_info= v;
> +      field->vcol_info->expr= hash_item;
> +      uint parts= key->user_defined_key_parts;
> +      key->user_defined_key_parts= key->ext_key_parts= key->usable_key_parts= 1;
> +      key->key_part+= parts;
> +
> +      if (key->flags & HA_NULL_PART_KEY)
> +        key->key_length= HA_HASH_KEY_LENGTH_WITH_NULL;
> +      else
> +        key->key_length= HA_HASH_KEY_LENGTH_WITHOUT_NULL;
> +
> +      if (key->flags & HA_NULL_PART_KEY)
> +        key->key_length= HA_HASH_KEY_LENGTH_WITH_NULL;
> +      else
> +        key->key_length= HA_HASH_KEY_LENGTH_WITHOUT_NULL;

Copy-paste error? The same if() is repeated twice.

> +      *(vfield_ptr++)= *field_ptr;
> +    }
>      if (field->has_default_now_unireg_check())
>      {
>        expr_str.length(parse_vcol_keyword.length);
> @@ -1287,6 +1366,7 @@ int TABLE_SHARE::init_from_binary_frm_image(THD *thd, bool write,
>  
>    keyinfo= &first_keyinfo;
>    share->ext_key_parts= 0;
> +  share->long_unique_table= 0;

I don't think that's needed; TABLE_SHARE is probably bzero-ed by this time
(otherwise there would've been a lot more initializations here).

>    thd->mem_root= &share->mem_root;
>  
>    if (write && write_frm_image(frm_image, frm_length))
> @@ -2162,6 +2255,51 @@ int TABLE_SHARE::init_from_binary_frm_image(THD *thd, bool write,
>          share->default_fields++;
>      }
>    }
> +
> +  keyinfo= share->key_info;
> +  for (uint i=0; i < hash_fields; i++, field_ptr++)
> +  {
> +    LEX_CSTRING comment, name;
> +    uint32 flags= 0;
> +    Column_definition_attributes attr;
> +	comment.str= (char*) "";
> +	comment.length=0;
> +    //There are n no of hash fields for n long unique key
> +    while(keyinfo->algorithm != HA_KEY_ALG_LONG_HASH)
> +      keyinfo++;
> +    attr.length= HA_HASH_FIELD_LENGTH;
> +    if (keyinfo->flags & HA_NULL_PART_KEY)
> +      attr.pack_flag= FIELDFLAG_MAYBE_NULL;
> +    attr.unireg_check= Field::NONE;
> +    attr.charset= share->table_charset;//STODO
> +    name.str= (const char *)alloc_root(&share->mem_root, LONG_HASH_FIELD_NAME_LENGTH);
> +    make_long_hash_field_name(&name, &fieldnames);
> +    fieldnames.count++;
> +    fieldnames.type_names[share->fields + i] = name.str;
> +    Record_addr addr(share->default_values + share->reclength, null_pos, null_bit_pos);
> +    *field_ptr= reg_field=
> +      attr.make_field(share, &share->mem_root, &addr, &type_handler_longlong, &name, flags);
> +    if (!reg_field)				// Not supported field type
> +      goto err;
> +    reg_field->field_index= i + share->fields;
> +    reg_field->comment=comment;
> +    reg_field->flags|= LONG_UNIQUE_HASH_FIELD;
> +    reg_field->invisible= INVISIBLE_FULL;
> +    if (!(reg_field->flags & NOT_NULL_FLAG))
> +    {
> +      if (!(null_bit_pos= (null_bit_pos + 1) & 7))
> +        null_pos++;
> +    }
> +    share->reclength+= HA_HASH_FIELD_LENGTH;
> +    keyinfo++;
> +  }

I'm sorry :(
After I saw your chat with Marko, I realized a big problem
with this approach. If we do it this way, it'll be very difficult
to change anything in the future. Say you'd want to do persistent
generated columns (I'm not saying you should, it's just an example) -
if LONG_UNIQUE_HASH_FIELD fields are stored in the frm, you simply make
the generated columns persistent and everything works. If LONG_UNIQUE_HASH_FIELD
fields are generated on the fly, you'd need to generate persistent
or virtual fields here to match what the actual table in the storage
engine has, so you'd still need to be able to write in the frm
that some LONG_UNIQUE_HASH_FIELD fields are persistent.
It looks like it'd be more future-proof just to write LONG_UNIQUE_HASH_FIELD
fields into the frm as you did in a previous patch. Sorry for this.

> +  keyinfo= share->key_info;
> +  for (uint i =0; hash_fields && i < share->keys; i++)
> +  {
> +    if(keyinfo[i].algorithm != HA_KEY_ALG_LONG_HASH)
> +      for (uint j= 0; j < keyinfo->user_defined_key_parts; j++)
> +        keyinfo[i].key_part[j].offset+= extra_null_field_bytes;
> +  }
>    *field_ptr=0;					// End marker
>    /* Sanity checks: */
>    DBUG_ASSERT(share->fields>=share->stored_fields);
> diff --git a/sql/table_cache.cc b/sql/table_cache.cc
> --- a/sql/table_cache.cc
> +++ b/sql/table_cache.cc
> @@ -824,11 +824,9 @@ TABLE_SHARE *tdc_acquire_share(THD *thd, TABLE_LIST *tl, uint flags,
>        DBUG_RETURN(0);
>      else if (res == 1)
>        continue;
> -
>      element= (TDC_element*) lf_hash_search_using_hash_value(&tdc_hash,
>               thd->tdc_hash_pins, hash_value, (uchar*) key, key_length);
>      lf_hash_search_unpin(thd->tdc_hash_pins);
> -    DBUG_ASSERT(element);

Why?

>  
>      if (!(share= alloc_table_share(tl->db.str, tl->table_name.str, key, key_length)))
>      {

Regards,
Sergei
Chief Architect MariaDB
and security@xxxxxxxxxxx