maria-developers team mailing list archive

Thread
Date
Re: 2467eb2d314: MDEV-27743 Remove Lex::charset

To: Alexander Barkov <bar@xxxxxxxxxxx>
From: Sergei Golubchik <serg@xxxxxxxxxxx>
Date: Sun, 13 Mar 2022 23:50:58 +0100
Cc: maria-developers@xxxxxxxxxxxxxxxxxxx
In-reply-to: <3QSJxi4OVfD.A.gVF.Kd1KiB@pweza>
Hi, Alexander,

I had only a couple of questions/comments, see below:

On Mar 11, Alexander Barkov wrote:
> revision-id: 2467eb2d314 (mariadb-10.6.1-330-g2467eb2d314)
> parent(s): 61b72ed3dde
> author: Alexander Barkov
> committer: Alexander Barkov
> timestamp: 2022-02-10 10:27:55 +0400
> message:
> 
>     MDEV-27743 Remove Lex::charset
>     
>     This patch also fixes:
>     MDEV-27690 Crash on `CHARACTER SET csname COLLATE DEFAULT` in column definition

how do you plan to fix it in 10.2-10.8 ?

> 
> diff --git a/mysql-test/main/ctype_collate_column.result b/mysql-test/main/ctype_collate_column.result
> new file mode 100644
> index 00000000000..b20be5d3a48
> --- /dev/null
> +++ b/mysql-test/main/ctype_collate_column.result
> @@ -0,0 +1,450 @@
> +#
> +# Start of 10.9 tests
> +#
> +#
> +# MDEV-27743 Remove Lex::charset

1. Jira doesn't explain what user-visible changes one can expect.
If there are none, it's not clear why you needed this big test case

2. the test is totally unreadable. Could you do a normal test
instead? Like, put `select` instead of `execute immediate`
and convert the output to a .test file?

> +#
> +CREATE TABLE cs (cs VARCHAR(64) NOT NULL);
> +INSERT INTO cs (cs) VALUES
> +(''),
> +('CHARACTER SET latin1'),
> +('CHARACTER SET utf8mb4');
> +CREATE TABLE cl0 (cl0 VARCHAR(64) NOT NULL);
> +INSERT INTO cl0 (cl0) VALUES
> +(''),
> +('BINARY'),
> +('COLLATE DEFAULT'),
> +('COLLATE latin1_bin'),
> +('COLLATE latin1_swedish_ci'),
> +('COLLATE utf8mb4_bin'),
> +('COLLATE utf8mb4_general_ci');
> +CREATE TABLE cl1 (cl1 VARCHAR(64) NOT NULL);
> +INSERT INTO cl1 (cl1) VALUES
> +(''),
> +('COLLATE DEFAULT'),
> +('COLLATE latin1_bin'),
> +('COLLATE latin1_swedish_ci'),
> +('COLLATE utf8mb4_bin'),
> +('COLLATE utf8mb4_general_ci');
...
> diff --git a/mysql-test/main/ctype_latin1.result b/mysql-test/main/ctype_latin1.result
> index 0b16d1cf6f2..70577f2c22c 100644
> --- a/mysql-test/main/ctype_latin1.result
> +++ b/mysql-test/main/ctype_latin1.result
> @@ -8889,6 +8889,38 @@ a	b
>  111	111
>  DROP TABLE t1;
>  #
> +# MDEV-27690 Crash on `CHARACTER SET csname COLLATE DEFAULT` in column definition
> +#
> +CREATE TABLE t1 (a CHAR(10) CHARACTER SET latin1 COLLATE DEFAULT);
> +SHOW CREATE TABLE t1;
> +Table	Create Table
> +t1	CREATE TABLE `t1` (
> +  `a` char(10) DEFAULT NULL
> +) ENGINE=MyISAM DEFAULT CHARSET=latin1
> +DROP TABLE t1;
> +SELECT CAST('a' AS CHAR(10) CHARACTER SET latin1 COLLATE DEFAULT);
> +CAST('a' AS CHAR(10) CHARACTER SET latin1 COLLATE DEFAULT)
> +a
> +CREATE TABLE t1 AS
> +SELECT CAST('a' AS CHAR(10) CHARACTER SET latin1 COLLATE DEFAULT) AS c1;
> +SHOW CREATE TABLE t1;
> +Table	Create Table
> +t1	CREATE TABLE `t1` (
> +  `c1` varchar(10) DEFAULT NULL
> +) ENGINE=MyISAM DEFAULT CHARSET=latin1
> +DROP TABLE t1;
> +SELECT COLUMN_GET(COLUMN_CREATE(0, 'string'),0 AS CHAR CHARACTER SET latin1 COLLATE DEFAULT) AS c1;
> +c1
> +string
> +CREATE TABLE t1 AS
> +SELECT COLUMN_GET(COLUMN_CREATE(0, 'string'),0 AS CHAR CHARACTER SET latin1 COLLATE DEFAULT) AS c1;
> +SHOW CREATE TABLE t1;
> +Table	Create Table
> +t1	CREATE TABLE `t1` (
> +  `c1` longtext DEFAULT NULL
> +) ENGINE=MyISAM DEFAULT CHARSET=latin1
> +DROP TABLE t1;

1. why is it in "10.2 tests" ?
2. shouldn't here be at least one test that COLLATE DEFAULT is not a no-op?
Like CREATE TABLE (... COLLATE DEFAULT ...) COLLATE _non_default ?
Or is it in your generated ctype_collate_column.test?

> +#
>  # End of 10.2 tests
>  #
>  #
> diff --git a/mysql-test/suite/roles/create_and_drop_role_invalid_user_table.test b/mysql-test/suite/roles/create_and_drop_role_invalid_user_table.test
> index 9cf0d7b4aff..dac6eab21e9 100644
> --- a/mysql-test/suite/roles/create_and_drop_role_invalid_user_table.test
> +++ b/mysql-test/suite/roles/create_and_drop_role_invalid_user_table.test
> @@ -36,7 +36,7 @@ set default role test_role for root@localhost;
>  drop role test_role;
>  drop user test_user@localhost;
>  
> -alter table user add column default_role char(80) binary default '' not null
> +alter table user add column default_role char(80) default '' not null
>                                                     COLLATE utf8_general_ci

Heh, interesting that it worked before
good catch

>  after is_role;
>  alter table user add max_statement_time decimal(12,6) default 0 not null
> diff --git a/sql/structs.h b/sql/structs.h
> index 9a5006e8b47..db065c49931 100644
> --- a/sql/structs.h
> +++ b/sql/structs.h
> @@ -599,11 +599,140 @@ class DDL_options: public DDL_options_st
...
> +class Lex_collation: public Lex_collation_st
> +{
> +public:
> +  Lex_collation(CHARSET_INFO *collation, Type type)
> +  {
> +    m_collation= collation;
> +    m_type= type;
> +  }
> +  static Lex_collation national(bool bin_mod)
> +  {
> +    if (bin_mod)
> +      return Lex_collation(&my_charset_utf8mb3_bin, TYPE_EXPLICIT);
> +    return Lex_collation(&my_charset_utf8mb3_general_ci, TYPE_IMPLICIT);

utf8mb3? not utf8?

> +  }
> +};
> +
> +
>  struct Lex_length_and_dec_st
>  {
> -private:
> +protected:
>    uint32 m_length;
>    uint8  m_dec;
> +  uint8  m_collation_type:2;
>    bool   m_has_explicit_length:1;
>    bool   m_has_explicit_dec:1;
>  public:
> diff --git a/sql/sql_lex.cc b/sql/sql_lex.cc
> index ae6ae9a0cc0..6ca10267187 100644
> --- a/sql/sql_lex.cc
> +++ b/sql/sql_lex.cc
> @@ -11816,3 +11784,257 @@ bool SELECT_LEX_UNIT::explainable() const
...
> +CHARSET_INFO *
> +Lex_collation_st::resolved_to_character_set(CHARSET_INFO *def) const
> +{
> +  DBUG_ASSERT(def);
> +  if (m_type != TYPE_CONTEXTUALLY_TYPED)
> +  {
> +    if (!m_collation)
> +      return def;       // Empty - not typed at all
> +    return m_collation; // Explicitly typed
> +  }
> +
> +  // Contextually typed
> +  DBUG_ASSERT(m_collation);
> +
> +  if (m_collation == &my_charset_bin)    // CHAR(10) BINARY
> +    return find_bin_collation(def);
> +
> +  if (m_collation == &my_charset_latin1) // CHAR(10) COLLATE DEFAULT
> +    return find_default_collation(def);

Uhm. Could you create two dummy CHARSET_INFO variables for that instead?
Like

  CHARSET_INFO binary_collaton, default_collation;

no need to initialize them, but to have distinct values for binary/default
and not reuse my_charset_bin and my_charset_latin1.

> +
> +  /*
> +    Non-binary and non-default contextually typed collation.
> +    We don't have such yet - the parser cannot produce this.
> +    But will have soon, e.g. "uca1400_as_ci".
> +  */
> +  DBUG_ASSERT(0);
> +  return NULL;
> +}
> +
> +
...
> diff --git a/sql/field.h b/sql/field.h
> index 2ed02b37cfd..fe5ba435202 100644
> --- a/sql/field.h
> +++ b/sql/field.h
> @@ -5493,6 +5492,22 @@ class Column_definition: public Sql_alloc,
>    { return compression_method_ptr; }
>  
>    bool check_vcol_for_key(THD *thd) const;
> +
> +  void set_lex_collation(const Lex_collation_st &lc)
> +  {
> +    charset= lc.collation();
> +    if (lc.is_contextually_typed_collation())
> +      flags|= BINCMP_FLAG;
> +    else
> +      flags&= ~BINCMP_FLAG;

Isn't COLLATE DEFAULT also contextually typed?

> +  }
> +  Lex_collation lex_collation() const
> +  {
> +    return Lex_collation(charset,
> +                         flags & BINCMP_FLAG ?
> +                         Lex_collation_st::TYPE_CONTEXTUALLY_TYPED :
> +                         Lex_collation_st::TYPE_IMPLICIT);
> +  }
>  };
>  
>  

Regards,
Sergei
VP of MariaDB Server Engineering
and security@xxxxxxxxxxx
Follow ups

Re: 2467eb2d314: MDEV-27743 Remove Lex::charset
From: Alexander Barkov, 2022-03-15