← Back to team overview

maria-developers team mailing list archive

Re: 026e52bad84: MDEV-27743 Remove Lex::charset

 

Hi, Alexander,

Please, see below

On Mar 18, Alexander Barkov wrote:
> revision-id: 026e52bad84 (mariadb-10.6.1-369-g026e52bad84)
> parent(s): 369498d6b8c
> author: Alexander Barkov
> committer: Alexander Barkov
> timestamp: 2022-03-18 12:26:03 +0400
> message:
> 
> MDEV-27743 Remove Lex::charset
> 
> This patch also fixes:
> 
> MDEV-27690 Crash on `CHARACTER SET csname COLLATE DEFAULT` in column definition
> MDEV-27853 Wrong data type on column `COLLATE DEFAULT` and table `COLLATE some_non_default_collation`
> MDEV-28067 Multiple conflicting column COLLATE clauses are not rejected
> MDEV-28118 Wrong collation of `CAST(.. AS CHAR COLLATE DEFAULT)`
> MDEV-28119 Wrong column collation on MODIFY + CONVERT
> 
> diff --git a/mysql-test/main/ctype_collate_column.test b/mysql-test/main/ctype_collate_column.test
> new file mode 100644
> index 00000000000..6a7cf2cc84c
> --- /dev/null
> +++ b/mysql-test/main/ctype_collate_column.test
> @@ -0,0 +1,637 @@
...
> +
> +DELIMITER $$;
> +CREATE PROCEDURE p1(dt TEXT, cs TEXT, cl0 TEXT, cl1 TEXT, cl2 TEXT, tcs TEXT)
> +BEGIN
> +  DECLARE errstate TEXT DEFAULT NULL;
> +  DECLARE errno INT DEFAULT NULL;
> +  DECLARE errmsg TEXT DEFAULT NULL;
> +  DECLARE query TEXT;
> +  DECLARE result TEXT;
> +  DECLARE CONTINUE HANDLER FOR SQLEXCEPTION
> +  BEGIN
> +    GET DIAGNOSTICS CONDITION 1 errstate = RETURNED_SQLSTATE,
> +      errno = MYSQL_ERRNO, errmsg = MESSAGE_TEXT;
> +  END;
> +  SET query= CONCAT('CREATE TABLE t1 (a ', dt, ' ', cs, ' ', cl0,
> +                           ' NOT NULL ',cl1,
> +                           ' DEFAULT '''' ', cl2,
> +                           ') ', tcs, ' ENGINE=Memory');
> +  EXECUTE IMMEDIATE query;
> +  IF errmsg IS NOT NULL
> +  THEN
> +    SET result=CONCAT('ERROR: ',
> +                      COALESCE(errstate,'<NULL>'), ' ',
> +                      COALESCE(errno,'<NULL>'), ' ',
> +                      COALESCE(errmsg,'<NULL>'));
> +    INSERT INTO results (dt,cs,cl0,cl1,cl2,tcs,query,result)
> +                 VALUES (dt,cs,cl0,cl1,cl2,tcs,query,result);
> +  ELSE
> +    FOR row IN (SELECT CONCAT(COLUMN_TYPE,
> +                  ' CHARACTER SET ', CHARACTER_SET_NAME,
> +                  ' COLLATE ', COLLATION_NAME) AS result
> +                FROM INFORMATION_SCHEMA.COLUMNS
> +                WHERE TABLE_SCHEMA='test' AND TABLE_NAME='t1')
> +    DO
> +      INSERT INTO results (dt,cs,cl0,cl1,cl2,tcs,query,result)
> +                   VALUES (dt,cs,cl0,cl1,cl2,tcs,query,row.result);
> +    END FOR;
> +    DROP TABLE t1;
> +  END IF;
> +END;
> +$$
> +DELIMITER ;$$
> +
> +
> +DELIMITER $$;
> +CREATE PROCEDURE p3(dt TEXT)
> +BEGIN
> +  FOR row IN (
> +    SELECT cs, cl0, cl1.cl1 AS cl1, cl2.cl1 AS cl2, tcs
> +    FROM cs, cl0, cl1, cl1 AS cl2, tcs
> +    ORDER BY cs, cl0, cl1, cl2, tcs
> +  )
> +  DO
> +    CALL p1('char(10)', row.cs, row.cl0, row.cl1, row.cl2, row.tcs);

minor detail: here ^^^ you want to use dt, not 'char(10)'

> +  END FOR;
> +END;
> +$$
> +DELIMITER ;$$
> +
> +
> +--disable_column_names
> +CALL p3('char(10)');
> +--enable_column_names
> +
> +SELECT * FROM results_statistics;

isn't it redundant? you select all rows from result
anyway. If something will change, the result of that select will already
cause the the to fail. Why do you need second mismatch in the
result/reject.

> +
> +--vertical_results
> +SELECT query, result, '' AS `` FROM results
> +ORDER BY dt, cs, cl0, cl1, cl2, tcs;
> +--horizontal_results
> +
> +DROP PROCEDURE p1;
> +DROP PROCEDURE p3;
> +
> +DROP TABLE cs, cl0, cl1, tcs;
...
> +# CREATE TABLE t1
> +# (
> +#   a CHAR(10) BINARY NOT NULL DEFAULT ''
> +# ) CHARACTER SET cs COLLATE cs_bin;
> +
> +DELETE FROM results
> +WHERE cs=''
> +  AND cl0='BINARY'
> +  AND cl1=''
> +  AND cl2=''
> +  AND tcs LIKE 'CHARACTER SET%'
> +  AND result NOT LIKE 'ERROR%'
> +  AND result RLIKE CONCAT('COLLATE ', tcs_character_set_name, '_bin');
> +SELECT ROW_COUNT();
...
> +DROP FUNCTION is_collate_clause_with_explicit_default_collation;
> +DROP FUNCTION is_collate_clause_with_explicit_collation;
> +DROP FUNCTION is_conflicting_charset_explicit_collate_explicit;
> +DROP FUNCTION is_conflicting_collate_explicit2;
> +DROP FUNCTION is_conflicting_collate_default_collate_explicit;
> +DROP FUNCTION collate_cs_default_collation;

I think the main result is `select * from results` (with order by, etc).
It's great, that you added that.

Everything else is redundant, I suggest to remove other selects and
deletes.

> +
> +
> +--echo #
> +--echo # End of 10.9 tests
> +--echo #
> diff --git a/sql/lex_charset.h b/sql/lex_charset.h
> new file mode 100644
> index 00000000000..d95f0bc1920
> --- /dev/null
> +++ b/sql/lex_charset.h
> @@ -0,0 +1,202 @@
...
> +#define LEX_CHARSET_COLLATION_TYPE_BITS 2
> +  static_assert(((1<<LEX_CHARSET_COLLATION_TYPE_BITS)-1) >=
> +                 TYPE_COLLATE_CONTEXTUALLY_TYPED,
> +                 "Lex_charset_collation_st::Type bits check");
> +
> +protected:
> +  CHARSET_INFO *m_ci;
> +  Type m_type;
> +public:
> +  static CHARSET_INFO *find_bin_collation(CHARSET_INFO *cs);
> +  static CHARSET_INFO *find_default_collation(CHARSET_INFO *cs);
> +public:
...
> +  void set_contextually_typed_binary_style()
> +  {
> +    m_ci= &my_collation_contextually_typed_binary;
> +    m_type= TYPE_COLLATE_CONTEXTUALLY_TYPED;
> +  }
> +  bool is_contextually_typed_collate_default() const
> +  {
> +    return m_ci == &my_collation_contextually_typed_default &&
> +           m_type == TYPE_COLLATE_CONTEXTUALLY_TYPED;

what does it mean when only one of these two expressions is true?
Say, the first true, the second false?
Or the first false, the second true?

> +  }
> +  bool is_contextually_typed_binary_style() const
> +  {
> +    return m_ci == &my_collation_contextually_typed_binary &&
> +           m_type == TYPE_COLLATE_CONTEXTUALLY_TYPED;
> +  }
...
> diff --git a/strings/ctype-bin.c b/strings/ctype-bin.c
> index bb746ad90b0..a5c9129ebea 100644
> --- a/strings/ctype-bin.c
> +++ b/strings/ctype-bin.c
> @@ -630,3 +630,69 @@ struct charset_info_st my_charset_bin =
>      &my_charset_handler,
>      &my_collation_binary_handler
>  };
> +
> +
> +struct charset_info_st my_collation_contextually_typed_binary=
> +{
> +    0,0,0,                        /* number        */

I thought you will never need to resove the
&my_collation_contextually_typed_binary pointer, that is, I thought the
values in the my_collation_contextually_typed_binary struct should not
matter as they'll never be used. Was that wrong?

Regards,
Sergei
VP of MariaDB Server Engineering
and security@xxxxxxxxxxx


Follow ups