maria-developers team mailing list archive

Thread
Date
Re: 49ecf935415: MDEV-27009 Add UCA-14.0.0 collations

To: Alexander Barkov <bar@xxxxxxxxxxx>
From: Sergei Golubchik <serg@xxxxxxxxxxx>
Date: Wed, 16 Mar 2022 19:19:30 +0100
Cc: maria-developers@xxxxxxxxxxxxxxxxxxx
In-reply-to: <Pp3lqYvigGC.A.SZC.BqyLiB@pweza>
Hi, Alexander,

On Mar 14, Alexander Barkov wrote:
> revision-id: 49ecf935415 (mariadb-10.6.1-335-g49ecf935415)
> parent(s): c67789f63c8
> author: Alexander Barkov
> committer: Alexander Barkov
> timestamp: 2022-02-28 14:04:58 +0400
> message:
> 
> MDEV-27009 Add UCA-14.0.0 collations

Please list all user-visible changes in the commit message: mainly that
collations are now decoupled from charsets, plus the new syntax in CREATE
TABLE, the changes in I_S tables, etc.
 
> diff --git a/mysql-test/include/ctype_utf_uca1400_ids.inc b/mysql-test/include/ctype_utf_uca1400_ids.inc
> new file mode 100644
> index 00000000000..09cf49fc0e7
> --- /dev/null
> +++ b/mysql-test/include/ctype_utf_uca1400_ids.inc
> @@ -0,0 +1,17 @@

file names are confusing. better rename ctype_ucs_uca1400_ids.inc
to something like ctype_convert_uca1400_ids
and ctype_utf_uca1400_ids to ctype_set_names_uca1400_ids
or something like that, to show what they do.

> +
> +--disable_ps_protocol
> +--enable_metadata
> +DELIMITER $$;
> +FOR rec IN (SELECT COLLATION_NAME
> +            FROM INFORMATION_SCHEMA.COLLATION_CHARACTER_SET_APPLICABILITY
> +            WHERE CHARACTER_SET_NAME=@charset
> +              AND COLLATION_NAME RLIKE 'uca1400'
> +            ORDER BY ID)
> +DO
> +  EXECUTE IMMEDIATE CONCAT('SET NAMES ',@charset,' COLLATE ', rec.COLLATION_NAME);
> +  SELECT rec.COLLATION_NAME;
> +END FOR;
> +$$
> +DELIMITER ;$$
> +--disable_metadata
> +--enable_ps_protocol
> diff --git a/include/m_ctype.h b/include/m_ctype.h
> index 4c6628b72b3..706764ead2a 100644
> --- a/include/m_ctype.h
> +++ b/include/m_ctype.h
> @@ -34,7 +34,9 @@ enum loglevel {
>  extern "C" {
>  #endif
>  
> -#define MY_CS_NAME_SIZE			32
> +#define MY_CS_CHARACTER_SET_NAME_SIZE   32
> +#define MY_CS_COLLATION_NAME_SIZE       64

That's FULL_COLLATION_NAME_SIZE, right?

> +
>  #define MY_CS_CTYPE_TABLE_SIZE		257
>  #define MY_CS_TO_LOWER_TABLE_SIZE	256
>  #define MY_CS_TO_UPPER_TABLE_SIZE	256
> @@ -240,6 +242,46 @@ typedef enum enum_repertoire_t
>  } my_repertoire_t;
>  
>  
> +/* ID compatibility */
> +typedef enum enum_collation_id_type
> +{
> +  MY_COLLATION_ID_TYPE_PRECISE=          0,
> +  MY_COLLATION_ID_TYPE_COMPAT_100800=    1
> +} my_collation_id_type_t;
> +
> +
> +/* Collation name display modes */
> +typedef enum enum_collation_name_mode
> +{
> +  MY_COLLATION_NAME_MODE_FULL=                                 0,
> +  MY_COLLATION_NAME_MODE_CONTEXT=                              1
> +} my_collation_name_mode_t;
> +
> +
> +/* Level flags */
> +#define MY_CS_LEVEL_BIT_PRIMARY    0x00
> +#define MY_CS_LEVEL_BIT_SECONDARY  0x01
> +#define MY_CS_LEVEL_BIT_TERTIARY   0x02
> +#define MY_CS_LEVEL_BIT_QUATERNARY 0x03
> +
> +#define MY_CS_COLL_LEVELS_S1       (1<<MY_CS_LEVEL_BIT_PRIMARY)
> +
> +#define MY_CS_COLL_LEVELS_AI_CS    (1<<MY_CS_LEVEL_BIT_PRIMARY)| \
> +                                   (1<<MY_CS_LEVEL_BIT_TERTIARY)
> +
> +#define MY_CS_COLL_LEVELS_S2       (1<<MY_CS_LEVEL_BIT_PRIMARY)| \
> +                                   (1<<MY_CS_LEVEL_BIT_SECONDARY)
> +
> +#define MY_CS_COLL_LEVELS_S3       (1<<MY_CS_LEVEL_BIT_PRIMARY)| \
> +                                   (1<<MY_CS_LEVEL_BIT_SECONDARY) | \
> +                                   (1<<MY_CS_LEVEL_BIT_TERTIARY)

AI_CS and S3 don't seem to be used yet

> +
> +#define MY_CS_COLL_LEVELS_S4       (1<<MY_CS_LEVEL_BIT_PRIMARY)| \
> +                                   (1<<MY_CS_LEVEL_BIT_SECONDARY) | \
> +                                   (1<<MY_CS_LEVEL_BIT_TERTIARY)  | \
> +                                   (1<<MY_CS_LEVEL_BIT_QUATERNARY)
> +
> +
>  /* Flags for strxfrm */
>  #define MY_STRXFRM_LEVEL1          0x00000001 /* for primary weights   */
>  #define MY_STRXFRM_LEVEL2          0x00000002 /* for secondary weights */
> diff --git a/sql/sql_alter.cc b/sql/sql_alter.cc
> index 86c6e9a27f8..9ddd482ad57 100644
> --- a/sql/sql_alter.cc
> +++ b/sql/sql_alter.cc
> @@ -546,6 +546,7 @@ bool Sql_cmd_alter_table::execute(THD *thd)
>  
>    result= mysql_alter_table(thd, &select_lex->db, &lex->name,
>                              &create_info,
> +                            lex->create_info.default_charset_collation,

I don't see why you need a new argument here. It's
create_info.default_charset_collation, so mysql_alter_table already gets
it in create_info. All other mysql_alter_table invocations also
take a create_info argument and can get default_charset_collation from there.

>                              first_table,
>                              &alter_info,
>                              select_lex->order_list.elements,
> diff --git a/sql/sql_partition_admin.cc b/sql/sql_partition_admin.cc
> index fb1ae0d5fc7..4188dde252b 100644
> --- a/sql/sql_partition_admin.cc
> +++ b/sql/sql_partition_admin.cc
> @@ -211,6 +211,7 @@ bool compare_table_with_partition(THD *thd, TABLE *table, TABLE *part_table,
>    part_table->use_all_columns();
>    table->use_all_columns();
>    if (unlikely(mysql_prepare_alter_table(thd, part_table, &part_create_info,
> +                                         Lex_maybe_default_charset_collation(),

Same. Can be in part_create_info

>                                           &part_alter_info, &part_alter_ctx)))
>    {
>      my_error(ER_TABLES_DIFFERENT_METADATA, MYF(0));
> diff --git a/sql/sql_i_s.h b/sql/sql_i_s.h
> index bed2e886718..5ff06d32231 100644
> --- a/sql/sql_i_s.h
> +++ b/sql/sql_i_s.h
> @@ -162,6 +162,11 @@ class Yesno: public Varchar
>  {
>  public:
>    Yesno(): Varchar(3) { }
> +  static LEX_CSTRING value(bool val)
> +  {
> +    return val ? Lex_cstring(STRING_WITH_LEN("Yes")) :
> +                 Lex_cstring();
> +  }

eh... please, rename the class from Yesno to something like
Yesempty or Yes_or_empty, something that says that the second value
is not meant to be Lex_cstring(STRING_WITH_LEN("No"))

>  };
>  
>  
> diff --git a/sql/table.cc b/sql/table.cc
> index a683a78ff49..c28cb2bd928 100644
> --- a/sql/table.cc
> +++ b/sql/table.cc
> @@ -3491,6 +3493,16 @@ int TABLE_SHARE::init_from_sql_statement_string(THD *thd, bool write,
>    else
>      thd->set_n_backup_active_arena(arena, &backup);
>  
> +  /*
> +    THD::reset_db() does not set THD::db_charset,
> +    so it keeps pointing to the character set and collation
> +    of the current database, rather than the database of the
> +    new initialized table.

Hmm, is that correct? Could you check the other invocations of
thd->reset_db()? Perhaps they all need to switch charset?
In that case it should be done inside THD::reset_db().

Or maybe they have to use mysql_change_db_impl() instead?

> +    Let's call get_default_db_collation() before reset_db().
> +    This forces the db.opt file to be loaded.
> +  */
> +  db_cs= get_default_db_collation(thd, db.str);
> +
>    thd->reset_db(&db);
>    lex_start(thd);
>  
> @@ -3498,6 +3510,11 @@ int TABLE_SHARE::init_from_sql_statement_string(THD *thd, bool write,
>                  sql_unusable_for_discovery(thd, hton, sql_copy))))
>      goto ret;
>  
> +  if (!(thd->lex->create_info.default_table_charset=
> +         thd->lex->create_info.default_charset_collation.
> +           resolved_to_character_set(db_cs, db_cs)))
> +    DBUG_RETURN(true);

How could this (and similar if()'s in other files) fail?

> +
>    thd->lex->create_info.db_type= hton;
>  #ifdef WITH_PARTITION_STORAGE_ENGINE
>    thd->work_part_info= 0;                       // For partitioning
> diff --git a/sql/mysys_charset.h b/sql/mysys_charset.h
> new file mode 100644
> index 00000000000..86eaeedd432
> --- /dev/null
> +++ b/sql/mysys_charset.h
> @@ -0,0 +1,44 @@
> +#ifndef MYSYS_CHARSET
> +#define MYSYS_CHARSET
> +
> +/* Copyright (c) 2021, MariaDB Corporation.
> +
> +   This program is free software; you can redistribute it and/or modify
> +   it under the terms of the GNU General Public License as published by
> +   the Free Software Foundation; version 2 of the License.
> +
> +   This program is distributed in the hope that it will be useful,
> +   but WITHOUT ANY WARRANTY; without even the implied warranty of
> +   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
> +   GNU General Public License for more details.
> +
> +   You should have received a copy of the GNU General Public License
> +   along with this program; if not, write to the Free Software
> +   Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1335  USA */
> +
> +
> +#include "my_sys.h"
> +
> +
> +class Charset_loader_mysys: public MY_CHARSET_LOADER
> +{
> +public:
> +  Charset_loader_mysys()
> +  {
> +    my_charset_loader_init_mysys(this);
> +  }
> +  void raise_unknown_collation_error(const char *name,
> +                                     CHARSET_INFO *name_cs) const;
> +  CHARSET_INFO *get_charset(const char *cs_name, uint cs_flags, myf my_flags);
> +  CHARSET_INFO *get_exact_collation(const char *name, myf utf8_flag);
> +  CHARSET_INFO *get_contextually_typed_collation(CHARSET_INFO *cs,
> +                                                 const char *name);
> +  CHARSET_INFO *get_contextually_typed_collation(const char *name);
> +  CHARSET_INFO *get_contextually_typed_collation_or_error(CHARSET_INFO *cs,
> +                                                          const char *name);
> +  CHARSET_INFO *find_default_collation(CHARSET_INFO *cs);
> +  CHARSET_INFO *find_bin_collation_or_error(CHARSET_INFO *cs);
> +};

you can have C++ code in mysys too, you know, no need to put it
in sql/mysys*

> +
> +#endif // MYSYS_CHARSET
> +
> diff --git a/strings/ctype-simple.c b/strings/ctype-simple.c
> index b579f0af203..d09dfba86ed 100644
> --- a/strings/ctype-simple.c
> +++ b/strings/ctype-simple.c
> @@ -1940,13 +1941,26 @@ my_bool my_propagate_complex(CHARSET_INFO *cs __attribute__((unused)),
>  }
>  
>  
> +void my_ci_set_strength(struct charset_info_st *cs, uint strength)
> +{
> +  DBUG_ASSERT(strength > 0 && strength <= MY_STRXFRM_NLEVELS);

don't use && in asserts, please create two separate asserts instead:

 DBUG_ASSERT(strength > 0);
 DBUG_ASSERT(strength <= MY_STRXFRM_NLEVELS);

> +  cs->levels_for_order= ((1 << strength) - 1);

Why do you still use the old concept of "strength"? Why not use
a bitmap consistently everywhere?

> +}
> +
> +
> +void my_ci_set_level_flags(struct charset_info_st *cs, uint flags)
> +{
> +  DBUG_ASSERT(flags < (1<<MY_STRXFRM_NLEVELS));
> +  cs->levels_for_order= flags;
> +}
> +
>  /*
>    Normalize strxfrm flags
>  
>    SYNOPSIS:
>      my_strxfrm_flag_normalize()
> +    cs       - the CHARSET_INFO pointer
>      flags    - non-normalized flags
> -    nlevels  - number of levels
>      
>    NOTES:
>      If levels are omitted, then 1-maximum is assumed.
> diff --git a/sql/handler.h b/sql/handler.h
> index 8ad521e189a..1e82f37b1e7 100644
> --- a/sql/handler.h
> +++ b/sql/handler.h
> @@ -2409,7 +2386,32 @@ struct Table_specification_st: public HA_CREATE_INFO,
>    {
>      HA_CREATE_INFO::options= 0;
>      DDL_options_st::init();
> +    default_charset_collation.init();
> +  }
> +
> +  bool
> +  add_alter_list_item_convert_to_charset(const Lex_charset_collation_st &cl)
> +  {
> +    /*
> +      cs cannot be NULL, as sql_yacc.yy translates
> +         CONVERT TO CHARACTER SET DEFAULT
> +      to
> +         CONVERT TO CHARACTER SET <character-set-of-the-current-database>
> +      TODO: Shouldn't we postpone resolution of DEFAULT until the
> +      character set of the table owner database is loaded from its db.opt?
> +    */
> +    DBUG_ASSERT(cl.charset_collation());
> +    DBUG_ASSERT(!cl.is_contextually_typed_collation());
> +    alter_table_convert_to_charset= cl.charset_collation();
> +    default_charset_collation.Lex_charset_collation_st::operator=(cl);

looks quite ugly. can you do, like, default_charset_collation.set(cl) ?

> +    used_fields|= (HA_CREATE_USED_CHARSET | HA_CREATE_USED_DEFAULT_CHARSET);
> +    return false;
>    }
> +  bool add_table_option_default_charset(CHARSET_INFO *cs);
> +  bool add_table_option_default_collation(const Lex_charset_collation_st &cl);
> +  bool resolve_db_charset_and_collation(THD *thd,
> +                                        const LEX_CSTRING &db,
> +                                        bool is_alter);
>  };
>  
>  
> diff --git a/strings/ctype-uca1400data.h b/strings/ctype-uca1400data.h
> new file mode 100644
> index 00000000000..da95dcfde54
> --- /dev/null
> +++ b/strings/ctype-uca1400data.h
> @@ -0,0 +1,44151 @@
> +/*
> +  Generated from allkeys.txt version '14.0.0'
> +*/

if it's generated, do you need to check it in?
perhaps it should be generated during the build?
you've checked in allkeys1400.txt anyway.

> +static const uint16 uca1400_p000[]= { /* 0000 (4 weights per char) */
> +0x0000,0x0000,0x0000,0x0000, 0x0000,0x0000,0x0000,0x0000, /* 0000 */
> +0x0000,0x0000,0x0000,0x0000, 0x0000,0x0000,0x0000,0x0000, /* 0002 */
> diff --git a/sql/sql_lex.cc b/sql/sql_lex.cc
> index 6ca10267187..d115401a855 100644
> --- a/sql/sql_lex.cc
> +++ b/sql/sql_lex.cc
> @@ -542,6 +542,30 @@ bool LEX::add_alter_list(LEX_CSTRING name, LEX_CSTRING new_name, bool exists)
>  }
>  
>  
> +bool LEX::add_alter_list_item_convert_to_charset(
> +                                             THD *thd,
> +                                             CHARSET_INFO *cs,
> +                                             const Lex_charset_collation_st &cl)
> +{
> +  if (!cs)
> +  {
> +    Lex_charset_collation_st tmp;
> +    tmp.set_charset_collate_default(thd->variables.collation_database);

Hmm, what if one is doing ALTER TABLE db.test CHARSET DEFAULT
and the current db is not `db` but `test`?

> +    if (!(cs= tmp.charset_collation()))
> +      return true; // Should not actually happen

assert?

> +  }
> +
> +  Lex_explicit_charset_opt_collate tmp(cs, false);
> +  if (tmp.merge_opt_collate_or_error(cl) ||
> +      create_info.add_alter_list_item_convert_to_charset(
> +                    Lex_charset_collation(tmp)))
> +    return true;
> +
> +  alter_info.flags|= ALTER_CONVERT_TO;
> +  return false;
> +}
> +
> +
>  void LEX::init_last_field(Column_definition *field,
>                            const LEX_CSTRING *field_name)
>  {
> @@ -11871,29 +11869,41 @@ CHARSET_INFO *Lex_collation_st::find_default_collation(CHARSET_INFO *cs)
>    "def" is the upper level CHARACTER SET clause (e.g. of a table)
>  */
>  CHARSET_INFO *
> -Lex_collation_st::resolved_to_character_set(CHARSET_INFO *def) const
> +Lex_charset_collation_st::resolved_to_character_set(CHARSET_INFO *def) const
>  {
>    DBUG_ASSERT(def);
> -  if (m_type != TYPE_CONTEXTUALLY_TYPED)
> -  {
> -    if (!m_collation)
> -      return def;       // Empty - not typed at all
> -    return m_collation; // Explicitly typed
> +
> +  switch (m_type) {
> +  case TYPE_EMPTY:
> +    return def;
> +  case TYPE_CHARACTER_SET:
> +    DBUG_ASSERT(m_ci);
> +    return m_ci;
> +  case TYPE_COLLATE_EXACT:
> +    DBUG_ASSERT(m_ci);
> +    return m_ci;
> +  case TYPE_COLLATE_CONTEXTUALLY_TYPED:
> +    break;
>    }
>  
>    // Contextually typed
> -  DBUG_ASSERT(m_collation);
> +  DBUG_ASSERT(m_ci);
>  
> -  if (m_collation == &my_charset_bin)    // CHAR(10) BINARY
> -    return find_bin_collation(def);
> +  Charset_loader_mysys loader;
> +  if (is_contextually_typed_binary_style())    // CHAR(10) BINARY
> +    return loader.find_bin_collation_or_error(def);
>  
> -  if (m_collation == &my_charset_latin1) // CHAR(10) COLLATE DEFAULT
> -    return find_default_collation(def);
> +  if (is_contextually_typed_collate_default()) // CHAR(10) COLLATE DEFAULT
> +    return loader.find_default_collation(def);
> +
> +  const LEX_CSTRING context_name= collation_name_context_suffix();

I'd rather put this in assert, not in if(). Like

-  if (!strncasecmp(context_name.str, STRING_WITH_LEN("uca1400_")))
+  DBUG_ASSERT(!strncasecmp(context_name.str, STRING_WITH_LEN("uca1400_")));

> +  if (!strncasecmp(context_name.str, STRING_WITH_LEN("uca1400_")))
> +    return loader.get_contextually_typed_collation_or_error(def,
> +                                                            context_name.str);
>  
>    /*
> -    Non-binary and non-default contextually typed collation.
> +    Non-binary, non-default, non-uca1400 contextually typed collation.
>      We don't have such yet - the parser cannot produce this.
> -    But will have soon, e.g. "uca1400_as_ci".
>    */
>    DBUG_ASSERT(0);
>    return NULL;
> @@ -11944,58 +11972,106 @@ bool Lex_collation_st::
>        CHAR(10) BINARY                .. COLLATE latin1_bin
>        CHAR(10) COLLATE uca1400_as_ci .. COLLATE latin1_bin
>      */
> -    if (collation() == &my_charset_latin1 &&
> -        !(cl.collation()->state & MY_CS_PRIMARY))
> +    if (is_contextually_typed_collate_default() &&
> +        !cl.charset_collation()->default_flag())
>      {
> -      my_error(ER_CONFLICTING_DECLARATIONS, MYF(0),
> -               "COLLATE ", "DEFAULT", "COLLATE ",
> -               cl.collation()->coll_name.str);
> +      error_conflicting_collations_or_styles(*this, cl);
>        return true;
>      }
> -    if (collation() == &my_charset_bin &&
> -        !(cl.collation()->state & MY_CS_BINSORT))
> +
> +    if (is_contextually_typed_binary_style() &&
> +        !cl.charset_collation()->binsort_flag())
>      {
> -      my_error(ER_CONFLICTING_DECLARATIONS, MYF(0),
> -               "", "BINARY", "COLLATE ", cl.collation()->coll_name.str);
> +      error_conflicting_collations_or_styles(*this, cl);
>        return true;
>      }
>      *this= cl;
>      return false;
>    }
>  
> -  if (cl.is_contextually_typed_collation())
> -  {
> +  DBUG_ASSERT(0);
> +  return false;
> +}
> +
> +
> +bool
> +Lex_explicit_charset_opt_collate::
> +  merge_collate_or_error(const Lex_charset_collation_st &cl)
> +{
> +  DBUG_ASSERT(cl.type() != Lex_charset_collation_st::TYPE_CHARACTER_SET);
> +
> +  switch (cl.type()) {
> +  case Lex_charset_collation_st::TYPE_EMPTY:
> +    return false;
> +  case Lex_charset_collation_st::TYPE_CHARACTER_SET:
> +    DBUG_ASSERT(0);
> +    return false;
> +  case Lex_charset_collation_st::TYPE_COLLATE_EXACT:
>      /*
> -      EXPLICIT + CONTEXT
> -      CHAR(10) COLLATE latin1_bin .. COLLATE DEFAULT       - not supported
> -      CHAR(10) COLLATE latin1_bin .. COLLATE uca1400_as_ci - not yet
> +      EXPLICIT + EXPLICIT
> +      CHAR(10) CHARACTER SET latin1                    .. COLLATE latin1_bin
> +      CHAR(10) CHARACTER SET latin1 COLLATE latin1_bin .. COLLATE latin1_bin
> +      CHAR(10) COLLATE latin1_bin                      .. COLLATE latin1_bin
> +      CHAR(10) COLLATE latin1_bin                      .. COLLATE latin1_bin
> +      CHAR(10) CHARACTER SET latin1 BINARY             .. COLLATE latin1_bin
>      */
> -    DBUG_ASSERT(0); // Not possible yet
> +    if (m_with_collate && m_ci != cl.charset_collation())
> +    {
> +      my_error(ER_CONFLICTING_DECLARATIONS, MYF(0),
> +               "COLLATE ", m_ci->coll_name.str,
> +               "COLLATE ", cl.charset_collation()->coll_name.str);
> +      return true;
> +    }
> +    if (!my_charset_same(m_ci, cl.charset_collation()))
> +    {
> +      my_error(ER_COLLATION_CHARSET_MISMATCH, MYF(0),
> +               cl.charset_collation()->coll_name.str, m_ci->cs_name.str);
> +      return true;
> +    }
> +    m_ci= cl.charset_collation();
> +    m_with_collate= true;
>      return false;
> -  }
>  
> -  /*
> -    EXPLICIT + EXPLICIT
> -    CHAR(10) CHARACTER SET latin1                    .. COLLATE latin1_bin
> -    CHAR(10) CHARACTER SET latin1 COLLATE latin1_bin .. COLLATE latin1_bin
> -    CHAR(10) COLLATE latin1_bin                      .. COLLATE latin1_bin
> -    CHAR(10) COLLATE latin1_bin                      .. COLLATE latin1_bin
> -    CHAR(10) CHARACTER SET latin1 BINARY             .. COLLATE latin1_bin
> -  */
> -  if (type() == TYPE_EXPLICIT && collation() != cl.collation())
> -  {
> -    my_error(ER_CONFLICTING_DECLARATIONS, MYF(0),
> -             "COLLATE ", collation()->coll_name.str,
> -             "COLLATE ", cl.collation()->coll_name.str);
> -    return true;
> -  }
> -  if (!my_charset_same(collation(), cl.collation()))
> -  {
> -    my_error(ER_COLLATION_CHARSET_MISMATCH, MYF(0),
> -             cl.collation()->coll_name.str, collation()->cs_name.str);
> -    return true;
> +  case Lex_charset_collation_st::TYPE_COLLATE_CONTEXTUALLY_TYPED:
> +    if (cl.is_contextually_typed_collate_default())
> +    {
> +      /*
> +        SET NAMES latin1 COLLATE DEFAULT;
> +        ALTER TABLE t1 CONVERT TO CHARACTER SET latin1 COLLATE DEFAULT;
> +      */
> +      CHARSET_INFO *tmp= Charset_loader_mysys().find_default_collation(m_ci);
> +      if (!tmp)
> +        return true;
> +      m_ci= tmp;
> +      m_with_collate= true;
> +      return false;
> +    }
> +    else
> +    {
> +      /*
> +        EXPLICIT + CONTEXT
> +        CHAR(10) COLLATE latin1_bin .. COLLATE DEFAULT not possible yet
> +        CHAR(10) COLLATE latin1_bin .. COLLATE uca1400_as_ci
> +      */
> +
> +      const LEX_CSTRING context_cl_name= cl.collation_name_context_suffix();
> +      if (!strncasecmp(context_cl_name.str, STRING_WITH_LEN("uca1400_")))

Like above, better:

DBUG_ASSERT(!strncasecmp(context_cl_name.str, STRING_WITH_LEN("uca1400_")));

> +      {
> +        CHARSET_INFO *tmp;
> +        Charset_loader_mysys loader;
> +        if (!(tmp= loader.get_contextually_typed_collation_or_error(m_ci,
> +                                                          context_cl_name.str)))
> +          return true;
> +        m_with_collate= true;
> +        m_ci= tmp;
> +        return false;
> +      }
> +
> +      DBUG_ASSERT(0); // Not possible yet
> +      return false;
> +    }
>    }
> -  *this= cl;
> +  DBUG_ASSERT(0);
>    return false;
>  }
>  
> diff --git a/strings/ctype-uca.c b/strings/ctype-uca.c
> index b89916f3b20..3e6b4e4ce43 100644
> --- a/strings/ctype-uca.c
> +++ b/strings/ctype-uca.c
> @@ -30542,7 +30613,7 @@ static const char vietnamese[]=
>    Myanmar, according to CLDR Revision 8900.
>    http://unicode.org/cldr/trac/browser/trunk/common/collation/my.xml
>  */
> -static const char myanmar[]= "[shift-after-method expand][version 5.2.0]"
> +static const char myanmar[]= "[shift-after-method expand]"

What's going on with myanmar? You removed a version here and
added &my_uca_v520 below in its charset_info_st.
What does this change mean?

>  /* Tones */
>  "&\\u108C"
>  "<\\u1037"
> @@ -37627,7 +37825,7 @@ struct charset_info_st my_charset_utf32_myanmar_uca_ci=
>      NULL,               /* to_lower     */
>      NULL,               /* to_upper     */
>      NULL,               /* sort_order   */
> -    NULL,               /* uca          */
> +    &my_uca_v520,       /* uca          */

What does this change?

>      NULL,               /* tab_to_uni   */
>      NULL,               /* tab_from_uni */
>      &my_unicase_unicode520,/* caseinfo   */
 
Regards,
Sergei
VP of MariaDB Server Engineering
and security@xxxxxxxxxxx
Follow ups

Re: 49ecf935415: MDEV-27009 Add UCA-14.0.0 collations
From: Alexander Barkov, 2022-05-26