← Back to team overview

maria-developers team mailing list archive

Re: 7b20964dd240: MDEV-8334: Rename utf8 to utf8mb3

 

Hi, Rucha!

On Apr 15, Rucha Deodhar wrote:
> revision-id: 7b20964dd240
> parent(s): e9a2c9e
> author: Rucha Deodhar <rucha.deodhar@xxxxxxxxxxx>
> timestamp: 2021-03-26 00:55:56 +0530
> message:
> 
> MDEV-8334: Rename utf8 to utf8mb3
> 
> This patch is made as a part of MDEV-8334 to fix failing test in unit and
> main test suite so that utf8mb3 characterset is recognized. Failing tests:
> main.mysql_client_test
> main.mysql_client_test_comp
> unit.conc_basic-t
> unit.conc_charset
> unit.conc_connection

> diff --git a/libmariadb/ma_charset.c b/libmariadb/ma_charset.c
> index ee4b0f47..307cd522 100644
> --- a/libmariadb/ma_charset.c
> +++ b/libmariadb/ma_charset.c
> @@ -67,6 +67,10 @@
>  #include <langinfo.h>
>  #endif
>  
> +#define IS_UTF8(c)\
> +(!strcasecmp((c), "utf8") || !strcasecmp((c), "utf8mb3") ||\
> + !strcasecmp((c), "utf8mb4") || !strcasecmp((c), "utf-8"))
> +
>  /*
>    +----------------------------------------------------------------------+
>    | PHP Version 5                                                        |
> @@ -1269,7 +1275,7 @@ struct st_madb_os_charset MADB_OS_CHARSET[]=
>    {"57010", "ISCII Gujarati", NULL, NULL, MADB_CS_UNSUPPORTED},
>    {"57011", "ISCII Punjabi", NULL, NULL, MADB_CS_UNSUPPORTED},
>    {"65000", "utf-7 Unicode (UTF-7)", NULL, NULL, MADB_CS_UNSUPPORTED},
> -  {"65001", "utf-8 Unicode (UTF-8)", "utf8", NULL, MADB_CS_EXACT},
> +  {"65001", "utf-8 Unicode (UTF-8)", "utf8mb3", NULL, MADB_CS_EXACT},

No, keep this as utf8. It's still a valid charset name; the server can
figure out what to map it to.

>    /* non Windows */
>  #else
>    /* iconv encodings */
> @@ -1337,8 +1343,8 @@ struct st_madb_os_charset MADB_OS_CHARSET[]=
>    {"gb2312", "GB2312", "gb2312", "GB2312", MADB_CS_EXACT},
>    {"gbk", "GBK", "gbk", "GBK", MADB_CS_EXACT},
>    {"georgianps", "Georgian", "geostd8", "GEORGIAN-PS", MADB_CS_EXACT},
> -  {"utf8", "UTF8", "utf8", "UTF-8", MADB_CS_EXACT},
> -  {"utf-8", "UTF8", "utf8", "UTF-8", MADB_CS_EXACT},
> +  {"utf8mb3", "UTF8MB3", "utf8mb3", "UTF-8", MADB_CS_EXACT},
> +  {"utf-8", "UTF8MB3", "utf8mb3", "UTF-8", MADB_CS_EXACT},

same here

>  #endif
>    {NULL, NULL, NULL, NULL, 0}
>  };
> @@ -1361,8 +1367,8 @@ const char *madb_get_os_character_set()
>      return MADB_DEFAULT_CHARSET_NAME;
>    while (MADB_OS_CHARSET[i].identifier)
>    {
> -    if (MADB_OS_CHARSET[i].supported > MADB_CS_UNSUPPORTED &&
> -        strcasecmp(MADB_OS_CHARSET[i].identifier, p) == 0)
> +    if ((MADB_OS_CHARSET[i].supported > MADB_CS_UNSUPPORTED &&
> +        strcasecmp(MADB_OS_CHARSET[i].identifier, p) == 0) || IS_UTF8(p))

why?

>        return MADB_OS_CHARSET[i].charset;
>      i++;
>    }
> diff --git a/unittest/libmariadb/basic-t.c b/unittest/libmariadb/basic-t.c
> index c22e6c2b..e2943964 100644
> --- a/unittest/libmariadb/basic-t.c
> +++ b/unittest/libmariadb/basic-t.c
> @@ -310,7 +310,8 @@ static int use_utf8(MYSQL *my)
>  
>    while ((row= mysql_fetch_row(res)) != NULL)
>    {
> -    FAIL_IF(strcmp(row[0], "utf8"), "wrong character set");
> +    FAIL_IF(strcmp(row[0], get_utf8_name(mysql_get_server_version(my),"utf8")),
> +                   "wrong character set");

Technically, C/C (Connector/C) is a separate project and can run against
any server with any config file. So it'd be safer to check that row[0]
starts with "utf8" and not assume that it depends on the server version
in a specific way.

>    }
>    FAIL_IF(mysql_errno(my), mysql_error(my));
>    mysql_free_result(res);
> diff --git a/unittest/libmariadb/charset.c b/unittest/libmariadb/charset.c
> index 898b6dad..ffa877bc 100644
> --- a/unittest/libmariadb/charset.c
> +++ b/unittest/libmariadb/charset.c
> @@ -71,14 +71,20 @@ int bug_8378(MYSQL *mysql) {
>  int test_client_character_set(MYSQL *mysql)
>  {
>    MY_CHARSET_INFO cs;
> +  char collation_name[19];
>    char *csname= (char*) "utf8";
>    char *csdefault= (char*)mysql_character_set_name(mysql);
>  
> +  strcpy(collation_name,(const char*)get_utf8_name(mysql_get_server_version(mysql),
> +                                                   "utf8_general_ci"));
> +

This one is simpler. It only tests that mysql_set_character_set() works.
Just don't use utf8; make it test with something else, e.g. latin2.

>    FAIL_IF(mysql_set_character_set(mysql, csname), mysql_error(mysql));
>  
>    mysql_get_character_set_info(mysql, &cs);
>  
> -  FAIL_IF(strcmp(cs.csname, "utf8") || strcmp(cs.name, "utf8_general_ci"), "Character set != UTF8");
> +  FAIL_IF(strcmp(cs.csname, get_utf8_name(mysql_get_server_version(mysql),"utf8")) ||
> +                            strcmp(cs.name, collation_name),
> +          "Wrong UTF8 characterset");
>    FAIL_IF(mysql_set_character_set(mysql, csdefault), mysql_error(mysql));
>  
>    return OK;
> @@ -537,6 +544,9 @@ static int test_bug30472(MYSQL *mysql)
>  
>    SKIP_MAXSCALE;
>  
> +  strcpy(collation_name,(const char*)get_utf8_name(mysql_get_server_version(mysql),
> +                                                   "utf8_general_ci"));
> +

same here, the bug is https://bugs.mysql.com/bug.php?id=30472
"libmysql doesn't reset charset, insert_id after succ. mysql_change_user() call"

So the test does not need utf8 specifically. Change it to some
easier-to-use charset.

>    if (mysql_get_server_version(mysql) < 50100 || !is_mariadb) 
>    {
>      diag("Test requires MySQL Server version 5.1 or above");
> diff --git a/unittest/libmariadb/connection.c b/unittest/libmariadb/connection.c
> index 70d347ce..eb9b39bb 100644
> --- a/unittest/libmariadb/connection.c
> +++ b/unittest/libmariadb/connection.c
> @@ -644,9 +644,8 @@ int test_conc26(MYSQL *unused __attribute__((unused)))
>  
>    FAIL_IF(my_test_connect(mysql, hostname, "notexistinguser", "password", schema, port, NULL, CLIENT_REMEMBER_OPTIONS), 
>            "Error expected");
> -
> -  FAIL_IF(!mysql->options.charset_name || strcmp(mysql->options.charset_name, "utf8") != 0, 
> -          "expected charsetname=utf8");
> +  FAIL_IF(!mysql->options.charset_name || strcmp(mysql->options.charset_name, "utf8") != 0,
> +          "Wrong utf8 characterset for this version");

Again, CONC-26 is "CLIENT_REMEMBER_OPTIONS flag missing".
It apparently doesn't need utf8 specifically, so just use a different
non-default charset there.

>    mysql_close(mysql);
>  
>    mysql= mysql_init(NULL);
> @@ -981,7 +980,8 @@ static int test_sess_track_db(MYSQL *mysql)
>        printf("# SESSION_TRACK_VARIABLES: %*.*s\n", (int)len, (int)len, data);
>      } while (!mysql_session_track_get_next(mysql, SESSION_TRACK_SYSTEM_VARIABLES, &data, &len));
>      diag("charset: %s", mysql->charset->csname);
> -    FAIL_IF(strcmp(mysql->charset->csname, "utf8"), "Expected charset 'utf8'");
> +    FAIL_IF(strcmp(mysql->charset->csname, get_utf8_name(mysql_get_server_version(mysql),"utf8")),
> +            "Wrong utf8 characterset for this version");

same here

>  
>      rc= mysql_query(mysql, "SET NAMES latin1");
>      check_mysql_rc(rc, mysql);
> diff --git a/unittest/libmariadb/my_test.h b/unittest/libmariadb/my_test.h
> index c30d1b6d..a040c3d9 100644
> --- a/unittest/libmariadb/my_test.h
> +++ b/unittest/libmariadb/my_test.h
> @@ -701,3 +701,23 @@ void run_tests(struct my_tests_st *test) {
>    }
>  }
>  
> +static inline const char* get_utf8_name(unsigned long server_version,
> +                                        const char* name)
> +{
> +  const char *csname= server_version >= 100600 ? "utf8mb3" : "utf8";
> +  char *corrected_name= malloc(19*sizeof(char));
> +  corrected_name[18]='\0';
> +
> +  if (!strchr(name, '_'))
> +  {
> +    strcpy(corrected_name,csname);
> +    corrected_name[strlen(csname)]='\0';
> +  }
> +  else
> +  {
> +    strcpy(corrected_name,csname);
> +    strcat(corrected_name,"_general_ci");
> +    corrected_name[strlen(csname)+11]= '\0';
> +  }
> +  return (const char*)corrected_name;
> +}

shouldn't be needed

Regards,
Sergei
VP of MariaDB Server Engineering
and security@xxxxxxxxxxx