maria-developers team mailing list archive
-
maria-developers team
-
Mailing list archive
-
Message #09464
Please review MDEV-9842 LOAD DATA INFILE does not work well with a TEXT column when using sjis
Hi Sergei,
Please review a patch for MDEV-9842.
This is a prerequisite for:
MDEV-6353 my_ismbchar() and my_mbcharlen() refactoring
Thanks.
commit 4d615db357a5b513c915ab677afd130a0673f0da
Author: Alexander Barkov <bar@xxxxxxxxxxx>
Date: Fri Apr 1 14:53:00 2016 +0400
MDEV-9842 LOAD DATA INFILE does not work well with a TEXT column when using sjis
Fixing READ_INFO to store the loaded data in a String object.
Making sure to reserve MY_CS_MBMAXLEN bytes before loading the next character
in read_field().
diff --git a/mysql-test/r/ctype_sjis.result b/mysql-test/r/ctype_sjis.result
index 4668693..4edd900 100644
--- a/mysql-test/r/ctype_sjis.result
+++ b/mysql-test/r/ctype_sjis.result
@@ -18730,3 +18730,23 @@ DROP TABLE t1;
#
# End of 10.0 tests
#
+#
+# Start of 10.2 tests
+#
+#
+# MDEV-9842 LOAD DATA INFILE does not work well with a TEXT column when using sjis
+#
+CREATE TABLE t1 (a TEXT CHARACTER SET sjis);
+LOAD DATA INFILE '../../std_data/loaddata/mdev9842.txt' INTO TABLE t1 CHARACTER SET sjis;
+SHOW WARNINGS;
+Level Code Message
+SELECT HEX(a) FROM t1;
+HEX(a)
+78835C835C835C835C835C835C835C835C835C835C835C835C835C835C835C835C835C835C835C835C835C835C835C835C835C835C835C835C835C835C835C835C835C835C835C835C835C835C835C835C835C835C835C835C835C835C835C835C835C835C835C835C835C835C835C835C835C835C835C835C835C835C835C835C835C835C835C835C835C835C835C835C835C835C835C835C835C835C835C835C835C835C835C835C835C835C835C835C835C835C835C835C835C835C835C835C835C835C835C835C835C835C835C835C835C835C835C835C835C835C835C835C835C835C835C835C835C835C835C835C835C835C835C835C835C835C835C835C835C835C835C835C835C835C835C835C835C835C835C835C835C835C835C835C835C835C835C835C835C835C835C835C835C835C835C835C835C835C835C835C835C835C835C835C835C835C835C835C835C835C835C835C835C835C835C835C835C835C835C835C835C835C835C835C835C835C835C835C835C835C835C835C835C835C835C835C835C835C835C835C
+SELECT a=CONCAT('x', REPEAT(_sjis 0x835C, 200)) FROM t1;
+a=CONCAT('x', REPEAT(_sjis 0x835C, 200))
+1
+DROP TABLE t1;
+#
+# End of 10.2 tests
+#
diff --git a/mysql-test/std_data/loaddata/mdev9842.txt b/mysql-test/std_data/loaddata/mdev9842.txt
new file mode 100644
index 0000000..1aed9a9
--- /dev/null
+++ b/mysql-test/std_data/loaddata/mdev9842.txt
@@ -0,0 +1 @@
+x\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\
diff --git a/mysql-test/t/ctype_sjis.test b/mysql-test/t/ctype_sjis.test
index 2777cf6..da1f963 100644
--- a/mysql-test/t/ctype_sjis.test
+++ b/mysql-test/t/ctype_sjis.test
@@ -231,3 +231,22 @@ SET NAMES sjis;
--echo #
--echo # End of 10.0 tests
--echo #
+
+--echo #
+--echo # Start of 10.2 tests
+--echo #
+
+--echo #
+--echo # MDEV-9842 LOAD DATA INFILE does not work well with a TEXT column when using sjis
+--echo #
+CREATE TABLE t1 (a TEXT CHARACTER SET sjis);
+LOAD DATA INFILE '../../std_data/loaddata/mdev9842.txt' INTO TABLE t1 CHARACTER SET sjis;
+SHOW WARNINGS;
+SELECT HEX(a) FROM t1;
+SELECT a=CONCAT('x', REPEAT(_sjis 0x835C, 200)) FROM t1;
+DROP TABLE t1;
+
+
+--echo #
+--echo # End of 10.2 tests
+--echo #
diff --git a/sql/sql_load.cc b/sql/sql_load.cc
index f1c2920..d7ca8ef 100644
--- a/sql/sql_load.cc
+++ b/sql/sql_load.cc
@@ -66,10 +66,9 @@ XML_TAG::XML_TAG(int l, String f, String v)
class READ_INFO {
File file;
- uchar *buffer, /* Buffer for read text */
- *end_of_buff; /* Data in bufferts ends here */
- uint buff_length, /* Length of buffert */
- max_length; /* Max length of row */
+ String data; /* Read buffer */
+ uint fixed_length; /* Length of the fixed length record */
+ uint max_length; /* Max length of row */
const uchar *field_term_ptr,*line_term_ptr;
const char *line_start_ptr,*line_start_end;
uint field_term_length,line_term_length,enclosed_length;
@@ -1349,10 +1348,11 @@ READ_INFO::READ_INFO(THD *thd, File file_par, uint tot_length, CHARSET_INFO *cs,
String &field_term, String &line_start, String &line_term,
String &enclosed_par, int escape, bool get_it_from_net,
bool is_fifo)
- :file(file_par), buffer(NULL), buff_length(tot_length), escape_char(escape),
+ :file(file_par), fixed_length(tot_length), escape_char(escape),
found_end_of_line(false), eof(false),
error(false), line_cuted(false), found_null(false), read_charset(cs)
{
+ data.set_thread_specific();
/*
Field and line terminators must be interpreted as sequence of unsigned char.
Otherwise, non-ascii terminators will be negative on some platforms,
@@ -1394,18 +1394,16 @@ READ_INFO::READ_INFO(THD *thd, File file_par, uint tot_length, CHARSET_INFO *cs,
set_if_bigger(length,line_start.length());
stack= stack_pos= (int*) thd->alloc(sizeof(int) * length);
- if (!(buffer=(uchar*) my_malloc(buff_length+1,MYF(MY_THREAD_SPECIFIC))))
+ if (data.reserve(tot_length))
error=1; /* purecov: inspected */
else
{
- end_of_buff=buffer+buff_length;
if (init_io_cache(&cache,(get_it_from_net) ? -1 : file, 0,
(get_it_from_net) ? READ_NET :
(is_fifo ? READ_FIFO : READ_CACHE),0L,1,
MYF(MY_WME | MY_THREAD_SPECIFIC)))
{
- my_free(buffer); /* purecov: inspected */
- buffer= NULL;
+ data.free();
error=1;
}
else
@@ -1428,7 +1426,7 @@ READ_INFO::READ_INFO(THD *thd, File file_par, uint tot_length, CHARSET_INFO *cs,
READ_INFO::~READ_INFO()
{
::end_io_cache(&cache);
- my_free(buffer);
+ data.free();
List_iterator<XML_TAG> xmlit(taglist);
XML_TAG *t;
while ((t= xmlit++))
@@ -1459,7 +1457,6 @@ inline int READ_INFO::terminator(const uchar *ptr,uint length)
int READ_INFO::read_field()
{
int chr,found_enclosed_char;
- uchar *to,*new_buffer;
found_null=0;
if (found_end_of_line)
@@ -1478,11 +1475,11 @@ int READ_INFO::read_field()
found_end_of_line=eof=1;
return 1;
}
- to=buffer;
+ data.length(0);
if (chr == enclosed_char)
{
found_enclosed_char=enclosed_char;
- *to++=(uchar) chr; // If error
+ data.append(chr); // If error
}
else
{
@@ -1492,7 +1489,7 @@ int READ_INFO::read_field()
for (;;)
{
- while ( to < end_of_buff)
+ while (data.length() + MY_CS_MBMAXLEN < data.alloced_length())
{
chr = GET;
if (chr == my_b_EOF)
@@ -1501,7 +1498,7 @@ int READ_INFO::read_field()
{
if ((chr=GET) == my_b_EOF)
{
- *to++= (uchar) escape_char;
+ data.append(escape_char);
goto found_eof;
}
/*
@@ -1513,7 +1510,7 @@ int READ_INFO::read_field()
*/
if (escape_char != enclosed_char || chr == escape_char)
{
- *to++ = (uchar) unescape((char) chr);
+ data.append(unescape((char) chr));
continue;
}
PUSH(chr);
@@ -1529,8 +1526,8 @@ int READ_INFO::read_field()
{ // Maybe unexpected linefeed
enclosed=0;
found_end_of_line=1;
- row_start=buffer;
- row_end= to;
+ row_start= (uchar *) data.ptr();
+ row_end= (uchar *) data.end();
return 0;
}
}
@@ -1538,7 +1535,7 @@ int READ_INFO::read_field()
{
if ((chr=GET) == found_enclosed_char)
{ // Remove dupplicated
- *to++ = (uchar) chr;
+ data.append(chr);
continue;
}
// End of enclosed field if followed by field_term or line_term
@@ -1549,16 +1546,16 @@ int READ_INFO::read_field()
/* Maybe unexpected linefeed */
enclosed=1;
found_end_of_line=1;
- row_start=buffer+1;
- row_end= to;
+ row_start= (uchar *) data.ptr() + 1;
+ row_end= (uchar *) data.end();
return 0;
}
if (chr == field_term_char &&
terminator(field_term_ptr,field_term_length))
{
enclosed=1;
- row_start=buffer+1;
- row_end= to;
+ row_start= (uchar *) data.ptr() + 1;
+ row_end= (uchar *) data.end();
return 0;
}
/*
@@ -1574,22 +1571,19 @@ int READ_INFO::read_field()
if (terminator(field_term_ptr,field_term_length))
{
enclosed=0;
- row_start=buffer;
- row_end= to;
+ row_start= (uchar *) data.ptr();
+ row_end= (uchar *) data.end();
return 0;
}
}
#ifdef USE_MB
- if (my_mbcharlen(read_charset, chr) > 1 &&
- to + my_mbcharlen(read_charset, chr) <= end_of_buff)
+ if (my_mbcharlen(read_charset, chr) > 1)
{
- uchar* p= to;
- int ml, i;
- *to++ = chr;
+ uint32 length0= data.length();
+ int ml= my_mbcharlen(read_charset, chr);
+ data.append(chr);
- ml= my_mbcharlen(read_charset, chr);
-
- for (i= 1; i < ml; i++)
+ for (int i= 1; i < ml; i++)
{
chr= GET;
if (chr == my_b_EOF)
@@ -1598,39 +1592,35 @@ int READ_INFO::read_field()
Need to back up the bytes already ready from illformed
multi-byte char
*/
- to-= i;
+ data.length(length0);
goto found_eof;
}
- *to++ = chr;
+ data.append(chr);
}
if (my_ismbchar(read_charset,
- (const char *)p,
- (const char *)to))
+ (const char *) data.ptr() + length0,
+ (const char *) data.end()))
continue;
- for (i= 0; i < ml; i++)
- PUSH(*--to);
+ for (int i= 0; i < ml; i++)
+ PUSH(data.end()[-1 - i]);
+ data.length(length0);
chr= GET;
}
#endif
- *to++ = (uchar) chr;
+ data.append(chr);
}
/*
** We come here if buffer is too small. Enlarge it and continue
*/
- if (!(new_buffer=(uchar*) my_realloc((char*) buffer,buff_length+1+IO_SIZE,
- MYF(MY_WME | MY_THREAD_SPECIFIC))))
- return (error=1);
- to=new_buffer + (to-buffer);
- buffer=new_buffer;
- buff_length+=IO_SIZE;
- end_of_buff=buffer+buff_length;
+ if (data.reserve(IO_SIZE))
+ return (error= 1);
}
found_eof:
enclosed=0;
found_end_of_line=eof=1;
- row_start=buffer;
- row_end=to;
+ row_start= (uchar *) data.ptr();
+ row_end= (uchar *) data.end();
return 0;
}
@@ -1652,7 +1642,6 @@ int READ_INFO::read_field()
int READ_INFO::read_fixed_length()
{
int chr;
- uchar *to;
if (found_end_of_line)
return 1; // One have to call next_line
@@ -1663,8 +1652,7 @@ int READ_INFO::read_fixed_length()
return 1;
}
- to=row_start=buffer;
- while (to < end_of_buff)
+ for (data.length(0); data.length() < fixed_length ; )
{
if ((chr=GET) == my_b_EOF)
goto found_eof;
@@ -1672,31 +1660,31 @@ int READ_INFO::read_fixed_length()
{
if ((chr=GET) == my_b_EOF)
{
- *to++= (uchar) escape_char;
+ data.append(escape_char);
goto found_eof;
}
- *to++ =(uchar) unescape((char) chr);
+ data.append((uchar) unescape((char) chr));
continue;
}
if (chr == line_term_char)
{
if (terminator(line_term_ptr,line_term_length))
{ // Maybe unexpected linefeed
- found_end_of_line=1;
- row_end= to;
- return 0;
+ found_end_of_line=1;
+ break;
}
}
- *to++ = (uchar) chr;
+ data.append(chr);
}
- row_end=to; // Found full line
+ row_start= (uchar *) data.ptr();
+ row_end= (uchar *) data.end(); // Found full line
return 0;
found_eof:
found_end_of_line=eof=1;
- row_start=buffer;
- row_end=to;
- return to == buffer ? 1 : 0;
+ row_start= (uchar *) data.ptr();
+ row_end= (uchar *) data.end();
+ return data.length() == 0 ? 1 : 0;
}
Follow ups