← Back to team overview

maria-developers team mailing list archive

Re: Correct parsing of BINLOG 'XXX' with comment inside

 

Hi Bar!
On Tue, Oct 16, 2018 at 7:33 AM Alexander Barkov <bar@xxxxxxxxxxx> wrote:
>
> Hi Sachin,
>
>
> On 10/16/2018 03:49 AM, Sachin Setiya wrote:
> > Hi Bar!
> > On Tue, Sep 11, 2018 at 1:47 PM Alexander Barkov <bar@xxxxxxxxxxx> wrote:
> >>
> >> Hi Sachin,
> >>
> >>
> >> On 09/10/2018 03:47 PM, Sachin Setia wrote:
> >>> Hi Bar,
> >>>
> >>> Currently if we have generated the sql file using mysqlbinlog -v and
> >>> if we have big statement binlog
> >>> , then mysqlbinlog add comment in between of Binlog 'XXX' statement ,
> >>> but unfortunately base64_decode
> >>> does not understands it, and it through error since # is not a base64 char
> >>>
> >>> This patches solves this.
> >>
> >> Note, base64_decode() is used in at least two places:
> >> - for the binary log
> >> - for the SQL function FROM_BASE64()
> >>
> >>
> >> I don't like that your patch changes the behavior of the SQL function
> >> FROM_BASE64(). It should not recognize any '#' inside the base64 data
> >> as comments.
> > Right , So I have creates a flag MY_BASE64_DECODE_ALLOW_COMMENTS it is
> > kind of similar to as MY_BASE64_DECODE_ALLOW_MULTIPLE_CHUNKS flag is used
> > So no impact on FROM_BASE64
> >
> >>
> >> So perhaps this should be fixed in some other place, not in base64_decode().
> >>
> >>
> >> I checked the output of these commands:
> >>
> >> ./bin/mysqlbinlog      ./data/retsina-bin.000003
> >> ./bin/mysqlbinlog -vvv ./data/retsina-bin.000003
> >>
> >> It looks suspicious for me.
> >>
> >> Can you please remind why mysqlbinlog prints multiple base64 chunks
> >> inside the same BINLOG statement?
> > It is beacuse if we have long row we can have more then one
> > ROWS_LOG_EVENT after TABLE_MAP_EVENT
>
> Do you know where in the code the server decides to end
> the current chunk and start a new one?
Here In binlog_prepare_pending_rows_event
  if (!pending ||
      pending->server_id != serv_id ||
      pending->get_table_id() != table->s->table_map_id ||
      pending->get_general_type_code() != general_type_code ||
      pending->get_data_size() + needed > opt_binlog_rows_event_max_size ||
      ^^^ this line
      pending->get_width() != colcnt ||
      !bitmap_cmp(pending->get_cols(), cols))
  {

>
> Is there some limit (in bytes, or in number or records)?
>
> >>
> >>
> >> From my understanding, it is "mysqlbinlog -vvv" who should be fixed
> >> to print good base64 data inside the string literal that follows the
> >> BINLOG keyword.
> >>
> >> At least  the additional comments printed by -vvv should be outside of
> >> the BINLOG statement (presumably before), not inside.
> >>
> > Idk , this can be lot of work , plus it will slow mysqlbinlog also ,
> > And I think with new solution FROM_BASE64 behavior is not changed
> > I have added test for FROM_BASE64
> > #No change in BASE64_FROM behaviour
> > SELECT FROM_BASE64('TWFy
> > #Comment 344
> > #
> > aWE=') AS 'Output';
> > this will generate Maria in earlier patch which is wrong , with new
> > patch it gives NULL
>
> If we really go this way, I suggest to cover all these scenarios too:
>
> SELECT FROM_BASE64('TWFyaWE=# comment1');
> SELECT FROM_BASE64('TWFyaWE=\n# comment1');
> SELECT FROM_BASE64('TWFyaWE=\n# comment1\nTWFyaWE=');
> SELECT FROM_BASE64('TWFyaWE=\n# comment1\nTWFyaWE=\n# comment2\n');
>
> They all should give NULL.
They all are giving null
>
> >> Thanks.
> >>
> >> _______________________________________________
> >> Mailing list: https://launchpad.net/~maria-developers
> >> Post to     : maria-developers@xxxxxxxxxxxxxxxxxxx
> >> Unsubscribe : https://launchpad.net/~maria-developers
> >> More help   : https://help.launchpad.net/ListHelp
> >
> >
> >



-- 
Regards
Sachin Setiya
Software Engineer at  MariaDB
commit 804d84d5fd424b573e409b0cb50c56eaa7b1aa9c
Author: Sachin <sachin.setiya@xxxxxxxxxxx>
Date:   Tue Oct 16 17:32:35 2018 +0530

    MDEV-10362 mysqlbinlog verbose output cannot be decoded
    
    base64_encode does not correctly parse comments inside of base64 string.
    This patch fixes that.

diff --git a/include/base64.h b/include/base64.h
index 9a843b5088e..5779dc1ace2 100644
--- a/include/base64.h
+++ b/include/base64.h
@@ -54,6 +54,8 @@ int base64_decode(const char *src, size_t src_len,
 
 /* Allow multuple chunks 'AAA= AA== AA==', binlog uses this */
 #define MY_BASE64_DECODE_ALLOW_MULTIPLE_CHUNKS 1
+/* Allow Comment starting of line with #*/
+#define MY_BASE64_DECODE_ALLOW_COMMENTS 2
 
 
 #ifdef __cplusplus
diff --git a/mysql-test/suite/binlog/r/binlog_mysqlbinlog_comment_base64.result b/mysql-test/suite/binlog/r/binlog_mysqlbinlog_comment_base64.result
new file mode 100644
index 00000000000..997212ac4e2
--- /dev/null
+++ b/mysql-test/suite/binlog/r/binlog_mysqlbinlog_comment_base64.result
@@ -0,0 +1,54 @@
+create table t1(a int , b int);
+#202 entry so that binlog query is split
+select count(*) from t1;
+count(*)
+202
+FLUSH logs;
+DROP TABLE t1;
+select count(*) from t1;
+count(*)
+202
+DROP TABLE t1;
+#No change in BASE64_FROM behaviour
+SELECT FROM_BASE64('TWFy
+#Comment 344
+#
+aWE=') AS 'Output';
+Output
+NULL
+Warnings:
+Warning	1958	Bad base64 data as position 5
+SELECT FROM_BASE64('TWFyaWE=# comment1');
+FROM_BASE64('TWFyaWE=# comment1')
+NULL
+Warnings:
+Warning	1958	Bad base64 data as position 8
+SELECT FROM_BASE64('TWFyaWE=
+# comment1');
+FROM_BASE64('TWFyaWE=
+# comment1')
+NULL
+Warnings:
+Warning	1958	Bad base64 data as position 9
+SELECT FROM_BASE64('TWFyaWE=
+# comment1
+TWFyaWE=');
+FROM_BASE64('TWFyaWE=
+# comment1
+TWFyaWE=')
+NULL
+Warnings:
+Warning	1958	Bad base64 data as position 9
+SELECT FROM_BASE64('TWFyaWE=
+# comment1
+TWFyaWE=
+# comment2
+');
+FROM_BASE64('TWFyaWE=
+# comment1
+TWFyaWE=
+# comment2
+')
+NULL
+Warnings:
+Warning	1958	Bad base64 data as position 9
diff --git a/mysql-test/suite/binlog/t/binlog_mysqlbinlog_comment_base64.test b/mysql-test/suite/binlog/t/binlog_mysqlbinlog_comment_base64.test
new file mode 100644
index 00000000000..9e0be647678
--- /dev/null
+++ b/mysql-test/suite/binlog/t/binlog_mysqlbinlog_comment_base64.test
@@ -0,0 +1,44 @@
+--source include/have_binlog_format_row.inc
+
+create table t1(a int , b int);
+
+--echo #202 entry so that binlog query is split
+--let str=insert into t1 values(1,1),
+--let counter=200
+while ($counter)
+{
+  --let str=$str(1,1),
+  --dec $counter
+}
+--let str=$str(1,1)
+--disable_query_log
+--eval $str
+--enable_query_log
+select count(*) from t1;
+Show binlog events;
+FLUSH logs;
+--let $MYSQLD_DATADIR= `SELECT @@datadir`
+--exec $MYSQL_BINLOG -vvv $MYSQLD_DATADIR/master-bin.000001 > $MYSQTEST_VARDIR/tmp/a.binlog
+
+DROP TABLE t1;
+
+--exec $MYSQL < $MYSQTEST_VARDIR/tmp/a.binlog
+select count(*) from t1;
+DROP TABLE t1;
+
+--echo #No change in BASE64_FROM behaviour
+SELECT FROM_BASE64('TWFy
+#Comment 344
+#
+aWE=') AS 'Output';
+SELECT FROM_BASE64('TWFyaWE=# comment1');
+SELECT FROM_BASE64('TWFyaWE=
+# comment1');
+SELECT FROM_BASE64('TWFyaWE=
+# comment1
+TWFyaWE=');
+SELECT FROM_BASE64('TWFyaWE=
+# comment1
+TWFyaWE=
+# comment2
+');
diff --git a/mysys/base64.c b/mysys/base64.c
index 265b2f22aad..6b9a69afda7 100644
--- a/mysys/base64.c
+++ b/mysys/base64.c
@@ -201,7 +201,22 @@ my_base64_decoder_skip_spaces(MY_BASE64_DECODER *decoder)
     decoder->error= 1; /* Unexpected end-of-input found */
   return TRUE;
 }
-
+/**
+ * Skip all the comments inside of base64 encoding
+ *
+ * @param decode base64 decoding stream
+ *
+ * @return void
+ * */
+static inline void
+my_base64_skip_comments(MY_BASE64_DECODER *decoder)
+{
+  my_base64_decoder_skip_spaces(decoder);
+  const char* src= decoder->src;
+  while(*src == '#')
+    while (*src++ != '\n'){}
+  decoder->src= src;
+}
 
 /**
  * Convert the next character in a base64 encoded stream
@@ -323,11 +338,13 @@ base64_decode(const char *src_base, size_t len,
   decoder.end= src_base + len;
   decoder.error= 0;
   decoder.mark= 0;
-
+  uint i=1;
   for ( ; ; )
   {
     decoder.c= 0;
     decoder.state= 0;
+    if (flags & MY_BASE64_DECODE_ALLOW_COMMENTS)
+      my_base64_skip_comments(&decoder);
 
     if (my_base64_decoder_getch(&decoder) ||
         my_base64_decoder_getch(&decoder) ||
@@ -346,6 +363,7 @@ base64_decode(const char *src_base, size_t len,
         break;
       decoder.mark= 0;
     }
+    i++;
   }
 
   /* Return error if there are more non-space characters */
diff --git a/sql/sql_binlog.cc b/sql/sql_binlog.cc
index f0465cdf5bf..07dac31184f 100644
--- a/sql/sql_binlog.cc
+++ b/sql/sql_binlog.cc
@@ -125,7 +125,8 @@ void mysql_client_binlog_statement(THD* thd)
   {
     char const *endptr= 0;
     int bytes_decoded= base64_decode(strptr, coded_len, buf, &endptr,
-                                     MY_BASE64_DECODE_ALLOW_MULTIPLE_CHUNKS);
+                                     MY_BASE64_DECODE_ALLOW_MULTIPLE_CHUNKS
+                                       | MY_BASE64_DECODE_ALLOW_COMMENTS);
 
 #ifndef HAVE_valgrind
       /*

Follow ups

References