← Back to team overview

maria-developers team mailing list archive

A fix for MDEV-5689 ExtractValue(xml, 'substring(/x, /y)') crashes

 

  Hi Sergei,

please review a fix for MDEV-5689.

It also fixes
MDEV-5709 ExtractValue() with XPath variable references returns wrong result.


Description:

1. The main problem was that that nodeset_func->fix_fields() was
called in Item_func_xml_extractvalue::val_str() and
Item_func_xml_update::val_str(), which led in some cases to
execution of the XPath engine *before* having a parsed XML value.
Moved to Item_xml_str_func::fix_fields().

2. Cleanup: added a new method Item_xml_str_func::fix_fields() and moved
most of the code from Item_xml_str_func::fix_length_and_dec()
to Item_xml_str_func::fix_fields(), to follow the usual Item layout.

3. Cleanup: a parsed XML value is useless without the raw XML value
it was built from.

Previously the parsed and the raw values where stored in separate String
instances. It was hard to follow how they are synchronized.
Added a helper class XML which contains both parsed and raw values.
Makes things easier to read and modify.

4. MDEV-5709: const_item() could incorrectly return a "true"
result when XPath expression contains users/SP variable references.
Now nodeset_func->const_item() is also taken into account to
catch such cases.

5. Minor code enhancements.
=== modified file 'mysql-test/r/xml.result'
--- mysql-test/r/xml.result	2013-10-02 11:04:07 +0000
+++ mysql-test/r/xml.result	2014-02-20 13:37:23 +0000
@@ -1209,5 +1209,37 @@ SELECT ExtractValue (a, CONCAT('//',REPE
 c512	c512
 DROP TABLE t1;
 #
+# MDEV-5689 ExtractValue(xml, 'substring(/x,/y)') crashes
+#
+SELECT ExtractValue('<a><b>abc</b><c>2</c><d>1</d></a>','substring(/a/b,..)');
+ExtractValue('<a><b>abc</b><c>2</c><d>1</d></a>','substring(/a/b,..)')
+
+SELECT ExtractValue('<a><b>abc</b><c>2</c><d>1</d></a>','substring(/a/b,/a/c)');
+ExtractValue('<a><b>abc</b><c>2</c><d>1</d></a>','substring(/a/b,/a/c)')
+bc
+SELECT ExtractValue('<a><b>abc</b><c>2</c><d>1</d></a>','substring(/a/b,/a/d)');
+ExtractValue('<a><b>abc</b><c>2</c><d>1</d></a>','substring(/a/b,/a/d)')
+abc
+SELECT ExtractValue('<a><b>abc</b><c>2</c><d>1</d></a>','substring(/a/b,/a/c,/a/d)');
+ExtractValue('<a><b>abc</b><c>2</c><d>1</d></a>','substring(/a/b,/a/c,/a/d)')
+b
+SELECT ExtractValue('<a><b>abc</b><c>2</c><d>1</d></a>','substring(/a/b,/a/d,/a/c)');
+ExtractValue('<a><b>abc</b><c>2</c><d>1</d></a>','substring(/a/b,/a/d,/a/c)')
+ab
+#
+# MDEV-5709 ExtractValue() with XPath variable references returns wrong result
+#
+CREATE TABLE t1 (a INT, b VARCHAR(10));
+INSERT INTO t1 VALUES (1,'b1'),(2,'b2');
+SELECT *,IF(@i:=a,ExtractValue('<a><b>b1</b><b>b2</b></a>','//b[$@i]'),0) AS xpath FROM t1;
+a	b	xpath
+1	b1	b1
+2	b2	b2
+SELECT * FROM t1 WHERE b=IF(@i:=a,ExtractValue('<a><b>b1</b><b>b2</b></a>','//b[$@i]'),0);
+a	b
+1	b1
+2	b2
+DROP TABLE t1;
+#
 # End of 5.5 tests
 #

=== modified file 'mysql-test/t/xml.test'
--- mysql-test/t/xml.test	2013-10-02 11:04:07 +0000
+++ mysql-test/t/xml.test	2014-02-20 13:37:07 +0000
@@ -702,6 +702,26 @@ INSERT INTO t1 VALUES (CONCAT('<a><', RE
 SELECT ExtractValue (a, CONCAT('//',REPEAT('c',512))) AS c512 FROM t1;
 DROP TABLE t1;
 
+--horizontal_results
+
+--echo #
+--echo # MDEV-5689 ExtractValue(xml, 'substring(/x,/y)') crashes
+--echo #
+SELECT ExtractValue('<a><b>abc</b><c>2</c><d>1</d></a>','substring(/a/b,..)');
+SELECT ExtractValue('<a><b>abc</b><c>2</c><d>1</d></a>','substring(/a/b,/a/c)');
+SELECT ExtractValue('<a><b>abc</b><c>2</c><d>1</d></a>','substring(/a/b,/a/d)');
+SELECT ExtractValue('<a><b>abc</b><c>2</c><d>1</d></a>','substring(/a/b,/a/c,/a/d)');
+SELECT ExtractValue('<a><b>abc</b><c>2</c><d>1</d></a>','substring(/a/b,/a/d,/a/c)');
+
+--echo #
+--echo # MDEV-5709 ExtractValue() with XPath variable references returns wrong result
+--echo #
+CREATE TABLE t1 (a INT, b VARCHAR(10));
+INSERT INTO t1 VALUES (1,'b1'),(2,'b2');
+SELECT *,IF(@i:=a,ExtractValue('<a><b>b1</b><b>b2</b></a>','//b[$@i]'),0) AS xpath FROM t1;
+SELECT * FROM t1 WHERE b=IF(@i:=a,ExtractValue('<a><b>b1</b><b>b2</b></a>','//b[$@i]'),0);
+DROP TABLE t1;
+
 --echo #
 --echo # End of 5.5 tests
 --echo #

=== modified file 'sql/item_xmlfunc.cc'
--- sql/item_xmlfunc.cc	2013-11-20 11:05:39 +0000
+++ sql/item_xmlfunc.cc	2014-02-20 14:03:33 +0000
@@ -2597,16 +2597,24 @@ my_xpath_parse(MY_XPATH *xpath, const ch
 
 void Item_xml_str_func::fix_length_and_dec()
 {
+  max_length= MAX_BLOB_WIDTH;
+  agg_arg_charsets_for_comparison(collation, args, arg_count);
+}
+
+
+bool Item_xml_str_func::fix_fields(THD *thd, Item **ref)
+{
   String *xp, tmp;
   MY_XPATH xpath;
   int rc;
 
+  if (Item_str_func::fix_fields(thd, ref))
+    return true;
+  
   status_var_increment(current_thd->status_var.feature_xml);
 
   nodeset_func= 0;
 
-  if (agg_arg_charsets_for_comparison(collation, args, arg_count))
-    return;
 
   if (collation.collation->mbminlen > 1)
   {
@@ -2614,23 +2622,23 @@ void Item_xml_str_func::fix_length_and_d
     my_printf_error(ER_UNKNOWN_ERROR,
                     "Character set '%s' is not supported by XPATH",
                     MYF(0), collation.collation->csname);
-    return;
+    return true;
   }
 
   if (!args[1]->const_item())
   {
     my_printf_error(ER_UNKNOWN_ERROR,
                     "Only constant XPATH queries are supported", MYF(0));
-    return;
+    return true;
   }
 
   if (!(xp= args[1]->val_str(&tmp)))
-    return;
+    return false; // Will return NULL
   my_xpath_init(&xpath);
   xpath.cs= collation.collation;
   xpath.debug= 0;
-  xpath.pxml= &pxml;
-  pxml.set_charset(collation.collation);
+  xpath.pxml= xml.parsed();
+  xml.set_charset(collation.collation);
 
   rc= my_xpath_parse(&xpath, xp->ptr(), xp->ptr() + xp->length());
 
@@ -2640,13 +2648,24 @@ void Item_xml_str_func::fix_length_and_d
     set_if_smaller(clen, 32);
     my_printf_error(ER_UNKNOWN_ERROR, "XPATH syntax error: '%.*s'",
                     MYF(0), clen, xpath.lasttok.beg);
-    return;
+    return true;
   }
 
-  nodeset_func= xpath.item;
-  if (nodeset_func)
-    nodeset_func->fix_fields(current_thd, &nodeset_func);
-  max_length= MAX_BLOB_WIDTH;
+  /*
+     Parsing XML is a heavy operation, so if the first argument is constant,
+     then parse XML only one time and cache the parsed representation
+     together with raw text representation.
+
+     Note, we cannot cache the entire function result even if
+     the first and the second arguments are constants, because
+     the XPath expression may have user and SP variable references,
+     so the function result can vary between executions.
+  */
+  if ((args[0]->const_item() && get_xml(&xml, true)) ||
+      !(nodeset_func= xpath.item))
+    return false; // Will return NULL
+
+  return nodeset_func->fix_fields(thd, &nodeset_func);
 }
 
 
@@ -2777,25 +2796,24 @@ int xml_leave(MY_XML_PARSER *st,const ch
   Parse raw XML
 
   SYNOPSYS
-    
 
   RETURN
-    Currently pointer to parsed XML on success
-    0 on parse error
+    false on success
+    true on error
 */
-String *Item_xml_str_func::parse_xml(String *raw_xml, String *parsed_xml_buf)
+bool Item_xml_str_func::XML::parse()
 {
   MY_XML_PARSER p;
   MY_XML_USER_DATA user_data;
   int rc;
 
-  parsed_xml_buf->length(0);
+  m_parsed_buf.length(0);
 
   /* Prepare XML parser */
   my_xml_parser_create(&p);
   p.flags= MY_XML_FLAG_RELATIVE_NAMES | MY_XML_FLAG_SKIP_TEXT_NORMALIZATION;
   user_data.level= 0;
-  user_data.pxml= parsed_xml_buf;
+  user_data.pxml= &m_parsed_buf;
   user_data.parent= 0;
   my_xml_set_enter_handler(&p, xml_enter);
   my_xml_set_value_handler(&p, xml_value);
@@ -2804,10 +2822,10 @@ String *Item_xml_str_func::parse_xml(Str
 
   /* Add root node */
   p.current_node_type= MY_XML_NODE_TAG;
-  xml_enter(&p, raw_xml->ptr(), 0);
+  xml_enter(&p, m_raw_ptr->ptr(), 0);
 
   /* Execute XML parser */
-  if ((rc= my_xml_parse(&p, raw_xml->ptr(), raw_xml->length())) != MY_XML_OK)
+  if ((rc= my_xml_parse(&p, m_raw_ptr->ptr(), m_raw_ptr->length())) != MY_XML_OK)
   {
     char buf[128];
     my_snprintf(buf, sizeof(buf)-1, "parse error at line %d pos %lu: %s",
@@ -2817,10 +2835,41 @@ String *Item_xml_str_func::parse_xml(Str
     push_warning_printf(current_thd, Sql_condition::WARN_LEVEL_WARN,
                         ER_WRONG_VALUE,
                         ER(ER_WRONG_VALUE), "XML", buf);
+    m_raw_ptr= (String *) 0;
   }
   my_xml_parser_free(&p);
 
-  return rc == MY_XML_OK ? parsed_xml_buf : 0;
+  return rc != MY_XML_OK;
+}
+
+
+/*
+  Parse the raw XML from the given source,
+  optionally cache the raw XML,
+  remember the pointer to the raw XML.
+*/
+bool Item_xml_str_func::XML::parse(String *raw_xml, bool cache)
+{
+  m_raw_ptr= raw_xml;
+  if (cache)
+  {
+    m_cached= true;
+    if (m_raw_ptr != &m_raw_buf && m_raw_buf.copy(*m_raw_ptr))
+    {
+      m_raw_ptr= (String *) 0;
+      return true;
+    }
+    m_raw_ptr= &m_raw_buf;
+  }
+  return parse();
+}
+
+
+const MY_XML_NODE *Item_xml_str_func::XML::node(uint idx)
+{
+  const MY_XML_NODE *nodebeg= (MY_XML_NODE*) m_parsed_buf.ptr();
+  DBUG_ASSERT(idx < m_parsed_buf.length() / sizeof (MY_XML_NODE));
+  return nodebeg + idx;
 }
 
 
@@ -2828,10 +2877,8 @@ String *Item_func_xml_extractvalue::val_
 {
   String *res;
   null_value= 0;
-  if (!nodeset_func ||
-      !(res= args[0]->val_str(str)) || 
-      !parse_xml(res, &pxml) ||
-      !(res= nodeset_func->val_str(&tmp_value)))
+  if (!nodeset_func || get_xml(&xml) ||
+      !(res= nodeset_func->val_str(str)))
   {
     null_value= 1;
     return 0;
@@ -2840,22 +2887,37 @@ String *Item_func_xml_extractvalue::val_
 }
 
 
+bool Item_func_xml_update::collect_result(String *str,
+                                          const MY_XML_NODE *cut,
+                                          const String *replace)
+{
+  uint offs= cut->type == MY_XML_NODE_TAG ? 1 : 0;
+  const char *end= cut->tagend + offs;
+  str->length(0);
+  str->set_charset(collation.collation);
+  return
+    /* Put the XML part preceeding the replaced piece */
+    str->append(xml.raw()->ptr(), cut->beg - xml.raw()->ptr() - offs) ||
+    /* Put the replacement */
+    str->append(replace->ptr(), replace->length()) ||
+    /* Put the XML part following the replaced piece */
+    str->append(end, xml.raw()->ptr() + xml.raw()->length() - end);
+}
+
+
 String *Item_func_xml_update::val_str(String *str)
 {
-  String *res, *nodeset, *rep;
+  String *nodeset, *rep;
 
   null_value= 0;
-  if (!nodeset_func || 
-      !(res= args[0]->val_str(str)) ||
+  if (!nodeset_func || get_xml(&xml) ||
       !(rep= args[2]->val_str(&tmp_value3)) ||
-      !parse_xml(res, &pxml) ||
       !(nodeset= nodeset_func->val_nodeset(&tmp_value2)))
   {
     null_value= 1;
     return 0;
   }
 
-  MY_XML_NODE *nodebeg= (MY_XML_NODE*) pxml.ptr();
   MY_XPATH_FLT *fltbeg= (MY_XPATH_FLT*) nodeset->ptr();
   MY_XPATH_FLT *fltend= (MY_XPATH_FLT*) (nodeset->ptr() + nodeset->length());
 
@@ -2863,10 +2925,10 @@ String *Item_func_xml_update::val_str(St
   if (fltend - fltbeg != 1)
   {
     /* TODO: perhaps add a warning that more than one tag selected */
-    return res;
+    return xml.raw();
   }
 
-  nodebeg+= fltbeg->num;
+  const MY_XML_NODE *nodebeg= xml.node(fltbeg->num);
 
   if (!nodebeg->level)
   {
@@ -2878,12 +2940,5 @@ String *Item_func_xml_update::val_str(St
     return rep;
   }
 
-  tmp_value.length(0);
-  tmp_value.set_charset(collation.collation);
-  uint offs= nodebeg->type == MY_XML_NODE_TAG ? 1 : 0;
-  tmp_value.append(res->ptr(), nodebeg->beg - res->ptr() - offs);
-  tmp_value.append(rep->ptr(), rep->length());
-  const char *end= nodebeg->tagend + offs;
-  tmp_value.append(end, res->ptr() + res->length() - end);
-  return &tmp_value;
+  return collect_result(str, nodebeg, rep) ? (String *) NULL : str;
 }

=== modified file 'sql/item_xmlfunc.h'
--- sql/item_xmlfunc.h	2013-11-28 21:35:59 +0000
+++ sql/item_xmlfunc.h	2014-02-20 13:45:36 +0000
@@ -26,11 +26,55 @@
 #endif
 
 
+typedef struct my_xml_node_st MY_XML_NODE;
+
+
 class Item_xml_str_func: public Item_str_func
 {
 protected:
-  String tmp_value, pxml;
+  /*
+    A helper class to store raw and parsed XML.
+  */
+  class XML
+  {
+    bool m_cached;
+    String *m_raw_ptr;   // Pointer to text representation
+    String m_raw_buf;    // Cached text representation
+    String m_parsed_buf; // Array of MY_XML_NODEs, pointing to raw_buffer
+    bool parse();
+    void reset()
+    {
+      m_cached= false;
+      m_raw_ptr= (String *) 0;
+    }
+  public:
+    XML() { reset(); }
+    void set_charset(CHARSET_INFO *cs) { m_parsed_buf.set_charset(cs); }
+    String *raw() { return m_raw_ptr; }
+    String *parsed() { return &m_parsed_buf; }
+    const MY_XML_NODE *node(uint idx);
+    bool cached() { return m_cached; }
+    bool parse(String *raw, bool cache);
+    bool parse(Item *item, bool cache)
+    {
+      String *res;
+      if (!(res= item->val_str(&m_raw_buf)))
+      {
+        m_raw_ptr= (String *) 0;
+        m_cached= cache;
+        return true;
+      }
+      return parse(res, cache);
+    }
+  };
   Item *nodeset_func;
+  XML xml;
+  bool get_xml(XML *xml, bool cache= false)
+  {
+    if (!cache && xml->cached())
+      return xml->raw() == 0;
+    return xml->parse(args[0], cache);
+  }
 public:
   Item_xml_str_func(Item *a, Item *b): 
     Item_str_func(a,b) 
@@ -42,8 +86,12 @@ class Item_xml_str_func: public Item_str
   {
     maybe_null= TRUE;
   }
+  bool fix_fields(THD *thd, Item **ref);
   void fix_length_and_dec();
-  String *parse_xml(String *raw_xml, String *parsed_xml_buf);
+  bool const_item() const
+  {
+    return const_item_cache && (!nodeset_func || nodeset_func->const_item());
+  }
   bool check_vcol_func_processor(uchar *int_arg) 
   {
     return trace_unsupported_by_check_vcol_func_processor(func_name());
@@ -63,6 +111,9 @@ class Item_func_xml_extractvalue: public
 class Item_func_xml_update: public Item_xml_str_func
 {
   String tmp_value2, tmp_value3;
+  bool collect_result(String *str,
+                      const MY_XML_NODE *cut,
+                      const String *replace);
 public:
   Item_func_xml_update(Item *a,Item *b,Item *c) :Item_xml_str_func(a,b,c) {}
   const char *func_name() const { return "updatexml"; }


Follow ups