← Back to team overview

maria-developers team mailing list archive

Review for: MDEV-17399 Add support for JSON_TABLE, part #9

 

Hi Alexey,

More input:

== Circular dependencies are allowed ==

This query will cause an assert in the optimizer, for obvious reasons:

select * 
from
  json_table(JS3.size, '$' columns (size INT PATH '$.size')) as JS1,
  json_table(JS1.size, '$' columns (size INT PATH '$.size')) as JS2,
  json_table(JS1.size, '$' columns (size INT PATH '$.size')) as JS3
where
 1;

== Character set inference is wrong ==

create table t20 (json varchar(100) character set utf8);
insert into t20 values ('{"value":"АБВ"}');
create table tj20 as 
select T.value
from t20, 
     json_table(t20.json, 
                '$' columns (value varchar(32) PATH '$.value')) T;

mysql> show create table tj20\G
*************************** 1. row ***************************
       Table: tj20
Create Table: CREATE TABLE `tj20` (
  `value` varchar(32) DEFAULT NULL
) ENGINE=InnoDB DEFAULT CHARSET=latin1

The source document uses UTF-8 but the values from it are in latin1? 

== Comments and code cleanup ==

There are a lot of comments missing, some duplicate code, and code using
out-of-date ways of doing things.

Please find attached a patch with obvious fixes for most of it.

== Explain ==

Let's follow MySQL here:
let the tabular EXPLAIN show "Table function: json_table" in the Extra column,
let EXPLAIN FORMAT=JSON show "table_function": "json_table" in the "table"
element.

BR
 Sergei
-- 
Sergei Petrunia, Software Developer
MariaDB Corporation | Skype: sergefp | Blog: http://s.petrunia.net/blog


diff -u ../10.5-json-cp/sql_class.h sql/sql_class.h
--- ../10.5-json-cp/sql_class.h	2020-08-02 16:09:22.113625982 +0300
+++ sql/sql_class.h	2020-08-03 13:23:56.910397573 +0300
@@ -7436,5 +7436,9 @@
 
 extern THD_list server_threads;
 
+void setup_tmp_table_column_bitmaps(TABLE *table, uchar *bitmaps);
+void
+setup_tmp_table_column_bitmaps(TABLE *table, uchar *bitmaps, uint field_count);
+
 #endif /* MYSQL_SERVER */
 #endif /* SQL_CLASS_INCLUDED */
diff -u ../10.5-json-cp/sql_select.h sql/sql_select.h
--- ../10.5-json-cp/sql_select.h	2020-08-02 16:09:22.121626229 +0300
+++ sql/sql_select.h	2020-08-03 13:23:58.622294391 +0300
@@ -2454,7 +2454,6 @@
                            TMP_ENGINE_COLUMNDEF **recinfo,
                            ulonglong options);
 bool open_tmp_table(TABLE *table);
-void setup_tmp_table_column_bitmaps(TABLE *table, uchar *bitmaps);
 double prev_record_reads(const POSITION *positions, uint idx, table_map found_ref);
 void fix_list_after_tbl_changes(SELECT_LEX *new_parent, List<TABLE_LIST> *tlist);
 double get_tmp_table_lookup_cost(THD *thd, double row_count, uint row_size);
diff -u ../10.5-json-cp/table_function.cc sql/table_function.cc
--- ../10.5-json-cp/table_function.cc	2020-08-02 16:09:22.145626969 +0300
+++ sql/table_function.cc	2020-08-03 14:55:49.167668973 +0300
@@ -40,6 +40,10 @@
 static table_function_handlerton table_function_hton;
 
 
+/*
+   A table that produces output rows for JSON_TABLE().
+*/
+
 class ha_json_table: public handler
 {
 protected:
@@ -133,6 +137,14 @@
 };
 
 
+/*
+  @brief
+    Start scanning the JSON document in [str ... end]
+
+  @detail
+    Note: non-root nested paths are set to scan one JSON node (that is, a
+    "subdocument")
+*/
 void Json_table_nested_path::scan_start(CHARSET_INFO *i_cs,
                                         const uchar *str, const uchar *end)
 {
@@ -144,6 +156,11 @@
 }
 
 
+/*
+  @brief
+    Find the next JSON element that matches the search path.
+*/
+
 int Json_table_nested_path::scan_next()
 {
   if (m_cur_nested)
@@ -279,6 +296,10 @@
     return HA_ERR_END_OF_FILE;
 
   m_jt->m_nested_path.get_current_position((uchar *) m_js->ptr(), m_cur_pos);
+
+  /*
+    Move the primary nested path object to point to the data for the next row.
+  */
   if (m_jt->m_nested_path.scan_next())
   {
     if (m_jt->m_nested_path.m_engine.s.error)
@@ -295,7 +316,10 @@
     }
     return HA_ERR_END_OF_FILE;
   }
-  
+ 
+  /*
+    Get the values for each field of the table
+  */
   List_iterator_fast<Json_table_column> jc_i(m_jt->m_columns);
   my_ptrdiff_t ptrdiff= buf - table->record[0];
   while ((jc= jc_i++))
@@ -305,72 +329,66 @@
 
     if (ptrdiff)
       (*f)->move_field_offset(ptrdiff);
-    switch (jc->m_column_type)
-    {
-    case Json_table_column::FOR_ORDINALITY:
-      if (jc->m_nest->m_null)
-        (*f)->set_null();
-      else
-      {
-        (*f)->set_notnull();
-        (*f)->store(jc->m_nest->m_ordinality_counter, TRUE);
-      }
-      break;
-    case Json_table_column::PATH:
-    case Json_table_column::EXISTS_PATH:
+
+    if (jc->m_nest->m_null)
+      (*f)->set_null();
+    else
     {
-      json_engine_t je;
-      json_engine_t &nest_je= jc->m_nest->m_engine;
-      json_path_step_t *cur_step;
-      uint array_counters[JSON_DEPTH_LIMIT];
-      int not_found;
+      (*f)->set_notnull();
 
-      if (jc->m_nest->m_null)
+      switch (jc->m_column_type)
       {
-        (*f)->set_null();
+      case Json_table_column::FOR_ORDINALITY:
+        (*f)->store(jc->m_nest->m_ordinality_counter, TRUE);
         break;
-      }
-      json_scan_start(&je, nest_je.s.cs,
-                      nest_je.value_begin, nest_je.s.str_end);
-
-      cur_step= jc->m_path.steps;
-      not_found= json_find_path(&je, &jc->m_path, &cur_step, array_counters) ||
-                 json_read_value(&je);
-
-      if (jc->m_column_type == Json_table_column::EXISTS_PATH)
+      case Json_table_column::PATH:
+      case Json_table_column::EXISTS_PATH:
       {
-        (*f)->set_notnull();
-        (*f)->store(!not_found);
-      }
-      else /*PATH*/
-      {
-        if (not_found)
-          jc->m_on_empty.respond(jc, *f);
-        else
+        json_engine_t je;
+        json_engine_t &nest_je= jc->m_nest->m_engine;
+        json_path_step_t *cur_step;
+        uint array_counters[JSON_DEPTH_LIMIT];
+        int not_found;
+
+        json_scan_start(&je, nest_je.s.cs,
+                        nest_je.value_begin, nest_je.s.str_end);
+
+        cur_step= jc->m_path.steps;
+        not_found= json_find_path(&je, &jc->m_path, &cur_step, array_counters) ||
+                   json_read_value(&je);
+
+        if (jc->m_column_type == Json_table_column::EXISTS_PATH)
         {
-          (*f)->set_notnull();
-          if (!json_value_scalar(&je) ||
-              (*f)->store((const char *) je.value,
-                          (uint32) je.value_len, je.s.cs))
-            jc->m_on_error.respond(jc, *f);
+          (*f)->store(!not_found);
+        }
+        else /*PATH*/
+        {
+          if (not_found)
+            jc->m_on_empty.respond(jc, *f);
           else
           {
-            /*
-              If the path contains wildcards, check if there are
-              more matches for it in json and report an error if so.
-            */
-            if (jc->m_path.types_used &
-                  (JSON_PATH_WILD | JSON_PATH_DOUBLE_WILD) &&
-                (json_scan_next(&je) ||
-                 !json_find_path(&je, &jc->m_path, &cur_step, array_counters)))
+            if (!json_value_scalar(&je) ||
+                (*f)->store((const char *) je.value,
+                            (uint32) je.value_len, je.s.cs))
               jc->m_on_error.respond(jc, *f);
+            else
+            {
+              /*
+                If the path contains wildcards, check if there are
+                more matches for it in json and report an error if so.
+              */
+              if (jc->m_path.types_used &
+                    (JSON_PATH_WILD | JSON_PATH_DOUBLE_WILD) &&
+                  (json_scan_next(&je) ||
+                   !json_find_path(&je, &jc->m_path, &cur_step, array_counters)))
+                jc->m_on_error.respond(jc, *f);
+            }
           }
-
         }
+        break;
       }
-      break;
+      };
     }
-    };
     if (ptrdiff)
       (*f)->move_field_offset(-ptrdiff);
 cont_loop:
@@ -407,35 +425,6 @@
 }
 
 
-static void
-setup_tmp_table_column_bitmaps(TABLE *table, uchar *bitmaps, uint field_count)
-{
-  uint bitmap_size= bitmap_buffer_size(field_count);
-
-  DBUG_ASSERT(table->s->virtual_fields == 0);
-
-  my_bitmap_init(&table->def_read_set, (my_bitmap_map*) bitmaps, field_count,
-              FALSE);
-  bitmaps+= bitmap_size;
-  my_bitmap_init(&table->tmp_set,
-                 (my_bitmap_map*) bitmaps, field_count, FALSE);
-  bitmaps+= bitmap_size;
-  my_bitmap_init(&table->eq_join_set,
-                 (my_bitmap_map*) bitmaps, field_count, FALSE);
-  bitmaps+= bitmap_size;
-  my_bitmap_init(&table->cond_set,
-                 (my_bitmap_map*) bitmaps, field_count, FALSE);
-  bitmaps+= bitmap_size;
-  my_bitmap_init(&table->has_value_set,
-                 (my_bitmap_map*) bitmaps, field_count, FALSE);
-  /* write_set and all_set are copies of read_set */
-  table->def_write_set= table->def_read_set;
-  table->s->all_set= table->def_read_set;
-  bitmap_set_all(&table->s->all_set);
-  table->default_column_bitmaps();
-}
-
-
 void Create_json_table::add_field(TABLE *table, Field *field,
                                   uint fieldnr, bool force_not_null_cols)
 {
@@ -721,6 +710,12 @@
 }
 
 
+/*
+  @brief
+    Read the JSON_TABLE's field definitions from @jt and add the fields to
+    table @table.
+*/
+
 bool Create_json_table::add_json_table_fields(THD *thd, TABLE *table,
                                               Table_function_json_table *jt)
 {
@@ -794,6 +789,22 @@
 }
 
 
+/*
+  @brief
+    Given a TABLE_LIST representing JSON_TABLE(...) syntax, create a temporary
+    table for it.
+
+  @detail
+    The temporary table will have:
+    - fields whose names/datatypes are specified in JSON_TABLE(...) syntax
+    - a ha_json_table as the storage engine.
+
+    The uses of the temporary table are:
+    - name resolution: the query may have references to the columns of
+      JSON_TABLE(...). A TABLE object will allow to resolve them.
+    - query execution: ha_json_table will produce JSON_TABLE's rows.
+*/
+
 TABLE *create_table_for_function(THD *thd, TABLE_LIST *sql_table)
 {
   TMP_TABLE_PARAM tp;
@@ -878,10 +889,7 @@
 */
 int Json_table_column::print(THD *thd, Field **f, String *str)
 {
-  char column_type_buff[MAX_FIELD_WIDTH];
-  String column_type(column_type_buff, sizeof(column_type_buff),
-                     str->charset());
-
+  StringBuffer<MAX_FIELD_WIDTH> column_type(str->charset());
   if (append_identifier(thd, str, &m_field->field_name) ||
       str->append(' '))
     return 1;
@@ -923,14 +931,20 @@
 
   /*
     This is done so the ::print function can just print the path string.
-    Can be removed if we redo that function to print the path using it's
-    anctual content. Not sure though if we should.
+    Can be removed if we redo that function to print the path using its
+    actual content. Not sure though if we should.
   */
   m_path.s.c_str= (const uchar *) path.str;
   return 0;
 }
 
 
+/*
+  @brief 
+    Perform the action of this response on field @f (emit an error, or set @f
+    to NULL, or set it to default value).
+*/
+
 void Json_table_column::On_response::respond(Json_table_column *jc, Field *f)
 {
   switch (m_response)
@@ -1003,6 +1017,21 @@
 }
 
 
+/*
+  @brief
+    Perform name-resolution phase tasks
+
+  @detail
+    - The only argument that needs resolution is the JSON text
+    - Then, we need to set dependencies: if JSON_TABLE refers to table's
+      column, e.g.
+
+         JSON_TABLE (t1.col ... ) AS t2
+
+      then it can be computed only after table t1.
+    - The dependencies must not form a loop.
+*/
+
 int Table_function_json_table::setup(THD *thd, TABLE_LIST *sql_table)
 {
   thd->where= "JSON_TABLE argument";
diff -u ../10.5-json-cp/table_function.h sql/table_function.h
--- ../10.5-json-cp/table_function.h	2020-08-02 16:09:22.121626229 +0300
+++ sql/table_function.h	2020-08-03 14:59:42.643398344 +0300
@@ -19,11 +19,13 @@
 
 #include <json_lib.h>
 
+class Json_table_column;
+
 /*
   The Json_table_nested_path represents the 'current nesting' level
   for a set of JSON_TABLE columns.
   Each column (Json_table_column instance) is linked with corresponding
-  'nested path' object and gets it's piece of JSON to parse during the computation
+  'nested path' object and gets its piece of JSON to parse during the computation
   phase.
   The root 'nested_path' is always present as a part of Table_function_json_table,
   then other 'nested_paths' can be created and linked into a tree structure when new
@@ -46,13 +48,11 @@
   m_nest    &root      &nested_b    &nested_c    &nested_n
 */
 
-
-class Json_table_column;
-
 class Json_table_nested_path : public Sql_alloc
 {
 public:
-  bool m_null;
+  bool m_null; // TRUE <=> produce SQL NULL.
+
   json_path_t m_path;
   json_engine_t m_engine;
   json_path_t m_cur_path;