maria-developers team mailing list archive
-
maria-developers team
-
Mailing list archive
-
Message #12344
Review for: MDEV-17399 Add support for JSON_TABLE, part #9
Hi Alexey,
More input:
== Circular dependencies are allowed ==
This query will cause an assert in the optimizer, for obvious reasons:
select *
from
json_table(JS3.size, '$' columns (size INT PATH '$.size')) as JS1,
json_table(JS1.size, '$' columns (size INT PATH '$.size')) as JS2,
json_table(JS1.size, '$' columns (size INT PATH '$.size')) as JS3
where
1;
== Character set inference is wrong ==
create table t20 (json varchar(100) character set utf8);
insert into t20 values ('{"value":"АБВ"}');
create table tj20 as
select T.value
from t20,
json_table(t20.json,
'$' columns (value varchar(32) PATH '$.value')) T;
mysql> show create table tj20\G
*************************** 1. row ***************************
Table: tj20
Create Table: CREATE TABLE `tj20` (
`value` varchar(32) DEFAULT NULL
) ENGINE=InnoDB DEFAULT CHARSET=latin1
The source document uses UTF-8 but the values from it are in latin1?
== Comments and code cleanup ==
There are a lot of comments missing, some duplicate code, and code using
out-of-date ways of doing things.
Please find attached a patch with obvious fixes for most of it.
== Explain ==
Let's follow MySQL here:
let the tabular EXPLAIN show "Table function: json_table" in the Extra column,
let EXPLAIN FORMAT=JSON show "table_function": "json_table" in the "table"
element.
BR
Sergei
--
Sergei Petrunia, Software Developer
MariaDB Corporation | Skype: sergefp | Blog: http://s.petrunia.net/blog
diff -u ../10.5-json-cp/sql_class.h sql/sql_class.h
--- ../10.5-json-cp/sql_class.h 2020-08-02 16:09:22.113625982 +0300
+++ sql/sql_class.h 2020-08-03 13:23:56.910397573 +0300
@@ -7436,5 +7436,9 @@
extern THD_list server_threads;
+void setup_tmp_table_column_bitmaps(TABLE *table, uchar *bitmaps);
+void
+setup_tmp_table_column_bitmaps(TABLE *table, uchar *bitmaps, uint field_count);
+
#endif /* MYSQL_SERVER */
#endif /* SQL_CLASS_INCLUDED */
diff -u ../10.5-json-cp/sql_select.h sql/sql_select.h
--- ../10.5-json-cp/sql_select.h 2020-08-02 16:09:22.121626229 +0300
+++ sql/sql_select.h 2020-08-03 13:23:58.622294391 +0300
@@ -2454,7 +2454,6 @@
TMP_ENGINE_COLUMNDEF **recinfo,
ulonglong options);
bool open_tmp_table(TABLE *table);
-void setup_tmp_table_column_bitmaps(TABLE *table, uchar *bitmaps);
double prev_record_reads(const POSITION *positions, uint idx, table_map found_ref);
void fix_list_after_tbl_changes(SELECT_LEX *new_parent, List<TABLE_LIST> *tlist);
double get_tmp_table_lookup_cost(THD *thd, double row_count, uint row_size);
diff -u ../10.5-json-cp/table_function.cc sql/table_function.cc
--- ../10.5-json-cp/table_function.cc 2020-08-02 16:09:22.145626969 +0300
+++ sql/table_function.cc 2020-08-03 14:55:49.167668973 +0300
@@ -40,6 +40,10 @@
static table_function_handlerton table_function_hton;
+/*
+ A table that produces output rows for JSON_TABLE().
+*/
+
class ha_json_table: public handler
{
protected:
@@ -133,6 +137,14 @@
};
+/*
+ @brief
+ Start scanning the JSON document in [str ... end]
+
+ @detail
+ Note: non-root nested paths are set to scan one JSON node (that is, a
+ "subdocument")
+*/
void Json_table_nested_path::scan_start(CHARSET_INFO *i_cs,
const uchar *str, const uchar *end)
{
@@ -144,6 +156,11 @@
}
+/*
+ @brief
+ Find the next JSON element that matches the search path.
+*/
+
int Json_table_nested_path::scan_next()
{
if (m_cur_nested)
@@ -279,6 +296,10 @@
return HA_ERR_END_OF_FILE;
m_jt->m_nested_path.get_current_position((uchar *) m_js->ptr(), m_cur_pos);
+
+ /*
+ Move the primary nested path object to point to the data for the next row.
+ */
if (m_jt->m_nested_path.scan_next())
{
if (m_jt->m_nested_path.m_engine.s.error)
@@ -295,7 +316,10 @@
}
return HA_ERR_END_OF_FILE;
}
-
+
+ /*
+ Get the values for each field of the table
+ */
List_iterator_fast<Json_table_column> jc_i(m_jt->m_columns);
my_ptrdiff_t ptrdiff= buf - table->record[0];
while ((jc= jc_i++))
@@ -305,72 +329,66 @@
if (ptrdiff)
(*f)->move_field_offset(ptrdiff);
- switch (jc->m_column_type)
- {
- case Json_table_column::FOR_ORDINALITY:
- if (jc->m_nest->m_null)
- (*f)->set_null();
- else
- {
- (*f)->set_notnull();
- (*f)->store(jc->m_nest->m_ordinality_counter, TRUE);
- }
- break;
- case Json_table_column::PATH:
- case Json_table_column::EXISTS_PATH:
+
+ if (jc->m_nest->m_null)
+ (*f)->set_null();
+ else
{
- json_engine_t je;
- json_engine_t &nest_je= jc->m_nest->m_engine;
- json_path_step_t *cur_step;
- uint array_counters[JSON_DEPTH_LIMIT];
- int not_found;
+ (*f)->set_notnull();
- if (jc->m_nest->m_null)
+ switch (jc->m_column_type)
{
- (*f)->set_null();
+ case Json_table_column::FOR_ORDINALITY:
+ (*f)->store(jc->m_nest->m_ordinality_counter, TRUE);
break;
- }
- json_scan_start(&je, nest_je.s.cs,
- nest_je.value_begin, nest_je.s.str_end);
-
- cur_step= jc->m_path.steps;
- not_found= json_find_path(&je, &jc->m_path, &cur_step, array_counters) ||
- json_read_value(&je);
-
- if (jc->m_column_type == Json_table_column::EXISTS_PATH)
+ case Json_table_column::PATH:
+ case Json_table_column::EXISTS_PATH:
{
- (*f)->set_notnull();
- (*f)->store(!not_found);
- }
- else /*PATH*/
- {
- if (not_found)
- jc->m_on_empty.respond(jc, *f);
- else
+ json_engine_t je;
+ json_engine_t &nest_je= jc->m_nest->m_engine;
+ json_path_step_t *cur_step;
+ uint array_counters[JSON_DEPTH_LIMIT];
+ int not_found;
+
+ json_scan_start(&je, nest_je.s.cs,
+ nest_je.value_begin, nest_je.s.str_end);
+
+ cur_step= jc->m_path.steps;
+ not_found= json_find_path(&je, &jc->m_path, &cur_step, array_counters) ||
+ json_read_value(&je);
+
+ if (jc->m_column_type == Json_table_column::EXISTS_PATH)
{
- (*f)->set_notnull();
- if (!json_value_scalar(&je) ||
- (*f)->store((const char *) je.value,
- (uint32) je.value_len, je.s.cs))
- jc->m_on_error.respond(jc, *f);
+ (*f)->store(!not_found);
+ }
+ else /*PATH*/
+ {
+ if (not_found)
+ jc->m_on_empty.respond(jc, *f);
else
{
- /*
- If the path contains wildcards, check if there are
- more matches for it in json and report an error if so.
- */
- if (jc->m_path.types_used &
- (JSON_PATH_WILD | JSON_PATH_DOUBLE_WILD) &&
- (json_scan_next(&je) ||
- !json_find_path(&je, &jc->m_path, &cur_step, array_counters)))
+ if (!json_value_scalar(&je) ||
+ (*f)->store((const char *) je.value,
+ (uint32) je.value_len, je.s.cs))
jc->m_on_error.respond(jc, *f);
+ else
+ {
+ /*
+ If the path contains wildcards, check if there are
+ more matches for it in json and report an error if so.
+ */
+ if (jc->m_path.types_used &
+ (JSON_PATH_WILD | JSON_PATH_DOUBLE_WILD) &&
+ (json_scan_next(&je) ||
+ !json_find_path(&je, &jc->m_path, &cur_step, array_counters)))
+ jc->m_on_error.respond(jc, *f);
+ }
}
-
}
+ break;
}
- break;
+ };
}
- };
if (ptrdiff)
(*f)->move_field_offset(-ptrdiff);
cont_loop:
@@ -407,35 +425,6 @@
}
-static void
-setup_tmp_table_column_bitmaps(TABLE *table, uchar *bitmaps, uint field_count)
-{
- uint bitmap_size= bitmap_buffer_size(field_count);
-
- DBUG_ASSERT(table->s->virtual_fields == 0);
-
- my_bitmap_init(&table->def_read_set, (my_bitmap_map*) bitmaps, field_count,
- FALSE);
- bitmaps+= bitmap_size;
- my_bitmap_init(&table->tmp_set,
- (my_bitmap_map*) bitmaps, field_count, FALSE);
- bitmaps+= bitmap_size;
- my_bitmap_init(&table->eq_join_set,
- (my_bitmap_map*) bitmaps, field_count, FALSE);
- bitmaps+= bitmap_size;
- my_bitmap_init(&table->cond_set,
- (my_bitmap_map*) bitmaps, field_count, FALSE);
- bitmaps+= bitmap_size;
- my_bitmap_init(&table->has_value_set,
- (my_bitmap_map*) bitmaps, field_count, FALSE);
- /* write_set and all_set are copies of read_set */
- table->def_write_set= table->def_read_set;
- table->s->all_set= table->def_read_set;
- bitmap_set_all(&table->s->all_set);
- table->default_column_bitmaps();
-}
-
-
void Create_json_table::add_field(TABLE *table, Field *field,
uint fieldnr, bool force_not_null_cols)
{
@@ -721,6 +710,12 @@
}
+/*
+ @brief
+ Read the JSON_TABLE's field definitions from @jt and add the fields to
+ table @table.
+*/
+
bool Create_json_table::add_json_table_fields(THD *thd, TABLE *table,
Table_function_json_table *jt)
{
@@ -794,6 +789,22 @@
}
+/*
+ @brief
+ Given a TABLE_LIST representing JSON_TABLE(...) syntax, create a temporary
+ table for it.
+
+ @detail
+ The temporary table will have:
+ - fields whose names/datatypes are specified in JSON_TABLE(...) syntax
+ - a ha_json_table as the storage engine.
+
+ The uses of the temporary table are:
+ - name resolution: the query may have references to the columns of
+ JSON_TABLE(...). A TABLE object will allow to resolve them.
+ - query execution: ha_json_table will produce JSON_TABLE's rows.
+*/
+
TABLE *create_table_for_function(THD *thd, TABLE_LIST *sql_table)
{
TMP_TABLE_PARAM tp;
@@ -878,10 +889,7 @@
*/
int Json_table_column::print(THD *thd, Field **f, String *str)
{
- char column_type_buff[MAX_FIELD_WIDTH];
- String column_type(column_type_buff, sizeof(column_type_buff),
- str->charset());
-
+ StringBuffer<MAX_FIELD_WIDTH> column_type(str->charset());
if (append_identifier(thd, str, &m_field->field_name) ||
str->append(' '))
return 1;
@@ -923,14 +931,20 @@
/*
This is done so the ::print function can just print the path string.
- Can be removed if we redo that function to print the path using it's
- anctual content. Not sure though if we should.
+ Can be removed if we redo that function to print the path using its
+ actual content. Not sure though if we should.
*/
m_path.s.c_str= (const uchar *) path.str;
return 0;
}
+/*
+ @brief
+ Perform the action of this response on field @f (emit an error, or set @f
+ to NULL, or set it to default value).
+*/
+
void Json_table_column::On_response::respond(Json_table_column *jc, Field *f)
{
switch (m_response)
@@ -1003,6 +1017,21 @@
}
+/*
+ @brief
+ Perform name-resolution phase tasks
+
+ @detail
+ - The only argument that needs resolution is the JSON text
+ - Then, we need to set dependencies: if JSON_TABLE refers to table's
+ column, e.g.
+
+ JSON_TABLE (t1.col ... ) AS t2
+
+ then it can be computed only after table t1.
+ - The dependencies must not form a loop.
+*/
+
int Table_function_json_table::setup(THD *thd, TABLE_LIST *sql_table)
{
thd->where= "JSON_TABLE argument";
diff -u ../10.5-json-cp/table_function.h sql/table_function.h
--- ../10.5-json-cp/table_function.h 2020-08-02 16:09:22.121626229 +0300
+++ sql/table_function.h 2020-08-03 14:59:42.643398344 +0300
@@ -19,11 +19,13 @@
#include <json_lib.h>
+class Json_table_column;
+
/*
The Json_table_nested_path represents the 'current nesting' level
for a set of JSON_TABLE columns.
Each column (Json_table_column instance) is linked with corresponding
- 'nested path' object and gets it's piece of JSON to parse during the computation
+ 'nested path' object and gets its piece of JSON to parse during the computation
phase.
The root 'nested_path' is always present as a part of Table_function_json_table,
then other 'nested_paths' can be created and linked into a tree structure when new
@@ -46,13 +48,11 @@
m_nest &root &nested_b &nested_c &nested_n
*/
-
-class Json_table_column;
-
class Json_table_nested_path : public Sql_alloc
{
public:
- bool m_null;
+ bool m_null; // TRUE <=> produce SQL NULL.
+
json_path_t m_path;
json_engine_t m_engine;
json_path_t m_cur_path;