maria-developers team mailing list archive
-
maria-developers team
-
Mailing list archive
-
Message #02961
Rev 2842: A test implementation of having SQL layer to supply the storage engine with in file:///home/psergey/dev/maria-5.1-minmax/
At file:///home/psergey/dev/maria-5.1-minmax/
------------------------------------------------------------
revno: 2842
revision-id: psergey@xxxxxxxxxxxx-20100421141933-707cy6l3iha89lm5
parent: monty@xxxxxxxxxxxx-20100406224708-i62wvtw3nl14fhv4
committer: Sergey Petrunya <psergey@xxxxxxxxxxxx>
branch nick: maria-5.1-minmax
timestamp: Wed 2010-04-21 10:19:33 -0400
message:
A test implementation of having SQL layer to supply the storage engine with
interval lists for each unindexed column.
This patch consists of
- SQL layer providing service of taking a condition and extracting
list<column, disjoint_ordered_list<interval>> from it (see opt_field_minmax.h)
- An storage engine's "Implementation" that prints passed intervals to
server stderr.
Things need to be done:
- Comments, better names
- Make the interface compatible with Query Fragment Pushdown ideas.
- Invent something that would allow to have test coverage for this.
=== added file 'sql/opt_field_minmax.h'
--- a/sql/opt_field_minmax.h 1970-01-01 00:00:00 +0000
+++ b/sql/opt_field_minmax.h 2010-04-21 14:19:33 +0000
@@ -0,0 +1,35 @@
+//#include "../sql/opt_field_minmax.h"
+
+class RANGE_OPT_PARAM;
+class SEL_TREE;
+class SEL_ARG;
+
+typedef bool (*take_int_return_bool)(void* param, uint index);
+
+class Condition_analyzer
+{
+ TABLE *table;
+ MEM_ROOT alloc;
+ RANGE_OPT_PARAM *range_par;
+ SEL_TREE *tree;
+ my_bitmap_map *old_sets[2];
+ uchar min_val[MAX_KEY_LENGTH+MAX_FIELD_WIDTH];
+ uchar max_val[MAX_KEY_LENGTH+MAX_FIELD_WIDTH];
+
+ uint n_keys;
+
+ int cur_key;
+ SEL_ARG *cur_interval;
+public:
+ Condition_analyzer(TABLE *table_arg) :
+ table(table_arg), range_par(NULL), cur_key(-1) {}
+
+ bool analyze(Item *cond, take_int_return_bool field_filter_func,
+ void *field_filter_param);
+
+ bool start_intervals_walk_for_field(uint *fieldno);
+ bool get_next_interval(KEY_MULTI_RANGE *mrange);
+
+ void dispose();
+};
+
=== modified file 'sql/opt_range.cc'
--- a/sql/opt_range.cc 2010-01-15 15:27:55 +0000
+++ b/sql/opt_range.cc 2010-04-21 14:19:33 +0000
@@ -2788,6 +2788,248 @@
DBUG_RETURN(retval);
}
+// psergey-minmax: start
+
+#include "opt_field_minmax.h"
+
+/*
+ Create one-column pseudo-indexes for each column.
+*/
+
+static
+bool create_minmax_indexes_descriptions(RANGE_OPT_PARAM *range_par,
+ take_int_return_bool field_filter_func,
+ void *func_param,
+ uint *n_keys)
+{
+ TABLE *table= range_par->table;
+ MEM_ROOT *alloc= range_par->mem_root;
+ *n_keys= 0;
+ uint fieldno;
+ for (fieldno= 0; fieldno < range_par->table->s->fields; fieldno++)
+ {
+ enum_field_types type= table->field[fieldno]->type();
+ if (!(type == MYSQL_TYPE_TINY_BLOB ||
+ type == MYSQL_TYPE_MEDIUM_BLOB ||
+ type == MYSQL_TYPE_LONG_BLOB ||
+ type == MYSQL_TYPE_BLOB ||
+ type == MYSQL_TYPE_GEOMETRY) &&
+ field_filter_func(func_param, fieldno) &&
+ *n_keys < MAX_INDEXES)
+ {
+ (*n_keys)++;
+ }
+ }
+
+ if (!*n_keys)
+ return TRUE;
+
+ KEY_PART *key_part;
+ key_part= (KEY_PART*)alloc_root(alloc, sizeof(KEY_PART)* (*n_keys));
+ range_par->key_parts= key_part;
+ range_par->key_parts_end= key_part + *n_keys;
+
+ uint idx= 0;
+ for (fieldno= 0; fieldno < range_par->table->s->fields; fieldno++)
+ {
+ enum_field_types type= table->field[fieldno]->type();
+ if (!(type == MYSQL_TYPE_TINY_BLOB ||
+ type == MYSQL_TYPE_MEDIUM_BLOB ||
+ type == MYSQL_TYPE_LONG_BLOB ||
+ type == MYSQL_TYPE_BLOB ||
+ type == MYSQL_TYPE_GEOMETRY) &&
+ field_filter_func(func_param, fieldno) &&
+ idx < MAX_INDEXES)
+ {
+ idx++;
+ Field **field= table->field + fieldno;
+ key_part->key= 0;
+ key_part->part= 0;
+ key_part->store_length= key_part->length= (uint16) (*field)->key_length();
+ if ((*field)->real_maybe_null())
+ key_part->store_length+= HA_KEY_NULL_LENGTH;
+ if ((*field)->type() == MYSQL_TYPE_BLOB ||
+ (*field)->real_type() == MYSQL_TYPE_VARCHAR)
+ key_part->store_length+= HA_KEY_BLOB_LENGTH;
+
+ key_part->field= (*field);
+ key_part->image_type = Field::itRAW;
+ /*
+ We set keypart flag to 0 here as the only HA_PART_KEY_SEG is checked
+ in the RangeAnalysisModule.
+ */
+ key_part->flag= 0;
+ /* We don't set key_parts->null_bit as it will not be used */
+
+ key_part++;
+ }
+ }
+ return FALSE;
+}
+
+
+
+
+bool Condition_analyzer::analyze(Item *cond,
+ take_int_return_bool func,
+ void *func_param)
+{
+ bool retval= FALSE;
+ THD *thd= current_thd;
+ uint i;
+ DBUG_ENTER("find_min_max_bounds");
+
+ init_sql_alloc(&alloc, thd->variables.range_alloc_block_size, 0);
+ range_par= (RANGE_OPT_PARAM*)alloc_root(&alloc, sizeof(RANGE_OPT_PARAM));
+ range_par->mem_root= &alloc;
+ range_par->old_root= thd->mem_root;
+ range_par->thd= thd;
+ range_par->table= table;
+ /* range_par->cond doesn't need initialization */
+ range_par->prev_tables= range_par->read_tables= 0;
+ range_par->current_table= table->map;
+
+ if (create_minmax_indexes_descriptions(range_par, func, func_param, &n_keys))
+ {
+ free_root(&alloc,MYF(0)); // Return memory & allocator
+ DBUG_RETURN(FALSE);
+ }
+ range_par->keys= n_keys;
+
+ dbug_tmp_use_all_columns(table, old_sets,
+ table->read_set, table->write_set);
+ range_par->using_real_indexes= FALSE;
+ range_par->remove_jump_scans= TRUE;
+ for (i=0; i < n_keys; i++)
+ range_par->real_keynr[i]= i;
+ range_par->alloced_sel_args= 0;
+
+
+ thd->no_errors=1; // Don't warn about NULL
+ thd->mem_root=&alloc;
+
+ tree= get_mm_tree(range_par, cond);
+ if (!tree)
+ {
+ retval= TRUE;
+ goto end;
+ }
+
+ if (tree->type == SEL_TREE::IMPOSSIBLE)
+ {
+ retval= TRUE;
+ goto end;
+ }
+
+ if ((tree->type != SEL_TREE::KEY && tree->type != SEL_TREE::KEY_SMALLER) ||
+ !tree->merges.is_empty())
+ {
+ retval= TRUE;
+ goto end;
+ }
+
+end:
+ DBUG_RETURN(retval);
+}
+
+
+bool Condition_analyzer::start_intervals_walk_for_field(uint *fieldno)
+{
+ if (!tree)
+ return FALSE;
+ for (cur_key++; cur_key < (int)n_keys; cur_key++)
+ {
+ if (tree->keys[cur_key])
+ {
+ *fieldno= range_par->key_parts[cur_key].field->field_index;
+ cur_interval= tree->keys[0]->first();
+ return TRUE;
+ }
+ }
+ return FALSE;
+}
+
+
+bool Condition_analyzer::get_next_interval(KEY_MULTI_RANGE *mrange)
+{
+ uchar *key_ptr;
+ key_range *min_range= &mrange->start_key;
+ key_range *max_range= &mrange->end_key;
+
+ if (!cur_interval)
+ return FALSE;
+
+// uchar min_val[MAX_KEY_LENGTH+MAX_FIELD_WIDTH];
+// uchar max_val[MAX_KEY_LENGTH+MAX_FIELD_WIDTH];
+ min_range->key= this->min_val;
+ max_range->key= this->max_val;
+
+ key_ptr= (uchar*)min_range->key;
+ cur_interval->store_min(range_par->key_parts[cur_key].store_length,
+ &key_ptr, /*cur_interval->min_flag*/ 0);
+ min_range->keypart_map= key_part_map(1);
+ min_range->length= key_ptr - min_range->key;
+ min_range->flag= (cur_interval->min_flag & NEAR_MIN ? HA_READ_AFTER_KEY :
+ HA_READ_KEY_EXACT);
+
+ key_ptr= (uchar*)max_range->key;
+ cur_interval->store_max(range_par->key_parts[cur_key].store_length,
+ &key_ptr, /* cur_interval->max_flag */ 0);
+ max_range->keypart_map= key_part_map(1);
+ max_range->length= key_ptr - max_range->key;
+ max_range->flag= (cur_interval->max_flag & NEAR_MAX ?
+ HA_READ_BEFORE_KEY : HA_READ_AFTER_KEY);
+
+ mrange->range_flag= cur_interval->min_flag | cur_interval->max_flag;
+
+ cur_interval= cur_interval->next;
+ return TRUE;
+}
+
+
+void Condition_analyzer::dispose()
+{
+ THD *thd= current_thd;
+ dbug_tmp_restore_column_maps(table->read_set, table->write_set, old_sets);
+ thd->no_errors=0;
+ thd->mem_root= range_par->old_root;
+ free_root(&alloc,MYF(0)); // Return memory & allocator
+}
+
+
+#if 0
+static
+int search_min_max_bounds(RANGE_OPT_PARAM *ppar, SEL_ARG *key_tree)
+{
+ int res, left_res=0, right_res=0;
+
+ if (key_tree->left != &null_element)
+ {
+ if (-1 == (left_res= search_min_max_bounds(ppar,key_tree->left)))
+ return -1;
+ }
+
+ if (key_tree->type == SEL_ARG::KEY_RANGE)
+ {
+ /*
+ Partitioning is done by RANGE|INTERVAL(monotonic_expr(fieldX)), and
+ we got "const1 CMP fieldX CMP const2" interval <-- psergey-todo: change
+ */
+ DBUG_EXECUTE("info", dbug_print_segment_range(key_tree,
+ ppar->key_parts););
+ // key_tree->min_value, key_tree->max_value, key_tree->min_flag | key_tree->max_flag,
+ }
+
+ if (key_tree->right != &null_element)
+ {
+ if (-1 == (right_res= search_min_max_bounds(ppar,key_tree->right)))
+ return -1;
+ }
+ return (left_res || right_res || res);
+}
+#endif
+// psergey-minmax: end;
+
/*
Store field key image to table record
=== modified file 'sql/sql_parse.cc'
--- a/sql/sql_parse.cc 2010-03-04 08:03:07 +0000
+++ b/sql/sql_parse.cc 2010-04-21 14:19:33 +0000
@@ -1237,6 +1237,7 @@
general_log_write(thd, command, thd->query(), thd->query_length());
DBUG_PRINT("query",("%-.4096s",thd->query()));
+ fprintf(stderr, "query: %-.4096s\n",thd->query());
#if defined(ENABLED_PROFILING) && defined(COMMUNITY_SERVER)
thd->profiling.set_query_source(thd->query(), thd->query_length());
#endif
=== modified file 'sql/sql_select.cc'
--- a/sql/sql_select.cc 2010-03-10 09:12:23 +0000
+++ b/sql/sql_select.cc 2010-04-21 14:19:33 +0000
@@ -6643,6 +6643,8 @@
}
}
+//psergey-minmax:
+bool find_min_max_bounds(THD *thd, TABLE *table, Item *pprune_cond);
static void
make_join_readinfo(JOIN *join, ulonglong options)
@@ -6681,6 +6683,7 @@
table->status=STATUS_NO_RECORD;
pick_table_access_method (tab);
+ //find_min_max_bounds(join->thd, table, tab->select_cond);
switch (tab->type) {
case JT_EQ_REF:
tab->read_record.unlock_row= join_read_key_unlock_row;
=== modified file 'storage/myisam/ha_myisam.cc'
--- a/storage/myisam/ha_myisam.cc 2009-12-03 11:34:11 +0000
+++ b/storage/myisam/ha_myisam.cc 2010-04-21 14:19:33 +0000
@@ -2269,3 +2269,88 @@
DBUG_RETURN(TRUE);
}
#endif
+
+static bool do_min_max_for_index(void* param, uint index)
+{
+ /* param is what was to Condition_analyzer::analyze */
+ return TRUE;
+}
+
+#include "../sql/opt_field_minmax.h"
+
+void store_key_image_to_rec(Field *field, uchar *ptr, uint len);
+static void print_segment_range(Field *field, KEY_MULTI_RANGE *mrange);
+
+static void print_field(Field *field)
+{
+ if (field->is_real_null())
+ fprintf(stderr, "NULL");
+ else
+ {
+ char buf[256];
+ String str(buf, sizeof(buf), &my_charset_bin);
+ str.length(0);
+ String *pstr;
+ pstr= field->val_str(&str);
+ fprintf(stderr, "'%s'", pstr->c_ptr_safe());
+ }
+}
+
+/* Print a "c1 < keypartX < c2" - type interval into debug trace. */
+static void print_segment_range(Field *field, KEY_MULTI_RANGE *mrange)
+{
+ if (!(mrange->range_flag & NO_MIN_RANGE))
+ {
+ //DBUG_ASSERT(field->key_length() == mrange->start_key.length);
+ store_key_image_to_rec(field, (uchar*)mrange->start_key.key, mrange->start_key.length);
+ print_field(field);
+ if (mrange->range_flag & NEAR_MIN)
+ fputs(" < ", stderr);
+ else
+ fputs(" <= ", stderr);
+ }
+
+ fprintf(stderr, "%s", field->field_name);
+
+ if (!(mrange->range_flag & NO_MAX_RANGE))
+ {
+ if (mrange->range_flag & NEAR_MAX)
+ fputs(" < ", stderr);
+ else
+ fputs(" <= ", stderr);
+ //DBUG_ASSERT(field->key_length() == mrange->end_key.length);
+ store_key_image_to_rec(field, (uchar*)mrange->end_key.key, mrange->end_key.length);
+ print_field(field);
+ }
+ fputs("\n", stderr);
+}
+
+//931 202 76 04 natalya
+
+
+
+const COND * ha_myisam::cond_push(const COND *cond)
+{
+ fprintf(stderr, "table %s: set min-max bounds { \n", table->alias);
+ Condition_analyzer analyzer(table);
+ analyzer.analyze((Item*)cond, do_min_max_for_index, (void*)0x123456);
+ uint fieldno;
+ while (analyzer.start_intervals_walk_for_field(&fieldno))
+ {
+ KEY_MULTI_RANGE mrange;
+ while (analyzer.get_next_interval(&mrange))
+ {
+ // print the range;
+ print_segment_range(table->field[fieldno], &mrange);
+ }
+ }
+ fprintf(stderr, "}\n");
+ analyzer.dispose();
+ return cond;
+}
+
+void ha_myisam::cond_pop()
+{
+ fprintf(stderr, "table %s: Remove min-max bounds\n", table->alias);
+}
+
=== modified file 'storage/myisam/ha_myisam.h'
--- a/storage/myisam/ha_myisam.h 2009-09-07 20:50:10 +0000
+++ b/storage/myisam/ha_myisam.h 2010-04-21 14:19:33 +0000
@@ -148,4 +148,10 @@
{
return file;
}
+//psergey-minmax:
+ /*
+ Condition pushdown
+ */
+ const COND *cond_push(const COND *cond);
+ void cond_pop();
};