← Back to team overview

maria-developers team mailing list archive

Rev 2842: A test implementation of having SQL layer to supply the storage engine with in file:///home/psergey/dev/maria-5.1-minmax/

 

At file:///home/psergey/dev/maria-5.1-minmax/

------------------------------------------------------------
revno: 2842
revision-id: psergey@xxxxxxxxxxxx-20100421141933-707cy6l3iha89lm5
parent: monty@xxxxxxxxxxxx-20100406224708-i62wvtw3nl14fhv4
committer: Sergey Petrunya <psergey@xxxxxxxxxxxx>
branch nick: maria-5.1-minmax
timestamp: Wed 2010-04-21 10:19:33 -0400
message:
  A test implementation of having SQL layer to supply the storage engine with
  interval lists for each unindexed column.
  This patch consists of
  - SQL layer providing service of taking a condition and extracting 
    list<column, disjoint_ordered_list<interval>> from it (see opt_field_minmax.h)
  - An storage engine's "Implementation" that prints passed intervals to 
    server stderr.
  
  Things need to be done:
  - Comments, better names
  - Make the interface compatible with Query Fragment Pushdown ideas.
  - Invent something that would allow to have test coverage for this.
=== added file 'sql/opt_field_minmax.h'
--- a/sql/opt_field_minmax.h	1970-01-01 00:00:00 +0000
+++ b/sql/opt_field_minmax.h	2010-04-21 14:19:33 +0000
@@ -0,0 +1,35 @@
+//#include "../sql/opt_field_minmax.h"
+
+class RANGE_OPT_PARAM;
+class SEL_TREE;
+class SEL_ARG;
+
+typedef bool (*take_int_return_bool)(void* param, uint index);
+
+class Condition_analyzer
+{
+  TABLE *table;
+  MEM_ROOT alloc;
+  RANGE_OPT_PARAM *range_par;
+  SEL_TREE *tree;
+  my_bitmap_map *old_sets[2];
+  uchar min_val[MAX_KEY_LENGTH+MAX_FIELD_WIDTH];
+  uchar max_val[MAX_KEY_LENGTH+MAX_FIELD_WIDTH];
+
+  uint n_keys;
+
+  int cur_key;
+  SEL_ARG *cur_interval;
+public:
+  Condition_analyzer(TABLE *table_arg) : 
+    table(table_arg), range_par(NULL), cur_key(-1) {}
+  
+  bool analyze(Item *cond, take_int_return_bool field_filter_func, 
+               void *field_filter_param);
+  
+  bool start_intervals_walk_for_field(uint *fieldno);
+  bool get_next_interval(KEY_MULTI_RANGE *mrange);
+  
+  void dispose();
+};
+

=== modified file 'sql/opt_range.cc'
--- a/sql/opt_range.cc	2010-01-15 15:27:55 +0000
+++ b/sql/opt_range.cc	2010-04-21 14:19:33 +0000
@@ -2788,6 +2788,248 @@
   DBUG_RETURN(retval);
 }
 
+// psergey-minmax: start
+
+#include "opt_field_minmax.h"
+
+/*
+  Create one-column pseudo-indexes for each column.
+*/
+
+static 
+bool create_minmax_indexes_descriptions(RANGE_OPT_PARAM *range_par, 
+                                        take_int_return_bool field_filter_func, 
+                                        void *func_param,
+                                        uint *n_keys)
+{
+  TABLE *table= range_par->table;
+  MEM_ROOT *alloc= range_par->mem_root;
+  *n_keys= 0;
+  uint fieldno;
+  for (fieldno= 0; fieldno < range_par->table->s->fields; fieldno++)
+  {
+    enum_field_types type= table->field[fieldno]->type();
+    if (!(type == MYSQL_TYPE_TINY_BLOB ||
+          type == MYSQL_TYPE_MEDIUM_BLOB ||
+          type == MYSQL_TYPE_LONG_BLOB ||
+          type == MYSQL_TYPE_BLOB || 
+          type == MYSQL_TYPE_GEOMETRY) &&
+        field_filter_func(func_param, fieldno) &&
+        *n_keys < MAX_INDEXES)
+    {
+      (*n_keys)++;
+    }
+  }
+
+  if (!*n_keys)
+    return TRUE;
+
+  KEY_PART *key_part;
+  key_part= (KEY_PART*)alloc_root(alloc, sizeof(KEY_PART)* (*n_keys));
+  range_par->key_parts= key_part;
+  range_par->key_parts_end= key_part + *n_keys;
+  
+  uint idx= 0;
+  for (fieldno= 0; fieldno < range_par->table->s->fields; fieldno++)
+  {
+    enum_field_types type= table->field[fieldno]->type();
+    if (!(type == MYSQL_TYPE_TINY_BLOB ||
+          type == MYSQL_TYPE_MEDIUM_BLOB ||
+          type == MYSQL_TYPE_LONG_BLOB ||
+          type == MYSQL_TYPE_BLOB || 
+          type == MYSQL_TYPE_GEOMETRY) &&
+        field_filter_func(func_param, fieldno) &&
+        idx < MAX_INDEXES)
+    {
+      idx++;
+      Field **field= table->field + fieldno;
+      key_part->key=          0;
+      key_part->part=	      0;
+      key_part->store_length= key_part->length= (uint16) (*field)->key_length();
+      if ((*field)->real_maybe_null())
+        key_part->store_length+= HA_KEY_NULL_LENGTH;
+      if ((*field)->type() == MYSQL_TYPE_BLOB || 
+          (*field)->real_type() == MYSQL_TYPE_VARCHAR)
+        key_part->store_length+= HA_KEY_BLOB_LENGTH;
+
+      key_part->field=        (*field);
+      key_part->image_type =  Field::itRAW;
+      /* 
+        We set keypart flag to 0 here as the only HA_PART_KEY_SEG is checked
+        in the RangeAnalysisModule.
+      */
+      key_part->flag=         0;
+      /* We don't set key_parts->null_bit as it will not be used */
+
+      key_part++;
+    }
+  }
+  return FALSE;
+}
+
+
+
+
+bool Condition_analyzer::analyze(Item *cond, 
+                                 take_int_return_bool func, 
+                                 void *func_param)
+{
+  bool retval= FALSE;
+  THD *thd= current_thd;
+  uint i;
+  DBUG_ENTER("find_min_max_bounds");
+
+  init_sql_alloc(&alloc, thd->variables.range_alloc_block_size, 0);
+  range_par= (RANGE_OPT_PARAM*)alloc_root(&alloc, sizeof(RANGE_OPT_PARAM));
+  range_par->mem_root= &alloc;
+  range_par->old_root= thd->mem_root;
+  range_par->thd= thd;
+  range_par->table= table;
+  /* range_par->cond doesn't need initialization */
+  range_par->prev_tables= range_par->read_tables= 0;
+  range_par->current_table= table->map;
+
+  if (create_minmax_indexes_descriptions(range_par, func, func_param, &n_keys))
+  {
+    free_root(&alloc,MYF(0));		// Return memory & allocator
+    DBUG_RETURN(FALSE);
+  }
+  range_par->keys= n_keys;
+
+  dbug_tmp_use_all_columns(table, old_sets, 
+                           table->read_set, table->write_set);
+  range_par->using_real_indexes= FALSE;
+  range_par->remove_jump_scans= TRUE;
+  for (i=0; i < n_keys; i++)
+    range_par->real_keynr[i]= i;
+  range_par->alloced_sel_args= 0;
+
+
+  thd->no_errors=1;				// Don't warn about NULL
+  thd->mem_root=&alloc;
+
+  tree= get_mm_tree(range_par, cond);
+  if (!tree)
+  {
+    retval= TRUE;
+    goto end;
+  }
+
+  if (tree->type == SEL_TREE::IMPOSSIBLE)
+  {
+    retval= TRUE;
+    goto end;
+  }
+
+  if ((tree->type != SEL_TREE::KEY && tree->type != SEL_TREE::KEY_SMALLER) ||
+      !tree->merges.is_empty())
+  {
+    retval= TRUE;
+    goto end;
+  }
+
+end:
+  DBUG_RETURN(retval);
+}
+
+
+bool Condition_analyzer::start_intervals_walk_for_field(uint *fieldno)
+{
+  if (!tree)
+    return FALSE;
+  for (cur_key++; cur_key < (int)n_keys; cur_key++)
+  {
+    if (tree->keys[cur_key])
+    {
+      *fieldno= range_par->key_parts[cur_key].field->field_index;
+      cur_interval= tree->keys[0]->first();
+      return TRUE;
+    }
+  }
+  return FALSE;
+}
+
+
+bool Condition_analyzer::get_next_interval(KEY_MULTI_RANGE *mrange)
+{
+  uchar *key_ptr;
+  key_range *min_range= &mrange->start_key; 
+  key_range *max_range= &mrange->end_key;
+
+  if (!cur_interval) 
+    return FALSE;
+
+//  uchar min_val[MAX_KEY_LENGTH+MAX_FIELD_WIDTH];
+//  uchar max_val[MAX_KEY_LENGTH+MAX_FIELD_WIDTH];
+  min_range->key= this->min_val;
+  max_range->key= this->max_val;
+
+  key_ptr= (uchar*)min_range->key;
+  cur_interval->store_min(range_par->key_parts[cur_key].store_length,
+                          &key_ptr, /*cur_interval->min_flag*/ 0);
+  min_range->keypart_map= key_part_map(1);
+  min_range->length=      key_ptr - min_range->key;
+  min_range->flag=        (cur_interval->min_flag & NEAR_MIN ? HA_READ_AFTER_KEY :
+                           HA_READ_KEY_EXACT);
+
+  key_ptr= (uchar*)max_range->key;
+  cur_interval->store_max(range_par->key_parts[cur_key].store_length,
+                          &key_ptr, /* cur_interval->max_flag */ 0);
+  max_range->keypart_map= key_part_map(1);
+  max_range->length=      key_ptr - max_range->key;
+  max_range->flag=        (cur_interval->max_flag & NEAR_MAX ?
+                           HA_READ_BEFORE_KEY : HA_READ_AFTER_KEY);
+  
+  mrange->range_flag= cur_interval->min_flag | cur_interval->max_flag;
+
+  cur_interval= cur_interval->next;
+  return TRUE;
+}
+
+
+void Condition_analyzer::dispose()
+{
+  THD *thd= current_thd;
+  dbug_tmp_restore_column_maps(table->read_set, table->write_set, old_sets);
+  thd->no_errors=0;
+  thd->mem_root= range_par->old_root;
+  free_root(&alloc,MYF(0));			// Return memory & allocator
+}
+
+
+#if 0
+static 
+int search_min_max_bounds(RANGE_OPT_PARAM *ppar, SEL_ARG *key_tree)
+{
+  int res, left_res=0, right_res=0;
+
+  if (key_tree->left != &null_element)
+  {
+    if (-1 == (left_res= search_min_max_bounds(ppar,key_tree->left)))
+      return -1;
+  }
+
+  if (key_tree->type == SEL_ARG::KEY_RANGE)
+  {
+    /* 
+      Partitioning is done by RANGE|INTERVAL(monotonic_expr(fieldX)), and
+      we got "const1 CMP fieldX CMP const2" interval <-- psergey-todo: change
+    */
+    DBUG_EXECUTE("info", dbug_print_segment_range(key_tree,
+                                                  ppar->key_parts););
+    // key_tree->min_value, key_tree->max_value, key_tree->min_flag | key_tree->max_flag,
+  }
+  
+  if (key_tree->right != &null_element)
+  {
+    if (-1 == (right_res= search_min_max_bounds(ppar,key_tree->right)))
+      return -1;
+  }
+  return (left_res || right_res || res);
+}
+#endif 
+// psergey-minmax: end;
+
 
 /*
   Store field key image to table record

=== modified file 'sql/sql_parse.cc'
--- a/sql/sql_parse.cc	2010-03-04 08:03:07 +0000
+++ b/sql/sql_parse.cc	2010-04-21 14:19:33 +0000
@@ -1237,6 +1237,7 @@
 
     general_log_write(thd, command, thd->query(), thd->query_length());
     DBUG_PRINT("query",("%-.4096s",thd->query()));
+    fprintf(stderr, "query: %-.4096s\n",thd->query());
 #if defined(ENABLED_PROFILING) && defined(COMMUNITY_SERVER)
     thd->profiling.set_query_source(thd->query(), thd->query_length());
 #endif

=== modified file 'sql/sql_select.cc'
--- a/sql/sql_select.cc	2010-03-10 09:12:23 +0000
+++ b/sql/sql_select.cc	2010-04-21 14:19:33 +0000
@@ -6643,6 +6643,8 @@
   }
 }
 
+//psergey-minmax:
+bool find_min_max_bounds(THD *thd, TABLE *table, Item *pprune_cond);
 
 static void
 make_join_readinfo(JOIN *join, ulonglong options)
@@ -6681,6 +6683,7 @@
     table->status=STATUS_NO_RECORD;
     pick_table_access_method (tab);
 
+    //find_min_max_bounds(join->thd, table, tab->select_cond);
     switch (tab->type) {
     case JT_EQ_REF:
       tab->read_record.unlock_row= join_read_key_unlock_row;

=== modified file 'storage/myisam/ha_myisam.cc'
--- a/storage/myisam/ha_myisam.cc	2009-12-03 11:34:11 +0000
+++ b/storage/myisam/ha_myisam.cc	2010-04-21 14:19:33 +0000
@@ -2269,3 +2269,88 @@
   DBUG_RETURN(TRUE);
 }
 #endif
+
+static bool do_min_max_for_index(void* param, uint index)
+{
+  /* param is what was to Condition_analyzer::analyze */
+  return TRUE;
+}
+
+#include "../sql/opt_field_minmax.h"
+
+void store_key_image_to_rec(Field *field, uchar *ptr, uint len);
+static void print_segment_range(Field *field, KEY_MULTI_RANGE *mrange);
+
+static void print_field(Field *field)
+{
+  if (field->is_real_null())
+    fprintf(stderr, "NULL");
+  else
+  {
+    char buf[256];
+    String str(buf, sizeof(buf), &my_charset_bin);
+    str.length(0);
+    String *pstr;
+    pstr= field->val_str(&str);
+    fprintf(stderr, "'%s'", pstr->c_ptr_safe());
+  }
+}
+
+/* Print a "c1 < keypartX < c2" - type interval into debug trace. */
+static void print_segment_range(Field *field, KEY_MULTI_RANGE *mrange)
+{
+  if (!(mrange->range_flag & NO_MIN_RANGE))
+  {
+    //DBUG_ASSERT(field->key_length() == mrange->start_key.length);
+    store_key_image_to_rec(field, (uchar*)mrange->start_key.key, mrange->start_key.length);
+    print_field(field);
+    if (mrange->range_flag & NEAR_MIN)
+      fputs(" < ", stderr);
+    else
+      fputs(" <= ", stderr);
+  }
+
+  fprintf(stderr, "%s", field->field_name);
+
+  if (!(mrange->range_flag & NO_MAX_RANGE))
+  {
+    if (mrange->range_flag & NEAR_MAX)
+      fputs(" < ", stderr);
+    else
+      fputs(" <= ", stderr);
+    //DBUG_ASSERT(field->key_length() == mrange->end_key.length);
+    store_key_image_to_rec(field, (uchar*)mrange->end_key.key, mrange->end_key.length);
+    print_field(field);
+  }
+  fputs("\n", stderr);
+}
+
+//931 202 76 04 natalya
+
+
+
+const COND * ha_myisam::cond_push(const COND *cond)
+{
+  fprintf(stderr, "table %s: set min-max bounds { \n", table->alias);
+  Condition_analyzer analyzer(table);
+  analyzer.analyze((Item*)cond, do_min_max_for_index, (void*)0x123456);
+  uint fieldno;
+  while (analyzer.start_intervals_walk_for_field(&fieldno))
+  {
+    KEY_MULTI_RANGE mrange;
+    while (analyzer.get_next_interval(&mrange))
+    {
+      // print the range;
+      print_segment_range(table->field[fieldno], &mrange);
+    }
+  }
+  fprintf(stderr, "}\n");
+  analyzer.dispose();
+  return cond;
+}
+
+void ha_myisam::cond_pop()
+{
+  fprintf(stderr, "table %s: Remove min-max bounds\n", table->alias);
+}
+

=== modified file 'storage/myisam/ha_myisam.h'
--- a/storage/myisam/ha_myisam.h	2009-09-07 20:50:10 +0000
+++ b/storage/myisam/ha_myisam.h	2010-04-21 14:19:33 +0000
@@ -148,4 +148,10 @@
   {
     return file;
   }
+//psergey-minmax:
+  /*
+    Condition pushdown
+  */
+  const COND *cond_push(const COND *cond);
+  void cond_pop();
 };