← Back to team overview

maria-developers team mailing list archive

Re: b1a4d1e4937: MDEV-16973 Application-time periods: DELETE

 

Hi, Sergei!

On Thu, Feb 7, 2019 at 4:23 AM Sergei Golubchik <serg@xxxxxxxxxxx> wrote:

> Hi, Nikita!
>
> On Jan 31, Nikita Malyavin wrote:
> > revision-id: b1a4d1e4937 (versioning-1.0.7-3-gb1a4d1e4937)
> > parent(s): 4b01d3aee60
> > author: Nikita Malyavin <nikitamalyavin@xxxxxxxxx>
> > committer: Nikita Malyavin <nikitamalyavin@xxxxxxxxx>
> > timestamp: 2019-01-30 22:54:00 +1000
> > message:
> >
> > MDEV-16973 Application-time periods: DELETE
> >
> > * inject portion of time updates into mysql_delete main loop
> > * triggered case emits delete+insert, no updates
> > * PORTION OF `SYSTEM_TIME` is forbidden
> > * `DELETE HISTORY .. FOR PORTION OF ...` is forbidden as well
>
> > diff --git a/mysql-test/suite/period/r/delete.result
> b/mysql-test/suite/period/r/delete.result
> > --- /dev/null
> > +++ b/mysql-test/suite/period/r/delete.result
> > @@ -0,0 +1,244 @@
> > +create or replace table t (id int, s date, e date, period for
> apptime(s,e));
> > +insert into t values(1, '1999-01-01', '2018-12-12');
> > +insert into t values(1, '1999-01-01', '2017-01-01');
> > +insert into t values(1, '2017-01-01', '2019-01-01');
> > +insert into t values(2, '1998-01-01', '2018-12-12');
> > +insert into t values(3, '1997-01-01', '2015-01-01');
> > +insert into t values(4, '2016-01-01', '2020-01-01');
> > +insert into t values(5, '2010-01-01', '2015-01-01');
> > +create or replace table t1 (id int, s date, e date, period for
> apptime(s,e));
> > +insert t1 select * from t;
> > +create or replace table t2 (id int, s date, e date, period for
> apptime(s,e));
> > +insert t2 select * from t;
> > +delete from t for portion of apptime from '2000-01-01' to '2018-01-01';
> > +delete from t1 for portion of APPTIME from '2000-01-01' to '2018-01-01';
> > +select * from t;
> > +id   s       e
> > +1    1999-01-01      2000-01-01
> > +1    1999-01-01      2000-01-01
> > +1    2018-01-01      2018-12-12
> > +1    2018-01-01      2019-01-01
> > +2    1998-01-01      2000-01-01
> > +2    2018-01-01      2018-12-12
> > +3    1997-01-01      2000-01-01
> > +4    2018-01-01      2020-01-01
> > +select * from t1;
> > +id   s       e
> > +1    1999-01-01      2000-01-01
> > +1    1999-01-01      2000-01-01
> > +1    2018-01-01      2018-12-12
> > +1    2018-01-01      2019-01-01
> > +2    1998-01-01      2000-01-01
> > +2    2018-01-01      2018-12-12
> > +3    1997-01-01      2000-01-01
> > +4    2018-01-01      2020-01-01
> > +select * from log_tbl;
> > +id   log
>
> here (and everywhere selecting from log_tbl), better do ORDER BY id
> it's quite difficult to follow the sequence of events otherwise
>
Okay


> > +# auto_increment field overflow
> > +create or replace table t (id tinyint auto_increment primary key,
> > +s date, e date, period for apptime(s,e));
> > +insert into t values(127, '1999-01-01', '2018-12-12');
> > +delete from t for portion of apptime from '2000-01-01' to '2018-01-01';
> > +ERROR 22003: Out of range value for column 'id' at row 1
>
> add select * from t here, please
>


> +# same for trigger case
> > +delete from t for portion of apptime from '2000-01-01' to '2018-01-01';
> > +ERROR 22003: Out of range value for column 'id' at row 1
>
> and here
> and select from the log_tbl too
>
> Ahhh, the results differ for Innodb and MyISAM because of transactions.

Well, I came across with solution, but it may look cumbersome. I've put the
queries in while loop and cycle the engines.
Please take a look at it in delete.test. Maybe You'll want it to be in
different file, or have any better idea/suggestion.


> diff --git a/mysql-test/suite/versioning/t/select.test
> b/mysql-test/suite/versioning/t/select.test
> > --- a/mysql-test/suite/versioning/t/select.test
> > +++ b/mysql-test/suite/versioning/t/select.test
> > @@ -107,6 +107,32 @@ for system_time as of timestamp @t0 as t;
> >  drop table t1;
> >  drop table t2;
> >
> > +# Query conditions check
> > +
> > +create or replace table t1(x int) with system versioning;
> > +insert into t1 values (1);
> > +delete from t1;
> > +insert into t1 values (2);
> > +delete from t1;
> > +insert into t1 values (3);
> > +delete from t1;
> > +
> > +select row_start into @start1 from t1 for system_time all where x = 1;
> > +select row_end into @end1 from t1 for system_time all where x = 1;
> > +select row_start into @start2 from t1 for system_time all where x = 2;
> > +select row_end into @end2 from t1 for system_time all where x = 2;
> > +select row_start into @start3 from t1 for system_time all where x = 3;
> > +select row_end into @end3 from t1 for system_time all where x = 3;
> > +
> > +select x as ASOF_x from t1 for system_time as of @start2;
> > +select x as ASOF_x from t1 for system_time as of @end2;
> > +select x as FROMTO_x from t1 for system_time from @start1 to @end3;
> > +select x as FROMTO_x from t1 for system_time from @end1 to @start2;
> > +select x as BETWAND_x from t1 for system_time between @start1 and @end3;
> > +select x as BETWAND_x from t1 for system_time between @end1 and @start2;
> > +
> > +drop table t1;
>
> what does that have to do with MDEV-16973?
>
> I've refactored AS OF query conditions  generator. We've discussed it
earlier. Here's a citation:

> > > > +      || unique_table(thd, table_list, table_list->next_global, 0))
> > > >      *delete_while_scanning= false;
> > > >
> >
> > > > +    case SYSTEM_TIME_FROM_TO:
> > > > +      cond1= newx Item_func_trt_trx_sees(thd, trx_id1,
> conds.field_start);
> > > > +      cond3= newx Item_func_lt(thd, conds.start.item,
> conds.end.item);
> > > > +      break;
> > >
> > > hmm, you don't use trx_id0 here at all. did you forget
> > >
> > >   cond2= newx Item_func_trt_trx_sees_eq(thd, conds.field_end, trx_id0);
> > >
> > yes, looks so... The tests did not catch this
> add a new test, perhaps? :)

 .

> +
> >  # Wildcard expansion on hidden fields
> >
> >  create table t1(
> > @@ -233,9 +259,9 @@ select x from t1 for system_time as of @trx_start;
> >  --echo ### Issue #365, bug 4 (related to #226, optimized fields)
> >  create or replace table t1 (i int, b int) with system versioning;
> >  insert into t1 values (0, 0), (0, 0);
> > -select min(i) over (partition by b) as f
> > -from (select i + 0 as i, b from t1) as tt
> > -order by i;
> > +#select min(i) over (partition by b) as f
> > +#from (select i + 0 as i, b from t1) as tt
> > +#order by i;
>
> why is this?
>
> oh sorry, it shouldn't be here. removed.

> diff --git a/sql/sql_delete.cc b/sql/sql_delete.cc
> > --- a/sql/sql_delete.cc
> > +++ b/sql/sql_delete.cc
> > @@ -245,6 +245,48 @@ static bool record_should_be_deleted(THD *thd,
> TABLE *table, SQL_SELECT *sel,
> >    return false;
> >  }
> >
> > +inline
> > +int TABLE::update_portion_of_time(THD *thd,
> > +                                  const vers_select_conds_t
> &period_conds,
> > +                                  bool *inside_period)
>
> I don't understand why you want to keep this very much DELETE-only
> functionality in the TABLE class which is used everywhere.
>
> Actually it has a chance to be reused in INSERT/REPLACE. But I'm not quite
sure.


> And what's the point of pretending it's in a common TABLE class,
> if it can only be used in sql_delete.cc? I find it quite confusing :(
>
> The point is that insert_portion_of_time is inside TABLE class,
while update_portion_of_time, which is very similar and feels to be
naturally grouped in the same namespace, is not -- which is found confusing
by me.
But i don't want to argue about it too much, so i'am about to make it just
as you prefer.

> >  inline
> >  int TABLE::delete_row()
> > @@ -672,6 +736,9 @@ bool mysql_delete(THD *thd, TABLE_LIST *table_list,
> COND *conds,
> >
> >    table->mark_columns_needed_for_delete();
> >
> > +  if (table_list->has_period())
> > +    table->use_all_columns();
>
> may be even
>
>   if (table_list->has_period())
>     table->use_all_columns()
>   else
>     table->mark_columns_needed_for_delete();
>
> Ok, no problem.

> > +
> >    if ((table->file->ha_table_flags() & HA_CAN_FORCE_BULK_DELETE) &&
> >        !table->prepare_triggers_for_delete_stmt_or_event())
> >      will_batch= !table->file->start_bulk_delete();
> > @@ -727,6 +794,16 @@ bool mysql_delete(THD *thd, TABLE_LIST *table_list,
> COND *conds,
> >      delete_record= true;
> >    }
> >
> > +  /*
> > +    From SQL2016, Part 2, 15.7 <Effect of deleting rows from base
> table>,
> > +    General Rules, 8), we can conclude that DELETE FOR PORTTION OF time
> performs
> > +    0-2 INSERTS + DELETE. We can substitute INSERT+DELETE with one
> UPDATE, but
> > +    only if there are no triggers set.
> > +    It is also meaningless for system-versioned table
> > +  */
> > +  portion_of_time_through_update= !has_triggers
> > +                               && !table->versioned(VERS_TIMESTAMP);
>
> I still don't understand why you disable portion_of_time_through_update
> for VERS_TIMESTAMP, but not for VERS_TRX_ID.
>
> Well, yes, better to bisable it, so less questions.

> > +
> >    THD_STAGE_INFO(thd, stage_updating);
> >    while (likely(!(error=info.read_record())) && likely(!thd->killed) &&
> >           likely(!thd->is_error()))
> > diff --git a/sql/sql_lex.cc b/sql/sql_lex.cc
> > --- a/sql/sql_lex.cc
> > +++ b/sql/sql_lex.cc
> > @@ -3576,6 +3577,20 @@ void LEX::set_trg_event_type_for_tables()
> >      break;
> >    }
> >
> > +  if (period_conditions.is_set())
> > +  {
> > +    switch (sql_command)
> > +    {
> > +    case SQLCOM_DELETE:
> > +    case SQLCOM_UPDATE:
> > +    case SQLCOM_REPLACE:
> > +      new_trg_event_map |= static_cast<uint8>
> > +                             (1 << static_cast<int>(TRG_EVENT_INSERT));
>
> I've added a helper for this recently, use
>
>      new_trg_event_map |= trg2bit(TRG_EVENT_INSERT);
>
> Should upmerge then


> > +    default:
> > +      break;
> > +    }
> > +  }
> > +
> >
> >    /*
> >      Do not iterate over sub-selects, only the tables in the outermost
> > diff --git a/sql/sql_yacc.yy b/sql/sql_yacc.yy
> > --- a/sql/sql_yacc.yy
> > +++ b/sql/sql_yacc.yy
> > @@ -9295,6 +9296,22 @@ history_point:
> >              $$= Vers_history_point($1, $2);
> >            }
> >          ;
> > +opt_for_portion_of_time_clause:
> > +          /* empty */
> > +          {
> > +            $$= false;
> > +          }
> > +        | FOR_SYM PORTION_SYM OF_SYM ident FROM history_point TO_SYM
> history_point
>
> history_point allows TIMESTAMP '2010-10-10 10:10:10' and
> TRANSACTION 1234.
>
> You don't need any of that, just use a corresponding expression
> rule. E.g. bit_expr, like history_point does.
>
ok

>
> > +          {
> > +            if (unlikely(0 == strcasecmp($4.str, "SYSTEM_TIME")))
> > +            {
> > +              thd->parse_error(ER_SYNTAX_ERROR, $4.str);
>
> no, for the error message to look correct you need to pass the pointer
> into the query text. It's usually done like this:
>
>       FOR_SYM PORTION_SYM OF_SYM remember_tok_start ident FROM ...
>       {
>          ...
>          thd->parse_error(ER_SYNTAX_ERROR, $4);
>
> nice trick, thanks!


-- 
Yours truly,
Nikita Malyavin

Follow ups

References