LTP GCOV extension - code coverage report
Current view: directory - storage/maria - ma_ft_nlq_search.c
Test: mtr_and_unit.info
Date: 2009-03-05 Instrumented lines: 155
Code covered: 0.0 % Executed lines: 0

       1                 : /* Copyright (C) 2006 MySQL AB & MySQL Finland AB & TCX DataKonsult AB
       2                 : 
       3                 :    This program is free software; you can redistribute it and/or modify
       4                 :    it under the terms of the GNU General Public License as published by
       5                 :    the Free Software Foundation; version 2 of the License.
       6                 : 
       7                 :    This program is distributed in the hope that it will be useful,
       8                 :    but WITHOUT ANY WARRANTY; without even the implied warranty of
       9                 :    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
      10                 :    GNU General Public License for more details.
      11                 : 
      12                 :    You should have received a copy of the GNU General Public License
      13                 :    along with this program; if not, write to the Free Software
      14                 :    Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA */
      15                 : 
      16                 : /* Written by Sergei A. Golubchik, who has a shared copyright to this code */
      17                 : 
      18                 : #define FT_CORE
      19                 : #include "ma_ftdefs.h"
      20                 : 
      21                 : /* search with natural language queries */
      22                 : 
      23                 : typedef struct ft_doc_rec
      24                 : {
      25                 :   my_off_t  dpos;
      26                 :   double    weight;
      27                 : } FT_DOC;
      28                 : 
      29                 : struct st_ft_info
      30                 : {
      31                 :   struct _ft_vft *please;
      32                 :   MARIA_HA  *info;
      33                 :   int       ndocs;
      34                 :   int       curdoc;
      35                 :   FT_DOC    doc[1];
      36                 : };
      37                 : 
      38                 : typedef struct st_all_in_one
      39                 : {
      40                 :   MARIA_HA    *info;
      41                 :   uint        keynr;
      42                 :   CHARSET_INFO *charset;
      43                 :   uchar      *keybuff;
      44                 :   TREE        dtree;
      45                 : } ALL_IN_ONE;
      46                 : 
      47                 : typedef struct st_ft_superdoc
      48                 : {
      49                 :     FT_DOC   doc;
      50                 :     FT_WORD *word_ptr;
      51                 :     double   tmp_weight;
      52                 : } FT_SUPERDOC;
      53                 : 
      54                 : static int FT_SUPERDOC_cmp(void* cmp_arg __attribute__((unused)),
      55                 :                            FT_SUPERDOC *p1, FT_SUPERDOC *p2)
      56               0 : {
      57               0 :   if (p1->doc.dpos < p2->doc.dpos)
      58               0 :     return -1;
      59               0 :   if (p1->doc.dpos == p2->doc.dpos)
      60               0 :     return 0;
      61               0 :   return 1;
      62                 : }
      63                 : 
      64                 : static int walk_and_match(FT_WORD *word, uint32 count, ALL_IN_ONE *aio)
      65               0 : {
      66                 :   int          subkeys, r;
      67                 :   uint         doc_cnt;
      68                 :   FT_SUPERDOC  sdoc, *sptr;
      69                 :   TREE_ELEMENT *selem;
      70               0 :   double       gweight=1;
      71               0 :   MARIA_HA     *info= aio->info;
      72               0 :   uchar        *keybuff= aio->keybuff;
      73               0 :   MARIA_KEYDEF *keyinfo= info->s->keyinfo+aio->keynr;
      74               0 :   my_off_t     key_root=info->s->state.key_root[aio->keynr];
      75               0 :   uint         extra=HA_FT_WLEN+info->s->base.rec_reflength;
      76                 :   MARIA_KEY    key;
      77                 : #if HA_FT_WTYPE == HA_KEYTYPE_FLOAT
      78                 :   float tmp_weight;
      79                 : #else
      80                 : #error
      81                 : #endif
      82               0 :   DBUG_ENTER("walk_and_match");
      83                 : 
      84               0 :   word->weight=LWS_FOR_QUERY;
      85                 : 
      86               0 :   _ma_ft_make_key(info, &key, aio->keynr, keybuff, word, 0);
      87               0 :   key.data_length-= HA_FT_WLEN;
      88               0 :   doc_cnt=0;
      89                 : 
      90                 :   /* Skip rows inserted by current inserted */
      91               0 :   for (r= _ma_search(info, &key, SEARCH_FIND, key_root) ;
      92               0 :        !r &&
      93                 :          (subkeys=ft_sintXkorr(info->last_key.data +
      94                 :                                info->last_key.data_length +
      95                 :                                info->last_key.ref_length - extra)) > 0 &&
      96                 :          info->cur_row.lastpos >= info->state->data_file_length ;
      97               0 :        r= _ma_search_next(info, &info->last_key, SEARCH_BIGGER, key_root))
      98                 :     ;
      99                 : 
     100               0 :   info->update|= HA_STATE_AKTIV;              /* for _ma_test_if_changed() */
     101                 : 
     102                 :   /* The following should be safe, even if we compare doubles */
     103               0 :   while (!r && gweight)
     104                 :   {
     105                 : 
     106               0 :     if (key.data_length &&
     107                 :         ha_compare_text(aio->charset,
     108                 :                         info->last_key.data+1,
     109                 :                         info->last_key.data_length +
     110                 :                         info->last_key.ref_length - extra - 1,
     111                 :                         key.data+1, key.data_length-1, 0, 0))
     112               0 :      break;
     113                 : 
     114               0 :     if (subkeys<0)
     115                 :     {
     116               0 :       if (doc_cnt)
     117               0 :         DBUG_RETURN(1); /* index is corrupted */
     118                 :       /*
     119                 :         TODO here: unsafe optimization, should this word
     120                 :         be skipped (based on subkeys) ?
     121                 :       */
     122               0 :       keybuff+= key.data_length;
     123               0 :       keyinfo= &info->s->ft2_keyinfo;
     124               0 :       key_root= info->cur_row.lastpos;
     125               0 :       key.data_length= 0;
     126               0 :       r= _ma_search_first(info, keyinfo, key_root);
     127               0 :       goto do_skip;
     128                 :     }
     129                 : #if HA_FT_WTYPE == HA_KEYTYPE_FLOAT
     130               0 :     tmp_weight=*(float*)&subkeys;
     131                 : #else
     132                 : #error
     133                 : #endif
     134                 :   /* The following should be safe, even if we compare doubles */
     135               0 :     if (tmp_weight==0)
     136               0 :       DBUG_RETURN(doc_cnt); /* stopword, doc_cnt should be 0 */
     137                 : 
     138               0 :     sdoc.doc.dpos= info->cur_row.lastpos;
     139                 : 
     140                 :     /* saving document matched into dtree */
     141               0 :     if (!(selem=tree_insert(&aio->dtree, &sdoc, 0, aio->dtree.custom_arg)))
     142               0 :       DBUG_RETURN(1);
     143                 : 
     144               0 :     sptr=(FT_SUPERDOC *)ELEMENT_KEY((&aio->dtree), selem);
     145                 : 
     146               0 :     if (selem->count==1) /* document's first match */
     147               0 :       sptr->doc.weight=0;
     148                 :     else
     149               0 :       sptr->doc.weight+=sptr->tmp_weight*sptr->word_ptr->weight;
     150                 : 
     151               0 :     sptr->word_ptr=word;
     152               0 :     sptr->tmp_weight=tmp_weight;
     153                 : 
     154               0 :     doc_cnt++;
     155                 : 
     156               0 :     gweight=word->weight*GWS_IN_USE;
     157               0 :     if (gweight < 0 || doc_cnt > 2000000)
     158               0 :       gweight=0;
     159                 : 
     160               0 :     if (_ma_test_if_changed(info) == 0)
     161               0 :         r= _ma_search_next(info, &info->last_key, SEARCH_BIGGER, key_root);
     162                 :     else
     163               0 :         r= _ma_search(info, &info->last_key, SEARCH_BIGGER, key_root);
     164                 : do_skip:
     165               0 :     while ((subkeys=ft_sintXkorr(info->last_key.data +
     166                 :                                  info->last_key.data_length +
     167                 :                                  info->last_key.ref_length - extra)) > 0 &&
     168                 :            !r && info->cur_row.lastpos >= info->state->data_file_length)
     169               0 :       r= _ma_search_next(info, &info->last_key, SEARCH_BIGGER, key_root);
     170                 : 
     171                 :   }
     172               0 :   word->weight=gweight;
     173                 : 
     174               0 :   DBUG_RETURN(0);
     175                 : }
     176                 : 
     177                 : 
     178                 : static int walk_and_copy(FT_SUPERDOC *from,
     179                 :                          uint32 count __attribute__((unused)), FT_DOC **to)
     180               0 : {
     181               0 :   DBUG_ENTER("walk_and_copy");
     182               0 :   from->doc.weight+=from->tmp_weight*from->word_ptr->weight;
     183               0 :   (*to)->dpos=from->doc.dpos;
     184               0 :   (*to)->weight=from->doc.weight;
     185               0 :   (*to)++;
     186               0 :   DBUG_RETURN(0);
     187                 : }
     188                 : 
     189                 : static int walk_and_push(FT_SUPERDOC *from,
     190                 :                          uint32 count __attribute__((unused)), QUEUE *best)
     191               0 : {
     192               0 :   DBUG_ENTER("walk_and_copy");
     193               0 :   from->doc.weight+=from->tmp_weight*from->word_ptr->weight;
     194               0 :   set_if_smaller(best->elements, ft_query_expansion_limit-1);
     195               0 :   queue_insert(best, (uchar *)& from->doc);
     196               0 :   DBUG_RETURN(0);
     197                 : }
     198                 : 
     199                 : 
     200                 : static int FT_DOC_cmp(void *unused __attribute__((unused)),
     201                 :                       FT_DOC *a, FT_DOC *b)
     202               0 : {
     203               0 :   return sgn(b->weight - a->weight);
     204                 : }
     205                 : 
     206                 : 
     207                 : FT_INFO *maria_ft_init_nlq_search(MARIA_HA *info, uint keynr, uchar *query,
     208                 :                             uint query_len, uint flags, uchar *record)
     209               0 : {
     210                 :   TREE        wtree;
     211                 :   ALL_IN_ONE  aio;
     212                 :   FT_DOC     *dptr;
     213               0 :   FT_INFO    *dlist=NULL;
     214               0 :   MARIA_RECORD_POS saved_lastpos= info->cur_row.lastpos;
     215                 :   struct st_mysql_ftparser *parser;
     216                 :   MYSQL_FTPARSER_PARAM *ftparser_param;
     217               0 :   DBUG_ENTER("maria_ft_init_nlq_search");
     218                 : 
     219                 :   /* black magic ON */
     220               0 :   if ((int) (keynr = _ma_check_index(info,keynr)) < 0)
     221               0 :     DBUG_RETURN(NULL);
     222               0 :   if (_ma_readinfo(info,F_RDLCK,1))
     223               0 :     DBUG_RETURN(NULL);
     224                 :   /* black magic OFF */
     225                 : 
     226               0 :   aio.info=info;
     227               0 :   aio.keynr=keynr;
     228               0 :   aio.charset=info->s->keyinfo[keynr].seg->charset;
     229               0 :   aio.keybuff= info->lastkey_buff2;
     230               0 :   parser= info->s->keyinfo[keynr].parser;
     231               0 :   if (! (ftparser_param= maria_ftparser_call_initializer(info, keynr, 0)))
     232               0 :     goto err;
     233                 : 
     234               0 :   bzero(&wtree,sizeof(wtree));
     235                 : 
     236               0 :   init_tree(&aio.dtree,0,0,sizeof(FT_SUPERDOC),(qsort_cmp2)&FT_SUPERDOC_cmp,0,
     237                 :             NULL, NULL);
     238                 : 
     239               0 :   maria_ft_parse_init(&wtree, aio.charset);
     240               0 :   ftparser_param->flags= 0;
     241               0 :   if (maria_ft_parse(&wtree, query, query_len, parser, ftparser_param,
     242                 :                &wtree.mem_root))
     243               0 :     goto err;
     244                 : 
     245               0 :   if (tree_walk(&wtree, (tree_walk_action)&walk_and_match, &aio,
     246                 :                 left_root_right))
     247               0 :     goto err;
     248                 : 
     249               0 :   if (flags & FT_EXPAND && ft_query_expansion_limit)
     250                 :   {
     251                 :     QUEUE best;
     252               0 :     init_queue(&best,ft_query_expansion_limit,0,0, (queue_compare) &FT_DOC_cmp,
     253                 :                0);
     254               0 :     tree_walk(&aio.dtree, (tree_walk_action) &walk_and_push,
     255                 :               &best, left_root_right);
     256               0 :     while (best.elements)
     257                 :     {
     258               0 :       my_off_t docid=((FT_DOC *)queue_remove(& best, 0))->dpos;
     259               0 :       if (!(*info->read_record)(info, record, docid))
     260                 :       {
     261               0 :         info->update|= HA_STATE_AKTIV;
     262               0 :         ftparser_param->flags= MYSQL_FTFLAGS_NEED_COPY;
     263               0 :         if (unlikely(_ma_ft_parse(&wtree, info, keynr, record, ftparser_param,
     264                 :                                   &wtree.mem_root)))
     265                 :         {
     266               0 :           delete_queue(&best);
     267               0 :           goto err;
     268                 :         }
     269                 :       }
     270                 :     }
     271               0 :     delete_queue(&best);
     272               0 :     reset_tree(&aio.dtree);
     273               0 :     if (tree_walk(&wtree, (tree_walk_action)&walk_and_match, &aio,
     274                 :                   left_root_right))
     275               0 :       goto err;
     276                 : 
     277                 :   }
     278                 : 
     279                 :   /*
     280                 :     If ndocs == 0, this will not allocate RAM for FT_INFO.doc[],
     281                 :     so if ndocs == 0, FT_INFO.doc[] must not be accessed.
     282                 :    */
     283               0 :   dlist=(FT_INFO *)my_malloc(sizeof(FT_INFO)+
     284                 :                              sizeof(FT_DOC)*
     285                 :                              (int)(aio.dtree.elements_in_tree-1),
     286                 :                              MYF(0));
     287               0 :   if (!dlist)
     288               0 :     goto err;
     289                 : 
     290               0 :   dlist->please= (struct _ft_vft *) & _ma_ft_vft_nlq;
     291               0 :   dlist->ndocs=aio.dtree.elements_in_tree;
     292               0 :   dlist->curdoc=-1;
     293               0 :   dlist->info=aio.info;
     294               0 :   dptr=dlist->doc;
     295                 : 
     296               0 :   tree_walk(&aio.dtree, (tree_walk_action) &walk_and_copy,
     297                 :             &dptr, left_root_right);
     298                 : 
     299               0 :   if (flags & FT_SORTED)
     300               0 :     my_qsort2(dlist->doc, dlist->ndocs, sizeof(FT_DOC),
     301                 :               (qsort2_cmp)&FT_DOC_cmp, 0);
     302                 : 
     303               0 : err:
     304               0 :   delete_tree(&aio.dtree);
     305               0 :   delete_tree(&wtree);
     306               0 :   info->cur_row.lastpos= saved_lastpos;
     307               0 :   DBUG_RETURN(dlist);
     308                 : }
     309                 : 
     310                 : 
     311                 : int maria_ft_nlq_read_next(FT_INFO *handler, char *record)
     312               0 : {
     313               0 :   MARIA_HA *info= (MARIA_HA *) handler->info;
     314                 : 
     315               0 :   if (++handler->curdoc >= handler->ndocs)
     316                 :   {
     317               0 :     --handler->curdoc;
     318               0 :     return HA_ERR_END_OF_FILE;
     319                 :   }
     320                 : 
     321               0 :   info->update&= (HA_STATE_CHANGED | HA_STATE_ROW_CHANGED);
     322                 : 
     323               0 :   info->cur_row.lastpos= handler->doc[handler->curdoc].dpos;
     324               0 :   if (!(*info->read_record)(info, (uchar *) record, info->cur_row.lastpos))
     325                 :   {
     326               0 :     info->update|= HA_STATE_AKTIV;           /* Record is read */
     327               0 :     return 0;
     328                 :   }
     329               0 :   return my_errno;
     330                 : }
     331                 : 
     332                 : 
     333                 : float maria_ft_nlq_find_relevance(FT_INFO *handler,
     334                 :                             uchar *record __attribute__((unused)),
     335                 :                             uint length __attribute__((unused)))
     336               0 : {
     337                 :   int a,b,c;
     338               0 :   FT_DOC  *docs=handler->doc;
     339               0 :   MARIA_RECORD_POS docid= handler->info->cur_row.lastpos;
     340                 : 
     341               0 :   if (docid == HA_POS_ERROR)
     342               0 :     return -5.0;
     343                 : 
     344                 :   /* Assuming docs[] is sorted by dpos... */
     345                 : 
     346               0 :   for (a=0, b=handler->ndocs, c=(a+b)/2; b-a>1; c=(a+b)/2)
     347                 :   {
     348               0 :     if (docs[c].dpos > docid)
     349               0 :       b=c;
     350                 :     else
     351               0 :       a=c;
     352                 :   }
     353                 :   /* bounds check to avoid accessing unallocated handler->doc  */
     354               0 :   if (a < handler->ndocs && docs[a].dpos == docid)
     355               0 :     return (float) docs[a].weight;
     356                 :   else
     357               0 :     return 0.0;
     358                 : }
     359                 : 
     360                 : 
     361                 : void maria_ft_nlq_close_search(FT_INFO *handler)
     362               0 : {
     363               0 :   my_free(handler, MYF(0));
     364                 : }
     365                 : 
     366                 : 
     367                 : float maria_ft_nlq_get_relevance(FT_INFO *handler)
     368               0 : {
     369               0 :   return (float) handler->doc[handler->curdoc].weight;
     370                 : }
     371                 : 
     372                 : 
     373                 : void maria_ft_nlq_reinit_search(FT_INFO *handler)
     374               0 : {
     375               0 :   handler->curdoc=-1;
     376                 : }
     377                 : 

Generated by: LTP GCOV extension version 1.4