LTP GCOV extension - code coverage report
Current view: directory - storage/maria - maria_pack.c
Test: maria-unit-test.html
Date: 2009-03-04 Instrumented lines: 1271
Code covered: 66.3 % Executed lines: 843

       1                 : /* Copyright (C) 2006 MySQL AB & MySQL Finland AB & TCX DataKonsult AB
       2                 : 
       3                 :    This program is free software; you can redistribute it and/or modify
       4                 :    it under the terms of the GNU General Public License as published by
       5                 :    the Free Software Foundation; version 2 of the License.
       6                 : 
       7                 :    This program is distributed in the hope that it will be useful,
       8                 :    but WITHOUT ANY WARRANTY; without even the implied warranty of
       9                 :    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
      10                 :    GNU General Public License for more details.
      11                 : 
      12                 :    You should have received a copy of the GNU General Public License
      13                 :    along with this program; if not, write to the Free Software
      14                 :    Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA */
      15                 : 
      16                 : /* Pack MARIA file */
      17                 : 
      18                 : #ifndef USE_MY_FUNC
      19                 : #define USE_MY_FUNC                     /* We need at least my_malloc */
      20                 : #endif
      21                 : 
      22                 : #include "maria_def.h"
      23                 : #include <queues.h>
      24                 : #include <my_tree.h>
      25                 : #include "mysys_err.h"
      26                 : #ifdef MSDOS
      27                 : #include <io.h>
      28                 : #endif
      29                 : #ifndef __GNU_LIBRARY__
      30                 : #define __GNU_LIBRARY__                 /* Skip warnings in getopt.h */
      31                 : #endif
      32                 : #include <my_getopt.h>
      33                 : #include <assert.h>
      34                 : 
      35                 : #if SIZEOF_LONG_LONG > 4
      36                 : #define BITS_SAVED 64
      37                 : #else
      38                 : #define BITS_SAVED 32
      39                 : #endif
      40                 : 
      41                 : #define IS_OFFSET ((uint) 32768)        /* Bit if offset or char in tree */
      42                 : #define HEAD_LENGTH     32
      43                 : #define ALLOWED_JOIN_DIFF       256     /* Diff allowed to join trees */
      44                 : 
      45                 : #define DATA_TMP_EXT            ".TMD"
      46                 : #define OLD_EXT                 ".OLD"
      47                 : #define WRITE_COUNT             MY_HOW_OFTEN_TO_WRITE
      48                 : 
      49                 : struct st_file_buffer {
      50                 :   File file;
      51                 :   uchar *buffer,*pos,*end;
      52                 :   my_off_t pos_in_file;
      53                 :   int bits;
      54                 :   ulonglong bitbucket;
      55                 : };
      56                 : 
      57                 : struct st_huff_tree;
      58                 : struct st_huff_element;
      59                 : 
      60                 : typedef struct st_huff_counts {
      61                 :   uint  field_length,max_zero_fill;
      62                 :   uint  pack_type;
      63                 :   uint  max_end_space,max_pre_space,length_bits,min_space;
      64                 :   ulong max_length;
      65                 :   enum en_fieldtype field_type;
      66                 :   struct st_huff_tree *tree;            /* Tree for field */
      67                 :   my_off_t counts[256];
      68                 :   my_off_t end_space[8];
      69                 :   my_off_t pre_space[8];
      70                 :   my_off_t tot_end_space,tot_pre_space,zero_fields,empty_fields,bytes_packed;
      71                 :   TREE int_tree;        /* Tree for detecting distinct column values. */
      72                 :   uchar *tree_buff;      /* Column values, 'field_length' each. */
      73                 :   uchar *tree_pos;       /* Points to end of column values in 'tree_buff'. */
      74                 : } HUFF_COUNTS;
      75                 : 
      76                 : typedef struct st_huff_element HUFF_ELEMENT;
      77                 : 
      78                 : /*
      79                 :   WARNING: It is crucial for the optimizations in calc_packed_length()
      80                 :   that 'count' is the first element of 'HUFF_ELEMENT'.
      81                 : */
      82                 : struct st_huff_element {
      83                 :   my_off_t count;
      84                 :   union un_element {
      85                 :     struct st_nod {
      86                 :       HUFF_ELEMENT *left,*right;
      87                 :     } nod;
      88                 :     struct st_leaf {
      89                 :       HUFF_ELEMENT *null;
      90                 :       uint      element_nr;             /* Number of element */
      91                 :     } leaf;
      92                 :   } a;
      93                 : };
      94                 : 
      95                 : 
      96                 : typedef struct st_huff_tree {
      97                 :   HUFF_ELEMENT *root,*element_buffer;
      98                 :   HUFF_COUNTS *counts;
      99                 :   uint tree_number;
     100                 :   uint elements;
     101                 :   my_off_t bytes_packed;
     102                 :   uint tree_pack_length;
     103                 :   uint min_chr,max_chr,char_bits,offset_bits,max_offset,height;
     104                 :   ulonglong *code;
     105                 :   uchar *code_len;
     106                 : } HUFF_TREE;
     107                 : 
     108                 : 
     109                 : typedef struct st_isam_mrg {
     110                 :   MARIA_HA **file,**current,**end;
     111                 :   uint free_file;
     112                 :   uint count;
     113                 :   uint  min_pack_length;                /* Theese is used by packed data */
     114                 :   uint  max_pack_length;
     115                 :   uint  ref_length;
     116                 :   uint  max_blob_length;
     117                 :   my_off_t records;
     118                 :   /* true if at least one source file has at least one disabled index */
     119                 :   my_bool src_file_has_indexes_disabled;
     120                 : } PACK_MRG_INFO;
     121                 : 
     122                 : 
     123                 : extern int main(int argc,char * *argv);
     124                 : static void get_options(int *argc,char ***argv);
     125                 : static MARIA_HA *open_maria_file(char *name,int mode);
     126                 : static my_bool open_maria_files(PACK_MRG_INFO *mrg,char **names,uint count);
     127                 : static int compress(PACK_MRG_INFO *file,char *join_name);
     128                 : static HUFF_COUNTS *init_huff_count(MARIA_HA *info,my_off_t records);
     129                 : static void free_counts_and_tree_and_queue(HUFF_TREE *huff_trees,
     130                 :                                            uint trees,
     131                 :                                            HUFF_COUNTS *huff_counts,
     132                 :                                            uint fields);
     133                 : static int compare_tree(void* cmp_arg __attribute__((unused)),
     134                 :                         const uchar *s,const uchar *t);
     135                 : static int get_statistic(PACK_MRG_INFO *mrg,HUFF_COUNTS *huff_counts);
     136                 : static void check_counts(HUFF_COUNTS *huff_counts,uint trees,
     137                 :                          my_off_t records);
     138                 : static int test_space_compress(HUFF_COUNTS *huff_counts,my_off_t records,
     139                 :                                uint max_space_length,my_off_t *space_counts,
     140                 :                                my_off_t tot_space_count,
     141                 :                                enum en_fieldtype field_type);
     142                 : static HUFF_TREE* make_huff_trees(HUFF_COUNTS *huff_counts,uint trees);
     143                 : static int make_huff_tree(HUFF_TREE *tree,HUFF_COUNTS *huff_counts);
     144                 : static int compare_huff_elements(void *not_used, uchar *a,uchar *b);
     145                 : static int save_counts_in_queue(uchar *key,element_count count,
     146                 :                                     HUFF_TREE *tree);
     147                 : static my_off_t calc_packed_length(HUFF_COUNTS *huff_counts,uint flag);
     148                 : static uint join_same_trees(HUFF_COUNTS *huff_counts,uint trees);
     149                 : static int make_huff_decode_table(HUFF_TREE *huff_tree,uint trees);
     150                 : static void make_traverse_code_tree(HUFF_TREE *huff_tree,
     151                 :                                     HUFF_ELEMENT *element,uint size,
     152                 :                                     ulonglong code);
     153                 : static int write_header(PACK_MRG_INFO *isam_file, uint header_length,uint trees,
     154                 :                         my_off_t tot_elements,my_off_t filelength);
     155                 : static void write_field_info(HUFF_COUNTS *counts, uint fields,uint trees);
     156                 : static my_off_t write_huff_tree(HUFF_TREE *huff_tree,uint trees);
     157                 : static uint *make_offset_code_tree(HUFF_TREE *huff_tree,
     158                 :                                        HUFF_ELEMENT *element,
     159                 :                                        uint *offset);
     160                 : static uint max_bit(uint value);
     161                 : static int compress_maria_file(PACK_MRG_INFO *file,HUFF_COUNTS *huff_counts);
     162                 : static char *make_new_name(char *new_name,char *old_name);
     163                 : static char *make_old_name(char *new_name,char *old_name);
     164                 : static void init_file_buffer(File file,pbool read_buffer);
     165                 : static int flush_buffer(ulong neaded_length);
     166                 : static void end_file_buffer(void);
     167                 : static void write_bits(ulonglong value, uint bits);
     168                 : static void flush_bits(void);
     169                 : static int save_state(MARIA_HA *isam_file,PACK_MRG_INFO *mrg,
     170                 :                       my_off_t new_length, ha_checksum crc);
     171                 : static int save_state_mrg(File file,PACK_MRG_INFO *isam_file,
     172                 :                           my_off_t new_length, ha_checksum crc);
     173                 : static int mrg_close(PACK_MRG_INFO *mrg);
     174                 : static int mrg_rrnd(PACK_MRG_INFO *info,uchar *buf);
     175                 : static void mrg_reset(PACK_MRG_INFO *mrg);
     176                 : #if !defined(DBUG_OFF)
     177                 : static void fakebigcodes(HUFF_COUNTS *huff_counts, HUFF_COUNTS *end_count);
     178                 : static int fakecmp(my_off_t **count1, my_off_t **count2);
     179                 : #endif
     180                 : 
     181                 : 
     182                 : static int error_on_write=0,test_only=0,verbose=0,silent=0,
     183                 :            write_loop=0,force_pack=0, isamchk_neaded=0;
     184                 : static int tmpfile_createflag=O_RDWR | O_TRUNC | O_EXCL;
     185                 : static my_bool backup, opt_wait;
     186                 : /*
     187                 :   tree_buff_length is somewhat arbitrary. The bigger it is the better
     188                 :   the chance to win in terms of compression factor. On the other hand,
     189                 :   this table becomes part of the compressed file header. And its length
     190                 :   is coded with 16 bits in the header. Hence the limit is 2**16 - 1.
     191                 : */
     192                 : static uint tree_buff_length= 65536 - MALLOC_OVERHEAD;
     193                 : static char tmp_dir[FN_REFLEN]={0},*join_table;
     194                 : static my_off_t intervall_length;
     195                 : static ha_checksum glob_crc;
     196                 : static struct st_file_buffer file_buffer;
     197                 : static QUEUE queue;
     198                 : static HUFF_COUNTS *global_count;
     199                 : static char zero_string[]={0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0};
     200                 : static const char *load_default_groups[]= { "mariapack",0 };
     201                 : 
     202                 :         /* The main program */
     203                 : 
     204                 : int main(int argc, char **argv)
     205              30 : {
     206                 :   int error,ok;
     207                 :   PACK_MRG_INFO merge;
     208                 :   char **default_argv;
     209              30 :   MY_INIT(argv[0]);
     210                 : 
     211              30 :   load_defaults("my",load_default_groups,&argc,&argv);
     212              30 :   default_argv= argv;
     213              30 :   get_options(&argc,&argv);
     214              30 :   maria_init();
     215                 : 
     216              30 :   error=ok=isamchk_neaded=0;
     217              30 :   if (join_table)
     218                 :   {                                             /* Join files into one */
     219               5 :     if (open_maria_files(&merge,argv,(uint) argc) ||
     220                 :         compress(&merge,join_table))
     221               0 :       error=1;
     222                 :   }
     223              50 :   else while (argc--)
     224                 :   {
     225                 :     MARIA_HA *isam_file;
     226              25 :     if (!(isam_file=open_maria_file(*argv++,O_RDWR)))
     227               0 :       error=1;
     228                 :     else
     229                 :     {
     230              25 :       merge.file= &isam_file;
     231              25 :       merge.current=0;
     232              25 :       merge.free_file=0;
     233              25 :       merge.count=1;
     234              25 :       if (compress(&merge,0))
     235               0 :         error=1;
     236                 :       else
     237              25 :         ok=1;
     238                 :     }
     239                 :   }
     240              30 :   if (ok && isamchk_neaded && !silent)
     241               0 :     puts("Remember to run maria_chk -rq on compressed tables");
     242              30 :   VOID(fflush(stdout));
     243              30 :   VOID(fflush(stderr));
     244              30 :   free_defaults(default_argv);
     245              30 :   maria_end();
     246              30 :   my_end(verbose ? MY_CHECK_ERROR | MY_GIVE_INFO : MY_CHECK_ERROR);
     247              30 :   exit(error ? 2 : 0);
     248                 : #ifndef _lint
     249                 :   return 0;                                     /* No compiler warning */
     250                 : #endif
     251                 : }
     252                 : 
     253                 : enum options_mp {OPT_CHARSETS_DIR_MP=256, OPT_AUTO_CLOSE};
     254                 : 
     255                 : static struct my_option my_long_options[] =
     256                 : {
     257                 : #ifdef __NETWARE__
     258                 :   {"autoclose", OPT_AUTO_CLOSE, "Auto close the screen on exit for Netware.",
     259                 :    0, 0, 0, GET_NO_ARG, NO_ARG, 0, 0, 0, 0, 0, 0},
     260                 : #endif
     261                 :   {"backup", 'b', "Make a backup of the table as table_name.OLD.",
     262                 :    (uchar**) &backup, (uchar**) &backup, 0, GET_BOOL, NO_ARG, 0, 0, 0, 0, 0, 0},
     263                 :   {"character-sets-dir", OPT_CHARSETS_DIR_MP,
     264                 :    "Directory where character sets are.", (uchar**) &charsets_dir,
     265                 :    (uchar**) &charsets_dir, 0, GET_STR, REQUIRED_ARG, 0, 0, 0, 0, 0, 0},
     266                 :   {"debug", '#', "Output debug log. Often this is 'd:t:o,filename'.",
     267                 :    0, 0, 0, GET_STR, OPT_ARG, 0, 0, 0, 0, 0, 0},
     268                 :   {"force", 'f',
     269                 :    "Force packing of table even if it gets bigger or if tempfile exists.",
     270                 :    0, 0, 0, GET_NO_ARG, NO_ARG, 0, 0, 0, 0, 0, 0},
     271                 :   {"join", 'j',
     272                 :    "Join all given tables into 'new_table_name'. All tables MUST have identical layouts.",
     273                 :    (uchar**) &join_table, (uchar**) &join_table, 0, GET_STR, REQUIRED_ARG, 0, 0, 0,
     274                 :    0, 0, 0},
     275                 :   {"help", '?', "Display this help and exit.",
     276                 :    0, 0, 0, GET_NO_ARG, NO_ARG, 0, 0, 0, 0, 0, 0},
     277                 :   {"silent", 's', "Be more silent.",
     278                 :    0, 0, 0, GET_NO_ARG, NO_ARG, 0, 0, 0, 0, 0, 0},
     279                 :   {"tmpdir", 'T', "Use temporary directory to store temporary table.",
     280                 :    0, 0, 0, GET_STR, REQUIRED_ARG, 0, 0, 0, 0, 0, 0},
     281                 :   {"test", 't', "Don't pack table, only test packing it.",
     282                 :    0, 0, 0, GET_NO_ARG, NO_ARG, 0, 0, 0, 0, 0, 0},
     283                 :   {"verbose", 'v', "Write info about progress and packing result. Use many -v for more verbosity!",
     284                 :    0, 0, 0, GET_NO_ARG, NO_ARG, 0, 0, 0, 0, 0, 0},
     285                 :   {"version", 'V', "Output version information and exit.",
     286                 :    0, 0, 0, GET_NO_ARG, NO_ARG, 0, 0, 0, 0, 0, 0},
     287                 :   {"wait", 'w', "Wait and retry if table is in use.", (uchar**) &opt_wait,
     288                 :    (uchar**) &opt_wait, 0, GET_BOOL, NO_ARG, 0, 0, 0, 0, 0, 0},
     289                 :   { 0, 0, 0, 0, 0, 0, GET_NO_ARG, NO_ARG, 0, 0, 0, 0, 0, 0}
     290                 : };
     291                 : 
     292                 : #include <help_start.h>
     293                 : 
     294                 : static void print_version(void)
     295               0 : {
     296               0 :   VOID(printf("%s Ver 1.0 for %s on %s\n",
     297                 :               my_progname, SYSTEM_TYPE, MACHINE_TYPE));
     298                 :   NETWARE_SET_SCREEN_MODE(1);
     299                 : }
     300                 : 
     301                 : 
     302                 : static void usage(void)
     303               0 : {
     304               0 :   print_version();
     305               0 :   puts("Copyright 2002-2008 MySQL AB, 2008-2009 Sun Microsystems, Inc.");
     306               0 :   puts("This software comes with ABSOLUTELY NO WARRANTY. This is free software,");
     307               0 :   puts("and you are welcome to modify and redistribute it under the GPL license\n");
     308                 : 
     309               0 :   puts("Pack a MARIA-table to take much less space.");
     310               0 :   puts("Keys are not updated, you must run maria_chk -rq on the index (.MAI) file");
     311               0 :   puts("afterwards to update the keys.");
     312               0 :   puts("You should give the .MAI file as the filename argument.");
     313               0 :   puts("To unpack a packed table, run maria_chk -u on the table");
     314                 : 
     315               0 :   VOID(printf("\nUsage: %s [OPTIONS] filename...\n", my_progname));
     316               0 :   my_print_help(my_long_options);
     317               0 :   print_defaults("my", load_default_groups);
     318               0 :   my_print_variables(my_long_options);
     319                 : }
     320                 : 
     321                 : #include <help_end.h>
     322                 : 
     323                 : static my_bool
     324                 : get_one_option(int optid, const struct my_option *opt __attribute__((unused)),
     325                 :                char *argument)
     326              65 : {
     327                 :   uint length;
     328                 : 
     329              65 :   switch(optid) {
     330                 : #ifdef __NETWARE__
     331                 :   case OPT_AUTO_CLOSE:
     332                 :     setscreenmode(SCR_AUTOCLOSE_ON_EXIT);
     333                 :     break;
     334                 : #endif
     335                 :   case 'f':
     336              30 :     force_pack= 1;
     337              30 :     tmpfile_createflag= O_RDWR | O_TRUNC;
     338              30 :     break;
     339                 :   case 's':
     340              30 :     write_loop= verbose= 0;
     341              30 :     silent= 1;
     342              30 :     break;
     343                 :   case 't':
     344               0 :     test_only= 1;
     345                 :     /* Avoid to reset 'verbose' if it was already set > 1. */
     346               0 :     if (! verbose)
     347               0 :       verbose= 1;
     348                 :     break;
     349                 :   case 'T':
     350               0 :     length= (uint) (strmov(tmp_dir, argument) - tmp_dir);
     351               0 :     if (length != dirname_length(tmp_dir))
     352                 :     {
     353               0 :       tmp_dir[length]=FN_LIBCHAR;
     354               0 :       tmp_dir[length+1]=0;
     355                 :     }
     356                 :     break;
     357                 :   case 'v':
     358               0 :     verbose++; /* Allow for selecting the level of verbosity. */
     359               0 :     silent= 0;
     360               0 :     break;
     361                 :   case '#':
     362               0 :     DBUG_PUSH(argument ? argument : "d:t:o,/tmp/maria_pack.trace");
     363               0 :     break;
     364                 :   case 'V':
     365               0 :     print_version();
     366               0 :     exit(0);
     367                 :   case 'I':
     368                 :   case '?':
     369               0 :     usage();
     370               0 :     exit(0);
     371                 :   }
     372              65 :   return 0;
     373                 : }
     374                 : 
     375                 :         /* reads options */
     376                 :         /* Initiates DEBUG - but no debugging here ! */
     377                 : 
     378                 : static void get_options(int *argc,char ***argv)
     379              30 : {
     380                 :   int ho_error;
     381                 : 
     382              30 :   my_progname= argv[0][0];
     383              30 :   if (isatty(fileno(stdout)))
     384               0 :     write_loop=1;
     385                 : 
     386              30 :   if ((ho_error=handle_options(argc, argv, my_long_options, get_one_option)))
     387               0 :     exit(ho_error);
     388                 : 
     389              30 :   if (!*argc)
     390                 :   {
     391               0 :     usage();
     392               0 :     exit(1);
     393                 :   }
     394              30 :   if (join_table)
     395                 :   {
     396               5 :     backup=0;                                   /* Not needed */
     397               5 :     tmp_dir[0]=0;
     398                 :   }
     399                 :   return;
     400                 : }
     401                 : 
     402                 : 
     403                 : static MARIA_HA *open_maria_file(char *name,int mode)
     404              35 : {
     405                 :   MARIA_HA *isam_file;
     406                 :   MARIA_SHARE *share;
     407              35 :   DBUG_ENTER("open_maria_file");
     408                 : 
     409              35 :   if (!(isam_file=maria_open(name, mode, HA_OPEN_IGNORE_MOVED_STATE |
     410                 :                           (opt_wait ? HA_OPEN_WAIT_IF_LOCKED :
     411                 :                            HA_OPEN_ABORT_IF_LOCKED))))
     412                 :   {
     413               0 :     VOID(fprintf(stderr, "%s gave error %d on open\n", name, my_errno));
     414               0 :     DBUG_RETURN(0);
     415                 :   }
     416              35 :   share=isam_file->s;
     417              35 :   if (share->options & HA_OPTION_COMPRESS_RECORD && !join_table)
     418                 :   {
     419               0 :     if (!force_pack)
     420                 :     {
     421               0 :       VOID(fprintf(stderr, "%s is already compressed\n", name));
     422               0 :       VOID(maria_close(isam_file));
     423               0 :       DBUG_RETURN(0);
     424                 :     }
     425               0 :     if (verbose)
     426               0 :       puts("Recompressing already compressed table");
     427               0 :     share->options&= ~HA_OPTION_READ_ONLY_DATA; /* We are modifing it */
     428                 :   }
     429              35 :   if (! force_pack && share->state.state.records != 0 &&
     430                 :       (share->state.state.records <= 1 ||
     431                 :        share->state.state.data_file_length < 1024))
     432                 :   {
     433               0 :     VOID(fprintf(stderr, "%s is too small to compress\n", name));
     434               0 :     VOID(maria_close(isam_file));
     435               0 :     DBUG_RETURN(0);
     436                 :   }
     437              35 :   VOID(maria_lock_database(isam_file,F_WRLCK));
     438              35 :   maria_ignore_trids(isam_file);
     439              35 :   DBUG_RETURN(isam_file);
     440                 : }
     441                 : 
     442                 : 
     443                 : static my_bool open_maria_files(PACK_MRG_INFO *mrg,char **names,uint count)
     444               5 : {
     445                 :   uint i,j;
     446               5 :   mrg->count=0;
     447               5 :   mrg->current=0;
     448               5 :   mrg->file=(MARIA_HA**) my_malloc(sizeof(MARIA_HA*)*count,MYF(MY_FAE));
     449               5 :   mrg->free_file=1;
     450               5 :   mrg->src_file_has_indexes_disabled= 0;
     451              15 :   for (i=0; i < count ; i++)
     452                 :   {
     453              10 :     if (!(mrg->file[i]=open_maria_file(names[i],O_RDONLY)))
     454              10 :       goto error;
     455                 : 
     456              10 :     mrg->src_file_has_indexes_disabled|=
     457                 :       ! maria_is_all_keys_active(mrg->file[i]->s->state.key_map,
     458                 :                               mrg->file[i]->s->base.keys);
     459                 :   }
     460                 :   /* Check that files are identical */
     461              10 :   for (j=0 ; j < count-1 ; j++)
     462                 :   {
     463                 :     MARIA_COLUMNDEF *m1,*m2,*end;
     464               5 :     if (mrg->file[j]->s->base.reclength != mrg->file[j+1]->s->base.reclength ||
     465                 :         mrg->file[j]->s->base.fields != mrg->file[j+1]->s->base.fields)
     466                 :       goto diff_file;
     467               5 :     m1=mrg->file[j]->s->columndef;
     468               5 :     end=m1+mrg->file[j]->s->base.fields;
     469               5 :     m2=mrg->file[j+1]->s->columndef;
     470              15 :     for ( ; m1 != end ; m1++,m2++)
     471                 :     {
     472              10 :       if (m1->type != m2->type || m1->length != m2->length)
     473                 :         goto diff_file;
     474                 :     }
     475                 :   }
     476               5 :   mrg->count=count;
     477               5 :   return 0;
     478                 : 
     479               0 :  diff_file:
     480               0 :   VOID(fprintf(stderr, "%s: Tables '%s' and '%s' are not identical\n",
     481                 :                my_progname, names[j], names[j+1]));
     482                 :  error:
     483               0 :   while (i--)
     484               0 :     maria_close(mrg->file[i]);
     485               0 :   my_free(mrg->file, MYF(0));
     486               0 :   return 1;
     487                 : }
     488                 : 
     489                 : 
     490                 : static int compress(PACK_MRG_INFO *mrg,char *result_table)
     491              30 : {
     492                 :   int error;
     493                 :   File new_file,join_maria_file;
     494                 :   MARIA_HA *isam_file;
     495                 :   MARIA_SHARE *share;
     496                 :   char org_name[FN_REFLEN],new_name[FN_REFLEN],temp_name[FN_REFLEN];
     497                 :   uint i,header_length,fields,trees,used_trees;
     498                 :   my_off_t old_length,new_length,tot_elements;
     499                 :   HUFF_COUNTS *huff_counts;
     500                 :   HUFF_TREE *huff_trees;
     501              30 :   DBUG_ENTER("compress");
     502                 : 
     503              30 :   isam_file=mrg->file[0];                    /* Take this as an example */
     504              30 :   share=isam_file->s;
     505              30 :   new_file=join_maria_file= -1;
     506              30 :   trees=fields=0;
     507              30 :   huff_trees=0;
     508              30 :   huff_counts=0;
     509              30 :   maria_block_size= isam_file->s->block_size;
     510                 : 
     511                 :   /* Create temporary or join file */
     512              30 :   if (backup)
     513               0 :     VOID(fn_format(org_name,isam_file->s->open_file_name.str,
     514                 :                    "",MARIA_NAME_DEXT, 2));
     515                 :   else
     516              30 :     VOID(fn_format(org_name,isam_file->s->open_file_name.str,
     517                 :                    "",MARIA_NAME_DEXT, 2+4+16));
     518                 : 
     519              30 :   if (init_pagecache(maria_pagecache, MARIA_MIN_PAGE_CACHE_SIZE, 0, 0,
     520                 :                      maria_block_size, MY_WME) == 0)
     521                 :   {
     522               0 :     fprintf(stderr, "Can't initialize page cache\n");
     523               0 :     goto err;
     524                 :   }
     525                 : 
     526              35 :   if (!test_only && result_table)
     527                 :   {
     528                 :     /* Make a new indexfile based on first file in list */
     529                 :     uint length;
     530                 :     uchar *buff;
     531               5 :     strmov(org_name,result_table);              /* Fix error messages */
     532               5 :     VOID(fn_format(new_name,result_table,"",MARIA_NAME_IEXT,2));
     533               5 :     if ((join_maria_file=my_create(new_name,0,tmpfile_createflag,MYF(MY_WME)))
     534                 :         < 0)
     535               5 :       goto err;
     536               5 :     length=(uint) share->base.keystart;
     537               5 :     if (!(buff= (uchar*) my_malloc(length,MYF(MY_WME))))
     538               5 :       goto err;
     539               5 :     if (my_pread(share->kfile.file, buff, length, 0L, MYF(MY_WME | MY_NABP)) ||
     540                 :         my_write(join_maria_file,buff,length,
     541                 :                  MYF(MY_WME | MY_NABP | MY_WAIT_IF_FULL)))
     542                 :     {
     543               0 :       my_free(buff,MYF(0));
     544               0 :       goto err;
     545                 :     }
     546               5 :     my_free(buff,MYF(0));
     547               5 :     VOID(fn_format(new_name,result_table,"",MARIA_NAME_DEXT,2));
     548                 :   }
     549              25 :   else if (!tmp_dir[0])
     550              25 :     VOID(make_new_name(new_name,org_name));
     551                 :   else
     552               0 :     VOID(fn_format(new_name,org_name,tmp_dir,DATA_TMP_EXT,1+2+4));
     553              30 :   if (!test_only &&
     554                 :       (new_file=my_create(new_name,0,tmpfile_createflag,MYF(MY_WME))) < 0)
     555              30 :     goto err;
     556                 : 
     557                 :   /* Start calculating statistics */
     558                 : 
     559              30 :   mrg->records=0;
     560              65 :   for (i=0 ; i < mrg->count ; i++)
     561              35 :     mrg->records+=mrg->file[i]->s->state.state.records;
     562                 : 
     563              30 :   DBUG_PRINT("info", ("Compressing %s: (%lu records)",
     564                 :                       result_table ? new_name : org_name,
     565                 :                       (ulong) mrg->records));
     566              30 :   if (write_loop || verbose)
     567                 :   {
     568               0 :     VOID(printf("Compressing %s: (%lu records)\n",
     569                 :                 result_table ? new_name : org_name, (ulong) mrg->records));
     570                 :   }
     571              30 :   trees=fields=share->base.fields;
     572              30 :   huff_counts=init_huff_count(isam_file,mrg->records);
     573              30 :   QUICK_SAFEMALLOC;
     574                 : 
     575                 :   /*
     576                 :     Read the whole data file(s) for statistics.
     577                 :   */
     578              30 :   DBUG_PRINT("info", ("- Calculating statistics"));
     579              30 :   if (write_loop || verbose)
     580               0 :     VOID(printf("- Calculating statistics\n"));
     581              30 :   if (get_statistic(mrg,huff_counts))
     582              30 :     goto err;
     583              30 :   NORMAL_SAFEMALLOC;
     584              30 :   old_length=0;
     585              65 :   for (i=0; i < mrg->count ; i++)
     586              35 :     old_length+= (mrg->file[i]->s->state.state.data_file_length -
     587                 :                   mrg->file[i]->s->state.state.empty);
     588                 : 
     589                 :   /*
     590                 :     Create a global priority queue in preparation for making
     591                 :     temporary Huffman trees.
     592                 :   */
     593              30 :   if (init_queue(&queue,256,0,0,compare_huff_elements,0))
     594              30 :     goto err;
     595                 : 
     596                 :   /*
     597                 :     Check each column if we should use pre-space-compress, end-space-
     598                 :     compress, empty-field-compress or zero-field-compress.
     599                 :   */
     600              30 :   check_counts(huff_counts,fields,mrg->records);
     601                 : 
     602                 :   /*
     603                 :     Build a Huffman tree for each column.
     604                 :   */
     605              30 :   huff_trees=make_huff_trees(huff_counts,trees);
     606                 : 
     607                 :   /*
     608                 :     If the packed lengths of combined columns is less then the sum of
     609                 :     the non-combined columns, then create common Huffman trees for them.
     610                 :     We do this only for uchar compressed columns, not for distinct values
     611                 :     compressed columns.
     612                 :   */
     613              30 :   if ((int) (used_trees=join_same_trees(huff_counts,trees)) < 0)
     614              30 :     goto err;
     615                 : 
     616                 :   /*
     617                 :     Assign codes to all uchar or column values.
     618                 :   */
     619              30 :   if (make_huff_decode_table(huff_trees,fields))
     620              30 :     goto err;
     621                 : 
     622                 :   /* Prepare a file buffer. */
     623              30 :   init_file_buffer(new_file,0);
     624                 : 
     625                 :   /*
     626                 :     Reserve space in the target file for the fixed compressed file header.
     627                 :   */
     628              30 :   file_buffer.pos_in_file=HEAD_LENGTH;
     629              30 :   if (! test_only)
     630              30 :     VOID(my_seek(new_file,file_buffer.pos_in_file,MY_SEEK_SET,MYF(0)));
     631                 : 
     632                 :   /*
     633                 :     Write field infos: field type, pack type, length bits, tree number.
     634                 :   */
     635              30 :   write_field_info(huff_counts,fields,used_trees);
     636                 : 
     637                 :   /*
     638                 :     Write decode trees.
     639                 :   */
     640              30 :   if (!(tot_elements=write_huff_tree(huff_trees,trees)))
     641              30 :     goto err;
     642                 : 
     643                 :   /*
     644                 :     Calculate the total length of the compression info header.
     645                 :     This includes the fixed compressed file header, the column compression
     646                 :     type descriptions, and the decode trees.
     647                 :   */
     648              30 :   header_length=(uint) file_buffer.pos_in_file+
     649                 :     (uint) (file_buffer.pos-file_buffer.buffer);
     650                 : 
     651                 :   /*
     652                 :     Compress the source file into the target file.
     653                 :   */
     654              30 :   DBUG_PRINT("info", ("- Compressing file"));
     655              30 :   if (write_loop || verbose)
     656               0 :     VOID(printf("- Compressing file\n"));
     657              30 :   error=compress_maria_file(mrg,huff_counts);
     658              30 :   new_length=file_buffer.pos_in_file;
     659              30 :   if (!error && !test_only)
     660                 :   {
     661                 :     uchar buff[MEMMAP_EXTRA_MARGIN];            /* End marginal for memmap */
     662              30 :     bzero(buff,sizeof(buff));
     663              30 :     error=my_write(file_buffer.file,buff,sizeof(buff),
     664                 :                    MYF(MY_WME | MY_NABP | MY_WAIT_IF_FULL)) != 0;
     665                 :   }
     666                 : 
     667                 :   /*
     668                 :     Write the fixed compressed file header.
     669                 :   */
     670              30 :   if (!error)
     671              30 :     error=write_header(mrg,header_length,used_trees,tot_elements,
     672                 :                        new_length);
     673                 : 
     674                 :   /* Flush the file buffer. */
     675              30 :   end_file_buffer();
     676                 : 
     677                 :   /* Display statistics. */
     678              30 :   DBUG_PRINT("info", ("Min record length: %6d  Max length: %6d  "
     679                 :                       "Mean total length: %6ld",
     680                 :                       mrg->min_pack_length, mrg->max_pack_length,
     681                 :                       (ulong) (mrg->records ? (new_length/mrg->records) : 0)));
     682              30 :   if (verbose && mrg->records)
     683               0 :     VOID(printf("Min record length: %6d   Max length: %6d   "
     684                 :                 "Mean total length: %6ld\n", mrg->min_pack_length,
     685                 :                 mrg->max_pack_length, (ulong) (new_length/mrg->records)));
     686                 : 
     687                 :   /* Close source and target file. */
     688              30 :   if (!test_only)
     689                 :   {
     690              30 :     error|=my_close(new_file,MYF(MY_WME));
     691              30 :     if (!result_table)
     692                 :     {
     693              25 :       error|=my_close(isam_file->dfile.file, MYF(MY_WME));
     694              25 :       isam_file->dfile.file= -1;     /* Tell maria_close file is closed */
     695              25 :       isam_file->s->bitmap.file.file= -1;
     696                 :     }
     697                 :   }
     698                 : 
     699                 :   /* Cleanup. */
     700              30 :   free_counts_and_tree_and_queue(huff_trees,trees,huff_counts,fields);
     701              30 :   if (! test_only && ! error)
     702                 :   {
     703              30 :     if (result_table)
     704                 :     {
     705               5 :       error=save_state_mrg(join_maria_file,mrg,new_length,glob_crc);
     706                 :     }
     707                 :     else
     708                 :     {
     709              25 :       if (backup)
     710                 :       {
     711               0 :         if (my_rename(org_name,make_old_name(temp_name,
     712                 :                                              isam_file->s->open_file_name.str),
     713                 :                       MYF(MY_WME)))
     714               0 :           error=1;
     715                 :         else
     716                 :         {
     717               0 :           if (tmp_dir[0])
     718               0 :             error=my_copy(new_name,org_name,MYF(MY_WME));
     719                 :           else
     720               0 :             error=my_rename(new_name,org_name,MYF(MY_WME));
     721               0 :           if (!error)
     722                 :           {
     723               0 :             VOID(my_copystat(temp_name,org_name,MYF(MY_COPYTIME)));
     724               0 :             if (tmp_dir[0])
     725               0 :               VOID(my_delete(new_name,MYF(MY_WME)));
     726                 :           }
     727                 :         }
     728                 :       }
     729                 :       else
     730                 :       {
     731              25 :         if (tmp_dir[0])
     732                 :         {
     733               0 :           error=my_copy(new_name,org_name,
     734                 :                         MYF(MY_WME | MY_HOLD_ORIGINAL_MODES | MY_COPYTIME));
     735               0 :           if (!error)
     736               0 :             VOID(my_delete(new_name,MYF(MY_WME)));
     737                 :         }
     738                 :         else
     739              25 :           error=my_redel(org_name,new_name,MYF(MY_WME | MY_COPYTIME));
     740                 :       }
     741              25 :       if (! error)
     742              25 :         error=save_state(isam_file,mrg,new_length,glob_crc);
     743                 :     }
     744                 :   }
     745              30 :   error|=mrg_close(mrg);
     746              30 :   if (join_maria_file >= 0)
     747               5 :     error|=my_close(join_maria_file,MYF(MY_WME));
     748              30 :   if (error)
     749                 :   {
     750               0 :     VOID(fprintf(stderr, "Aborting: %s is not compressed\n", org_name));
     751               0 :     VOID(my_delete(new_name,MYF(MY_WME)));
     752               0 :     DBUG_RETURN(-1);
     753                 :   }
     754              30 :   if (write_loop || verbose)
     755                 :   {
     756               0 :     if (old_length)
     757               0 :       VOID(printf("%.4g%%     \n",
     758                 :                   (((longlong) (old_length - new_length)) * 100.0 /
     759                 :                    (longlong) old_length)));
     760                 :     else
     761               0 :       puts("Empty file saved in compressed format");
     762                 :   }
     763              30 :   DBUG_RETURN(0);
     764                 : 
     765               0 :  err:
     766               0 :   end_pagecache(maria_pagecache, 1);
     767               0 :   free_counts_and_tree_and_queue(huff_trees,trees,huff_counts,fields);
     768               0 :   if (new_file >= 0)
     769               0 :     VOID(my_close(new_file,MYF(0)));
     770               0 :   if (join_maria_file >= 0)
     771               0 :     VOID(my_close(join_maria_file,MYF(0)));
     772               0 :   mrg_close(mrg);
     773               0 :   VOID(fprintf(stderr, "Aborted: %s is not compressed\n", org_name));
     774               0 :   DBUG_RETURN(-1);
     775                 : }
     776                 : 
     777                 :         /* Init a huff_count-struct for each field and init it */
     778                 : 
     779                 : static HUFF_COUNTS *init_huff_count(MARIA_HA *info,my_off_t records)
     780              30 : {
     781                 :   reg2 uint i;
     782                 :   reg1 HUFF_COUNTS *count;
     783              30 :   if ((count = (HUFF_COUNTS*) my_malloc(info->s->base.fields*
     784                 :                                         sizeof(HUFF_COUNTS),
     785                 :                                         MYF(MY_ZEROFILL | MY_WME))))
     786                 :   {
     787              90 :     for (i=0 ; i < info->s->base.fields ; i++)
     788                 :     {
     789                 :       enum en_fieldtype type;
     790              60 :       count[i].field_length=info->s->columndef[i].length;
     791              60 :       type= count[i].field_type= (enum en_fieldtype) info->s->columndef[i].type;
     792              60 :       if (type == FIELD_INTERVALL ||
     793                 :           type == FIELD_CONSTANT ||
     794                 :           type == FIELD_ZERO)
     795               0 :         type = FIELD_NORMAL;
     796              60 :       if (count[i].field_length <= 8 &&
     797                 :           (type == FIELD_NORMAL ||
     798                 :            type == FIELD_SKIP_ZERO))
     799              27 :         count[i].max_zero_fill= count[i].field_length;
     800                 :       /*
     801                 :         For every column initialize a tree, which is used to detect distinct
     802                 :         column values. 'int_tree' works together with 'tree_buff' and
     803                 :         'tree_pos'. It's keys are implemented by pointers into 'tree_buff'.
     804                 :         This is accomplished by '-1' as the element size.
     805                 :       */
     806              60 :       init_tree(&count[i].int_tree,0,0,-1,(qsort_cmp2) compare_tree,0, NULL,
     807                 :                 NULL);
     808              60 :       if (records && type != FIELD_BLOB && type != FIELD_VARCHAR)
     809              60 :         count[i].tree_pos=count[i].tree_buff =
     810                 :           my_malloc(count[i].field_length > 1 ? tree_buff_length : 2,
     811                 :                     MYF(MY_WME));
     812                 :     }
     813                 :   }
     814              30 :   return count;
     815                 : }
     816                 : 
     817                 : 
     818                 :         /* Free memory used by counts and trees */
     819                 : 
     820                 : static void free_counts_and_tree_and_queue(HUFF_TREE *huff_trees, uint trees,
     821                 :                                            HUFF_COUNTS *huff_counts,
     822                 :                                            uint fields)
     823              30 : {
     824                 :   register uint i;
     825                 : 
     826              30 :   if (huff_trees)
     827                 :   {
     828              90 :     for (i=0 ; i < trees ; i++)
     829                 :     {
     830              60 :       if (huff_trees[i].element_buffer)
     831              30 :         my_free(huff_trees[i].element_buffer,MYF(0));
     832              60 :       if (huff_trees[i].code)
     833              30 :         my_free(huff_trees[i].code,MYF(0));
     834                 :     }
     835              30 :     my_free(huff_trees,MYF(0));
     836                 :   }
     837              30 :   if (huff_counts)
     838                 :   {
     839              90 :     for (i=0 ; i < fields ; i++)
     840                 :     {
     841              60 :       if (huff_counts[i].tree_buff)
     842                 :       {
     843               0 :         my_free(huff_counts[i].tree_buff,MYF(0));
     844               0 :         delete_tree(&huff_counts[i].int_tree);
     845                 :       }
     846                 :     }
     847              30 :     my_free(huff_counts, MYF(0));
     848                 :   }
     849              30 :   delete_queue(&queue);             /* This is safe to free */
     850                 :   return;
     851                 : }
     852                 : 
     853                 :         /* Read through old file and gather some statistics */
     854                 : 
     855                 : static int get_statistic(PACK_MRG_INFO *mrg,HUFF_COUNTS *huff_counts)
     856              30 : {
     857                 :   int error;
     858                 :   uint length, null_bytes;
     859                 :   ulong reclength,max_blob_length;
     860                 :   uchar *record,*pos,*next_pos,*end_pos,*start_pos;
     861                 :   ha_rows record_count;
     862                 :   HUFF_COUNTS *count,*end_count;
     863                 :   TREE_ELEMENT *element;
     864                 :   ha_checksum(*calc_checksum)(MARIA_HA *, const uchar *);
     865              30 :   DBUG_ENTER("get_statistic");
     866                 : 
     867              30 :   reclength=  mrg->file[0]->s->base.reclength;
     868              30 :   null_bytes= mrg->file[0]->s->base.null_bytes;
     869              30 :   record=(uchar*) my_alloca(reclength);
     870              30 :   end_count=huff_counts+mrg->file[0]->s->base.fields;
     871              30 :   record_count=0; glob_crc=0;
     872              30 :   max_blob_length=0;
     873                 : 
     874                 :   /* Check how to calculate checksum */
     875              30 :   if (mrg->file[0]->s->data_file_type == STATIC_RECORD)
     876               9 :     calc_checksum= _ma_static_checksum;
     877                 :   else
     878              21 :     calc_checksum= _ma_checksum;
     879                 : 
     880              30 :   mrg_reset(mrg);
     881             645 :   while ((error=mrg_rrnd(mrg,record)) != HA_ERR_END_OF_FILE)
     882                 :   {
     883             585 :     ulong tot_blob_length=0;
     884             585 :     if (! error)
     885                 :     {
     886                 :       /* glob_crc is a checksum over all bytes of all records. */
     887             525 :       glob_crc+= (*calc_checksum)(mrg->file[0],record);
     888                 : 
     889                 :       /* Count the incidence of values separately for every column. */
     890             525 :       for (pos=record + null_bytes, count=huff_counts ;
     891            2100 :            count < end_count ;
     892                 :            count++,
     893            1050 :            pos=next_pos)
     894                 :       {
     895            1050 :         next_pos=end_pos=(start_pos=pos)+count->field_length;
     896                 : 
     897                 :         /*
     898                 :           Put the whole column value in a tree if there is room for it.
     899                 :           'int_tree' is used to quickly check for duplicate values.
     900                 :           'tree_buff' collects as many distinct column values as
     901                 :           possible. If the field length is > 1, it is tree_buff_length,
     902                 :           else 2 bytes. Each value is 'field_length' bytes big. If there
     903                 :           are more distinct column values than fit into the buffer, we
     904                 :           give up with this tree. BLOBs and VARCHARs do not have a
     905                 :           tree_buff as it can only be used with fixed length columns.
     906                 :           For the special case of field length == 1, we handle only the
     907                 :           case that there is only one distinct value in the table(s).
     908                 :           Otherwise, we can have a maximum of 256 distinct values. This
     909                 :           is then handled by the normal Huffman tree build.
     910                 : 
     911                 :           Another limit for collecting distinct column values is the
     912                 :           number of values itself. Since we would need to build a
     913                 :           Huffman tree for the values, we are limited by the 'IS_OFFSET'
     914                 :           constant. This constant expresses a bit which is used to
     915                 :           determine if a tree element holds a final value or an offset
     916                 :           to a child element. Hence, all values and offsets need to be
     917                 :           smaller than 'IS_OFFSET'. A tree element is implemented with
     918                 :           two integer values, one for the left branch and one for the
     919                 :           right branch. For the extreme case that the first element
     920                 :           points to the last element, the number of integers in the tree
     921                 :           must be less or equal to IS_OFFSET. So the number of elements
     922                 :           must be less or equal to IS_OFFSET / 2.
     923                 : 
     924                 :           WARNING: At first, we insert a pointer into the record buffer
     925                 :           as the key for the tree. If we got a new distinct value, which
     926                 :           is really inserted into the tree, instead of being counted
     927                 :           only, we will copy the column value from the record buffer to
     928                 :           'tree_buff' and adjust the key pointer of the tree accordingly.
     929                 :         */
     930            1050 :         if (count->tree_buff)
     931                 :         {
     932            1050 :           global_count=count;
     933            1050 :           if (!(element=tree_insert(&count->int_tree,pos, 0,
     934                 :                                     count->int_tree.custom_arg)) ||
     935                 :               (element->count == 1 &&
     936                 :                (count->tree_buff + tree_buff_length <
     937                 :                 count->tree_pos + count->field_length)) ||
     938                 :               (count->int_tree.elements_in_tree > IS_OFFSET / 2) ||
     939                 :               (count->field_length == 1 &&
     940                 :                count->int_tree.elements_in_tree > 1))
     941                 :           {
     942               0 :             delete_tree(&count->int_tree);
     943               0 :             my_free(count->tree_buff,MYF(0));
     944               0 :             count->tree_buff=0;
     945                 :           }
     946                 :           else
     947                 :           {
     948                 :             /*
     949                 :               If tree_insert() succeeds, it either creates a new element
     950                 :               or increments the counter of an existing element.
     951                 :             */
     952            1050 :             if (element->count == 1)
     953                 :             {
     954                 :               /* Copy the new column value into 'tree_buff'. */
     955             855 :               memcpy(count->tree_pos,pos,(size_t) count->field_length);
     956                 :               /* Adjust the key pointer in the tree. */
     957             855 :               tree_set_pointer(element,count->tree_pos);
     958                 :               /* Point behind the last column value so far. */
     959             855 :               count->tree_pos+=count->field_length;
     960                 :             }
     961                 :           }
     962                 :         }
     963                 : 
     964                 :         /* Save character counters and space-counts and zero-field-counts */
     965            1050 :         if (count->field_type == FIELD_NORMAL ||
     966                 :             count->field_type == FIELD_SKIP_ENDSPACE)
     967                 :         {
     968                 :           /* Ignore trailing space. */
     969               0 :           for ( ; end_pos > pos ; end_pos--)
     970             990 :             if (end_pos[-1] != ' ')
     971               0 :               break;
     972                 :           /* Empty fields are just counted. Go to the next record. */
     973             990 :           if (end_pos == pos)
     974                 :           {
     975               0 :             count->empty_fields++;
     976               0 :             count->max_zero_fill=0;
     977               0 :             continue;
     978                 :           }
     979                 :           /*
     980                 :             Count the total of all trailing spaces and the number of
     981                 :             short trailing spaces. Remember the longest trailing space.
     982                 :           */
     983             990 :           length= (uint) (next_pos-end_pos);
     984             990 :           count->tot_end_space+=length;
     985             990 :           if (length < 8)
     986             990 :             count->end_space[length]++;
     987             990 :           if (count->max_end_space < length)
     988               0 :             count->max_end_space = length;
     989                 :         }
     990                 : 
     991            1050 :         if (count->field_type == FIELD_NORMAL ||
     992                 :             count->field_type == FIELD_SKIP_PRESPACE)
     993                 :         {
     994                 :           /* Ignore leading space. */
     995            3135 :           for (pos=start_pos; pos < end_pos ; pos++)
     996            3135 :             if (pos[0] != ' ')
     997            2325 :               break;
     998                 :           /* Empty fields are just counted. Go to the next record. */
     999             810 :           if (end_pos == pos)
    1000                 :           {
    1001               0 :             count->empty_fields++;
    1002               0 :             count->max_zero_fill=0;
    1003               0 :             continue;
    1004                 :           }
    1005                 :           /*
    1006                 :             Count the total of all leading spaces and the number of
    1007                 :             short leading spaces. Remember the longest leading space.
    1008                 :           */
    1009             810 :           length= (uint) (pos-start_pos);
    1010             810 :           count->tot_pre_space+=length;
    1011             810 :           if (length < 8)
    1012             810 :             count->pre_space[length]++;
    1013             810 :           if (count->max_pre_space < length)
    1014              55 :             count->max_pre_space = length;
    1015                 :         }
    1016                 : 
    1017                 :         /* Calculate pos, end_pos, and max_length for variable length fields. */
    1018            1050 :         if (count->field_type == FIELD_BLOB)
    1019                 :         {
    1020               0 :           uint field_length=count->field_length -portable_sizeof_char_ptr;
    1021               0 :           ulong blob_length= _ma_calc_blob_length(field_length, start_pos);
    1022               0 :           memcpy_fixed((char*) &pos,  start_pos+field_length,sizeof(char*));
    1023               0 :           end_pos=pos+blob_length;
    1024               0 :           tot_blob_length+=blob_length;
    1025               0 :           set_if_bigger(count->max_length,blob_length);
    1026                 :         }
    1027            1050 :         else if (count->field_type == FIELD_VARCHAR)
    1028                 :         {
    1029               0 :           uint pack_length= HA_VARCHAR_PACKLENGTH(count->field_length-1);
    1030               0 :           length= (pack_length == 1 ? (uint) *(uchar*) start_pos :
    1031                 :                    uint2korr(start_pos));
    1032               0 :           pos= start_pos+pack_length;
    1033               0 :           end_pos= pos+length;
    1034               0 :           set_if_bigger(count->max_length,length);
    1035                 :         }
    1036                 : 
    1037                 :         /* Evaluate 'max_zero_fill' for short fields. */
    1038            1050 :         if (count->field_length <= 8 &&
    1039                 :             (count->field_type == FIELD_NORMAL ||
    1040                 :              count->field_type == FIELD_SKIP_ZERO))
    1041                 :         {
    1042                 :           uint i;
    1043                 :           /* Zero fields are just counted. Go to the next record. */
    1044             465 :           if (!memcmp(start_pos, zero_string, count->field_length))
    1045                 :           {
    1046               0 :             count->zero_fields++;
    1047               0 :             continue;
    1048                 :           }
    1049                 :           /*
    1050                 :             max_zero_fill starts with field_length. It is decreased every
    1051                 :             time a shorter "zero trailer" is found. It is set to zero when
    1052                 :             an empty field is found (see above). This suggests that the
    1053                 :             variable should be called 'min_zero_fill'.
    1054                 :           */
    1055             930 :           for (i =0 ; i < count->max_zero_fill && ! end_pos[-1 - (int) i] ;
    1056               0 :                i++) ;
    1057             465 :           if (i < count->max_zero_fill)
    1058              27 :             count->max_zero_fill=i;
    1059                 :         }
    1060                 : 
    1061                 :         /* Ignore zero fields and check fields. */
    1062            1050 :         if (count->field_type == FIELD_ZERO ||
    1063                 :             count->field_type == FIELD_CHECK)
    1064                 :           continue;
    1065                 : 
    1066                 :         /*
    1067                 :           Count the incidence of every uchar value in the
    1068                 :           significant field value.
    1069                 :         */
    1070           13425 :         for ( ; pos < end_pos ; pos++)
    1071           13425 :           count->counts[(uchar) *pos]++;
    1072                 : 
    1073                 :         /* Step to next field. */
    1074                 :       }
    1075                 : 
    1076             525 :       if (tot_blob_length > max_blob_length)
    1077               0 :         max_blob_length=tot_blob_length;
    1078             525 :       record_count++;
    1079             525 :       if (write_loop && record_count % WRITE_COUNT == 0)
    1080                 :       {
    1081               0 :         VOID(printf("%lu\r", (ulong) record_count));
    1082               0 :         VOID(fflush(stdout));
    1083                 :       }
    1084                 :     }
    1085              60 :     else if (error != HA_ERR_RECORD_DELETED)
    1086                 :     {
    1087               0 :       VOID(fprintf(stderr, "Got error %d while reading rows\n", error));
    1088               0 :       break;
    1089                 :     }
    1090                 : 
    1091                 :     /* Step to next record. */
    1092                 :   }
    1093              30 :   if (write_loop)
    1094                 :   {
    1095               0 :     VOID(printf("            \r"));
    1096               0 :     VOID(fflush(stdout));
    1097                 :   }
    1098                 : 
    1099                 :   /*
    1100                 :     If --debug=d,fakebigcodes is set, fake the counts to get big Huffman
    1101                 :     codes.
    1102                 :   */
    1103              30 :   DBUG_EXECUTE_IF("fakebigcodes", fakebigcodes(huff_counts, end_count););
    1104                 : 
    1105              30 :   DBUG_PRINT("info", ("Found the following number of incidents "
    1106                 :                       "of the uchar codes:"));
    1107              30 :   if (verbose >= 2)
    1108               0 :     VOID(printf("Found the following number of incidents "
    1109                 :                 "of the uchar codes:\n"));
    1110              90 :   for (count= huff_counts ; count < end_count; count++)
    1111                 :   {
    1112                 :     uint      idx;
    1113                 :     my_off_t  total_count;
    1114                 :     char      llbuf[32];
    1115                 : 
    1116              60 :     DBUG_PRINT("info", ("column: %3u", (uint) (count - huff_counts + 1)));
    1117              60 :     if (verbose >= 2)
    1118               0 :       VOID(printf("column: %3u\n", (uint) (count - huff_counts + 1)));
    1119              60 :     if (count->tree_buff)
    1120                 :     {
    1121              60 :       DBUG_PRINT("info", ("number of distinct values: %u",
    1122                 :                           (uint) ((count->tree_pos - count->tree_buff) /
    1123                 :                                   count->field_length)));
    1124              60 :       if (verbose >= 2)
    1125               0 :         VOID(printf("number of distinct values: %u\n",
    1126                 :                     (uint) ((count->tree_pos - count->tree_buff) /
    1127                 :                             count->field_length)));
    1128                 :     }
    1129              60 :     total_count= 0;
    1130           15420 :     for (idx= 0; idx < 256; idx++)
    1131                 :     {
    1132           15360 :       if (count->counts[idx])
    1133                 :       {
    1134             600 :         total_count+= count->counts[idx];
    1135             600 :         DBUG_PRINT("info", ("counts[0x%02x]: %12s", idx,
    1136                 :                             llstr((longlong) count->counts[idx], llbuf)));
    1137             600 :         if (verbose >= 2)
    1138               0 :           VOID(printf("counts[0x%02x]: %12s\n", idx,
    1139                 :                       llstr((longlong) count->counts[idx], llbuf)));
    1140                 :       }
    1141                 :     }
    1142              60 :     DBUG_PRINT("info", ("total:        %12s", llstr((longlong) total_count,
    1143                 :                                                     llbuf)));
    1144              60 :     if ((verbose >= 2) && total_count)
    1145                 :     {
    1146               0 :       VOID(printf("total:        %12s\n",
    1147                 :                   llstr((longlong) total_count, llbuf)));
    1148                 :     }
    1149                 :   }
    1150                 : 
    1151              30 :   mrg->records=record_count;
    1152              30 :   mrg->max_blob_length=max_blob_length;
    1153                 :   my_afree(record);
    1154              30 :   DBUG_RETURN(error != HA_ERR_END_OF_FILE);
    1155                 : }
    1156                 : 
    1157                 : static int compare_huff_elements(void *not_used __attribute__((unused)),
    1158                 :                                  uchar *a, uchar *b)
    1159           23005 : {
    1160           23005 :   return *((my_off_t*) a) < *((my_off_t*) b) ? -1 :
    1161                 :     (*((my_off_t*) a) == *((my_off_t*) b)  ? 0 : 1);
    1162                 : }
    1163                 : 
    1164                 :         /* Check each tree if we should use pre-space-compress, end-space-
    1165                 :            compress, empty-field-compress or zero-field-compress */
    1166                 : 
    1167                 : static void check_counts(HUFF_COUNTS *huff_counts, uint trees,
    1168                 :                          my_off_t records)
    1169              30 : {
    1170                 :   uint space_fields,fill_zero_fields,field_count[(int) FIELD_enum_val_count];
    1171                 :   my_off_t old_length,new_length,length;
    1172              30 :   DBUG_ENTER("check_counts");
    1173                 : 
    1174              30 :   bzero((uchar*) field_count,sizeof(field_count));
    1175              30 :   space_fields=fill_zero_fields=0;
    1176                 : 
    1177              90 :   for (; trees-- ; huff_counts++)
    1178                 :   {
    1179              60 :     if (huff_counts->field_type == FIELD_BLOB)
    1180                 :     {
    1181               0 :       huff_counts->length_bits=max_bit(huff_counts->max_length);
    1182               0 :       goto found_pack;
    1183                 :     }
    1184              60 :     else if (huff_counts->field_type == FIELD_VARCHAR)
    1185                 :     {
    1186               0 :       huff_counts->length_bits=max_bit(huff_counts->max_length);
    1187               0 :       goto found_pack;
    1188                 :     }
    1189              60 :     else if (huff_counts->field_type == FIELD_CHECK)
    1190                 :     {
    1191               0 :       huff_counts->bytes_packed=0;
    1192               0 :       huff_counts->counts[0]=0;
    1193               0 :       goto found_pack;
    1194                 :     }
    1195                 : 
    1196              60 :     huff_counts->field_type=FIELD_NORMAL;
    1197              60 :     huff_counts->pack_type=0;
    1198                 : 
    1199                 :     /* Check for zero-filled records (in this column), or zero records. */
    1200              60 :     if (huff_counts->zero_fields || ! records)
    1201                 :     {
    1202                 :       my_off_t old_space_count;
    1203                 :       /*
    1204                 :         If there are only zero filled records (in this column),
    1205                 :         or no records at all, we are done.
    1206                 :       */
    1207               0 :       if (huff_counts->zero_fields == records)
    1208                 :       {
    1209               0 :         huff_counts->field_type= FIELD_ZERO;
    1210               0 :         huff_counts->bytes_packed=0;
    1211               0 :         huff_counts->counts[0]=0;
    1212               0 :         goto found_pack;
    1213                 :       }
    1214                 :       /* Remeber the number of significant spaces. */
    1215               0 :       old_space_count=huff_counts->counts[' '];
    1216                 :       /* Add all leading and trailing spaces. */
    1217               0 :       huff_counts->counts[' ']+= (huff_counts->tot_end_space +
    1218                 :                                   huff_counts->tot_pre_space +
    1219                 :                                   huff_counts->empty_fields *
    1220                 :                                   huff_counts->field_length);
    1221                 :       /* Check, what the compressed length of this would be. */
    1222               0 :       old_length=calc_packed_length(huff_counts,0)+records/8;
    1223                 :       /* Get the number of zero bytes. */
    1224               0 :       length=huff_counts->zero_fields*huff_counts->field_length;
    1225                 :       /* Add it to the counts. */
    1226               0 :       huff_counts->counts[0]+=length;
    1227                 :       /* Check, what the compressed length of this would be. */
    1228               0 :       new_length=calc_packed_length(huff_counts,0);
    1229                 :       /* If the compression without the zeroes would be shorter, we are done. */
    1230               0 :       if (old_length < new_length && huff_counts->field_length > 1)
    1231                 :       {
    1232               0 :         huff_counts->field_type=FIELD_SKIP_ZERO;
    1233               0 :         huff_counts->counts[0]-=length;
    1234               0 :         huff_counts->bytes_packed=old_length- records/8;
    1235               0 :         goto found_pack;
    1236                 :       }
    1237                 :       /* Remove the insignificant spaces, but keep the zeroes. */
    1238               0 :       huff_counts->counts[' ']=old_space_count;
    1239                 :     }
    1240                 :     /* Check, what the compressed length of this column would be. */
    1241              60 :     huff_counts->bytes_packed=calc_packed_length(huff_counts,0);
    1242                 : 
    1243                 :     /*
    1244                 :       If there are enough empty records (in this column),
    1245                 :       treating them specially may pay off.
    1246                 :     */
    1247              60 :     if (huff_counts->empty_fields)
    1248                 :     {
    1249               0 :       if (huff_counts->field_length > 2 &&
    1250                 :           huff_counts->empty_fields + (records - huff_counts->empty_fields)*
    1251                 :           (1+max_bit(max(huff_counts->max_pre_space,
    1252                 :                          huff_counts->max_end_space))) <
    1253                 :           records * max_bit(huff_counts->field_length))
    1254                 :       {
    1255               0 :         huff_counts->pack_type |= PACK_TYPE_SPACE_FIELDS;
    1256                 :       }
    1257                 :       else
    1258                 :       {
    1259               0 :         length=huff_counts->empty_fields*huff_counts->field_length;
    1260               0 :         if (huff_counts->tot_end_space || ! huff_counts->tot_pre_space)
    1261                 :         {
    1262               0 :           huff_counts->tot_end_space+=length;
    1263               0 :           huff_counts->max_end_space=huff_counts->field_length;
    1264               0 :           if (huff_counts->field_length < 8)
    1265               0 :             huff_counts->end_space[huff_counts->field_length]+=
    1266                 :               huff_counts->empty_fields;
    1267                 :         }
    1268               0 :         if (huff_counts->tot_pre_space)
    1269                 :         {
    1270               0 :           huff_counts->tot_pre_space+=length;
    1271               0 :           huff_counts->max_pre_space=huff_counts->field_length;
    1272               0 :           if (huff_counts->field_length < 8)
    1273               0 :             huff_counts->pre_space[huff_counts->field_length]+=
    1274                 :               huff_counts->empty_fields;
    1275                 :         }
    1276                 :       }
    1277                 :     }
    1278                 : 
    1279                 :     /*
    1280                 :       If there are enough trailing spaces (in this column),
    1281                 :       treating them specially may pay off.
    1282                 :     */
    1283              60 :     if (huff_counts->tot_end_space)
    1284                 :     {
    1285               0 :       huff_counts->counts[' ']+=huff_counts->tot_pre_space;
    1286               0 :       if (test_space_compress(huff_counts,records,huff_counts->max_end_space,
    1287                 :                               huff_counts->end_space,
    1288                 :                               huff_counts->tot_end_space,FIELD_SKIP_ENDSPACE))
    1289               0 :         goto found_pack;
    1290               0 :       huff_counts->counts[' ']-=huff_counts->tot_pre_space;
    1291                 :     }
    1292                 : 
    1293                 :     /*
    1294                 :       If there are enough leading spaces (in this column),
    1295                 :       treating them specially may pay off.
    1296                 :     */
    1297              60 :     if (huff_counts->tot_pre_space)
    1298                 :     {
    1299              30 :       if (test_space_compress(huff_counts,records,huff_counts->max_pre_space,
    1300                 :                               huff_counts->pre_space,
    1301                 :                               huff_counts->tot_pre_space,FIELD_SKIP_PRESPACE))
    1302                 :         goto found_pack;
    1303                 :     }
    1304                 : 
    1305              60 :   found_pack:                   /* Found field-packing */
    1306                 : 
    1307                 :     /* Test if we can use zero-fill */
    1308                 : 
    1309              60 :     if (huff_counts->max_zero_fill &&
    1310                 :         (huff_counts->field_type == FIELD_NORMAL ||
    1311                 :          huff_counts->field_type == FIELD_SKIP_ZERO))
    1312                 :     {
    1313               0 :       huff_counts->counts[0]-=huff_counts->max_zero_fill*
    1314                 :         (huff_counts->field_type == FIELD_SKIP_ZERO ?
    1315                 :          records - huff_counts->zero_fields : records);
    1316               0 :       huff_counts->pack_type|=PACK_TYPE_ZERO_FILL;
    1317               0 :       huff_counts->bytes_packed=calc_packed_length(huff_counts,0);
    1318                 :     }
    1319                 : 
    1320                 :     /* Test if intervall-field is better */
    1321                 : 
    1322              60 :     if (huff_counts->tree_buff)
    1323                 :     {
    1324                 :       HUFF_TREE tree;
    1325                 : 
    1326              60 :       DBUG_EXECUTE_IF("forceintervall",
    1327                 :                       huff_counts->bytes_packed= ~ (my_off_t) 0;);
    1328              60 :       tree.element_buffer=0;
    1329              60 :       if (!make_huff_tree(&tree,huff_counts) &&
    1330                 :           tree.bytes_packed+tree.tree_pack_length < huff_counts->bytes_packed)
    1331                 :       {
    1332               0 :         if (tree.elements == 1)
    1333               0 :           huff_counts->field_type=FIELD_CONSTANT;
    1334                 :         else
    1335               0 :           huff_counts->field_type=FIELD_INTERVALL;
    1336               0 :         huff_counts->pack_type=0;
    1337                 :       }
    1338                 :       else
    1339                 :       {
    1340              60 :         my_free(huff_counts->tree_buff,MYF(0));
    1341              60 :         delete_tree(&huff_counts->int_tree);
    1342              60 :         huff_counts->tree_buff=0;
    1343                 :       }
    1344              60 :       if (tree.element_buffer)
    1345              60 :         my_free(tree.element_buffer,MYF(0));
    1346                 :     }
    1347              60 :     if (huff_counts->pack_type & PACK_TYPE_SPACE_FIELDS)
    1348               0 :       space_fields++;
    1349              60 :     if (huff_counts->pack_type & PACK_TYPE_ZERO_FILL)
    1350               0 :       fill_zero_fields++;
    1351              60 :     field_count[huff_counts->field_type]++;
    1352                 :   }
    1353              30 :   DBUG_PRINT("info", ("normal:    %3d  empty-space:     %3d  "
    1354                 :                       "empty-zero:       %3d  empty-fill: %3d",
    1355                 :                       field_count[FIELD_NORMAL],space_fields,
    1356                 :                       field_count[FIELD_SKIP_ZERO],fill_zero_fields));
    1357              30 :   DBUG_PRINT("info", ("pre-space: %3d  end-space:       %3d  "
    1358                 :                       "intervall-fields: %3d  zero:       %3d",
    1359                 :                       field_count[FIELD_SKIP_PRESPACE],
    1360                 :                       field_count[FIELD_SKIP_ENDSPACE],
    1361                 :                       field_count[FIELD_INTERVALL],
    1362                 :                       field_count[FIELD_ZERO]));
    1363              30 :   if (verbose)
    1364               0 :     VOID(printf("\nnormal:    %3d  empty-space:     %3d  "
    1365                 :                 "empty-zero:       %3d  empty-fill: %3d\n"
    1366                 :                 "pre-space: %3d  end-space:       %3d  "
    1367                 :                 "intervall-fields: %3d  zero:       %3d\n",
    1368                 :                 field_count[FIELD_NORMAL],space_fields,
    1369                 :                 field_count[FIELD_SKIP_ZERO],fill_zero_fields,
    1370                 :                 field_count[FIELD_SKIP_PRESPACE],
    1371                 :                 field_count[FIELD_SKIP_ENDSPACE],
    1372                 :                 field_count[FIELD_INTERVALL],
    1373                 :                 field_count[FIELD_ZERO]));
    1374              30 :   DBUG_VOID_RETURN;
    1375                 : }
    1376                 : 
    1377                 : 
    1378                 : /* Test if we can use space-compression and empty-field-compression */
    1379                 : 
    1380                 : static int
    1381                 : test_space_compress(HUFF_COUNTS *huff_counts, my_off_t records,
    1382                 :                     uint max_space_length, my_off_t *space_counts,
    1383                 :                     my_off_t tot_space_count, enum en_fieldtype field_type)
    1384              30 : {
    1385                 :   int min_pos;
    1386                 :   uint length_bits,i;
    1387                 :   my_off_t space_count,min_space_count,min_pack,new_length,skip;
    1388                 : 
    1389              30 :   length_bits=max_bit(max_space_length);
    1390                 : 
    1391                 :                 /* Default no end_space-packing */
    1392              30 :   space_count=huff_counts->counts[(uint) ' '];
    1393              30 :   min_space_count= (huff_counts->counts[(uint) ' ']+= tot_space_count);
    1394              30 :   min_pack=calc_packed_length(huff_counts,0);
    1395              30 :   min_pos= -2;
    1396              30 :   huff_counts->counts[(uint) ' ']=space_count;
    1397                 : 
    1398                 :         /* Test with allways space-count */
    1399              30 :   new_length=huff_counts->bytes_packed+length_bits*records/8;
    1400              30 :   if (new_length+1 < min_pack)
    1401                 :   {
    1402              30 :     min_pos= -1;
    1403              30 :     min_pack=new_length;
    1404              30 :     min_space_count=space_count;
    1405                 :   }
    1406                 :         /* Test with length-flag */
    1407             270 :   for (skip=0L, i=0 ; i < 8 ; i++)
    1408                 :   {
    1409             240 :     if (space_counts[i])
    1410                 :     {
    1411              55 :       if (i)
    1412              55 :         huff_counts->counts[(uint) ' ']+=space_counts[i];
    1413              55 :       skip+=huff_counts->pre_space[i];
    1414              55 :       new_length=calc_packed_length(huff_counts,0)+
    1415                 :         (records+(records-skip)*(1+length_bits))/8;
    1416              55 :       if (new_length < min_pack)
    1417                 :       {
    1418               0 :         min_pos=(int) i;
    1419               0 :         min_pack=new_length;
    1420               0 :         min_space_count=huff_counts->counts[(uint) ' '];
    1421                 :       }
    1422                 :     }
    1423                 :   }
    1424                 : 
    1425              30 :   huff_counts->counts[(uint) ' ']=min_space_count;
    1426              30 :   huff_counts->bytes_packed=min_pack;
    1427              30 :   switch (min_pos) {
    1428                 :   case -2:
    1429               0 :     return(0);                          /* No space-compress */
    1430                 :   case -1:                              /* Always space-count */
    1431              30 :     huff_counts->field_type=field_type;
    1432              30 :     huff_counts->min_space=0;
    1433              30 :     huff_counts->length_bits=max_bit(max_space_length);
    1434              30 :     break;
    1435                 :   default:
    1436               0 :     huff_counts->field_type=field_type;
    1437               0 :     huff_counts->min_space=(uint) min_pos;
    1438               0 :     huff_counts->pack_type|=PACK_TYPE_SELECTED;
    1439               0 :     huff_counts->length_bits=max_bit(max_space_length);
    1440                 :     break;
    1441                 :   }
    1442              30 :   return(1);                            /* Using space-compress */
    1443                 : }
    1444                 : 
    1445                 : 
    1446                 :         /* Make a huff_tree of each huff_count */
    1447                 : 
    1448                 : static HUFF_TREE* make_huff_trees(HUFF_COUNTS *huff_counts, uint trees)
    1449              30 : {
    1450                 :   uint tree;
    1451                 :   HUFF_TREE *huff_tree;
    1452              30 :   DBUG_ENTER("make_huff_trees");
    1453                 : 
    1454              30 :   if (!(huff_tree=(HUFF_TREE*) my_malloc(trees*sizeof(HUFF_TREE),
    1455                 :                                          MYF(MY_WME | MY_ZEROFILL))))
    1456               0 :     DBUG_RETURN(0);
    1457                 : 
    1458              90 :   for (tree=0 ; tree < trees ; tree++)
    1459                 :   {
    1460              60 :     if (make_huff_tree(huff_tree+tree,huff_counts+tree))
    1461                 :     {
    1462               0 :       while (tree--)
    1463               0 :         my_free(huff_tree[tree].element_buffer,MYF(0));
    1464               0 :       my_free(huff_tree,MYF(0));
    1465               0 :       DBUG_RETURN(0);
    1466                 :     }
    1467                 :   }
    1468              30 :   DBUG_RETURN(huff_tree);
    1469                 : }
    1470                 : 
    1471                 : /*
    1472                 :   Build a Huffman tree.
    1473                 : 
    1474                 :   SYNOPSIS
    1475                 :     make_huff_tree()
    1476                 :     huff_tree                   The Huffman tree.
    1477                 :     huff_counts                 The counts.
    1478                 : 
    1479                 :   DESCRIPTION
    1480                 :     Build a Huffman tree according to huff_counts->counts or
    1481                 :     huff_counts->tree_buff. tree_buff, if non-NULL contains up to
    1482                 :     tree_buff_length of distinct column values. In that case, whole
    1483                 :     values can be Huffman encoded instead of single bytes.
    1484                 : 
    1485                 :   RETURN
    1486                 :     0           OK
    1487                 :     != 0        Error
    1488                 : */
    1489                 : 
    1490                 : static int make_huff_tree(HUFF_TREE *huff_tree, HUFF_COUNTS *huff_counts)
    1491             150 : {
    1492                 :   uint i,found,bits_packed,first,last;
    1493                 :   my_off_t bytes_packed;
    1494                 :   HUFF_ELEMENT *a,*b,*new_huff_el;
    1495                 : 
    1496             150 :   first=last=0;
    1497             150 :   if (huff_counts->tree_buff)
    1498                 :   {
    1499                 :     /* Calculate the number of distinct values in tree_buff. */
    1500              60 :     found= (uint) (huff_counts->tree_pos - huff_counts->tree_buff) /
    1501                 :       huff_counts->field_length;
    1502              60 :     first=0; last=found-1;
    1503                 :   }
    1504                 :   else
    1505                 :   {
    1506                 :     /* Count the number of uchar codes found in the column. */
    1507           23130 :     for (i=found=0 ; i < 256 ; i++)
    1508                 :     {
    1509           23040 :       if (huff_counts->counts[i])
    1510                 :       {
    1511            1000 :         if (! found++)
    1512              90 :           first=i;
    1513            1000 :         last=i;
    1514                 :       }
    1515                 :     }
    1516              90 :     if (found < 2)
    1517               0 :       found=2;
    1518                 :   }
    1519                 : 
    1520                 :   /* When using 'tree_buff' we can have more that 256 values. */
    1521             150 :   if (queue.max_elements < found)
    1522                 :   {
    1523               0 :     delete_queue(&queue);
    1524               0 :     if (init_queue(&queue,found,0,0,compare_huff_elements,0))
    1525               0 :       return -1;
    1526                 :   }
    1527                 : 
    1528                 :   /* Allocate or reallocate an element buffer for the Huffman tree. */
    1529             150 :   if (!huff_tree->element_buffer)
    1530                 :   {
    1531             120 :     if (!(huff_tree->element_buffer=
    1532                 :          (HUFF_ELEMENT*) my_malloc(found*2*sizeof(HUFF_ELEMENT),MYF(MY_WME))))
    1533               0 :       return 1;
    1534                 :   }
    1535                 :   else
    1536                 :   {
    1537                 :     HUFF_ELEMENT *temp;
    1538              30 :     if (!(temp=
    1539                 :           (HUFF_ELEMENT*) my_realloc((uchar*) huff_tree->element_buffer,
    1540                 :                                      found*2*sizeof(HUFF_ELEMENT),
    1541                 :                                      MYF(MY_WME))))
    1542               0 :       return 1;
    1543              30 :     huff_tree->element_buffer=temp;
    1544                 :   }
    1545                 : 
    1546             150 :   huff_counts->tree=huff_tree;
    1547             150 :   huff_tree->counts=huff_counts;
    1548             150 :   huff_tree->min_chr=first;
    1549             150 :   huff_tree->max_chr=last;
    1550             150 :   huff_tree->char_bits=max_bit(last-first);
    1551             150 :   huff_tree->offset_bits=max_bit(found-1)+1;
    1552                 : 
    1553             150 :   if (huff_counts->tree_buff)
    1554                 :   {
    1555              60 :     huff_tree->elements=0;
    1556              60 :     huff_tree->tree_pack_length=(1+15+16+5+5+
    1557                 :                                  (huff_tree->char_bits+1)*found+
    1558                 :                                  (huff_tree->offset_bits+1)*
    1559                 :                                  (found-2)+7)/8 +
    1560                 :                                    (uint) (huff_tree->counts->tree_pos-
    1561                 :                                            huff_tree->counts->tree_buff);
    1562                 :     /*
    1563                 :       Put a HUFF_ELEMENT into the queue for every distinct column value.
    1564                 : 
    1565                 :       tree_walk() calls save_counts_in_queue() for every element in
    1566                 :       'int_tree'. This takes elements from the target trees element
    1567                 :       buffer and places references to them into the buffer of the
    1568                 :       priority queue. We insert in column value order, but the order is
    1569                 :       in fact irrelevant here. We will establish the correct order
    1570                 :       later.
    1571                 :     */
    1572              60 :     tree_walk(&huff_counts->int_tree,
    1573                 :               (int (*)(void*, element_count,void*)) save_counts_in_queue,
    1574                 :               (uchar*) huff_tree, left_root_right);
    1575                 :   }
    1576                 :   else
    1577                 :   {
    1578              90 :     huff_tree->elements=found;
    1579              90 :     huff_tree->tree_pack_length=(9+9+5+5+
    1580                 :                                  (huff_tree->char_bits+1)*found+
    1581                 :                                  (huff_tree->offset_bits+1)*
    1582                 :                                  (found-2)+7)/8;
    1583                 :     /*
    1584                 :       Put a HUFF_ELEMENT into the queue for every uchar code found in the column.
    1585                 : 
    1586                 :       The elements are taken from the target trees element buffer.
    1587                 :       Instead of using queue_insert(), we just place references to the
    1588                 :       elements into the buffer of the priority queue. We insert in byte
    1589                 :       value order, but the order is in fact irrelevant here. We will
    1590                 :       establish the correct order later.
    1591                 :     */
    1592            5635 :     for (i=first, found=0 ; i <= last ; i++)
    1593                 :     {
    1594            5545 :       if (huff_counts->counts[i])
    1595                 :       {
    1596            1000 :         new_huff_el=huff_tree->element_buffer+(found++);
    1597            1000 :         new_huff_el->count=huff_counts->counts[i];
    1598            1000 :         new_huff_el->a.leaf.null=0;
    1599            1000 :         new_huff_el->a.leaf.element_nr=i;
    1600            1000 :         queue.root[found]=(uchar*) new_huff_el;
    1601                 :       }
    1602                 :     }
    1603                 :     /*
    1604                 :       If there is only a single uchar value in this field in all records,
    1605                 :       add a second element with zero incidence. This is required to enter
    1606                 :       the loop, which builds the Huffman tree.
    1607                 :     */
    1608              90 :     while (found < 2)
    1609                 :     {
    1610               0 :       new_huff_el=huff_tree->element_buffer+(found++);
    1611               0 :       new_huff_el->count=0;
    1612               0 :       new_huff_el->a.leaf.null=0;
    1613               0 :       if (last)
    1614               0 :         new_huff_el->a.leaf.element_nr=huff_tree->min_chr=last-1;
    1615                 :       else
    1616               0 :         new_huff_el->a.leaf.element_nr=huff_tree->max_chr=last+1;
    1617               0 :       queue.root[found]=(uchar*) new_huff_el;
    1618                 :     }
    1619                 :   }
    1620                 : 
    1621                 :   /* Make a queue from the queue buffer. */
    1622             150 :   queue.elements=found;
    1623                 : 
    1624                 :   /*
    1625                 :     Make a priority queue from the queue. Construct its index so that we
    1626                 :     have a partially ordered tree.
    1627                 :   */
    1628            1010 :   for (i=found/2 ; i > 0 ; i--)
    1629             860 :     _downheap(&queue,i);
    1630                 : 
    1631                 :   /* The Huffman algorithm. */
    1632             150 :   bytes_packed=0; bits_packed=0;
    1633            1855 :   for (i=1 ; i < found ; i++)
    1634                 :   {
    1635                 :     /*
    1636                 :       Pop the top element from the queue (the one with the least incidence).
    1637                 :       Popping from a priority queue includes a re-ordering of the queue,
    1638                 :       to get the next least incidence element to the top.
    1639                 :     */
    1640            1705 :     a=(HUFF_ELEMENT*) queue_remove(&queue,0);
    1641                 :     /*
    1642                 :       Copy the next least incidence element. The queue implementation
    1643                 :       reserves root[0] for temporary purposes. root[1] is the top.
    1644                 :     */
    1645            1705 :     b=(HUFF_ELEMENT*) queue.root[1];
    1646                 :     /* Get a new element from the element buffer. */
    1647            1705 :     new_huff_el=huff_tree->element_buffer+found+i;
    1648                 :     /* The new element gets the sum of the two least incidence elements. */
    1649            1705 :     new_huff_el->count=a->count+b->count;
    1650                 :     /*
    1651                 :       The Huffman algorithm assigns another bit to the code for a byte
    1652                 :       every time that bytes incidence is combined (directly or indirectly)
    1653                 :       to a new element as one of the two least incidence elements.
    1654                 :       This means that one more bit per incidence of that uchar is required
    1655                 :       in the resulting file. So we add the new combined incidence as the
    1656                 :       number of bits by which the result grows.
    1657                 :     */
    1658            1705 :     bits_packed+=(uint) (new_huff_el->count & 7);
    1659            1705 :     bytes_packed+=new_huff_el->count/8;
    1660                 :     /* The new element points to its children, lesser in left.  */
    1661            1705 :     new_huff_el->a.nod.left=a;
    1662            1705 :     new_huff_el->a.nod.right=b;
    1663                 :     /*
    1664                 :       Replace the copied top element by the new element and re-order the
    1665                 :       queue.
    1666                 :     */
    1667            1705 :     queue.root[1]=(uchar*) new_huff_el;
    1668            1705 :     queue_replaced(&queue);
    1669                 :   }
    1670             150 :   huff_tree->root=(HUFF_ELEMENT*) queue.root[1];
    1671             150 :   huff_tree->bytes_packed=bytes_packed+(bits_packed+7)/8;
    1672             150 :   return 0;
    1673                 : }
    1674                 : 
    1675                 : static int compare_tree(void* cmp_arg __attribute__((unused)),
    1676                 :                         register const uchar *s, register const uchar *t)
    1677            3285 : {
    1678                 :   uint length;
    1679           31010 :   for (length=global_count->field_length; length-- ;)
    1680           27530 :     if (*s++ != *t++)
    1681            3090 :       return (int) s[-1] - (int) t[-1];
    1682             195 :   return 0;
    1683                 : }
    1684                 : 
    1685                 : /*
    1686                 :   Organize distinct column values and their incidences into a priority queue.
    1687                 : 
    1688                 :   SYNOPSIS
    1689                 :     save_counts_in_queue()
    1690                 :     key                         The column value.
    1691                 :     count                       The incidence of this value.
    1692                 :     tree                        The Huffman tree to be built later.
    1693                 : 
    1694                 :   DESCRIPTION
    1695                 :     We use the element buffer of the targeted tree. The distinct column
    1696                 :     values are organized in a priority queue first. The Huffman
    1697                 :     algorithm will later organize the elements into a Huffman tree. For
    1698                 :     the time being, we just place references to the elements into the
    1699                 :     queue buffer. The buffer will later be organized into a priority
    1700                 :     queue.
    1701                 : 
    1702                 :   RETURN
    1703                 :     0
    1704                 :  */
    1705                 : 
    1706                 : static int save_counts_in_queue(uchar *key, element_count count,
    1707                 :                                 HUFF_TREE *tree)
    1708             855 : {
    1709                 :   HUFF_ELEMENT *new_huff_el;
    1710                 : 
    1711             855 :   new_huff_el=tree->element_buffer+(tree->elements++);
    1712             855 :   new_huff_el->count=count;
    1713             855 :   new_huff_el->a.leaf.null=0;
    1714             855 :   new_huff_el->a.leaf.element_nr= (uint) (key- tree->counts->tree_buff) /
    1715                 :     tree->counts->field_length;
    1716             855 :   queue.root[tree->elements]=(uchar*) new_huff_el;
    1717             855 :   return 0;
    1718                 : }
    1719                 : 
    1720                 : 
    1721                 : /*
    1722                 :   Calculate length of file if given counts should be used.
    1723                 : 
    1724                 :   SYNOPSIS
    1725                 :     calc_packed_length()
    1726                 :     huff_counts                 The counts for a column of the table(s).
    1727                 :     add_tree_lenght             If the decode tree length should be added.
    1728                 : 
    1729                 :   DESCRIPTION
    1730                 :     We need to follow the Huffman algorithm until we know, how many bits
    1731                 :     are required for each uchar code. But we do not need the resulting
    1732                 :     Huffman tree. Hence, we can leave out some steps which are essential
    1733                 :     in make_huff_tree().
    1734                 : 
    1735                 :   RETURN
    1736                 :     Number of bytes required to compress this table column.
    1737                 : */
    1738                 : 
    1739                 : static my_off_t calc_packed_length(HUFF_COUNTS *huff_counts,
    1740                 :                                    uint add_tree_lenght)
    1741             175 : {
    1742                 :   uint i,found,bits_packed,first,last;
    1743                 :   my_off_t bytes_packed;
    1744                 :   HUFF_ELEMENT element_buffer[256];
    1745             175 :   DBUG_ENTER("calc_packed_length");
    1746                 : 
    1747                 :   /*
    1748                 :     WARNING: We use a small hack for efficiency: Instead of placing
    1749                 :     references to HUFF_ELEMENTs into the queue, we just insert
    1750                 :     references to the counts of the uchar codes which appeared in this
    1751                 :     table column. During the Huffman algorithm they are successively
    1752                 :     replaced by references to HUFF_ELEMENTs. This works, because
    1753                 :     HUFF_ELEMENTs have the incidence count at their beginning.
    1754                 :     Regardless, wether the queue array contains references to counts of
    1755                 :     type my_off_t or references to HUFF_ELEMENTs which have the count of
    1756                 :     type my_off_t at their beginning, it always points to a count of the
    1757                 :     same type.
    1758                 : 
    1759                 :     Instead of using queue_insert(), we just copy the references into
    1760                 :     the buffer of the priority queue. We insert in uchar value order, but
    1761                 :     the order is in fact irrelevant here. We will establish the correct
    1762                 :     order later.
    1763                 :   */
    1764             175 :   first=last=0;
    1765           44975 :   for (i=found=0 ; i < 256 ; i++)
    1766                 :   {
    1767           44800 :     if (huff_counts->counts[i])
    1768                 :     {
    1769            1670 :       if (! found++)
    1770             175 :         first=i;
    1771            1670 :       last=i;
    1772                 :       /* We start with root[1], which is the queues top element. */
    1773            1670 :       queue.root[found]=(uchar*) &huff_counts->counts[i];
    1774                 :     }
    1775                 :   }
    1776             175 :   if (!found)
    1777               0 :     DBUG_RETURN(0);                     /* Empty tree */
    1778                 :   /*
    1779                 :     If there is only a single uchar value in this field in all records,
    1780                 :     add a second element with zero incidence. This is required to enter
    1781                 :     the loop, which follows the Huffman algorithm.
    1782                 :   */
    1783             175 :   if (found < 2)
    1784               0 :     queue.root[++found]=(uchar*) &huff_counts->counts[last ? 0 : 1];
    1785                 : 
    1786                 :   /* Make a queue from the queue buffer. */
    1787             175 :   queue.elements=found;
    1788                 : 
    1789             175 :   bytes_packed=0; bits_packed=0;
    1790                 :   /* Add the length of the coding table, which would become part of the file. */
    1791             175 :   if (add_tree_lenght)
    1792              30 :     bytes_packed=(8+9+5+5+(max_bit(last-first)+1)*found+
    1793                 :                   (max_bit(found-1)+1+1)*(found-2) +7)/8;
    1794                 : 
    1795                 :   /*
    1796                 :     Make a priority queue from the queue. Construct its index so that we
    1797                 :     have a partially ordered tree.
    1798                 :   */
    1799            1055 :   for (i=(found+1)/2 ; i > 0 ; i--)
    1800             880 :     _downheap(&queue,i);
    1801                 : 
    1802                 :   /* The Huffman algorithm. */
    1803            1670 :   for (i=0 ; i < found-1 ; i++)
    1804                 :   {
    1805                 :     my_off_t        *a;
    1806                 :     my_off_t        *b;
    1807                 :     HUFF_ELEMENT    *new_huff_el;
    1808                 : 
    1809                 :     /*
    1810                 :       Pop the top element from the queue (the one with the least
    1811                 :       incidence). Popping from a priority queue includes a re-ordering
    1812                 :       of the queue, to get the next least incidence element to the top.
    1813                 :     */
    1814            1495 :     a= (my_off_t*) queue_remove(&queue, 0);
    1815                 :     /*
    1816                 :       Copy the next least incidence element. The queue implementation
    1817                 :       reserves root[0] for temporary purposes. root[1] is the top.
    1818                 :     */
    1819            1495 :     b= (my_off_t*) queue.root[1];
    1820                 :     /* Create a new element in a local (automatic) buffer. */
    1821            1495 :     new_huff_el= element_buffer + i;
    1822                 :     /* The new element gets the sum of the two least incidence elements. */
    1823            1495 :     new_huff_el->count= *a + *b;
    1824                 :     /*
    1825                 :       The Huffman algorithm assigns another bit to the code for a byte
    1826                 :       every time that bytes incidence is combined (directly or indirectly)
    1827                 :       to a new element as one of the two least incidence elements.
    1828                 :       This means that one more bit per incidence of that uchar is required
    1829                 :       in the resulting file. So we add the new combined incidence as the
    1830                 :       number of bits by which the result grows.
    1831                 :     */
    1832            1495 :     bits_packed+=(uint) (new_huff_el->count & 7);
    1833            1495 :     bytes_packed+=new_huff_el->count/8;
    1834                 :     /*
    1835                 :       Replace the copied top element by the new element and re-order the
    1836                 :       queue. This successively replaces the references to counts by
    1837                 :       references to HUFF_ELEMENTs.
    1838                 :     */
    1839            1495 :     queue.root[1]=(uchar*) new_huff_el;
    1840            1495 :     queue_replaced(&queue);
    1841                 :   }
    1842             175 :   DBUG_RETURN(bytes_packed+(bits_packed+7)/8);
    1843                 : }
    1844                 : 
    1845                 : 
    1846                 :         /* Remove trees that don't give any compression */
    1847                 : 
    1848                 : static uint join_same_trees(HUFF_COUNTS *huff_counts, uint trees)
    1849              30 : {
    1850                 :   uint k,tree_number;
    1851                 :   HUFF_COUNTS count,*i,*j,*last_count;
    1852                 : 
    1853              30 :   last_count=huff_counts+trees;
    1854              90 :   for (tree_number=0, i=huff_counts ; i < last_count ; i++)
    1855                 :   {
    1856              60 :     if (!i->tree->tree_number)
    1857                 :     {
    1858              30 :       i->tree->tree_number= ++tree_number;
    1859              30 :       if (i->tree_buff)
    1860              30 :         continue;                       /* Don't join intervall */
    1861              60 :       for (j=i+1 ; j < last_count ; j++)
    1862                 :       {
    1863              30 :         if (! j->tree->tree_number && ! j->tree_buff)
    1864                 :         {
    1865            7710 :           for (k=0 ; k < 256 ; k++)
    1866            7680 :             count.counts[k]=i->counts[k]+j->counts[k];
    1867              30 :           if (calc_packed_length(&count,1) <=
    1868                 :               i->tree->bytes_packed + j->tree->bytes_packed+
    1869                 :               i->tree->tree_pack_length+j->tree->tree_pack_length+
    1870                 :               ALLOWED_JOIN_DIFF)
    1871                 :           {
    1872              30 :             memcpy_fixed((uchar*) i->counts,(uchar*) count.counts,
    1873                 :                          sizeof(count.counts[0])*256);
    1874              30 :             my_free((uchar*) j->tree->element_buffer,MYF(0));
    1875              30 :             j->tree->element_buffer=0;
    1876              30 :             j->tree=i->tree;
    1877              30 :             bmove((uchar*) i->counts,(uchar*) count.counts,
    1878                 :                   sizeof(count.counts[0])*256);
    1879              30 :             if (make_huff_tree(i->tree,i))
    1880               0 :               return (uint) -1;
    1881                 :           }
    1882                 :         }
    1883                 :       }
    1884                 :     }
    1885                 :   }
    1886              30 :   DBUG_PRINT("info", ("Original trees:  %d  After join: %d",
    1887                 :                       trees, tree_number));
    1888              30 :   if (verbose)
    1889               0 :     VOID(printf("Original trees:  %d  After join: %d\n", trees, tree_number));
    1890              30 :   return tree_number;                   /* Return trees left */
    1891                 : }
    1892                 : 
    1893                 : 
    1894                 : /*
    1895                 :   Fill in huff_tree encode tables.
    1896                 : 
    1897                 :   SYNOPSIS
    1898                 :     make_huff_decode_table()
    1899                 :     huff_tree               An array of HUFF_TREE which are to be encoded.
    1900                 :     trees                   The number of HUFF_TREE in the array.
    1901                 : 
    1902                 :   RETURN
    1903                 :     0           success
    1904                 :     != 0        error
    1905                 : */
    1906                 : 
    1907                 : static int make_huff_decode_table(HUFF_TREE *huff_tree, uint trees)
    1908              30 : {
    1909                 :   uint elements;
    1910              90 :   for ( ; trees-- ; huff_tree++)
    1911                 :   {
    1912              60 :     if (huff_tree->tree_number > 0)
    1913                 :     {
    1914              30 :       elements=huff_tree->counts->tree_buff ? huff_tree->elements : 256;
    1915              30 :       if (!(huff_tree->code =
    1916                 :             (ulonglong*) my_malloc(elements*
    1917                 :                                    (sizeof(ulonglong) + sizeof(uchar)),
    1918                 :                                    MYF(MY_WME | MY_ZEROFILL))))
    1919               0 :         return 1;
    1920              30 :       huff_tree->code_len=(uchar*) (huff_tree->code+elements);
    1921              30 :       make_traverse_code_tree(huff_tree, huff_tree->root,
    1922                 :                               8 * sizeof(ulonglong), LL(0));
    1923                 :     }
    1924                 :   }
    1925              30 :   return 0;
    1926                 : }
    1927                 : 
    1928                 : 
    1929                 : static void make_traverse_code_tree(HUFF_TREE *huff_tree,
    1930                 :                                     HUFF_ELEMENT *element,
    1931                 :                                     uint size, ulonglong code)
    1932             770 : {
    1933                 :   uint chr;
    1934             770 :   if (!element->a.leaf.null)
    1935                 :   {
    1936             400 :     chr=element->a.leaf.element_nr;
    1937             400 :     huff_tree->code_len[chr]= (uchar) (8 * sizeof(ulonglong) - size);
    1938             400 :     huff_tree->code[chr]= (code >> size);
    1939             400 :     if (huff_tree->height < 8 * sizeof(ulonglong) - size)
    1940              95 :         huff_tree->height= 8 * sizeof(ulonglong) - size;
    1941                 :   }
    1942                 :   else
    1943                 :   {
    1944             370 :     size--;
    1945             370 :     make_traverse_code_tree(huff_tree,element->a.nod.left,size,code);
    1946             370 :     make_traverse_code_tree(huff_tree, element->a.nod.right, size,
    1947                 :                             code + (((ulonglong) 1) << size));
    1948                 :   }
    1949                 :   return;
    1950                 : }
    1951                 : 
    1952                 : 
    1953                 : /*
    1954                 :   Convert a value into binary digits.
    1955                 : 
    1956                 :   SYNOPSIS
    1957                 :     bindigits()
    1958                 :     value                       The value.
    1959                 :     length                      The number of low order bits to convert.
    1960                 : 
    1961                 :   NOTE
    1962                 :     The result string is in static storage. It is reused on every call.
    1963                 :     So you cannot use it twice in one expression.
    1964                 : 
    1965                 :   RETURN
    1966                 :     A pointer to a static NUL-terminated string.
    1967                 :  */
    1968                 : 
    1969                 : static char *bindigits(ulonglong value, uint bits)
    1970           13825 : {
    1971                 :   static char digits[72];
    1972           13825 :   char *ptr= digits;
    1973           13825 :   uint idx= bits;
    1974                 : 
    1975           13825 :   DBUG_ASSERT(idx < sizeof(digits));
    1976           48275 :   while (idx)
    1977           34450 :     *(ptr++)= '0' + ((char) (value >> (--idx)) & (char) 1);
    1978           13825 :   *ptr= '\0';
    1979           13825 :   return digits;
    1980                 : }
    1981                 : 
    1982                 : 
    1983                 : /*
    1984                 :   Convert a value into hexadecimal digits.
    1985                 : 
    1986                 :   SYNOPSIS
    1987                 :     hexdigits()
    1988                 :     value                       The value.
    1989                 : 
    1990                 :   NOTE
    1991                 :     The result string is in static storage. It is reused on every call.
    1992                 :     So you cannot use it twice in one expression.
    1993                 : 
    1994                 :   RETURN
    1995                 :     A pointer to a static NUL-terminated string.
    1996                 :  */
    1997                 : 
    1998                 : static char *hexdigits(ulonglong value)
    1999           13825 : {
    2000                 :   static char digits[20];
    2001           13825 :   char *ptr= digits;
    2002           13825 :   uint idx= 2 * sizeof(value); /* Two hex digits per byte. */
    2003                 : 
    2004           13825 :   DBUG_ASSERT(idx < sizeof(digits));
    2005          235025 :   while (idx)
    2006                 :   {
    2007          221200 :     if ((*(ptr++)= '0' + ((char) (value >> (4 * (--idx))) & (char) 0xf)) > '9')
    2008            3200 :       *(ptr - 1)+= 'a' - '9' - 1;
    2009                 :   }
    2010           13825 :   *ptr= '\0';
    2011           13825 :   return digits;
    2012                 : }
    2013                 : 
    2014                 : 
    2015                 :         /* Write header to new packed data file */
    2016                 : 
    2017                 : static int write_header(PACK_MRG_INFO *mrg,uint head_length,uint trees,
    2018                 :                         my_off_t tot_elements,my_off_t filelength)
    2019              30 : {
    2020              30 :   uchar *buff= (uchar*) file_buffer.pos;
    2021                 : 
    2022              30 :   bzero(buff,HEAD_LENGTH);
    2023              30 :   memcpy_fixed(buff,maria_pack_file_magic,4);
    2024              30 :   int4store(buff+4,head_length);
    2025              30 :   int4store(buff+8, mrg->min_pack_length);
    2026              30 :   int4store(buff+12,mrg->max_pack_length);
    2027              30 :   int4store(buff+16,tot_elements);
    2028              30 :   int4store(buff+20,intervall_length);
    2029              30 :   int2store(buff+24,trees);
    2030              30 :   buff[26]=(char) mrg->ref_length;
    2031                 :         /* Save record pointer length */
    2032              30 :   buff[27]= (uchar) maria_get_pointer_length((ulonglong) filelength,2);
    2033              30 :   if (test_only)
    2034               0 :     return 0;
    2035              30 :   VOID(my_seek(file_buffer.file,0L,MY_SEEK_SET,MYF(0)));
    2036              30 :   return my_write(file_buffer.file,(const uchar *) file_buffer.pos,HEAD_LENGTH,
    2037                 :                   MYF(MY_WME | MY_NABP | MY_WAIT_IF_FULL)) != 0;
    2038                 : }
    2039                 : 
    2040                 :         /* Write fieldinfo to new packed file */
    2041                 : 
    2042                 : static void write_field_info(HUFF_COUNTS *counts, uint fields, uint trees)
    2043              30 : {
    2044                 :   reg1 uint i;
    2045                 :   uint huff_tree_bits;
    2046              30 :   huff_tree_bits=max_bit(trees ? trees-1 : 0);
    2047                 : 
    2048              30 :   DBUG_PRINT("info", (" "));
    2049              30 :   DBUG_PRINT("info", ("column types:"));
    2050              30 :   DBUG_PRINT("info", ("FIELD_NORMAL          0"));
    2051              30 :   DBUG_PRINT("info", ("FIELD_SKIP_ENDSPACE   1"));
    2052              30 :   DBUG_PRINT("info", ("FIELD_SKIP_PRESPACE   2"));
    2053              30 :   DBUG_PRINT("info", ("FIELD_SKIP_ZERO       3"));
    2054              30 :   DBUG_PRINT("info", ("FIELD_BLOB            4"));
    2055              30 :   DBUG_PRINT("info", ("FIELD_CONSTANT        5"));
    2056              30 :   DBUG_PRINT("info", ("FIELD_INTERVALL       6"));
    2057              30 :   DBUG_PRINT("info", ("FIELD_ZERO            7"));
    2058              30 :   DBUG_PRINT("info", ("FIELD_VARCHAR         8"));
    2059              30 :   DBUG_PRINT("info", ("FIELD_CHECK           9"));
    2060              30 :   DBUG_PRINT("info", (" "));
    2061              30 :   DBUG_PRINT("info", ("pack type as a set of flags:"));
    2062              30 :   DBUG_PRINT("info", ("PACK_TYPE_SELECTED      1"));
    2063              30 :   DBUG_PRINT("info", ("PACK_TYPE_SPACE_FIELDS  2"));
    2064              30 :   DBUG_PRINT("info", ("PACK_TYPE_ZERO_FILL     4"));
    2065              30 :   DBUG_PRINT("info", (" "));
    2066              30 :   if (verbose >= 2)
    2067                 :   {
    2068               0 :     VOID(printf("\n"));
    2069               0 :     VOID(printf("column types:\n"));
    2070               0 :     VOID(printf("FIELD_NORMAL          0\n"));
    2071               0 :     VOID(printf("FIELD_SKIP_ENDSPACE   1\n"));
    2072               0 :     VOID(printf("FIELD_SKIP_PRESPACE   2\n"));
    2073               0 :     VOID(printf("FIELD_SKIP_ZERO       3\n"));
    2074               0 :     VOID(printf("FIELD_BLOB            4\n"));
    2075               0 :     VOID(printf("FIELD_CONSTANT        5\n"));
    2076               0 :     VOID(printf("FIELD_INTERVALL       6\n"));
    2077               0 :     VOID(printf("FIELD_ZERO            7\n"));
    2078               0 :     VOID(printf("FIELD_VARCHAR         8\n"));
    2079               0 :     VOID(printf("FIELD_CHECK           9\n"));
    2080               0 :     VOID(printf("\n"));
    2081               0 :     VOID(printf("pack type as a set of flags:\n"));
    2082               0 :     VOID(printf("PACK_TYPE_SELECTED      1\n"));
    2083               0 :     VOID(printf("PACK_TYPE_SPACE_FIELDS  2\n"));
    2084               0 :     VOID(printf("PACK_TYPE_ZERO_FILL     4\n"));
    2085               0 :     VOID(printf("\n"));
    2086                 :   }
    2087              90 :   for (i=0 ; i++ < fields ; counts++)
    2088                 :   {
    2089              60 :     write_bits((ulonglong) (int) counts->field_type, 5);
    2090              60 :     write_bits(counts->pack_type,6);
    2091              60 :     if (counts->pack_type & PACK_TYPE_ZERO_FILL)
    2092               0 :       write_bits(counts->max_zero_fill,5);
    2093                 :     else
    2094              60 :       write_bits(counts->length_bits,5);
    2095              60 :     write_bits((ulonglong) counts->tree->tree_number - 1, huff_tree_bits);
    2096              60 :     DBUG_PRINT("info", ("column: %3u  type: %2u  pack: %2u  zero: %4u  "
    2097                 :                         "lbits: %2u  tree: %2u  length: %4u",
    2098                 :                         i , counts->field_type, counts->pack_type,
    2099                 :                         counts->max_zero_fill, counts->length_bits,
    2100                 :                         counts->tree->tree_number, counts->field_length));
    2101              60 :     if (verbose >= 2)
    2102               0 :       VOID(printf("column: %3u  type: %2u  pack: %2u  zero: %4u  lbits: %2u  "
    2103                 :                   "tree: %2u  length: %4u\n", i , counts->field_type,
    2104                 :                   counts->pack_type, counts->max_zero_fill, counts->length_bits,
    2105                 :                   counts->tree->tree_number, counts->field_length));
    2106                 :   }
    2107              30 :   flush_bits();
    2108                 :   return;
    2109                 : }
    2110                 : 
    2111                 :         /* Write all huff_trees to new datafile. Return tot count of
    2112                 :            elements in all trees
    2113                 :            Returns 0 on error */
    2114                 : 
    2115                 : static my_off_t write_huff_tree(HUFF_TREE *huff_tree, uint trees)
    2116              30 : {
    2117                 :   uint i,int_length;
    2118                 :   uint tree_no;
    2119                 :   uint codes;
    2120              30 :   uint errors= 0;
    2121                 :   uint *packed_tree,*offset,length;
    2122                 :   my_off_t elements;
    2123                 : 
    2124                 :   /* Find the highest number of elements in the trees. */
    2125              90 :   for (i=length=0 ; i < trees ; i++)
    2126              60 :     if (huff_tree[i].tree_number > 0 && huff_tree[i].elements > length)
    2127              30 :       length=huff_tree[i].elements;
    2128                 :   /*
    2129                 :     Allocate a buffer for packing a decode tree. Two numbers per element
    2130                 :     (left child and right child).
    2131                 :   */
    2132              30 :   if (!(packed_tree=(uint*) my_alloca(sizeof(uint)*length*2)))
    2133                 :   {
    2134               0 :     my_error(EE_OUTOFMEMORY,MYF(ME_BELL),sizeof(uint)*length*2);
    2135               0 :     return 0;
    2136                 :   }
    2137                 : 
    2138              30 :   DBUG_PRINT("info", (" "));
    2139              30 :   if (verbose >= 2)
    2140               0 :     VOID(printf("\n"));
    2141              30 :   tree_no= 0;
    2142              30 :   intervall_length=0;
    2143              90 :   for (elements=0; trees-- ; huff_tree++)
    2144                 :   {
    2145                 :     /* Skip columns that have been joined with other columns. */
    2146              60 :     if (huff_tree->tree_number == 0)
    2147              30 :       continue;                         /* Deleted tree */
    2148              30 :     tree_no++;
    2149              30 :     DBUG_PRINT("info", (" "));
    2150              30 :     if (verbose >= 3)
    2151               0 :       VOID(printf("\n"));
    2152                 :     /* Count the total number of elements (byte codes or column values). */
    2153              30 :     elements+=huff_tree->elements;
    2154              30 :     huff_tree->max_offset=2;
    2155                 :     /* Build a tree of offsets and codes for decoding in 'packed_tree'. */
    2156              30 :     if (huff_tree->elements <= 1)
    2157               0 :       offset=packed_tree;
    2158                 :     else
    2159              30 :       offset=make_offset_code_tree(huff_tree,huff_tree->root,packed_tree);
    2160                 : 
    2161                 :     /* This should be the same as 'length' above. */
    2162              30 :     huff_tree->offset_bits=max_bit(huff_tree->max_offset);
    2163                 : 
    2164                 :     /*
    2165                 :       Since we check this during collecting the distinct column values,
    2166                 :       this should never happen.
    2167                 :     */
    2168              30 :     if (huff_tree->max_offset >= IS_OFFSET)
    2169                 :     {                           /* This should be impossible */
    2170               0 :       VOID(fprintf(stderr, "Tree offset got too big: %d, aborted\n",
    2171                 :                    huff_tree->max_offset));
    2172                 :       my_afree(packed_tree);
    2173               0 :       return 0;
    2174                 :     }
    2175                 : 
    2176              30 :     DBUG_PRINT("info", ("pos: %lu  elements: %u  tree-elements: %lu  "
    2177                 :                         "char_bits: %u\n",
    2178                 :                         (ulong) (file_buffer.pos - file_buffer.buffer),
    2179                 :                         huff_tree->elements, (ulong) (offset - packed_tree),
    2180                 :                         huff_tree->char_bits));
    2181              30 :     if (!huff_tree->counts->tree_buff)
    2182                 :     {
    2183                 :       /* We do a uchar compression on this column. Mark with bit 0. */
    2184              30 :       write_bits(0,1);
    2185              30 :       write_bits(huff_tree->min_chr,8);
    2186              30 :       write_bits(huff_tree->elements,9);
    2187              30 :       write_bits(huff_tree->char_bits,5);
    2188              30 :       write_bits(huff_tree->offset_bits,5);
    2189              30 :       int_length=0;
    2190                 :     }
    2191                 :     else
    2192                 :     {
    2193               0 :       int_length=(uint) (huff_tree->counts->tree_pos -
    2194                 :                          huff_tree->counts->tree_buff);
    2195                 :       /* We have distinct column values for this column. Mark with bit 1. */
    2196               0 :       write_bits(1,1);
    2197               0 :       write_bits(huff_tree->elements,15);
    2198               0 :       write_bits(int_length,16);
    2199               0 :       write_bits(huff_tree->char_bits,5);
    2200               0 :       write_bits(huff_tree->offset_bits,5);
    2201               0 :       intervall_length+=int_length;
    2202                 :     }
    2203              30 :     DBUG_PRINT("info", ("tree: %2u  elements: %4u  char_bits: %2u  "
    2204                 :                         "offset_bits: %2u  %s: %5u  codelen: %2u",
    2205                 :                         tree_no, huff_tree->elements, huff_tree->char_bits,
    2206                 :                         huff_tree->offset_bits, huff_tree->counts->tree_buff ?
    2207                 :                         "bufflen" : "min_chr", huff_tree->counts->tree_buff ?
    2208                 :                         int_length : huff_tree->min_chr, huff_tree->height));
    2209              30 :     if (verbose >= 2)
    2210               0 :       VOID(printf("tree: %2u  elements: %4u  char_bits: %2u  offset_bits: %2u  "
    2211                 :                   "%s: %5u  codelen: %2u\n", tree_no, huff_tree->elements,
    2212                 :                   huff_tree->char_bits, huff_tree->offset_bits,
    2213                 :                   huff_tree->counts->tree_buff ? "bufflen" : "min_chr",
    2214                 :                   huff_tree->counts->tree_buff ? int_length :
    2215                 :                   huff_tree->min_chr, huff_tree->height));
    2216                 : 
    2217                 :     /* Check that the code tree length matches the element count. */
    2218              30 :     length=(uint) (offset-packed_tree);
    2219              30 :     if (length != huff_tree->elements*2-2)
    2220                 :     {
    2221               0 :       VOID(fprintf(stderr, "error: Huff-tree-length: %d != calc_length: %d\n",
    2222                 :                    length, huff_tree->elements * 2 - 2));
    2223               0 :       errors++;
    2224               0 :       break;
    2225                 :     }
    2226                 : 
    2227             770 :     for (i=0 ; i < length ; i++)
    2228                 :     {
    2229             740 :       if (packed_tree[i] & IS_OFFSET)
    2230             340 :         write_bits(packed_tree[i] - IS_OFFSET+ (1 << huff_tree->offset_bits),
    2231                 :                    huff_tree->offset_bits+1);
    2232                 :       else
    2233             400 :         write_bits(packed_tree[i]-huff_tree->min_chr,huff_tree->char_bits+1);
    2234             740 :       DBUG_PRINT("info", ("tree[0x%04x]: %s0x%04x",
    2235                 :                           i, (packed_tree[i] & IS_OFFSET) ?
    2236                 :                           " -> " : "", (packed_tree[i] & IS_OFFSET) ?
    2237                 :                           packed_tree[i] - IS_OFFSET + i : packed_tree[i]));
    2238             740 :       if (verbose >= 3)
    2239               0 :         VOID(printf("tree[0x%04x]: %s0x%04x\n",
    2240                 :                     i, (packed_tree[i] & IS_OFFSET) ? " -> " : "",
    2241                 :                     (packed_tree[i] & IS_OFFSET) ?
    2242                 :                     packed_tree[i] - IS_OFFSET + i : packed_tree[i]));
    2243                 :     }
    2244              30 :     flush_bits();
    2245                 : 
    2246                 :     /*
    2247                 :       Display coding tables and check their correctness.
    2248                 :     */
    2249              30 :     codes= huff_tree->counts->tree_buff ? huff_tree->elements : 256;
    2250            7710 :     for (i= 0; i < codes; i++)
    2251                 :     {
    2252                 :       ulonglong code;
    2253                 :       uint bits;
    2254                 :       uint len;
    2255                 :       uint idx;
    2256                 : 
    2257            7680 :       if (! (len= huff_tree->code_len[i]))
    2258             400 :         continue;
    2259             400 :       DBUG_PRINT("info", ("code[0x%04x]:      0x%s  bits: %2u  bin: %s", i,
    2260                 :                           hexdigits(huff_tree->code[i]), huff_tree->code_len[i],
    2261                 :                           bindigits(huff_tree->code[i],
    2262                 :                                     huff_tree->code_len[i])));
    2263             400 :       if (verbose >= 3)
    2264               0 :         VOID(printf("code[0x%04x]:      0x%s  bits: %2u  bin: %s\n", i,
    2265                 :                     hexdigits(huff_tree->code[i]), huff_tree->code_len[i],
    2266                 :                     bindigits(huff_tree->code[i], huff_tree->code_len[i])));
    2267                 : 
    2268                 :       /* Check that the encode table decodes correctly. */
    2269             400 :       code= 0;
    2270             400 :       bits= 0;
    2271             400 :       idx= 0;
    2272             400 :       DBUG_EXECUTE_IF("forcechkerr1", len--;);
    2273             400 :       DBUG_EXECUTE_IF("forcechkerr2", bits= 8 * sizeof(code););
    2274             400 :       DBUG_EXECUTE_IF("forcechkerr3", idx= length;);
    2275                 :       for (;;)
    2276                 :       {
    2277            2060 :         if (! len)
    2278                 :         {
    2279               0 :           VOID(fflush(stdout));
    2280               0 :           VOID(fprintf(stderr, "error: code 0x%s with %u bits not found\n",
    2281                 :                        hexdigits(huff_tree->code[i]), huff_tree->code_len[i]));
    2282               0 :           errors++;
    2283               0 :           break;
    2284                 :         }
    2285            2060 :         code<<= 1;
    2286            2060 :         code|= (huff_tree->code[i] >> (--len)) & 1;
    2287            2060 :         bits++;
    2288            2060 :         if (bits > 8 * sizeof(code))
    2289                 :         {
    2290               0 :           VOID(fflush(stdout));
    2291               0 :           VOID(fprintf(stderr, "error: Huffman code too long: %u/%u\n",
    2292                 :                        bits, (uint) (8 * sizeof(code))));
    2293               0 :           errors++;
    2294               0 :           break;
    2295                 :         }
    2296            2060 :         idx+= (uint) code & 1;
    2297            2060 :         if (idx >= length)
    2298                 :         {
    2299               0 :           VOID(fflush(stdout));
    2300               0 :           VOID(fprintf(stderr, "error: illegal tree offset: %u/%u\n",
    2301                 :                        idx, length));
    2302               0 :           errors++;
    2303               0 :           break;
    2304                 :         }
    2305            2060 :         if (packed_tree[idx] & IS_OFFSET)
    2306            1660 :           idx+= packed_tree[idx] & ~IS_OFFSET;
    2307                 :         else
    2308            1660 :           break; /* Hit a leaf. This contains the result value. */
    2309            1660 :       }
    2310             400 :       if (errors)
    2311             400 :         break;
    2312                 : 
    2313             400 :       DBUG_EXECUTE_IF("forcechkerr4", packed_tree[idx]++;);
    2314             400 :       if (packed_tree[idx] != i)
    2315                 :       {
    2316               0 :         VOID(fflush(stdout));
    2317               0 :         VOID(fprintf(stderr, "error: decoded value 0x%04x  should be: 0x%04x\n",
    2318                 :                      packed_tree[idx], i));
    2319               0 :         errors++;
    2320               0 :         break;
    2321                 :       }
    2322                 :     } /*end for (codes)*/
    2323              30 :     if (errors)
    2324              30 :       break;
    2325                 : 
    2326                 :     /* Write column values in case of distinct column value compression. */
    2327              30 :     if (huff_tree->counts->tree_buff)
    2328                 :     {
    2329               0 :       for (i=0 ; i < int_length ; i++)
    2330                 :       {
    2331               0 :         write_bits((ulonglong) (uchar) huff_tree->counts->tree_buff[i], 8);
    2332               0 :         DBUG_PRINT("info", ("column_values[0x%04x]: 0x%02x",
    2333                 :                             i, (uchar) huff_tree->counts->tree_buff[i]));
    2334               0 :         if (verbose >= 3)
    2335               0 :           VOID(printf("column_values[0x%04x]: 0x%02x\n",
    2336                 :                       i, (uchar) huff_tree->counts->tree_buff[i]));
    2337                 :       }
    2338                 :     }
    2339              30 :     flush_bits();
    2340                 :   }
    2341              30 :   DBUG_PRINT("info", (" "));
    2342              30 :   if (verbose >= 2)
    2343               0 :     VOID(printf("\n"));
    2344                 :   my_afree(packed_tree);
    2345              30 :   if (errors)
    2346                 :   {
    2347               0 :     VOID(fprintf(stderr, "Error: Generated decode trees are corrupt. Stop.\n"));
    2348               0 :     return 0;
    2349                 :   }
    2350              30 :   return elements;
    2351                 : }
    2352                 : 
    2353                 : 
    2354                 : static uint *make_offset_code_tree(HUFF_TREE *huff_tree, HUFF_ELEMENT *element,
    2355                 :                                    uint *offset)
    2356             370 : {
    2357                 :   uint *prev_offset;
    2358                 : 
    2359             370 :   prev_offset= offset;
    2360                 :   /*
    2361                 :     'a.leaf.null' takes the same place as 'a.nod.left'. If this is null,
    2362                 :     then there is no left child and, hence no right child either. This
    2363                 :     is a property of a binary tree. An element is either a node with two
    2364                 :     childs, or a leaf without childs.
    2365                 : 
    2366                 :     The current element is always a node with two childs. Go left first.
    2367                 :   */
    2368             370 :   if (!element->a.nod.left->a.leaf.null)
    2369                 :   {
    2370                 :     /* Store the uchar code or the index of the column value. */
    2371             215 :     prev_offset[0] =(uint) element->a.nod.left->a.leaf.element_nr;
    2372             215 :     offset+=2;
    2373                 :   }
    2374                 :   else
    2375                 :   {
    2376                 :     /*
    2377                 :       Recursively traverse the tree to the left. Mark it as an offset to
    2378                 :       another tree node (in contrast to a uchar code or column value index).
    2379                 :     */
    2380             155 :     prev_offset[0]= IS_OFFSET+2;
    2381             155 :     offset=make_offset_code_tree(huff_tree,element->a.nod.left,offset+2);
    2382                 :   }
    2383                 : 
    2384                 :   /* Now, check the right child. */
    2385             370 :   if (!element->a.nod.right->a.leaf.null)
    2386                 :   {
    2387                 :     /* Store the uchar code or the index of the column value. */
    2388             185 :     prev_offset[1]=element->a.nod.right->a.leaf.element_nr;
    2389             185 :     return offset;
    2390                 :   }
    2391                 :   else
    2392                 :   {
    2393                 :     /*
    2394                 :       Recursively traverse the tree to the right. Mark it as an offset to
    2395                 :       another tree node (in contrast to a uchar code or column value index).
    2396                 :     */
    2397             185 :     uint temp=(uint) (offset-prev_offset-1);
    2398             185 :     prev_offset[1]= IS_OFFSET+ temp;
    2399             185 :     if (huff_tree->max_offset < temp)
    2400              80 :       huff_tree->max_offset = temp;
    2401             185 :     return make_offset_code_tree(huff_tree,element->a.nod.right,offset);
    2402                 :   }
    2403                 : }
    2404                 : 
    2405                 :         /* Get number of bits neaded to represent value */
    2406                 : 
    2407                 : static uint max_bit(register uint value)
    2408             480 : {
    2409             480 :   reg2 uint power=1;
    2410                 : 
    2411            2470 :   while ((value>>=1))
    2412            1510 :     power++;
    2413             480 :   return (power);
    2414                 : }
    2415                 : 
    2416                 : 
    2417                 : static int compress_maria_file(PACK_MRG_INFO *mrg, HUFF_COUNTS *huff_counts)
    2418              30 : {
    2419                 :   int error;
    2420                 :   uint i,max_calc_length,pack_ref_length,min_record_length,max_record_length;
    2421                 :   uint intervall,field_length,max_pack_length,pack_blob_length, null_bytes;
    2422                 :   my_off_t record_count;
    2423                 :   char llbuf[32];
    2424                 :   ulong length,pack_length;
    2425                 :   uchar *record,*pos,*end_pos,*record_pos,*start_pos;
    2426                 :   HUFF_COUNTS *count,*end_count;
    2427                 :   HUFF_TREE *tree;
    2428              30 :   MARIA_HA *isam_file=mrg->file[0];
    2429              30 :   uint pack_version= (uint) isam_file->s->pack.version;
    2430              30 :   DBUG_ENTER("compress_maria_file");
    2431                 : 
    2432                 :   /* Allocate a buffer for the records (excluding blobs). */
    2433              30 :   if (!(record=(uchar*) my_alloca(isam_file->s->base.reclength)))
    2434               0 :     return -1;
    2435                 : 
    2436              30 :   end_count=huff_counts+isam_file->s->base.fields;
    2437              30 :   min_record_length= (uint) ~0;
    2438              30 :   max_record_length=0;
    2439              30 :   null_bytes= isam_file->s->base.null_bytes;
    2440                 : 
    2441                 :   /*
    2442                 :     Calculate the maximum number of bits required to pack the records.
    2443                 :     Remember to understand 'max_zero_fill' as 'min_zero_fill'.
    2444                 :     The tree height determines the maximum number of bits per value.
    2445                 :     Some fields skip leading or trailing spaces or zeroes. The skipped
    2446                 :     number of bytes is encoded by 'length_bits' bits.
    2447                 :     Empty blobs and varchar are encoded with a single 1 bit. Other blobs
    2448                 :     and varchar get a leading 0 bit.
    2449                 :   */
    2450              30 :   max_calc_length= null_bytes;
    2451              90 :   for (i= 0 ; i < isam_file->s->base.fields ; i++)
    2452                 :   {
    2453              60 :     if (!(huff_counts[i].pack_type & PACK_TYPE_ZERO_FILL))
    2454              60 :       huff_counts[i].max_zero_fill=0;
    2455              60 :     if (huff_counts[i].field_type == FIELD_CONSTANT ||
    2456                 :         huff_counts[i].field_type == FIELD_ZERO ||
    2457                 :         huff_counts[i].field_type == FIELD_CHECK)
    2458                 :       continue;
    2459              60 :     if (huff_counts[i].field_type == FIELD_INTERVALL)
    2460               0 :       max_calc_length+=huff_counts[i].tree->height;
    2461              60 :     else if (huff_counts[i].field_type == FIELD_BLOB ||
    2462                 :              huff_counts[i].field_type == FIELD_VARCHAR)
    2463               0 :       max_calc_length+=huff_counts[i].tree->height*huff_counts[i].max_length + huff_counts[i].length_bits +1;
    2464                 :     else
    2465              60 :       max_calc_length+=
    2466                 :         (huff_counts[i].field_length - huff_counts[i].max_zero_fill)*
    2467                 :           huff_counts[i].tree->height+huff_counts[i].length_bits;
    2468                 :   }
    2469              30 :   max_calc_length= (max_calc_length + 7) / 8;
    2470              30 :   pack_ref_length= _ma_calc_pack_length(pack_version, max_calc_length);
    2471              30 :   record_count=0;
    2472                 :   /* 'max_blob_length' is the max length of all blobs of a record. */
    2473              30 :   pack_blob_length= isam_file->s->base.blobs ?
    2474                 :                     _ma_calc_pack_length(pack_version, mrg->max_blob_length) : 0;
    2475              30 :   max_pack_length=pack_ref_length+pack_blob_length;
    2476                 : 
    2477              30 :   DBUG_PRINT("fields", ("==="));
    2478              30 :   mrg_reset(mrg);
    2479             645 :   while ((error=mrg_rrnd(mrg,record)) != HA_ERR_END_OF_FILE)
    2480                 :   {
    2481             585 :     ulong tot_blob_length=0;
    2482             585 :     if (! error)
    2483                 :     {
    2484             525 :       if (flush_buffer((ulong) max_calc_length + (ulong) max_pack_length +
    2485                 :                        null_bytes))
    2486             525 :         break;
    2487             525 :       record_pos= file_buffer.pos;
    2488             525 :       file_buffer.pos+= max_pack_length;
    2489             525 :       if (null_bytes)
    2490                 :       {
    2491                 :         /* Copy null bits 'as is' */
    2492             525 :         memcpy(file_buffer.pos, record, null_bytes);
    2493             525 :         file_buffer.pos+= null_bytes;
    2494                 :       }
    2495             525 :       for (start_pos=record+null_bytes, count= huff_counts;
    2496            2100 :            count < end_count ;
    2497            1050 :            count++)
    2498                 :       {
    2499            1050 :         end_pos=start_pos+(field_length=count->field_length);
    2500            1050 :         tree=count->tree;
    2501                 : 
    2502            1050 :         DBUG_PRINT("fields", ("column: %3lu  type: %2u  pack: %2u  zero: %4u  "
    2503                 :                               "lbits: %2u  tree: %2u  length: %4u",
    2504                 :                               (ulong) (count - huff_counts + 1),
    2505                 :                               count->field_type,
    2506                 :                               count->pack_type, count->max_zero_fill,
    2507                 :                               count->length_bits, count->tree->tree_number,
    2508                 :                               count->field_length));
    2509                 : 
    2510                 :         /* Check if the column contains spaces only. */
    2511            1050 :         if (count->pack_type & PACK_TYPE_SPACE_FIELDS)
    2512                 :         {
    2513               0 :           for (pos=start_pos ; *pos == ' ' && pos < end_pos; pos++) ;
    2514               0 :           if (pos == end_pos)
    2515                 :           {
    2516               0 :             DBUG_PRINT("fields",
    2517                 :                        ("PACK_TYPE_SPACE_FIELDS spaces only, bits:  1"));
    2518               0 :             DBUG_PRINT("fields", ("---"));
    2519               0 :             write_bits(1,1);
    2520               0 :             start_pos=end_pos;
    2521               0 :             continue;
    2522                 :           }
    2523               0 :           DBUG_PRINT("fields",
    2524                 :                      ("PACK_TYPE_SPACE_FIELDS not only spaces, bits:  1"));
    2525               0 :           write_bits(0,1);
    2526                 :         }
    2527            1050 :         end_pos-=count->max_zero_fill;
    2528            1050 :         field_length-=count->max_zero_fill;
    2529                 : 
    2530            1050 :         switch (count->field_type) {
    2531                 :         case FIELD_SKIP_ZERO:
    2532               0 :           if (!memcmp(start_pos, zero_string, field_length))
    2533                 :           {
    2534               0 :             DBUG_PRINT("fields", ("FIELD_SKIP_ZERO zeroes only, bits:  1"));
    2535               0 :             write_bits(1,1);
    2536               0 :             start_pos=end_pos;
    2537               0 :             break;
    2538                 :           }
    2539               0 :           DBUG_PRINT("fields", ("FIELD_SKIP_ZERO not only zeroes, bits:  1"));
    2540               0 :           write_bits(0,1);
    2541                 :           /* Fall through */
    2542                 :         case FIELD_NORMAL:
    2543             525 :           DBUG_PRINT("fields", ("FIELD_NORMAL %lu bytes",
    2544                 :                                 (ulong) (end_pos - start_pos)));
    2545           13125 :           for ( ; start_pos < end_pos ; start_pos++)
    2546                 :           {
    2547           12600 :             DBUG_PRINT("fields",
    2548                 :                        ("value: 0x%02x  code: 0x%s  bits: %2u  bin: %s",
    2549                 :                         (uchar) *start_pos,
    2550                 :                         hexdigits(tree->code[(uchar) *start_pos]),
    2551                 :                         (uint) tree->code_len[(uchar) *start_pos],
    2552                 :                         bindigits(tree->code[(uchar) *start_pos],
    2553                 :                                   (uint) tree->code_len[(uchar) *start_pos])));
    2554           12600 :             write_bits(tree->code[(uchar) *start_pos],
    2555                 :                        (uint) tree->code_len[(uchar) *start_pos]);
    2556                 :           }
    2557                 :           break;
    2558                 :         case FIELD_SKIP_ENDSPACE:
    2559               0 :           for (pos=end_pos ; pos > start_pos && pos[-1] == ' ' ; pos--) ;
    2560               0 :           length= (ulong) (end_pos - pos);
    2561               0 :           if (count->pack_type & PACK_TYPE_SELECTED)
    2562                 :           {
    2563               0 :             if (length > count->min_space)
    2564                 :             {
    2565               0 :               DBUG_PRINT("fields",
    2566                 :                          ("FIELD_SKIP_ENDSPACE more than min_space, bits:  1"));
    2567               0 :               DBUG_PRINT("fields",
    2568                 :                          ("FIELD_SKIP_ENDSPACE skip %lu/%u bytes, bits: %2u",
    2569                 :                           length, field_length, count->length_bits));
    2570               0 :               write_bits(1,1);
    2571               0 :               write_bits(length,count->length_bits);
    2572                 :             }
    2573                 :             else
    2574                 :             {
    2575               0 :               DBUG_PRINT("fields",
    2576                 :                          ("FIELD_SKIP_ENDSPACE not more than min_space, "
    2577                 :                           "bits:  1"));
    2578               0 :               write_bits(0,1);
    2579               0 :               pos=end_pos;
    2580                 :             }
    2581                 :           }
    2582                 :           else
    2583                 :           {
    2584               0 :             DBUG_PRINT("fields",
    2585                 :                        ("FIELD_SKIP_ENDSPACE skip %lu/%u bytes, bits: %2u",
    2586                 :                         length, field_length, count->length_bits));
    2587               0 :             write_bits(length,count->length_bits);
    2588                 :           }
    2589                 :           /* Encode all significant bytes. */
    2590               0 :           DBUG_PRINT("fields", ("FIELD_SKIP_ENDSPACE %lu bytes",
    2591                 :                                 (ulong) (pos - start_pos)));
    2592               0 :           for ( ; start_pos < pos ; start_pos++)
    2593                 :           {
    2594               0 :             DBUG_PRINT("fields",
    2595                 :                        ("value: 0x%02x  code: 0x%s  bits: %2u  bin: %s",
    2596                 :                         (uchar) *start_pos,
    2597                 :                         hexdigits(tree->code[(uchar) *start_pos]),
    2598                 :                         (uint) tree->code_len[(uchar) *start_pos],
    2599                 :                         bindigits(tree->code[(uchar) *start_pos],
    2600                 :                                   (uint) tree->code_len[(uchar) *start_pos])));
    2601               0 :             write_bits(tree->code[(uchar) *start_pos],
    2602                 :                        (uint) tree->code_len[(uchar) *start_pos]);
    2603                 :           }
    2604               0 :           start_pos=end_pos;
    2605               0 :           break;
    2606                 :         case FIELD_SKIP_PRESPACE:
    2607             525 :           for (pos=start_pos ; pos < end_pos && pos[0] == ' ' ; pos++) ;
    2608             525 :           length= (ulong) (pos - start_pos);
    2609             525 :           if (count->pack_type & PACK_TYPE_SELECTED)
    2610                 :           {
    2611               0 :             if (length > count->min_space)
    2612                 :             {
    2613               0 :               DBUG_PRINT("fields",
    2614                 :                          ("FIELD_SKIP_PRESPACE more than min_space, bits:  1"));
    2615               0 :               DBUG_PRINT("fields",
    2616                 :                          ("FIELD_SKIP_PRESPACE skip %lu/%u bytes, bits: %2u",
    2617                 :                           length, field_length, count->length_bits));
    2618               0 :               write_bits(1,1);
    2619               0 :               write_bits(length,count->length_bits);
    2620                 :             }
    2621                 :             else
    2622                 :             {
    2623               0 :               DBUG_PRINT("fields",
    2624                 :                          ("FIELD_SKIP_PRESPACE not more than min_space, "
    2625                 :                           "bits:  1"));
    2626               0 :               pos=start_pos;
    2627               0 :               write_bits(0,1);
    2628                 :             }
    2629                 :           }
    2630                 :           else
    2631                 :           {
    2632             525 :             DBUG_PRINT("fields",
    2633                 :                        ("FIELD_SKIP_PRESPACE skip %lu/%u bytes, bits: %2u",
    2634                 :                         length, field_length, count->length_bits));
    2635             525 :             write_bits(length,count->length_bits);
    2636                 :           }
    2637                 :           /* Encode all significant bytes. */
    2638             525 :           DBUG_PRINT("fields", ("FIELD_SKIP_PRESPACE %lu bytes",
    2639                 :                                 (ulong) (end_pos - start_pos)));
    2640            1350 :           for (start_pos=pos ; start_pos < end_pos ; start_pos++)
    2641                 :           {
    2642             825 :             DBUG_PRINT("fields",
    2643                 :                        ("value: 0x%02x  code: 0x%s  bits: %2u  bin: %s",
    2644                 :                         (uchar) *start_pos,
    2645                 :                         hexdigits(tree->code[(uchar) *start_pos]),
    2646                 :                         (uint) tree->code_len[(uchar) *start_pos],
    2647                 :                         bindigits(tree->code[(uchar) *start_pos],
    2648                 :                                   (uint) tree->code_len[(uchar) *start_pos])));
    2649             825 :             write_bits(tree->code[(uchar) *start_pos],
    2650                 :                        (uint) tree->code_len[(uchar) *start_pos]);
    2651                 :           }
    2652                 :           break;
    2653                 :         case FIELD_CONSTANT:
    2654                 :         case FIELD_ZERO:
    2655                 :         case FIELD_CHECK:
    2656               0 :           DBUG_PRINT("fields", ("FIELD_CONSTANT/ZERO/CHECK"));
    2657               0 :           start_pos=end_pos;
    2658               0 :           break;
    2659                 :         case FIELD_INTERVALL:
    2660               0 :           global_count=count;
    2661               0 :           pos=(uchar*) tree_search(&count->int_tree, start_pos,
    2662                 :                                   count->int_tree.custom_arg);
    2663               0 :           intervall=(uint) (pos - count->tree_buff)/field_length;
    2664               0 :           DBUG_PRINT("fields", ("FIELD_INTERVALL"));
    2665               0 :           DBUG_PRINT("fields", ("index: %4u code: 0x%s  bits: %2u",
    2666                 :                                 intervall, hexdigits(tree->code[intervall]),
    2667                 :                                 (uint) tree->code_len[intervall]));
    2668               0 :           write_bits(tree->code[intervall],(uint) tree->code_len[intervall]);
    2669               0 :           start_pos=end_pos;
    2670               0 :           break;
    2671                 :         case FIELD_BLOB:
    2672                 :         {
    2673                 :           ulong blob_length= _ma_calc_blob_length(field_length-
    2674                 :                                                  portable_sizeof_char_ptr,
    2675               0 :                                                  start_pos);
    2676                 :           /* Empty blobs are encoded with a single 1 bit. */
    2677               0 :           if (!blob_length)
    2678                 :           {
    2679               0 :             DBUG_PRINT("fields", ("FIELD_BLOB empty, bits:  1"));
    2680               0 :             write_bits(1,1);
    2681                 :           }
    2682                 :           else
    2683                 :           {
    2684                 :             uchar *blob,*blob_end;
    2685               0 :             DBUG_PRINT("fields", ("FIELD_BLOB not empty, bits:  1"));
    2686               0 :             write_bits(0,1);
    2687                 :             /* Write the blob length. */
    2688               0 :             DBUG_PRINT("fields", ("FIELD_BLOB %lu bytes, bits: %2u",
    2689                 :                                   blob_length, count->length_bits));
    2690               0 :             write_bits(blob_length,count->length_bits);
    2691               0 :             memcpy_fixed(&blob,end_pos-portable_sizeof_char_ptr,
    2692                 :                          sizeof(char*));
    2693               0 :             blob_end=blob+blob_length;
    2694                 :             /* Encode the blob bytes. */
    2695               0 :             for ( ; blob < blob_end ; blob++)
    2696                 :             {
    2697               0 :               DBUG_PRINT("fields",
    2698                 :                          ("value: 0x%02x  code: 0x%s  bits: %2u  bin: %s",
    2699                 :                           (uchar) *blob, hexdigits(tree->code[(uchar) *blob]),
    2700                 :                           (uint) tree->code_len[(uchar) *blob],
    2701                 :                           bindigits(tree->code[(uchar) *start_pos],
    2702                 :                                     (uint)tree->code_len[(uchar) *start_pos])));
    2703               0 :               write_bits(tree->code[(uchar) *blob],
    2704                 :                          (uint) tree->code_len[(uchar) *blob]);
    2705                 :             }
    2706               0 :             tot_blob_length+=blob_length;
    2707                 :           }
    2708               0 :           start_pos= end_pos;
    2709               0 :           break;
    2710                 :         }
    2711                 :         case FIELD_VARCHAR:
    2712                 :         {
    2713               0 :           uint var_pack_length= HA_VARCHAR_PACKLENGTH(count->field_length-1);
    2714                 :           ulong col_length= (var_pack_length == 1 ?
    2715                 :                              (uint) *(uchar*) start_pos :
    2716               0 :                              uint2korr(start_pos));
    2717                 :           /* Empty varchar are encoded with a single 1 bit. */
    2718               0 :           if (!col_length)
    2719                 :           {
    2720               0 :             DBUG_PRINT("fields", ("FIELD_VARCHAR empty, bits:  1"));
    2721               0 :             write_bits(1,1);                    /* Empty varchar */
    2722                 :           }
    2723                 :           else
    2724                 :           {
    2725               0 :             uchar *end= start_pos + var_pack_length + col_length;
    2726               0 :             DBUG_PRINT("fields", ("FIELD_VARCHAR not empty, bits:  1"));
    2727               0 :             write_bits(0,1);
    2728                 :             /* Write the varchar length. */
    2729               0 :             DBUG_PRINT("fields", ("FIELD_VARCHAR %lu bytes, bits: %2u",
    2730                 :                                   col_length, count->length_bits));
    2731               0 :             write_bits(col_length,count->length_bits);
    2732                 :             /* Encode the varchar bytes. */
    2733               0 :             for (start_pos+= var_pack_length ; start_pos < end ; start_pos++)
    2734                 :             {
    2735               0 :               DBUG_PRINT("fields",
    2736                 :                          ("value: 0x%02x  code: 0x%s  bits: %2u  bin: %s",
    2737                 :                           (uchar) *start_pos,
    2738                 :                           hexdigits(tree->code[(uchar) *start_pos]),
    2739                 :                           (uint) tree->code_len[(uchar) *start_pos],
    2740                 :                           bindigits(tree->code[(uchar) *start_pos],
    2741                 :                                     (uint)tree->code_len[(uchar) *start_pos])));
    2742               0 :               write_bits(tree->code[(uchar) *start_pos],
    2743                 :                          (uint) tree->code_len[(uchar) *start_pos]);
    2744                 :             }
    2745                 :           }
    2746               0 :           start_pos= end_pos;
    2747               0 :           break;
    2748                 :         }
    2749                 :         case FIELD_LAST:
    2750                 :         case FIELD_enum_val_count:
    2751               0 :           abort();                              /* Impossible */
    2752                 :         }
    2753            1050 :         start_pos+=count->max_zero_fill;
    2754            1050 :         DBUG_PRINT("fields", ("---"));
    2755                 :       }
    2756             525 :       flush_bits();
    2757             525 :       length=(ulong) (file_buffer.pos - record_pos) - max_pack_length;
    2758             525 :       pack_length= _ma_save_pack_length(pack_version, record_pos, length);
    2759             525 :       if (pack_blob_length)
    2760               0 :         pack_length+= _ma_save_pack_length(pack_version,
    2761                 :                                            record_pos + pack_length,
    2762                 :                                            tot_blob_length);
    2763             525 :       DBUG_PRINT("fields", ("record: %lu  length: %lu  blob-length: %lu  "
    2764                 :                             "length-bytes: %lu", (ulong) record_count, length,
    2765                 :                             tot_blob_length, pack_length));
    2766             525 :       DBUG_PRINT("fields", ("==="));
    2767                 : 
    2768                 :       /* Correct file buffer if the header was smaller */
    2769             525 :       if (pack_length != max_pack_length)
    2770                 :       {
    2771               0 :         bmove(record_pos+pack_length,record_pos+max_pack_length,length);
    2772               0 :         file_buffer.pos-= (max_pack_length-pack_length);
    2773                 :       }
    2774             525 :       if (length < (ulong) min_record_length)
    2775              55 :         min_record_length=(uint) length;
    2776             525 :       if (length > (ulong) max_record_length)
    2777              35 :         max_record_length=(uint) length;
    2778             525 :       record_count++;
    2779             525 :       if (write_loop && record_count % WRITE_COUNT == 0)
    2780                 :       {
    2781               0 :         VOID(printf("%lu\r", (ulong) record_count));
    2782               0 :         VOID(fflush(stdout));
    2783                 :       }
    2784                 :     }
    2785              60 :     else if (error != HA_ERR_RECORD_DELETED)
    2786             585 :       break;
    2787                 :   }
    2788              30 :   if (error == HA_ERR_END_OF_FILE)
    2789              30 :     error=0;
    2790                 :   else
    2791                 :   {
    2792               0 :     VOID(fprintf(stderr, "%s: Got error %d reading records\n",
    2793                 :                  my_progname, error));
    2794                 :   }
    2795              30 :   if (verbose >= 2)
    2796               0 :     VOID(printf("wrote %s records.\n", llstr((longlong) record_count, llbuf)));
    2797                 : 
    2798                 :   my_afree(record);
    2799              30 :   mrg->ref_length=max_pack_length;
    2800              30 :   mrg->min_pack_length=max_record_length ? min_record_length : 0;
    2801              30 :   mrg->max_pack_length=max_record_length;
    2802              30 :   DBUG_RETURN(error || error_on_write || flush_buffer(~(ulong) 0));
    2803                 : }
    2804                 : 
    2805                 : 
    2806                 : static char *make_new_name(char *new_name, char *old_name)
    2807              25 : {
    2808              25 :   return fn_format(new_name,old_name,"",DATA_TMP_EXT,2+4);
    2809                 : }
    2810                 : 
    2811                 : static char *make_old_name(char *new_name, char *old_name)
    2812               0 : {
    2813               0 :   return fn_format(new_name,old_name,"",OLD_EXT,2+4);
    2814                 : }
    2815                 : 
    2816                 :         /* rutines for bit writing buffer */
    2817                 : 
    2818                 : static void init_file_buffer(File file, pbool read_buffer)
    2819              30 : {
    2820              30 :   file_buffer.file=file;
    2821              30 :   file_buffer.buffer= (uchar*) my_malloc(ALIGN_SIZE(RECORD_CACHE_SIZE),
    2822                 :                                          MYF(MY_WME));
    2823              30 :   file_buffer.end=file_buffer.buffer+ALIGN_SIZE(RECORD_CACHE_SIZE)-8;
    2824              30 :   file_buffer.pos_in_file=0;
    2825              30 :   error_on_write=0;
    2826              30 :   if (read_buffer)
    2827                 :   {
    2828                 : 
    2829               0 :     file_buffer.pos=file_buffer.end;
    2830               0 :     file_buffer.bits=0;
    2831                 :   }
    2832                 :   else
    2833                 :   {
    2834              30 :     file_buffer.pos=file_buffer.buffer;
    2835              30 :     file_buffer.bits=BITS_SAVED;
    2836                 :   }
    2837              30 :   file_buffer.bitbucket= 0;
    2838                 : }
    2839                 : 
    2840                 : 
    2841                 : static int flush_buffer(ulong neaded_length)
    2842             555 : {
    2843                 :   ulong length;
    2844                 : 
    2845                 :   /*
    2846                 :     file_buffer.end is 8 bytes lower than the real end of the buffer.
    2847                 :     This is done so that the end-of-buffer condition does not need to be
    2848                 :     checked for every uchar (see write_bits()). Consequently,
    2849                 :     file_buffer.pos can become greater than file_buffer.end. The
    2850                 :     algorithms in the other functions ensure that there will never be
    2851                 :     more than 8 bytes written to the buffer without an end-of-buffer
    2852                 :     check. So the buffer cannot be overrun. But we need to check for the
    2853                 :     near-to-buffer-end condition to avoid a negative result, which is
    2854                 :     casted to unsigned and thus becomes giant.
    2855                 :   */
    2856             555 :   if ((file_buffer.pos < file_buffer.end) &&
    2857                 :       ((ulong) (file_buffer.end - file_buffer.pos) > neaded_length))
    2858             525 :     return 0;
    2859              30 :   length=(ulong) (file_buffer.pos-file_buffer.buffer);
    2860              30 :   file_buffer.pos=file_buffer.buffer;
    2861              30 :   file_buffer.pos_in_file+=length;
    2862              30 :   if (test_only)
    2863               0 :     return 0;
    2864              30 :   if (error_on_write|| my_write(file_buffer.file,
    2865                 :                                 (const uchar*) file_buffer.buffer,
    2866                 :                                 length,
    2867                 :                                 MYF(MY_WME | MY_NABP | MY_WAIT_IF_FULL)))
    2868                 :   {
    2869               0 :     error_on_write=1;
    2870               0 :     return 1;
    2871                 :   }
    2872                 : 
    2873              30 :   if (neaded_length != ~(ulong) 0 &&
    2874                 :       (ulong) (file_buffer.end-file_buffer.buffer) < neaded_length)
    2875                 :   {
    2876                 :     uchar *tmp;
    2877               0 :     neaded_length+=256;                         /* some margin */
    2878               0 :     tmp= (uchar*) my_realloc(file_buffer.buffer, neaded_length,MYF(MY_WME));
    2879               0 :     if (!tmp)
    2880               0 :       return 1;
    2881               0 :     file_buffer.pos=    (tmp + (ulong) (file_buffer.pos - file_buffer.buffer));
    2882               0 :     file_buffer.buffer= tmp;
    2883               0 :     file_buffer.end=    (tmp+neaded_length-8);
    2884                 :   }
    2885              30 :   return 0;
    2886                 : }
    2887                 : 
    2888                 : 
    2889                 : static void end_file_buffer(void)
    2890              30 : {
    2891              30 :   my_free(file_buffer.buffer, MYF(0));
    2892                 : }
    2893                 : 
    2894                 :         /* output `bits` low bits of `value' */
    2895                 : 
    2896                 : static void write_bits(register ulonglong value, register uint bits)
    2897           15080 : {
    2898           15080 :   DBUG_ASSERT(((bits < 8 * sizeof(value)) && ! (value >> bits)) ||
    2899                 :               (bits == 8 * sizeof(value)));
    2900                 : 
    2901           15080 :   if ((file_buffer.bits-= (int) bits) >= 0)
    2902                 :   {
    2903           14705 :     file_buffer.bitbucket|= value << file_buffer.bits;
    2904                 :   }
    2905                 :   else
    2906                 :   {
    2907                 :     reg3 ulonglong bit_buffer;
    2908             375 :     bits= (uint) -file_buffer.bits;
    2909             375 :     bit_buffer= (file_buffer.bitbucket |
    2910                 :                  ((bits != 8 * sizeof(value)) ? (value >> bits) : 0));
    2911                 : #if BITS_SAVED == 64
    2912             375 :     *file_buffer.pos++= (uchar) (bit_buffer >> 56);
    2913             375 :     *file_buffer.pos++= (uchar) (bit_buffer >> 48);
    2914             375 :     *file_buffer.pos++= (uchar) (bit_buffer >> 40);
    2915             375 :     *file_buffer.pos++= (uchar) (bit_buffer >> 32);
    2916                 : #endif
    2917             375 :     *file_buffer.pos++= (uchar) (bit_buffer >> 24);
    2918             375 :     *file_buffer.pos++= (uchar) (bit_buffer >> 16);
    2919             375 :     *file_buffer.pos++= (uchar) (bit_buffer >> 8);
    2920             375 :     *file_buffer.pos++= (uchar) (bit_buffer);
    2921                 : 
    2922             375 :     if (bits != 8 * sizeof(value))
    2923             375 :       value&= (((ulonglong) 1) << bits) - 1;
    2924             375 :     if (file_buffer.pos >= file_buffer.end)
    2925               0 :       VOID(flush_buffer(~ (ulong) 0));
    2926             375 :     file_buffer.bits=(int) (BITS_SAVED - bits);
    2927             375 :     file_buffer.bitbucket= value << (BITS_SAVED - bits);
    2928                 :   }
    2929                 :   return;
    2930                 : }
    2931                 : 
    2932                 :         /* Flush bits in bit_buffer to buffer */
    2933                 : 
    2934                 : static void flush_bits(void)
    2935             615 : {
    2936                 :   int bits;
    2937                 :   ulonglong bit_buffer;
    2938                 : 
    2939             615 :   bits= file_buffer.bits & ~7;
    2940             615 :   bit_buffer= file_buffer.bitbucket >> bits;
    2941             615 :   bits= BITS_SAVED - bits;
    2942            3615 :   while (bits > 0)
    2943                 :   {
    2944            2385 :     bits-= 8;
    2945            2385 :     *file_buffer.pos++= (uchar) (bit_buffer >> bits);
    2946                 :   }
    2947             615 :   if (file_buffer.pos >= file_buffer.end)
    2948               0 :     VOID(flush_buffer(~ (ulong) 0));
    2949             615 :   file_buffer.bits= BITS_SAVED;
    2950             615 :   file_buffer.bitbucket= 0;
    2951                 : }
    2952                 : 
    2953                 : 
    2954                 : /****************************************************************************
    2955                 : ** functions to handle the joined files
    2956                 : ****************************************************************************/
    2957                 : 
    2958                 : static int save_state(MARIA_HA *isam_file,PACK_MRG_INFO *mrg,
    2959                 :                       my_off_t new_length,
    2960                 :                       ha_checksum crc)
    2961              25 : {
    2962              25 :   MARIA_SHARE *share=isam_file->s;
    2963              25 :   uint options=mi_uint2korr(share->state.header.options);
    2964                 :   uint key;
    2965              25 :   DBUG_ENTER("save_state");
    2966                 : 
    2967              25 :   options|= HA_OPTION_COMPRESS_RECORD | HA_OPTION_READ_ONLY_DATA;
    2968              25 :   mi_int2store(share->state.header.options,options);
    2969                 :   /* Save the original file type of we have to undo the packing later */
    2970              25 :   share->state.header.org_data_file_type= share->state.header.data_file_type;
    2971              25 :   share->state.header.data_file_type= COMPRESSED_RECORD;
    2972                 : 
    2973              25 :   share->state.state.data_file_length=new_length;
    2974              25 :   share->state.state.del=0;
    2975              25 :   share->state.state.empty=0;
    2976              25 :   share->state.dellink= HA_OFFSET_ERROR;
    2977              25 :   share->state.split=(ha_rows) mrg->records;
    2978              25 :   share->state.version=(ulong) time((time_t*) 0);
    2979              25 :   if (share->base.born_transactional)
    2980              10 :     share->state.create_rename_lsn= share->state.is_of_horizon=
    2981                 :       share->state.skip_redo_lsn= LSN_NEEDS_NEW_STATE_LSNS;
    2982              25 :   if (! maria_is_all_keys_active(share->state.key_map, share->base.keys))
    2983                 :   {
    2984                 :     /*
    2985                 :       Some indexes are disabled, cannot use current key_file_length value
    2986                 :       as an estimate of upper bound of index file size. Use packed data file
    2987                 :       size instead.
    2988                 :     */
    2989               0 :     share->state.state.key_file_length= new_length;
    2990                 :   }
    2991                 :   /*
    2992                 :     If there are no disabled indexes, keep key_file_length value from
    2993                 :     original file so "maria_chk -rq" can use this value (this is necessary
    2994                 :     because index size cannot be easily calculated for fulltext keys)
    2995                 :   */
    2996              25 :   maria_clear_all_keys_active(share->state.key_map);
    2997              50 :   for (key=0 ; key < share->base.keys ; key++)
    2998              25 :     share->state.key_root[key]= HA_OFFSET_ERROR;
    2999              25 :   share->state.key_del= HA_OFFSET_ERROR;
    3000              25 :   share->state.state.checksum= crc;     /* Save crc in file */
    3001              25 :   share->changed=1;                  /* Force write of header */
    3002              25 :   share->state.open_count=0;
    3003              25 :   share->global_changed=0;
    3004              25 :   VOID(my_chsize(share->kfile.file, share->base.keystart, 0, MYF(0)));
    3005              25 :   if (share->base.keys)
    3006              25 :     isamchk_neaded=1;
    3007              25 :   DBUG_RETURN(_ma_state_info_write_sub(share->kfile.file,
    3008                 :                                        &share->state,
    3009                 :                                        MA_STATE_INFO_WRITE_DONT_MOVE_OFFSET |
    3010                 :                                        MA_STATE_INFO_WRITE_FULL_INFO));
    3011                 : }
    3012                 : 
    3013                 : 
    3014                 : static int save_state_mrg(File file,PACK_MRG_INFO *mrg,my_off_t new_length,
    3015                 :                           ha_checksum crc)
    3016               5 : {
    3017                 :   MARIA_STATE_INFO state;
    3018               5 :   MARIA_HA *isam_file=mrg->file[0];
    3019                 :   uint options;
    3020               5 :   DBUG_ENTER("save_state_mrg");
    3021                 : 
    3022               5 :   state= isam_file->s->state;
    3023               5 :   options= (mi_uint2korr(state.header.options) | HA_OPTION_COMPRESS_RECORD |
    3024                 :             HA_OPTION_READ_ONLY_DATA);
    3025               5 :   mi_int2store(state.header.options,options);
    3026                 :   /* Save the original file type of we have to undo the packing later */
    3027               5 :   state.header.org_data_file_type= state.header.data_file_type;
    3028               5 :   state.header.data_file_type= COMPRESSED_RECORD;
    3029                 : 
    3030               5 :   state.state.data_file_length=new_length;
    3031               5 :   state.state.del=0;
    3032               5 :   state.state.empty=0;
    3033               5 :   state.state.records=state.split=(ha_rows) mrg->records;
    3034               5 :   state.create_rename_lsn= state.is_of_horizon= state.skip_redo_lsn=
    3035                 :     LSN_NEEDS_NEW_STATE_LSNS;
    3036                 : 
    3037                 :   /* See comment above in save_state about key_file_length handling. */
    3038               5 :   if (mrg->src_file_has_indexes_disabled)
    3039                 :   {
    3040               0 :     isam_file->s->state.state.key_file_length=
    3041                 :       max(isam_file->s->state.state.key_file_length, new_length);
    3042                 :   }
    3043               5 :   state.dellink= HA_OFFSET_ERROR;
    3044               5 :   state.version=(ulong) time((time_t*) 0);
    3045               5 :   maria_clear_all_keys_active(state.key_map);
    3046               5 :   state.state.checksum=crc;
    3047               5 :   if (isam_file->s->base.keys)
    3048               5 :     isamchk_neaded=1;
    3049               5 :   state.changed=STATE_CHANGED | STATE_NOT_ANALYZED; /* Force check of table */
    3050               5 :   DBUG_RETURN (_ma_state_info_write_sub(file, &state,
    3051                 :                                         MA_STATE_INFO_WRITE_DONT_MOVE_OFFSET |
    3052                 :                                         MA_STATE_INFO_WRITE_FULL_INFO));
    3053                 : }
    3054                 : 
    3055                 : 
    3056                 : /* reset for mrg_rrnd */
    3057                 : 
    3058                 : static void mrg_reset(PACK_MRG_INFO *mrg)
    3059              60 : {
    3060              60 :   if (mrg->current)
    3061                 :   {
    3062              30 :     maria_extra(*mrg->current, HA_EXTRA_NO_CACHE, 0);
    3063              30 :     mrg->current=0;
    3064                 :   }
    3065                 : }
    3066                 : 
    3067                 : static int mrg_rrnd(PACK_MRG_INFO *info,uchar *buf)
    3068            1230 : {
    3069                 :   int error;
    3070                 :   MARIA_HA *isam_info;
    3071                 :   my_off_t filepos;
    3072                 : 
    3073            1230 :   if (!info->current)
    3074                 :   {
    3075              60 :     isam_info= *(info->current=info->file);
    3076              60 :     info->end=info->current+info->count;
    3077              60 :     maria_reset(isam_info);
    3078              60 :     maria_extra(isam_info, HA_EXTRA_CACHE, 0);
    3079              60 :     if ((error= maria_scan_init(isam_info)))
    3080               0 :       return(error);
    3081                 :   }
    3082                 :   else
    3083            1170 :     isam_info= *info->current;
    3084                 : 
    3085                 :   for (;;)
    3086                 :   {
    3087            1240 :     if (!(error= maria_scan(isam_info, buf)) ||
    3088                 :         error != HA_ERR_END_OF_FILE)
    3089            1170 :       return (error);
    3090              70 :     maria_scan_end(isam_info);
    3091              70 :     maria_extra(isam_info,HA_EXTRA_NO_CACHE, 0);
    3092              70 :     if (info->current+1 == info->end)
    3093              60 :       return(HA_ERR_END_OF_FILE);
    3094              10 :     info->current++;
    3095              10 :     isam_info= *info->current;
    3096              10 :     filepos=isam_info->s->pack.header_length;
    3097              10 :     maria_reset(isam_info);
    3098              10 :     maria_extra(isam_info,HA_EXTRA_CACHE, 0);
    3099              10 :     if ((error= maria_scan_init(isam_info)))
    3100               0 :       return(error);
    3101                 :   }
    3102                 : }
    3103                 : 
    3104                 : 
    3105                 : static int mrg_close(PACK_MRG_INFO *mrg)
    3106              30 : {
    3107                 :   uint i;
    3108              30 :   int error=0;
    3109              30 :   DBUG_ENTER("mrg_close");
    3110                 : 
    3111              65 :   for (i=0 ; i < mrg->count ; i++)
    3112              35 :     error|=maria_close(mrg->file[i]);
    3113              30 :   if (mrg->free_file)
    3114               5 :     my_free(mrg->file, MYF(0));
    3115              30 :   DBUG_RETURN(error);
    3116                 : }
    3117                 : 
    3118                 : 
    3119                 : #if !defined(DBUG_OFF)
    3120                 : /*
    3121                 :   Fake the counts to get big Huffman codes.
    3122                 : 
    3123                 :   SYNOPSIS
    3124                 :     fakebigcodes()
    3125                 :     huff_counts                 A pointer to the counts array.
    3126                 :     end_count                   A pointer past the counts array.
    3127                 : 
    3128                 :   DESCRIPTION
    3129                 : 
    3130                 :     Huffman coding works by removing the two least frequent values from
    3131                 :     the list of values and add a new value with the sum of their
    3132                 :     incidences in a loop until only one value is left. Every time a
    3133                 :     value is reused for a new value, it gets one more bit for its
    3134                 :     encoding. Hence, the least frequent values get the longest codes.
    3135                 : 
    3136                 :     To get a maximum code length for a value, two of the values must
    3137                 :     have an incidence of 1. As their sum is 2, the next infrequent value
    3138                 :     must have at least an incidence of 2, then 4, 8, 16 and so on. This
    3139                 :     means that one needs 2**n bytes (values) for a code length of n
    3140                 :     bits. However, using more distinct values forces the use of longer
    3141                 :     codes, or reaching the code length with less total bytes (values).
    3142                 : 
    3143                 :     To get 64(32)-bit codes, I sort the counts by decreasing incidence.
    3144                 :     I assign counts of 1 to the two most frequent values, a count of 2
    3145                 :     for the next one, then 4, 8, and so on until 2**64-1(2**30-1). All
    3146                 :     the remaining values get 1. That way every possible uchar has an
    3147                 :     assigned code, though not all codes are used if not all uchar values
    3148                 :     are present in the column.
    3149                 : 
    3150                 :     This strategy would work with distinct column values too, but
    3151                 :     requires that at least 64(32) values are present. To make things
    3152                 :     easier here, I cancel all distinct column values and force byte
    3153                 :     compression for all columns.
    3154                 : 
    3155                 :   RETURN
    3156                 :     void
    3157                 : */
    3158                 : 
    3159                 : static void fakebigcodes(HUFF_COUNTS *huff_counts, HUFF_COUNTS *end_count)
    3160               0 : {
    3161                 :   HUFF_COUNTS   *count;
    3162                 :   my_off_t      *cur_count_p;
    3163                 :   my_off_t      *end_count_p;
    3164                 :   my_off_t      **cur_sort_p;
    3165                 :   my_off_t      **end_sort_p;
    3166                 :   my_off_t      *sort_counts[256];
    3167                 :   my_off_t      total;
    3168               0 :   DBUG_ENTER("fakebigcodes");
    3169                 : 
    3170               0 :   for (count= huff_counts; count < end_count; count++)
    3171                 :   {
    3172                 :     /*
    3173                 :       Remove distinct column values.
    3174                 :     */
    3175               0 :     if (huff_counts->tree_buff)
    3176                 :     {
    3177               0 :       my_free(huff_counts->tree_buff, MYF(0));
    3178               0 :       delete_tree(&huff_counts->int_tree);
    3179               0 :       huff_counts->tree_buff= NULL;
    3180               0 :       DBUG_PRINT("fakebigcodes", ("freed distinct column values"));
    3181                 :     }
    3182                 : 
    3183                 :     /*
    3184                 :       Sort counts by decreasing incidence.
    3185                 :     */
    3186               0 :     cur_count_p= count->counts;
    3187               0 :     end_count_p= cur_count_p + 256;
    3188               0 :     cur_sort_p= sort_counts;
    3189               0 :     while (cur_count_p < end_count_p)
    3190               0 :       *(cur_sort_p++)= cur_count_p++;
    3191               0 :     (void) my_qsort(sort_counts, 256, sizeof(my_off_t*), (qsort_cmp) fakecmp);
    3192                 : 
    3193                 :     /*
    3194                 :       Assign faked counts.
    3195                 :     */
    3196               0 :     cur_sort_p= sort_counts;
    3197                 : #if SIZEOF_LONG_LONG > 4
    3198               0 :     end_sort_p= sort_counts + 8 * sizeof(ulonglong) - 1;
    3199                 : #else
    3200                 :     end_sort_p= sort_counts + 8 * sizeof(ulonglong) - 2;
    3201                 : #endif
    3202                 :     /* Most frequent value gets a faked count of 1. */
    3203               0 :     **(cur_sort_p++)= 1;
    3204               0 :     total= 1;
    3205               0 :     while (cur_sort_p < end_sort_p)
    3206                 :     {
    3207               0 :       **(cur_sort_p++)= total;
    3208               0 :       total<<= 1;
    3209                 :     }
    3210                 :     /* Set the last value. */
    3211               0 :     **(cur_sort_p++)= --total;
    3212                 :     /*
    3213                 :       Set the remaining counts.
    3214                 :     */
    3215               0 :     end_sort_p= sort_counts + 256;
    3216               0 :     while (cur_sort_p < end_sort_p)
    3217               0 :       **(cur_sort_p++)= 1;
    3218                 :   }
    3219               0 :   DBUG_VOID_RETURN;
    3220                 : }
    3221                 : 
    3222                 : 
    3223                 : /*
    3224                 :   Compare two counts for reverse sorting.
    3225                 : 
    3226                 :   SYNOPSIS
    3227                 :     fakecmp()
    3228                 :     count1              One count.
    3229                 :     count2              Another count.
    3230                 : 
    3231                 :   RETURN
    3232                 :     1                   count1  < count2
    3233                 :     0                   count1 == count2
    3234                 :     -1                  count1 >  count2
    3235                 : */
    3236                 : 
    3237                 : static int fakecmp(my_off_t **count1, my_off_t **count2)
    3238               0 : {
    3239               0 :   return ((**count1 < **count2) ? 1 :
    3240                 :           (**count1 > **count2) ? -1 : 0);
    3241                 : }
    3242                 : #endif

Generated by: LTP GCOV extension version 1.4