Main Page   Compound List   File List   Compound Members   File Members  

rr_libs/sih.c

Go to the documentation of this file.
00001 /* sih:  string-to-int32 hashing */
00002 /*=====================================================================
00003                 =======   COPYRIGHT NOTICE   =======
00004 Copyright (C) 1994, Carnegie Mellon University and Ronald Rosenfeld.
00005 All rights reserved.
00006 
00007 This software is made available for research purposes only.  It may be
00008 redistributed freely for this purpose, in full or in part, provided
00009 that this entire copyright notice is included on any copies of this
00010 software and applications and derivations thereof.
00011 
00012 This software is provided on an "as is" basis, without warranty of any
00013 kind, either expressed or implied, as to any matter including, but not
00014 limited to warranty of fitness of purpose, or merchantability, or
00015 results obtained from use of this software.
00016 ======================================================================*/
00017 /* The string: any non-null character string.
00018    The int32 value: any int32 value.
00019    Only addition, update and lookup are currently allowed.
00020    If the hash table was not meant for update, a warning will be printed during update.
00021    The hash table grows automatically to preserve the 'max_occupancy' condition.
00022 */
00023 /* 94/08/18, RR: sih_lookup(): if not found, also put 0 in 'intval'  */
00024 
00025 /* Edited by Philip Clarkson, March 1997 to prevent compilation warnings */
00026 
00027 #include <stdio.h>
00028 #include <sys/types.h>
00029 #include "general.h"
00030 #include "sih.h"
00031 
00032 /* need to include stdlib to prevent warnings at compilation time,
00033    Philip Clarkson, March 1997 */
00034 
00035 #include <stdlib.h>
00036 
00041 #define HASH_VERSION 940728 /* change this if you change the hash function below */
00042 #define SIH_KEY(STRING,KEY) {\
00043     int i = 0; \
00044     char *pstr = STRING; \
00045     KEY = 0; \
00046     do {KEY += *pstr++ << (0xF & --i);} while (*pstr);  /* Fil's hash function */ \
00047 }
00048 
00050 int nearest_prime_up(int num)
00051 {
00052   int num_has_divisor=1;
00053   if (num/2*2 == num) num++; /* start w/ an odd number */
00054   for (; num_has_divisor; num+=2) {
00055      int div;
00056      num_has_divisor=0;
00057      for (div=3; div<=num/3; div++) {
00058         if ((num/div) * div == num) {
00059            num_has_divisor=1;
00060            break;
00061         }
00062      }
00063   }
00064   num -= 2;
00065   return(num);
00066 }
00067 
00069 sih_t *sih_create(int initial_size, double max_occupancy, double growth_ratio, int warn_on_update)
00070 {
00071   static char rname[]="sih_create";
00072   sih_t *ht = (sih_t *) rr_malloc(sizeof(sih_t));
00073 
00074   initial_size = nearest_prime_up(MAX(initial_size,11));
00075   if (max_occupancy<0.01 || max_occupancy>0.99) 
00076     quit(-1,"%s ERROR: max_occupancy (%.3f) must be in the range 0.01-0.99\n",rname,max_occupancy);
00077   if (growth_ratio<1.10 || growth_ratio>100) 
00078     quit(-1,"%s ERROR: growth_ratio (%.3f) must be in the range 1.1-100\n",rname,growth_ratio);
00079 
00080   ht->max_occupancy = max_occupancy;
00081   ht->growth_ratio = growth_ratio;
00082   ht->warn_on_update = warn_on_update;
00083   ht->nslots = initial_size;
00084   ht->nentries = 0;
00085   ht->slots = (sih_slot_t *) rr_calloc(ht->nslots,sizeof(sih_slot_t));
00086   return(ht);
00087 }
00088 
00089 
00091 void sih_add(sih_t *ht, char *string, int32 intval)
00092 {
00093   static char *rname = "sih_add";
00094   unsigned int  key;
00095 
00096   if (*string==0) quit(-1,"%s ERROR: cannot hash the null string\n",rname);
00097 
00098   /* if the "occupancy rate" exceeded its limit, expand the hash table */
00099   if (((ht->nentries+1) / (double)ht->nslots) > ht->max_occupancy) {
00100      sih_slot_t *old_slots = ht->slots, 
00101                 *end_old_slots = (old_slots + ht->nslots),
00102                 *pslot;
00103 
00104      /* allocate the new hash table */
00105      ht->nslots = (int)(ht->nslots * ht->growth_ratio) + 3;
00106      if ((ht->nentries / (double)ht->nslots) > ht->max_occupancy)
00107         ht->nslots = ht->nslots * (int)(ht->max_occupancy+1) + 3;
00108      ht->nslots = nearest_prime_up(ht->nslots);
00109      ht->nentries = 0;
00110      ht->slots = (sih_slot_t *) rr_calloc(ht->nslots,sizeof(sih_slot_t));
00111 
00112      /* copy all entries from old hash table to new one */
00113      for (pslot = old_slots; pslot<end_old_slots; pslot++) {
00114         if (pslot->string) sih_add(ht, pslot->string, pslot->intval);
00115      }
00116      free (old_slots);
00117   }
00118 
00119   SIH_KEY(string,key);
00120 
00121   for (; ; key++) {
00122      char *pstr;
00123      key %= ht->nslots;
00124      pstr = ht->slots[key].string;
00125      if (!pstr) {
00126         ht->slots[key].string = string;
00127         ht->slots[key].intval = intval;
00128         ht->nentries++;
00129         break;
00130      }
00131      else if (strcmp(pstr,string) == 0) {  /* updating an existing entry*/
00132         if (ht->warn_on_update) {
00133            fprintf(stderr,"%s WARNING: repeated hashing of '%s'",rname,string);
00134            if (ht->slots[key].intval != intval) 
00135              fprintf(stderr,", older value will be overridden.\n");
00136            else fprintf(stderr,".\n");
00137         }
00138         ht->slots[key].intval = intval;
00139         break;
00140      }
00141   }
00142 }
00143 
00145 char sih_lookup(sih_t *ht, char *string, int32 *p_intval)
00146 {
00147   static char *rname = "sih_lookup";
00148   unsigned int  key;
00149 
00150   if (*string == 0) quit(-1,"%s ERROR: cannot hash the null string\n",rname);
00151 
00152   SIH_KEY(string,key);
00153 
00154   for (; ; key++) {
00155      char *pstr;
00156 
00157      key %= ht->nslots;
00158      pstr = ht->slots[key].string;
00159      if (!pstr) {
00160         *p_intval = 0;
00161         return(0);
00162      }
00163      else if (strcmp(pstr, string) == 0) {
00164         *p_intval = (int) ht->slots[key].intval;
00165         return (1);
00166      }
00167   }
00168 }
00169 
00170 
00171 
00172 /* ======================================================================== */
00173 
00174 
00181 void *sih_val_write_to_file(sih_t *ht, FILE *fp, char *filename, int verbosity)
00182 {
00183 
00184   static char rname[] = "sih_val_wrt_to_file";
00185   int nstrings=0, total_string_space=0, islot, version=HASH_VERSION;
00186   char null_char = '\0';
00187 
00188   /* write out the header */
00189   rr_fwrite(&version,sizeof(int),1,fp,"version");
00190   rr_fwrite(&ht->max_occupancy,sizeof(double),1,fp,"ht->max_occupancy");
00191   rr_fwrite(&ht->growth_ratio,sizeof(double),1,fp,"ht->growth_ratio");
00192   rr_fwrite(&ht->warn_on_update,sizeof(int),1,fp,"ht->warn_on_update");
00193   rr_fwrite(&ht->nslots,sizeof(int),1,fp,"ht->nslots");
00194   rr_fwrite(&ht->nentries,sizeof(int),1,fp,"ht->nentries");
00195 
00196   /* Write out the array of 'intval's.  */
00197   /* Also, compute and write out the total space taken by the strings */
00198   for (islot=0; islot<ht->nslots; islot++) {
00199     rr_fwrite(&(ht->slots[islot].intval),sizeof(int32),1,fp,"ht->slots[islot].intval");
00200     if (ht->slots[islot].string) {
00201        total_string_space += strlen(ht->slots[islot].string)+1;
00202        nstrings++;
00203     }
00204     else total_string_space++;
00205   }
00206   if (nstrings!=ht->nentries)
00207      quit(-1,"%s: nentries=%d, but there are actually %d non-empty entries\n",
00208               rname, ht->nentries, nstrings);
00209 
00210   /* Write out the strings, with the trailing null, preceded by their length */
00211   rr_fwrite(&total_string_space,sizeof(int),1,fp,"total_string_space");
00212   for (islot=0; islot<ht->nslots; islot++) {
00213      if (ht->slots[islot].string)
00214        rr_fwrite(ht->slots[islot].string,sizeof(char),strlen(ht->slots[islot].string)+1,fp,"str");
00215      else 
00216        rr_fwrite(&null_char,sizeof(char),1,fp,"null");
00217   }
00218   if (verbosity) fprintf(stderr,
00219      "%s: a hash table of %d entries (%d non-empty) was written to '%s'\n",
00220       rname, ht->nslots, ht->nentries, filename);
00221 
00222   return(0); /* Not relevant, but stops compilation warnings. */
00223 
00224 }
00225 
00226 
00227 void *sih_val_read_from_file(sih_t *ht, FILE *fp, char *filename, int verbosity)
00228 {
00229   static char rname[] = "sih_val_rd_fm_file";
00230   int total_string_space=0, islot, version;
00231   char *string_block, *pstring,  *past_end_of_block;
00232 
00233   /* read in the header and allocate a zeroed table */
00234   rr_fread(&version,sizeof(int),1,fp,"version",0);
00235   if (version!=HASH_VERSION)
00236     quit(-1,"%s ERROR: version of '%s' is %d, current version is %d\n",
00237              rname, filename, version, HASH_VERSION);
00238   rr_fread(&ht->max_occupancy,sizeof(double),1,fp,"ht->max_occupancy",0);
00239   rr_fread(&ht->growth_ratio,sizeof(double),1,fp,"ht->growth_ratio",0);
00240   rr_fread(&ht->warn_on_update,sizeof(int),1,fp,"ht->warn_on_update",0);
00241   rr_fread(&ht->nslots,sizeof(int),1,fp,"ht->nslots",0);
00242   rr_fread(&ht->nentries,sizeof(int),1,fp,"ht->nentries",0);
00243   ht->slots = (sih_slot_t *) rr_calloc(ht->nslots,sizeof(sih_slot_t));
00244 
00245   /* Read in the array of 'intval's  */
00246   for (islot=0; islot<ht->nslots; islot++) {
00247      rr_fread(&(ht->slots[islot].intval),sizeof(int32),1,fp,"intv",0);
00248   }
00249 
00250   /* Read in the block of strings */
00251   rr_fread(&total_string_space,sizeof(int),1,fp,"total_string_space",0);
00252   string_block = (char *) rr_malloc(total_string_space*sizeof(char));
00253   rr_fread(string_block,sizeof(char),total_string_space,fp,"string_block",0);
00254 
00255   /* make 'string's of non-empty entries point to their corresponding string */
00256   pstring = string_block;
00257   past_end_of_block = string_block + total_string_space;
00258   for (islot=0; islot<ht->nslots; islot++) {
00259      if (*pstring == NULL) {
00260         /* an empty entry: */
00261         ht->slots[islot].string = NULL;
00262         pstring++;
00263      }
00264      else {
00265         /* a real entry: find the trailing null */
00266         ht->slots[islot].string = pstring;
00267         while (*pstring && (pstring < past_end_of_block)) pstring++;
00268         if (pstring >= past_end_of_block) 
00269           quit(-1,"%s ERROR: in '%s', string block ended prematurely\n",
00270                     rname, filename);
00271         pstring++; /* point to beginning of next string */
00272      }
00273   }
00274   if (pstring!=past_end_of_block) 
00275     quit(-1,"%s ERROR: some strings remained unaccounted for in %s\n",
00276              rname, filename);
00277   if (verbosity) fprintf(stderr,
00278      "%s: a hash table of %d entries (%d non-empty) was read from '%s'\n",
00279       rname, ht->nslots, ht->nentries, filename);
00280 
00281   return(0); /* Not relevant, but stops compilation warnings. */
00282 }
00283 
00284 
00285 
00286 

Generated on Tue Dec 21 13:54:46 2004 by doxygen1.2.18