00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019
00020
00021
00022
00023
00024
00025
00026
00027 #include <stdio.h>
00028 #include <sys/types.h>
00029 #include "general.h"
00030 #include "sih.h"
00031
00032
00033
00034
00035 #include <stdlib.h>
00036
00041 #define HASH_VERSION 940728
00042 #define SIH_KEY(STRING,KEY) {\
00043 int i = 0; \
00044 char *pstr = STRING; \
00045 KEY = 0; \
00046 do {KEY += *pstr++ << (0xF & --i);} while (*pstr); \
00047 }
00048
00050 int nearest_prime_up(int num)
00051 {
00052 int num_has_divisor=1;
00053 if (num/2*2 == num) num++;
00054 for (; num_has_divisor; num+=2) {
00055 int div;
00056 num_has_divisor=0;
00057 for (div=3; div<=num/3; div++) {
00058 if ((num/div) * div == num) {
00059 num_has_divisor=1;
00060 break;
00061 }
00062 }
00063 }
00064 num -= 2;
00065 return(num);
00066 }
00067
00069 sih_t *sih_create(int initial_size, double max_occupancy, double growth_ratio, int warn_on_update)
00070 {
00071 static char rname[]="sih_create";
00072 sih_t *ht = (sih_t *) rr_malloc(sizeof(sih_t));
00073
00074 initial_size = nearest_prime_up(MAX(initial_size,11));
00075 if (max_occupancy<0.01 || max_occupancy>0.99)
00076 quit(-1,"%s ERROR: max_occupancy (%.3f) must be in the range 0.01-0.99\n",rname,max_occupancy);
00077 if (growth_ratio<1.10 || growth_ratio>100)
00078 quit(-1,"%s ERROR: growth_ratio (%.3f) must be in the range 1.1-100\n",rname,growth_ratio);
00079
00080 ht->max_occupancy = max_occupancy;
00081 ht->growth_ratio = growth_ratio;
00082 ht->warn_on_update = warn_on_update;
00083 ht->nslots = initial_size;
00084 ht->nentries = 0;
00085 ht->slots = (sih_slot_t *) rr_calloc(ht->nslots,sizeof(sih_slot_t));
00086 return(ht);
00087 }
00088
00089
00091 void sih_add(sih_t *ht, char *string, int32 intval)
00092 {
00093 static char *rname = "sih_add";
00094 unsigned int key;
00095
00096 if (*string==0) quit(-1,"%s ERROR: cannot hash the null string\n",rname);
00097
00098
00099 if (((ht->nentries+1) / (double)ht->nslots) > ht->max_occupancy) {
00100 sih_slot_t *old_slots = ht->slots,
00101 *end_old_slots = (old_slots + ht->nslots),
00102 *pslot;
00103
00104
00105 ht->nslots = (int)(ht->nslots * ht->growth_ratio) + 3;
00106 if ((ht->nentries / (double)ht->nslots) > ht->max_occupancy)
00107 ht->nslots = ht->nslots * (int)(ht->max_occupancy+1) + 3;
00108 ht->nslots = nearest_prime_up(ht->nslots);
00109 ht->nentries = 0;
00110 ht->slots = (sih_slot_t *) rr_calloc(ht->nslots,sizeof(sih_slot_t));
00111
00112
00113 for (pslot = old_slots; pslot<end_old_slots; pslot++) {
00114 if (pslot->string) sih_add(ht, pslot->string, pslot->intval);
00115 }
00116 free (old_slots);
00117 }
00118
00119 SIH_KEY(string,key);
00120
00121 for (; ; key++) {
00122 char *pstr;
00123 key %= ht->nslots;
00124 pstr = ht->slots[key].string;
00125 if (!pstr) {
00126 ht->slots[key].string = string;
00127 ht->slots[key].intval = intval;
00128 ht->nentries++;
00129 break;
00130 }
00131 else if (strcmp(pstr,string) == 0) {
00132 if (ht->warn_on_update) {
00133 fprintf(stderr,"%s WARNING: repeated hashing of '%s'",rname,string);
00134 if (ht->slots[key].intval != intval)
00135 fprintf(stderr,", older value will be overridden.\n");
00136 else fprintf(stderr,".\n");
00137 }
00138 ht->slots[key].intval = intval;
00139 break;
00140 }
00141 }
00142 }
00143
00145 char sih_lookup(sih_t *ht, char *string, int32 *p_intval)
00146 {
00147 static char *rname = "sih_lookup";
00148 unsigned int key;
00149
00150 if (*string == 0) quit(-1,"%s ERROR: cannot hash the null string\n",rname);
00151
00152 SIH_KEY(string,key);
00153
00154 for (; ; key++) {
00155 char *pstr;
00156
00157 key %= ht->nslots;
00158 pstr = ht->slots[key].string;
00159 if (!pstr) {
00160 *p_intval = 0;
00161 return(0);
00162 }
00163 else if (strcmp(pstr, string) == 0) {
00164 *p_intval = (int) ht->slots[key].intval;
00165 return (1);
00166 }
00167 }
00168 }
00169
00170
00171
00172
00173
00174
00181 void *sih_val_write_to_file(sih_t *ht, FILE *fp, char *filename, int verbosity)
00182 {
00183
00184 static char rname[] = "sih_val_wrt_to_file";
00185 int nstrings=0, total_string_space=0, islot, version=HASH_VERSION;
00186 char null_char = '\0';
00187
00188
00189 rr_fwrite(&version,sizeof(int),1,fp,"version");
00190 rr_fwrite(&ht->max_occupancy,sizeof(double),1,fp,"ht->max_occupancy");
00191 rr_fwrite(&ht->growth_ratio,sizeof(double),1,fp,"ht->growth_ratio");
00192 rr_fwrite(&ht->warn_on_update,sizeof(int),1,fp,"ht->warn_on_update");
00193 rr_fwrite(&ht->nslots,sizeof(int),1,fp,"ht->nslots");
00194 rr_fwrite(&ht->nentries,sizeof(int),1,fp,"ht->nentries");
00195
00196
00197
00198 for (islot=0; islot<ht->nslots; islot++) {
00199 rr_fwrite(&(ht->slots[islot].intval),sizeof(int32),1,fp,"ht->slots[islot].intval");
00200 if (ht->slots[islot].string) {
00201 total_string_space += strlen(ht->slots[islot].string)+1;
00202 nstrings++;
00203 }
00204 else total_string_space++;
00205 }
00206 if (nstrings!=ht->nentries)
00207 quit(-1,"%s: nentries=%d, but there are actually %d non-empty entries\n",
00208 rname, ht->nentries, nstrings);
00209
00210
00211 rr_fwrite(&total_string_space,sizeof(int),1,fp,"total_string_space");
00212 for (islot=0; islot<ht->nslots; islot++) {
00213 if (ht->slots[islot].string)
00214 rr_fwrite(ht->slots[islot].string,sizeof(char),strlen(ht->slots[islot].string)+1,fp,"str");
00215 else
00216 rr_fwrite(&null_char,sizeof(char),1,fp,"null");
00217 }
00218 if (verbosity) fprintf(stderr,
00219 "%s: a hash table of %d entries (%d non-empty) was written to '%s'\n",
00220 rname, ht->nslots, ht->nentries, filename);
00221
00222 return(0);
00223
00224 }
00225
00226
00227 void *sih_val_read_from_file(sih_t *ht, FILE *fp, char *filename, int verbosity)
00228 {
00229 static char rname[] = "sih_val_rd_fm_file";
00230 int total_string_space=0, islot, version;
00231 char *string_block, *pstring, *past_end_of_block;
00232
00233
00234 rr_fread(&version,sizeof(int),1,fp,"version",0);
00235 if (version!=HASH_VERSION)
00236 quit(-1,"%s ERROR: version of '%s' is %d, current version is %d\n",
00237 rname, filename, version, HASH_VERSION);
00238 rr_fread(&ht->max_occupancy,sizeof(double),1,fp,"ht->max_occupancy",0);
00239 rr_fread(&ht->growth_ratio,sizeof(double),1,fp,"ht->growth_ratio",0);
00240 rr_fread(&ht->warn_on_update,sizeof(int),1,fp,"ht->warn_on_update",0);
00241 rr_fread(&ht->nslots,sizeof(int),1,fp,"ht->nslots",0);
00242 rr_fread(&ht->nentries,sizeof(int),1,fp,"ht->nentries",0);
00243 ht->slots = (sih_slot_t *) rr_calloc(ht->nslots,sizeof(sih_slot_t));
00244
00245
00246 for (islot=0; islot<ht->nslots; islot++) {
00247 rr_fread(&(ht->slots[islot].intval),sizeof(int32),1,fp,"intv",0);
00248 }
00249
00250
00251 rr_fread(&total_string_space,sizeof(int),1,fp,"total_string_space",0);
00252 string_block = (char *) rr_malloc(total_string_space*sizeof(char));
00253 rr_fread(string_block,sizeof(char),total_string_space,fp,"string_block",0);
00254
00255
00256 pstring = string_block;
00257 past_end_of_block = string_block + total_string_space;
00258 for (islot=0; islot<ht->nslots; islot++) {
00259 if (*pstring == NULL) {
00260
00261 ht->slots[islot].string = NULL;
00262 pstring++;
00263 }
00264 else {
00265
00266 ht->slots[islot].string = pstring;
00267 while (*pstring && (pstring < past_end_of_block)) pstring++;
00268 if (pstring >= past_end_of_block)
00269 quit(-1,"%s ERROR: in '%s', string block ended prematurely\n",
00270 rname, filename);
00271 pstring++;
00272 }
00273 }
00274 if (pstring!=past_end_of_block)
00275 quit(-1,"%s ERROR: some strings remained unaccounted for in %s\n",
00276 rname, filename);
00277 if (verbosity) fprintf(stderr,
00278 "%s: a hash table of %d entries (%d non-empty) was read from '%s'\n",
00279 rname, ht->nslots, ht->nentries, filename);
00280
00281 return(0);
00282 }
00283
00284
00285
00286