0
|
1 /* An expandable hash tables datatype.
|
|
2 Copyright (C) 1999, 2000, 2001, 2002, 2003, 2004
|
|
3 Free Software Foundation, Inc.
|
|
4 Contributed by Vladimir Makarov (vmakarov@cygnus.com).
|
|
5
|
|
6 This file is part of the libiberty library.
|
|
7 Libiberty is free software; you can redistribute it and/or
|
|
8 modify it under the terms of the GNU Library General Public
|
|
9 License as published by the Free Software Foundation; either
|
|
10 version 2 of the License, or (at your option) any later version.
|
|
11
|
|
12 Libiberty is distributed in the hope that it will be useful,
|
|
13 but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
|
15 Library General Public License for more details.
|
|
16
|
|
17 You should have received a copy of the GNU Library General Public
|
|
18 License along with libiberty; see the file COPYING.LIB. If
|
|
19 not, write to the Free Software Foundation, Inc., 51 Franklin Street - Fifth Floor,
|
|
20 Boston, MA 02110-1301, USA. */
|
|
21
|
|
22 /* This package implements basic hash table functionality. It is possible
|
|
23 to search for an entry, create an entry and destroy an entry.
|
|
24
|
|
25 Elements in the table are generic pointers.
|
|
26
|
|
27 The size of the table is not fixed; if the occupancy of the table
|
|
28 grows too high the hash table will be expanded.
|
|
29
|
|
30 The abstract data implementation is based on generalized Algorithm D
|
|
31 from Knuth's book "The art of computer programming". Hash table is
|
|
32 expanded by creation of new hash table and transferring elements from
|
|
33 the old table to the new table. */
|
|
34
|
|
35 #ifdef HAVE_CONFIG_H
|
|
36 #include "config.h"
|
|
37 #endif
|
|
38
|
|
39 #include <sys/types.h>
|
|
40
|
|
41 #ifdef HAVE_STDLIB_H
|
|
42 #include <stdlib.h>
|
|
43 #endif
|
|
44 #ifdef HAVE_STRING_H
|
|
45 #include <string.h>
|
|
46 #endif
|
|
47 #ifdef HAVE_MALLOC_H
|
|
48 #include <malloc.h>
|
|
49 #endif
|
|
50 #ifdef HAVE_LIMITS_H
|
|
51 #include <limits.h>
|
|
52 #endif
|
|
53 #ifdef HAVE_STDINT_H
|
|
54 #include <stdint.h>
|
|
55 #endif
|
|
56
|
|
57 #include <stdio.h>
|
|
58
|
|
59 #include "libiberty.h"
|
|
60 #include "ansidecl.h"
|
|
61 #include "hashtab.h"
|
|
62
|
|
63 #ifndef CHAR_BIT
|
|
64 #define CHAR_BIT 8
|
|
65 #endif
|
|
66
|
|
67 static unsigned int higher_prime_index (unsigned long);
|
|
68 static hashval_t htab_mod_1 (hashval_t, hashval_t, hashval_t, int);
|
|
69 static hashval_t htab_mod (hashval_t, htab_t);
|
|
70 static hashval_t htab_mod_m2 (hashval_t, htab_t);
|
|
71 static hashval_t hash_pointer (const void *);
|
|
72 static int eq_pointer (const void *, const void *);
|
|
73 static int htab_expand (htab_t);
|
|
74 static PTR *find_empty_slot_for_expand (htab_t, hashval_t);
|
|
75
|
|
76 /* At some point, we could make these be NULL, and modify the
|
|
77 hash-table routines to handle NULL specially; that would avoid
|
|
78 function-call overhead for the common case of hashing pointers. */
|
|
79 htab_hash htab_hash_pointer = hash_pointer;
|
|
80 htab_eq htab_eq_pointer = eq_pointer;
|
|
81
|
|
82 /* Table of primes and multiplicative inverses.
|
|
83
|
|
84 Note that these are not minimally reduced inverses. Unlike when generating
|
|
85 code to divide by a constant, we want to be able to use the same algorithm
|
|
86 all the time. All of these inverses (are implied to) have bit 32 set.
|
|
87
|
|
88 For the record, here's the function that computed the table; it's a
|
|
89 vastly simplified version of the function of the same name from gcc. */
|
|
90
|
|
#if 0
/* Disabled code, kept only as a record of how the inverses in the
   prime table were generated.  */

/* Return I + 1 for the largest I in [0, 31] with X > 2**I, i.e. the
   ceiling of log2 (X) for X >= 2.  Aborts when no such I exists
   (X <= 1).  */
unsigned int
ceil_log2 (unsigned int x)
{
  int i;
  for (i = 31; i >= 0 ; --i)
    if (x > (1u << i))
      return i+1;
  abort ();
}

/* Compute a multiplier and shift count for dividing by D.
   Stores ceil_log2 (D) - 1 in *SHIFTP and the low bits of
   (2**(32 + lgup) + 2**lgup) / D in *MLP (converted to unsigned int);
   returns the bits of that quotient above bit 31.
   NOTE(review): ldexp is declared in <math.h>, which the include list
   of this file does not show; it would be needed to re-enable this.  */
unsigned int
choose_multiplier (unsigned int d, unsigned int *mlp, unsigned char *shiftp)
{
  unsigned long long mhigh;
  double nx;
  int lgup, post_shift;
  int pow, pow2;
  int n = 32, precision = 32;

  lgup = ceil_log2 (d);
  pow = n + lgup;
  pow2 = n + lgup - precision;

  /* nx = 2**pow + 2**pow2, computed in floating point.  */
  nx = ldexp (1.0, pow) + ldexp (1.0, pow2);
  mhigh = nx / d;

  *shiftp = lgup - 1;
  *mlp = mhigh;
  return mhigh >> 32;
}
#endif
|
|
123
|
|
/* One row of the prime table: a table size together with the
   precomputed constants htab_mod and htab_mod_m2 pass to htab_mod_1
   to reduce a hash value without a division instruction.  */
struct prime_ent
{
  hashval_t prime;   /* table size; modulus of the primary hash */
  hashval_t inv;     /* multiplicative inverse used with PRIME */
  hashval_t inv_m2;  /* inverse of prime-2 */
  hashval_t shift;   /* shift count used with both inverses */
};
|
|
131
|
|
/* Available table sizes, in increasing order (higher_prime_index
   binary-searches this array, so the order matters).  Columns are
   { prime, inv, inv_m2, shift } as declared in struct prime_ent.  */
static struct prime_ent const prime_tab[] = {
  {          7, 0x24924925, 0x9999999b, 2 },
  {         13, 0x3b13b13c, 0x745d1747, 3 },
  {         31, 0x08421085, 0x1a7b9612, 4 },
  {         61, 0x0c9714fc, 0x15b1e5f8, 5 },
  {        127, 0x02040811, 0x0624dd30, 6 },
  {        251, 0x05197f7e, 0x073260a5, 7 },
  {        509, 0x01824366, 0x02864fc8, 8 },
  {       1021, 0x00c0906d, 0x014191f7, 9 },
  {       2039, 0x0121456f, 0x0161e69e, 10 },
  {       4093, 0x00300902, 0x00501908, 11 },
  {       8191, 0x00080041, 0x00180241, 12 },
  {      16381, 0x000c0091, 0x00140191, 13 },
  {      32749, 0x002605a5, 0x002a06e6, 14 },
  {      65521, 0x000f00e2, 0x00110122, 15 },
  {     131071, 0x00008001, 0x00018003, 16 },
  {     262139, 0x00014002, 0x0001c004, 17 },
  {     524287, 0x00002001, 0x00006001, 18 },
  {    1048573, 0x00003001, 0x00005001, 19 },
  {    2097143, 0x00004801, 0x00005801, 20 },
  {    4194301, 0x00000c01, 0x00001401, 21 },
  {    8388593, 0x00001e01, 0x00002201, 22 },
  {   16777213, 0x00000301, 0x00000501, 23 },
  {   33554393, 0x00001381, 0x00001481, 24 },
  {   67108859, 0x00000141, 0x000001c1, 25 },
  {  134217689, 0x000004e1, 0x00000521, 26 },
  {  268435399, 0x00000391, 0x000003b1, 27 },
  {  536870909, 0x00000019, 0x00000029, 28 },
  { 1073741789, 0x0000008d, 0x00000095, 29 },
  { 2147483647, 0x00000003, 0x00000007, 30 },
  /* Avoid "decimal constant so large it is unsigned" for 4294967291.  */
  { 0xfffffffb, 0x00000006, 0x00000008, 31 }
};
|
|
165
|
|
166 /* The following function returns an index into the above table of the
|
|
167 nearest prime number which is greater than N, and near a power of two. */
|
|
168
|
|
169 static unsigned int
|
|
170 higher_prime_index (unsigned long n)
|
|
171 {
|
|
172 unsigned int low = 0;
|
|
173 unsigned int high = sizeof(prime_tab) / sizeof(prime_tab[0]);
|
|
174
|
|
175 while (low != high)
|
|
176 {
|
|
177 unsigned int mid = low + (high - low) / 2;
|
|
178 if (n > prime_tab[mid].prime)
|
|
179 low = mid + 1;
|
|
180 else
|
|
181 high = mid;
|
|
182 }
|
|
183
|
|
184 /* If we've run out of primes, abort. */
|
|
185 if (n > prime_tab[low].prime)
|
|
186 {
|
|
187 fprintf (stderr, "Cannot find prime bigger than %lu\n", n);
|
|
188 abort ();
|
|
189 }
|
|
190
|
|
191 return low;
|
|
192 }
|
|
193
|
|
194 /* Returns a hash code for P. */
|
|
195
|
|
196 static hashval_t
|
|
197 hash_pointer (const PTR p)
|
|
198 {
|
|
199 return (hashval_t) ((long)p >> 3);
|
|
200 }
|
|
201
|
|
202 /* Returns non-zero if P1 and P2 are equal. */
|
|
203
|
|
204 static int
|
|
205 eq_pointer (const PTR p1, const PTR p2)
|
|
206 {
|
|
207 return p1 == p2;
|
|
208 }
|
|
209
|
|
210
|
|
211 /* The parens around the function names in the next two definitions
|
|
212 are essential in order to prevent macro expansions of the name.
|
|
213 The bodies, however, are expanded as expected, so they are not
|
|
214 recursive definitions. */
|
|
215
|
|
216 /* Return the current size of given hash table. */
|
|
217
|
|
218 #define htab_size(htab) ((htab)->size)
|
|
219
|
|
220 size_t
|
|
221 (htab_size) (htab_t htab)
|
|
222 {
|
|
223 return htab_size (htab);
|
|
224 }
|
|
225
|
|
226 /* Return the current number of elements in given hash table. */
|
|
227
|
|
228 #define htab_elements(htab) ((htab)->n_elements - (htab)->n_deleted)
|
|
229
|
|
230 size_t
|
|
231 (htab_elements) (htab_t htab)
|
|
232 {
|
|
233 return htab_elements (htab);
|
|
234 }
|
|
235
|
|
236 /* Return X % Y. */
|
|
237
|
|
238 static inline hashval_t
|
|
239 htab_mod_1 (hashval_t x, hashval_t y, hashval_t inv, int shift)
|
|
240 {
|
|
241 /* The multiplicative inverses computed above are for 32-bit types, and
|
|
242 requires that we be able to compute a highpart multiply. */
|
|
243 #ifdef UNSIGNED_64BIT_TYPE
|
|
244 __extension__ typedef UNSIGNED_64BIT_TYPE ull;
|
|
245 if (sizeof (hashval_t) * CHAR_BIT <= 32)
|
|
246 {
|
|
247 hashval_t t1, t2, t3, t4, q, r;
|
|
248
|
|
249 t1 = ((ull)x * inv) >> 32;
|
|
250 t2 = x - t1;
|
|
251 t3 = t2 >> 1;
|
|
252 t4 = t1 + t3;
|
|
253 q = t4 >> shift;
|
|
254 r = x - (q * y);
|
|
255
|
|
256 return r;
|
|
257 }
|
|
258 #endif
|
|
259
|
|
260 /* Otherwise just use the native division routines. */
|
|
261 return x % y;
|
|
262 }
|
|
263
|
|
264 /* Compute the primary hash for HASH given HTAB's current size. */
|
|
265
|
|
266 static inline hashval_t
|
|
267 htab_mod (hashval_t hash, htab_t htab)
|
|
268 {
|
|
269 const struct prime_ent *p = &prime_tab[htab->size_prime_index];
|
|
270 return htab_mod_1 (hash, p->prime, p->inv, p->shift);
|
|
271 }
|
|
272
|
|
273 /* Compute the secondary hash for HASH given HTAB's current size. */
|
|
274
|
|
275 static inline hashval_t
|
|
276 htab_mod_m2 (hashval_t hash, htab_t htab)
|
|
277 {
|
|
278 const struct prime_ent *p = &prime_tab[htab->size_prime_index];
|
|
279 return 1 + htab_mod_1 (hash, p->prime - 2, p->inv_m2, p->shift);
|
|
280 }
|
|
281
|
|
282 /* This function creates table with length slightly longer than given
|
|
283 source length. Created hash table is initiated as empty (all the
|
|
284 hash table entries are HTAB_EMPTY_ENTRY). The function returns the
|
|
285 created hash table, or NULL if memory allocation fails. */
|
|
286
|
|
287 htab_t
|
|
288 htab_create_alloc (size_t size, htab_hash hash_f, htab_eq eq_f,
|
|
289 htab_del del_f, htab_alloc alloc_f, htab_free free_f)
|
|
290 {
|
|
291 htab_t result;
|
|
292 unsigned int size_prime_index;
|
|
293
|
|
294 size_prime_index = higher_prime_index (size);
|
|
295 size = prime_tab[size_prime_index].prime;
|
|
296
|
|
297 result = (htab_t) (*alloc_f) (1, sizeof (struct htab));
|
|
298 if (result == NULL)
|
|
299 return NULL;
|
|
300 result->entries = (PTR *) (*alloc_f) (size, sizeof (PTR));
|
|
301 if (result->entries == NULL)
|
|
302 {
|
|
303 if (free_f != NULL)
|
|
304 (*free_f) (result);
|
|
305 return NULL;
|
|
306 }
|
|
307 result->size = size;
|
|
308 result->size_prime_index = size_prime_index;
|
|
309 result->hash_f = hash_f;
|
|
310 result->eq_f = eq_f;
|
|
311 result->del_f = del_f;
|
|
312 result->alloc_f = alloc_f;
|
|
313 result->free_f = free_f;
|
|
314 return result;
|
|
315 }
|
|
316
|
|
317 /* As above, but use the variants of alloc_f and free_f which accept
|
|
318 an extra argument. */
|
|
319
|
|
320 htab_t
|
|
321 htab_create_alloc_ex (size_t size, htab_hash hash_f, htab_eq eq_f,
|
|
322 htab_del del_f, void *alloc_arg,
|
|
323 htab_alloc_with_arg alloc_f,
|
|
324 htab_free_with_arg free_f)
|
|
325 {
|
|
326 htab_t result;
|
|
327 unsigned int size_prime_index;
|
|
328
|
|
329 size_prime_index = higher_prime_index (size);
|
|
330 size = prime_tab[size_prime_index].prime;
|
|
331
|
|
332 result = (htab_t) (*alloc_f) (alloc_arg, 1, sizeof (struct htab));
|
|
333 if (result == NULL)
|
|
334 return NULL;
|
|
335 result->entries = (PTR *) (*alloc_f) (alloc_arg, size, sizeof (PTR));
|
|
336 if (result->entries == NULL)
|
|
337 {
|
|
338 if (free_f != NULL)
|
|
339 (*free_f) (alloc_arg, result);
|
|
340 return NULL;
|
|
341 }
|
|
342 result->size = size;
|
|
343 result->size_prime_index = size_prime_index;
|
|
344 result->hash_f = hash_f;
|
|
345 result->eq_f = eq_f;
|
|
346 result->del_f = del_f;
|
|
347 result->alloc_arg = alloc_arg;
|
|
348 result->alloc_with_arg_f = alloc_f;
|
|
349 result->free_with_arg_f = free_f;
|
|
350 return result;
|
|
351 }
|
|
352
|
|
353 /* Update the function pointers and allocation parameter in the htab_t. */
|
|
354
|
|
355 void
|
|
356 htab_set_functions_ex (htab_t htab, htab_hash hash_f, htab_eq eq_f,
|
|
357 htab_del del_f, PTR alloc_arg,
|
|
358 htab_alloc_with_arg alloc_f, htab_free_with_arg free_f)
|
|
359 {
|
|
360 htab->hash_f = hash_f;
|
|
361 htab->eq_f = eq_f;
|
|
362 htab->del_f = del_f;
|
|
363 htab->alloc_arg = alloc_arg;
|
|
364 htab->alloc_with_arg_f = alloc_f;
|
|
365 htab->free_with_arg_f = free_f;
|
|
366 }
|
|
367
|
|
368 /* These functions exist solely for backward compatibility. */
|
|
369
|
|
370 #undef htab_create
|
|
371 htab_t
|
|
372 htab_create (size_t size, htab_hash hash_f, htab_eq eq_f, htab_del del_f)
|
|
373 {
|
|
374 return htab_create_alloc (size, hash_f, eq_f, del_f, xcalloc, free);
|
|
375 }
|
|
376
|
|
377 htab_t
|
|
378 htab_try_create (size_t size, htab_hash hash_f, htab_eq eq_f, htab_del del_f)
|
|
379 {
|
|
380 return htab_create_alloc (size, hash_f, eq_f, del_f, calloc, free);
|
|
381 }
|
|
382
|
|
383 /* This function frees all memory allocated for given hash table.
|
|
384 Naturally the hash table must already exist. */
|
|
385
|
|
386 void
|
|
387 htab_delete (htab_t htab)
|
|
388 {
|
|
389 size_t size = htab_size (htab);
|
|
390 PTR *entries = htab->entries;
|
|
391 int i;
|
|
392
|
|
393 if (htab->del_f)
|
|
394 for (i = size - 1; i >= 0; i--)
|
|
395 if (entries[i] != HTAB_EMPTY_ENTRY && entries[i] != HTAB_DELETED_ENTRY)
|
|
396 (*htab->del_f) (entries[i]);
|
|
397
|
|
398 if (htab->free_f != NULL)
|
|
399 {
|
|
400 (*htab->free_f) (entries);
|
|
401 (*htab->free_f) (htab);
|
|
402 }
|
|
403 else if (htab->free_with_arg_f != NULL)
|
|
404 {
|
|
405 (*htab->free_with_arg_f) (htab->alloc_arg, entries);
|
|
406 (*htab->free_with_arg_f) (htab->alloc_arg, htab);
|
|
407 }
|
|
408 }
|
|
409
|
|
410 /* This function clears all entries in the given hash table. */
|
|
411
|
|
412 void
|
|
413 htab_empty (htab_t htab)
|
|
414 {
|
|
415 size_t size = htab_size (htab);
|
|
416 PTR *entries = htab->entries;
|
|
417 int i;
|
|
418
|
|
419 if (htab->del_f)
|
|
420 for (i = size - 1; i >= 0; i--)
|
|
421 if (entries[i] != HTAB_EMPTY_ENTRY && entries[i] != HTAB_DELETED_ENTRY)
|
|
422 (*htab->del_f) (entries[i]);
|
|
423
|
|
424 /* Instead of clearing megabyte, downsize the table. */
|
|
425 if (size > 1024*1024 / sizeof (PTR))
|
|
426 {
|
|
427 int nindex = higher_prime_index (1024 / sizeof (PTR));
|
|
428 int nsize = prime_tab[nindex].prime;
|
|
429
|
|
430 if (htab->free_f != NULL)
|
|
431 (*htab->free_f) (htab->entries);
|
|
432 else if (htab->free_with_arg_f != NULL)
|
|
433 (*htab->free_with_arg_f) (htab->alloc_arg, htab->entries);
|
|
434 if (htab->alloc_with_arg_f != NULL)
|
|
435 htab->entries = (PTR *) (*htab->alloc_with_arg_f) (htab->alloc_arg, nsize,
|
|
436 sizeof (PTR *));
|
|
437 else
|
|
438 htab->entries = (PTR *) (*htab->alloc_f) (nsize, sizeof (PTR *));
|
|
439 htab->size = nsize;
|
|
440 htab->size_prime_index = nindex;
|
|
441 }
|
|
442 else
|
|
443 memset (entries, 0, size * sizeof (PTR));
|
|
444 htab->n_deleted = 0;
|
|
445 htab->n_elements = 0;
|
|
446 }
|
|
447
|
|
448 /* Similar to htab_find_slot, but without several unwanted side effects:
|
|
449 - Does not call htab->eq_f when it finds an existing entry.
|
|
450 - Does not change the count of elements/searches/collisions in the
|
|
451 hash table.
|
|
452 This function also assumes there are no deleted entries in the table.
|
|
453 HASH is the hash value for the element to be inserted. */
|
|
454
|
|
455 static PTR *
|
|
456 find_empty_slot_for_expand (htab_t htab, hashval_t hash)
|
|
457 {
|
|
458 hashval_t index = htab_mod (hash, htab);
|
|
459 size_t size = htab_size (htab);
|
|
460 PTR *slot = htab->entries + index;
|
|
461 hashval_t hash2;
|
|
462
|
|
463 if (*slot == HTAB_EMPTY_ENTRY)
|
|
464 return slot;
|
|
465 else if (*slot == HTAB_DELETED_ENTRY)
|
|
466 abort ();
|
|
467
|
|
468 hash2 = htab_mod_m2 (hash, htab);
|
|
469 for (;;)
|
|
470 {
|
|
471 index += hash2;
|
|
472 if (index >= size)
|
|
473 index -= size;
|
|
474
|
|
475 slot = htab->entries + index;
|
|
476 if (*slot == HTAB_EMPTY_ENTRY)
|
|
477 return slot;
|
|
478 else if (*slot == HTAB_DELETED_ENTRY)
|
|
479 abort ();
|
|
480 }
|
|
481 }
|
|
482
|
|
483 /* The following function changes size of memory allocated for the
|
|
484 entries and repeatedly inserts the table elements. The occupancy
|
|
485 of the table after the call will be about 50%. Naturally the hash
|
|
486 table must already exist. Remember also that the place of the
|
|
487 table entries is changed. If memory allocation failures are allowed,
|
|
488 this function will return zero, indicating that the table could not be
|
|
489 expanded. If all goes well, it will return a non-zero value. */
|
|
490
|
|
491 static int
|
|
492 htab_expand (htab_t htab)
|
|
493 {
|
|
494 PTR *oentries;
|
|
495 PTR *olimit;
|
|
496 PTR *p;
|
|
497 PTR *nentries;
|
|
498 size_t nsize, osize, elts;
|
|
499 unsigned int oindex, nindex;
|
|
500
|
|
501 oentries = htab->entries;
|
|
502 oindex = htab->size_prime_index;
|
|
503 osize = htab->size;
|
|
504 olimit = oentries + osize;
|
|
505 elts = htab_elements (htab);
|
|
506
|
|
507 /* Resize only when table after removal of unused elements is either
|
|
508 too full or too empty. */
|
|
509 if (elts * 2 > osize || (elts * 8 < osize && osize > 32))
|
|
510 {
|
|
511 nindex = higher_prime_index (elts * 2);
|
|
512 nsize = prime_tab[nindex].prime;
|
|
513 }
|
|
514 else
|
|
515 {
|
|
516 nindex = oindex;
|
|
517 nsize = osize;
|
|
518 }
|
|
519
|
|
520 if (htab->alloc_with_arg_f != NULL)
|
|
521 nentries = (PTR *) (*htab->alloc_with_arg_f) (htab->alloc_arg, nsize,
|
|
522 sizeof (PTR *));
|
|
523 else
|
|
524 nentries = (PTR *) (*htab->alloc_f) (nsize, sizeof (PTR *));
|
|
525 if (nentries == NULL)
|
|
526 return 0;
|
|
527 htab->entries = nentries;
|
|
528 htab->size = nsize;
|
|
529 htab->size_prime_index = nindex;
|
|
530 htab->n_elements -= htab->n_deleted;
|
|
531 htab->n_deleted = 0;
|
|
532
|
|
533 p = oentries;
|
|
534 do
|
|
535 {
|
|
536 PTR x = *p;
|
|
537
|
|
538 if (x != HTAB_EMPTY_ENTRY && x != HTAB_DELETED_ENTRY)
|
|
539 {
|
|
540 PTR *q = find_empty_slot_for_expand (htab, (*htab->hash_f) (x));
|
|
541
|
|
542 *q = x;
|
|
543 }
|
|
544
|
|
545 p++;
|
|
546 }
|
|
547 while (p < olimit);
|
|
548
|
|
549 if (htab->free_f != NULL)
|
|
550 (*htab->free_f) (oentries);
|
|
551 else if (htab->free_with_arg_f != NULL)
|
|
552 (*htab->free_with_arg_f) (htab->alloc_arg, oentries);
|
|
553 return 1;
|
|
554 }
|
|
555
|
|
556 /* This function searches for a hash table entry equal to the given
|
|
557 element. It cannot be used to insert or delete an element. */
|
|
558
|
|
559 PTR
|
|
560 htab_find_with_hash (htab_t htab, const PTR element, hashval_t hash)
|
|
561 {
|
|
562 hashval_t index, hash2;
|
|
563 size_t size;
|
|
564 PTR entry;
|
|
565
|
|
566 htab->searches++;
|
|
567 size = htab_size (htab);
|
|
568 index = htab_mod (hash, htab);
|
|
569
|
|
570 entry = htab->entries[index];
|
|
571 if (entry == HTAB_EMPTY_ENTRY
|
|
572 || (entry != HTAB_DELETED_ENTRY && (*htab->eq_f) (entry, element)))
|
|
573 return entry;
|
|
574
|
|
575 hash2 = htab_mod_m2 (hash, htab);
|
|
576 for (;;)
|
|
577 {
|
|
578 htab->collisions++;
|
|
579 index += hash2;
|
|
580 if (index >= size)
|
|
581 index -= size;
|
|
582
|
|
583 entry = htab->entries[index];
|
|
584 if (entry == HTAB_EMPTY_ENTRY
|
|
585 || (entry != HTAB_DELETED_ENTRY && (*htab->eq_f) (entry, element)))
|
|
586 return entry;
|
|
587 }
|
|
588 }
|
|
589
|
|
590 /* Like htab_find_slot_with_hash, but compute the hash value from the
|
|
591 element. */
|
|
592
|
|
593 PTR
|
|
594 htab_find (htab_t htab, const PTR element)
|
|
595 {
|
|
596 return htab_find_with_hash (htab, element, (*htab->hash_f) (element));
|
|
597 }
|
|
598
|
|
599 /* This function searches for a hash table slot containing an entry
|
|
600 equal to the given element. To delete an entry, call this with
|
|
601 insert=NO_INSERT, then call htab_clear_slot on the slot returned
|
|
602 (possibly after doing some checks). To insert an entry, call this
|
|
603 with insert=INSERT, then write the value you want into the returned
|
|
604 slot. When inserting an entry, NULL may be returned if memory
|
|
605 allocation fails. */
|
|
606
|
|
607 PTR *
|
|
608 htab_find_slot_with_hash (htab_t htab, const PTR element,
|
|
609 hashval_t hash, enum insert_option insert)
|
|
610 {
|
|
611 PTR *first_deleted_slot;
|
|
612 hashval_t index, hash2;
|
|
613 size_t size;
|
|
614 PTR entry;
|
|
615
|
|
616 size = htab_size (htab);
|
|
617 if (insert == INSERT && size * 3 <= htab->n_elements * 4)
|
|
618 {
|
|
619 if (htab_expand (htab) == 0)
|
|
620 return NULL;
|
|
621 size = htab_size (htab);
|
|
622 }
|
|
623
|
|
624 index = htab_mod (hash, htab);
|
|
625
|
|
626 htab->searches++;
|
|
627 first_deleted_slot = NULL;
|
|
628
|
|
629 entry = htab->entries[index];
|
|
630 if (entry == HTAB_EMPTY_ENTRY)
|
|
631 goto empty_entry;
|
|
632 else if (entry == HTAB_DELETED_ENTRY)
|
|
633 first_deleted_slot = &htab->entries[index];
|
|
634 else if ((*htab->eq_f) (entry, element))
|
|
635 return &htab->entries[index];
|
|
636
|
|
637 hash2 = htab_mod_m2 (hash, htab);
|
|
638 for (;;)
|
|
639 {
|
|
640 htab->collisions++;
|
|
641 index += hash2;
|
|
642 if (index >= size)
|
|
643 index -= size;
|
|
644
|
|
645 entry = htab->entries[index];
|
|
646 if (entry == HTAB_EMPTY_ENTRY)
|
|
647 goto empty_entry;
|
|
648 else if (entry == HTAB_DELETED_ENTRY)
|
|
649 {
|
|
650 if (!first_deleted_slot)
|
|
651 first_deleted_slot = &htab->entries[index];
|
|
652 }
|
|
653 else if ((*htab->eq_f) (entry, element))
|
|
654 return &htab->entries[index];
|
|
655 }
|
|
656
|
|
657 empty_entry:
|
|
658 if (insert == NO_INSERT)
|
|
659 return NULL;
|
|
660
|
|
661 if (first_deleted_slot)
|
|
662 {
|
|
663 htab->n_deleted--;
|
|
664 *first_deleted_slot = HTAB_EMPTY_ENTRY;
|
|
665 return first_deleted_slot;
|
|
666 }
|
|
667
|
|
668 htab->n_elements++;
|
|
669 return &htab->entries[index];
|
|
670 }
|
|
671
|
|
672 /* Like htab_find_slot_with_hash, but compute the hash value from the
|
|
673 element. */
|
|
674
|
|
675 PTR *
|
|
676 htab_find_slot (htab_t htab, const PTR element, enum insert_option insert)
|
|
677 {
|
|
678 return htab_find_slot_with_hash (htab, element, (*htab->hash_f) (element),
|
|
679 insert);
|
|
680 }
|
|
681
|
|
682 /* This function deletes an element with the given value from hash
|
|
683 table (the hash is computed from the element). If there is no matching
|
|
684 element in the hash table, this function does nothing. */
|
|
685
|
|
686 void
|
|
687 htab_remove_elt (htab_t htab, PTR element)
|
|
688 {
|
|
689 htab_remove_elt_with_hash (htab, element, (*htab->hash_f) (element));
|
|
690 }
|
|
691
|
|
692
|
|
693 /* This function deletes an element with the given value from hash
|
|
694 table. If there is no matching element in the hash table, this
|
|
695 function does nothing. */
|
|
696
|
|
697 void
|
|
698 htab_remove_elt_with_hash (htab_t htab, PTR element, hashval_t hash)
|
|
699 {
|
|
700 PTR *slot;
|
|
701
|
|
702 slot = htab_find_slot_with_hash (htab, element, hash, NO_INSERT);
|
|
703 if (*slot == HTAB_EMPTY_ENTRY)
|
|
704 return;
|
|
705
|
|
706 if (htab->del_f)
|
|
707 (*htab->del_f) (*slot);
|
|
708
|
|
709 *slot = HTAB_DELETED_ENTRY;
|
|
710 htab->n_deleted++;
|
|
711 }
|
|
712
|
|
713 /* This function clears a specified slot in a hash table. It is
|
|
714 useful when you've already done the lookup and don't want to do it
|
|
715 again. */
|
|
716
|
|
717 void
|
|
718 htab_clear_slot (htab_t htab, PTR *slot)
|
|
719 {
|
|
720 if (slot < htab->entries || slot >= htab->entries + htab_size (htab)
|
|
721 || *slot == HTAB_EMPTY_ENTRY || *slot == HTAB_DELETED_ENTRY)
|
|
722 abort ();
|
|
723
|
|
724 if (htab->del_f)
|
|
725 (*htab->del_f) (*slot);
|
|
726
|
|
727 *slot = HTAB_DELETED_ENTRY;
|
|
728 htab->n_deleted++;
|
|
729 }
|
|
730
|
|
731 /* This function scans over the entire hash table calling
|
|
732 CALLBACK for each live entry. If CALLBACK returns false,
|
|
733 the iteration stops. INFO is passed as CALLBACK's second
|
|
734 argument. */
|
|
735
|
|
736 void
|
|
737 htab_traverse_noresize (htab_t htab, htab_trav callback, PTR info)
|
|
738 {
|
|
739 PTR *slot;
|
|
740 PTR *limit;
|
|
741
|
|
742 slot = htab->entries;
|
|
743 limit = slot + htab_size (htab);
|
|
744
|
|
745 do
|
|
746 {
|
|
747 PTR x = *slot;
|
|
748
|
|
749 if (x != HTAB_EMPTY_ENTRY && x != HTAB_DELETED_ENTRY)
|
|
750 if (!(*callback) (slot, info))
|
|
751 break;
|
|
752 }
|
|
753 while (++slot < limit);
|
|
754 }
|
|
755
|
|
756 /* Like htab_traverse_noresize, but does resize the table when it is
|
|
757 too empty to improve effectivity of subsequent calls. */
|
|
758
|
|
759 void
|
|
760 htab_traverse (htab_t htab, htab_trav callback, PTR info)
|
|
761 {
|
|
762 if (htab_elements (htab) * 8 < htab_size (htab))
|
|
763 htab_expand (htab);
|
|
764
|
|
765 htab_traverse_noresize (htab, callback, info);
|
|
766 }
|
|
767
|
|
768 /* Return the fraction of fixed collisions during all work with given
|
|
769 hash table. */
|
|
770
|
|
771 double
|
|
772 htab_collisions (htab_t htab)
|
|
773 {
|
|
774 if (htab->searches == 0)
|
|
775 return 0.0;
|
|
776
|
|
777 return (double) htab->collisions / (double) htab->searches;
|
|
778 }
|
|
779
|
|
780 /* Hash P as a null-terminated string.
|
|
781
|
|
782 Copied from gcc/hashtable.c. Zack had the following to say with respect
|
|
783 to applicability, though note that unlike hashtable.c, this hash table
|
|
784 implementation re-hashes rather than chain buckets.
|
|
785
|
|
786 http://gcc.gnu.org/ml/gcc-patches/2001-08/msg01021.html
|
|
787 From: Zack Weinberg <zackw@panix.com>
|
|
788 Date: Fri, 17 Aug 2001 02:15:56 -0400
|
|
789
|
|
790 I got it by extracting all the identifiers from all the source code
|
|
791 I had lying around in mid-1999, and testing many recurrences of
|
|
792 the form "H_n = H_{n-1} * K + c_n * L + M" where K, L, M were either
|
|
793 prime numbers or the appropriate identity. This was the best one.
|
|
794 I don't remember exactly what constituted "best", except I was
|
|
795 looking at bucket-length distributions mostly.
|
|
796
|
|
797 So it should be very good at hashing identifiers, but might not be
|
|
798 as good at arbitrary strings.
|
|
799
|
|
800 I'll add that it thoroughly trounces the hash functions recommended
|
|
801 for this use at http://burtleburtle.net/bob/hash/index.html, both
|
|
802 on speed and bucket distribution. I haven't tried it against the
|
|
803 function they just started using for Perl's hashes. */
|
|
804
|
|
805 hashval_t
|
|
806 htab_hash_string (const PTR p)
|
|
807 {
|
|
808 const unsigned char *str = (const unsigned char *) p;
|
|
809 hashval_t r = 0;
|
|
810 unsigned char c;
|
|
811
|
|
812 while ((c = *str++) != 0)
|
|
813 r = r * 67 + c - 113;
|
|
814
|
|
815 return r;
|
|
816 }
|
|
817
|
|
818 /* DERIVED FROM:
|
|
819 --------------------------------------------------------------------
|
|
820 lookup2.c, by Bob Jenkins, December 1996, Public Domain.
|
|
821 hash(), hash2(), hash3, and mix() are externally useful functions.
|
|
822 Routines to test the hash are included if SELF_TEST is defined.
|
|
823 You can use this free for any purpose. It has no warranty.
|
|
824 --------------------------------------------------------------------
|
|
825 */
|
|
826
|
|
827 /*
|
|
828 --------------------------------------------------------------------
|
|
829 mix -- mix 3 32-bit values reversibly.
|
|
830 For every delta with one or two bit set, and the deltas of all three
|
|
831 high bits or all three low bits, whether the original value of a,b,c
|
|
832 is almost all zero or is uniformly distributed,
|
|
833 * If mix() is run forward or backward, at least 32 bits in a,b,c
|
|
834 have at least 1/4 probability of changing.
|
|
835 * If mix() is run forward, every bit of c will change between 1/3 and
|
|
836 2/3 of the time. (Well, 22/100 and 78/100 for some 2-bit deltas.)
|
|
837 mix() was built out of 36 single-cycle latency instructions in a
|
|
838 structure that could support 2x parallelism, like so:
|
|
839 a -= b;
|
|
840 a -= c; x = (c>>13);
|
|
841 b -= c; a ^= x;
|
|
842 b -= a; x = (a<<8);
|
|
843 c -= a; b ^= x;
|
|
844 c -= b; x = (b>>13);
|
|
845 ...
|
|
846 Unfortunately, superscalar Pentiums and Sparcs can't take advantage
|
|
847 of that parallelism. They've also turned some of those single-cycle
|
|
848 latency instructions into multi-cycle latency instructions. Still,
|
|
849 this is the fastest good hash I could find. There were about 2^^68
|
|
850 to choose from. I only looked at a billion or so.
|
|
851 --------------------------------------------------------------------
|
|
852 */
|
|
/* Same mixing steps as described above, but slower: the explicit
   0xffffffff masks keep the results within 32 bits, so it also works
   on systems that might have 8 byte hashval_t's.  A, B and C must be
   modifiable lvalues; each is evaluated many times.  The body is
   wrapped in do { } while (0) so that "mix (a, b, c);" is a single
   statement and is safe in an unbraced if/else.  */
#define mix(a,b,c) \
do { \
  a -= b; a -= c; a ^= (c>>13); \
  b -= c; b -= a; b ^= (a<< 8); \
  c -= a; c -= b; c ^= ((b&0xffffffff)>>13); \
  a -= b; a -= c; a ^= ((c&0xffffffff)>>12); \
  b -= c; b -= a; b = (b ^ (a<<16)) & 0xffffffff; \
  c -= a; c -= b; c = (c ^ (b>> 5)) & 0xffffffff; \
  a -= b; a -= c; a = (a ^ (c>> 3)) & 0xffffffff; \
  b -= c; b -= a; b = (b ^ (a<<10)) & 0xffffffff; \
  c -= a; c -= b; c = (c ^ (b>>15)) & 0xffffffff; \
} while (0)
|
|
866
|
|
867 /*
|
|
868 --------------------------------------------------------------------
|
|
869 hash() -- hash a variable-length key into a 32-bit value
|
|
870 k : the key (the unaligned variable-length array of bytes)
|
|
871 len : the length of the key, counting by bytes
|
|
872 level : can be any 4-byte value
|
|
873 Returns a 32-bit value. Every bit of the key affects every bit of
|
|
874 the return value. Every 1-bit and 2-bit delta achieves avalanche.
|
|
875 About 36+6len instructions.
|
|
876
|
|
877 The best hash table sizes are powers of 2. There is no need to do
|
|
878 mod a prime (mod is sooo slow!). If you need less than 32 bits,
|
|
879 use a bitmask. For example, if you need only 10 bits, do
|
|
880 h = (h & hashmask(10));
|
|
881 In which case, the hash table should have hashsize(10) elements.
|
|
882
|
|
883 If you are hashing n strings (ub1 **)k, do it like this:
|
|
884 for (i=0, h=0; i<n; ++i) h = hash( k[i], len[i], h);
|
|
885
|
|
886 By Bob Jenkins, 1996. bob_jenkins@burtleburtle.net. You may use this
|
|
887 code any way you wish, private, educational, or commercial. It's free.
|
|
888
|
|
889 See http://burtleburtle.net/bob/hash/evahash.html
|
|
890 Use for hash table lookup, or anything where one collision in 2^32 is
|
|
891 acceptable. Do NOT use for cryptographic purposes.
|
|
892 --------------------------------------------------------------------
|
|
893 */
|
|
894
|
|
hashval_t
iterative_hash (const PTR k_in /* the key */,
                register size_t  length /* the length of the key */,
                register hashval_t initval /* the previous hash, or
                                              an arbitrary value */)
{
  register const unsigned char *k = (const unsigned char *)k_in;
  register hashval_t a,b,c,len;

  /* Set up the internal state */
  len = length;          /* bytes still to consume; LENGTH itself is
                            kept for the final mixing step */
  a = b = 0x9e3779b9;  /* the golden ratio; an arbitrary value */
  c = initval;           /* the previous hash value */

  /*---------------------------------------- handle most of the key */
  /* Note the preprocessor-split if/else: when WORDS_BIGENDIAN is
     defined only the byte-wise loop below is compiled.  */
#ifndef WORDS_BIGENDIAN
  /* On a little-endian machine, if the data is 4-byte aligned we can hash
     by word for better speed.  This gives nondeterministic results on
     big-endian machines.  */
  if (sizeof (hashval_t) == 4 && (((size_t)k)&3) == 0)
    while (len >= 12)    /* aligned */
      {
        /* Consume 12 bytes as three hashval_t words.  */
        a += *(hashval_t *)(k+0);
        b += *(hashval_t *)(k+4);
        c += *(hashval_t *)(k+8);
        mix(a,b,c);
        k += 12; len -= 12;
      }
  else /* unaligned */
#endif
    while (len >= 12)
      {
        /* Assemble each 32-bit word from four bytes, lowest byte
           first.  */
        a += (k[0] +((hashval_t)k[1]<<8) +((hashval_t)k[2]<<16) +((hashval_t)k[3]<<24));
        b += (k[4] +((hashval_t)k[5]<<8) +((hashval_t)k[6]<<16) +((hashval_t)k[7]<<24));
        c += (k[8] +((hashval_t)k[9]<<8) +((hashval_t)k[10]<<16)+((hashval_t)k[11]<<24));
        mix(a,b,c);
        k += 12; len -= 12;
      }

  /*------------------------------------- handle the last 11 bytes */
  c += length;
  /* LEN is now in [0, 11].  Every case deliberately falls through to
     the ones below it, so all remaining bytes are folded in.  */
  switch(len)              /* all the case statements fall through */
    {
    case 11: c+=((hashval_t)k[10]<<24);
    case 10: c+=((hashval_t)k[9]<<16);
    case 9 : c+=((hashval_t)k[8]<<8);
      /* the first byte of c is reserved for the length */
    case 8 : b+=((hashval_t)k[7]<<24);
    case 7 : b+=((hashval_t)k[6]<<16);
    case 6 : b+=((hashval_t)k[5]<<8);
    case 5 : b+=k[4];
    case 4 : a+=((hashval_t)k[3]<<24);
    case 3 : a+=((hashval_t)k[2]<<16);
    case 2 : a+=((hashval_t)k[1]<<8);
    case 1 : a+=k[0];
      /* case 0: nothing left to add */
    }
  mix(a,b,c);
  /*-------------------------------------------- report the result */
  return c;
}
|