?? hash.c
字號:
/* * Hash Table Data Type * Copyright (C) 1997 Kaz Kylheku <kaz@ashi.footprints.net> * * Free Software License: * * All rights are reserved by the author, with the following exceptions: * Permission is granted to freely reproduce and distribute this software, * possibly in exchange for a fee, provided that this copyright notice appears * intact. Permission is also granted to adapt this software to produce * derivative works, as long as the modified versions carry this copyright * notice and additional notices stating that the work has been modified. * This source code may be translated into executable form and incorporated * into proprietary software; there is no requirement for such software to * contain a copyright notice related to this source. * * $Id: hash.c,v 1.36.2.11 2000/11/13 01:36:45 kaz Exp $ * $Name: kazlib_1_20 $ */#include <stdlib.h>#include <stddef.h>#include <assert.h>#include <string.h>#define HASH_IMPLEMENTATION#include "hash.h"#ifdef KAZLIB_RCSIDstatic const char rcsid[] = "$Id: hash.c,v 1.36.2.11 2000/11/13 01:36:45 kaz Exp $";#endif#define INIT_BITS 6#define INIT_SIZE (1UL << (INIT_BITS)) /* must be power of two */#define INIT_MASK ((INIT_SIZE) - 1)#define next hash_next#define key hash_key#define data hash_data#define hkey hash_hkey#define table hash_table#define nchains hash_nchains#define nodecount hash_nodecount#define maxcount hash_maxcount#define highmark hash_highmark#define lowmark hash_lowmark#define compare hash_compare#define function hash_function#define allocnode hash_allocnode#define freenode hash_freenode#define context hash_context#define mask hash_mask#define dynamic hash_dynamic#define table hash_table#define chain hash_chainstatic hnode_t *hnode_alloc(void *context);static void hnode_free(hnode_t *node, void *context);static hash_val_t hash_fun_default(const void *key);static int hash_comp_default(const void *key1, const void *key2);int hash_val_t_bit;/* * Compute the number of bits in the hash_val_t type. We know that hash_val_t * is an unsigned integral type. Thus the highest value it can hold is a * Mersenne number (power of two, less one). We initialize a hash_val_t * object with this value and then shift bits out one by one while counting. * Notes: * 1. HASH_VAL_T_MAX is a Mersenne number---one that is one less than a power * of two. This means that its binary representation consists of all one * bits, and hence ``val'' is initialized to all one bits. * 2. While bits remain in val, we increment the bit count and shift it to the * right, replacing the topmost bit by zero. */static void compute_bits(void){ hash_val_t val = HASH_VAL_T_MAX; /* 1 */ int bits = 0; while (val) { /* 2 */ bits++; val >>= 1; } hash_val_t_bit = bits;}/* * Verify whether the given argument is a power of two. */static int is_power_of_two(hash_val_t arg){ if (arg == 0) return 0; while ((arg & 1) == 0) arg >>= 1; return (arg == 1);}/* * Compute a shift amount from a given table size */static hash_val_t compute_mask(hashcount_t size){ assert (is_power_of_two(size)); assert (size >= 2); return size - 1;}/* * Initialize the table of pointers to null. */static void clear_table(hash_t *hash){ hash_val_t i; for (i = 0; i < hash->nchains; i++) hash->table[i] = NULL;}/* * Double the size of a dynamic table. This works as follows. Each chain splits * into two adjacent chains. The shift amount increases by one, exposing an * additional bit of each hashed key. For each node in the original chain, the * value of this newly exposed bit will decide which of the two new chains will * receive the node: if the bit is 1, the chain with the higher index will have * the node, otherwise the lower chain will receive the node. In this manner, * the hash table will continue to function exactly as before without having to * rehash any of the keys. * Notes: * 1. Overflow check. * 2. The new number of chains is twice the old number of chains. * 3. The new mask is one bit wider than the previous, revealing a * new bit in all hashed keys. * 4. Allocate a new table of chain pointers that is twice as large as the * previous one. * 5. If the reallocation was successful, we perform the rest of the growth * algorithm, otherwise we do nothing. * 6. The exposed_bit variable holds a mask with which each hashed key can be * AND-ed to test the value of its newly exposed bit. * 7. Now loop over each chain in the table and sort its nodes into two * chains based on the value of each node's newly exposed hash bit. * 8. The low chain replaces the current chain. The high chain goes * into the corresponding sister chain in the upper half of the table. * 9. We have finished dealing with the chains and nodes. We now update * the various bookeeping fields of the hash structure. */static void grow_table(hash_t *hash){ hnode_t **newtable; assert (2 * hash->nchains > hash->nchains); /* 1 */ newtable = realloc(hash->table, sizeof *newtable * hash->nchains * 2); /* 4 */ if (newtable) { /* 5 */ hash_val_t mask = (hash->mask << 1) | 1; /* 3 */ hash_val_t exposed_bit = mask ^ hash->mask; /* 6 */ hash_val_t chain; assert (mask != hash->mask); for (chain = 0; chain < hash->nchains; chain++) { /* 7 */ hnode_t *low_chain = 0, *high_chain = 0, *hptr, *next; for (hptr = newtable[chain]; hptr != 0; hptr = next) { next = hptr->next; if (hptr->hkey & exposed_bit) { hptr->next = high_chain; high_chain = hptr; } else { hptr->next = low_chain; low_chain = hptr; } } newtable[chain] = low_chain; /* 8 */ newtable[chain + hash->nchains] = high_chain; } hash->table = newtable; /* 9 */ hash->mask = mask; hash->nchains *= 2; hash->lowmark *= 2; hash->highmark *= 2; } assert (hash_verify(hash));}/* * Cut a table size in half. This is done by folding together adjacent chains * and populating the lower half of the table with these chains. The chains are * simply spliced together. Once this is done, the whole table is reallocated * to a smaller object. * Notes: * 1. It is illegal to have a hash table with one slot. This would mean that * hash->shift is equal to hash_val_t_bit, an illegal shift value. * Also, other things could go wrong, such as hash->lowmark becoming zero. * 2. Looping over each pair of sister chains, the low_chain is set to * point to the head node of the chain in the lower half of the table, * and high_chain points to the head node of the sister in the upper half. * 3. The intent here is to compute a pointer to the last node of the * lower chain into the low_tail variable. If this chain is empty, * low_tail ends up with a null value. * 4. If the lower chain is not empty, we simply tack the upper chain onto it. * If the upper chain is a null pointer, nothing happens. * 5. Otherwise if the lower chain is empty but the upper one is not, * If the low chain is empty, but the high chain is not, then the * high chain is simply transferred to the lower half of the table. * 6. Otherwise if both chains are empty, there is nothing to do. * 7. All the chain pointers are in the lower half of the table now, so * we reallocate it to a smaller object. This, of course, invalidates * all pointer-to-pointers which reference into the table from the * first node of each chain. * 8. Though it's unlikely, the reallocation may fail. In this case we * pretend that the table _was_ reallocated to a smaller object. * 9. Finally, update the various table parameters to reflect the new size. */static void shrink_table(hash_t *hash){ hash_val_t chain, nchains; hnode_t **newtable, *low_tail, *low_chain, *high_chain; assert (hash->nchains >= 2); /* 1 */ nchains = hash->nchains / 2; for (chain = 0; chain < nchains; chain++) { low_chain = hash->table[chain]; /* 2 */ high_chain = hash->table[chain + nchains]; for (low_tail = low_chain; low_tail && low_tail->next; low_tail = low_tail->next) ; /* 3 */ if (low_chain != 0) /* 4 */ low_tail->next = high_chain; else if (high_chain != 0) /* 5 */ hash->table[chain] = high_chain; else assert (hash->table[chain] == NULL); /* 6 */ } newtable = realloc(hash->table, sizeof *newtable * nchains); /* 7 */ if (newtable) /* 8 */ hash->table = newtable; hash->mask >>= 1; /* 9 */ hash->nchains = nchains; hash->lowmark /= 2; hash->highmark /= 2; assert (hash_verify(hash));}/* * Create a dynamic hash table. Both the hash table structure and the table * itself are dynamically allocated. Furthermore, the table is extendible in * that it will automatically grow as its load factor increases beyond a * certain threshold. * Notes: * 1. If the number of bits in the hash_val_t type has not been computed yet, * we do so here, because this is likely to be the first function that the * user calls. * 2. Allocate a hash table control structure. * 3. If a hash table control structure is successfully allocated, we * proceed to initialize it. Otherwise we return a null pointer. * 4. We try to allocate the table of hash chains. * 5. If we were able to allocate the hash chain table, we can finish * initializing the hash structure and the table. Otherwise, we must * backtrack by freeing the hash structure. * 6. INIT_SIZE should be a power of two. The high and low marks are always set * to be twice the table size and half the table size respectively. When the * number of nodes in the table grows beyond the high size (beyond load * factor 2), it will double in size to cut the load factor down to about * about 1. If the table shrinks down to or beneath load factor 0.5, * it will shrink, bringing the load up to about 1. However, the table * will never shrink beneath INIT_SIZE even if it's emptied. * 7. This indicates that the table is dynamically allocated and dynamically * resized on the fly. A table that has this value set to zero is * assumed to be statically allocated and will not be resized. * 8. The table of chains must be properly reset to all null pointers. */hash_t *hash_create(hashcount_t maxcount, hash_comp_t compfun, hash_fun_t hashfun){ hash_t *hash; if (hash_val_t_bit == 0) /* 1 */ compute_bits(); hash = malloc(sizeof *hash); /* 2 */ if (hash) { /* 3 */ hash->table = malloc(sizeof *hash->table * INIT_SIZE); /* 4 */ if (hash->table) { /* 5 */ hash->nchains = INIT_SIZE; /* 6 */ hash->highmark = INIT_SIZE * 2; hash->lowmark = INIT_SIZE / 2; hash->nodecount = 0; hash->maxcount = maxcount; hash->compare = compfun ? compfun : hash_comp_default; hash->function = hashfun ? hashfun : hash_fun_default; hash->allocnode = hnode_alloc; hash->freenode = hnode_free; hash->context = NULL; hash->mask = INIT_MASK; hash->dynamic = 1; /* 7 */ clear_table(hash); /* 8 */ assert (hash_verify(hash)); return hash; } free(hash); } return NULL;}/* * Select a different set of node allocator routines. */void hash_set_allocator(hash_t *hash, hnode_alloc_t al, hnode_free_t fr, void *context){ assert (hash_count(hash) == 0); assert ((al == 0 && fr == 0) || (al != 0 && fr != 0)); hash->allocnode = al ? al : hnode_alloc; hash->freenode = fr ? fr : hnode_free; hash->context = context;}/* * Free every node in the hash using the hash->freenode() function pointer, and * cause the hash to become empty. */void hash_free_nodes(hash_t *hash){ hscan_t hs; hnode_t *node; hash_scan_begin(&hs, hash); while ((node = hash_scan_next(&hs))) { hash_scan_delete(hash, node); hash->freenode(node, hash->context); } hash->nodecount = 0; clear_table(hash);}/* * Obsolescent function for removing all nodes from a table, * freeing them and then freeing the table all in one step. */void hash_free(hash_t *hash){#ifdef KAZLIB_OBSOLESCENT_DEBUG assert ("call to obsolescent function hash_free()" && 0);#endif hash_free_nodes(hash); hash_destroy(hash);}/* * Free a dynamic hash table structure. */void hash_destroy(hash_t *hash){ assert (hash_val_t_bit != 0); assert (hash_isempty(hash)); free(hash->table); free(hash);}/* * Initialize a user supplied hash structure. The user also supplies a table of * chains which is assigned to the hash structure. The table is static---it * will not grow or shrink. * 1. See note 1. in hash_create(). * 2. The user supplied array of pointers hopefully contains nchains nodes. * 3. See note 7. in hash_create(). * 4. We must dynamically compute the mask from the given power of two table * size. * 5. The user supplied table can't be assumed to contain null pointers, * so we reset it here. */hash_t *hash_init(hash_t *hash, hashcount_t maxcount, hash_comp_t compfun, hash_fun_t hashfun, hnode_t **table, hashcount_t nchains){ if (hash_val_t_bit == 0) /* 1 */ compute_bits(); assert (is_power_of_two(nchains)); hash->table = table; /* 2 */ hash->nchains = nchains; hash->nodecount = 0; hash->maxcount = maxcount; hash->compare = compfun ? compfun : hash_comp_default; hash->function = hashfun ? hashfun : hash_fun_default; hash->dynamic = 0; /* 3 */ hash->mask = compute_mask(nchains); /* 4 */ clear_table(hash); /* 5 */ assert (hash_verify(hash)); return hash;}/* * Reset the hash scanner so that the next element retrieved by * hash_scan_next() shall be the first element on the first non-empty chain. * Notes: * 1. Locate the first non empty chain. * 2. If an empty chain is found, remember which one it is and set the next * pointer to refer to its first element. * 3. Otherwise if a chain is not found, set the next pointer to NULL * so that hash_scan_next() shall indicate failure. */void hash_scan_begin(hscan_t *scan, hash_t *hash){ hash_val_t nchains = hash->nchains; hash_val_t chain; scan->table = hash; /* 1 */ for (chain = 0; chain < nchains && hash->table[chain] == 0; chain++) ; if (chain < nchains) { /* 2 */ scan->chain = chain; scan->next = hash->table[chain]; } else { /* 3 */ scan->next = NULL; }}/* * Retrieve the next node from the hash table, and update the pointer * for the next invocation of hash_scan_next(). * Notes: * 1. Remember the next pointer in a temporary value so that it can be * returned. * 2. This assertion essentially checks whether the module has been properly * initialized. The first point of interaction with the module should be * either hash_create() or hash_init(), both of which set hash_val_t_bit to * a non zero value. * 3. If the next pointer we are returning is not NULL, then the user is * allowed to call hash_scan_next() again. We prepare the new next pointer * for that call right now. That way the user is allowed to delete the node * we are about to return, since we will no longer be needing it to locate * the next node. * 4. If there is a next node in the chain (next->next), then that becomes the * new next node, otherwise ... * 5. We have exhausted the current chain, and must locate the next subsequent * non-empty chain in the table. * 6. If a non-empty chain is found, the first element of that chain becomes * the new next node. Otherwise there is no new next node and we set the * pointer to NULL so that the next time hash_scan_next() is called, a null * pointer shall be immediately returned. */hnode_t *hash_scan_next(hscan_t *scan){ hnode_t *next = scan->next; /* 1 */ hash_t *hash = scan->table; hash_val_t chain = scan->chain + 1; hash_val_t nchains = hash->nchains; assert (hash_val_t_bit != 0); /* 2 */ if (next) { /* 3 */ if (next->next) { /* 4 */ scan->next = next->next; } else { while (chain < nchains && hash->table[chain] == 0) /* 5 */ chain++; if (chain < nchains) { /* 6 */ scan->chain = chain; scan->next = hash->table[chain]; } else { scan->next = NULL; } } } return next;}/* * Insert a node into the hash table. * Notes: * 1. It's illegal to insert more than the maximum number of nodes. The client * should verify that the hash table is not full before attempting an * insertion. * 2. The same key may not be inserted into a table twice. * 3. If the table is dynamic and the load factor is already at >= 2, * grow the table. * 4. We take the bottom N bits of the hash value to derive the chain index, * where N is the base 2 logarithm of the size of the hash table. */void hash_insert(hash_t *hash, hnode_t *node, const void *key)
?? 快捷鍵說明
復制代碼
Ctrl + C
搜索代碼
Ctrl + F
全屏模式
F11
切換主題
Ctrl + Shift + D
顯示快捷鍵
?
增大字號
Ctrl + =
減小字號
Ctrl + -