blob: c66c5f5783817f593fd8557f758f0b8f22899748 [file] [log] [blame]
/******************************************************************************
* Copyright (c) 2005 The Regents of the University of California.
* This material was produced under U.S. Government contract W-7405-ENG-36
* for Los Alamos National Laboratory, which is operated by the University
* of California for the U.S. Department of Energy. The U.S. Government has
* rights to use, reproduce, and distribute this software. NEITHER THE
* GOVERNMENT NOR THE UNIVERSITY MAKES ANY WARRANTY, EXPRESS OR IMPLIED, OR
* ASSUMES ANY LIABILITY FOR THE USE OF THIS SOFTWARE. If software is modified
* to produce derivative works, such modified software should be clearly
* marked, so as not to confuse it with the version available from LANL.
*
* Additionally, this program and the accompanying materials
* are made available under the terms of the Eclipse Public License v1.0
* which accompanies this distribution, and is available at
* http://www.eclipse.org/legal/epl-v10.html
*
* LA-CC 04-115
******************************************************************************/
/*
** Original copyright by Bob Jenkins, December 1996.
*/
/*
* NOTE: This hash implementation is not synchronized across iteration (HashSet/HashGet). If there are two
* threads simultaneously accessing the hash, then one thread may affect the other's access. For instance,
* if thread 1 calls HashGet(), then thread 2 calls HashGet(), then the next time thread 1 calls
* HashGet(), it will get the 3rd element in the hash, not the 2nd. Also, if two threads are accessing
* objects in the hash, and freeing data associated with those objects while traversing the hash, then this
* may cause problems where thread 1 frees data that thread 2 is about to access.
*
* Applications wishing to use threads to access hashes in this way should provide functions that obtain
* and release a global lock on the hash which prevents any other thread from accessing the hash while
* that lock is held.
*/
#include <stdio.h>
#include <string.h>
#include <stdlib.h>
#include "compat.h"
#include "hash.h"
THREAD_DECL(hash);
/*
** mix 3 32-bit values reversibly.
** For every delta with one or two bit set, and the deltas of all three
** high bits or all three low bits, whether the original value of a,b,c
** is almost all zero or is uniformly distributed,
** If mix() is run forward or backward, at least 32 bits in a,b,c
** have at least 1/4 probability of changing.
** If mix() is run forward, every bit of c will change between 1/3 and
** 2/3 of the time. (Well, 22/100 and 78/100 for some 2-bit deltas.)
** mix() was built out of 36 single-cycle latency instructions in a
** structure that could supported 2x parallelism, like so:
** a -= b;
** a -= c; x = (c>>13);
** b -= c; a ^= x;
** b -= a; x = (a<<8);
** c -= a; b ^= x;
** c -= b; x = (b>>13);
** ...
** Unfortunately, superscalar Pentiums and Sparcs can't take advantage
** of that parallelism. They've also turned some of those single-cycle
** latency instructions into multi-cycle latency instructions. Still,
** this is the fastest good hash I could find. There were about 2^^68
** to choose from. I only looked at a billion or so.
*/
#define mix(a,b,c) \
{ \
a -= b; a -= c; a ^= (c>>13); \
b -= c; b -= a; b ^= (a<<8); \
c -= a; c -= b; c ^= (b>>13); \
a -= b; a -= c; a ^= (c>>12); \
b -= c; b -= a; b ^= (a<<16); \
c -= a; c -= b; c ^= (b>>5); \
a -= b; a -= c; a ^= (c>>3); \
b -= c; b -= a; b ^= (a<<10); \
c -= a; c -= b; c ^= (b>>15); \
}
static void _hgrow(Hash *);
Hash *
HashCreate(int size)
{
unsigned int i;
unsigned int logsize = -1;
Hash * htab;
htab = (Hash *)malloc(sizeof(Hash));
/*
* Compute log2(size)
*/
for (i = (unsigned int)size; i > 0; i >>= 1)
logsize++;
/*
** Maximum size is 2^31
*/
if ( logsize > BITSPERBYTE * sizeof(unsigned int) - 1 )
logsize = BITSPERBYTE * sizeof(unsigned int) - 1;
if ( logsize <= 0 )
logsize = 2;
htab->logsize = logsize;
htab->size = (unsigned int)(1 << logsize);
htab->mask = htab->size - 1;
htab->count = 0;
/* allocate memory and zero out */
htab->table = (HashEntry **)malloc(htab->size * sizeof(HashEntry *));
for ( i = 0 ; i < htab->size ; i++ )
htab->table[i] = (HashEntry *)NULL;
/* everything went alright */
return htab;
}
void
HashDestroy(Hash *htab, void (*destroy)(void *))
{
int i;
HashEntry * h;
HashEntry * hn;
/* Test for correct arguments. */
if ( htab == NULL )
return;
THREAD_LOCK(hash);
for ( i = 0 ; i < (int)htab->size ; i++ )
{
for ( h = htab->table[i] ; h != NULL ; )
{
if ( destroy != NULL )
destroy(h->h_data);
hn = h->h_next;
free(h);
h = hn;
}
}
if (htab->table != NULL)
free(htab->table);
free(htab);
THREAD_UNLOCK(hash);
}
/*
** findentry(tab, key, hv) -- hash a variable-length key into a 32-bit value
**
** key: the key (the unaligned variable-length array of bytes)
** hv : can be any 4-byte value
**
** Returns a 32-bit value. Every bit of the key affects every bit of
** the return value. Every 1-bit and 2-bit delta achieves avalanche.
** About 6len+35 instructions.
**
** The best hash table sizes are powers of 2. There is no need to do
** mod a prime (mod is sooo slow!). If you need less than 32 bits,
** use a bitmask. For example, if you need only 10 bits, do
** h = (h & hashmask(10));
** In which case, the hash table should have hashsize(10) elements.
**
** If you are hashing n strings (ub1 **)k, do it like this:
** for (i=0, h=0; i<n; ++i) h = findentry( k[i], h);
**
** By Bob Jenkins, 1996. bob_jenkins@burtleburtle.net. You may use this
** code any way you wish, private, educational, or commercial.
**
** See http://burtleburtle.net/bob/hash/evahash.html
** Use for hash table lookup, or anything where one collision in 2^32 is
** acceptable. Do NOT use for cryptographic purposes.
*/
unsigned int
findentry(char *key, int len, int hv)
{
unsigned int a;
unsigned int b;
unsigned int c;
unsigned int l;
if ( key == NULL )
return 0;
/* Set up the internal state */
a = b = 0x9e3779b9; /* the golden ratio; an arbitrary value */
c = hv; /* the previous hash value */
/*---------------------------------------- handle most of the key */
for ( l = len ; l >= 12 ; l -= 12 )
{
a += (key[0] +((unsigned int)key[1]<<8) +((unsigned int)key[2]<<16) +((unsigned int)key[3]<<24));
b += (key[4] +((unsigned int)key[5]<<8) +((unsigned int)key[6]<<16) +((unsigned int)key[7]<<24));
c += (key[8] +((unsigned int)key[9]<<8) +((unsigned int)key[10]<<16)+((unsigned int)key[11]<<24));
mix(a,b,c);
key += 12;
}
/*------------------------------------- handle the last 11 bytes */
c += len;
switch ( l ) /* all the case statements fall through */
{
case 11: c += ((unsigned int)key[10]<<24);
case 10: c += ((unsigned int)key[9]<<16);
case 9 : c += ((unsigned int)key[8]<<8);
/* the first byte of c is reserved for the length */
case 8 : b += ((unsigned int)key[7]<<24);
case 7 : b += ((unsigned int)key[6]<<16);
case 6 : b += ((unsigned int)key[5]<<8);
case 5 : b += key[4];
case 4 : a += ((unsigned int)key[3]<<24);
case 3 : a += ((unsigned int)key[2]<<16);
case 2 : a += ((unsigned int)key[1]<<8);
case 1 : a += key[0];
/* case 0: nothing left to add */
}
mix(a,b,c);
return c;
}
unsigned int
HashCompute(char *key, int len)
{
return findentry(key, len, 0);
}
void *
HashSearch(Hash *htab, unsigned int idx)
{
HashEntry * h;
THREAD_LOCK(hash);
for ( h = htab->table[idx & htab->mask] ; h != NULL ; h = h->h_next)
{
if ( idx == h->h_hval ) {
THREAD_UNLOCK(hash);
return h->h_data;
}
}
THREAD_UNLOCK(hash);
return NULL;
}
HashEntry *
HashInsert(Hash *htab, unsigned int idx, void *data)
{
HashEntry * h;
HashEntry ** hp;
THREAD_LOCK(hash);
/*
** Check if entry already exists
*/
for ( h = htab->table[idx & htab->mask] ; h != (HashEntry *)NULL ; h = h->h_next)
{
if ( idx == h->h_hval ) {
THREAD_UNLOCK(hash);
return NULL;
}
}
/*
** Grow hash table if it's getting full.
*/
if ( ++htab->count > htab->size )
_hgrow(htab);
h = (HashEntry *)malloc(sizeof(HashEntry));
h->h_hval = idx;
h->h_data = data;
hp = &htab->table[idx & htab->mask];
h->h_next = *hp;
*hp = h;
THREAD_UNLOCK(hash);
return h;
}
void
HashRemove(Hash *htab, unsigned int idx)
{
HashEntry * h;
HashEntry ** hp;
THREAD_LOCK(hash);
/*
** Find item.
*/
for ( hp = &htab->table[idx & htab->mask] ; *hp != NULL && (*hp)->h_hval != idx ; )
hp = &(*hp)->h_next;
/*
* Found item?
*/
if (*hp == NULL) {
THREAD_UNLOCK(hash);
return;
}
/*
* Update scan values
*/
if (htab->scan_entry == *hp)
htab->scan_entry = htab->scan_entry->h_next;
/*
** Remove item.
*/
h = *hp;
*hp = (*hp)->h_next;
free(h);
htab->count--;
THREAD_UNLOCK(hash);
}
void
HashSet(Hash *htab)
{
THREAD_LOCK(hash);
htab->scan = 0;
htab->scan_entry = NULL;
THREAD_UNLOCK(hash);
}
HashEntry *
HashGet(Hash *htab)
{
HashEntry * h;
THREAD_LOCK(hash);
if ((h = htab->scan_entry) != NULL) {
htab->scan_entry = htab->scan_entry->h_next;
THREAD_UNLOCK(hash);
return h;
}
while (htab->scan < htab->size) {
if ((h = htab->table[htab->scan++]) != NULL) {
htab->scan_entry = h->h_next;
THREAD_UNLOCK(hash);
return h;
}
}
THREAD_UNLOCK(hash);
return NULL;
}
static void
_hgrow(Hash *htab)
{
unsigned int i;
unsigned int nsize;
unsigned int nmask;
HashEntry ** ntab;
nsize = (unsigned int)(1 << ++htab->logsize);
nmask = nsize - 1;
ntab = (HashEntry **)malloc(nsize * sizeof(HashEntry *));
for ( i = 0 ; i < nsize ; i++ )
ntab[i] = (HashEntry *)NULL;
/*
** Move old entries to new table.
*/
for ( i = 0 ; i < htab->size ; i++ )
{
HashEntry * h;
HashEntry * h2;
HashEntry ** hp;
for ( h = htab->table[i] ; h != (HashEntry *)NULL ; )
{
h2 = h;
h = h->h_next;
hp = &ntab[h2->h_hval & nmask];
h2->h_next = *hp;
*hp = h2;
}
}
free(htab->table);
htab->table = ntab;
htab->size = nsize;
htab->mask = nmask;
}