/* NIPS VM - New Implementation of Promela Semantics Virtual Machine * Copyright (C) 2005: Stefan Schuermans * Michael Weber * Lehrstuhl fuer Informatik II, RWTH Aachen * Copyleft: GNU public license - http://www.gnu.org/copyleft/gpl.html */ #include #include #include #include "state.h" #include "tools.h" #include "hashtab.h" #define HASH_MIN_ENTRIES 65536 // 64k entries (because of calculation of hash_rest) // type for hash values typedef unsigned long t_hash; typedef st_global_state_header *t_hashtab_value; typedef struct t_hashtab_bucket { t_hashtab_value value; unsigned short hash_rest; // part of hash value that is lost in "entry = hash_value % entries" // ---> "hash_rest = hash_value / entries" } PACKED st_hashtab_bucket; // hashtable pointer type typedef struct t_hashtab_header { unsigned long entries; // number of entries in the hash table unsigned long retries; // maximum allowed number of retries by re-hashing unsigned long memory_size; // size of entire hash table in bytes unsigned int max_retry_count; // maximum number of retries needed so far unsigned long conflict_count; // number of conflicts found so far st_hashtab_bucket *bucket; // entries of hashtable } st_hashtab_header; // HASH FUNCTION TAKEN FROM http://burtleburtle.net/bob/hash/doobs.html - begin typedef uint32_t ub4; /* unsigned 4-byte quantities */ typedef uint8_t ub1; /* unsigned 1-byte quantities */ #define hashsize(n) ((ub4)1<<(n)) #define hashmask(n) (hashsize(n)-1) /* * -------------------------------------------------------------------- * mix -- mix 3 32-bit values reversibly. * For every delta with one or two bits set, and the deltas of all three * high bits or all three low bits, whether the original value of a,b,c * is almost all zero or is uniformly distributed, * * If mix() is run forward or backward, at least 32 bits in a,b,c * have at least 1/4 probability of changing. * * If mix() is run forward, every bit of c will change between 1/3 and * 2/3 of the time. (Well, 22/100 and 78/100 for some 2-bit deltas.) * mix() was built out of 36 single-cycle latency instructions in a * structure that could supported 2x parallelism, like so: * a -= b; * a -= c; x = (c>>13); * b -= c; a ^= x; * b -= a; x = (a<<8); * c -= a; b ^= x; * c -= b; x = (b>>13); * ... * Unfortunately, superscalar Pentiums and Sparcs can't take advantage * of that parallelism. They've also turned some of those single-cycle * latency instructions into multi-cycle latency instructions. Still, * this is the fastest good hash I could find. There were about 2^^68 * to choose from. I only looked at a billion or so. * -------------------------------------------------------------------- */ #define mix(a,b,c) \ { \ a -= b; a -= c; a ^= (c>>13); \ b -= c; b -= a; b ^= (a<<8); \ c -= a; c -= b; c ^= (b>>13); \ a -= b; a -= c; a ^= (c>>12); \ b -= c; b -= a; b ^= (a<<16); \ c -= a; c -= b; c ^= (b>>5); \ a -= b; a -= c; a ^= (c>>3); \ b -= c; b -= a; b ^= (a<<10); \ c -= a; c -= b; c ^= (b>>15); \ } /* * -------------------------------------------------------------------- * hash() -- hash a variable-length key into a 32-bit value * k : the key (the unaligned variable-length array of bytes) * len : the length of the key, counting by bytes * initval : can be any 4-byte value * Returns a 32-bit value. Every bit of the key affects every bit of * the return value. Every 1-bit and 2-bit delta achieves avalanche. * About 6*len+35 instructions. * * The best hash table sizes are powers of 2. There is no need to do * mod a prime (mod is sooo slow!). If you need less than 32 bits, * use a bitmask. For example, if you need only 10 bits, do * h = (h & hashmask(10)); * In which case, the hash table should have hashsize(10) elements. * * If you are hashing n strings (ub1 **)k, do it like this: * for (i=0, h=0; i= 12) { a += (k[0] +((ub4)k[1]<<8) +((ub4)k[2]<<16) +((ub4)k[3]<<24)); b += (k[4] +((ub4)k[5]<<8) +((ub4)k[6]<<16) +((ub4)k[7]<<24)); c += (k[8] +((ub4)k[9]<<8) +((ub4)k[10]<<16)+((ub4)k[11]<<24)); mix(a,b,c); k += 12; len -= 12; } /*------------------------------------- handle the last 11 bytes */ c += length; switch(len) /* all the case statements fall through */ { case 11: c+=((ub4)k[10]<<24); case 10: c+=((ub4)k[9]<<16); case 9 : c+=((ub4)k[8]<<8); /* the first byte of c is reserved for the length */ case 8 : b+=((ub4)k[7]<<24); case 7 : b+=((ub4)k[6]<<16); case 6 : b+=((ub4)k[5]<<8); case 5 : b+=k[4]; case 4 : a+=((ub4)k[3]<<24); case 3 : a+=((ub4)k[2]<<16); case 2 : a+=((ub4)k[1]<<8); case 1 : a+=k[0]; /* case 0: nothing left to add */ } mix(a,b,c); /*-------------------------------------------- report the result */ return c; } // HASH FUNCTION TAKEN FROM http://burtleburtle.net/bob/hash/doobs.html - end // a hash function for binary data static t_hash hashtab_hash( unsigned char * p_data, unsigned long size, unsigned long initval ) { return hash( p_data, size, initval ); } // create a new hash table t_hashtab hashtab_new( unsigned long entries, unsigned long retries ) // extern { unsigned long size; t_hashtab hashtab; char * ptr; // correct parameters entries = max( entries, HASH_MIN_ENTRIES ); retries = max( retries, 1 ); // calculate size size = sizeof( st_hashtab_header ); // header size += entries * sizeof( st_hashtab_bucket ); // entries // allocate memory hashtab = (t_hashtab)calloc( 1, size ); if( hashtab == NULL ) return NULL; // initialize hashtab header hashtab->entries = entries; hashtab->retries = retries; hashtab->memory_size = size; // initialize hashtable ptr = (char *)hashtab + sizeof( st_hashtab_header ); // pointer to array with entries hashtab->bucket = (st_hashtab_bucket *)ptr; return hashtab; } // free a hash table void hashtab_free( t_hashtab hashtab ) // extern { free( hashtab ); } // get index for/of an entry in a hash table // - returns: 1 if state can be inserted into hash table // 0 if state is already in hash table // -1 if state did not fit into hash table (hash conflict that could not be resolved) // - fills *p_pos with pointer to position for/of entry (not for return value -1) extern int hashtab_get_pos (t_hashtab hashtab, size_t size, nipsvm_state_t *state, nipsvm_state_t ***p_pos) { unsigned long entry, i; t_hash hash, k; // get hashtable entry hash = hashtab_hash( (unsigned char *)state, size, 0 ); k = 1 + (hash % (hashtab->entries - 1)); // naive double hashing entry = hash % hashtab->entries; for( i = 0; i < hashtab->retries; i++ ) { if( hashtab->bucket[entry].value == NULL ) { hashtab->bucket[entry].hash_rest = hash / hashtab->entries; *p_pos = &hashtab->bucket[entry].value; // return pointer to position return 1; // can be inserted into hash table } else if( hashtab->bucket[entry].hash_rest != hash / hashtab->entries ) { // no match, try next } else if( nipsvm_state_compare( hashtab->bucket[entry].value, state, size ) == 0 ) { *p_pos = &hashtab->bucket[entry].value; // return pointer to position return 0; } hashtab->conflict_count++; hashtab->max_retry_count = max( hashtab->max_retry_count, i + 1 ); entry = (entry + k) % hashtab->entries; // considers indexes h, h + k, ..., h + i*k (mod entries) } // for( i ... // no more space ---> unresolvable hash conflict *p_pos = NULL; return -1; } // put a state into a hash table // - only the pointer is stored // - returns: 1 if state was added to hash table // 0 if state was already in hash table // -1 if state did not fit into hash table (hash conflict that could not be resolved) extern int hashtab_insert( t_hashtab hashtab, size_t size, nipsvm_state_t *state ) { unsigned long ret_val; t_hashtab_value* pos; // get indices where to place entry ret_val = hashtab_get_pos( hashtab, size, state, &pos ); // put pointer to state into hash table if it is a new one if( ret_val > 0 ) *pos = state; return ret_val; } // output statistical information about hashtable void hashtab_print_statistics( FILE * stream, t_hashtab hashtab ) { unsigned int i; unsigned int entries = 0; // count entries in hash table for( i = 0; i < hashtab->entries; i++ ) if( hashtab->bucket[i].value != NULL ) entries++; double fill_ratio = 100.0 * (double)entries / (double)hashtab->entries; fprintf( stream, "hashtable statistics:\n" " memory usage: %0.2fMB\n", hashtab->memory_size / 1048576.0 ); fprintf( stream, " buckets used/available: %u/%lu (%0.1f%%)\n", entries, hashtab->entries, fill_ratio ); fprintf( stream, " max. no. of retries: %u\n", hashtab->max_retry_count ); fprintf( stream, " conflicts (resolved): %lu\n", hashtab->conflict_count ); } // output statistical information about hashtable extern void table_statistics (t_hashtab table, table_statistics_t *stats) { unsigned int i; unsigned int n_entries = 0; assert (table != NULL); assert (stats != NULL); // count entries in hash table for( i = 0; i < table->entries; i++ ) { if (table->bucket[i].value != NULL) { n_entries++; } } memset (stats, sizeof stats, 0); stats->memory_size = table->memory_size; stats->entries_used = n_entries; stats->entries_available = table->entries; stats->conflicts = table->conflict_count; stats->max_retries = table->max_retry_count; }