rpm 5.3.7

rpmio/rpmhash.c

Go to the documentation of this file.
00001 
00006 #include "system.h"
00007 #include <rpmiotypes.h>
00008 #include <rpmio.h>
00009 #include <rpmhash.h>
00010 #include "debug.h"
00011 
00012 /*@unchecked@*/
00013 int _ht_debug = 0;
00014 
00015 typedef /*@owned@*/ const void * voidptr;
00016 
00017 typedef struct hashBucket_s * hashBucket;
00018 
00021 struct hashBucket_s {
00022     voidptr key;                        
00023 /*@owned@*/ voidptr * data;             
00024     int dataCount;                      
00025 /*@dependent@*/hashBucket next;         
00026 };
00027 
00030 struct hashTable_s {
00031     struct rpmioItem_s _item;   
00032     int numBuckets;                     
00033     size_t keySize;                     
00034     int freeData;       
00035     hashBucket * buckets;               
00036 /*@relnull@*/
00037     hashFunctionType fn;                
00038 /*@relnull@*/
00039     hashEqualityType eq;                
00040 #if defined(__LCLINT__)
00041 /*@refs@*/
00042     int nrefs;                          
00043 #endif
00044 };
00045 
00052 static /*@shared@*/ /*@null@*/
00053 hashBucket findEntry(hashTable ht, const void * key)
00054         /*@*/
00055 {
00056     rpmuint32_t hash = 0;
00057     hashBucket b;
00058 
00059     /*@-modunconnomods@*/
00060     hash = ht->fn(hash, key, 0) % ht->numBuckets;
00061     b = ht->buckets[hash];
00062 
00063     while (b && b->key && ht->eq(b->key, key))
00064         b = b->next;
00065     /*@=modunconnomods@*/
00066 
00067     return b;
00068 }
00069 
00070 int hashEqualityString(const void * key1, const void * key2)
00071 {
00072     const char *k1 = (const char *)key1;
00073     const char *k2 = (const char *)key2;
00074     return strcmp(k1, k2);
00075 }
00076 
00077 rpmuint32_t hashFunctionString(rpmuint32_t h, const void * data, size_t size)
00078 {
00079     const char *key = data;
00080 
00081     if (size == 0)
00082         size = strlen(key);
00083 
00084     /*
00085      * DJBX33A (Daniel J. Bernstein, Times 33 with Addition)
00086      *
00087      * This is Daniel J. Bernstein's popular `times 33' hash function as
00088      * posted by him years ago on comp.lang.c. It basically uses a  function
00089      * like ``hash(i) = hash(i-1) * 33 + str[i]''. This is one of the best
00090      * known hash functions for strings. Because it is both computed very
00091      * fast and distributes very well.
00092      *
00093      * The magic of number 33, i.e. why it works better than many other
00094      * constants, prime or not, has never been adequately explained by
00095      * anyone. So I try an explanation: if one experimentally tests all
00096      * multipliers between 1 and 256 (as RSE did now) one detects that even
00097      * numbers are not useable at all. The remaining 128 odd numbers
00098      * (except for the number 1) work more or less all equally well. They
00099      * all distribute in an acceptable way and this way fill a hash table
00100      * with an average percent of approx. 86%.
00101      *
00102      * If one compares the Chi^2 values of the variants, the number 33 not
00103      * even has the best value. But the number 33 and a few other equally
00104      * good numbers like 17, 31, 63, 127 and 129 have nevertheless a great
00105      * advantage to the remaining numbers in the large set of possible
00106      * multipliers: their multiply operation can be replaced by a faster
00107      * operation based on just one shift plus either a single addition
00108      * or subtraction operation. And because a hash function has to both
00109      * distribute good _and_ has to be very fast to compute, those few
00110      * numbers should be preferred and seems to be the reason why Daniel J.
00111      * Bernstein also preferred it.
00112      *
00113      * Below you can find the variant implemented with the
00114      * multiplication optimized via bit shifts and hash unrolled eight
00115      * times for speed.
00116      *                     -- Ralf S. Engelschall <rse@engelschall.com>
00117      */
00118     if (h == 0)
00119         h = 5381;
00120     for (; size >= 8; size -= 8) {
00121         h = ((h << 5) + h) + (rpmuint32_t)*key++;
00122         h = ((h << 5) + h) + (rpmuint32_t)*key++;
00123         h = ((h << 5) + h) + (rpmuint32_t)*key++;
00124         h = ((h << 5) + h) + (rpmuint32_t)*key++;
00125         h = ((h << 5) + h) + (rpmuint32_t)*key++;
00126         h = ((h << 5) + h) + (rpmuint32_t)*key++;
00127         h = ((h << 5) + h) + (rpmuint32_t)*key++;
00128         h = ((h << 5) + h) + (rpmuint32_t)*key++;
00129     }
00130     switch (size) {
00131         case 7: h = ((h << 5) + h) + (rpmuint32_t)*key++; /*@fallthrough@*/
00132         case 6: h = ((h << 5) + h) + (rpmuint32_t)*key++; /*@fallthrough@*/
00133         case 5: h = ((h << 5) + h) + (rpmuint32_t)*key++; /*@fallthrough@*/
00134         case 4: h = ((h << 5) + h) + (rpmuint32_t)*key++; /*@fallthrough@*/
00135         case 3: h = ((h << 5) + h) + (rpmuint32_t)*key++; /*@fallthrough@*/
00136         case 2: h = ((h << 5) + h) + (rpmuint32_t)*key++; /*@fallthrough@*/
00137         case 1: h = ((h << 5) + h) + (rpmuint32_t)*key++; break;
00138         default: /* case 0: */ break;
00139     }
00140 
00141     return h;
00142 }
00143 
00144 void htAddEntry(hashTable ht, const void * key, const void * data)
00145 {
00146     rpmuint32_t hash = 0;
00147     hashBucket b;
00148 
00149     hash = ht->fn(hash, key, 0) % ht->numBuckets;
00150     b = ht->buckets[hash];
00151 
00152     while (b && b->key && ht->eq(b->key, key))
00153         b = b->next;
00154 
00155     if (b == NULL) {
00156         b = xmalloc(sizeof(*b));
00157         if (ht->keySize) {
00158             char *k = xmalloc(ht->keySize);
00159             memcpy(k, key, ht->keySize);
00160             b->key = k;
00161         } else {
00162             b->key = key;
00163         }
00164         b->dataCount = 0;
00165         b->next = ht->buckets[hash];
00166         b->data = NULL;
00167         ht->buckets[hash] = b;
00168     }
00169 
00170     b->data = xrealloc(b->data, sizeof(*b->data) * (b->dataCount + 1));
00171     b->data[b->dataCount++] = data;
00172 }
00173 
00174 int htHasEntry(hashTable ht, const void * key)
00175 {
00176     hashBucket b;
00177 
00178     if (!(b = findEntry(ht, key))) return 0; else return 1;
00179 }
00180 
00181 int htGetEntry(hashTable ht, const void * key, const void * data,
00182                int * dataCount, const void * tableKey)
00183 {
00184     hashBucket b;
00185 
00186     if ((b = findEntry(ht, key)) == NULL)
00187         return 1;
00188 
00189     if (data)
00190         *(const void ***)data = (const void **) b->data;
00191     if (dataCount)
00192         *dataCount = b->dataCount;
00193     if (tableKey)
00194         *(const void **)tableKey = b->key;
00195 
00196     return 0;
00197 }
00198 
00199 const void ** htGetKeys(hashTable ht)
00200 {
00201     const void ** keys = xcalloc(ht->numBuckets+1, sizeof(const void*));
00202     const void ** keypointer = keys;
00203     hashBucket b, n;
00204     int i;
00205 
00206     for (i = 0; i < ht->numBuckets; i++) {
00207         b = ht->buckets[i];
00208         if (b == NULL)
00209             continue;
00210         if (b->data)
00211             *(keys++) = b->key;
00212         do {
00213             n = b->next;
00214             if(n != NULL)
00215                 *(keys++) = n->key;
00216         } while ((b = n) != NULL);
00217     }
00218 
00219     return keypointer;
00220 }
00221 
00222 /*@-mustmod@*/  /* XXX splint on crack */
00223 static void htFini(void * _ht)
00224         /*@modifies _ht @*/
00225 {
00226     hashTable ht = _ht;
00227     hashBucket b, n;
00228     int i;
00229 
00230     for (i = 0; i < ht->numBuckets; i++) {
00231         b = ht->buckets[i];
00232         if (b == NULL)
00233             continue;
00234         ht->buckets[i] = NULL;
00235         if (ht->keySize > 0)
00236             b->key = _free(b->key);
00237         do {
00238             n = b->next;
00239             if (b->data) {
00240                 if (ht->freeData)
00241                     *b->data = _free(*b->data);
00242                 b->data = _free(b->data);
00243             }
00244             b = _free(b);
00245         } while ((b = n) != NULL);
00246     }
00247 
00248     ht->buckets = _free(ht->buckets);
00249 }
00250 /*@=mustmod@*/
00251 
00252 /*@unchecked@*/ /*@only@*/ /*@null@*/
00253 rpmioPool _htPool;
00254 
00255 static hashTable htGetPool(/*@null@*/ rpmioPool pool)
00256         /*@globals _htPool, fileSystem @*/
00257         /*@modifies pool, _htPool, fileSystem @*/
00258 {
00259     hashTable ht;
00260 
00261     if (_htPool == NULL) {
00262         _htPool = rpmioNewPool("ht", sizeof(*ht), -1, _ht_debug,
00263                         NULL, NULL, htFini);
00264         pool = _htPool;
00265     }
00266     return (hashTable) rpmioGetPool(pool, sizeof(*ht));
00267 }
00268 
00269 hashTable htCreate(int numBuckets, size_t keySize, int freeData,
00270                 hashFunctionType fn, hashEqualityType eq)
00271 {
00272     hashTable ht = htGetPool(_htPool);
00273 
00274     ht->numBuckets = numBuckets;
00275     ht->buckets = xcalloc(numBuckets, sizeof(*ht->buckets));
00276     ht->keySize = keySize;
00277     ht->freeData = freeData;
00278     /*@-assignexpose@*/
00279     ht->fn = (fn != NULL ? fn : hashFunctionString);
00280     ht->eq = (eq != NULL ? eq : hashEqualityString);
00281     /*@=assignexpose@*/
00282 
00283     return htLink(ht);
00284 }