rubbos/app/httpd-2.0.64/srclib/apr/tables/apr_hash.c

   1 /* Licensed to the Apache Software Foundation (ASF) under one or more
   2  * contributor license agreements.  See the NOTICE file distributed with
   3  * this work for additional information regarding copyright ownership.
   4  * The ASF licenses this file to You under the Apache License, Version 2.0
   5  * (the "License"); you may not use this file except in compliance with
   6  * the License.  You may obtain a copy of the License at
   7  *
   8  *     http://www.apache.org/licenses/LICENSE-2.0
   9  *
  10  * Unless required by applicable law or agreed to in writing, software
  11  * distributed under the License is distributed on an "AS IS" BASIS,
  12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  13  * See the License for the specific language governing permissions and
  14  * limitations under the License.
  15  */
  16
  17 #include "apr_private.h"
  18
  19 #include "apr_general.h"
  20 #include "apr_pools.h"
  21
  22 #include "apr_hash.h"
  23
  24 #if APR_HAVE_STDLIB_H
  25 #include <stdlib.h>
  26 #endif
  27 #if APR_HAVE_STRING_H
  28 #include <string.h>
  29 #endif
  30
  31
  32 /*
  33  * The internal form of a hash table.
  34  *
  35  * The table is an array indexed by the hash of the key; collisions
  36  * are resolved by hanging a linked list of hash entries off each
  37  * element of the array. Although this is a really simple design it
  38  * isn't too bad given that pools have a low allocation overhead.
  39  */
  40
  41 typedef struct apr_hash_entry_t apr_hash_entry_t;
  42
  43 struct apr_hash_entry_t {
  44     apr_hash_entry_t *next;
  45     unsigned int      hash;
  46     const void       *key;
  47     apr_ssize_t       klen;
  48     const void       *val;
  49 };
  50
  51 /*
  52  * Data structure for iterating through a hash table.
  53  *
  54  * We keep a pointer to the next hash entry here to allow the current
  55  * hash entry to be freed or otherwise mangled between calls to
  56  * apr_hash_next().
  57  */
  58 struct apr_hash_index_t {
  59     apr_hash_t         *ht;
  60     apr_hash_entry_t   *this, *next;
  61     unsigned int        index;
  62 };
  63
  64 /*
  65  * The size of the array is always a power of two. We use the maximum
  66  * index rather than the size so that we can use bitwise-AND for
  67  * modular arithmetic.
  68  * The count of hash entries may be greater depending on the chosen
  69  * collision rate.
  70  */
  71 struct apr_hash_t {
  72     apr_pool_t          *pool;
  73     apr_hash_entry_t   **array;
  74     apr_hash_index_t     iterator;  /* For apr_hash_first(NULL, ...) */
  75     unsigned int         count, max;
  76     apr_hash_entry_t    *free;  /* List of recycled entries */
  77 };
  78
  79 #define INITIAL_MAX 15 /* tunable == 2^n - 1 */
  80
  81
  82 /*
  83  * Hash creation functions.
  84  */
  85
  86 static apr_hash_entry_t **alloc_array(apr_hash_t *ht, unsigned int max)
  87 {
  88    return apr_pcalloc(ht->pool, sizeof(*ht->array) * (max + 1));
  89 }
  90
  91 APR_DECLARE(apr_hash_t *) apr_hash_make(apr_pool_t *pool)
  92 {
  93     apr_hash_t *ht;
  94     ht = apr_palloc(pool, sizeof(apr_hash_t));
  95     ht->pool = pool;
  96     ht->free = NULL;
  97     ht->count = 0;
  98     ht->max = INITIAL_MAX;
  99     ht->array = alloc_array(ht, ht->max);
 100     return ht;
 101 }
 102
 103
 104 /*
 105  * Hash iteration functions.
 106  */
 107
 108 APR_DECLARE(apr_hash_index_t *) apr_hash_next(apr_hash_index_t *hi)
 109 {
 110     hi->this = hi->next;
 111     while (!hi->this) {
 112         if (hi->index > hi->ht->max)
 113             return NULL;
 114
 115         hi->this = hi->ht->array[hi->index++];
 116     }
 117     hi->next = hi->this->next;
 118     return hi;
 119 }
 120
 121 APR_DECLARE(apr_hash_index_t *) apr_hash_first(apr_pool_t *p, apr_hash_t *ht)
 122 {
 123     apr_hash_index_t *hi;
 124     if (p)
 125         hi = apr_palloc(p, sizeof(*hi));
 126     else
 127         hi = &ht->iterator;
 128
 129     hi->ht = ht;
 130     hi->index = 0;
 131     hi->this = NULL;
 132     hi->next = NULL;
 133     return apr_hash_next(hi);
 134 }
 135
 136 APR_DECLARE(void) apr_hash_this(apr_hash_index_t *hi,
 137                                 const void **key,
 138                                 apr_ssize_t *klen,
 139                                 void **val)
 140 {
 141     if (key)  *key  = hi->this->key;
 142     if (klen) *klen = hi->this->klen;
 143     if (val)  *val  = (void *)hi->this->val;
 144 }
 145
 146
 147 /*
 148  * Expanding a hash table
 149  */
 150
 151 static void expand_array(apr_hash_t *ht)
 152 {
 153     apr_hash_index_t *hi;
 154     apr_hash_entry_t **new_array;
 155     unsigned int new_max;
 156
 157     new_max = ht->max * 2 + 1;
 158     new_array = alloc_array(ht, new_max);
 159     for (hi = apr_hash_first(NULL, ht); hi; hi = apr_hash_next(hi)) {
 160         unsigned int i = hi->this->hash & new_max;
 161         hi->this->next = new_array[i];
 162         new_array[i] = hi->this;
 163     }
 164     ht->array = new_array;
 165     ht->max = new_max;
 166 }
 167
 168 /*
 169  * This is where we keep the details of the hash function and control
 170  * the maximum collision rate.
 171  *
 172  * If val is non-NULL it creates and initializes a new hash entry if
 173  * there isn't already one there; it returns an updatable pointer so
 174  * that hash entries can be removed.
 175  */
 176
 177 static apr_hash_entry_t **find_entry(apr_hash_t *ht,
 178                                      const void *key,
 179                                      apr_ssize_t klen,
 180                                      const void *val)
 181 {
 182     apr_hash_entry_t **hep, *he;
 183     const unsigned char *p;
 184     unsigned int hash;
 185     apr_ssize_t i;
 186
 187     /*
 188      * This is the popular `times 33' hash algorithm which is used by
 189      * perl and also appears in Berkeley DB. This is one of the best
 190      * known hash functions for strings because it is both computed
 191      * very fast and distributes very well.
 192      *
 193      * The originator may be Dan Bernstein but the code in Berkeley DB
 194      * cites Chris Torek as the source. The best citation I have found
 195      * is "Chris Torek, Hash function for text in C, Usenet message
 196      * <27038@mimsy.umd.edu> in comp.lang.c , October, 1990." in Rich
 197      * Salz's USENIX 1992 paper about INN which can be found at
 198      * <http://citeseer.nj.nec.com/salz92internetnews.html>.
 199      *
 200      * The magic of number 33, i.e. why it works better than many other
 201      * constants, prime or not, has never been adequately explained by
 202      * anyone. So I try an explanation: if one experimentally tests all
 203      * multipliers between 1 and 256 (as I did while writing a low-level
 204      * data structure library some time ago) one detects that even
 205      * numbers are not useable at all. The remaining 128 odd numbers
 206      * (except for the number 1) work more or less all equally well.
 207      * They all distribute in an acceptable way and this way fill a hash
 208      * table with an average percent of approx. 86%.
 209      *
 210      * If one compares the chi^2 values of the variants (see
 211      * Bob Jenkins ``Hashing Frequently Asked Questions'' at
 212      * http://burtleburtle.net/bob/hash/hashfaq.html for a description
 213      * of chi^2), the number 33 not even has the best value. But the
 214      * number 33 and a few other equally good numbers like 17, 31, 63,
 215      * 127 and 129 have nevertheless a great advantage to the remaining
 216      * numbers in the large set of possible multipliers: their multiply
 217      * operation can be replaced by a faster operation based on just one
 218      * shift plus either a single addition or subtraction operation. And
 219      * because a hash function has to both distribute good _and_ has to
 220      * be very fast to compute, those few numbers should be preferred.
 221      *
 222      *                  -- Ralf S. Engelschall <rse@engelschall.com>
 223      */
 224     hash = 0;
 225     if (klen == APR_HASH_KEY_STRING) {
 226         for (p = key; *p; p++) {
 227             hash = hash * 33 + *p;
 228         }
 229         klen = p - (const unsigned char *)key;
 230     }
 231     else {
 232         for (p = key, i = klen; i; i--, p++) {
 233             hash = hash * 33 + *p;
 234         }
 235     }
 236
 237     /* scan linked list */
 238     for (hep = &ht->array[hash & ht->max], he = *hep;
 239          he; hep = &he->next, he = *hep) {
 240         if (he->hash == hash
 241             && he->klen == klen
 242             && memcmp(he->key, key, klen) == 0)
 243             break;
 244     }
 245     if (he || !val)
 246         return hep;
 247
 248     /* add a new entry for non-NULL values */
 249     if ((he = ht->free) != NULL)
 250         ht->free = he->next;
 251     else
 252         he = apr_palloc(ht->pool, sizeof(*he));
 253     he->next = NULL;
 254     he->hash = hash;
 255     he->key  = key;
 256     he->klen = klen;
 257     he->val  = val;
 258     *hep = he;
 259     ht->count++;
 260     return hep;
 261 }
 262
 263 APR_DECLARE(apr_hash_t *) apr_hash_copy(apr_pool_t *pool,
 264                                         const apr_hash_t *orig)
 265 {
 266     apr_hash_t *ht;
 267     apr_hash_entry_t *new_vals;
 268     unsigned int i, j;
 269
 270     ht = apr_palloc(pool, sizeof(apr_hash_t) +
 271                     sizeof(*ht->array) * (orig->max + 1) +
 272                     sizeof(apr_hash_entry_t) * orig->count);
 273     ht->pool = pool;
 274     ht->free = NULL;
 275     ht->count = orig->count;
 276     ht->max = orig->max;
 277     ht->array = (apr_hash_entry_t **)((char *)ht + sizeof(apr_hash_t));
 278
 279     new_vals = (apr_hash_entry_t *)((char *)(ht) + sizeof(apr_hash_t) +
 280                                     sizeof(*ht->array) * (orig->max + 1));
 281     j = 0;
 282     for (i = 0; i <= ht->max; i++) {
 283         apr_hash_entry_t **new_entry = &(ht->array[i]);
 284         apr_hash_entry_t *orig_entry = orig->array[i];
 285         while (orig_entry) {
 286             *new_entry = &new_vals[j++];
 287             (*new_entry)->hash = orig_entry->hash;
 288             (*new_entry)->key = orig_entry->key;
 289             (*new_entry)->klen = orig_entry->klen;
 290             (*new_entry)->val = orig_entry->val;
 291             new_entry = &((*new_entry)->next);
 292             orig_entry = orig_entry->next;
 293         }
 294         *new_entry = NULL;
 295     }
 296     return ht;
 297 }
 298
 299 APR_DECLARE(void *) apr_hash_get(apr_hash_t *ht,
 300                                  const void *key,
 301                                  apr_ssize_t klen)
 302 {
 303     apr_hash_entry_t *he;
 304     he = *find_entry(ht, key, klen, NULL);
 305     if (he)
 306         return (void *)he->val;
 307     else
 308         return NULL;
 309 }
 310
 311 APR_DECLARE(void) apr_hash_set(apr_hash_t *ht,
 312                                const void *key,
 313                                apr_ssize_t klen,
 314                                const void *val)
 315 {
 316     apr_hash_entry_t **hep;
 317     hep = find_entry(ht, key, klen, val);
 318     if (*hep) {
 319         if (!val) {
 320             /* delete entry */
 321             apr_hash_entry_t *old = *hep;
 322             *hep = (*hep)->next;
 323             old->next = ht->free;
 324             ht->free = old;
 325             --ht->count;
 326         }
 327         else {
 328             /* replace entry */
 329             (*hep)->val = val;
 330             /* check that the collision rate isn't too high */
 331             if (ht->count > ht->max) {
 332                 expand_array(ht);
 333             }
 334         }
 335     }
 336     /* else key not present and val==NULL */
 337 }
 338
 339 APR_DECLARE(unsigned int) apr_hash_count(apr_hash_t *ht)
 340 {
 341     return ht->count;
 342 }
 343
 344 APR_DECLARE(apr_hash_t*) apr_hash_overlay(apr_pool_t *p,
 345                                           const apr_hash_t *overlay,
 346                                           const apr_hash_t *base)
 347 {
 348     return apr_hash_merge(p, overlay, base, NULL, NULL);
 349 }
 350
 351 APR_DECLARE(apr_hash_t *) apr_hash_merge(apr_pool_t *p,
 352                                          const apr_hash_t *overlay,
 353                                          const apr_hash_t *base,
 354                                          void * (*merger)(apr_pool_t *p,
 355                                                      const void *key,
 356                                                      apr_ssize_t klen,
 357                                                      const void *h1_val,
 358                                                      const void *h2_val,
 359                                                      const void *data),
 360                                          const void *data)
 361 {
 362     apr_hash_t *res;
 363     apr_hash_entry_t *new_vals = NULL;
 364     apr_hash_entry_t *iter;
 365     apr_hash_entry_t *ent;
 366     unsigned int i,j,k;
 367
 368 #ifdef POOL_DEBUG
 369     /* we don't copy keys and values, so it's necessary that
 370      * overlay->a.pool and base->a.pool have a life span at least
 371      * as long as p
 372      */
 373     if (!apr_pool_is_ancestor(overlay->pool, p)) {
 374         fprintf(stderr,
 375                 "apr_hash_overlay: overlay's pool is not an ancestor of p\n");
 376         abort();
 377     }
 378     if (!apr_pool_is_ancestor(base->pool, p)) {
 379         fprintf(stderr,
 380                 "apr_hash_overlay: base's pool is not an ancestor of p\n");
 381         abort();
 382     }
 383 #endif
 384
 385     res = apr_palloc(p, sizeof(apr_hash_t));
 386     res->pool = p;
 387     res->free = NULL;
 388     res->count = base->count;
 389     res->max = (overlay->max > base->max) ? overlay->max : base->max;
 390     if (base->count + overlay->count > res->max) {
 391         res->max = res->max * 2 + 1;
 392     }
 393     res->array = alloc_array(res, res->max);
 394     if (base->count + overlay->count) {
 395         new_vals = apr_palloc(p, sizeof(apr_hash_entry_t) *
 396                               (base->count + overlay->count));
 397     }
 398     j = 0;
 399     for (k = 0; k <= base->max; k++) {
 400         for (iter = base->array[k]; iter; iter = iter->next) {
 401             i = iter->hash & res->max;
 402             new_vals[j].klen = iter->klen;
 403             new_vals[j].key = iter->key;
 404             new_vals[j].val = iter->val;
 405             new_vals[j].hash = iter->hash;
 406             new_vals[j].next = res->array[i];
 407             res->array[i] = &new_vals[j];
 408             j++;
 409         }
 410     }
 411
 412     for (k = 0; k <= overlay->max; k++) {
 413         for (iter = overlay->array[k]; iter; iter = iter->next) {
 414             i = iter->hash & res->max;
 415             for (ent = res->array[i]; ent; ent = ent->next) {
 416                 if ((ent->klen == iter->klen) &&
 417                     (memcmp(ent->key, iter->key, iter->klen) == 0)) {
 418                     if (merger) {
 419                         ent->val = (*merger)(p, iter->key, iter->klen,
 420                                              iter->val, ent->val, data);
 421                     }
 422                     else {
 423                         ent->val = iter->val;
 424                     }
 425                     break;
 426                 }
 427             }
 428             if (!ent) {
 429                 new_vals[j].klen = iter->klen;
 430                 new_vals[j].key = iter->key;
 431                 new_vals[j].val = iter->val;
 432                 new_vals[j].hash = iter->hash;
 433                 new_vals[j].next = res->array[i];
 434                 res->array[i] = &new_vals[j];
 435                 res->count++;
 436                 j++;
 437             }
 438         }
 439     }
 440     return res;
 441 }
 442
  443 APR_POOL_IMPLEMENT_ACCESSOR(hash) /* NOTE(review): presumably generates the pool accessor for apr_hash_t; the macro is defined outside this file -- confirm */