1 /* Licensed to the Apache Software Foundation (ASF) under one or more
2 * contributor license agreements. See the NOTICE file distributed with
3 * this work for additional information regarding copyright ownership.
4 * The ASF licenses this file to You under the Apache License, Version 2.0
5 * (the "License"); you may not use this file except in compliance with
6 * the License. You may obtain a copy of the License at
8 * http://www.apache.org/licenses/LICENSE-2.0
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
17 #include "apr_general.h"
19 #include "mod_cache.h"
20 #include "cache_hash.h"
31 * The internal form of a hash table.
33 * The table is an array indexed by the hash of the key; collisions
34 * are resolved by hanging a linked list of hash entries off each
35 * element of the array. Although this is a really simple design it
36 * isn't too bad given that pools have a low allocation overhead. (Note: this implementation allocates with malloc()/calloc() rather than from pools.)
39 typedef struct cache_hash_entry_t cache_hash_entry_t;
41 struct cache_hash_entry_t {
42 cache_hash_entry_t *next; /* following entry in the same bucket's collision chain */
50 * Data structure for iterating through a hash table.
52 * We keep a pointer to the next hash entry here to allow the current
53 * hash entry to be freed or otherwise mangled between calls to cache_hash_next().
56 struct cache_hash_index_t {
58 cache_hash_entry_t *this, *next; /* this: entry the iterator currently points at; next: its successor, saved in advance by cache_hash_next() */
63 * The size of the array is always a power of two. We use the maximum
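64 * index rather than the size so that we can use bitwise-AND for modular arithmetic. (NOTE(review): find_entry() indexes with hash % ht->max rather than bitwise-AND -- confirm which is intended.)
66 * The count of hash entries may be greater depending on the chosen collision rate.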
70 cache_hash_entry_t **array; /* bucket array: max + 1 chain heads, allocated by alloc_array() */
71 cache_hash_index_t iterator; /* For cache_hash_first(NULL, ...) */
76 * Hash creation functions.
/*
 * Allocate the bucket array for a table: max + 1 pointer-sized slots,
 * zero-filled by calloc() so every chain head starts out as NULL.
 * Returns calloc()'s result unchanged, i.e. NULL on allocation failure.
 * `ht` is used only as the operand of sizeof and is not dereferenced.
 * NOTE(review): the byte count is multiplied out before the call, so
 * calloc()'s own nmemb * size overflow check does not apply here.
 */
78 static cache_hash_entry_t **alloc_array(cache_hash_t *ht, int max)
80 return calloc(1, sizeof(*ht->array) * (max + 1));
/*
 * Create a hash table. The table header comes from malloc() and the
 * bucket array from alloc_array(ht, ht->max), so both are plain heap
 * allocations rather than pool allocations.
 * NOTE(review): presumably ht->max is derived from `size` and a failed
 * allocation yields NULL -- confirm.
 */
83 CACHE_DECLARE(cache_hash_t *) cache_hash_make(apr_size_t size)
86 ht = malloc(sizeof(cache_hash_t));
92 ht->array = alloc_array(ht, ht->max);
/*
 * Counterpart to cache_hash_make().
 * NOTE(review): presumably releases the bucket array and the table
 * header. Entries are malloc()ed one at a time in find_entry(), so
 * confirm who is responsible for freeing entries still in the table.
 */
100 CACHE_DECLARE(void) cache_hash_free(cache_hash_t *ht)
110 * Hash iteration functions.
/*
 * Advance the iterator to the next entry. Buckets are taken from
 * ht->array in index order; an index greater than ht->max means every
 * bucket has been visited (presumably NULL is returned then -- confirm).
 * The successor of the current entry is saved in hi->next before
 * returning, so the current entry may be freed or otherwise changed
 * by the caller before the next call.
 */
113 CACHE_DECLARE(cache_hash_index_t *) cache_hash_next(cache_hash_index_t *hi)
117 if (hi->index > hi->ht->max)
119 hi->this = hi->ht->array[hi->index++];
121 hi->next = hi->this->next;
/*
 * Begin iterating over ht: set up an iterator and return the result of
 * cache_hash_next() on it, i.e. the first position.
 * NOTE(review): the iterator appears to be the single `iterator` member
 * stored in the table itself; if so, nested or concurrent iterations
 * over the same table would interfere with each other -- confirm.
 */
125 CACHE_DECLARE(cache_hash_index_t *) cache_hash_first(cache_hash_t *ht)
127 cache_hash_index_t *hi;
134 return cache_hash_next(hi);
/*
 * Report the entry the iterator currently points at. Each of key, klen
 * and val is an optional out-parameter: a NULL pointer is skipped, a
 * non-NULL one receives the entry's key, key length or value. The
 * stored value is cast to void * on the way out.
 */
137 CACHE_DECLARE(void) cache_hash_this(cache_hash_index_t *hi,
142 if (key) *key = hi->this->key;
143 if (klen) *klen = hi->this->klen;
144 if (val) *val = (void *)hi->this->val;
149 * This is where we keep the details of the hash function and control
150 * the maximum collision rate.
152 * If val is non-NULL it creates and initializes a new hash entry if
153 * there isn't already one there; it returns an updatable pointer so
154 * that hash entries can be removed.
157 static cache_hash_entry_t **find_entry(cache_hash_t *ht,
162 cache_hash_entry_t **hep, *he;
163 const unsigned char *p;
168 * This is the popular `times 33' hash algorithm which is used by
169 * perl and also appears in Berkeley DB. This is one of the best
170 * known hash functions for strings because it is both computed
171 * very fast and distributes very well.
173 * The originator may be Dan Bernstein but the code in Berkeley DB
174 * cites Chris Torek as the source. The best citation I have found
175 * is "Chris Torek, Hash function for text in C, Usenet message
176 * <27038@mimsy.umd.edu> in comp.lang.c , October, 1990." in Rich
177 * Salz's USENIX 1992 paper about INN which can be found at
178 * <http://citeseer.nj.nec.com/salz92internetnews.html>.
180 * The magic of number 33, i.e. why it works better than many other
181 * constants, prime or not, has never been adequately explained by
182 * anyone. So I try an explanation: if one experimentally tests all
183 * multipliers between 1 and 256 (as I did while writing a low-level
184 * data structure library some time ago) one detects that even
185 * numbers are not useable at all. The remaining 128 odd numbers
186 * (except for the number 1) work more or less all equally well.
187 * They all distribute in an acceptable way and this way fill a hash
188 * table with an average percent of approx. 86%.
190 * If one compares the chi^2 values of the variants (see
191 * Bob Jenkins ``Hashing Frequently Asked Questions'' at
192 * http://burtleburtle.net/bob/hash/hashfaq.html for a description
193 * of chi^2), the number 33 does not even have the best value. But the
194 * number 33 and a few other equally good numbers like 17, 31, 63,
195 * 127 and 129 nevertheless have a great advantage over the remaining
196 * numbers in the large set of possible multipliers: their multiply
197 * operation can be replaced by a faster operation based on just one
198 * shift plus either a single addition or subtraction operation. And
199 * because a hash function has to both distribute well _and_ be very
200 * fast to compute, those few numbers should be preferred.
202 * -- Ralf S. Engelschall <rse@engelschall.com>
205 if (klen == CACHE_HASH_KEY_STRING) {
206 for (p = key; *p; p++) {
207 hash = hash * 33 + *p;
209 klen = p - (const unsigned char *)key;
212 for (p = key, i = klen; i; i--, p++) {
213 hash = hash * 33 + *p;
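217 /* scan linked list; NOTE(review): the bucket index below is hash % ht->max, which never selects slot ht->max of the max + 1 slots from alloc_array() and divides by zero if ht->max == 0 -- confirm this is intended */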
218 for (hep = &ht->array[hash % ht->max], he = *hep;
220 hep = &he->next, he = *hep) {
221 if (he->hash == hash &&
223 memcmp(he->key, key, klen) == 0)
228 /* add a new entry for non-NULL values */
229 he = malloc(sizeof(*he));
/*
 * Look up the value stored under key. find_entry() is called with a
 * NULL val, so the lookup never creates an entry. The stored value is
 * returned with its constness cast away.
 * NOTE(review): the result of find_entry() is dereferenced directly;
 * this relies on find_entry() returning NULL only when its malloc()
 * fails, a path not taken for a NULL val -- confirm.
 */
243 CACHE_DECLARE(void *) cache_hash_get(cache_hash_t *ht,
247 cache_hash_entry_t *he;
248 he = *find_entry(ht, key, klen, NULL);
250 return (void *)he->val;
/*
 * Update the table for key through find_entry(ht, key, klen, val).
 * A NULL result from find_entry() means its malloc() failed. When an
 * entry is removed, the value it held is returned (constness cast
 * away) so the caller can clean it up. The remaining case is a key
 * that is not present together with val == NULL.
 * NOTE(review): presumably a NULL val requests deletion of an existing
 * entry -- confirm.
 */
255 CACHE_DECLARE(void *) cache_hash_set(cache_hash_t *ht,
260 cache_hash_entry_t **hep, *tmp;
262 hep = find_entry(ht, key, klen, val);
263 /* If hep == NULL, then the malloc() in find_entry failed */
278 /* Return the object just removed from the cache to let the
279 * caller clean it up. Cast the constness away upon return.
281 return (void *) tval;
283 /* else key not present and val==NULL */
287 CACHE_DECLARE(int) cache_hash_count(cache_hash_t *ht)