root / trunk / util / lookup2.c

Revision 309, 12.8 kB (checked in by saurik, 5 months ago)

Forgot to add this file many blue moons ago.

Line 
1/*
2--------------------------------------------------------------------
3lookup2.c, by Bob Jenkins, December 1996, Public Domain.
4hash(), hash2(), hash3, and mix() are externally useful functions.
5Routines to test the hash are included if SELF_TEST is defined.
6You can use this free for any purpose.  It has no warranty.
7--------------------------------------------------------------------
8*/
9#include <stdio.h>
10#include <stddef.h>
11#include <stdlib.h>
12typedef  unsigned long  int  ub4;   /* unsigned 4-byte quantities */
13typedef  unsigned       char ub1;
14
15#define hashsize(n) ((ub4)1<<(n))
16#define hashmask(n) (hashsize(n)-1)
17
18/*
19--------------------------------------------------------------------
20mix -- mix 3 32-bit values reversibly.
21For every delta with one or two bit set, and the deltas of all three
22  high bits or all three low bits, whether the original value of a,b,c
23  is almost all zero or is uniformly distributed,
24* If mix() is run forward or backward, at least 32 bits in a,b,c
25  have at least 1/4 probability of changing.
26* If mix() is run forward, every bit of c will change between 1/3 and
27  2/3 of the time.  (Well, 22/100 and 78/100 for some 2-bit deltas.)
28mix() was built out of 36 single-cycle latency instructions in a
29  structure that could supported 2x parallelism, like so:
30      a -= b;
31      a -= c; x = (c>>13);
32      b -= c; a ^= x;
33      b -= a; x = (a<<8);
34      c -= a; b ^= x;
35      c -= b; x = (b>>13);
36      ...
37  Unfortunately, superscalar Pentiums and Sparcs can't take advantage
38  of that parallelism.  They've also turned some of those single-cycle
39  latency instructions into multi-cycle latency instructions.  Still,
40  this is the fastest good hash I could find.  There were about 2^^68
41  to choose from.  I only looked at a billion or so.
42--------------------------------------------------------------------
43*/
44#define mix(a,b,c) \
45{ \
46  a -= b; a -= c; a ^= (c>>13); \
47  b -= c; b -= a; b ^= (a<<8); \
48  c -= a; c -= b; c ^= (b>>13); \
49  a -= b; a -= c; a ^= (c>>12);  \
50  b -= c; b -= a; b ^= (a<<16); \
51  c -= a; c -= b; c ^= (b>>5); \
52  a -= b; a -= c; a ^= (c>>3);  \
53  b -= c; b -= a; b ^= (a<<10); \
54  c -= a; c -= b; c ^= (b>>15); \
55}
56
57/* same, but slower, works on systems that might have 8 byte ub4's */
58#define mix2(a,b,c) \
59{ \
60  a -= b; a -= c; a ^= (c>>13); \
61  b -= c; b -= a; b ^= (a<< 8); \
62  c -= a; c -= b; c ^= ((b&0xffffffff)>>13); \
63  a -= b; a -= c; a ^= ((c&0xffffffff)>>12); \
64  b -= c; b -= a; b = (b ^ (a<<16)) & 0xffffffff; \
65  c -= a; c -= b; c = (c ^ (b>> 5)) & 0xffffffff; \
66  a -= b; a -= c; a = (a ^ (c>> 3)) & 0xffffffff; \
67  b -= c; b -= a; b = (b ^ (a<<10)) & 0xffffffff; \
68  c -= a; c -= b; c = (c ^ (b>>15)) & 0xffffffff; \
69}
70
71/*
72--------------------------------------------------------------------
73hash() -- hash a variable-length key into a 32-bit value
74  k     : the key (the unaligned variable-length array of bytes)
75  len   : the length of the key, counting by bytes
76  level : can be any 4-byte value
77Returns a 32-bit value.  Every bit of the key affects every bit of
78the return value.  Every 1-bit and 2-bit delta achieves avalanche.
79About 36+6len instructions.
80
81The best hash table sizes are powers of 2.  There is no need to do
82mod a prime (mod is sooo slow!).  If you need less than 32 bits,
83use a bitmask.  For example, if you need only 10 bits, do
84  h = (h & hashmask(10));
85In which case, the hash table should have hashsize(10) elements.
86
87If you are hashing n strings (ub1 **)k, do it like this:
88  for (i=0, h=0; i<n; ++i) h = hash( k[i], len[i], h);
89
90By Bob Jenkins, 1996.  bob_jenkins@burtleburtle.net.  You may use this
91code any way you wish, private, educational, or commercial.  It's free.
92
93See http://burlteburtle.net/bob/hash/evahash.html
94Use for hash table lookup, or anything where one collision in 2^32 is
95acceptable.  Do NOT use for cryptographic purposes.
96--------------------------------------------------------------------
97*/
98
99ub4 hash( k, length, initval)
100register ub1 *k;        /* the key */
101register ub4  length;   /* the length of the key */
102register ub4  initval;    /* the previous hash, or an arbitrary value */
103{
104   register ub4 a,b,c,len;
105
106   /* Set up the internal state */
107   len = length;
108   a = b = 0x9e3779b9;  /* the golden ratio; an arbitrary value */
109   c = initval;           /* the previous hash value */
110
111   /*---------------------------------------- handle most of the key */
112   while (len >= 12)
113   {
114      a += (k[0] +((ub4)k[1]<<8) +((ub4)k[2]<<16) +((ub4)k[3]<<24));
115      b += (k[4] +((ub4)k[5]<<8) +((ub4)k[6]<<16) +((ub4)k[7]<<24));
116      c += (k[8] +((ub4)k[9]<<8) +((ub4)k[10]<<16)+((ub4)k[11]<<24));
117      mix(a,b,c);
118      k += 12; len -= 12;
119   }
120
121   /*------------------------------------- handle the last 11 bytes */
122   c += length;
123   switch(len)              /* all the case statements fall through */
124   {
125   case 11: c+=((ub4)k[10]<<24);
126   case 10: c+=((ub4)k[9]<<16);
127   case 9 : c+=((ub4)k[8]<<8);
128      /* the first byte of c is reserved for the length */
129   case 8 : b+=((ub4)k[7]<<24);
130   case 7 : b+=((ub4)k[6]<<16);
131   case 6 : b+=((ub4)k[5]<<8);
132   case 5 : b+=k[4];
133   case 4 : a+=((ub4)k[3]<<24);
134   case 3 : a+=((ub4)k[2]<<16);
135   case 2 : a+=((ub4)k[1]<<8);
136   case 1 : a+=k[0];
137     /* case 0: nothing left to add */
138   }
139   mix(a,b,c);
140   /*-------------------------------------------- report the result */
141   return c;
142}
143
144
145/*
146--------------------------------------------------------------------
147 This works on all machines.  hash2() is identical to hash() on
148 little-endian machines, except that the length has to be measured
149 in ub4s instead of bytes.  It is much faster than hash().  It
150 requires
151 -- that the key be an array of ub4's, and
152 -- that all your machines have the same endianness, and
153 -- that the length be the number of ub4's in the key
154--------------------------------------------------------------------
155*/
156ub4 hash2( k, length, initval)
157register ub4 *k;        /* the key */
158register ub4  length;   /* the length of the key, in ub4s */
159register ub4  initval;  /* the previous hash, or an arbitrary value */
160{
161   register ub4 a,b,c,len;
162
163   /* Set up the internal state */
164   len = length;
165   a = b = 0x9e3779b9;  /* the golden ratio; an arbitrary value */
166   c = initval;           /* the previous hash value */
167
168   /*---------------------------------------- handle most of the key */
169   while (len >= 3)
170   {
171      a += k[0];
172      b += k[1];
173      c += k[2];
174      mix(a,b,c);
175      k += 3; len -= 3;
176   }
177
178   /*-------------------------------------- handle the last 2 ub4's */
179   c += length;
180   switch(len)              /* all the case statements fall through */
181   {
182     /* c is reserved for the length */
183   case 2 : b+=k[1];
184   case 1 : a+=k[0];
185     /* case 0: nothing left to add */
186   }
187   mix(a,b,c);
188   /*-------------------------------------------- report the result */
189   return c;
190}
191
192/*
193--------------------------------------------------------------------
194 This is identical to hash() on little-endian machines (like Intel
195 x86s or VAXen).  It gives nondeterministic results on big-endian
196 machines.  It is faster than hash(), but a little slower than
197 hash2(), and it requires
198 -- that all your machines be little-endian
199--------------------------------------------------------------------
200*/
201
202ub4 hash3( k, length, initval)
203register ub1 *k;        /* the key */
204register ub4  length;   /* the length of the key */
205register ub4  initval;  /* the previous hash, or an arbitrary value */
206{
207   register ub4 a,b,c,len;
208
209   /* Set up the internal state */
210   len = length;
211   a = b = 0x9e3779b9;    /* the golden ratio; an arbitrary value */
212   c = initval;           /* the previous hash value */
213
214   /*---------------------------------------- handle most of the key */
215   if (((ub4)k)&3)
216   {
217      while (len >= 12)    /* unaligned */
218      {
219         a += (k[0] +((ub4)k[1]<<8) +((ub4)k[2]<<16) +((ub4)k[3]<<24));
220         b += (k[4] +((ub4)k[5]<<8) +((ub4)k[6]<<16) +((ub4)k[7]<<24));
221         c += (k[8] +((ub4)k[9]<<8) +((ub4)k[10]<<16)+((ub4)k[11]<<24));
222         mix(a,b,c);
223         k += 12; len -= 12;
224      }
225   }
226   else
227   {
228      while (len >= 12)    /* aligned */
229      {
230         a += *(ub4 *)(k+0);
231         b += *(ub4 *)(k+4);
232         c += *(ub4 *)(k+8);
233         mix(a,b,c);
234         k += 12; len -= 12;
235      }
236   }
237
238   /*------------------------------------- handle the last 11 bytes */
239   c += length;
240   switch(len)              /* all the case statements fall through */
241   {
242   case 11: c+=((ub4)k[10]<<24);
243   case 10: c+=((ub4)k[9]<<16);
244   case 9 : c+=((ub4)k[8]<<8);
245      /* the first byte of c is reserved for the length */
246   case 8 : b+=((ub4)k[7]<<24);
247   case 7 : b+=((ub4)k[6]<<16);
248   case 6 : b+=((ub4)k[5]<<8);
249   case 5 : b+=k[4];
250   case 4 : a+=((ub4)k[3]<<24);
251   case 3 : a+=((ub4)k[2]<<16);
252   case 2 : a+=((ub4)k[1]<<8);
253   case 1 : a+=k[0];
254     /* case 0: nothing left to add */
255   }
256   mix(a,b,c);
257   /*-------------------------------------------- report the result */
258   return c;
259}
260
261
262
263#ifdef SELF_TEST
264
265/* used for timings */
266void driver1()
267{
268  ub4 buf[256];
269  ub4 i;
270  ub4 h=0;
271
272  for (i=0; i<256; ++i)
273  {
274    h = hash(buf,i,h);
275  }
276}
277
278/* check that every input bit changes every output bit half the time */
279#define HASHSTATE 1
280#define HASHLEN   1
281#define MAXPAIR 80
282#define MAXLEN 70
283void driver2()
284{
285  ub1 qa[MAXLEN+1], qb[MAXLEN+2], *a = &qa[0], *b = &qb[1];
286  ub4 c[HASHSTATE], d[HASHSTATE], i, j=0, k, l, m, z;
287  ub4 e[HASHSTATE],f[HASHSTATE],g[HASHSTATE],h[HASHSTATE];
288  ub4 x[HASHSTATE],y[HASHSTATE];
289  ub4 hlen;
290
291  printf("No more than %d trials should ever be needed \n",MAXPAIR/2);
292  for (hlen=0; hlen < MAXLEN; ++hlen)
293  {
294    z=0;
295    for (i=0; i<hlen; ++i)  /*----------------------- for each input byte, */
296    {
297      for (j=0; j<8; ++j)   /*------------------------ for each input bit, */
298      {
299        for (m=1; m<8; ++m) /*------------ for serveral possible initvals, */
300        {
301          for (l=0; l<HASHSTATE; ++l) e[l]=f[l]=g[l]=h[l]=x[l]=y[l]=~((ub4)0);
302
303          /*---- check that every output bit is affected by that input bit */
304          for (k=0; k<MAXPAIR; k+=2)
305          {
306            ub4 finished=1;
307            /* keys have one bit different */
308            for (l=0; l<hlen+1; ++l) {a[l] = b[l] = (ub1)0;}
309            /* have a and b be two keys differing in only one bit */
310            a[i] ^= (k<<j);
311            a[i] ^= (k>>(8-j));
312             c[0] = hash(a, hlen, m);
313            b[i] ^= ((k+1)<<j);
314            b[i] ^= ((k+1)>>(8-j));
315             d[0] = hash(b, hlen, m);
316            /* check every bit is 1, 0, set, and not set at least once */
317            for (l=0; l<HASHSTATE; ++l)
318            {
319              e[l] &= (c[l]^d[l]);
320              f[l] &= ~(c[l]^d[l]);
321              g[l] &= c[l];
322              h[l] &= ~c[l];
323              x[l] &= d[l];
324              y[l] &= ~d[l];
325              if (e[l]|f[l]|g[l]|h[l]|x[l]|y[l]) finished=0;
326            }
327            if (finished) break;
328          }
329          if (k>z) z=k;
330          if (k==MAXPAIR)
331          {
332             printf("Some bit didn't change: ");
333             printf("%.8lx %.8lx %.8lx %.8lx %.8lx %.8lx  ",
334                    e[0],f[0],g[0],h[0],x[0],y[0]);
335             printf("i %ld j %ld m %ld len %ld\n",i,j,m,hlen);
336          }
337          if (z==MAXPAIR) goto done;
338        }
339      }
340    }
341   done:
342    if (z < MAXPAIR)
343    {
344      printf("Mix success  %2ld bytes  %2ld initvals  ",i,m);
345      printf("required  %ld  trials\n",z/2);
346    }
347  }
348  printf("\n");
349}
350
351/* Check for reading beyond the end of the buffer and alignment problems */
352void driver3()
353{
354  ub1 buf[MAXLEN+20], *b;
355  ub4 len;
356  ub1 q[] = "This is the time for all good men to come to the aid of their country";
357  ub1 qq[] = "xThis is the time for all good men to come to the aid of their country";
358  ub1 qqq[] = "xxThis is the time for all good men to come to the aid of their country";
359  ub1 qqqq[] = "xxxThis is the time for all good men to come to the aid of their country";
360  ub4 h,i,j,ref,x,y;
361
362  printf("Endianness.  These should all be the same:\n");
363  printf("%.8lx\n", hash(q, sizeof(q)-1, (ub4)0));
364  printf("%.8lx\n", hash(qq+1, sizeof(q)-1, (ub4)0));
365  printf("%.8lx\n", hash(qqq+2, sizeof(q)-1, (ub4)0));
366  printf("%.8lx\n", hash(qqqq+3, sizeof(q)-1, (ub4)0));
367  printf("\n");
368  for (h=0, b=buf+1; h<8; ++h, ++b)
369  {
370    for (i=0; i<MAXLEN; ++i)
371    {
372      len = i;
373      for (j=0; j<i; ++j) *(b+j)=0;
374
375      /* these should all be equal */
376      ref = hash(b, len, (ub4)1);
377      *(b+i)=(ub1)~0;
378      *(b-1)=(ub1)~0;
379      x = hash(b, len, (ub4)1);
380      y = hash(b, len, (ub4)1);
381      if ((ref != x) || (ref != y))
382      {
383        printf("alignment error: %.8lx %.8lx %.8lx %ld %ld\n",ref,x,y,h,i);
384      }
385    }
386  }
387}
388
389/* check for problems with nulls */
390 void driver4()
391{
392  ub1 buf[1];
393  ub4 h,i,state[HASHSTATE];
394
395
396  buf[0] = ~0;
397  for (i=0; i<HASHSTATE; ++i) state[i] = 1;
398  printf("These should all be different\n");
399  for (i=0, h=0; i<8; ++i)
400  {
401    h = hash(buf, (ub4)0, h);
402    printf("%2ld  0-byte strings, hash is  %.8lx\n", i, h);
403  }
404}
405
406
407int main()
408{
409  driver1();   /* test that the key is hashed: used for timings */
410  driver2();   /* test that whole key is hashed thoroughly */
411  driver3();   /* test that nothing but the key is hashed */
412  driver4();   /* test hashing multiple buffers (all buffers are null) */
413  return 1;
414}
415
416#endif  /* SELF_TEST */
Note: See TracBrowser for help on using the browser.