petewarden · ultimate010 · May 9, 2013 · May 9, 2013 · May 9, 2013 · May 9, 2013
diff --git a/README b/README
diff --git a/README.md b/README.md
@@ -0,0 +1,39 @@
+This is a simple C hashmap, using strings for the keys.
+
+Originally based on code by Elliott Back at http://elliottback.com/wp/hashmap-implementation-in-c/
+
+Reworked by Pete Warden - http://petewarden.typepad.com/searchbrowser/2010/01/c-hashmap.html
+
+Zaks Wang
+
+        1. Fix bug: putting the same key again caused the map's value to increase
+
+        2. Add a feature that lets you change the hash function
+
+        You can choose SIMPLE_HASH, RS_HASH, JS_HASH, PJW_HASH, ELF_HASH, BKDR_HASH, DJB_HASH, AP_HASH or
+        CRC_HASH
+
+main.c contains an example that tests the functionality of the hashmap module.
+
+To compile it, run something like this on your system:
+
+gcc main.c hashmap.c hash.c -o hashmaptest
+
+There are no restrictions on how you reuse this code.
+
+hash_func_test
+##############
+
+        一个字符串hash函数的评测,原文http://blog.csdn.net/liuben/article/details/5050697
+        实际语料测试结果，BKDR_HASH远远高于其他HASH函数，其次是AP_HASH
+        如果冲突，建议将MAX_CHAIN_LENGTH设置稍大
+
+hashMap
+#######
+
+        cheungmine修改版hashmap http://blog.csdn.net/cheungmine/article/details/7704686
+
+
+待解决的问题：
+        仅仅一个数组保存pair的指针，当分配8亿多长度的数组时候，内存会不够，可以分为多段数组，
+        再用一个hash解决在多个数组间跳跃问题。有时间再改!
diff --git a/hash.c b/hash.c
@@ -0,0 +1,183 @@
+#include <string.h>
+#include "hash.h"
+
+/*
+ * simple_hash - multiplicative string hash using the factor 31.
+ *
+ * Walks the NUL-terminated string `str` one byte at a time, reading each
+ * byte as unsigned char, and folds it in as acc = acc * 31 + byte with
+ * ordinary unsigned wrap-around.
+ *
+ * Returns the accumulator masked to its low 31 bits.
+ */
+unsigned int simple_hash(char *str)
+{
+    const unsigned char *cursor = (const unsigned char *)str;
+    unsigned int acc = 0;
+
+    while (*cursor != 0) {
+        acc = acc * 31u + *cursor;
+        cursor++;
+    }
+
+    return acc & 0x7FFFFFFFu;
+}
+
+/*
+ * RS_hash - RS string hash.
+ *
+ * For every character c of the NUL-terminated string `str`:
+ *     acc  = acc * mult + c
+ *     mult = mult * 378551
+ * starting from acc = 0 and mult = 63689, all in wrapping unsigned
+ * arithmetic.  Characters are read as plain `char`.
+ *
+ * Returns the accumulator masked to its low 31 bits.
+ */
+unsigned int RS_hash(char *str)
+{
+    const unsigned int step = 378551u;
+    unsigned int mult = 63689u;
+    unsigned int acc = 0;
+    const char *cursor;
+
+    for (cursor = str; *cursor != '\0'; cursor++) {
+        acc = acc * mult + *cursor;
+        mult *= step;
+    }
+
+    return acc & 0x7FFFFFFFu;
+}
+
+/*
+ * JS_hash - JS string hash.
+ *
+ * Starts from the constant 1315423911 and, for every character c of the
+ * NUL-terminated string `str`, XORs the accumulator with
+ * (acc << 5) + c + (acc >> 2).  Characters are read as plain `char`.
+ *
+ * Returns the accumulator masked to its low 31 bits.
+ */
+unsigned int JS_hash(char *str)
+{
+    unsigned int acc = 1315423911u;
+    const char *cursor = str;
+
+    for (; *cursor; ++cursor) {
+        unsigned int mix = (acc << 5) + *cursor + (acc >> 2);
+
+        acc ^= mix;
+    }
+
+    return acc & 0x7FFFFFFFu;
+}
+
+/*
+ * PJW_hash - P. J. Weinberger string hash.
+ *
+ * Each character of the NUL-terminated string `str` is shifted into the
+ * accumulator by one eighth of the width of unsigned int.  Whenever bits
+ * land in the top eighth of the word they are XORed back in further down
+ * (shifted right by three quarters of the word width) and then cleared.
+ * Characters are read as plain `char`.
+ *
+ * Returns the accumulator masked to its low 31 bits.
+ */
+unsigned int PJW_hash(char *str)
+{
+    const unsigned int word_bits  = (unsigned int)(sizeof(unsigned int) * 8);
+    const unsigned int fold_shift = (unsigned int)((word_bits * 3) / 4);
+    const unsigned int step_shift = (unsigned int)(word_bits / 8);
+    const unsigned int top_mask   = (unsigned int)(0xFFFFFFFF) << (word_bits - step_shift);
+    unsigned int acc = 0;
+    const char *cursor;
+
+    for (cursor = str; *cursor; cursor++) {
+        unsigned int overflow;
+
+        acc = (acc << step_shift) + *cursor;
+        overflow = acc & top_mask;
+        if (overflow == 0)
+            continue;
+
+        /* Fold the spilled top bits back in, then drop them. */
+        acc = (acc ^ (overflow >> fold_shift)) & ~top_mask;
+    }
+
+    return acc & 0x7FFFFFFFu;
+}
+
+/*
+ * ELF_hash - ELF string hash.
+ *
+ * Each character of the NUL-terminated string `str` is shifted into the
+ * accumulator four bits at a time.  When any of the bits selected by
+ * 0xF0000000 become set, they are XORed back in 24 bits lower and then
+ * cleared.  Characters are read as plain `char`.
+ *
+ * Returns the accumulator masked to its low 31 bits.
+ */
+unsigned int ELF_hash(char *str)
+{
+    unsigned int acc = 0;
+    const char *cursor = str;
+
+    while (*cursor) {
+        unsigned int top;
+
+        acc = (acc << 4) + *cursor++;
+        top = acc & 0xF0000000u;
+        if (top) {
+            /* Fold the top nibble back in, then drop it. */
+            acc = (acc ^ (top >> 24)) & ~top;
+        }
+    }
+
+    return acc & 0x7FFFFFFFu;
+}
+
+/*
+ * BKDR_hash - BKDR string hash.
+ *
+ * Computes acc = acc * 131 + c for every character c of the
+ * NUL-terminated string `str`, in wrapping unsigned arithmetic.
+ * Characters are read as plain `char`.
+ *
+ * Returns the accumulator masked to its low 31 bits.
+ */
+unsigned int BKDR_hash(char *str)
+{
+    /* Seed series noted by the original author: 31 131 1313 13131 131313 ... */
+    const unsigned int seed = 131u;
+    unsigned int acc = 0;
+    const char *cursor;
+
+    for (cursor = str; *cursor; cursor++)
+        acc = acc * seed + *cursor;
+
+    return acc & 0x7FFFFFFFu;
+}
+
+/*
+ * SDBM_hash - SDBM string hash.
+ *
+ * Computes acc = c + (acc << 6) + (acc << 16) - acc for every character c
+ * of the NUL-terminated string `str`, in wrapping unsigned arithmetic.
+ * Characters are read as plain `char`.
+ *
+ * Returns the accumulator masked to its low 31 bits.
+ */
+unsigned int SDBM_hash(char *str)
+{
+    unsigned int acc = 0;
+    const char *cursor;
+
+    for (cursor = str; *cursor; cursor++)
+        acc = *cursor + (acc << 6) + (acc << 16) - acc;
+
+    return acc & 0x7FFFFFFFu;
+}
+
+/*
+ * DJB_hash - DJB string hash.
+ *
+ * Starts from 5381 and computes acc = acc + (acc << 5) + c for every
+ * character c of the NUL-terminated string `str`, in wrapping unsigned
+ * arithmetic.  Characters are read as plain `char`.
+ *
+ * Returns the accumulator masked to its low 31 bits.
+ */
+unsigned int DJB_hash(char *str)
+{
+    unsigned int acc = 5381u;
+    const char *cursor = str;
+
+    while (*cursor != '\0') {
+        acc = (acc << 5) + acc + (unsigned int)*cursor;
+        cursor++;
+    }
+
+    return acc & 0x7FFFFFFFu;
+}
+
+/*
+ * AP_hash - AP string hash.
+ *
+ * Alternates between two mixing steps while walking the NUL-terminated
+ * string `str`:
+ *   even positions (0, 2, ...): acc ^=  ((acc << 7)  ^ c ^ (acc >> 3))
+ *   odd positions  (1, 3, ...): acc ^= ~((acc << 11) ^ c ^ (acc >> 5))
+ * Characters are read as plain `char`.
+ *
+ * Returns the accumulator masked to its low 31 bits.
+ */
+unsigned int AP_hash(char *str)
+{
+    unsigned int acc = 0;
+    unsigned int odd = 0;   /* 0 at even positions, 1 at odd positions */
+    const char *cursor;
+
+    for (cursor = str; *cursor; cursor++, odd ^= 1u) {
+        unsigned int mix;
+
+        if (odd)
+            mix = ~((acc << 11) ^ *cursor ^ (acc >> 5));
+        else
+            mix = (acc << 7) ^ *cursor ^ (acc >> 3);
+
+        acc ^= mix;
+    }
+
+    return acc & 0x7FFFFFFFu;
+}
+
+/*
+ * CRC_hash - 16-bit one's-complement checksum of a string.
+ *
+ * Despite its name, the code below is an additive checksum: the bytes of
+ * the NUL-terminated string `str` are summed as native-endian 16-bit
+ * words, the carries are folded back into the low 16 bits and the result
+ * is complemented.
+ *
+ * The words are loaded with memcpy() rather than through a casted
+ * `unsigned short *`, so `str` may have any alignment and no object is
+ * accessed through an incompatible pointer type.
+ *
+ * Returns a value in the range 0..0xFFFF.  Because words are read in
+ * native byte order, the value for a given string depends on the
+ * endianness of the machine.
+ */
+unsigned int CRC_hash(char *str)
+{
+    size_t               nleft  = strlen(str);
+    const unsigned char *p      = (const unsigned char *)str;
+    unsigned long long   sum    = 0;
+    unsigned short int   word;
+    unsigned short int   answer = 0;
+
+    /* Add up the string as a sequence of 16-bit words. */
+    while (nleft > 1) {
+        memcpy(&word, p, sizeof word);
+        sum += word;
+        p += sizeof word;
+        nleft -= 2;
+    }
+
+    /*
+     * Mop up an odd trailing byte, if any: it is placed in the first
+     * byte of an otherwise zero 16-bit word.
+     */
+    if (nleft == 1) {
+        memcpy(&answer, p, 1);
+        sum += answer;
+    }
+
+    /* Add the high part of the sum onto the low 16 bits ... */
+    sum = (sum >> 16) + (sum & 0xFFFF);
+    /* ... add the carry that step may have produced ... */
+    sum += (sum >> 16);
+    /* ... then complement and truncate to 16 bits. */
+    answer = (unsigned short int)~sum;
+
+    return answer;
+}
+
+
diff --git a/hash.h b/hash.h
@@ -0,0 +1,42 @@
+#ifndef _HASH_H
+#define _HASH_H
+
+/*
+ * String hash functions.
+ *
+ * Every function takes a NUL-terminated string (the pointer is
+ * dereferenced without a NULL check) and returns an unsigned int.
+ * All functions except CRC_hash mask their result to the low 31 bits;
+ * CRC_hash returns a 16-bit value.
+ */
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/* Simple hash: hash = 31 * hash + byte, bytes read as unsigned char. */
+unsigned int simple_hash(char *str);
+
+/*
+ * RS hash: hash = hash * a + c, where a starts at 63689 and is
+ * multiplied by 378551 after every character.
+ */
+unsigned int RS_hash(char *str);
+
+/* JS hash: starts at 1315423911; hash ^= (hash << 5) + c + (hash >> 2). */
+unsigned int JS_hash(char *str);
+
+/*
+ * P. J. Weinberger hash: shifts characters in by 1/8 of the word width
+ * and folds bits that reach the top eighth back into the lower bits.
+ */
+unsigned int PJW_hash(char *str);
+
+/*
+ * ELF hash: shifts characters in 4 bits at a time and folds the top
+ * nibble (mask 0xF0000000) back in 24 bits lower.
+ */
+unsigned int ELF_hash(char *str);
+
+/* BKDR hash: hash = hash * 131 + c. */
+unsigned int BKDR_hash(char *str);
+
+/* SDBM hash: hash = c + (hash << 6) + (hash << 16) - hash. */
+unsigned int SDBM_hash(char *str);
+
+/* DJB hash: starts at 5381; hash += (hash << 5) + c. */
+unsigned int DJB_hash(char *str);
+
+/*
+ * AP hash: alternates two shift/XOR mixing steps between even and odd
+ * character positions.
+ */
+unsigned int AP_hash(char *str);
+
+/*
+ * CRC hash: sums the string as 16-bit words, folds the carries into the
+ * low 16 bits and returns the complemented 16-bit result.
+ */
+unsigned int CRC_hash(char *str);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* _HASH_H */
diff --git a/hashMap/README b/hashMap/README
@@ -0,0 +1,2 @@
+作者:cheungmine
+网址:http://blog.csdn.net/cheungmine/article/details/7704686
Original file line number	Diff line number	Diff line change
		@@ -0,0 +1,2 @@
		作者:cheungmine
		网址:http://blog.csdn.net/cheungmine/article/details/7704686