Skip to content

the bug when put same key #1

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Open
wants to merge 18 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
10 changes: 0 additions & 10 deletions README

This file was deleted.

39 changes: 39 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,39 @@
This is a simple C hashmap, using strings for the keys.

Originally based on code by Elliott Back at http://elliottback.com/wp/hashmap-implementation-in-c/

Reworked by Pete Warden - http://petewarden.typepad.com/searchbrowser/2010/01/c-hashmap.html

Zaks Wang

1. Fix the bug where putting the same key again caused the map's value to increase.

2. Add the ability to change the hash function.

You can choose SIMPLE_HASH, RS_HASH, JS_HASH, PJW_HASH, ELF_HASH, BKDR_HASH, DJB_HASH, AP_HASH, or
CRC_HASH.

main.c contains an example that tests the functionality of the hashmap module.

To compile it, run something like this on your system:

gcc main.c hashmap.c hash.c -o hashmaptest

There are no restrictions on how you reuse this code.

hash_func_test
##############

一个字符串hash函数的评测,原文http://blog.csdn.net/liuben/article/details/5050697
实际语料测试结果,BKDR_HASH远远高于其他HASH函数,其次是AP_HASH
如果冲突,建议将MAX_CHAIN_LENGTH设置稍大

hashMap
#######

cheungmine修改版hashmap http://blog.csdn.net/cheungmine/article/details/7704686


待解决的问题:
仅仅一个数组保存pair的指针,当分配8亿多长度的数组时候,内存会不够,可以分为多段数组,
再用一个hash解决在多个数组间跳跃问题。有时间再改!
183 changes: 183 additions & 0 deletions hash.c
Original file line number Diff line number Diff line change
@@ -0,0 +1,183 @@
#include <string.h>
#include "hash.h"

/*
 * simple_hash - multiplicative string hash using a factor of 31.
 *
 * str: NUL-terminated string; its bytes are read as unsigned char.
 *
 * Returns the accumulated value with the top bit cleared, i.e. a
 * result in the range 0 .. 0x7FFFFFFF. An empty string hashes to 0.
 */
unsigned int simple_hash(char *str)
{
    const unsigned char *cursor = (const unsigned char *)str;
    unsigned int acc = 0;

    while (*cursor != '\0') {
        acc = acc * 31 + *cursor;
        cursor++;
    }

    return acc & 0x7FFFFFFF;
}

/*
 * RS_hash - RS string hash.
 *
 * The running value is multiplied by a factor and the next character
 * is added; the factor itself is multiplied by 378551 after every
 * character, starting from 63689. All arithmetic is unsigned and
 * wraps around.
 *
 * str: NUL-terminated string (characters are added as plain char).
 *
 * Returns the result with the top bit cleared (0 .. 0x7FFFFFFF).
 */
unsigned int RS_hash(char *str)
{
    const unsigned int step = 378551;
    unsigned int factor = 63689;
    unsigned int acc = 0;
    const char *cursor;

    for (cursor = str; *cursor != '\0'; cursor++) {
        acc = acc * factor + *cursor;
        factor *= step;
    }

    return acc & 0x7FFFFFFF;
}

/*
 * JS_hash - JS string hash.
 *
 * Starts from the seed 1315423911 and, for every character, XORs the
 * running value with (value << 5) + character + (value >> 2).
 *
 * str: NUL-terminated string (characters are added as plain char).
 *
 * Returns the result with the top bit cleared (0 .. 0x7FFFFFFF).
 */
unsigned int JS_hash(char *str)
{
    unsigned int acc = 1315423911;
    const char *cursor;

    for (cursor = str; *cursor != '\0'; cursor++) {
        unsigned int mix = (acc << 5) + *cursor + (acc >> 2);

        acc ^= mix;
    }

    return acc & 0x7FFFFFFF;
}

/*
 * PJW_hash - P. J. Weinberger string hash.
 *
 * The running value is shifted left by one eighth of the width of
 * unsigned int and the next character is added. Whenever any of the
 * topmost bits (the top one-eighth of the word) become set, they are
 * folded back into the lower part and then cleared.
 *
 * str: NUL-terminated string (characters are added as plain char).
 *
 * Returns the result with the top bit cleared (0 .. 0x7FFFFFFF).
 */
unsigned int PJW_hash(char *str)
{
    const unsigned int word_bits = (unsigned int)(sizeof(unsigned int) * 8);
    const unsigned int fold_shift = (unsigned int)((word_bits * 3) / 4);
    const unsigned int step_shift = (unsigned int)(word_bits / 8);
    const unsigned int top_mask =
        (unsigned int)(0xFFFFFFFF) << (word_bits - step_shift);
    unsigned int acc = 0;
    const char *cursor;

    for (cursor = str; *cursor != '\0'; cursor++) {
        unsigned int spill;

        acc = (acc << step_shift) + *cursor;
        spill = acc & top_mask;
        if (spill != 0) {
            /* Fold the overflowing top bits down, then drop them. */
            acc = (acc ^ (spill >> fold_shift)) & ~top_mask;
        }
    }

    return acc & 0x7FFFFFFF;
}

/*
 * ELF_hash - ELF string hash.
 *
 * For each character the running value is shifted left by 4 and the
 * character is added. If any of the top four bits (mask 0xF0000000)
 * are then set, they are XORed back in 24 bits lower and cleared.
 *
 * str: NUL-terminated string (characters are added as plain char).
 *
 * Returns the result with the top bit cleared (0 .. 0x7FFFFFFF).
 */
unsigned int ELF_hash(char *str)
{
    unsigned int acc = 0;
    unsigned int top;
    const char *cursor;

    for (cursor = str; *cursor != '\0'; cursor++) {
        acc = (acc << 4) + *cursor;
        top = acc & 0xF0000000L;
        if (top == 0) {
            continue;
        }
        acc ^= (top >> 24);
        acc &= ~top;
    }

    return acc & 0x7FFFFFFF;
}

/*
 * BKDR_hash - BKDR string hash.
 *
 * Multiplies the running value by a fixed seed (131) and adds the next
 * character, for every character of the string. Arithmetic is unsigned
 * and wraps around.
 *
 * str: NUL-terminated string (characters are added as plain char).
 *
 * Returns the result with the top bit cleared (0 .. 0x7FFFFFFF).
 */
unsigned int BKDR_hash(char *str)
{
    const unsigned int seed = 131; /* 31 131 1313 13131 131313 etc.. */
    unsigned int acc = 0;
    const char *cursor;

    for (cursor = str; *cursor != '\0'; cursor++) {
        acc = acc * seed + *cursor;
    }

    return acc & 0x7FFFFFFF;
}

/*
 * SDBM_hash - SDBM string hash.
 *
 * For every character the running value becomes
 *     character + (value << 6) + (value << 16) - value
 * using wrapping unsigned arithmetic.
 *
 * str: NUL-terminated string (characters are added as plain char).
 *
 * Returns the result with the top bit cleared (0 .. 0x7FFFFFFF).
 */
unsigned int SDBM_hash(char *str)
{
    unsigned int acc = 0;
    const char *cursor;

    for (cursor = str; *cursor != '\0'; cursor++) {
        const unsigned int shifted = (acc << 6) + (acc << 16);

        acc = *cursor + shifted - acc;
    }

    return acc & 0x7FFFFFFF;
}

/*
 * DJB_hash - DJB string hash.
 *
 * Starts from 5381 and, for every character, adds (value << 5) plus
 * the character to the running value, using wrapping unsigned
 * arithmetic.
 *
 * str: NUL-terminated string (characters are added as plain char).
 *
 * Returns the result with the top bit cleared (0 .. 0x7FFFFFFF).
 * An empty string hashes to 5381.
 */
unsigned int DJB_hash(char *str)
{
    unsigned int acc = 5381;
    const char *cursor;

    for (cursor = str; *cursor != '\0'; cursor++) {
        acc = acc + (acc << 5) + *cursor;
    }

    return acc & 0x7FFFFFFF;
}

/*
 * AP_hash - AP string hash.
 *
 * Characters are mixed in with two alternating steps:
 *   - characters at even positions (0, 2, ...):
 *         value ^= (value << 7) ^ character ^ (value >> 3)
 *   - characters at odd positions (1, 3, ...):
 *         value ^= ~((value << 11) ^ character ^ (value >> 5))
 *
 * str: NUL-terminated string (characters are mixed in as plain char).
 *
 * Returns the result with the top bit cleared (0 .. 0x7FFFFFFF).
 */
unsigned int AP_hash(char *str)
{
    unsigned int acc = 0;
    int odd_position = 0;
    const char *cursor;

    for (cursor = str; *cursor != '\0'; cursor++) {
        unsigned int mix;

        if (odd_position) {
            mix = ~((acc << 11) ^ *cursor ^ (acc >> 5));
        } else {
            mix = (acc << 7) ^ *cursor ^ (acc >> 3);
        }
        acc ^= mix;
        odd_position = !odd_position;
    }

    return acc & 0x7FFFFFFF;
}

/*
 * CRC_hash - checksum-style string hash.
 *
 * Sums the string as a sequence of host-byte-order unsigned short
 * words, folds the carries back into the low 16 bits, and returns the
 * one's complement of the result truncated to unsigned short.
 *
 * str: NUL-terminated string; must not be NULL.
 *
 * Returns a value that fits in an unsigned short (0 .. 0xFFFF where
 * unsigned short is 16 bits). An empty string hashes to 0xFFFF.
 *
 * Words are loaded with memcpy() rather than by dereferencing a cast
 * pointer: a char buffer is not guaranteed to be aligned for unsigned
 * short, and reading it through an unsigned short lvalue breaks the
 * effective-type rules. The values produced are unchanged.
 */
unsigned int CRC_hash(char *str)
{
    const unsigned char *cursor = (const unsigned char *)str;
    size_t remaining = strlen(str);
    unsigned long long sum = 0;
    unsigned short word;
    unsigned short answer;

    /* Add up all complete words. */
    while (remaining >= sizeof word) {
        memcpy(&word, cursor, sizeof word);
        sum += word;
        cursor += sizeof word;
        remaining -= sizeof word;
    }

    /*
     * Mop up the trailing partial word, if any: the leftover byte(s)
     * occupy the first byte(s) of an otherwise zeroed word.
     */
    if (remaining > 0) {
        word = 0;
        memcpy(&word, cursor, remaining);
        sum += word;
    }

    /* Add the bits above the low 16 back into the low 16 bits. */
    sum = (sum >> 16) + (sum & 0xFFFF);
    /* Add the carry produced by the fold above. */
    sum += (sum >> 16);

    /* Complement and truncate to the width of unsigned short. */
    answer = (unsigned short)~sum;

    return answer;
}

42 changes: 42 additions & 0 deletions hash.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,42 @@
/*
 * hash.h - prototypes for a collection of string hash functions.
 *
 * Every function takes a NUL-terminated string and returns an
 * unsigned int. The declarations are wrapped in extern "C" when the
 * header is compiled as C++.
 */
#ifndef _HASH_H
#define _HASH_H

#ifdef __cplusplus
extern "C" {
#endif

/* Simple hash: multiply-by-31 accumulation over the string's bytes. */
unsigned int simple_hash(char *str);

/* RS hash. */
unsigned int RS_hash(char *str);

/* JS hash. */
unsigned int JS_hash(char *str);

/* P. J. Weinberger hash. */
unsigned int PJW_hash(char *str);

/* ELF hash. */
unsigned int ELF_hash(char *str);

/* BKDR hash. */
unsigned int BKDR_hash(char *str);

/* SDBM hash. */
unsigned int SDBM_hash(char *str);

/* DJB hash. */
unsigned int DJB_hash(char *str);

/* AP hash. */
unsigned int AP_hash(char *str);

/* CRC hash: checksum-style hash over the string's bytes. */
unsigned int CRC_hash(char *str);

#ifdef __cplusplus
}
#endif

#endif /* _HASH_H */
2 changes: 2 additions & 0 deletions hashMap/README
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
作者:cheungmine
网址:http://blog.csdn.net/cheungmine/article/details/7704686
Loading