From f1490bcbe20fbf3d8d8044c2a7c8267d0ee98f4f Mon Sep 17 00:00:00 2001 From: William Clodius Date: Sun, 21 Nov 2021 14:44:10 -0700 Subject: [PATCH 001/106] Added hash function performance comparison and validation codes. Bringing over codes from the original hash_functions branch [ticket: X] --- .../validation/Makefile.validation | 84 ++ .../hash_functions/validation/SpookyV2.cpp | 351 ++++++++ .../hash_functions/validation/SpookyV2.h | 299 +++++++ .../validation/SpookyV2Test.cpp | 52 ++ src/tests/hash_functions/validation/nmhash.c | 8 + src/tests/hash_functions/validation/nmhash.h | 832 ++++++++++++++++++ .../hash_functions/validation/nmhash_scalar.c | 8 + .../hash_functions/validation/nmhash_scalar.h | 824 +++++++++++++++++ .../hash_functions/validation/pengyhash.c | 30 + .../hash_functions/validation/pengyhash.h | 9 + .../test_32_bit_hash_validation.f90 | 86 ++ .../test_64_bit_hash_validation.f90 | 64 ++ .../hash_functions/validation/waterhash.c | 6 + .../hash_functions/validation/waterhash.h | 54 ++ 14 files changed, 2707 insertions(+) create mode 100755 src/tests/hash_functions/validation/Makefile.validation create mode 100755 src/tests/hash_functions/validation/SpookyV2.cpp create mode 100755 src/tests/hash_functions/validation/SpookyV2.h create mode 100755 src/tests/hash_functions/validation/SpookyV2Test.cpp create mode 100755 src/tests/hash_functions/validation/nmhash.c create mode 100755 src/tests/hash_functions/validation/nmhash.h create mode 100755 src/tests/hash_functions/validation/nmhash_scalar.c create mode 100755 src/tests/hash_functions/validation/nmhash_scalar.h create mode 100755 src/tests/hash_functions/validation/pengyhash.c create mode 100755 src/tests/hash_functions/validation/pengyhash.h create mode 100755 src/tests/hash_functions/validation/test_32_bit_hash_validation.f90 create mode 100755 src/tests/hash_functions/validation/test_64_bit_hash_validation.f90 create mode 100755 src/tests/hash_functions/validation/waterhash.c create mode 100755 
src/tests/hash_functions/validation/waterhash.h
diff --git a/src/tests/hash_functions/validation/Makefile.validation b/src/tests/hash_functions/validation/Makefile.validation
new file mode 100755
index 000000000..3578ef69f
--- /dev/null
+++ b/src/tests/hash_functions/validation/Makefile.validation
@@ -0,0 +1,95 @@
+# Makefile for validation tests of hash codes, currently using the GNU
+# GCC compiler collection.
+#
+# The compiler and SDK paths below are tied to one MacPorts/macOS setup;
+# override them on the command line when building elsewhere, e.g.
+#     make -f Makefile.validation FC=gfortran CC=gcc CXX=g++
+
+FC = /opt/local/bin/gfortran-mp-11
+CC = /opt/local/bin/gcc-mp-11
+CXX = /opt/local/bin/g++-mp-11
+
+FFLAGS = -O3
+CFLAGS = -O3
+CXXFLAGS = -O3
+LIBDIRS = -L../../../ -L./ -L/Library/Developer/CommandLineTools/SDKs/MacOSX.sdk/usr/lib/
+LIBS = -lfortran_stdlib -lc_hash
+MOD_DIRS = -I../../../mod_files -J./mod_files
+INCLUDE_DIRS = -I./ -I/Library/Developer/CommandLineTools/SDKs/MacOSX10.15.sdk/usr/include -I/Library/Developer/CommandLineTools/SDKs/MacOSX10.15.sdk/usr/include/tidy
+
+# "all" names a command rather than a file, so declare it phony.
+.PHONY: all
+all: test_32_bit_hash_validation test_64_bit_hash_validation
+
+test_32_bit_hash_validation: test_32_bit_hash_validation.o \
+nmhash_wrapper.o waterhash_wrapper.o ./libc_hash.a
+	$(FC) $(FFLAGS) $(MOD_DIRS) $(LIBDIRS) \
+test_32_bit_hash_validation.o \
+nmhash_wrapper.o \
+waterhash_wrapper.o \
+$(LIBS) \
+-o test_32_bit_hash_validation
+
+# NOTE(review): the 32-bit test object lists the 64-bit module file as a
+# prerequisite; this may be a copy-paste slip -- confirm against the "use"
+# statements in test_32_bit_hash_validation.f90 before changing it.
+test_32_bit_hash_validation.o: test_32_bit_hash_validation.f90 \
+../../../mod_files/stdlib_64_bit_hash_functions.mod \
+nmhash_wrapper.o waterhash_wrapper.o
+	$(FC) $(FFLAGS) $(MOD_DIRS) -c test_32_bit_hash_validation.f90 \
+	-o test_32_bit_hash_validation.o
+
+test_64_bit_hash_validation: test_64_bit_hash_validation.o \
+spookyv2_wrapper.o pengy_wrapper.o ./libc_hash.a
+	$(FC) $(FFLAGS) $(MOD_DIRS) $(LIBDIRS) \
+test_64_bit_hash_validation.o \
+spookyv2_wrapper.o \
+pengy_wrapper.o \
+$(LIBS) \
+-o test_64_bit_hash_validation
+
+test_64_bit_hash_validation.o: test_64_bit_hash_validation.f90 \
+../../../mod_files/stdlib_64_bit_hash_functions.mod \
+spookyv2_wrapper.o \
+pengy_wrapper.o
+	$(FC) $(FFLAGS) $(MOD_DIRS) -c test_64_bit_hash_validation.f90 \
+	-o test_64_bit_hash_validation.o
+
+# Static archive of the C/C++ reference hashes, linked through -lc_hash.
+libc_hash.a: SpookyV2.o SpookyV2Test.o pengyhash.o nmhash_scalar.o waterhash.o
+	ar rcs libc_hash.a SpookyV2.o SpookyV2Test.o pengyhash.o nmhash_scalar.o \
+	waterhash.o
+
+pengyhash.o: pengyhash.c pengyhash.h
+	$(CC) $(CFLAGS) $(INCLUDE_DIRS) -c pengyhash.c -o pengyhash.o
+
+waterhash.o: waterhash.c waterhash.h
+	$(CC) $(CFLAGS) $(INCLUDE_DIRS) -c waterhash.c -o waterhash.o
+
+SpookyV2.o: SpookyV2.cpp SpookyV2.h
+	$(CXX) $(CXXFLAGS) $(INCLUDE_DIRS) -c SpookyV2.cpp -o SpookyV2.o
+
+SpookyV2Test.o: SpookyV2Test.cpp SpookyV2.h
+	$(CXX) $(CXXFLAGS) $(INCLUDE_DIRS) -c SpookyV2Test.cpp -o SpookyV2Test.o
+
+spookyv2_wrapper.o: spookyv2_wrapper.f90
+	$(FC) $(FFLAGS) $(MOD_DIRS) -c spookyv2_wrapper.f90 \
+	-o spookyv2_wrapper.o
+
+pengy_wrapper.o: pengy_wrapper.f90
+	$(FC) $(FFLAGS) $(MOD_DIRS) -c pengy_wrapper.f90 \
+	-o pengy_wrapper.o
+
+# nmhash_scalar.c is C, so it takes $(CFLAGS) like the other C sources.
+nmhash_scalar.o: nmhash_scalar.c nmhash_scalar.h
+	$(CC) $(CFLAGS) $(INCLUDE_DIRS) -c nmhash_scalar.c -o nmhash_scalar.o
+
+nmhash_wrapper.o: nmhash_wrapper.f90
+	$(FC) $(FFLAGS) $(MOD_DIRS) -c nmhash_wrapper.f90 \
+	-o nmhash_wrapper.o
+
+waterhash_wrapper.o: waterhash_wrapper.f90
+	$(FC) $(FFLAGS) $(MOD_DIRS) -c waterhash_wrapper.f90 \
+	-o waterhash_wrapper.o
+
+
diff --git a/src/tests/hash_functions/validation/SpookyV2.cpp b/src/tests/hash_functions/validation/SpookyV2.cpp
new file mode 100755
index 000000000..735bd5629
--- /dev/null
+++ b/src/tests/hash_functions/validation/SpookyV2.cpp
@@ -0,0 +1,351 @@
+// Spooky Hash
+// A 128-bit noncryptographic hash, for checksums and table lookup
+// By Bob Jenkins. Public domain.
+// Oct 31 2010: published framework, disclaimer ShortHash isn't right +// Nov 7 2010: disabled ShortHash +// Oct 31 2011: replace End, ShortMix, ShortEnd, enable ShortHash again +// April 10 2012: buffer overflow on platforms without unaligned reads +// July 12 2012: was passing out variables in final to in/out in short +// July 30 2012: I reintroduced the buffer overflow +// August 5 2012: SpookyV2: d = should be d += in short hash, and remove extra mix from long hash + +#include +#include "SpookyV2.h" + +#define ALLOW_UNALIGNED_READS 1 + +// +// short hash ... it could be used on any message, +// but it's used by Spooky just for short messages. +// +void SpookyHash::Short( + const void *message, + size_t length, + uint64 *hash1, + uint64 *hash2) +{ + uint64 buf[2*sc_numVars]; + union + { + const uint8 *p8; + uint32 *p32; + uint64 *p64; + size_t i; + } u; + + u.p8 = (const uint8 *)message; + + if (!ALLOW_UNALIGNED_READS && (u.i & 0x7)) + { + memcpy(buf, message, length); + u.p64 = buf; + } + + size_t remainder = length%32; + uint64 a=*hash1; + uint64 b=*hash2; + uint64 c=sc_const; + uint64 d=sc_const; + + if (length > 15) + { + const uint64 *end = u.p64 + (length/32)*4; + + // handle all complete sets of 32 bytes + for (; u.p64 < end; u.p64 += 4) + { + c += u.p64[0]; + d += u.p64[1]; + ShortMix(a,b,c,d); + a += u.p64[2]; + b += u.p64[3]; + } + + //Handle the case of 16+ remaining bytes. 
+ if (remainder >= 16) + { + c += u.p64[0]; + d += u.p64[1]; + ShortMix(a,b,c,d); + u.p64 += 2; + remainder -= 16; + } + } + + // Handle the last 0..15 bytes, and its length + d += ((uint64)length) << 56; + switch (remainder) + { + case 15: + d += ((uint64)u.p8[14]) << 48; + case 14: + d += ((uint64)u.p8[13]) << 40; + case 13: + d += ((uint64)u.p8[12]) << 32; + case 12: + d += u.p32[2]; + c += u.p64[0]; + break; + case 11: + d += ((uint64)u.p8[10]) << 16; + case 10: + d += ((uint64)u.p8[9]) << 8; + case 9: + d += (uint64)u.p8[8]; + case 8: + c += u.p64[0]; + break; + case 7: + c += ((uint64)u.p8[6]) << 48; + case 6: + c += ((uint64)u.p8[5]) << 40; + case 5: + c += ((uint64)u.p8[4]) << 32; + case 4: + c += u.p32[0]; + break; + case 3: + c += ((uint64)u.p8[2]) << 16; + case 2: + c += ((uint64)u.p8[1]) << 8; + case 1: + c += (uint64)u.p8[0]; + break; + case 0: + c += sc_const; + d += sc_const; + } + ShortEnd(a,b,c,d); + *hash1 = a; + *hash2 = b; +} + + + + +// do the whole hash in one call +void SpookyHash::Hash128( + const void *message, + size_t length, + uint64 *hash1, + uint64 *hash2) +{ + if (length < sc_bufSize) + { + Short(message, length, hash1, hash2); + return; + } + + uint64 h0,h1,h2,h3,h4,h5,h6,h7,h8,h9,h10,h11; + uint64 buf[sc_numVars]; + uint64 *end; + union + { + const uint8 *p8; + uint64 *p64; + size_t i; + } u; + size_t remainder; + + h0=h3=h6=h9 = *hash1; + h1=h4=h7=h10 = *hash2; + h2=h5=h8=h11 = sc_const; + + u.p8 = (const uint8 *)message; + end = u.p64 + (length/sc_blockSize)*sc_numVars; + + // handle all whole sc_blockSize blocks of bytes + if (ALLOW_UNALIGNED_READS || ((u.i & 0x7) == 0)) + { + while (u.p64 < end) + { + Mix(u.p64, h0,h1,h2,h3,h4,h5,h6,h7,h8,h9,h10,h11); + u.p64 += sc_numVars; + } + } + else + { + while (u.p64 < end) + { + memcpy(buf, u.p64, sc_blockSize); + Mix(buf, h0,h1,h2,h3,h4,h5,h6,h7,h8,h9,h10,h11); + u.p64 += sc_numVars; + } + } + + // handle the last partial block of sc_blockSize bytes + remainder = (length - ((const uint8 
*)end-(const uint8 *)message)); + memcpy(buf, end, remainder); + memset(((uint8 *)buf)+remainder, 0, sc_blockSize-remainder); + ((uint8 *)buf)[sc_blockSize-1] = remainder; + + // do some final mixing + End(buf, h0,h1,h2,h3,h4,h5,h6,h7,h8,h9,h10,h11); + *hash1 = h0; + *hash2 = h1; +} + + + +// init spooky state +void SpookyHash::Init(uint64 seed1, uint64 seed2) +{ + m_length = 0; + m_remainder = 0; + m_state[0] = seed1; + m_state[1] = seed2; +} + + +// add a message fragment to the state +void SpookyHash::Update(const void *message, size_t length) +{ + uint64 h0,h1,h2,h3,h4,h5,h6,h7,h8,h9,h10,h11; + size_t newLength = length + m_remainder; + uint8 remainder; + union + { + const uint8 *p8; + uint64 *p64; + size_t i; + } u; + const uint64 *end; + + // Is this message fragment too short? If it is, stuff it away. + if (newLength < sc_bufSize) + { + memcpy(&((uint8 *)m_data)[m_remainder], message, length); + m_length = length + m_length; + m_remainder = (uint8)newLength; + return; + } + + // init the variables + if (m_length < sc_bufSize) + { + h0=h3=h6=h9 = m_state[0]; + h1=h4=h7=h10 = m_state[1]; + h2=h5=h8=h11 = sc_const; + } + else + { + h0 = m_state[0]; + h1 = m_state[1]; + h2 = m_state[2]; + h3 = m_state[3]; + h4 = m_state[4]; + h5 = m_state[5]; + h6 = m_state[6]; + h7 = m_state[7]; + h8 = m_state[8]; + h9 = m_state[9]; + h10 = m_state[10]; + h11 = m_state[11]; + } + m_length = length + m_length; + + // if we've got anything stuffed away, use it now + if (m_remainder) + { + uint8 prefix = sc_bufSize-m_remainder; + memcpy(&(((uint8 *)m_data)[m_remainder]), message, prefix); + u.p64 = m_data; + Mix(u.p64, h0,h1,h2,h3,h4,h5,h6,h7,h8,h9,h10,h11); + Mix(&u.p64[sc_numVars], h0,h1,h2,h3,h4,h5,h6,h7,h8,h9,h10,h11); + u.p8 = ((const uint8 *)message) + prefix; + length -= prefix; + } + else + { + u.p8 = (const uint8 *)message; + } + + // handle all whole blocks of sc_blockSize bytes + end = u.p64 + (length/sc_blockSize)*sc_numVars; + remainder = (uint8)(length-((const uint8 
*)end-u.p8)); + if (ALLOW_UNALIGNED_READS || (u.i & 0x7) == 0) + { + while (u.p64 < end) + { + Mix(u.p64, h0,h1,h2,h3,h4,h5,h6,h7,h8,h9,h10,h11); + u.p64 += sc_numVars; + } + } + else + { + while (u.p64 < end) + { + memcpy(m_data, u.p8, sc_blockSize); + Mix(m_data, h0,h1,h2,h3,h4,h5,h6,h7,h8,h9,h10,h11); + u.p64 += sc_numVars; + } + } + + // stuff away the last few bytes + m_remainder = remainder; + memcpy(m_data, end, remainder); + + // stuff away the variables + m_state[0] = h0; + m_state[1] = h1; + m_state[2] = h2; + m_state[3] = h3; + m_state[4] = h4; + m_state[5] = h5; + m_state[6] = h6; + m_state[7] = h7; + m_state[8] = h8; + m_state[9] = h9; + m_state[10] = h10; + m_state[11] = h11; +} + + +// report the hash for the concatenation of all message fragments so far +void SpookyHash::Final(uint64 *hash1, uint64 *hash2) +{ + // init the variables + if (m_length < sc_bufSize) + { + *hash1 = m_state[0]; + *hash2 = m_state[1]; + Short( m_data, m_length, hash1, hash2); + return; + } + + const uint64 *data = (const uint64 *)m_data; + uint8 remainder = m_remainder; + + uint64 h0 = m_state[0]; + uint64 h1 = m_state[1]; + uint64 h2 = m_state[2]; + uint64 h3 = m_state[3]; + uint64 h4 = m_state[4]; + uint64 h5 = m_state[5]; + uint64 h6 = m_state[6]; + uint64 h7 = m_state[7]; + uint64 h8 = m_state[8]; + uint64 h9 = m_state[9]; + uint64 h10 = m_state[10]; + uint64 h11 = m_state[11]; + + if (remainder >= sc_blockSize) + { + // m_data can contain two blocks; handle any whole first block + Mix(data, h0,h1,h2,h3,h4,h5,h6,h7,h8,h9,h10,h11); + data += sc_numVars; + remainder -= sc_blockSize; + } + + // mix in the last partial block, and the length mod sc_blockSize + memset(&((uint8 *)data)[remainder], 0, (sc_blockSize-remainder)); + + ((uint8 *)data)[sc_blockSize-1] = remainder; + + // do some final mixing + End(data, h0,h1,h2,h3,h4,h5,h6,h7,h8,h9,h10,h11); + + *hash1 = h0; + *hash2 = h1; +} + diff --git a/src/tests/hash_functions/validation/SpookyV2.h 
b/src/tests/hash_functions/validation/SpookyV2.h new file mode 100755 index 000000000..4ccc0d523 --- /dev/null +++ b/src/tests/hash_functions/validation/SpookyV2.h @@ -0,0 +1,299 @@ +// +// SpookyHash: a 128-bit noncryptographic hash function +// By Bob Jenkins, public domain +// Oct 31 2010: alpha, framework + SpookyHash::Mix appears right +// Oct 31 2011: alpha again, Mix only good to 2^^69 but rest appears right +// Dec 31 2011: beta, improved Mix, tested it for 2-bit deltas +// Feb 2 2012: production, same bits as beta +// Feb 5 2012: adjusted definitions of uint* to be more portable +// Mar 30 2012: 3 bytes/cycle, not 4. Alpha was 4 but wasn't thorough enough. +// August 5 2012: SpookyV2 (different results) +// +// Up to 3 bytes/cycle for long messages. Reasonably fast for short messages. +// All 1 or 2 bit deltas achieve avalanche within 1% bias per output bit. +// +// This was developed for and tested on 64-bit x86-compatible processors. +// It assumes the processor is little-endian. There is a macro +// controlling whether unaligned reads are allowed (by default they are). +// This should be an equally good hash on big-endian machines, but it will +// compute different results on them than on little-endian machines. +// +// Google's CityHash has similar specs to SpookyHash, and CityHash is faster +// on new Intel boxes. MD4 and MD5 also have similar specs, but they are orders +// of magnitude slower. CRCs are two or more times slower, but unlike +// SpookyHash, they have nice math for combining the CRCs of pieces to form +// the CRCs of wholes. There are also cryptographic hashes, but those are even +// slower than MD5. 
+// + +#include + +#ifdef _MSC_VER +# define INLINE __forceinline + typedef unsigned __int64 uint64; + typedef unsigned __int32 uint32; + typedef unsigned __int16 uint16; + typedef unsigned __int8 uint8; +#else +# include +# define INLINE inline + typedef uint64_t uint64; + typedef uint32_t uint32; + typedef uint16_t uint16; + typedef uint8_t uint8; +#endif + + +class SpookyHash +{ +public: + // + // SpookyHash: hash a single message in one call, produce 128-bit output + // + static void Hash128( + const void *message, // message to hash + size_t length, // length of message in bytes + uint64 *hash1, // in/out: in seed 1, out hash value 1 + uint64 *hash2); // in/out: in seed 2, out hash value 2 + + // + // Hash64: hash a single message in one call, return 64-bit output + // + static uint64 Hash64( + const void *message, // message to hash + size_t length, // length of message in bytes + uint64 seed) // seed + { + uint64 hash1 = seed; + Hash128(message, length, &hash1, &seed); + return hash1; + } + + // + // Hash32: hash a single message in one call, produce 32-bit output + // + static uint32 Hash32( + const void *message, // message to hash + size_t length, // length of message in bytes + uint32 seed) // seed + { + uint64 hash1 = seed, hash2 = seed; + Hash128(message, length, &hash1, &hash2); + return (uint32)hash1; + } + + // + // Init: initialize the context of a SpookyHash + // + void Init( + uint64 seed1, // any 64-bit value will do, including 0 + uint64 seed2); // different seeds produce independent hashes + + // + // Update: add a piece of a message to a SpookyHash state + // + void Update( + const void *message, // message fragment + size_t length); // length of message fragment in bytes + + + // + // Final: compute the hash for the current SpookyHash state + // + // This does not modify the state; you can keep updating it afterward + // + // The result is the same as if SpookyHash() had been called with + // all the pieces concatenated into one message. 
+ // + void Final( + uint64 *hash1, // out only: first 64 bits of hash value. + uint64 *hash2); // out only: second 64 bits of hash value. + + // + // left rotate a 64-bit value by k bytes + // + static INLINE uint64 Rot64(uint64 x, int k) + { + return (x << k) | (x >> (64 - k)); + } + + // + // This is used if the input is 96 bytes long or longer. + // + // The internal state is fully overwritten every 96 bytes. + // Every input bit appears to cause at least 128 bits of entropy + // before 96 other bytes are combined, when run forward or backward + // For every input bit, + // Two inputs differing in just that input bit + // Where "differ" means xor or subtraction + // And the base value is random + // When run forward or backwards one Mix + // I tried 3 pairs of each; they all differed by at least 212 bits. + // + static INLINE void Mix( + const uint64 *data, + uint64 &s0, uint64 &s1, uint64 &s2, uint64 &s3, + uint64 &s4, uint64 &s5, uint64 &s6, uint64 &s7, + uint64 &s8, uint64 &s9, uint64 &s10,uint64 &s11) + { + s0 += data[0]; s2 ^= s10; s11 ^= s0; s0 = Rot64(s0,11); s11 += s1; + s1 += data[1]; s3 ^= s11; s0 ^= s1; s1 = Rot64(s1,32); s0 += s2; + s2 += data[2]; s4 ^= s0; s1 ^= s2; s2 = Rot64(s2,43); s1 += s3; + s3 += data[3]; s5 ^= s1; s2 ^= s3; s3 = Rot64(s3,31); s2 += s4; + s4 += data[4]; s6 ^= s2; s3 ^= s4; s4 = Rot64(s4,17); s3 += s5; + s5 += data[5]; s7 ^= s3; s4 ^= s5; s5 = Rot64(s5,28); s4 += s6; + s6 += data[6]; s8 ^= s4; s5 ^= s6; s6 = Rot64(s6,39); s5 += s7; + s7 += data[7]; s9 ^= s5; s6 ^= s7; s7 = Rot64(s7,57); s6 += s8; + s8 += data[8]; s10 ^= s6; s7 ^= s8; s8 = Rot64(s8,55); s7 += s9; + s9 += data[9]; s11 ^= s7; s8 ^= s9; s9 = Rot64(s9,54); s8 += s10; + s10 += data[10]; s0 ^= s8; s9 ^= s10; s10 = Rot64(s10,22); s9 += s11; + s11 += data[11]; s1 ^= s9; s10 ^= s11; s11 = Rot64(s11,46); s10 += s0; + } + + // + // Mix all 12 inputs together so that h0, h1 are a hash of them all. 
+ // + // For two inputs differing in just the input bits + // Where "differ" means xor or subtraction + // And the base value is random, or a counting value starting at that bit + // The final result will have each bit of h0, h1 flip + // For every input bit, + // with probability 50 +- .3% + // For every pair of input bits, + // with probability 50 +- 3% + // + // This does not rely on the last Mix() call having already mixed some. + // Two iterations was almost good enough for a 64-bit result, but a + // 128-bit result is reported, so End() does three iterations. + // + static INLINE void EndPartial( + uint64 &h0, uint64 &h1, uint64 &h2, uint64 &h3, + uint64 &h4, uint64 &h5, uint64 &h6, uint64 &h7, + uint64 &h8, uint64 &h9, uint64 &h10,uint64 &h11) + { + h11+= h1; h2 ^= h11; h1 = Rot64(h1,44); + h0 += h2; h3 ^= h0; h2 = Rot64(h2,15); + h1 += h3; h4 ^= h1; h3 = Rot64(h3,34); + h2 += h4; h5 ^= h2; h4 = Rot64(h4,21); + h3 += h5; h6 ^= h3; h5 = Rot64(h5,38); + h4 += h6; h7 ^= h4; h6 = Rot64(h6,33); + h5 += h7; h8 ^= h5; h7 = Rot64(h7,10); + h6 += h8; h9 ^= h6; h8 = Rot64(h8,13); + h7 += h9; h10^= h7; h9 = Rot64(h9,38); + h8 += h10; h11^= h8; h10= Rot64(h10,53); + h9 += h11; h0 ^= h9; h11= Rot64(h11,42); + h10+= h0; h1 ^= h10; h0 = Rot64(h0,54); + } + + static INLINE void End( + const uint64 *data, + uint64 &h0, uint64 &h1, uint64 &h2, uint64 &h3, + uint64 &h4, uint64 &h5, uint64 &h6, uint64 &h7, + uint64 &h8, uint64 &h9, uint64 &h10,uint64 &h11) + { + h0 += data[0]; h1 += data[1]; h2 += data[2]; h3 += data[3]; + h4 += data[4]; h5 += data[5]; h6 += data[6]; h7 += data[7]; + h8 += data[8]; h9 += data[9]; h10 += data[10]; h11 += data[11]; + EndPartial(h0,h1,h2,h3,h4,h5,h6,h7,h8,h9,h10,h11); + EndPartial(h0,h1,h2,h3,h4,h5,h6,h7,h8,h9,h10,h11); + EndPartial(h0,h1,h2,h3,h4,h5,h6,h7,h8,h9,h10,h11); + } + + // + // The goal is for each bit of the input to expand into 128 bits of + // apparent entropy before it is fully overwritten. 
+ // n trials both set and cleared at least m bits of h0 h1 h2 h3 + // n: 2 m: 29 + // n: 3 m: 46 + // n: 4 m: 57 + // n: 5 m: 107 + // n: 6 m: 146 + // n: 7 m: 152 + // when run forwards or backwards + // for all 1-bit and 2-bit diffs + // with diffs defined by either xor or subtraction + // with a base of all zeros plus a counter, or plus another bit, or random + // + static INLINE void ShortMix(uint64 &h0, uint64 &h1, uint64 &h2, uint64 &h3) + { + h2 = Rot64(h2,50); h2 += h3; h0 ^= h2; + h3 = Rot64(h3,52); h3 += h0; h1 ^= h3; + h0 = Rot64(h0,30); h0 += h1; h2 ^= h0; + h1 = Rot64(h1,41); h1 += h2; h3 ^= h1; + h2 = Rot64(h2,54); h2 += h3; h0 ^= h2; + h3 = Rot64(h3,48); h3 += h0; h1 ^= h3; + h0 = Rot64(h0,38); h0 += h1; h2 ^= h0; + h1 = Rot64(h1,37); h1 += h2; h3 ^= h1; + h2 = Rot64(h2,62); h2 += h3; h0 ^= h2; + h3 = Rot64(h3,34); h3 += h0; h1 ^= h3; + h0 = Rot64(h0,5); h0 += h1; h2 ^= h0; + h1 = Rot64(h1,36); h1 += h2; h3 ^= h1; + } + + // + // Mix all 4 inputs together so that h0, h1 are a hash of them all. 
+ // + // For two inputs differing in just the input bits + // Where "differ" means xor or subtraction + // And the base value is random, or a counting value starting at that bit + // The final result will have each bit of h0, h1 flip + // For every input bit, + // with probability 50 +- .3% (it is probably better than that) + // For every pair of input bits, + // with probability 50 +- .75% (the worst case is approximately that) + // + static INLINE void ShortEnd(uint64 &h0, uint64 &h1, uint64 &h2, uint64 &h3) + { + h3 ^= h2; h2 = Rot64(h2,15); h3 += h2; + h0 ^= h3; h3 = Rot64(h3,52); h0 += h3; + h1 ^= h0; h0 = Rot64(h0,26); h1 += h0; + h2 ^= h1; h1 = Rot64(h1,51); h2 += h1; + h3 ^= h2; h2 = Rot64(h2,28); h3 += h2; + h0 ^= h3; h3 = Rot64(h3,9); h0 += h3; + h1 ^= h0; h0 = Rot64(h0,47); h1 += h0; + h2 ^= h1; h1 = Rot64(h1,54); h2 += h1; + h3 ^= h2; h2 = Rot64(h2,32); h3 += h2; + h0 ^= h3; h3 = Rot64(h3,25); h0 += h3; + h1 ^= h0; h0 = Rot64(h0,63); h1 += h0; + } + +private: + + // + // Short is used for messages under 192 bytes in length + // Short has a low startup cost, the normal mode is good for long + // keys, the cost crossover is at about 192 bytes. The two modes were + // held to the same quality bar. 
+ // + static void Short( + const void *message, // message (array of bytes, not necessarily aligned) + size_t length, // length of message (in bytes) + uint64 *hash1, // in/out: in the seed, out the hash value + uint64 *hash2); // in/out: in the seed, out the hash value + + // number of uint64's in internal state + static const size_t sc_numVars = 12; + + // size of the internal state + static const size_t sc_blockSize = sc_numVars*8; + + // size of buffer of unhashed data, in bytes + static const size_t sc_bufSize = 2*sc_blockSize; + + // + // sc_const: a constant which: + // * is not zero + // * is odd + // * is a not-very-regular mix of 1's and 0's + // * does not need any other special mathematical properties + // + static const uint64 sc_const = 0xdeadbeefdeadbeefLL; + + uint64 m_data[2*sc_numVars]; // unhashed data, for partial messages + uint64 m_state[sc_numVars]; // internal state of the hash + size_t m_length; // total length of the input so far + uint8 m_remainder; // length of unhashed data stashed in m_data +}; + + + diff --git a/src/tests/hash_functions/validation/SpookyV2Test.cpp b/src/tests/hash_functions/validation/SpookyV2Test.cpp new file mode 100755 index 000000000..3b9e6826f --- /dev/null +++ b/src/tests/hash_functions/validation/SpookyV2Test.cpp @@ -0,0 +1,52 @@ +#include "SpookyV2.h" + +#ifdef __cplusplus +extern "C" { +#endif + +void SpookyHash32_with_state_test(const void *key, size_t len, const void *state, void *out) { + uint64_t *state64= (uint64_t *)state; + uint64_t s0 = state64[0]; + uint64_t s1 = state64[1]; + SpookyHash::Hash128(key, len, &s0, &s1); + ((uint32_t *)out)[0]= (uint32_t)s0; +} + +void SpookyHash64_with_state_test(const void *key, size_t len, const void *state, void *out) { + uint64_t *state64= (uint64_t *)state; + uint64_t *out64= (uint64_t *)out; + out64[0] = state64[0]; + uint64_t s1 = state64[1]; + SpookyHash::Hash128(key, len, out64, &s1); +} + +void SpookyHash128_with_state_test(const void *key, size_t len, const 
void *state, void *out) { + uint64_t *state64= (uint64_t *)state; + uint64_t *out64= (uint64_t *)out; + out64[0] = state64[0]; + out64[1] = state64[1]; + SpookyHash::Hash128(key, len, out64, out64+1); +} + +void SpookyHash_seed_state_test(int in_bits, const void *seed, void *state) { + uint64_t *state64= (uint64_t *)state; + if (in_bits == 32) { + state64[0]= state64[1]= ((uint32_t*)seed)[0]; + } + else { + uint64_t *seed64= (uint64_t *)seed; + if (in_bits == 64) { + state64[0]= state64[1]= seed64[0]; + } + else + if (in_bits == 128) { + state64[0]= seed64[0]; + state64[1]= seed64[1]; + } + } +} + + +#ifdef __cplusplus +} +#endif diff --git a/src/tests/hash_functions/validation/nmhash.c b/src/tests/hash_functions/validation/nmhash.c new file mode 100755 index 000000000..987bc568c --- /dev/null +++ b/src/tests/hash_functions/validation/nmhash.c @@ -0,0 +1,8 @@ +#include "nmhash.h" +int32_t nmhash32_test ( const void * key, size_t len, uint32_t seed ) { + return NMHASH32 (key, (const size_t) len, seed); +} + +int32_t nmhash32x_test ( const void * key, size_t len, uint32_t seed ) { + return NMHASH32X (key, (const size_t) len, seed); +} diff --git a/src/tests/hash_functions/validation/nmhash.h b/src/tests/hash_functions/validation/nmhash.h new file mode 100755 index 000000000..21bb90022 --- /dev/null +++ b/src/tests/hash_functions/validation/nmhash.h @@ -0,0 +1,832 @@ +/* + * verification: + * NMHASH32: + * rurban/smhasher: 0x12A30553 + * demerphq/smhasher: 0x3D8F6C47 + * NMHASH32X: + * rurban/smhasher: 0xA8580227 + * demerphq/smhasher: 0x40B451B3 + */ + +#ifdef __cplusplus +extern "C" { +#endif + +#ifndef _nmhash_h_ +#define _nmhash_h_ + +#define NMH_VERSION 2 + +#ifdef _MSC_VER +# pragma warning(push, 3) +#endif + +#if defined(__cplusplus) && __cplusplus < 201103L +# define __STDC_CONSTANT_MACROS 1 +#endif + +#include +#include + +#if defined(__GNUC__) +# if defined(__AVX2__) +# include +# elif defined(__SSE2__) +# include +# endif +#elif defined(_MSC_VER) +# include 
+#endif + +#ifdef _MSC_VER +# pragma warning(pop) +#endif + +#if (defined(__GNUC__) && (__GNUC__ >= 3)) \ + || (defined(__INTEL_COMPILER) && (__INTEL_COMPILER >= 800)) \ + || defined(__clang__) +# define NMH_likely(x) __builtin_expect(x, 1) +#else +# define NMH_likely(x) (x) +#endif + +#if defined(__has_builtin) +# if __has_builtin(__builtin_rotateleft32) +# define NMH_rotl32 __builtin_rotateleft32 /* clang */ +# endif +#endif +#if !defined(NMH_rotl32) +# if defined(_MSC_VER) + /* Note: although _rotl exists for minGW (GCC under windows), performance seems poor */ +# define NMH_rotl32(x,r) _rotl(x,r) +# else +# define NMH_rotl32(x,r) (((x) << (r)) | ((x) >> (32 - (r)))) +# endif +#endif + +#if ((defined(sun) || defined(__sun)) && __cplusplus) /* Solaris includes __STDC_VERSION__ with C++. Tested with GCC 5.5 */ +# define NMH_RESTRICT /* disable */ +#elif defined (__STDC_VERSION__) && __STDC_VERSION__ >= 199901L /* >= C99 */ +# define NMH_RESTRICT restrict +#elif defined(__cplusplus) && (defined(__GNUC__) || defined(__clang__) || defined(__INTEL_COMPILER)) +# define NMH_RESTRICT __restrict__ +#elif defined(__cplusplus) && defined(_MSC_VER) +# define NMH_RESTRICT __restrict +#else +# define NMH_RESTRICT /* disable */ +#endif + +/* endian macros */ +#ifndef NMHASH_LITTLE_ENDIAN +# if defined(_WIN32) || defined(__LITTLE_ENDIAN__) || defined(__x86_64__) || (defined(__BYTE_ORDER__) && __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__) || defined(__SDCC) +# define NMHASH_LITTLE_ENDIAN 1 +# elif defined(__BIG_ENDIAN__) || (defined(__BYTE_ORDER__) && __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__) +# define NMHASH_LITTLE_ENDIAN 0 +# else +# warning could not determine endianness! Falling back to little endian. 
+# define NMHASH_LITTLE_ENDIAN 1 +# endif +#endif + +/* vector macros */ +#define NMH_SCALAR 0 +#define NMH_SSE2 1 +#define NMH_AVX2 2 +#define NMH_AVX512 3 + +#ifndef NMH_VECTOR /* can be defined on command line */ +# if defined(__AVX512BW__) +# define NMH_VECTOR NMH_AVX512 /* _mm512_mullo_epi16 requires AVX512BW */ +# elif defined(__AVX2__) +# define NMH_VECTOR NMH_AVX2 /* add '-mno-avx256-split-unaligned-load' and '-mn-oavx256-split-unaligned-store' for gcc */ +# elif defined(__SSE2__) || defined(_M_AMD64) || defined(_M_X64) || (defined(_M_IX86_FP) && (_M_IX86_FP == 2)) +# define NMH_VECTOR NMH_SSE2 +# else +# define NMH_VECTOR NMH_SCALAR +# endif +#endif + +/* align macros */ +#if defined (__STDC_VERSION__) && (__STDC_VERSION__ >= 201112L) /* C11+ */ +# include +# define NMH_ALIGN(n) alignas(n) +#elif defined(__GNUC__) +# define NMH_ALIGN(n) __attribute__ ((aligned(n))) +#elif defined(_MSC_VER) +# define NMH_ALIGN(n) __declspec(align(n)) +#else +# define NMH_ALIGN(n) /* disabled */ +#endif + +#if NMH_VECTOR > 0 +# define NMH_ACC_ALIGN 64 +#elif defined(__BIGGEST_ALIGNMENT__) +# define NMH_ACC_ALIGN __BIGGEST_ALIGNMENT__ +#elif defined(__SDCC) +# define NMH_ACC_ALIGN 1 +#else +# define NMH_ACC_ALIGN 16 +#endif + +/* constants */ + +/* primes from xxh */ +#define NMH_PRIME32_1 UINT32_C(0x9E3779B1) +#define NMH_PRIME32_2 UINT32_C(0x85EBCA77) +#define NMH_PRIME32_3 UINT32_C(0xC2B2AE3D) +#define NMH_PRIME32_4 UINT32_C(0x27D4EB2F) + +/*! Pseudorandom secret taken directly from FARSH. 
*/ +NMH_ALIGN(NMH_ACC_ALIGN) static const uint32_t NMH_ACC_INIT[32] = { + UINT32_C(0xB8FE6C39), UINT32_C(0x23A44BBE), UINT32_C(0x7C01812C), UINT32_C(0xF721AD1C), + UINT32_C(0xDED46DE9), UINT32_C(0x839097DB), UINT32_C(0x7240A4A4), UINT32_C(0xB7B3671F), + UINT32_C(0xCB79E64E), UINT32_C(0xCCC0E578), UINT32_C(0x825AD07D), UINT32_C(0xCCFF7221), + UINT32_C(0xB8084674), UINT32_C(0xF743248E), UINT32_C(0xE03590E6), UINT32_C(0x813A264C), + + UINT32_C(0x3C2852BB), UINT32_C(0x91C300CB), UINT32_C(0x88D0658B), UINT32_C(0x1B532EA3), + UINT32_C(0x71644897), UINT32_C(0xA20DF94E), UINT32_C(0x3819EF46), UINT32_C(0xA9DEACD8), + UINT32_C(0xA8FA763F), UINT32_C(0xE39C343F), UINT32_C(0xF9DCBBC7), UINT32_C(0xC70B4F1D), + UINT32_C(0x8A51E04B), UINT32_C(0xCDB45931), UINT32_C(0xC89F7EC9), UINT32_C(0xD9787364), +}; + +#if defined(_MSC_VER) && _MSC_VER >= 1914 +# pragma warning(push) +# pragma warning(disable: 5045) +#endif +#ifdef __SDCC +# define const +# pragma save +# pragma disable_warning 110 +# pragma disable_warning 126 +#endif + +/* read functions */ +static inline +uint32_t +NMH_readLE32(const void *const p) +{ + uint32_t v; + memcpy(&v, p, 4); +# if (NMHASH_LITTLE_ENDIAN) + return v; +# elif defined(__GNUC__) || defined(__INTEL_COMPILER) || defined(__clang__) + return __builtin_bswap32(v); +# elif defined(_MSC_VER) + return _byteswap_ulong(v); +# else + return ((v >> 24) & 0xff) | ((v >> 8) & 0xff00) | ((v << 8) & 0xff0000) | ((v << 24) & 0xff000000); +# endif +} + +static inline +uint16_t +NMH_readLE16(const void *const p) +{ + uint16_t v; + memcpy(&v, p, 2); +# if (NMHASH_LITTLE_ENDIAN) + return v; +# else + return (uint16_t)((v << 8) | (v >> 8)); +# endif +} + +static inline +uint32_t +NMHASH32_0to8(uint32_t const x, uint32_t const seed2) +{ + /* base mixer: [-6 -12 776bf593 -19 11 3fb39c65 -15 -9 e9139917 -11 16] = 0.027071104091278835 */ + const uint32_t m1 = UINT32_C(0x776BF593); + const uint32_t m2 = UINT32_C(0x3FB39C65); + const uint32_t m3 = UINT32_C(0xE9139917); + +# if 
NMH_VECTOR == NMH_SCALAR + { + union { uint32_t u32; uint16_t u16[2]; } vx; + vx.u32 = x; + vx.u32 ^= (vx.u32 >> 12) ^ (vx.u32 >> 6); + vx.u16[0] *= (uint16_t)m1; + vx.u16[1] *= (uint16_t)(m1 >> 16); + vx.u32 ^= (vx.u32 << 11) ^ ( vx.u32 >> 19); + vx.u16[0] *= (uint16_t)m2; + vx.u16[1] *= (uint16_t)(m2 >> 16); + vx.u32 ^= seed2; + vx.u32 ^= (vx.u32 >> 15) ^ ( vx.u32 >> 9); + vx.u16[0] *= (uint16_t)m3; + vx.u16[1] *= (uint16_t)(m3 >> 16); + vx.u32 ^= (vx.u32 << 16) ^ ( vx.u32 >> 11); + return vx.u32; + } +# else /* at least NMH_SSE2 */ + { + __m128i hv = _mm_setr_epi32((int)x, 0, 0, 0); + const __m128i sv = _mm_setr_epi32((int)seed2, 0, 0, 0); + const uint32_t *const result = (const uint32_t*)&hv; + + hv = _mm_xor_si128(_mm_xor_si128(hv, _mm_srli_epi32(hv, 12)), _mm_srli_epi32(hv, 6)); + hv = _mm_mullo_epi16(hv, _mm_setr_epi32((int)m1, 0, 0, 0)); + hv = _mm_xor_si128(_mm_xor_si128(hv, _mm_slli_epi32(hv, 11)), _mm_srli_epi32(hv, 19)); + hv = _mm_mullo_epi16(hv, _mm_setr_epi32((int)m2, 0, 0, 0)); + + hv = _mm_xor_si128(hv, sv); + + hv = _mm_xor_si128(_mm_xor_si128(hv, _mm_srli_epi32(hv, 15)), _mm_srli_epi32(hv, 9)); + hv = _mm_mullo_epi16(hv, _mm_setr_epi32((int)m3, 0, 0, 0)); + hv = _mm_xor_si128(_mm_xor_si128(hv, _mm_slli_epi32(hv, 16)), _mm_srli_epi32(hv, 11)); + + return *result; + } +# endif +} + +#define __NMH_M1 UINT32_C(0xF0D9649B) +#define __NMH_M2 UINT32_C(0x29A7935D) +#define __NMH_M3 UINT32_C(0x55D35831) + +NMH_ALIGN(NMH_ACC_ALIGN) static const uint32_t __NMH_M1_V[32] = { + __NMH_M1, __NMH_M1, __NMH_M1, __NMH_M1, __NMH_M1, __NMH_M1, __NMH_M1, __NMH_M1, + __NMH_M1, __NMH_M1, __NMH_M1, __NMH_M1, __NMH_M1, __NMH_M1, __NMH_M1, __NMH_M1, + __NMH_M1, __NMH_M1, __NMH_M1, __NMH_M1, __NMH_M1, __NMH_M1, __NMH_M1, __NMH_M1, + __NMH_M1, __NMH_M1, __NMH_M1, __NMH_M1, __NMH_M1, __NMH_M1, __NMH_M1, __NMH_M1, +}; +NMH_ALIGN(NMH_ACC_ALIGN) static const uint32_t __NMH_M2_V[32] = { + __NMH_M2, __NMH_M2, __NMH_M2, __NMH_M2, __NMH_M2, __NMH_M2, __NMH_M2, __NMH_M2, + __NMH_M2, 
__NMH_M2, __NMH_M2, __NMH_M2, __NMH_M2, __NMH_M2, __NMH_M2, __NMH_M2, + __NMH_M2, __NMH_M2, __NMH_M2, __NMH_M2, __NMH_M2, __NMH_M2, __NMH_M2, __NMH_M2, + __NMH_M2, __NMH_M2, __NMH_M2, __NMH_M2, __NMH_M2, __NMH_M2, __NMH_M2, __NMH_M2, +}; +NMH_ALIGN(NMH_ACC_ALIGN) static const uint32_t __NMH_M3_V[32] = { + __NMH_M3, __NMH_M3, __NMH_M3, __NMH_M3, __NMH_M3, __NMH_M3, __NMH_M3, __NMH_M3, + __NMH_M3, __NMH_M3, __NMH_M3, __NMH_M3, __NMH_M3, __NMH_M3, __NMH_M3, __NMH_M3, + __NMH_M3, __NMH_M3, __NMH_M3, __NMH_M3, __NMH_M3, __NMH_M3, __NMH_M3, __NMH_M3, + __NMH_M3, __NMH_M3, __NMH_M3, __NMH_M3, __NMH_M3, __NMH_M3, __NMH_M3, __NMH_M3, +}; + +static inline +uint32_t +NMHASH32_9to255(const uint8_t* const NMH_RESTRICT p, size_t const len, uint32_t const seed, int const type) +{ + /* base mixer: [f0d9649b 5 -13 29a7935d -9 11 55d35831 -20 -10 ] = 0.93495901789135362 */ + uint32_t result = 0; +# if NMH_VECTOR == NMH_SCALAR + { + union { uint32_t u32; uint16_t u16[2]; } x[4], y[4]; + uint32_t const sl = seed + (uint32_t)len; + size_t j; + x[0].u32 = NMH_PRIME32_1; + x[1].u32 = NMH_PRIME32_2; + x[2].u32 = NMH_PRIME32_3; + x[3].u32 = NMH_PRIME32_4; + for (j = 0; j < 4; ++j) y[j].u32 = sl; + + if (type) { + /* 33 to 255 bytes */ + size_t const r = (len - 1) / 32; + size_t i; + for (i = 0; i < r; ++i) { + for (j = 0; j < 4; ++j) x[j].u32 ^= NMH_readLE32(p + i * 32 + j * 4); + for (j = 0; j < 4; ++j) y[j].u32 ^= NMH_readLE32(p + i * 32 + j * 4 + 16); + for (j = 0; j < 4; ++j) x[j].u32 += y[j].u32; + + for (j = 0; j < 4; ++j) { + x[j].u16[0] *= (uint16_t)(__NMH_M1 & 0xFFFF); + x[j].u16[1] *= (uint16_t)(__NMH_M1 >> 16); + } + for (j = 0; j < 4; ++j) x[j].u32 ^= (x[j].u32 << 5) ^ (x[j].u32 >> 13); + for (j = 0; j < 4; ++j) { + x[j].u16[0] *= (uint16_t)(__NMH_M2 & 0xFFFF); + x[j].u16[1] *= (uint16_t)(__NMH_M2 >> 16); + } + + for (j = 0; j < 4; ++j) x[j].u32 ^= y[j].u32; + + for (j = 0; j < 4; ++j) x[j].u32 ^= (x[j].u32 << 11) ^ (x[j].u32 >> 9); + for (j = 0; j < 4; ++j) { + x[j].u16[0] *= 
(uint16_t)(__NMH_M3 & 0xFFFF); + x[j].u16[1] *= (uint16_t)(__NMH_M3 >> 16); + } + for (j = 0; j < 4; ++j) x[j].u32 ^= (x[j].u32 >> 10) ^ (x[j].u32 >> 20); + } + for (j = 0; j < 4; ++j) x[j].u32 ^= NMH_readLE32(p + len - 32 + j * 4); + for (j = 0; j < 4; ++j) y[j].u32 ^= NMH_readLE32(p + len - 16 + j * 4); + } else { + /* 9 to 32 bytes */ + x[0].u32 ^= NMH_readLE32(p); + x[1].u32 ^= NMH_readLE32(p + ((len>>4)<<3)); + x[2].u32 ^= NMH_readLE32(p + len - 8); + x[3].u32 ^= NMH_readLE32(p + len - 8 - ((len>>4)<<3)); + y[0].u32 ^= NMH_readLE32(p + 4); + y[1].u32 ^= NMH_readLE32(p + ((len>>4)<<3) + 4); + y[2].u32 ^= NMH_readLE32(p + len - 8 + 4); + y[3].u32 ^= NMH_readLE32(p + len - 8 - ((len>>4)<<3) + 4); + } + + for (j = 0; j < 4; ++j) x[j].u32 += y[j].u32; + for (j = 0; j < 4; ++j) y[j].u32 ^= (y[j].u32 << 17) ^ (y[j].u32 >> 6); + + for (j = 0; j < 4; ++j) { + x[j].u16[0] *= (uint16_t)(__NMH_M1 & 0xFFFF); + x[j].u16[1] *= (uint16_t)(__NMH_M1 >> 16); + } + for (j = 0; j < 4; ++j) x[j].u32 ^= (x[j].u32 << 5) ^ (x[j].u32 >> 13); + for (j = 0; j < 4; ++j) { + x[j].u16[0] *= (uint16_t)(__NMH_M2 & 0xFFFF); + x[j].u16[1] *= (uint16_t)(__NMH_M2 >> 16); + } + + for (j = 0; j < 4; ++j) x[j].u32 ^= y[j].u32; + + for (j = 0; j < 4; ++j) x[j].u32 ^= (x[j].u32 << 11) ^ (x[j].u32 >> 9); + for (j = 0; j < 4; ++j) { + x[j].u16[0] *= (uint16_t)(__NMH_M3 & 0xFFFF); + x[j].u16[1] *= (uint16_t)(__NMH_M3 >> 16); + } + for (j = 0; j < 4; ++j) x[j].u32 ^= (x[j].u32 >> 10) ^ (x[j].u32 >> 20); + + x[0].u32 ^= NMH_PRIME32_1; + x[1].u32 ^= NMH_PRIME32_2; + x[2].u32 ^= NMH_PRIME32_3; + x[3].u32 ^= NMH_PRIME32_4; + + for (j = 1; j < 4; ++j) x[0].u32 += x[j].u32; + + x[0].u32 ^= sl + (sl >> 5); + x[0].u16[0] *= (uint16_t)(__NMH_M3 & 0xFFFF); + x[0].u16[1] *= (uint16_t)(__NMH_M3 >> 16); + x[0].u32 ^= (x[0].u32 >> 10) ^ (x[0].u32 >> 20); + + result = x[0].u32; + } +# else /* at least NMH_SSE2 */ + { + __m128i const h0 = _mm_setr_epi32((int)NMH_PRIME32_1, (int)NMH_PRIME32_2, (int)NMH_PRIME32_3, 
(int)NMH_PRIME32_4); + __m128i const sl = _mm_set1_epi32((int)(seed + (uint32_t)len)); /* add in uint32_t like the scalar branch; a signed (int)seed + (int)len could overflow */ + __m128i const m1 = _mm_set1_epi32((int)__NMH_M1); + __m128i const m2 = _mm_set1_epi32((int)__NMH_M2); + __m128i const m3 = _mm_set1_epi32((int)__NMH_M3); + __m128i x = h0; + __m128i y = sl; + const uint32_t *const px = (const uint32_t*)&x; + + if (type) { + /* 33 to 255 bytes, as in the scalar branch: r full 32-byte rounds, then the last 32 bytes (which may overlap the rounds) are loaded for the final mix below */ + size_t const r = (len - 1) / 32; + size_t i; + for (i = 0; i < r; ++i) { + x = _mm_xor_si128(x, _mm_loadu_si128((const __m128i *)(p + i * 32))); + y = _mm_xor_si128(y, _mm_loadu_si128((const __m128i *)(p + i * 32 + 16))); + x = _mm_add_epi32(x, y); + x = _mm_mullo_epi16(x, m1); + x = _mm_xor_si128(_mm_xor_si128(x, _mm_slli_epi32(x, 5)), _mm_srli_epi32(x, 13)); + x = _mm_mullo_epi16(x, m2); + x = _mm_xor_si128(x, y); + x = _mm_xor_si128(_mm_xor_si128(x, _mm_slli_epi32(x, 11)), _mm_srli_epi32(x, 9)); + x = _mm_mullo_epi16(x, m3); + x = _mm_xor_si128(_mm_xor_si128(x, _mm_srli_epi32(x, 10)), _mm_srli_epi32(x, 20)); + } + x = _mm_xor_si128(x, _mm_loadu_si128((const __m128i *)(p + len - 32))); + y = _mm_xor_si128(y, _mm_loadu_si128((const __m128i *)(p + len - 16))); + } else { + /* 9 to 32 bytes: four 8-byte windows at offsets 0, (len>>4)<<3, len-8 and len-8-((len>>4)<<3); the first word of each goes to x, the second to y */ + x = _mm_xor_si128(x, _mm_setr_epi32((int)NMH_readLE32(p), (int)NMH_readLE32(p + ((len>>4)<<3)), (int)NMH_readLE32(p + len - 8), (int)NMH_readLE32(p + len - 8 - ((len>>4)<<3)))); + y = _mm_xor_si128(y, _mm_setr_epi32((int)NMH_readLE32(p + 4), (int)NMH_readLE32(p + ((len>>4)<<3) + 4), (int)NMH_readLE32(p + len - 8 + 4), (int)NMH_readLE32(p + len - 8 - ((len>>4)<<3) + 4))); + } + + x = _mm_add_epi32(x, y); + + y = _mm_xor_si128(_mm_xor_si128(y, _mm_slli_epi32(y, 17)), _mm_srli_epi32(y, 6)); + + x = _mm_mullo_epi16(x, m1); + x = _mm_xor_si128(_mm_xor_si128(x, _mm_slli_epi32(x, 5)), _mm_srli_epi32(x, 13)); + x = _mm_mullo_epi16(x, m2); + x = _mm_xor_si128(x, y); + x = _mm_xor_si128(_mm_xor_si128(x, _mm_slli_epi32(x, 11)), _mm_srli_epi32(x, 9)); + x = _mm_mullo_epi16(x, m3); + x = _mm_xor_si128(_mm_xor_si128(x, 
_mm_srli_epi32(x, 10)), _mm_srli_epi32(x, 20)); + + x = _mm_xor_si128(x, h0); + x = _mm_add_epi32(x, _mm_srli_si128(x, 4)); + x = _mm_add_epi32(x, _mm_srli_si128(x, 8)); + + x = _mm_xor_si128(x, _mm_add_epi32(sl, _mm_srli_epi32(sl, 5))); + x = _mm_mullo_epi16(x, m3); + x = _mm_xor_si128(_mm_xor_si128(x, _mm_srli_epi32(x, 10)), _mm_srli_epi32(x, 20)); + + result = *px; + } +# endif + return *&result; +} +#define NMHASH32_9to32(p, len, seed) NMHASH32_9to255(p, len, seed, 0) +#define NMHASH32_33to255(p, len, seed) NMHASH32_9to255(p, len, seed, 1) + +#undef __NMH_M1 +#undef __NMH_M2 +#undef __NMH_M3 + +#if NMH_VECTOR == NMH_SCALAR +#define NMHASH32_long_round NMHASH32_long_round_scalar +static inline +void +NMHASH32_long_round_scalar(uint32_t *const NMH_RESTRICT accX, uint32_t *const NMH_RESTRICT accY, const uint8_t* const NMH_RESTRICT p) +{ + /* breadth first calculation will hint some compiler to auto vectorize the code + * on gcc, the performance becomes 10x than the depth first, and about 80% of the manually vectorized code + */ + const size_t nbGroups = sizeof(NMH_ACC_INIT) / sizeof(*NMH_ACC_INIT); + size_t i; + + for (i = 0; i < nbGroups; ++i) { + accX[i] ^= NMH_readLE32(p + i * 4); + } + for (i = 0; i < nbGroups; ++i) { + accY[i] ^= NMH_readLE32(p + i * 4 + sizeof(NMH_ACC_INIT)); + } + for (i = 0; i < nbGroups; ++i) { + accX[i] += accY[i]; + } + for (i = 0; i < nbGroups; ++i) { + accY[i] ^= accX[i] >> 1; + } + for (i = 0; i < nbGroups * 2; ++i) { + ((uint16_t*)accX)[i] *= ((uint16_t*)__NMH_M1_V)[i]; + } + for (i = 0; i < nbGroups; ++i) { + accX[i] ^= accX[i] << 5 ^ accX[i] >> 13; + } + for (i = 0; i < nbGroups * 2; ++i) { + ((uint16_t*)accX)[i] *= ((uint16_t*)__NMH_M2_V)[i]; + } + for (i = 0; i < nbGroups; ++i) { + accX[i] ^= accY[i]; + } + for (i = 0; i < nbGroups; ++i) { + accX[i] ^= accX[i] << 11 ^ accX[i] >> 9; + } + for (i = 0; i < nbGroups * 2; ++i) { + ((uint16_t*)accX)[i] *= ((uint16_t*)__NMH_M3_V)[i]; + } + for (i = 0; i < nbGroups; ++i) { + accX[i] ^= 
accX[i] >> 10 ^ accX[i] >> 20; + } +} +#endif + +#if NMH_VECTOR == NMH_SSE2 +# define _NMH_MM_(F) _mm_ ## F +# define _NMH_MMW_(F) _mm_ ## F ## 128 +# define _NMH_MM_T __m128i +#elif NMH_VECTOR == NMH_AVX2 +# define _NMH_MM_(F) _mm256_ ## F +# define _NMH_MMW_(F) _mm256_ ## F ## 256 +# define _NMH_MM_T __m256i +#elif NMH_VECTOR == NMH_AVX512 +# define _NMH_MM_(F) _mm512_ ## F +# define _NMH_MMW_(F) _mm512_ ## F ## 512 +# define _NMH_MM_T __m512i +#endif + +#if NMH_VECTOR == NMH_SSE2 || NMH_VECTOR == NMH_AVX2 || NMH_VECTOR == NMH_AVX512 +# define NMHASH32_long_round NMHASH32_long_round_sse +# define NMH_VECTOR_NB_GROUP (sizeof(NMH_ACC_INIT) / sizeof(*NMH_ACC_INIT) / (sizeof(_NMH_MM_T) / sizeof(*NMH_ACC_INIT))) +static inline +void +NMHASH32_long_round_sse(uint32_t *const NMH_RESTRICT accX, uint32_t *const NMH_RESTRICT accY, const uint8_t* const NMH_RESTRICT p) +{ + const _NMH_MM_T *const NMH_RESTRICT m1 = (const _NMH_MM_T * NMH_RESTRICT)__NMH_M1_V; + const _NMH_MM_T *const NMH_RESTRICT m2 = (const _NMH_MM_T * NMH_RESTRICT)__NMH_M2_V; + const _NMH_MM_T *const NMH_RESTRICT m3 = (const _NMH_MM_T * NMH_RESTRICT)__NMH_M3_V; + _NMH_MM_T *const xaccX = ( _NMH_MM_T * )accX; + _NMH_MM_T *const xaccY = ( _NMH_MM_T * )accY; + _NMH_MM_T *const xp = ( _NMH_MM_T * )p; + size_t i; + + for (i = 0; i < NMH_VECTOR_NB_GROUP; ++i) { + xaccX[i] = _NMH_MMW_(xor_si)(xaccX[i], _NMH_MMW_(loadu_si)(xp + i)); + } + for (i = 0; i < NMH_VECTOR_NB_GROUP; ++i) { + xaccY[i] = _NMH_MMW_(xor_si)(xaccY[i], _NMH_MMW_(loadu_si)(xp + i + NMH_VECTOR_NB_GROUP)); + } + for (i = 0; i < NMH_VECTOR_NB_GROUP; ++i) { + xaccX[i] = _NMH_MM_(add_epi32)(xaccX[i], xaccY[i]); + } + for (i = 0; i < NMH_VECTOR_NB_GROUP; ++i) { + xaccY[i] = _NMH_MMW_(xor_si)(xaccY[i], _NMH_MM_(srli_epi32)(xaccX[i], 1)); + } + for (i = 0; i < NMH_VECTOR_NB_GROUP; ++i) { + xaccX[i] = _NMH_MM_(mullo_epi16)(xaccX[i], *m1); + } + for (i = 0; i < NMH_VECTOR_NB_GROUP; ++i) { + xaccX[i] = _NMH_MMW_(xor_si)(_NMH_MMW_(xor_si)(xaccX[i], 
_NMH_MM_(slli_epi32)(xaccX[i], 5)), _NMH_MM_(srli_epi32)(xaccX[i], 13)); + } + for (i = 0; i < NMH_VECTOR_NB_GROUP; ++i) { + xaccX[i] = _NMH_MM_(mullo_epi16)(xaccX[i], *m2); + } + for (i = 0; i < NMH_VECTOR_NB_GROUP; ++i) { + xaccX[i] = _NMH_MMW_(xor_si)(xaccX[i], xaccY[i]); + } + for (i = 0; i < NMH_VECTOR_NB_GROUP; ++i) { + xaccX[i] = _NMH_MMW_(xor_si)(_NMH_MMW_(xor_si)(xaccX[i], _NMH_MM_(slli_epi32)(xaccX[i], 11)), _NMH_MM_(srli_epi32)(xaccX[i], 9)); + } + for (i = 0; i < NMH_VECTOR_NB_GROUP; ++i) { + xaccX[i] = _NMH_MM_(mullo_epi16)(xaccX[i], *m3); + } + for (i = 0; i < NMH_VECTOR_NB_GROUP; ++i) { + xaccX[i] = _NMH_MMW_(xor_si)(_NMH_MMW_(xor_si)(xaccX[i], _NMH_MM_(srli_epi32)(xaccX[i], 10)), _NMH_MM_(srli_epi32)(xaccX[i], 20)); + } +} +# undef _NMH_MM_ +# undef _NMH_MMW_ +# undef _NMH_MM_T +# undef NMH_VECTOR_NB_GROUP +#endif + +static +uint32_t +NMHASH32_long(const uint8_t* const NMH_RESTRICT p, size_t const len, uint32_t const seed) /* bulk hash over two 32-lane accumulators; the callers visible in this header pass len >= 256 */ +{ + NMH_ALIGN(NMH_ACC_ALIGN) uint32_t accX[sizeof(NMH_ACC_INIT)/sizeof(*NMH_ACC_INIT)]; + NMH_ALIGN(NMH_ACC_ALIGN) uint32_t accY[sizeof(accX)/sizeof(*accX)]; + size_t const nbRounds = (len - 1) / (sizeof(accX) + sizeof(accY)); + size_t i; + uint32_t sum = 0; + + /* init: accX starts from NMH_ACC_INIT, every accY lane from seed */ + for (i = 0; i < sizeof(accX)/sizeof(*accX); ++i) accX[i] = NMH_ACC_INIT[i]; + for (i = 0; i < sizeof(accY)/sizeof(*accY); ++i) accY[i] = seed; + + for (i = 0; i < nbRounds; ++i) { + NMHASH32_long_round(accX, accY, p + i * (sizeof(accX) + sizeof(accY))); + } + NMHASH32_long_round(accX, accY, p + len - (sizeof(accX) + sizeof(accY))); /* the last round always consumes the final sizeof(accX)+sizeof(accY) bytes, which may overlap the previous round */ + + /* merge acc: XOR each accX lane with NMH_ACC_INIT again, then add all lanes */ + for (i = 0; i < sizeof(accX)/sizeof(*accX); ++i) accX[i] ^= NMH_ACC_INIT[i]; + for (i = 0; i < sizeof(accX)/sizeof(*accX); ++i) sum += accX[i]; + +# if SIZE_MAX > UINT32_C(-1) + sum += (uint32_t)(len >> 32); /* NOTE(review): inside #if, UINT32_C(-1) is presumably evaluated at uintmax_t width, in which case this line is never compiled in - confirm before relying on it */ +# endif + return sum ^ (uint32_t)len; +} + +static inline +uint32_t +NMHASH32_avalanche32(uint32_t const x) +{ + /* final mixer applied to the NMHASH32_long sum: [-21 -8 cce5196d 12 -7 464be229 -21 -8] = 3.2267098842182733; u16[0] is multiplied by the low 16 bits of each constant and u16[1] by the high 16 bits, with the products formed in uint32_t so that integer promotion to int cannot overflow */ + const uint32_t m1 = 
UINT32_C(0xCCE5196D); + const uint32_t m2 = UINT32_C(0x464BE229); + union { uint32_t u32; uint16_t u16[2]; } vx; + vx.u32 = x; + vx.u32 ^= (vx.u32 >> 8) ^ (vx.u32 >> 21); + vx.u16[0] = (uint16_t)((uint32_t)vx.u16[0] * (uint16_t)m1); + vx.u16[1] = (uint16_t)((uint32_t)vx.u16[1] * (uint16_t)(m1 >> 16)); + vx.u32 ^= (vx.u32 << 12) ^ (vx.u32 >> 7); + vx.u16[0] = (uint16_t)((uint32_t)vx.u16[0] * (uint16_t)m2); + vx.u16[1] = (uint16_t)((uint32_t)vx.u16[1] * (uint16_t)(m2 >> 16)); + return vx.u32 ^ (vx.u32 >> 8) ^ (vx.u32 >> 21); +} + +static inline +uint32_t +NMHASH32(const void* const NMH_RESTRICT input, size_t const len, uint32_t seed) /* entry point: dispatches on len (0-4, 5-8, 9-32, 33-255 bytes; from 256 bytes up NMHASH32_long followed by NMHASH32_avalanche32) */ +{ + const uint8_t *const p = (const uint8_t *)input; + if (NMH_likely(len <= 32)) { + if(NMH_likely(len > 8)) { + return NMHASH32_9to32(p, len, seed); + } + if(NMH_likely(len > 4)) { + uint32_t x = NMH_readLE32(p); + uint32_t y = NMH_readLE32(p + len - 4) ^ (NMH_PRIME32_4 + 2 + seed); + x += y; + x ^= x << (len + 7); + return NMHASH32_0to8(x, NMH_rotl32(y, 5)); + } else { + union { uint32_t u32; uint16_t u16[2]; uint8_t u8[4]; } data; + switch (len) { + case 0: seed += NMH_PRIME32_2; + data.u32 = 0; + break; + case 1: seed += NMH_PRIME32_2 + (UINT32_C(1) << 24) + (1 << 1); + data.u32 = p[0]; + break; + case 2: seed += NMH_PRIME32_2 + (UINT32_C(2) << 24) + (2 << 1); + data.u32 = NMH_readLE16(p); + break; + case 3: seed += NMH_PRIME32_2 + (UINT32_C(3) << 24) + (3 << 1); + data.u16[1] = p[2]; + data.u16[0] = NMH_readLE16(p); + break; + case 4: seed += NMH_PRIME32_3; + data.u32 = NMH_readLE32(p); + break; + default: return 0; + } + return NMHASH32_0to8(data.u32 + seed, NMH_rotl32(seed, 5)); + } + } + if (NMH_likely(len < 256)) { + return NMHASH32_33to255(p, len, seed); + } + return NMHASH32_avalanche32(NMHASH32_long(p, len, seed)); +} + +static inline +uint32_t +NMHASH32X_0to4(uint32_t x, uint32_t const seed) +{ + /* [bdab1ea9 18 a7896a1b 12 83796a2d 16] = 0.092922873297662509 */ + x ^= seed; + x *= UINT32_C(0xBDAB1EA9); + x += NMH_rotl32(seed, 31); + x ^= x >> 18; + x *= 
UINT32_C(0xA7896A1B); + x ^= x >> 12; + x *= UINT32_C(0x83796A2D); + x ^= x >> 16; + return x; +} + +static inline +uint32_t +NMHASH32X_5to8(const uint8_t* const NMH_RESTRICT p, size_t const len, uint32_t const seed) +{ + /* - 5 to 8 bytes: NMHASH32X calls this only for 4 < len <= 8; the first and the last 4 input bytes are read (they overlap when len < 8) and len is also used as a shift count + * - mixer: [11049a7d 23 bcccdc7b 12 065e9dad 12] = 0.16577596555667246 */ + + uint32_t x = NMH_readLE32(p) ^ NMH_PRIME32_3; + uint32_t const y = NMH_readLE32(p + len - 4) ^ seed; + x += y; + x ^= x >> len; + x *= UINT32_C(0x11049A7D); + x ^= x >> 23; + x *= UINT32_C(0xBCCCDC7B); + x ^= NMH_rotl32(y, 3); + x ^= x >> 12; + x *= UINT32_C(0x065E9DAD); + x ^= x >> 12; + return x; +} + +static inline +uint32_t +NMHASH32X_9to255(const uint8_t* const NMH_RESTRICT p, size_t const len, uint32_t const seed) +{ + /* - at least 9 bytes + * - base mixer: [11049a7d 23 bcccdc7b 12 065e9dad 12] = 0.16577596555667246 + * - tail mixer: [16 a52fb2cd 15 551e4d49 16] = 0.17162579707098322 + */ + + uint32_t x = NMH_PRIME32_3; + uint32_t y = seed; + uint32_t a = NMH_PRIME32_4; + uint32_t b = seed; + size_t i, r = (len - 1) / 16; + + for (i = 0; i < r; ++i) { + x ^= NMH_readLE32(p + i * 16 + 0); + y ^= NMH_readLE32(p + i * 16 + 4); + x ^= y; + x *= UINT32_C(0x11049A7D); + x ^= x >> 23; + x *= UINT32_C(0xBCCCDC7B); + y = NMH_rotl32(y, 4); + x ^= y; + x ^= x >> 12; + x *= UINT32_C(0x065E9DAD); + x ^= x >> 12; + + a ^= NMH_readLE32(p + i * 16 + 8); + b ^= NMH_readLE32(p + i * 16 + 12); + a ^= b; + a *= UINT32_C(0x11049A7D); + a ^= a >> 23; + a *= UINT32_C(0xBCCCDC7B); + b = NMH_rotl32(b, 3); + a ^= b; + a ^= a >> 12; + a *= UINT32_C(0x065E9DAD); + a ^= a >> 12; + } + + if (NMH_likely(((uint8_t)len-1) & 8)) { + if (NMH_likely(((uint8_t)len-1) & 4)) { + a ^= NMH_readLE32(p + r * 16 + 0); + b ^= NMH_readLE32(p + r * 16 + 4); + a ^= b; + a *= UINT32_C(0x11049A7D); + a ^= a >> 23; + a *= UINT32_C(0xBCCCDC7B); + a ^= NMH_rotl32(b, 4); + a ^= a >> 12; + a *= UINT32_C(0x065E9DAD); + } else { + a ^= NMH_readLE32(p + r * 16) + b; + a ^= a >> 16; + a *= 
UINT32_C(0xA52FB2CD); + a ^= a >> 15; + a *= UINT32_C(0x551E4D49); + } + + x ^= NMH_readLE32(p + len - 8); + y ^= NMH_readLE32(p + len - 4); + x ^= y; + x *= UINT32_C(0x11049A7D); + x ^= x >> 23; + x *= UINT32_C(0xBCCCDC7B); + x ^= NMH_rotl32(y, 3); + x ^= x >> 12; + x *= UINT32_C(0x065E9DAD); + } else { + if (NMH_likely(((uint8_t)len-1) & 4)) { + a ^= NMH_readLE32(p + r * 16) + b; + a ^= a >> 16; + a *= UINT32_C(0xA52FB2CD); + a ^= a >> 15; + a *= UINT32_C(0x551E4D49); + } + x ^= NMH_readLE32(p + len - 4) + y; + x ^= x >> 16; + x *= UINT32_C(0xA52FB2CD); + x ^= x >> 15; + x *= UINT32_C(0x551E4D49); + } + + x ^= (uint32_t)len; + x ^= NMH_rotl32(a, 27); /* rotate one lane to pass Diff test */ + x ^= x >> 14; + x *= UINT32_C(0x141CC535); + + return x; +} + +static inline +uint32_t +NMHASH32X_avalanche32(uint32_t x) +{ + /* mixer with 2 mul from skeeto/hash-prospector: + * [15 d168aaad 15 af723597 15] = 0.15983776156606694 + */ + x ^= x >> 15; + x *= UINT32_C(0xD168AAAD); + x ^= x >> 15; + x *= UINT32_C(0xAF723597); + x ^= x >> 15; + return x; +} + +/* use 32*32->32 multiplication for short hash */ +static inline +uint32_t +NMHASH32X(const void* const NMH_RESTRICT input, size_t const len, uint32_t seed) +{ + const uint8_t *const p = (const uint8_t *)input; + if (NMH_likely(len <= 8)) { + if (NMH_likely(len > 4)) { + return NMHASH32X_5to8(p, len, seed); + } else { + /* 0-4 bytes */ + union { uint32_t u32; uint16_t u16[2]; uint8_t u8[4]; } data; + switch (len) { + case 0: seed += NMH_PRIME32_2; + data.u32 = 0; + break; + case 1: seed += NMH_PRIME32_2 + (UINT32_C(1) << 24) + (1 << 1); + data.u32 = p[0]; + break; + case 2: seed += NMH_PRIME32_2 + (UINT32_C(2) << 24) + (2 << 1); + data.u32 = NMH_readLE16(p); + break; + case 3: seed += NMH_PRIME32_2 + (UINT32_C(3) << 24) + (3 << 1); + data.u16[1] = p[2]; + data.u16[0] = NMH_readLE16(p); + break; + case 4: seed += NMH_PRIME32_1; + data.u32 = NMH_readLE32(p); + break; + default: return 0; + } + return NMHASH32X_0to4(data.u32, 
seed); + } + } + if (NMH_likely(len < 256)) { + return NMHASH32X_9to255(p, len, seed); + } + return NMHASH32X_avalanche32(NMHASH32_long(p, len, seed)); +} + +#if defined(_MSC_VER) && _MSC_VER >= 1914 +# pragma warning(pop) +#endif +#ifdef __SDCC +# pragma restore +# undef const +#endif + +#endif /* _nmhash_h_ */ + +#ifdef __cplusplus +} +#endif diff --git a/src/tests/hash_functions/validation/nmhash_scalar.c b/src/tests/hash_functions/validation/nmhash_scalar.c new file mode 100755 index 000000000..051a65d5f --- /dev/null +++ b/src/tests/hash_functions/validation/nmhash_scalar.c @@ -0,0 +1,8 @@ +#include "nmhash_scalar.h" +int32_t nmhash32_test ( const void * key, size_t len, uint32_t seed ) { /* externally visible wrapper: forwards to NMHASH32 (presumably the one from nmhash_scalar.h) and returns the hash converted to int32_t */ + return NMHASH32 (key, (const size_t) len, seed); +} + +int32_t nmhash32x_test ( const void * key, size_t len, uint32_t seed ) { /* same wrapper for NMHASH32X */ + return NMHASH32X (key, (const size_t) len, seed); +} diff --git a/src/tests/hash_functions/validation/nmhash_scalar.h b/src/tests/hash_functions/validation/nmhash_scalar.h new file mode 100755 index 000000000..bee950670 --- /dev/null +++ b/src/tests/hash_functions/validation/nmhash_scalar.h @@ -0,0 +1,824 @@ +/* + * verification: + * NMHASH32: + * rurban/smhasher: 0x12A30553 + * demerphq/smhasher: 0x3D8F6C47 + * NMHASH32X: + * rurban/smhasher: 0xA8580227 + * demerphq/smhasher: 0x40B451B3 + */ + +#ifdef __cplusplus +extern "C" { +#endif + +#ifndef _nmhash_h_ +#define _nmhash_h_ + +#define NMH_VERSION 2 + +#ifdef _MSC_VER +# pragma warning(push, 3) +#endif + +#if defined(__cplusplus) && __cplusplus < 201103L +# define __STDC_CONSTANT_MACROS 1 +#endif + +#include <stdint.h> +#include <string.h> + +#if defined(__GNUC__) +# if defined(__AVX2__) +# include <immintrin.h> +# elif defined(__SSE2__) +# include <emmintrin.h> +# endif +#elif defined(_MSC_VER) +# include <intrin.h> +#endif + +#ifdef _MSC_VER +# pragma warning(pop) +#endif + +#if (defined(__GNUC__) && (__GNUC__ >= 3)) \ + || (defined(__INTEL_COMPILER) && (__INTEL_COMPILER >= 800)) \ + || defined(__clang__) +# define NMH_likely(x) __builtin_expect(x, 1) 
+#else +# define NMH_likely(x) (x) +#endif + +#if defined(__has_builtin) +# if __has_builtin(__builtin_rotateleft32) +# define NMH_rotl32 __builtin_rotateleft32 /* clang */ +# endif +#endif +#if !defined(NMH_rotl32) +# if defined(_MSC_VER) + /* Note: although _rotl exists for minGW (GCC under windows), performance seems poor */ +# define NMH_rotl32(x,r) _rotl(x,r) +# else +# define NMH_rotl32(x,r) (((x) << (r)) | ((x) >> (32 - (r)))) +# endif +#endif + +#if ((defined(sun) || defined(__sun)) && __cplusplus) /* Solaris includes __STDC_VERSION__ with C++. Tested with GCC 5.5 */ +# define NMH_RESTRICT /* disable */ +#elif defined (__STDC_VERSION__) && __STDC_VERSION__ >= 199901L /* >= C99 */ +# define NMH_RESTRICT restrict +#elif defined(__cplusplus) && (defined(__GNUC__) || defined(__clang__) || defined(__INTEL_COMPILER)) +# define NMH_RESTRICT __restrict__ +#elif defined(__cplusplus) && defined(_MSC_VER) +# define NMH_RESTRICT __restrict +#else +# define NMH_RESTRICT /* disable */ +#endif + +/* endian macros */ +#ifndef NMHASH_LITTLE_ENDIAN +# if defined(_WIN32) || defined(__LITTLE_ENDIAN__) || defined(__x86_64__) || (defined(__BYTE_ORDER__) && __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__) || defined(__SDCC) +# define NMHASH_LITTLE_ENDIAN 1 +# elif defined(__BIG_ENDIAN__) || (defined(__BYTE_ORDER__) && __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__) +# define NMHASH_LITTLE_ENDIAN 0 +# else +# warning could not determine endianness! Falling back to little endian. 
+# define NMHASH_LITTLE_ENDIAN 1 +# endif +#endif + +/* vector macros: the possible values of NMH_VECTOR, which selects the scalar or one of the SIMD code paths */ +#define NMH_SCALAR 0 +#define NMH_SSE2 1 +#define NMH_AVX2 2 +#define NMH_AVX512 3 + +#ifndef NMH_VECTOR /* can be defined on command line; otherwise this header always selects the scalar path */ +# define NMH_VECTOR NMH_SCALAR +#endif + +/* align macros: NMH_ALIGN(n) requests n-byte alignment through C11 alignas, the GNU aligned attribute or MSVC __declspec(align), and expands to nothing on other compilers */ +#if defined (__STDC_VERSION__) && (__STDC_VERSION__ >= 201112L) /* C11+ */ +# include <stdalign.h> /* provides the alignas macro used on the next line */ +# define NMH_ALIGN(n) alignas(n) +#elif defined(__GNUC__) +# define NMH_ALIGN(n) __attribute__ ((aligned(n))) +#elif defined(_MSC_VER) +# define NMH_ALIGN(n) __declspec(align(n)) +#else +# define NMH_ALIGN(n) /* disabled */ +#endif + +#if NMH_VECTOR > 0 /* NMH_ACC_ALIGN: alignment passed to NMH_ALIGN; 64 bytes whenever a SIMD path is selected */ +# define NMH_ACC_ALIGN 64 +#elif defined(__BIGGEST_ALIGNMENT__) +# define NMH_ACC_ALIGN __BIGGEST_ALIGNMENT__ +#elif defined(__SDCC) +# define NMH_ACC_ALIGN 1 +#else +# define NMH_ACC_ALIGN 16 +#endif + +/* constants */ + +/* primes from xxh */ +#define NMH_PRIME32_1 UINT32_C(0x9E3779B1) +#define NMH_PRIME32_2 UINT32_C(0x85EBCA77) +#define NMH_PRIME32_3 UINT32_C(0xC2B2AE3D) +#define NMH_PRIME32_4 UINT32_C(0x27D4EB2F) + +/*! Pseudorandom secret taken directly from FARSH. 
*/ +NMH_ALIGN(NMH_ACC_ALIGN) static const uint32_t NMH_ACC_INIT[32] = { + UINT32_C(0xB8FE6C39), UINT32_C(0x23A44BBE), UINT32_C(0x7C01812C), UINT32_C(0xF721AD1C), + UINT32_C(0xDED46DE9), UINT32_C(0x839097DB), UINT32_C(0x7240A4A4), UINT32_C(0xB7B3671F), + UINT32_C(0xCB79E64E), UINT32_C(0xCCC0E578), UINT32_C(0x825AD07D), UINT32_C(0xCCFF7221), + UINT32_C(0xB8084674), UINT32_C(0xF743248E), UINT32_C(0xE03590E6), UINT32_C(0x813A264C), + + UINT32_C(0x3C2852BB), UINT32_C(0x91C300CB), UINT32_C(0x88D0658B), UINT32_C(0x1B532EA3), + UINT32_C(0x71644897), UINT32_C(0xA20DF94E), UINT32_C(0x3819EF46), UINT32_C(0xA9DEACD8), + UINT32_C(0xA8FA763F), UINT32_C(0xE39C343F), UINT32_C(0xF9DCBBC7), UINT32_C(0xC70B4F1D), + UINT32_C(0x8A51E04B), UINT32_C(0xCDB45931), UINT32_C(0xC89F7EC9), UINT32_C(0xD9787364), +}; + +#if defined(_MSC_VER) && _MSC_VER >= 1914 +# pragma warning(push) +# pragma warning(disable: 5045) +#endif +#ifdef __SDCC +# define const +# pragma save +# pragma disable_warning 110 +# pragma disable_warning 126 +#endif + +/* read functions */ +static inline +uint32_t +NMH_readLE32(const void *const p) +{ + uint32_t v; + memcpy(&v, p, 4); +# if (NMHASH_LITTLE_ENDIAN) + return v; +# elif defined(__GNUC__) || defined(__INTEL_COMPILER) || defined(__clang__) + return __builtin_bswap32(v); +# elif defined(_MSC_VER) + return _byteswap_ulong(v); +# else + return ((v >> 24) & 0xff) | ((v >> 8) & 0xff00) | ((v << 8) & 0xff0000) | ((v << 24) & 0xff000000); +# endif +} + +static inline +uint16_t +NMH_readLE16(const void *const p) +{ + uint16_t v; + memcpy(&v, p, 2); +# if (NMHASH_LITTLE_ENDIAN) + return v; +# else + return (uint16_t)((v << 8) | (v >> 8)); +# endif +} + +static inline +uint32_t +NMHASH32_0to8(uint32_t const x, uint32_t const seed2) +{ + /* base mixer: [-6 -12 776bf593 -19 11 3fb39c65 -15 -9 e9139917 -11 16] = 0.027071104091278835 */ + const uint32_t m1 = UINT32_C(0x776BF593); + const uint32_t m2 = UINT32_C(0x3FB39C65); + const uint32_t m3 = UINT32_C(0xE9139917); + +# if 
NMH_VECTOR == NMH_SCALAR + { + union { uint32_t u32; uint16_t u16[2]; } vx; + vx.u32 = x; + vx.u32 ^= (vx.u32 >> 12) ^ (vx.u32 >> 6); + vx.u16[0] *= (uint16_t)m1; + vx.u16[1] *= (uint16_t)(m1 >> 16); + vx.u32 ^= (vx.u32 << 11) ^ ( vx.u32 >> 19); + vx.u16[0] *= (uint16_t)m2; + vx.u16[1] *= (uint16_t)(m2 >> 16); + vx.u32 ^= seed2; + vx.u32 ^= (vx.u32 >> 15) ^ ( vx.u32 >> 9); + vx.u16[0] *= (uint16_t)m3; + vx.u16[1] *= (uint16_t)(m3 >> 16); + vx.u32 ^= (vx.u32 << 16) ^ ( vx.u32 >> 11); + return vx.u32; + } +# else /* at least NMH_SSE2 */ + { + __m128i hv = _mm_setr_epi32((int)x, 0, 0, 0); + const __m128i sv = _mm_setr_epi32((int)seed2, 0, 0, 0); + const uint32_t *const result = (const uint32_t*)&hv; + + hv = _mm_xor_si128(_mm_xor_si128(hv, _mm_srli_epi32(hv, 12)), _mm_srli_epi32(hv, 6)); + hv = _mm_mullo_epi16(hv, _mm_setr_epi32((int)m1, 0, 0, 0)); + hv = _mm_xor_si128(_mm_xor_si128(hv, _mm_slli_epi32(hv, 11)), _mm_srli_epi32(hv, 19)); + hv = _mm_mullo_epi16(hv, _mm_setr_epi32((int)m2, 0, 0, 0)); + + hv = _mm_xor_si128(hv, sv); + + hv = _mm_xor_si128(_mm_xor_si128(hv, _mm_srli_epi32(hv, 15)), _mm_srli_epi32(hv, 9)); + hv = _mm_mullo_epi16(hv, _mm_setr_epi32((int)m3, 0, 0, 0)); + hv = _mm_xor_si128(_mm_xor_si128(hv, _mm_slli_epi32(hv, 16)), _mm_srli_epi32(hv, 11)); + + return *result; + } +# endif +} + +#define __NMH_M1 UINT32_C(0xF0D9649B) +#define __NMH_M2 UINT32_C(0x29A7935D) +#define __NMH_M3 UINT32_C(0x55D35831) + +NMH_ALIGN(NMH_ACC_ALIGN) static const uint32_t __NMH_M1_V[32] = { + __NMH_M1, __NMH_M1, __NMH_M1, __NMH_M1, __NMH_M1, __NMH_M1, __NMH_M1, __NMH_M1, + __NMH_M1, __NMH_M1, __NMH_M1, __NMH_M1, __NMH_M1, __NMH_M1, __NMH_M1, __NMH_M1, + __NMH_M1, __NMH_M1, __NMH_M1, __NMH_M1, __NMH_M1, __NMH_M1, __NMH_M1, __NMH_M1, + __NMH_M1, __NMH_M1, __NMH_M1, __NMH_M1, __NMH_M1, __NMH_M1, __NMH_M1, __NMH_M1, +}; +NMH_ALIGN(NMH_ACC_ALIGN) static const uint32_t __NMH_M2_V[32] = { + __NMH_M2, __NMH_M2, __NMH_M2, __NMH_M2, __NMH_M2, __NMH_M2, __NMH_M2, __NMH_M2, + __NMH_M2, 
__NMH_M2, __NMH_M2, __NMH_M2, __NMH_M2, __NMH_M2, __NMH_M2, __NMH_M2, + __NMH_M2, __NMH_M2, __NMH_M2, __NMH_M2, __NMH_M2, __NMH_M2, __NMH_M2, __NMH_M2, + __NMH_M2, __NMH_M2, __NMH_M2, __NMH_M2, __NMH_M2, __NMH_M2, __NMH_M2, __NMH_M2, +}; +NMH_ALIGN(NMH_ACC_ALIGN) static const uint32_t __NMH_M3_V[32] = { + __NMH_M3, __NMH_M3, __NMH_M3, __NMH_M3, __NMH_M3, __NMH_M3, __NMH_M3, __NMH_M3, + __NMH_M3, __NMH_M3, __NMH_M3, __NMH_M3, __NMH_M3, __NMH_M3, __NMH_M3, __NMH_M3, + __NMH_M3, __NMH_M3, __NMH_M3, __NMH_M3, __NMH_M3, __NMH_M3, __NMH_M3, __NMH_M3, + __NMH_M3, __NMH_M3, __NMH_M3, __NMH_M3, __NMH_M3, __NMH_M3, __NMH_M3, __NMH_M3, +}; + +static inline +uint32_t +NMHASH32_9to255(const uint8_t* const NMH_RESTRICT p, size_t const len, uint32_t const seed, int const type) +{ + /* base mixer: [f0d9649b 5 -13 29a7935d -9 11 55d35831 -20 -10 ] = 0.93495901789135362 */ + uint32_t result = 0; +# if NMH_VECTOR == NMH_SCALAR + { + union { uint32_t u32; uint16_t u16[2]; } x[4], y[4]; + uint32_t const sl = seed + (uint32_t)len; + size_t j; + x[0].u32 = NMH_PRIME32_1; + x[1].u32 = NMH_PRIME32_2; + x[2].u32 = NMH_PRIME32_3; + x[3].u32 = NMH_PRIME32_4; + for (j = 0; j < 4; ++j) y[j].u32 = sl; + + if (type) { + /* 33 to 255 bytes */ + size_t const r = (len - 1) / 32; + size_t i; + for (i = 0; i < r; ++i) { + for (j = 0; j < 4; ++j) x[j].u32 ^= NMH_readLE32(p + i * 32 + j * 4); + for (j = 0; j < 4; ++j) y[j].u32 ^= NMH_readLE32(p + i * 32 + j * 4 + 16); + for (j = 0; j < 4; ++j) x[j].u32 += y[j].u32; + + for (j = 0; j < 4; ++j) { + x[j].u16[0] *= (uint16_t)(__NMH_M1 & 0xFFFF); + x[j].u16[1] *= (uint16_t)(__NMH_M1 >> 16); + } + for (j = 0; j < 4; ++j) x[j].u32 ^= (x[j].u32 << 5) ^ (x[j].u32 >> 13); + for (j = 0; j < 4; ++j) { + x[j].u16[0] *= (uint16_t)(__NMH_M2 & 0xFFFF); + x[j].u16[1] *= (uint16_t)(__NMH_M2 >> 16); + } + + for (j = 0; j < 4; ++j) x[j].u32 ^= y[j].u32; + + for (j = 0; j < 4; ++j) x[j].u32 ^= (x[j].u32 << 11) ^ (x[j].u32 >> 9); + for (j = 0; j < 4; ++j) { + x[j].u16[0] *= 
(uint16_t)(__NMH_M3 & 0xFFFF); + x[j].u16[1] *= (uint16_t)(__NMH_M3 >> 16); + } + for (j = 0; j < 4; ++j) x[j].u32 ^= (x[j].u32 >> 10) ^ (x[j].u32 >> 20); + } + for (j = 0; j < 4; ++j) x[j].u32 ^= NMH_readLE32(p + len - 32 + j * 4); + for (j = 0; j < 4; ++j) y[j].u32 ^= NMH_readLE32(p + len - 16 + j * 4); + } else { + /* 9 to 32 bytes */ + x[0].u32 ^= NMH_readLE32(p); + x[1].u32 ^= NMH_readLE32(p + ((len>>4)<<3)); + x[2].u32 ^= NMH_readLE32(p + len - 8); + x[3].u32 ^= NMH_readLE32(p + len - 8 - ((len>>4)<<3)); + y[0].u32 ^= NMH_readLE32(p + 4); + y[1].u32 ^= NMH_readLE32(p + ((len>>4)<<3) + 4); + y[2].u32 ^= NMH_readLE32(p + len - 8 + 4); + y[3].u32 ^= NMH_readLE32(p + len - 8 - ((len>>4)<<3) + 4); + } + + for (j = 0; j < 4; ++j) x[j].u32 += y[j].u32; + for (j = 0; j < 4; ++j) y[j].u32 ^= (y[j].u32 << 17) ^ (y[j].u32 >> 6); + + for (j = 0; j < 4; ++j) { + x[j].u16[0] *= (uint16_t)(__NMH_M1 & 0xFFFF); + x[j].u16[1] *= (uint16_t)(__NMH_M1 >> 16); + } + for (j = 0; j < 4; ++j) x[j].u32 ^= (x[j].u32 << 5) ^ (x[j].u32 >> 13); + for (j = 0; j < 4; ++j) { + x[j].u16[0] *= (uint16_t)(__NMH_M2 & 0xFFFF); + x[j].u16[1] *= (uint16_t)(__NMH_M2 >> 16); + } + + for (j = 0; j < 4; ++j) x[j].u32 ^= y[j].u32; + + for (j = 0; j < 4; ++j) x[j].u32 ^= (x[j].u32 << 11) ^ (x[j].u32 >> 9); + for (j = 0; j < 4; ++j) { + x[j].u16[0] *= (uint16_t)(__NMH_M3 & 0xFFFF); + x[j].u16[1] *= (uint16_t)(__NMH_M3 >> 16); + } + for (j = 0; j < 4; ++j) x[j].u32 ^= (x[j].u32 >> 10) ^ (x[j].u32 >> 20); + + x[0].u32 ^= NMH_PRIME32_1; + x[1].u32 ^= NMH_PRIME32_2; + x[2].u32 ^= NMH_PRIME32_3; + x[3].u32 ^= NMH_PRIME32_4; + + for (j = 1; j < 4; ++j) x[0].u32 += x[j].u32; + + x[0].u32 ^= sl + (sl >> 5); + x[0].u16[0] *= (uint16_t)(__NMH_M3 & 0xFFFF); + x[0].u16[1] *= (uint16_t)(__NMH_M3 >> 16); + x[0].u32 ^= (x[0].u32 >> 10) ^ (x[0].u32 >> 20); + + result = x[0].u32; + } +# else /* at least NMH_SSE2 */ + { + __m128i const h0 = _mm_setr_epi32((int)NMH_PRIME32_1, (int)NMH_PRIME32_2, (int)NMH_PRIME32_3, 
(int)NMH_PRIME32_4); + __m128i const sl = _mm_set1_epi32((int)seed + (int)len); + __m128i const m1 = _mm_set1_epi32((int)__NMH_M1); + __m128i const m2 = _mm_set1_epi32((int)__NMH_M2); + __m128i const m3 = _mm_set1_epi32((int)__NMH_M3); + __m128i x = h0; + __m128i y = sl; + const uint32_t *const px = (const uint32_t*)&x; + + if (type) { + /* 33 to 255 bytes */ + size_t const r = (len - 1) / 32; + size_t i; + for (i = 0; i < r; ++i) { + x = _mm_xor_si128(x, _mm_loadu_si128((const __m128i *)(p + i * 32))); + y = _mm_xor_si128(y, _mm_loadu_si128((const __m128i *)(p + i * 32 + 16))); + x = _mm_add_epi32(x, y); + x = _mm_mullo_epi16(x, m1); + x = _mm_xor_si128(_mm_xor_si128(x, _mm_slli_epi32(x, 5)), _mm_srli_epi32(x, 13)); + x = _mm_mullo_epi16(x, m2); + x = _mm_xor_si128(x, y); + x = _mm_xor_si128(_mm_xor_si128(x, _mm_slli_epi32(x, 11)), _mm_srli_epi32(x, 9)); + x = _mm_mullo_epi16(x, m3); + x = _mm_xor_si128(_mm_xor_si128(x, _mm_srli_epi32(x, 10)), _mm_srli_epi32(x, 20)); + } + x = _mm_xor_si128(x, _mm_loadu_si128((const __m128i *)(p + len - 32))); + y = _mm_xor_si128(y, _mm_loadu_si128((const __m128i *)(p + len - 16))); + } else { + /* 9 to 32 bytes */ + x = _mm_xor_si128(x, _mm_setr_epi32((int)NMH_readLE32(p), (int)NMH_readLE32(p + ((len>>4)<<3)), (int)NMH_readLE32(p + len - 8), (int)NMH_readLE32(p + len - 8 - ((len>>4)<<3)))); + y = _mm_xor_si128(y, _mm_setr_epi32((int)NMH_readLE32(p + 4), (int)NMH_readLE32(p + ((len>>4)<<3) + 4), (int)NMH_readLE32(p + len - 8 + 4), (int)NMH_readLE32(p + len - 8 - ((len>>4)<<3) + 4))); + } + + x = _mm_add_epi32(x, y); + + y = _mm_xor_si128(_mm_xor_si128(y, _mm_slli_epi32(y, 17)), _mm_srli_epi32(y, 6)); + + x = _mm_mullo_epi16(x, m1); + x = _mm_xor_si128(_mm_xor_si128(x, _mm_slli_epi32(x, 5)), _mm_srli_epi32(x, 13)); + x = _mm_mullo_epi16(x, m2); + x = _mm_xor_si128(x, y); + x = _mm_xor_si128(_mm_xor_si128(x, _mm_slli_epi32(x, 11)), _mm_srli_epi32(x, 9)); + x = _mm_mullo_epi16(x, m3); + x = _mm_xor_si128(_mm_xor_si128(x,
_mm_srli_epi32(x, 10)), _mm_srli_epi32(x, 20)); + + x = _mm_xor_si128(x, h0); + x = _mm_add_epi32(x, _mm_srli_si128(x, 4)); + x = _mm_add_epi32(x, _mm_srli_si128(x, 8)); + + x = _mm_xor_si128(x, _mm_add_epi32(sl, _mm_srli_epi32(sl, 5))); + x = _mm_mullo_epi16(x, m3); + x = _mm_xor_si128(_mm_xor_si128(x, _mm_srli_epi32(x, 10)), _mm_srli_epi32(x, 20)); + + result = *px; + } +# endif + return *&result; +} +#define NMHASH32_9to32(p, len, seed) NMHASH32_9to255(p, len, seed, 0) +#define NMHASH32_33to255(p, len, seed) NMHASH32_9to255(p, len, seed, 1) + +#undef __NMH_M1 +#undef __NMH_M2 +#undef __NMH_M3 + +#if NMH_VECTOR == NMH_SCALAR +#define NMHASH32_long_round NMHASH32_long_round_scalar +static inline +void +NMHASH32_long_round_scalar(uint32_t *const NMH_RESTRICT accX, uint32_t *const NMH_RESTRICT accY, const uint8_t* const NMH_RESTRICT p) +{ + /* breadth first calculation will hint some compiler to auto vectorize the code + * on gcc, the performance becomes 10x than the depth first, and about 80% of the manually vectorized code + */ + const size_t nbGroups = sizeof(NMH_ACC_INIT) / sizeof(*NMH_ACC_INIT); + size_t i; + + for (i = 0; i < nbGroups; ++i) { + accX[i] ^= NMH_readLE32(p + i * 4); + } + for (i = 0; i < nbGroups; ++i) { + accY[i] ^= NMH_readLE32(p + i * 4 + sizeof(NMH_ACC_INIT)); + } + for (i = 0; i < nbGroups; ++i) { + accX[i] += accY[i]; + } + for (i = 0; i < nbGroups; ++i) { + accY[i] ^= accX[i] >> 1; + } + for (i = 0; i < nbGroups * 2; ++i) { + ((uint16_t*)accX)[i] *= ((uint16_t*)__NMH_M1_V)[i]; + } + for (i = 0; i < nbGroups; ++i) { + accX[i] ^= accX[i] << 5 ^ accX[i] >> 13; + } + for (i = 0; i < nbGroups * 2; ++i) { + ((uint16_t*)accX)[i] *= ((uint16_t*)__NMH_M2_V)[i]; + } + for (i = 0; i < nbGroups; ++i) { + accX[i] ^= accY[i]; + } + for (i = 0; i < nbGroups; ++i) { + accX[i] ^= accX[i] << 11 ^ accX[i] >> 9; + } + for (i = 0; i < nbGroups * 2; ++i) { + ((uint16_t*)accX)[i] *= ((uint16_t*)__NMH_M3_V)[i]; + } + for (i = 0; i < nbGroups; ++i) { + accX[i] ^= 
accX[i] >> 10 ^ accX[i] >> 20; + } +} +#endif + +#if NMH_VECTOR == NMH_SSE2 +# define _NMH_MM_(F) _mm_ ## F +# define _NMH_MMW_(F) _mm_ ## F ## 128 +# define _NMH_MM_T __m128i +#elif NMH_VECTOR == NMH_AVX2 +# define _NMH_MM_(F) _mm256_ ## F +# define _NMH_MMW_(F) _mm256_ ## F ## 256 +# define _NMH_MM_T __m256i +#elif NMH_VECTOR == NMH_AVX512 +# define _NMH_MM_(F) _mm512_ ## F +# define _NMH_MMW_(F) _mm512_ ## F ## 512 +# define _NMH_MM_T __m512i +#endif + +#if NMH_VECTOR == NMH_SSE2 || NMH_VECTOR == NMH_AVX2 || NMH_VECTOR == NMH_AVX512 +# define NMHASH32_long_round NMHASH32_long_round_sse +# define NMH_VECTOR_NB_GROUP (sizeof(NMH_ACC_INIT) / sizeof(*NMH_ACC_INIT) / (sizeof(_NMH_MM_T) / sizeof(*NMH_ACC_INIT))) +static inline +void +NMHASH32_long_round_sse(uint32_t *const NMH_RESTRICT accX, uint32_t *const NMH_RESTRICT accY, const uint8_t* const NMH_RESTRICT p) +{ + const _NMH_MM_T *const NMH_RESTRICT m1 = (const _NMH_MM_T * NMH_RESTRICT)__NMH_M1_V; + const _NMH_MM_T *const NMH_RESTRICT m2 = (const _NMH_MM_T * NMH_RESTRICT)__NMH_M2_V; + const _NMH_MM_T *const NMH_RESTRICT m3 = (const _NMH_MM_T * NMH_RESTRICT)__NMH_M3_V; + _NMH_MM_T *const xaccX = ( _NMH_MM_T * )accX; + _NMH_MM_T *const xaccY = ( _NMH_MM_T * )accY; + _NMH_MM_T *const xp = ( _NMH_MM_T * )p; + size_t i; + + for (i = 0; i < NMH_VECTOR_NB_GROUP; ++i) { + xaccX[i] = _NMH_MMW_(xor_si)(xaccX[i], _NMH_MMW_(loadu_si)(xp + i)); + } + for (i = 0; i < NMH_VECTOR_NB_GROUP; ++i) { + xaccY[i] = _NMH_MMW_(xor_si)(xaccY[i], _NMH_MMW_(loadu_si)(xp + i + NMH_VECTOR_NB_GROUP)); + } + for (i = 0; i < NMH_VECTOR_NB_GROUP; ++i) { + xaccX[i] = _NMH_MM_(add_epi32)(xaccX[i], xaccY[i]); + } + for (i = 0; i < NMH_VECTOR_NB_GROUP; ++i) { + xaccY[i] = _NMH_MMW_(xor_si)(xaccY[i], _NMH_MM_(srli_epi32)(xaccX[i], 1)); + } + for (i = 0; i < NMH_VECTOR_NB_GROUP; ++i) { + xaccX[i] = _NMH_MM_(mullo_epi16)(xaccX[i], *m1); + } + for (i = 0; i < NMH_VECTOR_NB_GROUP; ++i) { + xaccX[i] = _NMH_MMW_(xor_si)(_NMH_MMW_(xor_si)(xaccX[i], 
_NMH_MM_(slli_epi32)(xaccX[i], 5)), _NMH_MM_(srli_epi32)(xaccX[i], 13)); + } + for (i = 0; i < NMH_VECTOR_NB_GROUP; ++i) { + xaccX[i] = _NMH_MM_(mullo_epi16)(xaccX[i], *m2); + } + for (i = 0; i < NMH_VECTOR_NB_GROUP; ++i) { + xaccX[i] = _NMH_MMW_(xor_si)(xaccX[i], xaccY[i]); + } + for (i = 0; i < NMH_VECTOR_NB_GROUP; ++i) { + xaccX[i] = _NMH_MMW_(xor_si)(_NMH_MMW_(xor_si)(xaccX[i], _NMH_MM_(slli_epi32)(xaccX[i], 11)), _NMH_MM_(srli_epi32)(xaccX[i], 9)); + } + for (i = 0; i < NMH_VECTOR_NB_GROUP; ++i) { + xaccX[i] = _NMH_MM_(mullo_epi16)(xaccX[i], *m3); + } + for (i = 0; i < NMH_VECTOR_NB_GROUP; ++i) { + xaccX[i] = _NMH_MMW_(xor_si)(_NMH_MMW_(xor_si)(xaccX[i], _NMH_MM_(srli_epi32)(xaccX[i], 10)), _NMH_MM_(srli_epi32)(xaccX[i], 20)); + } +} +# undef _NMH_MM_ +# undef _NMH_MMW_ +# undef _NMH_MM_T +# undef NMH_VECTOR_NB_GROUP +#endif + +static +uint32_t +NMHASH32_long(const uint8_t* const NMH_RESTRICT p, size_t const len, uint32_t const seed) +{ + NMH_ALIGN(NMH_ACC_ALIGN) uint32_t accX[sizeof(NMH_ACC_INIT)/sizeof(*NMH_ACC_INIT)]; + NMH_ALIGN(NMH_ACC_ALIGN) uint32_t accY[sizeof(accX)/sizeof(*accX)]; + size_t const nbRounds = (len - 1) / (sizeof(accX) + sizeof(accY)); + size_t i; + uint32_t sum = 0; + + /* init */ + for (i = 0; i < sizeof(accX)/sizeof(*accX); ++i) accX[i] = NMH_ACC_INIT[i]; + for (i = 0; i < sizeof(accY)/sizeof(*accY); ++i) accY[i] = seed; + + for (i = 0; i < nbRounds; ++i) { + NMHASH32_long_round(accX, accY, p + i * (sizeof(accX) + sizeof(accY))); + } + NMHASH32_long_round(accX, accY, p + len - (sizeof(accX) + sizeof(accY))); + + /* merge acc */ + for (i = 0; i < sizeof(accX)/sizeof(*accX); ++i) accX[i] ^= NMH_ACC_INIT[i]; + for (i = 0; i < sizeof(accX)/sizeof(*accX); ++i) sum += accX[i]; + +# if SIZE_MAX > UINT32_C(-1) + sum += (uint32_t)(len >> 32); +# endif + return sum ^ (uint32_t)len; +} + +static inline +uint32_t +NMHASH32_avalanche32(uint32_t const x) +{ + /* [-21 -8 cce5196d 12 -7 464be229 -21 -8] = 3.2267098842182733 */ + const uint32_t m1 = 
UINT32_C(0xCCE5196D); + const uint32_t m2 = UINT32_C(0x464BE229); + union { uint32_t u32; uint16_t u16[2]; } vx; + vx.u32 = x; + vx.u32 ^= (vx.u32 >> 8) ^ (vx.u32 >> 21); + vx.u16[0] = (uint16_t)(vx.u16[0] * (uint16_t)m1); + vx.u16[1] = (uint16_t)(vx.u16[1] * (uint16_t)(m1 >> 16)); + vx.u32 ^= (vx.u32 << 12) ^ (vx.u32 >> 7); + vx.u16[0] = (uint16_t)(vx.u16[0] * (uint16_t)m2); + vx.u16[1] = (uint16_t)(vx.u16[1] * (uint16_t)(m2 >> 16)); + return vx.u32 ^ (vx.u32 >> 8) ^ (vx.u32 >> 21); +} + +static inline +uint32_t +NMHASH32(const void* const NMH_RESTRICT input, size_t const len, uint32_t seed) +{ + const uint8_t *const p = (const uint8_t *)input; + if (NMH_likely(len <= 32)) { + if(NMH_likely(len > 8)) { + return NMHASH32_9to32(p, len, seed); + } + if(NMH_likely(len > 4)) { + uint32_t x = NMH_readLE32(p); + uint32_t y = NMH_readLE32(p + len - 4) ^ (NMH_PRIME32_4 + 2 + seed); + x += y; + x ^= x << (len + 7); + return NMHASH32_0to8(x, NMH_rotl32(y, 5)); + } else { + union { uint32_t u32; uint16_t u16[2]; uint8_t u8[4]; } data; + switch (len) { + case 0: seed += NMH_PRIME32_2; + data.u32 = 0; + break; + case 1: seed += NMH_PRIME32_2 + (UINT32_C(1) << 24) + (1 << 1); + data.u32 = p[0]; + break; + case 2: seed += NMH_PRIME32_2 + (UINT32_C(2) << 24) + (2 << 1); + data.u32 = NMH_readLE16(p); + break; + case 3: seed += NMH_PRIME32_2 + (UINT32_C(3) << 24) + (3 << 1); + data.u16[1] = p[2]; + data.u16[0] = NMH_readLE16(p); + break; + case 4: seed += NMH_PRIME32_3; + data.u32 = NMH_readLE32(p); + break; + default: return 0; + } + return NMHASH32_0to8(data.u32 + seed, NMH_rotl32(seed, 5)); + } + } + if (NMH_likely(len < 256)) { + return NMHASH32_33to255(p, len, seed); + } + return NMHASH32_avalanche32(NMHASH32_long(p, len, seed)); +} + +static inline +uint32_t +NMHASH32X_0to4(uint32_t x, uint32_t const seed) +{ + /* [bdab1ea9 18 a7896a1b 12 83796a2d 16] = 0.092922873297662509 */ + x ^= seed; + x *= UINT32_C(0xBDAB1EA9); + x += NMH_rotl32(seed, 31); + x ^= x >> 18; + x *= 
UINT32_C(0xA7896A1B); + x ^= x >> 12; + x *= UINT32_C(0x83796A2D); + x ^= x >> 16; + return x; +} + +static inline +uint32_t +NMHASH32X_5to8(const uint8_t* const NMH_RESTRICT p, size_t const len, uint32_t const seed) +{ + /* - 5 to 8 bytes + * - mixer: [11049a7d 23 bcccdc7b 12 065e9dad 12] = 0.16577596555667246 */ + + uint32_t x = NMH_readLE32(p) ^ NMH_PRIME32_3; + uint32_t const y = NMH_readLE32(p + len - 4) ^ seed; + x += y; + x ^= x >> len; + x *= UINT32_C(0x11049A7D); + x ^= x >> 23; + x *= UINT32_C(0xBCCCDC7B); + x ^= NMH_rotl32(y, 3); + x ^= x >> 12; + x *= UINT32_C(0x065E9DAD); + x ^= x >> 12; + return x; +} + +static inline +uint32_t +NMHASH32X_9to255(const uint8_t* const NMH_RESTRICT p, size_t const len, uint32_t const seed) +{ + /* - at least 9 bytes + * - base mixer: [11049a7d 23 bcccdc7b 12 065e9dad 12] = 0.16577596555667246 + * - tail mixer: [16 a52fb2cd 15 551e4d49 16] = 0.17162579707098322 + */ + + uint32_t x = NMH_PRIME32_3; + uint32_t y = seed; + uint32_t a = NMH_PRIME32_4; + uint32_t b = seed; + size_t i, r = (len - 1) / 16; + + for (i = 0; i < r; ++i) { + x ^= NMH_readLE32(p + i * 16 + 0); + y ^= NMH_readLE32(p + i * 16 + 4); + x ^= y; + x *= UINT32_C(0x11049A7D); + x ^= x >> 23; + x *= UINT32_C(0xBCCCDC7B); + y = NMH_rotl32(y, 4); + x ^= y; + x ^= x >> 12; + x *= UINT32_C(0x065E9DAD); + x ^= x >> 12; + + a ^= NMH_readLE32(p + i * 16 + 8); + b ^= NMH_readLE32(p + i * 16 + 12); + a ^= b; + a *= UINT32_C(0x11049A7D); + a ^= a >> 23; + a *= UINT32_C(0xBCCCDC7B); + b = NMH_rotl32(b, 3); + a ^= b; + a ^= a >> 12; + a *= UINT32_C(0x065E9DAD); + a ^= a >> 12; + } + + if (NMH_likely(((uint8_t)len-1) & 8)) { + if (NMH_likely(((uint8_t)len-1) & 4)) { + a ^= NMH_readLE32(p + r * 16 + 0); + b ^= NMH_readLE32(p + r * 16 + 4); + a ^= b; + a *= UINT32_C(0x11049A7D); + a ^= a >> 23; + a *= UINT32_C(0xBCCCDC7B); + a ^= NMH_rotl32(b, 4); + a ^= a >> 12; + a *= UINT32_C(0x065E9DAD); + } else { + a ^= NMH_readLE32(p + r * 16) + b; + a ^= a >> 16; + a *=
UINT32_C(0xA52FB2CD); + a ^= a >> 15; + a *= UINT32_C(0x551E4D49); + } + + x ^= NMH_readLE32(p + len - 8); + y ^= NMH_readLE32(p + len - 4); + x ^= y; + x *= UINT32_C(0x11049A7D); + x ^= x >> 23; + x *= UINT32_C(0xBCCCDC7B); + x ^= NMH_rotl32(y, 3); + x ^= x >> 12; + x *= UINT32_C(0x065E9DAD); + } else { + if (NMH_likely(((uint8_t)len-1) & 4)) { + a ^= NMH_readLE32(p + r * 16) + b; + a ^= a >> 16; + a *= UINT32_C(0xA52FB2CD); + a ^= a >> 15; + a *= UINT32_C(0x551E4D49); + } + x ^= NMH_readLE32(p + len - 4) + y; + x ^= x >> 16; + x *= UINT32_C(0xA52FB2CD); + x ^= x >> 15; + x *= UINT32_C(0x551E4D49); + } + + x ^= (uint32_t)len; + x ^= NMH_rotl32(a, 27); /* rotate one lane to pass Diff test */ + x ^= x >> 14; + x *= UINT32_C(0x141CC535); + + return x; +} + +static inline +uint32_t +NMHASH32X_avalanche32(uint32_t x) +{ + /* mixer with 2 mul from skeeto/hash-prospector: + * [15 d168aaad 15 af723597 15] = 0.15983776156606694 + */ + x ^= x >> 15; + x *= UINT32_C(0xD168AAAD); + x ^= x >> 15; + x *= UINT32_C(0xAF723597); + x ^= x >> 15; + return x; +} + +/* use 32*32->32 multiplication for short hash */ +static inline +uint32_t +NMHASH32X(const void* const NMH_RESTRICT input, size_t const len, uint32_t seed) +{ + const uint8_t *const p = (const uint8_t *)input; + if (NMH_likely(len <= 8)) { + if (NMH_likely(len > 4)) { + return NMHASH32X_5to8(p, len, seed); + } else { + /* 0-4 bytes */ + union { uint32_t u32; uint16_t u16[2]; uint8_t u8[4]; } data; + switch (len) { + case 0: seed += NMH_PRIME32_2; + data.u32 = 0; + break; + case 1: seed += NMH_PRIME32_2 + (UINT32_C(1) << 24) + (1 << 1); + data.u32 = p[0]; + break; + case 2: seed += NMH_PRIME32_2 + (UINT32_C(2) << 24) + (2 << 1); + data.u32 = NMH_readLE16(p); + break; + case 3: seed += NMH_PRIME32_2 + (UINT32_C(3) << 24) + (3 << 1); + data.u16[1] = p[2]; + data.u16[0] = NMH_readLE16(p); + break; + case 4: seed += NMH_PRIME32_1; + data.u32 = NMH_readLE32(p); + break; + default: return 0; + } + return NMHASH32X_0to4(data.u32, 
seed); + } + } + if (NMH_likely(len < 256)) { + return NMHASH32X_9to255(p, len, seed); + } + return NMHASH32X_avalanche32(NMHASH32_long(p, len, seed)); +} + +#if defined(_MSC_VER) && _MSC_VER >= 1914 +# pragma warning(pop) +#endif +#ifdef __SDCC +# pragma restore +# undef const +#endif + +#endif /* _nmhash_h_ */ + +#ifdef __cplusplus +} +#endif diff --git a/src/tests/hash_functions/validation/pengyhash.c b/src/tests/hash_functions/validation/pengyhash.c new file mode 100755 index 000000000..d7b1ec02d --- /dev/null +++ b/src/tests/hash_functions/validation/pengyhash.c @@ -0,0 +1,30 @@ +/* pengyhash v0.2 */ + +#include "pengyhash.h" + +uint64_t pengyhash(const void *p, size_t size, uint32_t seed) +{ + uint64_t b[4] = { 0 }; + uint64_t s[4] = { 0, 0, 0, size }; + int i; + + for(; size >= 32; size -= 32, p = (const char*)p + 32) { + memcpy(b, p, 32); + + s[1] = (s[0] += s[1] + b[3]) + (s[1] << 14 | s[1] >> 50); + s[3] = (s[2] += s[3] + b[2]) + (s[3] << 23 | s[3] >> 41); + s[3] = (s[0] += s[3] + b[1]) ^ (s[3] << 16 | s[3] >> 48); + s[1] = (s[2] += s[1] + b[0]) ^ (s[1] << 40 | s[1] >> 24); + } + + memcpy(b, p, size); + + for(i = 0; i < 6; i++) { + s[1] = (s[0] += s[1] + b[3]) + (s[1] << 14 | s[1] >> 50) + seed; + s[3] = (s[2] += s[3] + b[2]) + (s[3] << 23 | s[3] >> 41); + s[3] = (s[0] += s[3] + b[1]) ^ (s[3] << 16 | s[3] >> 48); + s[1] = (s[2] += s[1] + b[0]) ^ (s[1] << 40 | s[1] >> 24); + } + + return s[0] + s[1] + s[2] + s[3]; +} diff --git a/src/tests/hash_functions/validation/pengyhash.h b/src/tests/hash_functions/validation/pengyhash.h new file mode 100755 index 000000000..b9ff7010c --- /dev/null +++ b/src/tests/hash_functions/validation/pengyhash.h @@ -0,0 +1,9 @@ +#ifndef _PENGYHASH_H +#define _PENGYHASH_H + +#include <stdint.h> +#include <string.h> + +uint64_t pengyhash(const void *p, size_t size, uint32_t seed); + +#endif diff --git a/src/tests/hash_functions/validation/test_32_bit_hash_validation.f90 b/src/tests/hash_functions/validation/test_32_bit_hash_validation.f90 new file mode
100755 index 000000000..12b113a65 --- /dev/null +++ b/src/tests/hash_functions/validation/test_32_bit_hash_validation.f90 @@ -0,0 +1,86 @@ +program test_32_bit_hash_validation +!! Compares the output of Fortran versions of 32 bit hash procedures +!! with the original C/C++ versions + + use, intrinsic :: iso_fortran_env, only: int8, int32, int64, real64 + use, intrinsic :: iso_c_binding, only : c_loc, c_long + use stdlib_32_bit_hash_functions, only: & + nmhash32, & + new_nmhash32_seed, & + nmhash32x, & + new_nmhash32x_seed, & + water_hash, & + new_water_hash_seed + use nmhash_wrapper, only: c_nmhash32, c_nmhash32x + use waterhash_wrapper, only: c_waterhash + + implicit none + + integer(int32) :: nmhash32_code, c_nmhash32_code, & + nmhash32x_code, c_nmhash32x_code, water_hash_code, c_waterhash_code + integer(int32) :: nmhash32_seed, nmhash32x_seed + integer(int64) :: waterhash_seed + integer(int8) :: test_array(512) + real(real64) :: rand(128) + integer(int32) :: dummy(128) + integer :: i + +! Create test array: 128 random int32 words reinterpreted as 512 int8 bytes + call random_number( rand ) + do i=1, 128 + dummy(i) = floor( rand(i) * 2_int64**32 - 2_int64**31, kind=int32 ) + end do + test_array = transfer( dummy, 0_int8, 512 ) + + waterhash_seed = 0 + call new_water_hash_seed( waterhash_seed ) + + do i=0, 512 + water_hash_code = water_hash( test_array(1:i), waterhash_seed ) + c_waterhash_code = c_waterhash( test_array(1:i), waterhash_seed ) + if ( .not. ( water_hash_code == c_waterhash_code ) ) then + write(*,*) "WATER_HASH failed for INT8 array size = ", i + write(*,*) "WATERHASH_SEED = ", waterhash_seed + write(*,*) 'WATER_HASH_CODE = ', water_hash_code + write(*,*) 'C_WATERHASH_CODE = ', c_waterhash_code + write(*,*) "Array = ", test_array(1:i) + stop "Hash failure" + end if + end do + write(*,*) "WATER_HASH passed validation test." + + nmhash32_seed = 0 +!
call new_nmhash32_seed( nmhash32_seed ) + + do i=0, 512 + nmhash32_code = nmhash32( test_array(1:i), nmhash32_seed ) + c_nmhash32_code = c_nmhash32( test_array(1:i), nmhash32_seed ) + if ( .not. ( nmhash32_code == c_nmhash32_code ) ) then + write(*,*) "NMHASH32 failed for INT8 array size = ", i + write(*,*) "NMHASH32_SEED = ", nmhash32_seed + write(*,*) 'NMHASH32_CODE = ', nmhash32_code + write(*,*) 'C_NMHASH32_CODE = ', c_nmhash32_code + write(*,*) "Array = ", test_array(1:i) + stop "Hash failure" + end if + end do + write(*,*) "NMHASH32 passed validation test." + + nmhash32x_seed = 0 +! call new_nmhash32x_seed( nmhash32x_seed ) + + do i=0, 512 + nmhash32x_code = nmhash32x( test_array(1:i), nmhash32x_seed ) + c_nmhash32x_code = c_nmhash32x( test_array(1:i), nmhash32x_seed ) + if ( .not. ( nmhash32x_code == c_nmhash32x_code ) ) then + write(*,*) "NMHASH32X failed for INT8 array size = ", i + write(*,*) "NMHASH32X_SEED = ", nmhash32x_seed + write(*,*) 'NMHASH32X_CODE = ', nmhash32x_code + write(*,*) 'C_NMHASH32X_CODE = ', c_nmhash32x_code + write(*,*) "Array = ", test_array(1:i) + stop "Hash failure" + end if + end do + write(*,*) "NMHASH32X passed validation test." + +end program test_32_bit_hash_validation diff --git a/src/tests/hash_functions/validation/test_64_bit_hash_validation.f90 b/src/tests/hash_functions/validation/test_64_bit_hash_validation.f90 new file mode 100755 index 000000000..e9ae286bf --- /dev/null +++ b/src/tests/hash_functions/validation/test_64_bit_hash_validation.f90 @@ -0,0 +1,64 @@ +program test_64_bit_hash_validation +!! Compares the output of Fortran versions of 64 bit hash procedures +!!
with the original C/C++ versions + + use, intrinsic :: iso_fortran_env, only: int8, int32, int64, real64 + use, intrinsic :: iso_c_binding, only : c_loc, c_long + use stdlib_64_bit_hash_functions, only: & + pengy_hash, & + new_pengy_hash_seed, & + spooky_hash, & + new_spooky_hash_seed + use pengy_wrapper, only: c_pengyhash + use spookyv2_wrapper, only: c_spooky128 + + implicit none + + integer(int64) :: pengy_hash_code, c_pengy_hash_code, & + spooky_seed(2), spooky_hash_code(2), c_spooky_hash_code(2) + integer(int32) :: pengy_seed + integer(int8) :: test_array(512) + real(real64) :: rand(128) + integer(int32) :: dummy(128) + integer :: i + +! Create test array + call random_number( rand ) + do i=1, 128 + dummy(i) = floor( rand(i) * 2_int64**32 - 2_int64**31, kind=int32 ) + end do + test_array = transfer( dummy, 0_int8, 512 ) + + pengy_seed = 0_int64 + call new_pengy_hash_seed( pengy_seed ) + + do i=0, 512 + pengy_hash_code = pengy_hash( test_array(1:i), pengy_seed ) + c_pengy_hash_code = c_pengyhash( test_array(1:i), pengy_seed ) + if ( .not. ( pengy_hash_code == c_pengy_hash_code ) ) then + write(*,*) "PENGY_HASH failed for INT8 array size = ", i + write(*,*) 'PENGY_HASH_CODE = ', pengy_hash_code + write(*,*) 'C_PENGY_HASH_CODE = ', c_pengy_hash_code + write(*,*) "Array = ", test_array(1:i) + stop "Hash failure" + end if + end do + write(*,*) "PENGY_HASH passed validation test." + + spooky_seed = [ 0_int64, 0_int64 ] + call new_spooky_hash_seed( spooky_seed ) + + do i=0, 512 + spooky_hash_code = spooky_hash( test_array(1:i), spooky_seed ) + c_spooky_hash_code = c_spooky128( test_array(1:i), spooky_seed ) + if ( .not.
all( spooky_hash_code == c_spooky_hash_code ) ) then + write(*,*) "SPOOKY_HASH failed for INT8 array size = ", i + write(*,*) 'SPOOKY_HASH_CODE = ', spooky_hash_code + write(*,*) 'C_SPOOKY_HASH_CODE = ', c_spooky_hash_code + write(*,*) "Array = ", test_array(1:i) + stop "Hash failure" + end if + end do + write(*,*) "SPOOKY_HASH passed validation test." + +end program test_64_bit_hash_validation diff --git a/src/tests/hash_functions/validation/waterhash.c b/src/tests/hash_functions/validation/waterhash.c new file mode 100755 index 000000000..7d6c92d99 --- /dev/null +++ b/src/tests/hash_functions/validation/waterhash.c @@ -0,0 +1,6 @@ +#include "waterhash.h" + +int32_t waterhash_test ( const void * key, uint32_t len, uint64_t seed ) { + return waterhash (key, len, seed); +} + diff --git a/src/tests/hash_functions/validation/waterhash.h b/src/tests/hash_functions/validation/waterhash.h new file mode 100755 index 000000000..d05dc1269 --- /dev/null +++ b/src/tests/hash_functions/validation/waterhash.h @@ -0,0 +1,54 @@ +/* + Waterhash takes (optimally) 32-bit inputs and produces a 32-bit hash as its result. + It is an edited version of wyhash that uses at most 64-bit math instead of 128-bit. + It is meant to use very similar code to Wheathash, which produces a 64-bit hash. 
+ Original Author: Wang Yi + Waterhash Variant Author: Tommy Ettinger +*/ +#ifndef waterhash_version_3 +#define waterhash_version_3 +#include +#include +#include +const uint64_t _waterp0 = 0xa0761d65ull, _waterp1 = 0xe7037ed1ull, _waterp2 = 0x8ebc6af1ull; +const uint64_t _waterp3 = 0x589965cdull, _waterp4 = 0x1d8e4e27ull, _waterp5 = 0xeb44accbull; + +static inline uint64_t _watermum(const uint64_t A, const uint64_t B) { + uint64_t r = A * B; + return r - (r >> 32); +} + +static inline uint64_t _waterr08(const uint8_t *p){ uint8_t v; memcpy(&v, p, 1); return v; } +static inline uint64_t _waterr16(const uint8_t *p){ uint16_t v; memcpy(&v, p, 2); return v; } +static inline uint64_t _waterr32(const uint8_t *p){ uint32_t v; memcpy(&v, p, 4); return v; } +static inline uint32_t waterhash(const void* key, uint32_t len, uint64_t seed){ + const uint8_t *p = (const uint8_t*)key; + uint32_t i; + for (i = 0; i + 16 <= len; i += 16, p += 16) { + seed = _watermum( + _watermum(_waterr32(p) ^ _waterp1, _waterr32(p + 4) ^ _waterp2) + seed, + _watermum(_waterr32(p + 8) ^ _waterp3, _waterr32(p + 12) ^ _waterp4)); + } + seed += _waterp5; + switch (len & 15) { + case 1: seed = _watermum(_waterp2 ^ seed, _waterr08(p) ^ _waterp1); break; + case 2: seed = _watermum(_waterp3 ^ seed, _waterr16(p) ^ _waterp4); break; + case 3: seed = _watermum(_waterr16(p) ^ seed, _waterr08(p + 2) ^ _waterp2); break; + case 4: seed = _watermum(_waterr16(p) ^ seed, _waterr16(p + 2) ^ _waterp3); break; + case 5: seed = _watermum(_waterr32(p) ^ seed, _waterr08(p + 4) ^ _waterp1); break; + case 6: seed = _watermum(_waterr32(p) ^ seed, _waterr16(p + 4) ^ _waterp1); break; + case 7: seed = _watermum(_waterr32(p) ^ seed, (_waterr16(p + 4) << 8 | _waterr08(p + 6)) ^ _waterp1); break; + case 8: seed = _watermum(_waterr32(p) ^ seed, _waterr32(p + 4) ^ _waterp0); break; + case 9: seed = _watermum(_waterr32(p) ^ seed, _waterr32(p + 4) ^ _waterp2) ^ _watermum(seed ^ _waterp4, _waterr08(p + 8) ^ _waterp3); break; + case 
10: seed = _watermum(_waterr32(p) ^ seed, _waterr32(p + 4) ^ _waterp2) ^ _watermum(seed, _waterr16(p + 8) ^ _waterp3); break; + case 11: seed = _watermum(_waterr32(p) ^ seed, _waterr32(p + 4) ^ _waterp2) ^ _watermum(seed, ((_waterr16(p + 8) << 8) | _waterr08(p + 10)) ^ _waterp3); break; + case 12: seed = _watermum(_waterr32(p) ^ seed, _waterr32(p + 4) ^ _waterp2) ^ _watermum(seed ^ _waterr32(p + 8), _waterp4); break; + case 13: seed = _watermum(_waterr32(p) ^ seed, _waterr32(p + 4) ^ _waterp2) ^ _watermum(seed ^ _waterr32(p + 8), (_waterr08(p + 12)) ^ _waterp4); break; + case 14: seed = _watermum(_waterr32(p) ^ seed, _waterr32(p + 4) ^ _waterp2) ^ _watermum(seed ^ _waterr32(p + 8), (_waterr16(p + 12)) ^ _waterp4); break; + case 15: seed = _watermum(_waterr32(p) ^ seed, _waterr32(p + 4) ^ _waterp2) ^ _watermum(seed ^ _waterr32(p + 8), (_waterr16(p + 12) << 8 | _waterr08(p + 14)) ^ _waterp4); break; + } + seed = (seed ^ seed << 16) * (len ^ _waterp0); + return (uint32_t)(seed - (seed >> 32)); +} +#endif + From 8cd2f2e45ddf51b4fb225f4b9632159ce92605ec Mon Sep 17 00:00:00 2001 From: William Clodius Date: Sun, 21 Nov 2021 14:46:11 -0700 Subject: [PATCH 002/106] Added hash function codes Brought over codes from the original hash_functions branch. 
[ticket: X] --- src/stdlib_32_bit_fnv_hashes.fypp | 126 ++++ src/stdlib_32_bit_hash_functions.fypp | 244 ++++++++ src/stdlib_32_bit_nmhashes.fypp | 796 +++++++++++++++++++++++++ src/stdlib_32_bit_water_hashes.fypp | 282 +++++++++ src/stdlib_64_bit_fnv_hashes.fypp | 125 ++++ src/stdlib_64_bit_hash_functions.fypp | 308 ++++++++++ src/stdlib_64_bit_pengy_hashes.fypp | 148 +++++ src/stdlib_64_bit_spookyv2_hashes.fypp | 718 ++++++++++++++++++++++ 8 files changed, 2747 insertions(+) create mode 100755 src/stdlib_32_bit_fnv_hashes.fypp create mode 100755 src/stdlib_32_bit_hash_functions.fypp create mode 100755 src/stdlib_32_bit_nmhashes.fypp create mode 100755 src/stdlib_32_bit_water_hashes.fypp create mode 100755 src/stdlib_64_bit_fnv_hashes.fypp create mode 100755 src/stdlib_64_bit_hash_functions.fypp create mode 100755 src/stdlib_64_bit_pengy_hashes.fypp create mode 100755 src/stdlib_64_bit_spookyv2_hashes.fypp diff --git a/src/stdlib_32_bit_fnv_hashes.fypp b/src/stdlib_32_bit_fnv_hashes.fypp new file mode 100755 index 000000000..562de2978 --- /dev/null +++ b/src/stdlib_32_bit_fnv_hashes.fypp @@ -0,0 +1,126 @@ +!!------------------------------------------------------------------------------ +!! `FNV_1_HASH` and `FNV_1A_Hash` are translations to Fortran 2008 of the +!! `FNV-1` and `FNV-1a` hash functions of Glenn Fowler, Landon Curt Noll, +!! and Phong Vo, that has been released into the public domain. Permission +!! has been granted, by Landon Curt Noll, for the use of these algorithms +!! in the Fortran Standard Library. A description of these functions is +!! available at https://en.wikipedia.org/wiki/Fowler–Noll–Vo_hash_function. +!!------------------------------------------------------------------------------ + +!#! Integer kinds to be considered during templating +#:set INT_KINDS = ["int16", "int32", "int64"] + +submodule(stdlib_32_bit_hash_functions) stdlib_32_bit_fnv_hashes +!! An implementation of the FNV hashes 1 and 1a of Glenn Fowler, Landon Curt +!! 
Noll, and Kiem-Phong-Vo, +!! https://en.wikipedia.org/wiki/Fowler–Noll–Vo_hash_function + implicit none + + integer(int_hash), parameter :: & + offset_basis = int( z'811C9DC5', int_hash ), & + prime = int( z'01000193', int_hash ) + +contains + + pure module function int8_fnv_1( key ) result(hash_code) +!! The original FNV-1 8-bit key algorithm. + integer(int8), intent(in) :: key(:) + integer(int_hash) :: hash_code + + integer(int64) :: i + + hash_code = offset_basis + do i=1_int64, size(key, kind=int64) + hash_code = hash_code * prime + if ( little_endian ) then + hash_code = ieor( hash_code, & + transfer( [key(i), 0_int8, 0_int8, 0_int8], & + 0_int_hash ) ) + else + hash_code = ieor( hash_code, & + transfer( [0_int8, 0_int8, 0_int8, key(i)], & + 0_int_hash ) ) + end if + end do + + end function int8_fnv_1 + + +#:for k1 in INT_KINDS + pure module function ${k1}$_fnv_1( key ) result(hash_code) +! A ${k1}$ array key wrapper for the FNV-1 algorithm. + integer(${k1}$), intent(in) :: key(:) + integer(int_hash) :: hash_code + + hash_code = int8_fnv_1( transfer( key, 0_int8, & + bytes_${k1}$* & + size( key, kind=int64 ) ) ) + + end function ${k1}$_fnv_1 + +#:endfor + + + pure module function character_fnv_1( key ) result(hash_code) +! A default character key wrapper for the FNV-1 algorithm. + character(*), intent(in) :: key + integer(int_hash) :: hash_code + + hash_code = int8_fnv_1( transfer( key, & + 0_int8, & + bytes_char* & + len(key, kind=int64) ) ) + + end function character_fnv_1 + + + pure module function int8_fnv_1a( key ) result(hash_code) +!! The original FNV-1a 8-bit key algorithm. 
+ integer(int8), intent(in) :: key(:) + integer(int_hash) :: hash_code + + integer(int64) :: i + + hash_code = offset_basis + do i=1_int64, size(key, kind=int64) + if ( little_endian ) then + hash_code = ieor( hash_code, & + transfer( [key(i), 0_int8, 0_int8, 0_int8], & + 0_int_hash ) ) + else + hash_code = ieor( hash_code, & + transfer( [0_int8, 0_int8, 0_int8, key(i)], & + 0_int_hash ) ) + end if + hash_code = hash_code * prime + end do + + end function int8_fnv_1a + + +#:for k1 in INT_KINDS + pure module function ${k1}$_fnv_1a( key ) result(hash_code) +! A ${k1}$ array key wrapper for the FNV-1a algorithm. + integer(${k1}$), intent(in) :: key(:) + integer(int_hash) :: hash_code + + hash_code = int8_fnv_1a( transfer( key, 0_int8, & + bytes_${k1}$* & + size(key, kind=int64)) ) + + end function ${k1}$_fnv_1a + +#:endfor + + pure module function character_fnv_1a( key ) result(hash_code) +! A default character key wrapper for the FNV-1a algorithm. + character(*), intent(in) :: key + integer(int_hash) :: hash_code + + hash_code = int8_fnv_1a( transfer( key, 0_int8, & + (bits_char/bits_int8)* & + len(key, kind=int64) ) ) + + end function character_fnv_1a + +end submodule stdlib_32_bit_fnv_hashes diff --git a/src/stdlib_32_bit_hash_functions.fypp b/src/stdlib_32_bit_hash_functions.fypp new file mode 100755 index 000000000..fcdfc1466 --- /dev/null +++ b/src/stdlib_32_bit_hash_functions.fypp @@ -0,0 +1,244 @@ +#! Integer kinds to be considered during templating +#:set INT_KINDS = ["int8", "int16", "int32", "int64"] + +module stdlib_32_bit_hash_functions + + use, intrinsic :: iso_fortran_env, only : & + character_storage_size + + use stdlib_kinds, only: & + dp, & + int8, & + int16, & + int32, & + int64 + + implicit none + + private + + integer, parameter, public :: & + int_hash = int32 +!! The integer kind of the output hash + +! pow32_over_phi is the odd integer that most closely approximates 2**32/phi, +! where phi is the golden ratio 1.618...
+ integer(int32), parameter :: & + pow32_over_phi = int( z'9E3779B9', int32 ) + +! The number of bits used by each integer type + integer, parameter :: & +! Should be 8 + bits_int8 = bit_size(0_int8), & +! Should be 16 + bits_int16 = bit_size(0_int16), & +! Should be 32 + bits_int32 = bit_size(0_int32), & +! Should be 64 + bits_int64 = bit_size(0_int64) + + integer, parameter :: & +! Should be 1 + bytes_int8 = bits_int8/bits_int8, & +! Should be 2 + bytes_int16 = bits_int16/bits_int8, & +! Should be 4 + bytes_int32 = bits_int32/bits_int8, & +! Should be 8 + bytes_int64 = bits_int64/bits_int8 + + integer, parameter :: & + bits_char = character_storage_size, & + bytes_char = bits_char/bits_int8 + +! Dealing with different endians + logical, parameter, public :: & + little_endian = ( 1 == transfer([1_int8, 0_int8], 0_int16) ) + + public :: & + fibonacci_hash, & + fnv_1_hash, & + fnv_1a_hash, & + new_nmhash32_seed, & + new_nmhash32x_seed, & + new_water_hash_seed,& + nmhash32, & + nmhash32x, & + odd_random_integer, & + universal_mult_hash,& + water_hash + + + interface fnv_1_hash +!! FNV_1 interfaces + + #:for k1 in INT_KINDS + pure module function ${k1}$_fnv_1( key ) result(hash_code) +!! FNV_1 hash function for rank 1 array keys of kind ${k1}$ + integer(${k1}$), intent(in) :: key(:) + integer(int_hash) :: hash_code + end function ${k1}$_fnv_1 + + #:endfor + + pure module function character_fnv_1( key ) result(hash_code) +!! FNV_1 hash function for default character string keys + character(*), intent(in) :: key + integer(int_hash) :: hash_code + end function character_fnv_1 + + end interface fnv_1_hash + + interface fnv_1a_hash +!! FNV_1A interfaces + #:for k1 in INT_KINDS + pure module function ${k1}$_fnv_1a( key ) result(hash_value) +!!
interface nmhash32x
!! NMHASH32X interfaces: one specific function per integer kind in
!! INT_KINDS, plus one for default character strings. Each takes the key
!! and a 32 bit SEED and returns a 32 bit hash value.

  #:for k1 in INT_KINDS
    pure module function ${k1}$_nmhash32x( key, seed ) &
        result(hash_value)
!! NMHASH32X hash function for rank 1 array keys of kind ${k1}$
        integer(${k1}$), intent(in) :: key(0:)
        integer(int32), intent(in)  :: seed
        integer(int32)              :: hash_value
    end function ${k1}$_nmhash32x

  #:endfor

    pure module function character_nmhash32x( key, seed ) &
        result(hash_value)
!! NMHASH32X hash function for default character string keys
        character(*), intent(in)      :: key
        integer(int32), intent(in)    :: seed
        integer(int32)                :: hash_value
    end function character_nmhash32x

end interface nmhash32x
pure function fibonacci_hash( key, nbits ) result( sample )
!! Multiplies the 32 bit integer KEY by POW32_OVER_PHI, letting the product
!! wrap around, and returns the product shifted right by 32 - NBITS bit
!! positions, i.e. its top NBITS bits moved to the low end of the word.
!! NBITS is expected to be less than 32.
    integer(int32), intent(in) :: key
    integer, intent(in)        :: nbits
    integer(int32)             :: sample

    integer(int32) :: scrambled
    integer        :: shift

    ! A negative shift count makes ISHFT shift to the right with zero fill.
    shift     = nbits - 32
    scrambled = pow32_over_phi * key
    sample    = ishft( scrambled, shift )

end function fibonacci_hash
Returns a 32 bit pseudo random integer, HARVEST, distributed uniformly over +!! the odd integers of the INT32 kind. + integer(int32), intent(out) :: harvest + real(dp) :: sample + + call random_number( sample ) + harvest = int( floor( sample * 2_int64**32, int64 ) - 2_int64**31, & + int32 ) + harvest = ishft( harvest, 1 ) + 1_int32 + + end subroutine odd_random_integer + +end module stdlib_32_bit_hash_functions diff --git a/src/stdlib_32_bit_nmhashes.fypp b/src/stdlib_32_bit_nmhashes.fypp new file mode 100755 index 000000000..2d2c273bb --- /dev/null +++ b/src/stdlib_32_bit_nmhashes.fypp @@ -0,0 +1,796 @@ +!!------------------------------------------------------------------------------ +!! `NM_HASH32` and `NM_HASH32X` are translations to Fortran 2008 and signed +!! two's complement arithmetic of the `nmhash32` and `nmhash32x` scalar +!! algorithms of James Z. M. Gao, copyright 2021. James Z. M. Gao's original +!! C++ code, `nmhash.h`, is available at the URL: +!! https://github.com/gzm55/hash-garage/blob/a8913138bdb3b7539c202edee30a7f0794bbd835/nmhash.h +!! under the BSD 2-Clause License: +!! https://github.com/gzm55/hash-garage/blob/a8913138bdb3b7539c202edee30a7f0794bbd835/LICENSE +!! The algorithms come in multiple versions, depending on whether the +!! vectorized instructions SSE2 or AVX2 are available. As neither instruction +!! is available in portable Fortran 2008, the algorithms that do not use these +!! instructions are used. +!! +!! The BSD 2-Clause license is as follows: +!! +!! BSD 2-Clause License +!! +!! Copyright (c) 2021, water hash algorithm. James Z.M. Gao +!! All rights reserved. +!! +!! Redistribution and use in source and binary forms, with or without +!! modification, are permitted provided that the following conditions are met: +!! +!! 1. Redistributions of source code must retain the above copyright notice, +!! this list of conditions and the following disclaimer. +!! +!! 2. 
Redistributions in binary form must reproduce the above copyright notice, +!! this list of conditions and the following disclaimer in the documentation +!! and/or other materials provided with the distribution. +!! +!! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +!! AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +!! IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +!! ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE +!! LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +!! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +!! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +!! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +!! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +!! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +!! POSSIBILITY OF SUCH DAMAGE. +!!------------------------------------------------------------------------------ + +#! Integer kinds to be considered during templating +#:set INT_KINDS = ["int16", "int32", "int64"] + +submodule(stdlib_32_bit_hash_functions) stdlib_32_bit_nmhashes + + implicit none + +! 
! Primes from XXH
! They initialize the X lanes of NMHASH32_9TO255, are added to the seed for
! the shortest keys, and initialize X and A in NMHASH32X_9TO255.
    integer(int32), parameter :: nmh_prime32_1 = int( Z'9E3779B1', int32 )
    integer(int32), parameter :: nmh_prime32_2 = int( Z'85EBCA77', int32 )
    integer(int32), parameter :: nmh_prime32_3 = int( Z'C2B2AE3D', int32 )
    integer(int32), parameter :: nmh_prime32_4 = int( Z'27D4EB2F', int32 )

! The three multipliers of the NMHASH32 base mixer (see the "base mixer"
! comment in NMHASH32_9TO255).
    integer(int32), parameter :: nmh_m1 = int(z'F0D9649B', int32 )
    integer(int32), parameter :: nmh_m2 = int(z'29A7935D', int32 )
    integer(int32), parameter :: nmh_m3 = int(z'55D35831', int32 )

! 32 element arrays in which every element is the corresponding scalar
! multiplier; NMHASH32_LONG_ROUND indexes them by accumulator lane.
    integer(int32), parameter :: nmh_m1_v(0:31) = nmh_m1
    integer(int32), parameter :: nmh_m2_v(0:31) = nmh_m2
    integer(int32), parameter :: nmh_m3_v(0:31) = nmh_m3

! NMH_M3 reinterpreted as two 16 bit integers.
! NOTE(review): NMHASH32_9TO255 declares its own local NMH_M3_16, and no
! other reference to this submodule level constant is visible in this
! submodule - confirm whether it is still needed.
    integer(int16), parameter :: nmh_m3_16(2) = transfer( nmh_m3, 0_int16, 2 )

! NOTE(review): no reference to these two flags is visible in this
! submodule - confirm whether they are still needed.
    logical, parameter :: nmh_short32_without_seed2=.false.
    logical, parameter :: nmh_short32_with_seed2=.true.

! The number of elements of NMH_ACC_INIT, and hence the number of lanes in
! each of the accumulators of NMHASH32_LONG.
    integer, parameter :: init_size = 32

! Pseudorandom secrets taken directly from FARSH.
! NMHASH32_LONG copies them into its X accumulator before the rounds and
! xors them back in when merging the accumulator.
    integer(int32), parameter :: nmh_acc_init(0:init_size-1) = [ &
        int( z'B8FE6C39', int32 ), int( z'23A44BBE', int32 ), &
        int( z'7C01812C', int32 ), int( z'F721AD1C', int32 ), &
        int( z'DED46DE9', int32 ), int( z'839097DB', int32 ), &
        int( z'7240A4A4', int32 ), int( z'B7B3671F', int32 ), &
        int( z'CB79E64E', int32 ), int( z'CCC0E578', int32 ), &
        int( z'825AD07D', int32 ), int( z'CCFF7221', int32 ), &
        int( z'B8084674', int32 ), int( z'F743248E', int32 ), &
        int( z'E03590E6', int32 ), int( z'813A264C', int32 ), &

        int( z'3C2852BB', int32 ), int( z'91C300CB', int32 ), &
        int( z'88D0658B', int32 ), int( z'1B532EA3', int32 ), &
        int( z'71644897', int32 ), int( z'A20DF94E', int32 ), &
        int( z'3819EF46', int32 ), int( z'A9DEACD8', int32 ), &
        int( z'A8FA763F', int32 ), int( z'E39C343F', int32 ), &
        int( z'F9DCBBC7', int32 ), int( z'C70B4F1D', int32 ), &
        int( z'8A51E04B', int32 ), int( z'CDB45931', int32 ), &
        int( z'C89F7EC9', int32 ), int( z'D9787364', int32 ) ]
pure function nmhash32_0to8( x, seed ) result( vx32 )
!! Scrambles the 32 bit word X through three rounds, each an xor with two
!! shifted copies of the word followed by a multiplication of the two 16 bit
!! halves of the word by the halves of a constant, and finishes with one
!! more xor-shift. SEED is xored in just before the third round.
    integer(int32), intent(in) :: x
    integer(int32), intent(in) :: seed
    integer(int32)             :: vx32

    ! base mixer: [-6 -12 776bf593 -19 11 3fb39c65 -15 -9 e9139917 -11 16]
    !             = 0.027071104091278835
    integer(int32), parameter :: mult_a = int(z'776BF593', int32)
    integer(int32), parameter :: mult_b = int(z'3FB39C65', int32)
    integer(int32), parameter :: mult_c = int(z'E9139917', int32)
    integer(int16), parameter :: halves_a(2) = transfer( mult_a, 0_int16, 2 )
    integer(int16), parameter :: halves_b(2) = transfer( mult_b, 0_int16, 2 )
    integer(int16), parameter :: halves_c(2) = transfer( mult_c, 0_int16, 2 )

    integer(int32) :: w

    w = ieor( x, ieor( ishft( x, -12 ), ishft( x, -6 ) ) )
    w = times_halves( w, halves_a )

    w = ieor( w, ieor( ishft( w, 11 ), ishft( w, -19 ) ) )
    w = times_halves( w, halves_b )

    w = ieor( w, seed )
    w = ieor( w, ieor( ishft( w, -15 ), ishft( w, -9 ) ) )
    w = times_halves( w, halves_c )

    vx32 = ieor( w, ieor( ishft( w, 16 ), ishft( w, -11 ) ) )

contains

    pure function times_halves( word, halves ) result( scaled )
    ! Views WORD as two 16 bit integers, multiplies them element by element
    ! by HALVES in 16 bit arithmetic, and reassembles the 32 bit word.
        integer(int32), intent(in) :: word
        integer(int16), intent(in) :: halves(2)
        integer(int32)             :: scaled

        scaled = transfer( transfer( word, 0_int16, 2 ) * halves, 0_int32 )

    end function times_halves

end function nmhash32_0to8
yu32(0:3) + integer(int16) :: xu16(0:1) + integer(int16), parameter :: & + nmh_m1_16(0:1) = transfer( nmh_m1, 0_int16, 2 ), & + nmh_m2_16(0:1) = transfer( nmh_m2, 0_int16, 2 ), & + nmh_m3_16(0:1) = transfer( nmh_m3, 0_int16, 2 ) + integer(int32) :: s1 + integer(int64) :: length + integer(int32) :: length32(0:1) + integer(int64) :: i, j, r + + ! base mixer: [f0d9649b 5 -13 29a7935d -9 11 55d35831 -20 -10 ] = + ! 0.93495901789135362 + + result = 0 + length = size( p, kind=int64 ) + length32 = transfer(length, 0_int32, 2) + if (little_endian) then + s1 = seed + length32(0) + else + s1 = seed + length32(1) + end if + xu32(0) = nmh_prime32_1 + xu32(1) = nmh_prime32_2 + xu32(2) = nmh_prime32_3 + xu32(3) = nmh_prime32_4 + yu32(:) = s1 + + if (full_avalanche) then + ! 33 to 255 bytes + r = (length - 1 ) /32 + do i=0, r-1 + do j=0, 3 + xu32(j) = ieor( xu32(j), nmh_readle32( p(i*32 + j*4: ) ) ) + yu32(j) = ieor( yu32(j), & + nmh_readle32( p(i*32 + j*4 + 16: ) ) ) + xu32(j) = xu32(j) + yu32(j) + xu16 = transfer( xu32(j), 0_int16, 2 ) + xu16 = xu16 * nmh_m1_16 + xu32(j) = transfer( xu16, 0_int32 ) + xu32(j) = ieor( xu32(j), & + ieor( ishft(xu32(j), 5), & + ishft(xu32(j), -13)) ) + xu16 = transfer( xu32(j), 0_int16, 2 ) + xu16 = xu16 * nmh_m2_16 + xu32(j) = transfer( xu16, 0_int32 ) + xu32(j) = ieor( xu32(j), yu32(j) ) + xu32(j) = ieor( xu32(j), & + ieor( ishft(xu32(j), 11), & + ishft(xu32(j), -9) ) ) + xu16 = transfer( xu32(j), 0_int16, 2 ) + xu16 = xu16 * nmh_m3_16 + xu32(j) = transfer( xu16, 0_int32 ) + xu32(j) = ieor( xu32(j), & + ieor( ishft(xu32(j),-10), & + ishft(xu32(j), -20) ) ) + end do + end do + do j=0, 3 + xu32(j) = ieor( xu32(j), & + nmh_readle32( p(length - 32 + j*4: ) ) ) + yu32(j) = ieor( yu32(j), & + nmh_readle32( p(length - 16 + j*4: ) ) ) + end do + else + ! 
9 to 32 bytes + xu32(0) = ieor(xu32(0), nmh_readle32(p(0:))) + xu32(1) = ieor(xu32(1), nmh_readle32(p(ishft(ishft(length,-4),3):))) + xu32(2) = ieor(xu32(2), nmh_readle32(p(length-8:))) + xu32(3) = ieor(xu32(3), & + nmh_readle32(p(length-8-ishft(ishft(length,-4),3):))) + yu32(0) = ieor(yu32(0), nmh_readle32(p(4:))) + yu32(1) = ieor(yu32(1), & + nmh_readle32(p(ishft(ishft(length,-4),3)+4:))) + yu32(2) = ieor(yu32(2), nmh_readle32(p(length-8+4:))) + yu32(3) = ieor(yu32(3), & + nmh_readle32(p(length - 8 - & + ishft(ishft(length,-4),3)+4:))) + end if + do j=0, 3 + xu32(j) = xu32(j) + yu32(j) + yu32(j) = ieor( yu32(j), ieor(ishft(yu32(j), 17), & + ishft(yu32(j), -6) ) ) + xu16 = transfer( xu32(j), 0_int16, 2 ) + xu16 = xu16 * nmh_m1_16 + xu32(j) = transfer( xu16, 0_int32 ) + xu32(j) = ieor( xu32(j), ieor(ishft(xu32(j), 5), & + ishft(xu32(j), -13) ) ) + xu16 = transfer( xu32(j), 0_int16, 2 ) + xu16 = xu16 * nmh_m2_16 + xu32(j) = transfer( xu16, 0_int32 ) + xu32(j) = ieor( xu32(j), yu32(j) ) + xu32(j) = ieor( xu32(j), ieor(ishft(xu32(j), 11), & + ishft(xu32(j), -9) ) ) + xu16 = transfer( xu32(j), 0_int16, 2 ) + xu16 = xu16 * nmh_m3_16 + xu32(j) = transfer( xu16, 0_int32 ) + xu32(j) = ieor( xu32(j), ieor(ishft(xu32(j), -10), & + ishft(xu32(j), -20) ) ) + end do + xu32(0) = ieor( xu32(0), nmh_prime32_1 ) + xu32(1) = ieor( xu32(1), nmh_prime32_2 ) + xu32(2) = ieor( xu32(2), nmh_prime32_3 ) + xu32(3) = ieor( xu32(3), nmh_prime32_4 ) + do j=1, 3 + xu32(0) = xu32(0) + xu32(j) + end do + xu32(0) = ieor(xu32(0), s1 + ishft(s1, -5) ) + xu16 = transfer( xu32(0), 0_int16, 2 ) + xu16 = xu16 * nmh_m3_16 + xu32(0) = transfer( xu16, 0_int32 ) + xu32(0) = ieor(xu32(0), & + ieor(ishft(xu32(0), -10), ishft(xu32(0), -20) ) ) + result = xu32(0) + + end function nmhash32_9to255 + + pure function nmhash32_9to32( p, seed ) result( result ) + integer(int8), intent(in) :: p(0:) + integer(int32), intent(in) :: seed + integer(int32) :: result + + result = nmhash32_9to255( p, seed, .false. 
pure subroutine nmhash32_long_round( accx, accy, p )
!! One round of the long key NMHASH32 scheme. For each of the INIT_SIZE
!! accumulator lanes I, the little endian 32 bit word at byte offset 4*I of
!! P is xored into ACCX(I), the word at byte offset 4*I + 4*INIT_SIZE is
!! xored into ACCY(I), and the lane is then mixed. A round therefore reads
!! the first 8*INIT_SIZE bytes of P.
!!
!! ACCX, ACCY: the accumulators, updated in place.
!! P:          the bytes to absorb, starting at the round's first byte.
    integer(int32), intent(inout) :: accx(0:)
    integer(int32), intent(inout) :: accy(0:)
    integer(int8), intent(in)     :: p(0:)

    integer(int64), parameter :: nbgroups = init_size
    ! The 16 bit halves of the three multipliers. Every element of
    ! NMH_M1_V, NMH_M2_V and NMH_M3_V equals the matching scalar, so the
    ! halves are the same for every lane and are computed once, at compile
    ! time, rather than inside the loop (as NMHASH32_9TO255 also does).
    integer(int16), parameter :: &
        m1_halves(0:1) = transfer( nmh_m1, 0_int16, 2 ), &
        m2_halves(0:1) = transfer( nmh_m2, 0_int16, 2 ), &
        m3_halves(0:1) = transfer( nmh_m3, 0_int16, 2 )
    integer(int64) :: i
    integer(int16) :: halves(0:1)

    do i = 0, nbgroups-1
        ! Absorb one word into each accumulator of the lane.
        accx(i) = ieor( accx(i), nmh_readle32( p(i*4:) ) )
        accy(i) = ieor( accy(i), nmh_readle32( p(i*4+nbgroups*4:) ) )
        accx(i) = accx(i) + accy(i)
        accy(i) = ieor( accy(i), ishft(accx(i), -1) )

        ! Multiply the two 16 bit halves of ACCX(I) by those of NMH_M1.
        halves  = transfer( accx(i), 0_int16, 2 )
        halves  = halves * m1_halves
        accx(i) = transfer( halves, 0_int32 )
        accx(i) = ieor( accx(i), ieor( ishft(accx(i), 5),  &
                                       ishft(accx(i),-13) ) )

        ! Same with NMH_M2, then fold ACCY(I) in.
        halves  = transfer( accx(i), 0_int16, 2 )
        halves  = halves * m2_halves
        accx(i) = transfer( halves, 0_int32 )
        accx(i) = ieor( accx(i), accy(i) )
        accx(i) = ieor( accx(i), ieor( ishft(accx(i), 11), &
                                       ishft(accx(i),-9) ) )

        ! Same with NMH_M3, followed by the closing xor-shift.
        halves  = transfer( accx(i), 0_int16, 2 )
        halves  = halves * m3_halves
        accx(i) = transfer( halves, 0_int32 )
        accx(i) = ieor( accx(i), ieor( ishft(accx(i),-10), &
                                       ishft(accx(i),-20) ) )
    end do

end subroutine nmhash32_long_round
pure function nmhash32_avalanche32( x ) result( u32 )
!! Scrambles the 32 bit word X: an xor-shift, a multiplication of the two
!! 16 bit halves of the word by the halves of a first constant, a second
!! xor-shift, the same multiplication with a second constant, and a last
!! xor-shift. INT8_NMHASH32 applies it to the result of NMHASH32_LONG.
    integer(int32)             :: u32
    integer(int32), intent(in) :: x

    ! [-21 -8 cce5196d 12 -7 464be229 -21 -8] = 3.2267098842182733
    integer(int32), parameter :: mult_a = int(z'CCE5196D', int32)
    integer(int32), parameter :: mult_b = int(z'464BE229', int32)
    integer(int16), parameter :: halves_a(0:1) = transfer(mult_a, 0_int16, 2)
    integer(int16), parameter :: halves_b(0:1) = transfer(mult_b, 0_int16, 2)

    integer(int16) :: halves(0:1)

    u32 = ieor( x, ieor( ishft( x, -8 ), ishft( x, -21 ) ) )

    ! Element by element product, evaluated in 16 bit arithmetic.
    halves = transfer( u32, 0_int16, 2 ) * halves_a
    u32 = transfer( halves, 0_int32 )
    u32 = ieor( u32, ieor( ishft( u32, 12 ), ishft( u32, -7 ) ) )

    halves = transfer( u32, 0_int16, 2 ) * halves_b
    u32 = transfer( halves, 0_int32 )
    u32 = ieor( u32, ieor( ishft( u32, -8 ), ishft( u32, -21 ) ) )

end function nmhash32_avalanche32
NMHASH32 hash function for rank 1 array keys of kind INT8 + integer(int32) :: hash + integer(int8), intent(in) :: key(0:) + integer(int32), intent(in) :: seed + integer(int64) :: len + integer(int32) :: u32 + integer(int16) :: u16(0:1) + integer(int32) :: x, y + integer(int32) :: new_seed + + len = size( key, kind=int64 ) + if ( len <= 32 ) then + if ( len > 8 ) then + hash = nmhash32_9to32( key, seed ) + return + else if ( len > 4 ) then + x = nmh_readle32(key) + y = ieor( nmh_readle32(key(len-4:)), nmh_prime32_4 + 2 + seed ) + x = x + y + x = ieor( x, ishft(x, len + 7 ) ) + hash = nmhash32_0to8( x, ishftc(y, 5) ) + return + else + select case(len) + case(0) + new_seed = seed + nmh_prime32_2 + u32 = 0 + case(1) + new_seed = seed + nmh_prime32_2 + ishft(1_int32, 24) + & + 2_int32 + if ( little_endian ) then + u32 = transfer( [key(0), 0_int8, 0_int8, 0_int8], & + 0_int32 ) + else + u32 = transfer( [0_int8, 0_int8, 0_int8, key(0)], & + 0_int32 ) + end if + case(2) + new_seed = seed + nmh_prime32_2 + ishft(2_int32, 24) + & + 4_int32 + if (little_endian) then + u32 = transfer( [nmh_readle16(key), 0_int16], 0_int32 ) + else + u32 = transfer( [0_int16, nmh_readle16(key)], 0_int32 ) + end if + case(3) + new_seed = seed + nmh_prime32_2 + ishft(3_int32, 24) + & + 6_int32 + if ( little_endian ) then + u16(1) = transfer( [key(2), 0_int8], 0_int16 ) + u16(0) = nmh_readle16( key ) + else + u16(0) = transfer( [0_int8, key(2)], 0_int16 ) + u16(1) = nmh_readle16( key ) + end if + u32 = transfer( u16, 0_int32 ) + case(4) + new_seed = seed + nmh_prime32_3 + u32 = nmh_readle32(key) + case default + hash = 0 + return + end select + hash = nmhash32_0to8(u32+new_seed, ishftc(new_seed, 5) ) + return + end if + else if ( len < 256_int64 ) then + hash = nmhash32_33to255( key, seed ) + return + else + hash = nmhash32_avalanche32( nmhash32_long(key, seed )) + return + end if + + end function int8_nmhash32 + + pure function nmhash32x_0to4( x, seed ) result( hash ) + integer(int32), intent(in) 
pure function nmhash32x_9to255( p, seed ) result( x )
!! NMHASH32X for the keys that INT8_NMHASH32X routes here: more than 8 and
!! fewer than 256 bytes.
!!
!! The key is consumed in R = (LEN-1)/16 full blocks of 16 bytes, each block
!! updating the two lane pairs (X, Y) and (A, B) with the base mixer. The
!! remaining 1 to 16 bytes are absorbed according to bits 3 and 2 of LEN-1,
!! using either the base mixer or the shorter tail mixer (TAIL_MIX below).
!! The low 32 bits of the length and a rotated copy of A are finally folded
!! into X.
!!
!! P:    the key bytes, indexed from 0.
!! SEED: the 32 bit seed; it initializes Y and B.
    integer(int8), intent(in)  :: p(0:)
    integer(int32), intent(in) :: seed
    integer(int32)             :: x

    ! base mixer: [11049a7d 23 bcccdc7b 12 065e9dad 12] = 0.16577596555667246
    integer(int32), parameter :: base_m1 = int(z'11049A7D', int32)
    integer(int32), parameter :: base_m2 = int(z'BCCCDC7B', int32)
    integer(int32), parameter :: base_m3 = int(z'065E9DAD', int32)
    ! multipliers of the tail mixer
    integer(int32), parameter :: tail_m1 = int(z'A52FB2CD', int32)
    integer(int32), parameter :: tail_m2 = int(z'551E4D49', int32)
    ! multiplier of the final step
    integer(int32), parameter :: final_m = int(z'141CC535', int32)

    integer(int64) :: len
    integer(int32) :: len32(0:1), len_base
    integer(int32) :: y
    integer(int32) :: a, b
    integer(int64) :: i, r

    len = size(p, kind=int64)
    ! LEN_BASE is the low order 32 bits of the 64 bit length.
    len32 = transfer(len, 0_int32, 2)
    if (little_endian) then
        len_base = len32(0)
    else
        len_base = len32(1)
    end if
    x = nmh_prime32_3
    y = seed
    a = nmh_prime32_4
    b = seed
    r = (len - 1)/16

    ! Full 16 byte blocks: bytes 0-7 feed (X, Y), bytes 8-15 feed (A, B).
    do i=0, r-1
        x = ieor(x, nmh_readle32( p(i*16 + 0:) ) )
        y = ieor(y, nmh_readle32( p(i*16 + 4:) ) )
        x = ieor(x, y)
        x = x * base_m1
        x = ieor(x, ishft(x, -23) )
        x = x * base_m2
        y = ishftc(y, 4)
        x = ieor(x, y)
        x = ieor(x, ishft(x, -12) )
        x = x * base_m3
        x = ieor(x, ishft(x, -12) )

        a = ieor(a, nmh_readle32(p(i*16 + 8:)))
        b = ieor(b, nmh_readle32(p(i*16 + 12:)))
        a = ieor(a, b)
        a = a * base_m1
        a = ieor(a, ishft(a, -23) )
        a = a * base_m2
        b = ishftc(b, 3)
        a = ieor(a, b)
        a = ieor(a, ishft(a, -12) )
        a = a * base_m3
        a = ieor(a, ishft(a, -12) )
    end do

    ! The remaining LEN - 16*R bytes (1 to 16 of them).
    if ( iand(len_base-1_int32, 8_int32) /= 0 ) then
        if ( iand(len_base-1_int32, 4_int32) /= 0 ) then
            a = ieor( a, nmh_readle32( p(r*16 + 0:) ) )
            b = ieor( b, nmh_readle32( p(r*16 + 4:) ) )
            a = ieor(a, b)
            a = a * base_m1
            a = ieor(a, ishft(a, -23) )
            a = a * base_m2
            a = ieor(a, ishftc(b, 4))
            a = ieor(a, ishft(a, -12))
            a = a * base_m3
        else
            a = tail_mix( a, nmh_readle32( p(r*16:) ) + b )
        end if
        ! The last 8 bytes of the key go into (X, Y).
        x = ieor( x, nmh_readle32( p(len - 8:) ) )
        y = ieor( y, nmh_readle32( p(len - 4:) ) )
        x = ieor( x, y )
        x = x * base_m1
        x = ieor( x, ishft(x, -23) )
        x = x * base_m2
        x = ieor( x, ishftc(y, 3) )
        x = ieor( x, ishft(x, -12) )
        x = x * base_m3
    else
        if ( iand(len_base-1_int32, 4_int32) /= 0) then
            a = tail_mix( a, nmh_readle32( p(r*16:) ) + b )
        end if
        ! The last 4 bytes of the key go into X.
        x = tail_mix( x, nmh_readle32( p(len - 4:) ) + y )
    end if

    x = ieor(x, len_base )
    x = ieor(x, ishftc(a, 27))  ! rotate one lane to pass Diff test
    x = ieor(x, ishft(x,-14))
    x = x * final_m

contains

    pure function tail_mix( acc, word ) result( mixed )
    ! The tail mixer: xors WORD into ACC, then applies two rounds of
    ! xor-shift (by 16, then by 15) each followed by a multiplication.
        integer(int32), intent(in) :: acc
        integer(int32), intent(in) :: word
        integer(int32)             :: mixed

        mixed = ieor( acc, word )
        mixed = ieor( mixed, ishft(mixed, -16) )
        mixed = mixed * tail_m1
        mixed = ieor( mixed, ishft(mixed, -15) )
        mixed = mixed * tail_m2

    end function tail_mix

end function nmhash32x_9to255
#:for k1 in INT_KINDS
    pure module function ${k1}$_nmhash32( key, seed ) result(hash_code)
!! NMHASH32 hash function for rank 1 array keys of kind ${k1}$.
!! The key is reinterpreted, via TRANSFER, as an INT8 array of
!! BYTES_${k1}$ bytes per element and passed with SEED to INT8_NMHASH32.
        integer(${k1}$), intent(in) :: key(:)
        integer(int32), intent(in)  :: seed
        integer(int32)              :: hash_code

        integer(int64) :: nbytes

        nbytes    = bytes_${k1}$*size(key, kind=int64)
        hash_code = int8_nmhash32( transfer( key, 0_int8, nbytes ), seed )

    end function ${k1}$_nmhash32

#:endfor
module subroutine new_nmhash32_seed( seed )
! Random SEED generator for NMHASH32.
! Draws 32 bit integers with RANDOM_NUMBER until one differs from the value
! SEED had on entry, and returns that integer in SEED.
    integer(int32), intent(inout) :: seed

    integer(int32) :: previous
    real(dp)       :: draw

    previous = seed
    do
        call random_number( draw )
        ! Map DRAW from [0, 1) onto the full range of INT32.
        seed = int( floor( draw * 2_int64**32, int64 ) - 2_int64**31, &
                    int32 )
        if ( seed /= previous ) exit
    end do

end subroutine new_nmhash32_seed
Random SEED generator for NMHASH32X + integer(int32), intent(inout) :: seed + + integer(int32) :: old_seed + real(dp) :: sample + + old_seed = seed + find_seed:do + call random_number( sample ) + seed = int( floor( sample * 2_int64**32, int64 ) - 2_int64**31, & + int32 ) + if ( seed /= old_seed ) return + end do find_seed + + end subroutine new_nmhash32x_seed + +end submodule stdlib_32_bit_nmhashes diff --git a/src/stdlib_32_bit_water_hashes.fypp b/src/stdlib_32_bit_water_hashes.fypp new file mode 100755 index 000000000..33181ab3f --- /dev/null +++ b/src/stdlib_32_bit_water_hashes.fypp @@ -0,0 +1,282 @@ +!!------------------------------------------------------------------------------ +!! `WATER_HASH` is a translation to Fortran 2008 of the `waterhash` algorithm +!! of Tommy Ettinger. Tommy Ettinger's original C++ code, `waterhash.h`, is +!! available at the URL: https://github.com/tommyettinger/waterhash under the +!! `unlicense`, https://github.com/tommyettinger/waterhash/blob/master/LICENSE. +!! "`waterhash` is a variant on Wang Yi's `wyhash`, with 32 bit output, +!! using at most 64 bit arithmetic. `wyhash` is available at the URL: +!! `https://github.com/wangyi-fudan/wyhash` also under the unlicense: +!! `https://github.com/wangyi-fudan/wyhash/blob/master/LICENSE`. +!! Original Author: Wang Yi +!! Waterhash Variant Author: Tommy Ettinger +!! +!! The `unlicense` reads as follows: +!! This is free and unencumbered software released into the public domain. +!! +!! Anyone is free to copy, modify, publish, use, compile, sell, or +!! distribute this software, either in source code form or as a compiled +!! binary, for any purpose, commercial or non-commercial, and by any +!! means. +!! +!! In jurisdictions that recognize copyright laws, the author or authors +!! of this software dedicate any and all copyright interest in the +!! software to the public domain. We make this dedication for the benefit +!! of the public at large and to the detriment of our heirs and +!! 
!! successors. We intend this dedication to be an overt act of
!! relinquishment in perpetuity of all present and future rights to this
!! software under copyright law.
!!
!! THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
!! EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
!! MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
!! IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR
!! OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
!! ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
!! OTHER DEALINGS IN THE SOFTWARE.
!!
!! For more information, please refer to http://unlicense.org
!!
!! `WATER_HASH` is distributed as part of the `stdlib_32_bit_hash_functions.f90`
!! module and its `stdlib_32_bit_water_hashes.f90` submodule with the Fortran
!! Standard Library at URL: https://github.com/fortran-lang/stdlib.
!! The Fortran Standard Library, including this code, is distributed under the
!! MIT License as described in the `LICENSE` file distributed with the library.
!! `WATER_HASH` differs from `waterhash.h` not only in its use of Fortran,
!! but also in its use of signed two's complement arithmetic in contrast to
!! the unsigned arithmetic of Ettinger and Wang Yi, and in making some of the
!! uses of `TRANSFER` endian dependent, in an attempt to make the quality of
!! the hash endian independent. The use of signed arithmetic may change with
!! the planned introduction of the unsigned BITS datatype in what is currently
!! known as Fortran 202X.
!!
!! To be useful this code must be processed by a processor that implements two
!! Fortran 2008 extensions to Fortran 2003: submodules, and 64 bit (`INT64`)
!! integers. The processor must also use two's complement integers
!! (all Fortran 95+ processors use two's complement arithmetic) with
!! wrap around overflow at runtime and for BOZ constants. The latest releases
of the following processors are known to implement the required Fortran
+!! 2008 extensions and default to runtime wrap around overflow: FLANG,
+!! gfortran, ifort, and NAG Fortran. Older versions of gfortran will require
+!! the compiler flag, `-fno-range-check`, to ensure wrap around semantics
+!! for BOZ constants, and only versions of the NAG compiler starting with
+!! version 17, have implemented submodules. The latest releases of Cray
+!! Fortran and IBM Fortran are known to implement the Fortran 2008 extensions,
+!! but whether they also implement wrap around overflow is unknown.
+!!
+!! This implementation has only been tested on little endian processors. It
+!! will generate different hashes on big endian processors, but they are
+!! believed to be of comparable quality to those generated for little endian
+!! processors.
+!!
+!! No version of this hash is suitable as a cryptographic hash.
+!!------------------------------------------------------------------------------
+
+#! Integer kinds to be considered during templating
+#:set INT_KINDS = ["int16", "int32", "int64"]
+
+submodule(stdlib_32_bit_hash_functions) stdlib_32_bit_water_hashes
+    implicit none
+
+contains
+
+    pure module function int8_water_hash( key, seed ) result(hash_code)
+!! WATER_HASH for rank 1 arrays of kind int8.
+!! KEY is consumed in blocks of 16 bytes; the remaining `len mod 16` bytes are
+!! folded in by the `select case` ladder, the key length is mixed into the
+!! final value, and the low order 32 bits of the 64 bit state are returned.
+!! SEED is the initial value of the 64 bit state.
+        integer(int32) :: hash_code
+        integer(int8), intent(in) :: key(0:)
+        integer(int64), intent(in) :: seed
+
+        integer(int32) :: dummy(2)
+        integer(int64) :: h
+        integer(int64) :: i
+        integer(int64) :: len
+        ! Mixing constants; each fits in 32 bits, so the INT64 values are
+        ! non-negative.
+        integer(int64), parameter :: &
+            waterp0 = int(z'a0761d65', kind=int64), &
+            waterp1 = int(z'e7037ed1', kind=int64), &
+            waterp2 = int(z'8ebc6af1', kind=int64), &
+            waterp3 = int(z'589965cd', kind=int64), &
+            waterp4 = int(z'1d8e4e27', kind=int64), &
+            waterp5 = int(z'eb44accb', kind=int64)
+
+        len = size(key, kind=int64)
+        h = seed
+        ! Mix every complete 16 byte block as four 32 bit words. When the loop
+        ! finishes, `i` is the index of the first byte not yet consumed (0 if
+        ! the key is shorter than 16 bytes).
+        do i = 0_int64, len-16, 16
+            h = watermum(watermum(ieor(waterr32(key(i:)),waterp1), &
+                ieor(waterr32(key(i+4:)),waterp2)) + h, &
+                watermum(ieor(waterr32(key(i+8:)),waterp3), &
+                ieor(waterr32(key(i+12:)),waterp4)))
+        end do
+        h = h + waterp5
+
+        ! Fold in the trailing 1..15 bytes starting at key(i). There is no
+        ! case for 0, so a key whose length is a multiple of 16 skips this step.
+        select case( iand(len, 15_int64) )
+        case(1)
+            h = watermum(ieor(waterp2, h), &
+                ieor(waterr08(key(i:)), waterp1))
+        case(2)
+            h = watermum(ieor(waterp3, h), &
+                ieor(waterr16(key(i:)), waterp4))
+        case(3)
+            h = watermum(ieor(waterr16(key(i:)), h), &
+                ieor(waterr08(key(i+2:)), waterp2))
+        case(4)
+            h = watermum(ieor(waterr16(key(i:)), h), &
+                ieor(waterr16(key(i+2:)), waterp3))
+        case(5)
+            h = watermum(ieor(waterr32(key(i:)), h), &
+                ieor(waterr08(key(i+4:)), waterp1))
+        case(6)
+            h = watermum(ieor(waterr32(key(i:)), h), &
+                ieor(waterr16(key(i+4:)), waterp1))
+        case(7)
+            h = watermum(ieor(waterr32(key(i:)), h), &
+                ieor(ior(ishft(waterr16(key(i+4:)), 8), &
+                waterr08(key(i+6:))), waterp1))
+        case(8)
+            h = watermum(ieor(waterr32(key(i:)), h), &
+                ieor(waterr32(key(i+4:)), waterp0))
+        case(9)
+            h = ieor(watermum(ieor(waterr32(key(i:)), h), &
+                ieor(waterr32(key(i+4:)), waterp2)), &
+                watermum(ieor(h, waterp4), &
+                ieor(waterr08(key(i+8:)), waterp3)))
+        case(10)
+            h = ieor(watermum(ieor(waterr32(key(i:)), h), &
+                ieor(waterr32(key(i+4:)), waterp2)), &
+                watermum(h, ieor(waterr16(key(i+8:)), waterp3)))
+        case(11)
+            h = ieor(watermum(ieor(waterr32(key(i:)), h), &
+                ieor(waterr32(key(i+4:)), waterp2)), &
+                watermum(h, &
+                ieor(ior(ishft(waterr16(key(i+8:)),8), &
+                waterr08(key(i+10:))), &
+                waterp3)))
+        case(12)
+            h = ieor(watermum(ieor(waterr32(key(i:)), h), &
+                ieor(waterr32(key(i+4:)), waterp2)), &
+                watermum(ieor(h, waterr32(key(i+8:))), &
+                waterp4))
+        case(13)
+            h = ieor(watermum(ieor(waterr32(key(i:)), h), &
+                ieor(waterr32(key(i+4:)), waterp2)), &
+                watermum(ieor(h, waterr32(key(i+8:))), &
+                ieor(waterr08(key(i+12:)), waterp4)))
+        case(14)
+            h = ieor(watermum(ieor(waterr32(key(i:)), h), &
+                ieor(waterr32(key(i+4:)), waterp2)), &
+                watermum(ieor(h, waterr32(key(i+8:))), &
+                ieor(waterr16(key(i+12:)), waterp4)))
+        case(15)
+            h = ieor(watermum(ieor(waterr32(key(i:)), h), &
+                ieor(waterr32(key(i+4:)), waterp2)), &
+                watermum(ieor(h, waterr32(key(i+8:))), &
+                ieor(ior(ishft(waterr16(key(i+12:)),8), &
+                waterr08(key(i+14:))), &
+                waterp4)))
+        end select
+
+        ! Final mix with the key length, then keep the low order 32 bits of
+        ! `h`: that half is dummy(1) on little endian processors and dummy(2)
+        ! on big endian ones.
+        h = ieor( h, ishft(h,16) ) * ieor( len, waterp0 )
+        h = h - ishft( h, -32 )
+        dummy(1:2) = transfer(h, dummy, 2)
+        if (little_endian) then
+            hash_code = dummy(1)
+        else
+            hash_code = dummy(2)
+        end if
+
+    contains
+
+        pure function watermum( a, b ) result(r)
+        ! Multiplies A by B in (wrap around) INT64 arithmetic and subtracts
+        ! the product logically shifted right by 32 bits.
+            integer(int64) :: r
+            integer(int64), intent(in) :: a, b
+
+            r = a * b
+            r = r - ishft(r, -32)
+
+        end function watermum
+
+        pure function waterr08( p ) result(v)
+        ! Zero extends the first byte of P to an INT64. The byte is placed in
+        ! the least significant position for either byte order.
+            integer(int64) :: v
+            integer(int8), intent(in) :: p(:)
+
+            if (little_endian) then
+                v = transfer( [ p(1), 0_int8, 0_int8, 0_int8, &
+                    0_int8, 0_int8, 0_int8, 0_int8 ], v )
+            else
+                v = transfer( [ 0_int8, 0_int8, 0_int8, 0_int8, &
+                    0_int8, 0_int8, 0_int8, p(1) ], v )
+            end if
+
+        end function waterr08
+
+        pure function waterr16( p ) result(v)
+        ! Zero extends the first two bytes of P to an INT64, with p(1) as the
+        ! least significant byte for either byte order.
+            integer(int64) :: v
+            integer(int8), intent(in) :: p(:)
+
+            if (little_endian) then
+                v = transfer( [ p(1), p(2), 0_int8, 0_int8, &
+                    0_int8, 0_int8, 0_int8, 0_int8 ], v )
+            else
+                v = transfer( [ 0_int8, 0_int8, 0_int8, 0_int8, &
+                    0_int8, 0_int8, p(2), p(1) ], v )
+            end if
+
+        end function waterr16
+
+        pure function waterr32( p ) result(v)
+        ! Zero extends the first four bytes of P to an INT64, with p(1) as the
+        ! least significant byte for either byte order.
+            integer(int64) :: v
+            integer(int8), intent(in) :: p(:)
+
+            if (little_endian) then
+                v = transfer( [ p(1), p(2), p(3), p(4), &
+                    0_int8, 0_int8, 0_int8, 0_int8 ], v )
+            else
+                v = transfer( [ 0_int8, 0_int8, 0_int8, 0_int8, &
+                    p(4), p(3), p(2), p(1) ], v )
+            end if
+
+        end function waterr32
+
+    end function int8_water_hash
+
+
+#:for k1 in INT_KINDS
+    pure module function ${k1}$_water_hash( key, seed ) result(hash_code)
+!! WATER_HASH for rank 1 arrays of kind ${k1}$: the key is reinterpreted as
+!! `bytes_${k1}$*size(key)` bytes and passed to `int8_water_hash`.
+        integer(${k1}$), intent(in) :: key(:)
+        integer(int64), intent(in) :: seed
+        integer(int_hash) :: hash_code
+
+        hash_code = int8_water_hash( transfer( key, 0_int8, &
+            bytes_${k1}$*size(key, kind=int64) ), seed)
+
+    end function ${k1}$_water_hash
+
+#:endfor
+
+    
pure module function character_water_hash( key, seed ) result(hash_code)
+!! WATER_HASH for character strings: KEY is reinterpreted as
+!! `bytes_char*len(key)` bytes and hashed with `int8_water_hash`.
+        character(*), intent(in) :: key
+        integer(int64), intent(in) :: seed
+        integer(int_hash) :: hash_code
+
+        integer(int64) :: nbytes
+
+        nbytes = bytes_char*len(key, kind=int64)
+        hash_code = int8_water_hash( transfer( key, 0_int8, nbytes ), seed )
+
+    end function character_water_hash
+
+    module subroutine new_water_hash_seed( seed )
+!! Replaces SEED by a pseudo random 64 bit value that is guaranteed to differ
+!! from the value SEED had on entry.
+        integer(int64), intent(inout) :: seed
+
+        integer(int64) :: previous
+        real(dp) :: draws(2)
+        integer(int32) :: halves(2)
+
+        previous = seed
+        do
+            ! Two uniform draws on [0,1) are scaled to the full INT32 range and
+            ! then concatenated into one INT64.
+            call random_number( draws )
+            halves = int( floor( draws * 2_int64**32, int64 ) - 2_int64**31, &
+                int32 )
+            seed = transfer( halves, seed )
+            ! Retry only in the unlikely event the same value was drawn again
+            if ( seed /= previous ) exit
+        end do
+
+    end subroutine new_water_hash_seed
+
+end submodule stdlib_32_bit_water_hashes
diff --git a/src/stdlib_64_bit_fnv_hashes.fypp b/src/stdlib_64_bit_fnv_hashes.fypp
new file mode 100755
index 000000000..1eefdb886
--- /dev/null
+++ b/src/stdlib_64_bit_fnv_hashes.fypp
@@ -0,0 +1,125 @@
+!!------------------------------------------------------------------------------
+!! `FNV_1_HASH` and `FNV_1A_HASH` are translations to Fortran 2008 of the
+!! `FNV-1` and `FNV-1a` hash functions of Glenn Fowler, Landon Curt Noll,
+!! and Phong Vo, that has been released into the public domain. Permission
+!! has been granted, by Landon Curt Noll, for the use of these algorithms
+!! in the Fortran Standard Library. A description of these functions is
+!! available at https://en.wikipedia.org/wiki/Fowler–Noll–Vo_hash_function.
+!! The functions have been modified from their normal form to also encode
+!! the size of the structure in the hash.
+!!------------------------------------------------------------------------------
+
+#! Integer kinds to be considered during templating
+#:set INT_KINDS = ["int16", "int32", "int64"]
+
+submodule(stdlib_64_bit_hash_functions) stdlib_64_bit_fnv_hashes
+! An implementation of the FNV hashes 1 and 1a of Glenn Fowler, Landon Curt
+! 
Noll, and Kiem-Phong Vo,
+! https://en.wikipedia.org/wiki/Fowler–Noll–Vo_hash_function
+    implicit none
+
+    ! 64 bit FNV offset basis and FNV prime
+    integer(int_hash), parameter :: &
+        offset_basis = int( z'CBF29CE484222325', int_hash ), &
+        prime = int( z'100000001B3', int_hash )
+
+contains
+
+    pure module function int8_fnv_1( key ) result(hash_code)
+!! FNV_1 hash of a rank 1 int8 array: for every byte the running hash is
+!! first multiplied by `prime` and then exclusive-or'ed with the byte.
+        integer(int8), intent(in) :: key(:)
+        integer(int_hash) :: hash_code
+
+        integer(int64) :: k
+        integer(int8) :: octets(8)
+
+        hash_code = offset_basis
+        do k = 1_int64, size(key, kind=int64)
+            ! Zero extend key(k) to 64 bits: the byte goes to the least
+            ! significant position for the processor's byte order.
+            octets = 0_int8
+            if ( little_endian ) then
+                octets(1) = key(k)
+            else
+                octets(8) = key(k)
+            end if
+            hash_code = ieor( hash_code * prime, &
+                transfer( octets, 0_int_hash ) )
+        end do
+
+    end function int8_fnv_1
+
+
+#:for k1 in INT_KINDS
+    pure module function ${k1}$_fnv_1( key ) result(hash_code)
+!! FNV_1 hash of a rank 1 array of kind ${k1}$, computed on its byte image.
+        integer(${k1}$), intent(in) :: key(:)
+        integer(int_hash) :: hash_code
+
+        integer(int64) :: nbytes
+
+        nbytes = bytes_${k1}$*size( key, kind=int64 )
+        hash_code = int8_fnv_1( transfer( key, 0_int8, nbytes ) )
+
+    end function ${k1}$_fnv_1
+
+#:endfor
+
+    pure module function character_fnv_1( key ) result(hash_code)
+!! FNV_1 hash of a character string, computed on its byte image.
+        character(*), intent(in) :: key
+        integer(int_hash) :: hash_code
+
+        integer(int64) :: nbytes
+
+        nbytes = bytes_char*len(key, kind=int64)
+        hash_code = int8_fnv_1( transfer( key, 0_int8, nbytes ) )
+
+    end function character_fnv_1
+
+
+    pure module function int8_fnv_1a( key ) result(hash_code)
+!! FNV_1A hash of a rank 1 int8 array: for every byte the running hash is
+!! first exclusive-or'ed with the byte and then multiplied by `prime`.
+        integer(int8), intent(in) :: key(:)
+        integer(int_hash) :: hash_code
+
+        integer(int64) :: k
+        integer(int8) :: octets(8)
+
+        hash_code = offset_basis
+        do k = 1_int64, size(key, kind=int64)
+            ! Zero extend key(k) to 64 bits: the byte goes to the least
+            ! significant position for the processor's byte order.
+            octets = 0_int8
+            if ( little_endian ) then
+                octets(1) = key(k)
+            else
+                octets(8) = key(k)
+            end if
+            hash_code = ieor( hash_code, transfer( octets, 0_int_hash ) ) &
+                * prime
+        end do
+
+    end function int8_fnv_1a
+
+
+#:for k1 in INT_KINDS
+    pure module function ${k1}$_fnv_1a( key ) result(hash_code)
+!! FNV_1A hash of a rank 1 array of kind ${k1}$, computed on its byte image.
+        integer(${k1}$), intent(in) :: key(:)
+        integer(int_hash) :: hash_code
+
+        integer(int64) :: nbytes
+
+        nbytes = bytes_${k1}$*size(key, kind=int64)
+        hash_code = int8_fnv_1a( transfer( key, 0_int8, nbytes ) )
+
+    end function ${k1}$_fnv_1a
+
+#:endfor
+
+    pure module function character_fnv_1a( key ) result(hash_code)
+!! FNV_1A hash of a character string, computed on its byte image.
+        character(*), intent(in) :: key
+        integer(int_hash) :: hash_code
+
+        integer(int64) :: nbytes
+
+        nbytes = bytes_char*len(key, kind=int64)
+        hash_code = int8_fnv_1a( transfer( key, 0_int8, nbytes ) )
+
+    end function character_fnv_1a
+
+end submodule stdlib_64_bit_fnv_hashes
diff --git a/src/stdlib_64_bit_hash_functions.fypp b/src/stdlib_64_bit_hash_functions.fypp
new file mode 100755
index 000000000..0f31a0d26
--- /dev/null
+++ b/src/stdlib_64_bit_hash_functions.fypp
@@ -0,0 +1,308 @@
+#! Integer kinds to be considered during templating
+#:set INT_KINDS = ["int8", "int16", "int32", "int64"]
+
+module stdlib_64_bit_hash_functions
+
+    use, intrinsic :: iso_fortran_env, only : &
+        character_storage_size
+
+    use stdlib_kinds, only: &
+        dp, &
+        int8, &
+        int16, &
+        int32, &
+        int64
+
+    implicit none
+
+    private
+
+    integer, parameter, public :: &
+        int_hash = int64
+!! The integer kind of the output hash (64 bits)
+
+! The number of bits used by each integer type
+    integer, parameter, public :: &
+! Should be 8
+        bits_int8 = bit_size(0_int8), &
+! Should be 16
+        bits_int16 = bit_size(0_int16), &
+! Should be 32
+        bits_int32 = bit_size(0_int32), &
+! Should be 64
+        bits_int64 = bit_size(0_int64)
+
+! The number of bytes used by each integer type
+    integer, parameter, public :: &
+! Should be 1
+        bytes_int8 = bits_int8/bits_int8, &
+! Should be 2
+        bytes_int16 = bits_int16/bits_int8, &
+! Should be 4
+        bytes_int32 = bits_int32/bits_int8, &
+! Should be 8
+        bytes_int64 = bits_int64/bits_int8
+
+! The number of bits and bytes in a default character
+    integer, parameter, public :: &
+        bits_char = character_storage_size, &
+        bytes_char = bits_char/bits_int8
+
+! 
Dealing with different endians: LITTLE_ENDIAN is true when the least
+! significant byte of an integer is stored first.
+    logical, parameter, public :: &
+        little_endian = ( 1 == transfer( [1_int8, 0_int8], 0_int16) )
+
+    public :: &
+        fibonacci_hash, &
+        fnv_1_hash, &
+        fnv_1a_hash, &
+        new_pengy_hash_seed, &
+        new_spooky_hash_seed, &
+        odd_random_integer, &
+        pengy_hash, &
+        spooky_hash, &
+        spookyhash_128, &
+        universal_mult_hash
+
+! pow64_over_phi is the odd number that most closely approximates 2**64/phi,
+! where phi is the golden ratio 1.618...
+    integer(int64), parameter :: &
+        pow64_over_phi = int(z'9E3779B97F4A7C15', int64)
+
+    integer(int_hash), parameter :: &
+        two_32 = 2_int_hash**32
+
+! constants used by Bob Jenkins' SpookyHash
+! sc_numvars is the number of INT64 words in the internal state, sc_blocksize
+! (96) the number of bytes in one block of sc_numvars words, and sc_buffsize
+! (192) the number of bytes in two such blocks.
+    integer(int32), parameter :: &
+        sc_numvars = 12, &
+        sc_blocksize = sc_numvars*8, &
+        sc_buffsize = 2*sc_blocksize, &
+        sc_constsub = int(z'deadbeef', int32)
+        ! twos complement "deadbeef"
+
+! sc_const is the 64 bit pattern z'deadbeefdeadbeef'
+    integer(int64), parameter :: &
+        sc_const = transfer( [sc_constsub, sc_constsub], 0_int64 )
+
+! State record taken by the SPOOKY_INIT, SPOOKY_UPDATE and SPOOKY_FINAL
+! interfaces below. Presumably the state of an incremental (streaming)
+! SpookyHash; the implementations are not in this file - TODO confirm.
+    type :: spooky_subhash
+        integer(int8) :: data(0:2*sc_blocksize-1)
+        integer(int64) :: state(0:sc_numvars-1)
+        integer(int64) :: length
+        integer(int16) :: remainder
+    end type spooky_subhash
+
+    interface fnv_1_hash
+!! FNV_1 interfaces
+
+        #:for k1 in INT_KINDS
+        pure module function ${k1}$_fnv_1( key ) result(hash_code)
+!! FNV_1 hash function for rank 1 arrays of kind ${k1}$
+            integer(${k1}$), intent(in) :: key(:)
+            integer(int_hash) :: hash_code
+        end function ${k1}$_fnv_1
+        #:endfor
+
+        pure module function character_fnv_1( key ) result(hash_code)
+!! FNV_1 hash function for character strings
+            character(*), intent(in) :: key
+            integer(int_hash) :: hash_code
+        end function character_fnv_1
+
+    end interface fnv_1_hash
+
+
+    interface fnv_1a_hash
+!! FNV_1A interfaces
+        #:for k1 in INT_KINDS
+        pure module function ${k1}$_fnv_1a( key ) result(hash_code)
+!! FNV_1A hash function for rank 1 arrays of kind ${k1}$
+            integer(${k1}$), intent(in) :: key(:)
+            integer(int_hash) :: hash_code
+        end function ${k1}$_fnv_1a
+        #:endfor
+
+        pure module function character_fnv_1a( key ) result(hash_code)
+!! FNV_1A hash function for character strings
+            character(*), intent(in) :: key
+            integer(int_hash) :: hash_code
+        end function character_fnv_1a
+
+    end interface fnv_1a_hash
+
+
+! NOTE(review): MURMUR2_HASH is not named in the PUBLIC statement above, so
+! it is private to this module - confirm that this is intended and that a
+! submodule actually implements these procedures.
+    interface murmur2_hash
+!! MURMUR2_HASHES interfaces
+
+        #:for k1 in INT_KINDS
+        pure module function ${k1}$_murmur2_hash( key, seed ) &
+            result(hash_code)
+!! MURMUR2 hash function for rank 1 arrays of kind ${k1}$
+            integer(${k1}$), intent(in) :: key(0:)
+            integer(int_hash), intent(in) :: seed
+            integer(int_hash) :: hash_code
+        end function ${k1}$_murmur2_hash
+        #:endfor
+
+        pure module function character_murmur2_hash( key, seed ) &
+            result(hash_code)
+!! MURMUR2 hash function for character strings
+            character(*), intent(in) :: key
+            integer(int_hash), intent(in) :: seed
+            integer(int_hash) :: hash_code
+        end function character_murmur2_hash
+
+    end interface murmur2_hash
+
+
+    interface spooky_hash
+!! SPOOKY_HASH interfaces
+!! SEED holds the two 64 bit seed words; HASH_CODE is the 128 bit result
+!! returned as two 64 bit words.
+
+        #:for k1 in INT_KINDS
+        module function ${k1}$_spooky_hash( key, seed ) &
+            result(hash_code)
+!! SPOOKY HASH function for rank 1 arrays of kind ${k1}$
+            integer(${k1}$), intent(in) :: key(0:)
+            integer(int_hash), intent(in) :: seed(2)
+            integer(int_hash) :: hash_code(2)
+        end function ${k1}$_spooky_hash
+        #:endfor
+
+        module function character_spooky_hash( key, seed ) &
+            result(hash_code)
+!! SPOOKY hash function for character strings
+            character(*), intent(in) :: key
+            integer(int_hash), intent(in) :: seed(2)
+            integer(int_hash) :: hash_code(2)
+        end function character_spooky_hash
+
+    end interface spooky_hash
+
+    interface
+
+        module subroutine spookyHash_128( key, hash_inout )
+! Hashes the bytes of KEY; HASH_INOUT holds the two seed words on entry and
+! the two words of the hash on return
+            integer(int8), intent(in), target :: key(0:)
+            integer(int_hash), intent(inout) :: hash_inout(2)
+        end subroutine spookyHash_128
+
+    end interface
+
+
+    interface spooky_init
+
+        module subroutine spookysubhash_init( self, seed )
+            type(spooky_subhash), intent(out) :: self
+            integer(int_hash), intent(in) :: seed(2)
+        end subroutine spookysubhash_init
+
+    end interface spooky_init
+
+
+    interface spooky_update
+
+        module subroutine spookyhash_update( spooky, key )
+! NOTE(review): SPOOKY is INTENT(OUT) here, so its value on entry is
+! undefined inside the procedure; an update step would normally need
+! INTENT(INOUT) to see the state built so far - confirm against the
+! implementation.
+            type(spooky_subhash), intent(out) :: spooky
+            integer(int8), intent(in) :: key(0:)
+        end subroutine spookyhash_update
+
+    end interface spooky_update
+
+
+    interface spooky_final
+
+        module subroutine spookyhash_final(spooky, hash_code)
+            type(spooky_subhash), intent(inout) :: spooky
+            integer(int_hash), intent(inout) :: hash_code(2)
+        end subroutine spookyhash_final
+
+    end interface spooky_final
+
+interface
+
+        module subroutine new_spooky_hash_seed( seed )
+! Random SEED generator for SPOOKY_HASH
+            integer(int64), intent(inout) :: seed(2)
+        end subroutine new_spooky_hash_seed
+
+    end interface
+
+    interface pengy_hash
+!! PENGY_HASH interfaces
+
+        #:for k1 in INT_KINDS
+        pure module function ${k1}$_pengy_hash( key, seed ) result(hash_code)
+!! PENGY_HASH hash function for rank 1 array keys of kind ${k1}$
+            integer(${k1}$), intent(in) :: key(:)
+            integer(int32), intent(in) :: seed
+            integer(int64) :: hash_code
+        end function ${k1}$_pengy_hash
+        #:endfor
+
+        pure module function character_pengy_hash( key, seed ) &
+            result(hash_code)
+!! PENGY_HASH hash function for character strings
+            character(*), intent(in) :: key
+            integer(int32), intent(in) :: seed
+            integer(int64) :: hash_code
+        end function character_pengy_hash
+
+    end interface pengy_hash
+
+    interface
+
+        module subroutine new_pengy_hash_seed( seed )
+! Random SEED generator for PENGY_HASH
+            integer(int32), intent(inout) :: seed
+        end subroutine new_pengy_hash_seed
+
+    end interface
+
+contains
+
+    pure function fibonacci_hash( key, nbits ) result( sample )
+!! Maps the 64 bit integer KEY to an unsigned integer value with only NBITS
+!! bits where NBITS is less than 64
+        integer(int64), intent(in) :: key
+        integer, intent(in) :: nbits
+        integer(int64) :: sample
+
+        ! Multiply by pow64_over_phi (wrap around arithmetic) and keep the
+        ! top NBITS bits through a logical right shift by 64-NBITS.
+        sample = ishft( key*pow64_over_phi, -64 + nbits )
+
+    end function fibonacci_hash
+
+    pure function universal_mult_hash( key, seed, nbits ) result( sample )
+!! Uses the "random" odd 64 bit integer SEED to map the 64 bit integer KEY to
+!! an unsigned integer value with only NBITS bits where NBITS is less than 64.
+        integer(int64), intent(in) :: key
+        integer(int64), intent(in) :: seed
+        integer, intent(in) :: nbits
+        integer(int64) :: sample
+
+        ! Same scheme as FIBONACCI_HASH, with SEED as the multiplier
+        sample = ishft( key*seed, -64 + nbits )
+
+    end function universal_mult_hash
+
+    subroutine odd_random_integer( harvest )
+!! Returns a 64 bit pseudo random integer, HARVEST, distributed uniformly over
+!! the odd integers of the 64 bit kind.
+        integer(int64), intent(out) :: harvest
+        real(dp) :: sample(2)
+        integer(int32) :: part(2)
+
+        ! Two uniform draws on [0,1) are scaled to the full INT32 range and
+        ! concatenated into one INT64.
+        call random_number( sample )
+        part = int( floor( sample * 2_int64**32, int64 ) - 2_int64**31, int32 )
+        harvest = transfer( part, harvest )
+        ! Shift left one bit and set the lowest bit so the result is odd
+        harvest = ishft( harvest, 1 ) + 1_int64
+
+    end subroutine odd_random_integer
+
+    subroutine random_integer( harvest )
+!! Returns a 64 bit pseudo random integer, HARVEST, distributed uniformly over
+!! the values of the 64 bit kind.
+        integer(int64), intent(out) :: harvest
+        real(dp) :: sample(2)
+        integer(int32) :: part(2)
+
+        ! Two uniform draws on [0,1) are scaled to the full INT32 range and
+        ! concatenated into one INT64.
+        call random_number( sample )
+        part = int( floor( sample * 2_int64**32, int64 ) - 2_int64**31, int32 )
+        harvest = transfer( part, harvest )
+
+    end subroutine random_integer
+
+end module stdlib_64_bit_hash_functions
diff --git a/src/stdlib_64_bit_pengy_hashes.fypp b/src/stdlib_64_bit_pengy_hashes.fypp
new file mode 100755
index 000000000..ca1f14791
--- /dev/null
+++ b/src/stdlib_64_bit_pengy_hashes.fypp
@@ -0,0 +1,148 @@
+!!------------------------------------------------------------------------------
+!! `PENGY_HASH` is a translation to Fortran 2008 and signed two's complement
+!! arithmetic of the `pengyhash` algorithm of Alberto Fajardo, copyright 2020.
+!! Alberto Fajardo's original C code, `pengyhash.c`, is available at the URL:
+!! https://github.com/tinypeng/pengyhash/blob/master/pengyhash.c
+!! under the BSD 2-Clause License:
+!! https://github.com/tinypeng/pengyhash/blob/master/LICENSE
+!!
+!! The BSD 2-Clause license is as follows:
+!!
+!! BSD 2-Clause License
+!!
+!! pengyhash
+!! Copyright (c) 2020 Alberto Fajardo
+!! All rights reserved.
+!!
+!! Redistribution and use in source and binary forms, with or without
+!! modification, are permitted provided that the following conditions are met:
+!!
+!! 1. Redistributions of source code must retain the above copyright notice,
+!! this list of conditions and the following disclaimer.
+!!
+!! 2. Redistributions in binary form must reproduce the above copyright notice,
+!! this list of conditions and the following disclaimer in the documentation
+!! and/or other materials provided with the distribution.
+!!
+!! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+!! AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+!! IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+!! ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
+!! LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+!! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+!! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+!! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+!! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+!! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+!! POSSIBILITY OF SUCH DAMAGE.
+!!------------------------------------------------------------------------------
+
+#! Integer kinds to be considered during templating
+#:set INT_KINDS = ["int16", "int32", "int64"]
+
+submodule(stdlib_64_bit_hash_functions) stdlib_64_bit_pengy_hashes
+
+    implicit none
+
+contains
+
+    pure module function int8_pengy_hash( key, seed ) result(hash_code)
+!! PENGY_HASH hash function for rank 1 array keys of kind int8.
+!! KEY is consumed in blocks of 32 bytes, the trailing bytes are merged into
+!! the last block image, six finalization rounds mix in the 32 bit SEED, and
+!! the sum of the four state words is returned.
+        integer(int64) :: hash_code
+        integer(int8), intent(in) :: key(0:)
+        integer(int32), intent(in) :: seed
+
+        integer(int64) :: b(0:3)
+        integer(int64) :: i
+        integer(int64) :: index
+        integer(int64) :: len
+        integer(int64) :: s(0:3)
+        integer(int64) :: seed2
+        integer(int8) :: dummy(0:31)
+
+        b(0:3) = 0_int64
+        len = size( key, kind=int64 )
+        ! The state starts as zero except for the key length in s(3)
+        s(0:3) = [ 0_int64, 0_int64, 0_int64, len ]
+
+        ! Absorb every complete 32 byte block; `len` counts the bytes still
+        ! to be consumed and `index` is the offset of the next unread byte.
+        index = 0_int64
+        do while ( len >= 32 )
+            b(0:3) = transfer( key( index:index+31 ), 0_int64, 4 )
+
+            s(0) = s(0) + s(1) + b(3)
+            s(1) = s(0) + ishftc( s(1), 14 )
+            s(2) = s(2) + s(3) + b(2)
+            s(3) = s(2) + ishftc( s(3), 23 )
+            s(0) = s(0) + s(3) + b(1)
+            s(3) = ieor( s(0), ishftc( s(3), 16 ) )
+            s(2) = s(2) + s(1) + b(0)
+            s(1) = ieor( s(2), ishftc( s(1), 40 ) )
+
+            len = len - 32
+            index = index + 32
+        end do
+
+        ! Overlay the remaining `len` (0..31) bytes on the byte image of `b`.
+        ! Bytes beyond `len` keep the contents of the last full block, or
+        ! zero when the key was shorter than 32 bytes.
+        dummy(0:31) = transfer( b, 0_int8, 32 )
+        dummy(0:len-1) = key(index:index+len-1)
+        b(0:3) = transfer( dummy, 0_int64, 4)
+        ! Zero extend the 32 bit SEED to 64 bits for either byte order
+        if ( little_endian ) then
+            seed2 = transfer( [ seed, 0_int32 ], 0_int64 )
+        else
+            seed2 = transfer( [ 0_int32, seed ], 0_int64 )
+        end if
+
+        ! Six finalization rounds over the last block image; these differ from
+        ! the absorbing rounds above only in adding `seed2` to s(1).
+        do i = 0, 5
+            s(0) = s(0) + s(1) + b(3)
+            s(1) = s(0) + ishftc( s(1), 14 ) + seed2
+            s(2) = s(2) + s(3) + b(2)
+            s(3) = s(2) + ishftc( s(3), 23 )
+            s(0) = s(0) + s(3) + b(1)
+            s(3) = ieor( s(0), ishftc( s(3), 16 ) )
+            s(2) = s(2) + s(1) + b(0)
+            s(1) = ieor( s(2), ishftc( s(1), 40 ) )
+        end do
+
+        hash_code = s(0) + s(1) + s(2) + s(3)
+
+    end function int8_pengy_hash
+
+#:for k1 in INT_KINDS
+    pure module function ${k1}$_pengy_hash( key, seed ) result(hash_code)
+!! PENGY_HASH hash function for rank 1 array keys of kind ${k1}$
+!! The key is reinterpreted as `bytes_${k1}$*size(key)` bytes and passed to
+!! `int8_pengy_hash`.
+        integer(${k1}$), intent(in) :: key(:)
+        integer(int32), intent(in) :: seed
+        integer(int64) :: hash_code
+
+        hash_code = int8_pengy_hash( transfer( key, 0_int8, &
+            bytes_${k1}$*size(key, kind=int64) ), seed)
+
+    end function ${k1}$_pengy_hash
+
+#:endfor
+
+    pure module function character_pengy_hash( key, seed ) result(hash_code)
+!! PENGY_HASH hash function for default character keys
+!! The key is reinterpreted as `bytes_char*len(key)` bytes and passed to
+!! `int8_pengy_hash`.
+        character(*), intent(in) :: key
+        integer(int32), intent(in) :: seed
+        integer(int64) :: hash_code
+
+        hash_code = int8_pengy_hash( transfer( key, 0_int8, &
+            bytes_char*len(key, kind=int64) ), seed)
+
+    end function character_pengy_hash
+
+    module subroutine new_pengy_hash_seed( seed )
+! Random SEED generator for PENGY_HASH
+! Replaces SEED by a pseudo random 32 bit value that differs from the value
+! SEED had on entry.
+        integer(int32), intent(inout) :: seed
+        real(dp) :: sample
+        integer(int32) :: old_seed
+
+        old_seed = seed
+        find_seed: do
+            ! A uniform draw on [0,1) is scaled to the full INT32 range
+            call random_number( sample )
+            seed = int( floor( sample * 2_int64**32, int64 ) - 2_int64**31, &
+                int32 )
+            ! Retry only if the same value was drawn again
+            if ( seed /= old_seed ) return
+        end do find_seed
+
+    end subroutine new_pengy_hash_seed
+
+end submodule stdlib_64_bit_pengy_hashes
diff --git a/src/stdlib_64_bit_spookyv2_hashes.fypp b/src/stdlib_64_bit_spookyv2_hashes.fypp
new file mode 100755
index 000000000..eaaccff4d
--- /dev/null
+++ b/src/stdlib_64_bit_spookyv2_hashes.fypp
@@ -0,0 +1,718 @@
+!!------------------------------------------------------------------------------
+!! `SPOOKY_HASH` is a translation to Fortran 2008 of the unsigned 64 bit
+!! 
`SpookyHash` V2 function of Bob Jenkins +!! <https://burtleburtle.net/bob/hash/spooky.html> to signed 64 bit +!! operations. Bob Jenkins has put his code in the public domain and has +!! given permission to treat this code as public domain in the USA, +!! provided the code can be used under other licenses and he is given +!! appropriate credit. +!! The code was designed for Little-Endian processors. The output is +!! different on Big Endian processors, but still probably as good quality. +!!------------------------------------------------------------------------------ + +#! Integer kinds to be considered during templating +#:set INT_KINDS = ["int16", "int32", "int64"] + +submodule(stdlib_64_bit_hash_functions) stdlib_64_bit_spookyv2_hashes + +! I have tried to make this portable while retaining efficiency. I assume +! processors with two's complement integers from 8, 16, 32, and 64 bits. +! The code is a transliteration of the 64 bit SpookyHash V2 of Bob Jenkins +! +! The code was designed for Little-Endian processors. The output is +! different on Big Endian processors, but still probably as good quality. 
+
+    implicit none
+
+contains
+
+
+    module function int8_spooky_hash( key, seed ) result(hash_code)
+!! SPOOKY_HASH for rank 1 arrays of kind int8. SEED supplies the two 64 bit
+!! seed words; the 128 bit hash is returned as two 64 bit words.
+        integer(int8), intent(in) :: key(:)
+        integer(int64), intent(in) :: seed(2)
+        integer(int64) :: hash_code(2)
+
+        integer(int64) :: hash2(2)
+
+        ! spookyhash_128 takes the seed and returns the hash in one argument
+        hash2(:) = seed
+        call spookyhash_128( key, hash2 )
+        hash_code = hash2
+
+    end function int8_spooky_hash
+
+
+#:for k1 in INT_KINDS
+    module function ${k1}$_spooky_hash( key, seed ) result(hash_code)
+!! SPOOKY_HASH for rank 1 arrays of kind ${k1}$: the key is reinterpreted as
+!! `bytes_${k1}$*size(key)` bytes and hashed with `spookyhash_128`.
+        integer(${k1}$), intent(in) :: key(:)
+        integer(int64), intent(in) :: seed(2)
+        integer(int64) :: hash_code(2)
+
+        integer(int64) :: hash2(2)
+
+        hash2(:) = seed
+        call spookyhash_128( transfer( key, 0_int8, &
+            bytes_${k1}$*size(key, kind=int64) ), hash2 )
+        hash_code = hash2
+
+    end function ${k1}$_spooky_hash
+
+#:endfor
+
+    module function character_spooky_hash( key, seed ) result(hash_code)
+!! SPOOKY_HASH for character strings: the key is reinterpreted as
+!! `bytes_char*len(key)` bytes and hashed with `spookyhash_128`.
+        character(*), intent(in) :: key
+        integer(int64), intent(in) :: seed(2)
+        integer(int64) :: hash_code(2)
+
+        integer(int64) :: hash2(2)
+
+        hash2(:) = seed
+        call spookyhash_128( transfer( key, 0_int8, &
+            bytes_char*len(key, kind=int64) ), hash2 )
+        hash_code = hash2
+
+    end function character_spooky_hash
+
+!
+! short hash ... it could be used on any message,
+! but it's used by Spooky just for short messages.
+!
+! KEY is the message as bytes. HASH_INOUT holds the two seed words on entry
+! and the two words of the hash on return. The routine performs no I/O.
+! spookyHash_128 calls it for keys shorter than sc_buffsize bytes.
+!
+    subroutine spookyhash_short( key, hash_inout )
+        integer(int8), intent(in), target :: key(0:)
+        integer(int64), intent(inout) :: hash_inout(2)
+
+        integer(int64) :: a, b, c, d
+        integer(int64) :: length, p8, remainder
+
+        ! p8 is the offset in KEY of the first byte not yet consumed
+        p8 = 0
+        length = size( key, kind=int64 )
+
+        ! The number of bytes after all the INT256s
+        remainder = iand( length, 31_int64 )
+        a = hash_inout(1)
+        b = hash_inout(2)
+        c = sc_const
+        d = sc_const
+
+        if ( length > 15 ) then
+            block
+                integer(int64) :: bend, step
+                integer(int64) :: buf(0:2*sc_numVars-1)
+                bend = ishft(length, -4) ! The number of complete INT128s
+                buf(0:2*bend-1) = transfer( key(0:16*bend-1), 0_int64, 2*bend )
+                ! Number of Int64's in number of complete INT256s
+                bend = ishft(ishft(length, -5), 2)
+
+                ! handle all complete sets of 32 bytes
+                do step = 0_int64, bend-1, 4
+                    c = c + buf(step)
+                    d = d + buf(step+1)
+                    call shortmix( a, b, c, d )
+                    a = a + buf(step+2)
+                    b = b + buf(step+3)
+                end do
+                ! Completed all INT64s in complete INT256s
+                ! (on exit from the loop step == bend, the next unread word)
+                p8 = p8 + 8*bend ! Number of INT8s in complete INT256s
+
+                ! Handle the case of 16+ remaining bytes.
+                if (remainder >= 16) then
+                    c = c + buf(step)
+                    d = d + buf(step+1)
+                    call shortmix( a, b, c, d )
+                    p8 = p8 + 16
+                    remainder = remainder - 16
+                end if
+
+            end block
+        end if
+
+        ! Handle the last 0..15 bytes, and its length V2
+        d = d + shiftl( length, 56_int64 )
+
+        ! The labelled statements below reproduce a C switch with fall
+        ! through: execution enters at the label matching REMAINDER and runs
+        ! down to the next "go to 888".
+        select case(remainder)
+        case(15)
+            go to 115
+        case(14)
+            go to 114
+        case(13)
+            go to 113
+        case(12)
+            go to 112
+        case(11)
+            go to 111
+        case(10)
+            go to 110
+        case(9)
+            go to 109
+        case(8)
+            go to 108
+        case(7)
+            go to 107
+        case(6)
+            go to 106
+        case(5)
+            go to 105
+        case(4)
+            go to 104
+        case(3)
+            go to 103
+        case(2)
+            go to 102
+        case(1)
+            go to 101
+        case(0)
+            go to 100
+        end select
+
+115     d = d + shiftl( map_to_64( key(p8+14) ), 48_int64 )
+114     d = d + shiftl( map_to_64( key(p8+13) ), 40_int64 )
+113     d = d + shiftl( map_to_64( key(p8+12) ), 32_int64 )
+112     if ( little_endian) then
+            d = d + transfer( [ transfer(key(p8+8:p8+11), 0_int32), &
+                0_int32 ], 0_int64)
+        else
+            d = d + transfer( [ 0_int32, &
+                transfer(key(p8+8:p8+11), 0_int32) ], &
+                0_int64)
+        end if
+        c = c + transfer( key(p8+0:p8+7), 0_int64 )
+        go to 888
+
+111     d = d + shiftl( map_to_64( key(p8+10) ), 16_int32 )
+110     d = d + shiftl( map_to_64( key(p8+9) ), 8_int32 )
+109     d = d + map_to_64( key(p8+8) )
+108     c = c + transfer( key(p8+0:p8+7), 0_int64 )
+        go to 888
+
+107     c = c + shiftl( map_to_64( key(p8+6) ), 48_int64 )
+106     c = c + shiftl( map_to_64( key(p8+5) ), 40_int64 )
+105     c = c + shiftl( map_to_64( key(p8+4) ), 32_int64 )
+104     if ( little_endian) then
+            c = c + transfer( [ transfer( key(p8+0:p8+3), 0_int32 ), &
+                0_int32 ], 0_int64 )
+        else
+            c = c + transfer( [ 0_int32, &
+                transfer( key(p8+0:p8+3), 0_int32 ) ], 0_int64 )
+        end if
+
+        go to 888
+
+103     c = c + shiftl( map_to_64( key(p8+2) ), 16_int64 )
+102     c = c + shiftl( map_to_64( key(p8+1) ), 8_int64 )
+101     c = c + map_to_64( key(p8+0) )
+        go to 888
+
+100     c = c + sc_const
+        d = d + sc_const
+
+888     call short_end( a, b, c, d )
+
+        hash_inout(1) = a
+        hash_inout(2) = b
+
+    contains
+
+        pure function map_to_64( key )
+        ! Zero extends the byte KEY to an INT64; the byte is placed in the
+        ! least significant position for either byte order.
+            integer(int8), intent(in) :: key
+            integer(int64) :: map_to_64
+
+            if ( little_endian ) then
+                map_to_64 = transfer( [ key, 0_int8, 0_int8, 0_int8, &
+                    0_int8, 0_int8, 0_int8, 0_int8 ], &
+                    0_int64 )
+            else
+                map_to_64 = transfer( [ 0_int8, 0_int8, 0_int8, 0_int8, &
+                    0_int8, 0_int8, 0_int8, key ], &
+                    0_int64 )
+            end if
+
+        end function map_to_64
+
+        pure subroutine shortmix( h0, h1, h2, h3 )
+        !
+        ! The goal is for each bit of the input to expand into 128 bits of
+        ! apparent entropy before it is fully overwritten.
+        ! n trials both set and cleared at least m bits of h0 h1 h2 h3
+        ! n: 2 m: 29
+        ! n: 3 m: 46
+        ! n: 4 m: 57
+        ! n: 5 m: 107
+        ! n: 6 m: 146
+        ! n: 7 m: 152
+        ! when run forwards or backwards
+        ! for all 1-bit and 2-bit diffs
+        ! with diffs defined by either xor or subtraction
+        ! with a base of all zeros plus a counter, or plus another bit, or random
+        !
+            integer(int64), intent(inout) :: h0, h1, h2, h3
+
+            h2 = ishftc( h2, 50 )
+            h2 = h2 + h3
+            h0 = ieor( h0, h2 )
+            h3 = ishftc( h3, 52 )
+            h3 = h3 + h0
+            h1 = ieor( h1, h3 )
+            h0 = ishftc( h0, 30 )
+            h0 = h0 + h1
+            h2 = ieor( h2, h0 )
+            h1 = ishftc( h1, 41 )
+            h1 = h1 + h2
+            h3 = ieor( h3, h1 )
+            h2 = ishftc( h2, 54 )
+            h2 = h2 + h3
+            h0 = ieor( h0, h2 )
+            h3 = ishftc( h3, 48 )
+            h3 = h3 + h0
+            h1 = ieor( h1, h3 )
+            h0 = ishftc( h0, 38 )
+            h0 = h0 + h1
+            h2 = ieor( h2, h0 )
+            h1 = ishftc( h1, 37 )
+            h1 = h1 + h2
+            h3 = ieor( h3, h1 )
+            h2 = ishftc( h2, 62 )
+            h2 = h2 + h3
+            h0 = ieor( h0, h2 )
+            h3 = ishftc( h3, 34 )
+            h3 = h3 + h0
+            h1 = ieor( h1, h3 )
+            h0 = ishftc( h0, 5 )
+            h0 = h0 + h1
+            h2 = ieor( h2, h0 )
+            h1 = ishftc( h1, 36 )
+            h1 = h1 + h2
+            h3 = ieor( h3, h1 )
+
+        end subroutine shortmix
+
+        pure subroutine short_end( h0, h1, h2, h3 )
+        !
+        ! Mix all 4 inputs together so that h0, h1 are a hash of them all.
+        !
+        ! For two inputs differing in just the input bits
+        ! Where "differ" means xor or subtraction
+        ! And the base value is random, or a counting value starting at that bit
+        ! The final result will have each bit of h0, h1 flip
+        ! For every input bit,
+        ! with probability 50 +- .3% (it is probably better than that)
+        ! For every pair of input bits,
+        ! with probability 50 +- .75% (the worst case is approximately that)
+        !
+            integer(int64), intent(inout) :: h0, h1, h2, h3
+
+            h3 = ieor( h3, h2 )
+            h2 = ishftc( h2, 15 )
+            h3 = h3 + h2
+            h0 = ieor( h0, h3 )
+            h3 = ishftc( h3, 52 )
+            h0 = h0 + h3
+            h1 = ieor( h1, h0 )
+            h0 = ishftc( h0, 26 )
+            h1 = h1 + h0
+            h2 = ieor( h2, h1 )
+            h1 = ishftc( h1, 51 )
+            h2 = h2 + h1
+            h3 = ieor( h3, h2 )
+            h2 = ishftc( h2, 28 )
+            h3 = h3 + h2
+            h0 = ieor( h0, h3 )
+            h3 = ishftc( h3, 9 )
+            h0 = h0 + h3
+            h1 = ieor( h1, h0 )
+            h0 = ishftc( h0, 47 )
+            h1 = h1 + h0
+            h2 = ieor( h2, h1 )
+            h1 = ishftc( h1, 54 )
+            h2 = h2 + h1
+            h3 = ieor( h3, h2 )
+            h2 = ishftc( h2, 32 )
+            h3 = h3 + h2
+            h0 = ieor( h0, h3 )
+            h3 = ishftc( h3, 25 )
+            h0 = h0 + h3
+            h1 = ieor( h1, h0 )
+            h0 = ishftc( h0, 63 )
+            h1 = h1 + h0
+
+        end subroutine short_end
+
+    end subroutine spookyhash_short
+
+
+! do the whole hash in one call
+! KEY is the message as bytes. HASH_INOUT holds the two seed words on entry
+! and the two words of the hash on return. Keys shorter than sc_buffsize
+! bytes are delegated to spookyhash_short.
+    module subroutine spookyHash_128( key, hash_inout )
+        integer(int8), intent(in), target :: key(0:)
+        integer(int64), intent(inout) :: hash_inout(2)
+
+        integer(int64) :: buf(sc_numvars)
+        integer(int64) :: h(0:11)
+        integer(int64) :: bend, i, length, p8, remain, remainder, tail
+        integer(int8) :: buf8(8)
+
+        length = size(key, kind=int64)
+
+        if ( length < sc_buffsize ) then
+            call spookyhash_short( key, hash_inout )
+            return
+        end if
+
+        ! Seed the twelve state words: the two seed words and sc_const,
+        ! repeated four times.
+        h( [ 0, 3, 6, 9 ] ) = hash_inout(1)
+        h( [ 1, 4, 7, 10 ] ) = hash_inout(2)
+        h( [ 2, 5, 8, 11 ] ) = sc_const
+
+        ! Number of bytes in number of complete internal states
+        bend = (length/sc_blocksize)*sc_blocksize
+
+        ! Handle all SC_BLOCKSIZE blocks of bytes
+        do i=0, bend-1, sc_blocksize
+            buf(:) = transfer( key(i:i+sc_blocksize-1), 0_int64, sc_numVars )
+            call spookyhash_mix( buf, h )
+        end do ! all complete internal states processed
+
+        ! handle the last partial block of sc_blocksize bytes
+        remainder = ( length - bend ) ! 0 <= remainder < sc_blocksize == 96
+        remain = remainder / 8 ! Number of INT64's in partial block
+        buf(1:remain) = transfer( key(bend:bend+remain*8-1), 0_int64, remain )
+        buf(remain+1:sc_numvars) = 0_int64
+        tail = remainder - 8 * remain ! Number of INT8s after INT64s
+        p8 = bend + remain * 8 ! # of bytes until tail start
+        ! Zero pad the 0..7 tail bytes to one INT64 word
+        buf8(1:tail) = key(p8:p8+tail-1)
+        buf8(tail+1:8) = 0_int8
+        buf(remain+1) = transfer( buf8, 0_int64 )
+        ! Record the partial block length in the last byte of the block
+        buf8(1:7) = 0_int8
+        buf8(8) = int( remainder, kind=int8 ) ! 0 <= remainder < 96
+        buf(sc_numvars) = ieor( buf(sc_numvars), transfer( buf8, 0_int64 ) )
+
+        ! do some final mixing
+        call spookyhash_end( buf, h )
+        hash_inout(1:2) = h(0:1)
+
+    end subroutine spookyHash_128
+
+    !
+    ! This is used if the input is 96 bytes long or longer.
+    !
+    ! The internal state is fully overwritten every 96 bytes.
+    ! Every input bit appears to cause at least 128 bits of entropy
+    ! before 96 other bytes are combined, when run forward or backward
+    ! For every input bit,
+    ! Two inputs differing in just that input bit
+    ! Where "differ" means xor or subtraction
+    ! And the base value is random
+    ! When run forward or backwards one Mix
+    ! I tried 3 pairs of each; they all differed by at least 212 bits.
+    ! 
+    pure subroutine spookyhash_mix( data, s )
+        ! Folds the twelve 64-bit words DATA(0:11) into the twelve-word
+        ! state S.  Each group of five statements below consumes one word of
+        ! DATA; later groups read values written by earlier ones, so the
+        ! statement order must be preserved.
+        integer(int64), intent(in)    :: data(0:)
+        integer(int64), intent(inout) :: s(0:11)
+
+        s(0) = s(0) + data(0)
+        s(2) = ieor( s(2), s(10) )
+        s(11) = ieor( s(11), s(0) )
+        s(0) = ishftc( s(0), 11 )
+        s(11) = s(11) + s(1)
+
+        s(1) = s(1) + data(1)
+        s(3) = ieor( s(3), s(11) )
+        s(0) = ieor( s(0), s(1) )
+        s(1) = ishftc( s(1), 32 )
+        s(0) = s(0) + s(2)
+
+        s(2) = s(2) + data(2)
+        s(4) = ieor( s(4), s(0) )
+        s(1) = ieor( s(1), s(2) )
+        s(2) = ishftc( s(2), 43 )
+        s(1) = s(1) + s(3)
+
+        s(3) = s(3) + data(3)
+        s(5) = ieor( s(5), s(1) )
+        s(2) = ieor( s(2), s(3) )
+        s(3) = ishftc( s(3), 31 )
+        s(2) = s(2) + s(4)
+
+        s(4) = s(4) + data(4)
+        s(6) = ieor( s(6), s(2) )
+        s(3) = ieor( s(3), s(4) )
+        s(4) = ishftc( s(4), 17 )
+        s(3) = s(3) + s(5)
+
+        s(5) = s(5) + data(5)
+        s(7) = ieor( s(7), s(3) )
+        s(4) = ieor( s(4), s(5) )
+        s(5) = ishftc( s(5), 28 )
+        s(4) = s(4) + s(6)
+
+        s(6) = s(6) + data(6)
+        s(8) = ieor( s(8), s(4) )
+        s(5) = ieor( s(5), s(6) )
+        s(6) = ishftc( s(6), 39 )
+        s(5) = s(5) + s(7)
+
+        s(7) = s(7) + data(7)
+        s(9) = ieor( s(9), s(5) )
+        s(6) = ieor( s(6), s(7) )
+        s(7) = ishftc( s(7), 57 )
+        s(6) = s(6) + s(8)
+
+        s(8) = s(8) + data(8)
+        s(10) = ieor( s(10), s(6) )
+        s(7) = ieor( s(7), s(8) )
+        s(8) = ishftc( s(8), 55 )
+        s(7) = s(7) + s(9)
+
+        s(9) = s(9) + data(9)
+        s(11) = ieor( s(11), s(7) )
+        s(8) = ieor( s(8), s(9) )
+        s(9) = ishftc( s(9), 54 )
+        s(8) = s(8) + s(10)
+
+        s(10) = s(10) + data(10)
+        s(0) = ieor( s(0), s(8) )
+        s(9) = ieor( s(9), s(10) )
+        s(10) = ishftc( s(10), 22 )
+        s(9) = s(9) + s(11)
+
+        s(11) = s(11) + data(11)
+        s(1) = ieor( s(1), s(9) )
+        s(10) = ieor( s(10), s(11) )
+        s(11) = ishftc( s(11), 46 )
+        s(10) = s(10) + s(0)
+
+    end subroutine spookyhash_mix
+
+
+    pure subroutine spookyhash_end( data, h)
+        ! Final mixing step: adds the twelve words DATA(0:11) to the state H
+        ! and then applies ENDPARTIAL three times.  Callers in this file
+        ! take the hash from H(0:1) afterwards.
+        integer(int64), intent(in)    :: data(0:)
+        integer(int64), intent(inout) :: h(0:11)
+
+        h = h + data(0:11)
+        call endpartial( h )
+        call endpartial( h )
+        call endpartial( h )
+
+    contains
+        !
+        ! Mix all 12 inputs together so that h0, h1 are a hash of them all.
+        !
+        ! For two inputs differing in just the input bits
+        ! Where "differ" means xor or subtraction
+        ! And the base value is random, or a counting value starting at that bit
+        ! The final result will have each bit of h0, h1 flip
+        ! For every input bit,
+        ! with probability 50 +- .3%
+        ! For every pair of input bits,
+        ! with probability 50 +- 3%
+        !
+        ! This does not rely on the last Mix() call having already mixed some.
+        ! Two iterations was almost good enough for a 64-bit result, but a
+        ! 128-bit result is reported, so End() does three iterations.
+        !
+        pure subroutine endpartial( h )
+            ! One pass of add / xor / rotate over the twelve state words.
+            ! Each group of three statements depends on the previous group,
+            ! so the order must be preserved.
+            integer(int64), intent(inout) :: h(0:11)
+
+            h(11) = h(11) + h(1)
+            h(2) = ieor( h(2), h(11) )
+            h(1) = ishftc( h(1), 44 )
+
+            h(0) = h(0) + h(2)
+            h(3) = ieor( h(3), h(0) )
+            h(2) = ishftc( h(2), 15 )
+
+            h(1) = h(1) + h(3)
+            h(4) = ieor( h(4), h(1) )
+            h(3) = ishftc( h(3), 34 )
+
+            h(2) = h(2) + h(4)
+            h(5) = ieor( h(5), h(2) )
+            h(4) = ishftc( h(4), 21 )
+
+            h(3) = h(3) + h(5)
+            h(6) = ieor( h(6), h(3) )
+            h(5) = ishftc( h(5), 38 )
+
+            h(4) = h(4) + h(6)
+            h(7) = ieor( h(7), h(4) )
+            h(6) = ishftc( h(6), 33 )
+
+            h(5) = h(5) + h(7)
+            h(8) = ieor( h(8), h(5) )
+            h(7) = ishftc( h(7), 10 )
+
+            h(6) = h(6) + h(8)
+            h(9) = ieor( h(9), h(6) )
+            h(8) = ishftc( h(8), 13 )
+
+            h(7) = h(7) + h(9)
+            h(10) = ieor( h(10), h(7) )
+            h(9) = ishftc( h(9), 38 )
+
+            h(8) = h(8) + h(10)
+            h(11) = ieor( h(11), h(8) )
+            h(10) = ishftc( h(10), 53 )
+
+            h(9) = h(9) + h(11)
+            h(0) = ieor( h(0), h(9) )
+            h(11) = ishftc( h(11), 42 )
+
+            h(10) = h(10) + h(0)
+            h(1) = ieor( h(1), h(10) )
+            h(0) = ishftc( h(0), 54 )
+
+        end subroutine endpartial
+
+    end subroutine spookyhash_end
+
+
+    module subroutine spookysubhash_init( self, seed )
+        ! Starts an incremental hash: stores the two seed words in
+        ! SELF % STATE(0:1) and resets the byte counters.  SELF is
+        ! INTENT(OUT), so no other component is assigned here.
+        type(spooky_subhash), intent(out) :: self
+        integer(int64), intent(in)        :: seed(2)
+
+        self % state(0:1) = seed
+        self % length = 0
+        ! The remainder component is handled as INT16 in spookyhash_update
+        ! (compared with 0_int16); use the same kind here.
+        self % remainder = 0_int16
+
+    end subroutine spookysubhash_init
+
+
+! 
add a message fragment to the state
+    module subroutine spookyhash_update( spooky, key )
+        ! Adds the bytes of KEY to the incremental hash held in SPOOKY.
+        ! Fragments that do not fill the two-block buffer are only stored
+        ! in SPOOKY % DATA; otherwise whole blocks are mixed into the state
+        ! and the left-over bytes are stored for the next call.
+        !
+        ! NOTE(review): SPOOKY is declared INTENT(OUT) but its REMAINDER,
+        ! LENGTH, STATE and DATA components are read below, which needs
+        ! INTENT(INOUT).  The declaration has to match the interface in the
+        ! parent module, which is not visible here -- change both together.
+        type(spooky_subhash), intent(out) :: spooky
+        integer(int8), intent(in)         :: key(0:)
+
+        integer(int8)  :: dummy(0:7)
+        integer(int64) :: h(0:11)
+        integer(int64) :: bend, &
+                          length, &
+                          new_length, &
+                          p8, &
+                          remainder
+
+        length = size(key, kind=int64)
+        new_length = length + spooky % remainder
+
+        ! Is this message fragment too short? If it is, stuff it away.
+        if ( new_Length < sc_buffsize ) then
+            remainder = spooky % remainder
+            spooky % data( remainder:remainder+length-1 ) = key
+            spooky % length = length + spooky % length
+            ! store the low-order byte of new_length as the new remainder
+            dummy = transfer( new_length, 0_int8, 8 )
+            if ( little_endian ) then
+                spooky % remainder = transfer( [ dummy(0), 0_int8 ], 0_int16 )
+            else
+                spooky % remainder = transfer( [ 0_int8, dummy(7) ], 0_int16 )
+            end if
+            return
+        end if
+
+        ! init the variables
+        if ( spooky % length < sc_buffsize ) then
+            h( [ 0, 3, 6, 9 ] ) = spooky % state(0)
+            h( [ 1, 4, 7, 10 ] ) = spooky % state(1)
+            h( [ 2, 5, 8, 11 ] ) = sc_const
+        else
+            h(0:11) = spooky % state(0:11)
+        end if
+
+        spooky % length = length + spooky % length
+
+        ! if we've got anything stuffed away, use it now
+        if ( spooky % remainder /= 0_int16 ) then
+            block
+                integer(int16) :: prefix
+                ! number of bytes of KEY needed to fill the buffer
+                prefix = sc_buffsize - spooky % remainder
+                remainder = spooky % remainder
+                spooky % data(remainder:remainder+prefix-1) = key(0:prefix-1)
+                call spookyhash_mix( transfer(spooky % data(0:sc_blocksize-1), &
+                    0_int64, sc_numvars), h )
+                call spookyhash_mix( &
+                    transfer(spooky % data(sc_blocksize:2*sc_blocksize-1), &
+                    0_int64, sc_numvars), h )
+                p8 = prefix
+                length = length - prefix
+            end block
+        else
+            p8 = 0
+        end if
+
+        ! handle all whole blocks of sc_blocksize bytes requiring aligned bytes
+        bend = p8 + 8*(length/sc_blocksize)*sc_numVars
+        remainder = length - ( bend - p8 )
+        do while( p8 < bend )
+            spooky % data(0:sc_blocksize-1) = key( p8:p8+sc_blocksize-1 )
+            call spookyhash_mix( transfer( spooky % data(0:sc_blocksize-1), &
+                0_int64, sc_numvars), h )
+            p8 = p8 + sc_blocksize
+        end do
+
+        ! stuff away the last few bytes
+        spooky % remainder = remainder
+
+        if ( remainder > 0 ) then
+            spooky % data(0:remainder-1) = &
+                key(bend:bend+remainder-1)
+        end if
+
+        ! stuff away the variables
+        spooky % state(0:11) = h(0:11)
+
+    end subroutine spookyhash_update
+
+
+! report the hash for the concatenation of all message fragments so far
+    module subroutine spookyhash_final(spooky, hash_code)
+        ! Computes the 128-bit hash of everything passed to
+        ! spookyhash_update so far and returns it in HASH_CODE(1:2).
+        ! The buffered bytes in SPOOKY % DATA are padded in place.
+        type(spooky_subhash), intent(inout) :: spooky
+        integer(int64), intent(inout)       :: hash_code(2)
+
+        integer(int64) :: h(0:11)
+        integer(int64) :: index, remainder
+
+        ! Fewer bytes than one full buffer were seen: hash them directly
+        ! with the short-message routine, seeded from the stored state.
+        if ( spooky % length < sc_buffsize ) then
+            hash_code = spooky % state(0:1)
+            call spookyhash_short( spooky % data(0:spooky % length-1), &
+                hash_code )
+            return
+        end if
+
+        remainder = spooky % remainder
+
+        h(0:11) = spooky % state(0:11)
+
+        ! INDEX is the byte offset in SPOOKY % DATA of the block that is
+        ! finalized below.
+        if ( remainder >= sc_blocksize ) then
+            ! the buffer can contain two blocks; handle any whole first block
+            call spookyhash_mix( transfer( spooky % data(0:sc_blocksize-1), &
+                0_int64, sc_numvars), h )
+            index = sc_blocksize
+            remainder = remainder - sc_blocksize
+        else
+            index = 0
+        end if
+
+        ! Zero-pad the last partial block and store the length mod
+        ! sc_blocksize in the final byte of that block.  This is the same
+        ! padding spookyHash_128 applies to its last partial block.
+        ! REMAINDER < sc_blocksize here, so it fits in one byte.
+        spooky % data(index+remainder:index+sc_blocksize-1) = 0_int8
+        spooky % data(index+sc_blocksize-1) = int( remainder, kind=int8 )
+
+        ! do some final mixing on that block only
+        call spookyhash_end( &
+            transfer( spooky % data(index:index+sc_blocksize-1), &
+                      0_int64, sc_numvars ), h )
+
+        hash_code(1:2) = h(0:1)
+
+    end subroutine spookyhash_final
+
+
+    function rot_64_32( a, k )
+        ! Shifts A left by K and right by 32-K, ORs the two results and
+        ! keeps the bits selected by two_32-1.
+        ! NOTE(review): this is a 32-bit left rotation only if two_32 is
+        ! 2**32 and the upper 32 bits of A are zero -- confirm at the
+        ! definition of two_32 and at the call sites.
+        integer(int64)             :: rot_64_32
+        integer(int64), intent(in) :: a
+        integer, intent(in)        :: k
+
+        rot_64_32 = iand( ior( shiftl( a, k ), shiftr( a, 32-k ) ), two_32-1 )
+
+    end function rot_64_32
+
+
+    module subroutine new_spooky_hash_seed( seed )
+! Random SEED generator for the SpookyHash procedures.  Draws four random
+! 32-bit integers, packs them into the two 64-bit words of SEED, and
+! repeats until the result differs from the value SEED had on entry.
+        integer(int64), intent(inout) :: seed(2)
+
+        integer(int64) :: old_seed(2)
+        real(dp)       :: sample(4)
+        integer(int32) :: part(4)
+
+        old_seed = seed
+        find_seed: do
+            call random_number( sample )
+            ! map [0,1) onto the full signed 32-bit range
+            part = int( floor( sample * 2_int64**32, int64 ) - 2_int64**31, &
+                int32 )
+            seed = transfer( part, seed, 2 )
+            if ( seed(1) /= old_seed(1) .or. seed(2) /= old_seed(2) ) return
+        end do find_seed
+
+    end subroutine new_spooky_hash_seed
+
+
+end submodule stdlib_64_bit_spookyv2_hashes
From 2b0a9ec4ba5a4e61edd5a91811e5e7050fd04c25 Mon Sep 17 00:00:00 2001 From: William Clodius Date: Sun, 21 Nov 2021 14:58:28 -0700 Subject: [PATCH 003/106] Add files to test hash function performance Brought over code from original hash_functions [ticket: X] --- src/tests/hash_functions/CMakeLists.txt | 2 + src/tests/hash_functions/Makefile.manual | 3 + .../test_32_bit_hash_performance.f90 | 190 ++++++++++++++++++ .../test_64_bit_hash_performance.f90 | 161 +++++++++++++++ 4 files changed, 356 insertions(+) create mode 100755 src/tests/hash_functions/CMakeLists.txt create mode 100755 src/tests/hash_functions/Makefile.manual create mode 100755 src/tests/hash_functions/test_32_bit_hash_performance.f90 create mode 100755 src/tests/hash_functions/test_64_bit_hash_performance.f90 diff --git a/src/tests/hash_functions/CMakeLists.txt b/src/tests/hash_functions/CMakeLists.txt new file mode 100755 index 000000000..459719c32 --- /dev/null +++ b/src/tests/hash_functions/CMakeLists.txt @@ -0,0 +1,2 @@ +ADDTEST(32_bit_hash_performance) 
+ADDTEST(64_bit_hash_performance) diff --git a/src/tests/hash_functions/Makefile.manual b/src/tests/hash_functions/Makefile.manual new file mode 100755 index 000000000..d3e59bd18 --- /dev/null +++ b/src/tests/hash_functions/Makefile.manual @@ -0,0 +1,3 @@ +PROGS_SRC = test_64_bit_hash_performance.f90 test_32_bit_hash_performance.f90 + +include ../Makefile.manual.test.mk diff --git a/src/tests/hash_functions/test_32_bit_hash_performance.f90 b/src/tests/hash_functions/test_32_bit_hash_performance.f90 new file mode 100755 index 000000000..acee5e36b --- /dev/null +++ b/src/tests/hash_functions/test_32_bit_hash_performance.f90 @@ -0,0 +1,190 @@ +program test_32_bit_hash_performance +!! Program to compare the relative performance of different 32 bit hash +!! functions + + use stdlib_kinds, only: & + dp, & + int8, & + int32, & + int64 + + use stdlib_32_bit_hash_functions + + implicit none + + integer, parameter :: & + block_size(8) = [ 1, 2, 4, 8, 16, 64, 256, 1024 ] + integer(int32), parameter :: huge32 = huge(0_int32) + real(dp), parameter :: hugep1 = real(huge32, dp) + 1.0_dp + integer, parameter :: rand_power = 16 + integer, parameter :: rand_size = 2**rand_power + integer, parameter :: test_size = rand_size * 4 + integer, parameter :: test_block = 2**10 + integer, parameter :: repeat = 4 + integer :: index, k + integer :: lun + real(dp) :: rand(2) + integer(int32) :: rand_object(rand_size) + integer(int8) :: test_object(test_size) + + open( newunit=lun, file="32_bit_hash_performance.txt", & + access="sequential", action="write", form="formatted", & + position="rewind" ) + + do index=1, rand_size + call random_number(rand) + if (rand(1) < 0.5_dp) then + rand_object(index) = ceiling(-rand(2)*hugep1, int32) - 1 + else + rand_object(index) = floor(rand(2)*hugep1, int32) + end if + end do + + test_object(:) = transfer( rand_object, 0_int8, test_size ) + + write(lun, '("| Algorithm | Key Size | Key # | Time (s) |")') + write(lun, '("| | Bytes | | |")') + write(lun, 
'("|------------|-----------|------------|----------|")') + + call test_fnv_1() + + call test_fnv_1a() + + call test_nmhash32() + + call test_nmhash32x() + + call test_water() + +contains + + subroutine test_fnv_1() + integer :: index2 + integer(int_hash) :: hash + real :: t1, t2, tdiff + integer(int_hash) :: summary(repeat) + + do k=1, size(block_size) + call cpu_time(t1) + do index=1, repeat + do index2=1, test_size, block_size(k) + hash = fnv_1_hash( test_object( index2: & + index2+block_size(k)-1 ) ) + if (index2 == index) summary(index) = hash + end do + end do + call cpu_time(t2) + tdiff = t2-t1 + write(lun, '("|", a10, 2x, "|", i8, 3x, "|", 1x, i10, 1x, ' // & + '"|", f9.5, 1x, "|")') 'FNV-1', & + block_size(k), repeat*(test_size/block_size(k)), tdiff + end do + + end subroutine test_fnv_1 + + subroutine test_fnv_1a() + integer :: index2 + integer(int_hash) :: hash + real :: t1, t2, tdiff + integer(int_hash) :: summary(repeat) + + do k=1, size(block_size) + call cpu_time(t1) + do index=1, repeat + do index2=1, test_size, block_size(k) + hash = fnv_1a_hash( test_object( index2: & + index2+block_size(k)-1 ) ) + if (index2 == index) summary(index) = hash + end do + end do + call cpu_time(t2) + tdiff = t2-t1 + write(lun, '("|", a10, 2x, "|", i8, 3x, "|", 1x, i10, 1x, ' // & + '"|", f9.5, 1x, "|")') 'FNV-1a', & + block_size(k), repeat*(test_size/block_size(k)), tdiff + end do + + end subroutine test_fnv_1a + + subroutine test_nmhash32() + integer :: index2 + integer(int_hash) :: hash + integer(int32) :: seed = 0_int32 + real :: t1, t2, tdiff + integer(int_hash) :: summary(repeat) + + call new_nmhash32_seed( seed ) + do k=1, size(block_size) + call cpu_time(t1) + do index=1, repeat + do index2=1, test_size, block_size(k) + hash = nmhash32( test_object( index2: & + index2+block_size(k)-1 ),& + seed ) + if (index2 == index) summary(index) = hash + end do + end do + call cpu_time(t2) + tdiff = t2-t1 + write(lun, '("|", a10, 2x, "|", i8, 3x, "|", 1x, i10, 1x, ' // & + 
'"|", f9.5, 1x, "|")') 'nmhash32', & + block_size(k), repeat*(test_size/block_size(k)), tdiff + end do + + end subroutine test_nmhash32 + + subroutine test_nmhash32x() + integer :: index2 + integer(int_hash) :: hash + integer(int32) :: seed = 0_int32 + real :: t1, t2, tdiff + integer(int_hash) :: summary(repeat) + + call new_nmhash32x_seed( seed ) + do k=1, size(block_size) + call cpu_time(t1) + do index=1, repeat + do index2=1, test_size, block_size(k) + hash = nmhash32x( test_object( index2: & + index2+block_size(k)-1 ),& + seed ) + if (index2 == index) summary(index) = hash + end do + end do + call cpu_time(t2) + tdiff = t2-t1 + write(lun, '("|", a10, 2x, "|", i8, 3x, "|", 1x, i10, 1x, ' // & + '"|", f9.5, 1x, "|")') 'nmhash32x', & + block_size(k), repeat*(test_size/block_size(k)), tdiff + end do + + end subroutine test_nmhash32x + + subroutine test_water() + integer :: index2 + integer(int_hash) :: hash + integer(int64) :: seed = 0_int64 + real :: t1, t2, tdiff + integer(int_hash) :: summary(repeat) + + call new_water_hash_seed( seed ) + do k=1, size(block_size) + call cpu_time(t1) + do index=1, repeat + do index2=1, test_size, block_size(k) + hash = water_hash( test_object( index2: & + index2+block_size(k)-1 ),& + seed ) + if (index2 == index) summary(index) = hash + end do + end do + call cpu_time(t2) + tdiff = t2-t1 + write(lun, '("|", a10, 2x, "|", i8, 3x, "|", 1x, i10, 1x, ' // & + '"|", f9.5, 1x, "|")') 'water', & + block_size(k), repeat*(test_size/block_size(k)), tdiff + end do + + end subroutine test_water + +end program test_32_bit_hash_performance diff --git a/src/tests/hash_functions/test_64_bit_hash_performance.f90 b/src/tests/hash_functions/test_64_bit_hash_performance.f90 new file mode 100755 index 000000000..6c445f781 --- /dev/null +++ b/src/tests/hash_functions/test_64_bit_hash_performance.f90 @@ -0,0 +1,161 @@ +program test_64_bit_hash_performance +!! Program to compare the relative performance of different 64 bit hash +!! 
functions + + use stdlib_kinds, only: & + dp, & + int8, & + int32, & + int64 + + use stdlib_64_bit_hash_functions + + implicit none + + integer, parameter :: & + block_size(8) = [ 1, 2, 4, 8, 16, 64, 256, 1024 ] + integer(int32), parameter :: huge32 = huge(0_int32) + real(dp), parameter :: hugep1 = real(huge32, dp) + 1.0_dp + integer, parameter :: rand_power = 16 + integer, parameter :: rand_size = 2**rand_power + integer, parameter :: test_size = rand_size * 4 + integer, parameter :: repeat = 4 + integer :: index, k + integer :: lun + real(dp) :: rand(2) + integer(int32) :: rand_object(rand_size) + integer(int8) :: test_object(test_size) + + + open( newunit=lun, file="64_bit_hash_performance.txt", & + access="sequential", action="write", form="formatted", & + position="rewind" ) + + do index=1, rand_size + call random_number(rand) + if (rand(1) < 0.5_dp) then + rand_object(index) = ceiling(-rand(2)*hugep1, int32) - 1 + else + rand_object(index) = floor(rand(2)*hugep1, int32) + end if + end do + + test_object(:) = transfer( rand_object, 0_int8, test_size ) + + write(lun, '("| Algorithm | Key Size | Key # | Time (s) |")') + write(lun, '("| | Bytes | | |")') + write(lun, '("|------------|-----------|------------|----------|")') + + call test_fnv_1() + + call test_fnv_1a() + + call test_pengy() + + call test_spooky() + +contains + + subroutine test_fnv_1() + integer :: index2 + integer(int64) :: hash + real :: t1, t2, tdiff + integer(int64) :: summary(repeat) + + do k=1, size(block_size) + call cpu_time(t1) + do index=1, repeat + do index2=1, test_size, block_size(k) + hash = fnv_1_hash( test_object( index2: & + index2+block_size(k)-1 ) ) + if (index2 == index) summary(index) = hash + end do + end do + call cpu_time(t2) + tdiff = t2-t1 + write(lun, '("|", a10, 2x, "|", i8, 3x, "|", 1x, i10, 1x, ' // & + '"|", f9.5, 1x, "|")') 'FNV-1', & + block_size(k), repeat*(test_size/block_size(k)), tdiff + end do + + end subroutine test_fnv_1 + + subroutine test_fnv_1a() + 
integer :: index2 + integer(int64) :: hash + real :: t1, t2, tdiff + integer(int64) :: summary(repeat) + + do k=1, size(block_size) + call cpu_time(t1) + do index=1, repeat + do index2=1, test_size, block_size(k) + hash = fnv_1a_hash( test_object( index2: & + index2+block_size(k)-1 ) ) + if (index2 == index) summary(index) = hash + end do + end do + call cpu_time(t2) + tdiff = t2-t1 + write(lun, '("|", a10, 2x, "|", i8, 3x, "|", 1x, i10, 1x, ' // & + '"|", f9.5, 1x, "|")') 'FNV-1a', & + block_size(k), repeat*(test_size/block_size(k)), tdiff + end do + + end subroutine test_fnv_1a + + subroutine test_spooky() + integer :: index2 + integer(int64) :: hash(2) + integer(int64) :: seed(2) = [ 0_int64, 0_int64 ] + real :: t1, t2, tdiff + integer(int64) :: summary(repeat) + + call new_spooky_hash_seed( seed ) + do k=1, size(block_size) + call cpu_time(t1) + do index=1, repeat + do index2=1, test_size, block_size(k) + hash = spooky_hash( test_object( index2: & + index2+block_size(k)-1 ), & + seed ) + if (index2 == index) summary(index) = hash(1) + end do + end do + call cpu_time(t2) + tdiff = t2-t1 + write(lun, '("|", a10, 2x, "|", i8, 3x, "|", 1x, i10, 1x, ' // & + '"|", f9.5, 1x, "|")') 'Spooky', & + block_size(k), repeat*(test_size/block_size(k)), tdiff + end do + + end subroutine test_spooky + + subroutine test_pengy() + integer :: index2 + integer(int64) :: hash + integer(int32) :: seed = int( z'DEADBEEF', int32 ) + real :: t1, t2, tdiff + integer(int64) :: summary(repeat) + + call new_pengy_hash_seed( seed ) + do k=1, size(block_size) + call cpu_time(t1) + do index=1, repeat + do index2=1, test_size, block_size(k) + hash = pengy_hash( test_object( index2: & + index2+block_size(k)-1 ), & + seed ) + if (index2 == index) summary(index) = hash + end do + end do + call cpu_time(t2) + tdiff = t2-t1 + write(lun, '("|", a10, 2x, "|", i8, 3x, "|", 1x, i10, 1x, ' // & + '"|", f9.5, 1x, "|")') 'Pengy', & + block_size(k), repeat*(test_size/block_size(k)), tdiff + end do + + end 
subroutine test_pengy + +end program test_64_bit_hash_performance From b00421598fafbbcae670c58696b1503c960fd37c Mon Sep 17 00:00:00 2001 From: William Clodius Date: Sun, 21 Nov 2021 16:10:28 -0700 Subject: [PATCH 004/106] Modified to have no-range-check Modified so it doesn't check overflow of integers. [ticket: X] --- Makefile.manual | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile.manual b/Makefile.manual index b7af735b7..54dc3b89f 100644 --- a/Makefile.manual +++ b/Makefile.manual @@ -1,7 +1,7 @@ # Fortran stdlib Makefile FC ?= gfortran -FFLAGS ?= -Wall -Wextra -Wimplicit-interface -fPIC -g -fcheck=all +FFLAGS ?= -Wall -Wextra -Wimplicit-interface -fPIC -g -fcheck=all -fno-range-check FYPPFLAGS ?= export FC From 98e22dd13ec29a779290ffb086f7a4eb698e5e15 Mon Sep 17 00:00:00 2001 From: William Clodius Date: Sun, 21 Nov 2021 16:14:17 -0700 Subject: [PATCH 005/106] Modified to have no-range-checks Added the gcc compiler flag no-range-checks [ticket: X] --- CMakeLists.txt | 2 ++ 1 file changed, 2 insertions(+) diff --git a/CMakeLists.txt b/CMakeLists.txt index 27c5058c4..bcba35bbc 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -21,9 +21,11 @@ if(CMAKE_Fortran_COMPILER_ID STREQUAL GNU) endif() add_compile_options(-fimplicit-none) add_compile_options(-ffree-line-length-132) + add_compile_options(-fno-range-check) add_compile_options(-Wall) add_compile_options(-Wextra) add_compile_options(-Wimplicit-procedure) + add_compile_options(-Wconversion-extra) # -pedantic-errors triggers a false positive for optional arguments of elemental functions, # see test_optval and https://gcc.gnu.org/bugzilla/show_bug.cgi?id=95446 if(CMAKE_Fortran_COMPILER_VERSION VERSION_LESS 11.0) From d77867134cb637707cb8f1f83fe2f30a8a49230a Mon Sep 17 00:00:00 2001 From: William Clodius Date: Sun, 21 Nov 2021 16:22:46 -0700 Subject: [PATCH 006/106] Added hash modules to the compile list Added files with hash function modules to the list of fypp files to be processed. 
[ticket: X] --- src/CMakeLists.txt | 8 ++++++++ src/Makefile.manual | 24 ++++++++++++++++++++++++ 2 files changed, 32 insertions(+) diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index bcb4931b3..1837e6b9d 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -2,6 +2,14 @@ # Create a list of the files to be preprocessed set(fppFiles + stdlib_32_bit_fnv_hashes.fypp + stdlib_32_bit_hash_functions.fypp + stdlib_32_bit_nmhashes.fypp + stdlib_32_bit_water_hashes.fypp + stdlib_64_bit_fnv_hashes.fypp + stdlib_64_bit_hash_functions.fypp + stdlib_64_bit_pengy_hashes.fypp + stdlib_64_bit_spookyv2_hashes.fypp stdlib_ascii.fypp stdlib_bitsets.fypp stdlib_bitsets_64.fypp diff --git a/src/Makefile.manual b/src/Makefile.manual index f573fa6cf..cef6c4f5a 100644 --- a/src/Makefile.manual +++ b/src/Makefile.manual @@ -1,4 +1,12 @@ SRCFYPP = \ + stdlib_32_bit_fnv_hashes.fypp \ + stdlib_32_bit_hash_functions.fypp \ + stdlib_32_bit_nmhashes.fypp \ + stdlib_32_bit_water_hashes.fypp \ + stdlib_64_bit_fnv_hashes.fypp \ + stdlib_64_bit_hash_functions.fypp \ + stdlib_64_bit_pengy_hashes.fypp \ + stdlib_64_bit_spookyv2_hashes.fypp \ stdlib_ascii.fypp \ stdlib_bitsets_64.fypp \ stdlib_bitsets_large.fypp \ @@ -74,6 +82,22 @@ $(SRCGEN): %.f90: %.fypp common.fypp # Fortran module dependencies f18estop.o: stdlib_error.o +stdlib_32_bit_fnv_hashes.o: \ + stdlib_32_bit_hash_functions.o +stdlib_32_bit_hash_functions.o: \ + stdlib_kinds.o +stdlib_32_bit_nmhashes.o: \ + stdlib_32_bit_hash_functions.o +stdlib_32_bit_water_hashes.o: \ + stdlib_32_bit_hash_functions.o +stdlib_64_bit_fnv_hashes.o: \ + stdlib_64_bit_hash_functions.o +stdlib_64_bit_hash_functions.o: \ + stdlib_kinds.o +stdlib_64_bit_pengy_hashes.o: \ + stdlib_64_bit_hash_functions.o +stdlib_64_bit_spookyv2_hashes.o: \ + stdlib_64_bit_hash_functions.o stdlib_ascii.o: stdlib_kinds.o stdlib_bitsets.o: stdlib_kinds.o stdlib_bitsets_64.o: stdlib_bitsets.o From 461543a37f102fa8406571b6b5e2455f8cd81ecd Mon Sep 17 00:00:00 2001 From: 
William Clodius Date: Sun, 21 Nov 2021 16:31:16 -0700 Subject: [PATCH 007/106] Added hash_functions to the test directories Added hash_functions to the test directories [ticket: X] --- src/tests/CMakeLists.txt | 1 + src/tests/Makefile.manual | 1 + 2 files changed, 2 insertions(+) diff --git a/src/tests/CMakeLists.txt b/src/tests/CMakeLists.txt index 290980533..7279538a2 100644 --- a/src/tests/CMakeLists.txt +++ b/src/tests/CMakeLists.txt @@ -17,6 +17,7 @@ list( add_subdirectory(ascii) add_subdirectory(bitsets) +add_subdirectory(hash_functions) add_subdirectory(io) add_subdirectory(linalg) add_subdirectory(logger) diff --git a/src/tests/Makefile.manual b/src/tests/Makefile.manual index d3594201e..6f632ae65 100644 --- a/src/tests/Makefile.manual +++ b/src/tests/Makefile.manual @@ -14,6 +14,7 @@ testdrive.F90: all test clean:: $(MAKE) -f Makefile.manual --directory=ascii $@ $(MAKE) -f Makefile.manual --directory=bitsets $@ + $(MAKE) -f Makefile.manual --directory=hash_functions $@ $(MAKE) -f Makefile.manual --directory=io $@ $(MAKE) -f Makefile.manual --directory=logger $@ $(MAKE) -f Makefile.manual --directory=optval $@ From 29a5297fc14276a67dc1562841419c5f9ada4502 Mon Sep 17 00:00:00 2001 From: William Clodius Date: Sun, 21 Nov 2021 16:36:39 -0700 Subject: [PATCH 008/106] Added stdlib_hash_functions.md Added documentation for the hash functions. 
[ticket: X] --- doc/specs/stdlib_hash_functions.md | 1697 ++++++++++++++++++++++++++++ 1 file changed, 1697 insertions(+) create mode 100755 doc/specs/stdlib_hash_functions.md diff --git a/doc/specs/stdlib_hash_functions.md b/doc/specs/stdlib_hash_functions.md new file mode 100755 index 000000000..e0e1e08b5 --- /dev/null +++ b/doc/specs/stdlib_hash_functions.md @@ -0,0 +1,1697 @@ +--- +title: Hash codes +--- + +# The `stdlib_32_bit_hash_functions` and `stdlib_64_bit_hash_functions` modules + +(TOC) + +## Overview of hash functions + +The comparison of lexical entities or other objects for equality +can be computationally expensive. +This cost is often reduced by computing a near unique integer value, +termed a hash code, from the structure of the object using a procedure +termed a hash function. +Equality of hash codes is a necessary, but not sufficient, condition +for the original objects to be equal. +As integer comparisons are very efficient, performing an initial +comparison of hash codes and then performing a detailed comparison +only if the hash codes are equal can improve performance. +The hash codes, in turn, can be mapped to a smaller set of integers, +that can be used as an index, termed a hash index, to a rank one +array, often termed a hash table. +This mapping will be known as a scalar hash. +The use of a hash table reduces the number of hash codes that need to +be compared, further improving performance. +A hash function can also be used to generate a checksum to verify that +data has not changed. +The Fortran Standard Library therefore provides procedures to compute +hash codes and scalar hashes, and derived types implementing hash +tables. +This document only discusses the hash codes and scalar hashes in the +library. + +## Licensing + +The Fortran Standard Library is distributed under the MIT License. +However components of the library may be based on code with additional +licensing restrictions. 
In particular, the hash codes are often based +on algorithms with additional restrictions on distribution. +The algorithms with such restrictions (`Fibonacci Hash`, `Universal +Multiplicative Hash`, +`FNV-1 Hash`, `FNV-1A Hash`, `nmhash32`, `nmhash32x`, `waterhash`, +`pengyhash` and `SpookyHash`) are discussed below. + +`FIBONACCI_HASH` is a scalar hash. It is an implementation in Fortran +2008 and signed two's complement integers of the Fibonacci Hash +described in D. E. Knuth, "The Art of +Computer Programming, Second Edition, Volume 3, Sorting and +Searching", Addison-Wesley, Upper Saddle River, NJ, +pp. 517-518, 1998. The algorithms in that source are considered public +domain. + +`UNIVERSAL_MULT_HASH` is a scalar hash. It is an implementation in +Fortran 2008 and signed two's complement integers of the +universal multiplicative hash algorithm of M. Dietzfelbinger, +T. Hagerup, J. Katajainen, and M. Penttonen, "A Reliable Randomized +Algorithm for the Closest-Pair Problem," J. Algorithms, Vol. 25, +No. 1, Oct. 1997, pp. 19-51. Because of its publication in the Journal +of Algorithms, the universal multiplicative hash algorithm is public +domain. + +`FNV_1_HASH` and `FNV_1A_HASH` are translations to Fortran 2008 and +signed two's complement integers of the +`FNV-1` and `FNV-1a` hash functions of Glenn Fowler, Landon Curt Noll, +and Phong Vo, that have been released into the public +domain. Permission has been granted, by Landon Curt Noll, for the use +of these algorithms in the Fortran Standard Library. A description of +these functions is available at +<https://en.wikipedia.org/wiki/Fowler–Noll–Vo_hash_function>. +These functions have been modified from their normal forms to also +encode the structure size in the output hash. + +Similarly `SPOOKY_HASH` and associated procedures are translations to +Fortran 2008 and signed two's complement integers of the unsigned 64 +bit version 2 `SpookyHash` functions of Bob +Jenkins <https://burtleburtle.net/bob/hash/spooky.html> to signed 64 +bit operations. 
Version 2 was chosen over version 1 as it has better +performance and fewer bad seeds. +Bob Jenkins has also put this code in the public +domain and has given permission to treat this code as public domain in +the USA, provided the code can be used under other licenses and he is +given appropriate credit. + +`NMHASH32` and `NMHASH32x` are translations to Fortran 2008 and signed +two's complement integers of the unsigned 32 bit +hashes of James Z. M. Gao's `nmhash32` and `nmhash32x` version of 0.2, +<https://github.com/gzm55/hash-garage>. +James Z. M. Gao has released his code under the BSD 2-Clause +License. The BSD 2-Clause license is as follows: + + BSD 2-Clause License + + Copyright (c) 2021, James Z.M. Gao + All rights reserved. + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions are met: + + 1. Redistributions of source code must retain the above copyright notice, + this list of conditions and the following disclaimer. + + 2. Redistributions in binary form must reproduce the above copyright notice, + this list of conditions and the following disclaimer in the documentation + and/or other materials provided with the distribution. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE + LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + POSSIBILITY OF SUCH DAMAGE. 
+ +`WATER_HASH` is a translation to Fortran 2008 and signed two's +complement integers of the `waterhash` algorithm +of Tommy Ettinger. This algorithm is inspired by the Wy Hash of +Wang Yi. Tommy Ettinger's original C++ code, `waterhash.h`, +is available at URL: under +the `unlicense`, +. +The `unlicense` reads as follows: + + This is free and unencumbered software released into the public domain. + Anyone is free to copy, modify, publish, use, compile, sell, or + distribute this software, either in source code form or as a compiled + binary, for any purpose, commercial or non-commercial, and by any + means. + + In jurisdictions that recognize copyright laws, the author or authors + of this software dedicate any and all copyright interest in the + software to the public domain. We make this dedication for the benefit + of the public at large and to the detriment of our heirs and + successors. We intend this dedication to be an overt act of + relinquishment in perpetuity of all present and future rights to this + software under copyright law. + + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR + OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + OTHER DEALINGS IN THE SOFTWARE. + + For more information, please refer to + +`PENGY_HASH` is a translation to Fortran 2008 and signed two's +complement arithmetic of the `pengyhash` algorithm of Alberto Fajardo, +copyright 2020. 
Alberto Fajardo's original C code, `pengyhash.c`, is +available at the URL: +https://github.com/tinypeng/pengyhash/blob/master/pengyhash.c +under the BSD 2-Clause License: +https://github.com/tinypeng/pengyhash/blob/master/LICENSE + +The BSD 2-Clause license is as follows: + + BSD 2-Clause License + + pengyhash + Copyright (c) 2020 Alberto Fajardo + All rights reserved. + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions + are met: + + 1. Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + + 2. Redistributions in binary form must reproduce the above + copyright notice, this list of conditions and the following + disclaimer in the documentation and/or other materials provided + with the distribution. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND + CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, + INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF + MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE + DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS + BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, + EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED + TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON + ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR + TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF + THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + SUCH DAMAGE. + + +## The hash codes modules + +### Overview of the modules + +The Standard Library provides two modules implementing hash +functions and scalar hashes. +The `stdlib_32_bit_hash_functions` module provides procedures to +compute 32 bit integer hash codes and a scalar hash. 
+The hash codes are useful for tables of up to `2**15` entries, and +for keys with a few hundred elements. +The `stdlib_64_bit_hash_functions` module provides hash procedures to +compute 64 bit integer hash codes and a scalar hash. +The hash codes are useful for tables of up to `2**30` entries, and +for keys with a few thousand elements. +While one of the codes in `stdlib_64_bit_hash_functions`, +`SPOOKY_HASH`, can also be used to calculate 128 bit hash codes, none +of the current codes can be used to calculate 256 bit hash codes. +Such larger hash codes are useful for larger hash tables and keys, and +for checksums. +Such larger keys and tables are little used, if used at all, in +current +Fortran codes, but the larger hash codes may be added to the library +if there is a demand for them. + +Hash functions are often divided into two categories: +"cryptographic" and "non-cryptographic". +Cryptographic hash functions produce codes that are infeasible to +reverse without additional information beyond the identity of +the hash function used to generate the code and the resulting codes. +Non-cryptographic codes, in some circumstances, are believed to be +reversible. +The modules only implement hash +functions that are believed to be non-cryptographic, with +implementations available in the public domain. + +There are a number of algorithms available for the computation of +non-cryptographic 32 and 64 bit hash codes that differ in their +computational complexity, +their relative performance on different size keys, and the +expected uniqueness (randomness) of the resulting hash codes. +Their relative performance in the analysis of text, in particular, +can depend on the processor, character set, language, and content. +The quality of a hash function is often evaluated using +the SMHasher test suite, originally written by +[Austin Appleby](https://github.com/aappleby/smhasher), but greatly +extended by [Reini Urban](https://github.com/rurban/smhasher). 
+All except the simplest, `FNV_1` and `FNV_1A`, of the hash functions +defined in the modules perform well on the tests in Reini Urban's +version of SMHasher. + +There are two problems in implementing hash functions in Fortran. +First, the static typing of Fortran makes it awkward to define general +purpose hash functions. +Instead, hash functions are defined for some of the more common objects +that are sufficiently complicated that a direct comparison is costly +and common enough that a general procedure is useful: +character strings and rank one arrays of integers. +Other objects can, in principle, be hashed by using `transfer` to +map their contents to an integer array, typically one of kind `INT8`. +The other problem is that hash codes are typically defined using +modular unsigned integer arithmetic. +As such integers are not part of the current Fortran standard, +workarounds have to be used. +These can take two forms. +In one, the operations are emulated by using an integer of a +larger size, or, for the larger integers, by dividing the integer into +two lower and higher order halves, +and performing the operations on each half separately using +the larger integers. +In the other, the unsigned integers may be replaced directly by +the corresponding signed integers, but +otherwise not modifying the code logic. +The first should be standard conforming on current processors, but +is more computationally intensive unless the processors recognize +underlying idioms that are rarely used in Fortran codes. The second is +not standard conforming as bit operations involving the sign are +undefined, +but should yield equivalent results with fewer operations on +processors with two's complement integers that do not trap on over +or under flow. The codes currently use the second method. 
+ +In order to compile the hash function modules, the processors must +implement much of Fortran 2003, and selected components of Fortran +2008: submodules, 64 bit integers, and some bit intrinsics. +The main limitation on valid processors is whether they +implement the submodules enhancement of Fortran 2008. +In order to properly run the hash functions, the compilers must +use two's complement integers, and be able to execute them with +wraparound semantics and no integer overflow exceptions. +Current Fortran 2003+ processors solely use two's complement +integers, and appear to be able to turn off overflow detection, +so the modules use signed integer arithmetic. For that reason +trapping on signed arithmetic must be disabled. The command line +flags to disable overflow detection for processors implementing +submodules are summarized in the table below. +Note that FLANG, gfortran, ifort, and NAG all default to +integer overflow wrapping. + +|Processor|Legal flag|Illegal flag|Default| +|---------|----------|------------|-------| +| ARM Fortran | NA? | NA? | overflow wrapping? | +| Cray Fortran | NA? | NA? | overflow wrapping? | +| FLANG/PGI | -fwrapv | -ftrapv | -fwrapv | +| gfortran | -fwrapv | -ftrapv | -fwrapv | +| IBM Fortran | NA? | NA? | overflow wrapping? | +| ifort| NA? | NA? | overflow wrapping | +| NAG Fortran | -C=none | -C=intovf | -C=none | +| NEC Fortran | NA? | NA? | overflow wrapping? | +| NVIDIA Fortran | NA? | NA? | overflow wrapping? | + +All of the modules' hash functions take one or two arguments. +All of them have as their first argument the object to be hashed, +termed a *key*. +Most have a second argument, termed a *seed*, that sets the initial +value of the hash code changing the hash function behavior. +In particular, inputs that hash to the same hash index with a given +seed, will often hash to different indexes with a different seed. 
+This difference in behavior makes algorithms that use a seed much +more resistant to denial of service attacks that use the properties +of a known hash to increase the number of hash table collisions. +This additional integer must be kept the same for all hashes +in a given hash table, but can be changed and the objects rehashed +if collisions are unusually common. +The *seed* can be either a scalar or a two element array. +Some of the hash functions have alternatives that allow incremental +hashing. + +|Algorithm|Seed|Result| +|---------|----|------| +|FNV-1|None|32 or 64 bit integer| +|FNV-1a|None|32 or 64 bit integer| +|nmhash32 |32 bit scalar integer|32 bit integer| +|nmhash32x |32 bit scalar integer|32 bit integer| +|pengyhash |32 bit scalar integer|64 bit integer| +|Spooky Hash|64 bit two element vector|64 bit two element vector| +|waterhash|64 bit scalar integer|32 bit integer| + +The hash function modules each provide at least five algorithms for +hash functions: two optimized for small (< 32 `INT8` integer elements) +keys, and three optimized for large (> 100 `INT8` integer elements) +keys. +The core implementation for each algorithm is for keys that are +vectors of `INT8` integers. +These core implementations are then used in wrappers for keys +that are vectors of `INT16`, `INT32` and `INT64` integers, or default +character strings, in the expectation that inlining will eliminate the +overhead of transferring the other keys to `INT8` integer vectors. + +The `stdlib_32_bit_hash_functions` module provides +implementations of five hash code algorithms: +the *FNV_1* and *FNV_1A* variants of Glenn Fowler, +Landon Curt Noll, and Kiem-Phong Vo; +the *nmhash32* and *nmhash32x* of James Z. M. Gao; +and the *waterhash* of Tommy Ettinger. +The detailed implementation of each algorithm is handled in a separate +submodule: `stdlib_32_bit_fnv_hashes`, +`stdlib_32_bit_nmhashes`, and `stdlib_32_bit_water_hashes`, +respectively. 
The `nmhash32`, `nmhash32x`, and `waterhash` algorithms +require seeds. The submodules provide separate seed generators +for each algorithm. +The module itself +implements two scalar hash functions, `FIBONACCI_HASH` and +`UNIVERSAL_MULT_HASH`. +It also implements the subroutine, `ODD_RANDOM_INTEGER`, for +generating seeds for `UNIVERSAL_MULT_HASH`. +All assume a two's complement sign bit, and no out of +range checks. + +The `stdlib_64_bit_hash_functions` module also provides +implementations of four hash code algorithms: +the *FNV_1* and *FNV_1A* variants of Glenn Fowler, +Landon Curt Noll, and Kiem-Phong Vo; +the *pengyhash* of Alberto Fajardo; +and the *SpookyHash* of Bob Jenkins. +The detailed implementation of each algorithm is handled in a separate +submodule: `stdlib_64_bit_fnv_hashes`, +`stdlib_64_bit_pengy_hashes`, and `stdlib_64_bit_spooky_hashes`, +respectively. +The `pengyhash` and `Spooky Hash` algorithms +require seeds. The submodules provide separate seed generators +for each algorithm. +The module itself implements two scalar hash functions, +`FIBONACCI_HASH` and `UNIVERSAL_MULT_HASH`. +It also implements the subroutine, `ODD_RANDOM_INTEGER`, for +generating seeds for `UNIVERSAL_MULT_HASH`. +All assume a two's complement sign bit, and no out of +range checks. + +The `stdlib_32_bit_fnv_hashes` and `stdlib_64_bit_fnv_hashes` +submodules each provide implementations of the FNV-1 and FNV-1A +algorithms in the form of two separate overloaded functions: `FNV_1` +and `FNV_1A`. +The FNV-1 and FNV-1A algorithms differ in their order of the +multiplication and exclusive or operations. +They differ from their normal implementation in that they also +encode the structure size in the hash code. +The 32 and 64 bit algorithms differ in their initial offsets and in +their multiplicative constants. +Analysis suggests that `FNV_1A` should be better at randomizing the +input, but tests with hash tables show negligible difference. 
+These algorithms have the reputation of being particularly useful for +small byte strings, i.e., strings of less than 32 bytes. +While they do not at all perform well on the SMHasher test suite, +usage indicates that this has little impact on the +performance of small hash tables, and the small size of the functions +allows their quick loading and retainment in the instruction cache, +giving a performance boost where the hashing is intermittent. +(See the +[SMHasher discussion](https://github.com/rurban/smhasher/README.md) +and S. Richter, V. Alvarez, and J. Dittrich, +["A Seven-Dimensional Analysis of Hashing Methods and its Implications on Query Processing"](https://bigdata.uni-saarland.de/publications/p249-richter.pdf).) + +The `stdlib_32_bit_nmhashes` submodule provides implementations +of James Z.M. Gao's `nmhash32` and `nmhash32x` algorithms, +version 0.2, +in the form of the overloaded functions, `NMHASH32` and `NMHASH32X`. +The implementations are based on the scalar versions of Gao's +algorithms and not the vector versions that require access to +the vector instructions of some processors. +Both algorithms perform well on the SMHasher tests, and have no known +bad seeds. The vector versions of both codes perform well on large +keys, with the `nmhash32x` faster on short keys. To provide randomly +generated seeds for the two functions the submodule also defines the +subroutines `NEW_NMHASH32_SEED` and `NEW_NMHASH32X_SEED`. Gao claims +that `NMHASH32X` is significantly faster than `NMHASH32` on short +keys, but slower on long keys, but our limited testing so far shows +`NMHASH32X` to be significantly faster on short keys and slightly +faster on long keys. + +The `stdlib_32_bit_water_hashes` submodule provides implementations +of Tommy Ettinger's `waterhash` algorithm in the form of the overloaded +function, `WATER_HASH`. 
Water Hash has not been tested by Reini Urban, +but Tommy Ettinger has tested it with Urban's SMHasher and presents +results that show Water Hash passing all the tests. So far his +testing hasn't found any bad seeds for the algorithm. To provide +randomly generated seeds for the hash function the submodule also +defines the subroutine `NEW_WATER_HASH_SEED`. + +The `stdlib_64_bit_pengy_hashes` submodule provides implementations of +Alberto Fajardo's `pengyhash` in the form of the overloaded function, +`PENGY_HASH`. Reini Urban's testing shows that PengyHash passes all +the tests and has no bad seeds. To provide randomly generated seeds +for the hash function the submodule also defines the subroutine +`NEW_PENGY_HASH_SEED`. + +The `stdlib_64_bit_spooky_hashes` submodule provides implementations +of Bob Jenkins' SpookyHash in the form of the overloaded function, +`SPOOKY_HASH`. Future implementations may provide the SpookyHash +incremental hashing procedures. +SpookyHash is optimized for large objects and should give excellent +performance for objects greater than about 96 bytes, but has +significant overhead for smaller objects. +The code was designed for Little Endian processors, and will give +different results on Big Endian processors, but the hash quality on +those processors is probably just as good. +SpookyHash version 2 passes all of Reini Urban's SMHasher tests, and +has one bad seed only when reduced to a 32 bit output. +Its only potential problem is undefined behavior if the key is +misaligned. + +## The `stdlib_32_bit_hash_codes` module + +### Overview of the module + +Thirty two bit hash functions are primarily useful for generating hash +codes for hash tables. +Checksums generally benefit from having a larger number of bits. 
+The `stdlib_32_bit_hash_codes` module defines five public overloaded +32 bit hash code functions, `FNV_1_HASH`, `FNV_1A_HASH`, `NMHASH32`, `NMHASH32X`, +and `WATER_HASH`, two scalar hash functions, `FIBONACCI_HASH` and +`UNIVERSAL_MULT_HASH`, four seed generators, `ODD_RANDOM_INTEGER` for +`UNIVERSAL_MULT_HASH`, and `NEW_NMHASH32_SEED`, `NEW_NMHASH32X_SEED`, +and `NEW_WATER_HASH_SEED`, for their respective hash code +functions. It also defines the integer kind constant, `INT_HASH`, and +a logical constant, `LITTLE_ENDIAN`, used to deal with one aspect of +the machine dependence of the hash codes. + +### The `INT_HASH` parameter + +It is necessary to define the kind of integer used to return the hash +code. +As `stdlib_32_bit_hash_codes` deals exclusively with 32 bit hash codes, +`INT_HASH` is an alias for the integer kind `INT32`. + +### The `LITTLE_ENDIAN` parameter + +In implementing hash functions it is sometimes necessary to know the +"endianness" of the processor's integers. To this end the +`stdlib_32_bit_hash_codes` module defines the logical parameter +`LITTLE_ENDIAN` that, if true, indicates that the processor has little +endian integers, and that if false indicates that the integers are big +endian. + +### Specifications of the `stdlib_32_bit_hash_codes` procedures + +#### `FIBONACCI_HASH` - maps an integer to a smaller number of bits + +##### Status + +Experimental + +##### Description + +Calculates an `nbits` hash code from a 32 bit integer. + +##### Syntax + +`code = [[stdlib_32_bit_hash_codes:fibonacci_hash]]( key, nbits )` + +##### Class + +Pure function + +##### Arguments + +`key`: Shall be a scalar integer expression of kind `INT32`. It is an +`intent(in)` argument. + +`nbits`: Shall be a scalar default integer expression with `0 < nbits < +32`. It is an `intent(in)` argument. + +##### Result + +The result is an integer of kind `INT32` with at most the lowest +`nbits` nonzero. 
+ +##### Note + +`FIBONACCI_HASH` is an implementation of the Fibonacci Hash of Donald +E. Knuth. It multiplies the `KEY` by the odd valued approximation to +`2**32/phi`, where `phi` is the golden ratio 1.618..., and returns the +`NBITS` upper bits of the product as the lowest bits of the result. + +##### Example + +```fortran + program demo_fibonacci_hash + use stdlib_32_bit_hash_codes, only: fibonacci_hash + use iso_fortran_env, only: int32 + implicit none + integer, allocatable :: array1(:) + integer(int32) :: hash, source + allocate( array1(0:2**6-1) ) + array1(:) = 0 + source = int(Z'1FFFFFF', int32) + hash = fibonacci_hash(source, 6) + array1(hash) = source + print *, hash + end program demo_fibonacci_hash +``` + +#### `FNV_1_HASH`- calculates a hash code from a key + +##### Status + +Experimental + +##### Description + +Calculates a 32 bit hash code from a rank 1 integer array or a default +character string. + +##### Syntax + +`code = [[stdlib_32_bit_hash_codes:fnv_1_hash]]( key )` + +##### Class + +Pure function + +##### Argument + +`key`: Shall be a deferred length default character scalar expression +or a rank 1 integer array expression of kind `INT8`, `INT16`, +`INT32`, or `INT64`. +It is an `intent(in)` argument. + +##### Result + +The result is a scalar integer of kind `INT32`. + +##### Note + +`FNV_1_HASH` is an implementation of the original FNV-1 hash code of Glenn +Fowler, Landon Curt Noll, and Phong Vo. +It differs from typical implementations in that it also encodes the +size of the structure in the hash code. +This code is relatively fast on short keys, and is small enough that it +will often be retained in the instruction cache if hashing is +intermittent. +As a result it should give good performance for typical hash table +applications. +This code does not pass any of the SMHasher tests, but the resulting +degradation in performance due to its larger number of collisions is +expected to be minor compared to its faster hashing rate. 
+ + +##### Example + +```fortran + program demo_fnv_1_hash + use stdlib_32_bit_hash_codes, only: fnv_1_hash + use iso_fortran_env, only: int32 + implicit none + integer, allocatable :: array1(:) + integer(int32) :: hash + array1 = [ 5, 4, 3, 1, 10, 4, 9] + hash = fnv_1_hash(array1) + print *, hash + end program demo_fnv_1_hash +``` + + +#### `FNV_1A_HASH`- calculates a hash code from a key + +##### Status + +Experimental + +##### Description + +Calculates a 32 bit hash code from a rank 1 integer array or a default +character string. + +##### Syntax + +`code = [[stdlib_32_bit_hash_codes:fnv_1a_hash]]( key )` + +##### Class + +Pure function + +##### Argument + +`key`: Shall be a deferred length default character scalar expression +or a rank 1 integer array expression of kind `INT8`, `INT16`, +`INT32`, or `INT64`. +It is an `intent(in)` argument. + +##### Result + +The result is a scalar integer of kind `INT32`. + +##### Note + +`FNV_1A_HASH` is an implementation of the alternative FNV-1a hash code of +Glenn Fowler, Landon Curt Noll, and Phong Vo. +It differs from typical implementations in that it also encodes the +size of the structure in the hash code. +This code is relatively fast on short keys, and is small enough that it +will often be retained in the instruction cache if hashing is +intermittent. +As a result it should give good performance for typical hash table +applications. +This code does not pass any of the SMHasher tests, but the resulting +degradation in performance due to its larger number of collisions is +expected to be minor compared to its faster hashing rate. 
+ +##### Example + +```fortran + program demo_fnv_1a_hash + use stdlib_32_bit_hash_codes, only: fnv_1a_hash + use iso_fortran_env, only: int32 + implicit none + integer, allocatable :: array1(:) + integer(int32) :: hash + array1 = [ 5, 4, 3, 1, 10, 4, 9] + hash = fnv_1a_hash(array1) + print *, hash + end program demo_fnv_1a_hash +``` + + +#### `NEW_NMHASH32_SEED`- returns a valid input seed for `NMHASH32` + +##### Status + +Experimental + +##### Description + +Calculates a 32 bit "random" integer that is believed to be a valid +seed for `NMHASH32` and is also different from the input seed. + +##### Syntax + +`call [[stdlib_32_bit_hash_codes:new_nmhash32_seed]]( seed )` + +##### Class + +Subroutine + +##### Argument + +`seed`: shall be a defined integer scalar variable of kind `INT32`. +It is an `intent(inout)` argument. On input `seed` should be defined, +and on output it will be different from the input `seed`. + +##### Note + +Currently there are no known bad seeds for `NMHASH32`, but if any are +identified the procedure will be revised so that they cannot be +returned. This subroutine uses Fortran's intrinsic + `RANDOM_NUMBER` and the values returned can be changed by calling the + intrinsic `RANDOM_INIT`. + +##### Example + +See the example for `NMHASH32`. + + +#### `NEW_NMHASH32X_SEED`- returns a valid input seed for `NMHASH32X` + +##### Status + +Experimental + +##### Description + +Calculates a 32 bit "random" integer that is believed to be a valid +seed for `NMHASH32X` and is also different from the input seed. + +##### Syntax + +`call [[stdlib_32_bit_hash_codes:new_nmhash32x_seed]]( seed )` + +##### Class + +Subroutine + +##### Argument + +`seed`: shall be a defined integer scalar variable of kind `INT32`. +It is an `intent(inout)` argument. On input `seed` should be defined, +and on output it will be different from the input `seed`. 
+ +##### Note + +Currently there are no known bad seeds for `NMHASH32X`, but if any are +identified the procedure will be revised so that they cannot be +returned. This subroutine uses Fortran's intrinsic + `RANDOM_NUMBER` and the values returned can be changed by calling the + intrinsic `RANDOM_INIT`. + +##### Example + +See the example for `NMHASH32X`. + + +#### `NEW_WATER_HASH_SEED`- returns a valid input seed for `WATER_HASH` + +##### Status + +Experimental + +##### Description + +Calculates a 64 bit "random" integer that is believed to be a valid +seed for `WATER_HASH` and is also different from the input seed. + +##### Syntax + +`call [[stdlib_32_bit_hash_codes:new_water_hash_seed]]( seed )` + +##### Class + +Subroutine + +##### Argument + +`seed`: shall be a defined integer scalar variable of kind `INT64`. +It is an `intent(inout)` argument. On input `seed` should be defined, +and on output it will be different from the input `seed`. + +##### Note + +Currently there are no known bad seeds for `WATER_HASH`, but if any +are identified the procedure will be revised so that they cannot be +returned. This subroutine uses Fortran's intrinsic + `RANDOM_NUMBER` and the values returned can be changed by calling the + intrinsic `RANDOM_INIT`. + + +##### Example + +See the example for `WATER_HASH`. + + +#### `NMHASH32`- calculates a hash code from a key and a seed + +##### Status + +Experimental + +##### Description + +Calculates a 32 bit hash code from a rank 1 integer array or a default +character string, and the input `seed`. + +##### Syntax + +`code = [[stdlib_32_bit_hash_codes:nmhash32]]( key, seed )` + +##### Class + +Pure function + +##### Arguments + +`key`: Shall be a deferred length default character scalar expression +or a rank 1 integer array expression of kind `INT8`, `INT16`, +`INT32`, or `INT64`. +It is an `intent(in)` argument. + +`seed`: shall be an integer scalar expression of kind `INT32`. +It is an `intent(in)` argument. 
+ +##### Result + +The result is a scalar integer of kind `INT32`. + +##### Note + +`NMHASH32` is an implementation of the `nmhash32` hash code of +James Z. M. Gao. +This code has good, but not great, performance on long keys, poorer +performance on short keys. +As a result it should give fair performance for typical hash table +applications. +This code passes the SMHasher tests, and has no known bad seeds. + +##### Example + +```fortran + program demo_nmhash32 + use stdlib_32_bit_hash_codes, only: nmhash32, & + new_nmhash32_seed + use iso_fortran_env, only: int32 + implicit none + integer, allocatable :: array1(:) + integer(int32) :: hash + integer(int32) :: seed = int(Z'11111111', int32) + call new_nmhash32_seed(seed) + array1 = [ 5, 4, 3, 1, 10, 4, 9] + hash = nmhash32(array1, seed) + print *, seed, hash + end program demo_nmhash32 +``` + + +#### `NMHASH32X`- calculates a hash code from a key and a seed + +##### Status + +Experimental + +##### Description + +Calculates a 32 bit hash code from a rank 1 integer array or a default +character string, and the input `seed`. + +##### Syntax + +`code = [[stdlib_32_bit_hash_codes:nmhash32x]]( key, seed )` + +##### Class + +Pure function + +##### Arguments + +`key`: Shall be a deferred length default character scalar expression +or a rank 1 integer array expression of kind `INT8`, `INT16`, +`INT32`, or `INT64`. +It is an `intent(in)` argument. + +`seed`: shall be an integer scalar expression of kind `INT32`. +It is an `intent(in)` argument. + +##### Result + +The result is a scalar integer of kind `INT32`. + +##### Note + +`NMHASH32X` is an implementation of the `nmhash32x` hash code of +James Z. M. Gao. +This code has good, but not great, performance on long keys, poorer +performance on short keys. +As a result it should give fair performance for typical hash table +applications. 
+This code passes the SMHasher tests, and has no known bad seeds. + +##### Example + +```fortran + program demo_nmhash32x + use stdlib_32_bit_hash_codes, only: nmhash32x, & + new_nmhash32x_seed + use iso_fortran_env, only: int32 + implicit none + integer, allocatable :: array1(:) + integer(int32) :: hash + integer(int32) :: seed = int(Z'11111111', int32) + call new_nmhash32x_seed(seed) + array1 = [ 5, 4, 3, 1, 10, 4, 9] + hash = nmhash32x(array1, seed) + print *, seed, hash + end program demo_nmhash32x +``` + +#### `ODD_RANDOM_INTEGER` - returns an odd integer + +##### Status + +Experimental + +##### Description + +Returns a random 32 bit integer distributed uniformly over the odd values. + +##### Syntax + +`call [[stdlib_32_bit_hash_codes:odd_random_integer]]( harvest )` + +##### Class + +Subroutine + +##### Argument + +`harvest`: Shall be a scalar integer variable of kind `INT32`. It is +an `intent(out)` argument. + +##### Note + +`ODD_RANDOM_INTEGER` is intended to generate seeds for + `UNIVERSAL_MULT_HASH`. `ODD_RANDOM_INTEGER` uses Fortran's intrinsic + `RANDOM_NUMBER` and the values returned can be changed by calling the + intrinsic `RANDOM_INIT`. + +##### Example + +See `UNIVERSAL_MULT_HASH`. + + +#### `UNIVERSAL_MULT_HASH` - maps an integer to a smaller number of bits + +##### Status + +Experimental + +##### Description + +Calculates an `nbits` hash code from a 32 bit integer. + +##### Syntax + +`code = [[stdlib_32_bit_hash_codes:universal_mult_hash]]( key, seed, nbits )` + +##### Class + +Pure function + +##### Arguments + +`key`: Shall be a scalar integer expression of kind `INT32`. It is an +`intent(in)` argument. + +`seed`: Shall be a scalar integer expression of kind `INT32`. It is an +`intent(in)` argument. It must have an odd value. + +`nbits`: Shall be a scalar default integer expression with `0 < nbits < +32`. It is an `intent(in)` argument. + +##### Result + +The result is a scalar integer of kind `INT32` with at most the lowest +`nbits` nonzero. 
+ +##### Note + +`UNIVERSAL_MULT_HASH` is an implementation of the Universal +Multiplicative Hash of M. Dietzfelbinger, et al. +It multiplies the `KEY` by `SEED`, and returns the +`NBITS` upper bits of the product as the lowest bits of the result. + +##### Example + +```fortran + program demo_universal_mult_hash + use stdlib_32_bit_hash_codes, only: odd_random_integer, & + universal_mult_hash + use iso_fortran_env, only: int32 + implicit none + integer, allocatable :: array1(:) + integer(int32) :: hash, i, seed, source + seed = 0 + allocate( array1(0:2**6-1) ) + do i = 0, 2**6-1 + array1(i) = i + end do + call odd_random_integer( seed ) + source = int(Z'1FFFFFF', int32) + hash = universal_mult_hash(source, seed, 6) + array1(hash) = source + print *, seed, hash, array1 + end program demo_universal_mult_hash +``` + +#### `WATER_HASH`- calculates a hash code from a key and a seed + +##### Status + +Experimental + +##### Description + +Calculates a 32 bit hash code from a rank 1 integer array or a default +character string, and the input `seed`. + +##### Syntax + +`code = [[stdlib_32_bit_hash_codes:water_hash]]( key, seed )` + +##### Class + +Pure function + +##### Arguments + +`key`: Shall be a deferred length default character scalar expression +or a rank 1 integer array expression of kind `INT8`, `INT16`, +`INT32`, or `INT64`. +It is an `intent(in)` argument. + +`seed`: shall be an integer scalar expression of kind `INT64`. +It is an `intent(in)` argument. + +##### Result + +The result is a scalar integer of kind `INT32`. + +##### Note + +`WATER_HASH` is an implementation of the `waterhash` hash code of +Tommy Ettinger. +This code has excellent performance on long keys, and good performance +on short keys. +As a result it should give reasonable performance for typical hash +table applications. +This code passes the SMHasher tests. +The `waterhash` is based on the `wyhash` of Wang Yi. 
+While `wyhash` has a number of bad seeds, depending on the version, +so far testing has not found any bad seeds for `waterhash`. +It can have undefined behavior if the key is not word aligned. + +##### Example + +```fortran + program demo_water_hash + use stdlib_32_bit_hash_codes, only: water_hash, & + new_water_hash_seed + use iso_fortran_env, only: int32, int64 + implicit none + integer, allocatable :: array1(:) + integer(int32) :: hash + integer(int64) :: seed = int(Z'11111111', int64) + call new_water_hash_seed( seed ) + array1 = [ 5, 4, 3, 1, 10, 4, 9] + hash = water_hash(array1, seed) + print *, hash, seed + end program demo_water_hash +``` + +## The `stdlib_64_bit_hash_codes` module + +### Overview of the module + +Sixty four bit hash functions are generally overkill for hash table +applications, and are primarily useful for checksums and related +applications. +As checksums often have to deal with extremely large files or +directories, it is often useful to use incremental hashing as well as +direct hashing, so 64 bit and higher hash algorithms often provide +multiple implementations. The current module, for simplicity of API, +doesn't provide any incremental hashes. +The `stdlib_64_bit_hash_codes` module defines several public +overloaded 64 bit hash procedures, `FNV_1`, `FNV_1A`, +`PENGY_HASH`, and `SPOOKY_HASH`, two scalar hash functions, +`FIBONACCI_HASH` and +`UNIVERSAL_MULT_HASH`, a seed generator, `ODD_RANDOM_INTEGER`, for the +`UNIVERSAL_MULT_HASH`, and two seed generators, `NEW_PENGY_HASH_SEED` +and `NEW_SPOOKY_HASH_SEED` for their respective hash functions. It +also defines the integer kind constant, `INT_HASH`, used to specify +the kind of the hash function results, and a logical constant, +`LITTLE_ENDIAN`, used to deal with one aspect of the machine +dependence of the hash codes.
+Note that while SpookyHash can be used as a sixty four bit hash +algorithm, its algorithms actually returns two element integer arrays +of kind `INT64`, so it can also be used as a 128 bit hash. + +### The `INT_HASH` parameters + +It is necessary to define the kind of integer used to return the hash +code. +As `stdlib_64_bit_hash_codes` deals exclusively with 64 bit hash codes, +`INT_HASH` is an alias for the integer kind `INT64`. + +### The `LITTLE_ENDIAN` parameter + +In implementing hash functions it is sometimes necessary to know the +"endianess" of the processor's integers. To this end the +`stdlib_64_bit_hash_codes` module defines the logical parameter +`LITTLE_ENDIAN` that if true indicates that the processor has little +endian integers, and that if false indicates that the integers are big +endian. + + +### Specifications of the `stdlib_64_bit_hash_codes` procedures + +#### `FIBONACCI_HASH` - maps an integer to a smaller number of bits + +##### Status + +Experimental + +##### Description + +Calculates an `nbits` hash code from a 64 bit integer. + +##### Syntax + +`code = [[stdlib_64_bit_hash_codes:fibonacci_hash]]( key, nbits )` + +##### Class + +Pure function + +##### Arguments + +`key`: Shall be a scalar integer expression of kind `INT64`. It is an +`intent(in)` argument. + +`nbits` Shall be a scalar default integer expression with `0 < nbits < +64`. It is an `intent(in)` argument. + +##### Result + +The result is a scalar integer of kind `INT64` with at most the lowest +`nbits` nonzero. + +##### Note + +`FIBONACCI_HASH` is an implementation of the Fibonacci Hash of Donald +E. Knuth. It multiplies the `KEY` by the odd valued approximation to +`2**64/phi`, where `phi` is the golden ratio 1.618..., and returns the +`nbits` upper bits of the product as the lowest bits of the result. 
+ +##### Example + +```fortran + program demo_fibonacci_hash + use stdlib_64_bit_hash_codes, only: fibonacci_hash + use iso_fortran_env, only: int64 + implicit none + integer, allocatable :: array1(:) + integer(int64) :: hash, source + allocate( array1(0:2**6-1) ) + array1(:) = 0 + source = int(Z'1FFFFFFFF', int64) + hash = fibonacci_hash(source, 6) + array1(hash) = source + print *, hash + end program demo_fibonacci_hash +``` + +#### `FNV_1`- calculates a hash code from a key + +##### Status + +Experimental + +##### Description + +Calculates a 64 bit hash code from a rank 1 integer array or a default +character string. + +##### Syntax + +`code = [[stdlib_64_bit_hash_codes:fnv_1]]( key )` + +##### Class + +Pure function + +##### Argument + +`key`: Shall be a deferred length default character scalar expression +or a rank 1 integer array expression of kind `INT8`, `INT16`, +`INT32`, or `INT64`. +It is an `intent(in)` argument. + +##### Result + +The result is a scalar integer of kind `INT64`. + +##### Note + +`FNV_1` is an implementation of the original FNV-1 hash code of Glenn +Fowler, Landon Curt Noll, and Phong Vo. +It differs from typical implementations in that it also encodes the +size of the structure in the hash code. +This code is relatively fast on short keys, and is small enough that it +will often be retained in the instruction cache if hashing is +intermittent. +As a result it should give good performance for typical hash table +applications, although it is rare for them to need 64 bits. +This code does not pass any of the SMHasher tests, but the resulting +degradation in performance due to its larger number of collisions is +expected to be minor compared to its faster hashing rate.
+ + +##### Example + +```fortran + program demo_fnv_1_hash + use stdlib_64_bit_hash_codes, only: fnv_1_hash + use iso_fortran_env, only: int64 + implicit none + integer, allocatable :: array1(:) + integer(int64) :: hash + array1 = [ 5, 4, 3, 1, 10, 4, 9] + hash = fnv_1_hash(array1) + print *, hash + end program demo_fnv_1_hash +``` + + +#### `FNV_1A`- calculates a hash code from a key + +##### Status + +Experimental + +##### Description + +Calculates a 64 bit hash code from a rank 1 integer array or a default +character string. + +##### Syntax + +`code = [[stdlib_64_bit_hash_codes:fnv_1a]]( key )` + +##### Class + +Pure function + +##### Argument + +`key`: Shall be a deferred length default character scalar expression +or a rank 1 integer array expression of kind `INT8`, `INT16`, +`INT32`, or `INT64`. +It is an `intent(in)` argument. + +##### Result + +The result is a scalar integer of kind `INT64`. + +##### Note + +`FNV_1A` is an implementation of the alternative FNV-1a hash code of +Glenn Fowler, Landon Curt Noll, and Phong Vo. +It differs from typical implementations in that it also encodes the +size of the structure in the hash code. +This code is relatively fast on short keys, and is small enough that it +will often be retained in the instruction cache if hashing is +intermittent. +As a result it should give good performance for typical hash table +applications. +This code does not pass any of the SMHasher tests, but the resulting +degradation in performance due to its larger number of collisions is +expected to be minor compared to its faster hashing rate.
+ +##### Example + +```fortran + program demo_fnv_1a_hash + use stdlib_64_bit_hash_codes, only: fnv_1a_hash + use iso_fortran_env, only: int64 + implicit none + integer, allocatable :: array1(:) + integer(int64) :: hash + array1 = [ 5, 4, 3, 1, 10, 4, 9] + hash = fnv_1a_hash(array1) + print *, hash + end program demo_fnv_1a_hash +``` + + +#### `NEW_PENGY_HASH_SEED`- returns a valid input seed for `PENGY_HASH` + +##### Status + +Experimental + +##### Description + +Calculates a 32 bit "random" integer that is believed to be a valid +seed for `PENGY_HASH` and is also different from the input seed. + +##### Syntax + +`call [[stdlib_64_bit_hash_codes:new_pengy_hash_seed]]( seed )` + +##### Class + +Subroutine + +##### Argument + +`seed`: shall be a defined integer scalar variable of kind `INT32`. +It is an `intent(inout)` argument. On input `seed` should be defined, +and on output it will be different from the input `seed`. + +##### Note + +Currently there are no known bad seeds for `PENGY_HASH`, but if any are +identified the procedure will be revised so that they cannot be +returned. This subroutine uses Fortran's intrinsic + `RANDOM_NUMBER` and the values returned can be changed by calling the + intrinsic `RANDOM_INIT`. + +##### Example + +See the example for `PENGY_HASH`. + + +#### `NEW_SPOOKY_HASH_SEED`- returns a valid input seed for `SPOOKY_HASH` + +##### Status + +Experimental + +##### Description + +Calculates a 64 bit two element vector of "random" integer values that +is believed to be a valid seed for `SPOOKY_HASH` and is also different +from the input seed. + +##### Syntax + +`call [[stdlib_64_bit_hash_codes:new_spooky_hash_seed]]( seed )` + +##### Class + +Subroutine + +##### Argument + +`seed`: shall be a defined two element integer vector variable of kind +`INT64`. It is an `intent(inout)` argument. On input `seed` should be +defined, and on output it will be different from the input `seed`.
+ +##### Note + +Currently there are no known bad seeds for `SPOOKY_HASH`, but if any are +identified the procedure will be revised so that they cannot be +returned. This subroutine uses Fortran's intrinsic + `RANDOM_NUMBER` and the values returned can be changed by calling the + intrinsic `RANDOM_INIT`. + +##### Example + +See the example for `SPOOKY_HASH`. + + +#### `ODD_RANDOM_INTEGER` - returns odd integer + +##### Status + +Experimental + +##### Description + +Returns a random 64 bit integer distributed uniformly over the odd values. + +##### Syntax + +`call [[stdlib_64_bit_hash_codes:odd_random_integer]]( harvest )` + +##### Class + +Subroutine + +##### Argument + +`harvest`: Shall be an integer of kind `INT64`. It is an `intent(out)` +argument. + +##### Note + +`ODD_RANDOM_INTEGER` is intended to generate seeds for + `UNIVERSAL_MULT_HASH`. `ODD_RANDOM_INTEGER` uses Fortran's intrinsic + `RANDOM_NUMBER` and the values returned can be changed by calling the + intrinsic `RANDOM_INIT`. + +##### Example + +See `UNIVERSAL_MULT_HASH`. + + +#### `PENGY_HASH` - maps a character string or integer vector to an integer + +##### Status + +Experimental + +##### Description + +Maps a character string or integer vector to a 64 bit integer whose +value also depends on a scalar 32 bit integer, `seed`. + +##### Syntax + +`code = [[stdlib_64_bit_hash_codes:pengy_hash]]( key, seed )` + +##### Class + +Pure function + +##### Arguments + +`key`: shall be a scalar expression of type default character or a +Rank 1 integer vector expression of kind `INT8`, `INT16`, `INT32`, or +`INT64`. It is an `intent(in)` argument. + +`seed`: shall be an integer expression of kind `INT32`. It is +an `intent(in)` argument. + +##### Result + +The result is an integer of kind `INT64`. + +##### Note + +`PENGY_HASH` is an implementation of the 64 bit `pengyhash` of Alberto +Fajardo. The hash has acceptable performance on small keys, and good +performance on long keys.
It passes all the SMHasher tests, and has +no known bad seeds. + +##### Example + +```fortran + program demo_pengy_hash + use stdlib_64_bit_hash_codes, only: new_pengy_hash_seed, pengy_hash + use iso_fortran_env, only: int32, int64 + implicit none + integer, allocatable :: key(:) + integer(int64) :: hash + integer(int32) :: seed + key = [ 0_int64, 1_int64, 2_int64, 3_int64 ] + seed = 0_int32 + call new_pengy_hash_seed( seed ) + hash = pengy_hash( key, seed ) + print *, seed, hash + end program demo_pengy_hash +``` + + +#### `SPOOKY_HASH` - maps a character string or integer vector to an integer + +##### Status + +Experimental + +##### Description + +Maps a character string or integer vector to a 64 bit integer whose +value also depends on a two element vector, `seed`. + +##### Syntax + +`code = [[stdlib_64_bit_hash_codes:spooky_hash]]( key, seed )` + +##### Class + +Pure function + +##### Arguments + +`key`: shall be a scalar expression of type default character or a +Rank 1 integer vector expression of kind `INT8`, `INT16`, `INT32`, or +`INT64`. It is an `intent(in)` argument. + +`seed`: shall be a two element integer vector expression of kind +`INT64`. It is an `intent(in)` argument. + +##### Result + +The result is a two element integer vector of kind `INT64`. + +##### Note + +`SPOOKY_HASH` is an implementation of the 64 bit version 2 of +SpookyHash of Bob Jenkins. The code was designed for Little-Endian +processors. The output is different on Big Endian processors, but still +probably as good quality. It is often used as a 64 bit hash using the +first element of the returned value, but can be used as a 128 bit +hash. This version of `SPOOKY_HASH` has good performance on small keys +and excellent performance on long keys. It passes all the SMHasher tests +and has no known bad seeds.
+ +##### Example + +```fortran + program demo_spooky_hash + use stdlib_64_bit_hash_codes, only: new_spooky_hash_seed, & + spooky_hash + use iso_fortran_env, only: int64 + implicit none + integer, allocatable :: key(:) + integer(int64) :: hash(2), seed(2), source + key = [ 0_int64, 1_int64, 2_int64, 3_int64 ] + seed = [ 119_int64, 2_int64**41-1 ] + call new_spooky_hash_seed( seed ) + hash = spooky_hash( key, seed ) + print *, seed, hash + end program demo_spooky_hash +``` + +#### `UNIVERSAL_MULT_HASH` - maps an integer to a smaller number of bits + +##### Status + +Experimental + +##### Description + +Calculates an `nbits` hash code from a 64 bit integer. + +##### Syntax + +`code = [[stdlib_64_bit_hash_codes:universal_mult_hash]]( key, seed, nbits )` + +##### Class + +Pure function + +##### Arguments + +`key`: Shall be an integer of kind `INT64`. It is an `intent(in)` +argument. + +`seed`: Shall be an integer of kind `INT64`. It is an `intent(in)` +argument. It should be an odd value. + +`nbits` Shall be a default integer with `0 < nbits < 64`. It is an +`intent(in)` argument. It must be an odd integer. + +##### Result + +The result is an integer of kind `INT64` with at most the lowest +`nbits` nonzero. + +##### Note + +`UNIVERSAL_MULT_HASH` is an implementation of the Universal +Multiplicative Hash of M. Dietzfelbinger, et al. +It multiplies the `KEY` by `SEED`, and returns the +`NBITS` upper bits of the product as the lowest bits of the result. 
+ +##### Example + + +```fortran + program demo_universal_mult_hash + use stdlib_64_bit_hash_codes, only: odd_random_integer, & + universal_mult_hash + use iso_fortran_env, only: int64 + implicit none + integer, allocatable :: array1(:) + integer(int64) :: hash, i, seed, source + seed = 0 + allocate( array1(0:2**6-1) ) + do i = 0, 2**6-1 + array1(i) = i + end do + call odd_random_integer( seed ) + source = int(Z'1FFFFFF', int64) + hash = universal_mult_hash(source, seed, 6) + array1(hash) = source + print *, seed, hash, array1 + end program demo_universal_mult_hash +``` + + +### Test Codes + +The Fortran Standard Library provides two test codes for the hash +functions of `stdlib_32_bit_hash_functions` and +`stdlib_64_bit_hash_functions`, `test_32_bit_hash_performance` and +`test_64_bit_hash_performance` respectively. These are primarily set +up to test runtime performance of the functions. They take a sample of +`2**18` integers of kind `INT8` and break it up into vectors of size +1, 2, 4, 8, 16, 64, 256, and 1024 elements, yielding `2**18`, +`2**17`, `2**16`, `2**15`, `2**14`, `2**12`, `2**10`, and `2**8` +vectors respectively. These are then processed by the hash functions +4 times, and the time for processing is reported. Testing so far has +been on a MacBook Pro with a 2.3 GHz Quad-Core Intel Core i5 and 8 GB +2133 MHz LPDDR3 of RAM, using GNU Fortran (GCC) 11.1.0 to compile the +code.
The results for `test_32_bit_hash_performance` is given by the +following table: + +| Algorithm | Key Size | Key # | Time (s) | +| | Bytes | | | +|------------|-----------|------------|----------| +| FNV-1 | 1 | 1048576 | 0.02949 | +| FNV-1 | 2 | 524288 | 0.02361 | +| FNV-1 | 4 | 262144 | 0.02016 | +| FNV-1 | 8 | 131072 | 0.01806 | +| FNV-1 | 16 | 65536 | 0.01867 | +| FNV-1 | 64 | 16384 | 0.01717 | +| FNV-1 | 256 | 4096 | 0.01759 | +| FNV-1 | 1024 | 1024 | 0.01659 | +| FNV-1a | 1 | 1048576 | 0.02897 | +| FNV-1a | 2 | 524288 | 0.02472 | +| FNV-1a | 4 | 262144 | 0.02025 | +| FNV-1a | 8 | 131072 | 0.01901 | +| FNV-1a | 16 | 65536 | 0.01898 | +| FNV-1a | 64 | 16384 | 0.01784 | +| FNV-1a | 256 | 4096 | 0.01723 | +| FNV-1a | 1024 | 1024 | 0.01673 | +| nmhash32 | 1 | 1048576 | 0.31092 | +| nmhash32 | 2 | 524288 | 0.16230 | +| nmhash32 | 4 | 262144 | 0.07815 | +| nmhash32 | 8 | 131072 | 0.04176 | +| nmhash32 | 16 | 65536 | 0.09261 | +| nmhash32 | 64 | 16384 | 0.04587 | +| nmhash32 | 256 | 4096 | 0.07238 | +| nmhash32 | 1024 | 1024 | 0.07263 | +| nmhash32x | 1 | 1048576 | 0.04294 | +| nmhash32x | 2 | 524288 | 0.02937 | +| nmhash32x | 4 | 262144 | 0.01096 | +| nmhash32x | 8 | 131072 | 0.00911 | +| nmhash32x | 16 | 65536 | 0.01291 | +| nmhash32x | 64 | 16384 | 0.00859 | +| nmhash32x | 256 | 4096 | 0.07373 | +| nmhash32x | 1024 | 1024 | 0.07618 | +| water | 1 | 1048576 | 0.12560 | +| water | 2 | 524288 | 0.06302 | +| water | 4 | 262144 | 0.04020 | +| water | 8 | 131072 | 0.01999 | +| water | 16 | 65536 | 0.01459 | +| water | 64 | 16384 | 0.00923 | +| water | 256 | 4096 | 0.00816 | +| water | 1024 | 1024 | 0.00792 | + +while for `test_64_bit_hash_performance` the results are: + +| Algorithm | Key Size | Key # | Time (s) | +| | Bytes | | | +|------------|-----------|------------|----------| +| FNV-1 | 1 | 1048576 | 0.02981 | +| FNV-1 | 2 | 524288 | 0.02697 | +| FNV-1 | 4 | 262144 | 0.02275 | +| FNV-1 | 8 | 131072 | 0.02431 | +| FNV-1 | 16 | 65536 | 0.02158 | +| FNV-1 | 64 | 
16384 | 0.02007 | +| FNV-1 | 256 | 4096 | 0.01932 | +| FNV-1 | 1024 | 1024 | 0.02089 | +| FNV-1a | 1 | 1048576 | 0.03226 | +| FNV-1a | 2 | 524288 | 0.03076 | +| FNV-1a | 4 | 262144 | 0.02359 | +| FNV-1a | 8 | 131072 | 0.02542 | +| FNV-1a | 16 | 65536 | 0.02364 | +| FNV-1a | 64 | 16384 | 0.02130 | +| FNV-1a | 256 | 4096 | 0.01962 | +| FNV-1a | 1024 | 1024 | 0.01966 | +| Pengy | 1 | 1048576 | 0.24294 | +| Pengy | 2 | 524288 | 0.12066 | +| Pengy | 4 | 262144 | 0.06205 | +| Pengy | 8 | 131072 | 0.03138 | +| Pengy | 16 | 65536 | 0.01608 | +| Pengy | 64 | 16384 | 0.00669 | +| Pengy | 256 | 4096 | 0.00387 | +| Pengy | 1024 | 1024 | 0.00295 | +| Spooky | 1 | 1048576 | 0.11920 | +| Spooky | 2 | 524288 | 0.07478 | +| Spooky | 4 | 262144 | 0.03185 | +| Spooky | 8 | 131072 | 0.01468 | +| Spooky | 16 | 65536 | 0.01503 | +| Spooky | 64 | 16384 | 0.00440 | +| Spooky | 256 | 4096 | 0.00290 | +| Spooky | 1024 | 1024 | 0.00177 | + +As the tested function will typically reside in the instruction cache +these results do not include the costs of reloading the procedure if +hashing is intermittent. If hashing is intermittent then that can more +severely impact the performance of `nmhash32`, `nmhash32x`, +`water_hash`, `pengy_hash`, and `spooky_hash` relative to +`fnv_1_hash` and `fnv_1a_hash`. From 165f4ff7fa76a539c1cfc7f2d97e7d300efced77 Mon Sep 17 00:00:00 2001 From: William Clodius Date: Sun, 21 Nov 2021 17:35:19 -0700 Subject: [PATCH 009/106] Removed two F90 files Removed two filesusedd in an older version of hash validation. 
[ticket: X] --- .../test_32_bit_hash_validation.f90 | 86 ------------------- .../test_64_bit_hash_validation.f90 | 64 -------------- 2 files changed, 150 deletions(-) delete mode 100755 src/tests/hash_functions/validation/test_32_bit_hash_validation.f90 delete mode 100755 src/tests/hash_functions/validation/test_64_bit_hash_validation.f90 diff --git a/src/tests/hash_functions/validation/test_32_bit_hash_validation.f90 b/src/tests/hash_functions/validation/test_32_bit_hash_validation.f90 deleted file mode 100755 index 12b113a65..000000000 --- a/src/tests/hash_functions/validation/test_32_bit_hash_validation.f90 +++ /dev/null @@ -1,86 +0,0 @@ -program test_32_bit_hash_validation -!! Compares the output of Fortran versions of 64 bit hash procedures -!! withe the original C/C++ versions - - use, intrinsic :: iso_fortran_env, only: int8, int32, int64, real64 - use, intrinsic :: iso_c_binding, only : c_loc, c_long - use stdlib_32_bit_hash_functions, only: & - nmhash32, & - new_nmhash32_seed, & - nmhash32x, & - new_nmhash32x_seed, & - water_hash, & - new_water_hash_seed - use nmhash_wrapper, only: c_nmhash32, c_nmhash32x - use waterhash_wrapper, only: c_waterhash - - implicit none - - integer(int32) :: nmhash32_code, c_nmhash32_code, & - nmhash32x_code, c_nmhash32x_code, water_hash_code, c_waterhash_code - integer(int32) :: nmhash32_seed, nmhash32x_seed - integer(int64) :: waterhash_seed - integer(int8) :: test_array(512) - real(real64) :: rand(128) - integer(int32) :: dummy(128) - integer :: i - -! Create test array - call random_number( rand ) - do i=1, 128 - dummy(i) = floor( rand(i) * 2_int64**32 - 2_int64**31, kind=int32 ) - end do - test_array = transfer( dummy, 0_int8, 512 ) - - waterhash_seed = 0 - call new_water_hash_seed( waterhash_seed ) - - do i=0, 512 - water_hash_code = water_hash( test_array(1:i), waterhash_seed ) - c_waterhash_code = c_waterhash( test_array(1:i), waterhash_seed ) - if ( .not. 
( water_hash_code == c_waterhash_code ) ) then - write(*,*) "WATER_HASH failed for INT8 array size = ", i - write(*,*) "WATERHASH_SEED = ", waterhash_seed - write(*,*) 'WATER_HASH_CODE = ', water_hash_code - write(*,*) 'C_WATERHASH_CODE = ', c_waterhash_code - write(*,*) "Array = ", test_array(1:i) - stop "Hash failure" - end if - end do - write(*,*) "WATER_HASH passed validation test." - - nmhash32_seed = 0 -! call new_nmhash32_seed( nmhash32_seed ) - - do i=0, 512 - nmhash32_code = nmhash32( test_array(1:i), nmhash32_seed ) - c_nmhash32_code = c_nmhash32( test_array(1:i), nmhash32_seed ) - if ( .not. ( nmhash32_code == c_nmhash32_code ) ) then - write(*,*) "NMHASH32 failed for INT8 array size = ", i - write(*,*) "NMHASH32_SEED = ", nmhash32_seed - write(*,*) 'NMHASH32_CODE = ', nmhash32_code - write(*,*) 'C_NMHASH32_CODE = ', c_nmhash32_code - write(*,*) "Array = ", test_array(1:i) - stop "Hash failure" - end if - end do - write(*,*) "NMHASH32 passed validation test." - - nmhash32x_seed = 0 -! call new_nmhash32x_seed( nmhash32x_seed ) - - do i=0, 512 - nmhash32x_code = nmhash32x( test_array(1:i), nmhash32_seed ) - c_nmhash32x_code = c_nmhash32x( test_array(1:i), nmhash32_seed ) - if ( .not. ( nmhash32x_code == c_nmhash32x_code ) ) then - write(*,*) "NMHASH32X failed for INT8 array size = ", i - write(*,*) "NMHASH32X_SEED = ", nmhash32x_seed - write(*,*) 'NMHASH32X_CODE = ', nmhash32x_code - write(*,*) 'C_NMHASH32X_CODE = ', c_nmhash32x_code - write(*,*) "Array = ", test_array(1:i) - stop "Hash failure" - end if - end do - write(*,*) "NMHASH32X passed validation test." - -end program test_32_bit_hash_validation diff --git a/src/tests/hash_functions/validation/test_64_bit_hash_validation.f90 b/src/tests/hash_functions/validation/test_64_bit_hash_validation.f90 deleted file mode 100755 index e9ae286bf..000000000 --- a/src/tests/hash_functions/validation/test_64_bit_hash_validation.f90 +++ /dev/null @@ -1,64 +0,0 @@ -program test_64_bit_hash_validation -!! 
Compares the output of Fortran versions of 64 bit hash procedures -!! withe the original C/C++ versions - - use, intrinsic :: iso_fortran_env, only: int8, int32, int64, real64 - use, intrinsic :: iso_c_binding, only : c_loc, c_long - use stdlib_64_bit_hash_functions, only: & - pengy_hash, & - new_pengy_hash_seed, & - spooky_hash, & - new_spooky_hash_seed - use pengy_wrapper, only: c_pengyhash - use spookyv2_wrapper, only: c_spooky128 - - implicit none - - integer(int64) :: pengy_hash_code, c_pengy_hash_code, & - spooky_seed(2), spooky_hash_code(2), c_spooky_hash_code(2) - integer(int32) :: pengy_seed - integer(int8) :: test_array(512) - real(real64) :: rand(128) - integer(int32) :: dummy(128) - integer :: i - -! Create test array - call random_number( rand ) - do i=1, 128 - dummy(i) = floor( rand(i) * 2_int64**32 - 2_int64**31, kind=int32 ) - end do - test_array = transfer( dummy, 0_int8, 512 ) - - pengy_seed = 0_int64 - call new_pengy_hash_seed( pengy_seed ) - - do i=0, 512 - pengy_hash_code = pengy_hash( test_array(1:i), pengy_seed ) - c_pengy_hash_code = c_pengyhash( test_array(1:i), pengy_seed ) - if ( .not. ( pengy_hash_code == c_pengy_hash_code ) ) then - write(*,*) "PENGY_HASH failed for INT8 array size = ", i - write(*,*) 'PENGY_HASH_CODE = ', pengy_hash_code - write(*,*) 'C_PENGY_HASH_CODE = ', c_pengy_hash_code - write(*,*) "Array = ", test_array(1:i) - stop "Hash failure" - end if - end do - write(*,*) "PENGY_HASH passed validation test." - - spooky_seed = [ 0_int64, 0_int64 ] - call new_spooky_hash_seed( spooky_seed ) - - do i=0, 512 - spooky_hash_code = spooky_hash( test_array(1:i), spooky_seed ) - c_spooky_hash_code = c_spooky128( test_array(1:i), spooky_seed ) - if ( .not. 
all( spooky_hash_code == c_spooky_hash_code ) ) then - write(*,*) "SPOOKY_HASH failed for INT8 array size = ", i - write(*,*) 'SPOOKY_HASH_CODE = ', spooky_hash_code - write(*,*) 'C_SPOOKY_HASH_CODE = ', c_spooky_hash_code - write(*,*) "Array = ", test_array(1:i) - stop "Hash failure" - end if - end do - write(*,*) "SPOOKY_HASH passed validation test." - -end program test_64_bit_hash_validation From 5c22bb24e52014d8b2e1a3caba583a1d555ffa00 Mon Sep 17 00:00:00 2001 From: William Clodius Date: Sun, 21 Nov 2021 17:36:37 -0700 Subject: [PATCH 010/106] Added three source code files Added generate_key_array.f90 generate_hash_arrays.f90 and hash_validity_test..f90 to implement a new version of the validity test. [ticket: X] --- .../validation/generate_hash_arrays.cpp | 174 ++++++++++++++++++ .../validation/generate_key_array.f90 | 22 +++ .../validation/hash_validity_test.f90 | 123 +++++++++++++ 3 files changed, 319 insertions(+) create mode 100755 src/tests/hash_functions/validation/generate_hash_arrays.cpp create mode 100755 src/tests/hash_functions/validation/generate_key_array.f90 create mode 100755 src/tests/hash_functions/validation/hash_validity_test.f90 diff --git a/src/tests/hash_functions/validation/generate_hash_arrays.cpp b/src/tests/hash_functions/validation/generate_hash_arrays.cpp new file mode 100755 index 000000000..bcd95dfb3 --- /dev/null +++ b/src/tests/hash_functions/validation/generate_hash_arrays.cpp @@ -0,0 +1,174 @@ +#include +#include + +extern "C" { + #include "nmhash.h" + #include "nmhash_scalar.h" + #include "pengyhash.h" + #include "waterhash.h" +} + +#include "SpookyV2.h" + +void SpookyHash32_with_state_test(const void *key, size_t len, const void *state, void *out) { + uint64_t *state64= (uint64_t *)state; + uint64_t s0 = state64[0]; + uint64_t s1 = state64[1]; + SpookyHash::Hash128(key, len, &s0, &s1); + ((uint32_t *)out)[0]= (uint32_t)s0; +} + +void SpookyHash64_with_state_test(const void *key, size_t len, const void *state, void *out) { + 
uint64_t *state64= (uint64_t *)state; + uint64_t *out64= (uint64_t *)out; + out64[0] = state64[0]; + uint64_t s1 = state64[1]; + SpookyHash::Hash128(key, len, out64, &s1); +} + +void SpookyHash128_with_state_test(const void *key, size_t len, const void *state, void *out) { + uint64_t *state64= (uint64_t *)state; + uint64_t *out64= (uint64_t *)out; + out64[0] = state64[0]; + out64[1] = state64[1]; + SpookyHash::Hash128(key, len, out64, out64+1); +} + +void SpookyHash_seed_state_test(int in_bits, const void *seed, void *state) { + uint64_t *state64= (uint64_t *)state; + if (in_bits == 32) { + state64[0]= state64[1]= ((uint32_t*)seed)[0]; + } + else { + uint64_t *seed64= (uint64_t *)seed; + if (in_bits == 64) { + state64[0]= state64[1]= seed64[0]; + } + else + if (in_bits == 128) { + state64[0]= seed64[0]; + state64[1]= seed64[1]; + } + } +} + +using namespace std; + +static const int SIZE = 2048; +char * key_array = new char[SIZE]; +static const uint32_t NM_SEED = 0xdeadbeef; +static const uint64_t WATER_SEED = 0xdeadbeef1eadbeef; +static const uint32_t PENGY_SEED = 0xdeadbeef; +static const uint64_t SPOOKY_SEED[2] = { WATER_SEED, WATER_SEED }; + +int read_keys(){ + string inFileName = "key_array.bin"; + std::ifstream fin( inFileName, ios::in | ios::binary ); + if (!fin){ + cout << "Cannot open key_array.bin!" << endl; + return 1; + } + fin.read(key_array, SIZE); + fin.close(); + return 0; +} + +int write_nmhash32(){ + size_t i; + uint32_t hash; + string outFileName = "c_nmhash32_array.bin"; + std::ofstream fout( outFileName, ios::out | ios::binary ); + + if (!fout){ + cout << "Cannot open c_nmhash32_array.bin!" 
<< endl; + return 1; + } + for( i=0; i<=SIZE; i+=1 ){ + hash = NMHASH32((void *) key_array, i, NM_SEED); + fout.write((char *) &hash, 4); + } + fout.close(); + return 0; +} + +int write_nmhash32x(){ + size_t i; + uint32_t hash; + string outFileName = "c_nmhash32x_array.bin"; + std::ofstream fout( outFileName, ios::out | ios::binary ); + + if (!fout){ + cout << "Cannot open c_nmhash32x_array.bin!" << endl; + return 1; + } + for( i=0; i<=SIZE; i+=1 ){ + hash = NMHASH32X((void *) key_array, i, NM_SEED); + fout.write((char *) &hash, 4); + } + fout.close(); + return 0; +} + +int write_water(){ + uint32_t i; + uint32_t hash; + string outFileName = "c_water_hash_array.bin"; + std::ofstream fout( outFileName, ios::out | ios::binary ); + + if (!fout){ + cout << "Cannot open c_water_hash_array.bin!" << endl; + return 1; + } + for( i=0; i<=SIZE; i+=1 ){ + hash = waterhash((void *) key_array, i, WATER_SEED); + fout.write((char *) &hash, 4); + } + fout.close(); + return 0; +} + +int write_pengy(){ + size_t i; + uint64_t hash; + string outFileName = "c_pengy_hash_array.bin"; + std::ofstream fout( outFileName, ios::out | ios::binary ); + + if (!fout){ + cout << "Cannot open c_pengy_hash_array.bin!" << endl; + return 1; + } + for( i=0; i<=SIZE; i+=1 ){ + hash = pengyhash((void *) key_array, i, PENGY_SEED); + fout.write((char *) &hash, 8); + } + fout.close(); + return 0; +} + +int write_spooky(){ + size_t i; + uint64_t hash[2]; + string outFileName = "c_spooky_hash_array.bin"; + std::ofstream fout( outFileName, ios::out | ios::binary ); + + if (!fout){ + cout << "Cannot open c_spooky_hash_array.bin!" 
<< endl; + return 1; + } + for( i=0; i<=SIZE; i+=1 ){ + SpookyHash128_with_state_test((void *) key_array, i, (void *) SPOOKY_SEED, (void *) hash); + fout.write((char *) hash, 16); + } + fout.close(); + return 0; +} + +int main(){ + if (read_keys()==1){return 1;}; + if (write_nmhash32()==1){return 1;}; + if (write_nmhash32x()==1){return 1;}; + if (write_water()==1){return 1;}; + if (write_pengy()==1){return 1;}; + if (write_spooky()==1){return 1;}; + return 0; +} diff --git a/src/tests/hash_functions/validation/generate_key_array.f90 b/src/tests/hash_functions/validation/generate_key_array.f90 new file mode 100755 index 000000000..40b43a043 --- /dev/null +++ b/src/tests/hash_functions/validation/generate_key_array.f90 @@ -0,0 +1,22 @@ +program generate_key_array + + use, intrinsic :: iso_fortran_env, only: int8, int32, int64, real64 + + integer :: lun + integer(int8) :: key_array(2048) + integer(int32) :: dummy(512) + real(real64) :: rand(512) + +! Create key array + call random_number( rand ) + do i=1, 512 + dummy(i) = floor( rand(i) * 2_int64**32 - 2_int64**31, kind=int32 ) + end do + key_array = transfer( dummy, 0_int8, 2048 ) + + open(newunit=lun, file="key_array.bin", form="unformatted", & + access="stream", status="new", action="write") + write(lun) key_array + close(lun) + +end program generate_key_array diff --git a/src/tests/hash_functions/validation/hash_validity_test.f90 b/src/tests/hash_functions/validation/hash_validity_test.f90 new file mode 100755 index 000000000..86d2cc3df --- /dev/null +++ b/src/tests/hash_functions/validation/hash_validity_test.f90 @@ -0,0 +1,123 @@ +!! HASH_VALIDITY_TEST processes a vector of eight bit integers, +!! extracting subvectors of length 0, 1, 2, ... 2048 from the beginning +!! hashing each subvector and comparing the resulting hash with the +!! corresponding hash produced by the original C/C++ code, stopping if +!! they are different. As the original C/C++ code was typically developed +!! 
for Little-Endian machines the testing should only be cone on such +!! machones. The Fortran codes also assume two's complement integers. +!! The code set assume that C's int32_t and int64_t have the same +!! representation as Firtrans int32 and int64 respectively. + +program hash_validity_test + + use, intrinsic :: iso_fortran_env, only: int8, int32, int64, real64 + use stdlib_32_bit_hash_functions, only: & + little_endian, & + nmhash32, & + nmhash32x, & + water_hash + use stdlib_64_bit_hash_functions, only: & + pengy_hash, & + spooky_hash + + integer(int32), parameter :: nm_seed = int( z'deadbeef', int32 ) + integer(int64), parameter :: water_seed = int( z'deadbeef1eadbeef', int64 ) + integer(int32), parameter :: pengy_seed = int( z'deadbeef', int32 ) + integer(int64), parameter :: spooky_seed(2) = [ water_seed, water_seed ] + integer :: index + integer :: lun + integer(int8) :: key_array(2048) + integer(int32) :: c_nmhash32(0:2048) + integer(int32) :: c_nmhash32x(0:2048) + integer(int32) :: c_water_hash(0:2048) + integer(int64) :: c_pengy_hash(0:2048) + integer(int64) :: c_spooky_hash(0:1, 0:2048) + + + ! Test for endianness + if ( .not. little_endian ) then + stop "The processor is not Little-Endian" + end if + + ! Read key array used to generate hash array + open(newunit=lun, file="key_array.bin", form="unformatted", & + access="stream", status="old", action="read") + read(lun) key_array + close(lun) + + ! Read hash array generated from key array by the C version of nmhash32 + open(newunit=lun, file="c_nmhash32_array.bin", form="unformatted", & + access="stream", status="old", action="read") + read(lun) c_nmhash32 + close(lun) + + do index=0, 2048 + if ( c_nmhash32(index) /= nmhash32(key_array(1:index), nm_seed) ) then + write(*,'("NMHASH32 failed for KEY_ARRAY(1:", I0, ")")') index + stop "NMHASH32 is invalid." + end if + end do + write(*,*) "NMHASH32 is valid." + + ! 
Read hash array generated from key array by the C version of nmhash32x + open(newunit=lun, file="c_nmhash32x_array.bin", form="unformatted", & + access="stream", status="old", action="read") + read(lun) c_nmhash32x + close(lun) + + do index=0, 2048 + if ( c_nmhash32x(index) /= nmhash32x(key_array(1:index), nm_seed) ) then + write(*,'("NMHASH32X failed for KEY_ARRAY(1:", I0, ")")') index + stop "NMHASH32X is invalid." + end if + end do + write(*,*) "NMHASH32X is valid." + + ! Read hash array generated from key array by the C version of water hash + open(newunit=lun, file="c_water_hash_array.bin", form="unformatted", & + access="stream", status="old", action="read") + read(lun) c_water_hash + close(lun) + + do index=0, 2048 + if ( c_water_hash(index) /= & + water_hash(key_array(1:index), water_seed) ) then + write(*,'("WATER_HASH failed for KEY_ARRAY(1:", I0, ")")') index + stop "WATER_HASH is invalid." + end if + end do + write(*,*) "WATER_HASH is valid." + + ! Read hash array generated from key array by the C version of pengy hash + open(newunit=lun, file="c_pengy_hash_array.bin", form="unformatted", & + access="stream", status="old", action="read") + read(lun) c_pengy_hash + close(lun) + + do index=0, 2048 + if ( c_pengy_hash(index) /= & + pengy_hash(key_array(1:index), pengy_seed) ) then + write(*,'("PENGY_HASH failed for KEY_ARRAY(1:", I0, ")")') index + stop "PENGY_HASH is invalid." + end if + end do + write(*,*) "PENGY_HASH is valid." + + ! Read hash array generated from key array by the C version of Spooky hash + open(newunit=lun, file="c_spooky_hash_array.bin", form="unformatted", & + access="stream", status="old", action="read") + do index=0, 2048 + read(lun) c_spooky_hash(:, index) + end do + close(lun) + + do index=0, 2048 + if ( .not. all( c_spooky_hash(:,index) == & + spooky_hash(key_array(1:index), spooky_seed) ) ) then + write(*,'("SPOOKY_HASH failed for KEY_ARRAY(:,1:", I0, ")")') index + stop "SPOOKY_HASH is invalid." 
+ end if + end do + write(*,*) "SPOOKY_HASH is valid." + +end program hash_validity_test From 1cf7bcd132fb95a0e062abd0ea4edeac57011d2c Mon Sep 17 00:00:00 2001 From: William Clodius Date: Sun, 21 Nov 2021 17:41:15 -0700 Subject: [PATCH 011/106] Modified Makefile.validation Modified the Makefile so it would compile the new validation. [ticket: X] --- .../validation/Makefile.validation | 76 +++++-------------- 1 file changed, 21 insertions(+), 55 deletions(-) diff --git a/src/tests/hash_functions/validation/Makefile.validation b/src/tests/hash_functions/validation/Makefile.validation index 3578ef69f..3f8e8ebcf 100755 --- a/src/tests/hash_functions/validation/Makefile.validation +++ b/src/tests/hash_functions/validation/Makefile.validation @@ -1,54 +1,31 @@ -# Makefile for validation tests of hash codes, currently using the GNU -# GCC compiler collection. - -FC = /opt/local/bin/gfortran-mp-11 -CC = /opt/local/bin/gcc-mp-11 -CXX = /opt/local/bin/g++-mp-11 +MOD_PATH = -I../../../ FFLAGS = -O3 CFLAGS = -O3 CXXFLAGS = -O3 -LIBDIRS = -L../../../ -L./ -L/Library/Developer/CommandLineTools/SDKs/MacOSX.sdk/usr/lib/ -LIBS = -lfortran_stdlib -lc_hash -MOD_DIRS = -I../../../mod_files -J./mod_files -INCLUDE_DIRS = -I./ -I/Library/Developer/CommandLineTools/SDKs/MacOSX10.15.sdk/usr/include -I/Library/Developer/CommandLineTools/SDKs/MacOSX10.15.sdk/usr/include/tidy +LIBDIRS = -L./ +LIBS = -lc_hash +INCLUDE_DIRS = -I./ -all: test_32_bit_hash_validation test_64_bit_hash_validation +all: generate_hash_arrays generate_key_array hash_validity_test -test_32_bit_hash_validation: test_32_bit_hash_validation.o \ -nmhash_wrapper.o waterhash_wrapper.o ./libc_hash.a - $(FC) $(FFLAGS) $(MOD_DIRS) $(LIBDIRS) \ -test_32_bit_hash_validation.o \ -nmhash_wrapper.o \ -waterhash_wrapper.o \ -$(LIBS) \ --o test_32_bit_hash_validation +generate_key_array: generate_key_array.f90 + $(FC) $(FFLAGS) generate_key_array.f90 -o generate_key_array -test_32_bit_hash_validation.o: test_32_bit_hash_validation.f90 
\ -../../../mod_files/stdlib_64_bit_hash_functions.mod \ -nmhash_wrapper.o waterhash_wrapper.o - $(FC) $(FFLAGS) $(MOD_DIRS) -c test_32_bit_hash_validation.f90 \ - -o test_32_bit_hash_validation.o +hash_validity_test: hash_validity_test.f90 + $(FC) $(FFLAGS) -L../../../ -lstdlib $(MOD_PATH) \ + hash_validity_test.f90 -o hash_validity_test -test_64_bit_hash_validation: test_64_bit_hash_validation.o \ -spookyv2_wrapper.o pengy_wrapper.o ./libc_hash.a - $(FC) $(FFLAGS) $(MOD_DIRS) $(LIBDIRS) \ -test_64_bit_hash_validation.o \ -spookyv2_wrapper.o \ -pengy_wrapper.o \ -$(LIBS) \ --o test_64_bit_hash_validation +generate_hash_arrays: generate_hash_arrays.o ./libc_hash.a + $(CXX) $(CXXFLAGS) $(LIBDIRS) generate_hash_arrays.o \ +$(LIBS) -o generate_hash_arrays -test_64_bit_hash_validation.o: test_64_bit_hash_validation.f90 \ -../../../mod_files/stdlib_64_bit_hash_functions.mod \ -spookyv2_wrapper.o \ -pengy_wrapper.o - $(FC) $(FFLAGS) $(MOD_DIRS) -c test_64_bit_hash_validation.f90 \ - -o test_64_bit_hash_validation.o +generate_hash_arrays.o: generate_hash_arrays.cpp libc_hash.a + $(CXX) $(CXXFLAGS) -c generate_hash_arrays.cpp -o generate_hash_arrays.o libc_hash.a: SpookyV2.o SpookyV2Test.o pengyhash.o nmhash_scalar.o waterhash.o - ar rcs libc_hash.a SpookyV2.o SpookyV2Test.o pengyhash.o nmhash_scalar.o \ - waterhash.o + ar rcs libc_hash.a SpookyV2.o SpookyV2Test.o pengyhash.o \ + nmhash_scalar.o waterhash.o pengyhash.o: pengyhash.c pengyhash.h $(CC) $(CFLAGS) $(INCLUDE_DIRS) -c pengyhash.c -o pengyhash.o @@ -62,23 +39,12 @@ SpookyV2.o: SpookyV2.cpp SpookyV2.h SpookyV2Test.o: SpookyV2Test.cpp SpookyV2.h $(CXX) $(CXXFLAGS) $(INCLUDE_DIRS) -c SpookyV2Test.cpp -o SpookyV2Test.o -spookyv2_wrapper.o: spookyv2_wrapper.f90 - $(FC) $(FFLAGS) $(MOD_DIRS) -c spookyv2_wrapper.f90 \ - -o spookyv2_wrapper.o - -pengy_wrapper.o: pengy_wrapper.f90 - $(FC) $(FFLAGS) $(MOD_DIRS) -c pengy_wrapper.f90 \ - -o pengy_wrapper.o - nmhash_scalar.o: nmhash_scalar.c nmhash_scalar.h $(CC) $(CXXFLAGS) 
$(INCLUDE_DIRS) -c nmhash_scalar.c -o nmhash_scalar.o -nmhash_wrapper.o: nmhash_wrapper.f90 - $(FC) $(FFLAGS) $(MOD_DIRS) -c nmhash_wrapper.f90 \ - -o nmhash_wrapper.o - -waterhash_wrapper.o: waterhash_wrapper.f90 - $(FC) $(FFLAGS) $(MOD_DIRS) -c waterhash_wrapper.f90 \ - -o waterhash_wrapper.o +clean: + rm nmhash_scalar.o SpookyV2Test.o SpookyV2.o waterhash.o pengyhash.o \ + libc_hash.a generate_hash_arrays.o generate_hash_arrays \ + hash_validity_test generate_key_array From cf76cc15ec59b2b576e3edc2829c609cd4c17501 Mon Sep 17 00:00:00 2001 From: William Clodius Date: Sun, 21 Nov 2021 17:56:08 -0700 Subject: [PATCH 012/106] Added documentation Added README.md that is documentation for the code in the hash function validation directory. [ticket: X] --- src/tests/hash_functions/validation/README.md | 9 +++++++++ 1 file changed, 9 insertions(+) create mode 100644 src/tests/hash_functions/validation/README.md diff --git a/src/tests/hash_functions/validation/README.md b/src/tests/hash_functions/validation/README.md new file mode 100644 index 000000000..41840253c --- /dev/null +++ b/src/tests/hash_functions/validation/README.md @@ -0,0 +1,9 @@ +The validation directory contains code to validate the Fortran hash functions against the original C/C++ codes. It consists of three executable applications: + +* `generate_key_array.f90` - creates a file containing 2048 random 8 bit integers. + +* `generate_hash_arrays.cpp` - reads the file generated by `generate_key_array.f90` and uses its contents to generate 2049 hashes for each hash algorithm and outputa files containing the hashes. + +* `hash_validity_test.f90`- reads the file generated by `generate_key_array.f90` and uses its contents to generate 2049 hashes for each hash algorithm and compares the result with the corresponding outputs of `generate_hash_arrays.cpp` reporting if the outputs are not equal.
+ +Note the C code for nmhash assumes that the C compiler is either gcc or MSVC, and will not compile with the Intel C compiler. From f35875b1977088786ec8dbf0c694a71cf40c8c22 Mon Sep 17 00:00:00 2001 From: William Clodius Date: Mon, 22 Nov 2021 06:21:04 -0700 Subject: [PATCH 013/106] Fixes to various files Implemented fixes to various files addressing problems brought up by Jeremie Vandenplas. [ticket: X] --- CMakeLists.txt | 2 +- Makefile.manual | 2 +- doc/specs/index.md | 2 ++ doc/specs/stdlib_hash_functions.md | 15 +++++++-------- src/stdlib_32_bit_hash_functions.fypp | 8 ++++---- .../validation/hash_validity_test.f90 | 2 +- 6 files changed, 16 insertions(+), 15 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index bcba35bbc..ea587ec31 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -21,7 +21,7 @@ if(CMAKE_Fortran_COMPILER_ID STREQUAL GNU) endif() add_compile_options(-fimplicit-none) add_compile_options(-ffree-line-length-132) - add_compile_options(-fno-range-check) + add_compile_options(-fwrapv) add_compile_options(-Wall) add_compile_options(-Wextra) add_compile_options(-Wimplicit-procedure) diff --git a/Makefile.manual b/Makefile.manual index 54dc3b89f..33a3e2cf9 100644 --- a/Makefile.manual +++ b/Makefile.manual @@ -1,7 +1,7 @@ # Fortran stdlib Makefile FC ?= gfortran -FFLAGS ?= -Wall -Wextra -Wimplicit-interface -fPIC -g -fcheck=all -fno-range-check +FFLAGS ?= -Wall -Wextra -Wimplicit-interface -fPIC -g -fcheck=all -fwrapv FYPPFLAGS ?= export FC diff --git a/doc/specs/index.md b/doc/specs/index.md index a3b0a5def..aa16e1350 100644 --- a/doc/specs/index.md +++ b/doc/specs/index.md @@ -14,6 +14,8 @@ This is and index/directory of the specifications (specs) for each new module/fe - [ascii](./stdlib_ascii.html) - Procedures for handling ASCII characters - [bitsets](./stdlib_bitsets.html) - Bitset data types and procedures - [error](./stdlib_error.html) - Catching and handling errors + - [hash\_functions](./stdlib_hash_functions.html) - Hashing 
integer + vectors or character strings - [IO](./stdlib_io.html) - Input/output helper & convenience - [kinds](./stdlib_kinds.html) - Kind parameters - [linalg](./stdlib_linalg.html) - Linear Algebra diff --git a/doc/specs/stdlib_hash_functions.md b/doc/specs/stdlib_hash_functions.md index e0e1e08b5..056259722 100755 --- a/doc/specs/stdlib_hash_functions.md +++ b/doc/specs/stdlib_hash_functions.md @@ -27,8 +27,7 @@ be compared, further improving performance. A hash function can also be used to generate a checksum to verify that data has not changed. The Fortran Standard Library therefore provides procedures to compute -hash codes and scalar hashes, and derived types implementing hash -tables. +hash codes and scalar hashes. This document only discusses the hash codes and scalar hashes in the library. @@ -49,7 +48,7 @@ described in D. E. Knuth, "The Art of Computer Programming, Second Edition, Volume 3, Sorting and Searching", Addison-Wesley, Upper Saddle River, NJ, pp. 517-518, 1998. The algorithms in that source are considered public -domain. +domain, and its use is unrestricted. `UNIVERSAL_MULT_HASH` is a scalar hash. It is an implementation in Fortran 2008 and signed two's complement integers of the @@ -250,7 +249,7 @@ that are sufficiently complicated that a direct comparison is costly and common enough that a general procedure is useful: character strings and rank one arrays of integers. Other objects can, in principle, be hashed by using `transfer` to -map their contents to an integer array, typically one of kind `INT8`. +map their contents to an integer array, typically one of kind `INT8`. The other problem is that hash codes are typically defined using modular unsigned integer arithmetic. As such integers are not part of the current Fortran standard, @@ -261,7 +260,7 @@ larger size, or, for the larger integers, by dividing the integer into two lower and higher order halves, and performing the operations on each half separately using the larger integers. 
-In the other, the unsigned integers may be replaced directly by +In the second, the unsigned integers may be replaced directly by the corresponding signed integers, but otherwise not modifying the the code logic. The first should be standard conforming on current processors, but @@ -381,19 +380,19 @@ All assume a two's complement sign bit, and no out of range checks. The `stdlib_32_bit_fnv_hashes` and `stdlib_64_bits_fnv_hashes` -submodules each provide implementations of ths FNV-1 and FNV-1A +submodules each provide implementations of the FNV-1 and FNV-1A algorithms in the form of two separate overloaded functions: `FNV_1` and `FNV_1A`. The FNV-1 and FNV-2 algorithms differ in their order of the multiplication and exclusive or operations. They differ from their normal implementation in that they also -encode the structure size in the hash code. +encode the structure size in the hash code. The 32 and 64 bit algorithms differ in their initial offsets and in their multiplicative constants. Analysis suggests that `FNV_1A` should be better at randomizing the input, but tests with hash tables show negligible difference. These algorithms have the reputation of being particularly useful for -small byte strings, i.e, strings of less than 32 bytes. +small byte strings, i.e., strings of less than 32 bytes. While they do not at all perform well on the SMHasher test suite, usage indicates that that that this has little impact on the performance of small hash tables, and the small size of the functions diff --git a/src/stdlib_32_bit_hash_functions.fypp b/src/stdlib_32_bit_hash_functions.fypp index fcdfc1466..7ec2760d3 100755 --- a/src/stdlib_32_bit_hash_functions.fypp +++ b/src/stdlib_32_bit_hash_functions.fypp @@ -38,13 +38,13 @@ module stdlib_32_bit_hash_functions bits_int64 = bit_size(0_int64) integer, parameter :: & -! Should be 8 +! Should be 1 bytes_int8 = bits_int8/bits_int8, & -! Should be 16 +! Should be 2 bytes_int16 = bits_int16/bits_int8, & -! Should be 32 +! 
Should be 4 bytes_int32 = bits_int32/bits_int8, & -! Should be 64 +! Should be 8 bytes_int64 = bits_int64/bits_int8 integer, parameter :: & diff --git a/src/tests/hash_functions/validation/hash_validity_test.f90 b/src/tests/hash_functions/validation/hash_validity_test.f90 index 86d2cc3df..313a77764 100755 --- a/src/tests/hash_functions/validation/hash_validity_test.f90 +++ b/src/tests/hash_functions/validation/hash_validity_test.f90 @@ -4,7 +4,7 @@ !! corresponding hash produced by the original C/C++ code, stopping if !! they are different. As the original C/C++ code was typically developed !! for Little-Endian machines the testing should only be cone on such -!! machones. The Fortran codes also assume two's complement integers. +!! machines. The Fortran codes also assume two's complement integers. !! The code set assume that C's int32_t and int64_t have the same !! representation as Firtrans int32 and int64 respectively. From 1661cfffd317e7590bd940f69f2cfced4ce6ce45 Mon Sep 17 00:00:00 2001 From: William Clodius Date: Mon, 22 Nov 2021 06:53:28 -0700 Subject: [PATCH 014/106] Change a compile flag Change a compile flag for gfortran from -fwrapv to -fno-range-check.
[ticket: X] --- CMakeLists.txt | 2 +- Makefile.manual | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index ea587ec31..bcba35bbc 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -21,7 +21,7 @@ if(CMAKE_Fortran_COMPILER_ID STREQUAL GNU) endif() add_compile_options(-fimplicit-none) add_compile_options(-ffree-line-length-132) - add_compile_options(-fwrapv) + add_compile_options(-fno-range-check) add_compile_options(-Wall) add_compile_options(-Wextra) add_compile_options(-Wimplicit-procedure) diff --git a/Makefile.manual b/Makefile.manual index 33a3e2cf9..54dc3b89f 100644 --- a/Makefile.manual +++ b/Makefile.manual @@ -1,7 +1,7 @@ # Fortran stdlib Makefile FC ?= gfortran -FFLAGS ?= -Wall -Wextra -Wimplicit-interface -fPIC -g -fcheck=all -fwrapv +FFLAGS ?= -Wall -Wextra -Wimplicit-interface -fPIC -g -fcheck=all -fno-range-check FYPPFLAGS ?= export FC From 542741d1f6189a05c58f05f0d585b26b9813f347 Mon Sep 17 00:00:00 2001 From: William Clodius Date: Mon, 22 Nov 2021 21:52:39 -0700 Subject: [PATCH 015/106] Changed brackets on TOC Changed from parentheses to square brackets. 
[ticket: X] --- doc/specs/stdlib_hash_functions.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/specs/stdlib_hash_functions.md b/doc/specs/stdlib_hash_functions.md index 056259722..66ba7ea3b 100755 --- a/doc/specs/stdlib_hash_functions.md +++ b/doc/specs/stdlib_hash_functions.md @@ -4,7 +4,7 @@ title: Hash codes # The `stdlib_32_bit_hash_functions` and `stdlib_64_bit_hash_functions` modules -(TOC) +[TOC] ## Overview of hash functions From 1769bb72a179201c10279fe47dbc058d90d14f81 Mon Sep 17 00:00:00 2001 From: William Clodius Date: Fri, 26 Nov 2021 13:38:31 -0700 Subject: [PATCH 016/106] Changes in response to Gareth's comments Changed three files "CMakeLists.txt", "stdlib_hash_functions.md", and "stdlib_64_bith_hash_functions.fypp"in response to @gareth-nx's comments [ticket: X] --- CMakeLists.txt | 4 +-- doc/specs/stdlib_hash_functions.md | 47 ++++++++++++++++++--------- src/stdlib_64_bit_hash_functions.fypp | 8 ++--- 3 files changed, 37 insertions(+), 22 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index bcba35bbc..20edc300b 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -21,11 +21,11 @@ if(CMAKE_Fortran_COMPILER_ID STREQUAL GNU) endif() add_compile_options(-fimplicit-none) add_compile_options(-ffree-line-length-132) - add_compile_options(-fno-range-check) + add_compile_options(-fno-range-check) # Needed for gfortran 9 and + # earlier for hash functions add_compile_options(-Wall) add_compile_options(-Wextra) add_compile_options(-Wimplicit-procedure) - add_compile_options(-Wconversion-extra) # -pedantic-errors triggers a false positive for optional arguments of elemental functions, # see test_optval and https://gcc.gnu.org/bugzilla/show_bug.cgi?id=95446 if(CMAKE_Fortran_COMPILER_VERSION VERSION_LESS 11.0) diff --git a/doc/specs/stdlib_hash_functions.md b/doc/specs/stdlib_hash_functions.md index 66ba7ea3b..7c43898c4 100755 --- a/doc/specs/stdlib_hash_functions.md +++ b/doc/specs/stdlib_hash_functions.md @@ -177,18 +177,29 
@@ The BSD 2-Clause license is as follows: with the distribution. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND - CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, - INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF - MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE - DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS - BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, - EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED - TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, - DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON - ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR - TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF - THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF - SUCH DAMAGE. + CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, + INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF + MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE + DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS + BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, + EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED + TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON + ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR + TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF + THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + SUCH DAMAGE. + +## Glossary + +There are a few words used in this document that may not be familiar to +readers of this document: + +* Key - a value to be used to find entries in a hash table typically + using its hashed value for the initial search; and + +* Seed - an additional argument to a hash function that changes its + output making some attacks impractical. 
## The hash codes modules @@ -199,12 +210,16 @@ The Standard Library provides two modules implementing hash functions and scalar hashes. The `stdlib_32_bit_hash_functions` module provides procedures to compute 32 bit integer hash codes and a scalar hash. -The hash codes are useful for tables of up to `2**15` entries, and -for keys with a few hundred elements. +The hash codes can be used for tables of up to `2**30` entries, and +for keys with a few hundred elements, but performance has only been +tested for tables up to `2**16` entries and performance may degrade +for larger numbers of entries. The `stdlib_64_bit_hash_functions` module provides hash procedures to compute 64 bit integer hash codes and a scalar hash. -The hash codes are useful for tables of up to `2**30` entries, and -for keys with a few thousand elements. +The hash codes can, in principle, be used for tables of up to `2**62` +entries, and for keys with a few thousand elements, but testing of +performance has only been been for tables up to `2**16`elements and +performance may degrade for larger numbers of entries. While one of the codes in `stdlib_64_bit_hash_functions`, `SPSOOKY_HASH`, can also be used to calculate 128 bit hash codes, none of the current codes can be used to calculate 256 bit hash codes. diff --git a/src/stdlib_64_bit_hash_functions.fypp b/src/stdlib_64_bit_hash_functions.fypp index 0f31a0d26..5b075aada 100755 --- a/src/stdlib_64_bit_hash_functions.fypp +++ b/src/stdlib_64_bit_hash_functions.fypp @@ -33,13 +33,13 @@ module stdlib_64_bit_hash_functions bits_int64 = bit_size(0_int64) integer, parameter, public :: & -! Should be 8 +! Should be 1 bytes_int8 = bits_int8/bits_int8, & -! Should be 16 +! Should be 2 bytes_int16 = bits_int16/bits_int8, & -! Should be 32 +! Should be 4 bytes_int32 = bits_int32/bits_int8, & -! Should be 64 +! 
Should be 8 bytes_int64 = bits_int64/bits_int8 integer, parameter, public :: & From e62abdf5a71045bb2049ed5b8fd914af986c607e Mon Sep 17 00:00:00 2001 From: William Clodius Date: Fri, 26 Nov 2021 18:28:38 -0700 Subject: [PATCH 017/106] More changes from Gareth Incorporated more changes in stdlib_hash_functions.md based on comments by @gareth-nx. [ticket: X] --- doc/specs/stdlib_hash_functions.md | 107 +++++++++++++++++++---------- 1 file changed, 70 insertions(+), 37 deletions(-) diff --git a/doc/specs/stdlib_hash_functions.md b/doc/specs/stdlib_hash_functions.md index 7c43898c4..01fa4e1ca 100755 --- a/doc/specs/stdlib_hash_functions.md +++ b/doc/specs/stdlib_hash_functions.md @@ -196,7 +196,9 @@ There are a few words used in this document that may not be familiar to readers of this document: * Key - a value to be used to find entries in a hash table typically - using its hashed value for the initial search; and + using its hashed value for the initial search; + +* Salt - see seed, and; * Seed - an additional argument to a hash function that changes its output making some attacks impractical. @@ -247,7 +249,7 @@ computational complexity, their relative performance on different size keys, and the expected uniqueness (randomness) of the resulting hash codes. Their relative performance in the analysis of text, in particular, -can depend on the processor, character set, language, and content. +can depend on the compiler, character set, language, and content. The quality of a hash function is often evaluated using the SMHasher test suite, originally written by [Austin Appleby](https://github.com/aappleby/smhasher), but greatly @@ -259,10 +261,8 @@ version of SMHasher. There are two problems in implementing hash functions in Fortran. First, the static typing of Fortran makes it awkward to define general purpose hash functions.
-Instead hash functions are defined for some of the more common objects -that are sufficiently complicated that a direct comparison is costly -and common enough that a general procedure is useful: -character strings and rank one arrays of integers. +Instead hash functions are defined for some of the more common +objects: character strings and rank one arrays of integers. Other objects can, in principle, be hashed by using `transfer` to map their contents to an integer array, typically one of kind `INT8`. The other problem is that hash codes are typically defined using @@ -278,33 +278,33 @@ the larger integers. In the second, the unsigned integers may be replaced directly by the corresponding signed integers, but otherwise not modifying the the code logic. -The first should be standard conforming on current processors, but -is more computationally intensive unless the processors recognize +The first should be standard conforming on current compilers, but +is more computationally intensive unless the compilers recognize underlying idioms that are rarely used in Fortran codes. The second is not standard conforming as bit operations involving the sign are undefined, but should yield equivalent results with fewer operations on -processors with two's complement integers that do not trap on over +compilers with two's complement integers that do not trap on over or under flow. The codes currently use the second method. -In order to compile the hash function modules, the processors must +In order to compile the hash function modules, the compilers must implement much of Fortran 2003, and selected components of Fortran 2008: submodules, 64 bit integers, and some bit intrinsics. -The main limitation on valid processors is whether they +The main limitation on valid compilers is whether they implement the submodules enhancement of Fortran 2008. 
In order to properly run the hash functions, the compilers must use two's complement integers, and be able to execute them with wraparound semantics and no integer overflow exceptions. -Current Fortran 2003+ processors solely use two's complement +Current Fortran 2003+ compilers solely use two's complement integers, and appear to be able to turn off overflow detection, so the modules use signed integer arithmetic. For that reason trapping on signed arithmetic must be disabled. The command line -flags to disable overflow detection for processors implementing +flags to disable overflow detection for compilers implementing submodules are summarized in the table below. Note that FLANG, gfortran, ifort, and NAG all default to integer overflow wrapping. -|Processor|Legal flag|Illegal flag|Default| +|Compiler|Legal flag|Illegal flag|Default| |---------|----------|------------|-------| | ARM Fortran | NA? | NA? | overflow wrapping? | | Cray Fortran | NA? | NA? | overflow wrapping? | @@ -424,7 +424,7 @@ version 0.2, in the form of the overloaded functions, `NMHASH32` and `NMHASH32X`. The implementations are based on the scalar versions of Gao's algorithms and not the vector versions that require access to -the vector instructions of some processors. +the vector instructions of some compilers. Both algorithms perform well on the SMHasher tests, and have no known bad seeds. The vector versions of both codes perform well on large keys, with the `nmhash32x` faster on short keys. To provide randomly @@ -458,9 +458,9 @@ incremental hashing procedures. SpookyHash is optimized for large objects and should give excellent performance for objects greater than about 96 byes, but has significant overhead for smaller objects. -The code was designed for Little Endian processors, and will give -different results on Big Endian processors, but the hash quality on -those processors is probably just as good. 
+The code was designed for Little Endian compilers, and will give +different results on Big Endian compilers, but the hash quality on +those compilers is probably just as good. SpookyHash version 2 passes all of Reini Urban's SMHasher tests, and has one bad seed only when reduced to a 32 bit output. Its only potential problem is undefined behavior if the key is @@ -471,8 +471,9 @@ misaligned. ### Overview of the module Thirty two bit hash functions are primarily useful for generating hash -codes for hash tables. -Checksums generally benefit from having a larger number of bits. +codes and hash indices for hash tables. +They tend to be less useful for generating checksums, which generally +benefit from having a larger number of bits. The `stdlib_32_bit_hash_codes` module defines five public overloaded 32 bit hash code functions, `FNV_1`, `FNV-1A`, `NMHASH32`, `NMHASH32x` and `WATER_HASH`, two scalar hash functions, `FIBONACCI_HASH` and @@ -493,9 +494,9 @@ As `stdlib_32_bit_hash_codes` deals exclusively with 32 bit hash codes, ### The `LITTLE_ENDIAN` parameter In implementing hash functions it is sometimes necessary to know the -"endianess" of the processor's integers. To this end the +"endianess" of the compiler's integers. To this end the `stdlib_32_bit_hash_codes` module defines the logical parameter -`LITTLE_ENDIAN` that, if true, indicates that the processor has little +`LITTLE_ENDIAN` that, if true, indicates that the compiler has little endian integers, and that if false indicates that the integers are big endian. @@ -509,7 +510,8 @@ Experimental ##### Description -Calculates an `nbits` hash code from a 32 bit integer. +Calculates an `nbits` hash code from a 64 bit integer. This is useful +in mapping hash codes into small arrays. ##### Syntax @@ -530,7 +532,7 @@ Pure function ##### Result The result is an integer of kind `INT32` with at most the lowest -`nbits` nonzero. +`nbits` nonzero, mapping to a range 0 to `nbits-1`. ##### Note @@ -550,7 +552,7 @@ E. 
Knuth. It multiplies the `KEY` by the odd valued approximation to integer(int32) :: hash, source allocate( array1(0:2**6-1) ) array1(:) = 0 - source = int(Z'1FFFFFF', int32) + source = 42_int32 hash = fibonacci_hash(source, 6) azray1(hash) = source print *, hash @@ -961,7 +963,8 @@ Experimental ##### Description -Calculates an `nbits` hash code from a 32 bit integer. +Calculates an `nbits` hash code from a 32 bit integer. This is useful +in mapping a hash value to a range 0 to `2**nbits-1`. ##### Syntax @@ -1060,9 +1063,12 @@ As a result it should give reasonable performance for typical hash table applications. This code passes the SMHasher tests. The `waterhash` is based on the `wyhash` of Wang Yi. -While `wyhash` has a number of bad seeds, depending on the version, +While `wyhash` has a number of bad seeds, where randomiaation of the +output is poor, so far testing has not found any bad seeds for `waterhash`. -It can have undefined behavior if the key is not word aligned. +It can have undefined behavior if the key is not word aligned, +i.e. some computer processors can only process a given size integer if +the address of the integer is a multiple of the integer size. ##### Example @@ -1119,9 +1125,9 @@ As `stdlib_64_bit_hash_codes` deals exclusively with 64 bit hash codes, ### The `LITTLE_ENDIAN` parameter In implementing hash functions it is sometimes necessary to know the -"endianess" of the processor's integers. To this end the +"endianess" of the compiler's integers. To this end the `stdlib_64_bit_hash_codes` module defines the logical parameter -`LITTLE_ENDIAN` that if true indicates that the processor has little +`LITTLE_ENDIAN` that if true indicates that the compiler has little endian integers, and that if false indicates that the integers are big endian. @@ -1136,7 +1142,8 @@ Experimental ##### Description -Calculates an `nbits` hash code from a 64 bit integer. +Calculates an `nbits` hash code from a 64 bit integer. 
This is useful +in mapping hash codes into small arrays. ##### Syntax @@ -1156,8 +1163,8 @@ Pure function ##### Result -The result is a scalar integer of kind `INT64` with at most the lowest -`nbits` nonzero. +The result is an integer of kind `INT64` with at most the lowest +`nbits` nonzero, mapping to a range 0 to `nbits-1`. ##### Note @@ -1513,7 +1520,7 @@ The result is a two element integer vector of kind `INT64`. `SPOOKY_HASH` is an implementation of the 64 bit version 2 of SpookyHash of Bob Jenkins. The code was designed for Little-Endian -processors. The output is different on Big Endian processors, but still +compilers. The output is different on Big Endian compilers, but still probably as good quality. It is often used as a 64 bit hash using the first element of the returned value, but can be used as a 128 bit hash. This version of `SPOOKY_HASH` has good performance on small keys @@ -1546,7 +1553,8 @@ Experimental ##### Description -Calculates an `nbits` hash code from a 64 bit integer. +Calculates an `nbits` hash code from a 64 bit integer. This is useful +in mapping a hash value to a range 0 to `2**nbits-1`. ##### Syntax @@ -1606,8 +1614,15 @@ It multiplies the `KEY` by `SEED`, and returns the ### Test Codes -The Fortran Standard Library provides two test codes for the hash -functions of `stdlib_32_bit_hash_functions` and +The Fortran Standard Library provides two categories of test +codes. One ccategory is tests of the relative performance of the +various hash functions. The other is a comparison of the outputs of +the Fortran hash functions, with the outputs of the C and C++ hash +procedures that are the inspiration for the Fortran hash functions. + +In the `src/test/hash_functions` subdirectory, the Fortran Standard +Library provides two performance test codes for +the hash functions of `stdlib_32_bit_hash_functions` and `stdlib_64_bit_hash_functions`, `test_32_bit_hash_performance` and `test_64_bit_hash_performance` respectively. 
These are primarily set up to test runtime performance of the functions. They take a sample of @@ -1709,3 +1724,21 @@ hashing is intermittent. If hashing is intermittent then that can more severely impact the performance of `nmhash32`, `nmhash32x`, `water_hash`, `pengy_hash`, and `spooky_hash` relative to `fnv_1_hash` and `fnv_1a_hash`. + +In the `src/tests/hash_functions/validation` subdirectory, the Fortran +Standard Library implements three executables to test the validity of +the Fortran codes against the original C and C++ codes. The three +executables must be compiled manually using the makefile +`Makefile.validation`, and the compiler suite used must be +GCC's. The first executable, `generate_key_array`, is +based on Fortran code, and generates a random sequence of 2048 +integers of kind `INT8`, and stores that sequence in the binary file +`key_array.bin`. The second executable, `generate_hash_arrays`, reads +the values in `key_array.bin`, and, for each complicated hash +procedure generates a corresponding binary file containing 2049 hash +values generated from the values in `key_array.bin`. The third +executable, `hash_validity_test`, reads the binary files and for each +complicated hash procedure compares the contents of the binary file +with the results of calculating hash values using the corresponding +Fortran hash procedure on the same keys. These executables must be run +manually in the same order. From 96ed2e727b4155ba6e3d18a0c94665720c6c3787 Mon Sep 17 00:00:00 2001 From: William Clodius Date: Sat, 27 Nov 2021 16:39:39 -0700 Subject: [PATCH 018/106] Renamed stdlib_hash_functions.md Renamed stdlib_hash_functions.md to stdlib_hash_procedures.md.
[ticket: X] --- doc/specs/stdlib_hash_procedures.md | 1744 +++++++++++++++++++++++++++ 1 file changed, 1744 insertions(+) create mode 100755 doc/specs/stdlib_hash_procedures.md diff --git a/doc/specs/stdlib_hash_procedures.md b/doc/specs/stdlib_hash_procedures.md new file mode 100755 index 000000000..01fa4e1ca --- /dev/null +++ b/doc/specs/stdlib_hash_procedures.md @@ -0,0 +1,1744 @@ +--- +title: Hash codes +--- + +# The `stdlib_32_bit_hash_functions` and `stdlib_64_bit_hash_functions` modules + +[TOC] + +## Overview of hash functions + +The comparison of lexical entities or other objects for equality +can be computationally expensive. +This cost is often reduced by computing a near unique integer value, +termed a hash code, from the structure of the object using a procedure +termed a hash function. +Equality of hash codes is a necessary, but not sufficient, condition +for the original objects to be equal. +As integer comparisons are very efficient, performing an initial +comparison of hash codes and then performing a detailed comparison +only if the hash codes are equal can improve performance. +The hash codes, in turn, can be mapped to a smaller set of integers, +that can be used as an index, termed a hash index, to a rank one +array, often termed a hash table. +This mapping will be known as a scalar hash. +The use of a hash table reduces the number of hash codes that need to +be compared, further improving performance. +A hash function can also be used to generate a checksum to verify that +data has not changed. +The Fortran Standard Library therefore provides procedures to compute +hash codes and scalar hashes. +This document only discusses the hash codes and scalar hashes in the +library. + +## Licensing + +The Fortran Standard Library is distributed under the MIT License. +However components of the library may be based on code with additional +licensing restrictions. 
In particular, the hash codes are often based +on algorithms with additional restrictions on distribution. +The algorithms with such restrictions (`Fibonacci Hash`, `Universal +Multiplicative Hash`, +`FNV-1 Hash`, `FNV-1A Hash`, `nmhash32`, `nmhash32x`, `waterhash`, +`pengyhash` and `SpookyHash`) are discussed below. + +`FIBONACCI_HASH` is a scalar hash. It is an implementation in Fortran +2008 and signed two's complement integers of the Fibonacci Hash +described in D. E. Knuth, "The Art of +Computer Programming, Second Edition, Volume 3, Sorting and +Searching", Addison-Wesley, Upper Saddle River, NJ, +pp. 517-518, 1998. The algorithms in that source are considered public +domain, and its use is unrestricted. + +`UNIVERSAL_MULT_HASH` is a scalar hash. It is an implementation in +Fortran 2008 and signed two's complement integers of the +universal multiplicative hash algorithm of M. Dietzfelbinger, +T. Hagerup, J. Katajainen, and M. Penttonen, "A Reliable Randomized +Algorithm for the Closest-Pair Problem," J. Algorithms, Vol. 25, +No. 1, Oct. 1997, pp. 19-51. Because of its publication in the Journal +of Algorithms, the universal multiplicative hash algorithm is public +domain. + +`FNV_1_HASH` and `FNV_1A_HASH` are translations to Fortran 2008 and +signed two's complement integers of the +`FNV-1` and `FNV-1a` hash functions of Glenn Fowler, Landon Curt Noll, +and Phong Vo, that has been released into the public +domain. Permission has been granted, by Landon Curt Noll, for the use +of these algorithms in the Fortran Standard Library. A description of +these functions is available at +. +These functions have been modified from their normal forms to also +encode the structure size in the output hash. + +Similarly `SPOOKY_HASH` and associated procedures are translations to +Fortran 2008 and signed two's complement integers of the unsigned 64 +bit version 2 `SpookyHash` functions of Bob +Jenkins to signed 64 +bit operations. 
Version 2 was chosen over version 1 as it has better +performance and fewer bad seeds +Bob Jenkins has also put this code in the public +domain and has given permission to treat this code as public domain in +the USA, provided the code can be used under other licenses and he is +given appropriate credit. + +`NMHASH32` and `NMHASH32x` are translations to Fortran 2008 and signed +two's complement integers of the unsigned 32 bit +hashes of James Z. M. Gao's `nmhash32` and `nmhash32x` version of 0.2, + +James Z. M. Gao has released his code under the BSD 2 Clause +License. The BSD 2-Clause license is as follows: + + BSD 2-Clause License + + Copyright (c) 2021, James Z.M. Gao + All rights reserved. + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions are met: + + 1. Redistributions of source code must retain the above copyright notice, + this list of conditions and the following disclaimer. + + 2. Redistributions in binary form must reproduce the above copyright notice, + this list of conditions and the following disclaimer in the documentation + and/or other materials provided with the distribution. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE + LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + POSSIBILITY OF SUCH DAMAGE. 
+ +`WATER_HASH` is a translation to Fortran 2008 and signed two's +complement integers of the `waterhash` algorithm +of Tommy Ettinger. This algorithm is inspired by the Wy Hash of +Wang Yi. Tommy Ettinger's original C++ code, `waterhash.h`, +is available at URL: under +the `unlicense`, +. +The `unlicense` reads as follows: + + This is free and unencumbered software released into the public domain. + Anyone is free to copy, modify, publish, use, compile, sell, or + distribute this software, either in source code form or as a compiled + binary, for any purpose, commercial or non-commercial, and by any + means. + + In jurisdictions that recognize copyright laws, the author or authors + of this software dedicate any and all copyright interest in the + software to the public domain. We make this dedication for the benefit + of the public at large and to the detriment of our heirs and + successors. We intend this dedication to be an overt act of + relinquishment in perpetuity of all present and future rights to this + software under copyright law. + + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR + OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + OTHER DEALINGS IN THE SOFTWARE. + + For more information, please refer to + +`PENGY_HASH` is a translation to Fortran 2008 and signed two's +complement arithmetic of the `pengyhash` algorithm of Alberto Fajardo, +copyright 2020. 
Alberto Fajardo's original C code, `pengyhash.c`, is +available at the URL: +https://github.com/tinypeng/pengyhash/blob/master/pengyhash.c +under the BSD 2-Clause License: +https://github.com/tinypeng/pengyhash/blob/master/LICENSE + +The BSD 2-Clause license is as follows: + + BSD 2-Clause License + + pengyhash + Copyright (c) 2020 Alberto Fajardo + All rights reserved. + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions + are met: + + 1. Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + + 2. Redistributions in binary form must reproduce the above + copyright notice, this list of conditions and the following + disclaimer in the documentation and/or other materials provided + with the distribution. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND + CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, + INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF + MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE + DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS + BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, + EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED + TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON + ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR + TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF + THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + SUCH DAMAGE. 
+ +## Glossary + +There are a few words used in this document that may not be familiar to +readers of this document: + +* Key - a value to be used to find entries in a hash table typically + using its hashed value for the initial search; + +* Salt - see seed, and; + +* Seed - an additional argument to a hash function that changes its + output making some attacks impractical. + + +## The hash codes modules + +### Overview of the modules + +The Standard Library provides two modules implementing hash +functions and scalar hashes. +The `stdlib_32_bit_hash_functions` module provides procedures to +compute 32 bit integer hash codes and a scalar hash. +The hash codes can be used for tables of up to `2**30` entries, and +for keys with a few hundred elements, but performance has only been +tested for tables up to `2**16` entries and performance may degrade +for larger numbers of entries. +The `stdlib_64_bit_hash_functions` module provides hash procedures to +compute 64 bit integer hash codes and a scalar hash. +The hash codes can, in principle, be used for tables of up to `2**62` +entries, and for keys with a few thousand elements, but testing of +performance has only been done for tables up to `2**16` elements and +performance may degrade for larger numbers of entries. +While one of the codes in `stdlib_64_bit_hash_functions`, +`SPOOKY_HASH`, can also be used to calculate 128 bit hash codes, none +of the current codes can be used to calculate 256 bit hash codes. +Such larger hash codes are useful for larger hash tables and keys, and +for checksums. +Such larger keys and tables are little used, if used at all, in +current +Fortran codes, but the larger hash codes may be added to the library +if there is a demand for them. + +Hash functions are often divided into two categories +"cryptographic" and "non-cryptographic".
+Cryptographic hash functions produce codes that are infeasible to +reverse without additional information beyond the identity of +the hash function used to generate the code and the resulting codes. +Non-cryptographic codes, in some circumstances, are believed to be +reversible. +The modules only implement hash +functions that are believed to be non-cryptographic, with +implementations available in the public domain. + +There are a number of algorithms available for the computation of +non-cryptographic 32 and 64 bit hash codes that differ in their +computational complexity, +their relative performance on different size keys, and the +expected uniqueness (randomness) of the resulting hash codes. +Their relative performance in the analysis of text, in particular, +can depend on the compiler, character set, language, and content. +The quality of a hash function is often evaluated using +the SMHasher test suite, originally written by +[Austin Appleby](https://github.com/aappleby/smhasher), but greatly +extended by [Reini Urban](https://github.com/rurban/smhasher). +All except the simplest, `FNV_1` and `FNV_1A`, of the hash functions +defined in the modules perform well on the tests in Reini Urban's +version of SMHasher. + +There are two problems in implementing hash functions in Fortran. +First, the static typing of Fortran makes it awkward to define general +purpose hash functions. +Instead hash functions are defined for some of the more common +objects: character strings and rank one arrays of integers. +Other objects can, in principle, be hashed by using `transfer` to +map their contents to an integer array, typically one of kind `INT8`. +The other problem is that hash codes are typically defined using +modular unsigned integer arithmetic. +As such integers are not part of the current Fortran standard, +workarounds have to be used. +These can take two forms. 
+In one, the operations are emulated by using an integer of a +larger size, or, for the larger integers, by dividing the integer into +two lower and higher order halves, +and performing the operations on each half separately using +the larger integers. +In the second, the unsigned integers may be replaced directly by +the corresponding signed integers, but +otherwise not modifying the code logic. +The first should be standard conforming on current compilers, but +is more computationally intensive unless the compilers recognize +underlying idioms that are rarely used in Fortran codes. The second is +not standard conforming as bit operations involving the sign are +undefined, +but should yield equivalent results with fewer operations on +compilers with two's complement integers that do not trap on over +or under flow. The codes currently use the second method. + +In order to compile the hash function modules, the compilers must +implement much of Fortran 2003, and selected components of Fortran +2008: submodules, 64 bit integers, and some bit intrinsics. +The main limitation on valid compilers is whether they +implement the submodules enhancement of Fortran 2008. +In order to properly run the hash functions, the compilers must +use two's complement integers, and be able to execute them with +wraparound semantics and no integer overflow exceptions. +Current Fortran 2003+ compilers solely use two's complement +integers, and appear to be able to turn off overflow detection, +so the modules use signed integer arithmetic. For that reason +trapping on signed arithmetic must be disabled. The command line +flags to disable overflow detection for compilers implementing +submodules are summarized in the table below. +Note that FLANG, gfortran, ifort, and NAG all default to +integer overflow wrapping. + +|Compiler|Legal flag|Illegal flag|Default| +|---------|----------|------------|-------| +| ARM Fortran | NA? | NA? | overflow wrapping? | +| Cray Fortran | NA? | NA?
| overflow wrapping? | +| FLANG/PGI | -fwrapv | -ftrapv | -fwrapv | +| gfortran | -fwrapv | -ftrapv | -fwrapv | +| IBM Fortran | NA? | NA? | overflow wrapping? | +| ifort| NA? | NA? | overflow wrapping | +| NAG Fortran | -C=none | -C=intovf | -C=none | +| NEC Fortran | NA? | NA? | overflow wrapping? | +| NVIDIA Fortran | NA? | NA? | overflow wrapping? | + +All of the modules' hash functions take one or two arguments. +All of them have as their first argument the object to be hashed, +termed a *key*. +Most have a second argument, termed a *seed*, that sets the initial +value of the hash code changing the hash function behavior. +In particular, inputs that hash to the same hash index with a given +seed, will often hash to different indexes with a different seed. +This difference in behavior makes algorithms that use a seed much +more resistant to denial of service attacks that use the properties +of a known hash to increase the number of hash table collisions. +This additional integer must be kept the same for all hashes +in a given hash table, but can be changed and the objects rehashed +if collisions are unusually common. +The *seed* can be either a scalar or a two element array. +Some of the hash functions have alternatives that allow incremental +hashing. + +|Algorithm|Seed|Result| +|---------|----|------| +|FNV-1|None|32 or 64 bit integer| +|FNV-1a|None|32 or 64 bit integer| +|nmhash32 |32 bit scalar integer|32 bit integer| +|nmhash32x |32 bit scalar integer|32 bit integer| +|pengyhash |32 bit scalar integer|64 bit integer| +|Spooky Hash|64 bit two element vector|64 bit two element vector| +|waterhash|64 bit scalar integer|32 bit integer| + +The hash function modules each provide at least five algorithms for +hash functions: two optimized for small (< 32 `INT8` integer elements) +keys, and three optimized for large (> 100 `INT8` integer elements) +keys. +The core implementation for each algorithm is for keys that are +vectors of `INT8` integers. 
+These core implementations are then used in wrappers for keys +that are vectors of `INT16`, `INT32` and `INT64` integers, or default +character strings, in the expectation that inlining will eliminate the +overhead of transferring the other keys to `INT8` integer vectors. + +The `stdlib_32_bit_hash_functions` module provides +implementations of five hash code algorithms: +the *FNV_1* and *FNV_1A* variants of Glenn Fowler, +Landon Curt Noll, and Kiem-Phong Vo; +the *nmhash32* and *nmhash32x* of James Z. M. Gao; +and the *waterhash* of Tommy Ettinger. +The detailed implementation of each algorithm is handled in a separate +submodule: `stdlib_32_bit_fnv_hashes`, +`stdlib_32_bit_nmhashes`, and `stdlib_32_bit_water_hashes`, +respectively. The `nmhash32`, `nmhash32x`, and `waterhash` algorithms +require seeds. The submodules provide separate seed generators +for each algorithm. +The module itself +implements two scalar hash functions, `FIBONACCI_HASH` and +`UNIVERSAL_MULT_HASH`. +It also implements the subroutine, `ODD_RANDOM_INTEGER`, for +generating seeds for `UNIVERSAL_MULT_HASH`. +All assume a two's complement sign bit, and no out of +range checks. + +The `stdlib_64_bit_hash_functions` module also provides +implementations of four hash code algorithms: +the *FNV_1* and *FNV_1A* variants of Glenn Fowler, +Landon Curt Noll, and Kiem-Phong Vo; +the *pengyhash* of Alberto Fajardo; +and the *SpookyHash* of Bob Jenkins. +The detailed implementation of each algorithm is handled in a separate +submodule: `stdlib_64_bit_fnv_hashes`, +`stdlib_64_bit_pengy_hashes`, and `stdlib_64_bit_spooky_hashes`, +respectively. +The `pengyhash` and `Spooky Hash` algorithms +require seeds. The submodules provide separate seed generators +for each algorithm. +The module itself implements two scalar hash functions, +`FIBONACCI_HASH` and `UNIVERSAL_MULT_HASH`. +It also implements the subroutine, `ODD_RANDOM_INTEGER`, for +generating seeds for `UNIVERSAL_MULT_HASH`.
+All assume a two's complement sign bit, and no out of +range checks. + +The `stdlib_32_bit_fnv_hashes` and `stdlib_64_bit_fnv_hashes` +submodules each provide implementations of the FNV-1 and FNV-1A +algorithms in the form of two separate overloaded functions: `FNV_1` +and `FNV_1A`. +The FNV-1 and FNV-1A algorithms differ in their order of the +multiplication and exclusive or operations. +They differ from their normal implementation in that they also +encode the structure size in the hash code. +The 32 and 64 bit algorithms differ in their initial offsets and in +their multiplicative constants. +Analysis suggests that `FNV_1A` should be better at randomizing the +input, but tests with hash tables show negligible difference. +These algorithms have the reputation of being particularly useful for +small byte strings, i.e., strings of less than 32 bytes. +While they do not at all perform well on the SMHasher test suite, +usage indicates that this has little impact on the +performance of small hash tables, and the small size of the functions +allows their quick loading and retention in the instruction cache, +giving a performance boost where the hashing is intermittent. +(See the +[SMHasher discussion](https://github.com/rurban/smhasher/README.md) +and S. Richter, V. Alvarez, and J. Dittrich, +["A Seven-Dimensional Analysis of Hashing Methods and its Implications on Query Processing"](https://bigdata.uni-saarland.de/publications/p249-richter.pdf).) + +The `stdlib_32_bit_nmhashes` submodule provides implementations +of James Z.M. Gao's `nmhash32` and `nmhash32x` algorithms, +version 0.2, +in the form of the overloaded functions, `NMHASH32` and `NMHASH32X`. +The implementations are based on the scalar versions of Gao's +algorithms and not the vector versions that require access to +the vector instructions of some compilers. +Both algorithms perform well on the SMHasher tests, and have no known +bad seeds.
The vector versions of both codes perform well on large +keys, with the `nmhash32x` faster on short keys. To provide randomly +generated seeds for the two functions the submodule also defines the +subroutines `NEW_NMHASH32_SEED` and `NEW_NMHASH32X_SEED`. Gao claims +that `NMHASH32X` is significantly faster than `NMHASH32` on short +keys, but slower on long keys, but our limited testing so far shows +`NMHASH32X` to be significantly faster on short keys and slightly +faster on long keys. + +The `stdlib_32_bit_water_hashes` submodule provides implementations +of Tommy Ettinger's `waterhash` algorithm in the form of the overloaded +function, `WATER_HASH`. Water Hash has not been tested by Reini Urban, +but Tommy Ettinger has tested it with Urban's SMHasher and presents +results that show Water Hash passing all the tests. So far his +testing hasn't found any bad seeds for the algorithm. To provide +randomly generated seeds for the hash function the submodule also +defines the subroutine `NEW_WATER_HASH_SEED`. + +The `stdlib_64_bit_pengy_hashes` submodule provides implementations of +Alberto Fajardo's `pengyhash` in the form of the overloaded function, +`PENGY_HASH`. Reini Urban's testing shows that PengyHash passes all +the tests and has no bad seeds. To provide randomly generated seeds +for the hash function the submodule also defines the subroutine +`NEW_PENGY_HASH_SEED`. + +The `stdlib_64_bit_spooky_hashes` submodule provides implementations +of Bob Jenkins' SpookyHash in the form of the overloaded function, +`SPOOKY_HASH`. Future implementations may provide the SpookyHash +incremental hashing procedures. +SpookyHash is optimized for large objects and should give excellent +performance for objects greater than about 96 bytes, but has +significant overhead for smaller objects. +The code was designed for Little Endian compilers, and will give +different results on Big Endian compilers, but the hash quality on +those compilers is probably just as good.
+SpookyHash version 2 passes all of Reini Urban's SMHasher tests, and +has one bad seed only when reduced to a 32 bit output. +Its only potential problem is undefined behavior if the key is +misaligned. + +## The `stdlib_32_bit_hash_codes` module + +### Overview of the module + +Thirty two bit hash functions are primarily useful for generating hash +codes and hash indices for hash tables. +They tend to be less useful for generating checksums, which generally +benefit from having a larger number of bits. +The `stdlib_32_bit_hash_codes` module defines five public overloaded +32 bit hash code functions, `FNV_1`, `FNV_1A`, `NMHASH32`, `NMHASH32X` +and `WATER_HASH`, two scalar hash functions, `FIBONACCI_HASH` and +`UNIVERSAL_MULT_HASH`, four seed generators, `ODD_RANDOM_INTEGER` for +`UNIVERSAL_MULT_HASH`, and `NEW_NMHASH32_SEED`, `NEW_NMHASH32X_SEED`, +and `NEW_WATER_HASH_SEED`, for their respective hash code +functions. It also defines the integer kind constant, `INT_HASH`, and +a logical constant, `LITTLE_ENDIAN`, used to deal with one aspect of +the machine dependence of the hash codes. + +### The `INT_HASH` parameter + +It is necessary to define the kind of integer used to return the hash +code. +As `stdlib_32_bit_hash_codes` deals exclusively with 32 bit hash codes, +`INT_HASH` is an alias for the integer kind `INT32`. + +### The `LITTLE_ENDIAN` parameter + +In implementing hash functions it is sometimes necessary to know the +"endianness" of the compiler's integers. To this end the +`stdlib_32_bit_hash_codes` module defines the logical parameter +`LITTLE_ENDIAN` that, if true, indicates that the compiler has little +endian integers, and that if false indicates that the integers are big +endian. + +### Specifications of the `stdlib_32_bit_hash_codes` procedures + +#### `FIBONACCI_HASH` - maps an integer to a smaller number of bits + +##### Status + +Experimental + +##### Description + +Calculates an `nbits` hash code from a 32 bit integer.
This is useful +in mapping hash codes into small arrays. + +##### Syntax + +`code = [[stdlib_32_bit_hash_codes:fibonacci_hash]]( key, nbits )` + +##### Class + +Pure function + +##### Arguments + +`key`: Shall be a scalar integer expression of kind `INT32`. It is an +`intent(in)` argument. + +`nbits`: Shall be a scalar default integer expression with `0 < nbits < +32`. It is an `intent(in)` argument. + +##### Result + +The result is an integer of kind `INT32` with at most the lowest +`nbits` nonzero, mapping to a range 0 to `2**nbits-1`. + +##### Note + +`FIBONACCI_HASH` is an implementation of the Fibonacci Hash of Donald +E. Knuth. It multiplies the `KEY` by the odd valued approximation to +`2**32/phi`, where `phi` is the golden ratio 1.618..., and returns the +`NBITS` upper bits of the product as the lowest bits of the result. + +##### Example + +```fortran + program demo_fibonacci_hash + use stdlib_32_bit_hash_codes, only: fibonacci_hash + use iso_fortran_env, only: int32 + implicit none + integer, allocatable :: array1(:) + integer(int32) :: hash, source + allocate( array1(0:2**6-1) ) + array1(:) = 0 + source = 42_int32 + hash = fibonacci_hash(source, 6) + array1(hash) = source + print *, hash + end program demo_fibonacci_hash +``` + +#### `FNV_1_HASH`- calculates a hash code from a key + +##### Status + +Experimental + +##### Description + +Calculates a 32 bit hash code from a rank 1 integer array or a default +character string. + +##### Syntax + +`code = [[stdlib_32_bit_hash_codes:fnv_1_hash]]( key )` + +##### Class + +Pure function + +##### Argument + +`key`: Shall be a deferred length default character scalar expression +or a rank 1 integer array expression of kind `INT8`, `INT16`, +`INT32`, or `INT64`. +It is an `intent(in)` argument. + +##### Result + +The result is a scalar integer of kind `INT32`. + +##### Note + +`FNV_1_HASH` is an implementation of the original FNV-1 hash code of Glenn +Fowler, Landon Curt Noll, and Phong Vo.
+It differs from typical implementations in that it also encodes the +size of the structure in the hash code. +This code is relatively fast on short keys, and is small enough that it +will often be retained in the instruction cache if hashing is +intermittent. +As a result it should give good performance for typical hash table +applications. +This code does not pass any of the SMHasher tests, but the resulting +degradation in performance due to its larger number of collisions is +expected to be minor compared to its faster hashing rate. + + +##### Example + +```fortran + program demo_fnv_1_hash + use stdlib_32_bit_hash_codes, only: fnv_1_hash + use iso_fortran_env, only: int32 + implicit none + integer, allocatable :: array1(:) + integer(int32) :: hash + array1 = [ 5, 4, 3, 1, 10, 4, 9] + hash = fnv_1_hash(array1) + print *, hash + end program demo_fnv_1_hash +``` + + +#### `FNV_1A_HASH`- calculates a hash code from a key + +##### Status + +Experimental + +##### Description + +Calculates a 32 bit hash code from a rank 1 integer array or a default +character string. + +##### Syntax + +`code = [[stdlib_32_bit_hash_codes:fnv_1a_hash]]( key )` + +##### Class + +Pure function + +##### Argument + +`key`: Shall be a deferred length default character scalar expression +or a rank 1 integer array expression of kind `INT8`, `INT16`, +`INT32`, or `INT64`. +It is an `intent(in)` argument. + +##### Result + +The result is a scalar integer of kind `INT32`. + +##### Note + +`FNV_1A_HASH` is an implementation of the alternative FNV-1a hash code of +Glenn Fowler, Landon Curt Noll, and Phong Vo. +It differs from typical implementations in that it also encodes the +size of the structure in the hash code. +This code is relatively fast on short keys, and is small enough that it +will often be retained in the instruction cache if hashing is +intermittent. +As a result it should give good performance for typical hash table +applications.
+This code does not pass any of the SMHasher tests, but the resulting +degradation in performance due to its larger number of collisions is +expected to be minor compared to its faster hashing rate. + +##### Example + +```fortran + program demo_fnv_1a_hash + use stdlib_32_bit_hash_codes, only: fnv_1a_hash + use iso_fortran_env, only: int32 + implicit none + integer, allocatable :: array1(:) + integer(int32) :: hash + array1 = [ 5, 4, 3, 1, 10, 4, 9] + hash = fnv_1a_hash(array1) + print *, hash + end program demo_fnv_1a_hash +``` + + +#### `NEW_NMHASH32_SEED`- returns a valid input seed for `NMHASH32` + +##### Status + +Experimental + +##### Description + +Calculates a 32 bit "random" integer that is believed to be a valid +seed for `NMHASH32` and is also different from the input seed. + +##### Syntax + +`call [[stdlib_32_bit_hash_codes:new_nmhash32_seed]]( seed )` + +##### Class + +Subroutine + +##### Argument + +`seed`: shall be a defined integer scalar variable of kind `INT32`. +It is an `intent(inout)` argument. On input `seed` should be defined, +and on output it will be different from the input `seed`. + +##### Note + +Currently there are no known bad seeds for `NMHASH32`, but if any are +identified the procedure will be revised so that they cannot be +returned. This subroutine uses Fortran's intrinsic + `RANDOM_NUMBER` and the values returned can be changed by calling the + intrinsic `RANDOM_INIT`. + +##### Example + +See the example for `NMHASH32`. + + +#### `NEW_NMHASH32X_SEED`- returns a valid input seed for `NMHASH32X` + +##### Status + +Experimental + +##### Description + +Calculates a 32 bit "random" integer that is believed to be a valid +seed for `NMHASH32X` and is also different from the input seed. + +##### Syntax + +`call [[stdlib_32_bit_hash_codes:new_nmhash32x_seed]]( seed )` + +##### Class + +Subroutine + +##### Argument + +`seed`: shall be a defined integer scalar variable of kind `INT32`. +It is an `intent(inout)` argument.
On input `seed` should be defined, +and on output it will be different from the input `seed`. + +##### Note + +Currently there are no known bad seeds for `NMHASH32X`, but if any are +identified the procedure will be revised so that they cannot be +returned. This subroutine uses Fortran's intrinsic + `RANDOM_NUMBER` and the values returned can be changed by calling the + intrinsic `RANDOM_INIT`. + +##### Example + +See the example for `NMHASH32X`. + + +#### `NEW_WATER_HASH_SEED` - returns a valid input seed for `WATER_HASH` + +##### Status + +Experimental + +##### Description + +Calculates a 64 bit "random" integer that is believed to be a valid +seed for `WATER_HASH` and is also different from the input seed. + +##### Syntax + +`call [[stdlib_32_bit_hash_codes:new_water_hash_seed]]( seed )` + +##### Class + +Subroutine + +##### Argument + +`seed`: shall be a defined integer scalar variable of kind `INT64`. +It is an `intent(inout)` argument. On input `seed` should be defined, +and on output it will be different from the input `seed`. + +##### Note + +Currently there are no known bad seeds for `WATER_HASH`, but if any +are identified the procedure will be revised so that they cannot be +returned. This subroutine uses Fortran's intrinsic + `RANDOM_NUMBER` and the values returned can be changed by calling the + intrinsic `RANDOM_INIT`. + + +##### Example + +See the example for `WATER_HASH`. + + +#### `NMHASH32` - calculates a hash code from a key and a seed + +##### Status + +Experimental + +##### Description + +Calculates a 32 bit hash code from a rank 1 integer array or a default +character string, and the input `seed`. + +##### Syntax + +`code = [[stdlib_32_bit_hash_codes:nmhash32]]( key, seed )` + +##### Class + +Pure function + +##### Arguments + +`key`: Shall be a deferred length default character scalar expression +or a rank 1 integer array expression of kind `INT8`, `INT16`, +`INT32`, or `INT64`. +It is an `intent(in)` argument.
+ +`seed`: shall be an integer scalar expression of kind `INT32`. +It is an `intent(in)` argument. + +##### Result + +The result is a scalar integer of kind `INT32`. + +##### Note + +`NMHASH32` is an implementation of the `nmhash32` hash code of +James Z. M. Gao. +This code has good, but not great, performance on long keys, poorer +performance on short keys. +As a result it should give fair performance for typical hash table +applications. +This code passes the SMHasher tests, and has no known bad seeds. + +##### Example + +```fortran + program demo_nmhash32 + use stdlib_32_bit_hash_codes, only: nmhash32, & + new_nmhash32_seed + use iso_fortran_env, only: int32 + implicit none + integer, allocatable :: array1(:) + integer(int32) :: hash + integer(int32) :: seed = int(Z'11111111', int32) + call new_nmhash32_seed(seed) + array1 = [ 5, 4, 3, 1, 10, 4, 9] + hash = nmhash32(array1, seed) + print *, seed, hash + end program demo_nmhash32 +``` + + +#### `NMHASH32X` - calculates a hash code from a key and a seed + +##### Status + +Experimental + +##### Description + +Calculates a 32 bit hash code from a rank 1 integer array or a default +character string, and the input `seed`. + +##### Syntax + +`code = [[stdlib_32_bit_hash_codes:nmhash32x]]( key, seed )` + +##### Class + +Pure function + +##### Arguments + +`key`: Shall be a deferred length default character scalar expression +or a rank 1 integer array expression of kind `INT8`, `INT16`, +`INT32`, or `INT64`. +It is an `intent(in)` argument. + +`seed`: shall be an integer scalar expression of kind `INT32`. +It is an `intent(in)` argument. + +##### Result + +The result is a scalar integer of kind `INT32`. + +##### Note + +`NMHASH32X` is an implementation of the `nmhash32x` hash code of +James Z. M. Gao. +This code has good, but not great, performance on long keys, poorer +performance on short keys. +As a result it should give fair performance for typical hash table +applications.
+This code passes the SMHasher tests, and has no known bad seeds. + +##### Example + +```fortran + program demo_nmhash32x + use stdlib_32_bit_hash_codes, only: nmhash32x, & + new_nmhash32x_seed + use iso_fortran_env, only: int32 + implicit none + integer, allocatable :: array1(:) + integer(int32) :: hash + integer(int32) :: seed = int(Z'11111111', int32) + call new_nmhash32x_seed(seed) + array1 = [ 5, 4, 3, 1, 10, 4, 9] + hash = nmhash32x(array1, seed) + print *, seed, hash + end program demo_nmhash32x +``` + +#### `ODD_RANDOM_INTEGER` - returns an odd integer + +##### Status + +Experimental + +##### Description + +Returns a random 32 bit integer distributed uniformly over the odd values. + +##### Syntax + +`call [[stdlib_32_bit_hash_codes:odd_random_integer]]( harvest )` + +##### Class + +Subroutine + +##### Argument + +`harvest`: Shall be a scalar integer variable of kind `INT32`. It is +an `intent(out)` argument. + +##### Note + +`ODD_RANDOM_INTEGER` is intended to generate seeds for + `UNIVERSAL_MULT_HASH`. `ODD_RANDOM_INTEGER` uses Fortran's intrinsic + `RANDOM_NUMBER` and the values returned can be changed by calling the + intrinsic `RANDOM_INIT`. + +##### Example + +See `UNIVERSAL_MULT_HASH`. + + +#### `UNIVERSAL_MULT_HASH` - maps an integer to a smaller number of bits + +##### Status + +Experimental + +##### Description + +Calculates an `nbits` hash code from a 32 bit integer. This is useful +in mapping a hash value to a range 0 to `2**nbits-1`. + +##### Syntax + +`code = [[stdlib_32_bit_hash_codes:universal_mult_hash]]( key, seed, nbits )` + +##### Class + +Pure function + +##### Arguments + +`key`: Shall be a scalar integer expression of kind `INT32`. It is an +`intent(in)` argument. + +`seed`: Shall be a scalar integer expression of kind `INT32`. It is an +`intent(in)` argument. It must have an odd value. + +`nbits`: Shall be a scalar default integer expression with `0 < nbits < +32`. It is an `intent(in)` argument.
+ +##### Result + +The result is a scalar integer of kind `INT32` with at most the lowest +`nbits` nonzero. + +##### Note + +`UNIVERSAL_MULT_HASH` is an implementation of the Universal +Multiplicative Hash of M. Dietzfelbinger, et al. +It multiplies the `KEY` by `SEED`, and returns the +`NBITS` upper bits of the product as the lowest bits of the result. + +##### Example + +```fortran + program demo_universal_mult_hash + use stdlib_32_bit_hash_codes, only: odd_random_integer, & + universal_mult_hash + use iso_fortran_env, only: int32 + implicit none + integer, allocatable :: array1(:) + integer(int32) :: hash, i, seed, source + seed = 0 + allocate( array1(0:2**6-1) ) + do i = 0, 2**6-1 + array1(i) = i + end do + call odd_random_integer( seed ) + source = int(Z'1FFFFFF', int32) + hash = universal_mult_hash(source, seed, 6) + array1(hash) = source + print *, seed, hash, array1 + end program demo_universal_mult_hash +``` + +#### `WATER_HASH` - calculates a hash code from a key and a seed + +##### Status + +Experimental + +##### Description + +Calculates a 32 bit hash code from a rank 1 integer array or a default +character string, and the input `seed`. + +##### Syntax + +`code = [[stdlib_32_bit_hash_codes:water_hash]]( key, seed )` + +##### Class + +Pure function + +##### Arguments + +`key`: Shall be a deferred length default character scalar expression +or a rank 1 integer array expression of kind `INT8`, `INT16`, +`INT32`, or `INT64`. +It is an `intent(in)` argument. + +`seed`: shall be an integer scalar expression of kind `INT64`. +It is an `intent(in)` argument. + +##### Result + +The result is a scalar integer of kind `INT32`. + +##### Note + +`WATER_HASH` is an implementation of the `waterhash` hash code of +Tommy Ettinger. +This code has excellent performance on long keys, and good performance +on short keys. +As a result it should give reasonable performance for typical hash +table applications. +This code passes the SMHasher tests.
+The `waterhash` is based on the `wyhash` of Wang Yi. +While `wyhash` has a number of bad seeds, where randomization of the +output is poor, +so far testing has not found any bad seeds for `waterhash`. +It can have undefined behavior if the key is not word aligned, +i.e. some computer processors can only process a given size integer if +the address of the integer is a multiple of the integer size. + +##### Example + +```fortran + program demo_water_hash + use stdlib_32_bit_hash_codes, only: water_hash, & + new_water_hash_seed + use iso_fortran_env, only: int32, int64 + implicit none + integer, allocatable :: array1(:) + integer(int32) :: hash + integer(int64) :: seed = int(Z'11111111', int64) + call new_water_hash_seed( seed ) + array1 = [ 5, 4, 3, 1, 10, 4, 9] + hash = water_hash(array1, seed) + print *, hash, seed + end program demo_water_hash +``` + +## The `stdlib_64_bit_hash_codes` module + +### Overview of the module + +Sixty four bit hash functions are generally overkill for hash table +applications, and are primarily useful for check sums and related +applications. +As checksums often have to deal with extremely large files or +directories, it is often useful to use incremental hashing as well as +direct hashing, so 64 bit and higher hash algorithms often provide +multiple implementations. The current module, for simplicity of API, +doesn't provide any incremental hashes. +The `stdlib_64_bit_hash_codes` module defines several public +overloaded 64 bit hash procedures, `FNV_1`, `FNV_1A`, +`PENGY_HASH`, and `SPOOKY_HASH`, two scalar hash functions, +`FIBONACCI_HASH` and +`UNIVERSAL_MULT_HASH`, a seed generator, `ODD_RANDOM_INTEGER`, for the +`UNIVERSAL_MULT_HASH`, and two seed generators, `NEW_PENGY_HASH_SEED` +and `NEW_SPOOKY_HASH_SEED` for their respective hash functions.
It +also defines the integer kind constant, `INT_HASH`, used to specify +the kind of the hash function results, and a logical constant, +`LITTLE_ENDIAN`, used to deal with one aspect of the machine +dependence of the hash codes. +Note that while SpookyHash can be used as a sixty four bit hash +algorithm, its algorithm actually returns a two element integer array +of kind `INT64`, so it can also be used as a 128 bit hash. + +### The `INT_HASH` parameter + +It is necessary to define the kind of integer used to return the hash +code. +As `stdlib_64_bit_hash_codes` deals exclusively with 64 bit hash codes, +`INT_HASH` is an alias for the integer kind `INT64`. + +### The `LITTLE_ENDIAN` parameter + +In implementing hash functions it is sometimes necessary to know the +"endianness" of the compiler's integers. To this end the +`stdlib_64_bit_hash_codes` module defines the logical parameter +`LITTLE_ENDIAN` that if true indicates that the compiler has little +endian integers, and that if false indicates that the integers are big +endian. + + +### Specifications of the `stdlib_64_bit_hash_codes` procedures + +#### `FIBONACCI_HASH` - maps an integer to a smaller number of bits + +##### Status + +Experimental + +##### Description + +Calculates an `nbits` hash code from a 64 bit integer. This is useful +in mapping hash codes into small arrays. + +##### Syntax + +`code = [[stdlib_64_bit_hash_codes:fibonacci_hash]]( key, nbits )` + +##### Class + +Pure function + +##### Arguments + +`key`: Shall be a scalar integer expression of kind `INT64`. It is an +`intent(in)` argument. + +`nbits`: Shall be a scalar default integer expression with `0 < nbits < +64`. It is an `intent(in)` argument. + +##### Result + +The result is an integer of kind `INT64` with at most the lowest +`nbits` nonzero, mapping to a range 0 to `2**nbits-1`. + +##### Note + +`FIBONACCI_HASH` is an implementation of the Fibonacci Hash of Donald +E. Knuth.
It multiplies the `KEY` by the odd valued approximation to +`2**64/phi`, where `phi` is the golden ratio 1.618..., and returns the +`nbits` upper bits of the product as the lowest bits of the result. + +##### Example + +```fortran + program demo_fibonacci_hash + use stdlib_64_bit_hash_codes, only: fibonacci_hash + use iso_fortran_env, only: int64 + implicit none + integer, allocatable :: array1(:) + integer(int64) :: hash, source + allocate( array1(0:2**6-1) ) + array1(:) = 0 + source = int(Z'1FFFFFFFF', int64) + hash = fibonacci_hash(source, 6) + array1(hash) = source + print *, hash + end program demo_fibonacci_hash +``` + +#### `FNV_1` - calculates a hash code from a key + +##### Status + +Experimental + +##### Description + +Calculates a 64 bit hash code from a rank 1 integer array or a default +character string. + +##### Syntax + +`code = [[stdlib_64_bit_hash_codes:fnv_1]]( key )` + +##### Class + +Pure function + +##### Argument + +`key`: Shall be a deferred length default character scalar expression +or a rank 1 integer array expression of kind `INT8`, `INT16`, +`INT32`, or `INT64`. +It is an `intent(in)` argument. + +##### Result + +The result is a scalar integer of kind `INT64`. + +##### Note + +`FNV_1` is an implementation of the original FNV-1 hash code of Glenn +Fowler, Landon Curt Noll, and Phong Vo. +It differs from typical implementations in that it also encodes the +size of the structure in the hash code. +This code is relatively fast on short keys, and is small enough that it +will often be retained in the instruction cache if hashing is +intermittent. +As a result it should give good performance for typical hash table +applications, although it is rare for them to need 64 bits. +This code does not pass any of the SMHasher tests, but the resulting +degradation in performance due to its larger number of collisions is +expected to be minor compared to its faster hashing rate.
+ + +##### Example + +```fortran + program demo_fnv_1_hash + use stdlib_64_bit_hash_codes, only: fnv_1_hash + use iso_fortran_env, only: int64 + implicit none + integer, allocatable :: array1(:) + integer(int64) :: hash + array1 = [ 5, 4, 3, 1, 10, 4, 9] + hash = fnv_1_hash(array1) + print *, hash + end program demo_fnv_1_hash +``` + + +#### `FNV_1A` - calculates a hash code from a key + +##### Status + +Experimental + +##### Description + +Calculates a 64 bit hash code from a rank 1 integer array or a default +character string. + +##### Syntax + +`code = [[stdlib_64_bit_hash_codes:fnv_1a]]( key )` + +##### Class + +Pure function + +##### Argument + +`key`: Shall be a deferred length default character scalar expression +or a rank 1 integer array expression of kind `INT8`, `INT16`, +`INT32`, or `INT64`. +It is an `intent(in)` argument. + +##### Result + +The result is a scalar integer of kind `INT64`. + +##### Note + +`FNV_1A` is an implementation of the alternative FNV-1a hash code of +Glenn Fowler, Landon Curt Noll, and Phong Vo. +It differs from typical implementations in that it also encodes the +size of the structure in the hash code. +This code is relatively fast on short keys, and is small enough that it +will often be retained in the instruction cache if hashing is +intermittent. +As a result it should give good performance for typical hash table +applications. +This code does not pass any of the SMHasher tests, but the resulting +degradation in performance due to its larger number of collisions is +expected to be minor compared to its faster hashing rate.
+ +##### Example + +```fortran + program demo_fnv_1a_hash + use stdlib_64_bit_hash_codes, only: fnv_1a_hash + use iso_fortran_env, only: int64 + implicit none + integer, allocatable :: array1(:) + integer(int64) :: hash + array1 = [ 5, 4, 3, 1, 10, 4, 9] + hash = fnv_1a_hash(array1) + print *, hash + end program demo_fnv_1a_hash +``` + + +#### `NEW_PENGY_HASH_SEED` - returns a valid input seed for `PENGY_HASH` + +##### Status + +Experimental + +##### Description + +Calculates a 32 bit "random" integer that is believed to be a valid +seed for `PENGY_HASH` and is also different from the input seed. + +##### Syntax + +`call [[stdlib_64_bit_hash_codes:new_pengy_hash_seed]]( seed )` + +##### Class + +Subroutine + +##### Argument + +`seed`: shall be a defined integer scalar variable of kind `INT32`. +It is an `intent(inout)` argument. On input `seed` should be defined, +and on output it will be different from the input `seed`. + +##### Note + +Currently there are no known bad seeds for `PENGY_HASH`, but if any are +identified the procedure will be revised so that they cannot be +returned. This subroutine uses Fortran's intrinsic + `RANDOM_NUMBER` and the values returned can be changed by calling the + intrinsic `RANDOM_INIT`. + +##### Example + +See the example for `PENGY_HASH`. + + +#### `NEW_SPOOKY_HASH_SEED` - returns a valid input seed for `SPOOKY_HASH` + +##### Status + +Experimental + +##### Description + +Calculates a 64 bit two element vector of "random" integer values that +is believed to be a valid seed for `SPOOKY_HASH` and is also different +from the input seed. + +##### Syntax + +`call [[stdlib_64_bit_hash_codes:new_spooky_hash_seed]]( seed )` + +##### Class + +Subroutine + +##### Argument + +`seed`: shall be a defined two element integer vector variable of kind +`INT64`. It is an `intent(inout)` argument. On input `seed` should be +defined, and on output it will be different from the input `seed`.
+ +##### Note + +Currently there are no known bad seeds for `SPOOKY_HASH`, but if any are +identified the procedure will be revised so that they cannot be +returned. This subroutine uses Fortran's intrinsic + `RANDOM_NUMBER` and the values returned can be changed by calling the + intrinsic `RANDOM_INIT`. + +##### Example + +See the example for `SPOOKY_HASH`. + + +#### `ODD_RANDOM_INTEGER` - returns an odd integer + +##### Status + +Experimental + +##### Description + +Returns a random 64 bit integer distributed uniformly over the odd values. + +##### Syntax + +`call [[stdlib_64_bit_hash_codes:odd_random_integer]]( harvest )` + +##### Class + +Subroutine + +##### Argument + +`harvest`: Shall be an integer of kind `INT64`. It is an `intent(out)` +argument. + +##### Note + +`ODD_RANDOM_INTEGER` is intended to generate seeds for + `UNIVERSAL_MULT_HASH`. `ODD_RANDOM_INTEGER` uses Fortran's intrinsic + `RANDOM_NUMBER` and the values returned can be changed by calling the + intrinsic `RANDOM_INIT`. + +##### Example + +See `UNIVERSAL_MULT_HASH`. + + +#### `PENGY_HASH` - maps a character string or integer vector to an integer + +##### Status + +Experimental + +##### Description + +Maps a character string or integer vector to a 64 bit integer whose +value also depends on a scalar 32 bit integer, `seed`. + +##### Syntax + +`code = [[stdlib_64_bit_hash_codes:pengy_hash]]( key, seed )` + +##### Class + +Pure function + +##### Arguments + +`key`: shall be a scalar expression of type default character or a +rank 1 integer vector expression of kind `INT8`, `INT16`, `INT32`, or +`INT64`. It is an `intent(in)` argument. + +`seed`: shall be an integer expression of kind `INT32`. It is +an `intent(in)` argument. + +##### Result + +The result is an integer of kind `INT64`. + +##### Note + +`PENGY_HASH` is an implementation of the 64 bit `pengyhash` of Alberto +Fajardo. The hash has acceptable performance on small keys, and good +performance on long keys.
It passes all the SMHasher tests, and has +no known bad seeds. + +##### Example + +```fortran + program demo_pengy_hash + use stdlib_64_bit_hash_codes, only: new_pengy_hash_seed, pengy_hash + use iso_fortran_env, only: int32, int64 + implicit none + integer, allocatable :: key(:) + integer(int64) :: hash + integer(int32) :: seed + key = [ 0_int64, 1_int64, 2_int64, 3_int64 ] + seed = 0_int32 + call new_pengy_hash_seed( seed ) + hash = pengy_hash( key, seed ) + print *, seed, hash + end program demo_pengy_hash +``` + + +#### `SPOOKY_HASH` - maps a character string or integer vector to an integer + +##### Status + +Experimental + +##### Description + +Maps a character string or integer vector to a 64 bit integer whose +value also depends on a two element vector, `seed`. + +##### Syntax + +`code = [[stdlib_64_bit_hash_codes:spooky_hash]]( key, seed )` + +##### Class + +Pure function + +##### Arguments + +`key`: shall be a scalar expression of type default character or a +rank 1 integer vector expression of kind `INT8`, `INT16`, `INT32`, or +`INT64`. It is an `intent(in)` argument. + +`seed`: shall be a two element integer vector expression of kind +`INT64`. It is an `intent(in)` argument. + +##### Result + +The result is a two element integer vector of kind `INT64`. + +##### Note + +`SPOOKY_HASH` is an implementation of the 64 bit version 2 of +SpookyHash of Bob Jenkins. The code was designed for Little-Endian +compilers. The output is different on Big Endian compilers, but still +probably as good quality. It is often used as a 64 bit hash using the +first element of the returned value, but can be used as a 128 bit +hash. This version of `SPOOKY_HASH` has good performance on small keys +and excellent performance on long keys. It passes all the SMHasher tests +and has no known bad seeds.
+ +##### Example + +```fortran + program demo_spooky_hash + use stdlib_64_bit_hash_codes, only: new_spooky_hash_seed, & + spooky_hash + use iso_fortran_env, only: int64 + implicit none + integer, allocatable :: key(:) + integer(int64) :: hash(2), seed(2), source + key = [ 0_int64, 1_int64, 2_int64, 3_int64 ] + seed = [ 119_int64, 2_int64**41-1 ] + call new_spooky_hash_seed( seed ) + hash = spooky_hash( key, seed ) + print *, seed, hash + end program demo_spooky_hash +``` + +#### `UNIVERSAL_MULT_HASH` - maps an integer to a smaller number of bits + +##### Status + +Experimental + +##### Description + +Calculates an `nbits` hash code from a 64 bit integer. This is useful +in mapping a hash value to a range 0 to `2**nbits-1`. + +##### Syntax + +`code = [[stdlib_64_bit_hash_codes:universal_mult_hash]]( key, seed, nbits )` + +##### Class + +Pure function + +##### Arguments + +`key`: Shall be an integer of kind `INT64`. It is an `intent(in)` +argument. + +`seed`: Shall be an integer of kind `INT64`. It is an `intent(in)` +argument. It must have an odd value. + +`nbits`: Shall be a default integer with `0 < nbits < 64`. It is an +`intent(in)` argument. + +##### Result + +The result is an integer of kind `INT64` with at most the lowest +`nbits` nonzero. + +##### Note + +`UNIVERSAL_MULT_HASH` is an implementation of the Universal +Multiplicative Hash of M. Dietzfelbinger, et al. +It multiplies the `KEY` by `SEED`, and returns the +`NBITS` upper bits of the product as the lowest bits of the result.
+ +##### Example + + +```fortran + program demo_universal_mult_hash + use stdlib_64_bit_hash_codes, only: odd_random_integer, & + universal_mult_hash + use iso_fortran_env, only: int64 + implicit none + integer, allocatable :: array1(:) + integer(int64) :: hash, i, seed, source + seed = 0 + allocate( array1(0:2**6-1) ) + do i = 0, 2**6-1 + array1(i) = i + end do + call odd_random_integer( seed ) + source = int(Z'1FFFFFF', int64) + hash = universal_mult_hash(source, seed, 6) + array1(hash) = source + print *, seed, hash, array1 + end program demo_universal_mult_hash +``` + + +### Test Codes + +The Fortran Standard Library provides two categories of test +codes. One category is tests of the relative performance of the +various hash functions. The other is a comparison of the outputs of +the Fortran hash functions, with the outputs of the C and C++ hash +procedures that are the inspiration for the Fortran hash functions. + +In the `src/tests/hash_functions` subdirectory, the Fortran Standard +Library provides two performance test codes for +the hash functions of `stdlib_32_bit_hash_functions` and +`stdlib_64_bit_hash_functions`, `test_32_bit_hash_performance` and +`test_64_bit_hash_performance` respectively. These are primarily set +up to test runtime performance of the functions. They take a sample of +`2**18` integers of kind `INT8` and break it up into vectors of size +1, 2, 4, 8, 16, 64, 256, and 1024 elements, yielding `2**18`, +`2**17`, `2**16`, `2**15`, `2**14`, `2**12`, `2**10`, and `2**8` +vectors respectively. These are then processed by the hash functions +4 times, and the time for processing is reported. Testing so far has +been on a MacBook Pro with a 2.3 GHz Quad-Core Intel Core i5 and 8 GB +2133 MHz LPDDR3 of RAM, using GNU Fortran (GCC) 11.1.0 to compile the +code.
The results for `test_32_bit_hash_performance` is given by the +following table: + +| Algorithm | Key Size | Key # | Time (s) | +| | Bytes | | | +|------------|-----------|------------|----------| +| FNV-1 | 1 | 1048576 | 0.02949 | +| FNV-1 | 2 | 524288 | 0.02361 | +| FNV-1 | 4 | 262144 | 0.02016 | +| FNV-1 | 8 | 131072 | 0.01806 | +| FNV-1 | 16 | 65536 | 0.01867 | +| FNV-1 | 64 | 16384 | 0.01717 | +| FNV-1 | 256 | 4096 | 0.01759 | +| FNV-1 | 1024 | 1024 | 0.01659 | +| FNV-1a | 1 | 1048576 | 0.02897 | +| FNV-1a | 2 | 524288 | 0.02472 | +| FNV-1a | 4 | 262144 | 0.02025 | +| FNV-1a | 8 | 131072 | 0.01901 | +| FNV-1a | 16 | 65536 | 0.01898 | +| FNV-1a | 64 | 16384 | 0.01784 | +| FNV-1a | 256 | 4096 | 0.01723 | +| FNV-1a | 1024 | 1024 | 0.01673 | +| nmhash32 | 1 | 1048576 | 0.31092 | +| nmhash32 | 2 | 524288 | 0.16230 | +| nmhash32 | 4 | 262144 | 0.07815 | +| nmhash32 | 8 | 131072 | 0.04176 | +| nmhash32 | 16 | 65536 | 0.09261 | +| nmhash32 | 64 | 16384 | 0.04587 | +| nmhash32 | 256 | 4096 | 0.07238 | +| nmhash32 | 1024 | 1024 | 0.07263 | +| nmhash32x | 1 | 1048576 | 0.04294 | +| nmhash32x | 2 | 524288 | 0.02937 | +| nmhash32x | 4 | 262144 | 0.01096 | +| nmhash32x | 8 | 131072 | 0.00911 | +| nmhash32x | 16 | 65536 | 0.01291 | +| nmhash32x | 64 | 16384 | 0.00859 | +| nmhash32x | 256 | 4096 | 0.07373 | +| nmhash32x | 1024 | 1024 | 0.07618 | +| water | 1 | 1048576 | 0.12560 | +| water | 2 | 524288 | 0.06302 | +| water | 4 | 262144 | 0.04020 | +| water | 8 | 131072 | 0.01999 | +| water | 16 | 65536 | 0.01459 | +| water | 64 | 16384 | 0.00923 | +| water | 256 | 4096 | 0.00816 | +| water | 1024 | 1024 | 0.00792 | + +while for `test_64_bit_hash_performance` the results are: + +| Algorithm | Key Size | Key # | Time (s) | +| | Bytes | | | +|------------|-----------|------------|----------| +| FNV-1 | 1 | 1048576 | 0.02981 | +| FNV-1 | 2 | 524288 | 0.02697 | +| FNV-1 | 4 | 262144 | 0.02275 | +| FNV-1 | 8 | 131072 | 0.02431 | +| FNV-1 | 16 | 65536 | 0.02158 | +| FNV-1 | 64 | 
16384 | 0.02007 | +| FNV-1 | 256 | 4096 | 0.01932 | +| FNV-1 | 1024 | 1024 | 0.02089 | +| FNV-1a | 1 | 1048576 | 0.03226 | +| FNV-1a | 2 | 524288 | 0.03076 | +| FNV-1a | 4 | 262144 | 0.02359 | +| FNV-1a | 8 | 131072 | 0.02542 | +| FNV-1a | 16 | 65536 | 0.02364 | +| FNV-1a | 64 | 16384 | 0.02130 | +| FNV-1a | 256 | 4096 | 0.01962 | +| FNV-1a | 1024 | 1024 | 0.01966 | +| Pengy | 1 | 1048576 | 0.24294 | +| Pengy | 2 | 524288 | 0.12066 | +| Pengy | 4 | 262144 | 0.06205 | +| Pengy | 8 | 131072 | 0.03138 | +| Pengy | 16 | 65536 | 0.01608 | +| Pengy | 64 | 16384 | 0.00669 | +| Pengy | 256 | 4096 | 0.00387 | +| Pengy | 1024 | 1024 | 0.00295 | +| Spooky | 1 | 1048576 | 0.11920 | +| Spooky | 2 | 524288 | 0.07478 | +| Spooky | 4 | 262144 | 0.03185 | +| Spooky | 8 | 131072 | 0.01468 | +| Spooky | 16 | 65536 | 0.01503 | +| Spooky | 64 | 16384 | 0.00440 | +| Spooky | 256 | 4096 | 0.00290 | +| Spooky | 1024 | 1024 | 0.00177 | + +As the tested function will typically reside in the instruction cache +these results do not include the costs of reloading the procedure if +hashing is intermittent. If hashing is intermittent then that can more +severely impact the performance of `nmhash32`, `nmhash32x`, +`water_hash`, `pengy_hash`, and `spooky_hash` relative to +`fnv_1_hash` and `fnv_1a_hash`. + +In the `src/tests/hash_functions/validation` subdirectory, the Fortran +Standard Library implements three executables to test the validity of +the Fortran codes against the original C and C++ codes. The three +executables must be compiled manually using the makefile +`Makefile.validation`, and the compiler suite used must be +GCC's. The first executable, `generate_key_array`, is +based on Fortran code, and generates a random sequence of 2048 +integers of kind `INT8`, and stores that sequence in the binary file +`key_array.bin`.
The second executable, `generate_hash_arrays`, reads +the values in `key_array.bin`, and, for each complicated hash +procedure, generates a corresponding binary file containing 2049 hash +values generated from the values in `key_array.bin`. The third +executable, `hash_validity_test`, reads the binary files and for each +complicated hash procedure compares the contents of the binary file +with the results of calculating hash values using the corresponding +Fortran hash procedure on the same keys. These executables must be run +manually in the same order. From 87869ad07692c58b356532e30fd81a3c8b869815 Mon Sep 17 00:00:00 2001 From: William Clodius Date: Sat, 27 Nov 2021 16:48:13 -0700 Subject: [PATCH 019/106] Deleted file Renamed stdlib_hash_functions.md to stdlib_hash_proceedures.md effectively deleting it. [ticket: X] --- doc/specs/stdlib_hash_functions.md | 1744 ---------------------------- 1 file changed, 1744 deletions(-) delete mode 100755 doc/specs/stdlib_hash_functions.md diff --git a/doc/specs/stdlib_hash_functions.md b/doc/specs/stdlib_hash_functions.md deleted file mode 100755 index 01fa4e1ca..000000000 --- a/doc/specs/stdlib_hash_functions.md +++ /dev/null @@ -1,1744 +0,0 @@ ---- -title: Hash codes ---- - -# The `stdlib_32_bit_hash_functions` and `stdlib_64_bit_hash_functions` modules - -[TOC] - -## Overview of hash functions - -The comparison of lexical entities or other objects for equality -can be computationally expensive. -This cost is often reduced by computing a near unique integer value, -termed a hash code, from the structure of the object using a procedure -termed a hash function. -Equality of hash codes is a necessary, but not sufficient, condition -for the original objects to be equal. -As integer comparisons are very efficient, performing an initial -comparison of hash codes and then performing a detailed comparison -only if the hash codes are equal can improve performance.
-The hash codes, in turn, can be mapped to a smaller set of integers, -that can be used as an index, termed a hash index, to a rank one -array, often termed a hash table. -This mapping will be known as a scalar hash. -The use of a hash table reduces the number of hash codes that need to -be compared, further improving performance. -A hash function can also be used to generate a checksum to verify that -data has not changed. -The Fortran Standard Library therefore provides procedures to compute -hash codes and scalar hashes. -This document only discusses the hash codes and scalar hashes in the -library. - -## Licensing - -The Fortran Standard Library is distributed under the MIT License. -However components of the library may be based on code with additional -licensing restrictions. In particular, the hash codes are often based -on algorithms with additional restrictions on distribution. -The algorithms with such restrictions (`Fibonacci Hash`, `Universal -Multiplicative Hash`, -`FNV-1 Hash`, `FNV-1A Hash`, `nmhash32`, `nmhash32x`, `waterhash`, -`pengyhash` and `SpookyHash`) are discussed below. - -`FIBONACCI_HASH` is a scalar hash. It is an implementation in Fortran -2008 and signed two's complement integers of the Fibonacci Hash -described in D. E. Knuth, "The Art of -Computer Programming, Second Edition, Volume 3, Sorting and -Searching", Addison-Wesley, Upper Saddle River, NJ, -pp. 517-518, 1998. The algorithms in that source are considered public -domain, and its use is unrestricted. - -`UNIVERSAL_MULT_HASH` is a scalar hash. It is an implementation in -Fortran 2008 and signed two's complement integers of the -universal multiplicative hash algorithm of M. Dietzfelbinger, -T. Hagerup, J. Katajainen, and M. Penttonen, "A Reliable Randomized -Algorithm for the Closest-Pair Problem," J. Algorithms, Vol. 25, -No. 1, Oct. 1997, pp. 19-51. Because of its publication in the Journal -of Algorithms, the universal multiplicative hash algorithm is public -domain. 
- -`FNV_1_HASH` and `FNV_1A_HASH` are translations to Fortran 2008 and -signed two's complement integers of the -`FNV-1` and `FNV-1a` hash functions of Glenn Fowler, Landon Curt Noll, -and Phong Vo, that has been released into the public -domain. Permission has been granted, by Landon Curt Noll, for the use -of these algorithms in the Fortran Standard Library. A description of -these functions is available at -. -These functions have been modified from their normal forms to also -encode the structure size in the output hash. - -Similarly `SPOOKY_HASH` and associated procedures are translations to -Fortran 2008 and signed two's complement integers of the unsigned 64 -bit version 2 `SpookyHash` functions of Bob -Jenkins to signed 64 -bit operations. Version 2 was chosen over version 1 as it has better -performance and fewer bad seeds -Bob Jenkins has also put this code in the public -domain and has given permission to treat this code as public domain in -the USA, provided the code can be used under other licenses and he is -given appropriate credit. - -`NMHASH32` and `NMHASH32x` are translations to Fortran 2008 and signed -two's complement integers of the unsigned 32 bit -hashes of James Z. M. Gao's `nmhash32` and `nmhash32x` version of 0.2, - -James Z. M. Gao has released his code under the BSD 2 Clause -License. The BSD 2-Clause license is as follows: - - BSD 2-Clause License - - Copyright (c) 2021, James Z.M. Gao - All rights reserved. - - Redistribution and use in source and binary forms, with or without - modification, are permitted provided that the following conditions are met: - - 1. Redistributions of source code must retain the above copyright notice, - this list of conditions and the following disclaimer. - - 2. Redistributions in binary form must reproduce the above copyright notice, - this list of conditions and the following disclaimer in the documentation - and/or other materials provided with the distribution. 
- - THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" - AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE - LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR - CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF - SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS - INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN - CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) - ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE - POSSIBILITY OF SUCH DAMAGE. - -`WATER_HASH` is a translation to Fortran 2008 and signed two's -complement integers of the `waterhash` algorithm -of Tommy Ettinger. This algorithm is inspired by the Wy Hash of -Wang Yi. Tommy Ettinger's original C++ code, `waterhash.h`, -is available at URL: under -the `unlicense`, -. -The `unlicense` reads as follows: - - This is free and unencumbered software released into the public domain. - Anyone is free to copy, modify, publish, use, compile, sell, or - distribute this software, either in source code form or as a compiled - binary, for any purpose, commercial or non-commercial, and by any - means. - - In jurisdictions that recognize copyright laws, the author or authors - of this software dedicate any and all copyright interest in the - software to the public domain. We make this dedication for the benefit - of the public at large and to the detriment of our heirs and - successors. We intend this dedication to be an overt act of - relinquishment in perpetuity of all present and future rights to this - software under copyright law. 
- - THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. - IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR - OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, - ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR - OTHER DEALINGS IN THE SOFTWARE. - - For more information, please refer to - -`PENGY_HASH` is a translation to Fortran 2008 and signed two's -complement arithmetic of the `pengyhash` algorithm of Alberto Fajardo, -copyright 2020. Alberto Fajardo's original C code, `pengyhash.c`, is -available at the URL: -https://github.com/tinypeng/pengyhash/blob/master/pengyhash.c -under the BSD 2-Clause License: -https://github.com/tinypeng/pengyhash/blob/master/LICENSE - -The BSD 2-Clause license is as follows: - - BSD 2-Clause License - - pengyhash - Copyright (c) 2020 Alberto Fajardo - All rights reserved. - - Redistribution and use in source and binary forms, with or without - modification, are permitted provided that the following conditions - are met: - - 1. Redistributions of source code must retain the above copyright - notice, this list of conditions and the following disclaimer. - - 2. Redistributions in binary form must reproduce the above - copyright notice, this list of conditions and the following - disclaimer in the documentation and/or other materials provided - with the distribution. - - THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND - CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, - INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF - MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE - DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS - BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, - EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED - TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, - DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON - ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR - TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF - THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF - SUCH DAMAGE. - -## Glossary - -There are a few words used in this document that may not be familiar to -readers of this document: - -* Key - a value to be used to find entries in a hash table typically - using its hashed value for the initial search; - -* Salt - see seed, and; - -* Seed - an additional argument to a hash function that changes its - output making some attacks impractical. - - -## The hash codes modules - -### Overview of the modules - -The Standard Library provides two modules implementing hash -functions and scalar hashes. -The `stdlib_32_bit_hash_functions` module provides procedures to -compute 32 bit integer hash codes and a scalar hash. -The hash codes can be used for tables of up to `2**30` entries, and -for keys with a few hundred elements, but performance has only been -tested for tables up to `2**16` entries and performance may degrade -for larger numbers of entries. -The `stdlib_64_bit_hash_functions` module provides hash procedures to -compute 64 bit integer hash codes and a scalar hash. -The hash codes can, in principle, be used for tables of up to `2**62` -entries, and for keys with a few thousand elements, but testing of -performance has only been done for tables up to `2**16` elements and -performance may degrade for larger numbers of entries.
-While one of the codes in `stdlib_64_bit_hash_functions`, -`SPOOKY_HASH`, can also be used to calculate 128 bit hash codes, none -of the current codes can be used to calculate 256 bit hash codes. -Such larger hash codes are useful for larger hash tables and keys, and -for checksums. -Such larger keys and tables are little used, if used at all, in -current -Fortran codes, but the larger hash codes may be added to the library -if there is a demand for them. - -Hash functions are often divided into two categories -"cryptographic" and "non-cryptographic". -Cryptographic hash functions produce codes that are infeasible to -reverse without additional information beyond the identity of -the hash function used to generate the code and the resulting codes. -Non-cryptographic codes, in some circumstances, are believed to be -reversible. -The modules only implement hash -functions that are believed to be non-cryptographic, with -implementations available in the public domain. - -There are a number of algorithms available for the computation of -non-cryptographic 32 and 64 bit hash codes that differ in their -computational complexity, -their relative performance on different size keys, and the -expected uniqueness (randomness) of the resulting hash codes. -Their relative performance in the analysis of text, in particular, -can depend on the compiler, character set, language, and content. -The quality of a hash function is often evaluated using -the SMHasher test suite, originally written by -[Austin Appleby](https://github.com/aappleby/smhasher), but greatly -extended by [Reini Urban](https://github.com/rurban/smhasher). -All except the simplest, `FNV_1` and `FNV_1A`, of the hash functions -defined in the modules perform well on the tests in Reini Urban's -version of SMHasher. - -There are two problems in implementing hash functions in Fortran. -First, the static typing of Fortran makes it awkward to define general -purpose hash functions.
-Instead hash functions are defined for some of the more common -objects: character strings and rank one arrays of integers. -Other objects can, in principle, be hashed by using `transfer` to -map their contents to an integer array, typically one of kind `INT8`. -The other problem is that hash codes are typically defined using -modular unsigned integer arithmetic. -As such integers are not part of the current Fortran standard, -workarounds have to be used. -These can take two forms. -In one, the operations are emulated by using an integer of a -larger size, or, for the larger integers, by dividing the integer into -two lower and higher order halves, -and performing the operations on each half separately using -the larger integers. -In the second, the unsigned integers may be replaced directly by -the corresponding signed integers, but -otherwise not modifying the code logic. -The first should be standard conforming on current compilers, but -is more computationally intensive unless the compilers recognize -underlying idioms that are rarely used in Fortran codes. The second is -not standard conforming as bit operations involving the sign are -undefined, -but should yield equivalent results with fewer operations on -compilers with two's complement integers that do not trap on over -or under flow. The codes currently use the second method. - -In order to compile the hash function modules, the compilers must -implement much of Fortran 2003, and selected components of Fortran -2008: submodules, 64 bit integers, and some bit intrinsics. -The main limitation on valid compilers is whether they -implement the submodules enhancement of Fortran 2008. -In order to properly run the hash functions, the compilers must -use two's complement integers, and be able to execute them with -wraparound semantics and no integer overflow exceptions.
-Current Fortran 2003+ compilers solely use two's complement -integers, and appear to be able to turn off overflow detection, -so the modules use signed integer arithmetic. For that reason -trapping on signed arithmetic must be disabled. The command line -flags to disable overflow detection for compilers implementing -submodules are summarized in the table below. -Note that FLANG, gfortran, ifort, and NAG all default to -integer overflow wrapping. - -|Compiler|Legal flag|Illegal flag|Default| -|---------|----------|------------|-------| -| ARM Fortran | NA? | NA? | overflow wrapping? | -| Cray Fortran | NA? | NA? | overflow wrapping? | -| FLANG/PGI | -fwrapv | -ftrapv | -fwrapv | -| gfortran | -fwrapv | -ftrapv | -fwrapv | -| IBM Fortran | NA? | NA? | overflow wrapping? | -| ifort| NA? | NA? | overflow wrapping | -| NAG Fortran | -C=none | -C=intovf | -C=none | -| NEC Fortran | NA? | NA? | overflow wrapping? | -| NVIDIA Fortran | NA? | NA? | overflow wrapping? | - -All of the modules' hash functions take one or two arguments. -All of them have as their first argument the object to be hashed, -termed a *key*. -Most have a second argument, termed a *seed*, that sets the initial -value of the hash code changing the hash function behavior. -In particular, inputs that hash to the same hash index with a given -seed, will often hash to different indexes with a different seed. -This difference in behavior makes algorithms that use a seed much -more resistant to denial of service attacks that use the properties -of a known hash to increase the number of hash table collisions. -This additional integer must be kept the same for all hashes -in a given hash table, but can be changed and the objects rehashed -if collisions are unusually common. -The *seed* can be either a scalar or a two element array. -Some of the hash functions have alternatives that allow incremental -hashing. 
- -|Algorithm|Seed|Result| -|---------|----|------| -|FNV-1|None|32 or 64 bit integer| -|FNV-1a|None|32 or 64 bit integer| -|nmhash32 |32 bit scalar integer|32 bit integer| -|nmhash32x |32 bit scalar integer|32 bit integer| -|pengyhash |32 bit scalar integer|64 bit integer| -|Spooky Hash|64 bit two element vector|64 bit two element vector| -|waterhash|64 bit scalar integer|32 bit integer| - -The hash function modules each provide at least five algorithms for -hash functions: two optimized for small (< 32 `INT8` integer elements) -keys, and three optimized for large (> 100 `INT8` integer elements) -keys. -The core implementation for each algorithm is for keys that are -vectors of `INT8` integers. -These core implementations are then used in wrappers for keys -that are vectors of `INT16`, `INT32` and `INT64` integers, or default -character strings, in the expectation that inlining will eliminate the -overhead of transferring the other keys to `INT8` integer vectors. - -The `stdlib_32_bit_hash_functions` module provides -implementations of five hash code algorithms: -the *FNV_1* and *FNV_1A* variants of Glenn Fowler, -Landon Curt Noll, and Kiem-Phong Vo; -the *nmhash32* and *nmhash32x* of James Z. M. Gao; -and the *waterhash* of Tommy Ettinger. -The detailed implementation of each algorithm is handled in a separate -submodule: `stdlib_32_bit_fnv_hashes`, -`stdlib_32_bit_nmhashes`, and `stdlib_32_bit_water_hashes`, -respectively. The `nmhash32`, `nmhash32x`, and `waterhash` algorithms -require seeds. The submodules provide separate seed generators -for each algorithm. -The module itself -implements two scalar hash functions, `FIBONACCI_HASH` and -`UNIVERSAL_MULT_HASH`. -It also implements the subroutine, `ODD_RANDOM_INTEGER`, for -generating seeds for `UNIVERSAL_MULT_HASH`. -All assume a two's complement sign bit, and no out of -range checks. 
- -The `stdlib_64_bit_hash_functions` module also provides -implementations of four hash code algorithms: -the *FNV_1* and *FNV_1A* variants of Glenn Fowler, -Landon Curt Noll, and Kiem-Phong Vo; -the *pengyhash* of Alberto Fajardo; -and the *SpookyHash* of Bob Jenkins. -The detailed implementation of each algorithm is handled in a separate -submodule: `stdlib_64_bit_fnv_hashes`, -`stdlib_64_bit_pengy_hashes`, and `stdlib_64_bit_spooky_hashes`, -respectively. -The `pengyhash`, and `Spooky Hash` algorithms -require seeds. The submodules provide separate seed generators -for each algorithm. -The module itself implements two scalar hash functions, -`FIBONACCI_HASH` and `UNIVERSAL_MULT_HASH`. -It also implements the subroutine, `ODD_RANDOM_INTEGER`, for -generating seeds for `UNIVERSAL_MULT_HASH`. -All assume a two's complement sign bit, and no out of -range checks. - -The `stdlib_32_bit_fnv_hashes` and `stdlib_64_bit_fnv_hashes` -submodules each provide implementations of the FNV-1 and FNV-1A -algorithms in the form of two separate overloaded functions: `FNV_1` -and `FNV_1A`. -The FNV-1 and FNV-1A algorithms differ in their order of the -multiplication and exclusive or operations. -They differ from their normal implementation in that they also -encode the structure size in the hash code. -The 32 and 64 bit algorithms differ in their initial offsets and in -their multiplicative constants. -Analysis suggests that `FNV_1A` should be better at randomizing the -input, but tests with hash tables show negligible difference. -These algorithms have the reputation of being particularly useful for -small byte strings, i.e., strings of less than 32 bytes. -While they do not at all perform well on the SMHasher test suite, -usage indicates that this has little impact on the -performance of small hash tables, and the small size of the functions -allows their quick loading and retainment in the instruction cache, -giving a performance boost where the hashing is intermittent.
-(See the -[SMHasher discussion](https://github.com/rurban/smhasher/README.md) -and S. Richter, V. Alvarez, and J. Dittrich, -["A Seven-Dimensional Analysis of Hashing Methods and its Implications on Query Processing"](https://bigdata.uni-saarland.de/publications/p249-richter.pdf). - -The `stdlib_32_bit_nmhashes` submodule provides implementations -of James Z.M. Gao's `nmhash32` and `nmhash32x` algorithms, -version 0.2, -in the form of the overloaded functions, `NMHASH32` and `NMHASH32X`. -The implementations are based on the scalar versions of Gao's -algorithms and not the vector versions that require access to -the vector instructions of some compilers. -Both algorithms perform well on the SMHasher tests, and have no known -bad seeds. The vector versions of both codes perform well on large -keys, with the `nmhash32x` faster on short keys. To provide randomly -generated seeds for the two functions the submodule also defines the -subroutines `NEW_NMHASH32_SEED` and `NEW_NMHASH32X_SEED`. Gao claims -that `NMHASH32X` is significantly faster than `NMHASH32` on short -seeds, but slower on long seeds, but our limited testing so far shows -`NMHASH32X` to be significantly faster on short seeds and slightly -faster on long seeds. - -The `stdlib_32_bit_water_hashes` submodule provides implementations -of Tommy Ettinger's `waterhash` algorithm in the form of the overloaded -function, `WATER_HASH`. Water Hash has not been tested by Reini Urban, -but Tommy Ettinger has tested it with Urban's SMHasher and presents -results that shows Water Hash passing all the tests. So far his -testing hasn't found any bad seeds for the algorithm. To provide -randomly generated seeds for the hash function the submodule also -defines the subroutine `NEW_WATER_HASH_SEED`. - -The `stdlib_64_bit_pengy_hashes` submodule provides implementations of -Alberto Fajardo's `pengyhash` in the form of the overloaded function, -`PENGY_HASH`. 
Reini Urban's testing shows that PengyHash passes all -the tests and has no bad seeds. To provide randomly generated seeds -for the hash function the submodule also defines the subroutine -`NEW_PENGY_HASH_SEED`. - -The `stdlib_64_bit_spooky_hashes` submodule provides implementations -of Bob Jenkins' SpookyHash in the form of the overloaded function, -`SPOOKY_HASH`. Future implementations may provide the SpookyHash -incremental hashing procedures. -SpookyHash is optimized for large objects and should give excellent -performance for objects greater than about 96 bytes, but has -significant overhead for smaller objects. -The code was designed for Little Endian compilers, and will give -different results on Big Endian compilers, but the hash quality on -those compilers is probably just as good. -SpookyHash version 2 passes all of Reini Urban's SMHasher tests, and -has one bad seed only when reduced to a 32 bit output. -Its only potential problem is undefined behavior if the key is -misaligned. - -## The `stdlib_32_bit_hash_codes` module - -### Overview of the module - -Thirty two bit hash functions are primarily useful for generating hash -codes and hash indices for hash tables. -They tend to be less useful for generating checksums, which generally -benefit from having a larger number of bits. -The `stdlib_32_bit_hash_codes` module defines five public overloaded -32 bit hash code functions, `FNV_1`, `FNV_1A`, `NMHASH32`, `NMHASH32X` -and `WATER_HASH`, two scalar hash functions, `FIBONACCI_HASH` and -`UNIVERSAL_MULT_HASH`, four seed generators, `ODD_RANDOM_INTEGER` for -`UNIVERSAL_MULT_HASH`, and `NEW_NMHASH32_SEED`, `NEW_NMHASH32X_SEED`, -and `NEW_WATER_HASH_SEED`, for their respective hash code -functions. It also defines the integer kind constant, `INT_HASH`, and -a logical constant, `LITTLE_ENDIAN`, used to deal with one aspect of -the machine dependence of the hash codes.
- -### The `INT_HASH` parameter - -It is necessary to define the kind of integer used to return the hash -code. -As `stdlib_32_bit_hash_codes` deals exclusively with 32 bit hash codes, -`INT_HASH` is an alias for the integer kind `INT32`. - -### The `LITTLE_ENDIAN` parameter - -In implementing hash functions it is sometimes necessary to know the -"endianness" of the compiler's integers. To this end the -`stdlib_32_bit_hash_codes` module defines the logical parameter -`LITTLE_ENDIAN` that, if true, indicates that the compiler has little -endian integers, and that if false indicates that the integers are big -endian. - -### Specifications of the `stdlib_32_bit_hash_codes` procedures - -#### `FIBONACCI_HASH` - maps an integer to a smaller number of bits - -##### Status - -Experimental - -##### Description - -Calculates an `nbits` hash code from a 32 bit integer. This is useful -in mapping hash codes into small arrays. - -##### Syntax - -`code = [[stdlib_32_bit_hash_codes:fibonacci_hash]]( key, nbits )` - -##### Class - -Pure function - -##### Arguments - -`key`: Shall be a scalar integer expression of kind `INT32`. It is an -`intent(in)` argument. - -`nbits`: Shall be a scalar default integer expression with `0 < nbits < -32`. It is an `intent(in)` argument. - -##### Result - -The result is an integer of kind `INT32` with at most the lowest -`nbits` nonzero, mapping to a range 0 to `2**nbits-1`. - -##### Note - -`FIBONACCI_HASH` is an implementation of the Fibonacci Hash of Donald -E. Knuth. It multiplies the `KEY` by the odd valued approximation to -`2**32/phi`, where `phi` is the golden ratio 1.618..., and returns the -`NBITS` upper bits of the product as the lowest bits of the result.
- -##### Example - -```fortran - program demo_fibonacci_hash - use stdlib_32_bit_hash_codes, only: fibonacci_hash - use iso_fortran_env, only: int32 - implicit none - integer, allocatable :: array1(:) - integer(int32) :: hash, source - allocate( array1(0:2**6-1) ) - array1(:) = 0 - source = 42_int32 - hash = fibonacci_hash(source, 6) - array1(hash) = source - print *, hash - end program demo_fibonacci_hash -``` - -#### `FNV_1_HASH`- calculates a hash code from a key - -##### Status - -Experimental - -##### Description - -Calculates a 32 bit hash code from a rank 1 integer array or a default -character string. - -##### Syntax - -`code = [[stdlib_32_bit_hash_codes:fnv_1_hash]]( key )` - -##### Class - -Pure function - -##### Argument - -`key`: Shall be a deferred length default character scalar expression -or a rank 1 integer array expression of kind `INT8`, `INT16`, -`INT32`, or `INT64`. -It is an `intent(in)` argument. - -##### Result - -The result is a scalar integer of kind `INT32`. - -##### Note - -`FNV_1_HASH` is an implementation of the original FNV-1 hash code of Glenn -Fowler, Landon Curt Noll, and Phong Vo. -It differs from typical implementations in that it also encodes the -size of the structure in the hash code. -This code is relatively fast on short keys, and is small enough that it -will often be retained in the instruction cache if hashing is -intermittent. -As a result it should give good performance for typical hash table -applications. -This code does not pass any of the SMHasher tests, but the resulting -degradation in performance due to its larger number of collisions is -expected to be minor compared to its faster hashing rate.
- - -##### Example - -```fortran - program demo_fnv_1_hash - use stdlib_32_bit_hash_codes, only: fnv_1_hash - use iso_fortran_env, only: int32 - implicit none - integer, allocatable :: array1(:) - integer(int32) :: hash - array1 = [ 5, 4, 3, 1, 10, 4, 9] - hash = fnv_1_hash(array1) - print *, hash - end program demo_fnv_1_hash -``` - - -#### `FNV_1A_HASH`- calculates a hash code from a key - -##### Status - -Experimental - -##### Description - -Calculates a 32 bit hash code from a rank 1 integer array or a default -character string. - -##### Syntax - -`code = [[stdlib_32_bit_hash_codes:fnv_1a_hash]]( key )` - -##### Class - -Pure function - -##### Argument - -`key`: Shall be a deferred length default character scalar expression -or a rank 1 integer array expression of kind `INT8`, `INT16`, -`INT32`, or `INT64`. -It is an `intent(in)` argument. - -##### Result - -The result is a scalar integer of kind `INT32`. - -##### Note - -`FNV_1A_HASH` is an implementation of the alternative FNV-1a hash code of -Glenn Fowler, Landon Curt Noll, and Phong Vo. -It differs from typical implementations in that it also encodes the -size of the structure in the hash code. -This code is relatively fast on short keys, and is small enough that it -will often be retained in the instruction cache if hashing is -intermittent. -As a result it should give good performance for typical hash table -applications. -This code does not pass any of the SMHasher tests, but the resulting -degradation in performance due to its larger number of collisions is -expected to be minor compared to its faster hashing rate.
- -##### Example - -```fortran - program demo_fnv_1a_hash - use stdlib_32_bit_hash_codes, only: fnv_1a_hash - use iso_fortran_env, only: int32 - implicit none - integer, allocatable :: array1(:) - integer(int32) :: hash - array1 = [ 5, 4, 3, 1, 10, 4, 9] - hash = fnv_1a_hash(array1) - print *, hash - end program demo_fnv_1a_hash -``` - - -#### `NEW_NMHASH32_SEED`- returns a valid input seed for `NMHASH32` - -##### Status - -Experimental - -##### Description - -Calculates a 32 bit "random" integer that is believed to be a valid -seed for `NMHASH32` and is also different from the input seed. - -##### Syntax - -`code = call [[stdlib_32_bit_hash_codes:new_nmhash32_seed]]( seed )` - -##### Class - -Subroutine - -##### Argument - -`seed`: shall be a defined integer scalar variable of kind `INT32`. -It is an `intent(inout)` argument. On input `seed` should be defined, -and on output it will be different from the input `seed`. - -##### Note - -Currently there are no known bad seeds for `NMHASH32`, but if any are -identified the procedure will be revised so that they cannot be -returned. This subroutine uses Fortran's intrinsic - `RANDOM_NUMBER` and the values returned can be changed by calling the - intrinsic `RANDOM_INIT`. - -##### Example - -See the example for `NMHASH32`. - - -#### `NEW_NMHASH32X_SEED`- returns a valid input seed for `NMHASH32X` - -##### Status - -Experimental - -##### Description - -Calculates a 32 bit "random" integer that is believed to be a valid -seed for `NMHASH32X` and is also different from the input seed. - -##### Syntax - -`code = call [[stdlib_32_bit_hash_codes:new_nmhash32x_seed]]( seed )` - -##### Class - -Subroutine - -##### Argument - -`seed`: shall be a defined integer scalar variable of kind `INT32`. -It is an `intent(inout)` argument. On input `seed` should be defined, -and on output it will be different from the input `seed`. 
- -##### Note - -Currently there are no known bad seeds for `NMHASH32X`, but if any are -identified the procedure will be revised so that they cannot be -returned. This subroutine uses Fortran's intrinsic - `RANDOM_NUMBER` and the values returned can be changed by calling the - intrinsic `RANDOM_INIT`. - -##### Example - -See the example for `NMHASH32X`. - - -#### `NEW_WATER_HASH_SEED`- returns a valid input seed for `WATER_HASH` - -##### Status - -Experimental - -##### Description - -Calculates a 64 bit "random" integer that is believed to be a valid -seed for `WATER_HASH` and is also different from the input seed. - -##### Syntax - -`code = call [[stdlib_32_bit_hash_codes:new_water_hash_seed]]( seed )` - -##### Class - -Subroutine - -##### Argument - -`seed`: shall be a defined integer scalar variable of kind `INT64`. -It is an `intent(inout)` argument. On input `seed` should be defined, -and on output it will be different from the input `seed`. - -##### Note - -Currently there are no known bad seeds for `WATER_HASH`, but if any -are identified the procedure will be revised so that they cannot be -returned. This subroutine uses Fortran's intrinsic - `RANDOM_NUMBER` and the values returned can be changed by calling the - intrinsic `RANDOM_INIT`. - - -##### Example - -See the example for `WATER_HASH`. - - -#### `NMHASH32`- calculates a hash code from a key and a seed - -##### Status - -Experimental - -##### Description - -Calculates a 32 bit hash code from a rank 1 integer array or a default -character string, and the input `seed`. - -##### Syntax - -`code = [[stdlib_32_bit_hash_codes:nmhash32]]( key, seed )` - -##### Class - -Pure function - -##### Arguments - -`key`: Shall be a deferred length default character scalar expression -or a rank 1 integer array expression of kind `INT8`, `INT16`, -`INT32`, or `INT64`. -It is an `intent(in)` argument. - -`seed`: shall be an integer scalar expression of kind `INT32`. -It is an `intent(in)` argument. 
- -##### Result - -The result is a scalar integer of kind `INT32`. - -##### Note - -`NMHASH32` is an implementation of the `nmhash32` hash code of -James Z. M. Gao. -This code has good, but not great, performance on long keys, poorer -performance on short keys. -As a result it should give fair performance for typical hash table -applications. -This code passes the SMHasher tests, and has no known bad seeds: - -##### Example - -```fortran - program demo_nmhash32 - use stdlib_32_bit_hash_codes, only: nmhash32, & - new_nmhash32_seed - use iso_fortran_env, only: int32 - implicit none - integer, allocatable :: array1(:) - integer(int32) :: hash - integer(int32) :: seed = int(Z'11111111`, int32) - call new_nmhash32_seed(seed) - array1 = [ 5, 4, 3, 1, 10, 4, 9] - hash = nmhash32(array1, seed) - print *, seed, hash - end program demo_nmhash32 -``` - - -#### `NMHASH32X`- calculates a hash code from a key and a seed - -##### Status - -Experimental - -##### Description - -Calculates a 32 bit hash code from a rank 1 integer array or a default -character string, and the input `seed`. - -##### Syntax - -`code = [[stdlib_32_bit_hash_codes:nmhash32x]]( key, seed )` - -##### Class - -Pure function - -##### Arguments - -`key`: Shall be a deferred length default character scalar expression -or a rank 1 integer array expression of kind `INT8`, `INT16`, -`INT32`, or `INT64`. -It is an `intent(in)` argument. - -`seed`: shall be an integer scalar expression of kind `INT32`. -It is an `intent(in)` argument. - -##### Result - -The result is a scalar integer of kind `INT32`. - -##### Note - -`NMHASH32X` is an implementation of the `nmhash32x` hash code of -James Z. M. Gao. -This code has good, but not great, performance on long keys, poorer -performance on short keys. -As a result it should give fair performance for typical hash table -applications. 
-This code passes the SMHasher tests, and has no known bad seeds: - -##### Example - -```fortran - program demo_nmhash32x - use stdlib_32_bit_hash_codes, only: nmhash32x, & - new_nmhash32x_seed - use iso_fortran_env, only: int32 - implicit none - integer, allocatable :: array1(:) - integer(int32) :: hash - integer(int32) :: seed = int(Z'11111111`, int32) - call new_nmhash32x_seed(seed) - array1 = [ 5, 4, 3, 1, 10, 4, 9] - hash = nmhash32x(array1, seed) - print *, seed, hash - end program demo_nmhash32x -``` - -#### `ODD_RANDOM_INTEGER` - returns an odd integer - -##### Status - -Experimental - -##### Description - -Returns a random 32 bit integer distributed uniformly over the odd values. - -##### Syntax - -`call [[stdlib_32_bit_hash_codes:odd_random_integer]]( harvest )` - -##### Class - -Subroutine - -##### Argument - -`harvest`: Shall be a scalar integer variable of kind `INT32`. It is -an `intent(out)` argument. - -##### Note - -`ODD_RANDOM_INTEGER` is intended to generate seeds for - `UNIVERSAL_MULT_HASH`. `ODD_RANDOM_NUMBER` uses Fortran's intrinsic - `RANDOM_NUMBER` and the values returned can be changed by calling the - intrinsic `RANDOM_INIT`. - -##### Example - -See `UNIVERSAL_MULT_HASH`. - - -#### `UNIVERSAL_MULT_HASH` - maps an integer to a smaller number of bits - -##### Status - -Experimental - -##### Description - -Calculates an `nbits` hash code from a 32 bit integer. This is useful -in mapping a hash value to a range 0 to `2**nbits-1`. - -##### Syntax - -`code = [[stdlib_32_bit_hash_codes:universal_mult_hash]]( key, seed, nbits )` - -##### Class - -Pure function - -##### Arguments - -`key`: Shall be a scalar integer expression of kind `INT32`. It is an -`intent(in)` argument. - -`seed`: Shall be a scalar integer expression of kind `INT32`. It is an -`intent(in)` argument. It must have an odd value. - -`nbits` Shall be a scalar default integer expression with `0 < nbits < -32`. It is an `intent(in)` argument. 
- -##### Result - -The result is a scalar integer of kind `INT32` with at most the lowest -`nbits` nonzero. - -##### Note - -`UNIVERSAL_MULT_HASH` is an implementation of the Universal -Multiplicative Hash of M. Dietzfelbinger, et al. -It multiplies the `KEY` by `SEED`, and returns the -`NBITS` upper bits of the product as the lowest bits of the result. - -##### Example - -```fortran - program demo_universal_mult_hash - use stdlib_32_bit_hash_codes, only: odd_random_integer, & - universal_mult_hash - use iso_fortran_env, only: int32 - implicit none - integer, allocatable :: array1(:) - integer(int32) :: hash, i, seed, source - seed = 0 - allocate( array1(0:2**6-1) ) - do i = 0, 2**6-1 - array1(i) = i - end do - call odd_random_integer( seed ) - source = int(Z'1FFFFFF', int32) - hash = universal_mult_hash(source, seed, 6) - array1(hash) = source - print *, seed, hash, array1 - end program demo_universal_mult_hash -``` - -#### `WATER_HASH`- calculates a hash code from a key and a seed - -##### Status - -Experimental - -##### Description - -Calculates a 32 bit hash code from a rank 1 integer array or a default -character string, and the input `seed`. - -##### Syntax - -`code = [[stdlib_32_bit_hash_codes:water_hash]]( key, seed )` - -##### Class - -Pure function - -##### Arguments - -`key`: Shall be a deferred length default character scalar expression -or a rank 1 integer array expression of kind `INT8`, `INT16`, -`INT32`, or `INT64`. -It is an `intent(in)` argument. - -`seed`: shall be an integer scalar expression of kind `INT64`. -It is an `intent(in)` argument. - -##### Result - -The result is a scalar integer of kind `INT32`. - -##### Note - -`WATER_HASH` is an implementation of the `waterhash` hash code of -Tommy Ettinger. -This code has excellent performance on long keys, and good performance -on short keys. -As a result it should give reasonable performance for typical hash -table applications. -This code passes the SMHasher tests.
-The `waterhash` is based on the `wyhash` of Wang Yi. -While `wyhash` has a number of bad seeds, where randomiaation of the -output is poor, -so far testing has not found any bad seeds for `waterhash`. -It can have undefined behavior if the key is not word aligned, -i.e. some computer processors can only process a given size integer if -the address of the integer is a multiple of the integer size. - -##### Example - -```fortran - program demo_water_hash - use stdlib_32_bit_hash_codes, only: water_hash, & - new_water_hash_seed - use iso_fortran_env, only: int32, int64 - implicit none - integer, allocatable :: array1(:) - integer(int32) :: hash - integer(int64) :: seed = int(Z'11111111`, int64) - call new_water_hash_seed( seed ) - array1 = [ 5, 4, 3, 1, 10, 4, 9] - hash = water_hash(array1, seed) - print *, hash, seed - end program demo_water_hash -``` - -## The `stdlib_64_bit_hash_codes` module - -### Overview of the module - -Sixty four bit hash functions are generally overkill for hash table -applications, and are primarily useful for check sums and related -applications. -As checksums often have to deal with extremely large files or -directories, it is often useful to use incremental hashing as well as -direct hashing, so 64 bit and higher hash algorithms often provide -multiple implementations. The current module, for simplicity of API, -doesn't provide any incremental hashes. -The `stdlib_64_bit_hash_codes` module defines several public -overloaded 64 bit hash procedures, `FNV_1`, `FNV-1A`, -`PENGY_HASH`, and `SPOOKY_HASH`, two scalar hash functions, -`FIBONACCI_HASH` and -`UNIVERSAL_MULT_HASH`, a seed generator, `ODD_RANDOM_INTEGER`, for the -`UNIVERSAL_MULT_HASH`, and two seed generators, `NEW_PENGY_HASH_SEED` -and `NEW_SPOOKY_HASH_SEED` for their respective hash functions. 
It -also defines the integer kind constant, `INT_HASH`, used to specify -the kind of the hash function results, and a logical constant, -`LITTLE_ENDIAN`, used to deal with one aspect of the machine -dependence of the hash codes. -Note that while SpookyHash can be used as a sixty four bit hash -algorithm, its algorithms actually returns two element integer arrays -of kind `INT64`, so it can also be used as a 128 bit hash. - -### The `INT_HASH` parameters - -It is necessary to define the kind of integer used to return the hash -code. -As `stdlib_64_bit_hash_codes` deals exclusively with 64 bit hash codes, -`INT_HASH` is an alias for the integer kind `INT64`. - -### The `LITTLE_ENDIAN` parameter - -In implementing hash functions it is sometimes necessary to know the -"endianess" of the compiler's integers. To this end the -`stdlib_64_bit_hash_codes` module defines the logical parameter -`LITTLE_ENDIAN` that if true indicates that the compiler has little -endian integers, and that if false indicates that the integers are big -endian. - - -### Specifications of the `stdlib_64_bit_hash_codes` procedures - -#### `FIBONACCI_HASH` - maps an integer to a smaller number of bits - -##### Status - -Experimental - -##### Description - -Calculates an `nbits` hash code from a 64 bit integer. This is useful -in mapping hash codes into small arrays. - -##### Syntax - -`code = [[stdlib_64_bit_hash_codes:fibonacci_hash]]( key, nbits )` - -##### Class - -Pure function - -##### Arguments - -`key`: Shall be a scalar integer expression of kind `INT64`. It is an -`intent(in)` argument. - -`nbits` Shall be a scalar default integer expression with `0 < nbits < -64`. It is an `intent(in)` argument. - -##### Result - -The result is an integer of kind `INT64` with at most the lowest -`nbits` nonzero, mapping to a range 0 to `nbits-1`. - -##### Note - -`FIBONACCI_HASH` is an implementation of the Fibonacci Hash of Donald -E. Knuth. 
It multiplies the `KEY` by the odd valued approximation to -`2**64/phi`, where `phi` is the golden ratio 1.618..., and returns the -`nbits` upper bits of the product as the lowest bits of the result. - -##### Example - -```fortran - program demo_fibonacci_hash - use stdlib_64_bit_hash_codes, only: fibonacci_hash - use iso_fortran_env, only: int64 - implicit none - integer, allocatable :: array1(:) - integer(int64) :: hash, source - allocate( array1(0:2**6-1) ) - array1(:) = 0 - source = int(Z'1FFFFFFFF', int64) - hash = fibonacci_hash(source, 6) - azray1(hash) = source - print *, hash - end program demo_fibonacci_hash -``` - -#### `FNV_1`- calculates a hash code from a key - -##### Status - -Experimental - -##### Description - -Calculates a 64 bit hash code from a rank 1 integer array or a default -character string. - -##### Syntax - -`code = [[stdlib_64_bit_hash_codes:fnv_1]]( key )` - -##### Class - -Pure function - -##### Argument - -`key`: Shall be a deferred length default character scalar expression -or a rank 1 integer array expression of kind `INT8`, `INT16`, -`INT32`, or `INT64`. -It is an `intent(in)` argument. - -##### Result - -The result is a scalar integer of kind `INT64`. - -##### Note - -`FNV_1` is an implementation of the original FNV-1 hash code of Glenn -Fowler, Landon Curt Noll, and Phong Vo. -It differs from typical implementations in that it also ecodes the -size of the structure in the hash code. -This code is relatively fast on short keys, and is small enough that it -will often be retained in the instruction cache if hashing is -intermittent. -As a result it should give good performance for typical hash table -applications, although it is rare for them to need 64 bits. -This code does not pass any of the SMHasher tests, but the resulting -degradation in performance due to its larger number of collisions is -expected to be minor compared to its faster hashing rate. 
- - -##### Example - -```fortran - program demo_fnv_1_hash - use stdlib_64_bit_hash_codes, only: fnv_1_hash - use iso_fortran_env, only: int64 - implicit none - integer, allocatable :: array1(:) - integer(int64) :: hash - array1 = [ 5, 4, 3, 1, 10, 4, 9] - hash = fnv_1_hash(array1) - print *, hash - end program demo_fnv_1_hash -``` - - -#### `FNV_1A`- calculates a hash code from a key - -##### Status - -Experimental - -##### Description - -Calculates a 64 bit hash code from a rank 1 integer array or a default -character string. - -##### Syntax - -`code = [[stdlib_64_bit_hash_codes:fnv_1a]]( key )` - -##### Class - -Pure function - -##### Argument - -`key`: Shall be a deferred length default character scalar expression -or a rank 1 integer array expression of kind `INT8`, `INT16`, -`INT32`, or `INT64`. -It is an `intent(in)` argument. - -##### Result - -The result is a scalar integer of kind `INT32`. - -##### Note - -`FNV_1A` is an implementation of the alternative FNV-1a hash code of -Glenn Fowler, Landon Curt Noll, and Phong Vo. -It differs from typical implementations in that it also ecodes the -size of the structure in the hash code. -This code is relatively fast on short keys, and is small enough that it -will often be retained in the instruction cache if hashing is -intermittent. -As a result it should give good performance for typical hash table -applications. -This code does not pass any of the SMHasher tests, but the resulting -degradation in performance due to its larger number of collisions is -expected to be minor compared to its faster hashing rate. 
- -##### Example - -```fortran - program demo_fnv_1a_hash - use stdlib_64_bit_hash_codes, only: fnv_1a_hash - use iso_fortran_env, only: int64 - implicit none - integer, allocatable :: array1(:) - integer(int64) :: hash - array1 = [ 5, 4, 3, 1, 10, 4, 9] - hash = fnv_1a_hash(array1) - print *, hash - end program demo_fnv_1a_hash -``` - - -#### `NEW_PENGY_HASH_SEED`- returns a valid input seed for `PENGY_HASH` - -##### Status - -Experimental - -##### Description - -Calculates a 32 bit "random" integer that is believed to be a valid -seed for `PENGY_HASH` and is also different from the input seed. - -##### Syntax - -`code = call [[stdlib_32_bit_hash_codes:new_pengy_hash_seed]]( seed )` - -##### Class - -Subroutine - -##### Argument - -`seed`: shall be a defined integer scalar variable of kind `INT32`. -It is an `intent(inout)` argument. On input `seed` should be defined, -and on output it will be different from the input `seed`. - -##### Note - -Currently there are no known bad seeds for `PENGY_HASH`, but if any are -identified the procedure will be revised so that they cannot be -returned. This subroutine uses Fortran's intrinsic - `RANDOM_NUMBER` and the values returned can be changed by calling the - intrinsic `RANDOM_INIT`. - -##### Example - -See the example for `PENGY_HASH`. - - -#### `NEW_SPOOKY_HASH_SEED`- returns a valid input seed for `SPOOKY_HASH` - -##### Status - -Experimental - -##### Description - -Calculates a 32 bit two element vector of "random" integer values that -is believed to be a valid seed for `SPOOKY_HASH` and is also different -from the input seed. - -##### Syntax - -`code = call [[stdlib_32_bit_hash_codes:new_spooky_hash_seed]]( seed )` - -##### Class - -Subroutine - -##### Argument - -`seed`: shall be a defined two element integer vector variable of kind -`INT32`. It is an `intent(inout)` argument. On input `seed` should be -defined, and on output it will be different from the input `seed`. 
- -##### Note - -Currently there are no known bad seeds for `SPOOKY_HASH`, but if any are -identified the procedure will be revised so that they cannot be -returned. This subroutine uses Fortran's intrinsic - `RANDOM_NUMBER` and the values returned can be changed by calling the - intrinsic `RANDOM_INIT`. - -##### Example - -See the example for `SPOOKY_HASH`. - - -#### `ODD_RANDOM_INTEGER` - returns odd integer - -##### Status - -Experimental - -##### Description - -Returns a random 64 bit integer distributed uniformly over the odd values. - -##### Syntax - -`call [[stdlib_64_bit_hash_codes:odd_random_integer]]( harvest )` - -##### Class - -Subroutine - -##### Argument - -`harvest`: Shall be an integer of kind `INT64`. It is an `intent(out)` -argument. - -##### Note - -`ODD_RANDOM_INTEGER` is intended to generate seeds for - `UNIVERSAL_MULT_HASH`. `ODD_RANDOM_NUMBER` uses Fortran's intrinsic - `RANDOM_NUMBER` and the values returned can be changed by calling the - intrinsic `RANDOM_INIT`. - -##### Example - -See `UNIVERSAL_MULT_HASH`. - - -#### `PENGY_HASH` - maps a character string or integer vector to an integer - -##### Status - -Experimental - -##### Description - -Maps a character string or integer vector to a 64 bit integer whose -value also depends on a scalar 32 bit integer, `seed`. - -##### Syntax - -`code = [[stdlib_64_bit_hash_codes:pengy_hash]]( key, seed )` - -##### Class - -Pure function - -##### Arguments - -`key`: shall be a scalar expression of type default character or a -Rank 1 integer vector expression of kind `INt8`, `INT16`, `INT32`, or -`INTT64`. It is an `intent(in)` argument. - -`seed`: shall be an integer ex of kind `INT64`. It ispression -an `intent(in)` argument. - -##### Result - -The result is an integer of kind `INT64`. - -##### Note - -`PENGY_HASH` is an implementation of the 64 bit `pengyhash` of Alberto -Fajardo. The hash has acceptable performance on small keys, and good -performance on long keys. 
It passes all the SMHasher tests, and has -no known bad seeds. - -##### Exampl - -```fortran - program demo_pengy_hash - use stdlib_64_bit_hash_codes, only: new_pengy_hash_seed, pengy_hash - use iso_fortran_env, only: int64 - implicit none - integer, allocatable :: key(:) - integer(int64) :: hash - integer(int32) :: seed - key = [ 0_int64, 1_int64, 2_int64, 3_int64 ] - seed = 0_int32 - call new_pengy_hash_seed( seed ) - hash = pengy_hash( key, seed ) - print *, seed, hash - end program demo_pengy_hash -``` - - -#### `SPOOKY_HASH` - maps a character string or integer vector to an integer - -##### Status - -Experimental - -##### Description - -Maps a character string or integer vector to a 64 bit integer whose -value also depends on a two element vector, `seed`. - -##### Syntax - -`code = [[stdlib_64_bit_hash_codes:spooky_hash]]( key, seed )` - -##### Class - -Pure function - -##### Arguments - -`key`: shall be a scalar of type default character expression or a -Rank 1 integer vector expression of kind `INt8`, `INT16`, `INT32`, or -`INTT64`. It is an `intent(in)` argument. - -`seed`: shall be a two element integer vector expression of kind -`INT64`. It is an `intent(in)` argument. - -##### Result - -The result is a two element integer vector of kind `INT64`. - -##### Note - -`SPOOKY_HASH` is an implementation of the 64 bit version 2 of -SpookyHash of Bob Jenkins. The code was designed for Little-Endian -compilers. The output is different on Big Endian compilers, but still -probably as good quality. It is often used as a 64 bit hash using the -first element of the returned value, but can be used as a 128 bit -hash. This version of `SPOOKY_HASH` has good performance on small keys -and excellent performance on long keys. It passes all the SMHasher tests -and has no known bad seeds. 
- -##### Example - -```fortran - program demo_spooky_hash - use stdlib_64_bit_hash_codes, only: new_spooky_hash_seed, & - spooky_hash - use iso_fortran_env, only: int64 - implicit none - integer, allocatable :: key(:) - integer(int64) :: hash(2), seed(2), source - key = [ 0_int64, 1_int64, 2_int64, 3_int64 ] - seed = [ 119_int64, 2_int64**41-1 ] - call new_spooky_hash_seed( seed ) - hash = spooky_hash( key, seed ) - print *, seed, hash - end program demo_spooky_hash -``` - -#### `UNIVERSAL_MULT_HASH` - maps an integer to a smaller number of bits - -##### Status - -Experimental - -##### Description - -Calculates an `nbits` hash code from a 64 bit integer. This is useful -in mapping a hash value to a range 0 to `2**nbits-1`. - -##### Syntax - -`code = [[stdlib_64_bit_hash_codes:universal_mult_hash]]( key, seed, nbits )` - -##### Class - -Pure function - -##### Arguments - -`key`: Shall be an integer of kind `INT64`. It is an `intent(in)` -argument. - -`seed`: Shall be an integer of kind `INT64`. It is an `intent(in)` -argument. It should be an odd value. - -`nbits` Shall be a default integer with `0 < nbits < 64`. It is an -`intent(in)` argument. It must be an odd integer. - -##### Result - -The result is an integer of kind `INT64` with at most the lowest -`nbits` nonzero. - -##### Note - -`UNIVERSAL_MULT_HASH` is an implementation of the Universal -Multiplicative Hash of M. Dietzfelbinger, et al. -It multiplies the `KEY` by `SEED`, and returns the -`NBITS` upper bits of the product as the lowest bits of the result. 
- -##### Example - - -```fortran - program demo_universal_mult_hash - use stdlib_32_bit_hash_codes, only: odd_random_integer, & - universal_mult_hash - use iso_fortran_env, only: int64 - implicit none - integer, allocatable :: array1(:) - integer(int64) :: hash, i, seed, source - seed = 0 - allocate( array1(0:2**6-1) ) - do i = 0, 2**6-1 - array(i) = i - end do - call odd_random_integer( seed ) - source = int(Z'1FFFFFF', int64) - hash = universal_mult_hash(source, seed, 6) - azray1(hash) = source - print *, seed, hash, array1 - end program demo_universal_mult_hash -``` - - -### Test Codes - -The Fortran Standard Library provides two categories of test -codes. One ccategory is tests of the relative performance of the -various hash functions. The other is a comparison of the outputs of -the Fortran hash functions, with the outputs of the C and C++ hash -procedures that are the inspiration for the Fortran hash functions. - -In the `src/test/hash_functions` subdirectory, the Fortran Standard -Library provides two performance test codes for -the hash functions of `stdlib_32_bit_hash_functions` and -`stdlib_64_bit_hash_functions`, `test_32_bit_hash_performance` and -`test_64_bit_hash_performance` respectively. These are primarily set -up to test runtime performance of the functions. They take a sample of -`2**18` integers of kind `INT8` and break it up into vectors of size -1, 2, 4, 8, 16, 64, 256, and 1024 elements, yielding `2**18`, -`2**17`, `2**16`, `2**15`, `2**14`, `2**12`, `2**10`, and `2**8` -vectors respectively. These are then processed by the hash functions -4 times, and the time for processing is reported. Testing so far has -been on a MacBook Pro with a 2.3 GHz Quad-Core Intel Core i5 and 8 GB -2133 MHz LPDDR3 of RAM, using GNU Fortran (GCC) 11.1.0 to compile the -code. 
The results for `test_32_bit_hash_performance` is given by the -following table: - -| Algorithm | Key Size | Key # | Time (s) | -| | Bytes | | | -|------------|-----------|------------|----------| -| FNV-1 | 1 | 1048576 | 0.02949 | -| FNV-1 | 2 | 524288 | 0.02361 | -| FNV-1 | 4 | 262144 | 0.02016 | -| FNV-1 | 8 | 131072 | 0.01806 | -| FNV-1 | 16 | 65536 | 0.01867 | -| FNV-1 | 64 | 16384 | 0.01717 | -| FNV-1 | 256 | 4096 | 0.01759 | -| FNV-1 | 1024 | 1024 | 0.01659 | -| FNV-1a | 1 | 1048576 | 0.02897 | -| FNV-1a | 2 | 524288 | 0.02472 | -| FNV-1a | 4 | 262144 | 0.02025 | -| FNV-1a | 8 | 131072 | 0.01901 | -| FNV-1a | 16 | 65536 | 0.01898 | -| FNV-1a | 64 | 16384 | 0.01784 | -| FNV-1a | 256 | 4096 | 0.01723 | -| FNV-1a | 1024 | 1024 | 0.01673 | -| nmhash32 | 1 | 1048576 | 0.31092 | -| nmhash32 | 2 | 524288 | 0.16230 | -| nmhash32 | 4 | 262144 | 0.07815 | -| nmhash32 | 8 | 131072 | 0.04176 | -| nmhash32 | 16 | 65536 | 0.09261 | -| nmhash32 | 64 | 16384 | 0.04587 | -| nmhash32 | 256 | 4096 | 0.07238 | -| nmhash32 | 1024 | 1024 | 0.07263 | -| nmhash32x | 1 | 1048576 | 0.04294 | -| nmhash32x | 2 | 524288 | 0.02937 | -| nmhash32x | 4 | 262144 | 0.01096 | -| nmhash32x | 8 | 131072 | 0.00911 | -| nmhash32x | 16 | 65536 | 0.01291 | -| nmhash32x | 64 | 16384 | 0.00859 | -| nmhash32x | 256 | 4096 | 0.07373 | -| nmhash32x | 1024 | 1024 | 0.07618 | -| water | 1 | 1048576 | 0.12560 | -| water | 2 | 524288 | 0.06302 | -| water | 4 | 262144 | 0.04020 | -| water | 8 | 131072 | 0.01999 | -| water | 16 | 65536 | 0.01459 | -| water | 64 | 16384 | 0.00923 | -| water | 256 | 4096 | 0.00816 | -| water | 1024 | 1024 | 0.00792 | - -while for `test_64_bit_hash_performance` the results are: - -| Algorithm | Key Size | Key # | Time (s) | -| | Bytes | | | -|------------|-----------|------------|----------| -| FNV-1 | 1 | 1048576 | 0.02981 | -| FNV-1 | 2 | 524288 | 0.02697 | -| FNV-1 | 4 | 262144 | 0.02275 | -| FNV-1 | 8 | 131072 | 0.02431 | -| FNV-1 | 16 | 65536 | 0.02158 | -| FNV-1 | 64 | 
16384 | 0.02007 | -| FNV-1 | 256 | 4096 | 0.01932 | -| FNV-1 | 1024 | 1024 | 0.02089 | -| FNV-1a | 1 | 1048576 | 0.03226 | -| FNV-1a | 2 | 524288 | 0.03076 | -| FNV-1a | 4 | 262144 | 0.02359 | -| FNV-1a | 8 | 131072 | 0.02542 | -| FNV-1a | 16 | 65536 | 0.02364 | -| FNV-1a | 64 | 16384 | 0.02130 | -| FNV-1a | 256 | 4096 | 0.01962 | -| FNV-1a | 1024 | 1024 | 0.01966 | -| Pengy | 1 | 1048576 | 0.24294 | -| Pengy | 2 | 524288 | 0.12066 | -| Pengy | 4 | 262144 | 0.06205 | -| Pengy | 8 | 131072 | 0.03138 | -| Pengy | 16 | 65536 | 0.01608 | -| Pengy | 64 | 16384 | 0.00669 | -| Pengy | 256 | 4096 | 0.00387 | -| Pengy | 1024 | 1024 | 0.00295 | -| Spooky | 1 | 1048576 | 0.11920 | -| Spooky | 2 | 524288 | 0.07478 | -| Spooky | 4 | 262144 | 0.03185 | -| Spooky | 8 | 131072 | 0.01468 | -| Spooky | 16 | 65536 | 0.01503 | -| Spooky | 64 | 16384 | 0.00440 | -| Spooky | 256 | 4096 | 0.00290 | -| Spooky | 1024 | 1024 | 0.00177 | - -As the tested function will typically reside in the instruction cache -these results do not include the costs of reloading the procedure if -hashing is intermittent. If hashing is intermittent then that can more -severely impact the performance of `nmhash32`, `nmhash32x`, -`water_hash`, `pengy_hash`, and `spooky_hash` relative to -`fnv_1_hash` and `fnv_1a_hash`. - -In the `src/test/hash_functions/validation` subdirectory, the Fortran -Standard Library implements three executables to test the validity of -the Fortran codes against the original C and C++ codes. The tree -executables must be compiled manually using the makefile -`Makefile.validation`, and the the compiler suite used must be -GCC's. The first executable, `generate_key_array` is -based on Fortran code, and generates a random sequence of 2048 -integers of kind `INT8`, and stores that sequence in the binary file -`key_array.bin`. 
The second executable, `generate_hash_arrays`, reads -the values in `key_array.bin`, and, for each complicated hash -procedure generates a corresponding binary file containing 2049 hash -values generated from the values in `key_array.bin`. The third -executsble, `hash_validity_test`, reads the binary files and for each -complicated hash procedure compares the contents of the binary file -with the results of calculating hash values using the corresponding -Fortran hash procedure on the same keys. These executables mus be run -manually in the same ordeer. From d449bd0fbddd7c7c8887f8f05de7bda258e14583 Mon Sep 17 00:00:00 2001 From: "William B. Clodius" Date: Sat, 27 Nov 2021 18:54:24 -0700 Subject: [PATCH 020/106] Update doc/specs/stdlib_hash_functions.md Co-authored-by: Jeremie Vandenplas --- doc/specs/stdlib_hash_functions.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/specs/stdlib_hash_functions.md b/doc/specs/stdlib_hash_functions.md index 01fa4e1ca..8dca0a532 100755 --- a/doc/specs/stdlib_hash_functions.md +++ b/doc/specs/stdlib_hash_functions.md @@ -1,5 +1,5 @@ --- -title: Hash codes +title: Hash procedures --- # The `stdlib_32_bit_hash_functions` and `stdlib_64_bit_hash_functions` modules From 874d1c4eae42323d3c918211c32bcfad964b8bbf Mon Sep 17 00:00:00 2001 From: William Clodius Date: Sat, 27 Nov 2021 22:27:27 -0700 Subject: [PATCH 021/106] Changes suggested by Jeremie Vandenplas Changed documentation in ways suggested by Jeremie's comments.
[ticket: X] --- doc/specs/index.md | 2 +- doc/specs/stdlib_hash_procedures.md | 88 +++++++++++++---------------- 2 files changed, 39 insertions(+), 51 deletions(-) diff --git a/doc/specs/index.md b/doc/specs/index.md index aa16e1350..52147fd2f 100644 --- a/doc/specs/index.md +++ b/doc/specs/index.md @@ -14,7 +14,7 @@ This is and index/directory of the specifications (specs) for each new module/fe - [ascii](./stdlib_ascii.html) - Procedures for handling ASCII characters - [bitsets](./stdlib_bitsets.html) - Bitset data types and procedures - [error](./stdlib_error.html) - Catching and handling errors - - [hash\_functions](./stdlib_hash_functions.html) - Hashing integer + - [hash\_procedures](./stdlib_hash_procedures.html) - Hashing integer vectors or character strings - [IO](./stdlib_io.html) - Input/output helper & convenience - [kinds](./stdlib_kinds.html) - Kind parameters diff --git a/doc/specs/stdlib_hash_procedures.md b/doc/specs/stdlib_hash_procedures.md index 01fa4e1ca..117ee4582 100755 --- a/doc/specs/stdlib_hash_procedures.md +++ b/doc/specs/stdlib_hash_procedures.md @@ -1,12 +1,12 @@ --- -title: Hash codes +title: Hash procedures --- -# The `stdlib_32_bit_hash_functions` and `stdlib_64_bit_hash_functions` modules +# The `stdlib_32_bit_hash_codes` and `stdlib_64_bit_hash_codes` modules [TOC] -## Overview of hash functions +## Overview of hash procedures The comparison of lexical entities or other objects for equality can be computationally expensive. @@ -34,13 +34,13 @@ library. ## Licensing The Fortran Standard Library is distributed under the MIT License. -However components of the library may be based on code with additional -licensing restrictions. In particular, the hash codes are often based -on algorithms with additional restrictions on distribution. 
-The algorithms with such restrictions (`Fibonacci Hash`, `Universal -Multiplicative Hash`, -`FNV-1 Hash`, `FNV-1A Hash`, `nmhash32`, `nmhash32x`, `waterhash`, -`pengyhash` and `SpookyHash`) are discussed below. +However components of the library may be based on code released under a +different license. In particular, the hash codes are often based +on algorithms considered as public domain (`Fibonacci Hash`, `Universal +Multiplicative Hash)`or released under a different license than the +MIT license (`FNV-1 Hash`, `FNV-1A Hash`, `nmhash32`, `nmhash32x`, +`waterhash`, `pengyhash` and `SpookyHash`) +The licensing status of the algorithms are discussed below. `FIBONACCI_HASH` is a scalar hash. It is an implementation in Fortran 2008 and signed two's complement integers of the Fibonacci Hash @@ -329,7 +329,7 @@ of a known hash to increase the number of hash table collisions. This additional integer must be kept the same for all hashes in a given hash table, but can be changed and the objects rehashed if collisions are unusually common. -The *seed* can be either a scalar or a two element array. +The *seed* can be either a scalar or a two-element array. Some of the hash functions have alternatives that allow incremental hashing. @@ -539,7 +539,7 @@ The result is an integer of kind `INT32` with at most the lowest `FIBONACCI_HASH` is an implementation of the Fibonacci Hash of Donald E. Knuth. It multiplies the `KEY` by the odd valued approximation to `2**32/phi`, where `phi` is the golden ratio 1.618..., and returns the -`NBITS` upper bits of the product as the lowest bits of the result. +`nbits` upper bits of the product as the lowest bits of the result. ##### Example @@ -612,10 +612,8 @@ expected to be minor compared to its faster hashing rate. 
use stdlib_32_bit_hash_codes, only: fnv_1_hash use iso_fortran_env, only: int32 implicit none - integer, allocatable :: array1(:) integer(int32) :: hash - array1 = [ 5, 4, 3, 1, 10, 4, 9] - hash = fnv_1_hash(array1) + hash = fnv_1_hash([ 5, 4, 3, 1, 10, 4, 9]) print *, hash end program demo_fnv_1_hash ``` @@ -655,7 +653,7 @@ The result is a scalar integer of kind `INT32`. `FNV_1A_HASH` is an implementation of the alternative FNV-1a hash code of Glenn Fowler, Landon Curt Noll, and Phong Vo. -It differs from typical implementations in that it also ecodes the +It differs from typical implementations in that it also encodes the size of the structure in the hash code. This code is relatively fast on short keys, and is small enough that it will often be retained in the instruction cache if hashing is @@ -673,10 +671,8 @@ expected to be minor compared to its faster hashing rate. use stdlib_32_bit_hash_codes, only: fnv_1a_hash use iso_fortran_env, only: int32 implicit none - integer, allocatable :: array1(:) integer(int32) :: hash - array1 = [ 5, 4, 3, 1, 10, 4, 9] - hash = fnv_1a_hash(array1) + hash = fnv_1a_hash( [ 5, 4, 3, 1, 10, 4, 9] ) print *, hash end program demo_fnv_1a_hash ``` @@ -711,7 +707,7 @@ and on output it will be different from the input `seed`. Currently there are no known bad seeds for `NMHASH32`, but if any are identified the procedure will be revised so that they cannot be -returned. This subroutine uses Fortran's intrinsic +returned. This subroutine uses Fortran's intrinsic `RANDOM_NUMBER` and the values returned can be changed by calling the intrinsic `RANDOM_INIT`. @@ -838,7 +834,7 @@ This code has good, but not great, performance on long keys, poorer performance on short keys. As a result it should give fair performance for typical hash table applications. -This code passes the SMHasher tests, and has no known bad seeds: +This code passes the SMHasher tests, and has no known bad seeds. 
##### Example @@ -848,12 +844,10 @@ This code passes the SMHasher tests, and has no known bad seeds: new_nmhash32_seed use iso_fortran_env, only: int32 implicit none - integer, allocatable :: array1(:) integer(int32) :: hash - integer(int32) :: seed = int(Z'11111111`, int32) + integer(int32) :: seed = 42_int32 call new_nmhash32_seed(seed) - array1 = [ 5, 4, 3, 1, 10, 4, 9] - hash = nmhash32(array1, seed) + hash = nmhash32([ 5, 4, 3, 1, 10, 4, 9], seed) print *, seed, hash end program demo_nmhash32 ``` @@ -900,7 +894,7 @@ This code has good, but not great, performance on long keys, poorer performance on short keys. As a result it should give fair performance for typical hash table applications. -This code passes the SMHasher tests, and has no known bad seeds: +This code passes the SMHasher tests, and has no known bad seeds. ##### Example @@ -910,12 +904,10 @@ This code passes the SMHasher tests, and has no known bad seeds: new_nmhash32x_seed use iso_fortran_env, only: int32 implicit none - integer, allocatable :: array1(:) integer(int32) :: hash - integer(int32) :: seed = int(Z'11111111`, int32) + integer(int32) :: seed = 42_int32 call new_nmhash32x_seed(seed) - array1 = [ 5, 4, 3, 1, 10, 4, 9] - hash = nmhash32x(array1, seed) + hash = nmhash32x([ 5, 4, 3, 1, 10, 4, 9], seed) print *, seed, hash end program demo_nmhash32x ``` @@ -963,7 +955,7 @@ Experimental ##### Description -Calculates an `nbits` hash code from a 32 bit integer. This is useful +Calculates an `nbits` hash code from a 32 bit integer. This is useful in mapping a hash value to a range 0 to `2**nbits-1`. ##### Syntax @@ -995,7 +987,7 @@ The result is a scalar integer of kind `INT32` with at most the lowest `UNIVERSAL_MULT_HASH` is an implementation of the Universal Multiplicative Hash of M. Dietzfelbinger, et al. It multiplies the `KEY` by `SEED`, and returns the -`NBITS` upper bits of the product as the lowest bits of the result. +`nbits` upper bits of the product as the lowest bits of the result. 
##### Example @@ -1010,12 +1002,12 @@ It multiplies the `KEY` by `SEED`, and returns the seed = 0 allocate( array1(0:2**6-1) ) do i = 0, 2**6-1 - array(i) = i + array1(i) = i end do call odd_random_integer( seed ) - source = int(Z'1FFFFFF', int32) + source = 42_int32 hash = universal_mult_hash(source, seed, 6) - azray1(hash) = source + array1(hash) = source print *, seed, hash, array1 end program demo_odd_random_integer ``` @@ -1063,7 +1055,7 @@ As a result it should give reasonable performance for typical hash table applications. This code passes the SMHasher tests. The `waterhash` is based on the `wyhash` of Wang Yi. -While `wyhash` has a number of bad seeds, where randomiaation of the +While `wyhash` has a number of bad seeds, where randomization of the output is poor, so far testing has not found any bad seeds for `waterhash`. It can have undefined behavior if the key is not word aligned, @@ -1078,12 +1070,10 @@ the address of the integer is a multiple of the integer size. new_water_hash_seed use iso_fortran_env, only: int32, int64 implicit none - integer, allocatable :: array1(:) integer(int32) :: hash - integer(int64) :: seed = int(Z'11111111`, int64) + integer(int64) :: seed = 42_int64 call new_water_hash_seed( seed ) - array1 = [ 5, 4, 3, 1, 10, 4, 9] - hash = water_hash(array1, seed) + hash = water_hash([ 5, 4, 3, 1, 10, 4, 9], seed) print *, hash, seed end program demo_water_hash ``` @@ -1451,7 +1441,7 @@ Pure function Rank 1 integer vector expression of kind `INt8`, `INT16`, `INT32`, or `INTT64`. It is an `intent(in)` argument. -`seed`: shall be an integer ex of kind `INT64`. It ispression +`seed`: shall be an integer expression of kind `INT64`. It is an `intent(in)` argument. ##### Result @@ -1470,12 +1460,12 @@ no known bad seeds. 
```fortran program demo_pengy_hash use stdlib_64_bit_hash_codes, only: new_pengy_hash_seed, pengy_hash - use iso_fortran_env, only: int64 + use iso_fortran_env, only: int32, int64 implicit none integer, allocatable :: key(:) integer(int64) :: hash integer(int32) :: seed - key = [ 0_int64, 1_int64, 2_int64, 3_int64 ] + key = [ 0, 1, 2, 3 ] seed = 0_int32 call new_pengy_hash_seed( seed ) hash = pengy_hash( key, seed ) @@ -1536,8 +1526,8 @@ and has no known bad seeds. use iso_fortran_env, only: int64 implicit none integer, allocatable :: key(:) - integer(int64) :: hash(2), seed(2), source - key = [ 0_int64, 1_int64, 2_int64, 3_int64 ] + integer(int64) :: hash(2), seed(2) + key = [ 0, 1, 2, 3 ] seed = [ 119_int64, 2_int64**41-1 ] call new_spooky_hash_seed( seed ) hash = spooky_hash( key, seed ) @@ -1600,11 +1590,9 @@ It multiplies the `KEY` by `SEED`, and returns the integer(int64) :: hash, i, seed, source seed = 0 allocate( array1(0:2**6-1) ) - do i = 0, 2**6-1 - array(i) = i - end do + array1 = 0 call odd_random_integer( seed ) - source = int(Z'1FFFFFF', int64) + source = 42_int64 hash = universal_mult_hash(source, seed, 6) azray1(hash) = source print *, seed, hash, array1 @@ -1727,7 +1715,7 @@ severely impact the performance of `nmhash32`, `nmhash32x`, In the `src/test/hash_functions/validation` subdirectory, the Fortran Standard Library implements three executables to test the validity of -the Fortran codes against the original C and C++ codes. The tree +the Fortran codes against the original C and C++ codes. The three executables must be compiled manually using the makefile `Makefile.validation`, and the the compiler suite used must be GCC's. 
The first executable, `generate_key_array` is From 89390c4dc05d94fbce9fb0c6b970794481cf6599 Mon Sep 17 00:00:00 2001 From: William Clodius Date: Sun, 28 Nov 2021 23:02:46 -0700 Subject: [PATCH 022/106] More changes inspired by Jeremie's comments Corrected minor typos in `stdlib_hash_procedures.md`, corrected comments in `stdlib_32_bit_nmhashes.fypp`, changed intents and purity of procedures in `stdlib_64_bit_spookyv2_hashes.fypp` with interfaces in `stdlib_64_bit_hash_functions.fypp` [ticket: X] --- doc/specs/stdlib_hash_procedures.md | 2 +- src/stdlib_32_bit_nmhashes.fypp | 5 +++-- src/stdlib_64_bit_hash_functions.fypp | 4 ++-- src/stdlib_64_bit_spookyv2_hashes.fypp | 11 ++++------- 4 files changed, 10 insertions(+), 12 deletions(-) diff --git a/doc/specs/stdlib_hash_procedures.md b/doc/specs/stdlib_hash_procedures.md index 117ee4582..81c21b2c0 100755 --- a/doc/specs/stdlib_hash_procedures.md +++ b/doc/specs/stdlib_hash_procedures.md @@ -1728,5 +1728,5 @@ values generated from the values in `key_array.bin`. The third executsble, `hash_validity_test`, reads the binary files and for each complicated hash procedure compares the contents of the binary file with the results of calculating hash values using the corresponding -Fortran hash procedure on the same keys. These executables mus be run +Fortran hash procedure on the same keys. These executables must be run manually in the same ordeer. diff --git a/src/stdlib_32_bit_nmhashes.fypp b/src/stdlib_32_bit_nmhashes.fypp index 2d2c273bb..579ff699b 100755 --- a/src/stdlib_32_bit_nmhashes.fypp +++ b/src/stdlib_32_bit_nmhashes.fypp @@ -534,8 +534,9 @@ contains integer(int32) :: a, b integer(int64) :: i, r - ! 5 to 9 bytes - ! mixer: [11049a7d 23 bcccdc7b 12 065e9dad 12] = 0.16577596555667246 + ! - at least 9 bytes + ! - base mixer: [11049a7d 23 bcccdc7b 12 065e9dad 12] = 0.16577596555667246 + ! 
- tail mixer: [16 a52fb2cd 15 551e4d49 16] = 0.17162579707098322 len = size(p, kind=int64) len32 = transfer(len, 0_int32, 2) diff --git a/src/stdlib_64_bit_hash_functions.fypp b/src/stdlib_64_bit_hash_functions.fypp index 5b075aada..e3dcdf5c5 100755 --- a/src/stdlib_64_bit_hash_functions.fypp +++ b/src/stdlib_64_bit_hash_functions.fypp @@ -186,7 +186,7 @@ module stdlib_64_bit_hash_functions interface spooky_init - module subroutine spookysubhash_init( self, seed ) + pure module subroutine spookysubhash_init( self, seed ) type(spooky_subhash), intent(out) :: self integer(int_hash), intent(in) :: seed(2) end subroutine spookysubhash_init @@ -197,7 +197,7 @@ module stdlib_64_bit_hash_functions interface spooky_update module subroutine spookyhash_update( spooky, key ) - type(spooky_subhash), intent(out) :: spooky + type(spooky_subhash), intent(inout) :: spooky integer(int8), intent(in) :: key(0:) end subroutine spookyhash_update diff --git a/src/stdlib_64_bit_spookyv2_hashes.fypp b/src/stdlib_64_bit_spookyv2_hashes.fypp index eaaccff4d..b0e27f0fe 100755 --- a/src/stdlib_64_bit_spookyv2_hashes.fypp +++ b/src/stdlib_64_bit_spookyv2_hashes.fypp @@ -32,11 +32,8 @@ contains integer(int64), intent(in) :: seed(2) integer(int64) :: hash_code(2) - integer(int64) :: hash2(2) - - hash2(:) = seed - call spookyhash_128( key, hash2 ) - hash_code = hash2 + hash_code(:) = seed + call spookyhash_128( key, hash_code ) end function int8_spooky_hash @@ -541,7 +538,7 @@ contains end subroutine spookyhash_end - module subroutine spookysubhash_init( self, seed ) + pure module subroutine spookysubhash_init( self, seed ) type(spooky_subhash), intent(out) :: self integer(int64), intent(in) :: seed(2) @@ -554,7 +551,7 @@ contains ! 
add a message fragment to the state module subroutine spookyhash_update( spooky, key ) - type(spooky_subhash), intent(out) :: spooky + type(spooky_subhash), intent(inout) :: spooky integer(int8), intent(in) :: key(0:) integer(int8) :: dummy(0:7) From 40512f3c8ef6ccdd9aa4e9eea0217cbc34b7a738 Mon Sep 17 00:00:00 2001 From: William Clodius Date: Mon, 29 Nov 2021 18:11:21 -0700 Subject: [PATCH 023/106] Renamed files First step in changing the stdlib_32_bit_hash_functions module to stdlib_32_bit_hash_codes [ticket: X] --- ...b_32_bit_hash_functions.fypp => stdlib_32_bit_hash_codes.fypp} | 0 1 file changed, 0 insertions(+), 0 deletions(-) rename src/{stdlib_32_bit_hash_functions.fypp => stdlib_32_bit_hash_codes.fypp} (100%) diff --git a/src/stdlib_32_bit_hash_functions.fypp b/src/stdlib_32_bit_hash_codes.fypp similarity index 100% rename from src/stdlib_32_bit_hash_functions.fypp rename to src/stdlib_32_bit_hash_codes.fypp From c6930b69e585269c5f825b4fa7484b2cfaa81a02 Mon Sep 17 00:00:00 2001 From: William Clodius Date: Mon, 29 Nov 2021 18:15:09 -0700 Subject: [PATCH 024/106] Renamed file First step in changing stdlib_64_bit_hash_functions to stdlib_64_bit_hash_codes. [ticket: X] --- ...b_64_bit_hash_functions.fypp => stdlib_64_bit_hash_codes.fypp} | 0 1 file changed, 0 insertions(+), 0 deletions(-) rename src/{stdlib_64_bit_hash_functions.fypp => stdlib_64_bit_hash_codes.fypp} (100%) diff --git a/src/stdlib_64_bit_hash_functions.fypp b/src/stdlib_64_bit_hash_codes.fypp similarity index 100% rename from src/stdlib_64_bit_hash_functions.fypp rename to src/stdlib_64_bit_hash_codes.fypp From d78dcf2f409cbdae1e07f7d793c13472a85ce29a Mon Sep 17 00:00:00 2001 From: William Clodius Date: Mon, 29 Nov 2021 18:23:16 -0700 Subject: [PATCH 025/106] Changed CMakeLists.txt and Makefile.manual Changed to compile the renamed stdlib_32_bit_hash_codes.fypp and stdlib_64_bit_hash_codes.fypp. 
[ticket: X] --- src/CMakeLists.txt | 4 ++-- src/Makefile.manual | 20 ++++++++++---------- 2 files changed, 12 insertions(+), 12 deletions(-) diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index 92d058ba7..e5560990a 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -3,11 +3,11 @@ # Create a list of the files to be preprocessed set(fppFiles stdlib_32_bit_fnv_hashes.fypp - stdlib_32_bit_hash_functions.fypp + stdlib_32_bit_hash_codes.fypp stdlib_32_bit_nmhashes.fypp stdlib_32_bit_water_hashes.fypp stdlib_64_bit_fnv_hashes.fypp - stdlib_64_bit_hash_functions.fypp + stdlib_64_bit_hash_codes.fypp stdlib_64_bit_pengy_hashes.fypp stdlib_64_bit_spookyv2_hashes.fypp stdlib_ascii.fypp diff --git a/src/Makefile.manual b/src/Makefile.manual index f029ce3c1..09622c596 100644 --- a/src/Makefile.manual +++ b/src/Makefile.manual @@ -1,10 +1,10 @@ SRCFYPP = \ stdlib_32_bit_fnv_hashes.fypp \ - stdlib_32_bit_hash_functions.fypp \ + stdlib_32_bit_hash_codes.fypp \ stdlib_32_bit_nmhashes.fypp \ stdlib_32_bit_water_hashes.fypp \ stdlib_64_bit_fnv_hashes.fypp \ - stdlib_64_bit_hash_functions.fypp \ + stdlib_64_bit_hash_codes.fypp \ stdlib_64_bit_pengy_hashes.fypp \ stdlib_64_bit_spookyv2_hashes.fypp \ stdlib_ascii.fypp \ @@ -84,21 +84,21 @@ $(SRCGEN): %.f90: %.fypp common.fypp # Fortran module dependencies f18estop.o: stdlib_error.o stdlib_32_bit_fnv_hashes.o: \ - stdlib_32_bit_hash_functions.o -stdlib_32_bit_hash_functions.o: \ + stdlib_32_bit_hash_codes.o +stdlib_32_bit_hash_codes.o: \ stdlib_kinds.o stdlib_32_bit_nmhashes.o: \ - stdlib_32_bit_hash_functions.o + stdlib_32_bit_hash_codes.o stdlib_32_bit_water_hashes.o: \ - stdlib_32_bit_hash_functions.o + stdlib_32_bit_hash_codes.o stdlib_64_bit_fnv_hashes.o: \ - stdlib_64_bit_hash_functions.o -stdlib_64_bit_hash_functions.o: \ + stdlib_64_bit_hash_codes.o +stdlib_64_bit_hash_codes.o: \ stdlib_kinds.o stdlib_64_bit_pengy_hashes.o: \ - stdlib_64_bit_hash_functions.o + stdlib_64_bit_hash_codes.o 
stdlib_64_bit_spookyv2_hashes.o: \ - stdlib_64_bit_hash_functions.o + stdlib_64_bit_hash_codes.o stdlib_ascii.o: stdlib_kinds.o stdlib_bitsets.o: stdlib_kinds.o stdlib_bitsets_64.o: stdlib_bitsets.o From c08c2ab0610a58524a91ca90958d4dbc42e86872 Mon Sep 17 00:00:00 2001 From: William Clodius Date: Mon, 29 Nov 2021 18:48:16 -0700 Subject: [PATCH 026/106] Updated module name Updated references to the stdlib_32_bit_hash_functions to refer to stdlib_32_bit_hash_codes. [ticket: X] --- src/stdlib_32_bit_fnv_hashes.fypp | 2 +- src/stdlib_32_bit_hash_codes.fypp | 4 ++-- src/stdlib_32_bit_nmhashes.fypp | 2 +- src/stdlib_32_bit_water_hashes.fypp | 2 +- 4 files changed, 5 insertions(+), 5 deletions(-) diff --git a/src/stdlib_32_bit_fnv_hashes.fypp b/src/stdlib_32_bit_fnv_hashes.fypp index 562de2978..a8ef7607a 100755 --- a/src/stdlib_32_bit_fnv_hashes.fypp +++ b/src/stdlib_32_bit_fnv_hashes.fypp @@ -10,7 +10,7 @@ !#! Integer kinds to be considered during templating #:set INT_KINDS = ["int16", "int32", "int64"] -submodule(stdlib_32_bit_hash_functions) stdlib_32_bit_fnv_hashes +submodule(stdlib_32_bit_hash_codes) stdlib_32_bit_fnv_hashes !! An implementation of the FNV hashes 1 and 1a of Glenn Fowler, Landon Curt !! Noll, and Kiem-Phong-Vo, !! https://en.wikipedia.org/wiki/Fowler–Noll–Vo_hash_function diff --git a/src/stdlib_32_bit_hash_codes.fypp b/src/stdlib_32_bit_hash_codes.fypp index 7ec2760d3..655e90d12 100755 --- a/src/stdlib_32_bit_hash_codes.fypp +++ b/src/stdlib_32_bit_hash_codes.fypp @@ -1,7 +1,7 @@ #! 
Integer kinds to be considered during templating #:set INT_KINDS = ["int8", "int16", "int32", "int64"] -module stdlib_32_bit_hash_functions +module stdlib_32_bit_hash_codes use, intrinsic :: iso_fortran_env, only : & character_storage_size @@ -241,4 +241,4 @@ contains end subroutine odd_random_integer -end module stdlib_32_bit_hash_functions +end module stdlib_32_bit_hash_codes diff --git a/src/stdlib_32_bit_nmhashes.fypp b/src/stdlib_32_bit_nmhashes.fypp index 579ff699b..93eab2b80 100755 --- a/src/stdlib_32_bit_nmhashes.fypp +++ b/src/stdlib_32_bit_nmhashes.fypp @@ -44,7 +44,7 @@ #! Integer kinds to be considered during templating #:set INT_KINDS = ["int16", "int32", "int64"] -submodule(stdlib_32_bit_hash_functions) stdlib_32_bit_nmhashes +submodule(stdlib_32_bit_hash_codes) stdlib_32_bit_nmhashes implicit none diff --git a/src/stdlib_32_bit_water_hashes.fypp b/src/stdlib_32_bit_water_hashes.fypp index 33181ab3f..a20427702 100755 --- a/src/stdlib_32_bit_water_hashes.fypp +++ b/src/stdlib_32_bit_water_hashes.fypp @@ -74,7 +74,7 @@ #! Integer kinds to be considered during templating #:set INT_KINDS = ["int16", "int32", "int64"] -submodule(stdlib_32_bit_hash_functions) stdlib_32_bit_water_hashes +submodule(stdlib_32_bit_hash_codes) stdlib_32_bit_water_hashes implicit none contains From 7438b722c07bb867f2c37b1ab9684c314fdf2021 Mon Sep 17 00:00:00 2001 From: William Clodius Date: Mon, 29 Nov 2021 18:55:00 -0700 Subject: [PATCH 027/106] Updated module references Updated references to stdlib_64_bit_hash_functions module to refer to the stdlib_64_bit_hash_codes module. 
[ticket: X] --- src/stdlib_64_bit_fnv_hashes.fypp | 2 +- src/stdlib_64_bit_hash_codes.fypp | 4 ++-- src/stdlib_64_bit_pengy_hashes.fypp | 2 +- src/stdlib_64_bit_spookyv2_hashes.fypp | 4 ++-- 4 files changed, 6 insertions(+), 6 deletions(-) diff --git a/src/stdlib_64_bit_fnv_hashes.fypp b/src/stdlib_64_bit_fnv_hashes.fypp index 1eefdb886..099a99ca2 100755 --- a/src/stdlib_64_bit_fnv_hashes.fypp +++ b/src/stdlib_64_bit_fnv_hashes.fypp @@ -12,7 +12,7 @@ #! Integer kinds to be considered during templating #:set INT_KINDS = ["int16", "int32", "int64"] -submodule(stdlib_64_bit_hash_functions) stdlib_64_bit_fnv_hashes +submodule(stdlib_64_bit_hash_codes) stdlib_64_bit_fnv_hashes ! An implementation of the FNV hashes 1 and 1a of Glenn Fowler, Landon Curt ! Noll, and Kiem-Phong-Vo, ! https://en.wikipedia.org/wiki/Fowler–Noll–Vo_hash_function diff --git a/src/stdlib_64_bit_hash_codes.fypp b/src/stdlib_64_bit_hash_codes.fypp index e3dcdf5c5..9f110981b 100755 --- a/src/stdlib_64_bit_hash_codes.fypp +++ b/src/stdlib_64_bit_hash_codes.fypp @@ -1,7 +1,7 @@ #! Integer kinds to be considered during templating #:set INT_KINDS = ["int8", "int16", "int32", "int64"] -module stdlib_64_bit_hash_functions +module stdlib_64_bit_hash_codes use, intrinsic :: iso_fortran_env, only : & character_storage_size @@ -305,4 +305,4 @@ contains end subroutine random_integer -end module stdlib_64_bit_hash_functions +end module stdlib_64_bit_hash_codes diff --git a/src/stdlib_64_bit_pengy_hashes.fypp b/src/stdlib_64_bit_pengy_hashes.fypp index ca1f14791..6eef988fd 100755 --- a/src/stdlib_64_bit_pengy_hashes.fypp +++ b/src/stdlib_64_bit_pengy_hashes.fypp @@ -40,7 +40,7 @@ #! 
Integer kinds to be considered during templating #:set INT_KINDS = ["int16", "int32", "int64"] -submodule(stdlib_64_bit_hash_functions) stdlib_64_bit_pengy_hashes +submodule(stdlib_64_bit_hash_codes) stdlib_64_bit_pengy_hashes implicit none diff --git a/src/stdlib_64_bit_spookyv2_hashes.fypp b/src/stdlib_64_bit_spookyv2_hashes.fypp index b0e27f0fe..3aacba5b3 100755 --- a/src/stdlib_64_bit_spookyv2_hashes.fypp +++ b/src/stdlib_64_bit_spookyv2_hashes.fypp @@ -13,7 +13,7 @@ #! Integer kinds to be considered during templating #:set INT_KINDS = ["int16", "int32", "int64"] -submodule(stdlib_64_bit_hash_functions) stdlib_64_bit_spookyv2_hashes +submodule(stdlib_64_bit_hash_codes) stdlib_64_bit_spookyv2_hashes ! I have tried to make this portable while retaining efficiency. I assume ! processors with two's complement integers from 8, 16, 32, and 64 bits. @@ -552,7 +552,7 @@ contains ! add a message fragment to the state module subroutine spookyhash_update( spooky, key ) type(spooky_subhash), intent(inout) :: spooky - integer(int8), intent(in) :: key(0:) + integer(int8), intent(in) :: key(0:) integer(int8) :: dummy(0:7) integer(int64) :: h(0:11) From 3047dcc603f2ea6327dfd0c48d28f269577b8c4d Mon Sep 17 00:00:00 2001 From: William Clodius Date: Mon, 29 Nov 2021 19:02:30 -0700 Subject: [PATCH 028/106] Changed module references Changed references to stdlib_32_bit_hash_functions and stdlib_64_bit_hash_functions to stdlib_32_bit_hash_codes and stdlib_64_bit_hash_codes respectively. 
[ticket: X] --- src/tests/hash_functions/test_32_bit_hash_performance.f90 | 2 +- src/tests/hash_functions/test_64_bit_hash_performance.f90 | 2 +- src/tests/hash_functions/validation/hash_validity_test.f90 | 4 ++-- 3 files changed, 4 insertions(+), 4 deletions(-) diff --git a/src/tests/hash_functions/test_32_bit_hash_performance.f90 b/src/tests/hash_functions/test_32_bit_hash_performance.f90 index acee5e36b..3436f080c 100755 --- a/src/tests/hash_functions/test_32_bit_hash_performance.f90 +++ b/src/tests/hash_functions/test_32_bit_hash_performance.f90 @@ -8,7 +8,7 @@ program test_32_bit_hash_performance int32, & int64 - use stdlib_32_bit_hash_functions + use stdlib_32_bit_hash_codes implicit none diff --git a/src/tests/hash_functions/test_64_bit_hash_performance.f90 b/src/tests/hash_functions/test_64_bit_hash_performance.f90 index 6c445f781..78bbb32b4 100755 --- a/src/tests/hash_functions/test_64_bit_hash_performance.f90 +++ b/src/tests/hash_functions/test_64_bit_hash_performance.f90 @@ -8,7 +8,7 @@ program test_64_bit_hash_performance int32, & int64 - use stdlib_64_bit_hash_functions + use stdlib_64_bit_hash_codes implicit none diff --git a/src/tests/hash_functions/validation/hash_validity_test.f90 b/src/tests/hash_functions/validation/hash_validity_test.f90 index 313a77764..3d47a5d88 100755 --- a/src/tests/hash_functions/validation/hash_validity_test.f90 +++ b/src/tests/hash_functions/validation/hash_validity_test.f90 @@ -11,12 +11,12 @@ program hash_validity_test use, intrinsic :: iso_fortran_env, only: int8, int32, int64, real64 - use stdlib_32_bit_hash_functions, only: & + use stdlib_32_bit_hash_codes, only: & little_endian, & nmhash32, & nmhash32x, & water_hash - use stdlib_64_bit_hash_functions, only: & + use stdlib_64_bit_hash_codes, only: & pengy_hash, & spooky_hash From c4d8d28c05f7f4437cdeb3fb373d6504e5f55189 Mon Sep 17 00:00:00 2001 From: William Clodius Date: Wed, 1 Dec 2021 17:02:50 -0700 Subject: [PATCH 029/106] Fixed typo Changed "outputa" to 
"outputs". [ticket: X] --- src/tests/hash_functions/validation/README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/tests/hash_functions/validation/README.md b/src/tests/hash_functions/validation/README.md index 41840253c..62c18699d 100644 --- a/src/tests/hash_functions/validation/README.md +++ b/src/tests/hash_functions/validation/README.md @@ -2,7 +2,7 @@ The validation directory contains code to validate the Fortran hash functions ag * `generate_key_array.f90` - creates a file containing 2048 random 8 bit integers. -* `generate_hash_arrays.cpp` - reads the file generated by `generate_key_array.f90` and uses its contents to generate 2049 hashes for each hash algorithm and outputa files containing the hashes. +* `generate_hash_arrays.cpp` - reads the file generated by `generate_key_array.f90` and uses its contents to generate 2049 hashes for each hash algorithm and outputs files containing the hashes. * `hash_validity_test.f90`- reads the file generated by `generate_key_array.f90` and uses its contents to generate 2049 hashes for each hash algorithm and compares the result with the corresponding outputs of `generate_hash_arrays.cpp` reporting if the outputs are not equal. From 0b429a4cb94202b817911c6a094426ada80ac53f Mon Sep 17 00:00:00 2001 From: William Clodius Date: Wed, 1 Dec 2021 17:45:11 -0700 Subject: [PATCH 030/106] Changed function attribute Added the pure attribute to rot_64_32. 
[ticket: X] --- src/stdlib_64_bit_spookyv2_hashes.fypp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/stdlib_64_bit_spookyv2_hashes.fypp b/src/stdlib_64_bit_spookyv2_hashes.fypp index 3aacba5b3..cc9353316 100755 --- a/src/stdlib_64_bit_spookyv2_hashes.fypp +++ b/src/stdlib_64_bit_spookyv2_hashes.fypp @@ -682,7 +682,7 @@ contains end subroutine spookyhash_final - function rot_64_32( a, k ) + pure function rot_64_32( a, k ) integer(int64) :: rot_64_32 integer(int64), intent(in) :: a integer, intent(in) :: k From 9b6ce56805b3666c8b7d16d537bfdb3882554937 Mon Sep 17 00:00:00 2001 From: William Clodius Date: Fri, 3 Dec 2021 13:19:02 -0700 Subject: [PATCH 031/106] Fixed typos I fixed various typos found when a spell-checking application viewed the file. I also added "(since version 10)" after gfortran in describing which compilers default to integer overflow wrapping. [ticket: X] --- doc/specs/stdlib_hash_procedures.md | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/doc/specs/stdlib_hash_procedures.md b/doc/specs/stdlib_hash_procedures.md index 81c21b2c0..8581d22bf 100755 --- a/doc/specs/stdlib_hash_procedures.md +++ b/doc/specs/stdlib_hash_procedures.md @@ -301,7 +301,7 @@ so the modules use signed integer arithmetic. For that reason trapping on signed arithmetic must be disabled. The command line flags to disable overflow detection for compilers implementing submodules are summarized in the table below. -Note that FLANG, gfortran, ifort, and NAG all default to +Note that FLANG, gfortran (since version 10), ifort, and NAG all default to integer overflow wrapping. 
|Compiler|Legal flag|Illegal flag|Default| @@ -378,7 +378,7 @@ The `stdlib_64_bit_hash_functions` module also provides implementations of four hash code algorithms: the *FNV_1* and *FNV_1A* variants of Glenn Fowler, Landon Curt Noll, and Kiem-Phong Vo; -the *pengynash* of Alberto Fajardo; +the *pengyhash* of Alberto Fajardo; and the *SpookyHash* of Bob Jenkins. The detailed implementation of each algorithm is handled in a separate submodule: `stdlib_64_bit_fnv_hashes`, @@ -412,7 +412,7 @@ While they do not at all perform well on the SMHasher test suite, usage indicates that that that this has little impact on the performance of small hash tables, and the small size of the functions allows their quick loading and retainment in the instruction cache, -givng a performance boost where the hashing is intermittent. +giving a performance boost where the hashing is intermittent. (See the [SMHasher discussion](https://github.com/rurban/smhasher/README.md) and S. Richter, V. Alvarez, and J. Dittrich, @@ -745,7 +745,7 @@ and on output it will be different from the input `seed`. Currently there are no known bad seeds for `NMHASH32X`, but if any are identified the procedure will be revised so that they cannot be -returned. This subroutine uses Fortran's intrinsic +returned. This subroutine uses Fortran's intrinsic `RANDOM_NUMBER` and the values returned can be changed by calling the intrinsic `RANDOM_INIT`. @@ -1277,7 +1277,7 @@ The result is a scalar integer of kind `INT32`. `FNV_1A` is an implementation of the alternative FNV-1a hash code of Glenn Fowler, Landon Curt Noll, and Phong Vo. -It differs from typical implementations in that it also ecodes the +It differs from typical implementations in that it also encodes the size of the structure in the hash code. 
This code is relatively fast on short keys, and is small enough that it will often be retained in the instruction cache if hashing is @@ -1603,7 +1603,7 @@ It multiplies the `KEY` by `SEED`, and returns the ### Test Codes The Fortran Standard Library provides two categories of test -codes. One ccategory is tests of the relative performance of the +codes. One category is tests of the relative performance of the various hash functions. The other is a comparison of the outputs of the Fortran hash functions, with the outputs of the C and C++ hash procedures that are the inspiration for the Fortran hash functions. @@ -1725,8 +1725,8 @@ integers of kind `INT8`, and stores that sequence in the binary file the values in `key_array.bin`, and, for each complicated hash procedure generates a corresponding binary file containing 2049 hash values generated from the values in `key_array.bin`. The third -executsble, `hash_validity_test`, reads the binary files and for each +executable, `hash_validity_test`, reads the binary files and for each complicated hash procedure compares the contents of the binary file with the results of calculating hash values using the corresponding Fortran hash procedure on the same keys. These executables must be run -manually in the same ordeer. +manually in the same order. From f05df9cae3bc2c708d97292c4adf588cf4f14a06 Mon Sep 17 00:00:00 2001 From: William Clodius Date: Fri, 3 Dec 2021 13:28:57 -0700 Subject: [PATCH 032/106] Fixed duplicate "the"'s Changed two instances of "the the"to "the". [ticket: X] --- doc/specs/stdlib_hash_procedures.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/doc/specs/stdlib_hash_procedures.md b/doc/specs/stdlib_hash_procedures.md index 8581d22bf..a298b3513 100755 --- a/doc/specs/stdlib_hash_procedures.md +++ b/doc/specs/stdlib_hash_procedures.md @@ -277,7 +277,7 @@ and performing the operations on each half separately using the larger integers. 
In the second, the unsigned integers may be replaced directly by the corresponding signed integers, but -otherwise not modifying the the code logic. +otherwise not modifying the code logic. The first should be standard conforming on current compilers, but is more computationally intensive unless the compilers recognize underlying idioms that are rarely used in Fortran codes. The second is @@ -1717,7 +1717,7 @@ In the `src/test/hash_functions/validation` subdirectory, the Fortran Standard Library implements three executables to test the validity of the Fortran codes against the original C and C++ codes. The three executables must be compiled manually using the makefile -`Makefile.validation`, and the the compiler suite used must be +`Makefile.validation`, and the compiler suite used must be GCC's. The first executable, `generate_key_array` is based on Fortran code, and generates a random sequence of 2048 integers of kind `INT8`, and stores that sequence in the binary file From 04b8201bfd737996e2d5aad0e7ef674952ad0894 Mon Sep 17 00:00:00 2001 From: Jeremie Vandenplas Date: Fri, 3 Dec 2021 21:55:51 +0100 Subject: [PATCH 033/106] mv validation to hash_functions_test and add Makefile-manual --- .../Makefile.manual} | 44 ++++++++++++------- .../README.md | 0 .../SpookyV2.cpp | 0 .../SpookyV2.h | 0 .../SpookyV2Test.cpp | 0 .../generate_hash_arrays.cpp | 0 .../generate_key_array.f90 | 0 .../hash_validity_test.f90 | 0 .../nmhash.c | 0 .../nmhash.h | 0 .../nmhash_scalar.c | 0 .../nmhash_scalar.h | 0 .../pengyhash.c | 0 .../pengyhash.h | 0 .../waterhash.c | 0 .../waterhash.h | 0 16 files changed, 28 insertions(+), 16 deletions(-) rename src/tests/{hash_functions/validation/Makefile.validation => hash_functions_test/Makefile.manual} (50%) mode change 100755 => 100644 rename src/tests/{hash_functions/validation => hash_functions_test}/README.md (100%) rename src/tests/{hash_functions/validation => hash_functions_test}/SpookyV2.cpp (100%) mode change 100755 => 100644 rename 
src/tests/{hash_functions/validation => hash_functions_test}/SpookyV2.h (100%) mode change 100755 => 100644 rename src/tests/{hash_functions/validation => hash_functions_test}/SpookyV2Test.cpp (100%) mode change 100755 => 100644 rename src/tests/{hash_functions/validation => hash_functions_test}/generate_hash_arrays.cpp (100%) mode change 100755 => 100644 rename src/tests/{hash_functions/validation => hash_functions_test}/generate_key_array.f90 (100%) mode change 100755 => 100644 rename src/tests/{hash_functions/validation => hash_functions_test}/hash_validity_test.f90 (100%) mode change 100755 => 100644 rename src/tests/{hash_functions/validation => hash_functions_test}/nmhash.c (100%) mode change 100755 => 100644 rename src/tests/{hash_functions/validation => hash_functions_test}/nmhash.h (100%) mode change 100755 => 100644 rename src/tests/{hash_functions/validation => hash_functions_test}/nmhash_scalar.c (100%) mode change 100755 => 100644 rename src/tests/{hash_functions/validation => hash_functions_test}/nmhash_scalar.h (100%) mode change 100755 => 100644 rename src/tests/{hash_functions/validation => hash_functions_test}/pengyhash.c (100%) mode change 100755 => 100644 rename src/tests/{hash_functions/validation => hash_functions_test}/pengyhash.h (100%) mode change 100755 => 100644 rename src/tests/{hash_functions/validation => hash_functions_test}/waterhash.c (100%) mode change 100755 => 100644 rename src/tests/{hash_functions/validation => hash_functions_test}/waterhash.h (100%) mode change 100755 => 100644 diff --git a/src/tests/hash_functions/validation/Makefile.validation b/src/tests/hash_functions_test/Makefile.manual old mode 100755 new mode 100644 similarity index 50% rename from src/tests/hash_functions/validation/Makefile.validation rename to src/tests/hash_functions_test/Makefile.manual index 3f8e8ebcf..a649cd84c --- a/src/tests/hash_functions/validation/Makefile.validation +++ b/src/tests/hash_functions_test/Makefile.manual @@ -1,24 +1,35 @@ 
-MOD_PATH = -I../../../ +CC = gcc +CXX = g++ FFLAGS = -O3 CFLAGS = -O3 CXXFLAGS = -O3 -LIBDIRS = -L./ -LIBS = -lc_hash -INCLUDE_DIRS = -I./ -all: generate_hash_arrays generate_key_array hash_validity_test +CPPFLAGS += -I. -I../.. -I.. +LDFLAGS += -L../.. -L.. -lstdlib-testing -lstdlib + +PROGS = generate_key_array generate_hash_arrays hash_validity_test +TESTPROGS = $(PROGS:=TEST) + +all: $(PROGS) + +test: $(TESTPROGS) + +$(TESTPROGS): + ./$(@:TEST=) + + generate_key_array: generate_key_array.f90 $(FC) $(FFLAGS) generate_key_array.f90 -o generate_key_array hash_validity_test: hash_validity_test.f90 - $(FC) $(FFLAGS) -L../../../ -lstdlib $(MOD_PATH) \ - hash_validity_test.f90 -o hash_validity_test + $(FC) $(FFLAGS) $(CPPFLAGS) \ + hash_validity_test.f90 -o hash_validity_test $(LDFLAGS) -generate_hash_arrays: generate_hash_arrays.o ./libc_hash.a - $(CXX) $(CXXFLAGS) $(LIBDIRS) generate_hash_arrays.o \ -$(LIBS) -o generate_hash_arrays +generate_hash_arrays: generate_hash_arrays.o libc_hash.a + $(CXX) $(CXXFLAGS) generate_hash_arrays.o \ + -o generate_hash_arrays -L. 
-lc_hash generate_hash_arrays.o: generate_hash_arrays.cpp libc_hash.a $(CXX) $(CXXFLAGS) -c generate_hash_arrays.cpp -o generate_hash_arrays.o @@ -28,23 +39,24 @@ libc_hash.a: SpookyV2.o SpookyV2Test.o pengyhash.o nmhash_scalar.o waterhash.o nmhash_scalar.o waterhash.o pengyhash.o: pengyhash.c pengyhash.h - $(CC) $(CFLAGS) $(INCLUDE_DIRS) -c pengyhash.c -o pengyhash.o + $(CC) $(CFLAGS) $(CPPFLAGS) -c pengyhash.c -o pengyhash.o waterhash.o: waterhash.c waterhash.h - $(CC) $(CFLAGS) $(INCLUDE_DIRS) -c waterhash.c -o waterhash.o + $(CC) $(CFLAGS) $(CPPFLAGS) -c waterhash.c -o waterhash.o SpookyV2.o: SpookyV2.cpp SpookyV2.h - $(CXX) $(CXXFLAGS) $(INCLUDE_DIRS) -c SpookyV2.cpp -o SpookyV2.o + $(CXX) $(CXXFLAGS) $(CPPFLAGS) -c SpookyV2.cpp -o SpookyV2.o SpookyV2Test.o: SpookyV2Test.cpp SpookyV2.h - $(CXX) $(CXXFLAGS) $(INCLUDE_DIRS) -c SpookyV2Test.cpp -o SpookyV2Test.o + $(CXX) $(CXXFLAGS) $(CPPFLAGS) -c SpookyV2Test.cpp -o SpookyV2Test.o nmhash_scalar.o: nmhash_scalar.c nmhash_scalar.h - $(CC) $(CXXFLAGS) $(INCLUDE_DIRS) -c nmhash_scalar.c -o nmhash_scalar.o + $(CC) $(CXXFLAGS) $(CPPFLAGS) -c nmhash_scalar.c -o nmhash_scalar.o clean: rm nmhash_scalar.o SpookyV2Test.o SpookyV2.o waterhash.o pengyhash.o \ libc_hash.a generate_hash_arrays.o generate_hash_arrays \ - hash_validity_test generate_key_array + hash_validity_test generate_key_array \ + *.bin diff --git a/src/tests/hash_functions/validation/README.md b/src/tests/hash_functions_test/README.md similarity index 100% rename from src/tests/hash_functions/validation/README.md rename to src/tests/hash_functions_test/README.md diff --git a/src/tests/hash_functions/validation/SpookyV2.cpp b/src/tests/hash_functions_test/SpookyV2.cpp old mode 100755 new mode 100644 similarity index 100% rename from src/tests/hash_functions/validation/SpookyV2.cpp rename to src/tests/hash_functions_test/SpookyV2.cpp diff --git a/src/tests/hash_functions/validation/SpookyV2.h b/src/tests/hash_functions_test/SpookyV2.h old mode 100755 new 
mode 100644 similarity index 100% rename from src/tests/hash_functions/validation/SpookyV2.h rename to src/tests/hash_functions_test/SpookyV2.h diff --git a/src/tests/hash_functions/validation/SpookyV2Test.cpp b/src/tests/hash_functions_test/SpookyV2Test.cpp old mode 100755 new mode 100644 similarity index 100% rename from src/tests/hash_functions/validation/SpookyV2Test.cpp rename to src/tests/hash_functions_test/SpookyV2Test.cpp diff --git a/src/tests/hash_functions/validation/generate_hash_arrays.cpp b/src/tests/hash_functions_test/generate_hash_arrays.cpp old mode 100755 new mode 100644 similarity index 100% rename from src/tests/hash_functions/validation/generate_hash_arrays.cpp rename to src/tests/hash_functions_test/generate_hash_arrays.cpp diff --git a/src/tests/hash_functions/validation/generate_key_array.f90 b/src/tests/hash_functions_test/generate_key_array.f90 old mode 100755 new mode 100644 similarity index 100% rename from src/tests/hash_functions/validation/generate_key_array.f90 rename to src/tests/hash_functions_test/generate_key_array.f90 diff --git a/src/tests/hash_functions/validation/hash_validity_test.f90 b/src/tests/hash_functions_test/hash_validity_test.f90 old mode 100755 new mode 100644 similarity index 100% rename from src/tests/hash_functions/validation/hash_validity_test.f90 rename to src/tests/hash_functions_test/hash_validity_test.f90 diff --git a/src/tests/hash_functions/validation/nmhash.c b/src/tests/hash_functions_test/nmhash.c old mode 100755 new mode 100644 similarity index 100% rename from src/tests/hash_functions/validation/nmhash.c rename to src/tests/hash_functions_test/nmhash.c diff --git a/src/tests/hash_functions/validation/nmhash.h b/src/tests/hash_functions_test/nmhash.h old mode 100755 new mode 100644 similarity index 100% rename from src/tests/hash_functions/validation/nmhash.h rename to src/tests/hash_functions_test/nmhash.h diff --git a/src/tests/hash_functions/validation/nmhash_scalar.c 
b/src/tests/hash_functions_test/nmhash_scalar.c old mode 100755 new mode 100644 similarity index 100% rename from src/tests/hash_functions/validation/nmhash_scalar.c rename to src/tests/hash_functions_test/nmhash_scalar.c diff --git a/src/tests/hash_functions/validation/nmhash_scalar.h b/src/tests/hash_functions_test/nmhash_scalar.h old mode 100755 new mode 100644 similarity index 100% rename from src/tests/hash_functions/validation/nmhash_scalar.h rename to src/tests/hash_functions_test/nmhash_scalar.h diff --git a/src/tests/hash_functions/validation/pengyhash.c b/src/tests/hash_functions_test/pengyhash.c old mode 100755 new mode 100644 similarity index 100% rename from src/tests/hash_functions/validation/pengyhash.c rename to src/tests/hash_functions_test/pengyhash.c diff --git a/src/tests/hash_functions/validation/pengyhash.h b/src/tests/hash_functions_test/pengyhash.h old mode 100755 new mode 100644 similarity index 100% rename from src/tests/hash_functions/validation/pengyhash.h rename to src/tests/hash_functions_test/pengyhash.h diff --git a/src/tests/hash_functions/validation/waterhash.c b/src/tests/hash_functions_test/waterhash.c old mode 100755 new mode 100644 similarity index 100% rename from src/tests/hash_functions/validation/waterhash.c rename to src/tests/hash_functions_test/waterhash.c diff --git a/src/tests/hash_functions/validation/waterhash.h b/src/tests/hash_functions_test/waterhash.h old mode 100755 new mode 100644 similarity index 100% rename from src/tests/hash_functions/validation/waterhash.h rename to src/tests/hash_functions_test/waterhash.h From c60277d0aa052a1bedbb4b5aabe52d88f752d462 Mon Sep 17 00:00:00 2001 From: Jeremie Vandenplas Date: Fri, 3 Dec 2021 21:56:34 +0100 Subject: [PATCH 034/106] add hash_functions_test to Makefile.manual --- src/tests/Makefile.manual | 1 + 1 file changed, 1 insertion(+) diff --git a/src/tests/Makefile.manual b/src/tests/Makefile.manual index 30ce6902f..2c065507b 100644 --- a/src/tests/Makefile.manual +++ 
b/src/tests/Makefile.manual @@ -15,6 +15,7 @@ all test clean:: $(MAKE) -f Makefile.manual --directory=ascii $@ $(MAKE) -f Makefile.manual --directory=bitsets $@ $(MAKE) -f Makefile.manual --directory=hash_functions $@ + $(MAKE) -f Makefile.manual --directory=hash_functions_test $@ $(MAKE) -f Makefile.manual --directory=io $@ $(MAKE) -f Makefile.manual --directory=logger $@ $(MAKE) -f Makefile.manual --directory=optval $@ From 44854ece5b18f833fb8ad2f1d323c38bab9e95bd Mon Sep 17 00:00:00 2001 From: Jeremie Vandenplas Date: Fri, 3 Dec 2021 23:24:59 +0100 Subject: [PATCH 035/106] add testdrive program --- src/tests/hash_functions_test/Makefile.manual | 15 +- .../test_hash_functions.f90 | 271 ++++++++++++++++++ 2 files changed, 278 insertions(+), 8 deletions(-) create mode 100644 src/tests/hash_functions_test/test_hash_functions.f90 diff --git a/src/tests/hash_functions_test/Makefile.manual b/src/tests/hash_functions_test/Makefile.manual index a649cd84c..4eb99bc1c 100644 --- a/src/tests/hash_functions_test/Makefile.manual +++ b/src/tests/hash_functions_test/Makefile.manual @@ -1,14 +1,14 @@ CC = gcc CXX = g++ -FFLAGS = -O3 +FFLAGS = -Wall -Wextra -Wimplicit-interface -fPIC -g -fcheck=all -fno-range-check CFLAGS = -O3 CXXFLAGS = -O3 CPPFLAGS += -I. -I../.. -I.. LDFLAGS += -L../.. -L.. 
-lstdlib-testing -lstdlib -PROGS = generate_key_array generate_hash_arrays hash_validity_test +PROGS = generate_key_array generate_hash_arrays hash_validity_test test_hash_functions TESTPROGS = $(PROGS:=TEST) all: $(PROGS) @@ -18,14 +18,14 @@ test: $(TESTPROGS) $(TESTPROGS): ./$(@:TEST=) - - generate_key_array: generate_key_array.f90 $(FC) $(FFLAGS) generate_key_array.f90 -o generate_key_array +test_hash_functions: test_hash_functions.f90 + $(FC) $(FFLAGS) $(CPPFLAGS) -o $@ $^ $(LDFLAGS) + hash_validity_test: hash_validity_test.f90 - $(FC) $(FFLAGS) $(CPPFLAGS) \ - hash_validity_test.f90 -o hash_validity_test $(LDFLAGS) + $(FC) $(FFLAGS) $(CPPFLAGS) -o $@ $^ $(LDFLAGS) generate_hash_arrays: generate_hash_arrays.o libc_hash.a $(CXX) $(CXXFLAGS) generate_hash_arrays.o \ @@ -55,8 +55,7 @@ nmhash_scalar.o: nmhash_scalar.c nmhash_scalar.h clean: rm nmhash_scalar.o SpookyV2Test.o SpookyV2.o waterhash.o pengyhash.o \ - libc_hash.a generate_hash_arrays.o generate_hash_arrays \ - hash_validity_test generate_key_array \ + libc_hash.a generate_hash_arrays.o $(PROGS) \ *.bin diff --git a/src/tests/hash_functions_test/test_hash_functions.f90 b/src/tests/hash_functions_test/test_hash_functions.f90 new file mode 100644 index 000000000..97655cf5c --- /dev/null +++ b/src/tests/hash_functions_test/test_hash_functions.f90 @@ -0,0 +1,271 @@ + +module test_hash_functions + use testdrive, only : new_unittest, unittest_type, error_type, check, skip_test + use stdlib_kinds, only: sp, dp, xdp, qp, int8, int16, int32, int64 + use stdlib_32_bit_hash_codes, only: little_endian & + , nmhash32 & + , nmhash32x & + , water_hash + use stdlib_64_bit_hash_codes, only: pengy_hash, spooky_hash + + implicit none + + real(sp), parameter :: sptol = 1000 * epsilon(1._sp) + real(dp), parameter :: dptol = 1000 * epsilon(1._dp) + + integer, parameter :: size_key_array = 2048 + + integer(int32), parameter :: nm_seed = int( z'deadbeef', int32 ) + integer(int64), parameter :: water_seed = int( 
z'deadbeef1eadbeef', int64 ) + integer(int32), parameter :: pengy_seed = int( z'deadbeef', int32 ) + integer(int64), parameter :: spooky_seed(2) = [ water_seed, water_seed ] + + interface read_array + module procedure read_array_int8 + module procedure read_array_int32 + module procedure read_array_int64 + module procedure read_2darray_int64 + end interface + +contains + + !> Collect all exported unit tests + subroutine collect_hash_functions(testsuite) + !> Collection of tests + type(unittest_type), allocatable, intent(out) :: testsuite(:) + + testsuite = [ & + new_unittest("dummy", test_dummy) & + , new_unittest("little_endian", test_little_endian) & + , new_unittest("nmhash32", test_nmhash32) & + , new_unittest("nmhash32x", test_nmhash32x) & + , new_unittest("water_hash", test_water_hash) & + , new_unittest("pengy_hash", test_pengy_hash) & + , new_unittest("spooky_hash", test_spooky_hash) & + ] + + end subroutine collect_hash_functions + + subroutine test_dummy(error) + !> Error handling + type(error_type), allocatable, intent(out) :: error + + call check(error, .true., "dummy") + if (allocated(error)) return + + end subroutine + + subroutine test_little_endian(error) + !> Error handling + type(error_type), allocatable, intent(out) :: error + + ! Test for endianness + + call check(error, little_endian, "The processor is not Little-Endian") + if (allocated(error)) return + + end subroutine + + subroutine test_nmhash32(error) + !> Error handling + type(error_type), allocatable, intent(out) :: error + + integer :: index + integer(int8) :: key_array(size_key_array) + integer(int32) :: c_hash(0:size_key_array) + + call read_array("key_array.bin", key_array ) + + ! 
Read hash array generated from key array by the C version of nmhash32 + call read_array("c_nmhash32_array.bin", c_hash) + + do index=0, 2048 + call check(error, c_hash(index) == nmhash32(key_array(1:index), nm_seed) & + , "NMHASH32 failed") + if (allocated(error)) return + end do + + end subroutine + + subroutine test_nmhash32x(error) + !> Error handling + type(error_type), allocatable, intent(out) :: error + + integer :: index + integer(int8) :: key_array(size_key_array) + integer(int32) :: c_hash(0:size_key_array) + + call read_array("key_array.bin", key_array ) + + ! Read hash array generated from key array by the C version of nmhash32x + call read_array("c_nmhash32x_array.bin", c_hash) + + do index=0, 2048 + call check(error, c_hash(index) == nmhash32x(key_array(1:index), nm_seed) & + , "NMHASH32X failed") + if (allocated(error)) return + end do + + end subroutine + + subroutine test_water_hash(error) + !> Error handling + type(error_type), allocatable, intent(out) :: error + + integer :: index + integer(int8) :: key_array(size_key_array) + integer(int32) :: c_hash(0:size_key_array) + + call read_array("key_array.bin", key_array ) + + ! Read hash array generated from key array by the C version of water_hash + call read_array("c_water_hash_array.bin", c_hash) + + do index=0, 2048 + call check(error, c_hash(index) == water_hash(key_array(1:index), water_seed) & + , "WATER_HASH failed") + if (allocated(error)) return + end do + + end subroutine + + subroutine test_pengy_hash(error) + !> Error handling + type(error_type), allocatable, intent(out) :: error + + integer :: index + integer(int8) :: key_array(size_key_array) + integer(int64) :: c_hash(0:size_key_array) + + call read_array("key_array.bin", key_array ) + + ! 
Read hash array generated from key array by the C version of pengy_hash + call read_array("c_pengy_hash_array.bin", c_hash) + + do index=0, 2048 + call check(error, c_hash(index) == pengy_hash(key_array(1:index), pengy_seed) & + , "PENGY_HASH failed") + if (allocated(error)) return + end do + + end subroutine + + subroutine test_spooky_hash(error) + !> Error handling + type(error_type), allocatable, intent(out) :: error + + integer :: index + integer(int8) :: key_array(size_key_array) + integer(int64) :: c_hash(0:1, 0:size_key_array) + + call read_array("key_array.bin", key_array ) + + ! Read hash array generated from key array by the C version of spooky_hash + call read_array("c_spooky_hash_array.bin", c_hash) + + do index=0, 2048 + call check(error, all(c_hash(:, index) == spooky_hash(key_array(1:index), spooky_seed)) & + , "SPOOKY_HASH failed") + if (allocated(error)) return + end do + + end subroutine + + + + subroutine read_array_int8(filename, res) + character(*), intent(in) :: filename + integer(int8), intent(out) :: res(:) + + integer :: lun + + open(newunit=lun, file=filename, form="unformatted", & + access="stream", status="old", action="read", err = 9908) + read(lun) res + close(lun) + + return + +9908 res = 0 + + end subroutine + + subroutine read_array_int32(filename, res) + character(*), intent(in) :: filename + integer(int32), intent(out) :: res(:) + + integer :: lun + + open(newunit=lun, file=filename, form="unformatted", & + access="stream", status="old", action="read", err = 9908) + read(lun) res + close(lun) + + return + +9908 res = 0 + + end subroutine + + subroutine read_array_int64(filename, res) + character(*), intent(in) :: filename + integer(int64), intent(out) :: res(:) + + integer :: lun + + open(newunit=lun, file=filename, form="unformatted", & + access="stream", status="old", action="read", err = 9908) + read(lun) res + close(lun) + + return + +9908 res = 0 + + end subroutine + + subroutine read_2darray_int64(filename, res) + 
character(*), intent(in) :: filename + integer(int64), intent(out) :: res(:,:) + + integer :: lun + + open(newunit=lun, file=filename, form="unformatted", & + access="stream", status="old", action="read", err = 9908) + read(lun) res + close(lun) + + return + +9908 res = 0 + + end subroutine + +end module + + +program tester + use, intrinsic :: iso_fortran_env, only : error_unit + use testdrive, only : run_testsuite, new_testsuite, testsuite_type + use test_hash_functions, only : collect_hash_functions + implicit none + integer :: stat, is + type(testsuite_type), allocatable :: testsuites(:) + character(len=*), parameter :: fmt = '("#", *(1x, a))' + + stat = 0 + + testsuites = [ & + new_testsuite("hash_functions", collect_hash_functions) & + ] + + do is = 1, size(testsuites) + write(error_unit, fmt) "Testing:", testsuites(is)%name + call run_testsuite(testsuites(is)%collect, error_unit, stat) + end do + + if (stat > 0) then + write(error_unit, '(i0, 1x, a)') stat, "test(s) failed!" + error stop + end if +end program From b02ee07b08a19381c2babde2d910f60e99266240 Mon Sep 17 00:00:00 2001 From: Jeremie Vandenplas Date: Fri, 3 Dec 2021 23:37:16 +0100 Subject: [PATCH 036/106] modify generate_key --- src/tests/Makefile.manual | 2 +- src/tests/hash_functions/Makefile.manual | 62 ++++++++++++++++++- .../README.md | 0 .../SpookyV2.cpp | 0 .../SpookyV2.h | 0 .../SpookyV2Test.cpp | 0 .../generate_hash_arrays.cpp | 0 .../generate_key_array.f90 | 2 +- .../hash_validity_test.f90 | 0 .../nmhash.c | 0 .../nmhash.h | 0 .../nmhash_scalar.c | 0 .../nmhash_scalar.h | 0 .../pengyhash.c | 0 .../pengyhash.h | 0 .../test_hash_functions.f90 | 0 .../waterhash.c | 0 .../waterhash.h | 0 .../CMakeLists.txt | 0 src/tests/hash_functions_perf/Makefile.manual | 3 + .../test_32_bit_hash_performance.f90 | 0 .../test_64_bit_hash_performance.f90 | 0 src/tests/hash_functions_test/Makefile.manual | 61 ------------------ 23 files changed, 65 insertions(+), 65 deletions(-) mode change 100755 => 100644 
src/tests/hash_functions/Makefile.manual rename src/tests/{hash_functions_test => hash_functions}/README.md (100%) rename src/tests/{hash_functions_test => hash_functions}/SpookyV2.cpp (100%) rename src/tests/{hash_functions_test => hash_functions}/SpookyV2.h (100%) rename src/tests/{hash_functions_test => hash_functions}/SpookyV2Test.cpp (100%) rename src/tests/{hash_functions_test => hash_functions}/generate_hash_arrays.cpp (100%) rename src/tests/{hash_functions_test => hash_functions}/generate_key_array.f90 (90%) rename src/tests/{hash_functions_test => hash_functions}/hash_validity_test.f90 (100%) rename src/tests/{hash_functions_test => hash_functions}/nmhash.c (100%) rename src/tests/{hash_functions_test => hash_functions}/nmhash.h (100%) rename src/tests/{hash_functions_test => hash_functions}/nmhash_scalar.c (100%) rename src/tests/{hash_functions_test => hash_functions}/nmhash_scalar.h (100%) rename src/tests/{hash_functions_test => hash_functions}/pengyhash.c (100%) rename src/tests/{hash_functions_test => hash_functions}/pengyhash.h (100%) rename src/tests/{hash_functions_test => hash_functions}/test_hash_functions.f90 (100%) rename src/tests/{hash_functions_test => hash_functions}/waterhash.c (100%) rename src/tests/{hash_functions_test => hash_functions}/waterhash.h (100%) rename src/tests/{hash_functions => hash_functions_perf}/CMakeLists.txt (100%) create mode 100755 src/tests/hash_functions_perf/Makefile.manual rename src/tests/{hash_functions => hash_functions_perf}/test_32_bit_hash_performance.f90 (100%) rename src/tests/{hash_functions => hash_functions_perf}/test_64_bit_hash_performance.f90 (100%) delete mode 100644 src/tests/hash_functions_test/Makefile.manual diff --git a/src/tests/Makefile.manual b/src/tests/Makefile.manual index 2c065507b..e8d299e42 100644 --- a/src/tests/Makefile.manual +++ b/src/tests/Makefile.manual @@ -14,8 +14,8 @@ testdrive.F90: all test clean:: $(MAKE) -f Makefile.manual --directory=ascii $@ $(MAKE) -f 
Makefile.manual --directory=bitsets $@ + $(MAKE) -f Makefile.manual --directory=hash_functions_perf $@ $(MAKE) -f Makefile.manual --directory=hash_functions $@ - $(MAKE) -f Makefile.manual --directory=hash_functions_test $@ $(MAKE) -f Makefile.manual --directory=io $@ $(MAKE) -f Makefile.manual --directory=logger $@ $(MAKE) -f Makefile.manual --directory=optval $@ diff --git a/src/tests/hash_functions/Makefile.manual b/src/tests/hash_functions/Makefile.manual old mode 100755 new mode 100644 index d3e59bd18..4eb99bc1c --- a/src/tests/hash_functions/Makefile.manual +++ b/src/tests/hash_functions/Makefile.manual @@ -1,3 +1,61 @@ -PROGS_SRC = test_64_bit_hash_performance.f90 test_32_bit_hash_performance.f90 +CC = gcc +CXX = g++ + +FFLAGS = -Wall -Wextra -Wimplicit-interface -fPIC -g -fcheck=all -fno-range-check +CFLAGS = -O3 +CXXFLAGS = -O3 + +CPPFLAGS += -I. -I../.. -I.. +LDFLAGS += -L../.. -L.. -lstdlib-testing -lstdlib + +PROGS = generate_key_array generate_hash_arrays hash_validity_test test_hash_functions +TESTPROGS = $(PROGS:=TEST) + +all: $(PROGS) + +test: $(TESTPROGS) + +$(TESTPROGS): + ./$(@:TEST=) + +generate_key_array: generate_key_array.f90 + $(FC) $(FFLAGS) generate_key_array.f90 -o generate_key_array + +test_hash_functions: test_hash_functions.f90 + $(FC) $(FFLAGS) $(CPPFLAGS) -o $@ $^ $(LDFLAGS) + +hash_validity_test: hash_validity_test.f90 + $(FC) $(FFLAGS) $(CPPFLAGS) -o $@ $^ $(LDFLAGS) + +generate_hash_arrays: generate_hash_arrays.o libc_hash.a + $(CXX) $(CXXFLAGS) generate_hash_arrays.o \ + -o generate_hash_arrays -L. 
-lc_hash + +generate_hash_arrays.o: generate_hash_arrays.cpp libc_hash.a + $(CXX) $(CXXFLAGS) -c generate_hash_arrays.cpp -o generate_hash_arrays.o + +libc_hash.a: SpookyV2.o SpookyV2Test.o pengyhash.o nmhash_scalar.o waterhash.o + ar rcs libc_hash.a SpookyV2.o SpookyV2Test.o pengyhash.o \ + nmhash_scalar.o waterhash.o + +pengyhash.o: pengyhash.c pengyhash.h + $(CC) $(CFLAGS) $(CPPFLAGS) -c pengyhash.c -o pengyhash.o + +waterhash.o: waterhash.c waterhash.h + $(CC) $(CFLAGS) $(CPPFLAGS) -c waterhash.c -o waterhash.o + +SpookyV2.o: SpookyV2.cpp SpookyV2.h + $(CXX) $(CXXFLAGS) $(CPPFLAGS) -c SpookyV2.cpp -o SpookyV2.o + +SpookyV2Test.o: SpookyV2Test.cpp SpookyV2.h + $(CXX) $(CXXFLAGS) $(CPPFLAGS) -c SpookyV2Test.cpp -o SpookyV2Test.o + +nmhash_scalar.o: nmhash_scalar.c nmhash_scalar.h + $(CC) $(CXXFLAGS) $(CPPFLAGS) -c nmhash_scalar.c -o nmhash_scalar.o + +clean: + rm nmhash_scalar.o SpookyV2Test.o SpookyV2.o waterhash.o pengyhash.o \ + libc_hash.a generate_hash_arrays.o $(PROGS) \ + *.bin + -include ../Makefile.manual.test.mk diff --git a/src/tests/hash_functions_test/README.md b/src/tests/hash_functions/README.md similarity index 100% rename from src/tests/hash_functions_test/README.md rename to src/tests/hash_functions/README.md diff --git a/src/tests/hash_functions_test/SpookyV2.cpp b/src/tests/hash_functions/SpookyV2.cpp similarity index 100% rename from src/tests/hash_functions_test/SpookyV2.cpp rename to src/tests/hash_functions/SpookyV2.cpp diff --git a/src/tests/hash_functions_test/SpookyV2.h b/src/tests/hash_functions/SpookyV2.h similarity index 100% rename from src/tests/hash_functions_test/SpookyV2.h rename to src/tests/hash_functions/SpookyV2.h diff --git a/src/tests/hash_functions_test/SpookyV2Test.cpp b/src/tests/hash_functions/SpookyV2Test.cpp similarity index 100% rename from src/tests/hash_functions_test/SpookyV2Test.cpp rename to src/tests/hash_functions/SpookyV2Test.cpp diff --git a/src/tests/hash_functions_test/generate_hash_arrays.cpp 
b/src/tests/hash_functions/generate_hash_arrays.cpp similarity index 100% rename from src/tests/hash_functions_test/generate_hash_arrays.cpp rename to src/tests/hash_functions/generate_hash_arrays.cpp diff --git a/src/tests/hash_functions_test/generate_key_array.f90 b/src/tests/hash_functions/generate_key_array.f90 similarity index 90% rename from src/tests/hash_functions_test/generate_key_array.f90 rename to src/tests/hash_functions/generate_key_array.f90 index 40b43a043..6c40ecb1d 100644 --- a/src/tests/hash_functions_test/generate_key_array.f90 +++ b/src/tests/hash_functions/generate_key_array.f90 @@ -15,7 +15,7 @@ program generate_key_array key_array = transfer( dummy, 0_int8, 2048 ) open(newunit=lun, file="key_array.bin", form="unformatted", & - access="stream", status="new", action="write") + access="stream", status="replace", action="write") write(lun) key_array close(lun) diff --git a/src/tests/hash_functions_test/hash_validity_test.f90 b/src/tests/hash_functions/hash_validity_test.f90 similarity index 100% rename from src/tests/hash_functions_test/hash_validity_test.f90 rename to src/tests/hash_functions/hash_validity_test.f90 diff --git a/src/tests/hash_functions_test/nmhash.c b/src/tests/hash_functions/nmhash.c similarity index 100% rename from src/tests/hash_functions_test/nmhash.c rename to src/tests/hash_functions/nmhash.c diff --git a/src/tests/hash_functions_test/nmhash.h b/src/tests/hash_functions/nmhash.h similarity index 100% rename from src/tests/hash_functions_test/nmhash.h rename to src/tests/hash_functions/nmhash.h diff --git a/src/tests/hash_functions_test/nmhash_scalar.c b/src/tests/hash_functions/nmhash_scalar.c similarity index 100% rename from src/tests/hash_functions_test/nmhash_scalar.c rename to src/tests/hash_functions/nmhash_scalar.c diff --git a/src/tests/hash_functions_test/nmhash_scalar.h b/src/tests/hash_functions/nmhash_scalar.h similarity index 100% rename from src/tests/hash_functions_test/nmhash_scalar.h rename to 
src/tests/hash_functions/nmhash_scalar.h diff --git a/src/tests/hash_functions_test/pengyhash.c b/src/tests/hash_functions/pengyhash.c similarity index 100% rename from src/tests/hash_functions_test/pengyhash.c rename to src/tests/hash_functions/pengyhash.c diff --git a/src/tests/hash_functions_test/pengyhash.h b/src/tests/hash_functions/pengyhash.h similarity index 100% rename from src/tests/hash_functions_test/pengyhash.h rename to src/tests/hash_functions/pengyhash.h diff --git a/src/tests/hash_functions_test/test_hash_functions.f90 b/src/tests/hash_functions/test_hash_functions.f90 similarity index 100% rename from src/tests/hash_functions_test/test_hash_functions.f90 rename to src/tests/hash_functions/test_hash_functions.f90 diff --git a/src/tests/hash_functions_test/waterhash.c b/src/tests/hash_functions/waterhash.c similarity index 100% rename from src/tests/hash_functions_test/waterhash.c rename to src/tests/hash_functions/waterhash.c diff --git a/src/tests/hash_functions_test/waterhash.h b/src/tests/hash_functions/waterhash.h similarity index 100% rename from src/tests/hash_functions_test/waterhash.h rename to src/tests/hash_functions/waterhash.h diff --git a/src/tests/hash_functions/CMakeLists.txt b/src/tests/hash_functions_perf/CMakeLists.txt similarity index 100% rename from src/tests/hash_functions/CMakeLists.txt rename to src/tests/hash_functions_perf/CMakeLists.txt diff --git a/src/tests/hash_functions_perf/Makefile.manual b/src/tests/hash_functions_perf/Makefile.manual new file mode 100755 index 000000000..d3e59bd18 --- /dev/null +++ b/src/tests/hash_functions_perf/Makefile.manual @@ -0,0 +1,3 @@ +PROGS_SRC = test_64_bit_hash_performance.f90 test_32_bit_hash_performance.f90 + +include ../Makefile.manual.test.mk diff --git a/src/tests/hash_functions/test_32_bit_hash_performance.f90 b/src/tests/hash_functions_perf/test_32_bit_hash_performance.f90 similarity index 100% rename from src/tests/hash_functions/test_32_bit_hash_performance.f90 rename to 
src/tests/hash_functions_perf/test_32_bit_hash_performance.f90 diff --git a/src/tests/hash_functions/test_64_bit_hash_performance.f90 b/src/tests/hash_functions_perf/test_64_bit_hash_performance.f90 similarity index 100% rename from src/tests/hash_functions/test_64_bit_hash_performance.f90 rename to src/tests/hash_functions_perf/test_64_bit_hash_performance.f90 diff --git a/src/tests/hash_functions_test/Makefile.manual b/src/tests/hash_functions_test/Makefile.manual deleted file mode 100644 index 4eb99bc1c..000000000 --- a/src/tests/hash_functions_test/Makefile.manual +++ /dev/null @@ -1,61 +0,0 @@ -CC = gcc -CXX = g++ - -FFLAGS = -Wall -Wextra -Wimplicit-interface -fPIC -g -fcheck=all -fno-range-check -CFLAGS = -O3 -CXXFLAGS = -O3 - -CPPFLAGS += -I. -I../.. -I.. -LDFLAGS += -L../.. -L.. -lstdlib-testing -lstdlib - -PROGS = generate_key_array generate_hash_arrays hash_validity_test test_hash_functions -TESTPROGS = $(PROGS:=TEST) - -all: $(PROGS) - -test: $(TESTPROGS) - -$(TESTPROGS): - ./$(@:TEST=) - -generate_key_array: generate_key_array.f90 - $(FC) $(FFLAGS) generate_key_array.f90 -o generate_key_array - -test_hash_functions: test_hash_functions.f90 - $(FC) $(FFLAGS) $(CPPFLAGS) -o $@ $^ $(LDFLAGS) - -hash_validity_test: hash_validity_test.f90 - $(FC) $(FFLAGS) $(CPPFLAGS) -o $@ $^ $(LDFLAGS) - -generate_hash_arrays: generate_hash_arrays.o libc_hash.a - $(CXX) $(CXXFLAGS) generate_hash_arrays.o \ - -o generate_hash_arrays -L. 
-lc_hash - -generate_hash_arrays.o: generate_hash_arrays.cpp libc_hash.a - $(CXX) $(CXXFLAGS) -c generate_hash_arrays.cpp -o generate_hash_arrays.o - -libc_hash.a: SpookyV2.o SpookyV2Test.o pengyhash.o nmhash_scalar.o waterhash.o - ar rcs libc_hash.a SpookyV2.o SpookyV2Test.o pengyhash.o \ - nmhash_scalar.o waterhash.o - -pengyhash.o: pengyhash.c pengyhash.h - $(CC) $(CFLAGS) $(CPPFLAGS) -c pengyhash.c -o pengyhash.o - -waterhash.o: waterhash.c waterhash.h - $(CC) $(CFLAGS) $(CPPFLAGS) -c waterhash.c -o waterhash.o - -SpookyV2.o: SpookyV2.cpp SpookyV2.h - $(CXX) $(CXXFLAGS) $(CPPFLAGS) -c SpookyV2.cpp -o SpookyV2.o - -SpookyV2Test.o: SpookyV2Test.cpp SpookyV2.h - $(CXX) $(CXXFLAGS) $(CPPFLAGS) -c SpookyV2Test.cpp -o SpookyV2Test.o - -nmhash_scalar.o: nmhash_scalar.c nmhash_scalar.h - $(CC) $(CXXFLAGS) $(CPPFLAGS) -c nmhash_scalar.c -o nmhash_scalar.o - -clean: - rm nmhash_scalar.o SpookyV2Test.o SpookyV2.o waterhash.o pengyhash.o \ - libc_hash.a generate_hash_arrays.o $(PROGS) \ - *.bin - - From 03fef48b5f83b1b10ea36cbd99263515f1afbf4d Mon Sep 17 00:00:00 2001 From: Jeremie Vandenplas Date: Fri, 3 Dec 2021 23:40:56 +0100 Subject: [PATCH 037/106] add cmakefiles --- src/tests/CMakeLists.txt | 1 + src/tests/hash_functions/CMakeLists.txt | 1 + 2 files changed, 2 insertions(+) create mode 100755 src/tests/hash_functions/CMakeLists.txt diff --git a/src/tests/CMakeLists.txt b/src/tests/CMakeLists.txt index f3e7c7c3b..bdb390cd0 100644 --- a/src/tests/CMakeLists.txt +++ b/src/tests/CMakeLists.txt @@ -18,6 +18,7 @@ list( add_subdirectory(ascii) add_subdirectory(bitsets) add_subdirectory(hash_functions) +add_subdirectory(hash_functions_perf) add_subdirectory(io) add_subdirectory(linalg) add_subdirectory(logger) diff --git a/src/tests/hash_functions/CMakeLists.txt b/src/tests/hash_functions/CMakeLists.txt new file mode 100755 index 000000000..2039910dd --- /dev/null +++ b/src/tests/hash_functions/CMakeLists.txt @@ -0,0 +1 @@ +ADDTEST(test_hash_functions) From 
300bfa7a39cd47423ce2c26a2e4a1def45ba6f95 Mon Sep 17 00:00:00 2001 From: Jeremie Vandenplas Date: Fri, 3 Dec 2021 23:42:24 +0100 Subject: [PATCH 038/106] update CMakeLists.txt --- src/tests/hash_functions/CMakeLists.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/tests/hash_functions/CMakeLists.txt b/src/tests/hash_functions/CMakeLists.txt index 2039910dd..41649fb02 100755 --- a/src/tests/hash_functions/CMakeLists.txt +++ b/src/tests/hash_functions/CMakeLists.txt @@ -1 +1 @@ -ADDTEST(test_hash_functions) +ADDTEST(hash_functions) From 662597e81ab26172f189d083ec1e1162ad00d15e Mon Sep 17 00:00:00 2001 From: Jeremie Vandenplas Date: Fri, 3 Dec 2021 23:52:22 +0100 Subject: [PATCH 039/106] add generate_key into test_hash_functions --- .../hash_functions/test_hash_functions.f90 | 33 +++++++++++++++---- 1 file changed, 27 insertions(+), 6 deletions(-) diff --git a/src/tests/hash_functions/test_hash_functions.f90 b/src/tests/hash_functions/test_hash_functions.f90 index 97655cf5c..5b89b86d3 100644 --- a/src/tests/hash_functions/test_hash_functions.f90 +++ b/src/tests/hash_functions/test_hash_functions.f90 @@ -1,4 +1,3 @@ - module test_hash_functions use testdrive, only : new_unittest, unittest_type, error_type, check, skip_test use stdlib_kinds, only: sp, dp, xdp, qp, int8, int16, int32, int64 @@ -79,7 +78,7 @@ subroutine test_nmhash32(error) ! Read hash array generated from key array by the C version of nmhash32 call read_array("c_nmhash32_array.bin", c_hash) - do index=0, 2048 + do index=0, size_key_array call check(error, c_hash(index) == nmhash32(key_array(1:index), nm_seed) & , "NMHASH32 failed") if (allocated(error)) return @@ -100,7 +99,7 @@ subroutine test_nmhash32x(error) ! 
Read hash array generated from key array by the C version of nmhash32x call read_array("c_nmhash32x_array.bin", c_hash) - do index=0, 2048 + do index=0, size_key_array call check(error, c_hash(index) == nmhash32x(key_array(1:index), nm_seed) & , "NMHASH32X failed") if (allocated(error)) return @@ -121,7 +120,7 @@ subroutine test_water_hash(error) ! Read hash array generated from key array by the C version of water_hash call read_array("c_water_hash_array.bin", c_hash) - do index=0, 2048 + do index=0, size_key_array call check(error, c_hash(index) == water_hash(key_array(1:index), water_seed) & , "WATER_HASH failed") if (allocated(error)) return @@ -142,7 +141,7 @@ subroutine test_pengy_hash(error) ! Read hash array generated from key array by the C version of pengy_hash call read_array("c_pengy_hash_array.bin", c_hash) - do index=0, 2048 + do index=0, size_key_array call check(error, c_hash(index) == pengy_hash(key_array(1:index), pengy_seed) & , "PENGY_HASH failed") if (allocated(error)) return @@ -163,7 +162,7 @@ subroutine test_spooky_hash(error) ! Read hash array generated from key array by the C version of spooky_hash call read_array("c_spooky_hash_array.bin", c_hash) - do index=0, 2048 + do index=0, size_key_array call check(error, all(c_hash(:, index) == spooky_hash(key_array(1:index), spooky_seed)) & , "SPOOKY_HASH failed") if (allocated(error)) return @@ -173,6 +172,28 @@ subroutine test_spooky_hash(error) + subroutine generate_key_array() + + integer :: lun + integer(int8) :: key_array(size_key_array) + integer(int32) :: dummy(size_key_array/4) + real(real64) :: rand(size_key_array/4) + + ! 
Create key array + call random_number( rand ) + do i=1, size_key_array/4 + dummy(i) = floor( rand(i) * 2_int64**32 - 2_int64**31, kind=int32 ) + end do + key_array = transfer( dummy, 0_int8, size_key_array ) + + open(newunit=lun, file="key_array.bin", form="unformatted", & + access="stream", status="replace", action="write") + write(lun) key_array + close(lun) + + end subroutine + + subroutine read_array_int8(filename, res) character(*), intent(in) :: filename integer(int8), intent(out) :: res(:) From 773afa76b4a256f3aaa1c06f373670d9f045da16 Mon Sep 17 00:00:00 2001 From: Jeremie Vandenplas Date: Fri, 3 Dec 2021 23:58:48 +0100 Subject: [PATCH 040/106] fix issues --- src/tests/hash_functions/test_hash_functions.f90 | 9 +++------ 1 file changed, 3 insertions(+), 6 deletions(-) diff --git a/src/tests/hash_functions/test_hash_functions.f90 b/src/tests/hash_functions/test_hash_functions.f90 index 5b89b86d3..51b55d81c 100644 --- a/src/tests/hash_functions/test_hash_functions.f90 +++ b/src/tests/hash_functions/test_hash_functions.f90 @@ -1,6 +1,6 @@ module test_hash_functions use testdrive, only : new_unittest, unittest_type, error_type, check, skip_test - use stdlib_kinds, only: sp, dp, xdp, qp, int8, int16, int32, int64 + use stdlib_kinds, only: int8, int32, int64, dp use stdlib_32_bit_hash_codes, only: little_endian & , nmhash32 & , nmhash32x & @@ -9,9 +9,6 @@ module test_hash_functions implicit none - real(sp), parameter :: sptol = 1000 * epsilon(1._sp) - real(dp), parameter :: dptol = 1000 * epsilon(1._dp) - integer, parameter :: size_key_array = 2048 integer(int32), parameter :: nm_seed = int( z'deadbeef', int32 ) @@ -174,10 +171,10 @@ subroutine test_spooky_hash(error) subroutine generate_key_array() - integer :: lun + integer :: i, lun integer(int8) :: key_array(size_key_array) integer(int32) :: dummy(size_key_array/4) - real(real64) :: rand(size_key_array/4) + real(dp) :: rand(size_key_array/4) ! 
Create key array call random_number( rand ) From ece5b03d253976bb34b237d1b310cc074cffbe5a Mon Sep 17 00:00:00 2001 From: Jeremie Vandenplas Date: Sat, 4 Dec 2021 00:03:03 +0100 Subject: [PATCH 041/106] comment hash_function test --- src/tests/CMakeLists.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/tests/CMakeLists.txt b/src/tests/CMakeLists.txt index bdb390cd0..87f23516f 100644 --- a/src/tests/CMakeLists.txt +++ b/src/tests/CMakeLists.txt @@ -18,7 +18,7 @@ list( add_subdirectory(ascii) add_subdirectory(bitsets) add_subdirectory(hash_functions) -add_subdirectory(hash_functions_perf) +#add_subdirectory(hash_functions_perf) add_subdirectory(io) add_subdirectory(linalg) add_subdirectory(logger) From f57248ad31573d480a7d8b546ccb89693c102e71 Mon Sep 17 00:00:00 2001 From: William Clodius Date: Fri, 3 Dec 2021 21:07:25 -0700 Subject: [PATCH 042/106] Changed procedures to elemental In response to gareth-nx's question as to whether some procedures could be elemental I changed the documentation in stdlib_hash_procedures.md. 1. All fibonacci and universal_mult_hash became elemental 2. The incorrect description of spooky_hash as pure was removed 3. Most hash functions acting on character string were described as elemental, the exception being spooky_hash. [ticket: X] --- doc/specs/stdlib_hash_procedures.md | 44 ++++++++++++++++++++--------- 1 file changed, 30 insertions(+), 14 deletions(-) diff --git a/doc/specs/stdlib_hash_procedures.md b/doc/specs/stdlib_hash_procedures.md index a298b3513..0a4f89918 100755 --- a/doc/specs/stdlib_hash_procedures.md +++ b/doc/specs/stdlib_hash_procedures.md @@ -510,7 +510,7 @@ Experimental ##### Description -Calculates an `nbits` hash code from a 64 bit integer. This is useful +Calculates an `nbits` hash code from a 32 bit integer. This is useful in mapping hash codes into small arrays. ##### Syntax @@ -519,7 +519,7 @@ in mapping hash codes into small arrays.
##### Class -Pure function +Elemental function ##### Arguments @@ -576,7 +576,7 @@ character string. ##### Class -Pure function +Pure/elemental function ##### Argument @@ -603,6 +603,8 @@ applications. This code does not pass any of the SMHasher tests, but the resulting degradation in performance due to its larger number of collisions is expected to be minor compared to its faster hashing rate. +It is a *pure* function for integer arrays, and an *elemental* +function for character strings. ##### Example @@ -636,7 +638,7 @@ character string. ##### Class -Pure function +Pure/elemental function ##### Argument @@ -663,6 +665,8 @@ applications. This code does not pass any of the SMHasher tests, but the resulting degradation in performance due to its larger number of collisions is expected to be minor compared to its faster hashing rate. +It is a *pure* function for integer arrays, and an *elemental* +function for character strings. ##### Example @@ -810,7 +814,7 @@ character string, and the input `seed`. ##### Class -Pure function +Pure/elemental function ##### Arguments @@ -835,6 +839,8 @@ performance on short keys. As a result it should give fair performance for typical hash table applications. This code passes the SMHasher tests, and has no known bad seeds. +It is a *pure* function for integer arrays, and an *elemental* +function for character strings. ##### Example @@ -870,7 +876,7 @@ character string, and the input `seed`. ##### Class -Pure function +Pure/elemental function ##### Arguments @@ -895,6 +901,8 @@ performance on short keys. As a result it should give fair performance for typical hash table applications. This code passes the SMHasher tests, and has no known bad seeds. +It is a *pure* function for integer arrays, and an *elemental* +function for character strings. ##### Example @@ -964,7 +972,7 @@ in mapping a hash value to a range 0 to `2**nbits-1`. 
##### Class -Pure function +Elemental function ##### Arguments @@ -1029,7 +1037,7 @@ character string, and the input `seed`. ##### Class -Pure function +Pure/elemental function ##### Arguments @@ -1061,6 +1069,8 @@ so far testing has not found any bad seeds for `waterhash`. It can have undefined behavior if the key is not word aligned, i.e. some computer processors can only process a given size integer if the address of the integer is a multiple of the integer size. +It is a *pure* function for integer arrays, and an *elemental* +function for character strings. ##### Example @@ -1141,7 +1151,7 @@ in mapping hash codes into small arrays. ##### Class -Pure function +Elemental function ##### Arguments @@ -1198,7 +1208,7 @@ character string. ##### Class -Pure function +Pure/elemental function ##### Argument @@ -1225,6 +1235,8 @@ applications, although it is rare for them to need 64 bits. This code does not pass any of the SMHasher tests, but the resulting degradation in performance due to its larger number of collisions is expected to be minor compared to its faster hashing rate. +It is a *pure* function for integer arrays, and an *elemental* +function for character strings. ##### Example @@ -1260,7 +1272,7 @@ character string. ##### Class -Pure function +Pure/elemental function ##### Argument @@ -1287,6 +1299,8 @@ applications. This code does not pass any of the SMHasher tests, but the resulting degradation in performance due to its larger number of collisions is expected to be minor compared to its faster hashing rate. +It is a *pure* function for integer arrays, and an *elemental* +function for character strings. ##### Example @@ -1433,7 +1447,7 @@ value also depends on a scalar 32 bit integer, `seed`. ##### Class -Pure function +Pure/elemental function ##### Arguments @@ -1454,6 +1468,8 @@ The result is an integer of kind `INT64`. Fajardo. The hash has acceptable performance on small keys, and good performance on long keys. 
It passes all the SMHasher tests, and has no known bad seeds. +It is a *pure* function for integer arrays, and an *elemental* +function for character strings. ##### Exampl @@ -1491,7 +1507,7 @@ value also depends on a two element vector, `seed`. ##### Class -Pure function +Function ##### Arguments @@ -1552,7 +1568,7 @@ in mapping a hash value to a range 0 to `2**nbits-1`. ##### Class -Pure function +Elemental function ##### Arguments From b5af57277df6164837aaf0348a93cd7d893fd851 Mon Sep 17 00:00:00 2001 From: William Clodius Date: Fri, 3 Dec 2021 21:15:54 -0700 Subject: [PATCH 043/106] Removed Murmur hash references and added elemental attribute 1. Removed references to the murmur hash from stdlib_64_bit_hash_codes.fypp 2. Described all fibonacci and universal_mult_hash procedures as elemental 3. Described fnv_1, FNV_1a, nmhash32, nmhash32x, water_hash, pengy_hash operating on character strings as elemental. [ticket: X] --- src/stdlib_32_bit_fnv_hashes.fypp | 4 ++-- src/stdlib_32_bit_hash_codes.fypp | 14 ++++++------ src/stdlib_32_bit_nmhashes.fypp | 4 ++-- src/stdlib_32_bit_water_hashes.fypp | 3 ++- src/stdlib_64_bit_fnv_hashes.fypp | 4 ++-- src/stdlib_64_bit_hash_codes.fypp | 35 +++++------------------------ src/stdlib_64_bit_pengy_hashes.fypp | 3 ++- 7 files changed, 22 insertions(+), 45 deletions(-) diff --git a/src/stdlib_32_bit_fnv_hashes.fypp b/src/stdlib_32_bit_fnv_hashes.fypp index a8ef7607a..5b7b227b1 100755 --- a/src/stdlib_32_bit_fnv_hashes.fypp +++ b/src/stdlib_32_bit_fnv_hashes.fypp @@ -61,7 +61,7 @@ contains #:endfor - pure module function character_fnv_1( key ) result(hash_code) + elemental module function character_fnv_1( key ) result(hash_code) ! A default character key wrapper for the FNV-1 algorithm. character(*), intent(in) :: key integer(int_hash) :: hash_code @@ -112,7 +112,7 @@ contains #:endfor - pure module function character_fnv_1a( key ) result(hash_code) + elemental module function character_fnv_1a( key ) result(hash_code) !
A default character key wrapper for the FNV-1 algorithm. character(*), intent(in) :: key integer(int_hash) :: hash_code diff --git a/src/stdlib_32_bit_hash_codes.fypp b/src/stdlib_32_bit_hash_codes.fypp index 655e90d12..79b05b366 100755 --- a/src/stdlib_32_bit_hash_codes.fypp +++ b/src/stdlib_32_bit_hash_codes.fypp @@ -81,7 +81,7 @@ module stdlib_32_bit_hash_codes #:endfor - pure module function character_fnv_1( key ) result(hash_code) + elemental module function character_fnv_1( key ) result(hash_code) !! FNV_1 hash function for default character string keys character(*), intent(in) :: key integer(int_hash) :: hash_code @@ -100,7 +100,7 @@ module stdlib_32_bit_hash_codes #:endfor - pure module function character_fnv_1a( key ) result(hash_value) + elemental module function character_fnv_1a( key ) result(hash_value) !! FNV_1A hash function for default character string keys character(*), intent(in) :: key integer(int_hash) :: hash_value @@ -122,7 +122,7 @@ module stdlib_32_bit_hash_codes #:endfor - pure module function character_nmhash32( key, seed ) & + elemental module function character_nmhash32( key, seed ) & result(hash_value) !! NMHASH32 hash function for default character string keys character(*), intent(in) :: key @@ -146,7 +146,7 @@ module stdlib_32_bit_hash_codes #:endfor - pure module function character_nmhash32x( key, seed ) & + elemental module function character_nmhash32x( key, seed ) & result(hash_value) !! NMHASH32 hash function for default character string keys character(*), intent(in) :: key @@ -169,7 +169,7 @@ module stdlib_32_bit_hash_codes end function ${k1}$_water_hash #:endfor - pure module function character_water_hash( key, seed ) & + elemental module function character_water_hash( key, seed ) & result(hash_code) !! 
WATER hash function for default character string keys character(*), intent(in) :: key @@ -205,7 +205,7 @@ module stdlib_32_bit_hash_codes contains - pure function fibonacci_hash( key, nbits ) result( sample ) + elemental function fibonacci_hash( key, nbits ) result( sample ) !! Maps the 32 bit integer KEY to an unsigned integer value with only NBITS !! bits where NBITS is less than 32 integer(int32), intent(in) :: key @@ -216,7 +216,7 @@ contains end function fibonacci_hash - pure function universal_mult_hash( key, seed, nbits ) result( sample ) + elemental function universal_mult_hash( key, seed, nbits ) result( sample ) !! Uses the "random" odd 32 bit integer SEED to map the 32 bit integer KEY to !! an unsigned integer value with only NBITS bits where NBITS is less than 32 integer(int32), intent(in) :: key diff --git a/src/stdlib_32_bit_nmhashes.fypp b/src/stdlib_32_bit_nmhashes.fypp index 93eab2b80..6ae0477f9 100755 --- a/src/stdlib_32_bit_nmhashes.fypp +++ b/src/stdlib_32_bit_nmhashes.fypp @@ -724,7 +724,7 @@ contains #:endfor - pure module function character_nmhash32( key, seed ) result(hash_code) + elemental module function character_nmhash32( key, seed ) result(hash_code) !! NMHASH32 hash function for default character keys character(*), intent(in) :: key integer(int32), intent(in) :: seed @@ -749,7 +749,7 @@ contains #:endfor - pure module function character_nmhash32x( key, seed ) result(hash_code) + elemental module function character_nmhash32x( key, seed ) result(hash_code) !! 
NMHASH32X hash function for default character keys character(*), intent(in) :: key integer(int32), intent(in) :: seed diff --git a/src/stdlib_32_bit_water_hashes.fypp b/src/stdlib_32_bit_water_hashes.fypp index a20427702..240a94d28 100755 --- a/src/stdlib_32_bit_water_hashes.fypp +++ b/src/stdlib_32_bit_water_hashes.fypp @@ -250,7 +250,8 @@ contains #:endfor - pure module function character_water_hash( key, seed ) result(hash_code) + elemental module function character_water_hash( key, seed ) & + result(hash_code) character(*), intent(in) :: key integer(int64), intent(in) :: seed integer(int_hash) :: hash_code diff --git a/src/stdlib_64_bit_fnv_hashes.fypp b/src/stdlib_64_bit_fnv_hashes.fypp index 099a99ca2..6a9fd75b3 100755 --- a/src/stdlib_64_bit_fnv_hashes.fypp +++ b/src/stdlib_64_bit_fnv_hashes.fypp @@ -62,7 +62,7 @@ contains #:endfor - pure module function character_fnv_1( key ) result(hash_code) + elemental module function character_fnv_1( key ) result(hash_code) character(*), intent(in) :: key integer(int_hash) :: hash_code @@ -112,7 +112,7 @@ contains #:endfor - pure module function character_fnv_1a( key ) result(hash_code) + elemental module function character_fnv_1a( key ) result(hash_code) character(*), intent(in) :: key integer(int_hash) :: hash_code diff --git a/src/stdlib_64_bit_hash_codes.fypp b/src/stdlib_64_bit_hash_codes.fypp index 9f110981b..45c721db1 100755 --- a/src/stdlib_64_bit_hash_codes.fypp +++ b/src/stdlib_64_bit_hash_codes.fypp @@ -99,7 +99,7 @@ module stdlib_64_bit_hash_codes end function ${k1}$_fnv_1 #:endfor - pure module function character_fnv_1( key ) result(hash_code) + elemental module function character_fnv_1( key ) result(hash_code) !! 
FNV_1 hash function for character strings character(*), intent(in) :: key integer(int_hash) :: hash_code @@ -118,7 +118,7 @@ module stdlib_64_bit_hash_codes end function ${k1}$_fnv_1a #:endfor - pure module function character_fnv_1a( key ) result(hash_code) + elemental module function character_fnv_1a( key ) result(hash_code) !! FNV_1A hash function for character strings character(*), intent(in) :: key integer(int_hash) :: hash_code @@ -126,31 +126,6 @@ module stdlib_64_bit_hash_codes end interface fnv_1a_hash - - interface murmur2_hash -!! MURMUR2_HASHES interfaces - - #:for k1 in INT_KINDS - pure module function ${k1}$_murmur2_hash( key, seed ) & - result(hash_code) -!! MURMUR2 hash function for rank 1 arrays of kind ${k1}$ - integer(${k1}$), intent(in) :: key(0:) - integer(int_hash), intent(in) :: seed - integer(int_hash) :: hash_code - end function ${k1}$_murmur2_hash - #:endfor - - pure module function character_murmur2_hash( key, seed ) & - result(hash_code) -!! MURMUR2 hash function for character strings - character(*), intent(in) :: key - integer(int_hash), intent(in) :: seed - integer(int_hash) :: hash_code - end function character_murmur2_hash - - end interface murmur2_hash - - interface spooky_hash !! SPOOKY_HASH interfaces @@ -234,7 +209,7 @@ interface end function ${k1}$_pengy_hash #:endfor - pure module function character_pengy_hash( key, seed ) & + elemental module function character_pengy_hash( key, seed ) & result(hash_code) !! MIR HASH STRICT function for character strings character(*), intent(in) :: key @@ -255,7 +230,7 @@ interface contains - pure function fibonacci_hash( key, nbits ) result( sample ) + elemental function fibonacci_hash( key, nbits ) result( sample ) !! Maps the 64 bit integer KEY to an unsigned integer value with only NBITS !! 
bits where NBITS is less than 64 integer(int64), intent(in) :: key @@ -266,7 +241,7 @@ contains end function fibonacci_hash - pure function universal_mult_hash( key, seed, nbits ) result( sample ) + elemental function universal_mult_hash( key, seed, nbits ) result( sample ) !! Uses the "random" odd 64 bit integer SEED to map the 64 bit integer KEY to !! an unsigned integer value with only NBITS bits where NBITS is less than 64. integer(int64), intent(in) :: key diff --git a/src/stdlib_64_bit_pengy_hashes.fypp b/src/stdlib_64_bit_pengy_hashes.fypp index 6eef988fd..449698daa 100755 --- a/src/stdlib_64_bit_pengy_hashes.fypp +++ b/src/stdlib_64_bit_pengy_hashes.fypp @@ -118,7 +118,8 @@ contains #:endfor - pure module function character_pengy_hash( key, seed ) result(hash_code) + elemental module function character_pengy_hash( key, seed ) & + result(hash_code) !! PENGY_HASH hash function for default character keys character(*), intent(in) :: key integer(int32), intent(in) :: seed From 1c3a7072df529ec74f4461a6a77e915426546b54 Mon Sep 17 00:00:00 2001 From: William Clodius Date: Fri, 3 Dec 2021 22:14:38 -0700 Subject: [PATCH 044/106] Fixed typo MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Changed "azray1" to "array1". [ticket: X] --- doc/specs/stdlib_hash_procedures.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/specs/stdlib_hash_procedures.md b/doc/specs/stdlib_hash_procedures.md index 0a4f89918..72e8055d6 100755 --- a/doc/specs/stdlib_hash_procedures.md +++ b/doc/specs/stdlib_hash_procedures.md @@ -554,7 +554,7 @@ E. Knuth.
It multiplies the `KEY` by the odd valued approximation to array1(:) = 0 source = 42_int32 hash = fibonacci_hash(source, 6) - azray1(hash) = source + array1(hash) = source print *, hash end program demo_fibonacci_hash ``` From 044136c81c8be8f5af6ddb09b140232cd0619285 Mon Sep 17 00:00:00 2001 From: Jeremie Vandenplas Date: Sat, 4 Dec 2021 18:25:29 +0100 Subject: [PATCH 045/106] integrate all in test_hash_functions.f90 --- src/tests/hash_functions/Makefile.manual | 7 +++-- .../hash_functions/generate_hash_arrays.cpp | 15 ++++++++++ .../hash_functions/test_hash_functions.f90 | 28 +++++++++++++++++-- 3 files changed, 45 insertions(+), 5 deletions(-) diff --git a/src/tests/hash_functions/Makefile.manual b/src/tests/hash_functions/Makefile.manual index 4eb99bc1c..679b63ce5 100644 --- a/src/tests/hash_functions/Makefile.manual +++ b/src/tests/hash_functions/Makefile.manual @@ -8,7 +8,8 @@ CXXFLAGS = -O3 CPPFLAGS += -I. -I../.. -I.. LDFLAGS += -L../.. -L.. -lstdlib-testing -lstdlib -PROGS = generate_key_array generate_hash_arrays hash_validity_test test_hash_functions +#PROGS = generate_key_array generate_hash_arrays hash_validity_test test_hash_functions +PROGS = generate_key_array hash_validity_test test_hash_functions TESTPROGS = $(PROGS:=TEST) all: $(PROGS) @@ -21,8 +22,8 @@ $(TESTPROGS): generate_key_array: generate_key_array.f90 $(FC) $(FFLAGS) generate_key_array.f90 -o generate_key_array -test_hash_functions: test_hash_functions.f90 - $(FC) $(FFLAGS) $(CPPFLAGS) -o $@ $^ $(LDFLAGS) +test_hash_functions: test_hash_functions.f90 generate_hash_arrays.o libc_hash.a + $(FC) $(FFLAGS) $(CPPFLAGS) -L. 
-o $@ $^ $(LDFLAGS) -lc_hash -lstdc++ hash_validity_test: hash_validity_test.f90 $(FC) $(FFLAGS) $(CPPFLAGS) -o $@ $^ $(LDFLAGS) diff --git a/src/tests/hash_functions/generate_hash_arrays.cpp b/src/tests/hash_functions/generate_hash_arrays.cpp index bcd95dfb3..6a61cd589 100644 --- a/src/tests/hash_functions/generate_hash_arrays.cpp +++ b/src/tests/hash_functions/generate_hash_arrays.cpp @@ -6,8 +6,10 @@ extern "C" { #include "nmhash_scalar.h" #include "pengyhash.h" #include "waterhash.h" + int generate_all_c_hash(); } + #include "SpookyV2.h" void SpookyHash32_with_state_test(const void *key, size_t len, const void *state, void *out) { @@ -163,6 +165,18 @@ int write_spooky(){ return 0; } +int generate_all_c_hash(){ + if (read_keys()==1){return 1;}; + if (write_nmhash32()==1){return 1;}; + if (write_nmhash32x()==1){return 1;}; + if (write_water()==1){return 1;}; + if (write_pengy()==1){return 1;}; + if (write_spooky()==1){return 1;}; + return 0; +} + + +/* int main(){ if (read_keys()==1){return 1;}; if (write_nmhash32()==1){return 1;}; @@ -172,3 +186,4 @@ int main(){ if (write_spooky()==1){return 1;}; return 0; } +*/ diff --git a/src/tests/hash_functions/test_hash_functions.f90 b/src/tests/hash_functions/test_hash_functions.f90 index 51b55d81c..7c39ddc18 100644 --- a/src/tests/hash_functions/test_hash_functions.f90 +++ b/src/tests/hash_functions/test_hash_functions.f90 @@ -8,6 +8,9 @@ module test_hash_functions use stdlib_64_bit_hash_codes, only: pengy_hash, spooky_hash implicit none + private + public :: collect_hash_functions + public :: generate_key_array integer, parameter :: size_key_array = 2048 @@ -168,7 +171,6 @@ subroutine test_spooky_hash(error) end subroutine - subroutine generate_key_array() integer :: i, lun @@ -261,16 +263,38 @@ subroutine read_2darray_int64(filename, res) end module +module modchash + use, intrinsic :: ISO_C_Binding + implicit none + private + public :: generate_all_c_hash + + interface + function generate_all_c_hash() result(error) 
bind(C,name = "generate_all_c_hash") + import C_int + integer(C_int) :: error + end function + end interface + +end module program tester use, intrinsic :: iso_fortran_env, only : error_unit + use, intrinsic :: ISO_C_Binding, only : C_int use testdrive, only : run_testsuite, new_testsuite, testsuite_type - use test_hash_functions, only : collect_hash_functions + use test_hash_functions, only : collect_hash_functions, generate_key_array + use modchash, only: generate_all_c_hash implicit none + integer(C_int) :: error integer :: stat, is type(testsuite_type), allocatable :: testsuites(:) character(len=*), parameter :: fmt = '("#", *(1x, a))' + + call generate_key_array() + + error = generate_all_c_hash() + stat = 0 testsuites = [ & From e0218261a57f3257811b1d1a84a9db97f4b38f82 Mon Sep 17 00:00:00 2001 From: Jeremie Vandenplas Date: Sat, 4 Dec 2021 18:29:16 +0100 Subject: [PATCH 046/106] mod Makefile.manual --- src/tests/hash_functions/Makefile.manual | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/tests/hash_functions/Makefile.manual b/src/tests/hash_functions/Makefile.manual index 679b63ce5..1033b9e66 100644 --- a/src/tests/hash_functions/Makefile.manual +++ b/src/tests/hash_functions/Makefile.manual @@ -9,7 +9,7 @@ CPPFLAGS += -I. -I../.. -I.. LDFLAGS += -L../.. -L.. 
-lstdlib-testing -lstdlib #PROGS = generate_key_array generate_hash_arrays hash_validity_test test_hash_functions -PROGS = generate_key_array hash_validity_test test_hash_functions +PROGS = test_hash_functions TESTPROGS = $(PROGS:=TEST) all: $(PROGS) From 0059b2ebd9b543f885573ef05336dd30c7b59c1b Mon Sep 17 00:00:00 2001 From: Jeremie Vandenplas Date: Sat, 4 Dec 2021 19:55:05 +0100 Subject: [PATCH 047/106] update Makefile.manual --- src/tests/hash_functions/Makefile.manual | 11 ----------- 1 file changed, 11 deletions(-) diff --git a/src/tests/hash_functions/Makefile.manual b/src/tests/hash_functions/Makefile.manual index 1033b9e66..8ccb4a205 100644 --- a/src/tests/hash_functions/Makefile.manual +++ b/src/tests/hash_functions/Makefile.manual @@ -8,7 +8,6 @@ CXXFLAGS = -O3 CPPFLAGS += -I. -I../.. -I.. LDFLAGS += -L../.. -L.. -lstdlib-testing -lstdlib -#PROGS = generate_key_array generate_hash_arrays hash_validity_test test_hash_functions PROGS = test_hash_functions TESTPROGS = $(PROGS:=TEST) @@ -19,19 +18,9 @@ test: $(TESTPROGS) $(TESTPROGS): ./$(@:TEST=) -generate_key_array: generate_key_array.f90 - $(FC) $(FFLAGS) generate_key_array.f90 -o generate_key_array - test_hash_functions: test_hash_functions.f90 generate_hash_arrays.o libc_hash.a $(FC) $(FFLAGS) $(CPPFLAGS) -L. -o $@ $^ $(LDFLAGS) -lc_hash -lstdc++ -hash_validity_test: hash_validity_test.f90 - $(FC) $(FFLAGS) $(CPPFLAGS) -o $@ $^ $(LDFLAGS) - -generate_hash_arrays: generate_hash_arrays.o libc_hash.a - $(CXX) $(CXXFLAGS) generate_hash_arrays.o \ - -o generate_hash_arrays -L. 
-lc_hash - generate_hash_arrays.o: generate_hash_arrays.cpp libc_hash.a $(CXX) $(CXXFLAGS) -c generate_hash_arrays.cpp -o generate_hash_arrays.o From 5bd3cb46aa0b8d4b4e793fa40ccd5b0c3fc2ccc9 Mon Sep 17 00:00:00 2001 From: Jeremie Vandenplas Date: Sat, 4 Dec 2021 19:58:30 +0100 Subject: [PATCH 048/106] Update src/tests/hash_functions/generate_hash_arrays.cpp --- src/tests/hash_functions/generate_hash_arrays.cpp | 1 - 1 file changed, 1 deletion(-) diff --git a/src/tests/hash_functions/generate_hash_arrays.cpp b/src/tests/hash_functions/generate_hash_arrays.cpp index 6a61cd589..11aa9778d 100644 --- a/src/tests/hash_functions/generate_hash_arrays.cpp +++ b/src/tests/hash_functions/generate_hash_arrays.cpp @@ -9,7 +9,6 @@ extern "C" { int generate_all_c_hash(); } - #include "SpookyV2.h" void SpookyHash32_with_state_test(const void *key, size_t len, const void *state, void *out) { From e44ef8f5b9761e29c871c674ec679b3c9c241a2a Mon Sep 17 00:00:00 2001 From: Jeremie Vandenplas Date: Fri, 10 Dec 2021 19:31:13 +0100 Subject: [PATCH 049/106] add name to subroutines --- .../hash_functions/test_hash_functions.f90 | 34 +++++++------------ 1 file changed, 12 insertions(+), 22 deletions(-) diff --git a/src/tests/hash_functions/test_hash_functions.f90 b/src/tests/hash_functions/test_hash_functions.f90 index 7c39ddc18..4ebf696bc 100644 --- a/src/tests/hash_functions/test_hash_functions.f90 +++ b/src/tests/hash_functions/test_hash_functions.f90 @@ -34,8 +34,7 @@ subroutine collect_hash_functions(testsuite) type(unittest_type), allocatable, intent(out) :: testsuite(:) testsuite = [ & - new_unittest("dummy", test_dummy) & - , new_unittest("little_endian", test_little_endian) & + new_unittest("little_endian", test_little_endian) & , new_unittest("nmhash32", test_nmhash32) & , new_unittest("nmhash32x", test_nmhash32x) & , new_unittest("water_hash", test_water_hash) & @@ -45,15 +44,6 @@ subroutine collect_hash_functions(testsuite) end subroutine collect_hash_functions - subroutine 
test_dummy(error) - !> Error handling - type(error_type), allocatable, intent(out) :: error - - call check(error, .true., "dummy") - if (allocated(error)) return - - end subroutine - subroutine test_little_endian(error) !> Error handling type(error_type), allocatable, intent(out) :: error @@ -63,7 +53,7 @@ subroutine test_little_endian(error) call check(error, little_endian, "The processor is not Little-Endian") if (allocated(error)) return - end subroutine + end subroutine test_little_endian subroutine test_nmhash32(error) !> Error handling @@ -84,7 +74,7 @@ subroutine test_nmhash32(error) if (allocated(error)) return end do - end subroutine + end subroutine test_nmhash32 subroutine test_nmhash32x(error) !> Error handling @@ -105,7 +95,7 @@ subroutine test_nmhash32x(error) if (allocated(error)) return end do - end subroutine + end subroutine test_nmhash32x subroutine test_water_hash(error) !> Error handling @@ -126,7 +116,7 @@ subroutine test_water_hash(error) if (allocated(error)) return end do - end subroutine + end subroutine test_water_hash subroutine test_pengy_hash(error) !> Error handling @@ -147,7 +137,7 @@ subroutine test_pengy_hash(error) if (allocated(error)) return end do - end subroutine + end subroutine test_pengy_hash subroutine test_spooky_hash(error) !> Error handling @@ -168,7 +158,7 @@ subroutine test_spooky_hash(error) if (allocated(error)) return end do - end subroutine + end subroutine test_spooky_hash subroutine generate_key_array() @@ -190,7 +180,7 @@ subroutine generate_key_array() write(lun) key_array close(lun) - end subroutine + end subroutine generate_key_array subroutine read_array_int8(filename, res) @@ -208,7 +198,7 @@ subroutine read_array_int8(filename, res) 9908 res = 0 - end subroutine + end subroutine read_array_int8 subroutine read_array_int32(filename, res) character(*), intent(in) :: filename @@ -225,7 +215,7 @@ subroutine read_array_int32(filename, res) 9908 res = 0 - end subroutine + end subroutine read_array_int32 
subroutine read_array_int64(filename, res) character(*), intent(in) :: filename @@ -242,7 +232,7 @@ subroutine read_array_int64(filename, res) 9908 res = 0 - end subroutine + end subroutine read_array_int64 subroutine read_2darray_int64(filename, res) character(*), intent(in) :: filename @@ -259,7 +249,7 @@ subroutine read_2darray_int64(filename, res) 9908 res = 0 - end subroutine + end subroutine read_2darray_int64 end module From 6e2852c91526d7f80e7e74fb47b1294926714304 Mon Sep 17 00:00:00 2001 From: Jeremie Vandenplas Date: Sat, 11 Dec 2021 20:45:45 +0100 Subject: [PATCH 050/106] update CMakefile for running test_hash_functions --- CMakeLists.txt | 2 +- src/tests/hash_functions/CMakeLists.txt | 20 +++++++++++++++++++- 2 files changed, 20 insertions(+), 2 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index e36490189..325922604 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -1,6 +1,6 @@ cmake_minimum_required(VERSION 3.14.0) project(fortran_stdlib - LANGUAGES Fortran + LANGUAGES Fortran C CXX DESCRIPTION "Community driven and agreed upon de facto standard library for Fortran" ) diff --git a/src/tests/hash_functions/CMakeLists.txt b/src/tests/hash_functions/CMakeLists.txt index 41649fb02..f49f717ee 100755 --- a/src/tests/hash_functions/CMakeLists.txt +++ b/src/tests/hash_functions/CMakeLists.txt @@ -1 +1,19 @@ -ADDTEST(hash_functions) +#ADDTEST(hash_functions) + +set(SRC +nmhash_scalar.c +pengyhash.c +SpookyV2.cpp +SpookyV2Test.cpp +waterhash.c +generate_hash_arrays.cpp +) + +add_library(libc_hash ${SRC}) + + +add_executable(test_hash_functions test_hash_functions.f90) +target_link_libraries(test_hash_functions "${PROJECT_NAME}" "test-drive::test-drive" "libc_hash") +add_test(NAME hash_functions + COMMAND $ ${CMAKE_CURRENT_BINARY_DIR} + WORKING_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR}) From 389074b90d55ffa63df20ae5129bc9b3419cd47b Mon Sep 17 00:00:00 2001 From: Jeremie Vandenplas Date: Sat, 11 Dec 2021 20:48:13 +0100 Subject: [PATCH 051/106] add test 
hash_functions_perf --- src/tests/CMakeLists.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/tests/CMakeLists.txt b/src/tests/CMakeLists.txt index 87f23516f..bdb390cd0 100644 --- a/src/tests/CMakeLists.txt +++ b/src/tests/CMakeLists.txt @@ -18,7 +18,7 @@ list( add_subdirectory(ascii) add_subdirectory(bitsets) add_subdirectory(hash_functions) -#add_subdirectory(hash_functions_perf) +add_subdirectory(hash_functions_perf) add_subdirectory(io) add_subdirectory(linalg) add_subdirectory(logger) From e5138207f9de68956dacb469f542fd4aa7942010 Mon Sep 17 00:00:00 2001 From: Jeremie Vandenplas Date: Sat, 11 Dec 2021 21:23:51 +0100 Subject: [PATCH 052/106] add option for compile language --- CMakeLists.txt | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 325922604..162b7545f 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -42,11 +42,11 @@ if(CMAKE_Fortran_COMPILER_ID STREQUAL GNU) add_compile_options(-std=f2018) elseif(CMAKE_Fortran_COMPILER_ID MATCHES "^Intel") if(WIN32) - add_compile_options(/warn:declarations,general,usage,interfaces,unused) - add_compile_options(/stand:f18) + add_compile_options("$<$:/warn:declarations,general,usage,interfaces,unused>") + add_compile_options("$<$:/stand:f18>") else() - add_compile_options(-warn declarations,general,usage,interfaces,unused) - add_compile_options(-stand f18) + add_compile_options("$<$:-warn declarations,general,usage,interfaces,unused>") + add_compile_options("$<$:-stand f18>") endif() endif() From 498f467cfc97d029f606382a4902853bf7284efd Mon Sep 17 00:00:00 2001 From: Jeremie Vandenplas Date: Sat, 11 Dec 2021 21:31:18 +0100 Subject: [PATCH 053/106] update CI.yml --- .github/workflows/CI.yml | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/.github/workflows/CI.yml b/.github/workflows/CI.yml index 124c3fcd5..5a437cdb0 100644 --- a/.github/workflows/CI.yml +++ b/.github/workflows/CI.yml @@ -31,6 +31,8 @@ jobs: build: make env: 
FC: gfortran-${{ matrix.gcc_v }} + CC: gcc-${{ matrix.gcc_v }} + CXX: g++-${{ matrix.gcc_v }} GCC_V: ${{ matrix.gcc_v }} BUILD_DIR: ${{ matrix.build == 'cmake' && 'build' || '.' }} @@ -107,12 +109,16 @@ jobs: matrix: os: [ubuntu-latest, macos-latest] fc: [ifort] + cc: [icc] + cxx: [icpc] env: MACOS_HPCKIT_URL: >- https://registrationcenter-download.intel.com/akdlm/irc_nas/17398/m_HPCKit_p_2021.1.0.2681_offline.dmg MACOS_FORTRAN_COMPONENTS: >- intel.oneapi.mac.ifort-compiler FC: ${{ matrix.fc }} + CC: ${{ matrix.cc }} + CXX: ${{ matrix.cxx }} steps: - name: Checkout code From 5ae8857c31080c8733f08d4d44a560e93fd13f48 Mon Sep 17 00:00:00 2001 From: Jeremie Vandenplas Date: Sat, 11 Dec 2021 21:36:43 +0100 Subject: [PATCH 054/106] add g++ in CI --- .github/workflows/CI.yml | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/.github/workflows/CI.yml b/.github/workflows/CI.yml index 5a437cdb0..47ce67d97 100644 --- a/.github/workflows/CI.yml +++ b/.github/workflows/CI.yml @@ -57,9 +57,10 @@ jobs: run: | sudo add-apt-repository ppa:ubuntu-toolchain-r/test sudo apt-get update - sudo apt-get install -y gcc-${GCC_V} gfortran-${GCC_V} + sudo apt-get install -y gcc-${GCC_V} g++-${GCC_V} gfortran-${GCC_V} sudo update-alternatives --install /usr/bin/gcc gcc /usr/bin/gcc-${GCC_V} 100 \ --slave /usr/bin/gfortran gfortran /usr/bin/gfortran-${GCC_V} \ + --slave /usr/bin/g++ g++ /usr/bin/g++-${GCC_V} \ --slave /usr/bin/gcov gcov /usr/bin/gcov-${GCC_V} - name: Install GFortran macOS From edeabfbfa41dec347e06698180aa05b4b5bc2c39 Mon Sep 17 00:00:00 2001 From: Jeremie Vandenplas Date: Sat, 11 Dec 2021 21:50:52 +0100 Subject: [PATCH 055/106] test --- .github/workflows/CI.yml | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/.github/workflows/CI.yml b/.github/workflows/CI.yml index 47ce67d97..2be6538cc 100644 --- a/.github/workflows/CI.yml +++ b/.github/workflows/CI.yml @@ -52,7 +52,7 @@ jobs: - name: Install fypp run: pip install --upgrade fypp - - 
name: Install GFortran Linux + - name: Install GCC compilers Linux if: contains( matrix.os, 'ubuntu') run: | sudo add-apt-repository ppa:ubuntu-toolchain-r/test @@ -63,7 +63,7 @@ jobs: --slave /usr/bin/g++ g++ /usr/bin/g++-${GCC_V} \ --slave /usr/bin/gcov gcov /usr/bin/gcov-${GCC_V} - - name: Install GFortran macOS + - name: Install GCC compilers macOS if: contains( matrix.os, 'macos') run: | brew install gcc@${GCC_V} || brew upgrade gcc@${GCC_V} || true @@ -161,6 +161,7 @@ jobs: if: contains(matrix.os, 'ubuntu') run: | sudo apt-get install intel-oneapi-compiler-fortran + sudo apt-get install intel-oneapi-compiler-c - name: Install Intel oneAPI compiler (OSX) if: contains(matrix.os, 'macos') && steps.cache-install.outputs.cache-hit != 'true' From ac2275700aae176286fb2e75cec561ba063c644a Mon Sep 17 00:00:00 2001 From: Jeremie Vandenplas Date: Sat, 11 Dec 2021 21:58:40 +0100 Subject: [PATCH 056/106] test --- .github/workflows/CI.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/CI.yml b/.github/workflows/CI.yml index 2be6538cc..06846d984 100644 --- a/.github/workflows/CI.yml +++ b/.github/workflows/CI.yml @@ -161,7 +161,7 @@ jobs: if: contains(matrix.os, 'ubuntu') run: | sudo apt-get install intel-oneapi-compiler-fortran - sudo apt-get install intel-oneapi-compiler-c + sudo apt-get install intel-oneapi-compiler-dpcpp-cpp-and-cpp-classic - name: Install Intel oneAPI compiler (OSX) if: contains(matrix.os, 'macos') && steps.cache-install.outputs.cache-hit != 'true' From 26138a293d44c59ea12291751a9cb85988a8cc36 Mon Sep 17 00:00:00 2001 From: Jeremie Vandenplas Date: Sun, 12 Dec 2021 11:52:35 +0100 Subject: [PATCH 057/106] add fortran linker for cmake (instead of using a cxx linker) --- src/tests/hash_functions/CMakeLists.txt | 3 +++ 1 file changed, 3 insertions(+) diff --git a/src/tests/hash_functions/CMakeLists.txt b/src/tests/hash_functions/CMakeLists.txt index f49f717ee..e23c5b54c 100755 --- 
a/src/tests/hash_functions/CMakeLists.txt +++ b/src/tests/hash_functions/CMakeLists.txt @@ -11,9 +11,12 @@ generate_hash_arrays.cpp add_library(libc_hash ${SRC}) +set(CMAKE_FORTRAN_LINK_EXECUTABLE " -o ") add_executable(test_hash_functions test_hash_functions.f90) target_link_libraries(test_hash_functions "${PROJECT_NAME}" "test-drive::test-drive" "libc_hash") add_test(NAME hash_functions COMMAND $ ${CMAKE_CURRENT_BINARY_DIR} WORKING_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR}) + +set_target_properties(test_hash_functions PROPERTIES LINKER_LANGUAGE FORTRAN) From 404bd659fe193d728861cc1378324a1a92315cb0 Mon Sep 17 00:00:00 2001 From: Jeremie Vandenplas Date: Sun, 12 Dec 2021 12:21:24 +0100 Subject: [PATCH 058/106] remove unused variable --- src/stdlib_32_bit_nmhashes.fypp | 2 -- 1 file changed, 2 deletions(-) diff --git a/src/stdlib_32_bit_nmhashes.fypp b/src/stdlib_32_bit_nmhashes.fypp index 93eab2b80..9ea79bd4f 100755 --- a/src/stdlib_32_bit_nmhashes.fypp +++ b/src/stdlib_32_bit_nmhashes.fypp @@ -62,8 +62,6 @@ submodule(stdlib_32_bit_hash_codes) stdlib_32_bit_nmhashes integer(int32), parameter :: nmh_m2_v(0:31) = nmh_m2 integer(int32), parameter :: nmh_m3_v(0:31) = nmh_m3 - integer(int16), parameter :: nmh_m3_16(2) = transfer( nmh_m3, 0_int16, 2 ) - logical, parameter :: nmh_short32_without_seed2=.false. logical, parameter :: nmh_short32_with_seed2=.true. 
From 216318b410a8b5d26e7d05c729cbc7c63e1111f5 Mon Sep 17 00:00:00 2001 From: Jeremie Vandenplas Date: Sun, 12 Dec 2021 12:36:29 +0100 Subject: [PATCH 059/106] remove parameter transfer --- src/stdlib_32_bit_nmhashes.fypp | 37 +++++++++++++++++++++------------ 1 file changed, 24 insertions(+), 13 deletions(-) diff --git a/src/stdlib_32_bit_nmhashes.fypp b/src/stdlib_32_bit_nmhashes.fypp index 9ea79bd4f..17cdcbca1 100755 --- a/src/stdlib_32_bit_nmhashes.fypp +++ b/src/stdlib_32_bit_nmhashes.fypp @@ -124,24 +124,20 @@ contains integer(int32), parameter :: m3 = int(z'E9139917', int32) integer(int16) :: vx16(2) - integer(int16), parameter :: & - m116(2) = transfer( m1, 0_int16, 2 ), & - m216(2) = transfer( m2, 0_int16, 2 ), & - m316(2) = transfer( m3, 0_int16, 2 ) vx32 = x vx32 = ieor( vx32, ieor( ishft( vx32, -12 ), ishft( vx32, -6 ) ) ) vx16 = transfer( vx32, 0_int16, 2 ) - vx16 = vx16 * m116 + vx16 = vx16 * transfer( m1, 0_int16, 2 ) vx32 = transfer( vx16, 0_int32 ) vx32 = ieor( vx32, ieor( ishft( vx32, 11 ), ishft( vx32, -19 ) ) ) vx16 = transfer( vx32, 0_int16, 2 ) - vx16 = vx16 * m216 + vx16 = vx16 * transfer( m2, 0_int16, 2 ) vx32 = transfer( vx16, 0_int32 ) vx32 = ieor( vx32, seed ) vx32 = ieor( vx32, ieor( ishft( vx32, -15 ), ishft( vx32, -9 ) ) ) vx16 = transfer( vx32, 0_int16, 2 ) - vx16 = vx16 * m316 + vx16 = vx16 * transfer( m3, 0_int16, 2 ) vx32 = transfer( vx16, 0_int32 ) vx32 = ieor( vx32, ieor( ishft(vx32, 16), ishft(vx32, -11) ) ) @@ -155,15 +151,23 @@ contains integer(int32) :: xu32(0:3), yu32(0:3) integer(int16) :: xu16(0:1) - integer(int16), parameter :: & - nmh_m1_16(0:1) = transfer( nmh_m1, 0_int16, 2 ), & - nmh_m2_16(0:1) = transfer( nmh_m2, 0_int16, 2 ), & - nmh_m3_16(0:1) = transfer( nmh_m3, 0_int16, 2 ) +! Due to an issue with Intel OneAPI ifort 2021 (see +! https://community.intel.com/t5/Intel-Fortran-Compiler/Intrinsic-transfer-with-a-provided-size-un-expected-behavior/m-p/1343313#M158733 +! 
), it is not possible to define the following variables as a parameter. +! integer(int16), parameter :: & +! nmh_m1_16(0:1) = transfer( nmh_m1, 0_int16, 2 ), & +! nmh_m2_16(0:1) = transfer( nmh_m2, 0_int16, 2 ), & +! nmh_m3_16(0:1) = transfer( nmh_m3, 0_int16, 2 ) + integer(int16) :: nmh_m1_16(0:1), nmh_m2_16(0:1), nmh_m3_16(0:1) integer(int32) :: s1 integer(int64) :: length integer(int32) :: length32(0:1) integer(int64) :: i, j, r + nmh_m1_16(0:1) = transfer( nmh_m1, 0_int16, 2 ) + nmh_m2_16(0:1) = transfer( nmh_m2, 0_int16, 2 ) + nmh_m3_16(0:1) = transfer( nmh_m3, 0_int16, 2 ) + ! base mixer: [f0d9649b 5 -13 29a7935d -9 11 55d35831 -20 -10 ] = ! 0.93495901789135362 @@ -380,10 +384,17 @@ contains integer(int16) :: u16(0:1) integer(int32), parameter:: m1 = int(z'CCE5196D', int32) integer(int32), parameter:: m2 = int(z'464BE229', int32) - integer(int16), parameter:: m1_16(0:1) = transfer(m1, 0_int16, 2) - integer(int16), parameter:: m2_16(0:1) = transfer(m2, 0_int16, 2) +! Due to an issue with Intel OneAPI ifort 2021 (see +! https://community.intel.com/t5/Intel-Fortran-Compiler/Intrinsic-transfer-with-a-provided-size-un-expected-behavior/m-p/1343313#M158733 +! ), it is not possible to define the following variables as a parameter. + !integer(int16), parameter:: m1_16(0:1) = transfer(m1, 0_int16, 2) + !integer(int16), parameter:: m2_16(0:1) = transfer(m2, 0_int16, 2) + integer(int16) :: m1_16(0:1), m2_16(0:1) ! 
[-21 -8 cce5196d 12 -7 464be229 -21 -8] = 3.2267098842182733 + m1_16(0:1) = transfer(m1, 0_int16, 2) + m2_16(0:1) = transfer(m2, 0_int16, 2) + u32 = x u32 = ieor( u32, ieor( ishft( u32, -8 ), ishft( u32, -21 ) ) ) u16 = transfer( u32, 0_int16, 2 ) From 608074f8fbc019a86ccd029cb42741779a029390 Mon Sep 17 00:00:00 2001 From: Jeremie Vandenplas Date: Sun, 12 Dec 2021 14:09:39 +0100 Subject: [PATCH 060/106] update Makefile --- Makefile.manual | 5 +++++ src/tests/hash_functions/Makefile.manual | 23 +++++++++++++++-------- 2 files changed, 20 insertions(+), 8 deletions(-) diff --git a/Makefile.manual b/Makefile.manual index b4d1a5efe..8a59018d4 100644 --- a/Makefile.manual +++ b/Makefile.manual @@ -1,6 +1,9 @@ # Fortran stdlib Makefile FC ?= gfortran +CC ?= gcc +CXX ?= g++ + FFLAGS ?= -Wall -Wextra -Wimplicit-interface -fPIC -g -fcheck=all -fno-range-check FYPPFLAGS ?= @@ -11,6 +14,8 @@ FYPPFLAGS += \ -DPROJECT_VERSION_PATCH=$(word 3,$(VERSION)) export FC +export CC +export CXX export FFLAGS export FYPPFLAGS diff --git a/src/tests/hash_functions/Makefile.manual b/src/tests/hash_functions/Makefile.manual index 8ccb4a205..88ca4baa6 100644 --- a/src/tests/hash_functions/Makefile.manual +++ b/src/tests/hash_functions/Makefile.manual @@ -1,9 +1,16 @@ -CC = gcc -CXX = g++ - -FFLAGS = -Wall -Wextra -Wimplicit-interface -fPIC -g -fcheck=all -fno-range-check -CFLAGS = -O3 -CXXFLAGS = -O3 +FFLAGS = #-g -check all -debug all +CFLAGS = #-g -debug all +CXXFLAGS = #-g -debug all +#CFLAGS += -P -cpp +#CXXFLAGS += -P -cpp + +#CC = gcc +#CXX = g++ +#FFLAGS = +#CFLAGS = +#CXXFLAGS = +#CFLAGS = -E +#CXXFLAGS = -E CPPFLAGS += -I. -I../.. -I.. LDFLAGS += -L../.. -L.. 
-lstdlib-testing -lstdlib @@ -41,11 +48,11 @@ SpookyV2Test.o: SpookyV2Test.cpp SpookyV2.h $(CXX) $(CXXFLAGS) $(CPPFLAGS) -c SpookyV2Test.cpp -o SpookyV2Test.o nmhash_scalar.o: nmhash_scalar.c nmhash_scalar.h - $(CC) $(CXXFLAGS) $(CPPFLAGS) -c nmhash_scalar.c -o nmhash_scalar.o + $(CC) $(CFLAGS) $(CPPFLAGS) -c nmhash_scalar.c -o nmhash_scalar.o clean: rm nmhash_scalar.o SpookyV2Test.o SpookyV2.o waterhash.o pengyhash.o \ - libc_hash.a generate_hash_arrays.o $(PROGS) \ + libc_hash.a generate_hash_arrays.o $(PROGS) *.*mod\ *.bin From f9168e98ea4dbd9f61f7905f2d0a6fe538f4c147 Mon Sep 17 00:00:00 2001 From: Jeremie Vandenplas Date: Wed, 15 Dec 2021 23:21:43 +0100 Subject: [PATCH 061/106] mv languages C and CXX to test CMakeLists.txt --- CMakeLists.txt | 2 +- src/tests/hash_functions/CMakeLists.txt | 3 +++ 2 files changed, 4 insertions(+), 1 deletion(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 162b7545f..682e73251 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -1,6 +1,6 @@ cmake_minimum_required(VERSION 3.14.0) project(fortran_stdlib - LANGUAGES Fortran C CXX + LANGUAGES Fortran DESCRIPTION "Community driven and agreed upon de facto standard library for Fortran" ) diff --git a/src/tests/hash_functions/CMakeLists.txt b/src/tests/hash_functions/CMakeLists.txt index e23c5b54c..db8f9b0fa 100755 --- a/src/tests/hash_functions/CMakeLists.txt +++ b/src/tests/hash_functions/CMakeLists.txt @@ -9,6 +9,9 @@ waterhash.c generate_hash_arrays.cpp ) +enable_language(CXX) +enable_language(C) + add_library(libc_hash ${SRC}) set(CMAKE_FORTRAN_LINK_EXECUTABLE " -o ") From 82fde8c4c1f97a9ef9afe9245ad7065661cec81c Mon Sep 17 00:00:00 2001 From: Jeremie Vandenplas Date: Thu, 16 Dec 2021 08:31:23 +0100 Subject: [PATCH 062/106] remove the file generate_key_array and hash_validity_test + update README.md --- src/tests/hash_functions/README.md | 11 +- .../hash_functions/generate_key_array.f90 | 22 ---- .../hash_functions/hash_validity_test.f90 | 123 ------------------ 3 files 
changed, 5 insertions(+), 151 deletions(-) delete mode 100644 src/tests/hash_functions/generate_key_array.f90 delete mode 100644 src/tests/hash_functions/hash_validity_test.f90 diff --git a/src/tests/hash_functions/README.md b/src/tests/hash_functions/README.md index 62c18699d..5cf2550e3 100644 --- a/src/tests/hash_functions/README.md +++ b/src/tests/hash_functions/README.md @@ -1,9 +1,8 @@ -The validation directory contains code to validate the Fortran hash functions against the original C/C++ codes. It consists of three executable applications: +The validation directory contains code to validate the Fortran hash functions against the original C/C++ codes. It consists of one executable `test_hash_functions` that: -* `generate_key_array.f90` - creates a file containing 2048 random 8 bit integers. +* creates a file containing 2048 random 8 bit integers using the subroutine + `generate_key_array`. -* `generate_hash_arrays.cpp` - reads the file generated by `generate_key_array.f90` and uses its contents to generate 2049 hashes for each hash algorithm and outputs files containing the hashes. +* reads the file generated by the subroutine `generate_key_array` and uses its contents to generate 2049 hashes for each C/C++ hash algorithm and outputs files containing the hashes. -* `hash_validity_test.f90`- reads the file generated by `generate_key_array.f90` and uses its contents to generate 2049 hashes for each hash algorithm and compares the result with the corresponding outputs of `generate_hash_arrays.cpp` reporting if the outputs are not equal. - -Note the C code for nmhash assumes that the C compiler is either gcc or MSVC, and will not compile with the Intel C compiler. +* reads the file generated by the subroutine `generate_key_array` and uses its contents to generate 2049 hashes for each Fortran hash algorithm and compares the result with the corresponding outputs of C/C++ hash algorithms. 
diff --git a/src/tests/hash_functions/generate_key_array.f90 b/src/tests/hash_functions/generate_key_array.f90 deleted file mode 100644 index 6c40ecb1d..000000000 --- a/src/tests/hash_functions/generate_key_array.f90 +++ /dev/null @@ -1,22 +0,0 @@ -program generate_key_array - - use, intrinsic :: iso_fortran_env, only: int8, int32, int64, real64 - - integer :: lun - integer(int8) :: key_array(2048) - integer(int32) :: dummy(512) - real(real64) :: rand(512) - -! Create key array - call random_number( rand ) - do i=1, 512 - dummy(i) = floor( rand(i) * 2_int64**32 - 2_int64**31, kind=int32 ) - end do - key_array = transfer( dummy, 0_int8, 2048 ) - - open(newunit=lun, file="key_array.bin", form="unformatted", & - access="stream", status="replace", action="write") - write(lun) key_array - close(lun) - -end program generate_key_array diff --git a/src/tests/hash_functions/hash_validity_test.f90 b/src/tests/hash_functions/hash_validity_test.f90 deleted file mode 100644 index 3d47a5d88..000000000 --- a/src/tests/hash_functions/hash_validity_test.f90 +++ /dev/null @@ -1,123 +0,0 @@ -!! HASH_VALIDITY_TEST processes a vector of eight bit integers, -!! extracting subvectors of length 0, 1, 2, ... 2048 from the beginning -!! hashing each subvector and comparing the resulting hash with the -!! corresponding hash produced by the original C/C++ code, stopping if -!! they are different. As the original C/C++ code was typically developed -!! for Little-Endian machines the testing should only be cone on such -!! machines. The Fortran codes also assume two's complement integers. -!! The code set assume that C's int32_t and int64_t have the same -!! representation as Firtrans int32 and int64 respectively. 
- -program hash_validity_test - - use, intrinsic :: iso_fortran_env, only: int8, int32, int64, real64 - use stdlib_32_bit_hash_codes, only: & - little_endian, & - nmhash32, & - nmhash32x, & - water_hash - use stdlib_64_bit_hash_codes, only: & - pengy_hash, & - spooky_hash - - integer(int32), parameter :: nm_seed = int( z'deadbeef', int32 ) - integer(int64), parameter :: water_seed = int( z'deadbeef1eadbeef', int64 ) - integer(int32), parameter :: pengy_seed = int( z'deadbeef', int32 ) - integer(int64), parameter :: spooky_seed(2) = [ water_seed, water_seed ] - integer :: index - integer :: lun - integer(int8) :: key_array(2048) - integer(int32) :: c_nmhash32(0:2048) - integer(int32) :: c_nmhash32x(0:2048) - integer(int32) :: c_water_hash(0:2048) - integer(int64) :: c_pengy_hash(0:2048) - integer(int64) :: c_spooky_hash(0:1, 0:2048) - - - ! Test for endianness - if ( .not. little_endian ) then - stop "The processor is not Little-Endian" - end if - - ! Read key array used to generate hash array - open(newunit=lun, file="key_array.bin", form="unformatted", & - access="stream", status="old", action="read") - read(lun) key_array - close(lun) - - ! Read hash array generated from key array by the C version of nmhash32 - open(newunit=lun, file="c_nmhash32_array.bin", form="unformatted", & - access="stream", status="old", action="read") - read(lun) c_nmhash32 - close(lun) - - do index=0, 2048 - if ( c_nmhash32(index) /= nmhash32(key_array(1:index), nm_seed) ) then - write(*,'("NMHASH32 failed for KEY_ARRAY(1:", I0, ")")') index - stop "NMHASH32 is invalid." - end if - end do - write(*,*) "NMHASH32 is valid." - - ! 
Read hash array generated from key array by the C version of nmhash32x - open(newunit=lun, file="c_nmhash32x_array.bin", form="unformatted", & - access="stream", status="old", action="read") - read(lun) c_nmhash32x - close(lun) - - do index=0, 2048 - if ( c_nmhash32x(index) /= nmhash32x(key_array(1:index), nm_seed) ) then - write(*,'("NMHASH32X failed for KEY_ARRAY(1:", I0, ")")') index - stop "NMHASH32X is invalid." - end if - end do - write(*,*) "NMHASH32X is valid." - - ! Read hash array generated from key array by the C version of water hash - open(newunit=lun, file="c_water_hash_array.bin", form="unformatted", & - access="stream", status="old", action="read") - read(lun) c_water_hash - close(lun) - - do index=0, 2048 - if ( c_water_hash(index) /= & - water_hash(key_array(1:index), water_seed) ) then - write(*,'("WATER_HASH failed for KEY_ARRAY(1:", I0, ")")') index - stop "WATER_HASH is invalid." - end if - end do - write(*,*) "WATER_HASH is valid." - - ! Read hash array generated from key array by the C version of pengy hash - open(newunit=lun, file="c_pengy_hash_array.bin", form="unformatted", & - access="stream", status="old", action="read") - read(lun) c_pengy_hash - close(lun) - - do index=0, 2048 - if ( c_pengy_hash(index) /= & - pengy_hash(key_array(1:index), pengy_seed) ) then - write(*,'("PENGY_HASH failed for KEY_ARRAY(1:", I0, ")")') index - stop "PENGY_HASH is invalid." - end if - end do - write(*,*) "PENGY_HASH is valid." - - ! Read hash array generated from key array by the C version of Spooky hash - open(newunit=lun, file="c_spooky_hash_array.bin", form="unformatted", & - access="stream", status="old", action="read") - do index=0, 2048 - read(lun) c_spooky_hash(:, index) - end do - close(lun) - - do index=0, 2048 - if ( .not. all( c_spooky_hash(:,index) == & - spooky_hash(key_array(1:index), spooky_seed) ) ) then - write(*,'("SPOOKY_HASH failed for KEY_ARRAY(:,1:", I0, ")")') index - stop "SPOOKY_HASH is invalid." 
- end if - end do - write(*,*) "SPOOKY_HASH is valid." - -end program hash_validity_test From 64d0b9470488d7cddfb39229cb1b24336fc4331f Mon Sep 17 00:00:00 2001 From: Jeremie Vandenplas Date: Thu, 16 Dec 2021 08:35:05 +0100 Subject: [PATCH 063/106] update fpm-deployment.sh --- ci/fpm-deployment.sh | 1 + 1 file changed, 1 insertion(+) diff --git a/ci/fpm-deployment.sh b/ci/fpm-deployment.sh index 4fc20094c..8d80c24f6 100644 --- a/ci/fpm-deployment.sh +++ b/ci/fpm-deployment.sh @@ -29,6 +29,7 @@ include=( prune=( "$destdir/test/test_always_fail.f90" "$destdir/test/test_always_skip.f90" + "$destdir/test/test_hash_functions.f90" "$destdir/src/common.f90" "$destdir/src/f18estop.f90" ) From 412b9174431d7f08160b2850a6015b459bbdfee2 Mon Sep 17 00:00:00 2001 From: Jeremie Vandenplas Date: Thu, 16 Dec 2021 08:55:41 +0100 Subject: [PATCH 064/106] update makefile --- Makefile.manual | 4 ---- src/tests/hash_functions/Makefile.manual | 15 ++------------- 2 files changed, 2 insertions(+), 17 deletions(-) diff --git a/Makefile.manual b/Makefile.manual index 8a59018d4..67835b1e2 100644 --- a/Makefile.manual +++ b/Makefile.manual @@ -1,8 +1,6 @@ # Fortran stdlib Makefile FC ?= gfortran -CC ?= gcc -CXX ?= g++ FFLAGS ?= -Wall -Wextra -Wimplicit-interface -fPIC -g -fcheck=all -fno-range-check FYPPFLAGS ?= @@ -14,8 +12,6 @@ FYPPFLAGS += \ -DPROJECT_VERSION_PATCH=$(word 3,$(VERSION)) export FC -export CC -export CXX export FFLAGS export FYPPFLAGS diff --git a/src/tests/hash_functions/Makefile.manual b/src/tests/hash_functions/Makefile.manual index 88ca4baa6..c933bc700 100644 --- a/src/tests/hash_functions/Makefile.manual +++ b/src/tests/hash_functions/Makefile.manual @@ -1,16 +1,5 @@ -FFLAGS = #-g -check all -debug all -CFLAGS = #-g -debug all -CXXFLAGS = #-g -debug all -#CFLAGS += -P -cpp -#CXXFLAGS += -P -cpp - -#CC = gcc -#CXX = g++ -#FFLAGS = -#CFLAGS = -#CXXFLAGS = -#CFLAGS = -E -#CXXFLAGS = -E +CC ?= gcc +CXX ?= g++ CPPFLAGS += -I. -I../.. -I.. LDFLAGS += -L../.. -L.. 
-lstdlib-testing -lstdlib From 9d32d06b6bf7eb035898aae6832e882bd3ca01c8 Mon Sep 17 00:00:00 2001 From: Jeremie Vandenplas Date: Thu, 16 Dec 2021 08:57:05 +0100 Subject: [PATCH 065/106] Update Makefile.manual --- Makefile.manual | 1 - 1 file changed, 1 deletion(-) diff --git a/Makefile.manual b/Makefile.manual index 67835b1e2..b4d1a5efe 100644 --- a/Makefile.manual +++ b/Makefile.manual @@ -1,7 +1,6 @@ # Fortran stdlib Makefile FC ?= gfortran - FFLAGS ?= -Wall -Wextra -Wimplicit-interface -fPIC -g -fcheck=all -fno-range-check FYPPFLAGS ?= From fd6e20bfbbee732f3cc028fced505050cbc64523 Mon Sep 17 00:00:00 2001 From: Jeremie Vandenplas Date: Thu, 16 Dec 2021 18:34:10 +0100 Subject: [PATCH 066/106] Update doc/specs/stdlib_hash_procedures.md Co-authored-by: Ivan Pribec --- doc/specs/stdlib_hash_procedures.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/specs/stdlib_hash_procedures.md b/doc/specs/stdlib_hash_procedures.md index 72e8055d6..29a127472 100755 --- a/doc/specs/stdlib_hash_procedures.md +++ b/doc/specs/stdlib_hash_procedures.md @@ -1452,7 +1452,7 @@ Pure/elemental function ##### Arguments `key`: shall be a scalar expression of type default character or a -Rank 1 integer vector expression of kind `INt8`, `INT16`, `INT32`, or +rank 1 integer vector expression of kind `INt8`, `INT16`, `INT32`, or `INTT64`. It is an `intent(in)` argument. `seed`: shall be an integer expression of kind `INT64`. 
It is From 09d1fc177e3d6ceece2551e19e06c18ad4bade85 Mon Sep 17 00:00:00 2001 From: Jeremie Vandenplas Date: Thu, 16 Dec 2021 18:34:54 +0100 Subject: [PATCH 067/106] Update doc/specs/stdlib_hash_procedures.md Co-authored-by: Ivan Pribec --- doc/specs/stdlib_hash_procedures.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/doc/specs/stdlib_hash_procedures.md b/doc/specs/stdlib_hash_procedures.md index 29a127472..0e155a2b5 100755 --- a/doc/specs/stdlib_hash_procedures.md +++ b/doc/specs/stdlib_hash_procedures.md @@ -1512,8 +1512,8 @@ Function ##### Arguments `key`: shall be a scalar of type default character expression or a -Rank 1 integer vector expression of kind `INt8`, `INT16`, `INT32`, or -`INTT64`. It is an `intent(in)` argument. +Rank 1 integer vector expression of kind `INT8`, `INT16`, `INT32`, or +`INT64`. It is an `intent(in)` argument. `seed`: shall be a two element integer vector expression of kind `INT64`. It is an `intent(in)` argument. From dae5560d9a6855b57d0d9b6aaa11952f8a467cdd Mon Sep 17 00:00:00 2001 From: Jeremie Vandenplas Date: Thu, 16 Dec 2021 18:35:12 +0100 Subject: [PATCH 068/106] Update doc/specs/stdlib_hash_procedures.md Co-authored-by: Ivan Pribec --- doc/specs/stdlib_hash_procedures.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/doc/specs/stdlib_hash_procedures.md b/doc/specs/stdlib_hash_procedures.md index 0e155a2b5..28397af01 100755 --- a/doc/specs/stdlib_hash_procedures.md +++ b/doc/specs/stdlib_hash_procedures.md @@ -1438,8 +1438,8 @@ Experimental ##### Description -Maps a character string or integer vector to a 64 bit integer whose -value also depends on a scalar 32 bit integer, `seed`. +Maps a character string or integer vector to a 64-bit integer whose +value also depends on a scalar 32-bit integer, `seed`. 
##### Syntax From 651adee2afc7f3e57a10c9d135fa90e1c0786728 Mon Sep 17 00:00:00 2001 From: Jeremie Vandenplas Date: Thu, 16 Dec 2021 18:38:48 +0100 Subject: [PATCH 069/106] Update doc/specs/stdlib_hash_procedures.md Co-authored-by: Ivan Pribec --- doc/specs/stdlib_hash_procedures.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/specs/stdlib_hash_procedures.md b/doc/specs/stdlib_hash_procedures.md index 28397af01..f094e0932 100755 --- a/doc/specs/stdlib_hash_procedures.md +++ b/doc/specs/stdlib_hash_procedures.md @@ -409,7 +409,7 @@ input, but tests with hash tables show negligible difference. These algorithms have the reputation of being particularly useful for small byte strings, i.e., strings of less than 32 bytes. While they do not at all perform well on the SMHasher test suite, -usage indicates that that that this has little impact on the +usage indicates that this has little impact on the performance of small hash tables, and the small size of the functions allows their quick loading and retainment in the instruction cache, giving a performance boost where the hashing is intermittent. From 19a1feb3cbc38d903ad80b68f49e5b9defa94037 Mon Sep 17 00:00:00 2001 From: Jeremie Vandenplas Date: Thu, 16 Dec 2021 18:39:00 +0100 Subject: [PATCH 070/106] Update doc/specs/stdlib_hash_procedures.md Co-authored-by: Ivan Pribec --- doc/specs/stdlib_hash_procedures.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/specs/stdlib_hash_procedures.md b/doc/specs/stdlib_hash_procedures.md index f094e0932..c70adb1cc 100755 --- a/doc/specs/stdlib_hash_procedures.md +++ b/doc/specs/stdlib_hash_procedures.md @@ -1092,7 +1092,7 @@ function for character strings. ### Overview of the module -Sixty four bit hash functions are generally overkill for hash table +Sixty-four bit hash functions are generally overkill for hash table applications, and are primarily useful for check sums and related applications. 
As checksums often have to deal with extremely large files or From 15e868e9917549adac675eb8e3eda7fb8d6e3754 Mon Sep 17 00:00:00 2001 From: Jeremie Vandenplas Date: Thu, 16 Dec 2021 18:39:13 +0100 Subject: [PATCH 071/106] Update doc/specs/stdlib_hash_procedures.md Co-authored-by: Ivan Pribec --- doc/specs/stdlib_hash_procedures.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/specs/stdlib_hash_procedures.md b/doc/specs/stdlib_hash_procedures.md index c70adb1cc..b25f683aa 100755 --- a/doc/specs/stdlib_hash_procedures.md +++ b/doc/specs/stdlib_hash_procedures.md @@ -1111,7 +1111,7 @@ also defines the integer kind constant, `INT_HASH`, used to specify the kind of the hash function results, and a logical constant, `LITTLE_ENDIAN`, used to deal with one aspect of the machine dependence of the hash codes. -Note that while SpookyHash can be used as a sixty four bit hash +Note that while SpookyHash can be used as a sixty-four bit hash algorithm, its algorithms actually returns two element integer arrays of kind `INT64`, so it can also be used as a 128 bit hash. 
From b2d9e0ef22132d58c31042d0b693a3974892beb6 Mon Sep 17 00:00:00 2001 From: Sebastian Ehlert <28669218+awvwgk@users.noreply.github.com> Date: Thu, 16 Dec 2021 18:53:18 +0100 Subject: [PATCH 072/106] Also install Intel C/C++ compilers on OSX --- .github/workflows/CI.yml | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/.github/workflows/CI.yml b/.github/workflows/CI.yml index c0b58188d..bfa499714 100644 --- a/.github/workflows/CI.yml +++ b/.github/workflows/CI.yml @@ -116,8 +116,7 @@ jobs: env: MACOS_HPCKIT_URL: >- https://registrationcenter-download.intel.com/akdlm/irc_nas/17398/m_HPCKit_p_2021.1.0.2681_offline.dmg - MACOS_FORTRAN_COMPONENTS: >- - intel.oneapi.mac.ifort-compiler + MACOS_FORTRAN_COMPONENTS: all FC: ${{ matrix.fc }} CC: ${{ matrix.cc }} CXX: ${{ matrix.cxx }} From da5bcd2031d031a9530d47d932044488218d5ebd Mon Sep 17 00:00:00 2001 From: Sebastian Ehlert <28669218+awvwgk@users.noreply.github.com> Date: Thu, 16 Dec 2021 19:39:06 +0100 Subject: [PATCH 073/106] Update Intel from 2021.1 to 2021.4 --- .github/workflows/CI.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/CI.yml b/.github/workflows/CI.yml index bfa499714..2bba4d8a1 100644 --- a/.github/workflows/CI.yml +++ b/.github/workflows/CI.yml @@ -115,7 +115,7 @@ jobs: cxx: [icpc] env: MACOS_HPCKIT_URL: >- - https://registrationcenter-download.intel.com/akdlm/irc_nas/17398/m_HPCKit_p_2021.1.0.2681_offline.dmg + https://registrationcenter-download.intel.com/akdlm/irc_nas/18242/m_HPCKit_p_2021.4.0.3389_offline.dmg MACOS_FORTRAN_COMPONENTS: all FC: ${{ matrix.fc }} CC: ${{ matrix.cc }} From f693892393fa7e42f0508faec538f4e6b8834104 Mon Sep 17 00:00:00 2001 From: Sebastian Ehlert <28669218+awvwgk@users.noreply.github.com> Date: Thu, 16 Dec 2021 20:54:42 +0100 Subject: [PATCH 074/106] Don't use __builtin_rotateleft32 for Intel on OSX --- src/tests/hash_functions/nmhash.h | 3 ++- src/tests/hash_functions/nmhash_scalar.h | 3 ++- 2 files changed, 4 
insertions(+), 2 deletions(-) diff --git a/src/tests/hash_functions/nmhash.h b/src/tests/hash_functions/nmhash.h index 21bb90022..85f9cf8a1 100644 --- a/src/tests/hash_functions/nmhash.h +++ b/src/tests/hash_functions/nmhash.h @@ -51,7 +51,8 @@ extern "C" { #endif #if defined(__has_builtin) -# if __has_builtin(__builtin_rotateleft32) +# if __has_builtin(__builtin_rotateleft32) \ + && !(defined(__INTEL_COMPILER) && defined(__APPLE__)) # define NMH_rotl32 __builtin_rotateleft32 /* clang */ # endif #endif diff --git a/src/tests/hash_functions/nmhash_scalar.h b/src/tests/hash_functions/nmhash_scalar.h index bee950670..a2a1a897e 100644 --- a/src/tests/hash_functions/nmhash_scalar.h +++ b/src/tests/hash_functions/nmhash_scalar.h @@ -51,7 +51,8 @@ extern "C" { #endif #if defined(__has_builtin) -# if __has_builtin(__builtin_rotateleft32) +# if __has_builtin(__builtin_rotateleft32) \ + && !(defined(__INTEL_COMPILER) && defined(__APPLE__)) # define NMH_rotl32 __builtin_rotateleft32 /* clang */ # endif #endif From bda3cb9d4b6810e281c48e69a7e953bb40d07429 Mon Sep 17 00:00:00 2001 From: William Clodius Date: Fri, 17 Dec 2021 17:33:01 -1000 Subject: [PATCH 075/106] Added hyphens Changed the "spelling" of "rank one" and "rank 1" to "rank-1". [ticket: X] --- doc/specs/stdlib_hash_procedures.md | 36 ++++++++++++++--------------- 1 file changed, 18 insertions(+), 18 deletions(-) diff --git a/doc/specs/stdlib_hash_procedures.md b/doc/specs/stdlib_hash_procedures.md index b25f683aa..aba77ffd4 100755 --- a/doc/specs/stdlib_hash_procedures.md +++ b/doc/specs/stdlib_hash_procedures.md @@ -19,7 +19,7 @@ As integer comparisons are very efficient, performing an initial comparison of hash codes and then performing a detailed comparison only if the hash codes are equal can improve performance.
The hash codes, in turn, can be mapped to a smaller set of integers, -that can be used as an index, termed a hash index, to a rank one +that can be used as an index, termed a hash index, to a rank-1 array, often termed a hash table. This mapping will be known as a scalar hash. The use of a hash table reduces the number of hash codes that need to @@ -262,7 +262,7 @@ There are two problems in implementing hash functions in Fortran. First, the static typing of Fortran makes it awkward to define general purpose hash functions. Instead hash functions are defined for some of the more common -objects: character strings and rank one arrays of integers. +objects: character strings and rank-1 arrays of integers. Other objects can, in principle, be hashed by using `transfer` to map their contents to an integer array, typically one of kind `INT8`. The other problem is that hash codes are typically defined using @@ -567,7 +567,7 @@ Experimental ##### Description -Calculates a 32 bit hash code from a rank 1 integer array or a default +Calculates a 32 bit hash code from a rank-1 integer array or a default character string. ##### Syntax @@ -581,7 +581,7 @@ Pure/elemental function ##### Argument `key`: Shall be a deferred length default character scalar expression -or a rank 1 integer array expression of kind `INT8`, `INT16`, +or a rank-1 integer array expression of kind `INT8`, `INT16`, `INT32`, or `INT64`. It is an `intent(in)` argument. @@ -629,7 +629,7 @@ Experimental ##### Description -Calculates a 32 bit hash code from a rank 1 integer array or a default +Calculates a 32 bit hash code from a rank-1 integer array or a default character string. ##### Syntax @@ -643,7 +643,7 @@ Pure/elemental function ##### Argument `key`: Shall be a deferred length default character scalar expression -or a rank 1 integer array expression of kind `INT8`, `INT16`, +or a rank-1 integer array expression of kind `INT8`, `INT16`, `INT32`, or `INT64`. It is an `intent(in)` argument. 
@@ -805,7 +805,7 @@ Experimental ##### Description -Calculates a 32 bit hash code from a rank 1 integer array or a default +Calculates a 32 bit hash code from a rank-1 integer array or a default character string, and the input `seed`. ##### Syntax @@ -819,7 +819,7 @@ Pure/elemental function ##### Arguments `key`: Shall be a deferred length default character scalar expression -or a rank 1 integer array expression of kind `INT8`, `INT16`, +or a rank-1 integer array expression of kind `INT8`, `INT16`, `INT32`, or `INT64`. It is an `intent(in)` argument. @@ -867,7 +867,7 @@ Experimental ##### Description -Calculates a 32 bit hash code from a rank 1 integer array or a default +Calculates a 32 bit hash code from a rank-1 integer array or a default character string, and the input `seed`. ##### Syntax @@ -881,7 +881,7 @@ Pure/elemental function ##### Arguments `key`: Shall be a deferred length default character scalar expression -or a rank 1 integer array expression of kind `INT8`, `INT16`, +or a rank-1 integer array expression of kind `INT8`, `INT16`, `INT32`, or `INT64`. It is an `intent(in)` argument. @@ -1028,7 +1028,7 @@ Experimental ##### Description -Calculates a 32 bit hash code from a rank 1 integer array or a default +Calculates a 32 bit hash code from a rank-1 integer array or a default character string, and the input `seed`. ##### Syntax @@ -1042,7 +1042,7 @@ Pure/elemental function ##### Arguments `key`: Shall be a deferred length default character scalar expression -or a rank 1 integer array expression of kind `INT8`, `INT16`, +or a rank-1 integer array expression of kind `INT8`, `INT16`, `INT32`, or `INT64`. It is an `intent(in)` argument. @@ -1199,7 +1199,7 @@ Experimental ##### Description -Calculates a 64 bit hash code from a rank 1 integer array or a default +Calculates a 64 bit hash code from a rank-1 integer array or a default character string. 
##### Syntax @@ -1213,7 +1213,7 @@ Pure/elemental function ##### Argument `key`: Shall be a deferred length default character scalar expression -or a rank 1 integer array expression of kind `INT8`, `INT16`, +or a rank-1 integer array expression of kind `INT8`, `INT16`, `INT32`, or `INT64`. It is an `intent(in)` argument. @@ -1263,7 +1263,7 @@ Experimental ##### Description -Calculates a 64 bit hash code from a rank 1 integer array or a default +Calculates a 64 bit hash code from a rank-1 integer array or a default character string. ##### Syntax @@ -1277,7 +1277,7 @@ Pure/elemental function ##### Argument `key`: Shall be a deferred length default character scalar expression -or a rank 1 integer array expression of kind `INT8`, `INT16`, +or a rank-1 integer array expression of kind `INT8`, `INT16`, `INT32`, or `INT64`. It is an `intent(in)` argument. @@ -1452,7 +1452,7 @@ Pure/elemental function ##### Arguments `key`: shall be a scalar expression of type default character or a -rank 1 integer vector expression of kind `INt8`, `INT16`, `INT32`, or +rank-1 integer vector expression of kind `INt8`, `INT16`, `INT32`, or `INTT64`. It is an `intent(in)` argument. `seed`: shall be an integer expression of kind `INT64`. It is @@ -1512,7 +1512,7 @@ Function ##### Arguments `key`: shall be a scalar of type default character expression or a -Rank 1 integer vector expression of kind `INT8`, `INT16`, `INT32`, or +Rank-1 integer vector expression of kind `INT8`, `INT16`, `INT32`, or `INT64`. It is an `intent(in)` argument. `seed`: shall be a two element integer vector expression of kind From b063a78e2c67cc9e15d40e8d1c9c1a1d29f93c1f Mon Sep 17 00:00:00 2001 From: William Clodius Date: Fri, 17 Dec 2021 17:40:54 -1000 Subject: [PATCH 076/106] KChanged "believed to be" Changed some instances of "believed to be" to considered. 
[ticket: X] --- doc/specs/stdlib_hash_procedures.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/specs/stdlib_hash_procedures.md b/doc/specs/stdlib_hash_procedures.md index aba77ffd4..a1ee8bf49 100755 --- a/doc/specs/stdlib_hash_procedures.md +++ b/doc/specs/stdlib_hash_procedures.md @@ -240,7 +240,7 @@ the hash function used to generate the code and the resulting codes. Non-cryptographic codes, in some circumstances, are believed to be reversible. The modules only implement hash -functions that are believed to be non-cryptographic, with +functions that are considered non-cryptographic, with implementations available in the public domain. There are a number of algorithms available for the computation of From 9e72b87980c7ba8819db3a81ec1e7257dbf80572 Mon Sep 17 00:00:00 2001 From: William Clodius Date: Fri, 17 Dec 2021 17:46:02 -1000 Subject: [PATCH 077/106] Changed spelling of big and little endian Changed "Little Endian" and "little endian" to "little-endian"and similarly for "big-endian". [ticket: X] --- doc/specs/stdlib_hash_procedures.md | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/doc/specs/stdlib_hash_procedures.md b/doc/specs/stdlib_hash_procedures.md index a1ee8bf49..96b5a4a3a 100755 --- a/doc/specs/stdlib_hash_procedures.md +++ b/doc/specs/stdlib_hash_procedures.md @@ -458,8 +458,8 @@ incremental hashing procedures. SpookyHash is optimized for large objects and should give excellent performance for objects greater than about 96 byes, but has significant overhead for smaller objects. -The code was designed for Little Endian compilers, and will give -different results on Big Endian compilers, but the hash quality on +The code was designed for little-endian compilers, and will give +different results on big-endian compilers, but the hash quality on those compilers is probably just as good. 
SpookyHash version 2 passes all of Reini Urban's SMHasher tests, and has one bad seed only when reduced to a 32 bit output. @@ -496,9 +496,9 @@ As `stdlib_32_bit_hash_codes` deals exclusively with 32 bit hash codes, In implementing hash functions it is sometimes necessary to know the "endianess" of the compiler's integers. To this end the `stdlib_32_bit_hash_codes` module defines the logical parameter -`LITTLE_ENDIAN` that, if true, indicates that the compiler has little -endian integers, and that if false indicates that the integers are big -endian. +`LITTLE_ENDIAN` that, if true, indicates that the compiler has +little-endian integers, and that if false indicates that the integers +are big-endian. ### Specifications of the `stdlib_32_bit_hash_codes` procedures @@ -1127,9 +1127,9 @@ As `stdlib_64_bit_hash_codes` deals exclusively with 64 bit hash codes, In implementing hash functions it is sometimes necessary to know the "endianess" of the compiler's integers. To this end the `stdlib_64_bit_hash_codes` module defines the logical parameter -`LITTLE_ENDIAN` that if true indicates that the compiler has little -endian integers, and that if false indicates that the integers are big -endian. +`LITTLE_ENDIAN` that if true indicates that the compiler has +little-endian integers, and that if false indicates that the integers +are big-endian. ### Specifications of the `stdlib_64_bit_hash_codes` procedures @@ -1525,8 +1525,8 @@ The result is a two element integer vector of kind `INT64`. ##### Note `SPOOKY_HASH` is an implementation of the 64 bit version 2 of -SpookyHash of Bob Jenkins. The code was designed for Little-Endian -compilers. The output is different on Big Endian compilers, but still +SpookyHash of Bob Jenkins. The code was designed for little-endian +compilers. The output is different on big-endian compilers, but still probably as good quality. It is often used as a 64 bit hash using the first element of the returned value, but can be used as a 128 bit hash. 
This version of `SPOOKY_HASH` has good performance on small keys From cbda8365cc358e14b140617c839b87e60e993629 Mon Sep 17 00:00:00 2001 From: William Clodius Date: Fri, 17 Dec 2021 18:32:42 -1000 Subject: [PATCH 078/106] Changed "Rank" to "rank" Rank was capitalized when it was the beginning of a line and not of a sentence. [ticket: X] --- doc/specs/stdlib_hash_procedures.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/specs/stdlib_hash_procedures.md b/doc/specs/stdlib_hash_procedures.md index 96b5a4a3a..927c76e2f 100755 --- a/doc/specs/stdlib_hash_procedures.md +++ b/doc/specs/stdlib_hash_procedures.md @@ -1512,7 +1512,7 @@ Function ##### Arguments `key`: shall be a scalar of type default character expression or a -Rank-1 integer vector expression of kind `INT8`, `INT16`, `INT32`, or +rank-1 integer vector expression of kind `INT8`, `INT16`, `INT32`, or `INT64`. It is an `intent(in)` argument. `seed`: shall be a two element integer vector expression of kind From fb3c9e22c66b97794b1b51349804286422094aae Mon Sep 17 00:00:00 2001 From: William Clodius Date: Fri, 17 Dec 2021 18:55:47 -1000 Subject: [PATCH 079/106] Added comment to makefile Added the comment "# -fno-range-check needed for hash functions for gfortran-9" to Makefile.manual, and changed two spaces to one space in the following line. 
[ticket: X] --- Makefile.manual | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/Makefile.manual b/Makefile.manual index 7ec5b8d07..ea83ce570 100644 --- a/Makefile.manual +++ b/Makefile.manual @@ -1,7 +1,7 @@ # Fortran stdlib Makefile -FC ?= gfortran -FFLAGS ?= -Wall -Wextra -Wimplicit-interface -fPIC -g -fcheck=all -fno-range-check +FC ?= gfortran # -fno-range-check needed for hash functions for gfortran-9 +FFLAGS ?= -Wall -Wextra -Wimplicit-interface -fPIC -g -fcheck=all -fno-range-check ADD_FYPPFLAGS ?= VERSION := $(subst ., ,$(file < VERSION)) From 3dd170e8a9a087b3c128b1a8d9849b240f25b23b Mon Sep 17 00:00:00 2001 From: Jeremie Vandenplas Date: Sat, 18 Dec 2021 05:56:42 -0500 Subject: [PATCH 080/106] Changed CMakeLists.txt To avoid issues with Intel compilers and flags --- CMakeLists.txt | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 682e73251..f257972df 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -42,12 +42,11 @@ if(CMAKE_Fortran_COMPILER_ID STREQUAL GNU) add_compile_options(-std=f2018) elseif(CMAKE_Fortran_COMPILER_ID MATCHES "^Intel") if(WIN32) - add_compile_options("$<$<COMPILE_LANGUAGE:Fortran>:/warn:declarations,general,usage,interfaces,unused>") - add_compile_options("$<$<COMPILE_LANGUAGE:Fortran>:/stand:f18>") + set(fortran_flags /stand:f18 /warn:declarations,general,usage,interfaces,unused) else() - add_compile_options("$<$<COMPILE_LANGUAGE:Fortran>:-warn declarations,general,usage,interfaces,unused>") - add_compile_options("$<$<COMPILE_LANGUAGE:Fortran>:-stand f18>") + set(fortran_flags -stand f18 -warn declarations,general,usage,interfaces,unused) endif() + add_compile_options("$<$<COMPILE_LANGUAGE:Fortran>:${fortran_flags}>") endif() # --- compiler feature checks From bef9b98e06dd2debe9a843bc064c1ee5aa5b2a1b Mon Sep 17 00:00:00 2001 From: Jeremie Vandenplas Date: Sat, 18 Dec 2021 12:42:53 +0100 Subject: [PATCH 081/106] add ford links for 32 bit --- src/stdlib_32_bit_hash_codes.fypp | 43 ++++++++++++++++++++++++++----- 1 file changed, 36 insertions(+), 7 deletions(-) diff --git
a/src/stdlib_32_bit_hash_codes.fypp b/src/stdlib_32_bit_hash_codes.fypp index 79b05b366..b658e13ce 100755 --- a/src/stdlib_32_bit_hash_codes.fypp +++ b/src/stdlib_32_bit_hash_codes.fypp @@ -70,8 +70,10 @@ module stdlib_32_bit_hash_codes interface fnv_1_hash +!! Version: experimental +!! !! FNV_1 interfaces - +!! ([Specification](../page/specs/stdlib_hash_procedures.html#fnv_1_hash-calculates-a-hash-code-from-a-key)) #:for k1 in INT_KINDS pure module function ${k1}$_fnv_1( key ) result(hash_code) !! FNV_1 hash function for rank 1 array keys of kind ${k1}$ @@ -90,7 +92,10 @@ module stdlib_32_bit_hash_codes end interface fnv_1_hash interface fnv_1a_hash +!! Version: experimental +!! !! FNV_1A interfaces +!! ([Specification](../page/specs/stdlib_hash_procedures.html#fnv_1a_hash-calculates-a-hash-code-from-a-key)) #:for k1 in INT_KINDS pure module function ${k1}$_fnv_1a( key ) result(hash_value) !! FNV_1A hash function for rank 1 array keys of kind ${k1}$ @@ -109,8 +114,10 @@ module stdlib_32_bit_hash_codes end interface fnv_1a_hash interface nmhash32 -!! NMHASH32 interfaces - +!! Version: experimental +!! +!! NMHASH32 interfaces +!! ([Specification](../page/specs/stdlib_hash_procedures.html#nmhash32-calculates-a-hash-code-from-a-key-and-a-seed)) #:for k1 in INT_KINDS pure module function ${k1}$_nmhash32( key, seed ) & result(hash_value) @@ -133,8 +140,10 @@ module stdlib_32_bit_hash_codes end interface nmhash32 interface nmhash32x -!! NMHASH32X interfaces - +!! Version: experimental +!! +!! NMHASH32X interfaces +!! ([Specification](../page/specs/stdlib_hash_procedures.html#nmhash32x-calculates-a-hash-code-from-a-key-and-a-seed)) #:for k1 in INT_KINDS pure module function ${k1}$_nmhash32x( key, seed ) & result(hash_value) @@ -157,8 +166,10 @@ module stdlib_32_bit_hash_codes end interface nmhash32x interface water_hash +!! Version: experimental +!! !! WATER_HASH interfaces - +!!
([Specification](../page/specs/stdlib_hash_procedures.html#water_hash-calculates-a-hash-code-from-a-key-and-a-seed)) #:for k1 in INT_KINDS pure module function ${k1}$_water_hash( key, seed ) & result(hash_code) @@ -180,7 +191,9 @@ module stdlib_32_bit_hash_codes end interface water_hash interface new_water_hash_seed - +!! Version: experimental +!! +!! ([Specification](../page/specs/stdlib_hash_procedures.html#new_water_hash_seed-returns-a-valid-input-seed-for-water_hash)) module subroutine new_water_hash_seed( seed ) integer(int64), intent(inout) :: seed end subroutine new_water_hash_seed @@ -188,6 +201,9 @@ module stdlib_32_bit_hash_codes end interface new_water_hash_seed interface new_nmhash32_seed +!! Version: experimental +!! +!! ([Specification](../page/specs/stdlib_hash_procedures.html#new_nmhash32_seed-returns-a-valid-input-seed-for-nmhash32)) module subroutine new_nmhash32_seed( seed ) integer(int32), intent(inout) :: seed @@ -196,6 +212,9 @@ module stdlib_32_bit_hash_codes end interface new_nmhash32_seed interface new_nmhash32x_seed +!! Version: experimental +!! +!! ([Specification](../page/specs/stdlib_hash_procedures.html#new_nmhash32x_seed-returns-a-valid-input-seed-for-nmhash32x)) module subroutine new_nmhash32x_seed( seed ) integer(int32), intent(inout) :: seed @@ -206,8 +225,12 @@ module stdlib_32_bit_hash_codes contains elemental function fibonacci_hash( key, nbits ) result( sample ) +!! Version: experimental +!! !! Maps the 32 bit integer KEY to an unsigned integer value with only NBITS !! bits where NBITS is less than 32 +!! ([Specification](../page/specs/stdlib_hash_procedures.html#fibonacci_hash-maps-an-integer-to-a-smaller-number-of-bits)) + integer(int32), intent(in) :: key integer, intent(in) :: nbits integer(int32) :: sample @@ -217,8 +240,11 @@ contains end function fibonacci_hash elemental function universal_mult_hash( key, seed, nbits ) result( sample ) +!! Version: experimental +!! !!
Uses the "random" odd 32 bit integer SEED to map the 32 bit integer KEY to !! an unsigned integer value with only NBITS bits where NBITS is less than 32 +!! ([Specification](../page/specs/stdlib_hash_procedures.html#universal_mult_hash-maps-an-integer-to-a-smaller-number-of-bits)) integer(int32), intent(in) :: key integer(int32), intent(in) :: seed integer, intent(in) :: nbits @@ -229,8 +255,11 @@ contains end function universal_mult_hash subroutine odd_random_integer( harvest ) +!! Version: experimental +!! !! Returns a 32 bit pseudo random integer, HARVEST, distributed uniformly over !! the odd integers of the INT32 kind. +!! ([Specification](../page/specs/stdlib_hash_procedures.html#odd_random_integer-returns-an-odd-integer)) integer(int32), intent(out) :: harvest real(dp) :: sample From 5b14d0c47aef8c812ada3daf657f937cb478a1f8 Mon Sep 17 00:00:00 2001 From: Jeremie Vandenplas Date: Sat, 18 Dec 2021 13:17:17 +0100 Subject: [PATCH 082/106] resolve some issues in specs 64 bit --- doc/specs/stdlib_hash_procedures.md | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/doc/specs/stdlib_hash_procedures.md b/doc/specs/stdlib_hash_procedures.md index 927c76e2f..f7f4370e0 100755 --- a/doc/specs/stdlib_hash_procedures.md +++ b/doc/specs/stdlib_hash_procedures.md @@ -1204,7 +1204,7 @@ character string. ##### Syntax -`code = [[stdlib_64_bit_hash_codes:fnv_1]]( key )` +`code = [[stdlib_64_bit_hash_codes:fnv_1_hash]]( key )` ##### Class @@ -1268,7 +1268,7 @@ character string. ##### Syntax -`code = [[stdlib_64_bit_hash_codes:fnv_1a]]( key )` +`code = [[stdlib_64_bit_hash_codes:fnv_1a_hash]]( key )` ##### Class @@ -1331,7 +1331,7 @@ seed for `PENGY_HASH` and is also different from the input seed. ##### Syntax -`code = call [[stdlib_32_bit_hash_codes:new_pengy_hash_seed]]( seed )` +`code = call [[stdlib_64_bit_hash_codes:new_pengy_hash_seed]]( seed )` ##### Class @@ -1370,7 +1370,7 @@ from the input seed. 
##### Syntax -`code = call [[stdlib_32_bit_hash_codes:new_spooky_hash_seed]]( seed )` +`code = call [[stdlib_64_bit_hash_codes:new_spooky_hash_seed]]( seed )` ##### Class From 1a8a5e3dfbe7ce6f703c465632831b3db603f880 Mon Sep 17 00:00:00 2001 From: Jeremie Vandenplas Date: Sat, 18 Dec 2021 13:17:32 +0100 Subject: [PATCH 083/106] add some ford links 64 bit --- src/stdlib_64_bit_hash_codes.fypp | 45 +++++++++++++++++++++++++++---- 1 file changed, 40 insertions(+), 5 deletions(-) diff --git a/src/stdlib_64_bit_hash_codes.fypp b/src/stdlib_64_bit_hash_codes.fypp index 45c721db1..9581bf589 100755 --- a/src/stdlib_64_bit_hash_codes.fypp +++ b/src/stdlib_64_bit_hash_codes.fypp @@ -89,8 +89,10 @@ module stdlib_64_bit_hash_codes end type spooky_subhash interface fnv_1_hash +!! Version: experimental +!! !! FNV_1 interfaces - +!! ([Specification](../page/specs/stdlib_hash_procedures.html#fnv_1-calculates-a-hash-code-from-a-key)) #:for k1 in INT_KINDS pure module function ${k1}$_fnv_1( key ) result(hash_code) !! FNV_1 hash function for rank 1 arrays of kind ${k1}$ @@ -109,7 +111,10 @@ module stdlib_64_bit_hash_codes interface fnv_1a_hash +!! Version: experimental +!! !! FNV_1A interfaces +!! ([Specification](../page/specs/stdlib_hash_procedures.html#fnv_1a-calculates-a-hash-code-from-a-key)) #:for k1 in INT_KINDS pure module function ${k1}$_fnv_1a( key ) result(hash_code) !! FNV_1A hash function for rank 1 arrays of kind ${k1}$ @@ -127,8 +132,10 @@ module stdlib_64_bit_hash_codes end interface fnv_1a_hash interface spooky_hash +!! Version: experimental +!! !! SPOOKY_HASH interfaces - +!!([Specification](../page/specs/stdlib_hash_procedures.html#spooky_hash-maps-a-character-string-or-integer-vector-to-an-integer)) #:for k1 in INT_KINDS module function ${k1}$_spooky_hash( key, seed ) & result(hash_code) @@ -152,6 +159,8 @@ module stdlib_64_bit_hash_codes interface module subroutine spookyHash_128( key, hash_inout ) +!! Version: experimental +!! 
integer(int8), intent(in), target :: key(0:) integer(int_hash), intent(inout) :: hash_inout(2) end subroutine spookyHash_128 @@ -160,6 +169,8 @@ module stdlib_64_bit_hash_codes interface spooky_init +!! Version: experimental +!! pure module subroutine spookysubhash_init( self, seed ) type(spooky_subhash), intent(out) :: self @@ -172,6 +183,8 @@ module stdlib_64_bit_hash_codes interface spooky_update module subroutine spookyhash_update( spooky, key ) +!! Version: experimental +!! type(spooky_subhash), intent(inout) :: spooky integer(int8), intent(in) :: key(0:) end subroutine spookyhash_update @@ -182,6 +195,8 @@ module stdlib_64_bit_hash_codes interface spooky_final module subroutine spookyhash_final(spooky, hash_code) +!! Version: experimental +!! type(spooky_subhash), intent(inout) :: spooky integer(int_hash), intent(inout) :: hash_code(2) end subroutine spookyhash_final @@ -191,15 +206,19 @@ module stdlib_64_bit_hash_codes interface module subroutine new_spooky_hash_seed( seed ) -! Random SEED generator for +!! Version: experimental +!! +!! Random SEED generator for SPOOKY_HASH integer(int64), intent(inout) :: seed(2) end subroutine new_spooky_hash_seed end interface interface pengy_hash +!! Version: experimental +!! !! PENGY_HASH interfaces - +!! ([Specification](../page/specs/stdlib_hash_procedures.html#pengy_hash-maps-a-character-string-or-integer-vector-to-an-integer)) #:for k1 in INT_KINDS pure module function ${k1}$_pengy_hash( key, seed ) result(hash_code) !! PENGY_HASH hash function for rank 1 array keys of kind ${k1}$ @@ -222,7 +241,9 @@ interface interface module subroutine new_pengy_hash_seed( seed ) -! Random SEED generator for MIR_HASH_STRICT +!! Version: experimental +!! +!! Random SEED generator for PENGY_HASH integer(int32), intent(inout) :: seed end subroutine new_pengy_hash_seed @@ -231,8 +252,12 @@ interface contains elemental function fibonacci_hash( key, nbits ) result( sample ) +!! Version: experimental +!! !!
Maps the 64 bit integer KEY to an unsigned integer value with only NBITS !! bits where NBITS is less than 64 +!! ([Specification](../page/specs/stdlib_hash_procedures.html#fibonacci_hash-maps-an-integer-to-a-smaller-number-of-bits_1)) + integer(int64), intent(in) :: key integer, intent(in) :: nbits integer(int64) :: sample @@ -242,8 +267,12 @@ contains end function fibonacci_hash elemental function universal_mult_hash( key, seed, nbits ) result( sample ) +!! Version: experimental +!! !! Uses the "random" odd 64 bit integer SEED to map the 64 bit integer KEY to !! an unsigned integer value with only NBITS bits where NBITS is less than 64. +!! ([Specification](../page/specs/stdlib_hash_procedures.html#universal_mult_hash-maps-an-integer-to-a-smaller-number-of-bits_1)) + integer(int64), intent(in) :: key integer(int64), intent(in) :: seed integer, intent(in) :: nbits @@ -254,8 +283,12 @@ contains end function universal_mult_hash subroutine odd_random_integer( harvest ) +!! Version: experimental +!! !! Returns a 64 bit pseudo random integer, HARVEST, distributed uniformly over !! the odd integers of the 64 bit kind. +!! ([Specification](../page/specs/stdlib_hash_procedures.html#odd_random_integer-returns-odd-integer)) + integer(int64), intent(out) :: harvest real(dp) :: sample(2) integer(int32) :: part(2) @@ -268,6 +301,8 @@ contains end subroutine odd_random_integer subroutine random_integer( harvest ) +!! Version: experimental +!! !! Returns a 64 bit pseudo random integer, HARVEST, distributed uniformly over !! the values of the 64 bit kind. 
integer(int64), intent(out) :: harvest From 6d5d6f11d1fa0c21af7f06498e6728a5b1c50f7c Mon Sep 17 00:00:00 2001 From: Jeremie Vandenplas Date: Sat, 18 Dec 2021 13:26:00 +0100 Subject: [PATCH 084/106] format examples --- doc/specs/stdlib_hash_procedures.md | 310 ++++++++++++++-------------- 1 file changed, 155 insertions(+), 155 deletions(-) diff --git a/doc/specs/stdlib_hash_procedures.md b/doc/specs/stdlib_hash_procedures.md index f7f4370e0..8c5787b9e 100755 --- a/doc/specs/stdlib_hash_procedures.md +++ b/doc/specs/stdlib_hash_procedures.md @@ -544,19 +544,19 @@ E. Knuth. It multiplies the `KEY` by the odd valued approximation to ##### Example ```fortran - program demo_fibonacci_hash - use stdlib_32_bit_hash_codes, only: fibonacci_hash - use iso_fortran_env, only: int32 - implicit none - integer, allocatable :: array1(:) - integer(int32) :: hash, source - allocate( array1(0:2**6-1) ) - array1(:) = 0 - source = 42_int32 - hash = fibonacci_hash(source, 6) - array1(hash) = source - print *, hash - end program demo_fibonacci_hash +program demo_fibonacci_hash + use stdlib_32_bit_hash_codes, only: fibonacci_hash + use iso_fortran_env, only: int32 + implicit none + integer, allocatable :: array1(:) + integer(int32) :: hash, source + allocate( array1(0:2**6-1) ) + array1(:) = 0 + source = 42_int32 + hash = fibonacci_hash(source, 6) + array1(hash) = source + print *, hash +end program demo_fibonacci_hash ``` #### `FNV_1_HASH`- calculates a hash code from a key @@ -610,14 +610,14 @@ function for character strings. 
##### Example ```fortran - program demo_fnv_1_hash - use stdlib_32_bit_hash_codes, only: fnv_1_hash - use iso_fortran_env, only: int32 - implicit none - integer(int32) :: hash - hash = fnv_1_hash([ 5, 4, 3, 1, 10, 4, 9]) - print *, hash - end program demo_fnv_1_hash +program demo_fnv_1_hash + use stdlib_32_bit_hash_codes, only: fnv_1_hash + use iso_fortran_env, only: int32 + implicit none + integer(int32) :: hash + hash = fnv_1_hash([ 5, 4, 3, 1, 10, 4, 9]) + print *, hash +end program demo_fnv_1_hash ``` @@ -671,14 +671,14 @@ function for character strings. ##### Example ```fortran - program demo_fnv_1a_hash - use stdlib_32_bit_hash_codes, only: fnv_1a_hash - use iso_fortran_env, only: int32 - implicit none - integer(int32) :: hash - hash = fnv_1a_hash( [ 5, 4, 3, 1, 10, 4, 9] ) - print *, hash - end program demo_fnv_1a_hash +program demo_fnv_1a_hash + use stdlib_32_bit_hash_codes, only: fnv_1a_hash + use iso_fortran_env, only: int32 + implicit none + integer(int32) :: hash + hash = fnv_1a_hash( [ 5, 4, 3, 1, 10, 4, 9] ) + print *, hash +end program demo_fnv_1a_hash ``` @@ -845,17 +845,17 @@ function for character strings. ##### Example ```fortran - program demo_nmhash32 - use stdlib_32_bit_hash_codes, only: nmhash32, & - new_nmhash32_seed - use iso_fortran_env, only: int32 - implicit none - integer(int32) :: hash - integer(int32) :: seed = 42_int32 - call new_nmhash32_seed(seed) - hash = nmhash32([ 5, 4, 3, 1, 10, 4, 9], seed) - print *, seed, hash - end program demo_nmhash32 +program demo_nmhash32 + use stdlib_32_bit_hash_codes, only: nmhash32, & + new_nmhash32_seed + use iso_fortran_env, only: int32 + implicit none + integer(int32) :: hash + integer(int32) :: seed = 42_int32 + call new_nmhash32_seed(seed) + hash = nmhash32([ 5, 4, 3, 1, 10, 4, 9], seed) + print *, seed, hash +end program demo_nmhash32 ``` @@ -907,17 +907,17 @@ function for character strings. 
##### Example ```fortran - program demo_nmhash32x - use stdlib_32_bit_hash_codes, only: nmhash32x, & - new_nmhash32x_seed - use iso_fortran_env, only: int32 - implicit none - integer(int32) :: hash - integer(int32) :: seed = 42_int32 - call new_nmhash32x_seed(seed) - hash = nmhash32x([ 5, 4, 3, 1, 10, 4, 9], seed) - print *, seed, hash - end program demo_nmhash32x +program demo_nmhash32x + use stdlib_32_bit_hash_codes, only: nmhash32x, & + new_nmhash32x_seed + use iso_fortran_env, only: int32 + implicit none + integer(int32) :: hash + integer(int32) :: seed = 42_int32 + call new_nmhash32x_seed(seed) + hash = nmhash32x([ 5, 4, 3, 1, 10, 4, 9], seed) + print *, seed, hash +end program demo_nmhash32x ``` #### `ODD_RANDOM_INTEGER` - returns an odd integer @@ -1000,24 +1000,24 @@ It multiplies the `KEY` by `SEED`, and returns the ##### Example ```fortran - program demo_universal_mult_hash - use stdlib_32_bit_hash_codes, only: odd_random_integer, & - universal_mult_hash - use iso_fortran_env, only: int32 - implicit none - integer, allocatable :: array1(:) - integer(int32) :: hash, i, seed, source - seed = 0 - allocate( array1(0:2**6-1) ) - do i = 0, 2**6-1 - array1(i) = i - end do - call odd_random_integer( seed ) - source = 42_int32 - hash = universal_mult_hash(source, seed, 6) - array1(hash) = source - print *, seed, hash, array1 - end program demo_odd_random_integer +program demo_universal_mult_hash + use stdlib_32_bit_hash_codes, only: odd_random_integer, & + universal_mult_hash + use iso_fortran_env, only: int32 + implicit none + integer, allocatable :: array1(:) + integer(int32) :: hash, i, seed, source + seed = 0 + allocate( array1(0:2**6-1) ) + do i = 0, 2**6-1 + array1(i) = i + end do + call odd_random_integer( seed ) + source = 42_int32 + hash = universal_mult_hash(source, seed, 6) + array1(hash) = source + print *, seed, hash, array1 +end program demo_universal_mult_hash ``` #### `WATER_HASH`- calculates a hash code from a key and a seed @@ -1075,17 +1075,17 @@
function for character strings. ##### Example ```fortran - program demo_water_hash - use stdlib_32_bit_hash_codes, only: water_hash, & - new_water_hash_seed - use iso_fortran_env, only: int32, int64 - implicit none - integer(int32) :: hash - integer(int64) :: seed = 42_int64 - call new_water_hash_seed( seed ) - hash = water_hash([ 5, 4, 3, 1, 10, 4, 9], seed) - print *, hash, seed - end program demo_water_hash +program demo_water_hash + use stdlib_32_bit_hash_codes, only: water_hash, & + new_water_hash_seed + use iso_fortran_env, only: int32, int64 + implicit none + integer(int32) :: hash + integer(int64) :: seed = 42_int64 + call new_water_hash_seed( seed ) + hash = water_hash([ 5, 4, 3, 1, 10, 4, 9], seed) + print *, hash, seed +end program demo_water_hash ``` ## The `stdlib_64_bit_hash_codes` module @@ -1176,19 +1176,19 @@ E. Knuth. It multiplies the `KEY` by the odd valued approximation to ##### Example ```fortran - program demo_fibonacci_hash - use stdlib_64_bit_hash_codes, only: fibonacci_hash - use iso_fortran_env, only: int64 - implicit none - integer, allocatable :: array1(:) - integer(int64) :: hash, source - allocate( array1(0:2**6-1) ) - array1(:) = 0 - source = int(Z'1FFFFFFFF', int64) - hash = fibonacci_hash(source, 6) - azray1(hash) = source - print *, hash - end program demo_fibonacci_hash +program demo_fibonacci_hash + use stdlib_64_bit_hash_codes, only: fibonacci_hash + use iso_fortran_env, only: int64 + implicit none + integer, allocatable :: array1(:) + integer(int64) :: hash, source + allocate( array1(0:2**6-1) ) + array1(:) = 0 + source = int(Z'1FFFFFFFF', int64) + hash = fibonacci_hash(source, 6) + array1(hash) = source + print *, hash +end program demo_fibonacci_hash ``` #### `FNV_1`- calculates a hash code from a key @@ -1242,16 +1242,16 @@ function for character strings. 
##### Example ```fortran - program demo_fnv_1_hash - use stdlib_64_bit_hash_codes, only: fnv_1_hash - use iso_fortran_env, only: int64 - implicit none - integer, allocatable :: array1(:) - integer(int64) :: hash - array1 = [ 5, 4, 3, 1, 10, 4, 9] - hash = fnv_1_hash(array1) - print *, hash - end program demo_fnv_1_hash +program demo_fnv_1_hash + use stdlib_64_bit_hash_codes, only: fnv_1_hash + use iso_fortran_env, only: int64 + implicit none + integer, allocatable :: array1(:) + integer(int64) :: hash + array1 = [ 5, 4, 3, 1, 10, 4, 9] + hash = fnv_1_hash(array1) + print *, hash +end program demo_fnv_1_hash ``` @@ -1305,16 +1305,16 @@ function for character strings. ##### Example ```fortran - program demo_fnv_1a_hash - use stdlib_64_bit_hash_codes, only: fnv_1a_hash - use iso_fortran_env, only: int64 - implicit none - integer, allocatable :: array1(:) - integer(int64) :: hash - array1 = [ 5, 4, 3, 1, 10, 4, 9] - hash = fnv_1a_hash(array1) - print *, hash - end program demo_fnv_1a_hash +program demo_fnv_1a_hash + use stdlib_64_bit_hash_codes, only: fnv_1a_hash + use iso_fortran_env, only: int64 + implicit none + integer, allocatable :: array1(:) + integer(int64) :: hash + array1 = [ 5, 4, 3, 1, 10, 4, 9] + hash = fnv_1a_hash(array1) + print *, hash +end program demo_fnv_1a_hash ``` @@ -1474,19 +1474,19 @@ function for character strings. 
##### Exampl ```fortran - program demo_pengy_hash - use stdlib_64_bit_hash_codes, only: new_pengy_hash_seed, pengy_hash - use iso_fortran_env, only: int32, int64 - implicit none - integer, allocatable :: key(:) - integer(int64) :: hash - integer(int32) :: seed - key = [ 0, 1, 2, 3 ] - seed = 0_int32 - call new_pengy_hash_seed( seed ) - hash = pengy_hash( key, seed ) - print *, seed, hash - end program demo_pengy_hash +program demo_pengy_hash + use stdlib_64_bit_hash_codes, only: new_pengy_hash_seed, pengy_hash + use iso_fortran_env, only: int32, int64 + implicit none + integer, allocatable :: key(:) + integer(int64) :: hash + integer(int32) :: seed + key = [ 0, 1, 2, 3 ] + seed = 0_int32 + call new_pengy_hash_seed( seed ) + hash = pengy_hash( key, seed ) + print *, seed, hash +end program demo_pengy_hash ``` @@ -1536,19 +1536,19 @@ and has no known bad seeds. ##### Example ```fortran - program demo_spooky_hash - use stdlib_64_bit_hash_codes, only: new_spooky_hash_seed, & - spooky_hash - use iso_fortran_env, only: int64 - implicit none - integer, allocatable :: key(:) - integer(int64) :: hash(2), seed(2) - key = [ 0, 1, 2, 3 ] - seed = [ 119_int64, 2_int64**41-1 ] - call new_spooky_hash_seed( seed ) - hash = spooky_hash( key, seed ) - print *, seed, hash - end program demo_spooky_hash +program demo_spooky_hash + use stdlib_64_bit_hash_codes, only: new_spooky_hash_seed, & + spooky_hash + use iso_fortran_env, only: int64 + implicit none + integer, allocatable :: key(:) + integer(int64) :: hash(2), seed(2) + key = [ 0, 1, 2, 3 ] + seed = [ 119_int64, 2_int64**41-1 ] + call new_spooky_hash_seed( seed ) + hash = spooky_hash( key, seed ) + print *, seed, hash +end program demo_spooky_hash ``` #### `UNIVERSAL_MULT_HASH` - maps an integer to a smaller number of bits @@ -1597,22 +1597,22 @@ It multiplies the `KEY` by `SEED`, and returns the ```fortran - program demo_universal_mult_hash - use stdlib_32_bit_hash_codes, only: odd_random_integer, & - universal_mult_hash - use 
iso_fortran_env, only: int64 - implicit none - integer, allocatable :: array1(:) - integer(int64) :: hash, i, seed, source - seed = 0 - allocate( array1(0:2**6-1) ) - array1 = 0 - call odd_random_integer( seed ) - source = 42_int64 - hash = universal_mult_hash(source, seed, 6) - azray1(hash) = source - print *, seed, hash, array1 - end program demo_universal_mult_hash +program demo_universal_mult_hash + use stdlib_32_bit_hash_codes, only: odd_random_integer, & + universal_mult_hash + use iso_fortran_env, only: int64 + implicit none + integer, allocatable :: array1(:) + integer(int64) :: hash, i, seed, source + seed = 0 + allocate( array1(0:2**6-1) ) + array1 = 0 + call odd_random_integer( seed ) + source = 42_int64 + hash = universal_mult_hash(source, seed, 6) + array1(hash) = source + print *, seed, hash, array1 +end program demo_universal_mult_hash ``` From 5035b14e2e9f4d183bdba5a20e006f4b31b5d033 Mon Sep 17 00:00:00 2001 From: "William B. Clodius" Date: Sat, 18 Dec 2021 17:55:05 -1000 Subject: [PATCH 085/106] Update Makefile.manual Co-authored-by: Sebastian Ehlert <28669218+awvwgk@users.noreply.github.com> --- Makefile.manual | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/Makefile.manual b/Makefile.manual index ea83ce570..288c18f01 100644 --- a/Makefile.manual +++ b/Makefile.manual @@ -1,6 +1,7 @@ # Fortran stdlib Makefile -FC ?= gfortran # -fno-range-check needed for hash functions for gfortran-9 +FC ?= gfortran +# -fno-range-check needed for hash functions for gfortran-9 FFLAGS ?= -Wall -Wextra -Wimplicit-interface -fPIC -g -fcheck=all -fno-range-check ADD_FYPPFLAGS ?= From 5e94b74f829606d80c34b1dd2c87324db03abe93 Mon Sep 17 00:00:00 2001 From: William Clodius Date: Sat, 18 Dec 2021 17:59:54 -1000 Subject: [PATCH 086/106] Renamed filed Changed stdlib_64_bit_hash_codes.fypp -> stdlib_hash_64bit.fypp [ticket: X] --- src/{stdlib_64_bit_hash_codes.fypp => stdlib_hash_64bit.fypp} | 0 1 file changed, 0 insertions(+), 0 deletions(-) rename 
src/{stdlib_64_bit_hash_codes.fypp => stdlib_hash_64bit.fypp} (100%) diff --git a/src/stdlib_64_bit_hash_codes.fypp b/src/stdlib_hash_64bit.fypp similarity index 100% rename from src/stdlib_64_bit_hash_codes.fypp rename to src/stdlib_hash_64bit.fypp From 58a0a1e9e326ad2c61d88e259406d17f5a33909f Mon Sep 17 00:00:00 2001 From: William Clodius Date: Sat, 18 Dec 2021 18:11:55 -1000 Subject: [PATCH 087/106] Renamed file Renamed stdlib_32_bit_hash_codes.fypp -> stdlib_hash_32bit.fypp [ticket: X] --- src/{stdlib_32_bit_hash_codes.fypp => stdlib_hash_32bit.fypp} | 0 1 file changed, 0 insertions(+), 0 deletions(-) rename src/{stdlib_32_bit_hash_codes.fypp => stdlib_hash_32bit.fypp} (100%) diff --git a/src/stdlib_32_bit_hash_codes.fypp b/src/stdlib_hash_32bit.fypp similarity index 100% rename from src/stdlib_32_bit_hash_codes.fypp rename to src/stdlib_hash_32bit.fypp From da366e8ffd52ba40218a3033dab2fc66432e8288 Mon Sep 17 00:00:00 2001 From: William Clodius Date: Sat, 18 Dec 2021 18:43:16 -1000 Subject: [PATCH 088/106] Updated to new module names Consistently used `stdlib_hash_32bit` and `stdlib_hash_64bit` to refer to the hash code modules in the documentation. [ticket: X] --- doc/specs/stdlib_hash_procedures.md | 102 ++++++++++++++-------------- 1 file changed, 51 insertions(+), 51 deletions(-) diff --git a/doc/specs/stdlib_hash_procedures.md b/doc/specs/stdlib_hash_procedures.md index 8c5787b9e..9f881290b 100755 --- a/doc/specs/stdlib_hash_procedures.md +++ b/doc/specs/stdlib_hash_procedures.md @@ -2,7 +2,7 @@ title: Hash procedures --- -# The `stdlib_32_bit_hash_codes` and `stdlib_64_bit_hash_codes` modules +# The `stdlib_hash_32bit` and `stdlib_hash_64bit` modules [TOC] @@ -210,19 +210,19 @@ readers of this document: The Standard Library provides two modules implementing hash functions and scalar hashes.
-The `stdlib_32_bit_hash_functions` module provides procedures to +The `stdlib_hash_32bit` module provides procedures to compute 32 bit integer hash codes and a scalar hash. The hash codes can be used for tables of up to `2**30` entries, and for keys with a few hundred elements, but performance has only been tested for tables up to `2**16` entries and performance may degrade for larger numbers of entries. -The `stdlib_64_bit_hash_functions` module provides hash procedures to +The `stdlib_hash_64bit` module provides hash procedures to compute 64 bit integer hash codes and a scalar hash. The hash codes can, in principle, be used for tables of up to `2**62` entries, and for keys with a few thousand elements, but testing of performance has only been been for tables up to `2**16`elements and performance may degrade for larger numbers of entries. -While one of the codes in `stdlib_64_bit_hash_functions`, +While one of the codes in `stdlib_hash_64bit`, `SPSOOKY_HASH`, can also be used to calculate 128 bit hash codes, none of the current codes can be used to calculate 256 bit hash codes. Such larger hash codes are useful for larger hash tables and keys, and @@ -354,7 +354,7 @@ that are vectors of `INT16`, `INT32` and `INT64` integers, or default character strings, in the expectation that inlining will eliminate the overhead of transferring the other keys to `INT8` integer vectors. -The `stdlib_32_bit_hash_functions` module provides +The `stdlib_hash_32bit` module provides implementations of five hash code algorithms: the *FNV_1* and *FNV_1A* variants of Glenn Fowler, Landon Curt Noll, and Kiem-Phong Vo; @@ -374,7 +374,7 @@ generating seeds for `UNIVERSAL_MULT_HASH`. All assume a two's complement sign bit, and no out of range checks. 
-The `stdlib_64_bit_hash_functions` module also provides +The `stdlib_hash_64bit` module also provides implementations of four hash code algorithms: the *FNV_1* and *FNV_1A* variants of Glenn Fowler, Landon Curt Noll, and Kiem-Phong Vo; @@ -466,7 +466,7 @@ has one bad seed only when reduced to a 32 bit output. Its only potential problem is undefined behavior if the key is misaligned. -## The `stdlib_32_bit_hash_codes` module +## The `stdlib_hash_32bit` module ### Overview of the module @@ -474,7 +474,7 @@ Thirty two bit hash functions are primarily useful for generating hash codes and hash indices for hash tables. They tend to be less useful for generating checksums, which generally benefit from having a larger number of bits. -The `stdlib_32_bit_hash_codes` module defines five public overloaded +The `stdlib_hash_32bit` module defines five public overloaded 32 bit hash code functions, `FNV_1`, `FNV-1A`, `NMHASH32`, `NMHASH32x` and `WATER_HASH`, two scalar hash functions, `FIBONACCI_HASH` and `UNIVERSAL_MULT_HASH`, four seed generators, `ODD_RANDOM_INTEGER` for @@ -488,19 +488,19 @@ the machine dependence of the hash codes. It is necessary to define the kind of integer used to return the hash code. -As `stdlib_32_bit_hash_codes` deals exclusively with 32 bit hash codes, +As `stdlib_hash_32bit` deals exclusively with 32 bit hash codes, `INT_HASH` is an alias for the integer kind `INT32`. ### The `LITTLE_ENDIAN` parameter In implementing hash functions it is sometimes necessary to know the "endianess" of the compiler's integers. To this end the -`stdlib_32_bit_hash_codes` module defines the logical parameter +`stdlib_hash_32bit` module defines the logical parameter `LITTLE_ENDIAN` that, if true, indicates that the compiler has little-endian integers, and that if false indicates that the integers are big-endian. 
-### Specifications of the `stdlib_32_bit_hash_codes` procedures +### Specifications of the `stdlib_hash_32bit` procedures #### `FIBONACCI_HASH` - maps an integer to a smaller number of bits @@ -515,7 +515,7 @@ in mapping hash codes into small arrays. ##### Syntax -`code = [[stdlib_32_bit_hash_codes:fibonacci_hash]]( key, nbits )` +`code = [[stdlib_hash_32bit:fibonacci_hash]]( key, nbits )` ##### Class @@ -545,7 +545,7 @@ E. Knuth. It multiplies the `KEY` by the odd valued approximation to ```fortran program demo_fibonacci_hash - use stdlib_32_bit_hash_codes, only: fibonacci_hash + use stdlib_hash_32bit, only: fibonacci_hash use iso_fortran_env, only: int32 implicit none integer, allocatable :: array1(:) @@ -572,7 +572,7 @@ character string. ##### Syntax -`code = [[stdlib_32_bit_hash_codes:fnv_1_hash]]( key )` +`code = [[stdlib_hash_32bit:fnv_1_hash]]( key )` ##### Class @@ -611,7 +611,7 @@ function for character strings. ```fortran program demo_fnv_1_hash - use stdlib_32_bit_hash_codes, only: fnv_1_hash + use stdlib_hash_32bit, only: fnv_1_hash use iso_fortran_env, only: int32 implicit none integer(int32) :: hash @@ -634,7 +634,7 @@ character string. ##### Syntax -`code = [[stdlib_32_bit_hash_codes:fnv_1a_hash]]( key )` +`code = [[stdlib_hash_32bit:fnv_1a_hash]]( key )` ##### Class @@ -672,7 +672,7 @@ function for character strings. ```fortran program demo_fnv_1a_hash - use stdlib_32_bit_hash_codes, only: fnv_1a_hash + use stdlib_hash_32bit, only: fnv_1a_hash use iso_fortran_env, only: int32 implicit none integer(int32) :: hash @@ -695,7 +695,7 @@ seed for `NMHASH32` and is also different from the input seed. ##### Syntax -`code = call [[stdlib_32_bit_hash_codes:new_nmhash32_seed]]( seed )` +`code = call [[stdlib_hash_32bit:new_nmhash32_seed]]( seed )` ##### Class @@ -733,7 +733,7 @@ seed for `NMHASH32X` and is also different from the input seed. 
##### Syntax -`code = call [[stdlib_32_bit_hash_codes:new_nmhash32x_seed]]( seed )` +`code = call [[stdlib_hash_32bit:new_nmhash32x_seed]]( seed )` ##### Class @@ -771,7 +771,7 @@ seed for `WATER_HASH` and is also different from the input seed. ##### Syntax -`code = call [[stdlib_32_bit_hash_codes:new_water_hash_seed]]( seed )` +`code = call [[stdlib_hash_32bit:new_water_hash_seed]]( seed )` ##### Class @@ -810,7 +810,7 @@ character string, and the input `seed`. ##### Syntax -`code = [[stdlib_32_bit_hash_codes:nmhash32]]( key, seed )` +`code = [[stdlib_hash_32bit:nmhash32]]( key, seed )` ##### Class @@ -846,7 +846,7 @@ function for character strings. ```fortran program demo_nmhash32 - use stdlib_32_bit_hash_codes, only: nmhash32, & + use stdlib_hash_32bit, only: nmhash32, & new_nmhash32_seed use iso_fortran_env, only: int32 implicit none @@ -872,7 +872,7 @@ character string, and the input `seed`. ##### Syntax -`code = [[stdlib_32_bit_hash_codes:nmhash32x]]( key, seed )` +`code = [[stdlib_hash_32bit:nmhash32x]]( key, seed )` ##### Class @@ -908,7 +908,7 @@ function for character strings. ```fortran program demo_nmhash32x - use stdlib_32_bit_hash_codes, only: nmhash32x, & + use stdlib_hash_32bit, only: nmhash32x, & new_nmhash32x_seed use iso_fortran_env, only: int32 implicit none @@ -932,7 +932,7 @@ Returns a random 32 bit integer distributed uniformly over the odd values. ##### Syntax -`call [[stdlib_32_bit_hash_codes:odd_random_integer]]( harvest )` +`call [[stdlib_hash_32bit:odd_random_integer]]( harvest )` ##### Class @@ -968,7 +968,7 @@ in mapping a hash value to a range 0 to `2**nbits-1`. 
##### Syntax -`code = [[stdlib_32_bit_hash_codes:universal_mult_hash]]( key, seed, nbits )` +`code = [[stdlib_hash_32bit:universal_mult_hash]]( key, seed, nbits )` ##### Class @@ -1001,7 +1001,7 @@ It multiplies the `KEY` by `SEED`, and returns the ```fortran program demo_universal_mult_hash - use stdlib_32_bit_hash_codes, only: odd_random_integer, & + use stdlib_hash_32bit, only: odd_random_integer, & universal_mult_hash use iso_fortran_env, only: int32 implicit none @@ -1033,7 +1033,7 @@ character string, and the input `seed`. ##### Syntax -`code = [[stdlib_32_bit_hash_codes:water_hash]]( key, seed )` +`code = [[stdlib_hash_32bit:water_hash]]( key, seed )` ##### Class @@ -1076,7 +1076,7 @@ function for character strings. ```fortran program demo_water_hash - use stdlib_32_bit_hash_codes, only: water_hash, & + use stdlib_hash_32bit, only: water_hash, & new_water_hash_seed use iso_fortran_env, only: int32, int64 implicit none @@ -1088,7 +1088,7 @@ program demo_water_hash end program demo_water_hash ``` -## The `stdlib_64_bit_hash_codes` module +## The `stdlib_hash_64bit` module ### Overview of the module @@ -1100,7 +1100,7 @@ directories, it is often useful to use incremental hashing as well as direct hashing, so 64 bit and higher hash algorithms often provide multiple implementations. The current module, for simplicity of API, doesn't provide any incremental hashes. -The `stdlib_64_bit_hash_codes` module defines several public +The `stdlib_hash_64bit` module defines several public overloaded 64 bit hash procedures, `FNV_1`, `FNV-1A`, `PENGY_HASH`, and `SPOOKY_HASH`, two scalar hash functions, `FIBONACCI_HASH` and @@ -1119,20 +1119,20 @@ of kind `INT64`, so it can also be used as a 128 bit hash. It is necessary to define the kind of integer used to return the hash code. -As `stdlib_64_bit_hash_codes` deals exclusively with 64 bit hash codes, +As `stdlib_haash_64bit` deals exclusively with 64 bit hash codes, `INT_HASH` is an alias for the integer kind `INT64`. 
### The `LITTLE_ENDIAN` parameter In implementing hash functions it is sometimes necessary to know the "endianess" of the compiler's integers. To this end the -`stdlib_64_bit_hash_codes` module defines the logical parameter +`stdlib_hash_64bit` module defines the logical parameter `LITTLE_ENDIAN` that if true indicates that the compiler has little-endian integers, and that if false indicates that the integers are big-endian. -### Specifications of the `stdlib_64_bit_hash_codes` procedures +### Specifications of the `stdlib_hash_64bit` procedures #### `FIBONACCI_HASH` - maps an integer to a smaller number of bits @@ -1147,7 +1147,7 @@ in mapping hash codes into small arrays. ##### Syntax -`code = [[stdlib_64_bit_hash_codes:fibonacci_hash]]( key, nbits )` +`code = [[stdlib_hash_64bit:fibonacci_hash]]( key, nbits )` ##### Class @@ -1177,7 +1177,7 @@ E. Knuth. It multiplies the `KEY` by the odd valued approximation to ```fortran program demo_fibonacci_hash - use stdlib_64_bit_hash_codes, only: fibonacci_hash + use stdlib_hash_64bit, only: fibonacci_hash use iso_fortran_env, only: int64 implicit none integer, allocatable :: array1(:) @@ -1204,7 +1204,7 @@ character string. ##### Syntax -`code = [[stdlib_64_bit_hash_codes:fnv_1_hash]]( key )` +`code = [[stdlib_hash_64bit:fnv_1_hash]]( key )` ##### Class @@ -1243,7 +1243,7 @@ function for character strings. ```fortran program demo_fnv_1_hash - use stdlib_64_bit_hash_codes, only: fnv_1_hash + use stdlib_hash_64bit, only: fnv_1_hash use iso_fortran_env, only: int64 implicit none integer, allocatable :: array1(:) @@ -1268,7 +1268,7 @@ character string. ##### Syntax -`code = [[stdlib_64_bit_hash_codes:fnv_1a_hash]]( key )` +`code = [[stdlib_hash_64bit:fnv_1a_hash]]( key )` ##### Class @@ -1306,7 +1306,7 @@ function for character strings. 
```fortran program demo_fnv_1a_hash - use stdlib_64_bit_hash_codes, only: fnv_1a_hash + use stdlib_hash_64bit, only: fnv_1a_hash use iso_fortran_env, only: int64 implicit none integer, allocatable :: array1(:) @@ -1331,7 +1331,7 @@ seed for `PENGY_HASH` and is also different from the input seed. ##### Syntax -`code = call [[stdlib_64_bit_hash_codes:new_pengy_hash_seed]]( seed )` +`code = call [[stdlib_hash_64bit:new_pengy_hash_seed]]( seed )` ##### Class @@ -1370,7 +1370,7 @@ from the input seed. ##### Syntax -`code = call [[stdlib_64_bit_hash_codes:new_spooky_hash_seed]]( seed )` +`code = call [[stdlib_hash_64bit:new_spooky_hash_seed]]( seed )` ##### Class @@ -1407,7 +1407,7 @@ Returns a random 64 bit integer distributed uniformly over the odd values. ##### Syntax -`call [[stdlib_64_bit_hash_codes:odd_random_integer]]( harvest )` +`call [[stdlib_hash_64bit:odd_random_integer]]( harvest )` ##### Class @@ -1443,7 +1443,7 @@ value also depends on a scalar 32-bit integer, `seed`. ##### Syntax -`code = [[stdlib_64_bit_hash_codes:pengy_hash]]( key, seed )` +`code = [[stdlib_hash_64bit:pengy_hash]]( key, seed )` ##### Class @@ -1475,7 +1475,7 @@ function for character strings. ```fortran program demo_pengy_hash - use stdlib_64_bit_hash_codes, only: new_pengy_hash_seed, pengy_hash + use stdlib_hash_64bit, only: new_pengy_hash_seed, pengy_hash use iso_fortran_env, only: int32, int64 implicit none integer, allocatable :: key(:) @@ -1503,7 +1503,7 @@ value also depends on a two element vector, `seed`. ##### Syntax -`code = [[stdlib_64_bit_hash_codes:spooky_hash]]( key, seed )` +`code = [[stdlib_hash_64bit:spooky_hash]]( key, seed )` ##### Class @@ -1537,7 +1537,7 @@ and has no known bad seeds. 
```fortran program demo_spooky_hash - use stdlib_64_bit_hash_codes, only: new_spooky_hash_seed, & + use stdlib_hash_64bit, only: new_spooky_hash_seed, & spooky_hash use iso_fortran_env, only: int64 implicit none @@ -1564,7 +1564,7 @@ in mapping a hash value to a range 0 to `2**nbits-1`. ##### Syntax -`code = [[stdlib_64_bit_hash_codes:universal_mult_hash]]( key, seed, nbits )` +`code = [[stdlib_hash_64bit:universal_mult_hash]]( key, seed, nbits )` ##### Class @@ -1598,7 +1598,7 @@ It multiplies the `KEY` by `SEED`, and returns the ```fortran program demo_universal_mult_hash - use stdlib_32_bit_hash_codes, only: odd_random_integer, & + use stdlib_hash_32bit, only: odd_random_integer, & universal_mult_hash use iso_fortran_env, only: int64 implicit none @@ -1626,8 +1626,8 @@ procedures that are the inspiration for the Fortran hash functions. In the `src/test/hash_functions` subdirectory, the Fortran Standard Library provides two performance test codes for -the hash functions of `stdlib_32_bit_hash_functions` and -`stdlib_64_bit_hash_functions`, `test_32_bit_hash_performance` and +the hash functions of `stdlib_hash_32bit` and +`stdlib_hash_64bit`, `test_32_bit_hash_performance` and `test_64_bit_hash_performance` respectively. These are primarily set up to test runtime performance of the functions. They take a sample of `2**18` integers of kind `INT8` and break it up into vectors of size From 4f8633a8a4c5627207bba8ab5ec65ec2289d750d Mon Sep 17 00:00:00 2001 From: William Clodius Date: Sat, 18 Dec 2021 19:13:17 -1000 Subject: [PATCH 089/106] Updated module names Changed all references to the modules `stdlib_32_bit_hash_codes` and `stdlib_64_bit_hash_codes` to be to the renamed modules `stdlib_hash_32bit` and `stdlib_hash_64bit`, respectively.
[ticket: X] --- src/CMakeLists.txt | 4 ++-- src/Makefile.manual | 20 ++++++++++---------- src/stdlib_32_bit_fnv_hashes.fypp | 4 ++-- src/stdlib_32_bit_nmhashes.fypp | 2 +- src/stdlib_32_bit_water_hashes.fypp | 2 +- src/stdlib_64_bit_fnv_hashes.fypp | 2 +- src/stdlib_64_bit_pengy_hashes.fypp | 2 +- src/stdlib_64_bit_spookyv2_hashes.fypp | 2 +- src/stdlib_hash_32bit.fypp | 4 ++-- src/stdlib_hash_64bit.fypp | 4 ++-- 10 files changed, 23 insertions(+), 23 deletions(-) diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index 541c4a04d..c8dc09f52 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -3,17 +3,17 @@ # Create a list of the files to be preprocessed set(fppFiles stdlib_32_bit_fnv_hashes.fypp - stdlib_32_bit_hash_codes.fypp stdlib_32_bit_nmhashes.fypp stdlib_32_bit_water_hashes.fypp stdlib_64_bit_fnv_hashes.fypp - stdlib_64_bit_hash_codes.fypp stdlib_64_bit_pengy_hashes.fypp stdlib_64_bit_spookyv2_hashes.fypp stdlib_ascii.fypp stdlib_bitsets.fypp stdlib_bitsets_64.fypp stdlib_bitsets_large.fypp + stdlib_hash_32bit.fypp + stdlib_hash_64bit.fypp stdlib_io.fypp stdlib_io_npy.fypp stdlib_io_npy_load.fypp diff --git a/src/Makefile.manual b/src/Makefile.manual index 72b41572f..2fc6319fe 100644 --- a/src/Makefile.manual +++ b/src/Makefile.manual @@ -1,16 +1,16 @@ SRCFYPP = \ stdlib_32_bit_fnv_hashes.fypp \ - stdlib_32_bit_hash_codes.fypp \ stdlib_32_bit_nmhashes.fypp \ stdlib_32_bit_water_hashes.fypp \ stdlib_64_bit_fnv_hashes.fypp \ - stdlib_64_bit_hash_codes.fypp \ stdlib_64_bit_pengy_hashes.fypp \ stdlib_64_bit_spookyv2_hashes.fypp \ stdlib_ascii.fypp \ stdlib_bitsets_64.fypp \ stdlib_bitsets_large.fypp \ stdlib_bitsets.fypp \ + stdlib_hash_32bit.fypp \ + stdlib_hash_64bit.fypp \ stdlib_io.fypp \ stdlib_io_npy.fypp \ stdlib_io_npy_load.fypp \ @@ -89,21 +89,21 @@ $(SRCGEN): %.f90: %.fypp common.fypp # Fortran module dependencies f18estop.o: stdlib_error.o stdlib_32_bit_fnv_hashes.o: \ - stdlib_32_bit_hash_codes.o -stdlib_32_bit_hash_codes.o: \ + 
stdlib_hash_32bit.o +stdlib_hash_32bit.o: \ stdlib_kinds.o stdlib_32_bit_nmhashes.o: \ - stdlib_32_bit_hash_codes.o + stdlib_hash_32bit.o stdlib_32_bit_water_hashes.o: \ - stdlib_32_bit_hash_codes.o + stdlib_hash_32bit.o stdlib_64_bit_fnv_hashes.o: \ - stdlib_64_bit_hash_codes.o -stdlib_64_bit_hash_codes.o: \ + stdlib_hash_64bit.o +stdlib_hash_64bit.o: \ stdlib_kinds.o stdlib_64_bit_pengy_hashes.o: \ - stdlib_64_bit_hash_codes.o + stdlib_hash_64bit.o stdlib_64_bit_spookyv2_hashes.o: \ - stdlib_64_bit_hash_codes.o + stdlib_hash_64bit.o stdlib_ascii.o: stdlib_kinds.o stdlib_bitsets.o: stdlib_kinds.o stdlib_bitsets_64.o: stdlib_bitsets.o diff --git a/src/stdlib_32_bit_fnv_hashes.fypp b/src/stdlib_32_bit_fnv_hashes.fypp index 5b7b227b1..e23863a20 100755 --- a/src/stdlib_32_bit_fnv_hashes.fypp +++ b/src/stdlib_32_bit_fnv_hashes.fypp @@ -10,7 +10,7 @@ !#! Integer kinds to be considered during templating #:set INT_KINDS = ["int16", "int32", "int64"] -submodule(stdlib_32_bit_hash_codes) stdlib_32_bit_fnv_hashes +submodule(stdlib_hash_32bit) stdlib_32_bit_fnv_hashes !! An implementation of the FNV hashes 1 and 1a of Glenn Fowler, Landon Curt !! Noll, and Kiem-Phong-Vo, !! https://en.wikipedia.org/wiki/Fowler–Noll–Vo_hash_function @@ -29,7 +29,7 @@ contains integer(int64) :: i - hash_code = offset_basis +hash- hash_code = offset_basis do i=1_int64, size(key, kind=int64) hash_code = hash_code * prime if ( little_endian ) then diff --git a/src/stdlib_32_bit_nmhashes.fypp b/src/stdlib_32_bit_nmhashes.fypp index 611a249be..8ebe16eb5 100755 --- a/src/stdlib_32_bit_nmhashes.fypp +++ b/src/stdlib_32_bit_nmhashes.fypp @@ -44,7 +44,7 @@ #! 
Integer kinds to be considered during templating #:set INT_KINDS = ["int16", "int32", "int64"] -submodule(stdlib_32_bit_hash_codes) stdlib_32_bit_nmhashes +submodule(stdlib_hash_32bit) stdlib_32_bit_nmhashes implicit none diff --git a/src/stdlib_32_bit_water_hashes.fypp b/src/stdlib_32_bit_water_hashes.fypp index 240a94d28..bf8c4b907 100755 --- a/src/stdlib_32_bit_water_hashes.fypp +++ b/src/stdlib_32_bit_water_hashes.fypp @@ -74,7 +74,7 @@ #! Integer kinds to be considered during templating #:set INT_KINDS = ["int16", "int32", "int64"] -submodule(stdlib_32_bit_hash_codes) stdlib_32_bit_water_hashes +submodule(stdlib_hash_32bit) stdlib_32_bit_water_hashes implicit none contains diff --git a/src/stdlib_64_bit_fnv_hashes.fypp b/src/stdlib_64_bit_fnv_hashes.fypp index 6a9fd75b3..913999bce 100755 --- a/src/stdlib_64_bit_fnv_hashes.fypp +++ b/src/stdlib_64_bit_fnv_hashes.fypp @@ -12,7 +12,7 @@ #! Integer kinds to be considered during templating #:set INT_KINDS = ["int16", "int32", "int64"] -submodule(stdlib_64_bit_hash_codes) stdlib_64_bit_fnv_hashes +submodule(stdlib_hash_64bit) stdlib_64_bit_fnv_hashes ! An implementation of the FNV hashes 1 and 1a of Glenn Fowler, Landon Curt ! Noll, and Kiem-Phong-Vo, ! https://en.wikipedia.org/wiki/Fowler–Noll–Vo_hash_function diff --git a/src/stdlib_64_bit_pengy_hashes.fypp b/src/stdlib_64_bit_pengy_hashes.fypp index 449698daa..6974555a8 100755 --- a/src/stdlib_64_bit_pengy_hashes.fypp +++ b/src/stdlib_64_bit_pengy_hashes.fypp @@ -40,7 +40,7 @@ #! Integer kinds to be considered during templating #:set INT_KINDS = ["int16", "int32", "int64"] -submodule(stdlib_64_bit_hash_codes) stdlib_64_bit_pengy_hashes +submodule(stdlib_hash_64bit) stdlib_64_bit_pengy_hashes implicit none diff --git a/src/stdlib_64_bit_spookyv2_hashes.fypp b/src/stdlib_64_bit_spookyv2_hashes.fypp index cc9353316..805abbab2 100755 --- a/src/stdlib_64_bit_spookyv2_hashes.fypp +++ b/src/stdlib_64_bit_spookyv2_hashes.fypp @@ -13,7 +13,7 @@ #! 
Integer kinds to be considered during templating #:set INT_KINDS = ["int16", "int32", "int64"] -submodule(stdlib_64_bit_hash_codes) stdlib_64_bit_spookyv2_hashes +submodule(stdlib_hash_64bit) stdlib_64_bit_spookyv2_hashes ! I have tried to make this portable while retaining efficiency. I assume ! processors with two's complement integers from 8, 16, 32, and 64 bits. diff --git a/src/stdlib_hash_32bit.fypp b/src/stdlib_hash_32bit.fypp index b658e13ce..780293f2a 100755 --- a/src/stdlib_hash_32bit.fypp +++ b/src/stdlib_hash_32bit.fypp @@ -1,7 +1,7 @@ #! Integer kinds to be considered during templating #:set INT_KINDS = ["int8", "int16", "int32", "int64"] -module stdlib_32_bit_hash_codes +module stdlib_hash_32bit use, intrinsic :: iso_fortran_env, only : & character_storage_size @@ -270,4 +270,4 @@ contains end subroutine odd_random_integer -end module stdlib_32_bit_hash_codes +end module stdlib_hash_32bit diff --git a/src/stdlib_hash_64bit.fypp b/src/stdlib_hash_64bit.fypp index 9581bf589..35b6ce512 100755 --- a/src/stdlib_hash_64bit.fypp +++ b/src/stdlib_hash_64bit.fypp @@ -1,7 +1,7 @@ #! Integer kinds to be considered during templating #:set INT_KINDS = ["int8", "int16", "int32", "int64"] -module stdlib_64_bit_hash_codes +module stdlib_hash_64bit use, intrinsic :: iso_fortran_env, only : & character_storage_size @@ -315,4 +315,4 @@ contains end subroutine random_integer -end module stdlib_64_bit_hash_codes +end module stdlib_hash_64bit From dd088ca28022df25df501afa7653e058f0671fbc Mon Sep 17 00:00:00 2001 From: William Clodius Date: Sat, 18 Dec 2021 19:20:43 -1000 Subject: [PATCH 090/106] Fixed typo Removed "hash-" introduced in renaming module reference.
[ticket: X] --- src/stdlib_32_bit_fnv_hashes.fypp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/stdlib_32_bit_fnv_hashes.fypp b/src/stdlib_32_bit_fnv_hashes.fypp index e23863a20..db9ca4c21 100755 --- a/src/stdlib_32_bit_fnv_hashes.fypp +++ b/src/stdlib_32_bit_fnv_hashes.fypp @@ -29,7 +29,7 @@ contains integer(int64) :: i -hash- hash_code = offset_basis + hash_code = offset_basis do i=1_int64, size(key, kind=int64) hash_code = hash_code * prime if ( little_endian ) then From f15d42d1325e233fe318cf68a835653fce1226d4 Mon Sep 17 00:00:00 2001 From: William Clodius Date: Sat, 18 Dec 2021 19:33:43 -1000 Subject: [PATCH 091/106] Updated module names For the hash functions test codes updated the module names from `stdlib_32_bit_hash_codes` and `stdlib_64_bit_hash_codes` to `stdlib_hash_32bit` and `stdlib_hash_64bit`, respectively. [ticket: X] --- src/tests/hash_functions/test_hash_functions.f90 | 7 ++++--- .../hash_functions_perf/test_32_bit_hash_performance.f90 | 2 +- .../hash_functions_perf/test_64_bit_hash_performance.f90 | 2 +- 3 files changed, 6 insertions(+), 5 deletions(-) diff --git a/src/tests/hash_functions/test_hash_functions.f90 b/src/tests/hash_functions/test_hash_functions.f90 index 4ebf696bc..7e3695185 100644 --- a/src/tests/hash_functions/test_hash_functions.f90 +++ b/src/tests/hash_functions/test_hash_functions.f90 @@ -1,11 +1,12 @@ module test_hash_functions - use testdrive, only : new_unittest, unittest_type, error_type, check, skip_test + use testdrive, only : new_unittest, unittest_type, error_type, check, & + skip_test use stdlib_kinds, only: int8, int32, int64, dp - use stdlib_32_bit_hash_codes, only: little_endian & , nmhash32 & , nmhash32x & , water_hash - use stdlib_64_bit_hash_codes, only: pengy_hash, spooky_hash + use stdlib_hash_32bit, only: little_endian & , nmhash32 & , nmhash32x & , water_hash + use stdlib_hash_64bit, only: pengy_hash, spooky_hash implicit none private diff --git a/src/tests/hash_functions_perf/test_32_bit_hash_performance.f90
b/src/tests/hash_functions_perf/test_32_bit_hash_performance.f90 index 3436f080c..1fc02dd7b 100755 --- a/src/tests/hash_functions_perf/test_32_bit_hash_performance.f90 +++ b/src/tests/hash_functions_perf/test_32_bit_hash_performance.f90 @@ -8,7 +8,7 @@ program test_32_bit_hash_performance int32, & int64 - use stdlib_32_bit_hash_codes + use stdlib_hash_32bit implicit none diff --git a/src/tests/hash_functions_perf/test_64_bit_hash_performance.f90 b/src/tests/hash_functions_perf/test_64_bit_hash_performance.f90 index 78bbb32b4..139f6225e 100755 --- a/src/tests/hash_functions_perf/test_64_bit_hash_performance.f90 +++ b/src/tests/hash_functions_perf/test_64_bit_hash_performance.f90 @@ -8,7 +8,7 @@ program test_64_bit_hash_performance int32, & int64 - use stdlib_64_bit_hash_codes + use stdlib_hash_64bit implicit none From f79024189ec70f0b86997a4c1a4043f4aede012c Mon Sep 17 00:00:00 2001 From: William Clodius Date: Sat, 18 Dec 2021 20:02:41 -1000 Subject: [PATCH 092/106] Fixed misspelling Changed SPSOOKY to SPOOKY. [ticket: X] --- doc/specs/stdlib_hash_procedures.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/specs/stdlib_hash_procedures.md b/doc/specs/stdlib_hash_procedures.md index 9f881290b..cb1166c03 100755 --- a/doc/specs/stdlib_hash_procedures.md +++ b/doc/specs/stdlib_hash_procedures.md @@ -223,7 +223,7 @@ entries, and for keys with a few thousand elements, but testing of performance has only been been for tables up to `2**16`elements and performance may degrade for larger numbers of entries. While one of the codes in `stdlib_hash_64bit`, -`SPSOOKY_HASH`, can also be used to calculate 128 bit hash codes, none +`SPOOKY_HASH`, can also be used to calculate 128 bit hash codes, none of the current codes can be used to calculate 256 bit hash codes. Such larger hash codes are useful for larger hash tables and keys, and for checksums. 
From 15da01f25bf31b270c5d4796eff4a043467aba5e Mon Sep 17 00:00:00 2001 From: Jeremie Vandenplas Date: Sun, 19 Dec 2021 09:51:06 +0100 Subject: [PATCH 093/106] stdlib_hash_procedures.md --- doc/specs/stdlib_hash_procedures.md | 474 ++++++++++++++-------------- 1 file changed, 237 insertions(+), 237 deletions(-) diff --git a/doc/specs/stdlib_hash_procedures.md b/doc/specs/stdlib_hash_procedures.md index 9f881290b..d59bd706a 100755 --- a/doc/specs/stdlib_hash_procedures.md +++ b/doc/specs/stdlib_hash_procedures.md @@ -14,7 +14,7 @@ This cost is often reduced by computing a near unique integer value, termed a hash code, from the structure of the object using a procedure termed a hash function. Equality of hash codes is a necessary, but not sufficient, condition -for the original objects to be equal. +for the original objects to be equal. As integer comparisons are very efficient, performing an initial comparison of hash codes and then performing a detailed comparison only if the hash codes are equal can improve performance. @@ -42,7 +42,7 @@ MIT license (`FNV-1 Hash`, `FNV-1A Hash`, `nmhash32`, `nmhash32x`, `waterhash`, `pengyhash` and `SpookyHash`) The licensing status of the algorithms are discussed below. -`FIBONACCI_HASH` is a scalar hash. It is an implementation in Fortran +`fibonacci_hash` is a scalar hash. It is an implementation in Fortran 2008 and signed two's complement integers of the Fibonacci Hash described in D. E. Knuth, "The Art of Computer Programming, Second Edition, Volume 3, Sorting and @@ -50,7 +50,7 @@ Searching", Addison-Wesley, Upper Saddle River, NJ, pp. 517-518, 1998. The algorithms in that source are considered public domain, and its use is unrestricted. -`UNIVERSAL_MULT_HASH` is a scalar hash. It is an implementation in +`universal_mult_hash` is a scalar hash. It is an implementation in Fortran 2008 and signed two's complement integers of the universal multiplicative hash algorithm of M. Dietzfelbinger, T. Hagerup, J. Katajainen, and M. 
Penttonen, "A Reliable Randomized @@ -59,7 +59,7 @@ No. 1, Oct. 1997, pp. 19-51. Because of its publication in the Journal of Algorithms, the universal multiplicative hash algorithm is public domain. -`FNV_1_HASH` and `FNV_1A_HASH` are translations to Fortran 2008 and +`fnv_1_hash` and `fnv_1a_hash` are translations to Fortran 2008 and signed two's complement integers of the `FNV-1` and `FNV-1a` hash functions of Glenn Fowler, Landon Curt Noll, and Phong Vo, that has been released into the public @@ -70,7 +70,7 @@ these functions is available at These functions have been modified from their normal forms to also encode the structure size in the output hash. -Similarly `SPOOKY_HASH` and associated procedures are translations to +Similarly `spooky_hash` and associated procedures are translations to Fortran 2008 and signed two's complement integers of the unsigned 64 bit version 2 `SpookyHash` functions of Bob Jenkins to signed 64 @@ -81,8 +81,8 @@ domain and has given permission to treat this code as public domain in the USA, provided the code can be used under other licenses and he is given appropriate credit. -`NMHASH32` and `NMHASH32x` are translations to Fortran 2008 and signed -two's complement integers of the unsigned 32 bit +`nmhash32` and `nmhash32x` are translations to Fortran 2008 and signed +two's complement integers of the unsigned 32-bit hashes of James Z. M. Gao's `nmhash32` and `nmhash32x` version of 0.2, James Z. M. Gao has released his code under the BSD 2 Clause @@ -115,7 +115,7 @@ License. The BSD 2-Clause license is as follows: ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -`WATER_HASH` is a translation to Fortran 2008 and signed two's +`water_hash` is a translation to Fortran 2008 and signed two's complement integers of the `waterhash` algorithm of Tommy Ettinger. This algorithm is inspired by the Wy Hash of Wang Yi. 
Tommy Ettinger's original C++ code, `waterhash.h`, @@ -148,7 +148,7 @@ The `unlicense` reads as follows: For more information, please refer to -`PENGY_HASH` is a translation to Fortran 2008 and signed two's +`pengy_hash` is a translation to Fortran 2008 and signed two's complement arithmetic of the `pengyhash` algorithm of Alberto Fajardo, copyright 2020. Alberto Fajardo's original C code, `pengyhash.c`, is available at the URL: @@ -211,13 +211,13 @@ readers of this document: The Standard Library provides two modules implementing hash functions and scalar hashes. The `stdlib_hash_32bit` module provides procedures to -compute 32 bit integer hash codes and a scalar hash. +compute 32-bit integer hash codes and a scalar hash. The hash codes can be used for tables of up to `2**30` entries, and for keys with a few hundred elements, but performance has only been tested for tables up to `2**16` entries and performance may degrade for larger numbers of entries. The `stdlib_hash_64bit` module provides hash procedures to -compute 64 bit integer hash codes and a scalar hash. +compute 64-bit integer hash codes and a scalar hash. The hash codes can, in principle, be used for tables of up to `2**62` entries, and for keys with a few thousand elements, but testing of performance has only been been for tables up to `2**16`elements and @@ -233,7 +233,7 @@ Fortran codes, but the larger hash codes may be added to the library if there is a demand for them. Hash functions are often divided into two categories -"cryptographic" and "non-cryptographic". +"cryptographic" and "non-cryptographic". Cryptographic hash functions produce codes that are infeasible to reverse without additional information beyond the identity of the hash function used to generate the code and the resulting codes. @@ -244,7 +244,7 @@ functions that are considered non-cryptographic, with implementations available in the public domain. 
There are a number of algorithms available for the computation of -non-cryptographic 32 and 64 bit hash codes that differ in their +non-cryptographic 32 and 64-bit hash codes that differ in their computational complexity, their relative performance on different size keys, and the expected uniqueness (randomness) of the resulting hash codes. @@ -264,7 +264,7 @@ purpose hash functions. Instead hash functions are defined for some of the more common objects: character strings and rank-1 arrays of integers. Other objects can, in principle, be hashed by using `transfer` to -map their contents to an integer array, typically one of kind `INT8`. +map their contents to an integer array, typically one of kind `int8`. The other problem is that hash codes are typically defined using modular unsigned integer arithmetic. As such integers are not part of the current Fortran standard, @@ -272,14 +272,14 @@ workarounds have to be used. These can take two forms. In one, the operations are emulated by using an integer of a larger size, or, for the larger integers, by dividing the integer into -two lower and higher order halves, -and performing the operations on each half separately using +two lower and higher order halves, +and performing the operations on each half separately using the larger integers. In the second, the unsigned integers may be replaced directly by the corresponding signed integers, but otherwise not modifying the code logic. The first should be standard conforming on current compilers, but -is more computationally intensive unless the compilers recognize +is more computationally intensive unless the compilers recognize underlying idioms that are rarely used in Fortran codes. The second is not standard conforming as bit operations involving the sign are undefined, @@ -289,7 +289,7 @@ or under flow. The codes currently use the second method. 
In order to compile the hash function modules, the compilers must implement much of Fortran 2003, and selected components of Fortran -2008: submodules, 64 bit integers, and some bit intrinsics. +2008: submodules, 64-bit integers, and some bit intrinsics. The main limitation on valid compilers is whether they implement the submodules enhancement of Fortran 2008. In order to properly run the hash functions, the compilers must @@ -331,32 +331,32 @@ in a given hash table, but can be changed and the objects rehashed if collisions are unusually common. The *seed* can be either a scalar or a two-element array. Some of the hash functions have alternatives that allow incremental -hashing. +hashing. |Algorithm|Seed|Result| |---------|----|------| -|FNV-1|None|32 or 64 bit integer| -|FNV-1a|None|32 or 64 bit integer| -|nmhash32 |32 bit scalar integer|32 bit integer| -|nmhash32x |32 bit scalar integer|32 bit integer| -|pengyhash |32 bit scalar integer|64 bit integer| -|Spooky Hash|64 bit two element vector|64 bit two element vector| -|waterhash|64 bit scalar integer|32 bit integer| +|FNV-1|None|32 or 64-bit integer| +|FNV-1a|None|32 or 64-bit integer| +|nmhash32 |32-bit scalar integer|32-bit integer| +|nmhash32x |32-bit scalar integer|32-bit integer| +|pengyhash |32-bit scalar integer|64-bit integer| +|Spooky Hash|64-bit two element vector|64-bit two element vector| +|waterhash|64-bit scalar integer|32-bit integer| The hash function modules each provide at least five algorithms for -hash functions: two optimized for small (< 32 `INT8` integer elements) -keys, and three optimized for large (> 100 `INT8` integer elements) +hash functions: two optimized for small (< 32 `int8` integer elements) +keys, and three optimized for large (> 100 `int8` integer elements) keys. The core implementation for each algorithm is for keys that are -vectors of `INT8` integers. +vectors of `int8` integers. 
These core implementations are then used in wrappers for keys -that are vectors of `INT16`, `INT32` and `INT64` integers, or default +that are vectors of `int16`, `int32` and `int64` integers, or default character strings, in the expectation that inlining will eliminate the -overhead of transferring the other keys to `INT8` integer vectors. +overhead of transferring the other keys to `int8` integer vectors. The `stdlib_hash_32bit` module provides implementations of five hash code algorithms: -the *FNV_1* and *FNV_1A* variants of Glenn Fowler, +the *FNV_1* and *FNV_1A* variants of Glenn Fowler, Landon Curt Noll, and Kiem-Phong Vo; the *nmhash32* and *nmhash32x* of James Z. M. Gao; and the *waterhash* of Tommy Ettinger. @@ -367,16 +367,16 @@ respectively. The `nmhash32`, `nmhash32x`, and `waterhash` algorithms require seeds. The submodules provide separate seed generators for each algorithm. The module itself -implements two scalar hash functions, `FIBONACCI_HASH` and -`UNIVERSAL_MULT_HASH`. -It also implements the subroutine, `ODD_RANDOM_INTEGER`, for -generating seeds for `UNIVERSAL_MULT_HASH`. +implements two scalar hash functions, `fibonacci_hash` and +`universal_mult_hash`. +It also implements the subroutine, `odd_random_integer`, for +generating seeds for `universal_mult_hash`. All assume a two's complement sign bit, and no out of range checks. The `stdlib_hash_64bit` module also provides implementations of four hash code algorithms: -the *FNV_1* and *FNV_1A* variants of Glenn Fowler, +the *FNV_1* and *FNV_1A* variants of Glenn Fowler, Landon Curt Noll, and Kiem-Phong Vo; the *pengyhash* of Alberto Fajardo; and the *SpookyHash* of Bob Jenkins. @@ -388,9 +388,9 @@ The `pengyhash`, and `Spooky Hash` algorithms require seeds. The submodules provide separate seed generators for each algorithm. The module itself implements two scalar hash functions, -`FIBONACCI_HASH` and `UNIVERSAL_MULT_HASH`. 
-It also implements the subroutine, `ODD_RANDOM_INTEGER`, for -generating seeds for `UNIVERSAL_MULT_HASH`. +`fibonacci_hash` and `universal_mult_hash`. +It also implements the subroutine, `odd_random_integer`, for +generating seeds for `universal_mult_hash`. All assume a two's complement sign bit, and no out of range checks. @@ -402,7 +402,7 @@ The FNV-1 and FNV-2 algorithms differ in their order of the multiplication and exclusive or operations. They differ from their normal implementation in that they also encode the structure size in the hash code. -The 32 and 64 bit algorithms differ in their initial offsets and in +The 32 and 64-bit algorithms differ in their initial offsets and in their multiplicative constants. Analysis suggests that `FNV_1A` should be better at randomizing the input, but tests with hash tables show negligible difference. @@ -420,8 +420,8 @@ and S. Richter, V. Alvarez, and J. Dittrich, The `stdlib_32_bit_nmhashes` submodule provides implementations of James Z.M. Gao's `nmhash32` and `nmhash32x` algorithms, -version 0.2, -in the form of the overloaded functions, `NMHASH32` and `NMHASH32X`. +version 0.2, +in the form of the overloaded functions, `nmhash32` and `nmhash32x`. The implementations are based on the scalar versions of Gao's algorithms and not the vector versions that require access to the vector instructions of some compilers. @@ -429,40 +429,40 @@ Both algorithms perform well on the SMHasher tests, and have no known bad seeds. The vector versions of both codes perform well on large keys, with the `nmhash32x` faster on short keys. To provide randomly generated seeds for the two functions the submodule also defines the -subroutines `NEW_NMHASH32_SEED` and `NEW_NMHASH32X_SEED`. Gao claims -that `NMHASH32X` is significantly faster than `NMHASH32` on short +subroutines `new_nmhash32_seed` and `new_nmhash32x_seed`. 
Gao claims +that `nmhash32x` is significantly faster than `nmhash32` on short seeds, but slower on long seeds, but our limited testing so far shows -`NMHASH32X` to be significantly faster on short seeds and slightly +`nmhash32x` to be significantly faster on short seeds and slightly faster on long seeds. The `stdlib_32_bit_water_hashes` submodule provides implementations of Tommy Ettinger's `waterhash` algorithm in the form of the overloaded -function, `WATER_HASH`. Water Hash has not been tested by Reini Urban, +function, `water_hash`. Water Hash has not been tested by Reini Urban, but Tommy Ettinger has tested it with Urban's SMHasher and presents results that shows Water Hash passing all the tests. So far his testing hasn't found any bad seeds for the algorithm. To provide randomly generated seeds for the hash function the submodule also -defines the subroutine `NEW_WATER_HASH_SEED`. +defines the subroutine `new_water_hash_seed`. The `stdlib_64_bit_pengy_hashes` submodule provides implementations of Alberto Fajardo's `pengyhash` in the form of the overloaded function, -`PENGY_HASH`. Reini Urban's testing shows that PengyHash passes all +`pengy_hash`. Reini Urban's testing shows that PengyHash passes all the tests and has no bad seeds. To provide randomly generated seeds for the hash function the submodule also defines the subroutine -`NEW_PENGY_HASH_SEED`. +`new_pengy_hash_seed`. The `stdlib_64_bit_spooky_hashes` submodule provides implementations of Bob Jenkins' SpookyHash in the form of the overloaded function, -`SPOOKY_HASH`. Future implementations may provide the SpookyHash +`spooky_hash`. Future implementations may provide the SpookyHash incremental hashing procedures. -SpookyHash is optimized for large objects and should give excellent +SpookyHash is optimized for large objects and should give excellent performance for objects greater than about 96 byes, but has significant overhead for smaller objects. 
The code was designed for little-endian compilers, and will give different results on big-endian compilers, but the hash quality on those compilers is probably just as good. SpookyHash version 2 passes all of Reini Urban's SMHasher tests, and -has one bad seed only when reduced to a 32 bit output. +has one bad seed only when reduced to a 32-bit output. Its only potential problem is undefined behavior if the key is misaligned. @@ -475,34 +475,34 @@ codes and hash indices for hash tables. They tend to be less useful for generating checksums, which generally benefit from having a larger number of bits. The `stdlib_hash_32bit` module defines five public overloaded -32 bit hash code functions, `FNV_1`, `FNV-1A`, `NMHASH32`, `NMHASH32x` -and `WATER_HASH`, two scalar hash functions, `FIBONACCI_HASH` and -`UNIVERSAL_MULT_HASH`, four seed generators, `ODD_RANDOM_INTEGER` for -`UNIVERSAL_MULT_HASH`, and `NEW_NMHASH32_SEED`, `NEW_NMHASH32X_SEED`, -and `NEW_WATER_HASH_SEED`, for their respective hash code -functions. It also defines the integer kind constant, `INT_HASH`, and -a logical constant, `LITTLE_ENDIAN`, used to deal with one aspect of +32-bit hash code functions, `fnv_1_hash`, `fnv_1a_hash`, `nmhash32`, `nmhash32x` +and `water_hash`, two scalar hash functions, `fibonacci_hash` and +`universal_mult_hash`, four seed generators, `odd_random_integer` for +`universal_mult_hash`, and `new_nmhash32_seed`, `new_nmhash32x_seed`, +and `new_water_hash_seed`, for their respective hash code +functions. It also defines the integer kind constant, `int_hash`, and +a logical constant, `little_endian`, used to deal with one aspect of the machine dependence of the hash codes. -### The `INT_HASH` parameter +### The `int_hash` parameter It is necessary to define the kind of integer used to return the hash code. -As `stdlib_hash_32bit` deals exclusively with 32 bit hash codes, -`INT_HASH` is an alias for the integer kind `INT32`.
+As `stdlib_hash_32bit` deals exclusively with 32-bit hash codes, +`int_hash` is an alias for the integer kind `int32`. -### The `LITTLE_ENDIAN` parameter +### The `little_endian` parameter In implementing hash functions it is sometimes necessary to know the "endianess" of the compiler's integers. To this end the `stdlib_hash_32bit` module defines the logical parameter -`LITTLE_ENDIAN` that, if true, indicates that the compiler has +`little_endian` that, if true, indicates that the compiler has little-endian integers, and that if false indicates that the integers are big-endian. ### Specifications of the `stdlib_hash_32bit` procedures -#### `FIBONACCI_HASH` - maps an integer to a smaller number of bits +#### `fibonacci_hash` - maps an integer to a smaller number of bits ##### Status @@ -510,7 +510,7 @@ Experimental ##### Description -Calculates an `nbits` hash code from a 32 bit integer. This is useful +Calculates an `nbits` hash code from a 32-bit integer. This is useful in mapping hash codes into small arrays. ##### Syntax @@ -523,7 +523,7 @@ Elemental function ##### Arguments -`key`: Shall be a scalar integer expression of kind `INT32`. It is an +`key`: Shall be a scalar integer expression of kind `int32`. It is an `intent(in)` argument. `nbits` Shall be a scalar default integer expression with `0 < nbits < @@ -531,13 +531,13 @@ Elemental function ##### Result -The result is an integer of kind `INT32` with at most the lowest +The result is an integer of kind `int32` with at most the lowest `nbits` nonzero, mapping to a range 0 to `nbits-1`. ##### Note -`FIBONACCI_HASH` is an implementation of the Fibonacci Hash of Donald -E. Knuth. It multiplies the `KEY` by the odd valued approximation to +`fibonacci_hash` is an implementation of the Fibonacci Hash of Donald +E. Knuth. 
It multiplies the `key` by the odd valued approximation to `2**32/phi`, where `phi` is the golden ratio 1.618..., and returns the `nbits` upper bits of the product as the lowest bits of the result. @@ -559,7 +559,7 @@ program demo_fibonacci_hash end program demo_fibonacci_hash ``` -#### `FNV_1_HASH`- calculates a hash code from a key +#### `fnv_1_hash`- calculates a hash code from a key ##### Status @@ -567,7 +567,7 @@ Experimental ##### Description -Calculates a 32 bit hash code from a rank-1 integer array or a default +Calculates a 32-bit hash code from a rank-1 integer array or a default character string. ##### Syntax @@ -581,19 +581,19 @@ Pure/elemental function ##### Argument `key`: Shall be a deferred length default character scalar expression -or a rank-1 integer array expression of kind `INT8`, `INT16`, -`INT32`, or `INT64`. +or a rank-1 integer array expression of kind `int8`, `int16`, +`int32`, or `int64`. It is an `intent(in)` argument. ##### Result -The result is a scalar integer of kind `INT32`. +The result is a scalar integer of kind `int32`. ##### Note -`FNV_1_HASH` is an implementation of the original FNV-1 hash code of Glenn +`fnv_1_hash` is an implementation of the original FNV-1 hash code of Glenn Fowler, Landon Curt Noll, and Phong Vo. -It differs from typical implementations in that it also ecodes the +It differs from typical implementations in that it also encodes the size of the structure in the hash code. This code is relatively fast on short keys, and is small enough that it will often be retained in the instruction cache if hashing is @@ -604,7 +604,7 @@ This code does not pass any of the SMHasher tests, but the resulting degradation in performance due to its larger number of collisions is expected to be minor compared to its faster hashing rate. It is a *pure* function for integer arrays, and an *elemental* -function for character strings. +function for character strings. 
##### Example @@ -621,7 +621,7 @@ end program demo_fnv_1_hash ``` -#### `FNV_1A_HASH`- calculates a hash code from a key +#### `fnv_1a_hash`- calculates a hash code from a key ##### Status @@ -629,7 +629,7 @@ Experimental ##### Description -Calculates a 32 bit hash code from a rank-1 integer array or a default +Calculates a 32-bit hash code from a rank-1 integer array or a default character string. ##### Syntax @@ -643,17 +643,17 @@ Pure/elemental function ##### Argument `key`: Shall be a deferred length default character scalar expression -or a rank-1 integer array expression of kind `INT8`, `INT16`, -`INT32`, or `INT64`. +or a rank-1 integer array expression of kind `int8`, `int16`, +`int32`, or `int64`. It is an `intent(in)` argument. ##### Result -The result is a scalar integer of kind `INT32`. +The result is a scalar integer of kind `int32`. ##### Note -`FNV_1A_HASH` is an implementation of the alternative FNV-1a hash code of +`fnv_1a_hash` is an implementation of the alternative FNV-1a hash code of Glenn Fowler, Landon Curt Noll, and Phong Vo. It differs from typical implementations in that it also encodes the size of the structure in the hash code. @@ -666,7 +666,7 @@ This code does not pass any of the SMHasher tests, but the resulting degradation in performance due to its larger number of collisions is expected to be minor compared to its faster hashing rate. It is a *pure* function for integer arrays, and an *elemental* -function for character strings. +function for character strings. ##### Example @@ -682,7 +682,7 @@ end program demo_fnv_1a_hash ``` -#### `NEW_NMHASH32_SEED`- returns a valid input seed for `NMHASH32` +#### `new_nmhash32_seed`- returns a valid input seed for `nmhash32` ##### Status @@ -690,8 +690,8 @@ Experimental ##### Description -Calculates a 32 bit "random" integer that is believed to be a valid -seed for `NMHASH32` and is also different from the input seed. 
+Calculates a 32-bit "random" integer that is believed to be a valid +seed for `nmhash32` and is also different from the input seed. ##### Syntax @@ -703,24 +703,24 @@ Subroutine ##### Argument -`seed`: shall be a defined integer scalar variable of kind `INT32`. +`seed`: shall be a defined integer scalar variable of kind `int32`. It is an `intent(inout)` argument. On input `seed` should be defined, and on output it will be different from the input `seed`. ##### Note -Currently there are no known bad seeds for `NMHASH32`, but if any are +Currently there are no known bad seeds for `nmhash32`, but if any are identified the procedure will be revised so that they cannot be returned. This subroutine uses Fortran's intrinsic - `RANDOM_NUMBER` and the values returned can be changed by calling the - intrinsic `RANDOM_INIT`. + `random_number` and the values returned can be changed by calling the + intrinsic `random_init`. ##### Example -See the example for `NMHASH32`. +See the example for `nmhash32`. -#### `NEW_NMHASH32X_SEED`- returns a valid input seed for `NMHASH32X` +#### `new_nmhash32x_seed`- returns a valid input seed for `nmhash32x` ##### Status @@ -728,8 +728,8 @@ Experimental ##### Description -Calculates a 32 bit "random" integer that is believed to be a valid -seed for `NMHASH32X` and is also different from the input seed. +Calculates a 32-bit "random" integer that is believed to be a valid +seed for `nmhash32x` and is also different from the input seed. ##### Syntax @@ -741,24 +741,24 @@ Subroutine ##### Argument -`seed`: shall be a defined integer scalar variable of kind `INT32`. +`seed`: shall be a defined integer scalar variable of kind `int32`. It is an `intent(inout)` argument. On input `seed` should be defined, and on output it will be different from the input `seed`. 
##### Note -Currently there are no known bad seeds for `NMHASH32X`, but if any are +Currently there are no known bad seeds for `nmhash32x`, but if any are identified the procedure will be revised so that they cannot be returned. This subroutine uses Fortran's intrinsic - `RANDOM_NUMBER` and the values returned can be changed by calling the - intrinsic `RANDOM_INIT`. + `random_number` and the values returned can be changed by calling the + intrinsic `random_init`. ##### Example -See the example for `NMHASH32X`. +See the example for `nmhash32x`. -#### `NEW_WATER_HASH_SEED`- returns a valid input seed for `WATER_HASH` +#### `new_water_hash_seed`- returns a valid input seed for `water_hash` ##### Status @@ -766,8 +766,8 @@ Experimental ##### Description -Calculates a 64 bit "random" integer that is believed to be a valid -seed for `WATER_HASH` and is also different from the input seed. +Calculates a 64-bit "random" integer that is believed to be a valid +seed for `water_hash` and is also different from the input seed. ##### Syntax @@ -779,25 +779,25 @@ Subroutine ##### Argument -`seed`: shall be a defined integer scalar variable of kind `INT64`. +`seed`: shall be a defined integer scalar variable of kind `int64`. It is an `intent(inout)` argument. On input `seed` should be defined, and on output it will be different from the input `seed`. ##### Note -Currently there are no known bad seeds for `WATER_HASH`, but if any +Currently there are no known bad seeds for `water_hash`, but if any are identified the procedure will be revised so that they cannot be returned. This subroutine uses Fortran's intrinsic - `RANDOM_NUMBER` and the values returned can be changed by calling the - intrinsic `RANDOM_INIT`. - + `random_number` and the values returned can be changed by calling the + intrinsic `random_init`. + ##### Example -See the example for `WATER_HASH`. +See the example for `water_hash`. 
-#### `NMHASH32`- calculates a hash code from a key and a seed +#### `nmhash32`- calculates a hash code from a key and a seed ##### Status @@ -805,7 +805,7 @@ Experimental ##### Description -Calculates a 32 bit hash code from a rank-1 integer array or a default +Calculates a 32-bit hash code from a rank-1 integer array or a default character string, and the input `seed`. ##### Syntax @@ -819,20 +819,20 @@ Pure/elemental function ##### Arguments `key`: Shall be a deferred length default character scalar expression -or a rank-1 integer array expression of kind `INT8`, `INT16`, -`INT32`, or `INT64`. +or a rank-1 integer array expression of kind `int8`, `int16`, +`int32`, or `int64`. It is an `intent(in)` argument. -`seed`: shall be an integer scalar expression of kind `INT32`. +`seed`: shall be an integer scalar expression of kind `int32`. It is an `intent(in)` argument. ##### Result -The result is a scalar integer of kind `INT32`. +The result is a scalar integer of kind `int32`. ##### Note -`NMHASH32` is an implementation of the `nmhash32` hash code of +`nmhash32` is an implementation of the `nmhash32` hash code of James Z. M. Gao. This code has good, but not great, performance on long keys, poorer performance on short keys. @@ -840,7 +840,7 @@ As a result it should give fair performance for typical hash table applications. This code passes the SMHasher tests, and has no known bad seeds. It is a *pure* function for integer arrays, and an *elemental* -function for character strings. +function for character strings. ##### Example @@ -859,7 +859,7 @@ end program demo_nmhash32 ``` -#### `NMHASH32X`- calculates a hash code from a key and a seed +#### `nmhash32x`- calculates a hash code from a key and a seed ##### Status @@ -867,7 +867,7 @@ Experimental ##### Description -Calculates a 32 bit hash code from a rank-1 integer array or a default +Calculates a 32-bit hash code from a rank-1 integer array or a default character string, and the input `seed`. 
##### Syntax @@ -881,20 +881,20 @@ Pure/elemental function ##### Arguments `key`: Shall be a deferred length default character scalar expression -or a rank-1 integer array expression of kind `INT8`, `INT16`, -`INT32`, or `INT64`. +or a rank-1 integer array expression of kind `int8`, `int16`, +`int32`, or `int64`. It is an `intent(in)` argument. -`seed`: shall be an integer scalar expression of kind `INT32`. +`seed`: shall be an integer scalar expression of kind `int32`. It is an `intent(in)` argument. ##### Result -The result is a scalar integer of kind `INT32`. +The result is a scalar integer of kind `int32`. ##### Note -`NMHASH32X` is an implementation of the `nmhash32x` hash code of +`nmhash32x` is an implementation of the `nmhash32x` hash code of James Z. M. Gao. This code has good, but not great, performance on long keys, poorer performance on short keys. @@ -902,7 +902,7 @@ As a result it should give fair performance for typical hash table applications. This code passes the SMHasher tests, and has no known bad seeds. It is a *pure* function for integer arrays, and an *elemental* -function for character strings. +function for character strings. ##### Example @@ -920,7 +920,7 @@ program demo_nmhash32x end program demo_nmhash32x ``` -#### `ODD_RANDOM_INTEGER` - returns an odd integer +#### `odd_random_integer` - returns an odd integer ##### Status @@ -928,7 +928,7 @@ Experimental ##### Description -Returns a random 32 bit integer distributed uniformly over the odd values. +Returns a random 32-bit integer distributed uniformly over the odd values. ##### Syntax @@ -940,22 +940,22 @@ Subroutine ##### Argument -`harvest`: Shall be a scalar integer variable of kind `INT32`. It is +`harvest`: Shall be a scalar integer variable of kind `int32`. It is an `intent(out)` argument. ##### Note -`ODD_RANDOM_INTEGER` is intended to generate seeds for - `UNIVERSAL_MULT_HASH`. 
`ODD_RANDOM_NUMBER` uses Fortran's intrinsic - `RANDOM_NUMBER` and the values returned can be changed by calling the - intrinsic `RANDOM_INIT`. - +`odd_random_integer` is intended to generate seeds for + `universal_mult_hash`. `ODD_random_number` uses Fortran's intrinsic + `random_number` and the values returned can be changed by calling the + intrinsic `random_init`. + ##### Example -See `UNIVERSAL_MULT_HASH`. +See `universal_mult_hash`. -#### `UNIVERSAL_MULT_HASH` - maps an integer to a smaller number of bits +#### `universal_mult_hash` - maps an integer to a smaller number of bits ##### Status @@ -963,7 +963,7 @@ Experimental ##### Description -Calculates an `nbits` hash code from a 32 bit integer. This is useful +Calculates an `nbits` hash code from a 32-bit integer. This is useful in mapping a hash value to a range 0 to `2**nbits-1`. ##### Syntax @@ -976,10 +976,10 @@ Elemental function ##### Arguments -`key`: Shall be a scalar integer expression of kind `INT32`. It is an +`key`: Shall be a scalar integer expression of kind `int32`. It is an `intent(in)` argument. -`seed`: Shall be a scalar integer expression of kind `INT32`. It is an +`seed`: Shall be a scalar integer expression of kind `int32`. It is an `intent(in)` argument. It must have an odd value. `nbits` Shall be a scalar default integer expression with `0 < nbits < @@ -987,14 +987,14 @@ Elemental function ##### Result -The result is a scalar integer of kind `INT32` with at most the lowest +The result is a scalar integer of kind `int32` with at most the lowest `nbits` nonzero. ##### Note -`UNIVERSAL_MULT_HASH` is an implementation of the Universal +`universal_mult_hash` is an implementation of the Universal Multiplicative Hash of M. Dietzfelbinger, et al. -It multiplies the `KEY` by `SEED`, and returns the +It multiplies the `key` by `seed`, and returns the `nbits` upper bits of the product as the lowest bits of the result. 
##### Example @@ -1020,7 +1020,7 @@ program demo_universal_mult_hash end program demo_odd_random_integer ``` -#### `WATER_HASH`- calculates a hash code from a key and a seed +#### `water_hash`- calculates a hash code from a key and a seed ##### Status @@ -1028,7 +1028,7 @@ Experimental ##### Description -Calculates a 32 bit hash code from a rank-1 integer array or a default +Calculates a 32-bit hash code from a rank-1 integer array or a default character string, and the input `seed`. ##### Syntax @@ -1042,20 +1042,20 @@ Pure/elemental function ##### Arguments `key`: Shall be a deferred length default character scalar expression -or a rank-1 integer array expression of kind `INT8`, `INT16`, -`INT32`, or `INT64`. +or a rank-1 integer array expression of kind `int8`, `int16`, +`int32`, or `int64`. It is an `intent(in)` argument. -`seed`: shall be an integer scalar expression of kind `INT64`. +`seed`: shall be an integer scalar expression of kind `int64`. It is an `intent(in)` argument. ##### Result -The result is a scalar integer of kind `INT32`. +The result is a scalar integer of kind `int32`. ##### Note -`WATER_HASH` is an implementation of the `waterhash` hash code of +`water_hash` is an implementation of the `waterhash` hash code of Tommy Ettinger. This code has excellent performance on long keys, and good performance on short keys. @@ -1068,9 +1068,9 @@ output is poor, so far testing has not found any bad seeds for `waterhash`. It can have undefined behavior if the key is not word aligned, i.e. some computer processors can only process a given size integer if -the address of the integer is a multiple of the integer size. +the address of the integer is a multiple of the integer size. It is a *pure* function for integer arrays, and an *elemental* -function for character strings. +function for character strings. ##### Example @@ -1097,44 +1097,44 @@ applications, and are primarily useful for check sums and related applications. 
As checksums often have to deal with extremely large files or directories, it is often useful to use incremental hashing as well as -direct hashing, so 64 bit and higher hash algorithms often provide +direct hashing, so 64-bit and higher hash algorithms often provide multiple implementations. The current module, for simplicity of API, doesn't provide any incremental hashes. The `stdlib_hash_64bit` module defines several public -overloaded 64 bit hash procedures, `FNV_1`, `FNV-1A`, -`PENGY_HASH`, and `SPOOKY_HASH`, two scalar hash functions, -`FIBONACCI_HASH` and -`UNIVERSAL_MULT_HASH`, a seed generator, `ODD_RANDOM_INTEGER`, for the -`UNIVERSAL_MULT_HASH`, and two seed generators, `NEW_PENGY_HASH_SEED` -and `NEW_SPOOKY_HASH_SEED` for their respective hash functions. It -also defines the integer kind constant, `INT_HASH`, used to specify +overloaded 64-bit hash procedures, `FNV_1`, `FNV-1A`, +`pengy_hash`, and `spooky_hash`, two scalar hash functions, +`fibonacci_hash` and +`universal_mult_hash`, a seed generator, `odd_random_integer`, for the +`universal_mult_hash`, and two seed generators, `new_pengy_hash_seed` +and `new_spooky_hash_seed` for their respective hash functions. It +also defines the integer kind constant, `int_hash`, used to specify the kind of the hash function results, and a logical constant, -`LITTLE_ENDIAN`, used to deal with one aspect of the machine -dependence of the hash codes. +`little_endian`, used to deal with one aspect of the machine +dependence of the hash codes. Note that while SpookyHash can be used as a sixty-four bit hash algorithm, its algorithms actually returns two element integer arrays -of kind `INT64`, so it can also be used as a 128 bit hash. +of kind `int64`, so it can also be used as a 128 bit hash. -### The `INT_HASH` parameters +### The `int_hash` parameters It is necessary to define the kind of integer used to return the hash code. 
-As `stdlib_haash_64bit` deals exclusively with 64 bit hash codes, -`INT_HASH` is an alias for the integer kind `INT64`. +As `stdlib_hash_64bit` deals exclusively with 64-bit hash codes, +`int_hash` is an alias for the integer kind `int64`. -### The `LITTLE_ENDIAN` parameter +### The `little_endian` parameter In implementing hash functions it is sometimes necessary to know the "endianess" of the compiler's integers. To this end the `stdlib_hash_64bit` module defines the logical parameter -`LITTLE_ENDIAN` that if true indicates that the compiler has +`little_endian` that if true indicates that the compiler has little-endian integers, and that if false indicates that the integers are big-endian. ### Specifications of the `stdlib_hash_64bit` procedures -#### `FIBONACCI_HASH` - maps an integer to a smaller number of bits +#### `fibonacci_hash` - maps an integer to a smaller number of bits ##### Status @@ -1142,7 +1142,7 @@ Experimental ##### Description -Calculates an `nbits` hash code from a 64 bit integer. This is useful +Calculates an `nbits` hash code from a 64-bit integer. This is useful in mapping hash codes into small arrays. ##### Syntax @@ -1155,7 +1155,7 @@ Elemental function ##### Arguments -`key`: Shall be a scalar integer expression of kind `INT64`. It is an +`key`: Shall be a scalar integer expression of kind `int64`. It is an `intent(in)` argument. `nbits` Shall be a scalar default integer expression with `0 < nbits < @@ -1163,13 +1163,13 @@ Elemental function ##### Result -The result is an integer of kind `INT64` with at most the lowest +The result is an integer of kind `int64` with at most the lowest `nbits` nonzero, mapping to a range 0 to `nbits-1`. ##### Note -`FIBONACCI_HASH` is an implementation of the Fibonacci Hash of Donald -E. Knuth. +`fibonacci_hash` is an implementation of the Fibonacci Hash of Donald +E. Knuth.
It multiplies the `key` by the odd valued approximation to `2**64/phi`, where `phi` is the golden ratio 1.618..., and returns the `nbits` upper bits of the product as the lowest bits of the result. @@ -1199,7 +1199,7 @@ Experimental ##### Description -Calculates a 64 bit hash code from a rank-1 integer array or a default +Calculates a 64-bit hash code from a rank-1 integer array or a default character string. ##### Syntax @@ -1213,13 +1213,13 @@ Pure/elemental function ##### Argument `key`: Shall be a deferred length default character scalar expression -or a rank-1 integer array expression of kind `INT8`, `INT16`, -`INT32`, or `INT64`. +or a rank-1 integer array expression of kind `int8`, `int16`, +`int32`, or `int64`. It is an `intent(in)` argument. ##### Result -The result is a scalar integer of kind `INT64`. +The result is a scalar integer of kind `int64`. ##### Note @@ -1236,7 +1236,7 @@ This code does not pass any of the SMHasher tests, but the resulting degradation in performance due to its larger number of collisions is expected to be minor compared to its faster hashing rate. It is a *pure* function for integer arrays, and an *elemental* -function for character strings. +function for character strings. ##### Example @@ -1263,7 +1263,7 @@ Experimental ##### Description -Calculates a 64 bit hash code from a rank-1 integer array or a default +Calculates a 64-bit hash code from a rank-1 integer array or a default character string. ##### Syntax @@ -1277,13 +1277,13 @@ Pure/elemental function ##### Argument `key`: Shall be a deferred length default character scalar expression -or a rank-1 integer array expression of kind `INT8`, `INT16`, -`INT32`, or `INT64`. +or a rank-1 integer array expression of kind `int8`, `int16`, +`int32`, or `int64`. It is an `intent(in)` argument. ##### Result -The result is a scalar integer of kind `INT32`. +The result is a scalar integer of kind `int64`.
##### Note @@ -1300,7 +1300,7 @@ This code does not pass any of the SMHasher tests, but the resulting degradation in performance due to its larger number of collisions is expected to be minor compared to its faster hashing rate. It is a *pure* function for integer arrays, and an *elemental* -function for character strings. +function for character strings. ##### Example @@ -1318,7 +1318,7 @@ end program demo_fnv_1a_hash ``` -#### `NEW_PENGY_HASH_SEED`- returns a valid input seed for `PENGY_HASH` +#### `new_pengy_hash_seed`- returns a valid input seed for `pengy_hash` ##### Status @@ -1326,8 +1326,8 @@ Experimental ##### Description -Calculates a 32 bit "random" integer that is believed to be a valid -seed for `PENGY_HASH` and is also different from the input seed. +Calculates a 32-bit "random" integer that is believed to be a valid +seed for `pengy_hash` and is also different from the input seed. ##### Syntax @@ -1339,24 +1339,24 @@ Subroutine ##### Argument -`seed`: shall be a defined integer scalar variable of kind `INT32`. +`seed`: shall be a defined integer scalar variable of kind `int32`. It is an `intent(inout)` argument. On input `seed` should be defined, and on output it will be different from the input `seed`. ##### Note -Currently there are no known bad seeds for `PENGY_HASH`, but if any are +Currently there are no known bad seeds for `pengy_hash`, but if any are identified the procedure will be revised so that they cannot be returned. This subroutine uses Fortran's intrinsic - `RANDOM_NUMBER` and the values returned can be changed by calling the - intrinsic `RANDOM_INIT`. + `random_number` and the values returned can be changed by calling the + intrinsic `random_init`. ##### Example -See the example for `PENGY_HASH`. +See the example for `pengy_hash`. 
-#### `NEW_SPOOKY_HASH_SEED`- returns a valid input seed for `SPOOKY_HASH` +#### `new_spooky_hash_seed`- returns a valid input seed for `spooky_hash` ##### Status @@ -1364,9 +1364,9 @@ Experimental ##### Description -Calculates a 32 bit two element vector of "random" integer values that -is believed to be a valid seed for `SPOOKY_HASH` and is also different -from the input seed. +Calculates a 64-bit two element vector of "random" integer values that +is believed to be a valid seed for `spooky_hash` and is also different +from the input seed. ##### Syntax @@ -1379,23 +1379,23 @@ Subroutine ##### Argument `seed`: shall be a defined two element integer vector variable of kind -`INT32`. It is an `intent(inout)` argument. On input `seed` should be +`int64`. It is an `intent(inout)` argument. On input `seed` should be defined, and on output it will be different from the input `seed`. ##### Note -Currently there are no known bad seeds for `SPOOKY_HASH`, but if any are +Currently there are no known bad seeds for `spooky_hash`, but if any are identified the procedure will be revised so that they cannot be returned. This subroutine uses Fortran's intrinsic - `RANDOM_NUMBER` and the values returned can be changed by calling the - intrinsic `RANDOM_INIT`. + `random_number` and the values returned can be changed by calling the + intrinsic `random_init`. ##### Example -See the example for `SPOOKY_HASH`. +See the example for `spooky_hash`. -#### `ODD_RANDOM_INTEGER` - returns odd integer +#### `odd_random_integer` - returns odd integer ##### Status @@ -1403,7 +1403,7 @@ Experimental ##### Description -Returns a random 64 bit integer distributed uniformly over the odd values. +Returns a random 64-bit integer distributed uniformly over the odd values. ##### Syntax @@ -1415,22 +1415,22 @@ Subroutine ##### Argument -`harvest`: Shall be an integer of kind `INT64`. It is an `intent(out)` +`harvest`: Shall be an integer of kind `int64`. It is an `intent(out)` argument.
##### Note -`ODD_RANDOM_INTEGER` is intended to generate seeds for - `UNIVERSAL_MULT_HASH`. `ODD_RANDOM_NUMBER` uses Fortran's intrinsic - `RANDOM_NUMBER` and the values returned can be changed by calling the - intrinsic `RANDOM_INIT`. +`odd_random_integer` is intended to generate seeds for + `universal_mult_hash`. `ODD_random_number` uses Fortran's intrinsic + `random_number` and the values returned can be changed by calling the + intrinsic `random_init`. ##### Example -See `UNIVERSAL_MULT_HASH`. +See `universal_mult_hash`. -#### `PENGY_HASH` - maps a character string or integer vector to an integer +#### `pengy_hash` - maps a character string or integer vector to an integer ##### Status @@ -1452,26 +1452,26 @@ Pure/elemental function ##### Arguments `key`: shall be a scalar expression of type default character or a -rank-1 integer vector expression of kind `INt8`, `INT16`, `INT32`, or -`INTT64`. It is an `intent(in)` argument. +rank-1 integer vector expression of kind `INt8`, `int16`, `int32`, or +`int64`. It is an `intent(in)` argument. -`seed`: shall be an integer expression of kind `INT64`. It is +`seed`: shall be an integer expression of kind `int64`. It is an `intent(in)` argument. ##### Result -The result is an integer of kind `INT64`. +The result is an integer of kind `int64`. ##### Note -`PENGY_HASH` is an implementation of the 64 bit `pengyhash` of Alberto +`pengy_hash` is an implementation of the 64-bit `pengyhash` of Alberto Fajardo. The hash has acceptable performance on small keys, and good performance on long keys. It passes all the SMHasher tests, and has no known bad seeds. It is a *pure* function for integer arrays, and an *elemental* -function for character strings. +function for character strings. 
-##### Exampl +##### Example ```fortran program demo_pengy_hash @@ -1490,7 +1490,7 @@ end program demo_pengy_hash ``` -#### `SPOOKY_HASH` - maps a character string or integer vector to an integer +#### `spooky_hash` - maps a character string or integer vector to an integer ##### Status @@ -1498,7 +1498,7 @@ Experimental ##### Description -Maps a character string or integer vector to a 64 bit integer whose +Maps a character string or integer vector to a 64-bit integer whose value also depends on a two element vector, `seed`. ##### Syntax @@ -1512,24 +1512,24 @@ Function ##### Arguments `key`: shall be a scalar of type default character expression or a -rank-1 integer vector expression of kind `INT8`, `INT16`, `INT32`, or -`INT64`. It is an `intent(in)` argument. +rank-1 integer vector expression of kind `int8`, `int16`, `int32`, or +`int64`. It is an `intent(in)` argument. `seed`: shall be a two element integer vector expression of kind -`INT64`. It is an `intent(in)` argument. +`int64`. It is an `intent(in)` argument. ##### Result -The result is a two element integer vector of kind `INT64`. +The result is a two element integer vector of kind `int64`. ##### Note -`SPOOKY_HASH` is an implementation of the 64 bit version 2 of +`spooky_hash` is an implementation of the 64-bit version 2 of SpookyHash of Bob Jenkins. The code was designed for little-endian compilers. The output is different on big-endian compilers, but still -probably as good quality. It is often used as a 64 bit hash using the +probably as good quality. It is often used as a 64-bit hash using the first element of the returned value, but can be used as a 128 bit -hash. This version of `SPOOKY_HASH` has good performance on small keys +hash. This version of `spooky_hash` has good performance on small keys and excellent performance on long keys. It passes all the SMHasher tests and has no known bad seeds. 
@@ -1551,7 +1551,7 @@ program demo_spooky_hash end program demo_spooky_hash ``` -#### `UNIVERSAL_MULT_HASH` - maps an integer to a smaller number of bits +#### `universal_mult_hash` - maps an integer to a smaller number of bits ##### Status @@ -1559,7 +1559,7 @@ Experimental ##### Description -Calculates an `nbits` hash code from a 64 bit integer. This is useful +Calculates an `nbits` hash code from a 64-bit integer. This is useful in mapping a hash value to a range 0 to `2**nbits-1`. ##### Syntax @@ -1572,10 +1572,10 @@ Elemental function ##### Arguments -`key`: Shall be an integer of kind `INT64`. It is an `intent(in)` +`key`: Shall be an integer of kind `int64`. It is an `intent(in)` argument. -`seed`: Shall be an integer of kind `INT64`. It is an `intent(in)` +`seed`: Shall be an integer of kind `int64`. It is an `intent(in)` argument. It should be an odd value. `nbits` Shall be a default integer with `0 < nbits < 64`. It is an @@ -1583,15 +1583,15 @@ argument. It should be an odd value. ##### Result -The result is an integer of kind `INT64` with at most the lowest +The result is an integer of kind `int64` with at most the lowest `nbits` nonzero. ##### Note -`UNIVERSAL_MULT_HASH` is an implementation of the Universal +`universal_mult_hash` is an implementation of the Universal Multiplicative Hash of M. Dietzfelbinger, et al. -It multiplies the `KEY` by `SEED`, and returns the -`NBITS` upper bits of the product as the lowest bits of the result. +It multiplies the `key` by `seed`, and returns the +`nbits` upper bits of the product as the lowest bits of the result. ##### Example @@ -1630,7 +1630,7 @@ the hash functions of `stdlib_hash_32bit` and `stdlib_hash_64bit`, `test_32_bit_hash_performance` and `test_64_bit_hash_performance` respectively. These are primarily set up to test runtime performance of the functions. 
They take a sample of -`2**18` integers of kind `INT8` and break it up into vectors of size +`2**18` integers of kind `int8` and break it up into vectors of size 1, 2, 4, 8, 16, 64, 256, and 1024 elements, yielding `2**18`, `2**17`, `2**16`, `2**15`, `2**14`, `2**12`, `2**10`, and `2**8` vectors respectively. These are then processed by the hash functions @@ -1734,9 +1734,9 @@ Standard Library implements three executables to test the validity of the Fortran codes against the original C and C++ codes. The three executables must be compiled manually using the makefile `Makefile.validation`, and the compiler suite used must be -GCC's. The first executable, `generate_key_array` is +GCC's. The first executable, `generate_key_array` is based on Fortran code, and generates a random sequence of 2048 -integers of kind `INT8`, and stores that sequence in the binary file +integers of kind `int8`, and stores that sequence in the binary file `key_array.bin`. The second executable, `generate_hash_arrays`, reads the values in `key_array.bin`, and, for each complicated hash procedure generates a corresponding binary file containing 2049 hash From 95acdca3f28f4a9eff73371d8778fee33c41c4bb Mon Sep 17 00:00:00 2001 From: Jeremie Vandenplas Date: Sun, 19 Dec 2021 09:57:50 +0100 Subject: [PATCH 094/106] stdlib_hash_32bit --- src/stdlib_hash_32bit.fypp | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/src/stdlib_hash_32bit.fypp b/src/stdlib_hash_32bit.fypp index 780293f2a..a01d1080d 100755 --- a/src/stdlib_hash_32bit.fypp +++ b/src/stdlib_hash_32bit.fypp @@ -227,8 +227,8 @@ contains elemental function fibonacci_hash( key, nbits ) result( sample ) !! Version: experimental !! -!! Maps the 32 bit integer KEY to an unsigned integer value with only NBITS -!! bits where NBITS is less than 32 +!! Maps the 32 bit integer `key` to an unsigned integer value with only `nbits` +!! bits where `nbits` is less than 32 !! 
([Specification](../page/specs/stdlib_hash_procedures.html#fibonacci_hash-maps-an-integer-to-a-smaller-number-of-bits)) integer(int32), intent(in) :: key @@ -242,8 +242,8 @@ contains elemental function universal_mult_hash( key, seed, nbits ) result( sample ) !! Version: experimental !! -!! Uses the "random" odd 32 bit integer SEED to map the 32 bit integer KEY to -!! an unsigned integer value with only NBITS bits where NBITS is less than 32 +!! Uses the "random" odd 32 bit integer `seed` to map the 32 bit integer `key` to +!! an unsigned integer value with only `nbits` bits where `nbits` is less than 32 !! ([Specification](../page/specs/stdlib_hash_procedures.html#universal_mult_hash-maps-an-integer-to-a-smaller-number-of-bits)) integer(int32), intent(in) :: key integer(int32), intent(in) :: seed @@ -257,8 +257,8 @@ contains subroutine odd_random_integer( harvest ) !! Version: experimental !! -!! Returns a 32 bit pseudo random integer, HARVEST, distributed uniformly over -!! the odd integers of the INT32 kind. +!! Returns a 32 bit pseudo random integer, `harvest`, distributed uniformly over +!! the odd integers of the `int32` kind. !! ([Specification](../page/specs/stdlib_hash_procedures.html#odd_random_integer-returns-an-odd-integer)) integer(int32), intent(out) :: harvest real(dp) :: sample From 2f4d48e0c61c3d325f74c7f2b3d1a2383ed89f96 Mon Sep 17 00:00:00 2001 From: Jeremie Vandenplas Date: Sun, 19 Dec 2021 09:58:02 +0100 Subject: [PATCH 095/106] stdlib_hash_64bit --- src/stdlib_hash_64bit.fypp | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/src/stdlib_hash_64bit.fypp b/src/stdlib_hash_64bit.fypp index 35b6ce512..a3aba0c1f 100755 --- a/src/stdlib_hash_64bit.fypp +++ b/src/stdlib_hash_64bit.fypp @@ -208,7 +208,7 @@ interface module subroutine new_spooky_hash_seed( seed ) !! Version: experimental !! -!! Random SEED generator for +!! 
Random seed generator for SPOOKY_HASH integer(int64), intent(inout) :: seed(2) end subroutine new_spooky_hash_seed @@ -243,7 +243,7 @@ interface module subroutine new_pengy_hash_seed( seed ) !! Version: experimental !! -!! Random SEED generator for MIR_HASH_STRICT +!! Random seed generator for PENGY_HASH integer(int32), intent(inout) :: seed end subroutine new_pengy_hash_seed @@ -254,8 +254,8 @@ contains elemental function fibonacci_hash( key, nbits ) result( sample ) !! Version: experimental !! -!! Maps the 64 bit integer KEY to an unsigned integer value with only NBITS -!! bits where NBITS is less than 64 +!! Maps the 64 bit integer `key` to an unsigned integer value with only `nbits` +!! bits where `nbits` is less than 64 !! ([Specification](../page/specs/stdlib_hash_procedures.html#fibonacci_hash-maps-an-integer-to-a-smaller-number-of-bits_1)) integer(int64), intent(in) :: key @@ -269,8 +269,8 @@ contains elemental function universal_mult_hash( key, seed, nbits ) result( sample ) !! Version: experimental !! -!! Uses the "random" odd 64 bit integer SEED to map the 64 bit integer KEY to -!! an unsigned integer value with only NBITS bits where NBITS is less than 64. +!! Uses the "random" odd 64 bit integer `seed` to map the 64 bit integer `key` to +!! an unsigned integer value with only `nbits` bits where `nbits` is less than 64. !! ([Specification](../page/specs/stdlib_hash_procedures.html#universal_mult_hash-maps-an-integer-to-a-smaller-number-of-bits_1)) integer(int64), intent(in) :: key @@ -285,7 +285,7 @@ contains subroutine odd_random_integer( harvest ) !! Version: experimental !! -!! Returns a 64 bit pseudo random integer, HARVEST, distributed uniformly over +!! Returns a 64 bit pseudo random integer, `harvest`, distributed uniformly over !! the odd integers of the 64 bit kind. !! ([Specification](../page/specs/stdlib_hash_procedures.html#odd_random_integer-returns-odd-integer)) @@ -303,7 +303,7 @@ contains subroutine random_integer( harvest ) !!
Version: experimental !! -!! Returns a 64 bit pseudo random integer, HARVEST, distributed uniformly over +!! Returns a 64 bit pseudo random integer, `harvest`, distributed uniformly over !! the values of the 64 bit kind. integer(int64), intent(out) :: harvest real(dp) :: sample(2) From 5ac55910f6447d141cd18344ff905794a36b31aa Mon Sep 17 00:00:00 2001 From: Jeremie Vandenplas Date: Sun, 19 Dec 2021 10:06:40 +0100 Subject: [PATCH 096/106] Update doc/specs/stdlib_hash_procedures.md --- doc/specs/stdlib_hash_procedures.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/specs/stdlib_hash_procedures.md b/doc/specs/stdlib_hash_procedures.md index d59bd706a..5053374e3 100755 --- a/doc/specs/stdlib_hash_procedures.md +++ b/doc/specs/stdlib_hash_procedures.md @@ -1452,7 +1452,7 @@ Pure/elemental function ##### Arguments `key`: shall be a scalar expression of type default character or a -rank-1 integer vector expression of kind `INt8`, `int16`, `int32`, or +rank-1 integer vector expression of kind `int8`, `int16`, `int32`, or `int64`. It is an `intent(in)` argument. `seed`: shall be an integer expression of kind `int64`. It is From 8bb43ba1188b5344e39bbed54c0078c660f2cb25 Mon Sep 17 00:00:00 2001 From: Jeremie Vandenplas Date: Sun, 19 Dec 2021 10:18:41 +0100 Subject: [PATCH 097/106] update the description of testing --- doc/specs/stdlib_hash_procedures.md | 30 +++++++++++++---------------- src/tests/hash_functions/README.md | 2 +- 2 files changed, 14 insertions(+), 18 deletions(-) diff --git a/doc/specs/stdlib_hash_procedures.md b/doc/specs/stdlib_hash_procedures.md index 9f881290b..22f0e13fc 100755 --- a/doc/specs/stdlib_hash_procedures.md +++ b/doc/specs/stdlib_hash_procedures.md @@ -1624,7 +1624,7 @@ various hash functions. The other is a comparison of the outputs of the Fortran hash functions, with the outputs of the C and C++ hash procedures that are the inspiration for the Fortran hash functions. 
-In the `src/test/hash_functions` subdirectory, the Fortran Standard +In the `src/tests/hash_functions_perf` subdirectory, the Fortran Standard Library provides two performance test codes for the hash functions of `stdlib_hash_32bit` and `stdlib_hash_64bit`, `test_32_bit_hash_performance` and @@ -1729,20 +1729,16 @@ severely impact the performance of `nmhash32`, `nmhash32x`, `water_hash`, `pengy_hash`, and `spooky_hash` relative to `fnv_1_hash` and `fnv_1a_hash`. -In the `src/test/hash_functions/validation` subdirectory, the Fortran -Standard Library implements three executables to test the validity of -the Fortran codes against the original C and C++ codes. The three -executables must be compiled manually using the makefile -`Makefile.validation`, and the compiler suite used must be -GCC's. The first executable, `generate_key_array` is -based on Fortran code, and generates a random sequence of 2048 -integers of kind `INT8`, and stores that sequence in the binary file -`key_array.bin`. The second executable, `generate_hash_arrays`, reads -the values in `key_array.bin`, and, for each complicated hash -procedure generates a corresponding binary file containing 2049 hash -values generated from the values in `key_array.bin`. The third -executable, `hash_validity_test`, reads the binary files and for each -complicated hash procedure compares the contents of the binary file +In the `src/tests/hash_functions` subdirectory, the Fortran +Standard Library contains codes to test the validity of +the Fortran codes against the original C and C++ codes.
It consists of one +executable `test_hash_functions` that +1) generates a random sequence of 2048 +integers of kind `int8`, and stores that sequence in the binary file +`key_array.bin`; +2) reads the values in `key_array.bin`, and, for each complicated C/C++-coded +hash procedure, generates a corresponding binary file containing 2049 hash +values generated from the values in `key_array.bin`; and +3) reads the binary files, and, for each complicated C/C++-coded hash procedure, compares the contents of the binary file with the results of calculating hash values using the corresponding -Fortran hash procedure on the same keys. These executables must be run -manually in the same order. +Fortran hash procedure on the same keys. diff --git a/src/tests/hash_functions/README.md b/src/tests/hash_functions/README.md index 5cf2550e3..57ca44177 100644 --- a/src/tests/hash_functions/README.md +++ b/src/tests/hash_functions/README.md @@ -1,4 +1,4 @@ -The validation directory contains code to validate the Fortran hash functions against the original C/C++ codes. It consists of one executable `test_hash_functions` that: +The hash_functions directory contains code to validate the Fortran hash functions against the original C/C++ codes. It consists of one executable `test_hash_functions` that: * creates a file containing 2048 random 8 bit integers using the subroutine `generate_key_array`. From 8ce29b25d69b86260fbb777b8dd7b3fd4cc91cbf Mon Sep 17 00:00:00 2001 From: "William B.
Clodius" Date: Sun, 19 Dec 2021 08:09:36 -1000 Subject: [PATCH 098/106] Update doc/specs/stdlib_hash_procedures.md Co-authored-by: Jeremie Vandenplas --- doc/specs/stdlib_hash_procedures.md | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/doc/specs/stdlib_hash_procedures.md b/doc/specs/stdlib_hash_procedures.md index 9f881290b..25c6c6036 100755 --- a/doc/specs/stdlib_hash_procedures.md +++ b/doc/specs/stdlib_hash_procedures.md @@ -415,8 +415,7 @@ allows their quick loading and retainment in the instruction cache, giving a performance boost where the hashing is intermittent. (See the [SMHasher discussion](https://github.com/rurban/smhasher/README.md) -and S. Richter, V. Alvarez, and J. Dittrich, -["A Seven-Dimensional Analysis of Hashing Methods and its Implications on Query Processing"](https://bigdata.uni-saarland.de/publications/p249-richter.pdf). +and [S. Richter, V. Alvarez, and J. Dittrich. 2015. A Seven-Dimensional Analysis of Hashing Methods and its Implications on Query Processing, Proceedings of the VLDB Endowment, Vol. 9, No. 3.](https://bigdata.uni-saarland.de/publications/p249-richter.pdf) [https://doi.org/10.14778/2850583.2850585](https://doi.org/10.14778/2850583.2850585). The `stdlib_32_bit_nmhashes` submodule provides implementations of James Z.M. Gao's `nmhash32` and `nmhash32x` algorithms, From 3d29b922ccf96a491f115fe6c9af14008521c9da Mon Sep 17 00:00:00 2001 From: Jeremie Vandenplas Date: Sun, 19 Dec 2021 21:29:37 +0100 Subject: [PATCH 099/106] Update doc/specs/stdlib_hash_procedures.md --- doc/specs/stdlib_hash_procedures.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/specs/stdlib_hash_procedures.md b/doc/specs/stdlib_hash_procedures.md index 5053374e3..af8e6c95b 100755 --- a/doc/specs/stdlib_hash_procedures.md +++ b/doc/specs/stdlib_hash_procedures.md @@ -946,7 +946,7 @@ an `intent(out)` argument. ##### Note `odd_random_integer` is intended to generate seeds for - `universal_mult_hash`. 
`ODD_random_number` uses Fortran's intrinsic + `universal_mult_hash`. `odd_random_integer` uses Fortran's intrinsic `random_number` and the values returned can be changed by calling the intrinsic `random_init`. From ebcf239d97baaef474409bcfa52ad2f5210c46f4 Mon Sep 17 00:00:00 2001 From: Jeremie Vandenplas Date: Sun, 19 Dec 2021 21:29:43 +0100 Subject: [PATCH 100/106] Update doc/specs/stdlib_hash_procedures.md --- doc/specs/stdlib_hash_procedures.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/specs/stdlib_hash_procedures.md b/doc/specs/stdlib_hash_procedures.md index af8e6c95b..07cbd0d3e 100755 --- a/doc/specs/stdlib_hash_procedures.md +++ b/doc/specs/stdlib_hash_procedures.md @@ -1421,7 +1421,7 @@ argument. ##### Note `odd_random_integer` is intended to generate seeds for - `universal_mult_hash`. `ODD_random_number` uses Fortran's intrinsic + `universal_mult_hash`. `odd_random_integer` uses Fortran's intrinsic `random_number` and the values returned can be changed by calling the intrinsic `random_init`. 
From a144d693178eba916ffb86740b303e4c002d69ea Mon Sep 17 00:00:00 2001 From: Jeremie Vandenplas Date: Tue, 21 Dec 2021 11:09:18 +0100 Subject: [PATCH 101/106] rename hash submodule files --- src/stdlib_hash_32bit.fypp | 0 ...ib_32_bit_fnv_hashes.fypp => stdlib_hash_32bit_fnv.fypp} | 4 ++-- ...tdlib_32_bit_nmhashes.fypp => stdlib_hash_32bit_nm.fypp} | 4 ++-- ...2_bit_water_hashes.fypp => stdlib_hash_32bit_water.fypp} | 6 +++--- src/stdlib_hash_64bit.fypp | 0 ...ib_64_bit_fnv_hashes.fypp => stdlib_hash_64bit_fnv.fypp} | 4 ++-- ...4_bit_pengy_hashes.fypp => stdlib_hash_64bit_pengy.fypp} | 4 ++-- ...spookyv2_hashes.fypp => stdlib_hash_64bit_spookyv2.fypp} | 4 ++-- 8 files changed, 13 insertions(+), 13 deletions(-) mode change 100755 => 100644 src/stdlib_hash_32bit.fypp rename src/{stdlib_32_bit_fnv_hashes.fypp => stdlib_hash_32bit_fnv.fypp} (98%) mode change 100755 => 100644 rename src/{stdlib_32_bit_nmhashes.fypp => stdlib_hash_32bit_nm.fypp} (99%) mode change 100755 => 100644 rename src/{stdlib_32_bit_water_hashes.fypp => stdlib_hash_32bit_water.fypp} (98%) mode change 100755 => 100644 mode change 100755 => 100644 src/stdlib_hash_64bit.fypp rename src/{stdlib_64_bit_fnv_hashes.fypp => stdlib_hash_64bit_fnv.fypp} (98%) mode change 100755 => 100644 rename src/{stdlib_64_bit_pengy_hashes.fypp => stdlib_hash_64bit_pengy.fypp} (98%) mode change 100755 => 100644 rename src/{stdlib_64_bit_spookyv2_hashes.fypp => stdlib_hash_64bit_spookyv2.fypp} (99%) mode change 100755 => 100644 diff --git a/src/stdlib_hash_32bit.fypp b/src/stdlib_hash_32bit.fypp old mode 100755 new mode 100644 diff --git a/src/stdlib_32_bit_fnv_hashes.fypp b/src/stdlib_hash_32bit_fnv.fypp old mode 100755 new mode 100644 similarity index 98% rename from src/stdlib_32_bit_fnv_hashes.fypp rename to src/stdlib_hash_32bit_fnv.fypp index db9ca4c21..3736f44fd --- a/src/stdlib_32_bit_fnv_hashes.fypp +++ b/src/stdlib_hash_32bit_fnv.fypp @@ -10,7 +10,7 @@ !#! 
Integer kinds to be considered during templating #:set INT_KINDS = ["int16", "int32", "int64"] -submodule(stdlib_hash_32bit) stdlib_32_bit_fnv_hashes +submodule(stdlib_hash_32bit) stdlib_hash_32bit_fnv !! An implementation of the FNV hashes 1 and 1a of Glenn Fowler, Landon Curt !! Noll, and Kiem-Phong-Vo, !! https://en.wikipedia.org/wiki/Fowler–Noll–Vo_hash_function @@ -123,4 +123,4 @@ contains end function character_fnv_1a -end submodule stdlib_32_bit_fnv_hashes +end submodule stdlib_hash_32bit_fnv diff --git a/src/stdlib_32_bit_nmhashes.fypp b/src/stdlib_hash_32bit_nm.fypp old mode 100755 new mode 100644 similarity index 99% rename from src/stdlib_32_bit_nmhashes.fypp rename to src/stdlib_hash_32bit_nm.fypp index 8ebe16eb5..3d2c1bbb7 --- a/src/stdlib_32_bit_nmhashes.fypp +++ b/src/stdlib_hash_32bit_nm.fypp @@ -44,7 +44,7 @@ #! Integer kinds to be considered during templating #:set INT_KINDS = ["int16", "int32", "int64"] -submodule(stdlib_hash_32bit) stdlib_32_bit_nmhashes +submodule(stdlib_hash_32bit) stdlib_hash_32bit_nm implicit none @@ -803,4 +803,4 @@ contains end subroutine new_nmhash32x_seed -end submodule stdlib_32_bit_nmhashes +end submodule stdlib_hash_32bit_nm diff --git a/src/stdlib_32_bit_water_hashes.fypp b/src/stdlib_hash_32bit_water.fypp old mode 100755 new mode 100644 similarity index 98% rename from src/stdlib_32_bit_water_hashes.fypp rename to src/stdlib_hash_32bit_water.fypp index bf8c4b907..ab91c4d02 --- a/src/stdlib_32_bit_water_hashes.fypp +++ b/src/stdlib_hash_32bit_water.fypp @@ -37,7 +37,7 @@ !! For more information, please refer to !! !! `WATER_HASH` is distributed as part of the `stdlib_32_bit_hash_functions.f90` -!! module and its `stdlib_32_bit_water_hashes.f90` submodule with the Fortran +!! module and its `stdlib_hash_32bit_water.f90` submodule with the Fortran !! Standard Library at URL: https://github.com/fortran-lang/stdlib. !! The Fortran Standard Library, including this code, is distributed under the !! 
MIT License as described in the `LICENSE` file distributed with the library. @@ -74,7 +74,7 @@ #! Integer kinds to be considered during templating #:set INT_KINDS = ["int16", "int32", "int64"] -submodule(stdlib_hash_32bit) stdlib_32_bit_water_hashes +submodule(stdlib_hash_32bit) stdlib_hash_32bit_water implicit none contains @@ -280,4 +280,4 @@ contains end subroutine new_water_hash_seed -end submodule stdlib_32_bit_water_hashes +end submodule stdlib_hash_32bit_water diff --git a/src/stdlib_hash_64bit.fypp b/src/stdlib_hash_64bit.fypp old mode 100755 new mode 100644 diff --git a/src/stdlib_64_bit_fnv_hashes.fypp b/src/stdlib_hash_64bit_fnv.fypp old mode 100755 new mode 100644 similarity index 98% rename from src/stdlib_64_bit_fnv_hashes.fypp rename to src/stdlib_hash_64bit_fnv.fypp index 913999bce..5bb5bf20f --- a/src/stdlib_64_bit_fnv_hashes.fypp +++ b/src/stdlib_hash_64bit_fnv.fypp @@ -12,7 +12,7 @@ #! Integer kinds to be considered during templating #:set INT_KINDS = ["int16", "int32", "int64"] -submodule(stdlib_hash_64bit) stdlib_64_bit_fnv_hashes +submodule(stdlib_hash_64bit) stdlib_hash_64bit_fnv ! An implementation of the FNV hashes 1 and 1a of Glenn Fowler, Landon Curt ! Noll, and Kiem-Phong-Vo, ! https://en.wikipedia.org/wiki/Fowler–Noll–Vo_hash_function @@ -122,4 +122,4 @@ contains end function character_fnv_1a -end submodule stdlib_64_bit_fnv_hashes +end submodule stdlib_hash_64bit_fnv diff --git a/src/stdlib_64_bit_pengy_hashes.fypp b/src/stdlib_hash_64bit_pengy.fypp old mode 100755 new mode 100644 similarity index 98% rename from src/stdlib_64_bit_pengy_hashes.fypp rename to src/stdlib_hash_64bit_pengy.fypp index 6974555a8..ec1e6b2e0 --- a/src/stdlib_64_bit_pengy_hashes.fypp +++ b/src/stdlib_hash_64bit_pengy.fypp @@ -40,7 +40,7 @@ #! 
Integer kinds to be considered during templating #:set INT_KINDS = ["int16", "int32", "int64"] -submodule(stdlib_hash_64bit) stdlib_64_bit_pengy_hashes +submodule(stdlib_hash_64bit) stdlib_hash_64bit_pengy implicit none @@ -146,4 +146,4 @@ contains end subroutine new_pengy_hash_seed -end submodule stdlib_64_bit_pengy_hashes +end submodule stdlib_hash_64bit_pengy diff --git a/src/stdlib_64_bit_spookyv2_hashes.fypp b/src/stdlib_hash_64bit_spookyv2.fypp old mode 100755 new mode 100644 similarity index 99% rename from src/stdlib_64_bit_spookyv2_hashes.fypp rename to src/stdlib_hash_64bit_spookyv2.fypp index 805abbab2..fdd5f3df3 --- a/src/stdlib_64_bit_spookyv2_hashes.fypp +++ b/src/stdlib_hash_64bit_spookyv2.fypp @@ -13,7 +13,7 @@ #! Integer kinds to be considered during templating #:set INT_KINDS = ["int16", "int32", "int64"] -submodule(stdlib_hash_64bit) stdlib_64_bit_spookyv2_hashes +submodule(stdlib_hash_64bit) stdlib_hash_64bit_spookyv2 ! I have tried to make this portable while retaining efficiency. I assume ! processors with two's complement integers from 8, 16, 32, and 64 bits. 
@@ -712,4 +712,4 @@ contains end subroutine new_spooky_hash_seed -end submodule stdlib_64_bit_spookyv2_hashes +end submodule stdlib_hash_64bit_spookyv2 From bcc0cc2026621dfc7fd330587fb58f585c525199 Mon Sep 17 00:00:00 2001 From: Jeremie Vandenplas Date: Tue, 21 Dec 2021 11:14:22 +0100 Subject: [PATCH 102/106] update CMakeLists.txt --- src/CMakeLists.txt | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index de23dbaed..f23671745 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -2,18 +2,18 @@ # Create a list of the files to be preprocessed set(fppFiles - stdlib_32_bit_fnv_hashes.fypp - stdlib_32_bit_nmhashes.fypp - stdlib_32_bit_water_hashes.fypp - stdlib_64_bit_fnv_hashes.fypp - stdlib_64_bit_pengy_hashes.fypp - stdlib_64_bit_spookyv2_hashes.fypp stdlib_ascii.fypp stdlib_bitsets.fypp stdlib_bitsets_64.fypp stdlib_bitsets_large.fypp stdlib_hash_32bit.fypp + stdlib_hash_32bit_fnv.fypp + stdlib_hash_32bit_nm.fypp + stdlib_hash_32bit_water.fypp stdlib_hash_64bit.fypp + stdlib_hash_64bit_fnv.fypp + stdlib_hash_64bit_pengy.fypp + stdlib_hash_64bit_spookyv2.fypp stdlib_io.fypp stdlib_io_npy.fypp stdlib_io_npy_load.fypp From cf3b60b0b122265b58e76de91eab8174921cd56f Mon Sep 17 00:00:00 2001 From: Jeremie Vandenplas Date: Tue, 21 Dec 2021 11:54:59 +0100 Subject: [PATCH 103/106] update Makefile.manual --- src/Makefile.manual | 24 ++++++++++++------------ 1 file changed, 12 insertions(+), 12 deletions(-) diff --git a/src/Makefile.manual b/src/Makefile.manual index 7b486f018..4fe22279d 100644 --- a/src/Makefile.manual +++ b/src/Makefile.manual @@ -1,10 +1,10 @@ SRCFYPP = \ - stdlib_32_bit_fnv_hashes.fypp \ - stdlib_32_bit_nmhashes.fypp \ - stdlib_32_bit_water_hashes.fypp \ - stdlib_64_bit_fnv_hashes.fypp \ - stdlib_64_bit_pengy_hashes.fypp \ - stdlib_64_bit_spookyv2_hashes.fypp \ + stdlib_hash_32bit_fnv.fypp \ + stdlib_hash_32bit_nm.fypp \ + stdlib_hash_32bit_water.fypp \ + 
stdlib_hash_64bit_fnv.fypp \ + stdlib_hash_64bit_pengy.fypp \ + stdlib_hash_64bit_spookyv2.fypp \ stdlib_ascii.fypp \ stdlib_bitsets_64.fypp \ stdlib_bitsets_large.fypp \ @@ -90,21 +90,21 @@ $(SRCGEN): %.f90: %.fypp common.fypp # Fortran module dependencies f18estop.o: stdlib_error.o -stdlib_32_bit_fnv_hashes.o: \ +stdlib_hash_32bit_fnv.o: \ stdlib_hash_32bit.o stdlib_hash_32bit.o: \ stdlib_kinds.o -stdlib_32_bit_nmhashes.o: \ +stdlib_hash_32bit_nm.o: \ stdlib_hash_32bit.o -stdlib_32_bit_water_hashes.o: \ +stdlib_hash_32bit_water.o: \ stdlib_hash_32bit.o -stdlib_64_bit_fnv_hashes.o: \ +stdlib_hash_64bit_fnv.o: \ stdlib_hash_64bit.o stdlib_hash_64bit.o: \ stdlib_kinds.o -stdlib_64_bit_pengy_hashes.o: \ +stdlib_hash_64bit_pengy.o: \ stdlib_hash_64bit.o -stdlib_64_bit_spookyv2_hashes.o: \ +stdlib_hash_64bit_spookyv2.o: \ stdlib_hash_64bit.o stdlib_ascii.o: stdlib_kinds.o stdlib_bitsets.o: stdlib_kinds.o \ From 1a1b005970139909f361fc8cb29bf9b5ffa5ec18 Mon Sep 17 00:00:00 2001 From: Jeremie Vandenplas Date: Tue, 21 Dec 2021 13:03:22 +0100 Subject: [PATCH 104/106] change specs --- doc/specs/stdlib_hash_procedures.md | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/doc/specs/stdlib_hash_procedures.md b/doc/specs/stdlib_hash_procedures.md index 4c73f4d92..d5b5cbc37 100755 --- a/doc/specs/stdlib_hash_procedures.md +++ b/doc/specs/stdlib_hash_procedures.md @@ -361,8 +361,8 @@ Landon Curt Noll, and Kiem-Phong Vo; the *nmhash32* and *nmhash32x* of James Z. M. Gao; and the *waterhash* of Tommy Ettinger. The detailed implementation of each algorithm is handled in a separate -submodule: `stdlib_32_bit_fnv_hashes`, -`stdlib_32_bit_nmhashes`, and `stdlib_32_bit_water_hashes`, +submodule: `stdlib_hash_32bit_fnv`, +`stdlib_hash_32bit_nm`, and `stdlib_hash_32bit_water`, respectively. The `nmhash32`, `nmhash32x`, and `waterhash` algorithms require seeds. The submodules provide separate seed generators for each algorithm. 
@@ -381,8 +381,8 @@ Landon Curt Noll, and Kiem-Phong Vo; the *pengyhash* of Alberto Fajardo; and the *SpookyHash* of Bob Jenkins. The detailed implementation of each algorithm is handled in a separate -submodule: `stdlib_64_bit_fnv_hashes`, -`stdlib_64_bit_pengy_hashes`, and `stdlib_64_bit_spooky_hashes`, +submodule: `stdlib_hash_64bit_fnv`, +`stdlib_hash_64bit_pengy`, and `stdlib_hash_64bit_spookyv2`, respectively. The `pengyhash`, and `Spooky Hash` algorithms require seeds. The submodules provide separate seed generators @@ -394,7 +394,7 @@ generating seeds for `universal_mult_hash`. All assume a two's complement sign bit, and no out of range checks. -The `stdlib_32_bit_fnv_hashes` and `stdlib_64_bits_fnv_hashes` +The `stdlib_hash_32bit_fnv` and `stdlib_hash_64bit_fnv` submodules each provide implementations of the FNV-1 and FNV-1A algorithms in the form of two separate overloaded functions: `FNV_1` and `FNV_1A`. @@ -417,7 +417,7 @@ giving a performance boost where the hashing is intermittent. [SMHasher discussion](https://github.com/rurban/smhasher/README.md) and [S. Richter, V. Alvarez, and J. Dittrich. 2015. A Seven-Dimensional Analysis of Hashing Methods and its Implications on Query Processing, Proceedings of the VLDB Endowment, Vol. 9, No. 3.](https://bigdata.uni-saarland.de/publications/p249-richter.pdf) [https://doi.org/10.14778/2850583.2850585](https://doi.org/10.14778/2850583.2850585). -The `stdlib_32_bit_nmhashes` submodule provides implementations +The `stdlib_hash_32bit_nm` submodule provides implementations of James Z.M. Gao's `nmhash32` and `nmhash32x` algorithms, version 0.2, in the form of the overloaded functions, `nmhash32` and `nmhash32x`. @@ -434,7 +434,7 @@ seeds, but slower on long seeds, but our limited testing so far shows `nmhash32x` to be significantly faster on short seeds and slightly faster on long seeds. 
-The `stdlib_32_bit_water_hashes` submodule provides implementations +The `stdlib_hash_32bit_water` submodule provides implementations of Tommy Ettinger's `waterhash` algorithm in the form of the overloaded function, `water_hash`. Water Hash has not been tested by Reini Urban, but Tommy Ettinger has tested it with Urban's SMHasher and presents @@ -443,14 +443,14 @@ testing hasn't found any bad seeds for the algorithm. To provide randomly generated seeds for the hash function the submodule also defines the subroutine `new_water_hash_seed`. -The `stdlib_64_bit_pengy_hashes` submodule provides implementations of +The `stdlib_hash_64bit_pengy` submodule provides implementations of Alberto Fajardo's `pengyhash` in the form of the overloaded function, `pengy_hash`. Reini Urban's testing shows that PengyHash passes all the tests and has no bad seeds. To provide randomly generated seeds for the hash function the submodule also defines the subroutine `new_pengy_hash_seed`. -The `stdlib_64_bit_spooky_hashes` submodule provides implementations +The `stdlib_hash_64bit_spookyv2` submodule provides implementations of Bob Jenkins' SpookyHash in the form of the overloaded function, `spooky_hash`. Future implementations may provide the SpookyHash incremental hashing procedures. From c82ec640175469424fce131912e174eb13624427 Mon Sep 17 00:00:00 2001 From: William Clodius Date: Tue, 21 Dec 2021 22:01:27 -1000 Subject: [PATCH 105/106] Updated CHANGELOG.md Updated CHANGELOG.md to reflect the new modules stdlib_hash_32bit and stdlib_hash_64bit. 
[ticket: X] --- CHANGELOG.md | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index c5e4b9181..1f887e4fb 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -2,6 +2,10 @@ Features available from the latest git source +- new module `stdlib_hash_32bit` + [#573](https://github.com/fortran-lang/stdlib/pull/573) +- new module `stdlib_hash_64bit` + [#573](https://github.com/fortran-lang/stdlib/pull/573) - new module `stdlib_distribution_uniform` [#272](https://github.com/fortran-lang/stdlib/pull/272) - new module `stdlib_selection` From 28f2d6fd1427467a75eb4b0b31f058db88425605 Mon Sep 17 00:00:00 2001 From: Jeremie Vandenplas Date: Wed, 22 Dec 2021 10:33:01 +0100 Subject: [PATCH 106/106] Added public procedures in CHANGELOG.md --- CHANGELOG.md | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 1f887e4fb..46200833c 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -4,8 +4,16 @@ Features available from the latest git source - new module `stdlib_hash_32bit` [#573](https://github.com/fortran-lang/stdlib/pull/573) + - new procedures: `fibonacci_hash`, `fnv_1_hash`, + `fnv_1a_hash`, `new_nmhash32_seed`, `new_nmhash32x_seed`, + `new_water_hash_seed`, `nmhash32`, `nmhash32x`, `odd_random_integer`, + `universal_mult_hash`, and `water_hash` - new module `stdlib_hash_64bit` [#573](https://github.com/fortran-lang/stdlib/pull/573) + - new procedures: `fibonacci_hash`, `fnv_1_hash`, `fnv_1a_hash`, + `new_pengy_hash_seed`, `new_spooky_hash_seed`, + `odd_random_integer`, `pengy_hash`, `spooky_hash`, `spookyhash_128`, and + `universal_mult_hash` - new module `stdlib_distribution_uniform` [#272](https://github.com/fortran-lang/stdlib/pull/272) - new module `stdlib_selection`