From 8deba44f3d7c44751046db7d8a9e259320760d23 Mon Sep 17 00:00:00 2001 From: Maybe Waffle Date: Tue, 15 Feb 2022 18:26:59 +0300 Subject: [PATCH 1/4] Add a way to create `FxHasher` with a given seed Add `FxHasher::with_seed`. --- src/lib.rs | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/src/lib.rs b/src/lib.rs index 997be1e..8cd8dcf 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -67,6 +67,13 @@ const K: usize = 0x9e3779b9; #[cfg(target_pointer_width = "64")] const K: usize = 0x517cc1b727220a95; +impl FxHasher { + /// Creates an `fx` hasher with a given seed. + pub fn with_seed(seed: usize) -> FxHasher { + FxHasher { hash: seed } + } +} + impl Default for FxHasher { #[inline] fn default() -> FxHasher { From 9ff57de296cf571509eac2f7f06f81f16a71e146 Mon Sep 17 00:00:00 2001 From: Maybe Waffle Date: Tue, 15 Feb 2022 19:47:46 +0300 Subject: [PATCH 2/4] Add a way to seed `FxHasher` with random seeds This commit adds an optional dependency on `rand` (behind a `rand` feature) that allows seeding `FxHasher` with random seeds. This is done via `FxRandomState`, which is implemented similarly to `std::collections::hash_map::RandomState`. `FxHashMapRand` and `FxHashSetRand` are also introduced as type aliases to `HashMap` and `HashSet` with `S = FxRandomState`. 
--- Cargo.toml | 6 ++- src/lib.rs | 11 +++++- src/random_state.rs | 92 +++++++++++++++++++++++++++++++++++++++++++++ 3 files changed, 107 insertions(+), 2 deletions(-) create mode 100644 src/random_state.rs diff --git a/Cargo.toml b/Cargo.toml index cc1f1b1..2668040 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -9,5 +9,9 @@ keywords = ["hash", "hasher", "fxhash", "rustc"] repository = "https://github.com/rust-lang-nursery/rustc-hash" [features] -std = [] default = ["std"] +std = [] +rand = ["dep:rand", "std"] + +[dependencies] +rand = { version = "0.8", optional = true } diff --git a/src/lib.rs b/src/lib.rs index 8cd8dcf..139ff73 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -28,6 +28,12 @@ #[cfg(feature = "std")] extern crate std; +#[cfg(feature = "rand")] +extern crate rand; + +#[cfg(feature = "rand")] +mod random_state; + use core::convert::TryInto; use core::default::Default; #[cfg(feature = "std")] @@ -46,6 +52,9 @@ pub type FxHashMap<K, V> = HashMap<K, V, BuildHasherDefault<FxHasher>>; #[cfg(feature = "std")] pub type FxHashSet<V> = HashSet<V, BuildHasherDefault<FxHasher>>; +#[cfg(feature = "rand")] +pub use random_state::{FxHashMapRand, FxHashSetRand, FxRandomState}; + /// A speedy hash algorithm for use within rustc. The hashmap in liballoc /// by default uses SipHash which isn't quite as speedy as we want. In the /// compiler we're not really worried about DOS attempts, so we use a fast @@ -161,7 +170,7 @@ mod tests { compile_error!("The test suite only supports 64 bit and 32 bit usize"); use crate::FxHasher; - use core::hash::{BuildHasher, BuildHasherDefault, Hash}; + use core::hash::{BuildHasher, BuildHasherDefault, Hash, Hasher}; macro_rules! test_hash { ( diff --git a/src/random_state.rs b/src/random_state.rs new file mode 100644 index 0000000..b70ba4d --- /dev/null +++ b/src/random_state.rs @@ -0,0 +1,92 @@ +use std::collections::{HashMap, HashSet}; + +use crate::FxHasher; + +/// Type alias for a hashmap using the `fx` hash algorithm with [`FxRandomState`]. 
+pub type FxHashMapRand<K, V> = HashMap<K, V, FxRandomState>; + +/// Type alias for a hashset using the `fx` hash algorithm with [`FxRandomState`]. +pub type FxHashSetRand<V> = HashSet<V, FxRandomState>; + +/// `FxRandomState` is an alternative state for `HashMap` types. +/// +/// A particular instance of `FxRandomState` will create the same instances of +/// [`Hasher`], but the hashers created by two different `FxRandomState` +/// instances are unlikely to produce the same result for the same values. +pub struct FxRandomState { + seed: usize, +} + +impl FxRandomState { + /// Constructs a new `FxRandomState` that is initialized with a random seed. + pub fn new() -> FxRandomState { + use rand::Rng; + use std::{cell::Cell, thread_local}; + + // This mirrors what `std::collections::hash_map::RandomState` does, as of 2024-01-14. + // + // Basically + // 1. Cache result of the rng in a thread local, so repeatedly + // creating maps is cheaper + // 2. Change the cached result on every creation, so maps created + // on the same thread don't have the same iteration order + thread_local!(static SEED: Cell<usize> = { + Cell::new(rand::thread_rng().gen()) + }); + + SEED.with(|seed| { + let s = seed.get(); + seed.set(s.wrapping_add(1)); + FxRandomState { seed: s } + }) + } +} + +impl core::hash::BuildHasher for FxRandomState { + type Hasher = FxHasher; + + fn build_hasher(&self) -> Self::Hasher { + FxHasher::with_seed(self.seed) + } +} + +impl Default for FxRandomState { + fn default() -> Self { + Self::new() + } +} + +#[cfg(test)] +mod tests { + use std::thread; + + use crate::FxHashMapRand; + + #[test] + fn random_states_are_different() { + let a = FxHashMapRand::<&str, u32>::default(); + let b = FxHashMapRand::<&str, u32>::default(); + + // That's the whole point of them being random! 
+ // + // N.B.: `FxRandomState` uses a thread-local set to a random value and then incremented, + // which means that this is *guaranteed* to pass :> + assert_ne!(a.hasher().seed, b.hasher().seed); + } + + #[test] + fn random_states_are_different_cross_thread() { + // This is similar to the test above, but uses two different threads, so they both get + // completely random, unrelated values. + // + // This means that this test is technically flaky, but the probability of it failing is + // `1 / 2.pow(bit_size_of::<usize>())`. Or 1/1.8e19 for 64 bit platforms or 1/4294967296 + // for 32 bit platforms. I suppose this is acceptable. + let a = FxHashMapRand::<&str, u32>::default(); + let b = thread::spawn(|| FxHashMapRand::<&str, u32>::default()) + .join() + .unwrap(); + + assert_ne!(a.hasher().seed, b.hasher().seed); + } +} From 3a5e0c400604f30a29ddbfe2df972d661efb8dbd Mon Sep 17 00:00:00 2001 From: Maybe Waffle Date: Wed, 17 Jan 2024 21:11:00 +0100 Subject: [PATCH 3/4] Add `FxSeededState` & co --- src/lib.rs | 4 ++++ src/seeded_state.rs | 56 +++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 60 insertions(+) create mode 100644 src/seeded_state.rs diff --git a/src/lib.rs b/src/lib.rs index 139ff73..14b965b 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -34,6 +34,8 @@ extern crate rand; #[cfg(feature = "rand")] mod random_state; +mod seeded_state; + use core::convert::TryInto; use core::default::Default; #[cfg(feature = "std")] @@ -55,6 +57,8 @@ pub type FxHashSet<V> = HashSet<V, BuildHasherDefault<FxHasher>>; #[cfg(feature = "rand")] pub use random_state::{FxHashMapRand, FxHashSetRand, FxRandomState}; +pub use seeded_state::{FxHashMapSeed, FxHashSetSeed, FxSeededState}; + /// A speedy hash algorithm for use within rustc. The hashmap in liballoc /// by default uses SipHash which isn't quite as speedy as we want. 
In the /// compiler we're not really worried about DOS attempts, so we use a fast diff --git a/src/seeded_state.rs b/src/seeded_state.rs new file mode 100644 index 0000000..8b8c213 --- /dev/null +++ b/src/seeded_state.rs @@ -0,0 +1,56 @@ +use std::collections::{HashMap, HashSet}; + +use crate::FxHasher; + +/// Type alias for a hashmap using the `fx` hash algorithm with [`FxSeededState`]. +pub type FxHashMapSeed<K, V> = HashMap<K, V, FxSeededState>; + +/// Type alias for a hashset using the `fx` hash algorithm with [`FxSeededState`]. +pub type FxHashSetSeed<V> = HashSet<V, FxSeededState>; + +/// [`FxSeededState`] is an alternative state for `HashMap` types, allowing the use of [`FxHasher`] with a set seed. +/// +/// ``` +/// # use std::collections::HashMap; +/// use rustc_hash::FxSeededState; +/// +/// let mut map = HashMap::with_hasher(FxSeededState::with_seed(12)); +/// map.insert(15, 610); +/// assert_eq!(map[&15], 610); +/// ``` +pub struct FxSeededState { + seed: usize, +} + +impl FxSeededState { + /// Constructs a new `FxSeededState` that is initialized with a `seed`. 
+ pub fn with_seed(seed: usize) -> FxSeededState { + Self { seed } + } +} + +impl core::hash::BuildHasher for FxSeededState { + type Hasher = FxHasher; + + fn build_hasher(&self) -> Self::Hasher { + FxHasher::with_seed(self.seed) + } +} + +#[cfg(test)] +mod tests { + use core::hash::BuildHasher; + + use crate::{FxHashMapSeed, FxSeededState}; + + #[test] + fn different_states_are_different() { + let a = FxHashMapSeed::<&str, u32>::with_hasher(FxSeededState::with_seed(1)); + let b = FxHashMapSeed::<&str, u32>::with_hasher(FxSeededState::with_seed(2)); + + assert_ne!( + a.hasher().build_hasher().hash, + b.hasher().build_hasher().hash + ); + } +} From 71de84e313c01aac9d35c27a8486fa578af2c18d Mon Sep 17 00:00:00 2001 From: Maybe Waffle Date: Wed, 17 Jan 2024 21:11:32 +0100 Subject: [PATCH 4/4] Add a test that different seeds cause different hashes --- src/lib.rs | 25 +++++++++++++++++++++++++ 1 file changed, 25 insertions(+) diff --git a/src/lib.rs b/src/lib.rs index 14b965b..58652dd 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -286,4 +286,29 @@ mod tests { hash(HashBytes(b"These are some bytes for testing rustc_hash.")) == if B32 { 2345708736 } else { 12390864548135261390 }, } } + + #[test] + fn with_seed_actually_different() { + let seeds = [ + [1, 2], + [42, 17], + [124436707, 99237], + [usize::MIN, usize::MAX], + ]; + + for [a_seed, b_seed] in seeds { + let a = || FxHasher::with_seed(a_seed); + let b = || FxHasher::with_seed(b_seed); + + for x in u8::MIN..=u8::MAX { + let mut a = a(); + let mut b = b(); + + x.hash(&mut a); + x.hash(&mut b); + + assert_ne!(a.finish(), b.finish()) + } + } + } }