From de82ff552ffea52516f7b14b04781d3368638c31 Mon Sep 17 00:00:00 2001
From: Piotr Czarnecki
Date: Tue, 22 Mar 2016 18:56:51 +0100
Subject: [PATCH 01/24] Implement adaptive hashing using specialization

---
 src/adaptive_hashing.rs | 117 +++++++++++++++
 src/adaptive_map.rs     | 317 ++++++++++++++++++++++++++++++++++++++++
 src/lib.rs              |  23 ++-
 3 files changed, 449 insertions(+), 8 deletions(-)
 create mode 100644 src/adaptive_hashing.rs
 create mode 100644 src/adaptive_map.rs

diff --git a/src/adaptive_hashing.rs b/src/adaptive_hashing.rs
new file mode 100644
index 0000000..a1b88ca
--- /dev/null
+++ b/src/adaptive_hashing.rs
@@ -0,0 +1,117 @@
+// Copyright 2016 The Rust Project Developers. See the COPYRIGHT
+// file at the top-level directory of this distribution and at
+// http://rust-lang.org/COPYRIGHT.
+//
+// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
+// http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
+// <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
+// option. This file may not be copied, modified, or distributed
+// except according to those terms.
+
+use std::hash::{BuildHasher, SipHasher, Hasher};
+
+use RandomState;
+
+#[derive(Clone)]
+pub struct AdaptiveState {
+    inner: Option<RandomState>
+}
+
+impl AdaptiveState {
+    #[inline]
+    pub fn new_fast() -> Self {
+        AdaptiveState {
+            inner: None
+        }
+    }
+
+    #[inline]
+    pub fn switch_to_safe_hashing(&mut self) {
+        self.inner = Some(RandomState::new());
+    }
+
+    pub fn uses_safe_hashing(&self) -> bool {
+        self.inner.is_some()
+    }
+}
+
+// For correct creation of HashMap.
+impl Default for AdaptiveState {
+    fn default() -> Self {
+        let mut this = AdaptiveState::new_fast();
+        this.switch_to_safe_hashing();
+        this
+    }
+}
+
+impl BuildHasher for AdaptiveState {
+    type Hasher = AdaptiveHasher;
+    #[inline]
+    fn build_hasher(&self) -> AdaptiveHasher {
+        AdaptiveHasher {
+            safe_hasher: self.inner.as_ref().map(|state| state.build_hasher()),
+            hash: 0,
+        }
+    }
+}
+
+pub struct AdaptiveHasher {
+    safe_hasher: Option<SipHasher>,
+    hash: u64,
+}
+
+/// Load a full u64 word from a byte stream, in LE order. Use
+/// `copy_nonoverlapping` to let the compiler generate the most efficient way
+/// to load u64 from a possibly unaligned address.
+///
+/// Unsafe because: unchecked indexing at 0..len
+#[inline]
+unsafe fn load_u64_le(buf: &[u8], len: usize) -> u64 {
+    use std::ptr;
+    debug_assert!(len <= buf.len());
+    let mut data = 0u64;
+    ptr::copy_nonoverlapping(buf.as_ptr(), &mut data as *mut _ as *mut u8, len);
+    data.to_le()
+}
+
+// Primes used in XXH64's finalizer.
+const PRIME_2: u64 = 14029467366897019727;
+const PRIME_3: u64 = 1609587929392839161;
+
+// Xxhash's finalizer.
+fn mix(data: u64) -> u64 {
+    let mut hash = data;
+    hash ^= hash >> 33;
+    hash = hash.wrapping_mul(PRIME_2);
+    hash ^= hash >> 29;
+    hash = hash.wrapping_mul(PRIME_3);
+    hash ^= hash >> 32;
+    hash
+}
+
+impl Hasher for AdaptiveHasher {
+    #[inline]
+    fn write(&mut self, msg: &[u8]) {
+        if let Some(ref mut hasher) = self.safe_hasher {
+            hasher.write(msg);
+        } else {
+            let msg_data = unsafe {
+                if msg.len() <= 8 {
+                    load_u64_le(msg, msg.len())
+                } else {
+                    panic!()
+                }
+            };
+            self.hash = mix(msg_data);
+        }
+    }
+
+    #[inline]
+    fn finish(&self) -> u64 {
+        if let Some(ref hasher) = self.safe_hasher {
+            hasher.finish()
+        } else {
+            self.hash
+        }
+    }
+}
diff --git a/src/adaptive_map.rs b/src/adaptive_map.rs
new file mode 100644
index 0000000..d8fcfdf
--- /dev/null
+++ b/src/adaptive_map.rs
@@ -0,0 +1,317 @@
+// Copyright 2016 The Rust Project Developers. See the COPYRIGHT
+// file at the top-level directory of this distribution and at
+// http://rust-lang.org/COPYRIGHT.
+// +// Licensed under the Apache License, Version 2.0 or the MIT license +// , at your +// option. This file may not be copied, modified, or distributed +// except according to those terms. + +use std::hash::{Hash, BuildHasher}; +use std::mem::replace; +use std::ops::{Deref, DerefMut}; + +use adaptive_hashing::AdaptiveState; +use table::{ + Bucket, + RawTable, + SafeHash +}; +use table::BucketState::{ + Empty, + Full, +}; +use HashMap; +use robin_hood; + +// Beyond this displacement, we switch to safe hashing or grow the table. +const DISPLACEMENT_THRESHOLD: usize = 128; +// When the map's load factor is below this threshold, we switch to safe hashing. +// Otherwise, we grow the table. +const LOAD_FACTOR_THRESHOLD: f32 = 0.625; + +// We have this trait, because specialization doesn't work for inherent impls yet. +pub trait SpecializedInsert { + // Method names are changed, because inherent methods shadow trait impl + // methods. + fn specialized_insert_or_replace_with<'a, F>( + &'a mut self, + hash: SafeHash, + k: K, + v: V, + mut found_existing: F + ) -> &'a mut V + where F: FnMut(&mut K, &mut V, K, V); +} + +impl SpecializedInsert for HashMap + where K: Eq + Hash, + S: BuildHasher +{ + #[inline] + default fn specialized_insert_or_replace_with<'a, F>( + &'a mut self, + hash: SafeHash, + k: K, + v: V, + mut found_existing: F + ) -> &'a mut V + where F: FnMut(&mut K, &mut V, K, V), + { + // Worst case, we'll find one empty bucket among `size + 1` buckets. + let size = self.table.size(); + let mut probe = Bucket::new(&mut self.table, hash); + let ib = probe.index(); + + loop { + let mut bucket = match probe.peek() { + Empty(bucket) => { + // Found a hole! + return bucket.put(hash, k, v).into_mut_refs().1; + } + Full(bucket) => bucket + }; + + // hash matches? + if bucket.hash() == hash { + // key matches? + if k == *bucket.read_mut().0 { + let (bucket_k, bucket_v) = bucket.into_mut_refs(); + debug_assert!(k == *bucket_k); + // Key already exists. Get its reference. + found_existing(bucket_k, bucket_v, k, v); + return bucket_v; + } + } + + let robin_ib = bucket.index() as isize - bucket.distance() as isize; + + if (ib as isize) < robin_ib { + // Found a luckier bucket than me. Better steal his spot. + return robin_hood(bucket, robin_ib as usize, hash, k, v); + } + + probe = bucket.next(); + assert!(probe.index() != ib + size + 1); + } + } +} + +macro_rules! specialize { + (K = $key_type:ty; $($type_var:ident),*) => ( + impl SpecializedInsert<$key_type, V> for HashMap<$key_type, V, AdaptiveState> { + #[inline] + fn specialized_insert_or_replace_with<'a, F>( + &'a mut self, + hash: SafeHash, + k: $key_type, + v: V, + mut found_existing: F + ) -> &'a mut V + where F: FnMut(&mut $key_type, &mut V, $key_type, V), + { + // Worst case, we'll find one empty bucket among `size + 1` buckets. + let size = self.table.size(); + let mut probe = Bucket::new(DerefMapToTable(self), hash); + let ib = probe.index(); + + for _ in 0 .. DISPLACEMENT_THRESHOLD { + let mut bucket = match probe.peek() { + Empty(bucket) => { + // Found a hole! + return bucket.put(hash, k, v).into_mut_refs().1; + } + Full(bucket) => bucket + }; + + // hash matches? + if bucket.hash() == hash { + // key matches? + if k == *bucket.read_mut().0 { + let (bucket_k, bucket_v) = bucket.into_mut_refs(); + debug_assert!(k == *bucket_k); + // Key already exists. Get its reference. 
+ found_existing(bucket_k, bucket_v, k, v); + return bucket_v; + } + } + + let robin_ib = bucket.index() as isize - bucket.distance() as isize; + + if (ib as isize) < robin_ib { + // Found a luckier bucket than me. Better steal his spot. + return robin_hood(bucket, robin_ib as usize, hash, k, v); + } + + probe = bucket.next(); + assert!(probe.index() != ib + size + 1); + } + let this = probe.into_table().0; + // Probe sequence is too long. + // Adapt to safe hashing. + adapt_to_safe_hashing(this); + this.specialized_insert_or_replace_with(hash, k, v, found_existing) + } + } + + // For correct creation of HashMap. + impl Default for HashMap<$key_type, V, AdaptiveState> { + fn default() -> Self { + HashMap::with_hash_state(AdaptiveState::new_fast()) + } + } + ) +} + +#[cold] +fn adapt_to_safe_hashing(map: &mut HashMap) + where K: Eq + Hash +{ + let capacity = map.table.capacity(); + let load_factor = map.len() as f32 / capacity as f32; + if load_factor >= LOAD_FACTOR_THRESHOLD { + map.resize(capacity * 2); + } else { + map.hash_state.switch_to_safe_hashing(); + let old_table = replace(&mut map.table, RawTable::new(capacity)); + for (_, k, v) in old_table.into_iter() { + let hash = map.make_hash(&k); + map.insert_hashed_nocheck(hash, k, v); + } + } +} + +specialize! { K = u8; } +specialize! { K = i8; } +specialize! { K = u16; } +specialize! { K = i16; } +specialize! { K = u32; } +specialize! { K = i32; } +specialize! { K = u64; } +specialize! { K = i64; } +specialize! { K = *const T; T } +specialize! { K = *mut T; T } + +struct DerefMapToTable<'a, K: 'a, V: 'a, S: 'a>(&'a mut HashMap); + +impl<'a, K, V, S> Deref for DerefMapToTable<'a, K, V, S> { + type Target = RawTable; + #[inline(always)] + fn deref(&self) -> &Self::Target { + &self.0.table + } +} + +impl<'a, K, V, S> DerefMut for DerefMapToTable<'a, K, V, S> { + #[inline(always)] + fn deref_mut(&mut self) -> &mut Self::Target { + &mut self.0.table + } +} + +#[cfg(test)] +mod test_adaptive_map { + use HashMap; + use super::DISPLACEMENT_THRESHOLD; + + // These values all hash to N * 2^24 + 1523546 +/- 2. 
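+ // Because the fast one-shot hash is deterministic and these hashes agree in
+ // their low bits, all of these keys probe the same region of any small
+ // table, which drives the displacement past DISPLACEMENT_THRESHOLD.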
+ static VALUES: &'static [u32] = &[ + 513314, 2977019, 3921903, 5005242, 6124431, 7696812, 16129307, 16296222, 17425488, + 17898424, 19926075, 24768203, 25614709, 29006382, 30234341, 32377109, 34394074, + 40324616, 40892565, 43025295, 43208269, 43761687, 43883113, 45274367, 47850630, + 48320162, 48458322, 48960668, 49470322, 50545229, 51305930, 51391781, 54465806, + 54541272, 55497339, 55788640, 57113511, 58250085, 58326435, 59316149, 62059483, + 64136437, 64978683, 65076823, 66571125, 66632487, 68067917, 69921206, 70107088, + 71829636, 76189936, 78639014, 80841986, 81844602, 83028134, 85818283, 86768196, + 90374529, 91119955, 91540016, 93761675, 94583431, 95027700, 95247246, 95564585, + 95663108, 95742804, 96147866, 97538112, 101129622, 101782620, 102170444, + 104790535, 104815436, 105802703, 106364729, 106520836, 106563112, 107893429, + 112185856, 113337504, 116895916, 122566166, 123359972, 123897385, 124028529, + 125100458, 127234401, 128292718, 129767575, 132088268, 133737047, 133796663, + 135903283, 136513103, 138868673, 139106372, 141282728, 141628856, 143250884, + 143784740, 149114217, 150882858, 151116713, 152221499, 154271016, 155574791, + 156179900, 157228942, 157518087, 159572211, 161327800, 161750984, 162237441, + 164793050, 165064176, 166764350, 166847618, 167111553, 168117915, 169230761, + 170322861, 170937855, 172389295, 173619266, 177610645, 178415544, 179549865, + 185538500, 185906457, 195946437, 196591640, 196952032, 197505405, 200021193, + 201058930, 201496104, 204691301, 206144773, 207320627, 211221882, 215434456, + ]; + + #[test] + fn test_dos_attack() { + let mut map = HashMap::new(); + let mut values = VALUES.iter(); + for &value in (&mut values).take(DISPLACEMENT_THRESHOLD - 1) { + map.insert(value, ()); + } + assert!(!map.hash_state.uses_safe_hashing()); + for &value in values.take(8) { + map.insert(value, ()); + } + assert!(map.hash_state.uses_safe_hashing()); + } + + #[test] + fn test_adaptive_lots_of_insertions() { + let mut m = HashMap::new(); + + // Try this a few times to make sure we never screw up the hashmap's + // internal state. + for _ in 0..10 { + assert!(m.is_empty()); + + for i in 1 ... 
1000 { + assert!(m.insert(i, i).is_none()); + + for j in 1...i { + let r = m.get(&j); + assert_eq!(r, Some(&j)); + } + + for j in i+1...1000 { + let r = m.get(&j); + assert_eq!(r, None); + } + } + + for i in 1001...2000 { + assert!(!m.contains_key(&i)); + } + + // remove forwards + for i in 1...1000 { + assert!(m.remove(&i).is_some()); + + for j in 1...i { + assert!(!m.contains_key(&j)); + } + + for j in i+1...1000 { + assert!(m.contains_key(&j)); + } + } + + for i in 1...1000 { + assert!(!m.contains_key(&i)); + } + + for i in 1...1000 { + assert!(m.insert(i, i).is_none()); + } + + // remove backwards + for i in (1..1001).rev() { + assert!(m.remove(&i).is_some()); + + for j in i...1000 { + assert!(!m.contains_key(&j)); + } + + for j in 1...i-1 { + assert!(m.contains_key(&j)); + } + } + } + } +} diff --git a/src/lib.rs b/src/lib.rs index da10754..2b2cf66 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -15,7 +15,9 @@ heap_api, oom, unique, - unsafe_no_drop_flag)] + unsafe_no_drop_flag, + specialization +)] #![cfg_attr(test, feature(inclusive_range_syntax))] @@ -24,6 +26,8 @@ extern crate rand; mod recover; mod table; +mod adaptive_hashing; +mod adaptive_map; use self::Entry::*; use self::VacantEntryState::*; @@ -35,10 +39,12 @@ use std::fmt::{self, Debug}; use std::hash::{BuildHasher, Hash, SipHasher}; use std::iter::{self, Iterator, ExactSizeIterator, IntoIterator, FromIterator, Extend, Map}; use std::mem::{self, replace}; -use std::ops::{Deref, FnMut, FnOnce, Index}; +use std::ops::{Deref, DerefMut, FnMut, FnOnce, Index}; use std::option::Option::{Some, None}; use rand::{Rng}; use recover::Recover; +use adaptive_hashing::AdaptiveState; +use adaptive_map::SpecializedInsert; use table::{ Bucket, @@ -347,7 +353,7 @@ fn test_resize_policy() { /// } /// ``` #[derive(Clone)] -pub struct HashMap { +pub struct HashMap { // All hashes are keyed on these values, to prevent hash collision attacks. hash_builder: S, @@ -543,7 +549,7 @@ impl HashMap } } -impl HashMap { +impl HashMap { /// Creates an empty HashMap. /// /// # Examples @@ -553,7 +559,7 @@ impl HashMap { /// let mut map: HashMap<&str, isize> = HashMap::new(); /// ``` #[inline] - pub fn new() -> HashMap { + pub fn new() -> HashMap { Default::default() } @@ -566,8 +572,9 @@ impl HashMap { /// let mut map: HashMap<&str, isize> = HashMap::with_capacity(10); /// ``` #[inline] - pub fn with_capacity(capacity: usize) -> HashMap { - HashMap::with_capacity_and_hasher(capacity, Default::default()) + pub fn with_capacity(capacity: usize) -> HashMap { + let map: Self = Default::default(); + HashMap::with_capacity_and_hasher(capacity, map.hash_builder) } } @@ -1286,7 +1293,7 @@ impl Default for HashMap where K: Eq + Hash, S: BuildHasher + Default, { - fn default() -> HashMap { + default fn default() -> HashMap { HashMap::with_hasher(Default::default()) } } From 667bab007d2f339521fb2e6c0b6350e6bf45ee3b Mon Sep 17 00:00:00 2001 From: Piotr Czarnecki Date: Tue, 22 Mar 2016 18:56:58 +0100 Subject: [PATCH 02/24] Add benchmarks --- benches/bench_adaptive.rs | 133 ++++++++++++++++++++++++++++++++++ benches/bench_with_siphash.rs | 133 ++++++++++++++++++++++++++++++++++ 2 files changed, 266 insertions(+) create mode 100644 benches/bench_adaptive.rs create mode 100644 benches/bench_with_siphash.rs diff --git a/benches/bench_adaptive.rs b/benches/bench_adaptive.rs new file mode 100644 index 0000000..63f1a6a --- /dev/null +++ b/benches/bench_adaptive.rs @@ -0,0 +1,133 @@ +// Copyright 2016 The Rust Project Developers. 
See the COPYRIGHT +// file at the top-level directory of this distribution and at +// http://rust-lang.org/COPYRIGHT. +// +// Licensed under the Apache License, Version 2.0 or the MIT license +// , at your +// option. This file may not be copied, modified, or distributed +// except according to those terms. + +#![cfg(test)] +#![feature(test)] + +extern crate hashmap2; +extern crate test; + +use test::Bencher; + +use hashmap2::HashMap; + +#[bench] +fn new_drop(b : &mut Bencher) { + b.iter(|| { + let m : HashMap = HashMap::new(); + assert_eq!(m.len(), 0); + test::black_box(&m); + }) +} + +#[bench] +fn new_insert_drop(b : &mut Bencher) { + b.iter(|| { + let mut m = HashMap::new(); + m.insert(0, 0); + assert_eq!(m.len(), 1); + test::black_box(&m); + }) +} + +#[bench] +fn grow_by_insertion(b: &mut Bencher) { + let mut m = HashMap::new(); + + for i in 1..1001 { + m.insert(i, i); + } + + test::black_box(&m); + + let mut k = 1001; + + b.iter(|| { + m.insert(k, k); + k += 1; + }); + test::black_box(&m); +} + +#[bench] +fn find_existing(b: &mut Bencher) { + let mut m = HashMap::new(); + + for i in 1..1001 { + m.insert(i, i); + } + + test::black_box(&m); + + b.iter(|| { + for i in 1..1001 { + test::black_box(m.contains_key(&i)); + } + }); +} + +#[bench] +fn find_nonexisting(b: &mut Bencher) { + let mut m = HashMap::new(); + + for i in 1..1001 { + m.insert(i, i); + } + + test::black_box(&m); + + b.iter(|| { + for i in 1001..2001 { + test::black_box(m.contains_key(&i)); + } + }); +} + +#[bench] +fn hashmap_as_queue(b: &mut Bencher) { + let mut m = HashMap::new(); + + for i in 1..1001 { + m.insert(i, i); + } + + test::black_box(&m); + + let mut k = 1; + + b.iter(|| { + m.remove(&k); + m.insert(k + 1000, k + 1000); + k += 1; + }); + test::black_box(&m); +} + +#[bench] +fn get_remove_insert(b: &mut Bencher) { + let mut m = HashMap::new(); + + for i in 1..1001 { + m.insert(i, i); + } + + test::black_box(&m); + + let mut k = 1; + + b.iter(|| { + m.get(&(k + 400)); + m.get(&(k + 2000)); + m.remove(&k); + m.insert(k + 1000, k + 1000); + k += 1; + }); + test::black_box(&m); +} diff --git a/benches/bench_with_siphash.rs b/benches/bench_with_siphash.rs new file mode 100644 index 0000000..a345073 --- /dev/null +++ b/benches/bench_with_siphash.rs @@ -0,0 +1,133 @@ +// Copyright 2016 The Rust Project Developers. See the COPYRIGHT +// file at the top-level directory of this distribution and at +// http://rust-lang.org/COPYRIGHT. +// +// Licensed under the Apache License, Version 2.0 or the MIT license +// , at your +// option. This file may not be copied, modified, or distributed +// except according to those terms. 
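+
+// Same benchmark bodies as benches/bench_adaptive.rs, except that every map is
+// built with `with_hash_state(RandomState::new())`, pinning it to SipHash.
+// Comparing the two suites isolates what adaptive hashing saves.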
+ +#![cfg(test)] +#![feature(test)] + +extern crate hashmap2; +extern crate test; + +use test::Bencher; + +use hashmap2::{HashMap, RandomState}; + +#[bench] +fn new_drop(b : &mut Bencher) { + b.iter(|| { + let m : HashMap = HashMap::with_hash_state(RandomState::new()); + assert_eq!(m.len(), 0); + test::black_box(&m); + }) +} + +#[bench] +fn new_insert_drop(b : &mut Bencher) { + b.iter(|| { + let mut m = HashMap::with_hash_state(RandomState::new()); + m.insert(0, 0); + assert_eq!(m.len(), 1); + test::black_box(&m); + }) +} + +#[bench] +fn grow_by_insertion(b: &mut Bencher) { + let mut m = HashMap::with_hash_state(RandomState::new()); + + for i in 1..1001 { + m.insert(i, i); + } + + test::black_box(&m); + + let mut k = 1001; + + b.iter(|| { + m.insert(k, k); + k += 1; + }); + test::black_box(&m); +} + +#[bench] +fn find_existing(b: &mut Bencher) { + let mut m = HashMap::with_hash_state(RandomState::new()); + + for i in 1..1001 { + m.insert(i, i); + } + + test::black_box(&m); + + b.iter(|| { + for i in 1..1001 { + test::black_box(m.contains_key(&i)); + } + }); +} + +#[bench] +fn find_nonexisting(b: &mut Bencher) { + let mut m = HashMap::with_hash_state(RandomState::new()); + + for i in 1..1001 { + m.insert(i, i); + } + + test::black_box(&m); + + b.iter(|| { + for i in 1001..2001 { + test::black_box(m.contains_key(&i)); + } + }); +} + +#[bench] +fn hashmap_as_queue(b: &mut Bencher) { + let mut m = HashMap::with_hash_state(RandomState::new()); + + for i in 1..1001 { + m.insert(i, i); + } + + test::black_box(&m); + + let mut k = 1; + + b.iter(|| { + m.remove(&k); + m.insert(k + 1000, k + 1000); + k += 1; + }); + test::black_box(&m); +} + +#[bench] +fn get_remove_insert(b: &mut Bencher) { + let mut m = HashMap::with_hash_state(RandomState::new()); + + for i in 1..1001 { + m.insert(i, i); + } + + test::black_box(&m); + + let mut k = 1; + + b.iter(|| { + m.get(&(k + 400)); + m.get(&(k + 2000)); + m.remove(&k); + m.insert(k + 1000, k + 1000); + k += 1; + }); + test::black_box(&m); +} From ab8452faa7424972ca80c9f5730017415ba47c08 Mon Sep 17 00:00:00 2001 From: Piotr Czarnecki Date: Wed, 23 Mar 2016 19:02:01 +0100 Subject: [PATCH 03/24] f update adaptive map for nightly --- src/adaptive_map.rs | 188 +++++++++++++++++--------------------------- src/lib.rs | 9 ++- src/table.rs | 24 ++++++ 3 files changed, 103 insertions(+), 118 deletions(-) diff --git a/src/adaptive_map.rs b/src/adaptive_map.rs index d8fcfdf..9f677f4 100644 --- a/src/adaptive_map.rs +++ b/src/adaptive_map.rs @@ -14,16 +14,14 @@ use std::ops::{Deref, DerefMut}; use adaptive_hashing::AdaptiveState; use table::{ - Bucket, RawTable, SafeHash }; -use table::BucketState::{ - Empty, - Full, -}; use HashMap; -use robin_hood; +use InternalEntry; +use VacantEntryState::NeqElem; +use VacantEntryState::NoElem; +use search_hashed; // Beyond this displacement, we switch to safe hashing or grow the table. const DISPLACEMENT_THRESHOLD: usize = 128; @@ -32,140 +30,95 @@ const DISPLACEMENT_THRESHOLD: usize = 128; const LOAD_FACTOR_THRESHOLD: f32 = 0.625; // We have this trait, because specialization doesn't work for inherent impls yet. -pub trait SpecializedInsert { +pub trait SafeguardedSearch { // Method names are changed, because inherent methods shadow trait impl // methods. 
- fn specialized_insert_or_replace_with<'a, F>( - &'a mut self, - hash: SafeHash, - k: K, - v: V, - mut found_existing: F - ) -> &'a mut V - where F: FnMut(&mut K, &mut V, K, V); + fn safeguarded_search(&mut self, key: &K, hash: SafeHash) + -> InternalEntry>; } -impl SpecializedInsert for HashMap +impl SafeguardedSearch for HashMap where K: Eq + Hash, S: BuildHasher { #[inline] - default fn specialized_insert_or_replace_with<'a, F>( - &'a mut self, - hash: SafeHash, - k: K, - v: V, - mut found_existing: F - ) -> &'a mut V - where F: FnMut(&mut K, &mut V, K, V), - { - // Worst case, we'll find one empty bucket among `size + 1` buckets. - let size = self.table.size(); - let mut probe = Bucket::new(&mut self.table, hash); - let ib = probe.index(); - - loop { - let mut bucket = match probe.peek() { - Empty(bucket) => { - // Found a hole! - return bucket.put(hash, k, v).into_mut_refs().1; - } - Full(bucket) => bucket - }; - - // hash matches? - if bucket.hash() == hash { - // key matches? - if k == *bucket.read_mut().0 { - let (bucket_k, bucket_v) = bucket.into_mut_refs(); - debug_assert!(k == *bucket_k); - // Key already exists. Get its reference. - found_existing(bucket_k, bucket_v, k, v); - return bucket_v; - } - } - - let robin_ib = bucket.index() as isize - bucket.distance() as isize; - - if (ib as isize) < robin_ib { - // Found a luckier bucket than me. Better steal his spot. - return robin_hood(bucket, robin_ib as usize, hash, k, v); - } - - probe = bucket.next(); - assert!(probe.index() != ib + size + 1); - } + default fn safeguarded_search(&mut self, key: &K, hash: SafeHash) + -> InternalEntry> { + search_hashed(&mut self.table, hash, |k| k == key) } } macro_rules! specialize { (K = $key_type:ty; $($type_var:ident),*) => ( - impl SpecializedInsert<$key_type, V> for HashMap<$key_type, V, AdaptiveState> { + impl SafeguardedSearch<$key_type, V> + for HashMap<$key_type, V, AdaptiveState> { #[inline] - fn specialized_insert_or_replace_with<'a, F>( - &'a mut self, - hash: SafeHash, - k: $key_type, - v: V, - mut found_existing: F - ) -> &'a mut V - where F: FnMut(&mut $key_type, &mut V, $key_type, V), - { - // Worst case, we'll find one empty bucket among `size + 1` buckets. - let size = self.table.size(); - let mut probe = Bucket::new(DerefMapToTable(self), hash); - let ib = probe.index(); - - for _ in 0 .. DISPLACEMENT_THRESHOLD { - let mut bucket = match probe.peek() { - Empty(bucket) => { - // Found a hole! - return bucket.put(hash, k, v).into_mut_refs().1; - } - Full(bucket) => bucket - }; - - // hash matches? - if bucket.hash() == hash { - // key matches? - if k == *bucket.read_mut().0 { - let (bucket_k, bucket_v) = bucket.into_mut_refs(); - debug_assert!(k == *bucket_k); - // Key already exists. Get its reference. - found_existing(bucket_k, bucket_v, k, v); - return bucket_v; - } + fn safeguarded_search(&mut self, key: &$key_type, hash: SafeHash) + -> InternalEntry<$key_type, V, &mut RawTable<$key_type, V>> { + let table_capacity = self.table.capacity(); + let entry = search_hashed(DerefMapToTable(self), hash, |k| k == key); + match entry { + InternalEntry::Occupied { elem } => { + // This should compile down to a no-op. + InternalEntry::Occupied { elem: elem.convert_table() } } - - let robin_ib = bucket.index() as isize - bucket.distance() as isize; - - if (ib as isize) < robin_ib { - // Found a luckier bucket than me. Better steal his spot. 
- return robin_hood(bucket, robin_ib as usize, hash, k, v); + InternalEntry::TableIsEmpty => { + InternalEntry::TableIsEmpty + } + InternalEntry::Vacant { elem, hash } => { + let index = match elem { + NeqElem(ref bucket, _) => bucket.index(), + NoElem(ref bucket) => bucket.index(), + }; + // Copied from FullBucket::displacement. + let displacement = + index.wrapping_sub(hash.inspect() as usize) & (table_capacity - 1); + if displacement > DISPLACEMENT_THRESHOLD { + let map = match elem { + NeqElem(bucket, _) => { + bucket.into_table() + } + NoElem(bucket) => { + bucket.into_table() + } + }; + // Probe sequence is too long. + // Adapt to safe hashing if desirable. + maybe_adapt_to_safe_hashing(map.0); + search_hashed(&mut map.0.table, hash, |k| k == key) + } else { + // This should compile down to a no-op. + match elem { + NeqElem(bucket, ib) => { + InternalEntry::Vacant { + elem: NeqElem(bucket.convert_table(), ib), + hash: hash, + } + } + NoElem(bucket) => { + InternalEntry::Vacant { + elem: NoElem(bucket.convert_table()), + hash: hash, + } + } + } + } } - - probe = bucket.next(); - assert!(probe.index() != ib + size + 1); } - let this = probe.into_table().0; - // Probe sequence is too long. - // Adapt to safe hashing. - adapt_to_safe_hashing(this); - this.specialized_insert_or_replace_with(hash, k, v, found_existing) } } // For correct creation of HashMap. impl Default for HashMap<$key_type, V, AdaptiveState> { fn default() -> Self { - HashMap::with_hash_state(AdaptiveState::new_fast()) + HashMap::with_hasher(AdaptiveState::new_fast()) } } ) } #[cold] -fn adapt_to_safe_hashing(map: &mut HashMap) +fn maybe_adapt_to_safe_hashing(map: &mut HashMap) where K: Eq + Hash { let capacity = map.table.capacity(); @@ -173,7 +126,7 @@ fn adapt_to_safe_hashing(map: &mut HashMap) if load_factor >= LOAD_FACTOR_THRESHOLD { map.resize(capacity * 2); } else { - map.hash_state.switch_to_safe_hashing(); + map.hash_builder.switch_to_safe_hashing(); let old_table = replace(&mut map.table, RawTable::new(capacity)); for (_, k, v) in old_table.into_iter() { let hash = map.make_hash(&k); @@ -210,6 +163,13 @@ impl<'a, K, V, S> DerefMut for DerefMapToTable<'a, K, V, S> { } } +impl<'a, K, V, S> Into<&'a mut RawTable> for DerefMapToTable<'a, K, V, S> { + #[inline(always)] + fn into(self) -> &'a mut RawTable { + &mut self.0.table + } +} + #[cfg(test)] mod test_adaptive_map { use HashMap; @@ -245,11 +205,11 @@ mod test_adaptive_map { for &value in (&mut values).take(DISPLACEMENT_THRESHOLD - 1) { map.insert(value, ()); } - assert!(!map.hash_state.uses_safe_hashing()); + assert!(!map.hash_builder.uses_safe_hashing()); for &value in values.take(8) { map.insert(value, ()); } - assert!(map.hash_state.uses_safe_hashing()); + assert!(map.hash_builder.uses_safe_hashing()); } #[test] diff --git a/src/lib.rs b/src/lib.rs index 2b2cf66..8364366 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -39,12 +39,12 @@ use std::fmt::{self, Debug}; use std::hash::{BuildHasher, Hash, SipHasher}; use std::iter::{self, Iterator, ExactSizeIterator, IntoIterator, FromIterator, Extend, Map}; use std::mem::{self, replace}; -use std::ops::{Deref, DerefMut, FnMut, FnOnce, Index}; +use std::ops::{Deref, FnMut, FnOnce, Index}; use std::option::Option::{Some, None}; use rand::{Rng}; use recover::Recover; use adaptive_hashing::AdaptiveState; -use adaptive_map::SpecializedInsert; +use adaptive_map::SafeguardedSearch; use table::{ Bucket, @@ -815,7 +815,7 @@ impl HashMap /// If the key already exists, the hashtable will be returned untouched /// and a reference 
to the existing element will be returned. fn insert_hashed_nocheck(&mut self, hash: SafeHash, k: K, v: V) -> Option { - let entry = search_hashed(&mut self.table, hash, |key| *key == k).into_entry(k); + let entry = self.safeguarded_search(&k, hash).into_entry(k); match entry { Some(Occupied(mut elem)) => { Some(elem.insert(v)) @@ -948,7 +948,8 @@ impl HashMap pub fn entry(&mut self, key: K) -> Entry { // Gotta resize now. self.reserve(1); - self.search_mut(&key).into_entry(key).expect("unreachable") + let hash = self.make_hash(&key); + self.safeguarded_search(&key, hash).into_entry(key).expect("unreachable") } /// Gets the given key's corresponding entry in the map for in-place diff --git a/src/table.rs b/src/table.rs index b5671ff..4fd8ace 100644 --- a/src/table.rs +++ b/src/table.rs @@ -190,6 +190,14 @@ impl FullBucket { pub fn into_table(self) -> M { self.table } + // Convert the table. + pub fn convert_table(self) -> FullBucket where M: Into { + FullBucket { + raw: self.raw, + idx: self.idx, + table: self.table.into(), + } + } /// Get the raw index. pub fn index(&self) -> usize { self.idx @@ -201,6 +209,22 @@ impl EmptyBucket { pub fn table(&self) -> &M { &self.table } + /// Move out the reference to the table. + pub fn into_table(self) -> M { + self.table + } + // Convert the table. + pub fn convert_table(self) -> EmptyBucket where M: Into { + EmptyBucket { + raw: self.raw, + idx: self.idx, + table: self.table.into(), + } + } + /// Get the raw index. + pub fn index(&self) -> usize { + self.idx + } } impl Bucket { From aacbf3007eab585d5dfba7c89f0ebcf2017247ef Mon Sep 17 00:00:00 2001 From: Piotr Czarnecki Date: Wed, 23 Mar 2016 19:16:52 +0100 Subject: [PATCH 04/24] update benches --- benches/bench_with_siphash.rs | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/benches/bench_with_siphash.rs b/benches/bench_with_siphash.rs index a345073..ed9350a 100644 --- a/benches/bench_with_siphash.rs +++ b/benches/bench_with_siphash.rs @@ -21,7 +21,7 @@ use hashmap2::{HashMap, RandomState}; #[bench] fn new_drop(b : &mut Bencher) { b.iter(|| { - let m : HashMap = HashMap::with_hash_state(RandomState::new()); + let m : HashMap = HashMap::with_hasher(RandomState::new()); assert_eq!(m.len(), 0); test::black_box(&m); }) @@ -30,7 +30,7 @@ fn new_drop(b : &mut Bencher) { #[bench] fn new_insert_drop(b : &mut Bencher) { b.iter(|| { - let mut m = HashMap::with_hash_state(RandomState::new()); + let mut m = HashMap::with_hasher(RandomState::new()); m.insert(0, 0); assert_eq!(m.len(), 1); test::black_box(&m); @@ -39,7 +39,7 @@ fn new_insert_drop(b : &mut Bencher) { #[bench] fn grow_by_insertion(b: &mut Bencher) { - let mut m = HashMap::with_hash_state(RandomState::new()); + let mut m = HashMap::with_hasher(RandomState::new()); for i in 1..1001 { m.insert(i, i); @@ -58,7 +58,7 @@ fn grow_by_insertion(b: &mut Bencher) { #[bench] fn find_existing(b: &mut Bencher) { - let mut m = HashMap::with_hash_state(RandomState::new()); + let mut m = HashMap::with_hasher(RandomState::new()); for i in 1..1001 { m.insert(i, i); @@ -75,7 +75,7 @@ fn find_existing(b: &mut Bencher) { #[bench] fn find_nonexisting(b: &mut Bencher) { - let mut m = HashMap::with_hash_state(RandomState::new()); + let mut m = HashMap::with_hasher(RandomState::new()); for i in 1..1001 { m.insert(i, i); @@ -92,7 +92,7 @@ fn find_nonexisting(b: &mut Bencher) { #[bench] fn hashmap_as_queue(b: &mut Bencher) { - let mut m = HashMap::with_hash_state(RandomState::new()); + let mut m = HashMap::with_hasher(RandomState::new()); for i 
in 1..1001 { m.insert(i, i); @@ -112,7 +112,7 @@ fn hashmap_as_queue(b: &mut Bencher) { #[bench] fn get_remove_insert(b: &mut Bencher) { - let mut m = HashMap::with_hash_state(RandomState::new()); + let mut m = HashMap::with_hasher(RandomState::new()); for i in 1..1001 { m.insert(i, i); From db7f552c191508872059b06f7bd3921c20376372 Mon Sep 17 00:00:00 2001 From: Piotr Czarnecki Date: Wed, 23 Mar 2016 19:50:40 +0100 Subject: [PATCH 05/24] no private types in public interfaces; backwards compatibility --- src/adaptive_hashing.rs | 11 +- src/adaptive_map.rs | 17 +-- src/entry.rs | 198 ++++++++++++++++++++++++++++ src/internal_entry.rs | 41 ++++++ src/lib.rs | 286 ++++------------------------------------ src/sip_hash_state.rs | 48 +++++++ 6 files changed, 332 insertions(+), 269 deletions(-) create mode 100644 src/entry.rs create mode 100644 src/internal_entry.rs create mode 100644 src/sip_hash_state.rs diff --git a/src/adaptive_hashing.rs b/src/adaptive_hashing.rs index a1b88ca..3f5d473 100644 --- a/src/adaptive_hashing.rs +++ b/src/adaptive_hashing.rs @@ -10,14 +10,19 @@ use std::hash::{BuildHasher, SipHasher, Hasher}; -use RandomState; +use sip_hash_state::SipHashState; #[derive(Clone)] pub struct AdaptiveState { - inner: Option + inner: Option } impl AdaptiveState { + #[inline] + pub fn new() -> Self { + Default::default() + } + #[inline] pub fn new_fast() -> Self { AdaptiveState { @@ -27,7 +32,7 @@ impl AdaptiveState { #[inline] pub fn switch_to_safe_hashing(&mut self) { - self.inner = Some(RandomState::new()); + self.inner = Some(SipHashState::new()); } pub fn uses_safe_hashing(&self) -> bool { diff --git a/src/adaptive_map.rs b/src/adaptive_map.rs index 9f677f4..dc1d718 100644 --- a/src/adaptive_map.rs +++ b/src/adaptive_map.rs @@ -17,10 +17,10 @@ use table::{ RawTable, SafeHash }; +use internal_entry::InternalEntry; +use entry::VacantEntryState::NeqElem; +use entry::VacantEntryState::NoElem; use HashMap; -use InternalEntry; -use VacantEntryState::NeqElem; -use VacantEntryState::NoElem; use search_hashed; // Beyond this displacement, we switch to safe hashing or grow the table. @@ -29,12 +29,14 @@ const DISPLACEMENT_THRESHOLD: usize = 128; // Otherwise, we grow the table. const LOAD_FACTOR_THRESHOLD: f32 = 0.625; +// Avoid problems with private types in public interfaces. +pub type InternalEntryMut<'a, K: 'a, V: 'a> = InternalEntry>; + // We have this trait, because specialization doesn't work for inherent impls yet. pub trait SafeguardedSearch { // Method names are changed, because inherent methods shadow trait impl // methods. - fn safeguarded_search(&mut self, key: &K, hash: SafeHash) - -> InternalEntry>; + fn safeguarded_search(&mut self, key: &K, hash: SafeHash) -> InternalEntryMut; } impl SafeguardedSearch for HashMap @@ -42,8 +44,7 @@ impl SafeguardedSearch for HashMap S: BuildHasher { #[inline] - default fn safeguarded_search(&mut self, key: &K, hash: SafeHash) - -> InternalEntry> { + default fn safeguarded_search(&mut self, key: &K, hash: SafeHash) -> InternalEntryMut { search_hashed(&mut self.table, hash, |k| k == key) } } @@ -54,7 +55,7 @@ macro_rules! 
specialize { for HashMap<$key_type, V, AdaptiveState> { #[inline] fn safeguarded_search(&mut self, key: &$key_type, hash: SafeHash) - -> InternalEntry<$key_type, V, &mut RawTable<$key_type, V>> { + -> InternalEntryMut<$key_type, V> { let table_capacity = self.table.capacity(); let entry = search_hashed(DerefMapToTable(self), hash, |k| k == key); match entry { diff --git a/src/entry.rs b/src/entry.rs new file mode 100644 index 0000000..25a5002 --- /dev/null +++ b/src/entry.rs @@ -0,0 +1,198 @@ +// Copyright 2016 The Rust Project Developers. See the COPYRIGHT +// file at the top-level directory of this distribution and at +// http://rust-lang.org/COPYRIGHT. +// +// Licensed under the Apache License, Version 2.0 or the MIT license +// , at your +// option. This file may not be copied, modified, or distributed +// except according to those terms. + +use std::mem; + +use table::{EmptyBucket, FullBucket, SafeHash, RawTable}; +use internal_entry::InternalEntry; +use pop_internal; +use robin_hood; + +pub use self::Entry::*; +pub use self::VacantEntryState::*; + +/// A view into a single location in a map, which may be vacant or occupied. +pub enum Entry<'a, K: 'a, V: 'a> { + /// An occupied Entry. + Occupied(OccupiedEntry<'a, K, V>), + + /// A vacant Entry. + Vacant(VacantEntry<'a, K, V>), +} + +/// A view into a single occupied location in a HashMap. +pub struct OccupiedEntry<'a, K: 'a, V: 'a> { + key: Option, + elem: FullBucket>, +} + +/// A view into a single empty location in a HashMap. +pub struct VacantEntry<'a, K: 'a, V: 'a> { + hash: SafeHash, + key: K, + elem: VacantEntryState>, +} + +/// Possible states of a VacantEntry. +pub enum VacantEntryState { + /// The index is occupied, but the key to insert has precedence, + /// and will kick the current one out on insertion. + NeqElem(FullBucket, usize), + /// The index is genuinely vacant. + NoElem(EmptyBucket), +} + +impl<'a, K, V> Entry<'a, K, V> { + /// Returns the entry key + /// + /// # Examples + /// + /// ``` + /// use hashmap2::HashMap; + /// + /// let mut map = HashMap::::new(); + /// + /// assert_eq!("hello", map.entry("hello".to_string()).key()); + /// ``` + pub fn key(&self) -> &K { + match *self { + Occupied(ref entry) => entry.key(), + Vacant(ref entry) => entry.key(), + } + } + + /// Ensures a value is in the entry by inserting the default if empty, and returns + /// a mutable reference to the value in the entry. + pub fn or_insert(self, default: V) -> &'a mut V { + match self { + Occupied(entry) => entry.into_mut(), + Vacant(entry) => entry.insert(default), + } + } + + /// Ensures a value is in the entry by inserting the result of the default function if empty, + /// and returns a mutable reference to the value in the entry. + pub fn or_insert_with V>(self, default: F) -> &'a mut V { + match self { + Occupied(entry) => entry.into_mut(), + Vacant(entry) => entry.insert(default()), + } + } +} + +impl<'a, K, V> OccupiedEntry<'a, K, V> { + /// Gets a reference to the value in the entry. + pub fn get(&self) -> &V { + self.elem.read().1 + } + + /// Gets a mutable reference to the value in the entry. 
+ pub fn get_mut(&mut self) -> &mut V { + self.elem.read_mut().1 + } + + /// Converts the OccupiedEntry into a mutable reference to the value in the entry + /// with a lifetime bound to the map itself + pub fn into_mut(self) -> &'a mut V { + self.elem.into_mut_refs().1 + } + + /// Sets the value of the entry, and returns the entry's old value + pub fn insert(&mut self, mut value: V) -> V { + let old_value = self.get_mut(); + mem::swap(&mut value, old_value); + value + } + + /// Takes the value out of the entry, and returns it + pub fn remove(self) -> V { + pop_internal(self.elem).1 + } + + /// Gets a reference to the entry key + /// + /// # Examples + /// + /// ``` + /// use hashmap2::HashMap; + /// + /// let mut map = HashMap::new(); + /// + /// map.insert("foo".to_string(), 1); + /// assert_eq!("foo", map.entry("foo".to_string()).key()); + /// ``` + pub fn key(&self) -> &K { + self.elem.read().0 + } + + /// Returns a key that was used for search. + /// + /// The key was retained for further use. + pub fn take_key(&mut self) -> Option { + self.key.take() + } +} + +impl<'a, K: 'a, V: 'a> VacantEntry<'a, K, V> { + /// Sets the value of the entry with the VacantEntry's key, + /// and returns a mutable reference to it + pub fn insert(self, value: V) -> &'a mut V { + match self.elem { + NeqElem(bucket, ib) => { + robin_hood(bucket, ib, self.hash, self.key, value) + } + NoElem(bucket) => { + bucket.put(self.hash, self.key, value).into_mut_refs().1 + } + } + } + + /// Gets a reference to the entry key + /// + /// # Examples + /// + /// ``` + /// use hashmap2::HashMap; + /// + /// let mut map = HashMap::::new(); + /// + /// assert_eq!("foo", map.entry("foo".to_string()).key()); + /// ``` + pub fn key(&self) -> &K { + &self.key + } +} + +#[inline] +pub fn from_internal(internal: InternalEntry>, key: Option) + -> Option> { + match internal { + InternalEntry::Occupied { elem } => { + Some(Entry::Occupied(OccupiedEntry { + key: key, + elem: elem + })) + } + InternalEntry::Vacant { hash, elem } => { + Some(Entry::Vacant(VacantEntry { + hash: hash, + key: key.unwrap(), + elem: elem, + })) + } + InternalEntry::TableIsEmpty => None + } +} + +#[inline] +pub fn occupied_elem<'a, 'r, K, V>(occupied: &'r mut OccupiedEntry<'a, K, V>) + -> &'r mut FullBucket> { + &mut occupied.elem +} diff --git a/src/internal_entry.rs b/src/internal_entry.rs new file mode 100644 index 0000000..7cf6c4c --- /dev/null +++ b/src/internal_entry.rs @@ -0,0 +1,41 @@ +// Copyright 2016 The Rust Project Developers. See the COPYRIGHT +// file at the top-level directory of this distribution and at +// http://rust-lang.org/COPYRIGHT. +// +// Licensed under the Apache License, Version 2.0 or the MIT license +// , at your +// option. This file may not be copied, modified, or distributed +// except according to those terms. 
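+
+// This type is shared between lib.rs (which constructs it during searches) and
+// entry.rs (which converts it into the public `Entry` type via from_internal),
+// so it lives in its own module rather than leaking a private type through a
+// public interface.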
+ +use table::{FullBucket, SafeHash, RawTable}; +use entry::{self, VacantEntryState}; +use Entry; + +pub enum InternalEntry { + Occupied { + elem: FullBucket, + }, + Vacant { + hash: SafeHash, + elem: VacantEntryState, + }, + TableIsEmpty, +} + +impl InternalEntry { + #[inline] + pub fn into_occupied_bucket(self) -> Option> { + match self { + InternalEntry::Occupied { elem } => Some(elem), + _ => None, + } + } +} + +impl<'a, K, V> InternalEntry> { + #[inline] + pub fn into_entry(self, key: K) -> Option> { + entry::from_internal(self, Some(key)) + } +} diff --git a/src/lib.rs b/src/lib.rs index 8364366..4e08179 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -24,32 +24,30 @@ extern crate alloc; extern crate rand; -mod recover; mod table; mod adaptive_hashing; mod adaptive_map; - -use self::Entry::*; -use self::VacantEntryState::*; +mod entry; +mod internal_entry; +mod recover; +mod sip_hash_state; use std::borrow::{Borrow, Cow}; use std::cmp::{max, Eq, PartialEq}; use std::default::Default; use std::fmt::{self, Debug}; -use std::hash::{BuildHasher, Hash, SipHasher}; +use std::hash::{BuildHasher, Hash}; use std::iter::{self, Iterator, ExactSizeIterator, IntoIterator, FromIterator, Extend, Map}; use std::mem::{self, replace}; -use std::ops::{Deref, FnMut, FnOnce, Index}; +use std::ops::{Deref, FnMut, Index}; use std::option::Option::{Some, None}; -use rand::{Rng}; -use recover::Recover; -use adaptive_hashing::AdaptiveState; use adaptive_map::SafeguardedSearch; +use entry::{NoElem, NeqElem, Occupied, Vacant}; +use internal_entry::InternalEntry; +use recover::Recover; use table::{ Bucket, - EmptyBucket, - FullBucket, FullBucketMut, RawTable, SafeHash @@ -59,6 +57,9 @@ use table::BucketState::{ Full, }; +pub use adaptive_hashing::AdaptiveState as RandomState; +pub use entry::{Entry, OccupiedEntry, VacantEntry}; + const INITIAL_LOG2_CAP: usize = 5; const INITIAL_CAPACITY: usize = 1 << INITIAL_LOG2_CAP; // 2^5 @@ -353,12 +354,12 @@ fn test_resize_policy() { /// } /// ``` #[derive(Clone)] -pub struct HashMap { +pub struct HashMap { + table: RawTable, + // All hashes are keyed on these values, to prevent hash collision attacks. hash_builder: S, - table: RawTable, - resize_policy: DefaultResizePolicy, } @@ -549,7 +550,7 @@ impl HashMap } } -impl HashMap { +impl HashMap { /// Creates an empty HashMap. /// /// # Examples @@ -559,7 +560,7 @@ impl HashMap { /// let mut map: HashMap<&str, isize> = HashMap::new(); /// ``` #[inline] - pub fn new() -> HashMap { + pub fn new() -> HashMap { Default::default() } @@ -572,7 +573,7 @@ impl HashMap { /// let mut map: HashMap<&str, isize> = HashMap::with_capacity(10); /// ``` #[inline] - pub fn with_capacity(capacity: usize) -> HashMap { + pub fn with_capacity(capacity: usize) -> HashMap { let map: Self = Default::default(); HashMap::with_capacity_and_hasher(capacity, map.hash_builder) } @@ -1228,11 +1229,11 @@ fn search_entry_hashed2<'a, K: Eq, V, Q: ?Sized>(table: &'a mut RawTable, h let bucket = match probe.peek() { Empty(bucket) => { // Found a hole! - return Vacant(VacantEntry { + let internal = InternalEntry::Vacant { hash: hash, - key: k.into_owned(), elem: NoElem(bucket), - }); + }; + return entry::from_internal(internal, Some(k.into_owned())).unwrap(); }, Full(bucket) => bucket }; @@ -1243,10 +1244,10 @@ fn search_entry_hashed2<'a, K: Eq, V, Q: ?Sized>(table: &'a mut RawTable, h // key matches? 
if *b == *bucket.read().0.borrow() { - return Occupied(OccupiedEntry { - key: None, + let internal = InternalEntry::Occupied { elem: bucket, - }); + }; + return entry::from_internal(internal, None).unwrap(); } } @@ -1254,11 +1255,11 @@ fn search_entry_hashed2<'a, K: Eq, V, Q: ?Sized>(table: &'a mut RawTable, h if ib < robin_ib { // Found a luckier bucket than me. Better steal his spot. - return Vacant(VacantEntry { + let internal = InternalEntry::Vacant { hash: hash, - key: k.into_owned(), elem: NeqElem(bucket, robin_ib as usize), - }); + }; + return entry::from_internal(internal, Some(k.into_owned())).unwrap(); } probe = bucket.next(); @@ -1369,80 +1370,6 @@ pub struct Drain<'a, K: 'a, V: 'a> { inner: iter::Map, fn((SafeHash, K, V)) -> (K, V)> } -enum InternalEntry { - Occupied { - elem: FullBucket, - }, - Vacant { - hash: SafeHash, - elem: VacantEntryState, - }, - TableIsEmpty, -} - -impl InternalEntry { - #[inline] - fn into_occupied_bucket(self) -> Option> { - match self { - InternalEntry::Occupied { elem } => Some(elem), - _ => None, - } - } -} - -impl<'a, K, V> InternalEntry> { - #[inline] - fn into_entry(self, key: K) -> Option> { - match self { - InternalEntry::Occupied { elem } => { - Some(Occupied(OccupiedEntry { - key: Some(key), - elem: elem - })) - } - InternalEntry::Vacant { hash, elem } => { - Some(Vacant(VacantEntry { - hash: hash, - key: key, - elem: elem, - })) - } - InternalEntry::TableIsEmpty => None - } - } -} - -/// A view into a single location in a map, which may be vacant or occupied. -pub enum Entry<'a, K: 'a, V: 'a> { - /// An occupied Entry. - Occupied(OccupiedEntry<'a, K, V>), - - /// A vacant Entry. - Vacant(VacantEntry<'a, K, V>), -} - -/// A view into a single occupied location in a HashMap. -pub struct OccupiedEntry<'a, K: 'a, V: 'a> { - key: Option, - elem: FullBucket>, -} - -/// A view into a single empty location in a HashMap. -pub struct VacantEntry<'a, K: 'a, V: 'a> { - hash: SafeHash, - key: K, - elem: VacantEntryState>, -} - -/// Possible states of a VacantEntry. -enum VacantEntryState { - /// The index is occupied, but the key to insert has precedence, - /// and will kick the current one out on insertion. - NeqElem(FullBucket, usize), - /// The index is genuinely vacant. - NoElem(EmptyBucket), -} - impl<'a, K, V, S> IntoIterator for &'a HashMap where K: Eq + Hash, S: BuildHasher { @@ -1558,127 +1485,6 @@ impl<'a, K, V> ExactSizeIterator for Drain<'a, K, V> { #[inline] fn len(&self) -> usize { self.inner.len() } } -impl<'a, K, V> Entry<'a, K, V> { - /// Returns the entry key - /// - /// # Examples - /// - /// ``` - /// use hashmap2::HashMap; - /// - /// let mut map = HashMap::::new(); - /// - /// assert_eq!("hello", map.entry("hello".to_string()).key()); - /// ``` - pub fn key(&self) -> &K { - match *self { - Occupied(ref entry) => entry.key(), - Vacant(ref entry) => entry.key(), - } - } - - /// Ensures a value is in the entry by inserting the default if empty, and returns - /// a mutable reference to the value in the entry. - pub fn or_insert(self, default: V) -> &'a mut V { - match self { - Occupied(entry) => entry.into_mut(), - Vacant(entry) => entry.insert(default), - } - } - - /// Ensures a value is in the entry by inserting the result of the default function if empty, - /// and returns a mutable reference to the value in the entry. 
- pub fn or_insert_with V>(self, default: F) -> &'a mut V { - match self { - Occupied(entry) => entry.into_mut(), - Vacant(entry) => entry.insert(default()), - } - } -} - -impl<'a, K, V> OccupiedEntry<'a, K, V> { - /// Gets a reference to the value in the entry. - pub fn get(&self) -> &V { - self.elem.read().1 - } - - /// Gets a mutable reference to the value in the entry. - pub fn get_mut(&mut self) -> &mut V { - self.elem.read_mut().1 - } - - /// Converts the OccupiedEntry into a mutable reference to the value in the entry - /// with a lifetime bound to the map itself - pub fn into_mut(self) -> &'a mut V { - self.elem.into_mut_refs().1 - } - - /// Sets the value of the entry, and returns the entry's old value - pub fn insert(&mut self, mut value: V) -> V { - let old_value = self.get_mut(); - mem::swap(&mut value, old_value); - value - } - - /// Takes the value out of the entry, and returns it - pub fn remove(self) -> V { - pop_internal(self.elem).1 - } - - /// Gets a reference to the entry key - /// - /// # Examples - /// - /// ``` - /// use hashmap2::HashMap; - /// - /// let mut map = HashMap::new(); - /// - /// map.insert("foo".to_string(), 1); - /// assert_eq!("foo", map.entry("foo".to_string()).key()); - /// ``` - pub fn key(&self) -> &K { - self.elem.read().0 - } - - /// Returns a key that was used for search. - /// - /// The key was retained for further use. - fn take_key(&mut self) -> Option { - self.key.take() - } -} - -impl<'a, K: 'a, V: 'a> VacantEntry<'a, K, V> { - /// Sets the value of the entry with the VacantEntry's key, - /// and returns a mutable reference to it - pub fn insert(self, value: V) -> &'a mut V { - match self.elem { - NeqElem(bucket, ib) => { - robin_hood(bucket, ib, self.hash, self.key, value) - } - NoElem(bucket) => { - bucket.put(self.hash, self.key, value).into_mut_refs().1 - } - } - } - - /// Gets a reference to the entry key - /// - /// # Examples - /// - /// ``` - /// use hashmap2::HashMap; - /// - /// let mut map = HashMap::::new(); - /// - /// assert_eq!("foo", map.entry("foo".to_string()).key()); - /// ``` - pub fn key(&self) -> &K { - &self.key - } -} - impl FromIterator<(K, V)> for HashMap where K: Eq + Hash, S: BuildHasher + Default { @@ -1709,42 +1515,6 @@ impl<'a, K, V, S> Extend<(&'a K, &'a V)> for HashMap } } -/// `RandomState` is the default state for `HashMap` types. -/// -/// A particular instance `RandomState` will create the same instances of -/// `Hasher`, but the hashers created by two different `RandomState` -/// instances are unlikely to produce the same result for the same values. -#[derive(Clone)] -pub struct RandomState { - k0: u64, - k1: u64, -} - -impl RandomState { - /// Constructs a new `RandomState` that is initialized with random keys. 
- #[inline] - #[allow(deprecated)] // rand - pub fn new() -> RandomState { - let mut r = rand::thread_rng(); - RandomState { k0: r.gen(), k1: r.gen() } - } -} - -impl BuildHasher for RandomState { - type Hasher = SipHasher; - #[inline] - fn build_hasher(&self) -> SipHasher { - SipHasher::new_with_keys(self.k0, self.k1) - } -} - -impl Default for RandomState { - #[inline] - fn default() -> RandomState { - RandomState::new() - } -} - impl Recover for HashMap where K: Eq + Hash + Borrow, S: BuildHasher, Q: Eq + Hash { @@ -1768,7 +1538,7 @@ impl Recover for HashMap match self.entry(key) { Occupied(mut occupied) => { let key = occupied.take_key().unwrap(); - Some(mem::replace(occupied.elem.read_mut().0, key)) + Some(mem::replace(entry::occupied_elem(&mut occupied).read_mut().0, key)) } Vacant(vacant) => { vacant.insert(()); diff --git a/src/sip_hash_state.rs b/src/sip_hash_state.rs new file mode 100644 index 0000000..4761b63 --- /dev/null +++ b/src/sip_hash_state.rs @@ -0,0 +1,48 @@ +// Copyright 2016 The Rust Project Developers. See the COPYRIGHT +// file at the top-level directory of this distribution and at +// http://rust-lang.org/COPYRIGHT. +// +// Licensed under the Apache License, Version 2.0 or the MIT license +// , at your +// option. This file may not be copied, modified, or distributed +// except according to those terms. + +use std::hash::{BuildHasher, SipHasher}; +use rand::{self, Rng}; + +/// `SipHashState` is a random state for `HashMap` types. +/// +/// A particular instance `SipHashState` will create the same instances of +/// `Hasher`, but the hashers created by two different `SipHashState` +/// instances are unlikely to produce the same result for the same values. +#[derive(Clone)] +pub struct SipHashState { + k0: u64, + k1: u64, +} + +impl SipHashState { + /// Constructs a new `SipHashState` that is initialized with random keys. + #[inline] + #[allow(deprecated)] // rand + pub fn new() -> SipHashState { + let mut r = rand::thread_rng(); + SipHashState { k0: r.gen(), k1: r.gen() } + } +} + +impl BuildHasher for SipHashState { + type Hasher = SipHasher; + #[inline] + fn build_hasher(&self) -> SipHasher { + SipHasher::new_with_keys(self.k0, self.k1) + } +} + +impl Default for SipHashState { + #[inline] + fn default() -> SipHashState { + SipHashState::new() + } +} From 5ec6a09a3d2d7517ef9c85498b62f33aef55e502 Mon Sep 17 00:00:00 2001 From: Piotr Czarnecki Date: Wed, 23 Mar 2016 20:05:19 +0100 Subject: [PATCH 06/24] refactor a large function --- src/adaptive_map.rs | 93 +++++++++++++++++++++++++++------------------ 1 file changed, 55 insertions(+), 38 deletions(-) diff --git a/src/adaptive_map.rs b/src/adaptive_map.rs index dc1d718..e5b9b78 100644 --- a/src/adaptive_map.rs +++ b/src/adaptive_map.rs @@ -18,6 +18,7 @@ use table::{ SafeHash }; use internal_entry::InternalEntry; +use entry::VacantEntryState; use entry::VacantEntryState::NeqElem; use entry::VacantEntryState::NoElem; use HashMap; @@ -56,6 +57,7 @@ macro_rules! specialize { #[inline] fn safeguarded_search(&mut self, key: &$key_type, hash: SafeHash) -> InternalEntryMut<$key_type, V> { + let table_capacity = self.table.capacity(); let entry = search_hashed(DerefMapToTable(self), hash, |k| k == key); match entry { @@ -67,43 +69,7 @@ macro_rules! specialize { InternalEntry::TableIsEmpty } InternalEntry::Vacant { elem, hash } => { - let index = match elem { - NeqElem(ref bucket, _) => bucket.index(), - NoElem(ref bucket) => bucket.index(), - }; - // Copied from FullBucket::displacement. 
- let displacement = - index.wrapping_sub(hash.inspect() as usize) & (table_capacity - 1); - if displacement > DISPLACEMENT_THRESHOLD { - let map = match elem { - NeqElem(bucket, _) => { - bucket.into_table() - } - NoElem(bucket) => { - bucket.into_table() - } - }; - // Probe sequence is too long. - // Adapt to safe hashing if desirable. - maybe_adapt_to_safe_hashing(map.0); - search_hashed(&mut map.0.table, hash, |k| k == key) - } else { - // This should compile down to a no-op. - match elem { - NeqElem(bucket, ib) => { - InternalEntry::Vacant { - elem: NeqElem(bucket.convert_table(), ib), - hash: hash, - } - } - NoElem(bucket) => { - InternalEntry::Vacant { - elem: NoElem(bucket.convert_table()), - hash: hash, - } - } - } - } + safeguard_vacant_entry(elem, key, hash, table_capacity) } } } @@ -118,10 +84,60 @@ macro_rules! specialize { ) } +#[inline] +fn safeguard_vacant_entry<'a, K, V>( + elem: VacantEntryState>, + key: &K, + hash: SafeHash, + table_capacity: usize, +) -> InternalEntryMut<'a, K, V> + where K: Eq + Hash +{ + let index = match elem { + NeqElem(ref bucket, _) => bucket.index(), + NoElem(ref bucket) => bucket.index(), + }; + // Copied from FullBucket::displacement. + let displacement = index.wrapping_sub(hash.inspect() as usize) & (table_capacity - 1); + if displacement > DISPLACEMENT_THRESHOLD { + // Probe sequence is too long. + maybe_adapt_to_safe_hashing(elem, key, hash) + } else { + // This should compile down to a no-op. + match elem { + NeqElem(bucket, ib) => { + InternalEntry::Vacant { + elem: NeqElem(bucket.convert_table(), ib), + hash: hash, + } + } + NoElem(bucket) => { + InternalEntry::Vacant { + elem: NoElem(bucket.convert_table()), + hash: hash, + } + } + } + } +} + +// Adapt to safe hashing if desirable. #[cold] -fn maybe_adapt_to_safe_hashing(map: &mut HashMap) +fn maybe_adapt_to_safe_hashing<'a, K, V>( + elem: VacantEntryState>, + key: &K, + hash: SafeHash +) -> InternalEntryMut<'a, K, V> where K: Eq + Hash { + let map = match elem { + NeqElem(bucket, _) => { + bucket.into_table().0 + } + NoElem(bucket) => { + bucket.into_table().0 + } + }; let capacity = map.table.capacity(); let load_factor = map.len() as f32 / capacity as f32; if load_factor >= LOAD_FACTOR_THRESHOLD { @@ -134,6 +150,7 @@ fn maybe_adapt_to_safe_hashing(map: &mut HashMap) map.insert_hashed_nocheck(hash, k, v); } } + search_hashed(&mut map.table, hash, |k| k == key) } specialize! { K = u8; } From adcd5809454d1ed54a9e0e3fb23134c036919701 Mon Sep 17 00:00:00 2001 From: Piotr Czarnecki Date: Thu, 24 Mar 2016 09:57:10 +0100 Subject: [PATCH 07/24] doc --- src/adaptive_map.rs | 10 +++++++++- src/entry.rs | 2 ++ 2 files changed, 11 insertions(+), 1 deletion(-) diff --git a/src/adaptive_map.rs b/src/adaptive_map.rs index e5b9b78..cf4ec85 100644 --- a/src/adaptive_map.rs +++ b/src/adaptive_map.rs @@ -30,6 +30,10 @@ const DISPLACEMENT_THRESHOLD: usize = 128; // Otherwise, we grow the table. const LOAD_FACTOR_THRESHOLD: f32 = 0.625; +// The displacement threshold should be high enough so that even with the maximal load factor, +// it's very rarely exceeded. +// As the load approaches 90%, displacements larger than ~ 32 are much more probable. + // Avoid problems with private types in public interfaces. pub type InternalEntryMut<'a, K: 'a, V: 'a> = InternalEntry>; @@ -101,6 +105,7 @@ fn safeguard_vacant_entry<'a, K, V>( let displacement = index.wrapping_sub(hash.inspect() as usize) & (table_capacity - 1); if displacement > DISPLACEMENT_THRESHOLD { // Probe sequence is too long. 
+ // This branch is very unlikely. maybe_adapt_to_safe_hashing(elem, key, hash) } else { // This should compile down to a no-op. @@ -143,6 +148,9 @@ fn maybe_adapt_to_safe_hashing<'a, K, V>( if load_factor >= LOAD_FACTOR_THRESHOLD { map.resize(capacity * 2); } else { + // Taking this branch is as rare as proton decay. The average time between two executions of + // this branch is 20 billion years. We assume continuous insertion on a single CPU + // core, without intentional DoS attack. map.hash_builder.switch_to_safe_hashing(); let old_table = replace(&mut map.table, RawTable::new(capacity)); for (_, k, v) in old_table.into_iter() { @@ -217,7 +225,7 @@ mod test_adaptive_map { ]; #[test] - fn test_dos_attack() { + fn test_dos_safeguard() { let mut map = HashMap::new(); let mut values = VALUES.iter(); for &value in (&mut values).take(DISPLACEMENT_THRESHOLD - 1) { diff --git a/src/entry.rs b/src/entry.rs index 25a5002..7aa3424 100644 --- a/src/entry.rs +++ b/src/entry.rs @@ -170,6 +170,8 @@ impl<'a, K: 'a, V: 'a> VacantEntry<'a, K, V> { } } +// These fns are public, but the entire module is not. + #[inline] pub fn from_internal(internal: InternalEntry>, key: Option) -> Option> { From a8faa200b1323a639415eb3cf3dc1047a32c5fa9 Mon Sep 17 00:00:00 2001 From: Piotr Czarnecki Date: Thu, 24 Mar 2016 09:57:30 +0100 Subject: [PATCH 08/24] Use a trait instead of a macro for specialization --- src/adaptive_map.rs | 84 ++++++++++++++++++++++++--------------------- 1 file changed, 45 insertions(+), 39 deletions(-) diff --git a/src/adaptive_map.rs b/src/adaptive_map.rs index cf4ec85..09c1971 100644 --- a/src/adaptive_map.rs +++ b/src/adaptive_map.rs @@ -54,38 +54,37 @@ impl SafeguardedSearch for HashMap } } -macro_rules! specialize { - (K = $key_type:ty; $($type_var:ident),*) => ( - impl SafeguardedSearch<$key_type, V> - for HashMap<$key_type, V, AdaptiveState> { - #[inline] - fn safeguarded_search(&mut self, key: &$key_type, hash: SafeHash) - -> InternalEntryMut<$key_type, V> { - - let table_capacity = self.table.capacity(); - let entry = search_hashed(DerefMapToTable(self), hash, |k| k == key); - match entry { - InternalEntry::Occupied { elem } => { - // This should compile down to a no-op. - InternalEntry::Occupied { elem: elem.convert_table() } - } - InternalEntry::TableIsEmpty => { - InternalEntry::TableIsEmpty - } - InternalEntry::Vacant { elem, hash } => { - safeguard_vacant_entry(elem, key, hash, table_capacity) - } - } +impl SafeguardedSearch for HashMap + where K: Eq + OneshotHash +{ + #[inline] + fn safeguarded_search(&mut self, key: &K, hash: SafeHash) + -> InternalEntryMut { + + let table_capacity = self.table.capacity(); + let entry = search_hashed(DerefMapToTable(self), hash, |k| k == key); + match entry { + InternalEntry::Occupied { elem } => { + // This should compile down to a no-op. + InternalEntry::Occupied { elem: elem.convert_table() } } - } - - // For correct creation of HashMap. - impl Default for HashMap<$key_type, V, AdaptiveState> { - fn default() -> Self { - HashMap::with_hasher(AdaptiveState::new_fast()) + InternalEntry::TableIsEmpty => { + InternalEntry::TableIsEmpty + } + InternalEntry::Vacant { elem, hash } => { + safeguard_vacant_entry(elem, key, hash, table_capacity) } } - ) + } +} + +// For correct creation of HashMap. 
+impl Default for HashMap + where K: Eq + OneshotHash +{ + fn default() -> Self { + HashMap::with_hasher(AdaptiveState::new_fast()) + } } #[inline] @@ -161,16 +160,23 @@ fn maybe_adapt_to_safe_hashing<'a, K, V>( search_hashed(&mut map.table, hash, |k| k == key) } -specialize! { K = u8; } -specialize! { K = i8; } -specialize! { K = u16; } -specialize! { K = i16; } -specialize! { K = u32; } -specialize! { K = i32; } -specialize! { K = u64; } -specialize! { K = i64; } -specialize! { K = *const T; T } -specialize! { K = *mut T; T } +pub trait OneshotHash: Hash {} + +impl OneshotHash for i8 {} +impl OneshotHash for u8 {} +impl OneshotHash for u16 {} +impl OneshotHash for i16 {} +impl OneshotHash for u32 {} +impl OneshotHash for i32 {} +impl OneshotHash for u64 {} +impl OneshotHash for i64 {} +impl OneshotHash for usize {} +impl OneshotHash for isize {} +impl OneshotHash for char {} +impl OneshotHash for *const T {} +impl OneshotHash for *mut T {} +impl<'a, T> OneshotHash for &'a T where T: OneshotHash {} +impl<'a, T> OneshotHash for &'a mut T where T: OneshotHash {} struct DerefMapToTable<'a, K: 'a, V: 'a, S: 'a>(&'a mut HashMap); From 70450dd2e3de55f3a502f583e4070cb6cfdda2e6 Mon Sep 17 00:00:00 2001 From: Piotr Czarnecki Date: Thu, 24 Mar 2016 09:58:14 +0100 Subject: [PATCH 09/24] remove test --- src/adaptive_map.rs | 63 --------------------------------------------- 1 file changed, 63 deletions(-) diff --git a/src/adaptive_map.rs b/src/adaptive_map.rs index 09c1971..9c9a6b2 100644 --- a/src/adaptive_map.rs +++ b/src/adaptive_map.rs @@ -243,67 +243,4 @@ mod test_adaptive_map { } assert!(map.hash_builder.uses_safe_hashing()); } - - #[test] - fn test_adaptive_lots_of_insertions() { - let mut m = HashMap::new(); - - // Try this a few times to make sure we never screw up the hashmap's - // internal state. - for _ in 0..10 { - assert!(m.is_empty()); - - for i in 1 ... 1000 { - assert!(m.insert(i, i).is_none()); - - for j in 1...i { - let r = m.get(&j); - assert_eq!(r, Some(&j)); - } - - for j in i+1...1000 { - let r = m.get(&j); - assert_eq!(r, None); - } - } - - for i in 1001...2000 { - assert!(!m.contains_key(&i)); - } - - // remove forwards - for i in 1...1000 { - assert!(m.remove(&i).is_some()); - - for j in 1...i { - assert!(!m.contains_key(&j)); - } - - for j in i+1...1000 { - assert!(m.contains_key(&j)); - } - } - - for i in 1...1000 { - assert!(!m.contains_key(&i)); - } - - for i in 1...1000 { - assert!(m.insert(i, i).is_none()); - } - - // remove backwards - for i in (1..1001).rev() { - assert!(m.remove(&i).is_some()); - - for j in i...1000 { - assert!(!m.contains_key(&j)); - } - - for j in 1...i-1 { - assert!(m.contains_key(&j)); - } - } - } - } } From 4ec0ecba7b59c694de0fdb4b6b2917f7c1ed4491 Mon Sep 17 00:00:00 2001 From: Piotr Czarnecki Date: Thu, 24 Mar 2016 13:50:54 +0100 Subject: [PATCH 10/24] doc --- src/adaptive_hashing.rs | 4 ++++ src/adaptive_map.rs | 27 ++++++++++++++++++++------- 2 files changed, 24 insertions(+), 7 deletions(-) diff --git a/src/adaptive_hashing.rs b/src/adaptive_hashing.rs index 3f5d473..deda4f6 100644 --- a/src/adaptive_hashing.rs +++ b/src/adaptive_hashing.rs @@ -98,8 +98,10 @@ impl Hasher for AdaptiveHasher { #[inline] fn write(&mut self, msg: &[u8]) { if let Some(ref mut hasher) = self.safe_hasher { + // Use safe hashing. hasher.write(msg); } else { + // Use fast hashing. 
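            // The fast path assumes the key was fed to the hasher in exactly
            // one write() of at most 8 bytes -- true for the integer-like
            // OneshotHash keys; longer inputs hit the panic below by design.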
let msg_data = unsafe { if msg.len() <= 8 { load_u64_le(msg, msg.len()) @@ -114,8 +116,10 @@ impl Hasher for AdaptiveHasher { #[inline] fn finish(&self) -> u64 { if let Some(ref hasher) = self.safe_hasher { + // Use safe hashing. hasher.finish() } else { + // Use fast hashing. self.hash } } diff --git a/src/adaptive_map.rs b/src/adaptive_map.rs index 9c9a6b2..5029b68 100644 --- a/src/adaptive_map.rs +++ b/src/adaptive_map.rs @@ -32,7 +32,17 @@ const LOAD_FACTOR_THRESHOLD: f32 = 0.625; // The displacement threshold should be high enough so that even with the maximal load factor, // it's very rarely exceeded. -// As the load approaches 90%, displacements larger than ~ 32 are much more probable. +// As the load approaches 90%, displacements larger than ~ 20 are much more probable. +// On the other hand, the threshold should be low enough so that the same number of hashes +// easily fits in the cache and takes a reasonable time to iterate through. + +// The load factor threshold should be relatively low, but high enough so that its half is not very +// low (~ 20%). We choose 62.5%, because it's a simple fraction (5/8), and its half is 31.25%. +// (When a map is grown, the load factor is halved.) + +// TODO: add one-shot hashing for String, str, arrays and other types. +// TODO: consider adding a limit for the number of fully equal hashes in a probe sequence. +// Fully equal hashes cause key comparison, which might be a problem for large string keys. // Avoid problems with private types in public interfaces. pub type InternalEntryMut<'a, K: 'a, V: 'a> = InternalEntry>; @@ -65,7 +75,7 @@ impl SafeguardedSearch for HashMap let entry = search_hashed(DerefMapToTable(self), hash, |k| k == key); match entry { InternalEntry::Occupied { elem } => { - // This should compile down to a no-op. + // This should compile down to a simple copy. InternalEntry::Occupied { elem: elem.convert_table() } } InternalEntry::TableIsEmpty => { @@ -83,6 +93,9 @@ impl Default for HashMap where K: Eq + OneshotHash { fn default() -> Self { + // We use the fast, deterministic hasher. + // TODO: load a seed from the TLS for nondeterministic iteration order. + // See https://github.com/rust-lang/rust/pull/31356 HashMap::with_hasher(AdaptiveState::new_fast()) } } @@ -102,12 +115,13 @@ fn safeguard_vacant_entry<'a, K, V>( }; // Copied from FullBucket::displacement. let displacement = index.wrapping_sub(hash.inspect() as usize) & (table_capacity - 1); + // Check displacement. if displacement > DISPLACEMENT_THRESHOLD { // Probe sequence is too long. // This branch is very unlikely. maybe_adapt_to_safe_hashing(elem, key, hash) } else { - // This should compile down to a no-op. + // This should compile down to a simple copy. match elem { NeqElem(bucket, ib) => { InternalEntry::Vacant { @@ -125,7 +139,7 @@ fn safeguard_vacant_entry<'a, K, V>( } } -// Adapt to safe hashing if desirable. +// Adapt to safe hashing, if desirable. #[cold] fn maybe_adapt_to_safe_hashing<'a, K, V>( elem: VacantEntryState>, @@ -147,9 +161,8 @@ fn maybe_adapt_to_safe_hashing<'a, K, V>( if load_factor >= LOAD_FACTOR_THRESHOLD { map.resize(capacity * 2); } else { - // Taking this branch is as rare as proton decay. The average time between two executions of - // this branch is 20 billion years. We assume continuous insertion on a single CPU - // core, without intentional DoS attack. + // Taking this branch is extremely rare -- as rare as proton decay. That's assuming + // continuous insertion on a single CPU core, without intentional DoS attack. 
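        // (Rough scale, using the probability estimates added in the next
        // commit: at ~2.3e-49 per insertion, even 10^9 insertions per second
        // would need on the order of 10^32 years to take this branch once.)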
        map.hash_builder.switch_to_safe_hashing();
        let old_table = replace(&mut map.table, RawTable::new(capacity));
        for (_, k, v) in old_table.into_iter() {

From 14d75b2a76ee0006535e8a031cdd7158e02e01a0 Mon Sep 17 00:00:00 2001
From: Piotr Czarnecki
Date: Thu, 24 Mar 2016 13:52:43 +0100
Subject: [PATCH 11/24] Calculate probabilities

---
 src/adaptive_map.rs | 18 ++++++++++++++++++
 1 file changed, 18 insertions(+)

diff --git a/src/adaptive_map.rs b/src/adaptive_map.rs
index 5029b68..f1817ab 100644
--- a/src/adaptive_map.rs
+++ b/src/adaptive_map.rs
@@ -40,6 +40,24 @@ const LOAD_FACTOR_THRESHOLD: f32 = 0.625;
 // low (~ 20%). We choose 62.5%, because it's a simple fraction (5/8), and its half is 31.25%.
 // (When a map is grown, the load factor is halved.)
 
+// At a load factor of α, the odds of finding the target bucket after exactly n
+// unsuccessful probes[1] are
+//
+// Pr_α{displacement = n} =
+//     (1 - α) / α * ∑_{k≥1} e^(-kα) * (kα)^(k+n) / (k + n)! * (1 - kα / (k + n + 1))
+//
+// We use this formula to find the probability of loading half of a cache line, as well as
+// the probability of triggering the DoS safeguard with an insertion:
+//
+// Pr_0.625{displacement > 3} = 0.036
+// Pr_0.625{displacement > 128} = 2.284 * 10^-49
+//
+// Pr_0.909{displacement > 3} = 0.487
+// Pr_0.909{displacement > 128} = 1.601 * 10^-11
+//
+// 1. Alfredo Viola (2005). Distributional analysis of Robin Hood linear probing
+//    hashing with buckets.
+
 // TODO: add one-shot hashing for String, str, arrays and other types.
 // TODO: consider adding a limit for the number of fully equal hashes in a probe sequence.
 // Fully equal hashes cause key comparison, which might be a problem for large string keys.

From 74bf55cd6c93066899b12b0d22dceedda72daf4a Mon Sep 17 00:00:00 2001
From: Piotr Czarnecki
Date: Fri, 15 Jul 2016 16:42:35 +0200
Subject: [PATCH 12/24] Update for SipHash13

---
 src/adaptive_hashing.rs |  4 ++--
 src/lib.rs              |  3 ++-
 src/sip_hash_state.rs   | 28 ++++++++++++++--------------
 3 files changed, 18 insertions(+), 17 deletions(-)

diff --git a/src/adaptive_hashing.rs b/src/adaptive_hashing.rs
index deda4f6..6bc7115 100644
--- a/src/adaptive_hashing.rs
+++ b/src/adaptive_hashing.rs
@@ -8,7 +8,7 @@
 // option. This file may not be copied, modified, or distributed
 // except according to those terms.
 
-use std::hash::{BuildHasher, SipHasher, Hasher};
+use std::hash::{BuildHasher, SipHasher13, Hasher};
 
 use sip_hash_state::SipHashState;
 
@@ -61,7 +61,7 @@ impl BuildHasher for AdaptiveState {
 }
 
 pub struct AdaptiveHasher {
-    safe_hasher: Option<SipHasher>,
+    safe_hasher: Option<SipHasher13>,
     hash: u64,
 }
 
diff --git a/src/lib.rs b/src/lib.rs
index 4e08179..99b20be 100644
--- a/src/lib.rs
+++ b/src/lib.rs
@@ -16,7 +16,8 @@
     oom,
     unique,
     unsafe_no_drop_flag,
-    specialization
+    specialization,
+    sip_hash_13,
 )]
 
 #![cfg_attr(test, feature(inclusive_range_syntax))]
diff --git a/src/sip_hash_state.rs b/src/sip_hash_state.rs
index 4761b63..7670e4e 100644
--- a/src/sip_hash_state.rs
+++ b/src/sip_hash_state.rs
@@ -8,10 +8,10 @@
 // option. This file may not be copied, modified, or distributed
 // except according to those terms.
 
-use std::hash::{BuildHasher, SipHasher};
+use std::hash::{BuildHasher, SipHasher13};
 use rand::{self, Rng};
 
-/// `SipHashState` is a random state for `HashMap` types.
+/// `SipHashState` is the default state for `HashMap` types.
 ///
 /// A particular instance of `SipHashState` will create the same instances of
 /// `Hasher`, but the hashers created by two different `SipHashState`
@@ -27,22 +27,22 @@ impl SipHashState {
     #[inline]
     #[allow(deprecated)] // rand
     pub fn new() -> SipHashState {
-        let mut r = rand::thread_rng();
-        SipHashState { k0: r.gen(), k1: r.gen() }
-    }
-}
+        thread_local!(static KEYS: (u64, u64) = {
+            let r = rand::OsRng::new();
+            let mut r = r.expect("failed to create an OS RNG");
+            (r.gen(), r.gen())
+        });
 
-impl BuildHasher for SipHashState {
-    type Hasher = SipHasher;
-    #[inline]
-    fn build_hasher(&self) -> SipHasher {
-        SipHasher::new_with_keys(self.k0, self.k1)
+        KEYS.with(|&(k0, k1)| {
+            SipHashState { k0: k0, k1: k1 }
+        })
     }
 }
 
-impl Default for SipHashState {
+impl BuildHasher for SipHashState {
+    type Hasher = SipHasher13;
     #[inline]
-    fn default() -> SipHashState {
-        SipHashState::new()
+    fn build_hasher(&self) -> SipHasher13 {
+        SipHasher13::new_with_keys(self.k0, self.k1)
     }
 }

From be2a869517879ca40ab6ac2463931bc037b36a05 Mon Sep 17 00:00:00 2001
From: Piotr Czarnecki
Date: Fri, 15 Jul 2016 16:42:43 +0200
Subject: [PATCH 13/24] doc

---
 src/lib.rs | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/src/lib.rs b/src/lib.rs
index 99b20be..5ff88da 100644
--- a/src/lib.rs
+++ b/src/lib.rs
@@ -562,6 +562,7 @@ impl HashMap {
     /// ```
     #[inline]
     pub fn new() -> HashMap {
+        // This must go through HashMap::default(), which is specialized.
         Default::default()
     }
 
@@ -575,6 +576,7 @@ impl HashMap {
     /// ```
     #[inline]
     pub fn with_capacity(capacity: usize) -> HashMap {
+        // This must go through HashMap::default(), which is specialized.
         let map: Self = Default::default();
         HashMap::with_capacity_and_hasher(capacity, map.hash_builder)
     }
@@ -1296,6 +1298,7 @@ impl Default for HashMap
     where K: Eq + Hash,
           S: BuildHasher + Default,
 {
+    // There is a separate implementation of Default for HashMap<K, V, AdaptiveState>.
     default fn default() -> HashMap {
         HashMap::with_hasher(Default::default())
     }

From e8fb6cacde8dcb166b009057afce7588b8d27a77 Mon Sep 17 00:00:00 2001
From: Piotr Czarnecki
Date: Fri, 15 Jul 2016 16:42:59 +0200
Subject: [PATCH 14/24] inline things

---
 src/adaptive_hashing.rs | 1 +
 src/lib.rs              | 1 +
 2 files changed, 2 insertions(+)

diff --git a/src/adaptive_hashing.rs b/src/adaptive_hashing.rs
index 6bc7115..04881e7 100644
--- a/src/adaptive_hashing.rs
+++ b/src/adaptive_hashing.rs
@@ -42,6 +42,7 @@ impl AdaptiveState {
 
 // For correct creation of HashMap.
 impl Default for AdaptiveState {
+    #[inline]
     fn default() -> Self {
         let mut this = AdaptiveState::new_fast();
         this.switch_to_safe_hashing();
diff --git a/src/lib.rs b/src/lib.rs
index 5ff88da..5cf9fa9 100644
--- a/src/lib.rs
+++ b/src/lib.rs
@@ -1299,6 +1299,7 @@ impl Default for HashMap
     S: BuildHasher + Default,
 {
     // There is a separate implementation of Default for HashMap<K, V, AdaptiveState>.
+ #[inline] default fn default() -> HashMap { HashMap::with_hasher(Default::default()) } From df5f2b4ae448ae9820025c74e5efcd51ded38055 Mon Sep 17 00:00:00 2001 From: Piotr Czarnecki Date: Sun, 18 Sep 2016 09:49:27 +0200 Subject: [PATCH 15/24] Fixes for nightly Rust --- src/lib.rs | 2 -- src/table.rs | 3 +-- 2 files changed, 1 insertion(+), 4 deletions(-) diff --git a/src/lib.rs b/src/lib.rs index 5cf9fa9..43d1221 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -11,11 +11,9 @@ alloc, core_intrinsics, dropck_parametricity, - filling_drop, heap_api, oom, unique, - unsafe_no_drop_flag, specialization, sip_hash_13, )] diff --git a/src/table.rs b/src/table.rs index 4fd8ace..fd0d137 100644 --- a/src/table.rs +++ b/src/table.rs @@ -59,7 +59,6 @@ const EMPTY_BUCKET: u64 = 0; /// around just the "table" part of the hashtable. It enforces some /// invariants at the type level and employs some performance trickery, /// but in general is just a tricked out `Vec>`. -#[unsafe_no_drop_flag] pub struct RawTable { capacity: usize, size: usize, @@ -1049,7 +1048,7 @@ impl Clone for RawTable { impl Drop for RawTable { #[unsafe_destructor_blind_to_params] fn drop(&mut self) { - if self.capacity == 0 || self.capacity == mem::POST_DROP_USIZE { + if self.capacity == 0 { return; } From f381c2688ae52fcc0efa0d26b9f5d91c061c097f Mon Sep 17 00:00:00 2001 From: Piotr Czarnecki Date: Sun, 18 Sep 2016 12:31:34 +0200 Subject: [PATCH 16/24] Small refactoring --- src/adaptive_hashing.rs | 18 ++++++----- src/adaptive_map.rs | 68 +++++++++++++++++------------------------ src/lib.rs | 17 +++++++++-- 3 files changed, 54 insertions(+), 49 deletions(-) diff --git a/src/adaptive_hashing.rs b/src/adaptive_hashing.rs index 04881e7..af35a50 100644 --- a/src/adaptive_hashing.rs +++ b/src/adaptive_hashing.rs @@ -20,19 +20,25 @@ pub struct AdaptiveState { impl AdaptiveState { #[inline] pub fn new() -> Self { - Default::default() + AdaptiveState::new_for_safe_hashing() } #[inline] - pub fn new_fast() -> Self { + pub fn new_for_fast_hashing() -> Self { AdaptiveState { inner: None } } + #[inline] + pub fn new_for_safe_hashing() -> Self { + AdaptiveState { + inner: Some(SipHashState::new()) + } + } #[inline] pub fn switch_to_safe_hashing(&mut self) { - self.inner = Some(SipHashState::new()); + *self = AdaptiveState::new_for_safe_hashing(); } pub fn uses_safe_hashing(&self) -> bool { @@ -40,13 +46,11 @@ impl AdaptiveState { } } -// For correct creation of HashMap. +// For creating HashMap. impl Default for AdaptiveState { #[inline] fn default() -> Self { - let mut this = AdaptiveState::new_fast(); - this.switch_to_safe_hashing(); - this + AdaptiveState::new_for_safe_hashing() } } diff --git a/src/adaptive_map.rs b/src/adaptive_map.rs index f1817ab..f726d55 100644 --- a/src/adaptive_map.rs +++ b/src/adaptive_map.rs @@ -65,6 +65,8 @@ const LOAD_FACTOR_THRESHOLD: f32 = 0.625; // Avoid problems with private types in public interfaces. pub type InternalEntryMut<'a, K: 'a, V: 'a> = InternalEntry>; +pub trait OneshotHash: Hash {} + // We have this trait, because specialization doesn't work for inherent impls yet. 
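// A distilled sketch of the workaround (toy types, not the real API): the
// generic fallback and the specialized fast path must hang off a trait,
// because `default fn` is only accepted in trait impls.
//
//     trait Lookup { fn lookup(&self) -> &'static str; }
//     impl<T> Lookup for Vec<T> {
//         default fn lookup(&self) -> &'static str { "generic slow path" }
//     }
//     impl Lookup for Vec<u32> {
//         fn lookup(&self) -> &'static str { "specialized fast path" }
//     }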
pub trait SafeguardedSearch { // Method names are changed, because inherent methods shadow trait impl @@ -72,6 +74,22 @@ pub trait SafeguardedSearch { fn safeguarded_search(&mut self, key: &K, hash: SafeHash) -> InternalEntryMut; } +impl OneshotHash for i8 {} +impl OneshotHash for u8 {} +impl OneshotHash for u16 {} +impl OneshotHash for i16 {} +impl OneshotHash for u32 {} +impl OneshotHash for i32 {} +impl OneshotHash for u64 {} +impl OneshotHash for i64 {} +impl OneshotHash for usize {} +impl OneshotHash for isize {} +impl OneshotHash for char {} +impl OneshotHash for *const T {} +impl OneshotHash for *mut T {} +impl<'a, T> OneshotHash for &'a T where T: OneshotHash {} +impl<'a, T> OneshotHash for &'a mut T where T: OneshotHash {} + impl SafeguardedSearch for HashMap where K: Eq + Hash, S: BuildHasher @@ -106,18 +124,6 @@ impl SafeguardedSearch for HashMap } } -// For correct creation of HashMap. -impl Default for HashMap - where K: Eq + OneshotHash -{ - fn default() -> Self { - // We use the fast, deterministic hasher. - // TODO: load a seed from the TLS for nondeterministic iteration order. - // See https://github.com/rust-lang/rust/pull/31356 - HashMap::with_hasher(AdaptiveState::new_fast()) - } -} - #[inline] fn safeguard_vacant_entry<'a, K, V>( elem: VacantEntryState>, @@ -137,7 +143,15 @@ fn safeguard_vacant_entry<'a, K, V>( if displacement > DISPLACEMENT_THRESHOLD { // Probe sequence is too long. // This branch is very unlikely. - maybe_adapt_to_safe_hashing(elem, key, hash) + let map = match elem { + NeqElem(bucket, _) => { + bucket.into_table().0 + } + NoElem(bucket) => { + bucket.into_table().0 + } + }; + maybe_adapt_to_safe_hashing(map, key, hash) } else { // This should compile down to a simple copy. match elem { @@ -160,20 +174,12 @@ fn safeguard_vacant_entry<'a, K, V>( // Adapt to safe hashing, if desirable. 
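// (#[cold] below tells the compiler the function is unlikely to be called,
// keeping this slow adaptation path out of the hot insertion code's layout.)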
#[cold] fn maybe_adapt_to_safe_hashing<'a, K, V>( - elem: VacantEntryState>, + map: &'a mut HashMap, key: &K, hash: SafeHash ) -> InternalEntryMut<'a, K, V> where K: Eq + Hash { - let map = match elem { - NeqElem(bucket, _) => { - bucket.into_table().0 - } - NoElem(bucket) => { - bucket.into_table().0 - } - }; let capacity = map.table.capacity(); let load_factor = map.len() as f32 / capacity as f32; if load_factor >= LOAD_FACTOR_THRESHOLD { @@ -191,24 +197,6 @@ fn maybe_adapt_to_safe_hashing<'a, K, V>( search_hashed(&mut map.table, hash, |k| k == key) } -pub trait OneshotHash: Hash {} - -impl OneshotHash for i8 {} -impl OneshotHash for u8 {} -impl OneshotHash for u16 {} -impl OneshotHash for i16 {} -impl OneshotHash for u32 {} -impl OneshotHash for i32 {} -impl OneshotHash for u64 {} -impl OneshotHash for i64 {} -impl OneshotHash for usize {} -impl OneshotHash for isize {} -impl OneshotHash for char {} -impl OneshotHash for *const T {} -impl OneshotHash for *mut T {} -impl<'a, T> OneshotHash for &'a T where T: OneshotHash {} -impl<'a, T> OneshotHash for &'a mut T where T: OneshotHash {} - struct DerefMapToTable<'a, K: 'a, V: 'a, S: 'a>(&'a mut HashMap); impl<'a, K, V, S> Deref for DerefMapToTable<'a, K, V, S> { diff --git a/src/lib.rs b/src/lib.rs index 43d1221..f0b8230 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -40,7 +40,8 @@ use std::iter::{self, Iterator, ExactSizeIterator, IntoIterator, FromIterator, E use std::mem::{self, replace}; use std::ops::{Deref, FnMut, Index}; use std::option::Option::{Some, None}; -use adaptive_map::SafeguardedSearch; +use adaptive_hashing::AdaptiveState; +use adaptive_map::{SafeguardedSearch, OneshotHash}; use entry::{NoElem, NeqElem, Occupied, Vacant}; use internal_entry::InternalEntry; use recover::Recover; @@ -1296,13 +1297,25 @@ impl Default for HashMap where K: Eq + Hash, S: BuildHasher + Default, { - // There is a separate implementation of Default for HashMap. #[inline] default fn default() -> HashMap { HashMap::with_hasher(Default::default()) } } +// For correct creation of HashMap. +impl Default for HashMap + where K: Eq + OneshotHash +{ + #[inline] + fn default() -> Self { + // We use the fast, deterministic hasher. + // TODO: load a seed from the TLS for nondeterministic iteration order. 
+ // See https://github.com/rust-lang/rust/pull/31356 + HashMap::with_hasher(AdaptiveState::new_for_fast_hashing()) + } +} + impl<'a, K, Q: ?Sized, V, S> Index<&'a Q> for HashMap where K: Eq + Hash + Borrow, Q: Eq + Hash, From 548c9b6271dde11d8e8153c5b6b1674e3ad50afe Mon Sep 17 00:00:00 2001 From: Piotr Czarnecki Date: Sun, 18 Sep 2016 14:01:01 +0200 Subject: [PATCH 17/24] Major refactor --- src/adaptive_map.rs | 92 ++++++++++++++----------------------------- src/entry.rs | 25 ++++++++++++ src/internal_entry.rs | 24 ++++++++++- 3 files changed, 78 insertions(+), 63 deletions(-) diff --git a/src/adaptive_map.rs b/src/adaptive_map.rs index f726d55..5a9f4c3 100644 --- a/src/adaptive_map.rs +++ b/src/adaptive_map.rs @@ -19,8 +19,6 @@ use table::{ }; use internal_entry::InternalEntry; use entry::VacantEntryState; -use entry::VacantEntryState::NeqElem; -use entry::VacantEntryState::NoElem; use HashMap; use search_hashed; @@ -107,94 +105,64 @@ impl SafeguardedSearch for HashMap fn safeguarded_search(&mut self, key: &K, hash: SafeHash) -> InternalEntryMut { - let table_capacity = self.table.capacity(); - let entry = search_hashed(DerefMapToTable(self), hash, |k| k == key); - match entry { - InternalEntry::Occupied { elem } => { - // This should compile down to a simple copy. - InternalEntry::Occupied { elem: elem.convert_table() } - } - InternalEntry::TableIsEmpty => { - InternalEntry::TableIsEmpty - } - InternalEntry::Vacant { elem, hash } => { - safeguard_vacant_entry(elem, key, hash, table_capacity) - } + let mut entry = search_hashed(DerefMapToTable(self), hash, |k| k == key); + if let InternalEntry::Vacant { elem, hash } = entry { + entry = safeguard_vacant_entry(elem, hash, key) } + entry.convert_table() } } #[inline] fn safeguard_vacant_entry<'a, K, V>( elem: VacantEntryState>, - key: &K, hash: SafeHash, - table_capacity: usize, -) -> InternalEntryMut<'a, K, V> + key: &K, +) -> InternalEntry> where K: Eq + Hash { - let index = match elem { - NeqElem(ref bucket, _) => bucket.index(), - NoElem(ref bucket) => bucket.index(), - }; - // Copied from FullBucket::displacement. - let displacement = index.wrapping_sub(hash.inspect() as usize) & (table_capacity - 1); // Check displacement. - if displacement > DISPLACEMENT_THRESHOLD { - // Probe sequence is too long. + if elem.displacement(hash) > DISPLACEMENT_THRESHOLD { + // Probe sequence is too long. We must reduce its length. // This branch is very unlikely. - let map = match elem { - NeqElem(bucket, _) => { - bucket.into_table().0 - } - NoElem(bucket) => { - bucket.into_table().0 - } - }; - maybe_adapt_to_safe_hashing(map, key, hash) + let map = elem.into_table(); + reduce_displacement(map.0); + search_hashed(map, hash, |k| k == key) } else { // This should compile down to a simple copy. - match elem { - NeqElem(bucket, ib) => { - InternalEntry::Vacant { - elem: NeqElem(bucket.convert_table(), ib), - hash: hash, - } - } - NoElem(bucket) => { - InternalEntry::Vacant { - elem: NoElem(bucket.convert_table()), - hash: hash, - } - } + InternalEntry::Vacant { + elem: elem, + hash: hash, } } } // Adapt to safe hashing, if desirable. 
#[cold] -fn maybe_adapt_to_safe_hashing<'a, K, V>( - map: &'a mut HashMap, - key: &K, - hash: SafeHash -) -> InternalEntryMut<'a, K, V> +fn reduce_displacement<'a, K, V>(map: &'a mut HashMap) where K: Eq + Hash { - let capacity = map.table.capacity(); - let load_factor = map.len() as f32 / capacity as f32; + let table_capacity = map.table.capacity(); + let load_factor = map.len() as f32 / table_capacity as f32; if load_factor >= LOAD_FACTOR_THRESHOLD { - map.resize(capacity * 2); + map.resize(table_capacity * 2); } else { // Taking this branch is extremely rare -- as rare as proton decay. That's assuming // continuous insertion on a single CPU core, without intentional DoS attack. map.hash_builder.switch_to_safe_hashing(); - let old_table = replace(&mut map.table, RawTable::new(capacity)); - for (_, k, v) in old_table.into_iter() { - let hash = map.make_hash(&k); - map.insert_hashed_nocheck(hash, k, v); - } + rebuild_table(map); + } +} + +fn rebuild_table(map: &mut HashMap) + where K: Eq + Hash +{ + let table_capacity = map.table.capacity(); + let old_table = replace(&mut map.table, RawTable::new(table_capacity)); + for (_, k, v) in old_table.into_iter() { + let hash = map.make_hash(&k); + map.insert_hashed_nocheck(hash, k, v); } - search_hashed(&mut map.table, hash, |k| k == key) } struct DerefMapToTable<'a, K: 'a, V: 'a, S: 'a>(&'a mut HashMap); diff --git a/src/entry.rs b/src/entry.rs index 7aa3424..9e03045 100644 --- a/src/entry.rs +++ b/src/entry.rs @@ -9,6 +9,7 @@ // except according to those terms. use std::mem; +use std::ops::Deref; use table::{EmptyBucket, FullBucket, SafeHash, RawTable}; use internal_entry::InternalEntry; @@ -170,6 +171,30 @@ impl<'a, K: 'a, V: 'a> VacantEntry<'a, K, V> { } } +impl VacantEntryState { + pub fn into_table(self) -> M { + match self { + NeqElem(bucket, _) => { + bucket.into_table() + } + NoElem(bucket) => { + bucket.into_table() + } + } + } +} + +impl VacantEntryState where M: Deref> { + pub fn displacement(&self, hash: SafeHash) -> usize { + let (index, table_capacity) = match self { + &NeqElem(ref bucket, _) => (bucket.index(), bucket.table().capacity()), + &NoElem(ref bucket) => (bucket.index(), bucket.table().capacity()), + }; + // Copied from FullBucket::displacement. + index.wrapping_sub(hash.inspect() as usize) & (table_capacity - 1) + } +} + // These fns are public, but the entire module is not. #[inline] diff --git a/src/internal_entry.rs b/src/internal_entry.rs index 7cf6c4c..5961a11 100644 --- a/src/internal_entry.rs +++ b/src/internal_entry.rs @@ -9,7 +9,7 @@ // except according to those terms. use table::{FullBucket, SafeHash, RawTable}; -use entry::{self, VacantEntryState}; +use entry::{self, VacantEntryState, NoElem, NeqElem}; use Entry; pub enum InternalEntry { @@ -39,3 +39,25 @@ impl<'a, K, V> InternalEntry> { entry::from_internal(self, Some(key)) } } + +impl InternalEntry { + #[inline] + pub fn convert_table(self) -> InternalEntry where M: Into { + // This entire expression should compile down to a simple copy. 
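        // (Each arm below only rewraps the bucket's raw pointer and index
        // with the converted table reference; no table data is moved, so the
        // optimizer can reduce the whole match to a move of the wrapper.)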
+ match self { + InternalEntry::Occupied { elem } => { + InternalEntry::Occupied { elem: elem.convert_table() } + } + InternalEntry::TableIsEmpty => { + InternalEntry::TableIsEmpty + } + InternalEntry::Vacant { elem, hash } => { + let elem = match elem { + NeqElem(bucket, ib) => NeqElem(bucket.convert_table(), ib), + NoElem(bucket) => NoElem(bucket.convert_table()), + }; + InternalEntry::Vacant { elem: elem, hash: hash } + } + } + } +} From ab6330c59a6f1bec47bd298958377ffc76a3f73f Mon Sep 17 00:00:00 2001 From: Piotr Czarnecki Date: Sun, 18 Sep 2016 14:35:39 +0200 Subject: [PATCH 18/24] Fix insertion under an incorrect hash --- src/adaptive_map.rs | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/src/adaptive_map.rs b/src/adaptive_map.rs index 5a9f4c3..dc52775 100644 --- a/src/adaptive_map.rs +++ b/src/adaptive_map.rs @@ -127,6 +127,7 @@ fn safeguard_vacant_entry<'a, K, V>( // This branch is very unlikely. let map = elem.into_table(); reduce_displacement(map.0); + let hash = map.0.make_hash(key); search_hashed(map, hash, |k| k == key) } else { // This should compile down to a simple copy. @@ -230,4 +231,17 @@ mod test_adaptive_map { } assert!(map.hash_builder.uses_safe_hashing()); } + + // Regression test + #[test] + fn test_safeguarded_insertion() { + let mut map = HashMap::new(); + let values = VALUES.iter().enumerate(); + for (i, &value) in values.clone() { + map.insert(value, i); + } + for (i, &value) in values { + assert_eq!(map[&value], i); + } + } } From 7b38667c68ceecdf711fc088b254c1a924bf56ec Mon Sep 17 00:00:00 2001 From: Piotr Czarnecki Date: Sun, 18 Sep 2016 14:37:58 +0200 Subject: [PATCH 19/24] Refactor --- src/adaptive_map.rs | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/src/adaptive_map.rs b/src/adaptive_map.rs index dc52775..3036ae2 100644 --- a/src/adaptive_map.rs +++ b/src/adaptive_map.rs @@ -125,10 +125,10 @@ fn safeguard_vacant_entry<'a, K, V>( if elem.displacement(hash) > DISPLACEMENT_THRESHOLD { // Probe sequence is too long. We must reduce its length. // This branch is very unlikely. - let map = elem.into_table(); - reduce_displacement(map.0); - let hash = map.0.make_hash(key); - search_hashed(map, hash, |k| k == key) + let map = elem.into_table().0; + reduce_displacement(map); + let hash = map.make_hash(key); + search_hashed(DerefMapToTable(map), hash, |k| k == key) } else { // This should compile down to a simple copy. InternalEntry::Vacant { From 066e83aa32990a16363d0df91638349e945956cd Mon Sep 17 00:00:00 2001 From: Piotr Czarnecki Date: Sun, 30 Oct 2016 01:19:53 +0200 Subject: [PATCH 20/24] try insertion and robin_hood instead of search --- src/adaptive_map.rs | 69 ++++++++++++++++++++++++++++++++++++++++++--- src/entry.rs | 7 +++-- src/lib.rs | 14 +++++++-- 3 files changed, 82 insertions(+), 8 deletions(-) diff --git a/src/adaptive_map.rs b/src/adaptive_map.rs index 3036ae2..d5ed8e3 100644 --- a/src/adaptive_map.rs +++ b/src/adaptive_map.rs @@ -24,9 +24,11 @@ use search_hashed; // Beyond this displacement, we switch to safe hashing or grow the table. const DISPLACEMENT_THRESHOLD: usize = 128; +const FORWARD_SHIFT_THRESHOLD: usize = 512; // When the map's load factor is below this threshold, we switch to safe hashing. // Otherwise, we grow the table. -const LOAD_FACTOR_THRESHOLD: f32 = 0.625; +// const LOAD_FACTOR_THRESHOLD: f32 = 0.625; +const LOAD_FACTOR_THRESHOLD: f32 = 0.2; // The displacement threshold should be high enough so that even with the maximal load factor, // it's very rarely exceeded. 
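// (Context for the new constant above: a single Robin Hood insertion can
// forward-shift a whole run of occupied buckets, not just one entry, and
// FORWARD_SHIFT_THRESHOLD caps the length of that run before the map is
// flagged for adaptation.)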
@@ -88,14 +90,58 @@ impl OneshotHash for *mut T {} impl<'a, T> OneshotHash for &'a T where T: OneshotHash {} impl<'a, T> OneshotHash for &'a mut T where T: OneshotHash {} +#[inline] +fn safeguard_insertion(bucket: &mut FullBucketMut) { + if bucket.displacement() > DISPLACEMENT_THRESHOLD { + self.table.set_flag(true); + // let map = bucket.into_table().0; + // reduce_displacement(map); + // let hash = map.make_hash(key); + // match search_hashed(DerefMapToTable(map), hash, |k| k == key) { + // InternalEntry::Occupied { elem } => { + // elem.convert_table() + // } + // _ => { + // unreachable!() + // } + // } + // reduce_displacement_and_search(bucket) + } + bucket +} + +#[inline] +fn safeguard_forward_shifted(bucket: FullBucket>>) -> FullBucket> { + let end_index = bucket.index(); + let bucket = bucket.into_table(); + let start_index = bucket.index(); + if end_index - start_index > FORWARD_SHIFT_THRESHOLD { + self.table.set_flag(true); + // let (hash, key, value) = bucket.take(); + // let map = bucket.into_table(); + // reduce_displacement(map); + // reduce_displacement_and_search(bucket + } + bucket +} + impl SafeguardedSearch for HashMap where K: Eq + Hash, S: BuildHasher { #[inline] - default fn safeguarded_search(&mut self, key: &K, hash: SafeHash) -> InternalEntryMut { + default fn safeguarded_search(key: &K, hash: SafeHash) -> InternalEntryMut { + search_hashed(&mut self.table, hash, |k| k == key) + } + #[inline] + default fn safeguard_insertion(bucket: FullBucketMut<>) { search_hashed(&mut self.table, hash, |k| k == key) } + #[inline] + default fn safeguard_forward_shifted(bucket: EmptyBucket>) -> InternalEntryMut { + // bucket.into_table().into_mut_refs().1; + true + } } impl SafeguardedSearch for HashMap @@ -111,6 +157,18 @@ impl SafeguardedSearch for HashMap } entry.convert_table() } + + #[cold] + fn reduce_displacement(&mut self) { + if self.table.size() as f32 / self.table.capacity() >= LOAD_FACTOR_THRESHOLD { + let new_capacity = max(min_cap.next_power_of_two(), INITIAL_CAPACITY); + self.resize(self.table.capacity() * 2); + } else { + // Taking this branch is extremely rare, assuming no intentional DoS attack. + self.hash_builder.switch_to_safe_hashing(); + rebuild_table(self); + } + } } #[inline] @@ -138,6 +196,11 @@ fn safeguard_vacant_entry<'a, K, V>( } } +#[cold] +fn reduce_displacement_and_search<'a, K, V>() -> FullBucket<> { + +} + // Adapt to safe hashing, if desirable. 
#[cold] fn reduce_displacement<'a, K, V>(map: &'a mut HashMap) @@ -166,8 +229,6 @@ fn rebuild_table(map: &mut HashMap) } } -struct DerefMapToTable<'a, K: 'a, V: 'a, S: 'a>(&'a mut HashMap); - impl<'a, K, V, S> Deref for DerefMapToTable<'a, K, V, S> { type Target = RawTable; #[inline(always)] diff --git a/src/entry.rs b/src/entry.rs index 9e03045..1562b5c 100644 --- a/src/entry.rs +++ b/src/entry.rs @@ -15,6 +15,7 @@ use table::{EmptyBucket, FullBucket, SafeHash, RawTable}; use internal_entry::InternalEntry; use pop_internal; use robin_hood; +use adaptive_map; pub use self::Entry::*; pub use self::VacantEntryState::*; @@ -149,8 +150,10 @@ impl<'a, K: 'a, V: 'a> VacantEntry<'a, K, V> { NeqElem(bucket, ib) => { robin_hood(bucket, ib, self.hash, self.key, value) } - NoElem(bucket) => { - bucket.put(self.hash, self.key, value).into_mut_refs().1 + NoElem(bucket) => + let bucket = bucket.put(self.hash, self.key, value); + let bucket = adaptive_map::safeguard_insertion(bucket); + bucket.into_mut_refs().1 } } } diff --git a/src/lib.rs b/src/lib.rs index f0b8230..2e10b78 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -484,7 +484,13 @@ fn robin_hood<'a, K: 'a, V: 'a>(bucket: FullBucketMut<'a, K, V>, // bucket, which is a FullBucket on top of a // FullBucketMut, into just one FullBucketMut. The "table" // refers to the inner FullBucketMut in this context. - return bucket.into_table().into_mut_refs().1; + let bucket = safeguard_forward_shifted(bucket, starting_index); + return bucket.into_mut_refs().1; + // if safeguard_forward_shifted(bucket) { + // return bucket.into_table().into_mut_refs().1; + // } else { + // return + // } }, Full(bucket) => bucket }; @@ -681,6 +687,10 @@ impl HashMap /// map.reserve(10); /// ``` pub fn reserve(&mut self, additional: usize) { + if self.table.get_flag() { + self.reduce_displacement(); + self.table.set_flag(false); + } let new_size = self.len().checked_add(additional).expect("capacity overflow"); let min_cap = self.resize_policy.min_capacity(new_size); @@ -1157,8 +1167,8 @@ impl HashMap /// assert_eq!(map[&37], "c"); /// ``` pub fn insert(&mut self, k: K, v: V) -> Option { - let hash = self.make_hash(&k); self.reserve(1); + let hash = self.make_hash(&k); self.insert_hashed_nocheck(hash, k, v) } From de3ccadd0f83b393e04283407366bde3105ee592 Mon Sep 17 00:00:00 2001 From: Piotr Czarnecki Date: Sun, 30 Oct 2016 01:25:42 +0200 Subject: [PATCH 21/24] update --- src/adaptive_map.rs | 61 +++++---------------------------------------- src/lib.rs | 3 +-- 2 files changed, 7 insertions(+), 57 deletions(-) diff --git a/src/adaptive_map.rs b/src/adaptive_map.rs index d5ed8e3..e27010a 100644 --- a/src/adaptive_map.rs +++ b/src/adaptive_map.rs @@ -71,7 +71,7 @@ pub trait OneshotHash: Hash {} pub trait SafeguardedSearch { // Method names are changed, because inherent methods shadow trait impl // methods. - fn safeguarded_search(&mut self, key: &K, hash: SafeHash) -> InternalEntryMut; + fn reduce_displacement(&mut self); } impl OneshotHash for i8 {} @@ -133,14 +133,8 @@ impl SafeguardedSearch for HashMap default fn safeguarded_search(key: &K, hash: SafeHash) -> InternalEntryMut { search_hashed(&mut self.table, hash, |k| k == key) } - #[inline] - default fn safeguard_insertion(bucket: FullBucketMut<>) { - search_hashed(&mut self.table, hash, |k| k == key) - } - #[inline] - default fn safeguard_forward_shifted(bucket: EmptyBucket>) -> InternalEntryMut { - // bucket.into_table().into_mut_refs().1; - true + default fn reduce_displacement(&mut self) { + // nothing to do. 
} } @@ -160,9 +154,10 @@ impl SafeguardedSearch for HashMap #[cold] fn reduce_displacement(&mut self) { - if self.table.size() as f32 / self.table.capacity() >= LOAD_FACTOR_THRESHOLD { - let new_capacity = max(min_cap.next_power_of_two(), INITIAL_CAPACITY); + let load_factor = self.table.size() as f32 / self.table.capacity() as f32; + if load_factor >= LOAD_FACTOR_THRESHOLD { self.resize(self.table.capacity() * 2); + self.table.set_flag(false); } else { // Taking this branch is extremely rare, assuming no intentional DoS attack. self.hash_builder.switch_to_safe_hashing(); @@ -196,28 +191,6 @@ fn safeguard_vacant_entry<'a, K, V>( } } -#[cold] -fn reduce_displacement_and_search<'a, K, V>() -> FullBucket<> { - -} - -// Adapt to safe hashing, if desirable. -#[cold] -fn reduce_displacement<'a, K, V>(map: &'a mut HashMap) - where K: Eq + Hash -{ - let table_capacity = map.table.capacity(); - let load_factor = map.len() as f32 / table_capacity as f32; - if load_factor >= LOAD_FACTOR_THRESHOLD { - map.resize(table_capacity * 2); - } else { - // Taking this branch is extremely rare -- as rare as proton decay. That's assuming - // continuous insertion on a single CPU core, without intentional DoS attack. - map.hash_builder.switch_to_safe_hashing(); - rebuild_table(map); - } -} - fn rebuild_table(map: &mut HashMap) where K: Eq + Hash { @@ -229,28 +202,6 @@ fn rebuild_table(map: &mut HashMap) } } -impl<'a, K, V, S> Deref for DerefMapToTable<'a, K, V, S> { - type Target = RawTable; - #[inline(always)] - fn deref(&self) -> &Self::Target { - &self.0.table - } -} - -impl<'a, K, V, S> DerefMut for DerefMapToTable<'a, K, V, S> { - #[inline(always)] - fn deref_mut(&mut self) -> &mut Self::Target { - &mut self.0.table - } -} - -impl<'a, K, V, S> Into<&'a mut RawTable> for DerefMapToTable<'a, K, V, S> { - #[inline(always)] - fn into(self) -> &'a mut RawTable { - &mut self.0.table - } -} - #[cfg(test)] mod test_adaptive_map { use HashMap; diff --git a/src/lib.rs b/src/lib.rs index 2e10b78..c337ac0 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -484,7 +484,7 @@ fn robin_hood<'a, K: 'a, V: 'a>(bucket: FullBucketMut<'a, K, V>, // bucket, which is a FullBucket on top of a // FullBucketMut, into just one FullBucketMut. The "table" // refers to the inner FullBucketMut in this context. 
- let bucket = safeguard_forward_shifted(bucket, starting_index); + let bucket = adaptive_map::safeguard_forward_shifted(bucket); return bucket.into_mut_refs().1; // if safeguard_forward_shifted(bucket) { // return bucket.into_table().into_mut_refs().1; @@ -689,7 +689,6 @@ impl HashMap pub fn reserve(&mut self, additional: usize) { if self.table.get_flag() { self.reduce_displacement(); - self.table.set_flag(false); } let new_size = self.len().checked_add(additional).expect("capacity overflow"); let min_cap = self.resize_policy.min_capacity(new_size); From 6360071ea6e4c0aa2c4552cba377d4b499636789 Mon Sep 17 00:00:00 2001 From: Piotr Czarnecki Date: Sun, 30 Oct 2016 14:59:05 +0100 Subject: [PATCH 22/24] Add flag for displacement --- src/adaptive_map.rs | 95 +++++++++++++++------------------------------ src/entry.rs | 12 ++++-- src/lib.rs | 29 +++++++++++--- 3 files changed, 63 insertions(+), 73 deletions(-) diff --git a/src/adaptive_map.rs b/src/adaptive_map.rs index e27010a..68689a7 100644 --- a/src/adaptive_map.rs +++ b/src/adaptive_map.rs @@ -15,7 +15,9 @@ use std::ops::{Deref, DerefMut}; use adaptive_hashing::AdaptiveState; use table::{ RawTable, - SafeHash + SafeHash, + FullBucketMut, + FullBucket, }; use internal_entry::InternalEntry; use entry::VacantEntryState; @@ -72,6 +74,8 @@ pub trait SafeguardedSearch { // Method names are changed, because inherent methods shadow trait impl // methods. fn reduce_displacement(&mut self); + + fn is_safeguarded(&self) -> bool; } impl OneshotHash for i8 {} @@ -91,37 +95,30 @@ impl<'a, T> OneshotHash for &'a T where T: OneshotHash {} impl<'a, T> OneshotHash for &'a mut T where T: OneshotHash {} #[inline] -fn safeguard_insertion(bucket: &mut FullBucketMut) { - if bucket.displacement() > DISPLACEMENT_THRESHOLD { - self.table.set_flag(true); - // let map = bucket.into_table().0; - // reduce_displacement(map); - // let hash = map.make_hash(key); - // match search_hashed(DerefMapToTable(map), hash, |k| k == key) { - // InternalEntry::Occupied { elem } => { - // elem.convert_table() - // } - // _ => { - // unreachable!() - // } - // } - // reduce_displacement_and_search(bucket) +pub fn safeguard_insertion( + bucket: &FullBucketMut, + reduce_displacement_flag: Option<&mut bool>) { + if let Some(flag) = reduce_displacement_flag { + if bucket.displacement() > DISPLACEMENT_THRESHOLD { + *flag = true; + } } - bucket } #[inline] -fn safeguard_forward_shifted(bucket: FullBucket>>) -> FullBucket> { +pub fn safeguard_forward_shifted<'a, K, V>( + bucket: FullBucket>>, + mut reduce_displacement_flag: Option<&'a mut bool>) + -> FullBucket> { let end_index = bucket.index(); let bucket = bucket.into_table(); let start_index = bucket.index(); - if end_index - start_index > FORWARD_SHIFT_THRESHOLD { - self.table.set_flag(true); - // let (hash, key, value) = bucket.take(); - // let map = bucket.into_table(); - // reduce_displacement(map); - // reduce_displacement_and_search(bucket + if let Some(flag) = reduce_displacement_flag.as_mut() { + if end_index - start_index > FORWARD_SHIFT_THRESHOLD { + **flag = true; + } } + safeguard_insertion(&bucket, reduce_displacement_flag); bucket } @@ -129,65 +126,34 @@ impl SafeguardedSearch for HashMap where K: Eq + Hash, S: BuildHasher { - #[inline] - default fn safeguarded_search(key: &K, hash: SafeHash) -> InternalEntryMut { - search_hashed(&mut self.table, hash, |k| k == key) - } default fn reduce_displacement(&mut self) { // nothing to do. 
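        // (Correct for ordinary build hashers: the displacement flag is only
        // set when is_safeguarded() reports an adaptive map, so there is
        // never adaptation work to do in this default impl.)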
} + + default fn is_safeguarded(&self) -> bool { + false + } } impl SafeguardedSearch for HashMap where K: Eq + OneshotHash { - #[inline] - fn safeguarded_search(&mut self, key: &K, hash: SafeHash) - -> InternalEntryMut { - - let mut entry = search_hashed(DerefMapToTable(self), hash, |k| k == key); - if let InternalEntry::Vacant { elem, hash } = entry { - entry = safeguard_vacant_entry(elem, hash, key) - } - entry.convert_table() - } - #[cold] fn reduce_displacement(&mut self) { let load_factor = self.table.size() as f32 / self.table.capacity() as f32; if load_factor >= LOAD_FACTOR_THRESHOLD { - self.resize(self.table.capacity() * 2); - self.table.set_flag(false); + // Probe sequence is too long. We must reduce its length. + let new_capacity = self.table.capacity() * 2; + self.resize(new_capacity); } else { // Taking this branch is extremely rare, assuming no intentional DoS attack. self.hash_builder.switch_to_safe_hashing(); rebuild_table(self); } } -} -#[inline] -fn safeguard_vacant_entry<'a, K, V>( - elem: VacantEntryState>, - hash: SafeHash, - key: &K, -) -> InternalEntry> - where K: Eq + Hash -{ - // Check displacement. - if elem.displacement(hash) > DISPLACEMENT_THRESHOLD { - // Probe sequence is too long. We must reduce its length. - // This branch is very unlikely. - let map = elem.into_table().0; - reduce_displacement(map); - let hash = map.make_hash(key); - search_hashed(DerefMapToTable(map), hash, |k| k == key) - } else { - // This should compile down to a simple copy. - InternalEntry::Vacant { - elem: elem, - hash: hash, - } + fn is_safeguarded(&self) -> bool { + true } } @@ -238,6 +204,7 @@ mod test_adaptive_map { map.insert(value, ()); } assert!(!map.hash_builder.uses_safe_hashing()); + map.reserve(1000); for &value in values.take(8) { map.insert(value, ()); } diff --git a/src/entry.rs b/src/entry.rs index 1562b5c..2d69ec4 100644 --- a/src/entry.rs +++ b/src/entry.rs @@ -39,6 +39,7 @@ pub struct OccupiedEntry<'a, K: 'a, V: 'a> { pub struct VacantEntry<'a, K: 'a, V: 'a> { hash: SafeHash, key: K, + reduce_displacement_flag: Option<&'a mut bool>, elem: VacantEntryState>, } @@ -148,11 +149,11 @@ impl<'a, K: 'a, V: 'a> VacantEntry<'a, K, V> { pub fn insert(self, value: V) -> &'a mut V { match self.elem { NeqElem(bucket, ib) => { - robin_hood(bucket, ib, self.hash, self.key, value) + robin_hood(bucket, ib, self.hash, self.key, value, self.reduce_displacement_flag) } - NoElem(bucket) => + NoElem(bucket) => { let bucket = bucket.put(self.hash, self.key, value); - let bucket = adaptive_map::safeguard_insertion(bucket); + adaptive_map::safeguard_insertion(&bucket, self.reduce_displacement_flag); bucket.into_mut_refs().1 } } @@ -172,6 +173,10 @@ impl<'a, K: 'a, V: 'a> VacantEntry<'a, K, V> { pub fn key(&self) -> &K { &self.key } + + pub fn set_flag_for_reduce_displacement(&mut self, flag: &'a mut bool) { + self.reduce_displacement_flag = Some(flag); + } } impl VacantEntryState { @@ -214,6 +219,7 @@ pub fn from_internal(internal: InternalEntry>, k Some(Entry::Vacant(VacantEntry { hash: hash, key: key.unwrap(), + reduce_displacement_flag: None, elem: elem, })) } diff --git a/src/lib.rs b/src/lib.rs index c337ac0..74ee0e2 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -360,6 +360,8 @@ pub struct HashMap { // All hashes are keyed on these values, to prevent hash collision attacks. 
hash_builder: S, + reduce_displacement_flag: bool, + resize_policy: DefaultResizePolicy, } @@ -449,7 +451,8 @@ fn robin_hood<'a, K: 'a, V: 'a>(bucket: FullBucketMut<'a, K, V>, mut ib: usize, mut hash: SafeHash, mut key: K, - mut val: V) + mut val: V, + reduce_displacement_flag: Option<&'a mut bool>) -> &'a mut V { let starting_index = bucket.index(); let size = { @@ -484,7 +487,7 @@ fn robin_hood<'a, K: 'a, V: 'a>(bucket: FullBucketMut<'a, K, V>, // bucket, which is a FullBucket on top of a // FullBucketMut, into just one FullBucketMut. The "table" // refers to the inner FullBucketMut in this context. - let bucket = adaptive_map::safeguard_forward_shifted(bucket); + let bucket = adaptive_map::safeguard_forward_shifted(bucket, reduce_displacement_flag); return bucket.into_mut_refs().1; // if safeguard_forward_shifted(bucket) { // return bucket.into_table().into_mut_refs().1; @@ -612,6 +615,7 @@ impl HashMap hash_builder: hash_builder, resize_policy: DefaultResizePolicy::new(), table: RawTable::new(0), + reduce_displacement_flag: false, } } @@ -646,6 +650,7 @@ impl HashMap hash_builder: hash_builder, resize_policy: resize_policy, table: RawTable::new(internal_cap), + reduce_displacement_flag: false, } } @@ -687,8 +692,9 @@ impl HashMap /// map.reserve(10); /// ``` pub fn reserve(&mut self, additional: usize) { - if self.table.get_flag() { + if self.reduce_displacement_flag { self.reduce_displacement(); + self.reduce_displacement_flag = false; } let new_size = self.len().checked_add(additional).expect("capacity overflow"); let min_cap = self.resize_policy.min_capacity(new_size); @@ -827,12 +833,16 @@ impl HashMap /// If the key already exists, the hashtable will be returned untouched /// and a reference to the existing element will be returned. fn insert_hashed_nocheck(&mut self, hash: SafeHash, k: K, v: V) -> Option { - let entry = self.safeguarded_search(&k, hash).into_entry(k); + let is_safeguarded = self.is_safeguarded(); + let mut entry = search_hashed(&mut self.table, hash, |key| key == &k).into_entry(k); match entry { Some(Occupied(mut elem)) => { Some(elem.insert(v)) } - Some(Vacant(elem)) => { + Some(Vacant(mut elem)) => { + if is_safeguarded { + elem.set_flag_for_reduce_displacement(&mut self.reduce_displacement_flag); + } elem.insert(v); None } @@ -961,7 +971,14 @@ impl HashMap // Gotta resize now. self.reserve(1); let hash = self.make_hash(&key); - self.safeguarded_search(&key, hash).into_entry(key).expect("unreachable") + let is_safeguarded = self.is_safeguarded(); + let mut entry = search_hashed(&mut self.table, hash, |k| k == &key).into_entry(key).expect("unreachable"); + if is_safeguarded { + if let &mut Vacant(ref mut vacant) = &mut entry { + vacant.set_flag_for_reduce_displacement(&mut self.reduce_displacement_flag); + } + } + entry } /// Gets the given key's corresponding entry in the map for in-place From 5e9ca831b0b7982241f83e40b3bb5747d1e3af7e Mon Sep 17 00:00:00 2001 From: Piotr Czarnecki Date: Sun, 30 Oct 2016 15:05:17 +0100 Subject: [PATCH 23/24] refactor --- src/adaptive_map.rs | 6 +++--- src/lib.rs | 6 ++---- 2 files changed, 5 insertions(+), 7 deletions(-) diff --git a/src/adaptive_map.rs b/src/adaptive_map.rs index 68689a7..e054220 100644 --- a/src/adaptive_map.rs +++ b/src/adaptive_map.rs @@ -75,7 +75,7 @@ pub trait SafeguardedSearch { // methods. fn reduce_displacement(&mut self); - fn is_safeguarded(&self) -> bool; + fn is_safeguarded() -> bool; } impl OneshotHash for i8 {} @@ -130,7 +130,7 @@ impl SafeguardedSearch for HashMap // nothing to do. 
} - default fn is_safeguarded(&self) -> bool { + default fn is_safeguarded() -> bool { false } } @@ -152,7 +152,7 @@ impl SafeguardedSearch for HashMap } } - fn is_safeguarded(&self) -> bool { + fn is_safeguarded() -> bool { true } } diff --git a/src/lib.rs b/src/lib.rs index 74ee0e2..63ccf06 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -833,14 +833,13 @@ impl HashMap /// If the key already exists, the hashtable will be returned untouched /// and a reference to the existing element will be returned. fn insert_hashed_nocheck(&mut self, hash: SafeHash, k: K, v: V) -> Option { - let is_safeguarded = self.is_safeguarded(); let mut entry = search_hashed(&mut self.table, hash, |key| key == &k).into_entry(k); match entry { Some(Occupied(mut elem)) => { Some(elem.insert(v)) } Some(Vacant(mut elem)) => { - if is_safeguarded { + if Self::is_safeguarded() { elem.set_flag_for_reduce_displacement(&mut self.reduce_displacement_flag); } elem.insert(v); @@ -971,9 +970,8 @@ impl HashMap // Gotta resize now. self.reserve(1); let hash = self.make_hash(&key); - let is_safeguarded = self.is_safeguarded(); let mut entry = search_hashed(&mut self.table, hash, |k| k == &key).into_entry(key).expect("unreachable"); - if is_safeguarded { + if Self::is_safeguarded() { if let &mut Vacant(ref mut vacant) = &mut entry { vacant.set_flag_for_reduce_displacement(&mut self.reduce_displacement_flag); } From 76c38753ee418d4919307c9d91fd22a672b982e9 Mon Sep 17 00:00:00 2001 From: Piotr Czarnecki Date: Sun, 30 Oct 2016 15:14:23 +0100 Subject: [PATCH 24/24] Remove unused code --- src/adaptive_map.rs | 4 ---- src/entry.rs | 25 ------------------------- src/internal_entry.rs | 24 +----------------------- src/lib.rs | 2 +- src/table.rs | 24 ------------------------ 5 files changed, 2 insertions(+), 77 deletions(-) diff --git a/src/adaptive_map.rs b/src/adaptive_map.rs index e054220..bb6b1ca 100644 --- a/src/adaptive_map.rs +++ b/src/adaptive_map.rs @@ -10,19 +10,15 @@ use std::hash::{Hash, BuildHasher}; use std::mem::replace; -use std::ops::{Deref, DerefMut}; use adaptive_hashing::AdaptiveState; use table::{ RawTable, - SafeHash, FullBucketMut, FullBucket, }; use internal_entry::InternalEntry; -use entry::VacantEntryState; use HashMap; -use search_hashed; // Beyond this displacement, we switch to safe hashing or grow the table. const DISPLACEMENT_THRESHOLD: usize = 128; diff --git a/src/entry.rs b/src/entry.rs index 2d69ec4..e94a790 100644 --- a/src/entry.rs +++ b/src/entry.rs @@ -9,7 +9,6 @@ // except according to those terms. use std::mem; -use std::ops::Deref; use table::{EmptyBucket, FullBucket, SafeHash, RawTable}; use internal_entry::InternalEntry; @@ -179,30 +178,6 @@ impl<'a, K: 'a, V: 'a> VacantEntry<'a, K, V> { } } -impl VacantEntryState { - pub fn into_table(self) -> M { - match self { - NeqElem(bucket, _) => { - bucket.into_table() - } - NoElem(bucket) => { - bucket.into_table() - } - } - } -} - -impl VacantEntryState where M: Deref> { - pub fn displacement(&self, hash: SafeHash) -> usize { - let (index, table_capacity) = match self { - &NeqElem(ref bucket, _) => (bucket.index(), bucket.table().capacity()), - &NoElem(ref bucket) => (bucket.index(), bucket.table().capacity()), - }; - // Copied from FullBucket::displacement. - index.wrapping_sub(hash.inspect() as usize) & (table_capacity - 1) - } -} - // These fns are public, but the entire module is not. 
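// (An item can be `pub` yet remain crate-internal: the surrounding `entry`
// module is private, so these functions are reachable only through whatever
// the crate chooses to re-export.)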
#[inline] diff --git a/src/internal_entry.rs b/src/internal_entry.rs index 5961a11..7cf6c4c 100644 --- a/src/internal_entry.rs +++ b/src/internal_entry.rs @@ -9,7 +9,7 @@ // except according to those terms. use table::{FullBucket, SafeHash, RawTable}; -use entry::{self, VacantEntryState, NoElem, NeqElem}; +use entry::{self, VacantEntryState}; use Entry; pub enum InternalEntry { @@ -39,25 +39,3 @@ impl<'a, K, V> InternalEntry> { entry::from_internal(self, Some(key)) } } - -impl InternalEntry { - #[inline] - pub fn convert_table(self) -> InternalEntry where M: Into { - // This entire expression should compile down to a simple copy. - match self { - InternalEntry::Occupied { elem } => { - InternalEntry::Occupied { elem: elem.convert_table() } - } - InternalEntry::TableIsEmpty => { - InternalEntry::TableIsEmpty - } - InternalEntry::Vacant { elem, hash } => { - let elem = match elem { - NeqElem(bucket, ib) => NeqElem(bucket.convert_table(), ib), - NoElem(bucket) => NoElem(bucket.convert_table()), - }; - InternalEntry::Vacant { elem: elem, hash: hash } - } - } - } -} diff --git a/src/lib.rs b/src/lib.rs index 63ccf06..2cfe896 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -833,7 +833,7 @@ impl HashMap /// If the key already exists, the hashtable will be returned untouched /// and a reference to the existing element will be returned. fn insert_hashed_nocheck(&mut self, hash: SafeHash, k: K, v: V) -> Option { - let mut entry = search_hashed(&mut self.table, hash, |key| key == &k).into_entry(k); + let entry = search_hashed(&mut self.table, hash, |key| key == &k).into_entry(k); match entry { Some(Occupied(mut elem)) => { Some(elem.insert(v)) diff --git a/src/table.rs b/src/table.rs index fd0d137..ae06d5a 100644 --- a/src/table.rs +++ b/src/table.rs @@ -189,14 +189,6 @@ impl FullBucket { pub fn into_table(self) -> M { self.table } - // Convert the table. - pub fn convert_table(self) -> FullBucket where M: Into { - FullBucket { - raw: self.raw, - idx: self.idx, - table: self.table.into(), - } - } /// Get the raw index. pub fn index(&self) -> usize { self.idx @@ -208,22 +200,6 @@ impl EmptyBucket { pub fn table(&self) -> &M { &self.table } - /// Move out the reference to the table. - pub fn into_table(self) -> M { - self.table - } - // Convert the table. - pub fn convert_table(self) -> EmptyBucket where M: Into { - EmptyBucket { - raw: self.raw, - idx: self.idx, - table: self.table.into(), - } - } - /// Get the raw index. - pub fn index(&self) -> usize { - self.idx - } } impl Bucket {
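
A closing sketch of the end-to-end behavior the series implements (editorial
illustration, not one of the patches; written as an in-crate test because
`hash_builder` is a private field, and using only APIs shown above):

    #[cfg(test)]
    mod adaptive_behavior_sketch {
        use HashMap;

        #[test]
        fn benign_keys_stay_on_fast_hashing() {
            // Integer keys implement OneshotHash, so the map starts with the
            // fast, deterministic one-shot hasher.
            let mut map = HashMap::new();
            for i in 0u64..1_000 {
                map.insert(i, i);
            }
            // Well-distributed hashes keep every probe sequence far below
            // DISPLACEMENT_THRESHOLD, so no adaptation is triggered; crafted
            // colliding keys (like VALUES in test_dos_safeguard) would flip
            // the map to seeded SipHash instead.
            assert!(!map.hash_builder.uses_safe_hashing());
        }
    }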