diff --git a/benches/bench.rs b/benches/bench.rs index 64fcb1c6..67d530a5 100644 --- a/benches/bench.rs +++ b/benches/bench.rs @@ -5,6 +5,7 @@ extern crate fnv; #[macro_use] extern crate lazy_static; +use std::hash::Hash; use fnv::FnvHasher; use std::hash::BuildHasherDefault; type FnvBuilder = BuildHasherDefault; @@ -361,13 +362,16 @@ fn lookup_orderedmap_10_000_noexist(b: &mut Bencher) { // number of items to look up const LOOKUP_MAP_SIZE: u32 = 100_000_u32; const LOOKUP_SAMPLE_SIZE: u32 = 5000; +const SORT_MAP_SIZE: usize = 10_000; +// use lazy_static so that comparison benchmarks use the exact same inputs lazy_static! { static ref KEYS: Vec = { shuffled_keys(0..LOOKUP_MAP_SIZE) }; } + lazy_static! { static ref HMAP_100K: HashMap = { let c = LOOKUP_MAP_SIZE; @@ -392,6 +396,25 @@ lazy_static! { }; } +lazy_static! { + static ref OMAP_SORT_U32: OrderMap = { + let mut map = OrderMap::with_capacity(SORT_MAP_SIZE); + for &key in &KEYS[..SORT_MAP_SIZE] { + map.insert(key, key); + } + map + }; +} +lazy_static! { + static ref OMAP_SORT_S: OrderMap = { + let mut map = OrderMap::with_capacity(SORT_MAP_SIZE); + for &key in &KEYS[..SORT_MAP_SIZE] { + map.insert(format!("{:^16x}", &key), String::new()); + } + map + }; +} + #[bench] fn lookup_hashmap_100_000_multi(b: &mut Bencher) { let map = &*HMAP_100K; @@ -643,3 +666,60 @@ fn many_retain_hashmap_100_000(b: &mut Bencher) { map }); } + + +// simple sort impl for comparison +pub fn simple_sort(m: &mut OrderMap) { + let mut ordered: Vec<_> = m.drain(..).collect(); + ordered.sort_by(|left, right| left.0.cmp(&right.0)); + m.extend(ordered); +} + + +#[bench] +fn ordermap_sort_s(b: &mut Bencher) { + let map = OMAP_SORT_S.clone(); + + // there's a map clone there, but it's still useful to profile this + b.iter(|| { + let mut map = map.clone(); + map.sort_keys(); + map + }); +} + +#[bench] +fn ordermap_simple_sort_s(b: &mut Bencher) { + let map = OMAP_SORT_S.clone(); + + // there's a map clone there, but it's still useful to profile this + b.iter(|| { + let mut map = map.clone(); + simple_sort(&mut map); + map + }); +} + +#[bench] +fn ordermap_sort_u32(b: &mut Bencher) { + let map = OMAP_SORT_U32.clone(); + + // there's a map clone there, but it's still useful to profile this + b.iter(|| { + let mut map = map.clone(); + map.sort_keys(); + map + }); +} + +#[bench] +fn ordermap_simple_sort_u32(b: &mut Bencher) { + let map = OMAP_SORT_U32.clone(); + + // there's a map clone there, but it's still useful to profile this + b.iter(|| { + let mut map = map.clone(); + simple_sort(&mut map); + map + }); +} diff --git a/benches/faststring.rs b/benches/faststring.rs index a4e07b89..b388f45a 100644 --- a/benches/faststring.rs +++ b/benches/faststring.rs @@ -1,16 +1,11 @@ #![feature(test)] extern crate test; extern crate rand; -extern crate fnv; extern crate lazy_static; -use fnv::FnvHasher; -use std::hash::BuildHasherDefault; -type FnvBuilder = BuildHasherDefault; - use test::Bencher; -#[macro_use] extern crate ordermap; +extern crate ordermap; use ordermap::OrderMap; diff --git a/src/lib.rs b/src/lib.rs index 89fc4c90..8d786ca1 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -45,6 +45,11 @@ fn hash_elem_using(build: &B, k: &K) -> HashVa #[derive(Copy, Debug)] struct HashValue(usize); +impl HashValue { + #[inline(always)] + fn get(self) -> usize { self.0 } +} + impl Clone for HashValue { #[inline] fn clone(&self) -> Self { *self } @@ -1044,6 +1049,57 @@ impl OrderMap } } + /// Sort the map’s key-value pairs by the default ordering of the keys. + /// + /// See `sort_by` for details. + pub fn sort_keys(&mut self) + where K: Ord, + { + self.sort_by(|k1, _, k2, _| Ord::cmp(k1, k2)) + } + + /// Sort the map’s key-value pairs in place using the comparison + /// function `compare`. + /// + /// The comparison function receives two key and value pairs to compare (you + /// can sort by keys or values or their combination as needed). + /// + /// Computes in **O(n log n)** time and **O(n)** space. The sort is stable. + pub fn sort_by(&mut self, mut compare: F) + where F: FnMut(&K, &V, &K, &V) -> Ordering, + { + // here we temporarily use the hash field in a bucket to store the old + // index instead. + // + // Save the old hash values in `side_index`. + // Then we can sort `self.entries` in place. + let mut side_index = Vec::from_iter(enumerate(&mut self.entries).map(|(i, elt)| { + replace(&mut elt.hash, HashValue(i)).get() + })); + + self.entries.sort_by(move |ei, ej| compare(&ei.key, &ei.value, &ej.key, &ej.value)); + + // Here we write back the hash values from side_index and fill + // in side_index with a mapping from the old to the new index instead. + for (i, ent) in enumerate(&mut self.entries) { + let old_index = ent.hash.get(); + ent.hash = HashValue(replace(&mut side_index[old_index], i)); + } + + // Apply new index to self.indices + dispatch_32_vs_64!(self.apply_new_index(&side_index)); + } + + fn apply_new_index(&mut self, new_index: &[usize]) + where Sz: Size + { + for pos in self.indices.iter_mut() { + if let Some((i, _)) = pos.resolve::() { + pos.set_pos::(new_index[i]); + } + } + } + /// Sort the key-value pairs of the map and return a by value iterator of /// the key-value pairs with the result. /// diff --git a/src/set.rs b/src/set.rs index 1ba5dc83..52745893 100644 --- a/src/set.rs +++ b/src/set.rs @@ -342,6 +342,24 @@ impl OrderSet self.map.retain(move |x, &mut ()| keep(x)) } + /// Sort the set’s values by their default ordering. + /// + /// See `sort_by` for details. + pub fn sort(&mut self) + where T: Ord, + { + self.map.sort_keys() + } + + /// Sort the set’s values in place using the comparison function `compare`. + /// + /// Computes in **O(n log n)** time and **O(n)** space. The sort is stable. + pub fn sort_by(&mut self, mut compare: F) + where F: FnMut(&T, &T) -> Ordering, + { + self.map.sort_by(move |a, _, b, _| compare(a, b)); + } + /// Sort the values of the set and return a by value iterator of /// the values with the result. /// diff --git a/tests/quick.rs b/tests/quick.rs index 89a320e8..8d2f8268 100644 --- a/tests/quick.rs +++ b/tests/quick.rs @@ -273,6 +273,49 @@ quickcheck! { // check the order itertools::assert_equal(map.keys(), initial_map.keys().filter(|&k| !remove_map.contains_key(k))); } + + fn sort_1(keyvals: Large>) -> () { + let mut map: OrderMap<_, _> = OrderMap::from_iter(keyvals.to_vec()); + let mut answer = keyvals.0; + answer.sort_by_key(|t| t.0); + + // reverse dedup: Because OrderMap::from_iter keeps the last value for + // identical keys + answer.reverse(); + answer.dedup_by_key(|t| t.0); + answer.reverse(); + + map.sort_by(|k1, _, k2, _| Ord::cmp(k1, k2)); + + // check it contains all the values it should + for &(key, val) in &answer { + assert_eq!(map[&key], val); + } + + // check the order + + let mapv = Vec::from_iter(map); + assert_eq!(answer, mapv); + + } + + fn sort_2(keyvals: Large>) -> () { + let mut map: OrderMap<_, _> = OrderMap::from_iter(keyvals.to_vec()); + map.sort_by(|_, v1, _, v2| Ord::cmp(v1, v2)); + assert_sorted_by_key(map, |t| t.1); + } +} + +fn assert_sorted_by_key(iterable: I, key: Key) + where I: IntoIterator, + I::Item: Ord + Clone + Debug, + Key: Fn(&I::Item) -> X, + X: Ord, +{ + let input = Vec::from_iter(iterable); + let mut sorted = input.clone(); + sorted.sort_by_key(key); + assert_eq!(input, sorted); } #[derive(Clone, Debug, Hash, PartialEq, Eq)]