From 06b82be7301cb1efb7e033f5b1f44d71e8625f60 Mon Sep 17 00:00:00 2001 From: LukeMathWalker Date: Sat, 2 Feb 2019 17:42:40 +0000 Subject: [PATCH 01/81] Promoted module to directory --- src/{quantile.rs => quantile/mod.rs} | 0 1 file changed, 0 insertions(+), 0 deletions(-) rename src/{quantile.rs => quantile/mod.rs} (100%) diff --git a/src/quantile.rs b/src/quantile/mod.rs similarity index 100% rename from src/quantile.rs rename to src/quantile/mod.rs From 47c1696ed1a5f3cf57c5d42402d0577d148ec9ef Mon Sep 17 00:00:00 2001 From: LukeMathWalker Date: Sat, 2 Feb 2019 17:49:09 +0000 Subject: [PATCH 02/81] Moved interpolate to separate file --- src/quantile/interpolate.rs | 163 +++++++++++++++++++++++++++++++++ src/quantile/mod.rs | 176 +----------------------------------- 2 files changed, 166 insertions(+), 173 deletions(-) create mode 100644 src/quantile/interpolate.rs diff --git a/src/quantile/interpolate.rs b/src/quantile/interpolate.rs new file mode 100644 index 00000000..86ec82e4 --- /dev/null +++ b/src/quantile/interpolate.rs @@ -0,0 +1,163 @@ +//! Interpolation strategies. +use ndarray::azip; +use ndarray::prelude::*; +use num_traits::{FromPrimitive, ToPrimitive}; +use std::ops::{Add, Div}; + +/// Used to provide an interpolation strategy to [`quantile_axis_mut`]. +/// +/// [`quantile_axis_mut`]: ../trait.QuantileExt.html#tymethod.quantile_axis_mut +pub trait Interpolate { + #[doc(hidden)] + fn float_quantile_index(q: f64, len: usize) -> f64 { + ((len - 1) as f64) * q + } + #[doc(hidden)] + fn lower_index(q: f64, len: usize) -> usize { + Self::float_quantile_index(q, len).floor() as usize + } + #[doc(hidden)] + fn higher_index(q: f64, len: usize) -> usize { + Self::float_quantile_index(q, len).ceil() as usize + } + #[doc(hidden)] + fn float_quantile_index_fraction(q: f64, len: usize) -> f64 { + Self::float_quantile_index(q, len).fract() + } + #[doc(hidden)] + fn needs_lower(q: f64, len: usize) -> bool; + #[doc(hidden)] + fn needs_higher(q: f64, len: usize) -> bool; + #[doc(hidden)] + fn interpolate( + lower: Option>, + higher: Option>, + q: f64, + len: usize, + ) -> Array + where + D: Dimension; +} + +/// Select the higher value. +pub struct Higher; +/// Select the lower value. +pub struct Lower; +/// Select the nearest value. +pub struct Nearest; +/// Select the midpoint of the two values (`(lower + higher) / 2`). +pub struct Midpoint; +/// Linearly interpolate between the two values +/// (`lower + (higher - lower) * fraction`, where `fraction` is the +/// fractional part of the index surrounded by `lower` and `higher`). +pub struct Linear; + +impl Interpolate for Higher { + fn needs_lower(_q: f64, _len: usize) -> bool { + false + } + fn needs_higher(_q: f64, _len: usize) -> bool { + true + } + fn interpolate( + _lower: Option>, + higher: Option>, + _q: f64, + _len: usize, + ) -> Array { + higher.unwrap() + } +} + +impl Interpolate for Lower { + fn needs_lower(_q: f64, _len: usize) -> bool { + true + } + fn needs_higher(_q: f64, _len: usize) -> bool { + false + } + fn interpolate( + lower: Option>, + _higher: Option>, + _q: f64, + _len: usize, + ) -> Array { + lower.unwrap() + } +} + +impl Interpolate for Nearest { + fn needs_lower(q: f64, len: usize) -> bool { + >::float_quantile_index_fraction(q, len) < 0.5 + } + fn needs_higher(q: f64, len: usize) -> bool { + !>::needs_lower(q, len) + } + fn interpolate( + lower: Option>, + higher: Option>, + q: f64, + len: usize, + ) -> Array { + if >::needs_lower(q, len) { + lower.unwrap() + } else { + higher.unwrap() + } + } +} + +impl Interpolate for Midpoint + where + T: Add + Div + Clone + FromPrimitive, +{ + fn needs_lower(_q: f64, _len: usize) -> bool { + true + } + fn needs_higher(_q: f64, _len: usize) -> bool { + true + } + fn interpolate( + lower: Option>, + higher: Option>, + _q: f64, + _len: usize, + ) -> Array + where + D: Dimension, + { + let denom = T::from_u8(2).unwrap(); + (lower.unwrap() + higher.unwrap()).mapv_into(|x| x / denom.clone()) + } +} + +impl Interpolate for Linear + where + T: Add + Clone + FromPrimitive + ToPrimitive, +{ + fn needs_lower(_q: f64, _len: usize) -> bool { + true + } + fn needs_higher(_q: f64, _len: usize) -> bool { + true + } + fn interpolate( + lower: Option>, + higher: Option>, + q: f64, + len: usize, + ) -> Array + where + D: Dimension, + { + let fraction = >::float_quantile_index_fraction(q, len); + let mut a = lower.unwrap(); + let b = higher.unwrap(); + azip!(mut a, ref b in { + let a_f64 = a.to_f64().unwrap(); + let b_f64 = b.to_f64().unwrap(); + *a = a.clone() + T::from_f64((b_f64 - a_f64) * fraction).unwrap(); + }); + a + } +} diff --git a/src/quantile/mod.rs b/src/quantile/mod.rs index 76c9c457..040bb6ce 100644 --- a/src/quantile/mod.rs +++ b/src/quantile/mod.rs @@ -1,181 +1,9 @@ -use interpolate::Interpolate; +use self::interpolate::Interpolate; use ndarray::prelude::*; use ndarray::{s, Data, DataMut, RemoveAxis}; use std::cmp; use {MaybeNan, MaybeNanExt, Sort1dExt}; -/// Interpolation strategies. -pub mod interpolate { - use ndarray::azip; - use ndarray::prelude::*; - use num_traits::{FromPrimitive, NumOps, ToPrimitive}; - - /// Used to provide an interpolation strategy to [`quantile_axis_mut`]. - /// - /// [`quantile_axis_mut`]: ../trait.QuantileExt.html#tymethod.quantile_axis_mut - pub trait Interpolate { - #[doc(hidden)] - fn float_quantile_index(q: f64, len: usize) -> f64 { - ((len - 1) as f64) * q - } - #[doc(hidden)] - fn lower_index(q: f64, len: usize) -> usize { - Self::float_quantile_index(q, len).floor() as usize - } - #[doc(hidden)] - fn higher_index(q: f64, len: usize) -> usize { - Self::float_quantile_index(q, len).ceil() as usize - } - #[doc(hidden)] - fn float_quantile_index_fraction(q: f64, len: usize) -> f64 { - Self::float_quantile_index(q, len).fract() - } - #[doc(hidden)] - fn needs_lower(q: f64, len: usize) -> bool; - #[doc(hidden)] - fn needs_higher(q: f64, len: usize) -> bool; - #[doc(hidden)] - fn interpolate( - lower: Option>, - higher: Option>, - q: f64, - len: usize, - ) -> Array - where - D: Dimension; - } - - /// Select the higher value. - pub struct Higher; - /// Select the lower value. - pub struct Lower; - /// Select the nearest value. - pub struct Nearest; - /// Select the midpoint of the two values (`(lower + higher) / 2`). - pub struct Midpoint; - /// Linearly interpolate between the two values - /// (`lower + (higher - lower) * fraction`, where `fraction` is the - /// fractional part of the index surrounded by `lower` and `higher`). - pub struct Linear; - - impl Interpolate for Higher { - fn needs_lower(_q: f64, _len: usize) -> bool { - false - } - fn needs_higher(_q: f64, _len: usize) -> bool { - true - } - fn interpolate( - _lower: Option>, - higher: Option>, - _q: f64, - _len: usize, - ) -> Array { - higher.unwrap() - } - } - - impl Interpolate for Lower { - fn needs_lower(_q: f64, _len: usize) -> bool { - true - } - fn needs_higher(_q: f64, _len: usize) -> bool { - false - } - fn interpolate( - lower: Option>, - _higher: Option>, - _q: f64, - _len: usize, - ) -> Array { - lower.unwrap() - } - } - - impl Interpolate for Nearest { - fn needs_lower(q: f64, len: usize) -> bool { - >::float_quantile_index_fraction(q, len) < 0.5 - } - fn needs_higher(q: f64, len: usize) -> bool { - !>::needs_lower(q, len) - } - fn interpolate( - lower: Option>, - higher: Option>, - q: f64, - len: usize, - ) -> Array { - if >::needs_lower(q, len) { - lower.unwrap() - } else { - higher.unwrap() - } - } - } - - impl Interpolate for Midpoint - where - T: NumOps + Clone + FromPrimitive, - { - fn needs_lower(_q: f64, _len: usize) -> bool { - true - } - fn needs_higher(_q: f64, _len: usize) -> bool { - true - } - fn interpolate( - lower: Option>, - higher: Option>, - _q: f64, - _len: usize, - ) -> Array - where - D: Dimension, - { - let denom = T::from_u8(2).unwrap(); - let mut lower = lower.unwrap(); - let higher = higher.unwrap(); - azip!( - mut lower, ref higher in { - *lower = lower.clone() + (higher.clone() - lower.clone()) / denom.clone() - } - ); - lower - } - } - - impl Interpolate for Linear - where - T: NumOps + Clone + FromPrimitive + ToPrimitive, - { - fn needs_lower(_q: f64, _len: usize) -> bool { - true - } - fn needs_higher(_q: f64, _len: usize) -> bool { - true - } - fn interpolate( - lower: Option>, - higher: Option>, - q: f64, - len: usize, - ) -> Array - where - D: Dimension, - { - let fraction = >::float_quantile_index_fraction(q, len); - let mut a = lower.unwrap(); - let b = higher.unwrap(); - azip!(mut a, ref b in { - let a_f64 = a.to_f64().unwrap(); - let b_f64 = b.to_f64().unwrap(); - *a = a.clone() + T::from_f64((b_f64 - a_f64) * fraction).unwrap(); - }); - a - } - } -} - /// Quantile methods for `ArrayBase`. pub trait QuantileExt where @@ -548,3 +376,5 @@ where } } } + +pub mod interpolate; From 8f1e7cd7bfb12c6f1d3d77fa853cf83ef3ce2028 Mon Sep 17 00:00:00 2001 From: LukeMathWalker Date: Sat, 2 Feb 2019 18:20:56 +0000 Subject: [PATCH 03/81] Re-implemented quantile_axis_mut to get closer to something we can use for bulk computation --- src/quantile/mod.rs | 53 ++++++++++++++++++++++++++++++--------------- 1 file changed, 35 insertions(+), 18 deletions(-) diff --git a/src/quantile/mod.rs b/src/quantile/mod.rs index 040bb6ce..0addbb84 100644 --- a/src/quantile/mod.rs +++ b/src/quantile/mod.rs @@ -1,6 +1,7 @@ use self::interpolate::Interpolate; +use std::collections::HashMap; use ndarray::prelude::*; -use ndarray::{s, Data, DataMut, RemoveAxis}; +use ndarray::{Data, DataMut, RemoveAxis, Slice}; use std::cmp; use {MaybeNan, MaybeNanExt, Sort1dExt}; @@ -272,26 +273,42 @@ where I: Interpolate, { assert!((0. <= q) && (q <= 1.)); - let mut lower = None; - let mut higher = None; let axis_len = self.len_of(axis); + let mut searched_indexes: Vec = vec![]; if I::needs_lower(q, axis_len) { - let lower_index = I::lower_index(q, axis_len); - lower = Some(self.map_axis_mut(axis, |mut x| x.sorted_get_mut(lower_index))); - if I::needs_higher(q, axis_len) { - let higher_index = I::higher_index(q, axis_len); - let relative_higher_index = higher_index - lower_index; - higher = Some(self.map_axis_mut(axis, |mut x| { - x.slice_mut(s![lower_index..]) - .sorted_get_mut(relative_higher_index) - })); - }; - } else { - higher = Some( - self.map_axis_mut(axis, |mut x| x.sorted_get_mut(I::higher_index(q, axis_len))), + searched_indexes.push(I::lower_index(q, axis_len)); + } + if I::needs_higher(q, axis_len) { + searched_indexes.push(I::higher_index(q, axis_len)); + } + + let mut quantiles = HashMap::new(); + let mut previous_index = 0; + let mut search_space = self.view_mut(); + for index in searched_indexes.into_iter() { + let relative_index = index - previous_index; + let quantile = Some( + search_space.map_axis_mut( + axis, + |mut x| x.sorted_get_mut(relative_index)) ); - }; - I::interpolate(lower, higher, q, axis_len) + quantiles.insert(index, quantile); + previous_index = index; + search_space.slice_axis_inplace(axis, Slice::from(relative_index..)); + } + + I::interpolate( + match I::needs_lower(q, axis_len) { + true => quantiles.get(&I::lower_index(q, axis_len)).unwrap().clone(), + false => None, + }, + match I::needs_higher(q, axis_len) { + true => quantiles.get(&I::higher_index(q, axis_len)).unwrap().clone(), + false => None, + }, + q, + axis_len + ) } fn quantile_axis_skipnan_mut(&mut self, axis: Axis, q: f64) -> Array From c81f6be80939ac571aa09d24afbb1568d1d2db11 Mon Sep 17 00:00:00 2001 From: LukeMathWalker Date: Sat, 2 Feb 2019 18:26:15 +0000 Subject: [PATCH 04/81] Use a set instead of a vec to avoid repeating computations --- src/quantile/mod.rs | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/src/quantile/mod.rs b/src/quantile/mod.rs index 0addbb84..b01d00da 100644 --- a/src/quantile/mod.rs +++ b/src/quantile/mod.rs @@ -1,5 +1,5 @@ use self::interpolate::Interpolate; -use std::collections::HashMap; +use std::collections::{HashMap, BTreeSet}; use ndarray::prelude::*; use ndarray::{Data, DataMut, RemoveAxis, Slice}; use std::cmp; @@ -274,12 +274,12 @@ where { assert!((0. <= q) && (q <= 1.)); let axis_len = self.len_of(axis); - let mut searched_indexes: Vec = vec![]; + let mut searched_indexes = BTreeSet::new(); if I::needs_lower(q, axis_len) { - searched_indexes.push(I::lower_index(q, axis_len)); + searched_indexes.insert(I::lower_index(q, axis_len)); } if I::needs_higher(q, axis_len) { - searched_indexes.push(I::higher_index(q, axis_len)); + searched_indexes.insert(I::higher_index(q, axis_len)); } let mut quantiles = HashMap::new(); From 7aee4529641f462f24861b8611bed44b8facc045 Mon Sep 17 00:00:00 2001 From: LukeMathWalker Date: Sat, 2 Feb 2019 19:04:54 +0000 Subject: [PATCH 05/81] Use bulk method for single quantile --- src/quantile/mod.rs | 79 +++++++++++++++++++++++++++++++-------------- 1 file changed, 54 insertions(+), 25 deletions(-) diff --git a/src/quantile/mod.rs b/src/quantile/mod.rs index b01d00da..6ea24fd5 100644 --- a/src/quantile/mod.rs +++ b/src/quantile/mod.rs @@ -164,6 +164,13 @@ where S: DataMut, I: Interpolate; + fn quantiles_axis_mut(&mut self, axis: Axis, qs: &[f64]) -> Vec> + where + D: RemoveAxis, + A: Ord + Clone, + S: DataMut, + I: Interpolate; + /// Return the `q`th quantile of the data along the specified axis, skipping NaN values. /// /// See [`quantile_axis_mut`](##tymethod.quantile_axis_mut) for details. @@ -265,21 +272,28 @@ where })) } - fn quantile_axis_mut(&mut self, axis: Axis, q: f64) -> Array - where - D: RemoveAxis, - A: Ord + Clone, - S: DataMut, - I: Interpolate, + fn quantiles_axis_mut(&mut self, axis: Axis, qs: &[f64]) -> Vec> + where + D: RemoveAxis, + A: Ord + Clone, + S: DataMut, + I: Interpolate, { - assert!((0. <= q) && (q <= 1.)); + assert!(qs.iter().all(|x| (0. <= *x) && (*x <= 1.))); + + let mut deduped_qs: Vec = qs.to_vec(); + deduped_qs.sort_by(|a, b| a.partial_cmp(b).unwrap()); + deduped_qs.dedup(); + let axis_len = self.len_of(axis); let mut searched_indexes = BTreeSet::new(); - if I::needs_lower(q, axis_len) { - searched_indexes.insert(I::lower_index(q, axis_len)); - } - if I::needs_higher(q, axis_len) { - searched_indexes.insert(I::higher_index(q, axis_len)); + for q in deduped_qs.iter() { + if I::needs_lower(*q, axis_len) { + searched_indexes.insert(I::lower_index(*q, axis_len)); + } + if I::needs_higher(*q, axis_len) { + searched_indexes.insert(I::higher_index(*q, axis_len)); + } } let mut quantiles = HashMap::new(); @@ -289,7 +303,7 @@ where let relative_index = index - previous_index; let quantile = Some( search_space.map_axis_mut( - axis, + axis, |mut x| x.sorted_get_mut(relative_index)) ); quantiles.insert(index, quantile); @@ -297,18 +311,33 @@ where search_space.slice_axis_inplace(axis, Slice::from(relative_index..)); } - I::interpolate( - match I::needs_lower(q, axis_len) { - true => quantiles.get(&I::lower_index(q, axis_len)).unwrap().clone(), - false => None, - }, - match I::needs_higher(q, axis_len) { - true => quantiles.get(&I::higher_index(q, axis_len)).unwrap().clone(), - false => None, - }, - q, - axis_len - ) + let mut results = vec![]; + for q in qs { + let result = I::interpolate( + match I::needs_lower(*q, axis_len) { + true => quantiles.get(&I::lower_index(*q, axis_len)).unwrap().clone(), + false => None, + }, + match I::needs_higher(*q, axis_len) { + true => quantiles.get(&I::higher_index(*q, axis_len)).unwrap().clone(), + false => None, + }, + *q, + axis_len + ); + results.push(result); + } + results + } + + fn quantile_axis_mut(&mut self, axis: Axis, q: f64) -> Array + where + D: RemoveAxis, + A: Ord + Clone, + S: DataMut, + I: Interpolate, + { + self.quantiles_axis_mut::(axis, &[q]).into_iter().next().unwrap() } fn quantile_axis_skipnan_mut(&mut self, axis: Axis, q: f64) -> Array From 745e45b264fbab9cc4ea9de61d8fe7ff61a8c730 Mon Sep 17 00:00:00 2001 From: LukeMathWalker Date: Mon, 4 Feb 2019 09:11:02 +0000 Subject: [PATCH 06/81] Implement bulk method to get sorted --- src/sort.rs | 42 ++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 42 insertions(+) diff --git a/src/sort.rs b/src/sort.rs index eecda8f4..44009e26 100644 --- a/src/sort.rs +++ b/src/sort.rs @@ -1,5 +1,6 @@ use ndarray::prelude::*; use ndarray::{s, Data, DataMut}; +use std::collections::HashMap; use rand::prelude::*; use rand::thread_rng; @@ -32,6 +33,11 @@ where A: Ord + Clone, S: DataMut; + fn sorted_get_many_mut(&mut self, is: &[usize]) -> HashMap + where + A: Ord + Clone, + S: DataMut; + /// Return the index of `self[partition_index]` if `self` were to be sorted /// in increasing order. /// @@ -84,6 +90,20 @@ where } } + fn sorted_get_many_mut(&mut self, indexes: &[usize]) -> HashMap + where + A: Ord + Clone, + S: DataMut, + { + let mut deduped_indexes: Vec = is.to_vec(); + deduped_indexes.sort_unstable(); + deduped_indexes.dedup(); + + sorted_get_many_mut_unchecked(self, &deduped_indexes) + } + + + fn partition_mut(&mut self, pivot_index: usize) -> usize where A: Ord + Clone, @@ -122,3 +142,25 @@ where i - 1 } } + +pub(crate) fn sorted_get_many_mut_unchecked( + array: &mut ArrayBase, indexes: &[usize]) -> HashMap +where + A: Ord + Clone, + S: DataMut, +{ + let mut values = HashMap::new(); + + let mut previous_index = 0; + let mut search_space = array.view_mut(); + for index in indexes.into_iter() { + let relative_index = index - previous_index; + let value = array.sorted_get_mut(relative_index); + values.insert(index, quantile); + + previous_index = *index; + search_space.slice_axis_inplace(axis, Slice::from(relative_index..)); + } + + values +} From 74eda814d1e8e7d44ecbfed5c3bf3d48d7a60c95 Mon Sep 17 00:00:00 2001 From: LukeMathWalker Date: Wed, 6 Feb 2019 08:43:41 +0000 Subject: [PATCH 07/81] Refactored quantiles_axis_mut to use sorted_get_many_mut --- src/quantile/mod.rs | 36 +++++++++++++++++------------------- src/sort.rs | 8 ++++---- 2 files changed, 21 insertions(+), 23 deletions(-) diff --git a/src/quantile/mod.rs b/src/quantile/mod.rs index 6ea24fd5..47eee59a 100644 --- a/src/quantile/mod.rs +++ b/src/quantile/mod.rs @@ -1,9 +1,10 @@ use self::interpolate::Interpolate; -use std::collections::{HashMap, BTreeSet}; +use std::collections::BTreeSet; use ndarray::prelude::*; -use ndarray::{Data, DataMut, RemoveAxis, Slice}; +use ndarray::{Data, DataMut, RemoveAxis}; use std::cmp; -use {MaybeNan, MaybeNanExt, Sort1dExt}; +use super::sort::sorted_get_many_mut_unchecked; +use {MaybeNan, MaybeNanExt}; /// Quantile methods for `ArrayBase`. pub trait QuantileExt @@ -295,31 +296,28 @@ where searched_indexes.insert(I::higher_index(*q, axis_len)); } } + let searched_indexes: Vec = searched_indexes.into_iter().collect(); - let mut quantiles = HashMap::new(); - let mut previous_index = 0; - let mut search_space = self.view_mut(); - for index in searched_indexes.into_iter() { - let relative_index = index - previous_index; - let quantile = Some( - search_space.map_axis_mut( - axis, - |mut x| x.sorted_get_mut(relative_index)) - ); - quantiles.insert(index, quantile); - previous_index = index; - search_space.slice_axis_inplace(axis, Slice::from(relative_index..)); - } + let values = self.map_axis_mut( + axis, + |mut x| sorted_get_many_mut_unchecked(&mut x, &searched_indexes) + ); let mut results = vec![]; for q in qs { let result = I::interpolate( match I::needs_lower(*q, axis_len) { - true => quantiles.get(&I::lower_index(*q, axis_len)).unwrap().clone(), + true => Some( + values.map( + |x| x.get(&I::lower_index(*q, axis_len)).unwrap().clone()) + ), false => None, }, match I::needs_higher(*q, axis_len) { - true => quantiles.get(&I::higher_index(*q, axis_len)).unwrap().clone(), + true => Some( + values.map( + |x| x.get(&I::higher_index(*q, axis_len)).unwrap().clone()) + ), false => None, }, *q, diff --git a/src/sort.rs b/src/sort.rs index 44009e26..abe1bbe8 100644 --- a/src/sort.rs +++ b/src/sort.rs @@ -95,7 +95,7 @@ where A: Ord + Clone, S: DataMut, { - let mut deduped_indexes: Vec = is.to_vec(); + let mut deduped_indexes: Vec = indexes.to_vec(); deduped_indexes.sort_unstable(); deduped_indexes.dedup(); @@ -155,11 +155,11 @@ where let mut search_space = array.view_mut(); for index in indexes.into_iter() { let relative_index = index - previous_index; - let value = array.sorted_get_mut(relative_index); - values.insert(index, quantile); + let value = search_space.sorted_get_mut(relative_index); + values.insert(*index, value); previous_index = *index; - search_space.slice_axis_inplace(axis, Slice::from(relative_index..)); + search_space.slice_collapse(s![relative_index..]); } values From 93531ded144c64df6d95898b4da0780380b6cfcf Mon Sep 17 00:00:00 2001 From: LukeMathWalker Date: Wed, 6 Feb 2019 08:47:47 +0000 Subject: [PATCH 08/81] Avoid recomputing index value --- src/quantile/mod.rs | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/src/quantile/mod.rs b/src/quantile/mod.rs index 47eee59a..79851aa2 100644 --- a/src/quantile/mod.rs +++ b/src/quantile/mod.rs @@ -307,17 +307,17 @@ where for q in qs { let result = I::interpolate( match I::needs_lower(*q, axis_len) { - true => Some( - values.map( - |x| x.get(&I::lower_index(*q, axis_len)).unwrap().clone()) - ), + true => { + let lower_index = &I::lower_index(*q, axis_len); + Some(values.map(|x| x.get(lower_index).unwrap().clone())) + }, false => None, }, match I::needs_higher(*q, axis_len) { - true => Some( - values.map( - |x| x.get(&I::higher_index(*q, axis_len)).unwrap().clone()) - ), + true => { + let higher_index = &I::higher_index(*q, axis_len); + Some(values.map(|x| x.get(higher_index).unwrap().clone())) + }, false => None, }, *q, From c00620dc5bc64e590b3f9797344cf03c2e536e9f Mon Sep 17 00:00:00 2001 From: LukeMathWalker Date: Wed, 6 Feb 2019 08:51:33 +0000 Subject: [PATCH 09/81] Add quantiles_mut to 1d trait --- src/quantile/mod.rs | 24 ++++++++++++++++++++++++ 1 file changed, 24 insertions(+) diff --git a/src/quantile/mod.rs b/src/quantile/mod.rs index 79851aa2..856e4f44 100644 --- a/src/quantile/mod.rs +++ b/src/quantile/mod.rs @@ -401,6 +401,12 @@ where A: Ord + Clone, S: DataMut, I: Interpolate; + + fn quantiles_mut(&mut self, qs: &[f64]) -> Option> + where + A: Ord + Clone, + S: DataMut, + I: Interpolate; } impl Quantile1dExt for ArrayBase @@ -419,6 +425,24 @@ where Some(self.quantile_axis_mut::(Axis(0), q).into_scalar()) } } + + fn quantiles_mut(&mut self, qs: &[f64]) -> Option> + where + A: Ord + Clone, + S: DataMut, + I: Interpolate, + { + if self.is_empty() { + None + } else { + Some( + self.quantiles_axis_mut::(Axis(0), qs) + .into_iter() + .map(|x| x.into_scalar()) + .collect() + ) + } + } } pub mod interpolate; From a7111e9a32a6677a1b90f0c77be98bd46d4b6354 Mon Sep 17 00:00:00 2001 From: LukeMathWalker Date: Sat, 9 Feb 2019 15:54:47 +0000 Subject: [PATCH 10/81] Return hashmaps from bulk methods --- src/histogram/strategies.rs | 5 ++-- src/quantile/interpolate.rs | 57 +++++++++++++++++++------------------ src/quantile/mod.rs | 39 ++++++++++++------------- 3 files changed, 52 insertions(+), 49 deletions(-) diff --git a/src/histogram/strategies.rs b/src/histogram/strategies.rs index eeaee686..93e35511 100644 --- a/src/histogram/strategies.rs +++ b/src/histogram/strategies.rs @@ -24,6 +24,7 @@ use super::{Bins, Edges}; use ndarray::prelude::*; use ndarray::Data; use num_traits::{FromPrimitive, NumOps, Zero}; +use noisy_float::types::n64; /// A trait implemented by all strategies to build [`Bins`] /// with parameters inferred from observations. @@ -307,8 +308,8 @@ where let n_points = a.len(); let mut a_copy = a.to_owned(); - let first_quartile = a_copy.quantile_mut::(0.25).unwrap(); - let third_quartile = a_copy.quantile_mut::(0.75).unwrap(); + let first_quartile = a_copy.quantile_mut::(n64(0.25)).unwrap(); + let third_quartile = a_copy.quantile_mut::(n64(0.75)).unwrap(); let iqr = third_quartile - first_quartile; let bin_width = FreedmanDiaconis::compute_bin_width(n_points, iqr); diff --git a/src/quantile/interpolate.rs b/src/quantile/interpolate.rs index 86ec82e4..f0fd1f6a 100644 --- a/src/quantile/interpolate.rs +++ b/src/quantile/interpolate.rs @@ -1,7 +1,8 @@ //! Interpolation strategies. use ndarray::azip; use ndarray::prelude::*; -use num_traits::{FromPrimitive, ToPrimitive}; +use noisy_float::types::N64; +use num_traits::{Float, FromPrimitive, ToPrimitive}; use std::ops::{Add, Div}; /// Used to provide an interpolation strategy to [`quantile_axis_mut`]. @@ -9,30 +10,30 @@ use std::ops::{Add, Div}; /// [`quantile_axis_mut`]: ../trait.QuantileExt.html#tymethod.quantile_axis_mut pub trait Interpolate { #[doc(hidden)] - fn float_quantile_index(q: f64, len: usize) -> f64 { - ((len - 1) as f64) * q + fn float_quantile_index(q: N64, len: usize) -> N64 { + q * ((len - 1) as f64) } #[doc(hidden)] - fn lower_index(q: f64, len: usize) -> usize { - Self::float_quantile_index(q, len).floor() as usize + fn lower_index(q: N64, len: usize) -> usize { + Self::float_quantile_index(q, len).floor().to_usize().unwrap() } #[doc(hidden)] - fn higher_index(q: f64, len: usize) -> usize { - Self::float_quantile_index(q, len).ceil() as usize + fn higher_index(q: N64, len: usize) -> usize { + Self::float_quantile_index(q, len).ceil().to_usize().unwrap() } #[doc(hidden)] - fn float_quantile_index_fraction(q: f64, len: usize) -> f64 { + fn float_quantile_index_fraction(q: N64, len: usize) -> N64 { Self::float_quantile_index(q, len).fract() } #[doc(hidden)] - fn needs_lower(q: f64, len: usize) -> bool; + fn needs_lower(q: N64, len: usize) -> bool; #[doc(hidden)] - fn needs_higher(q: f64, len: usize) -> bool; + fn needs_higher(q: N64, len: usize) -> bool; #[doc(hidden)] fn interpolate( lower: Option>, higher: Option>, - q: f64, + q: N64, len: usize, ) -> Array where @@ -53,16 +54,16 @@ pub struct Midpoint; pub struct Linear; impl Interpolate for Higher { - fn needs_lower(_q: f64, _len: usize) -> bool { + fn needs_lower(_q: N64, _len: usize) -> bool { false } - fn needs_higher(_q: f64, _len: usize) -> bool { + fn needs_higher(_q: N64, _len: usize) -> bool { true } fn interpolate( _lower: Option>, higher: Option>, - _q: f64, + _q: N64, _len: usize, ) -> Array { higher.unwrap() @@ -70,16 +71,16 @@ impl Interpolate for Higher { } impl Interpolate for Lower { - fn needs_lower(_q: f64, _len: usize) -> bool { + fn needs_lower(_q: N64, _len: usize) -> bool { true } - fn needs_higher(_q: f64, _len: usize) -> bool { + fn needs_higher(_q: N64, _len: usize) -> bool { false } fn interpolate( lower: Option>, _higher: Option>, - _q: f64, + _q: N64, _len: usize, ) -> Array { lower.unwrap() @@ -87,16 +88,16 @@ impl Interpolate for Lower { } impl Interpolate for Nearest { - fn needs_lower(q: f64, len: usize) -> bool { + fn needs_lower(q: N64, len: usize) -> bool { >::float_quantile_index_fraction(q, len) < 0.5 } - fn needs_higher(q: f64, len: usize) -> bool { + fn needs_higher(q: N64, len: usize) -> bool { !>::needs_lower(q, len) } fn interpolate( lower: Option>, higher: Option>, - q: f64, + q: N64, len: usize, ) -> Array { if >::needs_lower(q, len) { @@ -111,16 +112,16 @@ impl Interpolate for Midpoint where T: Add + Div + Clone + FromPrimitive, { - fn needs_lower(_q: f64, _len: usize) -> bool { + fn needs_lower(_q: N64, _len: usize) -> bool { true } - fn needs_higher(_q: f64, _len: usize) -> bool { + fn needs_higher(_q: N64, _len: usize) -> bool { true } fn interpolate( lower: Option>, higher: Option>, - _q: f64, + _q: N64, _len: usize, ) -> Array where @@ -135,28 +136,28 @@ impl Interpolate for Linear where T: Add + Clone + FromPrimitive + ToPrimitive, { - fn needs_lower(_q: f64, _len: usize) -> bool { + fn needs_lower(_q: N64, _len: usize) -> bool { true } - fn needs_higher(_q: f64, _len: usize) -> bool { + fn needs_higher(_q: N64, _len: usize) -> bool { true } fn interpolate( lower: Option>, higher: Option>, - q: f64, + q: N64, len: usize, ) -> Array where D: Dimension, { - let fraction = >::float_quantile_index_fraction(q, len); + let fraction = >::float_quantile_index_fraction(q, len).to_f64().unwrap(); let mut a = lower.unwrap(); let b = higher.unwrap(); azip!(mut a, ref b in { let a_f64 = a.to_f64().unwrap(); let b_f64 = b.to_f64().unwrap(); - *a = a.clone() + T::from_f64((b_f64 - a_f64) * fraction).unwrap(); + *a = a.clone() + T::from_f64(fraction * (b_f64 - a_f64)).unwrap(); }); a } diff --git a/src/quantile/mod.rs b/src/quantile/mod.rs index 856e4f44..c5c602f1 100644 --- a/src/quantile/mod.rs +++ b/src/quantile/mod.rs @@ -1,9 +1,10 @@ use self::interpolate::Interpolate; -use std::collections::BTreeSet; +use super::sort::sorted_get_many_mut_unchecked; +use std::cmp; +use std::collections::{HashMap, BTreeSet}; +use noisy_float::types::N64; use ndarray::prelude::*; use ndarray::{Data, DataMut, RemoveAxis}; -use std::cmp; -use super::sort::sorted_get_many_mut_unchecked; use {MaybeNan, MaybeNanExt}; /// Quantile methods for `ArrayBase`. @@ -158,14 +159,14 @@ where /// /// **Panics** if `axis` is out of bounds, if the axis has length 0, or if /// `q` is not between `0.` and `1.` (inclusive). - fn quantile_axis_mut(&mut self, axis: Axis, q: f64) -> Array + fn quantile_axis_mut(&mut self, axis: Axis, q: N64) -> Array where D: RemoveAxis, A: Ord + Clone, S: DataMut, I: Interpolate; - fn quantiles_axis_mut(&mut self, axis: Axis, qs: &[f64]) -> Vec> + fn quantiles_axis_mut(&mut self, axis: Axis, qs: &[N64]) -> HashMap> where D: RemoveAxis, A: Ord + Clone, @@ -175,7 +176,7 @@ where /// Return the `q`th quantile of the data along the specified axis, skipping NaN values. /// /// See [`quantile_axis_mut`](##tymethod.quantile_axis_mut) for details. - fn quantile_axis_skipnan_mut(&mut self, axis: Axis, q: f64) -> Array + fn quantile_axis_skipnan_mut(&mut self, axis: Axis, q: N64) -> Array where D: RemoveAxis, A: MaybeNan, @@ -273,16 +274,16 @@ where })) } - fn quantiles_axis_mut(&mut self, axis: Axis, qs: &[f64]) -> Vec> + fn quantiles_axis_mut(&mut self, axis: Axis, qs: &[N64]) -> HashMap> where D: RemoveAxis, A: Ord + Clone, S: DataMut, I: Interpolate, { - assert!(qs.iter().all(|x| (0. <= *x) && (*x <= 1.))); + assert!(qs.iter().all(|x| (*x >= 0.) && (*x <= 1.))); - let mut deduped_qs: Vec = qs.to_vec(); + let mut deduped_qs: Vec = qs.to_vec(); deduped_qs.sort_by(|a, b| a.partial_cmp(b).unwrap()); deduped_qs.dedup(); @@ -303,7 +304,7 @@ where |mut x| sorted_get_many_mut_unchecked(&mut x, &searched_indexes) ); - let mut results = vec![]; + let mut results = HashMap::new(); for q in qs { let result = I::interpolate( match I::needs_lower(*q, axis_len) { @@ -323,22 +324,22 @@ where *q, axis_len ); - results.push(result); + results.insert(*q, result); } results } - fn quantile_axis_mut(&mut self, axis: Axis, q: f64) -> Array + fn quantile_axis_mut(&mut self, axis: Axis, q: N64) -> Array where D: RemoveAxis, A: Ord + Clone, S: DataMut, I: Interpolate, { - self.quantiles_axis_mut::(axis, &[q]).into_iter().next().unwrap() + self.quantiles_axis_mut::(axis, &[q]).into_iter().next().unwrap().1 } - fn quantile_axis_skipnan_mut(&mut self, axis: Axis, q: f64) -> Array + fn quantile_axis_skipnan_mut(&mut self, axis: Axis, q: N64) -> Array where D: RemoveAxis, A: MaybeNan, @@ -396,13 +397,13 @@ where /// Returns `None` if the array is empty. /// /// **Panics** if `q` is not between `0.` and `1.` (inclusive). - fn quantile_mut(&mut self, q: f64) -> Option + fn quantile_mut(&mut self, q: N64) -> Option where A: Ord + Clone, S: DataMut, I: Interpolate; - fn quantiles_mut(&mut self, qs: &[f64]) -> Option> + fn quantiles_mut(&mut self, qs: &[N64]) -> Option> where A: Ord + Clone, S: DataMut, @@ -413,7 +414,7 @@ impl Quantile1dExt for ArrayBase where S: Data, { - fn quantile_mut(&mut self, q: f64) -> Option + fn quantile_mut(&mut self, q: N64) -> Option where A: Ord + Clone, S: DataMut, @@ -426,7 +427,7 @@ where } } - fn quantiles_mut(&mut self, qs: &[f64]) -> Option> + fn quantiles_mut(&mut self, qs: &[N64]) -> Option> where A: Ord + Clone, S: DataMut, @@ -438,7 +439,7 @@ where Some( self.quantiles_axis_mut::(Axis(0), qs) .into_iter() - .map(|x| x.into_scalar()) + .map(|x| (x.0, x.1.into_scalar())) .collect() ) } From 36284d2281d06df33b37e32f84087e0a7e630010 Mon Sep 17 00:00:00 2001 From: LukeMathWalker Date: Sat, 9 Feb 2019 16:01:06 +0000 Subject: [PATCH 11/81] Fixed tests --- tests/quantile.rs | 24 +++++++++++++----------- 1 file changed, 13 insertions(+), 11 deletions(-) diff --git a/tests/quantile.rs b/tests/quantile.rs index 31b51907..67b7fee7 100644 --- a/tests/quantile.rs +++ b/tests/quantile.rs @@ -2,7 +2,9 @@ extern crate ndarray; extern crate ndarray_stats; extern crate quickcheck; +extern crate noisy_float; +use noisy_float::types::n64; use ndarray::prelude::*; use ndarray_stats::{ interpolate::{Higher, Linear, Lower, Midpoint, Nearest}, @@ -111,7 +113,7 @@ fn test_max_skipnan_all_nan() { #[test] fn test_quantile_axis_mut_with_odd_axis_length() { let mut a = arr2(&[[1, 3, 2, 10], [2, 4, 3, 11], [3, 5, 6, 12]]); - let p = a.quantile_axis_mut::(Axis(0), 0.5); + let p = a.quantile_axis_mut::(Axis(0), n64(0.5)); assert!(p == a.index_axis(Axis(0), 1)); } @@ -119,41 +121,41 @@ fn test_quantile_axis_mut_with_odd_axis_length() { #[should_panic] fn test_quantile_axis_mut_with_zero_axis_length() { let mut a = Array2::::zeros((5, 0)); - a.quantile_axis_mut::(Axis(1), 0.5); + a.quantile_axis_mut::(Axis(1), n64(0.5)); } #[test] fn test_quantile_axis_mut_with_empty_array() { let mut a = Array2::::zeros((5, 0)); - let p = a.quantile_axis_mut::(Axis(0), 0.5); + let p = a.quantile_axis_mut::(Axis(0), n64(0.5)); assert_eq!(p.shape(), &[0]); } #[test] fn test_quantile_axis_mut_with_even_axis_length() { let mut b = arr2(&[[1, 3, 2, 10], [2, 4, 3, 11], [3, 5, 6, 12], [4, 6, 7, 13]]); - let q = b.quantile_axis_mut::(Axis(0), 0.5); + let q = b.quantile_axis_mut::(Axis(0), n64(0.5)); assert!(q == b.index_axis(Axis(0), 1)); } #[test] fn test_quantile_axis_mut_to_get_minimum() { let mut b = arr2(&[[1, 3, 22, 10]]); - let q = b.quantile_axis_mut::(Axis(1), 0.); + let q = b.quantile_axis_mut::(Axis(1), n64(0.)); assert!(q == arr1(&[1])); } #[test] fn test_quantile_axis_mut_to_get_maximum() { let mut b = arr1(&[1, 3, 22, 10]); - let q = b.quantile_axis_mut::(Axis(0), 1.); + let q = b.quantile_axis_mut::(Axis(0), n64(1.)); assert!(q == arr0(22)); } #[test] fn test_quantile_axis_skipnan_mut_higher_opt_i32() { let mut a = arr2(&[[Some(4), Some(2), None, Some(1), Some(5)], [None; 5]]); - let q = a.quantile_axis_skipnan_mut::(Axis(1), 0.6); + let q = a.quantile_axis_skipnan_mut::(Axis(1), n64(0.6)); assert_eq!(q.shape(), &[2]); assert_eq!(q[0], Some(4)); assert!(q[1].is_none()); @@ -162,7 +164,7 @@ fn test_quantile_axis_skipnan_mut_higher_opt_i32() { #[test] fn test_quantile_axis_skipnan_mut_nearest_opt_i32() { let mut a = arr2(&[[Some(4), Some(2), None, Some(1), Some(5)], [None; 5]]); - let q = a.quantile_axis_skipnan_mut::(Axis(1), 0.6); + let q = a.quantile_axis_skipnan_mut::(Axis(1), n64(0.6)); assert_eq!(q.shape(), &[2]); assert_eq!(q[0], Some(4)); assert!(q[1].is_none()); @@ -171,7 +173,7 @@ fn test_quantile_axis_skipnan_mut_nearest_opt_i32() { #[test] fn test_quantile_axis_skipnan_mut_midpoint_opt_i32() { let mut a = arr2(&[[Some(4), Some(2), None, Some(1), Some(5)], [None; 5]]); - let q = a.quantile_axis_skipnan_mut::(Axis(1), 0.6); + let q = a.quantile_axis_skipnan_mut::(Axis(1), n64(0.6)); assert_eq!(q.shape(), &[2]); assert_eq!(q[0], Some(3)); assert!(q[1].is_none()); @@ -180,7 +182,7 @@ fn test_quantile_axis_skipnan_mut_midpoint_opt_i32() { #[test] fn test_quantile_axis_skipnan_mut_linear_f64() { let mut a = arr2(&[[1., 2., ::std::f64::NAN, 3.], [::std::f64::NAN; 4]]); - let q = a.quantile_axis_skipnan_mut::(Axis(1), 0.75); + let q = a.quantile_axis_skipnan_mut::(Axis(1), n64(0.75)); assert_eq!(q.shape(), &[2]); assert!((q[0] - 2.5).abs() < 1e-12); assert!(q[1].is_nan()); @@ -189,7 +191,7 @@ fn test_quantile_axis_skipnan_mut_linear_f64() { #[test] fn test_quantile_axis_skipnan_mut_linear_opt_i32() { let mut a = arr2(&[[Some(2), Some(4), None, Some(1)], [None; 4]]); - let q = a.quantile_axis_skipnan_mut::(Axis(1), 0.75); + let q = a.quantile_axis_skipnan_mut::(Axis(1), n64(0.75)); assert_eq!(q.shape(), &[2]); assert_eq!(q[0], Some(3)); assert!(q[1].is_none()); From fc56ca48de699aa3cb3ebc4dee89286b40a88244 Mon Sep 17 00:00:00 2001 From: LukeMathWalker Date: Sat, 9 Feb 2019 16:13:31 +0000 Subject: [PATCH 12/81] Use IndexSet to preserve insertion order --- Cargo.toml | 1 + src/lib.rs | 2 ++ src/quantile/mod.rs | 8 ++++++-- 3 files changed, 9 insertions(+), 2 deletions(-) diff --git a/Cargo.toml b/Cargo.toml index 57795ce8..f943708e 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -20,6 +20,7 @@ noisy_float = "0.1.8" num-traits = "0.2" rand = "0.6" itertools = { version = "0.7.0", default-features = false } +indexmap = "1.0" [dev-dependencies] quickcheck = "0.7" diff --git a/src/lib.rs b/src/lib.rs index 9cf586f1..0af1bbb1 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -30,6 +30,8 @@ extern crate ndarray; extern crate noisy_float; extern crate num_traits; extern crate rand; +extern crate itertools; +extern crate indexmap; #[cfg(test)] extern crate approx; diff --git a/src/quantile/mod.rs b/src/quantile/mod.rs index c5c602f1..97fe3522 100644 --- a/src/quantile/mod.rs +++ b/src/quantile/mod.rs @@ -1,10 +1,11 @@ use self::interpolate::Interpolate; use super::sort::sorted_get_many_mut_unchecked; use std::cmp; -use std::collections::{HashMap, BTreeSet}; +use std::collections::HashMap; use noisy_float::types::N64; use ndarray::prelude::*; use ndarray::{Data, DataMut, RemoveAxis}; +use indexmap::IndexSet; use {MaybeNan, MaybeNanExt}; /// Quantile methods for `ArrayBase`. @@ -288,7 +289,10 @@ where deduped_qs.dedup(); let axis_len = self.len_of(axis); - let mut searched_indexes = BTreeSet::new(); + // IndexSet preserves insertion order: + // - indexes will stay sorted; + // - we avoid index duplication. + let mut searched_indexes = IndexSet::new(); for q in deduped_qs.iter() { if I::needs_lower(*q, axis_len) { searched_indexes.insert(I::lower_index(*q, axis_len)); From 67a4477ea575a032fc77fa38d6a8668925c7d0fb Mon Sep 17 00:00:00 2001 From: LukeMathWalker Date: Sat, 9 Feb 2019 16:15:48 +0000 Subject: [PATCH 13/81] Fix indentation --- src/quantile/mod.rs | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/quantile/mod.rs b/src/quantile/mod.rs index 97fe3522..55f33654 100644 --- a/src/quantile/mod.rs +++ b/src/quantile/mod.rs @@ -304,8 +304,8 @@ where let searched_indexes: Vec = searched_indexes.into_iter().collect(); let values = self.map_axis_mut( - axis, - |mut x| sorted_get_many_mut_unchecked(&mut x, &searched_indexes) + axis, + |mut x| sorted_get_many_mut_unchecked(&mut x, &searched_indexes) ); let mut results = HashMap::new(); From ac0ca03b24e619a3ee53bb2d11fc8512dc2f5378 Mon Sep 17 00:00:00 2001 From: LukeMathWalker Date: Sat, 9 Feb 2019 16:37:21 +0000 Subject: [PATCH 14/81] IndexMap provides a more intuitive behaviour --- Cargo.toml | 3 ++- src/quantile/mod.rs | 13 ++++++------- src/sort.rs | 10 +++++----- tests/sort.rs | 40 +++++++++++++++++++++++++++++++++++++++- 4 files changed, 52 insertions(+), 14 deletions(-) diff --git a/Cargo.toml b/Cargo.toml index f943708e..cf7f8615 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -23,6 +23,7 @@ itertools = { version = "0.7.0", default-features = false } indexmap = "1.0" [dev-dependencies] -quickcheck = "0.7" +quickcheck = { version = "0.8.1", default-features = false } ndarray-rand = "0.9" approx = "0.3" +quickcheck_macros = "0.8" diff --git a/src/quantile/mod.rs b/src/quantile/mod.rs index 55f33654..8fc0b778 100644 --- a/src/quantile/mod.rs +++ b/src/quantile/mod.rs @@ -1,11 +1,10 @@ use self::interpolate::Interpolate; use super::sort::sorted_get_many_mut_unchecked; use std::cmp; -use std::collections::HashMap; use noisy_float::types::N64; use ndarray::prelude::*; use ndarray::{Data, DataMut, RemoveAxis}; -use indexmap::IndexSet; +use indexmap::{IndexSet, IndexMap}; use {MaybeNan, MaybeNanExt}; /// Quantile methods for `ArrayBase`. @@ -167,7 +166,7 @@ where S: DataMut, I: Interpolate; - fn quantiles_axis_mut(&mut self, axis: Axis, qs: &[N64]) -> HashMap> + fn quantiles_axis_mut(&mut self, axis: Axis, qs: &[N64]) -> IndexMap> where D: RemoveAxis, A: Ord + Clone, @@ -275,7 +274,7 @@ where })) } - fn quantiles_axis_mut(&mut self, axis: Axis, qs: &[N64]) -> HashMap> + fn quantiles_axis_mut(&mut self, axis: Axis, qs: &[N64]) -> IndexMap> where D: RemoveAxis, A: Ord + Clone, @@ -308,7 +307,7 @@ where |mut x| sorted_get_many_mut_unchecked(&mut x, &searched_indexes) ); - let mut results = HashMap::new(); + let mut results = IndexMap::new(); for q in qs { let result = I::interpolate( match I::needs_lower(*q, axis_len) { @@ -407,7 +406,7 @@ where S: DataMut, I: Interpolate; - fn quantiles_mut(&mut self, qs: &[N64]) -> Option> + fn quantiles_mut(&mut self, qs: &[N64]) -> Option> where A: Ord + Clone, S: DataMut, @@ -431,7 +430,7 @@ where } } - fn quantiles_mut(&mut self, qs: &[N64]) -> Option> + fn quantiles_mut(&mut self, qs: &[N64]) -> Option> where A: Ord + Clone, S: DataMut, diff --git a/src/sort.rs b/src/sort.rs index abe1bbe8..98168ce8 100644 --- a/src/sort.rs +++ b/src/sort.rs @@ -1,6 +1,6 @@ use ndarray::prelude::*; use ndarray::{s, Data, DataMut}; -use std::collections::HashMap; +use indexmap::IndexMap; use rand::prelude::*; use rand::thread_rng; @@ -33,7 +33,7 @@ where A: Ord + Clone, S: DataMut; - fn sorted_get_many_mut(&mut self, is: &[usize]) -> HashMap + fn sorted_get_many_mut(&mut self, is: &[usize]) -> IndexMap where A: Ord + Clone, S: DataMut; @@ -90,7 +90,7 @@ where } } - fn sorted_get_many_mut(&mut self, indexes: &[usize]) -> HashMap + fn sorted_get_many_mut(&mut self, indexes: &[usize]) -> IndexMap where A: Ord + Clone, S: DataMut, @@ -144,12 +144,12 @@ where } pub(crate) fn sorted_get_many_mut_unchecked( - array: &mut ArrayBase, indexes: &[usize]) -> HashMap + array: &mut ArrayBase, indexes: &[usize]) -> IndexMap where A: Ord + Clone, S: DataMut, { - let mut values = HashMap::new(); + let mut values = IndexMap::new(); let mut previous_index = 0; let mut search_space = array.view_mut(); diff --git a/tests/sort.rs b/tests/sort.rs index 3c2cab58..cf8600d7 100644 --- a/tests/sort.rs +++ b/tests/sort.rs @@ -1,8 +1,11 @@ extern crate ndarray; extern crate ndarray_stats; +extern crate quickcheck; +extern crate quickcheck_macros; use ndarray::prelude::*; use ndarray_stats::Sort1dExt; +use quickcheck_macros::quickcheck; #[test] fn test_partition_mut() { @@ -27,7 +30,7 @@ fn test_partition_mut() { for i in 0..partition_index { assert!(a[i] < pivot_value); } - assert!(a[partition_index] == pivot_value); + assert_eq!(a[partition_index], pivot_value); for j in (partition_index + 1)..n { assert!(pivot_value <= a[j]); } @@ -44,3 +47,38 @@ fn test_sorted_get_mut() { let j = a.clone().view_mut().sorted_get_mut(3); assert_eq!(j, 10); } + +#[quickcheck] +fn test_sorted_get_many_mut(mut xs: Vec) -> bool { + let n = xs.len(); + if n == 0 { + true + } else { + let mut v = Array::from_vec(xs.clone()); + let indexes: Vec = (0..n).into_iter().collect(); + let sorted_v: Vec = v.sorted_get_many_mut(&indexes) + .into_iter() + .map(|x| x.1) + .collect(); + xs.sort(); + // println!("xs: {:?}", xs); + // println!("sorted_v: {:?}", sorted_v); + xs == sorted_v + } +} + +#[quickcheck] +fn test_sorted_get_mut_as_sorting_algorithm(mut xs: Vec) -> bool { + let n = xs.len(); + if n == 0 { + true + } else { + let mut v = Array::from_vec(xs.clone()); + let mut sorted_v = vec![]; + for i in 0..n { + sorted_v.push(v.sorted_get_mut(i)) + } + xs.sort(); + xs == sorted_v + } +} From a4c150803c6327b0814a2a86e9f6e81cc735bab1 Mon Sep 17 00:00:00 2001 From: LukeMathWalker Date: Sat, 9 Feb 2019 16:37:46 +0000 Subject: [PATCH 15/81] Remove prints --- tests/sort.rs | 2 -- 1 file changed, 2 deletions(-) diff --git a/tests/sort.rs b/tests/sort.rs index cf8600d7..b3f96777 100644 --- a/tests/sort.rs +++ b/tests/sort.rs @@ -61,8 +61,6 @@ fn test_sorted_get_many_mut(mut xs: Vec) -> bool { .map(|x| x.1) .collect(); xs.sort(); - // println!("xs: {:?}", xs); - // println!("sorted_v: {:?}", sorted_v); xs == sorted_v } } From aa3a157a51970b4320231f640f075bb46780fc16 Mon Sep 17 00:00:00 2001 From: LukeMathWalker Date: Sat, 9 Feb 2019 16:47:07 +0000 Subject: [PATCH 16/81] Renamed methods --- src/sort.rs | 14 +++++++------- tests/sort.rs | 10 +++++----- 2 files changed, 12 insertions(+), 12 deletions(-) diff --git a/src/sort.rs b/src/sort.rs index 98168ce8..07fd0b69 100644 --- a/src/sort.rs +++ b/src/sort.rs @@ -28,12 +28,12 @@ where /// where n is the number of elements in the array. /// /// **Panics** if `i` is greater than or equal to `n`. - fn sorted_get_mut(&mut self, i: usize) -> A + fn get_from_sorted_mut(&mut self, i: usize) -> A where A: Ord + Clone, S: DataMut; - fn sorted_get_many_mut(&mut self, is: &[usize]) -> IndexMap + fn get_many_from_sorted_mut(&mut self, is: &[usize]) -> IndexMap where A: Ord + Clone, S: DataMut; @@ -67,7 +67,7 @@ impl Sort1dExt for ArrayBase where S: Data, { - fn sorted_get_mut(&mut self, i: usize) -> A + fn get_from_sorted_mut(&mut self, i: usize) -> A where A: Ord + Clone, S: DataMut, @@ -80,17 +80,17 @@ where let pivot_index = rng.gen_range(0, n); let partition_index = self.partition_mut(pivot_index); if i < partition_index { - self.slice_mut(s![..partition_index]).sorted_get_mut(i) + self.slice_mut(s![..partition_index]).get_from_sorted_mut(i) } else if i == partition_index { self[i].clone() } else { self.slice_mut(s![partition_index + 1..]) - .sorted_get_mut(i - (partition_index + 1)) + .get_from_sorted_mut(i - (partition_index + 1)) } } } - fn sorted_get_many_mut(&mut self, indexes: &[usize]) -> IndexMap + fn get_many_from_sorted_mut(&mut self, indexes: &[usize]) -> IndexMap where A: Ord + Clone, S: DataMut, @@ -155,7 +155,7 @@ where let mut search_space = array.view_mut(); for index in indexes.into_iter() { let relative_index = index - previous_index; - let value = search_space.sorted_get_mut(relative_index); + let value = search_space.get_from_sorted_mut(relative_index); values.insert(*index, value); previous_index = *index; diff --git a/tests/sort.rs b/tests/sort.rs index b3f96777..685eb912 100644 --- a/tests/sort.rs +++ b/tests/sort.rs @@ -40,11 +40,11 @@ fn test_partition_mut() { #[test] fn test_sorted_get_mut() { let a = arr1(&[1, 3, 2, 10]); - let j = a.clone().view_mut().sorted_get_mut(2); + let j = a.clone().view_mut().get_from_sorted_mut(2); assert_eq!(j, 3); - let j = a.clone().view_mut().sorted_get_mut(1); + let j = a.clone().view_mut().get_from_sorted_mut(1); assert_eq!(j, 2); - let j = a.clone().view_mut().sorted_get_mut(3); + let j = a.clone().view_mut().get_from_sorted_mut(3); assert_eq!(j, 10); } @@ -56,7 +56,7 @@ fn test_sorted_get_many_mut(mut xs: Vec) -> bool { } else { let mut v = Array::from_vec(xs.clone()); let indexes: Vec = (0..n).into_iter().collect(); - let sorted_v: Vec = v.sorted_get_many_mut(&indexes) + let sorted_v: Vec = v.get_many_from_sorted_mut(&indexes) .into_iter() .map(|x| x.1) .collect(); @@ -74,7 +74,7 @@ fn test_sorted_get_mut_as_sorting_algorithm(mut xs: Vec) -> bool { let mut v = Array::from_vec(xs.clone()); let mut sorted_v = vec![]; for i in 0..n { - sorted_v.push(v.sorted_get_mut(i)) + sorted_v.push(v.get_from_sorted_mut(i)) } xs.sort(); xs == sorted_v From 2ea92332ec37943b07f9f26918b2c8c390e04905 Mon Sep 17 00:00:00 2001 From: LukeMathWalker Date: Sat, 9 Feb 2019 16:54:36 +0000 Subject: [PATCH 17/81] Docs for get_many_from_sorted_mut --- src/sort.rs | 13 ++++++++++++- 1 file changed, 12 insertions(+), 1 deletion(-) diff --git a/src/sort.rs b/src/sort.rs index 07fd0b69..fdddc08b 100644 --- a/src/sort.rs +++ b/src/sort.rs @@ -33,7 +33,18 @@ where A: Ord + Clone, S: DataMut; - fn get_many_from_sorted_mut(&mut self, is: &[usize]) -> IndexMap + /// A bulk version of [get_from_sorted_mut], optimized to retrieve multiple + /// indexes at once. + /// It returns an IndexMap, with indexes as keys and retrieved elements as + /// values. + /// The IndexMap is sorted with respect to indexes in increasing order: + /// this ordering is preserved when you iterate over it (using `iter`/`into_iter`). + /// + /// **Panics** if any element in `indexes` is greater than or equal to `n`, + /// where `n` is the length of the array.. + /// + /// [get_sorted_mut]:(##tymethod.get_from_sorted_mut) + fn get_many_from_sorted_mut(&mut self, indexes: &[usize]) -> IndexMap where A: Ord + Clone, S: DataMut; From 12a794460a6203adce7a6be3c31b0fbe3fb0137f Mon Sep 17 00:00:00 2001 From: LukeMathWalker Date: Sat, 9 Feb 2019 16:59:19 +0000 Subject: [PATCH 18/81] Added docs for private free function --- src/quantile/mod.rs | 4 ++-- src/sort.rs | 14 ++++++++++++-- 2 files changed, 14 insertions(+), 4 deletions(-) diff --git a/src/quantile/mod.rs b/src/quantile/mod.rs index 8fc0b778..60857183 100644 --- a/src/quantile/mod.rs +++ b/src/quantile/mod.rs @@ -1,5 +1,5 @@ use self::interpolate::Interpolate; -use super::sort::sorted_get_many_mut_unchecked; +use super::sort::get_many_from_sorted_mut_unchecked; use std::cmp; use noisy_float::types::N64; use ndarray::prelude::*; @@ -304,7 +304,7 @@ where let values = self.map_axis_mut( axis, - |mut x| sorted_get_many_mut_unchecked(&mut x, &searched_indexes) + |mut x| get_many_from_sorted_mut_unchecked(&mut x, &searched_indexes) ); let mut results = IndexMap::new(); diff --git a/src/sort.rs b/src/sort.rs index fdddc08b..cbf42ccd 100644 --- a/src/sort.rs +++ b/src/sort.rs @@ -110,7 +110,7 @@ where deduped_indexes.sort_unstable(); deduped_indexes.dedup(); - sorted_get_many_mut_unchecked(self, &deduped_indexes) + get_many_from_sorted_mut_unchecked(self, &deduped_indexes) } @@ -154,7 +154,17 @@ where } } -pub(crate) fn sorted_get_many_mut_unchecked( +/// To retrieve multiple indexes from the sorted array in an optimized fashion, +/// [get_many_from_sorted_mut] first of all sorts the `indexes` vector. +/// +/// `get_many_from_sorted_mut_unchecked` does not perform this sorting, +/// assuming that the user has already taken care of it. +/// +/// Useful when you have to call [get_many_from_sorted_mut] multiple times +/// using the same indexes. +/// +/// [get_many_from_sorted_mut]:(##tymethod.get_many_from_sorted_mut) +pub(crate) fn get_many_from_sorted_mut_unchecked( array: &mut ArrayBase, indexes: &[usize]) -> IndexMap where A: Ord + Clone, From ac93a1e1278bb03b9efed27e25a48238e5f2ffba Mon Sep 17 00:00:00 2001 From: LukeMathWalker Date: Sat, 9 Feb 2019 17:09:11 +0000 Subject: [PATCH 19/81] Docs for quantiles_mut --- src/quantile/mod.rs | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) diff --git a/src/quantile/mod.rs b/src/quantile/mod.rs index 60857183..f2e3fab1 100644 --- a/src/quantile/mod.rs +++ b/src/quantile/mod.rs @@ -406,6 +406,22 @@ where S: DataMut, I: Interpolate; + /// A bulk version of [quantile_mut], optimized to retrieve multiple + /// quantiles at once. + /// It returns an IndexMap, with (quantile index, quantile value) as + /// key-value pairs. + /// + /// The IndexMap is sorted with respect to quantile indexes in increasing order: + /// this ordering is preserved when you iterate over it (using `iter`/`into_iter`). + /// + /// It returns `None` if the array is empty. + /// + /// See [quantile_mut] for additional details on quantiles and the algorithm + /// used to retrieve them. + /// + /// **Panics** if any `q` in `qs` is not between `0.` and `1.` (inclusive). + /// + /// [quantile_mut]:(##tymethod.quantile_mut) fn quantiles_mut(&mut self, qs: &[N64]) -> Option> where A: Ord + Clone, From c408c671917adb31465be7e54bbdea16005b4518 Mon Sep 17 00:00:00 2001 From: LukeMathWalker Date: Sat, 9 Feb 2019 17:21:41 +0000 Subject: [PATCH 20/81] Fixed several typos in docs --- src/quantile/mod.rs | 17 ++++++++++++++++- src/sort.rs | 4 ++-- 2 files changed, 18 insertions(+), 3 deletions(-) diff --git a/src/quantile/mod.rs b/src/quantile/mod.rs index f2e3fab1..8b5989ff 100644 --- a/src/quantile/mod.rs +++ b/src/quantile/mod.rs @@ -166,6 +166,21 @@ where S: DataMut, I: Interpolate; + /// A bulk version of [quantile_axis_mut], optimized to retrieve multiple + /// quantiles at once. + /// It returns an IndexMap, with (quantile index, quantile over axis) as + /// key-value pairs. + /// + /// The IndexMap is sorted with respect to quantile indexes in increasing order: + /// this ordering is preserved when you iterate over it (using `iter`/`into_iter`). + /// + /// See [quantile_axis_mut] for additional details on quantiles and the algorithm + /// used to retrieve them. + /// + /// **Panics** if `axis` is out of bounds, if the axis has length 0, or if + /// any `q` `qs` is not between `0.` and `1.` (inclusive). + /// + /// [quantile_axis_mut]: ##tymethod.quantile_axis_mut fn quantiles_axis_mut(&mut self, axis: Axis, qs: &[N64]) -> IndexMap> where D: RemoveAxis, @@ -421,7 +436,7 @@ where /// /// **Panics** if any `q` in `qs` is not between `0.` and `1.` (inclusive). /// - /// [quantile_mut]:(##tymethod.quantile_mut) + /// [quantile_mut]: ##tymethod.quantile_mut fn quantiles_mut(&mut self, qs: &[N64]) -> Option> where A: Ord + Clone, diff --git a/src/sort.rs b/src/sort.rs index cbf42ccd..f75862e5 100644 --- a/src/sort.rs +++ b/src/sort.rs @@ -43,7 +43,7 @@ where /// **Panics** if any element in `indexes` is greater than or equal to `n`, /// where `n` is the length of the array.. /// - /// [get_sorted_mut]:(##tymethod.get_from_sorted_mut) + /// [get_from_sorted_mut]: ##tymethod.get_from_sorted_mut fn get_many_from_sorted_mut(&mut self, indexes: &[usize]) -> IndexMap where A: Ord + Clone, @@ -163,7 +163,7 @@ where /// Useful when you have to call [get_many_from_sorted_mut] multiple times /// using the same indexes. /// -/// [get_many_from_sorted_mut]:(##tymethod.get_many_from_sorted_mut) +/// [get_many_from_sorted_mut]: ../trait.Sort1dExt.html#tymethod.get_many_from_sorted_mut pub(crate) fn get_many_from_sorted_mut_unchecked( array: &mut ArrayBase, indexes: &[usize]) -> IndexMap where From c4719550247750339b5618a9f4eec669b68031a7 Mon Sep 17 00:00:00 2001 From: LukeMathWalker Date: Mon, 11 Feb 2019 07:45:44 +0000 Subject: [PATCH 21/81] More robust test --- tests/sort.rs | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/tests/sort.rs b/tests/sort.rs index 685eb912..30480f75 100644 --- a/tests/sort.rs +++ b/tests/sort.rs @@ -55,7 +55,11 @@ fn test_sorted_get_many_mut(mut xs: Vec) -> bool { true } else { let mut v = Array::from_vec(xs.clone()); - let indexes: Vec = (0..n).into_iter().collect(); + + // Insert each index twice, to get a set of indexes with duplicates, not sorted + let mut indexes: Vec = (0..n).into_iter().collect(); + indexes.append(&mut (0..n).into_iter().collect()); + let sorted_v: Vec = v.get_many_from_sorted_mut(&indexes) .into_iter() .map(|x| x.1) From 1411f1565c7296faf0a45741dc6e38e222b0c759 Mon Sep 17 00:00:00 2001 From: LukeMathWalker Date: Mon, 11 Feb 2019 08:08:25 +0000 Subject: [PATCH 22/81] Added test for quantiles --- src/lib.rs | 1 - tests/quantile.rs | 52 +++++++++++++++++++++++++++++++++++++++++------ 2 files changed, 46 insertions(+), 7 deletions(-) diff --git a/src/lib.rs b/src/lib.rs index 0af1bbb1..03b4de24 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -30,7 +30,6 @@ extern crate ndarray; extern crate noisy_float; extern crate num_traits; extern crate rand; -extern crate itertools; extern crate indexmap; #[cfg(test)] diff --git a/tests/quantile.rs b/tests/quantile.rs index 67b7fee7..77bf2a2d 100644 --- a/tests/quantile.rs +++ b/tests/quantile.rs @@ -1,16 +1,18 @@ -#[macro_use(array)] extern crate ndarray; extern crate ndarray_stats; -extern crate quickcheck; extern crate noisy_float; +#[macro_use] +extern crate quickcheck; +extern crate quickcheck_macros; -use noisy_float::types::n64; +use noisy_float::types::{n64, N64}; use ndarray::prelude::*; +use ndarray::array; use ndarray_stats::{ - interpolate::{Higher, Linear, Lower, Midpoint, Nearest}, + interpolate::{Interpolate, Higher, Linear, Lower, Midpoint, Nearest}, Quantile1dExt, QuantileExt, }; -use quickcheck::quickcheck; +use quickcheck_macros::quickcheck; #[test] fn test_argmin() { @@ -202,7 +204,45 @@ fn test_midpoint_overflow() { // Regression test // This triggered an overflow panic with a naive Midpoint implementation: (a+b)/2 let mut a: Array1 = array![129, 130, 130, 131]; - let median = a.quantile_mut::(0.5).unwrap(); + let median = a.quantile_mut::(n64(0.5)).unwrap(); let expected_median = 130; assert_eq!(median, expected_median); } + +#[quickcheck] +fn test_quantiles_mut(xs: Vec) -> bool { + let v = Array::from_vec(xs.clone()); + + // Unordered list of quantile indexes to look up, with a duplicate + let quantile_indexes = vec![ + n64(0.75), n64(0.90), n64(0.95), n64(0.99), n64(1.), + n64(0.), n64(0.25), n64(0.5), n64(0.5) + ]; + let mut checks = vec![]; + checks.push(check_one_interpolation_method_for_quantiles_mut::(v.clone(), &quantile_indexes)); + checks.push(check_one_interpolation_method_for_quantiles_mut::(v.clone(), &quantile_indexes)); + checks.push(check_one_interpolation_method_for_quantiles_mut::(v.clone(), &quantile_indexes)); + checks.push(check_one_interpolation_method_for_quantiles_mut::(v.clone(), &quantile_indexes)); + checks.push(check_one_interpolation_method_for_quantiles_mut::(v.clone(), &quantile_indexes)); + checks.into_iter().all(|x| x) +} + +fn check_one_interpolation_method_for_quantiles_mut>(mut v: Array1, quantile_indexes: &[N64]) -> bool +{ + let bulk_quantiles = v.quantiles_mut::(&quantile_indexes); + + if v.len() == 0 { + bulk_quantiles.is_none() + } else { + let bulk_quantiles = bulk_quantiles.unwrap(); + + let mut checks = vec![]; + for quantile_index in quantile_indexes.iter() { + let quantile = v.quantile_mut::(*quantile_index).unwrap(); + checks.push( + quantile == *bulk_quantiles.get(quantile_index).unwrap() + ); + } + checks.into_iter().all(|x| x) + } +} From 48f2bf00e59be9af9dfab3790c0f9d1a92a64b7c Mon Sep 17 00:00:00 2001 From: LukeMathWalker Date: Mon, 11 Feb 2019 08:21:08 +0000 Subject: [PATCH 23/81] Test quantiles_axis_mut --- tests/quantile.rs | 63 +++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 63 insertions(+) diff --git a/tests/quantile.rs b/tests/quantile.rs index 77bf2a2d..d0c8fc4e 100644 --- a/tests/quantile.rs +++ b/tests/quantile.rs @@ -246,3 +246,66 @@ fn check_one_interpolation_method_for_quantiles_mut>(mut v: checks.into_iter().all(|x| x) } } + +#[quickcheck] +fn test_quantiles_axis_mut(xs: Vec) -> bool { + // We want a square matrix + let axis_length = (xs.len() as f64).sqrt().floor() as usize; + let xs = &xs[..axis_length.pow(2)]; + let m = Array::from_vec(xs.to_vec()) + .into_shape((axis_length, axis_length)) + .unwrap(); + + // Unordered list of quantile indexes to look up, with a duplicate + let quantile_indexes = vec![ + n64(0.75), n64(0.90), n64(0.95), n64(0.99), n64(1.), + n64(0.), n64(0.25), n64(0.5), n64(0.5) + ]; + + // Test out all interpolation methods + let mut checks = vec![]; + checks.push( + check_one_interpolation_method_for_quantiles_axis_mut::( + m.clone(), &quantile_indexes, Axis(0) + ) + ); + checks.push( + check_one_interpolation_method_for_quantiles_axis_mut::( + m.clone(), &quantile_indexes, Axis(0) + ) + ); + checks.push( + check_one_interpolation_method_for_quantiles_axis_mut::( + m.clone(), &quantile_indexes, Axis(0) + ) + ); + checks.push( + check_one_interpolation_method_for_quantiles_axis_mut::( + m.clone(), &quantile_indexes, Axis(0) + ) + ); + checks.push( + check_one_interpolation_method_for_quantiles_axis_mut::( + m.clone(), &quantile_indexes, Axis(0) + ) + ); + checks.into_iter().all(|x| x) +} + +fn check_one_interpolation_method_for_quantiles_axis_mut>(mut v: Array2, quantile_indexes: &[N64], axis: Axis) -> bool +{ + let bulk_quantiles = v.quantiles_axis_mut::(axis, &quantile_indexes); + + if v.len() == 0 { + true + } else { + let mut checks = vec![]; + for quantile_index in quantile_indexes.iter() { + let quantile = v.quantile_axis_mut::(axis, *quantile_index); + checks.push( + quantile == *bulk_quantiles.get(quantile_index).unwrap() + ); + } + checks.into_iter().all(|x| x) + } +} From c27feb10bd4e38150ab519b406bacb0559f7407d Mon Sep 17 00:00:00 2001 From: LukeMathWalker Date: Mon, 11 Feb 2019 08:22:53 +0000 Subject: [PATCH 24/81] Add comments --- src/quantile/mod.rs | 3 +++ 1 file changed, 3 insertions(+) diff --git a/src/quantile/mod.rs b/src/quantile/mod.rs index 8b5989ff..2a33b552 100644 --- a/src/quantile/mod.rs +++ b/src/quantile/mod.rs @@ -317,11 +317,14 @@ where } let searched_indexes: Vec = searched_indexes.into_iter().collect(); + // Retrieve the values corresponding to each index for each slice along the specified axis let values = self.map_axis_mut( axis, |mut x| get_many_from_sorted_mut_unchecked(&mut x, &searched_indexes) ); + // Combine the retrieved values according to specified interpolation strategy to + // get the desired quantiles let mut results = IndexMap::new(); for q in qs { let result = I::interpolate( From 00e14f7008a712accdb1539cc5c536bba9a4fdc2 Mon Sep 17 00:00:00 2001 From: LukeMathWalker Date: Mon, 11 Feb 2019 08:43:57 +0000 Subject: [PATCH 25/81] Return options when the lane we are computing against is empty --- src/quantile/mod.rs | 158 +++++++++++++++++++++++--------------------- 1 file changed, 81 insertions(+), 77 deletions(-) diff --git a/src/quantile/mod.rs b/src/quantile/mod.rs index 2a33b552..a03eb48a 100644 --- a/src/quantile/mod.rs +++ b/src/quantile/mod.rs @@ -159,7 +159,7 @@ where /// /// **Panics** if `axis` is out of bounds, if the axis has length 0, or if /// `q` is not between `0.` and `1.` (inclusive). - fn quantile_axis_mut(&mut self, axis: Axis, q: N64) -> Array + fn quantile_axis_mut(&mut self, axis: Axis, q: N64) -> Option> where D: RemoveAxis, A: Ord + Clone, @@ -181,7 +181,7 @@ where /// any `q` `qs` is not between `0.` and `1.` (inclusive). /// /// [quantile_axis_mut]: ##tymethod.quantile_axis_mut - fn quantiles_axis_mut(&mut self, axis: Axis, qs: &[N64]) -> IndexMap> + fn quantiles_axis_mut(&mut self, axis: Axis, qs: &[N64]) -> Option>> where D: RemoveAxis, A: Ord + Clone, @@ -191,7 +191,7 @@ where /// Return the `q`th quantile of the data along the specified axis, skipping NaN values. /// /// See [`quantile_axis_mut`](##tymethod.quantile_axis_mut) for details. - fn quantile_axis_skipnan_mut(&mut self, axis: Axis, q: N64) -> Array + fn quantile_axis_skipnan_mut(&mut self, axis: Axis, q: N64) -> Option> where D: RemoveAxis, A: MaybeNan, @@ -289,7 +289,7 @@ where })) } - fn quantiles_axis_mut(&mut self, axis: Axis, qs: &[N64]) -> IndexMap> + fn quantiles_axis_mut(&mut self, axis: Axis, qs: &[N64]) -> Option>> where D: RemoveAxis, A: Ord + Clone, @@ -298,69 +298,75 @@ where { assert!(qs.iter().all(|x| (*x >= 0.) && (*x <= 1.))); - let mut deduped_qs: Vec = qs.to_vec(); - deduped_qs.sort_by(|a, b| a.partial_cmp(b).unwrap()); - deduped_qs.dedup(); - let axis_len = self.len_of(axis); - // IndexSet preserves insertion order: - // - indexes will stay sorted; - // - we avoid index duplication. - let mut searched_indexes = IndexSet::new(); - for q in deduped_qs.iter() { - if I::needs_lower(*q, axis_len) { - searched_indexes.insert(I::lower_index(*q, axis_len)); - } - if I::needs_higher(*q, axis_len) { - searched_indexes.insert(I::higher_index(*q, axis_len)); + if axis_len == 0 { + None + } else { + let mut deduped_qs: Vec = qs.to_vec(); + deduped_qs.sort_by(|a, b| a.partial_cmp(b).unwrap()); + deduped_qs.dedup(); + + // IndexSet preserves insertion order: + // - indexes will stay sorted; + // - we avoid index duplication. + let mut searched_indexes = IndexSet::new(); + for q in deduped_qs.iter() { + if I::needs_lower(*q, axis_len) { + searched_indexes.insert(I::lower_index(*q, axis_len)); + } + if I::needs_higher(*q, axis_len) { + searched_indexes.insert(I::higher_index(*q, axis_len)); + } } - } - let searched_indexes: Vec = searched_indexes.into_iter().collect(); + let searched_indexes: Vec = searched_indexes.into_iter().collect(); - // Retrieve the values corresponding to each index for each slice along the specified axis - let values = self.map_axis_mut( - axis, - |mut x| get_many_from_sorted_mut_unchecked(&mut x, &searched_indexes) - ); + // Retrieve the values corresponding to each index for each slice along the specified axis + let values = self.map_axis_mut( + axis, + |mut x| get_many_from_sorted_mut_unchecked(&mut x, &searched_indexes) + ); - // Combine the retrieved values according to specified interpolation strategy to - // get the desired quantiles - let mut results = IndexMap::new(); - for q in qs { - let result = I::interpolate( - match I::needs_lower(*q, axis_len) { - true => { - let lower_index = &I::lower_index(*q, axis_len); - Some(values.map(|x| x.get(lower_index).unwrap().clone())) + // Combine the retrieved values according to specified interpolation strategy to + // get the desired quantiles + let mut results = IndexMap::new(); + for q in qs { + let result = I::interpolate( + match I::needs_lower(*q, axis_len) { + true => { + let lower_index = &I::lower_index(*q, axis_len); + Some(values.map(|x| x.get(lower_index).unwrap().clone())) + }, + false => None, }, - false => None, - }, - match I::needs_higher(*q, axis_len) { - true => { - let higher_index = &I::higher_index(*q, axis_len); - Some(values.map(|x| x.get(higher_index).unwrap().clone())) + match I::needs_higher(*q, axis_len) { + true => { + let higher_index = &I::higher_index(*q, axis_len); + Some(values.map(|x| x.get(higher_index).unwrap().clone())) + }, + false => None, }, - false => None, - }, - *q, - axis_len - ); - results.insert(*q, result); + *q, + axis_len + ); + results.insert(*q, result); + } + Some(results) } - results } - fn quantile_axis_mut(&mut self, axis: Axis, q: N64) -> Array + fn quantile_axis_mut(&mut self, axis: Axis, q: N64) -> Option> where D: RemoveAxis, A: Ord + Clone, S: DataMut, I: Interpolate, { - self.quantiles_axis_mut::(axis, &[q]).into_iter().next().unwrap().1 + self.quantiles_axis_mut::(axis, &[q]).map( + |x| x.into_iter().next().unwrap().1 + ) } - fn quantile_axis_skipnan_mut(&mut self, axis: Axis, q: N64) -> Array + fn quantile_axis_skipnan_mut(&mut self, axis: Axis, q: N64) -> Option> where D: RemoveAxis, A: MaybeNan, @@ -368,19 +374,26 @@ where S: DataMut, I: Interpolate, { - self.map_axis_mut(axis, |lane| { - let mut not_nan = A::remove_nan_mut(lane); - A::from_not_nan_opt(if not_nan.is_empty() { - None - } else { - Some( - not_nan - .quantile_axis_mut::(Axis(0), q) - .into_raw_vec() - .remove(0), - ) - }) - }) + if self.len_of(axis) > 0 { + Some( + self.map_axis_mut(axis, |lane| { + let mut not_nan = A::remove_nan_mut(lane); + A::from_not_nan_opt(if not_nan.is_empty() { + None + } else { + Some( + not_nan + .quantile_axis_mut::(Axis(0), q) + .unwrap() + .into_raw_vec() + .remove(0), + ) + }) + }) + ) + } else { + None + } } } @@ -457,11 +470,7 @@ where S: DataMut, I: Interpolate, { - if self.is_empty() { - None - } else { - Some(self.quantile_axis_mut::(Axis(0), q).into_scalar()) - } + self.quantile_axis_mut::(Axis(0), q).map(|v| v.into_scalar()) } fn quantiles_mut(&mut self, qs: &[N64]) -> Option> @@ -470,16 +479,11 @@ where S: DataMut, I: Interpolate, { - if self.is_empty() { - None - } else { - Some( - self.quantiles_axis_mut::(Axis(0), qs) - .into_iter() - .map(|x| (x.0, x.1.into_scalar())) - .collect() + self.quantiles_axis_mut::(Axis(0), qs).map( + |v| v.into_iter() + .map(|x| (x.0, x.1.into_scalar())) + .collect() ) - } } } From 846c33689727e212ac88c523037573062ae865b5 Mon Sep 17 00:00:00 2001 From: LukeMathWalker Date: Mon, 11 Feb 2019 08:45:00 +0000 Subject: [PATCH 26/81] Fixed docs --- src/quantile/mod.rs | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/src/quantile/mod.rs b/src/quantile/mod.rs index a03eb48a..69e86e71 100644 --- a/src/quantile/mod.rs +++ b/src/quantile/mod.rs @@ -157,7 +157,9 @@ where /// - worst case: O(`m`^2); /// where `m` is the number of elements in the array. /// - /// **Panics** if `axis` is out of bounds, if the axis has length 0, or if + /// Returns `None` when the specified axis has length 0. + /// + /// **Panics** if `axis` is out of bounds or if /// `q` is not between `0.` and `1.` (inclusive). fn quantile_axis_mut(&mut self, axis: Axis, q: N64) -> Option> where @@ -177,7 +179,9 @@ where /// See [quantile_axis_mut] for additional details on quantiles and the algorithm /// used to retrieve them. /// - /// **Panics** if `axis` is out of bounds, if the axis has length 0, or if + /// Returns `None` when the specified axis has length 0. + /// + /// **Panics** if `axis` is out of bounds or if /// any `q` `qs` is not between `0.` and `1.` (inclusive). /// /// [quantile_axis_mut]: ##tymethod.quantile_axis_mut From ab8d701311440488142c7d031d2de926032c6023 Mon Sep 17 00:00:00 2001 From: LukeMathWalker Date: Mon, 11 Feb 2019 08:48:34 +0000 Subject: [PATCH 27/81] Fixed tests --- tests/quantile.rs | 28 ++++++++++++++-------------- 1 file changed, 14 insertions(+), 14 deletions(-) diff --git a/tests/quantile.rs b/tests/quantile.rs index d0c8fc4e..40235554 100644 --- a/tests/quantile.rs +++ b/tests/quantile.rs @@ -115,49 +115,48 @@ fn test_max_skipnan_all_nan() { #[test] fn test_quantile_axis_mut_with_odd_axis_length() { let mut a = arr2(&[[1, 3, 2, 10], [2, 4, 3, 11], [3, 5, 6, 12]]); - let p = a.quantile_axis_mut::(Axis(0), n64(0.5)); + let p = a.quantile_axis_mut::(Axis(0), n64(0.5)).unwrap(); assert!(p == a.index_axis(Axis(0), 1)); } #[test] -#[should_panic] fn test_quantile_axis_mut_with_zero_axis_length() { let mut a = Array2::::zeros((5, 0)); - a.quantile_axis_mut::(Axis(1), n64(0.5)); + assert!(a.quantile_axis_mut::(Axis(1), n64(0.5)).is_none()); } #[test] fn test_quantile_axis_mut_with_empty_array() { let mut a = Array2::::zeros((5, 0)); - let p = a.quantile_axis_mut::(Axis(0), n64(0.5)); + let p = a.quantile_axis_mut::(Axis(0), n64(0.5)).unwrap(); assert_eq!(p.shape(), &[0]); } #[test] fn test_quantile_axis_mut_with_even_axis_length() { let mut b = arr2(&[[1, 3, 2, 10], [2, 4, 3, 11], [3, 5, 6, 12], [4, 6, 7, 13]]); - let q = b.quantile_axis_mut::(Axis(0), n64(0.5)); + let q = b.quantile_axis_mut::(Axis(0), n64(0.5)).unwrap(); assert!(q == b.index_axis(Axis(0), 1)); } #[test] fn test_quantile_axis_mut_to_get_minimum() { let mut b = arr2(&[[1, 3, 22, 10]]); - let q = b.quantile_axis_mut::(Axis(1), n64(0.)); + let q = b.quantile_axis_mut::(Axis(1), n64(0.)).unwrap(); assert!(q == arr1(&[1])); } #[test] fn test_quantile_axis_mut_to_get_maximum() { let mut b = arr1(&[1, 3, 22, 10]); - let q = b.quantile_axis_mut::(Axis(0), n64(1.)); + let q = b.quantile_axis_mut::(Axis(0), n64(1.)).unwrap(); assert!(q == arr0(22)); } #[test] fn test_quantile_axis_skipnan_mut_higher_opt_i32() { let mut a = arr2(&[[Some(4), Some(2), None, Some(1), Some(5)], [None; 5]]); - let q = a.quantile_axis_skipnan_mut::(Axis(1), n64(0.6)); + let q = a.quantile_axis_skipnan_mut::(Axis(1), n64(0.6)).unwrap(); assert_eq!(q.shape(), &[2]); assert_eq!(q[0], Some(4)); assert!(q[1].is_none()); @@ -166,7 +165,7 @@ fn test_quantile_axis_skipnan_mut_higher_opt_i32() { #[test] fn test_quantile_axis_skipnan_mut_nearest_opt_i32() { let mut a = arr2(&[[Some(4), Some(2), None, Some(1), Some(5)], [None; 5]]); - let q = a.quantile_axis_skipnan_mut::(Axis(1), n64(0.6)); + let q = a.quantile_axis_skipnan_mut::(Axis(1), n64(0.6)).unwrap(); assert_eq!(q.shape(), &[2]); assert_eq!(q[0], Some(4)); assert!(q[1].is_none()); @@ -175,7 +174,7 @@ fn test_quantile_axis_skipnan_mut_nearest_opt_i32() { #[test] fn test_quantile_axis_skipnan_mut_midpoint_opt_i32() { let mut a = arr2(&[[Some(4), Some(2), None, Some(1), Some(5)], [None; 5]]); - let q = a.quantile_axis_skipnan_mut::(Axis(1), n64(0.6)); + let q = a.quantile_axis_skipnan_mut::(Axis(1), n64(0.6)).unwrap(); assert_eq!(q.shape(), &[2]); assert_eq!(q[0], Some(3)); assert!(q[1].is_none()); @@ -184,7 +183,7 @@ fn test_quantile_axis_skipnan_mut_midpoint_opt_i32() { #[test] fn test_quantile_axis_skipnan_mut_linear_f64() { let mut a = arr2(&[[1., 2., ::std::f64::NAN, 3.], [::std::f64::NAN; 4]]); - let q = a.quantile_axis_skipnan_mut::(Axis(1), n64(0.75)); + let q = a.quantile_axis_skipnan_mut::(Axis(1), n64(0.75)).unwrap(); assert_eq!(q.shape(), &[2]); assert!((q[0] - 2.5).abs() < 1e-12); assert!(q[1].is_nan()); @@ -193,7 +192,7 @@ fn test_quantile_axis_skipnan_mut_linear_f64() { #[test] fn test_quantile_axis_skipnan_mut_linear_opt_i32() { let mut a = arr2(&[[Some(2), Some(4), None, Some(1)], [None; 4]]); - let q = a.quantile_axis_skipnan_mut::(Axis(1), n64(0.75)); + let q = a.quantile_axis_skipnan_mut::(Axis(1), n64(0.75)).unwrap(); assert_eq!(q.shape(), &[2]); assert_eq!(q[0], Some(3)); assert!(q[1].is_none()); @@ -297,11 +296,12 @@ fn check_one_interpolation_method_for_quantiles_axis_mut>(mu let bulk_quantiles = v.quantiles_axis_mut::(axis, &quantile_indexes); if v.len() == 0 { - true + bulk_quantiles.is_none() } else { + let bulk_quantiles = bulk_quantiles.unwrap(); let mut checks = vec![]; for quantile_index in quantile_indexes.iter() { - let quantile = v.quantile_axis_mut::(axis, *quantile_index); + let quantile = v.quantile_axis_mut::(axis, *quantile_index).unwrap(); checks.push( quantile == *bulk_quantiles.get(quantile_index).unwrap() ); From 8b3834566990227cb3596610825e2aa59bc99db0 Mon Sep 17 00:00:00 2001 From: Jim Turner Date: Sat, 9 Mar 2019 16:45:06 -0500 Subject: [PATCH 28/81] Move *index* functions out of Interpolate trait The behavior of these functions should be independent of the interpolation strategy. --- src/quantile/interpolate.rs | 52 ++++++++++++++++++++++++------------- src/quantile/mod.rs | 10 +++---- 2 files changed, 39 insertions(+), 23 deletions(-) diff --git a/src/quantile/interpolate.rs b/src/quantile/interpolate.rs index f0fd1f6a..702e39a6 100644 --- a/src/quantile/interpolate.rs +++ b/src/quantile/interpolate.rs @@ -5,30 +5,46 @@ use noisy_float::types::N64; use num_traits::{Float, FromPrimitive, ToPrimitive}; use std::ops::{Add, Div}; +fn float_quantile_index(q: N64, len: usize) -> N64 { + q * ((len - 1) as f64) +} + +/// Returns the fraction that the quantile is between the lower and higher indices. +/// +/// This ranges from 0, where the quantile exactly corresponds the lower index, +/// to 1, where the quantile exactly corresponds to the higher index. +fn float_quantile_index_fraction(q: N64, len: usize) -> N64 { + float_quantile_index(q, len).fract() +} + +/// Returns the index of the value on the lower side of the quantile. +pub(crate) fn lower_index(q: N64, len: usize) -> usize { + float_quantile_index(q, len).floor().to_usize().unwrap() +} + +/// Returns the index of the value on the higher side of the quantile. +pub(crate) fn higher_index(q: N64, len: usize) -> usize { + float_quantile_index(q, len).ceil().to_usize().unwrap() +} + /// Used to provide an interpolation strategy to [`quantile_axis_mut`]. /// /// [`quantile_axis_mut`]: ../trait.QuantileExt.html#tymethod.quantile_axis_mut pub trait Interpolate { - #[doc(hidden)] - fn float_quantile_index(q: N64, len: usize) -> N64 { - q * ((len - 1) as f64) - } - #[doc(hidden)] - fn lower_index(q: N64, len: usize) -> usize { - Self::float_quantile_index(q, len).floor().to_usize().unwrap() - } - #[doc(hidden)] - fn higher_index(q: N64, len: usize) -> usize { - Self::float_quantile_index(q, len).ceil().to_usize().unwrap() - } - #[doc(hidden)] - fn float_quantile_index_fraction(q: N64, len: usize) -> N64 { - Self::float_quantile_index(q, len).fract() - } + /// Returns `true` iff the lower value is needed to compute the + /// interpolated value. #[doc(hidden)] fn needs_lower(q: N64, len: usize) -> bool; + + /// Returns `true` iff the higher value is needed to compute the + /// interpolated value. #[doc(hidden)] fn needs_higher(q: N64, len: usize) -> bool; + + /// Computes the interpolated value. + /// + /// **Panics** if `None` is provided for the lower value when it's needed + /// or if `None` is provided for the higher value when it's needed. #[doc(hidden)] fn interpolate( lower: Option>, @@ -89,7 +105,7 @@ impl Interpolate for Lower { impl Interpolate for Nearest { fn needs_lower(q: N64, len: usize) -> bool { - >::float_quantile_index_fraction(q, len) < 0.5 + float_quantile_index_fraction(q, len) < 0.5 } fn needs_higher(q: N64, len: usize) -> bool { !>::needs_lower(q, len) @@ -151,7 +167,7 @@ impl Interpolate for Linear where D: Dimension, { - let fraction = >::float_quantile_index_fraction(q, len).to_f64().unwrap(); + let fraction = float_quantile_index_fraction(q, len).to_f64().unwrap(); let mut a = lower.unwrap(); let b = higher.unwrap(); azip!(mut a, ref b in { diff --git a/src/quantile/mod.rs b/src/quantile/mod.rs index 69e86e71..1c51d8d5 100644 --- a/src/quantile/mod.rs +++ b/src/quantile/mod.rs @@ -1,4 +1,4 @@ -use self::interpolate::Interpolate; +use self::interpolate::{higher_index, lower_index, Interpolate}; use super::sort::get_many_from_sorted_mut_unchecked; use std::cmp; use noisy_float::types::N64; @@ -316,10 +316,10 @@ where let mut searched_indexes = IndexSet::new(); for q in deduped_qs.iter() { if I::needs_lower(*q, axis_len) { - searched_indexes.insert(I::lower_index(*q, axis_len)); + searched_indexes.insert(lower_index(*q, axis_len)); } if I::needs_higher(*q, axis_len) { - searched_indexes.insert(I::higher_index(*q, axis_len)); + searched_indexes.insert(higher_index(*q, axis_len)); } } let searched_indexes: Vec = searched_indexes.into_iter().collect(); @@ -337,14 +337,14 @@ where let result = I::interpolate( match I::needs_lower(*q, axis_len) { true => { - let lower_index = &I::lower_index(*q, axis_len); + let lower_index = &lower_index(*q, axis_len); Some(values.map(|x| x.get(lower_index).unwrap().clone())) }, false => None, }, match I::needs_higher(*q, axis_len) { true => { - let higher_index = &I::higher_index(*q, axis_len); + let higher_index = &higher_index(*q, axis_len); Some(values.map(|x| x.get(higher_index).unwrap().clone())) }, false => None, From 57715146e965998302967032556e533aee7e0e04 Mon Sep 17 00:00:00 2001 From: Jim Turner Date: Sat, 9 Mar 2019 17:03:47 -0500 Subject: [PATCH 29/81] Reduce indentation in quantiles_axis_mut --- src/quantile/mod.rs | 92 ++++++++++++++++++++++----------------------- 1 file changed, 46 insertions(+), 46 deletions(-) diff --git a/src/quantile/mod.rs b/src/quantile/mod.rs index 1c51d8d5..1d7c8750 100644 --- a/src/quantile/mod.rs +++ b/src/quantile/mod.rs @@ -304,58 +304,58 @@ where let axis_len = self.len_of(axis); if axis_len == 0 { - None - } else { - let mut deduped_qs: Vec = qs.to_vec(); - deduped_qs.sort_by(|a, b| a.partial_cmp(b).unwrap()); - deduped_qs.dedup(); - - // IndexSet preserves insertion order: - // - indexes will stay sorted; - // - we avoid index duplication. - let mut searched_indexes = IndexSet::new(); - for q in deduped_qs.iter() { - if I::needs_lower(*q, axis_len) { - searched_indexes.insert(lower_index(*q, axis_len)); - } - if I::needs_higher(*q, axis_len) { - searched_indexes.insert(higher_index(*q, axis_len)); - } + return None; + } + + let mut deduped_qs: Vec = qs.to_vec(); + deduped_qs.sort_by(|a, b| a.partial_cmp(b).unwrap()); + deduped_qs.dedup(); + + // IndexSet preserves insertion order: + // - indexes will stay sorted; + // - we avoid index duplication. + let mut searched_indexes = IndexSet::new(); + for q in deduped_qs.iter() { + if I::needs_lower(*q, axis_len) { + searched_indexes.insert(lower_index(*q, axis_len)); + } + if I::needs_higher(*q, axis_len) { + searched_indexes.insert(higher_index(*q, axis_len)); } - let searched_indexes: Vec = searched_indexes.into_iter().collect(); + } + let searched_indexes: Vec = searched_indexes.into_iter().collect(); - // Retrieve the values corresponding to each index for each slice along the specified axis - let values = self.map_axis_mut( - axis, - |mut x| get_many_from_sorted_mut_unchecked(&mut x, &searched_indexes) - ); + // Retrieve the values corresponding to each index for each slice along the specified axis + let values = self.map_axis_mut( + axis, + |mut x| get_many_from_sorted_mut_unchecked(&mut x, &searched_indexes) + ); - // Combine the retrieved values according to specified interpolation strategy to - // get the desired quantiles - let mut results = IndexMap::new(); - for q in qs { - let result = I::interpolate( - match I::needs_lower(*q, axis_len) { - true => { - let lower_index = &lower_index(*q, axis_len); - Some(values.map(|x| x.get(lower_index).unwrap().clone())) - }, - false => None, + // Combine the retrieved values according to specified interpolation strategy to + // get the desired quantiles + let mut results = IndexMap::new(); + for q in qs { + let result = I::interpolate( + match I::needs_lower(*q, axis_len) { + true => { + let lower_index = &lower_index(*q, axis_len); + Some(values.map(|x| x.get(lower_index).unwrap().clone())) }, - match I::needs_higher(*q, axis_len) { - true => { - let higher_index = &higher_index(*q, axis_len); - Some(values.map(|x| x.get(higher_index).unwrap().clone())) - }, - false => None, + false => None, + }, + match I::needs_higher(*q, axis_len) { + true => { + let higher_index = &higher_index(*q, axis_len); + Some(values.map(|x| x.get(higher_index).unwrap().clone())) }, - *q, - axis_len - ); - results.insert(*q, result); - } - Some(results) + false => None, + }, + *q, + axis_len + ); + results.insert(*q, result); } + Some(results) } fn quantile_axis_mut(&mut self, axis: Axis, q: N64) -> Option> From e0eb6864b2e856b438634f5eb8f617c2683eb537 Mon Sep 17 00:00:00 2001 From: Jim Turner Date: Sat, 9 Mar 2019 17:09:52 -0500 Subject: [PATCH 30/81] Reduce indentation in quantile_axis_skipnan_mut --- src/quantile/mod.rs | 36 +++++++++++++++++------------------- 1 file changed, 17 insertions(+), 19 deletions(-) diff --git a/src/quantile/mod.rs b/src/quantile/mod.rs index 1d7c8750..52fc7077 100644 --- a/src/quantile/mod.rs +++ b/src/quantile/mod.rs @@ -378,26 +378,24 @@ where S: DataMut, I: Interpolate, { - if self.len_of(axis) > 0 { - Some( - self.map_axis_mut(axis, |lane| { - let mut not_nan = A::remove_nan_mut(lane); - A::from_not_nan_opt(if not_nan.is_empty() { - None - } else { - Some( - not_nan - .quantile_axis_mut::(Axis(0), q) - .unwrap() - .into_raw_vec() - .remove(0), - ) - }) - }) - ) - } else { - None + if self.len_of(axis) == 0 { + return None; } + let quantile = self.map_axis_mut(axis, |lane| { + let mut not_nan = A::remove_nan_mut(lane); + A::from_not_nan_opt(if not_nan.is_empty() { + None + } else { + Some( + not_nan + .quantile_axis_mut::(Axis(0), q) + .unwrap() + .into_raw_vec() + .remove(0), + ) + }) + }); + Some(quantile) } } From 6c511451e5ad4023707ef12534c8ce4d4e539f92 Mon Sep 17 00:00:00 2001 From: Jim Turner Date: Sat, 9 Mar 2019 17:11:43 -0500 Subject: [PATCH 31/81] Use .into_scalar() method --- src/quantile/mod.rs | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/src/quantile/mod.rs b/src/quantile/mod.rs index 52fc7077..17ecb00b 100644 --- a/src/quantile/mod.rs +++ b/src/quantile/mod.rs @@ -390,8 +390,7 @@ where not_nan .quantile_axis_mut::(Axis(0), q) .unwrap() - .into_raw_vec() - .remove(0), + .into_scalar(), ) }) }); From 30c3466afa879b8dc024d64d45f2d1f357c1e587 Mon Sep 17 00:00:00 2001 From: Jim Turner Date: Sat, 9 Mar 2019 17:31:40 -0500 Subject: [PATCH 32/81] Improve docs of partition_mut --- src/sort.rs | 48 +++++++++++++++++++++++++++++++++++++++--------- 1 file changed, 39 insertions(+), 9 deletions(-) diff --git a/src/sort.rs b/src/sort.rs index f75862e5..a9122652 100644 --- a/src/sort.rs +++ b/src/sort.rs @@ -49,15 +49,16 @@ where A: Ord + Clone, S: DataMut; - /// Return the index of `self[partition_index]` if `self` were to be sorted - /// in increasing order. + /// Partitions the array in increasing order based on the value initially + /// located at `pivot_index` and returns the new index of the value. /// - /// `self` elements are rearranged in such a way that `self[partition_index]` - /// is in the position it would be in an array sorted in increasing order. - /// All elements smaller than `self[partition_index]` are moved to its - /// left and all elements equal or greater than `self[partition_index]` - /// are moved to its right. - /// The ordering of the elements in the two partitions is undefined. + /// The elements are rearranged in such a way that the value initially + /// located at `pivot_index` is moved to the position it would be in an + /// array sorted in increasing order. The return value is the new index of + /// the value after rearrangement. All elements smaller than the value are + /// moved to its left and all elements equal or greater than the value are + /// moved to its right. The ordering of the elements in the two partitions + /// is undefined. /// /// `self` is shuffled **in place** to operate the desired partition: /// no copy of the array is allocated. @@ -67,7 +68,36 @@ where /// Average number of element swaps: n/6 - 1/3 (see /// [link](https://cs.stackexchange.com/questions/11458/quicksort-partitioning-hoare-vs-lomuto/11550)) /// - /// **Panics** if `partition_index` is greater than or equal to `n`. + /// **Panics** if `pivot_index` is greater than or equal to `n`. + /// + /// # Example + /// + /// ``` + /// extern crate ndarray; + /// extern crate ndarray_stats; + /// + /// use ndarray::array; + /// use ndarray_stats::Sort1dExt; + /// + /// # fn main() { + /// let mut data = array![3, 1, 4, 5, 2]; + /// let pivot_index = 2; + /// let pivot_value = data[pivot_index]; + /// + /// // Partition by the value located at `pivot_index`. + /// let new_index = data.partition_mut(pivot_index); + /// // The pivot value is now located at `new_index`. + /// assert_eq!(data[new_index], pivot_value); + /// // Elements less than that value are moved to the left. + /// for i in 0..new_index { + /// assert!(data[i] < pivot_value); + /// } + /// // Elements greater than or equal to that value are moved to the right. + /// for i in (new_index + 1)..data.len() { + /// assert!(data[i] >= pivot_value); + /// } + /// # } + /// ``` fn partition_mut(&mut self, pivot_index: usize) -> usize where A: Ord + Clone, From dca9c7bf3c287c6c827f905b1e9956909ff91299 Mon Sep 17 00:00:00 2001 From: Jim Turner Date: Sat, 9 Mar 2019 17:34:52 -0500 Subject: [PATCH 33/81] Reformat quantiles_axis_mut --- src/quantile/mod.rs | 31 ++++++++++++------------------- 1 file changed, 12 insertions(+), 19 deletions(-) diff --git a/src/quantile/mod.rs b/src/quantile/mod.rs index 17ecb00b..d7381c61 100644 --- a/src/quantile/mod.rs +++ b/src/quantile/mod.rs @@ -335,25 +335,18 @@ where // get the desired quantiles let mut results = IndexMap::new(); for q in qs { - let result = I::interpolate( - match I::needs_lower(*q, axis_len) { - true => { - let lower_index = &lower_index(*q, axis_len); - Some(values.map(|x| x.get(lower_index).unwrap().clone())) - }, - false => None, - }, - match I::needs_higher(*q, axis_len) { - true => { - let higher_index = &higher_index(*q, axis_len); - Some(values.map(|x| x.get(higher_index).unwrap().clone())) - }, - false => None, - }, - *q, - axis_len - ); - results.insert(*q, result); + let lower = if I::needs_lower(*q, axis_len) { + Some(values.map(|x| x[&lower_index(*q, axis_len)].clone())) + } else { + None + }; + let higher = if I::needs_higher(*q, axis_len) { + Some(values.map(|x| x[&higher_index(*q, axis_len)].clone())) + } else { + None + }; + let interpolated = I::interpolate(lower, higher, *q, axis_len); + results.insert(*q, interpolated); } Some(results) } From 92f08a478098a9d9f8f99bc5bd4a538acf729aba Mon Sep 17 00:00:00 2001 From: LukeMathWalker Date: Sun, 10 Mar 2019 10:57:01 +0000 Subject: [PATCH 34/81] Cargo fmt --- src/histogram/strategies.rs | 2 +- src/lib.rs | 2 +- src/quantile/interpolate.rs | 20 +++--- src/quantile/mod.rs | 69 ++++++++++--------- src/sort.rs | 10 +-- tests/quantile.rs | 129 ++++++++++++++++++++++-------------- tests/sort.rs | 9 +-- 7 files changed, 140 insertions(+), 101 deletions(-) diff --git a/src/histogram/strategies.rs b/src/histogram/strategies.rs index 93e35511..9f933df0 100644 --- a/src/histogram/strategies.rs +++ b/src/histogram/strategies.rs @@ -23,8 +23,8 @@ use super::super::{Quantile1dExt, QuantileExt}; use super::{Bins, Edges}; use ndarray::prelude::*; use ndarray::Data; -use num_traits::{FromPrimitive, NumOps, Zero}; use noisy_float::types::n64; +use num_traits::{FromPrimitive, NumOps, Zero}; /// A trait implemented by all strategies to build [`Bins`] /// with parameters inferred from observations. diff --git a/src/lib.rs b/src/lib.rs index 03b4de24..14bbb3a7 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -25,12 +25,12 @@ //! [`NumPy`]: https://docs.scipy.org/doc/numpy-1.14.1/reference/routines.statistics.html //! [`StatsBase.jl`]: https://juliastats.github.io/StatsBase.jl/latest/ +extern crate indexmap; extern crate itertools; extern crate ndarray; extern crate noisy_float; extern crate num_traits; extern crate rand; -extern crate indexmap; #[cfg(test)] extern crate approx; diff --git a/src/quantile/interpolate.rs b/src/quantile/interpolate.rs index 702e39a6..26fb18b1 100644 --- a/src/quantile/interpolate.rs +++ b/src/quantile/interpolate.rs @@ -52,8 +52,8 @@ pub trait Interpolate { q: N64, len: usize, ) -> Array - where - D: Dimension; + where + D: Dimension; } /// Select the higher value. @@ -125,8 +125,8 @@ impl Interpolate for Nearest { } impl Interpolate for Midpoint - where - T: Add + Div + Clone + FromPrimitive, +where + T: Add + Div + Clone + FromPrimitive, { fn needs_lower(_q: N64, _len: usize) -> bool { true @@ -140,8 +140,8 @@ impl Interpolate for Midpoint _q: N64, _len: usize, ) -> Array - where - D: Dimension, + where + D: Dimension, { let denom = T::from_u8(2).unwrap(); (lower.unwrap() + higher.unwrap()).mapv_into(|x| x / denom.clone()) @@ -149,8 +149,8 @@ impl Interpolate for Midpoint } impl Interpolate for Linear - where - T: Add + Clone + FromPrimitive + ToPrimitive, +where + T: Add + Clone + FromPrimitive + ToPrimitive, { fn needs_lower(_q: N64, _len: usize) -> bool { true @@ -164,8 +164,8 @@ impl Interpolate for Linear q: N64, len: usize, ) -> Array - where - D: Dimension, + where + D: Dimension, { let fraction = float_quantile_index_fraction(q, len).to_f64().unwrap(); let mut a = lower.unwrap(); diff --git a/src/quantile/mod.rs b/src/quantile/mod.rs index d7381c61..6f8a7067 100644 --- a/src/quantile/mod.rs +++ b/src/quantile/mod.rs @@ -1,10 +1,10 @@ use self::interpolate::{higher_index, lower_index, Interpolate}; use super::sort::get_many_from_sorted_mut_unchecked; -use std::cmp; -use noisy_float::types::N64; +use indexmap::{IndexMap, IndexSet}; use ndarray::prelude::*; use ndarray::{Data, DataMut, RemoveAxis}; -use indexmap::{IndexSet, IndexMap}; +use noisy_float::types::N64; +use std::cmp; use {MaybeNan, MaybeNanExt}; /// Quantile methods for `ArrayBase`. @@ -185,12 +185,16 @@ where /// any `q` `qs` is not between `0.` and `1.` (inclusive). /// /// [quantile_axis_mut]: ##tymethod.quantile_axis_mut - fn quantiles_axis_mut(&mut self, axis: Axis, qs: &[N64]) -> Option>> - where - D: RemoveAxis, - A: Ord + Clone, - S: DataMut, - I: Interpolate; + fn quantiles_axis_mut( + &mut self, + axis: Axis, + qs: &[N64], + ) -> Option>> + where + D: RemoveAxis, + A: Ord + Clone, + S: DataMut, + I: Interpolate; /// Return the `q`th quantile of the data along the specified axis, skipping NaN values. /// @@ -293,12 +297,16 @@ where })) } - fn quantiles_axis_mut(&mut self, axis: Axis, qs: &[N64]) -> Option>> - where - D: RemoveAxis, - A: Ord + Clone, - S: DataMut, - I: Interpolate, + fn quantiles_axis_mut( + &mut self, + axis: Axis, + qs: &[N64], + ) -> Option>> + where + D: RemoveAxis, + A: Ord + Clone, + S: DataMut, + I: Interpolate, { assert!(qs.iter().all(|x| (*x >= 0.) && (*x <= 1.))); @@ -358,9 +366,8 @@ where S: DataMut, I: Interpolate, { - self.quantiles_axis_mut::(axis, &[q]).map( - |x| x.into_iter().next().unwrap().1 - ) + self.quantiles_axis_mut::(axis, &[q]) + .map(|x| x.into_iter().next().unwrap().1) } fn quantile_axis_skipnan_mut(&mut self, axis: Axis, q: N64) -> Option> @@ -448,10 +455,10 @@ where /// /// [quantile_mut]: ##tymethod.quantile_mut fn quantiles_mut(&mut self, qs: &[N64]) -> Option> - where - A: Ord + Clone, - S: DataMut, - I: Interpolate; + where + A: Ord + Clone, + S: DataMut, + I: Interpolate; } impl Quantile1dExt for ArrayBase @@ -464,20 +471,18 @@ where S: DataMut, I: Interpolate, { - self.quantile_axis_mut::(Axis(0), q).map(|v| v.into_scalar()) + self.quantile_axis_mut::(Axis(0), q) + .map(|v| v.into_scalar()) } fn quantiles_mut(&mut self, qs: &[N64]) -> Option> - where - A: Ord + Clone, - S: DataMut, - I: Interpolate, + where + A: Ord + Clone, + S: DataMut, + I: Interpolate, { - self.quantiles_axis_mut::(Axis(0), qs).map( - |v| v.into_iter() - .map(|x| (x.0, x.1.into_scalar())) - .collect() - ) + self.quantiles_axis_mut::(Axis(0), qs) + .map(|v| v.into_iter().map(|x| (x.0, x.1.into_scalar())).collect()) } } diff --git a/src/sort.rs b/src/sort.rs index a9122652..6ef2f86a 100644 --- a/src/sort.rs +++ b/src/sort.rs @@ -1,6 +1,6 @@ +use indexmap::IndexMap; use ndarray::prelude::*; use ndarray::{s, Data, DataMut}; -use indexmap::IndexMap; use rand::prelude::*; use rand::thread_rng; @@ -143,8 +143,6 @@ where get_many_from_sorted_mut_unchecked(self, &deduped_indexes) } - - fn partition_mut(&mut self, pivot_index: usize) -> usize where A: Ord + Clone, @@ -195,10 +193,12 @@ where /// /// [get_many_from_sorted_mut]: ../trait.Sort1dExt.html#tymethod.get_many_from_sorted_mut pub(crate) fn get_many_from_sorted_mut_unchecked( - array: &mut ArrayBase, indexes: &[usize]) -> IndexMap + array: &mut ArrayBase, + indexes: &[usize], +) -> IndexMap where A: Ord + Clone, - S: DataMut, + S: DataMut, { let mut values = IndexMap::new(); diff --git a/tests/quantile.rs b/tests/quantile.rs index 40235554..83827764 100644 --- a/tests/quantile.rs +++ b/tests/quantile.rs @@ -5,13 +5,13 @@ extern crate noisy_float; extern crate quickcheck; extern crate quickcheck_macros; -use noisy_float::types::{n64, N64}; -use ndarray::prelude::*; use ndarray::array; +use ndarray::prelude::*; use ndarray_stats::{ interpolate::{Interpolate, Higher, Linear, Lower, Midpoint, Nearest}, Quantile1dExt, QuantileExt, }; +use noisy_float::types::{n64, N64}; use quickcheck_macros::quickcheck; #[test] @@ -156,7 +156,9 @@ fn test_quantile_axis_mut_to_get_maximum() { #[test] fn test_quantile_axis_skipnan_mut_higher_opt_i32() { let mut a = arr2(&[[Some(4), Some(2), None, Some(1), Some(5)], [None; 5]]); - let q = a.quantile_axis_skipnan_mut::(Axis(1), n64(0.6)).unwrap(); + let q = a + .quantile_axis_skipnan_mut::(Axis(1), n64(0.6)) + .unwrap(); assert_eq!(q.shape(), &[2]); assert_eq!(q[0], Some(4)); assert!(q[1].is_none()); @@ -165,7 +167,9 @@ fn test_quantile_axis_skipnan_mut_higher_opt_i32() { #[test] fn test_quantile_axis_skipnan_mut_nearest_opt_i32() { let mut a = arr2(&[[Some(4), Some(2), None, Some(1), Some(5)], [None; 5]]); - let q = a.quantile_axis_skipnan_mut::(Axis(1), n64(0.6)).unwrap(); + let q = a + .quantile_axis_skipnan_mut::(Axis(1), n64(0.6)) + .unwrap(); assert_eq!(q.shape(), &[2]); assert_eq!(q[0], Some(4)); assert!(q[1].is_none()); @@ -174,7 +178,9 @@ fn test_quantile_axis_skipnan_mut_nearest_opt_i32() { #[test] fn test_quantile_axis_skipnan_mut_midpoint_opt_i32() { let mut a = arr2(&[[Some(4), Some(2), None, Some(1), Some(5)], [None; 5]]); - let q = a.quantile_axis_skipnan_mut::(Axis(1), n64(0.6)).unwrap(); + let q = a + .quantile_axis_skipnan_mut::(Axis(1), n64(0.6)) + .unwrap(); assert_eq!(q.shape(), &[2]); assert_eq!(q[0], Some(3)); assert!(q[1].is_none()); @@ -183,7 +189,9 @@ fn test_quantile_axis_skipnan_mut_midpoint_opt_i32() { #[test] fn test_quantile_axis_skipnan_mut_linear_f64() { let mut a = arr2(&[[1., 2., ::std::f64::NAN, 3.], [::std::f64::NAN; 4]]); - let q = a.quantile_axis_skipnan_mut::(Axis(1), n64(0.75)).unwrap(); + let q = a + .quantile_axis_skipnan_mut::(Axis(1), n64(0.75)) + .unwrap(); assert_eq!(q.shape(), &[2]); assert!((q[0] - 2.5).abs() < 1e-12); assert!(q[1].is_nan()); @@ -192,7 +200,9 @@ fn test_quantile_axis_skipnan_mut_linear_f64() { #[test] fn test_quantile_axis_skipnan_mut_linear_opt_i32() { let mut a = arr2(&[[Some(2), Some(4), None, Some(1)], [None; 4]]); - let q = a.quantile_axis_skipnan_mut::(Axis(1), n64(0.75)).unwrap(); + let q = a + .quantile_axis_skipnan_mut::(Axis(1), n64(0.75)) + .unwrap(); assert_eq!(q.shape(), &[2]); assert_eq!(q[0], Some(3)); assert!(q[1].is_none()); @@ -214,20 +224,43 @@ fn test_quantiles_mut(xs: Vec) -> bool { // Unordered list of quantile indexes to look up, with a duplicate let quantile_indexes = vec![ - n64(0.75), n64(0.90), n64(0.95), n64(0.99), n64(1.), - n64(0.), n64(0.25), n64(0.5), n64(0.5) + n64(0.75), + n64(0.90), + n64(0.95), + n64(0.99), + n64(1.), + n64(0.), + n64(0.25), + n64(0.5), + n64(0.5), ]; let mut checks = vec![]; - checks.push(check_one_interpolation_method_for_quantiles_mut::(v.clone(), &quantile_indexes)); - checks.push(check_one_interpolation_method_for_quantiles_mut::(v.clone(), &quantile_indexes)); - checks.push(check_one_interpolation_method_for_quantiles_mut::(v.clone(), &quantile_indexes)); - checks.push(check_one_interpolation_method_for_quantiles_mut::(v.clone(), &quantile_indexes)); - checks.push(check_one_interpolation_method_for_quantiles_mut::(v.clone(), &quantile_indexes)); + checks.push(check_one_interpolation_method_for_quantiles_mut::( + v.clone(), + &quantile_indexes, + )); + checks.push(check_one_interpolation_method_for_quantiles_mut::( + v.clone(), + &quantile_indexes, + )); + checks.push(check_one_interpolation_method_for_quantiles_mut::( + v.clone(), + &quantile_indexes, + )); + checks.push( + check_one_interpolation_method_for_quantiles_mut::(v.clone(), &quantile_indexes), + ); + checks.push(check_one_interpolation_method_for_quantiles_mut::( + v.clone(), + &quantile_indexes, + )); checks.into_iter().all(|x| x) } -fn check_one_interpolation_method_for_quantiles_mut>(mut v: Array1, quantile_indexes: &[N64]) -> bool -{ +fn check_one_interpolation_method_for_quantiles_mut>( + mut v: Array1, + quantile_indexes: &[N64], +) -> bool { let bulk_quantiles = v.quantiles_mut::(&quantile_indexes); if v.len() == 0 { @@ -238,9 +271,7 @@ fn check_one_interpolation_method_for_quantiles_mut>(mut v: let mut checks = vec![]; for quantile_index in quantile_indexes.iter() { let quantile = v.quantile_mut::(*quantile_index).unwrap(); - checks.push( - quantile == *bulk_quantiles.get(quantile_index).unwrap() - ); + checks.push(quantile == *bulk_quantiles.get(quantile_index).unwrap()); } checks.into_iter().all(|x| x) } @@ -257,42 +288,46 @@ fn test_quantiles_axis_mut(xs: Vec) -> bool { // Unordered list of quantile indexes to look up, with a duplicate let quantile_indexes = vec![ - n64(0.75), n64(0.90), n64(0.95), n64(0.99), n64(1.), - n64(0.), n64(0.25), n64(0.5), n64(0.5) + n64(0.75), + n64(0.90), + n64(0.95), + n64(0.99), + n64(1.), + n64(0.), + n64(0.25), + n64(0.5), + n64(0.5), ]; // Test out all interpolation methods let mut checks = vec![]; - checks.push( - check_one_interpolation_method_for_quantiles_axis_mut::( - m.clone(), &quantile_indexes, Axis(0) - ) - ); - checks.push( - check_one_interpolation_method_for_quantiles_axis_mut::( - m.clone(), &quantile_indexes, Axis(0) - ) - ); + checks.push(check_one_interpolation_method_for_quantiles_axis_mut::< + Linear, + >(m.clone(), &quantile_indexes, Axis(0))); + checks.push(check_one_interpolation_method_for_quantiles_axis_mut::< + Higher, + >(m.clone(), &quantile_indexes, Axis(0))); checks.push( check_one_interpolation_method_for_quantiles_axis_mut::( - m.clone(), &quantile_indexes, Axis(0) - ) - ); - checks.push( - check_one_interpolation_method_for_quantiles_axis_mut::( - m.clone(), &quantile_indexes, Axis(0) - ) - ); - checks.push( - check_one_interpolation_method_for_quantiles_axis_mut::( - m.clone(), &quantile_indexes, Axis(0) - ) + m.clone(), + &quantile_indexes, + Axis(0), + ), ); + checks.push(check_one_interpolation_method_for_quantiles_axis_mut::< + Midpoint, + >(m.clone(), &quantile_indexes, Axis(0))); + checks.push(check_one_interpolation_method_for_quantiles_axis_mut::< + Nearest, + >(m.clone(), &quantile_indexes, Axis(0))); checks.into_iter().all(|x| x) } -fn check_one_interpolation_method_for_quantiles_axis_mut>(mut v: Array2, quantile_indexes: &[N64], axis: Axis) -> bool -{ +fn check_one_interpolation_method_for_quantiles_axis_mut>( + mut v: Array2, + quantile_indexes: &[N64], + axis: Axis, +) -> bool { let bulk_quantiles = v.quantiles_axis_mut::(axis, &quantile_indexes); if v.len() == 0 { @@ -302,9 +337,7 @@ fn check_one_interpolation_method_for_quantiles_axis_mut>(mu let mut checks = vec![]; for quantile_index in quantile_indexes.iter() { let quantile = v.quantile_axis_mut::(axis, *quantile_index).unwrap(); - checks.push( - quantile == *bulk_quantiles.get(quantile_index).unwrap() - ); + checks.push(quantile == *bulk_quantiles.get(quantile_index).unwrap()); } checks.into_iter().all(|x| x) } diff --git a/tests/sort.rs b/tests/sort.rs index 30480f75..560cc00b 100644 --- a/tests/sort.rs +++ b/tests/sort.rs @@ -60,10 +60,11 @@ fn test_sorted_get_many_mut(mut xs: Vec) -> bool { let mut indexes: Vec = (0..n).into_iter().collect(); indexes.append(&mut (0..n).into_iter().collect()); - let sorted_v: Vec = v.get_many_from_sorted_mut(&indexes) - .into_iter() - .map(|x| x.1) - .collect(); + let sorted_v: Vec = v + .get_many_from_sorted_mut(&indexes) + .into_iter() + .map(|x| x.1) + .collect(); xs.sort(); xs == sorted_v } From 35d209471b922cca29022c5e8d2f63cc83ce5983 Mon Sep 17 00:00:00 2001 From: LukeMathWalker Date: Sun, 10 Mar 2019 11:06:36 +0000 Subject: [PATCH 35/81] Fmt --- src/quantile/interpolate.rs | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/src/quantile/interpolate.rs b/src/quantile/interpolate.rs index 26fb18b1..badf7523 100644 --- a/src/quantile/interpolate.rs +++ b/src/quantile/interpolate.rs @@ -2,7 +2,7 @@ use ndarray::azip; use ndarray::prelude::*; use noisy_float::types::N64; -use num_traits::{Float, FromPrimitive, ToPrimitive}; +use num_traits::{Float, FromPrimitive, NumOps, ToPrimitive}; use std::ops::{Add, Div}; fn float_quantile_index(q: N64, len: usize) -> N64 { @@ -144,7 +144,14 @@ where D: Dimension, { let denom = T::from_u8(2).unwrap(); - (lower.unwrap() + higher.unwrap()).mapv_into(|x| x / denom.clone()) + let mut lower = lower.unwrap(); + let higher = higher.unwrap(); + azip!( + mut lower, ref higher in { + *lower = lower.clone() + (higher.clone() - lower.clone()) / denom.clone() + } + ); + lower } } From 1021507c137d4836065b8d9d16725ec378921845 Mon Sep 17 00:00:00 2001 From: LukeMathWalker Date: Sun, 10 Mar 2019 11:17:59 +0000 Subject: [PATCH 36/81] Formatting --- src/quantile/mod.rs | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/src/quantile/mod.rs b/src/quantile/mod.rs index 6f8a7067..e96a2dea 100644 --- a/src/quantile/mod.rs +++ b/src/quantile/mod.rs @@ -334,10 +334,9 @@ where let searched_indexes: Vec = searched_indexes.into_iter().collect(); // Retrieve the values corresponding to each index for each slice along the specified axis - let values = self.map_axis_mut( - axis, - |mut x| get_many_from_sorted_mut_unchecked(&mut x, &searched_indexes) - ); + let values = self.map_axis_mut(axis, |mut x| { + get_many_from_sorted_mut_unchecked(&mut x, &searched_indexes) + }); // Combine the retrieved values according to specified interpolation strategy to // get the desired quantiles From c2ed8057984089b25dd8c890a5fb55024252897d Mon Sep 17 00:00:00 2001 From: LukeMathWalker Date: Tue, 12 Mar 2019 09:05:58 +0000 Subject: [PATCH 37/81] Log version works --- src/sort.rs | 62 ++++++++++++++++++++++++++++++++++++++++++++++++++- tests/sort.rs | 1 + 2 files changed, 62 insertions(+), 1 deletion(-) diff --git a/src/sort.rs b/src/sort.rs index 6ef2f86a..d9f84669 100644 --- a/src/sort.rs +++ b/src/sort.rs @@ -140,7 +140,13 @@ where deduped_indexes.sort_unstable(); deduped_indexes.dedup(); - get_many_from_sorted_mut_unchecked(self, &deduped_indexes) + let values = log_version(self, &deduped_indexes); + + let mut result = IndexMap::new(); + for (index, value) in deduped_indexes.into_iter().zip(values.into_iter()) { + result.insert(index, value); + } + result } fn partition_mut(&mut self, pivot_index: usize) -> usize @@ -215,3 +221,57 @@ where values } + +pub(crate) fn log_version( + array: &mut ArrayBase, + indexes: &[usize], +) -> Vec +where + A: Ord + Clone, + S: DataMut, +{ + let n = array.len(); + + if n == 0 { + return vec![] + } + + if n == 1 { + let value = array[0].clone(); + return vec![value; indexes.len()] + } + + let mut rng = thread_rng(); + let pivot_index = rng.gen_range(0, n); + let partition_index = array.partition_mut(pivot_index); + match indexes.binary_search(&partition_index) { + Ok(quantile_index) => { + let smaller_indexes = &indexes[..quantile_index]; + let mut smaller_quantiles = log_version( + &mut array.slice_mut(s![..quantile_index]), smaller_indexes + ); + + smaller_quantiles.push(array[quantile_index].clone()); + + let bigger_indexes: Vec = indexes[(quantile_index+1)..].into_iter().map(|x| x - quantile_index - 1).collect(); + let mut bigger_quantiles = log_version( + &mut array.slice_mut(s![(quantile_index+1)..]), &bigger_indexes + ); + smaller_quantiles.append(&mut bigger_quantiles); + smaller_quantiles + }, + Err(quantile_index) => { + let smaller_indexes = &indexes[..quantile_index]; + let mut smaller_quantiles = log_version( + &mut array.slice_mut(s![..quantile_index]), smaller_indexes + ); + + let bigger_indexes: Vec = indexes[quantile_index..].into_iter().map(|x| x - quantile_index).collect(); + let mut bigger_quantiles = log_version( + &mut array.slice_mut(s![(quantile_index+1)..]), &bigger_indexes + ); + smaller_quantiles.append(&mut bigger_quantiles); + smaller_quantiles + } + } +} \ No newline at end of file diff --git a/tests/sort.rs b/tests/sort.rs index 560cc00b..6bbe17b4 100644 --- a/tests/sort.rs +++ b/tests/sort.rs @@ -66,6 +66,7 @@ fn test_sorted_get_many_mut(mut xs: Vec) -> bool { .map(|x| x.1) .collect(); xs.sort(); + println!("Sorted: {:?}. Truth: {:?}", sorted_v, xs); xs == sorted_v } } From 9dc5eef929a211cd11b3c14fd452412b40c9bf86 Mon Sep 17 00:00:00 2001 From: LukeMathWalker Date: Fri, 15 Mar 2019 08:58:26 +0000 Subject: [PATCH 38/81] Refactor --- src/sort.rs | 89 ++++++++++++++++++++++++++--------------------------- 1 file changed, 44 insertions(+), 45 deletions(-) diff --git a/src/sort.rs b/src/sort.rs index d9f84669..dc0dd28b 100644 --- a/src/sort.rs +++ b/src/sort.rs @@ -140,7 +140,7 @@ where deduped_indexes.sort_unstable(); deduped_indexes.dedup(); - let values = log_version(self, &deduped_indexes); + let values = get_many_from_sorted_mut_unchecked(self, &deduped_indexes); let mut result = IndexMap::new(); for (index, value) in deduped_indexes.into_iter().zip(values.into_iter()) { @@ -201,30 +201,6 @@ where pub(crate) fn get_many_from_sorted_mut_unchecked( array: &mut ArrayBase, indexes: &[usize], -) -> IndexMap -where - A: Ord + Clone, - S: DataMut, -{ - let mut values = IndexMap::new(); - - let mut previous_index = 0; - let mut search_space = array.view_mut(); - for index in indexes.into_iter() { - let relative_index = index - previous_index; - let value = search_space.get_from_sorted_mut(relative_index); - values.insert(*index, value); - - previous_index = *index; - search_space.slice_collapse(s![relative_index..]); - } - - values -} - -pub(crate) fn log_version( - array: &mut ArrayBase, - indexes: &[usize], ) -> Vec where A: Ord + Clone, @@ -238,40 +214,63 @@ where if n == 1 { let value = array[0].clone(); - return vec![value; indexes.len()] + return vec![value] } + // Pick a random index let mut rng = thread_rng(); let pivot_index = rng.gen_range(0, n); + // Partition the array with respect to the pivot value let partition_index = array.partition_mut(pivot_index); match indexes.binary_search(&partition_index) { - Ok(quantile_index) => { - let smaller_indexes = &indexes[..quantile_index]; - let mut smaller_quantiles = log_version( - &mut array.slice_mut(s![..quantile_index]), smaller_indexes + // The partition_index is one of indexes we are looking for + Ok(partition_index) => { + let mut results: Vec; + + // Search recursively for the values corresponding to strictly smaller indexes + // to the left of partition_index + let smaller_indexes = &indexes[..partition_index]; + let smaller_values = log_version( + &mut array.slice_mut(s![..partition_index]), smaller_indexes ); - smaller_quantiles.push(array[quantile_index].clone()); + results = smaller_values; + + // Get the value associated to partition index + results.push(array[partition_index].clone()); - let bigger_indexes: Vec = indexes[(quantile_index+1)..].into_iter().map(|x| x - quantile_index - 1).collect(); - let mut bigger_quantiles = log_version( - &mut array.slice_mut(s![(quantile_index+1)..]), &bigger_indexes + // Search recursively for the values corresponding to strictly bigger indexes + // to the right of partition_index+1 + let bigger_indexes: Vec = indexes[(partition_index+1)..].into_iter().map(|x| x - partition_index - 1).collect(); + let mut bigger_values = log_version( + &mut array.slice_mut(s![(partition_index+1)..]), &bigger_indexes ); - smaller_quantiles.append(&mut bigger_quantiles); - smaller_quantiles + + results.append(&mut bigger_values); + results }, - Err(quantile_index) => { - let smaller_indexes = &indexes[..quantile_index]; - let mut smaller_quantiles = log_version( - &mut array.slice_mut(s![..quantile_index]), smaller_indexes + // The partition_index is not one of indexes we are looking for + Err(partition_index) => { + let mut results: Vec; + + // Search recursively for the values corresponding to strictly smaller indexes + // to the left of partition_index + let smaller_indexes = &indexes[..partition_index]; + let smaller_values = log_version( + &mut array.slice_mut(s![..partition_index]), smaller_indexes ); - let bigger_indexes: Vec = indexes[quantile_index..].into_iter().map(|x| x - quantile_index).collect(); - let mut bigger_quantiles = log_version( - &mut array.slice_mut(s![(quantile_index+1)..]), &bigger_indexes + results = smaller_values; + + // Search recursively for the values corresponding to strictly bigger indexes + // to the right of partition_index + let bigger_indexes: Vec = indexes[partition_index..].into_iter().map(|x| x - partition_index).collect(); + let mut bigger_values = log_version( + &mut array.slice_mut(s![(partition_index+1)..]), &bigger_indexes ); - smaller_quantiles.append(&mut bigger_quantiles); - smaller_quantiles + + results.append(&mut bigger_values); + results } } } \ No newline at end of file From c49ad047aeee63290b5ba09899fe94595078661d Mon Sep 17 00:00:00 2001 From: LukeMathWalker Date: Fri, 15 Mar 2019 09:06:41 +0000 Subject: [PATCH 39/81] Fix indexes --- src/quantile/mod.rs | 4 ++-- src/sort.rs | 10 +++++----- 2 files changed, 7 insertions(+), 7 deletions(-) diff --git a/src/quantile/mod.rs b/src/quantile/mod.rs index e96a2dea..238e4dc1 100644 --- a/src/quantile/mod.rs +++ b/src/quantile/mod.rs @@ -343,12 +343,12 @@ where let mut results = IndexMap::new(); for q in qs { let lower = if I::needs_lower(*q, axis_len) { - Some(values.map(|x| x[&lower_index(*q, axis_len)].clone())) + Some(values.map(|x| x[lower_index(*q, axis_len)].clone())) } else { None }; let higher = if I::needs_higher(*q, axis_len) { - Some(values.map(|x| x[&higher_index(*q, axis_len)].clone())) + Some(values.map(|x| x[higher_index(*q, axis_len)].clone())) } else { None }; diff --git a/src/sort.rs b/src/sort.rs index dc0dd28b..41d9e933 100644 --- a/src/sort.rs +++ b/src/sort.rs @@ -230,7 +230,7 @@ where // Search recursively for the values corresponding to strictly smaller indexes // to the left of partition_index let smaller_indexes = &indexes[..partition_index]; - let smaller_values = log_version( + let smaller_values = get_many_from_sorted_mut_unchecked( &mut array.slice_mut(s![..partition_index]), smaller_indexes ); @@ -242,7 +242,7 @@ where // Search recursively for the values corresponding to strictly bigger indexes // to the right of partition_index+1 let bigger_indexes: Vec = indexes[(partition_index+1)..].into_iter().map(|x| x - partition_index - 1).collect(); - let mut bigger_values = log_version( + let mut bigger_values = get_many_from_sorted_mut_unchecked( &mut array.slice_mut(s![(partition_index+1)..]), &bigger_indexes ); @@ -256,7 +256,7 @@ where // Search recursively for the values corresponding to strictly smaller indexes // to the left of partition_index let smaller_indexes = &indexes[..partition_index]; - let smaller_values = log_version( + let smaller_values = get_many_from_sorted_mut_unchecked( &mut array.slice_mut(s![..partition_index]), smaller_indexes ); @@ -265,8 +265,8 @@ where // Search recursively for the values corresponding to strictly bigger indexes // to the right of partition_index let bigger_indexes: Vec = indexes[partition_index..].into_iter().map(|x| x - partition_index).collect(); - let mut bigger_values = log_version( - &mut array.slice_mut(s![(partition_index+1)..]), &bigger_indexes + let mut bigger_values = get_many_from_sorted_mut_unchecked( + &mut array.slice_mut(s![partition_index..]), &bigger_indexes ); results.append(&mut bigger_values); From cf7b3628e7d8f479deb82fe382350ee403d72bd3 Mon Sep 17 00:00:00 2001 From: LukeMathWalker Date: Sat, 16 Mar 2019 18:17:17 +0000 Subject: [PATCH 40/81] Working implementation --- src/quantile/mod.rs | 4 +- src/sort.rs | 97 +++++++++++++++++++++++++++------------------ 2 files changed, 60 insertions(+), 41 deletions(-) diff --git a/src/quantile/mod.rs b/src/quantile/mod.rs index 238e4dc1..23462300 100644 --- a/src/quantile/mod.rs +++ b/src/quantile/mod.rs @@ -343,12 +343,12 @@ where let mut results = IndexMap::new(); for q in qs { let lower = if I::needs_lower(*q, axis_len) { - Some(values.map(|x| x[lower_index(*q, axis_len)].clone())) + Some(values.map(|x| x.get(&lower_index(*q, axis_len)).unwrap().clone())) } else { None }; let higher = if I::needs_higher(*q, axis_len) { - Some(values.map(|x| x[higher_index(*q, axis_len)].clone())) + Some(values.map(|x| x.get(&higher_index(*q, axis_len)).unwrap().clone())) } else { None }; diff --git a/src/sort.rs b/src/sort.rs index 41d9e933..37d3ef7b 100644 --- a/src/sort.rs +++ b/src/sort.rs @@ -140,13 +140,7 @@ where deduped_indexes.sort_unstable(); deduped_indexes.dedup(); - let values = get_many_from_sorted_mut_unchecked(self, &deduped_indexes); - - let mut result = IndexMap::new(); - for (index, value) in deduped_indexes.into_iter().zip(values.into_iter()) { - result.insert(index, value); - } - result + get_many_from_sorted_mut_unchecked(self, &deduped_indexes) } fn partition_mut(&mut self, pivot_index: usize) -> usize @@ -188,6 +182,7 @@ where } } + /// To retrieve multiple indexes from the sorted array in an optimized fashion, /// [get_many_from_sorted_mut] first of all sorts the `indexes` vector. /// @@ -201,6 +196,22 @@ where pub(crate) fn get_many_from_sorted_mut_unchecked( array: &mut ArrayBase, indexes: &[usize], +) -> IndexMap + where + A: Ord + Clone, + S: DataMut, +{ + let values = _get_many_from_sorted_mut_unchecked(array, indexes); + let mut result = IndexMap::new(); + for (index, value) in indexes.into_iter().zip(values.into_iter()) { + result.insert(*index, value); + } + result +} + +fn _get_many_from_sorted_mut_unchecked( + array: &mut ArrayBase, + indexes: &[usize], ) -> Vec where A: Ord + Clone, @@ -208,69 +219,77 @@ where { let n = array.len(); + if indexes.len() == 0 { + return vec![]; + } + if n == 0 { - return vec![] + return vec![]; } if n == 1 { let value = array[0].clone(); - return vec![value] + return vec![value]; } // Pick a random index let mut rng = thread_rng(); let pivot_index = rng.gen_range(0, n); // Partition the array with respect to the pivot value - let partition_index = array.partition_mut(pivot_index); - match indexes.binary_search(&partition_index) { + let array_partition_index = array.partition_mut(pivot_index); + match indexes.binary_search(&array_partition_index) { // The partition_index is one of indexes we are looking for - Ok(partition_index) => { - let mut results: Vec; - + Ok(index_split) => { // Search recursively for the values corresponding to strictly smaller indexes // to the left of partition_index - let smaller_indexes = &indexes[..partition_index]; - let smaller_values = get_many_from_sorted_mut_unchecked( - &mut array.slice_mut(s![..partition_index]), smaller_indexes + let smaller_indexes = &indexes[..index_split]; + let smaller_values = _get_many_from_sorted_mut_unchecked( + &mut array.slice_mut(s![..array_partition_index]), + smaller_indexes, ); - results = smaller_values; - - // Get the value associated to partition index - results.push(array[partition_index].clone()); - // Search recursively for the values corresponding to strictly bigger indexes // to the right of partition_index+1 - let bigger_indexes: Vec = indexes[(partition_index+1)..].into_iter().map(|x| x - partition_index - 1).collect(); - let mut bigger_values = get_many_from_sorted_mut_unchecked( - &mut array.slice_mut(s![(partition_index+1)..]), &bigger_indexes + let bigger_indexes: Vec = indexes[(index_split + 1)..] + .into_iter() + .map(|x| x - array_partition_index - 1) + .collect(); + let mut bigger_values = _get_many_from_sorted_mut_unchecked( + &mut array.slice_mut(s![(array_partition_index + 1)..]), + &bigger_indexes, ); + let mut results: Vec; + results = smaller_values; + // Get the value associated to partition index + results.push(array[array_partition_index].clone()); results.append(&mut bigger_values); results - }, + } // The partition_index is not one of indexes we are looking for - Err(partition_index) => { - let mut results: Vec; - + Err(index_split) => { // Search recursively for the values corresponding to strictly smaller indexes // to the left of partition_index - let smaller_indexes = &indexes[..partition_index]; - let smaller_values = get_many_from_sorted_mut_unchecked( - &mut array.slice_mut(s![..partition_index]), smaller_indexes + let smaller_indexes = &indexes[..index_split]; + let smaller_values = _get_many_from_sorted_mut_unchecked( + &mut array.slice_mut(s![..array_partition_index]), + smaller_indexes, ); - - results = smaller_values; - // Search recursively for the values corresponding to strictly bigger indexes // to the right of partition_index - let bigger_indexes: Vec = indexes[partition_index..].into_iter().map(|x| x - partition_index).collect(); - let mut bigger_values = get_many_from_sorted_mut_unchecked( - &mut array.slice_mut(s![partition_index..]), &bigger_indexes + let bigger_indexes: Vec = indexes[index_split..] + .into_iter() + .map(|x| x - array_partition_index - 1) + .collect(); + let mut bigger_values = _get_many_from_sorted_mut_unchecked( + &mut array.slice_mut(s![(array_partition_index + 1)..]), + &bigger_indexes, ); + let mut results: Vec; + results = smaller_values; results.append(&mut bigger_values); results } } -} \ No newline at end of file +} From cb1d9f881daff45c0218cc04e56cadffb9fb5e20 Mon Sep 17 00:00:00 2001 From: LukeMathWalker Date: Sat, 16 Mar 2019 18:19:26 +0000 Subject: [PATCH 41/81] Shorter syntax --- src/quantile/mod.rs | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/quantile/mod.rs b/src/quantile/mod.rs index 23462300..e96a2dea 100644 --- a/src/quantile/mod.rs +++ b/src/quantile/mod.rs @@ -343,12 +343,12 @@ where let mut results = IndexMap::new(); for q in qs { let lower = if I::needs_lower(*q, axis_len) { - Some(values.map(|x| x.get(&lower_index(*q, axis_len)).unwrap().clone())) + Some(values.map(|x| x[&lower_index(*q, axis_len)].clone())) } else { None }; let higher = if I::needs_higher(*q, axis_len) { - Some(values.map(|x| x.get(&higher_index(*q, axis_len)).unwrap().clone())) + Some(values.map(|x| x[&higher_index(*q, axis_len)].clone())) } else { None }; From 75d7d5517aac26deb25fe96acc70fdc6f45f35ee Mon Sep 17 00:00:00 2001 From: LukeMathWalker Date: Sat, 16 Mar 2019 18:19:50 +0000 Subject: [PATCH 42/81] Formatting --- src/sort.rs | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/src/sort.rs b/src/sort.rs index 37d3ef7b..5ccaed61 100644 --- a/src/sort.rs +++ b/src/sort.rs @@ -182,7 +182,6 @@ where } } - /// To retrieve multiple indexes from the sorted array in an optimized fashion, /// [get_many_from_sorted_mut] first of all sorts the `indexes` vector. /// @@ -197,9 +196,9 @@ pub(crate) fn get_many_from_sorted_mut_unchecked( array: &mut ArrayBase, indexes: &[usize], ) -> IndexMap - where - A: Ord + Clone, - S: DataMut, +where + A: Ord + Clone, + S: DataMut, { let values = _get_many_from_sorted_mut_unchecked(array, indexes); let mut result = IndexMap::new(); From ca951cf58339286f4047f2c81f1233e0a0e3fbc1 Mon Sep 17 00:00:00 2001 From: LukeMathWalker Date: Sat, 16 Mar 2019 18:21:02 +0000 Subject: [PATCH 43/81] Better docs --- src/quantile/mod.rs | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/quantile/mod.rs b/src/quantile/mod.rs index e96a2dea..61b5460f 100644 --- a/src/quantile/mod.rs +++ b/src/quantile/mod.rs @@ -334,6 +334,8 @@ where let searched_indexes: Vec = searched_indexes.into_iter().collect(); // Retrieve the values corresponding to each index for each slice along the specified axis + // For each 1-dimensional slice along the specified axis we get back an IndexMap + // which can be used to retrieve the desired values using searched_indexes let values = self.map_axis_mut(axis, |mut x| { get_many_from_sorted_mut_unchecked(&mut x, &searched_indexes) }); From 45e84cdc79cd5d55b34027c645fc950febda24ee Mon Sep 17 00:00:00 2001 From: LukeMathWalker Date: Sat, 16 Mar 2019 18:26:53 +0000 Subject: [PATCH 44/81] Comments --- src/sort.rs | 33 ++++++++++++++++++++++----------- 1 file changed, 22 insertions(+), 11 deletions(-) diff --git a/src/sort.rs b/src/sort.rs index 5ccaed61..082b7b87 100644 --- a/src/sort.rs +++ b/src/sort.rs @@ -200,7 +200,10 @@ where A: Ord + Clone, S: DataMut, { + // The actual routine let values = _get_many_from_sorted_mut_unchecked(array, indexes); + + // We convert the vector to a more search-friendly IndexMap let mut result = IndexMap::new(); for (index, value) in indexes.into_iter().zip(values.into_iter()) { result.insert(*index, value); @@ -218,28 +221,34 @@ where { let n = array.len(); - if indexes.len() == 0 { - return vec![]; - } - - if n == 0 { + // Nothing to do in this case + if indexes.len() == 0 || n == 0 { return vec![]; } + // We can only reach this point with indexes.len() == 1 + // So it's safe to return a vector with a single value if n == 1 { let value = array[0].clone(); return vec![value]; } - // Pick a random index + // We pick a random pivot index: the corresponding element is the pivot value let mut rng = thread_rng(); let pivot_index = rng.gen_range(0, n); - // Partition the array with respect to the pivot value + + // We partition the array with respect to the pivot value + // The pivot value moves to `array_partition_index` + // Elements strictly smaller than the pivot value have indexes < `array_partition_index` + // Elements greater or equal to the pivot value have indexes > `array_partition_index` let array_partition_index = array.partition_mut(pivot_index); + + // We can use a divide et impera strategy, splitting the indexes we are searching + // in two chunks with respect to array_partition_index match indexes.binary_search(&array_partition_index) { - // The partition_index is one of indexes we are looking for + // Option 1: The partition_index is one of the indexes we are looking for Ok(index_split) => { - // Search recursively for the values corresponding to strictly smaller indexes + // We search recursively for the values corresponding to strictly smaller indexes // to the left of partition_index let smaller_indexes = &indexes[..index_split]; let smaller_values = _get_many_from_sorted_mut_unchecked( @@ -247,10 +256,11 @@ where smaller_indexes, ); - // Search recursively for the values corresponding to strictly bigger indexes + // We search recursively for the values corresponding to strictly bigger indexes // to the right of partition_index+1 let bigger_indexes: Vec = indexes[(index_split + 1)..] .into_iter() + // We need to rebase the indexes .map(|x| x - array_partition_index - 1) .collect(); let mut bigger_values = _get_many_from_sorted_mut_unchecked( @@ -265,7 +275,7 @@ where results.append(&mut bigger_values); results } - // The partition_index is not one of indexes we are looking for + // Option 2: The partition_index is not one of indexes we are looking for Err(index_split) => { // Search recursively for the values corresponding to strictly smaller indexes // to the left of partition_index @@ -278,6 +288,7 @@ where // to the right of partition_index let bigger_indexes: Vec = indexes[index_split..] .into_iter() + // We need to rebase the indexes .map(|x| x - array_partition_index - 1) .collect(); let mut bigger_values = _get_many_from_sorted_mut_unchecked( From 46a683486a849f68698b0ba36d6b2ab726120e05 Mon Sep 17 00:00:00 2001 From: LukeMathWalker Date: Sat, 16 Mar 2019 18:50:19 +0000 Subject: [PATCH 45/81] Typo --- src/quantile/mod.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/quantile/mod.rs b/src/quantile/mod.rs index 61b5460f..6888d1dc 100644 --- a/src/quantile/mod.rs +++ b/src/quantile/mod.rs @@ -182,7 +182,7 @@ where /// Returns `None` when the specified axis has length 0. /// /// **Panics** if `axis` is out of bounds or if - /// any `q` `qs` is not between `0.` and `1.` (inclusive). + /// any `q` in `qs` is not between `0.` and `1.` (inclusive). /// /// [quantile_axis_mut]: ##tymethod.quantile_axis_mut fn quantiles_axis_mut( From 01e794c1737d173c9b366aa81bacfec25aba7ad3 Mon Sep 17 00:00:00 2001 From: LukeMathWalker Date: Sat, 16 Mar 2019 19:05:16 +0000 Subject: [PATCH 46/81] Don't lose pieces after rebase --- src/quantile/interpolate.rs | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/src/quantile/interpolate.rs b/src/quantile/interpolate.rs index badf7523..ee8fffdd 100644 --- a/src/quantile/interpolate.rs +++ b/src/quantile/interpolate.rs @@ -3,7 +3,6 @@ use ndarray::azip; use ndarray::prelude::*; use noisy_float::types::N64; use num_traits::{Float, FromPrimitive, NumOps, ToPrimitive}; -use std::ops::{Add, Div}; fn float_quantile_index(q: N64, len: usize) -> N64 { q * ((len - 1) as f64) @@ -126,7 +125,7 @@ impl Interpolate for Nearest { impl Interpolate for Midpoint where - T: Add + Div + Clone + FromPrimitive, + T: NumOps + Clone + FromPrimitive, { fn needs_lower(_q: N64, _len: usize) -> bool { true @@ -157,7 +156,7 @@ where impl Interpolate for Linear where - T: Add + Clone + FromPrimitive + ToPrimitive, + T: NumOps + Clone + FromPrimitive + ToPrimitive, { fn needs_lower(_q: N64, _len: usize) -> bool { true From 0c70bbb84851bb6c02d73c1c3bf76fcb10b32a7a Mon Sep 17 00:00:00 2001 From: LukeMathWalker Date: Sat, 16 Mar 2019 19:06:49 +0000 Subject: [PATCH 47/81] Fmt --- tests/quantile.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/quantile.rs b/tests/quantile.rs index 83827764..a9f9bc03 100644 --- a/tests/quantile.rs +++ b/tests/quantile.rs @@ -8,7 +8,7 @@ extern crate quickcheck_macros; use ndarray::array; use ndarray::prelude::*; use ndarray_stats::{ - interpolate::{Interpolate, Higher, Linear, Lower, Midpoint, Nearest}, + interpolate::{Higher, Interpolate, Linear, Lower, Midpoint, Nearest}, Quantile1dExt, QuantileExt, }; use noisy_float::types::{n64, N64}; From 1ba922af493302f3ad11d3e6ea9bb1ddadb935a6 Mon Sep 17 00:00:00 2001 From: LukeMathWalker Date: Sun, 17 Mar 2019 18:01:24 +0000 Subject: [PATCH 48/81] Reduce code duplication --- src/sort.rs | 85 ++++++++++++++++++++--------------------------------- 1 file changed, 32 insertions(+), 53 deletions(-) diff --git a/src/sort.rs b/src/sort.rs index 082b7b87..2b553a8d 100644 --- a/src/sort.rs +++ b/src/sort.rs @@ -245,61 +245,40 @@ where // We can use a divide et impera strategy, splitting the indexes we are searching // in two chunks with respect to array_partition_index - match indexes.binary_search(&array_partition_index) { - // Option 1: The partition_index is one of the indexes we are looking for - Ok(index_split) => { - // We search recursively for the values corresponding to strictly smaller indexes - // to the left of partition_index - let smaller_indexes = &indexes[..index_split]; - let smaller_values = _get_many_from_sorted_mut_unchecked( - &mut array.slice_mut(s![..array_partition_index]), - smaller_indexes, - ); + let index_split = indexes.binary_search(&array_partition_index); + let (smaller_indexes, bigger_indexes) = match index_split { + Ok(index_split) => (&indexes[..index_split], &indexes[(index_split+1)..]), + Err(index_split) => (&indexes[..index_split], &indexes[index_split..]), + }; + // We are using a recursive search - to look for bigger_indexes in the right + // slice of the array we need to shift the indexes + let bigger_indexes: Vec = bigger_indexes + .into_iter() + .map(|x| x - array_partition_index - 1) + .collect(); - // We search recursively for the values corresponding to strictly bigger indexes - // to the right of partition_index+1 - let bigger_indexes: Vec = indexes[(index_split + 1)..] - .into_iter() - // We need to rebase the indexes - .map(|x| x - array_partition_index - 1) - .collect(); - let mut bigger_values = _get_many_from_sorted_mut_unchecked( - &mut array.slice_mut(s![(array_partition_index + 1)..]), - &bigger_indexes, - ); + // We search recursively for the values corresponding to strictly smaller indexes + // to the left of partition_index + let smaller_values = _get_many_from_sorted_mut_unchecked( + &mut array.slice_mut(s![..array_partition_index]), + smaller_indexes, + ); - let mut results: Vec; - results = smaller_values; - // Get the value associated to partition index - results.push(array[array_partition_index].clone()); - results.append(&mut bigger_values); - results - } - // Option 2: The partition_index is not one of indexes we are looking for - Err(index_split) => { - // Search recursively for the values corresponding to strictly smaller indexes - // to the left of partition_index - let smaller_indexes = &indexes[..index_split]; - let smaller_values = _get_many_from_sorted_mut_unchecked( - &mut array.slice_mut(s![..array_partition_index]), - smaller_indexes, - ); - // Search recursively for the values corresponding to strictly bigger indexes - // to the right of partition_index - let bigger_indexes: Vec = indexes[index_split..] - .into_iter() - // We need to rebase the indexes - .map(|x| x - array_partition_index - 1) - .collect(); - let mut bigger_values = _get_many_from_sorted_mut_unchecked( - &mut array.slice_mut(s![(array_partition_index + 1)..]), - &bigger_indexes, - ); + // We search recursively for the values corresponding to strictly bigger indexes + // to the right of partition_index+1 + let mut bigger_values = _get_many_from_sorted_mut_unchecked( + &mut array.slice_mut(s![(array_partition_index + 1)..]), + &bigger_indexes, + ); - let mut results: Vec; - results = smaller_values; - results.append(&mut bigger_values); - results - } + // We merge the results together, in the correct order + let mut results: Vec; + + results = smaller_values; + if index_split.is_ok() { + // Get the value associated to partition index + results.push(array[array_partition_index].clone()); } + results.append(&mut bigger_values); + results } From d5ab45c5b88da9dca892c22811214c1b8e7695ea Mon Sep 17 00:00:00 2001 From: LukeMathWalker Date: Mon, 18 Mar 2019 08:29:05 +0000 Subject: [PATCH 49/81] Fmt --- src/sort.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/sort.rs b/src/sort.rs index 2b553a8d..568c0f54 100644 --- a/src/sort.rs +++ b/src/sort.rs @@ -247,7 +247,7 @@ where // in two chunks with respect to array_partition_index let index_split = indexes.binary_search(&array_partition_index); let (smaller_indexes, bigger_indexes) = match index_split { - Ok(index_split) => (&indexes[..index_split], &indexes[(index_split+1)..]), + Ok(index_split) => (&indexes[..index_split], &indexes[(index_split + 1)..]), Err(index_split) => (&indexes[..index_split], &indexes[index_split..]), }; // We are using a recursive search - to look for bigger_indexes in the right From 7b4e0dea75a27989c56dfbfa8aeebbdaf11304c9 Mon Sep 17 00:00:00 2001 From: Jim Turner Date: Sun, 31 Mar 2019 22:02:24 -0400 Subject: [PATCH 50/81] Clarify docs of get_many_from_sorted_mut_unchecked --- src/sort.rs | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/src/sort.rs b/src/sort.rs index 568c0f54..f578e24c 100644 --- a/src/sort.rs +++ b/src/sort.rs @@ -183,10 +183,11 @@ where } /// To retrieve multiple indexes from the sorted array in an optimized fashion, -/// [get_many_from_sorted_mut] first of all sorts the `indexes` vector. +/// [get_many_from_sorted_mut] first of all sorts and deduplicates the +/// `indexes` vector. /// -/// `get_many_from_sorted_mut_unchecked` does not perform this sorting, -/// assuming that the user has already taken care of it. +/// `get_many_from_sorted_mut_unchecked` does not perform this sorting and +/// deduplication, assuming that the user has already taken care of it. /// /// Useful when you have to call [get_many_from_sorted_mut] multiple times /// using the same indexes. From 64ed72b1368765aac3ed1b3d061fbaac94109651 Mon Sep 17 00:00:00 2001 From: Jim Turner Date: Sun, 31 Mar 2019 23:58:23 -0400 Subject: [PATCH 51/81] Add get_many_from_sorted_mut benchmark --- Cargo.toml | 5 +++++ benches/sort.rs | 42 ++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 47 insertions(+) create mode 100644 benches/sort.rs diff --git a/Cargo.toml b/Cargo.toml index cf7f8615..6f9b2755 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -23,7 +23,12 @@ itertools = { version = "0.7.0", default-features = false } indexmap = "1.0" [dev-dependencies] +criterion = "0.2" quickcheck = { version = "0.8.1", default-features = false } ndarray-rand = "0.9" approx = "0.3" quickcheck_macros = "0.8" + +[[bench]] +name = "sort" +harness = false diff --git a/benches/sort.rs b/benches/sort.rs new file mode 100644 index 00000000..febcf7b7 --- /dev/null +++ b/benches/sort.rs @@ -0,0 +1,42 @@ +extern crate criterion; +extern crate ndarray; +extern crate ndarray_stats; +extern crate rand; + +use criterion::{ + black_box, criterion_group, criterion_main, AxisScale, BatchSize, Criterion, + ParameterizedBenchmark, PlotConfiguration, +}; +use ndarray::prelude::*; +use ndarray_stats::Sort1dExt; +use rand::prelude::*; + +fn get_many_from_sorted_mut(c: &mut Criterion) { + let lens = vec![10, 100, 1000, 10000]; + let benchmark = ParameterizedBenchmark::new( + "get_many_from_sorted_mut", + |bencher, &len| { + let mut rng = StdRng::seed_from_u64(42); + let mut data: Vec<_> = (0..len).collect(); + data.shuffle(&mut rng); + let indices: Vec<_> = (0..len).step_by(len / 10).collect(); + bencher.iter_batched( + || Array1::from(data.clone()), + |mut arr| { + black_box(arr.get_many_from_sorted_mut(&indices)); + }, + BatchSize::SmallInput, + ) + }, + lens, + ) + .plot_config(PlotConfiguration::default().summary_scale(AxisScale::Logarithmic)); + c.bench("get_many_from_sorted_mut", benchmark); +} + +criterion_group! { + name = benches; + config = Criterion::default(); + targets = get_many_from_sorted_mut +} +criterion_main!(benches); From 2c9030985ffc2f287281e35005e9cef4fe465f6c Mon Sep 17 00:00:00 2001 From: Jim Turner Date: Mon, 1 Apr 2019 00:39:01 -0400 Subject: [PATCH 52/81] Add get_from_sorted_mut benchmark --- benches/sort.rs | 27 ++++++++++++++++++++++++++- 1 file changed, 26 insertions(+), 1 deletion(-) diff --git a/benches/sort.rs b/benches/sort.rs index febcf7b7..cdcf8dd3 100644 --- a/benches/sort.rs +++ b/benches/sort.rs @@ -11,6 +11,31 @@ use ndarray::prelude::*; use ndarray_stats::Sort1dExt; use rand::prelude::*; +fn get_from_sorted_mut(c: &mut Criterion) { + let lens = vec![10, 100, 1000, 10000]; + let benchmark = ParameterizedBenchmark::new( + "get_from_sorted_mut", + |bencher, &len| { + let mut rng = StdRng::seed_from_u64(42); + let mut data: Vec<_> = (0..len).collect(); + data.shuffle(&mut rng); + let indices: Vec<_> = (0..len).step_by(len / 10).collect(); + bencher.iter_batched( + || Array1::from(data.clone()), + |mut arr| { + for &i in &indices { + black_box(arr.get_from_sorted_mut(i)); + } + }, + BatchSize::SmallInput, + ) + }, + lens, + ) + .plot_config(PlotConfiguration::default().summary_scale(AxisScale::Logarithmic)); + c.bench("get_from_sorted_mut", benchmark); +} + fn get_many_from_sorted_mut(c: &mut Criterion) { let lens = vec![10, 100, 1000, 10000]; let benchmark = ParameterizedBenchmark::new( @@ -37,6 +62,6 @@ fn get_many_from_sorted_mut(c: &mut Criterion) { criterion_group! { name = benches; config = Criterion::default(); - targets = get_many_from_sorted_mut + targets = get_from_sorted_mut, get_many_from_sorted_mut } criterion_main!(benches); From 3a4ea2e0c9fdd5a2c15d2fde18f55a283d6f8749 Mon Sep 17 00:00:00 2001 From: Jim Turner Date: Sun, 31 Mar 2019 22:02:32 -0400 Subject: [PATCH 53/81] Simplify get_many_from_sorted_mut_unchecked --- src/sort.rs | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) diff --git a/src/sort.rs b/src/sort.rs index f578e24c..969ca151 100644 --- a/src/sort.rs +++ b/src/sort.rs @@ -205,11 +205,7 @@ where let values = _get_many_from_sorted_mut_unchecked(array, indexes); // We convert the vector to a more search-friendly IndexMap - let mut result = IndexMap::new(); - for (index, value) in indexes.into_iter().zip(values.into_iter()) { - result.insert(*index, value); - } - result + indexes.iter().cloned().zip(values.into_iter()).collect() } fn _get_many_from_sorted_mut_unchecked( From e5c94740ffdb5161ca7b967986d6f871e9bc0654 Mon Sep 17 00:00:00 2001 From: Jim Turner Date: Mon, 1 Apr 2019 00:30:24 -0400 Subject: [PATCH 54/81] Eliminate allocations from _get_many_from_sorted_mut_unchecked This improves performance, especially for small arrays. --- src/sort.rs | 120 +++++++++++++++++++++++++++++----------------------- 1 file changed, 68 insertions(+), 52 deletions(-) diff --git a/src/sort.rs b/src/sort.rs index 969ca151..88e37808 100644 --- a/src/sort.rs +++ b/src/sort.rs @@ -3,6 +3,7 @@ use ndarray::prelude::*; use ndarray::{s, Data, DataMut}; use rand::prelude::*; use rand::thread_rng; +use std::iter; /// Methods for sorting and partitioning 1-D arrays. pub trait Sort1dExt @@ -201,81 +202,96 @@ where A: Ord + Clone, S: DataMut, { - // The actual routine - let values = _get_many_from_sorted_mut_unchecked(array, indexes); + if indexes.is_empty() { + return IndexMap::new(); + } + + // Since `!indexes.is_empty` and indexes must be in-bounds, `array` must be + // non-empty. + let mut values: Vec<_> = iter::repeat(array[0].clone()).take(indexes.len()).collect(); + _get_many_from_sorted_mut_unchecked(array.view_mut(), &mut indexes.to_owned(), &mut values); - // We convert the vector to a more search-friendly IndexMap + // We convert the vector to a more search-friendly `IndexMap`. indexes.iter().cloned().zip(values.into_iter()).collect() } -fn _get_many_from_sorted_mut_unchecked( - array: &mut ArrayBase, - indexes: &[usize], -) -> Vec -where +/// This is the recursive portion of `get_many_from_sorted_mut_unchecked`. +/// +/// `indexes` is the list of indexes to get. `indexes` is mutable so that it +/// can be used as scratch space for this routine; the value of `indexes` after +/// calling this routine should be ignored. +/// +/// `values` is a pre-allocated slice to use for writing the output. Its +/// initial element values are ignored. +fn _get_many_from_sorted_mut_unchecked( + mut array: ArrayViewMut1, + indexes: &mut [usize], + values: &mut [A], +) where A: Ord + Clone, - S: DataMut, { let n = array.len(); + debug_assert!(n >= indexes.len()); // because indexes must be unique and in-bounds + debug_assert_eq!(indexes.len(), values.len()); - // Nothing to do in this case - if indexes.len() == 0 || n == 0 { - return vec![]; + if indexes.is_empty() { + // Nothing to do in this case. + return; } - // We can only reach this point with indexes.len() == 1 - // So it's safe to return a vector with a single value + // At this point, `n >= 1` since `indexes.len() >= 1`. if n == 1 { - let value = array[0].clone(); - return vec![value]; + // We can only reach this point if `indexes.len() == 1`, so we only + // need to assign the single value, and then we're done. + debug_assert_eq!(indexes.len(), 1); + values[0] = array[0].clone(); + return; } // We pick a random pivot index: the corresponding element is the pivot value let mut rng = thread_rng(); let pivot_index = rng.gen_range(0, n); - // We partition the array with respect to the pivot value - // The pivot value moves to `array_partition_index` - // Elements strictly smaller than the pivot value have indexes < `array_partition_index` - // Elements greater or equal to the pivot value have indexes > `array_partition_index` + // We partition the array with respect to the pivot value. + // The pivot value moves to `array_partition_index`. + // Elements strictly smaller than the pivot value have indexes < `array_partition_index`. + // Elements greater or equal to the pivot value have indexes > `array_partition_index`. let array_partition_index = array.partition_mut(pivot_index); - // We can use a divide et impera strategy, splitting the indexes we are searching - // in two chunks with respect to array_partition_index - let index_split = indexes.binary_search(&array_partition_index); - let (smaller_indexes, bigger_indexes) = match index_split { - Ok(index_split) => (&indexes[..index_split], &indexes[(index_split + 1)..]), - Err(index_split) => (&indexes[..index_split], &indexes[index_split..]), + // We use a divide-and-conquer strategy, splitting the indexes we are + // searching for (`indexes`) and the corresponding portions of the output + // slice (`values`) into pieces with respect to `array_partition_index`. + let (found_exact, index_split) = match indexes.binary_search(&array_partition_index) { + Ok(index) => (true, index), + Err(index) => (false, index), + }; + let (smaller_indexes, other_indexes) = indexes.split_at_mut(index_split); + let (smaller_values, other_values) = values.split_at_mut(index_split); + let (bigger_indexes, bigger_values) = if found_exact { + other_values[0] = array[array_partition_index].clone(); // Write exactly found value. + (&mut other_indexes[1..], &mut other_values[1..]) + } else { + (other_indexes, other_values) }; - // We are using a recursive search - to look for bigger_indexes in the right - // slice of the array we need to shift the indexes - let bigger_indexes: Vec = bigger_indexes - .into_iter() - .map(|x| x - array_partition_index - 1) - .collect(); - // We search recursively for the values corresponding to strictly smaller indexes - // to the left of partition_index - let smaller_values = _get_many_from_sorted_mut_unchecked( - &mut array.slice_mut(s![..array_partition_index]), + // We search recursively for the values corresponding to strictly smaller + // indexes to the left of `partition_index`. + _get_many_from_sorted_mut_unchecked( + array.slice_mut(s![..array_partition_index]), smaller_indexes, + smaller_values, ); - // We search recursively for the values corresponding to strictly bigger indexes - // to the right of partition_index+1 - let mut bigger_values = _get_many_from_sorted_mut_unchecked( - &mut array.slice_mut(s![(array_partition_index + 1)..]), - &bigger_indexes, + // We search recursively for the values corresponding to strictly bigger + // indexes to the right of `partition_index`. Since only the right portion + // of the array is passed in, the indexes need to be shifted by length of + // the removed portion. + bigger_indexes + .iter_mut() + .for_each(|x| *x -= array_partition_index + 1); + _get_many_from_sorted_mut_unchecked( + array.slice_mut(s![(array_partition_index + 1)..]), + bigger_indexes, + bigger_values, ); - - // We merge the results together, in the correct order - let mut results: Vec; - - results = smaller_values; - if index_split.is_ok() { - // Get the value associated to partition index - results.push(array[array_partition_index].clone()); - } - results.append(&mut bigger_values); - results } From 24ee7105fc541d11c750b8e0b2c3bb3c9785c759 Mon Sep 17 00:00:00 2001 From: Jim Turner Date: Mon, 1 Apr 2019 00:31:11 -0400 Subject: [PATCH 55/81] Call slice_axis_mut instead of slice_mut This has slightly lower overhead. --- src/sort.rs | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/src/sort.rs b/src/sort.rs index 88e37808..52b27534 100644 --- a/src/sort.rs +++ b/src/sort.rs @@ -1,6 +1,6 @@ use indexmap::IndexMap; use ndarray::prelude::*; -use ndarray::{s, Data, DataMut}; +use ndarray::{Data, DataMut, Slice}; use rand::prelude::*; use rand::thread_rng; use std::iter; @@ -122,11 +122,12 @@ where let pivot_index = rng.gen_range(0, n); let partition_index = self.partition_mut(pivot_index); if i < partition_index { - self.slice_mut(s![..partition_index]).get_from_sorted_mut(i) + self.slice_axis_mut(Axis(0), Slice::from(..partition_index)) + .get_from_sorted_mut(i) } else if i == partition_index { self[i].clone() } else { - self.slice_mut(s![partition_index + 1..]) + self.slice_axis_mut(Axis(0), Slice::from(partition_index + 1..)) .get_from_sorted_mut(i - (partition_index + 1)) } } @@ -277,7 +278,7 @@ fn _get_many_from_sorted_mut_unchecked( // We search recursively for the values corresponding to strictly smaller // indexes to the left of `partition_index`. _get_many_from_sorted_mut_unchecked( - array.slice_mut(s![..array_partition_index]), + array.slice_axis_mut(Axis(0), Slice::from(..array_partition_index)), smaller_indexes, smaller_values, ); @@ -290,7 +291,7 @@ fn _get_many_from_sorted_mut_unchecked( .iter_mut() .for_each(|x| *x -= array_partition_index + 1); _get_many_from_sorted_mut_unchecked( - array.slice_mut(s![(array_partition_index + 1)..]), + array.slice_axis_mut(Axis(0), Slice::from(array_partition_index + 1..)), bigger_indexes, bigger_values, ); From 8739c3bc62fdfcdde742269ae222d619e38238bc Mon Sep 17 00:00:00 2001 From: Jim Turner Date: Mon, 1 Apr 2019 12:02:22 -0400 Subject: [PATCH 56/81] Replace iter::repeat with vec! This is a bit cleaner. --- src/sort.rs | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/src/sort.rs b/src/sort.rs index 52b27534..3838420e 100644 --- a/src/sort.rs +++ b/src/sort.rs @@ -3,7 +3,6 @@ use ndarray::prelude::*; use ndarray::{Data, DataMut, Slice}; use rand::prelude::*; use rand::thread_rng; -use std::iter; /// Methods for sorting and partitioning 1-D arrays. pub trait Sort1dExt @@ -209,7 +208,7 @@ where // Since `!indexes.is_empty` and indexes must be in-bounds, `array` must be // non-empty. - let mut values: Vec<_> = iter::repeat(array[0].clone()).take(indexes.len()).collect(); + let mut values: Vec<_> = vec![array[0].clone(); indexes.len()]; _get_many_from_sorted_mut_unchecked(array.view_mut(), &mut indexes.to_owned(), &mut values); // We convert the vector to a more search-friendly `IndexMap`. From 88d896f5738689ceb797444988b79385abc9b860 Mon Sep 17 00:00:00 2001 From: Jim Turner Date: Mon, 1 Apr 2019 12:04:48 -0400 Subject: [PATCH 57/81] Fix typo in comment --- src/sort.rs | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/sort.rs b/src/sort.rs index 3838420e..c0391cc4 100644 --- a/src/sort.rs +++ b/src/sort.rs @@ -206,8 +206,8 @@ where return IndexMap::new(); } - // Since `!indexes.is_empty` and indexes must be in-bounds, `array` must be - // non-empty. + // Since `!indexes.is_empty()` and indexes must be in-bounds, `array` must + // be non-empty. let mut values: Vec<_> = vec![array[0].clone(); indexes.len()]; _get_many_from_sorted_mut_unchecked(array.view_mut(), &mut indexes.to_owned(), &mut values); From 29d507bd29d0ea03fa479d3ed7e1be9205f373aa Mon Sep 17 00:00:00 2001 From: Jim Turner Date: Mon, 1 Apr 2019 12:16:13 -0400 Subject: [PATCH 58/81] Remove unnecessary type annotation --- src/sort.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/sort.rs b/src/sort.rs index c0391cc4..605fb14e 100644 --- a/src/sort.rs +++ b/src/sort.rs @@ -208,7 +208,7 @@ where // Since `!indexes.is_empty()` and indexes must be in-bounds, `array` must // be non-empty. - let mut values: Vec<_> = vec![array[0].clone(); indexes.len()]; + let mut values = vec![array[0].clone(); indexes.len()]; _get_many_from_sorted_mut_unchecked(array.view_mut(), &mut indexes.to_owned(), &mut values); // We convert the vector to a more search-friendly `IndexMap`. From d0879c8248ab5e69b8979cd66c50d357a2d35417 Mon Sep 17 00:00:00 2001 From: Jim Turner Date: Mon, 1 Apr 2019 18:05:42 -0400 Subject: [PATCH 59/81] Simplify quantiles tests For me, the tests are easier to understand when they don't collect into a `Vec`. --- tests/quantile.rs | 108 +++++++++++++++++++++------------------------- 1 file changed, 49 insertions(+), 59 deletions(-) diff --git a/tests/quantile.rs b/tests/quantile.rs index aa0ddbae..dd5eb96c 100644 --- a/tests/quantile.rs +++ b/tests/quantile.rs @@ -299,27 +299,18 @@ fn test_quantiles_mut(xs: Vec) -> bool { n64(0.5), n64(0.5), ]; - let mut checks = vec![]; - checks.push(check_one_interpolation_method_for_quantiles_mut::( - v.clone(), - &quantile_indexes, - )); - checks.push(check_one_interpolation_method_for_quantiles_mut::( - v.clone(), - &quantile_indexes, - )); - checks.push(check_one_interpolation_method_for_quantiles_mut::( - v.clone(), - &quantile_indexes, - )); - checks.push( - check_one_interpolation_method_for_quantiles_mut::(v.clone(), &quantile_indexes), - ); - checks.push(check_one_interpolation_method_for_quantiles_mut::( - v.clone(), - &quantile_indexes, - )); - checks.into_iter().all(|x| x) + let mut correct = true; + correct &= + check_one_interpolation_method_for_quantiles_mut::(v.clone(), &quantile_indexes); + correct &= + check_one_interpolation_method_for_quantiles_mut::(v.clone(), &quantile_indexes); + correct &= + check_one_interpolation_method_for_quantiles_mut::(v.clone(), &quantile_indexes); + correct &= + check_one_interpolation_method_for_quantiles_mut::(v.clone(), &quantile_indexes); + correct &= + check_one_interpolation_method_for_quantiles_mut::(v.clone(), &quantile_indexes); + correct } fn check_one_interpolation_method_for_quantiles_mut>( @@ -332,24 +323,19 @@ fn check_one_interpolation_method_for_quantiles_mut>( bulk_quantiles.is_none() } else { let bulk_quantiles = bulk_quantiles.unwrap(); - - let mut checks = vec![]; - for quantile_index in quantile_indexes.iter() { - let quantile = v.quantile_mut::(*quantile_index).unwrap(); - checks.push(quantile == *bulk_quantiles.get(quantile_index).unwrap()); - } - checks.into_iter().all(|x| x) + quantile_indexes.iter().all(|&quantile_index| { + let quantile = v.quantile_mut::(quantile_index).unwrap(); + quantile == bulk_quantiles[&quantile_index] + }) } } #[quickcheck] -fn test_quantiles_axis_mut(xs: Vec) -> bool { +fn test_quantiles_axis_mut(mut xs: Vec) -> bool { // We want a square matrix let axis_length = (xs.len() as f64).sqrt().floor() as usize; - let xs = &xs[..axis_length.pow(2)]; - let m = Array::from_vec(xs.to_vec()) - .into_shape((axis_length, axis_length)) - .unwrap(); + xs.truncate(axis_length * axis_length); + let m = Array::from_shape_vec((axis_length, axis_length), xs).unwrap(); // Unordered list of quantile indexes to look up, with a duplicate let quantile_indexes = vec![ @@ -365,27 +351,33 @@ fn test_quantiles_axis_mut(xs: Vec) -> bool { ]; // Test out all interpolation methods - let mut checks = vec![]; - checks.push(check_one_interpolation_method_for_quantiles_axis_mut::< - Linear, - >(m.clone(), &quantile_indexes, Axis(0))); - checks.push(check_one_interpolation_method_for_quantiles_axis_mut::< - Higher, - >(m.clone(), &quantile_indexes, Axis(0))); - checks.push( - check_one_interpolation_method_for_quantiles_axis_mut::( - m.clone(), - &quantile_indexes, - Axis(0), - ), + let mut correct = true; + correct &= check_one_interpolation_method_for_quantiles_axis_mut::( + m.clone(), + &quantile_indexes, + Axis(0), ); - checks.push(check_one_interpolation_method_for_quantiles_axis_mut::< - Midpoint, - >(m.clone(), &quantile_indexes, Axis(0))); - checks.push(check_one_interpolation_method_for_quantiles_axis_mut::< - Nearest, - >(m.clone(), &quantile_indexes, Axis(0))); - checks.into_iter().all(|x| x) + correct &= check_one_interpolation_method_for_quantiles_axis_mut::( + m.clone(), + &quantile_indexes, + Axis(0), + ); + correct &= check_one_interpolation_method_for_quantiles_axis_mut::( + m.clone(), + &quantile_indexes, + Axis(0), + ); + correct &= check_one_interpolation_method_for_quantiles_axis_mut::( + m.clone(), + &quantile_indexes, + Axis(0), + ); + correct &= check_one_interpolation_method_for_quantiles_axis_mut::( + m.clone(), + &quantile_indexes, + Axis(0), + ); + correct } fn check_one_interpolation_method_for_quantiles_axis_mut>( @@ -399,11 +391,9 @@ fn check_one_interpolation_method_for_quantiles_axis_mut>( bulk_quantiles.is_none() } else { let bulk_quantiles = bulk_quantiles.unwrap(); - let mut checks = vec![]; - for quantile_index in quantile_indexes.iter() { - let quantile = v.quantile_axis_mut::(axis, *quantile_index).unwrap(); - checks.push(quantile == *bulk_quantiles.get(quantile_index).unwrap()); - } - checks.into_iter().all(|x| x) + quantile_indexes.iter().all(|&quantile_index| { + let quantile = v.quantile_axis_mut::(axis, quantile_index).unwrap(); + quantile == bulk_quantiles[&quantile_index] + }) } } From 54c11be454a4345215ee486b799813755ca170db Mon Sep 17 00:00:00 2001 From: Jim Turner Date: Mon, 1 Apr 2019 18:23:32 -0400 Subject: [PATCH 60/81] Check keys in test_sorted_get_many_mut --- tests/sort.rs | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/tests/sort.rs b/tests/sort.rs index 6bbe17b4..2de79b88 100644 --- a/tests/sort.rs +++ b/tests/sort.rs @@ -60,11 +60,13 @@ fn test_sorted_get_many_mut(mut xs: Vec) -> bool { let mut indexes: Vec = (0..n).into_iter().collect(); indexes.append(&mut (0..n).into_iter().collect()); - let sorted_v: Vec = v - .get_many_from_sorted_mut(&indexes) - .into_iter() - .map(|x| x.1) - .collect(); + let mut sorted_v = Vec::with_capacity(n); + for (i, (key, value)) in v.get_many_from_sorted_mut(&indexes).into_iter().enumerate() { + if i != key { + return false; + } + sorted_v.push(value); + } xs.sort(); println!("Sorted: {:?}. Truth: {:?}", sorted_v, xs); xs == sorted_v From 847fcd5096b9223a18d92f17cd8c5eb68f06875e Mon Sep 17 00:00:00 2001 From: Jim Turner Date: Mon, 1 Apr 2019 18:26:46 -0400 Subject: [PATCH 61/81] Simplify sort tests --- tests/sort.rs | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) diff --git a/tests/sort.rs b/tests/sort.rs index 2de79b88..037e0d8f 100644 --- a/tests/sort.rs +++ b/tests/sort.rs @@ -58,7 +58,7 @@ fn test_sorted_get_many_mut(mut xs: Vec) -> bool { // Insert each index twice, to get a set of indexes with duplicates, not sorted let mut indexes: Vec = (0..n).into_iter().collect(); - indexes.append(&mut (0..n).into_iter().collect()); + indexes.append(&mut (0..n).collect()); let mut sorted_v = Vec::with_capacity(n); for (i, (key, value)) in v.get_many_from_sorted_mut(&indexes).into_iter().enumerate() { @@ -80,10 +80,7 @@ fn test_sorted_get_mut_as_sorting_algorithm(mut xs: Vec) -> bool { true } else { let mut v = Array::from_vec(xs.clone()); - let mut sorted_v = vec![]; - for i in 0..n { - sorted_v.push(v.get_from_sorted_mut(i)) - } + let sorted_v: Vec<_> = (0..n).map(|i| v.get_from_sorted_mut(i)).collect(); xs.sort(); xs == sorted_v } From c6f762a77d5a84350b0951435855bd9969c4415f Mon Sep 17 00:00:00 2001 From: Jim Turner Date: Mon, 1 Apr 2019 18:31:32 -0400 Subject: [PATCH 62/81] Improve sort and quantiles docs --- src/quantile/mod.rs | 22 +++++++++++----------- src/sort.rs | 8 ++++---- 2 files changed, 15 insertions(+), 15 deletions(-) diff --git a/src/quantile/mod.rs b/src/quantile/mod.rs index 1554aa77..dd3539f6 100644 --- a/src/quantile/mod.rs +++ b/src/quantile/mod.rs @@ -222,15 +222,15 @@ where S: DataMut, I: Interpolate; - /// A bulk version of [quantile_axis_mut], optimized to retrieve multiple + /// A bulk version of [`quantile_axis_mut`], optimized to retrieve multiple /// quantiles at once. - /// It returns an IndexMap, with (quantile index, quantile over axis) as + /// It returns an `IndexMap`, with (quantile index, quantile over axis) as /// key-value pairs. /// - /// The IndexMap is sorted with respect to quantile indexes in increasing order: + /// The `IndexMap` is sorted with respect to quantile indexes in increasing order: /// this ordering is preserved when you iterate over it (using `iter`/`into_iter`). /// - /// See [quantile_axis_mut] for additional details on quantiles and the algorithm + /// See [`quantile_axis_mut`] for additional details on quantiles and the algorithm /// used to retrieve them. /// /// Returns `None` when the specified axis has length 0. @@ -238,7 +238,7 @@ where /// **Panics** if `axis` is out of bounds or if /// any `q` in `qs` is not between `0.` and `1.` (inclusive). /// - /// [quantile_axis_mut]: ##tymethod.quantile_axis_mut + /// [`quantile_axis_mut`]: #tymethod.quantile_axis_mut fn quantiles_axis_mut( &mut self, axis: Axis, @@ -252,7 +252,7 @@ where /// Return the `q`th quantile of the data along the specified axis, skipping NaN values. /// - /// See [`quantile_axis_mut`](##tymethod.quantile_axis_mut) for details. + /// See [`quantile_axis_mut`](#tymethod.quantile_axis_mut) for details. fn quantile_axis_skipnan_mut(&mut self, axis: Axis, q: N64) -> Option> where D: RemoveAxis, @@ -537,22 +537,22 @@ where S: DataMut, I: Interpolate; - /// A bulk version of [quantile_mut], optimized to retrieve multiple + /// A bulk version of [`quantile_mut`], optimized to retrieve multiple /// quantiles at once. - /// It returns an IndexMap, with (quantile index, quantile value) as + /// It returns an `IndexMap`, with (quantile index, quantile value) as /// key-value pairs. /// - /// The IndexMap is sorted with respect to quantile indexes in increasing order: + /// The `IndexMap` is sorted with respect to quantile indexes in increasing order: /// this ordering is preserved when you iterate over it (using `iter`/`into_iter`). /// /// It returns `None` if the array is empty. /// - /// See [quantile_mut] for additional details on quantiles and the algorithm + /// See [`quantile_mut`] for additional details on quantiles and the algorithm /// used to retrieve them. /// /// **Panics** if any `q` in `qs` is not between `0.` and `1.` (inclusive). /// - /// [quantile_mut]: ##tymethod.quantile_mut + /// [`quantile_mut`]: #tymethod.quantile_mut fn quantiles_mut(&mut self, qs: &[N64]) -> Option> where A: Ord + Clone, diff --git a/src/sort.rs b/src/sort.rs index 605fb14e..590ad156 100644 --- a/src/sort.rs +++ b/src/sort.rs @@ -33,17 +33,17 @@ where A: Ord + Clone, S: DataMut; - /// A bulk version of [get_from_sorted_mut], optimized to retrieve multiple + /// A bulk version of [`get_from_sorted_mut`], optimized to retrieve multiple /// indexes at once. - /// It returns an IndexMap, with indexes as keys and retrieved elements as + /// It returns an `IndexMap`, with indexes as keys and retrieved elements as /// values. - /// The IndexMap is sorted with respect to indexes in increasing order: + /// The `IndexMap` is sorted with respect to indexes in increasing order: /// this ordering is preserved when you iterate over it (using `iter`/`into_iter`). /// /// **Panics** if any element in `indexes` is greater than or equal to `n`, /// where `n` is the length of the array.. /// - /// [get_from_sorted_mut]: ##tymethod.get_from_sorted_mut + /// [`get_from_sorted_mut`]: #tymethod.get_from_sorted_mut fn get_many_from_sorted_mut(&mut self, indexes: &[usize]) -> IndexMap where A: Ord + Clone, From 1685095243e75ce76090138f5c80fa8ca8e00c4e Mon Sep 17 00:00:00 2001 From: Jim Turner Date: Mon, 1 Apr 2019 20:03:26 -0400 Subject: [PATCH 63/81] Make Interpolate::interpolate operate elementwise --- src/quantile/interpolate.rs | 74 +++++++------------------------------ 1 file changed, 13 insertions(+), 61 deletions(-) diff --git a/src/quantile/interpolate.rs b/src/quantile/interpolate.rs index ee8fffdd..a0fe64d4 100644 --- a/src/quantile/interpolate.rs +++ b/src/quantile/interpolate.rs @@ -1,6 +1,4 @@ //! Interpolation strategies. -use ndarray::azip; -use ndarray::prelude::*; use noisy_float::types::N64; use num_traits::{Float, FromPrimitive, NumOps, ToPrimitive}; @@ -45,14 +43,7 @@ pub trait Interpolate { /// **Panics** if `None` is provided for the lower value when it's needed /// or if `None` is provided for the higher value when it's needed. #[doc(hidden)] - fn interpolate( - lower: Option>, - higher: Option>, - q: N64, - len: usize, - ) -> Array - where - D: Dimension; + fn interpolate(lower: Option, higher: Option, q: N64, len: usize) -> T; } /// Select the higher value. @@ -75,12 +66,7 @@ impl Interpolate for Higher { fn needs_higher(_q: N64, _len: usize) -> bool { true } - fn interpolate( - _lower: Option>, - higher: Option>, - _q: N64, - _len: usize, - ) -> Array { + fn interpolate(_lower: Option, higher: Option, _q: N64, _len: usize) -> T { higher.unwrap() } } @@ -92,12 +78,7 @@ impl Interpolate for Lower { fn needs_higher(_q: N64, _len: usize) -> bool { false } - fn interpolate( - lower: Option>, - _higher: Option>, - _q: N64, - _len: usize, - ) -> Array { + fn interpolate(lower: Option, _higher: Option, _q: N64, _len: usize) -> T { lower.unwrap() } } @@ -109,12 +90,7 @@ impl Interpolate for Nearest { fn needs_higher(q: N64, len: usize) -> bool { !>::needs_lower(q, len) } - fn interpolate( - lower: Option>, - higher: Option>, - q: N64, - len: usize, - ) -> Array { + fn interpolate(lower: Option, higher: Option, q: N64, len: usize) -> T { if >::needs_lower(q, len) { lower.unwrap() } else { @@ -133,24 +109,11 @@ where fn needs_higher(_q: N64, _len: usize) -> bool { true } - fn interpolate( - lower: Option>, - higher: Option>, - _q: N64, - _len: usize, - ) -> Array - where - D: Dimension, - { + fn interpolate(lower: Option, higher: Option, _q: N64, _len: usize) -> T { let denom = T::from_u8(2).unwrap(); - let mut lower = lower.unwrap(); + let lower = lower.unwrap(); let higher = higher.unwrap(); - azip!( - mut lower, ref higher in { - *lower = lower.clone() + (higher.clone() - lower.clone()) / denom.clone() - } - ); - lower + lower.clone() + (higher.clone() - lower.clone()) / denom.clone() } } @@ -164,23 +127,12 @@ where fn needs_higher(_q: N64, _len: usize) -> bool { true } - fn interpolate( - lower: Option>, - higher: Option>, - q: N64, - len: usize, - ) -> Array - where - D: Dimension, - { + fn interpolate(lower: Option, higher: Option, q: N64, len: usize) -> T { let fraction = float_quantile_index_fraction(q, len).to_f64().unwrap(); - let mut a = lower.unwrap(); - let b = higher.unwrap(); - azip!(mut a, ref b in { - let a_f64 = a.to_f64().unwrap(); - let b_f64 = b.to_f64().unwrap(); - *a = a.clone() + T::from_f64(fraction * (b_f64 - a_f64)).unwrap(); - }); - a + let lower = lower.unwrap(); + let higher = higher.unwrap(); + let lower_f64 = lower.to_f64().unwrap(); + let higher_f64 = higher.to_f64().unwrap(); + lower.clone() + T::from_f64(fraction * (higher_f64 - lower_f64)).unwrap() } } From e965e85298d21552e32db17b097eaea69d653b75 Mon Sep 17 00:00:00 2001 From: Jim Turner Date: Mon, 1 Apr 2019 20:06:06 -0400 Subject: [PATCH 64/81] Make quantiles_* return Array instead of IndexMap --- src/quantile/mod.rs | 108 ++++++++++++++++++++++++-------------------- tests/quantile.rs | 20 ++++---- 2 files changed, 70 insertions(+), 58 deletions(-) diff --git a/src/quantile/mod.rs b/src/quantile/mod.rs index dd3539f6..82c97afd 100644 --- a/src/quantile/mod.rs +++ b/src/quantile/mod.rs @@ -1,8 +1,8 @@ use self::interpolate::{higher_index, lower_index, Interpolate}; use super::sort::get_many_from_sorted_mut_unchecked; -use indexmap::{IndexMap, IndexSet}; +use indexmap::IndexSet; use ndarray::prelude::*; -use ndarray::{Data, DataMut, RemoveAxis}; +use ndarray::{Data, DataMut, RemoveAxis, Zip}; use noisy_float::types::N64; use std::cmp; use {MaybeNan, MaybeNanExt}; @@ -224,11 +224,9 @@ where /// A bulk version of [`quantile_axis_mut`], optimized to retrieve multiple /// quantiles at once. - /// It returns an `IndexMap`, with (quantile index, quantile over axis) as - /// key-value pairs. /// - /// The `IndexMap` is sorted with respect to quantile indexes in increasing order: - /// this ordering is preserved when you iterate over it (using `iter`/`into_iter`). + /// Returns an `Array`, where subviews along `axis` of the array correspond + /// to the elements of `qs`. /// /// See [`quantile_axis_mut`] for additional details on quantiles and the algorithm /// used to retrieve them. @@ -239,11 +237,29 @@ where /// any `q` in `qs` is not between `0.` and `1.` (inclusive). /// /// [`quantile_axis_mut`]: #tymethod.quantile_axis_mut - fn quantiles_axis_mut( - &mut self, - axis: Axis, - qs: &[N64], - ) -> Option>> + /// + /// # Example + /// + /// ```rust + /// # extern crate ndarray; + /// # extern crate ndarray_stats; + /// # extern crate noisy_float; + /// # + /// use ndarray::{array, Axis}; + /// use ndarray_stats::{QuantileExt, interpolate::Nearest}; + /// use noisy_float::types::n64; + /// + /// # fn main() { + /// let mut data = array![[3, 4, 5], [6, 7, 8]]; + /// let axis = Axis(1); + /// let qs = &[n64(0.3), n64(0.7)]; + /// let quantiles = data.quantiles_axis_mut::(axis, qs).unwrap(); + /// for (&q, quantile) in qs.iter().zip(quantiles.axis_iter(axis)) { + /// assert_eq!(quantile, data.quantile_axis_mut::(axis, q).unwrap()); + /// } + /// # } + /// ``` + fn quantiles_axis_mut(&mut self, axis: Axis, qs: &[N64]) -> Option> where D: RemoveAxis, A: Ord + Clone, @@ -395,11 +411,7 @@ where })) } - fn quantiles_axis_mut( - &mut self, - axis: Axis, - qs: &[N64], - ) -> Option>> + fn quantiles_axis_mut(&mut self, axis: Axis, qs: &[N64]) -> Option> where D: RemoveAxis, A: Ord + Clone, @@ -413,6 +425,12 @@ where return None; } + let mut results_shape = self.raw_dim(); + results_shape[axis.index()] = qs.len(); + if results_shape.size() == 0 { + return Some(Array::from_shape_vec(results_shape, Vec::new()).unwrap()); + } + let mut deduped_qs: Vec = qs.to_vec(); deduped_qs.sort_by(|a, b| a.partial_cmp(b).unwrap()); deduped_qs.dedup(); @@ -431,30 +449,25 @@ where } let searched_indexes: Vec = searched_indexes.into_iter().collect(); - // Retrieve the values corresponding to each index for each slice along the specified axis - // For each 1-dimensional slice along the specified axis we get back an IndexMap - // which can be used to retrieve the desired values using searched_indexes - let values = self.map_axis_mut(axis, |mut x| { - get_many_from_sorted_mut_unchecked(&mut x, &searched_indexes) - }); - - // Combine the retrieved values according to specified interpolation strategy to - // get the desired quantiles - let mut results = IndexMap::new(); - for q in qs { - let lower = if I::needs_lower(*q, axis_len) { - Some(values.map(|x| x[&lower_index(*q, axis_len)].clone())) - } else { - None - }; - let higher = if I::needs_higher(*q, axis_len) { - Some(values.map(|x| x[&higher_index(*q, axis_len)].clone())) - } else { - None - }; - let interpolated = I::interpolate(lower, higher, *q, axis_len); - results.insert(*q, interpolated); - } + let mut results = Array::from_elem(results_shape, self.first().unwrap().clone()); + Zip::from(results.lanes_mut(axis)) + .and(self.lanes_mut(axis)) + .apply(|mut results, mut data| { + let index_map = get_many_from_sorted_mut_unchecked(&mut data, &searched_indexes); + for (result, &q) in results.iter_mut().zip(qs) { + let lower = if I::needs_lower(q, axis_len) { + Some(index_map[&lower_index(q, axis_len)].clone()) + } else { + None + }; + let higher = if I::needs_higher(q, axis_len) { + Some(index_map[&higher_index(q, axis_len)].clone()) + } else { + None + }; + *result = I::interpolate(lower, higher, q, axis_len); + } + }); Some(results) } @@ -466,7 +479,7 @@ where I: Interpolate, { self.quantiles_axis_mut::(axis, &[q]) - .map(|x| x.into_iter().next().unwrap().1) + .map(|a| a.index_axis_move(axis, 0)) } fn quantile_axis_skipnan_mut(&mut self, axis: Axis, q: N64) -> Option> @@ -539,13 +552,11 @@ where /// A bulk version of [`quantile_mut`], optimized to retrieve multiple /// quantiles at once. - /// It returns an `IndexMap`, with (quantile index, quantile value) as - /// key-value pairs. /// - /// The `IndexMap` is sorted with respect to quantile indexes in increasing order: - /// this ordering is preserved when you iterate over it (using `iter`/`into_iter`). + /// Returns an `Array`, where the elements of the array correspond to the + /// elements of `qs`. /// - /// It returns `None` if the array is empty. + /// Returns `None` if the array is empty. /// /// See [`quantile_mut`] for additional details on quantiles and the algorithm /// used to retrieve them. @@ -553,7 +564,7 @@ where /// **Panics** if any `q` in `qs` is not between `0.` and `1.` (inclusive). /// /// [`quantile_mut`]: #tymethod.quantile_mut - fn quantiles_mut(&mut self, qs: &[N64]) -> Option> + fn quantiles_mut(&mut self, qs: &[N64]) -> Option> where A: Ord + Clone, S: DataMut, @@ -574,14 +585,13 @@ where .map(|v| v.into_scalar()) } - fn quantiles_mut(&mut self, qs: &[N64]) -> Option> + fn quantiles_mut(&mut self, qs: &[N64]) -> Option> where A: Ord + Clone, S: DataMut, I: Interpolate, { self.quantiles_axis_mut::(Axis(0), qs) - .map(|v| v.into_iter().map(|x| (x.0, x.1.into_scalar())).collect()) } } diff --git a/tests/quantile.rs b/tests/quantile.rs index dd5eb96c..1b98e1a6 100644 --- a/tests/quantile.rs +++ b/tests/quantile.rs @@ -1,3 +1,4 @@ +extern crate itertools; extern crate ndarray; extern crate ndarray_stats; extern crate noisy_float; @@ -5,6 +6,7 @@ extern crate noisy_float; extern crate quickcheck; extern crate quickcheck_macros; +use itertools::izip; use ndarray::array; use ndarray::prelude::*; use ndarray_stats::{ @@ -317,15 +319,14 @@ fn check_one_interpolation_method_for_quantiles_mut>( mut v: Array1, quantile_indexes: &[N64], ) -> bool { - let bulk_quantiles = v.quantiles_mut::(&quantile_indexes); + let bulk_quantiles = v.clone().quantiles_mut::(&quantile_indexes); if v.len() == 0 { bulk_quantiles.is_none() } else { let bulk_quantiles = bulk_quantiles.unwrap(); - quantile_indexes.iter().all(|&quantile_index| { - let quantile = v.quantile_mut::(quantile_index).unwrap(); - quantile == bulk_quantiles[&quantile_index] + izip!(quantile_indexes, &bulk_quantiles).all(|(&quantile_index, &quantile)| { + quantile == v.quantile_mut::(quantile_index).unwrap() }) } } @@ -385,15 +386,16 @@ fn check_one_interpolation_method_for_quantiles_axis_mut>( quantile_indexes: &[N64], axis: Axis, ) -> bool { - let bulk_quantiles = v.quantiles_axis_mut::(axis, &quantile_indexes); + let bulk_quantiles = v.clone().quantiles_axis_mut::(axis, &quantile_indexes); if v.len() == 0 { bulk_quantiles.is_none() } else { let bulk_quantiles = bulk_quantiles.unwrap(); - quantile_indexes.iter().all(|&quantile_index| { - let quantile = v.quantile_axis_mut::(axis, quantile_index).unwrap(); - quantile == bulk_quantiles[&quantile_index] - }) + izip!(quantile_indexes, bulk_quantiles.axis_iter(axis)).all( + |(&quantile_index, quantile)| { + quantile == v.quantile_axis_mut::(axis, quantile_index).unwrap() + }, + ) } } From cfc408f7bf3668e1de126000fc28645e1ae7cd33 Mon Sep 17 00:00:00 2001 From: Jim Turner Date: Mon, 1 Apr 2019 21:27:13 -0400 Subject: [PATCH 65/81] Add interpolate parameter to quantile* This has a few advantages: * It's now possible to save the interpolation strategy in a variable and easily re-use it. * We can now freely add more type parameters to the `quantile*` methods as needed without making them more difficult to call. * We now have the flexibility to add more advanced interpolation strategies in the future (e.g. one that wraps a closure). * Calling the `quantile*` methods is now slightly more compact because a turbofish isn't necessary. --- src/histogram/strategies.rs | 4 +-- src/quantile/mod.rs | 36 ++++++++++---------- tests/quantile.rs | 67 +++++++++++++++++++++---------------- 3 files changed, 59 insertions(+), 48 deletions(-) diff --git a/src/histogram/strategies.rs b/src/histogram/strategies.rs index 9f933df0..fa90c385 100644 --- a/src/histogram/strategies.rs +++ b/src/histogram/strategies.rs @@ -308,8 +308,8 @@ where let n_points = a.len(); let mut a_copy = a.to_owned(); - let first_quartile = a_copy.quantile_mut::(n64(0.25)).unwrap(); - let third_quartile = a_copy.quantile_mut::(n64(0.75)).unwrap(); + let first_quartile = a_copy.quantile_mut(n64(0.25), &Nearest).unwrap(); + let third_quartile = a_copy.quantile_mut(n64(0.75), &Nearest).unwrap(); let iqr = third_quartile - first_quartile; let bin_width = FreedmanDiaconis::compute_bin_width(n_points, iqr); diff --git a/src/quantile/mod.rs b/src/quantile/mod.rs index 82c97afd..c880c73b 100644 --- a/src/quantile/mod.rs +++ b/src/quantile/mod.rs @@ -191,7 +191,7 @@ where /// in increasing order. /// If `(N-1)q` is not an integer the desired quantile lies between /// two data points: we return the lower, nearest, higher or interpolated - /// value depending on the type `Interpolate` bound `I`. + /// value depending on the `interpolate` strategy. /// /// Some examples: /// - `q=0.` returns the minimum along each 1-dimensional lane; @@ -215,7 +215,7 @@ where /// /// **Panics** if `axis` is out of bounds or if /// `q` is not between `0.` and `1.` (inclusive). - fn quantile_axis_mut(&mut self, axis: Axis, q: N64) -> Option> + fn quantile_axis_mut(&mut self, axis: Axis, q: N64, interpolate: &I) -> Option> where D: RemoveAxis, A: Ord + Clone, @@ -253,13 +253,13 @@ where /// let mut data = array![[3, 4, 5], [6, 7, 8]]; /// let axis = Axis(1); /// let qs = &[n64(0.3), n64(0.7)]; - /// let quantiles = data.quantiles_axis_mut::(axis, qs).unwrap(); + /// let quantiles = data.quantiles_axis_mut(axis, qs, &Nearest).unwrap(); /// for (&q, quantile) in qs.iter().zip(quantiles.axis_iter(axis)) { - /// assert_eq!(quantile, data.quantile_axis_mut::(axis, q).unwrap()); + /// assert_eq!(quantile, data.quantile_axis_mut(axis, q, &Nearest).unwrap()); /// } /// # } /// ``` - fn quantiles_axis_mut(&mut self, axis: Axis, qs: &[N64]) -> Option> + fn quantiles_axis_mut(&mut self, axis: Axis, qs: &[N64], interpolate: &I) -> Option> where D: RemoveAxis, A: Ord + Clone, @@ -269,7 +269,7 @@ where /// Return the `q`th quantile of the data along the specified axis, skipping NaN values. /// /// See [`quantile_axis_mut`](#tymethod.quantile_axis_mut) for details. - fn quantile_axis_skipnan_mut(&mut self, axis: Axis, q: N64) -> Option> + fn quantile_axis_skipnan_mut(&mut self, axis: Axis, q: N64, interpolate: &I) -> Option> where D: RemoveAxis, A: MaybeNan, @@ -411,7 +411,7 @@ where })) } - fn quantiles_axis_mut(&mut self, axis: Axis, qs: &[N64]) -> Option> + fn quantiles_axis_mut(&mut self, axis: Axis, qs: &[N64], _interpolate: &I) -> Option> where D: RemoveAxis, A: Ord + Clone, @@ -471,18 +471,18 @@ where Some(results) } - fn quantile_axis_mut(&mut self, axis: Axis, q: N64) -> Option> + fn quantile_axis_mut(&mut self, axis: Axis, q: N64, interpolate: &I) -> Option> where D: RemoveAxis, A: Ord + Clone, S: DataMut, I: Interpolate, { - self.quantiles_axis_mut::(axis, &[q]) + self.quantiles_axis_mut(axis, &[q], interpolate) .map(|a| a.index_axis_move(axis, 0)) } - fn quantile_axis_skipnan_mut(&mut self, axis: Axis, q: N64) -> Option> + fn quantile_axis_skipnan_mut(&mut self, axis: Axis, q: N64, interpolate: &I) -> Option> where D: RemoveAxis, A: MaybeNan, @@ -500,7 +500,7 @@ where } else { Some( not_nan - .quantile_axis_mut::(Axis(0), q) + .quantile_axis_mut::(Axis(0), q, interpolate) .unwrap() .into_scalar(), ) @@ -523,7 +523,7 @@ where /// in increasing order. /// If `(N-1)q` is not an integer the desired quantile lies between /// two data points: we return the lower, nearest, higher or interpolated - /// value depending on the type `Interpolate` bound `I`. + /// value depending on the `interpolate` strategy. /// /// Some examples: /// - `q=0.` returns the minimum; @@ -544,7 +544,7 @@ where /// Returns `None` if the array is empty. /// /// **Panics** if `q` is not between `0.` and `1.` (inclusive). - fn quantile_mut(&mut self, q: N64) -> Option + fn quantile_mut(&mut self, q: N64, interpolate: &I) -> Option where A: Ord + Clone, S: DataMut, @@ -564,7 +564,7 @@ where /// **Panics** if any `q` in `qs` is not between `0.` and `1.` (inclusive). /// /// [`quantile_mut`]: #tymethod.quantile_mut - fn quantiles_mut(&mut self, qs: &[N64]) -> Option> + fn quantiles_mut(&mut self, qs: &[N64], interpolate: &I) -> Option> where A: Ord + Clone, S: DataMut, @@ -575,23 +575,23 @@ impl Quantile1dExt for ArrayBase where S: Data, { - fn quantile_mut(&mut self, q: N64) -> Option + fn quantile_mut(&mut self, q: N64, interpolate: &I) -> Option where A: Ord + Clone, S: DataMut, I: Interpolate, { - self.quantile_axis_mut::(Axis(0), q) + self.quantile_axis_mut::(Axis(0), q, interpolate) .map(|v| v.into_scalar()) } - fn quantiles_mut(&mut self, qs: &[N64]) -> Option> + fn quantiles_mut(&mut self, qs: &[N64], interpolate: &I) -> Option> where A: Ord + Clone, S: DataMut, I: Interpolate, { - self.quantiles_axis_mut::(Axis(0), qs) + self.quantiles_axis_mut(Axis(0), qs, interpolate) } } diff --git a/tests/quantile.rs b/tests/quantile.rs index 1b98e1a6..2957afe7 100644 --- a/tests/quantile.rs +++ b/tests/quantile.rs @@ -182,41 +182,41 @@ fn test_max_skipnan_all_nan() { #[test] fn test_quantile_axis_mut_with_odd_axis_length() { let mut a = arr2(&[[1, 3, 2, 10], [2, 4, 3, 11], [3, 5, 6, 12]]); - let p = a.quantile_axis_mut::(Axis(0), n64(0.5)).unwrap(); + let p = a.quantile_axis_mut(Axis(0), n64(0.5), &Lower).unwrap(); assert!(p == a.index_axis(Axis(0), 1)); } #[test] fn test_quantile_axis_mut_with_zero_axis_length() { let mut a = Array2::::zeros((5, 0)); - assert!(a.quantile_axis_mut::(Axis(1), n64(0.5)).is_none()); + assert!(a.quantile_axis_mut(Axis(1), n64(0.5), &Lower).is_none()); } #[test] fn test_quantile_axis_mut_with_empty_array() { let mut a = Array2::::zeros((5, 0)); - let p = a.quantile_axis_mut::(Axis(0), n64(0.5)).unwrap(); + let p = a.quantile_axis_mut(Axis(0), n64(0.5), &Lower).unwrap(); assert_eq!(p.shape(), &[0]); } #[test] fn test_quantile_axis_mut_with_even_axis_length() { let mut b = arr2(&[[1, 3, 2, 10], [2, 4, 3, 11], [3, 5, 6, 12], [4, 6, 7, 13]]); - let q = b.quantile_axis_mut::(Axis(0), n64(0.5)).unwrap(); + let q = b.quantile_axis_mut(Axis(0), n64(0.5), &Lower).unwrap(); assert!(q == b.index_axis(Axis(0), 1)); } #[test] fn test_quantile_axis_mut_to_get_minimum() { let mut b = arr2(&[[1, 3, 22, 10]]); - let q = b.quantile_axis_mut::(Axis(1), n64(0.)).unwrap(); + let q = b.quantile_axis_mut(Axis(1), n64(0.), &Lower).unwrap(); assert!(q == arr1(&[1])); } #[test] fn test_quantile_axis_mut_to_get_maximum() { let mut b = arr1(&[1, 3, 22, 10]); - let q = b.quantile_axis_mut::(Axis(0), n64(1.)).unwrap(); + let q = b.quantile_axis_mut(Axis(0), n64(1.), &Lower).unwrap(); assert!(q == arr0(22)); } @@ -224,7 +224,7 @@ fn test_quantile_axis_mut_to_get_maximum() { fn test_quantile_axis_skipnan_mut_higher_opt_i32() { let mut a = arr2(&[[Some(4), Some(2), None, Some(1), Some(5)], [None; 5]]); let q = a - .quantile_axis_skipnan_mut::(Axis(1), n64(0.6)) + .quantile_axis_skipnan_mut(Axis(1), n64(0.6), &Higher) .unwrap(); assert_eq!(q.shape(), &[2]); assert_eq!(q[0], Some(4)); @@ -235,7 +235,7 @@ fn test_quantile_axis_skipnan_mut_higher_opt_i32() { fn test_quantile_axis_skipnan_mut_nearest_opt_i32() { let mut a = arr2(&[[Some(4), Some(2), None, Some(1), Some(5)], [None; 5]]); let q = a - .quantile_axis_skipnan_mut::(Axis(1), n64(0.6)) + .quantile_axis_skipnan_mut(Axis(1), n64(0.6), &Nearest) .unwrap(); assert_eq!(q.shape(), &[2]); assert_eq!(q[0], Some(4)); @@ -246,7 +246,7 @@ fn test_quantile_axis_skipnan_mut_nearest_opt_i32() { fn test_quantile_axis_skipnan_mut_midpoint_opt_i32() { let mut a = arr2(&[[Some(4), Some(2), None, Some(1), Some(5)], [None; 5]]); let q = a - .quantile_axis_skipnan_mut::(Axis(1), n64(0.6)) + .quantile_axis_skipnan_mut(Axis(1), n64(0.6), &Midpoint) .unwrap(); assert_eq!(q.shape(), &[2]); assert_eq!(q[0], Some(3)); @@ -257,7 +257,7 @@ fn test_quantile_axis_skipnan_mut_midpoint_opt_i32() { fn test_quantile_axis_skipnan_mut_linear_f64() { let mut a = arr2(&[[1., 2., ::std::f64::NAN, 3.], [::std::f64::NAN; 4]]); let q = a - .quantile_axis_skipnan_mut::(Axis(1), n64(0.75)) + .quantile_axis_skipnan_mut(Axis(1), n64(0.75), &Linear) .unwrap(); assert_eq!(q.shape(), &[2]); assert!((q[0] - 2.5).abs() < 1e-12); @@ -268,7 +268,7 @@ fn test_quantile_axis_skipnan_mut_linear_f64() { fn test_quantile_axis_skipnan_mut_linear_opt_i32() { let mut a = arr2(&[[Some(2), Some(4), None, Some(1)], [None; 4]]); let q = a - .quantile_axis_skipnan_mut::(Axis(1), n64(0.75)) + .quantile_axis_skipnan_mut(Axis(1), n64(0.75), &Linear) .unwrap(); assert_eq!(q.shape(), &[2]); assert_eq!(q[0], Some(3)); @@ -280,7 +280,7 @@ fn test_midpoint_overflow() { // Regression test // This triggered an overflow panic with a naive Midpoint implementation: (a+b)/2 let mut a: Array1 = array![129, 130, 130, 131]; - let median = a.quantile_mut::(n64(0.5)).unwrap(); + let median = a.quantile_mut(n64(0.5), &Midpoint).unwrap(); let expected_median = 130; assert_eq!(median, expected_median); } @@ -303,30 +303,31 @@ fn test_quantiles_mut(xs: Vec) -> bool { ]; let mut correct = true; correct &= - check_one_interpolation_method_for_quantiles_mut::(v.clone(), &quantile_indexes); + check_one_interpolation_method_for_quantiles_mut(v.clone(), &quantile_indexes, &Linear); correct &= - check_one_interpolation_method_for_quantiles_mut::(v.clone(), &quantile_indexes); + check_one_interpolation_method_for_quantiles_mut(v.clone(), &quantile_indexes, &Higher); correct &= - check_one_interpolation_method_for_quantiles_mut::(v.clone(), &quantile_indexes); + check_one_interpolation_method_for_quantiles_mut(v.clone(), &quantile_indexes, &Lower); correct &= - check_one_interpolation_method_for_quantiles_mut::(v.clone(), &quantile_indexes); + check_one_interpolation_method_for_quantiles_mut(v.clone(), &quantile_indexes, &Midpoint); correct &= - check_one_interpolation_method_for_quantiles_mut::(v.clone(), &quantile_indexes); + check_one_interpolation_method_for_quantiles_mut(v.clone(), &quantile_indexes, &Nearest); correct } -fn check_one_interpolation_method_for_quantiles_mut>( +fn check_one_interpolation_method_for_quantiles_mut( mut v: Array1, quantile_indexes: &[N64], + interpolate: &impl Interpolate, ) -> bool { - let bulk_quantiles = v.clone().quantiles_mut::(&quantile_indexes); + let bulk_quantiles = v.clone().quantiles_mut(&quantile_indexes, interpolate); if v.len() == 0 { bulk_quantiles.is_none() } else { let bulk_quantiles = bulk_quantiles.unwrap(); izip!(quantile_indexes, &bulk_quantiles).all(|(&quantile_index, &quantile)| { - quantile == v.quantile_mut::(quantile_index).unwrap() + quantile == v.quantile_mut(quantile_index, interpolate).unwrap() }) } } @@ -353,40 +354,48 @@ fn test_quantiles_axis_mut(mut xs: Vec) -> bool { // Test out all interpolation methods let mut correct = true; - correct &= check_one_interpolation_method_for_quantiles_axis_mut::( + correct &= check_one_interpolation_method_for_quantiles_axis_mut( m.clone(), &quantile_indexes, Axis(0), + &Linear, ); - correct &= check_one_interpolation_method_for_quantiles_axis_mut::( + correct &= check_one_interpolation_method_for_quantiles_axis_mut( m.clone(), &quantile_indexes, Axis(0), + &Higher, ); - correct &= check_one_interpolation_method_for_quantiles_axis_mut::( + correct &= check_one_interpolation_method_for_quantiles_axis_mut( m.clone(), &quantile_indexes, Axis(0), + &Lower, ); - correct &= check_one_interpolation_method_for_quantiles_axis_mut::( + correct &= check_one_interpolation_method_for_quantiles_axis_mut( m.clone(), &quantile_indexes, Axis(0), + &Midpoint, ); - correct &= check_one_interpolation_method_for_quantiles_axis_mut::( + correct &= check_one_interpolation_method_for_quantiles_axis_mut( m.clone(), &quantile_indexes, Axis(0), + &Nearest, ); correct } -fn check_one_interpolation_method_for_quantiles_axis_mut>( +fn check_one_interpolation_method_for_quantiles_axis_mut( mut v: Array2, quantile_indexes: &[N64], axis: Axis, + interpolate: &impl Interpolate, ) -> bool { - let bulk_quantiles = v.clone().quantiles_axis_mut::(axis, &quantile_indexes); + let bulk_quantiles = v + .clone() + .quantiles_axis_mut(axis, &quantile_indexes, interpolate); if v.len() == 0 { bulk_quantiles.is_none() @@ -394,7 +403,9 @@ fn check_one_interpolation_method_for_quantiles_axis_mut>( let bulk_quantiles = bulk_quantiles.unwrap(); izip!(quantile_indexes, bulk_quantiles.axis_iter(axis)).all( |(&quantile_index, quantile)| { - quantile == v.quantile_axis_mut::(axis, quantile_index).unwrap() + quantile + == v.quantile_axis_mut(axis, quantile_index, interpolate) + .unwrap() }, ) } From b5d8a085a1d5bd348b44cd65213b66db45458c49 Mon Sep 17 00:00:00 2001 From: Jim Turner Date: Mon, 1 Apr 2019 21:39:45 -0400 Subject: [PATCH 66/81] Make get_many_from_sorted_mut take array of indexes This is slightly more versatile because `ArrayBase` allows arbitrary strides. --- src/sort.rs | 8 +++++--- tests/sort.rs | 6 +++++- 2 files changed, 10 insertions(+), 4 deletions(-) diff --git a/src/sort.rs b/src/sort.rs index 590ad156..5a0b5d3b 100644 --- a/src/sort.rs +++ b/src/sort.rs @@ -44,10 +44,11 @@ where /// where `n` is the length of the array.. /// /// [`get_from_sorted_mut`]: #tymethod.get_from_sorted_mut - fn get_many_from_sorted_mut(&mut self, indexes: &[usize]) -> IndexMap + fn get_many_from_sorted_mut(&mut self, indexes: &ArrayBase) -> IndexMap where A: Ord + Clone, - S: DataMut; + S: DataMut, + S2: Data; /// Partitions the array in increasing order based on the value initially /// located at `pivot_index` and returns the new index of the value. @@ -132,10 +133,11 @@ where } } - fn get_many_from_sorted_mut(&mut self, indexes: &[usize]) -> IndexMap + fn get_many_from_sorted_mut(&mut self, indexes: &ArrayBase) -> IndexMap where A: Ord + Clone, S: DataMut, + S2: Data, { let mut deduped_indexes: Vec = indexes.to_vec(); deduped_indexes.sort_unstable(); diff --git a/tests/sort.rs b/tests/sort.rs index 037e0d8f..2d1df06c 100644 --- a/tests/sort.rs +++ b/tests/sort.rs @@ -61,7 +61,11 @@ fn test_sorted_get_many_mut(mut xs: Vec) -> bool { indexes.append(&mut (0..n).collect()); let mut sorted_v = Vec::with_capacity(n); - for (i, (key, value)) in v.get_many_from_sorted_mut(&indexes).into_iter().enumerate() { + for (i, (key, value)) in v + .get_many_from_sorted_mut(&Array::from(indexes)) + .into_iter() + .enumerate() + { if i != key { return false; } From 00a21c06fa2e02c2c9eb6e8fd69aab4e8973c678 Mon Sep 17 00:00:00 2001 From: Jim Turner Date: Mon, 1 Apr 2019 22:08:25 -0400 Subject: [PATCH 67/81] Make quantiles* take array instead of slice --- src/quantile/mod.rs | 174 +++++++++++++++++++++++++++++--------------- tests/quantile.rs | 57 +++++++++------ 2 files changed, 152 insertions(+), 79 deletions(-) diff --git a/src/quantile/mod.rs b/src/quantile/mod.rs index c880c73b..7d59037a 100644 --- a/src/quantile/mod.rs +++ b/src/quantile/mod.rs @@ -215,7 +215,12 @@ where /// /// **Panics** if `axis` is out of bounds or if /// `q` is not between `0.` and `1.` (inclusive). - fn quantile_axis_mut(&mut self, axis: Axis, q: N64, interpolate: &I) -> Option> + fn quantile_axis_mut( + &mut self, + axis: Axis, + q: N64, + interpolate: &I, + ) -> Option> where D: RemoveAxis, A: Ord + Clone, @@ -245,7 +250,7 @@ where /// # extern crate ndarray_stats; /// # extern crate noisy_float; /// # - /// use ndarray::{array, Axis}; + /// use ndarray::{array, aview1, Axis}; /// use ndarray_stats::{QuantileExt, interpolate::Nearest}; /// use noisy_float::types::n64; /// @@ -253,23 +258,34 @@ where /// let mut data = array![[3, 4, 5], [6, 7, 8]]; /// let axis = Axis(1); /// let qs = &[n64(0.3), n64(0.7)]; - /// let quantiles = data.quantiles_axis_mut(axis, qs, &Nearest).unwrap(); + /// let quantiles = data.quantiles_axis_mut(axis, &aview1(qs), &Nearest).unwrap(); /// for (&q, quantile) in qs.iter().zip(quantiles.axis_iter(axis)) { /// assert_eq!(quantile, data.quantile_axis_mut(axis, q, &Nearest).unwrap()); /// } /// # } /// ``` - fn quantiles_axis_mut(&mut self, axis: Axis, qs: &[N64], interpolate: &I) -> Option> + fn quantiles_axis_mut( + &mut self, + axis: Axis, + qs: &ArrayBase, + interpolate: &I, + ) -> Option> where D: RemoveAxis, A: Ord + Clone, S: DataMut, + S2: Data, I: Interpolate; /// Return the `q`th quantile of the data along the specified axis, skipping NaN values. /// /// See [`quantile_axis_mut`](#tymethod.quantile_axis_mut) for details. - fn quantile_axis_skipnan_mut(&mut self, axis: Axis, q: N64, interpolate: &I) -> Option> + fn quantile_axis_skipnan_mut( + &mut self, + axis: Axis, + q: N64, + interpolate: &I, + ) -> Option> where D: RemoveAxis, A: MaybeNan, @@ -411,78 +427,110 @@ where })) } - fn quantiles_axis_mut(&mut self, axis: Axis, qs: &[N64], _interpolate: &I) -> Option> + fn quantiles_axis_mut( + &mut self, + axis: Axis, + qs: &ArrayBase, + interpolate: &I, + ) -> Option> where D: RemoveAxis, A: Ord + Clone, S: DataMut, + S2: Data, I: Interpolate, { - assert!(qs.iter().all(|x| (*x >= 0.) && (*x <= 1.))); - - let axis_len = self.len_of(axis); - if axis_len == 0 { - return None; - } - - let mut results_shape = self.raw_dim(); - results_shape[axis.index()] = qs.len(); - if results_shape.size() == 0 { - return Some(Array::from_shape_vec(results_shape, Vec::new()).unwrap()); - } + // Minimize number of type parameters to avoid monomorphization bloat. + fn quantiles_axis_mut( + mut data: ArrayViewMut, + axis: Axis, + qs: ArrayView1, + _interpolate: &I, + ) -> Option> + where + D: RemoveAxis, + A: Ord + Clone, + I: Interpolate, + { + assert!(qs.iter().all(|x| (*x >= 0.) && (*x <= 1.))); + + let axis_len = data.len_of(axis); + if axis_len == 0 { + return None; + } - let mut deduped_qs: Vec = qs.to_vec(); - deduped_qs.sort_by(|a, b| a.partial_cmp(b).unwrap()); - deduped_qs.dedup(); - - // IndexSet preserves insertion order: - // - indexes will stay sorted; - // - we avoid index duplication. - let mut searched_indexes = IndexSet::new(); - for q in deduped_qs.iter() { - if I::needs_lower(*q, axis_len) { - searched_indexes.insert(lower_index(*q, axis_len)); + let mut results_shape = data.raw_dim(); + results_shape[axis.index()] = qs.len(); + if results_shape.size() == 0 { + return Some(Array::from_shape_vec(results_shape, Vec::new()).unwrap()); } - if I::needs_higher(*q, axis_len) { - searched_indexes.insert(higher_index(*q, axis_len)); + + let mut deduped_qs: Vec = qs.to_vec(); + deduped_qs.sort_by(|a, b| a.partial_cmp(b).unwrap()); + deduped_qs.dedup(); + + // IndexSet preserves insertion order: + // - indexes will stay sorted; + // - we avoid index duplication. + let mut searched_indexes = IndexSet::new(); + for q in deduped_qs.iter() { + if I::needs_lower(*q, axis_len) { + searched_indexes.insert(lower_index(*q, axis_len)); + } + if I::needs_higher(*q, axis_len) { + searched_indexes.insert(higher_index(*q, axis_len)); + } } + let searched_indexes: Vec = searched_indexes.into_iter().collect(); + + let mut results = Array::from_elem(results_shape, data.first().unwrap().clone()); + Zip::from(results.lanes_mut(axis)) + .and(data.lanes_mut(axis)) + .apply(|mut results, mut data| { + let index_map = + get_many_from_sorted_mut_unchecked(&mut data, &searched_indexes); + for (result, &q) in results.iter_mut().zip(qs) { + let lower = if I::needs_lower(q, axis_len) { + Some(index_map[&lower_index(q, axis_len)].clone()) + } else { + None + }; + let higher = if I::needs_higher(q, axis_len) { + Some(index_map[&higher_index(q, axis_len)].clone()) + } else { + None + }; + *result = I::interpolate(lower, higher, q, axis_len); + } + }); + Some(results) } - let searched_indexes: Vec = searched_indexes.into_iter().collect(); - - let mut results = Array::from_elem(results_shape, self.first().unwrap().clone()); - Zip::from(results.lanes_mut(axis)) - .and(self.lanes_mut(axis)) - .apply(|mut results, mut data| { - let index_map = get_many_from_sorted_mut_unchecked(&mut data, &searched_indexes); - for (result, &q) in results.iter_mut().zip(qs) { - let lower = if I::needs_lower(q, axis_len) { - Some(index_map[&lower_index(q, axis_len)].clone()) - } else { - None - }; - let higher = if I::needs_higher(q, axis_len) { - Some(index_map[&higher_index(q, axis_len)].clone()) - } else { - None - }; - *result = I::interpolate(lower, higher, q, axis_len); - } - }); - Some(results) + + quantiles_axis_mut(self.view_mut(), axis, qs.view(), interpolate) } - fn quantile_axis_mut(&mut self, axis: Axis, q: N64, interpolate: &I) -> Option> + fn quantile_axis_mut( + &mut self, + axis: Axis, + q: N64, + interpolate: &I, + ) -> Option> where D: RemoveAxis, A: Ord + Clone, S: DataMut, I: Interpolate, { - self.quantiles_axis_mut(axis, &[q], interpolate) + self.quantiles_axis_mut(axis, &aview1(&[q]), interpolate) .map(|a| a.index_axis_move(axis, 0)) } - fn quantile_axis_skipnan_mut(&mut self, axis: Axis, q: N64, interpolate: &I) -> Option> + fn quantile_axis_skipnan_mut( + &mut self, + axis: Axis, + q: N64, + interpolate: &I, + ) -> Option> where D: RemoveAxis, A: MaybeNan, @@ -564,10 +612,15 @@ where /// **Panics** if any `q` in `qs` is not between `0.` and `1.` (inclusive). /// /// [`quantile_mut`]: #tymethod.quantile_mut - fn quantiles_mut(&mut self, qs: &[N64], interpolate: &I) -> Option> + fn quantiles_mut( + &mut self, + qs: &ArrayBase, + interpolate: &I, + ) -> Option> where A: Ord + Clone, S: DataMut, + S2: Data, I: Interpolate; } @@ -585,10 +638,15 @@ where .map(|v| v.into_scalar()) } - fn quantiles_mut(&mut self, qs: &[N64], interpolate: &I) -> Option> + fn quantiles_mut( + &mut self, + qs: &ArrayBase, + interpolate: &I, + ) -> Option> where A: Ord + Clone, S: DataMut, + S2: Data, I: Interpolate, { self.quantiles_axis_mut(Axis(0), qs, interpolate) diff --git a/tests/quantile.rs b/tests/quantile.rs index 2957afe7..f880bab9 100644 --- a/tests/quantile.rs +++ b/tests/quantile.rs @@ -290,7 +290,7 @@ fn test_quantiles_mut(xs: Vec) -> bool { let v = Array::from_vec(xs.clone()); // Unordered list of quantile indexes to look up, with a duplicate - let quantile_indexes = vec![ + let quantile_indexes = Array::from(vec![ n64(0.75), n64(0.90), n64(0.95), @@ -300,24 +300,39 @@ fn test_quantiles_mut(xs: Vec) -> bool { n64(0.25), n64(0.5), n64(0.5), - ]; + ]); let mut correct = true; - correct &= - check_one_interpolation_method_for_quantiles_mut(v.clone(), &quantile_indexes, &Linear); - correct &= - check_one_interpolation_method_for_quantiles_mut(v.clone(), &quantile_indexes, &Higher); - correct &= - check_one_interpolation_method_for_quantiles_mut(v.clone(), &quantile_indexes, &Lower); - correct &= - check_one_interpolation_method_for_quantiles_mut(v.clone(), &quantile_indexes, &Midpoint); - correct &= - check_one_interpolation_method_for_quantiles_mut(v.clone(), &quantile_indexes, &Nearest); + correct &= check_one_interpolation_method_for_quantiles_mut( + v.clone(), + quantile_indexes.view(), + &Linear, + ); + correct &= check_one_interpolation_method_for_quantiles_mut( + v.clone(), + quantile_indexes.view(), + &Higher, + ); + correct &= check_one_interpolation_method_for_quantiles_mut( + v.clone(), + quantile_indexes.view(), + &Lower, + ); + correct &= check_one_interpolation_method_for_quantiles_mut( + v.clone(), + quantile_indexes.view(), + &Midpoint, + ); + correct &= check_one_interpolation_method_for_quantiles_mut( + v.clone(), + quantile_indexes.view(), + &Nearest, + ); correct } fn check_one_interpolation_method_for_quantiles_mut( mut v: Array1, - quantile_indexes: &[N64], + quantile_indexes: ArrayView1, interpolate: &impl Interpolate, ) -> bool { let bulk_quantiles = v.clone().quantiles_mut(&quantile_indexes, interpolate); @@ -340,7 +355,7 @@ fn test_quantiles_axis_mut(mut xs: Vec) -> bool { let m = Array::from_shape_vec((axis_length, axis_length), xs).unwrap(); // Unordered list of quantile indexes to look up, with a duplicate - let quantile_indexes = vec![ + let quantile_indexes = Array::from(vec![ n64(0.75), n64(0.90), n64(0.95), @@ -350,37 +365,37 @@ fn test_quantiles_axis_mut(mut xs: Vec) -> bool { n64(0.25), n64(0.5), n64(0.5), - ]; + ]); // Test out all interpolation methods let mut correct = true; correct &= check_one_interpolation_method_for_quantiles_axis_mut( m.clone(), - &quantile_indexes, + quantile_indexes.view(), Axis(0), &Linear, ); correct &= check_one_interpolation_method_for_quantiles_axis_mut( m.clone(), - &quantile_indexes, + quantile_indexes.view(), Axis(0), &Higher, ); correct &= check_one_interpolation_method_for_quantiles_axis_mut( m.clone(), - &quantile_indexes, + quantile_indexes.view(), Axis(0), &Lower, ); correct &= check_one_interpolation_method_for_quantiles_axis_mut( m.clone(), - &quantile_indexes, + quantile_indexes.view(), Axis(0), &Midpoint, ); correct &= check_one_interpolation_method_for_quantiles_axis_mut( m.clone(), - &quantile_indexes, + quantile_indexes.view(), Axis(0), &Nearest, ); @@ -389,7 +404,7 @@ fn test_quantiles_axis_mut(mut xs: Vec) -> bool { fn check_one_interpolation_method_for_quantiles_axis_mut( mut v: Array2, - quantile_indexes: &[N64], + quantile_indexes: ArrayView1, axis: Axis, interpolate: &impl Interpolate, ) -> bool { From 8f9f0b615a09de0bdcbb9364e6bd88849b3bf353 Mon Sep 17 00:00:00 2001 From: Jim Turner Date: Mon, 1 Apr 2019 22:14:59 -0400 Subject: [PATCH 68/81] Remove unnecessary IndexSet --- src/quantile/mod.rs | 23 ++++++++--------------- 1 file changed, 8 insertions(+), 15 deletions(-) diff --git a/src/quantile/mod.rs b/src/quantile/mod.rs index 7d59037a..c53646e5 100644 --- a/src/quantile/mod.rs +++ b/src/quantile/mod.rs @@ -1,6 +1,5 @@ use self::interpolate::{higher_index, lower_index, Interpolate}; use super::sort::get_many_from_sorted_mut_unchecked; -use indexmap::IndexSet; use ndarray::prelude::*; use ndarray::{Data, DataMut, RemoveAxis, Zip}; use noisy_float::types::N64; @@ -465,23 +464,17 @@ where return Some(Array::from_shape_vec(results_shape, Vec::new()).unwrap()); } - let mut deduped_qs: Vec = qs.to_vec(); - deduped_qs.sort_by(|a, b| a.partial_cmp(b).unwrap()); - deduped_qs.dedup(); - - // IndexSet preserves insertion order: - // - indexes will stay sorted; - // - we avoid index duplication. - let mut searched_indexes = IndexSet::new(); - for q in deduped_qs.iter() { - if I::needs_lower(*q, axis_len) { - searched_indexes.insert(lower_index(*q, axis_len)); + let mut searched_indexes = Vec::with_capacity(2 * qs.len()); + for &q in &qs { + if I::needs_lower(q, axis_len) { + searched_indexes.push(lower_index(q, axis_len)); } - if I::needs_higher(*q, axis_len) { - searched_indexes.insert(higher_index(*q, axis_len)); + if I::needs_higher(q, axis_len) { + searched_indexes.push(higher_index(q, axis_len)); } } - let searched_indexes: Vec = searched_indexes.into_iter().collect(); + searched_indexes.sort(); + searched_indexes.dedup(); let mut results = Array::from_elem(results_shape, data.first().unwrap().clone()); Zip::from(results.lanes_mut(axis)) From 5ff4430b44ec4258dfb8e9db7b62868ac8ea0411 Mon Sep 17 00:00:00 2001 From: LukeMathWalker Date: Tue, 2 Apr 2019 08:33:54 +0100 Subject: [PATCH 69/81] Return EmptyInput instead of None --- src/quantile/mod.rs | 34 +++++++++++++++++----------------- 1 file changed, 17 insertions(+), 17 deletions(-) diff --git a/src/quantile/mod.rs b/src/quantile/mod.rs index 79b337c9..b1dc3260 100644 --- a/src/quantile/mod.rs +++ b/src/quantile/mod.rs @@ -211,7 +211,7 @@ where /// - worst case: O(`m`^2); /// where `m` is the number of elements in the array. /// - /// Returns `None` when the specified axis has length 0. + /// Returns `Err(EmptyInput)` when the specified axis has length 0. /// /// **Panics** if `axis` is out of bounds or if /// `q` is not between `0.` and `1.` (inclusive). @@ -220,7 +220,7 @@ where axis: Axis, q: N64, interpolate: &I, - ) -> Option> + ) -> Result, EmptyInput> where D: RemoveAxis, A: Ord + Clone, @@ -236,7 +236,7 @@ where /// See [`quantile_axis_mut`] for additional details on quantiles and the algorithm /// used to retrieve them. /// - /// Returns `None` when the specified axis has length 0. + /// Returns `Err(EmptyInput)` when the specified axis has length 0. /// /// **Panics** if `axis` is out of bounds or if /// any `q` in `qs` is not between `0.` and `1.` (inclusive). @@ -269,7 +269,7 @@ where axis: Axis, qs: &ArrayBase, interpolate: &I, - ) -> Option> + ) -> Result, EmptyInput> where D: RemoveAxis, A: Ord + Clone, @@ -285,7 +285,7 @@ where axis: Axis, q: N64, interpolate: &I, - ) -> Option> + ) -> Result, EmptyInput> where D: RemoveAxis, A: MaybeNan, @@ -438,7 +438,7 @@ where axis: Axis, qs: &ArrayBase, interpolate: &I, - ) -> Option> + ) -> Result, EmptyInput> where D: RemoveAxis, A: Ord + Clone, @@ -452,7 +452,7 @@ where axis: Axis, qs: ArrayView1, _interpolate: &I, - ) -> Option> + ) -> Result, EmptyInput> where D: RemoveAxis, A: Ord + Clone, @@ -462,13 +462,13 @@ where let axis_len = data.len_of(axis); if axis_len == 0 { - return None; + return Err(EmptyInput); } let mut results_shape = data.raw_dim(); results_shape[axis.index()] = qs.len(); if results_shape.size() == 0 { - return Some(Array::from_shape_vec(results_shape, Vec::new()).unwrap()); + return Ok(Array::from_shape_vec(results_shape, Vec::new()).unwrap()); } let mut searched_indexes = Vec::with_capacity(2 * qs.len()); @@ -503,7 +503,7 @@ where *result = I::interpolate(lower, higher, q, axis_len); } }); - Some(results) + Ok(results) } quantiles_axis_mut(self.view_mut(), axis, qs.view(), interpolate) @@ -514,7 +514,7 @@ where axis: Axis, q: N64, interpolate: &I, - ) -> Option> + ) -> Result, EmptyInput> where D: RemoveAxis, A: Ord + Clone, @@ -530,7 +530,7 @@ where axis: Axis, q: N64, interpolate: &I, - ) -> Option> + ) -> Result, EmptyInput> where D: RemoveAxis, A: MaybeNan, @@ -539,7 +539,7 @@ where I: Interpolate, { if self.len_of(axis) == 0 { - return None; + return Err(EmptyInput); } let quantile = self.map_axis_mut(axis, |lane| { let mut not_nan = A::remove_nan_mut(lane); @@ -554,7 +554,7 @@ where ) }) }); - Some(quantile) + Ok(quantile) } } @@ -604,7 +604,7 @@ where /// Returns an `Array`, where the elements of the array correspond to the /// elements of `qs`. /// - /// Returns `None` if the array is empty. + /// Returns `Err(EmptyInput)` if the array is empty. /// /// See [`quantile_mut`] for additional details on quantiles and the algorithm /// used to retrieve them. @@ -616,7 +616,7 @@ where &mut self, qs: &ArrayBase, interpolate: &I, - ) -> Option> + ) -> Result, EmptyInput> where A: Ord + Clone, S: DataMut, @@ -645,7 +645,7 @@ where &mut self, qs: &ArrayBase, interpolate: &I, - ) -> Option> + ) -> Result, EmptyInput> where A: Ord + Clone, S: DataMut, From ca9f3db5abd810414f3202a6ca7097794598dce7 Mon Sep 17 00:00:00 2001 From: LukeMathWalker Date: Tue, 2 Apr 2019 08:35:03 +0100 Subject: [PATCH 70/81] Fix tests --- tests/quantile.rs | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/tests/quantile.rs b/tests/quantile.rs index 9262d2c2..ea876c13 100644 --- a/tests/quantile.rs +++ b/tests/quantile.rs @@ -11,7 +11,7 @@ use ndarray::array; use ndarray::prelude::*; use ndarray_stats::{ interpolate::{Higher, Interpolate, Linear, Lower, Midpoint, Nearest}, - errors::MinMaxError, + errors::{MinMaxError, EmptyInput}, Quantile1dExt, QuantileExt, }; use noisy_float::types::{n64, N64}; @@ -190,7 +190,7 @@ fn test_quantile_axis_mut_with_odd_axis_length() { #[test] fn test_quantile_axis_mut_with_zero_axis_length() { let mut a = Array2::::zeros((5, 0)); - assert!(a.quantile_axis_mut(Axis(1), n64(0.5), &Lower).is_none()); + assert_eq!(a.quantile_axis_mut(Axis(1), n64(0.5), &Lower), Err(EmptyInput)); } #[test] @@ -339,7 +339,7 @@ fn check_one_interpolation_method_for_quantiles_mut( let bulk_quantiles = v.clone().quantiles_mut(&quantile_indexes, interpolate); if v.len() == 0 { - bulk_quantiles.is_none() + bulk_quantiles.is_err() } else { let bulk_quantiles = bulk_quantiles.unwrap(); izip!(quantile_indexes, &bulk_quantiles).all(|(&quantile_index, &quantile)| { @@ -414,7 +414,7 @@ fn check_one_interpolation_method_for_quantiles_axis_mut( .quantiles_axis_mut(axis, &quantile_indexes, interpolate); if v.len() == 0 { - bulk_quantiles.is_none() + bulk_quantiles.is_err() } else { let bulk_quantiles = bulk_quantiles.unwrap(); izip!(quantile_indexes, bulk_quantiles.axis_iter(axis)).all( From 7ca6b7f5de135e858b106a52b1127855155ead8f Mon Sep 17 00:00:00 2001 From: LukeMathWalker Date: Tue, 2 Apr 2019 08:39:55 +0100 Subject: [PATCH 71/81] Match output type for argmin/max_skipnan --- src/lib.rs | 1 + src/quantile/mod.rs | 24 ++++++++++++------------ 2 files changed, 13 insertions(+), 12 deletions(-) diff --git a/src/lib.rs b/src/lib.rs index 14bbb3a7..1c96772d 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -38,6 +38,7 @@ extern crate approx; extern crate ndarray_rand; #[cfg(test)] extern crate quickcheck; +extern crate core; pub use correlation::CorrelationExt; pub use entropy::EntropyExt; diff --git a/src/quantile/mod.rs b/src/quantile/mod.rs index b1dc3260..ab8999bd 100644 --- a/src/quantile/mod.rs +++ b/src/quantile/mod.rs @@ -44,7 +44,7 @@ where /// Finds the index of the minimum value of the array skipping NaN values. /// - /// Returns `None` if the array is empty or none of the values in the array + /// Returns `Err(MinMaxError::EmptyInput)` if the array is empty or none of the values in the array /// are non-NaN values. /// /// Even if there are multiple (equal) elements that are minima, only one @@ -62,9 +62,9 @@ where /// /// let a = array![[::std::f64::NAN, 3., 5.], /// [2., 0., 6.]]; - /// assert_eq!(a.argmin_skipnan(), Some((1, 1))); + /// assert_eq!(a.argmin_skipnan(), Ok((1, 1))); /// ``` - fn argmin_skipnan(&self) -> Option + fn argmin_skipnan(&self) -> Result where A: MaybeNan, A::NotNan: Ord; @@ -129,7 +129,7 @@ where /// Finds the index of the maximum value of the array skipping NaN values. /// - /// Returns `None` if the array is empty or none of the values in the array + /// Returns `Err(MinMaxError::EmptyInput)` if the array is empty or none of the values in the array /// are non-NaN values. /// /// Even if there are multiple (equal) elements that are maxima, only one @@ -147,9 +147,9 @@ where /// /// let a = array![[::std::f64::NAN, 3., 5.], /// [2., 0., 6.]]; - /// assert_eq!(a.argmax_skipnan(), Some((1, 2))); + /// assert_eq!(a.argmax_skipnan(), Ok((1, 2))); /// ``` - fn argmax_skipnan(&self) -> Option + fn argmax_skipnan(&self) -> Result where A: MaybeNan, A::NotNan: Ord; @@ -316,7 +316,7 @@ where Ok(current_pattern_min) } - fn argmin_skipnan(&self) -> Option + fn argmin_skipnan(&self) -> Result where A: MaybeNan, A::NotNan: Ord, @@ -332,9 +332,9 @@ where }) }); if min.is_some() { - Some(pattern_min) + Ok(pattern_min) } else { - None + Err(MinMaxError::EmptyInput) } } @@ -383,7 +383,7 @@ where Ok(current_pattern_max) } - fn argmax_skipnan(&self) -> Option + fn argmax_skipnan(&self) -> Result where A: MaybeNan, A::NotNan: Ord, @@ -399,9 +399,9 @@ where }) }); if max.is_some() { - Some(pattern_max) + Ok(pattern_max) } else { - None + Err(MinMaxError::EmptyInput) } } From 22cbfbb199f45ff2e3493bd5087f1c19d845f925 Mon Sep 17 00:00:00 2001 From: LukeMathWalker Date: Tue, 2 Apr 2019 08:41:57 +0100 Subject: [PATCH 72/81] Fix tests --- tests/quantile.rs | 24 ++++++++++++------------ 1 file changed, 12 insertions(+), 12 deletions(-) diff --git a/tests/quantile.rs b/tests/quantile.rs index ea876c13..f1debcc3 100644 --- a/tests/quantile.rs +++ b/tests/quantile.rs @@ -42,19 +42,19 @@ quickcheck! { #[test] fn test_argmin_skipnan() { let a = array![[1., 5., 3.], [2., 0., 6.]]; - assert_eq!(a.argmin_skipnan(), Some((1, 1))); + assert_eq!(a.argmin_skipnan(), Ok((1, 1))); let a = array![[1., 5., 3.], [2., ::std::f64::NAN, 6.]]; - assert_eq!(a.argmin_skipnan(), Some((0, 0))); + assert_eq!(a.argmin_skipnan(), Ok((0, 0))); let a = array![[::std::f64::NAN, 5., 3.], [2., ::std::f64::NAN, 6.]]; - assert_eq!(a.argmin_skipnan(), Some((1, 0))); + assert_eq!(a.argmin_skipnan(), Ok((1, 0))); let a: Array2 = array![[], []]; - assert_eq!(a.argmin_skipnan(), None); + assert_eq!(a.argmin_skipnan(), Err(MinMaxError::EmptyInput)); let a = arr2(&[[::std::f64::NAN; 2]; 2]); - assert_eq!(a.argmin_skipnan(), None); + assert_eq!(a.argmin_skipnan(), Err(MinMaxError::EmptyInput)); } quickcheck! { @@ -63,7 +63,7 @@ quickcheck! { let min = a.min_skipnan(); let argmin = a.argmin_skipnan(); if min.is_none() { - argmin == None + argmin == Err(MinMaxError::EmptyInput) } else { a[argmin.unwrap()] == *min } @@ -122,22 +122,22 @@ quickcheck! { #[test] fn test_argmax_skipnan() { let a = array![[1., 5., 3.], [2., 0., 6.]]; - assert_eq!(a.argmax_skipnan(), Some((1, 2))); + assert_eq!(a.argmax_skipnan(), Ok((1, 2))); let a = array![[1., 5., 3.], [2., ::std::f64::NAN, ::std::f64::NAN]]; - assert_eq!(a.argmax_skipnan(), Some((0, 1))); + assert_eq!(a.argmax_skipnan(), Ok((0, 1))); let a = array![ [::std::f64::NAN, ::std::f64::NAN, 3.], [2., ::std::f64::NAN, 6.] ]; - assert_eq!(a.argmax_skipnan(), Some((1, 2))); + assert_eq!(a.argmax_skipnan(), Ok((1, 2))); let a: Array2 = array![[], []]; - assert_eq!(a.argmax_skipnan(), None); + assert_eq!(a.argmax_skipnan(), Err(MinMaxError::EmptyInput)); let a = arr2(&[[::std::f64::NAN; 2]; 2]); - assert_eq!(a.argmax_skipnan(), None); + assert_eq!(a.argmax_skipnan(), Err(MinMaxError::EmptyInput)); } quickcheck! { @@ -146,7 +146,7 @@ quickcheck! { let max = a.max_skipnan(); let argmax = a.argmax_skipnan(); if max.is_none() { - argmax == None + argmax == Err(MinMaxError::EmptyInput) } else { a[argmax.unwrap()] == *max } From 56906cf423ab4b96db9c308c52e585a1b4b68075 Mon Sep 17 00:00:00 2001 From: LukeMathWalker Date: Tue, 2 Apr 2019 08:43:40 +0100 Subject: [PATCH 73/81] Fmt --- src/lib.rs | 2 +- src/quantile/mod.rs | 5 ++++- tests/quantile.rs | 7 +++++-- 3 files changed, 10 insertions(+), 4 deletions(-) diff --git a/src/lib.rs b/src/lib.rs index 1c96772d..59391157 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -34,11 +34,11 @@ extern crate rand; #[cfg(test)] extern crate approx; +extern crate core; #[cfg(test)] extern crate ndarray_rand; #[cfg(test)] extern crate quickcheck; -extern crate core; pub use correlation::CorrelationExt; pub use entropy::EntropyExt; diff --git a/src/quantile/mod.rs b/src/quantile/mod.rs index ab8999bd..ebeb5b41 100644 --- a/src/quantile/mod.rs +++ b/src/quantile/mod.rs @@ -637,7 +637,10 @@ where if self.is_empty() { Err(EmptyInput) } else { - Ok(self.quantile_axis_mut(Axis(0), q, interpolate).unwrap().into_scalar()) + Ok(self + .quantile_axis_mut(Axis(0), q, interpolate) + .unwrap() + .into_scalar()) } } diff --git a/tests/quantile.rs b/tests/quantile.rs index f1debcc3..7684ae8e 100644 --- a/tests/quantile.rs +++ b/tests/quantile.rs @@ -10,8 +10,8 @@ use itertools::izip; use ndarray::array; use ndarray::prelude::*; use ndarray_stats::{ + errors::{EmptyInput, MinMaxError}, interpolate::{Higher, Interpolate, Linear, Lower, Midpoint, Nearest}, - errors::{MinMaxError, EmptyInput}, Quantile1dExt, QuantileExt, }; use noisy_float::types::{n64, N64}; @@ -190,7 +190,10 @@ fn test_quantile_axis_mut_with_odd_axis_length() { #[test] fn test_quantile_axis_mut_with_zero_axis_length() { let mut a = Array2::::zeros((5, 0)); - assert_eq!(a.quantile_axis_mut(Axis(1), n64(0.5), &Lower), Err(EmptyInput)); + assert_eq!( + a.quantile_axis_mut(Axis(1), n64(0.5), &Lower), + Err(EmptyInput) + ); } #[test] From 950cd44a3b0d867bc746e8351a3a4eb4be9b8e5a Mon Sep 17 00:00:00 2001 From: Jim Turner Date: Fri, 5 Apr 2019 08:27:00 +0100 Subject: [PATCH 74/81] Update src/lib.rs Co-Authored-By: LukeMathWalker --- src/lib.rs | 1 - 1 file changed, 1 deletion(-) diff --git a/src/lib.rs b/src/lib.rs index 59391157..14bbb3a7 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -34,7 +34,6 @@ extern crate rand; #[cfg(test)] extern crate approx; -extern crate core; #[cfg(test)] extern crate ndarray_rand; #[cfg(test)] From 37b3b19fdd750c4302e948dec1d7d1f8b7ec5743 Mon Sep 17 00:00:00 2001 From: LukeMathWalker Date: Fri, 5 Apr 2019 08:37:02 +0100 Subject: [PATCH 75/81] Add quantile error --- src/errors.rs | 29 +++++++++++++++++++++++++++++ 1 file changed, 29 insertions(+) diff --git a/src/errors.rs b/src/errors.rs index d89112a5..aedc0481 100644 --- a/src/errors.rs +++ b/src/errors.rs @@ -1,6 +1,7 @@ //! Custom errors returned from our methods and functions. use std::error::Error; use std::fmt; +use noisy_float::types::N64; /// An error that indicates that the input array was empty. #[derive(Clone, Debug, Eq, PartialEq)] @@ -112,3 +113,31 @@ impl From for MultiInputError { MultiInputError::ShapeMismatch(err) } } + +/// An error computing a quantile. +#[derive(Debug, Clone)] +pub enum QuantileError { + /// The input was empty. + EmptyInput, + /// The `q` was not between `0.` and `1.` (inclusive). + InvalidFraction(N64), +} + +impl fmt::Display for QuantileError { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + match self { + QuantileError::EmptyInput => write!(f, "Empty input."), + QuantileError::InvalidFraction(q) => { + write!(f, "{:} is not between 0. and 1. (inclusive).", q) + } + } + } +} + +impl Error for QuantileError {} + +impl From for QuantileError { + fn from(_: EmptyInput) -> QuantileError { + QuantileError::EmptyInput + } +} From 1f37d44d78edad1b7e3c6dba85530a0bcc20c70f Mon Sep 17 00:00:00 2001 From: LukeMathWalker Date: Fri, 5 Apr 2019 08:38:01 +0100 Subject: [PATCH 76/81] Renamed InvalidFraction to InvalidQuantile --- src/errors.rs | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/errors.rs b/src/errors.rs index aedc0481..988dee7c 100644 --- a/src/errors.rs +++ b/src/errors.rs @@ -120,14 +120,14 @@ pub enum QuantileError { /// The input was empty. EmptyInput, /// The `q` was not between `0.` and `1.` (inclusive). - InvalidFraction(N64), + InvalidQuantile(N64), } impl fmt::Display for QuantileError { fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { match self { QuantileError::EmptyInput => write!(f, "Empty input."), - QuantileError::InvalidFraction(q) => { + QuantileError::InvalidQuantile(q) => { write!(f, "{:} is not between 0. and 1. (inclusive).", q) } } From 1e9ba1878f5abd070180d8926e25d3eadb399885 Mon Sep 17 00:00:00 2001 From: LukeMathWalker Date: Fri, 5 Apr 2019 08:48:47 +0100 Subject: [PATCH 77/81] Return QuantileError --- src/quantile/mod.rs | 45 ++++++++++++++++++++++++++------------------- 1 file changed, 26 insertions(+), 19 deletions(-) diff --git a/src/quantile/mod.rs b/src/quantile/mod.rs index ebeb5b41..f1b1a6f9 100644 --- a/src/quantile/mod.rs +++ b/src/quantile/mod.rs @@ -6,6 +6,7 @@ use ndarray::{Data, DataMut, RemoveAxis, Zip}; use noisy_float::types::N64; use std::cmp; use {MaybeNan, MaybeNanExt}; +use errors::QuantileError; /// Quantile methods for `ArrayBase`. pub trait QuantileExt @@ -220,7 +221,7 @@ where axis: Axis, q: N64, interpolate: &I, - ) -> Result, EmptyInput> + ) -> Result, QuantileError> where D: RemoveAxis, A: Ord + Clone, @@ -269,7 +270,7 @@ where axis: Axis, qs: &ArrayBase, interpolate: &I, - ) -> Result, EmptyInput> + ) -> Result, QuantileError> where D: RemoveAxis, A: Ord + Clone, @@ -285,7 +286,7 @@ where axis: Axis, q: N64, interpolate: &I, - ) -> Result, EmptyInput> + ) -> Result, QuantileError> where D: RemoveAxis, A: MaybeNan, @@ -438,7 +439,7 @@ where axis: Axis, qs: &ArrayBase, interpolate: &I, - ) -> Result, EmptyInput> + ) -> Result, QuantileError> where D: RemoveAxis, A: Ord + Clone, @@ -452,17 +453,21 @@ where axis: Axis, qs: ArrayView1, _interpolate: &I, - ) -> Result, EmptyInput> + ) -> Result, QuantileError> where D: RemoveAxis, A: Ord + Clone, I: Interpolate, { - assert!(qs.iter().all(|x| (*x >= 0.) && (*x <= 1.))); + for &q in qs { + if !((q >= 0.) && (q <= 1.)) { + return Err(QuantileError::InvalidQuantile(q)); + } + } let axis_len = data.len_of(axis); if axis_len == 0 { - return Err(EmptyInput); + return Err(QuantileError::EmptyInput); } let mut results_shape = data.raw_dim(); @@ -514,7 +519,7 @@ where axis: Axis, q: N64, interpolate: &I, - ) -> Result, EmptyInput> + ) -> Result, QuantileError> where D: RemoveAxis, A: Ord + Clone, @@ -530,7 +535,7 @@ where axis: Axis, q: N64, interpolate: &I, - ) -> Result, EmptyInput> + ) -> Result, QuantileError> where D: RemoveAxis, A: MaybeNan, @@ -538,9 +543,14 @@ where S: DataMut, I: Interpolate, { + if !((q >= 0.) && (q <= 1.)) { + return Err(QuantileError::InvalidQuantile(q)); + } + if self.len_of(axis) == 0 { - return Err(EmptyInput); + return Err(QuantileError::EmptyInput); } + let quantile = self.map_axis_mut(axis, |lane| { let mut not_nan = A::remove_nan_mut(lane); A::from_not_nan_opt(if not_nan.is_empty() { @@ -592,7 +602,7 @@ where /// Returns `Err(EmptyInput)` if the array is empty. /// /// **Panics** if `q` is not between `0.` and `1.` (inclusive). - fn quantile_mut(&mut self, q: N64, interpolate: &I) -> Result + fn quantile_mut(&mut self, q: N64, interpolate: &I) -> Result where A: Ord + Clone, S: DataMut, @@ -616,7 +626,7 @@ where &mut self, qs: &ArrayBase, interpolate: &I, - ) -> Result, EmptyInput> + ) -> Result, QuantileError> where A: Ord + Clone, S: DataMut, @@ -628,19 +638,16 @@ impl Quantile1dExt for ArrayBase where S: Data, { - fn quantile_mut(&mut self, q: N64, interpolate: &I) -> Result + fn quantile_mut(&mut self, q: N64, interpolate: &I) -> Result where A: Ord + Clone, S: DataMut, I: Interpolate, { if self.is_empty() { - Err(EmptyInput) + Err(QuantileError::EmptyInput) } else { - Ok(self - .quantile_axis_mut(Axis(0), q, interpolate) - .unwrap() - .into_scalar()) + Ok(self.quantile_axis_mut(Axis(0), q, interpolate)?.into_scalar()) } } @@ -648,7 +655,7 @@ where &mut self, qs: &ArrayBase, interpolate: &I, - ) -> Result, EmptyInput> + ) -> Result, QuantileError> where A: Ord + Clone, S: DataMut, From caad47dfb15b8786aeb287297384c9ee2ad9eb74 Mon Sep 17 00:00:00 2001 From: LukeMathWalker Date: Fri, 5 Apr 2019 08:53:31 +0100 Subject: [PATCH 78/81] Fix tests --- src/errors.rs | 2 +- tests/quantile.rs | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/src/errors.rs b/src/errors.rs index 988dee7c..d3f4a1e9 100644 --- a/src/errors.rs +++ b/src/errors.rs @@ -115,7 +115,7 @@ impl From for MultiInputError { } /// An error computing a quantile. -#[derive(Debug, Clone)] +#[derive(Debug, Clone, Eq, PartialEq)] pub enum QuantileError { /// The input was empty. EmptyInput, diff --git a/tests/quantile.rs b/tests/quantile.rs index 7684ae8e..36999089 100644 --- a/tests/quantile.rs +++ b/tests/quantile.rs @@ -10,7 +10,7 @@ use itertools::izip; use ndarray::array; use ndarray::prelude::*; use ndarray_stats::{ - errors::{EmptyInput, MinMaxError}, + errors::{MinMaxError, QuantileError}, interpolate::{Higher, Interpolate, Linear, Lower, Midpoint, Nearest}, Quantile1dExt, QuantileExt, }; @@ -192,7 +192,7 @@ fn test_quantile_axis_mut_with_zero_axis_length() { let mut a = Array2::::zeros((5, 0)); assert_eq!( a.quantile_axis_mut(Axis(1), n64(0.5), &Lower), - Err(EmptyInput) + Err(QuantileError::EmptyInput) ); } From fab842cb2771cd5ce5d8e2c7db33c422f65f265b Mon Sep 17 00:00:00 2001 From: LukeMathWalker Date: Fri, 5 Apr 2019 08:55:52 +0100 Subject: [PATCH 79/81] Fix docs --- src/quantile/mod.rs | 16 +++++++++------- 1 file changed, 9 insertions(+), 7 deletions(-) diff --git a/src/quantile/mod.rs b/src/quantile/mod.rs index f1b1a6f9..f9c5d80a 100644 --- a/src/quantile/mod.rs +++ b/src/quantile/mod.rs @@ -214,8 +214,9 @@ where /// /// Returns `Err(EmptyInput)` when the specified axis has length 0. /// - /// **Panics** if `axis` is out of bounds or if - /// `q` is not between `0.` and `1.` (inclusive). + /// Returns `Err(InvalidQuantile(q))` if `q` is not between `0.` and `1.` (inclusive). + /// + /// **Panics** if `axis` is out of bounds. fn quantile_axis_mut( &mut self, axis: Axis, @@ -239,8 +240,9 @@ where /// /// Returns `Err(EmptyInput)` when the specified axis has length 0. /// - /// **Panics** if `axis` is out of bounds or if - /// any `q` in `qs` is not between `0.` and `1.` (inclusive). + /// Returns `Err(InvalidQuantile(q))` if any `q` in `qs` is not between `0.` and `1.` (inclusive). + /// + /// **Panics** if `axis` is out of bounds. /// /// [`quantile_axis_mut`]: #tymethod.quantile_axis_mut /// @@ -601,7 +603,7 @@ where /// /// Returns `Err(EmptyInput)` if the array is empty. /// - /// **Panics** if `q` is not between `0.` and `1.` (inclusive). + /// Returns `Err(InvalidQuantile(q))` if `q` is not between `0.` and `1.` (inclusive). fn quantile_mut(&mut self, q: N64, interpolate: &I) -> Result where A: Ord + Clone, @@ -616,11 +618,11 @@ where /// /// Returns `Err(EmptyInput)` if the array is empty. /// + /// Returns `Err(InvalidQuantile(q))` if any `q` in `qs` is not between `0.` and `1.` (inclusive). + /// /// See [`quantile_mut`] for additional details on quantiles and the algorithm /// used to retrieve them. /// - /// **Panics** if any `q` in `qs` is not between `0.` and `1.` (inclusive). - /// /// [`quantile_mut`]: #tymethod.quantile_mut fn quantiles_mut( &mut self, From a32d9a8063db39a39f85e7dd193a80039c0c937c Mon Sep 17 00:00:00 2001 From: LukeMathWalker Date: Fri, 5 Apr 2019 08:56:34 +0100 Subject: [PATCH 80/81] Fmt --- src/errors.rs | 2 +- src/quantile/mod.rs | 9 ++++++--- 2 files changed, 7 insertions(+), 4 deletions(-) diff --git a/src/errors.rs b/src/errors.rs index d3f4a1e9..e2ee6965 100644 --- a/src/errors.rs +++ b/src/errors.rs @@ -1,7 +1,7 @@ //! Custom errors returned from our methods and functions. +use noisy_float::types::N64; use std::error::Error; use std::fmt; -use noisy_float::types::N64; /// An error that indicates that the input array was empty. #[derive(Clone, Debug, Eq, PartialEq)] diff --git a/src/quantile/mod.rs b/src/quantile/mod.rs index f9c5d80a..4985ae74 100644 --- a/src/quantile/mod.rs +++ b/src/quantile/mod.rs @@ -1,12 +1,12 @@ use self::interpolate::{higher_index, lower_index, Interpolate}; use super::sort::get_many_from_sorted_mut_unchecked; use crate::errors::{EmptyInput, MinMaxError, MinMaxError::UndefinedOrder}; +use errors::QuantileError; use ndarray::prelude::*; use ndarray::{Data, DataMut, RemoveAxis, Zip}; use noisy_float::types::N64; use std::cmp; use {MaybeNan, MaybeNanExt}; -use errors::QuantileError; /// Quantile methods for `ArrayBase`. pub trait QuantileExt @@ -618,7 +618,8 @@ where /// /// Returns `Err(EmptyInput)` if the array is empty. /// - /// Returns `Err(InvalidQuantile(q))` if any `q` in `qs` is not between `0.` and `1.` (inclusive). + /// Returns `Err(InvalidQuantile(q))` if any `q` in + /// `qs` is not between `0.` and `1.` (inclusive). /// /// See [`quantile_mut`] for additional details on quantiles and the algorithm /// used to retrieve them. @@ -649,7 +650,9 @@ where if self.is_empty() { Err(QuantileError::EmptyInput) } else { - Ok(self.quantile_axis_mut(Axis(0), q, interpolate)?.into_scalar()) + Ok(self + .quantile_axis_mut(Axis(0), q, interpolate)? + .into_scalar()) } } From a315f70c7764fb63544a1b6af9ce49e308704759 Mon Sep 17 00:00:00 2001 From: LukeMathWalker Date: Sat, 6 Apr 2019 18:20:40 +0100 Subject: [PATCH 81/81] Simplify and deduplicate --- src/quantile/mod.rs | 10 +++------- 1 file changed, 3 insertions(+), 7 deletions(-) diff --git a/src/quantile/mod.rs b/src/quantile/mod.rs index 4985ae74..3926e24f 100644 --- a/src/quantile/mod.rs +++ b/src/quantile/mod.rs @@ -647,13 +647,9 @@ where S: DataMut, I: Interpolate, { - if self.is_empty() { - Err(QuantileError::EmptyInput) - } else { - Ok(self - .quantile_axis_mut(Axis(0), q, interpolate)? - .into_scalar()) - } + Ok(self + .quantile_axis_mut(Axis(0), q, interpolate)? + .into_scalar()) } fn quantiles_mut(