diff --git a/src/quantile/interpolate.rs b/src/quantile/interpolate.rs index 8099aaef..ee8fffdd 100644 --- a/src/quantile/interpolate.rs +++ b/src/quantile/interpolate.rs @@ -4,36 +4,46 @@ use ndarray::prelude::*; use noisy_float::types::N64; use num_traits::{Float, FromPrimitive, NumOps, ToPrimitive}; +fn float_quantile_index(q: N64, len: usize) -> N64 { + q * ((len - 1) as f64) +} + +/// Returns the fraction that the quantile is between the lower and higher indices. +/// +/// This ranges from 0, where the quantile exactly corresponds the lower index, +/// to 1, where the quantile exactly corresponds to the higher index. +fn float_quantile_index_fraction(q: N64, len: usize) -> N64 { + float_quantile_index(q, len).fract() +} + +/// Returns the index of the value on the lower side of the quantile. +pub(crate) fn lower_index(q: N64, len: usize) -> usize { + float_quantile_index(q, len).floor().to_usize().unwrap() +} + +/// Returns the index of the value on the higher side of the quantile. +pub(crate) fn higher_index(q: N64, len: usize) -> usize { + float_quantile_index(q, len).ceil().to_usize().unwrap() +} + /// Used to provide an interpolation strategy to [`quantile_axis_mut`]. /// /// [`quantile_axis_mut`]: ../trait.QuantileExt.html#tymethod.quantile_axis_mut pub trait Interpolate<T> { - #[doc(hidden)] - fn float_quantile_index(q: N64, len: usize) -> N64 { - q * ((len - 1) as f64) - } - #[doc(hidden)] - fn lower_index(q: N64, len: usize) -> usize { - Self::float_quantile_index(q, len) - .floor() - .to_usize() - .unwrap() - } - #[doc(hidden)] - fn higher_index(q: N64, len: usize) -> usize { - Self::float_quantile_index(q, len) - .ceil() - .to_usize() - .unwrap() - } - #[doc(hidden)] - fn float_quantile_index_fraction(q: N64, len: usize) -> N64 { - Self::float_quantile_index(q, len).fract() - } + /// Returns `true` iff the lower value is needed to compute the + /// interpolated value. #[doc(hidden)] fn needs_lower(q: N64, len: usize) -> bool; + + /// Returns `true` iff the higher value is needed to compute the + /// interpolated value. #[doc(hidden)] fn needs_higher(q: N64, len: usize) -> bool; + + /// Computes the interpolated value. + /// + /// **Panics** if `None` is provided for the lower value when it's needed + /// or if `None` is provided for the higher value when it's needed. #[doc(hidden)] fn interpolate<D>( lower: Option<Array<T, D>>, @@ -94,7 +104,7 @@ impl<T> Interpolate<T> for Lower { impl<T> Interpolate<T> for Nearest { fn needs_lower(q: N64, len: usize) -> bool { - <Self as Interpolate<T>>::float_quantile_index_fraction(q, len) < 0.5 + float_quantile_index_fraction(q, len) < 0.5 } fn needs_higher(q: N64, len: usize) -> bool { !<Self as Interpolate<T>>::needs_lower(q, len) @@ -163,9 +173,7 @@ where where D: Dimension, { - let fraction = <Self as Interpolate<T>>::float_quantile_index_fraction(q, len) - .to_f64() - .unwrap(); + let fraction = float_quantile_index_fraction(q, len).to_f64().unwrap(); let mut a = lower.unwrap(); let b = higher.unwrap(); azip!(mut a, ref b in { diff --git a/src/quantile/mod.rs b/src/quantile/mod.rs index 6af5fe5c..98111bca 100644 --- a/src/quantile/mod.rs +++ b/src/quantile/mod.rs @@ -1,4 +1,4 @@ -use self::interpolate::Interpolate; +use self::interpolate::{higher_index, lower_index, Interpolate}; use super::sort::get_many_from_sorted_mut_unchecked; use indexmap::{IndexMap, IndexSet}; use ndarray::prelude::*; @@ -204,57 +204,51 @@ where let axis_len = self.len_of(axis); if axis_len == 0 { - None - } else { - let mut deduped_qs: Vec<N64> = qs.to_vec(); - deduped_qs.sort_by(|a, b| a.partial_cmp(b).unwrap()); - deduped_qs.dedup(); + return None; + } + + let mut deduped_qs: Vec<N64> = qs.to_vec(); + deduped_qs.sort_by(|a, b| a.partial_cmp(b).unwrap()); + deduped_qs.dedup(); - // IndexSet preserves insertion order: - // - indexes will stay sorted; - // - we avoid index duplication. - let mut searched_indexes = IndexSet::new(); - for q in deduped_qs.iter() { - if I::needs_lower(*q, axis_len) { - searched_indexes.insert(I::lower_index(*q, axis_len)); - } - if I::needs_higher(*q, axis_len) { - searched_indexes.insert(I::higher_index(*q, axis_len)); - } + // IndexSet preserves insertion order: + // - indexes will stay sorted; + // - we avoid index duplication. + let mut searched_indexes = IndexSet::new(); + for q in deduped_qs.iter() { + if I::needs_lower(*q, axis_len) { + searched_indexes.insert(lower_index(*q, axis_len)); + } + if I::needs_higher(*q, axis_len) { + searched_indexes.insert(higher_index(*q, axis_len)); } - let searched_indexes: Vec<usize> = searched_indexes.into_iter().collect(); + } + let searched_indexes: Vec<usize> = searched_indexes.into_iter().collect(); - // Retrieve the values corresponding to each index for each slice along the specified axis - let values = self.map_axis_mut(axis, |mut x| { - get_many_from_sorted_mut_unchecked(&mut x, &searched_indexes) - }); + // Retrieve the values corresponding to each index for each slice along the specified axis + let values = self.map_axis_mut( + axis, + |mut x| get_many_from_sorted_mut_unchecked(&mut x, &searched_indexes) + ); - // Combine the retrieved values according to specified interpolation strategy to - // get the desired quantiles - let mut results = IndexMap::new(); - for q in qs { - let result = I::interpolate( - match I::needs_lower(*q, axis_len) { - true => { - let lower_index = &I::lower_index(*q, axis_len); - Some(values.map(|x| x.get(lower_index).unwrap().clone())) - } - false => None, - }, - match I::needs_higher(*q, axis_len) { - true => { - let higher_index = &I::higher_index(*q, axis_len); - Some(values.map(|x| x.get(higher_index).unwrap().clone())) - } - false => None, - }, - *q, - axis_len, - ); - results.insert(*q, result); - } - Some(results) + // Combine the retrieved values according to specified interpolation strategy to + // get the desired quantiles + let mut results = IndexMap::new(); + for q in qs { + let lower = if I::needs_lower(*q, axis_len) { + Some(values.map(|x| x[&lower_index(*q, axis_len)].clone())) + } else { + None + }; + let higher = if I::needs_higher(*q, axis_len) { + Some(values.map(|x| x[&higher_index(*q, axis_len)].clone())) + } else { + None + }; + let interpolated = I::interpolate(lower, higher, *q, axis_len); + results.insert(*q, interpolated); } + Some(results) } fn quantile_axis_mut<I>(&mut self, axis: Axis, q: N64) -> Option<Array<A, D::Smaller>> @@ -276,24 +270,23 @@ where S: DataMut, I: Interpolate<A::NotNan>, { - if self.len_of(axis) > 0 { - Some(self.map_axis_mut(axis, |lane| { - let mut not_nan = A::remove_nan_mut(lane); - A::from_not_nan_opt(if not_nan.is_empty() { - None - } else { - Some( - not_nan - .quantile_axis_mut::<I>(Axis(0), q) - .unwrap() - .into_raw_vec() - .remove(0), - ) - }) - })) - } else { - None + if self.len_of(axis) == 0 { + return None; } + let quantile = self.map_axis_mut(axis, |lane| { + let mut not_nan = A::remove_nan_mut(lane); + A::from_not_nan_opt(if not_nan.is_empty() { + None + } else { + Some( + not_nan + .quantile_axis_mut::<I>(Axis(0), q) + .unwrap() + .into_scalar(), + ) + }) + }); + Some(quantile) } } diff --git a/src/sort.rs b/src/sort.rs index 8fc07f3a..6ef2f86a 100644 --- a/src/sort.rs +++ b/src/sort.rs @@ -49,15 +49,16 @@ where A: Ord + Clone, S: DataMut; - /// Return the index of `self[partition_index]` if `self` were to be sorted - /// in increasing order. + /// Partitions the array in increasing order based on the value initially + /// located at `pivot_index` and returns the new index of the value. /// - /// `self` elements are rearranged in such a way that `self[partition_index]` - /// is in the position it would be in an array sorted in increasing order. - /// All elements smaller than `self[partition_index]` are moved to its - /// left and all elements equal or greater than `self[partition_index]` - /// are moved to its right. - /// The ordering of the elements in the two partitions is undefined. + /// The elements are rearranged in such a way that the value initially + /// located at `pivot_index` is moved to the position it would be in an + /// array sorted in increasing order. The return value is the new index of + /// the value after rearrangement. All elements smaller than the value are + /// moved to its left and all elements equal or greater than the value are + /// moved to its right. The ordering of the elements in the two partitions + /// is undefined. /// /// `self` is shuffled **in place** to operate the desired partition: /// no copy of the array is allocated. @@ -67,7 +68,36 @@ where /// Average number of element swaps: n/6 - 1/3 (see /// [link](https://cs.stackexchange.com/questions/11458/quicksort-partitioning-hoare-vs-lomuto/11550)) /// - /// **Panics** if `partition_index` is greater than or equal to `n`. + /// **Panics** if `pivot_index` is greater than or equal to `n`. + /// + /// # Example + /// + /// ``` + /// extern crate ndarray; + /// extern crate ndarray_stats; + /// + /// use ndarray::array; + /// use ndarray_stats::Sort1dExt; + /// + /// # fn main() { + /// let mut data = array![3, 1, 4, 5, 2]; + /// let pivot_index = 2; + /// let pivot_value = data[pivot_index]; + /// + /// // Partition by the value located at `pivot_index`. + /// let new_index = data.partition_mut(pivot_index); + /// // The pivot value is now located at `new_index`. + /// assert_eq!(data[new_index], pivot_value); + /// // Elements less than that value are moved to the left. + /// for i in 0..new_index { + /// assert!(data[i] < pivot_value); + /// } + /// // Elements greater than or equal to that value are moved to the right. + /// for i in (new_index + 1)..data.len() { + /// assert!(data[i] >= pivot_value); + /// } + /// # } + /// ``` fn partition_mut(&mut self, pivot_index: usize) -> usize where A: Ord + Clone,