diff --git a/.gitignore b/.gitignore index 69369904..4135c77e 100644 --- a/.gitignore +++ b/.gitignore @@ -1,3 +1,8 @@ /target **/*.rs.bk Cargo.lock + +# IDE and similar +rusty-tags.vi +tags +.vscode/* diff --git a/src/histogram/bins/mod.rs b/src/histogram/bins/mod.rs new file mode 100644 index 00000000..927856b5 --- /dev/null +++ b/src/histogram/bins/mod.rs @@ -0,0 +1,5 @@ +pub use self::one_dim::{Bin1d, Bins1d}; +pub use self::n_dim::{BinNd, BinsNd}; + +mod one_dim; +mod n_dim; \ No newline at end of file diff --git a/src/histogram/bins/n_dim.rs b/src/histogram/bins/n_dim.rs new file mode 100644 index 00000000..d9d0d22b --- /dev/null +++ b/src/histogram/bins/n_dim.rs @@ -0,0 +1,265 @@ +use ndarray::prelude::*; +use ndarray::Data; +use std::ops::Index; +use std::fmt; +use histogram::bins::Bin1d; + +/// `n`-dimensional bin: `I_1xI_2x..xI_n` where +/// `I_k` is a one-dimensional interval ([`Bin1d`]). +/// +/// It is instantiated by specifying the ordered sequence +/// of its 1-dimensional projections on the coordinate axes. +/// +/// # Example +/// +/// ``` +/// #[macro_use(array)] +/// extern crate ndarray; +/// extern crate ndarray_stats; +/// extern crate noisy_float; +/// use noisy_float::types::n64; +/// use ndarray_stats::bins::{BinNd, Bin1d}; +/// +/// # fn main() { +/// let projections = vec![ +/// Bin1d::RangeInclusive(n64(0.)..=n64(1.)), +/// Bin1d::RangeInclusive(n64(0.)..=n64(1.)), +/// ]; +/// let unit_square = BinNd::new(projections); +/// let point = array![n64(0.5), n64(0.5)]; +/// assert!(unit_square.contains(point.view())); +/// # } +/// ``` +/// +/// [`Bin1d`]: enum.Bin1d.html +#[derive(Hash, PartialEq, Eq, Clone, Debug)] +pub struct BinNd { + projections: Vec>, +} + +impl fmt::Display for BinNd +where + T: fmt::Debug +{ + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + let repr = self.projections.iter().map( + |p| format!("{}", p) + ).collect::>().join("x"); + write!(f, "{}", repr) + } +} + +impl BinNd +where + T: fmt::Debug +{ + /// Creates a new instance of `BinNd` from the ordered sequence + /// of its 1-dimensional projections on the coordinate axes. + /// + /// **Panics** if `projections` is empty. + pub fn new(projections: Vec>) -> Self { + if projections.is_empty() { + panic!( + "The 1-dimensional projections of an n-dimensional + bin can't be empty!" + ) + } else { + Self { projections } + } + } +} + +impl BinNd +{ + /// Return `n`, the number of dimensions. + pub fn ndim(&self) -> usize { + self.projections.len() + } +} + +impl<'a, T: 'a> BinNd +where + T: PartialOrd + fmt::Debug, +{ + /// Return `true` if `point` is in the bin, `false` otherwise. + /// + /// **Panics** if `point`'s dimensionality + /// (`point.len()`) is different from `self.ndim()`. + /// + /// # Example + /// + /// ``` + /// #[macro_use(array)] + /// extern crate ndarray; + /// extern crate ndarray_stats; + /// extern crate noisy_float; + /// use noisy_float::types::n64; + /// use ndarray_stats::bins::{BinNd, Bin1d}; + /// + /// # fn main() { + /// let projections = vec![ + /// Bin1d::RangeFrom(n64(0.)..), + /// Bin1d::RangeFrom(n64(0.)..), + /// ]; + /// let first_quadrant = BinNd::new(projections); + /// let good_point = array![n64(1e6), n64(1e8)]; + /// let bad_point = array![n64(-1.), n64(0.)]; + /// assert!(first_quadrant.contains(good_point.view())); + /// assert!(!first_quadrant.contains(bad_point.view())); + /// # } + /// ``` + pub fn contains(&self, point: ArrayBase) -> bool + where + S: Data, + { + assert_eq!(point.len(), self.ndim(), + "Dimensionalities do not match. Point {0:?} has {1} dimensions. \ + Bin {2:?} has {3} dimensions", point, point.len(), self, self.ndim()); + point.iter().zip(self.projections.iter()). + map(|(element, projection)| projection.contains(element)). + fold(true, |acc, v| acc & v) + } +} + +/// `BinsNd` is a collection of sub-regions ([`BinNd`]) +/// in an `n`-dimensional space. +/// +/// It is not required (or enforced) that the sub-regions +/// in `self` must be not-overlapping. +/// +/// [`BinNd`]: struct.BinNd.html +#[derive(Clone, Debug)] +pub struct BinsNd { + bins: Vec>, + ndim: usize, +} + +impl BinsNd +{ + /// Creates a new instance of `BinsNd` from a vector + /// of `BinNd`. + /// + /// **Panics** if `bins` is empty or if there are two bins in `bins` + /// with different dimensionality. + pub fn new(bins: Vec>) -> Self { + assert!(!bins.is_empty(), "The bins collection cannot be empty!"); + // All bins must have the same number of dimensions! + let ndim = { + let ndim = bins.index(0).ndim(); + let flag = &bins.iter(). + map(|b| b.ndim() == ndim). + fold(true, |acc, new| acc & new); + // It would be better to print the bad bins as well! + assert!(flag, "There at least two bins with different \ + number of dimensions"); + ndim + }; + Self { bins, ndim } + } + + /// Return `n`, the number of dimensions. + pub fn ndim(&self) -> usize { + self.ndim + } +} + +impl<'a, T: 'a> BinsNd +where + T: PartialOrd + Clone + fmt::Debug, +{ + /// Given a point `P`, it returns an `Option`: + /// - `Some(B)`, if `P` belongs to the `Bin` `B`; + /// - `None`, if `P` does not belong to any `Bin` in `self`. + /// + /// If more than one bin in `self` contains `P`, no assumptions + /// can be made on which bin will be returned by `find`. + /// + /// **Panics** if `P.ndim()` is different from `Bins.ndim()`. + pub fn find(&self, point: ArrayBase) -> Option> + where + S: Data, + { + for bin in self.bins.iter() { + if bin.contains(point.view()){ + return Some((*bin).clone()) + } + } + None + } +} + +#[cfg(test)] +mod bin_nd_tests { + use super::*; + + #[test] + #[should_panic] + fn new_w_empty_vec() { + let _: BinNd = BinNd::new(vec![]); + } + + #[test] + #[should_panic] + fn contains_w_dimensions_mismatch() { + let bin2d = BinNd::new( + vec![Bin1d::Range(0..1), Bin1d::Range(2..4)] + ); + let point1d = array![1]; + bin2d.contains(point1d.view()); + } + + #[test] + fn contains_w_matching_dimensions() { + let bin2d = BinNd::new( + vec![Bin1d::Range(0..1), Bin1d::Range(2..4)] + ); + let point2d = array![0, 3]; + bin2d.contains(point2d.view()); + } +} + +mod bins_nd_tests { + use super::*; + + #[test] + #[should_panic] + fn new_w_empty_vec() { + let _: BinsNd = BinsNd::new(vec![]); + } + + #[test] + #[should_panic] + fn new_w_bins_of_different_dimensions() { + let bin1d = BinNd::new(vec![Bin1d::Range(0..5)]); + let bin2d = BinNd::new(vec![Bin1d::Range(0..5), + Bin1d::RangeFrom(2..)]); + assert_eq!(bin1d.ndim(), 1); + assert_eq!(bin2d.ndim(), 2); + let _: BinsNd = BinsNd::new( + vec![bin1d, bin2d], + ); + } + + #[test] + #[should_panic] + fn find_w_mismatched_dimensions() { + let bin2d = BinNd::new(vec![Bin1d::Range(0..5), + Bin1d::RangeFrom(2..)]); + assert_eq!(bin2d.ndim(), 2); + let point3d = array![1, 2, 3]; + assert_eq!(point3d.len(), 3); + let bins = BinsNd::new(vec![bin2d]); + bins.find(point3d); + } + + #[test] + fn find_w_matching_dimensions() { + let bin = BinNd::new(vec![Bin1d::Range(0..5), + Bin1d::RangeFrom(2..)]); + let bins = BinsNd::new(vec![bin.clone()]); + let p = array![1, 2]; + let q = array![1, 1]; + assert_eq!(bins.find(p), Some(bin)); + assert_eq!(bins.find(q), None); + } +} \ No newline at end of file diff --git a/src/histogram/bins/one_dim.rs b/src/histogram/bins/one_dim.rs new file mode 100644 index 00000000..097f91fc --- /dev/null +++ b/src/histogram/bins/one_dim.rs @@ -0,0 +1,151 @@ +use std::fmt; +use std::ops::{Bound, Range, RangeBounds, RangeFrom, RangeFull, + RangeInclusive, RangeTo, RangeToInclusive}; + +/// One-dimensional intervals. +/// +/// # Example +/// +/// ``` +/// extern crate ndarray_stats; +/// extern crate noisy_float; +/// use ndarray_stats::bins::Bin1d; +/// use noisy_float::types::n64; +/// +/// let unit_interval = Bin1d::RangeInclusive(n64(0.)..=n64(1.)); +/// assert!(unit_interval.contains(&n64(1.))); +/// assert!(unit_interval.contains(&n64(0.))); +/// assert!(unit_interval.contains(&n64(0.5))); +/// ``` +#[derive(Hash, PartialEq, Eq, Clone, Debug)] +pub enum Bin1d { + Range(Range), + RangeFrom(RangeFrom), + RangeFull(RangeFull), + RangeInclusive(RangeInclusive), + RangeTo(RangeTo), + RangeToInclusive(RangeToInclusive), +} + +impl fmt::Display for Bin1d +where + T: fmt::Debug +{ + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + match self { + Bin1d::Range(x) => write!(f, "{:?}", x), + Bin1d::RangeFrom(x) => write!(f, "{:?}", x), + Bin1d::RangeFull(x) => write!(f, "{:?}", x), + Bin1d::RangeInclusive(x) => write!(f, "{:?}", x), + Bin1d::RangeTo(x) => write!(f, "{:?}", x), + Bin1d::RangeToInclusive(x) => write!(f, "{:?}", x), + } + } +} + +impl Bin1d +where + T: PartialOrd +{ + /// Return `true` if `point` belongs to the interval, `false` otherwise. + pub fn contains(&self, point: &T) -> bool + { + match self { + Bin1d::Range(x) => contains::, T>(x, point), + Bin1d::RangeFrom(x) => contains::, T>(x, point), + Bin1d::RangeFull(_) => true, + Bin1d::RangeInclusive(x) => contains::, T>(x, point), + Bin1d::RangeTo(x) => contains::, T>(x, point), + Bin1d::RangeToInclusive(x) => contains::, T>(x, point), + } + } +} + +// Reimplemented here given that [RFC 1434](https://github.com/nox/rust-rfcs/blob/master/text/1434-contains-method-for-ranges.md) +// has not being stabilized yet and we don't want to force nightly +// for the whole library because of it +fn contains(range: &R, item: &T) -> bool +where + R: RangeBounds, + T: PartialOrd, +{ + (match range.start_bound() { + Bound::Included(ref start) => *start <= item, + Bound::Excluded(ref start) => *start < item, + Bound::Unbounded => true, + }) + && + (match range.end_bound() { + Bound::Included(ref end) => item <= *end, + Bound::Excluded(ref end) => item < *end, + Bound::Unbounded => true, + }) +} + +/// `Bins1d` is a collection of intervals ([`Bin1d`]) +/// in a 1-dimensional space. +/// +/// [`Bin1d`]: enum.Bin1d.html +#[derive(Debug, Clone)] +pub struct Bins1d { + bins: Vec>, +} + +impl Bins1d +where + T: PartialOrd + Clone +{ + /// Given a point `P`, it returns an `Option`: + /// - `Some(B)`, if `P` belongs to the bin `B`; + /// - `None`, if `P` does not belong to any bin in `self`. + /// + /// If more than one bin in `self` contains `P`, no assumptions + /// can be made on which bin will be returned by `find`. + pub fn find(&self, point: &T) -> Option> + { + for bin in self.bins.iter() { + if bin.contains(point) { + return Some((*bin).clone()) + } + } + None + } +} + +#[cfg(test)] +mod tests { + use super::*; + extern crate noisy_float; + + #[test] + fn find() { + let bins = vec![ + Bin1d::RangeTo(..0), + Bin1d::Range(0..5), + Bin1d::Range(5..9), + Bin1d::Range(10..15), + Bin1d::RangeFrom(15..), + ]; + let b = Bins1d { bins }; + assert_eq!(b.find(&9), None); + assert_eq!(b.find(&15), Some(Bin1d::RangeFrom(15..))); + } + + #[test] + fn find_with_overlapping_bins() { + let bins = vec![ + Bin1d::RangeToInclusive(..=0), + Bin1d::Range(0..5), + ]; + let b = Bins1d { bins }; + // The first one is matched and returned + assert_eq!(b.find(&0), Some(Bin1d::RangeToInclusive(..=0))); + } + + quickcheck! { + fn find_with_empty_bins(point: i64) -> bool { + let b = Bins1d { bins: vec![] }; + b.find(&point).is_none() + } + } +} \ No newline at end of file diff --git a/src/histogram/mod.rs b/src/histogram/mod.rs new file mode 100644 index 00000000..6a5b3dc7 --- /dev/null +++ b/src/histogram/mod.rs @@ -0,0 +1,132 @@ +/// Bins for histogram computation. +pub mod bins; +use self::bins::{BinNd, BinsNd, Bin1d, Bins1d}; + +use std::collections::HashMap; +use std::hash::Hash; +use std::fmt; +use ndarray::prelude::*; +use ndarray::Data; + +type HistogramNd = HashMap, usize>; + +/// Extension trait for ArrayBase providing methods to compute n-dimensional histograms. +pub trait HistogramNdExt +where + S: Data, + A: Hash + Eq + fmt::Debug + Clone, +{ + /// Return the [histogram](https://en.wikipedia.org/wiki/Histogram) + /// for a 2-dimensional array of points `M`. + /// + /// Let `(n, d)` be the shape of `M`: + /// - `n` is the number of points; + /// - `d` is the number of dimensions of the space those points belong to. + /// It follows that every column in `M` is a `d`-dimensional point. + /// + /// For example: a (3, 4) matrix `M` is a collection of 3 points in a + /// 4-dimensional space. + /// + /// **Panics** if `d` is different from `bins.ndim()`. + fn histogram(&self, bins: BinsNd) -> HistogramNd; +} + +impl HistogramNdExt for ArrayBase +where + S: Data, + A: Hash + Eq + fmt::Debug + Clone + PartialOrd, +{ + fn histogram(&self, bins: BinsNd) -> HistogramNd + { + let mut histogram = HashMap::new(); + for point in self.axis_iter(Axis(0)) { + let bin = bins.find(point.view()); + if let Some(b) = bin { + let counter = histogram.entry(b).or_insert(0); + *counter += 1; + }; + } + histogram + } +} + +type Histogram1d = HashMap, usize>; + +/// Extension trait for one-dimensional ArrayBase providing methods to compute histograms. +pub trait Histogram1dExt +where + S: Data, + A: Hash + Eq + fmt::Debug + Clone, +{ + /// Return the [histogram](https://en.wikipedia.org/wiki/Histogram) + /// for a 1-dimensional array of points `M`. + fn histogram(&self, bins: Bins1d) -> Histogram1d; +} + +impl Histogram1dExt for ArrayBase +where + S: Data, + A: Hash + Eq + fmt::Debug + Clone + PartialOrd, +{ + fn histogram(&self, bins: Bins1d) -> Histogram1d + { + let mut histogram = HashMap::new(); + for point in self.iter() { + let bin = bins.find(point); + if let Some(b) = bin { + let counter = histogram.entry(b).or_insert(0); + *counter += 1; + }; + } + histogram + } +} + +#[cfg(test)] +mod histogram_nd_tests { + use super::*; + + #[test] + fn histogram() { + let first_quadrant = BinNd::new( + vec![Bin1d::RangeFrom(0..), + Bin1d::RangeFrom(0..) + ] + ); + let second_quadrant = BinNd::new( + vec![Bin1d::RangeTo(..0), + Bin1d::RangeFrom(0..) + ] + ); + let bins = BinsNd::new(vec![first_quadrant.clone(), + second_quadrant.clone()]); + let points = array![ + [1, 1], + [1, 2], + [0, 1], + [-1, 2], + [-1, -1], // a point that doesn't belong to any bin in bins + ]; + assert_eq!(points.shape(), &[5, 2]); + let histogram = points.histogram(bins); + + let mut expected = HashMap::new(); + expected.insert(first_quadrant, 3); + expected.insert(second_quadrant, 1); + + assert_eq!(expected, histogram); + } + + #[test] + #[should_panic] + fn histogram_w_mismatched_dimensions() { + let bin = BinNd::new(vec![Bin1d::RangeFrom(0..)]); + let bins = BinsNd::new(vec![bin.clone()]); + let points = array![ + [1, 1], + [1, 2], + ]; + assert_eq!(points.shape(), &[2, 2]); + points.histogram(bins); + } +} \ No newline at end of file diff --git a/src/lib.rs b/src/lib.rs index cf8e56d8..3d673c0b 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -15,8 +15,10 @@ pub use maybe_nan::{MaybeNan, MaybeNanExt}; pub use quantile::{interpolate, QuantileExt}; pub use sort::Sort1dExt; pub use correlation::CorrelationExt; +pub use histogram::{Histogram1dExt, HistogramNdExt, bins}; mod maybe_nan; mod quantile; mod sort; -mod correlation; \ No newline at end of file +mod correlation; +mod histogram; \ No newline at end of file