From 8df170bde278d5295e1b49d27b5809553267a8c6 Mon Sep 17 00:00:00 2001 From: LukeMathWalker Date: Sat, 20 Jul 2019 14:23:24 +0100 Subject: [PATCH 1/9] Return a result for correlation functions --- Cargo.toml | 3 +++ src/correlation.rs | 37 ++++++++++++++++++++++++------------- 2 files changed, 27 insertions(+), 13 deletions(-) diff --git a/Cargo.toml b/Cargo.toml index f384438e..30fa64d4 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -35,3 +35,6 @@ num-bigint = "0.2.2" [[bench]] name = "sort" harness = false + +[patch.crates-io] +ndarray = {git = "https://github.com/rust-ndarray/ndarray", branch = "master"} diff --git a/src/correlation.rs b/src/correlation.rs index 9985ad87..da338cbf 100644 --- a/src/correlation.rs +++ b/src/correlation.rs @@ -1,6 +1,7 @@ use ndarray::prelude::*; use ndarray::Data; use num_traits::{Float, FromPrimitive}; +use crate::errors::EmptyInput; /// Extension trait for `ArrayBase` providing functions /// to compute different correlation measures. @@ -60,7 +61,7 @@ where /// aview2(&[[4., 4.], [4., 4.]]) /// ); /// ``` - fn cov(&self, ddof: A) -> Array2 + fn cov(&self, ddof: A) -> Result, EmptyInput> where A: Float + FromPrimitive; @@ -112,7 +113,7 @@ where /// ) /// ); /// ``` - fn pearson_correlation(&self) -> Array2 + fn pearson_correlation(&self) -> Result, EmptyInput> where A: Float + FromPrimitive; @@ -123,7 +124,7 @@ impl CorrelationExt for ArrayBase where S: Data, { - fn cov(&self, ddof: A) -> Array2 + fn cov(&self, ddof: A) -> Result, EmptyInput> where A: Float + FromPrimitive, { @@ -139,12 +140,17 @@ where n_observations - ddof }; let mean = self.mean_axis(observation_axis); - let denoised = self - &mean.insert_axis(observation_axis); - let covariance = denoised.dot(&denoised.t()); - covariance.mapv_into(|x| x / dof) + match mean { + Some(mean) => { + let denoised = self - &mean.insert_axis(observation_axis); + let covariance = denoised.dot(&denoised.t()); + Ok(covariance.mapv_into(|x| x / dof)) + }, + None => Err(EmptyInput) + } } - fn pearson_correlation(&self) -> Array2 + fn pearson_correlation(&self) -> Result, EmptyInput> where A: Float + FromPrimitive, { @@ -155,12 +161,17 @@ where // observation per random variable (or no observations at all) let ddof = -A::one(); let cov = self.cov(ddof); - let std = self - .std_axis(observation_axis, ddof) - .insert_axis(observation_axis); - let std_matrix = std.dot(&std.t()); - // element-wise division - cov / std_matrix + match cov { + Ok(cov) => { + let std = self + .std_axis(observation_axis, ddof) + .insert_axis(observation_axis); + let std_matrix = std.dot(&std.t()); + // element-wise division + Ok(cov / std_matrix) + }, + Err(EmptyInput) => Err(EmptyInput) + } } private_impl! {} From b07eba1535480a52cde5cf2d9a0b7368bf71fdbd Mon Sep 17 00:00:00 2001 From: LukeMathWalker Date: Sat, 20 Jul 2019 14:25:46 +0100 Subject: [PATCH 2/9] Fix geometric and harmonic mean --- src/summary_statistics/means.rs | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/summary_statistics/means.rs b/src/summary_statistics/means.rs index 89d4df9d..9424beff 100644 --- a/src/summary_statistics/means.rs +++ b/src/summary_statistics/means.rs @@ -28,14 +28,14 @@ where where A: Float + FromPrimitive, { - self.map(|x| x.recip()).mean().map(|x| x.recip()) + self.map(|x| x.recip()).mean().map(|x| x.recip()).ok_or(EmptyInput) } fn geometric_mean(&self) -> Result where A: Float + FromPrimitive, { - self.map(|x| x.ln()).mean().map(|x| x.exp()) + self.map(|x| x.ln()).mean().map(|x| x.exp()).ok_or(EmptyInput) } fn kurtosis(&self) -> Result From c2c129684e278d74534ce781c075d5fc89d70cc4 Mon Sep 17 00:00:00 2001 From: LukeMathWalker Date: Sat, 20 Jul 2019 15:19:08 +0100 Subject: [PATCH 3/9] Fix all_close everywhere --- Cargo.toml | 3 ++- src/correlation.rs | 19 ++++++++++--------- src/lib.rs | 4 ++++ src/summary_statistics/means.rs | 4 ++-- 4 files changed, 18 insertions(+), 12 deletions(-) diff --git a/Cargo.toml b/Cargo.toml index 30fa64d4..8e948079 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -16,7 +16,7 @@ keywords = ["array", "multidimensional", "statistics", "matrix", "ndarray"] categories = ["data-structures", "science"] [dependencies] -ndarray = "0.12.1" +ndarray = {git = "https://github.com/rust-ndarray/ndarray", branch = "master"} noisy_float = "0.1.8" num-integer = "0.1" num-traits = "0.2" @@ -25,6 +25,7 @@ itertools = { version = "0.8.0", default-features = false } indexmap = "1.0" [dev-dependencies] +ndarray = {git = "https://github.com/rust-ndarray/ndarray", branch = "master", features = ["approx"]} criterion = "0.2" quickcheck = { version = "0.8.1", default-features = false } ndarray-rand = "0.9" diff --git a/src/correlation.rs b/src/correlation.rs index da338cbf..615f4053 100644 --- a/src/correlation.rs +++ b/src/correlation.rs @@ -191,9 +191,10 @@ mod cov_tests { let n_random_variables = 3; let n_observations = 4; let a = Array::from_elem((n_random_variables, n_observations), value); - a.cov(1.).all_close( + abs_diff_eq!( + a.cov(1.).unwrap(), &Array::zeros((n_random_variables, n_random_variables)), - 1e-8, + epsilon = 1e-8, ) } @@ -205,8 +206,8 @@ mod cov_tests { (n_random_variables, n_observations), Uniform::new(-bound.abs(), bound.abs()), ); - let covariance = a.cov(1.); - covariance.all_close(&covariance.t(), 1e-8) + let covariance = a.cov(1.).unwrap(); + abs_diff_eq!(covariance, &covariance.t(), epsilon = 1e-8) } #[test] @@ -266,7 +267,7 @@ mod cov_tests { ] ]; assert_eq!(a.ndim(), 2); - assert!(a.cov(1.).all_close(&numpy_covariance, 1e-8)); + assert_abs_diff_eq!(a.cov(1.).unwrap(), &numpy_covariance, epsilon = 1e-8); } #[test] @@ -275,7 +276,7 @@ mod cov_tests { fn test_covariance_for_badly_conditioned_array() { let a: Array2 = array![[1e12 + 1., 1e12 - 1.], [1e-6 + 1e-12, 1e-6 - 1e-12],]; let expected_covariance = array![[2., 2e-12], [2e-12, 2e-24]]; - assert!(a.cov(1.).all_close(&expected_covariance, 1e-24)); + assert_abs_diff_eq!(a.cov(1.).unwrap(), &expected_covariance, epsilon = 1e-24); } } @@ -295,8 +296,8 @@ mod pearson_correlation_tests { (n_random_variables, n_observations), Uniform::new(-bound.abs(), bound.abs()), ); - let pearson_correlation = a.pearson_correlation(); - pearson_correlation.all_close(&pearson_correlation.t(), 1e-8) + let pearson_correlation = a.pearson_correlation().unwrap(); + abs_diff_eq!(pearson_correlation.view(), pearson_correlation.t(), epsilon = 1e-8) } #[quickcheck] @@ -349,7 +350,7 @@ mod pearson_correlation_tests { [0.1365648, 0.38954398, -0.17324776, -0.8743213, 1.] ]; assert_eq!(a.ndim(), 2); - assert!(a.pearson_correlation().all_close(&numpy_corrcoeff, 1e-7)); + assert_abs_diff_eq!(a.pearson_correlation().unwrap(), numpy_corrcoeff, epsilon = 1e-7); } } diff --git a/src/lib.rs b/src/lib.rs index 9ee3d350..4b3615cc 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -38,6 +38,10 @@ pub use crate::quantile::{interpolate, Quantile1dExt, QuantileExt}; pub use crate::sort::Sort1dExt; pub use crate::summary_statistics::SummaryStatisticsExt; +#[cfg(test)] +#[macro_use] +extern crate approx; + #[macro_use] mod private { /// This is a public type in a private module, so it can be included in diff --git a/src/summary_statistics/means.rs b/src/summary_statistics/means.rs index 9424beff..04b26a43 100644 --- a/src/summary_statistics/means.rs +++ b/src/summary_statistics/means.rs @@ -207,7 +207,7 @@ mod tests { #[test] fn test_means_with_empty_array_of_floats() { let a: Array1 = array![]; - assert_eq!(a.mean(), Err(EmptyInput)); + assert_eq!(a.mean(), None); assert_eq!(a.harmonic_mean(), Err(EmptyInput)); assert_eq!(a.geometric_mean(), Err(EmptyInput)); } @@ -215,7 +215,7 @@ mod tests { #[test] fn test_means_with_empty_array_of_noisy_floats() { let a: Array1 = array![]; - assert_eq!(a.mean(), Err(EmptyInput)); + assert_eq!(a.mean(), None); assert_eq!(a.harmonic_mean(), Err(EmptyInput)); assert_eq!(a.geometric_mean(), Err(EmptyInput)); } From a3ef7c2d0bbc6b544ea8149d897158e4becc89bb Mon Sep 17 00:00:00 2001 From: LukeMathWalker Date: Sat, 20 Jul 2019 15:49:10 +0100 Subject: [PATCH 4/9] Green tests --- src/correlation.rs | 63 +++++++++++++++++++++++++--------------------- 1 file changed, 34 insertions(+), 29 deletions(-) diff --git a/src/correlation.rs b/src/correlation.rs index 615f4053..b3b90261 100644 --- a/src/correlation.rs +++ b/src/correlation.rs @@ -42,10 +42,10 @@ where /// ``` /// and similarly for ̅y. /// - /// **Panics** if `ddof` is greater than or equal to the number of - /// observations, if the number of observations is zero and division by - /// zero panics for type `A`, or if the type cast of `n_observations` from - /// `usize` to `A` fails. + /// If `M` is empty (either zero observations or zero random variables), it returns `Err(EmptyInput)`. + /// + /// **Panics** if `ddof` is negative or greater than or equal to the number of + /// observations, or if the type cast of `n_observations` from `usize` to `A` fails. /// /// # Example /// @@ -55,7 +55,7 @@ where /// /// let a = arr2(&[[1., 3., 5.], /// [2., 4., 6.]]); - /// let covariance = a.cov(1.); + /// let covariance = a.cov(1.).unwrap(); /// assert_eq!( /// covariance, /// aview2(&[[4., 4.], [4., 4.]]) @@ -90,26 +90,31 @@ where /// R_ij = rho(X_i, X_j) /// ``` /// - /// **Panics** if `M` is empty, if the type cast of `n_observations` - /// from `usize` to `A` fails or if the standard deviation of one of the random + /// If `M` is empty (either zero observations or zero random variables), it returns `Err(EmptyInput)`. + /// + /// **Panics** if the type cast of `n_observations` from `usize` to `A` fails or + /// if the standard deviation of one of the random variables is zero and + /// division by zero panics for type A. /// /// # Example /// - /// variables is zero and division by zero panics for type A. /// ``` + /// use approx; /// use ndarray::arr2; /// use ndarray_stats::CorrelationExt; + /// use approx::AbsDiffEq; /// /// let a = arr2(&[[1., 3., 5.], /// [2., 4., 6.]]); - /// let corr = a.pearson_correlation(); + /// let corr = a.pearson_correlation().unwrap(); + /// let epsilon = 1e-7; /// assert!( - /// corr.all_close( + /// corr.abs_diff_eq( /// &arr2(&[ /// [1., 1.], /// [1., 1.], /// ]), - /// 1e-7 + /// epsilon /// ) /// ); /// ``` @@ -154,15 +159,14 @@ where where A: Float + FromPrimitive, { - let observation_axis = Axis(1); - // The ddof value doesn't matter, as long as we use the same one - // for computing covariance and standard deviation - // We choose -1 to avoid panicking when we only have one - // observation per random variable (or no observations at all) - let ddof = -A::one(); - let cov = self.cov(ddof); - match cov { - Ok(cov) => { + match self.dim() { + (n, m) if n > 0 && m > 0 => { + let observation_axis = Axis(1); + // The ddof value doesn't matter, as long as we use the same one + // for computing covariance and standard deviation + // We choose 0 as it is the smallest number admitted by std_axis + let ddof = A::zero(); + let cov = self.cov(ddof).unwrap(); let std = self .std_axis(observation_axis, ddof) .insert_axis(observation_axis); @@ -170,7 +174,7 @@ where // element-wise division Ok(cov / std_matrix) }, - Err(EmptyInput) => Err(EmptyInput) + _ => Err(EmptyInput) } } @@ -217,14 +221,15 @@ mod cov_tests { let n_observations = 4; let a = Array::random((n_random_variables, n_observations), Uniform::new(0., 10.)); let invalid_ddof = (n_observations as f64) + rand::random::().abs(); - a.cov(invalid_ddof); + let _ = a.cov(invalid_ddof); } #[test] fn test_covariance_zero_variables() { let a = Array2::::zeros((0, 2)); let cov = a.cov(1.); - assert_eq!(cov.shape(), &[0, 0]); + assert!(cov.is_ok()); + assert_eq!(cov.unwrap().shape(), &[0, 0]); } #[test] @@ -232,8 +237,7 @@ mod cov_tests { let a = Array2::::zeros((2, 0)); // Negative ddof (-1 < 0) to avoid invalid-ddof panic let cov = a.cov(-1.); - assert_eq!(cov.shape(), &[2, 2]); - cov.mapv(|x| assert_eq!(x, 0.)); + assert_eq!(cov, Err(EmptyInput)); } #[test] @@ -241,7 +245,7 @@ mod cov_tests { let a = Array2::::zeros((0, 0)); // Negative ddof (-1 < 0) to avoid invalid-ddof panic let cov = a.cov(-1.); - assert_eq!(cov.shape(), &[0, 0]); + assert_eq!(cov, Err(EmptyInput)); } #[test] @@ -307,6 +311,7 @@ mod pearson_correlation_tests { let a = Array::from_elem((n_random_variables, n_observations), value); let pearson_correlation = a.pearson_correlation(); pearson_correlation + .unwrap() .iter() .map(|x| x.is_nan()) .fold(true, |acc, flag| acc & flag) @@ -316,21 +321,21 @@ mod pearson_correlation_tests { fn test_zero_variables() { let a = Array2::::zeros((0, 2)); let pearson_correlation = a.pearson_correlation(); - assert_eq!(pearson_correlation.shape(), &[0, 0]); + assert_eq!(pearson_correlation, Err(EmptyInput)) } #[test] fn test_zero_observations() { let a = Array2::::zeros((2, 0)); let pearson = a.pearson_correlation(); - pearson.mapv(|x| x.is_nan()); + assert_eq!(pearson, Err(EmptyInput)); } #[test] fn test_zero_variables_zero_observations() { let a = Array2::::zeros((0, 0)); let pearson = a.pearson_correlation(); - assert_eq!(pearson.shape(), &[0, 0]); + assert_eq!(pearson, Err(EmptyInput)); } #[test] From 8a826990a86a66ceeb23e7ccb0b869b103e96d42 Mon Sep 17 00:00:00 2001 From: LukeMathWalker Date: Tue, 24 Sep 2019 08:57:07 +0100 Subject: [PATCH 5/9] Update to released versions --- Cargo.toml | 10 +++------- 1 file changed, 3 insertions(+), 7 deletions(-) diff --git a/Cargo.toml b/Cargo.toml index 8e948079..f91f9bd9 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -16,19 +16,18 @@ keywords = ["array", "multidimensional", "statistics", "matrix", "ndarray"] categories = ["data-structures", "science"] [dependencies] -ndarray = {git = "https://github.com/rust-ndarray/ndarray", branch = "master"} +ndarray = "0.13" noisy_float = "0.1.8" num-integer = "0.1" num-traits = "0.2" -rand = "0.6" +rand = "0.7" itertools = { version = "0.8.0", default-features = false } indexmap = "1.0" [dev-dependencies] -ndarray = {git = "https://github.com/rust-ndarray/ndarray", branch = "master", features = ["approx"]} criterion = "0.2" quickcheck = { version = "0.8.1", default-features = false } -ndarray-rand = "0.9" +ndarray-rand = "0.11" approx = "0.3" quickcheck_macros = "0.8" num-bigint = "0.2.2" @@ -36,6 +35,3 @@ num-bigint = "0.2.2" [[bench]] name = "sort" harness = false - -[patch.crates-io] -ndarray = {git = "https://github.com/rust-ndarray/ndarray", branch = "master"} From 279f8c77c004caa3e9aed0f89096ffd7eac16791 Mon Sep 17 00:00:00 2001 From: LukeMathWalker Date: Tue, 24 Sep 2019 08:59:38 +0100 Subject: [PATCH 6/9] Enable approx feature for the test suite --- Cargo.toml | 1 + 1 file changed, 1 insertion(+) diff --git a/Cargo.toml b/Cargo.toml index f91f9bd9..c6f847f0 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -25,6 +25,7 @@ itertools = { version = "0.8.0", default-features = false } indexmap = "1.0" [dev-dependencies] +ndarray = { version = "0.13", features = ["approx"] } criterion = "0.2" quickcheck = { version = "0.8.1", default-features = false } ndarray-rand = "0.11" From bb303f8347f738e727a30860b353af801ae08ddd Mon Sep 17 00:00:00 2001 From: LukeMathWalker Date: Tue, 24 Sep 2019 09:00:36 +0100 Subject: [PATCH 7/9] Fix deprecation warnings --- src/histogram/bins.rs | 2 +- tests/quantile.rs | 2 +- tests/sort.rs | 4 ++-- 3 files changed, 4 insertions(+), 4 deletions(-) diff --git a/src/histogram/bins.rs b/src/histogram/bins.rs index 887f3e04..fee580ab 100644 --- a/src/histogram/bins.rs +++ b/src/histogram/bins.rs @@ -349,7 +349,7 @@ mod edges_tests { #[quickcheck] fn check_sorted_from_array(v: Vec) -> bool { - let a = Array1::from_vec(v); + let a = Array1::from(v); let edges = Edges::from(a); let n = edges.len(); for i in 1..n { diff --git a/tests/quantile.rs b/tests/quantile.rs index 4b312a07..9d58071f 100644 --- a/tests/quantile.rs +++ b/tests/quantile.rs @@ -279,7 +279,7 @@ fn test_midpoint_overflow() { #[quickcheck] fn test_quantiles_mut(xs: Vec) -> bool { - let v = Array::from_vec(xs.clone()); + let v = Array::from(xs.clone()); // Unordered list of quantile indexes to look up, with a duplicate let quantile_indexes = Array::from(vec![ diff --git a/tests/sort.rs b/tests/sort.rs index af2717c4..b2bd12f1 100644 --- a/tests/sort.rs +++ b/tests/sort.rs @@ -49,7 +49,7 @@ fn test_sorted_get_many_mut(mut xs: Vec) -> bool { if n == 0 { true } else { - let mut v = Array::from_vec(xs.clone()); + let mut v = Array::from(xs.clone()); // Insert each index twice, to get a set of indexes with duplicates, not sorted let mut indexes: Vec = (0..n).into_iter().collect(); @@ -78,7 +78,7 @@ fn test_sorted_get_mut_as_sorting_algorithm(mut xs: Vec) -> bool { if n == 0 { true } else { - let mut v = Array::from_vec(xs.clone()); + let mut v = Array::from(xs.clone()); let sorted_v: Vec<_> = (0..n).map(|i| v.get_from_sorted_mut(i)).collect(); xs.sort(); xs == sorted_v From be9c148c79556990016b4947309748f05b3faf11 Mon Sep 17 00:00:00 2001 From: LukeMathWalker Date: Tue, 24 Sep 2019 09:13:42 +0100 Subject: [PATCH 8/9] Formatting --- src/correlation.rs | 22 +++++++++++++++------- src/summary_statistics/means.rs | 10 ++++++++-- 2 files changed, 23 insertions(+), 9 deletions(-) diff --git a/src/correlation.rs b/src/correlation.rs index c18691eb..70221ecd 100644 --- a/src/correlation.rs +++ b/src/correlation.rs @@ -1,7 +1,7 @@ +use crate::errors::EmptyInput; use ndarray::prelude::*; use ndarray::Data; use num_traits::{Float, FromPrimitive}; -use crate::errors::EmptyInput; /// Extension trait for `ArrayBase` providing functions /// to compute different correlation measures. @@ -150,8 +150,8 @@ where let denoised = self - &mean.insert_axis(observation_axis); let covariance = denoised.dot(&denoised.t()); Ok(covariance.mapv_into(|x| x / dof)) - }, - None => Err(EmptyInput) + } + None => Err(EmptyInput), } } @@ -173,8 +173,8 @@ where let std_matrix = std.dot(&std.t()); // element-wise division Ok(cov / std_matrix) - }, - _ => Err(EmptyInput) + } + _ => Err(EmptyInput), } } @@ -301,7 +301,11 @@ mod pearson_correlation_tests { Uniform::new(-bound.abs(), bound.abs()), ); let pearson_correlation = a.pearson_correlation().unwrap(); - abs_diff_eq!(pearson_correlation.view(), pearson_correlation.t(), epsilon = 1e-8) + abs_diff_eq!( + pearson_correlation.view(), + pearson_correlation.t(), + epsilon = 1e-8 + ) } #[quickcheck] @@ -355,6 +359,10 @@ mod pearson_correlation_tests { [0.1365648, 0.38954398, -0.17324776, -0.8743213, 1.] ]; assert_eq!(a.ndim(), 2); - assert_abs_diff_eq!(a.pearson_correlation().unwrap(), numpy_corrcoeff, epsilon = 1e-7); + assert_abs_diff_eq!( + a.pearson_correlation().unwrap(), + numpy_corrcoeff, + epsilon = 1e-7 + ); } } diff --git a/src/summary_statistics/means.rs b/src/summary_statistics/means.rs index 04b26a43..50cd8395 100644 --- a/src/summary_statistics/means.rs +++ b/src/summary_statistics/means.rs @@ -28,14 +28,20 @@ where where A: Float + FromPrimitive, { - self.map(|x| x.recip()).mean().map(|x| x.recip()).ok_or(EmptyInput) + self.map(|x| x.recip()) + .mean() + .map(|x| x.recip()) + .ok_or(EmptyInput) } fn geometric_mean(&self) -> Result where A: Float + FromPrimitive, { - self.map(|x| x.ln()).mean().map(|x| x.exp()).ok_or(EmptyInput) + self.map(|x| x.ln()) + .mean() + .map(|x| x.exp()) + .ok_or(EmptyInput) } fn kurtosis(&self) -> Result From 09cf11abd1afc87ef25747a69f6590763c8395a7 Mon Sep 17 00:00:00 2001 From: LukeMathWalker Date: Tue, 24 Sep 2019 10:08:17 +0100 Subject: [PATCH 9/9] Bump minimum Rust version --- .travis.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.travis.yml b/.travis.yml index 6c6139a7..827dba65 100644 --- a/.travis.yml +++ b/.travis.yml @@ -7,7 +7,7 @@ addons: - libssl-dev cache: cargo rust: - - 1.34.0 + - 1.37.0 - stable - beta - nightly