2.1.4

liborty · May 6, 2024 · 1962ab7 · 1962ab7
1 parent fc88951
commit 1962ab7
Show file tree

Hide file tree

Showing 10 changed files with 141 additions and 124 deletions.
diff --git a/Cargo.toml b/Cargo.toml
@@ -1,6 +1,6 @@
 [package]
 name = "rstats"
-version = "2.1.3"
+version = "2.1.4"
 authors = ["Libor Spacek"]
 edition = "2021"
 description = "Statistics, Information Measures, Data Analysis, Linear Algebra, Clifford Algebra, Machine Learning, Geometric Median, Matrix Decompositions, PCA, Mahalanobis Distance, Hulls, Multithreading.."

diff --git a/README.md b/README.md
@@ -196,11 +196,11 @@ if dif <= 0_f64 {
 pub type RE = RError<String>;
 ```
 
-Convenience function `re_error` can be used to simplify constructing and returning these errors. Its message argument can be either literal `&str`, or `String` (e.g. constructed by `format!`). It returns a Result, thus it needs `?` operator to unwrap its `Err` variant.
+Convenience functions `nodata_error, data_error, arith_error, other_error` are used to construct and return these errors. Their message argument can be either literal `&str`, or `String` (e.g. constructed by `format!`). They return `ReError<String>` already wrapped up as an `Err` variant of `Result`. cf.:
 
 ```rust
 if dif <= 0_f64 {
-    return re_error("arith","cholesky needs a positive definite matrix")?;
+    return arith_error("cholesky needs a positive definite matrix");
 };
 ```
 
@@ -258,11 +258,11 @@ The remaining general cases previously required new manual implementations to be
 
 * `sumn`: the sum of the sequence `1..n = n*(n+1)/2`. It is also the size of a lower/upper triangular matrix.
 
-* `t_stat`: of a value x: (x-centre)/spread. In one dimension.
+* `tm_stat`: (x-centre)/dispersion. Generalised t-statistic in one dimension.
 
 * `unit_matrix`: - generates full square unit matrix.
 
-* `re_error` - helps to construct custom RE errors (see Errors above).
+* `nodata_error, data_error, arith_error, other_error` - construct custom RE errors (see section Errors above).
 
 ## Trait Stats
 
@@ -336,7 +336,9 @@ Methods which take an additional generic vector argument, such as a vector of we
 
 ## Appendix: Recent Releases
 
-* **Version 2.1.3** - Added `pca_reduction` to `struct TriangMat`. Changed `eigenvectors` to compute normalized eigenvectors of the original data rather than of its covariance matrix. That is now done by better named `normalize`, should you still need it. `Eigenvectors` is slower, as it needs to do forward substitution to find each one.
+* **Version 2.1.4** - Tidied up some error processing.
+
+* **Version 2.1.3** - Added `pca_reduction` to `struct TriangMat`. Changed `eigenvectors` to compute normalized eigenvectors of the original data rather than of its covariance matrix. That is now done by better named `normalize` (should you still need it). `Eigenvectors` is somewhat slower, as it needs to solve forward substitution to find each vector.
 
 * **Version 2.1.2** - Added function `project` to project a `TriangMat` to a lower dimensional space of selected dimensions. Removed `rows` which was a duplicate of `dim`.
 

diff --git a/src/error.rs b/src/error.rs
@@ -10,6 +10,8 @@ pub type RE = RError<String>;
 
 #[derive(Debug)]
 /// Custom RStats Error
+/// Parameter <T> is future proofing, so that any type of argument may be returned.
+/// Currently only messages of type <String> and <&str> are used
 pub enum RError<T>
 where
     T: Sized + Debug,
@@ -39,6 +41,44 @@ where
         }
     }
 }
+/// Convenience function for building RError::NoDataError(String)  
+/// from payload message, which can be either literal `&str` or `String`.
+/// `String` allows splicing in values of variables for debugging, using `format!`
+pub fn nodata_error<T>(msg: impl Into<String>) -> Result<T,RError<String>> {
+    Err(RError::NoDataError(msg.into()))
+} 
+/// Convenience function for building RError::DataError(String)  
+/// from payload message, which can be either literal `&str` or `String`.
+/// `String` allows splicing in values of variables for debugging, using `format!`
+pub fn data_error<T>(msg: impl Into<String>) -> Result<T,RError<String>> {
+    Err(RError::DataError(msg.into()))
+}
+/// Convenience function for building RError::ArithError(String)  
+/// from payload message, which can be either literal `&str` or `String`.
+/// `String` allows splicing in values of variables for debugging, using `format!`
+pub fn arith_error<T>(msg: impl Into<String>) -> Result<T,RError<String>> {
+    Err(RError::ArithError(msg.into()))
+} 
+/// Convenience function for building RError::ArithError(String)  
+/// from payload message, which can be either literal `&str` or `String`.
+/// `String` allows splicing in values of variables for debugging, using `format!`
+pub fn other_error<T>(msg: impl Into<String>) -> Result<T,RError<String>> {
+    Err(RError::OtherError(msg.into()))
+} 
+/*
+/// Convenience function for building RError<String>  
+/// from short name and payload message, which can be either literal `&str` or `String`.
+/// `String` allows splicing in values of variables for debugging, using `format!`
+pub fn re_error<T>(kind: &str, msg: impl Into<String>) -> Result<T,RError<String>> {
+    match kind {
+        "empty" => Err(RError::NoDataError(msg.into())), 
+        "size"  => Err(RError::DataError(msg.into())), 
+        "arith" => Err(RError::ArithError(msg.into())),
+        "other" => Err(RError::OtherError(msg.into())),
+        _ => Err(RError::OtherError("Wrong error kind given to re_error".into()))
+    } 
+}
+*/
 /// Automatically converting any RanError to RError::OtherError
 impl From<RanError<String>> for RError<String> {
     fn from(e: RanError<String>) -> Self {
@@ -66,15 +106,3 @@ impl From<std::io::Error> for RError<String> {
         RError::OtherError(format!("IOError: {e}"))
     }
 }
-
-/// Convenience function for building RError<String>  
-/// from short name and payload message, which can be either &str or String
-pub fn re_error<T>(kind: &str, msg: impl Into<String>) -> Result<T,RError<String>> {
-    match kind {
-        "empty" => Err(RError::NoDataError(msg.into())), 
-        "size"  => Err(RError::DataError(msg.into())), 
-        "arith" => Err(RError::ArithError(msg.into())),
-        "other" => Err(RError::OtherError(msg.into())),
-        _ => Err(RError::OtherError("Wrong error kind given to re_error".into()))
-    }
-}
diff --git a/src/lib.rs b/src/lib.rs
@@ -19,7 +19,7 @@ pub mod vecvec;
 /// Multidimensional operations on sets of vectors, with additional inputs
 pub mod vecvecg;
 
-pub use crate::error::{re_error, RError, RE};
+pub use crate::error::*;
 // reexporting useful related methods
 pub use indxvec::{here, printing::*, MinMax, Printing};
 pub use medians::{MedError, Median, Medianf64};

diff --git a/src/stats.rs b/src/stats.rs
@@ -1,4 +1,4 @@
-use crate::{error::RError, re_error, sumn, fromop, Params, MutVecg, Stats, Vecg, RE};
+use crate::*; //{RError, nodata_error, sumn, fromop, Params, MutVecg, Stats, Vecg, RE};
 use indxvec::Vecops;
 use medians::{Medianf64,Median};
 use core::cmp::Ordering::*;
@@ -28,17 +28,15 @@ where
     /// Vector with reciprocal components
     fn vreciprocal(self) -> Result<Vec<f64>, RE> {
         if self.is_empty() {
-            return Err(RError::NoDataError("empty self vec".to_owned()));
+            return nodata_error("vreciprocal: empty self vec");
         };
         self.iter()
             .map(|component| -> Result<f64, RE> {
                 let c: f64 = component.clone().into();
                 if c.is_normal() {
                     Ok(1.0 / c)
                 } else {
-                    Err(RError::ArithError(
-                        "no reciprocal for zero component".to_owned(),
-                    ))
+                    arith_error(format!("vreciprocal: bad component {c}"))
                 }
             })
             .collect::<Result<Vec<f64>, RE>>()
@@ -47,46 +45,42 @@ where
     /// Vector with inverse magnitude
     fn vinverse(self) -> Result<Vec<f64>, RE> {
         if self.is_empty() {
-            return Err(RError::NoDataError("empty self vec".to_owned()));
+            return nodata_error("vinverse: empty self vec");
         };
         let vmagsq = self.vmagsq();
         if vmagsq > 0.0 {
             Ok(self.iter().map(|x| x.clone().into() / vmagsq).collect())
         } else {
-            Err(RError::DataError(
-                "no inverse of zero vector magnitude".to_owned(),
-            ))
+            data_error("vinverse: can not invert zero vector")
         }
     }
 
     // Negated vector (all components swap sign)
     fn negv(self) -> Result<Vec<f64>, RE> {
         if self.is_empty() {
-            return Err(RError::NoDataError("empty self vec".to_owned()));
+            return nodata_error("negv: empty self vec");
         };
         Ok(self.iter().map(|x| (-x.clone().into())).collect())
     }
 
     /// Unit vector
     fn vunit(self) -> Result<Vec<f64>, RE> {
         if self.is_empty() {
-            return Err(RError::NoDataError("empty self vec".to_owned()));
+            return nodata_error("vunit: empty self vec");
         };
         let mag = self.vmag();
         if mag > 0.0 {
             Ok(self.iter().map(|x| x.clone().into() / mag).collect())
         } else {
-            Err(RError::DataError(
-                "vector of zero magnitude cannot be made into a unit vector".to_owned(),
-            ))
+            data_error("vunit: can not make zero vector into a unit vector")
         }
     }
 
     /// Harmonic spread from median
     fn hmad(self) -> Result<f64, RE> {
         let n = self.len();
         if n == 0 {
-            return Err(RError::NoDataError("empty self vec".to_owned()));
+            return nodata_error("hmad: empty self");
         };
         let fself = self.iter().map(|x| x.clone().into()).collect::<Vec<f64>>();
         let recmedian = 1.0 / fself.medf_checked()?;
@@ -95,7 +89,7 @@ where
             .map(|x| -> Result<f64, RE> {
                 let fx: f64 = x.clone().into();
                 if !fx.is_normal() {
-                    return re_error("ArithError","attempt to divide by zero");
+                    return arith_error("hmad: attempt to divide by zero");
                 };
                 Ok((recmedian - 1.0 / fx).abs())
             })
@@ -116,7 +110,7 @@ where
         if n > 0 {
             Ok(self.iter().map(|x| x.clone().into()).sum::<f64>() / (n as f64))
         } else {
-            re_error("NoDataError","empty self vec")
+            nodata_error("amean: empty self vec")
         }
     }
 

diff --git a/src/triangmat.rs b/src/triangmat.rs
@@ -1,4 +1,4 @@
-use crate::{re_error, sumn, RError, Stats, TriangMat, Vecg, MutVecg, RE}; // MStats, MinMax, MutVecg, Stats, VecVec };
+use crate::*; // MStats, MinMax, MutVecg, Stats, VecVec };
 pub use indxvec::{printing::*, Printing, Indices, Vecops};
 
 /// Meanings of 'kind' field. Note that 'Upper Symmetric' would represent the same full matrix as
@@ -43,7 +43,7 @@ impl TriangMat {
     /// Squared euclidian vector magnitude (norm) of the data vector
     pub fn magsq(&self) -> f64 {
         self.data.vmagsq()
-    }
+    }    
     /// Sum of the elements:  
     /// when applied to the wedge product **a∧b**, returns det(**a,b**)
     pub fn sum(&self) -> f64 {
@@ -67,7 +67,7 @@ impl TriangMat {
             })
             .collect::<Vec<f64>>()
     }
-    /// New unit (symmetric) TriangMat matrix with total data size `n*(n+1)/2`
+    /// New unit (symmetric) TriangMat matrix (data size `n*(n+1)/2`)
     pub fn unit(n: usize) -> Self {
         let mut data = Vec::new();
         for i in 0..n {
@@ -94,9 +94,13 @@ impl TriangMat {
         fullcov.iter_mut().for_each(|eigenvector| eigenvector.munit());
         fullcov
     }
-    /// Eigenvectors (normalized and indexed) of A=LL', given L (the lower triangular Cholesky decomposition).
-    /// The index gives the ordering by eigenvalues.
+
+    /// Normalized eigenvectors of A, given L.
+    /// Where L is the lower triangular Cholesky decomposition of covariance/comediance matrix for A.
+    /// Index gives the descending order of eigenvalues.
+    /// Can be used to order eigenvectors by their relative significance (covariance in their direction).
     pub fn eigenvectors(&self) -> Result<(Vec<Vec<f64>>,Vec<usize>),RE> {
+        if self.is_empty() { return nodata_error("eigenvectors applied to empty L") };
         let n = self.dim();
         let mut evectors = Vec::new();
         let eigenvals = self.eigenvalues();
@@ -108,18 +112,23 @@ impl TriangMat {
             eigenvec.munit(); // normalize the eigenvector
             evectors.push(eigenvec);
         };
+        // descending sort index of eigenvalues
         let index = eigenvals
             .isort_indexed(0..n, |a, b| b.total_cmp(a));
         Ok((evectors,index))
     }
-    /// PCA dimensional reduction using cholesky lower triangular matrix L (self).
-    /// Projecting data using only `new_dims` number of eigenvectors,
+
+    /// PCA dimensional reduction using cholesky lower triangular matrix L (as self).
+    /// Projecting data using only `dimensions` number of eigenvectors,
     /// corresponding to the largest eigenvalues.
-    pub fn pca_reduction(self, data: &[Vec<f64>], new_dims: usize) -> Result<Vec<Vec<f64>>,RE> {
-        if new_dims > data.len() { re_error("size","pca_reduction: new_dims exceeds L dimension")? };
+    pub fn pca_reduction(self, data: &[Vec<f64>], dimensions: usize) -> Result<Vec<Vec<f64>>,RE> {
+        if data.is_empty() { return nodata_error("pca_reduction: empty data") };
+        let n = data[0].len();        
+        if dimensions > n { return data_error("pca_reduction: new dimensions exceed those of data") };
+        if self.dim() != n { return data_error("pca_reduction: L and data dimensions mismatch") };
         let mut res = Vec::with_capacity(data.len());
-        let (evecs,mut index) = self.eigenvectors()?;
-        index.truncate(new_dims);
+        let (evecs,mut index) = self.eigenvectors()?; 
+        index.truncate(dimensions);
         let pruned_evecs = index.unindex(&evecs, true); 
         for dvec in data {
             res.push( 
@@ -233,15 +242,15 @@ impl TriangMat {
         let sl = self.data.len();
         // input not long enough to compute anything
         if sl < 3 {
-            return re_error("empty", "cholesky needs at least 3x3 TriangMat: {self}")?;
+            return nodata_error("cholesky needs at least 3x3 TriangMat");
         };
         // n is the dimension of the implied square matrix.
         // Not needed as an extra argument. We compute it
         // by solving a quadratic equation in seqtosubs()
         let (n, c) = TriangMat::rowcol(sl);
         // input is not a triangular number, is of wrong size
         if c != 0 {
-            return re_error("size", "cholesky needs a triangular matrix")?;
+            return data_error("cholesky needs a triangular matrix");
         };
         let mut res = vec![0.0; sl]; // result L is of the same size as the input
         for i in 0..n {
@@ -259,7 +268,7 @@ impl TriangMat {
                     // dif <= 0 means that the input matrix is not positive definite,
                     // or is ill-conditioned, so we return ArithError
                     if dif <= 0_f64 {
-                        return re_error("arith", "cholesky matrix is not positive definite")?;
+                        return arith_error("cholesky matrix is not positive definite");
                     };
                     dif.sqrt()
                 }