From 90aaa0763c75201db4492ec26a7951d20e451010 Mon Sep 17 00:00:00 2001 From: Alexey Khudyakov Date: Thu, 16 Jan 2025 12:14:40 +0300 Subject: [PATCH 1/5] Add doctests and fix exiting ones --- Statistics/Test/Internal.hs | 9 +++++---- Statistics/Types.hs | 10 +++++----- statistics.cabal | 17 +++++++++++++++++ tests/doctest.hs | 5 +++++ 4 files changed, 32 insertions(+), 9 deletions(-) create mode 100644 tests/doctest.hs diff --git a/Statistics/Test/Internal.hs b/Statistics/Test/Internal.hs index 225cfdaf..5081118b 100644 --- a/Statistics/Test/Internal.hs +++ b/Statistics/Test/Internal.hs @@ -27,11 +27,12 @@ data Rank v a = Rank { -- In case of ties average of ranks of equal elements is assigned -- to each -- --- >>> rank (==) (fromList [10,20,30::Int]) --- > fromList [1.0,2.0,3.0] +-- >>> import qualified Data.Vector.Unboxed as VU +-- >>> rank (==) (VU.fromList [10,20,30::Int]) +-- [1.0,2.0,3.0] -- --- >>> rank (==) (fromList [10,10,10,30::Int]) --- > fromList [2.0,2.0,2.0,4.0] +-- >>> rank (==) (VU.fromList [10,10,10,30::Int]) +-- [2.0,2.0,2.0,4.0] rank :: (G.Vector v a, G.Vector v Double) => (a -> a -> Bool) -- ^ Equivalence relation -> v a -- ^ Vector to rank diff --git a/Statistics/Types.hs b/Statistics/Types.hs index d326e684..82df3921 100644 --- a/Statistics/Types.hs +++ b/Statistics/Types.hs @@ -94,7 +94,7 @@ import Statistics.Distribution.Normal -- second from @1 - CL@ or significance level. -- -- >>> cl95 --- mkCLFromSignificance 0.05 +-- mkCLFromSignificance 5.0e-2 -- -- Prior to 0.14 confidence levels were passed to function as plain -- @Doubles@. Use 'mkCL' to convert them to @CL@. @@ -134,7 +134,7 @@ instance Ord a => Ord (CL a) where -- exception if parameter is out of [0,1] range -- -- >>> mkCL 0.95 -- same as cl95 --- mkCLFromSignificance 0.05 +-- mkCLFromSignificance 5.0000000000000044e-2 mkCL :: (Ord a, Num a) => a -> CL a mkCL = fromMaybe (error "Statistics.Types.mkCL: probability is out if [0,1] range") @@ -144,7 +144,7 @@ mkCL -- parameter is out of [0,1] range -- -- >>> mkCLE 0.95 -- same as cl95 --- Just (mkCLFromSignificance 0.05) +-- Just (mkCLFromSignificance 5.0000000000000044e-2) mkCLE :: (Ord a, Num a) => a -> Maybe (CL a) mkCLE p | p >= 0 && p <= 1 = Just $ CL (1 - p) @@ -155,7 +155,7 @@ mkCLE p -- throw exception if parameter is out of [0,1] range -- -- >>> mkCLFromSignificance 0.05 -- same as cl95 --- mkCLFromSignificance 0.05 +-- mkCLFromSignificance 5.0e-2 mkCLFromSignificance :: (Ord a, Num a) => a -> CL a mkCLFromSignificance = fromMaybe (error errMkCL) . mkCLFromSignificanceE @@ -163,7 +163,7 @@ mkCLFromSignificance = fromMaybe (error errMkCL) . mkCLFromSignificanceE -- parameter is out of [0,1] range -- -- >>> mkCLFromSignificanceE 0.05 -- same as cl95 --- Just (mkCLFromSignificance 0.05) +-- Just (mkCLFromSignificance 5.0e-2) mkCLFromSignificanceE :: (Ord a, Num a) => a -> Maybe (CL a) mkCLFromSignificanceE p | p >= 0 && p <= 1 = Just $ CL p diff --git a/statistics.cabal b/statistics.cabal index af810d5c..640503db 100644 --- a/statistics.cabal +++ b/statistics.cabal @@ -188,6 +188,23 @@ test-suite statistics-tests , vector , vector-algorithms +test-suite statistics-doctests + default-language: Haskell2010 + type: exitcode-stdio-1.0 + hs-source-dirs: tests + main-is: doctest.hs + if impl(ghcjs) || impl(ghc < 8.0) + Buildable: False + -- Linker on macos prints warnings to console which confuses doctests. + -- We simply disable doctests on ma for older GHC + -- > warning: -single_module is obsolete + if os(darwin) && impl(ghc < 9.6) + buildable: False + build-depends: + base -any + , statistics -any + , doctest >=0.15 && <0.24 + -- We want to be able to build benchmarks using both tasty-bench and tasty-papi. -- They have similar API so we just create two shim modules which reexport -- definitions from corresponding library and pick one in cabal file. diff --git a/tests/doctest.hs b/tests/doctest.hs new file mode 100644 index 00000000..702b562b --- /dev/null +++ b/tests/doctest.hs @@ -0,0 +1,5 @@ +import Test.DocTest (doctest) + +main :: IO () +main = doctest ["-XHaskell2010", "Statistics"] + From caa4d118df178e28f9ee5b6fbebcea019db555f4 Mon Sep 17 00:00:00 2001 From: Alexey Khudyakov Date: Thu, 16 Jan 2025 12:35:17 +0300 Subject: [PATCH 2/5] Add doctests for regressions. Fixes #200 --- Statistics/Regression.hs | 36 +++++++++++++++++++++++++++++++++--- 1 file changed, 33 insertions(+), 3 deletions(-) diff --git a/Statistics/Regression.hs b/Statistics/Regression.hs index a0020f21..eb12b919 100644 --- a/Statistics/Regression.hs +++ b/Statistics/Regression.hs @@ -41,8 +41,15 @@ import qualified Data.Vector.Unboxed.Mutable as M -- element than the list of predictors; the last element is the -- /y/-intercept value. -- --- * /R²/, the coefficient of determination (see 'rSquare' for +-- * /R²/, the coefficient of determination (see 'rSquare' for -- details). +-- +-- >>> import qualified Data.Vector.Unboxed as VU +-- >>> :{ +-- olsRegress [ VU.fromList [0,1,2,3] +-- ] (VU.fromList [1000, 1001, 1002, 1003]) +-- :} +-- ([1.0000000000000218,999.9999999999999],1.0) olsRegress :: [Vector] -- ^ Non-empty list of predictor vectors. Must all have -- the same length. These will become the columns of @@ -65,7 +72,30 @@ olsRegress preds@(_:_) resps lss@(n:ls) = map G.length preds olsRegress _ _ = error "no predictors given" --- | Compute the ordinary least-squares solution to /A x = b/. +-- | Compute the ordinary least-squares solution to overdetermined +-- linear system \(Ax = b\). In other words it finds +-- +-- \[ \operatorname{argmin}|Ax-b|^2 \]. +-- +-- All columns of \(A\) must be linearly independent. It's not +-- checked function will return nonsensical result if resulting +-- linear system is poorly conditioned. +-- +-- >>> import qualified Data.Vector.Unboxed as VU +-- >>> :{ +-- ols (fromColumns [ VU.fromList [0,1,2,3] +-- , VU.fromList [1,1,1,1] +-- ]) (VU.fromList [1000, 1001, 1002, 1003]) +-- :} +-- [1.0000000000000218,999.9999999999999] +-- +-- >>> :{ +-- ols (fromColumns [ VU.fromList [0,1,2,3] +-- , VU.fromList [4,2,1,1] +-- , VU.fromList [1,1,1,1] +-- ]) (VU.fromList [1000, 1001, 1002, 1003]) +-- :} +-- [1.0000000000005393,4.2290644612446807e-13,999.9999999999983] ols :: Matrix -- ^ /A/ has at least as many rows as columns. -> Vector -- ^ /b/ has the same length as columns in /A/. -> Vector @@ -92,7 +122,7 @@ solve r b where n = rows r l = U.length b --- | Compute /R²/, the coefficient of determination that +-- | Compute /R²/, the coefficient of determination that -- indicates goodness-of-fit of a regression. -- -- This value will be 1 if the predictors fit perfectly, dropping to 0 From 052f51c52702176e465ebc953bcf752514ffca13 Mon Sep 17 00:00:00 2001 From: Alexey Khudyakov Date: Thu, 16 Jan 2025 14:25:27 +0300 Subject: [PATCH 3/5] Update changelog --- changelog.md | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/changelog.md b/changelog.md index a5c9ced6..5ad8717e 100644 --- a/changelog.md +++ b/changelog.md @@ -1,3 +1,12 @@ +## Changes in NEXT_VERSIONS + + * Doctests added. + + * Benchmarks using `tasty-bench` and `tasty-papi` added. + + * Spurious test failures fixed. + + ## Changes in 0.16.2.1 * Unnecessary constraint dropped from `tStatisticsPaired`. From 931ab79ae867b65992909070c1ceab5e6727a98f Mon Sep 17 00:00:00 2001 From: Alexey Khudyakov Date: Thu, 16 Jan 2025 17:50:09 +0300 Subject: [PATCH 4/5] Add special case for rSquare MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit When data variance is zero we can't compute R² directly and have to special case it. And if R² gets out of range for example due to singular A^T·A we simply set it to 0. We explained nothing Fixes #118 --- Statistics/Regression.hs | 16 ++++++++++++---- changelog.md | 2 ++ 2 files changed, 14 insertions(+), 4 deletions(-) diff --git a/Statistics/Regression.hs b/Statistics/Regression.hs index eb12b919..b2abe890 100644 --- a/Statistics/Regression.hs +++ b/Statistics/Regression.hs @@ -131,11 +131,19 @@ rSquare :: Matrix -- ^ Predictors (regressors). -> Vector -- ^ Responders. -> Vector -- ^ Regression coefficients. -> Double -rSquare pred resp coeff = 1 - r / t +rSquare pred resp coeff + -- Data has zero variance. If fit is perfect we set R² to 1 else to + -- 0. This is not perfect heuristic. Fit residuals may be nonzero + -- due to rounding. + | t == 0 = if r == 0 then 1 else 0 + -- If fit residuals are worse than average we simply set R² to 0 + | r2 >= 0 && r2 <= 1 = r2 + | otherwise = 0 where - r = sum $ flip U.imap resp $ \i x -> square (x - p i) - t = sum $ flip U.map resp $ \x -> square (x - mean resp) - p i = sum . flip U.imap coeff $ \j -> (* unsafeIndex pred i j) + r2 = 1 - r / t + r = sum $ flip U.imap resp $ \i x -> square (x - p i) + t = sum $ flip U.map resp $ \x -> square (x - mean resp) + p i = sum $ flip U.imap coeff $ \j x -> x * unsafeIndex pred i j -- | Bootstrap a regression function. Returns both the results of the -- regression and the requested confidence interval values. diff --git a/changelog.md b/changelog.md index 5ad8717e..878c51d2 100644 --- a/changelog.md +++ b/changelog.md @@ -1,5 +1,7 @@ ## Changes in NEXT_VERSIONS + * Computation of `rSquare` has special case for case when data variation is 0. + * Doctests added. * Benchmarks using `tasty-bench` and `tasty-papi` added. From 13e3a2a640811a8378c0a1598ea5b474080e67ab Mon Sep 17 00:00:00 2001 From: Alexey Khudyakov Date: Thu, 16 Jan 2025 18:10:35 +0300 Subject: [PATCH 5/5] Delete unnecessary files tests/Tests/Math/* are remants from time when math-function were part of statistics --- Setup.lhs | 3 --- dense-linear-algebra/Setup.hs | 2 -- statistics.cabal | 2 -- tests/Tests/Math/Tables.hs | 47 -------------------------------- tests/Tests/Math/gen.py | 51 ----------------------------------- 5 files changed, 105 deletions(-) delete mode 100644 Setup.lhs delete mode 100644 dense-linear-algebra/Setup.hs delete mode 100644 tests/Tests/Math/Tables.hs delete mode 100644 tests/Tests/Math/gen.py diff --git a/Setup.lhs b/Setup.lhs deleted file mode 100644 index 5bde0de9..00000000 --- a/Setup.lhs +++ /dev/null @@ -1,3 +0,0 @@ -#!/usr/bin/env runhaskell -> import Distribution.Simple -> main = defaultMain diff --git a/dense-linear-algebra/Setup.hs b/dense-linear-algebra/Setup.hs deleted file mode 100644 index 9a994af6..00000000 --- a/dense-linear-algebra/Setup.hs +++ /dev/null @@ -1,2 +0,0 @@ -import Distribution.Simple -main = defaultMain diff --git a/statistics.cabal b/statistics.cabal index 640503db..2f341a49 100644 --- a/statistics.cabal +++ b/statistics.cabal @@ -41,8 +41,6 @@ extra-source-files: examples/kde/data/faithful.csv examples/kde/kde.html examples/kde/kde.tpl - tests/Tests/Math/Tables.hs - tests/Tests/Math/gen.py tests/utils/Makefile tests/utils/fftw.c diff --git a/tests/Tests/Math/Tables.hs b/tests/Tests/Math/Tables.hs deleted file mode 100644 index 89ac560a..00000000 --- a/tests/Tests/Math/Tables.hs +++ /dev/null @@ -1,47 +0,0 @@ -module Tests.Math.Tables where - -tableLogGamma :: [(Double,Double)] -tableLogGamma = - [(0.000001250000000, 13.592366285131769033) - , (0.000068200000000, 9.5930266308318756785) - , (0.000246000000000, 8.3100370767447966358) - , (0.000880000000000, 7.03508133735248542) - , (0.003120000000000, 5.768129358365567505) - , (0.026700000000000, 3.6082588918892977148) - , (0.077700000000000, 2.5148371858768232556) - , (0.234000000000000, 1.3579557559432759994) - , (0.860000000000000, 0.098146578027685615897) - , (1.340000000000000, -0.11404757557207759189) - , (1.890000000000000, -0.0425116422978701336) - , (2.450000000000000, 0.25014296569217625565) - , (3.650000000000000, 1.3701041997380685178) - , (4.560000000000000, 2.5375143317949580002) - , (6.660000000000000, 5.9515377269550207018) - , (8.250000000000000, 9.0331869196051233217) - , (11.300000000000001, 15.814180681373947834) - , (25.600000000000001, 56.711261598328121636) - , (50.399999999999999, 146.12815158702164808) - , (123.299999999999997, 468.85500075897556371) - , (487.399999999999977, 2526.9846647543727158) - , (853.399999999999977, 4903.9359135978220365) - , (2923.300000000000182, 20402.93198938705973) - , (8764.299999999999272, 70798.268343590112636) - , (12630.000000000000000, 106641.77264982508495) - , (34500.000000000000000, 325976.34838781820145) - , (82340.000000000000000, 849629.79603036714252) - , (234800.000000000000000, 2668846.4390507959761) - , (834300.000000000000000, 10540830.912557534873) - , (1230000.000000000000000, 16017699.322315014899) - ] -tableIncompleteBeta :: [(Double,Double,Double,Double)] -tableIncompleteBeta = - [(2.000000000000000, 3.000000000000000, 0.030000000000000, 0.0051864299999999996862) - , (2.000000000000000, 3.000000000000000, 0.230000000000000, 0.22845923000000001313) - , (2.000000000000000, 3.000000000000000, 0.760000000000000, 0.95465728000000005249) - , (4.000000000000000, 2.300000000000000, 0.890000000000000, 0.93829812158347802864) - , (1.000000000000000, 1.000000000000000, 0.550000000000000, 0.55000000000000004441) - , (0.300000000000000, 12.199999999999999, 0.110000000000000, 0.95063000053947077639) - , (13.100000000000000, 9.800000000000001, 0.120000000000000, 1.3483109941962659385e-07) - , (13.100000000000000, 9.800000000000001, 0.420000000000000, 0.071321857831804780226) - , (13.100000000000000, 9.800000000000001, 0.920000000000000, 0.99999578339197081611) - ] diff --git a/tests/Tests/Math/gen.py b/tests/Tests/Math/gen.py deleted file mode 100644 index fde9da69..00000000 --- a/tests/Tests/Math/gen.py +++ /dev/null @@ -1,51 +0,0 @@ -#!/usr/bin/python -""" -""" - -from mpmath import * - -def printListLiteral(lines) : - print " [" + "\n , ".join(lines) + "\n ]" - -################################################################ -# Generate header -print "module Tests.Math.Tables where" -print - -################################################################ -## Generate table for logGamma -print "tableLogGamma :: [(Double,Double)]" -print "tableLogGamma =" - -gammaArg = [ 1.25e-6, 6.82e-5, 2.46e-4, 8.8e-4, 3.12e-3, 2.67e-2, - 7.77e-2, 0.234, 0.86, 1.34, 1.89, 2.45, - 3.65, 4.56, 6.66, 8.25, 11.3, 25.6, - 50.4, 123.3, 487.4, 853.4, 2923.3, 8764.3, - 1.263e4, 3.45e4, 8.234e4, 2.348e5, 8.343e5, 1.23e6, - ] -printListLiteral( - [ '(%.15f, %.20g)' % (x, log(gamma(x))) for x in gammaArg ] - ) - - -################################################################ -## Generate table for incompleteBeta - -print "tableIncompleteBeta :: [(Double,Double,Double,Double)]" -print "tableIncompleteBeta =" - -incompleteBetaArg = [ - (2, 3, 0.03), - (2, 3, 0.23), - (2, 3, 0.76), - (4, 2.3, 0.89), - (1, 1, 0.55), - (0.3, 12.2, 0.11), - (13.1, 9.8, 0.12), - (13.1, 9.8, 0.42), - (13.1, 9.8, 0.92), - ] -printListLiteral( - [ '(%.15f, %.15f, %.15f, %.20g)' % (p,q,x, betainc(p,q,0,x, regularized=True)) - for (p,q,x) in incompleteBetaArg - ])