Skip to content

Commit

Permalink
Merge pull request #212 from Shimuuar/doctest-ols
Browse files Browse the repository at this point in the history
Add special case for computation of rSquare and add doctests
  • Loading branch information
Shimuuar authored Jan 16, 2025
2 parents 1372649 + 13e3a2a commit 36d1cc0
Show file tree
Hide file tree
Showing 10 changed files with 88 additions and 121 deletions.
3 changes: 0 additions & 3 deletions Setup.lhs

This file was deleted.

52 changes: 45 additions & 7 deletions Statistics/Regression.hs
Original file line number Diff line number Diff line change
Expand Up @@ -41,8 +41,15 @@ import qualified Data.Vector.Unboxed.Mutable as M
-- element than the list of predictors; the last element is the
-- /y/-intercept value.
--
-- * /R²/, the coefficient of determination (see 'rSquare' for
-- * /R²/, the coefficient of determination (see 'rSquare' for
-- details).
--
-- >>> import qualified Data.Vector.Unboxed as VU
-- >>> :{
-- olsRegress [ VU.fromList [0,1,2,3]
-- ] (VU.fromList [1000, 1001, 1002, 1003])
-- :}
-- ([1.0000000000000218,999.9999999999999],1.0)
olsRegress :: [Vector]
-- ^ Non-empty list of predictor vectors. Must all have
-- the same length. These will become the columns of
Expand All @@ -65,7 +72,30 @@ olsRegress preds@(_:_) resps
lss@(n:ls) = map G.length preds

Check warning on line 72 in Statistics/Regression.hs

View workflow job for this annotation

GitHub Actions / ubuntu-latest / ghc 9.10.1

Pattern match(es) are non-exhaustive

Check warning on line 72 in Statistics/Regression.hs

View workflow job for this annotation

GitHub Actions / ubuntu-latest / ghc 9.2.8

Pattern match(es) are non-exhaustive

Check warning on line 72 in Statistics/Regression.hs

View workflow job for this annotation

GitHub Actions / ubuntu-latest / ghc 9.4.8

Pattern match(es) are non-exhaustive

Check warning on line 72 in Statistics/Regression.hs

View workflow job for this annotation

GitHub Actions / ubuntu-latest / ghc 9.8.4

Pattern match(es) are non-exhaustive

Check warning on line 72 in Statistics/Regression.hs

View workflow job for this annotation

GitHub Actions / ubuntu-latest / ghc 9.6.6

Pattern match(es) are non-exhaustive
olsRegress _ _ = error "no predictors given"

-- | Compute the ordinary least-squares solution to /A x = b/.
-- | Compute the ordinary least-squares solution to overdetermined
-- linear system \(Ax = b\). In other words it finds
--
-- \[ \operatorname{argmin}|Ax-b|^2 \].
--
-- All columns of \(A\) must be linearly independent. It's not
-- checked function will return nonsensical result if resulting
-- linear system is poorly conditioned.
--
-- >>> import qualified Data.Vector.Unboxed as VU
-- >>> :{
-- ols (fromColumns [ VU.fromList [0,1,2,3]
-- , VU.fromList [1,1,1,1]
-- ]) (VU.fromList [1000, 1001, 1002, 1003])
-- :}
-- [1.0000000000000218,999.9999999999999]
--
-- >>> :{
-- ols (fromColumns [ VU.fromList [0,1,2,3]
-- , VU.fromList [4,2,1,1]
-- , VU.fromList [1,1,1,1]
-- ]) (VU.fromList [1000, 1001, 1002, 1003])
-- :}
-- [1.0000000000005393,4.2290644612446807e-13,999.9999999999983]
ols :: Matrix -- ^ /A/ has at least as many rows as columns.
-> Vector -- ^ /b/ has the same length as columns in /A/.
-> Vector
Expand All @@ -92,7 +122,7 @@ solve r b
where n = rows r
l = U.length b

-- | Compute /R²/, the coefficient of determination that
-- | Compute /R²/, the coefficient of determination that
-- indicates goodness-of-fit of a regression.
--
-- This value will be 1 if the predictors fit perfectly, dropping to 0
Expand All @@ -101,11 +131,19 @@ rSquare :: Matrix -- ^ Predictors (regressors).
-> Vector -- ^ Responders.
-> Vector -- ^ Regression coefficients.
-> Double
rSquare pred resp coeff = 1 - r / t
rSquare pred resp coeff
-- Data has zero variance. If fit is perfect we set R² to 1 else to
-- 0. This is not perfect heuristic. Fit residuals may be nonzero
-- due to rounding.
| t == 0 = if r == 0 then 1 else 0
-- If fit residuals are worse than average we simply set R² to 0
| r2 >= 0 && r2 <= 1 = r2
| otherwise = 0
where
r = sum $ flip U.imap resp $ \i x -> square (x - p i)
t = sum $ flip U.map resp $ \x -> square (x - mean resp)
p i = sum . flip U.imap coeff $ \j -> (* unsafeIndex pred i j)
r2 = 1 - r / t
r = sum $ flip U.imap resp $ \i x -> square (x - p i)
t = sum $ flip U.map resp $ \x -> square (x - mean resp)
p i = sum $ flip U.imap coeff $ \j x -> x * unsafeIndex pred i j

-- | Bootstrap a regression function. Returns both the results of the
-- regression and the requested confidence interval values.
Expand Down
9 changes: 5 additions & 4 deletions Statistics/Test/Internal.hs
Original file line number Diff line number Diff line change
Expand Up @@ -27,11 +27,12 @@ data Rank v a = Rank {
-- In case of ties average of ranks of equal elements is assigned
-- to each
--
-- >>> rank (==) (fromList [10,20,30::Int])
-- > fromList [1.0,2.0,3.0]
-- >>> import qualified Data.Vector.Unboxed as VU
-- >>> rank (==) (VU.fromList [10,20,30::Int])
-- [1.0,2.0,3.0]
--
-- >>> rank (==) (fromList [10,10,10,30::Int])
-- > fromList [2.0,2.0,2.0,4.0]
-- >>> rank (==) (VU.fromList [10,10,10,30::Int])
-- [2.0,2.0,2.0,4.0]
rank :: (G.Vector v a, G.Vector v Double)
=> (a -> a -> Bool) -- ^ Equivalence relation
-> v a -- ^ Vector to rank
Expand Down
10 changes: 5 additions & 5 deletions Statistics/Types.hs
Original file line number Diff line number Diff line change
Expand Up @@ -94,7 +94,7 @@ import Statistics.Distribution.Normal
-- second from @1 - CL@ or significance level.
--
-- >>> cl95
-- mkCLFromSignificance 0.05
-- mkCLFromSignificance 5.0e-2
--
-- Prior to 0.14 confidence levels were passed to function as plain
-- @Doubles@. Use 'mkCL' to convert them to @CL@.
Expand Down Expand Up @@ -134,7 +134,7 @@ instance Ord a => Ord (CL a) where
-- exception if parameter is out of [0,1] range
--
-- >>> mkCL 0.95 -- same as cl95
-- mkCLFromSignificance 0.05
-- mkCLFromSignificance 5.0000000000000044e-2
mkCL :: (Ord a, Num a) => a -> CL a
mkCL
= fromMaybe (error "Statistics.Types.mkCL: probability is out if [0,1] range")
Expand All @@ -144,7 +144,7 @@ mkCL
-- parameter is out of [0,1] range
--
-- >>> mkCLE 0.95 -- same as cl95
-- Just (mkCLFromSignificance 0.05)
-- Just (mkCLFromSignificance 5.0000000000000044e-2)
mkCLE :: (Ord a, Num a) => a -> Maybe (CL a)
mkCLE p
| p >= 0 && p <= 1 = Just $ CL (1 - p)
Expand All @@ -155,15 +155,15 @@ mkCLE p
-- throw exception if parameter is out of [0,1] range
--
-- >>> mkCLFromSignificance 0.05 -- same as cl95
-- mkCLFromSignificance 0.05
-- mkCLFromSignificance 5.0e-2
mkCLFromSignificance :: (Ord a, Num a) => a -> CL a
mkCLFromSignificance = fromMaybe (error errMkCL) . mkCLFromSignificanceE

-- | Same as 'mkCLFromSignificance' but returns @Nothing@ instead of error if
-- parameter is out of [0,1] range
--
-- >>> mkCLFromSignificanceE 0.05 -- same as cl95
-- Just (mkCLFromSignificance 0.05)
-- Just (mkCLFromSignificance 5.0e-2)
mkCLFromSignificanceE :: (Ord a, Num a) => a -> Maybe (CL a)
mkCLFromSignificanceE p
| p >= 0 && p <= 1 = Just $ CL p
Expand Down
11 changes: 11 additions & 0 deletions changelog.md
Original file line number Diff line number Diff line change
@@ -1,3 +1,14 @@
## Changes in NEXT_VERSIONS

* Computation of `rSquare` has special case for case when data variation is 0.

* Doctests added.

* Benchmarks using `tasty-bench` and `tasty-papi` added.

* Spurious test failures fixed.


## Changes in 0.16.2.1

* Unnecessary constraint dropped from `tStatisticsPaired`.
Expand Down
2 changes: 0 additions & 2 deletions dense-linear-algebra/Setup.hs

This file was deleted.

19 changes: 17 additions & 2 deletions statistics.cabal
Original file line number Diff line number Diff line change
Expand Up @@ -41,8 +41,6 @@ extra-source-files:
examples/kde/data/faithful.csv
examples/kde/kde.html
examples/kde/kde.tpl
tests/Tests/Math/Tables.hs
tests/Tests/Math/gen.py
tests/utils/Makefile
tests/utils/fftw.c

Expand Down Expand Up @@ -188,6 +186,23 @@ test-suite statistics-tests
, vector
, vector-algorithms

test-suite statistics-doctests
default-language: Haskell2010
type: exitcode-stdio-1.0
hs-source-dirs: tests
main-is: doctest.hs
if impl(ghcjs) || impl(ghc < 8.0)
Buildable: False
-- Linker on macos prints warnings to console which confuses doctests.
-- We simply disable doctests on ma for older GHC
-- > warning: -single_module is obsolete
if os(darwin) && impl(ghc < 9.6)
buildable: False
build-depends:
base -any
, statistics -any
, doctest >=0.15 && <0.24

-- We want to be able to build benchmarks using both tasty-bench and tasty-papi.
-- They have similar API so we just create two shim modules which reexport
-- definitions from corresponding library and pick one in cabal file.
Expand Down
47 changes: 0 additions & 47 deletions tests/Tests/Math/Tables.hs

This file was deleted.

51 changes: 0 additions & 51 deletions tests/Tests/Math/gen.py

This file was deleted.

5 changes: 5 additions & 0 deletions tests/doctest.hs
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
import Test.DocTest (doctest)

main :: IO ()
main = doctest ["-XHaskell2010", "Statistics"]

0 comments on commit 36d1cc0

Please sign in to comment.