Skip to content

Commit

Permalink
Skewness and excess kurtosis.
Browse files Browse the repository at this point in the history
  • Loading branch information
ncruces committed Jan 22, 2025
1 parent 40090d8 commit 42bad58
Show file tree
Hide file tree
Showing 4 changed files with 127 additions and 34 deletions.
13 changes: 12 additions & 1 deletion ext/stats/moments.go
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@ package stats

import "math"

// Fisher–Pearson skewness and kurtosis using
// Terriberry's algorithm with Kahan summation:
// https://en.wikipedia.org/wiki/Algorithms_for_calculating_variance#Higher-order_statistics

Expand Down Expand Up @@ -45,6 +46,10 @@ func (m moments) skewness_samp() float64 {
}

func (m moments) kurtosis_pop() float64 {
return m.raw_kurtosis_pop() - 3
}

func (m moments) raw_kurtosis_pop() float64 {
m2 := m.m2.hi
if div := m2 * m2; div != 0 {
return m.m4.hi * float64(m.n) / div
Expand All @@ -53,9 +58,15 @@ func (m moments) kurtosis_pop() float64 {
}

func (m moments) kurtosis_samp() float64 {
n := m.n
k := math.FMA(m.raw_kurtosis_pop(), float64(n+1), float64(3-3*n))
return k * float64(n-1) / float64((n-2)*(n-3))
}

func (m moments) raw_kurtosis_samp() float64 {
n := m.n
// https://mathworks.com/help/stats/kurtosis.html#f4975293
k := math.FMA(m.kurtosis_pop(), float64(n+1), float64(3-3*n))
k := math.FMA(m.raw_kurtosis_pop(), float64(n+1), float64(3-3*n))
return math.FMA(k, float64(n-1)/float64((n-2)*(n-3)), 3)
}

Expand Down
18 changes: 9 additions & 9 deletions ext/stats/moments_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@ func Test_moments(t *testing.T) {
if !math.IsNaN(s1.skewness_pop()) {
t.Errorf("want NaN")
}
if !math.IsNaN(s1.kurtosis_pop()) {
if !math.IsNaN(s1.raw_kurtosis_pop()) {
t.Errorf("want NaN")
}

Expand All @@ -29,16 +29,16 @@ func Test_moments(t *testing.T) {
s1.enqueue(+3.5784)
s1.enqueue(+2.7694)

if got := float32(s1.skewness_pop()); got != 0.106098293 {
if got := s1.skewness_pop(); float32(got) != 0.106098293 {
t.Errorf("got %v, want 0.1061", got)
}
if got := float32(s1.skewness_samp()); got != 0.1258171 {
if got := s1.skewness_samp(); float32(got) != 0.1258171 {
t.Errorf("got %v, want 0.1258", got)
}
if got := float32(s1.kurtosis_pop()); got != 2.3121266 {
if got := s1.raw_kurtosis_pop(); float32(got) != 2.3121266 {
t.Errorf("got %v, want 2.3121", got)
}
if got := float32(s1.kurtosis_samp()); got != 2.7482237 {
if got := s1.raw_kurtosis_samp(); float32(got) != 2.7482237 {
t.Errorf("got %v, want 2.7483", got)
}

Expand Down Expand Up @@ -72,16 +72,16 @@ func Test_moments(t *testing.T) {
s1.dequeue(math.E)
s1.dequeue(math.Sqrt2)

if got := float32(s1.skewness_pop()); got != 0.106098293 {
if got := s1.skewness_pop(); float32(got) != 0.106098293 {
t.Errorf("got %v, want 0.1061", got)
}
if got := float32(s1.skewness_samp()); got != 0.1258171 {
if got := s1.skewness_samp(); float32(got) != 0.1258171 {
t.Errorf("got %v, want 0.1258", got)
}
if got := float32(s1.kurtosis_pop()); got != 2.3121266 {
if got := s1.raw_kurtosis_pop(); float32(got) != 2.3121266 {
t.Errorf("got %v, want 2.3121", got)
}
if got := float32(s1.kurtosis_samp()); got != 2.7482237 {
if got := s1.raw_kurtosis_samp(); float32(got) != 2.7482237 {
t.Errorf("got %v, want 2.7483", got)
}
}
111 changes: 89 additions & 22 deletions ext/stats/stats.go
Original file line number Diff line number Diff line change
@@ -1,13 +1,17 @@
// Package stats provides aggregate functions for statistics.
//
// Provided functions:
// - stddev_pop: population standard deviation
// - stddev_samp: sample standard deviation
// - var_pop: population variance
// - var_samp: sample variance
// - stddev_pop: population standard deviation
// - stddev_samp: sample standard deviation
// - skewness_pop: Pearson population skewness
// - skewness_samp: Pearson sample skewness
// - kurtosis_pop: Fisher population excess kurtosis
// - kurtosis_samp: Fisher sample excess kurtosis
// - covar_pop: population covariance
// - covar_samp: sample covariance
// - corr: correlation coefficient
// - corr: Pearson correlation coefficient
// - regr_r2: correlation coefficient squared
// - regr_avgx: average of the independent variable
// - regr_avgy: average of the dependent variable
Expand Down Expand Up @@ -61,6 +65,10 @@ func Register(db *sqlite3.Conn) error {
db.CreateWindowFunction("var_samp", 1, flags, newVariance(var_samp)),
db.CreateWindowFunction("stddev_pop", 1, flags, newVariance(stddev_pop)),
db.CreateWindowFunction("stddev_samp", 1, flags, newVariance(stddev_samp)),
db.CreateWindowFunction("skewness_pop", 1, flags, newMoments(skewness_pop)),
db.CreateWindowFunction("skewness_samp", 1, flags, newMoments(skewness_samp)),
db.CreateWindowFunction("kurtosis_pop", 1, flags, newMoments(kurtosis_pop)),
db.CreateWindowFunction("kurtosis_samp", 1, flags, newMoments(kurtosis_samp)),
db.CreateWindowFunction("covar_pop", 2, flags, newCovariance(var_pop)),
db.CreateWindowFunction("covar_samp", 2, flags, newCovariance(var_samp)),
db.CreateWindowFunction("corr", 2, flags, newCovariance(corr)),
Expand Down Expand Up @@ -88,6 +96,10 @@ const (
var_samp
stddev_pop
stddev_samp
skewness_pop
skewness_samp
kurtosis_pop
kurtosis_samp
corr
regr_r2
regr_sxx
Expand All @@ -101,6 +113,23 @@ const (
regr_json
)

func special(kind int, n int64) (null, zero bool) {
switch kind {
case var_pop, stddev_pop, regr_sxx, regr_syy, regr_sxy:
return n <= 0, n == 1
case regr_avgx, regr_avgy:
return n <= 0, false
case kurtosis_samp:
return n <= 3, false
case skewness_samp:
return n <= 2, false
case skewness_pop:
return n <= 1, n == 2
default:
return n <= 1, false
}
}

func newVariance(kind int) func() sqlite3.AggregateFunction {
return func() sqlite3.AggregateFunction { return &variance{kind: kind} }
}
Expand All @@ -111,14 +140,11 @@ type variance struct {
}

func (fn *variance) Value(ctx sqlite3.Context) {
switch fn.n {
case 1:
switch fn.kind {
case var_pop, stddev_pop:
ctx.ResultFloat(0)
}
switch null, zero := special(fn.kind, fn.n); {
case zero:
ctx.ResultFloat(0)
return
case 0:
case null:
return
}

Expand Down Expand Up @@ -166,18 +192,11 @@ func (fn *covariance) Value(ctx sqlite3.Context) {
ctx.ResultInt64(fn.regr_count())
return
}
switch fn.n {
case 1:
switch fn.kind {
case var_pop, stddev_pop, regr_sxx, regr_syy, regr_sxy:
ctx.ResultFloat(0)
return
case regr_avgx, regr_avgy:
break
default:
return
}
case 0:
switch null, zero := special(fn.kind, fn.n); {
case zero:
ctx.ResultFloat(0)
return
case null:
return
}

Expand Down Expand Up @@ -234,3 +253,51 @@ func (fn *covariance) Inverse(ctx sqlite3.Context, arg ...sqlite3.Value) {
fn.dequeue(fa, fb)
}
}

func newMoments(kind int) func() sqlite3.AggregateFunction {
return func() sqlite3.AggregateFunction { return &momentfn{kind: kind} }
}

type momentfn struct {
kind int
moments
}

func (fn *momentfn) Value(ctx sqlite3.Context) {
switch null, zero := special(fn.kind, fn.n); {
case zero:
ctx.ResultFloat(0)
return
case null:
return
}

var r float64
switch fn.kind {
case skewness_pop:
r = fn.skewness_pop()
case skewness_samp:
r = fn.skewness_samp()
case kurtosis_pop:
r = fn.kurtosis_pop()
case kurtosis_samp:
r = fn.kurtosis_samp()
}
ctx.ResultFloat(r)
}

func (fn *momentfn) Step(ctx sqlite3.Context, arg ...sqlite3.Value) {
a := arg[0]
f := a.Float()
if f != 0.0 || a.NumericType() != sqlite3.NULL {
fn.enqueue(f)
}
}

func (fn *momentfn) Inverse(ctx sqlite3.Context, arg ...sqlite3.Value) {
a := arg[0]
f := a.Float()
if f != 0.0 || a.NumericType() != sqlite3.NULL {
fn.dequeue(f)
}
}
19 changes: 17 additions & 2 deletions ext/stats/stats_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -49,7 +49,9 @@ func TestRegister_variance(t *testing.T) {
SELECT
sum(x), avg(x),
var_samp(x), var_pop(x),
stddev_samp(x), stddev_pop(x)
stddev_samp(x), stddev_pop(x),
skewness_samp(x), skewness_pop(x),
kurtosis_samp(x), kurtosis_pop(x)
FROM data`)
if err != nil {
t.Fatal(err)
Expand All @@ -73,13 +75,26 @@ func TestRegister_variance(t *testing.T) {
if got := stmt.ColumnFloat(5); got != math.Sqrt(22.5) {
t.Errorf("got %v, want √22.5", got)
}
if got := stmt.ColumnFloat(6); got != 0 {
t.Errorf("got %v, want zero", got)
}
if got := stmt.ColumnFloat(7); got != 0 {
t.Errorf("got %v, want zero", got)
}
if got := stmt.ColumnFloat(8); float32(got) != -3.3 {
t.Errorf("got %v, want -3.3", got)
}
if got := stmt.ColumnFloat(9); got != -1.64 {
t.Errorf("got %v, want -1.64", got)
}
}
stmt.Close()

stmt, _, err = db.Prepare(`
SELECT
var_samp(x) OVER (ROWS 1 PRECEDING),
var_pop(x) OVER (ROWS 1 PRECEDING)
var_pop(x) OVER (ROWS 1 PRECEDING),
skewness_pop(x) OVER (ROWS 1 PRECEDING)
FROM data`)
if err != nil {
t.Fatal(err)
Expand Down

0 comments on commit 42bad58

Please sign in to comment.