WIP: Multivariate factors #9

Open · wants to merge 24 commits into base: master
4 changes: 2 additions & 2 deletions LICENSE.md
@@ -1,7 +1,7 @@
Julia GaussianEP package Copyright (C) 2019 Alfredo Braunstein, Anna-Paola Muntoni, Andrea Pagnani and Mirko Pieropan (the "Authors"). All Rights Reserved.
Julia GaussianEP package Copyright (C) 2019 Alfredo Braunstein, Giovanni Catania, Anna-Paola Muntoni, Andrea Pagnani and Mirko Pieropan (the "Authors"). All Rights Reserved.

This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation; either version 3 of the Licence, or (at your option) any later version.

This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.

The file COPYING contains a copy of the GNU General Public License, also available at [https://www.gnu.org/licenses/gpl-3.0.en.html].
31 changes: 31 additions & 0 deletions src/Factor.jl
@@ -0,0 +1,31 @@
export Factor


abstract type Factor end


"""
moments!(av, va, p0::T, h, J) where T <:Factor -> (mean, variance)

input: ``p_0, h, J``

output: mean and variance of

`` p(x) ∝ p_0(x) exp(-½⋅J⋅x² + h⋅x)``
"""
function moments!(av, va, p0::T, h, J) where T <: Factor
# fallback when no specialization exists: assume a uniform prior, for which
# the tilted distribution is the Gaussian cavity itself
@warn "undefined moment calculation for $T, assuming uniform prior"
av .= J \ h
va .= J \ I
return av, va
end

"""

learn!(p0::T, h, J) -> nothing

update parameters with a single learning gradient step (learning rate is stored in p0)
"""
function learn!(p0::T, h, J) where T <: Factor
#by default, do nothing
return
end
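For context, a concrete factor type only needs to subtype `Factor` and specialize `moments!` (plus `learn!` if it has trainable parameters). Below is a minimal sketch under the interface above; the name `FactorGaussExample` and its fields are illustrative, not part of this PR:

```julia
# Illustrative only: a univariate Gaussian factor N(x; μ0, σ0²).
struct FactorGaussExample{T<:Real} <: Factor
    μ0::T   # factor mean
    σ0::T   # factor standard deviation
end

# The tilted distribution ψ(x)·exp(-½Jx² + hx) is again Gaussian,
# so its moments are available in closed form.
function moments!(av, va, p0::FactorGaussExample, h, J)
    Jtot = J[1, 1] + 1 / p0.σ0^2    # total precision
    htot = h[1] + p0.μ0 / p0.σ0^2   # total natural field
    av[1] = htot / Jtot             # tilted mean
    va[1, 1] = 1 / Jtot             # tilted variance
    return av, va
end
```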

14 changes: 14 additions & 0 deletions src/FactorGraph.jl
@@ -0,0 +1,14 @@
struct FactorGraph{T<:Real,F<:Factor}
factors::Vector{F}       # factor ψₐ for each factor index a
idx::Vector{Vector{Int}} # variable indices ∂a entering each factor
N::Int                   # number of variables
P::AbstractMatrix{T}     # projector onto the feasible subspace (identity if unconstrained)
d::AbstractVector{T}     # constant shift: x = Pz + d
end


FactorGraph(factors::Vector{F}, idx::Vector{Vector{Int}}, N::Int) where {F<:Factor} = FactorGraph(factors,idx,N,Diagonal(ones(N)),zeros(N))

FactorGraph(factors::Vector{F}, idx::Vector{Vector{Int}}, S::AbstractMatrix{T}, b::AbstractVector{T} = zeros(size(S,1))) where {T<:Real, F<:Factor} = FactorGraph(factors, idx, size(S,2), nullspace(Matrix(S)), S\b)
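As a usage sketch (mirroring the jldoctest further below), the second constructor converts a linear constraint ``Sx = b`` into the projector `P = nullspace(S)` and shift `d = S\b` stored in the graph:

```julia
# Three interval factors on x₁, x₂, x₃ plus the constraint x₁ - x₂ - x₃ = 0;
# FactorInterval is the univariate interval factor from univariate.jl.
FG = FactorGraph([FactorInterval(lo, hi) for (lo, hi) in [(0, 1), (0, 1), (-2, 2)]],
                 [[i] for i in 1:3],
                 [1.0 -1.0 -1.0])
```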


14 changes: 8 additions & 6 deletions src/GaussianEP.jl
@@ -1,13 +1,15 @@
module GaussianEP

export expectation_propagation, Term, EPState, EPOut
export Prior, IntervalPrior, SpikeSlabPrior, BinaryPrior, GaussianPrior, PosteriorPrior, QuadraturePrior, AutoPrior, ThetaPrior

using ExtractMacro, SpecialFunctions, LinearAlgebra

include("Term.jl")
include("priors.jl")
include("expectation_propagation.jl")
export FactorGraph, FactorGauss, EPState, expectation_propagation

include("ProgressReporter.jl")
include("Factor.jl")
include("FactorGraph.jl")
include("expectation_propagation.jl")
include("univariate.jl")
include("multivariate.jl")


end # end module
41 changes: 0 additions & 41 deletions src/Term.jl

This file was deleted.

225 changes: 108 additions & 117 deletions src/expectation_propagation.jl
@@ -1,77 +1,88 @@
using Random, LinearAlgebra, ExtractMacro

```@meta
CurrentModule = GaussianEP
```

function update_err!(dst, i, val)
r=abs(val - dst[i])
dst[i] = val
function update!(old::Array{T}, new::Array{T}, ρ::T = zero(T))::T where {T<:Real}
# damped in-place update old ← ρ*old + (1-ρ)*new; returns the largest
# entrywise change before damping
r = maximum(abs, new - old)
old .*= ρ
old .+= (1 - ρ) * new
return r
end
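For instance (values chosen for illustration):

```julia
old, new = [1.0], [3.0]
update!(old, new, 0.5)   # returns 2.0; old is now [2.0] after damping
```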

"""
Instantaneous state of an expectation propagation run.
"""
struct EPState{T<:AbstractFloat}
A::Matrix{T}
y::Vector{T}
Σ::Matrix{T}
v::Vector{T}
av::Vector{T}
va::Vector{T}
a::Vector{T}
μ::Vector{T}
b::Vector{T}
s::Vector{T}
struct EPState{T <: Real, F <: Factor}
Σ :: Matrix{T}           # current approximate covariance
μ :: Vector{T}           # current approximate mean
J :: Vector{Matrix{T}}   # approximated-factor precision blocks, one per factor
h :: Vector{Vector{T}}   # approximated-factor fields
Jc :: Vector{Matrix{T}}  # cavity precisions
hc :: Vector{Vector{T}}  # cavity fields
Jt :: Vector{Matrix{T}}  # tilted precisions
ht :: Vector{Vector{T}}  # tilted fields
FG :: FactorGraph{T,F}   # the underlying factor graph
end
EPState{T}(N, Nx = N) where {T <: AbstractFloat} = EPState{T}(Matrix{T}(undef,Nx,Nx), zeros(T,Nx), Matrix{T}(undef,Nx,Nx), zeros(T,Nx),zeros(T,N), zeros(T,N), zeros(T,N), zeros(T,N), ones(T,N), ones(T,N))

"""
Output of EP algorithm
eye(::Type{T}, n::Integer) where T = Matrix(T(1)*I, n, n)

"""
struct EPOut{T<:AbstractFloat}
av::Vector{T}
va::Vector{T}
μ::Vector{T}
s::Vector{T}
converged::Symbol
state::EPState{T}
function EPState(FG::FactorGraph{T,F}) where {T <: Real, F <: Factor}
d(a) = length(FG.idx[a])
M,N = length(FG.idx), FG.N
return EPState{T,F}(eye(T, N), zeros(T, N),
[eye(T, d(a)) for a=1:M], [zeros(T, d(a)) for a=1:M],
[eye(T, d(a)) for a=1:M], [zeros(T, d(a)) for a=1:M],
[eye(T, d(a)) for a=1:M], [zeros(T, d(a)) for a=1:M],
FG)
end
function EPOut(s, converged::Symbol) where {T <: AbstractFloat}
converged ∈ (:converged,:unconverged) || error("$converged is not a valid symbol")
return EPOut(s.av,s.va, s.μ,s.s,converged,s)

function update!(state::EPState{T}, ψ::Factor, a::Integer, ρ::T, epsvar::T = zero(T)) where {T <: Real}
@extract state : Σ μ J h Jc hc Jt ht FG
∂a = FG.idx[a]
# Jca, hca are the cavity natural parameters: full marginal minus factor a's message
hca, Jca = hc[a], Jc[a]
Jca .= (Σ[∂a, ∂a]+epsvar*I)\I .- J[a]
hca .= Σ[∂a, ∂a]\μ[∂a] .- h[a]
# hta, Jta receive the tilted moments (mean, covariance) of ψ times the cavity
hta, Jta = ht[a], Jt[a]
moments!(hta, Jta, ψ, hca, Jca)
# convert the tilted moments to natural parameters (precision, field)
Jta .= (Jta+epsvar*I)\I
hta .= Jta*hta
# the new approximated factor is (tilted) - (cavity), applied with damping ρ
ε = max(update!(J[a], Jta .- Jca, ρ), update!(h[a], hta .- hca, ρ))
# single gradient step on ψ's parameters, if learn! is specialized
learn!(ψ, hca, Jca)
return ε
end
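Written out for a single scalar factor (arbitrary numbers, `moments!` left symbolic), the bookkeeping above is:

```julia
# One update! step with ρ = 0 and epsvar = 0, scalar case (illustrative):
Σaa, μa = 0.5, 0.2      # marginal block of the current (Σ, μ)
Ja, ha = 1.0, 0.1       # current approximated factor a
Jc = 1/Σaa - Ja         # cavity precision  (Σ[∂a,∂a]⁻¹ - J[a])
hc = μa/Σaa - ha        # cavity field      (Σ[∂a,∂a]⁻¹μ[∂a] - h[a])
# moments! would return the tilted mean/variance (μt, Σt) of ψ(x)exp(-½Jc·x² + hc·x);
# the new approximated factor is then Ja ← 1/Σt - Jc and ha ← μt/Σt - hc.
```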

"""
expectation_propagation(H::Vector{Term{T}}, P0::Vector{Prior}, F::AbstractMatrix{T} = zeros(0,length(P0)), d::Vector{T} = zeros(size(F,1));
maxiter::Int = 2000,
callback = (x...)->nothing,
# state::EPState{T} = EPState{T}(sum(size(F)), size(F)[2]),
damp::T = 0.9,
epsconv::T = 1e-6,
maxvar::T = 1e50,
minvar::T = 1e-50,
inverter::Function = inv) where {T <: Real, P <: Prior}
expectation_propagation(FG::FactorGraph;
maxiter::Int = 2000,
callback = (state,iter,ε)->nothing,
damp::T = 0.9,
epsconv::T = 1e-6,
maxvar::T = 1e50,
minvar::T = 1e-50,
state::EPState{T} = EPState(FG),
inverter::Function = inv) -> (state, converged, iter, ε)


EP for approximate inference of

``P(\\bf{x})=\\frac1Z exp(-\\frac12\\bf{x}' A \\bf{x} + \\bf{x'} \\bf{y}))×\\prod_i p_{i}(x_i)``
``P(\\bf{x})=\\frac1Z \\prod_a ψ_{a}(x_a)``

Arguments:

* `FG::FactorGraph`: factor graph containing the factors ``ψ_a``, their variable indices, and (optionally) a projector `P` and constant shift `d` restricting inference to the affine subspace ``x = Pz + d``.

Optional named arguments:

* `maxiter::Int = 2000`: maximum number of iterations
* `callback = (x...)->nothing`: your own function to report progress, see [`ProgressReporter`](@ref)
* `state::EPState{T} = EPState(FG)`: if supplied, all internal state is updated here
* `damp::T = 0.9`: damping parameter
* `damp::T = 0.0`: damping parameter
* `epsconv::T = 1e-6`: convergence criterion
* `maxvar::T = 1e50`: maximum variance
* `minvar::T = 1e-50`: minimum variance
@@ -80,82 +91,62 @@ Optional named arguments:
# Example

```jldoctest
julia> t=Term(zeros(2,2),zeros(2),1.0)
Term{Float64}([0.0 0.0; 0.0 0.0], [0.0, 0.0], 0.0, 1.0, 0.0, 0)

julia> P=[IntervalPrior(i...) for i in [(0,1),(0,1),(-2,2)]]
3-element Array{IntervalPrior{Int64},1}:
IntervalPrior{Int64}(0, 1)
IntervalPrior{Int64}(0, 1)
IntervalPrior{Int64}(-2, 2)
julia> FG=FactorGraph([FactorInterval(a,b) for (a,b) in [(0,1),(0,1),(-2,2)]], [[i] for i=1:3], [1.0 -1.0 -1.0])
FactorGraph(Factor[FactorInterval{Int64}(0, 1), FactorInterval{Int64}(0, 1), FactorInterval{Int64}(-2, 2)], Array{Int64,1}[[1], [2], [3]], 3)

julia> F=[1.0 -1.0];
julia> using LinearAlgebra

julia> res = expectation_propagation([t], P, F)
GaussianEP.EPOut{Float64}([0.499997, 0.499997, 3.66527e-15], [0.083325, 0.083325, 0.204301], [0.489862, 0.489862, 3.66599e-15], [334.018, 334.018, 0.204341], :converged, EPState{Float64}([9.79055 -0.00299477; -0.00299477 9.79055], [0.0, 0.0], [0.102139 3.12427e-5; 3.12427e-5 0.102139], [0.489862, 0.489862], [0.499997, 0.499997, 3.66527e-15], [0.083325, 0.083325, 0.204301], [0.490876, 0.490876, -1.86785e-17], [0.489862, 0.489862, 3.66599e-15], [0.100288, 0.100288, 403.599], [334.018, 334.018, 0.204341]))
julia> res = expectation_propagation(FG)
(EPState{Float64}([0.0833329 1.00114e-6 0.0833319; 1.00114e-6 0.0833329 -0.0833319; 0.0833319 -0.0833319 0.166664], [0.499994, 0.499994, 1.39058e-13], Array{Float64,2}[[11.9999], [11.9999], [0.00014416]], Array{Float64,1}[[5.99988], [5.99988], [-1.14443e-13]], Array{Float64,2}[[1.0], [1.0], [1.0]], Array{Float64,1}[[0.0], [0.0], [0.0]], FactorGraph(Factor[FactorInterval{Int64}(0, 1), FactorInterval{Int64}(0, 1), FactorInterval{Int64}(-2, 2)], Array{Int64,1}[[1], [2], [3]], 3)), :converged, 162, 9.829257408000558e-7)
```

Note on subspace restriction

P(x) ∝ ∫dz δ(x-Pz-d) ∏ₐψₐ(xₐ)
x = Pz + d
Q(x) ∝ ∏ₐϕₐ(xₐ)
     ∝ exp(-½ xᵀAx + xᵀy)
     ∝ ∫dz δ(x-Pz-d) Q(z)
Q(z) ∝ exp(-½ (Pz+d)ᵀA(Pz+d) + (Pz+d)ᵀy)
     ∝ exp(-½ zᵀPᵀAPz - zᵀPᵀAd - ½dᵀAd + (zᵀPᵀ+dᵀ)y)
     ∝ exp(-½ zᵀPᵀAPz + zᵀPᵀ(y - Ad))
Σz = (PᵀAP)⁻¹
μz = (PᵀAP)⁻¹Pᵀ(y-Ad)
Σx = P(PᵀAP)⁻¹Pᵀ
μx = Pμz + d
   = P((PᵀAP)⁻¹Pᵀ(y-Ad)) + d
   = Σx(y-Ad) + d
"""
function expectation_propagation(H::Vector{Term{T}}, P0::Vector{P}, F::AbstractMatrix{T} = zeros(T,0,length(P0)), d::AbstractVector{T} = zeros(T,size(F,1));
maxiter::Int = 2000,
callback = (x...)->nothing,
state::EPState{T} = EPState{T}(sum(size(F)), size(F)[2]),
damp::T = 0.9,
epsconv::T = 1e-6,
maxvar::T = 1e50,
minvar::T = 1e-50,
inverter::Function = inv) where {T <: Real, P <: Prior}
@extract state A y Σ v av va a μ b s
Ny,Nx = size(F)
N = Nx + Ny
@assert size(P0,1) == N
Fp = copy(F')
function expectation_propagation(FG::FactorGraph{T,F};
maxiter::Integer = 2000,
callback = (x...)->nothing,
damp::T = zero(T),
epsconv::T = 1e-6,
inverter = inv,
epsvar::T = zero(T),
state::EPState{T} = EPState(FG)) where {F<:Factor, T<:Real}

@extract state : Σ μ J h
N, M = FG.N, length(FG.factors)
A, y = zeros(N,N), zeros(N)
ε = 0.0
for iter = 1:maxiter
sum!(A,y,H)
Δμ, Δs, Δav, Δva = 0.0, 0.0, 0.0, 0.0
A .+= Diagonal(1 ./ b[1:Nx]) .+ Fp * Diagonal(1 ./ b[Nx+1:end]) * F
Σ .= inverter(A)
v .= Σ * (y .+ a[1:Nx] ./ b[1:Nx] .+ Fp * ((a[Nx+1:end]-d) ./ b[Nx+1:end]))
for i in 1:N
if i <= Nx
ss = clamp(Σ[i,i], minvar, maxvar)
vv = v[i]
else
x = Fp[:, i-Nx]
ss = clamp(dot(x, Σ*x), minvar, maxvar)
vv = dot(x, v) + d[i-Nx]
end

if ss < b[i]
Δs = max(Δs, update_err!(s, i, clamp(1/(1/ss - 1/b[i]), minvar, maxvar)))
Δμ = max(Δμ, update_err!(μ, i, s[i] * (vv/ss - a[i]/b[i])))
else
ss == b[i] && @warn "infinite var, ss = $ss"
Δs = max(Δs, update_err!(s, i, maxvar))
Δμ = max(Δμ, update_err!(μ, i, 0))
end
tav, tva = moments(P0[i], μ[i], sqrt(s[i]));
Δav = max(Δav, update_err!(av, i, tav))
Δva = max(Δva, update_err!(va, i, tva))
(isnan(av[i]) || isnan(va[i])) && @warn "avnew = $(av[i]) varnew = $(va[i])"

new_b = clamp(1/(1/va[i] - 1/s[i]), minvar, maxvar)
new_a = av[i] + new_b * (av[i] - μ[i])/s[i]
a[i] = damp * a[i] + (1 - damp) * new_a
b[i] = damp * b[i] + (1 - damp) * new_b
A .= 0.0
y .= 0.0
for a in 1:M
∂a = FG.idx[a]
A[∂a, ∂a] .+= J[a]
y[∂a] .+= h[a]
end
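# A, y now hold the natural parameters of the Gaussian approximation
# Q(x) ∝ exp(-½ xᵀAx + xᵀy), assembled from the factor messages (Jₐ, hₐ)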

# learn prior's params
for i in randperm(N)
gradient(P0[i], μ[i], sqrt(s[i]));
end
# learn β params
for i in 1:length(H)
updateβ(H[i], av[1:Nx])
end
callback(av,Δav,epsconv,maxiter,H,P0)
if Δav < epsconv
return EPOut(state, :converged)
Σ .= FG.P*inverter(FG.P'*A*FG.P)*FG.P'
μ .= Σ * (y .- A*FG.d) .+ FG.d
ε = 0.0
for a=1:M
ε = max(ε, update!(state, FG.factors[a], a, damp, epsvar))
end
callback(state,iter,ε) != nothing && break
ε < epsconv && return (state, :converged, iter, ε)
end
return EPOut(state, :unconverged)
return (state, :unconverged, maxiter, ε)
end
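A quick numerical sanity check of the subspace identities in the docstring above (values arbitrary, not from the package):

```julia
using LinearAlgebra

A = [2.0 0.3; 0.3 1.0]; y = [0.5, -0.2]   # Gaussian part exp(-½xᵀAx + xᵀy)
S = [1.0 1.0]; b = [1.0]                  # affine constraint x₁ + x₂ = 1
P = nullspace(S); d = S \ b               # as in the FactorGraph constructor
Σx = P * ((P' * A * P) \ P')              # Σx = P(PᵀAP)⁻¹Pᵀ
μx = Σx * (y - A * d) + d                 # μx = Σx(y - Ad) + d
S * μx ≈ b                                # the constrained mean satisfies Sx = b
```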
