WIP: Multivariate factors #9

Open · wants to merge 24 commits into base: master
4 changes: 2 additions & 2 deletions LICENSE.md
@@ -1,7 +1,7 @@
Julia GaussianEP package Copyright (C) 2019 Alfredo Braunstein, Anna-Paola Muntoni, Andrea Pagnani and Mirko Pieropan (the "Authors"). All Rights Reserved.
Julia GaussianEP package Copyright (C) 2019 Alfredo Braunstein, Giovanni Catania, Anna-Paola Muntoni, Andrea Pagnani and Mirko Pieropan (the "Authors"). All Rights Reserved.

This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation; either version 3 of the Licence, or (at your option) any later version.

This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.

The file COPYING contains a copy of the GNU General Public License, also available at [https://www.gnu.org/licenses/gpl-3.0.en.html].
31 changes: 31 additions & 0 deletions src/Factor.jl
@@ -0,0 +1,31 @@
export Factor


abstract type Factor end


"""
moments!(av, va, p0::T, h, J) where T <:Factor -> (mean, variance)

input: ``p_0, h, J``

output: mean and variance of

`` p(x) ∝ p_0(x) exp(-½⋅J⋅x² + h⋅x)``
"""
function moments!(av, va, p0::T, h, J) where T <: Factor
# fallback when no specialization exists: assume a uniform prior, for which
# the tilted distribution is the Gaussian cavity itself
@warn "undefined moment calculation for $T, assuming uniform prior"
av .= J \ h
va .= J \ I
return av, va
end

"""

learn!(p0::T, h, J) -> nothing

update parameters with a single learning gradient step (learning rate is stored in p0)
"""
function learn!(p0::T, h, J) where T <: Factor
#by default, do nothing
return
end
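For context, a concrete factor type only needs to subtype `Factor` and specialize `moments!` (plus `learn!` if it has trainable parameters). Below is a minimal sketch under the interface above; the name `FactorGaussExample` and its fields are illustrative, not part of this PR:

```julia
# Illustrative only: a univariate Gaussian factor N(x; μ0, σ0²).
struct FactorGaussExample{T<:Real} <: Factor
    μ0::T   # factor mean
    σ0::T   # factor standard deviation
end

# The tilted distribution ψ(x)·exp(-½Jx² + hx) is again Gaussian,
# so its moments are available in closed form.
function moments!(av, va, p0::FactorGaussExample, h, J)
    Jtot = J[1, 1] + 1 / p0.σ0^2    # total precision
    htot = h[1] + p0.μ0 / p0.σ0^2   # total natural field
    av[1] = htot / Jtot             # tilted mean
    va[1, 1] = 1 / Jtot             # tilted variance
    return av, va
end
```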

14 changes: 14 additions & 0 deletions src/FactorGraph.jl
@@ -0,0 +1,14 @@
struct FactorGraph{T<:Real,F<:Factor}
factors::Vector{F}       # factor ψₐ for each factor index a
idx::Vector{Vector{Int}} # variable indices ∂a entering each factor
N::Int                   # number of variables
P::AbstractMatrix{T}     # projector onto the feasible subspace (identity if unconstrained)
d::AbstractVector{T}     # constant shift: x = Pz + d
end


FactorGraph(factors::Vector{F}, idx::Vector{Vector{Int}}, N::Int) where {F<:Factor} = FactorGraph(factors,idx,N,Diagonal(ones(N)),zeros(N))

FactorGraph(factors::Vector{F}, idx::Vector{Vector{Int}}, S::AbstractMatrix{T}, b::AbstractVector{T} = zeros(size(S,1))) where {T<:Real, F<:Factor} = FactorGraph(factors, idx, size(S,2), nullspace(Matrix(S)), S\b)
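As a usage sketch (mirroring the jldoctest further below), the second constructor converts a linear constraint ``Sx = b`` into the projector `P = nullspace(S)` and shift `d = S\b` stored in the graph:

```julia
# Three interval factors on x₁, x₂, x₃ plus the constraint x₁ - x₂ - x₃ = 0;
# FactorInterval is the univariate interval factor from univariate.jl.
FG = FactorGraph([FactorInterval(lo, hi) for (lo, hi) in [(0, 1), (0, 1), (-2, 2)]],
                 [[i] for i in 1:3],
                 [1.0 -1.0 -1.0])
```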


14 changes: 8 additions & 6 deletions src/GaussianEP.jl
@@ -1,13 +1,15 @@
module GaussianEP

export expectation_propagation, Term, EPState, EPOut
export Prior, IntervalPrior, SpikeSlabPrior, BinaryPrior, GaussianPrior, PosteriorPrior, QuadraturePrior, AutoPrior, ThetaPrior

using ExtractMacro, SpecialFunctions, LinearAlgebra

include("Term.jl")
include("priors.jl")
include("expectation_propagation.jl")
export FactorGraph, FactorGauss, EPState, expectation_propagation

include("ProgressReporter.jl")
include("Factor.jl")
include("FactorGraph.jl")
include("expectation_propagation.jl")
include("univariate.jl")
include("multivariate.jl")


end # end module
41 changes: 0 additions & 41 deletions src/Term.jl

This file was deleted.

225 changes: 108 additions & 117 deletions src/expectation_propagation.jl
@@ -1,77 +1,88 @@
using Random, LinearAlgebra, ExtractMacro

```@meta
CurrentModule = GaussianEP
```

function update_err!(dst, i, val)
r=abs(val - dst[i])
dst[i] = val
function update!(old::Array{T}, new::Array{T}, ρ::T = zero(T))::T where {T<:Real}
# damped in-place update old ← ρ*old + (1-ρ)*new; returns the largest
# entrywise change before damping
r = maximum(abs, new - old)
old .*= ρ
old .+= (1 - ρ) * new
return r
end
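For instance (values chosen for illustration):

```julia
old, new = [1.0], [3.0]
update!(old, new, 0.5)   # returns 2.0; old is now [2.0] after damping
```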

"""
Instantaneous state of an expectation propagation run.
"""
struct EPState{T<:AbstractFloat}
A::Matrix{T}
y::Vector{T}
Σ::Matrix{T}
v::Vector{T}
av::Vector{T}
va::Vector{T}
a::Vector{T}
μ::Vector{T}
b::Vector{T}
s::Vector{T}
struct EPState{T <: Real, F <: Factor}
Σ :: Matrix{T}           # current approximate covariance
μ :: Vector{T}           # current approximate mean
J :: Vector{Matrix{T}}   # approximated-factor precision blocks, one per factor
h :: Vector{Vector{T}}   # approximated-factor fields
Jc :: Vector{Matrix{T}}  # cavity precisions
hc :: Vector{Vector{T}}  # cavity fields
Jt :: Vector{Matrix{T}}  # tilted precisions
ht :: Vector{Vector{T}}  # tilted fields
FG :: FactorGraph{T,F}   # the underlying factor graph
end
EPState{T}(N, Nx = N) where {T <: AbstractFloat} = EPState{T}(Matrix{T}(undef,Nx,Nx), zeros(T,Nx), Matrix{T}(undef,Nx,Nx), zeros(T,Nx),zeros(T,N), zeros(T,N), zeros(T,N), zeros(T,N), ones(T,N), ones(T,N))

"""
Output of EP algorithm
eye(::Type{T}, n::Integer) where T = Matrix(T(1)*I, n, n)

"""
struct EPOut{T<:AbstractFloat}
av::Vector{T}
va::Vector{T}
μ::Vector{T}
s::Vector{T}
converged::Symbol
state::EPState{T}
function EPState(FG::FactorGraph{T,F}) where {T <: Real, F <: Factor}
d(a) = length(FG.idx[a])
M,N = length(FG.idx), FG.N
return EPState{T,F}(eye(T, N), zeros(T, N),
[eye(T, d(a)) for a=1:M], [zeros(T, d(a)) for a=1:M],
[eye(T, d(a)) for a=1:M], [zeros(T, d(a)) for a=1:M],
[eye(T, d(a)) for a=1:M], [zeros(T, d(a)) for a=1:M],
FG)
end
function EPOut(s, converged::Symbol) where {T <: AbstractFloat}
converged ∈ (:converged,:unconverged) || error("$converged is not a valid symbol")
return EPOut(s.av,s.va, s.μ,s.s,converged,s)

function update!(state::EPState{T}, ψ::Factor, a::Integer, ρ::T, epsvar::T = zero(T)) where {T <: Real}
@extract state : Σ μ J h Jc hc Jt ht FG
∂a = FG.idx[a]
# Jca, hca are the cavity natural parameters: full marginal minus factor a's message
hca, Jca = hc[a], Jc[a]
Jca .= (Σ[∂a, ∂a]+epsvar*I)\I .- J[a]
hca .= Σ[∂a, ∂a]\μ[∂a] .- h[a]
# hta, Jta receive the tilted moments (mean, covariance) of ψ times the cavity
hta, Jta = ht[a], Jt[a]
moments!(hta, Jta, ψ, hca, Jca)
# convert the tilted moments to natural parameters (precision, field)
Jta .= (Jta+epsvar*I)\I
hta .= Jta*hta
# the new approximated factor is (tilted) - (cavity), applied with damping ρ
ε = max(update!(J[a], Jta .- Jca, ρ), update!(h[a], hta .- hca, ρ))
# single gradient step on ψ's parameters, if learn! is specialized
learn!(ψ, hca, Jca)
return ε
end
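Written out for a single scalar factor (arbitrary numbers, `moments!` left symbolic), the bookkeeping above is:

```julia
# One update! step with ρ = 0 and epsvar = 0, scalar case (illustrative):
Σaa, μa = 0.5, 0.2      # marginal block of the current (Σ, μ)
Ja, ha = 1.0, 0.1       # current approximated factor a
Jc = 1/Σaa - Ja         # cavity precision  (Σ[∂a,∂a]⁻¹ - J[a])
hc = μa/Σaa - ha        # cavity field      (Σ[∂a,∂a]⁻¹μ[∂a] - h[a])
# moments! would return the tilted mean/variance (μt, Σt) of ψ(x)exp(-½Jc·x² + hc·x);
# the new approximated factor is then Ja ← 1/Σt - Jc and ha ← μt/Σt - hc.
```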

"""
expectation_propagation(H::Vector{Term{T}}, P0::Vector{Prior}, F::AbstractMatrix{T} = zeros(0,length(P0)), d::Vector{T} = zeros(size(F,1));
maxiter::Int = 2000,
callback = (x...)->nothing,
# state::EPState{T} = EPState{T}(sum(size(F)), size(F)[2]),
damp::T = 0.9,
epsconv::T = 1e-6,
maxvar::T = 1e50,
minvar::T = 1e-50,
inverter::Function = inv) where {T <: Real, P <: Prior}
expectation_propagation(FG::FactorGraph;
maxiter::Int = 2000,
callback = (state,iter,ε)->nothing,
damp::T = 0.9,
epsconv::T = 1e-6,
maxvar::T = 1e50,
minvar::T = 1e-50,
state::EPState{T} = EPState(FG),
inverter::Function = inv) -> (state, converged, iter, ε)


EP for approximate inference of

``P(\\bf{x})=\\frac1Z exp(-\\frac12\\bf{x}' A \\bf{x} + \\bf{x'} \\bf{y}))×\\prod_i p_{i}(x_i)``
``P(\\bf{x})=\\frac1Z \\prod_a ψ_{a}(x_a)``

Arguments:

* `FG::FactorGraph`: factor graph containing the factors ``ψ_a``, their variable indices, and (optionally) a projector `P` and constant shift `d` restricting inference to the affine subspace ``x = Pz + d``.

Optional named arguments:

* `maxiter::Int = 2000`: maximum number of iterations
* `callback = (x...)->nothing`: your own function to report progress, see [`ProgressReporter`](@ref)
* `state::EPState{T} = EPState(FG)`: if supplied, all internal state is updated here
* `damp::T = 0.9`: damping parameter
* `damp::T = 0.0`: damping parameter
* `epsconv::T = 1e-6`: convergence criterion
* `maxvar::T = 1e50`: maximum variance
* `minvar::T = 1e-50`: minimum variance
@@ -80,82 +91,62 @@ Optional named arguments:
# Example

```jldoctest
julia> t=Term(zeros(2,2),zeros(2),1.0)
Term{Float64}([0.0 0.0; 0.0 0.0], [0.0, 0.0], 0.0, 1.0, 0.0, 0)

julia> P=[IntervalPrior(i...) for i in [(0,1),(0,1),(-2,2)]]
3-element Array{IntervalPrior{Int64},1}:
IntervalPrior{Int64}(0, 1)
IntervalPrior{Int64}(0, 1)
IntervalPrior{Int64}(-2, 2)
julia> FG=FactorGraph([FactorInterval(a,b) for (a,b) in [(0,1),(0,1),(-2,2)]], [[i] for i=1:3], [1.0 -1.0 -1.0])
FactorGraph(Factor[FactorInterval{Int64}(0, 1), FactorInterval{Int64}(0, 1), FactorInterval{Int64}(-2, 2)], Array{Int64,1}[[1], [2], [3]], 3)

julia> F=[1.0 -1.0];
julia> using LinearAlgebra

julia> res = expectation_propagation([t], P, F)
GaussianEP.EPOut{Float64}([0.499997, 0.499997, 3.66527e-15], [0.083325, 0.083325, 0.204301], [0.489862, 0.489862, 3.66599e-15], [334.018, 334.018, 0.204341], :converged, EPState{Float64}([9.79055 -0.00299477; -0.00299477 9.79055], [0.0, 0.0], [0.102139 3.12427e-5; 3.12427e-5 0.102139], [0.489862, 0.489862], [0.499997, 0.499997, 3.66527e-15], [0.083325, 0.083325, 0.204301], [0.490876, 0.490876, -1.86785e-17], [0.489862, 0.489862, 3.66599e-15], [0.100288, 0.100288, 403.599], [334.018, 334.018, 0.204341]))
julia> res = expectation_propagation(FG)
(EPState{Float64}([0.0833329 1.00114e-6 0.0833319; 1.00114e-6 0.0833329 -0.0833319; 0.0833319 -0.0833319 0.166664], [0.499994, 0.499994, 1.39058e-13], Array{Float64,2}[[11.9999], [11.9999], [0.00014416]], Array{Float64,1}[[5.99988], [5.99988], [-1.14443e-13]], Array{Float64,2}[[1.0], [1.0], [1.0]], Array{Float64,1}[[0.0], [0.0], [0.0]], FactorGraph(Factor[FactorInterval{Int64}(0, 1), FactorInterval{Int64}(0, 1), FactorInterval{Int64}(-2, 2)], Array{Int64,1}[[1], [2], [3]], 3)), :converged, 162, 9.829257408000558e-7)
```

Note on subspace restriction

P(x) ∝ ∫dz δ(x-Pz-d) ∏ₐψₐ(xₐ)
x = Pz + d
Q(x) ∝ ∏ₐϕₐ(xₐ)
     ∝ exp(-½ xᵀAx + xᵀy)
     ∝ ∫dz δ(x-Pz-d) Q(z)
Q(z) ∝ exp(-½ (Pz+d)ᵀA(Pz+d) + (Pz+d)ᵀy)
     ∝ exp(-½ zᵀPᵀAPz - zᵀPᵀAd - ½dᵀAd + (zᵀPᵀ+dᵀ)y)
     ∝ exp(-½ zᵀPᵀAPz + zᵀPᵀ(y - Ad))
Σz = (PᵀAP)⁻¹
μz = (PᵀAP)⁻¹Pᵀ(y-Ad)
Σx = P(PᵀAP)⁻¹Pᵀ
μx = Pμz + d
   = P((PᵀAP)⁻¹Pᵀ(y-Ad)) + d
   = Σx(y-Ad) + d
"""
function expectation_propagation(H::Vector{Term{T}}, P0::Vector{P}, F::AbstractMatrix{T} = zeros(T,0,length(P0)), d::AbstractVector{T} = zeros(T,size(F,1));
maxiter::Int = 2000,
callback = (x...)->nothing,
state::EPState{T} = EPState{T}(sum(size(F)), size(F)[2]),
damp::T = 0.9,
epsconv::T = 1e-6,
maxvar::T = 1e50,
minvar::T = 1e-50,
inverter::Function = inv) where {T <: Real, P <: Prior}
@extract state A y Σ v av va a μ b s
Ny,Nx = size(F)
N = Nx + Ny
@assert size(P0,1) == N
Fp = copy(F')
function expectation_propagation(FG::FactorGraph{T,F};
maxiter::Integer = 2000,
callback = (x...)->nothing,
damp::T = zero(T),
epsconv::T = 1e-6,
inverter = inv,
epsvar::T = zero(T),
state::EPState{T} = EPState(FG)) where {F<:Factor, T<:Real}

@extract state : Σ μ J h
N, M = FG.N, length(FG.factors)
A, y = zeros(N,N), zeros(N)
ε = 0.0
for iter = 1:maxiter
sum!(A,y,H)
Δμ, Δs, Δav, Δva = 0.0, 0.0, 0.0, 0.0
A .+= Diagonal(1 ./ b[1:Nx]) .+ Fp * Diagonal(1 ./ b[Nx+1:end]) * F
Σ .= inverter(A)
v .= Σ * (y .+ a[1:Nx] ./ b[1:Nx] .+ Fp * ((a[Nx+1:end]-d) ./ b[Nx+1:end]))
for i in 1:N
if i <= Nx
ss = clamp(Σ[i,i], minvar, maxvar)
vv = v[i]
else
x = Fp[:, i-Nx]
ss = clamp(dot(x, Σ*x), minvar, maxvar)
vv = dot(x, v) + d[i-Nx]
end

if ss < b[i]
Δs = max(Δs, update_err!(s, i, clamp(1/(1/ss - 1/b[i]), minvar, maxvar)))
Δμ = max(Δμ, update_err!(μ, i, s[i] * (vv/ss - a[i]/b[i])))
else
ss == b[i] && @warn "infinite var, ss = $ss"
Δs = max(Δs, update_err!(s, i, maxvar))
Δμ = max(Δμ, update_err!(μ, i, 0))
end
tav, tva = moments(P0[i], μ[i], sqrt(s[i]));
Δav = max(Δav, update_err!(av, i, tav))
Δva = max(Δva, update_err!(va, i, tva))
(isnan(av[i]) || isnan(va[i])) && @warn "avnew = $(av[i]) varnew = $(va[i])"

new_b = clamp(1/(1/va[i] - 1/s[i]), minvar, maxvar)
new_a = av[i] + new_b * (av[i] - μ[i])/s[i]
a[i] = damp * a[i] + (1 - damp) * new_a
b[i] = damp * b[i] + (1 - damp) * new_b
A .= 0.0
y .= 0.0
for a in 1:M
∂a = FG.idx[a]
A[∂a, ∂a] .+= J[a]
y[∂a] .+= h[a]
end
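# A, y now hold the natural parameters of the Gaussian approximation
# Q(x) ∝ exp(-½ xᵀAx + xᵀy), assembled from the factor messages (Jₐ, hₐ)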

# learn prior's params
for i in randperm(N)
gradient(P0[i], μ[i], sqrt(s[i]));
end
# learn β params
for i in 1:length(H)
updateβ(H[i], av[1:Nx])
end
callback(av,Δav,epsconv,maxiter,H,P0)
if Δav < epsconv
return EPOut(state, :converged)
Σ .= FG.P*inverter(FG.P'*A*FG.P)*FG.P'
μ .= Σ * (y .- A*FG.d) .+ FG.d
ε = 0.0
for a=1:M
ε = max(ε, update!(state, FG.factors[a], a, damp, epsvar))
end
callback(state,iter,ε) != nothing && break
ε < epsconv && return (state, :converged, iter, ε)
end
return EPOut(state, :unconverged)
return (state, :unconverged, maxiter, ε)
end
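A quick numerical sanity check of the subspace identities in the docstring above (values arbitrary, not from the package):

```julia
using LinearAlgebra

A = [2.0 0.3; 0.3 1.0]; y = [0.5, -0.2]   # Gaussian part exp(-½xᵀAx + xᵀy)
S = [1.0 1.0]; b = [1.0]                  # affine constraint x₁ + x₂ = 1
P = nullspace(S); d = S \ b               # as in the FactorGraph constructor
Σx = P * ((P' * A * P) \ P')              # Σx = P(PᵀAP)⁻¹Pᵀ
μx = Σx * (y - A * d) + d                 # μx = Σx(y - Ad) + d
S * μx ≈ b                                # the constrained mean satisfies Sx = b
```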
