diff --git a/bench/nonbacktracking.jl b/bench/nonbacktracking.jl new file mode 100644 index 000000000..dc8847648 --- /dev/null +++ b/bench/nonbacktracking.jl @@ -0,0 +1,48 @@ +using LightGraphs + +sizesnbm1 = Int64[@allocated non_backtracking_matrix(CycleGraph(2^i))for i in 4:10] +sizesnbm2 = Int64[@allocated Nonbacktracking(CycleGraph(2^i)) for i in 4:10] + + +println("Relative Sizes:\n $(float(sizesnbm1./sizesnbm2))") + +macro storetime(name, expression) + ex = quote + val = $expression; + timeinfo[$name] = @elapsed $expression; + val + end + return ex +end + +function bench(g) + nbt = @storetime :Construction_Nonbacktracking Nonbacktracking(g) + x = ones(Float64, size(nbt)[1]) + info("Cycle with $n vertices has nbt in R^$(size(nbt))") + + B, nmap = @storetime :Construction_Dense non_backtracking_matrix(g) + y = @storetime :Multiplication_Nonbacktracking nbt*x + z = @storetime :Multiplication_Dense B*x; + S = @storetime :Construction_DS sparse(B) + z = @storetime :Multiplication_Sparse z = S*x; + Sp = @storetime :Construction_Sparse sparse(nbt) + z = @storetime :Multiplication_Sparse Sp*x +end + +function report(timeinfo) + info("Times") + println("Function\t Constructors\t Multiplication") + println("Dense \t $(timeinfo[:Construction_Dense])\t $(timeinfo[:Multiplication_Dense])") + println("Sparse \t $(timeinfo[:Construction_Sparse])\t $(timeinfo[:Multiplication_Sparse])") + println("Implicit\t $(timeinfo[:Construction_Nonbacktracking])\t $(timeinfo[:Multiplication_Nonbacktracking])") + info("Implicit Multiplication is $(timeinfo[:Multiplication_Dense]/timeinfo[:Multiplication_Nonbacktracking]) faster than dense.") + info("Sparse Multiplication is $(timeinfo[:Multiplication_Nonbacktracking]/timeinfo[:Multiplication_Sparse]) faster than implicit.") + info("Direct Sparse Construction took $(timeinfo[:Construction_Sparse]) + Dense to Sparse took: $(timeinfo[:Construction_DS])") +end + +n = 2^13 +C = CycleGraph(n) +timeinfo = Dict{Symbol, Float64}() +bench(C) +report(timeinfo) diff --git a/src/LightGraphs.jl b/src/LightGraphs.jl index 96f10bb5e..c01715b79 100644 --- a/src/LightGraphs.jl +++ b/src/LightGraphs.jl @@ -82,6 +82,8 @@ indegree_centrality, outdegree_centrality, katz_centrality, pagerank, # spectral adjacency_matrix,laplacian_matrix, adjacency_spectrum, laplacian_spectrum, CombinatorialAdjacency, non_backtracking_matrix, incidence_matrix, +nonbacktrack_embedding, Nonbacktracking, +contract, # astar a_star, diff --git a/src/community/detection.jl b/src/community/detection.jl index 86600ed78..d955c338b 100644 --- a/src/community/detection.jl +++ b/src/community/detection.jl @@ -9,13 +9,13 @@ return : array containing vertex assignments """ function community_detection_nback(g::Graph, k::Int) #TODO insert check on connected_components - ϕ = nonbacktrack_embedding(g, k) + ϕ = real(nonbacktrack_embedding(g, k)) if k==2 c = community_detection_threshold(g, ϕ[1,:]) else c = kmeans(ϕ, k).assignments end - c + return c end function community_detection_threshold(g::SimpleGraph, coords::AbstractArray) @@ -30,7 +30,6 @@ function community_detection_threshold(g::SimpleGraph, coords::AbstractArray) end - """ Spectral embedding of the non-backtracking matrix of `g` (see [Krzakala et al.](http://www.pnas.org/content/110/52/20935.short)). @@ -38,23 +37,22 @@ end `k`: number of dimensions in which to embed return : a matrix ϕ where ϕ[:,i] are the coordinates for vertex i. + +Note does not explicitly construct the `non_backtracking_matrix`. +See `Nonbacktracking` for details. + """ function nonbacktrack_embedding(g::Graph, k::Int) - B, edgeid = non_backtracking_matrix(g) - λ,eigv,_ = eigs(B, nev=k+1) - ϕ = zeros(Float64, k-1, nv(g)) + B = Nonbacktracking(g) + λ,eigv,conv = eigs(B, nev=k+1, v0=ones(Float64, B.m)) + ϕ = zeros(Complex64, nv(g), k-1) # TODO decide what to do with the stationary distribution ϕ[:,1] # this code just throws it away in favor of eigv[:,2:k+1]. # we might also use the degree distribution to scale these vectors as is # common with the laplacian/adjacency methods. for n=1:k-1 v= eigv[:,n+1] - for i=1:nv(g) - for j in neighbors(g, i) - u = edgeid[Edge(j,i)] - ϕ[n,i] += v[u] - end - end + ϕ[:,n] = contract(B, v) end - return ϕ + return ϕ' end diff --git a/src/spectral.jl b/src/spectral.jl index af082f932..b1a0ae2ce 100644 --- a/src/spectral.jl +++ b/src/spectral.jl @@ -49,45 +49,6 @@ function adjacency_matrix(g::SimpleGraph, dir::Symbol=:out, T::DataType=Int) end -""" -Given two oriented edges i->j and k->l in g, the -non-backtraking matrix B is defined as - -B[i->j, k->l] = δ(j,k)* (1 - δ(i,l)) - -returns a matrix B, and an edgemap storing the oriented edges' positions in B -""" -function non_backtracking_matrix(g::SimpleGraph) - # idedgemap = Dict{Int, Edge}() - edgeidmap = Dict{Edge, Int}() - m = 0 - for e in edges(g) - m += 1 - edgeidmap[e] = m - end - - if !is_directed(g) - for e in edges(g) - m += 1 - edgeidmap[reverse(e)] = m - end - end - - B = zeros(Float64, m, m) - - for (e,u) in edgeidmap - i, j = src(e), dst(e) - for k in in_neighbors(g,i) - k == j && continue - v = edgeidmap[Edge(k, i)] - B[v, u] = 1 - end - end - - return B, edgeidmap -end - - """Returns a sparse [Laplacian matrix](https://en.wikipedia.org/wiki/Laplacian_matrix) for a graph `g`, indexed by `[src, dst]` vertices. For undirected graphs, `dir` defaults to `:out`; for directed graphs, `dir` defaults to `:both`. `T` @@ -126,6 +87,7 @@ adjacency_spectrum(g::DiGraph, dir::Symbol=:both, T::DataType=Int) = eigvals(ful # GraphMatrices integration +# CombinatorialAdjacency(g) returns a type that supports iterative linear solvers and eigenvector solvers. @require GraphMatrices begin function CombinatorialAdjacency(g::Graph) @@ -167,3 +129,151 @@ function incidence_matrix(g::SimpleGraph, T::DataType=Int) spmx = SparseMatrixCSC(n_v,n_e,colpt,rowval,nzval) return spmx end + +""" +Given two oriented edges i->j and k->l in g, the +non-backtraking matrix B is defined as + +B[i->j, k->l] = δ(j,k)* (1 - δ(i,l)) + +returns a matrix B, and an edgemap storing the oriented edges' positions in B +""" +function non_backtracking_matrix(g::SimpleGraph) + # idedgemap = Dict{Int, Edge}() + edgeidmap = Dict{Edge, Int}() + m = 0 + for e in edges(g) + m += 1 + edgeidmap[e] = m + end + + if !is_directed(g) + for e in edges(g) + m += 1 + edgeidmap[reverse(e)] = m + end + end + + B = zeros(Float64, m, m) + + for (e,u) in edgeidmap + i, j = src(e), dst(e) + for k in in_neighbors(g,i) + k == j && continue + v = edgeidmap[Edge(k, i)] + B[v, u] = 1 + end + end + + return B, edgeidmap +end + +"""Nonbacktracking: a compact representation of the nonbacktracking operator + + g: the underlying graph + edgeidmap: the association between oriented edges and index into the NBT matrix + +The Nonbacktracking operator can be used for community detection. +This representation is compact in that it uses only ne(g) additional storage +and provides an implicit representation of the matrix B_g defined below. + +Given two oriented edges i->j and k->l in g, the +non-backtraking matrix B is defined as + +B[i->j, k->l] = δ(j,k)* (1 - δ(i,l)) + +This type is in the style of GraphMatrices.jl and supports the necessary operations +for computed eigenvectors and conducting linear solves. + +Additionally the contract!(vertexspace, nbt, edgespace) method takes vectors represented in +the domain of B and represents them in the domain of the adjacency matrix of g. +""" +type Nonbacktracking{G} + g::G + edgeidmap::Dict{Edge,Int} + m::Int +end + +function Nonbacktracking(g::SimpleGraph) + edgeidmap = Dict{Edge, Int}() + m = 0 + for e in edges(g) + m += 1 + edgeidmap[e] = m + end + if !is_directed(g) + for e in edges(g) + m += 1 + edgeidmap[reverse(e)] = m + end + end + return Nonbacktracking(g, edgeidmap, m) +end + +size(nbt::Nonbacktracking) = (nbt.m,nbt.m) +eltype(nbt::Nonbacktracking) = Float64 +issym(nbt::Nonbacktracking) = false + +function *{G, T<:Number}(nbt::Nonbacktracking{G}, x::Vector{T}) + length(x) == nbt.m || error("dimension mismatch") + y = zeros(T, length(x)) + for (e,u) in nbt.edgeidmap + i, j = src(e), dst(e) + for k in in_neighbors(nbt.g,i) + k == j && continue + v = nbt.edgeidmap[Edge(k, i)] + y[v] += x[u] + end + end + return y +end + +function coo_sparse(nbt::Nonbacktracking) + m = nbt.m + #= I,J = zeros(Int, m), zeros(Int, m) =# + I,J = zeros(Int, 0), zeros(Int, 0) + for (e,u) in nbt.edgeidmap + i, j = src(e), dst(e) + for k in in_neighbors(nbt.g,i) + k == j && continue + v = nbt.edgeidmap[Edge(k, i)] + #= J[u] = v =# + #= I[u] = u =# + push!(I, v) + push!(J, u) + end + end + return I,J,1.0 +end + +sparse(nbt::Nonbacktracking) = sparse(coo_sparse(nbt)..., nbt.m,nbt.m) + +function *{G, T<:Number}(nbt::Nonbacktracking{G}, x::AbstractMatrix{T}) + y = zeros(x) + for i in 1:nbt.m + y[:,i] = nbt * x[:,i] + end + return y +end + +"""contract!(vertexspace, nbt, edgespace) in place version of +contract(nbt, edgespace). modifies first argument +""" +function contract!(vertexspace::Vector, nbt::Nonbacktracking, edgespace::Vector) + for i=1:nv(nbt.g) + for j in neighbors(nbt.g, i) + u = nbt.edgeidmap[Edge(j,i)] + vertexspace[i] += edgespace[u] + end + end +end + +"""contract(nbt, edgespace) +Integrates out the edges by summing over the edges incident to each vertex. +""" +function contract(nbt::Nonbacktracking, edgespace::Vector) + y = zeros(eltype(edgespace), nv(nbt.g)) + contract!(y,nbt,edgespace) + return y +end + diff --git a/test/community/detection.jl b/test/community/detection.jl index 95b2a9af3..e2e9f3efb 100644 --- a/test/community/detection.jl +++ b/test/community/detection.jl @@ -1,3 +1,55 @@ +""" Spectral embedding of the non-backtracking matrix of `g` +(see [Krzakala et al.](http://www.pnas.org/content/110/52/20935.short)). + +`g`: imput Graph +`k`: number of dimensions in which to embed + +return : a matrix ϕ where ϕ[:,i] are the coordinates for vertex i. +""" +function nonbacktrack_embedding_dense(g::Graph, k::Int) + B, edgeid = non_backtracking_matrix(g) + λ,eigv,conv = eigs(B, nev=k+1, v0=ones(Float64, size(B,1))) + ϕ = zeros(Complex64, k-1, nv(g)) + # TODO decide what to do with the stationary distribution ϕ[:,1] + # this code just throws it away in favor of eigv[:,2:k+1]. + # we might also use the degree distribution to scale these vectors as is + # common with the laplacian/adjacency methods. + for n=1:k-1 + v= eigv[:,n+1] + for i=1:nv(g) + for j in neighbors(g, i) + u = edgeid[Edge(j,i)] + ϕ[n,i] += v[u] + end + end + end + return ϕ +end + +n = 10; k = 5 +pg = PathGraph(n) +ϕ1 = nonbacktrack_embedding(pg, k)' + +nbt = Nonbacktracking(pg) +B, emap = non_backtracking_matrix(pg) +Bs = sparse(nbt) +@test sparse(B) == Bs + +# check that matvec works +x = ones(Float64, nbt.m) +y = nbt * x +z = B * x +@test norm(y-z) < 1e-8 + +#check that matmat works and full(nbt) == B +@test norm(nbt*eye(nbt.m) - B) < 1e-8 + +#check that we can use the implicit matvec in nonbacktrack_embedding +@test size(y) == size(x) +ϕ2 = nonbacktrack_embedding_dense(pg, k)' +@test size(ϕ2) == size(ϕ1) + +#check that this recovers communities in the path of cliques n=10 g10 = CompleteGraph(n) z = copy(g10) @@ -13,3 +65,4 @@ for k=2:5 end end end +