Skip to content

Commit

Permalink
documentation and refactoring
Browse files Browse the repository at this point in the history
  • Loading branch information
pfgherardini committed Apr 4, 2018
1 parent be40317 commit 8515702
Show file tree
Hide file tree
Showing 8 changed files with 159 additions and 50 deletions.
2 changes: 2 additions & 0 deletions NAMESPACE
Original file line number Diff line number Diff line change
@@ -1,8 +1,10 @@
# Generated by roxygen2: do not edit by hand

export(build_graph)
export(complete_forceatlas2)
export(get_unsupervised_graph)
export(get_unsupervised_graph_from_files)
export(layout_forceatlas2)
import(Rcpp)
importFrom(igraph,E)
importFrom(igraph,V)
Expand Down
76 changes: 60 additions & 16 deletions R/forceatlas2.R
Original file line number Diff line number Diff line change
Expand Up @@ -26,41 +26,83 @@ adaptive_expand <- function(G, max.iter) {
}



layout_forceatlas2 <- function(G, ew_influence = 1, kgrav = 1, iter = 1000, prevent.overlap = FALSE, fixed = rep(FALSE, vcount(G)), stopping_tolerance = 0.001, barnes_hut = FALSE) {
#' ForceAtlas2 force-directed layout
#'
#' @param G The input \code{igraph} object. The graph must have an edge attribute named \code{weight}, representing edge weights
#' @param ew.influence Edge weight influence. The edge weights are set to \code{edge.weight ^ ew.influence} before the
#' calculation (see original ForceAtlas2 publication)
#' @param kgrav The gravity constant. Higher values will result in more compact graphs (see original ForceAtlas2 publication)
#' @param iter Maximum number of iterations. The algorithm will stop after this
#' many iterations, or when the average displacement of the nodes between two
#' iterations is less than the \code{stopping.tolerance} threshold (see below)
#' @param prevent.overlap Set this option to \code{TRUE} to prevent the nodes
#' from overlapping (see ForceAtlas2 description)
#' @param fixed A boolean vector of length equal to the number of nodes in the
#' graph which specifies which nodes need to be held in a fixed
#' position. If this is \code{NULL} (default), no nodes are held fixed
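#' @param stopping.tolerance The algorithm will stop after either \code{iter}
#' number of iterations, or when the average displacement of the nodes between
#' two iterations is less than this threshold. Note that the value passed here
#' is currently overridden based on the graph size: 0.01 for more than 2000
#' nodes, 0.005 for more than 800 nodes, and 0.001 otherwise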
#' @param barnes.hut Whether to use the Barnes-Hut approximation for speeding up
#' the calculations when dealing with large graphs. This option is
#' automatically set to \code{TRUE} when the graph has 2000 or more nodes
#' @return this function returns a list with three elements
#' \itemize{
#' \item{\code{lay}}: a numeric matrix with two columns containing the x and y coordinates of each node in the final
#' layout
#' \item{\code{avg_displ}}: a numeric vector containing the average displacement of the vertices at each iteration
#' \item{\code{max_displ}}: a numeric vector containing the maximum displacement between all the vertices after
#' each iteration
#' }
#' @references \url{http://gephi.github.io}
#' @references Jacomy M, Venturini T, Heymann S, Bastian M. ForceAtlas2, a
#' continuous graph layout algorithm for handy network visualization designed
#' for the Gephi software. PLoS One. 2014 Jun 10;9(6):e98679. doi:
#' 10.1371/journal.pone.0098679
#' @export
#'
layout_forceatlas2 <- function(G, ew.influence = 1, kgrav = 1, iter = 1000, prevent.overlap = FALSE, fixed = NULL, stopping.tolerance = 0.001, barnes.hut = FALSE) {
if(vcount(G) >= 2000)
barnes_hut <- TRUE
barnes.hut <- TRUE
if(vcount(G) > 2000)
stopping_tolerance <- 0.01
stopping.tolerance <- 0.01
else if(vcount(G) > 800)
stopping_tolerance <- 0.005
stopping.tolerance <- 0.005
else
stopping_tolerance <- 0.001
stopping.tolerance <- 0.001

lay <- NULL
if(is.null(get.vertex.attribute(G, "x"))) {
n.vertices <- igraph::vcount(G)
lay <- matrix(ncol = 2, nrow = n.vertices, data = rnorm(n.vertices * 2, 10, 2))
colnames(lay) <- c("x", "y")
}

if(is.null(get.vertex.attribute(G, "x")))
lay <- cbind(x = rnorm(vcount(G)), y = rnorm(vcount(G)))
else
lay <- cbind(x = V(G)$x, y = V(G)$y)


if(is.null(fixed))
fixed <- rep(FALSE, igraph::vcount(G))

#This is only used with prevent.overlap
if(is.null(igraph::get.vertex.attribute(G, "size")))
V(G)$size <- rep(10, igraph::vcount(G))
mass <- 1 + igraph::degree(G)
F_att <- (E(G)$weight ^ ew_influence)
F_att <- (E(G)$weight ^ ew.influence)
edge_list <- igraph::get.edgelist(G, names = F) - 1 #This is gonna be used in the C code where the indexing is 0-based

avg_displ <- numeric(iter)
max_displ <- numeric(iter)

if(barnes_hut)
if(barnes.hut)
message("Using Barnes-Hut approximation\n")

message(sprintf("Stopping tolerance: %f\n", stopping_tolerance))
message(sprintf("Stopping tolerance: %f\n", stopping.tolerance))
flush.console()

layout_forceatlas2Cpp(lay, F_att, mass, V(G)$size, edge_list, avg_displ,
kgrav, iter, prevent.overlap, fixed, max_displ, stopping_tolerance, barnes_hut)
kgrav, iter, prevent.overlap, fixed, max_displ, stopping.tolerance, barnes.hut)

return(list(lay = lay, avg_displ = avg_displ, max_displ = max_displ))
}
Expand All @@ -71,20 +113,22 @@ layout_forceatlas2 <- function(G, ew_influence = 1, kgrav = 1, iter = 1000, prev
#' This function performs a complete (i.e. possibly including overlap resolution) cycle of the ForceAtlas2 force-directed layout algorithm
#'
#' @param G The input graph
#' @param first.iter The number of iterations in the first cycle, which is performed without overlap resolution
#' @param overlap_method If this is \code{NULL} overlap resolution is not performed. Otherwise this should be a string specifying the
#' @param first.iter The maximum number of iterations in the first step, which is performed without overlap resolution
#' @param overlap.method If this is \code{NULL} overlap resolution is not performed. Otherwise this should be a string specifying the
#' overlap resolution method. Two options are possible
#' \itemize{
#' \item{\code{"repel"}}: This is the method used in the original ForceAtlas2 implementation. Using this method, a repulsive force
#' is applied to nodes that overlap each other. This method can cause problems in cases where the layout is extremely crowded,
#' as this repulsive force becomes the major determinant of the layout, and the nodes end up being arranged essentially in a grid
#' \item{\code{"expand"}}: In this method, the graph is linearly expanded, until no two nodes overlap anymore
#' }
#' @param overlap.iter The maximum number of iterations for the overlap resolution step. This is only used if \code{overlap.method} is not
#' \code{NULL}
#' @return Returns an \code{igraph} object with two additional vertex attributes \code{x} and \code{y}, containing the x and y coordinates
#' of the vertices in the final layout
#'
#'
complete_forceatlas2 <- function(G, first.iter = 1000, overlap.iter, overlap_method = NULL, ...) {
#' @export
complete_forceatlas2 <- function(G, first.iter = 1000, overlap.method = NULL, overlap.iter = NULL, ...) {

message("First iteration")
flush.console()
Expand Down
22 changes: 11 additions & 11 deletions R/unsupervised.R
Original file line number Diff line number Diff line change
@@ -1,7 +1,11 @@




#' Cosine similarity between a vector and every row of a matrix
#'
#' @param x A numeric vector of length \code{P}
#' @param m An \code{N x P} numeric matrix
#'
#' @return A numeric vector of length \code{N}; element \code{i} is the cosine
#' similarity between \code{x} and row \code{i} of \code{m}
#'
cosine_similarity_from_matrix <- function(x, m) {
    # Scale x to unit Euclidean length, so only the row norms of m remain to be divided out
    unit_x <- x / sqrt(crossprod(x))
    row_norms <- sqrt(rowSums(m^2))
    projections <- m %*% unit_x
    return(as.vector(projections / row_norms))
}
Expand All @@ -15,7 +19,7 @@ cosine_similarity_from_matrix <- function(x, m) {
#'
#' @param m An \code{N x P} matrix
#'
#' @return Returns a \code{N x N} matrix with the cosine similarity between the corresponding rows in \code{m}
#' @return Returns an \code{N x N} matrix with the cosine similarity between the corresponding rows in \code{m}
#'
cosine_similarity_matrix <- function(m){
ret <- m %*% t(m) / (sqrt(rowSums(m^2) %*% t(rowSums(m^2))))
Expand Down Expand Up @@ -49,11 +53,7 @@ build_graph <- function(tab, col.names, filtering_T = 0.8) {
filter_matrix(dd, filtering_T)

G <- igraph::graph.adjacency(dd, mode = "undirected", weighted = T)
n.vertices <- igraph::vcount(G)
lay <- igraph::layout.kamada.kawai(G)
colnames(lay) <- c("x", "y")
G <- igraph::set.vertex.attribute(G, name = "x", value = lay[, "x"])
G <- igraph::set.vertex.attribute(G, name = "y", value = lay[, "y"])

for(i in names(tab))
G <- igraph::set.vertex.attribute(G, name = i, value = tab[, i])

Expand Down Expand Up @@ -87,7 +87,7 @@ get_unsupervised_graph <- function(tab, col.names, filtering.threshold, output.n

message("Running ForceAtlas2...")
flush.console()
G <- complete_forceatlas2(G, first.iter = 50000, overlap.iter = 1, overlap_method = NULL, ew_influence = 5)
G <- complete_forceatlas2(G, first.iter = 50000, overlap.method = NULL, ew.influence = 5)
message("ForceAtlas2 done")
flush.console()

Expand Down
11 changes: 7 additions & 4 deletions man/complete_forceatlas2.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

20 changes: 20 additions & 0 deletions man/cosine_similarity_from_matrix.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

2 changes: 1 addition & 1 deletion man/cosine_similarity_matrix.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

18 changes: 0 additions & 18 deletions man/get_common_columns.Rd

This file was deleted.

58 changes: 58 additions & 0 deletions man/layout_forceatlas2.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

0 comments on commit 8515702

Please sign in to comment.