From 8515702a1135e88f0da483726c7d0d0910e88b3d Mon Sep 17 00:00:00 2001 From: pfgherardini Date: Wed, 4 Apr 2018 13:44:16 -0700 Subject: [PATCH] documentation and refactoring --- NAMESPACE | 2 + R/forceatlas2.R | 76 ++++++++++++++++++++++------ R/unsupervised.R | 22 ++++---- man/complete_forceatlas2.Rd | 11 ++-- man/cosine_similarity_from_matrix.Rd | 20 ++++++++ man/cosine_similarity_matrix.Rd | 2 +- man/get_common_columns.Rd | 18 ------- man/layout_forceatlas2.Rd | 58 +++++++++++++++++++++ 8 files changed, 159 insertions(+), 50 deletions(-) create mode 100644 man/cosine_similarity_from_matrix.Rd delete mode 100644 man/get_common_columns.Rd create mode 100644 man/layout_forceatlas2.Rd diff --git a/NAMESPACE b/NAMESPACE index 0d29d50..0710d5d 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -1,8 +1,10 @@ # Generated by roxygen2: do not edit by hand export(build_graph) +export(complete_forceatlas2) export(get_unsupervised_graph) export(get_unsupervised_graph_from_files) +export(layout_forceatlas2) import(Rcpp) importFrom(igraph,E) importFrom(igraph,V) diff --git a/R/forceatlas2.R b/R/forceatlas2.R index 64da96f..e71f23c 100644 --- a/R/forceatlas2.R +++ b/R/forceatlas2.R @@ -26,41 +26,83 @@ adaptive_expand <- function(G, max.iter) { } - -layout_forceatlas2 <- function(G, ew_influence = 1, kgrav = 1, iter = 1000, prevent.overlap = FALSE, fixed = rep(FALSE, vcount(G)), stopping_tolerance = 0.001, barnes_hut = FALSE) { +#' ForceAtlas2 force-directed layout +#' +#' @param G The input \code{igraph} object. The graph must have an edge attribute named \code{weight}, representing edge weights +#' @param ew.influence Edge weight influence. The edge weights are set to \code{edge.weight ^ ew.influence} before the +#' calculation (see original ForceAtlas2 publication) +#' @param kgrav The gravity constant. Higher values will result in more compact graphs (see original ForceAtlas2 publication) +#' @param iter Maximum number of iterations. 
The algorithm will stop after this +#' many iterations, or when the average displacement of the nodes between two +#' iterations is less than the \code{stopping.tolerance} threshold (see below) +#' @param prevent.overlap Set this option to \code{TRUE} to prevent the nodes +#' from overlapping (see ForceAtlas2 description) +#' @param fixed A boolean vector of length equal to the number of nodes in the +#' graph which specifies which nodes need to be held in a fixed +#' position. If this is \code{NULL} (default), no nodes are held fixed +#' @param stopping.tolerance The algorithm will stop after either \code{iter} +#' number of iterations, or when the average displacement of the nodes between +#' two iterations is less than this threshold +#' @param barnes.hut Whether to use the Barnes-Hut approximation for speeding up +#' the calculations when dealing with large graphs. This option is +#' automatically set to true when the graph has 2000 or more nodes +#' @return This function returns a list with three elements +#' \itemize{ +#' \item{\code{lay}}: a numeric matrix with two columns containing the x and y coordinates of each node in the final +#' layout +#' \item{\code{avg_displ}}: a numeric vector containing the average displacement of the vertices at each iteration +#' \item{\code{max_displ}}: a numeric vector containing the maximum displacement between all the vertices after +#' each iteration +#' } +#' @references \url{http://gephi.github.io} +#' @references Jacomy M, Venturini T, Heymann S, Bastian M. ForceAtlas2, a +#' continuous graph layout algorithm for handy network visualization designed +#' for the Gephi software. PLoS One. 2014 Jun 10;9(6):e98679. 
doi: +#' 10.1371/journal.pone.0098679 +#' @export +#' +layout_forceatlas2 <- function(G, ew.influence = 1, kgrav = 1, iter = 1000, prevent.overlap = FALSE, fixed = NULL, stopping.tolerance = 0.001, barnes.hut = FALSE) { if(vcount(G) >= 2000) - barnes_hut <- TRUE + barnes.hut <- TRUE if(vcount(G) > 2000) - stopping_tolerance <- 0.01 + stopping.tolerance <- 0.01 else if(vcount(G) > 800) - stopping_tolerance <- 0.005 + stopping.tolerance <- 0.005 else - stopping_tolerance <- 0.001 + stopping.tolerance <- 0.001 + + lay <- NULL + if(is.null(get.vertex.attribute(G, "x"))) { + n.vertices <- igraph::vcount(G) + lay <- matrix(ncol = 2, nrow = n.vertices, data = rnorm(n.vertices * 2, 10, 2)) + colnames(lay) <- c("x", "y") + } - if(is.null(get.vertex.attribute(G, "x"))) - lay <- cbind(x = rnorm(vcount(G)), y = rnorm(vcount(G))) else lay <- cbind(x = V(G)$x, y = V(G)$y) + if(is.null(fixed)) + fixed <- rep(FALSE, igraph::vcount(G)) + #This is only used with prevent.overlap if(is.null(igraph::get.vertex.attribute(G, "size"))) V(G)$size <- rep(10, igraph::vcount(G)) mass <- 1 + igraph::degree(G) - F_att <- (E(G)$weight ^ ew_influence) + F_att <- (E(G)$weight ^ ew.influence) edge_list <- igraph::get.edgelist(G, names = F) - 1 #This is gonna be used in the C code where the indexing is 0-based avg_displ <- numeric(iter) max_displ <- numeric(iter) - if(barnes_hut) + if(barnes.hut) message("Using Barnes-Hut approximation\n") - message(sprintf("Stopping tolerance: %f\n", stopping_tolerance)) + message(sprintf("Stopping tolerance: %f\n", stopping.tolerance)) flush.console() layout_forceatlas2Cpp(lay, F_att, mass, V(G)$size, edge_list, avg_displ, - kgrav, iter, prevent.overlap, fixed, max_displ, stopping_tolerance, barnes_hut) + kgrav, iter, prevent.overlap, fixed, max_displ, stopping.tolerance, barnes.hut) return(list(lay = lay, avg_displ = avg_displ, max_displ = max_displ)) } @@ -71,8 +113,8 @@ layout_forceatlas2 <- function(G, ew_influence = 1, kgrav = 1, iter = 1000, prev #' This 
function performs a complete (i.e. possibly including overlap resolution) cycle of the ForceAtlas2 force-directed layout algorithm #' #' @param G The input graph -#' @param first.iter The number of iterations in the first cycle, which is performed without overlap resolution -#' @param overlap_method If this is \code{NULL} overlap resolution is not performed. Otherwise this should be a string specifying the +#' @param first.iter The maximum number of iterations in the first step, which is performed without overlap resolution +#' @param overlap.method If this is \code{NULL} overlap resolution is not performed. Otherwise this should be a string specifying the #' overlap resolution method. Two options are possible #' \itemize{ #' \item{\code{"repel"}}: This is the method used in the original ForceAtlas2 implementation. Using this method, a repulsive force @@ -80,11 +122,13 @@ layout_forceatlas2 <- function(G, ew_influence = 1, kgrav = 1, iter = 1000, prev #' as this repulsive force becomes the major determinant of the layout, and the nodes end up being arranged essentially in a grid #' \item{\code{"expand"}}: In this method, the graph is linearly expanded, until no two nodes overlap anymore #' } +#' @param overlap.iter The maximum number of iterations for the overlap resolution step. This is only used if \code{overlap.method} is not +#' \code{NULL} #' @return Returns an \code{igraph} object with two additional vertex attributes \code{x} and \code{y}, containing the x and y coordinates #' of the vertices in the final layout #' -#' -complete_forceatlas2 <- function(G, first.iter = 1000, overlap.iter, overlap_method = NULL, ...) { +#' @export +complete_forceatlas2 <- function(G, first.iter = 1000, overlap.method = NULL, overlap.iter = NULL, ...) 
{ message("First iteration") flush.console() diff --git a/R/unsupervised.R b/R/unsupervised.R index 7a24602..12392a1 100644 --- a/R/unsupervised.R +++ b/R/unsupervised.R @@ -1,7 +1,11 @@ - - - - +#' Calculate cosine similarity between a vector and the rows of a matrix +#' +#' @param x A numeric vector of length \code{P} +#' @param m An \code{NxP} matrix +#' +#' @return Returns a vector of length \code{N} containing the cosine similarity between +#' the vector \code{x} and all the rows of \code{m} +#' cosine_similarity_from_matrix <- function(x, m) { x <- x / sqrt(crossprod(x)) return(as.vector((m %*% x) / sqrt(rowSums(m^2)))) @@ -15,7 +19,7 @@ cosine_similarity_from_matrix <- function(x, m) { #' #' @param m An \code{N x P} matrix #' -#' @return Returns a \code{N x N} matrix with the cosine similarity between the corresponding rows in \code{m} +#' @return Returns an \code{N x N} matrix with the cosine similarity between the corresponding rows in \code{m} #' cosine_similarity_matrix <- function(m){ ret <- m %*% t(m) / (sqrt(rowSums(m^2) %*% t(rowSums(m^2)))) @@ -49,11 +53,7 @@ build_graph <- function(tab, col.names, filtering_T = 0.8) { filter_matrix(dd, filtering_T) G <- igraph::graph.adjacency(dd, mode = "undirected", weighted = T) - n.vertices <- igraph::vcount(G) - lay <- igraph::layout.kamada.kawai(G) - colnames(lay) <- c("x", "y") - G <- igraph::set.vertex.attribute(G, name = "x", value = lay[, "x"]) - G <- igraph::set.vertex.attribute(G, name = "y", value = lay[, "y"]) + for(i in names(tab)) G <- igraph::set.vertex.attribute(G, name = i, value = tab[, i]) @@ -87,7 +87,7 @@ get_unsupervised_graph <- function(tab, col.names, filtering.threshold, output.n message("Running ForceAtlas2...") flush.console() - G <- complete_forceatlas2(G, first.iter = 50000, overlap.iter = 1, overlap_method = NULL, ew_influence = 5) + G <- complete_forceatlas2(G, first.iter = 50000, overlap.method = NULL, ew.influence = 5) message("ForceAtlas2 done") flush.console() diff --git 
a/man/complete_forceatlas2.Rd b/man/complete_forceatlas2.Rd index 81f136c..809f855 100644 --- a/man/complete_forceatlas2.Rd +++ b/man/complete_forceatlas2.Rd @@ -4,15 +4,15 @@ \alias{complete_forceatlas2} \title{Performs a Complete cycle of ForceAtlas2} \usage{ -complete_forceatlas2(G, first.iter = 1000, overlap.iter, - overlap_method = NULL, ...) +complete_forceatlas2(G, first.iter = 1000, overlap.method = NULL, + overlap.iter = NULL, ...) } \arguments{ \item{G}{The input graph} -\item{first.iter}{The number of iterations in the first cycle, which is performed without overlap resolution} +\item{first.iter}{The maximum number of iterations in the first step, which is performed without overlap resolution} -\item{overlap_method}{If this is \code{NULL} overlap resolution is not performed. Otherwise this should be a string specifying the +\item{overlap.method}{If this is \code{NULL} overlap resolution is not performed. Otherwise this should be a string specifying the overlap resolution method. Two options are possible \itemize{ \item{\code{"repel"}}: This is the method used in the original ForceAtlas2 implementation. Using this method, a repulsive force @@ -20,6 +20,9 @@ overlap resolution method. Two options are possible as this repulsive force becomes the major determinant of the layout, and the nodes end up being arranged essentially in a grid \item{\code{"expand"}}: In this method, the graph is linearly expanded, until no two nodes overlap anymore }} + +\item{overlap.iter}{The maximum number of iterations for the overlap resolution step. 
This is only used if \code{overlap.method} is not +\code{NULL}} } \value{ Returns an \code{igraph} object with two additional vertex attributes \code{x} and \code{y}, containing the x and y coordinates diff --git a/man/cosine_similarity_from_matrix.Rd b/man/cosine_similarity_from_matrix.Rd new file mode 100644 index 0000000..9598e36 --- /dev/null +++ b/man/cosine_similarity_from_matrix.Rd @@ -0,0 +1,20 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/unsupervised.R +\name{cosine_similarity_from_matrix} +\alias{cosine_similarity_from_matrix} +\title{Calculate cosine similarity between a vector and the rows of a matrix} +\usage{ +cosine_similarity_from_matrix(x, m) +} +\arguments{ +\item{x}{A numeric vector of length \code{P}} + +\item{m}{An \code{NxP} matrix} +} +\value{ +Returns a vector of length \code{N} containing the cosine similarity between + the vector \code{x} and all the rows of \code{m} +} +\description{ +Calculate cosine similarity between a vector and the rows of a matrix +} diff --git a/man/cosine_similarity_matrix.Rd b/man/cosine_similarity_matrix.Rd index 53cd08a..6765067 100644 --- a/man/cosine_similarity_matrix.Rd +++ b/man/cosine_similarity_matrix.Rd @@ -10,7 +10,7 @@ cosine_similarity_matrix(m) \item{m}{An \code{N x P} matrix} } \value{ -Returns a \code{N x N} matrix with the cosine similarity between the corresponding rows in \code{m} +Returns an \code{N x N} matrix with the cosine similarity between the corresponding rows in \code{m} } \description{ This function calculates the cosine similarity between the rows of an input matrix, according to the values diff --git a/man/get_common_columns.Rd b/man/get_common_columns.Rd deleted file mode 100644 index 4233b9c..0000000 --- a/man/get_common_columns.Rd +++ /dev/null @@ -1,18 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/utils.R -\name{get_common_columns} -\alias{get_common_columns} -\title{Get the columns that are common 
to a set of input tabular files} -\usage{ -get_common_columns(files.list) -} -\arguments{ -\item{files.list}{A vector of input file names. Each file should be a tab-separated table, with the first row -representing column headers} -} -\value{ -Returns a vector of column names that are present in all the files in \code{files.list} -} -\description{ -Get the columns that are common to a set of input tabular files -} diff --git a/man/layout_forceatlas2.Rd b/man/layout_forceatlas2.Rd new file mode 100644 index 0000000..ac4a402 --- /dev/null +++ b/man/layout_forceatlas2.Rd @@ -0,0 +1,58 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/forceatlas2.R +\name{layout_forceatlas2} +\alias{layout_forceatlas2} +\title{ForceAtlas2 force-directed layout} +\usage{ +layout_forceatlas2(G, ew.influence = 1, kgrav = 1, iter = 1000, + prevent.overlap = FALSE, fixed = NULL, stopping.tolerance = 0.001, + barnes.hut = FALSE) +} +\arguments{ +\item{G}{The input \code{igraph} object. The graph must have an edge attribute named \code{weight}, representing edge weights} + +\item{ew.influence}{Edge weight influence. The edge weights are set to \code{edge.weight ^ ew.influence} before the +calculation (see original ForceAtlas2 publication)} + +\item{kgrav}{The gravity constant. Higher values will result in more compact graphs (see original ForceAtlas2 publication)} + +\item{iter}{Maximum number of iterations. The algorithm will stop after this +many iterations, or when the average displacement of the nodes between two +iterations is less than the \code{stopping.tolerance} threshold (see below)} + +\item{prevent.overlap}{Set this option to \code{TRUE} to prevent the nodes +from overlapping (see ForceAtlas2 description)} + +\item{fixed}{A boolean vector of length equal to the number of nodes in the +graph which specifies which nodes, need to be held in a fixed +position. 
If this is \code{NULL} (default), no nodes are held fixed} + +\item{stopping.tolerance}{The algorithm will stop after either \code{iter} +number of iterations, or when the average displacement of the nodes between +two iterations is less than this threshold} + +\item{barnes.hut}{Whether to use the Barnes-Hut approximation for speeding up +the calculations when dealing with large graphs. This option is +automatically set to true when the graph has 2000 or more nodes} +} +\value{ +This function returns a list with three elements + \itemize{ + \item{\code{lay}}: a numeric matrix with two columns containing the x and y coordinates of each node in the final + layout + \item{\code{avg_displ}}: a numeric vector containing the average displacement of the vertices at each iteration + \item{\code{max_displ}}: a numeric vector containing the maximum displacement between all the vertices after + each iteration + } +} +\description{ +ForceAtlas2 force-directed layout +} +\references{ +\url{http://gephi.github.io} + +Jacomy M, Venturini T, Heymann S, Bastian M. ForceAtlas2, a + continuous graph layout algorithm for handy network visualization designed + for the Gephi software. PLoS One. 2014 Jun 10;9(6):e98679. doi: + 10.1371/journal.pone.0098679 +}