Commit

Updated documentation and minor refactoring
pfgherardini committed Apr 5, 2018
1 parent 8515702 commit 5495cfa
Showing 12 changed files with 311 additions and 55 deletions.
1 change: 1 addition & 0 deletions NAMESPACE
@@ -2,6 +2,7 @@

export(build_graph)
export(complete_forceatlas2)
export(filter_matrix_by_rank)
export(get_unsupervised_graph)
export(get_unsupervised_graph_from_files)
export(layout_forceatlas2)
13 changes: 13 additions & 0 deletions R/RcppExports.R
Expand Up @@ -18,6 +18,19 @@ filter_matrix <- function(m, threshold) {
invisible(.Call('_scgraphs_filter_matrix', PACKAGE = 'scgraphs', m, threshold))
}

#' Filter a matrix based on the rank of the values in each row
#'
#' This function filters an input matrix by ranking the data in each row from largest to smallest,
#' and setting any element whose rank is greater than the input threshold to 0. In other words,
#' if the threshold is X, only the X greatest values in each row will be kept, and the rest will
#' be set to 0. WARNING: the input matrix is modified in-place.
#'
#' @param m The input matrix
#' @param threshold The threshold rank. Values with rank greater than the threshold will be set to
#' 0. Note that the rank is 1-based (i.e. the largest observation has rank 1)
#' @return This function does not return any value. The input matrix is modified in-place.
#' @export
#'
filter_matrix_by_rank <- function(m, threshold) {
invisible(.Call('_scgraphs_filter_matrix_by_rank', PACKAGE = 'scgraphs', m, threshold))
}
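
For illustration, a minimal usage sketch of filter_matrix_by_rank (the values are made up, and the expected result follows the in-place semantics documented above):

m <- matrix(c(5, 3, 1, 4,
              2, 8, 7, 6), nrow = 2, byrow = TRUE)
filter_matrix_by_rank(m, 2)
# m should now retain only the 2 largest values in each row:
#   5 0 0 4
#   0 8 7 0
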
134 changes: 92 additions & 42 deletions R/scaffold.R
@@ -1,9 +1,18 @@
downsample_by <- function(tab, col.name, size)
{
#' Downsample an input data.frame
#'
#' This function downsamples an input \code{data.frame}, so that for each value of \code{col.name}, there are
#' at most \code{size} rows.
#'
#' @param tab Input \code{data.frame}
#' @param col.name The name of a column in \code{tab} (typically the data in the column should either be characters or factors).
#' For each value of \code{tab$col.name}, there will be at most \code{size} rows in the output table
#' @param size The number of rows to select for each value of \code{tab$col.name}. If size is larger than the total number of rows
#' available for a particular value, all rows for that value will be returned
#' @return Returns a \code{data.frame}
#'
downsample_by <- function(tab, col.name, size) {
print(sprintf("Downsampling to %d events", size))
return(ddply(tab, col.name, function(x, size)
{
#s <- min(c(size, nrow(x)))
return(ddply(tab, col.name, function(x, size) {
if(nrow(x) <= size)
return(x)
else
@@ -14,7 +23,24 @@ downsample_by <- function(tab, col.name, size)
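
A short sketch of the documented downsample_by() behaviour (illustrative data; if downsample_by is not exported, it would have to be called via scgraphs:::downsample_by):

df <- data.frame(cellType = rep(c("A", "B"), c(5, 2)), value = 1:7)
downsampled <- downsample_by(df, "cellType", 3)
# at most 3 rows are kept for cellType "A"; both rows of "B" are kept since 2 <= 3
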



#... additional named arguments for read.FCS
#' Load landmark populations from a list of FCS files
#'
#' @param files.list The list of files to load. The population names will be derived by splitting the file name
#' using \code{"_"} as separator, and taking the last field
#' @param asinh.cofactor The cofactor to use for \code{asinh} transformation, only used if \code{transform.data} is \code{TRUE}
#' @param transform.data Logical, whether \code{asinh} transformation should be applied to the data
#' @param ... Additional arguments passed to \code{flowCore::read.FCS}
#' @return Returns a list with the following elements:
#' \itemize{
#' \item{\code{downsampled.data}}: a \code{data.frame} containing the data, downsampled to a fixed number of events
#' \item{\code{tab.attractors}}: a \code{data.frame} with the data for the landmark nodes. Each row is a separate landmark
#' population, and the columns correspond to the median expression values of each marker
#' \item{\code{celltype_key}}: a \code{data.frame} containing the key for the names of the landmark populations. The
#' \code{cellType} column is a numeric id corresponding to the column of the same name in \code{tab.attractors}
#' }
#'
#'
#'
load_attractors <- function(files.list, asinh.cofactor, transform.data = T, ...) {
res <- NULL
for(f in files.list) {
@@ -39,7 +65,13 @@ load_attractors <- function(files.list, asinh.cofactor, transform.data = T, ...)
}


load_attrators_from_dir <- function(dir, ...) {
#' Load landmark populations from a directory
#'
#' @param dir A directory name. All the FCS files in the directory will be loaded
#' @inheritDotParams load_attractors -files.list
#' @inherit load_attractors return
#'
load_attractors_from_dir <- function(dir, ...) {
files.list <- list.files(dir, full.names = T)
load_attractors(files.list, ...)
}
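
A hedged usage sketch for the landmark-loading helpers (the directory name and cofactor value are assumptions; the returned fields are those documented above):

# attractors <- load_attractors_from_dir("landmarks/", asinh.cofactor = 5, transform.data = TRUE)
# attractors$tab.attractors   # one row per landmark population, median marker expression
# attractors$celltype_key     # numeric cellType ids matched to the population names
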
@@ -60,18 +92,50 @@ mark_nearest_neighbour <- function(G) {
return(G)
}


#' Remove connections to landmark nodes based on an expression threshold
#'
#' This function removes connections to a landmark node in cases where the expression of all markers
#' for that landmark is less than a threshold. This is useful when mapping data that contains markers which
#' are all expected to be absent in certain landmarks
#'
#' @param dd A matrix of cosine similarity values between cluster nodes (rows) and landmark nodes (columns)
#' @param tab A \code{data.frame} containing the data (median expression values) for the landmark nodes
#' @param col.names A vector of column names to be used for the computation
#' @param thresh The threshold to use. Connections to landmarks for which the expression of all the markers
#' is \code{< thresh} will be removed
#' @return Returns the same \code{dd} matrix, but with the values corresponding to the connections that have been
#' removed set to 0
#'
distance_from_attractor_hard_filter <- function(dd, tab, col.names, thresh = 0.5) {
tab <- tab[, col.names]
w <- apply(tab[,col.names], 1, function(x, thresh) {all(x < thresh)}, thresh = thresh)
if(any(w))
print("Hard removing some connections to unstained landmarks")
dd[, w] <- 0
if(any(w)) {
message(sprintf("Hard removing some connections to landmarks using threshold: %f", thresh))
flush.console()
dd[, w] <- 0
}
return(dd)
}
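
A small sketch of the hard filter described above, with hypothetical marker names and values:

dd  <- matrix(c(0.9, 0.4,
                0.7, 0.6), nrow = 2, byrow = TRUE)        # cosine similarities: 2 clusters x 2 landmarks
tab <- data.frame(CD3 = c(3.2, 0.1), CD19 = c(0.2, 0.3))  # landmark medians
dd  <- distance_from_attractor_hard_filter(dd, tab, col.names = c("CD3", "CD19"))
# both markers of the second landmark are < 0.5 (the default thresh), so dd[, 2] should now be 0
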


get_distances_from_attractors <- function(m, tab, col.names, dist.thresh) {
#' Calculate (and filter) distances between cluster and landmark nodes
#'
#' This function calculates the cosine similarity between the cluster and landmark nodes, and then filters
#' out the resulting matrix to include only the top-scoring edges. The result of this function can be used
#' as an adjacency matrix for graph construction
#'
#' @param m A \code{data.frame} containing expression values for the cluster nodes
#' @param tab A \code{data.frame} containing expression values for the landmark nodes
#' @param col.names A vector of column names to be used for the computation
#' @param q.thresh The quantile probability to be used for filtering edges. The algorithm
#' will calculate a weight threshold based on this quantile of the weight distribution
#' @param min.thresh The minimum value to be used for thresholding edges. Irrespective of the results of the
#' quantile computation, the actual threshold used will never be less than this value
#' @return Returns a matrix with each row corresponding to a cluster node, and each column
#' corresponding to the similarity between the cluster node and the respective landmark node. This matrix
#' can be directly used as an adjacency matrix for graph construction
#'
#'
get_distances_from_attractors <- function(m, tab, col.names, q.thresh, min.thresh) {
att <- as.matrix(tab[, col.names])
row.names(att) <- as.character(1:nrow(tab))
m <- as.matrix(m[, col.names])
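
A sketch of how the new q.thresh/min.thresh interface might be called (the column names and threshold values here are assumptions, not values from this commit):

# dd <- get_distances_from_attractors(m = tab.clustered, tab = tab.attractors,
#                                     col.names = c("CD3", "CD19", "CD33"),
#                                     q.thresh = 0.8, min.thresh = 0.5)
# dd has one row per cluster and one column per landmark, and can be used as the
# adjacency input for the subsequent graph-construction step
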
@@ -135,16 +199,6 @@ add_vertices_to_attractors_graph <- function(G, tab.clustered, tab.median, col.n
}
}

maxx <- maxy <- rep(Inf, vcount(G))
minx <- miny <- rep(-Inf, vcount(G))

maxx[1:num.vertices] <- minx[1:num.vertices] <- V(G)$x[1:num.vertices]
maxy[1:num.vertices] <- miny[1:num.vertices] <- V(G)$y[1:num.vertices]
lay <- igraph::layout.kamada.kawai(G, minx = minx, maxx = maxx, miny = miny, maxy = maxy)
colnames(lay) <- c("x", "y")
G <- igraph::set.vertex.attribute(G, name = "x", value = lay[, "x"])
G <- igraph::set.vertex.attribute(G, name = "y", value = lay[, "y"])

V(G)[1:num.vertices]$type <- 1 #attractor
V(G)[(num.vertices + 1):vcount(G)]$type <- 2 #cell

@@ -156,24 +210,20 @@ add_vertices_to_attractors_graph <- function(G, tab.clustered, tab.median, col.n
}


get_vertex_table <- function(G) {
att <- list.vertex.attributes(G)
ret <- NULL

for(a in att) {
d <- data.frame(get.vertex.attribute(G, a), stringsAsFactors = FALSE)
if(is.null(ret))
ret <- d
else
ret <- cbind(ret, d, stringsAsFactors = FALSE)
}
names(ret) <- att
return(ret)
}


#' Add connections between cluster nodes
#'
#' This function takes an existing Scaffold map and adds connections between the cluster nodes
#'
#' @param G An \code{igraph} object, representing a Scaffold map, as returned by \code{add_vertices_to_attractors_graph}
#' @param col.names A vector of column names to be used for the computation of similarities
#' @param weight.factor Weight factor. The edge weights for the inter-cluster connections will be multiplied
#' by this weight factor. This is useful in case one wants to weight the connections between the clusters differently
#' from the connections between the clusters and the landmarks
#' @return Returns an \code{igraph} object, representing a Scaffold map with inter-cluster connections
#'
#'
add_inter_clusters_connections <- function(G, col.names, weight.factor) {
tab <- get_vertex_table(G)
tab <- igraph::as_data_frame(G, what = "vertices")
tab <- tab[tab$type == 2,]
m <- as.matrix(tab[, col.names])
row.names(m) <- tab$name
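
A hedged call sketch for the inter-cluster step (the weight.factor value is an assumption; a factor below 1 would down-weight cluster-to-cluster edges relative to cluster-to-landmark edges):

# G <- add_inter_clusters_connections(G, col.names = c("CD3", "CD19", "CD33"),
#                                     weight.factor = 0.7)
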
@@ -246,15 +296,15 @@ process_data <- function(tab.clustered, G.attractors = NULL, tab.attractors = NU
fixed[1:vcount(G.attractors)] <- TRUE

G.complete <- complete.forceatlas2(G.complete, first.iter = 50000, overlap.iter = 20000,
overlap_method = "repel", ew_influence = ew_influence, fixed = fixed)
overlap_method = "repel", ew.influence = ew_influence, fixed = fixed)
if(inter.cluster.connections)
{
print("Adding inter-cluster connections with markers:")
print(col.names.inter_cluster)
print(sprintf("Weight factor:%f", inter_cluster.weight_factor))
G.complete <- add_inter_clusters_connections(G.complete, col.names.inter_cluster, weight.factor = inter_cluster.weight_factor)
G.complete <- complete.forceatlas2(G.complete, first.iter = 50000, overlap.iter = 20000,
overlap_method = overlap_method, ew_influence = ew_influence, fixed = fixed)
overlap_method = overlap_method, ew.influence = ew_influence, fixed = fixed)
}

}
23 changes: 23 additions & 0 deletions man/add_inter_clusters_connections.Rd


27 changes: 27 additions & 0 deletions man/distance_from_attractor_hard_filter.Rd


24 changes: 24 additions & 0 deletions man/downsample_by.Rd


23 changes: 23 additions & 0 deletions man/filter_matrix_by_rank.Rd


31 changes: 31 additions & 0 deletions man/get_distances_from_attractors.Rd


12 changes: 0 additions & 12 deletions man/hello.Rd

This file was deleted.

