From 64872ac1e1736882182846c4afed8a0674418057 Mon Sep 17 00:00:00 2001 From: Lingxue Zhu Date: Mon, 19 Mar 2018 00:16:50 -0400 Subject: [PATCH] clean up documentation --- R/SOUP.R | 16 +++++++-------- R/SOUPtime.R | 4 ++-- R/cvSOUP.R | 35 ++++++++++++++++++--------------- R/geneSelect.R | 6 +++--- R/utils_plot.R | 6 +++--- man/DESCENDselect.Rd | 4 ++-- man/SOUP.Rd | 8 +------- man/cv.error.SOUP.Rd | 16 +++++++-------- man/cvSOUP.Rd | 20 ++++++++++--------- man/findPure.Rd | 2 +- man/getClusterOrder.Rd | 2 +- man/getMajorMatrix.Rd | 4 ++-- man/getTimeline.Rd | 2 +- man/heatmapKseq.Rd | 3 ++- man/plotGeneTimeline.Rd | 6 +++--- man/plotMultipleGeneTimeline.Rd | 2 +- man/predictTheta.Rd | 8 ++++++++ man/projMembership.Rd | 17 ++++++++++++++++ man/selectGenes.Rd | 2 +- 19 files changed, 93 insertions(+), 70 deletions(-) create mode 100644 man/projMembership.Rd diff --git a/R/SOUP.R b/R/SOUP.R index 771fb79..9b94ed0 100644 --- a/R/SOUP.R +++ b/R/SOUP.R @@ -6,7 +6,7 @@ #' @param expr a cell-by-gene expression matrix, either the raw counts or log-transformed expressions. #' @param Ks number of clusters, can be a single integer or a list of integers. #' @param type "log" if \code{expr} has been normalized and log-transformed (default), -#' or "count" (default) \code{expr} contains the raw counts. +#' or "count" if \code{expr} contains the raw counts. #' It is recommended to use the log scale, which usually gives better results in practice. #' @param i.pure (optional) the indices of the pure cells. By default is \code{NULL}, and SOUP will infer the pure list. #' If the list is already known (for example, from previous runs), then providing it will reduce the computation time. 
@@ -24,13 +24,7 @@ #' \item{i.pure}{the indices of pure cells with the highest purity scores.} #' } #' -#' @examples -#' select.genes = zeisel$select.genes -#' counts = zeisel$counts[, colnames(counts) %in% select.genes] -#' soup.out = SOUP(counts, Ks=7, type="count") -#' #' @export - SOUP <- function(expr, Ks=3, type="log", i.pure=NULL, ext.prop=NULL, pure.prop=0.5, @@ -108,7 +102,7 @@ SOUP <- function(expr, Ks=3, #' #' @param expr a cell-by-gene expression matrix, either the raw counts or log-transformed expressions. #' @param type "log" if \code{expr} has been normalized and log-transformed (default), -#' or "count" (default) \code{expr} contains the raw counts. +#' or "count" if \code{expr} contains the raw counts. #' @param ext.prop (optional) the proportion of extreme neighbors for each cell, such that \code{ext.prop*n.cells} is roughly the number of pure cells \emph{per cluster}. #' By default, \code{ext.prop=0.1} for less than 1,000 cells, and \code{ext.prop=0.05} for larger datasets. #' @param pure.prop (optional) the proportion of pure cells in the data. By default \code{pure.prop=0.5}. @@ -257,7 +251,11 @@ getTheta <- function(expr, i.pure, pure.cluster, G) { } - +#' Clean up membership matrix +#' +#' @param theta The estimated raw theta +#' +#' @return The cleaned-up membership matrix. projMembership <- function(theta) { membership = theta membership[membership < 0] = 0 diff --git a/R/SOUPtime.R b/R/SOUPtime.R index 7be58ad..0a2a20a 100644 --- a/R/SOUPtime.R +++ b/R/SOUPtime.R @@ -1,5 +1,5 @@ -#' Estimate Developmental Timeline +#' Estimate developmental trajectory #' #' Estimate the developmental timeline for cells from SOUP soft membership. #' @@ -37,7 +37,7 @@ getTimeline <- function(membership, centers, return(timeline) } -#' Order Clusters +#' Order clusters #' #' Re-order cluster labels, starting from \code{k.start}, #' and consecutively append the next one with the highest correlation in cluster centers. 
diff --git a/R/cvSOUP.R b/R/cvSOUP.R index 82413e4..cd4c407 100644 --- a/R/cvSOUP.R +++ b/R/cvSOUP.R @@ -1,20 +1,20 @@ -#' Cross Validation for SOUP +#' Cross validation for SOUP #' #' @param expr A cell-by-gene expression matrix, either the raw counts or log-transformed expressions. #' @param type "log" if \code{expr} has been normalized and log-transformed (default), #' or "count" (default) \code{expr} contains the raw counts. -#' @param nfold Number of folds -#' @param nCV Number of repetitions #' @param Ks A sequence of cluster numbers -#' @param mc.cores Number of cores for parallelization -#' @param verbose Whether to print progress +#' @param nfold (optional) Number of folds, default is 10 +#' @param nCV (optional) Number of repetitions, default is 10 +#' @param mc.cores (optional) Number of cores for parallelization, default is 1 without parallelization +#' @param seeds (optional) A list of seeds to be used, with length nCV, default is NULL +#' @param verbose (optional) Whether to print progress, default is TRUE #' #' @export #' -cvSOUP <- function(expr, type="log", - nfold=10, nCV=10, Ks=c(2:10), mc.cores=10, - seeds=NULL, - verbose=TRUE) { +cvSOUP <- function(expr, type="log", Ks=c(2:10), + nfold=10, nCV=10, mc.cores=1, + seeds=NULL, verbose=TRUE) { cv.errors = matrix(NA, nrow=nCV, ncol=length(Ks)) cv.sds = matrix(NA, nrow=nCV, ncol=length(Ks)) @@ -48,20 +48,19 @@ cvSOUP <- function(expr, type="log", K.cv = K.cv)) } -#' Compute Cross Validation Errors +#' Compute cross validation errors #' #' @param expr A cell-by-gene expression matrix, either the raw counts or log-transformed expressions. #' @param type "log" if \code{expr} has been normalized and log-transformed (default), #' or "count" (default) \code{expr} contains the raw counts. 
-#' @param nfold Number of folds #' @param Ks A sequence of cluster numbers -#' @param seed (optional) random seed -#' @param mc.cores Number of cores for parallelization +#' @param nfold (optional) Number of folds, default is 10 +#' @param seed (optional) random seed, default is NULL +#' @param mc.cores (optional) Number of cores for parallelization, default is 1 without parallelization #' #' @export -cv.error.SOUP <- function(expr, type="log", - nfold=10, Ks=c(2:10), seed=NULL, - mc.cores=2) { +cv.error.SOUP <- function(expr, type="log", Ks=c(2:10), + nfold=10, seed=NULL, mc.cores=1) { ## cross validation doCV <- function(fold, nfold, i.permute.ind, @@ -116,6 +115,10 @@ cv.error.SOUP <- function(expr, type="log", #' Predict the membership for new data points #' +#' @param new.expr cell-by-gene expression matrix +#' @param t.centers transposed center matrix, n.gene-by-K +#' +#' @return The predicted membership matrix. #' @export #' predictTheta <- function(new.expr, t.centers) { diff --git a/R/geneSelect.R b/R/geneSelect.R index 640c0d5..b94ff62 100644 --- a/R/geneSelect.R +++ b/R/geneSelect.R @@ -4,7 +4,7 @@ #' #' @param expr a cell-by-gene expression matrix, either the raw counts or log-transformed expressions. #' @param type "count" if \code{expr} contains the raw counts (default), -#' or "log" if \code{expr} has been normalized and log-transformed (default). +#' or "log" if \code{expr} has been normalized and log-transformed. #' @param SPCA boolean, whether to use SPCA or not. #' @param DESCEND boolean, whether to use DESCEND or not. #' @param n.cores the number of cores used for parallel computing of DESCEND. @@ -86,9 +86,9 @@ selectGenes <- function(expr, type="count", #' Select highly variable genes for clustering using DESCEND. #' #' @param counts the cell-by-gene expression counts. -#' Note that DESCEND uses a Poisson model, so the count data should be provided (without normalization or log transformation). 
+#' Note that DESCEND uses a Poisson model, so the count data should be provided. #' @param n.cores the number of cores used for parallel computing. DESCEND can be slow so parallelization is highly recommended. -#' @param threshold the threshold for Gini index. Higer threshold leads to fewer selected genes. +#' @param threshold (optional) the threshold for Gini index, default is 3. Higher threshold leads to fewer selected genes. #' #' @return A list containing \describe{ #' \item{select.genes}{the names of selected genes, ordered by decreasing scores.} diff --git a/R/utils_plot.R b/R/utils_plot.R index 9e02374..629b285 100644 --- a/R/utils_plot.R +++ b/R/utils_plot.R @@ -48,7 +48,7 @@ heatmapKseq <- function(memberships, Ks, cell.type, ref.lab="Reference", } -#' Get the SOUP majority matrix +#' Get the SOUP hard assignment matrix #' #' @export #' @@ -169,7 +169,7 @@ plotContTable <- function(est_label, true_label, short.names=NULL, xlab="Referen return(g) } -#' Plot Gene Expression along Trajectory +#' Plot gene expression along trajectory #' #' @param expr The cell-by-gene expression matrix to be visualized #' @param marker.gene The gene to be visualized; must be one of the columns of \code{expr} @@ -203,7 +203,7 @@ plotGeneTimeline <- function(expr, marker.gene, timeline, return(g) } -#' Plot Multiple Gene Expressions along Trajectory +#' Plot multiple gene expressions along trajectory #' #' Expression levels of multiple genes along trajectory, arranged on a grid of plots. #' diff --git a/man/DESCENDselect.Rd b/man/DESCENDselect.Rd index 1be6427..5b2f6d5 100644 --- a/man/DESCENDselect.Rd +++ b/man/DESCENDselect.Rd @@ -8,11 +8,11 @@ DESCENDselect(counts, n.cores = 1, threshold = 3) } \arguments{ \item{counts}{the cell-by-gene expression counts. 
-Note that DESCEND uses a Poisson model, so the count data should be provided (without normalization or log transformation).} +Note that DESCEND uses a Poisson model, so the count data should be provided.} \item{n.cores}{the number of cores used for parallel computing. DESCEND can be slow so parallelization is highly recommended.} -\item{threshold}{the threshold for Gini index. Higer threshold leads to fewer selected genes.} +\item{threshold}{(optional) the threshold for Gini index, default is 3. Higher threshold leads to fewer selected genes.} } \value{ A list containing \describe{ diff --git a/man/SOUP.Rd b/man/SOUP.Rd index def2528..3118ba8 100644 --- a/man/SOUP.Rd +++ b/man/SOUP.Rd @@ -13,7 +13,7 @@ SOUP(expr, Ks = 3, type = "log", i.pure = NULL, ext.prop = NULL, \item{Ks}{number of clusters, can be a single integer or a list of integers.} \item{type}{"log" if \code{expr} has been normalized and log-transformed (default), -or "count" (default) \code{expr} contains the raw counts. +or "count" if \code{expr} contains the raw counts. It is recommended to use the log scale, which usually gives better results in practice.} \item{i.pure}{(optional) the indices of the pure cells. By default is \code{NULL}, and SOUP will infer the pure list. @@ -39,9 +39,3 @@ A list containing \describe{ \description{ A semi-soft clustering algorithm for single cells. 
} -\examples{ -select.genes = zeisel$select.genes -counts = zeisel$counts[, colnames(counts) \%in\% select.genes] -soup.out = SOUP(counts, Ks=7, type="count") - -} diff --git a/man/cv.error.SOUP.Rd b/man/cv.error.SOUP.Rd index be76f14..7dac271 100644 --- a/man/cv.error.SOUP.Rd +++ b/man/cv.error.SOUP.Rd @@ -2,10 +2,10 @@ % Please edit documentation in R/cvSOUP.R \name{cv.error.SOUP} \alias{cv.error.SOUP} -\title{Compute Cross Validation Errors} +\title{Compute cross validation errors} \usage{ -cv.error.SOUP(expr, type = "log", nfold = 10, Ks = c(2:10), seed = NULL, - mc.cores = 2) +cv.error.SOUP(expr, type = "log", Ks = c(2:10), nfold = 10, seed = NULL, + mc.cores = 1) } \arguments{ \item{expr}{A cell-by-gene expression matrix, either the raw counts or log-transformed expressions.} @@ -13,14 +13,14 @@ cv.error.SOUP(expr, type = "log", nfold = 10, Ks = c(2:10), seed = NULL, \item{type}{"log" if \code{expr} has been normalized and log-transformed (default), or "count" (default) \code{expr} contains the raw counts.} -\item{nfold}{Number of folds} - \item{Ks}{A sequence of cluster numbers} -\item{seed}{(optional) random seed} +\item{nfold}{(optional) Number of folds, default is 10} + +\item{seed}{(optional) random seed, default is NULL} -\item{mc.cores}{Number of cores for parallelization} +\item{mc.cores}{(optional) Number of cores for parallelization, default is 1 without parallelization} } \description{ -Compute Cross Validation Errors +Compute cross validation errors } diff --git a/man/cvSOUP.Rd b/man/cvSOUP.Rd index 2507b81..87d5ae8 100644 --- a/man/cvSOUP.Rd +++ b/man/cvSOUP.Rd @@ -2,10 +2,10 @@ % Please edit documentation in R/cvSOUP.R \name{cvSOUP} \alias{cvSOUP} -\title{Cross Validation for SOUP} +\title{Cross validation for SOUP} \usage{ -cvSOUP(expr, type = "log", nfold = 10, nCV = 10, Ks = c(2:10), - mc.cores = 10, verbose = TRUE) +cvSOUP(expr, type = "log", Ks = c(2:10), nfold = 10, nCV = 10, + mc.cores = 1, seeds = NULL, verbose = TRUE) } \arguments{ 
\item{expr}{A cell-by-gene expression matrix, either the raw counts or log-transformed expressions.} @@ -13,16 +13,18 @@ cvSOUP(expr, type = "log", nfold = 10, nCV = 10, Ks = c(2:10), \item{type}{"log" if \code{expr} has been normalized and log-transformed (default), or "count" (default) \code{expr} contains the raw counts.} -\item{nfold}{Number of folds} +\item{Ks}{A sequence of cluster numbers} -\item{nCV}{Number of repetitions} +\item{nfold}{(optional) Number of folds, default is 10} -\item{Ks}{A sequence of cluster numbers} +\item{nCV}{(optional) Number of repetitions, default is 10} + +\item{mc.cores}{(optional) Number of cores for parallelization, default is 1 without parallelization} -\item{mc.cores}{Number of cores for parallelization} +\item{seeds}{(optional) A list of seeds to be used, with length nCV, default is NULL} -\item{verbose}{Whether to print progress} +\item{verbose}{(optional) Whether to print progress, default is TRUE} } \description{ -Cross Validation for SOUP +Cross validation for SOUP } diff --git a/man/findPure.Rd b/man/findPure.Rd index 24f9fef..ab0357a 100644 --- a/man/findPure.Rd +++ b/man/findPure.Rd @@ -10,7 +10,7 @@ findPure(expr, type = "log", ext.prop = NULL, pure.prop = 0.5) \item{expr}{a cell-by-gene expression matrix, either the raw counts or log-transformed expressions.} \item{type}{"log" if \code{expr} has been normalized and log-transformed (default), -or "count" (default) \code{expr} contains the raw counts.} +or "count" if \code{expr} contains the raw counts.} \item{ext.prop}{(optional) the proportion of extreme neighbors for each cell, such that \code{ext.prop*n.cells} is roughly the number of pure cells \emph{per cluster}. 
By default, \code{ext.prop=0.1} for less than 1,000 cells, and \code{ext.prop=0.05} for larger datasets.} diff --git a/man/getClusterOrder.Rd b/man/getClusterOrder.Rd index c366414..707778a 100644 --- a/man/getClusterOrder.Rd +++ b/man/getClusterOrder.Rd @@ -2,7 +2,7 @@ % Please edit documentation in R/SOUPtime.R \name{getClusterOrder} \alias{getClusterOrder} -\title{Order Clusters} +\title{Order clusters} \usage{ getClusterOrder(cor.center, k.start = 1) } diff --git a/man/getMajorMatrix.Rd b/man/getMajorMatrix.Rd index 7fb9fa5..b84dc21 100644 --- a/man/getMajorMatrix.Rd +++ b/man/getMajorMatrix.Rd @@ -2,10 +2,10 @@ % Please edit documentation in R/utils_plot.R \name{getMajorMatrix} \alias{getMajorMatrix} -\title{Get the SOUP majority matrix} +\title{Get the SOUP hard assignment matrix} \usage{ getMajorMatrix(memberships, Ks, cell.type, ref.lab = "Reference") } \description{ -Get the SOUP majority matrix +Get the SOUP hard assignment matrix } diff --git a/man/getTimeline.Rd b/man/getTimeline.Rd index af7ccb3..eeef13e 100644 --- a/man/getTimeline.Rd +++ b/man/getTimeline.Rd @@ -2,7 +2,7 @@ % Please edit documentation in R/SOUPtime.R \name{getTimeline} \alias{getTimeline} -\title{Estimate Developmental Timeline} +\title{Estimate developmental trajectory} \usage{ getTimeline(membership, centers, k.start = NULL, k.end = NULL) } diff --git a/man/heatmapKseq.Rd b/man/heatmapKseq.Rd index 9d594cf..59b4e89 100644 --- a/man/heatmapKseq.Rd +++ b/man/heatmapKseq.Rd @@ -4,7 +4,8 @@ \alias{heatmapKseq} \title{Visualize SOUP hard assignments.} \usage{ -heatmapKseq(memberships, Ks, cell.type, ref.lab = "Reference") +heatmapKseq(memberships, Ks, cell.type, ref.lab = "Reference", + font.size = 10) } \arguments{ \item{memberships}{A list of membership matrices of different K} diff --git a/man/plotGeneTimeline.Rd b/man/plotGeneTimeline.Rd index fa4a27e..8d5b5fd 100644 --- a/man/plotGeneTimeline.Rd +++ b/man/plotGeneTimeline.Rd @@ -2,10 +2,10 @@ % Please edit documentation in 
R/utils_plot.R \name{plotGeneTimeline} \alias{plotGeneTimeline} -\title{Plot Gene Expression along Trajectory} +\title{Plot gene expression along trajectory} \usage{ plotGeneTimeline(expr, marker.gene, timeline, x.title = "SOUP trajectory", - y.title = "Expression", title = "") + y.title = "Expression", title = "", font.size = 10) } \arguments{ \item{expr}{The cell-by-gene expression matrix to be visualized} @@ -24,5 +24,5 @@ plotGeneTimeline(expr, marker.gene, timeline, x.title = "SOUP trajectory", A ggplot object } \description{ -Plot Gene Expression along Trajectory +Plot gene expression along trajectory } diff --git a/man/plotMultipleGeneTimeline.Rd b/man/plotMultipleGeneTimeline.Rd index 86ba3ef..b930551 100644 --- a/man/plotMultipleGeneTimeline.Rd +++ b/man/plotMultipleGeneTimeline.Rd @@ -2,7 +2,7 @@ % Please edit documentation in R/utils_plot.R \name{plotMultipleGeneTimeline} \alias{plotMultipleGeneTimeline} -\title{Plot Multiple Gene Expressions along Trajectory} +\title{Plot multiple gene expressions along trajectory} \usage{ plotMultipleGeneTimeline(expr, genelist, timeline, x.title = "SOUP trajectory", y.title = "Expression", nrow = NULL, diff --git a/man/predictTheta.Rd b/man/predictTheta.Rd index b6c3084..118537a 100644 --- a/man/predictTheta.Rd +++ b/man/predictTheta.Rd @@ -6,6 +6,14 @@ \usage{ predictTheta(new.expr, t.centers) } +\arguments{ +\item{new.expr}{cell-by-gene expression matrix} + +\item{t.centers}{transposed center matrix, n.gene-by-K} +} +\value{ +The predicted membership matrix. 
+} \description{ Predict the membership for new data points } diff --git a/man/projMembership.Rd b/man/projMembership.Rd new file mode 100644 index 0000000..7fb0581 --- /dev/null +++ b/man/projMembership.Rd @@ -0,0 +1,17 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/SOUP.R +\name{projMembership} +\alias{projMembership} +\title{Clean up membership matrix} +\usage{ +projMembership(theta) +} +\arguments{ +\item{theta}{The estimated raw theta} +} +\value{ +The cleaned-up membership matrix. +} +\description{ +Clean up membership matrix +} diff --git a/man/selectGenes.Rd b/man/selectGenes.Rd index 3de0702..15156ca 100644 --- a/man/selectGenes.Rd +++ b/man/selectGenes.Rd @@ -11,7 +11,7 @@ selectGenes(expr, type = "count", SPCA = TRUE, DESCEND = TRUE, \item{expr}{a cell-by-gene expression matrix, either the raw counts or log-transformed expressions.} \item{type}{"count" if \code{expr} contains the raw counts (default), -or "log" if \code{expr} has been normalized and log-transformed (default).} +or "log" if \code{expr} has been normalized and log-transformed.} \item{SPCA}{boolean, whether to use SPCA or not.}