From 64872ac1e1736882182846c4afed8a0674418057 Mon Sep 17 00:00:00 2001
From: Lingxue Zhu <lingxue.zhu@gmail.com>
Date: Mon, 19 Mar 2018 00:16:50 -0400
Subject: [PATCH] clean up documentation

---
 R/SOUP.R                        | 16 +++++++--------
 R/SOUPtime.R                    |  4 ++--
 R/cvSOUP.R                      | 35 ++++++++++++++++++---------------
 R/geneSelect.R                  |  6 +++---
 R/utils_plot.R                  |  6 +++---
 man/DESCENDselect.Rd            |  4 ++--
 man/SOUP.Rd                     |  8 +-------
 man/cv.error.SOUP.Rd            | 16 +++++++--------
 man/cvSOUP.Rd                   | 20 ++++++++++---------
 man/findPure.Rd                 |  2 +-
 man/getClusterOrder.Rd          |  2 +-
 man/getMajorMatrix.Rd           |  4 ++--
 man/getTimeline.Rd              |  2 +-
 man/heatmapKseq.Rd              |  3 ++-
 man/plotGeneTimeline.Rd         |  6 +++---
 man/plotMultipleGeneTimeline.Rd |  2 +-
 man/predictTheta.Rd             |  8 ++++++++
 man/projMembership.Rd           | 17 ++++++++++++++++
 man/selectGenes.Rd              |  2 +-
 19 files changed, 93 insertions(+), 70 deletions(-)
 create mode 100644 man/projMembership.Rd

diff --git a/R/SOUP.R b/R/SOUP.R
index 771fb79..9b94ed0 100644
--- a/R/SOUP.R
+++ b/R/SOUP.R
@@ -6,7 +6,7 @@
 #' @param expr a cell-by-gene expression matrix, either the raw counts or log-transformed expressions. 
 #' @param Ks number of clusters, can be a single integer or a list of integers.
 #' @param type "log" if \code{expr} has been normalized and log-transformed (default),
-#'     or "count" (default) \code{expr} contains the raw counts.
+#'     or "count" if \code{expr} contains the raw counts.
 #'     It is recommended to use the log scale, which usually gives better results in practice.
 #' @param i.pure (optional) the indices of the pure cells. By default is \code{NULL}, and SOUP will infer the pure list.
 #' If the list is already known (for example, from previous runs), then providing it will reduce the computation time.
@@ -24,13 +24,7 @@
 #'   \item{i.pure}{the indices of pure cells with the highest purity scores.}
 #' }
 #' 
-#' @examples 
-#' select.genes = zeisel$select.genes
-#' counts = zeisel$counts[, colnames(counts) %in% select.genes]
-#' soup.out = SOUP(counts, Ks=7, type="count")
-#' 
 #' @export  
-
 SOUP <- function(expr, Ks=3, 
                  type="log", 
                  i.pure=NULL, ext.prop=NULL, pure.prop=0.5,
@@ -108,7 +102,7 @@ SOUP <- function(expr, Ks=3,
 #' 
 #' @param expr a cell-by-gene expression matrix, either the raw counts or log-transformed expressions. 
 #' @param type "log" if \code{expr} has been normalized and log-transformed (default),
-#'     or "count" (default) \code{expr} contains the raw counts.
+#'     or "count" if \code{expr} contains the raw counts.
 #' @param ext.prop (optional) the proportion of extreme neighbors for each cell, such that \code{ext.prop*n.cells} is roughly the number of pure cells \emph{per cluster}. 
 #' By default, \code{ext.prop=0.1} for less than 1,000 cells, and \code{ext.prop=0.05} for larger datasets.
 #' @param pure.prop (optional) the proportion of pure cells in the data. By default \code{pure.prop=0.5}.
@@ -257,7 +251,11 @@ getTheta <- function(expr, i.pure, pure.cluster, G) {
   
 }
 
-
+#' Clean up membership matrix
+#' 
+#' @param theta The estimated raw theta
+#' 
+#' @return The cleaned-up membership matrix.
 projMembership <- function(theta) {
   membership = theta
   membership[membership < 0] = 0
diff --git a/R/SOUPtime.R b/R/SOUPtime.R
index 7be58ad..0a2a20a 100644
--- a/R/SOUPtime.R
+++ b/R/SOUPtime.R
@@ -1,5 +1,5 @@
 
-#' Estimate Developmental Timeline
+#' Estimate developmental trajectory
 #' 
 #' Estimate the developmental timeline for cells from SOUP soft membership.
 #' 
@@ -37,7 +37,7 @@ getTimeline <- function(membership, centers,
   return(timeline)
 }
 
-#' Order Clusters
+#' Order clusters
 #' 
 #' Re-order cluster labels, starting from \code{k.start}, 
 #' and consecutively append the next one with the highest correlation in cluster centers.
diff --git a/R/cvSOUP.R b/R/cvSOUP.R
index 82413e4..cd4c407 100644
--- a/R/cvSOUP.R
+++ b/R/cvSOUP.R
@@ -1,20 +1,20 @@
-#' Cross Validation for SOUP
+#' Cross validation for SOUP
 #' 
 #' @param expr A cell-by-gene expression matrix, either the raw counts or log-transformed expressions. 
 #' @param type "log" if \code{expr} has been normalized and log-transformed (default),
 #'     or "count" (default) \code{expr} contains the raw counts.
-#' @param nfold Number of folds
-#' @param nCV Number of repetitions
 #' @param Ks A sequence of cluster numbers
-#' @param mc.cores Number of cores for parallelization
-#' @param verbose Whether to print progress
+#' @param nfold (optional) Number of folds, default is 10
+#' @param nCV (optional) Number of repetitions, default is 10
+#' @param mc.cores (optional) Number of cores for parallelization, default is 1 without parallelization
+#' @param seeds (optional) A list of seeds to be used, with length nCV, default is NULL
+#' @param verbose (optional) Whether to print progress, default is TRUE
 #' 
 #' @export
 #' 
-cvSOUP <- function(expr, type="log", 
-                   nfold=10, nCV=10, Ks=c(2:10), mc.cores=10,
-                   seeds=NULL,
-                   verbose=TRUE) {
+cvSOUP <- function(expr, type="log", Ks=c(2:10), 
+                   nfold=10, nCV=10, mc.cores=1,
+                   seeds=NULL, verbose=TRUE) {
   
   cv.errors = matrix(NA, nrow=nCV, ncol=length(Ks))
   cv.sds = matrix(NA, nrow=nCV, ncol=length(Ks))
@@ -48,20 +48,19 @@ cvSOUP <- function(expr, type="log",
               K.cv = K.cv))
 }
 
-#' Compute Cross Validation Errors
+#' Compute cross validation errors
 #' 
 #' @param expr A cell-by-gene expression matrix, either the raw counts or log-transformed expressions. 
 #' @param type "log" if \code{expr} has been normalized and log-transformed (default),
 #'     or "count" (default) \code{expr} contains the raw counts.
-#' @param nfold Number of folds
 #' @param Ks A sequence of cluster numbers
-#' @param seed (optional) random seed
-#' @param mc.cores Number of cores for parallelization
+#' @param nfold (optional) Number of folds, default is 10
+#' @param seed (optional) random seed, default is NULL
+#' @param mc.cores (optional) Number of cores for parallelization, default is 1 without parallelization
 #' 
 #' @export
-cv.error.SOUP <- function(expr, type="log", 
-                          nfold=10, Ks=c(2:10), seed=NULL,
-                          mc.cores=2) {
+cv.error.SOUP <- function(expr, type="log", Ks=c(2:10), 
+                          nfold=10, seed=NULL, mc.cores=1) {
   
   ## cross validation
   doCV <- function(fold, nfold, i.permute.ind, 
@@ -116,6 +115,10 @@ cv.error.SOUP <- function(expr, type="log",
 
 #' Predict the membership for new data points
 #' 
+#' @param new.expr cell-by-gene expression matrix
+#' @param t.centers transposed center matrix, n.gene-by-K
+#' 
+#' @return The predicted membership matrix.
 #' @export
 #' 
 predictTheta <- function(new.expr, t.centers) {
diff --git a/R/geneSelect.R b/R/geneSelect.R
index 640c0d5..b94ff62 100644
--- a/R/geneSelect.R
+++ b/R/geneSelect.R
@@ -4,7 +4,7 @@
 #' 
 #' @param expr a cell-by-gene expression matrix, either the raw counts or log-transformed expressions. 
 #' @param type "count" if \code{expr} contains the raw counts (default), 
-#'      or "log" if \code{expr} has been normalized and log-transformed (default).
+#'      or "log" if \code{expr} has been normalized and log-transformed.
 #' @param SPCA boolean, whether to use SPCA or not.
 #' @param DESCEND boolean, whether to use DESCEND or not.
 #' @param n.cores the number of cores used for parallel computing of DESCEND. 
@@ -86,9 +86,9 @@ selectGenes <- function(expr, type="count",
 #' Select highly variable genes for clustering using DESCEND.
 #' 
 #' @param counts the cell-by-gene expression counts. 
-#'    Note that DESCEND uses a Poisson model, so the count data should be provided (without normalization or log transformation).
+#'    Note that DESCEND uses a Poisson model, so the count data should be provided.
 #' @param n.cores the number of cores used for parallel computing. DESCEND can be slow so parallelization is highly recommended.
-#' @param threshold the threshold for Gini index. Higer threshold leads to fewer selected genes.
+#' @param threshold (optional) the threshold for Gini index, default is 3. Higher threshold leads to fewer selected genes.
 #' 
 #' @return A list containing \describe{
 #'   \item{select.genes}{the names of selected genes, ordered by decreasing scores.}
diff --git a/R/utils_plot.R b/R/utils_plot.R
index 9e02374..629b285 100644
--- a/R/utils_plot.R
+++ b/R/utils_plot.R
@@ -48,7 +48,7 @@ heatmapKseq <- function(memberships, Ks, cell.type, ref.lab="Reference",
 }
 
 
-#' Get the SOUP majority matrix
+#' Get the SOUP hard assignment matrix
 #' 
 #' @export
 #' 
@@ -169,7 +169,7 @@ plotContTable <- function(est_label, true_label, short.names=NULL, xlab="Referen
   return(g)
 }
 
-#' Plot Gene Expression along Trajectory
+#' Plot gene expression along trajectory
 #' 
 #' @param expr The cell-by-gene expression matrix to be visualized
 #' @param marker.gene The gene to be visualized; must be one of the columns of \code{expr}
@@ -203,7 +203,7 @@ plotGeneTimeline <- function(expr, marker.gene, timeline,
   return(g)
 }
 
-#' Plot Multiple Gene Expressions along Trajectory
+#' Plot multiple gene expressions along trajectory
 #' 
 #' Expression levels of multiple genes along trajectory, arranged on a grid of plots.
 #' 
diff --git a/man/DESCENDselect.Rd b/man/DESCENDselect.Rd
index 1be6427..5b2f6d5 100644
--- a/man/DESCENDselect.Rd
+++ b/man/DESCENDselect.Rd
@@ -8,11 +8,11 @@ DESCENDselect(counts, n.cores = 1, threshold = 3)
 }
 \arguments{
 \item{counts}{the cell-by-gene expression counts. 
-Note that DESCEND uses a Poisson model, so the count data should be provided (without normalization or log transformation).}
+Note that DESCEND uses a Poisson model, so the count data should be provided.}
 
 \item{n.cores}{the number of cores used for parallel computing. DESCEND can be slow so parallelization is highly recommended.}
 
-\item{threshold}{the threshold for Gini index. Higer threshold leads to fewer selected genes.}
+\item{threshold}{(optional) the threshold for Gini index, default is 3. Higher threshold leads to fewer selected genes.}
 }
 \value{
 A list containing \describe{
diff --git a/man/SOUP.Rd b/man/SOUP.Rd
index def2528..3118ba8 100644
--- a/man/SOUP.Rd
+++ b/man/SOUP.Rd
@@ -13,7 +13,7 @@ SOUP(expr, Ks = 3, type = "log", i.pure = NULL, ext.prop = NULL,
 \item{Ks}{number of clusters, can be a single integer or a list of integers.}
 
 \item{type}{"log" if \code{expr} has been normalized and log-transformed (default),
-or "count" (default) \code{expr} contains the raw counts.
+or "count" if \code{expr} contains the raw counts.
 It is recommended to use the log scale, which usually gives better results in practice.}
 
 \item{i.pure}{(optional) the indices of the pure cells. By default is \code{NULL}, and SOUP will infer the pure list.
@@ -39,9 +39,3 @@ A list containing  \describe{
 \description{
 A semi-soft clustering algorithm for single cells.
 }
-\examples{
-select.genes = zeisel$select.genes
-counts = zeisel$counts[, colnames(counts) \%in\% select.genes]
-soup.out = SOUP(counts, Ks=7, type="count")
-
-}
diff --git a/man/cv.error.SOUP.Rd b/man/cv.error.SOUP.Rd
index be76f14..7dac271 100644
--- a/man/cv.error.SOUP.Rd
+++ b/man/cv.error.SOUP.Rd
@@ -2,10 +2,10 @@
 % Please edit documentation in R/cvSOUP.R
 \name{cv.error.SOUP}
 \alias{cv.error.SOUP}
-\title{Compute Cross Validation Errors}
+\title{Compute cross validation errors}
 \usage{
-cv.error.SOUP(expr, type = "log", nfold = 10, Ks = c(2:10), seed = NULL,
-  mc.cores = 2)
+cv.error.SOUP(expr, type = "log", Ks = c(2:10), nfold = 10, seed = NULL,
+  mc.cores = 1)
 }
 \arguments{
 \item{expr}{A cell-by-gene expression matrix, either the raw counts or log-transformed expressions.}
@@ -13,14 +13,14 @@ cv.error.SOUP(expr, type = "log", nfold = 10, Ks = c(2:10), seed = NULL,
 \item{type}{"log" if \code{expr} has been normalized and log-transformed (default),
 or "count" (default) \code{expr} contains the raw counts.}
 
-\item{nfold}{Number of folds}
-
 \item{Ks}{A sequence of cluster numbers}
 
-\item{seed}{(optional) random seed}
+\item{nfold}{(optional) Number of folds, default is 10}
+
+\item{seed}{(optional) random seed, default is NULL}
 
-\item{mc.cores}{Number of cores for parallelization}
+\item{mc.cores}{(optional) Number of cores for parallelization, default is 1 without parallelization}
 }
 \description{
-Compute Cross Validation Errors
+Compute cross validation errors
 }
diff --git a/man/cvSOUP.Rd b/man/cvSOUP.Rd
index 2507b81..87d5ae8 100644
--- a/man/cvSOUP.Rd
+++ b/man/cvSOUP.Rd
@@ -2,10 +2,10 @@
 % Please edit documentation in R/cvSOUP.R
 \name{cvSOUP}
 \alias{cvSOUP}
-\title{Cross Validation for SOUP}
+\title{Cross validation for SOUP}
 \usage{
-cvSOUP(expr, type = "log", nfold = 10, nCV = 10, Ks = c(2:10),
-  mc.cores = 10, verbose = TRUE)
+cvSOUP(expr, type = "log", Ks = c(2:10), nfold = 10, nCV = 10,
+  mc.cores = 1, seeds = NULL, verbose = TRUE)
 }
 \arguments{
 \item{expr}{A cell-by-gene expression matrix, either the raw counts or log-transformed expressions.}
@@ -13,16 +13,18 @@ cvSOUP(expr, type = "log", nfold = 10, nCV = 10, Ks = c(2:10),
 \item{type}{"log" if \code{expr} has been normalized and log-transformed (default),
 or "count" (default) \code{expr} contains the raw counts.}
 
-\item{nfold}{Number of folds}
+\item{Ks}{A sequence of cluster numbers}
 
-\item{nCV}{Number of repetitions}
+\item{nfold}{(optional) Number of folds, default is 10}
 
-\item{Ks}{A sequence of cluster numbers}
+\item{nCV}{(optional) Number of repetitions, default is 10}
+
+\item{mc.cores}{(optional) Number of cores for parallelization, default is 1 without parallelization}
 
-\item{mc.cores}{Number of cores for parallelization}
+\item{seeds}{(optional) A list of seeds to be used, with length nCV, default is NULL}
 
-\item{verbose}{Whether to print progress}
+\item{verbose}{(optional) Whether to print progress, default is TRUE}
 }
 \description{
-Cross Validation for SOUP
+Cross validation for SOUP
 }
diff --git a/man/findPure.Rd b/man/findPure.Rd
index 24f9fef..ab0357a 100644
--- a/man/findPure.Rd
+++ b/man/findPure.Rd
@@ -10,7 +10,7 @@ findPure(expr, type = "log", ext.prop = NULL, pure.prop = 0.5)
 \item{expr}{a cell-by-gene expression matrix, either the raw counts or log-transformed expressions.}
 
 \item{type}{"log" if \code{expr} has been normalized and log-transformed (default),
-or "count" (default) \code{expr} contains the raw counts.}
+or "count" if \code{expr} contains the raw counts.}
 
 \item{ext.prop}{(optional) the proportion of extreme neighbors for each cell, such that \code{ext.prop*n.cells} is roughly the number of pure cells \emph{per cluster}. 
 By default, \code{ext.prop=0.1} for less than 1,000 cells, and \code{ext.prop=0.05} for larger datasets.}
diff --git a/man/getClusterOrder.Rd b/man/getClusterOrder.Rd
index c366414..707778a 100644
--- a/man/getClusterOrder.Rd
+++ b/man/getClusterOrder.Rd
@@ -2,7 +2,7 @@
 % Please edit documentation in R/SOUPtime.R
 \name{getClusterOrder}
 \alias{getClusterOrder}
-\title{Order Clusters}
+\title{Order clusters}
 \usage{
 getClusterOrder(cor.center, k.start = 1)
 }
diff --git a/man/getMajorMatrix.Rd b/man/getMajorMatrix.Rd
index 7fb9fa5..b84dc21 100644
--- a/man/getMajorMatrix.Rd
+++ b/man/getMajorMatrix.Rd
@@ -2,10 +2,10 @@
 % Please edit documentation in R/utils_plot.R
 \name{getMajorMatrix}
 \alias{getMajorMatrix}
-\title{Get the SOUP majority matrix}
+\title{Get the SOUP hard assignment matrix}
 \usage{
 getMajorMatrix(memberships, Ks, cell.type, ref.lab = "Reference")
 }
 \description{
-Get the SOUP majority matrix
+Get the SOUP hard assignment matrix
 }
diff --git a/man/getTimeline.Rd b/man/getTimeline.Rd
index af7ccb3..eeef13e 100644
--- a/man/getTimeline.Rd
+++ b/man/getTimeline.Rd
@@ -2,7 +2,7 @@
 % Please edit documentation in R/SOUPtime.R
 \name{getTimeline}
 \alias{getTimeline}
-\title{Estimate Developmental Timeline}
+\title{Estimate developmental trajectory}
 \usage{
 getTimeline(membership, centers, k.start = NULL, k.end = NULL)
 }
diff --git a/man/heatmapKseq.Rd b/man/heatmapKseq.Rd
index 9d594cf..59b4e89 100644
--- a/man/heatmapKseq.Rd
+++ b/man/heatmapKseq.Rd
@@ -4,7 +4,8 @@
 \alias{heatmapKseq}
 \title{Visualize SOUP hard assignments.}
 \usage{
-heatmapKseq(memberships, Ks, cell.type, ref.lab = "Reference")
+heatmapKseq(memberships, Ks, cell.type, ref.lab = "Reference",
+  font.size = 10)
 }
 \arguments{
 \item{memberships}{A list of membership matrices of different K}
diff --git a/man/plotGeneTimeline.Rd b/man/plotGeneTimeline.Rd
index fa4a27e..8d5b5fd 100644
--- a/man/plotGeneTimeline.Rd
+++ b/man/plotGeneTimeline.Rd
@@ -2,10 +2,10 @@
 % Please edit documentation in R/utils_plot.R
 \name{plotGeneTimeline}
 \alias{plotGeneTimeline}
-\title{Plot Gene Expression along Trajectory}
+\title{Plot gene expression along trajectory}
 \usage{
 plotGeneTimeline(expr, marker.gene, timeline, x.title = "SOUP trajectory",
-  y.title = "Expression", title = "")
+  y.title = "Expression", title = "", font.size = 10)
 }
 \arguments{
 \item{expr}{The cell-by-gene expression matrix to be visualized}
@@ -24,5 +24,5 @@ plotGeneTimeline(expr, marker.gene, timeline, x.title = "SOUP trajectory",
 A ggplot object
 }
 \description{
-Plot Gene Expression along Trajectory
+Plot gene expression along trajectory
 }
diff --git a/man/plotMultipleGeneTimeline.Rd b/man/plotMultipleGeneTimeline.Rd
index 86ba3ef..b930551 100644
--- a/man/plotMultipleGeneTimeline.Rd
+++ b/man/plotMultipleGeneTimeline.Rd
@@ -2,7 +2,7 @@
 % Please edit documentation in R/utils_plot.R
 \name{plotMultipleGeneTimeline}
 \alias{plotMultipleGeneTimeline}
-\title{Plot Multiple Gene Expressions along Trajectory}
+\title{Plot multiple gene expressions along trajectory}
 \usage{
 plotMultipleGeneTimeline(expr, genelist, timeline,
   x.title = "SOUP trajectory", y.title = "Expression", nrow = NULL,
diff --git a/man/predictTheta.Rd b/man/predictTheta.Rd
index b6c3084..118537a 100644
--- a/man/predictTheta.Rd
+++ b/man/predictTheta.Rd
@@ -6,6 +6,14 @@
 \usage{
 predictTheta(new.expr, t.centers)
 }
+\arguments{
+\item{new.expr}{cell-by-gene expression matrix}
+
+\item{t.centers}{transposed center matrix, n.gene-by-K}
+}
+\value{
+The predicted membership matrix.
+}
 \description{
 Predict the membership for new data points
 }
diff --git a/man/projMembership.Rd b/man/projMembership.Rd
new file mode 100644
index 0000000..7fb0581
--- /dev/null
+++ b/man/projMembership.Rd
@@ -0,0 +1,17 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/SOUP.R
+\name{projMembership}
+\alias{projMembership}
+\title{Clean up membership matrix}
+\usage{
+projMembership(theta)
+}
+\arguments{
+\item{theta}{The estimated raw theta}
+}
+\value{
+The cleaned-up membership matrix.
+}
+\description{
+Clean up membership matrix
+}
diff --git a/man/selectGenes.Rd b/man/selectGenes.Rd
index 3de0702..15156ca 100644
--- a/man/selectGenes.Rd
+++ b/man/selectGenes.Rd
@@ -11,7 +11,7 @@ selectGenes(expr, type = "count", SPCA = TRUE, DESCEND = TRUE,
 \item{expr}{a cell-by-gene expression matrix, either the raw counts or log-transformed expressions.}
 
 \item{type}{"count" if \code{expr} contains the raw counts (default), 
-or "log" if \code{expr} has been normalized and log-transformed (default).}
+or "log" if \code{expr} has been normalized and log-transformed.}
 
 \item{SPCA}{boolean, whether to use SPCA or not.}