From bc585682eb2492f3a3ca4031a8da4acde6b7ff54 Mon Sep 17 00:00:00 2001
From: Xiuwen Zheng <zhengxwen@users.noreply.github.com>
Date: Fri, 23 Feb 2024 01:11:49 -0600
Subject: [PATCH] snpgdsAdmixProp()

---
 NEWS                   |  2 ++
 R/PCA.R                | 14 ++++++--------
 README.md              |  2 +-
 man/snpgdsAdmixPlot.Rd | 18 +++++++++++++-----
 man/snpgdsAdmixProp.Rd | 16 +++++++---------
 5 files changed, 29 insertions(+), 23 deletions(-)

diff --git a/NEWS b/NEWS
index 8f4aa85..e967f23 100644
--- a/NEWS
+++ b/NEWS
@@ -10,6 +10,8 @@ UTILITIES
 
     o show a progress bar in `snpgdsLDpruning()` when 'verbose=TRUE'
 
+    o update the help files of `snpgdsAdmixProp()` and `snpgdsAdmixPlot()`
+
 
 CHANGES IN VERSION 1.34.0
 -------------------------
diff --git a/R/PCA.R b/R/PCA.R
index faf7e24..39c1540 100755
--- a/R/PCA.R
+++ b/R/PCA.R
@@ -6,7 +6,7 @@
 #     A High-performance Computing Toolset for Relatedness and
 # Principal Component Analysis of SNP Data
 #
-# Copyright (C) 2011 - 2020        Xiuwen Zheng
+# Copyright (C) 2011 - 2024        Xiuwen Zheng
 # License: GPL-3
 #
 
@@ -343,7 +343,6 @@ snpgdsAdmixProp <- function(eigobj, groups, bound=FALSE)
     # 'sample.id' and 'eigenvect' should exist
     stopifnot(!is.null(eigobj$sample.id))
     stopifnot(is.matrix(eigobj$eigenvect))
-
     stopifnot(is.list(groups))
     stopifnot(length(groups) > 1)
     if (length(groups) > (ncol(eigobj$eigenvect)+1))
@@ -374,8 +373,7 @@ snpgdsAdmixProp <- function(eigobj, groups, bound=FALSE)
         grlist <- c(grlist, groups[[i]])
     }
 
-    stopifnot(is.logical(bound) & is.vector(bound))
-    stopifnot(length(bound) == 1)
+    stopifnot(is.logical(bound), length(bound)==1L)
 
     # calculate ...
 
@@ -393,7 +391,7 @@ snpgdsAdmixProp <- function(eigobj, groups, bound=FALSE)
     }
 
     # check
-    if (any(is.na(mat)))
+    if (anyNA(mat))
         stop("The eigenvectors should not have missing value!")
 
     T.P <- mat[length(groups), ]
@@ -407,11 +405,11 @@ snpgdsAdmixProp <- function(eigobj, groups, bound=FALSE)
     rownames(new.p) <- eigobj$sample.id
 
     # whether bounded
-    if (bound)
+    if (isTRUE(bound))
     {
         new.p[new.p < 0] <- 0
-        r <- 1.0 / rowSums(new.p)
-        new.p <- new.p * r
+        new.p[new.p > 1] <- 1
+        new.p <- new.p / rowSums(new.p)
     }
 
     new.p
diff --git a/README.md b/README.md
index 260c266..5c844fb 100644
--- a/README.md
+++ b/README.md
@@ -18,7 +18,7 @@ The GDS format offers the efficient operations specifically designed for integer
 
 ## Bioconductor
 
-Release Version: v1.36.0
+Release Version: v1.36.1
 
 [http://www.bioconductor.org/packages/SNPRelate](http://www.bioconductor.org/packages/SNPRelate)
 
diff --git a/man/snpgdsAdmixPlot.Rd b/man/snpgdsAdmixPlot.Rd
index ad4f01a..2d9d947 100644
--- a/man/snpgdsAdmixPlot.Rd
+++ b/man/snpgdsAdmixPlot.Rd
@@ -15,12 +15,15 @@ snpgdsAdmixTable(propmat, group, sort=FALSE)
 \arguments{
     \item{propmat}{a sample-by-ancestry matrix of proportion estimates,
         returned from \code{\link{snpgdsAdmixProp}()}}
-    \item{group}{a character vector of a factor according to the samples
+    \item{group}{a character vector of a factor according to the rows
         in \code{propmat}}
-    \item{col}{specify colors}
+    \item{col}{specify colors; if \code{group} is not specified, it is a color
+        for each sample; otherwise specify colors for the groups}
     \item{multiplot}{single plot or multiple plots}
-    \item{showgrp}{show group names in the plot}
-    \item{shownum}{\code{TRUE}: show the number of each group in the figure}
+    \item{showgrp}{show group names in the plot; applicable when \code{group}
+        is used}
+    \item{shownum}{\code{TRUE}: show the number of each group on the X-axis
+        in the figure; applicable when \code{group} is used}
     \item{ylim}{\code{TRUE}: y-axis is limited to [0, 1];
         \code{FALSE}: \code{ylim <- range(propmat)};
         a 2-length numeric vector: \code{ylim} used in \code{plot()}}
@@ -71,11 +74,16 @@ groups <- list(CEU = samp.id[pop_code == "CEU"],
     YRI = samp.id[pop_code == "YRI"],
     CHB = samp.id[is.element(pop_code, c("HCB", "JPT"))])
 
-prop <- snpgdsAdmixProp(RV, groups=groups)
+prop <- snpgdsAdmixProp(RV, groups=groups, bound=TRUE)
 
 # draw
 snpgdsAdmixPlot(prop, group=pop_code)
 
+# use user-defined colors for the groups
+snpgdsAdmixPlot(prop, group=pop_code, multiplot=FALSE, col=c(3,2,4))
+
+snpgdsAdmixTable(prop, group=pop_code)
+
 # close the genotype file
 snpgdsClose(genofile)
 }
diff --git a/man/snpgdsAdmixProp.Rd b/man/snpgdsAdmixProp.Rd
index 6c44c01..0b4729c 100644
--- a/man/snpgdsAdmixProp.Rd
+++ b/man/snpgdsAdmixProp.Rd
@@ -16,20 +16,17 @@ snpgdsAdmixProp(eigobj, groups, bound=FALSE)
     \item{groups}{a list of sample IDs, such like \code{groups = list(
         CEU = c("NA0101", "NA1022", ...), YRI = c("NAxxxx", ...),
         Asia = c("NA1234", ...))}}
-    \item{bound}{if \code{TRUE}, the estimates are bounded so that no
-        component < 0 or > 1, and the sum of proportions is one}
+    \item{bound}{if \code{TRUE}, the estimates are bounded in [0, 1], and
+        the sum of proportions is one; \code{bound=FALSE} for unbiased
+        estimates}
 }
 \details{
     The minor allele frequency and missing rate for each SNP passed in
 \code{snp.id} are calculated over all the samples in \code{sample.id}.
 }
 \value{
-    Return a \code{snpgdsEigMixClass} object, and it is a list:
-    \item{sample.id}{the sample ids used in the analysis}
-    \item{snp.id}{the SNP ids used in the analysis}
-    \item{eigenval}{eigenvalues}
-    \item{eigenvect}{eigenvactors, "# of samples" x "eigen.cnt"}
-    \item{ibdmat}{the IBD matrix}
+    Return a matrix of ancestral proportions with rows for study individuals
+(\code{rownames()} is sample ID).
 }
 
 \references{
@@ -72,7 +69,7 @@ head(tab)
 # draw
 plot(tab$EV2, tab$EV1, col=as.integer(tab$pop),
     xlab="eigenvector 2", ylab="eigenvector 1")
-legend("topleft", legend=levels(tab$pop), pch="o", col=1:4)
+legend("bottomleft", legend=levels(tab$pop), pch="o", col=1:4)
 
 
 # define groups
@@ -81,6 +78,7 @@ groups <- list(CEU = samp.id[pop_code == "CEU"],
     CHB = samp.id[is.element(pop_code, c("HCB", "JPT"))])
 
 prop <- snpgdsAdmixProp(RV, groups=groups)
+head(prop)
 
 # draw
 plot(prop[, "YRI"], prop[, "CEU"], col=as.integer(tab$pop),