From bc585682eb2492f3a3ca4031a8da4acde6b7ff54 Mon Sep 17 00:00:00 2001 From: Xiuwen Zheng Date: Fri, 23 Feb 2024 01:11:49 -0600 Subject: [PATCH] snpgdsAdmixProp() --- NEWS | 2 ++ R/PCA.R | 14 ++++++-------- README.md | 2 +- man/snpgdsAdmixPlot.Rd | 18 +++++++++++++----- man/snpgdsAdmixProp.Rd | 16 +++++++--------- 5 files changed, 29 insertions(+), 23 deletions(-) diff --git a/NEWS b/NEWS index 8f4aa85..e967f23 100644 --- a/NEWS +++ b/NEWS @@ -10,6 +10,8 @@ UTILITIES o show a progress bar in `snpgdsLDpruning()` when 'verbose=TRUE' + o update the help files of `snpgdsAdmixProp()` and `snpgdsAdmixPlot()` + CHANGES IN VERSION 1.34.0 ------------------------- diff --git a/R/PCA.R b/R/PCA.R index faf7e24..39c1540 100755 --- a/R/PCA.R +++ b/R/PCA.R @@ -6,7 +6,7 @@ # A High-performance Computing Toolset for Relatedness and # Principal Component Analysis of SNP Data # -# Copyright (C) 2011 - 2020 Xiuwen Zheng +# Copyright (C) 2011 - 2024 Xiuwen Zheng # License: GPL-3 # @@ -343,7 +343,6 @@ snpgdsAdmixProp <- function(eigobj, groups, bound=FALSE) # 'sample.id' and 'eigenvect' should exist stopifnot(!is.null(eigobj$sample.id)) stopifnot(is.matrix(eigobj$eigenvect)) - stopifnot(is.list(groups)) stopifnot(length(groups) > 1) if (length(groups) > (ncol(eigobj$eigenvect)+1)) @@ -374,8 +373,7 @@ snpgdsAdmixProp <- function(eigobj, groups, bound=FALSE) grlist <- c(grlist, groups[[i]]) } - stopifnot(is.logical(bound) & is.vector(bound)) - stopifnot(length(bound) == 1) + stopifnot(is.logical(bound), length(bound)==1L) # calculate ... @@ -393,7 +391,7 @@ snpgdsAdmixProp <- function(eigobj, groups, bound=FALSE) } # check - if (any(is.na(mat))) + if (anyNA(mat)) stop("The eigenvectors should not have missing value!") T.P <- mat[length(groups), ] @@ -407,11 +405,11 @@ snpgdsAdmixProp <- function(eigobj, groups, bound=FALSE) rownames(new.p) <- eigobj$sample.id # whether bounded - if (bound) + if (isTRUE(bound)) { new.p[new.p < 0] <- 0 - r <- 1.0 / rowSums(new.p) - new.p <- new.p * r + new.p[new.p > 1] <- 1 + new.p <- new.p / rowSums(new.p) } new.p diff --git a/README.md b/README.md index 260c266..5c844fb 100644 --- a/README.md +++ b/README.md @@ -18,7 +18,7 @@ The GDS format offers the efficient operations specifically designed for integer ## Bioconductor -Release Version: v1.36.0 +Release Version: v1.36.1 [http://www.bioconductor.org/packages/SNPRelate](http://www.bioconductor.org/packages/SNPRelate) diff --git a/man/snpgdsAdmixPlot.Rd b/man/snpgdsAdmixPlot.Rd index ad4f01a..2d9d947 100644 --- a/man/snpgdsAdmixPlot.Rd +++ b/man/snpgdsAdmixPlot.Rd @@ -15,12 +15,15 @@ snpgdsAdmixTable(propmat, group, sort=FALSE) \arguments{ \item{propmat}{a sample-by-ancestry matrix of proportion estimates, returned from \code{\link{snpgdsAdmixProp}()}} - \item{group}{a character vector of a factor according to the samples + \item{group}{a character vector of a factor according to the rows in \code{propmat}} - \item{col}{specify colors} + \item{col}{specify colors; if \code{group} is not specified, it is a color + for each sample; otherwise specify colors for the groups} \item{multiplot}{single plot or multiple plots} - \item{showgrp}{show group names in the plot} - \item{shownum}{\code{TRUE}: show the number of each group in the figure} + \item{showgrp}{show group names in the plot; applicable when \code{group} + is used} + \item{shownum}{\code{TRUE}: show the number of each group on the X-axis + in the figure; applicable when \code{group} is used} \item{ylim}{\code{TRUE}: y-axis is limited to [0, 1]; \code{FALSE}: \code{ylim <- range(propmat)}; a 2-length numeric vector: \code{ylim} used in \code{plot()}} @@ -71,11 +74,16 @@ groups <- list(CEU = samp.id[pop_code == "CEU"], YRI = samp.id[pop_code == "YRI"], CHB = samp.id[is.element(pop_code, c("HCB", "JPT"))]) -prop <- snpgdsAdmixProp(RV, groups=groups) +prop <- snpgdsAdmixProp(RV, groups=groups, bound=TRUE) # draw snpgdsAdmixPlot(prop, group=pop_code) +# use user-defined colors for the groups +snpgdsAdmixPlot(prop, group=pop_code, multiplot=FALSE, col=c(3,2,4)) + +snpgdsAdmixTable(prop, group=pop_code) + # close the genotype file snpgdsClose(genofile) } diff --git a/man/snpgdsAdmixProp.Rd b/man/snpgdsAdmixProp.Rd index 6c44c01..0b4729c 100644 --- a/man/snpgdsAdmixProp.Rd +++ b/man/snpgdsAdmixProp.Rd @@ -16,20 +16,17 @@ snpgdsAdmixProp(eigobj, groups, bound=FALSE) \item{groups}{a list of sample IDs, such like \code{groups = list( CEU = c("NA0101", "NA1022", ...), YRI = c("NAxxxx", ...), Asia = c("NA1234", ...))}} - \item{bound}{if \code{TRUE}, the estimates are bounded so that no - component < 0 or > 1, and the sum of proportions is one} + \item{bound}{if \code{TRUE}, the estimates are bounded in [0, 1], and + the sum of proportions is one; \code{bound=FALSE} for unbiased + estimates} } \details{ The minor allele frequency and missing rate for each SNP passed in \code{snp.id} are calculated over all the samples in \code{sample.id}. } \value{ - Return a \code{snpgdsEigMixClass} object, and it is a list: - \item{sample.id}{the sample ids used in the analysis} - \item{snp.id}{the SNP ids used in the analysis} - \item{eigenval}{eigenvalues} - \item{eigenvect}{eigenvactors, "# of samples" x "eigen.cnt"} - \item{ibdmat}{the IBD matrix} + Return a matrix of ancestral proportions with rows for study individuals +(\code{rownames()} is sample ID). } \references{ @@ -72,7 +69,7 @@ head(tab) # draw plot(tab$EV2, tab$EV1, col=as.integer(tab$pop), xlab="eigenvector 2", ylab="eigenvector 1") -legend("topleft", legend=levels(tab$pop), pch="o", col=1:4) +legend("bottomleft", legend=levels(tab$pop), pch="o", col=1:4) # define groups @@ -81,6 +78,7 @@ groups <- list(CEU = samp.id[pop_code == "CEU"], CHB = samp.id[is.element(pop_code, c("HCB", "JPT"))]) prop <- snpgdsAdmixProp(RV, groups=groups) +head(prop) # draw plot(prop[, "YRI"], prop[, "CEU"], col=as.integer(tab$pop),