diff --git a/.Rbuildignore b/.Rbuildignore index e005ee4..446dfe8 100644 --- a/.Rbuildignore +++ b/.Rbuildignore @@ -12,3 +12,5 @@ ^CRAN-SUBMISSION$ ^BGmisc\.code-workspace$ ^\.lintr$ +^CODE_OF_CONDUCT\.md +^CONTRIBUTING\.md diff --git a/NAMESPACE b/NAMESPACE index 5523f5a..206bdaa 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -20,6 +20,7 @@ export(plotPedigree) export(related_coef) export(relatedness) export(simulatePedigree) +export(twinImpute) export(vech) import(kinship2) importFrom(stats,runif) diff --git a/NEWS.md b/NEWS.md index ba3fee5..f5929ec 100644 --- a/NEWS.md +++ b/NEWS.md @@ -1,6 +1,8 @@ -# BGmisc 1.0.1 - Hot fix to resolve plotPedigree wrapper function breaking for pedigrees that contained multiple families +# BGmisc 1.1.0 +* Added ability to simulate twins +# BGmisc 1.0.1 +* Hot fix to resolve plotPedigree wrapper function breaking for pedigrees that contained multiple families # BGmisc 1.0 diff --git a/R/famSizeCal.R b/R/famSizeCal.R index eedc981..1ef2b5a 100644 --- a/R/famSizeCal.R +++ b/R/famSizeCal.R @@ -74,3 +74,95 @@ famSizeCal <- function(kpc, Ngen, marR) { } return(size) } + +#' twinImpute +#' A function to impute twins in the simulated pedigree \code{data.frame}. +#' Twins can be imputed by specifying their IDs or by specifying the generation the twin should be imputed. +#' This is a supplementary function for \code{simulatePedigree}. +#' @param ped A \code{data.frame} in the same format as the output of \code{simulatePedigree}. +#' @param ID_twin1 A vector of \code{ID} of the first twin. +#' @param ID_twin2 A vector of \code{ID} of the second twin. +#' @param gen_twin A vector of \code{generation} of the twin to be imputed. +#' @return Returns a \code{data.frame} with MZ twins infomration added as a new column. +#' @export + +# A function to impute twins in the simulated pedigree \code{data.frame}. +# Twins can be imputed by specifying their IDs or by specifying the generation the twin should be imputed. +twinImpute <- function(ped, ID_twin1 = NA_integer_, ID_twin2 = NA_integer_, gen_twin = 2){ + # a support function + resample <- function(x, ...) x[sample.int(length(x), ...)] + # Check if the ped is the same format as the output of simulatePedigree + if(paste0(colnames(ped), collapse = "") != paste0(c("fam", "ID", "gen", "dadID", "momID", "spt", "sex"), collapse = "")){ + stop("The input pedigree is not in the same format as the output of simulatePedigree") + } + ped$MZtwin <- NA_integer_ + # Check if the two IDs are provided + if(is.na(ID_twin1) || is.na(ID_twin2)){ + # Check if the generation is provided + if(is.na(gen_twin)){ + stop("You should provide either the IDs of the twins or the generation of the twins") + } else { + # Check if the generation is valid + if(gen_twin < 2 || gen_twin > max(ped$gen)){ + stop("The generation of the twins should be an integer between 2 and the maximum generation in the pedigree") + } else { + idx <- nrow(ped[ped$gen == gen_twin & !is.na(ped$dadID),]) + usedID = c() + # randomly loop through all the indivuduals in the generation until find an individual who is the same sex and shares the same dadID and momID with another individual + for (i in 1: idx) { + cat("loop",i) + # check if i is equal to the number of individuals in the generation + usedID <- c(usedID, ID_twin1) + #print(usedID) + if(i < idx){ + # randomly select one individual from the generation + ID_twin1 <- resample(ped$ID[ped$gen == gen_twin & !(ped$ID %in% usedID) & !is.na(ped$dadID)], 1) + #cat("twin1", ID_twin1, "\n") + # find one same sex sibling who has the same dadID and momID as the selected individual + twin2_Pool = ped$ID[ped$ID != ID_twin1 & ped$gen == gen_twin & ped$sex == ped$sex[ped$ID == ID_twin1] & ped$dadID == ped$dadID[ped$ID == ID_twin1] & ped$momID == ped$momID[ped$ID == ID_twin1]] + # if there is an non-NA value in the twin2_Pool, get rid of the NA value + if (all(is.na(twin2_Pool))) { + cat("twin2_Pool is all NA\n") + next + } else { + twin2_Pool <- twin2_Pool[!is.na(twin2_Pool)] + ID_twin2 <- resample( twin2_Pool , 1) + break + } + # test if the ID_twin2 is missing + # if(!is.na(ID_twin2)){ + # break + # } + } else { + # randomly select all males or females in the generation and put them in a vector + selectGender <- ped$ID[ped$gen == gen_twin & ped$sex == resample(c("M", "F"), 1) & !is.na(ped$dadID) & !is.na(ped$momID)] + #print(selectGender) + # randomly select two individuals from the vector + ID_DoubleTwin <- sample(selectGender, 2) + #print(ID_DoubleTwin) + # change the second person's dadID and momID to the first person's dadID and momID + ped$dadID[ped$ID == ID_DoubleTwin[2]] <- ped$dadID[ped$ID == ID_DoubleTwin[1]] + ped$momID[ped$ID == ID_DoubleTwin[2]] <- ped$momID[ped$ID == ID_DoubleTwin[1]] + # let the two individuals be twins! + ID_twin1 <- ID_DoubleTwin[1] + ID_twin2 <- ID_DoubleTwin[2] + break + + } + + } + # Impute the IDs of the twin in the MZtwin column + ped$MZtwin[ped$ID == ID_twin1] <- ID_twin2 + ped$MZtwin[ped$ID == ID_twin2] <- ID_twin1 + } + } +} else { + # Impute the IDs of the twin in the MZtwin column + ped$MZtwin[ped$ID == ID_twin1] <- ID_twin2 + ped$MZtwin[ped$ID == ID_twin2] <- ID_twin1 + } + cat("twin1", ID_twin1, "\n") + cat("twin2", ID_twin2, "\n") + return(ped) +} + diff --git a/R/simulatePedigree.R b/R/simulatePedigree.R index cb2fb7a..fc09f9f 100644 --- a/R/simulatePedigree.R +++ b/R/simulatePedigree.R @@ -1,5 +1,4 @@ #' Simulate Pedigrees -#' #' This function simulates "balanced" pedigrees based on a group of parameters: #' 1) k - Kids per couple; #' 2) G - Number of generations; @@ -13,6 +12,8 @@ #' @param marR Mating rate. A numeric value ranging from 0 to 1 which determines the proportion of mated (fertilized) couples in the pedigree within each generation. For instance, marR = 0.5 suggests 50 percent of the offspring in a specific generation will be mated and have their offspring. #' @param balancedSex Not fully developed yet. Always \code{TRUE} in the current version. #' @param balancedmar Not fully developed yet. Always \code{TRUE} in the current version. +#' @param verbose logical If TRUE, print progress through stages of algorithm + #' @return A \code{data.frame} with each row representing a simulated individual. The columns are as follows: #' \itemize{ #' \item{fam: The family id of each simulated individual. It is 'fam1' in a single simulated pedigree.} @@ -30,15 +31,16 @@ simulatePedigree <- function(kpc = 3, sexR = .5, marR = 2 / 3, balancedSex = TRUE, - balancedmar = TRUE) { + balancedmar = TRUE, + verbose = FALSE) { # SexRatio: ratio of male over female in the offspring setting; used in the between generation combinations SexRatio <- sexR / (1 - sexR) # Calculate the expected family size in each generations sizeGens <- allGens(kpc = kpc, Ngen = Ngen, marR = marR) famSizeIndex <- 1:sum(sizeGens) - - # Step 1: Let's build the connection within each generation first + if(verbose){print( + "Step 1: Let's build the connection within each generation first")} for (i in 1:Ngen) { idGen <- as.numeric(paste(100, i, 1:sizeGens[i], sep = "")) # idGen <- ifelse(i==1, @@ -167,7 +169,8 @@ simulatePedigree <- function(kpc = 3, } } - # Step 2: Let's try to build connection between each two generations + if(verbose){ print( + "Step 2: Let's try to build connection between each two generations")} df_Fam$ifparent <- FALSE df_Fam$ifson <- FALSE df_Fam$ifdau <- FALSE @@ -277,8 +280,8 @@ simulatePedigree <- function(kpc = 3, df_Ngen <- df_Ngen[order(as.numeric(rownames(df_Ngen))), , drop = FALSE] df_Ngen <- df_Ngen[, -ncol(df_Ngen)] df_Fam[df_Fam$gen == i, ] <- df_Ngen - - # Step 2.2: mark a group of potential parents in the i-1 th generation + if(verbose){print( + "Step 2.2: mark a group of potential parents in the i-1 th generation")} df_Ngen <- df_Fam[df_Fam$gen == i - 1, ] df_Ngen$ifparent <- FALSE df_Ngen$ifson <- FALSE @@ -305,8 +308,8 @@ simulatePedigree <- function(kpc = 3, df_Ngen <- df_Ngen[order(as.numeric(rownames(df_Ngen))), , drop = FALSE] df_Fam[df_Fam$gen == i - 1, ] <- df_Ngen - - # Step 2.3: connect the i and i-1 th generation + if(verbose){print( + "Step 2.3: connect the i and i-1 th generation")} if (i == 1) { next } else { diff --git a/man/simulatePedigree.Rd b/man/simulatePedigree.Rd index 7e45068..d9569b9 100644 --- a/man/simulatePedigree.Rd +++ b/man/simulatePedigree.Rd @@ -2,7 +2,12 @@ % Please edit documentation in R/simulatePedigree.R \name{simulatePedigree} \alias{simulatePedigree} -\title{Simulate Pedigrees} +\title{Simulate Pedigrees +This function simulates "balanced" pedigrees based on a group of parameters: +1) k - Kids per couple; +2) G - Number of generations; +3) p - Proportion of males in offspring; +4) r - Mating rate.} \usage{ simulatePedigree( kpc = 3, @@ -10,7 +15,8 @@ simulatePedigree( sexR = 0.5, marR = 2/3, balancedSex = TRUE, - balancedmar = TRUE + balancedmar = TRUE, + verbose = FALSE ) } \arguments{ @@ -25,6 +31,8 @@ simulatePedigree( \item{balancedSex}{Not fully developed yet. Always \code{TRUE} in the current version.} \item{balancedmar}{Not fully developed yet. Always \code{TRUE} in the current version.} + +\item{verbose}{logical If TRUE, print progress through stages of algorithm} } \value{ A \code{data.frame} with each row representing a simulated individual. The columns are as follows: @@ -39,6 +47,7 @@ A \code{data.frame} with each row representing a simulated individual. The colum } } \description{ +Simulate Pedigrees This function simulates "balanced" pedigrees based on a group of parameters: 1) k - Kids per couple; 2) G - Number of generations; diff --git a/man/twinImpute.Rd b/man/twinImpute.Rd new file mode 100644 index 0000000..4c60c70 --- /dev/null +++ b/man/twinImpute.Rd @@ -0,0 +1,29 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/famSizeCal.R +\name{twinImpute} +\alias{twinImpute} +\title{twinImpute +A function to impute twins in the simulated pedigree \code{data.frame}. +Twins can be imputed by specifying their IDs or by specifying the generation the twin should be imputed. +This is a supplementary function for \code{simulatePedigree}.} +\usage{ +twinImpute(ped, ID_twin1 = NA_integer_, ID_twin2 = NA_integer_, gen_twin = 2) +} +\arguments{ +\item{ped}{A \code{data.frame} in the same format as the output of \code{simulatePedigree}.} + +\item{ID_twin1}{A vector of \code{ID} of the first twin.} + +\item{ID_twin2}{A vector of \code{ID} of the second twin.} + +\item{gen_twin}{A vector of \code{generation} of the twin to be imputed.} +} +\value{ +Returns a \code{data.frame} with MZ twins infomration added as a new column. +} +\description{ +twinImpute +A function to impute twins in the simulated pedigree \code{data.frame}. +Twins can be imputed by specifying their IDs or by specifying the generation the twin should be imputed. +This is a supplementary function for \code{simulatePedigree}. +}