diff --git a/R/checkSex.R b/R/checkSex.R index 4d03468..e81c66b 100644 --- a/R/checkSex.R +++ b/R/checkSex.R @@ -13,13 +13,12 @@ #' @return Depending on the value of `repair`, either a list containing validation results or a repaired dataframe is returned. #' @examples #' \dontrun{ -#' ped <- data.frame(ID = c(1, 2, 3), sex = c("M", "F", "M")) -#' checkSex(ped, code_male = "M", verbose = TRUE, repair = FALSE, recode = TRUE) +#' ped <- data.frame(ID = c(1, 2, 3), sex = c("M", "F", "M")) +#' checkSex(ped, code_male = "M", verbose = TRUE, repair = FALSE, recode = TRUE) #' } #' @export #' checkSex <- function(ped, code_male = NULL, verbose = FALSE, repair = FALSE, recode = FALSE) { - # Initialize a list to store validation results validation_results <- list() @@ -71,8 +70,8 @@ checkSex <- function(ped, code_male = NULL, verbose = FALSE, repair = FALSE, rec #' @return A dataframe where the sex coding has been repaired. #' @examples #' \dontrun{ -#' ped <- data.frame(ID = c(1, 2, 3), sex = c("M", "F", "M")) -#' repairSex(ped, code_male = "M", verbose = TRUE) +#' ped <- data.frame(ID = c(1, 2, 3), sex = c("M", "F", "M")) +#' repairSex(ped, code_male = "M", verbose = TRUE) #' } #' @export #' @@ -90,6 +89,6 @@ repairSex <- function(ped, verbose = FALSE, code_male = NULL, recode = TRUE) { #' @return A modified version of the input data.frame \code{ped}, containing an additional or modified 'sex_recode' column where the 'sex' values are recoded according to \code{code_male}. NA values in the 'sex' column are preserved. #' @keywords internal #' @seealso \code{\link{plotPedigree}} -recodeSex <- function(ped, verbose = FALSE,code_male = NULL) { +recodeSex <- function(ped, verbose = FALSE, code_male = NULL) { checkSex(ped = ped, verbose = verbose, repair = FALSE, code_male = code_male, recode = TRUE) } diff --git a/R/famSizeCal.R b/R/famSizeCal.R index 1ef2b5a..6dde136 100644 --- a/R/famSizeCal.R +++ b/R/famSizeCal.R @@ -69,14 +69,14 @@ famSizeCal <- function(kpc, Ngen, marR) { ) size <- sum(allGens) } else { - stop("You should never see this message. + stop("You should never see this message. If you do, that means that famSizeCal is not working properly.") } return(size) } #' twinImpute -#' A function to impute twins in the simulated pedigree \code{data.frame}. +#' A function to impute twins in the simulated pedigree \code{data.frame}. #' Twins can be imputed by specifying their IDs or by specifying the generation the twin should be imputed. #' This is a supplementary function for \code{simulatePedigree}. #' @param ped A \code{data.frame} in the same format as the output of \code{simulatePedigree}. @@ -86,47 +86,47 @@ famSizeCal <- function(kpc, Ngen, marR) { #' @return Returns a \code{data.frame} with MZ twins infomration added as a new column. #' @export -# A function to impute twins in the simulated pedigree \code{data.frame}. +# A function to impute twins in the simulated pedigree \code{data.frame}. # Twins can be imputed by specifying their IDs or by specifying the generation the twin should be imputed. -twinImpute <- function(ped, ID_twin1 = NA_integer_, ID_twin2 = NA_integer_, gen_twin = 2){ +twinImpute <- function(ped, ID_twin1 = NA_integer_, ID_twin2 = NA_integer_, gen_twin = 2) { # a support function resample <- function(x, ...) x[sample.int(length(x), ...)] # Check if the ped is the same format as the output of simulatePedigree - if(paste0(colnames(ped), collapse = "") != paste0(c("fam", "ID", "gen", "dadID", "momID", "spt", "sex"), collapse = "")){ + if (paste0(colnames(ped), collapse = "") != paste0(c("fam", "ID", "gen", "dadID", "momID", "spt", "sex"), collapse = "")) { stop("The input pedigree is not in the same format as the output of simulatePedigree") } ped$MZtwin <- NA_integer_ # Check if the two IDs are provided - if(is.na(ID_twin1) || is.na(ID_twin2)){ + if (is.na(ID_twin1) || is.na(ID_twin2)) { # Check if the generation is provided - if(is.na(gen_twin)){ + if (is.na(gen_twin)) { stop("You should provide either the IDs of the twins or the generation of the twins") } else { # Check if the generation is valid - if(gen_twin < 2 || gen_twin > max(ped$gen)){ + if (gen_twin < 2 || gen_twin > max(ped$gen)) { stop("The generation of the twins should be an integer between 2 and the maximum generation in the pedigree") } else { - idx <- nrow(ped[ped$gen == gen_twin & !is.na(ped$dadID),]) - usedID = c() + idx <- nrow(ped[ped$gen == gen_twin & !is.na(ped$dadID), ]) + usedID <- c() # randomly loop through all the indivuduals in the generation until find an individual who is the same sex and shares the same dadID and momID with another individual - for (i in 1: idx) { - cat("loop",i) + for (i in 1:idx) { + cat("loop", i) # check if i is equal to the number of individuals in the generation usedID <- c(usedID, ID_twin1) - #print(usedID) - if(i < idx){ + # print(usedID) + if (i < idx) { # randomly select one individual from the generation ID_twin1 <- resample(ped$ID[ped$gen == gen_twin & !(ped$ID %in% usedID) & !is.na(ped$dadID)], 1) - #cat("twin1", ID_twin1, "\n") + # cat("twin1", ID_twin1, "\n") # find one same sex sibling who has the same dadID and momID as the selected individual - twin2_Pool = ped$ID[ped$ID != ID_twin1 & ped$gen == gen_twin & ped$sex == ped$sex[ped$ID == ID_twin1] & ped$dadID == ped$dadID[ped$ID == ID_twin1] & ped$momID == ped$momID[ped$ID == ID_twin1]] + twin2_Pool <- ped$ID[ped$ID != ID_twin1 & ped$gen == gen_twin & ped$sex == ped$sex[ped$ID == ID_twin1] & ped$dadID == ped$dadID[ped$ID == ID_twin1] & ped$momID == ped$momID[ped$ID == ID_twin1]] # if there is an non-NA value in the twin2_Pool, get rid of the NA value if (all(is.na(twin2_Pool))) { cat("twin2_Pool is all NA\n") next } else { twin2_Pool <- twin2_Pool[!is.na(twin2_Pool)] - ID_twin2 <- resample( twin2_Pool , 1) + ID_twin2 <- resample(twin2_Pool, 1) break } # test if the ID_twin2 is missing @@ -136,10 +136,10 @@ twinImpute <- function(ped, ID_twin1 = NA_integer_, ID_twin2 = NA_integer_, gen_ } else { # randomly select all males or females in the generation and put them in a vector selectGender <- ped$ID[ped$gen == gen_twin & ped$sex == resample(c("M", "F"), 1) & !is.na(ped$dadID) & !is.na(ped$momID)] - #print(selectGender) + # print(selectGender) # randomly select two individuals from the vector ID_DoubleTwin <- sample(selectGender, 2) - #print(ID_DoubleTwin) + # print(ID_DoubleTwin) # change the second person's dadID and momID to the first person's dadID and momID ped$dadID[ped$ID == ID_DoubleTwin[2]] <- ped$dadID[ped$ID == ID_DoubleTwin[1]] ped$momID[ped$ID == ID_DoubleTwin[2]] <- ped$momID[ped$ID == ID_DoubleTwin[1]] @@ -147,16 +147,14 @@ twinImpute <- function(ped, ID_twin1 = NA_integer_, ID_twin2 = NA_integer_, gen_ ID_twin1 <- ID_DoubleTwin[1] ID_twin2 <- ID_DoubleTwin[2] break - } - + } + # Impute the IDs of the twin in the MZtwin column + ped$MZtwin[ped$ID == ID_twin1] <- ID_twin2 + ped$MZtwin[ped$ID == ID_twin2] <- ID_twin1 } - # Impute the IDs of the twin in the MZtwin column - ped$MZtwin[ped$ID == ID_twin1] <- ID_twin2 - ped$MZtwin[ped$ID == ID_twin2] <- ID_twin1 } - } -} else { + } else { # Impute the IDs of the twin in the MZtwin column ped$MZtwin[ped$ID == ID_twin1] <- ID_twin2 ped$MZtwin[ped$ID == ID_twin2] <- ID_twin1 @@ -165,4 +163,3 @@ twinImpute <- function(ped, ID_twin1 = NA_integer_, ID_twin2 = NA_integer_, gen_ cat("twin2", ID_twin2, "\n") return(ped) } - diff --git a/R/simulatePedigree.R b/R/simulatePedigree.R index fc09f9f..ea83016 100644 --- a/R/simulatePedigree.R +++ b/R/simulatePedigree.R @@ -39,8 +39,11 @@ simulatePedigree <- function(kpc = 3, # Calculate the expected family size in each generations sizeGens <- allGens(kpc = kpc, Ngen = Ngen, marR = marR) famSizeIndex <- 1:sum(sizeGens) - if(verbose){print( - "Step 1: Let's build the connection within each generation first")} + if (verbose) { + print( + "Step 1: Let's build the connection within each generation first" + ) + } for (i in 1:Ngen) { idGen <- as.numeric(paste(100, i, 1:sizeGens[i], sep = "")) # idGen <- ifelse(i==1, @@ -169,8 +172,11 @@ simulatePedigree <- function(kpc = 3, } } - if(verbose){ print( - "Step 2: Let's try to build connection between each two generations")} + if (verbose) { + print( + "Step 2: Let's try to build connection between each two generations" + ) + } df_Fam$ifparent <- FALSE df_Fam$ifson <- FALSE df_Fam$ifdau <- FALSE @@ -280,8 +286,11 @@ simulatePedigree <- function(kpc = 3, df_Ngen <- df_Ngen[order(as.numeric(rownames(df_Ngen))), , drop = FALSE] df_Ngen <- df_Ngen[, -ncol(df_Ngen)] df_Fam[df_Fam$gen == i, ] <- df_Ngen - if(verbose){print( - "Step 2.2: mark a group of potential parents in the i-1 th generation")} + if (verbose) { + print( + "Step 2.2: mark a group of potential parents in the i-1 th generation" + ) + } df_Ngen <- df_Fam[df_Fam$gen == i - 1, ] df_Ngen$ifparent <- FALSE df_Ngen$ifson <- FALSE @@ -308,8 +317,11 @@ simulatePedigree <- function(kpc = 3, df_Ngen <- df_Ngen[order(as.numeric(rownames(df_Ngen))), , drop = FALSE] df_Fam[df_Fam$gen == i - 1, ] <- df_Ngen - if(verbose){print( - "Step 2.3: connect the i and i-1 th generation")} + if (verbose) { + print( + "Step 2.3: connect the i and i-1 th generation" + ) + } if (i == 1) { next } else { diff --git a/vignettes/articles/paper.pdf b/vignettes/articles/paper.pdf index 0f52374..d8540c6 100644 Binary files a/vignettes/articles/paper.pdf and b/vignettes/articles/paper.pdf differ