Skip to content

Commit

Permalink
Saved new PDF of paper
Browse files Browse the repository at this point in the history
  • Loading branch information
smasongarrison committed Oct 6, 2023
1 parent 4788596 commit 67f2e5d
Show file tree
Hide file tree
Showing 4 changed files with 49 additions and 41 deletions.
11 changes: 5 additions & 6 deletions R/checkSex.R
Original file line number Diff line number Diff line change
Expand Up @@ -13,13 +13,12 @@
#' @return Depending on the value of `repair`, either a list containing validation results or a repaired dataframe is returned.
#' @examples
#' \dontrun{
#' ped <- data.frame(ID = c(1, 2, 3), sex = c("M", "F", "M"))
#' checkSex(ped, code_male = "M", verbose = TRUE, repair = FALSE, recode = TRUE)
#' ped <- data.frame(ID = c(1, 2, 3), sex = c("M", "F", "M"))
#' checkSex(ped, code_male = "M", verbose = TRUE, repair = FALSE, recode = TRUE)
#' }
#' @export
#'
checkSex <- function(ped, code_male = NULL, verbose = FALSE, repair = FALSE, recode = FALSE) {

# Initialize a list to store validation results
validation_results <- list()

Expand Down Expand Up @@ -71,8 +70,8 @@ checkSex <- function(ped, code_male = NULL, verbose = FALSE, repair = FALSE, rec
#' @return A dataframe where the sex coding has been repaired.
#' @examples
#' \dontrun{
#' ped <- data.frame(ID = c(1, 2, 3), sex = c("M", "F", "M"))
#' repairSex(ped, code_male = "M", verbose = TRUE)
#' ped <- data.frame(ID = c(1, 2, 3), sex = c("M", "F", "M"))
#' repairSex(ped, code_male = "M", verbose = TRUE)
#' }
#' @export
#'
Expand All @@ -90,6 +89,6 @@ repairSex <- function(ped, verbose = FALSE, code_male = NULL, recode = TRUE) {
#' @return A modified version of the input data.frame \code{ped}, containing an additional or modified 'sex_recode' column where the 'sex' values are recoded according to \code{code_male}. NA values in the 'sex' column are preserved.
#' @keywords internal
#' @seealso \code{\link{plotPedigree}}
recodeSex <- function(ped, verbose = FALSE,code_male = NULL) {
recodeSex <- function(ped, verbose = FALSE, code_male = NULL) {
checkSex(ped = ped, verbose = verbose, repair = FALSE, code_male = code_male, recode = TRUE)
}
51 changes: 24 additions & 27 deletions R/famSizeCal.R
Original file line number Diff line number Diff line change
Expand Up @@ -69,14 +69,14 @@ famSizeCal <- function(kpc, Ngen, marR) {
)
size <- sum(allGens)
} else {
stop("You should never see this message.
stop("You should never see this message.
If you do, that means that famSizeCal is not working properly.")
}
return(size)
}

#' twinImpute
#' A function to impute twins in the simulated pedigree \code{data.frame}.
#' A function to impute twins in the simulated pedigree \code{data.frame}.
#' Twins can be imputed by specifying their IDs or by specifying the generation the twin should be imputed.
#' This is a supplementary function for \code{simulatePedigree}.
#' @param ped A \code{data.frame} in the same format as the output of \code{simulatePedigree}.
Expand All @@ -86,47 +86,47 @@ famSizeCal <- function(kpc, Ngen, marR) {
#' @return Returns a \code{data.frame} with MZ twins infomration added as a new column.
#' @export

# A function to impute twins in the simulated pedigree \code{data.frame}.
# A function to impute twins in the simulated pedigree \code{data.frame}.
# Twins can be imputed by specifying their IDs or by specifying the generation the twin should be imputed.
twinImpute <- function(ped, ID_twin1 = NA_integer_, ID_twin2 = NA_integer_, gen_twin = 2){
twinImpute <- function(ped, ID_twin1 = NA_integer_, ID_twin2 = NA_integer_, gen_twin = 2) {
# a support function
resample <- function(x, ...) x[sample.int(length(x), ...)]
# Check if the ped is the same format as the output of simulatePedigree
if(paste0(colnames(ped), collapse = "") != paste0(c("fam", "ID", "gen", "dadID", "momID", "spt", "sex"), collapse = "")){
if (paste0(colnames(ped), collapse = "") != paste0(c("fam", "ID", "gen", "dadID", "momID", "spt", "sex"), collapse = "")) {
stop("The input pedigree is not in the same format as the output of simulatePedigree")
}
ped$MZtwin <- NA_integer_
# Check if the two IDs are provided
if(is.na(ID_twin1) || is.na(ID_twin2)){
if (is.na(ID_twin1) || is.na(ID_twin2)) {
# Check if the generation is provided
if(is.na(gen_twin)){
if (is.na(gen_twin)) {
stop("You should provide either the IDs of the twins or the generation of the twins")
} else {
# Check if the generation is valid
if(gen_twin < 2 || gen_twin > max(ped$gen)){
if (gen_twin < 2 || gen_twin > max(ped$gen)) {
stop("The generation of the twins should be an integer between 2 and the maximum generation in the pedigree")
} else {
idx <- nrow(ped[ped$gen == gen_twin & !is.na(ped$dadID),])
usedID = c()
idx <- nrow(ped[ped$gen == gen_twin & !is.na(ped$dadID), ])
usedID <- c()
# randomly loop through all the indivuduals in the generation until find an individual who is the same sex and shares the same dadID and momID with another individual
for (i in 1: idx) {
cat("loop",i)
for (i in 1:idx) {
cat("loop", i)
# check if i is equal to the number of individuals in the generation
usedID <- c(usedID, ID_twin1)
#print(usedID)
if(i < idx){
# print(usedID)
if (i < idx) {
# randomly select one individual from the generation
ID_twin1 <- resample(ped$ID[ped$gen == gen_twin & !(ped$ID %in% usedID) & !is.na(ped$dadID)], 1)
#cat("twin1", ID_twin1, "\n")
# cat("twin1", ID_twin1, "\n")
# find one same sex sibling who has the same dadID and momID as the selected individual
twin2_Pool = ped$ID[ped$ID != ID_twin1 & ped$gen == gen_twin & ped$sex == ped$sex[ped$ID == ID_twin1] & ped$dadID == ped$dadID[ped$ID == ID_twin1] & ped$momID == ped$momID[ped$ID == ID_twin1]]
twin2_Pool <- ped$ID[ped$ID != ID_twin1 & ped$gen == gen_twin & ped$sex == ped$sex[ped$ID == ID_twin1] & ped$dadID == ped$dadID[ped$ID == ID_twin1] & ped$momID == ped$momID[ped$ID == ID_twin1]]
# if there is an non-NA value in the twin2_Pool, get rid of the NA value
if (all(is.na(twin2_Pool))) {
cat("twin2_Pool is all NA\n")
next
} else {
twin2_Pool <- twin2_Pool[!is.na(twin2_Pool)]
ID_twin2 <- resample( twin2_Pool , 1)
ID_twin2 <- resample(twin2_Pool, 1)
break
}
# test if the ID_twin2 is missing
Expand All @@ -136,27 +136,25 @@ twinImpute <- function(ped, ID_twin1 = NA_integer_, ID_twin2 = NA_integer_, gen_
} else {
# randomly select all males or females in the generation and put them in a vector
selectGender <- ped$ID[ped$gen == gen_twin & ped$sex == resample(c("M", "F"), 1) & !is.na(ped$dadID) & !is.na(ped$momID)]
#print(selectGender)
# print(selectGender)
# randomly select two individuals from the vector
ID_DoubleTwin <- sample(selectGender, 2)
#print(ID_DoubleTwin)
# print(ID_DoubleTwin)
# change the second person's dadID and momID to the first person's dadID and momID
ped$dadID[ped$ID == ID_DoubleTwin[2]] <- ped$dadID[ped$ID == ID_DoubleTwin[1]]
ped$momID[ped$ID == ID_DoubleTwin[2]] <- ped$momID[ped$ID == ID_DoubleTwin[1]]
# let the two individuals be twins!
ID_twin1 <- ID_DoubleTwin[1]
ID_twin2 <- ID_DoubleTwin[2]
break

}

}
# Impute the IDs of the twin in the MZtwin column
ped$MZtwin[ped$ID == ID_twin1] <- ID_twin2
ped$MZtwin[ped$ID == ID_twin2] <- ID_twin1
}
# Impute the IDs of the twin in the MZtwin column
ped$MZtwin[ped$ID == ID_twin1] <- ID_twin2
ped$MZtwin[ped$ID == ID_twin2] <- ID_twin1
}
}
} else {
} else {
# Impute the IDs of the twin in the MZtwin column
ped$MZtwin[ped$ID == ID_twin1] <- ID_twin2
ped$MZtwin[ped$ID == ID_twin2] <- ID_twin1
Expand All @@ -165,4 +163,3 @@ twinImpute <- function(ped, ID_twin1 = NA_integer_, ID_twin2 = NA_integer_, gen_
cat("twin2", ID_twin2, "\n")
return(ped)
}

28 changes: 20 additions & 8 deletions R/simulatePedigree.R
Original file line number Diff line number Diff line change
Expand Up @@ -39,8 +39,11 @@ simulatePedigree <- function(kpc = 3,
# Calculate the expected family size in each generations
sizeGens <- allGens(kpc = kpc, Ngen = Ngen, marR = marR)
famSizeIndex <- 1:sum(sizeGens)
if(verbose){print(
"Step 1: Let's build the connection within each generation first")}
if (verbose) {
print(
"Step 1: Let's build the connection within each generation first"
)
}
for (i in 1:Ngen) {
idGen <- as.numeric(paste(100, i, 1:sizeGens[i], sep = ""))
# idGen <- ifelse(i==1,
Expand Down Expand Up @@ -169,8 +172,11 @@ simulatePedigree <- function(kpc = 3,
}
}

if(verbose){ print(
"Step 2: Let's try to build connection between each two generations")}
if (verbose) {
print(
"Step 2: Let's try to build connection between each two generations"
)
}
df_Fam$ifparent <- FALSE
df_Fam$ifson <- FALSE
df_Fam$ifdau <- FALSE
Expand Down Expand Up @@ -280,8 +286,11 @@ simulatePedigree <- function(kpc = 3,
df_Ngen <- df_Ngen[order(as.numeric(rownames(df_Ngen))), , drop = FALSE]
df_Ngen <- df_Ngen[, -ncol(df_Ngen)]
df_Fam[df_Fam$gen == i, ] <- df_Ngen
if(verbose){print(
"Step 2.2: mark a group of potential parents in the i-1 th generation")}
if (verbose) {
print(
"Step 2.2: mark a group of potential parents in the i-1 th generation"
)
}
df_Ngen <- df_Fam[df_Fam$gen == i - 1, ]
df_Ngen$ifparent <- FALSE
df_Ngen$ifson <- FALSE
Expand All @@ -308,8 +317,11 @@ simulatePedigree <- function(kpc = 3,

df_Ngen <- df_Ngen[order(as.numeric(rownames(df_Ngen))), , drop = FALSE]
df_Fam[df_Fam$gen == i - 1, ] <- df_Ngen
if(verbose){print(
"Step 2.3: connect the i and i-1 th generation")}
if (verbose) {
print(
"Step 2.3: connect the i and i-1 th generation"
)
}
if (i == 1) {
next
} else {
Expand Down
Binary file modified vignettes/articles/paper.pdf
Binary file not shown.

0 comments on commit 67f2e5d

Please sign in to comment.