Skip to content

Commit

Permalink
Merge pull request #4 from R-Computing-Lab/update-simPed
Browse files Browse the repository at this point in the history
Twin imputation function is done
  • Loading branch information
smasongarrison committed Oct 6, 2023
2 parents 5230af3 + d1be89e commit 38ebf71
Show file tree
Hide file tree
Showing 7 changed files with 151 additions and 13 deletions.
2 changes: 2 additions & 0 deletions .Rbuildignore
Original file line number Diff line number Diff line change
Expand Up @@ -12,3 +12,5 @@
^CRAN-SUBMISSION$
^BGmisc\.code-workspace$
^\.lintr$
^CODE_OF_CONDUCT\.md
^CONTRIBUTING\.md
1 change: 1 addition & 0 deletions NAMESPACE
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@ export(plotPedigree)
export(related_coef)
export(relatedness)
export(simulatePedigree)
export(twinImpute)
export(vech)
import(kinship2)
importFrom(stats,runif)
6 changes: 4 additions & 2 deletions NEWS.md
Original file line number Diff line number Diff line change
@@ -1,6 +1,8 @@
# BGmisc 1.0.1
Hot fix to resolve plotPedigree wrapper function breaking for pedigrees that contained multiple families
# BGmisc 1.1.0
* Added ability to simulate twins

# BGmisc 1.0.1
* Hot fix to resolve plotPedigree wrapper function breaking for pedigrees that contained multiple families

# BGmisc 1.0

Expand Down
92 changes: 92 additions & 0 deletions R/famSizeCal.R
Original file line number Diff line number Diff line change
Expand Up @@ -74,3 +74,95 @@ famSizeCal <- function(kpc, Ngen, marR) {
}
return(size)
}

#' twinImpute
#' A function to impute twins in the simulated pedigree \code{data.frame}.
#' Twins can be imputed by specifying their IDs or by specifying the generation the twin should be imputed.
#' This is a supplementary function for \code{simulatePedigree}.
#' @param ped A \code{data.frame} in the same format as the output of \code{simulatePedigree}.
#' @param ID_twin1 A vector of \code{ID} of the first twin.
#' @param ID_twin2 A vector of \code{ID} of the second twin.
#' @param gen_twin A vector of \code{generation} of the twin to be imputed.
#' @return Returns a \code{data.frame} with MZ twins infomration added as a new column.
#' @export

# A function to impute twins in the simulated pedigree \code{data.frame}.
# Twins can be imputed by specifying their IDs or by specifying the generation the twin should be imputed.
twinImpute <- function(ped, ID_twin1 = NA_integer_, ID_twin2 = NA_integer_, gen_twin = 2){
# a support function
resample <- function(x, ...) x[sample.int(length(x), ...)]
# Check if the ped is the same format as the output of simulatePedigree
if(paste0(colnames(ped), collapse = "") != paste0(c("fam", "ID", "gen", "dadID", "momID", "spt", "sex"), collapse = "")){
stop("The input pedigree is not in the same format as the output of simulatePedigree")
}
ped$MZtwin <- NA_integer_
# Check if the two IDs are provided
if(is.na(ID_twin1) || is.na(ID_twin2)){
# Check if the generation is provided
if(is.na(gen_twin)){
stop("You should provide either the IDs of the twins or the generation of the twins")
} else {
# Check if the generation is valid
if(gen_twin < 2 || gen_twin > max(ped$gen)){
stop("The generation of the twins should be an integer between 2 and the maximum generation in the pedigree")
} else {
idx <- nrow(ped[ped$gen == gen_twin & !is.na(ped$dadID),])
usedID = c()
# randomly loop through all the indivuduals in the generation until find an individual who is the same sex and shares the same dadID and momID with another individual
for (i in 1: idx) {
cat("loop",i)
# check if i is equal to the number of individuals in the generation
usedID <- c(usedID, ID_twin1)
#print(usedID)
if(i < idx){
# randomly select one individual from the generation
ID_twin1 <- resample(ped$ID[ped$gen == gen_twin & !(ped$ID %in% usedID) & !is.na(ped$dadID)], 1)
#cat("twin1", ID_twin1, "\n")
# find one same sex sibling who has the same dadID and momID as the selected individual
twin2_Pool = ped$ID[ped$ID != ID_twin1 & ped$gen == gen_twin & ped$sex == ped$sex[ped$ID == ID_twin1] & ped$dadID == ped$dadID[ped$ID == ID_twin1] & ped$momID == ped$momID[ped$ID == ID_twin1]]
# if there is an non-NA value in the twin2_Pool, get rid of the NA value
if (all(is.na(twin2_Pool))) {
cat("twin2_Pool is all NA\n")
next
} else {
twin2_Pool <- twin2_Pool[!is.na(twin2_Pool)]
ID_twin2 <- resample( twin2_Pool , 1)
break
}
# test if the ID_twin2 is missing
# if(!is.na(ID_twin2)){
# break
# }
} else {
# randomly select all males or females in the generation and put them in a vector
selectGender <- ped$ID[ped$gen == gen_twin & ped$sex == resample(c("M", "F"), 1) & !is.na(ped$dadID) & !is.na(ped$momID)]
#print(selectGender)
# randomly select two individuals from the vector
ID_DoubleTwin <- sample(selectGender, 2)
#print(ID_DoubleTwin)
# change the second person's dadID and momID to the first person's dadID and momID
ped$dadID[ped$ID == ID_DoubleTwin[2]] <- ped$dadID[ped$ID == ID_DoubleTwin[1]]
ped$momID[ped$ID == ID_DoubleTwin[2]] <- ped$momID[ped$ID == ID_DoubleTwin[1]]
# let the two individuals be twins!
ID_twin1 <- ID_DoubleTwin[1]
ID_twin2 <- ID_DoubleTwin[2]
break

}

}
# Impute the IDs of the twin in the MZtwin column
ped$MZtwin[ped$ID == ID_twin1] <- ID_twin2
ped$MZtwin[ped$ID == ID_twin2] <- ID_twin1
}
}
} else {
# Impute the IDs of the twin in the MZtwin column
ped$MZtwin[ped$ID == ID_twin1] <- ID_twin2
ped$MZtwin[ped$ID == ID_twin2] <- ID_twin1
}
cat("twin1", ID_twin1, "\n")
cat("twin2", ID_twin2, "\n")
return(ped)
}

21 changes: 12 additions & 9 deletions R/simulatePedigree.R
Original file line number Diff line number Diff line change
@@ -1,5 +1,4 @@
#' Simulate Pedigrees
#'
#' This function simulates "balanced" pedigrees based on a group of parameters:
#' 1) k - Kids per couple;
#' 2) G - Number of generations;
Expand All @@ -13,6 +12,8 @@
#' @param marR Mating rate. A numeric value ranging from 0 to 1 which determines the proportion of mated (fertilized) couples in the pedigree within each generation. For instance, marR = 0.5 suggests 50 percent of the offspring in a specific generation will be mated and have their offspring.
#' @param balancedSex Not fully developed yet. Always \code{TRUE} in the current version.
#' @param balancedmar Not fully developed yet. Always \code{TRUE} in the current version.
#' @param verbose logical If TRUE, print progress through stages of algorithm

#' @return A \code{data.frame} with each row representing a simulated individual. The columns are as follows:
#' \itemize{
#' \item{fam: The family id of each simulated individual. It is 'fam1' in a single simulated pedigree.}
Expand All @@ -30,15 +31,16 @@ simulatePedigree <- function(kpc = 3,
sexR = .5,
marR = 2 / 3,
balancedSex = TRUE,
balancedmar = TRUE) {
balancedmar = TRUE,
verbose = FALSE) {
# SexRatio: ratio of male over female in the offspring setting; used in the between generation combinations
SexRatio <- sexR / (1 - sexR)

# Calculate the expected family size in each generations
sizeGens <- allGens(kpc = kpc, Ngen = Ngen, marR = marR)
famSizeIndex <- 1:sum(sizeGens)

# Step 1: Let's build the connection within each generation first
if(verbose){print(
"Step 1: Let's build the connection within each generation first")}
for (i in 1:Ngen) {
idGen <- as.numeric(paste(100, i, 1:sizeGens[i], sep = ""))
# idGen <- ifelse(i==1,
Expand Down Expand Up @@ -167,7 +169,8 @@ simulatePedigree <- function(kpc = 3,
}
}

# Step 2: Let's try to build connection between each two generations
if(verbose){ print(
"Step 2: Let's try to build connection between each two generations")}
df_Fam$ifparent <- FALSE
df_Fam$ifson <- FALSE
df_Fam$ifdau <- FALSE
Expand Down Expand Up @@ -277,8 +280,8 @@ simulatePedigree <- function(kpc = 3,
df_Ngen <- df_Ngen[order(as.numeric(rownames(df_Ngen))), , drop = FALSE]
df_Ngen <- df_Ngen[, -ncol(df_Ngen)]
df_Fam[df_Fam$gen == i, ] <- df_Ngen

# Step 2.2: mark a group of potential parents in the i-1 th generation
if(verbose){print(
"Step 2.2: mark a group of potential parents in the i-1 th generation")}
df_Ngen <- df_Fam[df_Fam$gen == i - 1, ]
df_Ngen$ifparent <- FALSE
df_Ngen$ifson <- FALSE
Expand All @@ -305,8 +308,8 @@ simulatePedigree <- function(kpc = 3,

df_Ngen <- df_Ngen[order(as.numeric(rownames(df_Ngen))), , drop = FALSE]
df_Fam[df_Fam$gen == i - 1, ] <- df_Ngen

# Step 2.3: connect the i and i-1 th generation
if(verbose){print(
"Step 2.3: connect the i and i-1 th generation")}
if (i == 1) {
next
} else {
Expand Down
13 changes: 11 additions & 2 deletions man/simulatePedigree.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

29 changes: 29 additions & 0 deletions man/twinImpute.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

0 comments on commit 38ebf71

Please sign in to comment.