Skip to content

Commit

Permalink
Saved new PDF of paper
Browse files Browse the repository at this point in the history
harmonize white space
  • Loading branch information
smasongarrison committed Oct 6, 2023
1 parent 38ebf71 commit 5398d85
Show file tree
Hide file tree
Showing 8 changed files with 90 additions and 72 deletions.
51 changes: 24 additions & 27 deletions R/famSizeCal.R
Original file line number Diff line number Diff line change
Expand Up @@ -69,14 +69,14 @@ famSizeCal <- function(kpc, Ngen, marR) {
)
size <- sum(allGens)
} else {
stop("You should never see this message.
stop("You should never see this message.
If you do, that means that famSizeCal is not working properly.")
}
return(size)
}

#' twinImpute
#' A function to impute twins in the simulated pedigree \code{data.frame}.
#' A function to impute twins in the simulated pedigree \code{data.frame}.
#' Twins can be imputed by specifying their IDs or by specifying the generation the twin should be imputed.
#' This is a supplementary function for \code{simulatePedigree}.
#' @param ped A \code{data.frame} in the same format as the output of \code{simulatePedigree}.
Expand All @@ -86,47 +86,47 @@ famSizeCal <- function(kpc, Ngen, marR) {
#' @return Returns a \code{data.frame} with MZ twins infomration added as a new column.
#' @export

# A function to impute twins in the simulated pedigree \code{data.frame}.
# A function to impute twins in the simulated pedigree \code{data.frame}.
# Twins can be imputed by specifying their IDs or by specifying the generation the twin should be imputed.
twinImpute <- function(ped, ID_twin1 = NA_integer_, ID_twin2 = NA_integer_, gen_twin = 2){
twinImpute <- function(ped, ID_twin1 = NA_integer_, ID_twin2 = NA_integer_, gen_twin = 2) {
# a support function
resample <- function(x, ...) x[sample.int(length(x), ...)]
# Check if the ped is the same format as the output of simulatePedigree
if(paste0(colnames(ped), collapse = "") != paste0(c("fam", "ID", "gen", "dadID", "momID", "spt", "sex"), collapse = "")){
if (paste0(colnames(ped), collapse = "") != paste0(c("fam", "ID", "gen", "dadID", "momID", "spt", "sex"), collapse = "")) {
stop("The input pedigree is not in the same format as the output of simulatePedigree")
}
ped$MZtwin <- NA_integer_
# Check if the two IDs are provided
if(is.na(ID_twin1) || is.na(ID_twin2)){
if (is.na(ID_twin1) || is.na(ID_twin2)) {
# Check if the generation is provided
if(is.na(gen_twin)){
if (is.na(gen_twin)) {
stop("You should provide either the IDs of the twins or the generation of the twins")
} else {
# Check if the generation is valid
if(gen_twin < 2 || gen_twin > max(ped$gen)){
if (gen_twin < 2 || gen_twin > max(ped$gen)) {
stop("The generation of the twins should be an integer between 2 and the maximum generation in the pedigree")
} else {
idx <- nrow(ped[ped$gen == gen_twin & !is.na(ped$dadID),])
usedID = c()
idx <- nrow(ped[ped$gen == gen_twin & !is.na(ped$dadID), ])
usedID <- c()
# randomly loop through all the indivuduals in the generation until find an individual who is the same sex and shares the same dadID and momID with another individual
for (i in 1: idx) {
cat("loop",i)
for (i in 1:idx) {
cat("loop", i)
# check if i is equal to the number of individuals in the generation
usedID <- c(usedID, ID_twin1)
#print(usedID)
if(i < idx){
# print(usedID)
if (i < idx) {
# randomly select one individual from the generation
ID_twin1 <- resample(ped$ID[ped$gen == gen_twin & !(ped$ID %in% usedID) & !is.na(ped$dadID)], 1)
#cat("twin1", ID_twin1, "\n")
# cat("twin1", ID_twin1, "\n")
# find one same sex sibling who has the same dadID and momID as the selected individual
twin2_Pool = ped$ID[ped$ID != ID_twin1 & ped$gen == gen_twin & ped$sex == ped$sex[ped$ID == ID_twin1] & ped$dadID == ped$dadID[ped$ID == ID_twin1] & ped$momID == ped$momID[ped$ID == ID_twin1]]
twin2_Pool <- ped$ID[ped$ID != ID_twin1 & ped$gen == gen_twin & ped$sex == ped$sex[ped$ID == ID_twin1] & ped$dadID == ped$dadID[ped$ID == ID_twin1] & ped$momID == ped$momID[ped$ID == ID_twin1]]
# if there is an non-NA value in the twin2_Pool, get rid of the NA value
if (all(is.na(twin2_Pool))) {
cat("twin2_Pool is all NA\n")
next
} else {
twin2_Pool <- twin2_Pool[!is.na(twin2_Pool)]
ID_twin2 <- resample( twin2_Pool , 1)
ID_twin2 <- resample(twin2_Pool, 1)
break
}
# test if the ID_twin2 is missing
Expand All @@ -136,27 +136,25 @@ twinImpute <- function(ped, ID_twin1 = NA_integer_, ID_twin2 = NA_integer_, gen_
} else {
# randomly select all males or females in the generation and put them in a vector
selectGender <- ped$ID[ped$gen == gen_twin & ped$sex == resample(c("M", "F"), 1) & !is.na(ped$dadID) & !is.na(ped$momID)]
#print(selectGender)
# print(selectGender)
# randomly select two individuals from the vector
ID_DoubleTwin <- sample(selectGender, 2)
#print(ID_DoubleTwin)
# print(ID_DoubleTwin)
# change the second person's dadID and momID to the first person's dadID and momID
ped$dadID[ped$ID == ID_DoubleTwin[2]] <- ped$dadID[ped$ID == ID_DoubleTwin[1]]
ped$momID[ped$ID == ID_DoubleTwin[2]] <- ped$momID[ped$ID == ID_DoubleTwin[1]]
# let the two individuals be twins!
ID_twin1 <- ID_DoubleTwin[1]
ID_twin2 <- ID_DoubleTwin[2]
break

}

}
# Impute the IDs of the twin in the MZtwin column
ped$MZtwin[ped$ID == ID_twin1] <- ID_twin2
ped$MZtwin[ped$ID == ID_twin2] <- ID_twin1
}
# Impute the IDs of the twin in the MZtwin column
ped$MZtwin[ped$ID == ID_twin1] <- ID_twin2
ped$MZtwin[ped$ID == ID_twin2] <- ID_twin1
}
}
} else {
} else {
# Impute the IDs of the twin in the MZtwin column
ped$MZtwin[ped$ID == ID_twin1] <- ID_twin2
ped$MZtwin[ped$ID == ID_twin2] <- ID_twin1
Expand All @@ -165,4 +163,3 @@ twinImpute <- function(ped, ID_twin1 = NA_integer_, ID_twin2 = NA_integer_, gen_
cat("twin2", ID_twin2, "\n")
return(ped)
}

60 changes: 36 additions & 24 deletions R/helper.R
Original file line number Diff line number Diff line change
Expand Up @@ -147,35 +147,47 @@ relatedness <- function(...) {
standardize_colnames <- function(df) {
# Internal mapping of standardized names to possible variants
mapping <- list(
'fam' = c('fam', 'Fam', 'FAM',
'famID', 'FamID', 'FAMID',
'famid', 'Famid', 'FAMid',
'famfam','Famfam', 'FAMFAM'),
'ID' = c('ID', 'id', 'Id',
'Indiv', 'indiv', 'INDIV',
'individual', 'Individual', 'INDIVIDUAL'),
'gen' = c('gen', 'Gen', 'GEN',
'gens', 'Gens', 'GENS',
'generation', 'Generation', 'GENERATION'),
'dadID' = c('dadID', 'DadID', 'DADID',
'fatherID', 'FatherID', 'FATHERID'),
'momID' = c('momID', 'MomID', 'MOMID',
'motherID', 'MotherID', 'MOTHERID'),
'spt' = c('spt', 'Spt', 'SPT'),
'sex' = c('sex', 'Sex', 'SEX',
'female', 'Female', 'FEMALE',
'gender', 'Gender', 'GENDER',
'male', 'Male', 'MALE',
'man', 'Man', 'MAN',
'men', 'Men', 'MEN',
'woman', 'Woman', 'WOMAN',
'women', 'Women', 'WOMEN')
"fam" = c(
"fam", "Fam", "FAM",
"famID", "FamID", "FAMID",
"famid", "Famid", "FAMid",
"famfam", "Famfam", "FAMFAM"
),
"ID" = c(
"ID", "id", "Id",
"Indiv", "indiv", "INDIV",
"individual", "Individual", "INDIVIDUAL"
),
"gen" = c(
"gen", "Gen", "GEN",
"gens", "Gens", "GENS",
"generation", "Generation", "GENERATION"
),
"dadID" = c(
"dadID", "DadID", "DADID",
"fatherID", "FatherID", "FATHERID"
),
"momID" = c(
"momID", "MomID", "MOMID",
"motherID", "MotherID", "MOTHERID"
),
"spt" = c("spt", "Spt", "SPT"),
"sex" = c(
"sex", "Sex", "SEX",
"female", "Female", "FEMALE",
"gender", "Gender", "GENDER",
"male", "Male", "MALE",
"man", "Man", "MAN",
"men", "Men", "MEN",
"woman", "Woman", "WOMAN",
"women", "Women", "WOMEN"
)
)
for (standard_name in names(mapping)) {
for (variant in mapping[[standard_name]]) {
if (variant %in% colnames(df)) {
colnames(df)[colnames(df) == variant] <- standard_name
break # Exit the loop once a match is found
break # Exit the loop once a match is found
}
}
}
Expand Down
6 changes: 2 additions & 4 deletions R/plotPedigree.R
Original file line number Diff line number Diff line change
Expand Up @@ -18,11 +18,10 @@ plotPedigree <- function(ped,
col = 1,
symbolsize = 1, branch = 0.6,
packed = TRUE, align = c(1.5, 2), width = 8,
density = c(-1, 35, 65, 20), mar=c(2.1, 1, 2.1, 1),
density = c(-1, 35, 65, 20), mar = c(2.1, 1, 2.1, 1),
angle = c(90, 65, 40, 0), keep.par = FALSE,
pconnect = .5,
...) {

# Standardize column names in the input dataframe
ped <- standardize_colnames(ped)

Expand All @@ -48,9 +47,8 @@ plotPedigree <- function(ped,
# family id
if (length(unique(p$ped)) == 1) { # only one family
p$ped <- 1

} else {
# Assign a unique string pattern "ped #" for each unique family
# Assign a unique string pattern "ped #" for each unique family
unique_families <- unique(p$ped)
named_families <- 1:length(unique_families)
p$ped <- named_families[match(p$ped, unique_families)]
Expand Down
28 changes: 20 additions & 8 deletions R/simulatePedigree.R
Original file line number Diff line number Diff line change
Expand Up @@ -39,8 +39,11 @@ simulatePedigree <- function(kpc = 3,
# Calculate the expected family size in each generations
sizeGens <- allGens(kpc = kpc, Ngen = Ngen, marR = marR)
famSizeIndex <- 1:sum(sizeGens)
if(verbose){print(
"Step 1: Let's build the connection within each generation first")}
if (verbose) {
print(
"Step 1: Let's build the connection within each generation first"
)
}
for (i in 1:Ngen) {
idGen <- as.numeric(paste(100, i, 1:sizeGens[i], sep = ""))
# idGen <- ifelse(i==1,
Expand Down Expand Up @@ -169,8 +172,11 @@ simulatePedigree <- function(kpc = 3,
}
}

if(verbose){ print(
"Step 2: Let's try to build connection between each two generations")}
if (verbose) {
print(
"Step 2: Let's try to build connection between each two generations"
)
}
df_Fam$ifparent <- FALSE
df_Fam$ifson <- FALSE
df_Fam$ifdau <- FALSE
Expand Down Expand Up @@ -280,8 +286,11 @@ simulatePedigree <- function(kpc = 3,
df_Ngen <- df_Ngen[order(as.numeric(rownames(df_Ngen))), , drop = FALSE]
df_Ngen <- df_Ngen[, -ncol(df_Ngen)]
df_Fam[df_Fam$gen == i, ] <- df_Ngen
if(verbose){print(
"Step 2.2: mark a group of potential parents in the i-1 th generation")}
if (verbose) {
print(
"Step 2.2: mark a group of potential parents in the i-1 th generation"
)
}
df_Ngen <- df_Fam[df_Fam$gen == i - 1, ]
df_Ngen$ifparent <- FALSE
df_Ngen$ifson <- FALSE
Expand All @@ -308,8 +317,11 @@ simulatePedigree <- function(kpc = 3,

df_Ngen <- df_Ngen[order(as.numeric(rownames(df_Ngen))), , drop = FALSE]
df_Fam[df_Fam$gen == i - 1, ] <- df_Ngen
if(verbose){print(
"Step 2.3: connect the i and i-1 th generation")}
if (verbose) {
print(
"Step 2.3: connect the i and i-1 th generation"
)
}
if (i == 1) {
next
} else {
Expand Down
2 changes: 1 addition & 1 deletion tests/testthat/test-network.R
Original file line number Diff line number Diff line change
Expand Up @@ -35,7 +35,7 @@ test_that("ped2graph produces a graph for inbreeding data", {


test_that("ped2add produces correct matrix dims, values, and dimnames for hazard", {
data(hazard)
data(hazard)
add <- ped2add(hazard)
# Check dimension
expect_equal(dim(add), c(nrow(hazard), nrow(hazard)))
Expand Down
Binary file modified vignettes/articles/paper.pdf
Binary file not shown.
5 changes: 1 addition & 4 deletions vignettes/network.Rmd
Original file line number Diff line number Diff line change
Expand Up @@ -42,11 +42,8 @@ capture.output(plotPedigree(hazard, code_male = 0))
We will use the `hazard` pedigree data as an example.

```{r}
ds <- ped2fam(hazard, famID = "newFamID")
table(ds$FamID, ds$newFamID)
```

Because the `hazard` data already had a family ID variable we compare our newly created variable to the pre-existing one. They match!
Expand Down Expand Up @@ -107,7 +104,7 @@ Here we calculate the relatedness between all pairs of individuals in the `hazar
through sharing both parents.
```{r}
commonNuclear <- ped2cn(hazard)
commonNuclear [1:7, 1:7]
commonNuclear[1:7, 1:7]
table(commonNuclear)
```
Expand Down
10 changes: 6 additions & 4 deletions vignettes/pedigree.Rmd
Original file line number Diff line number Diff line change
Expand Up @@ -38,10 +38,12 @@ To illustrate this, let us generate a pedigree. This pedigree has a total of fou

```{r}
set.seed(5)
df_ped <- simulatePedigree(kpc = 4,
Ngen = 4,
sexR = .5,
marR = .7)
df_ped <- simulatePedigree(
kpc = 4,
Ngen = 4,
sexR = .5,
marR = .7
)
summary(df_ped)
```

Expand Down

0 comments on commit 5398d85

Please sign in to comment.