From 5398d85177fc10a4bd3206c0f5e428c97b947e54 Mon Sep 17 00:00:00 2001 From: smasongarrison Date: Fri, 6 Oct 2023 16:35:52 +0000 Subject: [PATCH] Saved new PDF of paper harmonize white space --- R/famSizeCal.R | 51 ++++++++++++++--------------- R/helper.R | 60 ++++++++++++++++++++-------------- R/plotPedigree.R | 6 ++-- R/simulatePedigree.R | 28 +++++++++++----- tests/testthat/test-network.R | 2 +- vignettes/articles/paper.pdf | Bin 216590 -> 216590 bytes vignettes/network.Rmd | 5 +-- vignettes/pedigree.Rmd | 10 +++--- 8 files changed, 90 insertions(+), 72 deletions(-) diff --git a/R/famSizeCal.R b/R/famSizeCal.R index 1ef2b5a..6dde136 100644 --- a/R/famSizeCal.R +++ b/R/famSizeCal.R @@ -69,14 +69,14 @@ famSizeCal <- function(kpc, Ngen, marR) { ) size <- sum(allGens) } else { - stop("You should never see this message. + stop("You should never see this message. If you do, that means that famSizeCal is not working properly.") } return(size) } #' twinImpute -#' A function to impute twins in the simulated pedigree \code{data.frame}. +#' A function to impute twins in the simulated pedigree \code{data.frame}. #' Twins can be imputed by specifying their IDs or by specifying the generation the twin should be imputed. #' This is a supplementary function for \code{simulatePedigree}. #' @param ped A \code{data.frame} in the same format as the output of \code{simulatePedigree}. @@ -86,47 +86,47 @@ famSizeCal <- function(kpc, Ngen, marR) { #' @return Returns a \code{data.frame} with MZ twins infomration added as a new column. #' @export -# A function to impute twins in the simulated pedigree \code{data.frame}. +# A function to impute twins in the simulated pedigree \code{data.frame}. # Twins can be imputed by specifying their IDs or by specifying the generation the twin should be imputed. -twinImpute <- function(ped, ID_twin1 = NA_integer_, ID_twin2 = NA_integer_, gen_twin = 2){ +twinImpute <- function(ped, ID_twin1 = NA_integer_, ID_twin2 = NA_integer_, gen_twin = 2) { # a support function resample <- function(x, ...) x[sample.int(length(x), ...)] # Check if the ped is the same format as the output of simulatePedigree - if(paste0(colnames(ped), collapse = "") != paste0(c("fam", "ID", "gen", "dadID", "momID", "spt", "sex"), collapse = "")){ + if (paste0(colnames(ped), collapse = "") != paste0(c("fam", "ID", "gen", "dadID", "momID", "spt", "sex"), collapse = "")) { stop("The input pedigree is not in the same format as the output of simulatePedigree") } ped$MZtwin <- NA_integer_ # Check if the two IDs are provided - if(is.na(ID_twin1) || is.na(ID_twin2)){ + if (is.na(ID_twin1) || is.na(ID_twin2)) { # Check if the generation is provided - if(is.na(gen_twin)){ + if (is.na(gen_twin)) { stop("You should provide either the IDs of the twins or the generation of the twins") } else { # Check if the generation is valid - if(gen_twin < 2 || gen_twin > max(ped$gen)){ + if (gen_twin < 2 || gen_twin > max(ped$gen)) { stop("The generation of the twins should be an integer between 2 and the maximum generation in the pedigree") } else { - idx <- nrow(ped[ped$gen == gen_twin & !is.na(ped$dadID),]) - usedID = c() + idx <- nrow(ped[ped$gen == gen_twin & !is.na(ped$dadID), ]) + usedID <- c() # randomly loop through all the indivuduals in the generation until find an individual who is the same sex and shares the same dadID and momID with another individual - for (i in 1: idx) { - cat("loop",i) + for (i in 1:idx) { + cat("loop", i) # check if i is equal to the number of individuals in the generation usedID <- c(usedID, ID_twin1) - #print(usedID) - if(i < idx){ + # print(usedID) + if (i < idx) { # randomly select one individual from the generation ID_twin1 <- resample(ped$ID[ped$gen == gen_twin & !(ped$ID %in% usedID) & !is.na(ped$dadID)], 1) - #cat("twin1", ID_twin1, "\n") + # cat("twin1", ID_twin1, "\n") # find one same sex sibling who has the same dadID and momID as the selected individual - twin2_Pool = ped$ID[ped$ID != ID_twin1 & ped$gen == gen_twin & ped$sex == ped$sex[ped$ID == ID_twin1] & ped$dadID == ped$dadID[ped$ID == ID_twin1] & ped$momID == ped$momID[ped$ID == ID_twin1]] + twin2_Pool <- ped$ID[ped$ID != ID_twin1 & ped$gen == gen_twin & ped$sex == ped$sex[ped$ID == ID_twin1] & ped$dadID == ped$dadID[ped$ID == ID_twin1] & ped$momID == ped$momID[ped$ID == ID_twin1]] # if there is an non-NA value in the twin2_Pool, get rid of the NA value if (all(is.na(twin2_Pool))) { cat("twin2_Pool is all NA\n") next } else { twin2_Pool <- twin2_Pool[!is.na(twin2_Pool)] - ID_twin2 <- resample( twin2_Pool , 1) + ID_twin2 <- resample(twin2_Pool, 1) break } # test if the ID_twin2 is missing @@ -136,10 +136,10 @@ twinImpute <- function(ped, ID_twin1 = NA_integer_, ID_twin2 = NA_integer_, gen_ } else { # randomly select all males or females in the generation and put them in a vector selectGender <- ped$ID[ped$gen == gen_twin & ped$sex == resample(c("M", "F"), 1) & !is.na(ped$dadID) & !is.na(ped$momID)] - #print(selectGender) + # print(selectGender) # randomly select two individuals from the vector ID_DoubleTwin <- sample(selectGender, 2) - #print(ID_DoubleTwin) + # print(ID_DoubleTwin) # change the second person's dadID and momID to the first person's dadID and momID ped$dadID[ped$ID == ID_DoubleTwin[2]] <- ped$dadID[ped$ID == ID_DoubleTwin[1]] ped$momID[ped$ID == ID_DoubleTwin[2]] <- ped$momID[ped$ID == ID_DoubleTwin[1]] @@ -147,16 +147,14 @@ twinImpute <- function(ped, ID_twin1 = NA_integer_, ID_twin2 = NA_integer_, gen_ ID_twin1 <- ID_DoubleTwin[1] ID_twin2 <- ID_DoubleTwin[2] break - } - + } + # Impute the IDs of the twin in the MZtwin column + ped$MZtwin[ped$ID == ID_twin1] <- ID_twin2 + ped$MZtwin[ped$ID == ID_twin2] <- ID_twin1 } - # Impute the IDs of the twin in the MZtwin column - ped$MZtwin[ped$ID == ID_twin1] <- ID_twin2 - ped$MZtwin[ped$ID == ID_twin2] <- ID_twin1 } - } -} else { + } else { # Impute the IDs of the twin in the MZtwin column ped$MZtwin[ped$ID == ID_twin1] <- ID_twin2 ped$MZtwin[ped$ID == ID_twin2] <- ID_twin1 @@ -165,4 +163,3 @@ twinImpute <- function(ped, ID_twin1 = NA_integer_, ID_twin2 = NA_integer_, gen_ cat("twin2", ID_twin2, "\n") return(ped) } - diff --git a/R/helper.R b/R/helper.R index 8213f96..5cc99fc 100644 --- a/R/helper.R +++ b/R/helper.R @@ -147,35 +147,47 @@ relatedness <- function(...) { standardize_colnames <- function(df) { # Internal mapping of standardized names to possible variants mapping <- list( - 'fam' = c('fam', 'Fam', 'FAM', - 'famID', 'FamID', 'FAMID', - 'famid', 'Famid', 'FAMid', - 'famfam','Famfam', 'FAMFAM'), - 'ID' = c('ID', 'id', 'Id', - 'Indiv', 'indiv', 'INDIV', - 'individual', 'Individual', 'INDIVIDUAL'), - 'gen' = c('gen', 'Gen', 'GEN', - 'gens', 'Gens', 'GENS', - 'generation', 'Generation', 'GENERATION'), - 'dadID' = c('dadID', 'DadID', 'DADID', - 'fatherID', 'FatherID', 'FATHERID'), - 'momID' = c('momID', 'MomID', 'MOMID', - 'motherID', 'MotherID', 'MOTHERID'), - 'spt' = c('spt', 'Spt', 'SPT'), - 'sex' = c('sex', 'Sex', 'SEX', - 'female', 'Female', 'FEMALE', - 'gender', 'Gender', 'GENDER', - 'male', 'Male', 'MALE', - 'man', 'Man', 'MAN', - 'men', 'Men', 'MEN', - 'woman', 'Woman', 'WOMAN', - 'women', 'Women', 'WOMEN') + "fam" = c( + "fam", "Fam", "FAM", + "famID", "FamID", "FAMID", + "famid", "Famid", "FAMid", + "famfam", "Famfam", "FAMFAM" + ), + "ID" = c( + "ID", "id", "Id", + "Indiv", "indiv", "INDIV", + "individual", "Individual", "INDIVIDUAL" + ), + "gen" = c( + "gen", "Gen", "GEN", + "gens", "Gens", "GENS", + "generation", "Generation", "GENERATION" + ), + "dadID" = c( + "dadID", "DadID", "DADID", + "fatherID", "FatherID", "FATHERID" + ), + "momID" = c( + "momID", "MomID", "MOMID", + "motherID", "MotherID", "MOTHERID" + ), + "spt" = c("spt", "Spt", "SPT"), + "sex" = c( + "sex", "Sex", "SEX", + "female", "Female", "FEMALE", + "gender", "Gender", "GENDER", + "male", "Male", "MALE", + "man", "Man", "MAN", + "men", "Men", "MEN", + "woman", "Woman", "WOMAN", + "women", "Women", "WOMEN" + ) ) for (standard_name in names(mapping)) { for (variant in mapping[[standard_name]]) { if (variant %in% colnames(df)) { colnames(df)[colnames(df) == variant] <- standard_name - break # Exit the loop once a match is found + break # Exit the loop once a match is found } } } diff --git a/R/plotPedigree.R b/R/plotPedigree.R index 8bf6e99..46fe05e 100644 --- a/R/plotPedigree.R +++ b/R/plotPedigree.R @@ -18,11 +18,10 @@ plotPedigree <- function(ped, col = 1, symbolsize = 1, branch = 0.6, packed = TRUE, align = c(1.5, 2), width = 8, - density = c(-1, 35, 65, 20), mar=c(2.1, 1, 2.1, 1), + density = c(-1, 35, 65, 20), mar = c(2.1, 1, 2.1, 1), angle = c(90, 65, 40, 0), keep.par = FALSE, pconnect = .5, ...) { - # Standardize column names in the input dataframe ped <- standardize_colnames(ped) @@ -48,9 +47,8 @@ plotPedigree <- function(ped, # family id if (length(unique(p$ped)) == 1) { # only one family p$ped <- 1 - } else { - # Assign a unique string pattern "ped #" for each unique family + # Assign a unique string pattern "ped #" for each unique family unique_families <- unique(p$ped) named_families <- 1:length(unique_families) p$ped <- named_families[match(p$ped, unique_families)] diff --git a/R/simulatePedigree.R b/R/simulatePedigree.R index fc09f9f..ea83016 100644 --- a/R/simulatePedigree.R +++ b/R/simulatePedigree.R @@ -39,8 +39,11 @@ simulatePedigree <- function(kpc = 3, # Calculate the expected family size in each generations sizeGens <- allGens(kpc = kpc, Ngen = Ngen, marR = marR) famSizeIndex <- 1:sum(sizeGens) - if(verbose){print( - "Step 1: Let's build the connection within each generation first")} + if (verbose) { + print( + "Step 1: Let's build the connection within each generation first" + ) + } for (i in 1:Ngen) { idGen <- as.numeric(paste(100, i, 1:sizeGens[i], sep = "")) # idGen <- ifelse(i==1, @@ -169,8 +172,11 @@ simulatePedigree <- function(kpc = 3, } } - if(verbose){ print( - "Step 2: Let's try to build connection between each two generations")} + if (verbose) { + print( + "Step 2: Let's try to build connection between each two generations" + ) + } df_Fam$ifparent <- FALSE df_Fam$ifson <- FALSE df_Fam$ifdau <- FALSE @@ -280,8 +286,11 @@ simulatePedigree <- function(kpc = 3, df_Ngen <- df_Ngen[order(as.numeric(rownames(df_Ngen))), , drop = FALSE] df_Ngen <- df_Ngen[, -ncol(df_Ngen)] df_Fam[df_Fam$gen == i, ] <- df_Ngen - if(verbose){print( - "Step 2.2: mark a group of potential parents in the i-1 th generation")} + if (verbose) { + print( + "Step 2.2: mark a group of potential parents in the i-1 th generation" + ) + } df_Ngen <- df_Fam[df_Fam$gen == i - 1, ] df_Ngen$ifparent <- FALSE df_Ngen$ifson <- FALSE @@ -308,8 +317,11 @@ simulatePedigree <- function(kpc = 3, df_Ngen <- df_Ngen[order(as.numeric(rownames(df_Ngen))), , drop = FALSE] df_Fam[df_Fam$gen == i - 1, ] <- df_Ngen - if(verbose){print( - "Step 2.3: connect the i and i-1 th generation")} + if (verbose) { + print( + "Step 2.3: connect the i and i-1 th generation" + ) + } if (i == 1) { next } else { diff --git a/tests/testthat/test-network.R b/tests/testthat/test-network.R index 1f4a096..4274a31 100755 --- a/tests/testthat/test-network.R +++ b/tests/testthat/test-network.R @@ -35,7 +35,7 @@ test_that("ped2graph produces a graph for inbreeding data", { test_that("ped2add produces correct matrix dims, values, and dimnames for hazard", { - data(hazard) + data(hazard) add <- ped2add(hazard) # Check dimension expect_equal(dim(add), c(nrow(hazard), nrow(hazard))) diff --git a/vignettes/articles/paper.pdf b/vignettes/articles/paper.pdf index c84a19b50a48bdbfc2d0f7cdeb3fb041d9757a4b..0f5237410bd8b50f2c326020c946283f165f48ef 100644 GIT binary patch delta 228 zcmeC1!`nB9w_yuo^8pSsD`QhDV~eQi8}~6vOxHieD6oC=0mg17cBsU3*Tamaa8Bc4 zMspZv`~Jg>5{xQFNoMAW7Dh?BX6B}rx+bP3$-0S&sV2IH#;IvWX33VOmIkKnzmG6( z|9ymMP7|w{v8l1e^f~U#5)kJ0o$k!#%x)$YMy8G~CXS9SMkXf4CeCgKW=>9~&W;Ao criP}DZpL;BHiVVLPT%;RNpAZTZ{{i{0JTR!mH+?% delta 228 zcmeC1!`nB9w_yuo^8pT1D-#nd1JkJK8}~6vOxHieD6oC=0mg17cBsU3*Tamaa8Bc4 zMspZv`~Jg>5{xRzsfnp+Mh2$3Y36C>x+ZBUhPsyKrY5?{CWgkTY37L*MrP*izmG6( z|9ymMP7|xCiHU*f^f~U#5)kJ0o$k!#%x*>|hHe&4#*R+ThQ=<&&ZchWhOS0#M$XQT cW^T@Aj%IcWHiVVLPT%;RNpAZTZ{{i{00Ra>TmS$7 diff --git a/vignettes/network.Rmd b/vignettes/network.Rmd index ef71a5f..c6734b7 100644 --- a/vignettes/network.Rmd +++ b/vignettes/network.Rmd @@ -42,11 +42,8 @@ capture.output(plotPedigree(hazard, code_male = 0)) We will use the `hazard` pedigree data as an example. ```{r} - ds <- ped2fam(hazard, famID = "newFamID") table(ds$FamID, ds$newFamID) - - ``` Because the `hazard` data already had a family ID variable we compare our newly created variable to the pre-existing one. They match! @@ -107,7 +104,7 @@ Here we calculate the relatedness between all pairs of individuals in the `hazar through sharing both parents. ```{r} commonNuclear <- ped2cn(hazard) -commonNuclear [1:7, 1:7] +commonNuclear[1:7, 1:7] table(commonNuclear) ``` diff --git a/vignettes/pedigree.Rmd b/vignettes/pedigree.Rmd index 923377e..cc19655 100644 --- a/vignettes/pedigree.Rmd +++ b/vignettes/pedigree.Rmd @@ -38,10 +38,12 @@ To illustrate this, let us generate a pedigree. This pedigree has a total of fou ```{r} set.seed(5) -df_ped <- simulatePedigree(kpc = 4, - Ngen = 4, - sexR = .5, - marR = .7) +df_ped <- simulatePedigree( + kpc = 4, + Ngen = 4, + sexR = .5, + marR = .7 +) summary(df_ped) ```