Saved new PDF of paper

harmonize white space
R-Computing-Lab · Oct 6, 2023 · 5398d85 · 5398d85
1 parent 38ebf71
commit 5398d85
Show file tree

Hide file tree

Showing 8 changed files with 90 additions and 72 deletions.
diff --git a/R/famSizeCal.R b/R/famSizeCal.R
@@ -69,14 +69,14 @@ famSizeCal <- function(kpc, Ngen, marR) {
     )
     size <- sum(allGens)
   } else {
-    stop("You should never see this message. 
+    stop("You should never see this message.
     If you do, that means that famSizeCal is not working properly.")
   }
   return(size)
 }
 
 #' twinImpute
-#' A function to impute twins in the simulated pedigree \code{data.frame}. 
+#' A function to impute twins in the simulated pedigree \code{data.frame}.
 #' Twins can be imputed by specifying their IDs or by specifying the generation the twin should be imputed.
 #' This is a supplementary function for \code{simulatePedigree}.
 #' @param ped A \code{data.frame} in the same format as the output of \code{simulatePedigree}.
@@ -86,47 +86,47 @@ famSizeCal <- function(kpc, Ngen, marR) {
 #' @return Returns a \code{data.frame} with MZ twins infomration added as a new column.
 #' @export
 
-# A function to impute twins in the simulated pedigree \code{data.frame}. 
+# A function to impute twins in the simulated pedigree \code{data.frame}.
 # Twins can be imputed by specifying their IDs or by specifying the generation the twin should be imputed.
-twinImpute <- function(ped, ID_twin1 = NA_integer_, ID_twin2 = NA_integer_, gen_twin = 2){
+twinImpute <- function(ped, ID_twin1 = NA_integer_, ID_twin2 = NA_integer_, gen_twin = 2) {
   # a support function
   resample <- function(x, ...) x[sample.int(length(x), ...)]
   # Check if the ped is the same format as the output of simulatePedigree
-  if(paste0(colnames(ped), collapse = "") != paste0(c("fam", "ID", "gen", "dadID", "momID", "spt", "sex"), collapse = "")){
+  if (paste0(colnames(ped), collapse = "") != paste0(c("fam", "ID", "gen", "dadID", "momID", "spt", "sex"), collapse = "")) {
     stop("The input pedigree is not in the same format as the output of simulatePedigree")
   }
   ped$MZtwin <- NA_integer_
   # Check if the two IDs are provided
-  if(is.na(ID_twin1) || is.na(ID_twin2)){
+  if (is.na(ID_twin1) || is.na(ID_twin2)) {
     # Check if the generation is provided
-    if(is.na(gen_twin)){
+    if (is.na(gen_twin)) {
       stop("You should provide either the IDs of the twins or the generation of the twins")
     } else {
       # Check if the generation is valid
-      if(gen_twin < 2 || gen_twin > max(ped$gen)){
+      if (gen_twin < 2 || gen_twin > max(ped$gen)) {
         stop("The generation of the twins should be an integer between 2 and the maximum generation in the pedigree")
       } else {
-        idx <- nrow(ped[ped$gen == gen_twin & !is.na(ped$dadID),]) 
-        usedID = c()
+        idx <- nrow(ped[ped$gen == gen_twin & !is.na(ped$dadID), ])
+        usedID <- c()
         # randomly loop through all the indivuduals in the generation until find an individual who is the same sex and shares the same dadID and momID with another individual
-        for (i in 1: idx) {
-          cat("loop",i)
+        for (i in 1:idx) {
+          cat("loop", i)
           # check if i is equal to the number of individuals in the generation
           usedID <- c(usedID, ID_twin1)
-          #print(usedID)
-          if(i < idx){
+          # print(usedID)
+          if (i < idx) {
             # randomly select one individual from the generation
             ID_twin1 <- resample(ped$ID[ped$gen == gen_twin & !(ped$ID %in% usedID) & !is.na(ped$dadID)], 1)
-            #cat("twin1", ID_twin1, "\n")
+            # cat("twin1", ID_twin1, "\n")
             # find one same sex sibling who has the same dadID and momID as the selected individual
-            twin2_Pool = ped$ID[ped$ID != ID_twin1 & ped$gen == gen_twin & ped$sex == ped$sex[ped$ID == ID_twin1] & ped$dadID == ped$dadID[ped$ID == ID_twin1] & ped$momID == ped$momID[ped$ID == ID_twin1]] 
+            twin2_Pool <- ped$ID[ped$ID != ID_twin1 & ped$gen == gen_twin & ped$sex == ped$sex[ped$ID == ID_twin1] & ped$dadID == ped$dadID[ped$ID == ID_twin1] & ped$momID == ped$momID[ped$ID == ID_twin1]]
             # if there is an non-NA value in the twin2_Pool, get rid of the NA value
             if (all(is.na(twin2_Pool))) {
               cat("twin2_Pool is all NA\n")
               next
             } else {
               twin2_Pool <- twin2_Pool[!is.na(twin2_Pool)]
-              ID_twin2 <- resample( twin2_Pool , 1)
+              ID_twin2 <- resample(twin2_Pool, 1)
               break
             }
             # test if the ID_twin2 is missing
@@ -136,27 +136,25 @@ twinImpute <- function(ped, ID_twin1 = NA_integer_, ID_twin2 = NA_integer_, gen_
           } else {
             # randomly select all males or females in the generation and put them in a vector
             selectGender <- ped$ID[ped$gen == gen_twin & ped$sex == resample(c("M", "F"), 1) & !is.na(ped$dadID) & !is.na(ped$momID)]
-            #print(selectGender)
+            # print(selectGender)
             # randomly select two individuals from the vector
             ID_DoubleTwin <- sample(selectGender, 2)
-            #print(ID_DoubleTwin)
+            # print(ID_DoubleTwin)
             # change the second person's dadID and momID to the first person's dadID and momID
             ped$dadID[ped$ID == ID_DoubleTwin[2]] <- ped$dadID[ped$ID == ID_DoubleTwin[1]]
             ped$momID[ped$ID == ID_DoubleTwin[2]] <- ped$momID[ped$ID == ID_DoubleTwin[1]]
             # let the two individuals be twins!
             ID_twin1 <- ID_DoubleTwin[1]
             ID_twin2 <- ID_DoubleTwin[2]
             break
-
           }
-
+        }
+        # Impute the IDs of the twin in the MZtwin column
+        ped$MZtwin[ped$ID == ID_twin1] <- ID_twin2
+        ped$MZtwin[ped$ID == ID_twin2] <- ID_twin1
       }
-      # Impute the IDs of the twin in the MZtwin column
-      ped$MZtwin[ped$ID == ID_twin1] <- ID_twin2
-      ped$MZtwin[ped$ID == ID_twin2] <- ID_twin1
     }
-  }
-} else {
+  } else {
     # Impute the IDs of the twin in the MZtwin column
     ped$MZtwin[ped$ID == ID_twin1] <- ID_twin2
     ped$MZtwin[ped$ID == ID_twin2] <- ID_twin1
@@ -165,4 +163,3 @@ twinImpute <- function(ped, ID_twin1 = NA_integer_, ID_twin2 = NA_integer_, gen_
   cat("twin2", ID_twin2, "\n")
   return(ped)
 }
-
diff --git a/R/helper.R b/R/helper.R
@@ -147,35 +147,47 @@ relatedness <- function(...) {
 standardize_colnames <- function(df) {
   # Internal mapping of standardized names to possible variants
   mapping <- list(
-    'fam' = c('fam', 'Fam', 'FAM',
-              'famID', 'FamID', 'FAMID',
-              'famid', 'Famid', 'FAMid',
-              'famfam','Famfam', 'FAMFAM'),
-    'ID' = c('ID', 'id', 'Id',
-             'Indiv', 'indiv', 'INDIV',
-             'individual', 'Individual', 'INDIVIDUAL'),
-    'gen' = c('gen', 'Gen', 'GEN',
-              'gens', 'Gens', 'GENS',
-              'generation', 'Generation', 'GENERATION'),
-    'dadID' = c('dadID', 'DadID', 'DADID',
-                'fatherID', 'FatherID', 'FATHERID'),
-    'momID' = c('momID', 'MomID', 'MOMID',
-                'motherID', 'MotherID', 'MOTHERID'),
-    'spt' = c('spt', 'Spt', 'SPT'),
-    'sex' = c('sex', 'Sex', 'SEX',
-              'female', 'Female', 'FEMALE',
-              'gender', 'Gender', 'GENDER',
-              'male', 'Male', 'MALE',
-              'man', 'Man', 'MAN',
-              'men', 'Men', 'MEN',
-              'woman', 'Woman', 'WOMAN',
-              'women', 'Women', 'WOMEN')
+    "fam" = c(
+      "fam", "Fam", "FAM",
+      "famID", "FamID", "FAMID",
+      "famid", "Famid", "FAMid",
+      "famfam", "Famfam", "FAMFAM"
+    ),
+    "ID" = c(
+      "ID", "id", "Id",
+      "Indiv", "indiv", "INDIV",
+      "individual", "Individual", "INDIVIDUAL"
+    ),
+    "gen" = c(
+      "gen", "Gen", "GEN",
+      "gens", "Gens", "GENS",
+      "generation", "Generation", "GENERATION"
+    ),
+    "dadID" = c(
+      "dadID", "DadID", "DADID",
+      "fatherID", "FatherID", "FATHERID"
+    ),
+    "momID" = c(
+      "momID", "MomID", "MOMID",
+      "motherID", "MotherID", "MOTHERID"
+    ),
+    "spt" = c("spt", "Spt", "SPT"),
+    "sex" = c(
+      "sex", "Sex", "SEX",
+      "female", "Female", "FEMALE",
+      "gender", "Gender", "GENDER",
+      "male", "Male", "MALE",
+      "man", "Man", "MAN",
+      "men", "Men", "MEN",
+      "woman", "Woman", "WOMAN",
+      "women", "Women", "WOMEN"
+    )
   )
   for (standard_name in names(mapping)) {
     for (variant in mapping[[standard_name]]) {
       if (variant %in% colnames(df)) {
         colnames(df)[colnames(df) == variant] <- standard_name
-        break  # Exit the loop once a match is found
+        break # Exit the loop once a match is found
       }
     }
   }

diff --git a/R/plotPedigree.R b/R/plotPedigree.R
@@ -18,11 +18,10 @@ plotPedigree <- function(ped,
                          col = 1,
                          symbolsize = 1, branch = 0.6,
                          packed = TRUE, align = c(1.5, 2), width = 8,
-                         density = c(-1, 35, 65, 20), mar=c(2.1, 1, 2.1, 1),
+                         density = c(-1, 35, 65, 20), mar = c(2.1, 1, 2.1, 1),
                          angle = c(90, 65, 40, 0), keep.par = FALSE,
                          pconnect = .5,
                          ...) {
-
   # Standardize column names in the input dataframe
   ped <- standardize_colnames(ped)
 
@@ -48,9 +47,8 @@ plotPedigree <- function(ped,
     # family id
     if (length(unique(p$ped)) == 1) { # only one family
       p$ped <- 1
-
     } else {
-     # Assign a unique string pattern "ped #" for each unique family
+      # Assign a unique string pattern "ped #" for each unique family
       unique_families <- unique(p$ped)
       named_families <- 1:length(unique_families)
       p$ped <- named_families[match(p$ped, unique_families)]

diff --git a/R/simulatePedigree.R b/R/simulatePedigree.R
@@ -39,8 +39,11 @@ simulatePedigree <- function(kpc = 3,
   # Calculate the expected family size in each generations
   sizeGens <- allGens(kpc = kpc, Ngen = Ngen, marR = marR)
   famSizeIndex <- 1:sum(sizeGens)
-  if(verbose){print(
-    "Step 1: Let's build the connection within each generation first")}
+  if (verbose) {
+    print(
+      "Step 1: Let's build the connection within each generation first"
+    )
+  }
   for (i in 1:Ngen) {
     idGen <- as.numeric(paste(100, i, 1:sizeGens[i], sep = ""))
     # idGen <- ifelse(i==1,
@@ -169,8 +172,11 @@ simulatePedigree <- function(kpc = 3,
     }
   }
 
-  if(verbose){ print(
-    "Step 2: Let's try to build connection between each two generations")}
+  if (verbose) {
+    print(
+      "Step 2: Let's try to build connection between each two generations"
+    )
+  }
   df_Fam$ifparent <- FALSE
   df_Fam$ifson <- FALSE
   df_Fam$ifdau <- FALSE
@@ -280,8 +286,11 @@ simulatePedigree <- function(kpc = 3,
       df_Ngen <- df_Ngen[order(as.numeric(rownames(df_Ngen))), , drop = FALSE]
       df_Ngen <- df_Ngen[, -ncol(df_Ngen)]
       df_Fam[df_Fam$gen == i, ] <- df_Ngen
-      if(verbose){print(
-      "Step 2.2: mark a group of potential parents in the i-1 th generation")}
+      if (verbose) {
+        print(
+          "Step 2.2: mark a group of potential parents in the i-1 th generation"
+        )
+      }
       df_Ngen <- df_Fam[df_Fam$gen == i - 1, ]
       df_Ngen$ifparent <- FALSE
       df_Ngen$ifson <- FALSE
@@ -308,8 +317,11 @@ simulatePedigree <- function(kpc = 3,
 
       df_Ngen <- df_Ngen[order(as.numeric(rownames(df_Ngen))), , drop = FALSE]
       df_Fam[df_Fam$gen == i - 1, ] <- df_Ngen
-      if(verbose){print(
-      "Step 2.3: connect the i and i-1 th generation")}
+      if (verbose) {
+        print(
+          "Step 2.3: connect the i and i-1 th generation"
+        )
+      }
       if (i == 1) {
         next
       } else {

diff --git a/tests/testthat/test-network.R b/tests/testthat/test-network.R
@@ -35,7 +35,7 @@ test_that("ped2graph produces a graph for inbreeding data", {
 
 
 test_that("ped2add produces correct matrix dims, values, and dimnames for hazard", {
-       data(hazard)
+  data(hazard)
   add <- ped2add(hazard)
   # Check dimension
   expect_equal(dim(add), c(nrow(hazard), nrow(hazard)))

diff --git a/vignettes/articles/paper.pdf b/vignettes/articles/paper.pdf
diff --git a/vignettes/network.Rmd b/vignettes/network.Rmd
@@ -42,11 +42,8 @@ capture.output(plotPedigree(hazard, code_male = 0))
 We will use the `hazard` pedigree data as an example.
 
 ```{r}
-
 ds <- ped2fam(hazard, famID = "newFamID")
 table(ds$FamID, ds$newFamID)
-
-
 ```
 
 Because the `hazard` data already had a family ID variable we compare our newly created variable to the pre-existing one.  They match!
@@ -107,7 +104,7 @@ Here we calculate the relatedness between all pairs of individuals in the `hazar
 through sharing both parents.
 ```{r}
 commonNuclear <- ped2cn(hazard)
-commonNuclear [1:7, 1:7]
+commonNuclear[1:7, 1:7]
 
 table(commonNuclear)
 ```

diff --git a/vignettes/pedigree.Rmd b/vignettes/pedigree.Rmd
@@ -38,10 +38,12 @@ To illustrate this, let us generate a pedigree. This pedigree has a total of fou
 
 ```{r}
 set.seed(5)
-df_ped <- simulatePedigree(kpc = 4,
-                        Ngen = 4,
-                        sexR = .5,
-                        marR = .7)
+df_ped <- simulatePedigree(
+  kpc = 4,
+  Ngen = 4,
+  sexR = .5,
+  marR = .7
+)
 summary(df_ped)
 ```