Skip to content

Commit

Permalink
make additional functions
Browse files Browse the repository at this point in the history
  • Loading branch information
smasongarrison committed May 21, 2024
1 parent 5928f37 commit 3e51dfa
Show file tree
Hide file tree
Showing 7 changed files with 330 additions and 106 deletions.
5 changes: 4 additions & 1 deletion NAMESPACE
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,10 @@ export(relatedness)
export(repairSex)
export(resample)
export(simulatePedigree)
export(summarizePedigree)
export(summarizeFamilies)
export(summarizeMatrilines)
export(summarizePatrilines)
export(summarizePedigrees)
export(vech)
import(data.table)
import(kinship2)
Expand Down
208 changes: 138 additions & 70 deletions R/summarizePedigree.R
Original file line number Diff line number Diff line change
Expand Up @@ -2,45 +2,43 @@
#'
#' This function summarizes pedigree data, including calculating summary statistics for all numeric variables,
#' and finding the originating member for each family, maternal, and paternal line.
#'
#' @param pedigree_data A data frame containing the pedigree data.
#' @param personID A character string indicating the column name for the person ID variable.
#' @param momID A character string indicating the column name for the mother ID variable.
#' @param dadID A character string indicating the column name for the father ID variable.
#' @inheritParams ped2fam
#' @inheritParams ped2maternal
#' @inheritParams ped2paternal
#' @param nbiggest The number of biggest lines to return.
#' @param noldest The number of oldest lines to return.
#' @param byr The column name for birth year.
#' @param type The type of summary statistics to calculate. Options are "fathers", "mothers", and "families".
#' @returns A list containing summary statistics for family, maternal, and paternal lines, as well as the 5 oldest and biggest lines.
#' @import data.table
#' @export
summarizePedigree <- function(ped, famID = "famID", personID = "ID",
summarizePedigrees <- function(ped, famID = "famID", personID = "ID",
momID = "momID", dadID = "dadID",
matID = "matID", patID = "patID",
yrb = NULL,
byr = NULL, type = c("fathers", "mothers", "families"),
nbiggest = 5, noldest = 5) {

# checks
if(!requireNamespace("data.table", quietly = TRUE)) {
stop("The 'data.table' package is required for this function. Please install it and try again.")
}
if(personID %in% c(famID, momID, dadID, matID)) {
stop("personID cannot be the same as any of the other ID variables.")

Check warning on line 22 in R/summarizePedigree.R

View check run for this annotation

Codecov / codecov/patch

R/summarizePedigree.R#L21-L22

Added lines #L21 - L22 were not covered by tests
}
if(!all(c(personID, momID, dadID) %in% names(ped))) {
stop("personID, momID, and dadID must be columns in the pedigree data.")

Check warning on line 25 in R/summarizePedigree.R

View check run for this annotation

Codecov / codecov/patch

R/summarizePedigree.R#L24-L25

Added lines #L24 - L25 were not covered by tests
}
if(!is.null(yrb) && !yrb %in% names(ped)) {
stop("yrb must be a column in the pedigree data.")
if(!is.null(byr) && !byr %in% names(ped)) {
stop("byr must be a column in the pedigree data.")

Check warning on line 28 in R/summarizePedigree.R

View check run for this annotation

Codecov / codecov/patch

R/summarizePedigree.R#L27-L28

Added lines #L27 - L28 were not covered by tests
}
# Convert to data.table
ped_dt <- data.table::as.data.table(ped)

Check warning on line 31 in R/summarizePedigree.R

View check run for this annotation

Codecov / codecov/patch

R/summarizePedigree.R#L31

Added line #L31 was not covered by tests

# Build the pedigree using the provided functions
if(!famID %in% names(ped_dt)) {
ped_dt <- BGmisc::ped2fam(ped_dt, personID = personID, momID = momID, dadID = dadID, famID = famID)
if(!famID %in% names(ped_dt) & "families" %in% type ) {
ped_dt <- ped2fam(ped_dt, personID = personID, momID = momID, dadID = dadID, famID = famID)

Check warning on line 35 in R/summarizePedigree.R

View check run for this annotation

Codecov / codecov/patch

R/summarizePedigree.R#L34-L35

Added lines #L34 - L35 were not covered by tests
}
if(!matID %in% names(ped_dt)) {
ped_dt <- BGmisc::ped2maternal(ped_dt, personID = personID, momID = momID, dadID = dadID, matID = matID)
if(!matID %in% names(ped_dt) & "mothers" %in% type ) {
ped_dt <- ped2maternal(ped_dt, personID = personID, momID = momID, dadID = dadID, matID = matID)

Check warning on line 38 in R/summarizePedigree.R

View check run for this annotation

Codecov / codecov/patch

R/summarizePedigree.R#L37-L38

Added lines #L37 - L38 were not covered by tests
}
if(!patID %in% names(ped_dt)) {
ped_dt <- BGmisc::ped2paternal(ped_dt, personID = personID, momID = momID, dadID = dadID, patID = patID)
if(!patID %in% names(ped_dt) & "fathers" %in% type) {
ped_dt <- ped2paternal(ped_dt, personID = personID, momID = momID, dadID = dadID, patID = patID)

Check warning on line 41 in R/summarizePedigree.R

View check run for this annotation

Codecov / codecov/patch

R/summarizePedigree.R#L40-L41

Added lines #L40 - L41 were not covered by tests
}

# Function to calculate summary statistics for all numeric variables
Expand All @@ -49,11 +47,11 @@ summarizePedigree <- function(ped, famID = "famID", personID = "ID",
summary_stats <- data[, lapply(.SD, function(x) {
list(
count = .N,
mean = mean(x, na.rm = TRUE),
median = median(x, na.rm = TRUE),
min = min(x, na.rm = TRUE),
max = max(x, na.rm = TRUE),
sd = sd(x, na.rm = TRUE)
mean = base::mean(x, na.rm = TRUE),
median = stats::median(x, na.rm = TRUE),
min = base::min(x, na.rm = TRUE),
max = base::max(x, na.rm = TRUE),
sd = stats::sd(x, na.rm = TRUE)

Check warning on line 54 in R/summarizePedigree.R

View check run for this annotation

Codecov / codecov/patch

R/summarizePedigree.R#L45-L54

Added lines #L45 - L54 were not covered by tests
)
}), by = group_var, .SDcols = numeric_cols]

Check warning on line 56 in R/summarizePedigree.R

View check run for this annotation

Codecov / codecov/patch

R/summarizePedigree.R#L56

Added line #L56 was not covered by tests

Expand All @@ -67,55 +65,125 @@ summarizePedigree <- function(ped, famID = "famID", personID = "ID",
data[order(byr), .SD[1], by = group_var]

Check warning on line 65 in R/summarizePedigree.R

View check run for this annotation

Codecov / codecov/patch

R/summarizePedigree.R#L64-L65

Added lines #L64 - L65 were not covered by tests
}

# Initialize output list
output <- list()

Check warning on line 69 in R/summarizePedigree.R

View check run for this annotation

Codecov / codecov/patch

R/summarizePedigree.R#L69

Added line #L69 was not covered by tests

# Calculate summary statistics for families, maternal lines, and paternal lines
family_summary_dt <- calculate_summary_dt(ped_dt, famID)
maternal_summary_dt <- calculate_summary_dt(ped_dt, matID)
paternal_summary_dt <- calculate_summary_dt(ped_dt, patID)

# Find the originating member for each line
originating_member_family <- find_originating_member(ped_dt, famID)
originating_member_maternal <- find_originating_member(ped_dt, matID)
originating_member_paternal <- find_originating_member(ped_dt, patID)

# Merge summary statistics with originating members for additional information
family_summary_dt <- merge(family_summary_dt, originating_member_family, by = famID, suffixes = c("", "_founder"))
maternal_summary_dt <- merge(maternal_summary_dt, originating_member_maternal, by = matID, suffixes = c("", "_founder"))
paternal_summary_dt <- merge(paternal_summary_dt, originating_member_paternal, by = patID, suffixes = c("", "_founder"))

if(!is.null(byr)){
# Identify the 5 oldest lines
oldest_families <- family_summary_dt[order(byr)][1:noldest]
oldest_maternal <- maternal_summary_dt[order(byr)][1:noldest]
oldest_paternal <- paternal_summary_dt[order(byr)][1:noldest]



if("families" %in% type) {
family_summary_dt <- calculate_summary_dt(ped_dt, famID)

Check warning on line 76 in R/summarizePedigree.R

View check run for this annotation

Codecov / codecov/patch

R/summarizePedigree.R#L75-L76

Added lines #L75 - L76 were not covered by tests
# Find the originating member for each line
originating_member_family <- find_originating_member(ped_dt, famID)

Check warning on line 78 in R/summarizePedigree.R

View check run for this annotation

Codecov / codecov/patch

R/summarizePedigree.R#L78

Added line #L78 was not covered by tests
# Merge summary statistics with originating members for additional information
family_summary_dt <- merge(family_summary_dt, originating_member_family,
by = famID, suffixes = c("", "_founder"))
output$family_summary <- family_summary_dt

Check warning on line 82 in R/summarizePedigree.R

View check run for this annotation

Codecov / codecov/patch

R/summarizePedigree.R#L80-L82

Added lines #L80 - L82 were not covered by tests
}
# Identify the 5 biggest lines
biggest_families <- family_summary_dt[order(-count)][1:nbiggest]
biggest_maternal <- maternal_summary_dt[order(-count)][1:nbiggest]
biggest_paternal <- paternal_summary_dt[order(-count)][1:nbiggest]

# Output the results as a list
if(!is.null(byr)){
output <- list(
family_summary = family_summary_dt,
maternal_summary = maternal_summary_dt,
paternal_summary = paternal_summary_dt,
oldest_families = oldest_families,
oldest_maternal = oldest_maternal,
oldest_paternal = oldest_paternal,
biggest_families = biggest_families,
biggest_maternal = biggest_maternal,
biggest_paternal = biggest_paternal
)
} else {
output <- list(
family_summary = family_summary_dt,
maternal_summary = maternal_summary_dt,
paternal_summary = paternal_summary_dt,
biggest_families = biggest_families,
biggest_maternal = biggest_maternal,
biggest_paternal = biggest_paternal
)
if("mothers" %in% type) {
maternal_summary_dt <- calculate_summary_dt(ped_dt, matID)
originating_member_maternal <- find_originating_member(ped_dt, matID)
maternal_summary_dt <- merge(maternal_summary_dt, originating_member_maternal, by = matID, suffixes = c("", "_founder"))
output$maternal_summary <- maternal_summary_dt

Check warning on line 88 in R/summarizePedigree.R

View check run for this annotation

Codecov / codecov/patch

R/summarizePedigree.R#L84-L88

Added lines #L84 - L88 were not covered by tests
}
if("fathers" %in% type) {
paternal_summary_dt <- calculate_summary_dt(ped_dt, patID)
originating_member_paternal <- find_originating_member(ped_dt, patID)
paternal_summary_dt <- merge(paternal_summary_dt, originating_member_paternal, by = patID, suffixes = c("", "_founder"))
output$paternal_summary <- paternal_summary_dt

Check warning on line 94 in R/summarizePedigree.R

View check run for this annotation

Codecov / codecov/patch

R/summarizePedigree.R#L90-L94

Added lines #L90 - L94 were not covered by tests
}

# Optionally find the superlative lines

## oldest
if(!is.null(byr) && noldest > 0 & noldest <= nrow(ped_dt)) {
if("families" %in% type) {
output$oldest_families <- family_summary_dt[order(get(byr))][1:noldest]

Check warning on line 102 in R/summarizePedigree.R

View check run for this annotation

Codecov / codecov/patch

R/summarizePedigree.R#L100-L102

Added lines #L100 - L102 were not covered by tests
}
if("mothers" %in% type) {
output$oldest_maternal <- maternal_summary_dt[order(get(byr))][1:noldest]

Check warning on line 105 in R/summarizePedigree.R

View check run for this annotation

Codecov / codecov/patch

R/summarizePedigree.R#L104-L105

Added lines #L104 - L105 were not covered by tests
}
if("fathers" %in% type) {
output$oldest_paternal <- paternal_summary_dt[order(get(byr))][1:noldest]

Check warning on line 108 in R/summarizePedigree.R

View check run for this annotation

Codecov / codecov/patch

R/summarizePedigree.R#L107-L108

Added lines #L107 - L108 were not covered by tests
}
}

# biggest lines
if(!is.null(nbiggest) & nbiggest > 0 & nbiggest <= nrow(ped_dt)) {
if("families" %in% type) {
output$biggest_families <- family_summary_dt[order(-get("count"))][1:nbiggest]

Check warning on line 115 in R/summarizePedigree.R

View check run for this annotation

Codecov / codecov/patch

R/summarizePedigree.R#L113-L115

Added lines #L113 - L115 were not covered by tests
}
if("mothers" %in% type) {
output$biggest_maternal <- maternal_summary_dt[order(-get("count"))][1:nbiggest]

Check warning on line 118 in R/summarizePedigree.R

View check run for this annotation

Codecov / codecov/patch

R/summarizePedigree.R#L117-L118

Added lines #L117 - L118 were not covered by tests
}
if("fathers" %in% type) {
output$biggest_paternal <- paternal_summary_dt[order(-get("count"))][1:nbiggest]

Check warning on line 121 in R/summarizePedigree.R

View check run for this annotation

Codecov / codecov/patch

R/summarizePedigree.R#L120-L121

Added lines #L120 - L121 were not covered by tests
}
}

return(output)

Check warning on line 125 in R/summarizePedigree.R

View check run for this annotation

Codecov / codecov/patch

R/summarizePedigree.R#L125

Added line #L125 was not covered by tests
}

#' Summarize the maternal lines in a pedigree
#' @inheritParams summarizePedigrees
#' @seealso [summarizePedigrees ()]
#' @export
#'
summarizeMatrilines <- function(ped, famID = "famID", personID = "ID",
momID = "momID", dadID = "dadID",
matID = "matID", patID = "patID",
byr = NULL,
nbiggest = 5, noldest = 5) {
# Call to wrapper function
summarizePedigrees(ped = ped,
personID = personID,
nbiggest = nbiggest,
noldest = noldest,
byr=byr,
momID = momID, dadID = dadID,
famID = famID, matID=matID, patID=patID,
type = "mothers")

Check warning on line 146 in R/summarizePedigree.R

View check run for this annotation

Codecov / codecov/patch

R/summarizePedigree.R#L139-L146

Added lines #L139 - L146 were not covered by tests
}

#' Summarize the paternal lines in a pedigree
#' @inheritParams summarizePedigrees
#' @seealso [summarizePedigrees ()]
#' @export
#'
summarizePatrilines <- function(ped, famID = "famID", personID = "ID",
momID = "momID", dadID = "dadID",
matID = "matID", patID = "patID",
byr = NULL,
nbiggest = 5, noldest = 5) {
# Call to wrapper function
summarizePedigrees(ped = ped,
personID = personID,
nbiggest = nbiggest,
noldest = noldest,
byr=byr,
momID = momID, dadID = dadID,
famID = famID, matID=matID, patID = patID,
type = "fathers")

Check warning on line 167 in R/summarizePedigree.R

View check run for this annotation

Codecov / codecov/patch

R/summarizePedigree.R#L160-L167

Added lines #L160 - L167 were not covered by tests
}

#' Summarize the families in a pedigree
#' @inheritParams summarizePedigrees
#' @seealso [summarizePedigrees ()]
#' @export

summarizeFamilies <- function(ped, famID = "famID", personID = "ID",
momID = "momID", dadID = "dadID",
matID = "matID", patID = "patID",
byr = NULL,
nbiggest = 5, noldest = 5) {
# Call to wrapper function
summarizePedigrees(ped = ped,
personID = personID,
nbiggest = nbiggest,
noldest = noldest,
byr=byr,
momID = momID, dadID = dadID,
famID = famID, matID=matID, patID = patID,
type = "families")

Check warning on line 188 in R/summarizePedigree.R

View check run for this annotation

Codecov / codecov/patch

R/summarizePedigree.R#L181-L188

Added lines #L181 - L188 were not covered by tests
}
46 changes: 46 additions & 0 deletions man/summarizeFamilies.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

46 changes: 46 additions & 0 deletions man/summarizeMatrilines.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

Loading

0 comments on commit 3e51dfa

Please sign in to comment.