Skip to content

Commit

Permalink
Merge pull request #5 from Boehringer-Ingelheim/dev
Browse files Browse the repository at this point in the history
Dev to rc/3.0.0
  • Loading branch information
mingstat authored Oct 14, 2024
2 parents f724639 + cf83f52 commit c424f1a
Show file tree
Hide file tree
Showing 51 changed files with 1,088 additions and 454 deletions.
7 changes: 3 additions & 4 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -2,9 +2,8 @@
.Rhistory
.RData
.Ruserdata
*.sqlite
*.sqlite-journal
*.rds
.Rprofile

.vscode

docs
pkgdown
8 changes: 4 additions & 4 deletions .lintr
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
linters: linters_with_defaults(
line_length_linter(120),
object_usage_linter = NULL,
indentation_linter = NULL,
trailing_whitespace_linter = NULL
line_length_linter(120),
object_usage_linter = NULL,
indentation_linter = NULL,
trailing_whitespace_linter = NULL
)
33 changes: 20 additions & 13 deletions DESCRIPTION
Original file line number Diff line number Diff line change
@@ -1,22 +1,29 @@
Package: dv.loader
Type: Package
Title: Data loading module
Version: 2.0.0
Title: Data Loader for DaVinci Modular Applications
Version: 3.0.0
Authors@R: c(
person( "Boehringer-Ingelheim Pharma GmbH & Co.KG", role = c("cph", "fnd")),
person( given = "Ming", family = "Yang", role = c("aut", "cre"), email = "[email protected]"),
person( given = "Steven", family = "Brooks", role = "aut", email = "[email protected]"),
person( given = "Sorin", family = "Voicu", role = "aut", email = "[email protected]")
person("Boehringer-Ingelheim Pharma GmbH & Co.KG", role = c("cph", "fnd")),
person("Ming", "Yang", email = "[email protected]", role = c("aut", "cre")),
person("Steven", "Brooks", email = "[email protected]", role = "aut"),
person("Sorin", "Voicu", email = "[email protected]", role = "aut")
)
Description: This is a module for loading .RDS / .sas7bdat data files from a network file storage environment. It also allows loading data locally.
Description: dv.loader offers a streamlined method for importing multiple data files in R,
tailored for seamless integration with DaVinci modular applications.
License: Apache License (>= 2)
Encoding: UTF-8
LazyData: true
Depends: R (>= 3.5.0)
Imports: haven
Depends: R (>= 4.0.0)
Imports:
checkmate (>= 2.3.2),
haven (>= 2.5.4),
lifecycle (>= 1.0.4)
Suggests:
testthat,
knitr,
rmarkdown
RoxygenNote: 7.3.0
knitr (>= 1.48),
pharmaverseadam (>= 1.0.0),
pharmaversesdtm (>= 1.0.0),
rmarkdown (>= 2.38),
testthat (>= 3.2.1.1)
RoxygenNote: 7.3.2
VignetteBuilder: knitr
Config/testthat/edition: 3
6 changes: 4 additions & 2 deletions NAMESPACE
Original file line number Diff line number Diff line change
@@ -1,5 +1,7 @@
# Generated by roxygen2: do not edit by hand

export(get_cre_path)
export(get_nfs_path)
export(load_data)
export(load_rds)
export(load_sas)
export(load_xpt)
importFrom(lifecycle,deprecate_warn)
5 changes: 5 additions & 0 deletions NEWS.md
Original file line number Diff line number Diff line change
@@ -1,3 +1,8 @@
# dv.loader 3.0.0

- Introduced a new set of functions `load_rds()`, `load_sas()`, and `load_xpt()`.
- Deprecated the function `load_data()`.

# dv.loader 2.0.0

- GitHub release with QC report
Expand Down
61 changes: 0 additions & 61 deletions R/dvloader.R

This file was deleted.

146 changes: 146 additions & 0 deletions R/load_data.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,146 @@
#' Loads data into memory based on study directory and one or more file_names.
#' @param sub_dir A relative directory/folder that will be appended to a base path defined by `Sys.getenv("RXD_DATA")`.
#' If the argument is left as NULL, the function will load data from the working directory `getwd()`.
#' @param file_names Study file or file_names name(s) - can be a vector of strings.
#' This is the only required argument.
#' @param use_wd for "use working directory" - a flag used when importing local files
#' not on NFS - default value is FALSE
#' @param prefer_sas if set to TRUE, imports sas7bdat files first before looking for
#' RDS files (the opposite of default behavior)
#' @return a list of dataframes
#' @export
#' @examples
#' \dontrun{
#' test_data_path <- "../inst/extdata/"
#' data_list <- load_data(
#' sub_dir = test_data_path,
#' file_names = "dummyads2",
#' use_wd = TRUE
#' )
#' }
#' @export
#' @importFrom lifecycle deprecate_warn
load_data <- function(sub_dir = NULL, file_names, use_wd = FALSE, prefer_sas = FALSE) {
lifecycle::deprecate_warn("3.0.0", "load_data()", "read_data()")

if (is.null(file_names)) {
stop("Usage: load_data: file_names: Must supply at least one file name")
}

study_path <- "" # will be built using args

if (is.null(sub_dir)) {
study_path <- getwd()
} else {
if (use_wd) {
study_path <- file.path(getwd(), sub_dir)
} else {
study_path <- file.path(get_cre_path(), sub_dir)
}
}

# create the output
data_list <- create_data_list(study_path, file_names, prefer_sas) # nolint

return(data_list)
}

#' gets the NFS base path from an env var
#' It assumes there is an env var
#' called RXD_DATA which holds the path suffix.
#' @return the NFS base path
get_nfs_path <- function() {
base_path <- Sys.getenv("RXD_DATA")
# check that RXD_DATA is set
if (base_path == "") {
stop("Usage: get_nfs_path: RXD_DATA must be set")
}
return(base_path)
}

#' gets the NFS base path from an env var
#' alias for get_nfs_path to maintain backwards compatibility
get_cre_path <- get_nfs_path


#' For each file name provided, reads in the first matching file and its meta data/attributes.
#' Preference is given to RDS because its faster
#' @param file_path the folder where the files are
#' @param file_names CDISC names for the files
#' @param prefer_sas if TRUE, imports .sas7bdat files first instead of .RDS files
#' @return returns a list of dataframes with metadata as an attribute on each dataframe
create_data_list <- function(file_path, file_names, prefer_sas) {
data_list <- lapply(file_names, function(x) {
extensions <- c("", ".rds", ".sas7bdat")
if (prefer_sas) {
extensions <- c("", ".sas7bdat", ".rds")
}

file_name_to_load <- NULL

candidates <- list.files(file_path)
uppercase_candidates <- Map(toupper, candidates)

for (ext in extensions) {
# Case insensitive file name match
uppercase_file_name <- toupper(paste0(x, ext))

match_count <- sum(uppercase_candidates == uppercase_file_name)
if (match_count > 1) {
stop(paste("create_data_list(): More than one case-insensitive file name match for", file_path, x))
}

index <- match(uppercase_file_name, uppercase_candidates)
if (!is.na(index)) {
file_name_to_load <- candidates[[index]]
break
}
}

if (is.null(file_name_to_load)) {
stop(paste("create_data_list(): No RDS or SAS files found for", file_path, x))
}

output <- read_file(file_path, file_name_to_load)

return(output)
})

names(data_list) <- file_names

return(data_list)
}


#' Reads RDS/SAS file and metadatas from first 6 items from file.info() its file path
#' @param file_path a path to a file
#' @param file_name name of a file
#' @return a data object with an extra attribute of metadata
read_file <- function(file_path, file_name) {
ext <- tools::file_ext(file_name)

if (!(toupper(ext) %in% c("RDS", "SAS7BDAT"))) {
stop("Usage error: read_file: file_name: file must either be RDS or SAS7BDAT.")
}

is_rds <- toupper(ext) == "RDS"

file <- file.path(file_path, file_name)
file_name <- tools::file_path_sans_ext(file_name)

# grab file info
meta <- file.info(file)[1L:6L]
meta[["path"]] <- row.names(meta)
meta[["file_name"]] <- file_name
meta <- data.frame(meta, stringsAsFactors = FALSE)
row.names(meta) <- NULL

if (is_rds) {
out <- readRDS(file)
} else {
out <- haven::read_sas(file)
}
attr(out, "meta") <- meta

return(out)
}
47 changes: 47 additions & 0 deletions R/load_rds.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,47 @@
#' Load RDS files
#'
#' This function loads RDS files via readRDS() and returns a list of data frames.
#'
#' @param files A character vector of file paths to RDS files.
#' @return A list of data frames, each containing the data from an RDS file.
#' @examples
#' # Create temporary directory and files
#' temp_dir <- tempdir()
#' adsl_rds_file <- file.path(temp_dir, "adsl.rds")
#' adae_rds_file <- file.path(temp_dir, "adae.rds")
#'
#' # Write example data to RDS files
#' saveRDS(pharmaverseadam::adsl, adsl_rds_file)
#' saveRDS(pharmaverseadam::adae, adae_rds_file)
#'
#' # Load RDS files
#' rds_data_list <- load_rds(c(adsl_rds_file, adae_rds_file))
#'
#' # Clean up
#' unlink(c(adsl_rds_file, adae_rds_file))
#' @export
load_rds <- function(files) {
# Check if files is a character vector
checkmate::assert_character(files)

# Read each file and add metadata
data_list <- lapply(files, function(file) {
# Check if file exists
checkmate::assert_file_exists(file)
# Check if file is an RDS file
check_file_ext(file, extension = "rds")

# Read RDS file
data <- readRDS(file)

# Get file info and add to data as an attribute
attr(data, "meta") <- file_info(file)

return(data)
})

# Set names of data_list to the file names
names(data_list) <- basename(files)

return(data_list)
}
46 changes: 46 additions & 0 deletions R/load_sas.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,46 @@
#' Load SAS files
#'
#' This function loads SAS files via haven::read_sas() and returns a list of data frames.
#'
#' @param files A character vector of file paths to SAS files.
#' @return A list of data frames, each containing the data from a SAS file.
#' @examples
#' # Create temporary directory and files
#' temp_dir <- tempdir()
#' adsl_sas_file <- file.path(temp_dir, "adsl.sas7bdat")
#' adae_sas_file <- file.path(temp_dir, "adae.sas7bdat")
#'
#' # Write example data to SAS files
#' haven::write_sas(pharmaverseadam::adsl, adsl_sas_file)
#' haven::write_sas(pharmaverseadam::adae, adae_sas_file)
#'
#' # Load SAS files
#' sas_data_list <- load_sas(c(adsl_sas_file, adae_sas_file))
#'
#' # Clean up
#' unlink(c(adsl_sas_file, adae_sas_file))
#' @export
load_sas <- function(files) {
# Check if files is a character vector
checkmate::assert_character(files)

# Read each file and add metadata
data_list <- lapply(files, function(file) {
# Check if file exists
checkmate::assert_file_exists(file)
# Check if file is a SAS file
check_file_ext(file, extension = "sas7bdat")

# Read SAS file
data <- haven::read_sas(file)

# Get file info and add to data as an attribute
attr(data, "meta") <- file_info(file)

return(data)
})

# Set names of data_list to the file names
names(data_list) <- basename(files)
return(data_list)
}
Loading

0 comments on commit c424f1a

Please sign in to comment.