Merge pull request #5 from Boehringer-Ingelheim/dev

Dev to rc/3.0.0
Boehringer-Ingelheim · Oct 14, 2024 · c424f1a · c424f1a
2 parents f724639 + cf83f52
commit c424f1a
Show file tree

Hide file tree

Showing 51 changed files with 1,088 additions and 454 deletions.
diff --git a/.gitignore b/.gitignore
@@ -2,9 +2,8 @@
 .Rhistory
 .RData
 .Ruserdata
-*.sqlite
-*.sqlite-journal
-*.rds
+.Rprofile
+
 .vscode
+
 docs
-pkgdown
diff --git a/.lintr b/.lintr
@@ -1,6 +1,6 @@
 linters: linters_with_defaults(
-  line_length_linter(120),
-  object_usage_linter = NULL,
-  indentation_linter = NULL,
-  trailing_whitespace_linter = NULL
+    line_length_linter(120),
+    object_usage_linter = NULL,
+    indentation_linter = NULL,
+    trailing_whitespace_linter = NULL
   )
diff --git a/DESCRIPTION b/DESCRIPTION
@@ -1,22 +1,29 @@
 Package: dv.loader
 Type: Package
-Title: Data loading module 
-Version: 2.0.0
+Title: Data Loader for DaVinci Modular Applications
+Version: 3.0.0
 Authors@R: c(
-        person( "Boehringer-Ingelheim Pharma GmbH & Co.KG", role = c("cph", "fnd")),
-        person( given = "Ming", family = "Yang", role = c("aut", "cre"), email = "[email protected]"),
-        person( given = "Steven", family = "Brooks", role = "aut", email = "[email protected]"),
-        person( given = "Sorin", family = "Voicu", role = "aut", email = "[email protected]")
+        person("Boehringer-Ingelheim Pharma GmbH & Co.KG", role = c("cph", "fnd")),
+        person("Ming", "Yang", email = "[email protected]", role = c("aut", "cre")),
+        person("Steven", "Brooks", email = "[email protected]", role = "aut"),
+        person("Sorin", "Voicu", email = "[email protected]", role = "aut")
     )
-Description: This is a module for loading .RDS / .sas7bdat data files from a network file storage environment. It also allows loading data locally.
+Description: dv.loader offers a streamlined method for importing multiple data files in R, 
+    tailored for seamless integration with DaVinci modular applications.
 License: Apache License (>= 2)
 Encoding: UTF-8
 LazyData: true
-Depends: R (>= 3.5.0)
-Imports: haven
+Depends: R (>= 4.0.0)
+Imports:
+    checkmate (>= 2.3.2),
+    haven (>= 2.5.4),
+    lifecycle (>= 1.0.4)
 Suggests: 
-    testthat,
-    knitr,
-    rmarkdown
-RoxygenNote: 7.3.0
+    knitr (>= 1.48),
+    pharmaverseadam (>= 1.0.0),
+    pharmaversesdtm (>= 1.0.0),
+    rmarkdown (>= 2.38),
+    testthat (>= 3.2.1.1)
+RoxygenNote: 7.3.2
 VignetteBuilder: knitr
+Config/testthat/edition: 3
diff --git a/NAMESPACE b/NAMESPACE
@@ -1,5 +1,7 @@
 # Generated by roxygen2: do not edit by hand
 
-export(get_cre_path)
-export(get_nfs_path)
 export(load_data)
+export(load_rds)
+export(load_sas)
+export(load_xpt)
+importFrom(lifecycle,deprecate_warn)
diff --git a/NEWS.md b/NEWS.md
@@ -1,3 +1,8 @@
+# dv.loader 3.0.0
+
+- Introduced a new set of functions `load_rds()`, `load_sas()`, and `load_xpt()`.
+- Deprecated the function `load_data()`.
+
 # dv.loader 2.0.0
 
 - GitHub release with QC report

diff --git a/R/dvloader.R b/R/dvloader.R
diff --git a/R/load_data.R b/R/load_data.R
@@ -0,0 +1,146 @@
+#' Loads data into memory based on study directory and one or more file_names.
+#' @param sub_dir A relative directory/folder that will be appended to a base path defined by `Sys.getenv("RXD_DATA")`.
+#' If the argument is left as NULL, the function will load data from the working directory `getwd()`.
+#' @param file_names Study file or file_names name(s) - can be a vector of strings.
+#' This is the only required argument.
+#' @param use_wd for "use working directory" - a flag used when importing local files
+#' not on NFS - default value is FALSE
+#' @param prefer_sas if set to TRUE, imports sas7bdat files first before looking for
+#' RDS files (the opposite of default behavior)
+#' @return a list of dataframes
+#' @export
+#' @examples
+#' \dontrun{
+#' test_data_path <- "../inst/extdata/"
+#' data_list <- load_data(
+#'   sub_dir = test_data_path,
+#'   file_names = "dummyads2",
+#'   use_wd = TRUE
+#' )
+#' }
+#' @export
+#' @importFrom lifecycle deprecate_warn
+load_data <- function(sub_dir = NULL, file_names, use_wd = FALSE, prefer_sas = FALSE) {
+  lifecycle::deprecate_warn("3.0.0", "load_data()", "read_data()")
+
+  if (is.null(file_names)) {
+    stop("Usage: load_data: file_names: Must supply at least one file name")
+  }
+
+  study_path <- "" # will be built using args
+
+  if (is.null(sub_dir)) {
+    study_path <- getwd()
+  } else {
+    if (use_wd) {
+      study_path <- file.path(getwd(), sub_dir)
+    } else {
+      study_path <- file.path(get_cre_path(), sub_dir)
+    }
+  }
+
+  # create the output
+  data_list <- create_data_list(study_path, file_names, prefer_sas) # nolint
+
+  return(data_list)
+}
+
+#' gets the NFS base path from an env var
+#' It assumes there is an env var
+#' called RXD_DATA which holds the path suffix.
+#' @return the NFS base path
+get_nfs_path <- function() {
+  base_path <- Sys.getenv("RXD_DATA")
+  # check that RXD_DATA is set
+  if (base_path == "") {
+    stop("Usage: get_nfs_path: RXD_DATA must be set")
+  }
+  return(base_path)
+}
+
+#' gets the NFS base path from an env var
+#' alias for get_nfs_path to maintain backwards compatibility
+get_cre_path <- get_nfs_path
+
+
+#' For each file name provided, reads in the first matching file and its meta data/attributes.
+#' Preference is given to RDS because its faster
+#' @param file_path the folder where the files are
+#' @param file_names CDISC names for the files
+#' @param prefer_sas if TRUE, imports .sas7bdat files first instead of .RDS files
+#' @return returns a list of dataframes with metadata as an attribute on each dataframe
+create_data_list <- function(file_path, file_names, prefer_sas) {
+  data_list <- lapply(file_names, function(x) {
+    extensions <- c("", ".rds", ".sas7bdat")
+    if (prefer_sas) {
+      extensions <- c("", ".sas7bdat", ".rds")
+    }
+
+    file_name_to_load <- NULL
+
+    candidates <- list.files(file_path)
+    uppercase_candidates <- Map(toupper, candidates)
+
+    for (ext in extensions) {
+      # Case insensitive file name match
+      uppercase_file_name <- toupper(paste0(x, ext))
+
+      match_count <- sum(uppercase_candidates == uppercase_file_name)
+      if (match_count > 1) {
+        stop(paste("create_data_list(): More than one case-insensitive file name match for", file_path, x))
+      }
+
+      index <- match(uppercase_file_name, uppercase_candidates)
+      if (!is.na(index)) {
+        file_name_to_load <- candidates[[index]]
+        break
+      }
+    }
+
+    if (is.null(file_name_to_load)) {
+      stop(paste("create_data_list(): No RDS or SAS files found for", file_path, x))
+    }
+
+    output <- read_file(file_path, file_name_to_load)
+
+    return(output)
+  })
+
+  names(data_list) <- file_names
+
+  return(data_list)
+}
+
+
+#' Reads RDS/SAS file and metadatas from first 6 items from file.info() its file path
+#' @param file_path a path to a file
+#' @param file_name name of a file
+#' @return a data object with an extra attribute of metadata
+read_file <- function(file_path, file_name) {
+  ext <- tools::file_ext(file_name)
+
+  if (!(toupper(ext) %in% c("RDS", "SAS7BDAT"))) {
+    stop("Usage error: read_file: file_name: file must either be RDS or SAS7BDAT.")
+  }
+
+  is_rds <- toupper(ext) == "RDS"
+
+  file <- file.path(file_path, file_name)
+  file_name <- tools::file_path_sans_ext(file_name)
+
+  # grab file info
+  meta <- file.info(file)[1L:6L]
+  meta[["path"]] <- row.names(meta)
+  meta[["file_name"]] <- file_name
+  meta <- data.frame(meta, stringsAsFactors = FALSE)
+  row.names(meta) <- NULL
+
+  if (is_rds) {
+    out <- readRDS(file)
+  } else {
+    out <- haven::read_sas(file)
+  }
+  attr(out, "meta") <- meta
+
+  return(out)
+}
diff --git a/R/load_rds.R b/R/load_rds.R
@@ -0,0 +1,47 @@
+#' Load RDS files
+#'
+#' This function loads RDS files via readRDS() and returns a list of data frames.
+#'
+#' @param files A character vector of file paths to RDS files.
+#' @return A list of data frames, each containing the data from an RDS file.
+#' @examples
+#' # Create temporary directory and files
+#' temp_dir <- tempdir()
+#' adsl_rds_file <- file.path(temp_dir, "adsl.rds")
+#' adae_rds_file <- file.path(temp_dir, "adae.rds")
+#' 
+#' # Write example data to RDS files
+#' saveRDS(pharmaverseadam::adsl, adsl_rds_file)
+#' saveRDS(pharmaverseadam::adae, adae_rds_file)
+#' 
+#' # Load RDS files
+#' rds_data_list <- load_rds(c(adsl_rds_file, adae_rds_file))
+#' 
+#' # Clean up
+#' unlink(c(adsl_rds_file, adae_rds_file))
+#' @export
+load_rds <- function(files) {
+  # Check if files is a character vector
+  checkmate::assert_character(files)
+
+  # Read each file and add metadata
+  data_list <- lapply(files, function(file) {
+    # Check if file exists
+    checkmate::assert_file_exists(file)
+    # Check if file is an RDS file
+    check_file_ext(file, extension = "rds")
+
+    # Read RDS file
+    data <- readRDS(file)
+
+    # Get file info and add to data as an attribute
+    attr(data, "meta") <- file_info(file)
+
+    return(data)
+  })
+
+  # Set names of data_list to the file names
+  names(data_list) <- basename(files)
+
+  return(data_list)
+}
diff --git a/R/load_sas.R b/R/load_sas.R
@@ -0,0 +1,46 @@
+#' Load SAS files
+#'
+#' This function loads SAS files via haven::read_sas() and returns a list of data frames.
+#'
+#' @param files A character vector of file paths to SAS files.
+#' @return A list of data frames, each containing the data from a SAS file.
+#' @examples
+#' # Create temporary directory and files
+#' temp_dir <- tempdir()
+#' adsl_sas_file <- file.path(temp_dir, "adsl.sas7bdat")
+#' adae_sas_file <- file.path(temp_dir, "adae.sas7bdat")
+#' 
+#' # Write example data to SAS files
+#' haven::write_sas(pharmaverseadam::adsl, adsl_sas_file)
+#' haven::write_sas(pharmaverseadam::adae, adae_sas_file)
+#' 
+#' # Load SAS files
+#' sas_data_list <- load_sas(c(adsl_sas_file, adae_sas_file))
+#' 
+#' # Clean up
+#' unlink(c(adsl_sas_file, adae_sas_file))
+#' @export
+load_sas <- function(files) {
+  # Check if files is a character vector
+  checkmate::assert_character(files)
+
+  # Read each file and add metadata
+  data_list <- lapply(files, function(file) {
+    # Check if file exists
+    checkmate::assert_file_exists(file)
+    # Check if file is a SAS file
+    check_file_ext(file, extension = "sas7bdat")
+
+    # Read SAS file
+    data <- haven::read_sas(file)
+
+    # Get file info and add to data as an attribute
+    attr(data, "meta") <- file_info(file)
+
+    return(data)
+  })
+
+  # Set names of data_list to the file names
+  names(data_list) <- basename(files)
+  return(data_list)
+}