From 532aaf5785ab188f2018c9e041cb6ee3e4c69ec4 Mon Sep 17 00:00:00 2001 From: Ming Yang Date: Thu, 17 Oct 2024 23:20:46 +0800 Subject: [PATCH 01/45] Refactor code in the R/ folder --- R/dvloader.R | 150 +++++++++++++++++++++++++++++------------- R/utils.R | 180 ++++++++++++++++++++++++++++++++------------------- 2 files changed, 218 insertions(+), 112 deletions(-) diff --git a/R/dvloader.R b/R/dvloader.R index a36a7b9..2d22547 100644 --- a/R/dvloader.R +++ b/R/dvloader.R @@ -1,61 +1,119 @@ -#' gets the NFS base path from an env var -#' It assumes there is an env var -#' called RXD_DATA which holds the path suffix. -#' @return the NFS base path +#' Get Base Directory Path +#' +#' This function retrieves the base directory path from a specified environment variable. +#' It checks if the environment variable is set and if the directory exists. +#' +#' @param env_var [character(1)] The name of the environment variable containing the base directory path. +#' +#' @return [character(1)] The normalized path to the base directory. 
+#' +#' @examples +#' # Create a temporary directory +#' temp_dir <- tempdir() +#' +#' # Set the BASE_DIR environment variable +#' Sys.setenv(BASE_DIR = temp_dir) +#' +#' # Get the base directory path +#' get_base_dir("BASE_DIR") +#' #' @export -get_nfs_path <- function() { - base_path <- Sys.getenv("RXD_DATA") - # check that RXD_DATA is set - if (base_path == "") { - stop("Usage: get_nfs_path: RXD_DATA must be set") +get_base_dir <- function(env_var) { + # Ensure env_var is a single character string + checkmate::assert_character(env_var, len = 1) + + # Get the value of the environment variable + base_dir <- Sys.getenv(env_var) + + # Stop if the environment variable is not set + if (base_dir == "") { + stop("Environment variable ", env_var, " is not set") } - return(base_path) + + # Ensure the directory exists + checkmate::assert_directory_exists(base_dir) + + # Return the normalized path + return(normalizePath(base_dir)) } -#' gets the NFS base path from an env var -#' alias for get_nfs_path to maintain backwards compatibility -#' @export -get_cre_path <- get_nfs_path - -#' Loads data into memory based on study directory and one or more file_names. -#' @param sub_dir A relative directory/folder that will be appended to a base path defined by `Sys.getenv("RXD_DATA")`. -#' If the argument is left as NULL, the function will load data from the working directory `getwd()`. -#' @param file_names Study file or file_names name(s) - can be a vector of strings. -#' This is the only required argument. -#' @param use_wd for "use working directory" - a flag used when importing local files -#' not on NFS - default value is FALSE -#' @param prefer_sas if set to TRUE, imports sas7bdat files first before looking for -#' RDS files (the opposite of default behavior) -#' @return a list of dataframes + +#' Get CRE Path +#' +#' This function retrieves the path to the CRE (Clinical Research Environment) directory. +#' It uses the "RXD_DATA" environment variable as the base directory. 
+#' +#' @return [character(1)] The path to the CRE directory. +#' #' @export +get_cre_path <- function() { + get_base_dir(env_var = "RXD_DATA") +} + + +#' Load Data Files +#' +#' This function loads data files from a specified directory or the current working directory. +#' It supports loading both RDS and SAS7BDAT files. +#' +#' @param sub_dir [character(1)] Optional character string specifying a subdirectory. Default is NULL. +#' @param file_names [character(1+)] Character vector of file names to load (without extension). +#' @param use_wd [logical(1)] Logical indicating whether to use the current working directory. Default is FALSE. +#' @param prefer_sas [logical(1)] Logical indicating whether to prefer SAS7BDAT files over RDS. Default is FALSE. +#' @param env_var [character(1)] The environment variable name for the base directory. Default is "RXD_DATA". +#' +#' @return A named list of data frames, where each name corresponds to a loaded file. +#' #' @examples #' \dontrun{ -#' test_data_path <- "../inst/extdata/" -#' data_list <- load_data( -#' sub_dir = test_data_path, -#' file_names = "dummyads2", -#' use_wd = TRUE -#' ) +#' # Load RDS files from the directory specified by RXD_DATA environment variable +#' data_list <- load_data(file_names = c("adsl", "adae")) +#' +#' # Load SAS files from a subdirectory in the current working directory +#' data_list <- load_data(sub_dir = "adam", file_names = c("adsl", "adae"), use_wd = TRUE, prefer_sas = TRUE) #' } -load_data <- function(sub_dir = NULL, file_names, use_wd = FALSE, prefer_sas = FALSE) { - if (is.null(file_names)) { - stop("Usage: load_data: file_names: Must supply at least one file name") - } - - study_path <- "" # will be built using args +#' +#' # Set the BASE_DIR environment variable +#' Sys.setenv(BASE_DIR = find.package("haven")) +#' +#' # Get the base directory path +#' base_dir <- get_base_dir("BASE_DIR") +#' list.files(base_dir) +#' list.files(file.path(base_dir, "examples")) +#' +#' # Load data files 
+#' data_list <- load_data(sub_dir = "examples", file_names = "iris.sas7bdat", env_var = "BASE_DIR") +#' str(data_list) +#' +#' @export +load_data <- function(sub_dir = NULL, file_names, use_wd = FALSE, prefer_sas = FALSE, env_var = "RXD_DATA") { + # Input validation + checkmate::assert_character(sub_dir, len = 1, null.ok = TRUE) + checkmate::assert_character(file_names, min.len = 1) + checkmate::assert_logical(use_wd, len = 1) + checkmate::assert_logical(prefer_sas, len = 1) + checkmate::assert_character(env_var, len = 1) - if (is.null(sub_dir)) { - study_path <- getwd() + # Determine the base directory + if (use_wd) { + base_dir <- getwd() } else { - if (use_wd) { - study_path <- file.path(getwd(), sub_dir) - } else { - study_path <- file.path(get_cre_path(), sub_dir) - } + base_dir <- get_base_dir(env_var = env_var) } + + # Construct the full directory path + dir_path <- if (is.null(sub_dir)) base_dir else file.path(base_dir, sub_dir) + + # Determine the file extension based on preference + file_ext <- if (prefer_sas) "sas7bdat" else "rds" + + # Get the full file paths + file_paths <- get_file_paths(dir_path = dir_path, file_names = file_names, prefer_sas = prefer_sas) + + # Load the data files + data_list <- load_data_files(file_paths) - # create the output - data_list <- create_data_list(study_path, file_names, prefer_sas) # nolint + names(data_list) <- file_names return(data_list) } diff --git a/R/utils.R b/R/utils.R index a3791e1..fed9f31 100644 --- a/R/utils.R +++ b/R/utils.R @@ -1,81 +1,129 @@ -#' For each file name provided, reads in the first matching file and its meta data/attributes. 
-#' Preference is given to RDS because its faster -#' @param file_path the folder where the files are -#' @param file_names CDISC names for the files -#' @param prefer_sas if TRUE, imports .sas7bdat files first instead of .RDS files -#' @return returns a list of dataframes with metadata as an attribute on each dataframe -create_data_list <- function(file_path, file_names, prefer_sas) { - data_list <- lapply(file_names, function(x) { - extensions <- c("", ".rds", ".sas7bdat") - if (prefer_sas) { - extensions <- c("", ".sas7bdat", ".rds") - } - - file_name_to_load <- NULL - - candidates <- list.files(file_path) - uppercase_candidates <- Map(toupper, candidates) - - for (ext in extensions) { - # Case insensitive file name match - uppercase_file_name <- toupper(paste0(x, ext)) - - match_count <- sum(uppercase_candidates == uppercase_file_name) - if (match_count > 1) { - stop(paste("create_data_list(): More than one case-insensitive file name match for", file_path, x)) +#' Get File Paths +#' +#' This function constructs file paths for given file names, handling both RDS and SAS7BDAT files. +#' It can prioritize SAS files over RDS files based on the `prefer_sas` parameter. +#' +#' @param dir_path [character(1)] The directory path where the files are located. +#' @param file_names [character(1+)] A vector of file names to process. +#' @param prefer_sas [logical(1)] Whether to prefer SAS files over RDS files. Default is FALSE. +#' +#' @return [character] A vector of normalized file paths. 
+#' +#' @examples +#' temp_dir <- tempdir() +#' +#' file_names <- c("adsl", "adae") +#' +#' file.create(file.path(temp_dir, paste0(file_names, ".rds"))) +#' file.create(file.path(temp_dir, paste0(file_names, ".sas7bdat"))) +#' +#' list.files(temp_dir) +#' +#' get_file_paths(dir_path = temp_dir, file_names = file_names) +#' get_file_paths(dir_path = temp_dir, file_names = file_names, prefer_sas = TRUE) +#' +#' unlink(temp_dir, recursive = TRUE) +#' +#' @export +get_file_paths <- function(dir_path, file_names, prefer_sas = FALSE) { + # Input validation + checkmate::assert_character(dir_path, len = 1) + checkmate::assert_character(file_names, min.len = 1) + checkmate::assert_logical(prefer_sas, len = 1) + + file_paths <- lapply(file_names, function(file_name) { + file_path <- file.path(dir_path, file_name) + file_ext <- tools::file_ext(file_name) + + if (file_ext == "") { + # If no extension is provided, check for both RDS and SAS files + rds_file_name <- paste0(file_name, ".rds") + sas_file_name <- paste0(file_name, ".sas7bdat") + rds_file_path <- file.path(dir_path, rds_file_name) + sas_file_path <- file.path(dir_path, sas_file_name) + + if (isTRUE(prefer_sas)) { + # Prefer SAS file if it exists, otherwise use RDS + if (file.exists(sas_file_path)) { + return(sas_file_path) + } else if (file.exists(rds_file_path)) { + return(rds_file_path) + } else { + stop(dir_path, " does not contain: ", rds_file_name, " or ", sas_file_name) + } + } else if (isFALSE(prefer_sas)) { + # Prefer RDS file if it exists, otherwise use SAS + if (file.exists(rds_file_path)) { + return(rds_file_path) + } else if (file.exists(sas_file_path)) { + return(sas_file_path) + } else { + stop(dir_path, " does not contain: ", rds_file_name, " or ", sas_file_name) + } } - - index <- match(uppercase_file_name, uppercase_candidates) - if (!is.na(index)) { - file_name_to_load <- candidates[[index]] - break + } else { + # If an extension is provided, use the exact file name + if (file.exists(file_path)) { 
+ return(file_path) + } else { + stop(dir_path, " does not contain: ", file_name) } } - - if (is.null(file_name_to_load)) { - stop(paste("create_data_list(): No RDS or SAS files found for", file_path, x)) - } - - output <- read_file(file_path, file_name_to_load) - - return(output) }) - names(data_list) <- file_names - - return(data_list) + # Normalize all file paths + return(normalizePath(unlist(file_paths))) } -#' Reads RDS/SAS file and metadatas from first 6 items from file.info() its file path -#' @param file_path a path to a file -#' @param file_name name of a file -#' @return a data object with an extra attribute of metadata -read_file <- function(file_path, file_name) { - ext <- tools::file_ext(file_name) - if (!(toupper(ext) %in% c("RDS", "SAS7BDAT"))) { - stop("Usage error: read_file: file_name: file must either be RDS or SAS7BDAT.") - } +#' Load Data Files +#' +#' This function reads data from multiple file paths and returns a list of data frames. +#' It supports reading RDS and SAS7BDAT files. +#' +#' @param file_paths [character(1+)] A vector of file paths to read. +#' +#' @return [list] A named list of data frames, where each name is the basename of the corresponding file path. 
+#' +#' @examples +#' path <- system.file("examples", "iris.sas7bdat", package = "haven") +#' data_list <- load_data_files(file_paths = path) +#' str(data_list) +#' +#' @export +load_data_files <- function(file_paths) { + # Validate input parameters + checkmate::assert_character(file_paths, min.len = 1) + checkmate::assert_file_exists(file_paths) + + # Read each file and store in a list + data_list <- lapply(file_paths, function(file_path) { + # Get file extension + extension <- tools::file_ext(file_path) + + # Read file based on its extension + if (tolower(extension) == "rds") { + data <- readRDS(file_path) + } else if (tolower(extension) == "sas7bdat") { + data <- haven::read_sas(file_path) + } else { + stop("Unsupported file extension: ", extension) + } - is_rds <- toupper(ext) == "RDS" + # Get file metadata + meta <- file.info(file_path, extra_cols = FALSE) + meta[["path"]] <- file_path + meta[["file_name"]] <- basename(file_path) - file <- file.path(file_path, file_name) - file_name <- tools::file_path_sans_ext(file_name) + # Add metadata as an attribute to the data + attr(data, "meta") <- as.list(meta) - # grab file info - meta <- file.info(file)[1L:6L] - meta[["path"]] <- row.names(meta) - meta[["file_name"]] <- file_name - meta <- data.frame(meta, stringsAsFactors = FALSE) - row.names(meta) <- NULL + return(data) + }) - if (is_rds) { - out <- readRDS(file) - } else { - out <- haven::read_sas(file) - } - attr(out, "meta") <- meta + # Set names of the list elements to the basenames of the file paths + names(data_list) <- basename(file_paths) - return(out) + return(data_list) } From ff5d20262174b036f00791a481ef3f1c08b491e4 Mon Sep 17 00:00:00 2001 From: Ming Yang Date: Fri, 18 Oct 2024 10:41:47 +0800 Subject: [PATCH 02/45] Update examples --- R/dvloader.R | 24 ++++++++---------------- 1 file changed, 8 insertions(+), 16 deletions(-) diff --git a/R/dvloader.R b/R/dvloader.R index 2d22547..4a3e7b0 100644 --- a/R/dvloader.R +++ b/R/dvloader.R @@ -65,26 +65,18 @@ 
get_cre_path <- function() { #' @return A named list of data frames, where each name corresponds to a loaded file. #' #' @examples -#' \dontrun{ -#' # Load RDS files from the directory specified by RXD_DATA environment variable -#' data_list <- load_data(file_names = c("adsl", "adae")) -#' -#' # Load SAS files from a subdirectory in the current working directory -#' data_list <- load_data(sub_dir = "adam", file_names = c("adsl", "adae"), use_wd = TRUE, prefer_sas = TRUE) -#' } +#' # Get the current value of the RXD_DATA environment variable +#' base_dir <- Sys.getenv("RXD_DATA") #' -#' # Set the BASE_DIR environment variable -#' Sys.setenv(BASE_DIR = find.package("haven")) +#' # Set the RXD_DATA environment variable to the path of the haven package +#' Sys.setenv(RXD_DATA = find.package("haven")) #' -#' # Get the base directory path -#' base_dir <- get_base_dir("BASE_DIR") -#' list.files(base_dir) -#' list.files(file.path(base_dir, "examples")) -#' -#' # Load data files -#' data_list <- load_data(sub_dir = "examples", file_names = "iris.sas7bdat", env_var = "BASE_DIR") +#' data_list <- load_data(sub_dir = "examples", file_names = c("iris.sas7bdat")) #' str(data_list) #' +#' # Reset the RXD_DATA environment variable to its original value +#' Sys.setenv(RXD_DATA = base_dir) +#' #' @export load_data <- function(sub_dir = NULL, file_names, use_wd = FALSE, prefer_sas = FALSE, env_var = "RXD_DATA") { # Input validation From 432e835beb4e876e549410c600b10a955c2c45b5 Mon Sep 17 00:00:00 2001 From: Ming Yang Date: Fri, 18 Oct 2024 10:42:38 +0800 Subject: [PATCH 03/45] Print the directory path and file names --- R/dvloader.R | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/R/dvloader.R b/R/dvloader.R index 4a3e7b0..3e3d557 100644 --- a/R/dvloader.R +++ b/R/dvloader.R @@ -102,6 +102,10 @@ load_data <- function(sub_dir = NULL, file_names, use_wd = FALSE, prefer_sas = F # Get the full file paths file_paths <- get_file_paths(dir_path = dir_path, file_names = file_names, 
prefer_sas = prefer_sas) + # Print the directory path and file names + cat("Loading data from", dir_path, "\n") + cat("Loading data file(s):", basename(file_paths), "\n") + # Load the data files data_list <- load_data_files(file_paths) From 9ec6f69872d8f46c6e109df8437fcdd74669b8d0 Mon Sep 17 00:00:00 2001 From: Ming Yang Date: Mon, 21 Oct 2024 16:12:22 +0800 Subject: [PATCH 04/45] Add an argument to print file paths if requested --- R/dvloader.R | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/R/dvloader.R b/R/dvloader.R index 3e3d557..b6cb43d 100644 --- a/R/dvloader.R +++ b/R/dvloader.R @@ -61,6 +61,7 @@ get_cre_path <- function() { #' @param use_wd [logical(1)] Logical indicating whether to use the current working directory. Default is FALSE. #' @param prefer_sas [logical(1)] Logical indicating whether to prefer SAS7BDAT files over RDS. Default is FALSE. #' @param env_var [character(1)] The environment variable name for the base directory. Default is "RXD_DATA". +#' @param print_file_paths [logical(1)] Logical indicating whether to print the directory path and file names. Default is FALSE. #' #' @return A named list of data frames, where each name corresponds to a loaded file. 
#' @@ -78,7 +79,7 @@ get_cre_path <- function() { #' Sys.setenv(RXD_DATA = base_dir) #' #' @export -load_data <- function(sub_dir = NULL, file_names, use_wd = FALSE, prefer_sas = FALSE, env_var = "RXD_DATA") { +load_data <- function(sub_dir = NULL, file_names, use_wd = FALSE, prefer_sas = FALSE, env_var = "RXD_DATA", print_file_paths = FALSE) { # Input validation checkmate::assert_character(sub_dir, len = 1, null.ok = TRUE) checkmate::assert_character(file_names, min.len = 1) @@ -102,9 +103,11 @@ load_data <- function(sub_dir = NULL, file_names, use_wd = FALSE, prefer_sas = F # Get the full file paths file_paths <- get_file_paths(dir_path = dir_path, file_names = file_names, prefer_sas = prefer_sas) - # Print the directory path and file names - cat("Loading data from", dir_path, "\n") - cat("Loading data file(s):", basename(file_paths), "\n") + # Print the directory path and file names if requested + if (isTRUE(print_file_paths)) { + cat("Loading data from", dir_path, "\n") + cat("Loading data file(s):", basename(file_paths), "\n") + } # Load the data files data_list <- load_data_files(file_paths) From bda40b4bac503ac130c98d8c5701b94dc88ffe69 Mon Sep 17 00:00:00 2001 From: Ming Yang Date: Mon, 21 Oct 2024 16:14:58 +0800 Subject: [PATCH 05/45] Do not export get_base_dir() --- R/dvloader.R | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/R/dvloader.R b/R/dvloader.R index b6cb43d..ae8d0fb 100644 --- a/R/dvloader.R +++ b/R/dvloader.R @@ -16,8 +16,7 @@ #' #' # Get the base directory path #' get_base_dir("BASE_DIR") -#' -#' @export +#' get_base_dir <- function(env_var) { # Ensure env_var is a single character string checkmate::assert_character(env_var, len = 1) From b155b1aec2fac95fdd8422cc1d036810d75efb72 Mon Sep 17 00:00:00 2001 From: Ming Yang Date: Mon, 21 Oct 2024 16:42:13 +0800 Subject: [PATCH 06/45] Add get_nfs_path() function back --- R/dvloader.R | 15 ++++++++++++++- 1 file changed, 14 insertions(+), 1 deletion(-) diff --git a/R/dvloader.R 
b/R/dvloader.R index ae8d0fb..ee8bb82 100644 --- a/R/dvloader.R +++ b/R/dvloader.R @@ -36,12 +36,25 @@ get_base_dir <- function(env_var) { return(normalizePath(base_dir)) } +#' Get NFS Path +#' +#' This function retrieves the path to the NFS (Network File System) directory. +#' +#' @param env_var [character(1)] The environment variable name for the base directory. Default is "RXD_DATA". +#' +#' @return [character(1)] The path to the NFS directory. +#' +#' @export +get_nfs_path <- function(env_var = "RXD_DATA") { + get_base_dir(env_var = env_var) +} + #' Get CRE Path #' #' This function retrieves the path to the CRE (Clinical Research Environment) directory. #' It uses the "RXD_DATA" environment variable as the base directory. -#' +#' #' @return [character(1)] The path to the CRE directory. #' #' @export From f7601bf9c6454f570f753ced13641451eac0955f Mon Sep 17 00:00:00 2001 From: Ming Yang Date: Mon, 21 Oct 2024 16:49:08 +0800 Subject: [PATCH 07/45] Keep the metadata format unchanged --- R/utils.R | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/R/utils.R b/R/utils.R index fed9f31..93ac178 100644 --- a/R/utils.R +++ b/R/utils.R @@ -117,7 +117,8 @@ load_data_files <- function(file_paths) { meta[["file_name"]] <- basename(file_path) # Add metadata as an attribute to the data - attr(data, "meta") <- as.list(meta) + rownames(data) <- NULL + attr(data, "meta") <- meta return(data) }) From 65704612be3e7b30dce670fee17324e90cee46d1 Mon Sep 17 00:00:00 2001 From: Ming Yang Date: Tue, 22 Oct 2024 19:30:50 +0800 Subject: [PATCH 08/45] Update README --- README.md | 77 +++++++++++++++++++++++++++++++++++++++++++------------ 1 file changed, 61 insertions(+), 16 deletions(-) diff --git a/README.md b/README.md index 1199129..ab3eadc 100644 --- a/README.md +++ b/README.md @@ -1,30 +1,75 @@ -# Data Loading - -The {dv.loader} package provides a simple interface for loading data from a network file storage folder or -locally. 
It is designed to be used with `.RDS` and `.sas7bdat` file formats. -The package provides a simple function, `load_data()`, which loads R and SAS data files into memory. -Loading data from SQL databases is not yet supported. The function returns a list named by the file names passed, -and containing data frames, along with metadata for that table. By default, the function will look for files in a -sub-directory `sub_dir` of the base path defined by a environment variable "RXD_DATA". You can check if the base path -is set by running `Sys.getenv("RXD_DATA")`. A single file or multiple files can be loaded at once. -To make the loading process faster for large datasets, it is suggested that '.sas7bdat' files are converted to -'.RDS' files. The function will prefer '.RDS' files over '.sas7bdat' files by default. +# dv.loader + +The `dv.loader` package is designed to simplify the data loading process for creating modular Shiny applications within the DaVinci framework. + +Key features of `dv.loader` include: + +1. Capability to import multiple files from a data directory. + +2. Compatibility with both `.rds` and `.sas7bdat` file types. + +3. Smooth integration with other packages in the DaVinci ecosystem. + ## Installation +You can install the `dv.loader` package from GitHub using the `remotes` package: + ```r +# Install the remotes package if not already installed if (!require("remotes")) install.packages("remotes") + +# Install the dv.loader package from GitHub remotes::install_github("Boehringer-Ingelheim/dv.loader") ``` -## Basic usage +## Examples + +The main function is `dv.loader::load_data()`, which loads data files from sub-directories of a network file system (NFS) or the working directory. + +### Example 1: Sub-directory of Network File System (NFS) + +To load data files from a NFS, you need to set the NFS path as an environment variable. By default, the environment variable name is `RXD_DATA`. 
+ +You can run the following command to check the NFS path if it is already set. ```r -# getting data from a network file storage folder -dv.loader::load_data(sub_dir = "subdir1/subdir2", file_names = c("adsl", "adae")) +# Check the NFS path +dv.loader::get_nfs_path() ``` +If the NFS path is not set, you can set it by running the following command. + ```r -# getting data locally (e.g., if you have file `./data/adsl.RDS`) -dv.loader::load_data(sub_dir = "data", file_names = c("adsl"), use_wd = TRUE) +# Set the NFS path as an environment variable +Sys.setenv(RXD_DATA = "path/to/network-file-system") ``` + +The environment variable setup is not needed if you have already set the NFS path as an environment variable in your `.Renviron` or `.Rprofile` file. + +If the NFS path has been properly set, you can load data files from the NFS and its sub-directories. + +```r +# Load data files from the specified sub-directory of a network file system (NFS) +dv.loader::load_data( + sub_dir = "sub-directory/of/network-file-system", + file_names = c("adsl.sas7bdat", "adae.sas7bdat") +) +``` + +For the `file_names` argument, it is recommended to use the full file names including the file extension. + +### Example 2: Sub-directory of the Working Directory + +To load data files from a local directory, there is no need to set an environment variable. You use `use_wd = TRUE` to indicate that the data files are loaded from a sub-directory of the working directory. + +```r +# Load data files from the specified sub-directory of the working directory +dv.loader::load_data( + sub_dir = "sub-directory/of/working-directory", + file_names = c("adsl.sas7bdat", "adae.sas7bdat"), + use_wd = TRUE +) +``` + +Additional examples can be found in the package vignettes. 
From 488d6792b9cbdb5904e99c6dfc0ef8b5aad5cccf Mon Sep 17 00:00:00 2001 From: Ming Yang Date: Tue, 22 Oct 2024 19:47:44 +0800 Subject: [PATCH 09/45] Update package vignettes --- vignettes/.gitignore | 2 + vignettes/integration-guide.Rmd | 65 ++++++++++++++++++++++++++ vignettes/loading-data-into-memory.Rmd | 45 ------------------ 3 files changed, 67 insertions(+), 45 deletions(-) create mode 100644 vignettes/.gitignore create mode 100644 vignettes/integration-guide.Rmd delete mode 100644 vignettes/loading-data-into-memory.Rmd diff --git a/vignettes/.gitignore b/vignettes/.gitignore new file mode 100644 index 0000000..097b241 --- /dev/null +++ b/vignettes/.gitignore @@ -0,0 +1,2 @@ +*.html +*.R diff --git a/vignettes/integration-guide.Rmd b/vignettes/integration-guide.Rmd new file mode 100644 index 0000000..802f252 --- /dev/null +++ b/vignettes/integration-guide.Rmd @@ -0,0 +1,65 @@ +--- +title: "Integration Guide" +output: rmarkdown::html_vignette +vignette: > + %\VignetteIndexEntry{Integration Guide} + %\VignetteEngine{knitr::rmarkdown} + %\VignetteEncoding{UTF-8} +--- + +```{r, include = FALSE} +knitr::opts_chunk$set( + collapse = TRUE, + comment = "#>" +) +``` + +This vignette showcases the integration of the {dv.loader} package with other packages from the DaVinci framework. It guides you through the process of combining {dv.loader} with {dv.manager} and {dv.listings} to create a modular Shiny application. 
+ +```{r, eval=FALSE} +# Install the {remotes} package if you haven't already +if (!require("remotes")) install.packages("remotes") + +# Define the packages to install +pkgs <- c("dv.loader", "dv.manager", "dv.listings") + +# Install the packages from GitHub +remotes::install_github(repo = paste0("Boehringer-Ingelheim/", pkgs)) +``` + +## Step 1: Load data files + +```{r, eval=FALSE} +# Set the NFS path as an environment variable +Sys.setenv(RXD_DATA = "path/to/network-file-system") + +# Load data files from the specified sub-directory of a network file system (NFS) +data_list <- dv.loader::load_data( + sub_dir = "sub-directory/of/network-file-system", + file_names = c("adsl.sas7bdat", "adae.sas7bdat") +) +``` + +## Step 2: Create a module list + +```{r, eval=FALSE} +# Include a module containing two data listings +module_list <- list( + "Data Listings" = dv.listings::mod_listings( + module_id = "data_listings", + dataset_names = c("adsl.sas7bdat", "adae.sas7bdat") + ) +) +``` + +## Step 3: Run the DaVinci application + +```{r, eval=FALSE} +# Run the DaVinci application with data and module lists defined earlier +dv.manager::run_app( + data = list("Datasets" = data_list), + module_list = module_list, + filter_data = "adsl.sas7bdat", + title = "DaVinci Application" +) +``` diff --git a/vignettes/loading-data-into-memory.Rmd b/vignettes/loading-data-into-memory.Rmd deleted file mode 100644 index 577da15..0000000 --- a/vignettes/loading-data-into-memory.Rmd +++ /dev/null @@ -1,45 +0,0 @@ ---- -title: "Loading Data into Memory" -output: rmarkdown::html_vignette -vignette: > - %\VignetteIndexEntry{Loading Data into Memory} - %\VignetteEngine{knitr::rmarkdown} - %\VignetteEncoding{UTF-8} ---- - -```{r, include = FALSE} -knitr::opts_chunk$set( - collapse = TRUE, - comment = "#>" -) -``` - -```{r setup} -library(dv.loader) -``` - -Note: `use_wd = TRUE` can be used to source from local folder. 
Just set your working directory before using with `setwd()`, or use an explicit path in `sub_dir`. - -## Usage: `load_data()` - -```{r} -test_data_path <- "../tests/testthat/inst/extdata" -data_list <- load_data( - sub_dir = test_data_path, - file_names = "dummyads2", - use_wd = TRUE -) - - -class(data_list) - -class(data_list[["dummyads2"]]) - -head(data_list[["dummyads2"]]) -``` - -Get the dataframe's metadata through its attributes: - -```{r} -attr(data_list[["dummyads2"]], "meta") -``` From 1f256bcb4a5839b28fca458f643150030044fdf3 Mon Sep 17 00:00:00 2001 From: Ming Yang Date: Tue, 22 Oct 2024 22:53:56 +0800 Subject: [PATCH 10/45] Add sample data for docs and tests --- inst/extdata/data.R | 41 ++++++++++++++++++ inst/extdata/pharmaverseadam/adae_sample.rds | Bin 0 -> 5592 bytes .../pharmaverseadam/adae_sample.sas7bdat | Bin 0 -> 69632 bytes inst/extdata/pharmaverseadam/adsl_sample.rds | Bin 0 -> 2201 bytes .../pharmaverseadam/adsl_sample.sas7bdat | Bin 0 -> 28672 bytes 5 files changed, 41 insertions(+) create mode 100644 inst/extdata/data.R create mode 100644 inst/extdata/pharmaverseadam/adae_sample.rds create mode 100644 inst/extdata/pharmaverseadam/adae_sample.sas7bdat create mode 100644 inst/extdata/pharmaverseadam/adsl_sample.rds create mode 100644 inst/extdata/pharmaverseadam/adsl_sample.sas7bdat diff --git a/inst/extdata/data.R b/inst/extdata/data.R new file mode 100644 index 0000000..29c1d03 --- /dev/null +++ b/inst/extdata/data.R @@ -0,0 +1,41 @@ +# Create directory for pharmaverseadam data if it doesn't exist +data_dir <- file.path("inst", "extdata", "pharmaverseadam") +if (!dir.exists(data_dir)) { + dir.create(data_dir, recursive = TRUE) +} + +# Sample 10 subjects from adsl data +set.seed(123) # For reproducibility +adsl_sample <- pharmaverseadam::adsl |> + dplyr::sample_n(size = 10) + +# Filter adae data for the 10 subjects in adsl_sample +adae_sample <- pharmaverseadam::adae |> + dplyr::filter(USUBJID %in% adsl_sample$USUBJID) + +# Save adsl_sample 
to rds file +saveRDS( + object = adsl_sample, + file = file.path(data_dir, "adsl_sample.rds") +) + +# Save adae_sample to rds file +saveRDS( + object = adae_sample, + file = file.path(data_dir, "adae_sample.rds") +) + +# Write adsl_sample to sas7bdat file +haven::write_sas( + data = adsl_sample, + path = file.path(data_dir, "adsl_sample.sas7bdat") +) + +# Write adae_sample to sas7bdat file +haven::write_sas( + data = adae_sample, + path = file.path(data_dir, "adae_sample.sas7bdat") +) + +# Add a message to confirm data creation +message("Sample data files have been created in ", data_dir) diff --git a/inst/extdata/pharmaverseadam/adae_sample.rds b/inst/extdata/pharmaverseadam/adae_sample.rds new file mode 100644 index 0000000000000000000000000000000000000000..f806ce5f2455dcda329ba63dd5458b71127e9414 GIT binary patch literal 5592 zcmV;}6({N+iwFP!000001MOW|Y~)6I=8QBteC&*8JU+&o9ojqVwGp@6>`Ri5SQJUA z>1mN|lC2qefs9)%N}}Cr%4|ww8OV;X39yR|^0L@Ck2W?h0rn+GkRS;V?7ZY*k=>af zF%aYc;(bY8l8qlW$L7IK@n#iS-OW}@Ey)}}^VR>?_1FJb6|1W2xcLA<5F^Cs=ow<< z9B>lnYRW=p2JkKcLM4c^7XUs>j1rRoA7WTuW~zKeRI*gcOL?cBM~t2Yyfd!fxn_M; zZ34%I5c7g;?yT*R{F>S_wRKH30Pi!T;hd$p)OF-8+jwU_N9=Wp)wk+ei=DKNwaiLuCupkXbW8u429q)+}J*(9^@_Z$x7xaqVbyi5rLJ`z1e{;yCAvcL!UD%AyePQrQHhqzu6v>?)g)f2 z$u2!6axAw*dqWdm0-p3dU*VON4!lX0rzNQ@a$dv-xtokA@pLHH7#5m2N`qwIe3KVA z8OwOEyiyevPLP8{ugzCPktONMb-DtAlGU;(SIe|Q%N)=IBXPjN`fcPoFOW`wLULi) z5pg+bMJWf`n&4=OtkS%Iqr1c(msbQvta<~ANqP~^KT`UKLFqZE%(Idx^B(icyvVMQ z3bzEjXr(4aOZ|`?4c6LJm6jA9R2G@z-vp6l@&>Pz0mX;{OG_){A}4_KD!d$)=qb8d zt?-P^B4u9TqDs45rYn^i!wWX^1#+2}Yjnjc@dDs3*1Qsr33N^N>rfAgkJG|Up7X0A zLQa%9nx&aCZd~?NN!`{B6SN{lHMYog-5||Pm892pRimwv+%9NZ{k5ubUSZ0-u;`NK z`e{=WPMh-8F-gyZ96)|Dj)CNgklh>8#fo@?bXrdp)BqV&1Fy)SHQ5b`lxRNGd;)`z zR+H(>cB?;sz~+OEk2If4ezhmjhIGEdFP0Unc5rOCGXWY0g#<l2#O&x!#f6tyMiz(Qm75ldPz_YSV66L}+k#=TpLx z_XjEVh8QJ37EvpkSc2)?bVr&h%pm!7p{G03C2F=KJzb&@ z{~|;eXYw6sG{4+T0ZuPZb)>t+FBGzfezG0QQ=FMb^aZDv%OU#7!sW>#`pB2y^s@z| 
z{DoY{_{HLEM><==O{ve9wT;bmv@#IHsQ6Jt#ki(FS*|QvbXY4lP{XZ<;tpBwK10Nwkpf#^|0#-d~OL zLQH476fT=Z63s`7KaCh_+V5DrCMm2SnWBW~`!doyAlBltw0LfwgBivI0$upS8#EmZw~sXKD&9aAHj+6<(HW9Le&s zD6yQ>U+9867!Ew_s5HC83%smIp&_d^eCES)U9QS2Oc_jrB%X;R+|l#OA`Q|Oc|ieX z1>@;TB*YXiyk3)5ULdO?@DX@n(Uz+!i44cqEP)wu$sbc6EY@M__X(igWy@b)k- z@!C$m!08t_{Q{?7;PeapMf(C*4Q;D#_@9OJ3p^0SeSz`X#(jZDm-q^sz&)`fveqv` z9EMJs-k|eON+->KOy-t3e^&!c^_$mVQt70nc)$2cbWv7!RYNoPUgTS*+G+z=OuacH zoG!=85ypyh#M|Ow8CXK!X~*m&B8bV)!M_xH#aIPDP20cSl1lv@wME|6%uUic8lf0! z-Q0@bzBMX{g5Ns9@Oqw}uk=ofvdrV@F^O9egLWIr3C=Z1DlmnH#pRKtl>Np_s-2{C zXIYY_4ae`KV<+GEDmTg0ciQ!4e7Aa0;uz8T7_cu&oGezB{p}W41W_RaUxyk(2)u*` zk^T6^JzPr++s?y4pw^$*^qm z^(yG8w6H#ffk zmN=fv3)dv|-5t#U-xJVQs)pVKy;_?r>+Nl;XVc!toijf!T0Jce%x-7CO;)sZl~gvZ z#R%}ZX>Fv&7?Lp_6B~^kd*`{mNF;*c@S5I^TTy=?m!`cZ#5!^qgkjYXRe;>5V zna<>9(J)jbR+N*^!km00FC=AK9%=^W%x0);H%>9*Z&7tQAn{U!gQQ%_s^?O|^mf z2Dph$!`12l)!aYM(1GKhfKO2Iqt!egBM(~5<2bn)RQ+f*&quToUg(>jiAEj3EWQ$U3%xZ{yIB@a?oDBmwrFmU-!~G+nlKExL&%{p}ckosF`}4PI0Wt zlgIhQRK{o(?#8hyS}|VUQlx(_Myj&S>F)=nuW+rk?lsKb^e{I~*#G8U!C)3a(#0tR zs|YS4SaGrbk%M#lFy5GR@Edm>WC$2h|Ce3ViQ8AN;!(Qt3f#iE}s>&9c_^Pem4JN_<4{9l3N`D%X| zgDjlR9OI^+B_Ql`ugTMp+O1k2uk;Z|>GvOWGR0z}Pn?X$dw;O+;E#y|;r*#(c<*2L z6Cqk2S8iNgw|W*!(F>ZH8&1_LiC5fioroOhXo=Tb}MJMa(8SFkGP)&?^9tZxirJ$(qRiiHm@YYHlj;7nUeH}!&;CmYdz*ZWpla{Xl|ANL1e%Gb9oeuq+M;IZtq<$cjUGlr$L7#o25gAF#O@)| zAdJzi&zigcnAI4;VDE|jczhedb&n=s{MJ5{6xp)8!o=x0%NWS-e=BngpvGM^EK=A?-V5aBy{tpg+@0V`=KPST2=Vb7aNGH_Y z0O?w-jhMCCyT3UB(gq~83HqE})mla*;?`(!^?lH5v|hUC8ZDYHx<-ro zzXHeWd5zW!k6WYduf9JC)pxA2AMKpU+@>p(hlDE}TDMpCVVoggd;w7o>GH|={O6u? 
z{h!156EOd^PYGB)6fYq8d;AF)`&9O2U)k)A;e1J-6P_nxjt{NX_pAlkP6Hb!%u%|# z<=?BEox%;Gj7ieyZ*=r{)R487X1WKKPs{q0huzkvR@Ju>;Na$QS0X>8+H2~pz3Hmo zxtv=$(lgKLnP=R%q?g0b=>gF60BHXQKri~0t+9wwZ1z%$6-S@#d+=+Q9a(hmCTuHd8*6WFfb zECybf9v9arFicshzh^0a%2N6i>Fyo3Uedy>bdH8z-s{aBJexK+ckq5*2e<#A-e~9% zyTfq@*99NU9Xv#LFgI3j*!SvU9;)+S39^3`kEsj(&T4d%cwZbUm+>!hKYETOPaz>E zXQhK~;m<|3`45*f8`-mcc%0}FheP9Zx2b0x+^#^kzDw3a4R((g*-_A8k9B0rePT|J zJFpLllVk@r#3@C#%Fm)Zp@+*k(i1ZSsR@IsEuM8S!3}a$8&&9sUzX^XwwcI{L|4j=uQ=W z=(}%D8pbO9S0JVHEih%;QrAdEZ?!x1FxK(0$T`JCoD0DaK%w$46^h50?+zp_K6OC8 zRsR(#`;tE_y%6*gR`}w~3FEshJs&wkd`O(58npIVPeS(XrI&NJ?N`CXO>1b|(Q814 z-fNsRjQ=43QhM|%oS{p7(hzf>u{%Sv6^vVe9hvs$D7`pKgLKYxbI$rss&Gionc+Jz z+L^`ew{eDHvSw{1_E%Sl@W!S$2>Txvt9@eaZagY;4$AunLxo6&Z_Fh_v^=id7)6iJ z8*$4R=T!c`=^n%1Qv_R5Job5;hBn!}w_D?l2FqOtPf>Ej7i2FJ{W?gVx=H)6a@Qjss zg;sb$Onp<|)Hn4VurFqe#F?8d4RhD<*v)$DRz*)tjZ{|$=NwYD2NfnERn`Vdbtnkt z!sBYQ-fF39B#nQI`Vz=O-%^`eOC>A%ZD*hRA$g4hc^axO`Qec#SMnn+*TB6V4SCsb zq90Sg9@UOHBEqWehM{fSM|7<9>Y$mW9qt#o!#z(o%pL1#hP1yuA(E&4?Fr~_pG5lG zpq0fmu#uWfr_ST)j5qH%qwlm#V=u;-{x%sZa<_RO9BYGFWE0wvaV&YJ6LRcLap({J zv6~o&$eGFHB3lv;i&Kc)nS5xR?l$$T)7PE$_AiBZX9@cs7O(Y*6b4BbCk}|EggBKD z(ek)*<5UgZ3Ec7dfOPzItRzpzUngMvb=8~T2Ip3-vt(84jp#KpOQp^1xJ#ZkvlGzF zzT(bnI<_+J8kpVvIl{XB1c!h4t^ROl>TWC+zxS4nuYY#;K=?b~+fRhQzWqreM9bsI zo%|Y@uUn%x6FP6w%eCr>?}Qop{j#UbfjK^;#9Pw7eR3-1w%q;m&aJdK{I5Uzyf6Ir z@Bb(kjcMmL-S7VAodY56R$Lr@^DmPjS{_I4kqvvEK%HWadYlqh`UP>hd1|=AzB_+Zrq?;(M@Qu5ZwL;=E3dfiSU~q zpIXV_Z@W$rMvTfY$qhqQTjWArYg!e&$BSGozSki=#PTvD3JNdOICcQMNk&|%RygH| z87ENW!!6WFf5>0AC&<-OhJJ?yutP8#Bj5WZ0wJ;InOdT@%q z#x?8PZPmI~X0J`I8~Rpo8Ll-?3+cVr=lo8~`(;U=^Go!8nbXVIs=nURZ?{Og32J)} zUA=c>6(%hDt6P`pz~JXcUbcRkzRXxSPsl1NpgK%HTTG>zP;1z_yo_n z8=IP`CL<;Vy_LCr5*YoI?o<;!s|+&_aeDykP*%_dL@L!;flggV?!uUCQPnq$`u3*Q z027-Ht1Ey_HuPH^P8!DljujoSkr`(^=MC-_4Yaiky(lLQV^#k(N82|vb5m=P`K+UL zZ<$;%%;>jo=mj}xi1~5JRy@6Ks)n!kVD^$~{;)M4t@6(AuCV6emxEjNE!CS0_}eLD zrN*xC{@Wd_xJ2{b@$B)MT$_L0_dG{)GWUiT5)joO9?dD7v=juaR1_}}n_$?0WI09* 
zMXUy%AYwTju^fsxW|51Y=Y&J~zJQ(=S$Rbc(SQu6EQ?H#G9Vy7ZaGXy0YNiDKuBCA zAjFyy(nWBn-AbJig~dP;ndN1AJ`|W$%AvqzQLcu_3ctXGr4EV7D&Zn15XlJvIcEE= z1QhV}DMeC%03nL=sdGTEfZ)VXINHi?38EH4l(H01mLy9M74{24p0iU4N+*PfkodMw zK(IAQ2op6=&lOgbxgh6JIxy-e9awJMD)G`nU`P-wFde8Cyze&#RsV5m0aCfN5LWqw zv~V-*ZP8(W5S(Wq?9Hl6@q&zTN?G9j zWNB%M2@>+#>jcdxVXrZV_9SP-nxII&1=yuP&{05yD^`}OLKGcRx*XWXCWDGF?o`6C zF9-VqI|t7TG15X%FF_>*wa=4nL0S&HBwAnt3aFI!Wd)T5&|L6}P(@bgN+>Q+?V{xE zL_a6f3%)tVE%gR|;}W;Xivk5D9P$QKKI=!Gu%tlfd7Fm&>PbVtJ>~V+){@iNeefUn zD*Pj|?BXrzjG3#=TWjl{Z2{#vA6ToK^{I8E?md+3=W+hld)vBU8dGcQx9Y~mE#yNp mO0zWXYL3V0wO!Q!172=dZJBNVX%*o0+5ZEBV@J-!*8l***DSRF literal 0 HcmV?d00001 diff --git a/inst/extdata/pharmaverseadam/adae_sample.sas7bdat b/inst/extdata/pharmaverseadam/adae_sample.sas7bdat new file mode 100644 index 0000000000000000000000000000000000000000..28722160375269cf77e12bd2ce35fdac40c42677 GIT binary patch literal 69632 zcmeG_ZHybobtCB{ourc^$BN@Na=fusS4nj;dAu*#O0;|A-jOknWR^Th=NgdcJL-<< ziM;T5`UupTMU++v{KE*;51>J#qWX~p{!ySt>Y{E!6hULOZmKp0>HtQQ7AcUX3Lq3} z8?;K?_h#Pga+lmYE;q_uk|DsIeKYUP$D4U?=FQhK=KTKZd%t<+Pxs&d_ka1Bezxmt z(H9%>hx`L)+Y? 
zZj%2)Zf^xC4k!*N4k!*N4k!*N4k!*N4k!*N4*b|T@Za#mNK2lW{|9~^ua4mFlRfiz z$I$k8+t6U&ai`zgo?j;%<)=8X`8lu!w*NEs5KaQX@4I`2%RT8;IX`1!S=xMICNY~zutav+NM#tC$h0h<JKAk)p%QEca#xpcjxI=uUFCFpVxo$N2=VfQ3Ke@$|tTU6CBehvb-% zn6Y>{!JPp(oyg`P;b}O0bHae4Lw?NZ$u!WJCpaE(D+MrB7qccYfjGFLJl5nX?g1|M zuqIP+5uW2PFAp#RT!@9Se5zpbdY%GaO_?)!^Axj;xM>!WnH1m|)-+Zxy3B|dI4E4k z;@O!@!J1`>X-om)oKDO_ek0Kw=U*fWr+x%Kr@4y^!SW!Z&?Oj3o?!X7HHi!YKIaSI zPFg28zbqq@z!%7`f%TIHC~2k>>3qRR0}AAM%7PP4d1I272lI{|mN}KmW}*TgJb)?| z0St@>U%;I}H(y_!zoez+OO<;0Y`IiRRL>R570r0=xmxLbxmYh(D{Q7x{@7|s%dbB7 z(b8PK1sVBQf}qM=Nz1IBd#+St`Q>V5rCJ+Il+KnbrFj>!Ra&mr>fl)@)y`>Wt2M2@ zSkjF7^QGEKNi)ww9$311p|n!h(xvmIC5{JawE1cA>O2gZ66HC}U$J%xlB<=^Q_L(^ zE1fOXYBC{?oGdRaw#Mp#d5kT7Q?=^qvP{3DKjmSf3hc{YTB(=LX;7eSZJ}7v;!DMq z6~_F-1aYobJ_nhX-OHZUK^-^U8-moreKAb@I;|3N)@jzEkW+EP3P;yT3vI6x*DQcnO`Fmc+KS| zQG}wO1-4>jUbeY$Fv2zr$e#iF0P=`oljRz4%)Gcl2ln(w{>fG{bo!5dkxl~+W9vseTFHT!JF6Kfz zmFY|Wwp2Vg9a9nIvEy4+9nuRvrrtod0&D)U-w@}^kPQYP0FC*}z9mmB5w|oi#Xj4*l zgG3?WJZnR7$CIi+=W-78KWM5Iq32w2U=|>%97t_>uDs0a5app&SCH_?{Bw{PBT~o5N-J}<@-pfqo+T$3 z;+w0lqK+k0=D~G>A>Md(wNkHLa+krS@35aIebwSG^W(H9?3Y1%m#gzyBr5aZ7i_m} z{@zEs$vfgR~bi$zfxbc#<`ZxlX1FnTv4(@6}u~^7`BKW0-mhApO+&)S1UnJ zIaw?(q5j3>88sEEa?pIMLY^nfptL|S)N5rvR5?+GK3I_OxR4kt)GAiytLLcWf_iFI zB^WMsw7FDVUV(OjxzWzns^>&QN9BMW0Y^i=@OXJapRQCdz(8#YG#0k2YIO;l6*NCF zcEK@W@ocGnNy}B2S5dP;3loA(xY#sp049eV2 z)s_KI$i_(+U_dV(iwG1css-p7NuLNj<7~ZDv(cQQI*5Mpi8kZlL9y5QJ-{9Sj8|!$ zKe3I_aZ&(&xgEk?B3I4}>?2qg!S?g!cJb=Q3=i|T6$Ny%ZPy9aqIUoOhKK?hU z{B8O7@z44A@AL7$N%Jgi@%#AaeEj$M_zUj%KY#Rp!tnRU=oE6j?-$9tH~L23BcbT$ zL#Ce$NgwHZSE&3S=-VHPexOg@ebF2Lf3OW@9Dlvhe|y`z1$mG3uWfrU6#dKF?hi%( z;^(bu4AUC1OK)MzUqNL<$*s<@NP=~ng{+v5Bzz8C4c_J1Ha;dn;!Tz4}8-Dzv+R$O)xsh zpMUYd|LVd2o+tc2Jn$_K{v8i|*8}%aKP<~T;DPrOjKdAgpIm2=_?ioUD?8+YBLriz zNFQ~DZ(&D0;ZG7Q)B6y?vi_1D*z&;7df>AJ%k&orMty+!x!?)E=z%X0Eam$$!Ls~U z2*%+a#`h@?{ig{=y@BC>OfdE*2>&_3sE!c+3c=XlBK!?c_%}V_-zQk+?>51bU;j(6 z%=y%j8}mycfv5M($_Hy+Q6vg#R+RuTs2UCHQr6-=KWFMecXV{}$ojq4@5S+qX^d zV_=)$&whdrkjpEh%f}HuwrbJpuRZ zz}wOT`G2kdouSJAQvZ8; 
zq`%sais!}taPvjW{o&@@PW6YIuRPW-=V^MAKdoo}_4lJf8R+kIzVG%+JHySF&hLDz$M!48^q%Q8p8rET<$2Md?WdpD-@wjZ>;KLUtp6YE zpkZ9#`Rdm>{mmV{>i-*@{_{J+@xQSnT>E=AWctT;yf;+)Ij}Dpm8Bpz3LB<{=w~JMg*3B zV%zp`@^`zhSN$W>f2%Lt_~li5y_L$dhVwXk972`-Pb^aQKWBup|3!P)DEpsUqjD4n zeyTap+xX>`z1Sz=uQ&P^L((tq{UwonkLl}s!;QZV?+rKpd?+M6+Z%5Dd~2ZB@ypEt zwBOzs2seN6(m=TJ_qCqWzsU1{xo7%Qoc`H?aPvorfnM9sVV-`ZXL=zv0|Vja|GvAY z*YWo&dvN}wxhI_dxVEQP`)8h~zqBWu{3rH=v%e4S3CI6|J>l%{JG;Zt-`c%1l>WWB zJKX$bb9XrV^~UaS@_TJ}IQ#$V?r`?+mEEX?@z-1b+t@9)Z1hH751Ievkm;Y=9j^aL z?C!Py6I_pkq#xKV^{0q$jVJBYh{Fl#crKOB!qHsAj6?^=BEy5x*icN6-ac+d#!jRx zBdw(}lV-dC2WQua6oZ0c^T__;2haCL1?S06N5*0zvk(iOCqE4z1ss>r+L_L4a9Eg^ z0S*UaDe@e}_W_ZGp`A{p6Bu`fWwIF)-z+#1e*zAyC!Vn2*m?Sd$-_BLhW~~l(b!;Q zd@wRB-o^%F5!;WdV@yW^#-7sg*?jOD!hKk{3^I-WjrVf+%$K9zG*}Ywj?c%$8?4hG z5V*ReVeU9#hnqeR=WHMvPHz{4iZP1`Fn0U+EH)CE)5y4;Q+gYNqS$^TjK4-BgK%n! ze$dXu+eh?S)3EZBCzEj8T!yx~nQTEzrt&!h4xMXROG`ujWwNJ=mDA}eJmFOC5*(-2 z;3PCZki+~#X7Yx}(1VeANZz$41IfGn7?2gAJp-Qyhl;WW*~Dj zRJl|ivYur9NVjR%LBBqGauLqVVMG}NpX=(F_0>(3NX<4dEJ7XB^|BXq3OE?gnJ5JVEl=eTTmj#it9O#i?X>c;2;{9cG*kk{XjN2Yla0jWCO=SvZ9_BGy4GxjA?-L>q zL+jWXmN}ipdEhSPGCCN=c{whZkum#C6o_fM$nc}wwe_%1fBE&Z)L%Y-VZrV%6QaLt zk_(-HD^7^t?k|yE7DN`KWnND8m%uExF>;CXa%1MHc-Df!;QDJ=m{DT_=DycoC_+^E z10e{O1}BtqITo|UP>f?oqGFH&2&v46XRO)7S}qH7;~AJPDwXA{XS8k3C@6 zhIB@ zN^iEU|NC3{{vkg*_Wwg5m*c_K0M$ApjD;MzloMKAj%Q$N4vlA=H)mchC&nIUx|^4a zUm`o>c+0$;CB`w%oRB+C)_6{iXPQVa3nGi5> zspiXF`&h?elivt!^TU0tM-!=Mo=IiQe0Ljjf?@OVJ#;X=QHgb|s95EJScLsQHe%cV zu=;-jcgVq4lHi*^0hY2LvM{ucT*?WpZu(2Uk2PxV|CDb>VPR3&K~ti?)IY-A=DS4S z*LaL|-5q_Oa3) zKg=m#$AT3txgU2NR_4S$)(8gUI#!ggV~IJ9=1gzvSpK_J{X^t9HUdjRV(pVfgY09y ze)#=j9qV&{^psr3VrgC%xHjdxkCp04u3t&lsRP)T<5|bTdQ`tn$N_)*zg2JIdWO=Q z!F8@wHyK#R=zcu|^roYEpc+VnZc4og`;3*|M7h2F2#DrPfjjDJ;!2V=nl)9W@JQ2nUQ|lNQ1ed4XDlam$1k zdy5dPGp3*BD4J!YTmhnKb$Wsj1WSWQBIR-vhpq7AFzz!Ra?a(XGAD55f2wUAkyEVk zfXKqoI{R|udATm_jFG|UkQk}snW9=Gj`1zrcpH^2RIvYu?-Qf=7v z)?uG@ESoqBFM4*1&tF1?BSrj*_cen#0=H-XmwK?i}3B&&au>rJpTl->lX zzDIgI(FGussHx45)nmO41e=&|F 
zTN*~Rw5kphGBh0%*Lg`BN$#%GUp9{HA^ZRBN8e#fDB1nZEhL}J9VcUlr2Wt7?Si=H zp~P&Os`@NkW4bTKoNf{xNGKOXy^E*O=?AUi{iXYTQO z#yWDTDf@q;x>=b686hQ$6)6K{&2oLmY>bUv$^%Ry$O4Bq zbnuNEf2-6@Xqncn^X`i$VQtS5WU6L7H}sh;4i{a#KCSHDfj0e}0yRd3>Y z#;`F3*Z*~~V@j0L-r}frO$Y6$Yg~twK*jZD)T=k8)(3=*y8lnA8|OS@cXo!Oc~Dz= z*E860ofluq-P(Hd`pVPxY$;KI-jw>%_HoB)%QkGFLcJ>s;@M~159^FdZ}Q5Zeh2vZ z^U-lrEQM!+okJNdIp47QRW7~D)viWx@l1n};#uGXq zmt<$yvdr9Z0*bUVIK3>0r@vHkd5`Ukb^1%bk98uOnAPC;JV>Wefb(yiPUDF7g3ne& zGi{#eVgw8V6f6y%8c^~6ax7+V_ZRE`G3We$n6ouuXj|sXd+#qdgaqDa(4GCy*Z=u=rt5>o!LT`FPy^z4rt36pDm-UAM7QH{J&NysYDtsxg3pn&NGfV69Qq@B*M_j>8#l<_vOg-47h1$ zh*k+k;h>;cFMv1CenxSgvC$`N`6hj9Q_iW;_y4Y6e=pe?kNoE&a)z$Cm+b%hgbQDQ z)HydBB2LEkNjpQId(fGelgBd2{>SwUXe1(k!}fVb_@)@0DsuTn={zF_i+1I#XTV(s zJ%5)oS#J5LX0p7n$Un@9_Zf8#vG?V`4pD00M_Si2TBDHP<+S}E^ot6#za)kZV!rKq(l3(bqBbWLY-Xo<^wgjNrL-Ghb z=W(@sSF$y3c;|8{3*uR46z6i3TmnNkL@r_dKN?FK>GVt-K5Kz%S%*bu8JOgU-wO^_R5%FZ(v(t|OOD ze|b99)?c!={*vN$=H=XS>9+rsT;AjUvV&kLug$=LHSK@f&fxc^OoGCkn(21mR4{C* z@p_{YHW5d5QCL45j>x@b+g_3A}c0QNmZ{_=k{Os8OxXw5p z%r*~(E#^0Y^?`$YO9@ zc))%+^<7S6hWh@WY!W{o?v|~*oa^XKJZC%!kPJ>5Q@VZZJf&uAZSw#MlN9J#Te;}iMjcb~DWC!4o`izI8^Gmhcvr#PTEpg5p7pg5p7pg6D@Iq?7O CR6)D| literal 0 HcmV?d00001 diff --git a/inst/extdata/pharmaverseadam/adsl_sample.rds b/inst/extdata/pharmaverseadam/adsl_sample.rds new file mode 100644 index 0000000000000000000000000000000000000000..35fe401dadf7e9f59eb1dd032dcb23efcaf2e934 GIT binary patch literal 2201 zcmV;K2xj*miwFP!000001MOPNbKAxlheS}6oQKuAPR5>TjihcK)1eB86eT6o4q+0M zm{24GfU$gNRhV2#$dCXFg0_`It?8wm&h(N)ubF8tJ@%MOr@VIBu`d1tdg`^kw9}K_ zT>uO00t-rt8abW>Gve;|*v0RA>=W9(#Bp4POEk&X5R!3`sZiLYSW;e+ifbfG5)q5mD&-}yC{bD)P-|VJSg^0vO`27NZ(LuCH!Q4E zeTj&*R-jm@;cCG*f9`3_;%gnt{MiHu+T`v>pzF>yohNMO31fM}N}e!}C+y-0gLuLc zo-lzYu=6uOGtdfs+Y#IYXb6fR1N)^_QCfvw&7)qqrKj)-%SfYuGzt>ML9s3|bw$`) ziRKg&oOQ@da5hpbdktHrfi+NO)!fKc$kXwql z8&Q?eEwU6uqFPMBc-0?Hn(2b%+WRtsS=P%OddiI>ztpCF+2;IG56UK%2jvGryH240 z0T(<>AQ^d|H3etyD6*>(JK77!ZteW7ZP&(!`~MEh-X 
z|A6dZM&C>k+aK4fVBB@g5n4>W)irX@^Ay(0a@U<0gRi3+%$BjJM&4lswWG-wQDsBT zRqTo7I;=?~JR-%mjA+vt+eAv}NEF|yCJzVc+u7& zVh;vz(-v)9y+%eNT8846J$7`Sbl#$2s<3IkQ3~Tb^-+XdJ z$mPfHJ(+s^pN#(Qm6H{pzIRj(^EkTIbU(H&5QCTL=#>=YZ=#{+QdbwsgONRk&Jv zj*PRuH)TcU-=965B9H&}!KYI)GQa-kLy!OSKYi+zzx@55z4GYtjVX@IeB#`Y9*j50 zV0d>eGh&g!t-q}9e@$mvlH&dHqd%au=N|qy@P1I1kGo!$myi94Jmf$6i_d@bk0W2c z6zILcapAZ~fch?S@u_k>lQlpH> zGlGFQxJuaP*oZ$W`-tA9<>iBbzkB4D4PRdVAvX*04c&IB*|0isyqhTDEx53b-zJ@i zM^pLut-u-nHAj3DmXCkT=pxrRF#v$7(s zEFb@h8Im(*#r2wL!9-Po>zLpk7ziJlW79oba^QI@fiO02VgBcvx31zzKsBtMJv2WK zZgx)xGCtnVTc&_~34;tSd{3FZ7x>X21m`b9{-i|Q3tyzSuBu=F0o()lv_W2zUnG#5 z%D@m8c7L5mx=Wi&FBO7P-xqu$sjSjNI==!2c`>7B$k@CmA`)~)w6e! zvN*MEdjN+6x(^q&9D6bnSO?t9k4pHfPgdH53f z1?+vt7#)~hxW4YAM4+Kv%&h-rawZe<{zQjoy)7s1wTprFJs-_&(>*XPp&DMmCYhwAkTC2o`Ag{ryU0qecO%sI^d-Ra!(lNN9Q} zs;M7FB2{iyqgGxdsIb%sN)GK^Y`{j_N+ZkM1dg0kSd5_SBnU7jK1a3^x#->?E5QtfV>3%=8xbH9ESf% zUFCy+tY~04#(y=wk6466WqIlyA0F&>n3WtH1R2iQikzHlPd|Y%gQO0rt$H zm>DzYV$jB+oy?Xh+fN5{G^r8L2xtT}0vZ90fJQ(gpb^jrXaqC@8Uc-fMnEH=5zq)| z1T+E~0gZr0KqH_L&3_qRh*xt)*=35M6Zxt znZC2}h(5{0lCJYQ!PqIHpCkG->760Fvm`%9e!oxh3)J6Bq}L$(9g@FH?Hi=`J8Ive z_HAn4A-#*#Mh3+A_Y8<}?;jAQ=>XBAMBhbpoapCS#X}uI4N+)tRpG>74oU0EN3NCDQ9**1A10& zE(v06nU!=nDO$#o3x#MaFn*@iuQeb4cf%xiq#ZqEkeJoa+If zw{{^1<4fk!NT(LK7~8x9bUOD4vLVKK8!z~HfaYzQx0b!cSQezSDSl2`g*=xp%yD`0 zNX}&0yklj-Ro+T-9B|CY2Mn-bxzv%&!aV4qAK<`wJO-N3T6h8NVQll>_R0w(v*K2~ z(rU@A6)L6AZ@Wf*`-vyrWv@kmSa&du)oRV~)A8QH~}XV^v0z1t~m zxGg@KI;qM^i-xajR9B63skY?_--c6ji{1u=2^dI!sFN*rQ)$=9R3Od`cg09nD_h(} zyBtFRSBnsAX}Ppn^h(tV=A^YB8@gX_V!%lwT`sNxk7JlGd2Tyi)|$)UX8@+&7LMJ0 zn&pbiZpvG)KtJ9I;4PMml?p^=)ixly71wsRmTRR=jDUyK6vG@|_O^>r ztZrAl+KKitB&+2zWEDpzVz!GXu$LCTjK_zYr+Cq=m8vU74Ca@JK_zDJEo92c%%u=J z4^ocqMH*92Z&tUqYp&r{Tat;>qKi&uV{_X(MEG{|$D=XEF(lh>CZjsSc=5qtbFXl4G zN5m|qHOskWpr67>`81Jssd9013+612%vh~eH^d^et6My|T+NS#gZ5CRdc0y-WpIY; zpt$OKCyd4F<~DW!GhKAJSgn@9EiMt`NUgfPX_$<7xQrx=9dWB`wc_S_X&Hc6Lwl%m zSO(f6Frp$r8SRl8`V(V)wB)UqD#mzBr~n4s-hpC8V;6eXs^`{xwdPQpiYk9%&Nub` 
z2?q?FP2#EL_!k^4r1J9Hv`_l5LL=z6Lk+$=?smw1m~6Z0Xr4yk%12<7jY5#8=sEu+ zJ-^H6cANN#z(67j{?0W*F{pn^!M7Fsh=RXE3%SF;f^RGM5e0weYDN4CzOCR#6#O0W zkik0QSMY5GKce98(6dB`e+A!G@FNQTj(EuGj9Tzat*rI{hp7wt^o~@OSWK z7aR$GZk|pl__l%{QSf(Y{RI6td)ocgd;b=9nM<{d4`XoC!`G6*ZtV|ndt-QCpZ3D+ zV&s-S?cd!%3& z^x#dQ;$IKhzdhLR{YQj7IvB40v5@iqaljCP^tpa^2Ex^^9}md-agiR9*X4qwxbP7o z!Uh>VN-*5=BQ5Jbu79LIMf$j9NBZ^veRqJK3D8NR@l^oY+eBl0NaK5DJfyrJNS}U$ zXbd0qmx#tKcm%YHMB|nd<*P(Xe``b|o+z&qeFM=i5H0)-vhNVRpX5Ib$bTA;|B7fC z->XDRynfqeKfwMJu$M1VWPjIS;Bd(C?IT)_XMaF`ARxafAiq68M+5XkfWALKr-+vG z<>7#QDL@wkbctwe5dS9w@>+m?CZPX?0DUq*e=%VHm4N)KMC1Js`~Moza=x4)dXVHV z1nj>NpuZWQzZIat%`T384EOmB+H52OmURU`q2MzGzwuGwxWm7KpHT3bg5UUv!oPx_ zQ1F?8FTaE6h+pv@B%$ETVi)w&P`vM`EBFZoU!b7R8Uc-fMnEH=5zq)|1T+HuBGBKv zqjyHaz59JRr2UH{@)^0m_)m_6dw0A(67GKo&Pcd-SBa4Q9~{AFb3As7-<}bCYq57E z+`Bv8H9zgP|E8t5`e5w;GCWFw^w<9@!{P9|Fx>CE!?XNu%UuXFoTeaC-{ z+pi3Vdv~Ap|92Oh4;}P?cuSwOGx_9VCc6NiamLK(RBR?{j*rjaZ!BhxMd5F39InQr zvB~IIyc>@-#f}-f$BZ!sZg__YC$W^Wa{0MOXW_fkju?P3zu*|Nnf#&!U!NKaIU{R1 znfZmqC#qZ-zOGw#=~`O%k`?lnjL@7A21cW+lP-=yKk@uu-?V1ot;pvl?)VK}FjoKb zYYA)<`(EcQV{4*)_1$MBKe;fsm`%ZlZ^*!dWYSjBzM#qAewQ9yhmRrA@PWxVU}E8A zM`p~ZX~rk;hvG{*L}nnR^3^D020nI$qn;I+N$)Z<@J(K26+Xyo|29tq(@Q2Y1G8u* z8WZsYMhYh_AN>i({7jfOG4w%{5{dfllqtz?aiX$mdTlYnOFuK_baXP_Ju_wuGc$v~ z)~T+bJ*P5A(nz6WvKMD9aUd5xa zspuG1xN)p-Exc*0W+ZmZ@q2KwO~nK*Y>_f^IBO*z!fIuubD5+yZ&-6FesMXf8IN#G ztXej@n`&m%S9lR?{NX1*pla4QOc^*%ZQ{v<%sI9R;H#>cZ|g4+c_CY2zcgR1M2}-d z>$PU4Va-4>f*;QU-ghQNKFny0?l?^ztgtc44B+7xz$tT@N6+X!v)gKRv{-pO3%43* zs~c{)RB?@L^*H}77I{jUU(KeYV-(C}L*b;uF+_aqc}ryG*{2?)%p?wqj5H2WX4t(6 zJ~je35q~{__v(dtxIq@gttL?ry3*FKZvI6}X2AZE?lV{i#%AE&KO^dq0J!&AE~GKv zWXd$TNqznUs%Efn5y1cim#3Ps1ZAoTauX%V3$|S{!&#SfpNYfXY-$33V$Hx9#hSsp z>Qpx#?afe()ocQm%ajRQ3^?k2-e-F4zk131HG?}!0n}Bysb=Rcz9;r(kN^DvSso&gL&13?kt)#o{&Cu^9WhRE1nHa~P$ih@KhEFkkCu-8Yr^$nQ&UXML z#x;X?n<+|CqtM?Dpx3>mm(0%$>|>_HG%;H<(|WU#nelm`f9|e3MP|PHy?f=Fxn5+X zej8 Date: Wed, 23 Oct 2024 10:23:27 +0800 Subject: [PATCH 11/45] Update Rd files --- man/create_data_list.Rd | 
23 ------------------ man/get_base_dir.Rd | 30 ++++++++++++++++++++++++ man/get_cre_path.Rd | 10 ++++---- man/get_file_paths.Rd | 38 ++++++++++++++++++++++++++++++ man/get_nfs_path.Rd | 15 ++++++------ man/load_data.Rd | 52 +++++++++++++++++++++++++---------------- man/load_data_files.Rd | 24 +++++++++++++++++++ man/read_file.Rd | 19 --------------- 8 files changed, 137 insertions(+), 74 deletions(-) delete mode 100644 man/create_data_list.Rd create mode 100644 man/get_base_dir.Rd create mode 100644 man/get_file_paths.Rd create mode 100644 man/load_data_files.Rd delete mode 100644 man/read_file.Rd diff --git a/man/create_data_list.Rd b/man/create_data_list.Rd deleted file mode 100644 index 2368307..0000000 --- a/man/create_data_list.Rd +++ /dev/null @@ -1,23 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/utils.R -\name{create_data_list} -\alias{create_data_list} -\title{For each file name provided, reads in the first matching file and its meta data/attributes. -Preference is given to RDS because its faster} -\usage{ -create_data_list(file_path, file_names, prefer_sas) -} -\arguments{ -\item{file_path}{the folder where the files are} - -\item{file_names}{CDISC names for the files} - -\item{prefer_sas}{if TRUE, imports .sas7bdat files first instead of .RDS files} -} -\value{ -returns a list of dataframes with metadata as an attribute on each dataframe -} -\description{ -For each file name provided, reads in the first matching file and its meta data/attributes. 
-Preference is given to RDS because its faster -} diff --git a/man/get_base_dir.Rd b/man/get_base_dir.Rd new file mode 100644 index 0000000..891d825 --- /dev/null +++ b/man/get_base_dir.Rd @@ -0,0 +1,30 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/dvloader.R +\name{get_base_dir} +\alias{get_base_dir} +\title{Get Base Directory Path} +\usage{ +get_base_dir(env_var) +} +\arguments{ +\item{env_var}{[character(1)] The name of the environment variable containing the base directory path.} +} +\value{ +[character(1)] The normalized path to the base directory. +} +\description{ +This function retrieves the base directory path from a specified environment variable. +It checks if the environment variable is set and if the directory exists. +} +\examples{ +# Create a temporary directory +temp_dir <- tempdir() + +# Set the BASE_DIR environment variable +Sys.setenv(BASE_DIR = temp_dir) + +# Get the base directory path +dv.loader:::get_base_dir("BASE_DIR") + +} +\keyword{internal} diff --git a/man/get_cre_path.Rd b/man/get_cre_path.Rd index 86b2b78..03686b2 100644 --- a/man/get_cre_path.Rd +++ b/man/get_cre_path.Rd @@ -2,12 +2,14 @@ % Please edit documentation in R/dvloader.R \name{get_cre_path} \alias{get_cre_path} -\title{gets the NFS base path from an env var -alias for get_nfs_path to maintain backwards compatibility} +\title{Get CRE Path} \usage{ get_cre_path() } +\value{ +[character(1)] The path to the CRE directory. +} \description{ -gets the NFS base path from an env var -alias for get_nfs_path to maintain backwards compatibility +This function retrieves the path to the CRE (Clinical Research Environment) directory. +It uses the "RXD_DATA" environment variable as the base directory. 
} diff --git a/man/get_file_paths.Rd b/man/get_file_paths.Rd new file mode 100644 index 0000000..dc19143 --- /dev/null +++ b/man/get_file_paths.Rd @@ -0,0 +1,38 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/utils.R +\name{get_file_paths} +\alias{get_file_paths} +\title{Get File Paths} +\usage{ +get_file_paths(dir_path, file_names, prefer_sas = FALSE) +} +\arguments{ +\item{dir_path}{[character(1)] The directory path where the files are located.} + +\item{file_names}{[character(1+)] A vector of file names to process.} + +\item{prefer_sas}{[logical(1)] Whether to prefer SAS files over RDS files. Default is FALSE.} +} +\value{ +[character] A vector of normalized file paths. +} +\description{ +This function constructs file paths for given file names, handling both RDS and SAS7BDAT files. +It can prioritize SAS files over RDS files based on the `prefer_sas` parameter. +} +\examples{ +temp_dir <- tempdir() + +file_names <- c("adsl", "adae") + +file.create(file.path(temp_dir, paste0(file_names, ".rds"))) +file.create(file.path(temp_dir, paste0(file_names, ".sas7bdat"))) + +list.files(temp_dir) + +get_file_paths(dir_path = temp_dir, file_names = file_names) +get_file_paths(dir_path = temp_dir, file_names = file_names, prefer_sas = TRUE) + +unlink(temp_dir, recursive = TRUE) + +} diff --git a/man/get_nfs_path.Rd b/man/get_nfs_path.Rd index 5cbad09..3c4ba73 100644 --- a/man/get_nfs_path.Rd +++ b/man/get_nfs_path.Rd @@ -2,17 +2,16 @@ % Please edit documentation in R/dvloader.R \name{get_nfs_path} \alias{get_nfs_path} -\title{gets the NFS base path from an env var -It assumes there is an env var -called RXD_DATA which holds the path suffix.} +\title{Get NFS Path} \usage{ -get_nfs_path() +get_nfs_path(env_var = "RXD_DATA") +} +\arguments{ +\item{env_var}{[character(1)] The environment variable name for the base directory. Default is "RXD_DATA".} } \value{ -the NFS base path +[character(1)] The path to the NFS directory. 
} \description{ -gets the NFS base path from an env var -It assumes there is an env var -called RXD_DATA which holds the path suffix. +This function retrieves the path to the NFS (Network File System) directory. } diff --git a/man/load_data.Rd b/man/load_data.Rd index 5367158..50f7187 100644 --- a/man/load_data.Rd +++ b/man/load_data.Rd @@ -2,36 +2,48 @@ % Please edit documentation in R/dvloader.R \name{load_data} \alias{load_data} -\title{Loads data into memory based on study directory and one or more file_names.} +\title{Load Data Files} \usage{ -load_data(sub_dir = NULL, file_names, use_wd = FALSE, prefer_sas = FALSE) +load_data( + sub_dir = NULL, + file_names, + use_wd = FALSE, + prefer_sas = FALSE, + env_var = "RXD_DATA", + print_file_paths = FALSE +) } \arguments{ -\item{sub_dir}{A relative directory/folder that will be appended to a base path defined by `Sys.getenv("RXD_DATA")`. -If the argument is left as NULL, the function will load data from the working directory `getwd()`.} +\item{sub_dir}{[character(1)] Optional character string specifying a subdirectory. Default is NULL.} + +\item{file_names}{[character(1+)] Character vector of file names to load (without extension).} + +\item{use_wd}{[logical(1)] Logical indicating whether to use the current working directory. Default is FALSE.} -\item{file_names}{Study file or file_names name(s) - can be a vector of strings. -This is the only required argument.} +\item{prefer_sas}{[logical(1)] Logical indicating whether to prefer SAS7BDAT files over RDS. Default is FALSE.} -\item{use_wd}{for "use working directory" - a flag used when importing local files -not on NFS - default value is FALSE} +\item{env_var}{[character(1)] The environment variable name for the base directory. 
Default is "RXD_DATA".} -\item{prefer_sas}{if set to TRUE, imports sas7bdat files first before looking for -RDS files (the opposite of default behavior)} +\item{print_file_paths}{[logical(1)] Logical indicating whether to print the directory path and file names. Default is FALSE.} } \value{ -a list of dataframes +A named list of data frames, where each name corresponds to a loaded file. } \description{ -Loads data into memory based on study directory and one or more file_names. +This function loads data files from a specified directory or the current working directory. +It supports loading both RDS and SAS7BDAT files. } \examples{ -\dontrun{ -test_data_path <- "../inst/extdata/" -data_list <- load_data( - sub_dir = test_data_path, - file_names = "dummyads2", - use_wd = TRUE -) -} +# Get the current value of the RXD_DATA environment variable +base_dir <- Sys.getenv("RXD_DATA") + +# Set the RXD_DATA environment variable to the path of the haven package +Sys.setenv(RXD_DATA = find.package("haven")) + +data_list <- load_data(sub_dir = "examples", file_names = c("iris.sas7bdat")) +str(data_list) + +# Reset the RXD_DATA environment variable to its original value +Sys.setenv(RXD_DATA = base_dir) + } diff --git a/man/load_data_files.Rd b/man/load_data_files.Rd new file mode 100644 index 0000000..ccaac13 --- /dev/null +++ b/man/load_data_files.Rd @@ -0,0 +1,24 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/utils.R +\name{load_data_files} +\alias{load_data_files} +\title{Load Data Files} +\usage{ +load_data_files(file_paths) +} +\arguments{ +\item{file_paths}{[character(1+)] A vector of file paths to read.} +} +\value{ +[list] A named list of data frames, where each name is the basename of the corresponding file path. +} +\description{ +This function reads data from multiple file paths and returns a list of data frames. +It supports reading RDS and SAS7BDAT files. 
+} +\examples{ +path <- system.file("examples", "iris.sas7bdat", package = "haven") +data_list <- load_data_files(file_paths = path) +str(data_list) + +} diff --git a/man/read_file.Rd b/man/read_file.Rd deleted file mode 100644 index e46767b..0000000 --- a/man/read_file.Rd +++ /dev/null @@ -1,19 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/utils.R -\name{read_file} -\alias{read_file} -\title{Reads RDS/SAS file and metadatas from first 6 items from file.info() its file path} -\usage{ -read_file(file_path, file_name) -} -\arguments{ -\item{file_path}{a path to a file} - -\item{file_name}{name of a file} -} -\value{ -a data object with an extra attribute of metadata -} -\description{ -Reads RDS/SAS file and metadatas from first 6 items from file.info() its file path -} From 30619aba5051fdbd973ea81d63a89919a2378a8e Mon Sep 17 00:00:00 2001 From: Ming Yang Date: Wed, 23 Oct 2024 13:50:20 +0800 Subject: [PATCH 12/45] Update tests --- tests/testthat.R | 5 ++++- tests/testthat/tests.R | 15 +++++++++++++++ 2 files changed, 19 insertions(+), 1 deletion(-) diff --git a/tests/testthat.R b/tests/testthat.R index 8d34285..784ab0f 100644 --- a/tests/testthat.R +++ b/tests/testthat.R @@ -1,3 +1,6 @@ pkg_name <- "dv.loader" + +library(testthat) library(pkg_name, character.only = TRUE) -testthat::test_check(pkg_name) + +test_check(pkg_name) diff --git a/tests/testthat/tests.R b/tests/testthat/tests.R index d5e5747..b5e0527 100644 --- a/tests/testthat/tests.R +++ b/tests/testthat/tests.R @@ -205,3 +205,18 @@ test_that( expect_equal(grepl(".RDS$", actual), TRUE) } ) + +test_that( + "prefer_sas is not used if file extension is included in file_names" %>% + vdoc[["add_spec"]](specs$prefer_sas), + { + data_list <- load_data( + sub_dir = local_test_path, + file_names = c("dummyads1.RDS", "dummyads2.sas7bdat"), + use_wd = TRUE, + prefer_sas = FALSE + ) + expect_equal(tools::file_ext(attr(data_list[[1]], "meta")$path), "RDS") + 
expect_equal(tools::file_ext(attr(data_list[[2]], "meta")$path), "sas7bdat") + } +) From 8298ec2c1beb7927434516b948d129418def066f Mon Sep 17 00:00:00 2001 From: Ming Yang Date: Wed, 23 Oct 2024 13:59:31 +0800 Subject: [PATCH 13/45] Update test setup file --- tests/testthat/setup.R | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/tests/testthat/setup.R b/tests/testthat/setup.R index 1dab7a6..47bcc78 100644 --- a/tests/testthat/setup.R +++ b/tests/testthat/setup.R @@ -1,9 +1,7 @@ # validation (S) vdoc <- local({ - # ########## # package_name is used # INSIDE # the sourced file below - # ########## - package_name <- read.dcf("../../DESCRIPTION")[, "Package"] + package_name <- "dv.loader" # read.dcf("../../DESCRIPTION")[, "Package"] utils_file_path <- system.file("validation", "utils-validation.R", package = package_name, mustWork = TRUE) source(utils_file_path, local = TRUE)[["value"]] }) From 9cb51ac4d4da716bdc2426e3556de34ef6597401 Mon Sep 17 00:00:00 2001 From: Ming Yang Date: Wed, 23 Oct 2024 14:00:35 +0800 Subject: [PATCH 14/45] Update integration guide --- vignettes/integration-guide.Rmd | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) diff --git a/vignettes/integration-guide.Rmd b/vignettes/integration-guide.Rmd index 802f252..f142d6c 100644 --- a/vignettes/integration-guide.Rmd +++ b/vignettes/integration-guide.Rmd @@ -31,12 +31,13 @@ remotes::install_github(repo = paste0("Boehringer-Ingelheim/", pkgs)) ```{r, eval=FALSE} # Set the NFS path as an environment variable -Sys.setenv(RXD_DATA = "path/to/network-file-system") +Sys.setenv(RXD_DATA = find.package("dv.loader")) # Load data files from the specified sub-directory of a network file system (NFS) data_list <- dv.loader::load_data( - sub_dir = "sub-directory/of/network-file-system", - file_names = c("adsl.sas7bdat", "adae.sas7bdat") + sub_dir = "extdata/pharmaverseadam", + file_names = c("adsl_sample", "adae_sample"), + print_file_paths = TRUE ) ``` @@ -47,7 +48,7 @@ 
data_list <- dv.loader::load_data( module_list <- list( "Data Listings" = dv.listings::mod_listings( module_id = "data_listings", - dataset_names = c("adsl.sas7bdat", "adae.sas7bdat") + dataset_names = c("adsl_sample", "adae_sample") ) ) ``` @@ -57,9 +58,9 @@ module_list <- list( ```{r, eval=FALSE} # Run the DaVinci application with data and module lists defined earlier dv.manager::run_app( - data = list("Datasets" = data_list), + data = list("Demo Data" = data_list), module_list = module_list, - filter_data = "adsl.sas7bdat", + filter_data = "adsl_sample", title = "DaVinci Application" ) ``` From 01de80362398a9f717523100b8b16152fcba1473 Mon Sep 17 00:00:00 2001 From: Ming Yang Date: Wed, 23 Oct 2024 14:01:23 +0800 Subject: [PATCH 15/45] Make get_base_dir internal function --- R/dvloader.R | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/R/dvloader.R b/R/dvloader.R index ee8bb82..c07a534 100644 --- a/R/dvloader.R +++ b/R/dvloader.R @@ -15,8 +15,9 @@ #' Sys.setenv(BASE_DIR = temp_dir) #' #' # Get the base directory path -#' get_base_dir("BASE_DIR") +#' dv.loader:::get_base_dir("BASE_DIR") #' +#' @keywords internal get_base_dir <- function(env_var) { # Ensure env_var is a single character string checkmate::assert_character(env_var, len = 1) From b111f9a5c38456acdcca990fe1ce2deff19bf14b Mon Sep 17 00:00:00 2001 From: Ming Yang Date: Wed, 23 Oct 2024 14:01:53 +0800 Subject: [PATCH 16/45] Export get_file_paths and load_data_files --- NAMESPACE | 2 ++ 1 file changed, 2 insertions(+) diff --git a/NAMESPACE b/NAMESPACE index 79ce8a0..be207da 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -1,5 +1,7 @@ # Generated by roxygen2: do not edit by hand export(get_cre_path) +export(get_file_paths) export(get_nfs_path) export(load_data) +export(load_data_files) From a4801246b54c1fd1a58ee69ab2366c5de2f91ce7 Mon Sep 17 00:00:00 2001 From: Ming Yang Date: Wed, 23 Oct 2024 14:02:11 +0800 Subject: [PATCH 17/45] Update DESCRIPTION file --- DESCRIPTION | 9 ++++++--- 1 file 
changed, 6 insertions(+), 3 deletions(-) diff --git a/DESCRIPTION b/DESCRIPTION index a12e282..05bf0ab 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,7 +1,7 @@ Package: dv.loader Type: Package Title: Data loading module -Version: 2.0.0 +Version: 2.1.0 Authors@R: c( person( "Boehringer-Ingelheim Pharma GmbH & Co.KG", role = c("cph", "fnd")), person( given = "Ming", family = "Yang", role = c("aut", "cre"), email = "ming.yang.ext@boehringer-ingelheim.com"), @@ -13,10 +13,13 @@ License: Apache License (>= 2) Encoding: UTF-8 LazyData: true Depends: R (>= 3.5.0) -Imports: haven +Imports: + checkmate, + haven Suggests: - testthat, + testthat (>= 3.0.0), knitr, rmarkdown RoxygenNote: 7.3.0 VignetteBuilder: knitr +Config/testthat/edition: 3 From 8722c94e24ca4f589a1ef706b3aca953a2251a52 Mon Sep 17 00:00:00 2001 From: Ming Yang Date: Wed, 23 Oct 2024 14:20:45 +0800 Subject: [PATCH 18/45] Update changelog --- NEWS.md | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/NEWS.md b/NEWS.md index a5c5124..e3df5dc 100644 --- a/NEWS.md +++ b/NEWS.md @@ -1,3 +1,11 @@ +# dv.loader 2.1.0 + +- Refactored code to improve readability and maintainability. + +- Fixed issue of partial matching when the `file_names` argument contains no file extensions. + +- Added arguments `env_var` and `print_file_paths` in `load_data()` function to provide more flexibility and control. + # dv.loader 2.0.0 - GitHub release with QC report From 8bcefd3ffa78c10c7a969d652d10f0ab7d947478 Mon Sep 17 00:00:00 2001 From: Ming Yang Date: Wed, 23 Oct 2024 14:43:10 +0800 Subject: [PATCH 19/45] Update roxygen examples --- R/utils.R | 2 ++ man/get_file_paths.Rd | 2 ++ 2 files changed, 4 insertions(+) diff --git a/R/utils.R b/R/utils.R index 93ac178..c7a0089 100644 --- a/R/utils.R +++ b/R/utils.R @@ -10,6 +10,7 @@ #' @return [character] A vector of normalized file paths. 
#' #' @examples +#' \dontrun{ #' temp_dir <- tempdir() #' #' file_names <- c("adsl", "adae") @@ -23,6 +24,7 @@ #' get_file_paths(dir_path = temp_dir, file_names = file_names, prefer_sas = TRUE) #' #' unlink(temp_dir, recursive = TRUE) +#' } #' #' @export get_file_paths <- function(dir_path, file_names, prefer_sas = FALSE) { diff --git a/man/get_file_paths.Rd b/man/get_file_paths.Rd index dc19143..57751eb 100644 --- a/man/get_file_paths.Rd +++ b/man/get_file_paths.Rd @@ -21,6 +21,7 @@ This function constructs file paths for given file names, handling both RDS and It can prioritize SAS files over RDS files based on the `prefer_sas` parameter. } \examples{ +\dontrun{ temp_dir <- tempdir() file_names <- c("adsl", "adae") @@ -34,5 +35,6 @@ get_file_paths(dir_path = temp_dir, file_names = file_names) get_file_paths(dir_path = temp_dir, file_names = file_names, prefer_sas = TRUE) unlink(temp_dir, recursive = TRUE) +} } From 5b719dd80cb812141672ba4b5ee379f496f9b12f Mon Sep 17 00:00:00 2001 From: Ming Yang Date: Wed, 23 Oct 2024 15:08:16 +0800 Subject: [PATCH 20/45] Fix lintr issues --- R/dvloader.R | 12 ++++++++++-- tests/testthat/setup.R | 2 +- 2 files changed, 11 insertions(+), 3 deletions(-) diff --git a/R/dvloader.R b/R/dvloader.R index c07a534..b983c00 100644 --- a/R/dvloader.R +++ b/R/dvloader.R @@ -74,7 +74,8 @@ get_cre_path <- function() { #' @param use_wd [logical(1)] Logical indicating whether to use the current working directory. Default is FALSE. #' @param prefer_sas [logical(1)] Logical indicating whether to prefer SAS7BDAT files over RDS. Default is FALSE. #' @param env_var [character(1)] The environment variable name for the base directory. Default is "RXD_DATA". -#' @param print_file_paths [logical(1)] Logical indicating whether to print the directory path and file names. Default is FALSE. +#' @param print_file_paths [logical(1)] Logical indicating whether to print the directory path and file names. +#' Default is FALSE. 
#' #' @return A named list of data frames, where each name corresponds to a loaded file. #' @@ -92,7 +93,14 @@ get_cre_path <- function() { #' Sys.setenv(RXD_DATA = base_dir) #' #' @export -load_data <- function(sub_dir = NULL, file_names, use_wd = FALSE, prefer_sas = FALSE, env_var = "RXD_DATA", print_file_paths = FALSE) { +load_data <- function( + sub_dir = NULL, + file_names, + use_wd = FALSE, + prefer_sas = FALSE, + env_var = "RXD_DATA", + print_file_paths = FALSE + ) { # Input validation checkmate::assert_character(sub_dir, len = 1, null.ok = TRUE) checkmate::assert_character(file_names, min.len = 1) diff --git a/tests/testthat/setup.R b/tests/testthat/setup.R index 47bcc78..7e92070 100644 --- a/tests/testthat/setup.R +++ b/tests/testthat/setup.R @@ -1,7 +1,7 @@ # validation (S) vdoc <- local({ # package_name is used # INSIDE # the sourced file below - package_name <- "dv.loader" # read.dcf("../../DESCRIPTION")[, "Package"] + package_name <- "dv.loader" utils_file_path <- system.file("validation", "utils-validation.R", package = package_name, mustWork = TRUE) source(utils_file_path, local = TRUE)[["value"]] }) From 62545e96e276152044cdd4ae60091aff978fb9c1 Mon Sep 17 00:00:00 2001 From: Ming Yang Date: Wed, 23 Oct 2024 15:09:24 +0800 Subject: [PATCH 21/45] Fix styler issues --- R/dvloader.R | 41 ++++++++++++++++----------------- R/utils.R | 16 ++++++------- tests/testthat/setup.R | 2 +- vignettes/integration-guide.Rmd | 6 ++--- 4 files changed, 32 insertions(+), 33 deletions(-) diff --git a/R/dvloader.R b/R/dvloader.R index b983c00..139ea8c 100644 --- a/R/dvloader.R +++ b/R/dvloader.R @@ -10,13 +10,13 @@ #' @examples #' # Create a temporary directory #' temp_dir <- tempdir() -#' +#' #' # Set the BASE_DIR environment variable #' Sys.setenv(BASE_DIR = temp_dir) -#' +#' #' # Get the base directory path #' dv.loader:::get_base_dir("BASE_DIR") -#' +#' #' @keywords internal get_base_dir <- function(env_var) { # Ensure env_var is a single character string @@ -42,12 
+42,12 @@ get_base_dir <- function(env_var) { #' This function retrieves the path to the NFS (Network File System) directory. #' #' @param env_var [character(1)] The environment variable name for the base directory. Default is "RXD_DATA". -#' +#' #' @return [character(1)] The path to the NFS directory. #' #' @export get_nfs_path <- function(env_var = "RXD_DATA") { - get_base_dir(env_var = env_var) + get_base_dir(env_var = env_var) } @@ -55,12 +55,12 @@ get_nfs_path <- function(env_var = "RXD_DATA") { #' #' This function retrieves the path to the CRE (Clinical Research Environment) directory. #' It uses the "RXD_DATA" environment variable as the base directory. -#' +#' #' @return [character(1)] The path to the CRE directory. #' #' @export get_cre_path <- function() { - get_base_dir(env_var = "RXD_DATA") + get_base_dir(env_var = "RXD_DATA") } @@ -74,7 +74,7 @@ get_cre_path <- function() { #' @param use_wd [logical(1)] Logical indicating whether to use the current working directory. Default is FALSE. #' @param prefer_sas [logical(1)] Logical indicating whether to prefer SAS7BDAT files over RDS. Default is FALSE. #' @param env_var [character(1)] The environment variable name for the base directory. Default is "RXD_DATA". -#' @param print_file_paths [logical(1)] Logical indicating whether to print the directory path and file names. +#' @param print_file_paths [logical(1)] Logical indicating whether to print the directory path and file names. #' Default is FALSE. #' #' @return A named list of data frames, where each name corresponds to a loaded file. 
@@ -82,25 +82,24 @@ get_cre_path <- function() { #' @examples #' # Get the current value of the RXD_DATA environment variable #' base_dir <- Sys.getenv("RXD_DATA") -#' +#' #' # Set the RXD_DATA environment variable to the path of the haven package #' Sys.setenv(RXD_DATA = find.package("haven")) -#' +#' #' data_list <- load_data(sub_dir = "examples", file_names = c("iris.sas7bdat")) #' str(data_list) -#' +#' #' # Reset the RXD_DATA environment variable to its original value #' Sys.setenv(RXD_DATA = base_dir) -#' +#' #' @export load_data <- function( - sub_dir = NULL, - file_names, - use_wd = FALSE, - prefer_sas = FALSE, - env_var = "RXD_DATA", - print_file_paths = FALSE - ) { + sub_dir = NULL, + file_names, + use_wd = FALSE, + prefer_sas = FALSE, + env_var = "RXD_DATA", + print_file_paths = FALSE) { # Input validation checkmate::assert_character(sub_dir, len = 1, null.ok = TRUE) checkmate::assert_character(file_names, min.len = 1) @@ -114,7 +113,7 @@ load_data <- function( } else { base_dir <- get_base_dir(env_var = env_var) } - + # Construct the full directory path dir_path <- if (is.null(sub_dir)) base_dir else file.path(base_dir, sub_dir) @@ -125,7 +124,7 @@ load_data <- function( file_paths <- get_file_paths(dir_path = dir_path, file_names = file_names, prefer_sas = prefer_sas) # Print the directory path and file names if requested - if (isTRUE(print_file_paths)) { + if (isTRUE(print_file_paths)) { cat("Loading data from", dir_path, "\n") cat("Loading data file(s):", basename(file_paths), "\n") } diff --git a/R/utils.R b/R/utils.R index c7a0089..b84d8c1 100644 --- a/R/utils.R +++ b/R/utils.R @@ -12,17 +12,17 @@ #' @examples #' \dontrun{ #' temp_dir <- tempdir() -#' +#' #' file_names <- c("adsl", "adae") -#' +#' #' file.create(file.path(temp_dir, paste0(file_names, ".rds"))) #' file.create(file.path(temp_dir, paste0(file_names, ".sas7bdat"))) -#' +#' #' list.files(temp_dir) -#' +#' #' get_file_paths(dir_path = temp_dir, file_names = file_names) #' 
get_file_paths(dir_path = temp_dir, file_names = file_names, prefer_sas = TRUE) -#' +#' #' unlink(temp_dir, recursive = TRUE) #' } #' @@ -36,14 +36,14 @@ get_file_paths <- function(dir_path, file_names, prefer_sas = FALSE) { file_paths <- lapply(file_names, function(file_name) { file_path <- file.path(dir_path, file_name) file_ext <- tools::file_ext(file_name) - + if (file_ext == "") { # If no extension is provided, check for both RDS and SAS files rds_file_name <- paste0(file_name, ".rds") sas_file_name <- paste0(file_name, ".sas7bdat") rds_file_path <- file.path(dir_path, rds_file_name) sas_file_path <- file.path(dir_path, sas_file_name) - + if (isTRUE(prefer_sas)) { # Prefer SAS file if it exists, otherwise use RDS if (file.exists(sas_file_path)) { @@ -92,7 +92,7 @@ get_file_paths <- function(dir_path, file_names, prefer_sas = FALSE) { #' path <- system.file("examples", "iris.sas7bdat", package = "haven") #' data_list <- load_data_files(file_paths = path) #' str(data_list) -#' +#' #' @export load_data_files <- function(file_paths) { # Validate input parameters diff --git a/tests/testthat/setup.R b/tests/testthat/setup.R index 7e92070..36b2e31 100644 --- a/tests/testthat/setup.R +++ b/tests/testthat/setup.R @@ -1,7 +1,7 @@ # validation (S) vdoc <- local({ # package_name is used # INSIDE # the sourced file below - package_name <- "dv.loader" + package_name <- "dv.loader" utils_file_path <- system.file("validation", "utils-validation.R", package = package_name, mustWork = TRUE) source(utils_file_path, local = TRUE)[["value"]] }) diff --git a/vignettes/integration-guide.Rmd b/vignettes/integration-guide.Rmd index f142d6c..5a38316 100644 --- a/vignettes/integration-guide.Rmd +++ b/vignettes/integration-guide.Rmd @@ -35,9 +35,9 @@ Sys.setenv(RXD_DATA = find.package("dv.loader")) # Load data files from the specified sub-directory of a network file system (NFS) data_list <- dv.loader::load_data( - sub_dir = "extdata/pharmaverseadam", - file_names = c("adsl_sample", 
"adae_sample"), - print_file_paths = TRUE + sub_dir = "extdata/pharmaverseadam", + file_names = c("adsl_sample", "adae_sample"), + print_file_paths = TRUE ) ``` From 2bbf397e7802579e944da524a2891065511832f3 Mon Sep 17 00:00:00 2001 From: Ming Yang Date: Wed, 23 Oct 2024 18:15:25 +0800 Subject: [PATCH 22/45] Try to identify issue from tests --- R/utils.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/R/utils.R b/R/utils.R index b84d8c1..331c9c1 100644 --- a/R/utils.R +++ b/R/utils.R @@ -39,7 +39,7 @@ get_file_paths <- function(dir_path, file_names, prefer_sas = FALSE) { if (file_ext == "") { # If no extension is provided, check for both RDS and SAS files - rds_file_name <- paste0(file_name, ".rds") + rds_file_name <- paste0(file_name, ".RDS") sas_file_name <- paste0(file_name, ".sas7bdat") rds_file_path <- file.path(dir_path, rds_file_name) sas_file_path <- file.path(dir_path, sas_file_name) From a02950e21ecb10efb94d17dcfafaae17a83ff13a Mon Sep 17 00:00:00 2001 From: Ming Yang Date: Wed, 23 Oct 2024 19:57:31 +0800 Subject: [PATCH 23/45] Use pattern matching to find files when no file extension is provided --- R/utils.R | 46 +++++++++++++++++++++++++++++----------------- 1 file changed, 29 insertions(+), 17 deletions(-) diff --git a/R/utils.R b/R/utils.R index 331c9c1..8b2ad1d 100644 --- a/R/utils.R +++ b/R/utils.R @@ -38,29 +38,41 @@ get_file_paths <- function(dir_path, file_names, prefer_sas = FALSE) { file_ext <- tools::file_ext(file_name) if (file_ext == "") { - # If no extension is provided, check for both RDS and SAS files - rds_file_name <- paste0(file_name, ".RDS") - sas_file_name <- paste0(file_name, ".sas7bdat") - rds_file_path <- file.path(dir_path, rds_file_name) - sas_file_path <- file.path(dir_path, sas_file_name) + # Get all files in the directory + candidates <- basename(list.files(dir_path)) + # Find matching RDS files + rds_match <- grep( + pattern = paste0("^", file_name, "\\.rds$"), + x = candidates, + ignore.case = TRUE, + 
value = TRUE + ) + + # Find matching SAS files + sas_match <- grep( + pattern = paste0("^", file_name, "\\.sas7bdat$"), + x = candidates, + ignore.case = TRUE, + value = TRUE + ) + + # Prefer SAS file if it exists, otherwise use RDS if (isTRUE(prefer_sas)) { - # Prefer SAS file if it exists, otherwise use RDS - if (file.exists(sas_file_path)) { - return(sas_file_path) - } else if (file.exists(rds_file_path)) { - return(rds_file_path) + if (length(sas_match) > 0) { + return(file.path(dir_path, sas_match[1])) + } else if (length(rds_match) > 0) { + return(file.path(dir_path, rds_match[1])) } else { - stop(dir_path, " does not contain: ", rds_file_name, " or ", sas_file_name) + stop(dir_path, " does not contain SAS or RDS file: ", file_name) } } else if (isFALSE(prefer_sas)) { - # Prefer RDS file if it exists, otherwise use SAS - if (file.exists(rds_file_path)) { - return(rds_file_path) - } else if (file.exists(sas_file_path)) { - return(sas_file_path) + if (length(rds_match) > 0) { + return(file.path(dir_path, rds_match[1])) + } else if (length(sas_match) > 0) { + return(file.path(dir_path, sas_match[1])) } else { - stop(dir_path, " does not contain: ", rds_file_name, " or ", sas_file_name) + stop(dir_path, " does not contain RDS or SAS file: ", file_name) } } } else { From 51ba3ce9bef83c94c617985b6ea35077ba71d091 Mon Sep 17 00:00:00 2001 From: Ming Yang Date: Wed, 23 Oct 2024 20:04:25 +0800 Subject: [PATCH 24/45] Fix styler issues --- R/utils.R | 6 +++--- man/load_data.Rd | 3 ++- 2 files changed, 5 insertions(+), 4 deletions(-) diff --git a/R/utils.R b/R/utils.R index 8b2ad1d..17c4e12 100644 --- a/R/utils.R +++ b/R/utils.R @@ -43,7 +43,7 @@ get_file_paths <- function(dir_path, file_names, prefer_sas = FALSE) { # Find matching RDS files rds_match <- grep( - pattern = paste0("^", file_name, "\\.rds$"), + pattern = paste0("^", file_name, "\\.rds$"), x = candidates, ignore.case = TRUE, value = TRUE @@ -51,8 +51,8 @@ get_file_paths <- function(dir_path, file_names, 
prefer_sas = FALSE) { # Find matching SAS files sas_match <- grep( - pattern = paste0("^", file_name, "\\.sas7bdat$"), - x = candidates, + pattern = paste0("^", file_name, "\\.sas7bdat$"), + x = candidates, ignore.case = TRUE, value = TRUE ) diff --git a/man/load_data.Rd b/man/load_data.Rd index 50f7187..9659710 100644 --- a/man/load_data.Rd +++ b/man/load_data.Rd @@ -24,7 +24,8 @@ load_data( \item{env_var}{[character(1)] The environment variable name for the base directory. Default is "RXD_DATA".} -\item{print_file_paths}{[logical(1)] Logical indicating whether to print the directory path and file names. Default is FALSE.} +\item{print_file_paths}{[logical(1)] Logical indicating whether to print the directory path and file names. +Default is FALSE.} } \value{ A named list of data frames, where each name corresponds to a loaded file. From 6e4a18acbf8f1819c865e471891b1c145441f748 Mon Sep 17 00:00:00 2001 From: Ming Yang Date: Fri, 1 Nov 2024 15:41:01 +0800 Subject: [PATCH 25/45] Save demo data to a temp dir --- vignettes/integration-guide.Rmd | 25 ++++++++++++++++++++----- 1 file changed, 20 insertions(+), 5 deletions(-) diff --git a/vignettes/integration-guide.Rmd b/vignettes/integration-guide.Rmd index 5a38316..99cc47f 100644 --- a/vignettes/integration-guide.Rmd +++ b/vignettes/integration-guide.Rmd @@ -27,16 +27,31 @@ pkgs <- c("dv.loader", "dv.manager", "dv.listings") remotes::install_github(repo = paste0("Boehringer-Ingelheim/", pkgs)) ``` +For the purpose of this vignette, we will use the data from the {pharmaverseadam} package and save the `adsl` and `adae` datasets as .sas7bdat files in a temporary directory. 
+ +```{r, eval=FALSE} +# Create a temporary directory +temp_dir <- tempdir() + +# Create a sub-directory for saving the pharmaverseadam data +data_dir <- file.path(temp_dir, "pharmaverseadam") +dir.create(data_dir) + +# Save the adsl and adae datasets to the temporary directory +haven::write_sas(pharmaverseadam::adsl, file.path(data_dir, "adsl.sas7bdat")) +haven::write_sas(pharmaverseadam::adae, file.path(data_dir, "adae.sas7bdat")) +``` + ## Step 1: Load data files ```{r, eval=FALSE} # Set the NFS path as an environment variable -Sys.setenv(RXD_DATA = find.package("dv.loader")) +Sys.setenv(RXD_DATA = temp_dir) # Load data files from the specified sub-directory of a network file system (NFS) data_list <- dv.loader::load_data( - sub_dir = "extdata/pharmaverseadam", - file_names = c("adsl_sample", "adae_sample"), + sub_dir = "pharmaverseadam", + file_names = c("adsl", "adae"), print_file_paths = TRUE ) ``` @@ -48,7 +63,7 @@ data_list <- dv.loader::load_data( module_list <- list( "Data Listings" = dv.listings::mod_listings( module_id = "data_listings", - dataset_names = c("adsl_sample", "adae_sample") + dataset_names = c("adsl", "adae") ) ) ``` @@ -60,7 +75,7 @@ module_list <- list( dv.manager::run_app( data = list("Demo Data" = data_list), module_list = module_list, - filter_data = "adsl_sample", + filter_data = "adsl", title = "DaVinci Application" ) ``` From 89413a77f7b4bdca1d2ceab35e7f3b6c9bc3bf08 Mon Sep 17 00:00:00 2001 From: Ming Yang Date: Fri, 1 Nov 2024 15:42:30 +0800 Subject: [PATCH 26/45] Remove pharmaverseadam data from package --- inst/extdata/data.R | 41 ------------------ inst/extdata/pharmaverseadam/adae_sample.rds | Bin 5592 -> 0 bytes .../pharmaverseadam/adae_sample.sas7bdat | Bin 69632 -> 0 bytes inst/extdata/pharmaverseadam/adsl_sample.rds | Bin 2201 -> 0 bytes .../pharmaverseadam/adsl_sample.sas7bdat | Bin 28672 -> 0 bytes 5 files changed, 41 deletions(-) delete mode 100644 inst/extdata/data.R delete mode 100644 
inst/extdata/pharmaverseadam/adae_sample.rds delete mode 100644 inst/extdata/pharmaverseadam/adae_sample.sas7bdat delete mode 100644 inst/extdata/pharmaverseadam/adsl_sample.rds delete mode 100644 inst/extdata/pharmaverseadam/adsl_sample.sas7bdat diff --git a/inst/extdata/data.R b/inst/extdata/data.R deleted file mode 100644 index 29c1d03..0000000 --- a/inst/extdata/data.R +++ /dev/null @@ -1,41 +0,0 @@ -# Create directory for pharmaverseadam data if it doesn't exist -data_dir <- file.path("inst", "extdata", "pharmaverseadam") -if (!dir.exists(data_dir)) { - dir.create(data_dir, recursive = TRUE) -} - -# Sample 10 subjects from adsl data -set.seed(123) # For reproducibility -adsl_sample <- pharmaverseadam::adsl |> - dplyr::sample_n(size = 10) - -# Filter adae data for the 10 subjects in adsl_sample -adae_sample <- pharmaverseadam::adae |> - dplyr::filter(USUBJID %in% adsl_sample$USUBJID) - -# Save adsl_sample to rds file -saveRDS( - object = adsl_sample, - file = file.path(data_dir, "adsl_sample.rds") -) - -# Save adae_sample to rds file -saveRDS( - object = adae_sample, - file = file.path(data_dir, "adae_sample.rds") -) - -# Write adsl_sample to sas7bdat file -haven::write_sas( - data = adsl_sample, - path = file.path(data_dir, "adsl_sample.sas7bdat") -) - -# Write adae_sample to sas7bdat file -haven::write_sas( - data = adae_sample, - path = file.path(data_dir, "adae_sample.sas7bdat") -) - -# Add a message to confirm data creation -message("Sample data files have been created in ", data_dir) diff --git a/inst/extdata/pharmaverseadam/adae_sample.rds b/inst/extdata/pharmaverseadam/adae_sample.rds deleted file mode 100644 index f806ce5f2455dcda329ba63dd5458b71127e9414..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 5592 zcmV;}6({N+iwFP!000001MOW|Y~)6I=8QBteC&*8JU+&o9ojqVwGp@6>`Ri5SQJUA z>1mN|lC2qefs9)%N}}Cr%4|ww8OV;X39yR|^0L@Ck2W?h0rn+GkRS;V?7ZY*k=>af zF%aYc;(bY8l8qlW$L7IK@n#iS-OW}@Ey)}}^VR>?_1FJb6|1W2xcLA<5F^Cs=ow<< 
z9B>lnYRW=p2JkKcLM4c^7XUs>j1rRoA7WTuW~zKeRI*gcOL?cBM~t2Yyfd!fxn_M; zZ34%I5c7g;?yT*R{F>S_wRKH30Pi!T;hd$p)OF-8+jwU_N9=Wp)wk+ei=DKNwaiLuCupkXbW8u429q)+}J*(9^@_Z$x7xaqVbyi5rLJ`z1e{;yCAvcL!UD%AyePQrQHhqzu6v>?)g)f2 z$u2!6axAw*dqWdm0-p3dU*VON4!lX0rzNQ@a$dv-xtokA@pLHH7#5m2N`qwIe3KVA z8OwOEyiyevPLP8{ugzCPktONMb-DtAlGU;(SIe|Q%N)=IBXPjN`fcPoFOW`wLULi) z5pg+bMJWf`n&4=OtkS%Iqr1c(msbQvta<~ANqP~^KT`UKLFqZE%(Idx^B(icyvVMQ z3bzEjXr(4aOZ|`?4c6LJm6jA9R2G@z-vp6l@&>Pz0mX;{OG_){A}4_KD!d$)=qb8d zt?-P^B4u9TqDs45rYn^i!wWX^1#+2}Yjnjc@dDs3*1Qsr33N^N>rfAgkJG|Up7X0A zLQa%9nx&aCZd~?NN!`{B6SN{lHMYog-5||Pm892pRimwv+%9NZ{k5ubUSZ0-u;`NK z`e{=WPMh-8F-gyZ96)|Dj)CNgklh>8#fo@?bXrdp)BqV&1Fy)SHQ5b`lxRNGd;)`z zR+H(>cB?;sz~+OEk2If4ezhmjhIGEdFP0Unc5rOCGXWY0g#<l2#O&x!#f6tyMiz(Qm75ldPz_YSV66L}+k#=TpLx z_XjEVh8QJ37EvpkSc2)?bVr&h%pm!7p{G03C2F=KJzb&@ z{~|;eXYw6sG{4+T0ZuPZb)>t+FBGzfezG0QQ=FMb^aZDv%OU#7!sW>#`pB2y^s@z| z{DoY{_{HLEM><==O{ve9wT;bmv@#IHsQ6Jt#ki(FS*|QvbXY4lP{XZ<;tpBwK10Nwkpf#^|0#-d~OL zLQH476fT=Z63s`7KaCh_+V5DrCMm2SnWBW~`!doyAlBltw0LfwgBivI0$upS8#EmZw~sXKD&9aAHj+6<(HW9Le&s zD6yQ>U+9867!Ew_s5HC83%smIp&_d^eCES)U9QS2Oc_jrB%X;R+|l#OA`Q|Oc|ieX z1>@;TB*YXiyk3)5ULdO?@DX@n(Uz+!i44cqEP)wu$sbc6EY@M__X(igWy@b)k- z@!C$m!08t_{Q{?7;PeapMf(C*4Q;D#_@9OJ3p^0SeSz`X#(jZDm-q^sz&)`fveqv` z9EMJs-k|eON+->KOy-t3e^&!c^_$mVQt70nc)$2cbWv7!RYNoPUgTS*+G+z=OuacH zoG!=85ypyh#M|Ow8CXK!X~*m&B8bV)!M_xH#aIPDP20cSl1lv@wME|6%uUic8lf0! 
z-Q0@bzBMX{g5Ns9@Oqw}uk=ofvdrV@F^O9egLWIr3C=Z1DlmnH#pRKtl>Np_s-2{C zXIYY_4ae`KV<+GEDmTg0ciQ!4e7Aa0;uz8T7_cu&oGezB{p}W41W_RaUxyk(2)u*` zk^T6^JzPr++s?y4pw^$*^qm z^(yG8w6H#ffk zmN=fv3)dv|-5t#U-xJVQs)pVKy;_?r>+Nl;XVc!toijf!T0Jce%x-7CO;)sZl~gvZ z#R%}ZX>Fv&7?Lp_6B~^kd*`{mNF;*c@S5I^TTy=?m!`cZ#5!^qgkjYXRe;>5V zna<>9(J)jbR+N*^!km00FC=AK9%=^W%x0);H%>9*Z&7tQAn{U!gQQ%_s^?O|^mf z2Dph$!`12l)!aYM(1GKhfKO2Iqt!egBM(~5<2bn)RQ+f*&quToUg(>jiAEj3EWQ$U3%xZ{yIB@a?oDBmwrFmU-!~G+nlKExL&%{p}ckosF`}4PI0Wt zlgIhQRK{o(?#8hyS}|VUQlx(_Myj&S>F)=nuW+rk?lsKb^e{I~*#G8U!C)3a(#0tR zs|YS4SaGrbk%M#lFy5GR@Edm>WC$2h|Ce3ViQ8AN;!(Qt3f#iE}s>&9c_^Pem4JN_<4{9l3N`D%X| zgDjlR9OI^+B_Ql`ugTMp+O1k2uk;Z|>GvOWGR0z}Pn?X$dw;O+;E#y|;r*#(c<*2L z6Cqk2S8iNgw|W*!(F>ZH8&1_LiC5fioroOhXo=Tb}MJMa(8SFkGP)&?^9tZxirJ$(qRiiHm@YYHlj;7nUeH}!&;CmYdz*ZWpla{Xl|ANL1e%Gb9oeuq+M;IZtq<$cjUGlr$L7#o25gAF#O@)| zAdJzi&zigcnAI4;VDE|jczhedb&n=s{MJ5{6xp)8!o=x0%NWS-e=BngpvGM^EK=A?-V5aBy{tpg+@0V`=KPST2=Vb7aNGH_Y z0O?w-jhMCCyT3UB(gq~83HqE})mla*;?`(!^?lH5v|hUC8ZDYHx<-ro zzXHeWd5zW!k6WYduf9JC)pxA2AMKpU+@>p(hlDE}TDMpCVVoggd;w7o>GH|={O6u? 
z{h!156EOd^PYGB)6fYq8d;AF)`&9O2U)k)A;e1J-6P_nxjt{NX_pAlkP6Hb!%u%|# z<=?BEox%;Gj7ieyZ*=r{)R487X1WKKPs{q0huzkvR@Ju>;Na$QS0X>8+H2~pz3Hmo zxtv=$(lgKLnP=R%q?g0b=>gF60BHXQKri~0t+9wwZ1z%$6-S@#d+=+Q9a(hmCTuHd8*6WFfb zECybf9v9arFicshzh^0a%2N6i>Fyo3Uedy>bdH8z-s{aBJexK+ckq5*2e<#A-e~9% zyTfq@*99NU9Xv#LFgI3j*!SvU9;)+S39^3`kEsj(&T4d%cwZbUm+>!hKYETOPaz>E zXQhK~;m<|3`45*f8`-mcc%0}FheP9Zx2b0x+^#^kzDw3a4R((g*-_A8k9B0rePT|J zJFpLllVk@r#3@C#%Fm)Zp@+*k(i1ZSsR@IsEuM8S!3}a$8&&9sUzX^XwwcI{L|4j=uQ=W z=(}%D8pbO9S0JVHEih%;QrAdEZ?!x1FxK(0$T`JCoD0DaK%w$46^h50?+zp_K6OC8 zRsR(#`;tE_y%6*gR`}w~3FEshJs&wkd`O(58npIVPeS(XrI&NJ?N`CXO>1b|(Q814 z-fNsRjQ=43QhM|%oS{p7(hzf>u{%Sv6^vVe9hvs$D7`pKgLKYxbI$rss&Gionc+Jz z+L^`ew{eDHvSw{1_E%Sl@W!S$2>Txvt9@eaZagY;4$AunLxo6&Z_Fh_v^=id7)6iJ z8*$4R=T!c`=^n%1Qv_R5Job5;hBn!}w_D?l2FqOtPf>Ej7i2FJ{W?gVx=H)6a@Qjss zg;sb$Onp<|)Hn4VurFqe#F?8d4RhD<*v)$DRz*)tjZ{|$=NwYD2NfnERn`Vdbtnkt z!sBYQ-fF39B#nQI`Vz=O-%^`eOC>A%ZD*hRA$g4hc^axO`Qec#SMnn+*TB6V4SCsb zq90Sg9@UOHBEqWehM{fSM|7<9>Y$mW9qt#o!#z(o%pL1#hP1yuA(E&4?Fr~_pG5lG zpq0fmu#uWfr_ST)j5qH%qwlm#V=u;-{x%sZa<_RO9BYGFWE0wvaV&YJ6LRcLap({J zv6~o&$eGFHB3lv;i&Kc)nS5xR?l$$T)7PE$_AiBZX9@cs7O(Y*6b4BbCk}|EggBKD z(ek)*<5UgZ3Ec7dfOPzItRzpzUngMvb=8~T2Ip3-vt(84jp#KpOQp^1xJ#ZkvlGzF zzT(bnI<_+J8kpVvIl{XB1c!h4t^ROl>TWC+zxS4nuYY#;K=?b~+fRhQzWqreM9bsI zo%|Y@uUn%x6FP6w%eCr>?}Qop{j#UbfjK^;#9Pw7eR3-1w%q;m&aJdK{I5Uzyf6Ir z@Bb(kjcMmL-S7VAodY56R$Lr@^DmPjS{_I4kqvvEK%HWadYlqh`UP>hd1|=AzB_+Zrq?;(M@Qu5ZwL;=E3dfiSU~q zpIXV_Z@W$rMvTfY$qhqQTjWArYg!e&$BSGozSki=#PTvD3JNdOICcQMNk&|%RygH| z87ENW!!6WFf5>0AC&<-OhJJ?yutP8#Bj5WZ0wJ;InOdT@%q z#x?8PZPmI~X0J`I8~Rpo8Ll-?3+cVr=lo8~`(;U=^Go!8nbXVIs=nURZ?{Og32J)} zUA=c>6(%hDt6P`pz~JXcUbcRkzRXxSPsl1NpgK%HTTG>zP;1z_yo_n z8=IP`CL<;Vy_LCr5*YoI?o<;!s|+&_aeDykP*%_dL@L!;flggV?!uUCQPnq$`u3*Q z027-Ht1Ey_HuPH^P8!DljujoSkr`(^=MC-_4Yaiky(lLQV^#k(N82|vb5m=P`K+UL zZ<$;%%;>jo=mj}xi1~5JRy@6Ks)n!kVD^$~{;)M4t@6(AuCV6emxEjNE!CS0_}eLD zrN*xC{@Wd_xJ2{b@$B)MT$_L0_dG{)GWUiT5)joO9?dD7v=juaR1_}}n_$?0WI09* 
zMXUy%AYwTju^fsxW|51Y=Y&J~zJQ(=S$Rbc(SQu6EQ?H#G9Vy7ZaGXy0YNiDKuBCA zAjFyy(nWBn-AbJig~dP;ndN1AJ`|W$%AvqzQLcu_3ctXGr4EV7D&Zn15XlJvIcEE= z1QhV}DMeC%03nL=sdGTEfZ)VXINHi?38EH4l(H01mLy9M74{24p0iU4N+*PfkodMw zK(IAQ2op6=&lOgbxgh6JIxy-e9awJMD)G`nU`P-wFde8Cyze&#RsV5m0aCfN5LWqw zv~V-*ZP8(W5S(Wq?9Hl6@q&zTN?G9j zWNB%M2@>+#>jcdxVXrZV_9SP-nxII&1=yuP&{05yD^`}OLKGcRx*XWXCWDGF?o`6C zF9-VqI|t7TG15X%FF_>*wa=4nL0S&HBwAnt3aFI!Wd)T5&|L6}P(@bgN+>Q+?V{xE zL_a6f3%)tVE%gR|;}W;Xivk5D9P$QKKI=!Gu%tlfd7Fm&>PbVtJ>~V+){@iNeefUn zD*Pj|?BXrzjG3#=TWjl{Z2{#vA6ToK^{I8E?md+3=W+hld)vBU8dGcQx9Y~mE#yNp mO0zWXYL3V0wO!Q!172=dZJBNVX%*o0+5ZEBV@J-!*8l***DSRF diff --git a/inst/extdata/pharmaverseadam/adae_sample.sas7bdat b/inst/extdata/pharmaverseadam/adae_sample.sas7bdat deleted file mode 100644 index 28722160375269cf77e12bd2ce35fdac40c42677..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 69632 zcmeG_ZHybobtCB{ourc^$BN@Na=fusS4nj;dAu*#O0;|A-jOknWR^Th=NgdcJL-<< ziM;T5`UupTMU++v{KE*;51>J#qWX~p{!ySt>Y{E!6hULOZmKp0>HtQQ7AcUX3Lq3} z8?;K?_h#Pga+lmYE;q_uk|DsIeKYUP$D4U?=FQhK=KTKZd%t<+Pxs&d_ka1Bezxmt z(H9%>hx`L)+Y? 
zZj%2)Zf^xC4k!*N4k!*N4k!*N4k!*N4k!*N4*b|T@Za#mNK2lW{|9~^ua4mFlRfiz z$I$k8+t6U&ai`zgo?j;%<)=8X`8lu!w*NEs5KaQX@4I`2%RT8;IX`1!S=xMICNY~zutav+NM#tC$h0h<JKAk)p%QEca#xpcjxI=uUFCFpVxo$N2=VfQ3Ke@$|tTU6CBehvb-% zn6Y>{!JPp(oyg`P;b}O0bHae4Lw?NZ$u!WJCpaE(D+MrB7qccYfjGFLJl5nX?g1|M zuqIP+5uW2PFAp#RT!@9Se5zpbdY%GaO_?)!^Axj;xM>!WnH1m|)-+Zxy3B|dI4E4k z;@O!@!J1`>X-om)oKDO_ek0Kw=U*fWr+x%Kr@4y^!SW!Z&?Oj3o?!X7HHi!YKIaSI zPFg28zbqq@z!%7`f%TIHC~2k>>3qRR0}AAM%7PP4d1I272lI{|mN}KmW}*TgJb)?| z0St@>U%;I}H(y_!zoez+OO<;0Y`IiRRL>R570r0=xmxLbxmYh(D{Q7x{@7|s%dbB7 z(b8PK1sVBQf}qM=Nz1IBd#+St`Q>V5rCJ+Il+KnbrFj>!Ra&mr>fl)@)y`>Wt2M2@ zSkjF7^QGEKNi)ww9$311p|n!h(xvmIC5{JawE1cA>O2gZ66HC}U$J%xlB<=^Q_L(^ zE1fOXYBC{?oGdRaw#Mp#d5kT7Q?=^qvP{3DKjmSf3hc{YTB(=LX;7eSZJ}7v;!DMq z6~_F-1aYobJ_nhX-OHZUK^-^U8-moreKAb@I;|3N)@jzEkW+EP3P;yT3vI6x*DQcnO`Fmc+KS| zQG}wO1-4>jUbeY$Fv2zr$e#iF0P=`oljRz4%)Gcl2ln(w{>fG{bo!5dkxl~+W9vseTFHT!JF6Kfz zmFY|Wwp2Vg9a9nIvEy4+9nuRvrrtod0&D)U-w@}^kPQYP0FC*}z9mmB5w|oi#Xj4*l zgG3?WJZnR7$CIi+=W-78KWM5Iq32w2U=|>%97t_>uDs0a5app&SCH_?{Bw{PBT~o5N-J}<@-pfqo+T$3 z;+w0lqK+k0=D~G>A>Md(wNkHLa+krS@35aIebwSG^W(H9?3Y1%m#gzyBr5aZ7i_m} z{@zEs$vfgR~bi$zfxbc#<`ZxlX1FnTv4(@6}u~^7`BKW0-mhApO+&)S1UnJ zIaw?(q5j3>88sEEa?pIMLY^nfptL|S)N5rvR5?+GK3I_OxR4kt)GAiytLLcWf_iFI zB^WMsw7FDVUV(OjxzWzns^>&QN9BMW0Y^i=@OXJapRQCdz(8#YG#0k2YIO;l6*NCF zcEK@W@ocGnNy}B2S5dP;3loA(xY#sp049eV2 z)s_KI$i_(+U_dV(iwG1css-p7NuLNj<7~ZDv(cQQI*5Mpi8kZlL9y5QJ-{9Sj8|!$ zKe3I_aZ&(&xgEk?B3I4}>?2qg!S?g!cJb=Q3=i|T6$Ny%ZPy9aqIUoOhKK?hU z{B8O7@z44A@AL7$N%Jgi@%#AaeEj$M_zUj%KY#Rp!tnRU=oE6j?-$9tH~L23BcbT$ zL#Ce$NgwHZSE&3S=-VHPexOg@ebF2Lf3OW@9Dlvhe|y`z1$mG3uWfrU6#dKF?hi%( z;^(bu4AUC1OK)MzUqNL<$*s<@NP=~ng{+v5Bzz8C4c_J1Ha;dn;!Tz4}8-Dzv+R$O)xsh zpMUYd|LVd2o+tc2Jn$_K{v8i|*8}%aKP<~T;DPrOjKdAgpIm2=_?ioUD?8+YBLriz zNFQ~DZ(&D0;ZG7Q)B6y?vi_1D*z&;7df>AJ%k&orMty+!x!?)E=z%X0Eam$$!Ls~U z2*%+a#`h@?{ig{=y@BC>OfdE*2>&_3sE!c+3c=XlBK!?c_%}V_-zQk+?>51bU;j(6 z%=y%j8}mycfv5M($_Hy+Q6vg#R+RuTs2UCHQr6-=KWFMecXV{}$ojq4@5S+qX^d zV_=)$&whdrkjpEh%f}HuwrbJpuRZ zz}wOT`G2kdouSJAQvZ8; 
zq`%sais!}taPvjW{o&@@PW6YIuRPW-=V^MAKdoo}_4lJf8R+kIzVG%+JHySF&hLDz$M!48^q%Q8p8rET<$2Md?WdpD-@wjZ>;KLUtp6YE zpkZ9#`Rdm>{mmV{>i-*@{_{J+@xQSnT>E=AWctT;yf;+)Ij}Dpm8Bpz3LB<{=w~JMg*3B zV%zp`@^`zhSN$W>f2%Lt_~li5y_L$dhVwXk972`-Pb^aQKWBup|3!P)DEpsUqjD4n zeyTap+xX>`z1Sz=uQ&P^L((tq{UwonkLl}s!;QZV?+rKpd?+M6+Z%5Dd~2ZB@ypEt zwBOzs2seN6(m=TJ_qCqWzsU1{xo7%Qoc`H?aPvorfnM9sVV-`ZXL=zv0|Vja|GvAY z*YWo&dvN}wxhI_dxVEQP`)8h~zqBWu{3rH=v%e4S3CI6|J>l%{JG;Zt-`c%1l>WWB zJKX$bb9XrV^~UaS@_TJ}IQ#$V?r`?+mEEX?@z-1b+t@9)Z1hH751Ievkm;Y=9j^aL z?C!Py6I_pkq#xKV^{0q$jVJBYh{Fl#crKOB!qHsAj6?^=BEy5x*icN6-ac+d#!jRx zBdw(}lV-dC2WQua6oZ0c^T__;2haCL1?S06N5*0zvk(iOCqE4z1ss>r+L_L4a9Eg^ z0S*UaDe@e}_W_ZGp`A{p6Bu`fWwIF)-z+#1e*zAyC!Vn2*m?Sd$-_BLhW~~l(b!;Q zd@wRB-o^%F5!;WdV@yW^#-7sg*?jOD!hKk{3^I-WjrVf+%$K9zG*}Ywj?c%$8?4hG z5V*ReVeU9#hnqeR=WHMvPHz{4iZP1`Fn0U+EH)CE)5y4;Q+gYNqS$^TjK4-BgK%n! ze$dXu+eh?S)3EZBCzEj8T!yx~nQTEzrt&!h4xMXROG`ujWwNJ=mDA}eJmFOC5*(-2 z;3PCZki+~#X7Yx}(1VeANZz$41IfGn7?2gAJp-Qyhl;WW*~Dj zRJl|ivYur9NVjR%LBBqGauLqVVMG}NpX=(F_0>(3NX<4dEJ7XB^|BXq3OE?gnJ5JVEl=eTTmj#it9O#i?X>c;2;{9cG*kk{XjN2Yla0jWCO=SvZ9_BGy4GxjA?-L>q zL+jWXmN}ipdEhSPGCCN=c{whZkum#C6o_fM$nc}wwe_%1fBE&Z)L%Y-VZrV%6QaLt zk_(-HD^7^t?k|yE7DN`KWnND8m%uExF>;CXa%1MHc-Df!;QDJ=m{DT_=DycoC_+^E z10e{O1}BtqITo|UP>f?oqGFH&2&v46XRO)7S}qH7;~AJPDwXA{XS8k3C@6 zhIB@ zN^iEU|NC3{{vkg*_Wwg5m*c_K0M$ApjD;MzloMKAj%Q$N4vlA=H)mchC&nIUx|^4a zUm`o>c+0$;CB`w%oRB+C)_6{iXPQVa3nGi5> zspiXF`&h?elivt!^TU0tM-!=Mo=IiQe0Ljjf?@OVJ#;X=QHgb|s95EJScLsQHe%cV zu=;-jcgVq4lHi*^0hY2LvM{ucT*?WpZu(2Uk2PxV|CDb>VPR3&K~ti?)IY-A=DS4S z*LaL|-5q_Oa3) zKg=m#$AT3txgU2NR_4S$)(8gUI#!ggV~IJ9=1gzvSpK_J{X^t9HUdjRV(pVfgY09y ze)#=j9qV&{^psr3VrgC%xHjdxkCp04u3t&lsRP)T<5|bTdQ`tn$N_)*zg2JIdWO=Q z!F8@wHyK#R=zcu|^roYEpc+VnZc4og`;3*|M7h2F2#DrPfjjDJ;!2V=nl)9W@JQ2nUQ|lNQ1ed4XDlam$1k zdy5dPGp3*BD4J!YTmhnKb$Wsj1WSWQBIR-vhpq7AFzz!Ra?a(XGAD55f2wUAkyEVk zfXKqoI{R|udATm_jFG|UkQk}snW9=Gj`1zrcpH^2RIvYu?-Qf=7v z)?uG@ESoqBFM4*1&tF1?BSrj*_cen#0=H-XmwK?i}3B&&au>rJpTl->lX zzDIgI(FGussHx45)nmO41e=&|F 
zTN*~Rw5kphGBh0%*Lg`BN$#%GUp9{HA^ZRBN8e#fDB1nZEhL}J9VcUlr2Wt7?Si=H zp~P&Os`@NkW4bTKoNf{xNGKOXy^E*O=?AUi{iXYTQO z#yWDTDf@q;x>=b686hQ$6)6K{&2oLmY>bUv$^%Ry$O4Bq zbnuNEf2-6@Xqncn^X`i$VQtS5WU6L7H}sh;4i{a#KCSHDfj0e}0yRd3>Y z#;`F3*Z*~~V@j0L-r}frO$Y6$Yg~twK*jZD)T=k8)(3=*y8lnA8|OS@cXo!Oc~Dz= z*E860ofluq-P(Hd`pVPxY$;KI-jw>%_HoB)%QkGFLcJ>s;@M~159^FdZ}Q5Zeh2vZ z^U-lrEQM!+okJNdIp47QRW7~D)viWx@l1n};#uGXq zmt<$yvdr9Z0*bUVIK3>0r@vHkd5`Ukb^1%bk98uOnAPC;JV>Wefb(yiPUDF7g3ne& zGi{#eVgw8V6f6y%8c^~6ax7+V_ZRE`G3We$n6ouuXj|sXd+#qdgaqDa(4GCy*Z=u=rt5>o!LT`FPy^z4rt36pDm-UAM7QH{J&NysYDtsxg3pn&NGfV69Qq@B*M_j>8#l<_vOg-47h1$ zh*k+k;h>;cFMv1CenxSgvC$`N`6hj9Q_iW;_y4Y6e=pe?kNoE&a)z$Cm+b%hgbQDQ z)HydBB2LEkNjpQId(fGelgBd2{>SwUXe1(k!}fVb_@)@0DsuTn={zF_i+1I#XTV(s zJ%5)oS#J5LX0p7n$Un@9_Zf8#vG?V`4pD00M_Si2TBDHP<+S}E^ot6#za)kZV!rKq(l3(bqBbWLY-Xo<^wgjNrL-Ghb z=W(@sSF$y3c;|8{3*uR46z6i3TmnNkL@r_dKN?FK>GVt-K5Kz%S%*bu8JOgU-wO^_R5%FZ(v(t|OOD ze|b99)?c!={*vN$=H=XS>9+rsT;AjUvV&kLug$=LHSK@f&fxc^OoGCkn(21mR4{C* z@p_{YHW5d5QCL45j>x@b+g_3A}c0QNmZ{_=k{Os8OxXw5p z%r*~(E#^0Y^?`$YO9@ zc))%+^<7S6hWh@WY!W{o?v|~*oa^XKJZC%!kPJ>5Q@VZZJf&uAZSw#MlN9J#Te;}iMjcb~DWC!4o`izI8^Gmhcvr#PTEpg5p7pg5p7pg6D@Iq?7O CR6)D| diff --git a/inst/extdata/pharmaverseadam/adsl_sample.rds b/inst/extdata/pharmaverseadam/adsl_sample.rds deleted file mode 100644 index 35fe401dadf7e9f59eb1dd032dcb23efcaf2e934..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 2201 zcmV;K2xj*miwFP!000001MOPNbKAxlheS}6oQKuAPR5>TjihcK)1eB86eT6o4q+0M zm{24GfU$gNRhV2#$dCXFg0_`It?8wm&h(N)ubF8tJ@%MOr@VIBu`d1tdg`^kw9}K_ zT>uO00t-rt8abW>Gve;|*v0RA>=W9(#Bp4POEk&X5R!3`sZiLYSW;e+ifbfG5)q5mD&-}yC{bD)P-|VJSg^0vO`27NZ(LuCH!Q4E zeTj&*R-jm@;cCG*f9`3_;%gnt{MiHu+T`v>pzF>yohNMO31fM}N}e!}C+y-0gLuLc zo-lzYu=6uOGtdfs+Y#IYXb6fR1N)^_QCfvw&7)qqrKj)-%SfYuGzt>ML9s3|bw$`) ziRKg&oOQ@da5hpbdktHrfi+NO)!fKc$kXwql z8&Q?eEwU6uqFPMBc-0?Hn(2b%+WRtsS=P%OddiI>ztpCF+2;IG56UK%2jvGryH240 z0T(<>AQ^d|H3etyD6*>(JK77!ZteW7ZP&(!`~MEh-X 
z|A6dZM&C>k+aK4fVBB@g5n4>W)irX@^Ay(0a@U<0gRi3+%$BjJM&4lswWG-wQDsBT zRqTo7I;=?~JR-%mjA+vt+eAv}NEF|yCJzVc+u7& zVh;vz(-v)9y+%eNT8846J$7`Sbl#$2s<3IkQ3~Tb^-+XdJ z$mPfHJ(+s^pN#(Qm6H{pzIRj(^EkTIbU(H&5QCTL=#>=YZ=#{+QdbwsgONRk&Jv zj*PRuH)TcU-=965B9H&}!KYI)GQa-kLy!OSKYi+zzx@55z4GYtjVX@IeB#`Y9*j50 zV0d>eGh&g!t-q}9e@$mvlH&dHqd%au=N|qy@P1I1kGo!$myi94Jmf$6i_d@bk0W2c z6zILcapAZ~fch?S@u_k>lQlpH> zGlGFQxJuaP*oZ$W`-tA9<>iBbzkB4D4PRdVAvX*04c&IB*|0isyqhTDEx53b-zJ@i zM^pLut-u-nHAj3DmXCkT=pxrRF#v$7(s zEFb@h8Im(*#r2wL!9-Po>zLpk7ziJlW79oba^QI@fiO02VgBcvx31zzKsBtMJv2WK zZgx)xGCtnVTc&_~34;tSd{3FZ7x>X21m`b9{-i|Q3tyzSuBu=F0o()lv_W2zUnG#5 z%D@m8c7L5mx=Wi&FBO7P-xqu$sjSjNI==!2c`>7B$k@CmA`)~)w6e! zvN*MEdjN+6x(^q&9D6bnSO?t9k4pHfPgdH53f z1?+vt7#)~hxW4YAM4+Kv%&h-rawZe<{zQjoy)7s1wTprFJs-_&(>*XPp&DMmCYhwAkTC2o`Ag{ryU0qecO%sI^d-Ra!(lNN9Q} zs;M7FB2{iyqgGxdsIb%sN)GK^Y`{j_N+ZkM1dg0kSd5_SBnU7jK1a3^x#->?E5QtfV>3%=8xbH9ESf% zUFCy+tY~04#(y=wk6466WqIlyA0F&>n3WtH1R2iQikzHlPd|Y%gQO0rt$H zm>DzYV$jB+oy?Xh+fN5{G^r8L2xtT}0vZ90fJQ(gpb^jrXaqC@8Uc-fMnEH=5zq)| z1T+E~0gZr0KqH_L&3_qRh*xt)*=35M6Zxt znZC2}h(5{0lCJYQ!PqIHpCkG->760Fvm`%9e!oxh3)J6Bq}L$(9g@FH?Hi=`J8Ive z_HAn4A-#*#Mh3+A_Y8<}?;jAQ=>XBAMBhbpoapCS#X}uI4N+)tRpG>74oU0EN3NCDQ9**1A10& zE(v06nU!=nDO$#o3x#MaFn*@iuQeb4cf%xiq#ZqEkeJoa+If zw{{^1<4fk!NT(LK7~8x9bUOD4vLVKK8!z~HfaYzQx0b!cSQezSDSl2`g*=xp%yD`0 zNX}&0yklj-Ro+T-9B|CY2Mn-bxzv%&!aV4qAK<`wJO-N3T6h8NVQll>_R0w(v*K2~ z(rU@A6)L6AZ@Wf*`-vyrWv@kmSa&du)oRV~)A8QH~}XV^v0z1t~m zxGg@KI;qM^i-xajR9B63skY?_--c6ji{1u=2^dI!sFN*rQ)$=9R3Od`cg09nD_h(} zyBtFRSBnsAX}Ppn^h(tV=A^YB8@gX_V!%lwT`sNxk7JlGd2Tyi)|$)UX8@+&7LMJ0 zn&pbiZpvG)KtJ9I;4PMml?p^=)ixly71wsRmTRR=jDUyK6vG@|_O^>r ztZrAl+KKitB&+2zWEDpzVz!GXu$LCTjK_zYr+Cq=m8vU74Ca@JK_zDJEo92c%%u=J z4^ocqMH*92Z&tUqYp&r{Tat;>qKi&uV{_X(MEG{|$D=XEF(lh>CZjsSc=5qtbFXl4G zN5m|qHOskWpr67>`81Jssd9013+612%vh~eH^d^et6My|T+NS#gZ5CRdc0y-WpIY; zpt$OKCyd4F<~DW!GhKAJSgn@9EiMt`NUgfPX_$<7xQrx=9dWB`wc_S_X&Hc6Lwl%m zSO(f6Frp$r8SRl8`V(V)wB)UqD#mzBr~n4s-hpC8V;6eXs^`{xwdPQpiYk9%&Nub` 
z2?q?FP2#EL_!k^4r1J9Hv`_l5LL=z6Lk+$=?smw1m~6Z0Xr4yk%12<7jY5#8=sEu+ zJ-^H6cANN#z(67j{?0W*F{pn^!M7Fsh=RXE3%SF;f^RGM5e0weYDN4CzOCR#6#O0W zkik0QSMY5GKce98(6dB`e+A!G@FNQTj(EuGj9Tzat*rI{hp7wt^o~@OSWK z7aR$GZk|pl__l%{QSf(Y{RI6td)ocgd;b=9nM<{d4`XoC!`G6*ZtV|ndt-QCpZ3D+ zV&s-S?cd!%3& z^x#dQ;$IKhzdhLR{YQj7IvB40v5@iqaljCP^tpa^2Ex^^9}md-agiR9*X4qwxbP7o z!Uh>VN-*5=BQ5Jbu79LIMf$j9NBZ^veRqJK3D8NR@l^oY+eBl0NaK5DJfyrJNS}U$ zXbd0qmx#tKcm%YHMB|nd<*P(Xe``b|o+z&qeFM=i5H0)-vhNVRpX5Ib$bTA;|B7fC z->XDRynfqeKfwMJu$M1VWPjIS;Bd(C?IT)_XMaF`ARxafAiq68M+5XkfWALKr-+vG z<>7#QDL@wkbctwe5dS9w@>+m?CZPX?0DUq*e=%VHm4N)KMC1Js`~Moza=x4)dXVHV z1nj>NpuZWQzZIat%`T384EOmB+H52OmURU`q2MzGzwuGwxWm7KpHT3bg5UUv!oPx_ zQ1F?8FTaE6h+pv@B%$ETVi)w&P`vM`EBFZoU!b7R8Uc-fMnEH=5zq)|1T+HuBGBKv zqjyHaz59JRr2UH{@)^0m_)m_6dw0A(67GKo&Pcd-SBa4Q9~{AFb3As7-<}bCYq57E z+`Bv8H9zgP|E8t5`e5w;GCWFw^w<9@!{P9|Fx>CE!?XNu%UuXFoTeaC-{ z+pi3Vdv~Ap|92Oh4;}P?cuSwOGx_9VCc6NiamLK(RBR?{j*rjaZ!BhxMd5F39InQr zvB~IIyc>@-#f}-f$BZ!sZg__YC$W^Wa{0MOXW_fkju?P3zu*|Nnf#&!U!NKaIU{R1 znfZmqC#qZ-zOGw#=~`O%k`?lnjL@7A21cW+lP-=yKk@uu-?V1ot;pvl?)VK}FjoKb zYYA)<`(EcQV{4*)_1$MBKe;fsm`%ZlZ^*!dWYSjBzM#qAewQ9yhmRrA@PWxVU}E8A zM`p~ZX~rk;hvG{*L}nnR^3^D020nI$qn;I+N$)Z<@J(K26+Xyo|29tq(@Q2Y1G8u* z8WZsYMhYh_AN>i({7jfOG4w%{5{dfllqtz?aiX$mdTlYnOFuK_baXP_Ju_wuGc$v~ z)~T+bJ*P5A(nz6WvKMD9aUd5xa zspuG1xN)p-Exc*0W+ZmZ@q2KwO~nK*Y>_f^IBO*z!fIuubD5+yZ&-6FesMXf8IN#G ztXej@n`&m%S9lR?{NX1*pla4QOc^*%ZQ{v<%sI9R;H#>cZ|g4+c_CY2zcgR1M2}-d z>$PU4Va-4>f*;QU-ghQNKFny0?l?^ztgtc44B+7xz$tT@N6+X!v)gKRv{-pO3%43* zs~c{)RB?@L^*H}77I{jUU(KeYV-(C}L*b;uF+_aqc}ryG*{2?)%p?wqj5H2WX4t(6 zJ~je35q~{__v(dtxIq@gttL?ry3*FKZvI6}X2AZE?lV{i#%AE&KO^dq0J!&AE~GKv zWXd$TNqznUs%Efn5y1cim#3Ps1ZAoTauX%V3$|S{!&#SfpNYfXY-$33V$Hx9#hSsp z>Qpx#?afe()ocQm%ajRQ3^?k2-e-F4zk131HG?}!0n}Bysb=Rcz9;r(kN^DvSso&gL&13?kt)#o{&Cu^9WhRE1nHa~P$ih@KhEFkkCu-8Yr^$nQ&UXML z#x;X?n<+|CqtM?Dpx3>mm(0%$>|>_HG%;H<(|WU#nelm`f9|e3MP|PHy?f=Fxn5+X zej8 Date: Wed, 6 Nov 2024 16:19:19 +0800 Subject: [PATCH 27/45] Remove comments inside functions --- 
R/dvloader.R | 12 ------------ R/utils.R | 14 -------------- 2 files changed, 26 deletions(-) diff --git a/R/dvloader.R b/R/dvloader.R index 139ea8c..91b7f34 100644 --- a/R/dvloader.R +++ b/R/dvloader.R @@ -19,21 +19,16 @@ #' #' @keywords internal get_base_dir <- function(env_var) { - # Ensure env_var is a single character string checkmate::assert_character(env_var, len = 1) - # Get the value of the environment variable base_dir <- Sys.getenv(env_var) - # Stop if the environment variable is not set if (base_dir == "") { stop("Environment variable ", env_var, " is not set") } - # Ensure the directory exists checkmate::assert_directory_exists(base_dir) - # Return the normalized path return(normalizePath(base_dir)) } @@ -100,36 +95,29 @@ load_data <- function( prefer_sas = FALSE, env_var = "RXD_DATA", print_file_paths = FALSE) { - # Input validation checkmate::assert_character(sub_dir, len = 1, null.ok = TRUE) checkmate::assert_character(file_names, min.len = 1) checkmate::assert_logical(use_wd, len = 1) checkmate::assert_logical(prefer_sas, len = 1) checkmate::assert_character(env_var, len = 1) - # Determine the base directory if (use_wd) { base_dir <- getwd() } else { base_dir <- get_base_dir(env_var = env_var) } - # Construct the full directory path dir_path <- if (is.null(sub_dir)) base_dir else file.path(base_dir, sub_dir) - # Determine the file extension based on preference file_ext <- if (prefer_sas) "sas7bdat" else "rds" - # Get the full file paths file_paths <- get_file_paths(dir_path = dir_path, file_names = file_names, prefer_sas = prefer_sas) - # Print the directory path and file names if requested if (isTRUE(print_file_paths)) { cat("Loading data from", dir_path, "\n") cat("Loading data file(s):", basename(file_paths), "\n") } - # Load the data files data_list <- load_data_files(file_paths) names(data_list) <- file_names diff --git a/R/utils.R b/R/utils.R index 17c4e12..a856cb7 100644 --- a/R/utils.R +++ b/R/utils.R @@ -28,7 +28,6 @@ #' #' @export 
get_file_paths <- function(dir_path, file_names, prefer_sas = FALSE) { - # Input validation checkmate::assert_character(dir_path, len = 1) checkmate::assert_character(file_names, min.len = 1) checkmate::assert_logical(prefer_sas, len = 1) @@ -38,10 +37,8 @@ get_file_paths <- function(dir_path, file_names, prefer_sas = FALSE) { file_ext <- tools::file_ext(file_name) if (file_ext == "") { - # Get all files in the directory candidates <- basename(list.files(dir_path)) - # Find matching RDS files rds_match <- grep( pattern = paste0("^", file_name, "\\.rds$"), x = candidates, @@ -49,7 +46,6 @@ get_file_paths <- function(dir_path, file_names, prefer_sas = FALSE) { value = TRUE ) - # Find matching SAS files sas_match <- grep( pattern = paste0("^", file_name, "\\.sas7bdat$"), x = candidates, @@ -57,7 +53,6 @@ get_file_paths <- function(dir_path, file_names, prefer_sas = FALSE) { value = TRUE ) - # Prefer SAS file if it exists, otherwise use RDS if (isTRUE(prefer_sas)) { if (length(sas_match) > 0) { return(file.path(dir_path, sas_match[1])) @@ -76,7 +71,6 @@ get_file_paths <- function(dir_path, file_names, prefer_sas = FALSE) { } } } else { - # If an extension is provided, use the exact file name if (file.exists(file_path)) { return(file_path) } else { @@ -85,7 +79,6 @@ get_file_paths <- function(dir_path, file_names, prefer_sas = FALSE) { } }) - # Normalize all file paths return(normalizePath(unlist(file_paths))) } @@ -107,16 +100,12 @@ get_file_paths <- function(dir_path, file_names, prefer_sas = FALSE) { #' #' @export load_data_files <- function(file_paths) { - # Validate input parameters checkmate::assert_character(file_paths, min.len = 1) checkmate::assert_file_exists(file_paths) - # Read each file and store in a list data_list <- lapply(file_paths, function(file_path) { - # Get file extension extension <- tools::file_ext(file_path) - # Read file based on its extension if (tolower(extension) == "rds") { data <- readRDS(file_path) } else if (tolower(extension) == 
"sas7bdat") { @@ -125,19 +114,16 @@ load_data_files <- function(file_paths) { stop("Unsupported file extension: ", extension) } - # Get file metadata meta <- file.info(file_path, extra_cols = FALSE) meta[["path"]] <- file_path meta[["file_name"]] <- basename(file_path) - # Add metadata as an attribute to the data rownames(data) <- NULL attr(data, "meta") <- meta return(data) }) - # Set names of the list elements to the basenames of the file paths names(data_list) <- basename(file_paths) return(data_list) From 19ccc3d7fc389ab2e975a69225fb62d6d31a8ded Mon Sep 17 00:00:00 2001 From: Ming Yang Date: Wed, 6 Nov 2024 16:53:38 +0800 Subject: [PATCH 28/45] Remvoe get_base_dir() and keep get_nfs_path() --- R/dvloader.R | 65 +++++++++++++--------------------------------------- 1 file changed, 16 insertions(+), 49 deletions(-) diff --git a/R/dvloader.R b/R/dvloader.R index 91b7f34..a539b16 100644 --- a/R/dvloader.R +++ b/R/dvloader.R @@ -1,62 +1,32 @@ -#' Get Base Directory Path +#' Get NFS Base Path from an Environment Variable #' -#' This function retrieves the base directory path from a specified environment variable. -#' It checks if the environment variable is set and if the directory exists. +#' This function assumes that there is an environment variable called `RXD_DATA` +#' which is set to the base path of the NFS directory. #' -#' @param env_var [character(1)] The name of the environment variable containing the base directory path. +#' @return [character(1)] The normalized path to the NFS directory. #' -#' @return [character(1)] The normalized path to the base directory. 
-#' -#' @examples -#' # Create a temporary directory -#' temp_dir <- tempdir() -#' -#' # Set the BASE_DIR environment variable -#' Sys.setenv(BASE_DIR = temp_dir) -#' -#' # Get the base directory path -#' dv.loader:::get_base_dir("BASE_DIR") -#' -#' @keywords internal -get_base_dir <- function(env_var) { - checkmate::assert_character(env_var, len = 1) - - base_dir <- Sys.getenv(env_var) +#' @export +get_nfs_path <- function() { + base_path <- Sys.getenv("RXD_DATA") - if (base_dir == "") { - stop("Environment variable ", env_var, " is not set") + if (base_path == "") { + stop("Environment variable RXD_DATA must be set") } - checkmate::assert_directory_exists(base_dir) + checkmate::assert_directory_exists(base_path) - return(normalizePath(base_dir)) -} - -#' Get NFS Path -#' -#' This function retrieves the path to the NFS (Network File System) directory. -#' -#' @param env_var [character(1)] The environment variable name for the base directory. Default is "RXD_DATA". -#' -#' @return [character(1)] The path to the NFS directory. -#' -#' @export -get_nfs_path <- function(env_var = "RXD_DATA") { - get_base_dir(env_var = env_var) + return(normalizePath(base_path)) } -#' Get CRE Path +#' Get CRE Base Path from an Environment Variable #' -#' This function retrieves the path to the CRE (Clinical Research Environment) directory. -#' It uses the "RXD_DATA" environment variable as the base directory. +#' This function is an alias for `get_nfs_path()` to maintain backwards compatibility. #' -#' @return [character(1)] The path to the CRE directory. +#' @return [character(1)] The normalized path to the CRE directory. #' #' @export -get_cre_path <- function() { - get_base_dir(env_var = "RXD_DATA") -} +get_cre_path <- get_nfs_path #' Load Data Files @@ -68,7 +38,6 @@ get_cre_path <- function() { #' @param file_names [character(1+)] Character vector of file names to load (without extension). 
#' @param use_wd [logical(1)] Logical indicating whether to use the current working directory. Default is FALSE. #' @param prefer_sas [logical(1)] Logical indicating whether to prefer SAS7BDAT files over RDS. Default is FALSE. -#' @param env_var [character(1)] The environment variable name for the base directory. Default is "RXD_DATA". #' @param print_file_paths [logical(1)] Logical indicating whether to print the directory path and file names. #' Default is FALSE. #' @@ -93,18 +62,16 @@ load_data <- function( file_names, use_wd = FALSE, prefer_sas = FALSE, - env_var = "RXD_DATA", print_file_paths = FALSE) { checkmate::assert_character(sub_dir, len = 1, null.ok = TRUE) checkmate::assert_character(file_names, min.len = 1) checkmate::assert_logical(use_wd, len = 1) checkmate::assert_logical(prefer_sas, len = 1) - checkmate::assert_character(env_var, len = 1) if (use_wd) { base_dir <- getwd() } else { - base_dir <- get_base_dir(env_var = env_var) + base_dir <- get_nfs_path() } dir_path <- if (is.null(sub_dir)) base_dir else file.path(base_dir, sub_dir) From d164c100f7fe40ec005f787c7e6e2bed87358fce Mon Sep 17 00:00:00 2001 From: Ming Yang Date: Wed, 6 Nov 2024 17:41:54 +0800 Subject: [PATCH 29/45] Update function docs --- R/dvloader.R | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) diff --git a/R/dvloader.R b/R/dvloader.R index a539b16..de19181 100644 --- a/R/dvloader.R +++ b/R/dvloader.R @@ -1,9 +1,9 @@ -#' Get NFS Base Path from an Environment Variable +#' Get Base Path from an Environment Variable #' #' This function assumes that there is an environment variable called `RXD_DATA` -#' which is set to the base path of the NFS directory. +#' which is set to the base path of the data directory. #' -#' @return [character(1)] The normalized path to the NFS directory. +#' @return [character(1)] The normalized base path. 
#' #' @export get_nfs_path <- function() { @@ -18,12 +18,11 @@ get_nfs_path <- function() { return(normalizePath(base_path)) } - -#' Get CRE Base Path from an Environment Variable +#' Get Base Path from an Environment Variable #' #' This function is an alias for `get_nfs_path()` to maintain backwards compatibility. #' -#' @return [character(1)] The normalized path to the CRE directory. +#' @return [character(1)] The normalized base path. #' #' @export get_cre_path <- get_nfs_path From 912957f270779303963d69e8658e8e3ca1b311a9 Mon Sep 17 00:00:00 2001 From: Ming Yang Date: Wed, 6 Nov 2024 17:43:08 +0800 Subject: [PATCH 30/45] Update examples in README --- README.md | 44 ++++++++++++-------------------------------- 1 file changed, 12 insertions(+), 32 deletions(-) diff --git a/README.md b/README.md index ab3eadc..c90a943 100644 --- a/README.md +++ b/README.md @@ -25,50 +25,30 @@ remotes::install_github("Boehringer-Ingelheim/dv.loader") ## Examples -The main function is `dv.loader::load_data()`, which loads data files from sub-directories of a network file system (NFS) or the working directory. +The `dv.loader` package provides two main functions for loading data: -### Example 1: Sub-directory of Network File System (NFS) +1. `load_data_files()`: A flexible function that loads multiple data files from any specified file paths, regardless of whether they are in the same directory or not. -To load data files from a NFS, you need to set the NFS path as an environment variable. By default, the environment variable name is `RXD_DATA`. +2. `load_data()`: A convenience wrapper around `load_data_files()` that simplifies loading multiple files from a single sub-directory of a base path. -You can run the following command to check the NFS path if it is already set. +### Example 1: `load_data_files()` ```r -# Check the NFS path -dv.loader::get_nfs_path() -``` - -If the NFS path is not set, you can set it by running the following command. 
- -```r -# Set the NFS path as an environment variable -Sys.setenv(RXD_DATA = "path/to/network-file-system") -``` - -The environment variable setup is not needed if you have already set the NFS path as an environment variable in your `.Renviron` or `.Rprofile` file. - -If the NFS path has been properly set, you can load data files from the NFS and its sub-directories. - -```r -# Load data files from the specified sub-directory of a network file system (NFS) -dv.loader::load_data( - sub_dir = "sub-directory/of/network-file-system", - file_names = c("adsl.sas7bdat", "adae.sas7bdat") +# Load data files from the specified file paths +dv.loader::load_data_files( + file_paths = c("path/to/adsl.sas7bdat", "path/to/adae.sas7bdat") ) ``` -For the `file_names` argument, it is recommended to use the full file names including the file extension. - -### Example 2: Sub-directory of the Working Directory +### Example 2: `load_data()` -To load data files from a local directory, there is no need to set an environment variable. You use `use_wd = TRUE` to indicate that the data files are loaded from a sub-directory of the working directory. +In order to use `load_data()`, you need to set the base path as an environment variable called `RXD_DATA`. 
```r -# Load data files from the specified sub-directory of the working directory +# Load data files from the specified sub-directory of a base path dv.loader::load_data( - sub_dir = "sub-directory/of/working-directory", - file_names = c("adsl.sas7bdat", "adae.sas7bdat"), - use_wd = TRUE + sub_dir = "sub-directory/of/base-path", + file_names = c("adsl.sas7bdat", "adae.sas7bdat") ) ``` From 52d058bd1817e160e90858ece0849cfebd4bc0b2 Mon Sep 17 00:00:00 2001 From: Ming Yang Date: Wed, 6 Nov 2024 17:43:32 +0800 Subject: [PATCH 31/45] Update example in vignettes --- vignettes/integration-guide.Rmd | 15 +++++---------- 1 file changed, 5 insertions(+), 10 deletions(-) diff --git a/vignettes/integration-guide.Rmd b/vignettes/integration-guide.Rmd index 99cc47f..d7d36f5 100644 --- a/vignettes/integration-guide.Rmd +++ b/vignettes/integration-guide.Rmd @@ -45,14 +45,9 @@ haven::write_sas(pharmaverseadam::adae, file.path(data_dir, "adae.sas7bdat")) ## Step 1: Load data files ```{r, eval=FALSE} -# Set the NFS path as an environment variable -Sys.setenv(RXD_DATA = temp_dir) - -# Load data files from the specified sub-directory of a network file system (NFS) -data_list <- dv.loader::load_data( - sub_dir = "pharmaverseadam", - file_names = c("adsl", "adae"), - print_file_paths = TRUE +# Load data files from the specified file paths +data_list <- dv.loader::load_data_files( + file_paths = file.path(data_dir, c("adsl.sas7bdat", "adae.sas7bdat")) ) ``` @@ -63,7 +58,7 @@ data_list <- dv.loader::load_data( module_list <- list( "Data Listings" = dv.listings::mod_listings( module_id = "data_listings", - dataset_names = c("adsl", "adae") + dataset_names = c("adsl.sas7bdat", "adae.sas7bdat") ) ) ``` @@ -75,7 +70,7 @@ module_list <- list( dv.manager::run_app( data = list("Demo Data" = data_list), module_list = module_list, - filter_data = "adsl", + filter_data = "adsl.sas7bdat", title = "DaVinci Application" ) ``` From c06a26a9b90826be9c98612d2f9d74c45a5d81f0 Mon Sep 17 00:00:00 2001 From: 
Ming Yang Date: Wed, 6 Nov 2024 17:44:44 +0800 Subject: [PATCH 32/45] Update R document files --- man/get_base_dir.Rd | 30 ------------------------------ man/get_cre_path.Rd | 7 +++---- man/get_nfs_path.Rd | 12 +++++------- man/load_data.Rd | 3 --- 4 files changed, 8 insertions(+), 44 deletions(-) delete mode 100644 man/get_base_dir.Rd diff --git a/man/get_base_dir.Rd b/man/get_base_dir.Rd deleted file mode 100644 index 891d825..0000000 --- a/man/get_base_dir.Rd +++ /dev/null @@ -1,30 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/dvloader.R -\name{get_base_dir} -\alias{get_base_dir} -\title{Get Base Directory Path} -\usage{ -get_base_dir(env_var) -} -\arguments{ -\item{env_var}{[character(1)] The name of the environment variable containing the base directory path.} -} -\value{ -[character(1)] The normalized path to the base directory. -} -\description{ -This function retrieves the base directory path from a specified environment variable. -It checks if the environment variable is set and if the directory exists. -} -\examples{ -# Create a temporary directory -temp_dir <- tempdir() - -# Set the BASE_DIR environment variable -Sys.setenv(BASE_DIR = temp_dir) - -# Get the base directory path -dv.loader:::get_base_dir("BASE_DIR") - -} -\keyword{internal} diff --git a/man/get_cre_path.Rd b/man/get_cre_path.Rd index 03686b2..897a61c 100644 --- a/man/get_cre_path.Rd +++ b/man/get_cre_path.Rd @@ -2,14 +2,13 @@ % Please edit documentation in R/dvloader.R \name{get_cre_path} \alias{get_cre_path} -\title{Get CRE Path} +\title{Get Base Path from an Environment Variable} \usage{ get_cre_path() } \value{ -[character(1)] The path to the CRE directory. +[character(1)] The normalized base path. } \description{ -This function retrieves the path to the CRE (Clinical Research Environment) directory. -It uses the "RXD_DATA" environment variable as the base directory. 
+This function is an alias for `get_nfs_path()` to maintain backwards compatibility. } diff --git a/man/get_nfs_path.Rd b/man/get_nfs_path.Rd index 3c4ba73..516256b 100644 --- a/man/get_nfs_path.Rd +++ b/man/get_nfs_path.Rd @@ -2,16 +2,14 @@ % Please edit documentation in R/dvloader.R \name{get_nfs_path} \alias{get_nfs_path} -\title{Get NFS Path} +\title{Get Base Path from an Environment Variable} \usage{ -get_nfs_path(env_var = "RXD_DATA") -} -\arguments{ -\item{env_var}{[character(1)] The environment variable name for the base directory. Default is "RXD_DATA".} +get_nfs_path() } \value{ -[character(1)] The path to the NFS directory. +[character(1)] The normalized base path. } \description{ -This function retrieves the path to the NFS (Network File System) directory. +This function assumes that there is an environment variable called `RXD_DATA` +which is set to the base path of the data directory. } diff --git a/man/load_data.Rd b/man/load_data.Rd index 9659710..2342a61 100644 --- a/man/load_data.Rd +++ b/man/load_data.Rd @@ -9,7 +9,6 @@ load_data( file_names, use_wd = FALSE, prefer_sas = FALSE, - env_var = "RXD_DATA", print_file_paths = FALSE ) } @@ -22,8 +21,6 @@ load_data( \item{prefer_sas}{[logical(1)] Logical indicating whether to prefer SAS7BDAT files over RDS. Default is FALSE.} -\item{env_var}{[character(1)] The environment variable name for the base directory. Default is "RXD_DATA".} - \item{print_file_paths}{[logical(1)] Logical indicating whether to print the directory path and file names. 
Default is FALSE.} } From b8d3bc3a18953e67d975569c06d7c7164268cba3 Mon Sep 17 00:00:00 2001 From: Ming Yang Date: Wed, 6 Nov 2024 17:56:38 +0800 Subject: [PATCH 33/45] Fix styler issues --- R/dvloader.R | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/R/dvloader.R b/R/dvloader.R index de19181..6ae0044 100644 --- a/R/dvloader.R +++ b/R/dvloader.R @@ -1,6 +1,6 @@ #' Get Base Path from an Environment Variable #' -#' This function assumes that there is an environment variable called `RXD_DATA` +#' This function assumes that there is an environment variable called `RXD_DATA` #' which is set to the base path of the data directory. #' #' @return [character(1)] The normalized base path. @@ -20,7 +20,7 @@ get_nfs_path <- function() { #' Get Base Path from an Environment Variable #' -#' This function is an alias for `get_nfs_path()` to maintain backwards compatibility. +#' This function is an alias for `get_nfs_path()` to maintain backwards compatibility. #' #' @return [character(1)] The normalized base path. 
#' From abe10d67aca9bcabab78944b98c5a5f7c448614d Mon Sep 17 00:00:00 2001 From: Ming Yang Date: Wed, 13 Nov 2024 14:09:02 +0800 Subject: [PATCH 34/45] Check argument print_file_paths --- R/dvloader.R | 1 + 1 file changed, 1 insertion(+) diff --git a/R/dvloader.R b/R/dvloader.R index 6ae0044..82327c6 100644 --- a/R/dvloader.R +++ b/R/dvloader.R @@ -66,6 +66,7 @@ load_data <- function( checkmate::assert_character(file_names, min.len = 1) checkmate::assert_logical(use_wd, len = 1) checkmate::assert_logical(prefer_sas, len = 1) + checkmate::assert_logical(print_file_paths, len = 1) if (use_wd) { base_dir <- getwd() From cf48a948f09bae10c5bd80bf5791a88dda816db0 Mon Sep 17 00:00:00 2001 From: Ming Yang Date: Wed, 13 Nov 2024 14:10:42 +0800 Subject: [PATCH 35/45] Remove unused code --- R/dvloader.R | 2 -- 1 file changed, 2 deletions(-) diff --git a/R/dvloader.R b/R/dvloader.R index 82327c6..c032317 100644 --- a/R/dvloader.R +++ b/R/dvloader.R @@ -76,8 +76,6 @@ load_data <- function( dir_path <- if (is.null(sub_dir)) base_dir else file.path(base_dir, sub_dir) - file_ext <- if (prefer_sas) "sas7bdat" else "rds" - file_paths <- get_file_paths(dir_path = dir_path, file_names = file_names, prefer_sas = prefer_sas) if (isTRUE(print_file_paths)) { From 6dc6e0ea18920904284ac1956130a3d29a9f10b7 Mon Sep 17 00:00:00 2001 From: Ming Yang Date: Wed, 13 Nov 2024 14:25:46 +0800 Subject: [PATCH 36/45] Updated changelog --- NEWS.md | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/NEWS.md b/NEWS.md index e3df5dc..fbc1679 100644 --- a/NEWS.md +++ b/NEWS.md @@ -1,10 +1,10 @@ # dv.loader 2.1.0 -- Refactored code to improve readability and maintainability. +- Introduced new `load_data_files()` function that provides flexible loading of data files from any file paths. -- Fixed issue of partial matching when the `file_names` argument contains no file extensions. 
+- Updated `load_data()` function to serve as a convenience wrapper around `load_data_files()` for loading files from a single directory. -- Added arguments `env_var` and `print_file_paths` in `load_data()` function to provide more flexibility and control. +- Added `print_file_paths` option to `load_data()` to display the full paths of loaded files, helping users identify potential file name matching issues. # dv.loader 2.0.0 From a14c5003b5438ad00169c6a6403a59920030f20d Mon Sep 17 00:00:00 2001 From: Ming Yang Date: Wed, 13 Nov 2024 14:37:19 +0800 Subject: [PATCH 37/45] Update Rd file --- man/get_nfs_path.Rd | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/man/get_nfs_path.Rd b/man/get_nfs_path.Rd index 516256b..4d331fe 100644 --- a/man/get_nfs_path.Rd +++ b/man/get_nfs_path.Rd @@ -10,6 +10,6 @@ get_nfs_path() [character(1)] The normalized base path. } \description{ -This function assumes that there is an environment variable called `RXD_DATA` +This function assumes that there is an environment variable called `RXD_DATA` which is set to the base path of the data directory. 
} From 350f4ed234cf344a55d2d8215b36bd6c35549fe8 Mon Sep 17 00:00:00 2001 From: Ming Yang Date: Tue, 19 Nov 2024 15:22:08 +0800 Subject: [PATCH 38/45] Reuse create_data_list() and keep legacy code --- NAMESPACE | 1 - R/dvloader.R | 10 ++-- R/utils.R | 116 ++++++++++++++++------------------------ man/create_data_list.Rd | 23 ++++++++ man/get_file_paths.Rd | 40 -------------- 5 files changed, 74 insertions(+), 116 deletions(-) create mode 100644 man/create_data_list.Rd delete mode 100644 man/get_file_paths.Rd diff --git a/NAMESPACE b/NAMESPACE index be207da..2dcc8cd 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -1,7 +1,6 @@ # Generated by roxygen2: do not edit by hand export(get_cre_path) -export(get_file_paths) export(get_nfs_path) export(load_data) export(load_data_files) diff --git a/R/dvloader.R b/R/dvloader.R index c032317..9a4dac3 100644 --- a/R/dvloader.R +++ b/R/dvloader.R @@ -76,14 +76,16 @@ load_data <- function( dir_path <- if (is.null(sub_dir)) base_dir else file.path(base_dir, sub_dir) - file_paths <- get_file_paths(dir_path = dir_path, file_names = file_names, prefer_sas = prefer_sas) - if (isTRUE(print_file_paths)) { cat("Loading data from", dir_path, "\n") - cat("Loading data file(s):", basename(file_paths), "\n") + cat("Loading data file(s):", file_names, "\n") } - data_list <- load_data_files(file_paths) + data_list <- create_data_list( + dir_path = dir_path, + file_names = file_names, + prefer_sas = prefer_sas + ) names(data_list) <- file_names diff --git a/R/utils.R b/R/utils.R index a856cb7..33915ac 100644 --- a/R/utils.R +++ b/R/utils.R @@ -1,87 +1,61 @@ -#' Get File Paths +#' Create a List of Data Frames with Metadata #' -#' This function constructs file paths for given file names, handling both RDS and SAS7BDAT files. -#' It can prioritize SAS files over RDS files based on the `prefer_sas` parameter. +#' For each file name provided, this function reads the first matching file and its metadata/attributes. 
+#' By default, RDS files are preferred over SAS files for faster loading. +#' The function performs case-insensitive matching of file names. #' -#' @param dir_path [character(1)] The directory path where the files are located. -#' @param file_names [character(1+)] A vector of file names to process. -#' @param prefer_sas [logical(1)] Whether to prefer SAS files over RDS files. Default is FALSE. +#' @param dir_path [character(1)] Directory path where the files are located +#' @param file_names [character(1+)] Vector of file names +#' @param prefer_sas [logical(1)] If TRUE, SAS (.sas7bdat) files are preferred over RDS (.rds) files #' -#' @return [character] A vector of normalized file paths. -#' -#' @examples -#' \dontrun{ -#' temp_dir <- tempdir() -#' -#' file_names <- c("adsl", "adae") -#' -#' file.create(file.path(temp_dir, paste0(file_names, ".rds"))) -#' file.create(file.path(temp_dir, paste0(file_names, ".sas7bdat"))) -#' -#' list.files(temp_dir) -#' -#' get_file_paths(dir_path = temp_dir, file_names = file_names) -#' get_file_paths(dir_path = temp_dir, file_names = file_names, prefer_sas = TRUE) -#' -#' unlink(temp_dir, recursive = TRUE) -#' } -#' -#' @export -get_file_paths <- function(dir_path, file_names, prefer_sas = FALSE) { +#' @return [list] A named list of data frames, where each name is the basename of the corresponding file path. 
+create_data_list <- function(dir_path, file_names, prefer_sas = FALSE) { checkmate::assert_character(dir_path, len = 1) checkmate::assert_character(file_names, min.len = 1) checkmate::assert_logical(prefer_sas, len = 1) + checkmate::assert_directory_exists(dir_path) + + data_list <- lapply(file_names, function(x) { + extensions <- c("", ".rds", ".sas7bdat") + if (prefer_sas) { + extensions <- c("", ".sas7bdat", ".rds") + } - file_paths <- lapply(file_names, function(file_name) { - file_path <- file.path(dir_path, file_name) - file_ext <- tools::file_ext(file_name) - - if (file_ext == "") { - candidates <- basename(list.files(dir_path)) - - rds_match <- grep( - pattern = paste0("^", file_name, "\\.rds$"), - x = candidates, - ignore.case = TRUE, - value = TRUE - ) - - sas_match <- grep( - pattern = paste0("^", file_name, "\\.sas7bdat$"), - x = candidates, - ignore.case = TRUE, - value = TRUE - ) - - if (isTRUE(prefer_sas)) { - if (length(sas_match) > 0) { - return(file.path(dir_path, sas_match[1])) - } else if (length(rds_match) > 0) { - return(file.path(dir_path, rds_match[1])) - } else { - stop(dir_path, " does not contain SAS or RDS file: ", file_name) - } - } else if (isFALSE(prefer_sas)) { - if (length(rds_match) > 0) { - return(file.path(dir_path, rds_match[1])) - } else if (length(sas_match) > 0) { - return(file.path(dir_path, sas_match[1])) - } else { - stop(dir_path, " does not contain RDS or SAS file: ", file_name) - } + file_name_to_load <- NULL + + candidates <- list.files(dir_path) + uppercase_candidates <- Map(toupper, candidates) + + for (ext in extensions) { + # Case insensitive file name match + uppercase_file_name <- toupper(paste0(x, ext)) + + match_count <- sum(uppercase_candidates == uppercase_file_name) + if (match_count > 1) { + stop(paste("create_data_list(): More than one case-insensitive file name match for", dir_path, x)) } - } else { - if (file.exists(file_path)) { - return(file_path) - } else { - stop(dir_path, " does not contain: ", 
file_name) + + index <- match(uppercase_file_name, uppercase_candidates) + if (!is.na(index)) { + file_name_to_load <- candidates[[index]] + break } } + + if (is.null(file_name_to_load)) { + stop(paste("create_data_list(): No RDS or SAS files found for", dir_path, x)) + } + + # Load a single data file and get the first element of the list + output <- load_data_files(file.path(dir_path, file_name_to_load))[[1]] + + return(output) }) - return(normalizePath(unlist(file_paths))) -} + names(data_list) <- file_names + return(data_list) +} #' Load Data Files diff --git a/man/create_data_list.Rd b/man/create_data_list.Rd new file mode 100644 index 0000000..f1d405e --- /dev/null +++ b/man/create_data_list.Rd @@ -0,0 +1,23 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/utils.R +\name{create_data_list} +\alias{create_data_list} +\title{Create a List of Data Frames with Metadata} +\usage{ +create_data_list(dir_path, file_names, prefer_sas = FALSE) +} +\arguments{ +\item{dir_path}{[character(1)] Directory path where the files are located} + +\item{file_names}{[character(1+)] Vector of file names} + +\item{prefer_sas}{[logical(1)] If TRUE, SAS (.sas7bdat) files are preferred over RDS (.rds) files} +} +\value{ +[list] A named list of data frames, where each name is the basename of the corresponding file path. +} +\description{ +For each file name provided, this function reads the first matching file and its metadata/attributes. +By default, RDS files are preferred over SAS files for faster loading. +The function performs case-insensitive matching of file names. 
+} diff --git a/man/get_file_paths.Rd b/man/get_file_paths.Rd deleted file mode 100644 index 57751eb..0000000 --- a/man/get_file_paths.Rd +++ /dev/null @@ -1,40 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/utils.R -\name{get_file_paths} -\alias{get_file_paths} -\title{Get File Paths} -\usage{ -get_file_paths(dir_path, file_names, prefer_sas = FALSE) -} -\arguments{ -\item{dir_path}{[character(1)] The directory path where the files are located.} - -\item{file_names}{[character(1+)] A vector of file names to process.} - -\item{prefer_sas}{[logical(1)] Whether to prefer SAS files over RDS files. Default is FALSE.} -} -\value{ -[character] A vector of normalized file paths. -} -\description{ -This function constructs file paths for given file names, handling both RDS and SAS7BDAT files. -It can prioritize SAS files over RDS files based on the `prefer_sas` parameter. -} -\examples{ -\dontrun{ -temp_dir <- tempdir() - -file_names <- c("adsl", "adae") - -file.create(file.path(temp_dir, paste0(file_names, ".rds"))) -file.create(file.path(temp_dir, paste0(file_names, ".sas7bdat"))) - -list.files(temp_dir) - -get_file_paths(dir_path = temp_dir, file_names = file_names) -get_file_paths(dir_path = temp_dir, file_names = file_names, prefer_sas = TRUE) - -unlink(temp_dir, recursive = TRUE) -} - -} From e91f221cc1ba811fd0f2daa139e4a47b4bf4c8cb Mon Sep 17 00:00:00 2001 From: Ming Yang Date: Tue, 19 Nov 2024 16:26:15 +0800 Subject: [PATCH 39/45] Add data loading vignette --- .gitignore | 1 + vignettes/data-loading.Rmd | 45 ++++++++++++++++++++++++++++++++++++++ 2 files changed, 46 insertions(+) create mode 100644 vignettes/data-loading.Rmd diff --git a/.gitignore b/.gitignore index 67b8271..651475e 100644 --- a/.gitignore +++ b/.gitignore @@ -8,3 +8,4 @@ .vscode docs pkgdown +inst/doc diff --git a/vignettes/data-loading.Rmd b/vignettes/data-loading.Rmd new file mode 100644 index 0000000..c47aa4b --- /dev/null +++ 
b/vignettes/data-loading.Rmd @@ -0,0 +1,45 @@ +--- +title: "Data Loading" +output: rmarkdown::html_vignette +vignette: > + %\VignetteIndexEntry{Data Loading} + %\VignetteEngine{knitr::rmarkdown} + %\VignetteEncoding{UTF-8} +--- + +```{r, include = FALSE} +knitr::opts_chunk$set( + collapse = TRUE, + comment = "#>" +) +``` + +The `dv.loader` package simplifies the process of loading data files into R. It can read both `.rds` files (R's native data storage format) and `.sas7bdat` files (SAS data files) into memory. Note that the current version does not include functionality for direct database connections. + +The package's primary function is `load_data_files()`, which provides flexible data loading capabilities. This versatile function can load multiple data files from any location on your system - the files can be scattered across different directories or stored together. Unlike the legacy `load_data()` function which requires all files to be in a single subdirectory, `load_data_files()` accepts arbitrary file paths, making it more powerful and flexible for data loading. + +Below is an example of how to use `load_data_files()` to load multiple data files from different directories. 
+ +```{r} +# Create a temporary directory for the example +temp_dir <- tempdir() + +# Create two subdirectories to hold the example data files +dir.create(file.path(temp_dir, "sub_dir1")) +dir.create(file.path(temp_dir, "sub_dir2")) + +# Save the built-in iris and mtcars datasets as RDS files +saveRDS(iris, file.path(temp_dir, "sub_dir1", "iris.rds")) +saveRDS(mtcars, file.path(temp_dir, "sub_dir2", "mtcars.rds")) + +# Load both data files using load_data_files() +data_list <- dv.loader::load_data_files( + file_paths = c( + file.path(temp_dir, "sub_dir1", "iris.rds"), + file.path(temp_dir, "sub_dir2", "mtcars.rds") + ) +) + +# Display the structure of the loaded data +str(data_list) +``` From 96fa7de9514f5c58c83b0d8d08694072091e9ce3 Mon Sep 17 00:00:00 2001 From: Ming Yang Date: Tue, 19 Nov 2024 17:42:17 +0800 Subject: [PATCH 40/45] Remove file extensions and check duplicated file names --- R/utils.R | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/R/utils.R b/R/utils.R index 33915ac..d1a60fd 100644 --- a/R/utils.R +++ b/R/utils.R @@ -98,7 +98,11 @@ load_data_files <- function(file_paths) { return(data) }) - names(data_list) <- basename(file_paths) + names(data_list) <- tools::file_path_sans_ext(basename(file_paths)) + + if (any(duplicated(names(data_list)))) { + stop("load_data_files(): Duplicate file names detected.
Please ensure all file names are unique.") + } return(data_list) } From b971bb9242d62b4d00fb247d3acb7333a09478c6 Mon Sep 17 00:00:00 2001 From: Ming Yang Date: Tue, 19 Nov 2024 17:46:28 +0800 Subject: [PATCH 41/45] Remove redundant code --- R/dvloader.R | 2 -- 1 file changed, 2 deletions(-) diff --git a/R/dvloader.R b/R/dvloader.R index 9a4dac3..ad51e0f 100644 --- a/R/dvloader.R +++ b/R/dvloader.R @@ -87,7 +87,5 @@ load_data <- function( prefer_sas = prefer_sas ) - names(data_list) <- file_names - return(data_list) } From 6f65e5c81cefdae7b3f77d692d9247e071018995 Mon Sep 17 00:00:00 2001 From: Ming Yang Date: Tue, 19 Nov 2024 19:10:15 +0800 Subject: [PATCH 42/45] Move create_data_list() logic to get_file_paths() --- NAMESPACE | 1 + R/dvloader.R | 10 ++++----- R/utils.R | 49 ++++++++++++++++++++++++++--------------- man/create_data_list.Rd | 23 ------------------- man/get_file_paths.Rd | 40 +++++++++++++++++++++++++++++++++ tests/testthat/tests.R | 2 +- 6 files changed, 77 insertions(+), 48 deletions(-) delete mode 100644 man/create_data_list.Rd create mode 100644 man/get_file_paths.Rd diff --git a/NAMESPACE b/NAMESPACE index 2dcc8cd..be207da 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -1,6 +1,7 @@ # Generated by roxygen2: do not edit by hand export(get_cre_path) +export(get_file_paths) export(get_nfs_path) export(load_data) export(load_data_files) diff --git a/R/dvloader.R b/R/dvloader.R index ad51e0f..d88d0d0 100644 --- a/R/dvloader.R +++ b/R/dvloader.R @@ -76,16 +76,14 @@ load_data <- function( dir_path <- if (is.null(sub_dir)) base_dir else file.path(base_dir, sub_dir) + file_paths <- get_file_paths(dir_path = dir_path, file_names = file_names, prefer_sas = prefer_sas) + if (isTRUE(print_file_paths)) { cat("Loading data from", dir_path, "\n") - cat("Loading data file(s):", file_names, "\n") + cat("Loading data file(s):", basename(file_paths), "\n") } - data_list <- create_data_list( - dir_path = dir_path, - file_names = file_names, - prefer_sas = prefer_sas - ) + 
data_list <- load_data_files(file_paths) return(data_list) } diff --git a/R/utils.R b/R/utils.R index d1a60fd..d16cc99 100644 --- a/R/utils.R +++ b/R/utils.R @@ -1,21 +1,38 @@ -#' Create a List of Data Frames with Metadata +#' Get File Paths #' -#' For each file name provided, this function reads the first matching file and its metadata/attributes. -#' By default, RDS files are preferred over SAS files for faster loading. -#' The function performs case-insensitive matching of file names. +#' This function constructs file paths for given file names, handling both RDS and SAS7BDAT files. +#' It can prioritize SAS files over RDS files based on the `prefer_sas` parameter. #' -#' @param dir_path [character(1)] Directory path where the files are located -#' @param file_names [character(1+)] Vector of file names -#' @param prefer_sas [logical(1)] If TRUE, SAS (.sas7bdat) files are preferred over RDS (.rds) files +#' @param dir_path [character(1)] The directory path where the files are located. +#' @param file_names [character(1+)] A vector of file names to process. +#' @param prefer_sas [logical(1)] Whether to prefer SAS files over RDS files. Default is FALSE. #' -#' @return [list] A named list of data frames, where each name is the basename of the corresponding file path. -create_data_list <- function(dir_path, file_names, prefer_sas = FALSE) { +#' @return [character] A vector of normalized file paths. 
+#' +#' @examples +#' \dontrun{ +#' temp_dir <- tempdir() +#' +#' file_names <- c("adsl", "adae") +#' +#' file.create(file.path(temp_dir, paste0(file_names, ".rds"))) +#' file.create(file.path(temp_dir, paste0(file_names, ".sas7bdat"))) +#' +#' list.files(temp_dir) +#' +#' get_file_paths(dir_path = temp_dir, file_names = file_names) +#' get_file_paths(dir_path = temp_dir, file_names = file_names, prefer_sas = TRUE) +#' +#' unlink(temp_dir, recursive = TRUE) +#' } +#' +#' @export +get_file_paths <- function(dir_path, file_names, prefer_sas = FALSE) { checkmate::assert_character(dir_path, len = 1) checkmate::assert_character(file_names, min.len = 1) checkmate::assert_logical(prefer_sas, len = 1) - checkmate::assert_directory_exists(dir_path) - data_list <- lapply(file_names, function(x) { + file_paths <- lapply(file_names, function(x) { extensions <- c("", ".rds", ".sas7bdat") if (prefer_sas) { extensions <- c("", ".sas7bdat", ".rds") @@ -46,18 +63,14 @@ create_data_list <- function(dir_path, file_names, prefer_sas = FALSE) { stop(paste("create_data_list(): No RDS or SAS files found for", dir_path, x)) } - # Load a single data file and get the first element of the list - output <- load_data_files(file.path(dir_path, file_name_to_load))[[1]] - - return(output) + return(file.path(dir_path, file_name_to_load)) }) - names(data_list) <- file_names - - return(data_list) + return(normalizePath(unlist(file_paths))) } + #' Load Data Files #' #' This function reads data from multiple file paths and returns a list of data frames. 
diff --git a/man/create_data_list.Rd b/man/create_data_list.Rd deleted file mode 100644 index f1d405e..0000000 --- a/man/create_data_list.Rd +++ /dev/null @@ -1,23 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/utils.R -\name{create_data_list} -\alias{create_data_list} -\title{Create a List of Data Frames with Metadata} -\usage{ -create_data_list(dir_path, file_names, prefer_sas = FALSE) -} -\arguments{ -\item{dir_path}{[character(1)] Directory path where the files are located} - -\item{file_names}{[character(1+)] Vector of file names} - -\item{prefer_sas}{[logical(1)] If TRUE, SAS (.sas7bdat) files are preferred over RDS (.rds) files} -} -\value{ -[list] A named list of data frames, where each name is the basename of the corresponding file path. -} -\description{ -For each file name provided, this function reads the first matching file and its metadata/attributes. -By default, RDS files are preferred over SAS files for faster loading. -The function performs case-insensitive matching of file names. -} diff --git a/man/get_file_paths.Rd b/man/get_file_paths.Rd new file mode 100644 index 0000000..57751eb --- /dev/null +++ b/man/get_file_paths.Rd @@ -0,0 +1,40 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/utils.R +\name{get_file_paths} +\alias{get_file_paths} +\title{Get File Paths} +\usage{ +get_file_paths(dir_path, file_names, prefer_sas = FALSE) +} +\arguments{ +\item{dir_path}{[character(1)] The directory path where the files are located.} + +\item{file_names}{[character(1+)] A vector of file names to process.} + +\item{prefer_sas}{[logical(1)] Whether to prefer SAS files over RDS files. Default is FALSE.} +} +\value{ +[character] A vector of normalized file paths. +} +\description{ +This function constructs file paths for given file names, handling both RDS and SAS7BDAT files. +It can prioritize SAS files over RDS files based on the `prefer_sas` parameter. 
+} +\examples{ +\dontrun{ +temp_dir <- tempdir() + +file_names <- c("adsl", "adae") + +file.create(file.path(temp_dir, paste0(file_names, ".rds"))) +file.create(file.path(temp_dir, paste0(file_names, ".sas7bdat"))) + +list.files(temp_dir) + +get_file_paths(dir_path = temp_dir, file_names = file_names) +get_file_paths(dir_path = temp_dir, file_names = file_names, prefer_sas = TRUE) + +unlink(temp_dir, recursive = TRUE) +} + +} diff --git a/tests/testthat/tests.R b/tests/testthat/tests.R index b5e0527..ba7dcfd 100644 --- a/tests/testthat/tests.R +++ b/tests/testthat/tests.R @@ -53,7 +53,7 @@ test_that( { actual <- load_data( sub_dir = local_test_path, - file_names = c("dummyads1.RDS", "dummyads1.sas7bdat"), + file_names = c("dummyads1.RDS", "dummyads2.sas7bdat"), use_wd = TRUE ) actual <- c( From 2b9719fd1123858e7b11706dc6b96b1454d493ef Mon Sep 17 00:00:00 2001 From: Ming Yang Date: Wed, 20 Nov 2024 14:04:52 +0800 Subject: [PATCH 43/45] Reassign names to match the original load_data() function naming convention --- R/dvloader.R | 2 ++ 1 file changed, 2 insertions(+) diff --git a/R/dvloader.R b/R/dvloader.R index d88d0d0..c032317 100644 --- a/R/dvloader.R +++ b/R/dvloader.R @@ -85,5 +85,7 @@ load_data <- function( data_list <- load_data_files(file_paths) + names(data_list) <- file_names + return(data_list) } From 966780f26e266469df0b21845fe3efc3ac539424 Mon Sep 17 00:00:00 2001 From: Ming Yang Date: Wed, 20 Nov 2024 14:07:03 +0800 Subject: [PATCH 44/45] Update error message to use get_file_paths() instead of create_data_list() --- R/utils.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/R/utils.R b/R/utils.R index d16cc99..45a19c3 100644 --- a/R/utils.R +++ b/R/utils.R @@ -49,7 +49,7 @@ get_file_paths <- function(dir_path, file_names, prefer_sas = FALSE) { match_count <- sum(uppercase_candidates == uppercase_file_name) if (match_count > 1) { - stop(paste("create_data_list(): More than one case-insensitive file name match for", dir_path, x)) + 
stop(paste("get_file_paths(): More than one case-insensitive file name match for", dir_path, x)) } index <- match(uppercase_file_name, uppercase_candidates) From b6244f6d1c9867f949218b985afc4dea10929b9d Mon Sep 17 00:00:00 2001 From: Ming Yang Date: Wed, 20 Nov 2024 19:00:30 +0800 Subject: [PATCH 45/45] Add support for named file paths --- R/dvloader.R | 62 ++++++++++++++++++++++++++++++++++++++ R/utils.R | 51 ------------------------------- man/load_data_files.Rd | 4 +-- vignettes/data-loading.Rmd | 21 +++++++++++-- 4 files changed, 82 insertions(+), 56 deletions(-) diff --git a/R/dvloader.R b/R/dvloader.R index c032317..3cc575d 100644 --- a/R/dvloader.R +++ b/R/dvloader.R @@ -89,3 +89,65 @@ load_data <- function( return(data_list) } + + +#' Load Data Files +#' +#' This function reads data from multiple file paths and returns a list of data frames. +#' It supports reading RDS and SAS7BDAT files. +#' +#' @param file_paths [character(1+)] A vector of file paths to read. +#' +#' @return [list] A named list of data frames, where each name corresponds to a loaded file. 
+#' +#' @examples +#' path <- system.file("examples", "iris.sas7bdat", package = "haven") +#' data_list <- load_data_files(file_paths = path) +#' str(data_list) +#' +#' @export +load_data_files <- function(file_paths) { + checkmate::assert_character(file_paths, min.len = 1) + checkmate::assert_file_exists(file_paths, access = "r", extension = c("rds", "sas7bdat")) + + # Load each file and store in a list + data_list <- lapply(file_paths, function(file_path) { + extension <- tools::file_ext(file_path) + + data <- switch(tolower(extension), + rds = readRDS(file_path), + sas7bdat = haven::read_sas(file_path), + stop("Unsupported file extension: ", extension) + ) + + meta <- file.info(file_path, extra_cols = FALSE) + meta[["path"]] <- normalizePath(file_path) + meta[["file_name"]] <- basename(file_path) + row.names(meta) <- NULL + attr(data, "meta") <- meta + + return(data) + }) + + # Set names for the list elements + data_list_names <- sapply(seq_along(file_paths), function(i) { + if (!is.null(names(file_paths)) && names(file_paths)[i] != "") { + names(file_paths)[i] + } else { + tools::file_path_sans_ext(basename(file_paths[i])) + } + }) + names(data_list) <- data_list_names + + # Check for duplicate names + if (any(duplicated(names(data_list)))) { + dup_names <- unique(names(data_list)[duplicated(names(data_list))]) + stop( + "load_data_files(): Duplicate file names detected: ", + paste(dup_names, collapse = ", "), + ". Please ensure all file names are unique." + ) + } + + return(data_list) +} diff --git a/R/utils.R b/R/utils.R index 45a19c3..9950cbe 100644 --- a/R/utils.R +++ b/R/utils.R @@ -68,54 +68,3 @@ get_file_paths <- function(dir_path, file_names, prefer_sas = FALSE) { return(normalizePath(unlist(file_paths))) } - - - -#' Load Data Files -#' -#' This function reads data from multiple file paths and returns a list of data frames. -#' It supports reading RDS and SAS7BDAT files. -#' -#' @param file_paths [character(1+)] A vector of file paths to read. 
-#' -#' @return [list] A named list of data frames, where each name is the basename of the corresponding file path. -#' -#' @examples -#' path <- system.file("examples", "iris.sas7bdat", package = "haven") -#' data_list <- load_data_files(file_paths = path) -#' str(data_list) -#' -#' @export -load_data_files <- function(file_paths) { - checkmate::assert_character(file_paths, min.len = 1) - checkmate::assert_file_exists(file_paths) - - data_list <- lapply(file_paths, function(file_path) { - extension <- tools::file_ext(file_path) - - if (tolower(extension) == "rds") { - data <- readRDS(file_path) - } else if (tolower(extension) == "sas7bdat") { - data <- haven::read_sas(file_path) - } else { - stop("Unsupported file extension: ", extension) - } - - meta <- file.info(file_path, extra_cols = FALSE) - meta[["path"]] <- file_path - meta[["file_name"]] <- basename(file_path) - - rownames(data) <- NULL - attr(data, "meta") <- meta - - return(data) - }) - - names(data_list) <- tools::file_path_sans_ext(basename(file_paths)) - - if (any(duplicated(names(data_list)))) { - stop("load_data_files(): Duplicate file names detected. Please ensure all file names are unique.") - } - - return(data_list) -} diff --git a/man/load_data_files.Rd b/man/load_data_files.Rd index ccaac13..e0ae1d0 100644 --- a/man/load_data_files.Rd +++ b/man/load_data_files.Rd @@ -1,5 +1,5 @@ % Generated by roxygen2: do not edit by hand -% Please edit documentation in R/utils.R +% Please edit documentation in R/dvloader.R \name{load_data_files} \alias{load_data_files} \title{Load Data Files} @@ -10,7 +10,7 @@ load_data_files(file_paths) \item{file_paths}{[character(1+)] A vector of file paths to read.} } \value{ -[list] A named list of data frames, where each name is the basename of the corresponding file path. +[list] A named list of data frames, where each name corresponds to a loaded file. } \description{ This function reads data from multiple file paths and returns a list of data frames. 
diff --git a/vignettes/data-loading.Rmd b/vignettes/data-loading.Rmd index c47aa4b..b0ac24d 100644 --- a/vignettes/data-loading.Rmd +++ b/vignettes/data-loading.Rmd @@ -31,8 +31,11 @@ dir.create(file.path(temp_dir, "sub_dir2")) # Save example datasets from pharmaverse packages as RDS files saveRDS(iris, file.path(temp_dir, "sub_dir1", "iris.rds")) saveRDS(mtcars, file.path(temp_dir, "sub_dir2", "mtcars.rds")) +``` + +Now we can load the data files into a list using `load_data_files()`. This function will read each file and return a named list of data frames. By default, the names in the list will be derived from the file names (without extensions). -# Load both data files using load_data_files() +```{r} data_list <- dv.loader::load_data_files( file_paths = c( file.path(temp_dir, "sub_dir1", "iris.rds"), @@ -40,6 +43,18 @@ data_list <- dv.loader::load_data_files( ) ) -# Display the structure of the loaded data -str(data_list) +names(data_list) +``` + +You can also customize the names of the data frames in the returned list by providing named file paths. This is useful when you want to give the loaded data frames more descriptive or meaningful names than their file names. + +```{r} +data_list2 <- dv.loader::load_data_files( + file_paths = c( + iris_data = file.path(temp_dir, "sub_dir1", "iris.rds"), + mtcars_data = file.path(temp_dir, "sub_dir2", "mtcars.rds") + ) +) + +names(data_list2) ```