diff --git a/.gitignore b/.gitignore index 67b8271..b852c3a 100644 --- a/.gitignore +++ b/.gitignore @@ -2,9 +2,8 @@ .Rhistory .RData .Ruserdata -*.sqlite -*.sqlite-journal -*.rds +.Rprofile + .vscode + docs -pkgdown diff --git a/.lintr b/.lintr index d2d542e..767a8e8 100644 --- a/.lintr +++ b/.lintr @@ -1,6 +1,6 @@ linters: linters_with_defaults( - line_length_linter(120), - object_usage_linter = NULL, - indentation_linter = NULL, - trailing_whitespace_linter = NULL + line_length_linter(120), + object_usage_linter = NULL, + indentation_linter = NULL, + trailing_whitespace_linter = NULL ) diff --git a/DESCRIPTION b/DESCRIPTION index a12e282..f8877f8 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,22 +1,29 @@ Package: dv.loader Type: Package -Title: Data loading module -Version: 2.0.0 +Title: Data Loader for DaVinci Modular Applications +Version: 3.0.0 Authors@R: c( - person( "Boehringer-Ingelheim Pharma GmbH & Co.KG", role = c("cph", "fnd")), - person( given = "Ming", family = "Yang", role = c("aut", "cre"), email = "ming.yang.ext@boehringer-ingelheim.com"), - person( given = "Steven", family = "Brooks", role = "aut", email = "steven.brooks@boehringer-ingelheim.com"), - person( given = "Sorin", family = "Voicu", role = "aut", email = "sorin.voicu.ext@boehringer-ingelheim.com") + person("Boehringer-Ingelheim Pharma GmbH & Co.KG", role = c("cph", "fnd")), + person("Ming", "Yang", email = "ming.yang.ext@boehringer-ingelheim.com", role = c("aut", "cre")), + person("Steven", "Brooks", email = "steven.brooks@boehringer-ingelheim.com", role = "aut"), + person("Sorin", "Voicu", email = "sorin.voicu.ext@boehringer-ingelheim.com", role = "aut") ) -Description: This is a module for loading .RDS / .sas7bdat data files from a network file storage environment. It also allows loading data locally. +Description: dv.loader offers a streamlined method for importing multiple data files in R, + tailored for seamless integration with DaVinci modular applications. 
License: Apache License (>= 2) Encoding: UTF-8 LazyData: true -Depends: R (>= 3.5.0) -Imports: haven +Depends: R (>= 4.0.0) +Imports: + checkmate (>= 2.3.2), + haven (>= 2.5.4), + lifecycle (>= 1.0.4) Suggests: - testthat, - knitr, - rmarkdown -RoxygenNote: 7.3.0 + knitr (>= 1.48), + pharmaverseadam (>= 1.0.0), + pharmaversesdtm (>= 1.0.0), + rmarkdown (>= 2.38), + testthat (>= 3.2.1.1) +RoxygenNote: 7.3.2 VignetteBuilder: knitr +Config/testthat/edition: 3 diff --git a/NAMESPACE b/NAMESPACE index 79ce8a0..595a91d 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -1,5 +1,7 @@ # Generated by roxygen2: do not edit by hand -export(get_cre_path) -export(get_nfs_path) export(load_data) +export(load_rds) +export(load_sas) +export(load_xpt) +importFrom(lifecycle,deprecate_warn) diff --git a/NEWS.md b/NEWS.md index a5c5124..63ce9ea 100644 --- a/NEWS.md +++ b/NEWS.md @@ -1,3 +1,8 @@ +# dv.loader 3.0.0 + +- Introduced a new set of functions `load_rds()`, `load_sas()`, and `load_xpt()`. +- Deprecated the function `load_data()`. + # dv.loader 2.0.0 - GitHub release with QC report diff --git a/R/dvloader.R b/R/dvloader.R deleted file mode 100644 index a36a7b9..0000000 --- a/R/dvloader.R +++ /dev/null @@ -1,61 +0,0 @@ -#' gets the NFS base path from an env var -#' It assumes there is an env var -#' called RXD_DATA which holds the path suffix. -#' @return the NFS base path -#' @export -get_nfs_path <- function() { - base_path <- Sys.getenv("RXD_DATA") - # check that RXD_DATA is set - if (base_path == "") { - stop("Usage: get_nfs_path: RXD_DATA must be set") - } - return(base_path) -} - -#' gets the NFS base path from an env var -#' alias for get_nfs_path to maintain backwards compatibility -#' @export -get_cre_path <- get_nfs_path - -#' Loads data into memory based on study directory and one or more file_names. -#' @param sub_dir A relative directory/folder that will be appended to a base path defined by `Sys.getenv("RXD_DATA")`. 
-#' If the argument is left as NULL, the function will load data from the working directory `getwd()`. -#' @param file_names Study file or file_names name(s) - can be a vector of strings. -#' This is the only required argument. -#' @param use_wd for "use working directory" - a flag used when importing local files -#' not on NFS - default value is FALSE -#' @param prefer_sas if set to TRUE, imports sas7bdat files first before looking for -#' RDS files (the opposite of default behavior) -#' @return a list of dataframes -#' @export -#' @examples -#' \dontrun{ -#' test_data_path <- "../inst/extdata/" -#' data_list <- load_data( -#' sub_dir = test_data_path, -#' file_names = "dummyads2", -#' use_wd = TRUE -#' ) -#' } -load_data <- function(sub_dir = NULL, file_names, use_wd = FALSE, prefer_sas = FALSE) { - if (is.null(file_names)) { - stop("Usage: load_data: file_names: Must supply at least one file name") - } - - study_path <- "" # will be built using args - - if (is.null(sub_dir)) { - study_path <- getwd() - } else { - if (use_wd) { - study_path <- file.path(getwd(), sub_dir) - } else { - study_path <- file.path(get_cre_path(), sub_dir) - } - } - - # create the output - data_list <- create_data_list(study_path, file_names, prefer_sas) # nolint - - return(data_list) -} diff --git a/R/load_data.R b/R/load_data.R new file mode 100644 index 0000000..0d4194f --- /dev/null +++ b/R/load_data.R @@ -0,0 +1,146 @@ +#' Loads data into memory based on study directory and one or more file_names. +#' @param sub_dir A relative directory/folder that will be appended to a base path defined by `Sys.getenv("RXD_DATA")`. +#' If the argument is left as NULL, the function will load data from the working directory `getwd()`. +#' @param file_names Study file or file_names name(s) - can be a vector of strings. +#' This is the only required argument. 
+#' @param use_wd for "use working directory" - a flag used when importing local files +#' not on NFS - default value is FALSE +#' @param prefer_sas if set to TRUE, imports sas7bdat files first before looking for +#' RDS files (the opposite of default behavior) +#' @return a list of dataframes +#' @export +#' @examples +#' \dontrun{ +#' test_data_path <- "../inst/extdata/" +#' data_list <- load_data( +#' sub_dir = test_data_path, +#' file_names = "dummyads2", +#' use_wd = TRUE +#' ) +#' } +#' @export +#' @importFrom lifecycle deprecate_warn +load_data <- function(sub_dir = NULL, file_names, use_wd = FALSE, prefer_sas = FALSE) { + lifecycle::deprecate_warn("3.0.0", "load_data()", details = "Use load_rds(), load_sas(), or load_xpt() instead.") + + if (is.null(file_names)) { + stop("Usage: load_data: file_names: Must supply at least one file name") + } + + study_path <- "" # will be built using args + + if (is.null(sub_dir)) { + study_path <- getwd() + } else { + if (use_wd) { + study_path <- file.path(getwd(), sub_dir) + } else { + study_path <- file.path(get_cre_path(), sub_dir) + } + } + + # create the output + data_list <- create_data_list(study_path, file_names, prefer_sas) # nolint + + return(data_list) +} + +#' gets the NFS base path from an env var +#' It assumes there is an env var +#' called RXD_DATA which holds the path suffix. +#' @return the NFS base path +get_nfs_path <- function() { + base_path <- Sys.getenv("RXD_DATA") + # check that RXD_DATA is set + if (base_path == "") { + stop("Usage: get_nfs_path: RXD_DATA must be set") + } + return(base_path) +} + +#' gets the NFS base path from an env var +#' alias for get_nfs_path to maintain backwards compatibility +get_cre_path <- get_nfs_path + + +#' For each file name provided, reads in the first matching file and its meta data/attributes. 
+#' Preference is given to RDS because its faster +#' @param file_path the folder where the files are +#' @param file_names CDISC names for the files +#' @param prefer_sas if TRUE, imports .sas7bdat files first instead of .RDS files +#' @return returns a list of dataframes with metadata as an attribute on each dataframe +create_data_list <- function(file_path, file_names, prefer_sas) { + data_list <- lapply(file_names, function(x) { + extensions <- c("", ".rds", ".sas7bdat") + if (prefer_sas) { + extensions <- c("", ".sas7bdat", ".rds") + } + + file_name_to_load <- NULL + + candidates <- list.files(file_path) + uppercase_candidates <- Map(toupper, candidates) + + for (ext in extensions) { + # Case insensitive file name match + uppercase_file_name <- toupper(paste0(x, ext)) + + match_count <- sum(uppercase_candidates == uppercase_file_name) + if (match_count > 1) { + stop(paste("create_data_list(): More than one case-insensitive file name match for", file_path, x)) + } + + index <- match(uppercase_file_name, uppercase_candidates) + if (!is.na(index)) { + file_name_to_load <- candidates[[index]] + break + } + } + + if (is.null(file_name_to_load)) { + stop(paste("create_data_list(): No RDS or SAS files found for", file_path, x)) + } + + output <- read_file(file_path, file_name_to_load) + + return(output) + }) + + names(data_list) <- file_names + + return(data_list) +} + + +#' Reads RDS/SAS file and metadatas from first 6 items from file.info() its file path +#' @param file_path a path to a file +#' @param file_name name of a file +#' @return a data object with an extra attribute of metadata +read_file <- function(file_path, file_name) { + ext <- tools::file_ext(file_name) + + if (!(toupper(ext) %in% c("RDS", "SAS7BDAT"))) { + stop("Usage error: read_file: file_name: file must either be RDS or SAS7BDAT.") + } + + is_rds <- toupper(ext) == "RDS" + + file <- file.path(file_path, file_name) + file_name <- tools::file_path_sans_ext(file_name) + + # grab file info + meta <- 
file.info(file)[1L:6L] + meta[["path"]] <- row.names(meta) + meta[["file_name"]] <- file_name + meta <- data.frame(meta, stringsAsFactors = FALSE) + row.names(meta) <- NULL + + if (is_rds) { + out <- readRDS(file) + } else { + out <- haven::read_sas(file) + } + attr(out, "meta") <- meta + + return(out) +} diff --git a/R/load_rds.R b/R/load_rds.R new file mode 100644 index 0000000..a1a9008 --- /dev/null +++ b/R/load_rds.R @@ -0,0 +1,47 @@ +#' Load RDS files +#' +#' This function loads RDS files via readRDS() and returns a list of data frames. +#' +#' @param files A character vector of file paths to RDS files. +#' @return A list of data frames, each containing the data from an RDS file. +#' @examples +#' # Create temporary directory and files +#' temp_dir <- tempdir() +#' adsl_rds_file <- file.path(temp_dir, "adsl.rds") +#' adae_rds_file <- file.path(temp_dir, "adae.rds") +#' +#' # Write example data to RDS files +#' saveRDS(pharmaverseadam::adsl, adsl_rds_file) +#' saveRDS(pharmaverseadam::adae, adae_rds_file) +#' +#' # Load RDS files +#' rds_data_list <- load_rds(c(adsl_rds_file, adae_rds_file)) +#' +#' # Clean up +#' unlink(c(adsl_rds_file, adae_rds_file)) +#' @export +load_rds <- function(files) { + # Check if files is a character vector + checkmate::assert_character(files) + + # Read each file and add metadata + data_list <- lapply(files, function(file) { + # Check if file exists + checkmate::assert_file_exists(file) + # Check if file is an RDS file (check_file_ext() only returns a logical, so assert on it) + checkmate::assert_true(check_file_ext(file, extension = "rds")) + + # Read RDS file + data <- readRDS(file) + + # Get file info and add to data as an attribute + attr(data, "meta") <- file_info(file) + + return(data) + }) + + # Set names of data_list to the file names + names(data_list) <- basename(files) + + return(data_list) +} diff --git a/R/load_sas.R b/R/load_sas.R new file mode 100644 index 0000000..01c7a4c --- /dev/null +++ b/R/load_sas.R @@ -0,0 +1,46 @@ +#' Load SAS files +#' +#' This function loads SAS files via 
haven::read_sas() and returns a list of data frames. +#' +#' @param files A character vector of file paths to SAS files. +#' @return A list of data frames, each containing the data from a SAS file. +#' @examples +#' # Create temporary directory and files +#' temp_dir <- tempdir() +#' adsl_sas_file <- file.path(temp_dir, "adsl.sas7bdat") +#' adae_sas_file <- file.path(temp_dir, "adae.sas7bdat") +#' +#' # Write example data to SAS files +#' haven::write_sas(pharmaverseadam::adsl, adsl_sas_file) +#' haven::write_sas(pharmaverseadam::adae, adae_sas_file) +#' +#' # Load SAS files +#' sas_data_list <- load_sas(c(adsl_sas_file, adae_sas_file)) +#' +#' # Clean up +#' unlink(c(adsl_sas_file, adae_sas_file)) +#' @export +load_sas <- function(files) { + # Check if files is a character vector + checkmate::assert_character(files) + + # Read each file and add metadata + data_list <- lapply(files, function(file) { + # Check if file exists + checkmate::assert_file_exists(file) + # Check if file is a SAS file (check_file_ext() only returns a logical, so assert on it) + checkmate::assert_true(check_file_ext(file, extension = "sas7bdat")) + + # Read SAS file + data <- haven::read_sas(file) + + # Get file info and add to data as an attribute + attr(data, "meta") <- file_info(file) + + return(data) + }) + + # Set names of data_list to the file names + names(data_list) <- basename(files) + return(data_list) +} diff --git a/R/load_xpt.R b/R/load_xpt.R new file mode 100644 index 0000000..debf34b --- /dev/null +++ b/R/load_xpt.R @@ -0,0 +1,47 @@ +#' Load XPT files +#' +#' This function loads XPT files via haven::read_xpt() and returns a list of data frames. +#' +#' @param files A character vector of file paths to XPT files. +#' @return A list of data frames, each containing the data from an XPT file. 
+#' @examples +#' # Create temporary directory and files +#' temp_dir <- tempdir() +#' adsl_xpt_file <- file.path(temp_dir, "adsl.xpt") +#' adae_xpt_file <- file.path(temp_dir, "adae.xpt") +#' +#' # Write example data to XPT files +#' haven::write_xpt(pharmaverseadam::adsl, adsl_xpt_file) +#' haven::write_xpt(pharmaverseadam::adae, adae_xpt_file) +#' +#' # Load XPT files +#' xpt_data_list <- load_xpt(c(adsl_xpt_file, adae_xpt_file)) +#' +#' # Clean up +#' unlink(c(adsl_xpt_file, adae_xpt_file)) +#' @export +load_xpt <- function(files) { + # Check if files is a character vector + checkmate::assert_character(files) + + # Read each file and add metadata + data_list <- lapply(files, function(file) { + # Check if file exists + checkmate::assert_file_exists(file) + # Check if file is an XPT file (check_file_ext() only returns a logical, so assert on it) + checkmate::assert_true(check_file_ext(file, extension = "xpt")) + + # Read XPT file + data <- haven::read_xpt(file) + + # Get file info and add to data as an attribute + attr(data, "meta") <- file_info(file) + + return(data) + }) + + # Set names of data_list to the file names + names(data_list) <- basename(files) + + return(data_list) +} diff --git a/R/utils.R b/R/utils.R index a3791e1..6204502 100644 --- a/R/utils.R +++ b/R/utils.R @@ -1,81 +1,55 @@ -#' For each file name provided, reads in the first matching file and its meta data/attributes. 
-#' Preference is given to RDS because its faster -#' @param file_path the folder where the files are -#' @param file_names CDISC names for the files -#' @param prefer_sas if TRUE, imports .sas7bdat files first instead of .RDS files -#' @return returns a list of dataframes with metadata as an attribute on each dataframe -create_data_list <- function(file_path, file_names, prefer_sas) { - data_list <- lapply(file_names, function(x) { - extensions <- c("", ".rds", ".sas7bdat") - if (prefer_sas) { - extensions <- c("", ".sas7bdat", ".rds") - } - - file_name_to_load <- NULL - - candidates <- list.files(file_path) - uppercase_candidates <- Map(toupper, candidates) - - for (ext in extensions) { - # Case insensitive file name match - uppercase_file_name <- toupper(paste0(x, ext)) - - match_count <- sum(uppercase_candidates == uppercase_file_name) - if (match_count > 1) { - stop(paste("create_data_list(): More than one case-insensitive file name match for", file_path, x)) - } - - index <- match(uppercase_file_name, uppercase_candidates) - if (!is.na(index)) { - file_name_to_load <- candidates[[index]] - break - } - } - - if (is.null(file_name_to_load)) { - stop(paste("create_data_list(): No RDS or SAS files found for", file_path, x)) - } - - output <- read_file(file_path, file_name_to_load) - - return(output) - }) - - names(data_list) <- file_names - - return(data_list) +#' Check if a file has a specific extension (case-insensitive) +#' +#' This function checks if a given file has a specific extension, ignoring case. +#' +#' @param file A character string specifying the path to the file. +#' @param extension A character string specifying the expected file extension (without the dot). +#' @return A logical value: TRUE if the file has the specified extension, FALSE otherwise. 
+check_file_ext <- function(file, extension) { + # Check input types + checkmate::assert_string(file) + checkmate::assert_string(extension) + + # Extract file extension (case-insensitive) + file_ext <- tolower(tools::file_ext(file)) + + # Check that the file extension is not empty + checkmate::assert_true(file_ext != "") + + # Check that the file extension is one of the allowed choices + checkmate::assert_choice(file_ext, choices = c("rds", "sas7bdat", "xpt")) + + # Compare with the given extension (case-insensitive) + return(file_ext == tolower(extension)) } -#' Reads RDS/SAS file and metadatas from first 6 items from file.info() its file path -#' @param file_path a path to a file -#' @param file_name name of a file -#' @return a data object with an extra attribute of metadata -read_file <- function(file_path, file_name) { - ext <- tools::file_ext(file_name) - - if (!(toupper(ext) %in% c("RDS", "SAS7BDAT"))) { - stop("Usage error: read_file: file_name: file must either be RDS or SAS7BDAT.") - } - - is_rds <- toupper(ext) == "RDS" - - file <- file.path(file_path, file_name) - file_name <- tools::file_path_sans_ext(file_name) - - # grab file info - meta <- file.info(file)[1L:6L] - meta[["path"]] <- row.names(meta) - meta[["file_name"]] <- file_name - meta <- data.frame(meta, stringsAsFactors = FALSE) - row.names(meta) <- NULL - - if (is_rds) { - out <- readRDS(file) - } else { - out <- haven::read_sas(file) - } - attr(out, "meta") <- meta - - return(out) +#' Extract file information based on file.info() +#' +#' This function extracts file information from a given file. +#' +#' @param file A character string specifying the path to the file. +#' @return A list containing file information from file.info(file, extra_cols = FALSE) and the path and file name. 
+file_info <- function(file) { + # Check if the file exists + checkmate::assert_file_exists(file) + + # Get file information from file.info() + info <- file.info(file, extra_cols = FALSE) + + # Get the path from the rownames + path <- rownames(info) + + # Check file and path are the same + checkmate::assert_true(file == path) + + # Add path and file name + info[["path"]] <- path + info[["file_name"]] <- basename(path) + + # Convert to list to remove row names + info <- as.list(info) + + # Return the file information as a list + return(info) } diff --git a/README.md b/README.md index 1199129..e3af8fa 100644 --- a/README.md +++ b/README.md @@ -1,30 +1,55 @@ -# Data Loading - -The {dv.loader} package provides a simple interface for loading data from a network file storage folder or -locally. It is designed to be used with `.RDS` and `.sas7bdat` file formats. -The package provides a simple function, `load_data()`, which loads R and SAS data files into memory. -Loading data from SQL databases is not yet supported. The function returns a list named by the file names passed, -and containing data frames, along with metadata for that table. By default, the function will look for files in a -sub-directory `sub_dir` of the base path defined by a environment variable "RXD_DATA". You can check if the base path -is set by running `Sys.getenv("RXD_DATA")`. A single file or multiple files can be loaded at once. -To make the loading process faster for large datasets, it is suggested that '.sas7bdat' files are converted to -'.RDS' files. The function will prefer '.RDS' files over '.sas7bdat' files by default. +# dv.loader + +The {dv.loader} package offers a unified approach for loading various data file formats in R. It provides a set of functions to import RDS, SAS, and XPT files, with built-in error handling and metadata extraction. 
The package is specifically designed to work seamlessly with the [{dv.manager}](https://boehringer-ingelheim.github.io/dv.manager/) package, facilitating the creation of modular Shiny applications within the DaVinci framework. + +## Key Features + +Below are the key features of {dv.loader}: + +- **Versatile File Formats**: Currently supports RDS, SAS, and XPT files. +- **Rich Metadata**: Includes file-specific metadata for each dataset. +- **Error Handling**: Checks for file existence and format for each file. +- **Consistent Output**: Returns a named list of data frames and associated metadata. +- **DaVinci Framework Integration**: Works seamlessly with other DaVinci framework packages. ## Installation +While {dv.loader} is not currently available on CRAN, you can obtain the development version from GitHub using the following method: + ```r +# Install the {remotes} package if you haven't already if (!require("remotes")) install.packages("remotes") + +# Install the development version of {dv.loader} remotes::install_github("Boehringer-Ingelheim/dv.loader") + +# Check the package version +packageVersion("dv.loader") ``` -## Basic usage +NOTE: The legacy `load_data()` function has been deprecated in version 3.0.0 of {dv.loader}. It is strongly recommended to transition to the new set of functions for data loading. See the [Migration Guide](https://boehringer-ingelheim.github.io/dv.loader/articles/migration-guide.html) vignette for more details. -```r -# getting data from a network file storage folder -dv.loader::load_data(sub_dir = "subdir1/subdir2", file_names = c("adsl", "adae")) -``` +## Main Functions + +The package includes a collection of functions designed to handle various file formats. 
+ +- `load_rds()`: Imports RDS files (extension: `.rds`) +- `load_sas()`: Imports SAS files (extension: `.sas7bdat`) +- `load_xpt()`: Imports XPT files (extension: `.xpt`) + +These functions provide error handling, metadata extraction, and return a consistent output format of named lists containing data frames and associated metadata, streamlining the process of importing multiple data files in R. + +## Working Example + +To illustrate the usage of {dv.loader}, let's explore a hands-on example using the `load_sas()` function to import SAS data. This example serves as a template for working with other file formats, as `load_rds()` and `load_xpt()` functions operate in a similar manner. ```r -# getting data locally (e.g., if you have file `./data/adsl.RDS`) -dv.loader::load_data(sub_dir = "data", file_names = c("adsl"), use_wd = TRUE) +# Identify the path to the directory containing the data files +data_dir <- system.file("extdata", "pharmaverseadam", package = "dv.loader") + +# Provide the vector of file names to be loaded +file_names <- paste0(c("adsl", "adae"), ".sas7bdat") + +# Load SAS data files from the specified directory +dv.loader::load_sas(files = file.path(data_dir, file_names)) ``` diff --git a/_pkgdown.yml b/_pkgdown.yml index 3cc3a6f..8de0756 100644 --- a/_pkgdown.yml +++ b/_pkgdown.yml @@ -9,8 +9,9 @@ navbar: qc: text: Quality Control href: articles/qc.html + home: - title: dv.loader + title: Data Loader for DaVinci Modular Applications links: - text: Browse source code href: https://github.com/Boehringer-Ingelheim/dv.loader diff --git a/inst/extdata/data.R b/inst/extdata/data.R new file mode 100644 index 0000000..2dd1a3d --- /dev/null +++ b/inst/extdata/data.R @@ -0,0 +1,36 @@ +# Save data to inst/extdata +data_dir <- "inst/extdata" + +# Create directory for pharmaverseadam data +if (!dir.exists(file.path(data_dir, "pharmaverseadam"))) { + dir.create(file.path(data_dir, "pharmaverseadam"), recursive = TRUE) +} + +# Save adsl data +haven::write_sas( + 
data = pharmaverseadam::adsl, + path = file.path(data_dir, "pharmaverseadam", "adsl.sas7bdat") +) + +# Save adae data +haven::write_sas( + data = pharmaverseadam::adae, + path = file.path(data_dir, "pharmaverseadam", "adae.sas7bdat") +) + +# Create directory for pharmaversesdtm data +if (!dir.exists(file.path(data_dir, "pharmaversesdtm"))) { + dir.create(file.path(data_dir, "pharmaversesdtm"), recursive = TRUE) +} + +# Save dm data +haven::write_sas( + data = pharmaversesdtm::dm, + path = file.path(data_dir, "pharmaversesdtm", "dm.sas7bdat") +) + +# Save ae data +haven::write_sas( + data = pharmaversesdtm::ae, + path = file.path(data_dir, "pharmaversesdtm", "ae.sas7bdat") +) diff --git a/inst/extdata/pharmaverseadam/adae.sas7bdat b/inst/extdata/pharmaverseadam/adae.sas7bdat new file mode 100644 index 0000000..1a5c7e9 Binary files /dev/null and b/inst/extdata/pharmaverseadam/adae.sas7bdat differ diff --git a/inst/extdata/pharmaverseadam/adsl.sas7bdat b/inst/extdata/pharmaverseadam/adsl.sas7bdat new file mode 100644 index 0000000..f0e27d6 Binary files /dev/null and b/inst/extdata/pharmaverseadam/adsl.sas7bdat differ diff --git a/inst/extdata/pharmaversesdtm/ae.sas7bdat b/inst/extdata/pharmaversesdtm/ae.sas7bdat new file mode 100644 index 0000000..7e30215 Binary files /dev/null and b/inst/extdata/pharmaversesdtm/ae.sas7bdat differ diff --git a/inst/extdata/pharmaversesdtm/dm.sas7bdat b/inst/extdata/pharmaversesdtm/dm.sas7bdat new file mode 100644 index 0000000..2758b06 Binary files /dev/null and b/inst/extdata/pharmaversesdtm/dm.sas7bdat differ diff --git a/man/check_file_ext.Rd b/man/check_file_ext.Rd new file mode 100644 index 0000000..9528434 --- /dev/null +++ b/man/check_file_ext.Rd @@ -0,0 +1,19 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/utils.R +\name{check_file_ext} +\alias{check_file_ext} +\title{Check if a file has a specific extension (case-insensitive)} +\usage{ +check_file_ext(file, extension) +} +\arguments{ 
+\item{file}{A character string specifying the path to the file.} + +\item{extension}{A character string specifying the expected file extension (without the dot).} +} +\value{ +A logical value: TRUE if the file has the specified extension, FALSE otherwise. +} +\description{ +This function checks if a given file has a specific extension, ignoring case. +} diff --git a/man/create_data_list.Rd b/man/create_data_list.Rd index 2368307..b945601 100644 --- a/man/create_data_list.Rd +++ b/man/create_data_list.Rd @@ -1,5 +1,5 @@ % Generated by roxygen2: do not edit by hand -% Please edit documentation in R/utils.R +% Please edit documentation in R/load_data.R \name{create_data_list} \alias{create_data_list} \title{For each file name provided, reads in the first matching file and its meta data/attributes. diff --git a/man/file_info.Rd b/man/file_info.Rd new file mode 100644 index 0000000..33c113c --- /dev/null +++ b/man/file_info.Rd @@ -0,0 +1,17 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/utils.R +\name{file_info} +\alias{file_info} +\title{Extract file information based on file.info()} +\usage{ +file_info(file) +} +\arguments{ +\item{file}{A character string specifying the path to the file.} +} +\value{ +A list containing file information from file.info(file, extra_cols = FALSE) and the path and file name. +} +\description{ +This function extracts file information from a given file. 
+} diff --git a/man/get_cre_path.Rd b/man/get_cre_path.Rd index 86b2b78..d68a42b 100644 --- a/man/get_cre_path.Rd +++ b/man/get_cre_path.Rd @@ -1,5 +1,5 @@ % Generated by roxygen2: do not edit by hand -% Please edit documentation in R/dvloader.R +% Please edit documentation in R/load_data.R \name{get_cre_path} \alias{get_cre_path} \title{gets the NFS base path from an env var diff --git a/man/get_nfs_path.Rd b/man/get_nfs_path.Rd index 5cbad09..07273b5 100644 --- a/man/get_nfs_path.Rd +++ b/man/get_nfs_path.Rd @@ -1,5 +1,5 @@ % Generated by roxygen2: do not edit by hand -% Please edit documentation in R/dvloader.R +% Please edit documentation in R/load_data.R \name{get_nfs_path} \alias{get_nfs_path} \title{gets the NFS base path from an env var diff --git a/man/load_data.Rd b/man/load_data.Rd index 5367158..8d44207 100644 --- a/man/load_data.Rd +++ b/man/load_data.Rd @@ -1,5 +1,5 @@ % Generated by roxygen2: do not edit by hand -% Please edit documentation in R/dvloader.R +% Please edit documentation in R/load_data.R \name{load_data} \alias{load_data} \title{Loads data into memory based on study directory and one or more file_names.} @@ -7,7 +7,7 @@ load_data(sub_dir = NULL, file_names, use_wd = FALSE, prefer_sas = FALSE) } \arguments{ -\item{sub_dir}{A relative directory/folder that will be appended to a base path defined by `Sys.getenv("RXD_DATA")`. +\item{sub_dir}{A relative directory/folder that will be appended to a base path defined by `Sys.getenv("RXD_DATA")`. If the argument is left as NULL, the function will load data from the working directory `getwd()`.} \item{file_names}{Study file or file_names name(s) - can be a vector of strings. 
diff --git a/man/load_rds.Rd b/man/load_rds.Rd new file mode 100644 index 0000000..c82a15b --- /dev/null +++ b/man/load_rds.Rd @@ -0,0 +1,33 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/load_rds.R +\name{load_rds} +\alias{load_rds} +\title{Load RDS files} +\usage{ +load_rds(files) +} +\arguments{ +\item{files}{A character vector of file paths to RDS files.} +} +\value{ +A list of data frames, each containing the data from an RDS file. +} +\description{ +This function loads RDS files via readRDS() and returns a list of data frames. +} +\examples{ +# Create temporary directory and files +temp_dir <- tempdir() +adsl_rds_file <- file.path(temp_dir, "adsl.rds") +adae_rds_file <- file.path(temp_dir, "adae.rds") + +# Write example data to RDS files +saveRDS(pharmaverseadam::adsl, adsl_rds_file) +saveRDS(pharmaverseadam::adae, adae_rds_file) + +# Load RDS files +rds_data_list <- load_rds(c(adsl_rds_file, adae_rds_file)) + +# Clean up +unlink(c(adsl_rds_file, adae_rds_file)) +} diff --git a/man/load_sas.Rd b/man/load_sas.Rd new file mode 100644 index 0000000..e434167 --- /dev/null +++ b/man/load_sas.Rd @@ -0,0 +1,33 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/load_sas.R +\name{load_sas} +\alias{load_sas} +\title{Load SAS files} +\usage{ +load_sas(files) +} +\arguments{ +\item{files}{A character vector of file paths to SAS files.} +} +\value{ +A list of data frames, each containing the data from a SAS file. +} +\description{ +This function loads SAS files via haven::read_sas() and returns a list of data frames. 
+} +\examples{ +# Create temporary directory and files +temp_dir <- tempdir() +adsl_sas_file <- file.path(temp_dir, "adsl.sas7bdat") +adae_sas_file <- file.path(temp_dir, "adae.sas7bdat") + +# Write example data to SAS files +haven::write_sas(pharmaverseadam::adsl, adsl_sas_file) +haven::write_sas(pharmaverseadam::adae, adae_sas_file) + +# Load SAS files +sas_data_list <- load_sas(c(adsl_sas_file, adae_sas_file)) + +# Clean up +unlink(c(adsl_sas_file, adae_sas_file)) +} diff --git a/man/load_xpt.Rd b/man/load_xpt.Rd new file mode 100644 index 0000000..80a6a01 --- /dev/null +++ b/man/load_xpt.Rd @@ -0,0 +1,33 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/load_xpt.R +\name{load_xpt} +\alias{load_xpt} +\title{Load XPT files} +\usage{ +load_xpt(files) +} +\arguments{ +\item{files}{A character vector of file paths to XPT files.} +} +\value{ +A list of data frames, each containing the data from an XPT file. +} +\description{ +This function loads XPT files via haven::read_xpt() and returns a list of data frames. 
+} +\examples{ +# Create temporary directory and files +temp_dir <- tempdir() +adsl_xpt_file <- file.path(temp_dir, "adsl.xpt") +adae_xpt_file <- file.path(temp_dir, "adae.xpt") + +# Write example data to XPT files +haven::write_xpt(pharmaverseadam::adsl, adsl_xpt_file) +haven::write_xpt(pharmaverseadam::adae, adae_xpt_file) + +# Load XPT files +xpt_data_list <- load_xpt(c(adsl_xpt_file, adae_xpt_file)) + +# Clean up +unlink(c(adsl_xpt_file, adae_xpt_file)) +} diff --git a/man/read_file.Rd b/man/read_file.Rd index e46767b..c07c14f 100644 --- a/man/read_file.Rd +++ b/man/read_file.Rd @@ -1,5 +1,5 @@ % Generated by roxygen2: do not edit by hand -% Please edit documentation in R/utils.R +% Please edit documentation in R/load_data.R \name{read_file} \alias{read_file} \title{Reads RDS/SAS file and metadatas from first 6 items from file.info() its file path} diff --git a/tests/testthat/inst/extdata/bad_file_type.myrds b/tests/testthat/inst/extdata/bad_file_type.myrds deleted file mode 100644 index 2ca4327..0000000 Binary files a/tests/testthat/inst/extdata/bad_file_type.myrds and /dev/null differ diff --git a/tests/testthat/inst/extdata/bad_file_type.txt b/tests/testthat/inst/extdata/bad_file_type.txt deleted file mode 100644 index e69de29..0000000 diff --git a/tests/testthat/inst/extdata/dummyads1.RDS b/tests/testthat/inst/extdata/dummyads1.RDS deleted file mode 100644 index 06add3b..0000000 Binary files a/tests/testthat/inst/extdata/dummyads1.RDS and /dev/null differ diff --git a/tests/testthat/inst/extdata/dummyads1.sas7bdat b/tests/testthat/inst/extdata/dummyads1.sas7bdat deleted file mode 100644 index bc7cb8a..0000000 Binary files a/tests/testthat/inst/extdata/dummyads1.sas7bdat and /dev/null differ diff --git a/tests/testthat/inst/extdata/dummyads2.RDS b/tests/testthat/inst/extdata/dummyads2.RDS deleted file mode 100644 index 2ca4327..0000000 Binary files a/tests/testthat/inst/extdata/dummyads2.RDS and /dev/null differ diff --git 
a/tests/testthat/inst/extdata/dummyads2.sas7bdat b/tests/testthat/inst/extdata/dummyads2.sas7bdat deleted file mode 100644 index 6eeec6d..0000000 Binary files a/tests/testthat/inst/extdata/dummyads2.sas7bdat and /dev/null differ diff --git a/tests/testthat/inst/extdata/just_rds/dummyads1.RDS b/tests/testthat/inst/extdata/just_rds/dummyads1.RDS deleted file mode 100644 index 06add3b..0000000 Binary files a/tests/testthat/inst/extdata/just_rds/dummyads1.RDS and /dev/null differ diff --git a/tests/testthat/inst/extdata/just_rds/dummyads2.RDS b/tests/testthat/inst/extdata/just_rds/dummyads2.RDS deleted file mode 100644 index 2ca4327..0000000 Binary files a/tests/testthat/inst/extdata/just_rds/dummyads2.RDS and /dev/null differ diff --git a/tests/testthat/inst/extdata/just_sas/dummyads1.sas7bdat b/tests/testthat/inst/extdata/just_sas/dummyads1.sas7bdat deleted file mode 100644 index bc7cb8a..0000000 Binary files a/tests/testthat/inst/extdata/just_sas/dummyads1.sas7bdat and /dev/null differ diff --git a/tests/testthat/inst/extdata/just_sas/dummyads2.sas7bdat b/tests/testthat/inst/extdata/just_sas/dummyads2.sas7bdat deleted file mode 100644 index 6eeec6d..0000000 Binary files a/tests/testthat/inst/extdata/just_sas/dummyads2.sas7bdat and /dev/null differ diff --git a/tests/testthat/setup.R b/tests/testthat/setup.R index 1dab7a6..ac6ce9e 100644 --- a/tests/testthat/setup.R +++ b/tests/testthat/setup.R @@ -1,25 +1,66 @@ # validation (S) vdoc <- local({ - # ########## - # package_name is used # INSIDE # the sourced file below - # ########## - package_name <- read.dcf("../../DESCRIPTION")[, "Package"] + package_name <- "dv.loader" # package_name is used *INSIDE* the sourced file below utils_file_path <- system.file("validation", "utils-validation.R", package = package_name, mustWork = TRUE) - source(utils_file_path, local = TRUE)[["value"]] + source(file = utils_file_path, local = TRUE)[["value"]] }) + specs <- vdoc[["specs"]] -# validation (F) +# validation (F) + +# Create a 
copy of the iris data +iris_data <- iris + +# Change . to _ in column names +names(iris_data) <- gsub("\\.", "_", names(iris_data)) + +# Create a temporary directory +temp_dir <- tempdir() -Sys.setenv("RXD_DATA" = find.package(package = "dv.loader")) -local_test_path <- "inst/extdata" +# Path to the data files +iris_file <- file.path(temp_dir, "iris") +iris_rds_file <- file.path(temp_dir, "iris.rds") +iris_sas_file <- file.path(temp_dir, "iris.sas7bdat") +iris_xpt_file <- file.path(temp_dir, "iris.xpt") +iris_txt_file <- file.path(temp_dir, "iris.txt") -test_file_path <- "test/" -cre_test_files <- c("adsl.sas7bdat", "adae.sas7bdat") -cre_file_names <- c("adsl", "adae") -local_test_files <- c("dummyads1.sas7bdat", "dummyads2.sas7bdat") -local_file_names <- c("dummyads1", "dummyads2") +# Save data to file with no extension +saveRDS(iris_data, iris_file) -expected_meta_cols <- c( - "size", "isdir", "mode", "mtime", - "ctime", "atime", "path", "file_name" +# Save iris data to RDS file +saveRDS(iris_data, iris_rds_file) + +# Save iris data to SAS file +lifecycle::expect_deprecated( + haven::write_sas(iris_data, iris_sas_file) ) + +# Save iris data to XPT file +haven::write_xpt(iris_data, iris_xpt_file) + +# Save iris data to TXT file +write.table(iris_data, file = iris_txt_file, row.names = FALSE) + +# Set the RXD_DATA environment variable +Sys.setenv(RXD_DATA = temp_dir) + +# Load RDS data via load_data() +lifecycle::expect_deprecated( + iris_data_rds <- dv.loader::load_data( + sub_dir = ".", + file_names = "iris.rds" + ) +) + +# Load SAS data via load_data() +lifecycle::expect_deprecated( + iris_data_sas <- dv.loader::load_data( + sub_dir = ".", + file_names = "iris.sas7bdat" + ) +) + +# Load the data from the files +iris_rds <- dv.loader::load_rds(files = iris_rds_file) +iris_sas <- dv.loader::load_sas(files = iris_sas_file) +iris_xpt <- dv.loader::load_xpt(files = iris_xpt_file) diff --git a/tests/testthat/test-data_integrity.R b/tests/testthat/test-data_integrity.R 
new file mode 100644 index 0000000..a8cc040 --- /dev/null +++ b/tests/testthat/test-data_integrity.R @@ -0,0 +1,42 @@ +test_that( + desc = vdoc[["add_spec"]]( + desc = "Ensures that the data integrity is maintained across different file types and loading methods, comparing the loaded data against a known reference dataset.", + spec = specs$data_integrity + ), + code = { + # load_data(): check that the RDS file is loaded correctly + expect_equal( + object = iris_data_rds[["iris.rds"]], + expected = iris_data, + ignore_attr = TRUE + ) + + # load_data(): check that the SAS file is loaded correctly + expect_equal( + object = iris_data_sas[["iris.sas7bdat"]], + expected = iris_data, + ignore_attr = TRUE + ) + + # load_rds(): check that the RDS file is loaded correctly + expect_equal( + object = iris_rds[["iris.rds"]], + expected = iris_data, + ignore_attr = TRUE + ) + + # load_sas(): check that the SAS file is loaded correctly + expect_equal( + object = iris_sas[["iris.sas7bdat"]], + expected = iris_data, + ignore_attr = TRUE + ) + + # load_xpt(): check that the XPT file is loaded correctly + expect_equal( + object = iris_xpt[["iris.xpt"]], + expected = iris_data, + ignore_attr = TRUE + ) + } +) diff --git a/tests/testthat/test-default_dir.R b/tests/testthat/test-default_dir.R new file mode 100644 index 0000000..b9336fe --- /dev/null +++ b/tests/testthat/test-default_dir.R @@ -0,0 +1,43 @@ +test_that( + desc = vdoc[["add_spec"]]( + desc = "Verifies that load_data() can correctly locate and load data files using relative paths from the current working directory.", + spec = specs$default_dir + ), + code = { + # Save the current working directory + old_wd <- getwd() + + # Change the working directory to the temporary directory + setwd(temp_dir) + + lifecycle::expect_deprecated( + # load_data(): load the RDS file with use_wd = TRUE + data1 <- dv.loader::load_data( + sub_dir = ".", + file_names = "iris.rds", + use_wd = TRUE + ) + ) + + lifecycle::expect_deprecated( + # 
load_data(): load the SAS file with use_wd = TRUE + data2 <- dv.loader::load_data( + sub_dir = ".", + file_names = "iris.sas7bdat", + use_wd = TRUE + ) + ) + + # Expect that the RDS file is loaded + expect_named(data1, "iris.rds") + + # Expect that the SAS file is loaded + expect_named(data2, "iris.sas7bdat") + + # Expect that the two data sets are the same + expect_equal(data1, data2, ignore_attr = TRUE) + + # Set the working directory back to the original directory + setwd(old_wd) + } +) diff --git a/tests/testthat/test-file_extensions.R b/tests/testthat/test-file_extensions.R new file mode 100644 index 0000000..dd9a6dd --- /dev/null +++ b/tests/testthat/test-file_extensions.R @@ -0,0 +1,28 @@ +test_that( + desc = vdoc[["add_spec"]]( + desc = "appropriate error messages are thrown when attempting to load or read files without valid extensions", + spec = specs$file_extensions + ), + code = { + # load_data(): load data with no file extension + lifecycle::expect_deprecated( + dv.loader::load_data(sub_dir = ".", file_names = "iris") + ) |> + expect_error("file must either be RDS or SAS7BDAT") + + # Expected error message + error_msg <- "Assertion on 'file_ext != \"\"' failed: Must be TRUE." 
+ + # load_rds(): expect an error when the file extension is empty + dv.loader::load_rds(files = iris_file) |> + expect_error(error_msg) + + # load_sas(): expect an error when the file extension is empty + dv.loader::load_sas(files = iris_file) |> + expect_error(error_msg) + + # load_xpt(): expect an error when the file extension is empty + dv.loader::load_xpt(files = iris_file) |> + expect_error(error_msg) + } +) diff --git a/tests/testthat/test-file_names.R b/tests/testthat/test-file_names.R new file mode 100644 index 0000000..4212ae9 --- /dev/null +++ b/tests/testthat/test-file_names.R @@ -0,0 +1,25 @@ +test_that( + desc = vdoc[["add_spec"]]( + desc = "appropriate error messages are thrown when the required 'file_names' argument is not provided", + spec = specs$file_names + ), + code = { + lifecycle::expect_deprecated( + # load_data(): expect an error when the file_names argument is missing + load_data(sub_dir = ".") + ) |> + expect_error('argument "file_names" is missing, with no default') + + # Expected error message + error_msg <- 'argument "files" is missing, with no default' + + # load_rds(): expect an error when the files argument is missing + expect_error(dv.loader::load_rds(), error_msg) + + # load_sas(): expect an error when the files argument is missing + expect_error(dv.loader::load_sas(), error_msg) + + # load_xpt(): expect an error when the files argument is missing + expect_error(dv.loader::load_xpt(), error_msg) + } +) diff --git a/tests/testthat/test-file_type.R b/tests/testthat/test-file_type.R new file mode 100644 index 0000000..59bec5b --- /dev/null +++ b/tests/testthat/test-file_type.R @@ -0,0 +1,25 @@ +test_that( + desc = vdoc[["add_spec"]]( + desc = "appropriate error messages are thrown when attempting to load or read file with types that are not supported.", + spec = specs$file_type + ), + code = { + # load_data(): expect an error when the file type is not supported + lifecycle::expect_deprecated( + dv.loader::load_data(sub_dir = ".", 
file_names = "iris.txt") + ) |> + expect_error("file must either be RDS or SAS7BDAT") + + # Expected error message + error_msg <- "Must be element of set \\{'rds','sas7bdat','xpt'\\}, but is 'txt'." + + # load_rds(): expect an error when the file extension is not supported + expect_error(dv.loader::load_rds(files = iris_txt_file), error_msg) + + # load_sas(): expect an error when the file extension is not supported + expect_error(dv.loader::load_sas(files = iris_txt_file), error_msg) + + # load_xpt(): expect an error when the file extension is not supported + expect_error(dv.loader::load_xpt(files = iris_txt_file), error_msg) + } +) diff --git a/tests/testthat/test-meta_data.R b/tests/testthat/test-meta_data.R new file mode 100644 index 0000000..227e5db --- /dev/null +++ b/tests/testthat/test-meta_data.R @@ -0,0 +1,42 @@ +test_that( + desc = vdoc[["add_spec"]]( + desc = "load_data(), load_rds(), load_sas(), and load_xpt() functions correctly extract and attach metadata of the file to the loaded data", + spec = specs$meta_data + ), + code = { + # Get the file info for the iris files + rds_file_info <- file.info(iris_rds_file, extra_cols = FALSE) + sas_file_info <- file.info(iris_sas_file, extra_cols = FALSE) + xpt_file_info <- file.info(iris_xpt_file, extra_cols = FALSE) + + # load_data(): check metadata for RDS file + expect_equal( + as.list(attr(iris_data_rds[["iris.rds"]], "meta")[names(rds_file_info)]), + as.list(rds_file_info) + ) + + # load_data(): check metadata for SAS file + expect_equal( + as.list(attr(iris_data_sas[["iris.sas7bdat"]], "meta")[names(sas_file_info)]), + as.list(sas_file_info) + ) + + # load_rds(): check metadata for RDS file + expect_equal( + as.list(attr(iris_rds[["iris.rds"]], "meta")[names(rds_file_info)]), + as.list(rds_file_info) + ) + + # load_sas(): check metadata for SAS file + expect_equal( + as.list(attr(iris_sas[["iris.sas7bdat"]], "meta")[names(sas_file_info)]), + as.list(sas_file_info) + ) + + # load_xpt(): check metadata for 
XPT file + expect_equal( + as.list(attr(iris_xpt[["iris.xpt"]], "meta")[names(xpt_file_info)]), + as.list(xpt_file_info) + ) + } +) diff --git a/tests/testthat/test-prefer_sas.R b/tests/testthat/test-prefer_sas.R new file mode 100644 index 0000000..837ba34 --- /dev/null +++ b/tests/testthat/test-prefer_sas.R @@ -0,0 +1,37 @@ +test_that( + desc = vdoc[["add_spec"]]( + desc = "load_data() loads the RDS file if prefer_sas is FALSE, and it loads the SAS file if prefer_sas is TRUE.", + spec = specs$prefer_sas + ), + code = { + # load_data(): load the RDS file with prefer_sas = FALSE + lifecycle::expect_deprecated( + data_rds <- dv.loader::load_data( + sub_dir = ".", + file_names = "iris.rds", + prefer_sas = FALSE + ) + ) + + # load_data(): load the SAS file with prefer_sas = TRUE + lifecycle::expect_deprecated( + data_sas <- load_data( + sub_dir = ".", + file_names = "iris.sas7bdat", + prefer_sas = TRUE + ) + ) + + # Get metadata for RDS file + meta_rds <- attr(data_rds[["iris.rds"]], "meta") + + # Get metadata for SAS file + meta_sas <- attr(data_sas[["iris.sas7bdat"]], "meta") + + # Check if the correct RDS file is loaded + expect_equal(basename(meta_rds[["path"]]), "iris.rds") + + # Check if the correct file is loaded + expect_equal(basename(meta_sas[["path"]]), "iris.sas7bdat") + } +) diff --git a/tests/testthat/tests.R b/tests/testthat/tests.R deleted file mode 100644 index d5e5747..0000000 --- a/tests/testthat/tests.R +++ /dev/null @@ -1,207 +0,0 @@ -test_that( - "defaults to using the working directory when sub_dir arg is NULL" %>% - vdoc[["add_spec"]](specs$default_dir), - { - expect_error( - load_data(file_names = local_file_names) - ) - } -) - -test_that( - "throws an error if you don't provide 'file_names'" %>% - vdoc[["add_spec"]](specs$file_names), - { - expect_error( - load_data(file_names = NULL) - ) - } -) - -test_that( - "throws an error if you provide a file type which is not supported" %>% - vdoc[["add_spec"]](specs$file_type), - { - expect_error( - 
load_data( - sub_dir = local_test_path, - file_names = "bad_file_type" - ) - ) - } -) - -test_that( - "does not throw an error if provided valid extensions" %>% - vdoc[["add_spec"]](specs$file_extensions), - { - expect_error( - load_data( - sub_dir = local_test_path, - file_names = "dummyads1.RDS", - use_wd = TRUE - ), - NA - ) - } -) - - -test_that( - "can mix file_names with valid extensions" %>% - vdoc[["add_spec"]](specs$file_extensions), - { - actual <- load_data( - sub_dir = local_test_path, - file_names = c("dummyads1.RDS", "dummyads1.sas7bdat"), - use_wd = TRUE - ) - actual <- c( - tools::file_ext(attributes(actual[[1]])$meta$path), - tools::file_ext(attributes(actual[[2]])$meta$path) - ) - expected <- c("RDS", "sas7bdat") - expect_equal(actual, expected) - } -) - -test_that( - "can mix file_names with and without valid extensions" %>% - vdoc[["add_spec"]](specs$file_extensions), - { - expect_error( - load_data( - sub_dir = local_test_path, - file_names = c("dummyads1", "dummyads2.RDS"), - use_wd = TRUE - ), - NA - ) - } -) - -test_that( - "does not throw an error if you provide valid extensions" %>% - vdoc[["add_spec"]](specs$file_extensions), - { - expect_error( - load_data( - sub_dir = local_test_path, - file_names = c("bad_file_type.txt"), - use_wd = TRUE - ) - ) - expect_error( - load_data( - sub_dir = local_test_path, - file_names = c("bad_file_type.myrds"), - use_wd = TRUE - ) - ) - } -) - -test_that( - "maintains integrity of data from producing system to consuming system" %>% - vdoc[["add_spec"]](specs$data_integrity), - { - actual <- load_data( - sub_dir = local_test_path, - file_names = local_file_names[2], - use_wd = TRUE, - prefer_sas = TRUE - ) - - actual <- actual[[1]] - - attr(actual, "meta") <- NULL - attr(actual, "label") <- "dummyads2" - - expected <- haven::read_sas(file.path(local_test_path, local_test_files[2])) - attr(expected, "label") <- "dummyads2" - - expect_equal( - actual, - expected - ) - } -) - -test_that( - "has correct 
metadata" %>% - vdoc[["add_spec"]](specs$meta_data), - { - actual <- load_data( - sub_dir = local_test_path, - file_names = local_file_names[2], - use_wd = TRUE - ) - actual_meta <- attr(actual[[1]], "meta") - expect_equal( - c( - "size", "isdir", "mode", - "mtime", "ctime", "atime", - "path", "file_name" - ), - names(actual_meta) - ) - } -) - -test_that( - "loads an RDS file when prefer_sas is FALSE (default) and both SAS and RDS files exist" %>% - vdoc[["add_spec"]](specs$prefer_sas), - { - actual <- load_data( - sub_dir = local_test_path, - file_names = local_file_names[2], - use_wd = TRUE - ) - actual <- attr(actual[[1]], "meta")[["path"]] - expect_equal(grepl(".RDS$", actual, ignore.case = FALSE), TRUE) - } -) - -test_that( - "loads a SAS file when prefer_sas is FALSE (default) and an RDS file doesn't exist" %>% - vdoc[["add_spec"]](specs$prefer_sas), - { - actual <- load_data( - sub_dir = file.path(local_test_path, "just_sas"), - file_names = local_file_names[2], - use_wd = TRUE - ) - actual <- attr(actual[[1]], "meta")[["path"]] - expect_equal(grepl(".sas7bdat$", actual, ignore.case = TRUE), TRUE) - } -) - -test_that( - "loads a SAS file when prefer_sas is TRUE and both SAS and RDS files exist" %>% - vdoc[["add_spec"]](specs$prefer_sas), - { - actual <- load_data( - sub_dir = local_test_path, - file_names = local_file_names[2], - use_wd = TRUE, - prefer_sas = TRUE - ) - actual <- attr(actual[[1]], "meta")[["path"]] - expect_equal(grepl(".sas7bdat$", actual), TRUE) - } -) - -test_that( - "loads an RDS file when prefer_sas is TRUE and a SAS file doesn't exist" %>% - vdoc[["add_spec"]](specs$prefer_sas), - { - actual <- load_data( - sub_dir = file.path(local_test_path, "just_rds"), - file_names = local_file_names[2], - use_wd = TRUE, - prefer_sas = TRUE - ) - actual <- attr(actual[[1]], "meta")[["path"]] - expect_equal(grepl(".RDS$", actual), TRUE) - } -) diff --git a/vignettes/.gitignore b/vignettes/.gitignore new file mode 100644 index 0000000..097b241 --- 
/dev/null +++ b/vignettes/.gitignore @@ -0,0 +1,2 @@ +*.html +*.R diff --git a/vignettes/integration-guide.Rmd b/vignettes/integration-guide.Rmd new file mode 100644 index 0000000..f6393de --- /dev/null +++ b/vignettes/integration-guide.Rmd @@ -0,0 +1,74 @@ +--- +title: "Integration Guide" +output: rmarkdown::html_vignette +vignette: > + %\VignetteIndexEntry{Integration Guide} + %\VignetteEngine{knitr::rmarkdown} + %\VignetteEncoding{UTF-8} +--- + +```{r, include = FALSE} +knitr::opts_chunk$set( + collapse = TRUE, + comment = "#>" +) +``` + +This vignette showcases the integration of the {dv.loader} package with other packages from the DaVinci framework. It guides you through the process of combining {dv.loader} with {dv.manager} and {dv.listings} to create a modular Shiny application. + +```{r, eval=FALSE} +# Install the {remotes} package if you haven't already +if (!require("remotes")) install.packages("remotes") + +# Define the packages to install +pkgs <- c("dv.loader", "dv.manager", "dv.listings") + +# Install the packages from GitHub +remotes::install_github(repo = paste0("Boehringer-Ingelheim/", pkgs)) +``` + +## Step 1: Load data files + +Use the `dv.loader::load_sas()` function to import two example SAS data files included in the `dv.loader` package. + +```{r, eval=FALSE} +# Define the directory containing the data files +data_dir <- system.file("extdata", "pharmaverseadam", package = "dv.loader") + +# Define the file names +file_names <- paste0(c("adsl", "adae"), ".sas7bdat") + +# Import the data files +data_list <- dv.loader::load_sas(files = file.path(data_dir, file_names)) + +# Remove file extensions from the dataset names +names(data_list) <- tools::file_path_sans_ext(names(data_list)) +``` + +## Step 2: Create a module list + +Utilize `dv.listings::mod_listings()` to generate a module that presents tabular views of the two datasets imported in the preceding step. 
+ +```{r, eval=FALSE} +# Include a module containing two data listings +module_list <- list( + "Data Listings" = dv.listings::mod_listings( + module_id = "data_listings", + dataset_names = c("adsl", "adae") + ) +) +``` + +## Step 3: Run the DaVinci application + +Execute the `dv.manager::run_app()` function to initiate the DaVinci application, incorporating the data and module lists prepared in the preceding steps. + +```{r, eval=FALSE} +# Run the DaVinci application with data and module lists defined earlier +dv.manager::run_app( + data = list("pharmaverseadam" = data_list), + module_list = module_list, + filter_data = "adsl", + title = "DaVinci Application" +) +``` diff --git a/vignettes/loading-data-into-memory.Rmd b/vignettes/loading-data-into-memory.Rmd deleted file mode 100644 index 577da15..0000000 --- a/vignettes/loading-data-into-memory.Rmd +++ /dev/null @@ -1,45 +0,0 @@ ---- -title: "Loading Data into Memory" -output: rmarkdown::html_vignette -vignette: > - %\VignetteIndexEntry{Loading Data into Memory} - %\VignetteEngine{knitr::rmarkdown} - %\VignetteEncoding{UTF-8} ---- - -```{r, include = FALSE} -knitr::opts_chunk$set( - collapse = TRUE, - comment = "#>" -) -``` - -```{r setup} -library(dv.loader) -``` - -Note: `use_wd = TRUE` can be used to source from local folder. Just set your working directory before using with `setwd()`, or use an explicit path in `sub_dir`. 
- -## Usage: `load_data()` - -```{r} -test_data_path <- "../tests/testthat/inst/extdata" -data_list <- load_data( - sub_dir = test_data_path, - file_names = "dummyads2", - use_wd = TRUE -) - - -class(data_list) - -class(data_list[["dummyads2"]]) - -head(data_list[["dummyads2"]]) -``` - -Get the dataframe's metadata through its attributes: - -```{r} -attr(data_list[["dummyads2"]], "meta") -``` diff --git a/vignettes/migration-guide.Rmd b/vignettes/migration-guide.Rmd new file mode 100644 index 0000000..5cbe1e1 --- /dev/null +++ b/vignettes/migration-guide.Rmd @@ -0,0 +1,118 @@ +--- +title: "Migration Guide" +output: rmarkdown::html_vignette +vignette: > + %\VignetteIndexEntry{Migration Guide} + %\VignetteEngine{knitr::rmarkdown} + %\VignetteEncoding{UTF-8} +--- + +```{r, include = FALSE} +knitr::opts_chunk$set( + collapse = TRUE, + comment = "#>" +) +``` + +In version 3.0.0 of {dv.loader}, significant changes have been made to improve the data loading functionality. The `load_data()` function, which was previously used for multiple file types, has been deprecated. In its place, a set of specialized functions have been introduced to handle specific file types more efficiently. This change aims to simplify the data loading process and make it more intuitive for users. + +The new functions introduced in version 3.0.0 are: + +1. `load_rds()`: Specifically designed to load RDS files (.rds) +2. `load_sas()`: Dedicated to loading SAS data files (.sas7bdat) +3. `load_xpt()`: A new addition to support loading XPT files (.xpt), which wasn't available in previous versions + +These changes bring several benefits: + +- Improved clarity: Each function clearly indicates the type of file it's meant to handle. +- Enhanced performance: Specialized functions can be optimized for their specific file types. +- Extended functionality: Support for XPT files has been added, expanding the package's capabilities. 
+ +## Migration Steps + +To migrate from the deprecated `load_data()` function to the new specialized functions in {dv.loader} 3.0.0, follow these steps: + +1. Identify the file types you're working with: + - RDS files (.rds) + - SAS data files (.sas7bdat) + - XPT files (.xpt) + +2. Replace `load_data()` calls with the appropriate new function: + - For RDS files: Use `load_rds()` + - For SAS data files: Use `load_sas()` + - For XPT files: Use `load_xpt()` + +3. Update function arguments: + - Remove arguments `sub_dir`, `file_names`, `use_wd`, and `prefer_sas`. + - Specify the path to the file(s) using the `files` argument in the new functions. + +Test your code thoroughly to ensure all data is loaded correctly. + +## Example + +Let's walk through a detailed example of migrating from the old `load_data()` function to the new specialized functions in {dv.loader} 3.0.0. This example will demonstrate how to load SAS data files (.sas7bdat) using both the old and new methods. + +### Set up the data directory + +First, we'll set up our data directory `data_dir` using a sample dataset provided by the {dv.loader} package. The `data_dir` is the path to the directory containing the data files and it will be used in both the old and new methods. + +```{r} +# Set the data directory +data_dir <- system.file("extdata", package = "dv.loader") +data_dir + +# List the files in the data directory +list.files(data_dir) +``` + +### Load data using the old method + +In the old method, we need to set the environment variable `RXD_DATA` to the path of the data directory. The `sub_dir` and `file_names` arguments are used to specify the subdirectory and file names within the data directory. The `use_wd` and `prefer_sas` arguments are optional and can be used to specify the working directory and the preference for SAS data files. 
+ +```{r} +# Set the RXD_DATA environment variable +Sys.setenv(RXD_DATA = data_dir) + +# Load the data using the old method +data_list1 <- dv.loader::load_data( + sub_dir = "pharmaverseadam", + file_names = c("adsl", "adae"), + use_wd = FALSE, + prefer_sas = TRUE +) + +# Check the names of the data list +names(data_list1) + +# Check the contents of the adsl data +data_list1[["adsl"]] + +# Check the metadata of the adsl data +attr(data_list1[["adsl"]], "meta") +``` + +### Load data using the new method + +In the new method, we need to specify the path to the file(s) using the `files` argument. Unlike the old method, each file path must include the file extension (for example, `.sas7bdat`); a path without an extension is rejected. Also unlike the old method, there is no need to set the environment variable `RXD_DATA`. + +```{r} +# Set the files argument +files <- file.path(data_dir, "pharmaverseadam", c("adsl.sas7bdat", "adae.sas7bdat")) +files + +# Load the data using the new method +data_list2 <- dv.loader::load_sas(files = files) + +# Check the names of the data list +names(data_list2) + +# Check the contents of the adsl data +data_list2[["adsl.sas7bdat"]] + +# Check the metadata of the adsl data +attr(data_list2[["adsl.sas7bdat"]], "meta") + +# Remove the file extension from the names if needed +names(data_list2) <- tools::file_path_sans_ext(names(data_list2)) +names(data_list2) +```