diff --git a/DESCRIPTION b/DESCRIPTION index 381f318..b713e75 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,6 +1,6 @@ Package: metacore Title: A Centralized Metadata Object Focus on Clinical Trial Data Programming Workflows -Version: 0.0.1.0000 +Version: 0.0.1.1000 Authors@R: c(person(given = "Christina", family = "Fillmore", diff --git a/NAMESPACE b/NAMESPACE index d8a0fe1..409364f 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -3,6 +3,7 @@ export("%>%") export(create_tbl) export(define_to_MetaCore) +export(get_control_term) export(load_metacore) export(metacore) export(metacore_example) @@ -70,7 +71,9 @@ importFrom(purrr,reduce) importFrom(readxl,excel_sheets) importFrom(readxl,read_excel) importFrom(rlang,"!!") +importFrom(rlang,as_label) importFrom(rlang,as_string) +importFrom(rlang,enexpr) importFrom(rlang,expr) importFrom(rlang,prim_name) importFrom(rlang,sym) diff --git a/NEWS.md b/NEWS.md index 1eda483..0c131bc 100644 --- a/NEWS.md +++ b/NEWS.md @@ -2,10 +2,11 @@ This fixes the following issues: -- #16 the metacore function now accepts any empty datasets and creates an empty dataset with the correct column names and types -- #10 yn function checks for logicals and returns them -- #11 updated function description to make this clearer -- #12 updated regex so to [F|f]ormat so it can accept lower case -- #14 added supp_flag to ds_vars (on a side note we did a really good job with this it was super easy to change and only required a few edits) -- #15 modified create =tbl so if there are two potential matches in the same dataset and one is an exact match it uses that +- [#16](https://github.com/atorus-research/metacore/issues/16) the metacore function now accepts any empty datasets and creates an empty dataset with the correct column names and types +- [#10](https://github.com/atorus-research/metacore/issues/10) yn function checks for logicals and returns them +- [#11](https://github.com/atorus-research/metacore/issues/11) updated function description to make this 
clearer +- [#12](https://github.com/atorus-research/metacore/issues/12) updated regex so to [F|f]ormat so it can accept lower case +- [#14](https://github.com/atorus-research/metacore/issues/14) added supp_flag to ds_vars (on a side note we did a really good job with this it was super easy to change and only required a few edits) +- [#15](https://github.com/atorus-research/metacore/issues/15) modified create_tbl so if there are two potential matches in the same dataset and one is an exact match it uses that +Additionally, it adds the `get_control_term` function to pull out the control term for a given variable. diff --git a/R/metacore.R b/R/metacore.R index 6b5e235..0386762 100644 --- a/R/metacore.R +++ b/R/metacore.R @@ -288,6 +288,61 @@ select_dataset <- function(.data, dataset, simplify = FALSE) { } + +#' Get Control Term +#' +#' Returns the control term (a vector for permitted values and a tibble for code +#' lists) for a given variable. The dataset can be optionally specified if there +#' is different control terminology for different datasets +#' +#' @param metacode metacore object +#' @param variable A variable name to get the controlled terms for. This can +#' either be a string or just the name of the variable +#' @param dataset A dataset name. 
This is not required if there is only one set +#' of control terminology across all datasets +#' +#' @return a vector for permitted values and a 2-column tibble for codelists +#' @export +#' +#' @importFrom rlang as_label enexpr +#' +#' @examples +#' meta_ex <- spec_to_metacore(metacore_example("p21_mock.xlsx")) +#' get_control_term(meta_ex, QVAL, SUPPAE) +#' get_control_term(meta_ex, "QVAL", "SUPPAE") +get_control_term <- function(metacode, variable, dataset = NULL){ + var_str <- if (is.character(enexpr(variable))) variable else + as_label(enexpr(variable)) # accept a string or a bare name + dataset_val <- if (is.character(enexpr(dataset))) dataset else + as_label(enexpr(dataset)) # to make the filter more explicit + if(dataset_val == "NULL"){ # no dataset supplied: look across the whole value_spec table + var_code_id <- metacode$value_spec %>% + filter(variable == var_str) %>% + pull(code_id) %>% + unique() + } else { + subset_data <- metacode$value_spec %>% + filter(dataset == dataset_val) + if(nrow(subset_data) == 0){ + stop(paste0(dataset_val, " not found in the value_spec table. 
Please check the dataset name")) + } + var_code_id <- subset_data %>% + filter(variable == var_str) %>% + pull(code_id) %>% + unique() + } + if(length(var_code_id) != 1 || is.na(var_code_id)){ # none, several, or a missing code_id + stop(paste0(var_str, " does not resolve to a single control term; check the variable name or consider specifying a dataset")) + } + + metacode$codelist %>% + filter(code_id == var_code_id) %>% + pull(codes) %>% + .[[1]] +} + + + #' save metacore object #' #' @param metacore_object the metacore object in memory to save to disc diff --git a/README.Rmd b/README.Rmd index 88909f5..7b1bd3c 100644 --- a/README.Rmd +++ b/README.Rmd @@ -8,12 +8,11 @@ output: github_document knitr::opts_chunk$set( collapse = TRUE, comment = "#>", - fig.path = "man/figures/README-", - out.width = "100%" + fig.path = "man/figures/README-" ) ``` -# metacore +# metacore [](https://RValidationHub.slack.com) @@ -55,7 +54,7 @@ Here is a schema of how all this fits together: ![](man/figures/schema-colors.png "man/figures/Metacore Schema") -### ds_spec +### ds_spec This table covers the basic information about each dataset. There is only a single row per dataset, with the following information: @@ -65,7 +64,7 @@ This table covers the basic information about each dataset. There is only a sing - *Label*: Dataset label -### ds_vars +### ds_vars This table contains the information that bridges between purely dataset level and purely variable level. There is one row per dataset per variable: @@ -83,7 +82,7 @@ This table contains the information that bridges between purely dataset level an - *supp_flag*: Logical to determine if the variable is in the supplementals -### var_spec +### var_spec This table contains the information the purely variable level information. The goal is there is a single row per variable, which is common across all datasets. This helps ensure variables follow the CDISC standard. 
But, this isn't always possible, so if information for a given variable differs across datasets, the variable will be recorded as dataset.variable in the variable column. @@ -99,7 +98,7 @@ This table contains the information the purely variable level information. The g - *format*: Variable format -### value_spec +### value_spec This table contains the information the information at the value level. There will be at least one row per dataset/variable combination. There is more than one row per dataset/variable combination if the combination has values which have differing metadata. For instance LBORRES that are different data types depending on the value. The information contained are as follows: @@ -117,7 +116,7 @@ This table contains the information the information at the value level. There wi - *derivation_id*: ID for the derivation to match with the **derivation** table -### derivation +### derivation This table has all the derivation information, with one row per derivation ID and the following information: @@ -125,7 +124,7 @@ This table has all the derivation information, with one row per derivation ID an - *derivation*: Text describing the derivation -### codelist +### codelist This table contains the code lists, permitted value lists, and external libraries nested within a tibble. There is only a single row per list/library, with the following information: diff --git a/README.md b/README.md index e618a7b..8a8454b 100644 --- a/README.md +++ b/README.md @@ -1,7 +1,7 @@ -# metacore +# metacore @@ -54,13 +54,13 @@ normalize the information as much as possible, while keeping together like information. Each table has a basic theme to make them easier to remember. 
They are as follows: -- **ds\_spec**: Contains dataset level information +- **ds_spec**: Contains dataset level information -- **ds\_vars**: Bridges the dataset and variable level information +- **ds_vars**: Bridges the dataset and variable level information -- **var\_spec**: Contains variable level information +- **var_spec**: Contains variable level information -- **value\_spec**: Contains value level information +- **value_spec**: Contains value level information - **derivations**: Contains all derivations @@ -71,29 +71,29 @@ Here is a schema of how all this fits together: ![](man/figures/schema-colors.png "man/figures/Metacore Schema") -### ds\_spec +### ds_spec This table covers the basic information about each dataset. There is only a single row per dataset, with the following information: - *dataset*: The abbreviated name of the dataset (e.g. AE) -- *structure*: Value structure of the dataset as a sting +- *Structure*: Value structure of the dataset as a sting -- *label*: Dataset label +- *Label*: Dataset label -### ds\_vars +### ds_vars This table contains the information that bridges between purely dataset level and purely variable level. There is one row per dataset per variable: - *dataset*: The abbreviated name of the dataset. This will match to - the name in **ds\_spec** + the name in **ds_spec** - *variable*: Variable name -- *key\_seq*: Sequence key, which are the variables used to order a +- *key_seq*: Sequence key, which are the variables used to order a dataset. This is a column of integers, where 1 is the first sorting variable and 2 is the second etc. If the variable is not used in sorting it will be left `NA` @@ -108,7 +108,10 @@ variable: “Conditionally Expected”, or NA. 
For more information about core see [CDISC](https://www.cdisc.org/standards/foundational/adam) -### var\_spec +- *supp_flag*: Logical to determine if the variable is in the + supplementals + +### var_spec This table contains the information the purely variable level information. The goal is there is a single row per variable, which is @@ -118,9 +121,9 @@ variable differs across datasets, the variable will be recorded as dataset.variable in the variable column. - *variable*: Variable name, which should match the name in - **ds\_spec**. Unless the variable needs to be duplicated, then the + **ds_spec**. Unless the variable needs to be duplicated, then the name will be a combination of the the dataset name and variable name - from **ds\_spec** (dataset.variable) + from **ds_spec** (dataset.variable) - *type*: Variable class @@ -133,7 +136,7 @@ dataset.variable in the variable column. - *format*: Variable format -### value\_spec +### value_spec This table contains the information the information at the value level. There will be at least one row per dataset/variable combination. There @@ -143,40 +146,40 @@ different data types depending on the value. The information contained are as follows: - *dataset*: The abbreviated name of the dataset. This will match to - the name in **ds\_spec** + the name in **ds_spec** - *variable*: Variable name. 
This will match to the name in - **ds\_spec** + **ds_spec** - *type*: String of the value type - *origin*: Origin of the value -- *code\_id*: ID for the code list to match the id in the **codelist** +- *code_id*: ID for the code list to match the id in the **codelist** table - *where*: Value of the variable -- *derivation\_id*: ID for the derivation to match with the +- *derivation_id*: ID for the derivation to match with the **derivation** table -### derivation +### derivation This table has all the derivation information, with one row per derivation ID and the following information: -- *derivation\_id*: The ID, which should match to **value\_spec** +- *derivation_id*: The ID, which should match to **value_spec** - *derivation*: Text describing the derivation -### codelist +### codelist This table contains the code lists, permitted value lists, and external libraries nested within a tibble. There is only a single row per list/library, with the following information: -- *code\_id*: the ID used to identify the code list. This should be - the same as the *code\_id* in **val\_spec** +- *code_id*: the ID used to identify the code list. This should be the + same as the *code_id* in **val_spec** - *name*: Name of the code list diff --git a/man/get_control_term.Rd b/man/get_control_term.Rd new file mode 100644 index 0000000..3171bdd --- /dev/null +++ b/man/get_control_term.Rd @@ -0,0 +1,30 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/metacore.R +\name{get_control_term} +\alias{get_control_term} +\title{Get Control Term} +\usage{ +get_control_term(metacode, variable, dataset = NULL) +} +\arguments{ +\item{metacode}{metacore object} + +\item{variable}{A variable name to get the controlled terms for. This can +either be a string or just the name of the variable} + +\item{dataset}{A dataset name. 
This is not required if there is only one set +of control terminology across all datasets} +} +\value{ +a vector for permitted values and a 2-column tibble for codelists +} +\description{ +Returns the control term (a vector for permitted values and a tibble for code +lists) for a given variable. The dataset can be optionally specified if there +is different control terminology for different datasets +} +\examples{ +meta_ex <- spec_to_metacore(metacore_example("p21_mock.xlsx")) +get_control_term(meta_ex, QVAL, SUPPAE) +get_control_term(meta_ex, "QVAL", "SUPPAE") +} diff --git a/tests/testthat/test-metacore.R b/tests/testthat/test-metacore.R index 66d15e0..25565c3 100644 --- a/tests/testthat/test-metacore.R +++ b/tests/testthat/test-metacore.R @@ -132,3 +132,20 @@ test_that("load metacore fails with no path and rdss in wd", { ) unlink(my_temp_dir) }) + +test_that("pulling out control terminology works", { + # Warnings raised while reading the example spec are silenced + meta <- suppressWarnings( + spec_to_metacore(metacore_example("p21_mock.xlsx")) + ) + yes_no <- tibble(code = c("N", "Y"), decode = c("No", "Yes")) + + # QVAL without a dataset, and QVAL within LB, must both raise an error + expect_error(get_control_term(meta, QVAL)) + expect_error(get_control_term(meta, QVAL, LB)) + + # Bare names and quoted strings must resolve to the same code list + expect_equal(get_control_term(meta, QVAL, SUPPAE), yes_no) + expect_equal(get_control_term(meta, "QVAL", "SUPPAE"), yes_no) + }) +