diff --git a/DESCRIPTION b/DESCRIPTION
index 381f318..b713e75 100644
--- a/DESCRIPTION
+++ b/DESCRIPTION
@@ -1,6 +1,6 @@
Package: metacore
Title: A Centralized Metadata Object Focus on Clinical Trial Data Programming Workflows
-Version: 0.0.1.0000
+Version: 0.0.1.1000
Authors@R:
c(person(given = "Christina",
family = "Fillmore",
diff --git a/NAMESPACE b/NAMESPACE
index d8a0fe1..409364f 100644
--- a/NAMESPACE
+++ b/NAMESPACE
@@ -3,6 +3,7 @@
export("%>%")
export(create_tbl)
export(define_to_MetaCore)
+export(get_control_term)
export(load_metacore)
export(metacore)
export(metacore_example)
@@ -70,7 +71,9 @@ importFrom(purrr,reduce)
importFrom(readxl,excel_sheets)
importFrom(readxl,read_excel)
importFrom(rlang,"!!")
+importFrom(rlang,as_label)
importFrom(rlang,as_string)
+importFrom(rlang,enexpr)
importFrom(rlang,expr)
importFrom(rlang,prim_name)
importFrom(rlang,sym)
diff --git a/NEWS.md b/NEWS.md
index 1eda483..0c131bc 100644
--- a/NEWS.md
+++ b/NEWS.md
@@ -2,10 +2,11 @@
This fixes the following issues:
-- #16 the metacore function now accepts any empty datasets and creates an empty dataset with the correct column names and types
-- #10 yn function checks for logicals and returns them
-- #11 updated function description to make this clearer
-- #12 updated regex so to [F|f]ormat so it can accept lower case
-- #14 added supp_flag to ds_vars (on a side note we did a really good job with this it was super easy to change and only required a few edits)
-- #15 modified create =tbl so if there are two potential matches in the same dataset and one is an exact match it uses that
+- [#16](https://github.com/atorus-research/metacore/issues/16) the metacore function now accepts any empty datasets and creates an empty dataset with the correct column names and types
+- [#10](https://github.com/atorus-research/metacore/issues/10) yn function checks for logicals and returns them
+- [#11](https://github.com/atorus-research/metacore/issues/11) updated function description to make this clearer
+- [#12](https://github.com/atorus-research/metacore/issues/12) updated the regex to [F|f]ormat so it can accept lower case
+- [#14](https://github.com/atorus-research/metacore/issues/14) added supp_flag to ds_vars (on a side note we did a really good job with this it was super easy to change and only required a few edits)
+- [#15](https://github.com/atorus-research/metacore/issues/15) modified `create_tbl` so that if there are two potential matches in the same dataset and one is an exact match, it uses that
+Additionally, it adds the `get_control_term` function to pull out the control term for a given variable.
diff --git a/R/metacore.R b/R/metacore.R
index 6b5e235..0386762 100644
--- a/R/metacore.R
+++ b/R/metacore.R
@@ -288,6 +288,61 @@ select_dataset <- function(.data, dataset, simplify = FALSE) {
}
+
+#' Get Control Term
+#'
+#' Returns the control term (a vector for permitted values and a tibble for code
+#' lists) for a given variable. The dataset can be optionally specified if there
+#' is different control terminology for different datasets
+#'
+#' @param metacode metacore object
+#' @param variable A variable name to get the controlled terms for. This can
+#' either be a string or just the name of the variable
+#' @param dataset A dataset name. This is not required if there is only one set
+#' of control terminology across all datasets
+#'
+#' @return a vector for permitted values and a 2-column tibble for codelists
+#' @export
+#'
+#' @importFrom rlang as_label enexpr
+#'
+#' @examples
+#' meta_ex <- spec_to_metacore(metacore_example("p21_mock.xlsx"))
+#' get_control_term(meta_ex, QVAL, SUPPAE)
+#' get_control_term(meta_ex, "QVAL", "SUPPAE")
+get_control_term <- function(metacode, variable, dataset = NULL){
+  # Capture the arguments unevaluated so QVAL and "QVAL" are both accepted
+  var_expr <- enexpr(variable)
+  var_str <- if (is.character(var_expr)) var_expr else as_label(var_expr)
+  ds_expr <- enexpr(dataset)
+  candidates <- metacode$value_spec
+  if (!is.null(ds_expr)) {
+    dataset_val <- if (is.character(ds_expr)) ds_expr else as_label(ds_expr)
+    candidates <- filter(candidates, dataset == dataset_val)
+    if (nrow(candidates) == 0) {
+      stop(paste0(dataset_val, " not found in the value_spec table. Please check the dataset name"))
+    }
+  }
+  var_code_id <- candidates %>%
+    filter(variable == var_str) %>%
+    pull(code_id) %>%
+    unique()
+  if (length(var_code_id) > 1) {
+    stop(paste0(var_str, " does not have a unique control term, consider specifying a dataset"))
+  }
+  # An unknown variable or a missing code_id yields no match; report it
+  # clearly instead of failing later with "subscript out of bounds"
+  matched <- metacode$codelist %>%
+    filter(code_id %in% var_code_id[!is.na(var_code_id)]) %>%
+    pull(codes)
+  if (length(matched) == 0) {
+    stop(paste0("No control term found for ", var_str, ". Please check the variable name"))
+  }
+  matched[[1]]
+}
+
+
+
#' save metacore object
#'
#' @param metacore_object the metacore object in memory to save to disc
diff --git a/README.Rmd b/README.Rmd
index 88909f5..7b1bd3c 100644
--- a/README.Rmd
+++ b/README.Rmd
@@ -8,12 +8,11 @@ output: github_document
knitr::opts_chunk$set(
collapse = TRUE,
comment = "#>",
- fig.path = "man/figures/README-",
- out.width = "100%"
+ fig.path = "man/figures/README-"
)
```
-# metacore
+# metacore
[](https://RValidationHub.slack.com)
@@ -55,7 +54,7 @@ Here is a schema of how all this fits together:
![](man/figures/schema-colors.png "man/figures/Metacore Schema")
-### ds_spec
+### ds_spec
This table covers the basic information about each dataset. There is only a single row per dataset, with the following information:
@@ -65,7 +64,7 @@ This table covers the basic information about each dataset. There is only a sing
- *Label*: Dataset label
-### ds_vars
+### ds_vars
This table contains the information that bridges between purely dataset level and purely variable level. There is one row per dataset per variable:
@@ -83,7 +82,7 @@ This table contains the information that bridges between purely dataset level an
- *supp_flag*: Logical to determine if the variable is in the supplementals
-### var_spec
+### var_spec
This table contains the information the purely variable level information. The goal is there is a single row per variable, which is common across all datasets. This helps ensure variables follow the CDISC standard. But, this isn't always possible, so if information for a given variable differs across datasets, the variable will be recorded as dataset.variable in the variable column.
@@ -99,7 +98,7 @@ This table contains the information the purely variable level information. The g
- *format*: Variable format
-### value_spec
+### value_spec
This table contains the information the information at the value level. There will be at least one row per dataset/variable combination. There is more than one row per dataset/variable combination if the combination has values which have differing metadata. For instance LBORRES that are different data types depending on the value. The information contained are as follows:
@@ -117,7 +116,7 @@ This table contains the information the information at the value level. There wi
- *derivation_id*: ID for the derivation to match with the **derivation** table
-### derivation
+### derivation
This table has all the derivation information, with one row per derivation ID and the following information:
@@ -125,7 +124,7 @@ This table has all the derivation information, with one row per derivation ID an
- *derivation*: Text describing the derivation
-### codelist
+### codelist
This table contains the code lists, permitted value lists, and external libraries nested within a tibble. There is only a single row per list/library, with the following information:
diff --git a/README.md b/README.md
index e618a7b..8a8454b 100644
--- a/README.md
+++ b/README.md
@@ -1,7 +1,7 @@
-# metacore
+# metacore
@@ -54,13 +54,13 @@ normalize the information as much as possible, while keeping together
like information. Each table has a basic theme to make them easier to
remember. They are as follows:
-- **ds\_spec**: Contains dataset level information
+- **ds_spec**: Contains dataset level information
-- **ds\_vars**: Bridges the dataset and variable level information
+- **ds_vars**: Bridges the dataset and variable level information
-- **var\_spec**: Contains variable level information
+- **var_spec**: Contains variable level information
-- **value\_spec**: Contains value level information
+- **value_spec**: Contains value level information
- **derivations**: Contains all derivations
@@ -71,29 +71,29 @@ Here is a schema of how all this fits together:
![](man/figures/schema-colors.png "man/figures/Metacore Schema")
-### ds\_spec
+### ds_spec
This table covers the basic information about each dataset. There is
only a single row per dataset, with the following information:
- *dataset*: The abbreviated name of the dataset (e.g. AE)
-- *structure*: Value structure of the dataset as a sting
+- *Structure*: Value structure of the dataset as a string
-- *label*: Dataset label
+- *Label*: Dataset label
-### ds\_vars
+### ds_vars
This table contains the information that bridges between purely dataset
level and purely variable level. There is one row per dataset per
variable:
- *dataset*: The abbreviated name of the dataset. This will match to
- the name in **ds\_spec**
+ the name in **ds_spec**
- *variable*: Variable name
-- *key\_seq*: Sequence key, which are the variables used to order a
+- *key_seq*: Sequence key, which are the variables used to order a
dataset. This is a column of integers, where 1 is the first sorting
variable and 2 is the second etc. If the variable is not used in
sorting it will be left `NA`
@@ -108,7 +108,10 @@ variable:
“Conditionally Expected”, or NA. For more information about core see
[CDISC](https://www.cdisc.org/standards/foundational/adam)
-### var\_spec
+- *supp_flag*: Logical to determine if the variable is in the
+ supplementals
+
+### var_spec
This table contains the information the purely variable level
information. The goal is there is a single row per variable, which is
@@ -118,9 +121,9 @@ variable differs across datasets, the variable will be recorded as
dataset.variable in the variable column.
- *variable*: Variable name, which should match the name in
- **ds\_spec**. Unless the variable needs to be duplicated, then the
+ **ds_spec**. Unless the variable needs to be duplicated, then the
name will be a combination of the the dataset name and variable name
- from **ds\_spec** (dataset.variable)
+ from **ds_spec** (dataset.variable)
- *type*: Variable class
@@ -133,7 +136,7 @@ dataset.variable in the variable column.
- *format*: Variable format
-### value\_spec
+### value_spec
This table contains the information the information at the value level.
There will be at least one row per dataset/variable combination. There
@@ -143,40 +146,40 @@ different data types depending on the value. The information contained
are as follows:
- *dataset*: The abbreviated name of the dataset. This will match to
- the name in **ds\_spec**
+ the name in **ds_spec**
- *variable*: Variable name. This will match to the name in
- **ds\_spec**
+ **ds_spec**
- *type*: String of the value type
- *origin*: Origin of the value
-- *code\_id*: ID for the code list to match the id in the **codelist**
+- *code_id*: ID for the code list to match the id in the **codelist**
table
- *where*: Value of the variable
-- *derivation\_id*: ID for the derivation to match with the
+- *derivation_id*: ID for the derivation to match with the
**derivation** table
-### derivation
+### derivation
This table has all the derivation information, with one row per
derivation ID and the following information:
-- *derivation\_id*: The ID, which should match to **value\_spec**
+- *derivation_id*: The ID, which should match to **value_spec**
- *derivation*: Text describing the derivation
-### codelist
+### codelist
This table contains the code lists, permitted value lists, and external
libraries nested within a tibble. There is only a single row per
list/library, with the following information:
-- *code\_id*: the ID used to identify the code list. This should be
- the same as the *code\_id* in **val\_spec**
+- *code_id*: the ID used to identify the code list. This should be the
+ same as the *code_id* in **val_spec**
- *name*: Name of the code list
diff --git a/man/get_control_term.Rd b/man/get_control_term.Rd
new file mode 100644
index 0000000..3171bdd
--- /dev/null
+++ b/man/get_control_term.Rd
@@ -0,0 +1,30 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/metacore.R
+\name{get_control_term}
+\alias{get_control_term}
+\title{Get Control Term}
+\usage{
+get_control_term(metacode, variable, dataset = NULL)
+}
+\arguments{
+\item{metacode}{metacore object}
+
+\item{variable}{A variable name to get the controlled terms for. This can
+either be a string or just the name of the variable}
+
+\item{dataset}{A dataset name. This is not required if there is only one set
+of control terminology across all datasets}
+}
+\value{
+a vector for permitted values and a 2-column tibble for codelists
+}
+\description{
+Returns the control term (a vector for permitted values and a tibble for code
+lists) for a given variable. The dataset can be optionally specified if there
+is different control terminology for different datasets
+}
+\examples{
+meta_ex <- spec_to_metacore(metacore_example("p21_mock.xlsx"))
+get_control_term(meta_ex, QVAL, SUPPAE)
+get_control_term(meta_ex, "QVAL", "SUPPAE")
+}
diff --git a/tests/testthat/test-metacore.R b/tests/testthat/test-metacore.R
index 66d15e0..25565c3 100644
--- a/tests/testthat/test-metacore.R
+++ b/tests/testthat/test-metacore.R
@@ -132,3 +132,20 @@ test_that("load metacore fails with no path and rdss in wd", {
)
unlink(my_temp_dir)
})
+
+test_that("pulling out control terminology works", {
+  # Build the metacore object from the bundled example spec; warnings raised
+  # while reading the spec are silenced because they are not under test here
+  spec <- suppressWarnings(spec_to_metacore(metacore_example("p21_mock.xlsx")))
+  # Codelist expected for QVAL in SUPPAE
+  yes_no <- tibble(code = c("N", "Y"), decode = c("No", "Yes"))
+
+  # Both of these lookups are expected to raise an error
+  expect_error(get_control_term(spec, QVAL))
+  expect_error(get_control_term(spec, QVAL, LB))
+
+  # Bare names and quoted strings must resolve to the same control term
+  expect_equal(get_control_term(spec, QVAL, SUPPAE), yes_no)
+  expect_equal(get_control_term(spec, "QVAL", "SUPPAE"), yes_no)
+})
+