diff --git a/.DS_Store b/.DS_Store new file mode 100644 index 0000000..805283a Binary files /dev/null and b/.DS_Store differ diff --git a/.github/workflows/check-standard.yaml b/.github/workflows/check-standard.yaml index 0efd455..242e3d8 100644 --- a/.github/workflows/check-standard.yaml +++ b/.github/workflows/check-standard.yaml @@ -5,10 +5,12 @@ on: branches: - main - master + - dev pull_request: branches: - main - master + - dev name: R-CMD-check diff --git a/.github/workflows/test-coverage.yaml b/.github/workflows/test-coverage.yaml index ba1f94f..106bec0 100644 --- a/.github/workflows/test-coverage.yaml +++ b/.github/workflows/test-coverage.yaml @@ -3,10 +3,12 @@ on: branches: - main - master + - dev pull_request: branches: - main - master + - dev name: test-coverage diff --git a/DESCRIPTION b/DESCRIPTION index 79fbae5..483f190 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,6 +1,6 @@ Package: metacore Title: A Centralized Metadata Object Focus on Clinical Trial Data Programming Workflows -Version: 0.0.3 +Version: 0.0.4 Authors@R: c(person(given = "Christina", family = "Fillmore", diff --git a/NEWS.md b/NEWS.md index 73f6a0a..8789308 100644 --- a/NEWS.md +++ b/NEWS.md @@ -1,4 +1,9 @@ +# Metacore 0.0.4 +- Adds a supp table to store the `idvar` and `qeval` information +- Adds `sig_dig` column to the `value_spec` table + + # Metacore 0.0.3 - Fixes bugs found in the `get_control_term` function and improves error messages for `get_control_term` - Improves internal naming consistency by renaming `define_to_MetaCore` to `define_to_metacore` diff --git a/R/metacore.R b/R/metacore.R index c4aded7..ce6b3da 100644 --- a/R/metacore.R +++ b/R/metacore.R @@ -1,7 +1,7 @@ #' This file includes the internal functions needed to create the readonly #' Metacore R6 object #' -#' @param ds_spec contians each dataset in the study, with the labels for each +#' @param ds_spec contains each dataset in the study, with the labels for each #' @param ds_vars information on what variables 
are in each dataset + plus #' dataset specific variable information #' @param var_spec variable information that is shared across all datasets @@ -10,12 +10,13 @@ #' @param derivations contains derivation, it allows for different variables to #' have the same derivation #' @param code_list contains the code/decode information +#' @param supp contains the idvar and qeval information for supplemental variables #' #' @family Metacore #' @noRd #' #' -MetaCore_initialize <- function(ds_spec, ds_vars, var_spec, value_spec, derivations, codelist){ +MetaCore_initialize <- function(ds_spec, ds_vars, var_spec, value_spec, derivations, codelist, supp){ private$.ds_spec <- ds_spec %>% add_labs(dataset = "Dataset Name", @@ -58,6 +59,18 @@ MetaCore_initialize <- function(ds_spec, ds_vars, var_spec, value_spec, derivati type = "Code List/Permitted Values/External Library", codes = "List of Codes") + private$.codelist <- codelist %>% + add_labs(code_id = "ID of the Code List", + names = "Name of the Code List", + type = "Code List/Permitted Values/External Library", + codes = "List of Codes") + + private$.supp <- supp %>% + add_labs(dataset = "Dataset Name", + variable = "Variable Name", + idvar = "Identifying Variable", + qeval = "Evaluator") + self$validate() message("\n Metadata successfully imported") } @@ -85,18 +98,34 @@ MetaCore_print <- function(...){ MetaCore_validate <- function() { if(var_name_check(private)){ - check_columns(private$.ds_spec, - private$.ds_vars, - private$.var_spec, - private$.value_spec, - private$.derivations, - private$.codelist - ) + if(nrow(private$.ds_spec) == 0 & + nrow(private$.ds_vars) == 0 & + nrow(private$.var_spec) == 0 & + nrow(private$.value_spec) == 0 & + nrow(private$.derivations) == 0 & + nrow(private$.codelist) == 0 & + nrow(private$.supp) == 0 ){ + warning("Other checks were not preformed, because all datasets are empty", + call. 
= FALSE) + } else { + check_columns(private$.ds_spec, + private$.ds_vars, + private$.var_spec, + private$.value_spec, + private$.derivations, + private$.codelist, + private$.supp + ) + + ds_vars_check(private$.ds_vars, private$.var_spec) + value_check(private$.ds_vars, private$.value_spec) + derivation_check(private$.value_spec, private$.derivations) + codelist_check(private$.value_spec, private$.codelist) + if(nrow(private$.supp) == 0){ + supp_check(private$.ds_vars, private$.supp) + } - ds_vars_check(private$.ds_vars, private$.var_spec) - value_check(private$.ds_vars, private$.value_spec) - derivation_check(private$.value_spec, private$.derivations) - codelist_check(private$.value_spec, private$.codelist) + } } else { warning("Other checks were not preformed, because column names were incorrect", @@ -162,6 +191,8 @@ MetaCore_filter <- function(value) { private$.codelist <- private$.codelist %>% right_join(private$.value_spec %>% select(code_id) %>% na.omit(), by = "code_id") + + private$.supp <- private$.supp %>% filter(dataset == value) } #' The Metacore R6 Class @@ -180,22 +211,24 @@ MetaCore <- R6::R6Class("Metacore", metacore_filter = MetaCore_filter ), private = list( - .ds_spec = tibble(dataset = character(), label = character()), + .ds_spec = tibble(dataset = character(), structure = character(), label = character()), .ds_vars = tibble(dataset = character(), variable = character(), keep = logical(), key_seq = integer(), order = integer(), core = character(), supp_flag = logical()), - .var_spec = tibble(variable = character(), label = character(), length = integer()), + .var_spec = tibble(variable = character(), label = character(), length = integer(), + type = character(), common = character(), format = character()), .value_spec = tibble(dataset = character(), variable = character(), where = character(), type = character(), - codelist = character(), + sig_dig = integer(), + code_id = character(), origin = character(), derivation_id = integer()), 
.derivations = tibble(derivation_id = integer(), derivation = character()), # code_type == df | permitted_val | external_lib - .codelist = tibble(code_id = character(), code_type = character(), codelist = list()), - .change_log = tibble(table_chg = character(), column_chg = character(), what_chg = list()) + .codelist = tibble(code_id = character(), name = character(), type = character(), codes = list()), + .supp = tibble(dataset = character(), variable = character(), idvar = character(), qeval = character()) ), active = list( ds_spec = readonly('ds_spec'), @@ -204,7 +237,7 @@ MetaCore <- R6::R6Class("Metacore", value_spec = readonly('value_spec'), derivations = readonly('derivations'), codelist = readonly('codelist'), - changelog = readonly('changelog') + supp = readonly('supp') ) ) @@ -217,12 +250,29 @@ MetaCore <- R6::R6Class("Metacore", #' @param value_spec parameter specific information, as data is long the specs for wbc might be difference the hgb #' @param derivations contains derivation, it allows for different variables to have the same derivation #' @param codelist contains the code/decode information +#' @param supp contains the idvar and qeval information for supplemental variables #' #' @family Metacore #' #' @export #' -metacore <- function(ds_spec, ds_vars, var_spec, value_spec, derivations, codelist) { +metacore <- function(ds_spec = tibble(dataset = character(), structure = character(), label = character()), + ds_vars = tibble(dataset = character(), variable = character(), keep = logical(), + key_seq = integer(), order = integer(), core = character(), + supp_flag = logical()), + var_spec = tibble(variable = character(), label = character(), length = integer(), + type = character(), common = character(), format = character()), + value_spec = tibble(dataset = character(), + variable = character(), + where = character(), + type = character(), + sig_dig = integer(), + code_id = character(), + origin = character(), + derivation_id = integer()), + 
derivations = tibble(derivation_id = integer(), derivation = character()), + codelist = tibble(code_id = character(), name = character(), type = character(), codes = list()), + supp = tibble(dataset = character(), variable = character(), idvar = character(), qeval = character())) { # Check if there are any empty datasets that need adding is_empty_df <- as.list(environment()) %>% keep(is.null) @@ -252,7 +302,7 @@ metacore <- function(ds_spec, ds_vars, var_spec, value_spec, derivations, codeli names(replaced) <- to_replace %>% map_chr(~unique(.$dataset)) list2env(replaced, environment()) } - MetaCore$new(ds_spec, ds_vars, var_spec, value_spec, derivations, codelist) + MetaCore$new(ds_spec, ds_vars, var_spec, value_spec, derivations, codelist, supp) } @@ -279,7 +329,8 @@ select_dataset <- function(.data, dataset, simplify = FALSE) { cl$var_spec, cl$value_spec, cl$derivations, - cl$codelist + cl$codelist, + cl$supp ) %>% reduce(left_join) ) diff --git a/R/spec_builder.R b/R/spec_builder.R index 089fed8..8efe855 100644 --- a/R/spec_builder.R +++ b/R/spec_builder.R @@ -314,6 +314,7 @@ spec_type_to_value_spec <- function(doc, cols = c("dataset" = "[D|d]ataset|[D|d] "origin" = "[O|o]rigin", "type" = "[T|t]ype", "code_id" = "[C|c]odelist|Controlled Term", + "sig_dig" = "[S|s]ignificant", "where" = "[W|w]here", "derivation_id" = "[M|m]ethod"), sheet = NULL, @@ -321,17 +322,18 @@ spec_type_to_value_spec <- function(doc, cols = c("dataset" = "[D|d]ataset|[D|d] where_cols = c("id" = "ID", "where" = c("Variable", "Comparator", "Value")), var_sheet = "[V|v]ar"){ - name_check <- names(cols) %in% c("variable", "origin", "code_id", + name_check <- names(cols) %in% c("variable", "origin", "code_id", "sig_dig", "type", "dataset", "where", "derivation_id") %>% all() if(!name_check| is.null(names(cols))){ stop("Supplied column vector must be named using the following names: - 'dataset', 'variable', 'origin', 'code_id', 'type', 'where', 'derivation_id' + 'dataset', 'variable', 'origin', 
'code_id', 'type', 'where', 'sig_dig', 'derivation_id' If derivation_id is not avaliable it can be excluded and dataset.variable will be used. If the where information is on a seperate sheet, put the column with cross ref as where.") } + # Select a subset of sheets if specified if(!is.null(sheet)){ sheet_ls <- str_subset(names(doc), sheet) @@ -405,7 +407,8 @@ spec_type_to_value_spec <- function(doc, cols = c("dataset" = "[D|d]ataset|[D|d] discard(~. %in% names(out)) out %>% `is.na<-`(missing) %>% - distinct() + distinct() %>% + mutate(sig_dig = as.integer(.data$sig_dig)) } diff --git a/R/validators.R b/R/validators.R index cfed20f..4dab67e 100644 --- a/R/validators.R +++ b/R/validators.R @@ -90,7 +90,7 @@ derivation_check <- function(value_spec, derivations){ variables <- not_in_val %>% pull(.data$variable) %>% str_c(collapse = "\n ") - message <- paste("The following variables are missing derivations:\n", + message <- paste("The following variables have derivation ids not found in the derivations table:\n", variables, "\n\n") warning(message, call. = FALSE) } @@ -126,7 +126,7 @@ codelist_check <- function(value_spec, codelist){ variables <- not_in_val %>% pull(.data$variable) %>% str_c(collapse = "\n ") - message <- paste("The following variables are missing codelist(s):\n", + message <- paste("The following variables hace code ids not found in the codelist(s):\n", variables, "\n") warning(message, call. = FALSE) } @@ -142,6 +142,53 @@ codelist_check <- function(value_spec, codelist){ } } + +#' Check Supp +#' +#' +#' Check the supp table works with the ds_var tables. 
All variables in the +#' ds_var with a TRUE supp flag should be in the supp and all variables in supp +#' should be in ds_vars +#' @param ds_vars ds_vars table +#' @param supp supp table +#' +#' @return writes warning to console if there is an issue +#' @noRd +supp_check <- function(ds_vars, supp){ + dist_test <- supp %>% + distinct(.data$dataset, .data$variable) %>% + nrow() == nrow(supp) + if(!dist_test){ + warning("Supp table contains non-unique dataset/variable combinations") + } + + ds_vars <- ds_vars %>% + filter(.data$supp_flag) + + #Check the variables in ds_vars that don't have value specs + not_in_supp <- anti_join(ds_vars, supp, by = c("dataset", "variable")) + if(nrow(not_in_supp) != 0){ + variables <- not_in_supp %>% + mutate(full = str_c(.data$dataset, .data$variable, sep = ".")) %>% + pull(.data$full) %>% + str_c(collapse = ", ") + message <- paste("The following variables are in the ds_vars table and tagged as supplement, but don't have supp specs:\n", + variables, "\n\n") + warning(message, call. = FALSE) + } + # Check the variables in value spec that aren't in ds_vars + not_in_ds <- anti_join(supp, ds_vars, by = c("dataset", "variable")) + if(nrow(not_in_ds) != 0){ + variables <- not_in_ds %>% + pull(.data$variable) %>% + str_c(collapse = ", ") + message <- paste("The following variables are have supp specifications, but aren't in the ds_vars table:\n", + variables, "\n\n") + warning(message, call. 
= FALSE) + } +} + + #' Column Names by dataset #' #' @return list of column names by dataset @@ -150,10 +197,10 @@ col_vars <- function(){ list(.ds_spec = c("dataset", "structure", "label"), .ds_vars = c("dataset", "variable", "key_seq", "order","keep", "core", "supp_flag"), .var_spec = c("variable", "length", "label", "type", "common", "format"), - .value_spec = c("type", "origin", "code_id", "dataset", "variable", "where", "derivation_id"), + .value_spec = c("dataset", "variable", "type", "origin","sig_dig", "code_id", "where", "derivation_id"), .derivations = c("derivation_id", "derivation"), .codelist= c("code_id", "name","type", "codes"), - .change_log = c("table_chg", "column_chg", "what_chg")) + .supp = c("dataset", "variable", "idvar", "qeval")) } @@ -223,6 +270,7 @@ all_message <- function() { "var_spec", "format", is.character, TRUE, "var_spec", "common", is.logical, TRUE, "value_spec", "type", is.character, TRUE, + "value_spec", "sig_dig", is.integer, TRUE, "value_spec", "origin", is.character, TRUE, "value_spec", "code_id", is.character, TRUE, "value_spec", "dataset", is.character, FALSE, @@ -234,6 +282,10 @@ all_message <- function() { "codelist", "name", is.character, TRUE, "codelist", "codes", function(x){!is.null(x)}, TRUE, "codelist", "type", is.character, TRUE, + "supp", "dataset", is.character, FALSE, + "supp", "variable", is.character, FALSE, + "supp", "idvar", is.character, TRUE, + "supp", "qeval", is.character, TRUE, ) } @@ -248,8 +300,9 @@ all_message <- function() { #' @param value_spec value specification #' @param derivations derivation information #' @param codelist codelist information +#' @param supp supp information #' -check_columns <- function(ds_spec, ds_vars, var_spec, value_spec, derivations, codelist) { +check_columns <- function(ds_spec, ds_vars, var_spec, value_spec, derivations, codelist, supp) { messages <- purrr::pmap(all_message(), diff --git a/R/xml_builders.R b/R/xml_builders.R index 83d7ebb..17aaf07 100644 --- 
a/R/xml_builders.R +++ b/R/xml_builders.R @@ -191,6 +191,8 @@ xml_to_value_spec <- function(doc) { var_info <- tibble( id = var_nodes %>% get_node_attr("OID"), type = var_nodes %>% get_node_attr("DataType"), + sig_dig = var_nodes %>% get_node_attr("SignificantDigits") %>% + as.integer(), origin = or_vec, code_id = code_id_vec ) %>% diff --git a/README.Rmd b/README.Rmd index 4078a82..7cdf44c 100644 --- a/README.Rmd +++ b/README.Rmd @@ -50,6 +50,8 @@ A metacore object is made-up of 6 different tables, which are connected with a s - **codelist**: Contains information about code/decodes, permitted values and external libraries +- **supp**: Contains information specific to supplemental variables + Here is a schema of how all this fits together: ![](man/figures/schema-colors.png "man/figures/Metacore Schema") @@ -110,6 +112,8 @@ This table contains the information the information at the value level. There wi - *origin*: Origin of the value +- *sig_dig*: Significant digits of the value + - *code_id*: ID for the code list to match the id in the **codelist** table - *where*: Value of the variable @@ -138,8 +142,23 @@ This table contains the code lists, permitted value lists, and external librarie To see a metacore object in about please see our vignettes -![](man/figures/labeled-schema.png "man/figures/Metacore Schema") + +![](man/figures/labeled_schema.png "man/figures/Metacore Schema") + +### supp + +This table contains the information needed to create supplemental tables. If you want to add a variable which will go into a supplemental qualifier then you can create as normal (i.e. label information going to the **var_spec** table and derivation and origin going into the **value_spec** table), but you need to flag it as supplemental in the **ds_vars** table and add a row to the **supp** table. There is only a single row per dataset/variable, with the following information: + +- *dataset*: The abbreviated name of the dataset. 
This will match to the name in **ds_spec** + +- *variable*: Variable name. This will match to the name in **ds_vars** + +- *idvar*: ID variable used for the supplemental variable. Can be left missing if not needed + +- *qeval*: Evaluator for the supplemental variable + +To see a metacore object in action, please see our vignettes ## Future Development -This is an alpha release of this package, so if you have ideas on future improvements please add them to the issue log. Additionally we are looking into creating an additional **changelog** table to track any changes to metacore objects. This would provide a robust and traceable solution to changing data requirements. +This is an alpha release of this package, so if you have ideas on future improvements please add them to the issue log. diff --git a/README.md b/README.md index 694c85b..20945bb 100644 --- a/README.md +++ b/README.md @@ -54,19 +54,21 @@ normalize the information as much as possible, while keeping together like information. Each table has a basic theme to make them easier to remember. 
They are as follows: - - **ds\_spec**: Contains dataset level information +- **ds\_spec**: Contains dataset level information - - **ds\_vars**: Bridges the dataset and variable level information +- **ds\_vars**: Bridges the dataset and variable level information - - **var\_spec**: Contains variable level information +- **var\_spec**: Contains variable level information - - **value\_spec**: Contains value level information +- **value\_spec**: Contains value level information - - **derivations**: Contains all derivations +- **derivations**: Contains all derivations - - **codelist**: Contains information about code/decodes, permitted +- **codelist**: Contains information about code/decodes, permitted values and external libraries +- **supp**: Contains information specific to supplemental variables + Here is a schema of how all this fits together: ![](man/figures/schema-colors.png "man/figures/Metacore Schema") @@ -76,11 +78,11 @@ Here is a schema of how all this fits together: This table covers the basic information about each dataset. There is only a single row per dataset, with the following information: - - *dataset*: The abbreviated name of the dataset (e.g. AE) +- *dataset*: The abbreviated name of the dataset (e.g. AE) - - *Structure*: Value structure of the dataset as a sting +- *Structure*: Value structure of the dataset as a sting - - *Label*: Dataset label +- *Label*: Dataset label ### ds\_vars @@ -88,27 +90,27 @@ This table contains the information that bridges between purely dataset level and purely variable level. There is one row per dataset per variable: - - *dataset*: The abbreviated name of the dataset. This will match to +- *dataset*: The abbreviated name of the dataset. This will match to the name in **ds\_spec** - - *variable*: Variable name +- *variable*: Variable name - - *key\_seq*: Sequence key, which are the variables used to order a +- *key\_seq*: Sequence key, which are the variables used to order a dataset. 
This is a column of integers, where 1 is the first sorting variable and 2 is the second etc. If the variable is not used in sorting it will be left `NA` - - *order*: Order sets the order of the columns to appear in the +- *order*: Order sets the order of the columns to appear in the dataset. This is also a numeric value - - *keep*: Logical value about if the variable needs to be kept +- *keep*: Logical value about if the variable needs to be kept - - *core*: ADaM core, which should be one of the following values: +- *core*: ADaM core, which should be one of the following values: “Expected”, “Required”, “Permissible”, “Conditionally Required”, - “Conditionally Expected”, or NA. For more information about core - see [CDISC](https://www.cdisc.org/standards/foundational/adam) + “Conditionally Expected”, or NA. For more information about core see + [CDISC](https://www.cdisc.org/standards/foundational/adam) - - *supp\_flag*: Logical to determine if the variable is in the +- *supp\_flag*: Logical to determine if the variable is in the supplemental datasets ### var\_spec @@ -120,21 +122,21 @@ standard. But, this isn’t always possible, so if information for a given variable differs across datasets, the variable will be recorded as dataset.variable in the variable column. - - *variable*: Variable name, which should match the name in +- *variable*: Variable name, which should match the name in **ds\_spec**. 
Unless the variable needs to be duplicated, then the name will be a combination of the the dataset name and variable name from **ds\_spec** (dataset.variable) - - *type*: Variable class +- *type*: Variable class - - *length*: Variable length (while not relevant to R datasets, this is +- *length*: Variable length (while not relevant to R datasets, this is important for when creating XPT files) - - *label*: Variable label +- *label*: Variable label - - *common*: Common across ADaM datasets +- *common*: Common across ADaM datasets - - *format*: Variable format +- *format*: Variable format ### value\_spec @@ -145,22 +147,24 @@ has values which have differing metadata. For instance LBORRES that are different data types depending on the value. The information contained are as follows: - - *dataset*: The abbreviated name of the dataset. This will match to +- *dataset*: The abbreviated name of the dataset. This will match to the name in **ds\_spec** - - *variable*: Variable name. This will match to the name in +- *variable*: Variable name. 
This will match to the name in **ds\_vars** - - *type*: String of the value type +- *type*: String of the value type + +- *origin*: Origin of the value - - *origin*: Origin of the value +- *sig\_dig*: Significant digits of the value - - *code\_id*: ID for the code list to match the id in the **codelist** +- *code\_id*: ID for the code list to match the id in the **codelist** table - - *where*: Value of the variable +- *where*: Value of the variable - - *derivation\_id*: ID for the derivation to match with the +- *derivation\_id*: ID for the derivation to match with the **derivation** table ### derivation @@ -168,9 +172,9 @@ are as follows: This table has all the derivation information, with one row per derivation ID and the following information: - - *derivation\_id*: The ID, which should match to **value\_spec** +- *derivation\_id*: The ID, which should match to **value\_spec** - - *derivation*: Text describing the derivation +- *derivation*: Text describing the derivation ### codelist @@ -178,26 +182,46 @@ This table contains the code lists, permitted value lists, and external libraries nested within a tibble. There is only a single row per list/library, with the following information: - - *code\_id*: the ID used to identify the code list. This should be +- *code\_id*: the ID used to identify the code list. 
This should be the same as the *code\_id* in **val\_spec** - - *name*: Name of the code list +- *name*: Name of the code list - - *code*: A list of tibbles (for code / decode combinations) and +- *code*: A list of tibbles (for code / decode combinations) and vectors (for permitted values and libraries), which contain all the codes - - *type*: An indicator of if the information in the code column is a +- *type*: An indicator of if the information in the code column is a code/decode table, permitted value, or external library To see a metacore object in about please see our vignettes -![](man/figures/labeled-schema.png "man/figures/Metacore Schema") +![](man/figures/labeled_schema.png "man/figures/Metacore Schema") + +### supp + +This table contains the information needed to create supplemental +tables. If you want to add a variable which will go into a supplemental +qualifier then you can create as normal (i.e. label information going to +the **var\_spec** table and derivation and origin going into the +**value\_spec** table), but you need to flag it as supplemental in the +**ds\_vars** table and add a row to the **supp** table. There is only a +single row per dataset/variable, with the following information: + +- *dataset*: The abbreviated name of the dataset. This will match to + the name in **ds\_spec** + +- *variable*: Variable name. This will match to the name in + **ds\_vars** + +- *idvar*: ID variable used for the supplemental variable. Can be left + missing if not needed + +- *qeval*: Evaluator for the supplemental variable + +To see a metacore object in action, please see our vignettes ## Future Development This is an alpha release of this package, so if you have ideas on future -improvements please add them to the issue log. Additionally we are -looking into creating an additional **changelog** table to track any -changes to metacore objects. This would provide a robust and traceable -solution to changing data requirements. 
+improvements please add them to the issue log. diff --git a/dev/to_make_rda_ex.R b/dev/to_make_rda_ex.R new file mode 100644 index 0000000..d589423 --- /dev/null +++ b/dev/to_make_rda_ex.R @@ -0,0 +1,77 @@ +library(safetyData) +library(dplyr) +library(admiral) +library(metatools) + +#Read from XML +doc <- xmlTreeParse(metacore_example("ADaM_define.xml"), useInternalNodes = TRUE) +ds_spec <- xml_to_ds_spec(doc) +ds_vars <- xml_to_ds_vars(doc) +var_spec <- xml_to_var_spec(doc) +value_spec <- xml_to_value_spec(doc) +code_list <- xml_to_codelist(doc) +derivations <- xml_to_derivations(doc) + +metacore<- metacore(ds_spec= ds_spec, ds_vars = ds_vars, var_spec = var_spec, + value_spec = value_spec, codelist = code_list, derivations = derivations) %>% + select_dataset("ADSL") + +adsl_preds <- build_from_derived(metacore, list("dm" = sdtm_dm), predecessor_only = FALSE, keep = TRUE, "ADSL") +# Pulling ct for DISCREAS from the cdisc pilot data +new_ct <- adsl_preds %>% + derive_disposition_reason( + dataset_ds = sdtm_ds, + new_var = DCSREAS, + reason_var = DSDECOD, + filter = DSCAT == "DISPOSITION EVENT" & DSDECOD != "SCREEN FAILURE" + ) %>% + pull(DCSREAS) %>% + unique() %>% + purrr::discard(is.na) %>% + tibble(code = ., decode = .) 
+code_list2 <- code_list %>% + mutate(codes = if_else(name == "DISCREAS", list(new_ct), codes)) + +metacore <- metacore(ds_spec, ds_vars, var_spec, value_spec, derivations, code_list2) + +save(metacore, file = "inst/extdata/pilot_ADaM.rda") + + +### Create an SDTM sample file +doc <- xmlTreeParse(metacore_example("SDTM_define.xml"), useInternalNodes = TRUE) +ds_spec <- xml_to_ds_spec(doc) +ds_vars <- xml_to_ds_vars(doc) +var_spec <- xml_to_var_spec(doc) +value_spec <- xml_to_value_spec(doc) +code_list <- xml_to_codelist(doc) +derivations <- xml_to_derivations(doc) +supp <- tibble(dataset = character(), variable = character(), idvar = character(), qeval = character()) + +datasets <- c("sdtm_suppae", "sdtm_suppdm", + "sdtm_suppds", "sdtm_supplb") +# i <- datasets[1] +for(i in datasets){ + dat <- get(i) + + ds_vars <- dat %>% + distinct(dataset= RDOMAIN, variable = QNAM) %>% + mutate(supp_flag = TRUE) %>% + bind_rows(ds_vars, .) + + var_spec <- dat %>% + distinct(variable = QNAM, label = QLABEL) %>% + mutate(type = "text", length = 20) %>% + bind_rows(var_spec, . ) + + value_spec <- dat %>% + distinct(dataset= RDOMAIN, variable = QNAM, origin = QORIG) %>% + bind_rows(value_spec, . ) + supp <- dat %>% + distinct(dataset= RDOMAIN, variable = QNAM, idvar = IDVAR, qeval = QEVAL) %>% + bind_rows(supp, .) 
+} + + +metacore <- metacore(ds_spec, ds_vars, var_spec, value_spec, derivations, code_list, supp) + +save(metacore, file = "inst/extdata/pilot_SDTM.rda") diff --git a/inst/.DS_Store b/inst/.DS_Store new file mode 100644 index 0000000..9e128af Binary files /dev/null and b/inst/.DS_Store differ diff --git a/inst/extdata/pilot_ADaM.rda b/inst/extdata/pilot_ADaM.rda index 695d474..d62bdde 100644 Binary files a/inst/extdata/pilot_ADaM.rda and b/inst/extdata/pilot_ADaM.rda differ diff --git a/inst/extdata/pilot_SDTM.rda b/inst/extdata/pilot_SDTM.rda new file mode 100644 index 0000000..aaeb546 Binary files /dev/null and b/inst/extdata/pilot_SDTM.rda differ diff --git a/man/.DS_Store b/man/.DS_Store new file mode 100644 index 0000000..578117d Binary files /dev/null and b/man/.DS_Store differ diff --git a/man/check_columns.Rd b/man/check_columns.Rd index 4cfc31d..001fae4 100644 --- a/man/check_columns.Rd +++ b/man/check_columns.Rd @@ -4,7 +4,15 @@ \alias{check_columns} \title{Check all data frames include the correct types of columns} \usage{ -check_columns(ds_spec, ds_vars, var_spec, value_spec, derivations, codelist) +check_columns( + ds_spec, + ds_vars, + var_spec, + value_spec, + derivations, + codelist, + supp +) } \arguments{ \item{ds_spec}{dataset specification} @@ -18,6 +26,8 @@ check_columns(ds_spec, ds_vars, var_spec, value_spec, derivations, codelist) \item{derivations}{derivation information} \item{codelist}{codelist information} + +\item{supp}{supp information} } \description{ This function checks for vector types and accepted words diff --git a/man/figures/.DS_Store b/man/figures/.DS_Store new file mode 100644 index 0000000..5008ddf Binary files /dev/null and b/man/figures/.DS_Store differ diff --git a/man/figures/labeled-schema.png b/man/figures/labeled-schema.png deleted file mode 100644 index 68d1bc3..0000000 Binary files a/man/figures/labeled-schema.png and /dev/null differ diff --git a/man/figures/labeled-supp.png b/man/figures/labeled-supp.png new file 
mode 100644 index 0000000..6c9bcb9 Binary files /dev/null and b/man/figures/labeled-supp.png differ diff --git a/man/figures/labeled-value_spec.png b/man/figures/labeled-value_spec.png index 5468438..9d36ef5 100644 Binary files a/man/figures/labeled-value_spec.png and b/man/figures/labeled-value_spec.png differ diff --git a/man/figures/labeled_schema.png b/man/figures/labeled_schema.png index 8d7996d..72fce00 100644 Binary files a/man/figures/labeled_schema.png and b/man/figures/labeled_schema.png differ diff --git a/man/figures/schema-colors.png b/man/figures/schema-colors.png index 56aa343..5dfcab8 100644 Binary files a/man/figures/schema-colors.png and b/man/figures/schema-colors.png differ diff --git a/man/metacore.Rd b/man/metacore.Rd index ce23aed..d5a6e4c 100644 --- a/man/metacore.Rd +++ b/man/metacore.Rd @@ -4,7 +4,21 @@ \alias{metacore} \title{R6 Class wrapper to create your own metacore object} \usage{ -metacore(ds_spec, ds_vars, var_spec, value_spec, derivations, codelist) +metacore( + ds_spec = tibble(dataset = character(), structure = character(), label = character()), + ds_vars = tibble(dataset = character(), variable = character(), keep = logical(), + key_seq = integer(), order = integer(), core = character(), supp_flag = logical()), + var_spec = tibble(variable = character(), label = character(), length = integer(), + type = character(), common = character(), format = character()), + value_spec = tibble(dataset = character(), variable = character(), where = + character(), type = character(), sig_dig = integer(), code_id = character(), origin = + character(), derivation_id = integer()), + derivations = tibble(derivation_id = integer(), derivation = character()), + codelist = tibble(code_id = character(), name = character(), type = character(), + codes = list()), + supp = tibble(dataset = character(), variable = character(), idvar = character(), + qeval = character()) +) } \arguments{ \item{ds_spec}{contains each dataset in the study, with the labels 
for each} @@ -18,6 +32,8 @@ metacore(ds_spec, ds_vars, var_spec, value_spec, derivations, codelist) \item{derivations}{contains derivation, it allows for different variables to have the same derivation} \item{codelist}{contains the code/decode information} + +\item{supp}{contains the idvar and qeval information for supplemental variables} } \description{ R6 Class wrapper to create your own metacore object diff --git a/man/spec_type_to_value_spec.Rd b/man/spec_type_to_value_spec.Rd index 13802a4..edcd102 100644 --- a/man/spec_type_to_value_spec.Rd +++ b/man/spec_type_to_value_spec.Rd @@ -8,7 +8,7 @@ spec_type_to_value_spec( doc, cols = c(dataset = "[D|d]ataset|[D|d]omain", variable = "[N|n]ame|[V|v]ariables?", origin = "[O|o]rigin", type = "[T|t]ype", code_id = "[C|c]odelist|Controlled Term", - where = "[W|w]here", derivation_id = "[M|m]ethod"), + sig_dig = "[S|s]ignificant", where = "[W|w]here", derivation_id = "[M|m]ethod"), sheet = NULL, where_sep_sheet = TRUE, where_cols = c(id = "ID", where = c("Variable", "Comparator", "Value")), diff --git a/tests/testthat/.DS_Store b/tests/testthat/.DS_Store new file mode 100644 index 0000000..7ddc22a Binary files /dev/null and b/tests/testthat/.DS_Store differ diff --git a/tests/testthat/test-metacore.R b/tests/testthat/test-metacore.R index ae30dea..655a1a6 100644 --- a/tests/testthat/test-metacore.R +++ b/tests/testthat/test-metacore.R @@ -12,7 +12,9 @@ dfs <- purrr::map(col_vars(), ~ empty_df(.x, fill = "A")) %>% "value_spec", "derivations", "codelist", - "changelog")) + "supp")) +dfs$ds_vars <- dfs$ds_vars %>% + mutate(supp_flag = FALSE) # function from the withr package with_dir <- function (new, code) { @@ -48,7 +50,7 @@ test_that("readonly function factory", { }) test_that("metacore wrapper function works", { - wrapper <- suppressWarnings(do.call(metacore, dfs[1:6])) + wrapper <- suppressWarnings(do.call(metacore, dfs[1:7])) r6 <- suppressWarnings( MetaCore$new(dfs$ds_spec, @@ -56,7 +58,8 @@ test_that("metacore 
wrapper function works", { dfs$var_spec, dfs$value_spec, dfs$derivations, - dfs$codelist) + dfs$codelist, + dfs$supp) ) expect_equal(wrapper, r6) @@ -68,7 +71,7 @@ test_that("metacore wrapper function works", { test_that("Can pass metacore NULL df's", { wrapper <- suppressWarnings(metacore(dfs$ds_spec, NULL, dfs$var_spec, - dfs$value_spec, dfs$derivations, dfs$codelist)) + dfs$value_spec, dfs$derivations, dfs$codelist, dfs$supp)) dummy <- list(character(), character(), numeric(), numeric(), logical(), character(), logical()) names(dummy) <- c("dataset", "variable", "key_seq", "order", @@ -88,7 +91,7 @@ test_that("subsetting works", { }) test_that("save_metacore creates .rds with no file path", { - wrapper <- suppressWarnings(do.call(metacore, dfs[1:6])) + wrapper <- suppressWarnings(do.call(metacore, dfs[1:7])) my_temp_dir <- tempdir() with_dir(my_temp_dir, save_metacore(wrapper)) expect_true("wrapper.rds" %in% list.files(my_temp_dir)) @@ -96,7 +99,7 @@ test_that("save_metacore creates .rds with no file path", { }) test_that("save_metacore replaces file path", { - wrapper <- suppressWarnings(do.call(metacore, dfs[1:6])) + wrapper <- suppressWarnings(do.call(metacore, dfs[1:7])) my_temp_dir <- tempdir() save_metacore(wrapper, file.path(my_temp_dir, "wrapper.csv")) expect_true("wrapper.rds" %in% list.files(my_temp_dir)) @@ -104,7 +107,7 @@ test_that("save_metacore replaces file path", { }) test_that("save_metacore uses file path", { - wrapper <- suppressWarnings(do.call(metacore, dfs[1:6])) + wrapper <- suppressWarnings(do.call(metacore, dfs[1:7])) my_temp_dir <- tempdir() save_metacore(wrapper, file.path(my_temp_dir, "wrapper.rds")) expect_true("wrapper.rds" %in% list.files(my_temp_dir)) @@ -112,7 +115,7 @@ test_that("save_metacore uses file path", { }) test_that("load_metacore loads .rds", { - wrapper <- suppressWarnings(do.call(metacore, dfs[1:6])) + wrapper <- suppressWarnings(do.call(metacore, dfs[1:7])) my_temp_dir <- tempdir() save_metacore(wrapper, 
file.path(my_temp_dir, "wrapper.rds")) wrapper <- load_metacore(file.path(my_temp_dir, "wrapper.rds")) @@ -125,7 +128,7 @@ test_that("load metacore fails with no path", { }) test_that("load metacore fails with no path and rdss in wd", { - wrapper <- suppressWarnings(do.call(metacore, dfs[1:6])) + wrapper <- suppressWarnings(do.call(metacore, dfs[1:7])) my_temp_dir <- tempdir() save_metacore(wrapper, file.path(my_temp_dir, "wrapper.rds")) expect_error( diff --git a/tests/testthat/test-reader.R b/tests/testthat/test-reader.R index b731949..9044ba6 100644 --- a/tests/testthat/test-reader.R +++ b/tests/testthat/test-reader.R @@ -266,122 +266,123 @@ test_that("Test var_spec readers", { test_that("values_spec reader tests", { ref_value_spec <- tibble::tribble( - ~dataset, ~variable, ~type, ~origin, ~code_id, ~where, ~derivation_id, - "AE", "AEACN", "text", "Derived", NA, NA, "MT.AE.AEACN", - "AE", "AEBDSYCD", "integer", "Assigned", NA, NA, NA, - "AE", "AEBODSYS", "text", "Assigned", "CL.AEDICT", NA, NA, - "AE", "AEDECOD", "text", "Assigned", "CL.AEDICT", NA, NA, - "AE", "AEDTC", "date", "Derived", NA, NA, "MT.AE.AEDTC", - "AE", "AEDY", "integer", "Derived", NA, NA, "MT.COMPMETHOD.STUDY_DAY", - "AE", "AEENDTC", "date", "CRF", NA, NA, NA, - "AE", "AEENDY", "integer", "Derived", NA, NA, "MT.COMPMETHOD.STUDY_DAY", - "AE", "AEHLGT", "text", "Assigned", "CL.AEDICT", NA, NA, - "AE", "AEHLGTCD", "integer", "Assigned", NA, NA, NA, - "AE", "AEHLT", "text", "Assigned", "CL.AEDICT", NA, NA, - "AE", "AEHLTCD", "integer", "Assigned", NA, NA, NA, - "AE", "AELLT", "text", "Assigned", "CL.AEDICT", NA, NA, - "AE", "AELLTCD", "integer", "Assigned", NA, NA, NA, - "AE", "AEOUT", "text", "CRF", "CL.OUT", NA, NA, - "AE", "AEPTCD", "integer", "Assigned", NA, NA, NA, - "AE", "AEREL", "text", "CRF", "CL.AECAUS", NA, NA, - "AE", "AESCAN", "text", "CRF", "CL.YN", NA, NA, - "AE", "AESCONG", "text", "CRF", "CL.YN", NA, NA, - "AE", "AESDISAB", "text", "CRF", "CL.YN", NA, NA, - "AE", "AESDTH", "text", 
"CRF", "CL.YN", NA, NA, - "AE", "AESEQ", "integer", "Derived", NA, NA, "MT.AE.AESEQ", - "AE", "AESER", "text", "CRF", "CL.YN", NA, NA, - "AE", "AESEV", "text", "CRF", "CL.SEV", NA, NA, - "AE", "AESHOSP", "text", "CRF", "CL.YN", NA, NA, - "AE", "AESLIFE", "text", "CRF", "CL.YN", NA, NA, - "AE", "AESOC", "text", "Assigned", "CL.AEDICT", NA, NA, - "AE", "AESOCCD", "integer", "Assigned", NA, NA, NA, - "AE", "AESOD", "text", "CRF", "CL.YN", NA, NA, - "AE", "AESPID", "text", "CRF", NA, NA, NA, - "AE", "AESTDTC", "date", "CRF", NA, NA, NA, - "AE", "AESTDY", "integer", "Derived", NA, NA, "MT.COMPMETHOD.STUDY_DAY", - "AE", "AETERM", "text", "CRF", NA, NA, NA, - "AE", "DOMAIN", "text", "Assigned", NA, NA, NA, - "AE", "EPOCH", "text", "Derived", "CL.EPOCH", NA, "MT.AE.EPOCH", - "AE", "STUDYID", "text", "CRF", NA, NA, NA, - "AE", "USUBJID", "text", "Derived", NA, NA, "MT.AE.USUBJID", - "DM", "ACTARM", "text", "Derived", "CL.ARM", NA, "MT.DM.ACTARM", - "DM", "ACTARMCD", "text", "Derived", "CL.ARMCD", NA, "MT.DM.ACTARMCD", - "DM", "AGE", "integer", "Derived", NA, NA, "MT.DM.AGE", - "DM", "AGEU", "text", "Assigned", "CL.AGEU", NA, NA, - "DM", "ARM", "text", "Assigned", "CL.ARM", NA, NA, - "DM", "ARMCD", "text", "Assigned", "CL.ARMCD", NA, NA, - "DM", "COUNTRY", "text", "Derived", "CL.COUNTRY", NA, "MT.DM.COUNTRY", - "DM", "DMDTC", "date", "CRF", NA, NA, NA, - "DM", "DMDY", "integer", "Derived", NA, NA, "MT.COMPMETHOD.STUDY_DAY", - "DM", "DOMAIN", "text", "Assigned", NA, NA, NA, - "DM", "DTHDTC", "datetime", "Derived", NA, NA, "MT.DM.DTHDTC", - "DM", "DTHFL", "text", "Derived", "CL.Y_BLANK", NA, "MT.DM.DTHFL", - "DM", "ETHNIC", "text", "Derived", "CL.ETHNIC", NA, "MT.DM.ETHNIC", - "DM", "RACE", "text", "CRF", "CL.RACE", NA, NA, - "DM", "RFENDTC", "date", "Derived", NA, NA, "MT.DM.RFENDTC", - "DM", "RFICDTC", "datetime", "Derived", NA, NA, "MT.DM.RFICDTC", - "DM", "RFPENDTC", "datetime", "Derived", NA, NA, "MT.DM.RFPENDTC", - "DM", "RFSTDTC", "date", "Derived", NA, NA, 
"MT.DM.RFSTDTC", - "DM", "RFXENDTC", "datetime", "Derived", NA, NA, "MT.DM.RFXENDTC", - "DM", "RFXSTDTC", "datetime", "Derived", NA, NA, "MT.DM.RFXSTDTC", - "DM", "SEX", "text", "CRF", "CL.SEX", NA, NA, - "DM", "SITEID", "text", "Assigned", NA, NA, NA, - "DM", "STUDYID", "text", "CRF", NA, NA, NA, - "DM", "SUBJID", "text", "CRF", NA, NA, NA, - "DM", "USUBJID", "text", "Derived", NA, NA, "MT.DM.USUBJID", - "EX", "DOMAIN", "text", "Assigned", NA, NA, NA, - "EX", "EPOCH", "text", "Derived", "CL.EPOCH", NA, "MT.EX.EPOCH", - "EX", "EXDOSE", "integer", "eDT", NA, NA, NA, - "EX", "EXDOSFRM", "text", "eDT", "CL.EXDOSFRM", NA, NA, - "EX", "EXDOSFRQ", "text", "eDT", "CL.EXFREQ", NA, NA, - "EX", "EXDOSU", "text", "eDT", "CL.EXDOSEU", NA, NA, - "EX", "EXENDTC", "date", "CRF", NA, NA, NA, - "EX", "EXENDY", "integer", "Derived", NA, NA, "MT.COMPMETHOD.STUDY_DAY", - "EX", "EXROUTE", "text", "eDT", "CL.EXROUTE", NA, NA, - "EX", "EXSEQ", "integer", "Derived", NA, NA, "MT.EX.EXSEQ", - "EX", "EXSTDTC", "date", "CRF", NA, NA, NA, - "EX", "EXSTDY", "integer", "Derived", NA, NA, "MT.COMPMETHOD.STUDY_DAY", - "EX", "EXTRT", "text", "eDT", "CL.EXTRT", NA, NA, - "EX", "STUDYID", "text", "CRF", NA, NA, NA, - "EX", "USUBJID", "text", "Derived", NA, NA, "MT.EX.USUBJID", - "EX", "VISIT", "text", "CRF", "CL.VISIT", NA, NA, - "EX", "VISITDY", "integer", "Derived", NA, NA, "MT.EX.VISITDY", - "EX", "VISITNUM", "float", "CRF", "CL.VISITNUM", NA, NA, - "SUPPAE", "IDVAR", "text", "Assigned", NA, NA, NA, - "SUPPAE", "IDVARVAL", "text", "Derived", NA, NA, "MT.SUPPAE.IDVARVAL", - "SUPPAE", "QEVAL", "text", "Assigned", "CL.QEVAL", NA, NA, - "SUPPAE", "QLABEL", "text", "Assigned", NA, NA, NA, - "SUPPAE", "QNAM", "text", "Assigned", "CL.SUPPAE.QNAM", NA, NA, - "SUPPAE", "QORIG", "text", "Assigned", NA, NA, NA, - "SUPPAE", "QVAL", "text", "Derived", "CL.YN", "QNAM = 'TRTEMFL'", "MT.SUPPAE.QNAM.TRTEMFL", - "SUPPAE", "RDOMAIN", "text", "Assigned", NA, NA, NA, - "SUPPAE", "STUDYID", "text", "CRF", NA, NA, NA, - 
"SUPPAE", "USUBJID", "text", "Derived", NA, NA, "MT.SUPPAE.USUBJID", - "SUPPDM", "IDVAR", "text", "Assigned", NA, NA, NA, - "SUPPDM", "IDVARVAL", "text", "Assigned", NA, NA, NA, - "SUPPDM", "QEVAL", "text", "Assigned", "CL.QEVAL", NA, NA, - "SUPPDM", "QLABEL", "text", "Assigned", NA, NA, NA, - "SUPPDM", "QNAM", "text", "Assigned", "CL.SUPPDM.QNAM", NA, NA, - "SUPPDM", "QORIG", "text", "Assigned", NA, NA, NA, - "SUPPDM", "QVAL", "text", "Derived", "CL.Y_BLANK", "QNAM = 'COMPLT16'", "MT.SUPPDM.QNAM.COMPLT16", - "SUPPDM", "QVAL", "text", "Derived", "CL.Y_BLANK", "QNAM = 'COMPLT24'", "MT.SUPPDM.QNAM.COMPLT24", - "SUPPDM", "QVAL", "text", "Derived", "CL.Y_BLANK", "QNAM = 'COMPLT8'", "MT.SUPPDM.QNAM.COMPLT8", - "SUPPDM", "QVAL", "text", "Derived", "CL.Y_BLANK", "QNAM = 'EFFICACY'", "MT.SUPPDM.QNAM.EFFICACY", - "SUPPDM", "QVAL", "text", "Derived", "CL.Y_BLANK", "QNAM = 'SAFETY'", "MT.SUPPDM.QNAM.SAFETY", - "SUPPDM", "QVAL", "text", "Derived", "CL.Y_BLANK", "QNAM = 'ITT'", "MT.SUPPDM.QNAM.ITT", - "SUPPDM", "RDOMAIN", "text", "Assigned", NA, NA, NA, - "SUPPDM", "STUDYID", "text", "CRF", NA, NA, NA, - "SUPPDM", "USUBJID", "text", "Derived", NA, NA, "MT.SUPPDM.USUBJID" + ~dataset, ~variable, ~type, ~origin, ~code_id, ~sig_dig, ~where, ~derivation_id, + "AE", "AEACN", "text", "Derived", NA, NA, NA, "MT.AE.AEACN", + "AE", "AEBDSYCD", "integer", "Assigned", NA, NA, NA, NA, + "AE", "AEBODSYS", "text", "Assigned", "CL.AEDICT", NA, NA, NA, + "AE", "AEDECOD", "text", "Assigned", "CL.AEDICT", NA, NA, NA, + "AE", "AEDTC", "date", "Derived", NA, NA, NA, "MT.AE.AEDTC", + "AE", "AEDY", "integer", "Derived", NA, NA, NA, "MT.COMPMETHOD.STUDY_DAY", + "AE", "AEENDTC", "date", "CRF", NA, NA, NA, NA, + "AE", "AEENDY", "integer", "Derived", NA, NA, NA, "MT.COMPMETHOD.STUDY_DAY", + "AE", "AEHLGT", "text", "Assigned", "CL.AEDICT", NA, NA, NA, + "AE", "AEHLGTCD", "integer", "Assigned", NA, NA, NA, NA, + "AE", "AEHLT", "text", "Assigned", "CL.AEDICT", NA, NA, NA, + "AE", "AEHLTCD", "integer", 
"Assigned", NA, NA, NA, NA, + "AE", "AELLT", "text", "Assigned", "CL.AEDICT", NA, NA, NA, + "AE", "AELLTCD", "integer", "Assigned", NA, NA, NA, NA, + "AE", "AEOUT", "text", "CRF", "CL.OUT", NA, NA, NA, + "AE", "AEPTCD", "integer", "Assigned", NA, NA, NA, NA, + "AE", "AEREL", "text", "CRF", "CL.AECAUS", NA, NA, NA, + "AE", "AESCAN", "text", "CRF", "CL.YN", NA, NA, NA, + "AE", "AESCONG", "text", "CRF", "CL.YN", NA, NA, NA, + "AE", "AESDISAB", "text", "CRF", "CL.YN", NA, NA, NA, + "AE", "AESDTH", "text", "CRF", "CL.YN", NA, NA, NA, + "AE", "AESEQ", "integer", "Derived", NA, NA, NA, "MT.AE.AESEQ", + "AE", "AESER", "text", "CRF", "CL.YN", NA, NA, NA, + "AE", "AESEV", "text", "CRF", "CL.SEV", NA, NA, NA, + "AE", "AESHOSP", "text", "CRF", "CL.YN", NA, NA, NA, + "AE", "AESLIFE", "text", "CRF", "CL.YN", NA, NA, NA, + "AE", "AESOC", "text", "Assigned", "CL.AEDICT", NA, NA, NA, + "AE", "AESOCCD", "integer", "Assigned", NA, NA, NA, NA, + "AE", "AESOD", "text", "CRF", "CL.YN", NA, NA, NA, + "AE", "AESPID", "text", "CRF", NA, NA, NA, NA, + "AE", "AESTDTC", "date", "CRF", NA, NA, NA, NA, + "AE", "AESTDY", "integer", "Derived", NA, NA, NA, "MT.COMPMETHOD.STUDY_DAY", + "AE", "AETERM", "text", "CRF", NA, NA, NA, NA, + "AE", "DOMAIN", "text", "Assigned", NA, NA, NA, NA, + "AE", "EPOCH", "text", "Derived", "CL.EPOCH", NA, NA, "MT.AE.EPOCH", + "AE", "STUDYID", "text", "CRF", NA, NA, NA, NA, + "AE", "USUBJID", "text", "Derived", NA, NA, NA, "MT.AE.USUBJID", + "DM", "ACTARM", "text", "Derived", "CL.ARM", NA, NA, "MT.DM.ACTARM", + "DM", "ACTARMCD", "text", "Derived", "CL.ARMCD", NA, NA, "MT.DM.ACTARMCD", + "DM", "AGE", "integer", "Derived", NA, NA, NA, "MT.DM.AGE", + "DM", "AGEU", "text", "Assigned", "CL.AGEU", NA, NA, NA, + "DM", "ARM", "text", "Assigned", "CL.ARM", NA, NA, NA, + "DM", "ARMCD", "text", "Assigned", "CL.ARMCD", NA, NA, NA, + "DM", "COUNTRY", "text", "Derived", "CL.COUNTRY", NA, NA, "MT.DM.COUNTRY", + "DM", "DMDTC", "date", "CRF", NA, NA, NA, NA, + "DM", "DMDY", "integer", 
"Derived", NA, NA, NA, "MT.COMPMETHOD.STUDY_DAY", + "DM", "DOMAIN", "text", "Assigned", NA, NA, NA, NA, + "DM", "DTHDTC", "datetime", "Derived", NA, NA, NA, "MT.DM.DTHDTC", + "DM", "DTHFL", "text", "Derived", "CL.Y_BLANK", NA, NA, "MT.DM.DTHFL", + "DM", "ETHNIC", "text", "Derived", "CL.ETHNIC", NA, NA, "MT.DM.ETHNIC", + "DM", "RACE", "text", "CRF", "CL.RACE", NA, NA, NA, + "DM", "RFENDTC", "date", "Derived", NA, NA, NA, "MT.DM.RFENDTC", + "DM", "RFICDTC", "datetime", "Derived", NA, NA, NA, "MT.DM.RFICDTC", + "DM", "RFPENDTC", "datetime", "Derived", NA, NA, NA, "MT.DM.RFPENDTC", + "DM", "RFSTDTC", "date", "Derived", NA, NA, NA, "MT.DM.RFSTDTC", + "DM", "RFXENDTC", "datetime", "Derived", NA, NA, NA, "MT.DM.RFXENDTC", + "DM", "RFXSTDTC", "datetime", "Derived", NA, NA, NA, "MT.DM.RFXSTDTC", + "DM", "SEX", "text", "CRF", "CL.SEX", NA, NA, NA, + "DM", "SITEID", "text", "Assigned", NA, NA, NA, NA, + "DM", "STUDYID", "text", "CRF", NA, NA, NA, NA, + "DM", "SUBJID", "text", "CRF", NA, NA, NA, NA, + "DM", "USUBJID", "text", "Derived", NA, NA, NA, "MT.DM.USUBJID", + "EX", "DOMAIN", "text", "Assigned", NA, NA, NA, NA, + "EX", "EPOCH", "text", "Derived", "CL.EPOCH", NA, NA, "MT.EX.EPOCH", + "EX", "EXDOSE", "integer", "eDT", NA, NA, NA, NA, + "EX", "EXDOSFRM", "text", "eDT", "CL.EXDOSFRM", NA, NA, NA, + "EX", "EXDOSFRQ", "text", "eDT", "CL.EXFREQ", NA, NA, NA, + "EX", "EXDOSU", "text", "eDT", "CL.EXDOSEU", NA, NA, NA, + "EX", "EXENDTC", "date", "CRF", NA, NA, NA, NA, + "EX", "EXENDY", "integer", "Derived", NA, NA, NA, "MT.COMPMETHOD.STUDY_DAY", + "EX", "EXROUTE", "text", "eDT", "CL.EXROUTE", NA, NA, NA, + "EX", "EXSEQ", "integer", "Derived", NA, NA, NA, "MT.EX.EXSEQ", + "EX", "EXSTDTC", "date", "CRF", NA, NA, NA, NA, + "EX", "EXSTDY", "integer", "Derived", NA, NA, NA, "MT.COMPMETHOD.STUDY_DAY", + "EX", "EXTRT", "text", "eDT", "CL.EXTRT", NA, NA, NA, + "EX", "STUDYID", "text", "CRF", NA, NA, NA, NA, + "EX", "USUBJID", "text", "Derived", NA, NA, NA, "MT.EX.USUBJID", + "EX", 
"VISIT", "text", "CRF", "CL.VISIT", NA, NA, NA, + "EX", "VISITDY", "integer", "Derived", NA, NA, NA, "MT.EX.VISITDY", + "EX", "VISITNUM", "float", "CRF", "CL.VISITNUM", 1L, NA, NA, + "SUPPAE", "IDVAR", "text", "Assigned", NA, NA, NA, NA, + "SUPPAE", "IDVARVAL", "text", "Derived", NA, NA, NA, "MT.SUPPAE.IDVARVAL", + "SUPPAE", "QEVAL", "text", "Assigned", "CL.QEVAL", NA, NA, NA, + "SUPPAE", "QLABEL", "text", "Assigned", NA, NA, NA, NA, + "SUPPAE", "QNAM", "text", "Assigned", "CL.SUPPAE.QNAM", NA, NA, NA, + "SUPPAE", "QORIG", "text", "Assigned", NA, NA, NA, NA, + "SUPPAE", "QVAL", "text", "Derived", "CL.YN", NA, "QNAM = 'TRTEMFL'", "MT.SUPPAE.QNAM.TRTEMFL", + "SUPPAE", "RDOMAIN", "text", "Assigned", NA, NA, NA, NA, + "SUPPAE", "STUDYID", "text", "CRF", NA, NA, NA, NA, + "SUPPAE", "USUBJID", "text", "Derived", NA, NA, NA, "MT.SUPPAE.USUBJID", + "SUPPDM", "IDVAR", "text", "Assigned", NA, NA, NA, NA, + "SUPPDM", "IDVARVAL", "text", "Assigned", NA, NA, NA, NA, + "SUPPDM", "QEVAL", "text", "Assigned", "CL.QEVAL", NA, NA, NA, + "SUPPDM", "QLABEL", "text", "Assigned", NA, NA, NA, NA, + "SUPPDM", "QNAM", "text", "Assigned", "CL.SUPPDM.QNAM", NA, NA, NA, + "SUPPDM", "QORIG", "text", "Assigned", NA, NA, NA, NA, + "SUPPDM", "QVAL", "text", "Derived", "CL.Y_BLANK", NA, "QNAM = 'COMPLT16'", "MT.SUPPDM.QNAM.COMPLT16", + "SUPPDM", "QVAL", "text", "Derived", "CL.Y_BLANK", NA, "QNAM = 'COMPLT24'", "MT.SUPPDM.QNAM.COMPLT24", + "SUPPDM", "QVAL", "text", "Derived", "CL.Y_BLANK", NA, "QNAM = 'COMPLT8'", "MT.SUPPDM.QNAM.COMPLT8", + "SUPPDM", "QVAL", "text", "Derived", "CL.Y_BLANK", NA, "QNAM = 'EFFICACY'", "MT.SUPPDM.QNAM.EFFICACY", + "SUPPDM", "QVAL", "text", "Derived", "CL.Y_BLANK", NA, "QNAM = 'SAFETY'", "MT.SUPPDM.QNAM.SAFETY", + "SUPPDM", "QVAL", "text", "Derived", "CL.Y_BLANK", NA, "QNAM = 'ITT'", "MT.SUPPDM.QNAM.ITT", + "SUPPDM", "RDOMAIN", "text", "Assigned", NA, NA, NA, NA, + "SUPPDM", "STUDYID", "text", "CRF", NA, NA, NA, NA, + "SUPPDM", "USUBJID", "text", "Derived", NA, NA, NA, 
"MT.SUPPDM.USUBJID" ) # Read from define def_value_spec <- xml_to_value_spec(define) %>% - arrange(dataset, variable) + arrange(dataset, variable) %>% + select(dataset, variable, type, origin, code_id, sig_dig, where, derivation_id ) # Read from spec spec_value_spec <- spec_type_to_value_spec(spec) %>% arrange(dataset, variable) %>% - select(dataset, variable, type, origin, code_id, where, derivation_id) %>% + select(dataset, variable, type, origin, code_id, sig_dig, where, derivation_id) %>% #Fix naming as it is slightly different, but matches within metacore mutate(code_id = if_else(!is.na(code_id), paste0("CL.", code_id), code_id, NA_character_), derivation_id = if_else(!is.na(derivation_id), paste0("MT.", derivation_id), NA_character_), diff --git a/tests/testthat/test-utils.R b/tests/testthat/test-utils.R index 6b02fad..2561cf5 100644 --- a/tests/testthat/test-utils.R +++ b/tests/testthat/test-utils.R @@ -49,5 +49,6 @@ test_that("add labels adds NULL to missing labels", { test_that("metacore example returns file options", { expect_equal(sort(metacore_example()), sort(c("ADaM_define.xml", "mock_spec.xlsx", "p21_mock.xlsx", "pilot_ADaM.rda", + "pilot_SDTM.rda", "SDTM_define.xml", "SDTM_spec_CDISC_pilot.xlsx"))) }) diff --git a/tests/testthat/test-validators.R b/tests/testthat/test-validators.R index 1b4fb22..2fa5a78 100644 --- a/tests/testthat/test-validators.R +++ b/tests/testthat/test-validators.R @@ -18,9 +18,9 @@ test_that("specific words and primitive columns fail when character", { "value_spec", "derivations", "codelist", - "changelog")) + "supp")) - expect_warning(do.call(check_columns, dfs[-7])) + expect_warning(do.call(check_columns, dfs)) }) @@ -33,9 +33,9 @@ test_that("NA columns fail", { "value_spec", "derivations", "codelist", - "changelog")) + "supp")) - expect_error(do.call(check_columns, dfs[-7])) + expect_error(do.call(check_columns, dfs)) }) @@ -48,17 +48,17 @@ test_that("NA columns fail", { "value_spec", "derivations", "codelist", - 
"changelog")) + "supp")) dfs$ds_spec$label <- NA - expect_warning(do.call(check_columns, dfs[-7])) + expect_warning(do.call(check_columns, dfs)) }) test_that("all_message dataframe contains 6 datasets", { expect_equal(all_message() %>% distinct(dataset) %>% - nrow(), 6) + nrow(), 7) }) test_that("check cross-reference tests", { @@ -69,7 +69,7 @@ test_that("check cross-reference tests", { "value_spec", "derivations", "codelist", - "changelog")) + "supp")) dfs$var_spec <- dfs$var_spec %>% mutate(variable = "B") @@ -77,7 +77,7 @@ test_that("check cross-reference tests", { mutate(derivation_id = "C") dfs$codelist <- dfs$codelist %>% mutate(code_id = "D") - expect_warning(do.call(metacore, dfs[1:6])) + expect_warning(do.call(metacore, dfs[1:7])) }) test_that("test for incorrect column names", { @@ -88,9 +88,9 @@ test_that("test for incorrect column names", { "value_spec", "derivations", "codelist", - "changelog")) + "supp")) dfs$codelist <- dfs$codelist %>% mutate(codelist2 = "A") - expect_warning(do.call(metacore, dfs[1:6])) + expect_warning(do.call(metacore, dfs[1:7])) }) diff --git a/vignettes/Building_Specification_Readers.Rmd b/vignettes/Building_Specification_Readers.Rmd index db74a38..3e57b50 100644 --- a/vignettes/Building_Specification_Readers.Rmd +++ b/vignettes/Building_Specification_Readers.Rmd @@ -43,7 +43,7 @@ As we can see, the mock spec we are using here doesn't match the format. Therefo Here is a schema of how all this fits together -![](../man/figures/labeled-schema.png "Metacore Schema") +![](../man/figures/labeled_schema.png "Metacore Schema") ds_spec is connected to ds_vars by the 'dataset' variable and ds_vars is connected to var_spec by the 'variable' variable, etc. For more information on the make-up of metacore objects please see the README.