From cbf145f54aeff9b957a56d2d85c02a9879f89a06 Mon Sep 17 00:00:00 2001 From: jgilber2 Date: Wed, 14 Aug 2024 12:41:13 -0700 Subject: [PATCH] New branch for testing patch --- DESCRIPTION | 2 +- R/RunDiagnostics.R | 104 ++++++++++++----------- tests/testthat/test-1-ResultsDataModel.R | 4 +- tests/testthat/test-2-againstCdm.R | 6 +- 4 files changed, 61 insertions(+), 55 deletions(-) diff --git a/DESCRIPTION b/DESCRIPTION index 52d02780e..e13a4810d 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -38,7 +38,7 @@ Imports: SqlRender (>= 1.9.0), stringr, tidyr (>= 1.2.0), - CohortGenerator (>= 0.8.0), + CohortGenerator (>= 0.10.0), remotes, scales Suggests: diff --git a/R/RunDiagnostics.R b/R/RunDiagnostics.R index 0f0f8cab8..d93c02ba7 100644 --- a/R/RunDiagnostics.R +++ b/R/RunDiagnostics.R @@ -136,11 +136,11 @@ getDefaultCovariateSettings <- function() { #' @param incremental Create only cohort diagnostics that haven't been created before? #' @param incrementalFolder If \code{incremental = TRUE}, specify a folder where records are kept #' of which cohort diagnostics has been executed. -#' @param runOnSample Logical. If TRUE, the function will operate on a sample of the data. +#' @param runFeatureExtractionOnSample Logical. If TRUE, the function will operate on a sample of the data. #' Default is FALSE, meaning the function will operate on the full data set. #' -#' @param sampleN Integer. The number of records to include in the sample if runOnSample is TRUE. -#' Default is 1000. Ignored if runOnSample is FALSE. +#' @param sampleN Integer. The number of records to include in the sample if runFeatureExtractionOnSample is TRUE. +#' Default is 1000. Ignored if runFeatureExtractionOnSample is FALSE. #' #' @param seed Integer. The seed for the random number generator used to create the sample. #' This ensures that the same sample can be drawn again in future runs. Default is 64374. @@ -228,7 +228,7 @@ executeDiagnostics <- function(cohortDefinitionSet, irWashoutPeriod = 0, incremental = FALSE, incrementalFolder = file.path(exportFolder, "incremental"), - runOnSample = FALSE, + runFeatureExtractionOnSample = FALSE, sampleN = 1000, seed = 64374, seedArgs = NULL, @@ -250,7 +250,7 @@ executeDiagnostics <- function(cohortDefinitionSet, incremental = callingArgs$incremental, temporalCovariateSettings = callingArgs$temporalCovariateSettings ) %>% - RJSONIO::toJSON(digits = 23, pretty = TRUE) + RJSONIO::toJSON(digits = 23, pretty = TRUE) exportFolder <- normalizePath(exportFolder, mustWork = FALSE) incrementalFolder <- normalizePath(incrementalFolder, mustWork = FALSE) @@ -279,25 +279,25 @@ executeDiagnostics <- function(cohortDefinitionSet, errorMessage <- checkmate::makeAssertCollection() checkmate::assertList(cohortTableNames, null.ok = FALSE, types = "character", add = errorMessage, names = "named") checkmate::assertNames(names(cohortTableNames), - must.include = c( - "cohortTable", - "cohortInclusionTable", - "cohortInclusionResultTable", - "cohortInclusionStatsTable", - "cohortSummaryStatsTable", - "cohortCensorStatsTable" - ), - add = errorMessage + must.include = c( + "cohortTable", + "cohortInclusionTable", + "cohortInclusionResultTable", + "cohortInclusionStatsTable", + "cohortSummaryStatsTable", + "cohortCensorStatsTable" + ), + add = errorMessage ) checkmate::assertDataFrame(cohortDefinitionSet, add = errorMessage) checkmate::assertNames(names(cohortDefinitionSet), - must.include = c( - "json", - "cohortId", - "cohortName", - "sql" - ), - add = errorMessage + must.include = c( + "json", + "cohortId", + "cohortName", + "sql" + ), + add = errorMessage ) cohortTable <- cohortTableNames$cohortTable @@ -474,17 +474,17 @@ executeDiagnostics <- function(cohortDefinitionSet, sort() cohortTableColumnNamesExpected <- getResultsDataModelSpecifications() %>% - dplyr::filter(.data$tableName == "cohort") %>% - dplyr::pull(.data$columnName) %>% - SqlRender::snakeCaseToCamelCase() %>% - sort() + dplyr::filter(.data$tableName == "cohort") %>% + dplyr::pull(.data$columnName) %>% + SqlRender::snakeCaseToCamelCase() %>% + sort() cohortTableColumnNamesRequired <- getResultsDataModelSpecifications() %>% - dplyr::filter(.data$tableName == "cohort") %>% - dplyr::filter(.data$isRequired == "Yes") %>% - dplyr::pull(.data$columnName) %>% - SqlRender::snakeCaseToCamelCase() %>% - sort() + dplyr::filter(.data$tableName == "cohort") %>% + dplyr::filter(.data$isRequired == "Yes") %>% + dplyr::pull(.data$columnName) %>% + SqlRender::snakeCaseToCamelCase() %>% + sort() expectedButNotObsevered <- setdiff(x = cohortTableColumnNamesExpected, y = cohortTableColumnNamesObserved) @@ -549,23 +549,6 @@ executeDiagnostics <- function(cohortDefinitionSet, } } - if (runOnSample & !isTRUE(attr(cohortDefinitionSet, "isSampledCohortDefinition"))) { - cohortDefinitionSet <- - CohortGenerator::sampleCohortDefinitionSet( - connection = connection, - cohortDefinitionSet = cohortDefinitionSet, - tempEmulationSchema = tempEmulationSchema, - cohortDatabaseSchema = cohortDatabaseSchema, - cohortTableNames = cohortTableNames, - n = sampleN, - seed = seed, - seedArgs = seedArgs, - identifierExpression = sampleIdentifierExpression, - incremental = incremental, - incrementalFolder = incrementalFolder - ) - } - ## CDM source information---- timeExecution( exportFolder, @@ -871,17 +854,40 @@ executeDiagnostics <- function(cohortDefinitionSet, cohortIds, parent = "executeDiagnostics", expr = { + + feCohortDefinitionSet <- cohortDefinitionSet + feCohortTable <- cohortTable + + if (runFeatureExtractionOnSample & !isTRUE(attr(cohortDefinitionSet, "isSampledCohortDefinition"))) { + feCohortTable <- cohortTableNames$cohortSampleTable + feCohortDefinitionSet <- + CohortGenerator::sampleCohortDefinitionSet( + connection = connection, + cohortDefinitionSet = cohortDefinitionSet, + tempEmulationSchema = tempEmulationSchema, + cohortDatabaseSchema = cohortDatabaseSchema, + cohortTableNames = cohortTableNames, + n = sampleN, + seed = seed, + seedArgs = seedArgs, + identifierExpression = sampleIdentifierExpression, + incremental = incremental, + incrementalFolder = incrementalFolder + ) + } + + executeCohortCharacterization( connection = connection, databaseId = databaseId, exportFolder = exportFolder, cdmDatabaseSchema = cdmDatabaseSchema, cohortDatabaseSchema = cohortDatabaseSchema, - cohortTable = cohortTable, + cohortTable = feCohortTable, covariateSettings = temporalCovariateSettings, tempEmulationSchema = tempEmulationSchema, cdmVersion = cdmVersion, - cohorts = cohortDefinitionSet, + cohorts = feCohortDefinitionSet, cohortCounts = cohortCounts, minCellCount = minCellCount, instantiatedCohorts = instantiatedCohorts, diff --git a/tests/testthat/test-1-ResultsDataModel.R b/tests/testthat/test-1-ResultsDataModel.R index 07d71a30e..9c34d976c 100644 --- a/tests/testthat/test-1-ResultsDataModel.R +++ b/tests/testthat/test-1-ResultsDataModel.R @@ -123,7 +123,7 @@ VALUES ('Synthea','Synthea','OHDSI Community','SyntheaTM is a Synthetic Patient incremental = TRUE, incrementalFolder = file.path(folder, "incremental"), temporalCovariateSettings = temporalCovariateSettings, - runOnSample = TRUE + runFeatureExtractionOnSample = TRUE ) }, "CDM Source table has more than one record while only one is expected." @@ -149,7 +149,7 @@ VALUES ('Synthea','Synthea','OHDSI Community','SyntheaTM is a Synthetic Patient incremental = TRUE, incrementalFolder = file.path(folder, "incremental"), temporalCovariateSettings = temporalCovariateSettings, - runOnSample = TRUE + runFeatureExtractionOnSample = TRUE ) } diff --git a/tests/testthat/test-2-againstCdm.R b/tests/testthat/test-2-againstCdm.R index fa2ffeb15..ccac99aac 100644 --- a/tests/testthat/test-2-againstCdm.R +++ b/tests/testthat/test-2-againstCdm.R @@ -45,7 +45,7 @@ test_that("Cohort diagnostics in incremental mode", { incremental = TRUE, incrementalFolder = file.path(folder, "incremental"), temporalCovariateSettings = temporalCovariateSettings, - runOnSample = TRUE + runFeatureExtractionOnSample = TRUE ) ) @@ -76,7 +76,7 @@ test_that("Cohort diagnostics in incremental mode", { incremental = TRUE, incrementalFolder = file.path(folder, "incremental"), temporalCovariateSettings = temporalCovariateSettings, - runOnSample = TRUE + runFeatureExtractionOnSample = TRUE ) ) # generate sqlite file @@ -123,7 +123,7 @@ test_that("Cohort diagnostics in incremental mode", { incremental = FALSE, incrementalFolder = file.path(folder, "incremental"), temporalCovariateSettings = temporalCovariateSettings, - runOnSample = TRUE + runFeatureExtractionOnSample = TRUE ) })