diff --git a/R/Execution.R b/R/Execution.R index ad6272c9..a303cbec 100644 --- a/R/Execution.R +++ b/R/Execution.R @@ -96,53 +96,10 @@ execute <- function(analysisSpecifications, # modulesToExecute are present in the analysis specification # before attempting to subset the analyses to run. if (length(executionSettings$modulesToExecute) > 0) { - # Get the modules in the analysis specification with their - # index in the array - modulesWithIndex <- lapply( - X = seq_along(analysisSpecifications$moduleSpecifications), - FUN = function(i) { - list( - idx = i, - module = analysisSpecifications$moduleSpecifications[[i]]$module - ) - } - ) - modulesInAnalysisSpecification <- sapply( - X = modulesWithIndex, - FUN = function(x) { - x$module - } - ) - - modulesToExecuteString <- paste(executionSettings$modulesToExecute, collapse = ", ") - modulesInAnalysisSpecificationString <- paste(modulesInAnalysisSpecification, collapse = ", ") - - # Stop if we cannot find all of the requested modules - # to execute in the overall analysis specification - if (!all(tolower(executionSettings$modulesToExecute) %in% tolower(modulesInAnalysisSpecification))) { - errorMsg <- paste0( - "The executionSettings specified to run only the modules: ", - modulesToExecuteString, - ".\n However the analysis specification includes the following modules: ", - modulesInAnalysisSpecificationString - ) - stop(errorMsg) - } - - # Subset the analysis specifications to those modules - # specified by the user - cli::cli_alert_info(paste0("Runnning a subset of modules: ", modulesToExecuteString)) - moduleSubset <- unlist( - lapply( - X = modulesWithIndex, - FUN = function(x) { - if (tolower(x$module) %in% tolower(executionSettings$modulesToExecute)) { - return(x$idx) - } - } - ) + analysisSpecifications <- .subsetAnalysisSpecificationByModulesToExecute( + analysisSpecifications = analysisSpecifications, + modulesToExecute = executionSettings$modulesToExecute ) - analysisSpecifications$moduleSpecifications <- analysisSpecifications$moduleSpecifications[moduleSubset] } if (is(executionSettings, "CdmExecutionSettings")) { @@ -275,3 +232,54 @@ execute <- function(analysisSpecifications, error <- cli::combine_ansi_styles("red") cat(error(paste0("ERROR: ", message, "\n"))) } + +.subsetAnalysisSpecificationByModulesToExecute <- function(analysisSpecifications, modulesToExecute) { + # Get the modules in the analysis specification with their + # index in the array + modulesWithIndex <- lapply( + X = seq_along(analysisSpecifications$moduleSpecifications), + FUN = function(i) { + list( + idx = i, + module = analysisSpecifications$moduleSpecifications[[i]]$module + ) + } + ) + modulesInAnalysisSpecification <- sapply( + X = modulesWithIndex, + FUN = function(x) { + x$module + } + ) + + modulesToExecuteString <- paste(modulesToExecute, collapse = ", ") + modulesInAnalysisSpecificationString <- paste(modulesInAnalysisSpecification, collapse = ", ") + + # Stop if we cannot find all of the requested modules + # to execute in the overall analysis specification + if (!all(tolower(modulesToExecute) %in% tolower(modulesInAnalysisSpecification))) { + errorMsg <- paste0( + "The executionSettings specified to run only the modules: ", + modulesToExecuteString, + ".\n However the analysis specification includes the following modules: ", + modulesInAnalysisSpecificationString + ) + stop(errorMsg) + } + + # Subset the analysis specifications to those modules + # specified by the user + cli::cli_alert_info(paste0("Runnning a subset of modules: ", modulesToExecuteString)) + moduleSubset <- unlist( + lapply( + X = modulesWithIndex, + FUN = function(x) { + if (tolower(x$module) %in% tolower(modulesToExecute)) { + return(x$idx) + } + } + ) + ) + analysisSpecifications$moduleSpecifications <- analysisSpecifications$moduleSpecifications[moduleSubset] + return(analysisSpecifications) +} diff --git a/R/ResultDataModel.R b/R/ResultDataModel.R index 06cea307..465fd972 100644 --- a/R/ResultDataModel.R +++ b/R/ResultDataModel.R @@ -41,6 +41,18 @@ createResultDataModel <- function(analysisSpecifications, resultsDataModelSettings = resultsDataModelSettings ) + # Determine if the user has opted to subset to specific modules + # in the analysis specification. If so, validate that the + # modulesToExecute are present in the analysis specification + # before attempting to subset the analyses to run. + if (length(resultsDataModelSettings$modulesToExecute) > 0) { + analysisSpecifications <- .subsetAnalysisSpecificationByModulesToExecute( + analysisSpecifications = analysisSpecifications, + modulesToExecute = resultsDataModelSettings$modulesToExecute + ) + } + + for (i in 1:length(analysisSpecifications$moduleSpecifications)) { moduleName <- analysisSpecifications$moduleSpecifications[[i]]$module moduleObj <- get(moduleName)$new() @@ -77,6 +89,17 @@ uploadResults <- function(analysisSpecifications, resultsDataModelSettings = resultsDataModelSettings ) + # Determine if the user has opted to subset to specific modules + # in the analysis specification. If so, validate that the + # modulesToExecute are present in the analysis specification + # before attempting to subset the analyses to run. + if (length(resultsDataModelSettings$modulesToExecute) > 0) { + analysisSpecifications <- .subsetAnalysisSpecificationByModulesToExecute( + analysisSpecifications = analysisSpecifications, + modulesToExecute = resultsDataModelSettings$modulesToExecute + ) + } + for (i in 1:length(analysisSpecifications$moduleSpecifications)) { moduleName <- analysisSpecifications$moduleSpecifications[[i]]$module moduleObj <- get(moduleName)$new() diff --git a/R/Settings.R b/R/Settings.R index e1b77cc0..e7c52d97 100644 --- a/R/Settings.R +++ b/R/Settings.R @@ -258,8 +258,7 @@ createEmptyAnalysisSpecificiations <- function() { #' and attempt to pick up where they left off when this value is set to TRUE. #' @param maxCores The maximum number of processing cores to use for execution. The default is to #' use all available cores on the machine. -#' @param modulesToExecute (Optional) A vector with the list of modules to execute. When an empty vector/NULL is supplied (default), -#' all modules in the analysis specification are executed. +#' @template modulesToExecute #' #' @return #' An object of type `ExecutionSettings`. @@ -312,8 +311,7 @@ createCdmExecutionSettings <- function(workDatabaseSchema, #' in results. #' @param maxCores The maximum number of processing cores to use for execution. The default is to #' use all available cores on the machine. -#' @param modulesToExecute (Optional) A vector with the list of modules to execute. When an empty vector/NULL is supplied (default), -#' all modules in the analysis specification are executed. +#' @template modulesToExecute #' #' @return #' An object of type `ExecutionSettings`. @@ -358,6 +356,7 @@ createResultsExecutionSettings <- function(resultsDatabaseSchema, #' @template resultsDatabaseSchema #' @template resultsFolder #' @param logFileName Log location for data model operations +#' @template modulesToExecute #' #' @return #' An object of type `ResultsDataModelSettings` @@ -365,11 +364,13 @@ createResultsExecutionSettings <- function(resultsDatabaseSchema, #' @export createResultsDataModelSettings <- function(resultsDatabaseSchema, resultsFolder, - logFileName = file.path(resultsFolder, "strategus-results-data-model-log.txt")) { + logFileName = file.path(resultsFolder, "strategus-results-data-model-log.txt"), + modulesToExecute = c()) { errorMessages <- checkmate::makeAssertCollection() checkmate::assertCharacter(resultsDatabaseSchema, len = 1, add = errorMessages) checkmate::assertCharacter(resultsFolder, len = 1, add = errorMessages) checkmate::assertCharacter(logFileName, len = 1, add = errorMessages) + checkmate::assertVector(modulesToExecute, null.ok = TRUE, add = errorMessages) checkmate::reportAssertions(collection = errorMessages) # Normalize paths to convert relative paths to absolute paths diff --git a/man-roxygen/modulesToExecute.R b/man-roxygen/modulesToExecute.R new file mode 100644 index 00000000..ac937b31 --- /dev/null +++ b/man-roxygen/modulesToExecute.R @@ -0,0 +1,2 @@ +#' @param modulesToExecute (Optional) A vector with the list of modules to execute. When an empty vector/NULL is supplied (default), +#' all modules in the analysis specification are executed. diff --git a/man/createResultsDataModelSettings.Rd b/man/createResultsDataModelSettings.Rd index 28afc453..6fafdadb 100644 --- a/man/createResultsDataModelSettings.Rd +++ b/man/createResultsDataModelSettings.Rd @@ -7,7 +7,8 @@ createResultsDataModelSettings( resultsDatabaseSchema, resultsFolder, - logFileName = file.path(resultsFolder, "strategus-results-data-model-log.txt") + logFileName = file.path(resultsFolder, "strategus-results-data-model-log.txt"), + modulesToExecute = c() ) } \arguments{ @@ -16,6 +17,9 @@ createResultsDataModelSettings( \item{resultsFolder}{The root folder holding the study results.} \item{logFileName}{Log location for data model operations} + +\item{modulesToExecute}{(Optional) A vector with the list of modules to execute. When an empty vector/NULL is supplied (default), +all modules in the analysis specification are executed.} } \value{ An object of type \code{ResultsDataModelSettings} diff --git a/tests/testthat/test-Execution.R b/tests/testthat/test-Execution.R index 064eb31c..365d5d7d 100644 --- a/tests/testthat/test-Execution.R +++ b/tests/testthat/test-Execution.R @@ -282,6 +282,49 @@ test_that("Specify subset of modules to run", { ) expect_true(all(modulesExecuted %in% modulesToExecute)) + + # Create a results DB and upload results + dbFilePath <- file.path(tempDir, "testdm.sqlite") + mydb <- dbConnect(RSQLite::SQLite(), dbFilePath) + dbDisconnect(mydb) + + withr::defer( + { + unlink(dbFilePath, recursive = TRUE, force = TRUE) + }, + testthat::teardown_env() + ) + + resultsConnectionDetails <- DatabaseConnector::createConnectionDetails( + dbms = "sqlite", + server = dbFilePath + ) + + resultsDataModelSettings <- Strategus::createResultsDataModelSettings( + resultsDatabaseSchema = "main", + resultsFolder = executionSettings$resultsFolder, + modulesToExecute = modulesToExecute + ) + + # Create cdm modules results data model ------------------------- + cdmModulesAnalysisSpecifications <- ParallelLogger::loadSettingsFromJson( + fileName = system.file("testdata/cdmModulesAnalysisSpecifications.json", + package = "Strategus" + ) + ) + + Strategus::createResultDataModel( + analysisSpecifications = cdmModulesAnalysisSpecifications, + resultsDataModelSettings = resultsDataModelSettings, + resultsConnectionDetails = resultsConnectionDetails + ) + + # Upload cdm related results -------------------- + Strategus::uploadResults( + analysisSpecifications = analysisSpecifications, + resultsDataModelSettings = resultsDataModelSettings, + resultsConnectionDetails = resultsConnectionDetails + ) }) test_that("Stop if error occurs during cohort generation", {