Fixes for sampled usage and docs - changed api

OHDSI · Aug 26, 2024 · e10ac60
1 parent cbf145f
commit e10ac60
Show file tree

Hide file tree

Showing 6 changed files with 40 additions and 27 deletions.
diff --git a/DESCRIPTION b/DESCRIPTION
@@ -61,7 +61,7 @@ License: Apache License
 VignetteBuilder: knitr
 URL: https://ohdsi.github.io/CohortDiagnostics, https://github.com/OHDSI/CohortDiagnostics
 BugReports: https://github.com/OHDSI/CohortDiagnostics/issues
-RoxygenNote: 7.2.3
+RoxygenNote: 7.3.2
 Encoding: UTF-8
 Language: en-US
 StagedInstall: no

diff --git a/R/CohortLevelDiagnostics.R b/R/CohortLevelDiagnostics.R
@@ -53,7 +53,7 @@ getCohortCounts <- function(connectionDetails = NULL,
     )
   counts <-
     DatabaseConnector::querySql(connection, sql, snakeCaseToCamelCase = TRUE) %>%
-    tidyr::tibble()
+      tidyr::tibble()
 
   if (length(cohortIds) > 0) {
     cohortIdDf <- tidyr::tibble(cohortId = as.numeric(cohortIds))
@@ -97,7 +97,8 @@ computeCohortCounts <- function(connection,
                                 cohorts,
                                 exportFolder,
                                 minCellCount,
-                                databaseId) {
+                                databaseId,
+                                writeResult = TRUE) {
   ParallelLogger::logInfo("Counting cohort records and subjects")
   cohortCounts <- getCohortCounts(
     connection = connection,
@@ -117,11 +118,13 @@ computeCohortCounts <- function(connection,
     databaseId = databaseId
   )
 
-  writeToCsv(
-    data = cohortCounts,
-    fileName = file.path(exportFolder, "cohort_count.csv"),
-    incremental = FALSE,
-    cohortId = cohorts$cohortId
-  )
+  if (writeResult) {
+    writeToCsv(
+      data = cohortCounts,
+      fileName = file.path(exportFolder, "cohort_count.csv"),
+      incremental = FALSE,
+      cohortId = cohorts$cohortId
+    )
+  }
   return(cohortCounts)
 }
diff --git a/R/Incremental.R b/R/Incremental.R
@@ -153,7 +153,7 @@ writeToCsv <- function(data, fileName, incremental = FALSE, ...) {
   UseMethod("writeToCsv", data)
 }
 
-
+#' @noRd
 writeToCsv.default <- function(data, fileName, incremental = FALSE, ...) {
   colnames(data) <- SqlRender::camelCaseToSnakeCase(colnames(data))
   if (incremental) {
@@ -186,6 +186,7 @@ writeToCsv.default <- function(data, fileName, incremental = FALSE, ...) {
   }
 }
 
+#' @noRd
 writeToCsv.tbl_Andromeda <-
   function(data, fileName, incremental = FALSE, ...) {
     if (incremental && file.exists(fileName)) {

diff --git a/R/Private.R b/R/Private.R
@@ -317,8 +317,9 @@ getPrefixedTableNames <- function(tablePrefix) {
   return(resultList)
 }
 
-#' @noRd
+
 #' Internal utility function for logging execution of variables
+#' @noRd
 timeExecution <- function(exportFolder,
                           taskName,
                           cohortIds = NULL,

diff --git a/R/RunDiagnostics.R b/R/RunDiagnostics.R
@@ -148,11 +148,6 @@ getDefaultCovariateSettings <- function() {
 #' @param seedArgs                    List. Additional arguments to pass to the sampling function.
 #'                                    This can be used to control aspects of the sampling process beyond the seed and sample size.
 #'
-#' @param sampleIdentifierExpression Character. An expression that generates unique identifiers for each sample.
-#'                                   This expression can use the variables 'cohortId' and 'seed'.
-#'                                   Default is "cohortId * 1000 + seed", which ensures unique identifiers
-#'                                   as long as there are fewer than 1000 cohorts.
-
 #' @examples
 #' \dontrun{
 #' # Load cohorts (assumes that they have already been instantiated)
@@ -857,8 +852,15 @@ executeDiagnostics <- function(cohortDefinitionSet,
 
         feCohortDefinitionSet <- cohortDefinitionSet
         feCohortTable <- cohortTable
+        feCohortCounts <- cohortCounts
 
         if (runFeatureExtractionOnSample & !isTRUE(attr(cohortDefinitionSet, "isSampledCohortDefinition"))) {
+          cohortTableNames$cohortSampleTable <- paste0(cohortTableNames$cohortTable, "_cd_sample")
+          CohortGenerator::createCohortTables(connection = connection,
+                                              cohortTableNames = cohortTableNames,
+                                              cohortDatabaseSchema = cohortDatabaseSchema,
+                                              incremental = TRUE)
+
           feCohortTable <- cohortTableNames$cohortSampleTable
           feCohortDefinitionSet <-
             CohortGenerator::sampleCohortDefinitionSet(
@@ -870,10 +872,21 @@ executeDiagnostics <- function(cohortDefinitionSet,
               n = sampleN,
               seed = seed,
               seedArgs = seedArgs,
-              identifierExpression = sampleIdentifierExpression,
+              identifierExpression = "cohortId",
               incremental = incremental,
               incrementalFolder = incrementalFolder
             )
+
+          feCohortCounts <- computeCohortCounts(
+            connection = connection,
+            cohortDatabaseSchema = cohortDatabaseSchema,
+            cohortTable =  cohortTableNames$cohortSampleTable,
+            cohorts = feCohortDefinitionSet,
+            exportFolder = exportFolder,
+            minCellCount = minCellCount,
+            databaseId = databaseId,
+            writeResult = FALSE
+          )
         }
 
 
@@ -888,7 +901,7 @@ executeDiagnostics <- function(cohortDefinitionSet,
           tempEmulationSchema = tempEmulationSchema,
           cdmVersion = cdmVersion,
           cohorts = feCohortDefinitionSet,
-          cohortCounts = cohortCounts,
+          cohortCounts = feCohortCounts,
           minCellCount = minCellCount,
           instantiatedCohorts = instantiatedCohorts,
           incremental = incremental,

diff --git a/man/executeDiagnostics.Rd b/man/executeDiagnostics.Rd